[med-svn] [roary] 01/07: Imported Upstream version 3.5.7+dfsg

Sascha Steinbiss sascha at steinbiss.name
Sun Dec 20 14:17:49 UTC 2015


This is an automated email from the git hooks/post-receive script.

sascha-guest pushed a commit to branch master
in repository roary.

commit 5b94ea18ab9b2346f0f40040da63a6bd1c4402d7
Author: Sascha Steinbiss <sascha at steinbiss.name>
Date:   Thu Dec 17 15:29:00 2015 +0000

    Imported Upstream version 3.5.7+dfsg
---
 .gitignore                                         |   36 +
 .travis.yml                                        |   29 +
 AUTHORS                                            |    2 +
 GPL-LICENSE                                        |  680 ++++++
 README.md                                          |   92 +
 bin/create_pan_genome                              |   19 +
 bin/create_pan_genome_plots.R                      |   59 +
 bin/extract_proteome_from_gff                      |   19 +
 bin/iterative_cdhit                                |   19 +
 bin/pan_genome_assembly_statistics                 |   19 +
 bin/pan_genome_core_alignment                      |   19 +
 bin/pan_genome_post_analysis                       |   19 +
 bin/pan_genome_reorder_spreadsheet                 |   19 +
 bin/parallel_all_against_all_blastp                |   19 +
 bin/protein_alignment_from_nucleotides             |   19 +
 bin/query_pan_genome                               |   19 +
 bin/roary                                          |   19 +
 bin/transfer_annotation_to_groups                  |   19 +
 deployment_process                                 |    7 +
 dist.ini                                           |   32 +
 install_dependencies.sh                            |  248 +++
 lib/Bio/Roary.pm                                   |  146 ++
 lib/Bio/Roary/AccessoryBinaryFasta.pm              |  100 +
 lib/Bio/Roary/AccessoryClustering.pm               |   95 +
 lib/Bio/Roary/AnalyseGroups.pm                     |  119 ++
 lib/Bio/Roary/AnnotateGroups.pm                    |  356 ++++
 lib/Bio/Roary/AssemblyStatistics.pm                |  207 ++
 lib/Bio/Roary/BedFromGFFRole.pm                    |   71 +
 lib/Bio/Roary/ChunkFastaFile.pm                    |   77 +
 lib/Bio/Roary/ClustersRole.pm                      |   70 +
 lib/Bio/Roary/CombinedProteome.pm                  |   51 +
 lib/Bio/Roary/CommandLine/AssemblyStatistics.pm    |  134 ++
 lib/Bio/Roary/CommandLine/Common.pm                |   57 +
 lib/Bio/Roary/CommandLine/CreatePanGenome.pm       |   72 +
 .../Roary/CommandLine/ExtractProteomeFromGff.pm    |  121 ++
 .../CommandLine/GeneAlignmentFromNucleotides.pm    |  134 ++
 lib/Bio/Roary/CommandLine/IterativeCdhit.pm        |  119 ++
 .../CommandLine/ParallelAllAgainstAllBlastp.pm     |  141 ++
 lib/Bio/Roary/CommandLine/QueryRoary.pm            |  253 +++
 lib/Bio/Roary/CommandLine/Roary.pm                 |  356 ++++
 lib/Bio/Roary/CommandLine/RoaryCoreAlignment.pm    |  143 ++
 lib/Bio/Roary/CommandLine/RoaryPostAnalysis.pm     |  237 +++
 .../Roary/CommandLine/RoaryReorderSpreadsheet.pm   |  100 +
 .../CommandLine/TransferAnnotationToGroups.pm      |  108 +
 lib/Bio/Roary/ContigsToGeneIDsFromGFF.pm           |  145 ++
 lib/Bio/Roary/Exceptions.pm                        |   16 +
 lib/Bio/Roary/External/Blastp.pm                   |   68 +
 lib/Bio/Roary/External/Cdhit.pm                    |  103 +
 lib/Bio/Roary/External/CheckTools.pm               |  190 ++
 lib/Bio/Roary/External/Fasttree.pm                 |   73 +
 .../Roary/External/GeneAlignmentFromNucleotides.pm |  138 ++
 lib/Bio/Roary/External/IterativeCdhit.pm           |   92 +
 lib/Bio/Roary/External/Mafft.pm                    |   76 +
 lib/Bio/Roary/External/Makeblastdb.pm              |   72 +
 lib/Bio/Roary/External/Mcl.pm                      |  106 +
 lib/Bio/Roary/External/PostAnalysis.pm             |  187 ++
 lib/Bio/Roary/External/Prank.pm                    |   77 +
 lib/Bio/Roary/ExtractCoreGenesFromSpreadsheet.pm   |  170 ++
 lib/Bio/Roary/ExtractProteomeFromGFF.pm            |  197 ++
 lib/Bio/Roary/ExtractProteomeFromGFFs.pm           |   80 +
 lib/Bio/Roary/FilterFullClusters.pm                |  144 ++
 lib/Bio/Roary/FilterUnknownsFromFasta.pm           |   88 +
 lib/Bio/Roary/GeneNamesFromGFF.pm                  |   73 +
 lib/Bio/Roary/GroupLabels.pm                       |   62 +
 lib/Bio/Roary/GroupStatistics.pm                   |  242 +++
 lib/Bio/Roary/InflateClusters.pm                   |  109 +
 lib/Bio/Roary/IterativeCdhit.pm                    |  114 +
 lib/Bio/Roary/JobRunner/Local.pm                   |   64 +
 lib/Bio/Roary/JobRunner/Parallel.pm                |   69 +
 lib/Bio/Roary/JobRunner/Role.pm                    |   65 +
 lib/Bio/Roary/LookupGeneFiles.pm                   |   53 +
 lib/Bio/Roary/MergeMultifastaAlignments.pm         |  121 ++
 lib/Bio/Roary/OrderGenes.pm                        |  391 ++++
 lib/Bio/Roary/Output/BlastIdentityFrequency.pm     |   62 +
 .../Output/CoreGeneAlignmentCoordinatesEMBL.pm     |   81 +
 lib/Bio/Roary/Output/DifferenceBetweenSets.pm      |  126 ++
 lib/Bio/Roary/Output/EMBLHeaderCommon.pm           |   41 +
 lib/Bio/Roary/Output/EmblGroups.pm                 |  246 +++
 lib/Bio/Roary/Output/GroupMultifasta.pm            |   69 +
 lib/Bio/Roary/Output/GroupsMultifastaNucleotide.pm |  166 ++
 lib/Bio/Roary/Output/GroupsMultifastaProtein.pm    |   68 +
 lib/Bio/Roary/Output/GroupsMultifastas.pm          |   46 +
 .../Roary/Output/GroupsMultifastasNucleotide.pm    |   86 +
 lib/Bio/Roary/Output/NumberOfGroups.pm             |  121 ++
 lib/Bio/Roary/Output/QueryGroups.pm                |  139 ++
 lib/Bio/Roary/ParallelAllAgainstAllBlast.pm        |  143 ++
 lib/Bio/Roary/ParseGFFAnnotationRole.pm            |   32 +
 lib/Bio/Roary/PostAnalysis.pm                      |  353 ++++
 lib/Bio/Roary/PrepareInputFiles.pm                 |  116 ++
 lib/Bio/Roary/PresenceAbsenceMatrix.pm             |   84 +
 lib/Bio/Roary/QC/Report.pm                         |  241 +++
 lib/Bio/Roary/ReformatInputGFFs.pm                 |  162 ++
 lib/Bio/Roary/ReorderSpreadsheet.pm                |  133 ++
 lib/Bio/Roary/SampleOrder.pm                       |   50 +
 lib/Bio/Roary/SequenceLengths.pm                   |   43 +
 lib/Bio/Roary/SortFasta.pm                         |  127 ++
 lib/Bio/Roary/SplitGroups.pm                       |  324 +++
 lib/Bio/Roary/SpreadsheetRole.pm                   |   66 +
 t/00_requires_external.t                           |   20 +
 t/Bio/Roary/AccessoryBinaryFasta.t                 |   72 +
 t/Bio/Roary/AccessoryClustering.t                  |  104 +
 t/Bio/Roary/AnalyseGroups.t                        |   70 +
 t/Bio/Roary/AnnotateGroups.t                       |  102 +
 t/Bio/Roary/AssemblyStatistics.t                   |  121 ++
 t/Bio/Roary/ChunkFastaFile.t                       |   41 +
 t/Bio/Roary/CombinedProteome.t                     |   39 +
 t/Bio/Roary/CommandLine/ExtractProteomeFromGff.t   |   28 +
 .../CommandLine/GeneAlignmentFromNucleotides.t     |   41 +
 .../CommandLine/ParallelAllAgainstAllBlastp.t      |   29 +
 t/Bio/Roary/CommandLine/QueryRoary.t               |   71 +
 t/Bio/Roary/CommandLine/Roary.t                    |  322 +++
 t/Bio/Roary/CommandLine/RoaryCoreAlignment.t       |   27 +
 t/Bio/Roary/CommandLine/RoaryPostAnalysis.t        |  108 +
 t/Bio/Roary/CommandLine/RoaryReorderSpreadsheet.t  |   52 +
 .../Roary/CommandLine/TransferAnnotationToGroups.t |   26 +
 t/Bio/Roary/ContigsToGeneIDsFromGFF.t              |   76 +
 t/Bio/Roary/EmblGroups.t                           |   47 +
 t/Bio/Roary/External/Blastp.t                      |   44 +
 t/Bio/Roary/External/Cdhit.t                       |   39 +
 t/Bio/Roary/External/CheckTools.t                  |   23 +
 t/Bio/Roary/External/Mafft.t                       |   43 +
 t/Bio/Roary/External/Makeblastdb.t                 |   32 +
 t/Bio/Roary/External/Mcl.t                         |   54 +
 t/Bio/Roary/External/Prank.t                       |   44 +
 t/Bio/Roary/ExtractCoreGenesFromSpreadsheet.t      |   33 +
 t/Bio/Roary/ExtractProteomeFromGFFs.t              |   82 +
 t/Bio/Roary/FilterFullClusters.t                   |   36 +
 t/Bio/Roary/GeneNamesFromGFF.t                     |   94 +
 t/Bio/Roary/GroupLabels.t                          |   25 +
 t/Bio/Roary/GroupStatistics.t                      |   79 +
 t/Bio/Roary/InflateClusters.t                      |   40 +
 t/Bio/Roary/OrderGenes.t                           |  148 ++
 .../Output/CoreGeneAlignmentCoorindatesEMBL.t      |   42 +
 t/Bio/Roary/Output/DifferenceBetweenSets.t         |   38 +
 t/Bio/Roary/Output/GroupsMultifastaProtein.t       |   26 +
 t/Bio/Roary/Output/GroupsMultifastas.t             |   45 +
 t/Bio/Roary/Output/GroupsMultifastasNucleotide.t   |   91 +
 t/Bio/Roary/Output/NumberOfGroups.t                |   71 +
 t/Bio/Roary/Output/QueryGroups.t                   |   73 +
 t/Bio/Roary/ParallelAllAgainstAllBlast.t           |   30 +
 t/Bio/Roary/PrepareInputFiles.t                    |   56 +
 t/Bio/Roary/PresenceAbsenceMatrix.t                |   97 +
 t/Bio/Roary/QC/Report.t                            |  100 +
 t/Bio/Roary/ReformatInputGFFs.t                    |   69 +
 t/Bio/Roary/ReorderSpreadsheet.t                   |   36 +
 t/Bio/Roary/SampleOrder.t                          |  164 ++
 t/Bio/Roary/SequenceLengths.t                      |   34 +
 t/Bio/Roary/SortFasta.t                            |   58 +
 t/Bio/Roary/SplitGroups.t                          |   70 +
 t/bin/dummy_blastp                                 |   16 +
 t/bin/dummy_cd-hit                                 |    5 +
 t/bin/dummy_makeblastdb                            |   18 +
 t/bin/dummy_mcl                                    |   12 +
 t/bin/dummy_mcxdeblast                             |    2 +
 t/bin/dummy_segmasker                              |   15 +
 t/data/accessory_graphs/core_deletion              |    8 +
 t/data/accessory_graphs/core_island                |    8 +
 t/data/accessory_graphs/file_1.fa                  |   54 +
 t/data/accessory_graphs/file_1.gff                 |   36 +
 t/data/accessory_graphs/file_2.fa                  |   36 +
 t/data/accessory_graphs/file_2.gff                 |   27 +
 t/data/accessory_graphs/file_3.fa                  |   36 +
 t/data/accessory_graphs/file_3.gff                 |   27 +
 t/data/accessory_graphs/no_accessory               |    3 +
 t/data/accessory_graphs/one_branch                 |    5 +
 t/data/accessory_graphs/one_bubble                 |    6 +
 t/data/accessory_graphs/single_gene_contig         |    1 +
 t/data/accessory_graphs/two_graphs                 |    4 +
 t/data/blast_results                               |   13 +
 t/data/clustered_proteins                          |   12 +
 t/data/clustered_proteins_pan_genome               |   21 +
 t/data/clustered_proteins_post_analysis            |   13 +
 t/data/clusters_input.fa                           |   20 +
 t/data/clusters_to_inflate                         |   40 +
 t/data/clusters_to_inflate.mcl                     |    4 +
 t/data/clusters_to_inflate_original_input.fa       |   60 +
 t/data/clustersfile                                |   33 +
 t/data/core_alignment.csv                          |    4 +
 t/data/core_alignment/argF.fa.aln                  |    4 +
 t/data/core_alignment/hly.fa.aln                   |    4 +
 t/data/core_alignment/speH.fa.aln                  |    4 +
 t/data/core_alignment_core0.66.csv                 |    4 +
 .../expected_core_gene_alignment.aln               |   45 +
 t/data/core_alignment_gene_lookup/query_1.gff      |   29 +
 t/data/core_alignment_gene_lookup/query_2.gff      |   29 +
 t/data/core_alignment_gene_lookup/query_3.gff      |   29 +
 t/data/core_group_statistics.csv                   |   11 +
 t/data/empty_file                                  |    0
 t/data/example_1.faa                               |   29 +
 t/data/example_2.faa                               |   27 +
 t/data/example_3.faa                               |   43 +
 t/data/example_annotation.gff                      |  271 +++
 .../example_annotation.gff.proteome.faa.expected   |   75 +
 t/data/example_annotation_2.gff                    |  271 +++
 t/data/example_groups                              |    6 +
 t/data/example_groups_without_labels               |    6 +
 t/data/exp_qc_report.csv                           |    4 +
 t/data/exp_qc_report_real.csv                      |    3 +
 t/data/expected_0.seq                              |    4 +
 t/data/expected_5.seq                              |    6 +
 t/data/expected_accessory_binary_genes.fa          |    8 +
 t/data/expected_accessory_binary_genes_bounded.fa  |    8 +
 t/data/expected_clustered_proteins                 |    9 +
 t/data/expected_clusters_to_inflate                |    4 +
 t/data/expected_combined_proteome.fa               |   56 +
 .../expected_combined_proteome_with_filtering.fa   |   82 +
 t/data/expected_complement_of_groups.gg            |    6 +
 t/data/expected_complement_of_groups_core0.66.gg   |    3 +
 t/data/expected_core_60_summary_statistics.txt     |    5 +
 t/data/expected_core_gene_alignment.aln            |    4 +
 t/data/expected_core_gene_alignment_core0.66.aln   |    6 +
 t/data/expected_create_pan_genome.fa               |   56 +
 t/data/expected_example_annotation_1.faa           |   30 +
 t/data/expected_filtered_original_input.fa         |   48 +
 t/data/expected_g2_g5_pan_genome_reference.fa      |   70 +
 t/data/expected_gene_presence_and_absence.Rtab     |    8 +
 ...ed_gff_set_difference_common_set_statistics.csv |    4 +
 t/data/expected_group_labels                       |    6 +
 t/data/expected_group_statitics.csv                |    8 +
 t/data/expected_group_statitics_missing_genes.csv  |    8 +
 t/data/expected_group_statitics_verbose.csv        |    8 +
 t/data/expected_inflated_results                   |    5 +
 t/data/expected_intersection_of_groups.gg          |    1 +
 t/data/expected_intersection_of_groups_core0.66.gg |    4 +
 t/data/expected_intersection_of_groups_paralogs.gg |    2 +
 t/data/expected_mafft_input.fa.aln                 |   66 +
 ...xpected_mafft_real_data_core_gene_alignment.aln | 2170 ++++++++++++++++++++
 t/data/expected_nnn_at_end.fa                      |    8 +
 t/data/expected_nuc_multifasta.fa.aln              |   90 +
 t/data/expected_nuc_multifasta.faa                 |   48 +
 t/data/expected_nuc_multifasta_mafft.fa.aln        |   90 +
 t/data/expected_number_of_conserved_genes.tab      |   10 +
 t/data/expected_number_of_conserved_genes_0.6.tab  |   10 +
 t/data/expected_number_of_genes_in_pan_genome.tab  |   10 +
 t/data/expected_number_of_new_genes.tab            |   10 +
 t/data/expected_number_of_unique_genes.tab         |   10 +
 t/data/expected_one_gene_presence_and_absence.Rtab |    2 +
 t/data/expected_out_of_order_fasta.fa.sorted.fa    |   10 +
 t/data/expected_output_core_missing_genes.aln      |   10 +
 t/data/expected_output_filtered.fa                 |   18 +
 t/data/expected_output_groups                      |    2 +
 t/data/expected_output_groups_cdhit                |    1 +
 t/data/expected_output_groups_group_2.fa           |   19 +
 t/data/expected_output_groups_group_2_multi.fa     |   19 +
 t/data/expected_output_groups_group_5.fa           |    9 +
 t/data/expected_output_groups_group_5_multi.fa     |    9 +
 t/data/expected_output_merged.aln                  |    8 +
 t/data/expected_output_merged_sparse.aln           |    8 +
 t/data/expected_pan_genome.fa                      |   33 +
 t/data/expected_pan_genome_one_gene_per_fasta.fa   |   33 +
 t/data/expected_pan_genome_reference.fa            | 1466 +++++++++++++
 t/data/expected_prank_input.fa.aln                 |   66 +
 t/data/expected_query_1.fna                        |  252 +++
 t/data/expected_query_2.fna                        |  252 +++
 t/data/expected_real_data_core_gene_alignment.aln  | 2168 +++++++++++++++++++
 t/data/expected_reannotated_groups_file            |    7 +
 t/data/expected_sample_weights_accessory_graph.dot |   15 +
 ...xpected_sample_weights_core_accessory_graph.dot |   22 +
 t/data/expected_set_difference_common_set          |    3 +
 ...pected_set_difference_common_set_statistics.csv |    4 +
 t/data/expected_set_difference_unique_set_one      |    1 +
 ...ed_set_difference_unique_set_one_statistics.csv |    2 +
 t/data/expected_set_difference_unique_set_two      |    3 +
 ...ed_set_difference_unique_set_two_statistics.csv |    4 +
 t/data/expected_some_different_output              |   95 +
 t/data/expected_summary_statistics.txt             |    5 +
 t/data/expected_uneven_sequences.fa                |   18 +
 t/data/expected_union_of_groups.gg                 |    7 +
 t/data/genbank_gbff/genbank1.gff                   |  195 ++
 .../genbank1.gff.proteome.faa.expected             |   49 +
 t/data/genbank_gbff/genbank2.gff                   |  282 +++
 .../genbank2.gff.proteome.faa.expected             |   26 +
 t/data/genbank_gbff/genbank3.gff                   |  282 +++
 .../genbank3.gff.proteome.faa.expected             |   26 +
 .../genbank_gbff/genbank_gene_presence_absence.csv |   12 +
 t/data/gene_category_count.csv                     |    1 +
 t/data/group_1.fa.aln                              |   84 +
 t/data/group_9.fa                                  |   15 +
 t/data/input_accessory_binary.fa                   |   20 +
 t/data/input_block_spreadsheet.csv                 |   52 +
 t/data/kraken_report.txt                           |   19 +
 t/data/kraken_test/database.idx                    |  Bin 0 -> 8208 bytes
 t/data/kraken_test/database.jdb                    |  Bin 0 -> 2872 bytes
 t/data/kraken_test/database.kdb                    |  Bin 0 -> 2872 bytes
 t/data/kraken_test/taxonomy/names.dmp              |   77 +
 t/data/kraken_test/taxonomy/nodes.dmp              |   12 +
 t/data/large_accessory_binary_genes.fa             |   94 +
 t/data/locus_tag_gffs/query_1.gff                  |  271 +++
 .../query_1.gff.proteome.faa.expected              |   75 +
 t/data/locus_tag_gffs/query_2.gff                  |  271 +++
 .../query_2.gff.proteome.faa.expected              |   75 +
 t/data/locus_tag_gffs/query_3.gff                  |  271 +++
 .../query_3.gff.proteome.faa.expected              |   75 +
 t/data/locus_tag_gffs/query_5.gff                  |  271 +++
 .../query_5.gff.proteome.faa.expected              |   75 +
 t/data/mafft_input.fa                              |   59 +
 t/data/mcl_file                                    |    5 +
 t/data/mdoH.fa.aln                                 |   88 +
 t/data/mdoH_mafft.fa.aln                           |   88 +
 t/data/multfasta1.aln                              |    8 +
 t/data/multfasta2.aln                              |    8 +
 t/data/multfasta3.aln                              |    8 +
 t/data/multfasta4.aln                              |    7 +
 t/data/multfasta5.aln                              |    7 +
 t/data/multifasta_files/expected_output.embl       |   19 +
 t/data/nnn_at_end.fa                               |    8 +
 t/data/nnn_at_end.fa.sorted.fa                     |    8 +
 t/data/nuc_multifasta.fa                           |  112 +
 t/data/nuc_to_be_aligned.fa                        |   90 +
 t/data/out_of_order_fasta.fa                       |   10 +
 t/data/out_of_order_fasta.fa.sorted.fa             |   10 +
 t/data/overall_gene_presence_absence.csv           |   22 +
 t/data/pan_genome_sequences/argF.fa                |   36 +
 t/data/pan_genome_sequences/hly.fa                 |   51 +
 t/data/pan_genome_sequences/speH.fa                |   28 +
 t/data/post_analysis/_clustered.clstr              |   55 +
 t/data/post_analysis/_combined_files               |  169 ++
 t/data/post_analysis/_combined_files.groups        |    0
 t/data/post_analysis/_fasta_files                  |    3 +
 t/data/post_analysis/_gff_files                    |    3 +
 t/data/post_analysis/_uninflated_mcl_groups        |    0
 t/data/post_analysis/query_1.gff.proteome.faa      |   75 +
 t/data/post_analysis/query_2.gff.proteome.faa      |   75 +
 t/data/post_analysis/query_6.gff.proteome.faa      |   19 +
 .../post_analysis_expected/accessory.header.embl   |    7 +
 t/data/post_analysis_expected/accessory.tab        |    0
 .../core_accessory.header.embl                     |   55 +
 t/data/post_analysis_expected/core_accessory.tab   |   48 +
 .../gene_presence_absence.csv                      |   14 +
 t/data/prank_input.fa                              |   59 +
 t/data/proteome_with_and_without_descriptions.faa  |   29 +
 t/data/query_1.fa                                  |    8 +
 t/data/query_1.gff                                 |  271 +++
 t/data/query_1_alternative_patterns.gff            |  262 +++
 t/data/query_2.fa                                  |    8 +
 t/data/query_2.gff                                 |  271 +++
 t/data/query_3.fa                                  |    8 +
 t/data/query_3.gff                                 |  271 +++
 t/data/query_4_missing_genes.fa                    |    2 +
 t/data/query_4_missing_genes.gff                   |  256 +++
 t/data/query_5.gff                                 |  271 +++
 t/data/query_6.gff                                 |  271 +++
 t/data/query_groups                                |    7 +
 t/data/query_groups_all_merged                     |    1 +
 t/data/query_groups_missing_genes                  |    7 +
 t/data/query_groups_paralogs                       |    6 +
 t/data/query_groups_reference                      |    6 +
 t/data/raxml.tre                                   |    1 +
 t/data/real_data_1.gff                             | 1641 +++++++++++++++
 t/data/real_data_2.gff                             | 1641 +++++++++++++++
 t/data/real_data_core_gene_alignment.aln           | 1950 ++++++++++++++++++
 .../reformat_input_gffs/expected_fixed_query_2.gff |  220 ++
 .../reformat_input_gffs/expected_fixed_query_3.gff |  220 ++
 t/data/reformat_input_gffs/expected_real_1.gff     |  223 ++
 t/data/reformat_input_gffs/query_1.gff             |  220 ++
 t/data/reformat_input_gffs/query_2.gff             |  220 ++
 t/data/reformat_input_gffs/query_3.gff             |  220 ++
 t/data/reformat_input_gffs/real_1.gff              |  223 ++
 t/data/reorder_isolates.tre                        |    1 +
 t/data/reorder_isolates_expected_output.csv        |    8 +
 ...rder_isolates_expected_output_breadth_alpha.csv |    8 +
 ...r_isolates_expected_output_breadth_creation.csv |    8 +
 ...der_isolates_expected_output_breadth_height.csv |    8 +
 ...r_isolates_expected_output_breadth_revalpha.csv |    8 +
 ...eorder_isolates_expected_output_depth_alpha.csv |    8 +
 ...der_isolates_expected_output_depth_creation.csv |    8 +
 ...order_isolates_expected_output_depth_height.csv |    8 +
 ...der_isolates_expected_output_depth_revalpha.csv |    8 +
 t/data/reorder_isolates_input.csv                  |    8 +
 t/data/sequences_with_unknowns.faa                 |   26 +
 t/data/shred1.gff                                  |    9 +
 t/data/shred1.shred.fa                             |   16 +
 t/data/shred2.gff                                  |    9 +
 t/data/shred2.shred.fa                             |   16 +
 t/data/sopB.fa.aln                                 |   60 +
 t/data/speH.fa.aln                                 |   32 +
 t/data/split_groups/paralog_clusters1              |   13 +
 t/data/split_groups/paralog_clusters2              |   11 +
 t/data/split_groups/paralog_clusters3              |   13 +
 t/data/split_groups/paralog_clusters4              |   14 +
 t/data/split_groups/paralog_exp_clusters1          |   14 +
 t/data/split_groups/paralog_exp_clusters2          |   12 +
 t/data/split_groups/paralog_exp_clusters3          |   15 +
 t/data/split_groups/paralog_exp_clusters4          |   17 +
 t/data/split_groups/paralogs1.fa                   |   50 +
 t/data/split_groups/paralogs2.fa                   |   50 +
 t/data/split_groups/paralogs3.fa                   |   50 +
 t/data/split_pan_genome_sequences/argF.fa          |   36 +
 t/data/split_pan_genome_sequences/different.fa     |   38 +
 t/data/split_pan_genome_sequences/hly.fa           |   34 +
 .../reannotated_groups_file                        |    4 +
 t/data/split_pan_genome_sequences/speH.fa          |   28 +
 t/data/uneven_sequences.fa                         |   18 +
 t/data/uneven_sequences.fa.sorted.fa               |   18 +
 t/data/variable_core/gene_1.fa.aln                 |   10 +
 t/data/variable_core/gene_2.fa.aln                 |    8 +
 t/data/variable_core/gene_3.fa.aln                 |    8 +
 t/data/variable_core/gene_4.fa.aln                 |    4 +
 t/data/variable_core/gene_5.fa.aln                 |   10 +
 t/dummy_blastp                                     |    3 +
 t/dummy_cd-hit                                     |    5 +
 t/dummy_makeblastdb                                |    5 +
 t/lib/TestHelper.pm                                |  323 +++
 403 files changed, 37165 insertions(+)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5a1d3b0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,36 @@
+.DS_Store
+blib/
+.build/
+_build/
+cover_db/
+inc/
+Build
+!Build/
+Build.bat
+.last_cover_stats
+Makefile
+Makefile.old
+MANIFEST.bak
+META.yml
+MYMETA.yml
+nytprof.out
+pm_to_blib
+_clustered
+_clustered.bak.clstr
+example_1.faa.tmp.filtered.fa
+example_2.faa.tmp.filtered.fa
+pan_genome.fa
+query_1.fa.tmp.filtered.fa
+query_2.fa.tmp.filtered.fa
+query_3.fa.tmp.filtered.fa
+accessory.header.embl
+blast_identity_frequency.Rtab
+core_accessory.header.embl
+reannotated_groups_file
+set_difference_common_set_reannotated
+set_difference_unique_set_one_reannotated
+set_difference_unique_set_two_reannotated
+extras/
+bin/shred_assemblies
+Bio-Roary-*
+Roary-*
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..e7d60b1
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,29 @@
+language: perl
+sudo: false
+addons:
+  apt:
+    packages:
+    - libssl-dev
+    - mafft
+cache:
+  directories:
+  - build/parallel-20150522
+  - build/parallel-20141022
+  - build/parallel-20130922
+  - build/bedtools2
+  - build/cd-hit-v4.6.3-2015-0515
+  - build/prank-msa-master
+  - build/ncbi-blast-2.2.30+
+  - build/mcl-14-137
+  - build/fasttree
+perl:
+  - "5.10"
+  - "5.14"
+  - "5.20"
+env:
+  - PARALLEL_VERSION=20150522
+  - PARALLEL_VERSION=20141022
+  - PARALLEL_VERSION=20130922
+install:
+  - "source ./install_dependencies.sh"
+script: "dzil test"
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000..f9d4e15
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,2 @@
+Andrew J. Page (ap13 at sanger.ac.uk)
+Carla A. Cummins (cc21 at sanger.ac.uk)
diff --git a/GPL-LICENSE b/GPL-LICENSE
new file mode 100644
index 0000000..3f03f8b
--- /dev/null
+++ b/GPL-LICENSE
@@ -0,0 +1,680 @@
+This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
+
+This is free software, licensed under:
+
+  The GNU General Public License, Version 3, June 2007
+
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.  We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors.  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights.  Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received.  You must make sure that they, too, receive
+or can get the source code.  And you must show them these terms so they
+know their rights.
+
+  Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+  For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software.  For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+  Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so.  This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software.  The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable.  Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products.  If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+  Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary.  To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Use with the GNU Affero General Public License.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..6c3ece6
--- /dev/null
+++ b/README.md
@@ -0,0 +1,92 @@
+#Roary the pan genome pipeline
+For instructions on how to use the software, the input format and output formats, please see [the Roary website](http://sanger-pathogens.github.io/Roary).
+
+[![Build Status](https://travis-ci.org/sanger-pathogens/Roary.svg?branch=master)](https://travis-ci.org/sanger-pathogens/Roary)
+
+Roary is a high speed stand alone pan genome pipeline, which takes annotated assemblies in GFF3 format (produced by Prokka) and calculates the pan genome.  Using a standard desktop PC, it can analyse datasets with thousands of samples, something which is computationally infeasible with existing methods, without compromising the quality of the results.  128 samples can be analysed in under 1 hour using 1 GB of RAM and a single processor. To perform this analysis using existing methods wou [...]
+
+##Citation
+    "Roary: Rapid large-scale prokaryote pan genome analysis",
+    Andrew J. Page, Carla A. Cummins, Martin Hunt, Vanessa K. Wong, Sandra Reuter, Matthew T. G. Holden, Maria Fookes, Daniel Falush, Jacqueline A. Keane, Julian Parkhill,
+    Bioinformatics, (2015). doi: http://dx.doi.org/10.1093/bioinformatics/btv421
+[Roary: Rapid large-scale prokaryote pan genome analysis](http://dx.doi.org/10.1093/bioinformatics/btv421)
+
+# Installation
+There are a number of dependencies required for Roary, with instructions specific to the type of system you have:
+* Ubuntu/Debian
+* CentOS/RedHat
+* Homebrew/Linuxbrew - OSX/Linux
+* Installing from source - OSX/Linux
+* Virtual Machine - OSX/Linux/Windows
+
+If the installation fails please contact your system administrator. If you encounter a bug please let us know by emailing roary at sanger.ac.uk .
+
+##Ubuntu/Debian
+All the dependencies can be installed using apt and cpanm (tested on Ubuntu 14.04). Root permissions are required.
+
+```
+sudo apt-get install bedtools cd-hit ncbi-blast+ mcl parallel cpanminus prank mafft fasttree
+sudo cpanm -f Bio::Roary
+```   
+
+###Ubuntu 12.04
+Some of the software versions in apt are quite old so follow the instructions for [LinuxBrew](http://brew.sh/linuxbrew/) below.
+
+##CentOS/RedHat
+To install the dependencies, the easiest way is to install [LinuxBrew](http://brew.sh/linuxbrew/) using the steps for Fedora, then follow the steps below for installing Roary on LinuxBrew.
+
+##Homebrew/Linuxbrew - OSX/Linux
+Assuming you have [homebrew](http://brew.sh/) (OSX) or [linuxbrew](http://brew.sh/linuxbrew/) (Linux) setup and installed on your system:
+
+```
+brew tap homebrew/science
+brew install bedtools cd-hit blast mcl parallel prank mafft fasttree cpanm
+sudo cpanm -f Bio::Roary
+```
+
+##Virtual Machine - OSX/Linux/Windows
+Roary won't run natively on Windows, but we have created a virtual machine which has all of the software set up, including Prokka, along with the test datasets from the paper. It is based on [Bio-Linux 8](http://environmentalomics.org/bio-linux/).  You first need to install [VirtualBox](https://www.virtualbox.org/), then load the virtual machine using the 'File -> Import Appliance' menu option. The root password is 'manager'.
+
+ftp://ftp.sanger.ac.uk/pub/pathogens/pathogens-vm/pathogens-vm.latest.ova
+
+More importantly though, if you're trying to do bioinformatics on Windows, you're not going to get very far and you should seriously consider upgrading to Linux.
+
+##Installing from source (advanced Linux users only)
+As a last resort you can install everything from source. This is for users with advanced Linux skills and we do not provide any support with this method since you have the skills to figure things out.
+Download the latest software from (https://github.com/sanger-pathogens/Roary/tarball/master).
+
+Choose somewhere to put it, for example in your home directory (no root access required):
+
+```
+cd $HOME
+tar zxvf sanger-pathogens-Roary-xxxxxx.tar.gz
+ls Roary-*
+```
+
+Add the following lines to your $HOME/.bashrc file, or to /etc/profile.d/roary.sh to make it available to all users:
+
+```
+export PATH=$PATH:$HOME/Roary-x.x.x/bin
+export PERL5LIB=$PERL5LIB:$HOME/Roary-x.x.x/lib
+```
+Install the Perl dependencies:
+
+```
+sudo cpanm  Array::Utils Bio::Perl Exception::Class File::Basename File::Copy File::Find::Rule File::Grep File::Path File::Slurper File::Spec File::Temp File::Which FindBin Getopt::Long Graph Graph::Writer::Dot List::Util Log::Log4perl Moose Moose::Role Text::CSV PerlIO::utf8_strict 
+```
+Install the external dependencies either from source or from your packaging system:
+```
+bedtools cd-hit blast mcl GNUparallel prank mafft fasttree
+```
+
+## Ancient systems and versions of perl
+The code will not work with perl 5.8 or below (pre-modern perl). If you're running a very old version of Linux, you're also in trouble.
+
+#Versions of software we test against
+* Perl 5.10, 5.14, 5.16, 5.18, 5.20
+* cdhit 4.6.1
+* ncbi blast+ 2.2.30
+* mcl 14-137
+* bedtools 2.20.1
+* prank 130410
+* GNU parallel 20130922, 20141022, 20150122
diff --git a/bin/create_pan_genome b/bin/create_pan_genome
new file mode 100755
index 0000000..0382093
--- /dev/null
+++ b/bin/create_pan_genome
@@ -0,0 +1,19 @@
+#!/usr/bin/env perl
+
+package Bio::Roary::Main::CreatePanGenome;
+
+# ABSTRACT: Create a pan genome from a set of GFF files with WTSI defaults
+# PODNAME: create_pan_genome
+
+=head1 SYNOPSIS
+
+Create a pan genome from a set of GFF files with WTSI defaults
+
+=cut
+
+use strict; use warnings;
+use Cwd qw(abs_path);
+BEGIN { unshift( @INC, abs_path('./lib') ) }    # ./lib is resolved against the current working directory
+BEGIN { unshift( @INC, abs_path('./t/lib') ) }  # unshifted last, so ./t/lib is searched before ./lib
+use Bio::Roary::CommandLine::CreatePanGenome;
+Bio::Roary::CommandLine::CreatePanGenome->new(args => \@ARGV, script_name => $0)->run;  # raw arguments + script name, then run
diff --git a/bin/create_pan_genome_plots.R b/bin/create_pan_genome_plots.R
new file mode 100755
index 0000000..8bf392e
--- /dev/null
+++ b/bin/create_pan_genome_plots.R
@@ -0,0 +1,59 @@
+#!/usr/bin/env Rscript
+# ABSTRACT: Create R plots
+# PODNAME: create_plots.R
+# Take the output files from the pan genome pipeline and create nice plots.
+library(ggplot2)
+# Base-graphics summaries: a box plot for each gene-count table, then the blastp identity plot.
+mydata <- read.table("number_of_new_genes.Rtab")
+boxplot(mydata, data = mydata, varwidth = TRUE, outline = FALSE, ylim = c(0, max(mydata)),
+        main = "Number of new genes", xlab = "No. of genomes", ylab = "No. of genes")
+
+mydata <- read.table("number_of_conserved_genes.Rtab")
+boxplot(mydata, data = mydata, varwidth = TRUE, outline = FALSE, ylim = c(0, max(mydata)),
+        main = "Number of conserved genes", xlab = "No. of genomes", ylab = "No. of genes")
+
+mydata <- read.table("number_of_genes_in_pan_genome.Rtab")
+boxplot(mydata, data = mydata, varwidth = TRUE, outline = FALSE, ylim = c(0, max(mydata)),
+        main = "No. of genes in the pan-genome", xlab = "No. of genomes", ylab = "No. of genes")
+
+mydata <- read.table("number_of_unique_genes.Rtab")
+boxplot(mydata, data = mydata, varwidth = TRUE, outline = FALSE, ylim = c(0, max(mydata)),
+        main = "Number of unique genes", xlab = "No. of genomes", ylab = "No. of genes")
+
+mydata <- read.table("blast_identity_frequency.Rtab")
+plot(mydata, xlab = "Blast percentage identity", ylab = "No. blast results",
+     main = "Number of blastp hits with different percentage identity")
+
+
+library(ggplot2)
+conserved = colMeans(read.table("number_of_conserved_genes.Rtab"))
+total = colMeans(read.table("number_of_genes_in_pan_genome.Rtab"))
+# Long-format table: the column means of both files, an index per column, and a Key naming the series.
+genes = data.frame( genes_to_genomes = c(conserved,total),
+                    genomes = c(seq_along(conserved),seq_along(total)),
+                    Key = c(rep("Conserved genes",length(conserved)), rep("Total genes",length(total))) )
+# Draw the line plot, then save it with a separate ggsave() call: ggsave() is not a layer and must not be chained with "+".
+ggplot(data = genes, aes(x = genomes, y = genes_to_genomes, group = Key, linetype=Key)) +geom_line()+
+theme_classic() +
+ylim(c(1,max(total)))+
+xlim(c(1,length(total)))+
+xlab("No. of genomes") +
+ylab("No. of genes")+ theme_bw(base_size = 16) +  theme(legend.justification=c(0,1),legend.position=c(0,1))
+ggsave(filename="conserved_vs_total_genes.png", scale=1)
+
+######################
+
+unique_genes = colMeans(read.table("number_of_unique_genes.Rtab"))
+new_genes = colMeans(read.table("number_of_new_genes.Rtab"))
+# Long-format table: the column means of both files, an index per column, and a Key naming the series.
+genes = data.frame( genes_to_genomes = c(unique_genes,new_genes),
+                    genomes = c(seq_along(unique_genes),seq_along(new_genes)),
+                    Key = c(rep("Unique genes",length(unique_genes)), rep("New genes",length(new_genes))) )
+# Draw the line plot, then save it with a separate ggsave() call: ggsave() is not a layer and must not be chained with "+".
+ggplot(data = genes, aes(x = genomes, y = genes_to_genomes, group = Key, linetype=Key)) +geom_line()+
+theme_classic() +
+ylim(c(1,max(unique_genes)))+
+xlim(c(1,length(unique_genes)))+
+xlab("No. of genomes") +
+ylab("No. of genes")+ theme_bw(base_size = 16) +  theme(legend.justification=c(1,1),legend.position=c(1,1))
+ggsave(filename="unique_vs_new_genes.png", scale=1)
diff --git a/bin/extract_proteome_from_gff b/bin/extract_proteome_from_gff
new file mode 100755
index 0000000..54e9897
--- /dev/null
+++ b/bin/extract_proteome_from_gff
@@ -0,0 +1,19 @@
+#!/usr/bin/env perl
+
+package Bio::Roary::Main::ExtractProteomeFromGFF;
+
+# ABSTRACT: Take in GFF files and output the proteome
+# PODNAME: extract_proteome_from_gff
+
+=head1 SYNOPSIS
+
+Take in GFF files and output the proteome
+
+=cut
+
+use strict; use warnings;
+use Cwd qw(abs_path);
+BEGIN { unshift( @INC, abs_path('./lib') ) }    # ./lib is resolved against the current working directory
+BEGIN { unshift( @INC, abs_path('./t/lib') ) }  # unshifted last, so ./t/lib is searched before ./lib
+use Bio::Roary::CommandLine::ExtractProteomeFromGff;
+Bio::Roary::CommandLine::ExtractProteomeFromGff->new(args => \@ARGV, script_name => $0)->run;  # raw arguments + script name, then run
diff --git a/bin/iterative_cdhit b/bin/iterative_cdhit
new file mode 100755
index 0000000..b90e31a
--- /dev/null
+++ b/bin/iterative_cdhit
@@ -0,0 +1,19 @@
+#!/usr/bin/env perl
+
+package Bio::Roary::Main::IterativeCdhit;
+
+# ABSTRACT: Iteratively run cdhit
+# PODNAME:  iterative_cdhit
+
+=head1 SYNOPSIS
+
+Iteratively run cdhit
+
+=cut
+
+use strict; use warnings;
+use Cwd qw(abs_path);
+BEGIN { unshift( @INC, abs_path('./lib') ) }    # ./lib is resolved against the current working directory
+BEGIN { unshift( @INC, abs_path('./t/lib') ) }  # unshifted last, so ./t/lib is searched before ./lib
+use Bio::Roary::CommandLine::IterativeCdhit;
+Bio::Roary::CommandLine::IterativeCdhit->new(args => \@ARGV, script_name => $0)->run;  # raw arguments + script name, then run
diff --git a/bin/pan_genome_assembly_statistics b/bin/pan_genome_assembly_statistics
new file mode 100755
index 0000000..5960b3e
--- /dev/null
+++ b/bin/pan_genome_assembly_statistics
@@ -0,0 +1,19 @@
+#!/usr/bin/env perl
+
+package Bio::Roary::Main::AssemblyStatistics;
+
+# ABSTRACT: Given a spreadsheet of gene presence and absence calculate some statistics
+# PODNAME: pan_genome_assembly_statistics
+
+=head1 SYNOPSIS
+
+Given a spreadsheet of gene presence and absence calculate some statistics
+
+=cut
+
+use strict; use warnings;
+use Cwd qw(abs_path);
+BEGIN { unshift( @INC, abs_path('./lib') ) }    # ./lib is resolved against the current working directory
+BEGIN { unshift( @INC, abs_path('./t/lib') ) }  # unshifted last, so ./t/lib is searched before ./lib
+use Bio::Roary::CommandLine::AssemblyStatistics;
+Bio::Roary::CommandLine::AssemblyStatistics->new(args => \@ARGV, script_name => $0)->run;  # raw arguments + script name, then run
diff --git a/bin/pan_genome_core_alignment b/bin/pan_genome_core_alignment
new file mode 100755
index 0000000..5370331
--- /dev/null
+++ b/bin/pan_genome_core_alignment
@@ -0,0 +1,19 @@
+#!/usr/bin/env perl
+
+package Bio::Roary::Main::RoaryCoreAlignment;
+
+# ABSTRACT: Take in the group statistics spreadsheet and the location of the gene multifasta files and create a core alignment.
+# PODNAME: pan_genome_core_alignment
+
+=head1 SYNOPSIS
+
+ Take in the group statistics spreadsheet and the location of the gene multifasta files and create a core alignment.
+
+=cut
+
+use strict; use warnings;
+use Cwd qw(abs_path);
+BEGIN { unshift( @INC, abs_path('./lib') ) }    # ./lib is resolved against the current working directory
+BEGIN { unshift( @INC, abs_path('./t/lib') ) }  # unshifted last, so ./t/lib is searched before ./lib
+use Bio::Roary::CommandLine::RoaryCoreAlignment;
+Bio::Roary::CommandLine::RoaryCoreAlignment->new(args => \@ARGV, script_name => $0)->run;  # raw arguments + script name, then run
diff --git a/bin/pan_genome_post_analysis b/bin/pan_genome_post_analysis
new file mode 100755
index 0000000..7fceb3f
--- /dev/null
+++ b/bin/pan_genome_post_analysis
@@ -0,0 +1,19 @@
+#!/usr/bin/env perl
+
+package Bio::Roary::Main::RoaryPostAnalysis;
+
+# ABSTRACT: Perform the post analysis on the pan genome
+# PODNAME: pan_genome_post_analysis
+
+=head1 SYNOPSIS
+
+Perform the post analysis on the pan genome
+
+=cut
+
+use strict; use warnings;
+use Cwd qw(abs_path);
+BEGIN { unshift( @INC, abs_path('./lib') ) }    # ./lib is resolved against the current working directory
+BEGIN { unshift( @INC, abs_path('./t/lib') ) }  # unshifted last, so ./t/lib is searched before ./lib
+use Bio::Roary::CommandLine::RoaryPostAnalysis;
+Bio::Roary::CommandLine::RoaryPostAnalysis->new(args => \@ARGV, script_name => $0)->run;  # raw arguments + script name, then run
diff --git a/bin/pan_genome_reorder_spreadsheet b/bin/pan_genome_reorder_spreadsheet
new file mode 100755
index 0000000..f436248
--- /dev/null
+++ b/bin/pan_genome_reorder_spreadsheet
@@ -0,0 +1,19 @@
+#!/usr/bin/env perl
+
+package Bio::Roary::Main::RoaryReorderSpreadsheet;
+
+# ABSTRACT: Take in a tree and a spreadsheet and output a reordered spreadsheet
+# PODNAME: pan_genome_reorder_spreadsheet
+
+=head1 SYNOPSIS
+
+Take in a tree and a spreadsheet and output a reordered spreadsheet
+
+=cut
+
+use strict; use warnings;
+use Cwd qw(abs_path);
+BEGIN { unshift( @INC, abs_path('./lib') ) }    # ./lib is resolved against the current working directory
+BEGIN { unshift( @INC, abs_path('./t/lib') ) }  # unshifted last, so ./t/lib is searched before ./lib
+use Bio::Roary::CommandLine::RoaryReorderSpreadsheet;
+Bio::Roary::CommandLine::RoaryReorderSpreadsheet->new(args => \@ARGV, script_name => $0)->run;  # raw arguments + script name, then run
diff --git a/bin/parallel_all_against_all_blastp b/bin/parallel_all_against_all_blastp
new file mode 100755
index 0000000..50cebc9
--- /dev/null
+++ b/bin/parallel_all_against_all_blastp
@@ -0,0 +1,19 @@
+#!/usr/bin/env perl
+
+package Bio::Roary::Main::ParallelAllAgainstAllBlastp;
+
+# ABSTRACT: Take in a FASTA file of proteins and blast against itself
+# PODNAME: parallel_all_against_all_blastp
+
+=head1 SYNOPSIS
+
+Take in a FASTA file of proteins and blast against itself
+
+=cut
+
+use strict; use warnings;
+use Cwd qw(abs_path);
+BEGIN { unshift( @INC, abs_path('./lib') ) }    # ./lib is resolved against the current working directory
+BEGIN { unshift( @INC, abs_path('./t/lib') ) }  # unshifted last, so ./t/lib is searched before ./lib
+use Bio::Roary::CommandLine::ParallelAllAgainstAllBlastp;
+Bio::Roary::CommandLine::ParallelAllAgainstAllBlastp->new(args => \@ARGV, script_name => $0)->run;  # raw arguments + script name, then run
diff --git a/bin/protein_alignment_from_nucleotides b/bin/protein_alignment_from_nucleotides
new file mode 100755
index 0000000..b1f86f8
--- /dev/null
+++ b/bin/protein_alignment_from_nucleotides
@@ -0,0 +1,19 @@
+#!/usr/bin/env perl
+
+package Bio::Roary::Main::GeneAlignmentFromNucleotides;
+
+# ABSTRACT: Take in multi-FASTA files of nucleotides and align each file with PRANK or MAFFT
+# PODNAME: protein_alignment_from_nucleotides
+
+=head1 SYNOPSIS
+
+Take in multi-FASTA files of nucleotides and align each file with PRANK or MAFFT
+
+=cut
+
+use strict; use warnings;
+use Cwd qw(abs_path);
+BEGIN { unshift( @INC, abs_path('./lib') ) }    # ./lib is resolved against the current working directory
+BEGIN { unshift( @INC, abs_path('./t/lib') ) }  # unshifted last, so ./t/lib is searched before ./lib
+use Bio::Roary::CommandLine::GeneAlignmentFromNucleotides;
+Bio::Roary::CommandLine::GeneAlignmentFromNucleotides->new(args => \@ARGV, script_name => $0)->run;  # raw arguments + script name, then run
diff --git a/bin/query_pan_genome b/bin/query_pan_genome
new file mode 100755
index 0000000..0e62826
--- /dev/null
+++ b/bin/query_pan_genome
@@ -0,0 +1,19 @@
+#!/usr/bin/env perl
+
+package Bio::Roary::Main::QueryRoary;
+
+# ABSTRACT: Take in a groups file and the protein fasta files and output selected data
+# PODNAME: query_pan_genome
+
+=head1 SYNOPSIS
+
+Take in a groups file and the protein fasta files and output selected data
+
+=cut
+
+use strict; use warnings;
+use Cwd qw(abs_path);
+BEGIN { unshift( @INC, abs_path('./lib') ) }    # ./lib is resolved against the current working directory
+BEGIN { unshift( @INC, abs_path('./t/lib') ) }  # unshifted last, so ./t/lib is searched before ./lib
+use Bio::Roary::CommandLine::QueryRoary;
+Bio::Roary::CommandLine::QueryRoary->new(args => \@ARGV, script_name => $0)->run;  # raw arguments + script name, then run
diff --git a/bin/roary b/bin/roary
new file mode 100755
index 0000000..774341e
--- /dev/null
+++ b/bin/roary
@@ -0,0 +1,19 @@
+#!/usr/bin/env perl
+
+package Bio::Roary::Main::Roary;
+
+# ABSTRACT: Create a pan genome from a set of GFF files
+# PODNAME: roary
+
+=head1 SYNOPSIS
+
+Create a pan genome from a set of GFF files
+
+=cut
+
+use strict; use warnings;
+use Cwd qw(abs_path);
+BEGIN { unshift( @INC, abs_path('./lib') ) }    # ./lib is resolved against the current working directory
+BEGIN { unshift( @INC, abs_path('./t/lib') ) }  # unshifted last, so ./t/lib is searched before ./lib
+use Bio::Roary::CommandLine::Roary;
+Bio::Roary::CommandLine::Roary->new(args => \@ARGV, script_name => $0)->run;  # raw arguments + script name, then run
diff --git a/bin/transfer_annotation_to_groups b/bin/transfer_annotation_to_groups
new file mode 100755
index 0000000..06a485e
--- /dev/null
+++ b/bin/transfer_annotation_to_groups
@@ -0,0 +1,19 @@
+#!/usr/bin/env perl
+
+package Bio::Roary::Main::TransferAnnotationToGroups;
+
+# ABSTRACT: Take in a groups file and a set of GFF files and transfer the consensus annotation
+# PODNAME: transfer_annotation_to_groups
+
+=head1 SYNOPSIS
+
+Take in a groups file and a set of GFF files and transfer the consensus annotation
+
+=cut
+
+use strict; use warnings;
+use Cwd qw(abs_path);
+BEGIN { unshift( @INC, abs_path('./lib') ) }    # ./lib is resolved against the current working directory
+BEGIN { unshift( @INC, abs_path('./t/lib') ) }  # unshifted last, so ./t/lib is searched before ./lib
+use Bio::Roary::CommandLine::TransferAnnotationToGroups;
+Bio::Roary::CommandLine::TransferAnnotationToGroups->new(args => \@ARGV, script_name => $0)->run;  # raw arguments + script name, then run
diff --git a/deployment_process b/deployment_process
new file mode 100644
index 0000000..3eccc71
--- /dev/null
+++ b/deployment_process
@@ -0,0 +1,7 @@
+# Releasing a new version
+
+Update the version number in dist.ini
+Commit all changes.
+Tag with the current version.
+Push all changes, then on github.com submit a pull request and have it accepted.
+dzil release
diff --git a/dist.ini b/dist.ini
new file mode 100644
index 0000000..e1942c2
--- /dev/null
+++ b/dist.ini
@@ -0,0 +1,32 @@
+name    = Bio-Roary
+version = 3.5.7
+author  = Andrew J. Page <ap13 at sanger.ac.uk>
+license = GPL_3
+copyright_holder = Wellcome Trust Sanger Institute
+copyright_year   = 2013
+main_module = lib/Bio/Roary.pm
+
+[MetaResources]
+homepage        = http://www.sanger.ac.uk/
+repository.web  = http://sanger-pathogens.github.io/Roary/
+repository.url  = https://github.com/sanger-pathogens/Roary.git
+repository.type = git
+
+[@Basic]
+[PruneCruft]
+[ExtraTests]
+[AutoPrereqs]
+[PodWeaver]
+[PkgVersion]
+
+[Prereqs]
+PerlIO::utf8_strict   = 0
+
+[Encoding]
+filename = t/data/kraken_test/database.idx
+filename = t/data/kraken_test/database.jdb
+filename = t/data/kraken_test/database.kdb
+filename = t/data/kraken_test/taxonomy/names.dmp
+filename = t/data/kraken_test/taxonomy/nodes.dmp
+
+encoding = bytes
diff --git a/install_dependencies.sh b/install_dependencies.sh
new file mode 100755
index 0000000..1dc2914
--- /dev/null
+++ b/install_dependencies.sh
@@ -0,0 +1,248 @@
+#!/bin/bash
+
+set -x   # trace every command as it runs
+set -eu  # stop at the first failing command or reference to an unset variable
+
+start_dir=$(pwd)  # directory the script was started from; lib/ and bin/ below are taken relative to it
+ROARY_LIB_DIR="${start_dir}/lib"
+ROARY_BIN_DIR="${start_dir}/bin"
+
+PARALLEL_VERSION=${PARALLEL_VERSION:-"20150522"}  # may be overridden from the environment
+PARALLEL_DOWNLOAD_FILENAME="parallel-${PARALLEL_VERSION}.tar.bz2" # bzip2 archive, unlike the other tarballs
+PARALLEL_URL="http://ftp.gnu.org/gnu/parallel/${PARALLEL_DOWNLOAD_FILENAME}"
+
+BEDTOOLS_VERSION="2.24.0"
+BEDTOOLS_DOWNLOAD_FILENAME="bedtools-${BEDTOOLS_VERSION}.tar.gz"
+BEDTOOLS_URL="https://github.com/arq5x/bedtools2/releases/download/v${BEDTOOLS_VERSION}/${BEDTOOLS_DOWNLOAD_FILENAME}"
+
+CDHIT_SHORT_VERSION="4.6.3"
+CDHIT_LONG_VERSION="4.6.3-2015-0515"
+CDHIT_DOWNLOAD_FILENAME="cd-hit-${CDHIT_SHORT_VERSION}.tar.gz"  # local name only; the URL below uses the long version
+CDHIT_URL="https://github.com/weizhongli/cdhit/releases/download/V${CDHIT_SHORT_VERSION}/cd-hit-v${CDHIT_LONG_VERSION}.tar.gz"
+
+PRANK_VERSION="0.140603"  # not referenced anywhere else in this script; PRANK_URL fetches master.tar.gz
+PRANK_DOWNLOAD_FILENAME="prank-msa-master.tar.gz"
+PRANK_URL="https://github.com/ariloytynoja/prank-msa/archive/master.tar.gz"
+
+BLAST_VERSION="2.2.30"
+BLAST_DOWNLOAD_FILENAME="ncbi-blast-${BLAST_VERSION}+-x64-linux.tar.gz"
+BLAST_URL="ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/${BLAST_VERSION}/${BLAST_DOWNLOAD_FILENAME}"
+
+MCL_VERSION="14-137"
+MCL_DOWNLOAD_FILENAME="mcl-${MCL_VERSION}.tar.gz"
+MCL_URL="http://micans.org/mcl/src/mcl-${MCL_VERSION}.tar.gz"
+
+FASTTREE_VERSION="2.1.8"
+FASTTREE_DOWNLOAD_FILENAME="FastTree-${FASTTREE_VERSION}.c"  # a single C source file, not an archive
+FASTTREE_URL="http://microbesonline.org/fasttree/FastTree-${FASTTREE_VERSION}.c"
+
+MAFFT_VERSION="7.221"
+MAFFT_DOWNLOAD_FILENAME="mafft-${MAFFT_VERSION}-without-extensions-src.tgz"
+MAFFT_URL="http://mafft.cbrc.jp/alignment/software/${MAFFT_DOWNLOAD_FILENAME}"
+
+# Make an install location: ./build under the starting directory, created on first use
+if [ ! -d 'build' ]; then
+  mkdir build
+fi
+cd build
+build_dir=$(pwd)  # absolute path of build/; download() and untar() cd back into it
+
+PARALLEL_DOWNLOAD_PATH="$(pwd)/${PARALLEL_DOWNLOAD_FILENAME}"  # *_DOWNLOAD_PATH: where each download is saved
+BEDTOOLS_DOWNLOAD_PATH="$(pwd)/${BEDTOOLS_DOWNLOAD_FILENAME}"
+CDHIT_DOWNLOAD_PATH="$(pwd)/${CDHIT_DOWNLOAD_FILENAME}"
+PRANK_DOWNLOAD_PATH="$(pwd)/${PRANK_DOWNLOAD_FILENAME}"
+BLAST_DOWNLOAD_PATH="$(pwd)/${BLAST_DOWNLOAD_FILENAME}"
+MCL_DOWNLOAD_PATH="$(pwd)/${MCL_DOWNLOAD_FILENAME}"
+FASTTREE_DOWNLOAD_PATH="$(pwd)/${FASTTREE_DOWNLOAD_FILENAME}"
+MAFFT_DOWNLOAD_PATH="$(pwd)/${MAFFT_DOWNLOAD_FILENAME}"
+
+PARALLEL_BUILD_DIR="$(pwd)/parallel-${PARALLEL_VERSION}"  # *_BUILD_DIR: directory each tool is unpacked and built in
+BEDTOOLS_BUILD_DIR="$(pwd)/bedtools2"  # fixed name, not derived from BEDTOOLS_VERSION
+CDHIT_BUILD_DIR="$(pwd)/cd-hit-v${CDHIT_LONG_VERSION}"
+PRANK_BUILD_DIR="$(pwd)/prank-msa-master"
+BLAST_BUILD_DIR="$(pwd)/ncbi-blast-${BLAST_VERSION}+"
+MCL_BUILD_DIR="$(pwd)/mcl-${MCL_VERSION}"
+FASTTREE_BUILD_DIR="$(pwd)/fasttree"  # created by the FastTree step itself (mkdir -p)
+MAFFT_BUILD_DIR="$(pwd)/mafft-${MAFFT_VERSION}-without-extensions"
+
+
+download () {  # usage: download URL PATH -- fetch URL to PATH unless PATH already exists
+  download_url=$1 download_path=$2
+  cd "$build_dir"  # callers rely on being left in the build directory afterwards
+  if [ -e "$download_path" ]; then
+    echo "Skipping download of $download_url, $download_path already exists"
+  else
+    echo "Downloading $download_url to $download_path"
+    wget "$download_url" -O "${download_path}.part"  # wget -O creates its output file even when the transfer fails,
+    mv "${download_path}.part" "$download_path"      # so only a completed download is given the final name
+    pwd
+  fi
+}
+
+untar () {  # usage: untar ARCHIVE DIR -- unpack a gzip'ed ARCHIVE inside $build_dir
+  to_untar=$1
+  expected_directory=$2  # only used for cleanup and the message; tar is not told to extract there
+  if [ -d "$expected_directory" ]; then
+    rm -rf -- "$expected_directory"  # discard any previous tree first
+  fi
+  echo "Untarring $to_untar to $expected_directory"
+  cd "$build_dir"
+  tar xzvf "$to_untar"
+  rm -- "$to_untar"  # the archive is deleted once it has been unpacked
+  pwd
+}
+
+if [ -e "$BLAST_BUILD_DIR/bin/blastp" ]; then  # BLAST+: this step only downloads and unpacks, nothing is compiled
+  echo "blast already untarred to $BLAST_BUILD_DIR, skipping"
+else
+  download $BLAST_URL $BLAST_DOWNLOAD_PATH
+  untar $BLAST_DOWNLOAD_PATH $BLAST_BUILD_DIR
+fi
+
+# Build parallel (skipped when src/parallel already exists in its build directory)
+if [ -e "$PARALLEL_BUILD_DIR/src/parallel" ]; then
+  echo "Parallel already built, skipping"
+else
+  download $PARALLEL_URL $PARALLEL_DOWNLOAD_PATH  # leaves the shell in $build_dir
+  echo "Untarring parallel to $PARALLEL_BUILD_DIR"
+  tar xjvf $PARALLEL_DOWNLOAD_PATH  # bzip2 archive, so unpacked here instead of via untar() (gzip flags); the tarball is kept
+  echo "Building parallel"
+  cd $PARALLEL_BUILD_DIR
+  ./configure
+  make
+fi
+
+# Build bedtools (skipped when bin/bedtools already exists in its build directory)
+if [ -e "$BEDTOOLS_BUILD_DIR/bin/bedtools" ]; then
+  echo "Bedtools already built, skipping"
+else
+
+  download $BEDTOOLS_URL $BEDTOOLS_DOWNLOAD_PATH
+  untar $BEDTOOLS_DOWNLOAD_PATH $BEDTOOLS_BUILD_DIR
+  cd $BEDTOOLS_BUILD_DIR
+  echo "Building bedtools"
+  ls -alrt  # prints a directory listing of the unpacked tree before building
+  make
+fi
+
+# Build cd-hit (skipped when the cd-hit binary already exists in its build directory)
+if [ -e "$CDHIT_BUILD_DIR/cd-hit" ]; then
+  echo "cd-hit already built, skipping"
+else
+  download $CDHIT_URL $CDHIT_DOWNLOAD_PATH
+  untar $CDHIT_DOWNLOAD_PATH $CDHIT_BUILD_DIR
+  echo "Building cd-hit"
+  cd $CDHIT_BUILD_DIR
+  make  # no configure step; the binary is expected in the top of the build directory
+fi
+
+# Build prank (skipped when src/prank already exists in its build directory)
+if [ -e "$PRANK_BUILD_DIR/src/prank" ]; then
+  echo "prank already built, skipping"
+else
+  download $PRANK_URL $PRANK_DOWNLOAD_PATH
+  untar $PRANK_DOWNLOAD_PATH $PRANK_BUILD_DIR
+  echo "Building prank"
+  cd $PRANK_BUILD_DIR
+  cd src  # make is run from the src/ subdirectory
+  make
+fi
+
+# Build MCL (skipped when src/shmcl/mcl already exists in its build directory)
+if [ -e "$MCL_BUILD_DIR/src/shmcl/mcl" ]; then
+  echo "MCL already built, skipping"
+else
+  download $MCL_URL $MCL_DOWNLOAD_PATH
+  untar $MCL_DOWNLOAD_PATH $MCL_BUILD_DIR
+  echo "Building MCL"
+  cd $MCL_BUILD_DIR
+  ./configure
+  make  # built in place; there is no "make install", the source tree itself is put on PATH later
+fi
+
+# Build FastTree (skipped when the FastTree binary already exists in its build directory)
+if [ -e "$FASTTREE_BUILD_DIR/FastTree" ]; then
+  echo "FastTree already built, skipping"
+else
+  download $FASTTREE_URL $FASTTREE_DOWNLOAD_PATH
+  mkdir -p $FASTTREE_BUILD_DIR
+  mv $FASTTREE_DOWNLOAD_FILENAME $FASTTREE_BUILD_DIR  # relative name: relies on download() having cd'ed into $build_dir
+  cd $FASTTREE_BUILD_DIR
+  echo "Building FastTree"
+  gcc -o FastTree FastTree-${FASTTREE_VERSION}.c -lm  # single-file build, linked against libm
+fi
+
+export MAFFT_INSTALL_DIR="${MAFFT_BUILD_DIR}/build"
+# Build MAFFT
+if [ -e "$MAFFT_BUILD_DIR/build/mafft" ]; then
+  echo "MAFFT already built, skipping"
+else
+  download $MAFFT_URL $MAFFT_DOWNLOAD_PATH
+  untar $MAFFT_DOWNLOAD_PATH $MAFFT_BUILD_DIR
+  echo "Building MAFFT"
+  cd $MAFFT_BUILD_DIR
+  mkdir -p $MAFFT_INSTALL_DIR
+  cd core
+  sed -i '1s!.*!PREFIX = $(MAFFT_INSTALL_DIR)!' Makefile
+  make
+  make install
+fi
+
+
+# Add things to PATH
+update_path () {  # usage: update_path DIR -- put DIR at the front of PATH unless it is already an entry
+  new_dir=$1
+  # Already present as a whole colon-separated entry: nothing to do.
+  [[ "$PATH" =~ (^|:)"${new_dir}"(:|$) ]] && return 0
+  echo "export PATH=${new_dir}:${PATH}"
+  export PATH=${new_dir}:${PATH}
+}
+
+export PATH  # each *_BIN_DIR below is prepended to PATH via update_path and reused in the final hint
+PARALLEL_BIN_DIR="$PARALLEL_BUILD_DIR/src"
+update_path $PARALLEL_BIN_DIR
+BEDTOOLS_BIN_DIR="$BEDTOOLS_BUILD_DIR/bin"
+update_path $BEDTOOLS_BIN_DIR
+CDHIT_BIN_DIR="$CDHIT_BUILD_DIR"
+update_path $CDHIT_BIN_DIR
+PRANK_BIN_DIR="$PRANK_BUILD_DIR/src"
+update_path $PRANK_BIN_DIR
+
+BLAST_BIN_DIR="$BLAST_BUILD_DIR/bin"
+update_path $BLAST_BIN_DIR
+
+MCL_BIN_DIR="$MCL_BUILD_DIR/src/shmcl"
+update_path $MCL_BIN_DIR
+MCL_BIN_DIR_2="$MCL_BUILD_DIR/src/alien/oxygen/src"  # NOTE(review): presumably where MCL's helper tools (e.g. mcxdeblast) are built -- confirm
+update_path $MCL_BIN_DIR_2
+
+FASTTREE_BIN_DIR=$FASTTREE_BUILD_DIR
+update_path $FASTTREE_BIN_DIR
+MAFFT_BIN_DIR="$MAFFT_INSTALL_DIR/bin"
+update_path $MAFFT_BIN_DIR
+
+update_perl_path () {  # usage: update_perl_path DIR -- make sure DIR is an entry of the exported PERL5LIB
+  new_dir=$1
+  export PERL5LIB="${PERL5LIB:-$new_dir}"  # unset or empty: seed with DIR and export it so child processes see it
+  if [[ ! "$PERL5LIB" =~ (^|:)"${new_dir}"(:|$) ]]; then
+    echo "export PERL5LIB=${new_dir}:${PERL5LIB}"
+    export PERL5LIB=${new_dir}:${PERL5LIB}
+  fi
+}
+
+BEDTOOLS_LIB_DIR="$BEDTOOLS_BUILD_DIR/lib"
+update_perl_path $BEDTOOLS_LIB_DIR
+
+cd $start_dir  # dzil is run from the directory the script was started in
+cpanm --notest Dist::Zilla # --notest: install without running each module's test suite
+dzil authordeps --missing | cpanm --notest  # whatever dzil lists as missing author dependencies is piped to cpanm
+dzil listdeps --missing | cpanm --notest    # likewise for the distribution's own missing dependencies
+
+cd $start_dir
+
+echo "Add the following lines to one of these files ~/.bashrc or ~/.bash_profile or ~/.profile"
+echo "export PATH=${ROARY_BIN_DIR}:${PARALLEL_BIN_DIR}:${BEDTOOLS_BIN_DIR}:${CDHIT_BIN_DIR}:${PRANK_BIN_DIR}:${BLAST_BIN_DIR}:${MCL_BIN_DIR}:${MCL_BIN_DIR_2}:${FASTTREE_BIN_DIR}:${MAFFT_BIN_DIR}:${PATH}"
+echo "export PERL5LIB=${ROARY_LIB_DIR}:${BEDTOOLS_LIB_DIR}:${PERL5LIB}"
+
+set +eu  # switch the strict options off again; NOTE(review): suggests the script may be meant to be sourced -- confirm
+set +x
diff --git a/lib/Bio/Roary.pm b/lib/Bio/Roary.pm
new file mode 100644
index 0000000..d8b5fa3
--- /dev/null
+++ b/lib/Bio/Roary.pm
@@ -0,0 +1,146 @@
+package Bio::Roary;
+
+# ABSTRACT: Create a pan genome
+
+=head1 SYNOPSIS
+
+Create a pan genome
+
+=cut
+
+use Moose;
+use File::Copy;
+use Bio::Perl;
+use Bio::Roary::ParallelAllAgainstAllBlast;
+use Bio::Roary::CombinedProteome;
+use Bio::Roary::External::Cdhit;
+use Bio::Roary::External::Mcl;
+use Bio::Roary::InflateClusters;
+use Bio::Roary::AnalyseGroups;
+use Bio::Roary::GroupLabels;
+use Bio::Roary::AnnotateGroups;
+use Bio::Roary::GroupStatistics;
+use Bio::Roary::Output::GroupsMultifastasNucleotide;
+use Bio::Roary::External::PostAnalysis;
+use Bio::Roary::FilterFullClusters;
+use Bio::Roary::External::IterativeCdhit;
+use Bio::Roary::Output::BlastIdentityFrequency;
+
+has 'fasta_files'                 => ( is => 'rw', isa => 'ArrayRef', required => 1 );
+has 'input_files'                 => ( is => 'rw', isa => 'ArrayRef', required => 1 );
+has 'output_filename'             => ( is => 'rw', isa => 'Str',      default  => 'clustered_proteins' );
+has 'output_pan_geneome_filename' => ( is => 'rw', isa => 'Str',      default  => 'pan_genome.fa' );
+has 'output_statistics_filename'  => ( is => 'rw', isa => 'Str',      default  => 'gene_presence_absence.csv' );
+has 'job_runner'                  => ( is => 'rw', isa => 'Str',      default  => 'Local' );
+has 'cpus'                        => ( is => 'ro', isa => 'Int',      default  => 1 );
+has 'makeblastdb_exec'            => ( is => 'rw', isa => 'Str',      default  => 'makeblastdb' );
+has 'blastp_exec'                 => ( is => 'rw', isa => 'Str',      default  => 'blastp' );
+has 'mcxdeblast_exec'             => ( is => 'ro', isa => 'Str',      default  => 'mcxdeblast' );
+has 'mcl_exec'                    => ( is => 'ro', isa => 'Str',      default  => 'mcl' );
+has 'perc_identity'               => ( is => 'ro', isa => 'Num',      default  => 98 );
+has 'dont_delete_files'           => ( is => 'ro', isa => 'Bool',     default  => 0 );
+has 'dont_create_rplots'          => ( is => 'rw', isa => 'Bool',     default  => 0 );
+has 'dont_split_groups'           => ( is => 'ro', isa => 'Bool',     default  => 0 );
+has 'verbose_stats'               => ( is => 'rw', isa => 'Bool',     default  => 0 );
+has 'translation_table'           => ( is => 'rw', isa => 'Int',      default  => 11 );
+has 'group_limit'                 => ( is => 'rw', isa => 'Num',      default  => 50000 );
+has 'core_definition'             => ( is => 'rw', isa => 'Num',      default  => 1.0 );
+has 'verbose'                     => ( is => 'rw', isa => 'Bool',     default  => 0 );
+has 'mafft'                       => ( is => 'ro', isa => 'Bool',     default  => 0 );
+
+has 'output_multifasta_files' => ( is => 'ro', isa => 'Bool', default => 0 );
+
+sub run {
+    my ($self) = @_;
+    # Pipeline: combine proteomes, iterative cd-hit, all-against-all blastp, MCL, then post analysis.
+    my $output_combined_filename      = '_combined_files';
+    my $output_cd_hit_filename        = '_clustered';
+    my $output_blast_results_filename = '_blast_results';
+    my $output_mcl_filename           = '_uninflated_mcl_groups';
+    my $output_filtered_clustered_fasta  = '_clustered_filtered.fa';
+    my $cdhit_groups = $output_combined_filename.'.groups';
+    # All intermediate file names above are fixed relative names (no directory component).
+    # Remove the '.groups' file named after the combined file, unless files are to be kept.
+    unlink($cdhit_groups) unless($self->dont_delete_files == 1);
+
+	print "Combine proteins into a single file\n" if($self->verbose);
+    my $combine_fasta_files = Bio::Roary::CombinedProteome->new(
+        proteome_files  => $self->fasta_files,
+        output_filename => $output_combined_filename,
+    );
+    $combine_fasta_files->create_combined_proteome_file;
+    # The number of input files is handed to the iterative cd-hit step.
+    my $number_of_input_files = @{$self->input_files};
+
+	print "Iteratively run cd-hit\n" if($self->verbose);
+    my $iterative_cdhit= Bio::Roary::External::IterativeCdhit->new(
+      output_cd_hit_filename           => $output_cd_hit_filename,
+      output_combined_filename         => $output_combined_filename,
+      number_of_input_files            => $number_of_input_files, 
+      output_filtered_clustered_fasta  => $output_filtered_clustered_fasta,
+      job_runner                       => $self->job_runner,
+      cpus                             => $self->cpus
+    );
+    
+    $iterative_cdhit->run();
+
+	print "Parallel all against all blast\n" if($self->verbose);
+    my $blast_obj = Bio::Roary::ParallelAllAgainstAllBlast->new(
+        fasta_file              => $output_cd_hit_filename,
+        blast_results_file_name => $output_blast_results_filename,
+        job_runner              => $self->job_runner,
+        cpus                    => $self->cpus,
+        makeblastdb_exec        => $self->makeblastdb_exec,
+        blastp_exec             => $self->blastp_exec,
+        perc_identity           => $self->perc_identity
+    );
+    $blast_obj->run();
+    # Hand the blast results to BlastIdentityFrequency->create_file (its output is defined in that class).
+    my $blast_identity_frequency_obj = Bio::Roary::Output::BlastIdentityFrequency->new(
+        input_filename      => $output_blast_results_filename,
+      );
+    $blast_identity_frequency_obj->create_file();
+
+	print "Cluster with MCL\n" if($self->verbose);
+    my $mcl = Bio::Roary::External::Mcl->new(
+        blast_results   => $output_blast_results_filename,
+        mcxdeblast_exec => $self->mcxdeblast_exec,
+        mcl_exec        => $self->mcl_exec,
+        job_runner      => $self->job_runner,
+        cpus            => $self->cpus,
+        output_file     => $output_mcl_filename
+    );
+    $mcl->run();
+    # Remove the blast results (already passed to MCL above) unless files are to be kept.
+    unlink($output_blast_results_filename) unless($self->dont_delete_files == 1);
+    # The post analysis receives the input/output settings; dont_wait is hard-coded to 1.
+    my $post_analysis = Bio::Roary::External::PostAnalysis->new(
+        job_runner                  => $self->job_runner,
+        cpus                        => $self->cpus,
+        fasta_files                 => $self->fasta_files,
+        input_files                 => $self->input_files,
+        output_filename             => $self->output_filename,
+        output_pan_geneome_filename => $self->output_pan_geneome_filename,
+        output_statistics_filename  => $self->output_statistics_filename,
+        clusters_filename           => $output_cd_hit_filename.'.clstr',
+        dont_wait                   => 1,
+        output_multifasta_files     => $self->output_multifasta_files,
+        dont_delete_files           => $self->dont_delete_files,
+        dont_create_rplots          => $self->dont_create_rplots,
+        dont_split_groups           => $self->dont_split_groups,
+        verbose_stats               => $self->verbose_stats,
+        translation_table           => $self->translation_table,
+        group_limit                 => $self->group_limit,
+        core_definition             => $self->core_definition,
+		verbose                     => $self->verbose,
+		mafft                       => $self->mafft,
+    );
+    $post_analysis->run();
+
+}
+
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/AccessoryBinaryFasta.pm b/lib/Bio/Roary/AccessoryBinaryFasta.pm
new file mode 100644
index 0000000..b398878
--- /dev/null
+++ b/lib/Bio/Roary/AccessoryBinaryFasta.pm
@@ -0,0 +1,100 @@
+package Bio::Roary::AccessoryBinaryFasta;
+
+# ABSTRACT: Output a FASTA file which represents the binary presence and absence of genes in the accessory genome
+
+=head1 SYNOPSIS
+
+Output a FASTA file which represents the binary presence and absence of genes in the accessory genome
+   use Bio::Roary::AccessoryBinaryFasta;
+   my $obj = Bio::Roary::AccessoryBinaryFasta->new(input_files => ['abc','efg'],
+		groups_to_files => {'group_1' => ['abc'], group_2 => ['abc', 'efg']}
+   );
+   $obj->create_accessory_binary_fasta();
+=cut
+
+use Moose;
+use POSIX;
+use Bio::Roary::AnnotateGroups;
+use Bio::Roary::AnalyseGroups;
+use Bio::Roary::Exceptions;
+use Bio::SeqIO;
+use File::Basename;
+
+has 'input_files'            => ( is => 'ro', isa => 'ArrayRef',                   required => 1 );
+has 'annotate_groups_obj'    => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 );
+has 'analyse_groups_obj'     => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups',  required => 1 );
+has 'output_filename'        => ( is => 'ro', isa => 'Str',                        default  => 'accessory_binary_genes.fa' );
+has 'lower_bound_percentage' => ( is => 'ro', isa => 'Int',                        default  => 5 );
+has 'upper_bound_percentage' => ( is => 'ro', isa => 'Int',                        default  => 5 );
+has 'max_accessory_to_include' => ( is => 'ro', isa => 'Int',                      default  => 4000 );
+has 'groups_to_files'        => ( is => 'ro', isa => 'HashRef',                    lazy     => 1, builder => '_build__groups_to_files' );
+has '_lower_bound_value'     => ( is => 'ro', isa => 'Int',                        lazy     => 1, builder => '_build__lower_bound_value' );
+has '_upper_bound_value'     => ( is => 'ro', isa => 'Int',                        lazy     => 1, builder => '_build__upper_bound_value' );
+
+sub _build__groups_to_files {
+    my ($self) = @_;
+    # Builder: group name => { source file => [ gene ids of the group found in that file ] }
+    my $annotations = $self->annotate_groups_obj;
+    my %files_for_group;
+    for my $group_name ( @{ $annotations->_groups } ) {
+        my $gene_ids = $annotations->_groups_to_id_names->{$group_name};
+        my %genes_in_file;
+        for my $gene_id ( @{$gene_ids} ) {
+            push( @{ $genes_in_file{ $self->analyse_groups_obj->_genes_to_file->{$gene_id} } }, $gene_id );
+        }
+        $files_for_group{$group_name} = \%genes_in_file;
+    }
+    return \%files_for_group;
+}
+
+sub _build__lower_bound_value {
+    my ($self) = @_;
+    # Builder: lower_bound_percentage percent of the number of input files, rounded up
+    return ceil( scalar( @{ $self->input_files } ) * ( $self->lower_bound_percentage / 100 ) );
+}
+
+sub _build__upper_bound_value {    # Builder: file count minus upper_bound_percentage percent of it (rounded up)
+    my $self       = shift;
+    my $file_count = scalar( @{ $self->input_files } );
+    return $file_count - ceil( $file_count * ( $self->upper_bound_percentage / 100 ) );
+}
+
+sub create_accessory_binary_fasta {
+    my ($self) = @_;
+    # Write one FASTA record per input file; every selected group contributes one
+    # character: 'A' when the file has a gene in the group, 'C' otherwise.
+    # Returns 1.
+    my $out_seq_io = Bio::SeqIO->new( -file => ">" . $self->output_filename, -format => 'Fasta' );
+
+    # The selected groups do not depend on the input file, so choose them once
+    # (in sorted name order) instead of re-filtering every group for every file.
+    my @accessory_groups;
+    for my $group ( sort keys %{ $self->groups_to_files } ) {
+        # NOTE(review): '>' lets max_accessory_to_include + 1 groups through; kept unchanged
+        last if ( @accessory_groups > $self->max_accessory_to_include );
+        my $files_with_group = keys %{ $self->groups_to_files->{$group} };
+        next if ( $files_with_group <= $self->_lower_bound_value || $files_with_group > $self->_upper_bound_value );
+        push( @accessory_groups, $group );
+    }
+
+    for my $full_filename ( @{ $self->input_files } ) {
+        # Sample name: base file name with the first '.gff.proteome.faa' removed
+        my ($sample_name) = fileparse($full_filename);
+        $sample_name =~ s!\.gff\.proteome\.faa!!;
+
+        my $output_sequence = '';
+        for my $group (@accessory_groups) {
+            my $group_to_file_genes = $self->groups_to_files->{$group}->{$full_filename};
+            $output_sequence .= ( defined($group_to_file_genes) && @{$group_to_file_genes} > 0 ) ? 'A' : 'C';
+        }
+        # No group was selected: skip the record rather than write an empty sequence
+        next if ( $output_sequence eq '' );
+        $out_seq_io->write_seq( Bio::Seq->new( -display_id => $sample_name, -seq => $output_sequence ) );
+    }
+    return 1;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/AccessoryClustering.pm b/lib/Bio/Roary/AccessoryClustering.pm
new file mode 100644
index 0000000..95110e2
--- /dev/null
+++ b/lib/Bio/Roary/AccessoryClustering.pm
@@ -0,0 +1,95 @@
+package Bio::Roary::AccessoryClustering;
+
+# ABSTRACT: Take a clusters file from CD-hit and the fasta file and output a fasta file without full clusters
+
+=head1 SYNOPSIS
+
+Take a clusters file from CD-hit and the fasta file and output a fasta file without full clusters
+   use Bio::Roary::AccessoryClustering;
+   
+   my $obj = Bio::Roary::AccessoryClustering->new(
+       input_file        => 'accessory_binary_genes.fa',
+       identity           => 0.96,
+       cpus => 10,
+     );
+   $obj->sample_weights();
+
+=cut
+
+use Moose;
+use Bio::Roary::External::Cdhit;
+with 'Bio::Roary::ClustersRole';
+
+has 'input_file'              => ( is => 'ro', isa => 'Str',     required => 1 );
+has 'identity'                => ( is => 'ro', isa => 'Num',     default  => 0.9 );
+has 'cpus'                    => ( is => 'ro', isa => 'Int',      default  => 1 );
+has '_output_cd_hit_filename' => ( is => 'ro', isa => 'Str',     default  => '_accessory_clusters' );
+has 'clusters_to_samples'     => ( is => 'ro', isa => 'HashRef', lazy     => 1, builder => '_build_clusters_to_samples' );
+has 'samples_to_clusters'     => ( is => 'ro', isa => 'HashRef', lazy     => 1, builder => '_build_samples_to_clusters' );
+has 'sample_weights'          => ( is => 'ro', isa => 'HashRef', lazy     => 1, builder => '_build_sample_weights' );
+has 'clusters_filename'       => ( is => 'ro', isa => 'Str',     lazy     => 1, builder => '_build_clusters_filename' );
+has 'clusters'                => ( is => 'ro', isa => 'HashRef', lazy     => 1, builder => '_build__clusters' );
+
+sub _build_sample_weights {
+    my ($self) = @_;
+    # Builder: sample name => 1 / (number of samples listed in its cluster)
+    my $clusters = $self->clusters_to_samples;
+    my %weight_for_sample;
+    for my $cluster ( keys %{$clusters} ) {
+        my @members = @{ $clusters->{$cluster} };
+        $weight_for_sample{$_} = 1 / scalar(@members) for @members;
+    }
+    return \%weight_for_sample;
+}
+
+sub _build_samples_to_clusters {
+    my ($self) = @_;
+    # Builder: invert clusters_to_samples into sample name => cluster name
+    my $clusters = $self->clusters_to_samples;
+    my %cluster_for_sample;
+    for my $cluster ( keys %{$clusters} ) {
+        $cluster_for_sample{$_} = $cluster for @{ $clusters->{$cluster} };
+    }
+    return \%cluster_for_sample;
+}
+
+sub _build_clusters_filename {    # Builder: _output_cd_hit_filename with '.clstr' appended
+    my $self = shift;
+    return join( '', $self->_output_cd_hit_filename, '.clstr' );
+}
+
+sub _build_clusters_to_samples {
+    my ($self) = @_;
+
+    # Builder: run cd-hit over input_file, then return the hash from
+    # _clustered_genes after making sure every cluster name is listed among
+    # that cluster's own members.
+    # NOTE(review): _clustered_genes is not defined in this file - presumably it
+    # comes from Bio::Roary::ClustersRole; confirm.
+    my $cdhit = Bio::Roary::External::Cdhit->new(
+        input_file                   => $self->input_file,
+        output_base                  => $self->_output_cd_hit_filename,
+        _length_difference_cutoff    => 1,
+        _sequence_identity_threshold => $self->identity,
+        cpus                         => $self->cpus
+    );
+    $cdhit->run();
+
+    my $members_of = $self->_clustered_genes;
+  CLUSTER:
+    for my $cluster ( keys %{$members_of} ) {
+        for my $member ( @{ $members_of->{$cluster} } ) {
+            next CLUSTER if ( $member eq $cluster );
+        }
+
+        # Not found among the members: add the cluster name itself
+        push( @{ $members_of->{$cluster} }, $cluster );
+    }
+    return $members_of;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
+
diff --git a/lib/Bio/Roary/AnalyseGroups.pm b/lib/Bio/Roary/AnalyseGroups.pm
new file mode 100644
index 0000000..2e72536
--- /dev/null
+++ b/lib/Bio/Roary/AnalyseGroups.pm
@@ -0,0 +1,119 @@
+package Bio::Roary::AnalyseGroups;
+
+# ABSTRACT: Take in a groups file and the original FASTA files and create plots and stats
+
+=head1 SYNOPSIS
+
+Take in a groups file and the original FASTA files and create plots and stats 
+   use Bio::Roary::AnalyseGroups;
+   
+   my $plot_groups_obj = Bio::Roary::AnalyseGroups->new(
+       fasta_files      => $fasta_files,
+       groups_filename  => $groups_filename,
+       output_filename  => $output_filename
+     );
+   $plot_groups_obj->create_plots();
+
+=cut
+
+use Moose;
+use Bio::Roary::Exceptions;
+
+has 'fasta_files'          => ( is => 'ro', isa => 'ArrayRef', required => 1 );
+has 'groups_filename'      => ( is => 'ro', isa => 'Str',      required => 1 );
+has 'output_filename'      => ( is => 'ro', isa => 'Str',      default  => 'summary_of_groups' );
+
+has '_number_of_isolates'  => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_builder__number_of_isolates' );
+has '_genes_to_file'       => ( is => 'rw', isa => 'HashRef' );
+has '_files_to_genes'      => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_builder__files_to_genes' );
+has '_groups_to_genes'     => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_builder__groups_to_genes' );
+has '_genes_to_groups'     => ( is => 'rw', isa => 'HashRef' );
+
+has '_groups' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_builder__groups' );
+
+
+sub BUILD {
+    my $self = shift;
+    # Run both lazy builders at construction time: besides their own attribute
+    # they fill _genes_to_groups and _genes_to_file, which have no builder.
+    $self->_groups_to_genes;    # also sets _genes_to_groups
+    $self->_files_to_genes;     # also sets _genes_to_file
+}
+
+sub _builder__groups {
+    my $self = shift;
+    # Builder: the group names (keys of _groups_to_genes) in sorted order
+    my $groups_to_genes = $self->_groups_to_genes;
+    return [ sort keys %{$groups_to_genes} ];
+}
+
+sub _builder__number_of_isolates {    # Builder: the number of entries in fasta_files
+    my ($self) = @_;
+    return scalar( @{ $self->fasta_files } );    # scalar(): a count even when called in list context
+}
+
+sub _builder__files_to_genes {
+    my ($self) = @_;
+    # Builder: file => [sequence ids]; also stores id => file in _genes_to_file.
+    # Reads in Perl (no grep|awk|sed shell pipeline), so file names are never shell-interpreted.
+    my ( %files_to_genes, %genes_to_file );
+    for my $filename ( @{ $self->fasta_files } ) {
+        open( my $fh, '<', $filename ) or do { warn "Cannot read $filename: $!\n"; next; };
+        while ( my $line = <$fh> ) {
+            next unless ( $line =~ />/ );    # any line containing '>', as `grep \>` selected
+            ( my $gene_name = ( split ' ', $line )[0] ) =~ s/>//;    # first field minus its first '>'
+            next if ( $gene_name eq "" );
+            push( @{ $files_to_genes{$filename} }, $gene_name );
+            $genes_to_file{$gene_name} = $filename;
+        }
+        close($fh);
+    }
+    $self->_genes_to_file( \%genes_to_file );
+    return \%files_to_genes;
+}
+
+sub _count_num_files_in_group {
+    my ( $self, $genes ) = @_;
+    # Returns the distinct files the given genes come from: the list in list
+    # context, their number in scalar context. Empty/unknown genes are ignored.
+    my %filename_freq;
+    for my $gene ( @{$genes} ) {
+        next if ( $gene eq "" );
+        my $filename = $self->_genes_to_file->{$gene};
+        $filename_freq{$filename}++ if ( defined($filename) );
+    }
+    my @uniq_filenames = keys %filename_freq;
+    return @uniq_filenames;
+}
+
+sub _builder__groups_to_genes {
+    my ($self) = @_;
+    # Builder: parse the groups file ("name: gene gene ...", one group per line)
+    # into group => [genes]; the reverse map is stored in _genes_to_groups.
+    # Throws Bio::Roary::Exceptions::FileNotFound if the file cannot be opened.
+    my ( %groups_to_genes, %genes_to_groups );
+
+    # Three-argument open: the file name is never parsed for a mode or a pipe
+    open( my $fh, '<', $self->groups_filename )
+      or Bio::Roary::Exceptions::FileNotFound->throw( error => "Group file not found:" . $self->groups_filename );
+
+    # A lexical $line is used so the caller's $_ is left untouched
+    while ( my $line = <$fh> ) {
+        chomp($line);
+        next unless ( $line =~ /^(.+): (.+)$/ );    # other lines are ignored
+        my ( $group_name, $genes ) = ( $1, $2 );
+        my @elements = split( /[\s\t]+/, $genes );
+        $groups_to_genes{$group_name} = \@elements;
+        $genes_to_groups{$_} = $group_name for @elements;
+    }
+    close($fh);
+    $self->_genes_to_groups( \%genes_to_groups );
+
+    return \%groups_to_genes;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
+
diff --git a/lib/Bio/Roary/AnnotateGroups.pm b/lib/Bio/Roary/AnnotateGroups.pm
new file mode 100644
index 0000000..2c02b1a
--- /dev/null
+++ b/lib/Bio/Roary/AnnotateGroups.pm
@@ -0,0 +1,356 @@
+package Bio::Roary::AnnotateGroups;
+
+# ABSTRACT: Take in a group file and associated GFF files for the isolates and update the group name to the gene name
+
+=head1 SYNOPSIS
+
+Take in a group file and associated GFF files for the isolates and update the group name to the gene name
+   use Bio::Roary::AnnotateGroups;
+   
+   my $obj = Bio::Roary::AnnotateGroups->new(
+     gff_files   => ['abc.gff','efg.gff'],
+     output_filename   => 'example_output.fa',
+     groups_filename => 'groupsfile',
+   );
+   $obj->reannotate;
+
+=cut
+
+use Moose;
+use Bio::Roary::Exceptions;
+use Bio::Roary::GeneNamesFromGFF;
+use Array::Utils qw(array_minus);
+use List::Util qw(max min sum);
+use File::Grep qw(fgrep);
+
+has 'gff_files'          => ( is => 'ro', isa => 'ArrayRef', required => 1 );
+has 'output_filename'    => ( is => 'ro', isa => 'Str',      default  => 'reannotated_groups_file' );
+has 'groups_filename'    => ( is => 'ro', isa => 'Str',      required => 1 );
+has '_ids_to_gene_names' => ( is => 'ro', isa => 'HashRef',  lazy     => 1, builder => '_build__ids_to_gene_names' );
+has '_ids_to_product'    => ( is => 'rw', isa => 'HashRef', default => sub { {} } );
+has '_ids_to_gene_size'  => ( is => 'rw', isa => 'HashRef', default => sub { {} } );
+has 'group_nucleotide_lengths'  => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_group_nucleotide_lengths');
+
+has '_groups_to_id_names'   => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_builder__groups_to_id_names' );
+has '_output_fh'            => ( is => 'ro', lazy => 1, builder => '_build__output_fh' );
+has '_groups_to_consensus_gene_names' =>
+  ( is => 'rw', isa => 'HashRef', lazy => 1, builder => '_build__groups_to_consensus_gene_names' );
+has '_filtered_gff_files'   => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__filtered_gff_files' );
+has '_number_of_files'      => ( is => 'ro', isa => 'Int',      lazy => 1, builder => '_build__number_of_files' );
+has '_ids_to_groups'        => ( is => 'rw', isa => 'HashRef',  lazy => 1, builder => '_builder__ids_to_groups' );
+has '_group_counter'        => ( is => 'rw', isa => 'Int', lazy => 1, builder => '_builder__group_counter' );
+has '_group_default_prefix' => ( is => 'rw', isa => 'Str', default => 'group_' );
+has '_ids_to_verbose_stats' => ( is => 'rw', isa => 'HashRef', lazy_build => 1 );
+
+sub BUILD {
+    # Build _ids_to_gene_names eagerly; its builder also stores the product and gene size lookups
+    shift->_ids_to_gene_names;
+}
+
+sub _builder__group_counter {
+    my ($self) = @_;
+    # Builder: the next free group number, i.e. one more than the largest N
+    # among groups whose name ends in "<default prefix>N" (1 if there are none).
+    my $prefix    = $self->_group_default_prefix;
+    my $next_free = 1;
+    for my $name ( @{ $self->_groups } ) {
+        next unless ( $name =~ /$prefix([\d]+)$/ );
+
+        # $1 is the numeric suffix of this group name
+        $next_free = $1 + 1 if ( $1 + 1 > $next_free );
+    }
+    return $next_free;
+}
+
+sub _generate__ids_to_groups {
+    my ($self) = @_;
+    # Invert _groups_to_id_names into id => group name. An id listed in several
+    # groups ends up mapped to whichever of them is visited last.
+    my $ids_in_group = $self->_groups_to_id_names;
+    my %group_for_id;
+    for my $group_name ( keys %{$ids_in_group} ) {
+        $group_for_id{$_} = $group_name for @{ $ids_in_group->{$group_name} };
+    }
+    return \%group_for_id;
+}
+
+sub _builder__ids_to_groups {    # Builder: delegates to _generate__ids_to_groups
+    my $self = shift;
+    return $self->_generate__ids_to_groups;
+}
+
+sub _build__output_fh {
+    my $path = shift->output_filename;    # Builder: write handle on output_filename (opened with '>')
+    my $out_fh;
+    open( $out_fh, '>', $path )
+      or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => "Couldnt write output file:" . $path );
+    return $out_fh;
+}
+
+sub _build__filtered_gff_files {
+    my ($self) = @_;
+    # Builder: keep only the entries of gff_files whose name ends in ".gff"
+    return [ grep { /\.gff$/ } @{ $self->gff_files } ];
+}
+
+sub _build__ids_to_gene_names {
+    my ($self) = @_;
+    # Builder: merge the id => gene name lookups of all filtered GFF files.
+    # Side effect: the merged id => product and id => gene size lookups are
+    # stored in _ids_to_product and _ids_to_gene_size. Later files overwrite.
+    my ( %gene_name_for, %product_for, %gene_size_for );
+    for my $gff_file ( @{ $self->_filtered_gff_files } ) {
+        my $parsed = Bio::Roary::GeneNamesFromGFF->new( gff_file => $gff_file );
+
+        my $names = $parsed->ids_to_gene_name;
+        @gene_name_for{ keys %{$names} } = values %{$names};
+        my $products = $parsed->ids_to_product;
+        @product_for{ keys %{$products} } = values %{$products};
+        my $sizes = $parsed->ids_to_gene_size;
+        @gene_size_for{ keys %{$sizes} } = values %{$sizes};
+    }
+    $self->_ids_to_product( \%product_for );
+    $self->_ids_to_gene_size( \%gene_size_for );
+
+    return \%gene_name_for;
+}
+
+sub _build__ids_to_verbose_stats {
+    my ($self) = @_;
+
+    # Builder: id => { inference => ..., product => ... }, taken from the lines
+    # that fgrep reports as matching /ID=/i in the filtered GFF files. A value
+    # is undef when its attribute is not found on the line.
+    my @matching_lines;
+    for my $file_result ( fgrep { /ID=/i } @{ $self->_filtered_gff_files } ) {
+        push( @matching_lines, values %{ $file_result->{matches} } );
+    }
+
+    my %stats_for_id;
+    for my $line (@matching_lines) {
+
+        # Lines whose ID value cannot be extracted are skipped
+        next unless ( $line =~ m/ID=["']?([^;"']+)["']?;?/i );
+        my $id = $1;
+
+        # A failed match in list context yields an empty list, i.e. undef here
+        my ($inference) = ( $line =~ m/inference=([^;]+);/ );
+        my ($product)   = ( $line =~ m/product=([^;]+)[;\n]/ );
+        $stats_for_id{$id} = {
+            'inference' => $inference,
+            'product'   => $product,
+        };
+    }
+    return \%stats_for_id;
+}
+
+
+sub consensus_product_for_id_names {
+    my ( $self, $id_names ) = @_;
+
+    # Returns the product annotation shared by most of the given ids, or ''
+    # when none of them has a product. Ties go to the alphabetically first
+    # product, so the result no longer depends on Perl's hash ordering.
+    my %product_freq;
+    for my $id_name ( @{$id_names} ) {
+        my $product = $self->_ids_to_product->{$id_name};
+        $product_freq{$product}++ if ( defined($product) );
+    }
+
+    # Most frequent first; '|| $a cmp $b' settles equal counts by name.
+    # The list assignment keeps only the winner (undef when the hash is empty).
+    my ($consensus) = sort { $product_freq{$b} <=> $product_freq{$a} || $a cmp $b } keys(%product_freq);
+    return defined($consensus) ? $consensus : '';
+}
+
+sub _builder__groups_to_id_names {
+    my ($self) = @_;
+    # Builder: parse the groups file ("name: id id ...", one group per line) into
+    # group name => [ids]. Lines of any other shape are ignored.
+    # Throws Bio::Roary::Exceptions::FileNotFound if the file cannot be opened.
+    my %groups_to_id_names;
+
+    open( my $fh, '<', $self->groups_filename )    # 3-arg open: the name is never parsed for a mode or pipe
+      or Bio::Roary::Exceptions::FileNotFound->throw( error => "Group file not found:" . $self->groups_filename );
+    while ( my $line = <$fh> ) {                   # lexical $line: the caller's $_ is left alone
+        chomp($line);
+        next unless ( $line =~ /^(.+): (.+)$/ );
+        my ( $group_name, $genes ) = ( $1, $2 );
+        $groups_to_id_names{$group_name} = [ split( /[\s\t]+/, $genes ) ];
+    }
+    close($fh);
+
+    return \%groups_to_id_names;
+}
+
+sub _groups {
+    my ($self) = @_;
+    # Returns a new array ref holding the group names, in hash (unspecified) order
+    return [ keys %{ $self->_groups_to_id_names } ];
+}
+
+sub _ids_grouped_by_gene_name_for_group {
+    my ( $self, $group_name ) = @_;
+    # Returns gene name => [ids of the group carrying that name]; ids with no (or an empty) name are left out
+    my %ids_for_gene_name;
+    for my $id_name ( @{ $self->_groups_to_id_names->{$group_name} } ) {
+        my $gene_name = $self->_ids_to_gene_names->{$id_name};
+        push( @{ $ids_for_gene_name{$gene_name} }, $id_name ) if ( defined($gene_name) && $gene_name ne "" );
+    }
+    return \%ids_for_gene_name;
+}
+
+sub _consensus_gene_name_for_group {
+    my ( $self, $group_name ) = @_;
+
+    # Returns the gene name carried by most ids of the group, or the group name
+    # itself when no id has one. Ties go to the alphabetically first gene name,
+    # so the choice no longer varies with Perl's hash ordering.
+    my $ids_for = $self->_ids_grouped_by_gene_name_for_group($group_name);
+
+    # Most ids first; '|| $a cmp $b' settles equal counts by name
+    my ($consensus) = sort { @{ $ids_for->{$b} } <=> @{ $ids_for->{$a} } || $a cmp $b } keys %{$ids_for};
+    return defined($consensus) ? $consensus : $group_name;
+}
+
+sub _build_group_nucleotide_lengths {
+    my ($self) = @_;
+    # Builder: group name => { min, max, average } gene size over the ids of the
+    # group whose recorded size is defined and not below 1; groups without any
+    # such id are omitted. The average is truncated to an integer.
+    my %lengths_for_group;
+    for my $group_name ( keys %{ $self->_groups_to_id_names } ) {
+        my @sizes;
+        for my $gene_id ( @{ $self->_groups_to_id_names->{$group_name} } ) {
+            my $size = $self->_ids_to_gene_size->{$gene_id};
+            next if ( !defined($size) || $size < 1 );
+            push( @sizes, $size );
+        }
+        next if ( @sizes == 0 );
+
+        $lengths_for_group{$group_name} = {
+            'min'     => ( min @sizes ) || 0,
+            'max'     => ( max @sizes ) || 0,
+            'average' => int( ( sum @sizes ) / @sizes ) || 0,
+        };
+    }
+    return \%lengths_for_group;
+}
+
+sub _generate_groups_to_consensus_gene_names {
+    my ($self) = @_;
+
+    # Returns group name => name to report for that group. Groups whose name
+    # does not match the default prefix keep it. The remaining groups, largest
+    # first, take their consensus gene name unless an earlier group of this
+    # pass already took it; such a group keeps its own name. Equal-sized groups
+    # are visited in name order so every run makes the same choices.
+    my $ids_for      = $self->_groups_to_id_names;
+    my $group_prefix = $self->_group_default_prefix;
+    my ( %groups_to_gene_names, %name_taken );
+
+    # These are already annotated (visiting order is irrelevant, so no sort)
+    for my $group_name ( grep { !/$group_prefix/ } keys %{$ids_for} ) {
+        $groups_to_gene_names{$group_name} = $group_name;
+    }
+
+    my @unnamed = grep { /$group_prefix/ } keys %{$ids_for};
+    for my $group_name ( sort { @{ $ids_for->{$b} } <=> @{ $ids_for->{$a} } || $a cmp $b } @unnamed ) {
+        my $consensus_gene_name = $self->_consensus_gene_name_for_group($group_name);
+        if ( $name_taken{$consensus_gene_name}++ ) {
+            $groups_to_gene_names{$group_name} = $group_name;
+        }
+        else {
+            $groups_to_gene_names{$group_name} = $consensus_gene_name;
+        }
+    }
+
+    return \%groups_to_gene_names;
+}
+
+sub _build__groups_to_consensus_gene_names {    # Builder: delegates to _generate_groups_to_consensus_gene_names
+    my $self = shift;
+    return $self->_generate_groups_to_consensus_gene_names;
+}
+
+sub _build__number_of_files {    # Builder: the number of entries in gff_files (the unfiltered list)
+    my ($self) = @_;
+    return scalar( @{ $self->gff_files } );    # scalar(): a count even when called in list context
+}
+
+
+sub _split_groups {
+    my $self = shift;
+    # Refresh both derived lookups from the current _groups_to_id_names
+    $self->_groups_to_consensus_gene_names( $self->_generate_groups_to_consensus_gene_names );
+    $self->_ids_to_groups( $self->_generate__ids_to_groups );
+}
+
+sub _remove_ids_from_group {
+    my ( $self, $ids_to_remove, $group ) = @_;
+
+    # Remove the given ids from the group; a group left without ids is deleted
+    my $ids_for = $self->_groups_to_id_names;
+    my @kept    = array_minus( @{ $ids_for->{$group} }, @{$ids_to_remove} );
+    $ids_for->{$group} = \@kept;
+    delete( $ids_for->{$group} ) if ( @kept == 0 );
+}
+
+sub reannotate {
+    my ($self) = @_;
+    # Write the groups file, largest group first, one "name: id<TAB>id..." line
+    # per group using each group's consensus gene name, then close the handle.
+    # Equal-sized groups are written in name order so reruns give identical
+    # files. Returns $self.
+    $self->_split_groups;
+    my $ids_for = $self->_groups_to_id_names;
+    my @by_size = sort { @{ $ids_for->{$b} } <=> @{ $ids_for->{$a} } || $a cmp $b } keys %{$ids_for};
+    for my $group_name (@by_size) {
+        my $consensus_gene_name = $self->_groups_to_consensus_gene_names->{$group_name};
+        print { $self->_output_fh } $consensus_gene_name . ": " . join( "\t", @{ $ids_for->{$group_name} } ) . "\n";
+    }
+    close( $self->_output_fh );
+    return $self;
+}
+
+sub full_annotation {
+    my ( $self, $group ) = @_;
+
+    # Returns every distinct product annotation found for the ids of the group,
+    # most frequent first, joined with '; ' ('' when there is none). Equally
+    # frequent products are listed alphabetically so the string is reproducible.
+    my @id_names = @{ $self->_groups_to_id_names->{$group} };
+
+    my %product_freq;
+    for my $id_name (@id_names) {
+        my $product = $self->_ids_to_verbose_stats->{$id_name}->{'product'};
+        $product_freq{$product}++ if ( defined($product) );
+    }
+
+    # Most frequent first; '|| $a cmp $b' settles equal counts by name instead
+    # of leaving them in Perl's per-process hash order.
+    my @sorted_product_keys = sort { $product_freq{$b} <=> $product_freq{$a} || $a cmp $b } keys(%product_freq);
+
+    # join() over an empty list already yields ''
+    return join( '; ', @sorted_product_keys );
+}
+
+sub inference {
+    my ( $self, $group ) = @_;
+    # Returns the inference recorded for the first id of the group that has one
+    # (undef when none has).
+    my @found;
+    for my $id_name ( @{ $self->_groups_to_id_names->{$group} } ) {
+        my $inference = $self->_ids_to_verbose_stats->{$id_name}->{'inference'};
+        push( @found, $inference ) if ( defined($inference) );
+    }
+
+    # maybe make a consensus in the future?
+    return $found[0];
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
+ 
\ No newline at end of file
diff --git a/lib/Bio/Roary/AssemblyStatistics.pm b/lib/Bio/Roary/AssemblyStatistics.pm
new file mode 100644
index 0000000..cc0b32a
--- /dev/null
+++ b/lib/Bio/Roary/AssemblyStatistics.pm
@@ -0,0 +1,207 @@
+package Bio::Roary::AssemblyStatistics;
+
+# ABSTRACT: Given a spreadsheet of gene presence and absence calculate some statistics
+
+=head1 SYNOPSIS
+
+Given a spreadsheet of gene presence and absence calculate some statistics
+
+=cut
+
+use Moose;
+use Bio::Roary::ExtractCoreGenesFromSpreadsheet;
+use Log::Log4perl qw(:easy);
+with 'Bio::Roary::SpreadsheetRole';
+
+has 'output_filename'       => ( is => 'ro', isa => 'Str',      default => 'assembly_statistics.csv' );
+has 'job_runner'            => ( is => 'ro', isa => 'Str',      default => 'Local' );
+has 'cpus'                  => ( is => 'ro', isa => 'Int',      default => 1 );
+has 'core_definition'       => ( is => 'rw', isa => 'Num',      default => 0.99 );
+has '_cloud_percentage'     => ( is => 'rw', isa => 'Num',      default => 0.15 );
+has '_shell_percentage'     => ( is => 'rw', isa => 'Num',      default => 0.95 );
+has '_soft_core_percentage' => ( is => 'rw', isa => 'Num',      default => 0.99 );
+has 'verbose'               => ( is => 'ro', isa => 'Bool',     default => 0 );
+has 'contiguous_window'     => ( is => 'ro', isa => 'Int',      default => 10 );
+has 'ordered_genes'         => ( is => 'ro', isa => 'ArrayRef', lazy    => 1, builder => '_build_ordered_genes' );
+has '_genes_to_rows'        => ( is => 'ro', isa => 'HashRef',  lazy    => 1, builder => '_build__genes_to_rows' );
+has 'all_sample_statistics' => ( is => 'ro', isa => 'HashRef',  lazy    => 1, builder => '_build_all_sample_statistics' );
+has 'sample_names_to_column_index' => ( is => 'rw', isa => 'Maybe[HashRef]' );
+has 'summary_output_filename'=> ( is => 'ro', isa => 'Str',      default => 'summary_statistics.txt' );
+has 'logger'                 => ( is => 'ro', lazy => 1, builder => '_build_logger');
+has 'gene_category_count'   => ( is => 'ro', isa => 'HashRef',  lazy    => 1, builder => '_build_gene_category_count' );
+
+# Moose hook: force the lazy builders now; _genes_to_rows goes first as it fills sample_names_to_column_index.
+sub BUILD {
+    my $self = shift;
+    $self->$_ for qw(_genes_to_rows gene_category_count);
+}
+
+# Builder for the lazy 'logger' attribute: initialise Log4perl via easy_init and return its logger.
+sub _build_logger {
+    my ($self) = @_;
+
+    Log::Log4perl->easy_init(level => $ERROR);
+    return get_logger();
+}
+
+# Write the per-category gene counts to summary_output_filename, one tab separated line per category
+# (label, strain range, count) plus a total. Warns when fewer than 100 core genes exist; returns 1.
+sub create_summary_output {
+    my ($self) = @_;
+    open( my $fh, '>', $self->summary_output_filename )
+      or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => "Couldnt write to " . $self->summary_output_filename );
+
+    my $core_percentage      = $self->core_definition() * 100;
+    my $soft_core_percentage = $self->_soft_core_percentage * 100;
+    my $shell_percentage     = $self->_shell_percentage() * 100;
+    my $cloud_percentage     = $self->_cloud_percentage() * 100;
+
+    # A category that received no genes has no key in gene_category_count, hence the "|| 0".
+    my ( $core_genes, $soft_core_genes, $shell_genes, $cloud_genes ) =
+      map { $self->gene_category_count->{$_} || 0 } qw(core soft_core shell cloud);
+    my $total_genes = $core_genes + $soft_core_genes + $shell_genes + $cloud_genes;
+
+    my $few_core_genes_warning = "Very few core genes detected with the current settings. Try modifying the core definition ( -cd 90 ) and/or \n\tthe blast identity (-i 70) parameters.  Also try checking for contamination (-qc) and ensure you only have one species.";
+    $self->logger->warn($few_core_genes_warning) if ( $core_genes < 100 );
+
+    print {$fh} "Core genes\t(${core_percentage}% <= strains <= 100%)\t$core_genes\n";
+    print {$fh} "Soft core genes\t(${shell_percentage}% <= strains < ${soft_core_percentage}%)\t$soft_core_genes\n";
+    print {$fh} "Shell genes\t(${cloud_percentage}% <= strains < ${shell_percentage}%)\t$shell_genes\n";
+    print {$fh} "Cloud genes\t(0% <= strains < ${cloud_percentage}%)\t$cloud_genes\n";
+    print {$fh} "Total genes\t(0% <= strains <= 100%)\t$total_genes\n";
+    close($fh);
+    return 1;
+}
+
+# Builder for gene_category_count. Classifies every gene by how many isolates carry it and returns a
+# hashref of gene counts keyed by 'cloud', 'shell', 'soft_core' and 'core'; a category with no genes
+# has no key. Side effect: the soft core threshold is set to core_definition, and the shell threshold
+# is moved to 0.01 below it whenever it is not already lower.
+sub _build_gene_category_count {
+    my ($self) = @_;
+
+    $self->_soft_core_percentage( $self->core_definition );
+    if ( $self->_shell_percentage >= $self->_soft_core_percentage ) {
+        $self->_shell_percentage( $self->_soft_core_percentage - 0.01 );
+    }
+
+    # Convert the fractional thresholds into isolate counts once, rather than once per gene.
+    my $number_of_samples = keys %{ $self->sample_names_to_column_index };
+    my $cloud_limit       = $self->_cloud_percentage() * $number_of_samples;
+    my $shell_limit       = $self->_shell_percentage() * $number_of_samples;
+    my $soft_core_limit   = $self->_soft_core_percentage() * $number_of_samples;
+
+    my %gene_category_count;
+    for my $row ( values %{ $self->_genes_to_rows } ) {
+        # Columns after the fixed headers hold one cell per sample; a non-empty cell counts as present.
+        my @isolate_cells      = @{$row}[ $self->_num_fixed_headers .. $#{$row} ];
+        my $isolates_with_gene = grep { defined($_) && $_ ne "" } @isolate_cells;
+
+        my $category =
+            $isolates_with_gene < $cloud_limit     ? 'cloud'
+          : $isolates_with_gene < $shell_limit     ? 'shell'
+          : $isolates_with_gene < $soft_core_limit ? 'soft_core'
+          :                                          'core';
+        $gene_category_count{$category}++;
+    }
+    return \%gene_category_count;
+}
+
+sub _build_ordered_genes {    # builder for the lazy 'ordered_genes' attribute
+    my ($self) = @_;
+    my %settings = ( spreadsheet => $self->spreadsheet, core_definition => $self->core_definition );
+    return Bio::Roary::ExtractCoreGenesFromSpreadsheet->new(%settings)->ordered_core_genes();
+}
+
+# Builder for _genes_to_rows. Rewinds the spreadsheet, records the sample columns named in the header
+# row, then returns a hashref mapping the first cell of every later row to that row (an arrayref).
+# Rows whose first cell is undefined or empty are skipped.
+sub _build__genes_to_rows {
+    my ($self) = @_;
+    my ( $spreadsheet_fh, $csv_parser ) = ( $self->_input_spreadsheet_fh, $self->_csv_parser );
+    seek( $spreadsheet_fh, 0, 0 );
+    my $header_row = $csv_parser->getline($spreadsheet_fh);
+    $self->_populate_sample_names_to_column_index($header_row);
+    my %genes_to_rows;
+    while ( my $row = $csv_parser->getline($spreadsheet_fh) ) {
+        $genes_to_rows{ $row->[0] } = $row if ( defined( $row->[0] ) && $row->[0] ne "" );
+    }
+    return \%genes_to_rows;
+}
+
+# Record, in sample_names_to_column_index, the column index of every non-empty header cell found
+# after the fixed header columns of $row (the spreadsheet's header row, as an arrayref).
+sub _populate_sample_names_to_column_index {
+    my ( $self, $row ) = @_;
+
+    my $last_column = @{$row} - 1;
+    my %samples_to_index =
+      map { ( $row->[$_], $_ ) } grep { defined( $row->[$_] ) && $row->[$_] ne "" } $self->_num_fixed_headers .. $last_column;
+    $self->sample_names_to_column_index( \%samples_to_index );
+}
+
+# Builder for all_sample_statistics: returns a hashref mapping each sample name to the statistics
+# hashref computed by _sample_statistics. Samples are processed in sorted name order.
+sub _build_all_sample_statistics {
+    my ($self) = @_;
+
+    my @sample_names = sort keys %{ $self->sample_names_to_column_index };
+
+    # scalar() keeps each call in scalar context, giving exactly one value per sample name.
+    my %sample_stats = map { ( $_ => scalar( $self->_sample_statistics($_) ) ) } @sample_names;
+    return \%sample_stats;
+}
+
+# Compute the contiguity statistics for one sample.
+#
+# For each gene in ordered_genes, take the sample's cell from the spreadsheet and keep the number
+# that follows the last underscore of the gene id (cells that are undefined or do not end in
+# "_<digits>" are skipped). The resulting list of numbers is handed to _number_of_contiguous_blocks
+# and its result returned.
+sub _sample_statistics {
+    my ( $self, $sample_name ) = @_;
+
+    my $sample_column_index = $self->sample_names_to_column_index->{$sample_name};
+
+    my @gene_numbers;
+    for my $gene_name ( @{ $self->ordered_genes } ) {
+        my $sample_gene_id = $self->_genes_to_rows->{$gene_name}->[$sample_column_index];
+        if ( defined($sample_gene_id) && $sample_gene_id =~ /_([\d]+)$/ ) {
+            push( @gene_numbers, $1 );
+        }
+    }
+    return $self->_number_of_contiguous_blocks( \@gene_numbers );
+}
+
+# Count the blocks of neighbouring genes in $gene_ids (arrayref of numeric gene ids). Consecutive
+# ids share a block when they differ by at most contiguous_window. Returns a hashref with
+# num_blocks and largest_block_size; an empty list yields num_blocks 1 and largest_block_size 0.
+sub _number_of_contiguous_blocks {
+    my ( $self, $gene_ids ) = @_;
+
+    my $window             = $self->contiguous_window;
+    my $previous_gene_id   = $gene_ids->[0];
+    my $number_of_blocks   = 1;
+    my $largest_block_size = 0;
+    my $block_size         = 0;
+    for my $gene_id ( @{$gene_ids} ) {
+        if ( abs( $gene_id - $previous_gene_id ) > $window ) {
+            # A block has just ended. Always restart the size counter, not only when the finished
+            # block was a new maximum, otherwise a short block leaks its size into the next one.
+            $largest_block_size = $block_size if ( $block_size > $largest_block_size );
+            $block_size         = 0;
+            $number_of_blocks++;
+        }
+        $previous_gene_id = $gene_id;
+        $block_size++;
+    }
+
+    $largest_block_size = $block_size if ( $block_size > $largest_block_size );
+    return { num_blocks => $number_of_blocks, largest_block_size => $largest_block_size };
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
+
diff --git a/lib/Bio/Roary/BedFromGFFRole.pm b/lib/Bio/Roary/BedFromGFFRole.pm
new file mode 100644
index 0000000..8dc926e
--- /dev/null
+++ b/lib/Bio/Roary/BedFromGFFRole.pm
@@ -0,0 +1,71 @@
+package Bio::Roary::BedFromGFFRole;
+
+# ABSTRACT: A role to create a bed file from a gff
+
+=head1 SYNOPSIS
+
+ A role to create a bed file from a gff
+   with 'Bio::Roary::BedFromGFFRole';
+
+=cut
+
+use Moose::Role;
+use Bio::Tools::GFF;
+
+has '_tags_to_filter'   => ( is => 'ro', isa => 'Str', default => '(CDS|ncRNA|tRNA|tmRNA|rRNA)' );
+has 'min_gene_size_in_nucleotides'   => ( is => 'ro', isa => 'Int',  default  => 120 );
+has 'output_directory'               => ( is => 'ro', isa => 'Str', default => '.' );
+
+sub _bed_output_filename {    # "<output_directory>/<output_filename>.intermediate.bed"
+    my ($self) = @_;
+    return $self->output_directory . '/' . $self->output_filename . '.intermediate.bed';
+}
+
+# Write the BED file named by _bed_output_filename from the features of gff_file.
+#
+# A feature is written when its primary tag matches _tags_to_filter, _get_feature_id finds an id for
+# it and end - start is at least min_gene_size_in_nucleotides. Columns: sequence id, start - 1, end,
+# gene id, 1, strand ('+' or '-'). Dies if the BED file cannot be opened or fully written.
+sub _create_bed_file_from_gff {
+    my ($self) = @_;
+
+    my $bed_filename = $self->_bed_output_filename;
+    open( my $bed_fh, '>', $bed_filename ) or die "Couldnt write to $bed_filename: $!";
+    my $tags_regex = $self->_tags_to_filter;
+    my $gffio      = Bio::Tools::GFF->new( -file => $self->gff_file, -gff_version => 3 );
+    while ( my $feature = $gffio->next_feature() ) {
+        next unless ( $feature->primary_tag =~ /$tags_regex/ );
+        my $gene_id = $self->_get_feature_id($feature);
+        next unless ($gene_id);
+        next if ( ( $feature->end - $feature->start ) < $self->min_gene_size_in_nucleotides );
+
+        my $strand = ( $feature->strand > 0 ) ? '+' : '-';
+        print {$bed_fh} join( "\t", ( $feature->seq_id, $feature->start - 1, $feature->end, $gene_id, 1, $strand ) ) . "\n";
+    }
+    # Closing explicitly surfaces buffered write errors (e.g. a full disk) instead of losing them.
+    close($bed_fh) or die "Couldnt write to $bed_filename: $!";
+    $gffio->close();
+}
+
+# Return the id of a GFF feature: the first value of its 'ID' tag or, when it has no 'ID' tag, of
+# its 'locus_tag' tag, with all single and double quotes removed. Returns undef when the feature
+# has neither tag or when the cleaned id is empty.
+#
+sub _get_feature_id {
+    my ( $self, $feature ) = @_;
+
+    # 'ID' takes precedence; only the first tag that is present is consulted, even if it is empty.
+    for my $tag_name (qw(ID locus_tag)) {
+        next unless ( $feature->has_tag($tag_name) );
+
+        my ($gene_id) = $feature->get_tag_values($tag_name);
+        $gene_id =~ s!["']!!g;
+        return undef if ( $gene_id eq "" );
+        return $gene_id;
+    }
+
+    # Neither tag is present.
+    return undef;
+}
+
+1;
diff --git a/lib/Bio/Roary/ChunkFastaFile.pm b/lib/Bio/Roary/ChunkFastaFile.pm
new file mode 100644
index 0000000..0fcfabb
--- /dev/null
+++ b/lib/Bio/Roary/ChunkFastaFile.pm
@@ -0,0 +1,77 @@
+package Bio::Roary::ChunkFastaFile;
+
+# ABSTRACT: Take in a FASTA file and chunk it up into smaller pieces.
+
+=head1 SYNOPSIS
+
+Take in a FASTA file and chunk it up into smaller pieces.
+   use Bio::Roary::ChunkFastaFile;
+   
+   my $obj = Bio::Roary::ChunkFastaFile->new(
+     fasta_file   => 'abc.fa',
+   );
+   $obj->sequence_file_names;
+
+=cut
+
+use Moose;
+use Bio::SeqIO;
+use Bio::Roary::Exceptions;
+use Cwd;
+use File::Temp;
+
+has 'fasta_file'          => ( is => 'ro', isa => 'Str',      required => 1 );
+has 'target_chunk_size'   => ( is => 'ro', isa => 'Int',      default  => 200000 );
+has 'sequence_file_names' => ( is => 'ro', isa => 'ArrayRef', lazy     => 1, builder => '_build_sequence_file_names' );
+has '_working_directory' =>
+  ( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } );
+has '_working_directory_name' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__working_directory_name' );
+has '_input_seqio' => ( is => 'ro', isa => 'Bio::SeqIO', lazy => 1, builder => '_build__input_seqio' );
+
+sub _build__working_directory_name {    # builder for the lazy _working_directory_name attribute
+    my ($self) = @_;
+    return $self->_working_directory->dirname();
+}
+
+sub _build__input_seqio {    # builder for the lazy _input_seqio attribute: a FASTA reader over fasta_file
+    my ($self) = @_;
+    return Bio::SeqIO->new( -file => $self->fasta_file, -format => 'Fasta' );
+}
+
+sub _create_next_chunk_file_name {    # returns "<working directory name>/<chunk number>.seq"
+    my ( $self, $chunk_number ) = @_;
+    return join( '/', ( $self->_working_directory_name, $chunk_number . '.seq' ) );
+}
+
+sub _create_next_chunk_seqio {    # FASTA Bio::SeqIO for the given chunk; the ">" prefix asks for the file to be opened for writing
+    my ( $self, $chunk_number ) = @_;
+    return Bio::SeqIO->new( -file => ">".$self->_create_next_chunk_file_name($chunk_number), -format => 'Fasta' );
+}
+
+# Builder for sequence_file_names: copy the sequences of fasta_file into numbered chunk files and
+# return an arrayref of the chunk file names. A new chunk is started once the current one already
+# holds more than target_chunk_size of sequence length. Chunk 0 is always created.
+sub _build_sequence_file_names {
+    my ($self) = @_;
+    my @sequence_file_names;
+    my ( $chunk_seqio, $chunk_length );
+    my $start_chunk = sub {
+        my $chunk_number = scalar(@sequence_file_names);
+        $chunk_seqio  = $self->_create_next_chunk_seqio($chunk_number);
+        $chunk_length = 0;
+        push( @sequence_file_names, $self->_create_next_chunk_file_name($chunk_number) );
+    };
+
+    $start_chunk->();
+    while ( my $input_seq = $self->_input_seqio->next_seq() ) {
+        $start_chunk->() if ( $chunk_length > $self->target_chunk_size );
+        $chunk_seqio->write_seq($input_seq);
+        $chunk_length += $input_seq->length();
+    }
+    return \@sequence_file_names;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/ClustersRole.pm b/lib/Bio/Roary/ClustersRole.pm
new file mode 100644
index 0000000..9cb295b
--- /dev/null
+++ b/lib/Bio/Roary/ClustersRole.pm
@@ -0,0 +1,70 @@
+package Bio::Roary::ClustersRole;
+# ABSTRACT: A role to read a clusters file from CD hit 
+
+=head1 SYNOPSIS
+
+A role to read a clusters file from CD hit 
+   with 'Bio::Roary::ClustersRole';
+
+=cut
+
+use Moose::Role;
+use Bio::Roary::Exceptions;
+
+has 'clusters_filename' => ( is => 'ro', isa => 'Str', required => 1 );
+has '_clustered_genes'  => ( is => 'ro',lazy => 1, builder => '_build__clustered_genes' );
+has '_clusters_fh'      => ( is => 'ro',lazy => 1, builder => '_build__clusters_fh' );
+
+sub _build__clusters_fh {    # builder for _clusters_fh; throws FileNotFound when the file cannot be opened
+  my ($self) = @_;
+  # Three-argument open: characters such as '>' or '|' in the filename are never treated as a mode.
+  open( my $fh, '<', $self->clusters_filename ) or Bio::Roary::Exceptions::FileNotFound->throw( error => 'Cant open file: ' . $self->clusters_filename );
+  return $fh;
+}
+
+# Builder for _clustered_genes: parse the CD-hit clusters file and return a hashref that maps the
+# representative gene name of each cluster to an arrayref of the other gene names in that cluster
+# (undef when the representative is the only member).
+#
+# Input format, fields separated by a tab; the member marked '*' is the representative:
+#   >Cluster 5
+#   0       4201aa, >6630_4#9_00008... *
+#   1       4201aa, >6631_1#23_00379... at 100.00%
+sub _build__clustered_genes {
+  my ($self) = @_;
+  my $fh = $self->_clusters_fh;
+
+  my %raw_clusters;
+  my $current_cluster_name;
+
+  # Each line is read into a lexical. A bare while(<$fh>) assigns to the global $_ without
+  # localising it, which overwrites whatever the caller currently holds in $_.
+  while ( my $line = <$fh> ) {
+
+    # A header line starts a new cluster.
+    if ( $line =~ /^>(.+)$/ ) {
+      $current_cluster_name = $1;
+    }
+
+    # A member line carries the gene name followed by either '*' or an identity such as "at 100.00%".
+    if ( $line =~ /[\d]+\t[\w]+, >(.+)\.\.\. (.+)$/ ) {
+      my ( $gene_name, $identity ) = ( $1, $2 );
+      if ( $identity eq '*' ) {
+        $raw_clusters{$current_cluster_name}{representative_gene_name} = $gene_name;
+      }
+      else {
+        push( @{ $raw_clusters{$current_cluster_name}{gene_names} }, $gene_name );
+      }
+    }
+  }
+
+  # Flatten into representative => [ other members ].
+  my %clustered_genes;
+  for my $cluster ( values %raw_clusters ) {
+    $clustered_genes{ $cluster->{representative_gene_name} } = $cluster->{gene_names};
+  }
+
+  return \%clustered_genes;
+}
+
+1;
\ No newline at end of file
diff --git a/lib/Bio/Roary/CombinedProteome.pm b/lib/Bio/Roary/CombinedProteome.pm
new file mode 100644
index 0000000..89e1a66
--- /dev/null
+++ b/lib/Bio/Roary/CombinedProteome.pm
@@ -0,0 +1,51 @@
+package Bio::Roary::CombinedProteome;
+
+# ABSTRACT: Take in multiple FASTA sequences containing proteomes and concat them together and output a FASTA file, filtering out more than 5% X's
+
+=head1 SYNOPSIS
+
+Take in multiple FASTA sequences containing proteomes and concat them together and output a FASTA file, filtering out more than 5% X's
+   use Bio::Roary::CombinedProteome;
+   
+   my $obj = Bio::Roary::CombinedProteome->new(
+     proteome_files   => ['abc.fa','efg.fa'],
+     output_filename   => 'example_output.fa',
+     maximum_percentage_of_unknowns => 5.0,
+   );
+   $obj->create_combined_proteome_file;
+
+=cut
+
+use Moose;
+use Bio::Roary::Exceptions;
+
+has 'proteome_files'                 => ( is => 'ro', isa => 'ArrayRef', required => 1 );
+has 'output_filename'                => ( is => 'ro', isa => 'Str',      default  => 'combined_output.fa' );
+
+# Moose construction hook: throw FileNotFound for the first proteome file that does not exist.
+sub BUILD {
+    my ($self) = @_;
+    for my $filename ( @{ $self->proteome_files } ) {
+        next if ( -e $filename );
+        Bio::Roary::Exceptions::FileNotFound->throw( error => 'Cant open file: ' . $filename );
+    }
+}
+
+# Concatenate every file in proteome_files, in order, into a freshly created output_filename. The copy is done
+# in perl rather than by "cat" in a shell, so any filename is safe. Throws on I/O failure; returns 1.
+sub create_combined_proteome_file {
+    my ($self) = @_;
+    unlink( $self->output_filename );
+    open( my $out_fh, '>', $self->output_filename ) or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => "Couldnt write to " . $self->output_filename );
+    for my $filename ( @{ $self->proteome_files } ) {
+        open( my $in_fh, '<', $filename ) or Bio::Roary::Exceptions::FileNotFound->throw( error => 'Cant open file: ' . $filename );
+        while ( my $line = <$in_fh> ) { print {$out_fh} $line; }
+    }
+    close($out_fh) or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => "Couldnt write to " . $self->output_filename );
+    return 1;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/CommandLine/AssemblyStatistics.pm b/lib/Bio/Roary/CommandLine/AssemblyStatistics.pm
new file mode 100644
index 0000000..5562c65
--- /dev/null
+++ b/lib/Bio/Roary/CommandLine/AssemblyStatistics.pm
@@ -0,0 +1,134 @@
+undef $VERSION;
+package Bio::Roary::CommandLine::AssemblyStatistics;
+
+# ABSTRACT: Given a spreadsheet of gene presence and absence calculate some statistics
+
+=head1 SYNOPSIS
+
+Given a spreadsheet of gene presence and absence calculate some statistics
+
+=cut
+
+use Moose;
+use Getopt::Long qw(GetOptionsFromArray);
+use Bio::Roary::AssemblyStatistics;
+extends 'Bio::Roary::CommandLine::Common';
+
+has 'args'            => ( is => 'ro', isa => 'ArrayRef', required => 1 );
+has 'script_name'     => ( is => 'ro', isa => 'Str',      required => 1 );
+has 'help'            => ( is => 'rw', isa => 'Bool',     default  => 0 );
+has 'spreadsheet'     => ( is => 'rw', isa => 'Str',      default  => 'gene_presence_absence.csv' );
+has 'job_runner'      => ( is => 'rw', isa => 'Str',      default  => 'Local' );
+has 'cpus'            => ( is => 'rw', isa => 'Int',      default  => 1 );
+has 'output_filename' => ( is => 'rw', isa => 'Str',      default  => 'assembly_statistics.csv' );
+has 'version'         => ( is => 'rw', isa => 'Bool',     default  => 0 );
+has 'core_definition' => ( is => 'rw', isa => 'Num',      default  => 0.99 );
+has 'verbose'         => ( is => 'rw', isa => 'Bool',     default  => 0 );
+
+
+# Moose construction hook: parse the command line held in args and fill in the attributes.
+#
+# The checks run in a fixed order: version, verbose, help, missing arguments, option values and
+# finally file existence. Any failure ends in die(); the message is the usage text, except for -w
+# where it is the version string. Input problems are also sent to the logger first.
+#
+sub BUILD {
+    my ($self) = @_;
+
+    my ( $job_runner, $cpus, $output_filename, $version, $core_definition, $verbose, $help );
+    GetOptionsFromArray(
+        $self->args,
+        'o|output_filename=s'  => \$output_filename,
+        'j|job_runner=s'       => \$job_runner,
+        'p|processors=i'       => \$cpus,
+        'cd|core_definition=f' => \$core_definition,
+        'v|verbose'            => \$verbose,
+        'w|version'            => \$version,
+        'h|help'               => \$help,
+    );
+
+    # -w: report the version and stop.
+    $self->version($version) if ( defined($version) );
+    die( $self->_version() ) if ( $self->version );
+
+    # -v: remember the flag and set the logger level to 10000.
+    if ( defined($verbose) ) {
+        $self->verbose($verbose);
+        $self->logger->level(10000);
+    }
+
+    # -h: show the usage text and stop.
+    $self->help($help) if ( defined($help) );
+    die $self->usage_text if ( $self->help );
+
+    # Whatever is left in args after option parsing should be the spreadsheet.
+    unless ( @{ $self->args } ) {
+        $self->logger->error("Error: You need to provide a gene_presence_absence.csv spreadsheet");
+        die $self->usage_text;
+    }
+
+    $self->output_filename($output_filename) if ( defined($output_filename) );
+    $self->job_runner($job_runner)           if ( defined($job_runner) );
+    $self->cpus($cpus)                       if ( defined($cpus) );
+
+    # Asking for more than one cpu switches to the 'Parallel' job runner, overriding -j.
+    $self->job_runner('Parallel') if ( $self->cpus > 1 );
+
+    # -cd is given as a percentage but stored as a fraction.
+    $self->core_definition( $core_definition / 100 ) if ( defined($core_definition) );
+
+    # Every remaining argument has to be an existing file.
+    for my $filename ( @{ $self->args } ) {
+        next if ( -e $filename );
+        $self->logger->error("Error: Cant access file $filename");
+        die $self->usage_text;
+    }
+
+    # The first remaining argument is the spreadsheet to analyse.
+    $self->spreadsheet( $self->args->[0] );
+}
+
+# Version string reported for -w. Returns this package's $VERSION followed by a newline, or the
+# placeholder "x.y.z" followed by a newline while $VERSION is undefined.
+sub _version {
+    my ($self) = @_;
+
+    # Guard clause first, so the normal case reads straight through.
+    unless ( defined($Bio::Roary::CommandLine::AssemblyStatistics::VERSION) ) {
+        return "x.y.z\n";
+    }
+
+    return $Bio::Roary::CommandLine::AssemblyStatistics::VERSION . "\n";
+}
+
+# Build the statistics object and write the summary. Every parsed option is forwarded; previously only the spreadsheet was, so -cd had no effect.
+sub run {
+    my ($self) = @_;
+    my %options = map { ( $_ => $self->$_ ) } qw(spreadsheet logger core_definition output_filename job_runner cpus verbose);
+    Bio::Roary::AssemblyStatistics->new(%options)->create_summary_output;
+}
+
+sub usage_text {
+    my ($self) = @_;
+    # Help text for pan_genome_assembly_statistics; BUILD dies with it for -h and for unusable arguments.
+    return <<USAGE;
+Usage: pan_genome_assembly_statistics [options] gene_presence_absence.csv
+Take in a gene presence and absence spreadsheet and output some statistics
+  
+Options: -p INT    number of threads [1]	
+         -o STR    output filename [assembly_statistics.csv]
+         -cd FLOAT percentage of isolates a gene must be in to be core [99]
+         -v        verbose output to STDOUT
+         -w        print version and exit
+         -h        this help message
+		 
+Example: Run with defaults
+         pan_genome_assembly_statistics gene_presence_absence.csv
+
+For further information see: http://sanger-pathogens.github.io/Roary/
+USAGE
+}
+
+__PACKAGE__->meta->make_immutable;
+no Moose;
+1;
diff --git a/lib/Bio/Roary/CommandLine/Common.pm b/lib/Bio/Roary/CommandLine/Common.pm
new file mode 100644
index 0000000..9db65f5
--- /dev/null
+++ b/lib/Bio/Roary/CommandLine/Common.pm
@@ -0,0 +1,57 @@
+package Bio::Roary::CommandLine::Common;
+# ABSTRACT: Common command line settings
+
+=head1 SYNOPSIS
+
+Common command line settings
+
+   extends 'Bio::Roary::CommandLine::Common';
+
+=cut
+
+use Moose;
+use FindBin;
+use Log::Log4perl qw(:easy);
+
+has 'logger'                  => ( is => 'ro', lazy => 1, builder => '_build_logger');
+has 'version'                 => ( is => 'rw', isa => 'Bool', default => 0 );
+
+# Builder for the lazy 'logger' attribute shared by the command line classes: call Log4perl's
+# easy_init and hand back the logger it provides.
+sub _build_logger {
+    my ($self) = @_;
+    Log::Log4perl->easy_init(level => $ERROR);
+    return get_logger();
+}
+
+
+sub run {    # does nothing here; classes extending this one define their own run
+	my ($self) = @_;
+}
+
+sub usage_text {    # placeholder help text
+    my ($self) = @_;
+	return "Usage text";
+}
+
+sub _version {    # placeholder version string, newline terminated
+    my ($self) = @_;
+    return "x.y.z\n";
+}
+
+
+# Runs before run(): add the bundled binaries directory for this operating system
+# ("<script dir>/../binaries/<$^O>") and the script's own directory to the END of PATH.
+# Directories that do not exist are left out.
+before 'run' => sub {
+    my ($self) = @_;
+
+    my $bundled_binaries_dir = $FindBin::RealBin . '/../binaries/' . $^O;
+    my @existing_dirs        = grep { -d $_ } ( $bundled_binaries_dir, $FindBin::RealBin );
+
+    # Appending keeps every directory already on PATH ahead of ours.
+    $ENV{PATH} .= ":$_" for @existing_dirs;
+};
+
+no Moose;
+1;
\ No newline at end of file
diff --git a/lib/Bio/Roary/CommandLine/CreatePanGenome.pm b/lib/Bio/Roary/CommandLine/CreatePanGenome.pm
new file mode 100644
index 0000000..4fe32d8
--- /dev/null
+++ b/lib/Bio/Roary/CommandLine/CreatePanGenome.pm
@@ -0,0 +1,72 @@
+undef $VERSION;
+package Bio::Roary::CommandLine::CreatePanGenome;
+
+# ABSTRACT: Take in FASTA files of proteins and cluster them
+
+=head1 SYNOPSIS
+
+Take in FASTA files of proteins and cluster them
+
+=cut
+
+use Moose;
+use Getopt::Long qw(GetOptionsFromArray);
+use Bio::Roary;
+use Bio::Roary::PrepareInputFiles;
+use Bio::Roary::QC::Report;
+extends 'Bio::Roary::CommandLine::Roary';
+
+has 'job_runner'                  => ( is => 'rw', isa => 'Str',  default => 'Local' );
+has 'output_multifasta_files'     => ( is => 'rw', isa => 'Bool', default => 1 );
+has 'dont_create_rplots'          => ( is => 'rw', isa => 'Bool', default => 0 );
+has 'core_definition'             => ( is => 'rw', isa => 'Num',  default => 0.99 );
+has 'run_qc'                      => ( is => 'rw', isa => 'Bool', default => 1 );
+
+sub usage_text {
+    my ($self) = @_;
+    # Help text for the create_pan_genome script: options followed by worked examples.
+    return <<USAGE;
+Usage:   create_pan_genome [options] *.gff
+Build a pan genome with WTSI defaults.
+
+Options: -p INT    number of threads [1]
+         -o STR    clusters output filename [clustered_proteins]
+		 -f STR    output directory [.]
+         -e        create a multiFASTA alignment of core genes
+         -n        fast core gene alignement with MAFFT, use with -e
+         -i        minimum percentage identity for blastp [95]
+         -cd FLOAT percentage of isolates a gene must be in to be core [99]
+         -z        dont delete intermediate files
+         -t INT    translation table [11]
+         -v        verbose output to STDOUT
+         -y        add gene inference information to spreadsheet, doesnt work with -e
+         -g INT    maximum number of clusters [50000]
+         -qc       generate QC report with Kraken
+         -k STR    path to Kraken database for QC, use with -qc
+         -w        print version and exit
+		 -a        check dependancies and print versions
+         -h        this help message
+
+Example: Quickly generate a core gene alignment using 16 threads
+
+         bsub.py --threads 16 10 log create_pan_genome -e --mafft -p 16  *.gff
+         
+Example: Allow Roary to bsub the jobs to LSF - you cant bsub this command itself
+
+         create_pan_genome -j LSF -e --mafft -p 16  *.gff
+		 
+Example: Create a tree and visualise with iCANDY
+
+		 annotationfind –t file –i file_of_lanes -symlink .
+		 bsub.py --threads 16 10 log create_pan_genome -e --mafft -p 16 *.gff
+		 ~sh16/scripts/run_RAxML.py -a core_gene_alignment.aln -q normal  -M 8 -n 8 -V AVX -o tree
+		 bsub.py 10 log ~sh16/scripts/iCANDY.py -t RAxML_bipartitions.tree -q taxa -l 1 -E 30 -o accessory.pdf -M -L left -p A1 -g 90 accessory.tab accessory.header.embl
+
+For further info see: http://mediawiki.internal.sanger.ac.uk/index.php/Pathogen_Informatics_Pan_Genome_Pipeline
+
+USAGE
+}
+
+__PACKAGE__->meta->make_immutable;
+no Moose;
+1;
diff --git a/lib/Bio/Roary/CommandLine/ExtractProteomeFromGff.pm b/lib/Bio/Roary/CommandLine/ExtractProteomeFromGff.pm
new file mode 100644
index 0000000..e80eab5
--- /dev/null
+++ b/lib/Bio/Roary/CommandLine/ExtractProteomeFromGff.pm
@@ -0,0 +1,121 @@
+undef $VERSION;
+package Bio::Roary::CommandLine::ExtractProteomeFromGff;
+
+# ABSTRACT: Take in GFF files and output the proteome
+
+=head1 SYNOPSIS
+
+Take in a GFF file and output the proteome
+
+=cut
+
+use Moose;
+use Getopt::Long qw(GetOptionsFromArray);
+use Bio::Roary::ExtractProteomeFromGFF;
+use File::Basename;
+extends 'Bio::Roary::CommandLine::Common';
+
+has 'args'        => ( is => 'ro', isa => 'ArrayRef', required => 1 );
+has 'script_name' => ( is => 'ro', isa => 'Str',      required => 1 );
+has 'help'        => ( is => 'rw', isa => 'Bool',     default  => 0 );
+
+has 'gff_files'             => ( is => 'rw', isa => 'ArrayRef' );
+has 'output_suffix'         => ( is => 'rw', isa => 'Str',  default => 'proteome.faa' );
+has '_error_message'        => ( is => 'rw', isa => 'Str' );
+has 'apply_unknowns_filter' => ( is => 'rw', isa => 'Bool', default => 1 );
+has 'translation_table'     => ( is => 'rw', isa => 'Int',  default => 11 );
+has 'verbose'               => ( is => 'rw', isa => 'Bool', default => 0 );
+has 'output_directory'      => ( is => 'rw', isa => 'Str',  default => '.' );
+
+# Moose construction hook: parse the command line held in args and fill in the attributes.
+#
+# -v sets the logger level to 10000, -h dies with the usage text and -w dies with the version
+# string. Problems with the input files are not fatal here: they are stored in _error_message (a
+# missing file replaces the "no files given" message). All arguments left after option parsing
+# become gff_files.
+sub BUILD {
+    my ($self) = @_;
+
+    my ( $output_suffix, $apply_unknowns_filter, $help, $translation_table, $verbose, $cmd_version, $output_directory );
+    GetOptionsFromArray(
+        $self->args,
+        'o|output_suffix=s'         => \$output_suffix,
+        'f|apply_unknowns_filter=i' => \$apply_unknowns_filter,
+        't|translation_table=i'     => \$translation_table,
+        'v|verbose'                 => \$verbose,
+        'd|output_directory=s'      => \$output_directory,
+        'w|version'                 => \$cmd_version,
+        'h|help'                    => \$help,
+    );
+
+    if ( defined($verbose) ) {
+        $self->verbose($verbose);
+        $self->logger->level(10000);
+    }
+
+    $self->help($help) if ( defined($help) );
+    die $self->usage_text if ( $self->help );
+
+    $self->version($cmd_version) if ( defined($cmd_version) );
+    die( $self->_version() ) if ( $self->version );
+
+    # Input problems are only recorded at this point.
+    $self->_error_message("Error: You need to provide a GFF file") unless ( @{ $self->args } );
+
+    $self->output_suffix($output_suffix)                 if ( defined($output_suffix) );
+    $self->apply_unknowns_filter($apply_unknowns_filter) if ( defined($apply_unknowns_filter) );
+    $self->translation_table($translation_table)         if ( defined($translation_table) );
+    $self->output_directory($output_directory)           if ( defined($output_directory) );
+
+    # Only the first inaccessible file is named in the error message.
+    my ($missing_file) = grep { !-e $_ } @{ $self->args };
+    $self->_error_message("Error: Cant access file $missing_file") if ( defined($missing_file) );
+
+    # The remaining arguments are stored even when an error was recorded.
+    $self->gff_files( $self->args );
+}
+
+# If an error message was recorded, print it and die with the usage text. Otherwise call fasta_file()
+# on one ExtractProteomeFromGFF object per GFF file, with output name "<gff basename>.<output_suffix>".
+sub run {
+    my ($self) = @_;
+    my $error_message = $self->_error_message;
+    if ( defined($error_message) ) {
+        print $error_message . "\n";
+        die $self->usage_text;
+    }
+
+    for my $gff_file ( @{ $self->gff_files } ) {
+        my ($gff_basename) = fileparse($gff_file);
+        Bio::Roary::ExtractProteomeFromGFF->new(
+            gff_file              => $gff_file,
+            output_filename       => $gff_basename . '.' . $self->output_suffix,
+            apply_unknowns_filter => $self->apply_unknowns_filter,
+            translation_table     => $self->translation_table,
+            output_directory      => $self->output_directory,
+        )->fasta_file();
+    }
+}
+
+sub usage_text {
+    my ($self) = @_;
+    # Help text for extract_proteome_from_gff; BUILD and run die with it.
+    return <<USAGE;
+Usage: extract_proteome_from_gff [options] *.gff
+Take in GFF files and create FASTA files of the protein sequences
+
+Options: -o STR    output suffix [proteome.faa]
+         -t INT    translation table [11]
+         -f        filter sequences with missing data
+         -v        verbose output to STDOUT
+         -d STR    output directory
+         -w        print version and exit
+         -h        this help message
+
+For further info see: http://sanger-pathogens.github.io/Roary/
+USAGE
+}
+
+__PACKAGE__->meta->make_immutable;
+no Moose;
+1;
diff --git a/lib/Bio/Roary/CommandLine/GeneAlignmentFromNucleotides.pm b/lib/Bio/Roary/CommandLine/GeneAlignmentFromNucleotides.pm
new file mode 100644
index 0000000..f929629
--- /dev/null
+++ b/lib/Bio/Roary/CommandLine/GeneAlignmentFromNucleotides.pm
@@ -0,0 +1,134 @@
+undef $VERSION;
+package Bio::Roary::CommandLine::GeneAlignmentFromNucleotides;
+
+# ABSTRACT: Take in a multifasta file of nucleotides, convert to proteins and align with PRANK or MAFFT
+
+=head1 SYNOPSIS
+
+Take in a multifasta file of nucleotides, convert to proteins and align with PRANK or MAFFT, reverse translate back to nucleotides
+
+=cut
+
+use Moose;
+use Getopt::Long qw(GetOptionsFromArray);
+use File::Copy;
+use Bio::Roary::AnnotateGroups;
+use Bio::Roary::External::Prank;
+use Bio::Roary::Output::GroupsMultifastaProtein;
+use Bio::Roary::SortFasta;
+use Bio::Roary::External::Mafft;
+extends 'Bio::Roary::CommandLine::Common';
+
+has 'args'        => ( is => 'ro', isa => 'ArrayRef', required => 1 );    # command-line arguments; BUILD parses the options out of it
+has 'script_name' => ( is => 'ro', isa => 'Str',      required => 1 );
+has 'help'        => ( is => 'rw', isa => 'Bool',     default  => 0 );    # set by -h; makes run() die with the usage text
+# State filled in by BUILD from the parsed command line.
+has 'nucleotide_fasta_files' => ( is => 'rw', isa => 'ArrayRef' );
+has '_error_message'         => ( is => 'rw', isa => 'Str' );
+has 'verbose'                => ( is => 'rw', isa => 'Bool', default => 0 );
+has 'mafft'                  => ( is => 'rw', isa => 'Bool', default => 0 );    # -n: run() uses MAFFT instead of PRANK
+has '_min_similarity'        => ( is => 'rw', isa => 'Num',  default  => 0.98 );    # run() compares the SortFasta similarity against this
+
+# Moose BUILD hook: parses the command-line switches out of args and stores
+# whatever is left over as the list of nucleotide FASTA files to process.
+sub BUILD {
+    my ($self) = @_;
+    my ( $help, $verbose, $mafft );
+
+    GetOptionsFromArray(
+        $self->args,
+        'v|verbose' => \$verbose,
+        'n|mafft'   => \$mafft,
+        'h|help'    => \$help,
+    );
+
+    if ( defined $verbose ) {
+        $self->verbose($verbose);
+        $self->logger->level(10000);
+    }
+    $self->mafft($mafft) if defined $mafft;
+    $self->help($help)   if defined $help;
+    my @remaining = @{ $self->args };
+    $self->_error_message("Error: You need to provide at least 1 FASTA file") unless @remaining;
+
+    # Only the first inaccessible file is reported.
+    foreach my $candidate (@remaining) {
+        next if -e $candidate;
+        $self->_error_message("Error: Cant access file $candidate");
+        last;
+    }
+    $self->nucleotide_fasta_files( $self->args );
+}
+
+# Sorts each nucleotide FASTA file, aligns it with MAFFT or PRANK when needed
+# (otherwise renames it) and post-processes the resulting "<file>.aln". Dies
+# with the usage text on --help or when option parsing recorded an error.
+sub run {
+    my ($self) = @_;
+
+    ( !$self->help ) or die $self->usage_text;
+    if ( defined( $self->_error_message ) ) {
+        print $self->_error_message . "\n";
+        die $self->usage_text;
+    }
+
+    # Both wrappers take the same constructor arguments, so pick the class once.
+    my $aligner_class = $self->mafft ? 'Bio::Roary::External::Mafft' : 'Bio::Roary::External::Prank';
+
+    for my $fasta_file ( @{ $self->nucleotide_fasta_files } ) {
+        my $aligned_file = $fasta_file . '.aln';
+
+        my $sort_fasta_before = Bio::Roary::SortFasta->new(
+            input_filename         => $fasta_file,
+            make_multiple_of_three => 1,
+        );
+        $sort_fasta_before->sort_fasta->replace_input_with_output_file;
+
+        # Run an aligner when sequences_unaligned is 1, or when it is 0 and
+        # the similarity is at or below _min_similarity.
+        my $unaligned       = $sort_fasta_before->sequences_unaligned;
+        my $needs_alignment = $unaligned == 1
+          || ( $unaligned == 0 && $sort_fasta_before->similarity <= $self->_min_similarity );
+
+        if ($needs_alignment) {
+            $aligner_class->new(
+                input_filename  => $fasta_file,
+                output_filename => $aligned_file,
+                job_runner      => 'Local',
+                logger          => $self->logger,
+                verbose         => $self->verbose
+            )->run();
+        }
+        else {
+            # No aligner run: the sorted input is used as the alignment as-is.
+            move( $fasta_file, $aligned_file )
+              or die "Error: Could not move $fasta_file to $aligned_file: $!\n";
+        }
+
+        my $sort_fasta_after_revtrans = Bio::Roary::SortFasta->new(
+            input_filename      => $aligned_file,
+            remove_nnn_from_end => 1,
+        );
+        $sort_fasta_after_revtrans->sort_fasta->replace_input_with_output_file;
+        unlink($fasta_file);    # harmless when the file was already moved above
+    }
+}
+
+# Returns the help text that run() dies with on --help or unusable arguments.
+sub usage_text {
+    my $usage = <<USAGE;
+Usage: protein_alignment_from_nucleotides [options] *.fa
+Take in multi-FASTA files of nucleotides and align each file with PRANK or MAFFT
+
+Options: -n        nucleotide alignment with MAFFT
+         -v        verbose output to STDOUT
+         -h        this help message
+
+For further info see: http://sanger-pathogens.github.io/Roary/
+USAGE
+    return $usage;
+}
+
+__PACKAGE__->meta->make_immutable;
+no Moose;
+1;
diff --git a/lib/Bio/Roary/CommandLine/IterativeCdhit.pm b/lib/Bio/Roary/CommandLine/IterativeCdhit.pm
new file mode 100644
index 0000000..eef7966
--- /dev/null
+++ b/lib/Bio/Roary/CommandLine/IterativeCdhit.pm
@@ -0,0 +1,119 @@
+undef $VERSION;
+package Bio::Roary::CommandLine::IterativeCdhit;
+
+# ABSTRACT: Iteratively run cdhit
+
+=head1 SYNOPSIS
+
+Iteratively run cdhit
+
+=cut
+
+use Moose;
+use Getopt::Long qw(GetOptionsFromArray);
+use Bio::Roary::IterativeCdhit;
+extends 'Bio::Roary::CommandLine::Common';
+
+has 'args'        => ( is => 'ro', isa => 'ArrayRef', required => 1 );    # command-line arguments parsed by BUILD
+has 'script_name' => ( is => 'ro', isa => 'Str',      required => 1 );
+has 'help'        => ( is => 'rw', isa => 'Bool',     default  => 0 );    # set by -h; makes run() die with the usage text
+has '_error_message' => ( is => 'rw', isa => 'Str' );    # checked by run(); not set anywhere in this package
+# File names and isolate count, overridable with -c, -m, -n and -f.
+has 'output_cd_hit_filename'          => ( is => 'rw', isa => 'Str', default => '_clustered' );
+has 'output_combined_filename'        => ( is => 'rw', isa => 'Str', default => '_combined_files' );
+has 'number_of_input_files'           => ( is => 'rw', isa => 'Int', default => 1 );
+has 'output_filtered_clustered_fasta' => ( is => 'rw', isa => 'Str', default => '_clustered_filtered.fa' );
+# Stored as fractions: -l, -u and -s take percentages, which BUILD divides by 100.
+has 'lower_bound_percentage'          => ( is => 'rw', isa => 'Num', default => 0.98 );
+has 'upper_bound_percentage'          => ( is => 'rw', isa => 'Num', default => 0.99 );
+has 'step_size_percentage'            => ( is => 'rw', isa => 'Num', default => 0.005 );
+has 'cpus'                            => ( is => 'rw', isa => 'Int', default => 1 );
+has 'verbose'                         => ( is => 'rw', isa => 'Bool', default => 0 );
+
+
+# Moose BUILD hook: parses the options in args and copies each supplied value
+# onto its attribute; percentages arrive on a 0-100 scale and are stored / 100.
+sub BUILD {
+    my ($self) = @_;
+
+    my ( $output_cd_hit_filename, $output_combined_filename, $number_of_input_files, $output_filtered_clustered_fasta );
+    my ( $lower_bound_percentage, $upper_bound_percentage, $step_size_percentage, $cpus, $verbose, $help );
+
+    GetOptionsFromArray(
+        $self->args,
+        'c|output_cd_hit_filename=s'          => \$output_cd_hit_filename,
+        'm|output_combined_filename=s'        => \$output_combined_filename,
+        'n|number_of_input_files=i'           => \$number_of_input_files,
+        'f|output_filtered_clustered_fasta=s' => \$output_filtered_clustered_fasta,
+        'l|lower_bound_percentage=f'          => \$lower_bound_percentage,    # =f: must be numeric, it is divided below
+        'u|upper_bound_percentage=f'          => \$upper_bound_percentage,
+        's|step_size_percentage=f'            => \$step_size_percentage,
+        'p|cpus=i'                            => \$cpus,
+        'v|verbose'                           => \$verbose,
+        'h|help'                              => \$help,
+    );
+
+    if ( defined($verbose) ) {
+        $self->verbose($verbose);
+        $self->logger->level(10000);
+    }
+    $self->help($help)                                             if ( defined($help) );
+    $self->lower_bound_percentage( $lower_bound_percentage / 100 ) if ( defined($lower_bound_percentage) );
+    $self->upper_bound_percentage( $upper_bound_percentage / 100 ) if ( defined($upper_bound_percentage) );
+    $self->step_size_percentage( $step_size_percentage / 100 )     if ( defined($step_size_percentage) );
+    $self->output_cd_hit_filename($output_cd_hit_filename)         if ( defined($output_cd_hit_filename) );
+    $self->output_combined_filename($output_combined_filename)     if ( defined($output_combined_filename) );
+    $self->number_of_input_files($number_of_input_files)           if ( defined($number_of_input_files) );
+    $self->cpus($cpus)                                             if ( defined($cpus) );
+    $self->output_filtered_clustered_fasta($output_filtered_clustered_fasta) if ( defined($output_filtered_clustered_fasta) );
+}
+
+# Dies with the usage text on --help or a recorded error; otherwise passes
+# the configured settings (and the logger) to Bio::Roary::IterativeCdhit and
+# runs it.
+sub run {
+    my $self = shift;
+
+    die $self->usage_text if $self->help;
+    if ( defined( $self->_error_message ) ) {
+        print $self->_error_message . "\n";
+        die $self->usage_text;
+    }
+
+    my @attribute_names = qw(
+      output_cd_hit_filename output_combined_filename number_of_input_files
+      output_filtered_clustered_fasta lower_bound_percentage
+      upper_bound_percentage step_size_percentage cpus logger
+    );
+
+    # Every constructor argument is named after the attribute it is read from.
+    my $clusterer =
+      Bio::Roary::IterativeCdhit->new( map { $_ => $self->$_ } @attribute_names );
+    $clusterer->run;
+}
+
+# Returns the help text that run() dies with on --help or a recorded error.
+sub usage_text {
+    my $usage = <<USAGE;
+Usage: iterative_cdhit [options]
+Iteratively cluster a set of proteins with CD-hit, lower the threshold each time and extracting core genes (1 per isolate) to another file, and remove them from the input proteins file.
+
+Options: -p INT   number of threads [1]
+         -m STR   output filename for combined proteins [_combined_files]
+         -n INT   number of isolates [1]
+         -c STR   cd-hit output filename [_clustered]
+         -f STR   output filename for filtered sequences [_clustered_filtered.fa]
+         -l FLOAT lower bound percentage identity [98.0]
+         -u FLOAT upper bound percentage identity [99.0]
+         -s FLOAT step size for percentage identity [0.5]
+         -v       verbose output to STDOUT
+         -h       this help message
+
+For further info see: http://sanger-pathogens.github.io/Roary/
+USAGE
+    return $usage;
+}
+
+__PACKAGE__->meta->make_immutable;
+no Moose;
+1;
diff --git a/lib/Bio/Roary/CommandLine/ParallelAllAgainstAllBlastp.pm b/lib/Bio/Roary/CommandLine/ParallelAllAgainstAllBlastp.pm
new file mode 100644
index 0000000..6a5de8b
--- /dev/null
+++ b/lib/Bio/Roary/CommandLine/ParallelAllAgainstAllBlastp.pm
@@ -0,0 +1,141 @@
+undef $VERSION;
+package Bio::Roary::CommandLine::ParallelAllAgainstAllBlastp;
+
+# ABSTRACT: Take in a FASTA file of proteins and blast against itself
+
+=head1 SYNOPSIS
+
+Take in a FASTA file of proteins and blast against itself
+
+=cut
+
+use Moose;
+use Getopt::Long qw(GetOptionsFromArray);
+use Bio::Roary::ParallelAllAgainstAllBlast;
+use Bio::Roary::CombinedProteome;
+use Bio::Roary::PrepareInputFiles;
+extends 'Bio::Roary::CommandLine::Common';
+
+has 'args'        => ( is => 'ro', isa => 'ArrayRef', required => 1 );    # command-line arguments parsed by BUILD
+has 'script_name' => ( is => 'ro', isa => 'Str',      required => 1 );
+has 'help'        => ( is => 'rw', isa => 'Bool',     default  => 0 );    # set by -h; makes run() die with the usage text
+# Settings below are overridden in BUILD by the matching command-line options.
+has 'fasta_files'       => ( is => 'rw', isa => 'ArrayRef' );
+has 'output_filename'   => ( is => 'rw', isa => 'Str', default => 'blast_results' );
+has 'job_runner'        => ( is => 'rw', isa => 'Str', default => 'Local' );    # forced to 'Parallel' by BUILD when cpus > 1
+has 'cpus'                        => ( is => 'rw', isa => 'Int',  default => 1 );
+has 'makeblastdb_exec'  => ( is => 'rw', isa => 'Str', default => 'makeblastdb' );
+has 'blastp_exec'       => ( is => 'rw', isa => 'Str', default => 'blastp' );
+has 'verbose'           => ( is => 'rw', isa => 'Bool', default => 0 );
+# Set by BUILD when the arguments are unusable; run() prints it and dies.
+has '_error_message' => ( is => 'rw', isa => 'Str' );
+
+# Moose BUILD hook: parses the command-line options out of args, applies the
+# supplied values to the attributes and keeps the remaining arguments as the
+# FASTA files. Problems are recorded in _error_message rather than thrown.
+sub BUILD {
+    my ($self) = @_;
+
+    my ( $output_filename, $job_runner, $makeblastdb_exec, $blastp_exec, $help, $cpus, $verbose );
+
+    GetOptionsFromArray(
+        $self->args,
+        'o|output=s'           => \$output_filename,
+        'j|job_runner=s'       => \$job_runner,
+        'm|makeblastdb_exec=s' => \$makeblastdb_exec,
+        'b|blastp_exec=s'      => \$blastp_exec,
+        'p|processors=i'       => \$cpus,
+        'v|verbose'            => \$verbose,
+        'h|help'               => \$help,
+    );
+
+    $self->_error_message("Error: You need to provide a FASTA file") unless @{ $self->args };
+
+    if ( defined $verbose ) {
+        $self->verbose($verbose);
+        $self->logger->level(10000);
+    }
+    $self->help($help)                         if defined $help;
+    $self->output_filename($output_filename)   if defined $output_filename;
+    $self->makeblastdb_exec($makeblastdb_exec) if defined $makeblastdb_exec;
+    $self->blastp_exec($blastp_exec)           if defined $blastp_exec;
+    $self->job_runner($job_runner)             if defined $job_runner;
+    $self->cpus($cpus)                         if defined $cpus;
+
+    # More than one CPU always selects the parallel runner, overriding -j.
+    $self->job_runner('Parallel') if $self->cpus > 1;
+
+    # Only the first inaccessible file is reported.
+    foreach my $candidate ( @{ $self->args } ) {
+        next if -e $candidate;
+        $self->_error_message("Error: Cant access file $candidate");
+        last;
+    }
+    $self->fasta_files( $self->args );
+}
+
+# Dies with the usage text on --help or a recorded error. Several input files
+# are first merged into combined_files.fa; a single input file is used as
+# given. The chosen file is then handed to the all-against-all blast.
+sub run {
+    my $self = shift;
+
+    die $self->usage_text if $self->help;
+    if ( defined( $self->_error_message ) ) {
+        print $self->_error_message . "\n";
+        die $self->usage_text;
+    }
+
+    my $prepare_input_files = Bio::Roary::PrepareInputFiles->new(
+        input_files => $self->fasta_files,
+    );
+
+    # Default to the lone input file; replaced below when files are combined.
+    # NOTE(review): a single input reaches blast untouched, i.e. not through
+    # $prepare_input_files->fasta_files - confirm that this is intended.
+    my $blast_input = $self->fasta_files->[0];
+    if ( @{ $self->fasta_files } > 1 ) {
+        $blast_input = 'combined_files.fa';
+        $self->logger->info("Combining protein files");
+        Bio::Roary::CombinedProteome->new(
+            proteome_files                 => $prepare_input_files->fasta_files,
+            output_filename                => $blast_input,
+            maximum_percentage_of_unknowns => 5.0,
+            apply_unknowns_filter          => 0
+        )->create_combined_proteome_file;
+    }
+
+    $self->logger->info("Beginning all against all blast");
+    my $blast_obj = Bio::Roary::ParallelAllAgainstAllBlast->new(
+        fasta_file              => $blast_input,
+        blast_results_file_name => $self->output_filename,
+        job_runner              => $self->job_runner,
+        cpus                    => $self->cpus,
+        makeblastdb_exec        => $self->makeblastdb_exec,
+        blastp_exec             => $self->blastp_exec,
+        logger                  => $self->logger
+    );
+    $blast_obj->run();
+}
+
+# Returns the help text that run() dies with on --help or unusable arguments.
+sub usage_text {
+    my $usage = <<USAGE;
+Usage: parallel_all_against_all_blastp [options] file.faa
+Take in a FASTA file of proteins and blast against itself
+
+Options: -p INT    number of threads [1]
+         -o STR    output filename for blast results [blast_results]
+         -m STR    makeblastdb executable [makeblastdb]
+         -b STR    blastp executable [blastp]
+         -v        verbose output to STDOUT
+         -h        this help message
+
+For further info see: http://sanger-pathogens.github.io/Roary/
+USAGE
+    return $usage;
+}
+
+__PACKAGE__->meta->make_immutable;
+no Moose;
+1;
diff --git a/lib/Bio/Roary/CommandLine/QueryRoary.pm b/lib/Bio/Roary/CommandLine/QueryRoary.pm
new file mode 100644
index 0000000..ee11ee7
--- /dev/null
+++ b/lib/Bio/Roary/CommandLine/QueryRoary.pm
@@ -0,0 +1,253 @@
+undef $VERSION;
+package Bio::Roary::CommandLine::QueryRoary;
+
+# ABSTRACT: Take in a groups file and the protein fasta files and output selected data
+
+=head1 SYNOPSIS
+
+Take in a groups file and the protein fasta files and output selected data
+
+=cut
+
+use Moose;
+use Getopt::Long qw(GetOptionsFromArray);
+use Bio::Roary::AnalyseGroups;
+use Bio::Roary::Output::GroupsMultifastas;
+use Bio::Roary::Output::QueryGroups;
+use Bio::Roary::PrepareInputFiles;
+use Bio::Roary::Output::DifferenceBetweenSets;
+use Bio::Roary::AnnotateGroups;
+use Bio::Roary::GroupStatistics;
+use Bio::Roary::OrderGenes;
+extends 'Bio::Roary::CommandLine::Common';
+
+has 'args'        => ( is => 'rw', isa => 'ArrayRef', required => 1 );    # rw: BUILD replaces it when both input sets are given
+has 'script_name' => ( is => 'ro', isa => 'Str',      required => 1 );
+has 'help'        => ( is => 'rw', isa => 'Bool',     default  => 0 );    # set by -h; makes run() die with the usage text
+# Settings below are filled in by BUILD from the parsed command line.
+has 'input_files'     => ( is => 'rw', isa => 'ArrayRef' );
+has 'groups_filename' => ( is => 'rw', isa => 'Str', default => 'clustered_proteins');
+has 'group_names'     => ( is => 'rw', isa => 'ArrayRef' );
+has 'input_set_one'   => ( is => 'rw', isa => 'ArrayRef' );
+has 'input_set_two'   => ( is => 'rw', isa => 'ArrayRef' );
+has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'pan_genome_results' );
+has 'action'          => ( is => 'rw', isa => 'Str', default => 'union' );    # compared by run() against the supported action names
+has 'core_definition' => ( is => 'rw', isa => 'Num', default => 0.99 );    # fraction; -c takes a percentage that BUILD divides by 100
+has 'verbose'         => ( is => 'rw', isa => 'Bool', default => 0 );
+# Set by BUILD when the arguments are unusable; run() prints it and dies.
+has '_error_message' => ( is => 'rw', isa => 'Str' );
+
+# Moose BUILD hook: parses the command-line options out of args, applies them
+# to the attributes and records the input files. Problems are stored in
+# _error_message (printed later by run) rather than thrown here.
+sub BUILD {
+    my ($self) = @_;
+    my ( $output_filename, $groups_filename, $action, $core_definition, $verbose, $help );
+    my ( @group_names, @input_set_one, @input_set_two );
+
+    GetOptionsFromArray(
+        $self->args,
+        'o|output=s'          => \$output_filename,
+        'g|groups_filename=s' => \$groups_filename,
+        'n|group_names=s'     => \@group_names,
+        'a|action=s'          => \$action,
+        'i|input_set_one=s'   => \@input_set_one,
+        't|input_set_two=s'   => \@input_set_two,
+        'c|core_definition=f' => \$core_definition,
+        'v|verbose'           => \$verbose,
+        'h|help'              => \$help,
+    );
+
+    if ( defined($verbose) ) {
+        $self->verbose($verbose);
+        $self->logger->level(10000);
+    }
+    $self->help($help)                               if ( defined($help) );
+    $self->output_filename($output_filename)         if ( defined($output_filename) );
+    $self->action($action)                           if ( defined($action) );
+    $self->core_definition( $core_definition / 100 ) if ( defined($core_definition) );
+
+    # Always honour an explicit -g, so that a missing file is reported under
+    # the name the user gave instead of silently falling back to the default.
+    $self->groups_filename($groups_filename) if ( defined($groups_filename) );
+    if ( !-e $self->groups_filename ) {
+        $self->_error_message( "Error: Cant access the groups file: " . $self->groups_filename );
+    }
+
+    # Each list option may be repeated and/or hold comma separated values.
+    @group_names = split( /,/, join( ',', @group_names ) );
+    $self->group_names( \@group_names ) if (@group_names);
+
+    @input_set_one = split( /,/, join( ',', @input_set_one ) );
+    $self->input_set_one( \@input_set_one ) if (@input_set_one);
+
+    @input_set_two = split( /,/, join( ',', @input_set_two ) );
+    $self->input_set_two( \@input_set_two ) if (@input_set_two);
+
+    # When both comparison sets are given they replace the positional files.
+    if ( defined( $self->input_set_one ) && defined( $self->input_set_two ) ) {
+        my @all_input_files = ( @{ $self->input_set_one }, @{ $self->input_set_two } );
+        $self->args( \@all_input_files );
+    }
+
+    if ( !defined( $self->input_set_two ) && @{ $self->args } == 0 ) {
+        $self->_error_message("Error: You need to provide a FASTA file");
+    }
+
+    for my $filename ( @{ $self->args } ) {
+        next if -e $filename;
+        $self->_error_message("Error: Cant access file $filename");
+        last;    # only the first inaccessible file is reported
+    }
+    $self->input_files( $self->args );
+}
+
+# Dies with the usage text on --help or a recorded error; otherwise performs
+# the set operation selected by action. Prints "Nothing done" when the action
+# is unknown or the extra options it needs were not supplied.
+sub run {
+    my $self = shift;
+
+    die $self->usage_text if $self->help;
+    if ( defined( $self->_error_message ) ) {
+        print $self->_error_message . "\n";
+        die $self->usage_text;
+    }
+
+    my $prepare_input_files = Bio::Roary::PrepareInputFiles->new( input_files => $self->input_files );
+    my $analyse_groups_obj  = Bio::Roary::AnalyseGroups->new(
+        fasta_files     => $prepare_input_files->fasta_files,
+        groups_filename => $self->groups_filename,
+    );
+    my $action = $self->action;
+
+    if ( $action eq 'union' ) {
+        Bio::Roary::Output::QueryGroups->new(
+            analyse_groups        => $analyse_groups_obj,
+            output_union_filename => $self->output_filename,
+            input_filenames       => $prepare_input_files->fasta_files
+        )->groups_union();
+    }
+    elsif ( $action eq 'intersection' ) {
+        Bio::Roary::Output::QueryGroups->new(
+            analyse_groups               => $analyse_groups_obj,
+            output_intersection_filename => $self->output_filename,
+            input_filenames              => $prepare_input_files->fasta_files,
+            core_definition              => $self->core_definition
+        )->groups_intersection();
+    }
+    elsif ( $action eq 'complement' ) {
+        Bio::Roary::Output::QueryGroups->new(
+            analyse_groups             => $analyse_groups_obj,
+            output_complement_filename => $self->output_filename,
+            input_filenames            => $prepare_input_files->fasta_files,
+            core_definition            => $self->core_definition
+        )->groups_complement();
+    }
+    elsif ( $action eq 'gene_multifasta' && defined( $self->group_names ) ) {
+        Bio::Roary::Output::GroupsMultifastas->new(
+            group_names          => $self->group_names,
+            analyse_groups       => $analyse_groups_obj,
+            output_filename_base => $self->output_filename
+        )->create_files();
+    }
+    elsif ( $action eq 'difference' && defined( $self->input_set_one ) && defined( $self->input_set_two ) ) {
+        my $difference_between_sets = Bio::Roary::Output::DifferenceBetweenSets->new(
+            analyse_groups       => $analyse_groups_obj,
+            input_filenames_sets => [
+                $prepare_input_files->lookup_fasta_files_from_unknown_input_files( $self->input_set_one ),
+                $prepare_input_files->lookup_fasta_files_from_unknown_input_files( $self->input_set_two )
+            ],
+        );
+        $difference_between_sets->groups_set_one_unique();
+        $difference_between_sets->groups_set_two_unique();
+        $difference_between_sets->groups_in_common();
+
+        # One set of spreadsheets per generated groups file.
+        my @difference_group_files = (
+            $difference_between_sets->groups_set_one_unique_filename,
+            $difference_between_sets->groups_set_two_unique_filename,
+            $difference_between_sets->groups_in_common_filename,
+        );
+        for my $groups_file (@difference_group_files) {
+            $self->create_spreadsheets( $groups_file, $prepare_input_files->fasta_files, $self->input_files );
+        }
+    }
+    else {
+        print "Nothing done\n";
+    }
+}
+
+# Re-annotates the given groups file and asks GroupStatistics to create
+# "<groups_file>_statistics.csv" for it from the supplied FASTA and GFF files.
+sub create_spreadsheets {
+    my ( $self, $groups_file, $fasta_files, $gff_files ) = @_;
+
+    my $analysis = Bio::Roary::AnalyseGroups->new(
+        fasta_files     => $fasta_files,
+        groups_filename => $groups_file,
+    );
+
+    my $annotator = Bio::Roary::AnnotateGroups->new(
+        gff_files       => $gff_files,
+        output_filename => "${groups_file}_reannotated",
+        groups_filename => $groups_file,
+    );
+    $annotator->reannotate;
+
+    my $gene_order = Bio::Roary::OrderGenes->new(
+        analyse_groups_obj       => $analysis,
+        gff_files                => $gff_files,
+        core_definition          => $self->core_definition,
+        pan_graph_filename       => 'set_difference_core_accessory_graph.dot',
+        accessory_graph_filename => 'set_difference_accessory_graph.dot',
+    );
+
+    Bio::Roary::GroupStatistics->new(
+        output_filename     => "${groups_file}_statistics.csv",
+        annotate_groups_obj => $annotator,
+        analyse_groups_obj  => $analysis,
+        groups_to_contigs   => $gene_order->groups_to_contigs
+    )->create_spreadsheet;
+}
+
+# Returns the help text that run() dies with on --help or unusable arguments.
+sub usage_text {
+    my $usage = <<USAGE;
+Usage: query_pan_genome [options] *.gff
+Perform set operations on the pan genome to see the gene differences between groups of isolates.
+
+Options: -g STR    groups filename [clustered_proteins]
+         -a STR    action (union/intersection/complement/gene_multifasta/difference) [union]
+         -c FLOAT  percentage of isolates a gene must be in to be core [99]
+         -o STR    output filename [pan_genome_results]
+         -n STR    comma separated list of gene names for use with gene_multifasta action
+         -i STR    comma separated list of filenames, comparison set one
+         -t STR    comma separated list of filenames, comparison set two
+         -v        verbose output to STDOUT
+         -h        this help message
+ 
+Examples: 
+Union of genes found in isolates
+         query_pan_genome -a union *.gff
+         
+Intersection of genes found in isolates (core genes)
+         query_pan_genome -a intersection *.gff
+         
+Complement of genes found in isolates (accessory genes)
+         query_pan_genome -a complement *.gff
+
+Extract the sequence of each gene listed and create multi-FASTA files
+         query_pan_genome -a gene_multifasta -n gryA,mecA,abc *.gff
+
+Gene differences between sets of isolates
+         query_pan_genome -a difference --input_set_one 1.gff,2.gff --input_set_two 3.gff,4.gff,5.gff
+
+For further info see: http://sanger-pathogens.github.io/Roary/
+USAGE
+    return $usage;
+}
+
+__PACKAGE__->meta->make_immutable;
+no Moose;
+1;
diff --git a/lib/Bio/Roary/CommandLine/Roary.pm b/lib/Bio/Roary/CommandLine/Roary.pm
new file mode 100644
index 0000000..103c0b9
--- /dev/null
+++ b/lib/Bio/Roary/CommandLine/Roary.pm
@@ -0,0 +1,356 @@
+undef $VERSION;
+
+package Bio::Roary::CommandLine::Roary;
+
+# ABSTRACT: Take in FASTA files of proteins and cluster them
+
+=head1 SYNOPSIS
+
+Take in FASTA files of proteins and cluster them
+
+=cut
+
+use Moose;
+use Getopt::Long qw(GetOptionsFromArray);
+use Bio::Roary;
+use Bio::Roary::PrepareInputFiles;
+use Bio::Roary::QC::Report;
+use Bio::Roary::ReformatInputGFFs;
+use Bio::Roary::External::CheckTools;
+use File::Which;
+use File::Path qw(make_path);
+use Cwd qw(abs_path getcwd);
+use File::Temp;
+extends 'Bio::Roary::CommandLine::Common';
+
+has 'args'        => ( is => 'ro', isa => 'ArrayRef', required => 1 );    # command-line arguments parsed by BUILD
+has 'script_name' => ( is => 'ro', isa => 'Str',      required => 1 );
+has 'help'        => ( is => 'rw', isa => 'Bool',     default  => 0 );    # set by -h; BUILD then prints the usage text and returns
+# Pipeline settings and their defaults; BUILD overrides them from the matching command-line options.
+has 'fasta_files' => ( is => 'rw', isa => 'ArrayRef', default => sub { [] } );
+has 'output_filename'         => ( is => 'rw', isa => 'Str',  default => 'clustered_proteins' );
+has 'output_directory'        => ( is => 'rw', isa => 'Str',  default => '.' );
+has '_original_directory'     => ( is => 'rw', isa => 'Str',  default => '.' );
+has 'job_runner'              => ( is => 'rw', isa => 'Str',  default => 'Local' );
+has 'makeblastdb_exec'        => ( is => 'rw', isa => 'Str',  default => 'makeblastdb' );
+has 'blastp_exec'             => ( is => 'rw', isa => 'Str',  default => 'blastp' );
+has 'mcxdeblast_exec'         => ( is => 'rw', isa => 'Str',  default => 'mcxdeblast' );
+has 'mcl_exec'                => ( is => 'rw', isa => 'Str',  default => 'mcl' );
+has 'apply_unknowns_filter'   => ( is => 'rw', isa => 'Bool', default => 1 );
+has 'cpus'                    => ( is => 'rw', isa => 'Int',  default => 1 );
+has 'output_multifasta_files' => ( is => 'rw', isa => 'Bool', default => 0 );    # BUILD only enables it when prank or mafft is found in PATH
+has 'perc_identity'           => ( is => 'rw', isa => 'Num',  default => 95 );    # BUILD logs an error for values below 50
+has 'dont_delete_files'       => ( is => 'rw', isa => 'Bool', default => 0 );
+has 'dont_create_rplots'      => ( is => 'rw', isa => 'Bool', default => 1 );
+has 'dont_run_qc'             => ( is => 'rw', isa => 'Bool', default => 0 );
+has 'dont_split_groups'       => ( is => 'rw', isa => 'Bool', default => 0 );
+has 'verbose_stats'           => ( is => 'rw', isa => 'Bool', default => 0 );
+has 'translation_table'       => ( is => 'rw', isa => 'Int',  default => 11 );
+has 'mafft'                   => ( is => 'rw', isa => 'Bool', default => 0 );    # also switched on by BUILD when prank is missing but mafft exists
+has 'group_limit'             => ( is => 'rw', isa => 'Num',  default => 50000 );
+has 'core_definition'         => ( is => 'rw', isa => 'Num',  default => 0.99 );
+has 'verbose'                 => ( is => 'rw', isa => 'Bool', default => 0 );
+has 'kraken_db' => ( is => 'rw', isa => 'Str',  default => '/lustre/scratch108/pathogen/pathpipe/kraken/minikraken_20140330/' );    # NOTE(review): default is a site-specific absolute path
+has 'run_qc'    => ( is => 'rw', isa => 'Bool', default => 0 );
+has '_working_directory' => ( is => 'rw', isa => 'File::Temp::Dir', lazy => 1, builder => '_build__working_directory' );    # created on first access
+
+# Lazy builder for _working_directory: a File::Temp directory created inside
+# the current working directory with CLEANUP enabled.
+sub _build__working_directory {
+    return File::Temp->newdir( DIR => getcwd(), CLEANUP => 1 );
+}
+
+sub BUILD {
+    my ($self) = @_;
+
+    my (
+        $fasta_files,           $verbose,           $create_rplots,           $group_limit,   $dont_run_qc,
+        $max_threads,           $dont_delete_files, $dont_split_groups,       $perc_identity, $output_filename,
+        $job_runner,            $makeblastdb_exec,  $mcxdeblast_exec,         $mcl_exec,      $blastp_exec,
+        $apply_unknowns_filter, $cpus,              $output_multifasta_files, $verbose_stats, $translation_table,
+        $run_qc,                $core_definition,   $help,                    $kraken_db,     $cmd_version,
+        $mafft,                 $output_directory,  $check_dependancies,
+    );
+
+    GetOptionsFromArray(
+        $self->args,
+        'o|output=s'                => \$output_filename,
+        'f|output_directory=s'      => \$output_directory,
+        'j|job_runner=s'            => \$job_runner,
+        'm|makeblastdb_exec=s'      => \$makeblastdb_exec,
+        'b|blastp_exec=s'           => \$blastp_exec,
+        'd|mcxdeblast_exec=s'       => \$mcxdeblast_exec,
+        'c|mcl_exec=s'              => \$mcl_exec,
+        'p|processors=i'            => \$cpus,
+        'u|apply_unknowns_filter=i' => \$apply_unknowns_filter,
+        'e|output_multifasta_files' => \$output_multifasta_files,
+        'i|perc_identity=i'         => \$perc_identity,
+        'z|dont_delete_files'       => \$dont_delete_files,
+        's|dont_split_groups'       => \$dont_split_groups,
+        'r|create_rplots'           => \$create_rplots,
+        'y|verbose_stats'           => \$verbose_stats,
+        't|translation_table=i'     => \$translation_table,
+        'g|group_limit=i'           => \$group_limit,
+        'qc|run_qc'                 => \$run_qc,
+        'x|dont_run_qc'             => \$dont_run_qc,
+        'cd|core_definition=f'      => \$core_definition,
+        'v|verbose'                 => \$verbose,
+        'n|mafft'                   => \$mafft,
+        'k|kraken_db=s'             => \$kraken_db,
+        'w|version'                 => \$cmd_version,
+        'a|check_dependancies'      => \$check_dependancies,
+        'h|help'                    => \$help,
+    );
+
+    $self->version($cmd_version) if ( defined($cmd_version) );
+    if ( $self->version ) {
+		print $self->_version() ;
+        return;
+    }
+
+    print "\nPlease cite Roary if you use any of the results it produces:
+    Andrew J. Page, Carla A. Cummins, Martin Hunt, Vanessa K. Wong, Sandra Reuter, Matthew T. G. Holden, Maria Fookes, Daniel Falush, Jacqueline A. Keane, Julian Parkhill,
+	\"Roary: Rapid large-scale prokaryote pan genome analysis\", Bioinformatics, 2015 Nov 15;31(22):3691-3693
+    doi: http://doi.org/10.1093/bioinformatics/btv421
+	Pubmed: 26198102\n\n";
+
+    $self->help($help) if ( defined($help) );
+    if( $self->help ) 
+	{
+		print $self->usage_text;
+		return;
+	}
+
+    if ($check_dependancies) {
+        my $check_tools = Bio::Roary::External::CheckTools->new();
+        $check_tools->check_all_tools;
+        $self->logger->error( "Roary version " . $self->_version() );
+    }
+
+    if ( defined($verbose) ) {
+        $self->verbose($verbose);
+        $self->logger->level(10000);
+    }
+
+    if ( @{ $self->args } < 2 ) {
+        $self->logger->error("Error: You need to provide at least 2 files to build a pan genome");
+        die $self->usage_text;
+    }
+    $self->output_filename($output_filename)   if ( defined($output_filename) );
+    $self->job_runner($job_runner)             if ( defined($job_runner) );
+    $self->makeblastdb_exec($makeblastdb_exec) if ( defined($makeblastdb_exec) );
+    $self->blastp_exec($blastp_exec)           if ( defined($blastp_exec) );
+    $self->mcxdeblast_exec($mcxdeblast_exec)   if ( defined($mcxdeblast_exec) );
+    $self->mcl_exec($mcl_exec)                 if ( defined($mcl_exec) );
+    $self->cpus($cpus)                         if ( defined($cpus) );
+
+    if ( defined($perc_identity) ) {
+        $self->perc_identity($perc_identity);
+        if ( $perc_identity < 50 ) {
+            $self->logger->error(
+"The percentage identity is too low. Either somethings wrong with your data, like contamination, or your doing something that the software isnt designed to support."
+            );
+        }
+    }
+
+    $self->mafft($mafft) if ( defined($mafft) );
+    $self->apply_unknowns_filter($apply_unknowns_filter)
+      if ( defined($apply_unknowns_filter) );
+
+    if ( defined($output_multifasta_files) ) {
+        if ( which('prank') ) {
+            $self->output_multifasta_files($output_multifasta_files);
+        }
+        else {
+
+            if ( which('mafft') ) {
+                $self->output_multifasta_files($output_multifasta_files);
+                $self->mafft(1);
+                $self->logger->warn("PRANK not found in your PATH so using MAFFT instead to generate multiFASTA alignments.");
+            }
+            else {
+                $self->logger->warn("PRANK (or MAFFT) not found in your PATH so cannot generate multiFASTA alignments, skipping for now.");
+            }
+        }
+    }
+    $self->dont_delete_files($dont_delete_files) if ( defined($dont_delete_files) );
+    $self->dont_split_groups($dont_split_groups) if ( defined($dont_split_groups) );
+    $self->dont_create_rplots(0)                 if ( defined($create_rplots) );
+    $self->verbose_stats($verbose_stats)         if ( defined $verbose_stats );
+    $self->translation_table($translation_table) if ( defined($translation_table) );
+    $self->group_limit($group_limit)             if ( defined($group_limit) );
+    $self->kraken_db($kraken_db)                 if ( defined($kraken_db) );
+    $self->output_directory($output_directory)   if ( defined($output_directory) );
+
+    if ( defined $verbose_stats && defined($output_multifasta_files) ) {
+        $self->verbose_stats(0);
+        $self->logger->warn("The verbose stats spreadsheet is not compatible with the core gene alignement so disabling verbose_stats");
+    }
+
+    if ( defined($run_qc) ) {
+        if ( which('kraken') && which('kraken-report') ) {
+            $self->run_qc($run_qc);
+        }
+        else {
+            $self->logger->warn("kraken or kraken-report not found in your PATH so cannot run QC, skipping for now.");
+        }
+    }
+
+    if ( $self->cpus > 1 ) {
+        $self->job_runner('Parallel');
+    }
+
+    $self->core_definition( $core_definition / 100 ) if ( defined($core_definition) );
+
+    for my $filename ( @{ $self->args } ) {
+        if ( !-e $filename ) {
+            $self->logger->error("Error: Cant access file $filename");
+            die $self->usage_text;
+        }
+        push( @{ $self->fasta_files }, abs_path($filename) );
+    }
+
+    $self->_working_directory( File::Temp->newdir( DIR => getcwd, CLEANUP => 0 ) ) if ( $self->dont_delete_files );
+}
+
+# Create the requested output directory and make it the working directory.
+#
+# Does nothing when output_directory is '.' or ''. When the requested path
+# already exists a '_<epoch seconds>' suffix is appended instead of reusing
+# it; if that path exists too the method dies. The directory the process
+# started in is stored in _original_directory before changing directory.
+#
+# Returns $self after entering the new directory, or nothing when there was
+# no directory to set up. Dies if the directory cannot be created or entered.
+sub _setup_output_directory {
+    my ($self) = @_;
+    return if ( $self->output_directory eq '.' || $self->output_directory eq '' );
+
+    # -d implies -e, so one existence test covers files and directories alike.
+    if ( -e $self->output_directory ) {
+        $self->logger->warn("Output directory name exists already so adding a timestamp to the end");
+        $self->output_directory( $self->output_directory() . '_' . time() );
+        if ( -e $self->output_directory ) {
+            die("Output directory name with time stamp exist so giving up");
+        }
+    }
+
+    # make_path collects failures as a list of { path => message } hashes;
+    # the first one is enough to abort.
+    make_path( $self->output_directory, { error => \my $err } );
+    if (@$err) {
+        my ( $file, $message ) = %{ $err->[0] };
+        die("Error creating output directory $message");
+    }
+    $self->logger->info( "Output directory created: " . $self->output_directory );
+
+    $self->_original_directory( getcwd() );
+
+    # Without this check a failed chdir would leave every later output file
+    # in the starting directory without any indication.
+    chdir( $self->output_directory )
+      or die( "Error changing into output directory " . $self->output_directory . ": $!" );
+    return $self;
+}
+
+# Run the pipeline: set up the output directory, repair the input GFF files,
+# prepare the protein extraction, optionally produce the Kraken QC report,
+# run Bio::Roary and finally change back to the original directory.
+#
+# Returns immediately when the version or the help text was requested. Dies
+# when no input file survives the GFF clean up.
+sub run {
+    my ($self) = @_;
+
+    return if ( $self->version );
+    return if ( $self->help );
+
+    $self->_setup_output_directory;
+
+    $self->logger->info("Fixing input GFF files");
+    my $gff_fixer = Bio::Roary::ReformatInputGFFs->new(
+        gff_files => $self->fasta_files,
+        logger    => $self->logger,
+    );
+    $gff_fixer->fix_duplicate_gene_ids();
+    unless ( @{ $gff_fixer->fixed_gff_files } ) {
+        die(
+"All input files have been excluded from analysis. Please check you have valid GFF files, with annotation and a FASTA sequence at the end. Better still, reannotate your FASTA file with PROKKA."
+        );
+    }
+
+    # From here on the cleaned copies replace the files given by the user.
+    $self->fasta_files( $gff_fixer->fixed_gff_files );
+
+    $self->logger->info("Extracting proteins from GFF files");
+    my %prepare_args = (
+        input_files           => $self->fasta_files,
+        job_runner            => $self->job_runner,
+        apply_unknowns_filter => $self->apply_unknowns_filter,
+        cpus                  => $self->cpus,
+        translation_table     => $self->translation_table,
+        verbose               => $self->verbose,
+        working_directory     => $self->_working_directory,
+    );
+    my $protein_extractor = Bio::Roary::PrepareInputFiles->new(%prepare_args);
+
+    if ( $self->run_qc ) {
+        $self->logger->info("Running Kraken on each input assembly");
+        my %qc_args = (
+            input_files => $self->fasta_files,
+            job_runner  => $self->job_runner,
+            cpus        => $self->cpus,
+            verbose     => $self->verbose,
+            kraken_db   => $self->kraken_db,
+        );
+        Bio::Roary::QC::Report->new(%qc_args)->report;
+    }
+
+    my %roary_args = (
+        input_files             => $self->fasta_files,
+        fasta_files             => $protein_extractor->fasta_files,
+        output_filename         => $self->output_filename,
+        job_runner              => $self->job_runner,
+        cpus                    => $self->cpus,
+        makeblastdb_exec        => $self->makeblastdb_exec,
+        blastp_exec             => $self->blastp_exec,
+        output_multifasta_files => $self->output_multifasta_files,
+        perc_identity           => $self->perc_identity,
+        dont_delete_files       => $self->dont_delete_files,
+        dont_create_rplots      => $self->dont_create_rplots,
+        dont_split_groups       => $self->dont_split_groups,
+        verbose_stats           => $self->verbose_stats,
+        translation_table       => $self->translation_table,
+        group_limit             => $self->group_limit,
+        core_definition         => $self->core_definition,
+        verbose                 => $self->verbose,
+        mafft                   => $self->mafft,
+    );
+    Bio::Roary->new(%roary_args)->run();
+
+    chdir( $self->_original_directory );
+}
+
+# Version of this command as a newline terminated string. The placeholder
+# "x.y.z" is returned while the package $VERSION is undefined.
+sub _version {
+    my ($self) = @_;
+
+    my $release = $Bio::Roary::CommandLine::Roary::VERSION;
+    return "x.y.z\n" unless ( defined($release) );
+    return $release . "\n";
+}
+
+# Help text of the roary command: synopsis, every option with its default
+# in square brackets, an example invocation and the project URL.
+sub usage_text {
+    my ($self) = @_;
+
+    my $help_text = <<USAGE;
+Usage:   roary [options] *.gff
+
+Options: -p INT    number of threads [1]
+         -o STR    clusters output filename [clustered_proteins]
+         -f STR    output directory [.]
+         -e        create a multiFASTA alignment of core genes using PRANK
+         -n        fast core gene alignment with MAFFT, use with -e
+         -i        minimum percentage identity for blastp [95]
+         -cd FLOAT percentage of isolates a gene must be in to be core [99]
+         -qc       generate QC report with Kraken
+         -k STR    path to Kraken database for QC, use with -qc
+         -a        check dependancies and print versions
+         -b STR    blastp executable [blastp]
+         -c STR    mcl executable [mcl]
+         -d STR    mcxdeblast executable [mcxdeblast]
+         -g INT    maximum number of clusters [50000]
+         -m STR    makeblastdb executable [makeblastdb]
+         -r        create R plots, requires R and ggplot2
+         -s        dont split paralogs
+         -t INT    translation table [11]
+         -z        dont delete intermediate files
+         -v        verbose output to STDOUT
+         -w        print version and exit
+         -y        add gene inference information to spreadsheet, doesnt work with -e
+         -h        this help message
+
+Example: Quickly generate a core gene alignment using 8 threads
+         roary -e --mafft -p 8 *.gff
+
+For further info see: http://sanger-pathogens.github.io/Roary/
+USAGE
+
+    return $help_text;
+}
+
+__PACKAGE__->meta->make_immutable;
+no Moose;
+1;
diff --git a/lib/Bio/Roary/CommandLine/RoaryCoreAlignment.pm b/lib/Bio/Roary/CommandLine/RoaryCoreAlignment.pm
new file mode 100644
index 0000000..dc52a60
--- /dev/null
+++ b/lib/Bio/Roary/CommandLine/RoaryCoreAlignment.pm
@@ -0,0 +1,143 @@
+undef $VERSION;
+package Bio::Roary::CommandLine::RoaryCoreAlignment;
+
+# ABSTRACT: Take in the group statistics spreadsheet and the location of the gene multifasta files and create a core alignment.
+
+=head1 SYNOPSIS
+
+Take in the group statistics spreadsheet and the location of the gene multifasta files and create a core alignment.
+
+=cut
+
+use Moose;
+use Getopt::Long qw(GetOptionsFromArray);
+use Cwd 'abs_path';
+use File::Path qw(remove_tree);
+use Bio::Roary::ExtractCoreGenesFromSpreadsheet;
+use Bio::Roary::LookupGeneFiles;
+use Bio::Roary::MergeMultifastaAlignments;
+extends 'Bio::Roary::CommandLine::Common';
+
+# Command line arguments and the name of the calling script; both must be
+# passed to the constructor.
+has 'args'        => ( is => 'ro', isa => 'ArrayRef', required => 1 );
+has 'script_name' => ( is => 'ro', isa => 'Str',      required => 1 );
+# Set from -h/--help in BUILD; run() dies with the usage text when true.
+has 'help'        => ( is => 'rw', isa => 'Bool',     default  => 0 );
+
+# The values below are defaults which BUILD replaces with command line options.
+has 'multifasta_base_directory' => ( is => 'rw', isa => 'Str', default => 'pan_genome_sequences' );
+has 'spreadsheet_filename'      => ( is => 'rw', isa => 'Str', default => 'gene_presence_absence.csv' );
+has 'output_filename'           => ( is => 'rw', isa => 'Str', default => 'core_gene_alignment.aln' );
+# Held as a fraction: BUILD divides -cd values greater than 1 by 100.
+has 'core_definition'           => ( is => 'rw', isa => 'Num', default => 0.99 );
+has 'dont_delete_files'         => ( is => 'rw', isa => 'Bool', default => 0 );
+# Validation problem recorded by BUILD; run() prints it and dies with the usage text.
+has '_error_message'            => ( is => 'rw', isa => 'Str' );
+has 'verbose'                   => ( is => 'rw', isa => 'Bool', default => 0 );
+
+# Parse the command line in args and copy the supplied options onto the
+# object.
+#
+# Problems with the input paths are not fatal here: they are stored in
+# _error_message so that run() can report them together with the usage text.
+# A path given on the command line which does not exist is reported under the
+# name the user supplied instead of silently falling back to the default.
+sub BUILD {
+    my ($self) = @_;
+
+    my ( $multifasta_base_directory, $spreadsheet_filename, $output_filename, $core_definition, $verbose, $help, $dont_delete_files );
+
+    GetOptionsFromArray(
+        $self->args,
+        'm|multifasta_base_directory=s' => \$multifasta_base_directory,
+        's|spreadsheet_filename=s'      => \$spreadsheet_filename,
+        'o|output_filename=s'           => \$output_filename,
+        'cd|core_definition=f'          => \$core_definition,
+        'z|dont_delete_files'           => \$dont_delete_files,
+        'v|verbose'                     => \$verbose,
+        'h|help'                        => \$help,
+    );
+
+    if ( defined($verbose) ) {
+        $self->verbose($verbose);
+        $self->logger->level(10000);
+    }
+    $self->help($help) if ( defined($help) );
+
+    # Directory holding the per gene multi-FASTA files.
+    if ( defined($multifasta_base_directory) ) {
+        if ( -d $multifasta_base_directory ) {
+            $self->multifasta_base_directory( abs_path($multifasta_base_directory) );
+        }
+        else {
+            $self->_error_message( "Error: Cant access the multifasta base directory: " . $multifasta_base_directory );
+        }
+    }
+    elsif ( !-d $self->multifasta_base_directory ) {
+        $self->_error_message( "Error: Cant access the multifasta base directory: " . $self->multifasta_base_directory );
+    }
+
+    # Gene presence and absence spreadsheet. A later error replaces an
+    # earlier one, so only the last problem found is reported.
+    if ( defined($spreadsheet_filename) ) {
+        if ( -e $spreadsheet_filename ) {
+            $self->spreadsheet_filename( abs_path($spreadsheet_filename) );
+        }
+        else {
+            $self->_error_message( "Error: Cant access the spreadsheet: " . $spreadsheet_filename );
+        }
+    }
+    elsif ( !-e $self->spreadsheet_filename ) {
+        $self->_error_message( "Error: Cant access the spreadsheet: " . $self->spreadsheet_filename );
+    }
+
+    $self->output_filename($output_filename) if ( defined($output_filename) );
+
+    # Values above 1 are read as percentages, anything else as a fraction.
+    if ( defined($core_definition) ) {
+        if ( $core_definition > 1 ) {
+            $self->core_definition( $core_definition / 100 );
+        }
+        else {
+            $self->core_definition($core_definition);
+        }
+    }
+    $self->dont_delete_files($dont_delete_files) if ( defined($dont_delete_files) );
+
+}
+
+# Build the core gene alignment: read the core genes from the spreadsheet,
+# look up the matching multi-FASTA file of each gene and merge those files
+# into output_filename. Unless dont_delete_files is set, the
+# 'pan_genome_sequences' directory is removed afterwards.
+#
+# Dies with the usage text when help was requested or BUILD stored an error.
+sub run {
+    my ($self) = @_;
+
+    die $self->usage_text if ( $self->help );
+    if ( defined( $self->_error_message ) ) {
+        print $self->_error_message . "\n";
+        die $self->usage_text;
+    }
+
+    $self->logger->info("Extract core genes from spreadsheet");
+    my $core_genes = Bio::Roary::ExtractCoreGenesFromSpreadsheet->new(
+        spreadsheet     => $self->spreadsheet_filename,
+        core_definition => $self->core_definition
+    );
+
+    $self->logger->info("Looking up genes in files");
+    my $gene_file_lookup = Bio::Roary::LookupGeneFiles->new(
+        multifasta_directory => $self->multifasta_base_directory,
+        ordered_genes        => $core_genes->ordered_core_genes,
+    );
+
+    $self->logger->info("Merge multifasta alignments");
+    my %merge_args = (
+        sample_names          => $core_genes->sample_names,
+        multifasta_files      => $gene_file_lookup->ordered_gene_files(),
+        output_filename       => $self->output_filename,
+        sample_names_to_genes => $core_genes->sample_names_to_genes,
+    );
+    Bio::Roary::MergeMultifastaAlignments->new(%merge_args)->merge_files;
+
+    remove_tree('pan_genome_sequences') if ( $self->dont_delete_files == 0 );
+}
+
+# Help text of pan_genome_core_alignment listing every option and its
+# default value.
+sub usage_text {
+    my ($self) = @_;
+
+    my $help_text = <<USAGE;
+Usage: pan_genome_core_alignment [options]
+Create an alignment of core genes from the spreadsheet and the directory of gene multi-FASTAs.
+
+Options: -o STR    output filename [core_gene_alignment.aln]
+         -cd FLOAT percentage of isolates a gene must be in to be core [99]
+         -m STR    directory containing gene multi-FASTAs [pan_genome_sequences]
+         -s STR    gene presence and absence spreadsheet [gene_presence_absence.csv]
+         -z        dont delete intermediate files
+         -v        verbose output to STDOUT
+         -h        this help message
+
+For further info see: http://sanger-pathogens.github.io/Roary/
+USAGE
+
+    return $help_text;
+}
+
+__PACKAGE__->meta->make_immutable;
+no Moose;
+1;
diff --git a/lib/Bio/Roary/CommandLine/RoaryPostAnalysis.pm b/lib/Bio/Roary/CommandLine/RoaryPostAnalysis.pm
new file mode 100644
index 0000000..5206382
--- /dev/null
+++ b/lib/Bio/Roary/CommandLine/RoaryPostAnalysis.pm
@@ -0,0 +1,237 @@
+undef $VERSION;
+package Bio::Roary::CommandLine::RoaryPostAnalysis;
+
+# ABSTRACT: Perform the post analysis on the pan genome
+
+=head1 SYNOPSIS
+
+Perform the post analysis on the pan genome
+
+=cut
+
+use Moose;
+use Getopt::Long qw(GetOptionsFromArray);
+use Bio::Roary::PostAnalysis;
+use File::Find::Rule;
+use Bio::Roary::External::GeneAlignmentFromNucleotides;
+use File::Path qw(remove_tree);
+use Bio::Roary::External::Fasttree;
+extends 'Bio::Roary::CommandLine::Common';
+
+# Command line arguments and the name of the calling script; both must be
+# passed to the constructor.
+has 'args'                        => ( is => 'ro', isa => 'ArrayRef', required => 1 );
+has 'script_name'                 => ( is => 'ro', isa => 'Str',      required => 1 );
+# Set from -h/--help in BUILD; run() dies with the usage text when true.
+has 'help'                        => ( is => 'rw', isa => 'Bool',     default  => 0 );
+# When defined, run() prints it and dies with the usage text.
+has '_error_message'              => ( is => 'rw', isa => 'Str' );
+
+# fasta_files and input_files name text files which run() reads line by line
+# into lists of filenames; they are not the sequence files themselves.
+has 'fasta_files'                 => ( is => 'rw', isa => 'Str',  default  => '_fasta_files' );
+has 'input_files'                 => ( is => 'rw', isa => 'Str',  default  => '_gff_files');
+has 'output_filename'             => ( is => 'rw', isa => 'Str',  default  => 'clustered_proteins' );
+has 'output_pan_geneome_filename' => ( is => 'rw', isa => 'Str',  default  => 'pan_genome.fa' );
+has 'output_statistics_filename'  => ( is => 'rw', isa => 'Str',  default  => 'gene_presence_absence.csv' );
+# When true run() aligns the gene files found under 'pan_genome_sequences'.
+has 'output_multifasta_files'     => ( is => 'rw', isa => 'Bool', default  => 0 );
+has 'clusters_filename'           => ( is => 'rw', isa => 'Str',  default  => '_clustered.clstr' );
+# run() only honours 'LSF' when the LSF job runner module can be loaded,
+# every other value ends up as 'Local'.
+has 'job_runner'                  => ( is => 'rw', isa => 'Str',  default  => 'Local' );
+has 'cpus'                        => ( is => 'rw', isa => 'Int',  default => 1 );
+has 'dont_delete_files'           => ( is => 'rw', isa => 'Bool', default  => 0 );
+has 'dont_create_rplots'          => ( is => 'rw', isa => 'Bool', default  => 0 );
+has 'dont_split_groups'           => ( is => 'rw', isa => 'Bool', default  => 0 );
+has 'verbose_stats'               => ( is => 'rw', isa => 'Bool', default  => 0 );
+has 'translation_table'           => ( is => 'rw', isa => 'Int',  default => 11 );
+has 'group_limit'                 => ( is => 'rw', isa => 'Num',  default => 50000 );
+# Held as a fraction: BUILD divides the -cd command line value by 100.
+has 'core_definition'             => ( is => 'rw', isa => 'Num',  default => 0.99 );
+has 'verbose'                     => ( is => 'rw', isa => 'Bool', default => 0 );
+has 'mafft'                       => ( is => 'rw', isa => 'Bool', default => 0 );
+
+# Parse the command line in args and store every option that was actually
+# given in the attribute of the same purpose; options which were not given
+# keep the attribute default. The -cd value is a percentage and is stored as
+# a fraction. -v additionally raises the logger level.
+sub BUILD {
+    my ($self) = @_;
+
+    my (
+        $output_filename, $dont_create_rplots,         $dont_delete_files,       $dont_split_groups, $output_pan_geneome_filename,
+        $job_runner,      $output_statistics_filename, $output_multifasta_files, $clusters_filename, $core_definition,
+        $fasta_files,     $input_files,                $verbose_stats,           $translation_table, $help,
+        $cpus,            $group_limit,                $verbose,                 $mafft
+    );
+
+    GetOptionsFromArray(
+        $self->args,
+        'o|output=s'                => \$output_filename,
+        'j|job_runner=s'            => \$job_runner,
+        'm|output_multifasta_files' => \$output_multifasta_files,
+        'p=s'                       => \$output_pan_geneome_filename,
+        's=s'                       => \$output_statistics_filename,
+        'c=s'                       => \$clusters_filename,
+        'f=s'                       => \$fasta_files,
+        'i=s'                       => \$input_files,
+        'a|dont_delete_files'       => \$dont_delete_files,
+        'b|dont_create_rplots'      => \$dont_create_rplots,
+        'd|dont_split_groups'       => \$dont_split_groups,
+        'e|verbose_stats'           => \$verbose_stats,
+        'z|processors=i'            => \$cpus,
+        't|translation_table=i'     => \$translation_table,
+        'g|group_limit=i'           => \$group_limit,
+        'cd|core_definition=f'      => \$core_definition,
+        'v|verbose'                 => \$verbose,
+        'n|mafft'                   => \$mafft,
+        'h|help'                    => \$help,
+    );
+
+    # Attribute name paired with the value parsed for it, copied as is.
+    my @parsed_options = (
+        [ help                        => $help ],
+        [ job_runner                  => $job_runner ],
+        [ fasta_files                 => $fasta_files ],
+        [ input_files                 => $input_files ],
+        [ output_filename             => $output_filename ],
+        [ output_pan_geneome_filename => $output_pan_geneome_filename ],
+        [ output_statistics_filename  => $output_statistics_filename ],
+        [ output_multifasta_files     => $output_multifasta_files ],
+        [ clusters_filename           => $clusters_filename ],
+        [ dont_delete_files           => $dont_delete_files ],
+        [ dont_create_rplots          => $dont_create_rplots ],
+        [ dont_split_groups           => $dont_split_groups ],
+        [ verbose_stats               => $verbose_stats ],
+        [ translation_table           => $translation_table ],
+        [ cpus                        => $cpus ],
+        [ group_limit                 => $group_limit ],
+    );
+    for my $option (@parsed_options) {
+        my ( $attribute, $value ) = @{$option};
+        next unless ( defined($value) );
+        $self->$attribute($value);
+    }
+
+    if ( defined($core_definition) ) {
+        $self->core_definition( $core_definition / 100 );
+    }
+    if ( defined($mafft) ) {
+        $self->mafft($mafft);
+    }
+    if ( defined($verbose) ) {
+        $self->verbose($verbose);
+        $self->logger->level(10000);
+    }
+}
+
+# Run the post analysis on the clustering results and, when
+# output_multifasta_files is set, align every gene file found under
+# 'pan_genome_sequences'.
+#
+# Dies with the usage text when help was requested or an error message was
+# stored. Unless dont_delete_files is set, '_inflated_unsplit_mcl_groups' and
+# the 'split_groups' directory are removed after the analysis.
+sub run {
+    my ($self) = @_;
+
+    die $self->usage_text if ( $self->help );
+    if ( defined( $self->_error_message ) ) {
+        print $self->_error_message . "\n";
+        die $self->usage_text;
+    }
+
+    # Both attributes name files which list one filename per line.
+    my $input_files   = $self->_read_file_into_array( $self->input_files );
+    my %analysis_args = (
+        fasta_files                 => $self->_read_file_into_array( $self->fasta_files ),
+        input_files                 => $input_files,
+        output_filename             => $self->output_filename,
+        output_pan_geneome_filename => $self->output_pan_geneome_filename,
+        output_statistics_filename  => $self->output_statistics_filename,
+        output_multifasta_files     => $self->output_multifasta_files,
+        clusters_filename           => $self->clusters_filename,
+        dont_delete_files           => $self->dont_delete_files,
+        dont_create_rplots          => $self->dont_create_rplots,
+        dont_split_groups           => $self->dont_split_groups,
+        verbose_stats               => $self->verbose_stats,
+        group_limit                 => $self->group_limit,
+        verbose                     => $self->verbose,
+        cpus                        => $self->cpus,
+        logger                      => $self->logger,
+        core_definition             => $self->core_definition,
+    );
+    Bio::Roary::PostAnalysis->new(%analysis_args)->run();
+
+    if ( $self->dont_delete_files == 0 ) {
+        unlink('_inflated_unsplit_mcl_groups');
+        remove_tree('split_groups');
+    }
+
+    if ( $self->output_multifasta_files == 1 ) {
+        print "Aligning each cluster\n" if ( $self->verbose );
+
+        # LSF is only used when it was asked for and its module loads;
+        # anything else runs locally.
+        my $lsf_requested_and_usable = ( $self->_is_lsf_job_runner_available && $self->job_runner eq "LSF" );
+        my $job_runner_to_use        = $lsf_requested_and_usable ? $self->job_runner : 'Local';
+
+        my $gene_files = $self->_find_input_files;
+
+        # NOTE(review): $#{$input_files} is the last index, i.e. one less
+        # than the number of input files - confirm this is what
+        # num_input_files is meant to receive.
+        my $aligner = Bio::Roary::External::GeneAlignmentFromNucleotides->new(
+            fasta_files       => $gene_files,
+            job_runner        => $job_runner_to_use,
+            translation_table => $self->translation_table,
+            core_definition   => $self->core_definition,
+            cpus              => $self->cpus,
+            verbose           => $self->verbose,
+            mafft             => $self->mafft,
+            dont_delete_files => $self->dont_delete_files,
+            num_input_files   => $#{$input_files},
+        );
+        $aligner->run();
+    }
+}
+
+# Returns 1 when Bio::Roary::JobRunner::LSF can be loaded, 0 otherwise.
+sub _is_lsf_job_runner_available {
+    my ($self) = @_;
+
+    # The require sits in a string eval, so it is only attempted at run time
+    # and a module which is not installed gives undef instead of dying.
+    my $loaded = eval "require Bio::Roary::JobRunner::LSF; 1;";
+    return ( defined($loaded) && $loaded == 1 ) ? 1 : 0;
+}
+
+# Returns an array reference with every regular file matching '*.fa' found
+# in the 'pan_genome_sequences' directory.
+sub _find_input_files {
+    my ($self) = @_;
+
+    my $fasta_rule = File::Find::Rule->file()->name('*.fa');
+    my @gene_files = $fasta_rule->in('pan_genome_sequences');
+    return \@gene_files;
+}
+
+# Read a file that lists one filename per line.
+#
+#   $filename - path of the list file
+#
+# Returns an array reference with the lines in file order, trailing newline
+# removed. Dies when the file cannot be opened.
+sub _read_file_into_array {
+    my ( $self, $filename ) = @_;
+
+    # Three argument open: the path comes from the command line and must
+    # never be interpreted as an open mode or a pipe.
+    open( my $in_fh, '<', $filename )
+      or die "Error: Cant open file of filenames '$filename': $!";
+
+    # A lexical line variable keeps the caller's $_ untouched.
+    my @filenames;
+    while ( my $line = <$in_fh> ) {
+        chomp($line);
+        push( @filenames, $line );
+    }
+    close($in_fh);
+
+    return \@filenames;
+}
+
+# Help text of pan_genome_post_analysis listing every option and its default.
+#
+# The -cd default is shown as a percentage (99) because BUILD divides the
+# command line value by 100 before storing it as the 0.99 fraction.
+sub usage_text {
+    my ($self) = @_;
+
+    return <<USAGE;
+Usage: pan_genome_post_analysis [options]
+Perform the post analysis on the pan genome. This script is usally only called by another script.
+
+Options: -a        dont delete intermediate files
+         -b        dont create R plots
+         -c STR    clusters filename [_clustered.clstr]
+         -cd FLOAT percentage of isolates a gene must be in to be core [99]
+         -d        dont split groups
+         -e        add inference values to gene presence and absence spreadsheet
+         -f STR    file of protein filenames [_fasta_files]
+         -g INT    maximum number of clusters [50000]
+         -i STR    file of GFF filenames [_gff_files]
+         -m        core gene alignement with PRANK
+         -n        fast core gene alignement with MAFFT instead of PRANK
+         -o STR    clusters output filename [clustered_proteins]
+         -p STR    output pan genome filename [pan_genome.fa]
+         -s STR    output gene presence and absence filename [gene_presence_absence.csv]
+         -t INT    translation table [11]
+         -z INT    number of threads [1]
+         -v        verbose output to STDOUT
+         -h        this help message
+         
+For further info see: http://sanger-pathogens.github.io/Roary/
+USAGE
+}
+
+__PACKAGE__->meta->make_immutable;
+no Moose;
+1;
diff --git a/lib/Bio/Roary/CommandLine/RoaryReorderSpreadsheet.pm b/lib/Bio/Roary/CommandLine/RoaryReorderSpreadsheet.pm
new file mode 100644
index 0000000..6785a8c
--- /dev/null
+++ b/lib/Bio/Roary/CommandLine/RoaryReorderSpreadsheet.pm
@@ -0,0 +1,100 @@
+undef $VERSION;
+package Bio::Roary::CommandLine::RoaryReorderSpreadsheet;
+
+# ABSTRACT: Take in a tree and a spreadsheet and output a reordered spreadsheet
+
+=head1 SYNOPSIS
+
+Take in a tree and a spreadsheet and output a reordered spreadsheet
+
+=cut
+
+use Moose;
+use Getopt::Long qw(GetOptionsFromArray);
+use Bio::Roary::ReorderSpreadsheet;
+extends 'Bio::Roary::CommandLine::Common';
+
+# Command line arguments and the name of the calling script; both must be
+# passed to the constructor.
+has 'args'        => ( is => 'ro', isa => 'ArrayRef', required => 1 );
+has 'script_name' => ( is => 'ro', isa => 'Str',      required => 1 );
+# Set from -h/--help in BUILD; run() dies with the usage text when true.
+has 'help'        => ( is => 'rw', isa => 'Bool',     default  => 0 );
+
+# No default: run() dies with the usage text unless -t names an existing file.
+has 'tree_file'            => ( is => 'rw', isa => 'Str' );
+has 'spreadsheet_filename' => ( is => 'rw', isa => 'Str', default => 'gene_presence_absence.csv' );
+has 'output_filename'      => ( is => 'rw', isa => 'Str', default => 'reordered_spreadsheet.csv' );
+has 'tree_format'          => ( is => 'rw', isa => 'Str', default => 'newick' );
+# run() accepts only 'depth' or 'breadth'.
+has 'search_strategy'      => ( is => 'rw', isa => 'Str', default => 'depth' );
+# run() accepts only 'height', 'creation', 'alpha' or 'revalpha'.
+has 'sortby'               => ( is => 'rw', isa => 'Str', default => 'height');
+has 'verbose'              => ( is => 'rw', isa => 'Bool', default => 0 );
+
+
+# Parse the command line in args and store every option that was given in
+# its attribute; anything not given keeps the attribute default. -v also
+# raises the logger level.
+sub BUILD {
+    my ($self) = @_;
+
+    my ( $output_filename, $tree_file, $search_strategy, $sortby, $tree_format, $spreadsheet_filename, $verbose, $help );
+
+    GetOptionsFromArray(
+        $self->args,
+        'o|output_filename=s'      => \$output_filename,
+        't|tree_file=s'            => \$tree_file,
+        'f|tree_format=s'          => \$tree_format,
+        's|spreadsheet_filename=s' => \$spreadsheet_filename,
+        'a|search_strategy=s'      => \$search_strategy,
+        'b|sortby=s'               => \$sortby,
+        'v|verbose'                => \$verbose,
+        'h|help'                   => \$help,
+    );
+
+    if ( defined($verbose) ) {
+        $self->verbose($verbose);
+        $self->logger->level(10000);
+    }
+
+    # Attribute name paired with the value parsed for it.
+    my @parsed_options = (
+        [ help                 => $help ],
+        [ output_filename      => $output_filename ],
+        [ tree_file            => $tree_file ],
+        [ tree_format          => $tree_format ],
+        [ spreadsheet_filename => $spreadsheet_filename ],
+        [ sortby               => $sortby ],
+        [ search_strategy      => $search_strategy ],
+    );
+    for my $option (@parsed_options) {
+        my ( $attribute, $value ) = @{$option};
+        next unless ( defined($value) );
+        $self->$attribute($value);
+    }
+}
+
+# Reorder the columns of the spreadsheet against the tree and write the
+# result to output_filename.
+#
+# Dies with the usage text when help was requested, when the spreadsheet or
+# the tree file is missing, or when sortby / search_strategy hold a value
+# other than the ones listed in the usage text.
+sub run {
+    my ($self) = @_;
+    ( defined($self->spreadsheet_filename) && defined($self->tree_file) && ( -e $self->spreadsheet_filename ) && ( -e $self->tree_file ) && ( !$self->help ) ) or die $self->usage_text;
+
+    ($self->sortby eq "height" || $self->sortby eq "creation" || $self->sortby eq "alpha" || $self->sortby eq "revalpha") or die $self->usage_text;
+    ($self->search_strategy eq "breadth" || $self->search_strategy eq "depth") or die $self->usage_text;
+
+    # tree_format is forwarded as well: -f was parsed and documented but its
+    # value never reached the reordering object.
+    # NOTE(review): assumes Bio::Roary::ReorderSpreadsheet takes a
+    # tree_format argument - confirm against that class.
+    my $obj = Bio::Roary::ReorderSpreadsheet->new(
+        tree_file       => $self->tree_file,
+        tree_format     => $self->tree_format,
+        spreadsheet     => $self->spreadsheet_filename,
+        output_filename => $self->output_filename,
+        sortby          => $self->sortby,
+        search_strategy => $self->search_strategy
+    );
+    $obj->reorder_spreadsheet();
+
+}
+
+# Help text of pan_genome_reorder_spreadsheet listing every option, the
+# values it accepts and its default.
+sub usage_text {
+    my ($self) = @_;
+
+    my $help_text = <<USAGE;
+Usage: pan_genome_reorder_spreadsheet [options] -t tree.newick
+Reorder the columns in the gene presence and absence spreadsheet against a phylogenetic tree.
+
+Options: -t STR tree filename []
+         -o STR output filename [reordered_spreadsheet.csv]
+         -f STR tree format (newick/nexus/nhx/svggraph/tabtree/lintree) [newick]
+         -s STR input gene presence and absence spreadsheet [gene_presence_absence.csv]
+         -a STR search strategy (depth/breadth) [depth]
+         -b STR sorting method (height/creation/alpha/revalpha) [height]
+         -v     verbose output to STDOUT
+         -h     this help message
+
+For further info see: http://sanger-pathogens.github.io/Roary/
+USAGE
+
+    return $help_text;
+}
+
+__PACKAGE__->meta->make_immutable;
+no Moose;
+1;
diff --git a/lib/Bio/Roary/CommandLine/TransferAnnotationToGroups.pm b/lib/Bio/Roary/CommandLine/TransferAnnotationToGroups.pm
new file mode 100644
index 0000000..35736ec
--- /dev/null
+++ b/lib/Bio/Roary/CommandLine/TransferAnnotationToGroups.pm
@@ -0,0 +1,108 @@
+undef $VERSION;
+package Bio::Roary::CommandLine::TransferAnnotationToGroups;
+
+# ABSTRACT: Take in a groups file and a set of GFF files and transfer the consensus annotation
+
+=head1 SYNOPSIS
+
+Take in a groups file and a set of GFF files and transfer the consensus annotation
+
+=cut
+
+use Moose;
+use Getopt::Long qw(GetOptionsFromArray);
+use Bio::Roary::AnnotateGroups;
+extends 'Bio::Roary::CommandLine::Common';
+
+
+has 'args'        => ( is => 'ro', isa => 'ArrayRef', required => 1 );
+has 'script_name' => ( is => 'ro', isa => 'Str',      required => 1 );
+has 'help'        => ( is => 'rw', isa => 'Bool',     default  => 0 );
+
+has 'gff_files'       => ( is => 'rw', isa => 'ArrayRef' );
+has 'groups_filename' => ( is => 'rw', isa => 'Str' );
+has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'reannotated_groups' );
+has 'verbose'         => ( is => 'rw', isa => 'Bool', default => 0 );
+has '_error_message'  => ( is => 'rw', isa => 'Str' );
+
+sub BUILD {
+    my ($self) = @_;
+
+    # Moose construction hook: parse the options out of args, leaving the GFF paths behind.
+    my ( $output_filename, $groups_filename, $verbose, $help );
+    GetOptionsFromArray(
+        $self->args,
+        'o|output=s'          => \$output_filename,
+        'g|groups_filename=s' => \$groups_filename,
+        'v|verbose'           => \$verbose,
+        'h|help'              => \$help,
+    );
+
+    if ( defined($verbose) ) {
+        $self->verbose($verbose);
+        $self->logger->level(10000);
+    }
+
+    $self->help($help) if ( defined($help) );
+    ( !$self->help ) or die $self->usage_text;
+
+    if ( @{ $self->args } == 0 ) {
+        $self->_error_message("Error: You need to provide a GFF file");
+    }
+
+    $self->output_filename($output_filename) if ( defined($output_filename) );
+    if ( defined($groups_filename) && ( -e $groups_filename ) ) {
+        $self->groups_filename($groups_filename);
+    }
+    else {
+        $self->_error_message("Error: Cant access the groups file");
+    }
+
+    # Problems are only recorded here; run() prints the stored message and dies with the usage text.
+    for my $filename ( @{ $self->args } ) {
+        if ( !-e $filename ) {
+            $self->_error_message("Error: Cant access file $filename");
+            last;
+        }
+    }
+    $self->gff_files( $self->args );
+}
+
+sub run {
+    my ($self) = @_;
+
+    # Report any problem recorded by BUILD on STDOUT, then abort with the usage text.
+    my $problem = $self->_error_message;
+    if ( defined($problem) ) {
+        print $problem . "\n";
+        die $self->usage_text;
+    }
+
+    # Hand the validated inputs to the annotator, which does the actual work.
+    my %annotator_args = (
+        gff_files       => $self->gff_files,
+        output_filename => $self->output_filename,
+        groups_filename => $self->groups_filename,
+    );
+    return Bio::Roary::AnnotateGroups->new(%annotator_args)->reannotate;
+}
+
+sub usage_text {
+    my ($self) = @_;
+    # Help text; the description and -g entry mirror the SYNOPSIS and BUILD (no default for -g).
+    return <<USAGE;
+Usage: transfer_annotation_to_groups [options] *.gff
+Take in a groups file and a set of GFF files and transfer the consensus annotation
+
+Options: -o STR output filename [reannotated_groups]
+         -g STR clusters filename []
+         -v     verbose output to STDOUT
+         -h     this help message
+
+For further info see: http://sanger-pathogens.github.io/Roary/
+USAGE
+}
+
+__PACKAGE__->meta->make_immutable;
+no Moose;
+1;
diff --git a/lib/Bio/Roary/ContigsToGeneIDsFromGFF.pm b/lib/Bio/Roary/ContigsToGeneIDsFromGFF.pm
new file mode 100644
index 0000000..a2cc6f8
--- /dev/null
+++ b/lib/Bio/Roary/ContigsToGeneIDsFromGFF.pm
@@ -0,0 +1,145 @@
+package Bio::Roary::ContigsToGeneIDsFromGFF;
+
+# ABSTRACT: Parse a GFF efficiently and extract ordered gene ids on each contig
+
+=head1 SYNOPSIS
+
+Parse a GFF efficiently and extract ordered gene ids on each contig
+   use Bio::Roary::ContigsToGeneIDsFromGFF;
+   
+   my $obj = Bio::Roary::ContigsToGeneIDsFromGFF->new(
+     gff_file   => 'abc.gff'
+   );
+   $obj->contig_to_ids;
+
+=cut
+
+use Moose;
+use Bio::Tools::GFF;
+with 'Bio::Roary::ParseGFFAnnotationRole';
+
+has 'contig_to_ids' => ( is => 'rw', isa => 'HashRef', lazy => 1, builder => '_build_contig_to_ids');
+
+has 'overlapping_hypothetical_protein_ids' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_overlapping_hypothetical_protein_ids');
+has '_genes_annotation' => ( is => 'rw', isa => 'ArrayRef', default => sub{[]});
+
+has '_min_nucleotide_overlap_percentage' => ( is => 'ro', isa => 'Int', default => 10);
+
+# Manually parse the GFF file because the BioPerl module is too slow
+sub _build_contig_to_ids
+{
+  # Builder for contig_to_ids. Returns a hashref mapping each contig name to an arrayref of
+  # the feature IDs seen on it, in file order. As a side effect it fills _genes_annotation
+  # with one record (product, id_name, contig, start, end, database_annotation_exists) for
+  # every feature that carries a product attribute.
+  my ($self) = @_;
+  my %contigs_to_ids;
+  my @genes_annotation;
+
+  open( my $fh, '-|', $self->_gff_fh_input_string ) or die "Couldnt open GFF file";
+
+  # Read each line into a lexical: the implicit form while(<$fh>) assigns to the global $_
+  # without localising it, which overwrites whatever the caller had stored there.
+  while ( my $line = <$fh> )
+  {
+    chomp($line);
+
+    # Lines without an ID attribute are skipped.
+    next unless ( $line =~ /ID=["']?([^;"']+)["']?;?/i );
+    my $id_name = $1;
+
+    # NOTE(review): the indices below assume the piped input has the columns contig, start,
+    # end, attributes (the layout _build__awk_filter prints) - confirm against the role.
+    my @annotation_elements = split( /\t/, $line );
+
+    # Map gene IDs to the contig
+    push( @{ $contigs_to_ids{ $annotation_elements[0] } }, $id_name );
+
+    # Only features with a product attribute are recorded for the overlap analysis.
+    next unless ( $line =~ /product=["']?([^;,"']+)[,"']?;?/i );
+    my %gene_data = (
+      product => $1,
+      id_name => $id_name,
+      contig  => $annotation_elements[0],
+      start   => $annotation_elements[1],
+      end     => $annotation_elements[2],
+    );
+
+    # A mention of one of these sources marks the annotation as database-backed.
+    $gene_data{database_annotation_exists} =
+      ( $line =~ /UniProtKB/ || $line =~ /RefSeq/ || $line =~ /protein motif/ ) ? 1 : 0;
+
+    push( @genes_annotation, \%gene_data );
+  }
+  close($fh);
+
+  $self->_genes_annotation( \@genes_annotation );
+
+  return \%contigs_to_ids;
+}
+
+sub _build_overlapping_hypothetical_protein_ids
+{
+  # Builder: returns a hashref keyed by the IDs of hypothetical proteins (no database
+  # annotation) that overlap the database-annotated feature immediately after them.
+  my ($self) = @_;
+  $self->contig_to_ids;    # forces the GFF parse, which populates _genes_annotation
+
+  my %overlapping_protein_ids;
+  my $genes = $self->_genes_annotation;
+
+  # Check whether the current feature is hypothetical and the next one has annotation
+  for my $i ( 0 .. $#{$genes} - 1 )
+  {
+    my $current_feature = $genes->[$i];
+    my $next_feature    = $genes->[ $i + 1 ];
+
+    next if ( $current_feature->{database_annotation_exists} == 1 );
+    next unless ( $current_feature->{product} =~ /hypothetical/i );
+    next unless ( $next_feature->{database_annotation_exists} == 1 );
+
+    # Coordinates are only comparable within one contig; neighbours in the list can sit on
+    # different contigs, so never compare across that boundary.
+    next unless ( $current_feature->{contig} eq $next_feature->{contig} );
+
+    my ( $start_coord, $end_coord ) = @{$current_feature}{qw(start end)};
+    my ( $comparison_start_coord, $comparison_end_coord ) = @{$next_feature}{qw(start end)};
+    next unless ( $comparison_start_coord < $end_coord && $comparison_end_coord > $start_coord );
+
+    my $percent_overlap = $self->_percent_overlap( $start_coord, $end_coord, $comparison_start_coord, $comparison_end_coord );
+    $overlapping_protein_ids{ $current_feature->{id_name} }++ if ( $percent_overlap >= $self->_min_nucleotide_overlap_percentage );
+  }
+  return \%overlapping_protein_ids;
+}
+
+sub _percent_overlap
+{
+   # Percentage of the span start_coord..end_coord (the hypothetical gene) that is also
+   # covered by comparison_start_coord..comparison_end_coord. The caller is expected to
+   # have established that the two spans intersect.
+   my ($self, $start_coord, $end_coord , $comparison_start_coord,$comparison_end_coord) = @_;
+   my $size_of_hypothetical_gene =  $end_coord - $start_coord;
+
+   # A span with start == end would divide by zero below; report no overlap instead of dying.
+   return 0 if($size_of_hypothetical_gene == 0);
+
+   # The shared region runs from the larger start to the smaller end.
+   my $lower_bound = ($comparison_start_coord > $start_coord) ? $comparison_start_coord : $start_coord;
+   my $upper_bound = ($comparison_end_coord   < $end_coord)   ? $comparison_end_coord   : $end_coord;
+
+   return (($upper_bound-$lower_bound)*100) / $size_of_hypothetical_gene;
+}
+
+
+sub _build__awk_filter {
+    # Builder for the awk command that keeps tab-separated rows whose third field matches
+    # _tags_to_filter and prints fields 1, 4, 5 and 9 of each, tab-separated.
+    my ($self) = @_;
+    my $tag_pattern = $self->_tags_to_filter;
+    return join( '', 'awk \'BEGIN {FS="\t"};{ if ($3 ~/', $tag_pattern, '/) print $1"\t"$4"\t"$5"\t"$9;}\' ' );
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/Exceptions.pm b/lib/Bio/Roary/Exceptions.pm
new file mode 100644
index 0000000..0c2e55a
--- /dev/null
+++ b/lib/Bio/Roary/Exceptions.pm
@@ -0,0 +1,16 @@
+package Bio::Roary::Exceptions;
+# ABSTRACT: Exceptions for input data 
+
+=head1 SYNOPSIS
+
+Exceptions for input data 
+
+=cut
+
+use strict; use warnings;
+use Exception::Class (
+    'Bio::Roary::Exceptions::FileNotFound'   => { description => 'Couldnt open the file' },
+    'Bio::Roary::Exceptions::CouldntWriteToFile'   => { description => 'Couldnt open the file for writing' },
+);  
+
+1;
diff --git a/lib/Bio/Roary/External/Blastp.pm b/lib/Bio/Roary/External/Blastp.pm
new file mode 100644
index 0000000..483ab1e
--- /dev/null
+++ b/lib/Bio/Roary/External/Blastp.pm
@@ -0,0 +1,68 @@
+package Bio::Roary::External::Blastp;
+
+# ABSTRACT: Wrapper around NCBIs blastp command
+
+=head1 SYNOPSIS
+
+Wrapper around NCBIs blastp command
+
+   use Bio::Roary::External::Blastp;
+   
+   my $blast_database= Bio::Roary::External::Blastp->new(
+     fasta_file => 'contigs.fa',
+     blast_database => 'db',
+     exec       => 'blastp',
+     output_file => 'results.out'
+   );
+   
+   $blast_database->run();
+
+=method result_file
+
+Returns the path to the results file
+
+=cut
+
+use Moose;
+with 'Bio::Roary::JobRunner::Role';
+
+has 'fasta_file'        => ( is => 'ro', isa => 'Str', required => 1 );
+has 'blast_database'    => ( is => 'ro', isa => 'Str', required => 1 );
+has 'exec'              => ( is => 'ro', isa => 'Str', default  => 'blastp' );
+has '_evalue'           => ( is => 'ro', isa => 'Num', default  => 1E-6 );
+has '_num_threads'      => ( is => 'ro', isa => 'Int', default  => 1 );
+has '_max_target_seqs'  => ( is => 'ro', isa => 'Int', default  => 2000 );
+has '_logging'          => ( is => 'ro', isa => 'Str', default  => '2> /dev/null' );
+has 'output_file'       => ( is => 'ro', isa => 'Str', default  => 'results.out' );
+has 'perc_identity'     => ( is => 'ro', isa => 'Num', default  => 98 );
+
+sub _command_to_run {
+    # Assemble the shell pipeline: blastp in tabular format (-outfmt 6), piped through awk so
+    # only rows whose third column is strictly greater than perc_identity reach output_file.
+    # NOTE(review): _logging sits after the pipe, so it redirects awk's stderr rather than
+    # blastp's - confirm that is intended.
+    my ($self) = @_;
+
+    my @blastp_args = (
+        '-query'           => $self->fasta_file,
+        '-db'              => $self->blast_database,
+        '-evalue'          => $self->_evalue,
+        '-num_threads'     => $self->_num_threads,
+        '-outfmt 6',
+        '-max_target_seqs' => $self->_max_target_seqs,
+    );
+    my $identity_filter = ' | awk \'{ if ($3 > ' . $self->perc_identity . ') print $0;}\'';
+
+    return join( " ", $self->exec, @blastp_args, $identity_filter, $self->_logging, '1> ', $self->output_file );
+}
+
+sub run {    # Log the blastp pipeline, run it through the shell and log a failure; always returns 1.
+    my ($self) = @_;
+    $self->logger->info( "Running command: " . $self->_command_to_run() );
+    system( $self->_command_to_run ) == 0 or $self->logger->error( "blastp command exited with wait status $?" );
+    return 1;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+1;
diff --git a/lib/Bio/Roary/External/Cdhit.pm b/lib/Bio/Roary/External/Cdhit.pm
new file mode 100644
index 0000000..716ee07
--- /dev/null
+++ b/lib/Bio/Roary/External/Cdhit.pm
@@ -0,0 +1,103 @@
+package Bio::Roary::External::Cdhit;
+
+# ABSTRACT: Wrapper to run cd-hit
+
+=head1 SYNOPSIS
+
+Wrapper to run cd-hit
+   use Bio::Roary::External::Cdhit;
+   
+   my $obj = Bio::Roary::External::Cdhit->new(
+     input_file   => 'abc.fa',
+     exec         => 'cd-hit',
+     output_base  => 'efg',
+   );
+  $obj->run;
+
+=cut
+
+use Moose;
+
+with 'Bio::Roary::JobRunner::Role';
+
+has 'input_file'                   => ( is => 'ro', isa => 'Str',  required => 1 );
+has 'output_base'                  => ( is => 'ro', isa => 'Str',  default  => 'output' );
+has 'exec'                         => ( is => 'ro', isa => 'Str',  default  => 'cd-hit' );
+has 'alt_exec'                     => ( is => 'ro', isa => 'Str',  default  => 'cdhit' );
+has '_max_available_memory_in_mb'  => ( is => 'ro', isa => 'Int',  lazy => 1, builder => '_build__max_available_memory_in_mb' );
+has '_use_most_similar_clustering' => ( is => 'ro', isa => 'Bool', default  => 1 );
+has '_length_difference_cutoff'    => ( is => 'ro', isa => 'Num',  default  => 1 );
+has '_sequence_identity_threshold' => ( is => 'ro', isa => 'Num',  default  => 1 );
+has '_description_length'          => ( is => 'ro', isa => 'Int',  default  => 256 );
+has '_logging'                     => ( is => 'ro', isa => 'Str',  default  => '> /dev/null 2>&1' );
+has '_max_cpus'                    => ( is => 'ro', isa => 'Int',  default  => 40 );
+
+
+# Overload Role
+has 'memory_in_mb'  => ( is => 'ro', isa => 'Int',  lazy => 1, builder => '_build_memory_in_mb' );
+
+sub _build_memory_in_mb
+{
+  # Builder for memory_in_mb: estimate the memory request (MB) from the input file size.
+  my ($self) = @_;
+  my $input_path = $self->input_file;
+  my $floor_mb   = 2000;
+
+  # Without an input file on disk there is nothing to measure, so use the floor.
+  return $floor_mb unless ( -e $input_path );
+
+  # File size in bytes -> whole megabytes (10^6 bytes), then allow five times that
+  # for the worst case scenario.
+  my $estimate_mb = int( ( -s $input_path ) / 1000000 ) * 5;
+
+  # Never request less than the floor.
+  return ( $estimate_mb < $floor_mb ) ? $floor_mb : $estimate_mb;
+}
+
+sub _build__max_available_memory_in_mb
+{
+  # Builder: the value passed to cd-hit's -M option is 90% of memory_in_mb, truncated.
+  my ($self) = @_;
+  return int( $self->memory_in_mb * 0.9 );
+}
+
+sub clusters_filename
+{
+  my ($self) = @_;    # returns output_base with a '.clstr' extension appended
+  return $self->output_base . '.clstr';
+}
+
+sub _command_to_run {
+    # Build the cd-hit command line as one space-separated string, using the executable
+    # _find_exe returns for exec/alt_exec and never asking for more than _max_cpus threads.
+    my ($self) = @_;
+
+    my $executable = $self->_find_exe( [ $self->exec, $self->alt_exec ] );
+    my $cpus       = $self->cpus;
+    $cpus = $self->_max_cpus if ( $cpus > $self->_max_cpus );
+
+    my @options = (
+        '-i' => $self->input_file,                   '-o' => $self->output_base,
+        '-T' => $cpus,                               '-M' => $self->_max_available_memory_in_mb,
+        '-g' => $self->_use_most_similar_clustering, '-s' => $self->_length_difference_cutoff,
+        '-d' => $self->_description_length,          '-c' => $self->_sequence_identity_threshold,
+    );
+    return join( ' ', $executable, @options, $self->_logging );
+}
+
+sub run {
+    # Log the cd-hit command, then execute it through the configured job runner. Returns 1.
+    my ($self) = @_;
+    my @commands_to_run = ( $self->_command_to_run() );
+    $self->logger->info( "Running command: " . $self->_command_to_run() );
+
+    my %runner_args = ( commands_to_run => \@commands_to_run, memory_in_mb => $self->memory_in_mb, queue => $self->_queue, cpus => $self->cpus );
+    $self->_job_runner_class->new(%runner_args)->run();
+
+    return 1;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/External/CheckTools.pm b/lib/Bio/Roary/External/CheckTools.pm
new file mode 100644
index 0000000..b05593a
--- /dev/null
+++ b/lib/Bio/Roary/External/CheckTools.pm
@@ -0,0 +1,190 @@
+package Bio::Roary::External::CheckTools;
+
+# ABSTRACT: Check external executables are available and are the correct version
+
+=head1 SYNOPSIS
+Functionality borrowed from PROKKA by Torsten Seemann.
+Check external executables are available and are the correct version
+
+   use Bio::Roary::External::CheckTools;
+   
+   my $obj = Bio::Roary::External::CheckTools->new();
+   $obj->check_all_tools;
+
+=cut
+
+use Moose;
+use File::Spec;
+use Log::Log4perl qw(:easy);
+has 'logger' => ( is => 'ro', lazy => 1, builder => '_build_logger' );
+
+sub _build_logger {
+    # Builder for logger: initialise Log4perl's easy mode at DEBUG, then return get_logger().
+    my ($self) = @_;
+    Log::Log4perl->easy_init($DEBUG);
+
+    return get_logger();
+}
+
+my $BIDEC = '(\d+\.\d+)';    # pattern of NN.NN for versions that can be compared
+# Read by check_tool: GETVER = shell command printing the version, REGEXP = pattern capturing it, MINVER = lowest accepted version, NEEDED = 1 if a missing tool is reported as required.
+my %tools = (
+    'parallel' => {
+        GETVER => "parallel --version | grep '^GNU parallel 2'",
+        REGEXP => qr/GNU parallel (\d+)/,
+        MINVER => "20130422",
+        NEEDED => 1,
+    },
+    'blastp' => {
+        GETVER => "blastp -version",
+        REGEXP => qr/blastp:\s+(\d+\.\d+\.\d+)/,
+        NEEDED => 1,
+    },
+    'makeblastdb' => {
+        GETVER => "makeblastdb -version",
+        REGEXP => qr/makeblastdb:\s+(\d+\.\d+\.\d+)/,
+        NEEDED => 1,
+    },
+    'mcl' => {
+        GETVER => "mcl --version | head -n 1",
+        REGEXP => qr/(\d+\-\d+)/,
+        NEEDED => 1,
+    },
+    'bedtools' => {
+        GETVER => "bedtools --version",
+        REGEXP => qr/bedtools v($BIDEC)/,
+        MINVER => "2.1",
+        NEEDED => 1,
+    },
+    'mafft' => {
+        GETVER => "mafft --version < /dev/null 2>&1",
+        REGEXP => qr/v($BIDEC) /,
+        NEEDED => 1,
+    },
+    'kraken' => {
+        GETVER => "kraken --version | head -n 1",
+        REGEXP => qr/Kraken version kraken-(\d+\.\d+\.\d+.*)/,
+        NEEDED => 0,
+    },
+    'kraken-report' => {
+        GETVER => "kraken-report --version | head -n 1",
+        REGEXP => qr/Kraken version kraken-(\d+\.\d+\.\d+.*)/,
+        NEEDED => 0,
+    },
+	'Rscript'  => {
+        GETVER => "Rscript --version 2>&1 | head -n 1",
+        REGEXP => qr/R scripting front-end version ($BIDEC)/,
+		MINVER => "3",
+        NEEDED => 0,
+    },
+
+    # prank version also performs an update check so cant use it
+    'prank' => { NEEDED => 1 },
+
+    # now just the standard unix tools we need
+    'grep' => { NEEDED => 1 },
+    'sed'  => { NEEDED => 1 },
+    'awk'  => { NEEDED => 1 },
+	
+);
+
+my %cdhit_tools = (    # alternative executable names for cd-hit; check_all_tools checks whichever one is found
+    'cdhit' => {
+        GETVER => "cdhit -h | grep 'CD-HIT version'",
+        REGEXP => qr/version ($BIDEC) /,
+        MINVER => "4.6",
+    },
+    'cd-hit' => {
+        GETVER => "cd-hit -h | grep 'CD-HIT version'",
+        REGEXP => qr/version ($BIDEC) /,
+        MINVER => "4.6",
+    }
+);
+
+my %fasttree_tools = (    # alternative executable names for FastTree; check_all_tools checks whichever one is found
+    'fasttree' => {
+        GETVER => "fasttree 2>&1 | head -n 1",
+        REGEXP => qr/Usage for FastTree version ($BIDEC)/,
+    },
+    'FastTree' => {
+        GETVER => "FastTree 2>&1 | head -n 1",
+        REGEXP => qr/Usage for FastTree version ($BIDEC)/,
+    }
+);
+
+sub which_tool_exec {
+    # Return the first name (in sorted order) among the keys of $alt_tools that find_exe can
+    # locate; log an error and return undef when none of the alternatives is found.
+    my ( $self, $alt_tools ) = @_;
+    my @candidates = sort keys %{$alt_tools};    # one ordering for both the lookup and the message
+    for my $toolname (@candidates) { return $toolname if $self->find_exe($toolname); }
+    $self->logger->error( "Required tool missing. Can't find one of " . join( '/', @candidates ) . " in your \$PATH." );
+    return undef;
+}
+
+sub check_tool {
+    # Look up $toolname on $PATH and, when the tool table gives a GETVER command, parse its
+    # version and compare it with MINVER/MAXVER. Findings are only logged; nothing is thrown.
+    my ( $self, $toolname ) = @_;
+    my $t  = $tools{$toolname};
+    my $fp = $self->find_exe($toolname);
+    $self->logger->error("ERROR: Can't find required '$toolname' in your \$PATH")     if !$fp and $t->{NEEDED};
+    $self->logger->error("Optional tool '$toolname' not found in your \$PATH") if !$fp and !$t->{NEEDED};
+    return unless $fp;
+
+    $t->{HAVE} = $fp;
+    $self->logger->warn("Looking for '$toolname' - found $fp");
+    return unless $t->{GETVER};
+    # First output line only; a failed match must not reuse a stale $1 from an earlier match.
+    my ($s) = qx($t->{GETVER});
+    if ( defined $s and $s =~ $t->{REGEXP} and defined $1 ) {
+        $t->{VERSION} = $1;
+        $self->logger->warn("Determined $toolname version is $t->{VERSION}");
+        if ( defined $t->{MINVER} and $t->{VERSION} < $t->{MINVER} ) {
+            $self->logger->error("Roary needs $toolname $t->{MINVER} or higher. Please upgrade and try again.");
+        }
+        if ( defined $t->{MAXVER} and $t->{VERSION} > $t->{MAXVER} ) {
+            $self->logger->error("Roary needs a version of $toolname between $t->{MINVER} and $t->{MAXVER}. Please downgrade and try again.");
+        }
+    }
+    else {
+        my $wanted = defined $t->{MINVER} ? " - please install version $t->{MINVER} or higher" : "";
+        $self->logger->error("Could not determine version of $toolname$wanted");    # FIXME: or less <= MAXVER if given
+    }
+    return;
+}
+
+sub check_all_tools {
+    # Check every entry of %tools, then whichever cd-hit and FastTree executable is found.
+    # Returns $self.
+    my ($self) = @_;
+    $ENV{"GREP_OPTIONS"} = '';    # --colour => version grep fails (Issue #117)
+
+    for my $toolname ( sort keys %tools ) {
+        $self->check_tool($toolname);
+    }
+
+    # Each table lists alternative names for one tool. The name that is found is copied into
+    # %tools first, because check_tool reads its version rules from there.
+    for my $alternatives ( \%cdhit_tools, \%fasttree_tools ) {
+        my $installed = $self->which_tool_exec($alternatives);
+        next unless ($installed);
+        $tools{$installed} = $alternatives->{$installed};
+        $self->check_tool($installed);
+    }
+
+    return $self;
+}
+
+sub find_exe {    # Return the path of the first executable file named $bin on $PATH, or nothing.
+    my ( $self, $bin ) = @_;
+    for my $dir ( File::Spec->path ) {
+        my $exe = File::Spec->catfile( $dir, $bin );
+        return $exe if ( -x $exe && !-d _ );    # directories pass -x too, so rule them out
+    }
+    return;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/External/Fasttree.pm b/lib/Bio/Roary/External/Fasttree.pm
new file mode 100644
index 0000000..b29414e
--- /dev/null
+++ b/lib/Bio/Roary/External/Fasttree.pm
@@ -0,0 +1,73 @@
+package Bio::Roary::External::Fasttree;
+
+# ABSTRACT: Wrapper to run Fasttree
+
+=head1 SYNOPSIS
+
+Wrapper to run FastTree
+   use Bio::Roary::External::Fasttree;
+   
+   my $obj = Bio::Roary::External::Fasttree->new(
+     input_file   => 'abc.fa',
+     exec         => 'FastTree',
+     output_base  => 'efg',
+   );
+  $obj->run;
+
+=cut
+
+use Moose;
+with 'Bio::Roary::JobRunner::Role';
+
+has 'input_file'                   => ( is => 'ro', isa => 'Str', required => 1 );
+has 'output_file'                  => ( is => 'ro', isa => 'Str', lazy     => 1,  builder => '_build_output_file' );
+has 'exec'                         => ( is => 'ro', isa => 'Str', default  => 'FastTree' );
+has 'alt_exec'                     => ( is => 'ro', isa => 'Str', default  => 'fasttree' );
+has '_logging'                     => ( is => 'ro', isa => 'Str', default  => '2> /dev/null' );
+
+sub _build_output_file
+{
+    my ($self) = @_;    # builder: the output path is input_file with '.newick' appended
+    return join( '', $self->input_file, ".newick" );
+}
+
+sub _command_to_run {
+    # Build the FastTree shell command with the -fastest and -nt flags, redirecting the tree
+    # to output_file and appending the _logging redirect unless verbose is set.
+    my ($self) = @_;
+
+    my $executable = $self->_find_exe( [ $self->exec, $self->alt_exec ] );
+    my $logging_str = $self->verbose ? "" : $self->_logging;
+
+    my @parts = ( $executable, '-fastest', '-nt', $self->input_file, '>', $self->output_file, $logging_str );
+    return join( ' ', @parts );
+}
+
+sub run {
+    # Build a tree from input_file through the job runner. Always returns 1, including when
+    # the input is missing or too small and no tree is attempted.
+    my ($self) = @_;
+    my $alignment = $self->input_file;
+
+    unless ( defined($alignment) && ( -e $alignment ) ) {
+        $self->logger->error( "The input file is missing so not creating a tree" );
+        return 1;
+    }
+
+    # Files under 5 bytes are treated as too small to build a tree from.
+    if ( ( -s $alignment ) < 5 ) {
+        $self->logger->info( "The input file is too small so not creating a tree" );
+        return 1;
+    }
+
+    my @commands_to_run = ( $self->_command_to_run() );
+    $self->logger->info( "Running command: " . $self->_command_to_run() );
+    my %runner_args = ( commands_to_run => \@commands_to_run, memory_in_mb => $self->memory_in_mb, queue => $self->_queue, cpus => $self->cpus );
+    $self->_job_runner_class->new(%runner_args)->run();
+    return 1;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/External/GeneAlignmentFromNucleotides.pm b/lib/Bio/Roary/External/GeneAlignmentFromNucleotides.pm
new file mode 100644
index 0000000..cc90548
--- /dev/null
+++ b/lib/Bio/Roary/External/GeneAlignmentFromNucleotides.pm
@@ -0,0 +1,138 @@
+package Bio::Roary::External::GeneAlignmentFromNucleotides;
+
+# ABSTRACT: Take in multi-FASTA files of nucleotides and align each file with PRANK or MAFFT
+
+=head1 SYNOPSIS
+
+Take in multi-FASTA files of nucleotides and align each file with PRANK or MAFFT
+
+   use Bio::Roary::External::GeneAlignmentFromNucleotides;
+   
+   my $seg = Bio::Roary::External::GeneAlignmentFromNucleotides->new(
+     fasta_files => [],
+   );
+   
+   $seg->run();
+
+=method output_file
+
+Returns the path to the results file
+
+=cut
+
+use Moose;
+with 'Bio::Roary::JobRunner::Role';
+
+has 'fasta_files'                 => ( is => 'ro', isa => 'ArrayRef', required => 1 );
+has 'exec'                        => ( is => 'ro', isa => 'Str',      default  => 'protein_alignment_from_nucleotides' );
+has 'translation_table'           => ( is => 'rw', isa => 'Int',      default => 11 );
+has 'core_definition'             => ( is => 'ro', isa => 'Num',      default => 1 );
+has 'mafft'                       => ( is => 'ro', isa => 'Bool',     default => 0 );
+has 'dont_delete_files'           => ( is => 'rw', isa => 'Bool',     default  => 0 );
+has 'num_input_files'             => ( is => 'ro', isa => 'Int',      required => 1);
+
+# Overload Role
+has 'memory_in_mb' => ( is => 'rw', isa => 'Int', lazy     => 1, builder => '_build_memory_in_mb' );
+has '_min_memory_in_mb'      => ( is => 'ro', isa => 'Int', default => 1500 );
+has '_max_memory_in_mb'      => ( is => 'ro', isa => 'Int', default => 60000 );
+has '_queue'                 => ( is => 'rw', isa => 'Str', default  => 'normal' );
+has '_files_per_chunk'       => ( is => 'ro', isa => 'Int', lazy     => 1, builder => '_build__files_per_chunk' );
+has '_core_alignment_cmd'    => ( is => 'rw', isa => 'Str', lazy_build => 1 );
+has '_dependancy_memory_in_mb'  => ( is => 'ro', isa => 'Int', default => 15000 );
+
+sub _build__files_per_chunk
+{
+    # Builder: number of FASTA files handed to each alignment command. Larger runs use
+    # smaller chunks: 5 above 1000 input files, 7 above 500, otherwise 10.
+    # The thresholds are exclusive, so exactly 1000 files still gives 7.
+    my ($self) = @_;
+    my $input_count = $self->num_input_files;
+
+    return 5 if ( $input_count > 1000 );
+    return 7 if ( $input_count > 500 );
+
+    return 10;
+}
+
+sub _build_memory_in_mb {
+    # Builder: take the largest FASTA file size divided by num_input_files as an approximate
+    # sequence length, square it, scale to MB, double it and add _min_memory_in_mb. The
+    # result is capped at _max_memory_in_mb.
+    my ($self) = @_;
+
+    my $largest_file_size = 1;
+    for my $file ( @{ $self->fasta_files } ) {
+        my $file_size = ( -s $file ) || 0;    # a missing or empty file counts as size 0
+        $largest_file_size = $file_size if ( $file_size > $largest_file_size );
+    }
+
+    # Guard the division: with no input files recorded, treat the count as 1.
+    my $sequence_count = ( $self->num_input_files > 0 ) ? $self->num_input_files : 1;
+    my $approx_sequence_length_of_largest_file = $largest_file_size / $sequence_count;
+    my $memory_required = int( ( ( $approx_sequence_length_of_largest_file * $approx_sequence_length_of_largest_file ) / 1000000 ) * 2 + $self->_min_memory_in_mb );
+
+    return $self->_max_memory_in_mb if ( $memory_required > $self->_max_memory_in_mb );
+    return $memory_required;
+}
+
+sub _command_to_run {
+    # Build the alignment command for one chunk: exec, the optional -v and --mafft flags,
+    # then the chunk's FASTA paths separated by single spaces.
+    my ( $self, $fasta_files ) = @_;
+
+    my $flags = "";
+    $flags .= ' -v '      if ( $self->verbose );
+    $flags .= ' --mafft ' if ( $self->mafft );
+
+    return $self->exec . " " . $flags . join( " ", @{$fasta_files} );
+}
+
+sub _build__core_alignment_cmd {
+    # Builder: the pan_genome_core_alignment command that run() passes to submit_dependancy_job.
+    my ($self) = @_;
+    my @cmd_parts = ("pan_genome_core_alignment");
+    push( @cmd_parts, " -cd " . ( $self->core_definition * 100 ) ) if ( defined $self->core_definition );
+    push( @cmd_parts, " --dont_delete_files " ) if ( defined $self->dont_delete_files && $self->dont_delete_files == 1 );
+
+    return join( '', @cmd_parts );
+}
+
+sub run {
+    # Split fasta_files into chunks of _files_per_chunk, hand one alignment command per chunk
+    # to the job runner (dont_wait => 1), then submit the core alignment as a dependency job.
+    my ($self) = @_;
+    my @commands_to_run;
+    my @pending_files;
+
+    # Turns the pending files into one queued, logged command and starts a new chunk.
+    my $queue_chunk = sub {
+        push( @commands_to_run, $self->_command_to_run( \@pending_files ) );
+        $self->logger->info( "Running command: " . $self->_command_to_run( \@pending_files ) );
+        @pending_files = ();
+    };
+
+    for my $fasta_file ( @{ $self->fasta_files } ) {
+        push( @pending_files, $fasta_file );
+        $queue_chunk->() if ( @pending_files == $self->_files_per_chunk );
+    }
+    $queue_chunk->() if ( @pending_files > 0 );    # final, partially filled chunk
+
+    my %runner_args = (
+        commands_to_run => \@commands_to_run,
+        memory_in_mb    => $self->memory_in_mb,
+        queue           => $self->_queue,
+        dont_wait       => 1,
+        cpus            => $self->cpus
+    );
+    my $job_runner_obj = $self->_job_runner_class->new(%runner_args);
+    $job_runner_obj->run();
+
+    $job_runner_obj->memory_in_mb( $self->_dependancy_memory_in_mb );    # the dependency job gets its own memory request
+    $self->logger->info( "Running command: " . $self->_core_alignment_cmd() );
+    $job_runner_obj->submit_dependancy_job( $self->_core_alignment_cmd );
+    return 1;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+1;
diff --git a/lib/Bio/Roary/External/IterativeCdhit.pm b/lib/Bio/Roary/External/IterativeCdhit.pm
new file mode 100644
index 0000000..91a1db5
--- /dev/null
+++ b/lib/Bio/Roary/External/IterativeCdhit.pm
@@ -0,0 +1,92 @@
+package Bio::Roary::External::IterativeCdhit;
+
+# ABSTRACT: Iteratively run CDhit
+
+=head1 SYNOPSIS
+
+Iteratively run CDhit
+
+   use Bio::Roary::External::IterativeCdhit;
+   
+   my $seg= Bio::Roary::External::IterativeCdhit->new(
+     output_cd_hit_filename => '',
+     output_combined_filename  => '',
+     number_of_input_files => 10, 
+     output_filtered_clustered_fasta  => '',
+   );
+   
+   $seg->run();
+
+=cut
+
+use Moose;
+with 'Bio::Roary::JobRunner::Role';
+
+has 'output_cd_hit_filename'          => ( is => 'ro', isa => 'Str', required => 1 );
+has 'output_combined_filename'        => ( is => 'ro', isa => 'Str', required => 1 );
+has 'number_of_input_files'           => ( is => 'ro', isa => 'Int', required => 1 );
+has 'output_filtered_clustered_fasta' => ( is => 'ro', isa => 'Str', required => 1 );
+has 'exec'                            => ( is => 'ro', isa => 'Str', default  => 'iterative_cdhit' );
+has '_max_cpus'                       => ( is => 'ro', isa => 'Int',  default  => 40 );
+# Overload Role
+has 'memory_in_mb' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_build_memory_in_mb' );
+
+sub _build_memory_in_mb {
+    # Builder for memory_in_mb: size the memory request (MB) from the combined input file.
+    my ($self) = @_;
+    my $combined_file = $self->output_combined_filename;
+    my $minimum_mb    = 2000;
+
+    # No file on disk means no size to scale from.
+    return $minimum_mb unless ( -e $combined_file );
+
+    # Bytes -> whole megabytes (10^6 bytes).
+    my $size_in_mb = int( ( -s $combined_file ) / 1000000 );
+
+    # Allow five times the file size for the worst case scenario, never less than the minimum.
+    my $scaled_mb = $size_in_mb * 5;
+    return $minimum_mb if ( $scaled_mb < $minimum_mb );
+    return $scaled_mb;
+}
+
+sub _build__max_available_memory_in_mb {
+    # 90% of memory_in_mb, truncated. NOTE(review): no attribute visible in this package uses this builder.
+    my ($self) = @_;
+    return int( $self->memory_in_mb * 0.9 );
+}
+
+sub _command_to_run {
+    # Build the iterative_cdhit command line, never asking for more than _max_cpus CPUs.
+    my ($self) = @_;
+    my $cpus = $self->cpus;
+    $cpus = $self->_max_cpus if ( $cpus > $self->_max_cpus );
+
+    my @options = (
+        '-c' => $self->output_cd_hit_filename, '-m' => $self->output_combined_filename, '-n' => $self->number_of_input_files,
+        '--cpus' => $cpus, '-f' => $self->output_filtered_clustered_fasta,
+    );
+
+    return join( ' ', $self->exec, @options );
+}
+
+sub run {
+    # Log the iterative_cdhit command and execute it through the job runner. Returns 1.
+    my ($self) = @_;
+
+    my @commands_to_run = ( $self->_command_to_run );
+    $self->logger->info( "Running command: " . $self->_command_to_run() );
+
+    my %runner_args = (
+        commands_to_run => \@commands_to_run,
+        memory_in_mb    => $self->memory_in_mb,
+        queue           => $self->_queue,
+        cpus            => $self->cpus
+    );
+    $self->_job_runner_class->new(%runner_args)->run();
+    return 1;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/External/Mafft.pm b/lib/Bio/Roary/External/Mafft.pm
new file mode 100644
index 0000000..d54541c
--- /dev/null
+++ b/lib/Bio/Roary/External/Mafft.pm
@@ -0,0 +1,76 @@
+package Bio::Roary::External::Mafft;
+
+# ABSTRACT: Wrapper to run mafft
+
+=head1 SYNOPSIS
+
+Wrapper to run mafft
+   use Bio::Roary::External::Mafft;
+   
+	my $mafft_obj = Bio::Roary::External::Mafft->new(
+	  input_filename  => $fasta_file,
+	  output_filename => $fasta_file.'.aln',
+	  job_runner      => 'Local'
+	);
+	$mafft_obj->run();
+=cut
+
+use Moose;
+use File::Spec;
+with 'Bio::Roary::JobRunner::Role';
+
+has 'input_filename'  => ( is => 'ro', isa => 'Str', required => 1 );
+has 'output_filename' => ( is => 'ro', isa => 'Str', default  => 'output' );
+has 'exec'            => ( is => 'ro', isa => 'Str', default  => 'mafft' );
+
+# Overload Role
+has 'memory_in_mb' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_build_memory_in_mb' );
+
+sub _build_memory_in_mb {
+    # Builder for memory_in_mb: a fixed request of 2000.
+    my ($self) = @_;
+    return 2000;
+}
+
+sub _command_to_run {
+    # Build the mafft shell command (--auto --quiet), redirecting the alignment to
+    # output_filename. A missing input file is logged as an error but does not stop the
+    # command from being built.
+    my ($self) = @_;
+    my $input = $self->input_filename;
+
+    unless ( -e $input ) {
+        $self->logger->error( "Input file to MAFFT missing: " . $input );
+    }
+
+    my @words = (
+        $self->exec,
+        '--auto', '--quiet',
+        $input,
+        '>', $self->output_filename,
+    );
+    return join( ' ', @words );
+}
+
+sub run {
+    # Log the mafft command and execute it through the configured job runner. Returns 1.
+    my ($self) = @_;
+
+    my @commands_to_run = ( $self->_command_to_run() );
+    $self->logger->info( "Running command: " . $self->_command_to_run() );
+
+    my %runner_args = (
+        commands_to_run => \@commands_to_run,
+        memory_in_mb    => $self->memory_in_mb,
+        queue           => $self->_queue,
+        cpus            => $self->cpus
+    );
+    $self->_job_runner_class->new(%runner_args)->run();
+
+    return 1;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/External/Makeblastdb.pm b/lib/Bio/Roary/External/Makeblastdb.pm
new file mode 100644
index 0000000..e1b75e3
--- /dev/null
+++ b/lib/Bio/Roary/External/Makeblastdb.pm
@@ -0,0 +1,72 @@
+package Bio::Roary::External::Makeblastdb;
+
+# ABSTRACT: Wrapper around NCBI's makeblastdb command
+
+=head1 SYNOPSIS
+
+Take in a fasta file and create a temporary blast database.
+
+   use Bio::Roary::External::Makeblastdb;
+   
+   my $blast_database= Bio::Roary::External::Makeblastdb->new(
+     fasta_file => 'contigs.fa',
+     exec       => 'makeblastdb'
+   );
+   
+   $blast_database->run();
+
+=method output_database
+
+Returns the path to the temporary blast database files
+
+=cut
+
+use Moose;
+use File::Temp;
+use Cwd;
+with 'Bio::Roary::JobRunner::Role';
+
+has 'fasta_file'         => ( is => 'ro', isa => 'Str', required => 1 );
+has 'exec'               => ( is => 'ro', isa => 'Str', default  => 'makeblastdb' );
+has '_working_directory' => ( is => 'ro', isa => 'File::Temp::Dir', default  => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } );
+has '_dbtype'            => ( is => 'ro', isa => 'Str', default  => 'prot' );
+has '_logfile'           => ( is => 'ro', isa => 'Str', default  => '/dev/null' );
+has 'output_database'    => ( is => 'ro', isa => 'Str', lazy     => 1, builder => '_build_output_database' );
+
+# Overload Role
+has 'memory_in_mb'  => ( is => 'ro', isa => 'Int', default => 4000);
+
+sub _build_output_database {    # builder: 'output_contigs' prefix inside the temporary working directory
+    my $self = shift;
+    return $self->_working_directory->dirname() . '/' . 'output_contigs';
+}
+
+# Compose the makeblastdb invocation: fasta_file as input, database type
+# _dbtype, -parse_seqids, output prefix output_database and the tool's log
+# written to _logfile. Returns the command as a single string.
+sub _command_to_run {
+    my $self = shift;
+
+    my @argv = ( $self->exec );
+    push @argv, '-in',      $self->fasta_file;
+    push @argv, '-dbtype',  $self->_dbtype;
+    push @argv, '-parse_seqids';
+    push @argv, '-out',     $self->output_database;
+    push @argv, '-logfile', $self->_logfile;
+    return join( " ", @argv );
+}
+
+# Run makeblastdb through the configured job runner; always returns 1.
+# The command string is built once and shared by the log line and the job.
+sub run {
+  my ($self) = @_;
+  my $command = $self->_command_to_run();
+  $self->logger->info( "Running command: " . $command );
+  my $job_runner_obj = $self->_job_runner_class->new( commands_to_run => [$command], memory_in_mb => $self->memory_in_mb, queue => $self->_queue, cpus  => $self->cpus  );
+  $job_runner_obj->run();
+  return 1;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+1;
diff --git a/lib/Bio/Roary/External/Mcl.pm b/lib/Bio/Roary/External/Mcl.pm
new file mode 100644
index 0000000..7ae30d4
--- /dev/null
+++ b/lib/Bio/Roary/External/Mcl.pm
@@ -0,0 +1,106 @@
+package Bio::Roary::External::Mcl;
+
+# ABSTRACT: Wrapper around MCL which takes in blast results and outputs clustered results
+
+=head1 SYNOPSIS
+
+Wrapper around MCL which takes in blast results and outputs clustered results
+
+   use Bio::Roary::External::Mcl;
+   
+   my $mcl= Bio::Roary::External::Mcl->new(
+     blast_results     => 'db',
+     mcxdeblast_exec   => 'mcxdeblast',
+     mcl_exec          => 'mcl',
+     output_file       => 'output.groups'
+   );
+   
+   $mcl->run();
+
+=cut
+
+use Moose;
+use File::Which;
+with 'Bio::Roary::JobRunner::Role';
+
+has 'blast_results'   => ( is => 'ro', isa => 'Str', required => 1 );
+has 'mcxdeblast_exec' => ( is => 'ro', isa => 'Str', default  => 'mcxdeblast' );
+has '_full_mcxdeblast_exec' =>  ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__full_mcxdeblast_exec');
+has 'mcl_exec'        => ( is => 'ro', isa => 'Str', default  => 'mcl' );
+has 'output_file'     => ( is => 'ro', isa => 'Str', default  => 'output_groups' );
+
+has '_score'     => ( is => 'ro', isa => 'Str', default  => 'r' );
+
+has '_inflation_value' => ( is => 'ro', isa => 'Num', default => 1.5 );
+has '_logging'         => ( is => 'ro', isa => 'Str', default  => '> /dev/null 2>&1' );
+
+has 'memory_in_mb'  => ( is => 'ro', isa => 'Int',  lazy => 1, builder => '_build_memory_in_mb' );
+
+# Builder for memory_in_mb.
+# Without a blast results file the estimate is a flat 2000 MB; otherwise it is
+# three times the file size in (decimal) megabytes, to cover the worst case
+# scenario, plus the same 2000 MB.
+# Todo: implement this equation for memory estimation if this hardcoded value proves too unstable.
+# http://micans.org/mcl/man/mcl.html#opt-how-much-ram
+sub _build_memory_in_mb
+{
+  my $self = shift;
+
+  my $blast_results_file = $self->blast_results;
+  my $baseline_mb        = 2000;
+
+  return $baseline_mb unless ( -e $blast_results_file );
+
+  # -s gives the size in bytes; truncate to whole megabytes before scaling.
+  my $size_in_mb = int( ( -s $blast_results_file ) / 1000000 );
+
+  return ( $size_in_mb * 3 ) + $baseline_mb;
+}
+
+
+# Builder for _full_mcxdeblast_exec. Returns mcxdeblast_exec unchanged when it
+# names an existing path; otherwise looks it up in PATH and returns it
+# prefixed with 'perl'. When it cannot be found the error is logged and the
+# process exits with a non-zero status, so the failure is visible to callers.
+sub _build__full_mcxdeblast_exec
+{
+	my ($self) = @_;
+	return $self->mcxdeblast_exec if ( -e $self->mcxdeblast_exec );
+
+	my $full_exec = which( $self->mcxdeblast_exec );
+	if ( !defined($full_exec) )
+	{
+		$self->logger->error("Cannot find the mcxdeblast executable, please ensure its in your PATH") ;
+		exit(1);
+	}
+	return "perl $full_exec";
+}
+
+# Build the shell pipeline as one string: mcxdeblast reads the blast results
+# (abc line mode, stderr discarded) and pipes into mcl, which is given the
+# configured inflation value and writes output_file; _logging is appended
+# last.
+sub _command_to_run {
+    my $self = shift;
+
+    my @mcxdeblast_stage = ( $self->_full_mcxdeblast_exec, '-m9', '--score=' . $self->_score, '--line-mode=abc', $self->blast_results, '2> /dev/null' );
+    my @mcl_stage        = ( $self->mcl_exec, '-', '--abc', '-I', $self->_inflation_value, '-o', $self->output_file, $self->_logging );
+
+    # The two stages are joined with a shell pipe.
+    return join( " ", @mcxdeblast_stage, '|', @mcl_stage );
+}
+
+# Run the mcxdeblast | mcl pipeline through the job runner; always returns 1.
+# The command string is built once and shared by the log line and the job.
+sub run {
+    my ($self) = @_;
+    my $command = $self->_command_to_run();
+    $self->logger->info( "Running command: " . $command );
+    my $job_runner_obj = $self->_job_runner_class->new( commands_to_run => [$command], memory_in_mb => $self->memory_in_mb, queue => $self->_queue, cpus => $self->cpus );
+    $job_runner_obj->run();
+    return 1;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+1;
diff --git a/lib/Bio/Roary/External/PostAnalysis.pm b/lib/Bio/Roary/External/PostAnalysis.pm
new file mode 100644
index 0000000..211b746
--- /dev/null
+++ b/lib/Bio/Roary/External/PostAnalysis.pm
@@ -0,0 +1,187 @@
+package Bio::Roary::External::PostAnalysis;
+
+# ABSTRACT: Perform the post analysis
+
+=head1 SYNOPSIS
+
+Perform the post analysis 
+
+   use Bio::Roary::External::PostAnalysis;
+   
+   my $seg= Bio::Roary::External::PostAnalysis->new(
+     fasta_file => 'contigs.fa',
+   );
+   
+   $seg->run();
+
+=cut
+
+use Moose;
+use Cwd  qw(getcwd); 
+with 'Bio::Roary::JobRunner::Role';
+
+has 'input_files'                 => ( is => 'ro', isa => 'ArrayRef', required => 1 );
+has 'exec'                        => ( is => 'ro', isa => 'Str', default  => 'pan_genome_post_analysis' );
+has 'fasta_files'                 => ( is => 'ro', isa => 'ArrayRef', required => 1 );
+has 'output_filename'             => ( is => 'ro', isa => 'Str', required => 1 );
+has 'output_pan_geneome_filename' => ( is => 'ro', isa => 'Str', required => 1 );
+has 'output_statistics_filename'  => ( is => 'ro', isa => 'Str', required => 1 );
+has 'clusters_filename'           => ( is => 'ro', isa => 'Str', required => 1 );
+has 'output_multifasta_files'     => ( is => 'ro', isa => 'Bool', required => 1 );
+has 'dont_delete_files'           => ( is => 'ro', isa => 'Bool', default  => 0 );
+has 'dont_create_rplots'          => ( is => 'rw', isa => 'Bool', default  => 0 );
+has 'dont_split_groups'           => ( is => 'rw', isa => 'Bool', default  => 0 );
+has 'verbose_stats'               => ( is => 'rw', isa => 'Bool', default  => 0 );
+has 'translation_table'           => ( is => 'rw', isa => 'Int',  default  => 11 );
+has 'group_limit'                 => ( is => 'rw', isa => 'Num',  default  => 50000 );
+has 'core_definition'             => ( is => 'ro', isa => 'Num',  default  => 1.0 );
+has 'verbose'                     => ( is => 'rw', isa => 'Bool', default  => 0 );
+has 'mafft'                       => ( is => 'ro', isa => 'Bool', default  => 0 );
+has '_working_directory'          => ( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } );
+has '_gff_fofn'                   => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__gff_fofn' );
+has '_fasta_fofn'                 => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__fasta_fofn'  );
+
+# Overload Role
+has 'memory_in_mb' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_build_memory_in_mb' );
+has '_minimum_memory_mb'    => ( is => 'ro', isa => 'Int', default => 4000 );
+has '_memory_per_sample_mb' => ( is => 'ro', isa => 'Int', default => 30 );
+has '_queue'                => ( is => 'rw', isa => 'Str',  lazy => 1, builder => '_build__queue');
+
+
+# Builder for _queue: choose the queue from the number of input files.
+#   more than 600 samples -> 'basement'
+#   more than 200 samples -> 'long'
+#   otherwise             -> 'normal'
+# The largest threshold is tested first; testing "> 200" first would make the
+# 'basement' branch unreachable.
+sub _build__queue {
+    my ($self) = @_;
+    my $num_samples = @{ $self->input_files };
+
+    return 'basement' if ( $num_samples > 600 );
+    return 'long'     if ( $num_samples > 200 );
+    return 'normal';
+}
+
+
+# Builder for memory_in_mb: _memory_per_sample_mb for every input file, but
+# never less than _minimum_memory_mb.
+sub _build_memory_in_mb {
+    my $self = shift;
+
+    my $sample_count = scalar @{ $self->input_files };
+    my $scaled_mb    = $sample_count * $self->_memory_per_sample_mb;
+    my $floor_mb     = $self->_minimum_memory_mb;
+
+    return ( $scaled_mb < $floor_mb ) ? $floor_mb : $scaled_mb;
+}
+
+# Builder for _gff_fofn: path of the '_gff_files' list inside the working directory (single '/' separator).
+sub _build__gff_fofn {
+    my ($self) = @_;
+    return join( '/', ( $self->_working_directory, '_gff_files' ) );
+}
+
+# Builder for _fasta_fofn: path of the '_fasta_files' list inside the working directory (single '/' separator).
+sub _build__fasta_fofn {
+    my ($self) = @_;
+    return join( '/', ( $self->_working_directory, '_fasta_files' ) );
+}
+
+
+# Write the input GFF paths, one per line, to the _gff_fofn file.
+# Dies if the file cannot be opened or cannot be completely written.
+sub _output_gff_files
+{
+  my ($self) = @_;
+  my $fofn = $self->_gff_fofn;
+  open(my $out_fh, '>', $fofn) or die "Cannot write to $fofn: $!";
+  print {$out_fh} $_."\n" for @{$self->input_files};
+  close($out_fh) or die "Cannot close $fofn: $!";
+}
+
+# Write the FASTA paths, one per line, to the _fasta_fofn file.
+# Dies if the file cannot be opened or cannot be completely written.
+sub _output_fasta_files
+{
+  my ($self) = @_;
+  my $fofn = $self->_fasta_fofn;
+  open(my $out_fh, '>', $fofn) or die "Cannot write to $fofn: $!";
+  print {$out_fh} $_."\n" for @{$self->fasta_files};
+  close($out_fh) or die "Cannot close $fofn: $!";
+}
+
+# Build the command line for the post analysis executable.
+#
+# Side effect: (re)writes the two file-of-filenames files, because the
+# command refers to them through -i and -f.
+#
+# Boolean attributes turn into command line switches only when they are
+# defined and equal to 1. A switch that is off still contributes an empty
+# element, so the joined string can contain consecutive spaces.
+sub _command_to_run {
+    my $self = shift;
+
+    $self->_output_fasta_files;
+    $self->_output_gff_files;
+
+    # Returns the switch text when the named attribute is enabled, else ''.
+    my $switch_if_enabled = sub {
+        my ( $attribute, $switch ) = @_;
+        my $value = $self->$attribute;
+        return ( defined($value) && $value == 1 ) ? $switch : '';
+    };
+
+    my @command_parts = ( $self->exec );
+    push( @command_parts, '-o', $self->output_filename );
+    push( @command_parts, '-p', $self->output_pan_geneome_filename );
+    push( @command_parts, '-s', $self->output_statistics_filename );
+    push( @command_parts, '-c', $self->clusters_filename );
+    push( @command_parts, $switch_if_enabled->( 'output_multifasta_files', '--output_multifasta_files' ) );
+    push( @command_parts, '-i', $self->_gff_fofn );
+    push( @command_parts, '-f', $self->_fasta_fofn );
+    push( @command_parts, '-t', $self->translation_table );
+
+    # Optional switches, emitted in this fixed order.
+    my @optional_switches = (
+        [ 'dont_delete_files',  '--dont_delete_files' ],
+        [ 'dont_create_rplots', '--dont_create_rplots' ],
+        [ 'dont_split_groups',  '--dont_split_groups' ],
+        [ 'verbose_stats',      '--verbose_stats' ],
+        [ 'verbose',            '-v' ],
+        [ 'mafft',              '--mafft' ],
+    );
+    for my $option (@optional_switches) {
+        push( @command_parts, $switch_if_enabled->( @{$option} ) );
+    }
+
+    # core_definition is a fraction; it is passed on as a percentage.
+    push( @command_parts, '-j',            $self->job_runner );
+    push( @command_parts, '--processors',  $self->cpus );
+    push( @command_parts, '--group_limit', $self->group_limit );
+    push( @command_parts, '-cd',           ( $self->core_definition * 100 ) );
+
+    return join( " ", @command_parts );
+}
+
+# Run the post analysis through the configured job runner; always returns 1.
+# The command is built exactly once: _command_to_run also rewrites the
+# file-of-filenames files, so building it again just to log it repeats that work.
+sub run {
+    my ($self) = @_;
+    my $command = $self->_command_to_run;
+    $self->logger->info( "Running command: " . $command );
+    my $job_runner_obj = $self->_job_runner_class->new(
+        commands_to_run => [$command],
+        memory_in_mb    => $self->memory_in_mb,
+        queue           => $self->_queue,
+        dont_wait       => $self->dont_wait,
+        cpus            => $self->cpus 
+    );
+    $job_runner_obj->run();
+    return 1;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+1;
diff --git a/lib/Bio/Roary/External/Prank.pm b/lib/Bio/Roary/External/Prank.pm
new file mode 100644
index 0000000..be40ac3
--- /dev/null
+++ b/lib/Bio/Roary/External/Prank.pm
@@ -0,0 +1,77 @@
+package Bio::Roary::External::Prank;
+
+# ABSTRACT: Wrapper to run prank
+
+=head1 SYNOPSIS
+
+Wrapper to run prank
+   use Bio::Roary::External::Prank;
+   
+	my $prank_obj = Bio::Roary::External::Prank->new(
+	  input_filename  => $fasta_file,
+	  output_filename => $fasta_file.'.aln',
+	  job_runner      => 'Local'
+	);
+	$prank_obj->run();
+=cut
+
+use Moose;
+use File::Spec;
+with 'Bio::Roary::JobRunner::Role';
+
+has 'input_filename'  => ( is => 'ro', isa => 'Str', required => 1 );
+has 'output_filename' => ( is => 'ro', isa => 'Str', default  => 'output' );
+has 'exec'            => ( is => 'ro', isa => 'Str', default  => 'prank' );
+
+# Overload Role
+has 'memory_in_mb' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_build_memory_in_mb' );
+
+# Builder for memory_in_mb: PRANK jobs always request a flat 2000 MB.
+sub _build_memory_in_mb {
+    my $self = shift;
+    return 2000;
+}
+
+# Assemble the shell command that runs PRANK on input_filename
+# (-codon -F -quiet -once, all output discarded) and, when that succeeds,
+# moves the file matching '<output_filename>*.fas' onto output_filename.
+# A missing input file is only reported through the logger; the command
+# string is still returned.
+sub _command_to_run {
+    my $self = shift;
+
+    my $input_file  = $self->input_filename;
+    my $output_file = $self->output_filename;
+    unless ( -e $input_file ) {
+        $self->logger->error( "Input file to PRANK missing: " . $input_file );
+    }
+
+    my @prank_call  = ( $self->exec, "-d=" . $input_file, "-o=" . $output_file, '-codon', '-F', '-quiet', '-once', '> /dev/null 2>&1' );
+    my @rename_call = ( 'mv', $output_file . '*.fas', $output_file );
+
+    # The rename only runs when PRANK exits successfully.
+    return join( ' ', @prank_call, '&&', @rename_call );
+}
+
+# Run PRANK through the configured job runner; always returns 1.
+# The command is built once and reused for the log line and the job, so the
+# "input missing" error raised by _command_to_run is not reported twice.
+sub run {
+    my ($self) = @_;
+    my $command = $self->_command_to_run();
+    $self->logger->info( "Running command: " . $command );
+
+    my $job_runner_obj = $self->_job_runner_class->new(
+        commands_to_run => [$command],
+        memory_in_mb    => $self->memory_in_mb,
+        queue           => $self->_queue,
+        cpus            => $self->cpus
+    );
+    $job_runner_obj->run();
+    return 1;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/ExtractCoreGenesFromSpreadsheet.pm b/lib/Bio/Roary/ExtractCoreGenesFromSpreadsheet.pm
new file mode 100644
index 0000000..9fa6fed
--- /dev/null
+++ b/lib/Bio/Roary/ExtractCoreGenesFromSpreadsheet.pm
@@ -0,0 +1,170 @@
+package Bio::Roary::ExtractCoreGenesFromSpreadsheet;
+
+# ABSTRACT: Take in a spreadsheet produced by the pipeline and identify the core genes.
+
+=head1 SYNOPSIS
+
+Take in a spreadsheet produced by the pipeline and identify the core genes.
+   use Bio::Roary::ExtractCoreGenesFromSpreadsheet;
+   
+   my $obj = Bio::Roary::ExtractCoreGenesFromSpreadsheet->new(
+       spreadsheet        => 'group_statistics.csv',
+     );
+   $obj->ordered_core_genes();
+
+=cut
+
+use Moose;
+use Text::CSV;
+use Bio::Roary::GroupStatistics;
+use POSIX;
+
+has 'spreadsheet'            => ( is => 'ro', isa  => 'Str',      required => 1 );
+has '_csv_parser'            => ( is => 'ro', isa  => 'Text::CSV',lazy     => 1, builder => '_build__csv_parser' );
+has '_input_spreadsheet_fh'  => ( is => 'ro', lazy => 1,          builder  => '_build__input_spreadsheet_fh' );
+has 'ordered_core_genes'     => ( is => 'ro', isa  => 'ArrayRef', lazy     => 1, builder  => '_build_ordered_core_genes' );
+has 'core_definition'        => ( is => 'ro', isa => 'Num', default => 1 );
+has 'sample_names'           => ( is => 'rw', isa => 'ArrayRef', default => sub {[]} );
+has 'sample_names_to_genes'  => ( is => 'rw', isa => 'HashRef',  default => sub {{}} );
+
+has '_number_of_isolates'                 => ( is => 'rw', isa  => 'Int');
+has '_gene_column'                        => ( is => 'rw', isa  => 'Int');
+has '_num_isolates_column'                => ( is => 'rw', isa  => 'Int');
+has '_avg_sequences_per_isolate_column'   => ( is => 'rw', isa  => 'Int');
+has '_genome_fragement_column'            => ( is => 'rw', isa  => 'Int');
+has '_order_within_fragement_column'      => ( is => 'rw', isa  => 'Int');
+has '_min_no_isolates_for_core'           => ( is => 'rw', isa  => 'Num', lazy => 1, builder => '_build__min_no_isolates_for_core' );
+
+# Builder: the isolate count a gene must reach to count as core, i.e.
+# _number_of_isolates scaled by core_definition (the result may be fractional).
+sub _build__min_no_isolates_for_core {
+  my $self = shift;
+  return $self->_number_of_isolates * $self->core_definition;
+}
+
+# Builder for _csv_parser: a Text::CSV created with binary and always_quote enabled.
+sub _build__csv_parser
+{
+  return Text::CSV->new( { always_quote => 1, binary => 1 } );
+}
+
+sub _build__input_spreadsheet_fh {    # builder: read-only handle on the spreadsheet; dies when it cannot be opened
+    my ($self) = @_;
+    open( my $fh, '<', $self->spreadsheet ) or die "Cannot open spreadsheet " . $self->spreadsheet . ": $!";
+    return $fh;
+}
+
+# Set _number_of_isolates to the header column count minus the number of GroupStatistics fixed headers.
+sub _update_number_of_isolates
+{
+  my ($self, $header_row) = @_;
+  $self->_number_of_isolates( scalar( @{$header_row} ) - scalar( @{ Bio::Roary::GroupStatistics->fixed_headers } ) );
+}
+
+# Work out which spreadsheet columns hold the fields of interest, then record
+# the number of isolates and the sample names from the header row.
+# The defaults below reflect the current column ordering; a header cell whose
+# text equals one of the column names overrides that column's default index.
+# Only the hash keys are compared with the header: iterating the bare hash
+# would also compare the numeric default indices and could add stray keys.
+sub _setup_column_mappings
+{
+  my ($self, $header_row) = @_;
+  # current ordering
+  my %columns_of_interest_mappings = (
+    'Gene'                         => 0,
+    'No. isolates'                 => 3,
+    'Avg sequences per isolate'    => 5,
+    'Genome Fragment'              => 6,
+    'Order within Fragment'        => 7,
+    'QC'                           => 10,
+    );
+
+  # Dynamically overwrite the default ordering
+  for my $i (0 .. $#{$header_row})
+  {
+    my $header = $header_row->[$i];
+    next unless defined($header) && exists $columns_of_interest_mappings{$header};
+    $columns_of_interest_mappings{$header} = $i;
+  }
+
+  $self->_gene_column($columns_of_interest_mappings{'Gene'});
+  $self->_num_isolates_column($columns_of_interest_mappings{'No. isolates'});
+  $self->_avg_sequences_per_isolate_column($columns_of_interest_mappings{'Avg sequences per isolate'});
+  $self->_genome_fragement_column($columns_of_interest_mappings{'Genome Fragment'});
+  $self->_order_within_fragement_column($columns_of_interest_mappings{'Order within Fragment'});
+  $self->_update_number_of_isolates($header_row);
+
+  # Get the sample_names: every header cell after the fixed headers
+  my $first_sample_column = $self->_length_of_fixed_headers();
+  my @sample_names = @{$header_row}[ $first_sample_column .. $#{$header_row} ];
+  $self->sample_names(\@sample_names);
+}
+
+# Number of fixed (non-sample) spreadsheet columns; a count in scalar and list context alike.
+sub _length_of_fixed_headers
+{
+    return scalar @{Bio::Roary::GroupStatistics->fixed_headers()};
+}
+
+# For every non-empty cell after the fixed headers, mark the cell's value as
+# present in the sample whose name sits at the same offset in sample_names.
+sub _populate_sample_to_gene_lookup_with_row
+{
+	my ($self, $row) = @_;
+	my $first_sample_column = $self->_length_of_fixed_headers();
+
+	for my $column ( $first_sample_column .. $#{$row} )
+	{
+		my $cell = $row->[$column];
+		next if ( !defined($cell) || $cell eq "" );
+		$self->sample_names_to_genes->{ $self->sample_names->[ $column - $first_sample_column ] }->{$cell} = 1;
+	}
+	return 1;
+}
+
+
+sub _ordered_core_genes # returns an arrayref of core gene names ordered by fragment, then by order within fragment
+{
+  my ($self) = @_;
+  my %ordered_genes; # fragment => { order within fragment => gene name }
+  while ( my $row = $self->_csv_parser->getline( $self->_input_spreadsheet_fh ) ) 
+  {
+    next if(@{$row} < 12); # no genes in group
+    next if(!defined($row->[$self->_gene_column]) || $row->[$self->_gene_column] eq '' ); # no gene name
+    next if(!defined($row->[$self->_avg_sequences_per_isolate_column]) || $row->[$self->_avg_sequences_per_isolate_column] eq '' ); # no average
+    next if(!defined($row->[$self->_genome_fragement_column]) || $row->[$self->_genome_fragement_column] eq '' ); # fragment not defined
+    
+    # Core membership is decided by the threshold below rather than by presence in every isolate.
+    next if ( $row->[$self->_num_isolates_column] < $self->_min_no_isolates_for_core ); # in too few isolates to be core
+    next if($row->[$self->_avg_sequences_per_isolate_column] != 1); # keep only groups averaging exactly one sequence per isolate
+    $ordered_genes{$row->[$self->_genome_fragement_column]}{$row->[$self->_order_within_fragement_column]} = $row->[$self->_gene_column];
+	$self->_populate_sample_to_gene_lookup_with_row($row); # side effect: fills sample_names_to_genes for core rows only
+  }
+  
+  my @ordered_core_genes ;
+  for my $fragment_key(sort {$a <=> $b } keys %ordered_genes) # fragments in ascending numeric order
+  {
+    for my $order_within_fragement(sort {$a <=> $b } keys %{$ordered_genes{$fragment_key}}) # positions in ascending numeric order
+    {
+      push(@ordered_core_genes,$ordered_genes{$fragment_key}{$order_within_fragement});
+    }
+  }
+  return \@ordered_core_genes;
+}
+
+# Builder for ordered_core_genes: read the header row to set up the column
+# mappings, then collect the core genes from the rows that follow it.
+sub _build_ordered_core_genes
+{
+  my $self = shift;
+  $self->_setup_column_mappings( scalar $self->_csv_parser->getline( $self->_input_spreadsheet_fh ) );
+  return $self->_ordered_core_genes();
+}
+
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/ExtractProteomeFromGFF.pm b/lib/Bio/Roary/ExtractProteomeFromGFF.pm
new file mode 100644
index 0000000..3f83345
--- /dev/null
+++ b/lib/Bio/Roary/ExtractProteomeFromGFF.pm
@@ -0,0 +1,197 @@
+package Bio::Roary::ExtractProteomeFromGFF;
+
+# ABSTRACT: Take in a GFF file and create protein sequences in FASTA format
+
+=head1 SYNOPSIS
+
+Take in GFF files and create protein sequences in FASTA format
+   use Bio::Roary::ExtractProteomeFromGFF;
+   
+   my $obj = Bio::Roary::ExtractProteomeFromGFF->new(
+       gff_file        => $fasta_file,
+     );
+   $obj->fasta_file();
+
+=cut
+
+use Moose;
+use Bio::SeqIO;
+use Cwd;
+use Bio::Roary::Exceptions;
+use File::Basename;
+use File::Temp;
+use File::Copy;
+use Bio::Tools::GFF;
+with 'Bio::Roary::JobRunner::Role';
+with 'Bio::Roary::BedFromGFFRole';
+
+has 'gff_file'                       => ( is => 'ro', isa => 'Str',  required => 1 );
+has 'apply_unknowns_filter'          => ( is => 'rw', isa => 'Bool', default  => 1 );
+has 'maximum_percentage_of_unknowns' => ( is => 'ro', isa => 'Num',  default  => 5 );
+has 'output_filename'                => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build_output_filename' );
+has 'fasta_file'                     => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build_fasta_file' );
+has '_working_directory'             => ( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } );
+has '_working_directory_name'        => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__working_directory_name' );
+has 'translation_table'              => ( is => 'rw', isa => 'Int', default => 11 );
+
+# Builder for fasta_file: run the extraction steps in order, filter the
+# resulting file and return its path (output_directory/output_filename).
+sub _build_fasta_file {
+    my $self = shift;
+    for my $step (qw(_extract_nucleotide_regions _convert_nucleotide_to_protein _cleanup_fasta _cleanup_intermediate_files)) { $self->$step; }
+    my $protein_fasta = join( '/', ( $self->output_directory, $self->output_filename ) );
+    $self->_filter_fasta_sequences($protein_fasta);
+    return $protein_fasta;
+}
+
+sub _build__working_directory_name {    # builder: dirname() of the temporary working directory object
+    my $self = shift;
+    return $self->_working_directory->dirname();
+}
+
+sub _build_output_filename {    # builder: '<gff name minus its last extension>.faa' under the working directory
+    my $self = shift;
+    my ($basename) = fileparse( $self->gff_file, qr/\.[^.]*/ );
+    return $self->_working_directory_name . '/' . $basename . '.faa';
+}
+
+
+
+sub _cleanup_intermediate_files {    # delete the unfiltered and the translated intermediate files
+    my $self = shift;
+    unlink( $self->_unfiltered_output_filename );
+    return unlink( $self->_fastatranslate_filename );
+}
+
+sub _nucleotide_fasta_file_from_gff_filename {    # <output_directory>/<output_filename>.intermediate.fa
+    my $self = shift;
+    return $self->output_directory . '/' . $self->output_filename . '.' . 'intermediate.fa';
+}
+
+sub _extracted_nucleotide_fasta_file_from_bed_filename {    # <output_directory>/<output_filename>.intermediate.extracted.fa
+    my $self = shift;
+    return $self->output_directory . '/' . $self->output_filename . '.' . 'intermediate.extracted.fa';
+}
+
+sub _unfiltered_output_filename {    # <output_directory>/<output_filename>.unfiltered.fa
+    my ($self) = @_;
+    return $self->output_directory . '/' . $self->output_filename . '.' . 'unfiltered.fa';
+}
+
+# Run sed and grep over the GFF to copy the lines from the '##FASTA' marker onwards (marker lines excluded) into the intermediate nucleotide FASTA.
+sub _create_nucleotide_fasta_file_from_gff {
+    my $self = shift;
+    my $cmd  = join( '', 'sed -n \'/##FASTA/,//p\' ', $self->gff_file, ' | grep -v \'##FASTA\' > ', $self->_nucleotide_fasta_file_from_gff_filename );
+    $self->logger->debug($cmd);
+    return system($cmd);
+}
+
+# Write the GFF's nucleotide FASTA and a BED file, run 'bedtools getfasta'
+# over them, then delete the intermediate FASTA, the BED file and the .fai file.
+sub _extract_nucleotide_regions {
+    my $self = shift;
+    $self->_create_nucleotide_fasta_file_from_gff;
+    $self->_create_bed_file_from_gff;
+
+    my $genome_fasta = $self->_nucleotide_fasta_file_from_gff_filename;
+    my $cmd = join( '',
+        'bedtools getfasta -s -fi ', $genome_fasta,
+        ' -bed ', $self->_bed_output_filename,
+        ' -fo ',  $self->_extracted_nucleotide_fasta_file_from_bed_filename,
+        ' -name > /dev/null 2>&1' );
+
+    $self->logger->debug($cmd);
+    system($cmd);
+    unlink($genome_fasta);
+    unlink( $self->_bed_output_filename );
+    return unlink( $genome_fasta . '.fai' );
+}
+# Copy the unfiltered protein FASTA to the final output path, removing double
+# quotes from header lines. Does nothing when the input file is absent; dies
+# if a file cannot be opened or the output cannot be completely written.
+sub _cleanup_fasta {
+    my $self    = shift;
+    my $infile  = $self->_unfiltered_output_filename;
+    my $outfile = join('/',($self->output_directory,$self->output_filename));
+    return unless ( -e $infile );
+    open( my $in,  '<', $infile )  or die "Cannot read $infile: $!";
+    open( my $out, '>', $outfile ) or die "Cannot write $outfile: $!";
+    while ( my $line = <$in> ) {
+        chomp $line;
+        $line =~ s/"//g if ( $line =~ /^>/ );
+        print {$out} "$line\n";
+    }
+    close $in; close $out or die "Cannot close $outfile: $!";
+}
+
+sub _fastatranslate_filename {    # <output_directory>/<output_filename>.intermediate.translate.fa
+    my $self = shift;
+    return $self->output_directory . '/' . $self->output_filename . '.' . 'intermediate.translate.fa';
+}
+
+# Translate every record of $inputfile with the configured translation table
+# and write the results to $outputfile; descriptions are cleared. Returns 1.
+sub _fastatranslate {
+    my ( $self, $inputfile, $outputfile ) = @_;
+
+    my $input_fasta_file_obj = Bio::SeqIO->new( -file => $inputfile, -format => 'Fasta' );
+    my $output_protein_file_obj = Bio::SeqIO->new( -file => ">" . $outputfile, -format => 'Fasta', -alphabet => 'protein' );
+
+    while ( my $seq = $input_fasta_file_obj->next_seq ) {
+        $seq->desc(undef);
+        $output_protein_file_obj->write_seq( $seq->translate( -codontable_id => $self->translation_table ) );
+    }
+    return 1;
+}
+
+sub _convert_nucleotide_to_protein {    # translate the extracted nucleotide FASTA, then delete it
+    my ($self) = @_;
+    $self->_fastatranslate( $self->_extracted_nucleotide_fasta_file_from_bed_filename, $self->_unfiltered_output_filename );    # output goes to the unfiltered file
+    unlink( $self->_extracted_nucleotide_fasta_file_from_bed_filename );    # the nucleotide input is removed after translation
+}
+
+# Returns 1 when the sequence holds more 'X' characters than
+# maximum_percentage_of_unknowns percent of its length (the limit is rounded
+# down to a whole number), otherwise 0.
+sub _does_sequence_contain_too_many_unknowns {
+    my ( $self, $sequence_obj ) = @_;
+
+    my $allowed_unknowns = int( ( $sequence_obj->length() * $self->maximum_percentage_of_unknowns ) / 100 );
+    my @unknowns         = $sequence_obj->seq() =~ /X/g;
+
+    return ( scalar(@unknowns) > $allowed_unknowns ) ? 1 : 0;
+}
+
+# Rewrite $filename in place, keeping only the sequences that pass the
+# unknown-residue check and clearing their descriptions. Kept records go to
+# '<filename>.tmp.filtered.fa', which then replaces the original file.
+# Logs an error (and still replaces the file) when nothing is kept. Returns 1.
+# NOTE(review): apply_unknowns_filter is not consulted here - confirm whether
+# the filter is meant to be unconditional.
+sub _filter_fasta_sequences {
+    my ( $self, $filename ) = @_;
+    my $filtered_path = $filename . '.tmp.filtered.fa';
+    my $writer = Bio::SeqIO->new( -file => ">" . $filtered_path, -format => 'Fasta' );
+    my $reader = Bio::SeqIO->new( -file => $filename, -format => 'Fasta' );
+
+    my $kept = 0;
+    while ( my $record = $reader->next_seq() ) {
+        next if ( $self->_does_sequence_contain_too_many_unknowns($record) );
+        $record->desc(undef);
+        $writer->write_seq($record);
+        $kept = 1;
+    }
+
+    if ( $kept == 0 ) {
+        $self->logger->error( "Could not extract any protein sequences from " . $self->gff_file . ". Does the file contain the assembly as well as the annotation?" );
+    }
+
+    move( $filtered_path, $filename );
+    return 1;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
+
diff --git a/lib/Bio/Roary/ExtractProteomeFromGFFs.pm b/lib/Bio/Roary/ExtractProteomeFromGFFs.pm
new file mode 100644
index 0000000..241d3b8
--- /dev/null
+++ b/lib/Bio/Roary/ExtractProteomeFromGFFs.pm
@@ -0,0 +1,80 @@
+package Bio::Roary::ExtractProteomeFromGFFs;
+
+# ABSTRACT: Take in GFF files and create protein sequences in FASTA format
+
+=head1 SYNOPSIS
+
+Take in GFF files and create protein sequences in FASTA format
+   use Bio::Roary::ExtractProteomeFromGFFs;
+   
+   my $plot_groups_obj = Bio::Roary::ExtractProteomeFromGFFs->new(
+       gff_files        => $fasta_files,
+     );
+   $plot_groups_obj->fasta_files();
+
+=cut
+
+use Moose;
+use Bio::Roary::Exceptions;
+use Bio::Roary::ExtractProteomeFromGFF;
+use File::Basename;
+use Cwd qw(getcwd); 
+use File::Temp;
+with 'Bio::Roary::JobRunner::Role';
+
+has 'gff_files'                => ( is => 'ro', isa => 'ArrayRef', required => 1 );
+has 'fasta_files'              => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build_fasta_files' );
+has 'fasta_files_to_gff_files' => ( is => 'ro', isa => 'HashRef',  lazy => 1, builder => '_build_fasta_files_to_gff_files' );
+has 'apply_unknowns_filter'    => ( is => 'rw', isa => 'Bool', default => 1 );
+has '_queue'                   => ( is => 'rw', isa => 'Str',  default => 'small' );
+has 'translation_table'        => ( is => 'rw', isa => 'Int',  default => 11 );
+has 'verbose'                  => ( is => 'rw', isa => 'Bool', default => 0 );
+has 'working_directory'        => ( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } );
+
+# Build a hashref that maps every GFF filename to a
+# Bio::Roary::ExtractProteomeFromGFF object created for that file.
+sub _build__extract_proteome_objects
+{
+  my $self = shift;
+
+  # One extractor per input file, created in input order.
+  my %extractor_for = map {
+    ( $_ => Bio::Roary::ExtractProteomeFromGFF->new( gff_file => $_ ) )
+  } @{ $self->gff_files };
+
+  return \%extractor_for;
+}
+
+# Builder for fasta_files: the values of fasta_files_to_gff_files, sorted as strings.
+sub _build_fasta_files {
+    my $self = shift;
+    return [ sort( values( %{ $self->fasta_files_to_gff_files } ) ) ];
+}
+
+# Builder for fasta_files_to_gff_files. Queues one extract_proteome_from_gff
+# command per GFF file, runs them all through the job runner, and returns a
+# hashref of GFF filename => '<working dir>/<gff basename>.proteome.faa'.
+sub _build_fasta_files_to_gff_files {
+    my ($self) = @_;
+    my $output_suffix = "proteome.faa";
+    my %fasta_files;
+    my @commands_to_run;
+    for my $filename ( @{ $self->gff_files } )
+    {
+        print "Extracting proteins from $filename\n" if($self->verbose);
+        my ($gff_filename_without_directory) = fileparse($filename);
+        $fasta_files{ $filename  } = $self->working_directory.'/'.$gff_filename_without_directory.'.'.$output_suffix;
+        push(@commands_to_run, "extract_proteome_from_gff --translation_table ".$self->translation_table." --apply_unknowns_filter ".$self->apply_unknowns_filter." -d ".$self->working_directory." -o $output_suffix $filename");
+    }
+    #Farm out the computation and block until its ready
+    my $job_runner_obj = $self->_job_runner_class->new( commands_to_run => \@commands_to_run, memory_in_mb => $self->memory_in_mb, queue => $self->_queue, cpus  => $self->cpus);
+    $job_runner_obj->run();
+
+    return \%fasta_files;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
+
diff --git a/lib/Bio/Roary/FilterFullClusters.pm b/lib/Bio/Roary/FilterFullClusters.pm
new file mode 100644
index 0000000..5109a7a
--- /dev/null
+++ b/lib/Bio/Roary/FilterFullClusters.pm
@@ -0,0 +1,144 @@
+package Bio::Roary::FilterFullClusters;
+
+# ABSTRACT: Take a clusters file from CD-hit and the fasta file and output a fasta file without full clusters
+
+=head1 SYNOPSIS
+
+Take a clusters file from CD-hit and the fasta file and output a fasta file without full clusters
+   use Bio::Roary::FilterFullClusters;
+   
+   my $obj = Bio::Roary::FilterFullClusters->new(
+       clusters_filename        => $cluster_file,
+       fasta_file           => $fasta_file,
+       number_of_input_files => 10,
+       output_file => 'filtered_file'
+     );
+   $obj->filter_full_clusters_from_fasta();
+
+=cut
+
+use Moose;
+use Bio::SeqIO;
+with 'Bio::Roary::ClustersRole';
+
+has 'number_of_input_files' => ( is => 'ro', isa => 'Int', required => 1 );
+has 'fasta_file'     => ( is => 'ro', isa => 'Str', required => 1 );
+has 'output_file'    => ( is => 'ro', isa => 'Str', required => 1 );
+has '_greater_than_or_equal' =>  ( is => 'ro', isa => 'Bool', default => 0 );
+has 'cdhit_input_fasta_file'    => ( is => 'ro', isa => 'Str', required => 1 );
+has 'cdhit_output_fasta_file'    => ( is => 'ro', isa => 'Str', required => 1 );
+
+has 'output_groups_file' => ( is => 'ro', isa => 'Str', required => 1 );
+
+has '_full_cluster_gene_names'    => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__full_cluster_gene_names' );
+has '_input_seqio'  => ( is => 'ro', isa => 'Bio::SeqIO', lazy => 1, builder => '_build__input_seqio' );
+has '_output_seqio' => ( is => 'ro', isa => 'Bio::SeqIO', lazy => 1, builder => '_build__output_seqio' );
+
+has '_all_full_cluster_genes'    => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__all_full_cluster_genes' );
+
+# Builder: collect the gene names whose cluster counts as "full". The number
+# of genes listed under a name is compared with number_of_input_files - 1:
+# "at least" when _greater_than_or_equal is 0, "exactly" otherwise.
+# NOTE(review): the flag name reads inverted relative to the comparison it
+# selects - confirm the intent with the callers before changing it.
+sub _build__full_cluster_gene_names {
+  my ($self) = @_;
+
+  my $clusters  = $self->_clustered_genes;
+  my $threshold = $self->number_of_input_files - 1;
+  my %full_cluster_gene_names;
+
+  for my $gene_name ( keys %{$clusters} ) {
+    my $members = $clusters->{$gene_name};
+    next unless defined($members);
+
+    my $cluster_size = @{$members};
+    my $is_full =
+      ( $self->_greater_than_or_equal == 0 )
+      ? $cluster_size >= $threshold
+      : $cluster_size == $threshold;
+
+    $full_cluster_gene_names{$gene_name}++ if $is_full;
+  }
+
+  return \%full_cluster_gene_names;
+}
+
+sub _build__input_seqio {    # builder: FASTA reader over fasta_file
+    my ($self) = @_;
+    return Bio::SeqIO->new( -format => 'Fasta', -file => $self->fasta_file );
+}
+
+sub _build__output_seqio {    # builder: FASTA writer that truncates output_file
+    my ($self) = @_;
+    return Bio::SeqIO->new( -format => 'Fasta', -file => ">".$self->output_file );
+}
+
+# Builder: set (hashref) of every gene that belongs to a full cluster:
+# each full-cluster gene name plus all the genes listed under it.
+sub _build__all_full_cluster_genes {
+   my ($self) = @_;
+
+   my %full_cluster_genes;
+   for my $representative ( keys %{ $self->_full_cluster_gene_names } ) {
+     $full_cluster_genes{$representative}++;
+     for my $member ( @{ $self->_clustered_genes->{$representative} } ) {
+       $full_cluster_genes{$member}++;
+     }
+   }
+
+   return \%full_cluster_genes;
+}
+
+
+# Append one tab-separated line per full cluster (its gene name followed by
+# the genes listed under it) to output_groups_file. Dies when the file cannot
+# be opened or flushed, instead of silently losing the groups.
+sub _create_groups_file {
+  my ($self) = @_;
+  open(my $out_fh, '>>', $self->output_groups_file) or die "Couldnt write to file: " . $self->output_groups_file . ": $!";
+  for my $gene_name (keys %{$self->_full_cluster_gene_names}) {
+    print {$out_fh} $gene_name."\t". join("\t", @{$self->_clustered_genes->{$gene_name}}). "\n";
+  }
+  close($out_fh) or die "Couldnt write to file: " . $self->output_groups_file . ": $!";
+}
+
+
+
+# Copy every sequence of cdhit_input_fasta_file whose display id is not in a
+# full cluster to cdhit_output_fasta_file, then append the full clusters to
+# the groups file. Returns $self.
+sub filter_complete_cluster_from_original_fasta {
+  my ($self) = @_;
+
+  my $reader = Bio::SeqIO->new( -file => $self->cdhit_input_fasta_file, -format => 'Fasta' );
+  my $writer = Bio::SeqIO->new( -file => ">".$self->cdhit_output_fasta_file, -format => 'Fasta' );
+
+  while ( my $sequence = $reader->next_seq() ) {
+    # sequences that belong to a full cluster are left out of the output
+    next if defined( $self->_all_full_cluster_genes->{ $sequence->display_id } );
+    $writer->write_seq($sequence);
+  }
+
+  $self->_create_groups_file;
+  return $self;
+}
+
+# Write every sequence of fasta_file whose display id is not the name of a
+# full cluster to output_file. Returns $self.
+sub filter_full_clusters_from_fasta {
+    my ($self) = @_;
+
+    while ( my $sequence = $self->_input_seqio->next_seq() ) {
+      next if defined( $self->_full_cluster_gene_names->{ $sequence->display_id } );
+      $self->_output_seqio->write_seq($sequence);
+    }
+
+    return $self;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
+
diff --git a/lib/Bio/Roary/FilterUnknownsFromFasta.pm b/lib/Bio/Roary/FilterUnknownsFromFasta.pm
new file mode 100644
index 0000000..ce48f35
--- /dev/null
+++ b/lib/Bio/Roary/FilterUnknownsFromFasta.pm
@@ -0,0 +1,88 @@
+package Bio::Roary::FilterUnknownsFromFasta;
+
+# ABSTRACT: Take in fasta files, remove sequences with too many unknowns and return a list of the new files
+
+=head1 SYNOPSIS
+
+Take in fasta files, remove sequences with too many unknowns and return a list of the new files
+   use Bio::Roary::FilterUnknownsFromFasta;
+   
+   my $obj = Bio::Roary::FilterUnknownsFromFasta->new(
+       fasta_files        => [],
+     );
+   $obj->filtered_fasta_files();
+
+=cut
+
+use Moose;
+use Bio::SeqIO;
+use Cwd;
+use Bio::Roary::Exceptions;
+use File::Basename;
+
+has 'fasta_files'                    => ( is => 'ro', isa => 'ArrayRef',  required => 1 );
+has 'apply_unknowns_filter'          => ( is => 'rw', isa => 'Bool', default => 1 );
+has 'maximum_percentage_of_unknowns' => ( is => 'ro', isa => 'Num',  default  => 5 );
+
+has 'filtered_fasta_files' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build_filtered_fasta_files' );
+
+has 'input_fasta_to_output_fasta' => ( is => 'ro', isa => 'HashRef', default => sub {{}} );
+
+# Builder: filter every input FASTA file and return an arrayref of the
+# resulting file names, in input order.
+sub _build_filtered_fasta_files {
+  my ($self) = @_;
+  my @output_file_names;
+
+  for my $fasta_file ( @{ $self->fasta_files } ) {
+    my ($base_name) = fileparse($fasta_file);
+    push( @output_file_names, $self->_filter_fasta_sequences_and_return_new_file( $base_name, $fasta_file ) );
+  }
+  return \@output_file_names;
+}
+
+# Return 1 when the sequence holds more 'X' characters than allowed, 0
+# otherwise. The allowance is maximum_percentage_of_unknowns percent of the
+# sequence length, rounded down.
+sub _does_sequence_contain_too_many_unknowns {
+  my ($self, $sequence_obj) = @_;
+
+  my $allowed_unknowns = int( ( $sequence_obj->length() * $self->maximum_percentage_of_unknowns ) / 100 );
+
+  # tr/// with an empty replacement list only counts the characters
+  my $residues       = $sequence_obj->seq();
+  my $unknowns_found = ( $residues =~ tr/X// );
+
+  return ( $unknowns_found > $allowed_unknowns ) ? 1 : 0;
+}
+
+
+# Copy $input_file to "<$output_file>.tmp.filtered.fa", skipping sequences
+# with too many unknowns and clearing the description of the ones kept.
+# Records the input -> output mapping and returns the new file name.
+sub _filter_fasta_sequences_and_return_new_file {
+  my ($self, $output_file, $input_file) = @_;
+
+  my $output_filename = $output_file.'.tmp.filtered.fa';
+  my $writer = Bio::SeqIO->new( -file => ">".$output_filename, -format => 'Fasta');
+  my $reader = Bio::SeqIO->new( -file => $input_file, -format => 'Fasta');
+  $self->input_fasta_to_output_fasta->{$input_file} = $output_filename;
+
+  while ( my $sequence = $reader->next_seq() ) {
+    next if $self->_does_sequence_contain_too_many_unknowns($sequence);
+
+    # strip out extra details put in by fastatranslate
+    $sequence->description(undef);
+    $writer->write_seq($sequence);
+  }
+
+  return $output_filename;
+}
+
+
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
+
diff --git a/lib/Bio/Roary/GeneNamesFromGFF.pm b/lib/Bio/Roary/GeneNamesFromGFF.pm
new file mode 100644
index 0000000..804d7ed
--- /dev/null
+++ b/lib/Bio/Roary/GeneNamesFromGFF.pm
@@ -0,0 +1,73 @@
+package Bio::Roary::GeneNamesFromGFF;
+
+# ABSTRACT: Parse a GFF and efficiently extract ID -> Gene Name
+
+=head1 SYNOPSIS
+
+Parse a GFF and efficiently extract ID -> Gene Name
+   use Bio::Roary::GeneNamesFromGFF;
+   
+   my $obj = Bio::Roary::GeneNamesFromGFF->new(
+     gff_file   => 'abc.gff'
+   );
+   $obj->ids_to_gene_name;
+
+=cut
+
+use Moose;
+
+use Bio::Tools::GFF;
+with 'Bio::Roary::ParseGFFAnnotationRole';
+
+has 'ids_to_gene_name' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_ids_to_gene_name' );
+has 'ids_to_product' => ( is => 'rw', isa => 'HashRef', default => sub { {} } );
+has 'ids_to_gene_size' => ( is => 'rw', isa => 'HashRef', default => sub { {} } );
+
+# Parsing with the perl GFF module is exceptionally slow.
+# Builder: feature id => gene name for every feature of gff_file that has an
+# id and a non-empty 'gene' tag. Also fills ids_to_product and ids_to_gene_size.
+sub _build_ids_to_gene_name {
+    my ($self) = @_;
+    my %id_to_gene_name;
+
+    my $gffio = Bio::Tools::GFF->new( -file => $self->gff_file, -gff_version => 3 );
+    while ( my $feature = $gffio->next_feature() ) {
+        my $gene_id = $self->_get_feature_id($feature);
+        next unless ($gene_id);
+
+        if ( $feature->has_tag('gene') ) {
+            my ($gene_name) = $feature->get_tag_values('gene');
+            $gene_name =~ s!"!!g;
+            $id_to_gene_name{$gene_id} = $gene_name if ( $gene_name ne "" );
+        }
+        if ( $feature->has_tag('product') ) {
+            my ($product) = $feature->get_tag_values('product');
+            $self->ids_to_product->{$gene_id} = $product;
+        }
+        # NOTE(review): if start/end are inclusive this is the length minus one - confirm
+        $self->ids_to_gene_size->{$gene_id} = $feature->end - $feature->start;
+    }
+    return \%id_to_gene_name;
+}
+
+# Return the feature's identifier: the first 'ID' tag value, else the first
+# 'locus_tag' value, with single and double quotes removed. Returns undef
+# when neither tag is present or the cleaned value is empty.
+sub _get_feature_id {
+    my ( $self, $feature ) = @_;
+    my $tag_name;
+    for my $candidate ( 'ID', 'locus_tag' ) {
+        next unless $feature->has_tag($candidate);
+        $tag_name = $candidate;
+        last;
+    }
+    return undef unless defined($tag_name);
+    my ($gene_id) = $feature->get_tag_values($tag_name);
+    $gene_id =~ s!["']!!g;
+    return ( $gene_id eq "" ) ? undef : $gene_id;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/GroupLabels.pm b/lib/Bio/Roary/GroupLabels.pm
new file mode 100644
index 0000000..bf45f36
--- /dev/null
+++ b/lib/Bio/Roary/GroupLabels.pm
@@ -0,0 +1,62 @@
+package Bio::Roary::GroupLabels;
+
+# ABSTRACT: Add labels to the groups
+
+=head1 SYNOPSIS
+
+Add labels to the groups
+   use Bio::Roary::GroupLabels;
+   
+   my $obj = Bio::Roary::GroupLabels->new(
+     groups_filename   => 'abc.groups',
+     output_filename => 'output.groups'
+   );
+   $obj->add_labels;
+
+=cut
+
+use Moose;
+use Bio::Roary::Exceptions;
+
+has 'groups_filename' => ( is => 'ro', isa => 'Str', required => 1 );
+has 'output_filename' => ( is => 'ro', isa => 'Str', default  => 'labelled_groups_file' );
+
+has '_input_fh'             => ( is => 'ro', lazy => 1,     builder => '_build__input_fh' );
+has '_output_fh'            => ( is => 'ro', lazy => 1,     builder => '_build__output_fh' );
+has '_group_default_prefix' => ( is => 'ro', isa  => 'Str', default => 'group_' );
+
+sub _build__input_fh {    # builder: read-only handle on groups_filename; throws FileNotFound
+    my ($self) = @_;
+    open( my $fh, '<', $self->groups_filename )    # 3-arg open: the name is never parsed as a mode or pipe
+      or Bio::Roary::Exceptions::FileNotFound->throw( error => "Group file not found:" . $self->groups_filename );
+    return $fh;
+}
+
+sub _build__output_fh {    # builder: write handle on output_filename (truncates the file)
+    my ($self) = @_;
+    my $path = $self->output_filename;
+    open( my $fh, '>', $path )
+      or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => "Couldnt write output file:" . $path );
+    return $fh;
+}
+
+# Copy the groups file to the output file, prefixing each line with
+# "<prefix><n>: " (n counts from 1). Whitespace-only lines are skipped.
+sub add_labels {
+    my ($self) = @_;
+    my $counter = 1;
+    my $in_fh   = $self->_input_fh;
+    while ( my $line = <$in_fh> ) {
+        next if ( $line =~ /^\s*$/ );    # a line read still carries its newline, so it never equals ""
+        print { $self->_output_fh } $self->_group_default_prefix . $counter . ": " . $line;
+        $counter++;
+    }
+    close( $self->_input_fh );
+    close( $self->_output_fh );
+    return 1;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/GroupStatistics.pm b/lib/Bio/Roary/GroupStatistics.pm
new file mode 100644
index 0000000..6af9045
--- /dev/null
+++ b/lib/Bio/Roary/GroupStatistics.pm
@@ -0,0 +1,242 @@
+package Bio::Roary::GroupStatistics;
+
+# ABSTRACT: Create a spreadsheet of statistics about the groups
+
+=head1 SYNOPSIS
+
+Create a spreadsheet of statistics about the groups
+   use Bio::Roary::GroupStatistics;
+   
+   my $obj = Bio::Roary::GroupStatistics->new(
+     output_filename => 'group_statistics.csv',
+     annotate_groups_obj => $annotate_groups_obj,
+     analyse_groups_obj  => $analyse_groups_obj
+   );
+   $obj->create_spreadsheet;
+
+=cut
+
+use Moose;
+use POSIX;
+use Text::CSV;
+use File::Basename;
+use Bio::SeqIO;
+use Bio::Roary::Exceptions;
+use Bio::Roary::AnalyseGroups;
+use Bio::Roary::AnnotateGroups;
+use Bio::Roary::PresenceAbsenceMatrix;
+
+has 'annotate_groups_obj'  => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 );
+has 'analyse_groups_obj'   => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups',  required => 1 );
+has 'output_filename'      => ( is => 'ro', isa => 'Str',                        default  => 'gene_presence_absence.csv' );
+has 'output_rtab_filename' => ( is => 'ro', isa => 'Str',                        default  => 'gene_presence_absence.Rtab' );
+has 'groups_to_contigs'    => ( is => 'ro', isa => 'Maybe[HashRef]');
+has '_output_fh'           => ( is => 'ro', lazy => 1,           builder => '_build__output_fh' );
+has '_text_csv_obj'        => ( is => 'ro', isa  => 'Text::CSV', lazy    => 1, builder => '_build__text_csv_obj' );
+has '_sorted_file_names'   => ( is => 'ro', isa  => 'ArrayRef',  lazy    => 1, builder => '_build__sorted_file_names' );
+has '_groups_to_files'     => ( is => 'ro', isa  => 'HashRef',   lazy    => 1, builder => '_build__groups_to_files' );
+has '_files_to_groups'     => ( is => 'ro', isa  => 'HashRef',   lazy    => 1, builder => '_build__files_to_groups' );
+has '_num_files_in_groups' => ( is => 'ro', isa  => 'HashRef',   lazy    => 1, builder => '_build__num_files_in_groups' );
+has '_verbose'             => ( is => 'ro', isa => 'Bool', default => 0 );
+
+
+sub _build__output_fh {    # builder: write handle on output_filename (truncates the file)
+    my ($self) = @_;
+    my $path = $self->output_filename;
+    open( my $fh, '>', $path )
+      or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => "Couldnt write output file:" . $path );
+    return $fh;
+}
+
+sub _build__text_csv_obj {    # builder: CSV writer - binary mode, every field quoted, CRLF line ends
+    my ($self) = @_;
+    return Text::CSV->new( { eol => "\r\n", always_quote => 1, binary => 1 } );
+}
+
+sub fixed_headers {    # titles of the fixed leading columns, as a fresh arrayref
+    my ($self) = @_;
+    return [
+      'Gene', 'Non-unique Gene name', 'Annotation', 'No. isolates', 'No. sequences', 'Avg sequences per isolate', 'Genome Fragment','Order within Fragment', 'Accessory Fragment','Accessory Order with Fragment', 'QC','Min group size nuc', 'Max group size nuc', 'Avg group size nuc'
+    ];
+}
+
+# One column title per sorted input file: its base name with the first
+# ".gff.proteome.faa" occurrence removed.
+sub _sample_headers {
+    my ($self) = @_;
+    my @header = map {
+        my $sample_name = basename($_);
+        $sample_name =~ s!\.gff\.proteome\.faa!!;
+        $sample_name;
+    } @{ $self->_sorted_file_names };
+    return \@header;
+}
+
+sub _header {    # header row: fixed columns, sample columns, 'Inference' when verbose
+    my ($self) = @_;
+
+    my @header = ( @{ $self->fixed_headers }, @{ $self->_sample_headers } );
+    push( @header, 'Inference' ) if ( $self->_verbose );
+    return \@header;
+}
+
+sub _build__sorted_file_names {    # builder: the analysed FASTA file names, string-sorted
+    my ($self) = @_;
+    my $fasta_files = $self->analyse_groups_obj->fasta_files;
+    return [ sort @{$fasta_files} ];
+}
+
+# For a group name containing the default group prefix, return the consensus
+# gene name when that name does not contain the prefix; otherwise return ''.
+sub _non_unique_name_for_group {
+    my ( $self, $annotated_group_name ) = @_;
+
+    my $prefix = $self->annotate_groups_obj->_group_default_prefix;
+    return '' unless ( $annotated_group_name =~ /$prefix/ );
+
+    my $consensus_name = $self->annotate_groups_obj->_consensus_gene_name_for_group($annotated_group_name);
+    return '' if ( $consensus_name =~ /$prefix/ );
+
+    return $consensus_name;
+}
+
+# Builder: group => { file name => [ the group's genes found in that file ] }.
+sub _build__groups_to_files {
+    my ($self) = @_;
+    my %groups_to_files;
+    for my $group ( @{ $self->annotate_groups_obj->_groups } ) {
+        my $genes = $self->annotate_groups_obj->_groups_to_id_names->{$group};
+        my %genes_by_file;
+        for my $gene_name ( @{$genes} ) {
+            my $filename = $self->analyse_groups_obj->_genes_to_file->{$gene_name};
+            push( @{ $genes_by_file{$filename} }, $gene_name );
+        }
+        $groups_to_files{$group} = \%genes_by_file;
+    }
+    return \%groups_to_files;
+}
+
+# Builder: invert _groups_to_files into
+# file name => [ groups that have at least one gene in that file ].
+sub _build__files_to_groups {
+  my ($self) = @_;
+  my %files_to_groups;
+
+  my $groups_to_files = $self->_groups_to_files;
+  for my $group ( keys %{$groups_to_files} ) {
+    for my $filename ( keys %{ $groups_to_files->{$group} } ) {
+      push( @{ $files_to_groups{$filename} }, $group );
+    }
+  }
+
+  return \%files_to_groups;
+}
+
+# Builder: group => number of files counted by the analyse object for the
+# genes of that group.
+sub _build__num_files_in_groups {
+    my ($self) = @_;
+    my %num_files_in_groups;
+    for my $group ( @{ $self->annotate_groups_obj->_groups } ) {
+        my $genes = $self->annotate_groups_obj->_groups_to_id_names->{$group};
+        $num_files_in_groups{$group} = $self->analyse_groups_obj->_count_num_files_in_group($genes);
+    }
+    return \%num_files_in_groups;
+}
+
+# Build the spreadsheet row (arrayref) for $group:
+#   - the fixed columns: names, annotation, isolate/sequence counts, contig
+#     placement, QC comment and min/max/average nucleotide group size;
+#   - one cell per sorted input file with that file's genes joined by tabs
+#     ('' when the file contributes no gene);
+#   - in verbose mode the full annotation replaces the annotation column and
+#     an inference column is appended.
+# Undefined fixed cells are written as empty strings.
+sub _row {
+    my ( $self, $group ) = @_;
+    my $genes = $self->annotate_groups_obj->_groups_to_id_names->{$group};
+
+    my $num_isolates_in_group  = $self->analyse_groups_obj->_count_num_files_in_group($genes);
+    my $num_sequences_in_group = $#{$genes} + 1;
+
+    # Guard the ratio: a group whose genes map to no file must not abort the
+    # whole spreadsheet with a division by zero.
+    my $avg_sequences_per_isolate =
+      $num_isolates_in_group
+      ? ceil( ( $num_sequences_in_group / $num_isolates_in_group ) * 100 ) / 100
+      : 0;
+
+    my $annotation           = $self->annotate_groups_obj->consensus_product_for_id_names($genes);
+    my $annotated_group_name = $self->annotate_groups_obj->_groups_to_consensus_gene_names->{$group};
+    my $duplicate_gene_name  = $self->_non_unique_name_for_group($annotated_group_name);
+
+    # Contig placement cells stay empty unless groups_to_contigs knows the group
+    my %contig_info;
+    if ( defined( $self->groups_to_contigs ) && defined( $self->groups_to_contigs->{$annotated_group_name} ) ) {
+        %contig_info = %{ $self->groups_to_contigs->{$annotated_group_name} };
+    }
+
+    my $group_size = $self->annotate_groups_obj->group_nucleotide_lengths->{$group};
+
+    my @row = (
+        $annotated_group_name,  $duplicate_gene_name,    $annotation,
+        $num_isolates_in_group, $num_sequences_in_group, $avg_sequences_per_isolate,
+        $contig_info{label},           $contig_info{order},              # genome fragment and order within it
+        $contig_info{accessory_label}, $contig_info{accessory_order},    # accessory fragment and order within it
+        $contig_info{comment},                                           # QC
+        $group_size->{min}, $group_size->{max}, $group_size->{average}
+    );
+
+    # Missing values become empty cells
+    @row = map { defined($_) ? $_ : '' } @row;
+
+    # One cell per input file, in sorted file name order
+    for my $filename ( @{ $self->_sorted_file_names } ) {
+        my $group_to_file_genes = $self->_groups_to_files->{$group}->{$filename};
+        if ( defined($group_to_file_genes) && @{$group_to_file_genes} > 0 ) {
+            push( @row, join( "\t", @{$group_to_file_genes} ) );
+        }
+        else {
+            push( @row, '' );
+        }
+    }
+
+    ## ADD INFERENCE AND FULL ANNOTATION IF VERBOSE REQUESTED ##
+    if ( $self->_verbose ) {
+        $row[2] = $self->annotate_groups_obj->full_annotation($group);
+        push( @row, $self->annotate_groups_obj->inference($group) );
+    }
+
+    return \@row;
+}
+
+# Write the presence/absence matrix to output_rtab_filename. Returns $self.
+sub create_rtab {
+    my ($self) = @_;
+    my @matrix_args = (
+        output_filename     => $self->output_rtab_filename,
+        annotate_groups_obj => $self->annotate_groups_obj,
+        sorted_file_names   => $self->_sorted_file_names,
+        groups_to_files     => $self->_groups_to_files,
+        num_files_in_groups => $self->_num_files_in_groups,
+        sample_headers      => $self->_sample_headers,
+    );
+    Bio::Roary::PresenceAbsenceMatrix->new(@matrix_args)->create_matrix_file;
+    return $self;
+}
+
+# Write the header row, then one row per group: groups found in the most files first, ties by name.
+sub create_spreadsheet {
+    my ($self) = @_;
+    $self->_text_csv_obj->print( $self->_output_fh, $self->_header );
+    my $file_count_for = $self->_num_files_in_groups;
+    for my $group ( sort { $file_count_for->{$b} <=> $file_count_for->{$a} || $a cmp $b } keys %{$file_count_for} ) {
+        $self->_text_csv_obj->print( $self->_output_fh, $self->_row($group) );
+    }
+    close( $self->_output_fh );
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/InflateClusters.pm b/lib/Bio/Roary/InflateClusters.pm
new file mode 100644
index 0000000..8051fc3
--- /dev/null
+++ b/lib/Bio/Roary/InflateClusters.pm
@@ -0,0 +1,109 @@
+package Bio::Roary::InflateClusters;
+
+# ABSTRACT: Take the clusters file from cd-hit and use it to inflate the output of MCL
+
+=head1 SYNOPSIS
+
+Take the clusters file from cd-hit and use it to inflate the output of MCL
+   use Bio::Roary::InflateClusters;
+   
+   my $obj = Bio::Roary::InflateClusters->new(
+     clusters_filename  => 'example.clstr',
+     mcl_filename       => 'example.mcl',
+     output_file        => 'example.output'
+   );
+   $obj->inflate;
+
+=cut
+
+use Moose;
+use Bio::Roary::Exceptions;
+with 'Bio::Roary::ClustersRole';
+
+has 'mcl_filename'      => ( is => 'ro', isa => 'Str', required => 1 );
+has 'output_file'       => ( is => 'ro', isa => 'Str', default  => 'inflated_results' );
+has '_mcl_fh'           => ( is => 'ro',lazy => 1, builder => '_build__mcl_fh' );
+has '_output_fh'        => ( is => 'ro',lazy => 1, builder => '_build__output_fh' );
+has 'cdhit_groups_filename'  => ( is => 'ro', isa => 'Maybe[Str]' );
+
+sub _build__output_fh {    # builder: write handle on output_file (truncates the file)
+  my ($self) = @_;
+  my $path = $self->output_file;
+  open( my $fh, '>', $path ) or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => 'Cant write to file: ' . $path );
+  return $fh;
+}
+
+sub _build__mcl_fh {    # builder: read-only handle on mcl_filename; throws FileNotFound
+   my($self) = @_;
+   # Three-argument open: the filename can never be parsed as a mode or a pipe
+   open(my $fh, '<', $self->mcl_filename) or Bio::Roary::Exceptions::FileNotFound->throw( error => 'Cant open file: ' . $self->mcl_filename );
+   return $fh;
+}
+
+# Expand one MCL line: split it on whitespace, inflate every gene name with
+# _inflate_gene, and return the results joined by single spaces.
+sub _inflate_line {
+   my($self, $line) = @_;
+   chomp($line);
+
+   my @inflated_genes;
+   for my $gene_name ( split(/[\t\s]+/, $line) ) {
+     push(@inflated_genes, $self->_inflate_gene($gene_name));
+   }
+   return join(' ', @inflated_genes);
+}
+
+# Return $gene_name followed by the tab-separated genes clustered under it,
+# or $gene_name alone when it has no cluster entry. A cluster that has been
+# used is removed from _clustered_genes.
+sub _inflate_gene {
+   my($self, $gene_name) = @_;
+   my $members = $self->_clustered_genes->{$gene_name};
+   return $gene_name unless defined($members);
+
+   delete($self->_clustered_genes->{$gene_name});
+   return $gene_name."\t". join("\t",@{$members});
+}
+
+# Write the inflated groups to output_file in three parts:
+#   1. every MCL line, with each gene expanded by _inflate_line;
+#   2. the clusters still present in _clustered_genes after that pass;
+#   3. when cdhit_groups_filename is set, the lines of that file, verbatim.
+# Throws FileNotFound when the cd-hit groups file cannot be opened.
+sub inflate {
+  my($self) = @_;
+  my $mcl_fh = $self->_mcl_fh;
+
+  # Inflate genes from cdhit which were sent to mcl
+  while ( my $line = <$mcl_fh> ) {
+    print { $self->_output_fh } $self->_inflate_line($line) . "\n";
+  }
+
+  # Inflate any clusters that were in the clusters file but not sent to mcl
+  for my $gene_name (keys %{$self->_clustered_genes}) {
+    next unless(defined($self->_clustered_genes->{$gene_name}));
+    print { $self->_output_fh } $gene_name."\t". join("\t",@{$self->_clustered_genes->{$gene_name}})."\n";
+  }
+
+  if(defined($self->cdhit_groups_filename)) {
+    # Add clusters which were excluded because the groups were full at the cdhit stage.
+    # Three-argument open: the filename can never be parsed as a mode or a pipe.
+    open(my $cdhit_fh, '<', $self->cdhit_groups_filename)
+      or Bio::Roary::Exceptions::FileNotFound->throw( error => "CD hit group file not found: " . $self->cdhit_groups_filename);
+
+    while ( my $line = <$cdhit_fh> ) {
+      print { $self->_output_fh } $line ;
+    }
+
+    # Release the handle once the groups have been copied
+    close($cdhit_fh);
+  }
+
+  close($self->_output_fh);
+  1;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/IterativeCdhit.pm b/lib/Bio/Roary/IterativeCdhit.pm
new file mode 100644
index 0000000..702f9ed
--- /dev/null
+++ b/lib/Bio/Roary/IterativeCdhit.pm
@@ -0,0 +1,114 @@
+package Bio::Roary::IterativeCdhit;
+
+# ABSTRACT:  Run CDhit iteratively with reducing thresholds, removing full clusters each time
+
+=head1 SYNOPSIS
+
+Run CDhit iteratively with reducing thresholds, removing full clusters each time
+   use Bio::Roary::IterativeCdhit;
+   
+   my $obj = Bio::Roary::IterativeCdhit->new(
+     output_cd_hit_filename   => 'output_cd_hit_filename.fa',
+     output_combined_filename => 'output_combined_filename.fa',
+     number_of_input_files     => 5,
+     output_filtered_clustered_fasta => 'output_filtered_clustered_fasta.fa',
+   );
+   $obj->run;
+
+=cut
+
+use Moose;
+use Bio::SeqIO;
+use Bio::Roary::Exceptions;
+use Bio::Roary::External::Cdhit;
+use Bio::Roary::FilterFullClusters;
+use File::Copy;
+use Log::Log4perl qw(:easy);
+# CD hit is run locally
+
+has 'output_cd_hit_filename'          => ( is => 'ro', isa => 'Str', required => 1 );
+has 'output_combined_filename'        => ( is => 'ro', isa => 'Str', required => 1 );
+has 'number_of_input_files'           => ( is => 'ro', isa => 'Int', required => 1 );
+has 'output_filtered_clustered_fasta' => ( is => 'ro', isa => 'Str', required => 1 );
+
+has 'lower_bound_percentage'          => ( is => 'ro', isa => 'Num', default => 0.98 );
+has 'upper_bound_percentage'          => ( is => 'ro', isa => 'Num', default => 0.99 );
+has 'step_size_percentage'            => ( is => 'ro', isa => 'Num', default => 0.005 );
+has 'cpus'                            => ( is => 'ro', isa => 'Int', default => 1 );
+has 'logger'                          => ( is => 'ro', lazy => 1, builder => '_build_logger');
+
+# Builder: initialise Log4perl's easy mode and return the default logger.
+sub _build_logger {
+    my ($self) = @_;
+
+    Log::Log4perl->easy_init(level => $ERROR);
+    return scalar get_logger();
+}
+
+
+# Filter complete clusters with a match value of 1, then with each value from
+# upper_bound_percentage down to lower_bound_percentage in steps of
+# step_size_percentage, and finally run cd-hit on what is left at the lower
+# bound. Returns the clusters filename of that final run.
+# NOTE(review): the threshold is stepped by repeated floating point
+# subtraction, so whether the last step still reaches the lower bound depends
+# on rounding - confirm when changing the bounds or the step size.
+sub run {
+    my ($self) = @_;
+
+    $self->filter_complete_clusters( $self->output_cd_hit_filename, 1, $self->output_combined_filename,
+        $self->number_of_input_files, $self->output_filtered_clustered_fasta, 1 );
+
+    my $percent_match = $self->upper_bound_percentage;
+    while ( $percent_match >= $self->lower_bound_percentage ) {
+        $self->filter_complete_clusters( $self->output_cd_hit_filename, $percent_match, $self->output_combined_filename,
+            $self->number_of_input_files, $self->output_filtered_clustered_fasta, 0 );
+        $percent_match -= $self->step_size_percentage;
+    }
+
+    my $lower_bound = $self->lower_bound_percentage;
+    my $cdhit_obj = Bio::Roary::External::Cdhit->new(
+        input_file                   => $self->output_combined_filename,
+        output_base                  => $self->output_cd_hit_filename,
+        _length_difference_cutoff    => $lower_bound,
+        _sequence_identity_threshold => $lower_bound,
+        cpus                         => $self->cpus,
+        logger                       => $self->logger
+    );
+    $cdhit_obj->run();
+    return $cdhit_obj->clusters_filename;
+}
+
+# Run cd-hit on $output_combined_filename at $percentage_match, filter out the
+# full clusters and replace $output_combined_filename with the filtered FASTA.
+# Throws CouldntWriteToFile when the replacement fails; returns true otherwise.
+sub filter_complete_clusters {
+    my ( $self, $output_cd_hit_filename, $percentage_match, $output_combined_filename, $number_of_input_files,
+        $output_filtered_clustered_fasta, $greater_than_or_equal ) = @_;
+    my $cdhit_obj = Bio::Roary::External::Cdhit->new(
+        input_file                   => $output_combined_filename,
+        output_base                  => $output_cd_hit_filename,
+        _length_difference_cutoff    => $percentage_match,
+        _sequence_identity_threshold => $percentage_match,
+        cpus                         => $self->cpus,
+    );
+    $cdhit_obj->run();
+    my $filter_clusters = Bio::Roary::FilterFullClusters->new(
+        clusters_filename       => $cdhit_obj->clusters_filename,
+        fasta_file              => $output_cd_hit_filename,
+        number_of_input_files   => $number_of_input_files,
+        output_file             => $output_filtered_clustered_fasta,
+        _greater_than_or_equal  => $greater_than_or_equal,
+        cdhit_input_fasta_file  => $output_combined_filename,
+        cdhit_output_fasta_file => $output_combined_filename . '.filtered',
+        output_groups_file      => $output_combined_filename . '.groups'
+    );
+    $filter_clusters->filter_complete_cluster_from_original_fasta();
+    move( $filter_clusters->cdhit_output_fasta_file, $output_combined_filename )    # a failed move must not go unnoticed
+      or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => "Couldnt replace $output_combined_filename with the filtered file: $!" );
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/JobRunner/Local.pm b/lib/Bio/Roary/JobRunner/Local.pm
new file mode 100644
index 0000000..b0960ea
--- /dev/null
+++ b/lib/Bio/Roary/JobRunner/Local.pm
@@ -0,0 +1,64 @@
+package Bio::Roary::JobRunner::Local;
+
+# ABSTRACT: Execute a set of commands locally
+
+=head1 SYNOPSIS
+
+ Execute a set of commands locally
+   use Bio::Roary::JobRunner::Local;
+   
+   my $obj = Bio::Roary::JobRunner::Local->new(
+     commands_to_run   => ['ls', 'echo "abc"'],
+   );
+   $obj->run();
+
+=cut
+
+use Moose;
+use Log::Log4perl qw(:easy);
+
+has 'commands_to_run' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
+has 'logger'          => ( is => 'ro', lazy => 1, builder => '_build_logger');
+has 'verbose'         => ( is => 'rw', isa => 'Bool', default => 0 );
+has 'memory_in_mb'    => ( is => 'rw', isa => 'Int',  default => '200' );
+
+# Run each command in order via system(); a failure is logged, the rest still run. Returns 1.
+sub run {
+    my ($self) = @_;
+    for my $command_to_run ( @{ $self->commands_to_run } ) {
+        $self->logger->info($command_to_run);
+        system($command_to_run) == 0 or $self->logger->error("Command failed (status $?): $command_to_run");
+    }
+    1;
+}
+
+
+sub _construct_dependancy_params {    # this runner has no dependency parameters
+  my ($self) = @_;
+
+  return '';
+}
+
+sub submit_dependancy_job {    # log the command, run it at once and return system()'s status
+    my ( $self, $command_to_run ) = @_;
+
+    $self->logger->info($command_to_run);
+    return system($command_to_run);
+}
+
+# Builder: initialise Log4perl's easy mode and return the default logger.
+# The level depends on the verbose attribute.
+sub _build_logger {
+    my ($self) = @_;
+
+    # DEBUG when verbose is requested, ERROR otherwise
+    my $level = $self->verbose ? $DEBUG : $ERROR;
+    Log::Log4perl->easy_init($level);
+
+    my $logger = get_logger();
+    return $logger;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/JobRunner/Parallel.pm b/lib/Bio/Roary/JobRunner/Parallel.pm
new file mode 100644
index 0000000..0660384
--- /dev/null
+++ b/lib/Bio/Roary/JobRunner/Parallel.pm
@@ -0,0 +1,69 @@
+package Bio::Roary::JobRunner::Parallel;
+
+# ABSTRACT: Use GNU Parallel
+
+=head1 SYNOPSIS
+
+ Execute a set of commands using GNU parallel
+   use Bio::Roary::JobRunner::Parallel;
+   
+   my $obj = Bio::Roary::JobRunner::Parallel->new(
+     commands_to_run   => ['ls', 'echo "abc"'],
+     cpus => 4
+   );
+   $obj->run();
+
+=cut
+
+use Moose;
+use File::Temp qw/ tempfile /;
+use Log::Log4perl qw(:easy);
+
+has 'commands_to_run' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
+has 'cpus'            => ( is => 'ro', isa => 'Int',      default => 1 );
+has 'logger'          => ( is => 'ro', lazy => 1, builder => '_build_logger');
+has 'verbose'         => ( is => 'rw', isa => 'Bool', default => 0 );
+has 'memory_in_mb'    => ( is => 'rw', isa => 'Int',  default => '200' );
+
+# Log every command, then pipe them (one per line) to GNU parallel with -j cpus. Returns 1.
+sub run {
+    my ($self) = @_;
+    for my $command_to_run ( @{ $self->commands_to_run } ) {
+        $self->logger->info($command_to_run);
+    }
+    open(my $fh,"|-","parallel --gnu -j ".$self->cpus) || die "GNU Parallel failed";
+    print $fh join("\n", @{ $self->commands_to_run });
+    # close() on a pipe waits for the child and is false when it exits non-zero
+    close($fh) or $self->logger->error("GNU Parallel reported a failure (status $?)");
+    1;
+}
+
+sub _construct_dependancy_params {    # this runner has no dependency parameters
+  my ($self) = @_;
+
+  return '';
+}
+
+sub submit_dependancy_job {    # log the command, run it at once and return system()'s status
+    my ( $self, $command_to_run ) = @_;
+
+    $self->logger->info($command_to_run);
+    return system($command_to_run);
+}
+
+# Builder: initialise Log4perl's easy mode and return the default logger.
+# The level depends on the verbose attribute.
+sub _build_logger {
+    my ($self) = @_;
+
+    # DEBUG when verbose is requested, ERROR otherwise
+    my $level = $self->verbose ? $DEBUG : $ERROR;
+    Log::Log4perl->easy_init($level);
+
+    my $logger = get_logger();
+    return $logger;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/JobRunner/Role.pm b/lib/Bio/Roary/JobRunner/Role.pm
new file mode 100644
index 0000000..f8f44e0
--- /dev/null
+++ b/lib/Bio/Roary/JobRunner/Role.pm
@@ -0,0 +1,65 @@
+package Bio::Roary::JobRunner::Role;
+
+# ABSTRACT: A role to add job runner functionality
+
+=head1 SYNOPSIS
+
+A role to add job runner functionality
+   with 'Bio::Roary::JobRunner::Role';
+
+=cut
+
+use Moose::Role;
+use Log::Log4perl qw(:easy);
+use File::Spec;
+
+has 'job_runner'        => ( is => 'rw', isa => 'Str',  default  => 'Local' );
+has '_job_runner_class' => ( is => 'ro', isa => 'Str',  lazy => 1, builder => '_build__job_runner_class' );
+has 'memory_in_mb'      => ( is => 'rw', isa => 'Int',  default => '200' );
+has '_queue'            => ( is => 'rw', isa => 'Str',  default => 'normal' );
+has 'dont_wait'         => ( is => 'rw', isa => 'Bool', default => 0 );
+has 'cpus'              => ( is => 'ro', isa => 'Int',      default => 1 );
+has 'logger'            => ( is => 'ro', lazy => 1, builder => '_build_logger');
+has 'verbose'           => ( is => 'rw', isa => 'Bool', default => 0 );
+
+sub _build_logger
+{
+    # Builder for the lazy 'logger' attribute declared by this role.
+    my ($self) = @_;
+
+    # Debug-level logging when 'verbose' is set, otherwise errors only.
+    my $threshold = $ERROR;
+    $threshold = $DEBUG if $self->verbose;
+
+    Log::Log4perl->easy_init($threshold);
+    return get_logger();
+}
+
+sub _build__job_runner_class {    # builder: resolve 'job_runner' to a loaded Bio::Roary::JobRunner::* class name
+    my ($self) = @_;
+    my $job_runner_class = "Bio::Roary::JobRunner::" . $self->job_runner;
+    eval "require $job_runner_class; 1" or $job_runner_class->can('new') or die "Could not load $job_runner_class: $@";    # load errors were previously swallowed
+    return $job_runner_class;
+}
+
+# Locate an executable. $executables is an array ref of candidate program names.
+# Returns the first candidate found executable on the PATH (directories are searched
+# in order, candidates in order within each directory), else the first candidate.
+sub _find_exe {
+  my($self,$executables) = @_;
+  my $preferred = $executables->[0];
+
+  # An explicit path (anything containing a slash) is passed straight through.
+  return $preferred if $preferred =~ m!/!;
+
+  for my $path_dir (File::Spec->path) {
+      for my $candidate (@{$executables}) {
+          my $candidate_path = File::Spec->catfile($path_dir, $candidate);
+          return $candidate_path if -x $candidate_path;
+      }
+  }
+  return $preferred;
+}
+
+
+1;
diff --git a/lib/Bio/Roary/LookupGeneFiles.pm b/lib/Bio/Roary/LookupGeneFiles.pm
new file mode 100644
index 0000000..7821f61
--- /dev/null
+++ b/lib/Bio/Roary/LookupGeneFiles.pm
@@ -0,0 +1,53 @@
+package Bio::Roary::LookupGeneFiles;
+
+# ABSTRACT: Take in an ordering of genes and a directory and return an ordered list of file locations
+
+=head1 SYNOPSIS
+
+Take in an ordering of genes and a directory and return an ordered list of file locations
+   use Bio::Roary::LookupGeneFiles;
+   
+   my $obj = Bio::Roary::LookupGeneFiles->new(
+       multifasta_directory        => 'pan_genome_sequences',
+       ordered_genes           => ['gene5','gene2','gene3'],
+
+     );
+   $obj->ordered_gene_files();
+
+=cut
+
+use Moose;
+
+has 'multifasta_directory' => ( is => 'ro', isa => 'Str', default => 'pan_genome_sequences' );
+has 'ordered_genes'        => ( is => 'ro', isa => 'ArrayRef', required => 1 );
+
+has 'ordered_gene_files' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build_ordered_gene_files' );
+
+
+# Builder for 'ordered_gene_files': map each gene name, in order, to its alignment file
+# ("<multifasta_directory>/<name>.fa.aln") and keep only the files that exist.
+sub _build_ordered_gene_files
+{
+  my ($self) = @_;
+  my @gene_files;
+  for my $gene (@{$self->ordered_genes})
+  {
+    # Sanitise a copy: $gene aliases the element of 'ordered_genes', so substituting on it
+    # directly rewrote the caller's gene names as a side effect.
+    ( my $sanitised_gene = $gene ) =~ s!\W!_!g;
+    my $gene_filepath = join('/',($self->multifasta_directory, $sanitised_gene.'.fa.aln'));
+    if ( -e $gene_filepath ) {
+      push(@gene_files, $gene_filepath);
+    }
+    else {
+      print "Core gene file missing: ". $gene_filepath."\n";
+    }
+  }
+  return \@gene_files;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
+
diff --git a/lib/Bio/Roary/MergeMultifastaAlignments.pm b/lib/Bio/Roary/MergeMultifastaAlignments.pm
new file mode 100644
index 0000000..d2dd55e
--- /dev/null
+++ b/lib/Bio/Roary/MergeMultifastaAlignments.pm
@@ -0,0 +1,121 @@
+package Bio::Roary::MergeMultifastaAlignments;
+
+# ABSTRACT: Merge multifasta alignment files with equal numbers of sequences.
+
+=head1 SYNOPSIS
+
+Merge multifasta alignment files with equal numbers of sequences. So each sequence in each file gets concatenated together.  It is assumed the 
+sequences are in the correct order.
+   use Bio::Roary::MergeMultifastaAlignments;
+   
+   my $obj = Bio::Roary::MergeMultifastaAlignments->new(
+     multifasta_files => [],
+     output_filename  => 'output_merged.aln'
+   );
+   $obj->merge_files;
+
+=cut
+
+use Moose;
+use Bio::SeqIO;
+use Bio::Roary::Output::CoreGeneAlignmentCoordinatesEMBL;
+
+has 'multifasta_files'       => ( is => 'ro', isa => 'ArrayRef',   required => 1 );
+has 'sample_names'           => ( is => 'ro', isa => 'ArrayRef',   required => 1 );
+has 'sample_names_to_genes'  => ( is => 'rw', isa => 'HashRef',    required => 1 );
+has 'output_filename'        => ( is => 'ro', isa => 'Str',        default  => 'core_alignment.aln' );
+has 'output_header_filename' => ( is => 'ro', isa => 'Str',        default  => 'core_alignment_header.embl' );
+has '_output_seqio_obj'      => ( is => 'ro', isa => 'Bio::SeqIO', lazy     => 1, builder => '_build__output_seqio_obj' );
+has '_gene_lengths'          => ( is => 'rw', isa => 'HashRef',    lazy     => 1, builder => '_build__gene_lengths' );
+has '_gene_to_sequence' => ( is => 'rw', isa => 'HashRef', default => sub { {} } );
+has '_sorted_multifasta_files' => ( is => 'rw', isa => 'ArrayRef', lazy => 1, builder => '_build__sorted_multifasta_files' );
+
+sub BUILD {
+    my ($self) = @_;
+    $self->_gene_lengths;
+}
+
+sub _input_seq_io_obj {
+    my ( $self, $filename ) = @_;
+    return Bio::SeqIO->new( -file => $filename, -format => 'Fasta' );
+}
+
+sub _build__output_seqio_obj {
+    my ($self) = @_;
+    return Bio::SeqIO->new( -file => ">" . $self->output_filename, -format => 'Fasta' );
+}
+
+sub _build__gene_lengths {
+    my ($self) = @_;
+    my %gene_lengths;
+    for my $filename ( @{ $self->_sorted_multifasta_files } ) {
+        my $seq_io = $self->_input_seq_io_obj($filename);
+        next unless ( defined($seq_io) );
+        while ( my $seq_record = $seq_io->next_seq ) {
+
+            # Save all of the gene sequences to memory, massive speedup but a bit naughty.
+            $self->_gene_to_sequence->{$filename}->{ $seq_record->display_id } = $seq_record->seq;
+            $gene_lengths{$filename} = $seq_record->length() if ( !defined( $gene_lengths{$filename} ) );
+        }
+    }
+
+    return \%gene_lengths;
+}
+
+sub _build__sorted_multifasta_files {
+    my ($self) = @_;
+    my @sorted_gene_files = sort @{ $self->multifasta_files };
+    return \@sorted_gene_files;
+}
+
+sub _sequence_for_sample_from_gene_file {
+    my ( $self, $sample_name, $gene_file ) = @_;
+    # Return the sequence of the sample's gene in this file. Gene ids are visited in sorted order so
+    # that the same one is chosen on every run should the sample own several genes of the file.
+    for my $gene_id ( sort keys %{ $self->_gene_to_sequence->{$gene_file} } ) {
+        if ( defined( $self->sample_names_to_genes->{$sample_name}->{$gene_id} ) ) {
+            return $self->_gene_to_sequence->{$gene_file}->{$gene_id};
+        }
+    }
+    return $self->_padded_string_for_gene_file($gene_file);    # sample has no gene in this file
+}
+
+sub _padded_string_for_gene_file {
+    my ( $self, $gene_file ) = @_;
+    my $gene_length = $self->_gene_lengths->{$gene_file};    # length recorded for this file, if any
+    return defined($gene_length) ? 'N' x $gene_length : '';  # run of Ns of that length, or nothing when unknown
+}
+
+sub _create_merged_sequence_for_sample {
+    my ( $self, $sample_name ) = @_;
+    my $merged_sequence = '';
+    for my $gene_file ( @{ $self->_sorted_multifasta_files } ) {
+        $merged_sequence .= $self->_sequence_for_sample_from_gene_file( $sample_name, $gene_file );
+    }
+    return $merged_sequence;
+}
+
+sub merge_files {
+    my ($self) = @_;
+    # Write one concatenated record per sample, then the EMBL header describing the gene coordinates.
+    require Bio::Seq;    # Bio::Seq->new is called below but this module never loads Bio::Seq itself
+    for my $sample_name ( @{ $self->sample_names } ) {
+        my $sequence   = $self->_create_merged_sequence_for_sample($sample_name);
+        my $seq_record = Bio::Seq->new( -display_id => $sample_name, -seq => $sequence );
+        $self->_output_seqio_obj->write_seq($seq_record);
+    }
+
+    # Create a header file which gives the coordinates of each gene in the multifasta
+    Bio::Roary::Output::CoreGeneAlignmentCoordinatesEMBL->new(
+        multifasta_files => $self->_sorted_multifasta_files,
+        gene_lengths     => $self->_gene_lengths,
+        output_filename  => $self->output_header_filename
+    )->create_file();
+    return 1;
+}
+
+no Moose;
+	__PACKAGE__->meta->make_immutable;
+
+1;
+
diff --git a/lib/Bio/Roary/OrderGenes.pm b/lib/Bio/Roary/OrderGenes.pm
new file mode 100644
index 0000000..e4652c2
--- /dev/null
+++ b/lib/Bio/Roary/OrderGenes.pm
@@ -0,0 +1,391 @@
+package Bio::Roary::OrderGenes;
+
+# ABSTRACT: Take in GFF files and create a matrix of what genes are beside what other genes
+
+=head1 SYNOPSIS
+
+Take in the analyse groups and create a matrix of what genes are beside what other genes
+   use Bio::Roary::OrderGenes;
+   
+   my $obj = Bio::Roary::OrderGenes->new(
+     analyse_groups_obj => $analyse_groups_obj,
+     gff_files => ['file1.gff','file2.gff']
+   );
+   $obj->groups_to_contigs;
+
+=cut
+
+use Moose;
+use Bio::Roary::Exceptions;
+use Bio::Roary::AnalyseGroups;
+use Bio::Roary::ContigsToGeneIDsFromGFF;
+use Graph;
+use Graph::Writer::Dot;
+use File::Basename;
+
+has 'gff_files'                => ( is => 'ro', isa => 'ArrayRef',                  required => 1 );
+has 'analyse_groups_obj'       => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups', required => 1 );
+has 'core_definition'          => ( is => 'ro', isa => 'Num',                       default  => 1.0 );
+has 'pan_graph_filename'       => ( is => 'ro', isa => 'Str',                       default  => 'core_accessory_graph.dot' );
+has 'accessory_graph_filename' => ( is => 'ro', isa => 'Str',                       default  => 'accessory_graph.dot' );
+has 'sample_weights'           => ( is => 'ro', isa => 'Maybe[HashRef]' );
+has 'samples_to_clusters'      => ( is => 'ro', isa => 'Maybe[HashRef]' );
+has 'group_order' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_group_order' );
+has 'groups_to_sample_names' => ( is => 'rw', isa => 'HashRef', default => sub { {} } );
+has 'group_graphs'            => ( is => 'ro', isa => 'Graph',   lazy => 1, builder => '_build_group_graphs' );
+has 'groups_to_contigs'       => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_groups_to_contigs' );
+has '_groups_to_file_contigs' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__groups_to_file_contigs' );
+has '_groups'                 => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_groups' );
+has 'number_of_files'         => ( is => 'ro', isa => 'Int',     lazy => 1, builder => '_build_number_of_files' );
+has '_groups_qc' => ( is => 'ro', isa => 'HashRef', default => sub { {} } );
+has '_percentage_of_largest_weak_threshold' => ( is => 'ro', isa => 'Num', default => 0.9 );
+
+sub _build_number_of_files {
+    my ($self) = @_;
+    return scalar @{ $self->gff_files };    # force a count: a bare array would return the file names in list context
+}
+
+sub _build_groups {
+    my ($self) = @_;
+
+    # Tally the group names held by the AnalyseGroups object (group name => times seen).
+    my %group_count;
+    $group_count{$_}++ for @{ $self->analyse_groups_obj->_groups };
+    return \%group_count;
+}
+
+sub _build__groups_to_file_contigs {
+    my ($self) = @_;
+
+    my @overlapping_hypothetical_gene_ids;
+    my %samples_to_groups_contigs;
+
+    # Open each GFF file
+    for my $filename ( @{ $self->gff_files } ) {
+        my @groups_to_contigs;
+        my $contigs_to_ids_obj = Bio::Roary::ContigsToGeneIDsFromGFF->new( gff_file => $filename );
+
+        my ( $sample_name, $directories, $suffix ) = fileparse($filename);
+        $sample_name =~ s/\.gff//gi;
+
+        # Loop over each contig in the GFF file
+        for my $contig_name ( keys %{ $contigs_to_ids_obj->contig_to_ids } ) {
+            my @groups_on_contig;
+
+            # loop over each gene in each contig in the GFF file
+            for my $gene_id ( @{ $contigs_to_ids_obj->contig_to_ids->{$contig_name} } ) {
+
+                # convert to group name
+                my $group_name = $self->analyse_groups_obj->_genes_to_groups->{$gene_id};
+                next unless ( defined($group_name) );
+
+                if ( $contigs_to_ids_obj->overlapping_hypothetical_protein_ids->{$gene_id} ) {
+                    $self->_groups_qc->{$group_name} =
+'Hypothetical protein with no hits to refseq/uniprot/clusters/cdd/tigrfams/pfam overlapping another protein with hits';
+                }
+                push( @groups_on_contig, $group_name );
+            }
+            push( @groups_to_contigs, \@groups_on_contig );
+        }
+        $samples_to_groups_contigs{$sample_name} = \@groups_to_contigs;
+    }
+
+    return \%samples_to_groups_contigs;
+
+}
+
+sub _build_group_order {
+    my ($self) = @_;
+    my %group_order;
+
+    my %groups_to_sample_names;
+    for my $sample_name ( keys %{ $self->_groups_to_file_contigs } ) {
+        my $groups_to_file_contigs = $self->_groups_to_file_contigs->{$sample_name};
+        for my $groups_on_contig ( @{$groups_to_file_contigs} ) {
+            for ( my $i = 1 ; $i < @{$groups_on_contig} ; $i++ ) {
+                my $group_from = $groups_on_contig->[ $i - 1 ];
+                my $group_to   = $groups_on_contig->[$i];
+
+                if ( defined( $self->sample_weights ) && $self->sample_weights->{$sample_name} ) {
+                    $group_order{$group_from}{$group_to} += $self->sample_weights->{$sample_name};
+                    push( @{ $groups_to_sample_names{$group_from} }, $sample_name );
+                }
+                else {
+                    $group_order{$group_from}{$group_to}++;
+                }
+            }
+            if ( @{$groups_on_contig} == 1 ) {
+                my $group_from = $groups_on_contig->[0];
+                my $group_to   = $groups_on_contig->[0];
+                if ( defined( $self->sample_weights ) && $self->sample_weights->{$sample_name} ) {
+                    $group_order{$group_from}{$group_to} += $self->sample_weights->{$sample_name};
+                    push( @{ $groups_to_sample_names{$group_from} }, $sample_name );
+                }
+                else {
+                    $group_order{$group_from}{$group_to}++;
+                }
+            }
+        }
+    }
+
+    $self->groups_to_sample_names( \%groups_to_sample_names );
+    return \%group_order;
+}
+
+sub _build_group_graphs {
+    my ($self) = @_;
+    return Graph->new( undirected => 1 );
+}
+
+sub _save_graph_to_file {
+    my ( $self, $graph, $output_filename ) = @_;
+    my $writer = Graph::Writer::Dot->new();
+    $writer->write_graph( $graph, $output_filename );
+    return 1;
+}
+
+sub _add_groups_to_graph {
+    my ($self) = @_;
+    my $group_order = $self->group_order();
+    # Each adjacency becomes an edge of 'group_graphs' weighted by the inverse of its 'group_order' value.
+    for my $current_group ( keys %{$group_order} ) {
+        my $neighbours = $group_order->{$current_group};
+        for my $group_to ( keys %{$neighbours} ) {
+            $self->group_graphs->add_weighted_edge( $current_group, $group_to, 1.0 / $neighbours->{$group_to} );
+        }
+    }
+}
+
+sub _reorder_connected_components {
+    my ( $self, $graph_groups ) = @_;
+    my @ordered_graph_groups;
+    my @paths_and_weights;
+
+    for my $graph_group ( @{$graph_groups} ) {
+        my %groups;
+        $groups{$_}++ for ( @{$graph_group} );
+        my $edge_sum = 0;
+
+        for my $current_group ( keys %groups ) {
+            for my $group_to ( keys %{ $self->group_order->{$current_group} } ) {
+                next unless defined( $groups{$group_to} );
+                $edge_sum += $self->group_order->{$current_group}->{$group_to};
+            }
+        }
+
+        my %samples_in_graph;
+        for my $current_group ( keys %groups ) {
+            my $sample_names = $self->groups_to_sample_names->{$current_group};
+            if ( defined($sample_names) ) {
+                for my $sample_name ( @{$sample_names} ) {
+                    $samples_in_graph{$sample_name}++;
+                }
+            }
+        }
+        my @sample_names = sort keys %samples_in_graph;
+
+        if ( @{$graph_group} == 1 ) {
+
+            push(
+                @paths_and_weights,
+                {
+                    path           => $graph_group,
+                    average_weight => $edge_sum,
+                    sample_names   => \@sample_names
+                }
+            );
+        }
+        else {
+            my $graph = Graph->new( undirected => 1 );
+            for my $current_group ( keys %groups ) {
+                for my $group_to ( keys %{ $self->group_order->{$current_group} } ) {
+                    if ( $groups{$group_to} ) {
+                        my $weight = 1 / $self->group_order->{$current_group}->{$group_to};
+                        $graph->add_weighted_edge( $current_group, $group_to, $weight );
+                    }
+                }
+            }
+            my $minimum_spanning_tree = $graph->minimum_spanning_tree;
+            my $dfs_obj               = Graph::Traversal::DFS->new($minimum_spanning_tree);
+            my @reordered_dfs_groups  = $dfs_obj->dfs;
+            push(
+                @paths_and_weights,
+                {
+                    path           => \@reordered_dfs_groups,
+                    average_weight => $edge_sum,
+                    sample_names   => \@sample_names
+                }
+            );
+        }
+
+    }
+
+    return $self->_order_by_samples_and_weights( \@paths_and_weights );
+}
+
+sub _order_by_samples_and_weights {
+    my ( $self, $paths_and_weights ) = @_;
+    # $paths_and_weights is an array ref of hash refs with the keys 'path', 'average_weight' and
+    # 'sample_names'. Returns an array ref holding the 'path' entries in their final order.
+    my @ordered_graph_groups;
+    if ( !defined( $self->samples_to_clusters ) ) {
+        # NOTE(review): ascending by weight here but descending in the branches below - confirm this is intended.
+        my @ordered_paths_and_weights = sort { $a->{average_weight} <=> $b->{average_weight} } @{$paths_and_weights};
+        @ordered_graph_groups = map { $_->{path} } @ordered_paths_and_weights;
+        return \@ordered_graph_groups;
+    }
+    # Find the largest cluster in each graph and regroup
+    my %largest_cluster_to_paths_and_weights;
+    for my $graph_details ( @{$paths_and_weights} ) {
+        my %cluster_count;
+        for my $sample_name ( @{ $graph_details->{sample_names} } ) {
+            if ( defined( $self->samples_to_clusters->{$sample_name} ) ) {
+                $cluster_count{ $self->samples_to_clusters->{$sample_name} }++;
+            }
+        }
+        my $largest_cluster = ( sort { $cluster_count{$b} <=> $cluster_count{$a} || $a cmp $b} keys %cluster_count )[0];
+        if ( !defined($largest_cluster) ) {
+            # None of this graph's samples has a cluster: give up on clustering and order every graph by weight.
+            my @ordered_paths_and_weights = sort { $b->{average_weight} <=> $a->{average_weight} } @{$paths_and_weights};
+            @ordered_graph_groups = map { $_->{path} } @ordered_paths_and_weights;
+            return \@ordered_graph_groups;
+        }
+        push( @{ $largest_cluster_to_paths_and_weights{$largest_cluster}{graph_details} }, $graph_details );
+        $largest_cluster_to_paths_and_weights{$largest_cluster}{largest_cluster_size} += $cluster_count{$largest_cluster};
+    }
+
+    # go through each cluster group and order by weight; equally sized clusters are taken in name order
+    # so that the result does not depend on the order in which the hash returns its keys
+    my @clustered_ordered_graph_groups;
+    for my $cluster_name (
+        sort {
+            $largest_cluster_to_paths_and_weights{$b}->{largest_cluster_size}
+              <=> $largest_cluster_to_paths_and_weights{$a}->{largest_cluster_size} || $a cmp $b
+        } keys %largest_cluster_to_paths_and_weights
+      )
+    {
+        my @ordered_paths_and_weights =
+          sort { $b->{average_weight} <=> $a->{average_weight} } @{ $largest_cluster_to_paths_and_weights{$cluster_name}->{graph_details} };
+        @ordered_graph_groups = map { $_->{path} } @ordered_paths_and_weights;
+        for my $graph_group (@ordered_graph_groups) {
+            push( @clustered_ordered_graph_groups, $graph_group );
+        }
+    }
+    return \@clustered_ordered_graph_groups;
+}
+
+sub _build_groups_to_contigs {
+    my ($self) = @_;
+    $self->_add_groups_to_graph;
+
+    my %groups_to_contigs;
+    my $counter          = 1;
+    my $overall_counter  = 1;
+    my $counter_filtered = 1;
+
+    # Accessory
+    my $accessory_graph  = $self->_create_accessory_graph;
+    my @group_graphs     = $accessory_graph->connected_components();
+    my $reordered_graphs = $self->_reorder_connected_components( \@group_graphs );
+
+    $self->_save_graph_to_file( $accessory_graph, $self->accessory_graph_filename );
+
+    for my $contig_groups ( @{$reordered_graphs} ) {
+        my $order_counter = 1;
+
+        for my $group_name ( @{$contig_groups} ) {
+            $groups_to_contigs{$group_name}{accessory_label}           = $counter;
+            $groups_to_contigs{$group_name}{accessory_order}           = $order_counter;
+            $groups_to_contigs{$group_name}{'accessory_overall_order'} = $overall_counter;
+            $order_counter++;
+            $overall_counter++;
+        }
+        $counter++;
+    }
+
+    # Core + accessory
+    my @group_graphs_all     = $self->group_graphs->connected_components();
+    my $reordered_graphs_all = $self->_reorder_connected_components( \@group_graphs_all );
+    $self->_save_graph_to_file( $self->group_graphs, $self->pan_graph_filename );
+
+    $overall_counter  = 1;
+    $counter          = 1;
+    $counter_filtered = 1;
+    for my $contig_groups ( @{$reordered_graphs_all} ) {
+        my $order_counter = 1;
+
+        for my $group_name ( @{$contig_groups} ) {
+            $groups_to_contigs{$group_name}{label}                          = $counter;
+            $groups_to_contigs{$group_name}{comment}                        = '';
+            $groups_to_contigs{$group_name}{order}                          = $order_counter;
+            $groups_to_contigs{$group_name}{'core_accessory_overall_order'} = $overall_counter;
+
+            if ( @{$contig_groups} <= 2 ) {
+                $groups_to_contigs{$group_name}{comment} = 'Investigate';
+            }
+            elsif ( $self->_groups_qc->{$group_name} ) {
+                $groups_to_contigs{$group_name}{comment} = $self->_groups_qc->{$group_name};
+            }
+            else {
+                $groups_to_contigs{$group_name}{'core_accessory_overall_order_filtered'} = $counter_filtered;
+                $counter_filtered++;
+            }
+            $order_counter++;
+            $overall_counter++;
+        }
+        $counter++;
+    }
+
+    $counter_filtered = 1;
+    for my $contig_groups ( @{$reordered_graphs} ) {
+        for my $group_name ( @{$contig_groups} ) {
+            if (   ( !defined( $groups_to_contigs{$group_name}{comment} ) )
+                || ( defined( $groups_to_contigs{$group_name}{comment} ) && $groups_to_contigs{$group_name}{comment} eq '' ) )
+            {
+                $groups_to_contigs{$group_name}{'accessory_overall_order_filtered'} = $counter_filtered;
+                $counter_filtered++;
+            }
+        }
+    }
+
+    return \%groups_to_contigs;
+}
+
+sub _create_accessory_graph {
+    my ($self) = @_;
+    # Build an undirected graph holding only the groups that fall below the core threshold.
+    my $graph = Graph->new( undirected => 1 );
+    my %group_freq;
+
+    # Count the occurrences of each group over every contig of every sample.
+    for my $groups_to_file_contigs ( values %{ $self->_groups_to_file_contigs } ) {
+        for my $groups_on_contig ( @{$groups_to_file_contigs} ) {
+            $group_freq{$_}++ for @{$groups_on_contig};
+        }
+    }
+
+    # Groups seen at least (number of files x core_definition) times are treated as core and left out.
+    my $core_threshold = $self->number_of_files * $self->core_definition;
+    my $group_order    = $self->group_order;
+    for my $current_group ( keys %{$group_order} ) {
+        next if ( $group_freq{$current_group} >= $core_threshold );
+
+        for my $group_to ( keys %{ $group_order->{$current_group} } ) {
+            if ( $group_freq{$group_to} >= $core_threshold ) {
+                # Core neighbour: keep the accessory group as a vertex but add no edge to the core group.
+                $graph->add_vertex($current_group);
+            }
+            else {
+                my $weight = 1.0 / ( $group_order->{$current_group}->{$group_to} );
+                $graph->add_weighted_edge( $current_group, $group_to, $weight );
+            }
+        }
+    }
+
+    return $graph;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/Output/BlastIdentityFrequency.pm b/lib/Bio/Roary/Output/BlastIdentityFrequency.pm
new file mode 100644
index 0000000..cf3512e
--- /dev/null
+++ b/lib/Bio/Roary/Output/BlastIdentityFrequency.pm
@@ -0,0 +1,62 @@
+package Bio::Roary::Output::BlastIdentityFrequency;
+
+# ABSTRACT:  Take in blast results and find the percentage identity graph
+
+=head1 SYNOPSIS
+
+Take in blast results and find the percentage identity graph
+   use Bio::Roary::Output::BlastIdentityFrequency;
+   
+   my $obj = Bio::Roary::Output::BlastIdentityFrequency->new(
+       input_filename      => '_blast_results',
+       output_filename  => 'blast_identity_frequency.Rtab',
+     );
+   $obj->create_file();
+
+=cut
+
+use Moose;
+use Bio::SeqIO;
+use Bio::Roary::Exceptions;
+
+has 'input_filename'        => ( is => 'ro', isa => 'Str', default => '_blast_results' );
+has 'output_filename'       => ( is => 'ro', isa => 'Str', default => 'blast_identity_frequency.Rtab' );
+
+has '_output_fh'            => ( is => 'ro', lazy => 1, builder => '_build__output_fh' );
+has '_input_fh'             => ( is => 'ro', lazy => 1, builder => '_build__input_fh' );
+
+sub _build__output_fh
+{
+  my ($self) = @_;
+  open( my $fh, '>', $self->output_filename )
+    or Bio::Roary::Exceptions::CouldntWriteToFile->throw(
+      error => "Couldnt write output file:" . $self->output_filename );
+  return $fh;
+}
+
+sub _build__input_fh
+{
+  my ($self) = @_;    # builder for '_input_fh': read handle on a pipeline printing "integer part of column 3<TAB>occurrences"
+  ( my $quoted_filename = $self->input_filename ) =~ s/'/'\\''/g;    # shell-quote: the name is spliced into a command line
+  my $input_string  = 'awk \'{print $3}\' \''.$quoted_filename.'\'  | awk \'BEGIN {FS="."}; {print $1}\'| sort | uniq -c | awk \'{print $2"\t"$1}\'';
+  open( my $fh, '-|', $input_string ) or die "Couldnt open results file: $!";
+  return $fh;
+}
+
+sub create_file
+{
+  my ($self) = @_;
+  # Copy every line produced by the input pipeline into the output file.
+  my $input_fh  = $self->_input_fh;
+  my $output_fh = $self->_output_fh;
+  print {$output_fh} $_ while (<$input_fh>);
+  close($input_fh);
+  # Buffered write errors (for example a full disk) are only reported by close, so check it.
+  close($output_fh)
+    or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => "Couldnt write output file:" . $self->output_filename );
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/Output/CoreGeneAlignmentCoordinatesEMBL.pm b/lib/Bio/Roary/Output/CoreGeneAlignmentCoordinatesEMBL.pm
new file mode 100644
index 0000000..a2b2761
--- /dev/null
+++ b/lib/Bio/Roary/Output/CoreGeneAlignmentCoordinatesEMBL.pm
@@ -0,0 +1,81 @@
+package Bio::Roary::Output::CoreGeneAlignmentCoordinatesEMBL;
+
+# ABSTRACT: Create an embl file for the header with locations of where genes are in the multifasta alignment of core genes
+
+=head1 SYNOPSIS
+
+Create an embl file for the header with locations of where genes are in the multifasta alignment of core genes
+   use Bio::Roary::Output::CoreGeneAlignmentCoordinatesEMBL;
+   
+   my $obj = Bio::Roary::Output::CoreGeneAlignmentCoordinatesEMBL->new(
+        output_filename => 'core_alignment_header.embl',
+        multifasta_files => [
+            't/data/multifasta_files/1.aln', 't/data/multifasta_files/outof_order.aln',
+            't/data/multifasta_files/2.aln', 't/data/multifasta_files/3.aln'
+        ],
+        gene_lengths => {
+            't/data/multifasta_files/1.aln'           => 1,
+            't/data/multifasta_files/outof_order.aln' => 10,
+            't/data/multifasta_files/2.aln'           => 100,
+            't/data/multifasta_files/3.aln'           => 1000
+        },
+   );
+   $obj->create_file;
+
+=cut
+
+use Moose;
+use Bio::Roary::Exceptions;
+use File::Basename;
+with 'Bio::Roary::Output::EMBLHeaderCommon';
+
+has 'output_filename'     => ( is => 'ro', isa => 'Str',      default  => 'core_alignment_header.embl' );
+has 'multifasta_files'    => ( is => 'ro', isa => 'ArrayRef', required => 1 );
+has 'gene_lengths'        => ( is => 'ro', isa => 'HashRef',  required => 1 );
+has '_current_coordinate' => ( is => 'rw', isa => 'Int',      default  => 1 );
+has '_output_fh'          => ( is => 'ro', lazy => 1,         builder => '_build__output_fh' );
+
+sub _build__output_fh {
+    my ($self) = @_;
+    open( my $fh, '>', $self->output_filename )
+      or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => "Couldnt write output file:" . $self->output_filename );
+    return $fh;
+}
+
+sub _gene_name_from_filename {
+    my ( $self, $filename ) = @_;
+    # Gene name = file name without its directory, minus the first '.aln' and the first '.fa' it contains.
+    ( my $gene_name = basename($filename) ) =~ s!\.aln!!;
+    $gene_name =~ s!\.fa!!;
+    return $gene_name;
+}
+
+sub _header_block {
+    my ( $self, $gene_filename ) = @_;
+    # Returns the feature-table lines for one gene file and moves '_current_coordinate' past the gene.
+    # With a missing or zero length the feature would end before it starts, so nothing is emitted for it.
+    my $gene_length     = $self->gene_lengths->{$gene_filename} or return '';
+    my $gene_name       = $self->_gene_name_from_filename($gene_filename);
+    my $end_coordinate  = $self->_current_coordinate + $gene_length - 1;
+    my $annotation_type = $self->_annotation_type($gene_name);
+    my $tab_file_entry = join( '', ( 'FT', $annotation_type, $self->_current_coordinate, '..', $end_coordinate, "\n" ) );
+    $tab_file_entry .= "FT                   /label=$gene_name\n";
+    $tab_file_entry .= "FT                   /locus_tag=$gene_name\n";
+    $self->_current_coordinate( $end_coordinate + 1 );
+    return $tab_file_entry;
+}
+
+sub create_file {
+    my ($self) = @_;
+    print { $self->_output_fh } $self->_header_top;
+    for my $filename ( @{ $self->multifasta_files } ) {
+        print { $self->_output_fh } $self->_header_block($filename);
+    }
+    print { $self->_output_fh } $self->_header_bottom;
+    close( $self->_output_fh );
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/Output/DifferenceBetweenSets.pm b/lib/Bio/Roary/Output/DifferenceBetweenSets.pm
new file mode 100644
index 0000000..75aa77c
--- /dev/null
+++ b/lib/Bio/Roary/Output/DifferenceBetweenSets.pm
@@ -0,0 +1,126 @@
+package Bio::Roary::Output::DifferenceBetweenSets;
+
+# ABSTRACT:  Given two sets of isolates and a group file, output whats unique in each and whats in common
+
+=head1 SYNOPSIS
+
+Given two sets of isolates and a group file, output whats unique in each and whats in common
+   use Bio::Roary::Output::DifferenceBetweenSets;
+   
+   my $obj = Bio::Roary::Output::DifferenceBetweenSets->new(
+       analyse_groups  => $analyse_groups,
+       input_filenames_sets => 
+       [
+         ['aaa.faa','bbb.faa'],
+         ['ccc.faa','ddd.faa']
+       ],
+     );
+   $obj->groups_set_one_unique();
+   $obj->groups_set_two_unique();
+   $obj->groups_in_common();
+
+=cut
+
+use Moose;
+use Bio::SeqIO;
+use Bio::Roary::Exceptions;
+use Bio::Roary::AnalyseGroups;
+use Bio::Roary::Output::QueryGroups;
+
+has 'analyse_groups'       => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups', required => 1 );
+has 'input_filenames_sets' => ( is => 'ro', isa => 'ArrayRef[ArrayRef]',            required => 1 );
+has 'output_filename_base' => ( is => 'ro', isa => 'Str',                           default  => 'set_difference' );
+
+has '_query_groups_objs' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__query_groups_objs' );
+
+# TODO: update to handle more than 2 input sets
+
+sub _build__query_groups_objs {
+    my ($self) = @_;
+    my @query_groups_objs;
+    for my $file_name_set ( @{ $self->input_filenames_sets } ) {
+        push(
+            @query_groups_objs,
+            Bio::Roary::Output::QueryGroups->new(
+                analyse_groups  => $self->analyse_groups,
+                input_filenames => $file_name_set
+            )
+        );
+    }
+    
+    my @all_input_files = (@{ $self->input_filenames_sets->[0] },@{ $self->input_filenames_sets->[1] });
+    push(
+        @query_groups_objs,
+        Bio::Roary::Output::QueryGroups->new(
+            analyse_groups  => $self->analyse_groups,
+            input_filenames => \@all_input_files
+        )
+    );
+    
+    
+    return \@query_groups_objs;
+}
+
+sub _subtract_arrays {    # returns an array ref of the elements of $array_2 that are not in $array_1
+    my ( $self, $array_1, $array_2 ) = @_;
+    my %array_1 = map { $_ => 1 } @{$array_1};    # lookup of the values to exclude
+    my @difference = grep { not $array_1{$_} } @{$array_2};    # i.e. $array_2 minus $array_1; order and duplicates of $array_2 are kept
+    return \@difference;
+}
+
+sub _groups_unique {
+    my ( $self, $output_filename, $query_group1, $query_group2 ) = @_;
+    my $unique_groups = $self->_subtract_arrays( $query_group2->_groups, $query_group1->_groups  );
+    $query_group1->groups_with_external_inputs( $output_filename, $unique_groups );
+}
+
+sub groups_set_one_unique_filename
+{
+  my ($self) = @_;
+  return $self->output_filename_base . '_unique_set_one';
+}
+
+sub groups_set_two_unique_filename
+{
+  my ($self) = @_;
+  return $self->output_filename_base . '_unique_set_two';
+}
+
+sub groups_in_common_filename
+{
+  my ($self) = @_;
+  return $self->output_filename_base . '_common_set';
+}
+
+
+sub groups_set_one_unique {
+    my ($self) = @_;
+    $self->_groups_unique(
+        $self->groups_set_one_unique_filename,
+        $self->_query_groups_objs->[0],
+        $self->_query_groups_objs->[1]
+    );
+}
+
+sub groups_set_two_unique {
+    my ($self) = @_;
+    $self->_groups_unique(
+        $self->groups_set_two_unique_filename,
+        $self->_query_groups_objs->[1],
+        $self->_query_groups_objs->[0]
+    );
+}
+
+sub groups_in_common {
+    my ($self) = @_;
+    my $unique_group_1 = $self->_subtract_arrays( $self->_query_groups_objs->[0]->_groups, $self->_query_groups_objs->[1]->_groups );    # despite the name: groups of set two that are not in set one
+    my $unique_group_2 = $self->_subtract_arrays( $self->_query_groups_objs->[1]->_groups, $self->_query_groups_objs->[0]->_groups );    # groups of set one that are not in set two
+    my $common_groups_1  = $self->_subtract_arrays(  $unique_group_1,$self->_query_groups_objs->[2]->_groups);    # groups of the combined query (index 2) minus those only in set two
+    my $common_groups_2  = $self->_subtract_arrays(  $unique_group_2,$common_groups_1);    # ...minus those only in set one; the remainder is reported as the common set
+    $self->_query_groups_objs->[2]->groups_with_external_inputs( $self->groups_in_common_filename, $common_groups_2  );
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/Output/EMBLHeaderCommon.pm b/lib/Bio/Roary/Output/EMBLHeaderCommon.pm
new file mode 100644
index 0000000..933a51a
--- /dev/null
+++ b/lib/Bio/Roary/Output/EMBLHeaderCommon.pm
@@ -0,0 +1,41 @@
+package Bio::Roary::Output::EMBLHeaderCommon;
+
+# ABSTRACT: a role containing some common methods for embl header files
+
+=head1 SYNOPSIS
+
+a role containing some common methods for embl header files
+   with 'Bio::Roary::Output::EMBLHeaderCommon';
+
+=cut
+
+use Moose::Role;
+
+# Opening lines of the EMBL header: the ID line, an XX separator and the FH column headings.
+sub _header_top {
+    return join( "\n",
+        'ID   Genome standard; DNA; PRO; 1234 BP.',
+        'XX',
+        'FH   Key             Location/Qualifiers',
+        'FH', '' );    # the trailing '' terminates the last line with a newline
+}
+
+# Closing lines of the EMBL header: an XX separator, the SQ summary line and the '//' terminator.
+sub _header_bottom {
+    return join( "\n",
+        'XX',
+        'SQ   Sequence 1234 BP; 789 A; 1717 C; 1693 G; 691 T; 0 other;',
+        '//', '' );    # the trailing '' terminates the last line with a newline
+}
+
+# Feature key column: misc_feature when the name contains 'group_', otherwise feature.
+sub _annotation_type {
+    my ( $self, $annotated_group_name ) = @_;
+
+    return ( $annotated_group_name =~ /group_/ )
+      ? "   misc_feature    "
+      : "   feature         ";
+}
+
+
+1;
diff --git a/lib/Bio/Roary/Output/EmblGroups.pm b/lib/Bio/Roary/Output/EmblGroups.pm
new file mode 100644
index 0000000..3612049
--- /dev/null
+++ b/lib/Bio/Roary/Output/EmblGroups.pm
@@ -0,0 +1,246 @@
+package Bio::Roary::Output::EmblGroups;
+
+# ABSTRACT: Create a tab/embl file with the features for drawing pretty pictures
+
+=head1 SYNOPSIS
+
+Create a tab/embl file with the features for drawing pretty pictures
+   use Bio::Roary::Output::EmblGroups;
+   
+   my $obj = Bio::Roary::Output::EmblGroups->new(
+     output_filename => 'group_statistics.csv',
+     annotate_groups_obj => $annotate_groups_obj,
+     analyse_groups_obj  => $analyse_groups_obj
+   );
+   $obj->create_files;
+
+=cut
+
+use Moose;
+use POSIX;
+use File::Basename;
+use Bio::Roary::Exceptions;
+use Bio::Roary::AnalyseGroups;
+use Bio::Roary::AnnotateGroups;
+with 'Bio::Roary::Output::EMBLHeaderCommon';
+
+has 'annotate_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 );
+has 'analyse_groups_obj'  => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups',  required => 1 );
+has 'output_filename'     => ( is => 'ro', isa => 'Str',                        default  => 'core_accessory.tab' );
+has 'output_header_filename' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build_output_header_filename' );
+has 'groups_to_contigs' => ( is => 'ro', isa => 'Maybe[HashRef]' );
+has 'ordering_key' => ( is => 'ro', isa => 'Str', default => 'core_accessory_overall_order' );
+
+has '_output_fh'           => ( is => 'ro', lazy => 1,          builder => '_build__output_fh' );
+has '_output_header_fh'    => ( is => 'ro', lazy => 1,          builder => '_build__output_header_fh' );
+has '_sorted_file_names'   => ( is => 'ro', isa  => 'ArrayRef', lazy    => 1, builder => '_build__sorted_file_names' );
+has '_groups_to_files'     => ( is => 'ro', isa  => 'HashRef',  lazy    => 1, builder => '_build__groups_to_files' );
+has 'heatmap_lookup_table' => ( is => 'ro', isa  => 'ArrayRef', lazy    => 1, builder => '_build_heatmap_lookup_table' );
+
+sub _build__output_fh {    # builder: write handle on output_filename; throws if it cannot be opened
+    my $path = shift->output_filename;
+    open( my $handle, '>', $path )
+      or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => "Couldnt write output file:" . $path );
+    return $handle;
+}
+
+sub _build__output_header_fh {    # builder: write handle on output_header_filename
+    my ($self) = @_;
+    open( my $fh, '>', $self->output_header_filename )    # on failure, report the file that actually failed
+      or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => "Couldnt write output file:" . $self->output_header_filename );
+    return $fh;
+}
+
+sub _build_output_header_filename {    # builder: output_filename with '.tab' swapped for '.header.embl'
+    my $base_name = shift->output_filename;
+    # Without a '.tab' to swap, append instead so the header file never overwrites the main output.
+    $base_name .= '.header.embl' unless ( $base_name =~ s/\.tab/.header.embl/i );
+    return $base_name;
+}
+
+sub _build__sorted_file_names {    # builder: the fasta file names in default (string) sort order
+    my ($self) = @_;
+    my $fasta_files = $self->analyse_groups_obj->fasta_files;
+    return [ sort @{$fasta_files} ];
+}
+
+# Builder: map each group to a hash of { source file => [ gene ids of the group in that file ] }.
+sub _build__groups_to_files {
+    my ($self) = @_;
+    my %files_for_group;
+    for my $group ( @{ $self->annotate_groups_obj->_groups } ) {
+        my $gene_ids = $self->annotate_groups_obj->_groups_to_id_names->{$group};
+        my %genes_in_file;
+        for my $gene_id ( @{$gene_ids} ) {
+            push( @{ $genes_in_file{ $self->analyse_groups_obj->_genes_to_file->{$gene_id} } }, $gene_id );
+        }
+        $files_for_group{$group} = \%genes_in_file;
+    }
+    return \%files_for_group;
+}
+
+# Build the "FT   variation" entry of the main tab file for one group, listing the taxa
+# (input file basenames) that contain it. Returns '' when the group has no coordinates
+# under ordering_key, or when its contig entry carries a non-empty comment.
+sub _block {
+    my ( $self, $group ) = @_;
+    my $annotated_group_name = $self->annotate_groups_obj->_groups_to_consensus_gene_names->{$group};
+    my $contig_details       = $self->groups_to_contigs->{$annotated_group_name};
+
+    # Nothing to draw without coordinates for this group.
+    return ''
+      unless ( defined($contig_details)
+        && defined( $contig_details->{ $self->ordering_key } ) );
+
+    # Groups that carry a comment are left out of the output.
+    return ''
+      if ( defined( $contig_details->{comment} )
+        && $contig_details->{comment} ne '' );
+
+    my $coordindates = $contig_details->{ $self->ordering_key };
+
+    # One taxon name per input file that contributes at least one gene to the group.
+    my @taxon_names_array;
+    for my $filename ( @{ $self->_sorted_file_names } ) {
+        my $genes_from_file = $self->_groups_to_files->{$group}->{$filename};
+        next unless ( defined($genes_from_file) && @{$genes_from_file} > 0 );
+
+        ( my $taxon_name = basename($filename) ) =~ s!\.gff\.proteome\.faa!!;
+        push( @taxon_names_array, $taxon_name );
+    }
+
+    # The colour is chosen from how many of the input files contain the group.
+    my $colour = $self->_get_heat_map_colour( \@taxon_names_array, $self->annotate_groups_obj->_number_of_files );
+    my $taxon_names = join( " ", @taxon_names_array );
+
+    return join( '',
+        "FT   variation       $coordindates\n",
+        "FT                   /colour=$colour\n",
+        "FT                   /gene=$annotated_group_name\n",
+        "FT                   /taxa=\"$taxon_names\"\n",
+    );
+}
+
+# Pick the heat-map colour for a group from the number of taxa that contain it.
+sub _get_heat_map_colour {
+    my ( $self, $taxon_names, $number_of_files ) = @_;
+    my ( $table, $taxa ) = ( $self->heatmap_lookup_table, scalar @{$taxon_names} );
+    return $table->[0]  if ( $taxa <= 1 );                   # an empty list no longer wraps round to the last colour
+    return $table->[-1] if ( $taxa >= $number_of_files );    # present in every file; also keeps the index inside the table
+
+    my $block_size = $number_of_files / @{$table};
+    return $table->[ ceil( $taxa / $block_size ) - 1 ];
+}
+
+sub _build_heatmap_lookup_table {    # builder: the colour codes, listed from blue through to red
+    my @colour_codes = (
+        4,     # blue (RGB values: 0 0 255)
+        5,     # cyan (RGB values: 0 255 255)
+        9,     # light sky blue (RGB values: 135 206 250)
+        8,     # pale green (RGB values: 152 251 152)
+        3,     # green (RGB values: 0 255 0)
+        7,     # yellow (RGB values: 255 255 0)
+        10,    # orange (RGB values: 255 165 0)
+        16,    # light red (RGB values: 255 127 127)
+        15,    # mid red: (RGB values: 255 63 63)
+        2,     # red (RGB values: 255 0 0)
+    );
+    return \@colour_codes;
+}
+
+# Colour for an accessory block: 2 when no label is given, otherwise 2 + ( label % 6 ).
+sub _block_colour {
+    my ( $self, $accessory_label ) = @_;
+    my $base_colour = 2;
+
+    return $base_colour unless ( defined($accessory_label) );
+    return $base_colour + $accessory_label % 6;
+}
+
+# Build the header-file feature entry (label, locus_tag and colour) for one group.
+# Returns '' when the group has no coordinates under ordering_key, or when its contig
+# entry carries a non-empty comment.
+sub _header_block {
+    my ( $self, $group ) = @_;
+    my $annotated_group_name = $self->annotate_groups_obj->_groups_to_consensus_gene_names->{$group};
+    my $contig_details       = $self->groups_to_contigs->{$annotated_group_name};
+
+    # Nothing to draw without coordinates for this group.
+    return ''
+      unless ( defined($contig_details)
+        && defined( $contig_details->{ $self->ordering_key } ) );
+    return ''
+      if ( defined( $contig_details->{comment} )
+        && $contig_details->{comment} ne '' );
+
+    my $coordindates    = $contig_details->{ $self->ordering_key };
+    my $annotation_type = $self->_annotation_type($annotated_group_name);
+    my $colour          = $self->_block_colour( $contig_details->{accessory_label} );
+
+    return join( '',
+        "FT$annotation_type$coordindates\n",
+        "FT                   /label=$annotated_group_name\n",
+        "FT                   /locus_tag=$annotated_group_name\n",
+        "FT                   /colour=$colour\n",
+    );
+}
+
+# Write one "feature" entry per accessory label to $fh. The entry spans the smallest to the
+# largest ordering_key coordinate of the groups carrying that label and is coloured by
+# _block_colour. Groups without an accessory label or a non-empty coordinate are ignored.
+sub _fragment_blocks {
+    my ( $self, $fh ) = @_;
+    my $groups_to_contigs = $self->groups_to_contigs;
+    my %fragment_numbers;
+
+    for my $group ( @{ $self->annotate_groups_obj->_groups } ) {
+        my $annotated_group_name = $self->annotate_groups_obj->_groups_to_consensus_gene_names->{$group};
+
+        # Look the entry up once: chaining ->{name}->{key} would autovivify an empty entry in
+        # groups_to_contigs for every group that has no contig information.
+        my $contig_details = $groups_to_contigs->{$annotated_group_name};
+        next unless ( defined($contig_details) );
+
+        my $accessory_label = $contig_details->{accessory_label};
+        my $coordinate      = $contig_details->{ $self->ordering_key };
+        next unless ( defined($accessory_label) && defined($coordinate) && $coordinate ne '' );
+
+        push( @{ $fragment_numbers{$accessory_label} }, $coordinate );
+    }
+
+    # Sorted labels give a reproducible output order; plain hash key order can differ
+    # from one run to the next.
+    for my $accessory_label ( sort keys %fragment_numbers ) {
+        my @sorted_fragment = sort { $a <=> $b } @{ $fragment_numbers{$accessory_label} };
+        my ( $min, $max ) = @sorted_fragment[ 0, -1 ];
+
+        # A single coordinate is written on its own, several as a min..max range.
+        my $location = ( @sorted_fragment > 1 ) ? $min . '..' . $max : $min;
+
+        my $tab_file_entry = "FT   feature         $location\n";
+        $tab_file_entry .= "FT                   /colour=" . $self->_block_colour($accessory_label) . "\n";
+
+        print {$fh} $tab_file_entry;
+    }
+
+    return;
+}
+
+sub create_files {    # write the main tab file and its EMBL header file, then close both handles
+    my ($self) = @_;
+    my $header_fh = $self->_output_header_fh;
+    print {$header_fh} $self->_header_top();
+    for my $group ( @{ $self->annotate_groups_obj->_groups } ) {
+        print { $self->_output_fh } $self->_block($group);
+        print {$header_fh} $self->_header_block($group);
+    }
+    $self->_fragment_blocks($header_fh);
+    print {$header_fh} $self->_header_bottom();
+    close($header_fh);
+    close( $self->_output_fh );
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/Output/GroupMultifasta.pm b/lib/Bio/Roary/Output/GroupMultifasta.pm
new file mode 100644
index 0000000..97204fe
--- /dev/null
+++ b/lib/Bio/Roary/Output/GroupMultifasta.pm
@@ -0,0 +1,69 @@
+package Bio::Roary::Output::GroupMultifasta;
+
+# ABSTRACT:  Take in a group and create a multifasta file
+
+=head1 SYNOPSIS
+
+Take in a group and create a multifasta file
+   use Bio::Roary::Output::GroupMultifasta;
+   
+   my $obj = Bio::Roary::Output::GroupMultifasta->new(
+       group_name      => 'aaa',
+       analyse_groups  => $analyse_groups,
+       output_filename_base => 'abc'
+     );
+   $obj->create_file();
+
+=cut
+
+use Moose;
+use Bio::SeqIO;
+use Bio::Roary::Exceptions;
+use Bio::Roary::AnalyseGroups;
+
+has 'group_name'           => ( is => 'ro', isa => 'Str',                           required => 1 );
+has 'analyse_groups'       => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups', required => 1 );
+has 'output_filename_base' => ( is => 'ro', isa => 'Str',                           default  => 'output_groups' );
+has '_genes'         => ( is => 'ro', isa  => 'ArrayRef', lazy    => 1, builder => '_build__genes' );
+has '_output_seq_io' => ( is => 'ro', lazy => 1,          builder => '_build__output_seq_io' );
+
+# Builder: FASTA writer for "<output_filename_base>_<group_name>.fa"; every non-word
+# character in the name part is replaced by '_' before the suffix is added.
+sub _build__output_seq_io {
+    my ($self) = @_;
+    ( my $output_name = $self->output_filename_base . '_' . $self->group_name ) =~ s!\W!_!g;
+    return Bio::SeqIO->new( -file => '>' . $output_name . '.fa', -format => 'Fasta' );
+}
+
+sub _build__genes {    # builder: the gene list recorded for group_name in analyse_groups
+    my $self = shift;
+    return $self->analyse_groups->_groups_to_genes->{ $self->group_name };
+}
+
+# First sequence in FASTA file $filename whose display_id equals $gene; undef if none (or no file).
+sub _lookup_sequence {
+    my ( $self, $gene, $filename ) = @_;
+    return undef unless ( defined($filename) );
+    my $fasta_obj = Bio::SeqIO->new( -file => $filename, -format => 'Fasta' );
+    while ( my $seq = $fasta_obj->next_seq() ) {
+        return $seq if ( $seq->display_id eq $gene );
+    }
+    return undef;
+}
+
+# Write every gene of the group whose sequence can be found in its source file; returns 1.
+sub create_file {
+    my ($self) = @_;
+    for my $gene ( @{ $self->_genes } ) {
+        my $source_file = $self->analyse_groups->_genes_to_file->{$gene};
+        my $seq         = $self->_lookup_sequence( $gene, $source_file );
+        $self->_output_seq_io->write_seq($seq) if ( defined($seq) );
+    }
+    return 1;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
+
diff --git a/lib/Bio/Roary/Output/GroupsMultifastaNucleotide.pm b/lib/Bio/Roary/Output/GroupsMultifastaNucleotide.pm
new file mode 100644
index 0000000..0433c58
--- /dev/null
+++ b/lib/Bio/Roary/Output/GroupsMultifastaNucleotide.pm
@@ -0,0 +1,166 @@
+package Bio::Roary::Output::GroupsMultifastaNucleotide;
+
+# ABSTRACT:  Take in a GFF files and a groups file and output one multifasta file per group with nucleotide sequences.
+
+=head1 SYNOPSIS
+
+Take in a GFF files and a groups file and output one multifasta file per group with nucleotide sequences.
+   use Bio::Roary::Output::GroupsMultifastaNucleotide;
+   
+   my $obj = Bio::Roary::Output::GroupsMultifastaNucleotide->new(
+       group_names      => ['aaa','bbb'],
+     );
+   $obj->populate_files();
+
+=cut
+
+use Moose;
+use Bio::SeqIO;
+use File::Path qw(make_path);
+use File::Basename;
+use Bio::Roary::Exceptions;
+use Bio::Roary::AnalyseGroups;
+use Bio::Tools::GFF;
+with 'Bio::Roary::BedFromGFFRole';
+
+has 'gff_file'         => ( is => 'ro', isa => 'Str',                           required => 1 );
+has 'group_names'      => ( is => 'ro', isa => 'ArrayRef',                      required => 0 );
+has 'output_directory' => ( is => 'ro', isa => 'Str',                           required => 1 );
+has 'pan_reference_groups_seen' => ( is => 'rw', isa => 'HashRef',              required => 1 );
+has 'number_of_gff_files'    => ( is => 'ro', isa => 'Int', required => 1 );
+has 'pan_reference_filename' => ( is => 'ro', isa  => 'Str',default  => 'pan_genome_reference.fa' );
+has 'dont_delete_files'      => ( is => 'ro', isa => 'Bool',default  => 0 );
+has 'core_definition'        => ( is => 'ro', isa => 'Num', default  => 1.0 );
+
+has 'annotate_groups'  => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 );
+has 'output_multifasta_files'     => ( is => 'ro', isa => 'Bool',     default  => 0 );
+
+has 'fasta_file'   => ( is => 'ro', isa => 'Str',        lazy => 1, builder => '_build_fasta_file' );
+has '_input_seqio' => ( is => 'ro', isa => 'Bio::SeqIO', lazy => 1, builder => '_build__input_seqio' );
+
+has 'output_filename' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build_output_filename' );
+
+# Builder: "<output_directory>/<gff file name>.tmp_nuc_sequences.fa".
+sub _build_output_filename {
+    my ($self) = @_;
+    my ($filename) = fileparse( $self->gff_file );
+    return join( '/', $self->output_directory, $filename . '.tmp_nuc_sequences.fa' );
+}
+
+sub _build__input_seqio {    # builder: FASTA reader over fasta_file
+    my $self = shift;
+    return Bio::SeqIO->new( -file => $self->fasta_file, -format => 'Fasta' );
+}
+
+sub _bed_output_filename {    # output_filename with '.intermediate.bed' appended
+    my $self = shift;
+    return $self->output_filename . '.intermediate.bed';
+}
+
+# Copy each extracted sequence that belongs to a group into the pan genome reference (first
+# sequence seen per group only) and, unless skipped as non-core, into the group's file.
+sub populate_files {
+    my ($self) = @_;
+    while ( my $input_seq = $self->_input_seqio->next_seq() ) {
+        my $current_group = $self->annotate_groups->_ids_to_groups->{ $input_seq->display_id };
+        next unless ($current_group);
+        my $gene_name = $self->annotate_groups->_groups_to_consensus_gene_names->{$current_group};
+
+        if ( !defined( $self->pan_reference_groups_seen->{$current_group} ) ) {
+            my $pan_output_seq = $self->_pan_genome_reference_io_obj($current_group);
+            $pan_output_seq->write_seq(
+                Bio::Seq->new( -display_id => $input_seq->display_id, -desc => ( $gene_name ? $gene_name : $current_group ), -seq => $input_seq->seq )
+            );
+            $self->pan_reference_groups_seen->{$current_group} = 1;
+        }
+
+        my $number_of_genes = @{ $self->annotate_groups->_groups_to_id_names->{$current_group} };
+
+        # Theres no need to align noncore files
+        next if ( $self->dont_delete_files == 0 && $number_of_genes < ( $self->core_definition * $self->number_of_gff_files ) );
+
+        $self->_group_seq_io_obj( $current_group, $number_of_genes )->write_seq($input_seq);
+    }
+
+    unlink( $self->fasta_file );
+    return 1;
+}
+
+# Path "<output_directory>/<consensus gene name>.fa" for a group, with every non-word
+# character of the gene name replaced by '_'. $num_group_genes is accepted but not used.
+sub _group_file_name {
+    my ( $self, $group_name, $num_group_genes ) = @_;
+    my $safe_name = $self->annotate_groups->_groups_to_consensus_gene_names->{$group_name};
+    $safe_name =~ s!\W!_!gi;
+
+    return join( '/', $self->output_directory, $safe_name . '.fa' );
+}
+
+
+# FASTA writer that appends to pan_reference_filename; a new writer is created on every call.
+sub _pan_genome_reference_io_obj {
+    my $self = shift;
+    return Bio::SeqIO->new( -file => '>>' . $self->pan_reference_filename, -format => 'Fasta' );
+}
+
+
+# FASTA writer that appends to the file named by _group_file_name for the given group.
+sub _group_seq_io_obj {
+    my ( $self, $group_name, $num_group_genes ) = @_;
+    my $append_target = '>>' . $self->_group_file_name( $group_name, $num_group_genes );
+    return Bio::SeqIO->new( -file => $append_target, -format => 'Fasta' );
+}
+
+
+sub _extracted_nucleotide_fasta_file_from_bed_filename {    # output_filename plus '.intermediate.extracted.fa'
+    my $self = shift;
+    return $self->output_filename . '.intermediate.extracted.fa';
+}
+
+
+
+# Shell out to sed and grep to copy what follows the '##FASTA' line of the GFF file into the
+# intermediate nucleotide FASTA file. The file names are passed to the shell unquoted.
+sub _create_nucleotide_fasta_file_from_gff {
+    my ($self) = @_;
+    my $extract = 'sed -n \'/##FASTA/,//p\' ' . $self->gff_file;
+    my $filter  = 'grep -v \'##FASTA\' > ' . $self->_nucleotide_fasta_file_from_gff_filename;
+
+    return system( $extract . ' | ' . $filter );
+}
+
+sub _nucleotide_fasta_file_from_gff_filename {    # output_filename plus '.intermediate.fa'
+    my $self = shift;
+    return $self->output_filename . '.intermediate.fa';
+}
+
+# Produce the extracted nucleotide FASTA file: pull the FASTA section out of the GFF, build
+# the BED file, run 'bedtools getfasta' over the two and return the path of the result.
+sub _extract_nucleotide_regions {
+    my ($self) = @_;
+
+    $self->_create_nucleotide_fasta_file_from_gff;
+    $self->_create_bed_file_from_gff;
+
+    my $genome_fasta    = $self->_nucleotide_fasta_file_from_gff_filename;
+    my $bed_file        = $self->_bed_output_filename;
+    my $extracted_fasta = $self->_extracted_nucleotide_fasta_file_from_bed_filename;
+
+    my $cmd = "bedtools getfasta -s -fi $genome_fasta -bed $bed_file -fo $extracted_fasta -name > /dev/null 2>&1";
+    system($cmd);
+
+    # Remove the intermediates, including the '.fai' file that may sit next to the FASTA input.
+    unlink( $genome_fasta, $bed_file, $genome_fasta . '.fai' );
+    return $extracted_fasta;
+}
+
+sub _build_fasta_file {    # builder: run the extraction and use the path it returns
+    my $self = shift;
+    return $self->_extract_nucleotide_regions;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
+
diff --git a/lib/Bio/Roary/Output/GroupsMultifastaProtein.pm b/lib/Bio/Roary/Output/GroupsMultifastaProtein.pm
new file mode 100644
index 0000000..3b9df70
--- /dev/null
+++ b/lib/Bio/Roary/Output/GroupsMultifastaProtein.pm
@@ -0,0 +1,68 @@
+package Bio::Roary::Output::GroupsMultifastaProtein;
+
+# ABSTRACT:  Take a multifasta nucleotide file and output it as proteins.
+
+=head1 SYNOPSIS
+
+Take a multifasta nucleotide file and output it as proteins.
+   use Bio::Roary::Output::GroupsMultifastaProtein;
+
+   my $obj = Bio::Roary::Output::GroupsMultifastaProtein->new(
+       nucleotide_fasta_file => 'example.fa'
+     );
+   $obj->convert_nucleotide_to_protein();
+
+=cut
+
+use Moose;
+use Bio::SeqIO;
+use File::Path qw(make_path);
+use File::Basename;
+use Bio::Roary::Exceptions;
+use Bio::Roary::AnalyseGroups;
+
+has 'nucleotide_fasta_file' => ( is => 'ro', isa => 'Str',  required => 1 );
+has 'output_filename'       => ( is => 'ro', isa => 'Str',  lazy     => 1, builder => '_build_output_filename' );
+has '_suffix'               => ( is => 'ro', isa => 'Str',  default  => '.faa' );
+has 'translation_table'  => ( is => 'rw', isa => 'Int',      default => 11 );
+
+# Builder: the input path with its final '.ext' part replaced by the _suffix value.
+sub _build_output_filename {
+    my ($self) = @_;
+    my ( $filename, $directories ) = fileparse( $self->nucleotide_fasta_file, qr/\.[^.]*/ );
+
+    return $directories . $filename . $self->_suffix;
+}
+
+# Translate every sequence of the nucleotide FASTA file and write the proteins sorted by ID.
+# All sequences are held in memory for the sort - very small files so shouldnt be a problem.
+sub _fastatranslate {
+    my ($self) = @_;
+    my $nucleotide_in = Bio::SeqIO->new( -file => $self->nucleotide_fasta_file, -format => 'Fasta' );
+    my $protein_out   = Bio::SeqIO->new( -file => ">" . $self->output_filename, -format => 'Fasta', -alphabet => 'protein' );
+
+    my %protein_for;
+    while ( my $seq = $nucleotide_in->next_seq ) {
+        $protein_for{ $seq->display_id } = $seq->translate( -codontable_id => $self->translation_table );
+    }
+
+    # Emit the translations in sorted display_id order.
+    for my $sequence_name ( sort keys %protein_for ) {
+        $protein_out->write_seq( $protein_for{$sequence_name} );
+    }
+
+    return 1;
+}
+
+# Public entry point: run _fastatranslate to write the protein file; always returns 1.
+sub convert_nucleotide_to_protein {
+    my ($self) = @_;
+    $self->_fastatranslate();
+    return 1;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
+
diff --git a/lib/Bio/Roary/Output/GroupsMultifastas.pm b/lib/Bio/Roary/Output/GroupsMultifastas.pm
new file mode 100644
index 0000000..4881806
--- /dev/null
+++ b/lib/Bio/Roary/Output/GroupsMultifastas.pm
@@ -0,0 +1,46 @@
+package Bio::Roary::Output::GroupsMultifastas;
+
+# ABSTRACT:  Take in a list of groups and create multifastas files for each group
+
+=head1 SYNOPSIS
+
+Take in a list of groups and create multifastas files for each group
+   use Bio::Roary::Output::GroupsMultifastas;
+   
+   my $obj = Bio::Roary::Output::GroupsMultifastas->new(
+       group_names      => ['aaa','bbb'],
+       analyse_groups  => $analyse_groups
+     );
+   $obj->create_files();
+
+=cut
+
+use Moose;
+use Bio::Roary::Exceptions;
+use Bio::Roary::AnalyseGroups;
+use Bio::Roary::Output::GroupMultifasta;
+
+has 'group_names'          => ( is => 'ro', isa => 'ArrayRef',                      required => 1 );
+has 'analyse_groups'       => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups', required => 1 );
+has 'output_filename_base' => ( is => 'ro', isa => 'Str',                           default  => 'output_groups' );
+
+# Create one multifasta file per requested group name; returns 1.
+sub create_files {
+    my ($self) = @_;
+    for my $group_name ( @{ $self->group_names } ) {
+        # Check the group name exists
+        next unless ( $self->analyse_groups->_groups_to_genes->{$group_name} );
+        Bio::Roary::Output::GroupMultifasta->new(
+            group_name           => $group_name,
+            analyse_groups       => $self->analyse_groups,
+            output_filename_base => $self->output_filename_base
+        )->create_file;
+    }
+    return 1;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
+
diff --git a/lib/Bio/Roary/Output/GroupsMultifastasNucleotide.pm b/lib/Bio/Roary/Output/GroupsMultifastasNucleotide.pm
new file mode 100644
index 0000000..b466905
--- /dev/null
+++ b/lib/Bio/Roary/Output/GroupsMultifastasNucleotide.pm
@@ -0,0 +1,86 @@
+
+package Bio::Roary::Output::GroupsMultifastasNucleotide;
+
+# ABSTRACT:  Take in a set of GFF files and a groups file and output one multifasta file per group with nucleotide sequences.
+
+=head1 SYNOPSIS
+
+Take in a set of GFF files and a groups file and output one multifasta file per group with nucleotide sequences.
+   use Bio::Roary::Output::GroupsMultifastasNucleotide;
+   
+   my $obj = Bio::Roary::Output::GroupsMultifastasNucleotide->new(
+       group_names      => ['aaa','bbb'],
+       analyse_groups  => $analyse_groups
+     );
+   $obj->create_files();
+
+=cut
+
+use Moose;
+use File::Path qw(make_path);
+use Bio::Roary::Exceptions;
+use Bio::Roary::AnalyseGroups;
+use Bio::Roary::Output::GroupsMultifastaNucleotide;
+
+has 'gff_files'               => ( is => 'ro', isa => 'ArrayRef',                   required => 1 );
+has 'group_names'             => ( is => 'ro', isa => 'ArrayRef',                   required => 0 );
+has 'annotate_groups'         => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 );
+has 'output_multifasta_files' => ( is => 'ro', isa => 'Bool', default  => 0 );
+has 'core_definition'         => ( is => 'ro', isa => 'Num',  default  => 1.0 );
+has 'dont_delete_files'       => ( is => 'ro', isa => 'Bool', default  => 0 );
+has 'output_directory'        => ( is => 'ro', isa => 'Str',  lazy => 1, builder => '_build_output_directory');
+has '_number_of_groups'       => ( is => 'rw', isa => 'Num',  lazy => 1, builder => '_build__number_of_groups' );
+has 'group_limit'             => ( is => 'rw', isa => 'Num',  default => 50000 );
+
+# Builder: the output directory defaults to 'pan_genome_sequences'.
+sub _build_output_directory {
+    my ($self) = @_;
+
+    return 'pan_genome_sequences';
+}
+
+# Builder: the value of _group_counter on the annotate_groups object.
+sub _build__number_of_groups {
+    my ($self) = @_;
+    return $self->annotate_groups->_group_counter;
+}
+
+# Write the nucleotide multifasta output for every GFF file. Returns 0 (after a message on
+# STDERR) when the number of groups exceeds group_limit, otherwise 1.
+sub create_files {
+    my ($self) = @_;
+
+    my $num_groups = $self->_number_of_groups;
+    my $limit      = $self->group_limit;
+    if ( $num_groups > $limit ) {
+        print STDERR "Number of clusters ($num_groups) exceeds limit ($limit). Multifastas not created. Please check the spreadsheet for contamination from different species or increase the --group_limit parameter.\n";
+        return 0;
+    }
+
+    make_path( $self->output_directory );
+    unlink('pan_genome_reference.fa');
+
+    # if its output_multifasta_files == false then you want to create the core genome and delete all intermediate multifasta files
+    my %pan_reference_groups_seen;    # one hash, handed to every per-file object
+    my %shared_settings = (
+        group_names               => $self->group_names,
+        output_directory          => $self->output_directory,
+        annotate_groups           => $self->annotate_groups,
+        output_multifasta_files   => $self->output_multifasta_files,
+        pan_reference_groups_seen => \%pan_reference_groups_seen,
+        core_definition           => $self->core_definition,
+        dont_delete_files         => $self->dont_delete_files,
+        number_of_gff_files       => scalar( @{ $self->gff_files } ),
+    );
+
+    for my $gff_file ( @{ $self->gff_files } ) {
+        Bio::Roary::Output::GroupsMultifastaNucleotide->new( gff_file => $gff_file, %shared_settings )->populate_files;
+    }
+    return 1;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
+
diff --git a/lib/Bio/Roary/Output/NumberOfGroups.pm b/lib/Bio/Roary/Output/NumberOfGroups.pm
new file mode 100644
index 0000000..0dd9370
--- /dev/null
+++ b/lib/Bio/Roary/Output/NumberOfGroups.pm
@@ -0,0 +1,121 @@
+package Bio::Roary::Output::NumberOfGroups;
+
+# ABSTRACT: Create raw output files of group counts for turning into plots
+
+=head1 SYNOPSIS
+
+Create raw output files of group counts for turning into plots
+use Bio::Roary::Output::NumberOfGroups;
+
+my $obj = Bio::Roary::Output::NumberOfGroups->new(
+    group_statistics_obj => $group_stats
+  );
+$obj->create_output_files();
+
+=cut
+
+use Moose;
+use List::Util qw(shuffle);
+use Bio::Roary::AnnotateGroups;
+use Bio::Roary::GroupStatistics;
+
+has 'group_statistics_obj' => ( is => 'ro', isa => 'Bio::Roary::GroupStatistics', required => 1 );
+has 'number_of_iterations' => ( is => 'ro', isa => 'Int', default => 10);
+has 'groups_to_contigs'    => ( is => 'ro', isa => 'Maybe[HashRef]' );
+has 'annotate_groups_obj'  => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 );
+has 'core_definition'      => ( is => 'ro', isa => 'Num', default  => 1.0 );
+
+has 'output_raw_filename_conserved_genes' => ( is => 'ro', isa => 'Str', default => 'number_of_conserved_genes.Rtab' );
+has 'output_raw_filename_unique_genes'    => ( is => 'ro', isa => 'Str', default => 'number_of_unique_genes.Rtab' );
+has 'output_raw_filename_total_genes' => ( is => 'ro', isa => 'Str', default => 'number_of_genes_in_pan_genome.Rtab' );
+has 'output_raw_filename_new_genes'   => ( is => 'ro', isa => 'Str', default => 'number_of_new_genes.Rtab' );
+has '_conserved_genes' => ( is => 'ro', isa => 'ArrayRef', default => sub { [] } );
+has '_unique_genes' => ( is => 'ro', isa => 'ArrayRef', default => sub { [] } );
+has '_total_genes'  => ( is => 'ro', isa => 'ArrayRef', default => sub { [] } );
+has '_new_genes'    => ( is => 'ro', isa => 'ArrayRef', default => sub { [] } );
+
+# Run number_of_iterations gene-expansion passes, then write the four raw output files.
+sub create_output_files {
+    my ($self) = @_;
+    $self->_single_iteration_gene_expansion for ( 1 .. $self->number_of_iterations );
+    my @outputs = (
+        [ $self->output_raw_filename_conserved_genes, $self->_conserved_genes ],
+        [ $self->output_raw_filename_unique_genes,    $self->_unique_genes ],
+        [ $self->output_raw_filename_total_genes,     $self->_total_genes ],
+        [ $self->output_raw_filename_new_genes,       $self->_new_genes ],
+    );
+    $self->_create_raw_output_file( @{$_} ) for @outputs;
+    return 1;
+}
+
+# Write one tab-separated line per iteration to $filename; dies if the file cannot be written.
+sub _create_raw_output_file {
+    my ( $self, $filename, $output_data ) = @_;
+    open( my $fh, '>', $filename ) or die "Couldnt write output file: $filename: $!\n";
+    for my $iterations ( @{$output_data} ) {
+        print {$fh} join( "\t", @{$iterations} ), "\n";
+    }
+    close($fh) or die "Couldnt close output file: $filename: $!\n";    # buffered write errors surface here
+}
+
+# New array ref holding the group statistics object's _sorted_file_names in shuffled order.
+sub _shuffle_input_files {
+    my ($self) = @_;
+    return [ shuffle( @{ $self->group_statistics_obj->_sorted_file_names } ) ];
+}
+
+# One simulation pass: take the input files in shuffled order and, after adding each file,
+# record how many groups are conserved, unique, present in total and newly seen. The four
+# per-file count lists are appended to _conserved_genes, _unique_genes, _total_genes and
+# _new_genes respectively.
+sub _single_iteration_gene_expansion {
+    my ($self) = @_;
+    my %times_seen;    # group => occurrences counted over the files added so far
+    my @conserved_per_file;
+    my @unique_per_file;
+    my @total_per_file;
+    my @new_per_file;
+
+    my $files_counter = 0;
+    for my $input_file ( @{ $self->_shuffle_input_files() } ) {
+        $files_counter++;
+
+        my $groups_in_file = $self->group_statistics_obj->_files_to_groups->{$input_file};
+
+        # Groups met for the first time in this file count as new.
+        my $new_group_counter = 0;
+        for my $group ( @{$groups_in_file} ) {
+            $new_group_counter++ unless ( defined( $times_seen{$group} ) );
+            $times_seen{$group}++;
+        }
+
+        # A group is conserved once its count reaches core_definition times the files added.
+        my $core_threshold = $files_counter * $self->core_definition;
+        my @counts         = values %times_seen;
+
+        my $conserved_groups_counter = grep { $_ >= $core_threshold } @counts;
+        my $unique_groups_counter    = grep { $_ == 1 } @counts;
+        my $total_groups_counter     = @counts;
+
+        # One data point per file for each of the four series.
+        push( @conserved_per_file, $conserved_groups_counter );
+        push( @unique_per_file,    $unique_groups_counter );
+        push( @total_per_file,     $total_groups_counter );
+        push( @new_per_file,       $new_group_counter );
+    }
+
+    # Each attribute collects one list per iteration.
+    push( @{ $self->_conserved_genes }, \@conserved_per_file );
+    push( @{ $self->_unique_genes },    \@unique_per_file );
+    push( @{ $self->_total_genes },     \@total_per_file );
+    push( @{ $self->_new_genes },       \@new_per_file );
+
+    return;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
+
diff --git a/lib/Bio/Roary/Output/QueryGroups.pm b/lib/Bio/Roary/Output/QueryGroups.pm
new file mode 100644
index 0000000..312973d
--- /dev/null
+++ b/lib/Bio/Roary/Output/QueryGroups.pm
@@ -0,0 +1,139 @@
+package Bio::Roary::Output::QueryGroups;
+
+# ABSTRACT:  Output the groups of the union of a set of input isolates
+
+=head1 SYNOPSIS
+
+Output the groups of the union of a set of input isolates
+   use Bio::Roary::Output::QueryGroups;
+   
+   my $obj = Bio::Roary::Output::QueryGroups->new(
+       analyse_groups  => $analyse_groups
+     );
+   $obj->groups_union();
+   $obj->groups_intersection();
+   $obj->groups_complement();
+
+=cut
+
+use Moose;
+use Bio::SeqIO;
+use Bio::Roary::Exceptions;
+use Bio::Roary::AnalyseGroups;
+use POSIX;
+
+has 'analyse_groups'        => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups', required => 1 );
+has 'input_filenames'       => ( is => 'ro', isa => 'ArrayRef',                      required => 1 );
+has 'output_union_filename' => ( is => 'ro', isa => 'Str',                           default  => 'union_of_groups.gg' );
+has 'output_intersection_filename' => ( is => 'ro', isa => 'Str',      default => 'intersection_of_groups.gg' );
+has 'output_complement_filename'   => ( is => 'ro', isa => 'Str',      default => 'complement_of_groups.gg' );
+has 'core_definition'       => ( is => 'ro', isa => 'Num', default => 1.0 );
+# Private attributes below are computed on first access by their builders.
+has '_groups_freq'                 => ( is => 'ro', isa => 'HashRef', lazy    => 1, builder => '_build__groups_freq' );
+has '_groups_intersection' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__groups_intersection' );
+has '_groups_complement'  => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__groups_complement' );
+has '_groups'             => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__groups' );
+has '_number_of_isolates' => ( is => 'ro', isa => 'Int',      lazy => 1, builder => '_builder__number_of_isolates' );    # NOTE: this builder is named _builder__*, not _build__*
+has '_min_no_isolates_for_core' => ( is => 'rw', isa => 'Int',      lazy_build => 1 );
+
+# Builder: the smallest number of isolates a group must occur in to count as
+# core, i.e. the isolate count multiplied by core_definition and rounded up.
+sub _build__min_no_isolates_for_core {
+    my $self = shift;
+    return ceil( $self->_number_of_isolates * $self->core_definition );
+}
+
+sub _builder__number_of_isolates {
+    my ($self) = @_;
+    return scalar @{ $self->input_filenames };    # scalar(): always the count of input files, never the list of names
+}
+
+# Builder: map each group name to a list of genes, recording at most one gene
+# per input file for a group. Genes that are undefined or that have no group in
+# the analyse_groups lookup are ignored.
+sub _build__groups_freq {
+    my $self = shift;
+    my %genes_by_group;
+
+    for my $filename ( @{ $self->input_filenames } ) {
+        my %seen_in_this_file;
+        my $genes = $self->analyse_groups->_files_to_genes->{$filename};
+        for my $gene ( @{$genes} ) {
+            next unless defined $gene;
+            my $group = $self->analyse_groups->_genes_to_groups->{$gene};
+            next if ( !defined($group) || defined( $seen_in_this_file{$group} ) );
+            push( @{ $genes_by_group{$group} }, $gene );
+            $seen_in_this_file{$group} = 1;
+        }
+    }
+    return \%genes_by_group;
+}
+
+# Builder: group names ordered by decreasing number of recorded genes; ties are broken by name so the order is reproducible.
+sub _build__groups {
+    my ($self) = @_;
+    my $groups_freq = $self->_groups_freq;    # work on the reference; no need to copy the hash
+    return [ sort { ( @{ $groups_freq->{$b} } <=> @{ $groups_freq->{$a} } ) || ( $a cmp $b ) } keys %{$groups_freq} ];
+}
+
+# Builder: the groups whose gene list in _groups_freq has at least
+# _min_no_isolates_for_core entries, kept in the order in which _groups lists them.
+sub _build__groups_intersection {
+    my $self = shift;
+
+    my @core_groups = grep {
+        scalar( @{ $self->_groups_freq->{$_} } ) >= $self->_min_no_isolates_for_core
+    } @{ $self->_groups };
+
+    return \@core_groups;
+}
+
+# Builder: every group in _groups that is not part of _groups_intersection.
+sub _build__groups_complement {
+    my $self    = shift;
+    my %is_core = map { ( $_ => 1 ) } @{ $self->_groups_intersection };
+    return [ grep { !$is_core{$_} } @{ $self->_groups } ];
+}
+
+# Write $groups to $filename, one line per group ("<group>: <gene>\t<gene>..."), ordered by decreasing
+# number of genes in _groups_freq. Throws CouldntWriteToFile on open or close failure; returns $self.
+sub _print_out_groups {
+    my ( $self, $filename, $groups ) = @_;
+    open( my $fh, '>', $filename )
+      or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => 'Couldnt write to file: ' . $filename );
+    my $groups_freq = $self->_groups_freq;    # use the reference directly instead of copying the hash
+    for my $group ( sort { @{ $groups_freq->{$b} } <=> @{ $groups_freq->{$a} } } @{$groups} ) {
+        print {$fh} $group . ': ' . join( "\t", @{ $groups_freq->{$group} } ) . "\n";
+    }
+    # Buffered write errors (for example a full disk) only surface when the handle is closed.
+    close($fh) or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => 'Couldnt write to file: ' . $filename );
+    return $self;
+}
+
+sub groups_complement {    # writes _groups_complement to output_complement_filename
+    my $self = shift;
+    return $self->_print_out_groups( $self->output_complement_filename, $self->_groups_complement );
+}
+
+sub groups_intersection {    # writes _groups_intersection to output_intersection_filename
+    my $self = shift;
+    return $self->_print_out_groups( $self->output_intersection_filename, $self->_groups_intersection );
+}
+
+sub groups_union {    # writes all of _groups to output_union_filename
+    my $self = shift;
+    return $self->_print_out_groups( $self->output_union_filename, $self->_groups );
+}
+
+# Write a caller-supplied list of groups to $output_filename, using the same
+# line format as the union, intersection and complement outputs.
+sub groups_with_external_inputs {
+    my ( $self, $output_filename, $groups ) = @_;
+    return $self->_print_out_groups( $output_filename, $groups );
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
+
diff --git a/lib/Bio/Roary/ParallelAllAgainstAllBlast.pm b/lib/Bio/Roary/ParallelAllAgainstAllBlast.pm
new file mode 100644
index 0000000..17474b0
--- /dev/null
+++ b/lib/Bio/Roary/ParallelAllAgainstAllBlast.pm
@@ -0,0 +1,143 @@
+package Bio::Roary::ParallelAllAgainstAllBlast;
+
+# ABSTRACT: Run all against all blast in parallel
+
+=head1 SYNOPSIS
+
+Run blastp in parallel over a FASTA file of proteins
+   use Bio::Roary::ParallelAllAgainstAllBlast;
+   
+   my $obj = Bio::Roary::ParallelAllAgainstAllBlast->new(
+     fasta_file   => 'abc.fa',
+   );
+   $obj->run();
+
+=cut
+
+use Moose;
+use Bio::Roary::Exceptions;
+use Bio::Roary::ChunkFastaFile;
+use Bio::Roary::External::Makeblastdb;
+use Bio::Roary::External::Blastp;
+use Cwd;
+use File::Temp;
+use File::Basename;
+with 'Bio::Roary::JobRunner::Role';
+
+has 'fasta_file'              => ( is => 'ro', isa => 'Str',      required => 1 );
+has 'blast_results_file_name' => ( is => 'ro', isa => 'Str',      lazy => 1, builder => '_build_blast_results_file_name' );
+has 'makeblastdb_exec'        => ( is => 'ro', isa => 'Str',      default => 'makeblastdb' );
+has 'blastp_exec'             => ( is => 'ro', isa => 'Str',      default => 'blastp' );
+has 'segmasker_exec'          => ( is => 'ro', isa => 'Str',      default => 'segmasker' );    # NOTE(review): not referenced elsewhere in this module
+has 'perc_identity'           => ( is => 'ro', isa => 'Num',      default => 98 );
+has '_chunk_fasta_file_obj'   => ( is => 'ro', isa => 'Bio::Roary::ChunkFastaFile', lazy => 1, builder => '_build__chunk_fasta_file_obj' );
+has '_sequence_file_names'    => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__sequence_file_names' );
+has '_makeblastdb_obj'        => ( is => 'ro', isa => 'Bio::Roary::External::Makeblastdb', lazy => 1, builder => '_build__makeblastdb_obj' );
+has '_blast_database'         => ( is => 'ro', isa => 'Str',      lazy => 1, builder => '_build__blast_database' );
+has 'cpus'                    => ( is => 'ro', isa => 'Int',  default => 1 );
+# Scratch directory: created under the current working directory when the object is constructed, with File::Temp's CLEANUP option set.
+has '_working_directory' =>
+  ( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } );
+has '_working_directory_name' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__working_directory_name' );
+# Memory passed to the job runner in run(); derived from the size of fasta_file unless a value is supplied.
+has 'memory_in_mb'  => ( is => 'ro', isa => 'Int',  lazy => 1, builder => '_build_memory_in_mb' );
+
+
+sub BUILD {
+    my $self = shift;
+    $self->_makeblastdb_obj;    # force the lazy builder now; it creates the makeblastdb wrapper and calls run() on it
+}
+
+
+sub _build__blast_database {    # builder: the output_database reported by the makeblastdb wrapper
+    my $makeblastdb = shift->_makeblastdb_obj;
+    return $makeblastdb->output_database;
+}
+
+# Builder: create the makeblastdb wrapper for fasta_file, run it immediately and return the wrapper.
+sub _build__makeblastdb_obj {
+    my $self        = shift;
+    my $makeblastdb = Bio::Roary::External::Makeblastdb->new( fasta_file => $self->fasta_file, exec => $self->makeblastdb_exec, job_runner => $self->job_runner, cpus => $self->cpus );
+    $makeblastdb->run();
+    return $makeblastdb;
+}
+
+sub _build__chunk_fasta_file_obj {    # builder: ChunkFastaFile object over the input fasta file
+    my $self = shift;
+    return Bio::Roary::ChunkFastaFile->new( fasta_file => $self->fasta_file );
+}
+
+sub _build__sequence_file_names {    # builder: the sequence_file_names reported by the fasta chunker
+    my $chunker = shift->_chunk_fasta_file_obj;
+    return $chunker->sequence_file_names;
+}
+
+sub _build__working_directory_name {    # builder: path of the temporary working directory
+    my $self = shift;
+    return $self->_working_directory->dirname;
+}
+
+sub _build_blast_results_file_name {    # builder: "blast_results" inside the working directory
+    my $self = shift;
+    return $self->_working_directory_name . '/' . 'blast_results';
+}
+
+# Concatenate the blast outputs into blast_results_file_name. Throws FileNotFound for a missing input, CouldntWriteToFile if cat fails.
+sub _combine_blast_results {
+    my ( $self, $output_files ) = @_;
+    for my $output_file ( @{$output_files} ) {
+        Bio::Roary::Exceptions::FileNotFound->throw( error => "Cant find blast results: " . $output_file ) unless ( -e $output_file );
+    }
+    system( "cat " . join( ' ', @{$output_files} ) . " > " . $self->blast_results_file_name ) == 0    # a non-zero exit status is reported instead of ignored
+      or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => 'Couldnt write to file: ' . $self->blast_results_file_name );
+    return 1;
+}
+
+# Builder: memory (in MB) to request for the blast jobs. The size of fasta_file
+# in bytes is multiplied by 12 and converted to whole megabytes, with 3000 as
+# the minimum; 3000 is also returned when the file does not exist.
+sub _build_memory_in_mb {
+    my $self       = shift;
+    my $fasta_file = $self->fasta_file;
+    my $minimum_mb = 3000;
+    return $minimum_mb unless ( -e $fasta_file );
+
+    my $estimate = -s $fasta_file;
+    $estimate *= 12;
+    $estimate = int( $estimate / 1000000 );
+
+    return ( $estimate < $minimum_mb ) ? $minimum_mb : $estimate;
+}
+
+# Build one blastp command per chunk of the input fasta, hand all commands to
+# the job runner, then merge the per-chunk outputs into blast_results_file_name.
+sub run {
+    my $self = shift;
+    my ( @expected_output_files, @commands_to_run );
+    for my $chunk_file ( @{ $self->_sequence_file_names } ) {
+        my ($chunk_basename) = fileparse($chunk_file);
+        my $chunk_output = $self->_working_directory_name . '/' . $chunk_basename . '.out';
+
+        my $blastp = Bio::Roary::External::Blastp->new(
+            fasta_file     => $chunk_file,
+            blast_database => $self->_blast_database,
+            exec           => $self->blastp_exec,
+            output_file    => $chunk_output,
+            perc_identity  => $self->perc_identity
+        );
+        push( @expected_output_files, $chunk_output );
+        push( @commands_to_run,       $blastp->_command_to_run() );
+        $self->logger->info( "Running command: " . $blastp->_command_to_run() );
+    }
+    my $job_runner_obj = $self->_job_runner_class->new(
+        commands_to_run => \@commands_to_run, memory_in_mb => $self->memory_in_mb, queue => $self->_queue, cpus => $self->cpus );
+    $job_runner_obj->run();
+    $self->logger->info("Combining blast results");
+    $self->_combine_blast_results( \@expected_output_files );
+    return 1;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/ParseGFFAnnotationRole.pm b/lib/Bio/Roary/ParseGFFAnnotationRole.pm
new file mode 100644
index 0000000..60cb4c1
--- /dev/null
+++ b/lib/Bio/Roary/ParseGFFAnnotationRole.pm
@@ -0,0 +1,32 @@
+package Bio::Roary::ParseGFFAnnotationRole;
+
+# ABSTRACT: A role for parsing a gff file efficiently
+
+=head1 SYNOPSIS
+
+with 'Bio::Roary::ParseGFFAnnotationRole';
+
+=cut
+use Moose::Role;
+use Bio::Tools::GFF;
+
+has 'gff_file' => ( is => 'ro', isa => 'Str', required => 1 );
+# _tags_to_filter is interpolated into the awk regex that is matched against column 3.
+has '_tags_to_filter' => ( is => 'ro', isa => 'Str',             default => '(CDS|ncRNA|tRNA|tmRNA|rRNA)' );
+has '_gff_parser'     => ( is => 'ro', isa => 'Bio::Tools::GFF', lazy    => 1, builder => '_build__gff_parser' );    # NOTE(review): no _build__gff_parser in this role - presumably provided by the consuming class
+has '_awk_filter'     => ( is => 'ro', isa => 'Str',             lazy    => 1, builder => '_build__awk_filter' );
+
+sub _gff_fh_input_string {    # shell pipeline: sed extracts the gff-version..FASTA range, grep drops the FASTA marker, awk filters
+    my $self = shift;
+    return join( '', 'sed -n \'/##gff-version 3/,/##FASTA/p\' ', $self->gff_file, '| grep -v \'##FASTA\'', " | ", $self->_awk_filter );
+}
+
+# Builder: awk program that prints column 9 of every tab-separated line whose
+# column 3 matches the _tags_to_filter pattern.
+sub _build__awk_filter {
+    my $self = shift;
+    my $tags = $self->_tags_to_filter;
+    return 'awk \'BEGIN {FS="\t"};{ if ($3 ~/' . $tags . '/) print $9;}\' ';
+}
+
+1;
diff --git a/lib/Bio/Roary/PostAnalysis.pm b/lib/Bio/Roary/PostAnalysis.pm
new file mode 100644
index 0000000..0997684
--- /dev/null
+++ b/lib/Bio/Roary/PostAnalysis.pm
@@ -0,0 +1,353 @@
+package Bio::Roary::PostAnalysis;
+
+# ABSTRACT: Post analysis of pan genomes
+
+=head1 SYNOPSIS
+
+Create a pan genome
+
+=cut
+
+use Moose;
+use File::Copy;
+use Bio::Roary::InflateClusters;
+use Bio::Roary::AnalyseGroups;
+use Bio::Roary::GroupLabels;
+use Bio::Roary::AnnotateGroups;
+use Bio::Roary::GroupStatistics;
+use Bio::Roary::Output::GroupsMultifastasNucleotide;
+use Bio::Roary::Output::NumberOfGroups;
+use Bio::Roary::OrderGenes;
+use Bio::Roary::Output::EmblGroups;
+use Bio::Roary::SplitGroups;
+use Bio::Roary::AccessoryBinaryFasta;
+use Bio::Roary::External::Fasttree;
+use Bio::Roary::AccessoryClustering;
+use Bio::Roary::AssemblyStatistics;
+use Log::Log4perl qw(:easy);
+
+has 'fasta_files'                 => ( is => 'rw', isa => 'ArrayRef', required => 1 );
+has 'input_files'                 => ( is => 'rw', isa => 'ArrayRef', required => 1 );
+has 'output_filename'             => ( is => 'rw', isa => 'Str',      default  => 'clustered_proteins' );
+has 'output_pan_geneome_filename' => ( is => 'rw', isa => 'Str',      default  => 'pan_genome.fa' );    # NOTE: the "geneome" spelling is part of the attribute name
+has 'output_statistics_filename'  => ( is => 'rw', isa => 'Str',      default  => 'gene_presence_absence.csv' );
+has 'output_multifasta_files'     => ( is => 'ro', isa => 'Bool',     default  => 0 );
+has 'verbose_stats'               => ( is => 'rw', isa => 'Bool',     default  => 0 );
+has 'verbose'                     => ( is => 'rw', isa => 'Bool',     default  => 0 );
+has 'cpus'                        => ( is => 'ro', isa => 'Int',      default  => 1 );
+
+has 'clusters_filename'  => ( is => 'rw', isa => 'Str',  required => 1 );
+has 'dont_delete_files'  => ( is => 'ro', isa => 'Bool', default  => 0 );
+has 'dont_split_groups'  => ( is => 'ro', isa => 'Bool', default  => 0 );
+has 'dont_create_rplots' => ( is => 'rw', isa => 'Bool', default  => 1 );    # with this default, run() does not call create_pan_genome_plots.R
+has 'group_limit'        => ( is => 'rw', isa => 'Num',  default  => 50000 );
+
+has '_output_mcl_filename'                      => ( is => 'ro', isa => 'Str', default => '_uninflated_mcl_groups' );
+has '_output_inflate_unsplit_clusters_filename' => ( is => 'ro', isa => 'Str', default => '_inflated_unsplit_mcl_groups' );
+has '_output_inflate_clusters_filename'         => ( is => 'ro', isa => 'Str', default => '_inflated_mcl_groups' );
+has '_output_group_labels_filename'             => ( is => 'ro', isa => 'Str', default => '_labeled_mcl_groups' );
+has '_output_combined_filename'                 => ( is => 'ro', isa => 'Str', default => '_combined_files' );
+has '_input_cd_hit_groups_file'                 => ( is => 'ro', isa => 'Str', default => '_combined_files.groups' );
+has 'core_accessory_tab_output_filename'        => ( is => 'ro', isa => 'Str', default => 'core_accessory.tab' );
+has 'accessory_tab_output_filename'             => ( is => 'ro', isa => 'Str', default => 'accessory.tab' );
+has 'core_accessory_ordering_key'               => ( is => 'ro', isa => 'Str', default => 'core_accessory_overall_order_filtered' );
+has 'accessory_ordering_key'                    => ( is => 'ro', isa => 'Str', default => 'accessory_overall_order_filtered' );
+has 'core_definition'                           => ( is => 'ro', isa => 'Num', default => 1.0 );
+has 'pan_genome_reference_filename'             => ( is => 'ro', isa => 'Str', default => 'pan_genome_reference.fa' );
+
+has '_inflate_clusters_obj' => ( is => 'ro', isa => 'Bio::Roary::InflateClusters', lazy => 1, builder => '_build__inflate_clusters_obj' );
+has '_group_labels_obj'     => ( is => 'ro', isa => 'Bio::Roary::GroupLabels',     lazy => 1, builder => '_build__group_labels_obj' );
+has '_annotate_groups_obj'  => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups',  lazy => 1, builder => '_build__annotate_groups_obj' );
+has '_analyse_groups_obj'   => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups',   lazy => 1, builder => '_build__analyse_groups_obj' );
+has '_order_genes_obj'      => ( is => 'ro', isa => 'Bio::Roary::OrderGenes',      lazy => 1, builder => '_build__order_genes_obj' );
+has '_group_statistics_obj' => ( is => 'ro', isa => 'Bio::Roary::GroupStatistics', lazy => 1, builder => '_build__group_statistics_obj' );
+has '_number_of_groups_obj' =>
+  ( is => 'ro', isa => 'Bio::Roary::Output::NumberOfGroups', lazy => 1, builder => '_build__number_of_groups_obj' );
+has '_accessory_binary_fasta' =>
+  ( is => 'ro', isa => 'Bio::Roary::AccessoryBinaryFasta', lazy => 1, builder => '_build__accessory_binary_fasta' );
+has '_groups_multifastas_nuc_obj' =>
+  ( is => 'ro', isa => 'Bio::Roary::Output::GroupsMultifastasNucleotide', lazy => 1, builder => '_build__groups_multifastas_nuc_obj' );
+has '_split_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::SplitGroups', lazy => 1, builder => '_build__split_groups_obj' );
+has '_accessory_binary_tree' =>
+  ( is => 'ro', isa => 'Bio::Roary::External::Fasttree', lazy => 1, builder => '_build__accessory_binary_tree' );
+has '_accessory_clustering' =>
+  ( is => 'ro', isa => 'Maybe[Bio::Roary::AccessoryClustering]', lazy => 1, builder => '_build__accessory_clustering' );    # undef when the builder finds no usable accessory binary fasta
+has '_assembly_statistics' => ( is => 'ro', isa => 'Bio::Roary::AssemblyStatistics', lazy => 1, builder => '_build__assembly_statistics' );
+
+has 'logger' => ( is => 'ro', lazy => 1, builder => '_build_logger' );
+
+# Builder: initialise Log::Log4perl through easy_init and return the logger
+# obtained from get_logger().
+sub _build_logger {
+    my $self = shift;
+    Log::Log4perl->easy_init( level => $ERROR );
+    return get_logger();
+}
+
+sub run {
+    my ($self) = @_;
+    # Run the post-analysis steps in a fixed order; most steps are announced with an info log message first.
+    $self->logger->info("Reinflate clusters");
+    $self->_inflate_clusters_obj->inflate();
+
+    $self->logger->info("Split groups with paralogs");
+    ## SPLIT GROUPS WITH PARALOGS ##
+    if ( $self->dont_split_groups ) {
+        move( $self->_output_inflate_unsplit_clusters_filename, $self->_output_inflate_clusters_filename );    # no splitting: the unsplit file becomes the final clusters file
+    }
+    else {
+        $self->_split_groups_obj->split_groups;
+    }
+
+    $self->logger->info("Labelling the groups");
+    $self->_group_labels_obj->add_labels();
+
+    $self->logger->info("Transfering the annotation to the groups");
+    $self->_annotate_groups_obj->reannotate;
+
+    $self->logger->info("Creating accessory binary gene presence and absence fasta");
+    $self->_accessory_binary_fasta->create_accessory_binary_fasta;
+
+    $self->logger->info("Creating accessory binary gene presence and absence tree");
+    $self->_accessory_binary_tree->run;
+
+    $self->logger->info("Creating accessory gene presence and absence clusters");
+    if ( $self->_accessory_clustering ) {    # undef when the builder found no usable accessory binary fasta
+        $self->_accessory_clustering->sample_weights;
+    }
+
+    $self->logger->info("Creating the spreadsheet with gene presence and absence");
+    $self->_group_statistics_obj->create_spreadsheet;
+	$self->_group_statistics_obj->create_rtab;
+
+    $self->logger->info("Creating summary statistics of the spreadsheet");
+    $self->_assembly_statistics->create_summary_output;
+
+    $self->logger->info("Creating tab files for R");
+    $self->_number_of_groups_obj->create_output_files;
+
+    system("create_pan_genome_plots.R") unless ( $self->dont_create_rplots == 1 );    # skipped by default: dont_create_rplots defaults to 1
+
+    $self->logger->info("Create EMBL files");
+    $self->_create_embl_files;
+    # A create_files() result of 0 is treated further down as "too many clusters".
+    my $clusters_not_exceeded = 1;
+    if ( $self->output_multifasta_files ) {
+        $self->logger->info("Creating files with the nucleotide sequences for every cluster");
+        $clusters_not_exceeded = $self->_groups_multifastas_nuc_obj->create_files();
+    }
+
+    $self->_delete_intermediate_files;
+    if ( $clusters_not_exceeded == 0 && $self->output_multifasta_files ) {
+        $self->logger->error("Exiting early because number of clusters is too high");
+        exit();    # NOTE(review): ends the whole process, after the intermediate files have been cleaned up
+    }
+}
+
+# Builder: AssemblyStatistics object reading the spreadsheet named by _group_statistics_obj.
+sub _build__assembly_statistics {
+    my $self = shift;
+    return Bio::Roary::AssemblyStatistics->new(
+        spreadsheet     => $self->_group_statistics_obj->output_filename,
+        core_definition => $self->core_definition,
+        logger          => $self->logger );
+}
+
+# Builder: clustering over the accessory binary fasta. Returns undef (allowed by
+# the Maybe[...] type) when that file is missing or is 5 bytes or smaller.
+sub _build__accessory_clustering {
+    my $self         = shift;
+    my $binary_fasta = $self->_accessory_binary_fasta->output_filename;
+
+    if ( !( ( -e $binary_fasta ) && ( -s $binary_fasta > 5 ) ) ) {
+        $self->logger->info("Theres no accessory binary file so skipping accessory binary clustering");
+        return undef;
+    }
+
+    $self->logger->info($binary_fasta);
+    return Bio::Roary::AccessoryClustering->new(
+        input_file => $binary_fasta,
+        cpus       => $self->cpus, logger => $self->logger );
+}
+
+# Builder: Fasttree wrapper whose input is the accessory binary fasta file.
+sub _build__accessory_binary_tree {
+    my $self = shift;
+    return Bio::Roary::External::Fasttree->new(
+        input_file => $self->_accessory_binary_fasta->output_filename,
+        verbose    => $self->verbose,
+        logger     => $self->logger );
+}
+
+# Builder: AccessoryBinaryFasta object over the fasta files and the annotate/analyse group objects.
+sub _build__accessory_binary_fasta {
+    my $self = shift;
+    return Bio::Roary::AccessoryBinaryFasta->new(
+        input_files         => $self->fasta_files,
+        annotate_groups_obj => $self->_annotate_groups_obj,
+        analyse_groups_obj  => $self->_analyse_groups_obj,
+        logger              => $self->logger );
+}
+
+# Builder: SplitGroups object that reads the unsplit inflated clusters and writes the final clusters file.
+sub _build__split_groups_obj {
+    my $self = shift;
+    return Bio::Roary::SplitGroups->new(
+        groupfile   => $self->_output_inflate_unsplit_clusters_filename,
+        gff_files   => $self->input_files,
+        fasta_files => $self->fasta_files,
+        outfile     => $self->_output_inflate_clusters_filename,
+        dont_delete => $self->dont_delete_files,
+        logger      => $self->logger );
+}
+
+# Builder: NumberOfGroups output object fed from the group statistics, gene ordering and annotation objects.
+sub _build__number_of_groups_obj {
+    my $self = shift;
+    return Bio::Roary::Output::NumberOfGroups->new(
+        group_statistics_obj => $self->_group_statistics_obj,
+        groups_to_contigs    => $self->_order_genes_obj->groups_to_contigs,
+        annotate_groups_obj  => $self->_annotate_groups_obj,
+        core_definition      => $self->core_definition,
+        logger               => $self->logger );
+}
+
+# Builder: GroupStatistics object writing to output_statistics_filename; verbose_stats is passed as _verbose.
+sub _build__group_statistics_obj {
+    my $self = shift;
+    return Bio::Roary::GroupStatistics->new(
+        output_filename     => $self->output_statistics_filename,
+        annotate_groups_obj => $self->_annotate_groups_obj,
+        analyse_groups_obj  => $self->_analyse_groups_obj,
+        groups_to_contigs   => $self->_order_genes_obj->groups_to_contigs,
+        _verbose            => $self->verbose_stats,
+        logger              => $self->logger );
+}
+
+# Builder: gene-ordering object. When an accessory clustering is available its
+# sample weights and sample-to-cluster mapping are passed on as well; otherwise
+# OrderGenes is constructed without those two arguments.
+sub _build__order_genes_obj {
+    my $self       = shift;
+    my $clustering = $self->_accessory_clustering;
+
+    return Bio::Roary::OrderGenes->new(
+        analyse_groups_obj => $self->_analyse_groups_obj,
+        gff_files          => $self->input_files,
+        core_definition    => $self->core_definition,
+        # Optional arguments: contribute nothing to the list when there is no clustering.
+        (
+            defined($clustering)
+            ? ( sample_weights => $clustering->sample_weights,
+                samples_to_clusters => $clustering->samples_to_clusters )
+            : ()
+        ),
+        logger => $self->logger
+    );
+}
+
+# Builder: GroupLabels object that reads the inflated clusters file and writes the labelled groups file.
+sub _build__group_labels_obj {
+    my $self = shift;
+    return Bio::Roary::GroupLabels->new(
+        groups_filename => $self->_output_inflate_clusters_filename,
+        output_filename => $self->_output_group_labels_filename,
+        logger          => $self->logger );
+}
+
+# Builder: AnnotateGroups object that reads the labelled groups file and writes output_filename.
+sub _build__annotate_groups_obj {
+    my $self = shift;
+    return Bio::Roary::AnnotateGroups->new(
+        gff_files       => $self->input_files,
+        output_filename => $self->output_filename,
+        groups_filename => $self->_output_group_labels_filename,
+        logger          => $self->logger );
+}
+
+# Builder: AnalyseGroups object over the fasta files, with output_filename as its groups file.
+sub _build__analyse_groups_obj {
+    my $self = shift;
+    return Bio::Roary::AnalyseGroups->new(
+        fasta_files     => $self->fasta_files,
+        groups_filename => $self->output_filename,
+        logger          => $self->logger );
+}
+
+# Builder: InflateClusters object whose output is the unsplit inflated clusters file.
+sub _build__inflate_clusters_obj {
+    my $self = shift;
+    return Bio::Roary::InflateClusters->new(
+        clusters_filename     => $self->clusters_filename,
+        cdhit_groups_filename => $self->_input_cd_hit_groups_file,
+        mcl_filename          => $self->_output_mcl_filename,
+        output_file           => $self->_output_inflate_unsplit_clusters_filename,
+        logger                => $self->logger );
+}
+
+# Builder: GroupsMultifastasNucleotide output object for the groups known to _analyse_groups_obj.
+sub _build__groups_multifastas_nuc_obj {
+    my $self = shift;
+    return Bio::Roary::Output::GroupsMultifastasNucleotide->new(
+        output_multifasta_files => $self->output_multifasta_files,
+        gff_files               => $self->input_files,
+        annotate_groups         => $self->_annotate_groups_obj,
+        group_names             => $self->_analyse_groups_obj->_groups,
+        group_limit             => $self->group_limit,
+        core_definition         => $self->core_definition,
+        dont_delete_files       => $self->dont_delete_files,
+        logger                  => $self->logger );
+}
+
+# Create the two EmblGroups outputs: first the core+accessory tab file, then
+# the accessory tab file. Both use the same group objects and differ only in
+# the output file name and the ordering key.
+sub _create_embl_files {
+    my $self = shift;
+
+    my $write_tab_file = sub {
+        my ( $output_filename, $ordering_key ) = @_;
+        return Bio::Roary::Output::EmblGroups->new(
+            output_filename     => $output_filename,
+            annotate_groups_obj => $self->_annotate_groups_obj,
+            analyse_groups_obj  => $self->_analyse_groups_obj,
+            ordering_key        => $ordering_key,
+            groups_to_contigs   => $self->_order_genes_obj->groups_to_contigs,
+            logger              => $self->logger
+        )->create_files;
+    };
+
+    $write_tab_file->( $self->core_accessory_tab_output_filename, $self->core_accessory_ordering_key );
+
+    return $write_tab_file->( $self->accessory_tab_output_filename, $self->accessory_ordering_key );
+}
+
+# Remove the intermediate files of a run unless dont_delete_files is set: the
+# fasta files that exist, the files named by the filename attributes and a
+# fixed list of relative file names.
+sub _delete_intermediate_files {
+    my $self = shift;
+    return if ( $self->dont_delete_files == 1 );
+    $self->logger->info("Cleaning up files");
+
+    for my $fasta_file ( @{ $self->fasta_files } ) {
+        unlink($fasta_file) if ( -e $fasta_file );
+    }
+
+    my @leftovers = (
+        $self->_output_mcl_filename, $self->_output_inflate_clusters_filename, $self->_output_group_labels_filename,
+        $self->_output_combined_filename, $self->clusters_filename,
+        $self->clusters_filename . '.clstr', $self->clusters_filename . '.bak.clstr',
+        '_gff_files', '_fasta_files', '_clustered_filtered.fa',
+        $self->_input_cd_hit_groups_file,
+        'database_masking.asnb', '_clustered', '_accessory_clusters',
+    );
+    unlink($_) for @leftovers;
+
+    # The value of the sub is the result of this final unlink.
+    return unlink('_accessory_clusters.clstr');
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/PrepareInputFiles.pm b/lib/Bio/Roary/PrepareInputFiles.pm
new file mode 100644
index 0000000..bedb9bb
--- /dev/null
+++ b/lib/Bio/Roary/PrepareInputFiles.pm
@@ -0,0 +1,116 @@
+package Bio::Roary::PrepareInputFiles;
+
+# ABSTRACT: Take in a mixture of FASTA and GFF input files and output FASTA proteomes only
+
+=head1 SYNOPSIS
+
+Take in a mixture of FASTA and GFF input files and output FASTA proteomes only
+   use Bio::Roary::PrepareInputFiles;
+   
+   my $obj = Bio::Roary::PrepareInputFiles->new(
+     input_files   => ['abc.gff','ddd.faa'],
+   );
+   $obj->fasta_files;
+
+=cut
+
+use Moose;
+use Bio::Roary::Exceptions;
+use Bio::Roary::ExtractProteomeFromGFFs;
+use Bio::Roary::FilterUnknownsFromFasta;
+use Cwd qw(getcwd); 
+use File::Temp;
+
+has 'input_files'      => ( is => 'ro', isa => 'ArrayRef',        required => 1 );
+has 'job_runner'       => ( is => 'ro', isa => 'Str',             default  => 'Local' );
+has 'cpus'             => ( is => 'ro', isa => 'Int',      default => 1 );
+has '_input_gff_files' => ( is => 'ro', isa => 'Maybe[ArrayRef]', lazy     => 1, builder => '_build__input_gff_files' );    # inputs whose name ends in .gff
+has '_input_fasta_files' => ( is => 'ro', isa => 'Maybe[ArrayRef]', lazy => 1, builder => '_build__input_fasta_files' );    # all other inputs
+has '_input_fasta_files_filtered' =>
+  ( is => 'ro', isa => 'Maybe[ArrayRef]', lazy => 1, builder => '_build__input_fasta_files_filtered' );
+has '_input_fasta_files_filtered_obj' =>
+    ( is => 'ro', isa => 'Bio::Roary::FilterUnknownsFromFasta', lazy => 1, builder => '_build__input_fasta_files_filtered_obj' );
+
+# Fasta files derived from the GFF inputs, and the object that extracts them.
+has '_derived_fasta_files' => ( is => 'ro', isa => 'Maybe[ArrayRef]', lazy => 1, builder => '_build__derived_fasta_files' );
+has '_extract_proteome_obj' =>
+  ( is => 'ro', isa => 'Bio::Roary::ExtractProteomeFromGFFs', lazy => 1, builder => '_build__extract_proteome_obj' );
+has 'apply_unknowns_filter' => ( is => 'rw', isa => 'Bool', default => 1 );
+has 'translation_table'     => ( is => 'rw', isa => 'Int',  default => 11 );
+has 'verbose'               => ( is => 'rw', isa => 'Bool', default => 0 );
+# Same class and builder as _input_fasta_files_filtered_obj. The isa has to name
+# FilterUnknownsFromFasta, and the builder must not be this attribute's own accessor:
+# a lazy attribute that builds itself through its accessor recurses on first read.
+has '_fasta_filter_obj' =>
+  ( is => 'ro', isa => 'Bio::Roary::FilterUnknownsFromFasta', lazy => 1, builder => '_build__input_fasta_files_filtered_obj' );
+has 'working_directory'    => ( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } );
+
+# Builder: the input files whose names end in ".gff".
+sub _build__input_gff_files {
+    my $self = shift;
+    return [ grep { /\.gff$/ } @{ $self->input_files } ];
+}
+
+# Builder: every input file whose name does not end in ".gff".
+sub _build__input_fasta_files {
+    my $self = shift;
+    return [ grep { !/\.gff$/ } @{ $self->input_files } ];
+}
+
+sub _build__input_fasta_files_filtered_obj {    # builder: filter object over the non-GFF input files
+    my $self = shift;
+    return Bio::Roary::FilterUnknownsFromFasta->new( fasta_files => $self->_input_fasta_files );
+}
+
+# Builder: the filtered_fasta_files of the filter object, or undef when
+# _input_fasta_files itself is undefined.
+sub _build__input_fasta_files_filtered {
+    my $self = shift;
+    return defined( $self->_input_fasta_files ) ? $self->_input_fasta_files_filtered_obj->filtered_fasta_files() : undef;
+}
+
+# Builder: ExtractProteomeFromGFFs object over the GFF inputs, sharing this object's working directory.
+sub _build__extract_proteome_obj {
+    my $self = shift;
+    return Bio::Roary::ExtractProteomeFromGFFs->new(
+        gff_files             => $self->_input_gff_files,
+        job_runner            => $self->job_runner,
+        apply_unknowns_filter => $self->apply_unknowns_filter,
+        translation_table     => $self->translation_table,
+        cpus                  => $self->cpus,
+        verbose               => $self->verbose,
+        working_directory     => $self->working_directory );
+}
+
+# Builder: the fasta_files of the proteome extractor, or undef when _input_gff_files is undefined.
+sub _build__derived_fasta_files {
+    my $self = shift;
+    return defined( $self->_input_gff_files ) ? $self->_extract_proteome_obj->fasta_files() : undef;
+}
+
+# Return a new array reference: the filtered fasta inputs followed by the fasta files derived from the GFF inputs.
+sub fasta_files {
+    my ($self) = @_;    # both attributes are Maybe[ArrayRef]; an undef value is treated as an empty list instead of dying
+    return [ @{ $self->_input_fasta_files_filtered || [] }, @{ $self->_derived_fasta_files || [] } ];
+}
+
+# Translate the given input file names into fasta file names: the entry in the
+# proteome extractor's fasta_files_to_gff_files when one is defined, otherwise
+# the filter object's input_fasta_to_output_fasta entry. Returns an array ref.
+sub lookup_fasta_files_from_unknown_input_files {
+    my ( $self, $input_files ) = @_;
+    $self->fasta_files;    # called only so that the lazy attributes it reads are built first
+
+    my @output_fasta_files;
+    for my $input_file ( @{$input_files} ) {
+        my $from_gff = $self->_extract_proteome_obj->fasta_files_to_gff_files->{$input_file};
+        push( @output_fasta_files,
+            defined($from_gff) ? $from_gff : $self->_input_fasta_files_filtered_obj->input_fasta_to_output_fasta->{$input_file} );
+    }
+    return \@output_fasta_files;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/PresenceAbsenceMatrix.pm b/lib/Bio/Roary/PresenceAbsenceMatrix.pm
new file mode 100644
index 0000000..0796aeb
--- /dev/null
+++ b/lib/Bio/Roary/PresenceAbsenceMatrix.pm
@@ -0,0 +1,84 @@
+package Bio::Roary::PresenceAbsenceMatrix;
+
+# ABSTRACT: Create a matrix with presence and absence
+
+=head1 SYNOPSIS
+
+Create a matrix with presence and absence. Since it is computationally intensive to generate the inputs, calculate them once
+in the GroupStatistics module and pass them through.
+   use Bio::Roary::PresenceAbsenceMatrix;
+   
+   my $obj = Bio::Roary::PresenceAbsenceMatrix->new(
+     annotate_groups_obj => $annotate_groups_obj,
+     output_filename     => 'gene_presence_absence.Rtab',
+     sorted_file_names   => $sorted_file_names,
+     groups_to_files     => $groups_to_files,
+     num_files_in_groups => $num_files_in_groups,
+     sample_headers      => $sample_headers,
+   );
+   $obj->create_matrix_file;
+
+=cut
+
+use Moose;
+use Text::CSV;
+use Bio::SeqIO;
+use Bio::Roary::Exceptions;
+use Bio::Roary::AnnotateGroups;
+
+has 'annotate_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 );
+has 'sorted_file_names'   => ( is => 'ro', isa => 'ArrayRef',                   required => 1 );
+has 'groups_to_files'     => ( is => 'ro', isa => 'HashRef',                    required => 1 );
+has 'num_files_in_groups' => ( is => 'ro', isa => 'HashRef',                    required => 1 );
+has 'sample_headers'      => ( is => 'ro', isa => 'ArrayRef',                   required => 1 );
+has 'output_filename'     => ( is => 'ro', isa => 'Str',                        default  => 'gene_presence_absence.Rtab' );
+
+has '_output_fh' => ( is => 'ro', lazy => 1, builder => '_build__output_fh' );
+has '_text_csv_obj' => ( is => 'ro', isa => 'Text::CSV', lazy => 1, builder => '_build__text_csv_obj' );
+
+sub _build__output_fh {    # builder: write handle on output_filename
+    my $self = shift;
+    my $opened = open( my $fh, '>', $self->output_filename );
+    Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => "Couldnt write output file:" . $self->output_filename ) unless $opened;
+    return $fh;
+}
+
+sub _build__text_csv_obj {    # builder: tab-separated Text::CSV writer ending each row with CRLF
+    my %csv_options = ( binary => 1, always_quote => 0, sep_char => "\t", eol => "\r\n" );
+    return Text::CSV->new( \%csv_options );
+}
+
+# Write the presence/absence matrix to output_filename: a header row of 'Gene'
+# followed by the sample headers, then one row per group holding the group's
+# consensus gene name and, per file in sorted_file_names, 1 if the group has at
+# least one gene from that file and 0 otherwise. Closes the handle; returns $self.
+sub create_matrix_file {
+    my ($self) = @_;
+
+    # Header row. Built as a copy: unshifting 'Gene' onto sample_headers itself would
+    # alter the caller's array, and every further use of it would see the extra column.
+    my @header_row = ( 'Gene', @{ $self->sample_headers } );
+    $self->_text_csv_obj->print( $self->_output_fh, \@header_row );
+
+    # Highest num_files_in_groups value first; ties are ordered by group name.
+    for my $group ( sort { $self->num_files_in_groups->{$b} <=> $self->num_files_in_groups->{$a} || $a cmp $b }
+        keys %{ $self->num_files_in_groups } )
+    {
+        my @row = ( $self->annotate_groups_obj->_groups_to_consensus_gene_names->{$group} );
+
+        for my $filename ( @{ $self->sorted_file_names } ) {
+            my $group_to_file_genes = $self->groups_to_files->{$group}->{$filename};
+            my $present = ( defined($group_to_file_genes) && @{$group_to_file_genes} > 0 ) ? 1 : 0;
+            push( @row, $present );
+        }
+        $self->_text_csv_obj->print( $self->_output_fh, \@row );
+    }
+
+    close( $self->_output_fh );
+    return $self;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/QC/Report.pm b/lib/Bio/Roary/QC/Report.pm
new file mode 100644
index 0000000..3baa8b2
--- /dev/null
+++ b/lib/Bio/Roary/QC/Report.pm
@@ -0,0 +1,241 @@
+package Bio::Roary::QC::Report;
+
+# ABSTRACT: generate a report based on kraken output
+
+=head1 SYNOPSIS
+
+=cut
+
+use Moose;
+use File::Temp;
+use File::Path 'rmtree';
+use Cwd;
+use File::Basename;
+with 'Bio::Roary::JobRunner::Role';
+
+has 'input_files'        => ( is => 'ro', isa => 'ArrayRef', required => 1 );
+has 'kraken_exec'        => ( is => 'ro', isa => 'Str',      default => 'kraken' );
+has 'kraken_report_exec' => ( is => 'ro', isa => 'Str',      default => 'kraken-report' );
+has 'kraken_db'          => ( is => 'ro', isa => 'Str',      required => 1 );
+has 'outfile'            => ( is => 'rw', isa => 'Str',      default => 'qc_report.csv' );
+has '_kraken_data'       => ( is => 'rw', isa => 'ArrayRef', lazy_build => 1 );
+has '_header'            => ( is => 'rw', isa => 'Str',      lazy_build => 1 );
+has 'kraken_memory'      => ( is => 'rw', isa => 'Int',      default => 2000 );
+
+has '_tmp_directory_obj' => ( is => 'rw', lazy_build => 1 );
+has '_tmp_directory'     => ( is => 'rw', lazy_build => 1, isa => 'Str', );
+
+
+# Path, inside the temporary directory, of the nucleotide FASTA extracted from $gff:
+# the GFF's basename with a trailing ".gff" removed and ".fna" appended.
+sub _nuc_fasta_filename {
+    my $self     = shift;
+    my $gff_path = shift;
+    my $stem     = basename( $gff_path, ".gff" );
+    return join( '/', $self->_tmp_directory, "$stem.fna" );
+}
+
+sub _extract_nuc_fasta_cmd {
+    my ( $self, $gff ) = @_;
+    my $fna_path = $self->_nuc_fasta_filename($gff);
+
+    # sed prints the range that starts at the ##FASTA marker; grep then drops the marker line itself.
+    return join( ' ', "sed -n '/##FASTA/,//p'", $gff, '|', "grep -v '##FASTA'", '>', $fna_path );
+}
+
+# Extract the nucleotide FASTA section of every input file into the temporary
+# directory via the job runner; returns an arrayref of the resulting .fna paths.
+sub _extract_nuc_files_from_all_gffs {
+    my $self = shift;
+    my ( @fna_paths, @extract_cmds );
+    foreach my $gff ( @{ $self->input_files } ) {
+        push @fna_paths,    $self->_nuc_fasta_filename($gff);
+        push @extract_cmds, $self->_extract_nuc_fasta_cmd($gff);
+    }
+
+    my %runner_args = (
+        commands_to_run => \@extract_cmds,
+        memory_in_mb    => $self->kraken_memory,
+        verbose         => $self->verbose,
+        cpus            => $self->cpus,
+    );
+    $self->_job_runner_class->new(%runner_args)->run();
+    return \@fna_paths;
+}
+
+# Shell command that runs kraken on one FASTA file, writing the classification
+# to $kraken_output and discarding kraken's stdout and stderr.
+sub _kraken_cmd {
+    # The input file is deliberately not called $a: a lexical $a would hide the
+    # package variable that sort() blocks rely on.
+    my ( $self, $fasta_file, $kraken_output ) = @_;
+
+    return $self->kraken_exec . " --fasta-input " . " --preload " . " --db " . $self->kraken_db
+      . " --output $kraken_output $fasta_file  > /dev/null 2>&1";
+}
+
+# Shell command that runs kraken-report on the kraken output file $k and
+# redirects the report to $report_output.
+sub _kraken_report_cmd {
+    my ( $self, $k, $report_output ) = @_;
+
+    my @words = ( $self->kraken_report_exec, '--db', $self->kraken_db, $k, '>', $report_output );
+    return join( ' ', @words );
+}
+
+sub _kraken_output_filename {
+    my ( $self, $assembly ) = @_;
+
+    # Swap a trailing "fna" for "kraken"; names without that ending come back unchanged.
+    ( my $kraken_path = $assembly ) =~ s/fna$/kraken/;
+    return $kraken_path;
+}
+
+# Run kraken (through the job runner) on every nucleotide FASTA file listed in
+# $nuc_files, delete those FASTA files afterwards, and return an arrayref of
+# the kraken output paths in the same order.
+sub _run_kraken_on_nuc_files {
+    my ( $self, $nuc_files ) = @_;
+
+    my ( @kraken_paths, @kraken_cmds );
+    foreach my $fna ( @{$nuc_files} ) {
+        my $kraken_path = $self->_kraken_output_filename($fna);
+        push @kraken_paths, $kraken_path;
+        push @kraken_cmds,  $self->_kraken_cmd( $fna, $kraken_path );
+    }
+
+    # All commands are handed to a single job runner instance.
+    my %runner_args = (
+        commands_to_run => \@kraken_cmds,
+        memory_in_mb    => $self->kraken_memory,
+        verbose         => $self->verbose,
+        cpus            => $self->cpus,
+    );
+    $self->_job_runner_class->new(%runner_args)->run();
+
+    # Remove the extracted FASTA files now that kraken has been run on them.
+    unlink($_) for @{$nuc_files};
+
+    return \@kraken_paths;
+}
+
+# Name of the kraken-report output for a given kraken output file: ".report" is appended.
+sub _kraken_report_output_filename {
+    my ( $self, $assembly ) = @_;
+    return "${assembly}.report";
+}
+
+# Run kraken-report (through the job runner) on every kraken output file in
+# $kraken_files, delete those kraken files afterwards, and return an arrayref
+# of the report paths in the same order.
+sub _run_kraken_report_on_kraken_files {
+    my ( $self, $kraken_files ) = @_;
+
+    my ( @report_paths, @report_cmds );
+    foreach my $kraken_file ( @{$kraken_files} ) {
+        my $report_path = $self->_kraken_report_output_filename($kraken_file);
+        push @report_paths, $report_path;
+        push @report_cmds,  $self->_kraken_report_cmd( $kraken_file, $report_path );
+    }
+
+    my %runner_args = (
+        commands_to_run => \@report_cmds,
+        memory_in_mb    => $self->kraken_memory,
+        verbose         => $self->verbose,
+        cpus            => $self->cpus,
+    );
+    $self->_job_runner_class->new(%runner_args)->run();
+
+    # Remove the kraken output files now that kraken-report has been run on them.
+    unlink($_) for @{$kraken_files};
+
+    return \@report_paths;
+}
+
+# Builder for _kraken_data: runs FASTA extraction, kraken and kraken-report in
+# turn, then parses the resulting report files.
+sub _build__kraken_data {
+    my ($self) = @_;
+    my $fna_files    = $self->_extract_nuc_files_from_all_gffs();
+    my $kraken_files = $self->_run_kraken_on_nuc_files($fna_files);
+    return $self->_parse_kraken_reports( $self->_run_kraken_report_on_kraken_files($kraken_files) );
+}
+
+# Turn every kraken report into a row via _parse_kraken_report, then delete the
+# report files. Returns an arrayref of the rows, in input order.
+sub _parse_kraken_reports {
+    my ( $self, $kraken_report_files ) = @_;
+    my @report_paths = @{$kraken_report_files};
+
+    # Parse everything first; nothing is deleted until all reports have been read.
+    my @report_rows;
+    foreach my $report_path (@report_paths) {
+        push @report_rows, $self->_parse_kraken_report($report_path);
+    }
+
+    foreach my $report_path (@report_paths) {
+        unlink($report_path);
+    }
+    return \@report_rows;
+}
+
+# Parse one kraken-report file and return [ sample name, top genus, top species ].
+# The sample name is the report's basename without ".report" and ".kraken"; the taxa are the names
+# on the first rows ranked 'G' and 'S', or "not_found" if absent or the report cannot be read.
+sub _parse_kraken_report {
+    my ( $self, $kraken_report ) = @_;
+
+    ( my $sample_name = $kraken_report ) =~ s/\.report$//;    # dots escaped: only a literal suffix is removed
+    $sample_name =~ s/\.kraken$//;
+    my ($sample_base_name) = fileparse($sample_name);
+
+    my ( $top_genus, $top_species );
+    my $opened = open( my $report_fh, '<', $kraken_report );
+    warn "Couldnt open kraken report $kraken_report: $!" if ( !$opened );
+    while ( $opened && defined( my $line = <$report_fh> ) ) {    # lexical $line: the caller's $_ is left alone
+        chomp $line;
+        my @parts = split( /\t/, $line );
+        next if ( @parts < 6 );    # not a complete report row
+
+        $top_genus   = $parts[5] if ( !defined($top_genus)   && $parts[3] eq 'G' );
+        $top_species = $parts[5] if ( !defined($top_species) && $parts[3] eq 'S' );
+        last if ( defined($top_genus) && defined($top_species) );
+    }
+    close($report_fh) if ($opened);
+
+    for my $name ( $top_genus, $top_species ) {
+        $name ||= "not_found";
+        $name =~ s/^\s+//;
+    }
+    return [ $sample_base_name, $top_genus, $top_species ];
+}
+
+
+sub _build__header {    # builder: the CSV header line of the report
+    return join( ',', qw(Sample Genus Species) );
+}
+
+sub _build__tmp_directory_obj {    # builder: temporary directory object created under the current working directory
+    return File::Temp->newdir( CLEANUP => 1, DIR => getcwd() );
+}
+
+sub _build__tmp_directory {    # builder: path of the temporary directory as a plain string
+    my ($self) = @_;
+    return $self->_tmp_directory_obj->dirname();
+}
+
+# Write the QC report to outfile as CSV: the header line, then one comma-joined line per
+# row of _kraken_data. Uses a lexical handle and dies if outfile cannot be written.
+sub report {
+    my $self = shift;
+    open( my $out_fh, '>', $self->outfile ) or die "Couldnt write QC report " . $self->outfile . ": $!";
+    print {$out_fh} $self->_header . "\n";
+    for my $row ( @{ $self->_kraken_data } ) { print {$out_fh} join( ',', @{$row} ) . "\n"; }
+    close($out_fh) or die "Couldnt write QC report " . $self->outfile . ": $!";
+    return 1;
+}
+
+
+__PACKAGE__->meta->make_immutable;
+no Moose;
+1;
diff --git a/lib/Bio/Roary/ReformatInputGFFs.pm b/lib/Bio/Roary/ReformatInputGFFs.pm
new file mode 100644
index 0000000..86289ca
--- /dev/null
+++ b/lib/Bio/Roary/ReformatInputGFFs.pm
@@ -0,0 +1,162 @@
+package Bio::Roary::ReformatInputGFFs;
+
+# ABSTRACT: Take in gff files and add suffix where a gene id is seen twice
+
+=head1 SYNOPSIS
+
+Take in gff files and add suffix where a gene id is seen twice
+   use Bio::Roary::ReformatInputGFFs;
+   
+   my $obj = Bio::Roary::ReformatInputGFFs->new(
+     gff_files   => ['abc.gff','ddd.faa'],
+   );
+   $obj->fix_duplicate_gene_ids;
+   $obj->fixed_gff_files;
+
+=cut
+
+use Moose;
+use Bio::Roary::Exceptions;
+use Cwd;
+use Log::Log4perl qw(:easy);
+use Bio::Tools::GFF;
+use File::Path qw(make_path);
+use File::Basename;
+
+has 'gff_files'        => ( is => 'ro', isa  => 'ArrayRef', required => 1 );
+has 'logger'           => ( is => 'ro', lazy => 1,          builder  => '_build_logger' );
+has '_tags_to_filter'  => ( is => 'ro', isa  => 'Str',      default  => '(CDS|ncRNA|tRNA|tmRNA|rRNA)' );
+has 'output_directory' => ( is => 'ro', isa  => 'Str',      default  => 'fixed_input_files' );
+has 'suffix_counter'   => ( is => 'rw', isa  => 'Int',      default  => 1 );
+
+has 'fixed_gff_files' => ( is => 'rw', isa => 'ArrayRef', default => sub { [] } );
+
+# Builder for 'logger': initialise Log4perl's easy mode with the ERROR level and
+# return the logger obtained from get_logger().
+sub _build_logger {
+    Log::Log4perl->easy_init( level => $ERROR );
+    return get_logger();
+}
+
+# Fill fixed_gff_files. A file with no usable gene IDs is dropped with a warning; a file repeating
+# an ID already seen (in an earlier file or earlier in itself) is replaced by a copy with suffixed
+# IDs; any other file is passed through unchanged. Always returns 1.
+sub fix_duplicate_gene_ids {
+    my ($self) = @_;
+
+    my %seen_gene_ids;
+  FILE:
+    for my $file ( @{ $self->gff_files } ) {
+        my $ids_from_file = $self->_get_ids_for_gff_file($file);
+
+        if ( @{$ids_from_file} < 1 ) {
+            $self->logger->warn(
+                "Input GFF file doesnt contain annotation we can use so excluding it from the analysis: $file"
+            );
+            next FILE;
+        }
+
+        for my $gene_id ( @{$ids_from_file} ) {
+            if ( !$seen_gene_ids{$gene_id} ) {
+                $seen_gene_ids{$gene_id}++;
+                next;
+            }
+            $self->logger->warn(
+  "Input file contains duplicate gene IDs, attempting to fix by adding a unique suffix.  New GFF in the fixed_input_files directory.  $file "
+            );
+            my $updated_file = $self->_add_suffix_to_gene_ids_and_return_new_file($file);
+            push( @{ $self->fixed_gff_files }, $updated_file ) if ( defined($updated_file) );
+            next FILE;
+        }
+        push( @{ $self->fixed_gff_files }, $file );
+    }
+    return 1;
+}
+
+# Copy $input_file into output_directory giving every feature line a unique ID: an existing ID
+# gets a '___N' suffix, a line without one gets a new 'ID=id___N' tag. Comment lines and everything
+# from ##FASTA onwards are copied verbatim. Returns the new path, or undef if there is no ##FASTA.
+sub _add_suffix_to_gene_ids_and_return_new_file {
+    my ( $self, $input_file ) = @_;
+    my ( $filename, $directories, $suffix ) = fileparse( $input_file, qr/\.[^.]*/ );
+    make_path( $self->output_directory ) if ( !( -d $self->output_directory ) );
+    my $output_file = $self->output_directory . '/' . $filename . $suffix;
+
+    open( my $input_gff_fh, '<', $input_file ) or die "Couldnt open GFF file $input_file: $!";
+    open( my $out_gff_fh, '>', $output_file )
+      or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => "Couldnt write output file:" . $output_file );
+
+    my $found_fasta = 0;
+    while ( my $line = <$input_gff_fh> ) {
+        $found_fasta = 1 if ( $line =~ /^\#\#FASTA/ );
+
+        # Only a leading '#' marks a comment; the line ending is stripped so a suffix cannot land after it.
+        ( my $feature = $line ) =~ s/[\r\n]+\z//;
+        my @cells = split( /\t/, $feature, -1 );
+        if ( $found_fasta == 1 || $line =~ /^\#/ || @cells < 9 ) {
+            print {$out_gff_fh} $line;    # comments, sequence data and malformed lines are copied verbatim
+            next;
+        }
+
+        my @tags     = split( /;/, $cells[8] );
+        my $found_id = 0;
+        for my $tag (@tags) {
+            next if ( $tag !~ /^(ID=["']?)([^;"']+)(["']?)/ );
+            my ( $prefix, $current_id, $quote ) = ( $1, $2, $3 );
+            $tag = $prefix . $current_id . '___' . $self->suffix_counter . $quote;
+            $self->suffix_counter( $self->suffix_counter + 1 );
+            $found_id++;
+            last;
+        }
+        if ( $found_id == 0 ) {
+            unshift( @tags, 'ID=id___' . $self->suffix_counter );
+            $self->suffix_counter( $self->suffix_counter + 1 );
+        }
+        $cells[8] = join( ';', @tags );
+        print {$out_gff_fh} join( "\t", @cells ) . "\n";
+    }
+
+    if ( $found_fasta == 0 ) {
+        $self->logger->warn(
+            "Input GFF file doesnt appear to have the FASTA sequence at the end of the file so is being excluded from the analysis: $input_file" );
+        return undef;
+    }
+    close($out_gff_fh);
+    close($input_gff_fh);
+    return $output_file;
+}
+
+# Arrayref of the gene IDs of every feature in $file whose primary tag matches _tags_to_filter.
+sub _get_ids_for_gff_file {
+    my ( $self, $file ) = @_;
+    my $wanted_tags = $self->_tags_to_filter;
+    my $gff_reader  = Bio::Tools::GFF->new( -file => $file, -gff_version => 3 );
+    my @gene_ids;
+    while ( my $feature = $gff_reader->next_feature() ) {
+        next unless $feature->primary_tag =~ /$wanted_tags/;
+        push @gene_ids, grep { defined } $self->_get_feature_id($feature);    # undef means no usable ID
+    }
+    return \@gene_ids;
+}
+
+# ID of a feature: the first value of its 'ID' tag, else of its 'locus_tag' tag,
+# with all quote characters removed. Returns undef when neither tag exists or
+# the cleaned value is empty.
+sub _get_feature_id {
+    my ( $self, $feature ) = @_;
+
+    # 'ID' takes precedence; 'locus_tag' is consulted only when there is no 'ID' tag.
+    for my $tag_name (qw(ID locus_tag)) {
+        next unless $feature->has_tag($tag_name);
+        my ($gene_id) = $feature->get_tag_values($tag_name);
+        $gene_id =~ s!["']!!g;
+        return undef if ( $gene_id eq "" );
+        return $gene_id;
+    }
+    return undef;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/ReorderSpreadsheet.pm b/lib/Bio/Roary/ReorderSpreadsheet.pm
new file mode 100644
index 0000000..87a1a39
--- /dev/null
+++ b/lib/Bio/Roary/ReorderSpreadsheet.pm
@@ -0,0 +1,133 @@
+package Bio::Roary::ReorderSpreadsheet;
+
+# ABSTRACT: Take in a tree file and a spreadsheet and output a spreadsheet with reordered columns
+
+=head1 SYNOPSIS
+
+Take in a tree file and a spreadsheet and output a spreadsheet with reordered columns
+   use Bio::Roary::ReorderSpreadsheet;
+   
+   my $obj = Bio::Roary::ReorderSpreadsheet->new(
+       tree_file        => $tree_file,
+       spreadsheet   => 'groups.csv'
+     );
+   $obj->reorder_spreadsheet();
+
+=cut
+
+use Moose;
+use Text::CSV;
+use Bio::Roary::SampleOrder;
+use Bio::Roary::GroupStatistics;
+with 'Bio::Roary::SpreadsheetRole';
+
+has 'tree_file'   => ( is => 'ro', isa => 'Str', required => 1 );
+has 'spreadsheet' => ( is => 'ro', isa => 'Str', required => 1 );
+has 'tree_format' => ( is => 'ro', isa => 'Str', default  => 'newick' );
+has 'output_filename'        => ( is => 'ro', isa => 'Str',      default => 'reordered_groups_stats.csv' );
+has 'search_strategy'        => ( is => 'ro', isa => 'Str', default =>  'depth' );
+has 'sortby'                 => ( is => 'ro', isa => 'Maybe[Str]');
+
+has '_sample_order'          => ( is => 'ro', isa  => 'ArrayRef', lazy    => 1, builder => '_build__sample_order' );
+has '_column_mappings'       => ( is => 'ro', isa  => 'ArrayRef', lazy    => 1, builder => '_build__column_mappings' );
+
+
+# Moose hook run after construction: build the column mappings (reads the header row) up front.
+sub BUILD {
+    my $self = shift;
+    $self->_column_mappings;
+}
+
+
+# Copy the whole input spreadsheet, header row included, to the output with the
+# columns of every row rearranged; then close both handles. Always returns 1.
+sub reorder_spreadsheet {
+    my $self = shift;
+    # BUILD has already read the header row, so rewind to the start before copying.
+    my $in_fh = $self->_input_spreadsheet_fh;
+    seek( $in_fh, 0, 0 );
+    while ( my $row = $self->_csv_parser->getline($in_fh) ) {
+        $self->_csv_output->print( $self->_output_spreadsheet_fh, $self->_remap_columns($row) );
+    }
+    close( $self->_output_spreadsheet_fh );
+    close($in_fh);
+    return 1;
+}
+
+# Rearrange one spreadsheet row. Output position i receives the input cell whose
+# index is stored at position i of _column_mappings; the input row is left
+# untouched and a new arrayref is returned.
+sub _remap_columns {
+    my ( $self, $row ) = @_;
+
+    # An array slice picks the cells in mapping order; an index beyond the end of
+    # the row yields undef, exactly as a plain element lookup would.
+    my @source_indexes = @{ $self->_column_mappings };
+    my @output_row     = @{$row}[@source_indexes];
+    return \@output_row;
+}
+
+# Map the fixed leading columns onto themselves: push 0 .. N-1 onto @$column_mappings and
+# shift N entries off @$header_row, N being _num_fixed_headers. Returns the final counter.
+sub _column_mappings_populate_fixed_headers {
+    my ( $self, $column_mappings, $header_row ) = @_;
+    my $column_counter = 0;
+    while ( $column_counter < $self->_num_fixed_headers ) {
+        push @{$column_mappings}, $column_counter++;
+        shift @{$header_row};
+    }
+    return $column_counter;
+}
+
+# Builder for _column_mappings: an arrayref in which position i holds the index
+# of the input column to be written at output position i. The fixed leading
+# columns stay where they are, followed by the sample columns in the order the
+# tree gives, followed by any sample columns the tree does not mention.
+# Reading the header row here also advances the input handle past it.
+sub _build__column_mappings {
+    my ($self) = @_;
+    my $header_row = $self->_csv_parser->getline( $self->_input_spreadsheet_fh );
+
+    my @column_mappings;
+    my $num_fixed = $self->_column_mappings_populate_fixed_headers( \@column_mappings, $header_row );
+
+    # Hash from sample column name to its index in the input spreadsheet. The call
+    # above removed the fixed headers from $header_row, hence the offset.
+    my %input_sample_order;
+    for my $i ( 0 .. @{$header_row} - 1 ) {
+        $input_sample_order{ $header_row->[$i] } = $i + $num_fixed;
+    }
+
+    # Go through the order of the samples from the tree and see if the headers exist
+    for my $sample_name ( @{ $self->_sample_order } ) {
+        next unless defined( $input_sample_order{$sample_name} );
+        push( @column_mappings, delete( $input_sample_order{$sample_name} ) );
+    }
+
+    # Add any columns not in the tree to the end. They are taken in ascending input
+    # index, i.e. their original left-to-right order, rather than in hash order,
+    # which differs from run to run.
+    for my $input_index ( sort { $a <=> $b } values %input_sample_order ) {
+        push( @column_mappings, $input_index );
+    }
+
+    return \@column_mappings;
+}
+
+
+# Builder for _sample_order: the sample names in the order Bio::Roary::SampleOrder
+# derives from the tree file, using this object's tree and traversal settings.
+sub _build__sample_order {
+    my $self = shift;
+
+    # Each constructor argument is read from the accessor of the same name.
+    my %order_args = map { ( $_ => $self->$_ ) } qw(tree_file tree_format search_strategy sortby);
+
+    return Bio::Roary::SampleOrder->new(%order_args)->ordered_samples();
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
+
diff --git a/lib/Bio/Roary/SampleOrder.pm b/lib/Bio/Roary/SampleOrder.pm
new file mode 100644
index 0000000..dae0ddf
--- /dev/null
+++ b/lib/Bio/Roary/SampleOrder.pm
@@ -0,0 +1,50 @@
+package Bio::Roary::SampleOrder;
+
+# ABSTRACT: Take in a tree file and return an ordering of the samples
+
+=head1 SYNOPSIS
+
+Take in a tree file and return an ordering of the samples. Defaults to depth first search
+   use Bio::Roary::SampleOrder;
+   
+   my $obj = Bio::Roary::SampleOrder->new(
+       tree_file        => $tree_file,
+     );
+   $obj->ordered_samples();
+
+=cut
+
+use Moose;
+use Bio::TreeIO;
+
+has 'tree_file'       => ( is => 'ro', isa => 'Str',      required => 1 );
+has 'tree_format'     => ( is => 'ro', isa => 'Str',      default  => 'newick' );
+has 'ordered_samples' => ( is => 'ro', isa => 'ArrayRef', lazy     => 1, builder => '_build_ordered_samples' );
+
+# 'b|breadth' first order or 'd|depth' first order
+has 'search_strategy' => ( is => 'ro', isa => 'Str', default =>  'depth' );
+has 'sortby' => (is => 'ro', isa => 'Maybe[Str]');
+
+
+# Builder for ordered_samples: read the first tree from tree_file and return an
+# arrayref of the ids of its leaf nodes, in the order get_nodes yields them for
+# the configured search_strategy and sortby.
+sub _build_ordered_samples {
+    my $self = shift;
+
+    my $tree_reader = Bio::TreeIO->new(
+        -file   => $self->tree_file,
+        -format => $self->tree_format
+    );
+    my $tree = $tree_reader->next_tree;
+
+    my @all_nodes = $tree->get_nodes( $self->search_strategy, $self->sortby );
+    my @taxa      = map { $_->id } grep { $_->is_Leaf } @all_nodes;
+    return \@taxa;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
+
diff --git a/lib/Bio/Roary/SequenceLengths.pm b/lib/Bio/Roary/SequenceLengths.pm
new file mode 100644
index 0000000..6f62fe1
--- /dev/null
+++ b/lib/Bio/Roary/SequenceLengths.pm
@@ -0,0 +1,43 @@
+package Bio::Roary::SequenceLengths;
+
+# ABSTRACT:  Take in a fasta file and create a hash with the length of each sequence
+
+=head1 SYNOPSIS
+
+Take in a fasta file and create a hash with the length of each sequence
+   use Bio::Roary::SequenceLengths;
+   
+   my $obj = Bio::Roary::SequenceLengths->new(
+     fasta_file   => 'abc.fa',
+   );
+   $obj->sequence_lengths;
+
+=cut
+
+use Moose;
+use Bio::SeqIO;
+use Bio::Roary::Exceptions;
+
+has 'fasta_file'       => ( is => 'ro', isa => 'Str',        required => 1 );
+has 'sequence_lengths' => ( is => 'ro', isa => 'HashRef',    lazy     => 1, builder => '_build_sequence_lengths' );
+has '_input_seqio'     => ( is => 'ro', isa => 'Bio::SeqIO', lazy     => 1, builder => '_build__input_seqio' );
+
+sub _build__input_seqio {    # builder: FASTA-format Bio::SeqIO reader over fasta_file
+    my $self = shift;
+    return Bio::SeqIO->new( -format => 'Fasta', -file => $self->fasta_file );
+}
+
+# Builder for sequence_lengths: hashref mapping each sequence's display_id to
+# its length, for every sequence read from fasta_file.
+sub _build_sequence_lengths {
+    my $self   = shift;
+    my $reader = $self->_input_seqio;
+    my %length_of;
+    while ( my $seq = $reader->next_seq() ) { $length_of{ $seq->display_id } = $seq->length(); }
+    return \%length_of;
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/SortFasta.pm b/lib/Bio/Roary/SortFasta.pm
new file mode 100644
index 0000000..85ed0e4
--- /dev/null
+++ b/lib/Bio/Roary/SortFasta.pm
@@ -0,0 +1,127 @@
+package Bio::Roary::SortFasta;
+
+# ABSTRACT: sort a fasta file by name
+
+=head1 SYNOPSIS
+
+sort a fasta file by name
+   use Bio::Roary::SortFasta;
+   
+   my $obj = Bio::Roary::SortFasta->new(
+     input_filename   => 'infasta.fa',
+   );
+   $obj->sort_fasta->replace_input_with_output_file;
+
+=cut
+
+use Moose;
+use File::Copy;
+use Bio::SeqIO;
+
+has 'input_filename'         => ( is => 'ro', isa => 'Str',  required => 1 );
+has 'output_filename'        => ( is => 'ro', isa => 'Str',  lazy     => 1, builder => '_build_output_filename' );
+has 'make_multiple_of_three' => ( is => 'ro', isa => 'Bool', default  => 0 );
+has 'remove_nnn_from_end'    => ( is => 'ro', isa => 'Bool', default  => 0 );
+has 'similarity'             => ( is => 'rw', isa => 'Num',  default  => 1 );
+has 'sequences_unaligned'    => ( is => 'rw', isa => 'Bool', default  => 0 );
+
+has '_input_seqio'  => ( is => 'ro', isa => 'Bio::SeqIO', lazy => 1, builder => '_build__input_seqio' );
+has '_output_seqio' => ( is => 'ro', isa => 'Bio::SeqIO', lazy => 1, builder => '_build__output_seqio' );
+
+sub _build_output_filename {    # builder: input_filename with ".sorted.fa" appended
+    my $self = shift;
+    return sprintf( '%s.sorted.fa', $self->input_filename );
+}
+
+sub _build__input_seqio {    # builder: FASTA-format Bio::SeqIO reader over input_filename
+    my $self = shift;
+    return Bio::SeqIO->new( -format => 'Fasta', -file => $self->input_filename );
+}
+
+sub _build__output_seqio {    # builder: FASTA-format Bio::SeqIO writer; the ">" prefix opens output_filename for writing
+    my $self = shift;
+    return Bio::SeqIO->new( -format => 'Fasta', -file => ">" . $self->output_filename );
+}
+
+# Pad $input_seq with trailing "N" characters until its length is a multiple of
+# three: two are added when one base is left over, one when two are left over.
+# The sequence object is modified in place and also returned.
+sub _add_padding_to_make_sequence_length_multiple_of_three {
+    my ( $self, $input_seq ) = @_;
+
+    my $overhang = $input_seq->length() % 3;
+    if ( $overhang != 0 ) {
+        $input_seq->seq( $input_seq->seq() . ( "N" x ( 3 - $overhang ) ) );
+    }
+
+    return $input_seq;
+}
+
+# Remove one trailing "NNN" (any letter case) from every sequence object in the hashref
+# $input_sequences; the objects are altered in place and the same hashref is returned.
+sub _remove_nnn_from_all_sequences {
+    my ( $self, $input_sequences ) = @_;
+    for my $seq_obj ( values %{$input_sequences} ) {
+        ( my $trimmed = $seq_obj->seq() ) =~ s/NNN$//i;
+        $seq_obj->seq($trimmed);
+    }
+    return $input_sequences;
+}
+
+# Read every sequence from input_filename and write them to output_filename ordered
+# by display_id, optionally padding each to a multiple of three and stripping a
+# trailing NNN shared by all of them. 'similarity' is lowered to the smallest value
+# found between the first sequence and any sequence; 'sequences_unaligned' is set
+# when the written sequences differ in length. Returns $self.
+sub sort_fasta {
+    my ($self) = @_;
+
+    my %input_sequences;
+    my $nnn_at_end_of_all_sequences = 1;
+    my $reference_sequence;    # the first sequence read, taken before any padding is applied
+    while ( my $input_seq = $self->_input_seqio->next_seq() ) {
+        $reference_sequence = $input_seq->seq if ( !defined($reference_sequence) );
+        $self->_add_padding_to_make_sequence_length_multiple_of_three($input_seq) if ( $self->make_multiple_of_three );
+        $nnn_at_end_of_all_sequences = 0 if ( $input_seq->seq() !~ /NNN$/i );
+        $input_sequences{ $input_seq->display_id } = $input_seq;
+        my $factor = $self->_percentage_similarity( $reference_sequence, $input_seq->seq );
+        $self->similarity($factor) if ( $factor < $self->similarity );
+    }
+
+    $self->_remove_nnn_from_all_sequences( \%input_sequences ) if ( $self->remove_nnn_from_end && $nnn_at_end_of_all_sequences );
+
+    # undef rather than 0 marks "no length seen yet", so an empty first sequence still counts.
+    my $sequence_length;
+    for my $sequence_name ( sort keys %input_sequences ) {
+        my $current_length = $input_sequences{$sequence_name}->length;
+        $sequence_length = $current_length if ( !defined($sequence_length) );
+        $self->sequences_unaligned(1) if ( $current_length != $sequence_length );
+        $self->_output_seqio->write_seq( $input_sequences{$sequence_name} );
+    }
+    return $self;
+}
+
+sub replace_input_with_output_file {    # move the output file onto input_filename; returns $self for chaining
+    my $self = shift;
+    move( $self->output_filename, $self->input_filename );
+    return $self;
+}
+
+# Fraction of $string1 that agrees with $string2: positions are compared over the
+# shorter of the two lengths and the mismatch count is divided by length($string1).
+sub _percentage_similarity {
+    my ( $self, $string1, $string2 ) = @_;
+
+    my $string1_length  = length($string1);
+    my $overlap         = $string1_length < length($string2) ? $string1_length : length($string2);
+    my $num_differences = grep { substr( $string1, $_, 1 ) ne substr( $string2, $_, 1 ) } 0 .. $overlap - 1;
+
+    return 1 if ( $num_differences == 0 );
+    return 0 if ( $string1_length == 0 );
+    return ( 1.0 - ( $num_differences / $string1_length ) );
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;
diff --git a/lib/Bio/Roary/SplitGroups.pm b/lib/Bio/Roary/SplitGroups.pm
new file mode 100644
index 0000000..0c2a991
--- /dev/null
+++ b/lib/Bio/Roary/SplitGroups.pm
@@ -0,0 +1,324 @@
+package Bio::Roary::SplitGroups;
+
+# ABSTRACT: 
+
+=head1 SYNOPSIS
+
+	use Bio::Roary::SplitGroups;
+
+=cut
+
+use Moose;
+use Bio::Roary::AnalyseGroups;
+use File::Path qw(make_path remove_tree);
+use File::Copy qw(move);
+use File::Temp;
+use File::Basename;
+use File::Slurper 'read_lines';
+use Cwd;
+
+
+has 'groupfile'   => ( is => 'ro', isa => 'Str',      required => 1 );
+has 'fasta_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
+has 'outfile'     => ( is => 'ro', isa => 'Str',      required => 1 );
+has 'iterations'  => ( is => 'ro', isa => 'Int',      default  => 5 );
+has 'dont_delete' => ( is => 'ro', isa => 'Bool',     default  => 0 );
+
+has '_neighbourhood_size' => ( is => 'ro', isa => 'Int', default => 5 );
+
+has '_group_filelist'  => ( is => 'rw', isa => 'ArrayRef', lazy_build => 1 );
+has '_tmp_dir_object' => ( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } );
+has '_tmp_dir'        => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__tmp_dir' );
+
+has '_analyse_groups_obj' => ( is => 'ro', lazy_build => 1 );
+has '_genes_to_files'     => ( is => 'ro', lazy_build => 1 );
+has '_genes_to_groups'    => ( is => 'rw', isa => 'HashRef' );
+
+has '_first_gene_of_group_which_doesnt_have_paralogs'    => ( is => 'rw', isa => 'HashRef', default => sub {{}} );
+
+has '_genes_to_neighbourhood' => ( is => 'rw', isa => 'HashRef', lazy => 1, builder => '_build__genes_to_neighbourhood' );
+
+
+has '_gene_files_temp_dir_obj' =>
+  ( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } );
+
+
+has '_do_sorting' => ( is => 'rw', isa => 'Bool', default => 0 ); # set to 1 for testing only
+
+sub _build__tmp_dir {    # builder: path of the temporary directory as a plain string
+    my $self = shift;
+    return $self->_tmp_dir_object->dirname();
+}
+
+# Builder: a Bio::Roary::AnalyseGroups object constructed from this object's
+# FASTA files and group file.
+sub _build__analyse_groups_obj {
+    my $self = shift;
+
+    my %analysis_args = ( fasta_files => $self->fasta_files, groups_filename => $self->groupfile );
+    return Bio::Roary::AnalyseGroups->new(%analysis_args);
+}
+
+sub _build__genes_to_files {    # builder: the _genes_to_file value of the AnalyseGroups object
+    my $self = shift;
+    return $self->_analyse_groups_obj->_genes_to_file;
+}
+
+# Builder for _group_filelist: the list of group files used by the iterations,
+# starting with groupfile, then "<tmp>/group_N" for N = 1 .. iterations-1, and
+# ending with outfile.
+sub _build__group_filelist {
+    my $self    = shift;
+    my $tmp_dir = $self->_tmp_dir;
+
+    # No intermediate names are produced when iterations is 1 or less.
+    my @intermediate = map { "$tmp_dir/group_$_" } 1 .. ( $self->iterations - 1 );
+
+    return [ $self->groupfile, @intermediate, $self->outfile ];
+}
+
+# Builder for _genes_to_neighbourhood: a hashref from each gene to an arrayref
+# of the genes lying up to _neighbourhood_size positions before and after it in
+# the same FASTA file. A "gene" here is a line of the FASTA file containing '>',
+# with its first '>' removed.
+sub _build__genes_to_neighbourhood {
+    my ($self) = @_;
+
+    my %genes_to_neighbourhood;
+    for my $fasta_file ( @{ $self->fasta_files } ) {
+        my ( $filename, $directories, $suffix ) = fileparse( $fasta_file, qr/\.[^.]*/ );
+        my $gene_list_file = $self->_gene_files_temp_dir_obj . "/" . $filename . $suffix;
+
+        # Shell out to write the gene list into the temporary directory, then read it back.
+        system( 'grep \> ' . $fasta_file . '| sed  \'s/>//\' >' . $gene_list_file );
+        my @genes = read_lines($gene_list_file);
+
+        # Record neighbours in order of increasing distance, the preceding gene before the following one.
+        for my $i ( 0 .. $#genes ) {
+            for my $offset ( 1 .. $self->_neighbourhood_size ) {
+                my ( $before, $after ) = ( $i - $offset, $i + $offset );
+                push( @{ $genes_to_neighbourhood{ $genes[$i] } }, $genes[$before] ) if ( $before >= 0 );
+                push( @{ $genes_to_neighbourhood{ $genes[$i] } }, $genes[$after] )  if ( $after < @genes );
+            }
+        }
+    }
+
+    return \%genes_to_neighbourhood;
+}
+
+# Iteratively split groups that contain paralogs. Each pass reads one group file
+# from _group_filelist and, if any group was split, writes the result to the next
+# file in the list. As soon as a pass splits nothing, its input file is moved to
+# outfile and iteration stops. Dies when a group file cannot be read or written.
+sub split_groups {
+    my ($self) = @_;
+
+    # iteratively
+    for my $x ( 0 .. ( $self->iterations - 1 ) ) {
+        my ( $in_groups, $out_groups ) = $self->_get_files_for_iteration($x);
+
+        # read in groups, check paralogs and split
+        my @newgroups;
+        my $any_paralogs = 0;
+        $self->_set_genes_to_groups($in_groups);
+        open( my $group_handle, '<', $in_groups ) or die "Couldnt read groups file $in_groups: $!";
+        while ( my $line = <$group_handle> ) {
+            my @group = split( /\s+/, $line );
+
+            if ( $self->_first_gene_of_group_which_doesnt_have_paralogs->{ $group[0] } ) {
+                # Already recorded as paralog-free, so the check is skipped.
+                push( @newgroups, \@group );
+            }
+            elsif ( @group > 1 && $self->_contains_paralogs( \@group ) ) {
+                push( @newgroups, @{ $self->_true_orthologs( \@group ) } );
+                $any_paralogs = 1;
+            }
+            else {
+                # Single-gene groups and groups without paralogs are recorded under their first gene.
+                $self->_first_gene_of_group_which_doesnt_have_paralogs->{ $group[0] }++;
+                push( @newgroups, \@group );
+            }
+        }
+        close($group_handle);
+
+        # check if next iteration required, move output if not
+        unless ($any_paralogs) {
+            move( $in_groups, $self->outfile );    # input file will be the same as new output file if no splitting has been performed
+            last;
+        }
+
+        # write split groups to file
+        open( my $outfile_handle, '>', $out_groups ) or die "Couldnt write groups file $out_groups: $!";
+        for my $g (@newgroups) {
+            print {$outfile_handle} join( "\t", @{$g} ) . "\n";
+        }
+
+        # Write errors can surface only at close, so check it as well.
+        close($outfile_handle) or die "Couldnt write groups file $out_groups: $!";
+    }
+}
+
+# Read $groupfile (one whitespace-separated group per line) and store in _genes_to_groups a
+# hash from gene name to the zero-based line number of its group. Dies if it cannot be read.
+sub _set_genes_to_groups {
+    my ( $self, $groupfile ) = @_;
+
+    my %genes2groups;
+    my $group_index = 0;
+    open( my $gfh, '<', $groupfile ) or die "Couldnt read groups file $groupfile: $!";
+    while ( my $line = <$gfh> ) {
+        chomp $line;
+        $genes2groups{$_} = $group_index for split( /\s+/, $line );
+        $group_index++;
+    }
+    close($gfh);
+
+    $self->_genes_to_groups( \%genes2groups );
+}
+
+sub _update_genes_to_groups {
+	# Appends ".N" to the stored group identifier of every gene in $groups, N being
+	# the 1-based position of the sub-group (within $groups) holding that gene.
+	my ( $self, $groups ) = @_;
+
+	my %group_of = %{ $self->_genes_to_groups };
+	my $position = 0;
+	for my $members ( @{ $groups } ){
+		$position++;
+		$group_of{$_} .= ".$position" for @{ $members };
+	}
+
+	$self->_genes_to_groups( \%group_of );
+}
+
+sub _get_files_for_iteration {
+	# Returns entries $n and $n+1 of _group_filelist: the input and output groups file of pass $n.
+	my ( $self, $n ) = @_;
+	return @{ $self->_group_filelist }[ $n, $n + 1 ];
+}
+
+sub _contains_paralogs {
+	# 1 when _find_paralogs reports a set of paralogs for $group, 0 when it returns undef.
+	my ( $self, $group ) = @_;
+
+	return defined( $self->_find_paralogs( $group ) ) ? 1 : 0;
+}
+
+sub _find_paralogs {
+	# Returns an arrayref with the smallest set (of more than one gene) of genes in
+	# $group that map to the same entry of _genes_to_files, or undef when no file
+	# contributes more than one gene to the group.
+	my ( $self, $group ) = @_;
+
+	# bucket the genes of the group by the file they are recorded against
+	my %occ;
+	for my $gene ( @{ $group } ){
+		my $gene_file = $self->_genes_to_files->{ $gene };
+		push( @{ $occ{$gene_file} }, $gene );
+	}
+
+	# pick the smallest number of paralogs; files are visited in sorted order so
+	# that a tie is always resolved the same way (hash order differs between runs)
+	my $smallest_group;
+	for my $gene_file ( sort keys %occ ){
+		my $v = $occ{$gene_file};
+		next if ( @{$v} < 2 );
+		$smallest_group = $v if ( !defined $smallest_group || @{$v} < @{$smallest_group} );
+	}
+	return $smallest_group;
+}
+
+sub _true_orthologs {
+	# Splits $group around its paralogs: every paralog seeds a new group and each
+	# remaining gene joins the paralog whose conserved gene neighbourhood (CGN) it
+	# shares most with. Returns an arrayref of the new groups (arrayrefs of gene ids).
+	my ( $self, $group ) = @_;
+
+	# first, create CGN hash for group
+	my %cgns;
+	$cgns{$_} = $self->_parse_gene_neighbourhood( $_ ) for @{ $group };
+
+	# finding paralogs in the group
+	my @paralogs = @{ $self->_find_paralogs( $group ) };
+
+	# for each paralog, count how often every group occurs in its neighbourhood
+	my @paralog_cgns_groups;
+	for my $p ( @paralogs ){
+		my %paralog_groups;
+		for my $neighbour ( @{ $cgns{$p} } ){
+			my $neighbour_group = $self->_genes_to_groups->{$neighbour};
+			next unless ( defined($neighbour_group) );
+			$paralog_groups{$neighbour_group}++;
+		}
+		push( @paralog_cgns_groups, \%paralog_groups );
+	}
+
+	# create data structure to hold new groups: one per paralog, followed by an
+	# extra "leftovers" array to gather genes that don't share CGN with anything
+	my @new_groups = map { [ $_ ] } @paralogs;
+	push( @new_groups, [] );
+
+	# cluster other members of the group to their closest match
+	# (the paralogs themselves are recognised through a lookup hash)
+	my %is_paralog = map { $_ => 1 } @paralogs;
+	for my $g ( @{ $group } ){
+		next if ( $is_paralog{$g} );
+		my $closest = $self->_closest_cgn( $cgns{$g}, \@paralog_cgns_groups );
+
+		# $closest is -1 when nothing matched, which addresses the leftovers array
+		push( @{ $new_groups[$closest] }, $g );
+	}
+
+	# check for "leftovers", remove if absent
+	pop @new_groups unless ( @{ $new_groups[-1] } );
+
+	# sort the genes inside each group, but only when _do_sorting is set
+	return \@new_groups unless ( $self->_do_sorting );
+
+	my @sorted_new_groups;
+	for my $gr ( @new_groups ){
+		push( @sorted_new_groups, [ sort @{ $gr } ] );
+	}
+	return \@sorted_new_groups;
+}
+
+sub _closest_cgn {
+	# Returns the index of the entry in $p_cgns that scores highest against $cgn
+	# (the first one wins a tie), or -1 when no entry scores above 0.
+	my ( $self, $cgn, $p_cgns ) = @_;
+
+	my $top_score = 0;
+	my $winner    = -1; # return -1 to add to "leftovers" array if no better score is found
+	my $candidate = 0;
+	for my $p_cgn ( @{ $p_cgns } ){
+		my $score = $self->_shared_cgn_score( $cgn, $p_cgn );
+		( $winner, $top_score ) = ( $candidate, $score ) if ( $score > $top_score );
+		$candidate++;
+	}
+
+	return $winner;
+}
+
+sub _shared_cgn_score {
+	# Fraction (0 to 1) of the genes in $cgn1 whose group has a true entry in $cgn2.
+	my ( $self, $cgn1, $cgn2 ) = @_;
+	# no neighbours means nothing is shared; return before the division by zero below
+	return 0 unless ( defined($cgn1) && @{ $cgn1 } );
+	my $total_shared = 0;
+	for my $i ( @{ $cgn1 } ){
+		my $input_group = $self->_genes_to_groups->{$i};
+		$total_shared++ if ( defined($input_group) && $cgn2->{$input_group} );
+	}
+	return $total_shared/scalar @{ $cgn1 };
+}
+
+sub _parse_gene_neighbourhood {
+	# Fetches what _genes_to_neighbourhood holds for $gene_id (undef when there is no entry).
+	my ( $self, $gene_id ) = @_;
+	my $neighbourhood_of = $self->_genes_to_neighbourhood;
+	return $neighbourhood_of->{$gene_id};
+}
+
+no Moose; # unimport the Moose keywords so they are not left behind as methods
+__PACKAGE__->meta->make_immutable; # finalise the class definition
+1; # a module file has to end with a true value
\ No newline at end of file
diff --git a/lib/Bio/Roary/SpreadsheetRole.pm b/lib/Bio/Roary/SpreadsheetRole.pm
new file mode 100644
index 0000000..467f47a
--- /dev/null
+++ b/lib/Bio/Roary/SpreadsheetRole.pm
@@ -0,0 +1,66 @@
+package Bio::Roary::SpreadsheetRole;
+
+# ABSTRACT: Read and write a spreadsheet
+
+=head1 SYNOPSIS
+
+with 'Bio::Roary::SpreadsheetRole';
+
+=cut
+use Moose::Role;
+
+# NOTE: Text::CSV, Bio::Roary::GroupStatistics and an output_filename method are used
+# below but are not loaded/declared in this role; the consuming class has to supply them.
+has 'spreadsheet'            => ( is => 'ro', isa  => 'Str',      required => 1 );
+has '_fixed_headers'         => ( is => 'ro', isa  => 'ArrayRef', lazy    => 1, builder => '_build__fixed_headers' );
+has '_input_spreadsheet_fh'  => ( is => 'ro', lazy => 1,          builder => '_build__input_spreadsheet_fh' );
+has '_output_spreadsheet_fh' => ( is => 'ro', lazy => 1,          builder => '_build__output_spreadsheet_fh' );
+has '_num_fixed_headers'     => ( is => 'ro', isa  => 'Int',      lazy    => 1, builder => '_build__num_fixed_headers' );
+has '_csv_parser'            => ( is => 'ro', isa  => 'Text::CSV',lazy    => 1, builder => '_build__csv_parser' );
+has '_csv_output'            => ( is => 'ro', isa  => 'Text::CSV',lazy    => 1, builder => '_build__csv_output' );
+
+# Force the lazy input filehandle to be built at construction, so a bad path dies straight away.
+sub BUILD
+{
+	my ($self) = @_;
+	$self->_input_spreadsheet_fh;
+}
+
+sub _build__fixed_headers
+{
+  my ($self) = @_;
+  return [ @{Bio::Roary::GroupStatistics->fixed_headers()} ];    # hand out a copy of the list
+}
+
+sub _build__csv_parser
+{
+  my ($self) = @_;
+  return Text::CSV->new( { binary => 1, always_quote => 1} );
+}
+
+sub _build__csv_output
+{
+  my ($self) = @_;
+  return Text::CSV->new( { binary => 1, always_quote => 1, eol => "\r\n"} );
+}
+
+sub _build__input_spreadsheet_fh {
+    my ($self) = @_;
+    # three-argument open, so the filename can never be mistaken for a mode or a pipe
+    open( my $fh, '<', $self->spreadsheet ) or die "Couldnt open input spreadsheet: ".$self->spreadsheet." ($!)";
+    return $fh;
+}
+
+sub _build__output_spreadsheet_fh {
+    my ($self) = @_;
+    open( my $fh, '>', $self->output_filename ) or die "Couldnt open output spreadsheet: ".$self->output_filename." ($!)";
+    return $fh;
+}
+
+sub _build__num_fixed_headers
+{
+  my ($self) = @_;
+  return scalar @{$self->_fixed_headers};    # explicit count, whatever context the builder is called in
+}
+
+1;
diff --git a/t/00_requires_external.t b/t/00_requires_external.t
new file mode 100755
index 0000000..b668039
--- /dev/null
+++ b/t/00_requires_external.t
@@ -0,0 +1,20 @@
+#!/usr/bin/env perl
+
+# Checks that each of the external programs listed at the bottom can be found on PATH.
+use Test::Most;
+use FindBin;
+plan tests => 8;
+bail_on_fail if 0;
+use Env::Path 'PATH';
+
+my $OPSYS  = $^O;
+my $BINDIR = "$FindBin::RealBin/../binaries/$OPSYS";
+
+# Append the per-OS binaries directory and this script's directory to PATH, when they exist.
+$ENV{PATH} .= ":$_" for grep { -d $_ } ( $BINDIR, $FindBin::RealBin );
+
+# One assertion per required executable.
+for my $tool (qw(blastp makeblastdb mcl mcxdeblast bedtools prank parallel mafft)) {
+    ok( scalar PATH->Whence($tool), "$tool in PATH" );
+}
+
diff --git a/t/Bio/Roary/AccessoryBinaryFasta.t b/t/Bio/Roary/AccessoryBinaryFasta.t
new file mode 100755
index 0000000..435839b
--- /dev/null
+++ b/t/Bio/Roary/AccessoryBinaryFasta.t
@@ -0,0 +1,72 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use Test::Files;
+
+BEGIN { unshift( @INC, './lib' ) }
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::AccessoryBinaryFasta');
+}
+
+my $dummy_annotate_groups = Bio::Roary::AnnotateGroups->new(
+  gff_files   => ['t/data/query_1.gff','t/data/query_2.gff','t/data/query_3.gff'],
+  groups_filename => 't/data/query_groups',
+);
+
+my $dummy_analyse_groups = Bio::Roary::AnalyseGroups->new(
+    fasta_files     => ['t/data/query_1.fa','t/data/query_2.fa','t/data/query_3.fa'],
+    groups_filename => 't/data/query_groups'
+);
+
+# Case 1: four input files with the bounds given explicitly as 0 and 4.
+
+ok(
+    my $obj = Bio::Roary::AccessoryBinaryFasta->new(
+        input_files => [ 't/abc/aaa', 't/abc/bbb', 't/abc/ccc', 't/abc/ddd' ],
+        groups_to_files => 
+		{
+            group_1 => { 't/abc/aaa' => [1] },
+            group_2 => { 't/abc/aaa' => [1], 't/abc/bbb' => [2] },
+            group_3 => { 't/abc/aaa' => [1], 't/abc/bbb' => [2], 't/abc/ccc' => [3] },
+            group_4 => { 't/abc/aaa' => [1], 't/abc/bbb' => [2], 't/abc/ccc' => [3], 't/abc/ddd' => [4] },
+        },
+		_lower_bound_value  => 0,
+		_upper_bound_value  => 4,
+		annotate_groups_obj => $dummy_annotate_groups,
+		analyse_groups_obj  => $dummy_analyse_groups
+    ),
+    'initialise accessory binary fasta file'
+);
+
+ok( $obj->create_accessory_binary_fasta(), 'create output file' );
+
+compare_ok( 'accessory_binary_genes.fa', 't/data/expected_accessory_binary_genes.fa','binary accessory fasta file created');
+
+# Case 2: no bounds passed in; they are expected to come out as 1 and 3.
+ok(
+    $obj = Bio::Roary::AccessoryBinaryFasta->new(
+        input_files => [ 'aaa', 'bbb', 'ccc', 'ddd' ],
+        groups_to_files => 
+		{
+            group_1 => { 'aaa' => [1] },
+            group_2 => { 'aaa' => [1], 'bbb' => [2] },
+            group_3 => { 'aaa' => [1], 'bbb' => [2], 'ccc' => [3] },
+            group_4 => { 'aaa' => [1], 'bbb' => [2], 'ccc' => [3], 'ddd' => [4] },
+        },
+		annotate_groups_obj => $dummy_annotate_groups,
+		analyse_groups_obj  => $dummy_analyse_groups
+    ),
+    'initialise accessory binary fasta file bounded'
+);
+
+is($obj->_lower_bound_value, 1, 'lower bound value');
+is($obj->_upper_bound_value, 3, 'upper bound value');
+ok( $obj->create_accessory_binary_fasta(), 'create output file bounded' );
+
+compare_ok( 'accessory_binary_genes.fa', 't/data/expected_accessory_binary_genes_bounded.fa','binary accessory fasta file created bounded');
+# Do not leave the generated fasta file behind in the working directory.
+unlink('accessory_binary_genes.fa');
+done_testing();
diff --git a/t/Bio/Roary/AccessoryClustering.t b/t/Bio/Roary/AccessoryClustering.t
new file mode 100755
index 0000000..c9a3333
--- /dev/null
+++ b/t/Bio/Roary/AccessoryClustering.t
@@ -0,0 +1,104 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+
+BEGIN { unshift( @INC, './lib' ) }
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::AccessoryClustering');
+}
+# identity threshold => [ smallest, largest ] number of clusters accepted for it
+my $identity_to_num_clusters = {
+    '1'    => [ 10, 10 ],
+    '0.99' => [ 4,  5 ],
+    '0.95' => [ 2,  4 ],
+    '0.90' => [ 1,  1 ],
+};
+
+for my $percentage_identity ( keys %{$identity_to_num_clusters} ) {
+    ok(
+        my $obj = Bio::Roary::AccessoryClustering->new(
+            input_file => 't/data/input_accessory_binary.fa',
+            identity   => $percentage_identity
+        ),
+        "initialise object with identity of $percentage_identity"
+    );
+    ok( my @clusters = keys %{ $obj->clusters_to_samples }, "build the clusters for $percentage_identity" );
+    ok( $obj->sample_weights,      "build samples weights for $percentage_identity" );
+    ok( $obj->samples_to_clusters, "build samples to clusters for $percentage_identity" );
+
+    my $min_cluster_size = $identity_to_num_clusters->{$percentage_identity}->[0];
+    my $max_cluster_size = $identity_to_num_clusters->{$percentage_identity}->[1];
+    ok(
+        ( @clusters >= $min_cluster_size && @clusters <= $max_cluster_size ? 1 : 0 ),
+        "check number of clusters as expected, allowing for some variation for $percentage_identity"
+    );
+}
+# With an identity of 0.9 all ten samples are expected to end up in the cluster of seq1.
+my $obj = Bio::Roary::AccessoryClustering->new(
+    input_file => 't/data/input_accessory_binary.fa',
+    identity   => 0.9
+);
+is_deeply(
+    $obj->samples_to_clusters,
+    {
+        'seq6'  => 'seq1',
+        'seq3'  => 'seq1',
+        'seq7'  => 'seq1',
+        'seq9'  => 'seq1',
+        'seq10' => 'seq1',
+        'seq2'  => 'seq1',
+        'seq8'  => 'seq1',
+        'seq1'  => 'seq1',
+        'seq4'  => 'seq1',
+        'seq5'  => 'seq1'
+    },
+    'samples to clusters'
+);
+my @sample_weights = values %{ $obj->sample_weights };
+is_deeply( \@sample_weights, [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1 ], 'sample weights' );
+# With an identity of 1 every sample is expected to form a cluster of its own.
+$obj = Bio::Roary::AccessoryClustering->new(
+    input_file => 't/data/input_accessory_binary.fa',
+    identity   => 1
+);
+
+is_deeply(
+    $obj->samples_to_clusters,
+    {
+        'seq6'  => 'seq6',
+        'seq3'  => 'seq3',
+        'seq7'  => 'seq7',
+        'seq9'  => 'seq9',
+        'seq10' => 'seq10',
+        'seq2'  => 'seq2',
+        'seq8'  => 'seq8',
+        'seq1'  => 'seq1',
+        'seq4'  => 'seq4',
+        'seq5'  => 'seq5'
+    },
+    'samples to clusters'
+);
+@sample_weights = values %{ $obj->sample_weights };
+is_deeply( \@sample_weights, [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ], 'sample weights' );
+
+# Larger input, constructed without an identity argument.
+
+$obj = Bio::Roary::AccessoryClustering->new(
+    input_file => 't/data/large_accessory_binary_genes.fa'
+);
+
+ok( my @clusters = keys %{ $obj->clusters_to_samples }, "build the clusters for large_accessory_binary_genes.fa" );
+ok( $obj->sample_weights,      "build samples weights for large_accessory_binary_genes.fa" );
+ok( $obj->samples_to_clusters, "build samples to clusters for large_accessory_binary_genes.fa" );
+
+ok(
+    ( @clusters >= 6 && @clusters <= 14 ? 1 : 0 ),
+    "check number of clusters as expected, allowing for some variation for large_accessory_binary_genes.fa"
+);
+# Remove these two files from the working directory (presumably left by the clustering runs above).
+unlink('_accessory_clusters');
+unlink('_accessory_clusters.clstr');
+done_testing();
diff --git a/t/Bio/Roary/AnalyseGroups.t b/t/Bio/Roary/AnalyseGroups.t
new file mode 100644
index 0000000..051a3e3
--- /dev/null
+++ b/t/Bio/Roary/AnalyseGroups.t
@@ -0,0 +1,70 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+# Put the in-tree ./lib on @INC and ./bin on PATH before the module under test is loaded.
+BEGIN { unshift( @INC, './lib' ) }
+$ENV{PATH} .= ":./bin";
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::AnalyseGroups');
+}
+# Build the object from two fasta files and one groups file.
+ok(
+    my $plot_groups_obj = Bio::Roary::AnalyseGroups->new(
+        fasta_files     => [ 't/data/example_1.faa', 't/data/example_2.faa' ],
+        groups_filename => 't/data/example_groups'
+    ),
+    'initialise with two fasta files'
+);
+
+is( $plot_groups_obj->_number_of_isolates, 2, 'Number of isolates' );
+# Expected lookup: gene id => fasta file.
+is_deeply(
+    $plot_groups_obj->_genes_to_file,
+    {
+        '1234#10_00003' => 't/data/example_1.faa',
+        '1234#10_00017' => 't/data/example_2.faa',
+        '1234#10_00001' => 't/data/example_1.faa',
+        '1234#10_00016' => 't/data/example_2.faa',
+        '1234#10_00007' => 't/data/example_1.faa',
+        '1234#10_00006' => 't/data/example_1.faa',
+        '1234#10_00018' => 't/data/example_2.faa',
+        '1234#10_00005' => 't/data/example_1.faa',
+        '1234#10_00002' => 't/data/example_1.faa'
+    },
+    'genes map to the correct files'
+);
+
+# Expected lookup: group name => gene ids (1234#10_00005 appears under group_2 and group_3).
+is_deeply(
+    $plot_groups_obj->_groups_to_genes,
+    {
+        'group_3' => [ '1234#10_00005', '1234#10_00005' ],
+        'group_5' => [ '1234#10_00016' ],
+        'group_4' => [ '1234#10_00006', '1234#10_00007' ],
+        'group_6' => [ '1234#10_00017' ],
+        'group_1' => [ '1234#10_00001', '1234#10_00002' ],
+        'group_2' => [ '1234#10_00003', '1234#10_00018', '1234#10_00005' ]
+    },
+    'Groups to genes hash'
+);
+# Expected inverse lookup: gene id => a single group name (1234#10_00005 => group_3 only).
+is_deeply(
+    $plot_groups_obj->_genes_to_groups,
+    {
+        '1234#10_00003' => 'group_2',
+        '1234#10_00017' => 'group_6',
+        '1234#10_00001' => 'group_1',
+        '1234#10_00016' => 'group_5',
+        '1234#10_00007' => 'group_4',
+        '1234#10_00006' => 'group_4',
+        '1234#10_00018' => 'group_2',
+        '1234#10_00005' => 'group_3',
+        '1234#10_00002' => 'group_1'
+    },
+    'genes to groups hash'
+);
+
+done_testing();
diff --git a/t/Bio/Roary/AnnotateGroups.t b/t/Bio/Roary/AnnotateGroups.t
new file mode 100755
index 0000000..8b205da
--- /dev/null
+++ b/t/Bio/Roary/AnnotateGroups.t
@@ -0,0 +1,102 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use Moose;
+BEGIN { unshift( @INC, './t/lib' ) }
+with 'TestHelper';
+# compare_files (used near the end) is not defined in this script; presumably TestHelper supplies it.
+BEGIN { unshift( @INC, './lib' ) }
+$ENV{PATH} .= ":./bin";
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::AnnotateGroups');
+}
+
+my $obj;
+# Build the object from three GFF files and the groups file t/data/query_groups.
+ok(
+    $obj = Bio::Roary::AnnotateGroups->new(
+        gff_files       => [ 't/data/query_1.gff', 't/data/query_2.gff', 't/data/query_3.gff' ],
+        groups_filename => 't/data/query_groups',
+    ),
+    'initalise'
+);
+# NOTE(review): reannotate presumably writes 'reannotated_groups_file', which is compared and removed below.
+ok( $obj->reannotate, 'reannotate' );
+is_deeply(
+    $obj->_ids_to_gene_size,
+    {
+        'abc_00012' => 188,
+        '2_3'       => 1001,
+        '1_1'       => 959,
+        'abc_00004' => 716,
+        '3_3'       => 1001,
+        '3_2'       => 725,
+        '2_2'       => 725,
+        'abc_00006' => 725,
+        'abc_00008' => 935,
+        '1_6'       => 134,
+        'abc_00015' => 134,
+        '3_1'       => 959,
+        'abc_00014' => 134,
+        'abc_01705' => 1556,
+        'abc_00013' => 75,
+        'abc_00010' => 227,
+        '1_2'       => 725,
+        'abc_00011' => 947,
+        'abc_00016' => 686,
+        '2_7'       => 134,
+        '1_3'       => 1001,
+        '2_1'       => 959,
+        '3_5'       => 686,
+        'abc_00002' => 146,
+        'abc_00003' => 197
+    },
+    'gene lengths as expected'
+);
+# Expected average, min and max length for each group.
+is_deeply(
+    $obj->group_nucleotide_lengths,
+    {
+        'group_3' => {
+            'average' => 1001,
+            'min'     => 1001,
+            'max'     => 1001
+        },
+        'group_5' => {
+            'average' => 686,
+            'min'     => 686,
+            'max'     => 686
+        },
+        'group_7' => {
+            'average' => 134,
+            'min'     => 134,
+            'max'     => 134
+        },
+        'group_1' => {
+            'average' => 959,
+            'min'     => 959,
+            'max'     => 959
+        },
+        'group_6' => {
+            'average' => 134,
+            'min'     => 134,
+            'max'     => 134
+        },
+        'group_2' => {
+            'average' => 725,
+            'min'     => 725,
+            'max'     => 725
+        }
+    },
+    'group lengths'
+);
+# Compare the file found in the working directory with the stored expectation, then remove it.
+compare_files( 'reannotated_groups_file', 't/data/expected_reannotated_groups_file', 'groups reannotated as expected' );
+
+unlink('reannotated_groups_file');
+
+done_testing();
+
diff --git a/t/Bio/Roary/AssemblyStatistics.t b/t/Bio/Roary/AssemblyStatistics.t
new file mode 100644
index 0000000..0bd8925
--- /dev/null
+++ b/t/Bio/Roary/AssemblyStatistics.t
@@ -0,0 +1,121 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use Test::Files;
+
+BEGIN { unshift( @INC, './lib' ) }
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::AssemblyStatistics');
+}
+# First spreadsheet: t/data/input_block_spreadsheet.csv
+my $obj;
+ok( $obj = Bio::Roary::AssemblyStatistics->new( spreadsheet => 't/data/input_block_spreadsheet.csv' ), 'initialise spreadsheet' );
+# The keys are sorted here, so the comparison does not depend on hash order.
+my @genes = sort keys %{ $obj->_genes_to_rows };
+is_deeply(
+    \@genes,
+    [
+        'SBOV29371', 'SBOV38871', 'SBOV43201',  'STY3593',    'STY4162',    'bcsC_1',     'betC_2',     'comM_2',
+        'dmsA4_1',   'dosC',      'dsbA_3',     'fadH_1',     'fimD_3',     'fliB_2',     'fliF',       'ftsN',
+        'gatY_1',    'glfT2',     'group_1000', 'group_1001', 'group_1004', 'group_1006', 'group_1009', 'group_220',
+        'group_277', 'group_281', 'group_283',  'group_284',  'group_288',  'hemD',       'hsrA_2',     'icsA',
+        'kdpD',      'ligB_1',    'marT_1',     'nepI',       'rffH',       'rpoS',       'selA_1',     'speC_3',
+        'sptP',      'srgB',      'stp',        'tmcA',       'tub',        'yadA',       'ybbW_1',     'yhaO_2',
+        'yicJ_1',    'yigZ'
+    ],
+    'all gene rows available'
+);
+# ordered_genes is compared as returned, so the order of this list is part of the expectation.
+is_deeply(
+    $obj->ordered_genes,
+    [
+        'dmsA4_1',    'group_1000', 'group_1001', 'SBOV43201', 'dosC',      'stp',    'fliB_2', 'fliF',
+        'dsbA_3',     'srgB',       'fimD_3',     'betC_2',    'tmcA',      'tub',    'rffH',   'hemD',
+        'group_1006', 'STY3593',    'group_1004', 'yigZ',      'group_220', 'glfT2',  'kdpD',   'speC_3',
+        'ybbW_1',     'sptP',       'SBOV29371',  'rpoS',      'fadH_1',    'yhaO_2', 'bcsC_1', 'STY4162',
+        'yadA',       'ligB_1',     'icsA',       'marT_1',    'selA_1',    'nepI',   'gatY_1', 'SBOV38871',
+        'group_288',  'hsrA_2',     'group_281',  'group_283', 'group_284', 'yicJ_1', 'ftsN',   'group_277',
+        'group_1009', 'comM_2'
+    ],
+    'ordered genes'
+);
+# Expected lookup: sample name => column index.
+is_deeply(
+    $obj->sample_names_to_column_index,
+    {
+        'threeblocks'          => 18,
+        'nocontigs'            => 17,
+        'contigwithgaps'       => 16,
+        'oneblock'             => 14,
+        'threeblocksinversion' => 19,
+        'oneblockrev'          => 15
+    },
+    'sample names to column index'
+);
+# Per-sample expectations: the number of blocks and the size of the largest block.
+is_deeply( $obj->_sample_statistics('oneblock'),    { num_blocks => 1, largest_block_size => 50 }, 'one block' );
+is_deeply( $obj->_sample_statistics('oneblockrev'), { num_blocks => 1, largest_block_size => 50 }, 'one block reversed' );
+is_deeply(
+    $obj->_sample_statistics('contigwithgaps'),
+    { num_blocks => 1, largest_block_size => 50 },
+    'one block where there are gaps everywhere'
+);
+is_deeply( $obj->_sample_statistics('nocontigs'),   { num_blocks => 50, largest_block_size => 1 },  'no contiguous blocks' );
+is_deeply( $obj->_sample_statistics('threeblocks'), { num_blocks => 3,  largest_block_size => 21 }, 'three blocks' );
+is_deeply(
+    $obj->_sample_statistics('threeblocksinversion'),
+    { num_blocks => 3, largest_block_size => 20 },
+    'three blocks with an inversion in the middle'
+);
+is_deeply( $obj->gene_category_count, { core => 50 }, 'Gene category counts' );
+
+# t/data/gene_category_count.csv
+ok( $obj = Bio::Roary::AssemblyStatistics->new( spreadsheet => 't/data/gene_category_count.csv' ),
+    'initialise spreadsheet with variable numbers of genes in samples' );
+is_deeply(
+    $obj->gene_category_count,
+    {
+        'core'      => 1,
+        'cloud'     => 4,
+        'soft_core' => 1,
+        'shell'     => 24
+    },
+    'Categories as expected'
+);
+ok($obj->create_summary_output, 'create output file');
+compare_ok('summary_statistics.txt', 't/data/expected_summary_statistics.txt', 'summary statistics as expected');
+
+# With a core_definition of 0.9667 the counts are expected to match the run above.
+# t/data/gene_category_count.csv
+ok( $obj = Bio::Roary::AssemblyStatistics->new( spreadsheet => 't/data/gene_category_count.csv', core_definition => 0.9667 ),
+    'initialise spreadsheet with core of 96.67%' );
+is_deeply(
+    $obj->gene_category_count,
+    {
+        'core'      => 1,
+		'soft_core' => 1,
+        'cloud'     => 4,
+        'shell'     => 24
+    },
+    'Categories as expected with cd of 96.67%'
+);
+# With 0.9666 the former soft_core gene is expected to be counted as core (2 core, no soft_core key).
+# t/data/gene_category_count.csv
+ok( $obj = Bio::Roary::AssemblyStatistics->new( spreadsheet => 't/data/gene_category_count.csv', core_definition => 0.9666 ),
+    'initialise spreadsheet with core of 96.66%' );
+is_deeply(
+    $obj->gene_category_count,
+    {
+        'core'      => 2,
+        'cloud'     => 4,
+        'shell'     => 24
+    },
+    'Categories as expected with cd of 96.66%'
+);
+
+# Remove the summary file compared further up.
+unlink('summary_statistics.txt');
+done_testing();
diff --git a/t/Bio/Roary/ChunkFastaFile.t b/t/Bio/Roary/ChunkFastaFile.t
new file mode 100644
index 0000000..fdd4d62
--- /dev/null
+++ b/t/Bio/Roary/ChunkFastaFile.t
@@ -0,0 +1,41 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use Test::Files;
+
+BEGIN { unshift( @INC, './lib' ) }
+$ENV{PATH} .= ":./bin";
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::ChunkFastaFile');
+}
+
+# Without a target_chunk_size a single file, 0.seq, is expected, and it has to
+# be identical to the input file.
+my $obj = Bio::Roary::ChunkFastaFile->new( fasta_file => 't/data/example_1.faa' );
+ok( $obj, 'initalise object to produce a single sequence file' );
+
+my $names = $obj->sequence_file_names;
+my $dir   = $obj->_working_directory_name;
+is_deeply( $names, ["$dir/0.seq"], 'a single sequence file is created' );
+compare_ok( 't/data/example_1.faa', "$dir/0.seq", 'input and output file should be the same' );
+
+# With a target_chunk_size of 1 the files 0.seq to 5.seq are expected.
+$obj = Bio::Roary::ChunkFastaFile->new(
+    fasta_file        => 't/data/example_1.faa',
+    target_chunk_size => 1,
+);
+ok( $obj, 'initalise object to produce one file per sequence' );
+
+$names = $obj->sequence_file_names;
+$dir   = $obj->_working_directory_name;
+my @expected_names = map { "$dir/$_.seq" } 0 .. 5;
+is_deeply( $names, \@expected_names, 'a sequence file per sequence is created' );
+
+# Only the first and the last chunk are compared against stored copies.
+compare_ok( 't/data/expected_0.seq', "$dir/0.seq", 'the first sequence file is as expected' );
+compare_ok( 't/data/expected_5.seq', "$dir/5.seq", 'the last sequence file is as expected' );
+
+done_testing();
diff --git a/t/Bio/Roary/CombinedProteome.t b/t/Bio/Roary/CombinedProteome.t
new file mode 100644
index 0000000..ccbeb40
--- /dev/null
+++ b/t/Bio/Roary/CombinedProteome.t
@@ -0,0 +1,39 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use Test::Files;
+
+BEGIN { unshift( @INC, './lib' ) }
+$ENV{PATH} .= ":./bin";
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::CombinedProteome');
+}
+
+my $obj;
+# Happy path: two existing proteome files, output written to combined_proteome.fa.
+ok(
+    $obj = Bio::Roary::CombinedProteome->new(
+        proteome_files  => [ 't/data/example_1.faa', 't/data/example_2.faa' ],
+        output_filename => 'combined_proteome.fa'
+    ),
+    'initalise object with two files'
+);
+
+ok( $obj->create_combined_proteome_file, 'Create a combined file' );
+# The output must match the stored expectation; it is removed again straight afterwards.
+compare_ok('combined_proteome.fa',
+    't/data/expected_combined_proteome.fa',
+    'Combined file is as expected'
+);
+unlink('combined_proteome.fa');
+# Error path: the constructor itself is expected to die when an input file does not exist.
+throws_ok{
+    Bio::Roary::CombinedProteome->new(
+        proteome_files  => [ 't/data/example_1.faa', 't/data/non_existant_file.faa' ],
+        output_filename => 'combined_proteome.fa')
+    } qr /Cant open file/, 'non existant files should throw an error';
+
+done_testing();
diff --git a/t/Bio/Roary/CommandLine/ExtractProteomeFromGff.t b/t/Bio/Roary/CommandLine/ExtractProteomeFromGff.t
new file mode 100644
index 0000000..76e7472
--- /dev/null
+++ b/t/Bio/Roary/CommandLine/ExtractProteomeFromGff.t
@@ -0,0 +1,28 @@
+#!/usr/bin/env perl
+use Moose;
+use Data::Dumper;
+use Cwd;
+# mock_execute_script_and_check_output (called below) is not defined here; presumably from TestHelper.
+BEGIN { unshift( @INC, './lib' ) }
+BEGIN { unshift( @INC, './t/lib' ) }
+with 'TestHelper';
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::CommandLine::ExtractProteomeFromGff');
+}
+my $script_name = 'Bio::Roary::CommandLine::ExtractProteomeFromGff';
+my $cwd         = getcwd();
+system('touch empty_file');
+my %scripts_and_expected_files = (
+    't/data/example_annotation.gff' =>
+      ['example_annotation.gff.proteome.faa','t/data/example_annotation.gff.proteome.faa.expected' ],
+      '-t 1 t/data/example_annotation.gff' =>
+        ['example_annotation.gff.proteome.faa','t/data/example_annotation.gff.proteome.faa.expected' ],
+      '-h' =>
+        [ 'empty_file', 't/data/empty_file' ],
+);
+# NOTE(review): keys look like command line arguments, values like [ file to check, expected file ] - confirm in TestHelper.
+mock_execute_script_and_check_output( $script_name, \%scripts_and_expected_files );
+
+done_testing();
diff --git a/t/Bio/Roary/CommandLine/GeneAlignmentFromNucleotides.t b/t/Bio/Roary/CommandLine/GeneAlignmentFromNucleotides.t
new file mode 100644
index 0000000..c682c32
--- /dev/null
+++ b/t/Bio/Roary/CommandLine/GeneAlignmentFromNucleotides.t
@@ -0,0 +1,41 @@
+#!/usr/bin/env perl
+use Moose;
+use Data::Dumper;
+use Cwd;
+use File::Which;
+# mock_execute_script_and_check_output (called below) is not defined here; presumably from TestHelper.
+BEGIN { unshift( @INC, './lib' ) }
+BEGIN { unshift( @INC, './t/lib' ) }
+with 'TestHelper';
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::CommandLine::GeneAlignmentFromNucleotides');
+}
+
+my $script_name = 'Bio::Roary::CommandLine::GeneAlignmentFromNucleotides';
+my $cwd         = getcwd();
+system('touch empty_file');
+system('cp t/data/nuc_to_be_aligned.fa t/data/f.fa');
+my %scripts_and_expected_files = (
+    't/data/f.fa' => [ 't/data/f.fa.aln', 't/data/expected_nuc_multifasta.fa.aln' ],
+    '-h'          => [ 'empty_file',      't/data/empty_file' ],
+);
+# Default run: skipped when the prank executable cannot be found.
+SKIP:
+{
+    skip "prank not installed", 2 unless ( which('prank') );
+    mock_execute_script_and_check_output( $script_name, \%scripts_and_expected_files );
+}
+# --mafft run on a fresh copy of the input: skipped when the mafft executable cannot be found.
+SKIP:
+{
+    skip "mafft not installed", 2 unless ( which('mafft') );
+	system('cp t/data/nuc_to_be_aligned.fa t/data/f.fa');
+	%scripts_and_expected_files = (
+	    '--mafft t/data/f.fa' => [ 't/data/f.fa.aln', 't/data/expected_nuc_multifasta_mafft.fa.aln' ],
+	);
+    mock_execute_script_and_check_output( $script_name, \%scripts_and_expected_files );
+}
+
+done_testing();
diff --git a/t/Bio/Roary/CommandLine/ParallelAllAgainstAllBlastp.t b/t/Bio/Roary/CommandLine/ParallelAllAgainstAllBlastp.t
new file mode 100644
index 0000000..73040a4
--- /dev/null
+++ b/t/Bio/Roary/CommandLine/ParallelAllAgainstAllBlastp.t
@@ -0,0 +1,29 @@
+#!/usr/bin/env perl
+use Moose;
+use Data::Dumper;
+use Cwd;
+# mock_execute_script_and_check_output (called below) is not defined here; presumably from TestHelper.
+BEGIN { unshift( @INC, './lib' ) }
+BEGIN { unshift( @INC, './t/lib' ) }
+with 'TestHelper';
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::CommandLine::ParallelAllAgainstAllBlastp');
+}
+my $script_name = 'Bio::Roary::CommandLine::ParallelAllAgainstAllBlastp';
+my $cwd = getcwd();
+# -m and -b are given absolute paths (built from $cwd) to the dummy executables under t/bin.
+system('touch empty_file');
+my %scripts_and_expected_files = (
+    '-m '.$cwd.'/t/bin/dummy_makeblastdb -b '.$cwd.'/t/bin/dummy_blastp -j Local t/data/example_1.faa' =>
+      [ 'blast_results', 't/data/empty_file' ],
+   '-o different_output_filename -m '.$cwd.'/t/bin/dummy_makeblastdb -b '.$cwd.'/t/bin/dummy_blastp -j Local t/data/example_1.faa' =>
+      [ 'different_output_filename', 't/data/empty_file'  ],
+      '-h' =>
+        [ 'empty_file', 't/data/empty_file' ],
+);
+
+mock_execute_script_and_check_output( $script_name, \%scripts_and_expected_files );
+
+done_testing();
\ No newline at end of file
diff --git a/t/Bio/Roary/CommandLine/QueryRoary.t b/t/Bio/Roary/CommandLine/QueryRoary.t
new file mode 100755
index 0000000..9e83f83
--- /dev/null
+++ b/t/Bio/Roary/CommandLine/QueryRoary.t
@@ -0,0 +1,71 @@
+#!/usr/bin/env perl
+use Moose;
+use Data::Dumper;
+use Cwd;
+# mock_execute_script_and_check_output_sorted (called below) is not defined here; presumably from TestHelper.
+BEGIN { unshift( @INC, './lib' ) }
+BEGIN { unshift( @INC, './t/lib' ) }
+with 'TestHelper';
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::CommandLine::QueryRoary');
+}
+my $script_name = 'Bio::Roary::CommandLine::QueryRoary';
+my $cwd         = getcwd();
+
+system('touch empty_file');
+system('touch empty_file2');
+# NOTE: several keys below differ only in their whitespace; that keeps the hash keys distinct, so do not tidy them.
+my %scripts_and_expected_files = (
+    '-g t/data/example_groups -a gene_multifasta -n group_2 t/data/example_1.faa t/data/example_2.faa' =>
+      [ 'pan_genome_results_group_2.fa', 't/data/expected_output_groups_group_2.fa' ],
+    '-g t/data/example_groups -a gene_multifasta -n group_5 t/data/example_1.faa t/data/example_2.faa ' =>
+      [ 'pan_genome_results_group_5.fa', 't/data/expected_output_groups_group_5.fa' ],
+    '-g t/data/example_groups -a gene_multifasta -n group_2,group_5 t/data/example_1.faa t/data/example_2.faa' =>
+      [ 'pan_genome_results_group_5.fa', 't/data/expected_output_groups_group_5.fa' ],
+    '-g t/data/example_groups -a gene_multifasta -n group_5,group_2 t/data/example_1.faa t/data/example_2.faa ' =>
+      [ 'pan_genome_results_group_5.fa', 't/data/expected_output_groups_group_5.fa' ],
+    '-g t/data/example_groups -a gene_multifasta -n group_5,group_2 t/data/example_1.faa t/data/example_2.faa  ' =>
+      [ 'pan_genome_results_group_2.fa', 't/data/expected_output_groups_group_2.fa' ],
+    '-g t/data/example_groups -a gene_multifasta -n group_2,group_5 t/data/example_1.faa t/data/example_2.faa   ' =>
+      [ 'pan_genome_results_group_2.fa', 't/data/expected_output_groups_group_2.fa' ],
+    '-g t/data/example_groups -n group_which_doesnt_exist t/data/example_1.faa t/data/example_2.faa' =>
+      [ 'empty_file', 't/data/empty_file' ],
+    '-g t/data/query_groups -a union t/data/query_1.fa t/data/query_2.fa t/data/query_3.fa' =>
+      [ 'pan_genome_results', 't/data/expected_union_of_groups.gg' ],
+    '-g t/data/query_groups -a intersection t/data/query_1.fa t/data/query_2.fa t/data/query_3.fa' =>
+      [ 'pan_genome_results', 't/data/expected_intersection_of_groups.gg' ],
+    '-g t/data/query_groups -a complement t/data/query_1.fa t/data/query_2.fa t/data/query_3.fa' =>
+      [ 'pan_genome_results', 't/data/expected_complement_of_groups.gg' ],
+    '-g t/data/query_groups -a difference -i t/data/query_1.fa -t t/data/query_2.fa,t/data/query_3.fa' =>
+      [ 'set_difference_unique_set_one', 't/data/expected_set_difference_unique_set_one' ],
+    '-g t/data/query_groups -a difference  -i t/data/query_1.fa -t t/data/query_2.fa,t/data/query_3.fa' =>
+      [ 'set_difference_unique_set_two', 't/data/expected_set_difference_unique_set_two' ],
+    '-g t/data/query_groups -a difference   -i t/data/query_1.fa -t t/data/query_2.fa,t/data/query_3.fa' =>
+      [ 'set_difference_common_set', 't/data/expected_set_difference_common_set' ],
+    '-g t/data/query_groups -a difference   -i t/data/query_1.fa -t t/data/query_2.fa,t/data/query_3.fa ' =>
+      [ 'set_difference_unique_set_two_statistics.csv', 't/data/expected_set_difference_unique_set_two_statistics.csv' ],
+    '-g t/data/query_groups -a difference   -i t/data/query_1.fa -t t/data/query_2.fa,t/data/query_3.fa     ' =>
+      [ 'set_difference_unique_set_one_statistics.csv', 't/data/expected_set_difference_unique_set_one_statistics.csv' ],
+    '-g t/data/query_groups -a difference   -i t/data/query_1.fa -t t/data/query_2.fa,t/data/query_3.fa   ' =>
+      [ 'set_difference_common_set_statistics.csv', 't/data/expected_set_difference_common_set_statistics.csv' ],
+    '-g t/data/query_groups -a difference   -i t/data/query_1.gff -t t/data/query_2.gff,t/data/query_3.gff' =>
+      [ 'set_difference_common_set_statistics.csv', 't/data/expected_gff_set_difference_common_set_statistics.csv' ],
+    '-h' => [ 'empty_file2', 't/data/empty_file' ],
+);
+
+mock_execute_script_and_check_output_sorted( $script_name, \%scripts_and_expected_files );
+# Remove any of the listed files that still exist in the working directory.
+unlink('set_difference_unique_set_two')                if ( -e 'set_difference_unique_set_two' );
+unlink('set_difference_common_set')                    if ( -e 'set_difference_common_set' );
+unlink('pan_genome_results_group_5.fa')                if ( -e 'pan_genome_results_group_5.fa' );
+unlink('gene_presence_absence.csv')                    if ( -e 'gene_presence_absence.csv' );
+unlink('set_difference_unique_set_two_statistics.csv') if ( -e 'set_difference_unique_set_two_statistics.csv' );
+unlink('set_difference_unique_set_one_statistics.csv') if ( -e 'set_difference_unique_set_one_statistics.csv' );
+unlink('set_difference_common_set_statistics.csv')     if ( -e 'set_difference_common_set_statistics.csv' );
+unlink('pan_genome_reference.fa')                      if ( -e 'pan_genome_reference.fa' );
+unlink('set_difference_core_accessory_graph.dot')      if ( -e 'set_difference_core_accessory_graph.dot' );
+unlink('set_difference_accessory_graph.dot')           if ( -e 'set_difference_accessory_graph.dot' );
+
+done_testing();
diff --git a/t/Bio/Roary/CommandLine/Roary.t b/t/Bio/Roary/CommandLine/Roary.t
new file mode 100755
index 0000000..2e76d9b
--- /dev/null
+++ b/t/Bio/Roary/CommandLine/Roary.t
@@ -0,0 +1,322 @@
+#!/usr/bin/env perl
+use Moose;
+use Data::Dumper;
+use File::Path qw( remove_tree);
+use File::Which;
+use File::Path qw(make_path);
+use Cwd qw(abs_path getcwd); 
+use File::Find::Rule;
+
+# Tests change the working directory, so export absolute paths; unset or empty parts are dropped (no undef warning, no stray ':').
+local $ENV{PERL5LIB} = join( ':', grep { defined($_) && length($_) } ( $ENV{PERL5LIB}, abs_path('./lib'), abs_path('./t/lib') ) );
+local $ENV{PATH}     = join( ':', grep { defined($_) && length($_) } ( $ENV{PATH}, abs_path('./bin') ) );
+
+BEGIN { unshift( @INC, abs_path('./lib') ) }
+BEGIN { unshift( @INC, abs_path('./t/lib') ) }
+with 'TestHelper';
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::CommandLine::Roary');
+    use_ok('Bio::Roary::CommandLine::CreatePanGenome');
+    use Bio::Roary::SequenceLengths;
+}
+my $script_name = 'Bio::Roary::CommandLine::Roary';
+my $cwd         = getcwd();
+
+local $ENV{PATH} = "$ENV{PATH}:./bin";
+my %scripts_and_expected_files;
+system('touch empty_file');
+cleanup_files();
+
+%scripts_and_expected_files = (
+
+   ' --dont_split_groups   t/data/query_1.gff t/data/query_2.gff t/data/query_5.gff    ' =>
+     [ 'gene_presence_absence.csv', 't/data/overall_gene_presence_absence.csv' ],
+   ' -j Local -t 1 --dont_split_groups   t/data/query_1.gff t/data/query_2.gff t/data/query_5.gff    ' =>
+     [ 'gene_presence_absence.csv', 't/data/overall_gene_presence_absence.csv' ],
+   ' -j Parallel  --dont_split_groups t/data/query_1.gff t/data/query_2.gff t/data/query_5.gff    ' =>
+     [ 'gene_presence_absence.csv', 't/data/overall_gene_presence_absence.csv' ],
+   ' -t 1 -j Parallel --dont_split_groups  t/data/query_1.gff t/data/query_2.gff t/data/query_5.gff    ' =>
+     [ 'gene_presence_absence.csv', 't/data/overall_gene_presence_absence.csv' ],
+   ' -j Local --dont_split_groups t/data/genbank_gbff/genbank1.gff t/data/genbank_gbff/genbank2.gff t/data/genbank_gbff/genbank3.gff' =>
+     [ 'gene_presence_absence.csv', 't/data/genbank_gbff/genbank_gene_presence_absence.csv' ],
+    '-h' => [ 'empty_file', 't/data/empty_file' ],
+);
+
+mock_execute_script_and_check_output_sorted( $script_name, \%scripts_and_expected_files, [ 0, 6, 7, 8, 9 ] );
+
+cleanup_files();
+
+%scripts_and_expected_files = (
+    ' -j Local --dont_split_groups   t/data/query_1.gff t/data/query_2.gff t/data/query_5.gff ' =>
+      [ 'clustered_proteins', 't/data/clustered_proteins_pan_genome' ],
+    ' -j Parallel --dont_split_groups  t/data/query_1.gff t/data/query_2.gff t/data/query_5.gff ' =>
+      [ 'clustered_proteins', 't/data/clustered_proteins_pan_genome' ],
+);
+
+mock_execute_script_and_check_output_sorted_groups( $script_name, \%scripts_and_expected_files, [ 0, 6, 7, 8, 9 ] );
+
+# Make sure faa files are cleaned up automatically
+ok( !( -e 'query_1.gff.proteome.faa' ), 'Check protein query_1.gff.proteome.faa is cleaned up' );
+ok( !( -e 'query_2.gff.proteome.faa' ), 'Check protein query_2.gff.proteome.faa is cleaned up' );
+ok( !( -e 'query_5.gff.proteome.faa' ), 'Check protein query_5.gff.proteome.faa is cleaned up' );
+
+cleanup_files();
+
+stderr_should_have($script_name,'-a', 'Looking for');
+
+my $current_cwd = getcwd();
+stderr_should_have($script_name,'-v --output_directory t/data/directory_which_doesnt_exist t/data/real_data_1.gff t/data/real_data_2.gff', 'Output directory created');
+ok( ( -e 't/data/directory_which_doesnt_exist/clustered_proteins' ), 'pan genome files should be in directory' );
+is(getcwd(),$current_cwd , 'current working directory should not have changed after script is finished'); 
+
+SKIP:
+{
+    skip "prank not installed", 11 unless ( which('prank') );
+
+    # One run with --dont_delete_files: check one per-gene alignment, then that the other output files exist.
+    %scripts_and_expected_files =
+      ( '-j Local --dont_delete_files --dont_split_groups  --output_multifasta_files t/data/real_data_1.gff t/data/real_data_2.gff' =>
+          [ 'pan_genome_sequences/mdoH.fa.aln', 't/data/mdoH.fa.aln' ], );
+    mock_execute_script_and_check_output( $script_name, \%scripts_and_expected_files );
+
+    ok( -e 'core_gene_alignment.aln', 'Core gene alignment exists' );
+    ok(
+        my $seq_len = Bio::Roary::SequenceLengths->new(
+            fasta_file => 'core_gene_alignment.aln',
+        ),
+        'Check size of the core_gene_alignment.aln init'
+    );
+
+    my @keys = keys %{ $seq_len->sequence_lengths };
+    is( $seq_len->sequence_lengths->{ $keys[0] }, 64983, 'length of first sequence' );
+
+    ok( -e 'core_alignment_header.embl', 'Core gene alignment header exists' );
+
+    ok( -e 'accessory.tab',                      'accessory.tab exists' );
+    ok( -e 'core_accessory.tab',                 'core_accessory.tab exists' );
+    ok( -e 'number_of_conserved_genes.Rtab',     'number_of_conserved_genes.Rtab exists' );
+    ok( -e 'number_of_genes_in_pan_genome.Rtab', 'number_of_genes_in_pan_genome.Rtab exists' );
+    ok( -e 'number_of_new_genes.Rtab',           'number_of_new_genes.Rtab exists' );
+    ok( -e 'number_of_unique_genes.Rtab',        'number_of_unique_genes.Rtab exists' );
+    ok( -e 'blast_identity_frequency.Rtab',      'blast_identity_frequency.Rtab exists' );
+
+    cleanup_files();
+    %scripts_and_expected_files =
+      (
+'-j Local --output_multifasta_files t/data/core_alignment_gene_lookup/query_1.gff t/data/core_alignment_gene_lookup/query_2.gff t/data/core_alignment_gene_lookup/query_3.gff'
+          => [ 'core_gene_alignment.aln', 't/data/core_alignment_gene_lookup/expected_core_gene_alignment.aln' ], );
+    mock_execute_script_and_check_output( $script_name, \%scripts_and_expected_files );
+
+    cleanup_files();
+}
+
+SKIP:
+{
+    skip "mafft not installed", 11 unless which('mafft');
+    # Single run with --mafft; the helper is handed the produced per-gene alignment and its expected counterpart.
+    %scripts_and_expected_files = (
+        '-j Local --dont_delete_files --dont_split_groups  --output_multifasta_files --mafft t/data/real_data_1.gff t/data/real_data_2.gff' =>
+          [ 'pan_genome_sequences/mdoH.fa.aln', 't/data/mdoH_mafft.fa.aln' ] );
+    mock_execute_script_and_check_output( $script_name, \%scripts_and_expected_files );
+    ok( -e 'core_gene_alignment.aln', 'Core gene alignment exists' );
+}
+
+SKIP:
+{
+	skip "extended tests not run",  40 unless ( defined($ENV{ROARY_FULL_TESTS}));
+
+    %scripts_and_expected_files = (
+        '-o some_different_output t/data/real_data_1.gff t/data/real_data_2.gff'    => [ 'some_different_output', 't/data/expected_some_different_output' ],
+        '-o some_different_output -i 90 t/data/real_data_1.gff t/data/real_data_2.gff' => [ 'some_different_output', 't/data/expected_some_different_output' ],
+        '-o some_different_output --translation_table 1 t/data/real_data_1.gff t/data/real_data_2.gff' =>
+          [ 'some_different_output', 't/data/expected_some_different_output' ],
+    	'-p 2 -o some_different_output t/data/real_data_1.gff t/data/real_data_2.gff' => [ 'some_different_output', 't/data/expected_some_different_output' ],
+    	'-p 2 -i 90 t/data/real_data_1.gff t/data/real_data_2.gff'                    => [ 'clustered_proteins', 't/data/expected_some_different_output' ],
+    	'-i 90 t/data/real_data_1.gff t/data/real_data_2.gff'                         => [ 'clustered_proteins', 't/data/expected_some_different_output' ],
+    	'-p 2 --translation_table 1 t/data/real_data_1.gff t/data/real_data_2.gff'    => [ 'clustered_proteins', 't/data/expected_some_different_output' ],
+    	);
+    mock_execute_script_and_check_output_sorted( $script_name, \%scripts_and_expected_files, [ 0 ] );
+    
+    stderr_should_have($script_name,'-e --group_limit 10 t/data/real_data_1.gff t/data/real_data_2.gff', 'Exiting early because number of clusters is too high');
+    stderr_should_have($script_name,'-p 2 --group_limit 10 -e t/data/real_data_1.gff t/data/real_data_2.gff', 'Exiting early because number of clusters is too high');
+    stderr_should_have($script_name,'--core_definition 60 --group_limit 10 -e t/data/real_data_1.gff t/data/real_data_2.gff', 'Exiting early because number of clusters is too high');
+    stderr_should_have($script_name,'--mafft --group_limit 10 -e t/data/real_data_1.gff t/data/real_data_2.gff', 'Exiting early because number of clusters is too high');
+    stderr_should_have($script_name,'-o some_different_output --group_limit 10 -e t/data/real_data_1.gff t/data/real_data_2.gff', 'Exiting early because number of clusters is too high');
+    stderr_should_have($script_name,'--translation_table 1 --group_limit 10 -e t/data/real_data_1.gff t/data/real_data_2.gff', 'Exiting early because number of clusters is too high');
+    stderr_should_have($script_name,'--verbose_stats --group_limit 10 -e t/data/real_data_1.gff t/data/real_data_2.gff', 'Exiting early because number of clusters is too high');
+    stderr_should_not_have($script_name,'-e --group_limit 10 t/data/real_data_1.gff t/data/real_data_2.gff', 'Cant access the multifasta base directory');
+    stderr_should_have($script_name,'-v t/data/real_data_1.gff t/data/real_data_2.gff','Cleaning up files');
+    stderr_should_have($script_name,'-p 2 -v t/data/real_data_1.gff t/data/real_data_2.gff','Cleaning up files');
+    stderr_should_have($script_name,'--core_definition 60 -v t/data/real_data_1.gff t/data/real_data_2.gff','Cleaning up files');
+    stderr_should_have($script_name,'-i 90 -v t/data/real_data_1.gff t/data/real_data_2.gff','Cleaning up files');
+	stderr_should_have($script_name,'-i 30 t/data/real_data_1.gff t/data/real_data_2.gff','The percentage identity is too low');
+    stderr_should_not_have($script_name,'--dont_delete_files -v t/data/real_data_1.gff t/data/real_data_2.gff','Cleaning up files');
+    stderr_should_have($script_name,'-v --verbose_stats t/data/real_data_1.gff t/data/real_data_2.gff'   ,'Cleaning up files');
+    stderr_should_have($script_name,'-v --group_limit 100000 -e t/data/real_data_1.gff t/data/real_data_2.gff' ,'Cleaning up files');
+    
+    stderr_should_have($script_name,'--translation_table 1 -v t/data/real_data_1.gff t/data/real_data_2.gff' ,'Cleaning up files');
+    stderr_should_have($script_name,'-e -v t/data/real_data_1.gff t/data/real_data_2.gff','Creating files with the nucleotide sequences for every cluster');
+    
+    SKIP:    # --qc checks: which() is used to look for the two external kraken programs first
+    {
+        skip "kraken not installed",        2 unless ( which('kraken') );    # skip() leaves this SKIP block immediately
+        skip "kraken-report not installed", 2 unless ( which('kraken-report') );    # only reached when kraken was found
+        stderr_should_not_have($script_name,'--group_limit 10 --qc t/data/real_data_1.gff t/data/real_data_2.gff', 'Exiting early because number of clusters is too high');
+        stderr_should_have($script_name,'-v --qc t/data/real_data_1.gff t/data/real_data_2.gff' ,'Running Kraken on each input assembly');
+    }
+    
+    %scripts_and_expected_files = (
+        # output
+        '-o some_different_output -e t/data/real_data_1.gff t/data/real_data_2.gff' => [ 'core_gene_alignment.aln', 't/data/expected_real_data_core_gene_alignment.aln' ],
+        '-o some_different_output -e --dont_delete_files t/data/real_data_1.gff t/data/real_data_2.gff' =>
+          [ 'pan_genome_sequences/mdoH.fa.aln', 't/data/mdoH.fa.aln' ],
+        '-o some_different_output --core_definition 60 t/data/real_data_1.gff t/data/real_data_2.gff' =>
+          [ 'summary_statistics.txt', 't/data/expected_core_60_summary_statistics.txt' ],
+        '-o some_different_output -e --mafft t/data/real_data_1.gff t/data/real_data_2.gff' =>
+          [ 'core_gene_alignment.aln', 't/data/expected_mafft_real_data_core_gene_alignment.aln' ],
+         # -e
+        '-e -i 95.3 t/data/real_data_1.gff t/data/real_data_2.gff'                 => [ 'core_gene_alignment.aln', 't/data/expected_real_data_core_gene_alignment.aln' ],
+        '-e --translation_table 1 t/data/real_data_1.gff t/data/real_data_2.gff'   => [ 'core_gene_alignment.aln', 't/data/expected_real_data_core_gene_alignment.aln' ],
+        '-e -v t/data/real_data_1.gff t/data/real_data_2.gff'                      => [ 'core_gene_alignment.aln', 't/data/expected_real_data_core_gene_alignment.aln' ],
+        '-e --verbose_stats t/data/real_data_1.gff t/data/real_data_2.gff'         => [ 'core_gene_alignment.aln', 't/data/expected_real_data_core_gene_alignment.aln' ],
+        '-e --group_limit 100000 -e t/data/real_data_1.gff t/data/real_data_2.gff' => [ 'core_gene_alignment.aln', 't/data/expected_real_data_core_gene_alignment.aln' ],
+        '-e --qc t/data/real_data_1.gff t/data/real_data_2.gff'                    => [ 'core_gene_alignment.aln', 't/data/expected_real_data_core_gene_alignment.aln' ],
+    
+    #    '-o some_different_output -v t/data/real_data_1.gff t/data/real_data_2.gff' => [ 'empty_file', 't/data/empty_file' ],
+    #    '-o some_different_output --verbose_stats t/data/real_data_1.gff t/data/real_data_2.gff' =>
+    #      [ 'empty_file', 't/data/empty_file' ],
+    #    '-o some_different_output --group_limit 100000 -e t/data/real_data_1.gff t/data/real_data_2.gff' =>
+    #      [ 'empty_file', 't/data/empty_file' ],
+    #    '-o some_different_output --qc t/data/real_data_1.gff t/data/real_data_2.gff' => [ 'empty_file', 't/data/empty_file' ],
+    #
+    #    # single parameters
+        '--core_definition 60 t/data/real_data_1.gff t/data/real_data_2.gff'    => [ 'summary_statistics.txt', 't/data/expected_core_60_summary_statistics.txt' ],
+    #    '--translation_table 1 t/data/real_data_1.gff t/data/real_data_2.gff'   => [ 'empty_file', 't/data/empty_file' ],
+    #    '--verbose_stats t/data/real_data_1.gff t/data/real_data_2.gff'         => [ 'empty_file', 't/data/empty_file' ],
+    #    '--group_limit 100000 -e t/data/real_data_1.gff t/data/real_data_2.gff' => [ 'empty_file', 't/data/empty_file' ],
+    #    '--qc t/data/real_data_1.gff t/data/real_data_2.gff'                    => [ 'empty_file', 't/data/empty_file' ],
+    #
+    #    # Parallel
+        '-p 2 -e t/data/real_data_1.gff t/data/real_data_2.gff'                       => [ 'core_gene_alignment.aln', 't/data/expected_real_data_core_gene_alignment.aln' ],
+        '-p 2 -e --dont_delete_files t/data/real_data_1.gff t/data/real_data_2.gff'   => [ 'pan_genome_sequences/mdoH.fa.aln', 't/data/mdoH.fa.aln' ],
+        '-p 2 --core_definition 60 t/data/real_data_1.gff t/data/real_data_2.gff'     => [ 'summary_statistics.txt', 't/data/expected_core_60_summary_statistics.txt' ],
+        '-p 2 -e --mafft t/data/real_data_1.gff t/data/real_data_2.gff'               => [ 'core_gene_alignment.aln', 't/data/expected_mafft_real_data_core_gene_alignment.aln' ],
+    #    '-p 2 --verbose_stats t/data/real_data_1.gff t/data/real_data_2.gff'          => [ 'empty_file', 't/data/empty_file' ],
+    #    '-p 2 --group_limit 100000 -e t/data/real_data_1.gff t/data/real_data_2.gff'  => [ 'empty_file', 't/data/empty_file' ],
+    #    '-p 2 --qc t/data/real_data_1.gff t/data/real_data_2.gff'                     => [ 'empty_file', 't/data/empty_file' ],
+    #
+    #    # core definition
+    #    '--core_definition 60 -e t/data/real_data_1.gff t/data/real_data_2.gff' => [ 'empty_file', 't/data/empty_file' ],
+    #    '--core_definition 60 -e --dont_delete_files t/data/real_data_1.gff t/data/real_data_2.gff' =>
+    #      [ 'empty_file', 't/data/empty_file' ],
+    #    '--core_definition 60 -e --mafft t/data/real_data_1.gff t/data/real_data_2.gff' => [ 'empty_file', 't/data/empty_file' ],
+    #    '--core_definition 60 -i 90 t/data/real_data_1.gff t/data/real_data_2.gff'      => [ 'empty_file', 't/data/empty_file' ],
+    #    '--core_definition 60 --translation_table 1 t/data/real_data_1.gff t/data/real_data_2.gff' =>
+    #      [ 'empty_file', 't/data/empty_file' ],
+    #    '--core_definition 60 --verbose_stats t/data/real_data_1.gff t/data/real_data_2.gff' =>
+    #      [ 'empty_file', 't/data/empty_file' ],
+    #    '--core_definition 60 --group_limit 100000 -e t/data/real_data_1.gff t/data/real_data_2.gff' =>
+    #      [ 'empty_file', 't/data/empty_file' ],
+    #    '--core_definition 60 --qc t/data/real_data_1.gff t/data/real_data_2.gff' => [ 'empty_file', 't/data/empty_file' ],
+    #
+    #    # mafft
+    #    '--mafft -i 90 t/data/real_data_1.gff t/data/real_data_2.gff'                   => [ 'empty_file', 't/data/empty_file' ],
+    #    '--mafft --translation_table 1 t/data/real_data_1.gff t/data/real_data_2.gff'   => [ 'empty_file', 't/data/empty_file' ],
+    #    '--mafft --verbose_stats t/data/real_data_1.gff t/data/real_data_2.gff'         => [ 'empty_file', 't/data/empty_file' ],
+    #    '--mafft --group_limit 100000 -e t/data/real_data_1.gff t/data/real_data_2.gff' => [ 'empty_file', 't/data/empty_file' ],
+    #    '--mafft --qc t/data/real_data_1.gff t/data/real_data_2.gff'                    => [ 'empty_file', 't/data/empty_file' ],
+    #
+    #
+    #    # dont_delete_files
+    #    '--dont_delete_files -i 90 t/data/real_data_1.gff t/data/real_data_2.gff' => [ 'empty_file', 't/data/empty_file' ],
+    #    '--dont_delete_files --translation_table 1 t/data/real_data_1.gff t/data/real_data_2.gff' =>
+    #      [ 'empty_file', 't/data/empty_file' ],
+    
+    #    '--dont_delete_files --verbose_stats t/data/real_data_1.gff t/data/real_data_2.gff' =>
+    #      [ 'empty_file', 't/data/empty_file' ],
+    #    '--dont_delete_files --group_limit 10 -e t/data/real_data_1.gff t/data/real_data_2.gff' =>
+    #      [ 'empty_file', 't/data/empty_file' ],
+    #    '--dont_delete_files --group_limit 100000 -e t/data/real_data_1.gff t/data/real_data_2.gff' =>
+    #      [ 'empty_file', 't/data/empty_file' ],
+    #    '--dont_delete_files --qc t/data/real_data_1.gff t/data/real_data_2.gff' => [ 'empty_file', 't/data/empty_file' ],
+    #
+    #    # change identity
+    #    '-i 90 --translation_table 1 t/data/real_data_1.gff t/data/real_data_2.gff'   => [ 'empty_file', 't/data/empty_file' ],
+    
+    #    '-i 90 --verbose_stats t/data/real_data_1.gff t/data/real_data_2.gff'         => [ 'empty_file', 't/data/empty_file' ],
+    #    '-i 90 --group_limit 10 -e t/data/real_data_1.gff t/data/real_data_2.gff'     => [ 'empty_file', 't/data/empty_file' ],
+    #    '-i 90 --group_limit 100000 -e t/data/real_data_1.gff t/data/real_data_2.gff' => [ 'empty_file', 't/data/empty_file' ],
+    #    '-i 90 --qc t/data/real_data_1.gff t/data/real_data_2.gff'                    => [ 'empty_file', 't/data/empty_file' ],
+    #
+    #    # translation_table
+    #    '--translation_table 1 --verbose_stats t/data/real_data_1.gff t/data/real_data_2.gff' =>
+    #      [ 'empty_file', 't/data/empty_file' ],
+    #    '--translation_table 1 --group_limit 100000 -e t/data/real_data_1.gff t/data/real_data_2.gff' =>
+    #      [ 'empty_file', 't/data/empty_file' ],
+    #    '--translation_table 1 --qc t/data/real_data_1.gff t/data/real_data_2.gff' => [ 'empty_file', 't/data/empty_file' ],
+    #
+    #
+    #    # verbose stats
+    #    '--verbose_stats --group_limit 100000 -e t/data/real_data_1.gff t/data/real_data_2.gff' =>
+    #      [ 'empty_file', 't/data/empty_file' ],
+    #    '--verbose_stats --qc t/data/real_data_1.gff t/data/real_data_2.gff' => [ 'empty_file', 't/data/empty_file' ],
+    #
+    );
+    mock_execute_script_and_check_output( $script_name, \%scripts_and_expected_files );
+
+}
+
+cleanup_files();
+
+done_testing();
+
+sub cleanup_files {
+    # Delete the output directories and files that the roary runs in this test
+    # can leave behind. Takes no arguments; all paths are relative to the
+    # current working directory. Results are ignored, so missing paths are fine.
+    my @leftover_dirs = qw(
+      pan_genome_sequences
+      fixed_input_files
+      t/data/directory_which_doesnt_exist
+      locus_tags_gffs_output
+    );
+    my @leftover_files = qw(
+      _blast_results
+      _clustered
+      _clustered.bak.clstr
+      _clustered.clstr
+      _combined_files _combined_files.groups
+      _fasta_files
+      _gff_files
+      _inflated_mcl_groups _inflated_unsplit_mcl_groups
+      _labeled_mcl_groups _uninflated_mcl_groups
+      accessory.header.embl accessory.header.tab
+      accessory.tab
+      blast_identity_frequency.Rtab
+      clustered_proteins
+      core_accessory.header.embl core_accessory.header.tab
+      core_accessory.tab
+      core_gene_alignment.aln
+      database_masking.asnb
+      example_1.faa.tmp.filtered.fa
+      example_2.faa.tmp.filtered.fa
+      example_3.faa.tmp.filtered.fa
+      gene_presence_absence.csv
+      number_of_conserved_genes.Rtab number_of_genes_in_pan_genome.Rtab
+      number_of_new_genes.Rtab number_of_unique_genes.Rtab
+      pan_genome.fa
+      query_1.gff.proteome.faa query_2.gff.proteome.faa
+      query_3.gff.proteome.faa query_5.gff.proteome.faa
+      real_data_1.gff.proteome.faa real_data_2.gff.proteome.faa
+      pan_genome_reference.fa
+      accessory_graph.dot
+      core_accessory_graph.dot
+      some_different_output
+      core_alignment_header.embl
+    );
+
+    remove_tree($_) for @leftover_dirs;
+    unlink($_)      for @leftover_files;
+}
diff --git a/t/Bio/Roary/CommandLine/RoaryCoreAlignment.t b/t/Bio/Roary/CommandLine/RoaryCoreAlignment.t
new file mode 100644
index 0000000..cb609d1
--- /dev/null
+++ b/t/Bio/Roary/CommandLine/RoaryCoreAlignment.t
@@ -0,0 +1,27 @@
+#!/usr/bin/env perl
+use Moose;
+use Data::Dumper;
+use Cwd;
+
+BEGIN { unshift( @INC, './lib' ) }
+BEGIN { unshift( @INC, './t/lib' ) }
+with 'TestHelper';
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::CommandLine::RoaryCoreAlignment');
+}
+my $script_name = 'Bio::Roary::CommandLine::RoaryCoreAlignment';    # class name handed to the helper below
+system('touch empty_file');    # makes ./empty_file, which the '-h' entry below names as its produced file
+my %scripts_and_expected_files = (    # argument string => [ file named as produced, expected-output fixture ]
+    '-m t/data/core_alignment -s t/data/core_alignment.csv' =>
+      [ 'core_gene_alignment.aln', 't/data/expected_core_gene_alignment.aln' ],
+    '-m t/data/core_alignment -s t/data/core_alignment_core0.66.csv --core_definition 0.66' =>    # same -m directory, different spreadsheet
+      [ 'core_gene_alignment.aln', 't/data/expected_core_gene_alignment_core0.66.aln' ],
+    '-h' =>
+      [ 'empty_file', 't/data/empty_file' ],
+);
+
+mock_execute_script_and_check_output( $script_name, \%scripts_and_expected_files );    # not defined in this file; presumably from the TestHelper role consumed above - TODO confirm
+
+done_testing();
diff --git a/t/Bio/Roary/CommandLine/RoaryPostAnalysis.t b/t/Bio/Roary/CommandLine/RoaryPostAnalysis.t
new file mode 100755
index 0000000..68b1c86
--- /dev/null
+++ b/t/Bio/Roary/CommandLine/RoaryPostAnalysis.t
@@ -0,0 +1,108 @@
+#!/usr/bin/env perl
+use Moose;
+use Data::Dumper;
+use File::Path qw( remove_tree);
+use Cwd;
+
+BEGIN { unshift( @INC, './lib' ) }
+BEGIN { unshift( @INC, './t/lib' ) }
+with 'TestHelper';
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::CommandLine::RoaryPostAnalysis');
+}
+my $script_name = 'Bio::Roary::CommandLine::RoaryPostAnalysis';
+my $cwd = getcwd();
+
+local $ENV{PATH} = "$ENV{PATH}:./bin";
+
+system('cp t/data/post_analysis/* .');
+system('touch empty_file');
+
+my %scripts_and_expected_files = (
+       '-o clustered_proteins -p pan_genome.fa -s gene_presence_absence.csv -c _clustered.clstr  -i _gff_files -f _fasta_files  -j Local --dont_create_rplots --dont_split_groups' =>
+       [ 'clustered_proteins', 't/data/clustered_proteins_post_analysis' ], 
+       '-h' =>
+         [ 'empty_file', 't/data/empty_file' ],   
+);
+
+SKIP: 
+{
+
+  skip "Tests dont take variablity into account", 2 if(1);  
+  mock_execute_script_and_check_output_sorted_groups( $script_name, \%scripts_and_expected_files );
+
+  ok( -e 'number_of_unique_genes.Rtab', 'number_of_unique_genes.Rtab exists');
+  ok( -e 'number_of_new_genes.Rtab', 'number_of_new_genes exists');
+  ok( -e 'number_of_genes_in_pan_genome.Rtab', 'number_of_genes_in_pan_genome exists');
+  ok( -e 'number_of_conserved_genes.Rtab','number_of_conserved_genes');
+  ok( -e 'gene_presence_absence.csv', 'gene_presence_absence exists');
+  ok( -e 'core_accessory.tab', 'core_accessory.tab exists');
+  ok( -e 'core_accessory.header.embl','core_accessory.header.embl exists');
+  ok( -e 'accessory.tab','accessory.tab exists');
+  ok( -e 'accessory.header.embl','accessory.header.embl exists');
+  ok( -e 'summary_statistics.txt' ,'summary_statistics.txt exists');
+  
+  compare_tab_files_with_variable_coordinates('accessory.header.embl', 't/data/post_analysis_expected/accessory.header.embl');
+  compare_tab_files_with_variable_coordinates('accessory.tab', 't/data/post_analysis_expected/accessory.tab');
+  compare_tab_files_with_variable_coordinates('core_accessory.header.embl', 't/data/post_analysis_expected/core_accessory.header.embl');
+  compare_tab_files_with_variable_coordinates('core_accessory.tab', 't/data/post_analysis_expected/core_accessory.tab');
+  
+  cleanup_files();
+  
+  system('cp t/data/post_analysis/* .');
+  system('touch empty_file');
+  %scripts_and_expected_files = (
+         '-t 1 -o clustered_proteins -p pan_genome.fa -s gene_presence_absence.csv -c _clustered.clstr  -i _gff_files -f _fasta_files  -j Local --dont_create_rplots --dont_split_groups' =>
+         [ 'clustered_proteins', 't/data/clustered_proteins_post_analysis' ], 
+         '-h' =>
+           [ 'empty_file', 't/data/empty_file' ],   
+  );
+  
+  mock_execute_script_and_check_output_sorted_groups( $script_name, \%scripts_and_expected_files );
+  
+  ok( -e 'number_of_unique_genes.Rtab', 'number_of_unique_genes.Rtab exists');
+  ok( -e 'number_of_new_genes.Rtab', 'number_of_new_genes exists');
+  ok( -e 'number_of_genes_in_pan_genome.Rtab', 'number_of_genes_in_pan_genome exists');
+  ok( -e 'number_of_conserved_genes.Rtab','number_of_conserved_genes');
+  ok( -e 'gene_presence_absence.csv', 'gene_presence_absence exists');
+  ok( -e 'core_accessory.tab', 'core_accessory.tab exists');
+  ok( -e 'core_accessory.header.embl','core_accessory.header.embl exists');
+  ok( -e 'accessory.tab','accessory.tab exists');
+  ok( -e 'accessory.header.embl','accessory.header.embl exists');
+  
+  compare_tab_files_with_variable_coordinates('accessory.header.embl', 't/data/post_analysis_expected/accessory.header.embl');
+  compare_tab_files_with_variable_coordinates('accessory.tab', 't/data/post_analysis_expected/accessory.tab');
+  compare_tab_files_with_variable_coordinates('core_accessory.header.embl', 't/data/post_analysis_expected/core_accessory.header.embl');
+  compare_tab_files_with_variable_coordinates('core_accessory.tab', 't/data/post_analysis_expected/core_accessory.tab');
+
+}
+cleanup_files();
+done_testing();
+
+sub cleanup_files
+{
+  # Delete the files listed below from the current working directory.
+  # Takes no arguments. The result of unlink is ignored, so a file that
+  # is already gone does not matter.
+  my @leftover_files = qw(
+    _clustered _clustered.bak.clstr
+    _clustered.clstr
+    _combined_files _combined_files.groups
+    _fasta_files _gff_files
+    _uninflated_mcl_groups
+    query_1.gff.proteome.faa query_2.gff.proteome.faa
+    query_6.gff.proteome.faa
+    accessory.header.embl accessory.tab
+    core_accessory.header.embl core_accessory.tab
+    gene_presence_absence.csv
+    number_of_unique_genes.Rtab
+    number_of_new_genes.Rtab
+    number_of_genes_in_pan_genome.Rtab
+    number_of_conserved_genes.Rtab
+  );
+  unlink($_) for @leftover_files;
+}
+
+
diff --git a/t/Bio/Roary/CommandLine/RoaryReorderSpreadsheet.t b/t/Bio/Roary/CommandLine/RoaryReorderSpreadsheet.t
new file mode 100644
index 0000000..27ed114
--- /dev/null
+++ b/t/Bio/Roary/CommandLine/RoaryReorderSpreadsheet.t
@@ -0,0 +1,52 @@
+#!/usr/bin/env perl
+use Moose;
+use Data::Dumper;
+use Cwd;
+
+BEGIN { unshift( @INC, './lib' ) }
+BEGIN { unshift( @INC, './t/lib' ) }
+with 'TestHelper';
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::CommandLine::RoaryReorderSpreadsheet');
+}
+my $script_name = 'Bio::Roary::CommandLine::RoaryReorderSpreadsheet';
+system('touch empty_file');
+my %scripts_and_expected_files = (
+    '-t t/data/reorder_isolates.tre -s t/data/reorder_isolates_input.csv' =>
+      [ 'reordered_spreadsheet.csv', 't/data/reorder_isolates_expected_output.csv' ],
+    '-t t/data/reorder_isolates.tre -s t/data/reorder_isolates_input.csv -o different_output_name.csv' =>
+      [ 'different_output_name.csv', 't/data/reorder_isolates_expected_output.csv' ],
+    '-t t/data/reorder_isolates.tre -s t/data/reorder_isolates_input.csv -f newick' =>
+      [ 'reordered_spreadsheet.csv', 't/data/reorder_isolates_expected_output.csv' ],
+      
+    '-t t/data/reorder_isolates.tre -s t/data/reorder_isolates_input.csv -a depth' =>
+      [ 'reordered_spreadsheet.csv', 't/data/reorder_isolates_expected_output.csv' ],
+    '-t t/data/reorder_isolates.tre -s t/data/reorder_isolates_input.csv -a depth -b height' =>
+      [ 'reordered_spreadsheet.csv', 't/data/reorder_isolates_expected_output_depth_height.csv' ],
+    '-t t/data/reorder_isolates.tre -s t/data/reorder_isolates_input.csv -a depth -b creation' =>
+      [ 'reordered_spreadsheet.csv', 't/data/reorder_isolates_expected_output_depth_creation.csv' ],  
+    '-t t/data/reorder_isolates.tre -s t/data/reorder_isolates_input.csv -a depth -b alpha' =>
+      [ 'reordered_spreadsheet.csv', 't/data/reorder_isolates_expected_output_depth_alpha.csv' ],
+    '-t t/data/reorder_isolates.tre -s t/data/reorder_isolates_input.csv -a depth -b revalpha' =>
+      [ 'reordered_spreadsheet.csv', 't/data/reorder_isolates_expected_output_depth_revalpha.csv' ],
+      
+    '-t t/data/reorder_isolates.tre -s t/data/reorder_isolates_input.csv -a breadth' =>
+      [ 'reordered_spreadsheet.csv', 't/data/reorder_isolates_expected_output.csv' ],
+    '-t t/data/reorder_isolates.tre -s t/data/reorder_isolates_input.csv -a breadth -b height' =>
+      [ 'reordered_spreadsheet.csv', 't/data/reorder_isolates_expected_output_breadth_height.csv' ],
+    '-t t/data/reorder_isolates.tre -s t/data/reorder_isolates_input.csv -a breadth -b creation' =>
+      [ 'reordered_spreadsheet.csv', 't/data/reorder_isolates_expected_output_breadth_creation.csv' ],  
+    '-t t/data/reorder_isolates.tre -s t/data/reorder_isolates_input.csv -a breadth -b alpha' =>
+      [ 'reordered_spreadsheet.csv', 't/data/reorder_isolates_expected_output_breadth_alpha.csv' ],
+    '-t t/data/reorder_isolates.tre -s t/data/reorder_isolates_input.csv -a breadth -b revalpha' =>
+      [ 'reordered_spreadsheet.csv', 't/data/reorder_isolates_expected_output_breadth_revalpha.csv' ],
+
+      '-h' =>
+        [ 'empty_file', 't/data/empty_file' ],
+);
+
+mock_execute_script_and_check_output( $script_name, \%scripts_and_expected_files );
+
+done_testing();
diff --git a/t/Bio/Roary/CommandLine/TransferAnnotationToGroups.t b/t/Bio/Roary/CommandLine/TransferAnnotationToGroups.t
new file mode 100644
index 0000000..e56d407
--- /dev/null
+++ b/t/Bio/Roary/CommandLine/TransferAnnotationToGroups.t
@@ -0,0 +1,26 @@
+#!/usr/bin/env perl
+use Moose;
+use Data::Dumper;
+use Cwd;
+
+BEGIN { unshift( @INC, './lib' ) }
+BEGIN { unshift( @INC, './t/lib' ) }
+with 'TestHelper';
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::CommandLine::TransferAnnotationToGroups');
+}
+my $script_name = 'Bio::Roary::CommandLine::TransferAnnotationToGroups';
+my $cwd         = getcwd();
+system('touch empty_file');
+my %scripts_and_expected_files = (
+    '-g t/data/query_groups t/data/query_1.gff t/data/query_2.gff t/data/query_3.gff' =>
+      [ 'reannotated_groups', 't/data/expected_reannotated_groups_file' ],
+      '-h' =>
+        [ 'empty_file', 't/data/empty_file' ],
+);
+
+mock_execute_script_and_check_output_sorted( $script_name, \%scripts_and_expected_files );
+
+done_testing();
diff --git a/t/Bio/Roary/ContigsToGeneIDsFromGFF.t b/t/Bio/Roary/ContigsToGeneIDsFromGFF.t
new file mode 100755
index 0000000..ce4c36c
--- /dev/null
+++ b/t/Bio/Roary/ContigsToGeneIDsFromGFF.t
@@ -0,0 +1,76 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+
+BEGIN { unshift( @INC, './lib' ) }
+$ENV{PATH} .= ":./bin";
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::ContigsToGeneIDsFromGFF');
+}
+
+ok(
+    my $obj = Bio::Roary::ContigsToGeneIDsFromGFF->new(
+        gff_file => 't/data/query_1.gff'
+    ),
+    'Initialise contigs to gene ids obj'
+);
+
+is_deeply(
+    $obj->contig_to_ids,
+    {
+        'abc|SC|contig000001' => [
+            '1_1',       'abc_00002', 'abc_00003', 'abc_00004', '1_2',       'abc_00006', '1_3', 'abc_00008',
+            'abc_01705', 'abc_00010', 'abc_00011', 'abc_00012', 'abc_00013', 'abc_00014', '1_6', 'abc_00016'
+        ]
+    },
+    'Contigs match expected with standard output'
+);
+
+ok(
+    $obj = Bio::Roary::ContigsToGeneIDsFromGFF->new(
+        gff_file => 't/data/query_1_alternative_patterns.gff'
+    ),
+    'Initialise contigs to gene ids obj with alternative ID patterns'
+);
+is_deeply(
+    $obj->contig_to_ids,
+    {
+        'abc|SC|contig000001' => [ '1_1', 'abc_00002', 'abc_00003', 'abc_00004', '1_2', 'abc_00006' ]
+    },
+    'Contigs match expected with alternative output'
+);
+
+is_deeply(
+    $obj->_genes_annotation,
+    [
+        {
+            'database_annotation_exists' => 1,
+            'product'                    => 'superantigen-like protein',
+            'end'                        => '3337',
+            'start'                      => '2621',
+            'contig'                     => 'abc|SC|contig000001',
+            'id_name'                    => 'abc_00004'
+        },
+        {
+            'database_annotation_exists' => 1,
+            'product'                    => 'hypothetical protein',
+            'end'                        => '4170',
+            'start'                      => '3445',
+            'contig'                     => 'abc|SC|contig000001',
+            'id_name'                    => '1_2'
+        },
+        {
+            'database_annotation_exists' => 1,
+            'product'                    => 'superantigen-like protein',
+            'end'                        => '4990',
+            'start'                      => '4265',
+            'contig'                     => 'abc|SC|contig000001',
+            'id_name'                    => 'abc_00006'
+        }
+    ],
+    'Product annotation with non standard format'
+);
+done_testing();
diff --git a/t/Bio/Roary/EmblGroups.t b/t/Bio/Roary/EmblGroups.t
new file mode 100644
index 0000000..2d3243a
--- /dev/null
+++ b/t/Bio/Roary/EmblGroups.t
@@ -0,0 +1,47 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+
+BEGIN { unshift( @INC, './lib' ) }
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::Output::EmblGroups');
+}
+
+
+my $annotate_groups = Bio::Roary::AnnotateGroups->new(
+  gff_files   => ['t/data/query_1.gff','t/data/query_2.gff','t/data/query_3.gff'],
+  groups_filename => 't/data/query_groups',
+);
+
+my $analyse_groups = Bio::Roary::AnalyseGroups->new(
+    fasta_files     => ['t/data/query_1.fa','t/data/query_2.fa','t/data/query_3.fa'],
+    groups_filename => 't/data/query_groups'
+);
+
+ok(my $obj = Bio::Roary::Output::EmblGroups->new(
+  output_filename => 'group_statitics.csv',
+  annotate_groups_obj => $annotate_groups,
+  analyse_groups_obj  => $analyse_groups
+), 'initialise embl groups');
+
+is($obj->_get_heat_map_colour(['a','b','c','d'], 4),2,  'heatmap colour');
+is($obj->_get_heat_map_colour(['a','b','c'],     4),16, 'heatmap colour');
+is($obj->_get_heat_map_colour(['a','b'],         4),3,  'heatmap colour');
+is($obj->_get_heat_map_colour(['a'],             4),4,  'heatmap colour');
+
+
+is($obj->_get_heat_map_colour(['a','b','c','d','e','f','g','h','i','j'], 10),2,  'heatmap colour loop over each colour 10');
+is($obj->_get_heat_map_colour(['a','b','c','d','e','f','g','h','i'    ], 10),15, 'heatmap colour loop over each colour 9');
+is($obj->_get_heat_map_colour(['a','b','c','d','e','f','g','h'        ], 10),16, 'heatmap colour loop over each colour 8');
+is($obj->_get_heat_map_colour(['a','b','c','d','e','f','g'            ], 10),10, 'heatmap colour loop over each colour 7');
+is($obj->_get_heat_map_colour(['a','b','c','d','e','f'                ], 10),7,  'heatmap colour loop over each colour 6');
+is($obj->_get_heat_map_colour(['a','b','c','d','e'                    ], 10),3,  'heatmap colour loop over each colour 5');
+is($obj->_get_heat_map_colour(['a','b','c','d'                        ], 10),8,  'heatmap colour loop over each colour 4');
+is($obj->_get_heat_map_colour(['a','b','c'                            ], 10),9,  'heatmap colour loop over each colour 3');
+is($obj->_get_heat_map_colour(['a','b'                                ], 10),5,  'heatmap colour loop over each colour 2');
+is($obj->_get_heat_map_colour(['a'                                    ], 10),4,  'heatmap colour loop over each colour 1 ');
+
+done_testing();
diff --git a/t/Bio/Roary/External/Blastp.t b/t/Bio/Roary/External/Blastp.t
new file mode 100644
index 0000000..826ad4f
--- /dev/null
+++ b/t/Bio/Roary/External/Blastp.t
@@ -0,0 +1,44 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use Cwd;
+
+
+BEGIN { unshift( @INC, './lib' ) }
+use Bio::Roary::External::Makeblastdb;
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::External::Blastp');
+}
+
+my $cwd = getcwd();
+my $obj;
+
+ok($obj = Bio::Roary::External::Blastp->new(
+  fasta_file      => 't/data/some_fasta_file.fa',
+  blast_database  => 'some_blast_database',
+  exec            => $cwd.'/t/bin/dummy_blastp',
+),'initialise object');
+
+is($obj->_command_to_run, $cwd.'/t/bin/dummy_blastp -query t/data/some_fasta_file.fa -db some_blast_database -evalue 1e-06 -num_threads 1 -outfmt 6 -max_target_seqs 2000  | awk \'{ if ($3 > 98) print $0;}\' 2> /dev/null 1>  results.out', 'Command constructed as expected');
+ok($obj->run(), 'run dummy command');
+unlink('results.out');
+
+done_testing();
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/t/Bio/Roary/External/Cdhit.t b/t/Bio/Roary/External/Cdhit.t
new file mode 100644
index 0000000..d64af7b
--- /dev/null
+++ b/t/Bio/Roary/External/Cdhit.t
@@ -0,0 +1,39 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use Cwd;
+
+BEGIN { unshift( @INC, './lib' ) }
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::External::Cdhit');
+}
+
+my $cwd = getcwd();
+my $obj;
+
+ok($obj = Bio::Roary::External::Cdhit->new(
+  input_file   => 't/data/some_fasta_file.fa',
+  output_base  => 'output',
+  exec         =>  $cwd.'/t/bin/dummy_cd-hit',
+),'initialise object');
+
+is($obj->_command_to_run, $cwd.'/t/bin/dummy_cd-hit -i t/data/some_fasta_file.fa -o output -T 1 -M 1800 -g 1 -s 1 -d 256 -c 1 > /dev/null 2>&1', 'Command constructed as expected');
+ok($obj->run(), 'run dummy command');
+unlink('output');
+unlink('output.clstr');
+unlink('output.bak.clstr');
+
+
+ok($obj = Bio::Roary::External::Cdhit->new(
+  input_file   => 't/data/some_fasta_file.fa',
+  output_base  => 'output',
+  exec         =>  $cwd.'/t/bin/dummy_cd-hit',
+  cpus         => 1000    # deliberately huge; the expected command below carries -T 40 instead
+),'initialise object with lots of threads');
+is($obj->_command_to_run, $cwd.'/t/bin/dummy_cd-hit -i t/data/some_fasta_file.fa -o output -T 40 -M 1800 -g 1 -s 1 -d 256 -c 1 > /dev/null 2>&1', 'number of threads capped at a lower level');
+
+
+done_testing();
diff --git a/t/Bio/Roary/External/CheckTools.t b/t/Bio/Roary/External/CheckTools.t
new file mode 100644
index 0000000..dddfee4
--- /dev/null
+++ b/t/Bio/Roary/External/CheckTools.t
@@ -0,0 +1,23 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use Cwd;
+use Test::Output;
+
+BEGIN { unshift( @INC, './lib' ) }
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::External::CheckTools');
+}
+ok( my $check_tools = Bio::Roary::External::CheckTools->new(), 'initialise checking for tools' );
+for my $tool ( ( 'parallel', 'blastp', 'makeblastdb', 'mcl', 'bedtools', 'prank', 'mafft', 'grep', 'sed', 'awk', ) ) {
+    my $pattern = "Looking for '$tool' - found ";    # text check_tool is expected to write to STDERR for this tool
+    stderr_like { $check_tools->check_tool($tool); } qr/$pattern/, "Check for $tool";
+}
+
+stderr_like { $check_tools->check_all_tools; } qr/Looking for /, "Check for all tools";
+1;
+
+done_testing();
diff --git a/t/Bio/Roary/External/Mafft.t b/t/Bio/Roary/External/Mafft.t
new file mode 100644
index 0000000..10f702d
--- /dev/null
+++ b/t/Bio/Roary/External/Mafft.t
@@ -0,0 +1,43 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use Cwd;
+use Test::Files;
+
+BEGIN { unshift( @INC, './lib' ) }
+BEGIN {
+    use Test::Most;
+	use Bio::Roary::SortFasta;
+    use_ok('Bio::Roary::External::Mafft');
+}
+
+ok(
+    my $obj = Bio::Roary::External::Mafft->new(
+        input_filename  => 't/data/mafft_input.fa',
+        output_filename => 't/data/mafft_input.fa.aln',
+        job_runner      => 'Local'
+    ),
+    'initialise mafft obj'
+);
+
+is(
+    $obj->_command_to_run,
+'mafft --auto --quiet t/data/mafft_input.fa > t/data/mafft_input.fa.aln',
+    'Command constructed as expected'
+);
+
+ok( $obj->run(), 'run mafft' );
+
+ok(-e 't/data/mafft_input.fa.aln', 'output file exists');
+my $sort_fasta_after_revtrans = Bio::Roary::SortFasta->new(
+   input_filename      => 't/data/mafft_input.fa.aln',
+   remove_nnn_from_end => 1,
+);
+$sort_fasta_after_revtrans->sort_fasta->replace_input_with_output_file;
+
+compare_ok( 't/data/mafft_input.fa.aln', 't/data/expected_mafft_input.fa.aln', "output for mafft matches" );
+
+unlink('t/data/mafft_input.fa.aln');
+
+done_testing();
diff --git a/t/Bio/Roary/External/Makeblastdb.t b/t/Bio/Roary/External/Makeblastdb.t
new file mode 100644
index 0000000..02dab30
--- /dev/null
+++ b/t/Bio/Roary/External/Makeblastdb.t
@@ -0,0 +1,32 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use Cwd;
+
+BEGIN { unshift( @INC, './lib' ) }
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::External::Makeblastdb');
+}
+
+my $cwd = getcwd();
+my $obj;
+
+ok($obj = Bio::Roary::External::Makeblastdb->new(
+  fasta_file      => 't/data/some_fasta_file.fa',
+  exec            => $cwd.'/t/bin/dummy_makeblastdb',
+  mask_data       => 'masking_data_file'
+),'initialise object');
+
+is($obj->_command_to_run, $cwd.'/t/bin/dummy_makeblastdb -in t/data/some_fasta_file.fa -dbtype prot -parse_seqids -out '.$obj->_working_directory->dirname().'/output_contigs -logfile /dev/null', 'Command constructed as expected');
+ok($obj->run(), 'run dummy command');
+
+unlink("output_contigs.phr");
+unlink("output_contigs.pin");
+unlink("output_contigs.psq");
+
+1;
+
+done_testing();
diff --git a/t/Bio/Roary/External/Mcl.t b/t/Bio/Roary/External/Mcl.t
new file mode 100644
index 0000000..a062bcc
--- /dev/null
+++ b/t/Bio/Roary/External/Mcl.t
@@ -0,0 +1,54 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use Cwd;
+use Test::Files;
+
+BEGIN { unshift( @INC, './lib' ) }
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::External::Mcl');
+}
+
+my $cwd = getcwd();
+my $obj;
+
+
+ok(
+    $obj = Bio::Roary::External::Mcl->new(
+        blast_results   => 'some_blast_results',
+        mcxdeblast_exec => $cwd . '/t/bin/dummy_mcxdeblast',
+        mcl_exec        => $cwd . '/t/bin/dummy_mcl',
+        output_file     => 'output.groups'
+    ),
+    'initialise object with dummy values'
+);
+
+is(
+    $obj->_command_to_run,
+    $cwd
+      . '/t/bin/dummy_mcxdeblast -m9 --score=r --line-mode=abc some_blast_results 2> /dev/null | '
+      . $cwd
+      . '/t/bin/dummy_mcl - --abc -I 1.5 -o output.groups > /dev/null 2>&1',
+    'Command constructed as expected'
+);
+ok( $obj->run(), 'run dummy command' );
+
+unlink('output.groups');
+
+ok(
+    $obj = Bio::Roary::External::Mcl->new(
+        blast_results => 't/data/blast_results',
+    ),
+    'initialise object with real values'
+);
+ok( $obj->run(), 'run the real command' );
+compare_ok('output_groups', 't/data/expected_output_groups', 'outgroups as expected');
+
+unlink('output_groups');
+
+1;
+
+done_testing();
diff --git a/t/Bio/Roary/External/Prank.t b/t/Bio/Roary/External/Prank.t
new file mode 100755
index 0000000..c09fa35
--- /dev/null
+++ b/t/Bio/Roary/External/Prank.t
@@ -0,0 +1,44 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use Cwd;
+use Test::Files;
+
+BEGIN { unshift( @INC, './lib' ) }
+use Bio::Roary::SortFasta;    # 'use' runs at compile time in source order, so it must follow the @INC change to pick up ./lib
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::External::Prank');
+}
+
+ok(
+    my $obj = Bio::Roary::External::Prank->new(
+        input_filename  => 't/data/prank_input.fa',
+        output_filename => 't/data/prank_input.fa.aln',
+        job_runner      => 'Local'
+    ),
+    'initialise prank obj'
+);
+
+is(
+    $obj->_command_to_run,
+'prank -d=t/data/prank_input.fa -o=t/data/prank_input.fa.aln -codon -F -quiet -once > /dev/null 2>&1 && mv t/data/prank_input.fa.aln*.fas t/data/prank_input.fa.aln',
+    'Command constructed as expected'
+);
+
+ok( $obj->run(), 'run prank' );
+
+ok(-e 't/data/prank_input.fa.aln', 'output file exists');
+my $sort_fasta_after_revtrans = Bio::Roary::SortFasta->new(
+   input_filename      => 't/data/prank_input.fa.aln',
+   remove_nnn_from_end => 1,
+);
+$sort_fasta_after_revtrans->sort_fasta->replace_input_with_output_file;
+
+compare_ok( 't/data/prank_input.fa.aln', 't/data/expected_prank_input.fa.aln', "output for prank matches" );
+
+unlink('t/data/prank_input.fa.aln');
+
+done_testing();
diff --git a/t/Bio/Roary/ExtractCoreGenesFromSpreadsheet.t b/t/Bio/Roary/ExtractCoreGenesFromSpreadsheet.t
new file mode 100755
index 0000000..92c747d
--- /dev/null
+++ b/t/Bio/Roary/ExtractCoreGenesFromSpreadsheet.t
@@ -0,0 +1,33 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+
+BEGIN { unshift( @INC, './lib' ) }
+$ENV{PATH} .= ":./bin";
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::ExtractCoreGenesFromSpreadsheet');
+}
+
+my $obj;
+
+ok($obj = Bio::Roary::ExtractCoreGenesFromSpreadsheet->new(
+  spreadsheet  => 't/data/core_group_statistics.csv',
+),'initalise obj');
+is_deeply($obj->ordered_core_genes, ['argF','speH','group_5'], 'Correct ordering');
+is_deeply($obj->sample_names_to_genes, {
+          'query_2' => {
+                         '2_3' => 1,
+                         '2_7' => 1,
+                         '2_2' => 1
+                       },
+          'query_1' => {
+                         '1_6' => 1,
+                         '1_3' => 1,
+                         '1_2' => 1
+                       }
+        }, 'Correct of sample names to genes is correct');
+
+done_testing();
diff --git a/t/Bio/Roary/ExtractProteomeFromGFFs.t b/t/Bio/Roary/ExtractProteomeFromGFFs.t
new file mode 100755
index 0000000..e670f1b
--- /dev/null
+++ b/t/Bio/Roary/ExtractProteomeFromGFFs.t
@@ -0,0 +1,82 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use File::Basename;
+use Test::Files;
+
+BEGIN { unshift( @INC, './lib' ) }
+$ENV{PATH} .= ":./bin";
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::ExtractProteomeFromGFFs');
+}
+
+my $plot_groups_obj;
+
+ok(
+    $plot_groups_obj = Bio::Roary::ExtractProteomeFromGFFs->new(
+        gff_files => [ 't/data/example_annotation.gff', 't/data/example_annotation_2.gff' ],
+    ),
+    'initialise object'
+);
+
+my @sorted_fasta_files = map { basename($_) } sort( @{ $plot_groups_obj->fasta_files() } );
+my @sorted_expected_files = sort( ( 'example_annotation.gff.proteome.faa', 'example_annotation_2.gff.proteome.faa' ) );
+
+is_deeply( \@sorted_fasta_files, \@sorted_expected_files, 'one file created' );
+
+compare_ok( $plot_groups_obj->fasta_files->[0] ,
+    't/data/example_annotation.gff.proteome.faa.expected',
+    'content of proteome 1 as expected'
+);
+
+unlink('example_annotation.gff.proteome.faa');
+unlink('example_annotation_2.gff.proteome.faa');
+
+ok(
+    $plot_groups_obj = Bio::Roary::ExtractProteomeFromGFFs->new(
+        gff_files => [ 't/data/genbank_gbff/genbank1.gff', 't/data/genbank_gbff/genbank2.gff', 't/data/genbank_gbff/genbank3.gff' ],
+    ),
+    'initialise object with genbank gff files'
+);
+@sorted_fasta_files = map { basename($_) } sort( @{ $plot_groups_obj->fasta_files() } );
+@sorted_expected_files = sort( ( 'genbank1.gff.proteome.faa', 'genbank2.gff.proteome.faa', 'genbank3.gff.proteome.faa' ) );
+
+is_deeply( \@sorted_fasta_files, \@sorted_expected_files, 'GB files created output' );
+
+for my $full_filename ( @{ $plot_groups_obj->fasta_files() } ) {
+    my $base_filename = basename($full_filename);
+    compare_ok($full_filename,
+        't/data/genbank_gbff/' . $base_filename . '.expected',
+        "content of proteome $full_filename as expected"
+    );
+}
+
+unlink('genbank1.gff.proteome.faa');
+unlink('genbank2.gff.proteome.faa');
+unlink('genbank3.gff.proteome.faa');
+
+ok(
+    $plot_groups_obj = Bio::Roary::ExtractProteomeFromGFFs->new(
+        gff_files => [ 't/data/locus_tag_gffs/query_1.gff', 't/data/locus_tag_gffs/query_2.gff', 't/data/locus_tag_gffs/query_3.gff' ],
+    ),
+    'initialise object with locus tag id gff files'
+);
+@sorted_fasta_files = map { basename($_) } sort( @{ $plot_groups_obj->fasta_files() } );
+@sorted_expected_files = sort( ( 'query_1.gff.proteome.faa', 'query_2.gff.proteome.faa', 'query_3.gff.proteome.faa' ) );
+
+is_deeply( \@sorted_fasta_files, \@sorted_expected_files, 'locus tag id files created output' );
+
+for my $full_filename ( @{ $plot_groups_obj->fasta_files() } ) {
+    my $base_filename = basename($full_filename);
+    compare_ok($full_filename, 't/data/locus_tag_gffs/' . $base_filename . '.expected' ,
+        "content of proteome $full_filename as expected" );
+}
+
+unlink('query_1.gff.proteome.faa');
+unlink('query_2.gff.proteome.faa');
+unlink('query_3.gff.proteome.faa');
+
+done_testing();
diff --git a/t/Bio/Roary/FilterFullClusters.t b/t/Bio/Roary/FilterFullClusters.t
new file mode 100644
index 0000000..149ca5e
--- /dev/null
+++ b/t/Bio/Roary/FilterFullClusters.t
@@ -0,0 +1,36 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use Test::Files;
+
+BEGIN { unshift( @INC, './lib' ) }
+$ENV{PATH} .= ":./bin";
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::FilterFullClusters');
+}
+
+ok(my $filter_clusters = Bio::Roary::FilterFullClusters->new(
+    clusters_filename        => 't/data/clusters_to_inflate',
+    fasta_file           =>  't/data/clusters_input.fa',
+    number_of_input_files => 6,
+    output_file => 'output_filtered.fa',
+    _greater_than_or_equal => 1,
+    cdhit_input_fasta_file => 't/data/clusters_to_inflate_original_input.fa',
+    cdhit_output_fasta_file => 'filtered_original_input.fa',
+    output_groups_file => 'output_groups',
+  ),'initialise object');
+ok($filter_clusters->filter_full_clusters_from_fasta(),'filter the clusters');
+ok($filter_clusters->filter_complete_cluster_from_original_fasta(),'filter original input and save full groups');
+
+compare_ok('output_filtered.fa', 't/data/expected_output_filtered.fa', 'content as expected');
+compare_ok('output_groups', 't/data/expected_output_groups_cdhit', 'content as expected');
+compare_ok('filtered_original_input.fa', 't/data/expected_filtered_original_input.fa', 'content as expected');
+
+unlink('output_groups');
+unlink('filtered_original_input.fa');
+unlink('output_filtered.fa');
+
+done_testing();
diff --git a/t/Bio/Roary/GeneNamesFromGFF.t b/t/Bio/Roary/GeneNamesFromGFF.t
new file mode 100755
index 0000000..8cc5cf2
--- /dev/null
+++ b/t/Bio/Roary/GeneNamesFromGFF.t
@@ -0,0 +1,94 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+
+BEGIN { unshift( @INC, './lib' ) }
+$ENV{PATH} .= ":./bin";
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::GeneNamesFromGFF');
+}
+
+my $obj;
+
+ok(
+    $obj = Bio::Roary::GeneNamesFromGFF->new(
+        gff_file => 't/data/query_1.gff'
+    ),
+    'initialise reading GFF file'
+);
+is_deeply(
+    $obj->ids_to_gene_name,
+    {
+        '1_3'       => 'argF',
+        '1_1'       => 'different',
+        '1_2'       => 'speH',
+        'abc_00016' => 'yfnB',
+        'abc_00008' => 'arcC1'
+    },
+    'ids to gene names as expected'
+);
+
+is_deeply(
+    $obj->ids_to_gene_size,
+    {
+        'abc_00012' => 188,
+        '1_1'       => 959,
+        'abc_00004' => 716,
+        'abc_00006' => 725,
+        'abc_00008' => 935,
+        '1_6'       => 134,
+        'abc_00014' => 134,
+        'abc_01705' => 1556,
+        'abc_00013' => 75,
+        'abc_00010' => 227,
+        '1_2'       => 725,
+        'abc_00011' => 947,
+        'abc_00016' => 686,
+        '1_3'       => 1001,
+        'abc_00002' => 146,
+        'abc_00003' => 197
+    },
+    'ids to gene lengths as expected'
+);
+
+ok(
+    $obj = Bio::Roary::GeneNamesFromGFF->new(
+        gff_file => 't/data/query_2.gff'
+    ),
+    'initialise reading another GFF file'
+);
+is_deeply(
+    $obj->ids_to_gene_name,
+    {
+        '2_3'       => 'argF',
+        '2_1'       => 'hly',
+        '2_2'       => 'speH',
+        'abc_00016' => 'yfnB',
+        'abc_00008' => 'arcC1'
+    },
+    'ids to gene names as expected again'
+);
+
+ok(
+    $obj = Bio::Roary::GeneNamesFromGFF->new(
+        gff_file => 't/data/locus_tag_gffs/query_1.gff'
+    ),
+    'initialise a GFF file with locus tags only'
+);
+
+is_deeply(
+    $obj->ids_to_gene_name,
+    {
+        'abc_00005' => 'speH',
+        'abc_00007' => 'argF',
+        'abc_00001' => 'different',
+        'abc_00016' => 'yfnB',
+        'abc_00008' => 'arcC1'
+    },
+    'ids to gene names with GFF file with locus tags only'
+);
+
+done_testing();
diff --git a/t/Bio/Roary/GroupLabels.t b/t/Bio/Roary/GroupLabels.t
new file mode 100644
index 0000000..d3fc2f0
--- /dev/null
+++ b/t/Bio/Roary/GroupLabels.t
@@ -0,0 +1,25 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use Test::Files;
+
+BEGIN { unshift( @INC, './lib' ) }
+$ENV{PATH} .= ":./bin";
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::GroupLabels');
+}
+
+ok(
+    my $obj = Bio::Roary::GroupLabels->new(
+        groups_filename => 't/data/example_groups_without_labels'
+    ),
+    'initialise with a groups file'
+);
+ok($obj->add_labels, 'Add labels to groups');
+compare_ok($obj->output_filename, 't/data/expected_group_labels', 'groups labeled as expected');
+unlink('labelled_groups_file');
+
+done_testing();
diff --git a/t/Bio/Roary/GroupStatistics.t b/t/Bio/Roary/GroupStatistics.t
new file mode 100755
index 0000000..9ea58be
--- /dev/null
+++ b/t/Bio/Roary/GroupStatistics.t
@@ -0,0 +1,79 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use Test::Files;
+
+BEGIN { unshift( @INC, './lib' ) }
+$ENV{PATH} .= ":./bin";
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::GroupStatistics');
+}
+
+my $annotate_groups = Bio::Roary::AnnotateGroups->new(
+  gff_files   => ['t/data/query_1.gff','t/data/query_2.gff','t/data/query_3.gff'],
+  groups_filename => 't/data/query_groups',
+);
+
+my $analyse_groups = Bio::Roary::AnalyseGroups->new(
+    fasta_files     => ['t/data/query_1.fa','t/data/query_2.fa','t/data/query_3.fa'],
+    groups_filename => 't/data/query_groups'
+);
+
+my $obj;
+
+ok($obj = Bio::Roary::GroupStatistics->new(
+  annotate_groups_obj => $annotate_groups,
+  analyse_groups_obj  => $analyse_groups 
+),'Initialise group statistics object');
+ok($obj->create_spreadsheet,'Create the CSV file');
+ok(-e 'gene_presence_absence.csv', 'CSV file exists');
+compare_ok('gene_presence_absence.csv','t/data/expected_group_statitics.csv', 'Spreadsheet content as expected');
+ok($obj->create_rtab,'Create the Rtab file');
+ok(-e 'gene_presence_absence.Rtab', 'Rtab file exists');
+compare_ok('gene_presence_absence.Rtab','t/data/expected_gene_presence_and_absence.Rtab', 'Rtab matrix content as expected');
+
+unlink('gene_presence_absence.csv');
+unlink('gene_presence_absence.Rtab');
+
+############################
+
+my $annotate_groups_2 = Bio::Roary::AnnotateGroups->new(
+  gff_files   => ['t/data/query_1.gff','t/data/query_2.gff','t/data/query_3.gff','t/data/query_4_missing_genes.gff'],
+  groups_filename => 't/data/query_groups_missing_genes',
+);
+
+my $analyse_groups_2 = Bio::Roary::AnalyseGroups->new(
+    fasta_files     => ['t/data/query_1.fa','t/data/query_2.fa','t/data/query_3.fa','t/data/query_4_missing_genes.fa'],
+    groups_filename => 't/data/query_groups_missing_genes'
+);
+
+ok($obj = Bio::Roary::GroupStatistics->new(
+  annotate_groups_obj => $annotate_groups_2,
+  analyse_groups_obj  => $analyse_groups_2,
+  output_filename     => 'missing_genes_stats.csv' 
+),'Initialise group statistics object where one isolate has only 1 gene');
+ok($obj->create_spreadsheet,'Create the CSV file');
+ok(-e 'missing_genes_stats.csv', 'CSV file exists');
+compare_ok('missing_genes_stats.csv','t/data/expected_group_statitics_missing_genes.csv', 'Spreadsheet content as expected with missing genes');
+
+unlink('missing_genes_stats.csv');
+
+
+## TEST VERBOSE STATS ##
+
+ok($obj = Bio::Roary::GroupStatistics->new(
+  annotate_groups_obj => $annotate_groups,
+  analyse_groups_obj  => $analyse_groups,
+  _verbose            => 1,
+  output_filename     => 'verbose_stats.csv'
+),'Initialise group statistics object');
+ok($obj->create_spreadsheet,'Create the CSV file');
+ok(-e 'verbose_stats.csv', 'CSV file exists');
+compare_ok('verbose_stats.csv','t/data/expected_group_statitics_verbose.csv', 'Verbose spreadsheet content as expected');
+
+unlink('verbose_stats.csv');
+
+done_testing();
diff --git a/t/Bio/Roary/InflateClusters.t b/t/Bio/Roary/InflateClusters.t
new file mode 100644
index 0000000..0a2826d
--- /dev/null
+++ b/t/Bio/Roary/InflateClusters.t
@@ -0,0 +1,40 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use Test::Files;
+
+BEGIN { unshift( @INC, './lib' ) }
+$ENV{PATH} .= ":./bin";
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::InflateClusters');
+}
+
+my $obj;
+
+
+ok( $obj = Bio::Roary::InflateClusters->new(
+  clusters_filename  => 't/data/clustersfile',
+  mcl_filename       => 't/data/mcl_file',
+  output_file        => 'example.output'
+),'initialise object');
+ok($obj->inflate,'inflate the results');
+
+compare_ok('example.output','t/data/expected_inflated_results', 'inflated results as expected');
+unlink('example.output');
+
+
+ok( $obj = Bio::Roary::InflateClusters->new(
+  clusters_filename  => 't/data/clusters_to_inflate',
+  mcl_filename       => 't/data/clusters_to_inflate.mcl',
+  output_file        => 'example.output'
+),'initialise object');
+ok($obj->inflate,'inflate the results');
+
+compare_ok('example.output','t/data/expected_clusters_to_inflate', 'inflated results as expected');
+unlink('example.output');
+
+done_testing();
+
diff --git a/t/Bio/Roary/OrderGenes.t b/t/Bio/Roary/OrderGenes.t
new file mode 100755
index 0000000..fb780fe
--- /dev/null
+++ b/t/Bio/Roary/OrderGenes.t
@@ -0,0 +1,148 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use File::Slurper 'read_text';
+use Test::Files;
+
+BEGIN { unshift( @INC, './lib' ) }
+$ENV{PATH} .= ":./bin";
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::OrderGenes');
+    use Bio::Roary::AnalyseGroups;
+}
+
+my $no_accessory_100 = order_genes_obj( 't/data/accessory_graphs/no_accessory', 1 );
+my $no_accessory_50  = order_genes_obj( 't/data/accessory_graphs/no_accessory', 0.5 );
+
+my $one_bubble_100 = order_genes_obj( 't/data/accessory_graphs/one_bubble', 1 );
+my $one_bubble_50  = order_genes_obj( 't/data/accessory_graphs/one_bubble', 0.5 );
+
+my $one_branch_100 = order_genes_obj( 't/data/accessory_graphs/one_branch', 1 );
+my $one_branch_50  = order_genes_obj( 't/data/accessory_graphs/one_branch', 0.5 );
+
+my $two_graphs_100 = order_genes_obj( 't/data/accessory_graphs/two_graphs', 1 );
+my $two_graphs_50  = order_genes_obj( 't/data/accessory_graphs/two_graphs', 0.5 );
+
+my $single_gene_100 = order_genes_obj( 't/data/accessory_graphs/single_gene_contig', 1 );
+my $single_gene_50  = order_genes_obj( 't/data/accessory_graphs/single_gene_contig', 0.5 );
+
+my $core_deletion_100 = order_genes_obj( 't/data/accessory_graphs/core_deletion', 1 );
+my $core_deletion_50  = order_genes_obj( 't/data/accessory_graphs/core_deletion', 0.5 );
+
+my $core_island_100 = order_genes_obj( 't/data/accessory_graphs/core_island', 1 );
+my $core_island_50  = order_genes_obj( 't/data/accessory_graphs/core_island', 0.5 );
+
+cleanup();
+my $analyse_groups = Bio::Roary::AnalyseGroups->new(
+    fasta_files     => [ 't/data/accessory_graphs/file_1.fa', 't/data/accessory_graphs/file_2.fa', 't/data/accessory_graphs/file_3.fa' ],
+    groups_filename => 't/data/accessory_graphs/core_island'
+);
+
+ok(
+    my $obj = Bio::Roary::OrderGenes->new(
+        analyse_groups_obj => $analyse_groups,
+        gff_files => [ 't/data/accessory_graphs/file_1.gff', 't/data/accessory_graphs/file_2.gff', 't/data/accessory_graphs/file_3.gff' ],
+        core_definition => 1,
+        sample_weights  => { 'file_1' => 0.5, 'file_2' => 1, 'file_3' => 0.1 }
+    ),
+    "Initialise order genes object for sample weights"
+);
+ok( $obj->groups_to_contigs,       'build the graph for sample weights' );
+ok( -e 'core_accessory_graph.dot', 'core accessory graph created for sample weights' );
+ok( -e 'accessory_graph.dot',      'accessory graph created for sample weights' );
+
+my $actual_graph = read_text('accessory_graph.dot');
+$actual_graph =~ s/group_[\w]/group_X/gi;
+is_deeply( $actual_graph, read_text('t/data/expected_sample_weights_accessory_graph.dot'), 'graph weights changed' );
+
+# Check how the final graphs get reordered.
+
+$obj = Bio::Roary::OrderGenes->new(
+    analyse_groups_obj => $analyse_groups,
+    gff_files       => [ 't/data/accessory_graphs/file_1.gff', 't/data/accessory_graphs/file_2.gff', 't/data/accessory_graphs/file_3.gff' ],
+    core_definition => 1,
+    sample_weights      => { 'file_1' => 0.5,  'file_2' => 1,    'file_3' => 0.1 },
+    samples_to_clusters => { 's1'     => 'c1', 's2'     => 'c1', 's3'     => 'c2', 's4' => 'c2' },
+);
+
+my @paths_and_weights = (
+    {
+        path           => [ 'g1', 'g2' ],
+        average_weight => 3,
+        sample_names   => [ 's1', 's2' ]
+    },
+    {
+        path           => [ 'g5', 'g6' ],
+        average_weight => 2,
+        sample_names   => [ 's3', 's4' ]
+    },
+    {
+        path           => [ 'g3', 'g4' ],
+        average_weight => 1,
+        sample_names   => [ 's1', 's2' ]
+    }
+);
+my @expected_path_order = ( [ 'g1', 'g2' ], [ 'g3', 'g4' ], [ 'g5', 'g6' ] );
+is_deeply( $obj->_order_by_samples_and_weights( \@paths_and_weights ), \@expected_path_order, 'graphs reordered as expected' );
+
+cleanup();
+done_testing();
+
+# Builds an OrderGenes object for one groups file, checks that the graph is
+# built and both .dot files exist, then returns the object to the caller.
+sub order_genes_obj {
+    my ( $groups_filename, $core_definition ) = @_;
+    my @fasta_files = ( 't/data/accessory_graphs/file_1.fa',  't/data/accessory_graphs/file_2.fa',  't/data/accessory_graphs/file_3.fa' );
+    my @gff_files   = ( 't/data/accessory_graphs/file_1.gff', 't/data/accessory_graphs/file_2.gff', 't/data/accessory_graphs/file_3.gff' );
+
+    cleanup();    # start from a state with no graph files on disk
+    my $group_analysis = Bio::Roary::AnalyseGroups->new(
+        fasta_files     => \@fasta_files,
+        groups_filename => $groups_filename
+    );
+    my $order_genes = Bio::Roary::OrderGenes->new(
+        analyse_groups_obj => $group_analysis,
+        gff_files          => \@gff_files,
+        core_definition    => $core_definition
+    );
+    ok( $order_genes, "Initialise order genes object for $groups_filename" );
+
+    ok( $order_genes->groups_to_contigs, 'build the graph' );
+    check_all_groups_in_output_graph( $groups_filename, $order_genes->groups_to_contigs, $core_definition );
+    ok( -e 'core_accessory_graph.dot', 'core accessory graph created' );
+    ok( -e 'accessory_graph.dot',      'accessory graph created' );
+
+    return $order_genes;
+}
+
+# Asserts, for each non-empty line of the groups file, that the group is a key of $groups_to_contigs and that it has
+# an accessory_label only when it has fewer than 3 * $core_definition members. Dies if the file cannot be opened.
+sub check_all_groups_in_output_graph {
+    my ( $groups_filename, $groups_to_contigs, $core_definition ) = @_;
+    open( my $groups_in, '<', $groups_filename ) or die "Cannot open $groups_filename: $!";    # an unchecked open would silently run zero assertions
+    while ( my $line = <$groups_in> ) {
+        chomp $line;
+        next if ( $line eq '' );
+        my ( $group, $attributes ) = split( ':', $line );
+        ok( ( $groups_to_contigs->{$group} ), "group $group found in file $groups_filename" );
+        # Check to see if the accessory groups are tagged properly
+        $attributes =~ s/ //gi;
+        my @sequence_ids = split( /\t/, $attributes );
+        if ( @sequence_ids >= 3 * $core_definition ) {
+            ok( !defined( $groups_to_contigs->{$group}->{accessory_label} ), "group $group is core so shouldnt have any accessory labels" );
+        }
+        else {
+            ok( defined( $groups_to_contigs->{$group}->{accessory_label} ), "group $group is accessory so should have accessory label" );
+        }
+    }
+    close($groups_in);
+}
+
+sub cleanup {    # deletes both graph .dot files from the current directory; unlink results are not checked
+    unlink 'core_accessory_graph.dot';
+    return unlink 'accessory_graph.dot';
+}
+
diff --git a/t/Bio/Roary/Output/CoreGeneAlignmentCoorindatesEMBL.t b/t/Bio/Roary/Output/CoreGeneAlignmentCoorindatesEMBL.t
new file mode 100644
index 0000000..43cde31
--- /dev/null
+++ b/t/Bio/Roary/Output/CoreGeneAlignmentCoorindatesEMBL.t
@@ -0,0 +1,42 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use Test::Files;
+
+BEGIN { unshift( @INC, './lib' ) }
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::Output::CoreGeneAlignmentCoordinatesEMBL');
+}
+
+ok(
+    my $core_gene_obj = Bio::Roary::Output::CoreGeneAlignmentCoordinatesEMBL->new(
+        multifasta_files => [
+            't/data/multifasta_files/1.fa.aln', 't/data/multifasta_files/outof_order.fa.aln',
+            't/data/multifasta_files/2.fa.aln', 't/data/multifasta_files/3.fa.aln'
+        ],
+        gene_lengths => {
+            't/data/multifasta_files/1.fa.aln'           => 1,
+            't/data/multifasta_files/outof_order.fa.aln' => 10,
+            't/data/multifasta_files/2.fa.aln'           => 100,
+            't/data/multifasta_files/3.fa.aln'           => 1000
+        },
+        output_filename => 'output_name.embl'
+    ),
+    'initialise core gene obj'
+);
+# is() takes ( got, expected, name ): keep the value under test first so failure diagnostics are labelled correctly.
+is($core_gene_obj->_gene_name_from_filename('t/abc/efg.fa.aln'), 'efg', 'Get gene name with directory');
+is($core_gene_obj->_gene_name_from_filename('efg.fa.aln'), 'efg', 'Get gene name with no directory');
+is($core_gene_obj->_gene_name_from_filename('efg'), 'efg', 'Get gene name where theres no extension');
+is($core_gene_obj->_gene_name_from_filename('efg.fa'), 'efg', 'Get gene name with partial extension');
+
+ok($core_gene_obj->create_file,'create the embl header file');
+compare_ok('output_name.embl', 't/data/multifasta_files/expected_output.embl', 'content of embl file as expected');
+
+is($core_gene_obj->_current_coordinate, 1112, 'next coordinate');
+unlink('output_name.embl');
+
+done_testing();
diff --git a/t/Bio/Roary/Output/DifferenceBetweenSets.t b/t/Bio/Roary/Output/DifferenceBetweenSets.t
new file mode 100644
index 0000000..0b89164
--- /dev/null
+++ b/t/Bio/Roary/Output/DifferenceBetweenSets.t
@@ -0,0 +1,38 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use Moose;
+BEGIN { unshift( @INC, './t/lib' ) }
+with 'TestHelper';
+
+BEGIN { unshift( @INC, './lib' ) }
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::Output::DifferenceBetweenSets');
+}
+
+my $plot_groups_obj = Bio::Roary::AnalyseGroups->new(
+    fasta_files     => [ 't/data/query_1.fa', 't/data/query_2.fa','t/data/query_3.fa' ],
+    groups_filename => 't/data/query_groups'
+);
+
+ok(my $obj = Bio::Roary::Output::DifferenceBetweenSets->new(
+    analyse_groups  => $plot_groups_obj,
+    input_filenames_sets => [ ['t/data/query_1.fa'], ['t/data/query_2.fa','t/data/query_3.fa'] ]
+  ),'initialise set difference obj');
+# Set one is query_1 alone; set two is query_2 plus query_3. Presumably each call below writes one set_difference_* file - confirm.
+ok($obj->groups_set_one_unique,'create set one unique');
+ok($obj->groups_set_two_unique,'create set two unique');
+ok($obj->groups_in_common,'create common set unique');
+
+compare_files('set_difference_unique_set_one','t/data/expected_set_difference_unique_set_one','set one file content as expected');
+compare_files('set_difference_unique_set_two','t/data/expected_set_difference_unique_set_two','set two file content as expected');
+compare_files('set_difference_common_set','t/data/expected_set_difference_common_set','common set file content as expected');
+# Delete the three compared files from the working directory.
+unlink('set_difference_unique_set_one');
+unlink('set_difference_unique_set_two');
+unlink('set_difference_common_set');
+
+done_testing();
diff --git a/t/Bio/Roary/Output/GroupsMultifastaProtein.t b/t/Bio/Roary/Output/GroupsMultifastaProtein.t
new file mode 100644
index 0000000..0042990
--- /dev/null
+++ b/t/Bio/Roary/Output/GroupsMultifastaProtein.t
@@ -0,0 +1,26 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use Test::Files;
+
+BEGIN { unshift( @INC, './lib' ) }
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::Output::GroupsMultifastaProtein');
+}
+
+ok(
+    my $obj = Bio::Roary::Output::GroupsMultifastaProtein->new(
+        nucleotide_fasta_file    => 't/data/nuc_multifasta.fa',
+    ),
+    'initialise creating the nuc fasta obj'
+);
+ok($obj->convert_nucleotide_to_protein(),'perform the conversion');
+# The result is compared at the input path with a .faa extension, then removed.
+compare_ok('t/data/nuc_multifasta.faa', 't/data/expected_nuc_multifasta.faa', 'File content as expected');
+
+unlink('t/data/nuc_multifasta.faa');
+
+done_testing();
diff --git a/t/Bio/Roary/Output/GroupsMultifastas.t b/t/Bio/Roary/Output/GroupsMultifastas.t
new file mode 100755
index 0000000..ed3549c
--- /dev/null
+++ b/t/Bio/Roary/Output/GroupsMultifastas.t
@@ -0,0 +1,45 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use Test::Files;
+
+BEGIN { unshift( @INC, './lib' ) }
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::Output::GroupsMultifastas');
+}
+
+my $plot_groups_obj = Bio::Roary::AnalyseGroups->new(
+    fasta_files     => [ 't/data/example_1.faa', 't/data/example_2.faa' ],
+    groups_filename => 't/data/example_groups'
+);
+
+ok(
+    my $obj = Bio::Roary::Output::GroupsMultifastas->new(
+        group_names    => [ 'group_2', 'group_5' ],
+        analyse_groups => $plot_groups_obj
+    ),
+    'initialise creating multiple fastas'
+);
+
+ok( $obj->create_files(), 'Create multiple fasta files' );
+
+# Check that the files have been created; each test name carries the file it checks
+ok( -e $obj->output_filename_base . '_group_2.fa', $obj->output_filename_base . '_group_2.fa'.' group created' );
+ok( -e $obj->output_filename_base . '_group_5.fa', $obj->output_filename_base . '_group_5.fa'.' group created' );
+
+compare_ok( $obj->output_filename_base . '_group_2.fa' ,
+    't/data/expected_output_groups_group_2_multi.fa',
+    'group 2 content as expected'
+);
+compare_ok( $obj->output_filename_base . '_group_5.fa' ,
+    't/data/expected_output_groups_group_5_multi.fa',
+    'group 5 content as expected'
+);
+
+unlink( $obj->output_filename_base . '_group_2.fa' );
+unlink( $obj->output_filename_base . '_group_5.fa' );
+
+done_testing();
diff --git a/t/Bio/Roary/Output/GroupsMultifastasNucleotide.t b/t/Bio/Roary/Output/GroupsMultifastasNucleotide.t
new file mode 100644
index 0000000..2665b8e
--- /dev/null
+++ b/t/Bio/Roary/Output/GroupsMultifastasNucleotide.t
@@ -0,0 +1,91 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use File::Path qw( remove_tree);
+use Data::Dumper;
+use Test::Files;
+
+BEGIN { unshift( @INC, './lib' ) }
+
+BEGIN {
+    use Test::Most;
+    use Test::Output;
+    use_ok('Bio::Roary::Output::GroupsMultifastasNucleotide');
+    use Bio::Roary::AnnotateGroups;
+    use Bio::Roary::AnalyseGroups;
+    
+}
+
+cleanup_files();
+my $gff_files = [ 't/data/query_1.gff', 't/data/query_2.gff','t/data/query_3.gff' ];
+
+my $obj;
+
+my $annotate_groups = Bio::Roary::AnnotateGroups->new(
+  gff_files       => $gff_files,
+  groups_filename => 't/data/query_groups_reference',
+);
+
+ok($annotate_groups->reannotate, 'reannotate the groups');
+
+ok(
+    $obj = Bio::Roary::Output::GroupsMultifastasNucleotide->new(
+        group_names     => [ 'group_2', 'group_5' ],
+        gff_files       => $gff_files,
+        annotate_groups => $annotate_groups,
+        dont_delete_files => 1,
+    ),
+    'initialise creating multiple fastas where you dont delete non core files'
+);
+ok( $obj->create_files(), 'Create multiple fasta files where you dont delete non core files' );
+
+compare_ok('pan_genome_sequences/hly.fa', 't/data/pan_genome_sequences/hly.fa', 'Check multifasta content is correct for 3-hly.fa');
+compare_ok('pan_genome_sequences/speH.fa','t/data/pan_genome_sequences/speH.fa','Check multifasta content is correct for 2-speH.fa');
+compare_ok('pan_genome_sequences/argF.fa','t/data/pan_genome_sequences/argF.fa','Check multifasta content is correct for 2-argF.fa');
+ok(-e 'pan_genome_reference.fa','pan genome reference file created');
+compare_ok('pan_genome_reference.fa', 't/data/expected_g2_g5_pan_genome_reference.fa', 'pan genome reference as expected');
+
+cleanup_files();
+
+# Second scenario: with dont_delete_files => 0 the speH and argF files must be absent afterwards.
+ok(
+    $obj = Bio::Roary::Output::GroupsMultifastasNucleotide->new(
+        group_names     => [ 'group_2', 'group_5' ],
+        gff_files       => $gff_files,
+        annotate_groups => $annotate_groups,
+        dont_delete_files => 0,
+    ),
+    'initialise creating multiple fastas where you delete non core files'
+);
+ok( $obj->create_files(), 'Create multiple fasta files where you delete non core files' );
+
+compare_ok('pan_genome_sequences/hly.fa', 't/data/pan_genome_sequences/hly.fa' , 'Check multifasta content is correct for 3-hly.fa ');
+ok(! -e 'pan_genome_sequences/speH.fa', 'Check 2-speH.fa doesnt exist since its non core');
+ok(! -e 'pan_genome_sequences/argF.fa', 'Check 2-argF.fa doesnt exist since its non core');
+cleanup_files();
+
+
+
+# test group number limit
+ok(
+    $obj = Bio::Roary::Output::GroupsMultifastasNucleotide->new(
+        group_names     => [ 'group_2', 'group_5' ],
+        gff_files       => $gff_files,
+        annotate_groups => $annotate_groups,
+        group_limit    => 4
+    ),
+    'initialise creating multiple fastas'
+);
+my $exp_stderr = "Number of clusters (8) exceeds limit (4). Multifastas not created. Please check the spreadsheet for contamination from different species or increase the --group_limit parameter.\n";
+stderr_is { $obj->create_files() } $exp_stderr, 'multifasta creation fails when group limit exceeded';
+
+cleanup_files();
+
+done_testing();
+
+
+sub cleanup_files {    # Remove the pan_genome_sequences directory and the two named files, all relative to the working directory.
+    remove_tree('pan_genome_sequences');
+    unlink('reannotated_groups_file');
+    unlink('pan_genome_reference.fa');
+}
\ No newline at end of file
diff --git a/t/Bio/Roary/Output/NumberOfGroups.t b/t/Bio/Roary/Output/NumberOfGroups.t
new file mode 100644
index 0000000..549a620
--- /dev/null
+++ b/t/Bio/Roary/Output/NumberOfGroups.t
@@ -0,0 +1,71 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use Test::Files;
+
+BEGIN { unshift( @INC, './lib' ) }
+
+BEGIN {
+    use Test::Most;
+    use Bio::Roary::AnnotateGroups;
+    use Bio::Roary::AnalyseGroups;
+    use Bio::Roary::GroupStatistics;
+    use_ok('Bio::Roary::Output::NumberOfGroups');
+}
+
+my $annotate_groups = Bio::Roary::AnnotateGroups->new(
+  gff_files   => ['t/data/query_1.gff','t/data/query_2.gff','t/data/query_3.gff'],
+  groups_filename => 't/data/query_groups',
+);
+
+my $analyse_groups = Bio::Roary::AnalyseGroups->new(
+    fasta_files     => ['t/data/query_1.fa','t/data/query_2.fa','t/data/query_3.fa'],
+    groups_filename => 't/data/query_groups'
+);
+
+my $group_statistics = Bio::Roary::GroupStatistics->new(
+  annotate_groups_obj => $annotate_groups,
+  analyse_groups_obj  => $analyse_groups 
+);
+
+ok(my $obj = Bio::Roary::Output::NumberOfGroups->new(
+  group_statistics_obj => $group_statistics,
+  annotate_groups_obj      => $annotate_groups
+  ),'initialise object');
+
+ok($obj->create_output_files, 'create the raw output file');
+# compare_ok() takes ( got_file, expected_file, name ): the generated .Rtab goes first, the fixture second.
+ok(-e 'number_of_conserved_genes.Rtab', 'check raw output file created');
+compare_ok('number_of_conserved_genes.Rtab', 't/data/expected_number_of_conserved_genes.tab', 'Content of conserved genes tab file as expected');
+unlink('number_of_conserved_genes.Rtab');
+
+ok(-e 'number_of_new_genes.Rtab', 'check raw output file created');
+compare_ok('number_of_new_genes.Rtab', 't/data/expected_number_of_new_genes.tab', 'Content of new genes tab file as expected');
+unlink('number_of_new_genes.Rtab');
+
+ok(-e 'number_of_genes_in_pan_genome.Rtab', 'check raw output file created');
+compare_ok('number_of_genes_in_pan_genome.Rtab', 't/data/expected_number_of_genes_in_pan_genome.tab', 'Content of total groups tab file as expected');
+unlink('number_of_genes_in_pan_genome.Rtab');
+
+ok(-e 'number_of_unique_genes.Rtab', 'check raw output file created');
+compare_ok('number_of_unique_genes.Rtab', 't/data/expected_number_of_unique_genes.tab', 'Content of unique groups tab file as expected');
+unlink('number_of_unique_genes.Rtab');
+
+
+# Vary the core
+ok($obj = Bio::Roary::Output::NumberOfGroups->new(
+  group_statistics_obj => $group_statistics,
+  annotate_groups_obj      => $annotate_groups,
+  core_definition => 0.6
+  ),"initialise object with 60 percent core definition");
+ok($obj->create_output_files, 'create the raw output files for 60 percent core def');
+compare_ok('number_of_conserved_genes.Rtab', 't/data/expected_number_of_conserved_genes_0.6.tab', 'Content of conserved genes with 60 percent core def');
+
+unlink('number_of_conserved_genes.Rtab');
+unlink('number_of_new_genes.Rtab');
+unlink('number_of_genes_in_pan_genome.Rtab');
+unlink('number_of_unique_genes.Rtab');
+unlink('group_statitics.csv');    # NOTE(review): spelled 'statitics' - confirm it matches the name the module writes
+
+done_testing();
diff --git a/t/Bio/Roary/Output/QueryGroups.t b/t/Bio/Roary/Output/QueryGroups.t
new file mode 100755
index 0000000..dbc78e9
--- /dev/null
+++ b/t/Bio/Roary/Output/QueryGroups.t
@@ -0,0 +1,73 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use Moose;
+BEGIN { unshift( @INC, './t/lib' ) }
+with 'TestHelper';
+
+BEGIN { unshift( @INC, './lib' ) }
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::Output::QueryGroups');
+}
+
+my $plot_groups_obj = Bio::Roary::AnalyseGroups->new(
+    fasta_files     => [ 't/data/query_1.fa', 't/data/query_2.fa','t/data/query_3.fa' ],
+    groups_filename => 't/data/query_groups'
+);
+
+my $obj;
+ok($obj = Bio::Roary::Output::QueryGroups->new(
+    analyse_groups  => $plot_groups_obj,
+    input_filenames => [ 't/data/query_1.fa', 't/data/query_2.fa','t/data/query_3.fa' ]
+  ),'initialise groups query object');
+  
+ok($obj->groups_union(), 'create the union file');
+ok($obj->groups_intersection(), 'create the intersection file');
+ok($obj->groups_complement(), 'create the complement file');
+
+compare_files('union_of_groups.gg','t/data/expected_union_of_groups.gg', 'contents of the union groups as expected');
+compare_files('intersection_of_groups.gg', 't/data/expected_intersection_of_groups.gg', 'contents of the intersection groups as expected');
+compare_files('complement_of_groups.gg', 't/data/expected_complement_of_groups.gg', 'contents of the complement groups as expected');
+
+######################################
+# test varying core definition: repeat the intersection and complement checks with core_definition => 0.66
+ok($obj = Bio::Roary::Output::QueryGroups->new(
+    analyse_groups  => $plot_groups_obj,
+    input_filenames => [ 't/data/query_1.fa', 't/data/query_2.fa','t/data/query_3.fa' ],
+    core_definition => 0.66
+  ),'initialise groups query object');
+  
+ok($obj->groups_intersection(), 'create the intersection file');
+ok($obj->groups_complement(), 'create the complement file');
+
+compare_files('intersection_of_groups.gg', 't/data/expected_intersection_of_groups_core0.66.gg', 'contents of the intersection groups as expected');
+compare_files('complement_of_groups.gg', 't/data/expected_complement_of_groups_core0.66.gg', 'contents of the complement groups as expected');
+
+
+unlink('union_of_groups.gg');
+unlink('intersection_of_groups.gg');
+unlink('complement_of_groups.gg');
+
+######################################
+
+$plot_groups_obj = Bio::Roary::AnalyseGroups->new(
+    fasta_files     => [ 't/data/query_1.fa', 't/data/query_2.fa','t/data/query_3.fa' ],
+    groups_filename => 't/data/query_groups_paralogs'
+);
+
+ok($obj = Bio::Roary::Output::QueryGroups->new(
+    analyse_groups  => $plot_groups_obj,
+    input_filenames => [ 't/data/query_1.fa', 't/data/query_2.fa','t/data/query_3.fa' ]
+  ),'initialise groups query object with paralogs');
+  
+ok($obj->groups_intersection(), 'create the intersection file');
+
+compare_files('intersection_of_groups.gg', 't/data/expected_intersection_of_groups_paralogs.gg', 'contents of the intersection groups with paralogs as expected');
+unlink('intersection_of_groups.gg');
+
+
+done_testing();
+
diff --git a/t/Bio/Roary/ParallelAllAgainstAllBlast.t b/t/Bio/Roary/ParallelAllAgainstAllBlast.t
new file mode 100644
index 0000000..7ffb0ca
--- /dev/null
+++ b/t/Bio/Roary/ParallelAllAgainstAllBlast.t
@@ -0,0 +1,30 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use Cwd;
+
+BEGIN { unshift( @INC, './lib' ) }
+$ENV{PATH} .= ":./bin";
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::ParallelAllAgainstAllBlast');
+}
+my $obj;
+my $cwd = getcwd();
+# Point the object at the dummy executables under t/bin, using absolute paths built from the current directory.
+ok($obj = Bio::Roary::ParallelAllAgainstAllBlast->new(
+  fasta_file       => 't/data/example_1.faa',
+  blastp_exec      => $cwd.'/t/bin/dummy_blastp',
+  makeblastdb_exec => $cwd.'/t/bin/dummy_makeblastdb',
+),'initialise obj with mocked external applications');
+ok($obj->run(),'Run locally');
+ok(-e $obj->_working_directory_name.'/blast_results', 'Combined blast results');
+# NOTE(review): these files are presumably left behind by the dummy executables - confirm the names.
+unlink('output_contigs.phr');
+unlink('output_contigs.pin');
+unlink('output_contigs.psq');
+unlink('results.out');
+
+done_testing();
diff --git a/t/Bio/Roary/PrepareInputFiles.t b/t/Bio/Roary/PrepareInputFiles.t
new file mode 100644
index 0000000..8bccdae
--- /dev/null
+++ b/t/Bio/Roary/PrepareInputFiles.t
@@ -0,0 +1,56 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use File::Basename;
+
+BEGIN { unshift( @INC, './lib' ) }
+$ENV{PATH} .= ":./bin";
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::PrepareInputFiles');
+}
+
+my $obj;
+
+ok(
+    $obj = Bio::Roary::PrepareInputFiles->new(
+        input_files => [
+            't/data/example_annotation.gff',   't/data/example_1.faa',
+            't/data/example_annotation_2.gff', 't/data/example_2.faa','t/data/sequences_with_unknowns.faa'
+        ],
+    ),
+    'initialise'
+);
+# Compare basenames only; a single sort after basename() is enough to make the order deterministic.
+my @sorted_fasta_files = sort map { basename($_) } @{$obj->fasta_files};
+my @expected_fasta_files = sort((
+            'example_1.faa.tmp.filtered.fa',
+            'example_2.faa.tmp.filtered.fa',
+            'example_annotation.gff.proteome.faa',
+            'example_annotation_2.gff.proteome.faa',
+            'sequences_with_unknowns.faa.tmp.filtered.fa'
+));
+
+is_deeply(
+    \@sorted_fasta_files,
+    \@expected_fasta_files,
+    'proteome extracted from gff files, input fasta files filtered'
+);
+
+my @input_files_lookup = sort map { basename($_) } @{$obj->lookup_fasta_files_from_unknown_input_files( [ 't/data/example_annotation_2.gff', 't/data/example_1.faa' ] )};
+is_deeply(
+    \@input_files_lookup,
+    ['example_1.faa.tmp.filtered.fa','example_annotation_2.gff.proteome.faa'],
+    'previously created faa file looked up from gff filename'
+);
+
+unlink('example_annotation.gff.proteome.faa');
+unlink('example_annotation_2.gff.proteome.faa');
+unlink('sequences_with_unknowns.faa.tmp.filtered.fa');
+unlink('example_1.faa.tmp.filtered.fa');
+unlink('example_2.faa.tmp.filtered.fa');
+
+done_testing();
+
diff --git a/t/Bio/Roary/PresenceAbsenceMatrix.t b/t/Bio/Roary/PresenceAbsenceMatrix.t
new file mode 100644
index 0000000..022656b
--- /dev/null
+++ b/t/Bio/Roary/PresenceAbsenceMatrix.t
@@ -0,0 +1,97 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use Test::Files;
+
+BEGIN { unshift( @INC, './lib' ) }
+$ENV{PATH} .= ":./bin";
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::PresenceAbsenceMatrix');
+    use Bio::Roary::AnnotateGroups;
+}
+my $obj;
+my $annotate_groups = Bio::Roary::AnnotateGroups->new(
+    gff_files       => [ 't/data/query_1.gff', 't/data/query_2.gff', 't/data/query_3.gff' ],
+    groups_filename => 't/data/query_groups',
+);
+
+my $sorted_file_names = [ 't/data/query_1.fa', 't/data/query_2.fa', 't/data/query_3.fa' ];
+my $groups_to_files = {
+    'group_3' => {
+        't/data/query_1.fa' => [ '1_3' ],
+        't/data/query_3.fa' => [ '3_3' ]
+    },
+    'group_5' => {
+        't/data/query_3.fa' => [ '3_5' ]
+    },
+    'group_7' => {
+        't/data/query_2.fa' => [ '2_7' ]
+    },
+    'group_4' => {
+        't/data/query_3.fa' => [ '3_4' ],
+        't/data/query_2.fa' => [ '2_4' ]
+    },
+    'group_1' => {
+        't/data/query_1.fa' => [ '1_1' ],
+        't/data/query_3.fa' => [ '3_1' ],
+        't/data/query_2.fa' => [ '2_1' ]
+    },
+    'group_6' => {
+        't/data/query_1.fa' => [ '1_6' ]
+    },
+    'group_2' => {
+        't/data/query_1.fa' => [ '1_2' ],
+        't/data/query_2.fa' => [ '2_2' ]
+    }
+};
+my $num_files_in_groups = {
+    'group_3' => 2,
+    'group_5' => 1,
+    'group_7' => 1,
+    'group_4' => 2,
+    'group_1' => 3,
+    'group_6' => 1,
+    'group_2' => 2
+};
+my $sample_headers = [ 'query_1.fa', 'query_2.fa', 'query_3.fa' ];
+
+ok(
+    $obj = Bio::Roary::PresenceAbsenceMatrix->new(
+        annotate_groups_obj => $annotate_groups,
+        output_filename     => 'test_gene_presence_absence.Rtab',
+        sorted_file_names   => $sorted_file_names,
+        groups_to_files     => $groups_to_files,
+        num_files_in_groups => $num_files_in_groups,
+        sample_headers      => $sample_headers,
+    ),
+    'initialise object'
+);
+
+ok( $obj->create_matrix_file,             'create matrix file' );
+ok( -e 'test_gene_presence_absence.Rtab', 'matrix file exists' );
+compare_ok( 'test_gene_presence_absence.Rtab', 't/data/expected_gene_presence_and_absence.Rtab', 'Rtab matrix content as expected' );
+
+# one gene one group: replace the fixtures with a single group holding one gene id from one file
+$groups_to_files = {'group_1' => {'t/data/query_1.fa' => [ '1_1' ]}};
+$num_files_in_groups = {'group_1' => 1};
+
+ok(
+    $obj = Bio::Roary::PresenceAbsenceMatrix->new(
+        annotate_groups_obj => $annotate_groups,
+        output_filename     => 'test_gene_presence_absence.Rtab',
+        sorted_file_names   => $sorted_file_names,
+        groups_to_files     => $groups_to_files,
+        num_files_in_groups => $num_files_in_groups,
+        sample_headers      => $sample_headers,
+    ),
+    'initialise object one gene one group'
+);
+
+ok( $obj->create_matrix_file,             'create matrix file one gene one group' );
+compare_ok( 'test_gene_presence_absence.Rtab', 't/data/expected_one_gene_presence_and_absence.Rtab', 'Rtab matrix content as expected for one gene one group' );
+
+unlink('test_gene_presence_absence.Rtab');
+done_testing();
diff --git a/t/Bio/Roary/QC/Report.t b/t/Bio/Roary/QC/Report.t
new file mode 100755
index 0000000..4ff288c
--- /dev/null
+++ b/t/Bio/Roary/QC/Report.t
@@ -0,0 +1,100 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use Test::Files;
+use File::Which;
+
+BEGIN { unshift( @INC, './lib' ) }
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::QC::Report');
+}
+
+
+
+my $kraken_data = [
+    [ 'assembly1', 'Clostridium',   'Clostridium difficile' ],
+    [ 'assembly2', 'Escherichia',   'Escherichia coli' ],
+    [ 'assembly3', 'Streptococcus', 'Streptococcus pneumoniae' ]
+];
+
+ok(
+    my $qc_report_obj = Bio::Roary::QC::Report->new(
+        input_files  => [],
+        outfile      => "kraken_report.csv",
+        _kraken_data => $kraken_data,
+        kraken_db    => 't/data/kraken_test/',
+        job_runner   => "Local"
+    ),
+    'QC report object created with no input gff files'
+);
+
+ok( $qc_report_obj->report, 'report generated' );
+ok( -e 'kraken_report.csv', 'report file exists' );
+
+compare_ok('kraken_report.csv',"t/data/exp_qc_report.csv", 'report file correct' );
+
+unlink('kraken_report.csv');
+
+
+ok(
+    $qc_report_obj = Bio::Roary::QC::Report->new(
+        input_files => [ 't/data/query_1.gff', 't/data/query_2.gff' ],
+        outfile     => "kraken_report.csv",
+        job_runner  => "Local",
+        kraken_db   => 't/data/kraken_test/',
+        verbose  => 0,
+    ),
+    'QC report object created with data'
+);
+
+is( $qc_report_obj->_nuc_fasta_filename('abc.gff'), $qc_report_obj->_tmp_directory . '/abc.fna', 'filename of nuc from gff' );    # is( got, expected, name )
+is(
+    $qc_report_obj->_extract_nuc_fasta_cmd('abc.gff'),
+    'sed -n \'/##FASTA/,//p\' abc.gff | grep -v \'##FASTA\' > ' . $qc_report_obj->_tmp_directory . '/abc.fna',
+    'extract nuc command'
+);
+
+ok( my $nuc_files = $qc_report_obj->_extract_nuc_files_from_all_gffs(), 'extract nuc files from gffs' );
+
+is_deeply( $nuc_files, [ $qc_report_obj->_tmp_directory . '/query_1.fna', $qc_report_obj->_tmp_directory . '/query_2.fna' ],
+    'check extracted nuc files from gffs list' );
+
+compare_ok( $qc_report_obj->_tmp_directory . '/query_1.fna' ,
+    't/data/expected_query_1.fna',
+    'Check FASTA file 1 extracted as expected'
+);
+compare_ok( $qc_report_obj->_tmp_directory . '/query_2.fna' ,
+    't/data/expected_query_2.fna',
+    'Check FASTA file 2 extracted as expected'
+);
+
+SKIP:
+{
+    # The skip count must equal the number of tests in this block (8), so skipped runs report every test.
+    skip "kraken not installed",        8 unless ( which('kraken') );
+    skip "kraken-report not installed", 8 unless ( which('kraken-report') );
+
+    ok( my $kraken_files = $qc_report_obj->_run_kraken_on_nuc_files($nuc_files), 'run kraken over everything' );
+    is_deeply( $kraken_files, [ $qc_report_obj->_tmp_directory . '/query_1.kraken', $qc_report_obj->_tmp_directory . '/query_2.kraken' ],
+        'check kraken files are created from nuc files' );
+        
+    ok(my $kraken_report_files = $qc_report_obj->_run_kraken_report_on_kraken_files( $kraken_files ), 'build reports');
+    is_deeply( $kraken_report_files, [ $qc_report_obj->_tmp_directory . '/query_1.kraken.report', $qc_report_obj->_tmp_directory . '/query_2.kraken.report' ],
+        'check kraken report files are created from kraken files' );
+        
+    is_deeply($qc_report_obj->_parse_kraken_reports($kraken_report_files),[['query_1','Staphylococcus', 'Staphylococcus aureus'],['query_2','Staphylococcus', 'Staphylococcus aureus']],'check output report');
+    
+    
+    ok( $qc_report_obj->report, 'report generated with real data' );
+    ok( -e 'kraken_report.csv', 'report file exists with real data' );
+    compare_ok('kraken_report.csv',"t/data/exp_qc_report_real.csv", 'report file correct' );
+    unlink('kraken_report.csv');
+    
+}
+
+
+done_testing();
+
diff --git a/t/Bio/Roary/ReformatInputGFFs.t b/t/Bio/Roary/ReformatInputGFFs.t
new file mode 100755
index 0000000..16ad53b
--- /dev/null
+++ b/t/Bio/Roary/ReformatInputGFFs.t
@@ -0,0 +1,69 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use File::Path qw(remove_tree);
+use Test::Files;
+
+BEGIN { unshift( @INC, './lib' ) }
+$ENV{PATH} .= ":./bin";
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::ReformatInputGFFs');
+}
+
+
+my $obj;
+remove_tree('fixed_input_files');
+ok($obj = Bio::Roary::ReformatInputGFFs->new(gff_files => ['t/data/reformat_input_gffs/query_1.gff']), 'initialise with one input gff');
+ok($obj->fix_duplicate_gene_ids, 'fix duplicates with one input gff');
+is_deeply($obj->fixed_gff_files, ['t/data/reformat_input_gffs/query_1.gff'] ,'list of gff files with one input gff, nothing should change');
+ok(!( -d 'fixed_input_files'), 'Directory shouldnt exist because there arent any fixed input files');
+
+
+ok($obj = Bio::Roary::ReformatInputGFFs->new(gff_files => ['t/data/reformat_input_gffs/query_1.gff', 't/data/reformat_input_gffs/query_2.gff',]), 'initialise with 2 input gffs');
+ok(!( -d 'fixed_input_files'), 'Directory shouldnt exist before running');
+is_deeply($obj->_get_ids_for_gff_file('t/data/reformat_input_gffs/query_1.gff'),[
+          '1_1',
+          'abc_00002',
+          'abc_00003',
+          'abc_00004',
+          '1_2'
+        ],'extract ids');
+is_deeply($obj->_get_ids_for_gff_file('t/data/reformat_input_gffs/query_2.gff'),[
+          '1_1',
+          'abc_00002',
+          'abc_00003',
+          'abc_00004',
+          '1_2'
+        ],'extract ids');
+ok($obj->fix_duplicate_gene_ids, 'fix duplicates with 2 input gffs');
+ok(( -d 'fixed_input_files'), 'Directory should exist because there is one gff thats fixed');
+is_deeply($obj->fixed_gff_files, ['t/data/reformat_input_gffs/query_1.gff','fixed_input_files/query_2.gff' ] ,'list of gff files one in the fixed directory');
+ok(( -e 'fixed_input_files/query_2.gff'), 'fixed file should exist');
+compare_ok('fixed_input_files/query_2.gff', 't/data/reformat_input_gffs/expected_fixed_query_2.gff',  'fixed file should have expected changes');
+remove_tree('fixed_input_files');
+
+
+ok($obj = Bio::Roary::ReformatInputGFFs->new(gff_files => ['t/data/reformat_input_gffs/query_1.gff', 't/data/reformat_input_gffs/query_2.gff', 't/data/reformat_input_gffs/query_3.gff']), 'initialise with 3 input gffs');
+ok(!( -d 'fixed_input_files'), 'Directory shouldnt exist before running');
+ok($obj->fix_duplicate_gene_ids, 'fix duplicates with 3 input gffs');
+ok(( -d 'fixed_input_files'), 'Directory should exist because there is 2 gffs thats fixed');
+is_deeply($obj->fixed_gff_files, ['t/data/reformat_input_gffs/query_1.gff','fixed_input_files/query_2.gff','fixed_input_files/query_3.gff' ] ,'list of gff files 2 in the fixed directory');
+ok(( -e 'fixed_input_files/query_2.gff'), 'fixed file should exist');
+ok(( -e 'fixed_input_files/query_3.gff'), 'fixed file should exist');
+compare_ok('fixed_input_files/query_2.gff','t/data/reformat_input_gffs/expected_fixed_query_2.gff',  'fixed file should have expected changes');
+compare_ok('fixed_input_files/query_3.gff', 't/data/reformat_input_gffs/expected_fixed_query_3.gff',  'fixed file should have expected changes');
+remove_tree('fixed_input_files');
+	
+# Regression case: call _add_suffix_to_gene_ids_and_return_new_file directly on a GFF that, per the test name, previously exposed a bug.
+ok($obj = Bio::Roary::ReformatInputGFFs->new(gff_files => ['t/data/reformat_input_gffs/real_1.gff']), 'initialise with 1 gff that has shown to have a bug');
+ok(my $fixed_file = $obj->_add_suffix_to_gene_ids_and_return_new_file('t/data/reformat_input_gffs/real_1.gff'), 'fix duplicates');
+ok(( -e 'fixed_input_files/real_1.gff'), 'fixed file should exist');
+compare_ok('fixed_input_files/real_1.gff', 't/data/reformat_input_gffs/expected_real_1.gff',  'fixed file should have expected changes');
+remove_tree('fixed_input_files');
+
+
+done_testing();
+
diff --git a/t/Bio/Roary/ReorderSpreadsheet.t b/t/Bio/Roary/ReorderSpreadsheet.t
new file mode 100644
index 0000000..d080bca
--- /dev/null
+++ b/t/Bio/Roary/ReorderSpreadsheet.t
@@ -0,0 +1,36 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Data::Dumper;
+use Test::Files;
+
+BEGIN { unshift( @INC, './lib' ) }
+$ENV{PATH} .= ":./bin";
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::ReorderSpreadsheet');
+}
+
+ok(
+    my $obj = Bio::Roary::ReorderSpreadsheet->new(
+        tree_file       => 't/data/reorder_isolates.tre',
+        spreadsheet     => 't/data/reorder_isolates_input.csv',
+        output_filename => 'reorder_isolates_output.csv',
+        sortby => 'height'
+    ),
+    'initialise reordering the spreadsheet'
+);
+        
+is_deeply($obj->_column_mappings,[0,1,2,3,4,5,6,7,8,9,10,11,12,13],'Column mappings with fixed in same order and end columns ordered by tree file');
+ok( $obj->reorder_spreadsheet(), 'run the reorder method' );
+ok( -e $obj->output_filename,    'check the output file exists' );
+
+compare_ok('reorder_isolates_output.csv',    # generated file first, expected fixture second
+    't/data/reorder_isolates_expected_output.csv',
+    'content of the spreadsheet as expected'
+);
+
+unlink('reorder_isolates_output.csv');
+
+done_testing();
diff --git a/t/Bio/Roary/SampleOrder.t b/t/Bio/Roary/SampleOrder.t
new file mode 100644
index 0000000..4a20faa
--- /dev/null
+++ b/t/Bio/Roary/SampleOrder.t
@@ -0,0 +1,164 @@
+#!/usr/bin/env perl
+# Tests that Bio::Roary::SampleOrder lists sample names in the order given by a tree file.
+use strict;
+use warnings;
+use Data::Dumper;
+
+BEGIN { unshift @INC, './lib'; }
+$ENV{PATH} .= ':./bin';
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::SampleOrder');
+}
+
+my $obj = Bio::Roary::SampleOrder->new( tree_file => 't/data/reorder_isolates.tre' );
+ok( $obj, 'initialise sample order object' );
+is_deeply( $obj->ordered_samples(), [ 'query_1', 'query_3', 'query_4', 'query_2' ],
+    'order of sample names matches the tree' );
+
+# Repeat with the raxml tree fixture; its expected order follows below.
+$obj = Bio::Roary::SampleOrder->new( tree_file => 't/data/raxml.tre' );
+ok( $obj, 'initialise sample order object with raxml tree' );
+
+is_deeply($obj->ordered_samples(),[
+          'efgh_7#3',
+          'abcd_4#15',
+          'abcd_3#9',
+          'abcd_4#17',
+          'abcd_3#20',
+          'abcd_3#96',
+          'abcd_3#7',
+          '6753_5#30',
+          'abcd_3#8',
+          'abcd_3#12',
+          'abcd_4#13',
+          'abcd_3#4',
+          'abcd_3#47',
+          'abcd_4#36',
+          'abcd_3#90',
+          'abcd_3#45',
+          'abcd_3#51',
+          'abcd_3#76',
+          'abcd_4#22',
+          'abcd_3#94',
+          'abcd_3#72',
+          'abcd_3#18',
+          'abcd_3#82',
+          'abcd_3#88',
+          'abcd_3#87',
+          'abcd_3#58',
+          'abcd_3#85',
+          'abcd_4#24',
+          'abcd_3#86',
+          'abcd_4#38',
+          'abcd_3#70',
+          'abcd_3#89',
+          'abcd_3#19',
+          'abcd_3#84',
+          'abcd_3#60',
+          'abcd_4#21',
+          'abcd_3#35',
+          'abcd_3#32',
+          'abcd_4#20',
+          'abcd_3#11',
+          'abcd_4#28',
+          'abcd_4#27',
+          'abcd_3#54',
+          'abcd_3#53',
+          'abcd_3#43',
+          'abcd_3#50',
+          'abcd_4#12',
+          'abcd_3#15',
+          'abcd_3#21',
+          'abcd_3#91',
+          'abcd_3#73',
+          'abcd_3#61',
+          'abcd_4#35',
+          'abcd_3#17',
+          'abcd_3#67',
+          'abcd_3#27',
+          'abcd_3#13',
+          'abcd_3#24',
+          'abcd_3#95',
+          'abcd_3#23',
+          'abcd_3#29',
+          'abcd_3#75',
+          'abcd_3#25',
+          'abcd_4#16',
+          'abcd_4#30',
+          'abcd_4#26',
+          'abcd_3#36',
+          'abcd_4#25',
+          'abcd_3#64',
+          'abcd_3#44',
+          'abcd_3#68',
+          'abcd_3#69',
+          'efgh_7#12',
+          'abcd_3#55',
+          'abcd_4#1',
+          'abcd_3#56',
+          'abcd_3#14',
+          'abcd_4#7',
+          'abcd_4#8',
+          'abcd_3#26',
+          'abcd_4#9',
+          'abcd_4#40',
+          'abcd_4#10',
+          'abcd_4#6',
+          'abcd_4#5',
+          'abcd_3#3',
+          'abcd_3#33',
+          'abcd_3#28',
+          'abcd_3#6',
+          'abcd_3#16',
+          'abcd_3#79',
+          'abcd_3#77',
+          'abcd_4#41',
+          'abcd_4#34',
+          'abcd_3#5',
+          'abcd_3#74',
+          'abcd_3#34',
+          'abcd_3#2',
+          'abcd_3#22',
+          'abcd_4#32',
+          'abcd_3#92',
+          'abcd_4#19',
+          'abcd_4#23',
+          'abcd_4#18',
+          'abcd_3#37',
+          'abcd_3#59',
+          'abcd_3#30',
+          'abcd_3#1',
+          'abcd_4#42',
+          'abcd_3#10',
+          'abcd_4#37',
+          'abcd_3#81',
+          'abcd_3#80',
+          'abcd_3#83',
+          'abcd_4#33',
+          'abcd_4#31',
+          'abcd_3#71',
+          'abcd_3#78',
+          'abcd_4#39',
+          'abcd_3#41',
+          'abcd_4#29',
+          'abcd_4#14',
+          'abcd_3#31',
+          'abcd_3#93',
+          'abcd_3#62',
+          'abcd_3#48',
+          'abcd_3#42',
+          'abcd_3#52',
+          'abcd_3#57',
+          'abcd_3#49',
+          'abcd_4#3',
+          'abcd_4#2',
+          'abcd_3#39',
+          'abcd_3#38',
+          'abcd_4#11',
+          '5749_2#1'
+        ],'order of sample names matches the raxml tree');
+
+
+done_testing();
diff --git a/t/Bio/Roary/SequenceLengths.t b/t/Bio/Roary/SequenceLengths.t
new file mode 100644
index 0000000..15acd8f
--- /dev/null
+++ b/t/Bio/Roary/SequenceLengths.t
@@ -0,0 +1,34 @@
+#!/usr/bin/env perl
+# Tests that Bio::Roary::SequenceLengths returns the expected length for each sequence.
+use strict;
+use warnings;
+use Data::Dumper;
+
+BEGIN { unshift @INC, './lib'; }
+$ENV{PATH} .= ':./bin';
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::SequenceLengths');
+}
+
+my $obj = Bio::Roary::SequenceLengths->new( fasta_file => 't/data/example_1.faa' );
+ok( $obj, 'Initialise object' );
+
+# Expected lengths for the sequences in t/data/example_1.faa.
+my %expected_length_of = (
+    '1234#10_00001' => 145,
+    '1234#10_00002' => 246,
+    '1234#10_00003' => 113,
+    '1234#10_00005' => 207,
+    '1234#10_00006' => 211,
+    '1234#10_00007' => 242,
+);
+
+is_deeply(
+    $obj->sequence_lengths,
+    \%expected_length_of,
+    'hash with lengths of each sequence'
+);
+
+done_testing();
diff --git a/t/Bio/Roary/SortFasta.t b/t/Bio/Roary/SortFasta.t
new file mode 100755
index 0000000..3450084
--- /dev/null
+++ b/t/Bio/Roary/SortFasta.t
@@ -0,0 +1,58 @@
+#!/usr/bin/env perl
+# Tests Bio::Roary::SortFasta: sorting, its two clean-up options, and _percentage_similarity.
+use strict;
+use warnings;
+use Data::Dumper;
+use Test::Files;
+
+BEGIN { unshift( @INC, './lib' ) }
+$ENV{PATH} .= ":./bin";
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::SortFasta');
+}
+
+my $obj;
+
+# Default options on the out_of_order_fasta fixture.
+ok( $obj = Bio::Roary::SortFasta->new(
+  input_filename   => 't/data/out_of_order_fasta.fa',
+), 'initalise object');
+
+ok($obj->sort_fasta, 'sort the fasta file');
+ok(-e 't/data/out_of_order_fasta.fa.sorted.fa', 'the new file exists');
+compare_ok('t/data/out_of_order_fasta.fa.sorted.fa', 't/data/expected_out_of_order_fasta.fa.sorted.fa', 'check order of sorted fasta');
+
+# make_multiple_of_three on the uneven_sequences fixture.
+ok( $obj = Bio::Roary::SortFasta->new(
+  input_filename   => 't/data/uneven_sequences.fa',
+  make_multiple_of_three => 1,
+), 'initalise object with uneven sequences');
+ok($obj->sort_fasta, 'sort the fasta file');
+compare_ok($obj->output_filename, 't/data/expected_uneven_sequences.fa', "output sequences are now divisible by three");
+
+# remove_nnn_from_end on the nnn_at_end fixture.
+ok( $obj = Bio::Roary::SortFasta->new(
+  input_filename   => 't/data/nnn_at_end.fa',
+  remove_nnn_from_end => 1,
+), 'initalise object with alignment with nnn at end ');
+ok($obj->sort_fasta, 'sort the fasta file and remove nnn at end');
+compare_ok($obj->output_filename, 't/data/expected_nnn_at_end.fa', "nnn removed from the end of the output sequences");
+
+# Both options together on a fixture that has no trailing nnn to strip.
+ok( $obj = Bio::Roary::SortFasta->new(
+  input_filename   => 't/data/uneven_sequences.fa',
+  make_multiple_of_three => 1,
+  remove_nnn_from_end => 1,
+), 'initalise object with uneven sequences and remove nnn from end but nothing to remove');
+ok($obj->sort_fasta, 'sort the fasta file');
+compare_ok($obj->output_filename, 't/data/expected_uneven_sequences.fa', "output sequences are now divisible by three and no nnn removed");
+
+# is() takes the computed value first and the expected value second.
+is($obj->_percentage_similarity("AAA","BBB"), 0, 'totally different');
+is($obj->_percentage_similarity("AAA","AAA"), 1, 'all the same');
+is($obj->_percentage_similarity("AAAA","AABB"), 0.5, 'half different');
+is($obj->_percentage_similarity("AAAA","AAAABB"), 1, 'first half the same');
+
+done_testing();
diff --git a/t/Bio/Roary/SplitGroups.t b/t/Bio/Roary/SplitGroups.t
new file mode 100644
index 0000000..796575f
--- /dev/null
+++ b/t/Bio/Roary/SplitGroups.t
@@ -0,0 +1,70 @@
+#!/usr/bin/env perl
+# Runs Bio::Roary::SplitGroups over four paralog cluster fixtures and compares
+# each written group file with its expected counterpart.
+use strict;
+use warnings;
+use Data::Dumper;
+use Test::Files;
+
+BEGIN { unshift @INC, './lib'; }
+$ENV{PATH} .= ':./bin';
+
+BEGIN {
+    use Test::Most;
+    use_ok('Bio::Roary::SplitGroups');
+}
+
+my $fixture_dir = 't/data/split_groups';
+my @two_genomes = ( "$fixture_dir/paralogs1.fa", "$fixture_dir/paralogs2.fa" );
+
+# Scenarios run in order; 'number' selects the paralog_clusters<N> input and
+# paralog_exp_clusters<N> expectation and appears in the test description.
+my @scenarios = (
+    {   # test 1 - 100% shared CGN
+        number      => 1,
+        outfile     => 'blah.out',
+        fasta_files => [@two_genomes],
+    },
+    {   # test 2 - partial sharing of CGN
+        number      => 2,
+        outfile     => 'blah2.out',
+        fasta_files => [@two_genomes],
+    },
+    {   # test 3 - one gene with no shared CGN
+        number      => 3,
+        outfile     => 'blah3.out',
+        fasta_files => [@two_genomes],
+    },
+    {   # test 4 - paralogs inside paralogs (inception paralog)
+        number      => 4,
+        outfile     => 'blah4.out',
+        fasta_files => [ @two_genomes, "$fixture_dir/paralogs3.fa" ],
+    },
+);
+
+# Declared outside the loop so each object lives until the next one replaces it.
+my $obj;
+for my $scenario (@scenarios) {
+    my $number  = $scenario->{number};
+    my $outfile = $scenario->{outfile};
+
+    ok(
+        $obj = Bio::Roary::SplitGroups->new(
+            groupfile   => "$fixture_dir/paralog_clusters$number",
+            fasta_files => $scenario->{fasta_files},
+            outfile     => $outfile,
+            _do_sorting => 1
+        ),
+        'initalise object'
+    );
+
+    $obj->split_groups;
+    ok( -e $outfile, 'output file exists' );
+    compare_ok( $outfile, "$fixture_dir/paralog_exp_clusters$number",
+        "split group output correct for test $number" );
+}
+
+# Remove the files written above.
+unlink( $_->{outfile} ) for @scenarios;
+
+done_testing();
diff --git a/t/bin/dummy_blastp b/t/bin/dummy_blastp
new file mode 100755
index 0000000..989625b
--- /dev/null
+++ b/t/bin/dummy_blastp
@@ -0,0 +1,16 @@
+#!/usr/bin/env perl
+# Test stand-in for blastp: parses the options below, ignores their values and
+# touches _blast_results in the current directory.
+use strict;
+use warnings;
+use Getopt::Long;
+GetOptions(
+    'query=s'           => \my $query,
+    'db=s'              => \my $db,
+    'evalue=s'          => \my $evalue,
+    'num_threads=s'     => \my $num_threads,
+    'outfmt=s'          => \my $outfmt,
+    'max_target_seqs=s' => \my $max_target_seqs,
+);
+system( 'touch', '_blast_results' );
+1;
\ No newline at end of file
diff --git a/t/bin/dummy_cd-hit b/t/bin/dummy_cd-hit
new file mode 100755
index 0000000..c3e21e3
--- /dev/null
+++ b/t/bin/dummy_cd-hit
@@ -0,0 +1,5 @@
+#!/usr/bin/env perl
+# Test stand-in for cd-hit: touches output, output.clstr and output.bak.clstr.
+use strict; use warnings;
+system( 'touch', 'output', 'output.clstr', 'output.bak.clstr' );
+1;
\ No newline at end of file
diff --git a/t/bin/dummy_makeblastdb b/t/bin/dummy_makeblastdb
new file mode 100755
index 0000000..885b6e4
--- /dev/null
+++ b/t/bin/dummy_makeblastdb
@@ -0,0 +1,18 @@
+#!/usr/bin/env perl
+# Test stand-in for makeblastdb: parses the options below and touches
+# <out>.phr, <out>.pin and <out>.psq, where <out> is the -o/--out value.
+use strict;
+use warnings;
+use Getopt::Long;
+GetOptions(
+    'o|out=s'      => \my $output_filename,
+    'd|dbtype=s'   => \my $dbtype,
+    'l|logfile=s'  => \my $logfile,
+    'i|in=s'       => \my $in,
+    'parse_seqids' => \my $parse_seqids,
+    'mask_data=s'  => \my $mask_data,
+);
+
+# List-form system() passes the name straight to touch, bypassing the shell.
+system( 'touch', "$output_filename.$_" ) for qw(phr pin psq);
+1;
\ No newline at end of file
diff --git a/t/bin/dummy_mcl b/t/bin/dummy_mcl
new file mode 100755
index 0000000..0f35be9
--- /dev/null
+++ b/t/bin/dummy_mcl
@@ -0,0 +1,12 @@
+#!/usr/bin/env perl
+# Test stand-in for mcl: touches the file named by -o/--output.
+use strict;
+use warnings;
+use Getopt::Long;
+GetOptions(
+    'o|output=s'    => \my $output_filename,
+    'i|inflation=s' => \my $inflation,
+    'a|abc'         => \my $abc_format,
+);
+system( 'touch', $output_filename );    # list form: no shell involved
+1;
\ No newline at end of file
diff --git a/t/bin/dummy_mcxdeblast b/t/bin/dummy_mcxdeblast
new file mode 100755
index 0000000..94a7af4
--- /dev/null
+++ b/t/bin/dummy_mcxdeblast
@@ -0,0 +1,2 @@
+#!/usr/bin/env perl
+1;    # no-op stand-in for mcxdeblast: reads nothing, writes nothing
\ No newline at end of file
diff --git a/t/bin/dummy_segmasker b/t/bin/dummy_segmasker
new file mode 100755
index 0000000..1069b82
--- /dev/null
+++ b/t/bin/dummy_segmasker
@@ -0,0 +1,15 @@
+#!/usr/bin/env perl
+# Test stand-in for segmasker: touches the file named by -o/--out.
+use strict;
+use warnings;
+use Getopt::Long;
+GetOptions(
+    'o|out=s'      => \my $output_filename,
+    'i|in=s'       => \my $in,
+    'infmt=s'      => \my $infmt,
+    'outfmt=s'     => \my $outfmt,
+    'parse_seqids' => \my $parse_seqids,
+    'mask_data=s'  => \my $mask_data,
+);
+system( 'touch', $output_filename );    # list form: no shell involved
+1;
\ No newline at end of file
diff --git a/t/data/accessory_graphs/core_deletion b/t/data/accessory_graphs/core_deletion
new file mode 100644
index 0000000..7ff1cab
--- /dev/null
+++ b/t/data/accessory_graphs/core_deletion
@@ -0,0 +1,8 @@
+group_A: 1_AA	2_AA	3_AA
+group_B: 1_BB	2_BB	3_BB
+group_C: 1_CC	2_CC	3_CC
+group_D: 1_DD
+group_E: 1_EE
+group_F: 1_FF
+group_G: 1_GG	2_GG	3_GG
+group_H: 1_HH	2_HH	3_HH
\ No newline at end of file
diff --git a/t/data/accessory_graphs/core_island b/t/data/accessory_graphs/core_island
new file mode 100644
index 0000000..0414a96
--- /dev/null
+++ b/t/data/accessory_graphs/core_island
@@ -0,0 +1,8 @@
+group_A: 1_core_island_A
+group_B: 1_core_island_B
+group_C: 1_core_island_C
+group_D: 1_core_island_D	2_core_island_D	3_core_island_D
+group_E: 1_core_island_E	2_core_island_E	3_core_island_E
+group_F: 1_core_island_F	2_core_island_F	3_core_island_F
+group_G: 1_core_island_G
+group_H: 1_core_island_H
\ No newline at end of file
diff --git a/t/data/accessory_graphs/file_1.fa b/t/data/accessory_graphs/file_1.fa
new file mode 100644
index 0000000..02fa631
--- /dev/null
+++ b/t/data/accessory_graphs/file_1.fa
@@ -0,0 +1,54 @@
+>1_A
+AAAA
+>1_B
+BBBB
+>1_C
+CCCC
+>1_E
+AAAA
+>1_F
+BBBB
+>1_F_insertion
+BBBB
+>1_G
+CCCC
+>1_H
+AAAA
+>1_I
+BBBB
+>1_J
+CCCC
+>1_Z
+DDDD
+>1_AA
+AAAA
+>1_BB
+AAAA
+>1_CC
+AAAA
+>1_DD
+AAAA
+>1_EE
+AAAA
+>1_FF
+AAAA
+>1_GG
+AAAA
+>1_HH
+AAAA
+>1_core_island_A
+AAAA
+>1_core_island_B
+AAAA
+>1_core_island_C
+AAAA
+>1_core_island_D
+AAAA
+>1_core_island_E
+AAAA
+>1_core_island_F
+AAAA
+>1_core_island_G
+AAAA
+>1_core_island_H
+AAAA
\ No newline at end of file
diff --git a/t/data/accessory_graphs/file_1.gff b/t/data/accessory_graphs/file_1.gff
new file mode 100644
index 0000000..cfb2f61
--- /dev/null
+++ b/t/data/accessory_graphs/file_1.gff
@@ -0,0 +1,36 @@
+##gff-version 3
+##sequence-region contig01 1 1500
+contig01	abc	CDS	1	90	.	-	0	ID=1_A
+contig01	abc	CDS	100	190	.	+	0	ID=1_B
+contig01	abc	CDS	200	290	.	+	0	ID=1_C
+contig01	abc	CDS	300	390	.	-	0	ID=1_E
+contig01	abc	CDS	400	490	.	+	0	ID=1_F
+contig01	abc	CDS	500	590	.	+	0	ID=1_F_insertion
+contig01	abc	CDS	600	690	.	+	0	ID=1_G
+contig01	abc	CDS	700	790	.	+	0	ID=1_H
+contig02	abc	CDS	800	890	.	+	0	ID=1_Z
+contig03	abc	CDS	1	90	.	-	0	ID=1_AA
+contig03	abc	CDS	100	190	.	+	0	ID=1_BB
+contig03	abc	CDS	200	290	.	+	0	ID=1_CC
+contig03	abc	CDS	300	390	.	-	0	ID=1_DD
+contig03	abc	CDS	400	490	.	+	0	ID=1_EE
+contig03	abc	CDS	500	590	.	+	0	ID=1_FF
+contig03	abc	CDS	600	690	.	+	0	ID=1_GG
+contig03	abc	CDS	700	790	.	+	0	ID=1_HH
+contig04	abc	CDS	1	90	.	-	0	ID=1_core_island_A
+contig04	abc	CDS	100	190	.	+	0	ID=1_core_island_B
+contig04	abc	CDS	200	290	.	+	0	ID=1_core_island_C
+contig04	abc	CDS	300	390	.	-	0	ID=1_core_island_D
+contig04	abc	CDS	400	490	.	+	0	ID=1_core_island_E
+contig04	abc	CDS	500	590	.	+	0	ID=1_core_island_F
+contig04	abc	CDS	600	690	.	+	0	ID=1_core_island_G
+contig04	abc	CDS	700	790	.	+	0	ID=1_core_island_H
+##FASTA
+>contig01
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+>contig02
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+>contig03
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+>contig04
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
diff --git a/t/data/accessory_graphs/file_2.fa b/t/data/accessory_graphs/file_2.fa
new file mode 100644
index 0000000..e2a2319
--- /dev/null
+++ b/t/data/accessory_graphs/file_2.fa
@@ -0,0 +1,36 @@
+>2_A
+AAAA
+>2_B
+BBBB
+>2_C
+CCCC
+>2_E
+AAAA
+>2_F
+BBBB
+>2_G
+CCCC
+>2_H
+AAAA
+>2_I
+BBBB
+>2_J
+CCCC
+>2_Z
+DDDD
+>2_AA
+AAAA
+>2_BB
+AAAA
+>2_CC
+AAAA
+>2_GG
+AAAA
+>2_HH
+AAAA
+>2_core_island_D
+AAAA
+>2_core_island_E
+AAAA
+>2_core_island_F
+AAAA
\ No newline at end of file
diff --git a/t/data/accessory_graphs/file_2.gff b/t/data/accessory_graphs/file_2.gff
new file mode 100644
index 0000000..73ecc2f
--- /dev/null
+++ b/t/data/accessory_graphs/file_2.gff
@@ -0,0 +1,27 @@
+##gff-version 3
+##sequence-region contig01 1 1500
+contig01	abc	CDS	1	90	.	-	0	ID=2_A
+contig01	abc	CDS	100	190	.	+	0	ID=2_B
+contig01	abc	CDS	200	290	.	+	0	ID=2_C
+contig01	abc	CDS	300	390	.	-	0	ID=2_E
+contig01	abc	CDS	400	490	.	+	0	ID=2_F
+contig01	abc	CDS	600	690	.	+	0	ID=2_G
+contig01	abc	CDS	700	790	.	+	0	ID=2_H
+contig02	abc	CDS	800	890	.	+	0	ID=2_Z
+contig03	abc	CDS	1	90	.	-	0	ID=2_AA
+contig03	abc	CDS	100	190	.	+	0	ID=2_BB
+contig03	abc	CDS	200	290	.	+	0	ID=2_CC
+contig03	abc	CDS	600	690	.	+	0	ID=2_GG
+contig03	abc	CDS	700	790	.	+	0	ID=2_HH
+contig04	abc	CDS	300	390	.	-	0	ID=2_core_island_D
+contig04	abc	CDS	400	490	.	+	0	ID=2_core_island_E
+contig04	abc	CDS	500	590	.	+	0	ID=2_core_island_F
+##FASTA
+>contig01
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+>contig02
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+>contig03
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+>contig04
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
diff --git a/t/data/accessory_graphs/file_3.fa b/t/data/accessory_graphs/file_3.fa
new file mode 100644
index 0000000..de397e1
--- /dev/null
+++ b/t/data/accessory_graphs/file_3.fa
@@ -0,0 +1,36 @@
+>3_A
+AAAA
+>3_B
+BBBB
+>3_C
+CCCC
+>3_E
+AAAA
+>3_F
+BBBB
+>3_G
+CCCC
+>3_H
+AAAA
+>3_I
+BBBB
+>3_J
+CCCC
+>3_Z
+DDDD
+>3_AA
+AAAA
+>3_BB
+AAAA
+>3_CC
+AAAA
+>3_GG
+AAAA
+>3_HH
+AAAA
+>3_core_island_D
+AAAA
+>3_core_island_E
+AAAA
+>3_core_island_F
+AAAA
\ No newline at end of file
diff --git a/t/data/accessory_graphs/file_3.gff b/t/data/accessory_graphs/file_3.gff
new file mode 100644
index 0000000..7682649
--- /dev/null
+++ b/t/data/accessory_graphs/file_3.gff
@@ -0,0 +1,27 @@
+##gff-version 3
+##sequence-region contig01 1 1500
+contig01	abc	CDS	1	99	.	-	0	ID=3_A
+contig01	abc	CDS	100	199	.	+	0	ID=3_B
+contig01	abc	CDS	200	299	.	+	0	ID=3_C
+contig01	abc	CDS	300	390	.	-	0	ID=3_E
+contig01	abc	CDS	400	490	.	+	0	ID=3_F
+contig01	abc	CDS	600	690	.	+	0	ID=3_G
+contig01	abc	CDS	700	790	.	+	0	ID=3_H
+contig02	abc	CDS	800	890	.	+	0	ID=3_Z
+contig03	abc	CDS	1	90	.	-	0	ID=3_AA
+contig03	abc	CDS	100	190	.	+	0	ID=3_BB
+contig03	abc	CDS	200	290	.	+	0	ID=3_CC
+contig03	abc	CDS	600	690	.	+	0	ID=3_GG
+contig03	abc	CDS	700	790	.	+	0	ID=3_HH
+contig04	abc	CDS	300	390	.	-	0	ID=3_core_island_D
+contig04	abc	CDS	400	490	.	+	0	ID=3_core_island_E
+contig04	abc	CDS	500	590	.	+	0	ID=3_core_island_F
+##FASTA
+>contig01
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+>contig02
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+>contig03
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+>contig04
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
\ No newline at end of file
diff --git a/t/data/accessory_graphs/no_accessory b/t/data/accessory_graphs/no_accessory
new file mode 100644
index 0000000..71e8f20
--- /dev/null
+++ b/t/data/accessory_graphs/no_accessory
@@ -0,0 +1,3 @@
+group_A: 1_A	2_A	3_A
+group_B: 1_B	2_B	3_B
+group_C: 1_C	2_C	3_C
\ No newline at end of file
diff --git a/t/data/accessory_graphs/one_branch b/t/data/accessory_graphs/one_branch
new file mode 100644
index 0000000..d9dcdd8
--- /dev/null
+++ b/t/data/accessory_graphs/one_branch
@@ -0,0 +1,5 @@
+group_A: 1_A	2_A	3_A
+group_B1: 1_B
+group_B2: 2_B	3_B
+group_C: 1_C	2_C	3_C
+group_E: 1_E	2_E	3_E
\ No newline at end of file
diff --git a/t/data/accessory_graphs/one_bubble b/t/data/accessory_graphs/one_bubble
new file mode 100644
index 0000000..f6f0271
--- /dev/null
+++ b/t/data/accessory_graphs/one_bubble
@@ -0,0 +1,6 @@
+group_C: 1_C	2_C	3_C
+group_E: 1_E	2_E	3_E
+group_bubble_1: 1_F
+group_bubble_2: 1_F_insertion
+group_F: 2_F	3_F
+group_G: 1_G	2_G	3_G
\ No newline at end of file
diff --git a/t/data/accessory_graphs/single_gene_contig b/t/data/accessory_graphs/single_gene_contig
new file mode 100644
index 0000000..7c1cd4d
--- /dev/null
+++ b/t/data/accessory_graphs/single_gene_contig
@@ -0,0 +1 @@
+group_A: 1_Z	2_Z	3_Z
diff --git a/t/data/accessory_graphs/two_graphs b/t/data/accessory_graphs/two_graphs
new file mode 100644
index 0000000..b6c5aa5
--- /dev/null
+++ b/t/data/accessory_graphs/two_graphs
@@ -0,0 +1,4 @@
+group_A: 1_A	2_A	3_A
+group_B: 1_B	2_B	3_B
+group_G: 1_G	2_G	3_G
+group_H: 1_H	2_H	3_H
\ No newline at end of file
diff --git a/t/data/blast_results b/t/data/blast_results
new file mode 100644
index 0000000..21ebb4e
--- /dev/null
+++ b/t/data/blast_results
@@ -0,0 +1,13 @@
+1234#12_01975	1234#1_01317	100.00	60	0	0	1	60	30	89	8e-38	 123
+1234#1_01317	1234#1_01317	100.00	89	0	0	1	89	1	89	1e-60	 182
+1234#20_01662	1234#1_01317	100.00	37	0	0	1	37	53	89	3e-20	77.4
+1234#3_01167	1234#1_01317	98.33	60	1	0	1	60	30	89	2e-37	 122
+6259_6#6_02209	6259_7#20_00601	99.71	684	2	0	1	684	1427	2110	0.0	1415
+6259_7#20_00601	6259_7#20_00601	100.00	2110	0	0	1	2110	1	2110	0.0	4335
+6593_5#7_01700	6259_7#20_00601	99.72	1057	3	0	1	1057	1054	2110	0.0	2181
+6630_2#15_02136	6259_7#20_00601	100.00	1015	0	0	1	1015	1096	2110	0.0	2104
+6630_4#12_02032	6259_7#20_00601	100.00	1046	0	0	1	1046	1065	2110	0.0	2164
+6631_2#21_02147	6259_7#20_00601	99.13	578	5	0	1	578	1533	2110	0.0	1199
+6631_4#23_02060	6259_7#20_00601	100.00	1290	0	0	1	1290	821	2110	0.0	2663
+6631_7#21_02171	6259_7#20_00601	97.37	684	0	1	1	666	1427	2110	0.0	1371
+6664_1#10_02278	6259_6#6_02209	100.00	404	0	0	1	404	281	684	0.0	 838
\ No newline at end of file
diff --git a/t/data/clustered_proteins b/t/data/clustered_proteins
new file mode 100644
index 0000000..7922af2
--- /dev/null
+++ b/t/data/clustered_proteins
@@ -0,0 +1,12 @@
+speH: abc_00004	abc_00004	abc_00004	abc_00006	abc_00006	abc_00006	1_2	2_2	3_2
+group_2: abc_00014	abc_00014	abc_00014	1_6	2_7	abc_00015
+yfnB: abc_00016	abc_00016	3_5
+group_5: abc_00003	abc_00003	abc_00003
+argF: 1_3	2_3	3_3
+group_8: abc_01705	abc_01705	abc_01705
+group_9: abc_00010	abc_00010	abc_00010
+hly: 1_1	2_1	3_1
+arcC1: abc_00008	abc_00008	abc_00008
+group_10: abc_00011	abc_00011	abc_00011
+group_4: abc_00002	abc_00002	abc_00002
+group_11: abc_00012	abc_00012	abc_00012
diff --git a/t/data/clustered_proteins_pan_genome b/t/data/clustered_proteins_pan_genome
new file mode 100644
index 0000000..77d294e
--- /dev/null
+++ b/t/data/clustered_proteins_pan_genome
@@ -0,0 +1,21 @@
+group_13: abc_00014	abc_00014
+speH: 1_2	2_2
+arcC1: abc_00008	abc_00008
+group_2: abc_00002	abc_00002
+yfnB: abc_00016	abc_00016
+group_10: abc_00010	abc_00010
+group_12: abc_00012	abc_00012
+hly: 1_1	2_1
+group_6: abc_00006	abc_00006
+group_9: abc_01705	abc_01705
+group_14: 1_6	2_7
+group_3: abc_00003	abc_00003
+argF: 1_3	2_3
+group_11: abc_00011	abc_00011
+group_4: abc_00004	abc_00004
+group_18: abc_50010
+group_20: abc_50014
+group_16: abc_50002
+group_17: 3_3
+group_21: 3_5
+group_19: abc_50012
diff --git a/t/data/clustered_proteins_post_analysis b/t/data/clustered_proteins_post_analysis
new file mode 100644
index 0000000..8688f1e
--- /dev/null
+++ b/t/data/clustered_proteins_post_analysis
@@ -0,0 +1,13 @@
+group_12: abc_00002	abc_00002
+group_13: abc_00003	abc_00003
+group_5: 1_6	2_7
+hly: 1_1	2_1
+group_6: abc_01705	abc_01705
+group_8: abc_00011	abc_00011
+speH: 1_2	2_2
+group_2: abc_00004	abc_00004
+group_3: abc_00006	abc_00006
+group_7: abc_00010	abc_00010
+yfnB: abc_00016	abc_00016
+arcC1: abc_00008	abc_00008
+argF: 1_3	2_3
diff --git a/t/data/clusters_input.fa b/t/data/clusters_input.fa
new file mode 100644
index 0000000..9e028da
--- /dev/null
+++ b/t/data/clusters_input.fa
@@ -0,0 +1,20 @@
+>2363_5_03666
+AAAAAAAAAAAAAGGGGGGGGGGGGGG
+>4075_2#3_03437
+AAAAAAAAAAAAAGGGGGGGGGGGGGG
+>4075_1#8_03461
+AAAAAAAAAAAAAGGGGGGGGGGGGGG
+>2212_3_02841
+AAAAAAAAAAAAAGGGGGGGGGGGGGG
+>2363_5_00947
+AAAAAAAAAAAAAGGGGGGGGGGGGGG
+>2363_7_00085
+AAAAAAAAAAAAAGGGGGGGGGGGGGG
+>2460_2_00826
+AAAAAAAAAAAAAGGGGGGGGGGGGGG
+>4075_1#6_04091
+AAAAAAAAAAAAAGGGGGGGGGGGGGG
+>4075_1#3_04238
+AAAAAAAAAAAAAGGGGGGGGGGGGGG
+>3634_6_04078
+AAAAAAAAAAAAAGGGGGGGGGGGGGG
diff --git a/t/data/clusters_to_inflate b/t/data/clusters_to_inflate
new file mode 100644
index 0000000..e4ec02b
--- /dev/null
+++ b/t/data/clusters_to_inflate
@@ -0,0 +1,40 @@
+>Cluster 43
+0	2184aa, >2363_5_03666... *
+>Cluster 44
+0	2138aa, >4075_2#3_03437... *
+>Cluster 45
+0	2130aa, >2212_1_02994... at 99.48%
+1	2130aa, >2212_6_02081... at 99.01%
+2	2130aa, >2363_1_00606... at 99.48%
+3	2130aa, >2363_2_02124... at 99.48%
+4	2130aa, >2363_3_01371... at 99.39%
+5	2130aa, >2363_6_01272... at 99.15%
+6	2130aa, >2363_8_00966... at 99.48%
+7	2130aa, >2541_2_02425... at 99.48%
+8	2130aa, >2541_3_02449... at 99.01%
+9	2130aa, >2541_7_00441... at 99.48%
+10	2130aa, >2541_8_00644... at 99.48%
+11	2130aa, >2781_2_02909... at 99.48%
+12	2130aa, >3634_6_00968... at 99.48%
+13	2130aa, >3634_7_01056... at 99.39%
+14	2130aa, >3634_8_02606... at 99.48%
+15	2137aa, >4075_1#8_03461... *
+>Cluster 46
+0	2132aa, >2212_3_02841... *
+>Cluster 47
+0	2130aa, >2363_5_00947... *
+>Cluster 48
+0	2130aa, >2363_7_00085... *
+>Cluster 49
+0	2130aa, >2460_2_00826... *
+>Cluster 50
+0	2054aa, >4075_1#6_04091... *
+>Cluster 51
+0	2042aa, >4075_1#3_04238... *
+>Cluster 52
+0	1994aa, >2212_1_01414... at 99.75%
+1	1994aa, >2363_1_00811... at 99.95%
+2	1994aa, >2541_2_00696... at 99.95%
+3	1994aa, >2541_8_00920... at 99.95%
+4	2005aa, >3634_6_04078... *
+5	2005aa, >3634_7_00911... at 99.95%
\ No newline at end of file
diff --git a/t/data/clusters_to_inflate.mcl b/t/data/clusters_to_inflate.mcl
new file mode 100644
index 0000000..97f4006
--- /dev/null
+++ b/t/data/clusters_to_inflate.mcl
@@ -0,0 +1,4 @@
+2363_5_03666 4075_2#3_03437 
+4075_1#8_03461 
+2212_3_02841 2363_5_00947
+2363_7_00085 2460_2_00826 4075_1#6_04091 4075_1#3_04238 3634_6_04078
\ No newline at end of file
diff --git a/t/data/clusters_to_inflate_original_input.fa b/t/data/clusters_to_inflate_original_input.fa
new file mode 100644
index 0000000..ef3f0b4
--- /dev/null
+++ b/t/data/clusters_to_inflate_original_input.fa
@@ -0,0 +1,60 @@
+>2212_1_01414
+AAAGGGTTT
+>4075_1#3_04238
+AAAGGGTTT
+>4075_1#6_04091
+AAAGGGTTT
+>2212_1_02994
+AAAGGGTTT
+>2363_5_00947
+AAAGGGTTT
+>2363_7_00085
+AAAGGGTTT
+>2460_2_00826
+AAAGGGTTT
+>2212_3_02841
+AAAGGGTTT
+>4075_2#3_03437
+AAAGGGTTT
+>2363_5_03666
+AAAGGGTTT
+>2363_1_00811
+AAAGGGTTT
+>2212_6_02081
+AAAGGGTTT
+>2541_8_00644
+AAAGGGTTT
+>2781_2_02909
+AAAGGGTTT
+>3634_6_00968
+AAAGGGTTT
+>3634_7_01056
+AAAGGGTTT
+>3634_8_02606
+AAAGGGTTT
+>4075_1#8_03461
+AAAGGGTTT
+>2541_2_00696
+AAAGGGTTT
+>2363_1_00606
+AAAGGGTTT
+>2541_8_00920
+AAAGGGTTT
+>2363_2_02124
+AAAGGGTTT
+>3634_6_04078
+AAAGGGTTT
+>2363_3_01371
+AAAGGGTTT
+>3634_7_00911
+AAAGGGTTT
+>2363_6_01272
+AAAGGGTTT
+>2363_8_00966
+AAAGGGTTT
+>2541_2_02425
+AAAGGGTTT
+>2541_3_02449
+AAAGGGTTT
+>2541_7_00441
+AAAGGGTTT
diff --git a/t/data/clustersfile b/t/data/clustersfile
new file mode 100644
index 0000000..42b2d7f
--- /dev/null
+++ b/t/data/clustersfile
@@ -0,0 +1,33 @@
+>Cluster 1
+0	4863aa, >6259_8#9_01142... *
+>Cluster 2
+0	4666aa, >6631_2#11_00851... *
+>Cluster 3
+0	4516aa, >6631_4#5_01901... *
+>Cluster 4
+0	4230aa, >6664_1#21_00211... *
+>Cluster 5
+0	4220aa, >6259_8#13_01077... *
+>Cluster 6
+0	4201aa, >6630_4#9_00008... *
+1	4201aa, >6631_1#23_00379... at 100.00%
+2	4201aa, >6631_2#7_00715... at 99.95%
+3	4201aa, >6631_5#1_00087... at 99.95%
+4	4201aa, >6649_8#10_00838... at 99.95%
+5	4201aa, >6664_1#13_00087... at 99.95%
+>Cluster 7
+0	4195aa, >6259_5#1_00688... *
+>Cluster 8
+0	4165aa, >6259_5#8_00815... *
+>Cluster 9
+0	4153aa, >6259_7#7_02078... *
+1	4153aa, >6259_7#8_01884... at 100.00%
+2	4153aa, >6631_2#22_01965... at 99.95%
+>Cluster 10
+0	4153aa, >6631_5#24_01032... *
+>Cluster 11
+0	4124aa, >6593_5#13_00457... *
+1	4124aa, >6630_1#9_00031... at 100.00%
+2	4124aa, >6630_3#1_00008... at 100.00%
+3	4124aa, >6630_3#2_00088... at 100.00%
+4	4124aa, >6631_1#4_00087... at 100.00%
\ No newline at end of file
diff --git a/t/data/core_alignment.csv b/t/data/core_alignment.csv
new file mode 100644
index 0000000..0c0703a
--- /dev/null
+++ b/t/data/core_alignment.csv
@@ -0,0 +1,4 @@
+"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","Min group size nuc","Max group size nuc","Avg group size nuc","query_1","query_2"
+"hly","","Alpha-toxin","2","2","1","1","7",,,"",,,,"query_1_1","query_2_1"
+"speH","","hypothetical protein","2","2","1","1","11",,,"",,,,"query_1_2","query_2_2"
+"argF","","Ornithine carbamoyltransferase","2","2","1","1","6",,,"",,,,"query_1_3","query_2_3"
diff --git a/t/data/core_alignment/argF.fa.aln b/t/data/core_alignment/argF.fa.aln
new file mode 100644
index 0000000..92d144b
--- /dev/null
+++ b/t/data/core_alignment/argF.fa.aln
@@ -0,0 +1,4 @@
+>query_1_3
+AAAAA-
+>query_2_3
+CCCCC-
\ No newline at end of file
diff --git a/t/data/core_alignment/hly.fa.aln b/t/data/core_alignment/hly.fa.aln
new file mode 100644
index 0000000..e5c0a1e
--- /dev/null
+++ b/t/data/core_alignment/hly.fa.aln
@@ -0,0 +1,4 @@
+>query_1_1
+GGGGG
+>query_2_1
+TTTTT
\ No newline at end of file
diff --git a/t/data/core_alignment/speH.fa.aln b/t/data/core_alignment/speH.fa.aln
new file mode 100644
index 0000000..954f5d4
--- /dev/null
+++ b/t/data/core_alignment/speH.fa.aln
@@ -0,0 +1,4 @@
+>query_1_2
+TTTTT
+>query_2_2
+GGGGG
\ No newline at end of file
diff --git a/t/data/core_alignment_core0.66.csv b/t/data/core_alignment_core0.66.csv
new file mode 100644
index 0000000..8bbe319
--- /dev/null
+++ b/t/data/core_alignment_core0.66.csv
@@ -0,0 +1,4 @@
+"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","Min group size nuc","Max group size nuc","Avg group size nuc","query_1","query_2","query_3"
+"hly","","Alpha-toxin","1","1","1","1","7",,,"",,,,"query_1_1","",""
+"speH","","hypothetical protein","2","2","1","1","11",,,"",,,,"query_1_2","query_2_2",""
+"argF","","Ornithine carbamoyltransferase","1","1","1","1","6",,,"",,,,"","query_2_3",""
diff --git a/t/data/core_alignment_gene_lookup/expected_core_gene_alignment.aln b/t/data/core_alignment_gene_lookup/expected_core_gene_alignment.aln
new file mode 100644
index 0000000..227cd1c
--- /dev/null
+++ b/t/data/core_alignment_gene_lookup/expected_core_gene_alignment.aln
@@ -0,0 +1,45 @@
+>query_1
+ATGAATCTACCTTTACCTGATAATTATGAATTTGTGTTTTTATCTGGTGGATTATCTGGG
+CATGCTGCAATGATGTCATTTTTTAATGTTTGTGGCATTGGATATTTGTATCATCATATG
+GATTTAATGAAAAATAGATATATAGATTATTACCATTTTTCTAGGATTGAAAATTTATAT
+TCAATTATAACATATGGACAATACAGTTTAACGCAAGGAATGAATAATATAGGTAAATAT
+TTGACTTTAATTAATAAAATTCCAATTCTTTTTTTGGTAAGAGATCCCATATCAAGACTA
+AAAACCGGAGTAAATCATCCTATTCTAAATCCAAAAAGTATGAAGGAGATATGTTTAAAC
+AATGATTATAGTGATGTGTTTAAGAATAAAATGTATGTTGGCGATATTGGAAAAAATTTT
+TACTATTCAGAAAAGCCAAGCATGAAATATTTACCTAGATTGAAATATGAAAATTTGGGA
+ATATTTTTAAAACCACAAGAATTTGAGCGTTTAAAGCAAGATTCTAAGCTATTTGATGTT
+GCTAAAAGATATTTGAATAATTTTATTGAAGCTTTAGAAGAGAGAATAGACCTAGAAAAA
+GCTAAATTATTTAAAGAAAAAGACGTGTTAAACTATTTAAAAGAAAATAAAGAATTAAGA
+GTTAAGTTAAAAAACATATTAGATAAAGAACTTGTTCATATTAAACAACATCGTCCAGAT
+ATAGTAGCTTCTTGGAAATACTATCAAGAATTTGAACAAATGTGCAAGGAGTTGAATGGT
+AATATTTAG
+>query_2
+ATGAATCTACCTTTACCTGATAATTATGAATTTGTGTTTTTATCTGGTGGATTATCTGGG
+CATGCTGCAATGATGTCATTTTTTAATGTTTGTGGCATTGGATATTTGTATCATCATATG
+GATTTAATGAAAAATAGATATATAGATTATTACCATTTTTCTAGGATTGAAAATTTATAT
+TCAATTATAACATATGGACAATACAGTTTAACGCAAGGAATGAATAATATAGGTAAATAT
+TTGACTTTAATTAATNNNNNNNNNATTCTTTTTTTGGTAAGAGATCCCATATCAAGACTA
+AAAACCGGAGTAAATCATCCTATTCTAAATCCAAAAAGTATGAAGGAGATATGTTTAAAC
+AATGATTATAGTGATNNNNNNNNNAATAAAATGTATGTTGGCGATATTGGAAAAAATTTT
+TACTATTCAGAAAAGCCAAGCATGAAATATTTACCTAGATTGAAATATGAAAATTTGGGA
+ATATTTTTAAAACCACAAGAATTTGAGCGTTTAAAGCAAGATTCTAAGCTATTTGATGTT
+GCTAAAAGATATTTGAATAATTTTATTGAAGCTTTAGAAGAGAGAATAGACCTAGAAAAA
+GCTAAATTATTTAAAGAAAAAGACGTGTTAAACTATTTAAAAGAAAATAAAGAATTAAGA
+GTTAAGTTAAAAAACATATTAGATAAAGAACTTGTTCATATTAAACAACATCGTCCAGAT
+ATAGTAGCTTCTTGGAAATACTATCAAGAATTTGAACAAATGTGCAAGGAGTTGAATGGT
+AATATTTAG
+>query_3
+ATGAATCTACCTTTACCTGATAATTATGAATTTGTGTTTTTATCTGGTGGATTATCTGGG
+CATGCTGCAATGATGTCATTTTTTAATGTTTGTGGCATTGGATATTTGTATCATCATATG
+GATTTAATGAAAAATAGATATATAGATTATTACCATTTTTCTAGGATTGAAAATTTATAT
+TCAATTATAACATATGGACAATACAGTTTAACGCAAGGAATGAATAATATAGGTAAATAT
+TTGACTTTAATTAATAAAATTCCAATTCTTTTTTTGGTAAGAGATCCCATATCAAGACTA
+AAAACCGGAGTAAATCATCCTATTCTAAATCCAAAAAGTATGAAGGAGATATGTTTAAAC
+AATGATTATAGTGATGTGTTTAAGAATAAAATGTATGTTGGCGATATTGGAAAAAATTTT
+TACTATTCAGAAAAGCCAAGCATGAAATATTTACCTAGATTGAAATATGAAAATTTGGGA
+ATATTTTTAAAACCACAAGAATTCGAGCGTTTAAAGCAAGATTCTAAGCTATTTGATGTT
+GCTAAAAGATATTTGAATAATTTTATTGAAGCTTTAGAAGAGAGAATAGACCTAGAAAAA
+GCTAAATTATTTAAAGAAAAAGACGTGTTAAACTATTTAAAAGAAAATAAAGAATTAAGA
+GTTAAGTTAAAAAACATATTAGATAAAGAACTTGTTCATATTAAACAACATCGTCCAGAT
+ATAGTAGCTTCTTGGAAATACTATCAAGAATTTGAACAAATGTGCAAGGAGTTGAATGGT
+AATATTTAG
diff --git a/t/data/core_alignment_gene_lookup/query_1.gff b/t/data/core_alignment_gene_lookup/query_1.gff
new file mode 100644
index 0000000..a3dc10a
--- /dev/null
+++ b/t/data/core_alignment_gene_lookup/query_1.gff
@@ -0,0 +1,29 @@
+##gff-version 3
+##sequence-region gnl|IFR|Ef_ABC123_contig000001 1 1320
+gnl|IFR|Ef_ABC123_contig000001	Prodigal:2.6	CDS	52	513	.	+	0	ID=Ef_ABC123_00010;Parent=Ef_ABC123_00010_gene;inference=ab initio prediction:Prodigal:2.6,similar to AA sequence:RefSeq:CJSA_1363;locus_tag=Ef_ABC123_00010;product=CJSA_1363 putative sugar transferase:1372570 reverse MW:76273;protein_id=gnl|IFR|Ef_ABC123_00010
+gnl|IFR|Ef_ABC123_contig000001	Prodigal:2.6	CDS	510	746	.	+	0	ID=Ef_ABC123_00020;Parent=Ef_ABC123_00020_gene;inference=ab initio prediction:Prodigal:2.6,similar to AA sequence:RefSeq:CJSA_1363;locus_tag=Ef_ABC123_00020;product=CJSA_1363 putative sugar transferase:1372570 reverse MW:76273;protein_id=gnl|IFR|Ef_ABC123_00020
+gnl|IFR|Ef_ABC123_contig000001	Prodigal:2.6	CDS	878	1207	.	+	0	ID=Ef_ABC123_00030;Parent=Ef_ABC123_00030_gene;inference=ab initio prediction:Prodigal:2.6,similar to AA sequence:RefSeq:A911_06907;locus_tag=Ef_ABC123_00030;product=A911_06907 putative sugar transferase:1355468 reverse MW:73559;protein_id=gnl|IFR|Ef_ABC123_00030
+##FASTA
+>gnl|IFR|Ef_ABC123_contig000001
+GAAAAAGAAAAAATAAACTATAAAAACATATCAGCTGAACTTGCTTGGGAAATGAATCTA
+CCTTTACCTGATAATTATGAATTTGTGTTTTTATCTGGTGGATTATCTGGGCATGCTGCA
+ATGATGTCATTTTTTAATGTTTGTGGCATTGGATATTTGTATCATCATATGGATTTAATG
+AAAAATAGATATATAGATTATTACCATTTTTCTAGGATTGAAAATTTATATTCAATTATA
+ACATATGGACAATACAGTTTAACGCAAGGAATGAATAATATAGGTAAATATTTGACTTTA
+ATTAATAAAATTCCAATTCTTTTTTTGGTAAGAGATCCCATATCAAGACTAAAAACCGGA
+GTAAATCATCCTATTCTAAATCCAAAAAGTATGAAGGAGATATGTTTAAACAATGATTAT
+AGTGATGTGTTTAAGAATAAAATGTATGTTGGCGATATTGGAAAAAATTTTTACTATTCA
+GAAAAGCCAAGCATGAAATATTTACCTAGATGATTAATGAAGATACAATGTATCAGACAT
+CGCTTTGCTTATTATTTTCAAATAGAGATATTACTTATATTGACATGGAAGAAATAAAAC
+CAGCAAAGGCATTCGATACAATGTGTGATTTAGCTAACAAATTTGGTTTTAAGAAACCAA
+CGGATAAAAAATTTTTTGAAGGAGTTATGAATGGAGATTTAGCAGGTTTTATACCTATTA
+ATTTATTTATTGATAAAAAAATTTAATTTATAATAACAAAGTTATTTATAAAGATAATGA
+TAGCATCCATCTTCAAATCACATCTACAAATTTAATTGAAATTTATAAACAATCTAAAGA
+ATATATCAACTTTACTAAAGAATTTTTTGATAAACCTTTGAAATATGAAAATTTGGGAAT
+ATTTTTAAAACCACAAGAATTTGAGCGTTTAAAGCAAGATTCTAAGCTATTTGATGTTGC
+TAAAAGATATTTGAATAATTTTATTGAAGCTTTAGAAGAGAGAATAGACCTAGAAAAAGC
+TAAATTATTTAAAGAAAAAGACGTGTTAAACTATTTAAAAGAAAATAAAGAATTAAGAGT
+TAAGTTAAAAAACATATTAGATAAAGAACTTGTTCATATTAAACAACATCGTCCAGATAT
+AGTAGCTTCTTGGAAATACTATCAAGAATTTGAACAAATGTGCAAGGAGTTGAATGGTAA
+TATTTAGGAGAAAGATTTGTAAATTAATAAAAAAAAGATATAATTGTGTTTAAAATTAGA
+GAAAGGAATCAATATGAAACAAGGGGATTTTACAAAAGTTGCAAAACATTATCACAATAG
\ No newline at end of file
diff --git a/t/data/core_alignment_gene_lookup/query_2.gff b/t/data/core_alignment_gene_lookup/query_2.gff
new file mode 100644
index 0000000..2638a86
--- /dev/null
+++ b/t/data/core_alignment_gene_lookup/query_2.gff
@@ -0,0 +1,29 @@
+##gff-version 3
+##sequence-region gnl|IFR|Ef_EFG123_contig000001 1 1320
+gnl|IFR|Ef_EFG123_contig000001	Prodigal:2.6	CDS	52	513	.	+	0	ID=Ef_EFG123_00010;Parent=Ef_EFG123_00010_gene;inference=ab initio prediction:Prodigal:2.6,similar to AA sequence:RefSeq:CJSA_1363;locus_tag=Ef_EFG123_00010;product=CJSA_1363 putative sugar transferase:1372570 reverse MW:76273;protein_id=gnl|IFR|Ef_EFG123_00010
+gnl|IFR|Ef_EFG123_contig000001	Prodigal:2.6	CDS	510	746	.	+	0	ID=Ef_EFG123_00020;Parent=Ef_EFG123_00020_gene;inference=ab initio prediction:Prodigal:2.6,similar to AA sequence:RefSeq:CJSA_1363;locus_tag=Ef_EFG123_00020;product=CJSA_1363 putative sugar transferase:1372570 reverse MW:76273;protein_id=gnl|IFR|Ef_EFG123_00020
+gnl|IFR|Ef_EFG123_contig000001	Prodigal:2.6	CDS	878	1207	.	+	0	ID=Ef_EFG123_00030;Parent=Ef_EFG123_00030_gene;inference=ab initio prediction:Prodigal:2.6,similar to AA sequence:RefSeq:A911_06907;locus_tag=Ef_EFG123_00030;product=A911_06907 putative sugar transferase:1355468 reverse MW:73559;protein_id=gnl|IFR|Ef_EFG123_00030
+##FASTA
+>gnl|IFR|Ef_EFG123_contig000001
+GAAAAAGAAAAAATAAACTATAAAAACATATCAGCTGAACTTGCTTGGGAAATGAATCTA
+CCTTTACCTGATAATTATGAATTTGTGTTTTTATCTGGTGGATTATCTGGGCATGCTGCA
+ATGATGTCATTTTTTAATGTTTGTGGCATTGGATATTTGTATCATCATATGGATTTAATG
+AAAAATAGATATATAGATTATTACCATTTTTCTAGGATTGAAAATTTATATTCAATTATA
+ACATATGGACAATACAGTTTAACGCAAGGAATGAATAATATAGGTAAATATTTGACTTTA
+ATTAATAANNNNNNNATTCTTTTTTTGGTAAGAGATCCCATATCAAGACTAAAAACCGGA
+GTAAATCATCCTATTCTAAATCCAAAAAGTATGAAGGAGATATGTTTAAACAATGATTAT
+AGTGATGTNNNNNNNAATAAAATGTATGTTGGCGATATTGGAAAAAATTTTTACTATTCA
+GAAAAGCCAAGCATGAAATATTTACCTAGATGATTAATGAAGATACAATGTATCAGACAT
+CGCTTTGCNNNNNNTTTTCAAATAGAGATATTACTTATATTGACATGGAAGAAATAAAAC
+CAGCAAAGGCATTCGATACAATGTGTGATTTAGCTAACAAATTTGGTTTTAAGAAACCAA
+CGGATAAAAAATTTTTTGAAGGAGTTATGAATGGAGATTTAGCAGGTTTTATACCTATTA
+ATTTATTTNNNNNNNAAAAAATTTAATTTATAATAACAAAGTTATTTATAAAGATAATGA
+TAGCATCCATCTTCAAATCACATCTACAAATTTAATTGAAATTTATAAACAATCTAAAGA
+ATATATCAACTTTACTAAAGAATTTTTTGATAAACCTTTGAAATATGAAAATTTGGGAAT
+ATTTTTAAAACCACAAGAATTTGAGCGTTTAAAGCAAGATTCTAAGCTATTTGATGTTGC
+TAAAAGATATTTGAATAATTTTATTGAAGCTTTAGAAGAGAGAATAGACCTAGAAAAAGC
+TAAATTATTTAAAGAAAAAGACGTGTTAAACTATTTAAAAGAAAATAAAGAATTAAGAGT
+TAAGTTAAAAAACATATTAGATAAAGAACTTGTTCATATTAAACAACATCGTCCAGATAT
+AGTAGCTTCTTGGAAATACTATCAAGAATTTGAACAAATGTGCAAGGAGTTGAATGGTAA
+TATTTAGGAGAAAGATTTGTAAATTAATAAAAAAAAGATATAATTGTGTTTAAAATTAGA
+GAAAGGAATCAATATGAAACAAGGGGATTTTACAAAAGTTGCAAAACATTATCACAATAG
\ No newline at end of file
diff --git a/t/data/core_alignment_gene_lookup/query_3.gff b/t/data/core_alignment_gene_lookup/query_3.gff
new file mode 100644
index 0000000..2813d43
--- /dev/null
+++ b/t/data/core_alignment_gene_lookup/query_3.gff
@@ -0,0 +1,29 @@
+##gff-version 3
+##sequence-region gnl|IFR|Ef_HIJ123_contig000001 1 1320
+gnl|IFR|Ef_HIJ123_contig000001	Prodigal:2.6	CDS	52	513	.	+	0	ID=Ef_HIJ123_00010;Parent=Ef_HIJ123_00010_gene;inference=ab initio prediction:Prodigal:2.6,similar to AA sequence:RefSeq:CJSA_1363;locus_tag=Ef_HIJ123_00010;product=CJSA_1363 putative sugar transferase:1372570 reverse MW:76273;protein_id=gnl|IFR|Ef_HIJ123_00010
+gnl|IFR|Ef_HIJ123_contig000001	Prodigal:2.6	CDS	510	746	.	+	0	ID=Ef_HIJ123_00020;Parent=Ef_HIJ123_00020_gene;inference=ab initio prediction:Prodigal:2.6,similar to AA sequence:RefSeq:CJSA_1363;locus_tag=Ef_HIJ123_00020;product=CJSA_1363 putative sugar transferase:1372570 reverse MW:76273;protein_id=gnl|IFR|Ef_HIJ123_00020
+gnl|IFR|Ef_HIJ123_contig000001	Prodigal:2.6	CDS	878	1207	.	+	0	ID=Ef_HIJ123_00030;Parent=Ef_HIJ123_00030_gene;inference=ab initio prediction:Prodigal:2.6,similar to AA sequence:RefSeq:A911_06907;locus_tag=Ef_HIJ123_00030;product=A911_06907 putative sugar transferase:1355468 reverse MW:73559;protein_id=gnl|IFR|Ef_HIJ123_00030
+##FASTA
+>gnl|IFR|Ef_HIJ123_contig000001
+GAAAAAGAAAAAATAAACTATAAAAACATATCAGCTGAACTTGCTTGGGAAATGAATCTA
+CCTTTACCTGATAATTATGAATTTGTGTTTTTATCTGGTGGATTATCTGGGCATGCTGCA
+ATGATGTCATTTTTTAATGTTTGTGGCATTGGATATTTGTATCATCATATGGATTTAATG
+AAAAATAGATATATAGATTATTACCATTTTTCTAGGATTGAAAATTTATATTCAATTATA
+ACATATGGACAATACAGTTTAACGCAAGGAATGAATAATATAGGTAAATATTTGACTTTA
+ATTAATAAAATTCCAATTCTTTTTTTGGTAAGAGATCCCATATCAAGACTAAAAACCGGA
+GTAAATCATCCTATTCTAAATCCAAAAAGTATGAAGGAGATATGTTTAAACAATGATTAT
+AGTGATGTGTTTAAGAATAAAATGTATGTTGGCGATATTGGAAAAAATTTTTACTATTCA
+GAAAAGCCAAGCATGAAATATTTACCTAGATGATTAATGAAGATACAATGTATCAGACAT
+CGCTTTGCTTATTATTTTCAAATAGAGATATTACTTATATTGACATGGAAGAAATAAAAC
+CAGCAAAGGCATTCGATACAATGTGTGATTCAGCTAACAAATTTGGTTTTAAGAAACCAA
+CGGATAAAAAATTTTTTGAAGGAGTTATGAATGGAGATTTAGCAGGTTTTATACCTATTA
+ATTTATTTATTGATAAAAAAATTTAATTTATAATAACAAAGTTATTTATAAAGATAATGA
+TAGCATCCATCTTCAAATCACATCTACAAATTTAATTGAAATTTATAAACAATCTAAAGA
+ATATATCAACTTTACTAAAGAATTTTTTGATAAACCTTTGAAATATGAAAATTTGGGAAT
+ATTTTTAAAACCACAAGAATTCGAGCGTTTAAAGCAAGATTCTAAGCTATTTGATGTTGC
+TAAAAGATATTTGAATAATTTTATTGAAGCTTTAGAAGAGAGAATAGACCTAGAAAAAGC
+TAAATTATTTAAAGAAAAAGACGTGTTAAACTATTTAAAAGAAAATAAAGAATTAAGAGT
+TAAGTTAAAAAACATATTAGATAAAGAACTTGTTCATATTAAACAACATCGTCCAGATAT
+AGTAGCTTCTTGGAAATACTATCAAGAATTTGAACAAATGTGCAAGGAGTTGAATGGTAA
+TATTTAGGAGAAAGATCTGTAAATTAATAAAAAAAAGATATAACTGTGTTTAAAATTAGA
+GAAAGGAATCAATATGAAACAAGGGGATTTTACAAAAGTTGCAAAACATTATCACAATAG
\ No newline at end of file
diff --git a/t/data/core_group_statistics.csv b/t/data/core_group_statistics.csv
new file mode 100644
index 0000000..842ed55
--- /dev/null
+++ b/t/data/core_group_statistics.csv
@@ -0,0 +1,11 @@
+"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","Min group size nuc","Max group size nuc","Avg group size nuc","query_1","query_2"
+"group_5","","Gonococcal growth inhibitor III","2","2","1","3","2",,,"",,,,"1_6","2_7"
+"hly","","Alpha-toxin","2","2","1.01","1","7",,,"",,,,"1_1","2_1"
+"speH","","hypothetical protein","2","2","1","1","11",,,"",,,,"1_2","2_2"
+"argF","","Ornithine carbamoyltransferase","2","2","1","1","6",,,"",,,,"1_3","2_3"
+"group_12","","hypothetical protein","1","2","2","2","8",,,"",,,,"","abc_00002	abc_00002"
+"group_13","","hypothetical protein","1","2","2","1","9",,,"",,,,"","abc_00003	abc_00003"
+"group_6","","","1","2","1.99","","","","","",,,,"","abc_01705	abc_01705"
+"group_8","","C4-dicarboxylate transporter/malic acid transport protein","1","2","2","1","3",,,"",,,,"","abc_00011	abc_00011"
+"group_2","","superantigen-like protein","1","2","2","1","10",,,"",,,,"","abc_00004	abc_00004"
+"arcC1","","Carbamate kinase 1","2","2","0.99","1","5",,,"",,,,"","abc_00008	abc_00008"
diff --git a/t/data/empty_file b/t/data/empty_file
new file mode 100644
index 0000000..e69de29
diff --git a/t/data/example_1.faa b/t/data/example_1.faa
new file mode 100644
index 0000000..d0f6304
--- /dev/null
+++ b/t/data/example_1.faa
@@ -0,0 +1,29 @@
+>1234#10_00001 nudix hydrolase
+MSKHGFFQITQKLFLRKGDELLILRDRKSGLGDLPGGRMNENEFFEDWSLSMQREIEEEL
+GSQVQIRVSTKPLFIHKHKVNEGNFPCIIIAYHADYLGGDIILSDEHDYISWEKVQTYEP
+SPLFTEYMLDAVNLYLKEYAPLVH*
+>1234#10_00002 intramembrane serine protease
+MIIKLKIILNSYLIYYFLRGQNTLIRTLLFEFPLTTFFVFLMVATFFIVNVFLPEHLIRQ
+YFLNHPGQIQPLSWIGAVFYHGNLIHLFGNMFYLFFLGRAVEYKAGKGRWLLFFFMAALI
+SSLLDSFIRGVILHDPTPVVGASGAISGIAAVAALLSPFSLRFNQRNIPFPVFLVAWIMV
+YSDITNVFTEDGVARWAHLGGFISVIFAAYFLKPTERKQLHSGFILNLIFIILTLILAFF
+YSNRS*
+>1234#10_00003 hypothetical protein
+VKTTLSGEIEKLRYEVAVKIVNLQGEVLDLRAEMKINFSEVNSKILKLQFEFEMAKIRKE
+LKTEIADLRAETKTDFLELQKSIVDIYKTISTQTRWILGVATLFAAIGKVIN*
+>1234#10_00005 imidazoleglycerol-phosphate dehydratase or histidinol-phosphatase
+LTDKLIGFYDPVRMKAERKTSETEIKLEMNLRGTGQYQFDTEIPFFEHMLSHISKHGLID
+LNLWLRGDIEIDCHHSVEDTAILMGATIHKQLGDKAGIFRYGHFTLTMDEVLTTVAVDLG
+GRYFFKYTGPELTGKFGIYDAELSLEFLQKLALNAKMNLHVVVHYGDNKHHVHESIFKAL
+GKALRMAIAQDSAAAGAIPSTKGVLE*
+>1234#10_00006 imidazole glycerol phosphate synthase subunit HisH
+MIAILDYGMGNIHSCLKAVSLYTKDFVFTKDHSTIENSKALILPGDGHFDKAMENLNSTG
+LRKTIDKHVTSGKPLFGICIGFQILFESSEEIAQGSKKEQIEGLGYIKGKIKKFHGKDFK
+VPHIGWNRLQIRRKDKSVLLKGIGDQSFFYFIHSYRPTDAEGNAITGLCDYYQEKFPAVV
+EKNNIFGTQFHPEKSHTHGLKLLENFIRFI*
+>1234#10_00007 1-(5-phosphoribosyl)-5-[(5- phosphoribosylamino)methylideneamino] imidazole-4-carboxamide isomerase
+MIVIPAIDLFDNCAVRLFKGNYEEKKIYSSEPWKLAESFAKNGATLLHLVDLNGARNQLG
+VNEDSILKIRETTSLKVQLGGGIRDKEKLAYYDKIGINRFILGTAAVTNPDLLKYALDNY
+GKERVVVAVDARDGIVKIAGWEKDSGIHYRDLLERLVKAGIEHIVFTDIAQDGTLAGPNL
+EAYREILNSYPFQVIASGGIASLKDLMDLSSLKTKISLYGVITGKALYEGKLDLAKAISS
+I*
diff --git a/t/data/example_2.faa b/t/data/example_2.faa
new file mode 100644
index 0000000..f6929ec
--- /dev/null
+++ b/t/data/example_2.faa
@@ -0,0 +1,27 @@
+>1234#10_00016 hypothetical protein
+MKVTHSCLEFDSIEGLIDFAREFETGSMIRFLSPIEDNSGNVLVKEEVQVKESTLARLKD
+IKGQYTPKFEVKLNKELLEQIQNILAIKIVNQLKVTDMKFLKFMYENTNYNFKGIIRNSL
+LSKKTTLTLLKVYNQNLNFFKYISELGLLSLGIVMIPDTMRFRLLRRYAFTAGILMDVPR
+IGVDKFTKLPSDDNEKVRVAHKCSDILQKLDLIEFTYGSISNHMPLGMIEDSTSSDKAAP
+GENIDETFLDDIISNDGESDSKVDGSREDAIPEKSYDIFQALLTDALKLARYIANVSHNA
+VDKDYVMEELVYYIAYNTSKKYFDELLANPLVATFKEFEVNVKRLRKIAEVEMKCVYPPS
+AWAYPKPKSSQVLCKNKVWDCPNIVMGWDIHVITAQEAFGWVGTSLPVDNYPKCRLEEEL
+DEIMVEPEKPKKK*
+>1234#10_00017 LipL45-like lipoprotein
+MKRYLSIVILCTFAMLLLVCSTNKSSGSDQVKTESNATSARIVWLLGDVKILSDSGEKKA
+ELGASLSSTDRVVTGPNGGAEIMVADSGIIKMSKNSDIEISSLMNPNGSDTNVQVNYGKI
+VTMVKKGQKTTEFTVSTPTALAGVRGTSFLTSVESPEGSKINCAKANCTVRFAVIEGTIA
+VSKKGESSEVILSKNRELRIEKNQKLTDKLIRSLQNDSLSEMKELIVLHKNETFEYGKLV
+EELKSSSEELKILSQSGSVEEVKAAFQKREADRNNADEITKTAKAVNETKYVQQDVQKEK
+LKLNPKETF*
+>1234#10_00018 putative lipoprotein
+MDFSFNLVDNNGTVMRSTFSPIRFFTFSFLWILIDCSSVQKIENFNSVLQEPTFKSLKEE
+EAILGGSSDSDYKIRKTGNTIPVFVLSPIQTPEGMDSKLAAFLSDEVRLIWAKVKGKQVR
+IQEMSWKNPSQLSQELKRLNVDAVIKTDIREVSGKWVVNQKITDPVKEIVYGSVDGSFQS
+PKIEDELQANQAYYLKHGSGVLALDAKSSLVPIWEKSLSSGEIDSILKKSIQGYLSFSAS
+SADTEVLFQGEKIGIASFRNYPLPEGLQQIQITRPGQKDISKSLQIRSGQTISIYQEWKE
+DRTLGGVRILSFPEALQVALDGLKMGETPFYRSNLTPGAMQLELVRETENGPLVYYEGQL
+IVDADKITEIALPYKTDNLISEPEFWKLSGEKGFQAFSGKTLDFQNVSSLPPGWYGVFSA
+PFVPENMELEGIIPITAESDSGIVAISFHTSKKTISLEYEKERLSVYSFPSNGNNVGTYK
+FKKEDKEDGRPFRIITDVKEGTIRLYLGYSKVLEDRLDVSGVWRISILTRGENFSKRSPL
+RNLKIEYKGYK*
diff --git a/t/data/example_3.faa b/t/data/example_3.faa
new file mode 100644
index 0000000..3391a10
--- /dev/null
+++ b/t/data/example_3.faa
@@ -0,0 +1,43 @@
+>3333#10_00016 hypothetical protein
+MKVTHSCLEFDSIEGLIDFAREFETGSMIRFLSPIEDNSGNVLVKEEVQVKESTLARLKD
+IKGQYTPKFEVKLNKELLEQIQNILAIKIVNQLKVTDMKFLKFMYENTNYNFKGIIRNSL
+LSKKTTLTLLKVYNQNLNFFKYISELGLLSLGIVMIPDTMRFRLLRRYAFTAGILMDVPR
+IGVDKFTKLPSDDNEKVRVAHKCSDILQKLDLIEFTYGSISNHMPLGMIEDSTSSDKAAP
+GENIDETFLDDIISNDGESDSKVDGSREDAIPEKSYDIFQALLTDALKLARYIANVSHNA
+VDKDYVMEELVYYIAYNTSKKYFDELLANPLVATFKEFEVNVKRLRKIAEVEMKCVYPPS
+AWAYPKPKSSQVLCKNKVWDCPNIVMGWDIHVITAQEAFGWVGTSLPVDNYPKCRLEEEL
+DEIMVEPEKPKKK*
+>3333#10_00018 putative lipoprotein
+MDFSFNLVDNNGTVMRSTFSPIRFFTFSFLWILIDCSSVQKIENFNSVLQEPTFKSLKEE
+EAILGGSSDSDYKIRKTGNTIPVFVLSPIQTPEGMDSKLAAFLSDEVRLIWAKVKGKQVR
+IQEMSWKNPSQLSQELKRLNVDAVIKTDIREVSGKWVVNQKITDPVKEIVYGSVDGSFQS
+PKIEDELQANQAYYLKHGSGVLALDAKSSLVPIWEKSLSSGEIDSILKKSIQGYLSFSAS
+SADTEVLFQGEKIGIASFRNYPLPEGLQQIQITRPGQKDISKSLQIRSGQTISIYQEWKE
+DRTLGGVRILSFPEALQVALDGLKMGETPFYRSNLTPGAMQLELVRETENGPLVYYEGQL
+IVDADKITEIALPYKTDNLISEPEFWKLSGEKGFQAFSGKTLDFQNVSSLPPGWYGVFSA
+PFVPENMELEGIIPITAESDSGIVAISFHTSKKTISLEYEKERLSVYSFPSNGNNVGTYK
+FKKEDKEDGRPFRIITDVKEGTIRLYLGYSKVLEDRLDVSGVWRISILTRGENFSKRSPL
+RNLKIEYKGYK*
+>3333#10_00001 nudix hydrolase
+MSKHGFFQITQKLFLRKGDELLILRDRKSGLGDLPGGRMNENEFFEDWSLSMQREIEEEL
+GSQVQIRVSTKPLFIHKHKVNEGNFPCIIIAYHADYLGGDIILSDEHDYISWEKVQTYEP
+SPLFTEYMLDAVNLYLKEYAPLVH*
+>3333#10_00003 hypothetical protein
+VKTTLSGEIEKLRYEVAVKIVNLQGEVLDLRAEMKINFSEVNSKILKLQFEFEMAKIRKE
+LKTEIADLRAETKTDFLELQKSIVDIYKTISTQTRWILGVATLFAAIGKVIN*
+>3333#10_00005 imidazoleglycerol-phosphate dehydratase or histidinol-phosphatase
+LTDKLIGFYDPVRMKAERKTSETEIKLEMNLRGTGQYQFDTEIPFFEHMLSHISKHGLID
+LNLWLRGDIEIDCHHSVEDTAILMGATIHKQLGDKAGIFRYGHFTLTMDEVLTTVAVDLG
+GRYFFKYTGPELTGKFGIYDAELSLEFLQKLALNAKMNLHVVVHYGDNKHHVHESIFKAL
+GKALRMAIAQDSAAAGAIPSTKGVLE*
+>3333#10_00006 imidazole glycerol phosphate synthase subunit HisH
+MIAILDYGMGNIHSCLKAVSLYTKDFVFTKDHSTIENSKALILPGDGHFDKAMENLNSTG
+LRKTIDKHVTSGKPLFGICIGFQILFESSEEIAQGSKKEQIEGLGYIKGKIKKFHGKDFK
+VPHIGWNRLQIRRKDKSVLLKGIGDQSFFYFIHSYRPTDAEGNAITGLCDYYQEKFPAVV
+EKNNIFGTQFHPEKSHTHGLKLLENFIRFI*
+>3333#10_00007 1-(5-phosphoribosyl)-5-[(5- phosphoribosylamino)methylideneamino] imidazole-4-carboxamide isomerase
+MIVIPAIDLFDNCAVRLFKGNYEEKKIYSSEPWKLAESFAKNGATLLHLVDLNGARNQLG
+VNEDSILKIRETTSLKVQLGGGIRDKEKLAYYDKIGINRFILGTAAVTNPDLLKYALDNY
+GKERVVVAVDARDGIVKIAGWEKDSGIHYRDLLERLVKAGIEHIVFTDIAQDGTLAGPNL
+EAYREILNSYPFQVIASGGIASLKDLMDLSSLKTKISLYGVITGKALYEGKLDLAKAISS
+I*
diff --git a/t/data/example_annotation.gff b/t/data/example_annotation.gff
new file mode 100644
index 0000000..ce5e1bd
--- /dev/null
+++ b/t/data/example_annotation.gff
@@ -0,0 +1,271 @@
+##gff-version 3
+##sequence-region abc|SC|contig000001 1 15000
+abc|SC|contig000001	Prodigal:2.60	CDS	172	1131	.	-	0	ID=abc_00001;gene=hly;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q2G1X0,protein motif:TIGRFAMs:TIGR01002,protein motif:Pfam:PF07968.6;locus_tag=abc_00001;product=Alpha-toxin,beta-channel forming cytolysin,Leukocidin/Hemolysin toxin family protein;protein_id=gnl|SC|abc_00001
+abc|SC|contig000001	Prodigal:2.60	CDS	1804	1950	.	+	0	ID=abc_00002;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00002;product=hypothetical protein;protein_id=gnl|SC|abc_00002
+abc|SC|contig000001	Prodigal:2.60	CDS	1934	2131	.	+	0	ID=abc_00003;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00003;product=hypothetical protein;protein_id=gnl|SC|abc_00003
+abc|SC|contig000001	Prodigal:2.60	CDS	2621	3337	.	-	0	ID=abc_00004;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13350,protein motif:Pfam:PF02876.11;locus_tag=abc_00004;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00004
+abc|SC|contig000001	Prodigal:2.60	CDS	3445	4170	.	-	0	ID=abc_00005;gene=speH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0C0I6,protein motif:CLUSTERS:PRK13349,protein motif:Pfam:PF02876.11;locus_tag=abc_00005;note=SPE H;product=hypothetical protein,superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00005
+abc|SC|contig000001	Prodigal:2.60	CDS	4265	4990	.	-	0	ID=abc_00006;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13043,protein motif:Pfam:PF02876.11;locus_tag=abc_00006;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00006
+abc|SC|contig000001	Prodigal:2.60	CDS	5428	6429	.	+	0	ID=abc_00007;eC_number=2.1.3.3;gene=argF;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P99073,protein motif:CLUSTERS:PRK04284,protein motif:Cdd:COG0078,protein motif:TIGRFAMs:TIGR00658,protein motif:Pfam:PF00185.1;locus_tag=abc_00007;product=Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Aspartate/ornithine carbamoyltransferase [...]
+abc|SC|contig000001	Prodigal:2.60	CDS	6449	7384	.	+	0	ID=abc_00008;eC_number=2.7.2.2;gene=arcC1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q7A627,protein motif:CLUSTERS:PRK12353,protein motif:TIGRFAMs:TIGR00746,protein motif:Pfam:PF00696.22;locus_tag=abc_00008;product=Carbamate kinase 1,putative amino acid kinase,carbamate kinase,Amino acid kinase family protein;protein_id=gnl|SC|abc_00008
+abc|SC|contig000001	RNAmmer:1.2	rRNA	7556	9112	.	+	0	ID=abc_01705;inference=COORDINATES:profile:RNAmmer:1.2;locus_tag=abc_01705;product=16S ribosomal RNA
+abc|SC|contig000001	Prodigal:2.60	CDS	9419	9646	.	+	0	ID=abc_00010;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00010;product=hypothetical protein;protein_id=gnl|SC|abc_00010
+abc|SC|contig000001	Prodigal:2.60	CDS	9952	10899	.	-	0	ID=abc_00011;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF03595.11;locus_tag=abc_00011;product=C4-dicarboxylate transporter/malic acid transport protein;protein_id=gnl|SC|abc_00011
+abc|SC|contig000001	Prodigal:2.60	CDS	11148	11336	.	+	0	ID=abc_00012;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00012;product=hypothetical protein;protein_id=gnl|SC|abc_00012
+abc|SC|contig000001	Aragorn:1.2.34	tRNA	11803	11878	.	-	0	ID=abc_00013;inference=COORDINATES:profile:Aragorn:1.2.34;locus_tag=abc_00013;product=tRNA-Arg(tct)
+abc|SC|contig000001	Prodigal:2.60	CDS	12241	12375	.	+	0	ID=abc_00014;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5;locus_tag=abc_00014;product=Gonococcal growth inhibitor III,Staphylococcus haemolytic protein;protein_id=gnl|SC|abc_00014
+abc|SC|contig000001	Prodigal:2.60	CDS	12432	12566	.	+	0	ID=abc_00015;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5;locus_tag=abc_00015;product=Gonococcal growth inhibitor III,Staphylococcus haemolytic protein;protein_id=gnl|SC|abc_00015
+abc|SC|contig000001	Prodigal:2.60	CDS	12699	13385	.	+	0	ID=abc_00016;eC_number=3.-.-.-;gene=yfnB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O06480,protein motif:CLUSTERS:PRK09449,protein motif:TIGRFAMs:TIGR02254,protein motif:Pfam:PF00702.1;locus_tag=abc_00016;product=Putative HAD-hydrolase yfnB,dUMP phosphatase,HAD hydrolase,haloacid dehalogenase-like hydrolase;protein_id=gnl|SC|abc_00016
+##FASTA
+>abc|SC|contig000001
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+ATACCCACCACATTTGGTGGAGAACCAAAAATTAGCCGAAAAACATCATTTCTGAAGTTA
+TCGGCTAAAGTTATAAATTATATTTATTTGTACATGAACAAATAATTTACATTAATTTGT
+CATTTCTTCTTTTTCCCAATCGATTTTATATCTTTCTGAAGAACGATCTGTCCATTTATC
+TTTAGTATTGGTACCTTTCCAATTTGTTGAAGTCCAATGCAATTGGTAGTCATCACGAAC
+TCGTTCGTATATTACATCTATATTTGTTTGTTGTTTGGATGCTTTTCTATCCATAGTAAT
+AACTGTAGCGAAGTCTGGTGAAAACCCTGAAGATAATAGAGAACTTGCTTTGTTAGGATC
+AAGGAAGTTCTCTGCTGCTTTCATAGAACCATTTCTAGTTTTCATGAAAAGTTGATTGCC
+ATATACCGGGTTCCAAGAATCTCTATCATATGGTCCCCAATTTTGATTCACCATATTGTT
+AAATATCACTTTCCAGCCTACTTTTTTATCAGTTGGGCTCTCTAAAATTGTTTTGAAATC
+AGGTTGAACATATTTCAGTGTATGACCAATCGAAACATTTGCACCAATAAGGCCGCCAAT
+TTTTCCTGTATCATCACCAGTAACATTACCGTTGAATCCATAAGTTAAAGTACTCATATA
+CTCTTTTGTATCAATCGAATTTCTTGGATAGTAATCAGATATTTGAGCTACTTCATTATC
+AGGTAGTTGCAACTGTACCTTAAAGGCTGAAGGCCAGGCTAAACCACTTTTGTTAGCACC
+TTCTTCGCTATAAACTCTATATTGACCAGCAATGGTACCTTTCGTTCTAATAACTAGCAG
+TTTTTTATTGTGATTTTTATCATCGATAAAACTATAAAATACTTTTTTGTGCATGCCATT
+TTCTTTATCATAAGTGACTAAATCACCTGTTTTTACTGTAGTATTGCTTCCAATATCTGT
+AGTACCGGTTTTAATATTAATATCAGAATCTGCGGCATTAGCGACAGGATTCATTAATAT
+GGAACCTAGCAATAGTGTTGTTGTTACTGAGCTGACTATACGTGTTTTCATTTTCATCAT
+CCTTCTATTTTTTTTAACGATTTGAGGAAACAATAATCAATATGTCAATTTAGAATATTG
+CAGTTGAGACATATCAAATATTTATAATCGTTTATATTAGTATATTTAATGTAGCAACTG
+ATAAATTACTGAGTGATGATGAGTGATTATTTTAAGAATATGTTTTTAACTTTTATTTAA
+AATTTGAAAGGAAGCATTTCAATTTCGAGGGTTAGTCAAAGTTGAATAAATTCTTTATGA
+AACAAGGAAAAGACATAGCTAATTTTATTGATTAATTTCTTTAAAACTAATGATTTGTTT
+GATTTAAAAATGTAATCGATTACAATATAAAAATACAAATATCTTAGAATTAAATCAATT
+AATTAACTATTAAATAAAAATTAAATATATATTAACTACTGTAAATTAATAAATAGAAAT
+AGAGAAAAAGGGTATTAATTATGTTTGGAATTTCGTCGGGAATAATTTTTGCCATTACAT
+AGAAATATCTAATAGATAATGAAAAAGTATCGTATGTATTTTTAATATAGTGTAAAATAT
+CATATGTAAAATAAAATGTAGATTTTTAGTTAGAGGCATTATAAGAAAATTTTGAGTATA
+GGTTAGCTTTTAATTATGAATCTTATTGAAATTTGATTAATAAATATATGATAGGGGATT
+AAAATGAAACTATTTTATATCGTATTTCTTATTATTATATGGCTGAATATATTTTTAGGA
+AATGAAATTATCCATACACTGACTGTTTTAATAACAACATTGTATATTGTTAATTCAAGA
+AAGGGGATTAAAAATGACAGAGTTGAATAATATTATAAACTCTCTTCAATCTTTGTTTGA
+GTCTGAATCAGGCTATAAAATTTCGAAAAATTCAGGAGTTCCATATCAAACAGTACAAGA
+TTTAAGAAATGGGAAAACCAAACTAGAAGATGCTAGATTTAGAACGATTATTAAACTTTA
+TAGTTACTATGTCTCATTAAAAGAACATTAATCATGGGACAATAAAGTATTGCTATAATA
+ATTGAATCATTAAATGAAGGAATAGAGCCTAAGACATAATAAATCAATGTCTTAGGCTCT
+ACAATATTATATTGGTAGTAGTTGACCGAATGAAAATGACCATGTAACAAGCATTTTTGG
+GGCCCCAACACAGAAGCTGACGAAAAGTCAGCTTACAATAATGTGCAAGTTGGGGATGGG
+CCCCAACACAGAGAATTTCAAAAAGAAATTCTACAGACAATGCAAGTTGGCGGGGCCCCA
+ACATAGAGAAATTCAAAAAGAAATTCTACAGACAATGCAAGTTGGGGAGTGACAACGAAA
+TAAATTTTATAAAATACCATTTCTGTCACATTCCTACTCCCAATTCCATTTAAATATATA
+GAAAATTTCACCTATTATATACATCATACACTTTAAAAATTATCATTTAAATAATCTGAT
+TTGGAATTAAAGTGCATGAAGTATAAGTCACCTTCATATACTAATCAAAGAGGACGTCAA
+CAGTTATTTTATTAGGATTTTTAACATAAACATTTGCTAGATCTGAATGTAATCTTTTGC
+TTAAATCAATAGTGTAGTTATTACCGCCACCGGTGATCTTAAGCTTACCTTTATTACGAT
+TTTCGGTATATAATATTTTATTTTTTATTAACGCTTCTCGTGCACGGAAATCGATTTCTT
+TCAATGTTAATACTGGTTTATTGCCTTGGTATATTTTATGCGCACCAATAATCGTTTGTA
+GTTTATCTTTGTATTGCACAAAAAGATTATAAGTTTTATCAGAAGGTTTTGCGGCTGGTG
+TAACGCCACCTGTAAATGTCTCTCTATAAGACCACCATAACTGATCAGTATCTTTGTCTT
+TTAGTCCAAACACATCTACGTAACGATCTTTTAACTGATTAATATTTCCCCAACTTTCAG
+CGCCCCATAAAGATATATGTGCTGACCATGAATATCTCTTAAGTTCAACATAAATGTTTC
+CATTATCATATTGATATAGCCATTTATTTGAAAATGAAAAATGAGGCTGTGTGTAATATT
+TAATTAATTCATTGATGTTAGTTTCATCTTGACCAATGCTATAAGCTTTAGCTTCAGAGT
+AAAAACTAAATACTGGTTTTTGATTTTGAGGTAATACAGTACCTAATAGTAATAATGTTG
+TCGTTAAAATTATATTTTTCGTGATGTTCTTACTCATTAGAACATCTCCTTTCAGAGGAA
+TCATGATACGAGGAATAAGAAATTTAAATGTGAGCGAAGTCAATATAGTATTTGCGATTA
+TTTTTATTAACTCCATTATTATTGTTAGTTTGATTTTTCGAGGATAACTTCAATTTTTGC
+ATTTTGAGGTTTTTTAACATATCTATTTGCATCAGTTGATGGCAACCTTTTACTTAAATC
+TATTGTGTAGTTATTGTCTGTACCTGTTATTTTAATTTGTCCTTTATTATAAGAATTATT
+ATATAATTTTTTACTTTTAATTAATGTTTGACGAATACGAAAATCTAATTCTTTTAAAGT
+TAAAACAGGCTTATTGCCTTCATAAACTGGAAATCCGCCAGTAAACGTTTCTGCTTTATC
+TTTATATGTTACATTCAGTTTATAGTGTTTATCGTTAGATGTTGCTGCAGGAGTAACACC
+ACCAGTAAACGTTTCTTGAGATAATGCAAAAGAATCAATGGTTTCTTGGTCTTTTATGCC
+AAAAATATCAACGCTTTTATTTCTTAATTGGTTGATATTGCCCCAACTTTCAGGTCCATA
+AACTTGAATATGACTATACCAAGAAAACTGTAACAACGTTGCATGAATCGTACCGTTATC
+TTTTTGCCATAACGTACTGTTAGAGAAGGTTAAATATTTTTGCGAGTAATATTTAGTTAA
+CTCATTAACGTTAGTTTCGTTTTGATTTATATAATAAGCTTTCGCTTCAGATGAAGAATT
+GATAGGTGTATTAGGAAATTGTGTAAATGCTGTACCTAATAGTAACAATGTTGTTGATAA
+AATAATTTTTTTCGTGATGTTATTGTTCATTCGAATTTCTCCTTTGAGTATTGTTGGAAT
+GTTTAATTATAAAAAGGGGTTAATTAGATAATTGAAATTATCCGCATTTACAAAAGGTAA
+TAGGTTAGTTAGATTTTTCGAGTATGACTTCAATTTGTGCATTTTTAGGATTTTTAACAT
+AACGGTTTGTGTCAGTTAATTTTAACTTTTTACTTAAATCAATCGTGTAATTATTTCCAT
+CAGCAGTTATCTTAATTTGACCTTTATTAAATTCTCCGTTATATAACTTTTTATTCTTTA
+TTAATGTTTGACGAATACGGAAATCTAATTCTTTTAAAGTTAATACTGGTTTATTTCCTT
+TGTAAAATTCATGTCCACCGATGATAGTTTGTTGTTTATCACTATATTTTAAAAATAGTC
+TATAAGGTTTATCAGATGAAGTAGCTGCTGGCGTAACACCACCAGTAAATGTTTCATCAT
+AAGTCCAGTAACCTTCAACTGTGTCCTCATCTTTAGTTCCAAATATATCAACGTATTTAT
+TTCTTAACTGATTAATGTTTCCCCAACTCTCGGATCCAAACACTTGAATATGACTATACC
+AAACCCACGTTTGCAATGTTGCATGAATGCTACCATTGGGCTTTTGCCATAGCCATTTTC
+CAGATAATGAAAAATGCGGCTGAGTGTAATATTTGATTAACTCATTGATATTAGTCTCGT
+TTTCACTGATATTATAGGCTTTTGCTTCAGATGAAAAACTGATTGGTGTTTTAGGAAGTT
+GTGTTGATGTGGTTCCTAAAAGTAACAATGCTGTTGATAAAACTAATTTATTCATGATGT
+TCTTTTTCATATGAAAATCTCCTTTGCGTGAATTACCCAAAGTATATAAGCTATTACACC
+GATTCGGAATTAAATAAAAGCTAAAACTATGTTAAATAAACTTAAACAGTTAGTAGTGTT
+ATTTAAGCAAAACTTATCATTTTTAAGTTTTGGACAGAAACAGTACTTAATAAAGTAGGC
+GGGAGTTATATATTAAAAACGACACGTTACAATTATTCTCTAATCAATTGCATTAAATTG
+TTTGATAATTGAATTTTCTAACTACTTGAAAAATAGTTATACTTTAAATGTAGTACTTAT
+TTTAATTATTTCCTACTACTTAAATTTAATATTAATAAAATGTTCATTTAATTATTGATA
+AAATATTACAAATTTTAATAGTAGGTTGTGTTTATTTTGTATGCGCTTACAATTTAGGTG
+TAACTAAAATAAAAGGAGTTGTTATTAATGAAAAATTTACGAAACAGAAGTTTTTTAACT
+TTATTAGACTTTTCACGACAAGAGGTAGAATTCTTATTAACACTCTCCGAGGATTTAAAA
+CGTGCTAAATATATTGGCACTGAAAAGCCTATGTTAAAAAATAAAAATATTGCACTGTTA
+TTTGAAAAAGATTCTACAAGAACGCGATGTGCATTTGAAGTTGCAGCGCATGATCAAGGT
+GCAAATGTAACTTATTTAGGCCCAACTGGATCACAAATGGGTAAAAAAGAAACAACTAAA
+GATACTGCACGTGTGCTTGGTGGAATGTATGATGGCATTGAATACCGTGGTTTTTCACAA
+AGAACAGTAGAAACTTTAGCTGAAAATTCAGGCGTACCAGTGTGGAATGGTTTAACTGAT
+GAAGATCATCCTACTCAAGTTCTTGCTGATTTCTTAACAGCAAAAGAAGTCTTAAAAAAA
+GATTATGCAGATATTAACTTTACATATGTTGGAGATGGTCGTAATAACGTTGCAAATGCA
+TTAATGCAAGGTGCTGCCATTATGGGTATGAACTTCCATTTAGTTTGTCCAAAAGAATTA
+AATCCAACAGATGAATTATTAAATCGCTGTAAAAATATTGCCGCTGAAAATGGTGGCAAC
+ATATTAATCACAGATGATATTGACCAAGGTGTAAAAGGTTCGGATGTAATTTACACTGAT
+GTTTGGGTATCAATGGGTGAACCTGATGAAGTATGGAAAGAACGACTTGAATTATTGAAA
+CCATATCAAGTAAATAAAGAAATGATGGATAAAACTGGTAATCCAAATGTTATTTTTGAG
+CATTGCTTACCATCTTTCCATAATGCTGATACGAAAATTGGTCAACAAATTTTTGAAAAA
+TATGGTATTCGAGAAATGGAAGTTACAGATGAAGTATTCGAAAGTAAAGCTTCAGTTGTA
+TTCCAAGAAGCTGAGAACAGAATGCATACAATCAAAGCAGTCATGGTTGCTACATTGGGT
+GAATTTTAAATGATATAAGGAAGTGAATATGATGGCGAAAATAGTAGTAGCATTAGGTGG
+TAATGCTTTAGGAAAATCACCTCAAGAACAACTCGAGCTTGTTAAAAATACTGCGAAATC
+ATTAGTAGGATTAATAACAAAAGGACATGAGATTGTTATTAGTCATGGTAATGGACCACA
+GGTTGGAAGCATTAATTTGGGACTTAACTATGCTGCAGAACATAACCAAGGTCCGGCATT
+TCCATTTGCTGAATGTGGCGCAATGAGTCAAGCTTACATCGGCTATCAATTACAAGAAAG
+CTTACAAAATGAATTGCATTCTATTGGAATGGATAAACAAGTGGTAACACTAGTGACACA
+AGTTGAAGTTGATGAAAATGATCCGGCATTTAACAATCCTTCAAAACCAATTGGGTTATT
+TTACAACAAAGAAGAAGCTGAACAAATTCAAAAAGAAAAAGGATTTATATTTGTTGAAGA
+TGCTGGAAGAGGATATAGACGCGTTGTTCCTTCACCACAACCCATCTCTATTATTGAATT
+AGAGAGTATTAAAACACTTATTAAAAATGATACACTCGTTATTGCTGCTGGTGGTGGAGG
+TATACCAGTAATTAGAGAGCAACATGATGGTTTTAAAGGTATTGATGCAGTTATAGACAA
+AGATAAAACAAGTGCGTTGTTGGGTGCTAATATTCAATGCGATCAATTGATTATTTTAAC
+AGCAATTGATTATGTATATATTAATTTTAACACTGAAAACCAACAGCCTTTGAAAACAAC
+AAATGTTGATGAATTAAAACGATATATAGACGAAAATCAATTTGCAAAAGGAAGTATGTT
+ACCAAAAATTGAAGCAGCCATATCATTTATTGAAAACAATCCAAAAGGAAGTGTGCTTAT
+AACATCATTAAATGAATTAGATGCTGCCTTAGAGGGTAAAGTAGGTACTGTGATTAAAAA
+GTAATTGAATTGAAACGCTTTTCAATTACTATATGTCAAATGCATGATTTTTATTATTTA
+TGTGCACCCCCTGAAAATAATGCCTCTATTTTGATGCGGGGTGCACTTTCTTAATTTATA
+TTTATAAAATCTTTAAGGTAGAAATGCTAGGTTAAATGTCGAAGGAGATGAAACCGTGGA
+AAATACAATTAATGAAAGTGAAAAGAAAAAACGATTTAAATTAAAAATGCCAGGTGCATT
+TATGATTTTATTCATTTTAACGGTTGTTGCAGTTATAGCAACATGGGTTATTCCTGCTGG
+TGCATATTCTAAACTTTCTTACGAACCTTCATCCCAAGAACTAAAGATAGTTAACCCTCA
+TAACCAAGTGAAAAAGGTTCCGGGTACGCAACAGGAACTAGACAAAATGGGGGTTAAAAT
+TAAGATTGAACAATTTAAATCAGGTGCAATTAATAAGCCAGTATCAATTCCGAATACTTA
+TGAAAGATTAAAGCAACATCCAGCTGGACCAGAACAAATAACAAGTAGCATGGTTGAAGG
+TACGATAGAAGCGGTCGATATCATGGTATTCATTCTTGTACTAGGGGGACTTATTGGCGT
+AGTTCAAGCCAGTGGTTCTTTTGAATCGGGATTGTTAGCTTTAACGAAGAAAACAAAAGG
+GCATGAATTTATGCTAATTGTGTTTGTATCAATACTAATGATTATCGGCGGGACGTTATG
+TGGTATTGAAGAAGAAGCTGTAGCATTCTATCCGATTTTAGTCCCTATATTTATAGCGTT
+AGGATACGATTCTATCGTTTCAGTTGGTGCCATATTCCTTGCCAGCTCTGTCGGTAGTAC
+ATTTTCAACTATTAACCCGTTCTCGGTTGTAATTGCCTCTAATGCCGCTGGTACAACTTT
+TACGGATGGCTTGTATTGGAGAATAGGTGCTTGTATTGTCGGTGCGATTTTTGTTATTAG
+TTATTTATATTGGTACTGTAAAAAAATTAAAAACGATCCTAAAGCGTCATATTCTTATGA
+AGACAAAGATGCTTTTGAACAGCAATGGTCTGTATTAAAAGATGACGATAGTGCCCATTT
+TACTTTGCGTAAGAAGATAATCCTTACATTATTTGTACTACCATTTCCAATTATGGTATG
+GGGAGTTATGACGCAAGGTTGGTGGTTCCCAGTTATGGCTTCAGCATTTTTAATATTTAC
+AATTATAATAATGTTTATTGCTGGGACAGGTAAATCTGGATTGGGAGAAAAAGGAACTGT
+AGATGCATTTGTCAATGGTGCATCAAGTTTAGTAGGTGTATCTTTAATTATTGGTTTAGC
+TCGAGGTATTAATTTAGTGTTGAATGAAGGTATGATTTCAGATACAATCTTACACTTTTC
+ATCATCTTTAGTTCAACATATGAGTGGACCATTATTTATCATCGTATTACTATTTATTTT
+CTTCTGTTTAGGTTTTATCGTGCCATCATCTTCTGGATTAGCGGTATTATCAATGCCTAT
+CTTTGCACCACTAGCTGATACAGTAGGTATACCAAGATTCGTCATCGTTACGACATATCA
+ATTTGGTCAATATGCGATGTTATTCTTAGCGCCGACTGGACTTGTTATGGCCACACTACA
+AATGTTAAACATGCGATATTCACATTGGTTCCGATTTGTATGGCCGGTAGTTGCTTTTGT
+ATTGATTTTCGGTGGCGGAGTACTAATTACGCAAGTACTAATTTATTCATAATTTGAAAT
+GCTATATTATAAAAATACTAATTGGGTTTTATGCATCTCGTAGGTTTGTAGAAATACTAA
+ACTAAGCGAGGTGCATTATTATTTTTGATTAAGAAAATAATGACGGTAATGATAACACTA
+GTAAGTAATTGATACAATGCTCTATTTAATAATGATATTTTTAAAATTTGTTTTTAATGT
+AATGTTAGATCTATGGTATATTATATTTATCGTGGTAAATATGTATTTGCTGTAAACAGT
+TTTTGTAACACATGACACATATGGTTGTGAAAATAGCATATATAAAGGATGGCTATAAAT
+GACACATTTGACAAAGGTTTTAGATACACTAACTGGAATATGCGTAGTATTATTATTTAG
+TAAATATTTTGTGGCGTATGCAAATATGGTGTTTGATTGGAATTTAAGATGGTATTTGCT
+AGAAAACATACCACATTTGCCAATTATATTATTTATTCTGATGTTTATTTTCGGAGTACC
+TTCTGAAATGATAAAAGATAGGCAAAGGAAAAATAACGGTGTTTAATTTATCGATATTTA
+GAGGTGATAAAATTTGTCAACTATTACAAAGACTAAAATAAAAATCTTTATTAATTATTT
+TATTGGTATATTAGCCTTGGCGTTTGCTGCTTATATATTATTTAAATAAAGGTTAGAGTG
+AAACGTGTTTATGAACTAGACGTTCTAGTATAGTTACTACAGCTTTAGTCAGAAGGTATC
+ATTGATAAGATCATATTAAATCAAAGAGGCATTGATATACACTAAAAAGAGGCAAGATTA
+CCTGCCTCTTTTTTAGTTATTAAATATGCGTGTTAATTCTTGGTAATAGTGTTACTGAGA
+CGCATTACGAAGAAATTTATCTTGATTTTCCTTTTTTAAAAAGAAGTGAAGATATCCTAT
+AAAGACTCTAAGTACTATTACAGTGGCTAATAACACTTCGATAAACAAAAGACCTTTCCA
+AATATCTGGAAACATAAGTACAGGCAAACTGTTCTTTAAAGCAGTTGCTGAGATTACTAA
+AGGGAATGTGAAAGCTGAAAATACGGGCGAAAAAGGTTCTTTTAGTAATTTAGGCAATTG
+TATAATGATATAAAAATAAAATATTTGAGCTAATATTAAAAATATAATTACGATAAAAGC
+ATTCGCCTTAGGAAAGGCTATAACATATGCTGCAGCGACTAAAGAAAATGGTGCACAAAT
+TGTCGATGTGTTCGGTTTGATTGACGTCTGCAATGGAAATGCTTTTAATCGCTTGAAAAC
+AATAGGTAAGACAATACAAGTAGCTACAAAGCCATATATTACTGTTAATTGACCTATGAA
+AAAACATCCGCTAACGGGTGCCGTCAATCCTGCGATAGCAATACCAATAAAAAGTACAGT
+CCACGAAGGATAAACATTTTCAAGTGAAAAATCTTTTAAATATTTTATTGAAAAAATAAT
+CATATGCGTCATAATTCCCACAAGGCATAAAATCCAAATAGGCGTTATTAAGCTATTGAT
+AAAAGTTATGTTACTAAAAAATGTATTTAAATAAGTAGTACCTAAAAAGCCAGACATGAA
+AAATGTTGTGAACACTGATGAAACTAGAGGACTGTTTAATTGTTCTTTCACATTATTAAA
+ATATTTGATCATAGTACATAAAAGGTGAATCCAAATCAAGAAAGCAAAGATTCCGCAAAC
+AGCGTTTAAAGTAAGAGATAAGTCTTTTAATAGATTACCCAGGCCTAATAATCCTAAGAC
+TAGTCCTGACGTTACTAGAGGTGCTTTTTGAAGTCTCATGATTTACAACCTTTCTTATGT
+GATTTTCTTCACTAATTATATCATGATAGCTATGGCCAATTAATAAGAAGAGTGTGTACT
+ATTACGTTATTAGAGTATGTATATTGGATTGATAGACACATAATTGACATTTAAATCTCA
+AAATTAATGATATTTGAGGTATGCTTTGCAACCTAAATATTGGAATATGTGGAAAAGTAA
+TTATTTAATTTAAATGATGATTGAAGATTTACAAGGGGGTGTACAAAATGAGAAATCAAA
+TTCAAAAACTATTAGACAGTGATTTGAGCAGTTTACATATATCGAAACAAACAGGAGTTC
+CACAAAGCACAATACACAGAATGAGAAAAAAAGAAAGATCATTAGACAATATGTCATTGA
+AAAACGCTGAACTACTTTATAAATTTGCCAATAGTATATTTAGCAATGAAAATTAAGAAG
+AATAATTAGTGAGTTAAATAATTAAAAAACCACGTCTCGATGGACGTGGTTATTTTTTTA
+AATGTATTTAATTATAGGATTTCGGGCCCCTGAAAAGTCCCTAAAATTTTGAATAGTATC
+AAAATCTATAGGAATAATATAATAATAAAAATCTCTACGCATGCAAATGGTGAGTGTTAA
+AAATCTTGGTGTAGTGTTGGTGTAGTTCAGGTGCAGTATAGATGAAATTTGAGCACAAAA
+TTACTTAATTAAAAATGTATAAACAAGTGTCAGACGCGTGTCAATTGTGTGTCAAAAAAT
+TCTATGACGAATAAGCATGCTTGTTTTAATTACTGTATTTTCGGGGCTTATGCGGGGGAC
+GTTTTGGTGACGCATCATACTATATTACTGAAATTCAAAACAAAAAGAGCCCCGTAATCA
+CGGAACTCTTTTGTTTGGTAATGCGTATAAAAATACCTATAAACGTCCTGGGAGGGATTC
+GAACCCCCGACCGATGGCTTAGAAGGCCATTGCTCTATCCAGCTGAGCTACCAGGACACG
+TTTAACAACACAAGAATTATTATATCTAAATGAACTTAAATTAGCAATACCTTGTAAATA
+AAAAATGTTTATATTTTTCACTATTATAGAGCTATTTATCTAAAAAGGTTCAATAAGACT
+TAAATGCGAATTCAGGCAACTTAATTGTGTTAAATACAGTTTTGAATGCCTAACTGTATT
+TCTTTTCTCTTTAAAATACAGTTAAGTACATTATAAGATGTTGTGCGGATAAACAAACTA
+ATTGCATCAAATTTATTTTAAAATAACAACAACAAAACGTTAAGCGAATAACATTTCGGT
+GATTTAAAAGCTACGCACGTTTTTGTTATCTTCAAATTTAAATTTTAAGGAGTGTTTTCA
+ATGGAAGGTTTATTTAACGCAATTAAAGATACCGTAACTGCAGCAATTAATAATGATGGC
+GCAAAATTAGGCACAAGCATTGTGAGCATCGTTGAAAATGGCGTAGGTTTATTAGGTAAA
+TTATTCGGATTCTAATTTCAATATGTTATGTAAGTAATCAGTATTATTTCAAAGGTGAGG
+GAGAGATTTAAATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAAC
+AACATGATAGTGTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTT
+TACTAGGTAAATTATTTGGATTCTAATATAATAACTAATATTCTTTAAAATAAACTGGGT
+GAGCATACTTTAATGTTATGCACTCAGTTTATTTTATTTGCAGAAATTTGAGCCTCTGTT
+AAGATTTAGATACATAGACAATATAGGAGATGGGGAAATTGGGATATAAAAATATTTTGA
+TAGACTTTGATGATACAATTGTTGATTTTTATGATGCAGAAGAATGGGCGTTTCACTATA
+TGGCGAATGTTTTTAATCATAAAGCAACAAAGGATGATTTTTTAACATTTAAAAAAATCA
+ATCACCAACATTGGGAAGCTTTTCAACAAAATAAATTAACGAAGTCTGAAGTATTATCAG
+AACGATTTGTGAATTACTTCAAACATCATCAAATGGAAGTTGATGGGCATCGTGCAGATG
+TGTTATTTAGAAATGGATTAGCAGAAGCTAAAGTTAAATACTTTGATCAAACATTAGAAA
+CAATTGTCGAATTATCGAAAAGACATGATTTATATATTGTTACTAATGGTGTAACCGAAA
+CGCAAAAGAGAAGGTTAAATCAGACGCCGTTGCATAAATATATTAAAAAGATATTTATAT
+CTGAGGAAACAGGATATCAAAAACCTAATCCGGAATTTTTTAATTATGTTTTTAATGATA
+TTGGTGAGGATGAAAGACAGCACTCGATTATAGTTGGAGATTCTTTAACATCTGACATTC
+TAGGTGGAATCAATGCGGGTATAGCTACTTGCTGGTTTAATTTTAGAGGATTTGATCATA
+ATCCAGGAATTATACCTGATTATGAAATTAATTCATGGAAACAACTAAATGATATTGTAC
+GTTAAGACAAATAATTTGAAAATGATTTATATTAGCTTAAGAAAGTAATTGTAAAAACGA
+CACTACGAAAAGATGCATTTTCATGCTCGTAGTGTCGAATATTATTATCTTATGGTTTCC
+ACAATAAGACATCATGTCCCTCTATATTTTTAGCTTCCACATCTGTAAATCCATGATGTA
+AAAAGAAATCCTTAGAATCATTTCTGCCAATGGCTTTAATTGGCATATTGAAACTCTTTG
+CAAAATCAATCAATTCTGAAGCGTACCCTCTGTTTTGATATTTTGGTAATACTTCTAACT
+TCCATAATAATATATAATCTTCAAAATCTGGGAAGTAGATTTCTTCGACATCACCTTTTT
+TTAGTAATGCCATTCTAGCTCCTAATTGATCTCCGACAAATATGCCATAAAATGGTGAAT
+CTGAACTTGCATCAATCATTTGACCGTTTAACTCATTGACCATGTATAAGTCTTTGTTGC
+CAAACGCTCTAAAGTTTTCGAATAATTCGTCAGTTTTGTAATTAATTTCAAGACGTTTGA
+TTTCACTCATATTTATCTACCCCTTATTTGTTTACTACTATTATTATACATTAAACCACT
+TGTGTTTTTCATTGATTTGAAAATGTAAAACAATGAACATTATAAATTTTCCATGATTAA
+ATTGATTTTCAAGCCGTAAAATTGTCAATCACCGGTATAGACTTTATACTATAGTTTGTA
+ATCGCGTACTTAAGGAGAAAAAACATGGACTGTAAAGTAGTTAGTTTAAATGAAAAAGAT
+CAGTTTATACCAAAAATAAAGAGCAGTGACCCTGTAATAACAGGATTATTTCAATATGAT
+GCAGCTCAACAAACTAGTTTTGAAAAAAGGATGTCTAAAGAAAATAATGGAAGAGAAGCG
+GCATTAGCGAATGTTATTCGTGAATATATGAGTGATTTAAAGCTTTCAAATGAACAAGAA
+TTAAACATACAACATTTAGCTAATGGTTCAAAAGTTGTGATTGGTGGACAACAAGCAGGG
+CTTTTCGGGGGACCATTGTATACATTCCATAAAATATTTTCAATCATTACTTTATCTAAG
+GAATTAACGGATACACATAAGCAACAAGTAGTACCAGTTTTTTGGATTGCAGGAGAAGAT
+CATGATTTCGATGAAGTGAATCATACATTTGTTTATAACGAAAATCATGGGTCGCTGCAT
+AAGGTTAAATATCATACAATGGAGATGCCAGAGACGACAGTCTCTAGATATTATCCTGAT
+AAGGCTGAGTTGAAACAAACTTTAAAAACGATGTTCATTCATATGAAAGAAACTGTTCAT
+ACACAAGGTCTACTGGAGATTTGTGACAGAATTATTGACCAATATGACTCGTGGACTGAT
+ATGTTTAAAGCACTACTGCATGAAACATTTAAAGCATATGGCGTTCTATTTATAGATGCG
+CAGTTTGAGCCGTTAAGAAAAATGGAAGCGCCTATGTTTAAAAAGATTTTGAAAAAACAT
+CAGTTGCTTGATGATGCTTTTAGAGCAACACAACAACGTACTCAAAATCAAGGCTTGAAT
+GCGATGATACAAACAGATACAAATGTTCATTTATTCTTACATGATGAAAATATGCGCCAA
+TTAGTTTCGTATGATGGTAAGCATTTTAAATTAAATAAAACAGATAAGACATATATAAAG
\ No newline at end of file
diff --git a/t/data/example_annotation.gff.proteome.faa.expected b/t/data/example_annotation.gff.proteome.faa.expected
new file mode 100644
index 0000000..78f8a5d
--- /dev/null
+++ b/t/data/example_annotation.gff.proteome.faa.expected
@@ -0,0 +1,75 @@
+>abc_00001
+MKTRIVSSVTTTLLLGSILMNPVANAADSDINIKTGTTDIGSNTTVKTGDLVTYDKENGM
+HKKVFYSFIDDKNHNKKLLVIRTKGTIAGQYRVYSEEGANKSGLAWPSAFKVQLQLPDNE
+VAQISDYYPRNSIDTKEYMSTLTYGFNGNVTGDDTGKIGGLIGANVSIGHTLKYVQPDFK
+TILESPTDKKVGWKVIFNNMVNQNWGPYDRDSWNPVYGNQLFMKTRNGSMKAAENFLDPN
+KASSLLSSGFSPDFATVITMDRKASKQQTNIDVIYERVRDDYQLHWTSTNWKGTNTKDKW
+TDRSSERYKIDWEKEEMTN*
+>abc_00002
+MKLFYIVFLIIIWLNIFLGNEIIHTLTVLITTLYIVNSRKGIKNDRVE*
+>abc_00003
+MTELNNIINSLQSLFESESGYKISKNSGVPYQTVQDLRNGKTKLEDARFRTIIKLYSYYV
+SLKEH*
+>abc_00004
+MSKNITKNIILTTTLLLLGTVLPQNQKPVFSFYSEAKAYSIGQDETNINELIKYYTQPHF
+SFSNKWLYQYDNGNIYVELKRYSWSAHISLWGAESWGNINQLKDRYVDVFGLKDKDTDQL
+WWSYRETFTGGVTPAAKPSDKTYNLFVQYKDKLQTIIGAHKIYQGNKPVLTLKEIDFRAR
+EALIKNKILYTENRNKGKLKITGGGNNYTIDLSKRLHSDLANVYVKNPNKITVDVLFD*
+>abc_00005
+MNNNITKKIILSTTLLLLGTAFTQFPNTPINSSSEAKAYYINQNETNVNELTKYYSQKYL
+TFSNSTLWQKDNGTIHATLLQFSWYSHIQVYGPESWGNINQLRNKSVDIFGIKDQETIDS
+FALSQETFTGGVTPAATSNDKHYKLNVTYKDKAETFTGGFPVYEGNKPVLTLKELDFRIR
+QTLIKSKKLYNNSYNKGQIKITGTDNNYTIDLSKRLPSTDANRYVKKPQNAKIEVILEKS
+N*
+>abc_00006
+MKKNIMNKLVLSTALLLLGTTSTQLPKTPISFSSEAKAYNISENETNINELIKYYTQPHF
+SLSGKWLWQKPNGSIHATLQTWVWYSHIQVFGSESWGNINQLRNKYVDIFGTKDEDTVEG
+YWTYDETFTGGVTPAATSSDKPYRLFLKYSDKQQTIIGGHEFYKGNKPVLTLKELDFRIR
+QTLIKNKKLYNGEFNKGQIKITADGNNYTIDLSKKLKLTDTNRYVKNPKNAQIEVILEKS
+N*
+>abc_00007
+MKNLRNRSFLTLLDFSRQEVEFLLTLSEDLKRAKYIGTEKPMLKNKNIALLFEKDSTRTR
+CAFEVAAHDQGANVTYLGPTGSQMGKKETTKDTARVLGGMYDGIEYRGFSQRTVETLAEN
+SGVPVWNGLTDEDHPTQVLADFLTAKEVLKKDYADINFTYVGDGRNNVANALMQGAAIMG
+MNFHLVCPKELNPTDELLNRCKNIAAENGGNILITDDIDQGVKGSDVIYTDVWVSMGEPD
+EVWKERLELLKPYQVNKEMMDKTGNPNVIFEHCLPSFHNADTKIGQQIFEKYGIREMEVT
+DEVFESKASVVFQEAENRMHTIKAVMVATLGEF*
+>abc_00008
+MMAKIVVALGGNALGKSPQEQLELVKNTAKSLVGLITKGHEIVISHGNGPQVGSINLGLN
+YAAEHNQGPAFPFAECGAMSQAYIGYQLQESLQNELHSIGMDKQVVTLVTQVEVDENDPA
+FNNPSKPIGLFYNKEEAEQIQKEKGFIFVEDAGRGYRRVVPSPQPISIIELESIKTLIKN
+DTLVIAAGGGGIPVIREQHDGFKGIDAVIDKDKTSALLGANIQCDQLIILTAIDYVYINF
+NTENQQPLKTTNVDELKRYIDENQFAKGSMLPKIEAAISFIENNPKGSVLITSLNELDAA
+LEGKVGTVIKK*
+>abc_01705
+VENTINESEKKKRFKLKMPGAFMILFILTVVAVIATWVIPAGAYSKLSYEPSSQELKIVN
+PHNQVKKVPGTQQELDKMGVKIKIEQFKSGAINKPVSIPNTYERLKQHPAGPEQITSSMV
+EGTIEAVDIMVFILVLGGLIGVVQASGSFESGLLALTKKTKGHEFMLIVFVSILMIIGGT
+LCGIEEEAVAFYPILVPIFIALGYDSIVSVGAIFLASSVGSTFSTINPFSVVIASNAAGT
+TFTDGLYWRIGACIVGAIFVISYLYWYCKKIKNDPKASYSYEDKDAFEQQWSVLKDDDSA
+HFTLRKKIILTLFVLPFPIMVWGVMTQGWWFPVMASAFLIFTIIIMFIAGTGKSGLGEKG
+TVDAFVNGASSLVGVSLIIGLARGINLVLNEGMISDTILHFSSSLVQHMSGPLFIIVLLF
+IFFCLGFIVPSSSGLAVLSMPIFAPLADTVGIPRFVIVTTYQFGQYAMLFLAPTGLVMAT
+LQMLNMRYSHWFRFVWPVVAFVLIFGGGVLITQVLIYS*
+>abc_00010
+MTHLTKVLDTLTGICVVLLFSKYFVAYANMVFDWNLRWYLLENIPHLPIILFILMFIFGV
+PSEMIKDRQRKNNGV*
+>abc_00011
+MRLQKAPLVTSGLVLGLLGLGNLLKDLSLTLNAVCGIFAFLIWIHLLCTMIKYFNNVKEQ
+LNSPLVSSVFTTFFMSGFLGTTYLNTFFSNITFINSLITPIWILCLVGIMTHMIIFSIKY
+LKDFSLENVYPSWTVLFIGIAIAGLTAPVSGCFFIGQLTVIYGFVATCIVLPIVFKRLKA
+FPLQTSIKPNTSTICAPFSLVAAAYVIAFPKANAFIVIIFLILAQIFYFYIIIQLPKLLK
+EPFSPVFSAFTFPLVISATALKNSLPVLMFPDIWKGLLFIEVLLATVIVLRVFIGYLHFF
+LKKENQDKFLRNASQ*
+>abc_00012
+MRNQIQKLLDSDLSSLHISKQTGVPQSTIHRMRKKERSLDNMSLKNAELLYKFANSIFSN
+EN*
+>abc_00014
+MEGLFNAIKDTVTAAINNDGAKLGTSIVSIVENGVGLLGKLFGF*
+>abc_00015
+MTGLAEAIANTVQAAQQHDSVKLGTSIVDIVANGVGLLGKLFGF*
+>abc_00016
+LGYKNILIDFDDTIVDFYDAEEWAFHYMANVFNHKATKDDFLTFKKINHQHWEAFQQNKL
+TKSEVLSERFVNYFKHHQMEVDGHRADVLFRNGLAEAKVKYFDQTLETIVELSKRHDLYI
+VTNGVTETQKRRLNQTPLHKYIKKIFISEETGYQKPNPEFFNYVFNDIGEDERQHSIIVG
+DSLTSDILGGINAGIATCWFNFRGFDHNPGIIPDYEINSWKQLNDIVR*
diff --git a/t/data/example_annotation_2.gff b/t/data/example_annotation_2.gff
new file mode 100644
index 0000000..ce5e1bd
--- /dev/null
+++ b/t/data/example_annotation_2.gff
@@ -0,0 +1,271 @@
+##gff-version 3
+##sequence-region abc|SC|contig000001 1 15000
+abc|SC|contig000001	Prodigal:2.60	CDS	172	1131	.	-	0	ID=abc_00001;gene=hly;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q2G1X0,protein motif:TIGRFAMs:TIGR01002,protein motif:Pfam:PF07968.6;locus_tag=abc_00001;product=Alpha-toxin,beta-channel forming cytolysin,Leukocidin/Hemolysin toxin family protein;protein_id=gnl|SC|abc_00001
+abc|SC|contig000001	Prodigal:2.60	CDS	1804	1950	.	+	0	ID=abc_00002;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00002;product=hypothetical protein;protein_id=gnl|SC|abc_00002
+abc|SC|contig000001	Prodigal:2.60	CDS	1934	2131	.	+	0	ID=abc_00003;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00003;product=hypothetical protein;protein_id=gnl|SC|abc_00003
+abc|SC|contig000001	Prodigal:2.60	CDS	2621	3337	.	-	0	ID=abc_00004;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13350,protein motif:Pfam:PF02876.11;locus_tag=abc_00004;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00004
+abc|SC|contig000001	Prodigal:2.60	CDS	3445	4170	.	-	0	ID=abc_00005;gene=speH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0C0I6,protein motif:CLUSTERS:PRK13349,protein motif:Pfam:PF02876.11;locus_tag=abc_00005;note=SPE H;product=hypothetical protein,superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00005
+abc|SC|contig000001	Prodigal:2.60	CDS	4265	4990	.	-	0	ID=abc_00006;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13043,protein motif:Pfam:PF02876.11;locus_tag=abc_00006;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00006
+abc|SC|contig000001	Prodigal:2.60	CDS	5428	6429	.	+	0	ID=abc_00007;eC_number=2.1.3.3;gene=argF;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P99073,protein motif:CLUSTERS:PRK04284,protein motif:Cdd:COG0078,protein motif:TIGRFAMs:TIGR00658,protein motif:Pfam:PF00185.1;locus_tag=abc_00007;product=Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Aspartate/ornithine carbamoyltransferase [...]
+abc|SC|contig000001	Prodigal:2.60	CDS	6449	7384	.	+	0	ID=abc_00008;eC_number=2.7.2.2;gene=arcC1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q7A627,protein motif:CLUSTERS:PRK12353,protein motif:TIGRFAMs:TIGR00746,protein motif:Pfam:PF00696.22;locus_tag=abc_00008;product=Carbamate kinase 1,putative amino acid kinase,carbamate kinase,Amino acid kinase family protein;protein_id=gnl|SC|abc_00008
+abc|SC|contig000001	RNAmmer:1.2	rRNA	7556	9112	.	+	0	ID=abc_01705;inference=COORDINATES:profile:RNAmmer:1.2;locus_tag=abc_01705;product=16S ribosomal RNA
+abc|SC|contig000001	Prodigal:2.60	CDS	9419	9646	.	+	0	ID=abc_00010;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00010;product=hypothetical protein;protein_id=gnl|SC|abc_00010
+abc|SC|contig000001	Prodigal:2.60	CDS	9952	10899	.	-	0	ID=abc_00011;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF03595.11;locus_tag=abc_00011;product=C4-dicarboxylate transporter/malic acid transport protein;protein_id=gnl|SC|abc_00011
+abc|SC|contig000001	Prodigal:2.60	CDS	11148	11336	.	+	0	ID=abc_00012;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00012;product=hypothetical protein;protein_id=gnl|SC|abc_00012
+abc|SC|contig000001	Aragorn:1.2.34	tRNA	11803	11878	.	-	0	ID=abc_00013;inference=COORDINATES:profile:Aragorn:1.2.34;locus_tag=abc_00013;product=tRNA-Arg(tct)
+abc|SC|contig000001	Prodigal:2.60	CDS	12241	12375	.	+	0	ID=abc_00014;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5;locus_tag=abc_00014;product=Gonococcal growth inhibitor III,Staphylococcus haemolytic protein;protein_id=gnl|SC|abc_00014
+abc|SC|contig000001	Prodigal:2.60	CDS	12432	12566	.	+	0	ID=abc_00015;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5;locus_tag=abc_00015;product=Gonococcal growth inhibitor III,Staphylococcus haemolytic protein;protein_id=gnl|SC|abc_00015
+abc|SC|contig000001	Prodigal:2.60	CDS	12699	13385	.	+	0	ID=abc_00016;eC_number=3.-.-.-;gene=yfnB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O06480,protein motif:CLUSTERS:PRK09449,protein motif:TIGRFAMs:TIGR02254,protein motif:Pfam:PF00702.1;locus_tag=abc_00016;product=Putative HAD-hydrolase yfnB,dUMP phosphatase,HAD hydrolase,haloacid dehalogenase-like hydrolase;protein_id=gnl|SC|abc_00016
+##FASTA
+>abc|SC|contig000001
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+ATACCCACCACATTTGGTGGAGAACCAAAAATTAGCCGAAAAACATCATTTCTGAAGTTA
+TCGGCTAAAGTTATAAATTATATTTATTTGTACATGAACAAATAATTTACATTAATTTGT
+CATTTCTTCTTTTTCCCAATCGATTTTATATCTTTCTGAAGAACGATCTGTCCATTTATC
+TTTAGTATTGGTACCTTTCCAATTTGTTGAAGTCCAATGCAATTGGTAGTCATCACGAAC
+TCGTTCGTATATTACATCTATATTTGTTTGTTGTTTGGATGCTTTTCTATCCATAGTAAT
+AACTGTAGCGAAGTCTGGTGAAAACCCTGAAGATAATAGAGAACTTGCTTTGTTAGGATC
+AAGGAAGTTCTCTGCTGCTTTCATAGAACCATTTCTAGTTTTCATGAAAAGTTGATTGCC
+ATATACCGGGTTCCAAGAATCTCTATCATATGGTCCCCAATTTTGATTCACCATATTGTT
+AAATATCACTTTCCAGCCTACTTTTTTATCAGTTGGGCTCTCTAAAATTGTTTTGAAATC
+AGGTTGAACATATTTCAGTGTATGACCAATCGAAACATTTGCACCAATAAGGCCGCCAAT
+TTTTCCTGTATCATCACCAGTAACATTACCGTTGAATCCATAAGTTAAAGTACTCATATA
+CTCTTTTGTATCAATCGAATTTCTTGGATAGTAATCAGATATTTGAGCTACTTCATTATC
+AGGTAGTTGCAACTGTACCTTAAAGGCTGAAGGCCAGGCTAAACCACTTTTGTTAGCACC
+TTCTTCGCTATAAACTCTATATTGACCAGCAATGGTACCTTTCGTTCTAATAACTAGCAG
+TTTTTTATTGTGATTTTTATCATCGATAAAACTATAAAATACTTTTTTGTGCATGCCATT
+TTCTTTATCATAAGTGACTAAATCACCTGTTTTTACTGTAGTATTGCTTCCAATATCTGT
+AGTACCGGTTTTAATATTAATATCAGAATCTGCGGCATTAGCGACAGGATTCATTAATAT
+GGAACCTAGCAATAGTGTTGTTGTTACTGAGCTGACTATACGTGTTTTCATTTTCATCAT
+CCTTCTATTTTTTTTAACGATTTGAGGAAACAATAATCAATATGTCAATTTAGAATATTG
+CAGTTGAGACATATCAAATATTTATAATCGTTTATATTAGTATATTTAATGTAGCAACTG
+ATAAATTACTGAGTGATGATGAGTGATTATTTTAAGAATATGTTTTTAACTTTTATTTAA
+AATTTGAAAGGAAGCATTTCAATTTCGAGGGTTAGTCAAAGTTGAATAAATTCTTTATGA
+AACAAGGAAAAGACATAGCTAATTTTATTGATTAATTTCTTTAAAACTAATGATTTGTTT
+GATTTAAAAATGTAATCGATTACAATATAAAAATACAAATATCTTAGAATTAAATCAATT
+AATTAACTATTAAATAAAAATTAAATATATATTAACTACTGTAAATTAATAAATAGAAAT
+AGAGAAAAAGGGTATTAATTATGTTTGGAATTTCGTCGGGAATAATTTTTGCCATTACAT
+AGAAATATCTAATAGATAATGAAAAAGTATCGTATGTATTTTTAATATAGTGTAAAATAT
+CATATGTAAAATAAAATGTAGATTTTTAGTTAGAGGCATTATAAGAAAATTTTGAGTATA
+GGTTAGCTTTTAATTATGAATCTTATTGAAATTTGATTAATAAATATATGATAGGGGATT
+AAAATGAAACTATTTTATATCGTATTTCTTATTATTATATGGCTGAATATATTTTTAGGA
+AATGAAATTATCCATACACTGACTGTTTTAATAACAACATTGTATATTGTTAATTCAAGA
+AAGGGGATTAAAAATGACAGAGTTGAATAATATTATAAACTCTCTTCAATCTTTGTTTGA
+GTCTGAATCAGGCTATAAAATTTCGAAAAATTCAGGAGTTCCATATCAAACAGTACAAGA
+TTTAAGAAATGGGAAAACCAAACTAGAAGATGCTAGATTTAGAACGATTATTAAACTTTA
+TAGTTACTATGTCTCATTAAAAGAACATTAATCATGGGACAATAAAGTATTGCTATAATA
+ATTGAATCATTAAATGAAGGAATAGAGCCTAAGACATAATAAATCAATGTCTTAGGCTCT
+ACAATATTATATTGGTAGTAGTTGACCGAATGAAAATGACCATGTAACAAGCATTTTTGG
+GGCCCCAACACAGAAGCTGACGAAAAGTCAGCTTACAATAATGTGCAAGTTGGGGATGGG
+CCCCAACACAGAGAATTTCAAAAAGAAATTCTACAGACAATGCAAGTTGGCGGGGCCCCA
+ACATAGAGAAATTCAAAAAGAAATTCTACAGACAATGCAAGTTGGGGAGTGACAACGAAA
+TAAATTTTATAAAATACCATTTCTGTCACATTCCTACTCCCAATTCCATTTAAATATATA
+GAAAATTTCACCTATTATATACATCATACACTTTAAAAATTATCATTTAAATAATCTGAT
+TTGGAATTAAAGTGCATGAAGTATAAGTCACCTTCATATACTAATCAAAGAGGACGTCAA
+CAGTTATTTTATTAGGATTTTTAACATAAACATTTGCTAGATCTGAATGTAATCTTTTGC
+TTAAATCAATAGTGTAGTTATTACCGCCACCGGTGATCTTAAGCTTACCTTTATTACGAT
+TTTCGGTATATAATATTTTATTTTTTATTAACGCTTCTCGTGCACGGAAATCGATTTCTT
+TCAATGTTAATACTGGTTTATTGCCTTGGTATATTTTATGCGCACCAATAATCGTTTGTA
+GTTTATCTTTGTATTGCACAAAAAGATTATAAGTTTTATCAGAAGGTTTTGCGGCTGGTG
+TAACGCCACCTGTAAATGTCTCTCTATAAGACCACCATAACTGATCAGTATCTTTGTCTT
+TTAGTCCAAACACATCTACGTAACGATCTTTTAACTGATTAATATTTCCCCAACTTTCAG
+CGCCCCATAAAGATATATGTGCTGACCATGAATATCTCTTAAGTTCAACATAAATGTTTC
+CATTATCATATTGATATAGCCATTTATTTGAAAATGAAAAATGAGGCTGTGTGTAATATT
+TAATTAATTCATTGATGTTAGTTTCATCTTGACCAATGCTATAAGCTTTAGCTTCAGAGT
+AAAAACTAAATACTGGTTTTTGATTTTGAGGTAATACAGTACCTAATAGTAATAATGTTG
+TCGTTAAAATTATATTTTTCGTGATGTTCTTACTCATTAGAACATCTCCTTTCAGAGGAA
+TCATGATACGAGGAATAAGAAATTTAAATGTGAGCGAAGTCAATATAGTATTTGCGATTA
+TTTTTATTAACTCCATTATTATTGTTAGTTTGATTTTTCGAGGATAACTTCAATTTTTGC
+ATTTTGAGGTTTTTTAACATATCTATTTGCATCAGTTGATGGCAACCTTTTACTTAAATC
+TATTGTGTAGTTATTGTCTGTACCTGTTATTTTAATTTGTCCTTTATTATAAGAATTATT
+ATATAATTTTTTACTTTTAATTAATGTTTGACGAATACGAAAATCTAATTCTTTTAAAGT
+TAAAACAGGCTTATTGCCTTCATAAACTGGAAATCCGCCAGTAAACGTTTCTGCTTTATC
+TTTATATGTTACATTCAGTTTATAGTGTTTATCGTTAGATGTTGCTGCAGGAGTAACACC
+ACCAGTAAACGTTTCTTGAGATAATGCAAAAGAATCAATGGTTTCTTGGTCTTTTATGCC
+AAAAATATCAACGCTTTTATTTCTTAATTGGTTGATATTGCCCCAACTTTCAGGTCCATA
+AACTTGAATATGACTATACCAAGAAAACTGTAACAACGTTGCATGAATCGTACCGTTATC
+TTTTTGCCATAACGTACTGTTAGAGAAGGTTAAATATTTTTGCGAGTAATATTTAGTTAA
+CTCATTAACGTTAGTTTCGTTTTGATTTATATAATAAGCTTTCGCTTCAGATGAAGAATT
+GATAGGTGTATTAGGAAATTGTGTAAATGCTGTACCTAATAGTAACAATGTTGTTGATAA
+AATAATTTTTTTCGTGATGTTATTGTTCATTCGAATTTCTCCTTTGAGTATTGTTGGAAT
+GTTTAATTATAAAAAGGGGTTAATTAGATAATTGAAATTATCCGCATTTACAAAAGGTAA
+TAGGTTAGTTAGATTTTTCGAGTATGACTTCAATTTGTGCATTTTTAGGATTTTTAACAT
+AACGGTTTGTGTCAGTTAATTTTAACTTTTTACTTAAATCAATCGTGTAATTATTTCCAT
+CAGCAGTTATCTTAATTTGACCTTTATTAAATTCTCCGTTATATAACTTTTTATTCTTTA
+TTAATGTTTGACGAATACGGAAATCTAATTCTTTTAAAGTTAATACTGGTTTATTTCCTT
+TGTAAAATTCATGTCCACCGATGATAGTTTGTTGTTTATCACTATATTTTAAAAATAGTC
+TATAAGGTTTATCAGATGAAGTAGCTGCTGGCGTAACACCACCAGTAAATGTTTCATCAT
+AAGTCCAGTAACCTTCAACTGTGTCCTCATCTTTAGTTCCAAATATATCAACGTATTTAT
+TTCTTAACTGATTAATGTTTCCCCAACTCTCGGATCCAAACACTTGAATATGACTATACC
+AAACCCACGTTTGCAATGTTGCATGAATGCTACCATTGGGCTTTTGCCATAGCCATTTTC
+CAGATAATGAAAAATGCGGCTGAGTGTAATATTTGATTAACTCATTGATATTAGTCTCGT
+TTTCACTGATATTATAGGCTTTTGCTTCAGATGAAAAACTGATTGGTGTTTTAGGAAGTT
+GTGTTGATGTGGTTCCTAAAAGTAACAATGCTGTTGATAAAACTAATTTATTCATGATGT
+TCTTTTTCATATGAAAATCTCCTTTGCGTGAATTACCCAAAGTATATAAGCTATTACACC
+GATTCGGAATTAAATAAAAGCTAAAACTATGTTAAATAAACTTAAACAGTTAGTAGTGTT
+ATTTAAGCAAAACTTATCATTTTTAAGTTTTGGACAGAAACAGTACTTAATAAAGTAGGC
+GGGAGTTATATATTAAAAACGACACGTTACAATTATTCTCTAATCAATTGCATTAAATTG
+TTTGATAATTGAATTTTCTAACTACTTGAAAAATAGTTATACTTTAAATGTAGTACTTAT
+TTTAATTATTTCCTACTACTTAAATTTAATATTAATAAAATGTTCATTTAATTATTGATA
+AAATATTACAAATTTTAATAGTAGGTTGTGTTTATTTTGTATGCGCTTACAATTTAGGTG
+TAACTAAAATAAAAGGAGTTGTTATTAATGAAAAATTTACGAAACAGAAGTTTTTTAACT
+TTATTAGACTTTTCACGACAAGAGGTAGAATTCTTATTAACACTCTCCGAGGATTTAAAA
+CGTGCTAAATATATTGGCACTGAAAAGCCTATGTTAAAAAATAAAAATATTGCACTGTTA
+TTTGAAAAAGATTCTACAAGAACGCGATGTGCATTTGAAGTTGCAGCGCATGATCAAGGT
+GCAAATGTAACTTATTTAGGCCCAACTGGATCACAAATGGGTAAAAAAGAAACAACTAAA
+GATACTGCACGTGTGCTTGGTGGAATGTATGATGGCATTGAATACCGTGGTTTTTCACAA
+AGAACAGTAGAAACTTTAGCTGAAAATTCAGGCGTACCAGTGTGGAATGGTTTAACTGAT
+GAAGATCATCCTACTCAAGTTCTTGCTGATTTCTTAACAGCAAAAGAAGTCTTAAAAAAA
+GATTATGCAGATATTAACTTTACATATGTTGGAGATGGTCGTAATAACGTTGCAAATGCA
+TTAATGCAAGGTGCTGCCATTATGGGTATGAACTTCCATTTAGTTTGTCCAAAAGAATTA
+AATCCAACAGATGAATTATTAAATCGCTGTAAAAATATTGCCGCTGAAAATGGTGGCAAC
+ATATTAATCACAGATGATATTGACCAAGGTGTAAAAGGTTCGGATGTAATTTACACTGAT
+GTTTGGGTATCAATGGGTGAACCTGATGAAGTATGGAAAGAACGACTTGAATTATTGAAA
+CCATATCAAGTAAATAAAGAAATGATGGATAAAACTGGTAATCCAAATGTTATTTTTGAG
+CATTGCTTACCATCTTTCCATAATGCTGATACGAAAATTGGTCAACAAATTTTTGAAAAA
+TATGGTATTCGAGAAATGGAAGTTACAGATGAAGTATTCGAAAGTAAAGCTTCAGTTGTA
+TTCCAAGAAGCTGAGAACAGAATGCATACAATCAAAGCAGTCATGGTTGCTACATTGGGT
+GAATTTTAAATGATATAAGGAAGTGAATATGATGGCGAAAATAGTAGTAGCATTAGGTGG
+TAATGCTTTAGGAAAATCACCTCAAGAACAACTCGAGCTTGTTAAAAATACTGCGAAATC
+ATTAGTAGGATTAATAACAAAAGGACATGAGATTGTTATTAGTCATGGTAATGGACCACA
+GGTTGGAAGCATTAATTTGGGACTTAACTATGCTGCAGAACATAACCAAGGTCCGGCATT
+TCCATTTGCTGAATGTGGCGCAATGAGTCAAGCTTACATCGGCTATCAATTACAAGAAAG
+CTTACAAAATGAATTGCATTCTATTGGAATGGATAAACAAGTGGTAACACTAGTGACACA
+AGTTGAAGTTGATGAAAATGATCCGGCATTTAACAATCCTTCAAAACCAATTGGGTTATT
+TTACAACAAAGAAGAAGCTGAACAAATTCAAAAAGAAAAAGGATTTATATTTGTTGAAGA
+TGCTGGAAGAGGATATAGACGCGTTGTTCCTTCACCACAACCCATCTCTATTATTGAATT
+AGAGAGTATTAAAACACTTATTAAAAATGATACACTCGTTATTGCTGCTGGTGGTGGAGG
+TATACCAGTAATTAGAGAGCAACATGATGGTTTTAAAGGTATTGATGCAGTTATAGACAA
+AGATAAAACAAGTGCGTTGTTGGGTGCTAATATTCAATGCGATCAATTGATTATTTTAAC
+AGCAATTGATTATGTATATATTAATTTTAACACTGAAAACCAACAGCCTTTGAAAACAAC
+AAATGTTGATGAATTAAAACGATATATAGACGAAAATCAATTTGCAAAAGGAAGTATGTT
+ACCAAAAATTGAAGCAGCCATATCATTTATTGAAAACAATCCAAAAGGAAGTGTGCTTAT
+AACATCATTAAATGAATTAGATGCTGCCTTAGAGGGTAAAGTAGGTACTGTGATTAAAAA
+GTAATTGAATTGAAACGCTTTTCAATTACTATATGTCAAATGCATGATTTTTATTATTTA
+TGTGCACCCCCTGAAAATAATGCCTCTATTTTGATGCGGGGTGCACTTTCTTAATTTATA
+TTTATAAAATCTTTAAGGTAGAAATGCTAGGTTAAATGTCGAAGGAGATGAAACCGTGGA
+AAATACAATTAATGAAAGTGAAAAGAAAAAACGATTTAAATTAAAAATGCCAGGTGCATT
+TATGATTTTATTCATTTTAACGGTTGTTGCAGTTATAGCAACATGGGTTATTCCTGCTGG
+TGCATATTCTAAACTTTCTTACGAACCTTCATCCCAAGAACTAAAGATAGTTAACCCTCA
+TAACCAAGTGAAAAAGGTTCCGGGTACGCAACAGGAACTAGACAAAATGGGGGTTAAAAT
+TAAGATTGAACAATTTAAATCAGGTGCAATTAATAAGCCAGTATCAATTCCGAATACTTA
+TGAAAGATTAAAGCAACATCCAGCTGGACCAGAACAAATAACAAGTAGCATGGTTGAAGG
+TACGATAGAAGCGGTCGATATCATGGTATTCATTCTTGTACTAGGGGGACTTATTGGCGT
+AGTTCAAGCCAGTGGTTCTTTTGAATCGGGATTGTTAGCTTTAACGAAGAAAACAAAAGG
+GCATGAATTTATGCTAATTGTGTTTGTATCAATACTAATGATTATCGGCGGGACGTTATG
+TGGTATTGAAGAAGAAGCTGTAGCATTCTATCCGATTTTAGTCCCTATATTTATAGCGTT
+AGGATACGATTCTATCGTTTCAGTTGGTGCCATATTCCTTGCCAGCTCTGTCGGTAGTAC
+ATTTTCAACTATTAACCCGTTCTCGGTTGTAATTGCCTCTAATGCCGCTGGTACAACTTT
+TACGGATGGCTTGTATTGGAGAATAGGTGCTTGTATTGTCGGTGCGATTTTTGTTATTAG
+TTATTTATATTGGTACTGTAAAAAAATTAAAAACGATCCTAAAGCGTCATATTCTTATGA
+AGACAAAGATGCTTTTGAACAGCAATGGTCTGTATTAAAAGATGACGATAGTGCCCATTT
+TACTTTGCGTAAGAAGATAATCCTTACATTATTTGTACTACCATTTCCAATTATGGTATG
+GGGAGTTATGACGCAAGGTTGGTGGTTCCCAGTTATGGCTTCAGCATTTTTAATATTTAC
+AATTATAATAATGTTTATTGCTGGGACAGGTAAATCTGGATTGGGAGAAAAAGGAACTGT
+AGATGCATTTGTCAATGGTGCATCAAGTTTAGTAGGTGTATCTTTAATTATTGGTTTAGC
+TCGAGGTATTAATTTAGTGTTGAATGAAGGTATGATTTCAGATACAATCTTACACTTTTC
+ATCATCTTTAGTTCAACATATGAGTGGACCATTATTTATCATCGTATTACTATTTATTTT
+CTTCTGTTTAGGTTTTATCGTGCCATCATCTTCTGGATTAGCGGTATTATCAATGCCTAT
+CTTTGCACCACTAGCTGATACAGTAGGTATACCAAGATTCGTCATCGTTACGACATATCA
+ATTTGGTCAATATGCGATGTTATTCTTAGCGCCGACTGGACTTGTTATGGCCACACTACA
+AATGTTAAACATGCGATATTCACATTGGTTCCGATTTGTATGGCCGGTAGTTGCTTTTGT
+ATTGATTTTCGGTGGCGGAGTACTAATTACGCAAGTACTAATTTATTCATAATTTGAAAT
+GCTATATTATAAAAATACTAATTGGGTTTTATGCATCTCGTAGGTTTGTAGAAATACTAA
+ACTAAGCGAGGTGCATTATTATTTTTGATTAAGAAAATAATGACGGTAATGATAACACTA
+GTAAGTAATTGATACAATGCTCTATTTAATAATGATATTTTTAAAATTTGTTTTTAATGT
+AATGTTAGATCTATGGTATATTATATTTATCGTGGTAAATATGTATTTGCTGTAAACAGT
+TTTTGTAACACATGACACATATGGTTGTGAAAATAGCATATATAAAGGATGGCTATAAAT
+GACACATTTGACAAAGGTTTTAGATACACTAACTGGAATATGCGTAGTATTATTATTTAG
+TAAATATTTTGTGGCGTATGCAAATATGGTGTTTGATTGGAATTTAAGATGGTATTTGCT
+AGAAAACATACCACATTTGCCAATTATATTATTTATTCTGATGTTTATTTTCGGAGTACC
+TTCTGAAATGATAAAAGATAGGCAAAGGAAAAATAACGGTGTTTAATTTATCGATATTTA
+GAGGTGATAAAATTTGTCAACTATTACAAAGACTAAAATAAAAATCTTTATTAATTATTT
+TATTGGTATATTAGCCTTGGCGTTTGCTGCTTATATATTATTTAAATAAAGGTTAGAGTG
+AAACGTGTTTATGAACTAGACGTTCTAGTATAGTTACTACAGCTTTAGTCAGAAGGTATC
+ATTGATAAGATCATATTAAATCAAAGAGGCATTGATATACACTAAAAAGAGGCAAGATTA
+CCTGCCTCTTTTTTAGTTATTAAATATGCGTGTTAATTCTTGGTAATAGTGTTACTGAGA
+CGCATTACGAAGAAATTTATCTTGATTTTCCTTTTTTAAAAAGAAGTGAAGATATCCTAT
+AAAGACTCTAAGTACTATTACAGTGGCTAATAACACTTCGATAAACAAAAGACCTTTCCA
+AATATCTGGAAACATAAGTACAGGCAAACTGTTCTTTAAAGCAGTTGCTGAGATTACTAA
+AGGGAATGTGAAAGCTGAAAATACGGGCGAAAAAGGTTCTTTTAGTAATTTAGGCAATTG
+TATAATGATATAAAAATAAAATATTTGAGCTAATATTAAAAATATAATTACGATAAAAGC
+ATTCGCCTTAGGAAAGGCTATAACATATGCTGCAGCGACTAAAGAAAATGGTGCACAAAT
+TGTCGATGTGTTCGGTTTGATTGACGTCTGCAATGGAAATGCTTTTAATCGCTTGAAAAC
+AATAGGTAAGACAATACAAGTAGCTACAAAGCCATATATTACTGTTAATTGACCTATGAA
+AAAACATCCGCTAACGGGTGCCGTCAATCCTGCGATAGCAATACCAATAAAAAGTACAGT
+CCACGAAGGATAAACATTTTCAAGTGAAAAATCTTTTAAATATTTTATTGAAAAAATAAT
+CATATGCGTCATAATTCCCACAAGGCATAAAATCCAAATAGGCGTTATTAAGCTATTGAT
+AAAAGTTATGTTACTAAAAAATGTATTTAAATAAGTAGTACCTAAAAAGCCAGACATGAA
+AAATGTTGTGAACACTGATGAAACTAGAGGACTGTTTAATTGTTCTTTCACATTATTAAA
+ATATTTGATCATAGTACATAAAAGGTGAATCCAAATCAAGAAAGCAAAGATTCCGCAAAC
+AGCGTTTAAAGTAAGAGATAAGTCTTTTAATAGATTACCCAGGCCTAATAATCCTAAGAC
+TAGTCCTGACGTTACTAGAGGTGCTTTTTGAAGTCTCATGATTTACAACCTTTCTTATGT
+GATTTTCTTCACTAATTATATCATGATAGCTATGGCCAATTAATAAGAAGAGTGTGTACT
+ATTACGTTATTAGAGTATGTATATTGGATTGATAGACACATAATTGACATTTAAATCTCA
+AAATTAATGATATTTGAGGTATGCTTTGCAACCTAAATATTGGAATATGTGGAAAAGTAA
+TTATTTAATTTAAATGATGATTGAAGATTTACAAGGGGGTGTACAAAATGAGAAATCAAA
+TTCAAAAACTATTAGACAGTGATTTGAGCAGTTTACATATATCGAAACAAACAGGAGTTC
+CACAAAGCACAATACACAGAATGAGAAAAAAAGAAAGATCATTAGACAATATGTCATTGA
+AAAACGCTGAACTACTTTATAAATTTGCCAATAGTATATTTAGCAATGAAAATTAAGAAG
+AATAATTAGTGAGTTAAATAATTAAAAAACCACGTCTCGATGGACGTGGTTATTTTTTTA
+AATGTATTTAATTATAGGATTTCGGGCCCCTGAAAAGTCCCTAAAATTTTGAATAGTATC
+AAAATCTATAGGAATAATATAATAATAAAAATCTCTACGCATGCAAATGGTGAGTGTTAA
+AAATCTTGGTGTAGTGTTGGTGTAGTTCAGGTGCAGTATAGATGAAATTTGAGCACAAAA
+TTACTTAATTAAAAATGTATAAACAAGTGTCAGACGCGTGTCAATTGTGTGTCAAAAAAT
+TCTATGACGAATAAGCATGCTTGTTTTAATTACTGTATTTTCGGGGCTTATGCGGGGGAC
+GTTTTGGTGACGCATCATACTATATTACTGAAATTCAAAACAAAAAGAGCCCCGTAATCA
+CGGAACTCTTTTGTTTGGTAATGCGTATAAAAATACCTATAAACGTCCTGGGAGGGATTC
+GAACCCCCGACCGATGGCTTAGAAGGCCATTGCTCTATCCAGCTGAGCTACCAGGACACG
+TTTAACAACACAAGAATTATTATATCTAAATGAACTTAAATTAGCAATACCTTGTAAATA
+AAAAATGTTTATATTTTTCACTATTATAGAGCTATTTATCTAAAAAGGTTCAATAAGACT
+TAAATGCGAATTCAGGCAACTTAATTGTGTTAAATACAGTTTTGAATGCCTAACTGTATT
+TCTTTTCTCTTTAAAATACAGTTAAGTACATTATAAGATGTTGTGCGGATAAACAAACTA
+ATTGCATCAAATTTATTTTAAAATAACAACAACAAAACGTTAAGCGAATAACATTTCGGT
+GATTTAAAAGCTACGCACGTTTTTGTTATCTTCAAATTTAAATTTTAAGGAGTGTTTTCA
+ATGGAAGGTTTATTTAACGCAATTAAAGATACCGTAACTGCAGCAATTAATAATGATGGC
+GCAAAATTAGGCACAAGCATTGTGAGCATCGTTGAAAATGGCGTAGGTTTATTAGGTAAA
+TTATTCGGATTCTAATTTCAATATGTTATGTAAGTAATCAGTATTATTTCAAAGGTGAGG
+GAGAGATTTAAATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAAC
+AACATGATAGTGTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTT
+TACTAGGTAAATTATTTGGATTCTAATATAATAACTAATATTCTTTAAAATAAACTGGGT
+GAGCATACTTTAATGTTATGCACTCAGTTTATTTTATTTGCAGAAATTTGAGCCTCTGTT
+AAGATTTAGATACATAGACAATATAGGAGATGGGGAAATTGGGATATAAAAATATTTTGA
+TAGACTTTGATGATACAATTGTTGATTTTTATGATGCAGAAGAATGGGCGTTTCACTATA
+TGGCGAATGTTTTTAATCATAAAGCAACAAAGGATGATTTTTTAACATTTAAAAAAATCA
+ATCACCAACATTGGGAAGCTTTTCAACAAAATAAATTAACGAAGTCTGAAGTATTATCAG
+AACGATTTGTGAATTACTTCAAACATCATCAAATGGAAGTTGATGGGCATCGTGCAGATG
+TGTTATTTAGAAATGGATTAGCAGAAGCTAAAGTTAAATACTTTGATCAAACATTAGAAA
+CAATTGTCGAATTATCGAAAAGACATGATTTATATATTGTTACTAATGGTGTAACCGAAA
+CGCAAAAGAGAAGGTTAAATCAGACGCCGTTGCATAAATATATTAAAAAGATATTTATAT
+CTGAGGAAACAGGATATCAAAAACCTAATCCGGAATTTTTTAATTATGTTTTTAATGATA
+TTGGTGAGGATGAAAGACAGCACTCGATTATAGTTGGAGATTCTTTAACATCTGACATTC
+TAGGTGGAATCAATGCGGGTATAGCTACTTGCTGGTTTAATTTTAGAGGATTTGATCATA
+ATCCAGGAATTATACCTGATTATGAAATTAATTCATGGAAACAACTAAATGATATTGTAC
+GTTAAGACAAATAATTTGAAAATGATTTATATTAGCTTAAGAAAGTAATTGTAAAAACGA
+CACTACGAAAAGATGCATTTTCATGCTCGTAGTGTCGAATATTATTATCTTATGGTTTCC
+ACAATAAGACATCATGTCCCTCTATATTTTTAGCTTCCACATCTGTAAATCCATGATGTA
+AAAAGAAATCCTTAGAATCATTTCTGCCAATGGCTTTAATTGGCATATTGAAACTCTTTG
+CAAAATCAATCAATTCTGAAGCGTACCCTCTGTTTTGATATTTTGGTAATACTTCTAACT
+TCCATAATAATATATAATCTTCAAAATCTGGGAAGTAGATTTCTTCGACATCACCTTTTT
+TTAGTAATGCCATTCTAGCTCCTAATTGATCTCCGACAAATATGCCATAAAATGGTGAAT
+CTGAACTTGCATCAATCATTTGACCGTTTAACTCATTGACCATGTATAAGTCTTTGTTGC
+CAAACGCTCTAAAGTTTTCGAATAATTCGTCAGTTTTGTAATTAATTTCAAGACGTTTGA
+TTTCACTCATATTTATCTACCCCTTATTTGTTTACTACTATTATTATACATTAAACCACT
+TGTGTTTTTCATTGATTTGAAAATGTAAAACAATGAACATTATAAATTTTCCATGATTAA
+ATTGATTTTCAAGCCGTAAAATTGTCAATCACCGGTATAGACTTTATACTATAGTTTGTA
+ATCGCGTACTTAAGGAGAAAAAACATGGACTGTAAAGTAGTTAGTTTAAATGAAAAAGAT
+CAGTTTATACCAAAAATAAAGAGCAGTGACCCTGTAATAACAGGATTATTTCAATATGAT
+GCAGCTCAACAAACTAGTTTTGAAAAAAGGATGTCTAAAGAAAATAATGGAAGAGAAGCG
+GCATTAGCGAATGTTATTCGTGAATATATGAGTGATTTAAAGCTTTCAAATGAACAAGAA
+TTAAACATACAACATTTAGCTAATGGTTCAAAAGTTGTGATTGGTGGACAACAAGCAGGG
+CTTTTCGGGGGACCATTGTATACATTCCATAAAATATTTTCAATCATTACTTTATCTAAG
+GAATTAACGGATACACATAAGCAACAAGTAGTACCAGTTTTTTGGATTGCAGGAGAAGAT
+CATGATTTCGATGAAGTGAATCATACATTTGTTTATAACGAAAATCATGGGTCGCTGCAT
+AAGGTTAAATATCATACAATGGAGATGCCAGAGACGACAGTCTCTAGATATTATCCTGAT
+AAGGCTGAGTTGAAACAAACTTTAAAAACGATGTTCATTCATATGAAAGAAACTGTTCAT
+ACACAAGGTCTACTGGAGATTTGTGACAGAATTATTGACCAATATGACTCGTGGACTGAT
+ATGTTTAAAGCACTACTGCATGAAACATTTAAAGCATATGGCGTTCTATTTATAGATGCG
+CAGTTTGAGCCGTTAAGAAAAATGGAAGCGCCTATGTTTAAAAAGATTTTGAAAAAACAT
+CAGTTGCTTGATGATGCTTTTAGAGCAACACAACAACGTACTCAAAATCAAGGCTTGAAT
+GCGATGATACAAACAGATACAAATGTTCATTTATTCTTACATGATGAAAATATGCGCCAA
+TTAGTTTCGTATGATGGTAAGCATTTTAAATTAAATAAAACAGATAAGACATATATAAAG
\ No newline at end of file
diff --git a/t/data/example_groups b/t/data/example_groups
new file mode 100644
index 0000000..adbc922
--- /dev/null
+++ b/t/data/example_groups
@@ -0,0 +1,6 @@
+group_1: 1234#10_00001 1234#10_00002
+group_2: 1234#10_00003 1234#10_00018 1234#10_00005
+group_3: 1234#10_00005 1234#10_00005
+group_4: 1234#10_00006       1234#10_00007
+group_5: 1234#10_00016
+group_6: 1234#10_00017
\ No newline at end of file
diff --git a/t/data/example_groups_without_labels b/t/data/example_groups_without_labels
new file mode 100644
index 0000000..2c56a9a
--- /dev/null
+++ b/t/data/example_groups_without_labels
@@ -0,0 +1,6 @@
+1234#10_00001 1234#10_00002
+1234#10_00003 1234#10_00018 1234#10_00005
+1234#10_00005 1234#10_00005
+1234#10_00006       1234#10_00007
+1234#10_00016
+1234#10_00017
\ No newline at end of file
diff --git a/t/data/exp_qc_report.csv b/t/data/exp_qc_report.csv
new file mode 100644
index 0000000..83477e5
--- /dev/null
+++ b/t/data/exp_qc_report.csv
@@ -0,0 +1,4 @@
+Sample,Genus,Species
+assembly1,Clostridium,Clostridium difficile
+assembly2,Escherichia,Escherichia coli
+assembly3,Streptococcus,Streptococcus pneumoniae
diff --git a/t/data/exp_qc_report_real.csv b/t/data/exp_qc_report_real.csv
new file mode 100644
index 0000000..3d18a94
--- /dev/null
+++ b/t/data/exp_qc_report_real.csv
@@ -0,0 +1,3 @@
+Sample,Genus,Species
+query_1,Staphylococcus,Staphylococcus aureus
+query_2,Staphylococcus,Staphylococcus aureus
diff --git a/t/data/expected_0.seq b/t/data/expected_0.seq
new file mode 100644
index 0000000..fcf59dd
--- /dev/null
+++ b/t/data/expected_0.seq
@@ -0,0 +1,4 @@
+>1234#10_00001 nudix hydrolase
+MSKHGFFQITQKLFLRKGDELLILRDRKSGLGDLPGGRMNENEFFEDWSLSMQREIEEEL
+GSQVQIRVSTKPLFIHKHKVNEGNFPCIIIAYHADYLGGDIILSDEHDYISWEKVQTYEP
+SPLFTEYMLDAVNLYLKEYAPLVH*
diff --git a/t/data/expected_5.seq b/t/data/expected_5.seq
new file mode 100644
index 0000000..635abbe
--- /dev/null
+++ b/t/data/expected_5.seq
@@ -0,0 +1,6 @@
+>1234#10_00007 1-(5-phosphoribosyl)-5-[(5- phosphoribosylamino)methylideneamino] imidazole-4-carboxamide isomerase
+MIVIPAIDLFDNCAVRLFKGNYEEKKIYSSEPWKLAESFAKNGATLLHLVDLNGARNQLG
+VNEDSILKIRETTSLKVQLGGGIRDKEKLAYYDKIGINRFILGTAAVTNPDLLKYALDNY
+GKERVVVAVDARDGIVKIAGWEKDSGIHYRDLLERLVKAGIEHIVFTDIAQDGTLAGPNL
+EAYREILNSYPFQVIASGGIASLKDLMDLSSLKTKISLYGVITGKALYEGKLDLAKAISS
+I*
diff --git a/t/data/expected_accessory_binary_genes.fa b/t/data/expected_accessory_binary_genes.fa
new file mode 100644
index 0000000..b8b45df
--- /dev/null
+++ b/t/data/expected_accessory_binary_genes.fa
@@ -0,0 +1,8 @@
+>aaa
+AAAA
+>bbb
+CAAA
+>ccc
+CCAA
+>ddd
+CCCA
diff --git a/t/data/expected_accessory_binary_genes_bounded.fa b/t/data/expected_accessory_binary_genes_bounded.fa
new file mode 100644
index 0000000..2663ac3
--- /dev/null
+++ b/t/data/expected_accessory_binary_genes_bounded.fa
@@ -0,0 +1,8 @@
+>aaa
+AA
+>bbb
+AA
+>ccc
+CA
+>ddd
+CC
diff --git a/t/data/expected_clustered_proteins b/t/data/expected_clustered_proteins
new file mode 100644
index 0000000..6646b09
--- /dev/null
+++ b/t/data/expected_clustered_proteins
@@ -0,0 +1,9 @@
+group_5: 1234#10_00006	3333#10_00006
+group_1: 1234#10_00001	3333#10_00001
+group_6: 1234#10_00007	3333#10_00007
+group_9: 1234#10_00018	3333#10_00018
+group_3: 1234#10_00003	3333#10_00003
+group_7: 1234#10_00016	3333#10_00016
+group_4: 1234#10_00005	3333#10_00005
+group_8: 1234#10_00017
+group_2: 1234#10_00002
diff --git a/t/data/expected_clusters_to_inflate b/t/data/expected_clusters_to_inflate
new file mode 100644
index 0000000..5fc874a
--- /dev/null
+++ b/t/data/expected_clusters_to_inflate
@@ -0,0 +1,4 @@
+2363_5_03666 4075_2#3_03437
+4075_1#8_03461	2212_1_02994	2212_6_02081	2363_1_00606	2363_2_02124	2363_3_01371	2363_6_01272	2363_8_00966	2541_2_02425	2541_3_02449	2541_7_00441	2541_8_00644	2781_2_02909	3634_6_00968	3634_7_01056	3634_8_02606
+2212_3_02841 2363_5_00947
+2363_7_00085 2460_2_00826 4075_1#6_04091 4075_1#3_04238 3634_6_04078	2212_1_01414	2363_1_00811	2541_2_00696	2541_8_00920	3634_7_00911
diff --git a/t/data/expected_combined_proteome.fa b/t/data/expected_combined_proteome.fa
new file mode 100644
index 0000000..fc98fa8
--- /dev/null
+++ b/t/data/expected_combined_proteome.fa
@@ -0,0 +1,56 @@
+>1234#10_00001 nudix hydrolase
+MSKHGFFQITQKLFLRKGDELLILRDRKSGLGDLPGGRMNENEFFEDWSLSMQREIEEEL
+GSQVQIRVSTKPLFIHKHKVNEGNFPCIIIAYHADYLGGDIILSDEHDYISWEKVQTYEP
+SPLFTEYMLDAVNLYLKEYAPLVH*
+>1234#10_00002 intramembrane serine protease
+MIIKLKIILNSYLIYYFLRGQNTLIRTLLFEFPLTTFFVFLMVATFFIVNVFLPEHLIRQ
+YFLNHPGQIQPLSWIGAVFYHGNLIHLFGNMFYLFFLGRAVEYKAGKGRWLLFFFMAALI
+SSLLDSFIRGVILHDPTPVVGASGAISGIAAVAALLSPFSLRFNQRNIPFPVFLVAWIMV
+YSDITNVFTEDGVARWAHLGGFISVIFAAYFLKPTERKQLHSGFILNLIFIILTLILAFF
+YSNRS*
+>1234#10_00003 hypothetical protein
+VKTTLSGEIEKLRYEVAVKIVNLQGEVLDLRAEMKINFSEVNSKILKLQFEFEMAKIRKE
+LKTEIADLRAETKTDFLELQKSIVDIYKTISTQTRWILGVATLFAAIGKVIN*
+>1234#10_00005 imidazoleglycerol-phosphate dehydratase or histidinol-phosphatase
+LTDKLIGFYDPVRMKAERKTSETEIKLEMNLRGTGQYQFDTEIPFFEHMLSHISKHGLID
+LNLWLRGDIEIDCHHSVEDTAILMGATIHKQLGDKAGIFRYGHFTLTMDEVLTTVAVDLG
+GRYFFKYTGPELTGKFGIYDAELSLEFLQKLALNAKMNLHVVVHYGDNKHHVHESIFKAL
+GKALRMAIAQDSAAAGAIPSTKGVLE*
+>1234#10_00006 imidazole glycerol phosphate synthase subunit HisH
+MIAILDYGMGNIHSCLKAVSLYTKDFVFTKDHSTIENSKALILPGDGHFDKAMENLNSTG
+LRKTIDKHVTSGKPLFGICIGFQILFESSEEIAQGSKKEQIEGLGYIKGKIKKFHGKDFK
+VPHIGWNRLQIRRKDKSVLLKGIGDQSFFYFIHSYRPTDAEGNAITGLCDYYQEKFPAVV
+EKNNIFGTQFHPEKSHTHGLKLLENFIRFI*
+>1234#10_00007 1-(5-phosphoribosyl)-5-[(5- phosphoribosylamino)methylideneamino] imidazole-4-carboxamide isomerase
+MIVIPAIDLFDNCAVRLFKGNYEEKKIYSSEPWKLAESFAKNGATLLHLVDLNGARNQLG
+VNEDSILKIRETTSLKVQLGGGIRDKEKLAYYDKIGINRFILGTAAVTNPDLLKYALDNY
+GKERVVVAVDARDGIVKIAGWEKDSGIHYRDLLERLVKAGIEHIVFTDIAQDGTLAGPNL
+EAYREILNSYPFQVIASGGIASLKDLMDLSSLKTKISLYGVITGKALYEGKLDLAKAISS
+I*
+>1234#10_00016 hypothetical protein
+MKVTHSCLEFDSIEGLIDFAREFETGSMIRFLSPIEDNSGNVLVKEEVQVKESTLARLKD
+IKGQYTPKFEVKLNKELLEQIQNILAIKIVNQLKVTDMKFLKFMYENTNYNFKGIIRNSL
+LSKKTTLTLLKVYNQNLNFFKYISELGLLSLGIVMIPDTMRFRLLRRYAFTAGILMDVPR
+IGVDKFTKLPSDDNEKVRVAHKCSDILQKLDLIEFTYGSISNHMPLGMIEDSTSSDKAAP
+GENIDETFLDDIISNDGESDSKVDGSREDAIPEKSYDIFQALLTDALKLARYIANVSHNA
+VDKDYVMEELVYYIAYNTSKKYFDELLANPLVATFKEFEVNVKRLRKIAEVEMKCVYPPS
+AWAYPKPKSSQVLCKNKVWDCPNIVMGWDIHVITAQEAFGWVGTSLPVDNYPKCRLEEEL
+DEIMVEPEKPKKK*
+>1234#10_00017 LipL45-like lipoprotein
+MKRYLSIVILCTFAMLLLVCSTNKSSGSDQVKTESNATSARIVWLLGDVKILSDSGEKKA
+ELGASLSSTDRVVTGPNGGAEIMVADSGIIKMSKNSDIEISSLMNPNGSDTNVQVNYGKI
+VTMVKKGQKTTEFTVSTPTALAGVRGTSFLTSVESPEGSKINCAKANCTVRFAVIEGTIA
+VSKKGESSEVILSKNRELRIEKNQKLTDKLIRSLQNDSLSEMKELIVLHKNETFEYGKLV
+EELKSSSEELKILSQSGSVEEVKAAFQKREADRNNADEITKTAKAVNETKYVQQDVQKEK
+LKLNPKETF*
+>1234#10_00018 putative lipoprotein
+MDFSFNLVDNNGTVMRSTFSPIRFFTFSFLWILIDCSSVQKIENFNSVLQEPTFKSLKEE
+EAILGGSSDSDYKIRKTGNTIPVFVLSPIQTPEGMDSKLAAFLSDEVRLIWAKVKGKQVR
+IQEMSWKNPSQLSQELKRLNVDAVIKTDIREVSGKWVVNQKITDPVKEIVYGSVDGSFQS
+PKIEDELQANQAYYLKHGSGVLALDAKSSLVPIWEKSLSSGEIDSILKKSIQGYLSFSAS
+SADTEVLFQGEKIGIASFRNYPLPEGLQQIQITRPGQKDISKSLQIRSGQTISIYQEWKE
+DRTLGGVRILSFPEALQVALDGLKMGETPFYRSNLTPGAMQLELVRETENGPLVYYEGQL
+IVDADKITEIALPYKTDNLISEPEFWKLSGEKGFQAFSGKTLDFQNVSSLPPGWYGVFSA
+PFVPENMELEGIIPITAESDSGIVAISFHTSKKTISLEYEKERLSVYSFPSNGNNVGTYK
+FKKEDKEDGRPFRIITDVKEGTIRLYLGYSKVLEDRLDVSGVWRISILTRGENFSKRSPL
+RNLKIEYKGYK*
diff --git a/t/data/expected_combined_proteome_with_filtering.fa b/t/data/expected_combined_proteome_with_filtering.fa
new file mode 100644
index 0000000..b037784
--- /dev/null
+++ b/t/data/expected_combined_proteome_with_filtering.fa
@@ -0,0 +1,82 @@
+>1234#10_00001 nudix hydrolase
+MSKHGFFQITQKLFLRKGDELLILRDRKSGLGDLPGGRMNENEFFEDWSLSMQREIEEEL
+GSQVQIRVSTKPLFIHKHKVNEGNFPCIIIAYHADYLGGDIILSDEHDYISWEKVQTYEP
+SPLFTEYMLDAVNLYLKEYAPLVH*
+>1234#10_00002 intramembrane serine protease
+MIIKLKIILNSYLIYYFLRGQNTLIRTLLFEFPLTTFFVFLMVATFFIVNVFLPEHLIRQ
+YFLNHPGQIQPLSWIGAVFYHGNLIHLFGNMFYLFFLGRAVEYKAGKGRWLLFFFMAALI
+SSLLDSFIRGVILHDPTPVVGASGAISGIAAVAALLSPFSLRFNQRNIPFPVFLVAWIMV
+YSDITNVFTEDGVARWAHLGGFISVIFAAYFLKPTERKQLHSGFILNLIFIILTLILAFF
+YSNRS*
+>1234#10_00003 hypothetical protein
+VKTTLSGEIEKLRYEVAVKIVNLQGEVLDLRAEMKINFSEVNSKILKLQFEFEMAKIRKE
+LKTEIADLRAETKTDFLELQKSIVDIYKTISTQTRWILGVATLFAAIGKVIN*
+>1234#10_00005 imidazoleglycerol-phosphate dehydratase or histidinol-phosphatase
+LTDKLIGFYDPVRMKAERKTSETEIKLEMNLRGTGQYQFDTEIPFFEHMLSHISKHGLID
+LNLWLRGDIEIDCHHSVEDTAILMGATIHKQLGDKAGIFRYGHFTLTMDEVLTTVAVDLG
+GRYFFKYTGPELTGKFGIYDAELSLEFLQKLALNAKMNLHVVVHYGDNKHHVHESIFKAL
+GKALRMAIAQDSAAAGAIPSTKGVLE*
+>1234#10_00006 imidazole glycerol phosphate synthase subunit HisH
+MIAILDYGMGNIHSCLKAVSLYTKDFVFTKDHSTIENSKALILPGDGHFDKAMENLNSTG
+LRKTIDKHVTSGKPLFGICIGFQILFESSEEIAQGSKKEQIEGLGYIKGKIKKFHGKDFK
+VPHIGWNRLQIRRKDKSVLLKGIGDQSFFYFIHSYRPTDAEGNAITGLCDYYQEKFPAVV
+EKNNIFGTQFHPEKSHTHGLKLLENFIRFI*
+>1234#10_00007 1-(5-phosphoribosyl)-5-[(5- phosphoribosylamino)methylideneamino] imidazole-4-carboxamide isomerase
+MIVIPAIDLFDNCAVRLFKGNYEEKKIYSSEPWKLAESFAKNGATLLHLVDLNGARNQLG
+VNEDSILKIRETTSLKVQLGGGIRDKEKLAYYDKIGINRFILGTAAVTNPDLLKYALDNY
+GKERVVVAVDARDGIVKIAGWEKDSGIHYRDLLERLVKAGIEHIVFTDIAQDGTLAGPNL
+EAYREILNSYPFQVIASGGIASLKDLMDLSSLKTKISLYGVITGKALYEGKLDLAKAISS
+I*
+>1234_00186 GGDEF family protein
+MNFENEYDLEKLVNNSLDLLTIVDLSGNVLLVNPAFERTLGWKKEDLVGKDPFHLLHPED
+KESTYKEFEKLNQGLLTLSFQNRYICADGQYRYFSWTASPDLVSGLVYVTGRDITDVIES
+NRKISQLAVKLKETNDRLFEQASTDPLTKLKNRRMFNEELNNLIHACDKESHPLSLLMID
+ADHFKDYNDKFGHIAGDKVLVELASILTKTFRKKDVLARYGGEEFIAALPNTSEPEANQI
+AERLVQTVREFSWEKRSVTISVGITTYNFNPTSKSINSEYLLNLIEQADKALYCSKVSGR
+XXXXXXXXXXXXXXXXXXXYYI*
+>1234_00325 only a few unknowns so keep
+MAFDPSVPQQQAQAPAGTLLFPEGSSANTLNVLHSGTVRYLTEVPGGRKLELFKLNGANL
+TPGSVALFTSGRYPFHLQAEEACVISTYAMNRDTISKSVGSRVSLGLMVARTLLREITEL
+FKKSNQIRKITSEIEKVNDNLSILYYQFNPSVFPDIKPGSPIPEVSADVVDPVMRLCREN
+LKLFFDNGGILPDRPSPQFLEEEHESQLTRLYPEEIDFQDGEFNFIRKLVMQDPKILNVL
+FTADPSMLAYVCSKLANVLDQISGILKTCLTDLDEAFRIFFIGENSLVEKFYLILDITSS
+GYGTAPAEFVIPVLGAFAGKIEKYKNGHQALFGVPVANISPNTQAFQSKAVTLAKKMEET
+APKVQAPVTSSATAGVDVDAIRKELDNSASVIIQFSGLGAEQIKEFSALMVKVKSLKNPL
+DPEGDNRKVRRTLGRHYWDMYQECFTKYMNSNRNVPKPVELMLKYGYFDETLVDDSQIAF
+MYTQKDPANFTSNVPISLGTEWLEKVFKREVPTSLDEMGQNFFEKVKLENRNIVIKKESD
+IPPELDNPDTRLKFEFASLYEANVRLTSGSPATHFPILTKFHSQMAIDKSYVSKKILEEV
+VHELMAVDYSIFHREVIYNNNELGITKEFIQKCVIPDFILVPSIGTKVMMWQDLSIHRGA
+GSKESPGRIVLPIFAQGDLKTMVADALAAFRWELTKSILGAEWNNVGNPSITADYTDYIQ
+FFKKNKDLSMEIKEKLASDFKRFRNDRDIFANDYQLWMKYEADGVQRLNKVVRGIFYRHI
+PFSKQVRDKVAXXXXXXXXXXXXXLLPLQRSITDLLILEIENILK*
+>1234_00377 Uncharacterized protein conserved in bacteria
+MTLNEFAKNVLFGSGLEDKLFSPPVHPVDIRSFDFLNVPSLPAREKKIQISEQKSKIPRL
+EQLFNEENRIITLHHFANHELMAIELFAWAILKFQDAPSSIRFGLYRTLLEEQTHLKMYL
+SEMKKGGMELGDRPLNFIFWKQVPKMQTLEKFXXXXXXXXXXXXXXXXXF*
+>1234#10_00016 hypothetical protein
+MKVTHSCLEFDSIEGLIDFAREFETGSMIRFLSPIEDNSGNVLVKEEVQVKESTLARLKD
+IKGQYTPKFEVKLNKELLEQIQNILAIKIVNQLKVTDMKFLKFMYENTNYNFKGIIRNSL
+LSKKTTLTLLKVYNQNLNFFKYISELGLLSLGIVMIPDTMRFRLLRRYAFTAGILMDVPR
+IGVDKFTKLPSDDNEKVRVAHKCSDILQKLDLIEFTYGSISNHMPLGMIEDSTSSDKAAP
+GENIDETFLDDIISNDGESDSKVDGSREDAIPEKSYDIFQALLTDALKLARYIANVSHNA
+VDKDYVMEELVYYIAYNTSKKYFDELLANPLVATFKEFEVNVKRLRKIAEVEMKCVYPPS
+AWAYPKPKSSQVLCKNKVWDCPNIVMGWDIHVITAQEAFGWVGTSLPVDNYPKCRLEEEL
+DEIMVEPEKPKKK*
+>1234#10_00017 LipL45-like lipoprotein
+MKRYLSIVILCTFAMLLLVCSTNKSSGSDQVKTESNATSARIVWLLGDVKILSDSGEKKA
+ELGASLSSTDRVVTGPNGGAEIMVADSGIIKMSKNSDIEISSLMNPNGSDTNVQVNYGKI
+VTMVKKGQKTTEFTVSTPTALAGVRGTSFLTSVESPEGSKINCAKANCTVRFAVIEGTIA
+VSKKGESSEVILSKNRELRIEKNQKLTDKLIRSLQNDSLSEMKELIVLHKNETFEYGKLV
+EELKSSSEELKILSQSGSVEEVKAAFQKREADRNNADEITKTAKAVNETKYVQQDVQKEK
+LKLNPKETF*
+>1234#10_00018 putative lipoprotein
+MDFSFNLVDNNGTVMRSTFSPIRFFTFSFLWILIDCSSVQKIENFNSVLQEPTFKSLKEE
+EAILGGSSDSDYKIRKTGNTIPVFVLSPIQTPEGMDSKLAAFLSDEVRLIWAKVKGKQVR
+IQEMSWKNPSQLSQELKRLNVDAVIKTDIREVSGKWVVNQKITDPVKEIVYGSVDGSFQS
+PKIEDELQANQAYYLKHGSGVLALDAKSSLVPIWEKSLSSGEIDSILKKSIQGYLSFSAS
+SADTEVLFQGEKIGIASFRNYPLPEGLQQIQITRPGQKDISKSLQIRSGQTISIYQEWKE
+DRTLGGVRILSFPEALQVALDGLKMGETPFYRSNLTPGAMQLELVRETENGPLVYYEGQL
+IVDADKITEIALPYKTDNLISEPEFWKLSGEKGFQAFSGKTLDFQNVSSLPPGWYGVFSA
+PFVPENMELEGIIPITAESDSGIVAISFHTSKKTISLEYEKERLSVYSFPSNGNNVGTYK
+FKKEDKEDGRPFRIITDVKEGTIRLYLGYSKVLEDRLDVSGVWRISILTRGENFSKRSPL
+RNLKIEYKGYK*
diff --git a/t/data/expected_complement_of_groups.gg b/t/data/expected_complement_of_groups.gg
new file mode 100644
index 0000000..5efc9be
--- /dev/null
+++ b/t/data/expected_complement_of_groups.gg
@@ -0,0 +1,6 @@
+group_3: 1_3	3_3
+group_4: 2_4	3_4
+group_2: 1_2	2_2
+group_7: 2_7
+group_5: 3_5
+group_6: 1_6
diff --git a/t/data/expected_complement_of_groups_core0.66.gg b/t/data/expected_complement_of_groups_core0.66.gg
new file mode 100644
index 0000000..0028476
--- /dev/null
+++ b/t/data/expected_complement_of_groups_core0.66.gg
@@ -0,0 +1,3 @@
+group_7: 2_7
+group_5: 3_5
+group_6: 1_6
diff --git a/t/data/expected_core_60_summary_statistics.txt b/t/data/expected_core_60_summary_statistics.txt
new file mode 100644
index 0000000..40c71d8
--- /dev/null
+++ b/t/data/expected_core_60_summary_statistics.txt
@@ -0,0 +1,5 @@
+Core genes	(60% <= strains <= 100%)	75
+Soft core genes	(58% <= strains < 59%)	0
+Shell genes	(15% <= strains < 58%)	20
+Cloud genes	(0% <= strains < 15%)	0
+Total genes	(0% <= strains <= 100%)	95
diff --git a/t/data/expected_core_gene_alignment.aln b/t/data/expected_core_gene_alignment.aln
new file mode 100644
index 0000000..804067d
--- /dev/null
+++ b/t/data/expected_core_gene_alignment.aln
@@ -0,0 +1,4 @@
+>query_1
+AAAAA-GGGGGTTTTT
+>query_2
+CCCCC-TTTTTGGGGG
diff --git a/t/data/expected_core_gene_alignment_core0.66.aln b/t/data/expected_core_gene_alignment_core0.66.aln
new file mode 100644
index 0000000..e542299
--- /dev/null
+++ b/t/data/expected_core_gene_alignment_core0.66.aln
@@ -0,0 +1,6 @@
+>query_1
+TTTTT
+>query_2
+GGGGG
+>query_3
+NNNNN
diff --git a/t/data/expected_create_pan_genome.fa b/t/data/expected_create_pan_genome.fa
new file mode 100644
index 0000000..7c3efb9
--- /dev/null
+++ b/t/data/expected_create_pan_genome.fa
@@ -0,0 +1,56 @@
+>1234#10_00016 (null)
+MKVTHSCLEFDSIEGLIDFAREFETGSMIRFLSPIEDNSGNVLVKEEVQVKESTLARLKD
+IKGQYTPKFEVKLNKELLEQIQNILAIKIVNQLKVTDMKFLKFMYENTNYNFKGIIRNSL
+LSKKTTLTLLKVYNQNLNFFKYISELGLLSLGIVMIPDTMRFRLLRRYAFTAGILMDVPR
+IGVDKFTKLPSDDNEKVRVAHKCSDILQKLDLIEFTYGSISNHMPLGMIEDSTSSDKAAP
+GENIDETFLDDIISNDGESDSKVDGSREDAIPEKSYDIFQALLTDALKLARYIANVSHNA
+VDKDYVMEELVYYIAYNTSKKYFDELLANPLVATFKEFEVNVKRLRKIAEVEMKCVYPPS
+AWAYPKPKSSQVLCKNKVWDCPNIVMGWDIHVITAQEAFGWVGTSLPVDNYPKCRLEEEL
+DEIMVEPEKPKKK*
+>1234#10_00017 (null)
+MKRYLSIVILCTFAMLLLVCSTNKSSGSDQVKTESNATSARIVWLLGDVKILSDSGEKKA
+ELGASLSSTDRVVTGPNGGAEIMVADSGIIKMSKNSDIEISSLMNPNGSDTNVQVNYGKI
+VTMVKKGQKTTEFTVSTPTALAGVRGTSFLTSVESPEGSKINCAKANCTVRFAVIEGTIA
+VSKKGESSEVILSKNRELRIEKNQKLTDKLIRSLQNDSLSEMKELIVLHKNETFEYGKLV
+EELKSSSEELKILSQSGSVEEVKAAFQKREADRNNADEITKTAKAVNETKYVQQDVQKEK
+LKLNPKETF*
+>1234#10_00018 (null)
+MDFSFNLVDNNGTVMRSTFSPIRFFTFSFLWILIDCSSVQKIENFNSVLQEPTFKSLKEE
+EAILGGSSDSDYKIRKTGNTIPVFVLSPIQTPEGMDSKLAAFLSDEVRLIWAKVKGKQVR
+IQEMSWKNPSQLSQELKRLNVDAVIKTDIREVSGKWVVNQKITDPVKEIVYGSVDGSFQS
+PKIEDELQANQAYYLKHGSGVLALDAKSSLVPIWEKSLSSGEIDSILKKSIQGYLSFSAS
+SADTEVLFQGEKIGIASFRNYPLPEGLQQIQITRPGQKDISKSLQIRSGQTISIYQEWKE
+DRTLGGVRILSFPEALQVALDGLKMGETPFYRSNLTPGAMQLELVRETENGPLVYYEGQL
+IVDADKITEIALPYKTDNLISEPEFWKLSGEKGFQAFSGKTLDFQNVSSLPPGWYGVFSA
+PFVPENMELEGIIPITAESDSGIVAISFHTSKKTISLEYEKERLSVYSFPSNGNNVGTYK
+FKKEDKEDGRPFRIITDVKEGTIRLYLGYSKVLEDRLDVSGVWRISILTRGENFSKRSPL
+RNLKIEYKGYK*
+>1234#10_00001 (null)
+MSKHGFFQITQKLFLRKGDELLILRDRKSGLGDLPGGRMNENEFFEDWSLSMQREIEEEL
+GSQVQIRVSTKPLFIHKHKVNEGNFPCIIIAYHADYLGGDIILSDEHDYISWEKVQTYEP
+SPLFTEYMLDAVNLYLKEYAPLVH*
+>1234#10_00002 (null)
+MIIKLKIILNSYLIYYFLRGQNTLIRTLLFEFPLTTFFVFLMVATFFIVNVFLPEHLIRQ
+YFLNHPGQIQPLSWIGAVFYHGNLIHLFGNMFYLFFLGRAVEYKAGKGRWLLFFFMAALI
+SSLLDSFIRGVILHDPTPVVGASGAISGIAAVAALLSPFSLRFNQRNIPFPVFLVAWIMV
+YSDITNVFTEDGVARWAHLGGFISVIFAAYFLKPTERKQLHSGFILNLIFIILTLILAFF
+YSNRS*
+>1234#10_00003 (null)
+VKTTLSGEIEKLRYEVAVKIVNLQGEVLDLRAEMKINFSEVNSKILKLQFEFEMAKIRKE
+LKTEIADLRAETKTDFLELQKSIVDIYKTISTQTRWILGVATLFAAIGKVIN*
+>1234#10_00005 (null)
+LTDKLIGFYDPVRMKAERKTSETEIKLEMNLRGTGQYQFDTEIPFFEHMLSHISKHGLID
+LNLWLRGDIEIDCHHSVEDTAILMGATIHKQLGDKAGIFRYGHFTLTMDEVLTTVAVDLG
+GRYFFKYTGPELTGKFGIYDAELSLEFLQKLALNAKMNLHVVVHYGDNKHHVHESIFKAL
+GKALRMAIAQDSAAAGAIPSTKGVLE*
+>1234#10_00006 (null)
+MIAILDYGMGNIHSCLKAVSLYTKDFVFTKDHSTIENSKALILPGDGHFDKAMENLNSTG
+LRKTIDKHVTSGKPLFGICIGFQILFESSEEIAQGSKKEQIEGLGYIKGKIKKFHGKDFK
+VPHIGWNRLQIRRKDKSVLLKGIGDQSFFYFIHSYRPTDAEGNAITGLCDYYQEKFPAVV
+EKNNIFGTQFHPEKSHTHGLKLLENFIRFI*
+>1234#10_00007 (null)
+MIVIPAIDLFDNCAVRLFKGNYEEKKIYSSEPWKLAESFAKNGATLLHLVDLNGARNQLG
+VNEDSILKIRETTSLKVQLGGGIRDKEKLAYYDKIGINRFILGTAAVTNPDLLKYALDNY
+GKERVVVAVDARDGIVKIAGWEKDSGIHYRDLLERLVKAGIEHIVFTDIAQDGTLAGPNL
+EAYREILNSYPFQVIASGGIASLKDLMDLSSLKTKISLYGVITGKALYEGKLDLAKAISS
+I*
diff --git a/t/data/expected_example_annotation_1.faa b/t/data/expected_example_annotation_1.faa
new file mode 100644
index 0000000..cc39033
--- /dev/null
+++ b/t/data/expected_example_annotation_1.faa
@@ -0,0 +1,30 @@
+>abc_00001 [revcomp]:[translate(1)]
+MKTRIVSSVTTTLLLGSILMNPVANAADSDINIKTGTTDIGSNTTVKTGDLVTYDKENGMHKKVFYSFIDDKNHNKKLLVIRTKGTIAGQYRVYSEEGANKSGLAWPSAFKVQLQLPDNEVAQISDYYPRNSIDTKEYMSTLTYGFNGNVTGDDTGKIGGLIGANVSIGHTLKYVQPDFKTILESPTDKKVGWKVIFNNMVNQNWGPYDRDSWNPVYGNQLFMKTRNGSMKAAENFLDPNKASSLLSSGFSPDFATVITMDRKASKQQTNIDVIYERVRDDYQLHWTSTNWKGTNTKDKWTDRSSERYKIDWEKEEMTN*
+>abc_00002 [translate(1)]
+MKLFYIVFLIIIWLNIFLGNEIIHTLTVLITTLYIVNSRKGIKNDRVE*
+>abc_00003 [translate(1)]
+MTELNNIINSLQSLFESESGYKISKNSGVPYQTVQDLRNGKTKLEDARFRTIIKLYSYYVSLKEH*
+>abc_00004 [revcomp]:[translate(1)]
+MSKNITKNIILTTTLLLLGTVLPQNQKPVFSFYSEAKAYSIGQDETNINELIKYYTQPHFSFSNKWLYQYDNGNIYVELKRYSWSAHISLWGAESWGNINQLKDRYVDVFGLKDKDTDQLWWSYRETFTGGVTPAAKPSDKTYNLFVQYKDKLQTIIGAHKIYQGNKPVLTLKEIDFRAREALIKNKILYTENRNKGKLKITGGGNNYTIDLSKRLHSDLANVYVKNPNKITVDVLFD*
+>abc_00005 [revcomp]:[translate(1)]
+MNNNITKKIILSTTLLLLGTAFTQFPNTPINSSSEAKAYYINQNETNVNELTKYYSQKYLTFSNSTLWQKDNGTIHATLLQFSWYSHIQVYGPESWGNINQLRNKSVDIFGIKDQETIDSFALSQETFTGGVTPAATSNDKHYKLNVTYKDKAETFTGGFPVYEGNKPVLTLKELDFRIRQTLIKSKKLYNNSYNKGQIKITGTDNNYTIDLSKRLPSTDANRYVKKPQNAKIEVILEKSN*
+>abc_00006 [revcomp]:[translate(1)]
+MKKNIMNKLVLSTALLLLGTTSTQLPKTPISFSSEAKAYNISENETNINELIKYYTQPHFSLSGKWLWQKPNGSIHATLQTWVWYSHIQVFGSESWGNINQLRNKYVDIFGTKDEDTVEGYWTYDETFTGGVTPAATSSDKPYRLFLKYSDKQQTIIGGHEFYKGNKPVLTLKELDFRIRQTLIKNKKLYNGEFNKGQIKITADGNNYTIDLSKKLKLTDTNRYVKNPKNAQIEVILEKSN*
+>abc_00007 [translate(1)]
+MKNLRNRSFLTLLDFSRQEVEFLLTLSEDLKRAKYIGTEKPMLKNKNIALLFEKDSTRTRCAFEVAAHDQGANVTYLGPTGSQMGKKETTKDTARVLGGMYDGIEYRGFSQRTVETLAENSGVPVWNGLTDEDHPTQVLADFLTAKEVLKKDYADINFTYVGDGRNNVANALMQGAAIMGMNFHLVCPKELNPTDELLNRCKNIAAENGGNILITDDIDQGVKGSDVIYTDVWVSMGEPDEVWKERLELLKPYQVNKEMMDKTGNPNVIFEHCLPSFHNADTKIGQQIFEKYGIREMEVTDEVFESKASVVFQEAENRMHTIKAVMVATLGEF*
+>abc_00008 [translate(1)]
+MMAKIVVALGGNALGKSPQEQLELVKNTAKSLVGLITKGHEIVISHGNGPQVGSINLGLNYAAEHNQGPAFPFAECGAMSQAYIGYQLQESLQNELHSIGMDKQVVTLVTQVEVDENDPAFNNPSKPIGLFYNKEEAEQIQKEKGFIFVEDAGRGYRRVVPSPQPISIIELESIKTLIKNDTLVIAAGGGGIPVIREQHDGFKGIDAVIDKDKTSALLGANIQCDQLIILTAIDYVYINFNTENQQPLKTTNVDELKRYIDENQFAKGSMLPKIEAAISFIENNPKGSVLITSLNELDAALEGKVGTVIKK*
+>abc_01705 [translate(1)]
+VENTINESEKKKRFKLKMPGAFMILFILTVVAVIATWVIPAGAYSKLSYEPSSQELKIVNPHNQVKKVPGTQQELDKMGVKIKIEQFKSGAINKPVSIPNTYERLKQHPAGPEQITSSMVEGTIEAVDIMVFILVLGGLIGVVQASGSFESGLLALTKKTKGHEFMLIVFVSILMIIGGTLCGIEEEAVAFYPILVPIFIALGYDSIVSVGAIFLASSVGSTFSTINPFSVVIASNAAGTTFTDGLYWRIGACIVGAIFVISYLYWYCKKIKNDPKASYSYEDKDAFEQQWSVLKDDDSAHFTLRKKIILTLFVLPFPIMVWGVMTQGWWFPVMASAFLIFTIIIMFIAGTGKSGLGEKGTVDAFVNGASSLVGVSLIIGLARGINLVLNEGMISDTILHFSSSLVQHMSGPLFIIVLLFIFFCLGFIVPSSSGLAVLSMPIFAPLADTVGIPRFVIVTTYQFGQYAMLFLAPTGLVMATLQMLNMRYSHWF [...]
+>abc_00010 [translate(1)]
+MTHLTKVLDTLTGICVVLLFSKYFVAYANMVFDWNLRWYLLENIPHLPIILFILMFIFGVPSEMIKDRQRKNNGV*
+>abc_00011 [revcomp]:[translate(1)]
+MRLQKAPLVTSGLVLGLLGLGNLLKDLSLTLNAVCGIFAFLIWIHLLCTMIKYFNNVKEQLNSPLVSSVFTTFFMSGFLGTTYLNTFFSNITFINSLITPIWILCLVGIMTHMIIFSIKYLKDFSLENVYPSWTVLFIGIAIAGLTAPVSGCFFIGQLTVIYGFVATCIVLPIVFKRLKAFPLQTSIKPNTSTICAPFSLVAAAYVIAFPKANAFIVIIFLILAQIFYFYIIIQLPKLLKEPFSPVFSAFTFPLVISATALKNSLPVLMFPDIWKGLLFIEVLLATVIVLRVFIGYLHFFLKKENQDKFLRNASQ*
+>abc_00012 [translate(1)]
+MRNQIQKLLDSDLSSLHISKQTGVPQSTIHRMRKKERSLDNMSLKNAELLYKFANSIFSNEN*
+>abc_00014 [translate(1)]
+MEGLFNAIKDTVTAAINNDGAKLGTSIVSIVENGVGLLGKLFGF*
+>abc_00015 [translate(1)]
+MTGLAEAIANTVQAAQQHDSVKLGTSIVDIVANGVGLLGKLFGF*
+>abc_00016 [translate(1)]
+LGYKNILIDFDDTIVDFYDAEEWAFHYMANVFNHKATKDDFLTFKKINHQHWEAFQQNKLTKSEVLSERFVNYFKHHQMEVDGHRADVLFRNGLAEAKVKYFDQTLETIVELSKRHDLYIVTNGVTETQKRRLNQTPLHKYIKKIFISEETGYQKPNPEFFNYVFNDIGEDERQHSIIVGDSLTSDILGGINAGIATCWFNFRGFDHNPGIIPDYEINSWKQLNDIVR*
diff --git a/t/data/expected_filtered_original_input.fa b/t/data/expected_filtered_original_input.fa
new file mode 100644
index 0000000..ec8984e
--- /dev/null
+++ b/t/data/expected_filtered_original_input.fa
@@ -0,0 +1,48 @@
+>4075_1#3_04238
+AAAGGGTTT
+>4075_1#6_04091
+AAAGGGTTT
+>2212_1_02994
+AAAGGGTTT
+>2363_5_00947
+AAAGGGTTT
+>2363_7_00085
+AAAGGGTTT
+>2460_2_00826
+AAAGGGTTT
+>2212_3_02841
+AAAGGGTTT
+>4075_2#3_03437
+AAAGGGTTT
+>2363_5_03666
+AAAGGGTTT
+>2212_6_02081
+AAAGGGTTT
+>2541_8_00644
+AAAGGGTTT
+>2781_2_02909
+AAAGGGTTT
+>3634_6_00968
+AAAGGGTTT
+>3634_7_01056
+AAAGGGTTT
+>3634_8_02606
+AAAGGGTTT
+>4075_1#8_03461
+AAAGGGTTT
+>2363_1_00606
+AAAGGGTTT
+>2363_2_02124
+AAAGGGTTT
+>2363_3_01371
+AAAGGGTTT
+>2363_6_01272
+AAAGGGTTT
+>2363_8_00966
+AAAGGGTTT
+>2541_2_02425
+AAAGGGTTT
+>2541_3_02449
+AAAGGGTTT
+>2541_7_00441
+AAAGGGTTT
diff --git a/t/data/expected_g2_g5_pan_genome_reference.fa b/t/data/expected_g2_g5_pan_genome_reference.fa
new file mode 100644
index 0000000..5e6c0e7
--- /dev/null
+++ b/t/data/expected_g2_g5_pan_genome_reference.fa
@@ -0,0 +1,70 @@
+>1_1 hly
+ATGAAAACACGTATAGTCAGCTCAGTAACAACAACACTATTGCTAGGTTCCATATTAATG
+AATCCTGTCGCTAATGCCGCAGATTCTGATATTAATATTAAAACCGGTACTACAGATATT
+GGAAGCAATACTACAGTAAAAACAGGTGATTTAGTCACTTATGATAAAGAAAATGGCATG
+CACAAAAAAGTATTTTATAGTTTTATCGATGATAAAAATCACAATAAAAAACTGCTAGTT
+ATTAGAACGAAAGGTACCATTGCTGGTCAATATAGAGTTTATAGCGAAGAAGGTGCTAAC
+AAAAGTGGTTTAGCCTGGCCTTCAGCCTTTAAGGTACAGTTGCAACTACCTGATAATGAA
+GTAGCTCAAATATCTGATTACTATCCAAGAAATTCGATTGATACAAAAGAGTATATGAGT
+ACTTTAACTTATGGATTCAACGGTAATGTTACTGGTGATGATACAGGAAAAATTGGCGGC
+CTTATTGGTGCAAATGTTTCGATTGGTCATACACTGAAATATGTTCAACCTGATTTCAAA
+ACAATTTTAGAGAGCCCAACTGATAAAAAAGTAGGCTGGAAAGTGATATTTAACAATATG
+GTGAATCAAAATTGGGGACCATATGATAGAGATTCTTGGAACCCGGTATATGGCAATCAA
+CTTTTCATGAAAACTAGAAATGGTTCTATGAAAGCAGCAGAGAACTTCCTTGATCCTAAC
+AAAGCAAGTTCTCTATTATCTTCAGGGTTTTCACCAGACTTCGCTACAGTTATTACTATG
+GATAGAAAAGCATCCAAACAACAAACAAATATAGATGTAATATACGAACGAGTTCGTGAT
+GACTACCAATTGCATTGGACTTCAACAAATTGGAAAGGTACCAATACTAAAGATAAATGG
+ACAGATCGTTCTTCAGAAAGATATAAAATCGATTGGGAAAAAGAAGAAATGACAAATTAA
+>1_2 speH
+ATGAACAATAACATCACGAAAAAAATTATTTTATCAACAACATTGTTACTATTAGGTACA
+GCATTTACACAATTTCCTAATACACCTATCAATTCTTCATCTGAAGCGAAAGCTTATTAT
+ATAAATCAAAACGAAACTAACGTTAATGAGTTAACTAAATATTACTCGCAAAAATATTTA
+ACCTTCTCTAACAGTACGTTATGGCAAAAAGATAACGGTACGATTCATGCAACGTTGTTA
+CAGTTTTCTTGGTATAGTCATATTCAAGTTTATGGACCTGAAAGTTGGGGCAATATCAAC
+CAATTAAGAAATAAAAGCGTTGATATTTTTGGCATAAAAGACCAAGAAACCATTGATTCT
+TTTGCATTATCTCAAGAAACGTTTACTGGTGGTGTTACTCCTGCAGCAACATCTAACGAT
+AAACACTATAAACTGAATGTAACATATAAAGATAAAGCAGAAACGTTTACTGGCGGATTT
+CCAGTTTATGAAGGCAATAAGCCTGTTTTAACTTTAAAAGAATTAGATTTTCGTATTCGT
+CAAACATTAATTAAAAGTAAAAAATTATATAATAATTCTTATAATAAAGGACAAATTAAA
+ATAACAGGTACAGACAATAACTACACAATAGATTTAAGTAAAAGGTTGCCATCAACTGAT
+GCAAATAGATATGTTAAAAAACCTCAAAATGCAAAAATTGAAGTTATCCTCGAAAAATCA
+AACTAA
+>1_3 argF
+ATGAAAAATTTACGAAACAGAAGTTTTTTAACTTTATTAGACTTTTCACGACAAGAGGTA
+GAATTCTTATTAACACTCTCCGAGGATTTAAAACGTGCTAAATATATTGGCACTGAAAAG
+CCTATGTTAAAAAATAAAAATATTGCACTGTTATTTGAAAAAGATTCTACAAGAACGCGA
+TGTGCATTTGAAGTTGCAGCGCATGATCAAGGTGCAAATGTAACTTATTTAGGCCCAACT
+GGATCACAAATGGGTAAAAAAGAAACAACTAAAGATACTGCACGTGTGCTTGGTGGAATG
+TATGATGGCATTGAATACCGTGGTTTTTCACAAAGAACAGTAGAAACTTTAGCTGAAAAT
+TCAGGCGTACCAGTGTGGAATGGTTTAACTGATGAAGATCATCCTACTCAAGTTCTTGCT
+GATTTCTTAACAGCAAAAGAAGTCTTAAAAAAAGATTATGCAGATATTAACTTTACATAT
+GTTGGAGATGGTCGTAATAACGTTGCAAATGCATTAATGCAAGGTGCTGCCATTATGGGT
+ATGAACTTCCATTTAGTTTGTCCAAAAGAATTAAATCCAACAGATGAATTATTAAATCGC
+TGTAAAAATATTGCCGCTGAAAATGGTGGCAACATATTAATCACAGATGATATTGACCAA
+GGTGTAAAAGGTTCGGATGTAATTTACACTGATGTTTGGGTATCAATGGGTGAACCTGAT
+GAAGTATGGAAAGAACGACTTGAATTATTGAAACCATATCAAGTAAATAAAGAAATGATG
+GATAAAACTGGTAATCCAAATGTTATTTTTGAGCATTGCTTACCATCTTTCCATAATGCT
+GATACGAAAATTGGTCAACAAATTTTTGAAAAATATGGTATTCGAGAAATGGAAGTTACA
+GATGAAGTATTCGAAAGTAAAGCTTCAGTTGTATTCCAAGAAGCTGAGAACAGAATGCAT
+ACAATCAAAGCAGTCATGGTTGCTACATTGGGTGAATTTTAA
+>1_6 group_6
+ATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAACAACATGATAGT
+GTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTTTACTAGGTAAA
+TTATTTGGATTCTAA
+>2_7 group_7
+ATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAACAACATGATAGT
+GTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTTTACTAGGTAAA
+TTATTTGGATTCTAA
+>3_5 yfnB
+TTGGGATATAAAAATATTTTGATAGACTTTGATGATACAATTGTTGATTTTTATGATGCA
+GAAGAATGGGCGTTTCACTATATGGCGAATGTTTTTAATCATAAAGCAACAAAGGATGAT
+TTTTTAACATTTAAAAAAATCAATCACCAACATTGGGAAGCTTTTCAACAAAATAAATTA
+ACGAAGTCTGAAGTATTATCAGAACGATTTGTGAATTACTTCAAACATCATCAAATGGAA
+GTTGATGGGCATCGTGCAGATGTGTTATTTAGAAATGGATTAGCAGAAGCTAAAGTTAAA
+TACTTTGATCAAACATTAGAAACAATTGTCGAATTATCGAAAAGACATGATTTATATATT
+GTTACTAATGGTGTAACCGAAACGCAAAAGAGAAGGTTAAATCAGACGCCGTTGCATAAA
+TATATTAAAAAGATATTTATATCTGAGGAAACAGGATATCAAAAACCTAATCCGGAATTT
+TTTAATTATGTTTTTAATGATATTGGTGAGGATGAAAGACAGCACTCGATTATAGTTGGA
+GATTCTTTAACATCTGACATTCTAGGTGGAATCAATGCGGGTATAGCTACTTGCTGGTTT
+AATTTTAGAGGATTTGATCATAATCCAGGAATTATACCTGATTATGAAATTAATTCATGG
+AAACAACTAAATGATATTGTACGTTAA
diff --git a/t/data/expected_gene_presence_and_absence.Rtab b/t/data/expected_gene_presence_and_absence.Rtab
new file mode 100644
index 0000000..0d9c5ff
--- /dev/null
+++ b/t/data/expected_gene_presence_and_absence.Rtab
@@ -0,0 +1,8 @@
+Gene	query_1.fa	query_2.fa	query_3.fa
+hly	1	1	1
+speH	1	1	0
+argF	1	0	1
+group_4	0	1	1
+yfnB	0	0	1
+group_6	1	0	0
+group_7	0	1	0
diff --git a/t/data/expected_gff_set_difference_common_set_statistics.csv b/t/data/expected_gff_set_difference_common_set_statistics.csv
new file mode 100644
index 0000000..8bdba55
--- /dev/null
+++ b/t/data/expected_gff_set_difference_common_set_statistics.csv
@@ -0,0 +1,4 @@
+"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","Min group size nuc","Max group size nuc","Avg group size nuc","query_1","query_2","query_3"
+"hly","","Alpha-toxin","3","3","1","","","","","","959","959","959","1_1","2_1","3_1"
+"speH","","hypothetical protein","2","2","1","","","","","","725","725","725","1_2","2_2",""
+"argF","","Ornithine carbamoyltransferase","2","2","1","","","","","","1001","1001","1001","1_3","","3_3"
diff --git a/t/data/expected_group_labels b/t/data/expected_group_labels
new file mode 100644
index 0000000..adbc922
--- /dev/null
+++ b/t/data/expected_group_labels
@@ -0,0 +1,6 @@
+group_1: 1234#10_00001 1234#10_00002
+group_2: 1234#10_00003 1234#10_00018 1234#10_00005
+group_3: 1234#10_00005 1234#10_00005
+group_4: 1234#10_00006       1234#10_00007
+group_5: 1234#10_00016
+group_6: 1234#10_00017
\ No newline at end of file
diff --git a/t/data/expected_group_statitics.csv b/t/data/expected_group_statitics.csv
new file mode 100644
index 0000000..588e641
--- /dev/null
+++ b/t/data/expected_group_statitics.csv
@@ -0,0 +1,8 @@
+"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","Min group size nuc","Max group size nuc","Avg group size nuc","query_1.fa","query_2.fa","query_3.fa"
+"hly","","Alpha-toxin","3","3","1","","","","","","959","959","959","1_1","2_1","3_1"
+"speH","","hypothetical protein","2","2","1","","","","","","725","725","725","1_2","2_2",""
+"argF","","Ornithine carbamoyltransferase","2","2","1","","","","","","1001","1001","1001","1_3","","3_3"
+"group_4","","","2","2","1","","","","","","","","","","2_4","3_4"
+"yfnB","","Putative HAD-hydrolase yfnB","1","1","1","","","","","","686","686","686","","","3_5"
+"group_6","","Gonococcal growth inhibitor III","1","1","1","","","","","","134","134","134","1_6","",""
+"group_7","","Gonococcal growth inhibitor III","1","1","1","","","","","","134","134","134","","2_7",""
diff --git a/t/data/expected_group_statitics_missing_genes.csv b/t/data/expected_group_statitics_missing_genes.csv
new file mode 100644
index 0000000..d23a540
--- /dev/null
+++ b/t/data/expected_group_statitics_missing_genes.csv
@@ -0,0 +1,8 @@
+"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","Min group size nuc","Max group size nuc","Avg group size nuc","query_1.fa","query_2.fa","query_3.fa","query_4_missing_genes.fa"
+"hly","","Alpha-toxin","4","4","1","","","","","","959","959","959","1_1","2_1","3_1","4_1"
+"speH","","hypothetical protein","2","2","1","","","","","","725","725","725","1_2","2_2","",""
+"argF","","Ornithine carbamoyltransferase","2","2","1","","","","","","1001","1001","1001","1_3","","3_3",""
+"group_4","","","2","2","1","","","","","","","","","","2_4","3_4",""
+"yfnB","","Putative HAD-hydrolase yfnB","1","1","1","","","","","","686","686","686","","","3_5",""
+"group_6","","Gonococcal growth inhibitor III","1","1","1","","","","","","134","134","134","1_6","","",""
+"group_7","","Gonococcal growth inhibitor III","1","1","1","","","","","","134","134","134","","2_7","",""
diff --git a/t/data/expected_group_statitics_verbose.csv b/t/data/expected_group_statitics_verbose.csv
new file mode 100644
index 0000000..6e257ae
--- /dev/null
+++ b/t/data/expected_group_statitics_verbose.csv
@@ -0,0 +1,8 @@
+"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","Min group size nuc","Max group size nuc","Avg group size nuc","query_1.fa","query_2.fa","query_3.fa","Inference"
+"hly","","Alpha-toxin,beta-channel forming cytolysin,Leukocidin/Hemolysin toxin family protein","3","3","1","","","","","","959","959","959","1_1","2_1","3_1","ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q2G1X0,protein motif:TIGRFAMs:TIGR01002,protein motif:Pfam:PF07968.6"
+"speH","","hypothetical protein,superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain","2","2","1","","","","","","725","725","725","1_2","2_2","","ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0C0I6,protein motif:CLUSTERS:PRK13349,protein motif:Pfam:PF02876.11"
+"argF","","Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Aspartate/ornithine carbamoyltransferase%2C Asp/Orn binding domain","2","2","1","","","","","","1001","1001","1001","1_3","","3_3","ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P99073,protein motif:CLUSTERS:PRK04284,protein motif:Cdd:COG0078,protein motif:TIGRFAMs:TIGR00658,protein motif:Pfam:PF00185.1"
+"group_4","","","2","2","1","","","","","","","","","","2_4","3_4",
+"yfnB","","Putative HAD-hydrolase yfnB,dUMP phosphatase,HAD hydrolase,haloacid dehalogenase-like hydrolase","1","1","1","","","","","","686","686","686","","","3_5","ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O06480,protein motif:CLUSTERS:PRK09449,protein motif:TIGRFAMs:TIGR02254,protein motif:Pfam:PF00702.1"
+"group_6","","Gonococcal growth inhibitor III,Staphylococcus haemolytic protein","1","1","1","","","","","","134","134","134","1_6","","","ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5"
+"group_7","","Gonococcal growth inhibitor III,Staphylococcus haemolytic protein","1","1","1","","","","","","134","134","134","","2_7","","ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5"
diff --git a/t/data/expected_inflated_results b/t/data/expected_inflated_results
new file mode 100644
index 0000000..4dbcc2e
--- /dev/null
+++ b/t/data/expected_inflated_results
@@ -0,0 +1,5 @@
+6259_8#9_01142 6631_2#11_00851 6631_4#5_01901
+6664_1#21_00211
+6259_8#13_01077
+6630_4#9_00008	6631_1#23_00379	6631_2#7_00715	6631_5#1_00087	6649_8#10_00838	6664_1#13_00087 6259_5#1_00688 6259_5#8_00815
+6259_7#7_02078	6259_7#8_01884	6631_2#22_01965 6631_5#24_01032 6593_5#13_00457	6630_1#9_00031	6630_3#1_00008	6630_3#2_00088	6631_1#4_00087
diff --git a/t/data/expected_intersection_of_groups.gg b/t/data/expected_intersection_of_groups.gg
new file mode 100644
index 0000000..66e7a90
--- /dev/null
+++ b/t/data/expected_intersection_of_groups.gg
@@ -0,0 +1 @@
+group_1: 1_1	2_1	3_1
diff --git a/t/data/expected_intersection_of_groups_core0.66.gg b/t/data/expected_intersection_of_groups_core0.66.gg
new file mode 100644
index 0000000..ea0fd7a
--- /dev/null
+++ b/t/data/expected_intersection_of_groups_core0.66.gg
@@ -0,0 +1,4 @@
+group_1: 1_1	2_1	3_1
+group_3: 1_3	3_3
+group_4: 2_4	3_4
+group_2: 1_2	2_2
diff --git a/t/data/expected_intersection_of_groups_paralogs.gg b/t/data/expected_intersection_of_groups_paralogs.gg
new file mode 100644
index 0000000..c79cc10
--- /dev/null
+++ b/t/data/expected_intersection_of_groups_paralogs.gg
@@ -0,0 +1,2 @@
+group_3: 1_3	2_4	3_3
+group_1: 1_1	2_1	3_1
diff --git a/t/data/expected_mafft_input.fa.aln b/t/data/expected_mafft_input.fa.aln
new file mode 100644
index 0000000..33a59d7
--- /dev/null
+++ b/t/data/expected_mafft_input.fa.aln
@@ -0,0 +1,66 @@
+>1111#5_04506
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+---------atggcaatcaccttacgggagctggatggcctgagctatgaagagatagcg
+gctatcatggattgtccggtggggacggtgcgttcacgtatcttccgggcgcgggaagct
+attgataataaagttcaaccgcttatcaggcgttga
+>1234_8#75_04759
+atgagcgagcagttaacggaccaggtcctggttgaacgggtccagaagggagatcagaaa
+gcctttaacttactggtagtgcgctaccagcataaagtggcgagtctggtttcccgctat
+gtgccatcgggcgacgttcccgatgtcgtacaggaatcatttattaaggcctatcgcgcg
+ctggattctttccggcgggatagtgctttttatacctggttgtatcgtattgcggtcaat
+accgcgaagaactacctggttgcgcaggggcgtcgtccgccttccagtgatgtagacgcg
+attgaagcagaaaactttgaaagcggcggcgcgctgaaagaaatttcgaaccctgagaac
+ttaatgttgtcagaagaactgagacagatagttttccgaactattgagtccctcccggaa
+gatttacgtatggcaatcaccttacgggagctggatggcctgagctatgaagagatagcg
+gctatcatggattgtccggtggggacggtgcgttcacgtatcttccgggcgcgggaagct
+attgataataaagttcaaccgcttatcaggcgttga
+>Salmonella_enterica_subsp_enterica_serovar_Typhimurium_DT104_v1_02853
+atgagcgagcagttaacggaccaggtcctggttgaacgggtccagaagggagatcagaaa
+gcctttaacttactggtagtgcgctaccagcataaagtggcgagtctggtttcccgctat
+gtgccatcgggcgacgttcccgatgtcgtacaggaatcatttattaaggcctatcgcgcg
+ctggattctttccggggggatagtgctttttatacctggttgtatcgtattgcggtcaat
+accgcgaagaactacctggttgcgcaggggcgtcgtccgccttccagtgatgtagacgcg
+attgaagcagaaaactttgaaagcggcggcgcgctgaaagaaatttcgaaccctgagaac
+ttaatgttttcagaagaactgagacagatagttttccgaactattgagtccctcccggaa
+gatttacgtatggcaatcaccttacgggagctggatggcctgagctatgaagagatagcg
+gctatcatggattgtccggtggggacggtgcgttcacgtatcttccgggcgcgggaagct
+attgataataaagttcaaccgcttatcaggcgttga
+>Salmonella_enterica_subsp_enterica_serovar_Typhimurium_SL1344_v2_02736
+atgagcgagcagttaacggaccaggtcctggttgaacgggtccagaagggagatcagaaa
+gcctttaacttactggtagtgcgctaccagcataaagtggcgagtctggtttcccgctat
+gtgccatcgggcgacgttcccgatgtcgtacaggaatcatttattaaggcctatcgcgcg
+ctggattctttccggggggatagtgctttttatacctggttgtatcgtattgcggtcaat
+accgcgaagaactacctggttgcgcaggggcgtcgtccgccttccagtgatgtagacgcg
+attgaagcagaaaactttgaaagcggcggcgcgctgaaagaaatttcgaaccctgagaac
+ttaatgttgtcagaagaactgagacagatagttttccgaactattgagtccctcccggaa
+gatttacgtatggcaatcaccttacgggagctggatggcctgagctgtgaagagatagcg
+gctatcatggattgtccggtggggacggtgcgttcacgtatcttccgggcgcgggaagct
+attgataataaagttcaaccgcttatcaggcgttga
+>Salmonella_enterica_subsp_enterica_serovar_Typhimurium_str_D23580_v1_02783
+atgagcgagcagttaacggaccaggtcctggttgaacgggtccagaagggagatcagaaa
+gcctttaacttactggtagtgcgctaccagcataaagtggcgagtctggtttcccgctat
+gtgccatcgggcgacgttcccgatgtcgtacaggaatcatttattaaggcctatcgcgcg
+ctggattctttccggggggatagtgctttttatacctggttgtatcgtattgcggtcaat
+accgcgaagaactacctggttgcgcaggggcgtcgtccgccttccagtgatgtagacgcg
+attgaagcagaaaactttgaaagcggcggcgcgctgaaagaaatttcgaaccctgagaac
+ttaatgttgtcagaagaactgagacagatagttttccgaactattgagtccctcccggaa
+gatttacgtatggcaatcaccttacgggagctggatggcctgagctatgaagagatagcg
+gctatcatggattgtccggtggggacggtgcgttcacgtatcttccgggcgcgggaagct
+attgataataaagttcaaccgcttatcaggcgttga
+>Salmonella_enterica_subsp_enterica_serovar_Typhimurium_str_DT2_v1_02741
+atgagcgagcagttaacggac---gtcctggttgaacgggtccagaagggagatcagaaa
+gcctttaacttactggtagtgcgctaccagcataaagtggcgagtctggtttcccgctat
+gtgccatcgggcgacgttcccgatgtcgtacaggaatcatttattaaggcctatcgcgcg
+ctggattctttccggggggatagtgctttttatacctggttgtatcgtattgcggtcaat
+accgcgaagaactacctggttgcgcaggggcgtcgtccgccttccagtgatgtagacgcg
+attgaagcagaaaactttgaaagcggcggcgcgctgaaagaaatttcgaaccctgagaac
+ttaatgttgtcagaagaactgagacagatagttttccgaactattgagtccctcccggaa
+gatttacgtatggcaatcaccttacgggagctggatggcctgagctatgaagagatagcg
+gctatcatggattgtccggtggggacggtgcgttcacgtatcttccgggcgcgggaagct
+attgataataaagttcaaccgcttatcaggcgttga
diff --git a/t/data/expected_mafft_real_data_core_gene_alignment.aln b/t/data/expected_mafft_real_data_core_gene_alignment.aln
new file mode 100644
index 0000000..5fcda15
--- /dev/null
+++ b/t/data/expected_mafft_real_data_core_gene_alignment.aln
@@ -0,0 +1,2170 @@
+>real_data_1
+ATGAAAAAATCATTACTCGCTGTTGCTGTGGCAGGGGCTGTTTTGTTGTCATCCGCCGTA
+CAGGCGCAGACAACGCCGGAAGGTTATCAATTACAACAGGTGCTGATGATGAGCCGCCAT
+AATCTGCGGGCGCCGCTGGCGAATAATGGCAACGTACTGGCGCAGTCGACGCCGAACGCC
+TGGCCGGCGTGGGACGTTCCCGGCGGGCAACTGACGACGAAAGGCGGCGTGCTGGAAGTC
+TATATGGGACACTACACACGTGAATGGCTGGTCGCGCAGGGGCTGATACCGTCGGGAGAA
+TGTCCGGCGCCCGACACGGTATATGCCTATGCGAATAGTTTGCAGCGCACCGTCGCCACC
+GCGCAATTTTTCATTACCAGCGCTTTCCCCGGCTGTGATATTCCTGTTCATCATCAGGAA
+AAAATGGGCACTATGGACCCTACCTTCAATCCGGTGATTACCGATGATTCCGCCGCGTTC
+CGGCAACAGGCCGTACAGGCGATGGAAAAGGCGCGTAGTCAGCTACATCTTGATGAGAGT
+TATAAACTGCTTGAGCAGATAACGCATTATCAGGACTCGCCGTCCTGCAAAGAGAAGCAT
+CAGTGTTCGCTAATCGACGCGAAAGATACCTTCAGCGCGAACTATCAGCAAGAGCCTGGC
+GTGCAGGGGCCGCTGAAAGTAGGGAACTCGCTGGTGGATGCGTTTACCCTGCAATATTAC
+GAAGGCTTTCCGATGGATCAGGTCGCTTGGGGCGGGATCCACACCGATCGGCAGTGGAAG
+GTGCTGTCAAAACTGAAAAACGGCTACCAGGACAGCCTGTTTACCTCACCCACGGTGGCG
+CGCAATGTCGCTGCGCCGCTGGTAAAATATATCGATAAGGTGCTGGTTGCCGAGCGCGTT
+AGCGCGCCGAAGGTTACCGTGCTGGTGGGGCATGATTCCAATATCGCGTCGCTGCTGACG
+GCGCTGGATTTTAAACCCTATCAGCTCCATGACCAGTATGAGAGAACGCCGATTGGTGGT
+CAGCTTGTCTTCCAACGCTGGCATGACGGTAACGCTAACCGGGATTTGATGAAAATCGAG
+TATGTCTACCAGAGCGCCCGGCAGTTACGTAATGCGGAAGCGTTAACGCTCAAATCGCCT
+GCGCAAAGGGTAACGCTGGAACTGAAAGGATGTCCGGTGGATGCGAACGGCTTCTGTCCG
+CTGGATAAGTTCGATAACGTCATGAACACTGCTGCAAAATAGATGGAAAAGAATAATGAA
+GTCATTCAGACCCATCCGCTTGTAGGATGGGACATCAGCACCGTCGATAGCTATGATGCG
+CTGATGCTGCGTTTACACTACCAGACCCCAAATCGTCCGGAACCGGAAGGGACTGAAGTT
+GGTCAAACGCTCTGGTTAACGACAGATGTAGCCAGGCAATTTATTTCAATATTAGAAGCC
+GGCATCGCCAAAATAGAATCAGGCGATTACCAGGAAAACGAGTATCGTCACCATTAGATG
+GAACTTAAGGATTATTACGCCATTATGGGCGTGAAACCGACGGACGATCTCAAGACGATT
+AAGACCGCCTATCGCCGACTGGCCCGCAAGTACCATCCAGATGTCAGCAAAGAACCCGAT
+GCCGAAGCCCGTTTCAAAGAGGTTGCTGAAGCATGGGAAGTGCTGAGTGATGAGCAACGG
+CGCGCCGAGTATGACCAGTTATGGCAACACCGTAACGATCCACAATTTAATCGCCAGTTC
+CAGCAACACGAAGGCCAGCCGTATAACGCCGAAGATTTTGATGATATTTTCTCGTCTATT
+TTTGGTCAGCACGGTCGTCATTCGCACCACCGCCACGCCGCACGCGGTCATGATATCGAA
+ATTGAAGTGGCGGTATTCCTGGAAGAAACGCTGGAAGAGCACCAGCGTACGATTAGCTAT
+TCCGTCCCCGTTTATAACGCGTTCGGCCTGGTGGAGCGGGAAATTCCCAAAACATTGAAT
+GTGAAAATCCCGGCTGGCGTCAGCAACGGGCAACGAATCAGACTGAAAGGCCAGGGCACG
+CCGGGGGAAAACGGCGGACCTAATGGCGATTTATGGCTCGTTATCCATATTGCCCCGCAT
+CCGCTCTTTGATATCGTCAATCAGGATCTGGAAGTCGTCCTTCCGCTTGCCCCATGGGAG
+GCGGCGCTCGGCGCTAAGGTGTCTGTGCCAACGCTTAAAGAGCGTATTTTGCTGACCATT
+CCCCCCGGCAGCCAGGCAGGTCAGCGGCTGCGTATCAAAGGAAAAGGATTAGCCAGTAAA
+AAGCACACTGGCGATCTCTATGCCATCATCAAAATCGTTATGCCGCCGAAACCTGACGAG
+AAAACAGCTGCCCTGTGGCAACAACTGGCGGACGCGCAGTCGTCCTTTGACCCACGCCAG
+CAATGGGGGAAAGCATAAATGGCTAACATCACTGTCACCTTTACCATCACCGAATTTTGT
+TTGCACACCGGCGTGACGGAAGAGGAGCTAAACGAAATCGTCGGACTTGGCGTAATTGAG
+CCTTACGAAGACGATAACGCCGACTGGCAATTCGACGATCGCGCAGCGAGCGTGGTACAA
+CGCGCGCTACGCTTACGCGAGGAGCTGGCGCTCGACTGGCCAGGGATCGCGGTCGCGTTA
+ACGCTGCTGGAAGAGAATTCACGGCTGCGCGAAGAAAACCGGTTACTGCTGCAACGCCTT
+TCTCGCTTTATCTCGCATCCCTAAATGTCATCTTGTTGGAGATTTACGGATTCGCTAACA
+AGCCTATGGCATACTGCGTTGATGAAGATTTTATTGATTGAAGATAACCAGAAAACCATT
+GAGTGGGTACGTCAGGGACTCACGGAGGCAGGCTATGTGGTTGATTATGCCTGTGATGGA
+CGAGACGGATTACACCTAGCCCTTCAGGAACATTATTCATTGATTATTCTTGATATTATG
+CTGCCGGGGCTTGATGGATGGCAGGTTTTACGCGCGTTGCGCACTGCATATCAGCCCCCT
+GTTATTTGCCTGACGGCGCGCGACTCGGTTGAGGATCGCGTCAAAGGTCTTGAGGCGGGC
+GCTAATGATTACCTTGTTAAGCCTTTTTCCTTCGCCGAACTGCTGGCCCGGGTGAGAGCT
+CAACTCAGACAGCATGTCCCGGTCTTTACCCGACTGACGATCAATGGTCTGGACATGGAT
+GCCACAAAGCAATCGGTGTTACGAAATGGCAAACCGATTTCCCTGACCCGCAAAGAATTC
+CTGCTCCTCTGGTTACTGGCGTCCCGGGCAGGGGAAATCGTGCCCCGAACCGCGATCGCC
+AGCGAAGTTTGGGGAATTAACTTTGATAGTGAAACCAACACCGTTGATGTCGCGATTCGT
+CGGCTGCGCGCCAAAGTAGACGATCCATTTGAAAAGAAGCTCATTATGACCGTCCAGGGG
+ATGGGTTATCGATTACAGGCGGAAACGTCGCAGAATGGTTAAatgaaaaacaaattgtta
+tttatgatgttgacaatactgggtgcgcctgggattgcaaccgcgacaaattatgatctg
+gctcgttcagagtataattttgcggtaaatgaattaagcaagtcttcatttaatcaggcg
+gccattattggtcaagtcggcacggataatagtgccagagtacgccaggaaggatcaaaa
+ctattgtccgttatttcacaagaaggagaaaataatcgggcgaaagtcgaccaggcaggg
+aattataactttgcgtatattgagcaaacgggcaatgccaacgatgccagtatatcgcaa
+agcgcttacggtaatagtgcggctattatccagaaaggttctggaaataaggccaatatt
+acccagtacggtacgcagaaaacagcagttgtagtgcagaaacagtcgcatatggctatt
+cgcgtcacccaacgctaaatgcatactttattgctccttgccgcactttcaaatcagatt
+acgtttaccacgactcagcaaggcgatatttacacggtgatccctcaggtcacattaaac
+gaaccctgcgtctgtctggtgcaaattctctctgtgcgcgacggcgtcgggggacaaagc
+catacacagcaaaaacaaacgctatctttacctgctaatcaaccgattgagttgtctcgt
+cttagtgtaaatatatcttcagaggactcggttaaaattattgttactgtttcggacgga
+caatcactgcatttatcacaacaatggccgccttctgcacagtagatgtttaatgaagtc
+catagtagtcatggtcacacactattgttgatcacaaagccatctctgcaagctacggca
+ttattgcaacatttaaagcaatcgctggccataaccggaaaactgcataatattcaacgt
+tctctggaagatatctcagccggttgcattgttttaatggatatgatggaagcggataag
+aagcttatccactattggcaggataatttaagccgcaaaaacaataatataaaaacatta
+ttgttaaatacccctgacgattatccctaccgtgaaattgaaaactggcctcatattaac
+ggcgtgttttacgccactgaagaccaggaacacgtggtcagcggattacagggtattctg
+cggggcgaatgctatttttcacaaaaattagccagttacctgattacacactcaggaaat
+taccgctacaacagcaccgagtccgcattactcactcatcgcgaaaaagagatcctcaat
+aagttacgtattggtgcctctaataatgaaatcgccaggtcgctatttatcagcgagaat
+acggttaagacacatctttataatcttttcaaaaagatagctgtcaaaaatcgcacccag
+gcagtttcatgaatgaaacgctatctgacctggattgtagcagcagagttactgttcgct
+accggaaacctgcatgccaatgaagttgaagtcgaggttcccggattgttaaccgaccat
+accgtctcttcgataggacatgaattctatcgtgcattcagcgacaaatgggaaagcgaa
+tacaccggcaatctgaccattaatgaaagacccagtgcgcgttggggaagctggatcacc
+ataacggtaaatcaggacgttattttccagacctttttatttccaatgaaaagagacttc
+gagaaaaccgtcgtcttcgcattagcgcaaacagaggaagcattaaatcgccgacaaata
+gatcaaacgctattaagtacgagtgatttagcgcgtgatgaattctaaatgcgtgttaaa
+catgcagtagtgctgctcatgcttttttcgccattaacctgggctggaaatatgacgttc
+cagttccgtaatcctaactttggtggaaaccccaataacggttcctttttattgaatagc
+gcccaggcgcaaaattcatataaagaccccgcttatgataacgattttggtatcgagacc
+ccctcagcgttggataactttacgcaggctattcaatcgcaaattctgggcggcttgttg
+accaatattaataccggaaaaccaggacgtatggtgaccaatgattttattatcgatatc
+gctaatcgcgacggacagctccagctcaacgtcacggacagaaaaacgggaagaacctcg
+accatcgaagtgtcaggtttacaaactcagtcaaccgatttttaaatgccgcgcttactt
+attttggttgccgttttattgttgagcggatgcttaactgccccgccgaaacaagctgcg
+aaaccgacattaatgccccgcgcacaaagttacaaagatttgacgcacttacctgctccc
+accggtaagatctttgtttcggtatataacattcaggatgaaacgggccaatttaaacct
+tacccggcaagtaacttttccacggctgtgccgcagagcgccaccgctatgttggtcacc
+gcgctgaaagattcgcgctggtttatcccactagaacgacaaggcttacagaatcttttg
+aatgaacggaaaattattcgcgcagcccaggaaaacggcaccgtggcgatgaataaccgt
+atcccgcttcagtcgttgacggcggcaaatattatggtggaaggttctattattggttat
+gaaagtaacgtcaaatccggcggggtcggcgcaagatatttcggtattggcgccgatacg
+cagtatcagctggatcagattgctgtcaacctgcgcgtggttaacgtcagtacgggcgag
+atcctttcttcggtgaacaccagtaaaacgatcctttcctatgaagtacaggcaggcgtg
+ttccgttttattgattaccagcgcttactggaaggcgaaatcggctatacctcgaacgaa
+ccggtgatgctgtgtctgatgtcagccattgaaaccggcgttatcttcctcattaatgat
+ggtatcgatcgcggactgtgggatttgcagaataaagcggacaggcaaaatgatattctg
+gtgaaataccgtgagctgtcagtaccgccagaatcctgaATGTCTATTGCCGTAAATATG
+AATGACCCGACCAACACGGGCGTCAAAACGACGACCGGCAGCGGGTCGATGACCGGAAGC
+AACGCTGCCGATCTGCAAAGCAGTTTCCTGACCTTACTGGTCGCGCAATTGAAGAACCAG
+GACCCGACTAACCCATTACAAAATAATGAGTTAACGACACAGTTGGCGCAAATCAGTACC
+GTGAGCGGCATTGAAAAACTGAATACGACGCTGGGGGCTATTTCCGGGCAAATCGATAAT
+AGTCAGTCCCTACAGGCGACCACGCTGATTGGACATGGCGTTATGGTGCCTGGCACCACA
+ATTCTGGCGGGTAAAGGCGCGGAAGAAGGGGCCGTGACGTCCACGACGCCGTTTGGCGTG
+GAATTGCAACAGCCTGCGGACAAAGTGACGGCAACCATTACCGATAAAGATGGCCGGGTG
+GTACGGACGCTGGAGATCGGTGAGTTGCGAGCCGGGGTACACACCTTTACCTGGGATGGT
+AAGCAAACGGACGGAACAACGGTACCGAATGGTTCTTACAACATTGCGATTACCGCCAGC
+AATGGCGGGACGCAACTGGTGGCGCAGCCGCTGCAATTCGCTCTGGTACAGGGCGTGACG
+AAGGGCAGTAACGGCAACCTGTTGGATCTGGGTACCTACGGCACCACCACACTCGACGAA
+GTTCGGCAAATAATCTAAATGTCTTTTTCTCAAGCGGTTAGCGGCCTGAACGCTGCGGCC
+ACCAACCTTGATGTTATCGGTAATAACATCGCCAACTCCGCCACCTATGGCTTTAAGTCC
+GGTACGGCATCATTTGCCGATATGTTCGCCGGTTCCAAAGTGGGGTTGGGCGTAAAAGTG
+GCGGGGATTACCCAGGATTTTACCGACGGTACGACAACGAACACCGGGCGCGGGCTGGAT
+GTCGCGATTAGCCAGAACGGTTTTTTCCGCCTGGTAGACAGCAACGGTTCCGTGTTCTAT
+AGCCGCAACGGCCAGTTCAAACTGGACGAGAACCGTAACCTGGTCAATATGCAGGGGATG
+CAGTTGACCGGCTATCCGGCCACCGGTACGCCGCCGACCATTCAGCAGGGGGCGAATCCT
+GCGCCGATCACCATTCCGAACACGCTGATGGCGGCGAAATCGACCACCACCGCGTCAATG
+CAGATCAACCTGAACTCAACGGACCCTGTACCGTCTAAAACGCCCTTTAGCGTGAGTGAT
+GCGGATTCGTATAACAAAAAAGGCACCGTCACCGTTTATGACAGCCAGGGTAATGCCCAT
+GACATGAACGTCTATTTTGTGAAAACCAAAGATAATGAATGGGCTGTGTACACCCATGAC
+AGCAGCGATCCTGCAGCCACTGCGCCAACAACGGCGTCCACTACGCTGAAATTCAATGAA
+AACGGGATTCTGGAGTCTGGCGGTACGGTGAACATCACCACCGGTACGATTAATGGCGCG
+ACAGCGGCCACCTTCTCCCTCAGCTTCCTTAACTCCATGCAGCAGAACACCGGGGCTAAT
+AACATCGTCGCCACCAATCAAAACGGCTATAAGCCTGGCGACCTGGTGAGCTACCAGATT
+AACAATGATGGCACCGTGGTTGGCAACTACTCCAACGAGCAGGAGCAGGTGCTGGGGCAG
+ATTGTGCTGGCTAACTTCGCCAACAACGAAGGTCTGGCATCCCAGGGCGATAACGTCTGG
+GCGGCGACGCAGGCCTCCGGGGTAGCGCTGCTGGGGACTGCCGGTTCCGGCAACTTCGGT
+AAGCTGACGAACGGCGCGCTGGAAGCCTCTAACGTGGATTTGAGTAAAGAGCTGGTGAAT
+ATGATCGTCGCGCAGCGTAACTACCAGTCGAATGCGCAGACCATCAAAACCCAGGACCAG
+ATCCTCAATACGCTGGTTAACCTGCGCTAAATGGATCACGCAATTTATACCGCCATGGGG
+GCGGCCAGCCAGACGCTTAACCAGCAGGCGGTAACGGCCAGCAACCTGGCTAATGCCTCA
+ACGCCGGGCTTTCGCGCGCAGCTTAACGCGCTACGCGCGGTGCCCGTTGATGGCCTCTCT
+TTAGCGACGCGCACGTTGGTTACGGCGTCGACGCCGGGGGCGGATATGACCCAGGGTCAG
+TTGGACTACACTTCCCGCCCGCTGGATGTTGCGTTACAGCAGGACGGCTGGCTGGTGGTG
+CAAGCGGCGGATGGCGCTGAAGGATATACCCGTAACGGGAATATCCAGGTGGGCCCGACC
+GGGCAGTTAACCATTCAGGGACATCCGGTTATCGGCGAAGGCGGCCCGATTACCGTTCCG
+GAAGGGTCGGAAATCACCATTGCGGCAGACGGCACGATCTCCGCGCTCAATCCCGGCGAC
+CCGCCAAACACGGTGGCGCCCGTTGGGCGGCTGAAGCTGGTCAAAGCGGAAGGCAATGAG
+GTGCAGCGGAGCGATGACGGTTTATTCCGCCTTACCGCCGAGGCACAGGCTGAACGCGGG
+GCGGTACTGGCCGCCGACCCGTCAATTCGCATTATGTCGGGCGTGCTGGAGGGCAGTAAC
+GTCAAGCCGGTTGAAGCCATGACCGACATGATCGCCAACGCACGTCGTTTTGAAATGCAG
+ATGAAGGTTATCACCAGCGTAGATGAGAACGAAGGGCGAGCTAACCAACTGCTGTCGATG
+AGTTAAATGATCAGTTCATTATGGATCGCCAAAACCGGTCTGGACGCGCAGCAAACCAAT
+ATGGATGTGATTGCCAATAACCTGGCAAACGTCAGCACCAATGGTTTTAAGCGTCAGCGC
+GCGGTATTTGAAGATCTGTTGTATCAGACCATCCGCCAGCCGGGCGCGCAGTCGTCCGAG
+CAGACGACGCTGCCTTCCGGGCTGCAAATCGGTACCGGCGTGCGTCCGGTCGCCACGGAG
+CGCCTGCACAGTCAGGGGAACCTGTCGCAGACCAACAACAGTAAAGATGTGGCGATTAAA
+GGGCAGGGCTTTTTCCAGGTCATGCTGCCGGACGGTACGTCTGCCTATACCCGCGACGGC
+TCTTTCCAGGTGGATCAGAATGGTCAACTGGTGACGGCGGGCGGTTTTCAGGTGCAGCCG
+GCAATCACCATTCCGGCCAACGCGTTAAGCATCACGATTGGCCGCGACGGCGTGGTCAGC
+GTTACCCAGCAAGGGCAGGCCGCGCCGGTTCAGGTCGGGCAGCTTAACCTGACCACCTTT
+ATGAACGACACCGGTCTGGAAAGCATCGGCGAGAACCTCTATATCGAAACGCAATCGTCC
+GGCGCGCCGAACGAAAGCACGCCGGGGCTCAACGGCGCGGGGTTGTTGTATCAAGGGTAT
+GTCGAAACGTCGAACGTTAACGTGGCGGAAGAGCTGGTGAACATGATTCAGGTTCAACGC
+GCCTATGAAATTAACAGTAAAGCAGTATCGACGACCGATCAGATGCTGCAGAAACTGACG
+CAACTCTAAATGGCCCTGATGGTCGCGACGCTGACAGGATGCGCCTGGATACCCGCTAAA
+CCGCTCGTGCAGGGGGCGACCACGGCGCAGCCGATACCTGGCCCGGTACCGGTGGCGAAT
+GGCTCCATATTTCAGTCTGCGCAGCCGATTAATTATGGCTATCAGCCGCTTTTTGAAGAT
+CGTCGACCGCGTAATATCGGCGATACGCTCACGATTGTGTTACAGGAAAACGTCAGCGCC
+AGTAAAAGCTCGTCGGCAAATGCCAGCCGCGACGGCAAAACCAGCTTTGGTTTTGATACG
+GTACCGCGTTATCTGCAGGGATTATTCGGTAATTCCCGCGCGGATATGGAGGCCTCCGGC
+GGCAACTCTTTTAATGGTAAAGGCGGCGCGAATGCCAGCAATACCTTTAGCGGCACGCTG
+ACCGTGACCGTCGATCAGGTTCTGGCCAATGGCAATTTACACGTCGTGGGGGAAAAACAG
+ATCGCGATTAATCAGGGAACGGAATTCATCCGCTTCTCCGGCGTGGTAAATCCACGCACC
+ATCAGCGGTAGCAACTCTGTTCCCTCGACACAGGTGGCGGATGCGCGGATTGAATATGTC
+GGGAACGGCTATATTAACGAAGCGCAAAATATGGGCTGGCTGCAACGTTTCTTCCTTAAT
+TTGTCGCCGATGTAAGTGTTTAAAGCTCTTGCAGGAATCGTTCTGGCACTGGTTGCCACT
+CTGGCGCACGCCGAGCGTATCCGGGATCTGACCAGTGTCCAGGGAGTACGGGAAAACTCG
+CTGATCGGCTACGGGCTGGTGGTCGGGCTGGACGGTACGGGCGACCAGACGACCCAGACG
+CCATTTACCACCCAGACGCTGAATAACATGCTGTCACAACTGGGGATTACGGTCCCCACC
+GGCACCAATATGCAGTTGAAAAACGTGGCGGCGGTGATGGTGACGGCGTCGTATCCGCCT
+TTTGCGCGACAGGGACAAACGATCGATGTCGTCGTTTCCTCAATGGGGAACGCTAAAAGT
+CTGCGTGGCGGGACGTTATTAATGACGCCGTTAAAAGGGGTGGACAGCCAGGTGTATGCT
+CTGGCGCAGGGCAATATTCTGGTCGGCGGCGCGGGCGCTTCCGCAGGCGGCAGTAGCGTG
+CAGGTTAACCAGCTTAATGGCGGGCGCATCACTAATGGCGCGATTATCGAACGCGAGTTG
+CCGACTCAGTTCGGCGCTGGCAACACCATTAATCTGCAATTGAACGACGAAGATTTTACG
+ATGGCGCAGCAAATTACCGACGCCATCAACCGCGCCCGCGGTTACGGCAGCGCCACTGCG
+CTTGATGCGCGAACGGTACAGGTACGCGTGCCCAGCGGCAACAGCTCGCAGGTGCGTTTT
+CTGGCGGACATTCAAAATATGGAAGTCAACGTGACGCCGCAGGATGCAAAAGTCGTGATC
+AACTCGCGTACCGGTTCGGTGGTCATGAATCGGGAAGTCACGCTGGATAGCTGCGCTGTG
+GCGCAGGGCAATTTGTCAGTGACAGTCAATCGCCAACTCAACGTCAACCAGCCGAATACG
+CCATTTGGCGGCGGGCAGACCGTGGTGACGCCACAGACTCAGATAGATTTGCGTCAGAGC
+GGCGGATCGCTACAGAGCGTGCGTTCCAGCGCCAATCTGAACAGCGTAGTGCGCGCGCTG
+AATGCGCTTGGCGCGACGCCGATGGATCTGATGTCGATTTTGCAGTCCATGCAGAGCGCG
+GGCTGTCTACGCGCCAAACTGGAAATCATCTGAATGATCGGAGACGGTAAATTGCTGGCC
+AGCGCGGCCTGGGATGCGCAATCTCTGAACGAACTGAAAGCGAAAGCGGGCCAGGACCCG
+GCGGCGAATATCCGTCCTGTGGCCCGTCAGGTGGAAGGGATGTTTGTGCAGATGATGCTG
+AAAAGTATGCGCGAGGCTTTACCCAAAGATGGTTTATTCAGCAGCGATCAGACGCGTCTG
+TATACCAGCATGTATGACCAGCAGATCGCCCAGCAGATGACCGCCGGTAAGGGATTGGGG
+CTGGCGGATATGATGGTTAAACAGATGACGGGCGGGCAGACGATGCCTGCAGATGATGCG
+CCGCAAGTACCGCTTAAATTCTCCCTGGAGACGGTAAACAGCTATCAAAATCAGGCGCTG
+ACCCAACTGGTGCGCAAAGCCATACCGAAAACGCCGGACAGCAGCGATGCGCCGCTCTCC
+GGCGACAGTAAAGACTTTCTGGCCCGGCTTTCGCTCCCGGCGAGGCTGGCCAGCGAACAA
+AGCGGGGTGCCGCATCATCTGATTCTGGCGCAGGCGGCGCTGGAGTCCGGCTGGGGGCAG
+CGGCAAATCCTGCGGGAGAATGGCGAACCCAGCTATAACGTATTTGGCGTGAAAGCGACC
+GCCAGTTGGAAAGGGCCGGTGACGGAAATCACCACCACTGAATACGAAAATGGCGAAGCG
+AAAAAAGTGAAAGCGAAATTCCGCGTCTATAGCTCGTATCTGGAGGCGTTATCGGATTAT
+GTCGCGCTGTTAACGCGTAACCCACGCTACGCTGCCGTGACCACTGCCGCCACGGCAGAG
+CAGGGCGCAGTGGCTCTGCAAAACGCCGGATACGCCACTGACCCGAATTACGCGCGTAAA
+TTGGCCAGCATGATTCAGCAGTTGAAAGCGATGAGTGAAAAGGTCAGCAAAACCTACAGC
+GCGAATCTCGACAATCTCTTTTAAATGTCCAGCTTGATTAATCACGCCATGAGCGGACTT
+AACGCCGCGCAGGCCGCGTTAAATACGGTCAGTAATAACATCAACAATTATAACGTTGCG
+GGTTATACCCGGCAGACAACTATTCTGGCGCAGGCAAACAGTACGTTAGGGGCTGGCGGC
+TGGATAGGTAATGGCGTTTACGTTTCAGGCGTACAGCGCGAATATGATGCGTTTATCACT
+AATCAGCTACGCGGCGCGCAAAACCAGAGCAGCGGCTTAACCACGCGCTATGAACAAATG
+TCGAAAATCGACAACCTGCTGGCCGATAAATCCAGCTCACTGTCTGGCTCGCTGCAGAGT
+TTTTTTACCAGCCTGCAAACGTTAGTCAGTAATGCGGAAGATCCTGCGGCGCGTCAGGCG
+CTGATTGGTAAAGCGGAAGGGCTGGTAAACCAGTTCAAAACCACCGATCAGTATCTGCGC
+GATCAGGATAAACAGGTCAATATCGCGATTGGCTCCAGCGTGGCGCAAATCAACAATTAC
+GCGAAGCAGATAGCTAACCTGAACGATCAAATCTCCCGTATGACGGGCGTAGGCGCGGGC
+GCATCGCCGAACGACCTGCTCGATCAACGTGATCAGTTGGTTAGCGAGCTTAACAAGATC
+GTTGGCGTCGAGGTGAGTGTACAGGACGGCGGCACCTATAACCTGACGATGGCCAATGGC
+TATACGCTGGTGCAGGGGTCGACGGCGCGTCAGTTGGCGGCGGTTCCCTCCAGCGCCGAC
+CCGACGCGAACGACTGTCGCTTATGTCGATGAGGCCGCCGGTAACATCGAAATTCCGGAA
+AAGTTGCTGAACACCGGTTCGCTCGGCGGGCTACTGACGTTCCGTTCTCAGGATCTGGAT
+CAGACTCGTAATACGCTGGGCCAGTTGGCGTTGGCGTTTGCCGATGCGTTTAACGCGCAG
+CATACCAAAGGTTATGACGCCGACGGCAATAAAGGGAAAGACTTCTTTAGCATTGGCTCG
+CCGGTGGTATATAGCAACAGTAATAATGCCGATAAAACGGTATCGCTAACCGCTAAGGTG
+GTCGACAGCACGAAGGTTCAGGCGACGGATTATAAGATTGTTTTTGACGGTACAGACTGG
+CAGGTTACTCGCACTGCGGATAACACCACCTTCACGGCAACAAAAGATGCTGACGGAAAA
+CTGGAGATTGACGGTCTGAAAGTGACGGTAGGGACTGGCGCACAGAAAAACGACAGTTTT
+CTTCTCAAGCCGGTCAGCAATGCTATCGTCGACATGAACGTTAAAGTGACAAATGAAGCC
+GAGATTGCGATGGCGTCTGAGTCAAAACTCGATCCTGATGTGGATACCGGCGACAGCGAT
+AACCGCAATGGTCAGGCATTGCTGGACTTACAAAACAGCAATGTAGTGGGCGGCAACAAA
+ACCTTTAACGATGCTTACGCCACGTTGGTCAGCGATGTGGGTAACAAAACGTCAACGCTG
+AAAACCAGCAGCACCACGCAGGCGAATGTGGTTAAACAGCTTTATAAACAGCAACAGTCG
+GTTTCCGGCGTTAACCTCGACGAAGAGTACGGCAATTTGCAGCGTTATCAGCAGTATTAT
+CTGGCGAATGCGCAAGTATTGCAGACCGCGAATGCGCTGTTTGATGCGTTATTGAATATT
+CGCTAAATGCGTATCAGTACCCAGATGATGTACGAACAAAATATGAGCGGCATCACTAAT
+TCTCAGGCCGAATGGATGAAGCTGGGCGAGCAGATGTCTACCGGTAAGCGCGTTACCAAC
+CCATCTGACGATCCGATCGCCGCGTCGCAGGCGGTAGTACTCTTTCAGGCGCAGGCGCAG
+AATAGCCAGTACGCCCTGGCGCGTACGTTTGCCACCCAAAAAGTGTCGCTGGAAGAAAGC
+GTACTCAGTCAGGTGACGACGGCGATTCAAACCGCGCAGGAAAAAATCGTCTATGCCGGA
+AACGGCACGTTAAGCGACGATGACCGCGCGTCGCTGGCGACGGATTTACAGGGGATCCGC
+GATCAGCTGATGAACCTGGCAAACAGCACTGACGGCAATGGTCGCTATATCTTTGCCGGG
+TATAAAACGGAAGCGGCGCCATTCGACCAGGCGACAGGTGGTTATCATGGCGGCGAGAAA
+AGTGTTACCCAGCAGGTGGATTCCGCACGCACGATGGTAATTGGCCATACGGGAGCGCAA
+ATTTTTAATAGCATCACCAGCAATGCGGTGCCGGAACCGGATGGCTCGGACTCCGAAAAG
+AATCTGTTTGTCATGCTCGATACGGCAATTGCCGCGCTCAAGACCCCGGTGGAAGGCAAT
+GACGTGGAAAAAGAAAAAGCCGCTGCCGCCATTGATAAAACCAATCGCGGCTTAAAAAAT
+TCGCTTAATAACGTCCTGACCGTTCGTGCGGAACTGGGAACGCAACTGAGCGAACTCAGT
+ACGCTGGATTCACTGGGAAGCGACCGTGCGCTGGGACAGAAGCTACAGATGAGCAACCTG
+GTAGATGTGGACTGGAACTCGGTCATTTCCTCCTACGTCATGCAACAGGCGGCATTACAG
+GCGTCCTATAAAACGTTTACCGACATGCAGGGAATGTCGCTTTTCCAGTTGAACCGGTAA
+atggagataattttttatcacccgacatttaacgccgcctggtgggtaaatgcgctggag
+aaggctctcccacatgcgcgcgttcgtgaatggaaggtcggtgataacaaccccgcagac
+tatgcgcttgtatggcagcccccggttgaaatgctggccggaagacgcttaaaagccgtc
+tttgtgctgggcgcgggggtggatgcaattctgagtaaattaaatgcgcatccggaaatg
+ctggacgcctccattcctctattccgtctggaagataccggaatgggcctgcaaatgcag
+gagtatgccgccagccaggtattacactggttccgtcgtttcgatgattatcaggcgctg
+aaaaatcaggcgctatggaaaccgttgccggaatatacccgcgaagagtttagcgtcggt
+atcataggcgcaggggtactgggcgcaaaagtggcagaaagtctacaggcgtgggggttc
+ccgttacgttgctggagtcgtagccgcaaatcctggcctggcgtggaaagttatgtaggg
+cgtgaagaactgcgcgctttcctgaaccagacgcgggtgctgattaatctgctgccgaat
+acggcccaaacggtaggaattattaatagcgaattgttggatcaattgccggatggcgct
+tacgtgctgaatctcgcgcgcggcgttcatgttcaggaggcggatctgctggctgcgctt
+gatagcggtaagctaaaaggcgcgatgttggatgtctttagccaggaaccgttaccgcag
+gaaagtccattatggcgccatccgcgagtcgccatgacgccgcacattgcggcagtcacc
+cgtccggcggaagccatcgattatattagccgcaccattacccagctggagaagggagag
+ccggtgacggggcaggtggatcgggcgagaggatattgaATGTCCGTAATCAAGAAAAAT
+ATCCCTGCCATAGGCCTGTGTATCTGCGCTTTTTTTATCCATTCTGCGGTAGGGCAACAA
+ACGGTACAGGGCGGCGTTATCCATTTTCGCGGCGCGATTGTTGAGCCACTGTGCGATATT
+TCTACTCACGCCGAAAATATTGATTTAACCTGCCTACGCGAAGGTAAAAAGCAAATGCAC
+CGGATAGACCTTCGGCAGGCATCTGGATTACCGCAGGATATTCAGTCCATTGCGACGGTA
+CGGCTGCATTATCTCGATGCGCAAAAAAGCCTGGCGGTGATGAATATTGAGTACCGTTAA
+ATGGCAAACCATCGTGGCGGTTCCGGTAATTTTGCGGAAGACCGCGAAAGAGCATCAGAA
+GCAGGTCGTAAAAGTGGTCAGCACAGCGGGGGCAATTTTAAGAATGACCCGCAGCGTGCA
+TCCGAAGCAGGCAAAAAAGGGGGCAAAAGCAGTAACCGTAATCGCTAGATGGTAATGTCC
+GCACCAGGACACATTGTTTACAGTAGTTACAACACCCTGTACGGACATTCTCTCTCCGGT
+GGTGGTCTTGTCATCTTAAAAGCTCTCATCATTTCCCTTACTGTCCATACCCATGACGCC
+ATATGTGGTGCGCGTAGCCGTGTGTGGCGTCGTTTCAAAAAGCAAGCTAAGGCTTACAAG
+GAAGCCAACCCTCAGATGTGTGTGCGCATAATCGCGTTCAAGAGAACGCGGGTGATGTAT
+ACCTACAACTCAAGGTGCTATCCATGGGAAGACAAAAAGCAGTGAATGAAACGAATTTTC
+CTTACCTGCGCGGCGTTGTTGTTCAGCAGTCAGGCGTTGGCCGATGAGTGTGCCAGCGCC
+AGTACGCAGCTGGAAATGAATCGCTGCGCCGCCGCGCAATACCAGGCGGCAGATAAAAAG
+CTGAACGAAACCTATCAAAGCGCGATTAAGCGTGCGCAACCGCCGCAGCGTGAGCTATTG
+CAAAAAGCGCAGGTGGCATGGATTGCCCTGCGCGACGCCGATTGCGCGCTGATTCGCTCA
+GGTACGGAGGGCGGCAGCGTTCAACCCATGATCGCCAGCCAGTGCCTGACCGATAAAACG
+AACGAACGCGAAGCGTTTTTAGCCTCGCTGCTGCAATGTGAAGAGGGTGATTTGAGCTGC
+CCACTGCCGCCAGCCGGTTAAgtgcgtatattcgcggtgagcataatggtgattaccctg
+agcggctgcggcagtattatcagcagaacgatccccggacaaggacacggcaaccagtat
+taccctggcgtgcagtgggatatgcgtgattccgcatggcgctatatcactatcctcgat
+ctgcccttctcactgatcttcgatacactgctactgccgctcgatattcaccacgggcct
+tatgagtaaATGTGCCAACGTGCGATCGCCAATATTGATATCAGCAAAGAGTATGACGAA
+AGCATGGGCAGTAACGATGTGCATTATCAGTCGTTTGCTCGTATGGCGGATTTCTTTGGT
+CGTGATATGCAGGCGCATCGCCACGACCAGTTTTTTCAAATGCACTTTCTTGATACCGGG
+CAGATTGAGCTACAGCTCGACGATCATCGCTATTCGGTGCAGGCGCCGCTATTTGTGCTA
+ACGCCGCCCTCGGTGCCGCATGCTTTTATTACCGAATCGGATAGCGATGGTCATGTTCTG
+ACGGTACGCGAAGAGCTGGTTTGGCCGCTGCTGGAAGTGCTTTATCCCGGCACCAGAGAG
+GCCTTCGGCCTGCCGGGAATCTGCCTGTCGCTGGCGGATAAACTCAACGAGCTGGCGGCG
+CTCAAACATTACTGGCAGCTAATTGAGCGGGAGTCCACGGAACAACTGGCTGGCTGCGAA
+CATACCTTGGTACTACTGGCGCAGGCGGTATTTACCTTGCTGTTGCGTAATGCGAAGCTG
+GACGATCATGCCGCAACCGGGATGCGCGGTGAACTGAAACTTTTTCAGCGCTTTACCCTG
+TTAATTGACAACCACTTCCATCAGCACTGGACGGTGCCCGATTATGCCTGTGAGTTGCAT
+ATTACCGAATCTCGTTTGACCGATATTTGCCGACGTTTTGCTAATCGCCCGCCTAAACGC
+CTGATTTTTGATCGGCAATTACGCGAGGCGAAACGACTGCTGCTTTTTTCCGACAATGCT
+GTCAACGAGATCGCCTGGCAATTAGGTTTTAAAGATCCGGCTTATTTCGCCCGTTTCTTT
+AATCGCCTTGCTGGCTGTTCTCCTTCGCAGTTTCGCCAACGTGAAGTTCCTTCTTTTCTC
+AACTAAATGGGACGCACACCGGATTACAAAGCCGCCTTTGGCTGCGCTCTGGGCGCTAAC
+CCAGCCTTCTACGGCCAGTTTGAGCAGAACGCCCGTAACTGGTACACCCGTATTCAGGAG
+ACCGGCCTGTACTTTAACCATGCAATCGTCAACCCGCCCATTGACCGCCACAAACCTGCC
+GACGAAGTGAAAGACGTCTATATCAAGCTGGAGAAAGAGACGGACGCCGGGATTATTGTC
+AGCGGGGCGAAAGTTGTCGCCACTAACTCCGCCCTGACTCACTACAACATGATTGGTTTC
+GGCTCAGCCCAGGTGATGGGCGAAAACCCGGATTTTGCTCTGATGTTTGTCGCGCCAATG
+GATGCCGAAGGCGTAAAACTTATTTCGCGCGCCTCGTATGAAATGGTCGCGGGCGCGACG
+GGCTCGCCGTTTGATTATCCCCTCTCCAGCCGTTTTGATGAAAACGATGCCATTCTGGTG
+ATGGACAAGGTGCTGATCCCGTGGGAAAACGTATTAATTTACCGTGATTTCGATCGTTGT
+CGTCGCTGGACGATGGAAGGCGGCTTTGCCCGTATGTATCCACTGCAAGCCTGTGTTCGT
+CTGGCGGTAAAACTTGATTTCATTACCGCGCTGCTGAAAAAATCGCTCGAATGTACGGGT
+ACCGTAGAGTTCCGGGGCGTGCAGGCCGATCTCGGCGAAGTCGTGGCCTGGCGCAATATG
+TTCTGGGCATTGAGCGATTCTATGTGTTCTGAAGCAACCCCGTGGGTAAACGGCGCCTGG
+CTACCGGACCACGCCGCGCTGCAAACCTATCGTGTGATGGCCCCAATGGCCTACGCGAAA
+ATTAAAAATATTATTGAACGTAACGTTACCAGCGGCCTGATTTACCTGCCTTCCAGCGCC
+CGCGATCTGAATAATCCGCAAATCGACCAGTACCTGGCGAAATACGTACGCGGCTCTAAC
+GGAATGGACCATGTTGAACGTATCAAAATTCTTAAATTGATGTGGGATGCCATCGGCAGC
+GAGTTTGGCGGTCGCCATGAGCTGTACGAGATTAACTACTCGGGCAGCCAGGATGAAATT
+CGTCTGCAGTGTCTGCGTCAGGCCCAGAGCTCCGGCAATATGGATAAGATGATGGCAATG
+GTCGATCGCTGCCTCTCCGAATACGATCAGAATGGCTGGACGGTTTCGCATTTGCACAAT
+AACGACGACATCAATCAACTGGATAAGCTGCTGAAATAAATGCAAGTAGATGAACAACGT
+CTGCGTTTTCGCGATGCGATGGCAAGTCTGGCGGCAGCGGTCAACATCGTAACCACGGCG
+GGTCACGCCGGACGCTGCGGTATCACCGCAACAGCGGTTTGCTCAGTCACTGATACGCCG
+CCCTCCGTGATGGTATGTATTAATGCCAATAGCGCCATGAACCCCGTTTTTCAGGGCAAC
+GGCAGGCTGTGCATTAATGTACTTAACCATGAGCAGGAGCTGATGGCGCGCCACTTTGCC
+GGTATGACGGGGATGGCGATGGAGGAGCGTTTTCACCAGCCATGTTGGCAAAACGGGCCG
+CTGGGCCAGCCGGTACTTAACGGCGCGCTGGCCAGTCTTGAAGGCGAGATCAGCGAGGTA
+CAAACCATTGGCACGCATCTGGTGTATCTGGTGGCGATCAAAAATATTATTCTTAGCCAG
+GAGGGGCATGGCCTGATTTATTTCAAACGCCGTTTTCATCCGGTCAGACTTGAGATGGAA
+GCGCCTGTTTAAATGAAGGGTACTGTTTTCGCCGTTGCGTTAAACCATCGCAGCCAGCTT
+GATGCCTGGCAAGAGGCTTTCTCTCAGCCTCCCTATAATGCGCCGCCTAAAACCGCAGTG
+TGGTTCATCAAGCCGCGTAATACGGTGATTCGTCACGGCGAACCCATTCCTTATCCGCAG
+GGAGAAAAGGTACTGAGCGGCGCGACAGTGGCGCTCATTGTGGGGAAAACCGCCAGCCGG
+ATACGCCCTGAAGCGGCGGCGGACTATATCGCCGGGTATGCGCTGGCTAACGAGGTCAGC
+CTGCCGGAAGAGAGCTTTTATCGCCCGGCGATTAAAGCGAAATGTCGCGATGGCTTTTGC
+CCGCTGGGTGAAATGGCGCCGCTGAGTGATGTGGATAATCTCACCATTATCACTGAAATC
+AACGGACGAGAAGCGGACCACTGGAATACTGCCGATTTACAGCGTAGCGCCGCACAACTG
+CTTAGCGCGTTAAGTGAGTTCGCTACACTTAACCCTGGCGATGCGATCTTACTTGGTACG
+CCGCAGAATCGCGTTGCGCTGCGTCCCGGCGATCGGGTGCGTATTCTGGCGAAAGGTTTA
+CCCGCGCTGGAAAATCCGGTTGTCGCAGAAGATGAATTCGCCCGCCACCAGACGTTTACG
+TGGCCGCTGTCAGCGACGGGAACGTTATTTGCGCTGGGGTTGAACTACGCCGATCACGCC
+AGCGAGCTGGCATTTACGCCGCCGAAAGAGCCGCTGGTATTTATCAAAGCGCCAAACACC
+TTTACCGAACATCACCAAACGTCGGTGCGCCCGAACAACGTCGAATATATGCACTACGAA
+GCCGAGCTGGTCGTGGTGATTGGCAAAACGGCGCGTAAGGTGAGCGAAGCCGAAGCCATG
+GAGTATGTGGCCGGTTACACCGTCTGTAACGACTACGCGATCCGCGACTATCTGGAAAAC
+TACTACCGTCCGAATCTGCGGGTAAAAAGCCGCGACGGCCTGACGCCGATAGGCCCGTGG
+ATTGTGGATAAAGAGGCGGTTTCTGATCCGCACAACCTGACGTTACGCACCTTTGTCAAC
+GGTGAGCTGCGGCAGGAAGGGACGACCGCCGATCTGATCTTCAGCATCCCGTTCCTGATT
+TCTTATCTGAGCGAATTTATGACGTTGCAACCGGGCGACATGATTGCCACCGGTACGCCG
+AAAGGGCTGTCCGATGTGGTGCCGGGGGATGAAGTTGTCGTTGAAGTAGAAGGCGTGGGT
+CGCCTGGTTAACCGAATCGTCAGTGAGGAGAGCGCAAAATGAATGAAAAATGCTTTCAAA
+GACGCGTTAAAAGCGGGGCGCCCGCAAATCGGTTTGTGGCTGGGGCTTGCCAACAGTTAC
+AGCGCTGAACTGTTAGCGGGCGCCGGCTTCGACTGGCTACTGATTGACGGTGAACACGCG
+CCAAACAACGTGCAGACGGTGTTGACCCAGTTGCAGGCGATTGCGCCTTATCCCAGCCAG
+CCGGTGGTGCGTCCGTCATGGAACGATCCGGTACAGATTAAGCAACTGCTCGACGTCGGC
+GCGCAAACGCTGCTGATACCGATGGTGCAGAATGCCGATGAAGCGCGAAACGCCGTGGCG
+GCTACGCGTTATCCGCCTGCCGGTATTCGCGGCGTGGGCAGCGCGCTGGCGCGGGCATCG
+CGCTGGAATCGCATTCCGGACTATCTCCACCAGGCCAACGACGCCATGTGCGTACTGGTG
+CAGATTGAAACGCGTGAGGCGATGAGCAATCTGGCGTCAATTCTCGACGTGGATGGCATT
+GACGGCGTGTTTATTGGCCCGGCGGATCTCAGCGCCGATATGGGCTTTGCCGGCAATCCG
+CAGCACCCGGAAGTGCAGGCGGCGATTGAGAACGCCATCGTGCAGATACGCGCGGCGGGG
+AAAGCGCCGGGGATTCTGATGGCCAATGAAGCACTGGCGAAACGTTATCTGGAACTGGGG
+GCGCTATTTGTCGCCGTCGGCGTTGACACCACGCTGCTGGCGCGCGGAGCGGAGGCGCTG
+GCGGCGCGCTTTGGCGCAGAAAAAAAACTGTCCGGTGCGTCCGGCGTCTATTAAATGCAT
+GATTCATTAACCATCGCCTTGCTTCAGGCGCGCGAAGCGGCAATGACCTATTTCCGCCCC
+ATCGTTAAAAGCCACAATCTGACCGACCAGCAATGGCGCATTGTGCGAATCCTGGCCGAT
+AGCCCCTCTATGGATTTTCACGAGCTGGCCTTTCGTACCTGTATTTTGCGTCCAAGTCTG
+ACCGGAATATTGACGCGCATGGAGCGAGACGGACTGGTGTTGCGACTCAAGCCGGTTAAC
+GATCAGCGTAAGTTATATGTCATGTTGACGGAGCAGGGACAAACGTTGTACGCCCGTGCC
+CGGAGCGAGGTAGAAGAGGCTTATCGAAAAATTGAGGCCGATTTCACGCCCGAAAAAACA
+CAGCAATTGATGCTGCTGCTGGACGATCTTATTGCTCTGGGGCGCCAGCATCCTGATAGC
+GAAGCGGAAGCATAGATGAGCGACACATCATCTGCACTTCCGGAAAGCCCCGAGTCTGTC
+GGTTCGCACAACGCGCTCAGCACGGGTCAACAAACCGTCATAAATAAACTGTTCCGCCGA
+CTGATCGTATTTTTATTCGTGTTGTTTATCTTCTCGTTTTTAGACCGTATCAACATCGGT
+TTTGCCGGGTTGACGATGGGGCAGGATCTGGGGTTAAGCGCCACCATGTTTGGTCTTGCC
+ACGACGCTGTTTTACGCCACCTACGTCATTTTCGGCATTCCCAGCAACGTGATGTTGAGC
+ATCGTCGGCGCCCGCCGCTGGATTGCGACCATTATGGTGCTATGGGGCATTGCATCTACC
+GCCACGATGTTCGCGGTGGGACCGAAAAGCCTGTATGTGCTGCGAATGCTGGTGGGCATT
+ACCGAAGCGGGCTTTTTGCCAGGAATATTGCTCTATTTAACCTACTGGTTCCCGGCATTT
+TTCCGCGCCCGCGCCAACGCATTATTTATGATTGCCATGCCGGCCACTACCGCGTTGGGG
+TCAATTGTCTCCGGCTATATTTTATCGCTGGACGGCATATTCAATCTGCATGGATGGCAG
+TGGTTATTCCTGTTGGAAGGATTTCCGTCAGTTTTGTTAGGCATTATGGTCTGGTTTTAC
+CTGGATGATACCCCGGCAAAAGCCAAATGGCTGACGGCAGAGGATAAAAAATGTTTGCAG
+GAGATGATGGATAATGATCGCCTGACGCTGGTTCAGCCTGAGGGGGCCATCAGCCATAAC
+GCCATGCAGCAGCGTAGCCTGTGGCGCGAAGTATTCACGCCAATTGTACTGATGTATACG
+CTGGCCTATTTTTGCCTTACCAATACGCTTAGCGCCATTAGTATCTGGACGCCGCAAATC
+CTGAAAAGTTTTAATGAAGGCAGCAGCAATATCACCATCGGCCTGCTGGCGGCGATCCCG
+CAGATTTGTACTGTTCTGGGCATGATTTACTGGAGCCGCCATTCGGACAAACATCAGGAG
+CGTAAACACCACACTGCGTTACCGTTCCTGTTTGCCGCCGCGGGCTGGCTGCTGGCGTCG
+GCGACCGACCGTAACCTGATCCAGCTCCTGGGGATCGTGATGGCATCCACGGGTTCCTTT
+AGCGCGATGGCGATCTTCTGGACCACGCCGGATCAGTCGATCAGTTTACGCGCCAGGGCG
+ATAGGCATTGCGGTCATCAATGCCACCGGCAATATTGGCTCCGCGCTCAGCCCGGTTATG
+ATTGGCTGGCTAAAAGATATCACTGGTAGCTTCAATAGCGGACTCTGGTTTGTCGCTTCT
+CTGTTAGTCGTCGGCGCCGCCATTATCTGGCTCATTCCCATGAAAGCATCGCGTCCGCGC
+GCCACCCCTTGAATGGGCAAGTTAGCGTTAGCAGCAAAAATTACCCACGTGCCGTCGATG
+TATCTTTCTGAACTGCCAGGAAAAAATCACGGTTGTCGTCAGGCAGCCATTGATGGGCAT
+ATTGAAATTGGCAAGCGTTGCCGCGAAATGGGCGTTGACACCATTATCGTATTCGACACC
+CACTGGCTGGTGAATAGCGCTTACCACATTAATTGTGCCGACCATTTCCAGGGCGTCTAT
+ACCAGCAACGAATTGCCGCACTTTATTCGCGACATGACCTATGACTATGACGGTAATCCG
+GCGCTCGGCCATCTGATCGCCGACGAGGCGGTCAAACTGGGCGTGCGCGCCAAAGCGCAC
+AACATCCCGAGCCTGAAGCTGGAGTATGGCACGCTGGTGCCGATGCGCTACATGAACAGC
+GACAAGCACTTCAAAGTGGTCTCCATCTCGGCGTTCTGCACTGTGCATGATTTTGCCGAC
+AGCCGCAAACTGGGCGAAGCCATTCTCAAGGCGATTGAGAAATATGACGGTACCGTAGCG
+GTATTCGCCAGTGGTTCTCTGTCGCACCGTTTTATTGACGACCAACGGGCGGAAGAGGGG
+ATGAACAGCTACACCCGCGAGTTCGATCATCAAATGGACGAGCGCGTGGTCAAGCTGTGG
+CGCGAAGGCAAATTCAAGGAGTTTTGCACCATGTTGCCGGAGTACGCCGACTACTGCTAC
+GGCGAAGGCAACATGCACGACACGGTCATGCTACTGGGAATGCTGGGGTGGGACAAATAC
+GACGGCAAGGTGGAGTTCATCACCGACCTGTTCGCCAGCTCCGGTACCGGCCAGGTAAAC
+GCTGTTTTCCCGCTGCCTGCGTAAATGAAGAAAATAAATCATTGGATTAACGGCAAAAAC
+GTTGCAGGTAACGACTACTTCCAGACCACTAACCCGGCGACCGGTGATGTGCTGGCGGAA
+GTAGCCTCCGGCGGTGAAGCAGAAGTGAACCAGGCTGTCGCGGCGGCAAAAGAGGCGTTC
+CCGAAATGGGCCAACCTGCCGATGAAAGAGCGCGCGCGCCTGATGCGCCGCCTTGGCGAC
+CTGATTGACCAGCATGTGCCGGAAATCGCGGCGATGGAAACCGCCGACACCGGCCTGCCT
+ATTCACCAGACTAAAACGTGCTGAGTGCTGATCCCGCGCGCCTCGCATAACTTCGAATTC
+TTCGCCGAAGTGTGCCAGCAGATGAACGGCAAGACCTATCCGGTTGACGATAAAATGCTC
+AATTATACGCTGGTGCAGCCCGTCGGCGTCTGCGCGCTGGTGTCGCCGTGGAACGTGCCG
+TTTATGACCGCGACTTGGAAAGTTGCGCCGTGCCTGGCGCTGGGTAACACCGCGGTGCTC
+AAAATGTCCGAGCTGTCGCCGCTGACTGCCGACAGGCTGGGCGAGCTGGCACTGGAGGCA
+GGAATTCCGGCAGGCGTGCTGAACGTGGTGCAGGGCTACGGCGCGACGGCGGGCGATGCG
+CTGGTACGCCACCATGACGTGCGTGCGGTGTCGTTTACCGGCGGTACCGCCACCGGTCGC
+AATATCATGAAAAATGCCGGGCTGAAAAAATACTCGATGGAGCTGGGCGGCAAATCGCCG
+GTGCTGATTTTTGAAGACGCCGACATTGAGCGCGCGCTGGACGCCGCGCTGTTCACCATC
+TTCTCGATCAACGGCGAACGCTGCACCGCTGGGTCGCGCATCTTTATCCAGCAGAGCATT
+TACCCTGAGTTCGTGAAGCGCTTTGCCGAACGCGCGAATCGCCTGCGTGTCGGCGATCCG
+ACCGACCCGAACACCCAGGTCGGCGCGCTGATTAGCCAACAGCACTGGGAGAAAGTCTCC
+GGTTATATCCGCCTCGGCATTGAAGAGGGGGCAACGCTGCTGGCGGGCGGTGCGGAAAAA
+CCCACTGACCTGCCTGCGCATCTGAAAGGCGGTAACTTCCTGCGCCCAACCGTGCTGGCC
+GATGTCGACAACCGTATGCGCGTTGCGCAGGAAGAGATCTTTGGGCCGGTCGCCTGCCTG
+CTGCCATTCAAAGACGAAGCGGAAGGGTTACGTTTGGCGAACGATGTGGAATACGGTCTG
+GCCTCTTATATCTGGACCCAGGACGTGAGCAAAGTGTTGCGCCTGGCGCGTGGGATTGAA
+GCCGGCATGGTCTTCGTCAACACCCAGAACGTCCGCGACCTGCGCCAGCCGTTCGGCGGC
+GTGAAAGCCTCCGGTACCGGGCGCGAAGGCGGCGAATATAGCTTCGAAGTGTTTGCGGAA
+ATGAAAAACGTCTGCATCTCAATGGGCGACCATCCTATCCCAAAATGGGGAGTTTGAATG
+CCGCACTTTATTGCTGAATGTACTGAAAATATTCGCGAGCAGGCTGATTTACCAAGCCTG
+TTCAGCAAGGTAAACGAGGCGCTGGCCGCCACCGGGATTTTCCCCATCGGCGGTATCCGC
+AGTCGCGCCCACTGGCTGGATACCTGGCAGATGGCTGACGGTAAGCATGATTACGCGTTT
+GTGCATATGACGCTGAAAATCGGCGCCGGGCGCAGCCTGGAGAGCCGTCAGGAAGTCGGC
+GAAATGCTGTTTGGGCTGATTAAAGCCCACTTCGCCGACCTGATGGAGAACCGCTATCTG
+GCGCTGTCGTTTGAGATTGCCGAGTTACATCCAACGCTCAATTACAAACAAAACAACGTA
+CACGCGTTATTTAAATAGATGCTCGATAAACAGACCCATACCCTGATCGCTCAGCGACTT
+AATCAGGCTGAAAAACAGCGTGAACAGATTCGCGCAGTGTCGCTGGATTATCCCAACATC
+ACTATTGAAGATGCCTATGCCGTACAGCGTGAATGGGTCAATATCAAGATTGCCGAAGGG
+CGCACGCTCAAAGGCCACAAAATCGGCCTGACCTCAAAAGCGATGCAGGCCAGCTCGCAA
+ATCAGCGAACCGGATTACGGCGCGCTGCTTGACGATATGTTCTTCCATGACGGCGGAGAT
+ATCCCCACCGACCGTTTTATCGTCCCGCGTATTGAAGTGGAGCTGGCGTTCGTGCTGGCG
+AAACCGCTGCGCGGCCCTCACTGCACGCTGTTCGACGTCTACAACGCCACGGATTATGTG
+ATTCCGGCGCTGGAACTGATTGACGCCCGCAGCCACAACATCGACCCGGAAACCCAGCGC
+CCGCGCAAAGTGTTCGACACCATTTCCGACAACGCCGCCAACGCCGGGGTGATCCTCGGT
+GGTCGCCCCATCAAACCAGACGAGCTGGATCTGCGCTGGATCTCCGCGCTGCTCTATCGC
+AACGGCGTGATCGAAGAAACCGGCGTCGCCGCAGGCGTGCTGAATCATCCGGCCAACGGC
+GTGGCGTGGCTGGCGAACAAGCTTGCCCCCTACGATGTCCAGCTTGAAGCCGGGCAGATC
+ATCCTCGGCGGCTCGTTCACCCGCCCGGTGCCGGCGCGCAAGGGCGACACCTTCCATGTC
+GATTACGGCAACATGGGCGCGATCAGTTGCCGGTTTGTGTAAATGAGCTCTGTACCCGCG
+CCGCGTGAATATTTTCTTGACTCTATCCGCGCATGGCTGATGTTGTTAGGGATTCCCTTT
+CATATCTCGTTGATCTATTCCACTCACAGTTGGCATGTCAATAGCGCCGCGCCATCGTGG
+TGGCTAACCCTGTTTAACGATTTTATCCACGCTTTTCGTATGCAGGTGTTTTTTGTTATT
+TCTGGTTATTTTTCGTACATGTTATTTTTACGTTATCCATTAAAACACTGGTGGAAAGTA
+CGGGTAGAACGTGTGGGTATTCCCATGCTTACCGCAATCCCTTTGCTTACCTTGCCGCAA
+TTTATCCTGTTGCAATATGTCAAAGAGAAAACAGAGAACTGGCCTACACTCTCTGCCTAT
+GAAAAATATAATACGTTAGCGTGGGAACTCATTTCACATCTGTGGTTTTTACTGGTGCTG
+GTGATATTAACCACCGTCAGCATCGGGATTTTTACCTGGTTCCAAAAAAGGCAGGAAACA
+AGCAAGCCTCGTCCCGCCGCTATTTCGCTGGCCAAACTTTCGCTTATTTTTTTCCTGCTG
+GGGGTGGCGTACGCTGCTATCAGGCGCATTATATTCATCGTATATCCGGCAATCCTCAGT
+GACGGCATGTTCAATTTTATTGTGATGCAAACGCTATTTTATGTGCCGTTTTTTATTCTC
+GGCGCGTTGGCCTTCATTCACCCCGATCTGAAAGCGCGCTTCACCACGCCCTCACGCGGA
+TGCACTTTAGGCGCTGCCGTTGCTTTTATCGCGTATCTGCTGAATCAACGTTATGGGAGC
+GGCGACGCCTGGATGTACGAAACCGAATCCGTGATTACGATGGTAATGGGGCTATGGATG
+GTGAACGTGGTATTTTCACTGGGGCATCGCTTGTTAAACTTTCAGTCCGCGCGTGTCACC
+TATTTCGTGAATGCTTCGCTGTTTATTTATCTGGTGCATCATCCCTTAACGCTTTTCTTT
+GGCGCGTATATTACACCGCATATCTCCTCCAACCTGATCGGGTTCTTGTGCGGGCTGATA
+TTTGTTATGGGTATTGCGTTAATTCTGTATGAAATTCATTTACGCATCCCGCTCCTGAAA
+TTTCTCTTTTCAGGTAAACCGCCGGTAAAACAAGAAAGCCGCGCCGCGATCGGGTAGATG
+AAACATAAACGACAAATGATGAAAATGCGTTGGTTGGGCGCAGCTATTATGTTAACGCTC
+TACGCATCATCGAGCTGGGCGTTCAGTATTGATGACGTGGCAAAACAAGCTCAATCTTTA
+GCCGGGAAAGGCTATGAGGCGCCTAAAAGCAACTTGCCCTCCGTTTTCCGCGACATGAAA
+TATGCGGATTATCAGCAGATCCAGTTTAACAGCGATAAAGCCTACTGGAACAACTTAAAG
+ACCCCTTTTAAGCTCGAATTTTACCATCAGGGGATGTACTTCGATACGCCGGTCAAGATT
+AACGAAGTGACGGCGACGACGGTCAAAAGAATCAAATACAGCCCGGATTACTTCAATTTT
+GGCAATGTTCAGCACGATAAAGACACGGTAAAAGATTTAGGCTTCGCCGGGTTCAAAGTC
+CTGTACCCCATTAACAGTAAAGATAAGAACGACGAAATCGTCAGTATGCTTGGCGCCAGC
+TATTTCCGCGTTATCGGCGCAGGCCAGGTGTATGGCTTATCTGCGCGCGGCCTGGCGATT
+GATACCGCCTTACCATCTGGTGAAGAGTTTCCCCGCTTTCGCGAGTTCTGGATTGAGCGT
+CCAAAACCCACCGATAAGCGTTTGACCGTCTATGCATTACTGGATTCTCCGCGCGCGACC
+GGCGCTTACCGTTTTGTGATCATTCCTGGCCGCGATACCGTGGTGGACGTGCAGTCAAAA
+GTCTATCTGCGCGATAAGGTGGGCAAGCTGGGCGTTGCGCCATTAACCAGTATGTTCCTG
+TTTGGGCCAAACCAGCCGTCGCCGACGACCAACTATCGTCCGGAATTGCATGACTCGAAC
+GGCTTATCCATTCATGCGGGTAATGGCGAGTGGATTTGGCGTCCGCTGAACAATCCAAAA
+CACCTCGCTGTGAGCAGCTATGCGATGGAAAACCCTCAGGGATTCGGCCTGTTGCAGCGT
+GGTCGCGAGTTCTCGCGCTTTGAAGATTTAGACGATCGCTATGACCTGCGTCCAAGCGCC
+TGGATTACCCCGAAAGGCGACTGGGGCAAAGGTAAGGTTGAACTGGTTGAAATTCCGACC
+AATGATGAAACCAACGATAACATCGTCGCTTACTGGACTCCGGATCAACTGCCGGAACCG
+GGTAAAGAGATGAACTTCAAGTACACTCTGACCTTCAGCCGCGATGAAGATAAACTTCAT
+GCGCCGGATAATGCCTGGGTGCTGCAAACACGCCGCTCAACGGGCGACGTTAAACAGTCG
+AATCTGATTCGCCAGCCCGACGGCACTATTGCCTTTGTGGTGGATTTCGTTGGCGCCGAC
+ATGAAAAAACTGCCGCCGGATACGCCCGTCGCTGCACAAACCAGCATTGGCGATAACGGT
+GAAATCGTTGACAGTAATGTACGCTATAACCCAGTCACTAAAGGCTGGCGTTTAATGCTG
+CGCGTGAAAGTCAAAGACGCGAAGAAAACCACGGAAATGCGTGCCGCATTGGTGAATGCC
+GATCAGACGCTAAGTGAAACCTGGAGCTACCAGTTACCTGCCAATGAATAAatgaataaa
+acaactgagtatattgacgcactgctgctttctgaacgtgagaaagcggcattgccgaaa
+actgacatccgcgccgtgcatcaggcgctggatgccgagcatcggacttactcgcgagaa
+gacgattcaccgcagggttccgtaaaagcccgccttgaacacgcctggccggattcattg
+gcgaaggggcagttaattaaagatgatgaagggcgcgatcagttgcaggctatgccaaaa
+gcgacgcgctcttcgatgtttcctgatccctggcgaaccaacccggttggccgtttctgg
+gatcgcctgcgtgggcgggatgtaacgccgcgctatgtttctcgtctgacaaaagaagag
+caggcgagtgagcaaaaatggcgtaccgtcggcactatacgccgctatattttgttaatt
+ttgactctggcgcaaaccgtcgtcgcgacctggtatatgaagaccattctgccctatcag
+ggatgggcgctcatcaatcctatggatatggtggggcaggatatttgggtctcctttatg
+cagctcctgccctacatgctgcaaaccggtatcctgattttgtttgccgtgctgttctgc
+tgggtgtctgccggattctggacggcgctgatgggcttcctgcaactgcttatcgggcgc
+gataagtacagtatctccgcgtctacggttggcgatgagcccctcaatccggaacaccag
+acggcgctgatcatgcctatctgtaatgaagacgttagccgcgttttcgccggtctgcgc
+gcgacctgggagtccgttaaagccacaggcaacgccgcgcattttgacgtctatatcctt
+agcgatagttataacccggatatctgcgtggcggagcaaaaggcgtggatggagctcatc
+gcggaagtgcagggcgaaggccaaattttttaccgtcgccgccgccgccgtatgaaacgc
+aaaagcggcaatattgacgatttttgccgccgctggggcaatcagtacagctatatggtg
+gtgctggacgcggactcagtgatgagcggcgagtgtctgagcgggctggtgcgcctgatg
+gaagcgaaccctaacgccgggattatccagtcttcgccgaaagcgtcggggatggatact
+ctgtatgcccgctgccaacagttcgcgacccgtgtttatggaccgctgtttaccgccggg
+ctgcacttctggcagttgggggagtcgcactactgggggcacaatgccattatccgcgtg
+aagccgtttatcgagcactgcgctctggcgccgctgccgggagaaggttcgttcgccgga
+tcgattctttcccacgactttgtggaggcggcgctaatgcgtcgggcagggtggggcgtc
+tggattgcctacgatctccccggctcctatgaagagctgccgccaaacctgctggatgag
+cttaaacgcgaccgccgctggtgtcacggcaacctgatgaactttcgtctgttcctggtg
+aaaggaatgcacccggtgcatcgcgccgtgttcctgaccggggtaatgtcatacctgtcc
+gcgccgttatggtttatgttcctcgcgctttctaccgcgctgcaggtcgttcatgcgtta
+acagagccgcaatatttccttcagccgcgccagctttttccggtctggccgcagtggcgt
+ccggaactggcaatcgcgctgtttgcgtcaacgatggtgctgctgttcctgccgaagctg
+ctcagtattatgctgatctggtgtaaaggcaccaaagagtatggcggtttctggcgcgtt
+acgctgtcgctattgctggaagtgctgttctccgtgttgctggcgccggtgcgtatgctg
+tttcataccgtgtttgtggtcagcgcgttcctcggctgggaagtggtctggaactcaccg
+caacgcgacgatgattctacgccgtggggagaagcctttatgcgtcacggctctcaactg
+ctgctggggctggtctgggcggtgggtatggcgtggctggatttacgctttctgttctgg
+ctggcgccgattgtcttttcgctgattctgtcgccatttgtttcggtgatctccagtcgt
+tcaacggtaggattacgcaccaaacgctggaagctgttcctgatcccggaagagtattcg
+ccgcctcaggtgttggtcgataccgataaatatctggagatgaatcgccgccgtattctg
+gacgatggctttatgcatgcggtatttaacccgtcgcttaatgcgctggcgaccgcgatg
+gccaccgcgcgtcaccgcgccagtaaggtgctggaaatagcccgcgatcgtcatgtggag
+caggcgctaaacgaaacgccggagaaactgaaccgcgatcggcgtctggttttgctcagc
+gatccggtgacgatggcgcgtttacactatcgggtctggaatgcgccagagagatactct
+tcctgggtaaaccattatcagtctctcgtcctgaatccgcaggcgttgcagggacgaaca
+tcgtcagcgggataaatgtcgcgcgtctcgcaggcgaggaacctgggtaaatattttctt
+ctcatcgataacatgttagtggtgctgggttttttcgtcgtcttcccgctcatctctatt
+cgctttgtcgatcaaatggggtgggctgccgtaatggtagggatcgcgctcggcctgcgt
+cagtttattcaacaaggtctgggcatttttggcggcgccatcgccgatcgctttggcgcg
+aaaccgatgattgtcaccggtatgctgatgcgcgccgcaggctttgccaccatgggtatc
+gcgcatgagccctggctcttgtggttttcctgctttctttccggtctcggcggtacgctt
+ttcgacccgccgcgttcagcgctggtggtcaaattaattcgtccggagcaacggggccgc
+ttcttctctctgttgatgatgcaggacagcgcgggcgcggtgattggcgcgctgctggga
+agctggttgctacaatacgattttcgcctggtctgcgcgacgggcgctattttgttcata
+ttatgcgcccttttcaacgcatggctgcttccggcctggaagctatcaacggccagaacg
+ccggtgcgtgaaggaatgcgccgcgtcatgagcaataaaaggtttgtcacctacgtgctg
+acgctggcgggctactatatgctggcggtacaggtcatgttaatgctgccgattatggta
+aacgatatcgccggttcgcctgctgccgtgaaatggatgtacgctattgaggcgtgtctc
+tcgctgacgttgctctacccgattgcccgctggagcgaaaagcgttttcggctggagcat
+cggctgatggccggtttgctcgtcatgtcgctgagcatgctccccatcgggatggtgggc
+aatttacagcagctttttacgcttatttgcgctttctacatcggctcggttatcgccgaa
+ccggcgcgcgaaacgctcagcgcgtcgcccgcggacgcgagggcgcggggaagctatatg
+ggctttagccgtctgggattagccattggcggcgcgattagttatatcggcggcggctgg
+ttgtttgatatgggtaaagcgcttgcgcagcctgaactaccgtggatgatgctcggtatt
+atcggctttatcacctttttggctttaggctggcaatttagtcataagcgcacgccgcgc
+cggatgctggaacccggcgcctgaatgaccatgtatgccacgctggaagaagctatcgat
+gcagcccgggaagaatttctggctgaccatccaggcctcgaacaagacgaagcgaatgtg
+cagcagttcaacgttcagaaatatgtactgcaggatggggacatcatgtggcaggtcgaa
+tttttcgccgatgaaggtgaagatggcgaatgtctgccgatgctgagtggtgaagccgca
+cagagcgtgtttgacggcgattatgatgagatagagatccgccaggaatggcaggaagag
+aatactttgcatgaatgggatgaaggggaattccagcttgaacccccgcttgataccgag
+gaaggccgtactgcggcagacgaatgggatgagcgttaaATGTCACTATTAGCCAGGCTG
+GAACAAAGTGTACACGAAAACGGTGGGCTGATTGTCTCATGCCAACCGGTACCAGGCAGC
+CCTATGGATAAACCTGAAATTGTGGCTGCAATGGCACAGGCAGCGGCTTCGGCGGGTGCG
+GTCGCTGTGCGCATTGAAGGCATTGAGAATCTGCGGACTGTTCGTCCCCATCTTTCTGTT
+CCTATTATTGGGATAATTAAACGTGACCTTACAGGGTCGCCAGTCCGTATCACTCCATAT
+TTACAGGATGTTGACGCCCTGGCGCAGGCAGGTGCCGATATTATCGCTTTTGATGCCTCA
+TTCCGCTCTCGCCCGGTTGATATTGATAGTTTACTGACACGTATTCGCCTGCATGGATTA
+CTGGCGATGGCAGACTGTTCAACCGTGAATGAAGGCATAAGTTGCCATCAGAAAGGAATC
+GAATTCATTGGTACAACACTGTCTGGCTATACCGGTCCCATCACGCCGGTTGAGCCAGAT
+TTGGCAATGGTGACACAACTGAGTCATGCAGGTTGTCGTGTTATTGCCGAGGGGCGCTAT
+AACACGCCTGCACTGGCGGCCAATGCTATTGAGCATGGTGCCTGGGCAGTTACCGTTGGT
+TCCGCTATCACCCGTATCGAGCATATCTGTCAGTGGTTCAGTCACGCAGTAAAACGCTGA
+ATGAAAAATTTTAAGAAAATGATGACGCTAATGGCGCTATGTTTATCAGTTGCTATCACC
+ACATCAGGATATGCAACCACGCTTCCTGATATACCAGAACCACTGAAAAATGGTACTGGC
+GCTATTGATAATAATGGCGTGATTTATGTCGGCTTAGGTACCGCAGGGACATCCTGGTAT
+AAAATTGATCTTAAAAAGCAACATAAAGACTGGGAGCGTATAAAGTCGTTTCCTGGTGGA
+GCTCGTGAGCAATCCGTGTCGGTATTTTTAAATGATAAGCTGTATGTTTTTGGTGGCGTA
+GGGAAAAAAAACAGTGAATCACCGTTGCAGGTTTATAGCGATGTGTACAAATACTCACCG
+GTGAAAAATACATGGCAAAAAGTTGATACTATATCTCCAGTTGGATTAACAGGGCATACG
+GGAGTAAAATTAAACGAAACGATGGTACTTATTACCGGAGGGGTTAATGAGCATATCTTT
+GATAAGTATTTTATTGATATAGCGGCTGCGGATGAAAGTGAAAAAAATAAAGTCATCTAT
+AATTATTTTAATAAACCTGCCAAAGATTATTTTTTTAATAAAATCGTATTTATCTACAAT
+GCTAAAGAGAACACATGGAAGAATGCCGGTGAGCTGCCAGGCGCGGGGACGGCAGGATCG
+TCATCGGTAATGGAAAATAATTTCTTGATGCTGATTAATGGTGAGCTCAAACCGGGTTTA
+CGTACCGATGTGATTTACCGCGCCATGTGGGATAACGATAAGCTAACATGGTTGAAGAAC
+AGCCAGTTACCGCCATCGCCTGGAGAACAACAGCAGGAAGGGTTGGCCGGAGCATTTTCG
+GGCTATAGCCACGGTGTCCTGCTTGTCGGTGGTGGCGCGAATTTTCCGGGAGCAAAACAA
+AATTATACTAATGGAAAGTTTTATTCCCACGAAGGGATAAATAAAAAATGGCGAGATGAA
+GTCTATGGTTTGATTAATGGCCATTGGCAATATATGGGTAAAATGAAACAACCTCTCGGC
+TATGGTGTATCAGTAAGTTATGGTGATGAAGTTTTCCTTATTGGTGGTGAAAATGCTAAA
+GGGAAACCTGTTTCGTCTGTAACCTCCTTTACCATGCGTGATGGTAATTTATTAATAAAA
+TAAGTGATAGCAAAATTCTTCCCGTGGTATAGCGAGATAACACGTCCACAAAAAAATGCT
+TTATTTTCAGCATGGCTGGGTTACGTTTTTGATGGCTTCGACTTTATGCTGATTTTCTAC
+ATTATGTATCTGATCAAGGCTGACTTAGGATTGACAGATATGGAGGGCGCATTCCTTGCC
+ACAGCGGCCTTTATTGGGCGACCATTTGGCGGGGCGCTATTTGGTCTGCTGGCAGACAAA
+TTTGGCCGTAAGCCGTTAATGATGTGGTCGATAGTTGCCTATTCTGTAGGTACAGGGTTA
+AGTGGCCTGGCTTCCGGTGTAATTATGCTGACGCTTAGTCGTTTTATTGTCGGTATGGGG
+ATGGCGGGGAAGTATGCTTGCGCTTCTACTTATGCCGTGGAAAGTTGGCCAAAGCATTTA
+AAATCTAAAGCGAGCGCATTTCTGGTTTCAGGTTTCGGTATTGGTAACATCATAGCAGCC
+TATTTTATGCCGTCATTTGCCGAAGCGTATGGTTGGCGTGCTGCTTTTTTTGTCGGTTTG
+CTACCCGTTCTTTTAGTAATCTACATCCGGGCCAGGGCTCCTGAATCTAAAGAGTGGGAA
+GAAGCCAAACTCAGTGGTCTCGGAAAGCATTCACAAAGTGCCTGGTCAGTTTTCTCTTTG
+TCAATGAAAGGGCTATTTAATCGAGCTCAATTTCCACTGACATTATGTGTATTTATTGTT
+CTGTTCTCTATTTTCGGCGCAAACTGGCCGATCTTTGGTCTACTGCCTACATATTTGGCG
+GGAGAGGGCTTTGATACGGGCGTGGTCTCTAATTTAATGACGGCGGCGGCATTCGGCACT
+GTATTGGGAAATATCGTTTGGGGTCTGTGCGCAGATAGAATTGGTTTGAAGAAAACGTTC
+AGCATTGGTCTTCTCATGTCCTTTTTATTCATTTTCCCGTTATTCAGAATTCCGCAAGAT
+AATTATTTACTGCTGGGCGCATGTTTATTCGGTTTAATGGCGACTAACGTAGGTGTTGGC
+GGGCTGGTTCCCAAATTTCTCTACGACTACTTTCCTCTTGAGGTTCGTGGTTTGGGTACC
+GGGCTTATTTATAATCTTGCTGCGACATCAGGCACATTCAATTCAATGGCGGCGACCTGG
+CTTGGAATAACAATGGGGCTAGGCGCTGCGCTAACGTTCATTGTTGCTTTCTGGACCGCA
+ACAATTCTACTCATTATTGGCCTATCCATTCCGGATAGACTAAAAGCACGTCGTGAAAGG
+TTTCAGTCAACAAAAGAATTTTAAATGAAAAAGTATCTTGCTTTCGCCGTTACGCTGCTG
+GGTATGGGTAAAGTCATCGCCTGTACTACCCTTTTGGTAGGCAATCAGGCTTCGGCTGAC
+GGCTCCTTTATTATCGCGCGCAACGAGGATGGCTCGGCAAATAACGCCAAGCATAAGGTT
+ATTCATCCCGTCGCGTTTCATCAACAAGGCGAGTATAAAGCACATCGCAACAATTTTAGC
+TGGCCGCTTCCGGAGACAGCGATGCGCTATACGGCGATTCATGACTTTGATACTAACGAT
+AACGCCATGGGTGAAGCCGGTTTCAATTCGGCGGGCGTCGGAATGAGCGCAACGGAAACC
+ATTTACAACGGCAGAGCGGCGCTGGCTGCCGATCCTTACGTGACAAAAACGGGAATCACG
+GAAGACGCCATTGAGTCCGTGATCCTGCCAGTGGCGCAATCGGCGCGTCAGGGCGCCAAA
+TTACTGGGAGATATTATTGAACAAAAAGGCGCGGGCGAAGGTTTCGGCGTCGCGTTTATT
+GATAGCAAAGAGATATGGTATCTGGAGACGGGAAGCGGACATCAATGGCTGGCAGTACGA
+CTTCCGGCAGATAGCTATTTCGTTTCCGCCAATCAGGGACGTTTACGCCATTACGATCCG
+AATGATAACGCGAATTATATGGCGTCACCAACGTTAGTAAGCTTTGCGAAAAAGCAGGGA
+TTATATGATCCGGCCCGCGGCGAATTCGACTTTCATCAAGCCTATTCGCAGGATAACAAA
+AACGATACCACCTATAATTATCCGCGCGTCTGGACGCTACAACACCAGTTTAATCCGCAT
+CTGGATACGGTCGTTAGCGAAGGGGAAACATTTTCTGTTTTTTTAACGCCAATAACGAAG
+ATCAGCGTGGCGGCAGTAAAAAACGCGTTACGCAATCACTATCAGGGAACGTCGCACGAC
+CCTTATGCCAGTCATAATCCACAAGAACCATGGCGACCTATATCCGTTTTTCGTACCCAG
+GAGTCACATATTTTACAGGTCAGACCGAAATTACCGCAGGCTATCGGCAACGTAGAATAC
+ATCGCCTATGGAATGCCATCTCTTAGCGTCTATCTCCCCTATTACCAGGGGATGCGTCAT
+TATCAACCCGGAGATGATAAAGGAACCGATCGGGCGAGCAACGACTCTACCTACTGGACA
+TTCCGCACGCTGCAAACACTGGTTATGCAAGACTACAATACGTTTGCGCCAGATGTGCAA
+CATGCCTGGAAAACATTTGAACAGCAAACAGCTAAGCAACAGTATAAGATGGAGCAGAGC
+TATCTGAGATTATATGCGTCGCATCCGAAAGAAGCACAACGCTTACTGCAAAATTTTGAA
+GATAAAACGATGCAAAATGCGCAGACGCTCGCCCGTCGCCTGACCAATAATATTATTACG
+ACAATGACTTACCGCACAGATATGAAATATCACTTTTCAAGTACGCAGCCATAAATGGGA
+AGACAAAAAGCAGTGATCAAAGCTCGTCGTGAAGCAAAGCGTGTGTTGAGACGAGATTCG
+CGTAGTCATAAGCAACGTGAAGAAGAATCGGTCACGTCACTGGTACAGATGGGCGGAGTA
+GAAGCCATTGGCATGGCGCGCGATAGTCGCGATACCTCTCCTGTTAAGGCGCGAAATGAA
+GCACAGGCGCATTATCTGAACGCTATCGACAGTAAACAGCTTATTTTTGCGACCGGCGAA
+GCCGGCTGCGGAAAAACATGGATCAGTGCGGCAAAGGCGGCAGAAGCATTGATTCATAAG
+GACGTCGAGAGGATCATTGTGACGCGTCCGGTATTGCAGGCTGATGAAGATCTTGGTTTT
+TTGCCCGGTGATATCGCTGAAAAATTCGCGCCTTATTTTCGTCCCGTCTACGATGTCCTG
+CTTAAACGGTTGGGCGCGTCCTTTATGCAATATTGTTTGCGCCCGGAAATCGGTAAGGTA
+GAAATTGCCCCGTTCGCCTATATGCGTGGGCGTACTTTTGAAAATGCGGTCGTGATCCTC
+GACGAGGCGCAAAATGTGACTGCGGCGCAAATGAAAATGTTTTTGACGCGATTAGGCGAA
+AATGTCACGGTCATTGTCAATGGCGATATTACGCAATGCGACCTGCCGCGCGGTGTGCGT
+TCCGGGTTGAGTGATGCGTTGGAACGCTTTGAAGAAGATGAAATGGTGGGGATTGTGCAT
+TTCAACAAAGACGACTGCGTGCGCTCGGCGCTTTGTCAGCGAACGCTCCACGCATACAGC
+TAAATGGGAACCACCACGATGGGGGTTAAGCTGGACGACGCCACGCGCGAACGGATCAAA
+ATGGCCGCGTCGCGTATCGATCGCACGCCGCACTGGTTAATAAAACAGGCAATCTTTAGC
+TATCTGGACAAGCTGGAAAATAGCGATACGCTACCGGAGCTACCTGCGCTGTTTGCCGGC
+GCGGCAAATGAAAGCGAGGAGCCGGTCGCGCCGCAGGATGAGCCGCATCAGCCCTTTCTG
+GAGTTTGCCGAACAGATTCTTCCCCAATCCGTCTCTCGCGCCGCCATCACCGCCGCCTGG
+CGCCGCCCGGAAACCGATGCGGTGTCAATGCTAATGGAACAGGCGCGCCTGTCGCCGCCT
+GTCGCTGAGCAGGCGCATAAACTGGCGTATCAACTGGCGGAGAAATTGCGCAATCAAAAA
+TCCGCCAGCGGTCGCGCGGGTATGGTGCAAGGCCTGTTGCAGGAGTTTTCCCTCTCTTCG
+CAAGAAGGCGTAGCGCTGATGTGTCTGGCGGAAGCGCTGCTGCGTATTCCCGACAAAGCT
+ACGCGCGATGCGTTAATTCGCGACAAAATCAGTAATGGCAACTGGCAGTCGCATATTGGC
+CGTAGCCCGTCGCTGTTTGTAAACGCCGCCACCTGGGGGCTGCTCTTTACCGGCCGACTG
+GTCTCAACGCATAACGAAGCCAATCTTTCGCGCTCGCTGAACCGCATTATCGGCAAGAGC
+GGCGAACCGTTAATCCGCAAAGGCGTCGACATGGCGATGCGTTTAATGGGCGAGCAGTTC
+GTGACTGGCGAAACCATTGCTCAGGCGCTGGCGAATGCCCGAAAACTGGAAGAGAAAGGG
+TTCCGCTATTCTTACGATATGCTGGGCGAAGCCGCGTTAACCGCCGCCGATGCGCAGGCC
+TATATGGTCTCTTACCAGCAAGCGATTCATGCCATCGGCAAAGCGTCTAACGGTCGCGGT
+ATTTACGAAGGGCCAGGCATCTCGATTAAGCTGTCCGCCCTGCATCCACGCTATAGTCGC
+GCGCAATACGATCGGGTAATGGAGGAGCTTTATCCGCGCCTGAAATCCCTGACGCTGCTG
+GCGCGCCAGTATGATATCGGTCTCAATATCGACGCCGAAGAGGCGGATCGTCTGGAGATC
+TCGCTTGATCTGCTGGAAAAACTCTGCTTCGAACCCGAACTGGCGGGCTGGAACGGCATT
+GGCTTTGTGATTCAGGCTTACCAGAAACGCTGCCCGCTGGTCATTGATTATTTAGTCGAT
+CTGGCCTCCCGTAGCCGCCGTCGGCTGATGATTCGTCTGGTGAAAGGCGCCTACTGGGAT
+AGCGAGATCAAACGCGCGCAAATGGAAGGGCTGGAGGGCTATCCAGTTTATACCCGCAAA
+GTGTATACCGATGTCTCTTATCTGGCCTGCGCGAAAAAACTGCTCGCCGTCCCTAATCTG
+ATCTACCCGCAGTTCGCGACCCATAACGCTCACACACTGGCGGCGATTTATCATCTGGCC
+GGGCAAAATTACTATCCGGGTCAGTACGAATTCCAGTGCCTGCACGGCATGGGAGAACCG
+CTGTATGAACAGGTCACCGGTAAAGTGGGGGACGGAAAACTTAACCGTCCCTGCCGTATT
+TACGCGCCGGTGGGAACACACGAAACCCTGCTGGCCTATCTGGTACGACGCCTGCTGGAA
+AACGGCGCCAACACCTCTTTTGTCAACCGCATCGCCGATGCCACCCTACCGCTCGATGAA
+CTGGTGGCCGACCCGGTCGAGGCCGTGGAAAAACTGGCGCAGCAGGAAGGTCAGGCTGGC
+ATACCGCATCCAAAAATTCCGCTGCCGCGCGATCTGTACGGCGAAGGTCGGATAAACTCC
+GCCGGACTTGATTTAGCGAATGAACATCGCCTCGCCTCGCTTTCTTCTGCCCTGTTAAGC
+AACGCCATGCAGAAATGGCAGGCCAAACCTGTGCTGGAACAACCGGTGGCCGACGGTGAG
+ATGACGCCGGTTATCAACCCGGCGGAACCGAAAGATATTGTTGGCTGGGGACGCGAAGCG
+ACAGAAAGCGAGGTTGAACAGGCGTTGCAAAACGCGGTCAATCAGGCGCCGGTTTGGTTT
+GCGACGCCGCCGCAAGAACGCGCCGCTATTTTGCAGCGGGCGGCGGTATTGATGGAAGAC
+CAAATGCAGCAGTTGATTGGCCTGTTGGTGCGTGAAGCGGGGAAAACGTTCAGCAACGCC
+ATTGCCGAAGTGCGCGAAGCGGTAGACTTCCTCCATTATTATGCCGGTCAAGTGCGTGAC
+GATTTCGATAACGAAACGCATCGCCCGTTAGGGCCGGTGGTCTGTATCAGTCCGTGGAAC
+TTTCCGCTGGCCATTTTCACTGGCCAAATCGCCGCCGCGCTGGCGGCAGGTAACAGCGTT
+CTGGCGAAACCGGCAGAGCAGACATCGCTGATTGCCGCCCAGGGCATTGCCATTTTGCTG
+GAAGCGGGCGTACCGCCGGGCGTCGTGCAACTGTTGCCGGGACGGGGAGAAACCGTCGGC
+GCCCAGCTTACCGCCGATGCGCGTGTACGCGGCGTGATGTTTACCGGTTCCACGGAGGTC
+GCGACGTTGTTGCAGCGCAACATCGCCACGCGTCTTGACGCCCAGGGGCGCCCTATTCCG
+TTGATTGCGGAAACCGGCGGTATGAACGCTATGATTGTCGACTCTTCCGCGCTCACCGAG
+CAGGTGGTCGTGGATGTGCTGGCTTCCGCCTTCGACAGCGCCGGACAACGCTGTTCCGCG
+CTCCGCGTGCTGTGTTTGCAGGACGATATCGCCGAACATACGCTGAAAATGTTACGCGGC
+GCGATGGCGGAGTGTCGGATGGGGAATCCAGGCCGTCTGACGACCGATATCGGGCCGGTG
+ATCGATAGCGAGGCCAAAGCCAACATTGAACGTCATATCCAGACGATGCGCGCCAAAGGC
+CGCCCGGTTTTCCAGGCCGCGCGTGAAAACAGCGATGACGCGCAGGAATGGCAGACCGGT
+ACGTTTGTTATGCCCACGCTTATTGAGCTGGAAAACTTCGCAGAACTGGAAAAAGAGGTC
+TTCGGGCCCGTGCTGCACGTCGTGCGTTATAACCGTAACCAACTGGCGGAGCTTATCGAA
+CAGATTAACGCTTCCGGCTACGGGCTAACGCTGGGCGTACATACCCGTATTGATGAAACC
+ATTGCGCAAGTCACCGGTTCCGCCCATGTCGGCAACCTGTACGTTAACCGTAATATGGTG
+GGCGCGGTCGTCGGCGTCCAGCCGTTTGGCGGCGAAGGCCTGTCCGGCACCGGGCCAAAA
+GCGGGAGGGCCGCTCTATCTCTACCGCCTGCTGGCACACCGCCCGCCCAATGCGCTCAAT
+ACGACGCTGACTCGTCAGGATGCGCGTTACCCGGTGGATGCGCAGCTTAAAACCACGCTA
+CTCGCGCCGTTGACCGCTCTGACGCAATGGGCGGCGGATCGCCCGGCGCTACAGACGCTC
+TGCCGACAATTCGCCGATCTGGCGCAGGCCGGCACGCAGCGCCTGCTACCGGGGCCGACC
+GGCGAGCGTAATACCTGGACGCTGTTGCCGCGTGAACGGGTGTTATGCCTGGCTGATGAT
+GAACAGGACGCGTTGACGCAGCTTGCCGCCGTTCTCGCCGTCGGCAGTCAGGCGCTATGG
+TCAGACGACGCCTTCCACCGCGATCTGGCGAAACGTCTCCCCGCCGCCGTCGCGGCGCGT
+GTCCAGTTTGCGAAAGCGGAAACGCTGATGGCGCAGCCGTTTGACGCGGTGATTTTCCAC
+GGCGACTCCGACAAGCTGCGAACCGTGTGCGAAGCCGTCGCCGCCCGCGAAGGCGCGATA
+GTGTCGGTACAGGGGTTCGCCCGCGGCGAAAGCAATATGCTGCTGGAACGGCTCTATATT
+GAACGTTCGCTGAGCGTAAACACTGCCGCCGCTGGCGGTAATGCCAGCCTGATGACAATT
+GGCTAAATGGCTATTAGCACACCGATGTTGGTGACATTCTGTGTCTATATTTTTGGCATG
+ATATTGATTGGGTTTATCGCCTGGCGCTCAACCAAAAACTTTGATGACTATATTCTTGGC
+GGTCGCAGCCTGGGGCCGTTTGTTACGGCTTTATCAGCCGGCGCGTCGGATATGAGCGGC
+TGGCTGTTAATGGGGCTGCCTGGCGCTATCTTTCTGTCGGGGATCTCTGAAAGCTGGATC
+GCCATTGGCCTGACGTTAGGCGCATGGATTAACTGGAAGCTGGTGGCCGGGCGCCTGCGC
+GTGCATACCGAATTTAACAATAACGCGCTCACGCTGCCGGACTATTTTACCGGTCGGTTT
+GAGGATAAGAGCCGAGTCCTGCGTATTATTTCCGCGCTGGTCATTCTGCTGTTTTTCACT
+ATCTATTGCGCATCAGGTATTGTCGCTGGGGCACGACTGTTCGAAAGCACCTTCGGTATG
+AGCTATGAAACCGCACTGTGGGCGGGGGCCGCGGCAACCATTATTTATACCTTTATCGGC
+GGGTTTCTTGCCGTTAGCTGGACGGATACCGTTCAGGCCAGCCTGATGATTTTTGCGTTA
+ATCCTGACGCCGGTGATGGTTATTGTCGGCGTAGGCGGTTTTAGCGAGTCGCTGGAAGTG
+ATCAAGCAAAAGAGCATCGAGAATGTCGACATGCTCAAGGGGCTGAATTTTGTCGCTATT
+ATTTCTCTGATGGGCTGGGGGCTGGGTTACTTCGGTCAGCCGCATATCCTGGCGCGCTTT
+ATGGCGGCGGATTCCCATCACAGTATTGTTCATGCGCGTCGTATCAGTATGACCTGGATG
+ATTCTGTGTCTGGCGGGCGCGGTGGCGGTGGGCTTCTTTGGCATTGCGTACTTTAACAAT
+AACCCCGCGCTGGCCGGGGCGGTGAACCAAAACTCAGAACGCGTATTTATTGAACTGGCG
+CAGATCCTGTTTAACCCGTGGATTGCCGGTGTTCTGCTGTCTGCTATCCTGGCGGCGGTG
+ATGTCGACGTTGAGCTGTCAGTTGCTGGTATGCTCCAGCGCGATTACGGAAGATTTATAT
+AAGGCTTTTCTGCGTAAAAGCGCCAGCCAGCAAGAGCTGGTATGGGTAGGGCGAGTGATG
+GTGCTGGTGGTAGCGCTGATCGCCATTGCGCTGGCGGCGAATCCTGATAACCGTGTGCTG
+GGGCTGGTGAGCTACGCCTGGGCTGGATTCGGCGCGGCATTTGGACCTGTTGTCCTGTTT
+TCTGTGATGTGGTCGCGTATGACACGTAACGGCGCGCTGGCGGGAATGATTATTGGCGCG
+GTGACGGTTATCGTCTGGAAACAATATGGCTGGCTGGATCTGTATGAGATTATCCCTGGC
+TTCATTTTCGGCAGCCTGGGGATCGTAATCTTTAGCCTGCTTGGCAAAGCGCCGACAGCA
+ACGATGCAGGAACGCTTTGCAAAAGCGGACGCGCATTATCATTCCGCGCCGCCGTCGAAG
+CTACAGGCGGAATAAATGGCGGGTAAACTGCGGCGTTGGCTGCGTGAAGCCGCGGTTTTT
+CTGGCGCTCCTCATCGCGATAATGGTGGTCATGGACGTCTGGCGCGCGCCGCAGGCGCCT
+CCGGCGTTTGCCGCGACACCATTACATACGCTGACGGGAGAGTCGACAACTCTGGCGACC
+TTGAGCGAGGAACGCCCCGTACTGCTCTATTTTTGGGCCAGCTGGTGCGGGGTATGCCGC
+TTTACCACGCCTGCGGTCGCTCACCTGGCGGCGGAAGGGGAAAACGTCATGACCGTTGCG
+CTCCGCTCCGGCGGTGATGCTGAGGTTGCCCGCTGGCTGGCGCGCAAGGGCGTTGACTTC
+CCGGTCGTCAATGATGCTAACGGCGCCTTATCCGCTGGCTGGGAAATCAGCGTGACGCCA
+ACGCTGGTGGTGGTTTCACAAGGTCGGGTTGTGTTCACCACCAGCGGCTGGACCAGCTAT
+TGGGGCATGAAGCTTCGGCTGTGGTGGGCAAAAACGTTCTGAATGATGAAAAAAAGCGTC
+GCTATGCTGGCGGTTTGTATGCTGGCGCAAAGCCACCTTGCCATTGCTGCCGGTGCTCCT
+GCGCCTCAAGAGATCAACATTGTTTTACTGGGCACCAAAGGCGGGCCTTCTTTGCTCAAT
+ACAGCCAGACTACCGCAAGCGACGGCGCTCACTATCGGCGATAAGATATGGCTGATAGAT
+GCCGGCTACGGCGCCAGTCTGCAACTGGTGAAAAATGGCATTCCACTGCGCAACATCAAT
+ACTATTTTGCTCACCCATCTGCACAGCGACCACATACTGGATTATCCTTCCTTGCTGATG
+AATGCCTGGGCAAGTGGCCTGAAAGACCATACCATACAGGTTTATGGCCCGCCGGGAACC
+CAGGCGATGACGAAGGCTAGCTGGAAGGTCTTTGACAGGGATATCACGTTACGCATGGAA
+GAAGAGGGGAAACCCGATCCGCGCAACCTGGTTAAGGCGACCGATATCGGCCAGGGCGTC
+ATCTATAAAGATGAACTGGTCACAATAAGCGCGCTGAAAGTGCCTCATTCCCCTTTCCCG
+GACGGTGAAGCGTTTGCTTACCGTTTTGATACTCAGGGTAAGCGAATCGTCTTCTCTGGC
+GATACGTCCTGGTTTCCTCCGCTTGCAACGTTTGCCCAGGGGGCGGATATCCTGGTACAT
+GAGGCGGTACATGTCCCTTCGGTAGCAAAACTGGCTAATAGTATTGGCAACGGAAAAACG
+CTGGCTGAAGCGATTGCGTCGCATCACACCACGATTGAAGATGTCGGTAAGATTGCTCGC
+GAGGCCCACGTGAAAAAACTGGTGTTAAGTCATCTGGTGCCTGCGACGGTTGCGGATGAC
+GTCTGGCAACAGGAAGCCATGAAAAATTACCCGGGCCCTGTCATTGTCGGTCATGACAAT
+ATGACGATAAGCGTACCGTAAATGTCGCAACGCACAGAGAAAAAAATCGGGAAACGTTCG
+CAGGCCACCGGTGCAAAACGGCAGCTTATCTTAACCGCCGCGCTTGCCGTTTTTTCCCAG
+TATGGCATTCATGGCGCGCGTCTTGAACAGGTCGCCGAGCGGGCAGGCGTCTCCAAAACC
+AATCTGCTTTATTATTATCCCTCGAAAGAGGCGCTGTATGTCGCGGTAATGCGACAGATT
+CTGGATGTCTGGTTGGCGCCGCTCAAGGCGTTTCGCGCAGAATTTTCCCCTCTGGAGGCC
+ATCAAAGAGTATATCCGTCTCAAGCTGGAGGTTTCGCGTGATTATCCGCAGGCGTCGCGG
+CTCTTCTGCATGGAGATGCTGGCGGGCGCGCCGCTCTTAATGGATGAACTGACCGGCGAT
+CTAAAAGCGTTGATAGATGAAAAATCCGCGCTGATTGCCGGATGGGTGCACAGCGGGAAA
+CTCGCGCCCGTTTCTCCGCATCATTTGATCTTCATGATTTGGGCCGCCACGCAACATTAC
+GCCGATTTCGCCCCTCAGGTTGAAGCGGTAACCGGCGCGACGCTTCGCGATGAAGCCTTT
+TTCAACCAAACGGTCGAAAGCGTTCAGCGCATTATTATTGAAGGGATTCGCGTGCGTTAA
+ATGGCGAAACAACAACGGATGGGCTGGTGGTTTCTTTGCCTTGCATGTGTCGTGGTAATG
+GTTTGTACCGCGCAACGCATGGCGGGCCTGCACGCCTTGCAGATGCAGGCGACGGCCTCT
+GCTGCGGTGGTCAGCGCTCCCTCCTCGACAGATGACGGCTCGCCGGTCACTCCCTGCGAA
+TTAAGCGCCAAGTCGCTGCTGGCGGCGCCTCCAGTACTCTTTGAAGGTGCTATCCTTGCG
+CTTTATCTACTGCTTTCCTTACTGGCGCCTGTCCGGGTCATGCGCCTGCCGTTTTCGCCT
+CCACGGGCTATTTCGCCGCCCACATTACGGGTACATCTACGATTTTGTGTCTTCCGTGAA
+TGAATGATGATTTTATTCAGGCGGATACTGTTCTGCCTGTTATGGCTTTGGCTGCCCGTC
+TCCTGGGCGGCGGAAAGCGGCTGGCTGCGTTCGCCCGATAACGACCATGCCAGCATACGG
+CTACGTGCCGATACGTCCGCTAACAGTGAGACCCGGCTGTTGCTGGATGTCAAACTGGAA
+AACGGCTGGAAAACCTACTGGCGCGCGCCGGGGGAAGGGGGCGTGGCACCCTCTATCGCC
+TGGAAAGGCGACATGCCTGAGGTAAGCTGGTTCTGGCCAACCCCCTCGCGCTTTGATGTG
+GCGAATATCACCACCCAGGGATATCACGACGAGGTGACCTTTCCGATGATCGTGCGCGGT
+ACGCCGCCGGCGACCTTGCGCGGTGTGTTGACGTTATCAACCTGCAGCAATGTTTGTCTG
+TTGACCGATTACCCCTTTTCCGTGACGCCCACTGTGCAGAATGCCGATTTTGCCCATGAC
+TATGCGCGGGCGATGGGTAAAGTTCCGCTCCGCAGTGGGCTAACGGACTCGCTTGACGTT
+GGCTATCGCCCGGGAGAACTGGTGGTCACTGCTACGCGAGCGGCGGGCTGGTCATCGCCC
+GGGCTCTATCTTGACACCATAGATGACGTCGATTTTGCGAAGCCTCGCCTGCGCGTAGAG
+GGCGACAGGTTACAGGCGACGGTGCCGGTGACGGACAGTTGGGGCGAAAAGGCGCCCGAT
+TTGCGCGACAAATCGCTGACCCTCGTGTTAGCCGATGGCGCTATCGCCCAGGAGAGCACG
+CAAACCATTGGCGCTGCGCCAGCGCAAACGCCGGACAATGCGGCGCTACCTTTCTGGCAA
+GTTGTAATGATGGCGCTAATCGGCGGACTGATTCTTAATTTAATGCCCTGCGTACTGCCT
+GTTCTGGGCATGAAACTTGGCTCTATTTTATTGGTAGAGGAAAAAAGCCGCTCTCACATC
+AGGCGACAATTTTTGGCTTCGGTCGCCGGTATCATTGCGTCATTTATGGCGCTGGCGGCG
+TTTATGACCCTCCTTCGCCTGTCAAACCATGCGCTGGCCTGGGGAGTCCAGTTCCAGAAT
+GCATGGTTTATTGGTTTTATGGCGCTGGTGATGTTGTTGTTTAGCGCCAGCCTGTTCGGG
+CTTTTTGAGTTCAGGCTTCCCTCATCTATGACCACGAAACTGGCCACTTACGGCGGTAAC
+GGTATGTCGGGACATTTCTGGCAGGGGGCGTTCGCCACGCTGCTGGCGACGCCTTGTAGC
+GCGCCGTTTCTGGGCACGGCGGTCGCGGTGGCGCTCACGGCGTCGCTGCCGACGCTGTGG
+GGGCTGTTCCTTGCGCTTGGCCTGGGAATGAGCGCGCCGTGGCTACTGGTCGCGATACGA
+CCAGGGCTTGCGCTACGTTTACCGCGCCCCGGGCGTTGGATGAATGTCCTGCGCAGGATC
+CTCGGTCTGATGATGCTGGGGTCGGCTATCTGGCTGGCGACGTTACTCCTGCCGCATTTC
+GGCTTCACTGCGTCAAAGAGCGCGCAAGACACGGTTCAGTGGCAACCGTTGAGTGAACAG
+GCAATCCAGTCGGCGCTGGCGCAGCATAAGCGGGTATTTGTCGATGTCACTGCGGACTGG
+TGTATTACCTGTAAAGTGAATAAATACAACGTCCTGCAAAAAGAGGATGTGCAGGCCGCC
+TTGCAACAGCCGGATGTTGTGGCGCTGCGGGGAGACTGGACGCTGCCGTCCGATGCCATT
+ACAGATTTTCTGAAAACGCGCGGCCAGGTCGCCGTGCCGTTTAATCAGGTATATGGCCCC
+GGTTTGCCGGAAGGGGAGGCACTGCCCACTTTGCTGACCCGCGATGCGGTATTACAAACG
+TTGAAAAAAGCGAAAGGAATAACCCAATGAATGAAATACATGATTGTTTTACTGCTGGCG
+CTGTTTTCGACGCTGAGCATCGCGCAAGAAACCGCTCCTTTTACGCCGGATCAGGAAAAG
+CAGATTAAAAATCTGATCCATGCGGCGTTGTTTAACGATCCTGCCAGCCCGCGGATAGGC
+GCTAAACACCCTAAGCTGACGCTGGTGAACTTTACGGATTACAACTGCCCGTACTGCAAA
+CAGCTCGATCCGATGCTGGAAAAGATTGTGCAGAAATATCCTGACGTTGCGGTCATTATT
+AAACCGCTGCCATTCAAAGGAGAGAGTTCCATACTGGCGGCGCGTATTGCGCTGACCACC
+TGGCGCGATCATCCGCAACAGTTCCTCGCGCTACATGAAAAACTTATGCAAAAGCGCGGT
+TACCATACGGATGACAGTATTAAACAGGCCCAGCAGAAAGCAGGGGCGACGCCAGTGACG
+CTGGATGAAAAAAGCATGGAAACGATACGCACTAATTTGCAGTTGGCAAGACTGGTCGAC
+GTGCAAGGAACGCCAGCGACGATCATTGGCGACGAGCTGATTCCGGGCGCAGTGCCCTGG
+GATACGCTGGAAGCGGTGGTGAAAGAAAAACTGGCGGCTGCCAATGGCGGGTAAatgatt
+acacattctttcggcatcgttaattattttgtattatttggctacctcctggccatgatg
+ttagtcggtgtctatttttccagacggcaaaaaacagcagacgattattttcgcggtggt
+ggccgggttcctggttgggcggctggggtcagtgtatttgctactacgttaagctcaatt
+acatttatgtcaattcctgccaaagcgtttacttccgactggacgtttatcattggtcag
+tatctggctatcgcaattttaccgctggttttttatttctatattccgttttttcggaaa
+ttgaaagtcacatcagcctatgaatatctcgaagcacggttcgatgtgcgctgccgtctg
+ttcgccagcatgtcatttatgttgtttcatattggacgtatcgccattatcactttcctc
+accgtgctggccttgcgccccttcatcgctatagacccggtgattttggtactgttgatt
+agtgtgatgtgtatcatttatacctggatgggggggaattgaatggaaagtctattaaat
+cgtttatatgacgcgttaggcctggatgcgccagaagatgagccactgcttatcattgat
+gatgggatacaggtttattttaatgaatccgatcatacactggaaatgtgctgtcccttt
+atgccactgcctgacgacactctgactttgcagcattttttacgtcttaactacgccagc
+gccgtcactatcggcgctgatgcagacaatactgctttagtggcgctttatcgcttgccg
+caaaccagtaccgaagaagaggcgctcactggttttgaattattcatttcaaacgtgaag
+caattgaaagagcattatgcataaATGAAATACGACCTTATTATTATCGGCAGCGGTTCG
+GTTGGCGCCGCCGCTGGTTATTACGCCACCCGCGCCGGGCTAAAGGTCCTGATGACCGAT
+GCGCATATGCCGCCTTATCAACAGGGCAGCCACCACGGCGATACCCGTCTTATCCGCCAC
+GCTTATGGTGAAGGCGAAAAATATGTCCCGCTGGTGCTTCGCGCCCAGACGCTTTGGGAT
+GAGCTCTCCACACACAATGAAGAGCCTATTTTTGTCCGCTCCGGCGTCGTCAACCTCGGC
+CCGGCCGATTCCGCTTTCTTAGCCAACGTCGCACGAAGCGCGCAACAGTGGCAATTGAAC
+GTCGAGCGCCTGGACGCGACGGCCCTCATGACGCGCTGGCCGGAAATTCGCGTGCCCGAT
+AATTATATCGGGCTGTTTGAAGCTGACTCCGGTTTCCTGCGCAGCGAATTAGCCATTACC
+ACATGGCTTCGTCTGGCCCGAGAGGCAGGCTGCGCACAGCTATTCAACAGCCCGGTAAGC
+CATATTCACCATGATGATAACGGTGTGACGATAGAGACGAGTGAAGGCTGCTACCACGCC
+AGCAAAGCGCTGATTAGCGCGGGCACCTGGGTCAAAACGCTGGTACCGGAGCTGCCCGTT
+CAGCCCGTACGTAAAGTTTTTGCCTGGTTTAAGGCGGATGGACGTTACAGCACTAAAAAC
+CGCTTTCCGGCCTTTACCGGCGAAATGCCCAACGGCGATCACTATTACGGTTTCCCGGCG
+GAGAACGACGAGTTAAAAATCGGCAAACACAATGGCGGGCAGCGAATACAGGCACCGGAA
+GAGCGCAAGCCCTTTGCCGCCGTTGCCAGCGATGGCGCGGAAGCATTTCCTTTCCTGCGT
+AACGTACTGCCGGGTATCGGCGGTTGTTTACATGGGGCGGCATGTACCTATGATAATTCG
+CCGGACGAGGATTTTATTATCGATACGCTGCCTGGCCATGAGAATACGCTTGTCATCACT
+GGACTCAGCGGACATGGTTTTAAATTCGCCCCGGTGTTAGGAGAAATCGCTGCGGATTTT
+GCGTTGGGAAAAACGCCCTCCTTTGATCTGACGCCGTTCCGGCTTTCCCGTTTTAGCCAA
+TAAatgcaaatacagagcttctatcactcagcttcactaaaaacccaggaggcttttaaa
+agcctacaaaaaaccttatacaacggaatgcagattctctcaggccagggcaaagcgccg
+gctaaagcgcccgacgctcgcccggaaattattgtcctgcgagaacctggcgcgacatgg
+gggaattatctacagcatcagaagacgtctaaccactcgctgcataacctctataactta
+cagcgcgatcttcttaccgtcgcggcaaccgttctgggtaaacaagacccggttctaacg
+tcaatggcaaaccaaatggagttagccaaagttaaagcggaccggccagcaacaaaacaa
+gaagaagctgcggcaaaagcattgaagaaaaatcttatcgaacttattgcagcacgcact
+cagcagcaaaatggcttacctgcaaaagaagctcatcgctttgcggcagtagcgtttaga
+gatgctcaggtcaagcagctcaataaccagccctggcaaaccataaaaaatacactcacg
+cataacgggcatcactataccaacacgcagctccctgccgcagagatgaaaatcggcgca
+aaagatatctttcccagtgcttatgagggaaagggcgtatgcagttgggataccaagaat
+attcatcacgccaataatttgtggatgtccacggtgagtgtgcatgaggacggtaaagat
+aaaacgcttttttgcgggatacgtcatggtgtgctttccccctatcatgaaaaagatccg
+cttctgcgtcaggccggcgctgaaaacaaagccaaagaagtattagctgcggcacttttt
+agtaaacctgagttgcttaacagagccttagagggcgaagcggtaagcctgaaactggta
+tccgtcgggttactcaccgcgtcgaatattttcggcaaagagggaactatggtcgaggat
+caaatgcgcgcatggcaatcgttgacccagccgggaaaaatgattcatttaaaaatccgc
+aataaagatggcgatctacagacggtaaaaataaaaccggacgtcgccgcatttaatgtg
+ggtgttaatgagctggcgctcaagctcggctttggccttaaagcatcagatagctataat
+gccgaagcgctacatcagttattaggcaatgatttacgccctgaagccagaccaggtggc
+tgggttggcgaatggctggcgcaatacccggataattatgaggtcgtcaatacattagcg
+cgccagattaaggatatctggaaaaataaccaacatcataaagatggcggcgaaccctat
+aaactcgcacaacgccttgccatgttagcccatgaaattgacgcggtgcccgcctggaat
+tgtaaaagcggcaaagatcgtacagggatgatggattcagaaatcaagcgagagctcatt
+tctttccatcagacccatatgttaagtgcgcctggtagtcttccggatagcggtggacag
+aaaattttccaaaaagtattactgaatagcggtaacctggagattcagaaacaaaatacg
+ggcggggcgggaaacaaagtaatgaaaaatttatcgccagaggtgctcaatctttcctat
+caaaaacgagttggggatgaaaatatttggcagtcagtaaaaggtatttcttcattaatc
+acatcttgaATGAAACGATATATACTGGCTACCGCGATAGCGTCTCTTGTTGCAGCCCCG
+GCAATGGCGCTGGCCGCTGGCAGCAATATTCTCAGCGTACATATTCTCGATCAGCAAACA
+GGCAAACCAGCGCCCGGCGTGGAGGTGGTACTGGAGCAGAAAAAGGATAACGGATGGACG
+CAATTAAACACCGGGCATACCGACCAGGATGGACGAATTAAAGCACTGTGGCCCGAAAAA
+GCTGCCGCGCCGGGGGATTATCGCGTTATTTTTAAAACCGGCCAGTATTTTGAAAGTAAA
+AAACTGGACACGTTTTTCCCGGAGATTCCCGTCGAGTTTCATATCAGCAAAACGAATGAG
+CACTATCATGTGCCGCTGTTATTAAGTCAGTATGGTTATTCAACCTATCGCGGGAGCTAA
+ATGGCAAAGATTCTGGTGCTCTATTATTCCATGTACGGACACATTGAAACCATGGCGCAC
+GCGGTGGCGGAAGGGGCAAAGAAAGTCGACGGCGCAGAGGTCATTATAAAGCGTGTGCCA
+GAAACAATGCCGCCTGAAATCTTCGCAAAAGCTGGCGGTAAAACGCAAAACGCACCGGTT
+GCCACCCCACAGGAGCTGGCGGATTACGATGCCATTATTTTTGGTACGCCAACCCGGTTT
+GGCAATATGTCAGGCCAGATGCGTACCTTCCTGGACCAAACCGGCGGACTGTGGGCATCC
+GGCGCGCTATACGGCAAGCTCGGCGGCGTGTTCAGTTCTACCGGAACGGGCGGCGGCCAG
+GAGCAGACCATCACCTCGACCTGGACTACGCTTGCCCATCATGGGATGGTGATTGTCCCG
+ATAGGCTATTCCGCACAGGAACTGTTTGACGTCTCCCAGGTTCGCGGCGGTACGCCTTAC
+GGCGCAACGACTATCGCTGGAGGCGACGGTTCACGTCAACCAAGCCAGGAGGAACTCTCT
+ATCGCTCGCTATCAGGGGGAATACGTCGCCGGTCTGGCAGTCAAACTCAACGGCTAAatg
+gagcctcaacccccacgtcttaaacccggaaaaatccttgacactctgggtgctatgcaa
+aaaagcctgacacgtgcctcccagcgtattgcgcaatatattttagccttccccagacag
+gtgacacagtcatctattgccgatttgtcgcgcgacacacaggccggagaagccacggtt
+attcgcttttgtcgcaccctgggctataaaggttttcaggattttaaaatggacctggcc
+attgaacttgccactaccgagtctgatgacagtagtcctctactggatgccgaagttagc
+gaatccgacgatgcccacgccattggtttaaaattgcagaacaccattagtaatgtatta
+tctgaaacgctaaatctgcttgatatgcaacaggttctcggtgtcgtggacgccctacgt
+cactgtcactcagtttatatctttggtgtgggctcatcggggatcacggcgctggatatg
+aaacacaagctaatgcgtattggtttacggggcgatgcggtaagcaataaccattttatg
+tacatgcaggctacgctattaaaagcaggcgatgtcgcgatgggtgtcagtcactcgggc
+acatcgccagaaacagtgcattcactccgattggcccgacaggctggcgccaccacagtc
+gccattacccataatctgggttctccattatgtgaagaggccgatttttgcctgatcaat
+ggtaatcggcaaggaatgttgcagggtgactcgatcggtacgaaagccgcgcagcttttc
+gtctttgacctgctctatacccttcttgtacagtcctcgccggaacaggcccgagaaagc
+aaattacggacaatgaatgccctggacatgacaaaataaATGAAGAAACTGCCCGGCTTT
+ACGCAAGATTACTTACTCAGCAAGGCGACGACCCTGCCTGATAAAACACGCCTGGAGCGT
+GCCGTTGAACCGCTATGCGCGCGCCATCCCGGAGAGTGCGGCATTCTTGCGCTGGATAAC
+AGTCTGGACGCTTTTGCCGCCCGCTACCGCCTGACCGAAATGGCGGCGCGGACGCTGGAT
+GTGCAGTATTATATTTGGGAAGACGATATGTCCGGGCGGCTGCTCTTTTCGGTTCTGCTG
+TCGGCGGCGAAGCGCGGCGTTCATGTTCGTCTGCTGCTGGATGATAACAATACGCCTGGT
+CTGGATGATACGTTGCGCTTGCTGGATAGCCATCCTAATATCGAAGTTCGTCTGTTTAAT
+CCTTTCTCTTTTCGTACGCTACGCGCGCTGGGATATTTGACGGATTTTGCGCGGCTGAAT
+CGGCGGATGCACAATAAAAGTTACACTGCCGACGGCGTAGTGACGCTGGTCGGTGGGCGC
+AACATCGGCGATGCCTATTTCGGCGCTGGCGAGGAGCCGCTATTTTCCGATCTGGACGTG
+ATGGCCATTGGCCCGGTGGTCAATGATGTCGCCAATGATTTTGAACGTTACTGGCGCTGT
+AGTTCAGTGTCGACATTGCAGCAAGTATTATCCCTTTCTGAGCAGGAACTGACGCAGCGT
+ATCGAACTTCCCGAATCCTGGTATAACGATGAGATCACCCGCCGTTATCTGCATAAGCTG
+GAAACCAGCCAGTTTATGGCGGATCTCGATCGCGGAACGTTGCCGCTGATTTGGGCAAAA
+ACACGCTTGCTTAGCGATGACCCTTCTAAAGGCGAGGGGAAGGCGCAGCGCCATTCGCTT
+CTTCCGCAGCGATTATTTGACGTGATGGGGTCGCCGACGGAGCGTATCGACATTATTTCC
+GCTTACTTTGTCCCTACGCGCGCAGGCGTGGCGCAGTTGCTTAATCTGGTCAGGAAAGGT
+GTGAAGATCGCCATCTTAACTAACTCTCTGGCGGCCAACGATGTGGCGGTCGTTCACGCA
+GGGTACGCGCGCTGGCGCAAGAAATTACTGCGCTATGGCGTGGAGCTCTACGAACTGAAA
+CCGACCCGCGAACATGAAACCGCCGTACATGATCGCGGACTCACCGGGAACTCAGGTTCC
+AGCTTACATGCTAAAACGTTCAGTATTGATGGTAGTAAGGTGTTTATCGGGTCGCTTAAT
+TTTGATCCCCGTTCAACGCTTTTAAATACCGAAATGGGCTTTGTCATTGAAAGTGAAACG
+CTGGCGACGCTTATTCATAAGCGTTTTACGCAGAGCCAACGCGATGCGGCCTGGCAACTG
+CGGCTGGATCGCTGGGGACGAATTAACTGGATCGATCGTCAGCAAGAAGAGGAAAAGGTG
+TTAAAGAAAGAACCCGCTACGCGTTTCTGGCAGCGAGTTCTGGTACGGTTGGCGGCAATT
+TTACCTGTGGAATGGTTGCTGTGAATGCCAACTCAAGAAGCAAAAGCGCACCGCGTCGGC
+GAATGGGCAAGCCTGCGTAATACGTCGCCGGAAATTGCCGAAGCCATTTTTGAAGTCGCT
+CACTATGACGAGAAACTGGCAGAAAAAATATGGGAAGAAGGTAGCGATGAGGTGCTGATC
+AAAGCCTTTGAGAAAACGGACAAAGACTCGCTCTTCTGGGGCGAACAAGTCATCGAACGT
+AAGAACGTATAAatgtatcccgttgacctgcatatgcataccgtcgccagcactcatgcc
+tacagtactctgagcgattatatcgcggaagccaaacgcaaaggcattaaactttttgcg
+attaccgatcatggtccggacatggaagatgcgccgcatcactggcattttattaacatg
+cgcatctggccgcgtctggttgacggcgtggggatactgcgtggcattgaggcgaatatc
+aagaatattaacggtgaaattgattgttccggaaagatgttcgactcgctggatctgatt
+atcgcaggctttcatgagcccgtttttgcgccgcatgataaagaaaccaatactcaggcg
+atgatcgcgaccatcgccagcggcaaggtgcatataattagtcaccccggaaatccaaag
+tatccagtggaggttaaagccatcgcgcaggcggcggcgaaacaccatgtagcgctggaa
+atcaacaactcttcttttctgcattcgcgtaaaggaagcgaagataattgccgcgcggtc
+gctgccgccgtacgcgatgcgggaggctgggtagcgttaggctctgattcccatacggcc
+tttacgcttggcgatttcaccgaatgccggaaaattctggatgcggtgaattttccggaa
+gatcgaatcctgaacgtctctccgcagcgcttactggcctttctcgaatcacgcggtatg
+gcgcctgtaccggaatttgccgaactttaaatgaatgagttttcaatcctgtgccgtgtg
+ctgggatcgttgttttaccgccaaccgcaagatcctttactggttccgctgtttacgtta
+atccgtgaaggtaaactggcggcaaactggccgctggagcaggatgacatgctggcgcgt
+ttacagaaaagctgcgatatcacgcagatttccactgattacaatgcgttatttgttggg
+gaagagtgcgcggtagcgccataccgcagtgcgtgggtcgaaggcgcggaagagtctgag
+gtgcgcgcttttttaacgtcgcgagggatgccgctggccgatacgcctgccgatcacatt
+ggcactttattgctcgcggcctcctggctggaagatcagtctgccgaagatgaaagtgaa
+gcgctggaaaccttatttgccgattatctgcttccctggtgcaataccttcctcggtaaa
+gttgaagcccatgccgttacgccattctggcgcactctggcgccgctaacgcgtgatgcg
+ataggggccatgtgggatgaacttcaggaagaagatgaagaataaatgatgcgcgccatg
+aacatacttctttctattgctatcactacgggcatcctttctggaatatggggatgggtg
+gccgtctccctggggttactaagctgggccggttttttaggctgtacggcttatttcgcc
+tgtccgcagggcggctttaagggattgttgatttccgcctgtacgctgttaagcggtatg
+gtgtgggcgctggtcattattcacggtagcgcgttggcgccgcatctggaaattgtcagt
+tacgtgttgacggggatcgtggcattcctgatgtgtatccaggcaaagcagctattgctt
+tcttttgttccgggaacatttatcggcgcctgcgcgacatttgcagggcagggtgactgg
+cggttggtattaccgtcgctggcgctggggctaatctttggctatgccatgaaaaatagt
+gggctatggctggcatcacgccgcgagcaacattcagcgaatacggcggtcacaaaataa
+ATGAAAAAAAACCTGCTGGGATTCACCCTCGCATCCTTGTTATTCACGACCGGTTCCGCC
+GTGGCGGCGGAGTATAAAATTGATAAAGAAGGCCAACATGCGTTCGTCAATTTCCGCATC
+CAGCATCTGGGCTACAGCTGGCTATACGGCACCTTTAAAGATTTCGACGGCACGTTCACT
+TTTGACGAAAAAAATCCGTCAGCAGACAAAGTGAATGTGACCATTAACACCAATAGCGTC
+GACACTAACCATGCCGAACGTGACAAACACCTGCGTAGCGCGGAGTTTCTTAATGTTGCG
+AAATTCCCGCAGGCAACCTTCACCTCTACCAGCGTGAAAAAAGAGGGCGATGAACTGGAT
+ATTACCGGCAATCTGACGCTCAATGGCGTGACTAAACCGGTGACGCTGGAAGCGAAGCTG
+ATGGGCCAGGGCGACGATCCGTGGGGCGGTAAGCGCGCGGGCTTTGAGGCCGAAGGAAAA
+ATTAAGCTGAAAGATTTCAATATAACTACCGATCTCGGCCCAGCCTCACAAGAGGTGGAG
+CTTATCATCTCAGTAGAAGGCGTTCAGCAGAAGTAAATGTTACTGATGATGGCGCTGATC
+GTGCGTATTATCTGGCGGCTTTATTCTCCGCCGCCCGTTGCGTTGACCAGCTATTCCCGT
+TTAACGCGCATTGGCGCCGCCGCGGGTCATATCCTTCTGTATCTCCTGCTCTTTGCGATA
+ATCATTAGCGGCTACCTGATTTCCACCGCCGACGGTAAACCGATTAGCGTCTTTGGCTGG
+TTTGAGATTCCGGCCACGCTTACGGACGCGGGCGCGCAGGCTGACATCGCCGGAACACTG
+CATCTGTGGTTTGCCTGGTCGCTGGTCATTATCTCGCTCTCGCATGGGGTTATGGCGCTA
+AAACACCATTTCATCGATAAAGACGACACACTGAAACGTATGACAGGAATGTCGTCATCT
+GACTATGGAGCTCAAAAATGAATGGTTAAGTTATCAATGACGCTGCGCCTGACAATTTCT
+TTTATCGCCATACTTATCCTCGCCTGTACCGGCATTAGCTGGACGCTCTATAACGCGCTG
+AGCAAAGAATTAACGTATCGGGATGATATGACGCTAATAAATCGGGCGGCGCAAATGCAG
+CAACTGTTACTGGATGGCGCCAGGCCGGAAAATCTGCCGCTCTATTTCAATCGGATGGTG
+GATACGAAGCAGGATATCTTATTGATCCACTCAGCAACAGGCCATAATGTTGCGATTAAT
+CATAGCGGCATCCCCGACCAACGCTTTAACGAGATTCCGCTGGCTAAAAACATCACCCGC
+GAAACCTTATTTCGCCAGGCGGTACAAGGCACGGAGCTGACCGCGGTACGAGTAAACGCC
+AGAAGCGGCGATAACCCGCTGACCCTTACTATTGCCAGGCTGGCGACGGAAAGGCGGCAA
+ATGCTGGCGCAATATCGCCGCAACAGTTTGCTGATTAGCCTTATCGCGATCCTCGTCTGT
+TCGGCGCTCAGTCCATTAGTCATCAGAAACGGGCTGCGGGCCATTACGTCGCTCAGCCGA
+CTCACCGCGGCGACAGATAGCGGCACACTTCGCCAGCCGCTGGCGGAACAGGCGTTACCC
+GTCGAGCTCAGGCCGCTTGGGCAAGCGCTAAATACCATGCGCCAGAAGCTTTCCGACGAT
+TTTGAACGCCTGAACCAATTTGCCGACGATCTGGCGCATGAGCTGCGCACGCCGGTTAAT
+ATTTTACTGGGGAAGAATCAGGTTATGCTGAGTCAGGAACGCAGCGCCGAAGAGTATCAA
+CAAGCCCTTGTCGATAATATTGAAGAGCTGGAGGGACTGTCGCGACTGACAGAAAATATT
+CTCTTTCTGGCACGCGCGGAGCACCAGAATATAGCGGTAAAAAAACAGCCTGTTTCGCTC
+AATGCGCTGGTCGAAAATATGCTGGATTATCTTAGCCCCCTTGCCGAAGAGAAGCACATC
+TGTTTTATAAATCAATGTCAGGGAACGGTATGGGCTGACGAAATATTATTACAAAGAGTG
+CTCTCAAACCTGCTGACGAATGCCATCCGTTATTCTGATGAAAACGCCGTGATACGTATT
+GAAAGCGCTTATGATGATAACGTTGCAGAAATTCGGGTCGCTAATCCGGGCAGCCCCACC
+GCCGATGCGGATAAGCTTTTCCGGCGTTTTTGGCGAGGAGATAATGCCCGCTACACTGCC
+GGTTTCGGCCTGGGGTTATCGTTAGTTAACGCGATTGCCCTATTGCACGGTGGCTCGGCA
+TCTTACCGCTATGCCGATGAACATAATATCTTTTCGGTTCGTCTGCCTGATAGCGGTGAT
+AGCTAAgtgatatgtctcaaagtccagggcggcattggtgaaatttttacggtgacgcag
+caggcggataaattctttccggctacgcagttccactggagctggacggaaagcacagta
+cctgtattgatgattggttttctgtttgccaatattcagcaatttactgccagtcaggat
+gtggtccaacgctatatcgtgactgactccatagaggaaacgaagaaaacattacttaca
+aatgccaaactggttgctgtgatccctgttttcttttttgctatcggctcggcattattt
+gtctactatcagcaacatccacaattattaccggcgggattcaacactggcggcattttg
+cccttattcgtggtcaccgaaatgccagtcggcattgcagggttgataatctccgctatt
+ttcgctgccgcgcagtccagcatctccagcagcttaaacagcatttccagttgttttaat
+tccgatatctatcagcgtttgagtcataaaaaaggaacgccagaaaaccgtatgaaaata
+gctaagttagttattctggtcgcgggcctgataagtagcgcggcctcggtatggctggtc
+atggccgatgaatcagaaatctgggatgcatttaatagtctgataggtctgatgggaggg
+ccaatgaccggtctgttcatgctgggcattttctttaaacgagcaaatgccgggagtgcg
+gttttaggaattattatcagcgtcattaccgtgctgggcacacgctatgccactgacctt
+aacttcttcttttatggggtcattggctcgctaagcgtggtgatcagcggcgttattttc
+gccccgttatttgccccggcaccgccattgacgctggatgaaaaacctgaaccaaaggtg
+acattatgaATGAAAATCAACAGATATCTTCTGGGTATGGTTTCGTTTATAGCATTTTCA
+TCATATCTACAAGCGGCAACCCTTGATTATCGGCATGAATATGCTGATAGAACCAGAATT
+AATAAAGACCGTATTGCTATAATTGAAAAGCTTCCTAACGGCATTGGTTTTTATGTCGAT
+GCCAGCGTTAAATCGGGAGGAGTAGATGGTGAGCAGGATAAGCATTTAAGCGATCTCGTC
+GCAAACGCTATAGAACTGGGCGTAAGTTATAATTATAAAGTTACGGACCATTTTGTTTTG
+CAGCCTGGATTTATATTTGAAAGCGGTCCAGACACTTCAATTTATAAGCCTTATTTAAGG
+GCGCAATATAATTTTGATTCTGGTGTTTATATGGCTGGTCGTTACCGTTATGACTATGCA
+AGGAAGACAGCTAACTATAATGATGATGAGAAAACGAATAGATTTGATACTTATATAGGT
+TATGTTTTTGATGAGTTGAAATTGGAATATAAATTTACCTGGATGGATAGCGATCAAATT
+AAATTTGATAACAAAAAAACAAACTATGAACATAATGTGGCTTTAGCCTGGAAACTGAAT
+AAGTCATTTACACCATACGTTGAGGTCGGAAATGTAGCGGTGAGAAATAATACCGATGAG
+AGACAGACCCGTTATCGCGTTGGATTACAATACCACTTTTGAATGACGAAATACGGTGTT
+ATAGGTACAGGTTATTTTGGCGCTGAACTGGCGCGATTTATGTCTAAGGTTGAAGGGGCG
+AAAATCACTGCGATTTACGATCCGGTAAATGCGGCTCCGATAGCGAAAGAGCTGAACTGT
+GTCGCCACTTCAACGATGGAGGCGCTTTGTACCCATCCTGATGTGGATTGCGTAATTATT
+GCTTCACCAAATTACTTACATAAAGCGCCGGTCATTGCGGCGGCTAAAGCGGGTAAACAC
+GTGTTTTGTGAAAAACCTATCGCCTTAAATTACCAGGATTGTAAGGATATGGTTGATGCC
+TGCAAAGAAGCTGGTGTTACCTTTATGGCGGGTCACGTTATGAACTTTTTTCACGGGGTT
+CGCCACGCTAAAGCGCTCATCAAAGCCGGTGAAATCGGTGAAGTTACACAAGTTCACACT
+AAACGTAATGGTTTTGAAGACGTGCAGGATGAGATCTCATGGAAGAAGATTCGCGCAAAG
+TCAGGTGGGCATCTGTACCATCACATTCACGAGCTAGATTGTACACTGTTCATCATGGAT
+GAAACCCCATCCCTGGTTTCAATGGCGGCGGGGAATGTTGCGCACAAAGGTGAAAAATTT
+GGTGATGAAGATGATGTTGTCCTAATCACCCTTGAGTTTGAAAGCGGTCGTTTCGCGACA
+CTTCAGTGGGGATCATCGTTCCACTACCCTGAGCACTATGTATTAATTGAGGGCACGACA
+GGTGCAATTCTCATTGATATGCAAAACACGGCTGGTTATCTAATAAAAGCGGGCAAAAAA
+ACACACTTTCTTGTGCATGAAAGCCAGGCGGAGGATGATGATCGTCGCAACGGTAACATA
+TCCAGCGAGATGGATGGCGCAATCGCTTATGGTAAACCCGGTAAACGTACGCCGATGTGG
+CTCTCATCAATTATGAAACTGGAGATGCAGTACTTGCATGATGTGATAAACGGTCTGGAG
+CCAGGCGAGGAGTTTGCTAAATTGCTAACGGGAGAAGCGGCGACAAATGCCATTGCTACC
+GCTGATGCTGCGACGCTTTCTTCAAACGAGGGGCGCAAAGTTAAACTCACTGAAATTCTT
+GGCTAAATGACATCACGTCTTCAGGTCATACAGGGTGATATCACTCAACTTAGCGTCGAT
+GCGATTGTGAATGCCGCTAACGCATCATTAATGGGCGGCGGTGGCGTAGACGGCGCAATT
+CATCGCGCGGCGGGGCCGGCATTGCTGGACGCCTGTAAACTCATCCGTCAGCAACAGGGC
+GAATGTCAGACGGGACATGCGGTTATCACGCCTGCTGGCAAGCTTTCGGCAAAGGCGGTT
+ATTCACACAGTGGGGCCCGTCTGGCGAGGCGGCGAACACCAGGAAGCTGAGCTACTCGAA
+GAGGCATACCGGAATTGTTTGCTGCTTGCCGAGGCGAATCACTTTCGTTCCATCGCTTTT
+CCGGCAATCAGTACCGGCGTTTATGGCTATCCACGCGCCCAGGCCGCTGAAGTCGCCGTC
+AGGACGGTTTCAGATTTTATTACCCGTTACGCTCTGCCTGAACAGGTATACTTTGTCTGT
+TATGATGAAGAAACTGCCCGGCTTTACGCAAGATTACTTACTCAGCAAGGCGACGACCCT
+GCCTGA
+>real_data_2
+ATGAAAAAATCATTACTCGCTGTTGCTGTGGCAGGGGCTGTTTTGTTGTCATCCGCCGTA
+CAGGCGCAGACAACGCCGGAAGGTTATCAATTACAACAGGTGCTGATGATGAGCCGCCAT
+AATCTGCGGGCGCCGCTGGCGAATAATGGCAACGTACTGGCGCAGTCGACGCCGAACGCC
+TGGCCGGCGTGGGACGTTCCCGGCGGGCAACTGACGACGAAAGGCGGCGTGCTGGAAGTC
+TATATGGGACACTACACACGTGAATGGCTGGTCGCGCAGGGGCTGATACCGTCGGGAGAA
+TGTCCGGCGCCCGACACGGTATATGCCTATGCGAATAGTTTGCAGCGCACCGTCGCCACC
+GCGCAATTTTTCATTACCAGCGCTTTCCCCGGCTGTGATATTCCTGTTCATCATCAGGAA
+AAAATGGGCACTATGGACCCTACCTTCAATCCGGTGATTACCGATGATTCCGCCGCGTTC
+CGGCAACAGGCCGTACAGGCGATGGAAAAGGCGCGTAGTCAGCTACATCTTGATGAGAGT
+TATAAACTGCTTGAGCAGATAACGCATTATCAGGACTCGCCGTCCTGCAAAGAGAAGCAT
+CAGTGTTCGCTAATCGACGCGAAAGATACCTTCAGCGCGAACTATCAGCAAGAGCCTGGC
+GTGCAGGGGCCGCTGAAAGTAGGGAACTCGCTGGTGGATGCGTTTACCCTGCAATATTAC
+GAAGGCTTTCCGATGGATCAGGTCGCTTGGGGCGGGATCCACACCGATCGGCAGTGGAAG
+GTGCTGTCAAAACTGAAAAACGGCTACCAGGACAGCCTGTTTACCTCACCCACGGTGGCG
+CGCAATGTCGCTGCGCCGCTGGTAAAATATATCGATAAGGTGCTGGTTGCCGAGCGCGTT
+AGCGCGCCGAAGGTTACCGTGCTGGTGGGGCATGATTCCAATATCGCGTCGCTGCTGACG
+GCGCTGGATTTTAAACCCTATCAGCTCCATGACCAGTATGAGAGAACGCCGATTGGTGGT
+CAGCTTGTCTTCCAACGCTGGCATGACGGTAACGCTAACCGGGATTTGATGAAAATCGAG
+TATGTCTACCAGAGCGCCCGGCAGTTACGTAATGCGGAAGCGTTAACGCTCAAATCGCCT
+GCGCAAAGGGTAACGCTGGAACTGAAAGGATGTCCGGTGGATGCGAACGGCTTCTGTCCG
+CTGGATAAGTTCGATAACGTCATGAACACTGCTGCAAAATAGATGGAAAAGAATAATGAA
+GTCATTCAGACCCATCCGCTTGTAGGATGGGACATCAGCACCGTCGATAGCTATGATGCG
+CTGATGCTGCGTTTACACTACCAGACCCCAAATCGTCCGGAACCGGAAGGGACTGAAGTT
+GGTCAAACGCTCTGGTTAACGACAGATGTAGCCAGGCAATTTATTTCAATATTAGAAGCC
+GGCATCGCCAAAATAGAATCAGGCGATTACCAGGAAAACGAGTATCGTCACCATTAGATG
+GAACTTAAGGATTATTACGCCATTATGGGCGTGAAACCGACGGACGATCTCAAGACGATT
+AAGACCGCCTATCGCCGACTGGCCCGCAAGTACCATCCAGATGTCAGCAAAGAACCCGAT
+GCCGAAGCCCGTTTCAAAGAGGTTGCTGAAGCATGGGAAGTGCTGAGTGATGAGCAACGG
+CGCGCCGAGTATGACCAGTTATGGCAACACCGTAACGATCCACAATTTAATCGCCAGTTC
+CAGCAACACGAAGGCCAGCCGTATAACGCCGAAGATTTTGATGATATTTTCTCGTCTATT
+TTTGGTCAGCACGGTCGTCATTCGCACCACCGCCACGCCGCACGCGGTCATGATATCGAA
+ATTGAAGTGGCGGTATTCCTGGAAGAAACGCTGGAAGAGCACCAGCGTACGATTAGCTAT
+TCCGTCCCCGTTTATAACGCGTTCGGCCTGGTGGAGCGGGAAATTCCCAAAACATTGAAT
+GTGAAAATCCCGGCTGGCGTCAGCAACGGGCAACGAATCAGACTGAAAGGCCAGGGCACG
+CCGGGGGAAAACGGCGGACCTAATGGCGATTTATGGCTCGTTATCCATATTGCCCCGCAT
+CCGCTCTTTGATATCGTCAATCAGGATCTGGAAGTCGTCCTTCCGCTTGCCCCATGGGAG
+GCGGCGCTCGGCGCTAAGGTGTCTGTGCCAACGCTTAAAGAGCGTATTTTGCTGACCATT
+CCCCCCGGCAGCCAGGCAGGTCAGCGGCTGCGTATCAAAGGAAAAGGATTAGCCAGTAAA
+AAGCACACTGGCGATCTCTATGCCATCATCAAAATCGTTATGCCGCCGAAACCTGACGAG
+AAAACAGCTGCCCTGTGGCAACAACTGGCGGACGCGCAGTCGTCCTTTGACCCACGCCAG
+CAATGGGGGAAAGCATAAATGGCTAACATCACTGTCACCTTTACCATCACCGAATTTTGT
+TTGCACACCGGCGTGACGGAAGAGGAGCTAAACGAAATCGTCGGACTTGGCGTAATTGAG
+CCTTACGAAGACGATAACGCCGACTGGCAATTCGACGATCGCGCAGCGAGCGTGGTACAA
+CGCGCGCTACGCTTACGCGAGGAGCTGGCGCTCGACTGGCCAGGGATCGCGGTCGCGTTA
+ACGCTGCTGGAAGAGAATTCACGGCTGCGCGAAGAAAACCGGTTACTGCTGCAACGCCTT
+TCTCGCTTTATCTCGCATCCCTAAATGTCATCTTGTTGGAGATTTACGGATTCGCTAACA
+AGCCTATGGCATACTGCGTTGATGAAGATTTTATTGATTGAAGATAACCAGAAAACCATT
+GAGTGGGTACGTCAGGGACTCACGGAGGCAGGCTATGTGGTTGATTATGCCTGTGATGGA
+CGAGACGGATTACACCTAGCCCTTCAGGAACATTATTCATTGATTATTCTTGATATTATG
+CTGCCGGGGCTTGATGGATGGCAGGTTTTACGCGCGTTGCGCACTGCATATCAGCCCCCT
+GTTATTTGCCTGACGGCGCGCGACTCGGTTGAGGATCGCGTCAAAGGTCTTGAGGCGGGC
+GCTAATGATTACCTTGTTAAGCCTTTTTCCTTCGCCGAACTGCTGGCCCGGGTGAGAGCT
+CAACTCAGACAGCATGTCCCGGTCTTTACCCGACTGACGATCAATGGTCTGGACATGGAT
+GCCACAAAGCAATCGGTGTTACGAAATGGCAAACCGATTTCCCTGACCCGCAAAGAATTC
+CTGCTCCTCTGGTTACTGGCGTCCCGGGCAGGGGAAATCGTGCCCCGAACCGCGATCGCC
+AGCGAAGTTTGGGGAATTAACTTTGATAGTGAAACCAACACCGTTGATGTCGCGATTCGT
+CGGCTGCGCGCCAAAGTAGACGATCCATTTGAAAAGAAGCTCATTATGACCGTCCAGGGG
+ATGGGTTATCGATTACAGGCGGAAACGTCGCAGAATGGTTAAatgaaaaacaaattgtta
+tttatgatgttggcaatactgggtgcgcctgggattgcaaccgcgacaaattatgatctg
+gctcgttcagaggataattttgcggtaaatgaattaagcaagtcttcatttaatcaggcg
+gccattattggtgaagtcggcacggataatagtgccagagtacgccaggaaggatcaaaa
+ctattgtccgttgtttcacaagaaggagaaaataatcgggcgaaagtcgaccaggcaggg
+aattataactttgcgtatattgagcaaacgggcaatgccaacgatgccagtatatcgcaa
+agcgcttacggtgatagtgcggctattatccagaaaggttctggaaataaggccaatatt
+acccagtacggtgcgcagaaaacagcagttgtagtgcagaaacagtcgcatatggctatt
+cgcgtcacccaaggctaaatgcatactttattgctccttgccgcactttcaaatgagatt
+acgtttaccacgactcagcaaggcgatatttacacggtgatccctcaggtcacagtaaac
+gaaccctgcgtctgtctggtgcaaattctctctgtgcgcgacggcgtcgggggagaaagc
+catacacagcaaaaacaaacgctatctttacctgctaatcaaccgattgagttggctcgt
+cttagtgtaaatatatcttcagaggactcggttaaaattattgttactgtttcggacgga
+caatcactgcatttatcacaacaatggccgccttctgcacagtagatgtttaatgaagtc
+catagtagtcatggtcacacactattgttgatcacaaagccatctctgcaagctacggca
+ttattgcaacatttaaagcaatcgctggccataaccggacaactgcataatattcaacgt
+tctctggaagatatctcagccggttgcattgttttaatgcatatgatggaagcggataag
+aagcttatccactattggcaggataatttaagccgcaaacacaataatataaaaacatta
+ttgttaaatacccctgacgattatccctaccgtgaaattcaaaactggcctcatattaac
+ggcgtgttttacgccactgaagaccaggaacacgtggtccgcggattacagggtattctg
+cggggcgaatgctatttttcacaaaaattagccagttacctgattacacactcaggaaat
+taccgctacaacagcaccgagtccgcattactcactcatcgcgaaaaagagatcctcaat
+aagttacgtattggtgcctctaataatgaaatcgccaggccgctatttatcagcgagaat
+acggttaagacacatctttataatcttttcaaaaagatacctgtcaaaaatcgcacccag
+gcagtttcatgaatgaaacgctatctgacctggattgtagcagcagagttactgttcgct
+accggaaacctccatgccaatgaagttgaagtcgaggttcccggattgttaaccgaccat
+accgtctcttccataggacatgaattctatcgtgcattcagcgacaaatgggaaagcgaa
+tacaccggcaacctgaccattaatgaaagacccagtgcgcgttggggaagctggatcacc
+ataacggtaaaccaggacgttattttccagacctttttatttccaatgaaaagagacttc
+gagaaaaccgtcgtcttcgcattagcgcaaacagaggaagcattaaatcgccgacaaata
+gatcaaacgctcttaagtacgagtgatttagcgcgtgatgaattctaaatgcgtgttaaa
+catgcagtagtgctgctcatgcttttttcgccattaacctgggctcgaaatatgacgttc
+cagttccgtaatcctaactttggtggaaaccccaataacggttcccttttattgaatagc
+gcccaggcgcaaaattcatataaagaccccgcttatgataacgatcttggtatcgagacc
+ccctcagcgttggataactttacgcaggctattcaatcgcaaattctgggcggcttgttg
+accaatattaataccggaaaaccaggacgtatggtgaccaatgatcttattatcgatatc
+gctaatcgcgacggacagctccagctcaacgtcacggacagaaaaccgggaagaacctcg
+accatcgaagtgtcaggtttacaaactcagtcaaccgatttttaaatgccgcgcttactt
+attttggttgccgttttatcgttgagcggatgcttaactgccccgccgaaacaagctgcg
+aaaccgacattaatgccccccgcacaaagttacaaagatttgacgcacttacctgctccc
+accggtaagatctttgtttcggtatataacattcaggatgaaacgggccaatttaaacct
+tacccggcaagtaacttttccacggctgtgccgcagagcgccaccgctatgttggtcacc
+gcgctgaaagattcgcgctcgtttatcccactagaacgacaaggcttacagaatcttttg
+aatgaacggaaaattattcccgcagcccaggaaaacggcaccgtggcgatgaataaccgt
+atcccgcttcagtcgttgacggcggcaaatattatggtggaaggttctattattggttat
+gaaagtaacgtcaaatccgccggggtcggcgcaagatatttcggtattggcgccgatacg
+cagtatcagctggatcagactgctgtcaacctgcgcgtggttaacgtcagtacgggcgag
+atcctttcttcggtgaacaccagtaaaacgatcctttcctatgaagtacaggcaggcgtg
+ttccgttttattgattacccgcgcttactggaaggcgaaatcggctatacctcgaacgaa
+ccggtgatgctgtgtctgacgtcagccattgaaaccggcgttatcttcctcattaatgat
+ggtatcgatcgcggactgtcggatttgcagaataaagcggacaggcaaaatgatattctg
+gtgaaataccgtgagctgtcagtaccgccagaatcctgaATGTCTATTGCCGTAAATATG
+AATGACCCGACCAACACGGGCGTCAAAACGACGACCGGCAGCGGGTCGATGACCGGAAGC
+AACGCTGCCGATCTGCAAAGCAGTTTCCTGACCTTACTGGTCGCGCAATTGAAGAACCAG
+GACCCGACTAACCCATTACAAAATAATGAGTTAACGACACAGTTGGCGCAAATCAGTACC
+GTGAGCGGCATTGAAAAACTGAATACGACGCTGGGGGCTATTTCCGGGCAAATCGATAAT
+AGTCAGTCCCTACAGGCGACCACGCTGATTGGACATGGCGTTATGGTGCCTGGCACCACA
+ATTCTGGCGGGTAAAGGCGCGGAAGAAGGGGCCGTGACGTCCACGACGCCGTTTGGCGTG
+GAATTGCAACAGCCTGCGGACAAAGTGACGGCAACCATTACCGATAAAGATGGCCGGGTG
+GTACGGACGCTGGAGATCGGTGAGTTGCGAGCCGGGGTACACACCTTTACCTGGGATGGT
+AAGCAAACGGACGGAACAACGGTACCGAATGGTTCTTACAACATTGCGATTACCGCCAGC
+AATGGCGGGACGCAACTGGTGGCGCAGCCGCTGCAATTCGCTCTGGTACAGGGCGTGACG
+AAGGGCAGTAACGGCAACCTGTTGGATCTGGGTACCTACGGCACCACCACACTCGACGAA
+GTTCGGCAAATAATCTAAATGTCTTTTTCTCAAGCGGTTAGCGGCCTGAACGCTGCGGCC
+ACCAACCTTGATGTTATCGGTAATAACATCGCCAACTCCGCCACCTATGGCTTTAAGTCC
+GGTACGGCATCATTTGCCGATATGTTCGCCGGTTCCAAAGTGGGGTTGGGCGTAAAAGTG
+GCGGGGATTACCCAGGATTTTACCGACGGTACGACAACGAACACCGGGCGCGGGCTGGAT
+GTCGCGATTAGCCAGAACGGTTTTTTCCGCCTGGTAGACAGCAACGGTTCCGTGTTCTAT
+AGCCGCAACGGCCAGTTCAAACTGGACGAGAACCGTAACCTGGTCAATATGCAGGGGATG
+CAGTTGACCGGCTATCCGGCCACCGGTACGCCGCCGACCATTCAGCAGGGGGCGAATCCT
+GCGCCGATCACCATTCCGAACACGCTGATGGCGGCGAAATCGACCACCACCGCGTCAATG
+CAGATCAACCTGAACTCAACGGACCCTGTACCGTCTAAAACGCCCTTTAGCGTGAGTGAT
+GCGGATTCGTATAACAAAAAAGGCACCGTCACCGTTTATGACAGCCAGGGTAATGCCCAT
+GACATGAACGTCTATTTTGTGAAAACCAAAGATAATGAATGGGCTGTGTACACCCATGAC
+AGCAGCGATCCTGCAGCCACTGCGCCAACAACGGCGTCCACTACGCTGAAATTCAATGAA
+AACGGGATTCTGGAGTCTGGCGGTACGGTGAACATCACCACCGGTACGATTAATGGCGCG
+ACAGCGGCCACCTTCTCCCTCAGCTTCCTTAACTCCATGCAGCAGAACACCGGGGCTAAT
+AACATCGTCGCCACCAATCAAAACGGCTATAAGCCTGGCGACCTGGTGAGCTACCAGATT
+AACAATGATGGCACCGTGGTTGGCAACTACTCCAACGAGCAGGAGCAGGTGCTGGGGCAG
+ATTGTGCTGGCTAACTTCGCCAACAACGAAGGTCTGGCATCCCAGGGCGATAACGTCTGG
+GCGGCGACGCAGGCCTCCGGGGTAGCGCTGCTGGGGACTGCCGGTTCCGGCAACTTCGGT
+AAGCTGACGAACGGCGCGCTGGAAGCCTCTAACGTGGATTTGAGTAAAGAGCTGGTGAAT
+ATGATCGTCGCGCAGCGTAACTACCAGTCGAATGCGCAGACCATCAAAACCCAGGACCAG
+ATCCTCAATACGCTGGTTAACCTGCGCTAAATGGATCACGCAATTTATACCGCCATGGGG
+GCGGCCAGCCAGACGCTTAACCAGCAGGCGGTAACGGCCAGCAACCTGGCTAATGCCTCA
+ACGCCGGGCTTTCGCGCGCAGCTTAACGCGCTACGCGCGGTGCCCGTTGATGGCCTCTCT
+TTAGCGACGCGCACGTTGGTTACGGCGTCGACGCCGGGGGCGGATATGACCCAGGGTCAG
+TTGGACTACACTTCCCGCCCGCTGGATGTTGCGTTACAGCAGGACGGCTGGCTGGTGGTG
+CAAGCGGCGGATGGCGCTGAAGGATATACCCGTAACGGGAATATCCAGGTGGGCCCGACC
+GGGCAGTTAACCATTCAGGGACATCCGGTTATCGGCGAAGGCGGCCCGATTACCGTTCCG
+GAAGGGTCGGAAATCACCATTGCGGCAGACGGCACGATCTCCGCGCTCAATCCCGGCGAC
+CCGCCAAACACGGTGGCGCCCGTTGGGCGGCTGAAGCTGGTCAAAGCGGAAGGCAATGAG
+GTGCAGCGGAGCGATGACGGTTTATTCCGCCTTACCGCCGAGGCACAGGCTGAACGCGGG
+GCGGTACTGGCCGCCGACCCGTCAATTCGCATTATGTCGGGCGTGCTGGAGGGCAGTAAC
+GTCAAGCCGGTTGAAGCCATGACCGACATGATCGCCAACGCACGTCGTTTTGAAATGCAG
+ATGAAGGTTATCACCAGCGTAGATGAGAACGAAGGGCGAGCTAACCAACTGCTGTCGATG
+AGTTAAATGATCAGTTCATTATGGATCGCCAAAACCGGTCTGGACGCGCAGCAAACCAAT
+ATGGATGTGATTGCCAATAACCTGGCAAACGTCAGCACCAATGGTTTTAAGCGTCAGCGC
+GCGGTATTTGAAGATCTGTTGTATCAGACCATCCGCCAGCCGGGCGCGCAGTCGTCCGAG
+CAGACGACGCTGCCTTCCGGGCTGCAAATCGGTACCGGCGTGCGTCCGGTCGCCACGGAG
+CGCCTGCACAGTCAGGGGAACCTGTCGCAGACCAACAACAGTAAAGATGTGGCGATTAAA
+GGGCAGGGCTTTTTCCAGGTCATGCTGCCGGACGGTACGTCTGCCTATACCCGCGACGGC
+TCTTTCCAGGTGGATCAGAATGGTCAACTGGTGACGGCGGGCGGTTTTCAGGTGCAGCCG
+GCAATCACCATTCCGGCCAACGCGTTAAGCATCACGATTGGCCGCGACGGCGTGGTCAGC
+GTTACCCAGCAAGGGCAGGCCGCGCCGGTTCAGGTCGGGCAGCTTAACCTGACCACCTTT
+ATGAACGACACCGGTCTGGAAAGCATCGGCGAGAACCTCTATATCGAAACGCAATCGTCC
+GGCGCGCCGAACGAAAGCACGCCGGGGCTCAACGGCGCGGGGTTGTTGTATCAAGGGTAT
+GTCGAAACGTCGAACGTTAACGTGGCGGAAGAGCTGGTGAACATGATTCAGGTTCAACGC
+GCCTATGAAATTAACAGTAAAGCAGTATCGACGACCGATCAGATGCTGCAGAAACTGACG
+CAACTCTAAATGGCCCTGATGGTCGCGACGCTGACAGGATGCGCCTGGATACCCGCTAAA
+CCGCTCGTGCAGGGGGCGACCACGGCGCAGCCGATACCTGGCCCGGTACCGGTGGCGAAT
+GGCTCCATATTTCAGTCTGCGCAGCCGATTAATTATGGCTATCAGCCGCTTTTTGAAGAT
+CGTCGACCGCGTAATATCGGCGATACGCTCACGATTGTGTTACAGGAAAACGTCAGCGCC
+AGTAAAAGCTCGTCGGCAAATGCCAGCCGCGACGGCAAAACCAGCTTTGGTTTTGATACG
+GTACCGCGTTATCTGCAGGGATTATTCGGTAATTCCCGCGCGGATATGGAGGCCTCCGGC
+GGCAACTCTTTTAATGGTAAAGGCGGCGCGAATGCCAGCAATACCTTTAGCGGCACGCTG
+ACCGTGACCGTCGATCAGGTTCTGGCCAATGGCAATTTACACGTCGTGGGGGAAAAACAG
+ATCGCGATTAATCAGGGAACGGAATTCATCCGCTTCTCCGGCGTGGTAAATCCACGCACC
+ATCAGCGGTAGCAACTCTGTTCCCTCGACACAGGTGGCGGATGCGCGGATTGAATATGTC
+GGGAACGGCTATATTAACGAAGCGCAAAATATGGGCTGGCTGCAACGTTTCTTCCTTAAT
+TTGTCGCCGATGTAAGTGTTTAAAGCTCTTGCAGGAATCGTTCTGGCACTGGTTGCCACT
+CTGGCGCACGCCGAGCGTATCCGGGATCTGACCAGTGTCCAGGGAGTACGGGAAAACTCG
+CTGATCGGCTACGGGCTGGTGGTCGGGCTGGACGGTACGGGCGACCAGACGACCCAGACG
+CCATTTACCACCCAGACGCTGAATAACATGCTGTCACAACTGGGGATTACGGTCCCCACC
+GGCACCAATATGCAGTTGAAAAACGTGGCGGCGGTGATGGTGACGGCGTCGTATCCGCCT
+TTTGCGCGACAGGGACAAACGATCGATGTCGTCGTTTCCTCAATGGGGAACGCTAAAAGT
+CTGCGTGGCGGGACGTTATTAATGACGCCGTTAAAAGGGGTGGACAGCCAGGTGTATGCT
+CTGGCGCAGGGCAATATTCTGGTCGGCGGCGCGGGCGCTTCCGCAGGCGGCAGTAGCGTG
+CAGGTTAACCAGCTTAATGGCGGGCGCATCACTAATGGCGCGATTATCGAACGCGAGTTG
+CCGACTCAGTTCGGCGCTGGCAACACCATTAATCTGCAATTGAACGACGAAGATTTTACG
+ATGGCGCAGCAAATTACCGACGCCATCAACCGCGCCCGCGGTTACGGCAGCGCCACTGCG
+CTTGATGCGCGAACGGTACAGGTACGCGTGCCCAGCGGCAACAGCTCGCAGGTGCGTTTT
+CTGGCGGACATTCAAAATATGGAAGTCAACGTGACGCCGCAGGATGCAAAAGTCGTGATC
+AACTCGCGTACCGGTTCGGTGGTCATGAATCGGGAAGTCACGCTGGATAGCTGCGCTGTG
+GCGCAGGGCAATTTGTCAGTGACAGTCAATCGCCAACTCAACGTCAACCAGCCGAATACG
+CCATTTGGCGGCGGGCAGACCGTGGTGACGCCACAGACTCAGATAGATTTGCGTCAGAGC
+GGCGGATCGCTACAGAGCGTGCGTTCCAGCGCCAATCTGAACAGCGTAGTGCGCGCGCTG
+AATGCGCTTGGCGCGACGCCGATGGATCTGATGTCGATTTTGCAGTCCATGCAGAGCGCG
+GGCTGTCTACGCGCCAAACTGGAAATCATCTGAATGATCGGAGACGGTAAATTGCTGGCC
+AGCGCGGCCTGGGATGCGCAATCTCTGAACGAACTGAAAGCGAAAGCGGGCCAGGACCCG
+GCGGCGAATATCCGTCCTGTGGCCCGTCAGGTGGAAGGGATGTTTGTGCAGATGATGCTG
+AAAAGTATGCGCGAGGCTTTACCCAAAGATGGTTTATTCAGCAGCGATCAGACGCGTCTG
+TATACCAGCATGTATGACCAGCAGATCGCCCAGCAGATGACCGCCGGTAAGGGATTGGGG
+CTGGCGGATATGATGGTTAAACAGATGACGGGCGGGCAGACGATGCCTGCAGATGATGCG
+CCGCAAGTACCGCTTAAATTCTCCCTGGAGACGGTAAACAGCTATCAAAATCAGGCGCTG
+ACCCAACTGGTGCGCAAAGCCATACCGAAAACGCCGGACAGCAGCGATGCGCCGCTCTCC
+GGCGACAGTAAAGACTTTCTGGCCCGGCTTTCGCTCCCGGCGAGGCTGGCCAGCGAACAA
+AGCGGGGTGCCGCATCATCTGATTCTGGCGCAGGCGGCGCTGGAGTCCGGCTGGGGGCAG
+CGGCAAATCCTGCGGGAGAATGGCGAACCCAGCTATAACGTATTTGGCGTGAAAGCGACC
+GCCAGTTGGAAAGGGCCGGTGACGGAAATCACCACCACTGAATACGAAAATGGCGAAGCG
+AAAAAAGTGAAAGCGAAATTCCGCGTCTATAGCTCGTATCTGGAGGCGTTATCGGATTAT
+GTCGCGCTGTTAACGCGTAACCCACGCTACGCTGCCGTGACCACTGCCGCCACGGCAGAG
+CAGGGCGCAGTGGCTCTGCAAAACGCCGGATACGCCACTGACCCGAATTACGCGCGTAAA
+TTGGCCAGCATGATTCAGCAGTTGAAAGCGATGAGTGAAAAGGTCAGCAAAACCTACAGC
+GCGAATCTCGACAATCTCTTTTAAATGTCCAGCTTGATTAATCACGCCATGAGCGGACTT
+AACGCCGCGCAGGCCGCGTTAAATACGGTCAGTAATAACATCAACAATTATAACGTTGCG
+GGTTATACCCGGCAGACAACTATTCTGGCGCAGGCAAACAGTACGTTAGGGGCTGGCGGC
+TGGATAGGTAATGGCGTTTACGTTTCAGGCGTACAGCGCGAATATGATGCGTTTATCACT
+AATCAGCTACGCGGCGCGCAAAACCAGAGCAGCGGCTTAACCACGCGCTATGAACAAATG
+TCGAAAATCGACAACCTGCTGGCCGATAAATCCAGCTCACTGTCTGGCTCGCTGCAGAGT
+TTTTTTACCAGCCTGCAAACGTTAGTCAGTAATGCGGAAGATCCTGCGGCGCGTCAGGCG
+CTGATTGGTAAAGCGGAAGGGCTGGTAAACCAGTTCAAAACCACCGATCAGTATCTGCGC
+GATCAGGATAAACAGGTCAATATCGCGATTGGCTCCAGCGTGGCGCAAATCAACAATTAC
+GCGAAGCAGATAGCTAACCTGAACGATCAAATCTCCCGTATGACGGGCGTAGGCGCGGGC
+GCATCGCCGAACGACCTGCTCGATCAACGTGATCAGTTGGTTAGCGAGCTTAACAAGATC
+GTTGGCGTCGAGGTGAGTGTACAGGACGGCGGCACCTATAACCTGACGATGGCCAATGGC
+TATACGCTGGTGCAGGGGTCGACGGCGCGTCAGTTGGCGGCGGTTCCCTCCAGCGCCGAC
+CCGACGCGAACGACTGTCGCTTATGTCGATGAGGCCGCCGGTAACATCGAAATTCCGGAA
+AAGTTGCTGAACACCGGTTCGCTCGGCGGGCTACTGACGTTCCGTTCTCAGGATCTGGAT
+CAGACTCGTAATACGCTGGGCCAGTTGGCGTTGGCGTTTGCCGATGCGTTTAACGCGCAG
+CATACCAAAGGTTATGACGCCGACGGCAATAAAGGGAAAGACTTCTTTAGCATTGGCTCG
+CCGGTGGTATATAGCAACAGTAATAATGCCGATAAAACGGTATCGCTAACCGCTAAGGTG
+GTCGACAGCACGAAGGTTCAGGCGACGGATTATAAGATTGTTTTTGACGGTACAGACTGG
+CAGGTTACTCGCACTGCGGATAACACCACCTTCACGGCAACAAAAGATGCTGACGGAAAA
+CTGGAGATTGACGGTCTGAAAGTGACGGTAGGGACTGGCGCACAGAAAAACGACAGTTTT
+CTTCTCAAGCCGGTCAGCAATGCTATCGTCGACATGAACGTTAAAGTGACAAATGAAGCC
+GAGATTGCGATGGCGTCTGAGTCAAAACTCGATCCTGATGTGGATACCGGCGACAGCGAT
+AACCGCAATGGTCAGGCATTGCTGGACTTACAAAACAGCAATGTAGTGGGCGGCAACAAA
+ACCTTTAACGATGCTTACGCCACGTTGGTCAGCGATGTGGGTAACAAAACGTCAACGCTG
+AAAACCAGCAGCACCACGCAGGCGAATGTGGTTAAACAGCTTTATAAACAGCAACAGTCG
+GTTTCCGGCGTTAACCTCGACGAAGAGTACGGCAATTTGCAGCGTTATCAGCAGTATTAT
+CTGGCGAATGCGCAAGTATTGCAGACCGCGAATGCGCTGTTTGATGCGTTATTGAATATT
+CGCTAAATGCGTATCAGTACCCAGATGATGTACGAACAAAATATGAGCGGCATCACTAAT
+TCTCAGGCCGAATGGATGAAGCTGGGCGAGCAGATGTCTACCGGTAAGCGCGTTACCAAC
+CCATCTGACGATCCGATCGCCGCGTCGCAGGCGGTAGTACTCTTTCAGGCGCAGGCGCAG
+AATAGCCAGTACGCCCTGGCGCGTACGTTTGCCACCCAAAAAGTGTCGCTGGAAGAAAGC
+GTACTCAGTCAGGTGACGACGGCGATTCAAACCGCGCAGGAAAAAATCGTCTATGCCGGA
+AACGGCACGTTAAGCGACGATGACCGCGCGTCGCTGGCGACGGATTTACAGGGGATCCGC
+GATCAGCTGATGAACCTGGCAAACAGCACTGACGGCAATGGTCGCTATATCTTTGCCGGG
+TATAAAACGGAAGCGGCGCCATTCGACCAGGCGACAGGTGGTTATCATGGCGGCGAGAAA
+AGTGTTACCCAGCAGGTGGATTCCGCACGCACGATGGTAATTGGCCATACGGGAGCGCAA
+ATTTTTAATAGCATCACCAGCAATGCGGTGCCGGAACCGGATGGCTCGGACTCCGAAAAG
+AATCTGTTTGTCATGCTCGATACGGCAATTGCCGCGCTCAAGACCCCGGTGGAAGGCAAT
+GACGTGGAAAAAGAAAAAGCCGCTGCCGCCATTGATAAAACCAATCGCGGCTTAAAAAAT
+TCGCTTAATAACGTCCTGACCGTTCGTGCGGAACTGGGAACGCAACTGAGCGAACTCAGT
+ACGCTGGATTCACTGGGAAGCGACCGTGCGCTGGGACAGAAGCTACAGATGAGCAACCTG
+GTAGATGTGGACTGGAACTCGGTCATTTCCTCCTACGTCATGCAACAGGCGGCATTACAG
+GCGTCCTATAAAACGTTTACCGACATGCAGGGAATGTCGCTTTTCCAGTTGAACCGGTAA
+atggagataattttttatcacccgacatttaacgccgcctggtgggtaaatgcgctggag
+aaggctctcccacatgcgcgcgttcgtgaatggaaggtcggtgataacaaccccgcagac
+tatgcgcttgtatggcagcccccggttgaaatgctggccggaagacgcttaaaagccgtc
+tttgtgctgggcgcgggggtggatgcaattctgagtaaattaaatgcgcatccggaaatg
+ctggacgcctccattcctctattccgtctggaagataccggaatgggcctgcaaatgcag
+gagtatgccgccagccaggtattacactggttccgtcgtttcgatgattatcaggcgctg
+aaaaatcaggcgctatggaaaccgttgccggaatatacccgcgaagagtttagcgtcggt
+atcataggcgcaggggtactgggcgcaaaagtggcagaaagtctacaggcgtgggggttc
+ccgttacgttgctggagtcgtagccgcaaatcctggcctggcgtggaaagttatgtaggg
+cgtgaagaactgcgcgctttcctgaaccagacgcgggtgctgattaatctgctgccgaat
+acggcccaaacggtaggaattattaatagcgaattgttggatcaattgccggatggcgct
+tacgtgctgaatctcgcgcgcggcgttcatgttcaggaggcggatctgctggctgcgctt
+gatagcggtaagctaaaaggcgcgatgttggatgtctttagccaggaaccgttaccgcag
+gaaagtccattatggcgccatccgcgagtcgccatgacgccgcacattgcggcagtcacc
+cgtccggcggaagccatcgattatattagccgcaccattacccagctggagaagggagag
+ccggtgacggggcaggtggatcgggcgagaggatattggATGTCCGTAATCAAGAAAAAT
+ATCCCTGCCATAGGCCTGTGTATCTGCGCTTTTTTTATCCATTCTGCGGTAGGGCAACAA
+ACGGTACAGGGCGGCGTTATCCATTTTCGCGGCGCGATTGTTGAGCCACTGTGCGATATT
+TCTACTCACGCCGAAAATATTGATTTAACCTGCCTACGCGAAGGTAAAAAGCAAATGCAC
+CGGATAGACCTTCGGCAGGCATCTGGATTACCGCAGGATATTCAGTCCATTGCGACGGTA
+CGGCTGCATTATCTCGATGCGCAAAAAAGCCTGGCGGTGATGAATATTGAGTACCGTTAA
+ATGGCAAACCATCGTGGCGGTTCCGGTAATTTTGCGGAAGACCGCGAAAGAGCATCAGAA
+GCAGGTCGTAAAAGTGGTCAGCACAGCGGGGGCAATTTTAAGAATGACCCGCAGCGTGCA
+TCCGAAGCAGGCAAAAAAGGGGGCAAAAGCAGTAACCGTAATCGCTAGATGGTAATGTCC
+GCACCAGGACACATTGTTTACAGTAGTTACAACACCCTGTACGGACATTCTCTCTCCGGT
+GGTGGTCTTGTCATCTTAAAAGCTCTCATCATTTCCCTTACTGTCCATACCCATGACGCC
+ATATGTGGTGCGCGTAGCCGTGTGTGGCGTCGTTTCAAAAAGCAAGCTAAGGCTTACAAG
+GAAGCCAACCCTCAGATGTGTGTGCGCATAATCGCGTTCAAGAGAACGCGGGTGATGTAT
+ACCTACAACTCAAGGTGCTATCCATGGGAAGACAAAAAGCAGTGAATGAAACGAATTTTC
+CTTACCTGCGCGGCGTTGTTGTTCAGCAGTCAGGCGTTGGCCGATGAGTGTGCCAGCGCC
+AGTACGCAGCTGGAAATGAATCGCTGCGCCGCCGCGCAATACCAGGCGGCAGATAAAAAG
+CTGAACGAAACCTATCAAAGCGCGATTAAGCGTGCGCAACCGCCGCAGCGTGAGCTATTG
+CAAAAAGCGCAGGTGGCATGGATTGCCCTGCGCGACGCCGATTGCGCGCTGATTCGCTCA
+GGTACGGAGGGCGGCAGCGTTCAACCCATGATCGCCAGCCAGTGCCTGACCGATAAAACG
+AACGAACGCGAAGCGTTTTTAGCCTCGCTGCTGCAATGTGAAGAGGGTGATTTGAGCTGC
+CCACTGCCGCCAGCCGGTTAAgtgcgtatattcgcggtgagcataatggtgattaccctg
+agcggctgcggcagtattatcagcagaacgatccccggacaaggacacggcaaccagtat
+taccctggcgtgcagttggatatgcgtgattccgcatggcgctatatcactatcctcgat
+ctgcccttctcactgatcttcgatacactgctactgccgctcgatattcaccacgggcct
+tatgagtaaATGTGCCAACGTGCGATCGCCAATATTGATATCAGCAAAGAGTATGACGAA
+AGCATGGGCAGTAACGATGTGCATTATCAGTCGTTTGCTCGTATGGCGGATTTCTTTGGT
+CGTGATATGCAGGCGCATCGCCACGACCAGTTTTTTCAAATGCACTTTCTTGATACCGGG
+CAGATTGAGCTACAGCTCGACGATCATCGCTATTCGGTGCAGGCGCCGCTATTTGTGCTA
+ACGCCGCCCTCGGTGCCGCATGCTTTTATTACCGAATCGGATAGCGATGGTCATGTTCTG
+ACGGTACGCGAAGAGCTGGTTTGGCCGCTGCTGGAAGTGCTTTATCCCGGCACCAGAGAG
+GCCTTCGGCCTGCCGGGAATCTGCCTGTCGCTGGCGGATAAACTCAACGAGCTGGCGGCG
+CTCAAACATTACTGGCAGCTAATTGAGCGGGAGTCCACGGAACAACTGGCTGGCTGCGAA
+CATACCTTGGTACTACTGGCGCAGGCGGTATTTACCTTGCTGTTGCGTAATGCGAAGCTG
+GACGATCATGCCGCAACCGGGATGCGCGGTGAACTGAAACTTTTTCAGCGCTTTACCCTG
+TTAATTGACAACCACTTCCATCAGCACTGGACGGTGCCCGATTATGCCTGTGAGTTGCAT
+ATTACCGAATCTCGTTTGACCGATATTTGCCGACGTTTTGCTAATCGCCCGCCTAAACGC
+CTGATTTTTGATCGGCAATTACGCGAGGCGAAACGACTGCTGCTTTTTTCCGACAATGCT
+GTCAACGAGATCGCCTGGCAATTAGGTTTTAAAGATCCGGCTTATTTCGCCCGTTTCTTT
+AATCGCCTTGCTGGCTGTTCTCCTTCGCAGTTTCGCCAACGTGAAGTTCCTTCTTTTCTC
+AACTAAATGGGACGCACACCGGATTACAAAGCCGCCTTTGGCTGCGCTCTGGGCGCTAAC
+CCAGCCTTCTACGGCCAGTTTGAGCAGAACGCCCGTAACTGGTACACCCGTATTCAGGAG
+ACCGGCCTGTACTTTAACCATGCAATCGTCAACCCGCCCATTGACCGCCACAAACCTGCC
+GACGAAGTGAAAGACGTCTATATCAAGCTGGAGAAAGAGACGGACGCCGGGATTATTGTC
+AGCGGGGCGAAAGTTGTCGCCACTAACTCCGCCCTGACTCACTACAACATGATTGGTTTC
+GGCTCAGCCCAGGTGATGGGCGAAAACCCGGATTTTGCTCTGATGTTTGTCGCGCCAATG
+GATGCCGAAGGCGTAAAACTTATTTCGCGCGCCTCGTATGAAATGGTCGCGGGCGCGACG
+GGCTCGCCGTTTGATTATCCCCTCTCCAGCCGTTTTGATGAAAACGATGCCATTCTGGTG
+ATGGACAAGGTGCTGATCCCGTGGGAAAACGTATTAATTTACCGTGATTTCGATCGTTGT
+CGTCGCTGGACGATGGAAGGCGGCTTTGCCCGTATGTATCCACTGCAAGCCTGTGTTCGT
+CTGGCGGTAAAACTTGATTTCATTACCGCGCTGCTGAAAAAATCGCTCGAATGTACGGGT
+ACCGTAGAGTTCCGGGGCGTGCAGGCCGATCTCGGCGAAGTCGTGGCCTGGCGCAATATG
+TTCTGGGCATTGAGCGATTCTATGTGTTCTGAAGCAACCCCGTGGGTAAACGGCGCCTGG
+CTACCGGACCACGCCGCGCTGCAAACCTATCGTGTGATGGCCCCAATGGCCTACGCGAAA
+ATTAAAAATATTATTGAACGTAACGTTACCAGCGGCCTGATTTACCTGCCTTCCAGCGCC
+CGCGATCTGAATAATCCGCAAATCGACCAGTACCTGGCGAAATACGTACGCGGCTCTAAC
+GGAATGGACCATGTTGAACGTATCAAAATTCTTAAATTGATGTGGGATGCCATCGGCAGC
+GAGTTTGGCGGTCGCCATGAGCTGTACGAGATTAACTACTCGGGCAGCCAGGATGAAATT
+CGTCTGCAGTGTCTGCGTCAGGCCCAGAGCTCCGGCAATATGGATAAGATGATGGCAATG
+GTCGATCGCTGCCTCTCCGAATACGATCAGAATGGCTGGACGGTTTCGCATTTGCACAAT
+AACGACGACATCAATCAACTGGATAAGCTGCTGAAATAAATGCAAGTAGATGAACAACGT
+CTGCGTTTTCGCGATGCGATGGCAAGTCTGGCGGCAGCGGTCAACATCGTAACCACGGCG
+GGTCACGCCGGACGCTGCGGTATCACCGCAACAGCGGTTTGCTCAGTCACTGATACGCCG
+CCCTCCGTGATGGTATGTATTAATGCCAATAGCGCCATGAACCCCGTTTTTCAGGGCAAC
+GGCAGGCTGTGCATTAATGTACTTAACCATGAGCAGGAGCTGATGGCGCGCCACTTTGCC
+GGTATGACGGGGATGGCGATGGAGGAGCGTTTTCACCAGCCATGTTGGCAAAACGGGCCG
+CTGGGCCAGCCGGTACTTAACGGCGCGCTGGCCAGTCTTGAAGGCGAGATCAGCGAGGTA
+CAAACCATTGGCACGCATCTGGTGTATCTGGTGGCGATCAAAAATATTATTCTTAGCCAG
+GAGGGGCATGGCCTGATTTATTTCAAACGCCGTTTTCATCCGGTCAGACTTGAGATGGAA
+GCGCCTGTTTAAATGAAGGGTACTGTTTTCGCCGTTGCGTTAAACCATCGCAGCCAGCTT
+GATGCCTGGCAAGAGGCTTTCTCTCAGCCTCCCTATAATGCGCCGCCTAAAACCGCAGTG
+TGGTTCATCAAGCCGCGTAATACGGTGATTCGTCACGGCGAACCCATTCCTTATCCGCAG
+GGAGAAAAGGTACTGAGCGGCGCGACAGTGGCGCTCATTGTGGGGAAAACCGCCAGCCGG
+ATACGCCCTGAAGCGGCGGCGGACTATATCGCCGGGTATGCGCTGGCTAACGAGGTCAGC
+CTGCCGGAAGAGAGCTTTTATCGCCCGGCGATTAAAGCGAAATGTCGCGATGGCTTTTGC
+CCGCTGGGTGAAATGGCGCCGCTGAGTGATGTGGATAATCTCACCATTATCACTGAAATC
+AACGGACGAGAAGCGGACCACTGGAATACTGCCGATTTACAGCGTAGCGCCGCACAACTG
+CTTAGCGCGTTAAGTGAGTTCGCTACACTTAACCCTGGCGATGCGATCTTACTTGGTACG
+CCGCAGAATCGCGTTGCGCTGCGTCCCGGCGATCGGGTGCGTATTCTGGCGAAAGGTTTA
+CCCGCGCTGGAAAATCCGGTTGTCGCAGAAGATGAATTCGCCCGCCACCAGACGTTTACG
+TGGCCGCTGTCAGCGACGGGAACGTTATTTGCGCTGGGGTTGAACTACGCCGATCACGCC
+AGCGAGCTGGCATTTACGCCGCCGAAAGAGCCGCTGGTATTTATCAAAGCGCCAAACACC
+TTTACCGAACATCACCAAACGTCGGTGCGCCCGAACAACGTCGAATATATGCACTACGAA
+GCCGAGCTGGTCGTGGTGATTGGCAAAACGGCGCGTAAGGTGAGCGAAGCCGAAGCCATG
+GAGTATGTGGCCGGTTACACCGTCTGTAACGACTACGCGATCCGCGACTATCTGGAAAAC
+TACTACCGTCCGAATCTGCGGGTAAAAAGCCGCGACGGCCTGACGCCGATAGGCCCGTGG
+ATTGTGGATAAAGAGGCGGTTTCTGATCCGCACAACCTGACGTTACGCACCTTTGTCAAC
+GGTGAGCTGCGGCAGGAAGGGACGACCGCCGATCTGATCTTCAGCATCCCGTTCCTGATT
+TCTTATCTGAGCGAATTTATGACGTTGCAACCGGGCGACATGATTGCCACCGGTACGCCG
+AAAGGGCTGTCCGATGTGGTGCCGGGGGATGAAGTTGTCGTTGAAGTAGAAGGCGTGGGT
+CGCCTGGTTAACCGAATCGTCAGTGAGGAGAGCGCAAAATGAATGAAAAATGCTTTCAAA
+GACGCGTTAAAAGCGGGGCGCCCGCAAATCGGTTTGTGGCTGGGGCTTGCCAACAGTTAC
+AGCGCTGAACTGTTAGCGGGCGCCGGCTTCGACTGGCTACTGATTGACGGTGAACACGCG
+CCAAACAACGTGCAGACGGTGTTGACCCAGTTGCAGGCGATTGCGCCTTATCCCAGCCAG
+CCGGTGGTGCGTCCGTCATGGAACGATCCGGTACAGATTAAGCAACTGCTCGACGTCGGC
+GCGCAAACGCTGCTGATACCGATGGTGCAGAATGCCGATGAAGCGCGAAACGCCGTGGCG
+GCTACGCGTTATCCGCCTGCCGGTATTCGCGGCGTGGGCAGCGCGCTGGCGCGGGCATCG
+CGCTGGAATCGCATTCCGGACTATCTCCACCAGGCCAACGACGCCATGTGCGTACTGGTG
+CAGATTGAAACGCGTGAGGCGATGAGCAATCTGGCGTCAATTCTCGACGTGGATGGCATT
+GACGGCGTGTTTATTGGCCCGGCGGATCTCAGCGCCGATATGGGCTTTGCCGGCAATCCG
+CAGCACCCGGAAGTGCAGGCGGCGATTGAGAACGCCATCGTGCAGATACGCGCGGCGGGG
+AAAGCGCCGGGGATTCTGATGGCCAATGAAGCACTGGCGAAACGTTATCTGGAACTGGGG
+GCGCTATTTGTCGCCGTCGGCGTTGACACCACGCTGCTGGCGCGCGGAGCGGAGGCGCTG
+GCGGCGCGCTTTGGCGCAGAAAAAAAACTGTCCGGTGCGTCCGGCGTCTATTAAATGCAT
+GATTCATTAACCATCGCCTTGCTTCAGGCGCGCGAAGCGGCAATGACCTATTTCCGCCCC
+ATCGTTAAAAGCCACAATCTGACCGACCAGCAATGGCGCATTGTGCGAATCCTGGCCGAT
+AGCCCCTCTATGGATTTTCACGAGCTGGCCTTTCGTACCTGTATTTTGCGTCCAAGTCTG
+ACCGGAATATTGACGCGCATGGAGCGAGACGGACTGGTGTTGCGACTCAAGCCGGTTAAC
+GATCAGCGTAAGTTATATGTCATGTTGACGGAGCAGGGACAAACGTTGTACGCCCGTGCC
+CGGAGCGAGGTAGAAGAGGCTTATCGAAAAATTGAGGCCGATTTCACGCCCGAAAAAACA
+CAGCAATTGATGCTGCTGCTGGACGATCTTATTGCTCTGGGGCGCCAGCATCCTGATAGC
+GAAGCGGAAGCATAGATGAGCGACACATCATCTGCACTTCCGGAAAGCCCCGAGTCTGTC
+GGTTCGCACAACGCGCTCAGCACGGGTCAACAAACCGTCATAAATAAACTGTTCCGCCGA
+CTGATCGTATTTTTATTCGTGTTGTTTATCTTCTCGTTTTTAGACCGTATCAACATCGGT
+TTTGCCGGGTTGACGATGGGGCAGGATCTGGGGTTAAGCGCCACCATGTTTGGTCTTGCC
+ACGACGCTGTTTTACGCCACCTACGTCATTTTCGGCATTCCCAGCAACGTGATGTTGAGC
+ATCGTCGGCGCCCGCCGCTGGATTGCGACCATTATGGTGCTATGGGGCATTGCATCTACC
+GCCACGATGTTCGCGGTGGGACCGAAAAGCCTGTATGTGCTGCGAATGCTGGTGGGCATT
+ACCGAAGCGGGCTTTTTGCCAGGAATATTGCTCTATTTAACCTACTGGTTCCCGGCATTT
+TTCCGCGCCCGCGCCAACGCATTATTTATGATTGCCATGCCGGCCACTACCGCGTTGGGG
+TCAATTGTCTCCGGCTATATTTTATCGCTGGACGGCATATTCAATCTGCATGGATGGCAG
+TGGTTATTCCTGTTGGAAGGATTTCCGTCAGTTTTGTTAGGCATTATGGTCTGGTTTTAC
+CTGGATGATACCCCGGCAAAAGCCAAATGGCTGACGGCAGAGGATAAAAAATGTTTGCAG
+GAGATGATGGATAATGATCGCCTGACGCTGGTTCAGCCTGAGGGGGCCATCAGCCATAAC
+GCCATGCAGCAGCGTAGCCTGTGGCGCGAAGTATTCACGCCAATTGTACTGATGTATACG
+CTGGCCTATTTTTGCCTTACCAATACGCTTAGCGCCATTAGTATCTGGACGCCGCAAATC
+CTGAAAAGTTTTAATGAAGGCAGCAGCAATATCACCATCGGCCTGCTGGCGGCGATCCCG
+CAGATTTGTACTGTTCTGGGCATGATTTACTGGAGCCGCCATTCGGACAAACATCAGGAG
+CGTAAACACCACACTGCGTTACCGTTCCTGTTTGCCGCCGCGGGCTGGCTGCTGGCGTCG
+GCGACCGACCGTAACCTGATCCAGCTCCTGGGGATCGTGATGGCATCCACGGGTTCCTTT
+AGCGCGATGGCGATCTTCTGGACCACGCCGGATCAGTCGATCAGTTTACGCGCCAGGGCG
+ATAGGCATTGCGGTCATCAATGCCACCGGCAATATTGGCTCCGCGCTCAGCCCGGTTATG
+ATTGGCTGGCTAAAAGATATCACTGGTAGCTTCAATAGCGGACTCTGGTTTGTCGCTTCT
+CTGTTAGTCGTCGGCGCCGCCATTATCTGGCTCATTCCCATGAAAGCATCGCGTCCGCGC
+GCCACCCCTTGAATGGGCAAGTTAGCGTTAGCAGCAAAAATTACCCACGTGCCGTCGATG
+TATCTTTCTGAACTGCCAGGAAAAAATCACGGTTGTCGTCAGGCAGCCATTGATGGGCAT
+ATTGAAATTGGCAAGCGTTGCCGCGAAATGGGCGTTGACACCATTATCGTATTCGACACC
+CACTGGCTGGTGAATAGCGCTTACCACATTAATTGTGCCGACCATTTCCAGGGCGTCTAT
+ACCAGCAACGAATTGCCGCACTTTATTCGCGACATGACCTATGACTATGACGGTAATCCG
+GCGCTCGGCCATCTGATCGCCGACGAGGCGGTCAAACTGGGCGTGCGCGCCAAAGCGCAC
+AACATCCCGAGCCTGAAGCTGGAGTATGGCACGCTGGTGCCGATGCGCTACATGAACAGC
+GACAAGCACTTCAAAGTGGTCTCCATCTCGGCGTTCTGCACTGTGCATGATTTTGCCGAC
+AGCCGCAAACTGGGCGAAGCCATTCTCAAGGCGATTGAGAAATATGACGGTACCGTAGCG
+GTATTCGCCAGTGGTTCTCTGTCGCACCGTTTTATTGACGACCAACGGGCGGAAGAGGGG
+ATGAACAGCTACACCCGCGAGTTCGATCATCAAATGGACGAGCGCGTGGTCAAGCTGTGG
+CGCGAAGGCAAATTCAAGGAGTTTTGCACCATGTTGCCGGAGTACGCCGACTACTGCTAC
+GGCGAAGGCAACATGCACGACACGGTCATGCTACTGGGAATGCTGGGGTGGGACAAATAC
+GACGGCAAGGTGGAGTTCATCACCGACCTGTTCGCCAGCTCCGGTACCGGCCAGGTAAAC
+GCTGTTTTCCCGCTGCCTGCGTAAATGAAGAAAATAAATCATTGGATTAACGGCAAAAAC
+GTTGCAGGTAACGACTACTTCCAGACCACTAACCCGGCGACCGGTGATGTGCTGGCGGAA
+GTAGCCTCCGGCGGTGAAGCAGAAGTGAACCAGGCTGTCGCGGCGGCAAAAGAGGCGTTC
+CCGAAATGGGCCAACCTGCCGATGAAAGAGCGCGCGCGCCTGATGCGCCGCCTTGGCGAC
+CTGATTGACCAGCATGTGCCGGAAATCGCGGCGATGGAAACCGCCGACACCGGCCTGCCT
+ATTCACCAGACTAAAACGTGCTGAGTGCTGATCCCGCGCGCCTCGCATAACTTCGAATTC
+TTCGCCGAAGTGTGCCAGCAGATGAACGGCAAGACCTATCCGGTTGACGATAAAATGCTC
+AATTATACGCTGGTGCAGCCCGTCGGCGTCTGCGCGCTGGTGTCGCCGTGGAACGTGCCG
+TTTATGACCGCGACTTGGAAAGTTGCGCCGTGCCTGGCGCTGGGTAACACCGCGGTGCTC
+AAAATGTCCGAGCTGTCGCCGCTGACTGCCGACAGGCTGGGCGAGCTGGCACTGGAGGCA
+GGAATTCCGGCAGGCGTGCTGAACGTGGTGCAGGGCTACGGCGCGACGGCGGGCGATGCG
+CTGGTACGCCACCATGACGTGCGTGCGGTGTCGTTTACCGGCGGTACCGCCACCGGTCGC
+AATATCATGAAAAATGCCGGGCTGAAAAAATACTCGATGGAGCTGGGCGGCAAATCGCCG
+GTGCTGATTTTTGAAGACGCCGACATTGAGCGCGCGCTGGACGCCGCGCTGTTCACCATC
+TTCTCGATCAACGGCGAACGCTGCACCGCTGGGTCGCGCATCTTTATCCAGCAGAGCATT
+TACCCTGAGTTCGTGAAGCGCTTTGCCGAACGCGCGAATCGCCTGCGTGTCGGCGATCCG
+ACCGACCCGAACACCCAGGTCGGCGCGCTGATTAGCCAACAGCACTGGGAGAAAGTCTCC
+GGTTATATCCGCCTCGGCATTGAAGAGGGGGCAACGCTGCTGGCGGGCGGTGCGGAAAAA
+CCCACTGACCTGCCTGCGCATCTGAAAGGCGGTAACTTCCTGCGCCCAACCGTGCTGGCC
+GATGTCGACAACCGTATGCGCGTTGCGCAGGAAGAGATCTTTGGGCCGGTCGCCTGCCTG
+CTGCCATTCAAAGACGAAGCGGAAGGGTTACGTTTGGCGAACGATGTGGAATACGGTCTG
+GCCTCTTATATCTGGACCCAGGACGTGAGCAAAGTGTTGCGCCTGGCGCGTGGGATTGAA
+GCCGGCATGGTCTTCGTCAACACCCAGAACGTCCGCGACCTGCGCCAGCCGTTCGGCGGC
+GTGAAAGCCTCCGGTACCGGGCGCGAAGGCGGCGAATATAGCTTCGAAGTGTTTGCGGAA
+ATGAAAAACGTCTGCATCTCAATGGGCGACCATCCTATCCCAAAATGGGGAGTTTGAATG
+CCGCACTTTATTGCTGAATGTACTGAAAATATTCGCGAGCAGGCTGATTTACCAAGCCTG
+TTCAGCAAGGTAAACGAGGCGCTGGCCGCCACCGGGATTTTCCCCATCGGCGGTATCCGC
+AGTCGCGCCCACTGGCTGGATACCTGGCAGATGGCTGACGGTAAGCATGATTACGCGTTT
+GTGCATATGACGCTGAAAATCGGCGCCGGGCGCAGCCTGGAGAGCCGTCAGGAAGTCGGC
+GAAATGCTGTTTGGGCTGATTAAAGCCCACTTCGCCGACCTGATGGAGAACCGCTATCTG
+GCGCTGTCGTTTGAGATTGCCGAGTTACATCCAACGCTCAATTACAAACAAAACAACGTA
+CACGCGTTATTTAAATAGATGCTCGATAAACAGACCCATACCCTGATCGCTCAGCGACTT
+AATCAGGCTGAAAAACAGCGTGAACAGATTCGCGCAGTGTCGCTGGATTATCCCAACATC
+ACTATTGAAGATGCCTATGCCGTACAGCGTGAATGGGTCAATATCAAGATTGCCGAAGGG
+CGCACGCTCAAAGGCCACAAAATCGGCCTGACCTCAAAAGCGATGCAGGCCAGCTCGCAA
+ATCAGCGAACCGGATTACGGCGCGCTGCTTGACGATATGTTCTTCCATGACGGCGGAGAT
+ATCCCCACCGACCGTTTTATCGTCCCGCGTATTGAAGTGGAGCTGGCGTTCGTGCTGGCG
+AAACCGCTGCGCGGCCCTCACTGCACGCTGTTCGACGTCTACAACGCCACGGATTATGTG
+ATTCCGGCGCTGGAACTGATTGACGCCCGCAGCCACAACATCGACCCGGAAACCCAGCGC
+CCGCGCAAAGTGTTCGACACCATTTCCGACAACGCCGCCAACGCCGGGGTGATCCTCGGT
+GGTCGCCCCATCAAACCAGACGAGCTGGATCTGCGCTGGATCTCCGCGCTGCTCTATCGC
+AACGGCGTGATCGAAGAAACCGGCGTCGCCGCAGGCGTGCTGAATCATCCGGCCAACGGC
+GTGGCGTGGCTGGCGAACAAGCTTGCCCCCTACGATGTCCAGCTTGAAGCCGGGCAGATC
+ATCCTCGGCGGCTCGTTCACCCGCCCGGTGCCGGCGCGCAAGGGCGACACCTTCCATGTC
+GATTACGGCAACATGGGCGCGATCAGTTGCCGGTTTGTGTAAATGAGCTCTGTACCCGCG
+CCGCGTGAATATTTTCTTGACTCTATCCGCGCATGGCTGATGTTGTTAGGGATTCCCTTT
+CATATCTCGTTGATCTATTCCACTCACAGTTGGCATGTCAATAGCGCCGCGCCATCGTGG
+TGGCTAACCCTGTTTAACGATTTTATCCACGCTTTTCGTATGCAGGTGTTTTTTGTTATT
+TCTGGTTATTTTTCGTACATGTTATTTTTACGTTATCCATTAAAACACTGGTGGAAAGTA
+CGGGTAGAACGTGTGGGTATTCCCATGCTTACCGCAATCCCTTTGCTTACCTTGCCGCAA
+TTTATCCTGTTGCAATATGTCAAAGAGAAAACAGAGAACTGGCCTACACTCTCTGCCTAT
+GAAAAATATAATACGTTAGCGTGGGAACTCATTTCACATCTGTGGTTTTTACTGGTGCTG
+GTGATATTAACCACCGTCAGCATCGGGATTTTTACCTGGTTCCAAAAAAGGCAGGAAACA
+AGCAAGCCTCGTCCCGCCGCTATTTCGCTGGCCAAACTTTCGCTTATTTTTTTCCTGCTG
+GGGGTGGCGTACGCTGCTATCAGGCGCATTATATTCATCGTATATCCGGCAATCCTCAGT
+GACGGCATGTTCAATTTTATTGTGATGCAAACGCTATTTTATGTGCCGTTTTTTATTCTC
+GGCGCGTTGGCCTTCATTCACCCCGATCTGAAAGCGCGCTTCACCACGCCCTCACGCGGA
+TGCACTTTAGGCGCTGCCGTTGCTTTTATCGCGTATCTGCTGAATCAACGTTATGGGAGC
+GGCGACGCCTGGATGTACGAAACCGAATCCGTGATTACGATGGTAATGGGGCTATGGATG
+GTGAACGTGGTATTTTCACTGGGGCATCGCTTGTTAAACTTTCAGTCCGCGCGTGTCACC
+TATTTCGTGAATGCTTCGCTGTTTATTTATCTGGTGCATCATCCCTTAACGCTTTTCTTT
+GGCGCGTATATTACACCGCATATCTCCTCCAACCTGATCGGGTTCTTGTGCGGGCTGATA
+TTTGTTATGGGTATTGCGTTAATTCTGTATGAAATTCATTTACGCATCCCGCTCCTGAAA
+TTTCTCTTTTCAGGTAAACCGCCGGTAAAACAAGAAAGCCGCGCCGCGATCGGGTAGATG
+AAACATAAACGACAAATGATGAAAATGCGTTGGTTGGGCGCAGCTATTATGTTAACGCTC
+TACGCATCATCGAGCTGGGCGTTCAGTATTGATGACGTGGCAAAACAAGCTCAATCTTTA
+GCCGGGAAAGGCTATGAGGCGCCTAAAAGCAACTTGCCCTCCGTTTTCCGCGACATGAAA
+TATGCGGATTATCAGCAGATCCAGTTTAACAGCGATAAAGCCTACTGGAACAACTTAAAG
+ACCCCTTTTAAGCTCGAATTTTACCATCAGGGGATGTACTTCGATACGCCGGTCAAGATT
+AACGAAGTGACGGCGACGACGGTCAAAAGAATCAAATACAGCCCGGATTACTTCAATTTT
+GGCAATGTTCAGCACGATAAAGACACGGTAAAAGATTTAGGCTTCGCCGGGTTCAAAGTC
+CTGTACCCCATTAACAGTAAAGATAAGAACGACGAAATCGTCAGTATGCTTGGCGCCAGC
+TATTTCCGCGTTATCGGCGCAGGCCAGGTGTATGGCTTATCTGCGCGCGGCCTGGCGATT
+GATACCGCCTTACCATCTGGTGAAGAGTTTCCCCGCTTTCGCGAGTTCTGGATTGAGCGT
+CCAAAACCCACCGATAAGCGTTTGACCGTCTATGCATTACTGGATTCTCCGCGCGCGACC
+GGCGCTTACCGTTTTGTGATCATTCCTGGCCGCGATACCGTGGTGGACGTGCAGTCAAAA
+GTCTATCTGCGCGATAAGGTGGGCAAGCTGGGCGTTGCGCCATTAACCAGTATGTTCCTG
+TTTGGGCCAAACCAGCCGTCGCCGACGACCAACTATCGTCCGGAATTGCATGACTCGAAC
+GGCTTATCCATTCATGCGGGTAATGGCGAGTGGATTTGGCGTCCGCTGAACAATCCAAAA
+CACCTCGCTGTGAGCAGCTATGCGATGGAAAACCCTCAGGGATTCGGCCTGTTGCAGCGT
+GGTCGCGAGTTCTCGCGCTTTGAAGATTTAGACGATCGCTATGACCTGCGTCCAAGCGCC
+TGGATTACCCCGAAAGGCGACTGGGGCAAAGGTAAGGTTGAACTGGTTGAAATTCCGACC
+AATGATGAAACCAACGATAACATCGTCGCTTACTGGACTCCGGATCAACTGCCGGAACCG
+GGTAAAGAGATGAACTTCAAGTACACTCTGACCTTCAGCCGCGATGAAGATAAACTTCAT
+GCGCCGGATAATGCCTGGGTGCTGCAAACACGCCGCTCAACGGGCGACGTTAAACAGTCG
+AATCTGATTCGCCAGCCCGACGGCACTATTGCCTTTGTGGTGGATTTCGTTGGCGCCGAC
+ATGAAAAAACTGCCGCCGGATACGCCCGTCGCTGCACAAACCAGCATTGGCGATAACGGT
+GAAATCGTTGACAGTAATGTACGCTATAACCCAGTCACTAAAGGCTGGCGTTTAATGCTG
+CGCGTGAAAGTCAAAGACGCGAAGAAAACCACGGAAATGCGTGCCGCATTGGTGAATGCC
+GATCAGACGCTAAGTGAAACCTGGAGCTACCAGTTACCTGCCAATGAATAAatgaataaa
+acaactgagtatattgacgcactgctgctttctgaacgtgagaaagcggcattgccgaaa
+actgacatccgcgccgtgcatcaggcgctggatgccgagcatcggacttactcgcgagaa
+gacgattcaccgcagggttccgtaaaagcccgccttgaacacgcctggccggattcattg
+gcgaaggggcagttaattaaagatgatgaagggcgcgatcagttgcaggctatgccaaaa
+gcgacgcgctcttcgatgtttcctgatccctggcgaaccaacccggttggccgtttctgg
+gatcgcctgcgtgggcgggatgttacgccgcgctatgtttctcgtctgacaaaagaagag
+caggcgagtgagcaaaaatggcgtaccgtcggcactatacgccgctatattttgttaatt
+ttgactctggcgcaaaccgtcgttgcgacctggtatatgaagaccattctgccctatcag
+ggatgggcgctcatcaatcctattgatatggtggggcaggatatttgggtctcctttatg
+cagctcctgccctacatgctgcataccggtatcctgattttgtttgccgtgctgttctgc
+tgggtgtctgccggattctggactgcgctgatgggcttcctgcaactgcttatcgggcgc
+gataagtacagtatctccgcgtctacggttggcgatgagcccctcaatccggaacaccag
+acggcgctgatcatgcctatctgtaatgaagacgttagccgcgttttcgccggtctgcgc
+gcgacctgggagtccgttaaagctacaggcaacgccgcgcattttgacgtctatatcctt
+agcgatagttataacccggatatttgcgtggcggagcaaaaggcgtggatggagctcatc
+gcggaagtgcagggcgaaggccatattttttaccgtcgccgccgccgccgtatgaaacgc
+aaaagcggcaatattgacgatttttgccgccgctggggcaatcagtacagctatatggtg
+gtgctggacgcggactcagtgattagcggcgagtgtctgagcgggctggtgcgcctgatg
+gaagcgaaccctaacgccgggattatccagtcttcgccgaaagcgtcggggatggatact
+ctgtatgcccgctgccaacagtttgcgacccgtgtttatggaccgctgtttaccgccggg
+ctgcacttctggcagttgggggattcgcactactgggggcacaatgccattatccgcgtg
+aagccgtttatcgagcactgcgctctggcgccgctgccgggagaaggttcgttcgccgga
+tcgattctttcccacgactttgttgaggcggcgctaatgcgtcgggcagggtggggcgtc
+tggattgcctacgatctccccggttcctatgaagagctgccgccaaacctgctggatgag
+cttaaacgcgaccgccgctggtgtcacggcaacctgatgaactttcgtctgttcctggtg
+aaaggaatgcacccggtgcatcgtgccgtgttcctgaccggggtaatgtcatacctgtcc
+gcgccgttatggtttatgttccttgcgctttctaccgcgctgcaggtcgttcatgcgtta
+acagagccgcaatatttccttcatccgcgccagctttttccggtctggccgcagtggcgt
+ccggaactggcaatcgcgctgtttgcgtcaacgatggtgctgctgttcctgccgaagctg
+ctcagtattatgctgatctggtgtaaaggcaccaaagagtatggcggtttctggcgcgtt
+acgctgtcgctattgctggaagttctgttctccgtgttgctggcgccggtgcgtatgctg
+tttcataccgtgtttgtggtcagtgcgttcctcggctgggaagtggtctggaactcaccg
+caacgcgacgatgattctacgccttggggagaagcctttatgcgtcacggctctcaactg
+ctgctggggctggtctgggcggttggtatggcgtggctggatttacgctttctgttctgg
+ctggcgccgattgtcttttcgcttattctgtcgccatttgtttcggtgatctccagtcgt
+tcaacggtaggattacgcaccaatcgctggaagctgttcctgatcccggaagagtattcg
+ccgcctcaggtgttggtcgatactgataaatatctggagatgaatcgccgccgtattctg
+gacgatggctttatgcatgcggtttttaacccgtcgcttaatgcgctggcgaccgcgatg
+gccaccgcgcgtcaccgcgccagtaaggtgctggaaatagcccgcgatcgtcatgtggag
+caggcgctaaacgaaacgccggataaactgaaccgcgatcggcgtctggttttgctcagc
+gatccggtgacgatggcgcgttttcactatcgggtctggaatgcgccagagagatactct
+tcctgggtaaaccattatcagtctctcgtcctgaatccgcaggcgttgcagggacgaaca
+tcgtcagcgggataaatgtcgcgcgtctcgnnggcgaggaacctgggtaaatattttctt
+ctcatcgataacatgttagtggtgctgggtnntttcgtcgtcttcccgctcatctctatt
+cgctttgtcgatcaaatggggtgggctgccnnaatggtagggatcgcgctcggcctgcgt
+cagtttattcaacaaggtctgggcatttttnncggcgccatcgccgatcgctttggcgcg
+aaaccgatgattgtcaccggtatgctgatgnncgccgcaggctttgccaccatgggtatc
+gcgcatgagccctggctcttgtggttttccnnctttctttccggtctcggcggtacgctt
+ttcgacccgccgcgttcagcgctggtggtcnnattaattcgtccggagcaacggggccgc
+ttcttctctctgttgatgatgcaggacagcnngggcgcggtgattggcgcgctgctggga
+agctggttgctacaatacgattttcgcctgnnctgcgcgacgggcgctattttgttcata
+ttatgcgcccttttcaacgcatggctgcttnnggcctggaagctatcaacggccagaacg
+ccggtgcgtgaaggaatgcgccgcgtcatgnncaataaaaggtttgtcacctacgtgctg
+acgctggcgggctactatatgctggcggtannggtcatgttaatgctgccgattatggta
+aacgatatcgccggttcgcctgctgccgtgnnatggatgtacgctattgaggcgtgtctc
+tcgctgacgttgctctacccgattgcccgcnngagcgaaaagcgttttcggctggagcat
+cggctgatggccggtttgctcgtcatgtcgnngagcatgctccccatcgggatggtgggc
+aatttacagcagctttttacgcttatttgcnntttctacatcggctcggttatcgccgaa
+ccggcgcgcgaaacgctcagcgcgtcgcccnnggacgcgagggcgcggggaagctatatg
+ggctttagccgtctgggattagccattggcnncgcgattagttatatcggcggcggctgg
+ttgtttgatatgggtaaagcgcttgcgcagnntgaactaccgtggatgatgctcggtatt
+atcggctttatcacctttttggctttaggcnngcaatttagtcataagcgcacgccgcgc
+cggatgctggaacccggcgcctgaatgaccaagtatgccacgctggaagaagctatcgat
+gcagcccgggaagaatttctggctgaccatcaaggcctcgaacaagacgaagcgaatgtg
+cagcagttcaacgttcagaaatatgtactgcaggatggggacatcatgtggcaggtcgaa
+tttttcgccgatgaaggtgaagatggcgaatatctgccgatgctgagtggtgaagccgca
+cagagcgtgtttgacggcgattatgatgagaaagagatccgccaggaatggcaggaagag
+aatactttgcatgaatgggatgaaggggaataccagcttgaacccccgcttgataccgag
+gaaggccgtactgcggcagacgaatgggatgagcgttaaATGTCACTATTAGCCAGGCTG
+GAACAAAGTGTACACGAAAACGGTGGGCTGATTGTCTCATGCCAACCGGTACCAGGCAGC
+CCTATGGATAAACCTGAAATTGTGGCTGCAATGGCACAGGCAGCGGCTTCGGCGGGTGCG
+GTCGCTGTGCGCATTGAAGGCATTGAGAATCTGCGGACTGTTCGTCCCCATCTTTCTGTT
+CCTATTATTGGGATAATTAAACGTGACCTTACAGGGTCGCCAGTCCGTATCACTCCATAT
+TTACAGGATGTTGACGCCCTGGCGCAGGCAGGTGCCGATATTATCGCTTTTGATGCCTCA
+TTCCGCTCTCGCCCGGTTGATATTGATAGTTTACTGACACGTATTCGCCTGCATGGATTA
+CTGGCGATGGCAGACTGTTCAACCGTGAATGAAGGCATAAGTTGCCATCAGAAAGGAATC
+GAATTCATTGGTACAACACTGTCTGGCTATACCGGTCCCATCACGCCGGTTGAGCCAGAT
+TTGGCAATGGTGACACAACTGAGTCATGCAGGTTGTCGTGTTATTGCCGAGGGGCGCTAT
+AACACGCCTGCACTGGCGGCCAATGCTATTGAGCATGGTGCCTGGGCAGTTACCGTTGGT
+TCCGCTATCACCCGTATCGAGCATATCTGTCAGTGGTTCAGTCACGCAGTAAAACGCTGA
+ATGAAAAATTTTAAGAAAATGATGACGCTAATGGCGCTATGTTTATCAGTTGCTATCACC
+ACATCAGGATATGCAACCACGCTTCCTGATATACCAGAACCACTGAAAAATGGTACTGGC
+GCTATTGATAATAATGGCGTGATTTATGTCGGCTTAGGTACCGCAGGGACATCCTGGTAT
+AAAATTGATCTTAAAAAGCAACATAAAGACTGGGAGCGTATAAAGTCGTTTCCTGGTGGA
+GCTCGTGAGCAATCCGTGTCGGTATTTTTAAATGATAAGCTGTATGTTTTTGGTGGCGTA
+GGGAAAAAAAACAGTGAATCACCGTTGCAGGTTTATAGCGATGTGTACAAATACTCACCG
+GTGAAAAATACATGGCAAAAAGTTGATACTATATCTCCAGTTGGATTAACAGGGCATACG
+GGAGTAAAATTAAACGAAACGATGGTACTTATTACCGGAGGGGTTAATGAGCATATCTTT
+GATAAGTATTTTATTGATATAGCGGCTGCGGATGAAAGTGAAAAAAATAAAGTCATCTAT
+AATTATTTTAATAAACCTGCCAAAGATTATTTTTTTAATAAAATCGTATTTATCTACAAT
+GCTAAAGAGAACACATGGAAGAATGCCGGTGAGCTGCCAGGCGCGGGGACGGCAGGATCG
+TCATCGGTAATGGAAAATAATTTCTTGATGCTGATTAATGGTGAGCTCAAACCGGGTTTA
+CGTACCGATGTGATTTACCGCGCCATGTGGGATAACGATAAGCTAACATGGTTGAAGAAC
+AGCCAGTTACCGCCATCGCCTGGAGAACAACAGCAGGAAGGGTTGGCCGGAGCATTTTCG
+GGCTATAGCCACGGTGTCCTGCTTGTCGGTGGTGGCGCGAATTTTCCGGGAGCAAAACAA
+AATTATACTAATGGAAAGTTTTATTCCCACGAAGGGATAAATAAAAAATGGCGAGATGAA
+GTCTATGGTTTGATTAATGGCCATTGGCAATATATGGGTAAAATGAAACAACCTCTCGGC
+TATGGTGTATCAGTAAGTTATGGTGATGAAGTTTTCCTTATTGGTGGTGAAAATGCTAAA
+GGGAAACCTGTTTCGTCTGTAACCTCCTTTACCATGCGTGATGGTAATTTATTAATAAAA
+TAAGTGATAGCAAAATTCTTCCCGTGGTATAGCGAGATAACACGTCCACAAAAAAATGCT
+TTATTTTCAGCATGGCTGGGTTACGTTTTTGATGGCTTCGACTTTATGCTGATTTTCTAC
+ATTATGTATCTGATCAAGGCTGACTTAGGATTGACAGATATGGAGGGCGCATTCCTTGCC
+ACAGCGGCCTTTATTGGGCGACCATTTGGCGGGGCGCTATTTGGTCTGCTGGCAGACAAA
+TTTGGCCGTAAGCCGTTAATGATGTGGTCGATAGTTGCCTATTCTGTAGGTACAGGGTTA
+AGTGGCCTGGCTTCCGGTGTAATTATGCTGACGCTTAGTCGTTTTATTGTCGGTATGGGG
+ATGGCGGGGAAGTATGCTTGCGCTTCTACTTATGCCGTGGAAAGTTGGCCAAAGCATTTA
+AAATCTAAAGCGAGCGCATTTCTGGTTTCAGGTTTCGGTATTGGTAACATCATAGCAGCC
+TATTTTATGCCGTCATTTGCCGAAGCGTATGGTTGGCGTGCTGCTTTTTTTGTCGGTTTG
+CTACCCGTTCTTTTAGTAATCTACATCCGGGCCAGGGCTCCTGAATCTAAAGAGTGGGAA
+GAAGCCAAACTCAGTGGTCTCGGAAAGCATTCACAAAGTGCCTGGTCAGTTTTCTCTTTG
+TCAATGAAAGGGCTATTTAATCGAGCTCAATTTCCACTGACATTATGTGTATTTATTGTT
+CTGTTCTCTATTTTCGGCGCAAACTGGCCGATCTTTGGTCTACTGCCTACATATTTGGCG
+GGAGAGGGCTTTGATACGGGCGTGGTCTCTAATTTAATGACGGCGGCGGCATTCGGCACT
+GTATTGGGAAATATCGTTTGGGGTCTGTGCGCAGATAGAATTGGTTTGAAGAAAACGTTC
+AGCATTGGTCTTCTCATGTCCTTTTTATTCATTTTCCCGTTATTCAGAATTCCGCAAGAT
+AATTATTTACTGCTGGGCGCATGTTTATTCGGTTTAATGGCGACTAACGTAGGTGTTGGC
+GGGCTGGTTCCCAAATTTCTCTACGACTACTTTCCTCTTGAGGTTCGTGGTTTGGGTACC
+GGGCTTATTTATAATCTTGCTGCGACATCAGGCACATTCAATTCAATGGCGGCGACCTGG
+CTTGGAATAACAATGGGGCTAGGCGCTGCGCTAACGTTCATTGTTGCTTTCTGGACCGCA
+ACAATTCTACTCATTATTGGCCTATCCATTCCGGATAGACTAAAAGCACGTCGTGAAAGG
+TTTCAGTCAACAAAAGAATTTTAAATGAAAAAGTATCTTGCTTTCGCCGTTACGCTGCTG
+GGTATGGGTAAAGTCATCGCCTGTACTACCCTTTTGGTAGGCAATCAGGCTTCGGCTGAC
+GGCTCCTTTATTATCGCGCGCAACGAGGATGGCTCGGCAAATAACGCCAAGCATAAGGTT
+ATTCATCCCGTCGCGTTTCATCAACAAGGCGAGTATAAAGCACATCGCAACAATTTTAGC
+TGGCCGCTTCCGGAGACAGCGATGCGCTATACGGCGATTCATGACTTTGATACTAACGAT
+AACGCCATGGGTGAAGCCGGTTTCAATTCGGCGGGCGTCGGAATGAGCGCAACGGAAACC
+ATTTACAACGGCAGAGCGGCGCTGGCTGCCGATCCTTACGTGACAAAAACGGGAATCACG
+GAAGACGCCATTGAGTCCGTGATCCTGCCAGTGGCGCAATCGGCGCGTCAGGGCGCCAAA
+TTACTGGGAGATATTATTGAACAAAAAGGCGCGGGCGAAGGTTTCGGCGTCGCGTTTATT
+GATAGCAAAGAGATATGGTATCTGGAGACGGGAAGCGGACATCAATGGCTGGCAGTACGA
+CTTCCGGCAGATAGCTATTTCGTTTCCGCCAATCAGGGACGTTTACGCCATTACGATCCG
+AATGATAACGCGAATTATATGGCGTCACCAACGTTAGTAAGCTTTGCGAAAAAGCAGGGA
+TTATATGATCCGGCCCGCGGCGAATTCGACTTTCATCAAGCCTATTCGCAGGATAACAAA
+AACGATACCACCTATAATTATCCGCGCGTCTGGACGCTACAACACCAGTTTAATCCGCAT
+CTGGATACGGTCGTTAGCGAAGGGGAAACATTTTCTGTTTTTTTAACGCCAATAACGAAG
+ATCAGCGTGGCGGCAGTAAAAAACGCGTTACGCAATCACTATCAGGGAACGTCGCACGAC
+CCTTATGCCAGTCATAATCCACAAGAACCATGGCGACCTATATCCGTTTTTCGTACCCAG
+GAGTCACATATTTTACAGGTCAGACCGAAATTACCGCAGGCTATCGGCAACGTAGAATAC
+ATCGCCTATGGAATGCCATCTCTTAGCGTCTATCTCCCCTATTACCAGGGGATGCGTCAT
+TATCAACCCGGAGATGATAAAGGAACCGATCGGGCGAGCAACGACTCTACCTACTGGACA
+TTCCGCACGCTGCAAACACTGGTTATGCAAGACTACAATACGTTTGCGCCAGATGTGCAA
+CATGCCTGGAAAACATTTGAACAGCAAACAGCTAAGCAACAGTATAAGATGGAGCAGAGC
+TATCTGAGATTATATGCGTCGCATCCGAAAGAAGCACAACGCTTACTGCAAAATTTTGAA
+GATAAAACGATGCAAAATGCGCAGACGCTCGCCCGTCGCCTGACCAATAATATTATTACG
+ACAATGACTTACCGCACAGATATGAAATATCACTTTTCAAGTACGCAGCCATAAATGGGA
+AGACAAAAAGCAGTGATCAAAGCTCGTCGTGAAGCAAAGCGTGTGTTGAGACGAGATTCG
+CGTAGTCATAAGCAACGTGAAGAAGAATCGGTCACGTCACTGGTACAGATGGGCGGAGTA
+GAAGCCATTGGCATGGCGCGCGATAGTCGCGATACCTCTCCTGTTAAGGCGCGAAATGAA
+GCACAGGCGCATTATCTGAACGCTATCGACAGTAAACAGCTTATTTTTGCGACCGGCGAA
+GCCGGCTGCGGAAAAACATGGATCAGTGCGGCAAAGGCGGCAGAAGCATTGATTCATAAG
+GACGTCGAGAGGATCATTGTGACGCGTCCGGTATTGCAGGCTGATGAAGATCTTGGTTTT
+TTGCCCGGTGATATCGCTGAAAAATTCGCGCCTTATTTTCGTCCCGTCTACGATGTCCTG
+CTTAAACGGTTGGGCGCGTCCTTTATGCAATATTGTTTGCGCCCGGAAATCGGTAAGGTA
+GAAATTGCCCCGTTCGCCTATATGCGTGGGCGTACTTTTGAAAATGCGGTCGTGATCCTC
+GACGAGGCGCAAAATGTGACTGCGGCGCAAATGAAAATGTTTTTGACGCGATTAGGCGAA
+AATGTCACGGTCATTGTCAATGGCGATATTACGCAATGCGACCTGCCGCGCGGTGTGCGT
+TCCGGGTTGAGTGATGCGTTGGAACGCTTTGAAGAAGATGAAATGGTGGGGATTGTGCAT
+TTCAACAAAGACGACTGCGTGCGCTCGGCGCTTTGTCAGCGAACGCTCCACGCATACAGC
+TAAATGGGAACCACCACGATGGGGGTTAAGCTGGACGACGCCACGCGCGAACGGATCAAA
+ATGGCCGCGTCGCGTATCGATCGCACGCCGCACTGGTTAATAAAACAGGCAATCTTTAGC
+TATCTGGACAAGCTGGAAAATAGCGATACGCTACCGGAGCTACCTGCGCTGTTTGCCGGC
+GCGGCAAATGAAAGCGAGGAGCCGGTCGCGCCGCAGGATGAGCCGCATCAGCCCTTTCTG
+GAGTTTGCCGAACAGATTCTTCCCCAATCCGTCTCTCGCGCCGCCATCACCGCCGCCTGG
+CGCCGCCCGGAAACCGATGCGGTGTCAATGCTAATGGAACAGGCGCGCCTGTCGCCGCCT
+GTCGCTGAGCAGGCGCATAAACTGGCGTATCAACTGGCGGAGAAATTGCGCAATCAAAAA
+TCCGCCAGCGGTCGCGCGGGTATGGTGCAAGGCCTGTTGCAGGAGTTTTCCCTCTCTTCG
+CAAGAAGGCGTAGCGCTGATGTGTCTGGCGGAAGCGCTGCTGCGTATTCCCGACAAAGCT
+ACGCGCGATGCGTTAATTCGCGACAAAATCAGTAATGGCAACTGGCAGTCGCATATTGGC
+CGTAGCCCGTCGCTGTTTGTAAACGCCGCCACCTGGGGGCTGCTCTTTACCGGCCGACTG
+GTCTCAACGCATAACGAAGCCAATCTTTCGCGCTCGCTGAACCGCATTATCGGCAAGAGC
+GGCGAACCGTTAATCCGCAAAGGCGTCGACATGGCGATGCGTTTAATGGGCGAGCAGTTC
+GTGACTGGCGAAACCATTGCTCAGGCGCTGGCGAATGCCCGAAAACTGGAAGAGAAAGGG
+TTCCGCTATTCTTACGATATGCTGGGCGAAGCCGCGTTAACCGCCGCCGATGCGCAGGCC
+TATATGGTCTCTTACCAGCAAGCGATTCATGCCATCGGCAAAGCGTCTAACGGTCGCGGT
+ATTTACGAAGGGCCAGGCATCTCGATTAAGCTGTCCGCCCTGCATCCACGCTATAGTCGC
+GCGCAATACGATCGGGTAATGGAGGAGCTTTATCCGCGCCTGAAATCCCTGACGCTGCTG
+GCGCGCCAGTATGATATCGGTCTCAATATCGACGCCGAAGAGGCGGATCGTCTGGAGATC
+TCGCTTGATCTGCTGGAAAAACTCTGCTTCGAACCCGAACTGGCGGGCTGGAACGGCATT
+GGCTTTGTGATTCAGGCTTACCAGAAACGCTGCCCGCTGGTCATTGATTATTTAGTCGAT
+CTGGCCTCCCGTAGCCGCCGTCGGCTGATGATTCGTCTGGTGAAAGGCGCCTACTGGGAT
+AGCGAGATCAAACGCGCGCAAATGGAAGGGCTGGAGGGCTATCCAGTTTATACCCGCAAA
+GTGTATACCGATGTCTCTTATCTGGCCTGCGCGAAAAAACTGCTCGCCGTCCCTAATCTG
+ATCTACCCGCAGTTCGCGACCCATAACGCTCACACACTGGCGGCGATTTATCATCTGGCC
+GGGCAAAATTACTATCCGGGTCAGTACGAATTCCAGTGCCTGCACGGCATGGGAGAACCG
+CTGTATGAACAGGTCACCGGTAAAGTGGGGGACGGAAAACTTAACCGTCCCTGCCGTATT
+TACGCGCCGGTGGGAACACACGAAACCCTGCTGGCCTATCTGGTACGACGCCTGCTGGAA
+AACGGCGCCAACACCTCTTTTGTCAACCGCATCGCCGATGCCACCCTACCGCTCGATGAA
+CTGGTGGCCGACCCGGTCGAGGCCGTGGAAAAACTGGCGCAGCAGGAAGGTCAGGCTGGC
+ATACCGCATCCAAAAATTCCGCTGCCGCGCGATCTGTACGGCGAAGGTCGGATAAACTCC
+GCCGGACTTGATTTAGCGAATGAACATCGCCTCGCCTCGCTTTCTTCTGCCCTGTTAAGC
+AACGCCATGCAGAAATGGCAGGCCAAACCTGTGCTGGAACAACCGGTGGCCGACGGTGAG
+ATGACGCCGGTTATCAACCCGGCGGAACCGAAAGATATTGTTGGCTGGGGACGCGAAGCG
+ACAGAAAGCGAGGTTGAACAGGCGTTGCAAAACGCGGTCAATCAGGCGCCGGTTTGGTTT
+GCGACGCCGCCGCAAGAACGCGCCGCTATTTTGCAGCGGGCGGCGGTATTGATGGAAGAC
+CAAATGCAGCAGTTGATTGGCCTGTTGGTGCGTGAAGCGGGGAAAACGTTCAGCAACGCC
+ATTGCCGAAGTGCGCGAAGCGGTAGACTTCCTCCATTATTATGCCGGTCAAGTGCGTGAC
+GATTTCGATAACGAAACGCATCGCCCGTTAGGGCCGGTGGTCTGTATCAGTCCGTGGAAC
+TTTCCGCTGGCCATTTTCACTGGCCAAATCGCCGCCGCGCTGGCGGCAGGTAACAGCGTT
+CTGGCGAAACCGGCAGAGCAGACATCGCTGATTGCCGCCCAGGGCATTGCCATTTTGCTG
+GAAGCGGGCGTACCGCCGGGCGTCGTGCAACTGTTGCCGGGACGGGGAGAAACCGTCGGC
+GCCCAGCTTACCGCCGATGCGCGTGTACGCGGCGTGATGTTTACCGGTTCCACGGAGGTC
+GCGACGTTGTTGCAGCGCAACATCGCCACGCGTCTTGACGCCCAGGGGCGCCCTATTCCG
+TTGATTGCGGAAACCGGCGGTATGAACGCTATGATTGTCGACTCTTCCGCGCTCACCGAG
+CAGGTGGTCGTGGATGTGCTGGCTTCCGCCTTCGACAGCGCCGGACAACGCTGTTCCGCG
+CTCCGCGTGCTGTGTTTGCAGGACGATATCGCCGAACATACGCTGAAAATGTTACGCGGC
+GCGATGGCGGAGTGTCGGATGGGGAATCCAGGCCGTCTGACGACCGATATCGGGCCGGTG
+ATCGATAGCGAGGCCAAAGCCAACATTGAACGTCATATCCAGACGATGCGCGCCAAAGGC
+CGCCCGGTTTTCCAGGCCGCGCGTGAAAACAGCGATGACGCGCAGGAATGGCAGACCGGT
+ACGTTTGTTATGCCCACGCTTATTGAGCTGGAAAACTTCGCAGAACTGGAAAAAGAGGTC
+TTCGGGCCCGTGCTGCACGTCGTGCGTTATAACCGTAACCAACTGGCGGAGCTTATCGAA
+CAGATTAACGCTTCCGGCTACGGGCTAACGCTGGGCGTACATACCCGTATTGATGAAACC
+ATTGCGCAAGTCACCGGTTCCGCCCATGTCGGCAACCTGTACGTTAACCGTAATATGGTG
+GGCGCGGTCGTCGGCGTCCAGCCGTTTGGCGGCGAAGGCCTGTCCGGCACCGGGCCAAAA
+GCGGGAGGGCCGCTCTATCTCTACCGCCTGCTGGCACACCGCCCGCCCAATGCGCTCAAT
+ACGACGCTGACTCGTCAGGATGCGCGTTACCCGGTGGATGCGCAGCTTAAAACCACGCTA
+CTCGCGCCGTTGACCGCTCTGACGCAATGGGCGGCGGATCGCCCGGCGCTACAGACGCTC
+TGCCGACAATTCGCCGATCTGGCGCAGGCCGGCACGCAGCGCCTGCTACCGGGGCCGACC
+GGCGAGCGTAATACCTGGACGCTGTTGCCGCGTGAACGGGTGTTATGCCTGGCTGATGAT
+GAACAGGACGCGTTGACGCAGCTTGCCGCCGTTCTCGCCGTCGGCAGTCAGGCGCTATGG
+TCAGACGACGCCTTCCACCGCGATCTGGCGAAACGTCTCCCCGCCGCCGTCGCGGCGCGT
+GTCCAGTTTGCGAAAGCGGAAACGCTGATGGCGCAGCCGTTTGACGCGGTGATTTTCCAC
+GGCGACTCCGACAAGCTGCGAACCGTGTGCGAAGCCGTCGCCGCCCGCGAAGGCGCGATA
+GTGTCGGTACAGGGGTTCGCCCGCGGCGAAAGCAATATGCTGCTGGAACGGCTCTATATT
+GAACGTTCGCTGAGCGTAAACACTGCCGCCGCTGGCGGTAATGCCAGCCTGATGACAATT
+GGCTAAATGGCTATTAGCACACCGATGTTGGTGACATTCTGTGTCTATATTTTTGGCATG
+ATATTGATTGGGTTTATCGCCTGGCGCTCAACCAAAAACTTTGATGACTATATTCTTGGC
+GGTCGCAGCCTGGGGCCGTTTGTTACGGCTTTATCAGCCGGCGCGTCGGATATGAGCGGC
+TGGCTGTTAATGGGGCTGCCTGGCGCTATCTTTCTGTCGGGGATCTCTGAAAGCTGGATC
+GCCATTGGCCTGACGTTAGGCGCATGGATTAACTGGAAGCTGGTGGCCGGGCGCCTGCGC
+GTGCATACCGAATTTAACAATAACGCGCTCACGCTGCCGGACTATTTTACCGGTCGGTTT
+GAGGATAAGAGCCGAGTCCTGCGTATTATTTCCGCGCTGGTCATTCTGCTGTTTTTCACT
+ATCTATTGCGCATCAGGTATTGTCGCTGGGGCACGACTGTTCGAAAGCACCTTCGGTATG
+AGCTATGAAACCGCACTGTGGGCGGGGGCCGCGGCAACCATTATTTATACCTTTATCGGC
+GGGTTTCTTGCCGTTAGCTGGACGGATACCGTTCAGGCCAGCCTGATGATTTTTGCGTTA
+ATCCTGACGCCGGTGATGGTTATTGTCGGCGTAGGCGGTTTTAGCGAGTCGCTGGAAGTG
+ATCAAGCAAAAGAGCATCGAGAATGTCGACATGCTCAAGGGGCTGAATTTTGTCGCTATT
+ATTTCTCTGATGGGCTGGGGGCTGGGTTACTTCGGTCAGCCGCATATCCTGGCGCGCTTT
+ATGGCGGCGGATTCCCATCACAGTATTGTTCATGCGCGTCGTATCAGTATGACCTGGATG
+ATTCTGTGTCTGGCGGGCGCGGTGGCGGTGGGCTTCTTTGGCATTGCGTACTTTAACAAT
+AACCCCGCGCTGGCCGGGGCGGTGAACCAAAACTCAGAACGCGTATTTATTGAACTGGCG
+CAGATCCTGTTTAACCCGTGGATTGCCGGTGTTCTGCTGTCTGCTATCCTGGCGGCGGTG
+ATGTCGACGTTGAGCTGTCAGTTGCTGGTATGCTCCAGCGCGATTACGGAAGATTTATAT
+AAGGCTTTTCTGCGTAAAAGCGCCAGCCAGCAAGAGCTGGTATGGGTAGGGCGAGTGATG
+GTGCTGGTGGTAGCGCTGATCGCCATTGCGCTGGCGGCGAATCCTGATAACCGTGTGCTG
+GGGCTGGTGAGCTACGCCTGGGCTGGATTCGGCGCGGCATTTGGACCTGTTGTCCTGTTT
+TCTGTGATGTGGTCGCGTATGACACGTAACGGCGCGCTGGCGGGAATGATTATTGGCGCG
+GTGACGGTTATCGTCTGGAAACAATATGGCTGGCTGGATCTGTATGAGATTATCCCTGGC
+TTCATTTTCGGCAGCCTGGGGATCGTAATCTTTAGCCTGCTTGGCAAAGCGCCGACAGCA
+ACGATGCAGGAACGCTTTGCAAAAGCGGACGCGCATTATCATTCCGCGCCGCCGTCGAAG
+CTACAGGCGGAATAAATGGCGGGTAAACTGCGGCGTTGGCTGCGTGAAGCCGCGGTTTTT
+CTGGCGCTCCTCATCGCGATAATGGTGGTCATGGACGTCTGGCGCGCGCCGCAGGCGCCT
+CCGGCGTTTGCCGCGACACCATTACATACGCTGACGGGAGAGTCGACAACTCTGGCGACC
+TTGAGCGAGGAACGCCCCGTACTGCTCTATTTTTGGGCCAGCTGGTGCGGGGTATGCCGC
+TTTACCACGCCTGCGGTCGCTCACCTGGCGGCGGAAGGGGAAAACGTCATGACCGTTGCG
+CTCCGCTCCGGCGGTGATGCTGAGGTTGCCCGCTGGCTGGCGCGCAAGGGCGTTGACTTC
+CCGGTCGTCAATGATGCTAACGGCGCCTTATCCGCTGGCTGGGAAATCAGCGTGACGCCA
+ACGCTGGTGGTGGTTTCACAAGGTCGGGTTGTGTTCACCACCAGCGGCTGGACCAGCTAT
+TGGGGCATGAAGCTTCGGCTGTGGTGGGCAAAAACGTTCTGAATGATGAAAAAAAGCGTC
+GCTATGCTGGCGGTTTGTATGCTGGCGCAAAGCCACCTTGCCATTGCTGCCGGTGCTCCT
+GCGCCTCAAGAGATCAACATTGTTTTACTGGGCACCAAAGGCGGGCCTTCTTTGCTCAAT
+ACAGCCAGACTACCGCAAGCGACGGCGCTCACTATCGGCGATAAGATATGGCTGATAGAT
+GCCGGCTACGGCGCCAGTCTGCAACTGGTGAAAAATGGCATTCCACTGCGCAACATCAAT
+ACTATTTTGCTCACCCATCTGCACAGCGACCACATACTGGATTATCCTTCCTTGCTGATG
+AATGCCTGGGCAAGTGGCCTGAAAGACCATACCATACAGGTTTATGGCCCGCCGGGAACC
+CAGGCGATGACGAAGGCTAGCTGGAAGGTCTTTGACAGGGATATCACGTTACGCATGGAA
+GAAGAGGGGAAACCCGATCCGCGCAACCTGGTTAAGGCGACCGATATCGGCCAGGGCGTC
+ATCTATAAAGATGAACTGGTCACAATAAGCGCGCTGAAAGTGCCTCATTCCCCTTTCCCG
+GACGGTGAAGCGTTTGCTTACCGTTTTGATACTCAGGGTAAGCGAATCGTCTTCTCTGGC
+GATACGTCCTGGTTTCCTCCGCTTGCAACGTTTGCCCAGGGGGCGGATATCCTGGTACAT
+GAGGCGGTACATGTCCCTTCGGTAGCAAAACTGGCTAATAGTATTGGCAACGGAAAAACG
+CTGGCTGAAGCGATTGCGTCGCATCACACCACGATTGAAGATGTCGGTAAGATTGCTCGC
+GAGGCCCACGTGAAAAAACTGGTGTTAAGTCATCTGGTGCCTGCGACGGTTGCGGATGAC
+GTCTGGCAACAGGAAGCCATGAAAAATTACCCGGGCCCTGTCATTGTCGGTCATGACAAT
+ATGACGATAAGCGTACCGTAAATGTCGCAACGCACAGAGAAAAAAATCGGGAAACGTTCG
+CAGGCCACCGGTGCAAAACGGCAGCTTATCTTAACCGCCGCGCTTGCCGTTTTTTCCCAG
+TATGGCATTCATGGCGCGCGTCTTGAACAGGTCGCCGAGCGGGCAGGCGTCTCCAAAACC
+AATCTGCTTTATTATTATCCCTCGAAAGAGGCGCTGTATGTCGCGGTAATGCGACAGATT
+CTGGATGTCTGGTTGGCGCCGCTCAAGGCGTTTCGCGCAGAATTTTCCCCTCTGGAGGCC
+ATCAAAGAGTATATCCGTCTCAAGCTGGAGGTTTCGCGTGATTATCCGCAGGCGTCGCGG
+CTCTTCTGCATGGAGATGCTGGCGGGCGCGCCGCTCTTAATGGATGAACTGACCGGCGAT
+CTAAAAGCGTTGATAGATGAAAAATCCGCGCTGATTGCCGGATGGGTGCACAGCGGGAAA
+CTCGCGCCCGTTTCTCCGCATCATTTGATCTTCATGATTTGGGCCGCCACGCAACATTAC
+GCCGATTTCGCCCCTCAGGTTGAAGCGGTAACCGGCGCGACGCTTCGCGATGAAGCCTTT
+TTCAACCAAACGGTCGAAAGCGTTCAGCGCATTATTATTGAAGGGATTCGCGTGCGTTAA
+ATGGCGAAACAACAACGGATGGGCTGGTGGTTTCTTTGCCTTGCATGTGTCGTGGTAATG
+GTTTGTACCGCGCAACGCATGGCGGGCCTGCACGCCTTGCAGATGCAGGCGACGGCCTCT
+GCTGCGGTGGTCAGCGCTCCCTCCTCGACAGATGACGGCTCGCCGGTCACTCCCTGCGAA
+TTAAGCGCCAAGTCGCTGCTGGCGGCGCCTCCAGTACTCTTTGAAGGTGCTATCCTTGCG
+CTTTATCTACTGCTTTCCTTACTGGCGCCTGTCCGGGTCATGCGCCTGCCGTTTTCGCCT
+CCACGGGCTATTTCGCCGCCCACATTACGGGTACATCTACGATTTTGTGTCTTCCGTGAA
+TGAATGATGATTTTATTCAGGCGGATACTGTTCTGCCTGTTATGGCTTTGGCTGCCCGTC
+TCCTGGGCGGCGGAAAGCGGCTGGCTGCGTTCGCCCGATAACGACCATGCCAGCATACGG
+CTACGTGCCGATACGTCCGCTAACAGTGAGACCCGGCTGTTGCTGGATGTCAAACTGGAA
+AACGGCTGGAAAACCTACTGGCGCGCGCCGGGGGAAGGGGGCGTGGCACCCTCTATCGCC
+TGGAAAGGCGACATGCCTGAGGTAAGCTGGTTCTGGCCAACCCCCTCGCGCTTTGATGTG
+GCGAATATCACCACCCAGGGATATCACGACGAGGTGACCTTTCCGATGATCGTGCGCGGT
+ACGCCGCCGGCGACCTTGCGCGGTGTGTTGACGTTATCAACCTGCAGCAATGTTTGTCTG
+TTGACCGATTACCCCTTTTCCGTGACGCCCACTGTGCAGAATGCCGATTTTGCCCATGAC
+TATGCGCGGGCGATGGGTAAAGTTCCGCTCCGCAGTGGGCTAACGGACTCGCTTGACGTT
+GGCTATCGCCCGGGAGAACTGGTGGTCACTGCTACGCGAGCGGCGGGCTGGTCATCGCCC
+GGGCTCTATCTTGACACCATAGATGACGTCGATTTTGCGAAGCCTCGCCTGCGCGTAGAG
+GGCGACAGGTTACAGGCGACGGTGCCGGTGACGGACAGTTGGGGCGAAAAGGCGCCCGAT
+TTGCGCGACAAATCGCTGACCCTCGTGTTAGCCGATGGCGCTATCGCCCAGGAGAGCACG
+CAAACCATTGGCGCTGCGCCAGCGCAAACGCCGGACAATGCGGCGCTACCTTTCTGGCAA
+GTTGTAATGATGGCGCTAATCGGCGGACTGATTCTTAATTTAATGCCCTGCGTACTGCCT
+GTTCTGGGCATGAAACTTGGCTCTATTTTATTGGTAGAGGAAAAAAGCCGCTCTCACATC
+AGGCGACAATTTTTGGCTTCGGTCGCCGGTATCATTGCGTCATTTATGGCGCTGGCGGCG
+TTTATGACCCTCCTTCGCCTGTCAAACCATGCGCTGGCCTGGGGAGTCCAGTTCCAGAAT
+GCATGGTTTATTGGTTTTATGGCGCTGGTGATGTTGTTGTTTAGCGCCAGCCTGTTCGGG
+CTTTTTGAGTTCAGGCTTCCCTCATCTATGACCACGAAACTGGCCACTTACGGCGGTAAC
+GGTATGTCGGGACATTTCTGGCAGGGGGCGTTCGCCACGCTGCTGGCGACGCCTTGTAGC
+GCGCCGTTTCTGGGCACGGCGGTCGCGGTGGCGCTCACGGCGTCGCTGCCGACGCTGTGG
+GGGCTGTTCCTTGCGCTTGGCCTGGGAATGAGCGCGCCGTGGCTACTGGTCGCGATACGA
+CCAGGGCTTGCGCTACGTTTACCGCGCCCCGGGCGTTGGATGAATGTCCTGCGCAGGATC
+CTCGGTCTGATGATGCTGGGGTCGGCTATCTGGCTGGCGACGTTACTCCTGCCGCATTTC
+GGCTTCACTGCGTCAAAGAGCGCGCAAGACACGGTTCAGTGGCAACCGTTGAGTGAACAG
+GCAATCCAGTCGGCGCTGGCGCAGCATAAGCGGGTATTTGTCGATGTCACTGCGGACTGG
+TGTATTACCTGTAAAGTGAATAAATACAACGTCCTGCAAAAAGAGGATGTGCAGGCCGCC
+TTGCAACAGCCGGATGTTGTGGCGCTGCGGGGAGACTGGACGCTGCCGTCCGATGCCATT
+ACAGATTTTCTGAAAACGCGCGGCCAGGTCGCCGTGCCGTTTAATCAGGTATATGGCCCC
+GGTTTGCCGGAAGGGGAGGCACTGCCCACTTTGCTGACCCGCGATGCGGTATTACAAACG
+TTGAAAAAAGCGAAAGGAATAACCCAATGAATGAAATACATGATTGTTTTACTGCTGGCG
+CTGTTTTCGACGCTGAGCATCGCGCAAGAAACCGCTCCTTTTACGCCGGATCAGGAAAAG
+CAGATTAAAAATCTGATCCATGCGGCGTTGTTTAACGATCCTGCCAGCCCGCGGATAGGC
+GCTAAACACCCTAAGCTGACGCTGGTGAACTTTACGGATTACAACTGCCCGTACTGCAAA
+CAGCTCGATCCGATGCTGGAAAAGATTGTGCAGAAATATCCTGACGTTGCGGTCATTATT
+AAACCGCTGCCATTCAAAGGAGAGAGTTCCATACTGGCGGCGCGTATTGCGCTGACCACC
+TGGCGCGATCATCCGCAACAGTTCCTCGCGCTACATGAAAAACTTATGCAAAAGCGCGGT
+TACCATACGGATGACAGTATTAAACAGGCCCAGCAGAAAGCAGGGGCGACGCCAGTGACG
+CTGGATGAAAAAAGCATGGAAACGATACGCACTAATTTGCAGTTGGCAAGACTGGTCGAC
+GTGCAAGGAACGCCAGCGACGATCATTGGCGACGAGCTGATTCCGGGCGCAGTGCCCTGG
+GATACGCTGGAAGCGGTGGTGAAAGAAAAACTGGCGGCTGCCAATGGCGGGTAAatgatt
+gcacattctttcggcatcgttaattattttgtattatttggctacctcctggccatgatg
+gtagtcggtgtctatttttccagacggcaaaaaacagcagacgattattttcgcggtggt
+ggccgggttcctggttgggcggctggggtcagtgtatttgctactacgttaagctcaatt
+gcatttatgtcaattcctgccaaagcgtttacttccgactggacgtttatcattggtcag
+gatctggctatcgcaattttaccgctggttttttatttctatattccgttttttcggaaa
+gtgaaagtcacatcagcctatgaatatctcgaagcacggttcgatgtgcgctgccgtctg
+gtcgccagcatgtcatttatgttgtttcatattggacgtatcgccattatcactttcctc
+gccgtgctggccttgcgccccttcatcgctatagacccggtgattttggtactgttgatt
+ggtgtgatgtgtatcatttatacctggatgggggggaattgaatggaaagtctattaaat
+cgtttatatgacgcgttaggcctggatgcgccagaagttgagccactgcttatcattgat
+gatgggatacaggtttattttaatgaatccgatcatatactggaaatgtgctgtcccttt
+atgccactgcctgacgacactctgactttgcagcattttttacgtcttaactacgccagc
+gccgtcactatcggcgctgatgcagacaatactgctttagtggcgctttatcgcttgccg
+caaaccagtaccgaagaagaggcgctcactggttttgtattattcatttcaaacgtgaag
+caattgaaagagcattatgcataaATGAAATACGACCTTATTATTATCGGCAGCGGTTCG
+GTTGGCGCCGCCGCTGGTTATTACGCCACCCGCGCCGGGCTAAAGGTCCTGATGACCGAT
+GCGCATATGCCGCCTTATCAACAGGGCAGCCACCACGGCGATACCCGTCTTATCCGCCAC
+GCTTATGGTGAAGGCGAAAAATATGTCCCGCTGGTGCTTCGCGCCCAGACGCTTTGGGAT
+GAGCTCTCCACACACAATGAAGAGCCTATTTTTGTCCGCTCCGGCGTCGTCAACCTCGGC
+CCGGCCGATTCCGCTTTCTTAGCCAACGTCGCACGAAGCGCGCAACAGTGGCAATTGAAC
+GTCGAGCGCCTGGACGCGACGGCCCTCATGACGCGCTGGCCGGAAATTCGCGTGCCCGAT
+AATTATATCGGGCTGTTTGAAGCTGACTCCGGTTTCCTGCGCAGCGAATTAGCCATTACC
+ACATGGCTTCGTCTGGCCCGAGAGGCAGGCTGCGCACAGCTATTCAACAGCCCGGTAAGC
+CATATTCACCATGATGATAACGGTGTGACGATAGAGACGAGTGAAGGCTGCTACCACGCC
+AGCAAAGCGCTGATTAGCGCGGGCACCTGGGTCAAAACGCTGGTACCGGAGCTGCCCGTT
+CAGCCCGTACGTAAAGTTTTTGCCTGGTTTAAGGCGGATGGACGTTACAGCACTAAAAAC
+CGCTTTCCGGCCTTTACCGGCGAAATGCCCAACGGCGATCACTATTACGGTTTCCCGGCG
+GAGAACGACGAGTTAAAAATCGGCAAACACAATGGCGGGCAGCGAATACAGGCACCGGAA
+GAGCGCAAGCCCTTTGCCGCCGTTGCCAGCGATGGCGCGGAAGCATTTCCTTTCCTGCGT
+AACGTACTGCCGGGTATCGGCGGTTGTTTACATGGGGCGGCATGTACCTATGATAATTCG
+CCGGACGAGGATTTTATTATCGATACGCTGCCTGGCCATGAGAATACGCTTGTCATCACT
+GGACTCAGCGGACATGGTTTTAAATTCGCCCCGGTGTTAGGAGAAATCGCTGCGGATTTT
+GCGTTGGGAAAAACGCCCTCCTTTGATCTGACGCCGTTCCGGCTTTCCCGTTTTAGCCAA
+TAAatgcaaatacagagcttctatcactcagcttcactaaaaacccaggaggcttttaaa
+agcctacaaaaaaccttatacaacggaatgcagattctctcaggccagggcaaagcgccg
+gctaaagcgcccgacgctcgcccggaaattattgtcctgcgagaacctggcgcgacatgg
+gggaattatctacagcatcagaagacgtctaaccactcgctgcataacctctataactta
+cagcgcgatcttcttaccgtcgcggcaaccgttctgggtaaacaagacccggttctaacg
+tcaatggcaaaccaaatggagttagccaaagttaaagcggaccggccagcaacaaaacaa
+gaagaagctgcggcaaaagcattgaagaaaaatcttatcgaacttattgcagcacgcact
+cagcagcaaaatggcttacctgcaaaagaagctcatcgctttgcggcagtagcgtttaga
+gatgctcaggtcaagcagctcaataaccagccctggcaaaccataaaaaatacactcacg
+cataacgggcatcactataccaacacgcagctccctgccgcagagatgaaaatcggcgca
+aaagatatctttcccagtgcttatgagggaaagggcgtatgcagttgggataccaagaat
+attcatcacgccaataatttgtggatgtccacggtgagtgtgcatgaggacggtaaagat
+aaaacgcttttttgcgggatacgtcatggtgtgctttccccctatcatgaaaaagatccg
+cttctgcgtcaggccggcgctgaaaacaaagccaaagaagtattagctgcggcacttttt
+agtaaacctgagttgcttaacagagccttagagggcgaagcggtaagcctgaaactggta
+tccgtcgggttactcaccgcgtcgaatattttcggcaaagagggaactatggtcgaggat
+caaatgcgcgcatggcaatcgttgacccagccgggaaaaatgattcatttaaaaatccgc
+aataaagatggcgatctacagacggtaaaaataaaaccggacgtcgccgcatttaatgtg
+ggtgttaatgagctggcgctcaagctcggctttggccttaaagcatcagatagctataat
+gccgaagcgctacatcagttattaggcaatgatttacgccctgaagccagaccaggtggc
+tgggttggcgaatggctggcgcaatacccggataattatgaggtcgtcaatacattagcg
+cgccagattaaggatatctggaaaaataaccaacatcataaagatggcggcgaaccctat
+aaactcgcacaacgccttgccatgttagcccatgaaattgacgcggtgcccgcctggaat
+tgtaaaagcggcaaagatcgtacagggatgatggattcagaaatcaagcgagagctcatt
+tctttccatcagacccatatgttaagtgcgcctggtagtcttccggatagcggtggacag
+aaaattttccaaaaagtattactgaatagcggtaacctggagattcagaaacaaaatacg
+ggcggggcgggaaacaaagtaatgaaaaatttatcgccagaggtgctcaatctttcctat
+caaaaacgagttggggatgataatatttggcagtcagtaaaaggtatttcttcattaatc
+acatcttgaATGAAACGATATATACTGGCTACCGCGATAGCGTCTCTTGTTGCAGCCCCG
+GCAATGGCGCTGGCCGCTGGCAGCAATATTCTCAGCGTACATATTCTCGATCAGCAAACA
+GGCAAACCAGCGCCCGGCGTGGAGGTGGTACTGGAGCAGAAAAAGGATAACGGATGGACG
+CAATTAAACACCGGGCATACCGACCAGGATGGACGAATTAAAGCACTGTGGCCCGAAAAA
+GCTGCCGCGCCGGGGGATTATCGCGTTATTTTTAAAACCGGCCAGTATTTTGAAAGTAAA
+AAACTGGACACGTTTTTCCCGGAGATTCCCGTCGAGTTTCATATCAGCAAAACGAATGAG
+CACTATCATGTGCCGCTGTTATTAAGTCAGTATGGTTATTCAACCTATCGCGGGAGCTAA
+ATGGCAAAGATTCTGGTGCTCTATTATTCCATGTACGGACACATTGAAACCATGGCGCAC
+GCGGTGGCGGAAGGGGCAAAGAAAGTCGACGGCGCAGAGGTCATTATAAAGCGTGTGCCA
+GAAACAATGCCGCCTGAAATCTTCGCAAAAGCTGGCGGTAAAACGCAAAACGCACCGGTT
+GCCACCCCACAGGAGCTGGCGGATTACGATGCCATTATTTTTGGTACGCCAACCCGGTTT
+GGCAATATGTCAGGCCAGATGCGTACCTTCCTGGACCAAACCGGCGGACTGTGGGCATCC
+GGCGCGCTATACGGCAAGCTCGGCGGCGTGTTCAGTTCTACCGGAACGGGCGGCGGCCAG
+GAGCAGACCATCACCTCGACCTGGACTACGCTTGCCCATCATGGGATGGTGATTGTCCCG
+ATAGGCTATTCCGCACAGGAACTGTTTGACGTCTCCCAGGTTCGCGGCGGTACGCCTTAC
+GGCGCAACGACTATCGCTGGAGGCGACGGTTCACGTCAACCAAGCCAGGAGGAACTCTCT
+ATCGCTCGCTATCAGGGGGAATACGTCGCCGGTCTGGCAGTCAAACTCAACGGCTAAatg
+gagcctcaacccccacgtctgaaacccggaaaaatccttgacactctgggtgctatgcaa
+aaaagcctgacacgtgcctcgcagcgtattgcgcaatatattttagccttccccagacag
+gtgacacagtcatctattgcggatttgtcgcgcgacacacaggccggagaagccacggtt
+attcgcttttgtcgcaccctgggctataaaggttttcaggattttaaaatggacctggcc
+attgaacttgccactaccgagtctgatgacagtagtcctctactggatgccgaagttagc
+gaatccgacgatgcccacgcgattggtttaaaattgcagaacaccattagtaatgtatta
+tctgaaacgctaaatctgctggatatgcaacaggttctcggtgtcgtggacgccctacgt
+cactgtcactcagtttatatgtttggtgtgggctcatcggggatcacggcgctggatatg
+aaacacaagctaatgcgtatgggtttacggggcgatgcggtaagcaataaccattttatg
+tacatgcaggctacgctattgaaagcaggcgatgtcgcgatgggtgtcagtcactcgggc
+acatcgccagaaacagtgcattcactccgattggcccgacaggctggcgccaccacagtc
+gccattacccataatctgggttctccattatgtgaagaggccgatttttgcctgatcaat
+ggtaatcggcaaggaatgttgcagggtgactcgatcggtacgaaagccgcgcagcttttc
+gtctttgacctgctctatacccttcttgtacagtcctcgccggaacaggcccgagaaagc
+aaattacggacaatgaatgccctggacatgacaaaataaATGAAGAAACTGCCCGGCTTT
+ACGCAAGATTACTTACTCAGCAAGGCGACGACCCTGCCTGATAAAACACGCCTGGAGCGT
+GCCGTTGAACCGCTATGCGCGCGCCATCCCGGAGAGTGCGGCATTCTTGCGCTGGATAAC
+AGTCTGGACGCTTTTGCCGCCCGCTACCGCCTGACCGAAATGGCGGCGCGGACGCTGGAT
+GTGCAGTATTATATTTGGGAAGACGATATGTCCGGGCGGCTGCTCTTTTCGGTTCTGCTG
+TCGGCGGCGAAGCGCGGCGTTCATGTTCGTCTGCTGCTGGATGATAACAATACGCCTGGT
+CTGGATGATACGTTGCGCTTGCTGGATAGCCATCCTAATATCGAAGTTCGTCTGTTTAAT
+CCTTTCTCTTTTCGTACGCTACGCGCGCTGGGATATTTGACGGATTTTGCGCGGCTGAAT
+CGGCGGATGCACAATAAAAGTTACACTGCCGACGGCGTAGTGACGCTGGTCGGTGGGCGC
+AACATCGGCGATGCCTATTTCGGCGCTGGCGAGGAGCCGCTATTTTCCGATCTGGACGTG
+ATGGCCATTGGCCCGGTGGTCAATGATGTCGCCAATGATTTTGAACGTTACTGGCGCTGT
+AGTTCAGTGTCGACATTGCAGCAAGTATTATCCCTTTCTGAGCAGGAACTGACGCAGCGT
+ATCGAACTTCCCGAATCCTGGTATAACGATGAGATCACCCGCCGTTATCTGCATAAGCTG
+GAAACCAGCCAGTTTATGGCGGATCTCGATCGCGGAACGTTGCCGCTGATTTGGGCAAAA
+ACACGCTTGCTTAGCGATGACCCTTCTAAAGGCGAGGGGAAGGCGCAGCGCCATTCGCTT
+CTTCCGCAGCGATTATTTGACGTGATGGGGTCGCCGACGGAGCGTATCGACATTATTTCC
+GCTTACTTTGTCCCTACGCGCGCAGGCGTGGCGCAGTTGCTTAATCTGGTCAGGAAAGGT
+GTGAAGATCGCCATCTTAACTAACTCTCTGGCGGCCAACGATGTGGCGGTCGTTCACGCA
+GGGTACGCGCGCTGGCGCAAGAAATTACTGCGCTATGGCGTGGAGCTCTACGAACTGAAA
+CCGACCCGCGAACATGAAACCGCCGTACATGATCGCGGACTCACCGGGAACTCAGGTTCC
+AGCTTACATGCTAAAACGTTCAGTATTGATGGTAGTAAGGTGTTTATCGGGTCGCTTAAT
+TTTGATCCCCGTTCAACGCTTTTAAATACCGAAATGGGCTTTGTCATTGAAAGTGAAACG
+CTGGCGACGCTTATTCATAAGCGTTTTACGCAGAGCCAACGCGATGCGGCCTGGCAACTG
+CGGCTGGATCGCTGGGGACGAATTAACTGGATCGATCGTCAGCAAGAAGAGGAAAAGGTG
+TTAAAGAAAGAACCCGCTACGCGTTTCTGGCAGCGAGTTCTGGTACGGTTGGCGGCAATT
+TTACCTGTGGAATGGTTGCTGTGAATGCCAACTCAAGAAGCAAAAGCGCACCGCGTCGGC
+GAATGGGCAAGCCTGCGTAATACGTCGCCGGAAATTGCCGAAGCCATTTTTGAAGTCGCT
+CACTATGACGAGAAACTGGCAGAAAAAATATGGGAAGAAGGTAGCGATGAGGTGCTGATC
+AAAGCCTTTGAGAAAACGGACAAAGACTCGCTCTTCTGGGGCGAACAAGTCATCGAACGT
+AAGAACGTATAAatgtatcccgttgacctgcatatgcataccgtcgcgagcactcatgcc
+tacagtactctgagcgattatatcgcggaagccaaacgcaaaggcatgaaactttttgcg
+attaccgatcatggtccggacatggaagatgcgccgcatcactggcagtttattaacatg
+cgcatctggccgcgtctggttgacggcgtggggatactgcgtggcatggaggcgaatatc
+aagaatattaacggtgaaattgattgttccggaaagatgttcgactcgctggatctgatt
+atcgcaggctttcatgagcccgtttttgcgccgcatgataaagaaacgaatactcaggcg
+atgatcgcgaccatcgccagcggcaaggtgcatataattagtcacccgggaaatccaaag
+tatccagtggaggttaaagccatcgcgcaggcggcggcgaaacaccaggtagcgctggaa
+atcaacaactcttcttttctgcattcgcgtaaaggaagcgaagataagtgccgcgcggtc
+gctgccgccgtacgcgatgcgggaggctgggtagcgttaggctctgagtcccatacggcc
+tttacgcttggcgatttcaccgaatgccggaaaattctggatgcggtgaattttccggaa
+gatcgaatcctgaacgtctctccgcagcgcttactggcctttctcgagtcacgcggtatg
+gcgcctgtaccggaatttgccgaactttaaatgaatgagttttcaatcctgtgccgtgtg
+ctgggatcgttgttttaccgccaagcgcaagatcctttactggttccgctgtttacgtta
+atccgtgaaggtaaactggcggcagactggccgctggagcaggatgacatgctggcgcgt
+ttacagaaaagctgcgatatcacggagatttccactgattacaatgcgttatttgttggg
+gaagagtgcgcggtagcgccatacggcagtgcgtgggtcgaaggcgcggaagagtctgag
+gtgcgcgcttttttaacgtcgcgagggatgccgctggccgatacgcctgccgatcacatt
+ggcactttattgctcgcggcctccgggctggaagatcagtctgccgaagatgaaagtgaa
+gcgctggaaaccttatttgccgatgatctgcttccctggtgcaataccttcctcggtaaa
+gttgaagcccatgccgttacgccagtctggcgcactctggcgccgctaacgcgtgatgcg
+ataggggccatgtgggatgaacttgaggaagaagatgaagaataaatgatgcgcgcgatg
+aacatacttctttctattgctatcactacgggcatcctttctggaatatggggatgggtg
+gccgtctccctggggttactaagctgggccggttttttaggctgtacggcttatttggcc
+tgtccgcagggcggctttaagggattgttgatttccgcctgtacgctgttaagcgggatg
+gtgtgggcgctggtcattattcacggtagcgcgttggcgccgcatctggaaattgtgagt
+tacgtgttgacggggatcgtggcattcctgatgtgtatccaggcaaagcagctattgctt
+tcttttgttccgggaacatttatcggcgcctgcgcgacatttgcagggcagggtgagtgg
+cggttggtattaccgtcgctggcgctggggctaatctttggctatgccatgaaaaagagt
+gggctatggctggcatcacgccgcgagcaacattcagcgaatacggcggtcacaaagtaa
+ATGAAAAAAAACCTGCTGGGATTCACCCTCGCATCCTTGTTATTCACGACCGGTTCCGCC
+GTGGCGGCGGAGTATAAAATTGATAAAGAAGGCCAACATGCGTTCGTCAATTTCCGCATC
+CAGCATCTGGGCTACAGCTGGCTATACGGCACCTTTAAAGATTTCGACGGCACGTTCACT
+TTTGACGAAAAAAATCCGTCAGCAGACAAAGTGAATGTGACCATTAACACCAATAGCGTC
+GACACTAACCATGCCGAACGTGACAAACACCTGCGTAGCGCGGAGTTTCTTAATGTTGCG
+AAATTCCCGCAGGCAACCTTCACCTCTACCAGCGTGAAAAAAGAGGGCGATGAACTGGAT
+ATTACCGGCAATCTGACGCTCAATGGCGTGACTAAACCGGTGACGCTGGAAGCGAAGCTG
+ATGGGCCAGGGCGACGATCCGTGGGGCGGTAAGCGCGCGGGCTTTGAGGCCGAAGGAAAA
+ATTAAGCTGAAAGATTTCAATATAACTACCGATCTCGGCCCAGCCTCACAAGAGGTGGAG
+CTTATCATCTCAGTAGAAGGCGTTCAGCAGAAGTAAATGTTACTGATGATGGCGCTGATC
+GTGCGTATTATCTGGCGGCTTTATTCTCCGCCGCCCGTTGCGTTGACCAGCTATTCCCGT
+TTAACGCGCATTGGCGCCGCCGCGGGTCATATCCTTCTGTATCTCCTGCTCTTTGCGATA
+ATCATTAGCGGCTACCTGATTTCCACCGCCGACGGTAAACCGATTAGCGTCTTTGGCTGG
+TTTGAGATTCCGGCCACGCTTACGGACGCGGGCGCGCAGGCTGACATCGCCGGAACACTG
+CATCTGTGGTTTGCCTGGTCGCTGGTCATTATCTCGCTCTCGCATGGGGTTATGGCGCTA
+AAACACCATTTCATCGATAAAGACGACACACTGAAACGTATGACAGGAATGTCGTCATCT
+GACTATGGAGCTCAAAAATGAATGGTTAAGTTATCAATGACGCTGCGCCTGACAATTTCT
+TTTATCGCCATACTTATCCTCGCCTGTACCGGCATTAGCTGGACGCTCTATAACGCGCTG
+AGCAAAGAATTAACGTATCGGGATGATATGACGCTAATAAATCGGGCGGCGCAAATGCAG
+CAACTGTTACTGGATGGCGCCAGGCCGGAAAATCTGCCGCTCTATTTCAATCGGATGGTG
+GATACGAAGCAGGATATCTTATTGATCCACTCAGCAACAGGCCATAATGTTGCGATTAAT
+CATAGCGGCATCCCCGACCAACGCTTTAACGAGATTCCGCTGGCTAAAAACATCACCCGC
+GAAACCTTATTTCGCCAGGCGGTACAAGGCACGGAGCTGACCGCGGTACGAGTAAACGCC
+AGAAGCGGCGATAACCCGCTGACCCTTACTATTGCCAGGCTGGCGACGGAAAGGCGGCAA
+ATGCTGGCGCAATATCGCCGCAACAGTTTGCTGATTAGCCTTATCGCGATCCTCGTCTGT
+TCGGCGCTCAGTCCATTAGTCATCAGAAACGGGCTGCGGGCCATTACGTCGCTCAGCCGA
+CTCACCGCGGCGACAGATAGCGGCACACTTCGCCAGCCGCTGGCGGAACAGGCGTTACCC
+GTCGAGCTCAGGCCGCTTGGGCAAGCGCTAAATACCATGCGCCAGAAGCTTTCCGACGAT
+TTTGAACGCCTGAACCAATTTGCCGACGATCTGGCGCATGAGCTGCGCACGCCGGTTAAT
+ATTTTACTGGGGAAGAATCAGGTTATGCTGAGTCAGGAACGCAGCGCCGAAGAGTATCAA
+CAAGCCCTTGTCGATAATATTGAAGAGCTGGAGGGACTGTCGCGACTGACAGAAAATATT
+CTCTTTCTGGCACGCGCGGAGCACCAGAATATAGCGGTAAAAAAACAGCCTGTTTCGCTC
+AATGCGCTGGTCGAAAATATGCTGGATTATCTTAGCCCCCTTGCCGAAGAGAAGCACATC
+TGTTTTATAAATCAATGTCAGGGAACGGTATGGGCTGACGAAATATTATTACAAAGAGTG
+CTCTCAAACCTGCTGACGAATGCCATCCGTTATTCTGATGAAAACGCCGTGATACGTATT
+GAAAGCGCTTATGATGATAACGTTGCAGAAATTCGGGTCGCTAATCCGGGCAGCCCCACC
+GCCGATGCGGATAAGCTTTTCCGGCGTTTTTGGCGAGGAGATAATGCCCGCTACACTGCC
+GGTTTCGGCCTGGGGTTATCGTTAGTTAACGCGATTGCCCTATTGCACGGTGGCTCGGCA
+TCTTACCGCTATGCCGATGAACATAATATCTTTTCGGTTCGTCTGCCTGATAGCGGTGAT
+AGCTAAgtgatatgtctgaaagtccagggcggcattggtgaaatttttacggtgacgcag
+caggcggataaattcttgccggctacgcagttccactggagctggacggaaagcacagta
+cctgtattgatgattgggtttctgtttgccaatattcagcaatttactgccagtcaggat
+gtggtccaacgctatatggtgactgactccatagaggaaacgaagaaaacattacttaca
+aatgccaaactggttgcggtgatccctgttttcttttttgctatcggctcggcattattt
+gtctactatcagcaacagccacaattattaccggcgggattcaacactggcggcattttg
+cccttattcgtggtcacggaaatgccagtcggcattgcagggttgataatctccgctatt
+ttcgctgccgcgcagtcgagcatctccagcagcttaaacagcatttccagttgttttaat
+tccgatatctatcagcggttgagtcataaaaaaggaacgccagaaaaccgtatgaaaata
+gctaagttagttattctggtcgcgggcctgataagtagcgcggcctcggtatggctggtc
+atggccgatgaatcagagatctgggatgcatttaatagtctgataggtctgatgggaggg
+ccaatgaccggtctgttgatgctgggcattttctttaaacgagcaaatgccgggagtgcg
+gttttaggaattattatgagcgtcattaccgtgctgggcacacgctatgccactgacctt
+aacttcttcttttatggggtcattggctcgctaagcgtggtgatcagcggcgttattttc
+gccccgttatttgccccggcaccgccattgacgctggatgaaaaacctgaaccaaaggtg
+acattatgaATGAAAATCAACAGATATCTTCTGGGTATGGTTTCGTTTATAGCATTTTCA
+TCATATCTACAAGCGGCAACCCTTGATTATCGGCATGAATATGCTGATAGAACCAGAATT
+AATAAAGACCGTATTGCTATAATTGAAAAGCTTCCTAACGGCATTGGTTTTTATGTCGAT
+GCCAGCGTTAAATCGGGAGGAGTAGATGGTGAGCAGGATAAGCATTTAAGCGATCTCGTC
+GCAAACGCTATAGAACTGGGCGTAAGTTATAATTATAAAGTTACGGACCATTTTGTTTTG
+CAGCCTGGATTTATATTTGAAAGCGGTCCAGACACTTCAATTTATAAGCCTTATTTAAGG
+GCGCAATATAATTTTGATTCTGGTGTTTATATGGCTGGTCGTTACCGTTATGACTATGCA
+AGGAAGACAGCTAACTATAATGATGATGAGAAAACGAATAGATTTGATACTTATATAGGT
+TATGTTTTTGATGAGTTGAAATTGGAATATAAATTTACCTGGATGGATAGCGATCAAATT
+AAATTTGATAACAAAAAAACAAACTATGAACATAATGTGGCTTTAGCCTGGAAACTGAAT
+AAGTCATTTACACCATACGTTGAGGTCGGAAATGTAGCGGTGAGAAATAATACCGATGAG
+AGACAGACCCGTTATCGCGTTGGATTACAATACCACTTTTGAATGACGAAATACGGTGTT
+ATAGGTACAGGTTATTTTGGCGCTGAACTGGCGCGATTTATGTCTAAGGTTGAAGGGGCG
+AAAATCACTGCGATTTACGATCCGGTAAATGCGGCTCCGATAGCGAAAGAGCTGAACTGT
+GTCGCCACTTCAACGATGGAGGCGCTTTGTACCCATCCTGATGTGGATTGCGTAATTATT
+GCTTCACCAAATTACTTACATAAAGCGCCGGTCATTGCGGCGGCTAAAGCGGGTAAACAC
+GTGTTTTGTGAAAAACCTATCGCCTTAAATTACCAGGATTGTAAGGATATGGTTGATGCC
+TGCAAAGAAGCTGGTGTTACCTTTATGGCGGGTCACGTTATGAACTTTTTTCACGGGGTT
+CGCCACGCTAAAGCGCTCATCAAAGCCGGTGAAATCGGTGAAGTTACACAAGTTCACACT
+AAACGTAATGGTTTTGAAGACGTGCAGGATGAGATCTCATGGAAGAAGATTCGCGCAAAG
+TCAGGTGGGCATCTGTACCATCACATTCACGAGCTAGATTGTACACTGTTCATCATGGAT
+GAAACCCCATCCCTGGTTTCAATGGCGGCGGGGAATGTTGCGCACAAAGGTGAAAAATTT
+GGTGATGAAGATGATGTTGTCCTAATCACCCTTGAGTTTGAAAGCGGTCGTTTCGCGACA
+CTTCAGTGGGGATCATCGTTCCACTACCCTGAGCACTATGTATTAATTGAGGGCACGACA
+GGTGCAATTCTCATTGATATGCAAAACACGGCTGGTTATCTAATAAAAGCGGGCAAAAAA
+ACACACTTTCTTGTGCATGAAAGCCAGGCGGAGGATGATGATCGTCGCAACGGTAACATA
+TCCAGCGAGATGGATGGCGCAATCGCTTATGGTAAACCCGGTAAACGTACGCCGATGTGG
+CTCTCATCAATTATGAAACTGGAGATGCAGTACTTGCATGATGTGATAAACGGTCTGGAG
+CCAGGCGAGGAGTTTGCTAAATTGCTAACGGGAGAAGCGGCGACAAATGCCATTGCTACC
+GCTGATGCTGCGACGCTTTCTTCAAACGAGGGGCGCAAAGTTAAACTCACTGAAATTCTT
+GGCTAAATGACATCACGTCTTCAGGTCATACAGGGTGATATCACTCAACTTAGCGTCGAT
+GCGATTGTGAATGCCGCTAACGCATCATTAATGGGCGGCGGTGGCGTAGACGGCGCAATT
+CATCGCGCGGCGGGGCCGGCATTGCTGGACGCCTGTAAACTCATCCGTCAGCAACAGGGC
+GAATGTCAGACGGGACATGCGGTTATCACGCCTGCTGGCAAGCTTTCGGCAAAGGCGGTT
+ATTCACACAGTGGGGCCCGTCTGGCGAGGCGGCGAACACCAGGAAGCTGAGCTACTCGAA
+GAGGCATACCGGAATTGTTTGCTGCTTGCCGAGGCGAATCACTTTCGTTCCATCGCTTTT
+CCGGCAATCAGTACCGGCGTTTATGGCTATCCACGCGCCCAGGCCGCTGAAGTCGCCGTC
+AGGACGGTTTCAGATTTTATTACCCGTTACGCTCTGCCTGAACAGGTATACTTTGTCTGT
+TATGATGAAGAAACTGCCCGGCTTTACGCAAGATTACTTACTCAGCAAGGCGACGACCCT
+GCCTGA
diff --git a/t/data/expected_nnn_at_end.fa b/t/data/expected_nnn_at_end.fa
new file mode 100644
index 0000000..87b65fc
--- /dev/null
+++ b/t/data/expected_nnn_at_end.fa
@@ -0,0 +1,8 @@
+>1
+AAA
+>2
+AAACCC
+>3
+AAACCCGGG
+>4
+AAACCCGGGTTT
diff --git a/t/data/expected_nuc_multifasta.fa.aln b/t/data/expected_nuc_multifasta.fa.aln
new file mode 100644
index 0000000..1548ce8
--- /dev/null
+++ b/t/data/expected_nuc_multifasta.fa.aln
@@ -0,0 +1,90 @@
+>1
+ATGCAAATACAGAGCTTCTATCACTCAGCTTCACTAAAAACCCAGGAGGCTTTTAAAAGC
+CTACAAAAAACCTTATACAACGGAATGCAGATTCTCTCAGGCCAGGGCAAAGCGCCGGCT
+AAAGCGCCCGACGCTCGCCCGGAAATTATTGTCCTGCGAGAACCTGGCGCGACATGGGGG
+AATTATCTACAGCATCAGAAGACGTCTAACCACTCGCTGCATAACCTCTATAACTTACAG
+CGCGATCTTCTTACCGTCGCGGCAACCGTTCTGGGTAAACAAGACCCGGTTCTAACGTCA
+ATGGCAAACCAAATGGAGTTAGCCAAAGTTAAAGCGGACCGGCCAGCAACAAAACAAGAA
+GAAGCTGCGGCAAAAGCATTGAAGAAAAATCTTATCGAACTTATTGCAGCACGCACTCAG
+CAGCAAAATGGCTTACCTGCAAAAGAAGCTCATCGCTTTGCGGCAGTAGCGTTTAGAGAT
+GCTCAGGTCAAGCAGCTCAATAACCAGCCCTGGCAAACCATAAAAAATACACTCACGCAT
+AACGGGCATCACTATACCAACACGCAGCTCCCTGCCGCAGAGATGAAAATCGGCGCAAAA
+GATATCTTTCCCAGTGCTTATGAGGGAAAGGGCGTATGCAGTTGGGATACCAAGAATATT
+CATCACGCCAATAATTTGTGGATGTCCACGGTGAGTGTGCATGAGGACGGTAAAGATAAA
+ACGCTTTTTTGCGGGATACGTCATGGTGTGCTTTCCCCCTATCATGAAAAAGATCCGCTT
+CTGCGTCAGGCCGGCGCTGAAAACAAAGCCAAAGAAGTATTAGCTGCGGCACTTTTTAGT
+AAACCTGAGTTGCTTAACAGAGCCTTAGAGGGCGAAGCGGTAAGCCTGAAACTGGTATCC
+GTCGGGTTACTCACCGCGTCGAATATTTTCGGCAAAGAGGGAACTATGGTCGAGGATCAA
+ATGCGCGCATGGCAATCGTTGACCCAGCCGGGAAAAATGATTCATTTAAAAATCCGCAAT
+AAAGATGGCGATCTACAGACGGTAAAAATAAAACCGGACGTCGCCGCATTTAATGTGGGT
+GTTAATGAGCTGGCGCTCAAGCTCGGCTTTGGCCTTAAAGCATCAGATAGCTATAATGCC
+GAAGCGCTGCATCAGTTATTAGGCAATGATTTACGCCCTGAAGCCAGACCAGGTGGCTGG
+GTTGGCGAATGGCTGGCGCAATACCCGGATAATTATGAGGTCGTCAATACATTAGCGCGC
+CAGATTAAGGATATCTGGAAAAATAACCAACATCATAAAGATGGCGGCGAACCCTATAAA
+CTCGCACAACGCCTTGCCATGTTAGCCCATGAAATTGACGCGGTGCCCGCCTGGAATTGT
+AAAAGCGGCAAAGATCGTACAGGGATGATGGATTCAGAAATCAAGCGAGAGCTCATTTCT
+TTCCATCAGACCCATATGTTAAGTGCGCCTGGTAGTCTTCCGGATAGCGGTGGACAGAAA
+ATTTTCCAAAAAGTATTACTGAATAGCGGTAACCTGGAGATTCAGAAACAAAATACGGGC
+GGGGCGGGAAACAAAGTAATGAAAAATTTATCGCCAGAGGTGCTCAATCTTTCCTATCAA
+AAACGAGTTGGGGATGAAAATATTTGGCGTCAG------NNNAATATTTCTTCATTAATC
+ACATCT
+>2
+ATGCAAATACAGAGCTTCTATCACTCAGCTTCACTAAAAACCCAGGAGGCTTTTAAAAGC
+CTACAAAAAACCTTATACAACGGAATGCAGATTCTCTCAGGCCAGGGCAAAGCGCCGGCT
+AAAGCGCCCGACGCTCGCCCGGAAATTATTGTCCTGCGAGAACCTGGCGCGACATGGGGG
+AATTATCTACAGCATCAGAAGACGTCTAACCACTCGCTGCATAACCTCTATAACTTACAG
+CGCGATCTTCTTACCGTCGCGGCAACCGTTCTGGGTAAACAAGACCCGGTTCTAACGTCA
+ATGGCAAACCAAATGGAGTTAGCCAAAGTTAAAGCGGACCGGCCAGCAACAAAACAAGAA
+GAAGCTGCGGCAAAAGCATTGAAGAAAAATCTTATCGAACTTATTGCAGCACGCACTCAG
+CAGCAAAATGGCTTACCTGCAAAAGAAGCTCATCGCTTTGCGGCAGTAGCGTTTAGAGAT
+GCTCAGGTCAAGCAGCTCAATAACCAGCCCTGGCAAACCATAAAAAATACACTCACGCAT
+AACGGGCATCACTATACCAACACGCAGCTCCCT---GCAGAGATGAAAATCGGCGCAAAA
+GATATCTTTCCCAGTGCTTATGAGGGAAAGGGCGTATGCAGTTGGGATACCAAGAATATT
+CATCACGCCAATAATTTGTGGGTGTCCACGGTGAGTGTGCATGAGGACGGTAAAGATAAA
+ACGCTTTTTTGCGGGATACGTCATGGTGTGCTTTCCCCCTATCATGAAAAAGATCCGCTT
+CTGCGTCAGGCCGGCGCTGAAAACAAAGCCAAAGAAGTATTAGCTGCGGCACTTTTTAGT
+AAACCTGAGTTGCTTAACAGAGCCTTAGAGGGCGAAGCGGTAAGCCTGAAACTGGTATCC
+GTCGGGTTACTCACCGCGTCGAATATTTTCGGCAAAGAGGGAACTATGGTCGAGGATCAA
+ATGCGCGCATGGCAATCGTTGACCCAGCCGGGAAAAATGATTCATTTAAAAATCCGCAAT
+AAAGATGGCGATCTACAGACGGTAAAAATAAAACCGGACGTCGCCGCATTTAATGTGGGT
+GTTAATGAGCTGGCGCTCAAGCTCGGCTTTGGCCTTAAAGCATCAGATAGCTATAATGCC
+GAAGCGCTACATCAGTTATTAGGCAATGATTTACGCCCTGAAGCCAGACCAGGTGGCTGG
+GTTGGCGAATGGCTGGCGCAATACCCGGATAATTATGAGGTCGTCAATACATTAGCGCGC
+CAGATTAAGGATATCTGGAAAAATAACCAACATCATAAAGATGGCGGCGAACCCTATAAA
+CTCGCACAACGCCTTGCCATGTTAGCCCATGAAATTGACGCGGTGCCCGCCTGGAATTGT
+AAAAGCGGCAAAGATCGTACAGGGATGATGGATTCAGAAATCAAGCGAGAGCTCATTTCT
+TTCCATCAGACCCATATGTTAAGTGCGCCTGGTAGTCTTCCGGATAGCGGTGGACAGAAA
+ATTTTCCAAAAAGTATTACTGAATAGCGGTAACCTGGAGATTCAGAAACAAAATACGGGC
+GGGGCGGGAAACAAAGTAATGAAAAATTTATCGCCAGAGGTGCTCAATCTTTCCTATCAA
+AAACGAGTTGGGGATGAAAATATTTGG---CAGTCAGTAAAAGGTATTTCTTCATTAATC
+ACATCT
+>3
+ATGCAAATACAGAGCTTCTATCACTCAGCTTCACTAAAAACCCAGGAGGCTTTTAAAAGC
+CTACAAAAAACCTTATACAACGGAATGCAGATTCTCTCAGGCCAGGGCAAAGCGCCGGCT
+AAAGCGCCCGACGCTCGCCCGGAAATTATTGTCCTGCGAGAACCTGGCGCGACATGGGGG
+AATTATCTACAGCATCAGAAGACGTCTAACCACTCGCTGCATAACCTCTATAACTTACAG
+CGCGATCTTCTTACCGTCGCGGCAACCGTTCTGGGTAAACAAGACCCGGTTCTAACGTCA
+ATGGCAAACCAAATGGAGTTAGCCAAGGTTAAAGCGGACCGGCCAGCAACAAAACAAGAA
+GAAGCTGCGGCAAAAGCATTGAAGAAAAATCTTATCGAACTTATTGCAGCACGCACTCAG
+CAGCAAAATGGCTTACCTGCAAAAGAAGCTCATCGCTTTGCGGCAGTAGCGTTTAGAGAT
+GCTCAGGTCAAGCAGCTCAATAACCAGCCCTGGCAAACCATAAAAAATACACTCACGCAT
+AACGGGCATCACTATACCAACACGCAGCTCCCTGCCGCAGAGATGAAAATCGGCGCAAAA
+GATATCTTTCCCAGTGCTTATGAGGGAAAGGGCGTATGCAGTTGGGATACCAAGAATATT
+CATCACGCCAATAATTTGTGGATGTCCACGGTGAGTGTGCATGAGGACGGTAAAGATAAA
+ACGCTTTTTTGCGGGATACGTCATGGTGTGCTTTCCCCCTATCATGAAAAAGATCCGCTT
+CTGCGTCAGGCCGGCGCTGAAAACAAAGCCAAAGAAGTATTAGCTGCGGCACTTTTTAGT
+AAACCTGAGTTGCTTAACAGAGCCTTAGAGGGCGAAGCGGTAAGCCTGAAACTGGTATCC
+GTCGGGTTACTCACCGCGTCGAATATTTTCGGCAAAGAGGGAACTATGGTCGAGGATCAA
+ATGCGCGCATGGCAATCGTTGACCCAGCCGGGAAAAATGATTCATTTAAAAATCCGCAAT
+AAAGATGGCGATCTACAGACGGTAAAAATAAAACCGGACGTCGCCGCATTTAATGTGGGT
+GTTAATGAGCTGGCGCTCAAGCTCGGCTTTGGCCTTAAAGCATCAGATAGCTATAATGCC
+GAAGCGCTACATCAGTTATTAGGCAATGATTTACGCCCTGAAGCCAGACCAGGTGGCTGG
+GTTGGCGAATGGCTGGCGCAATACCCGGATAATTATGAGGTCGTCAATACATTAGCGCGC
+CAGATTAAGGATATCTGGAAAAATAACCAACATCATAAAGATGGCGGCGAACCCTATAAA
+CTCGCACAACGCCTTGCCATGTTAGCCCATGAAATTGACGCGGTGCCCGCCTGGAATTGT
+AAAAGCGGCAAAGATCGTACAGGGATGATGGATTCAGAAATCAAGCGAGAGCTCATTTCT
+TTCCATCAGACCCATATGTTAAGTGCGCCTGGTAGTCTTCCGGATAGCGGTGGACAGAAA
+ATTTTCCAAAAAGTATTACTGAATAGCGGTAACCTGGAGATTCAGAAACAAAATACGGGC
+GGGGCGGGAAACAAAGTAATGAAAAATTTATCGCCAGAGGTGCTCAATCTTTCCTATCAA
+AAACGAGTTGGGGATGAAAATATTTGG---CAGTCAGTAAAAGGTATTTCTTCATTAATC
+ACATCT
diff --git a/t/data/expected_nuc_multifasta.faa b/t/data/expected_nuc_multifasta.faa
new file mode 100644
index 0000000..951f61b
--- /dev/null
+++ b/t/data/expected_nuc_multifasta.faa
@@ -0,0 +1,48 @@
+>AAAA#74_01075
+CQYHLF*PYWRALPPLLARFLAFLAKNRLTACWLFRWASPQGSCCSSR*WRCCPPR*IPR
+GCRLYWATGCLLSACWATSGWIVCFLTLIRRIWCKKGSSRFPAR*NALRFY*RSALACTT
+FRKESPPLSLPAAILNWVSASHWRWRCTIFLKGWRLPARFMPRRAQNVPRFFGPVSPAWR
+KFLAACWRG*FWAAWFHRSLWRLSWQQSPALWWRSPSMN*CRWQKRSILTITPAMVCFAV
+CPSWGSVSSFCRR*VSV
+>BBBB#75_01314
+MSVPLILTLLAGAATFIGAFLGVLGQKPSNRVLAFSLGFAAGIMLLISLMDAARRVRYRG
+DVACTGLRDVYYRPVGLLRAGSSASSRSSAGSGAKKAAAASRLDKTHCDFIDARH*PAQL
+SGRNRHLCHCQQQS*TGFRHRTGGGVAQYS*RAGGCRPGLCRDGLKTYRDFLGRYLRHGG
+NSWRRAGVADFGQPGFTDRYGGYHGSSRRHYGGALRR*TDAVGKRDRS*Q*PQLWCALRY
+VHHGAQSRHFADDRYRL
+>CCCC#76_00877
+MSVPLILTLLAGAATFIGAFLGVLGQKPSNRVLAFSLGFAAGIMLLISLMEMLPAALDTE
+GMSPVLGYGSLLSACWATSGWIVCFLTLIRRIWCKKGSSRFPAR*NALRFY*RSALACTT
+FRKESPLSLPAAILNWVSASHWRWRCTIFLKGWRLPARFMPRRAQNVPRFFGPVSPAWRK
+FLAACWRG*FWAAWFHRSLWRLSWQQSPALWWRSPSMN*CRWQKRSILTITPAMVCFAVC
+PSWGSVSSFCRR*VSV
+>DDDD#77_01105
+MSVPLILTLLAGAATFIGAFLGVLGQKPSNRVLAFSLGFAAGIMLLISLMEMLPAALDTE
+GMSPVLGYGMFIIGLLGYFGLDRLLPHAHPQDLVQKRQQPLPGSIKRTAILLTLGISLHN
+FPEGIATFVTASSNLELGFGIALAVALHNIPEGLAVAGPVYAATGSKRTAIFWAGISGMA
+EILGGVLAWLILGSLVSPIVMAAIMAAVAGIMVALSVDELMPLAKEIDPNNNPSYGVLCG
+MSIMGLSLVILQTIGIG*
+>EEEE
+MSVPLILTLLAGAATFIGAFLGVLGQKPSNRVLAFSLGFAAGIMLLISLMEMLPAALDTE
+GMSPVLGYGMFIIGLLGYFGLDRLLPHAHPQDLVQKRQQPLPGSIKRTAILLTLGISLHN
+FPEGIATFVTASSNLELGFGIALAVALHNIPEGLAVAGPVYAATGSKRTAIFWAGISGMA
+EILGVLAWLILGSLVSPIVMAAIMAAVAGIMVALSVDELMPLAKEIDPNNNPSYGVLCGM
+SIMGLSLVILQTIGIG*
+>FFFF
+MSVPLILTLLAGAATFIGAFLGVLGQKPSNRVLAFSLGFAAGIMLLISLMEMLPAALDTE
+GMSPVLGYGMFIIGLLGYFGLDRLLPHAHPQDLVQKRQQPLPGSIKRTAILLTLGISLHN
+FPEGIATFVTASSNLELGFGIALAVALHNIPEGLAVAGPVYAATGSKRTAIFWAGISGMA
+EILGGVLAWLILGSLAPIVMAAIMAAVAGIMVALSVDELMPLAKEIDPNNNPSYGVLCGM
+SIMGLSLVILQTIGIG*
+>GGGG
+MSVPLILTLLAGAATFIGAFLGVLGQKPSNRVLAFSLGFAAGIMLLISLMEMLPAALDTE
+GMSPVLGYGMFIIGLLGYFGLDRLLPHAHPQDLVQKRQQPLPGSIKRTAILLTLGISLHN
+FPEGIATFVTASSNLELGFGIALGWRCTIFLKGWRLPARFMPRRAQNVPRFFGPVSPAWR
+KFLAACWRG*FWAAWFHRSLWRLSWQQSPALWWRSPMN*CRWQKRSILTITPAMVCFAVC
+PSWGSVSSFCRR*VSV
+>HHHH
+MSVPLILTLLAGAATFIGAFLGVLGQKPSNRVLAFSLGFAAGIMLLISLMEMLPAALDTE
+GMSPVLGYGMFIIGLLGYFGLDRLLPHASAGSGAKKAAAASRLDKTHCDFIDARH*PAQL
+SGRNRHLCHCQQQS*TGFRHRTGGGVAQYS*RAGGCRPGLCRDGLKTYRDFLGRYLRHGG
+NSWRRAGVADFGQPGFTDRYGGYHGSSRRHYGGALRR*TDAVGKRDRS*Q*PQLWCALRY
+VHHGAQSRHFADDRYRL
diff --git a/t/data/expected_nuc_multifasta_mafft.fa.aln b/t/data/expected_nuc_multifasta_mafft.fa.aln
new file mode 100644
index 0000000..3001679
--- /dev/null
+++ b/t/data/expected_nuc_multifasta_mafft.fa.aln
@@ -0,0 +1,90 @@
+>1
+atgcaaatacagagcttctatcactcagcttcactaaaaacccaggaggcttttaaaagc
+ctacaaaaaaccttatacaacggaatgcagattctctcaggccagggcaaagcgccggct
+aaagcgcccgacgctcgcccggaaattattgtcctgcgagaacctggcgcgacatggggg
+aattatctacagcatcagaagacgtctaaccactcgctgcataacctctataacttacag
+cgcgatcttcttaccgtcgcggcaaccgttctgggtaaacaagacccggttctaacgtca
+atggcaaaccaaatggagttagccaaagttaaagcggaccggccagcaacaaaacaagaa
+gaagctgcggcaaaagcattgaagaaaaatcttatcgaacttattgcagcacgcactcag
+cagcaaaatggcttacctgcaaaagaagctcatcgctttgcggcagtagcgtttagagat
+gctcaggtcaagcagctcaataaccagccctggcaaaccataaaaaatacactcacgcat
+aacgggcatcactataccaacacgcagctccctgccgcagagatgaaaatcggcgcaaaa
+gatatctttcccagtgcttatgagggaaagggcgtatgcagttgggataccaagaatatt
+catcacgccaataatttgtggatgtccacggtgagtgtgcatgaggacggtaaagataaa
+acgcttttttgcgggatacgtcatggtgtgctttccccctatcatgaaaaagatccgctt
+ctgcgtcaggccggcgctgaaaacaaagccaaagaagtattagctgcggcactttttagt
+aaacctgagttgcttaacagagccttagagggcgaagcggtaagcctgaaactggtatcc
+gtcgggttactcaccgcgtcgaatattttcggcaaagagggaactatggtcgaggatcaa
+atgcgcgcatggcaatcgttgacccagccgggaaaaatgattcatttaaaaatccgcaat
+aaagatggcgatctacagacggtaaaaataaaaccggacgtcgccgcatttaatgtgggt
+gttaatgagctggcgctcaagctcggctttggccttaaagcatcagatagctataatgcc
+gaagcgctgcatcagttattaggcaatgatttacgccctgaagccagaccaggtggctgg
+gttggcgaatggctggcgcaatacccggataattatgaggtcgtcaatacattagcgcgc
+cagattaaggatatctggaaaaataaccaacatcataaagatggcggcgaaccctataaa
+ctcgcacaacgccttgccatgttagcccatgaaattgacgcggtgcccgcctggaattgt
+aaaagcggcaaagatcgtacagggatgatggattcagaaatcaagcgagagctcatttct
+ttccatcagacccatatgttaagtgcgcctggtagtcttccggatagcggtggacagaaa
+attttccaaaaagtattactgaatagcggtaacctggagattcagaaacaaaatacgggc
+ggggcgggaaacaaagtaatgaaaaatttatcgccagaggtgctcaatctttcctatcaa
+aaacgagttggggatgaaaatatttggc-gtcagtaaaa--tatttcttcattaatcaca
+tct
+>2
+atgcaaatacagagcttctatcactcagcttcactaaaaacccaggaggcttttaaaagc
+ctacaaaaaaccttatacaacggaatgcagattctctcaggccagggcaaagcgccggct
+aaagcgcccgacgctcgcccggaaattattgtcctgcgagaacctggcgcgacatggggg
+aattatctacagcatcagaagacgtctaaccactcgctgcataacctctataacttacag
+cgcgatcttcttaccgtcgcggcaaccgttctgggtaaacaagacccggttctaacgtca
+atggcaaaccaaatggagttagccaaagttaaagcggaccggccagcaacaaaacaagaa
+gaagctgcggcaaaagcattgaagaaaaatcttatcgaacttattgcagcacgcactcag
+cagcaaaatggcttacctgcaaaagaagctcatcgctttgcggcagtagcgtttagagat
+gctcaggtcaagcagctcaataaccagccctggcaaaccataaaaaatacactcacgcat
+aacgggcatcactataccaacacgcagctccct---gcagagatgaaaatcggcgcaaaa
+gatatctttcccagtgcttatgagggaaagggcgtatgcagttgggataccaagaatatt
+catcacgccaataatttgtgggtgtccacggtgagtgtgcatgaggacggtaaagataaa
+acgcttttttgcgggatacgtcatggtgtgctttccccctatcatgaaaaagatccgctt
+ctgcgtcaggccggcgctgaaaacaaagccaaagaagtattagctgcggcactttttagt
+aaacctgagttgcttaacagagccttagagggcgaagcggtaagcctgaaactggtatcc
+gtcgggttactcaccgcgtcgaatattttcggcaaagagggaactatggtcgaggatcaa
+atgcgcgcatggcaatcgttgacccagccgggaaaaatgattcatttaaaaatccgcaat
+aaagatggcgatctacagacggtaaaaataaaaccggacgtcgccgcatttaatgtgggt
+gttaatgagctggcgctcaagctcggctttggccttaaagcatcagatagctataatgcc
+gaagcgctacatcagttattaggcaatgatttacgccctgaagccagaccaggtggctgg
+gttggcgaatggctggcgcaatacccggataattatgaggtcgtcaatacattagcgcgc
+cagattaaggatatctggaaaaataaccaacatcataaagatggcggcgaaccctataaa
+ctcgcacaacgccttgccatgttagcccatgaaattgacgcggtgcccgcctggaattgt
+aaaagcggcaaagatcgtacagggatgatggattcagaaatcaagcgagagctcatttct
+ttccatcagacccatatgttaagtgcgcctggtagtcttccggatagcggtggacagaaa
+attttccaaaaagtattactgaatagcggtaacctggagattcagaaacaaaatacgggc
+ggggcgggaaacaaagtaatgaaaaatttatcgccagaggtgctcaatctttcctatcaa
+aaacgagttggggatgaaaatatttggcagtcagtaaaaggtatttcttcattaatcaca
+tct
+>3
+atgcaaatacagagcttctatcactcagcttcactaaaaacccaggaggcttttaaaagc
+ctacaaaaaaccttatacaacggaatgcagattctctcaggccagggcaaagcgccggct
+aaagcgcccgacgctcgcccggaaattattgtcctgcgagaacctggcgcgacatggggg
+aattatctacagcatcagaagacgtctaaccactcgctgcataacctctataacttacag
+cgcgatcttcttaccgtcgcggcaaccgttctgggtaaacaagacccggttctaacgtca
+atggcaaaccaaatggagttagccaaggttaaagcggaccggccagcaacaaaacaagaa
+gaagctgcggcaaaagcattgaagaaaaatcttatcgaacttattgcagcacgcactcag
+cagcaaaatggcttacctgcaaaagaagctcatcgctttgcggcagtagcgtttagagat
+gctcaggtcaagcagctcaataaccagccctggcaaaccataaaaaatacactcacgcat
+aacgggcatcactataccaacacgcagctccctgccgcagagatgaaaatcggcgcaaaa
+gatatctttcccagtgcttatgagggaaagggcgtatgcagttgggataccaagaatatt
+catcacgccaataatttgtggatgtccacggtgagtgtgcatgaggacggtaaagataaa
+acgcttttttgcgggatacgtcatggtgtgctttccccctatcatgaaaaagatccgctt
+ctgcgtcaggccggcgctgaaaacaaagccaaagaagtattagctgcggcactttttagt
+aaacctgagttgcttaacagagccttagagggcgaagcggtaagcctgaaactggtatcc
+gtcgggttactcaccgcgtcgaatattttcggcaaagagggaactatggtcgaggatcaa
+atgcgcgcatggcaatcgttgacccagccgggaaaaatgattcatttaaaaatccgcaat
+aaagatggcgatctacagacggtaaaaataaaaccggacgtcgccgcatttaatgtgggt
+gttaatgagctggcgctcaagctcggctttggccttaaagcatcagatagctataatgcc
+gaagcgctacatcagttattaggcaatgatttacgccctgaagccagaccaggtggctgg
+gttggcgaatggctggcgcaatacccggataattatgaggtcgtcaatacattagcgcgc
+cagattaaggatatctggaaaaataaccaacatcataaagatggcggcgaaccctataaa
+ctcgcacaacgccttgccatgttagcccatgaaattgacgcggtgcccgcctggaattgt
+aaaagcggcaaagatcgtacagggatgatggattcagaaatcaagcgagagctcatttct
+ttccatcagacccatatgttaagtgcgcctggtagtcttccggatagcggtggacagaaa
+attttccaaaaagtattactgaatagcggtaacctggagattcagaaacaaaatacgggc
+ggggcgggaaacaaagtaatgaaaaatttatcgccagaggtgctcaatctttcctatcaa
+aaacgagttggggatgaaaatatttggcagtcagtaaaaggtatttcttcattaatcaca
+tct
diff --git a/t/data/expected_number_of_conserved_genes.tab b/t/data/expected_number_of_conserved_genes.tab
new file mode 100644
index 0000000..6957be8
--- /dev/null
+++ b/t/data/expected_number_of_conserved_genes.tab
@@ -0,0 +1,10 @@
+4	2	1
+4	2	1
+4	2	1
+4	2	1
+4	2	1
+4	2	1
+4	2	1
+4	2	1
+4	2	1
+4	2	1
diff --git a/t/data/expected_number_of_conserved_genes_0.6.tab b/t/data/expected_number_of_conserved_genes_0.6.tab
new file mode 100644
index 0000000..e3b3694
--- /dev/null
+++ b/t/data/expected_number_of_conserved_genes_0.6.tab
@@ -0,0 +1,10 @@
+4	2	4
+4	2	4
+4	2	4
+4	2	4
+4	2	4
+4	2	4
+4	2	4
+4	2	4
+4	2	4
+4	2	4
diff --git a/t/data/expected_number_of_genes_in_pan_genome.tab b/t/data/expected_number_of_genes_in_pan_genome.tab
new file mode 100644
index 0000000..bef0a4c
--- /dev/null
+++ b/t/data/expected_number_of_genes_in_pan_genome.tab
@@ -0,0 +1,10 @@
+4	6	7
+4	6	7
+4	6	7
+4	6	7
+4	6	7
+4	6	7
+4	6	7
+4	6	7
+4	6	7
+4	6	7
diff --git a/t/data/expected_number_of_new_genes.tab b/t/data/expected_number_of_new_genes.tab
new file mode 100644
index 0000000..6957be8
--- /dev/null
+++ b/t/data/expected_number_of_new_genes.tab
@@ -0,0 +1,10 @@
+4	2	1
+4	2	1
+4	2	1
+4	2	1
+4	2	1
+4	2	1
+4	2	1
+4	2	1
+4	2	1
+4	2	1
diff --git a/t/data/expected_number_of_unique_genes.tab b/t/data/expected_number_of_unique_genes.tab
new file mode 100644
index 0000000..66c0f85
--- /dev/null
+++ b/t/data/expected_number_of_unique_genes.tab
@@ -0,0 +1,10 @@
+4	4	3
+4	4	3
+4	4	3
+4	4	3
+4	4	3
+4	4	3
+4	4	3
+4	4	3
+4	4	3
+4	4	3
diff --git a/t/data/expected_one_gene_presence_and_absence.Rtab b/t/data/expected_one_gene_presence_and_absence.Rtab
new file mode 100644
index 0000000..79db304
--- /dev/null
+++ b/t/data/expected_one_gene_presence_and_absence.Rtab
@@ -0,0 +1,2 @@
+Gene	Gene	query_1.fa	query_2.fa	query_3.fa
+hly	1	0	0
diff --git a/t/data/expected_out_of_order_fasta.fa.sorted.fa b/t/data/expected_out_of_order_fasta.fa.sorted.fa
new file mode 100644
index 0000000..56ce040
--- /dev/null
+++ b/t/data/expected_out_of_order_fasta.fa.sorted.fa
@@ -0,0 +1,10 @@
+>1111
+AAAAAAA
+>2222
+TTTTTTT
+>3333
+GGGGGGG
+>4444
+CCCCCCC
+>5555
+AAAAAAA
diff --git a/t/data/expected_output_core_missing_genes.aln b/t/data/expected_output_core_missing_genes.aln
new file mode 100644
index 0000000..22e00d6
--- /dev/null
+++ b/t/data/expected_output_core_missing_genes.aln
@@ -0,0 +1,10 @@
+>111
+AAAANNNNNNNGGNNNNNXXXXXXX
+>222
+AAAACCCCCCCGGTTTTTXXXXXXX
+>333
+AAAACCCCCCCGGNNNNNXXXXXXX
+>444
+AAAACCCCCCCGGTTTTTXXXXXXX
+>555
+AAAACCCCCCCNNNNNNNXXXXXXX
diff --git a/t/data/expected_output_filtered.fa b/t/data/expected_output_filtered.fa
new file mode 100644
index 0000000..e4321b8
--- /dev/null
+++ b/t/data/expected_output_filtered.fa
@@ -0,0 +1,18 @@
+>2363_5_03666
+AAAAAAAAAAAAAGGGGGGGGGGGGGG
+>4075_2#3_03437
+AAAAAAAAAAAAAGGGGGGGGGGGGGG
+>4075_1#8_03461
+AAAAAAAAAAAAAGGGGGGGGGGGGGG
+>2212_3_02841
+AAAAAAAAAAAAAGGGGGGGGGGGGGG
+>2363_5_00947
+AAAAAAAAAAAAAGGGGGGGGGGGGGG
+>2363_7_00085
+AAAAAAAAAAAAAGGGGGGGGGGGGGG
+>2460_2_00826
+AAAAAAAAAAAAAGGGGGGGGGGGGGG
+>4075_1#6_04091
+AAAAAAAAAAAAAGGGGGGGGGGGGGG
+>4075_1#3_04238
+AAAAAAAAAAAAAGGGGGGGGGGGGGG
diff --git a/t/data/expected_output_groups b/t/data/expected_output_groups
new file mode 100644
index 0000000..726ff09
--- /dev/null
+++ b/t/data/expected_output_groups
@@ -0,0 +1,2 @@
+6259_6#6_02209	6259_7#20_00601	6593_5#7_01700	6630_2#15_02136	6630_4#12_02032	6631_2#21_02147	6631_4#23_02060	6631_7#21_02171	6664_1#10_02278
+1234#12_01975	1234#1_01317	1234#20_01662	1234#3_01167
diff --git a/t/data/expected_output_groups_cdhit b/t/data/expected_output_groups_cdhit
new file mode 100644
index 0000000..6e492aa
--- /dev/null
+++ b/t/data/expected_output_groups_cdhit
@@ -0,0 +1 @@
+3634_6_04078	2212_1_01414	2363_1_00811	2541_2_00696	2541_8_00920	3634_7_00911
diff --git a/t/data/expected_output_groups_group_2.fa b/t/data/expected_output_groups_group_2.fa
new file mode 100644
index 0000000..48be2fc
--- /dev/null
+++ b/t/data/expected_output_groups_group_2.fa
@@ -0,0 +1,19 @@
+>1234#10_00003
+VKTTLSGEIEKLRYEVAVKIVNLQGEVLDLRAEMKINFSEVNSKILKLQFEFEMAKIRKE
+LKTEIADLRAETKTDFLELQKSIVDIYKTISTQTRWILGVATLFAAIGKVIN*
+>1234#10_00018
+MDFSFNLVDNNGTVMRSTFSPIRFFTFSFLWILIDCSSVQKIENFNSVLQEPTFKSLKEE
+EAILGGSSDSDYKIRKTGNTIPVFVLSPIQTPEGMDSKLAAFLSDEVRLIWAKVKGKQVR
+IQEMSWKNPSQLSQELKRLNVDAVIKTDIREVSGKWVVNQKITDPVKEIVYGSVDGSFQS
+PKIEDELQANQAYYLKHGSGVLALDAKSSLVPIWEKSLSSGEIDSILKKSIQGYLSFSAS
+SADTEVLFQGEKIGIASFRNYPLPEGLQQIQITRPGQKDISKSLQIRSGQTISIYQEWKE
+DRTLGGVRILSFPEALQVALDGLKMGETPFYRSNLTPGAMQLELVRETENGPLVYYEGQL
+IVDADKITEIALPYKTDNLISEPEFWKLSGEKGFQAFSGKTLDFQNVSSLPPGWYGVFSA
+PFVPENMELEGIIPITAESDSGIVAISFHTSKKTISLEYEKERLSVYSFPSNGNNVGTYK
+FKKEDKEDGRPFRIITDVKEGTIRLYLGYSKVLEDRLDVSGVWRISILTRGENFSKRSPL
+RNLKIEYKGYK*
+>1234#10_00005
+LTDKLIGFYDPVRMKAERKTSETEIKLEMNLRGTGQYQFDTEIPFFEHMLSHISKHGLID
+LNLWLRGDIEIDCHHSVEDTAILMGATIHKQLGDKAGIFRYGHFTLTMDEVLTTVAVDLG
+GRYFFKYTGPELTGKFGIYDAELSLEFLQKLALNAKMNLHVVVHYGDNKHHVHESIFKAL
+GKALRMAIAQDSAAAGAIPSTKGVLE*
diff --git a/t/data/expected_output_groups_group_2_multi.fa b/t/data/expected_output_groups_group_2_multi.fa
new file mode 100644
index 0000000..e06ec9f
--- /dev/null
+++ b/t/data/expected_output_groups_group_2_multi.fa
@@ -0,0 +1,19 @@
+>1234#10_00003 hypothetical protein
+VKTTLSGEIEKLRYEVAVKIVNLQGEVLDLRAEMKINFSEVNSKILKLQFEFEMAKIRKE
+LKTEIADLRAETKTDFLELQKSIVDIYKTISTQTRWILGVATLFAAIGKVIN*
+>1234#10_00018 putative lipoprotein
+MDFSFNLVDNNGTVMRSTFSPIRFFTFSFLWILIDCSSVQKIENFNSVLQEPTFKSLKEE
+EAILGGSSDSDYKIRKTGNTIPVFVLSPIQTPEGMDSKLAAFLSDEVRLIWAKVKGKQVR
+IQEMSWKNPSQLSQELKRLNVDAVIKTDIREVSGKWVVNQKITDPVKEIVYGSVDGSFQS
+PKIEDELQANQAYYLKHGSGVLALDAKSSLVPIWEKSLSSGEIDSILKKSIQGYLSFSAS
+SADTEVLFQGEKIGIASFRNYPLPEGLQQIQITRPGQKDISKSLQIRSGQTISIYQEWKE
+DRTLGGVRILSFPEALQVALDGLKMGETPFYRSNLTPGAMQLELVRETENGPLVYYEGQL
+IVDADKITEIALPYKTDNLISEPEFWKLSGEKGFQAFSGKTLDFQNVSSLPPGWYGVFSA
+PFVPENMELEGIIPITAESDSGIVAISFHTSKKTISLEYEKERLSVYSFPSNGNNVGTYK
+FKKEDKEDGRPFRIITDVKEGTIRLYLGYSKVLEDRLDVSGVWRISILTRGENFSKRSPL
+RNLKIEYKGYK*
+>1234#10_00005 imidazoleglycerol-phosphate dehydratase or histidinol-phosphatase
+LTDKLIGFYDPVRMKAERKTSETEIKLEMNLRGTGQYQFDTEIPFFEHMLSHISKHGLID
+LNLWLRGDIEIDCHHSVEDTAILMGATIHKQLGDKAGIFRYGHFTLTMDEVLTTVAVDLG
+GRYFFKYTGPELTGKFGIYDAELSLEFLQKLALNAKMNLHVVVHYGDNKHHVHESIFKAL
+GKALRMAIAQDSAAAGAIPSTKGVLE*
diff --git a/t/data/expected_output_groups_group_5.fa b/t/data/expected_output_groups_group_5.fa
new file mode 100644
index 0000000..4ff2725
--- /dev/null
+++ b/t/data/expected_output_groups_group_5.fa
@@ -0,0 +1,9 @@
+>1234#10_00016
+MKVTHSCLEFDSIEGLIDFAREFETGSMIRFLSPIEDNSGNVLVKEEVQVKESTLARLKD
+IKGQYTPKFEVKLNKELLEQIQNILAIKIVNQLKVTDMKFLKFMYENTNYNFKGIIRNSL
+LSKKTTLTLLKVYNQNLNFFKYISELGLLSLGIVMIPDTMRFRLLRRYAFTAGILMDVPR
+IGVDKFTKLPSDDNEKVRVAHKCSDILQKLDLIEFTYGSISNHMPLGMIEDSTSSDKAAP
+GENIDETFLDDIISNDGESDSKVDGSREDAIPEKSYDIFQALLTDALKLARYIANVSHNA
+VDKDYVMEELVYYIAYNTSKKYFDELLANPLVATFKEFEVNVKRLRKIAEVEMKCVYPPS
+AWAYPKPKSSQVLCKNKVWDCPNIVMGWDIHVITAQEAFGWVGTSLPVDNYPKCRLEEEL
+DEIMVEPEKPKKK*
diff --git a/t/data/expected_output_groups_group_5_multi.fa b/t/data/expected_output_groups_group_5_multi.fa
new file mode 100644
index 0000000..7d344e3
--- /dev/null
+++ b/t/data/expected_output_groups_group_5_multi.fa
@@ -0,0 +1,9 @@
+>1234#10_00016 hypothetical protein
+MKVTHSCLEFDSIEGLIDFAREFETGSMIRFLSPIEDNSGNVLVKEEVQVKESTLARLKD
+IKGQYTPKFEVKLNKELLEQIQNILAIKIVNQLKVTDMKFLKFMYENTNYNFKGIIRNSL
+LSKKTTLTLLKVYNQNLNFFKYISELGLLSLGIVMIPDTMRFRLLRRYAFTAGILMDVPR
+IGVDKFTKLPSDDNEKVRVAHKCSDILQKLDLIEFTYGSISNHMPLGMIEDSTSSDKAAP
+GENIDETFLDDIISNDGESDSKVDGSREDAIPEKSYDIFQALLTDALKLARYIANVSHNA
+VDKDYVMEELVYYIAYNTSKKYFDELLANPLVATFKEFEVNVKRLRKIAEVEMKCVYPPS
+AWAYPKPKSSQVLCKNKVWDCPNIVMGWDIHVITAQEAFGWVGTSLPVDNYPKCRLEEEL
+DEIMVEPEKPKKK*
diff --git a/t/data/expected_output_merged.aln b/t/data/expected_output_merged.aln
new file mode 100644
index 0000000..100dd07
--- /dev/null
+++ b/t/data/expected_output_merged.aln
@@ -0,0 +1,8 @@
+>1111_1#1
+AAAAA-CCCC----TTTTTTTTTTTTTTT
+>2222_2#2
+CCCCC-AAAA----AAAAAAAAAAAAAAA
+>3333_3#3
+TTTTT-GGGG----CCCCCCCCCCCCCCC
+>4444_4#4
+GGGGG-TTTT----GGGGGGGGGGGGGGG
diff --git a/t/data/expected_output_merged_sparse.aln b/t/data/expected_output_merged_sparse.aln
new file mode 100644
index 0000000..ae71404
--- /dev/null
+++ b/t/data/expected_output_merged_sparse.aln
@@ -0,0 +1,8 @@
+>1111_1#1
+CCCC-----AAAA-AAAAA--AAAA-
+>2222_2#2
+AAAA-----TTTT-CCCCC--TTTT-
+>3333_3#3
+GGGG----NNNNNNTTTTT-NNNNNN
+>4444_4#4
+TTTT-----CCCC-GGGGG--CCCC-
diff --git a/t/data/expected_pan_genome.fa b/t/data/expected_pan_genome.fa
new file mode 100644
index 0000000..5354813
--- /dev/null
+++ b/t/data/expected_pan_genome.fa
@@ -0,0 +1,33 @@
+>1234#10_00016
+MKVTHSCLEFDSIEGLIDFAREFETGSMIRFLSPIEDNSGNVLVKEEVQVKESTLARLKD
+IKGQYTPKFEVKLNKELLEQIQNILAIKIVNQLKVTDMKFLKFMYENTNYNFKGIIRNSL
+LSKKTTLTLLKVYNQNLNFFKYISELGLLSLGIVMIPDTMRFRLLRRYAFTAGILMDVPR
+IGVDKFTKLPSDDNEKVRVAHKCSDILQKLDLIEFTYGSISNHMPLGMIEDSTSSDKAAP
+GENIDETFLDDIISNDGESDSKVDGSREDAIPEKSYDIFQALLTDALKLARYIANVSHNA
+VDKDYVMEELVYYIAYNTSKKYFDELLANPLVATFKEFEVNVKRLRKIAEVEMKCVYPPS
+AWAYPKPKSSQVLCKNKVWDCPNIVMGWDIHVITAQEAFGWVGTSLPVDNYPKCRLEEEL
+DEIMVEPEKPKKK*
+>1234#10_00017
+MKRYLSIVILCTFAMLLLVCSTNKSSGSDQVKTESNATSARIVWLLGDVKILSDSGEKKA
+ELGASLSSTDRVVTGPNGGAEIMVADSGIIKMSKNSDIEISSLMNPNGSDTNVQVNYGKI
+VTMVKKGQKTTEFTVSTPTALAGVRGTSFLTSVESPEGSKINCAKANCTVRFAVIEGTIA
+VSKKGESSEVILSKNRELRIEKNQKLTDKLIRSLQNDSLSEMKELIVLHKNETFEYGKLV
+EELKSSSEELKILSQSGSVEEVKAAFQKREADRNNADEITKTAKAVNETKYVQQDVQKEK
+LKLNPKETF*
+>1234#10_00001
+MSKHGFFQITQKLFLRKGDELLILRDRKSGLGDLPGGRMNENEFFEDWSLSMQREIEEEL
+GSQVQIRVSTKPLFIHKHKVNEGNFPCIIIAYHADYLGGDIILSDEHDYISWEKVQTYEP
+SPLFTEYMLDAVNLYLKEYAPLVH*
+>1234#10_00003
+VKTTLSGEIEKLRYEVAVKIVNLQGEVLDLRAEMKINFSEVNSKILKLQFEFEMAKIRKE
+LKTEIADLRAETKTDFLELQKSIVDIYKTISTQTRWILGVATLFAAIGKVIN*
+>1234#10_00005
+LTDKLIGFYDPVRMKAERKTSETEIKLEMNLRGTGQYQFDTEIPFFEHMLSHISKHGLID
+LNLWLRGDIEIDCHHSVEDTAILMGATIHKQLGDKAGIFRYGHFTLTMDEVLTTVAVDLG
+GRYFFKYTGPELTGKFGIYDAELSLEFLQKLALNAKMNLHVVVHYGDNKHHVHESIFKAL
+GKALRMAIAQDSAAAGAIPSTKGVLE*
+>1234#10_00006
+MIAILDYGMGNIHSCLKAVSLYTKDFVFTKDHSTIENSKALILPGDGHFDKAMENLNSTG
+LRKTIDKHVTSGKPLFGICIGFQILFESSEEIAQGSKKEQIEGLGYIKGKIKKFHGKDFK
+VPHIGWNRLQIRRKDKSVLLKGIGDQSFFYFIHSYRPTDAEGNAITGLCDYYQEKFPAVV
+EKNNIFGTQFHPEKSHTHGLKLLENFIRFI*
diff --git a/t/data/expected_pan_genome_one_gene_per_fasta.fa b/t/data/expected_pan_genome_one_gene_per_fasta.fa
new file mode 100644
index 0000000..b788b9b
--- /dev/null
+++ b/t/data/expected_pan_genome_one_gene_per_fasta.fa
@@ -0,0 +1,33 @@
+>1234#10_00001 nudix hydrolase
+MSKHGFFQITQKLFLRKGDELLILRDRKSGLGDLPGGRMNENEFFEDWSLSMQREIEEEL
+GSQVQIRVSTKPLFIHKHKVNEGNFPCIIIAYHADYLGGDIILSDEHDYISWEKVQTYEP
+SPLFTEYMLDAVNLYLKEYAPLVH*
+>1234#10_00003 hypothetical protein
+VKTTLSGEIEKLRYEVAVKIVNLQGEVLDLRAEMKINFSEVNSKILKLQFEFEMAKIRKE
+LKTEIADLRAETKTDFLELQKSIVDIYKTISTQTRWILGVATLFAAIGKVIN*
+>1234#10_00005 imidazoleglycerol-phosphate dehydratase or histidinol-phosphatase
+LTDKLIGFYDPVRMKAERKTSETEIKLEMNLRGTGQYQFDTEIPFFEHMLSHISKHGLID
+LNLWLRGDIEIDCHHSVEDTAILMGATIHKQLGDKAGIFRYGHFTLTMDEVLTTVAVDLG
+GRYFFKYTGPELTGKFGIYDAELSLEFLQKLALNAKMNLHVVVHYGDNKHHVHESIFKAL
+GKALRMAIAQDSAAAGAIPSTKGVLE*
+>1234#10_00006 imidazole glycerol phosphate synthase subunit HisH
+MIAILDYGMGNIHSCLKAVSLYTKDFVFTKDHSTIENSKALILPGDGHFDKAMENLNSTG
+LRKTIDKHVTSGKPLFGICIGFQILFESSEEIAQGSKKEQIEGLGYIKGKIKKFHGKDFK
+VPHIGWNRLQIRRKDKSVLLKGIGDQSFFYFIHSYRPTDAEGNAITGLCDYYQEKFPAVV
+EKNNIFGTQFHPEKSHTHGLKLLENFIRFI*
+>1234#10_00016 hypothetical protein
+MKVTHSCLEFDSIEGLIDFAREFETGSMIRFLSPIEDNSGNVLVKEEVQVKESTLARLKD
+IKGQYTPKFEVKLNKELLEQIQNILAIKIVNQLKVTDMKFLKFMYENTNYNFKGIIRNSL
+LSKKTTLTLLKVYNQNLNFFKYISELGLLSLGIVMIPDTMRFRLLRRYAFTAGILMDVPR
+IGVDKFTKLPSDDNEKVRVAHKCSDILQKLDLIEFTYGSISNHMPLGMIEDSTSSDKAAP
+GENIDETFLDDIISNDGESDSKVDGSREDAIPEKSYDIFQALLTDALKLARYIANVSHNA
+VDKDYVMEELVYYIAYNTSKKYFDELLANPLVATFKEFEVNVKRLRKIAEVEMKCVYPPS
+AWAYPKPKSSQVLCKNKVWDCPNIVMGWDIHVITAQEAFGWVGTSLPVDNYPKCRLEEEL
+DEIMVEPEKPKKK*
+>1234#10_00017 LipL45-like lipoprotein
+MKRYLSIVILCTFAMLLLVCSTNKSSGSDQVKTESNATSARIVWLLGDVKILSDSGEKKA
+ELGASLSSTDRVVTGPNGGAEIMVADSGIIKMSKNSDIEISSLMNPNGSDTNVQVNYGKI
+VTMVKKGQKTTEFTVSTPTALAGVRGTSFLTSVESPEGSKINCAKANCTVRFAVIEGTIA
+VSKKGESSEVILSKNRELRIEKNQKLTDKLIRSLQNDSLSEMKELIVLHKNETFEYGKLV
+EELKSSSEELKILSQSGSVEEVKAAFQKREADRNNADEITKTAKAVNETKYVQQDVQKEK
+LKLNPKETF*
diff --git a/t/data/expected_pan_genome_reference.fa b/t/data/expected_pan_genome_reference.fa
new file mode 100644
index 0000000..61a7a0d
--- /dev/null
+++ b/t/data/expected_pan_genome_reference.fa
@@ -0,0 +1,1466 @@
+>11111_1#11_04055
+ATGCTTCCGGTCACCTACAGATTAATACCTCAAAGCGGAGTATCCACATATGGATTAAAT
+ACCGCAGATACACCTGTTTTCCCCGATATTCCCGAACATGCACCAAACCCCTCACGGCTA
+CGCCTTGCTCATGACAGCCTTGCCATAAACAGTGAATTCCGTCTGGAGCCAGAGTGTGTG
+GTGGAGTACCTTATCTCAGGCGCGGGTGGAATAGACCCTGATACAGAAATTGATGACGAC
+ACTTATGACGAATGCTACGATGAACTATCCTCCGTACTTCAAAATGCGTATACCCAAAGC
+GAAACATTCCGCAGACTGATGAATTACGCATATGAAAAAGAACTACATGATGTGGAGCAG
+CGCTGGCTACCGGGGGCAGGCGAAGCCTTTGAAACTACCGTGGCTCAGGAACACTTCAAA
+CTTTCAGAAGGCAGGAAAGTTATTTGTCTCAATCTGGACGATTCTGATGATTCATATACC
+GAACATTATGAAAGTAACGAAGGAAGACAACTTTTTGACACAAAACGTTCATTTACTCAT
+GAAGTTGTACATGCACTGACCCATCTTCAGGATAAAGAAGAAAATCATCCAAGAGGCCCT
+GTTGTCGAATATACCAACATTATTCTGAAAGAGATGGGGCATCCTTCACCTCCCAGAATG
+GTCTACATCTTCAATAAATAG
+>11111_1#11_04056
+ATGCCAATAACTAACGCGTCCCCAGAAAATATATTAAGATATTTGCATGCGGCCGGTACC
+GGTACGAAAGAAGCAATGAAAAGTGCAACTTCACCACGCGGTATACTGGAATGGTTTGTC
+AATTTTTTTACCTGTGGTGGAGTAAGAAGAAGCAATGAAAGATGCTTTTGGGAGGTAATT
+GGAAAACTGACCACATCATTATTATATGTAAATAAAGATGCTTTCTTCGATGGTAATAAA
+ATATTTCTGGAGGATGTCAACGGGTGTACTATATGTCTGTCATGTGGAGCAGCATCCGAA
+AATACGGATCCCATGGTCACTATTGAAGTGAACAAAAATGGAAAAACTGTAACGGATAAA
+GTTGATAGTGAGAGATTTTGGAATGTATGTCGAATGTTAAAACTGATGAGTAAACATAAT
+ATACAACAGCCTGATTCACTTATAACCGAGGATGGTTTTCTGAACCTGCGCGGAGTAAAC
+CTAGCTCATAAAGATTTCCAGGGAGAAGATTTGTCAGACATAGATGCTTCTAATGCAGAT
+TTCCGTGAAACAAATCTATCTAATGTAAATTTAGTCGGTGCAAATTTGTGTTGTGCAAAT
+CTACACGCTGTAAATCTAATGGGTTCAAACATGACTAAAGCAAACCTGACTCACGCAGAC
+CTGACTTGCGCTAACATGTCCGGTGTAAACTTAACCGCTGCAATTCTATTCGGCTCAGAC
+TTAACTGACACCAAACTAAATGGTGCAAAATTAGATAAGATAGCTCTAACTTTAGCGAAA
+GCATTAACAGGAGCCGATCTGACAGGTAGTCAACATACCCCTACTCCACTCCCGGATTAC
+AATGATAAAACGCTTTTCCCCCATCCAATATTTTAG
+>11111_1#11_04058
+ATGGAAAGTCTATTAAATCGTTTATATGACGCGTTAGGCCTGGATGCGCCAGAAGATGAG
+CCACTGCTTATCATTGATGATGGGATACAGGTTTATTTTAATGAATCCGATCATACACTG
+GAAATGTGCTGTCCCTTTATGCCACTGCCTGACGACACTCTGACTTTGCAGCATTTTTTA
+CGTCTTAACTACGCCAGCGCCGTCACTATCGGCGCTGATGCAGACAATACTGCTTTAGTG
+GCGCTTTATCGCTTGCCGCAAACCAGTACCGAAGAAGAGGCGCTCACTGGTTTTGAATTA
+TTCATTTCAAACGTGAAGCAATTGAAAGAGCATTATGCATAA
+>11111_1#11_04059
+ATGCAAATACAGAGCTTCTATCACTCAGCTTCACTAAAAACCCAGGAGGCTTTTAAAAGC
+CTACAAAAAACCTTATACAACGGAATGCAGATTCTCTCAGGCCAGGGCAAAGCGCCGGCT
+AAAGCGCCCGACGCTCGCCCGGAAATTATTGTCCTGCGAGAACCTGGCGCGACATGGGGG
+AATTATCTACAGCATCAGAAGACGTCTAACCACTCGCTGCATAACCTCTATAACTTACAG
+CGCGATCTTCTTACCGTCGCGGCAACCGTTCTGGGTAAACAAGACCCGGTTCTAACGTCA
+ATGGCAAACCAAATGGAGTTAGCCAAAGTTAAAGCGGACCGGCCAGCAACAAAACAAGAA
+GAAGCTGCGGCAAAAGCATTGAAGAAAAATCTTATCGAACTTATTGCAGCACGCACTCAG
+CAGCAAAATGGCTTACCTGCAAAAGAAGCTCATCGCTTTGCGGCAGTAGCGTTTAGAGAT
+GCTCAGGTCAAGCAGCTCAATAACCAGCCCTGGCAAACCATAAAAAATACACTCACGCAT
+AACGGGCATCACTATACCAACACGCAGCTCCCTGCCGCAGAGATGAAAATCGGCGCAAAA
+GATATCTTTCCCAGTGCTTATGAGGGAAAGGGCGTATGCAGTTGGGATACCAAGAATATT
+CATCACGCCAATAATTTGTGGATGTCCACGGTGAGTGTGCATGAGGACGGTAAAGATAAA
+ACGCTTTTTTGCGGGATACGTCATGGTGTGCTTTCCCCCTATCATGAAAAAGATCCGCTT
+CTGCGTCAGGCCGGCGCTGAAAACAAAGCCAAAGAAGTATTAGCTGCGGCACTTTTTAGT
+AAACCTGAGTTGCTTAACAGAGCCTTAGAGGGCGAAGCGGTAAGCCTGAAACTGGTATCC
+GTCGGGTTACTCACCGCGTCGAATATTTTCGGCAAAGAGGGAACTATGGTCGAGGATCAA
+ATGCGCGCATGGCAATCGTTGACCCAGCCGGGAAAAATGATTCATTTAAAAATCCGCAAT
+AAAGATGGCGATCTACAGACGGTAAAAATAAAACCGGACGTCGCCGCATTTAATGTGGGT
+GTTAATGAGCTGGCGCTCAAGCTCGGCTTTGGCCTTAAAGCATCAGATAGCTATAATGCC
+GAAGCGCTACATCAGTTATTAGGCAATGATTTACGCCCTGAAGCCAGACCAGGTGGCTGG
+GTTGGCGAATGGCTGGCGCAATACCCGGATAATTATGAGGTCGTCAATACATTAGCGCGC
+CAGATTAAGGATATCTGGAAAAATAACCAACATCATAAAGATGGCGGCGAACCCTATAAA
+CTCGCACAACGCCTTGCCATGTTAGCCCATGAAATTGACGCGGTGCCCGCCTGGAATTGT
+AAAAGCGGCAAAGATCGTACAGGGATGATGGATTCAGAAATCAAGCGAGAGCTCATTTCT
+TTCCATCAGACCCATATGTTAAGTGCGCCTGGTAGTCTTCCGGATAGCGGTGGACAGAAA
+ATTTTCCAAAAAGTATTACTGAATAGCGGTAACCTGGAGATTCAGAAACAAAATACGGGC
+GGGGCGGGAAACAAAGTAATGAAAAATTTATCGCCAGAGGTGCTCAATCTTTCCTATCAA
+AAACGAGTTGGGGATGAAAATATTTGGCAGTCAGTAAAAGGTATTTCTTCATTAATCACA
+TCTTGA
+>11111_1#11_04060
+ATGAAAAAGTATCTTGCTTTCGCCGTTACGCTGCTGGGTATGGGTAAAGTCATCGCCTGT
+ACTACCCTTTTGGTAGGCAATCAGGCTTCGGCTGACGGCTCCTTTATTATCGCGCGCAAC
+GAGGATGGCTCGGCAAATAACGCCAAGCATAAGGTTATTCATCCCGTCGCGTTTCATCAA
+CAAGGCGAGTATAAAGCACATCGCAACAATTTTAGCTGGCCGCTTCCGGAGACAGCGATG
+CGCTATACGGCGATTCATGACTTTGATACTAACGATAACGCCATGGGTGAAGCCGGTTTC
+AATTCGGCGGGCGTCGGAATGAGCGCAACGGAAACCATTTACAACGGCAGAGCGGCGCTG
+GCTGCCGATCCTTACGTGACAAAAACGGGAATCACGGAAGACGCCATTGAGTCCGTGATC
+CTGCCAGTGGCGCAATCGGCGCGTCAGGGCGCCAAATTACTGGGAGATATTATTGAACAA
+AAAGGCGCGGGCGAAGGTTTCGGCGTCGCGTTTATTGATAGCAAAGAGATATGGTATCTG
+GAGACGGGAAGCGGACATCAATGGCTGGCAGTACGACTTCCGGCAGATAGCTATTTCGTT
+TCCGCCAATCAGGGACGTTTACGCCATTACGATCCGAATGATAACGCGAATTATATGGCG
+TCACCAACGTTAGTAAGCTTTGCGAAAAAGCAGGGATTATATGATCCGGCCCGCGGCGAA
+TTCGACTTTCATCAAGCCTATTCGCAGGATAACAAAAACGATACCACCTATAATTATCCG
+CGCGTCTGGACGCTACAACACCAGTTTAATCCGCATCTGGATACGGTCGTTAGCGAAGGG
+GAAACATTTTCTGTTTTTTTAACGCCAATAACGAAGATCAGCGTGGCGGCAGTAAAAAAC
+GCGTTACGCAATCACTATCAGGGAACGTCGCACGACCCTTATGCCAGTCATAATCCACAA
+GAACCATGGCGACCTATATCCGTTTTTCGTACCCAGGAGTCACATATTTTACAGGTCAGA
+CCGAAATTACCGCAGGCTATCGGCAACGTAGAATACATCGCCTATGGAATGCCATCTCTT
+AGCGTCTATCTCCCCTATTACCAGGGGATGCGTCATTATCAACCCGGAGATGATAAAGGA
+ACCGATCGGGCGAGCAACGACTCTACCTACTGGACATTCCGCACGCTGCAAACACTGGTT
+ATGCAAGACTACAATACGTTTGCGCCAGATGTGCAACATGCCTGGAAAACATTTGAACAG
+CAAACAGCTAAGCAACAGTATAAGATGGAGCAGAGCTATCTGAGATTATATGCGTCGCAT
+CCGAAAGAAGCACAACGCTTACTGCAAAATTTTGAAGATAAAACGATGCAAAATGCGCAG
+ACGCTCGCCCGTCGCCTGACCAATAATATTATTACGACAATGACTTACCGCACAGATATG
+AAATATCACTTTTCAAGTACGCAGCCATAA
+>11111_1#11_04061
+ATGGTTAAGTTATCAATGACGCTGCGCCTGACAATTTCTTTTATCGCCATACTTATCCTC
+GCCTGTACCGGCATTAGCTGGACGCTCTATAACGCGCTGAGCAAAGAATTAACGTATCGG
+GATGATATGACGCTAATAAATCGGGCGGCGCAAATGCAGCAACTGTTACTGGATGGCGCC
+AGGCCGGAAAATCTGCCGCTCTATTTCAATCGGATGGTGGATACGAAGCAGGATATCTTA
+TTGATCCACTCAGCAACAGGCCATAATGTTGCGATTAATCATAGCGGCATCCCCGACCAA
+CGCTTTAACGAGATTCCGCTGGCTAAAAACATCACCCGCGAAACCTTATTTCGCCAGGCG
+GTACAAGGCACGGAGCTGACCGCGGTACGAGTAAACGCCAGAAGCGGCGATAACCCGCTG
+ACCCTTACTATTGCCAGGCTGGCGACGGAAAGGCGGCAAATGCTGGCGCAATATCGCCGC
+AACAGTTTGCTGATTAGCCTTATCGCGATCCTCGTCTGTTCGGCGCTCAGTCCATTAGTC
+ATCAGAAACGGGCTGCGGGCCATTACGTCGCTCAGCCGACTCACCGCGGCGACAGATAGC
+GGCACACTTCGCCAGCCGCTGGCGGAACAGGCGTTACCCGTCGAGCTCAGGCCGCTTGGG
+CAAGCGCTAAATACCATGCGCCAGAAGCTTTCCGACGATTTTGAACGCCTGAACCAATTT
+GCCGACGATCTGGCGCATGAGCTGCGCACGCCGGTTAATATTTTACTGGGGAAGAATCAG
+GTTATGCTGAGTCAGGAACGCAGCGCCGAAGAGTATCAACAAGCCCTTGTCGATAATATT
+GAAGAGCTGGAGGGACTGTCGCGACTGACAGAAAATATTCTCTTTCTGGCACGCGCGGAG
+CACCAGAATATAGCGGTAAAAAAACAGCCTGTTTCGCTCAATGCGCTGGTCGAAAATATG
+CTGGATTATCTTAGCCCCCTTGCCGAAGAGAAGCACATCTGTTTTATAAATCAATGTCAG
+GGAACGGTATGGGCTGACGAAATATTATTACAAAGAGTGCTCTCAAACCTGCTGACGAAT
+GCCATCCGTTATTCTGATGAAAACGCCGTGATACGTATTGAAAGCGCTTATGATGATAAC
+GTTGCAGAAATTCGGGTCGCTAATCCGGGCAGCCCCACCGCCGATGCGGATAAGCTTTTC
+CGGCGTTTTTGGCGAGGAGATAATGCCCGCTACACTGCCGGTTTCGGCCTGGGGTTATCG
+TTAGTTAACGCGATTGCCCTATTGCACGGTGGCTCGGCATCTTACCGCTATGCCGATGAA
+CATAATATCTTTTCGGTTCGTCTGCCTGATAGCGGTGATAGCTAA
+>11111_1#11_04062
+ATGTCATCTTGTTGGAGATTTACGGATTCGCTAACAAGCCTATGGCATACTGCGTTGATG
+AAGATTTTATTGATTGAAGATAACCAGAAAACCATTGAGTGGGTACGTCAGGGACTCACG
+GAGGCAGGCTATGTGGTTGATTATGCCTGTGATGGACGAGACGGATTACACCTAGCCCTT
+CAGGAACATTATTCATTGATTATTCTTGATATTATGCTGCCGGGGCTTGATGGATGGCAG
+GTTTTACGCGCGTTGCGCACTGCATATCAGCCCCCTGTTATTTGCCTGACGGCGCGCGAC
+TCGGTTGAGGATCGCGTCAAAGGTCTTGAGGCGGGCGCTAATGATTACCTTGTTAAGCCT
+TTTTCCTTCGCCGAACTGCTGGCCCGGGTGAGAGCTCAACTCAGACAGCATGTCCCGGTC
+TTTACCCGACTGACGATCAATGGTCTGGACATGGATGCCACAAAGCAATCGGTGTTACGA
+AATGGCAAACCGATTTCCCTGACCCGCAAAGAATTCCTGCTCCTCTGGTTACTGGCGTCC
+CGGGCAGGGGAAATCGTGCCCCGAACCGCGATCGCCAGCGAAGTTTGGGGAATTAACTTT
+GATAGTGAAACCAACACCGTTGATGTCGCGATTCGTCGGCTGCGCGCCAAAGTAGACGAT
+CCATTTGAAAAGAAGCTCATTATGACCGTCCAGGGGATGGGTTATCGATTACAGGCGGAA
+ACGTCGCAGAATGGTTAA
+>11111_1#11_04063
+ATGAAACGATATATACTGGCTACCGCGATAGCGTCTCTTGTTGCAGCCCCGGCAATGGCG
+CTGGCCGCTGGCAGCAATATTCTCAGCGTACATATTCTCGATCAGCAAACAGGCAAACCA
+GCGCCCGGCGTGGAGGTGGTACTGGAGCAGAAAAAGGATAACGGATGGACGCAATTAAAC
+ACCGGGCATACCGACCAGGATGGACGAATTAAAGCACTGTGGCCCGAAAAAGCTGCCGCG
+CCGGGGGATTATCGCGTTATTTTTAAAACCGGCCAGTATTTTGAAAGTAAAAAACTGGAC
+ACGTTTTTCCCGGAGATTCCCGTCGAGTTTCATATCAGCAAAACGAATGAGCACTATCAT
+GTGCCGCTGTTATTAAGTCAGTATGGTTATTCAACCTATCGCGGGAGCTAA
+>11111_1#11_04064
+ATGCAAGTAGATGAACAACGTCTGCGTTTTCGCGATGCGATGGCAAGTCTGGCGGCAGCG
+GTCAACATCGTAACCACGGCGGGTCACGCCGGACGCTGCGGTATCACCGCAACAGCGGTT
+TGCTCAGTCACTGATACGCCGCCCTCCGTGATGGTATGTATTAATGCCAATAGCGCCATG
+AACCCCGTTTTTCAGGGCAACGGCAGGCTGTGCATTAATGTACTTAACCATGAGCAGGAG
+CTGATGGCGCGCCACTTTGCCGGTATGACGGGGATGGCGATGGAGGAGCGTTTTCACCAG
+CCATGTTGGCAAAACGGGCCGCTGGGCCAGCCGGTACTTAACGGCGCGCTGGCCAGTCTT
+GAAGGCGAGATCAGCGAGGTACAAACCATTGGCACGCATCTGGTGTATCTGGTGGCGATC
+AAAAATATTATTCTTAGCCAGGAGGGGCATGGCCTGATTTATTTCAAACGCCGTTTTCAT
+CCGGTCAGACTTGAGATGGAAGCGCCTGTTTAA
+>11111_1#11_04065
+ATGGGACGCACACCGGATTACAAAGCCGCCTTTGGCTGCGCTCTGGGCGCTAACCCAGCC
+TTCTACGGCCAGTTTGAGCAGAACGCCCGTAACTGGTACACCCGTATTCAGGAGACCGGC
+CTGTACTTTAACCATGCAATCGTCAACCCGCCCATTGACCGCCACAAACCTGCCGACGAA
+GTGAAAGACGTCTATATCAAGCTGGAGAAAGAGACGGACGCCGGGATTATTGTCAGCGGG
+GCGAAAGTTGTCGCCACTAACTCCGCCCTGACTCACTACAACATGATTGGTTTCGGCTCA
+GCCCAGGTGATGGGCGAAAACCCGGATTTTGCTCTGATGTTTGTCGCGCCAATGGATGCC
+GAAGGCGTAAAACTTATTTCGCGCGCCTCGTATGAAATGGTCGCGGGCGCGACGGGCTCG
+CCGTTTGATTATCCCCTCTCCAGCCGTTTTGATGAAAACGATGCCATTCTGGTGATGGAC
+AAGGTGCTGATCCCGTGGGAAAACGTATTAATTTACCGTGATTTCGATCGTTGTCGTCGC
+TGGACGATGGAAGGCGGCTTTGCCCGTATGTATCCACTGCAAGCCTGTGTTCGTCTGGCG
+GTAAAACTTGATTTCATTACCGCGCTGCTGAAAAAATCGCTCGAATGTACGGGTACCGTA
+GAGTTCCGGGGCGTGCAGGCCGATCTCGGCGAAGTCGTGGCCTGGCGCAATATGTTCTGG
+GCATTGAGCGATTCTATGTGTTCTGAAGCAACCCCGTGGGTAAACGGCGCCTGGCTACCG
+GACCACGCCGCGCTGCAAACCTATCGTGTGATGGCCCCAATGGCCTACGCGAAAATTAAA
+AATATTATTGAACGTAACGTTACCAGCGGCCTGATTTACCTGCCTTCCAGCGCCCGCGAT
+CTGAATAATCCGCAAATCGACCAGTACCTGGCGAAATACGTACGCGGCTCTAACGGAATG
+GACCATGTTGAACGTATCAAAATTCTTAAATTGATGTGGGATGCCATCGGCAGCGAGTTT
+GGCGGTCGCCATGAGCTGTACGAGATTAACTACTCGGGCAGCCAGGATGAAATTCGTCTG
+CAGTGTCTGCGTCAGGCCCAGAGCTCCGGCAATATGGATAAGATGATGGCAATGGTCGAT
+CGCTGCCTCTCCGAATACGATCAGAATGGCTGGACGGTTTCGCATTTGCACAATAACGAC
+GACATCAATCAACTGGATAAGCTGCTGAAATAA
+>11111_1#11_04066
+ATGCATGATTCATTAACCATCGCCTTGCTTCAGGCGCGCGAAGCGGCAATGACCTATTTC
+CGCCCCATCGTTAAAAGCCACAATCTGACCGACCAGCAATGGCGCATTGTGCGAATCCTG
+GCCGATAGCCCCTCTATGGATTTTCACGAGCTGGCCTTTCGTACCTGTATTTTGCGTCCA
+AGTCTGACCGGAATATTGACGCGCATGGAGCGAGACGGACTGGTGTTGCGACTCAAGCCG
+GTTAACGATCAGCGTAAGTTATATGTCATGTTGACGGAGCAGGGACAAACGTTGTACGCC
+CGTGCCCGGAGCGAGGTAGAAGAGGCTTATCGAAAAATTGAGGCCGATTTCACGCCCGAA
+AAAACACAGCAATTGATGCTGCTGCTGGACGATCTTATTGCTCTGGGGCGCCAGCATCCT
+GATAGCGAAGCGGAAGCATAG
+>11111_1#11_04067
+ATGAAGGGTACTGTTTTCGCCGTTGCGTTAAACCATCGCAGCCAGCTTGATGCCTGGCAA
+GAGGCTTTCTCTCAGCCTCCCTATAATGCGCCGCCTAAAACCGCAGTGTGGTTCATCAAG
+CCGCGTAATACGGTGATTCGTCACGGCGAACCCATTCCTTATCCGCAGGGAGAAAAGGTA
+CTGAGCGGCGCGACAGTGGCGCTCATTGTGGGGAAAACCGCCAGCCGGATACGCCCTGAA
+GCGGCGGCGGACTATATCGCCGGGTATGCGCTGGCTAACGAGGTCAGCCTGCCGGAAGAG
+AGCTTTTATCGCCCGGCGATTAAAGCGAAATGTCGCGATGGCTTTTGCCCGCTGGGTGAA
+ATGGCGCCGCTGAGTGATGTGGATAATCTCACCATTATCACTGAAATCAACGGACGAGAA
+GCGGACCACTGGAATACTGCCGATTTACAGCGTAGCGCCGCACAACTGCTTAGCGCGTTA
+AGTGAGTTCGCTACACTTAACCCTGGCGATGCGATCTTACTTGGTACGCCGCAGAATCGC
+GTTGCGCTGCGTCCCGGCGATCGGGTGCGTATTCTGGCGAAAGGTTTACCCGCGCTGGAA
+AATCCGGTTGTCGCAGAAGATGAATTCGCCCGCCACCAGACGTTTACGTGGCCGCTGTCA
+GCGACGGGAACGTTATTTGCGCTGGGGTTGAACTACGCCGATCACGCCAGCGAGCTGGCA
+TTTACGCCGCCGAAAGAGCCGCTGGTATTTATCAAAGCGCCAAACACCTTTACCGAACAT
+CACCAAACGTCGGTGCGCCCGAACAACGTCGAATATATGCACTACGAAGCCGAGCTGGTC
+GTGGTGATTGGCAAAACGGCGCGTAAGGTGAGCGAAGCCGAAGCCATGGAGTATGTGGCC
+GGTTACACCGTCTGTAACGACTACGCGATCCGCGACTATCTGGAAAACTACTACCGTCCG
+AATCTGCGGGTAAAAAGCCGCGACGGCCTGACGCCGATAGGCCCGTGGATTGTGGATAAA
+GAGGCGGTTTCTGATCCGCACAACCTGACGTTACGCACCTTTGTCAACGGTGAGCTGCGG
+CAGGAAGGGACGACCGCCGATCTGATCTTCAGCATCCCGTTCCTGATTTCTTATCTGAGC
+GAATTTATGACGTTGCAACCGGGCGACATGATTGCCACCGGTACGCCGAAAGGGCTGTCC
+GATGTGGTGCCGGGGGATGAAGTTGTCGTTGAAGTAGAAGGCGTGGGTCGCCTGGTTAAC
+CGAATCGTCAGTGAGGAGAGCGCAAAATGA
+>11111_1#11_04068
+ATGAAGAAAATAAATCATTGGATTAACGGCAAAAACGTTGCAGGTAACGACTACTTCCAG
+ACCACTAACCCGGCGACCGGTGATGTGCTGGCGGAAGTAGCCTCCGGCGGTGAAGCAGAA
+GTGAACCAGGCTGTCGCGGCGGCAAAAGAGGCGTTCCCGAAATGGGCCAACCTGCCGATG
+AAAGAGCGCGCGCGCCTGATGCGCCGCCTTGGCGACCTGATTGACCAGCATGTGCCGGAA
+ATCGCGGCGATGGAAACCGCCGACACCGGCCTGCCTATTCACCAGACTAAAACGTGCTGA
+>11111_1#11_04069
+GTGCTGATCCCGCGCGCCTCGCATAACTTCGAATTCTTCGCCGAAGTGTGCCAGCAGATG
+AACGGCAAGACCTATCCGGTTGACGATAAAATGCTCAATTATACGCTGGTGCAGCCCGTC
+GGCGTCTGCGCGCTGGTGTCGCCGTGGAACGTGCCGTTTATGACCGCGACTTGGAAAGTT
+GCGCCGTGCCTGGCGCTGGGTAACACCGCGGTGCTCAAAATGTCCGAGCTGTCGCCGCTG
+ACTGCCGACAGGCTGGGCGAGCTGGCACTGGAGGCAGGAATTCCGGCAGGCGTGCTGAAC
+GTGGTGCAGGGCTACGGCGCGACGGCGGGCGATGCGCTGGTACGCCACCATGACGTGCGT
+GCGGTGTCGTTTACCGGCGGTACCGCCACCGGTCGCAATATCATGAAAAATGCCGGGCTG
+AAAAAATACTCGATGGAGCTGGGCGGCAAATCGCCGGTGCTGATTTTTGAAGACGCCGAC
+ATTGAGCGCGCGCTGGACGCCGCGCTGTTCACCATCTTCTCGATCAACGGCGAACGCTGC
+ACCGCTGGGTCGCGCATCTTTATCCAGCAGAGCATTTACCCTGAGTTCGTGAAGCGCTTT
+GCCGAACGCGCGAATCGCCTGCGTGTCGGCGATCCGACCGACCCGAACACCCAGGTCGGC
+GCGCTGATTAGCCAACAGCACTGGGAGAAAGTCTCCGGTTATATCCGCCTCGGCATTGAA
+GAGGGGGCAACGCTGCTGGCGGGCGGTGCGGAAAAACCCACTGACCTGCCTGCGCATCTG
+AAAGGCGGTAACTTCCTGCGCCCAACCGTGCTGGCCGATGTCGACAACCGTATGCGCGTT
+GCGCAGGAAGAGATCTTTGGGCCGGTCGCCTGCCTGCTGCCATTCAAAGACGAAGCGGAA
+GGGTTACGTTTGGCGAACGATGTGGAATACGGTCTGGCCTCTTATATCTGGACCCAGGAC
+GTGAGCAAAGTGTTGCGCCTGGCGCGTGGGATTGAAGCCGGCATGGTCTTCGTCAACACC
+CAGAACGTCCGCGACCTGCGCCAGCCGTTCGGCGGCGTGAAAGCCTCCGGTACCGGGCGC
+GAAGGCGGCGAATATAGCTTCGAAGTGTTTGCGGAAATGAAAAACGTCTGCATCTCAATG
+GGCGACCATCCTATCCCAAAATGGGGAGTTTGA
+>11111_1#11_04070
+ATGGGCAAGTTAGCGTTAGCAGCAAAAATTACCCACGTGCCGTCGATGTATCTTTCTGAA
+CTGCCAGGAAAAAATCACGGTTGTCGTCAGGCAGCCATTGATGGGCATATTGAAATTGGC
+AAGCGTTGCCGCGAAATGGGCGTTGACACCATTATCGTATTCGACACCCACTGGCTGGTG
+AATAGCGCTTACCACATTAATTGTGCCGACCATTTCCAGGGCGTCTATACCAGCAACGAA
+TTGCCGCACTTTATTCGCGACATGACCTATGACTATGACGGTAATCCGGCGCTCGGCCAT
+CTGATCGCCGACGAGGCGGTCAAACTGGGCGTGCGCGCCAAAGCGCACAACATCCCGAGC
+CTGAAGCTGGAGTATGGCACGCTGGTGCCGATGCGCTACATGAACAGCGACAAGCACTTC
+AAAGTGGTCTCCATCTCGGCGTTCTGCACTGTGCATGATTTTGCCGACAGCCGCAAACTG
+GGCGAAGCCATTCTCAAGGCGATTGAGAAATATGACGGTACCGTAGCGGTATTCGCCAGT
+GGTTCTCTGTCGCACCGTTTTATTGACGACCAACGGGCGGAAGAGGGGATGAACAGCTAC
+ACCCGCGAGTTCGATCATCAAATGGACGAGCGCGTGGTCAAGCTGTGGCGCGAAGGCAAA
+TTCAAGGAGTTTTGCACCATGTTGCCGGAGTACGCCGACTACTGCTACGGCGAAGGCAAC
+ATGCACGACACGGTCATGCTACTGGGAATGCTGGGGTGGGACAAATACGACGGCAAGGTG
+GAGTTCATCACCGACCTGTTCGCCAGCTCCGGTACCGGCCAGGTAAACGCTGTTTTCCCG
+CTGCCTGCGTAA
+>11111_1#11_04071
+ATGCCGCACTTTATTGCTGAATGTACTGAAAATATTCGCGAGCAGGCTGATTTACCAAGC
+CTGTTCAGCAAGGTAAACGAGGCGCTGGCCGCCACCGGGATTTTCCCCATCGGCGGTATC
+CGCAGTCGCGCCCACTGGCTGGATACCTGGCAGATGGCTGACGGTAAGCATGATTACGCG
+TTTGTGCATATGACGCTGAAAATCGGCGCCGGGCGCAGCCTGGAGAGCCGTCAGGAAGTC
+GGCGAAATGCTGTTTGGGCTGATTAAAGCCCACTTCGCCGACCTGATGGAGAACCGCTAT
+CTGGCGCTGTCGTTTGAGATTGCCGAGTTACATCCAACGCTCAATTACAAACAAAACAAC
+GTACACGCGTTATTTAAATAG
+>11111_1#11_04072
+ATGCTCGATAAACAGACCCATACCCTGATCGCTCAGCGACTTAATCAGGCTGAAAAACAG
+CGTGAACAGATTCGCGCAGTGTCGCTGGATTATCCCAACATCACTATTGAAGATGCCTAT
+GCCGTACAGCGTGAATGGGTCAATATCAAGATTGCCGAAGGGCGCACGCTCAAAGGCCAC
+AAAATCGGCCTGACCTCAAAAGCGATGCAGGCCAGCTCGCAAATCAGCGAACCGGATTAC
+GGCGCGCTGCTTGACGATATGTTCTTCCATGACGGCGGAGATATCCCCACCGACCGTTTT
+ATCGTCCCGCGTATTGAAGTGGAGCTGGCGTTCGTGCTGGCGAAACCGCTGCGCGGCCCT
+CACTGCACGCTGTTCGACGTCTACAACGCCACGGATTATGTGATTCCGGCGCTGGAACTG
+ATTGACGCCCGCAGCCACAACATCGACCCGGAAACCCAGCGCCCGCGCAAAGTGTTCGAC
+ACCATTTCCGACAACGCCGCCAACGCCGGGGTGATCCTCGGTGGTCGCCCCATCAAACCA
+GACGAGCTGGATCTGCGCTGGATCTCCGCGCTGCTCTATCGCAACGGCGTGATCGAAGAA
+ACCGGCGTCGCCGCAGGCGTGCTGAATCATCCGGCCAACGGCGTGGCGTGGCTGGCGAAC
+AAGCTTGCCCCCTACGATGTCCAGCTTGAAGCCGGGCAGATCATCCTCGGCGGCTCGTTC
+ACCCGCCCGGTGCCGGCGCGCAAGGGCGACACCTTCCATGTCGATTACGGCAACATGGGC
+GCGATCAGTTGCCGGTTTGTGTAA
+>11111_1#11_04073
+ATGAAAAATGCTTTCAAAGACGCGTTAAAAGCGGGGCGCCCGCAAATCGGTTTGTGGCTG
+GGGCTTGCCAACAGTTACAGCGCTGAACTGTTAGCGGGCGCCGGCTTCGACTGGCTACTG
+ATTGACGGTGAACACGCGCCAAACAACGTGCAGACGGTGTTGACCCAGTTGCAGGCGATT
+GCGCCTTATCCCAGCCAGCCGGTGGTGCGTCCGTCATGGAACGATCCGGTACAGATTAAG
+CAACTGCTCGACGTCGGCGCGCAAACGCTGCTGATACCGATGGTGCAGAATGCCGATGAA
+GCGCGAAACGCCGTGGCGGCTACGCGTTATCCGCCTGCCGGTATTCGCGGCGTGGGCAGC
+GCGCTGGCGCGGGCATCGCGCTGGAATCGCATTCCGGACTATCTCCACCAGGCCAACGAC
+GCCATGTGCGTACTGGTGCAGATTGAAACGCGTGAGGCGATGAGCAATCTGGCGTCAATT
+CTCGACGTGGATGGCATTGACGGCGTGTTTATTGGCCCGGCGGATCTCAGCGCCGATATG
+GGCTTTGCCGGCAATCCGCAGCACCCGGAAGTGCAGGCGGCGATTGAGAACGCCATCGTG
+CAGATACGCGCGGCGGGGAAAGCGCCGGGGATTCTGATGGCCAATGAAGCACTGGCGAAA
+CGTTATCTGGAACTGGGGGCGCTATTTGTCGCCGTCGGCGTTGACACCACGCTGCTGGCG
+CGCGGAGCGGAGGCGCTGGCGGCGCGCTTTGGCGCAGAAAAAAAACTGTCCGGTGCGTCC
+GGCGTCTATTAA
+>11111_1#11_04074
+ATGAGCGACACATCATCTGCACTTCCGGAAAGCCCCGAGTCTGTCGGTTCGCACAACGCG
+CTCAGCACGGGTCAACAAACCGTCATAAATAAACTGTTCCGCCGACTGATCGTATTTTTA
+TTCGTGTTGTTTATCTTCTCGTTTTTAGACCGTATCAACATCGGTTTTGCCGGGTTGACG
+ATGGGGCAGGATCTGGGGTTAAGCGCCACCATGTTTGGTCTTGCCACGACGCTGTTTTAC
+GCCACCTACGTCATTTTCGGCATTCCCAGCAACGTGATGTTGAGCATCGTCGGCGCCCGC
+CGCTGGATTGCGACCATTATGGTGCTATGGGGCATTGCATCTACCGCCACGATGTTCGCG
+GTGGGACCGAAAAGCCTGTATGTGCTGCGAATGCTGGTGGGCATTACCGAAGCGGGCTTT
+TTGCCAGGAATATTGCTCTATTTAACCTACTGGTTCCCGGCATTTTTCCGCGCCCGCGCC
+AACGCATTATTTATGATTGCCATGCCGGCCACTACCGCGTTGGGGTCAATTGTCTCCGGC
+TATATTTTATCGCTGGACGGCATATTCAATCTGCATGGATGGCAGTGGTTATTCCTGTTG
+GAAGGATTTCCGTCAGTTTTGTTAGGCATTATGGTCTGGTTTTACCTGGATGATACCCCG
+GCAAAAGCCAAATGGCTGACGGCAGAGGATAAAAAATGTTTGCAGGAGATGATGGATAAT
+GATCGCCTGACGCTGGTTCAGCCTGAGGGGGCCATCAGCCATAACGCCATGCAGCAGCGT
+AGCCTGTGGCGCGAAGTATTCACGCCAATTGTACTGATGTATACGCTGGCCTATTTTTGC
+CTTACCAATACGCTTAGCGCCATTAGTATCTGGACGCCGCAAATCCTGAAAAGTTTTAAT
+GAAGGCAGCAGCAATATCACCATCGGCCTGCTGGCGGCGATCCCGCAGATTTGTACTGTT
+CTGGGCATGATTTACTGGAGCCGCCATTCGGACAAACATCAGGAGCGTAAACACCACACT
+GCGTTACCGTTCCTGTTTGCCGCCGCGGGCTGGCTGCTGGCGTCGGCGACCGACCGTAAC
+CTGATCCAGCTCCTGGGGATCGTGATGGCATCCACGGGTTCCTTTAGCGCGATGGCGATC
+TTCTGGACCACGCCGGATCAGTCGATCAGTTTACGCGCCAGGGCGATAGGCATTGCGGTC
+ATCAATGCCACCGGCAATATTGGCTCCGCGCTCAGCCCGGTTATGATTGGCTGGCTAAAA
+GATATCACTGGTAGCTTCAATAGCGGACTCTGGTTTGTCGCTTCTCTGTTAGTCGTCGGC
+GCCGCCATTATCTGGCTCATTCCCATGAAAGCATCGCGTCCGCGCGCCACCCCTTGA
+>11111_1#11_04075
+ATGTGCCAACGTGCGATCGCCAATATTGATATCAGCAAAGAGTATGACGAAAGCATGGGC
+AGTAACGATGTGCATTATCAGTCGTTTGCTCGTATGGCGGATTTCTTTGGTCGTGATATG
+CAGGCGCATCGCCACGACCAGTTTTTTCAAATGCACTTTCTTGATACCGGGCAGATTGAG
+CTACAGCTCGACGATCATCGCTATTCGGTGCAGGCGCCGCTATTTGTGCTAACGCCGCCC
+TCGGTGCCGCATGCTTTTATTACCGAATCGGATAGCGATGGTCATGTTCTGACGGTACGC
+GAAGAGCTGGTTTGGCCGCTGCTGGAAGTGCTTTATCCCGGCACCAGAGAGGCCTTCGGC
+CTGCCGGGAATCTGCCTGTCGCTGGCGGATAAACTCAACGAGCTGGCGGCGCTCAAACAT
+TACTGGCAGCTAATTGAGCGGGAGTCCACGGAACAACTGGCTGGCTGCGAACATACCTTG
+GTACTACTGGCGCAGGCGGTATTTACCTTGCTGTTGCGTAATGCGAAGCTGGACGATCAT
+GCCGCAACCGGGATGCGCGGTGAACTGAAACTTTTTCAGCGCTTTACCCTGTTAATTGAC
+AACCACTTCCATCAGCACTGGACGGTGCCCGATTATGCCTGTGAGTTGCATATTACCGAA
+TCTCGTTTGACCGATATTTGCCGACGTTTTGCTAATCGCCCGCCTAAACGCCTGATTTTT
+GATCGGCAATTACGCGAGGCGAAACGACTGCTGCTTTTTTCCGACAATGCTGTCAACGAG
+ATCGCCTGGCAATTAGGTTTTAAAGATCCGGCTTATTTCGCCCGTTTCTTTAATCGCCTT
+GCTGGCTGTTCTCCTTCGCAGTTTCGCCAACGTGAAGTTCCTTCTTTTCTCAACTAA
+>11111_1#11_04076
+ATGATGAAAAAAAGCGTCGCTATGCTGGCGGTTTGTATGCTGGCGCAAAGCCACCTTGCC
+ATTGCTGCCGGTGCTCCTGCGCCTCAAGAGATCAACATTGTTTTACTGGGCACCAAAGGC
+GGGCCTTCTTTGCTCAATACAGCCAGACTACCGCAAGCGACGGCGCTCACTATCGGCGAT
+AAGATATGGCTGATAGATGCCGGCTACGGCGCCAGTCTGCAACTGGTGAAAAATGGCATT
+CCACTGCGCAACATCAATACTATTTTGCTCACCCATCTGCACAGCGACCACATACTGGAT
+TATCCTTCCTTGCTGATGAATGCCTGGGCAAGTGGCCTGAAAGACCATACCATACAGGTT
+TATGGCCCGCCGGGAACCCAGGCGATGACGAAGGCTAGCTGGAAGGTCTTTGACAGGGAT
+ATCACGTTACGCATGGAAGAAGAGGGGAAACCCGATCCGCGCAACCTGGTTAAGGCGACC
+GATATCGGCCAGGGCGTCATCTATAAAGATGAACTGGTCACAATAAGCGCGCTGAAAGTG
+CCTCATTCCCCTTTCCCGGACGGTGAAGCGTTTGCTTACCGTTTTGATACTCAGGGTAAG
+CGAATCGTCTTCTCTGGCGATACGTCCTGGTTTCCTCCGCTTGCAACGTTTGCCCAGGGG
+GCGGATATCCTGGTACATGAGGCGGTACATGTCCCTTCGGTAGCAAAACTGGCTAATAGT
+ATTGGCAACGGAAAAACGCTGGCTGAAGCGATTGCGTCGCATCACACCACGATTGAAGAT
+GTCGGTAAGATTGCTCGCGAGGCCCACGTGAAAAAACTGGTGTTAAGTCATCTGGTGCCT
+GCGACGGTTGCGGATGACGTCTGGCAACAGGAAGCCATGAAAAATTACCCGGGCCCTGTC
+ATTGTCGGTCATGACAATATGACGATAAGCGTACCGTAA
+>11111_1#11_04077
+ATGGCTAACATCACTGTCACCTTTACCATCACCGAATTTTGTTTGCACACCGGCGTGACG
+GAAGAGGAGCTAAACGAAATCGTCGGACTTGGCGTAATTGAGCCTTACGAAGACGATAAC
+GCCGACTGGCAATTCGACGATCGCGCAGCGAGCGTGGTACAACGCGCGCTACGCTTACGC
+GAGGAGCTGGCGCTCGACTGGCCAGGGATCGCGGTCGCGTTAACGCTGCTGGAAGAGAAT
+TCACGGCTGCGCGAAGAAAACCGGTTACTGCTGCAACGCCTTTCTCGCTTTATCTCGCAT
+CCCTAA
+>11111_1#11_04078
+ATGGAACTTAAGGATTATTACGCCATTATGGGCGTGAAACCGACGGACGATCTCAAGACG
+ATTAAGACCGCCTATCGCCGACTGGCCCGCAAGTACCATCCAGATGTCAGCAAAGAACCC
+GATGCCGAAGCCCGTTTCAAAGAGGTTGCTGAAGCATGGGAAGTGCTGAGTGATGAGCAA
+CGGCGCGCCGAGTATGACCAGTTATGGCAACACCGTAACGATCCACAATTTAATCGCCAG
+TTCCAGCAACACGAAGGCCAGCCGTATAACGCCGAAGATTTTGATGATATTTTCTCGTCT
+ATTTTTGGTCAGCACGGTCGTCATTCGCACCACCGCCACGCCGCACGCGGTCATGATATC
+GAAATTGAAGTGGCGGTATTCCTGGAAGAAACGCTGGAAGAGCACCAGCGTACGATTAGC
+TATTCCGTCCCCGTTTATAACGCGTTCGGCCTGGTGGAGCGGGAAATTCCCAAAACATTG
+AATGTGAAAATCCCGGCTGGCGTCAGCAACGGGCAACGAATCAGACTGAAAGGCCAGGGC
+ACGCCGGGGGAAAACGGCGGACCTAATGGCGATTTATGGCTCGTTATCCATATTGCCCCG
+CATCCGCTCTTTGATATCGTCAATCAGGATCTGGAAGTCGTCCTTCCGCTTGCCCCATGG
+GAGGCGGCGCTCGGCGCTAAGGTGTCTGTGCCAACGCTTAAAGAGCGTATTTTGCTGACC
+ATTCCCCCCGGCAGCCAGGCAGGTCAGCGGCTGCGTATCAAAGGAAAAGGATTAGCCAGT
+AAAAAGCACACTGGCGATCTCTATGCCATCATCAAAATCGTTATGCCGCCGAAACCTGAC
+GAGAAAACAGCTGCCCTGTGGCAACAACTGGCGGACGCGCAGTCGTCCTTTGACCCACGC
+CAGCAATGGGGGAAAGCATAA
+>11111_1#11_04079
+ATGGCGAAACAACAACGGATGGGCTGGTGGTTTCTTTGCCTTGCATGTGTCGTGGTAATG
+GTTTGTACCGCGCAACGCATGGCGGGCCTGCACGCCTTGCAGATGCAGGCGACGGCCTCT
+GCTGCGGTGGTCAGCGCTCCCTCCTCGACAGATGACGGCTCGCCGGTCACTCCCTGCGAA
+TTAAGCGCCAAGTCGCTGCTGGCGGCGCCTCCAGTACTCTTTGAAGGTGCTATCCTTGCG
+CTTTATCTACTGCTTTCCTTACTGGCGCCTGTCCGGGTCATGCGCCTGCCGTTTTCGCCT
+CCACGGGCTATTTCGCCGCCCACATTACGGGTACATCTACGATTTTGTGTCTTCCGTGAA
+TGA
+>11111_1#11_04080
+ATGATGATTTTATTCAGGCGGATACTGTTCTGCCTGTTATGGCTTTGGCTGCCCGTCTCC
+TGGGCGGCGGAAAGCGGCTGGCTGCGTTCGCCCGATAACGACCATGCCAGCATACGGCTA
+CGTGCCGATACGTCCGCTAACAGTGAGACCCGGCTGTTGCTGGATGTCAAACTGGAAAAC
+GGCTGGAAAACCTACTGGCGCGCGCCGGGGGAAGGGGGCGTGGCACCCTCTATCGCCTGG
+AAAGGCGACATGCCTGAGGTAAGCTGGTTCTGGCCAACCCCCTCGCGCTTTGATGTGGCG
+AATATCACCACCCAGGGATATCACGACGAGGTGACCTTTCCGATGATCGTGCGCGGTACG
+CCGCCGGCGACCTTGCGCGGTGTGTTGACGTTATCAACCTGCAGCAATGTTTGTCTGTTG
+ACCGATTACCCCTTTTCCGTGACGCCCACTGTGCAGAATGCCGATTTTGCCCATGACTAT
+GCGCGGGCGATGGGTAAAGTTCCGCTCCGCAGTGGGCTAACGGACTCGCTTGACGTTGGC
+TATCGCCCGGGAGAACTGGTGGTCACTGCTACGCGAGCGGCGGGCTGGTCATCGCCCGGG
+CTCTATCTTGACACCATAGATGACGTCGATTTTGCGAAGCCTCGCCTGCGCGTAGAGGGC
+GACAGGTTACAGGCGACGGTGCCGGTGACGGACAGTTGGGGCGAAAAGGCGCCCGATTTG
+CGCGACAAATCGCTGACCCTCGTGTTAGCCGATGGCGCTATCGCCCAGGAGAGCACGCAA
+ACCATTGGCGCTGCGCCAGCGCAAACGCCGGACAATGCGGCGCTACCTTTCTGGCAAGTT
+GTAATGATGGCGCTAATCGGCGGACTGATTCTTAATTTAATGCCCTGCGTACTGCCTGTT
+CTGGGCATGAAACTTGGCTCTATTTTATTGGTAGAGGAAAAAAGCCGCTCTCACATCAGG
+CGACAATTTTTGGCTTCGGTCGCCGGTATCATTGCGTCATTTATGGCGCTGGCGGCGTTT
+ATGACCCTCCTTCGCCTGTCAAACCATGCGCTGGCCTGGGGAGTCCAGTTCCAGAATGCA
+TGGTTTATTGGTTTTATGGCGCTGGTGATGTTGTTGTTTAGCGCCAGCCTGTTCGGGCTT
+TTTGAGTTCAGGCTTCCCTCATCTATGACCACGAAACTGGCCACTTACGGCGGTAACGGT
+ATGTCGGGACATTTCTGGCAGGGGGCGTTCGCCACGCTGCTGGCGACGCCTTGTAGCGCG
+CCGTTTCTGGGCACGGCGGTCGCGGTGGCGCTCACGGCGTCGCTGCCGACGCTGTGGGGG
+CTGTTCCTTGCGCTTGGCCTGGGAATGAGCGCGCCGTGGCTACTGGTCGCGATACGACCA
+GGGCTTGCGCTACGTTTACCGCGCCCCGGGCGTTGGATGAATGTCCTGCGCAGGATCCTC
+GGTCTGATGATGCTGGGGTCGGCTATCTGGCTGGCGACGTTACTCCTGCCGCATTTCGGC
+TTCACTGCGTCAAAGAGCGCGCAAGACACGGTTCAGTGGCAACCGTTGAGTGAACAGGCA
+ATCCAGTCGGCGCTGGCGCAGCATAAGCGGGTATTTGTCGATGTCACTGCGGACTGGTGT
+ATTACCTGTAAAGTGAATAAATACAACGTCCTGCAAAAAGAGGATGTGCAGGCCGCCTTG
+CAACAGCCGGATGTTGTGGCGCTGCGGGGAGACTGGACGCTGCCGTCCGATGCCATTACA
+GATTTTCTGAAAACGCGCGGCCAGGTCGCCGTGCCGTTTAATCAGGTATATGGCCCCGGT
+TTGCCGGAAGGGGAGGCACTGCCCACTTTGCTGACCCGCGATGCGGTATTACAAACGTTG
+AAAAAAGCGAAAGGAATAACCCAATGA
+>11111_1#11_04081
+ATGAAATACATGATTGTTTTACTGCTGGCGCTGTTTTCGACGCTGAGCATCGCGCAAGAA
+ACCGCTCCTTTTACGCCGGATCAGGAAAAGCAGATTAAAAATCTGATCCATGCGGCGTTG
+TTTAACGATCCTGCCAGCCCGCGGATAGGCGCTAAACACCCTAAGCTGACGCTGGTGAAC
+TTTACGGATTACAACTGCCCGTACTGCAAACAGCTCGATCCGATGCTGGAAAAGATTGTG
+CAGAAATATCCTGACGTTGCGGTCATTATTAAACCGCTGCCATTCAAAGGAGAGAGTTCC
+ATACTGGCGGCGCGTATTGCGCTGACCACCTGGCGCGATCATCCGCAACAGTTCCTCGCG
+CTACATGAAAAACTTATGCAAAAGCGCGGTTACCATACGGATGACAGTATTAAACAGGCC
+CAGCAGAAAGCAGGGGCGACGCCAGTGACGCTGGATGAAAAAAGCATGGAAACGATACGC
+ACTAATTTGCAGTTGGCAAGACTGGTCGACGTGCAAGGAACGCCAGCGACGATCATTGGC
+GACGAGCTGATTCCGGGCGCAGTGCCCTGGGATACGCTGGAAGCGGTGGTGAAAGAAAAA
+CTGGCGGCTGCCAATGGCGGGTAA
+>11111_1#11_04082
+ATGGCGGGTAAACTGCGGCGTTGGCTGCGTGAAGCCGCGGTTTTTCTGGCGCTCCTCATC
+GCGATAATGGTGGTCATGGACGTCTGGCGCGCGCCGCAGGCGCCTCCGGCGTTTGCCGCG
+ACACCATTACATACGCTGACGGGAGAGTCGACAACTCTGGCGACCTTGAGCGAGGAACGC
+CCCGTACTGCTCTATTTTTGGGCCAGCTGGTGCGGGGTATGCCGCTTTACCACGCCTGCG
+GTCGCTCACCTGGCGGCGGAAGGGGAAAACGTCATGACCGTTGCGCTCCGCTCCGGCGGT
+GATGCTGAGGTTGCCCGCTGGCTGGCGCGCAAGGGCGTTGACTTCCCGGTCGTCAATGAT
+GCTAACGGCGCCTTATCCGCTGGCTGGGAAATCAGCGTGACGCCAACGCTGGTGGTGGTT
+TCACAAGGTCGGGTTGTGTTCACCACCAGCGGCTGGACCAGCTATTGGGGCATGAAGCTT
+CGGCTGTGGTGGGCAAAAACGTTCTGA
+>11111_1#11_04083
+ATGAAAAAATCATTACTCGCTGTTGCTGTGGCAGGGGCTGTTTTGTTGTCATCCGCCGTA
+CAGGCGCAGACAACGCCGGAAGGTTATCAATTACAACAGGTGCTGATGATGAGCCGCCAT
+AATCTGCGGGCGCCGCTGGCGAATAATGGCAACGTACTGGCGCAGTCGACGCCGAACGCC
+TGGCCGGCGTGGGACGTTCCCGGCGGGCAACTGACGACGAAAGGCGGCGTGCTGGAAGTC
+TATATGGGACACTACACACGTGAATGGCTGGTCGCGCAGGGGCTGATACCGTCGGGAGAA
+TGTCCGGCGCCCGACACGGTATATGCCTATGCGAATAGTTTGCAGCGCACCGTCGCCACC
+GCGCAATTTTTCATTACCAGCGCTTTCCCCGGCTGTGATATTCCTGTTCATCATCAGGAA
+AAAATGGGCACTATGGACCCTACCTTCAATCCGGTGATTACCGATGATTCCGCCGCGTTC
+CGGCAACAGGCCGTACAGGCGATGGAAAAGGCGCGTAGTCAGCTACATCTTGATGAGAGT
+TATAAACTGCTTGAGCAGATAACGCATTATCAGGACTCGCCGTCCTGCAAAGAGAAGCAT
+CAGTGTTCGCTAATCGACGCGAAAGATACCTTCAGCGCGAACTATCAGCAAGAGCCTGGC
+GTGCAGGGGCCGCTGAAAGTAGGGAACTCGCTGGTGGATGCGTTTACCCTGCAATATTAC
+GAAGGCTTTCCGATGGATCAGGTCGCTTGGGGCGGGATCCACACCGATCGGCAGTGGAAG
+GTGCTGTCAAAACTGAAAAACGGCTACCAGGACAGCCTGTTTACCTCACCCACGGTGGCG
+CGCAATGTCGCTGCGCCGCTGGTAAAATATATCGATAAGGTGCTGGTTGCCGAGCGCGTT
+AGCGCGCCGAAGGTTACCGTGCTGGTGGGGCATGATTCCAATATCGCGTCGCTGCTGACG
+GCGCTGGATTTTAAACCCTATCAGCTCCATGACCAGTATGAGAGAACGCCGATTGGTGGT
+CAGCTTGTCTTCCAACGCTGGCATGACGGTAACGCTAACCGGGATTTGATGAAAATCGAG
+TATGTCTACCAGAGCGCCCGGCAGTTACGTAATGCGGAAGCGTTAACGCTCAAATCGCCT
+GCGCAAAGGGTAACGCTGGAACTGAAAGGATGTCCGGTGGATGCGAACGGCTTCTGTCCG
+CTGGATAAGTTCGATAACGTCATGAACACTGCTGCAAAATAG
+>11111_1#11_04084
+ATGCCAACTCAAGAAGCAAAAGCGCACCGCGTCGGCGAATGGGCAAGCCTGCGTAATACG
+TCGCCGGAAATTGCCGAAGCCATTTTTGAAGTCGCTCACTATGACGAGAAACTGGCAGAA
+AAAATATGGGAAGAAGGTAGCGATGAGGTGCTGATCAAAGCCTTTGAGAAAACGGACAAA
+GACTCGCTCTTCTGGGGCGAACAAGTCATCGAACGTAAGAACGTATAA
+>11111_1#11_04085
+ATGGCAAAGATTCTGGTGCTCTATTATTCCATGTACGGACACATTGAAACCATGGCGCAC
+GCGGTGGCGGAAGGGGCAAAGAAAGTCGACGGCGCAGAGGTCATTATAAAGCGTGTGCCA
+GAAACAATGCCGCCTGAAATCTTCGCAAAAGCTGGCGGTAAAACGCAAAACGCACCGGTT
+GCCACCCCACAGGAGCTGGCGGATTACGATGCCATTATTTTTGGTACGCCAACCCGGTTT
+GGCAATATGTCAGGCCAGATGCGTACCTTCCTGGACCAAACCGGCGGACTGTGGGCATCC
+GGCGCGCTATACGGCAAGCTCGGCGGCGTGTTCAGTTCTACCGGAACGGGCGGCGGCCAG
+GAGCAGACCATCACCTCGACCTGGACTACGCTTGCCCATCATGGGATGGTGATTGTCCCG
+ATAGGCTATTCCGCACAGGAACTGTTTGACGTCTCCCAGGTTCGCGGCGGTACGCCTTAC
+GGCGCAACGACTATCGCTGGAGGCGACGGTTCACGTCAACCAAGCCAGGAGGAACTCTCT
+ATCGCTCGCTATCAGGGGGAATACGTCGCCGGTCTGGCAGTCAAACTCAACGGCTAA
+>11111_1#11_04086
+ATGGCAAACCATCGTGGCGGTTCCGGTAATTTTGCGGAAGACCGCGAAAGAGCATCAGAA
+GCAGGTCGTAAAAGTGGTCAGCACAGCGGGGGCAATTTTAAGAATGACCCGCAGCGTGCA
+TCCGAAGCAGGCAAAAAAGGGGGCAAAAGCAGTAACCGTAATCGCTAG
+>11111_1#11_04087
+ATGTCGCAACGCACAGAGAAAAAAATCGGGAAACGTTCGCAGGCCACCGGTGCAAAACGG
+CAGCTTATCTTAACCGCCGCGCTTGCCGTTTTTTCCCAGTATGGCATTCATGGCGCGCGT
+CTTGAACAGGTCGCCGAGCGGGCAGGCGTCTCCAAAACCAATCTGCTTTATTATTATCCC
+TCGAAAGAGGCGCTGTATGTCGCGGTAATGCGACAGATTCTGGATGTCTGGTTGGCGCCG
+CTCAAGGCGTTTCGCGCAGAATTTTCCCCTCTGGAGGCCATCAAAGAGTATATCCGTCTC
+AAGCTGGAGGTTTCGCGTGATTATCCGCAGGCGTCGCGGCTCTTCTGCATGGAGATGCTG
+GCGGGCGCGCCGCTCTTAATGGATGAACTGACCGGCGATCTAAAAGCGTTGATAGATGAA
+AAATCCGCGCTGATTGCCGGATGGGTGCACAGCGGGAAACTCGCGCCCGTTTCTCCGCAT
+CATTTGATCTTCATGATTTGGGCCGCCACGCAACATTACGCCGATTTCGCCCCTCAGGTT
+GAAGCGGTAACCGGCGCGACGCTTCGCGATGAAGCCTTTTTCAACCAAACGGTCGAAAGC
+GTTCAGCGCATTATTATTGAAGGGATTCGCGTGCGTTAA
+>11111_1#11_04088
+ATGAAACGAATTTTCCTTACCTGCGCGGCGTTGTTGTTCAGCAGTCAGGCGTTGGCCGAT
+GAGTGTGCCAGCGCCAGTACGCAGCTGGAAATGAATCGCTGCGCCGCCGCGCAATACCAG
+GCGGCAGATAAAAAGCTGAACGAAACCTATCAAAGCGCGATTAAGCGTGCGCAACCGCCG
+CAGCGTGAGCTATTGCAAAAAGCGCAGGTGGCATGGATTGCCCTGCGCGACGCCGATTGC
+GCGCTGATTCGCTCAGGTACGGAGGGCGGCAGCGTTCAACCCATGATCGCCAGCCAGTGC
+CTGACCGATAAAACGAACGAACGCGAAGCGTTTTTAGCCTCGCTGCTGCAATGTGAAGAG
+GGTGATTTGAGCTGCCCACTGCCGCCAGCCGGTTAA
+>11111_1#11_04089
+ATGGGAACCACCACGATGGGGGTTAAGCTGGACGACGCCACGCGCGAACGGATCAAAATG
+GCCGCGTCGCGTATCGATCGCACGCCGCACTGGTTAATAAAACAGGCAATCTTTAGCTAT
+CTGGACAAGCTGGAAAATAGCGATACGCTACCGGAGCTACCTGCGCTGTTTGCCGGCGCG
+GCAAATGAAAGCGAGGAGCCGGTCGCGCCGCAGGATGAGCCGCATCAGCCCTTTCTGGAG
+TTTGCCGAACAGATTCTTCCCCAATCCGTCTCTCGCGCCGCCATCACCGCCGCCTGGCGC
+CGCCCGGAAACCGATGCGGTGTCAATGCTAATGGAACAGGCGCGCCTGTCGCCGCCTGTC
+GCTGAGCAGGCGCATAAACTGGCGTATCAACTGGCGGAGAAATTGCGCAATCAAAAATCC
+GCCAGCGGTCGCGCGGGTATGGTGCAAGGCCTGTTGCAGGAGTTTTCCCTCTCTTCGCAA
+GAAGGCGTAGCGCTGATGTGTCTGGCGGAAGCGCTGCTGCGTATTCCCGACAAAGCTACG
+CGCGATGCGTTAATTCGCGACAAAATCAGTAATGGCAACTGGCAGTCGCATATTGGCCGT
+AGCCCGTCGCTGTTTGTAAACGCCGCCACCTGGGGGCTGCTCTTTACCGGCCGACTGGTC
+TCAACGCATAACGAAGCCAATCTTTCGCGCTCGCTGAACCGCATTATCGGCAAGAGCGGC
+GAACCGTTAATCCGCAAAGGCGTCGACATGGCGATGCGTTTAATGGGCGAGCAGTTCGTG
+ACTGGCGAAACCATTGCTCAGGCGCTGGCGAATGCCCGAAAACTGGAAGAGAAAGGGTTC
+CGCTATTCTTACGATATGCTGGGCGAAGCCGCGTTAACCGCCGCCGATGCGCAGGCCTAT
+ATGGTCTCTTACCAGCAAGCGATTCATGCCATCGGCAAAGCGTCTAACGGTCGCGGTATT
+TACGAAGGGCCAGGCATCTCGATTAAGCTGTCCGCCCTGCATCCACGCTATAGTCGCGCG
+CAATACGATCGGGTAATGGAGGAGCTTTATCCGCGCCTGAAATCCCTGACGCTGCTGGCG
+CGCCAGTATGATATCGGTCTCAATATCGACGCCGAAGAGGCGGATCGTCTGGAGATCTCG
+CTTGATCTGCTGGAAAAACTCTGCTTCGAACCCGAACTGGCGGGCTGGAACGGCATTGGC
+TTTGTGATTCAGGCTTACCAGAAACGCTGCCCGCTGGTCATTGATTATTTAGTCGATCTG
+GCCTCCCGTAGCCGCCGTCGGCTGATGATTCGTCTGGTGAAAGGCGCCTACTGGGATAGC
+GAGATCAAACGCGCGCAAATGGAAGGGCTGGAGGGCTATCCAGTTTATACCCGCAAAGTG
+TATACCGATGTCTCTTATCTGGCCTGCGCGAAAAAACTGCTCGCCGTCCCTAATCTGATC
+TACCCGCAGTTCGCGACCCATAACGCTCACACACTGGCGGCGATTTATCATCTGGCCGGG
+CAAAATTACTATCCGGGTCAGTACGAATTCCAGTGCCTGCACGGCATGGGAGAACCGCTG
+TATGAACAGGTCACCGGTAAAGTGGGGGACGGAAAACTTAACCGTCCCTGCCGTATTTAC
+GCGCCGGTGGGAACACACGAAACCCTGCTGGCCTATCTGGTACGACGCCTGCTGGAAAAC
+GGCGCCAACACCTCTTTTGTCAACCGCATCGCCGATGCCACCCTACCGCTCGATGAACTG
+GTGGCCGACCCGGTCGAGGCCGTGGAAAAACTGGCGCAGCAGGAAGGTCAGGCTGGCATA
+CCGCATCCAAAAATTCCGCTGCCGCGCGATCTGTACGGCGAAGGTCGGATAAACTCCGCC
+GGACTTGATTTAGCGAATGAACATCGCCTCGCCTCGCTTTCTTCTGCCCTGTTAAGCAAC
+GCCATGCAGAAATGGCAGGCCAAACCTGTGCTGGAACAACCGGTGGCCGACGGTGAGATG
+ACGCCGGTTATCAACCCGGCGGAACCGAAAGATATTGTTGGCTGGGGACGCGAAGCGACA
+GAAAGCGAGGTTGAACAGGCGTTGCAAAACGCGGTCAATCAGGCGCCGGTTTGGTTTGCG
+ACGCCGCCGCAAGAACGCGCCGCTATTTTGCAGCGGGCGGCGGTATTGATGGAAGACCAA
+ATGCAGCAGTTGATTGGCCTGTTGGTGCGTGAAGCGGGGAAAACGTTCAGCAACGCCATT
+GCCGAAGTGCGCGAAGCGGTAGACTTCCTCCATTATTATGCCGGTCAAGTGCGTGACGAT
+TTCGATAACGAAACGCATCGCCCGTTAGGGCCGGTGGTCTGTATCAGTCCGTGGAACTTT
+CCGCTGGCCATTTTCACTGGCCAAATCGCCGCCGCGCTGGCGGCAGGTAACAGCGTTCTG
+GCGAAACCGGCAGAGCAGACATCGCTGATTGCCGCCCAGGGCATTGCCATTTTGCTGGAA
+GCGGGCGTACCGCCGGGCGTCGTGCAACTGTTGCCGGGACGGGGAGAAACCGTCGGCGCC
+CAGCTTACCGCCGATGCGCGTGTACGCGGCGTGATGTTTACCGGTTCCACGGAGGTCGCG
+ACGTTGTTGCAGCGCAACATCGCCACGCGTCTTGACGCCCAGGGGCGCCCTATTCCGTTG
+ATTGCGGAAACCGGCGGTATGAACGCTATGATTGTCGACTCTTCCGCGCTCACCGAGCAG
+GTGGTCGTGGATGTGCTGGCTTCCGCCTTCGACAGCGCCGGACAACGCTGTTCCGCGCTC
+CGCGTGCTGTGTTTGCAGGACGATATCGCCGAACATACGCTGAAAATGTTACGCGGCGCG
+ATGGCGGAGTGTCGGATGGGGAATCCAGGCCGTCTGACGACCGATATCGGGCCGGTGATC
+GATAGCGAGGCCAAAGCCAACATTGAACGTCATATCCAGACGATGCGCGCCAAAGGCCGC
+CCGGTTTTCCAGGCCGCGCGTGAAAACAGCGATGACGCGCAGGAATGGCAGACCGGTACG
+TTTGTTATGCCCACGCTTATTGAGCTGGAAAACTTCGCAGAACTGGAAAAAGAGGTCTTC
+GGGCCCGTGCTGCACGTCGTGCGTTATAACCGTAACCAACTGGCGGAGCTTATCGAACAG
+ATTAACGCTTCCGGCTACGGGCTAACGCTGGGCGTACATACCCGTATTGATGAAACCATT
+GCGCAAGTCACCGGTTCCGCCCATGTCGGCAACCTGTACGTTAACCGTAATATGGTGGGC
+GCGGTCGTCGGCGTCCAGCCGTTTGGCGGCGAAGGCCTGTCCGGCACCGGGCCAAAAGCG
+GGAGGGCCGCTCTATCTCTACCGCCTGCTGGCACACCGCCCGCCCAATGCGCTCAATACG
+ACGCTGACTCGTCAGGATGCGCGTTACCCGGTGGATGCGCAGCTTAAAACCACGCTACTC
+GCGCCGTTGACCGCTCTGACGCAATGGGCGGCGGATCGCCCGGCGCTACAGACGCTCTGC
+CGACAATTCGCCGATCTGGCGCAGGCCGGCACGCAGCGCCTGCTACCGGGGCCGACCGGC
+GAGCGTAATACCTGGACGCTGTTGCCGCGTGAACGGGTGTTATGCCTGGCTGATGATGAA
+CAGGACGCGTTGACGCAGCTTGCCGCCGTTCTCGCCGTCGGCAGTCAGGCGCTATGGTCA
+GACGACGCCTTCCACCGCGATCTGGCGAAACGTCTCCCCGCCGCCGTCGCGGCGCGTGTC
+CAGTTTGCGAAAGCGGAAACGCTGATGGCGCAGCCGTTTGACGCGGTGATTTTCCACGGC
+GACTCCGACAAGCTGCGAACCGTGTGCGAAGCCGTCGCCGCCCGCGAAGGCGCGATAGTG
+TCGGTACAGGGGTTCGCCCGCGGCGAAAGCAATATGCTGCTGGAACGGCTCTATATTGAA
+CGTTCGCTGAGCGTAAACACTGCCGCCGCTGGCGGTAATGCCAGCCTGATGACAATTGGC
+TAA
+>11111_1#11_04090
+ATGGCTATTAGCACACCGATGTTGGTGACATTCTGTGTCTATATTTTTGGCATGATATTG
+ATTGGGTTTATCGCCTGGCGCTCAACCAAAAACTTTGATGACTATATTCTTGGCGGTCGC
+AGCCTGGGGCCGTTTGTTACGGCTTTATCAGCCGGCGCGTCGGATATGAGCGGCTGGCTG
+TTAATGGGGCTGCCTGGCGCTATCTTTCTGTCGGGGATCTCTGAAAGCTGGATCGCCATT
+GGCCTGACGTTAGGCGCATGGATTAACTGGAAGCTGGTGGCCGGGCGCCTGCGCGTGCAT
+ACCGAATTTAACAATAACGCGCTCACGCTGCCGGACTATTTTACCGGTCGGTTTGAGGAT
+AAGAGCCGAGTCCTGCGTATTATTTCCGCGCTGGTCATTCTGCTGTTTTTCACTATCTAT
+TGCGCATCAGGTATTGTCGCTGGGGCACGACTGTTCGAAAGCACCTTCGGTATGAGCTAT
+GAAACCGCACTGTGGGCGGGGGCCGCGGCAACCATTATTTATACCTTTATCGGCGGGTTT
+CTTGCCGTTAGCTGGACGGATACCGTTCAGGCCAGCCTGATGATTTTTGCGTTAATCCTG
+ACGCCGGTGATGGTTATTGTCGGCGTAGGCGGTTTTAGCGAGTCGCTGGAAGTGATCAAG
+CAAAAGAGCATCGAGAATGTCGACATGCTCAAGGGGCTGAATTTTGTCGCTATTATTTCT
+CTGATGGGCTGGGGGCTGGGTTACTTCGGTCAGCCGCATATCCTGGCGCGCTTTATGGCG
+GCGGATTCCCATCACAGTATTGTTCATGCGCGTCGTATCAGTATGACCTGGATGATTCTG
+TGTCTGGCGGGCGCGGTGGCGGTGGGCTTCTTTGGCATTGCGTACTTTAACAATAACCCC
+GCGCTGGCCGGGGCGGTGAACCAAAACTCAGAACGCGTATTTATTGAACTGGCGCAGATC
+CTGTTTAACCCGTGGATTGCCGGTGTTCTGCTGTCTGCTATCCTGGCGGCGGTGATGTCG
+ACGTTGAGCTGTCAGTTGCTGGTATGCTCCAGCGCGATTACGGAAGATTTATATAAGGCT
+TTTCTGCGTAAAAGCGCCAGCCAGCAAGAGCTGGTATGGGTAGGGCGAGTGATGGTGCTG
+GTGGTAGCGCTGATCGCCATTGCGCTGGCGGCGAATCCTGATAACCGTGTGCTGGGGCTG
+GTGAGCTACGCCTGGGCTGGATTCGGCGCGGCATTTGGACCTGTTGTCCTGTTTTCTGTG
+ATGTGGTCGCGTATGACACGTAACGGCGCGCTGGCGGGAATGATTATTGGCGCGGTGACG
+GTTATCGTCTGGAAACAATATGGCTGGCTGGATCTGTATGAGATTATCCCTGGCTTCATT
+TTCGGCAGCCTGGGGATCGTAATCTTTAGCCTGCTTGGCAAAGCGCCGACAGCAACGATG
+CAGGAACGCTTTGCAAAAGCGGACGCGCATTATCATTCCGCGCCGCCGTCGAAGCTACAG
+GCGGAATAA
+>11111_1#11_04091
+ATGGTAATGTCCGCACCAGGACACATTGTTTACAGTAGTTACAACACCCTGTACGGACAT
+TCTCTCTCCGGTGGTGGTCTTGTCATCTTAAAAGCTCTCATCATTTCCCTTACTGTCCAT
+ACCCATGACGCCATATGTGGTGCGCGTAGCCGTGTGTGGCGTCGTTTCAAAAAGCAAGCT
+AAGGCTTACAAGGAAGCCAACCCTCAGATGTGTGTGCGCATAATCGCGTTCAAGAGAACG
+CGGGTGATGTATACCTACAACTCAAGGTGCTATCCATGGGAAGACAAAAAGCAGTGA
+>11111_1#11_04092
+ATGGGAAGACAAAAAGCAGTGATCAAAGCTCGTCGTGAAGCAAAGCGTGTGTTGAGACGA
+GATTCGCGTAGTCATAAGCAACGTGAAGAAGAATCGGTCACGTCACTGGTACAGATGGGC
+GGAGTAGAAGCCATTGGCATGGCGCGCGATAGTCGCGATACCTCTCCTGTTAAGGCGCGA
+AATGAAGCACAGGCGCATTATCTGAACGCTATCGACAGTAAACAGCTTATTTTTGCGACC
+GGCGAAGCCGGCTGCGGAAAAACATGGATCAGTGCGGCAAAGGCGGCAGAAGCATTGATT
+CATAAGGACGTCGAGAGGATCATTGTGACGCGTCCGGTATTGCAGGCTGATGAAGATCTT
+GGTTTTTTGCCCGGTGATATCGCTGAAAAATTCGCGCCTTATTTTCGTCCCGTCTACGAT
+GTCCTGCTTAAACGGTTGGGCGCGTCCTTTATGCAATATTGTTTGCGCCCGGAAATCGGT
+AAGGTAGAAATTGCCCCGTTCGCCTATATGCGTGGGCGTACTTTTGAAAATGCGGTCGTG
+ATCCTCGACGAGGCGCAAAATGTGACTGCGGCGCAAATGAAAATGTTTTTGACGCGATTA
+GGCGAAAATGTCACGGTCATTGTCAATGGCGATATTACGCAATGCGACCTGCCGCGCGGT
+GTGCGTTCCGGGTTGAGTGATGCGTTGGAACGCTTTGAAGAAGATGAAATGGTGGGGATT
+GTGCATTTCAACAAAGACGACTGCGTGCGCTCGGCGCTTTGTCAGCGAACGCTCCACGCA
+TACAGCTAA
+>11111_1#11_04093
+ATGGAGCCTCAACCCCCACGTCTTAAACCCGGAAAAATCCTTGACACTCTGGGTGCTATG
+CAAAAAAGCCTGACACGTGCCTCCCAGCGTATTGCGCAATATATTTTAGCCTTCCCCAGA
+CAGGTGACACAGTCATCTATTGCCGATTTGTCGCGCGACACACAGGCCGGAGAAGCCACG
+GTTATTCGCTTTTGTCGCACCCTGGGCTATAAAGGTTTTCAGGATTTTAAAATGGACCTG
+GCCATTGAACTTGCCACTACCGAGTCTGATGACAGTAGTCCTCTACTGGATGCCGAAGTT
+AGCGAATCCGACGATGCCCACGCCATTGGTTTAAAATTGCAGAACACCATTAGTAATGTA
+TTATCTGAAACGCTAAATCTGCTTGATATGCAACAGGTTCTCGGTGTCGTGGACGCCCTA
+CGTCACTGTCACTCAGTTTATATCTTTGGTGTGGGCTCATCGGGGATCACGGCGCTGGAT
+ATGAAACACAAGCTAATGCGTATTGGTTTACGGGGCGATGCGGTAAGCAATAACCATTTT
+ATGTACATGCAGGCTACGCTATTAAAAGCAGGCGATGTCGCGATGGGTGTCAGTCACTCG
+GGCACATCGCCAGAAACAGTGCATTCACTCCGATTGGCCCGACAGGCTGGCGCCACCACA
+GTCGCCATTACCCATAATCTGGGTTCTCCATTATGTGAAGAGGCCGATTTTTGCCTGATC
+AATGGTAATCGGCAAGGAATGTTGCAGGGTGACTCGATCGGTACGAAAGCCGCGCAGCTT
+TTCGTCTTTGACCTGCTCTATACCCTTCTTGTACAGTCCTCGCCGGAACAGGCCCGAGAA
+AGCAAATTACGGACAATGAATGCCCTGGACATGACAAAATAA
+>11111_1#11_04095
+GTGATATGTCTCAAAGTCCAGGGCGGCATTGGTGAAATTTTTACGGTGACGCAGCAGGCG
+GATAAATTCTTTCCGGCTACGCAGTTCCACTGGAGCTGGACGGAAAGCACAGTACCTGTA
+TTGATGATTGGTTTTCTGTTTGCCAATATTCAGCAATTTACTGCCAGTCAGGATGTGGTC
+CAACGCTATATCGTGACTGACTCCATAGAGGAAACGAAGAAAACATTACTTACAAATGCC
+AAACTGGTTGCTGTGATCCCTGTTTTCTTTTTTGCTATCGGCTCGGCATTATTTGTCTAC
+TATCAGCAACATCCACAATTATTACCGGCGGGATTCAACACTGGCGGCATTTTGCCCTTA
+TTCGTGGTCACCGAAATGCCAGTCGGCATTGCAGGGTTGATAATCTCCGCTATTTTCGCT
+GCCGCGCAGTCCAGCATCTCCAGCAGCTTAAACAGCATTTCCAGTTGTTTTAATTCCGAT
+ATCTATCAGCGTTTGAGTCATAAAAAAGGAACGCCAGAAAACCGTATGAAAATAGCTAAG
+TTAGTTATTCTGGTCGCGGGCCTGATAAGTAGCGCGGCCTCGGTATGGCTGGTCATGGCC
+GATGAATCAGAAATCTGGGATGCATTTAATAGTCTGATAGGTCTGATGGGAGGGCCAATG
+ACCGGTCTGTTCATGCTGGGCATTTTCTTTAAACGAGCAAATGCCGGGAGTGCGGTTTTA
+GGAATTATTATCAGCGTCATTACCGTGCTGGGCACACGCTATGCCACTGACCTTAACTTC
+TTCTTTTATGGGGTCATTGGCTCGCTAAGCGTGGTGATCAGCGGCGTTATTTTCGCCCCG
+TTATTTGCCCCGGCACCGCCATTGACGCTGGATGAAAAACCTGAACCAAAGGTGACATTA
+TGA
+>11111_1#11_04096
+ATGATTACACATTCTTTCGGCATCGTTAATTATTTTGTATTATTTGGCTACCTCCTGGCC
+ATGATGTTAGTCGGTGTCTATTTTTCCAGACGGCAAAAAACAGCAGACGATTATTTTCGC
+GGTGGTGGCCGGGTTCCTGGTTGGGCGGCTGGGGTCAGTGTATTTGCTACTACGTTAAGC
+TCAATTACATTTATGTCAATTCCTGCCAAAGCGTTTACTTCCGACTGGACGTTTATCATT
+GGTCAGTATCTGGCTATCGCAATTTTACCGCTGGTTTTTTATTTCTATATTCCGTTTTTT
+CGGAAATTGAAAGTCACATCAGCCTATGAATATCTCGAAGCACGGTTCGATGTGCGCTGC
+CGTCTGTTCGCCAGCATGTCATTTATGTTGTTTCATATTGGACGTATCGCCATTATCACT
+TTCCTCACCGTGCTGGCCTTGCGCCCCTTCATCGCTATAGACCCGGTGATTTTGGTACTG
+TTGATTAGTGTGATGTGTATCATTTATACCTGGATGGGGGGGAATTGA
+>11111_1#11_04097
+ATGTCACTATTAGCCAGGCTGGAACAAAGTGTACACGAAAACGGTGGGCTGATTGTCTCA
+TGCCAACCGGTACCAGGCAGCCCTATGGATAAACCTGAAATTGTGGCTGCAATGGCACAG
+GCAGCGGCTTCGGCGGGTGCGGTCGCTGTGCGCATTGAAGGCATTGAGAATCTGCGGACT
+GTTCGTCCCCATCTTTCTGTTCCTATTATTGGGATAATTAAACGTGACCTTACAGGGTCG
+CCAGTCCGTATCACTCCATATTTACAGGATGTTGACGCCCTGGCGCAGGCAGGTGCCGAT
+ATTATCGCTTTTGATGCCTCATTCCGCTCTCGCCCGGTTGATATTGATAGTTTACTGACA
+CGTATTCGCCTGCATGGATTACTGGCGATGGCAGACTGTTCAACCGTGAATGAAGGCATA
+AGTTGCCATCAGAAAGGAATCGAATTCATTGGTACAACACTGTCTGGCTATACCGGTCCC
+ATCACGCCGGTTGAGCCAGATTTGGCAATGGTGACACAACTGAGTCATGCAGGTTGTCGT
+GTTATTGCCGAGGGGCGCTATAACACGCCTGCACTGGCGGCCAATGCTATTGAGCATGGT
+GCCTGGGCAGTTACCGTTGGTTCCGCTATCACCCGTATCGAGCATATCTGTCAGTGGTTC
+AGTCACGCAGTAAAACGCTGA
+>11111_1#11_04098
+ATGAAAAATTTTAAGAAAATGATGACGCTAATGGCGCTATGTTTATCAGTTGCTATCACC
+ACATCAGGATATGCAACCACGCTTCCTGATATACCAGAACCACTGAAAAATGGTACTGGC
+GCTATTGATAATAATGGCGTGATTTATGTCGGCTTAGGTACCGCAGGGACATCCTGGTAT
+AAAATTGATCTTAAAAAGCAACATAAAGACTGGGAGCGTATAAAGTCGTTTCCTGGTGGA
+GCTCGTGAGCAATCCGTGTCGGTATTTTTAAATGATAAGCTGTATGTTTTTGGTGGCGTA
+GGGAAAAAAAACAGTGAATCACCGTTGCAGGTTTATAGCGATGTGTACAAATACTCACCG
+GTGAAAAATACATGGCAAAAAGTTGATACTATATCTCCAGTTGGATTAACAGGGCATACG
+GGAGTAAAATTAAACGAAACGATGGTACTTATTACCGGAGGGGTTAATGAGCATATCTTT
+GATAAGTATTTTATTGATATAGCGGCTGCGGATGAAAGTGAAAAAAATAAAGTCATCTAT
+AATTATTTTAATAAACCTGCCAAAGATTATTTTTTTAATAAAATCGTATTTATCTACAAT
+GCTAAAGAGAACACATGGAAGAATGCCGGTGAGCTGCCAGGCGCGGGGACGGCAGGATCG
+TCATCGGTAATGGAAAATAATTTCTTGATGCTGATTAATGGTGAGCTCAAACCGGGTTTA
+CGTACCGATGTGATTTACCGCGCCATGTGGGATAACGATAAGCTAACATGGTTGAAGAAC
+AGCCAGTTACCGCCATCGCCTGGAGAACAACAGCAGGAAGGGTTGGCCGGAGCATTTTCG
+GGCTATAGCCACGGTGTCCTGCTTGTCGGTGGTGGCGCGAATTTTCCGGGAGCAAAACAA
+AATTATACTAATGGAAAGTTTTATTCCCACGAAGGGATAAATAAAAAATGGCGAGATGAA
+GTCTATGGTTTGATTAATGGCCATTGGCAATATATGGGTAAAATGAAACAACCTCTCGGC
+TATGGTGTATCAGTAAGTTATGGTGATGAAGTTTTCCTTATTGGTGGTGAAAATGCTAAA
+GGGAAACCTGTTTCGTCTGTAACCTCCTTTACCATGCGTGATGGTAATTTATTAATAAAA
+TAA
+>11111_1#11_04099
+ATGAAAATCAACAGATATCTTCTGGGTATGGTTTCGTTTATAGCATTTTCATCATATCTA
+CAAGCGGCAACCCTTGATTATCGGCATGAATATGCTGATAGAACCAGAATTAATAAAGAC
+CGTATTGCTATAATTGAAAAGCTTCCTAACGGCATTGGTTTTTATGTCGATGCCAGCGTT
+AAATCGGGAGGAGTAGATGGTGAGCAGGATAAGCATTTAAGCGATCTCGTCGCAAACGCT
+ATAGAACTGGGCGTAAGTTATAATTATAAAGTTACGGACCATTTTGTTTTGCAGCCTGGA
+TTTATATTTGAAAGCGGTCCAGACACTTCAATTTATAAGCCTTATTTAAGGGCGCAATAT
+AATTTTGATTCTGGTGTTTATATGGCTGGTCGTTACCGTTATGACTATGCAAGGAAGACA
+GCTAACTATAATGATGATGAGAAAACGAATAGATTTGATACTTATATAGGTTATGTTTTT
+GATGAGTTGAAATTGGAATATAAATTTACCTGGATGGATAGCGATCAAATTAAATTTGAT
+AACAAAAAAACAAACTATGAACATAATGTGGCTTTAGCCTGGAAACTGAATAAGTCATTT
+ACACCATACGTTGAGGTCGGAAATGTAGCGGTGAGAAATAATACCGATGAGAGACAGACC
+CGTTATCGCGTTGGATTACAATACCACTTTTGA
+>11111_1#11_04100
+GTGATAGCAAAATTCTTCCCGTGGTATAGCGAGATAACACGTCCACAAAAAAATGCTTTA
+TTTTCAGCATGGCTGGGTTACGTTTTTGATGGCTTCGACTTTATGCTGATTTTCTACATT
+ATGTATCTGATCAAGGCTGACTTAGGATTGACAGATATGGAGGGCGCATTCCTTGCCACA
+GCGGCCTTTATTGGGCGACCATTTGGCGGGGCGCTATTTGGTCTGCTGGCAGACAAATTT
+GGCCGTAAGCCGTTAATGATGTGGTCGATAGTTGCCTATTCTGTAGGTACAGGGTTAAGT
+GGCCTGGCTTCCGGTGTAATTATGCTGACGCTTAGTCGTTTTATTGTCGGTATGGGGATG
+GCGGGGAAGTATGCTTGCGCTTCTACTTATGCCGTGGAAAGTTGGCCAAAGCATTTAAAA
+TCTAAAGCGAGCGCATTTCTGGTTTCAGGTTTCGGTATTGGTAACATCATAGCAGCCTAT
+TTTATGCCGTCATTTGCCGAAGCGTATGGTTGGCGTGCTGCTTTTTTTGTCGGTTTGCTA
+CCCGTTCTTTTAGTAATCTACATCCGGGCCAGGGCTCCTGAATCTAAAGAGTGGGAAGAA
+GCCAAACTCAGTGGTCTCGGAAAGCATTCACAAAGTGCCTGGTCAGTTTTCTCTTTGTCA
+ATGAAAGGGCTATTTAATCGAGCTCAATTTCCACTGACATTATGTGTATTTATTGTTCTG
+TTCTCTATTTTCGGCGCAAACTGGCCGATCTTTGGTCTACTGCCTACATATTTGGCGGGA
+GAGGGCTTTGATACGGGCGTGGTCTCTAATTTAATGACGGCGGCGGCATTCGGCACTGTA
+TTGGGAAATATCGTTTGGGGTCTGTGCGCAGATAGAATTGGTTTGAAGAAAACGTTCAGC
+ATTGGTCTTCTCATGTCCTTTTTATTCATTTTCCCGTTATTCAGAATTCCGCAAGATAAT
+TATTTACTGCTGGGCGCATGTTTATTCGGTTTAATGGCGACTAACGTAGGTGTTGGCGGG
+CTGGTTCCCAAATTTCTCTACGACTACTTTCCTCTTGAGGTTCGTGGTTTGGGTACCGGG
+CTTATTTATAATCTTGCTGCGACATCAGGCACATTCAATTCAATGGCGGCGACCTGGCTT
+GGAATAACAATGGGGCTAGGCGCTGCGCTAACGTTCATTGTTGCTTTCTGGACCGCAACA
+ATTCTACTCATTATTGGCCTATCCATTCCGGATAGACTAAAAGCACGTCGTGAAAGGTTT
+CAGTCAACAAAAGAATTTTAA
+>11111_1#11_04101
+ATGACGAAATACGGTGTTATAGGTACAGGTTATTTTGGCGCTGAACTGGCGCGATTTATG
+TCTAAGGTTGAAGGGGCGAAAATCACTGCGATTTACGATCCGGTAAATGCGGCTCCGATA
+GCGAAAGAGCTGAACTGTGTCGCCACTTCAACGATGGAGGCGCTTTGTACCCATCCTGAT
+GTGGATTGCGTAATTATTGCTTCACCAAATTACTTACATAAAGCGCCGGTCATTGCGGCG
+GCTAAAGCGGGTAAACACGTGTTTTGTGAAAAACCTATCGCCTTAAATTACCAGGATTGT
+AAGGATATGGTTGATGCCTGCAAAGAAGCTGGTGTTACCTTTATGGCGGGTCACGTTATG
+AACTTTTTTCACGGGGTTCGCCACGCTAAAGCGCTCATCAAAGCCGGTGAAATCGGTGAA
+GTTACACAAGTTCACACTAAACGTAATGGTTTTGAAGACGTGCAGGATGAGATCTCATGG
+AAGAAGATTCGCGCAAAGTCAGGTGGGCATCTGTACCATCACATTCACGAGCTAGATTGT
+ACACTGTTCATCATGGATGAAACCCCATCCCTGGTTTCAATGGCGGCGGGGAATGTTGCG
+CACAAAGGTGAAAAATTTGGTGATGAAGATGATGTTGTCCTAATCACCCTTGAGTTTGAA
+AGCGGTCGTTTCGCGACACTTCAGTGGGGATCATCGTTCCACTACCCTGAGCACTATGTA
+TTAATTGAGGGCACGACAGGTGCAATTCTCATTGATATGCAAAACACGGCTGGTTATCTA
+ATAAAAGCGGGCAAAAAAACACACTTTCTTGTGCATGAAAGCCAGGCGGAGGATGATGAT
+CGTCGCAACGGTAACATATCCAGCGAGATGGATGGCGCAATCGCTTATGGTAAACCCGGT
+AAACGTACGCCGATGTGGCTCTCATCAATTATGAAACTGGAGATGCAGTACTTGCATGAT
+GTGATAAACGGTCTGGAGCCAGGCGAGGAGTTTGCTAAATTGCTAACGGGAGAAGCGGCG
+ACAAATGCCATTGCTACCGCTGATGCTGCGACGCTTTCTTCAAACGAGGGGCGCAAAGTT
+AAACTCACTGAAATTCTTGGCTAA
+>11111_1#11_04103
+ATGGAGATAATTTTTTATCACCCGACATTTAACGCCGCCTGGTGGGTAAATGCGCTGGAG
+AAGGCTCTCCCACATGCGCGCGTTCGTGAATGGAAGGTCGGTGATAACAACCCCGCAGAC
+TATGCGCTTGTATGGCAGCCCCCGGTTGAAATGCTGGCCGGAAGACGCTTAAAAGCCGTC
+TTTGTGCTGGGCGCGGGGGTGGATGCAATTCTGAGTAAATTAAATGCGCATCCGGAAATG
+CTGGACGCCTCCATTCCTCTATTCCGTCTGGAAGATACCGGAATGGGCCTGCAAATGCAG
+GAGTATGCCGCCAGCCAGGTATTACACTGGTTCCGTCGTTTCGATGATTATCAGGCGCTG
+AAAAATCAGGCGCTATGGAAACCGTTGCCGGAATATACCCGCGAAGAGTTTAGCGTCGGT
+ATCATAGGCGCAGGGGTACTGGGCGCAAAAGTGGCAGAAAGTCTACAGGCGTGGGGGTTC
+CCGTTACGTTGCTGGAGTCGTAGCCGCAAATCCTGGCCTGGCGTGGAAAGTTATGTAGGG
+CGTGAAGAACTGCGCGCTTTCCTGAACCAGACGCGGGTGCTGATTAATCTGCTGCCGAAT
+ACGGCCCAAACGGTAGGAATTATTAATAGCGAATTGTTGGATCAATTGCCGGATGGCGCT
+TACGTGCTGAATCTCGCGCGCGGCGTTCATGTTCAGGAGGCGGATCTGCTGGCTGCGCTT
+GATAGCGGTAAGCTAAAAGGCGCGATGTTGGATGTCTTTAGCCAGGAACCGTTACCGCAG
+GAAAGTCCATTATGGCGCCATCCGCGAGTCGCCATGACGCCGCACATTGCGGCAGTCACC
+CGTCCGGCGGAAGCCATCGATTATATTAGCCGCACCATTACCCAGCTGGAGAAGGGAGAG
+CCGGTGACGGGGCAGGTGGATCGGGCGAGAGGATATTGA
+>11111_1#11_04104
+ATGTATCCCGTTGACCTGCATATGCATACCGTCGCCAGCACTCATGCCTACAGTACTCTG
+AGCGATTATATCGCGGAAGCCAAACGCAAAGGCATTAAACTTTTTGCGATTACCGATCAT
+GGTCCGGACATGGAAGATGCGCCGCATCACTGGCATTTTATTAACATGCGCATCTGGCCG
+CGTCTGGTTGACGGCGTGGGGATACTGCGTGGCATTGAGGCGAATATCAAGAATATTAAC
+GGTGAAATTGATTGTTCCGGAAAGATGTTCGACTCGCTGGATCTGATTATCGCAGGCTTT
+CATGAGCCCGTTTTTGCGCCGCATGATAAAGAAACCAATACTCAGGCGATGATCGCGACC
+ATCGCCAGCGGCAAGGTGCATATAATTAGTCACCCCGGAAATCCAAAGTATCCAGTGGAG
+GTTAAAGCCATCGCGCAGGCGGCGGCGAAACACCATGTAGCGCTGGAAATCAACAACTCT
+TCTTTTCTGCATTCGCGTAAAGGAAGCGAAGATAATTGCCGCGCGGTCGCTGCCGCCGTA
+CGCGATGCGGGAGGCTGGGTAGCGTTAGGCTCTGATTCCCATACGGCCTTTACGCTTGGC
+GATTTCACCGAATGCCGGAAAATTCTGGATGCGGTGAATTTTCCGGAAGATCGAATCCTG
+AACGTCTCTCCGCAGCGCTTACTGGCCTTTCTCGAATCACGCGGTATGGCGCCTGTACCG
+GAATTTGCCGAACTTTAA
+>11111_1#11_04105
+ATGAATGAGTTTTCAATCCTGTGCCGTGTGCTGGGATCGTTGTTTTACCGCCAACCGCAA
+GATCCTTTACTGGTTCCGCTGTTTACGTTAATCCGTGAAGGTAAACTGGCGGCAAACTGG
+CCGCTGGAGCAGGATGACATGCTGGCGCGTTTACAGAAAAGCTGCGATATCACGCAGATT
+TCCACTGATTACAATGCGTTATTTGTTGGGGAAGAGTGCGCGGTAGCGCCATACCGCAGT
+GCGTGGGTCGAAGGCGCGGAAGAGTCTGAGGTGCGCGCTTTTTTAACGTCGCGAGGGATG
+CCGCTGGCCGATACGCCTGCCGATCACATTGGCACTTTATTGCTCGCGGCCTCCTGGCTG
+GAAGATCAGTCTGCCGAAGATGAAAGTGAAGCGCTGGAAACCTTATTTGCCGATTATCTG
+CTTCCCTGGTGCAATACCTTCCTCGGTAAAGTTGAAGCCCATGCCGTTACGCCATTCTGG
+CGCACTCTGGCGCCGCTAACGCGTGATGCGATAGGGGCCATGTGGGATGAACTTCAGGAA
+GAAGATGAAGAATAA
+>11111_1#11_04106
+ATGATGCGCGCCATGAACATACTTCTTTCTATTGCTATCACTACGGGCATCCTTTCTGGA
+ATATGGGGATGGGTGGCCGTCTCCCTGGGGTTACTAAGCTGGGCCGGTTTTTTAGGCTGT
+ACGGCTTATTTCGCCTGTCCGCAGGGCGGCTTTAAGGGATTGTTGATTTCCGCCTGTACG
+CTGTTAAGCGGTATGGTGTGGGCGCTGGTCATTATTCACGGTAGCGCGTTGGCGCCGCAT
+CTGGAAATTGTCAGTTACGTGTTGACGGGGATCGTGGCATTCCTGATGTGTATCCAGGCA
+AAGCAGCTATTGCTTTCTTTTGTTCCGGGAACATTTATCGGCGCCTGCGCGACATTTGCA
+GGGCAGGGTGACTGGCGGTTGGTATTACCGTCGCTGGCGCTGGGGCTAATCTTTGGCTAT
+GCCATGAAAAATAGTGGGCTATGGCTGGCATCACGCCGCGAGCAACATTCAGCGAATACG
+GCGGTCACAAAATAA
+>11111_1#11_04107
+ATGCCGCGCTTACTTATTTTGGTTGCCGTTTTATTGTTGAGCGGATGCTTAACTGCCCCG
+CCGAAACAAGCTGCGAAACCGACATTAATGCCCCGCGCACAAAGTTACAAAGATTTGACG
+CACTTACCTGCTCCCACCGGTAAGATCTTTGTTTCGGTATATAACATTCAGGATGAAACG
+GGCCAATTTAAACCTTACCCGGCAAGTAACTTTTCCACGGCTGTGCCGCAGAGCGCCACC
+GCTATGTTGGTCACCGCGCTGAAAGATTCGCGCTGGTTTATCCCACTAGAACGACAAGGC
+TTACAGAATCTTTTGAATGAACGGAAAATTATTCGCGCAGCCCAGGAAAACGGCACCGTG
+GCGATGAATAACCGTATCCCGCTTCAGTCGTTGACGGCGGCAAATATTATGGTGGAAGGT
+TCTATTATTGGTTATGAAAGTAACGTCAAATCCGGCGGGGTCGGCGCAAGATATTTCGGT
+ATTGGCGCCGATACGCAGTATCAGCTGGATCAGATTGCTGTCAACCTGCGCGTGGTTAAC
+GTCAGTACGGGCGAGATCCTTTCTTCGGTGAACACCAGTAAAACGATCCTTTCCTATGAA
+GTACAGGCAGGCGTGTTCCGTTTTATTGATTACCAGCGCTTACTGGAAGGCGAAATCGGC
+TATACCTCGAACGAACCGGTGATGCTGTGTCTGATGTCAGCCATTGAAACCGGCGTTATC
+TTCCTCATTAATGATGGTATCGATCGCGGACTGTGGGATTTGCAGAATAAAGCGGACAGG
+CAAAATGATATTCTGGTGAAATACCGTGAGCTGTCAGTACCGCCAGAATCCTGA
+>11111_1#11_04108
+ATGCGTGTTAAACATGCAGTAGTGCTGCTCATGCTTTTTTCGCCATTAACCTGGGCTGGA
+AATATGACGTTCCAGTTCCGTAATCCTAACTTTGGTGGAAACCCCAATAACGGTTCCTTT
+TTATTGAATAGCGCCCAGGCGCAAAATTCATATAAAGACCCCGCTTATGATAACGATTTT
+GGTATCGAGACCCCCTCAGCGTTGGATAACTTTACGCAGGCTATTCAATCGCAAATTCTG
+GGCGGCTTGTTGACCAATATTAATACCGGAAAACCAGGACGTATGGTGACCAATGATTTT
+ATTATCGATATCGCTAATCGCGACGGACAGCTCCAGCTCAACGTCACGGACAGAAAAACG
+GGAAGAACCTCGACCATCGAAGTGTCAGGTTTACAAACTCAGTCAACCGATTTTTAA
+>11111_1#11_04109
+ATGAAACGCTATCTGACCTGGATTGTAGCAGCAGAGTTACTGTTCGCTACCGGAAACCTG
+CATGCCAATGAAGTTGAAGTCGAGGTTCCCGGATTGTTAACCGACCATACCGTCTCTTCG
+ATAGGACATGAATTCTATCGTGCATTCAGCGACAAATGGGAAAGCGAATACACCGGCAAT
+CTGACCATTAATGAAAGACCCAGTGCGCGTTGGGGAAGCTGGATCACCATAACGGTAAAT
+CAGGACGTTATTTTCCAGACCTTTTTATTTCCAATGAAAAGAGACTTCGAGAAAACCGTC
+GTCTTCGCATTAGCGCAAACAGAGGAAGCATTAAATCGCCGACAAATAGATCAAACGCTA
+TTAAGTACGAGTGATTTAGCGCGTGATGAATTCTAA
+>11111_1#11_04110
+ATGTTTAATGAAGTCCATAGTAGTCATGGTCACACACTATTGTTGATCACAAAGCCATCT
+CTGCAAGCTACGGCATTATTGCAACATTTAAAGCAATCGCTGGCCATAACCGGAAAACTG
+CATAATATTCAACGTTCTCTGGAAGATATCTCAGCCGGTTGCATTGTTTTAATGGATATG
+ATGGAAGCGGATAAGAAGCTTATCCACTATTGGCAGGATAATTTAAGCCGCAAAAACAAT
+AATATAAAAACATTATTGTTAAATACCCCTGACGATTATCCCTACCGTGAAATTGAAAAC
+TGGCCTCATATTAACGGCGTGTTTTACGCCACTGAAGACCAGGAACACGTGGTCAGCGGA
+TTACAGGGTATTCTGCGGGGCGAATGCTATTTTTCACAAAAATTAGCCAGTTACCTGATT
+ACACACTCAGGAAATTACCGCTACAACAGCACCGAGTCCGCATTACTCACTCATCGCGAA
+AAAGAGATCCTCAATAAGTTACGTATTGGTGCCTCTAATAATGAAATCGCCAGGTCGCTA
+TTTATCAGCGAGAATACGGTTAAGACACATCTTTATAATCTTTTCAAAAAGATAGCTGTC
+AAAAATCGCACCCAGGCAGTTTCATGA
+>11111_1#11_04111
+ATGAAAAACAAATTGTTATTTATGATGTTGACAATACTGGGTGCGCCTGGGATTGCAACC
+GCGACAAATTATGATCTGGCTCGTTCAGAGTATAATTTTGCGGTAAATGAATTAAGCAAG
+TCTTCATTTAATCAGGCGGCCATTATTGGTCAAGTCGGCACGGATAATAGTGCCAGAGTA
+CGCCAGGAAGGATCAAAACTATTGTCCGTTATTTCACAAGAAGGAGAAAATAATCGGGCG
+AAAGTCGACCAGGCAGGGAATTATAACTTTGCGTATATTGAGCAAACGGGCAATGCCAAC
+GATGCCAGTATATCGCAAAGCGCTTACGGTAATAGTGCGGCTATTATCCAGAAAGGTTCT
+GGAAATAAGGCCAATATTACCCAGTACGGTACGCAGAAAACAGCAGTTGTAGTGCAGAAA
+CAGTCGCATATGGCTATTCGCGTCACCCAACGCTAA
+>11111_1#11_04112
+ATGAAACTTTTAAAAGTGGCAGCATTCGCAGCAATCGTAGTTTCTGGCAGTGCTCTGGCT
+GGCGTCGTTCCACAATGGGGCGGCGGCGGTAATCATAACGGCGGCGGCAATAGTTCCGGG
+CCGGATTCCACGTTGAGCATTTATCAGTACGGTTCCGCTAACGCTGCGCTTGCTCTGCAA
+AGCGATGCCCGTAAATCTGAAACGACCATTACCCAGAGCGGTTATGGTAACGGCGCCGAT
+GTAGGCCAGGGTGCGGATAACAGTACTATTGAACTGACTCAGAATGGTTTCAGAAACAAT
+GCCACCATCGACCAGTGGAACGCTAAAAACTCCGATATTACTGTCGGTCAATACGGCGGT
+AATAACGCCGCGCTGGTTAATCAGACCGCATCTGATTCCAGCGTAATGGTGCGTCAGGTT
+GGTTTTGGCAACAACGCCACGGCTAACCAGTATTAA
+>11111_1#11_04113
+ATGCATACTTTATTGCTCCTTGCCGCACTTTCAAATCAGATTACGTTTACCACGACTCAG
+CAAGGCGATATTTACACGGTGATCCCTCAGGTCACATTAAACGAACCCTGCGTCTGTCTG
+GTGCAAATTCTCTCTGTGCGCGACGGCGTCGGGGGACAAAGCCATACACAGCAAAAACAA
+ACGCTATCTTTACCTGCTAATCAACCGATTGAGTTGTCTCGTCTTAGTGTAAATATATCT
+TCAGAGGACTCGGTTAAAATTATTGTTACTGTTTCGGACGGACAATCACTGCATTTATCA
+CAACAATGGCCGCCTTCTGCACAGTAG
+>11111_1#11_04114
+ATGTCCGTAATCAAGAAAAATATCCCTGCCATAGGCCTGTGTATCTGCGCTTTTTTTATC
+CATTCTGCGGTAGGGCAACAAACGGTACAGGGCGGCGTTATCCATTTTCGCGGCGCGATT
+GTTGAGCCACTGTGCGATATTTCTACTCACGCCGAAAATATTGATTTAACCTGCCTACGC
+GAAGGTAAAAAGCAAATGCACCGGATAGACCTTCGGCAGGCATCTGGATTACCGCAGGAT
+ATTCAGTCCATTGCGACGGTACGGCTGCATTATCTCGATGCGCAAAAAAGCCTGGCGGTG
+ATGAATATTGAGTACCGTTAA
+>11111_1#11_04115
+ATGACATCACGTCTTCAGGTCATACAGGGTGATATCACTCAACTTAGCGTCGATGCGATT
+GTGAATGCCGCTAACGCATCATTAATGGGCGGCGGTGGCGTAGACGGCGCAATTCATCGC
+GCGGCGGGGCCGGCATTGCTGGACGCCTGTAAACTCATCCGTCAGCAACAGGGCGAATGT
+CAGACGGGACATGCGGTTATCACGCCTGCTGGCAAGCTTTCGGCAAAGGCGGTTATTCAC
+ACAGTGGGGCCCGTCTGGCGAGGCGGCGAACACCAGGAAGCTGAGCTACTCGAAGAGGCA
+TACCGGAATTGTTTGCTGCTTGCCGAGGCGAATCACTTTCGTTCCATCGCTTTTCCGGCA
+ATCAGTACCGGCGTTTATGGCTATCCACGCGCCCAGGCCGCTGAAGTCGCCGTCAGGACG
+GTTTCAGATTTTATTACCCGTTACGCTCTGCCTGAACAGGTATACTTTGTCTGTTATGAT
+GAAGAAACTGCCCGGCTTTACGCAAGATTACTTACTCAGCAAGGCGACGACCCTGCCTGA
+>11111_1#11_04116
+ATGAAGAAACTGCCCGGCTTTACGCAAGATTACTTACTCAGCAAGGCGACGACCCTGCCT
+GATAAAACACGCCTGGAGCGTGCCGTTGAACCGCTATGCGCGCGCCATCCCGGAGAGTGC
+GGCATTCTTGCGCTGGATAACAGTCTGGACGCTTTTGCCGCCCGCTACCGCCTGACCGAA
+ATGGCGGCGCGGACGCTGGATGTGCAGTATTATATTTGGGAAGACGATATGTCCGGGCGG
+CTGCTCTTTTCGGTTCTGCTGTCGGCGGCGAAGCGCGGCGTTCATGTTCGTCTGCTGCTG
+GATGATAACAATACGCCTGGTCTGGATGATACGTTGCGCTTGCTGGATAGCCATCCTAAT
+ATCGAAGTTCGTCTGTTTAATCCTTTCTCTTTTCGTACGCTACGCGCGCTGGGATATTTG
+ACGGATTTTGCGCGGCTGAATCGGCGGATGCACAATAAAAGTTACACTGCCGACGGCGTA
+GTGACGCTGGTCGGTGGGCGCAACATCGGCGATGCCTATTTCGGCGCTGGCGAGGAGCCG
+CTATTTTCCGATCTGGACGTGATGGCCATTGGCCCGGTGGTCAATGATGTCGCCAATGAT
+TTTGAACGTTACTGGCGCTGTAGTTCAGTGTCGACATTGCAGCAAGTATTATCCCTTTCT
+GAGCAGGAACTGACGCAGCGTATCGAACTTCCCGAATCCTGGTATAACGATGAGATCACC
+CGCCGTTATCTGCATAAGCTGGAAACCAGCCAGTTTATGGCGGATCTCGATCGCGGAACG
+TTGCCGCTGATTTGGGCAAAAACACGCTTGCTTAGCGATGACCCTTCTAAAGGCGAGGGG
+AAGGCGCAGCGCCATTCGCTTCTTCCGCAGCGATTATTTGACGTGATGGGGTCGCCGACG
+GAGCGTATCGACATTATTTCCGCTTACTTTGTCCCTACGCGCGCAGGCGTGGCGCAGTTG
+CTTAATCTGGTCAGGAAAGGTGTGAAGATCGCCATCTTAACTAACTCTCTGGCGGCCAAC
+GATGTGGCGGTCGTTCACGCAGGGTACGCGCGCTGGCGCAAGAAATTACTGCGCTATGGC
+GTGGAGCTCTACGAACTGAAACCGACCCGCGAACATGAAACCGCCGTACATGATCGCGGA
+CTCACCGGGAACTCAGGTTCCAGCTTACATGCTAAAACGTTCAGTATTGATGGTAGTAAG
+GTGTTTATCGGGTCGCTTAATTTTGATCCCCGTTCAACGCTTTTAAATACCGAAATGGGC
+TTTGTCATTGAAAGTGAAACGCTGGCGACGCTTATTCATAAGCGTTTTACGCAGAGCCAA
+CGCGATGCGGCCTGGCAACTGCGGCTGGATCGCTGGGGACGAATTAACTGGATCGATCGT
+CAGCAAGAAGAGGAAAAGGTGTTAAAGAAAGAACCCGCTACGCGTTTCTGGCAGCGAGTT
+CTGGTACGGTTGGCGGCAATTTTACCTGTGGAATGGTTGCTGTGA
+>11111_1#11_04117
+ATGAGCTCTGTACCCGCGCCGCGTGAATATTTTCTTGACTCTATCCGCGCATGGCTGATG
+TTGTTAGGGATTCCCTTTCATATCTCGTTGATCTATTCCACTCACAGTTGGCATGTCAAT
+AGCGCCGCGCCATCGTGGTGGCTAACCCTGTTTAACGATTTTATCCACGCTTTTCGTATG
+CAGGTGTTTTTTGTTATTTCTGGTTATTTTTCGTACATGTTATTTTTACGTTATCCATTA
+AAACACTGGTGGAAAGTACGGGTAGAACGTGTGGGTATTCCCATGCTTACCGCAATCCCT
+TTGCTTACCTTGCCGCAATTTATCCTGTTGCAATATGTCAAAGAGAAAACAGAGAACTGG
+CCTACACTCTCTGCCTATGAAAAATATAATACGTTAGCGTGGGAACTCATTTCACATCTG
+TGGTTTTTACTGGTGCTGGTGATATTAACCACCGTCAGCATCGGGATTTTTACCTGGTTC
+CAAAAAAGGCAGGAAACAAGCAAGCCTCGTCCCGCCGCTATTTCGCTGGCCAAACTTTCG
+CTTATTTTTTTCCTGCTGGGGGTGGCGTACGCTGCTATCAGGCGCATTATATTCATCGTA
+TATCCGGCAATCCTCAGTGACGGCATGTTCAATTTTATTGTGATGCAAACGCTATTTTAT
+GTGCCGTTTTTTATTCTCGGCGCGTTGGCCTTCATTCACCCCGATCTGAAAGCGCGCTTC
+ACCACGCCCTCACGCGGATGCACTTTAGGCGCTGCCGTTGCTTTTATCGCGTATCTGCTG
+AATCAACGTTATGGGAGCGGCGACGCCTGGATGTACGAAACCGAATCCGTGATTACGATG
+GTAATGGGGCTATGGATGGTGAACGTGGTATTTTCACTGGGGCATCGCTTGTTAAACTTT
+CAGTCCGCGCGTGTCACCTATTTCGTGAATGCTTCGCTGTTTATTTATCTGGTGCATCAT
+CCCTTAACGCTTTTCTTTGGCGCGTATATTACACCGCATATCTCCTCCAACCTGATCGGG
+TTCTTGTGCGGGCTGATATTTGTTATGGGTATTGCGTTAATTCTGTATGAAATTCATTTA
+CGCATCCCGCTCCTGAAATTTCTCTTTTCAGGTAAACCGCCGGTAAAACAAGAAAGCCGC
+GCCGCGATCGGGTAG
+>11111_1#11_04118
+ATGAAACATAAACGACAAATGATGAAAATGCGTTGGTTGGGCGCAGCTATTATGTTAACG
+CTCTACGCATCATCGAGCTGGGCGTTCAGTATTGATGACGTGGCAAAACAAGCTCAATCT
+TTAGCCGGGAAAGGCTATGAGGCGCCTAAAAGCAACTTGCCCTCCGTTTTCCGCGACATG
+AAATATGCGGATTATCAGCAGATCCAGTTTAACAGCGATAAAGCCTACTGGAACAACTTA
+AAGACCCCTTTTAAGCTCGAATTTTACCATCAGGGGATGTACTTCGATACGCCGGTCAAG
+ATTAACGAAGTGACGGCGACGACGGTCAAAAGAATCAAATACAGCCCGGATTACTTCAAT
+TTTGGCAATGTTCAGCACGATAAAGACACGGTAAAAGATTTAGGCTTCGCCGGGTTCAAA
+GTCCTGTACCCCATTAACAGTAAAGATAAGAACGACGAAATCGTCAGTATGCTTGGCGCC
+AGCTATTTCCGCGTTATCGGCGCAGGCCAGGTGTATGGCTTATCTGCGCGCGGCCTGGCG
+ATTGATACCGCCTTACCATCTGGTGAAGAGTTTCCCCGCTTTCGCGAGTTCTGGATTGAG
+CGTCCAAAACCCACCGATAAGCGTTTGACCGTCTATGCATTACTGGATTCTCCGCGCGCG
+ACCGGCGCTTACCGTTTTGTGATCATTCCTGGCCGCGATACCGTGGTGGACGTGCAGTCA
+AAAGTCTATCTGCGCGATAAGGTGGGCAAGCTGGGCGTTGCGCCATTAACCAGTATGTTC
+CTGTTTGGGCCAAACCAGCCGTCGCCGACGACCAACTATCGTCCGGAATTGCATGACTCG
+AACGGCTTATCCATTCATGCGGGTAATGGCGAGTGGATTTGGCGTCCGCTGAACAATCCA
+AAACACCTCGCTGTGAGCAGCTATGCGATGGAAAACCCTCAGGGATTCGGCCTGTTGCAG
+CGTGGTCGCGAGTTCTCGCGCTTTGAAGATTTAGACGATCGCTATGACCTGCGTCCAAGC
+GCCTGGATTACCCCGAAAGGCGACTGGGGCAAAGGTAAGGTTGAACTGGTTGAAATTCCG
+ACCAATGATGAAACCAACGATAACATCGTCGCTTACTGGACTCCGGATCAACTGCCGGAA
+CCGGGTAAAGAGATGAACTTCAAGTACACTCTGACCTTCAGCCGCGATGAAGATAAACTT
+CATGCGCCGGATAATGCCTGGGTGCTGCAAACACGCCGCTCAACGGGCGACGTTAAACAG
+TCGAATCTGATTCGCCAGCCCGACGGCACTATTGCCTTTGTGGTGGATTTCGTTGGCGCC
+GACATGAAAAAACTGCCGCCGGATACGCCCGTCGCTGCACAAACCAGCATTGGCGATAAC
+GGTGAAATCGTTGACAGTAATGTACGCTATAACCCAGTCACTAAAGGCTGGCGTTTAATG
+CTGCGCGTGAAAGTCAAAGACGCGAAGAAAACCACGGAAATGCGTGCCGCATTGGTGAAT
+GCCGATCAGACGCTAAGTGAAACCTGGAGCTACCAGTTACCTGCCAATGAATAA
+>11111_1#11_04119
+ATGAATAAAACAACTGAGTATATTGACGCACTGCTGCTTTCTGAACGTGAGAAAGCGGCA
+TTGCCGAAAACTGACATCCGCGCCGTGCATCAGGCGCTGGATGCCGAGCATCGGACTTAC
+TCGCGAGAAGACGATTCACCGCAGGGTTCCGTAAAAGCCCGCCTTGAACACGCCTGGCCG
+GATTCATTGGCGAAGGGGCAGTTAATTAAAGATGATGAAGGGCGCGATCAGTTGCAGGCT
+ATGCCAAAAGCGACGCGCTCTTCGATGTTTCCTGATCCCTGGCGAACCAACCCGGTTGGC
+CGTTTCTGGGATCGCCTGCGTGGGCGGGATGTAACGCCGCGCTATGTTTCTCGTCTGACA
+AAAGAAGAGCAGGCGAGTGAGCAAAAATGGCGTACCGTCGGCACTATACGCCGCTATATT
+TTGTTAATTTTGACTCTGGCGCAAACCGTCGTCGCGACCTGGTATATGAAGACCATTCTG
+CCCTATCAGGGATGGGCGCTCATCAATCCTATGGATATGGTGGGGCAGGATATTTGGGTC
+TCCTTTATGCAGCTCCTGCCCTACATGCTGCAAACCGGTATCCTGATTTTGTTTGCCGTG
+CTGTTCTGCTGGGTGTCTGCCGGATTCTGGACGGCGCTGATGGGCTTCCTGCAACTGCTT
+ATCGGGCGCGATAAGTACAGTATCTCCGCGTCTACGGTTGGCGATGAGCCCCTCAATCCG
+GAACACCAGACGGCGCTGATCATGCCTATCTGTAATGAAGACGTTAGCCGCGTTTTCGCC
+GGTCTGCGCGCGACCTGGGAGTCCGTTAAAGCCACAGGCAACGCCGCGCATTTTGACGTC
+TATATCCTTAGCGATAGTTATAACCCGGATATCTGCGTGGCGGAGCAAAAGGCGTGGATG
+GAGCTCATCGCGGAAGTGCAGGGCGAAGGCCAAATTTTTTACCGTCGCCGCCGCCGCCGT
+ATGAAACGCAAAAGCGGCAATATTGACGATTTTTGCCGCCGCTGGGGCAATCAGTACAGC
+TATATGGTGGTGCTGGACGCGGACTCAGTGATGAGCGGCGAGTGTCTGAGCGGGCTGGTG
+CGCCTGATGGAAGCGAACCCTAACGCCGGGATTATCCAGTCTTCGCCGAAAGCGTCGGGG
+ATGGATACTCTGTATGCCCGCTGCCAACAGTTCGCGACCCGTGTTTATGGACCGCTGTTT
+ACCGCCGGGCTGCACTTCTGGCAGTTGGGGGAGTCGCACTACTGGGGGCACAATGCCATT
+ATCCGCGTGAAGCCGTTTATCGAGCACTGCGCTCTGGCGCCGCTGCCGGGAGAAGGTTCG
+TTCGCCGGATCGATTCTTTCCCACGACTTTGTGGAGGCGGCGCTAATGCGTCGGGCAGGG
+TGGGGCGTCTGGATTGCCTACGATCTCCCCGGCTCCTATGAAGAGCTGCCGCCAAACCTG
+CTGGATGAGCTTAAACGCGACCGCCGCTGGTGTCACGGCAACCTGATGAACTTTCGTCTG
+TTCCTGGTGAAAGGAATGCACCCGGTGCATCGCGCCGTGTTCCTGACCGGGGTAATGTCA
+TACCTGTCCGCGCCGTTATGGTTTATGTTCCTCGCGCTTTCTACCGCGCTGCAGGTCGTT
+CATGCGTTAACAGAGCCGCAATATTTCCTTCAGCCGCGCCAGCTTTTTCCGGTCTGGCCG
+CAGTGGCGTCCGGAACTGGCAATCGCGCTGTTTGCGTCAACGATGGTGCTGCTGTTCCTG
+CCGAAGCTGCTCAGTATTATGCTGATCTGGTGTAAAGGCACCAAAGAGTATGGCGGTTTC
+TGGCGCGTTACGCTGTCGCTATTGCTGGAAGTGCTGTTCTCCGTGTTGCTGGCGCCGGTG
+CGTATGCTGTTTCATACCGTGTTTGTGGTCAGCGCGTTCCTCGGCTGGGAAGTGGTCTGG
+AACTCACCGCAACGCGACGATGATTCTACGCCGTGGGGAGAAGCCTTTATGCGTCACGGC
+TCTCAACTGCTGCTGGGGCTGGTCTGGGCGGTGGGTATGGCGTGGCTGGATTTACGCTTT
+CTGTTCTGGCTGGCGCCGATTGTCTTTTCGCTGATTCTGTCGCCATTTGTTTCGGTGATC
+TCCAGTCGTTCAACGGTAGGATTACGCACCAAACGCTGGAAGCTGTTCCTGATCCCGGAA
+GAGTATTCGCCGCCTCAGGTGTTGGTCGATACCGATAAATATCTGGAGATGAATCGCCGC
+CGTATTCTGGACGATGGCTTTATGCATGCGGTATTTAACCCGTCGCTTAATGCGCTGGCG
+ACCGCGATGGCCACCGCGCGTCACCGCGCCAGTAAGGTGCTGGAAATAGCCCGCGATCGT
+CATGTGGAGCAGGCGCTAAACGAAACGCCGGAGAAACTGAACCGCGATCGGCGTCTGGTT
+TTGCTCAGCGATCCGGTGACGATGGCGCGTTTACACTATCGGGTCTGGAATGCGCCAGAG
+AGATACTCTTCCTGGGTAAACCATTATCAGTCTCTCGTCCTGAATCCGCAGGCGTTGCAG
+GGACGAACATCGTCAGCGGGATAA
+>11111_1#11_04120
+GTGCGTATATTCGCGGTGAGCATAATGGTGATTACCCTGAGCGGCTGCGGCAGTATTATC
+AGCAGAACGATCCCCGGACAAGGACACGGCAACCAGTATTACCCTGGCGTGCAGTGGGAT
+ATGCGTGATTCCGCATGGCGCTATATCACTATCCTCGATCTGCCCTTCTCACTGATCTTC
+GATACACTGCTACTGCCGCTCGATATTCACCACGGGCCTTATGAGTAA
+>11111_1#11_04121
+ATGACCATGTATGCCACGCTGGAAGAAGCTATCGATGCAGCCCGGGAAGAATTTCTGGCT
+GACCATCCAGGCCTCGAACAAGACGAAGCGAATGTGCAGCAGTTCAACGTTCAGAAATAT
+GTACTGCAGGATGGGGACATCATGTGGCAGGTCGAATTTTTCGCCGATGAAGGTGAAGAT
+GGCGAATGTCTGCCGATGCTGAGTGGTGAAGCCGCACAGAGCGTGTTTGACGGCGATTAT
+GATGAGATAGAGATCCGCCAGGAATGGCAGGAAGAGAATACTTTGCATGAATGGGATGAA
+GGGGAATTCCAGCTTGAACCCCCGCTTGATACCGAGGAAGGCCGTACTGCGGCAGACGAA
+TGGGATGAGCGTTAA
+>11111_1#11_04122
+ATGTCACCCTCTGATGTCCCCATAAACTGGAAACGCAATCTAACCGTTACCTGGCTGGGC
+TGTTTTTTAACCGGCGCGGCATTCAGTCTGGTCATGCCTTTTCTCCCTCTCTACGTTGAG
+CAACTCGGCGTAACAGGCCATAGCGCGCTCAACATGTGGTCCGGGCTGGTCTTTAGTATT
+ACGTTCCTCTTTTCAGCTATCGCTTCCCCTTTTTGGGGCGGTCTGGCGGATCGTAAAGGG
+CGTAAGATCATACTGTTGCGTTCCGCTCTCGGCATGGCCATTGTGATGCTGTTAATGGGC
+ATGGCGCAAAATATCTGGCAATTTCTGATCCTCCGCGCGTTATTAGGCCTGCTGGGCGGA
+TTTATTCCCAACGCCAATGCGCTCATCGCCACTCAGGTGCCGCGCCACAAAAGCGGCTGG
+GCGTTAGGGACGCTCTCTACTGGCGGCGTCAGCGGCGCTCTGCTTGGCCCACTTGCGGGC
+GGTCTGCTCGCCGACCATTACGGCCTTCGCCCGGTCTTTTTTATTACCGCCAGCGTTTTG
+TTTATCTGTTTTCTGCTCACCTTCTTTTTTATTCGCGAGAATTTCCTGCCGGTGAGCAAA
+AAAGAGATGCTTCACGTTCGGGAAGTGGTCGCCTCGTTAAAAAATCCCCGCCTGGTGTTA
+AGCCTGTTTGTCACCACGCTGATTATTCAGGTGGCGACGGGGTCTATTGCGCCCATCCTG
+ACGCTCTATGTGCGCGAACTCGCGGGAAATGTCAGTAATATTGCGTTCATTAGCGGGATG
+ATAGCGTCCGTTCCCGGCGTGGCGGCATTACTTAGCGCCCCCCGGCTCGGCAAGCTCGGC
+GACAGAATTGGCCCGGAAAAGATCCTTATCGTCGCGCTGATTATTTCCGTACTGCTGCTG
+ATTCCAATGTCTTTTGTGCAAACGCCGTGGCAGCTCGCGCTATTGCGGTTTCTGCTTGGC
+GCGGCGGATGGCGCGCTGCTGCCAGCCGTTCAAACTCTGCTGGTTTACAACTCTACCAAC
+CAGATAGCCGGGCGCATATTCAGTTACAACCAATCTTTCCGCGATATCGGCAACGTCACC
+GGCCCTCTCATGGGTGCCGCAATTTCCGCGAGCTATGGCTTCCGCGCCGTATTCTGCGTC
+ACGGCAGGCGTGGTGTTGTTCAATGCTATCTATTCATGGAACAGCTTACGACGGCGCAGA
+CTGGCAATAGAATGA
+>11111_1#11_04123
+ATGACGAAGTTGCCTAAGTTCTCCGTGGCCTTACTGCACCCGCGCTATTGGTTAACCTGG
+TTGGGTATAGGCGCGCTTTGGTTGGTCGTGCAACTCCCCTACCCGGTCATCTATAAATTG
+GGTTGCGCATTGGGCCACCTGGCGCGACGCGTAATGAAACGCCGCGCGAAAATCGCCTAT
+CGCAACCTTGAATTATGCTTTCCGGAAATGAGCGCGCAGGAACGCCACACCATGGTGGTC
+AAAAACTTTGAATCCGTCGGTATGGGCGTGATGGAAACCGGCATGGCCTGGTTCTGGCCC
+GATCGGCGAGTGAACCGCTGGATGGAAGCGAGCGGTCTGGAGCATATCCGTGAAGTTAAG
+GCGCAGGGGCTGGGGTTCATTCTGGTAGGAATACATTTCCTTACCCTTGAGTTTGGCGCC
+CGCATGTTTGGTATGCATAACCCGGGTATCGGCGTTTATCGCCCGAATGATAATCCGTTG
+CTCGACTGGTTACAGACCTGGGGGCGCTTACGCTCCAATAAATCGATGTTGGATCGCAAA
+GATCTGAAAGGTATGGTTAAGGCATTGAAAAGCGGCGAATTGATCTGGTATGCGCCGGAT
+CACGATTATGGCCCGCGCGCCAGCGTCTTTGTACCGTTATTCGCCGTCGACCAGGCCGCT
+ACGACCTCCGGTACCTGGATGCTCGCGCGCATGTCTAAAGCCTGTATTATCCCCTCCGTG
+CCTCGCCGTAAACCCGACGGGAAAGGCTACGAACTGATCATCCTTCCTGCGGAATATTCG
+CCTCCGCTGGAGAGCGCAGAAGCGACTGCCGCATGGATGAATAAGATTGTTGAGCAGTGC
+ATTATGATGGCGCCAGAGCAGTATATGTGGCTGCATCGCCGCTTTAAAACTCGCCCTGAA
+GGCGTACCGTCGCGCTATTGA
+>11111_1#11_04124
+ATGCCAGTGTTACACAACCGCATCTCTAATGACGAGCTGAAAGCCAAAATGCTGGCGGAA
+AGCGAGCCGCGTACGACAATTTCTTTTTATAAATATTTCACTATCGCCTCGCCGCAACAG
+ACGCGGGACGCGTTGTATCAGGTGTTTACGGCGTTGGACGTTTTTGGTCGCGTTTACCTG
+GCGCATGAGGGCATCAATGCGCAAATCAGCGTGCCGCAAAGCAAGGTTGAGACCTTTCGT
+CAACAGCTTTATACGTTCGACCCCGCGCTGGACGGGGTGCGTTTAAATATCGCGCTGGAG
+GATGACGGAAAGTCATTTTGGGTGCTGCGTATGAAAGTTCGCGACCGTATCGTCGCTGAC
+GGTATTGACGATCCGAGTTTTGACGCCAGTAATGTCGGCGATTATCTGAAGGCGGCAGAT
+GTGAATGCGATGCTGGACGATCCTGACGCGGTCTTTATTGATATGCGCAACCACTATGAG
+TATGAAGTCGGCCATTTCGAAAATGCTCTGGAAATCCCGGCGGATACGTTTCGTGAACAG
+TTGCCAAAAGCGGTTGAAATGCTGCGGGAACATGCAGATAAAAAGATAGTGATGTACTGT
+ACCGGCGGTATTCGTTGTGAGAAAGCCAGCGCCTGGATGAAACACAACGGTTTCAATAAA
+GTCTGGCATATTGAGGGTGGCATCATTGAGTACGCCCGTCGCGCGCGCGAGCAGGGGCTT
+CCCGTTCGCTTTATCGGCAAAAACTTTGTATTTGATGAGCGAATGGGCGAGCGCATCTCG
+GATGAGGTTATCGCGCATTGCCATCAGTGCGGCGTGTCCTGCGATAGCCATACCAACTGC
+AAAAACGACGGTTGCCATCTGCTGTTTATCCAGTGTCCGCAGTGCGCCAGTAAATTTAAC
+GGCTGCTGTAGTGAACAATGCTGTGAAGAGTTGGCCTTGCCGGAGGAAGAACAGCGCCGA
+CGTCGCGCGGGTCGTGAGAACGGCAACAAAATTTTTAATAAATCGCGGGGTCGGCTTAAT
+AGCAAACTGAGCATTCCCGATCCGGCTGAGTAA
+>11111_1#11_04125
+ATGAAAAAAAACCTGCTGGGATTCACCCTCGCATCCTTGTTATTCACGACCGGTTCCGCC
+GTGGCGGCGGAGTATAAAATTGATAAAGAAGGCCAACATGCGTTCGTCAATTTCCGCATC
+CAGCATCTGGGCTACAGCTGGCTATACGGCACCTTTAAAGATTTCGACGGCACGTTCACT
+TTTGACGAAAAAAATCCGTCAGCAGACAAAGTGAATGTGACCATTAACACCAATAGCGTC
+GACACTAACCATGCCGAACGTGACAAACACCTGCGTAGCGCGGAGTTTCTTAATGTTGCG
+AAATTCCCGCAGGCAACCTTCACCTCTACCAGCGTGAAAAAAGAGGGCGATGAACTGGAT
+ATTACCGGCAATCTGACGCTCAATGGCGTGACTAAACCGGTGACGCTGGAAGCGAAGCTG
+ATGGGCCAGGGCGACGATCCGTGGGGCGGTAAGCGCGCGGGCTTTGAGGCCGAAGGAAAA
+ATTAAGCTGAAAGATTTCAATATAACTACCGATCTCGGCCCAGCCTCACAAGAGGTGGAG
+CTTATCATCTCAGTAGAAGGCGTTCAGCAGAAGTAA
+>11111_1#11_04126
+ATGTTACTGATGATGGCGCTGATCGTGCGTATTATCTGGCGGCTTTATTCTCCGCCGCCC
+GTTGCGTTGACCAGCTATTCCCGTTTAACGCGCATTGGCGCCGCCGCGGGTCATATCCTT
+CTGTATCTCCTGCTCTTTGCGATAATCATTAGCGGCTACCTGATTTCCACCGCCGACGGT
+AAACCGATTAGCGTCTTTGGCTGGTTTGAGATTCCGGCCACGCTTACGGACGCGGGCGCG
+CAGGCTGACATCGCCGGAACACTGCATCTGTGGTTTGCCTGGTCGCTGGTCATTATCTCG
+CTCTCGCATGGGGTTATGGCGCTAAAACACCATTTCATCGATAAAGACGACACACTGAAA
+CGTATGACAGGAATGTCGTCATCTGACTATGGAGCTCAAAAATGA
+>11111_1#11_04128
+ATGAAATACGACCTTATTATTATCGGCAGCGGTTCGGTTGGCGCCGCCGCTGGTTATTAC
+GCCACCCGCGCCGGGCTAAAGGTCCTGATGACCGATGCGCATATGCCGCCTTATCAACAG
+GGCAGCCACCACGGCGATACCCGTCTTATCCGCCACGCTTATGGTGAAGGCGAAAAATAT
+GTCCCGCTGGTGCTTCGCGCCCAGACGCTTTGGGATGAGCTCTCCACACACAATGAAGAG
+CCTATTTTTGTCCGCTCCGGCGTCGTCAACCTCGGCCCGGCCGATTCCGCTTTCTTAGCC
+AACGTCGCACGAAGCGCGCAACAGTGGCAATTGAACGTCGAGCGCCTGGACGCGACGGCC
+CTCATGACGCGCTGGCCGGAAATTCGCGTGCCCGATAATTATATCGGGCTGTTTGAAGCT
+GACTCCGGTTTCCTGCGCAGCGAATTAGCCATTACCACATGGCTTCGTCTGGCCCGAGAG
+GCAGGCTGCGCACAGCTATTCAACAGCCCGGTAAGCCATATTCACCATGATGATAACGGT
+GTGACGATAGAGACGAGTGAAGGCTGCTACCACGCCAGCAAAGCGCTGATTAGCGCGGGC
+ACCTGGGTCAAAACGCTGGTACCGGAGCTGCCCGTTCAGCCCGTACGTAAAGTTTTTGCC
+TGGTTTAAGGCGGATGGACGTTACAGCACTAAAAACCGCTTTCCGGCCTTTACCGGCGAA
+ATGCCCAACGGCGATCACTATTACGGTTTCCCGGCGGAGAACGACGAGTTAAAAATCGGC
+AAACACAATGGCGGGCAGCGAATACAGGCACCGGAAGAGCGCAAGCCCTTTGCCGCCGTT
+GCCAGCGATGGCGCGGAAGCATTTCCTTTCCTGCGTAACGTACTGCCGGGTATCGGCGGT
+TGTTTACATGGGGCGGCATGTACCTATGATAATTCGCCGGACGAGGATTTTATTATCGAT
+ACGCTGCCTGGCCATGAGAATACGCTTGTCATCACTGGACTCAGCGGACATGGTTTTAAA
+TTCGCCCCGGTGTTAGGAGAAATCGCTGCGGATTTTGCGTTGGGAAAAACGCCCTCCTTT
+GATCTGACGCCGTTCCGGCTTTCCCGTTTTAGCCAATAA
+>11111_1#11_04129
+ATGGAAAAGAATAATGAAGTCATTCAGACCCATCCGCTTGTAGGATGGGACATCAGCACC
+GTCGATAGCTATGATGCGCTGATGCTGCGTTTACACTACCAGACCCCAAATCGTCCGGAA
+CCGGAAGGGACTGAAGTTGGTCAAACGCTCTGGTTAACGACAGATGTAGCCAGGCAATTT
+ATTTCAATATTAGAAGCCGGCATCGCCAAAATAGAATCAGGCGATTACCAGGAAAACGAG
+TATCGTCACCATTAG
+>11111_1#11_04131
+ATGACTGCACCATCCCAGGTTTTAAAGATCCGCCGCCCGGACGACTGGCACGTTCACCTT
+CGCGATGGCGACATGTTAAAAACGGTCGTACCCTATACCAGCGAAATTTATGGTCGCGCT
+ATCGTGATGCCGAACCTGGCGTCCCCCATTACGACCGTTGATGCAGCGATCGCCTACCGC
+CAGCGTATTCTCGATGCGGTGCCCGCCGGGCATGATTTCACGCCGTTAATGACCTGCTAT
+TTAACGGATTCGCTCGATGCCGATGAACTGGAGCGTGGTTTCCATGAAGGCGTGTTTACT
+GCGGCCAAGCTTTACCCGGCCAATGCCACCACTAACTCCAGTCATGGCGTAACGTCAGTC
+GACGCTATCATGCCGGTACTGGAGCGGATGGAAAAACTCGGAATGCCATTGCTGGTCCAC
+GGTGAGGTGACCCATGCGGATGTTGATATCTTCGATCGCGAAGCGCGTTTTATCGACACC
+GTAATGGAACCGCTACGCCAGCGTCTGACCGCGCTTAAAGTGGTCTTTGAACACATCACA
+ACCAAAGATGCCGCGCAGTATGTACGTGACGGCAGCTACAACCTGGCGGCGACCATTACG
+CCTCAACATTTAATGTTTAACCGTAATGATATGCTGGTTGGCGGCATTCGTCCTCACCTG
+TACTGTCTGCCGATTCTGAAACGCAATATTCACCAGCAGGCGTTACGCGACCTGGTCGCC
+AGTGGTTTTACGCGCGCCTTCCTGGGGACGGATTCAGCGCCGCATTCACGTCATCGTAAA
+GAGACCCGTTGCGGCTGCGCCGGTTGTTTCAACGCCCCCTCCGCTCTTGGCAGTTATGCC
+GCCGTGTTTGAGGAAATGAACGCGCTGGCGCACTTTGAAGCGTTCTGTTCACTGAATGGC
+CCGCAATTCTATGGCCTGCCGGTGAATACGGGGTGGGTGGAACTGGTTCGCGATGAACAA
+CAGATACCGGAAAATATCGCGCTGGCTGATGATTCGCTGGTGCCTTTTTTAGCGGGTGAA
+ACAGTACGCTGGTCAGTAAAAAAATAA
+>11111_1#11_04132
+ATGTCCGCTGGCGCGACATGCTGCTTTGTGTCAAGATTAGGGCACAGGCAACATGCTAAG
+GAACGCCATGAAAAAGTTTTTTTTGCCGCTGCGCTGGTAGTTAGCGGCCTGTTAGTGGGT
+TGTAATCAACTTACGCAATACACCATTAGCGAACAAGAAATCAATCAAGCGCTTGAAAAA
+CGGAATAATTTCTCAAAAGATATTGGTCTGCCTGGCATTGCCGACGCGCATATCGTATTG
+ACTAACCTTGCCAGCCAGATTGGTCGGGAAGAGCCTAATAAAGTCACCCTTACTGGCGAT
+GCCAGGCTGGATATGAATTCCCTGTTTGGCAGTCAAAAAGCGACGATGAAACTCAAGCTA
+AAAGCCCTGCCGGTCTTTGATAAAGAAAAAGGCGCCATCTATCTTCAGGAGATGGAAGTA
+GTAGACGCCACCGTGACGCCGGAGAAAATGCAATCAGTGCTACAAACGCTCCTGCCCTAT
+TTAAATCAGTCTTTACGTAGCTACTTTAACCAGCGGCCCGCTTACGTGTTGCGTGAGGAC
+AGTAGTAAAGGCGAAGCGTTAGCGAAGAAACTCGCGAAAGGCATTGAAGTTAAACCTGGT
+GAAATCGTTATTCCTTTCACCAATTGA
+>11111_1#11_04133
+GTGAAGCTTTATATTTACGATCATTGCCCTTTCTGCGTTAAAGCCCGCATGATTTTCGGC
+CTGAAGAACATCCCCGTTGAACTTAACGTGTTACAAAATGACGACGAGGCGACGCCCACC
+CGGATGATTGGTCAAAAGATGGTGCCCATCCTGCAAAAAGATGATAGTCGCTACCTTCCT
+GAAAGTATGGATATTGTACATTATGTCGACAACCTCGACGGCAAACCGCTGTTAACCGGG
+AAACGTAATCCAGCCATTGAAGAGTGGTTGCGCAAAGTTAACGGTTACGTTAATCAGCTA
+CTGCTGCCGCGATTTGCAAAATCCGCTTTCGATGAATTTTCTACCCCTGCGGCGCGCCAG
+TATTTTATCCGCAAAAAAGAGGCCTCATCTGGCAGTTTTGACAACCATCTTGCGCACTCT
+GCCGGACTGATTAAAAAGATCGGCGATGATTTACGTTTGCTGGATAAACTCATCGTACAG
+CCTAACGCCGTTAACGGCGAATTATCGGAAGATGATATTCATCTGTTTCCGCTTCTGCGT
+AATCTGACGCTGGTCGCCGGTATTCACTGGCCGACAAAAGTCGCGGACTATCGCGATAAT
+ATGGCTAAGCAGACGCAAATTAATTTACTCTCCTCAATGGCTATCTAA
+>11111_1#11_04134
+ATGTCGCGCGTCTCGCAGGCGAGGAACCTGGGTAAATATTTTCTTCTCATCGATAACATG
+TTAGTGGTGCTGGGTTTTTTCGTCGTCTTCCCGCTCATCTCTATTCGCTTTGTCGATCAA
+ATGGGGTGGGCTGCCGTAATGGTAGGGATCGCGCTCGGCCTGCGTCAGTTTATTCAACAA
+GGTCTGGGCATTTTTGGCGGCGCCATCGCCGATCGCTTTGGCGCGAAACCGATGATTGTC
+ACCGGTATGCTGATGCGCGCCGCAGGCTTTGCCACCATGGGTATCGCGCATGAGCCCTGG
+CTCTTGTGGTTTTCCTGCTTTCTTTCCGGTCTCGGCGGTACGCTTTTCGACCCGCCGCGT
+TCAGCGCTGGTGGTCAAATTAATTCGTCCGGAGCAACGGGGCCGCTTCTTCTCTCTGTTG
+ATGATGCAGGACAGCGCGGGCGCGGTGATTGGCGCGCTGCTGGGAAGCTGGTTGCTACAA
+TACGATTTTCGCCTGGTCTGCGCGACGGGCGCTATTTTGTTCATATTATGCGCCCTTTTC
+AACGCATGGCTGCTTCCGGCCTGGAAGCTATCAACGGCCAGAACGCCGGTGCGTGAAGGA
+ATGCGCCGCGTCATGAGCAATAAAAGGTTTGTCACCTACGTGCTGACGCTGGCGGGCTAC
+TATATGCTGGCGGTACAGGTCATGTTAATGCTGCCGATTATGGTAAACGATATCGCCGGT
+TCGCCTGCTGCCGTGAAATGGATGTACGCTATTGAGGCGTGTCTCTCGCTGACGTTGCTC
+TACCCGATTGCCCGCTGGAGCGAAAAGCGTTTTCGGCTGGAGCATCGGCTGATGGCCGGT
+TTGCTCGTCATGTCGCTGAGCATGCTCCCCATCGGGATGGTGGGCAATTTACAGCAGCTT
+TTTACGCTTATTTGCGCTTTCTACATCGGCTCGGTTATCGCCGAACCGGCGCGCGAAACG
+CTCAGCGCGTCGCCCGCGGACGCGAGGGCGCGGGGAAGCTATATGGGCTTTAGCCGTCTG
+GGATTAGCCATTGGCGGCGCGATTAGTTATATCGGCGGCGGCTGGTTGTTTGATATGGGT
+AAAGCGCTTGCGCAGCCTGAACTACCGTGGATGATGCTCGGTATTATCGGCTTTATCACC
+TTTTTGGCTTTAGGCTGGCAATTTAGTCATAAGCGCACGCCGCGCCGGATGCTGGAACCC
+GGCGCCTGA
+>11111_1#11_04135
+ATGTTTGGCTATCGCAGTAACGTGCCAAAAGTGCGCTTAACCACCGATCGTCTGGTGGTA
+CGTTTAGTGCATGAGCGTGATGCCTGGCGTCTGGCCGATTATTACGCGGAAAATCGTCAT
+TTTTTAAAACCCTGGGAACCGGTCCGTGATGAAAGTCATTGTTATCCTTCAGGATGGCAG
+GCGCGTCTGGGAATGATCGGTGAATTTCACAAACAGGGCTCCGCCTTCTATTTCGCGCTA
+CTTGATCCGGAAGAAAAAGAAATTATCGGCGTGGCGAATTTTTCCAATGTGGTGCGCGGT
+TCTTTTCATGCCTGTTATCTGGGCTATTCCATTGCGCAAGAGTGGCAGGGGCAAGGGCTG
+ATGTTTGAAGCCTTAACCGCTGCGATTCGCTATATGCAGCGCACTCAGCATATCCACCGT
+ATCATGGCGAACTATATGCCGCACAACAAACGTAGCGGCGCGTTGCTGGCGCGGCTTGGC
+TTTGAAAAAGAAGGCTATGCGAAAGATTACCTGTTGATTGATGGACAATGGCGCGACCAT
+GTCCTGACGGCGTTAACCACGCCGTTATGGACGCCGGGGCGTTGA
+>11111_1#11_04136
+ATGAAATATGAATTAACCGCCACTGAAGCGCGAGTGATTGGCTGTCTGCTGGAAAAGCAG
+GTGACAACGCCGGAACAGTATCCGCTTTCCGTCAACGGGGTGGTGACAGCCTGTAATCAG
+AAAACCAACCGTGAACCGGTGATGAACCTGACGGAACAAGAGGTACAAGAACAGCTCGAT
+AACCTGGTGAAACGCCACTTTTTGCGTACGGTCAGCGGGTTTGGCAACCGCGTCACCAAA
+TATGAACAGCGCTTCTGTAATTCCGAATTTGGCGATCTGAAACTTAGCGCGGCGGAAGTG
+GCGCTCGTCACTACGTTGCTGCTGCGCGGCGCGCAAACGCCCGGCGAGTTGCGTAGCCGG
+GCGTCGCGGATGCATGAATTCAGCGATATGGCGGAAGTTGAATCCACGCTGGAACGGCTT
+GCCAGTCGTGAGGACGGCCCGTATGTCGTCCGTCTGGCGCGTGAACCGGGTAAGCGCGAA
+AGCCGCTATATGCACCTTTTTTGCGGCGACGTCGATGAACTGTCTCTCCAGACGTCTGCG
+CCGGAAAGTGCGTCGGGCGATCTTCAGTCGCGCGTCGAAGCGCTGGAAAGCGAAGTGGCG
+GAGTTAAAGCAGCGGCTGGATTCTTTGTTAGCTCACCTGGGAGAGTAA
+>11111_1#11_04137
+GTGAGAACATTACGGATTGGCATTGTCGTGTTAGGTGGTATTGCGCAGAAGGCCTGGCTG
+CCGGTATTAACCAACACCGCCGGATGGACGTTACAGGGCGCCTGGTCTCCTTCGCGGGAT
+AAAGCCTTACGTATTTGCGAAAGCTGGCGCATACCGTATGTGGATTCGCTGGCGAATTTA
+GCGTCCGGCTGCGATGCGGTCTTCGTCCACTCCAGTACCGCAAGCCATTATGCCGTGGTC
+AGCGAACTTCTCAACGCTGGCGTCCATGTCTGCGTGGATAAACCGCTGGCGGAAAATCTA
+CGTGATGCCGAACGGCTGGTGGCGCTGGCGGCGCAAAAAAAATTGACGCTGATGGTTGGC
+TTTAATCGCCGTTTCGCGCCGCTGTACCGCGAACTGAAGACGCGCCTCGGCACTGCGGCG
+TCACTGCGTATGGATAAACATCGTACCGATAGCATCGGGCCGCATGACTTACGTTTTACT
+TTGCTCGATGACTATCTGCATGTCGTGGATACCGTTCTGTGGCTGGCGGGCGGCGAGGCG
+CGCCTTGCCAGCGGCACGTTGCTCACCAGCGAGTCCGGCGAAATGTGCTATGCGGAACAT
+CATTTTTCCGCCGACAAATTACAAATTACCACCAGTATGCACCGGCGCGCCGGAAGTCAG
+CGTGAATCGGTCCAGGCCGTCACCGATGGCGGGCTGTATGACGTGACGGATATGCGTGAA
+TGGCGCGAAGAGCGCGGGCAGGGTATTCTCATCAAACCCATTCCGGGTTGGCAAACAACG
+CTTGAGCAGCGTGGTTTTGTCGGATGCGCGCGGCATTTCATTGACTGCGTACAAAATCAG
+ACGGTTCCGGAAACGGCGGGGGAGCAGGCGATTTTGGCCCAGCGCGTCGTGGAGGCGCTG
+TGGCGGGACGCCATCAGCGAATAA
+>11111_1#11_04138
+ATGCAGGAGTTTTACGCCAGGGTCTGGAATACAAAAGAAATGAATTTATTGAAATCGCTG
+GCTGCCGTCAGCTCGATGACTATGTTTTCACGCGTGTTGGGCTTTGCCCGTGATGCGATT
+GTCGCCAGAATTTTTGGCGCAGGGATGGCGACCGACGCCTTTTTTGTGGCGTTTAAACTT
+CCCAATCTACTACGCCGGATCTTTGCCGAAGGCGCTTTTTCTCAGGCCTTTGTGCCTATC
+CTGGCGGAATATAAGAGCAAGCAGGGTGAAGAAGCGACGCGGATCTTTGTCGCTTACGTT
+TCCGGCCTGTTGACGCTGGCATTGGCCGTCGTGACGGTGGCCGGTATGCTGGCCGCCCCG
+TGGGTGATTATGGTAACCGCGCCGGGTTTTGCCGATACTGCGGATAAATTTGCGCTGACG
+ACGCAACTGCTGCGGATTACGTTTCCCTATATTCTGCTGATCTCGCTGGCTTCACTGGTT
+GGCGCCATTCTCAACACCTGGAATCGCTTCTCTATTCCCGCTTTTGCGCCGACATTTCTT
+AATATCAGTATGATCGGTTTTGCATTATTCGCCGCGCCATACTTTAATCCGCCGGTGCTG
+GCGTTAGCCTGGGCAGTCACCGTCGGCGGCGTGCTGCAACTGGTGTATCAACTTCCGTAT
+TTGAAAAAGATCGGTATGCTGGTGCTGCCGCGCATTAACTTTCACGACACCGGGGCGATG
+CGGGTGGTCAAACAGATGGGGCCGGCGATTTTGGGCGTTTCCGTCAGTCAGATCTCCCTT
+ATCATCAATACCATTTTCGCCTCGTTTCTGGCCTCCGGCTCGGTCTCATGGTTGTACTAT
+GCCGATCGGTTGATGGAGTTCCCGTCCGGCGTGCTGGGCGTGGCGTTGGGGACCATCCTG
+TTGCCGTCATTGTCGAAAAGCTTTGCCAGCGGCAATCATGATGAGTACTGCCGCCTGATG
+GACTGGGGGCTGCGTTTGTGCTTTTTACTGGCGTTGCCGAGCGCGGTAGCGCTAGGCATT
+CTGGCGAAGCCGCTGACGGTCTCGCTGTTTCAGTACGGTAAATTCACCGCCTTTGATGCG
+GCGATGACGCAGCGGGCGTTAATCGCCTATTCGGTGGGGCTGATTGGCTTGATCGTCGTA
+AAAGTGCTGGCCCCGGGCTTCTATTCTCGCCAGGATATTAAAACGCCGGTGAAAATCGCC
+ATCGTGACGTTAATCATGACGCAGTTAATGAACCTGGCGTTTATTGGACCGCTGAAACAC
+GCCGGGCTGTCGCTCTCTATTGGTCTGGCGGCATGTCTCAATGCGTCGCTGCTGTACTGG
+CAACTGCGCAAACAGAATATCTTTACGCCACAACCGGGGTGGATGTGGTTCCTGATGCGT
+CTGATCATTTCCGTACTGGTAATGGCCGCCGTGTTGTTCGGCGTGTTGCATATTATGCCG
+GAGTGGTCGCAAGGGTCGATGCTATGGCGTTTGCTGCGTTTGATGGCGGTAGTGATCGCG
+GGTATCGCGGCCTATTTCGCCGCGCTTGCCGTGCTGGGCTTTAAAGTGAAAGAGTTTGTT
+CGCCGGACGGCGTAA
+>11111_1#11_04139
+ATGACTCGTTTGTCAGAAATACTTGACCAGATGACCACCGTCCTGAATGACCTGAAGACG
+GTGATGGACGCCGAGCAACAACAGCTTTCCGTAGGCCAGATTAACGGCAGCCAGCTACAG
+CGTATTACAGAAGAAAAAAGCTCGTTGCTGGCGACGCTGGATTATCTGGAACAACAGCGC
+CGTCTGGAGCAGAACGCGCAGCGTAGCGCAAACGATGACATTGCAGAGCGCTGGCAGGCG
+ATTACCGAAAAAACGCAGCATCTGCGCGACCTCAACCAGCACAACGGCTGGCTGCTGGAA
+GGGCAAATCGAGCGTAATCAACAGGCGCTGGAGGTGCTGAAACCTCACCAGGAGCCGACC
+TTGTACGGGGCTGACGGTCAGACTTCCGTCTCTCACCGCGGCGGTAAAAAAATATCTATC
+TAA
+>11111_1#11_04140
+ATGAGCATTGACCGTACCTCACCTTTGAAACCCGTTAGCACTGTCCAGACGCGCGAAACC
+AGCGACACGCCGGTACAAAAAACGCGTCAGGAAAAAACGTCCGCCGCGACGAGCGCCAGC
+GTAACGTTAAGCGACGCGCAAGCGAAGCTTATGCAGCCAGGCGTCAGCGACATTAATATG
+GAACGCGTCGAAGCATTAAAAACGGCTATCCGTAACGGTGAGTTAAAAATGGATACGGGA
+AAAATAGCAGACTCGCTCATTCGCGAGGCGCAGAGCTACTTACAGAATAAATAA
+>11111_1#11_04141
+ATGCAAACGTTAAAACGAGGATTCGCCGTGGCGGCTTTGCTGTTCAGCCCCCTGACAATG
+GCGCAGGACATCAACGCCCAGCTGACCACGTGGTTTTCCCAGCGTCTGGCTGGCTTCAGC
+GACGAGGTGGTTGTCACGCTCCGTTCACCGCCCAATCTGCTACCGAGCTGCGAGCAGCCA
+GCCTTCAGCATGACGGGCAGCGCGAAGCTGTGGGGCAACGTCAATGTGGTGGCGCGCTGC
+GCCAATGAAAAACGTTATTTGCAGGTGAATGTGCAAGCGACCGGCAATTATGTCGCCGTA
+GCCGCGCCCGTCGCGCGCGGCGGAAAATTGACGCCGGCCAACGTCACGCTAAAACGTGGC
+AGACTGGATCAGTTACCGCCGCGGACGGTACTGGATATCCGTCAGATTCAGGATGCCGTC
+AGTTTGCGCGATCTCGCTCCCGGGCAGCCGGTACAGCTTACGATGATACGTCAGGCCTGG
+CGTGTCAAAGCGGGTCAACGAGTACAGGTCATCGCCAATGGCGAGGGGTTTAGCGTCAAT
+GCGGAAGGTCAGGCGATGAATAATGCCGCCGTCGCGCAAAATGCGCGCGTGCGCATGACA
+TCGGGTCAAATCGTGAGTGGAACCGTCGATTCTGATGGGAATATTCTTATTAACCTATAA
+>11111_1#11_04142
+ATGCTCGACAGGCTCGATGCCGCCTTACGATTTCAGCAGGAAGCGCTAAATCTGCGCGCG
+CAACGTCAGGAAATATTAGCGGCGAATATCGCCAATGCCGATACGCCGGGGTATCAGGCG
+CGCGATATTGATTTTGCCAGTGAGTTAAAAAAAGTGATGGTGCGCGGACGGGAAGAAACC
+GGCGGCGTCGCGTTGTCGTTGACTTCTTCTCGCCATATTCCCGCCCAGGCGGTCTCTTCT
+CCCGCAGTGGATCTGCTTTACCGCGTACCCGATCAGCCTTCTTTGGATGGTAACACCGTA
+GATATGGACAGGGAACGTACGCAGTTTGCGGATAACAGTCTCAAATATCAGATGGGGCTT
+ACCGTTCTGGGTAGCCAACTCAAAGGCATGATGAATGTGCTACAGGGAGGAAACTAA
+>11111_1#11_04143
+GTGGCGCTGTTAAACATTTTTGATATTGCCGGATCGGCGCTTGCCGCACAGTCCAAGCGG
+TTGAACGTTGCGGCCAGTAACCTTGCGAATGCGGATAGCGTCACCGGCCCGGACGGACAG
+CCTTATCGCGCCAAACAGGTGGTTTTTCAGGTGGACGCCGCGCCGGGTCAAGCCACTGGC
+GGGGTAAAGGTCGCCAGCGTGATTGAAAGTCAGGCACCGGAAAAGCTGGTTTATGAGCCA
+GGCAATCCGCTGGCGGACGCTAATGGTTACGTCAAAATGCCCAACGTCGATGTGGTCGGC
+GAAATGGTCAACACGATGTCAGCCTCGCGCAGCTATCAGGCAAATATCGAAGTCCTGAAT
+ACCGTAAAAAGCATGATGCTTAAAACGCTGACATTAGGCCAGTAA
+>11111_1#11_04144
+ATGTCTATTGCCGTAAATATGAATGACCCGACCAACACGGGCGTCAAAACGACGACCGGC
+AGCGGGTCGATGACCGGAAGCAACGCTGCCGATCTGCAAAGCAGTTTCCTGACCTTACTG
+GTCGCGCAATTGAAGAACCAGGACCCGACTAACCCATTACAAAATAATGAGTTAACGACA
+CAGTTGGCGCAAATCAGTACCGTGAGCGGCATTGAAAAACTGAATACGACGCTGGGGGCT
+ATTTCCGGGCAAATCGATAATAGTCAGTCCCTACAGGCGACCACGCTGATTGGACATGGC
+GTTATGGTGCCTGGCACCACAATTCTGGCGGGTAAAGGCGCGGAAGAAGGGGCCGTGACG
+TCCACGACGCCGTTTGGCGTGGAATTGCAACAGCCTGCGGACAAAGTGACGGCAACCATT
+ACCGATAAAGATGGCCGGGTGGTACGGACGCTGGAGATCGGTGAGTTGCGAGCCGGGGTA
+CACACCTTTACCTGGGATGGTAAGCAAACGGACGGAACAACGGTACCGAATGGTTCTTAC
+AACATTGCGATTACCGCCAGCAATGGCGGGACGCAACTGGTGGCGCAGCCGCTGCAATTC
+GCTCTGGTACAGGGCGTGACGAAGGGCAGTAACGGCAACCTGTTGGATCTGGGTACCTAC
+GGCACCACCACACTCGACGAAGTTCGGCAAATAATCTAA
+>11111_1#11_04145
+ATGTCTTTTTCTCAAGCGGTTAGCGGCCTGAACGCTGCGGCCACCAACCTTGATGTTATC
+GGTAATAACATCGCCAACTCCGCCACCTATGGCTTTAAGTCCGGTACGGCATCATTTGCC
+GATATGTTCGCCGGTTCCAAAGTGGGGTTGGGCGTAAAAGTGGCGGGGATTACCCAGGAT
+TTTACCGACGGTACGACAACGAACACCGGGCGCGGGCTGGATGTCGCGATTAGCCAGAAC
+GGTTTTTTCCGCCTGGTAGACAGCAACGGTTCCGTGTTCTATAGCCGCAACGGCCAGTTC
+AAACTGGACGAGAACCGTAACCTGGTCAATATGCAGGGGATGCAGTTGACCGGCTATCCG
+GCCACCGGTACGCCGCCGACCATTCAGCAGGGGGCGAATCCTGCGCCGATCACCATTCCG
+AACACGCTGATGGCGGCGAAATCGACCACCACCGCGTCAATGCAGATCAACCTGAACTCA
+ACGGACCCTGTACCGTCTAAAACGCCCTTTAGCGTGAGTGATGCGGATTCGTATAACAAA
+AAAGGCACCGTCACCGTTTATGACAGCCAGGGTAATGCCCATGACATGAACGTCTATTTT
+GTGAAAACCAAAGATAATGAATGGGCTGTGTACACCCATGACAGCAGCGATCCTGCAGCC
+ACTGCGCCAACAACGGCGTCCACTACGCTGAAATTCAATGAAAACGGGATTCTGGAGTCT
+GGCGGTACGGTGAACATCACCACCGGTACGATTAATGGCGCGACAGCGGCCACCTTCTCC
+CTCAGCTTCCTTAACTCCATGCAGCAGAACACCGGGGCTAATAACATCGTCGCCACCAAT
+CAAAACGGCTATAAGCCTGGCGACCTGGTGAGCTACCAGATTAACAATGATGGCACCGTG
+GTTGGCAACTACTCCAACGAGCAGGAGCAGGTGCTGGGGCAGATTGTGCTGGCTAACTTC
+GCCAACAACGAAGGTCTGGCATCCCAGGGCGATAACGTCTGGGCGGCGACGCAGGCCTCC
+GGGGTAGCGCTGCTGGGGACTGCCGGTTCCGGCAACTTCGGTAAGCTGACGAACGGCGCG
+CTGGAAGCCTCTAACGTGGATTTGAGTAAAGAGCTGGTGAATATGATCGTCGCGCAGCGT
+AACTACCAGTCGAATGCGCAGACCATCAAAACCCAGGACCAGATCCTCAATACGCTGGTT
+AACCTGCGCTAA
+>11111_1#11_04146
+ATGGATCACGCAATTTATACCGCCATGGGGGCGGCCAGCCAGACGCTTAACCAGCAGGCG
+GTAACGGCCAGCAACCTGGCTAATGCCTCAACGCCGGGCTTTCGCGCGCAGCTTAACGCG
+CTACGCGCGGTGCCCGTTGATGGCCTCTCTTTAGCGACGCGCACGTTGGTTACGGCGTCG
+ACGCCGGGGGCGGATATGACCCAGGGTCAGTTGGACTACACTTCCCGCCCGCTGGATGTT
+GCGTTACAGCAGGACGGCTGGCTGGTGGTGCAAGCGGCGGATGGCGCTGAAGGATATACC
+CGTAACGGGAATATCCAGGTGGGCCCGACCGGGCAGTTAACCATTCAGGGACATCCGGTT
+ATCGGCGAAGGCGGCCCGATTACCGTTCCGGAAGGGTCGGAAATCACCATTGCGGCAGAC
+GGCACGATCTCCGCGCTCAATCCCGGCGACCCGCCAAACACGGTGGCGCCCGTTGGGCGG
+CTGAAGCTGGTCAAAGCGGAAGGCAATGAGGTGCAGCGGAGCGATGACGGTTTATTCCGC
+CTTACCGCCGAGGCACAGGCTGAACGCGGGGCGGTACTGGCCGCCGACCCGTCAATTCGC
+ATTATGTCGGGCGTGCTGGAGGGCAGTAACGTCAAGCCGGTTGAAGCCATGACCGACATG
+ATCGCCAACGCACGTCGTTTTGAAATGCAGATGAAGGTTATCACCAGCGTAGATGAGAAC
+GAAGGGCGAGCTAACCAACTGCTGTCGATGAGTTAA
+>11111_1#11_04147
+ATGATCAGTTCATTATGGATCGCCAAAACCGGTCTGGACGCGCAGCAAACCAATATGGAT
+GTGATTGCCAATAACCTGGCAAACGTCAGCACCAATGGTTTTAAGCGTCAGCGCGCGGTA
+TTTGAAGATCTGTTGTATCAGACCATCCGCCAGCCGGGCGCGCAGTCGTCCGAGCAGACG
+ACGCTGCCTTCCGGGCTGCAAATCGGTACCGGCGTGCGTCCGGTCGCCACGGAGCGCCTG
+CACAGTCAGGGGAACCTGTCGCAGACCAACAACAGTAAAGATGTGGCGATTAAAGGGCAG
+GGCTTTTTCCAGGTCATGCTGCCGGACGGTACGTCTGCCTATACCCGCGACGGCTCTTTC
+CAGGTGGATCAGAATGGTCAACTGGTGACGGCGGGCGGTTTTCAGGTGCAGCCGGCAATC
+ACCATTCCGGCCAACGCGTTAAGCATCACGATTGGCCGCGACGGCGTGGTCAGCGTTACC
+CAGCAAGGGCAGGCCGCGCCGGTTCAGGTCGGGCAGCTTAACCTGACCACCTTTATGAAC
+GACACCGGTCTGGAAAGCATCGGCGAGAACCTCTATATCGAAACGCAATCGTCCGGCGCG
+CCGAACGAAAGCACGCCGGGGCTCAACGGCGCGGGGTTGTTGTATCAAGGGTATGTCGAA
+ACGTCGAACGTTAACGTGGCGGAAGAGCTGGTGAACATGATTCAGGTTCAACGCGCCTAT
+GAAATTAACAGTAAAGCAGTATCGACGACCGATCAGATGCTGCAGAAACTGACGCAACTC
+TAA
+>11111_1#11_04148
+ATGGCCCTGATGGTCGCGACGCTGACAGGATGCGCCTGGATACCCGCTAAACCGCTCGTG
+CAGGGGGCGACCACGGCGCAGCCGATACCTGGCCCGGTACCGGTGGCGAATGGCTCCATA
+TTTCAGTCTGCGCAGCCGATTAATTATGGCTATCAGCCGCTTTTTGAAGATCGTCGACCG
+CGTAATATCGGCGATACGCTCACGATTGTGTTACAGGAAAACGTCAGCGCCAGTAAAAGC
+TCGTCGGCAAATGCCAGCCGCGACGGCAAAACCAGCTTTGGTTTTGATACGGTACCGCGT
+TATCTGCAGGGATTATTCGGTAATTCCCGCGCGGATATGGAGGCCTCCGGCGGCAACTCT
+TTTAATGGTAAAGGCGGCGCGAATGCCAGCAATACCTTTAGCGGCACGCTGACCGTGACC
+GTCGATCAGGTTCTGGCCAATGGCAATTTACACGTCGTGGGGGAAAAACAGATCGCGATT
+AATCAGGGAACGGAATTCATCCGCTTCTCCGGCGTGGTAAATCCACGCACCATCAGCGGT
+AGCAACTCTGTTCCCTCGACACAGGTGGCGGATGCGCGGATTGAATATGTCGGGAACGGC
+TATATTAACGAAGCGCAAAATATGGGCTGGCTGCAACGTTTCTTCCTTAATTTGTCGCCG
+ATGTAA
+>11111_1#11_04149
+GTGTTTAAAGCTCTTGCAGGAATCGTTCTGGCACTGGTTGCCACTCTGGCGCACGCCGAG
+CGTATCCGGGATCTGACCAGTGTCCAGGGAGTACGGGAAAACTCGCTGATCGGCTACGGG
+CTGGTGGTCGGGCTGGACGGTACGGGCGACCAGACGACCCAGACGCCATTTACCACCCAG
+ACGCTGAATAACATGCTGTCACAACTGGGGATTACGGTCCCCACCGGCACCAATATGCAG
+TTGAAAAACGTGGCGGCGGTGATGGTGACGGCGTCGTATCCGCCTTTTGCGCGACAGGGA
+CAAACGATCGATGTCGTCGTTTCCTCAATGGGGAACGCTAAAAGTCTGCGTGGCGGGACG
+TTATTAATGACGCCGTTAAAAGGGGTGGACAGCCAGGTGTATGCTCTGGCGCAGGGCAAT
+ATTCTGGTCGGCGGCGCGGGCGCTTCCGCAGGCGGCAGTAGCGTGCAGGTTAACCAGCTT
+AATGGCGGGCGCATCACTAATGGCGCGATTATCGAACGCGAGTTGCCGACTCAGTTCGGC
+GCTGGCAACACCATTAATCTGCAATTGAACGACGAAGATTTTACGATGGCGCAGCAAATT
+ACCGACGCCATCAACCGCGCCCGCGGTTACGGCAGCGCCACTGCGCTTGATGCGCGAACG
+GTACAGGTACGCGTGCCCAGCGGCAACAGCTCGCAGGTGCGTTTTCTGGCGGACATTCAA
+AATATGGAAGTCAACGTGACGCCGCAGGATGCAAAAGTCGTGATCAACTCGCGTACCGGT
+TCGGTGGTCATGAATCGGGAAGTCACGCTGGATAGCTGCGCTGTGGCGCAGGGCAATTTG
+TCAGTGACAGTCAATCGCCAACTCAACGTCAACCAGCCGAATACGCCATTTGGCGGCGGG
+CAGACCGTGGTGACGCCACAGACTCAGATAGATTTGCGTCAGAGCGGCGGATCGCTACAG
+AGCGTGCGTTCCAGCGCCAATCTGAACAGCGTAGTGCGCGCGCTGAATGCGCTTGGCGCG
+ACGCCGATGGATCTGATGTCGATTTTGCAGTCCATGCAGAGCGCGGGCTGTCTACGCGCC
+AAACTGGAAATCATCTGA
+>11111_1#11_04150
+ATGATCGGAGACGGTAAATTGCTGGCCAGCGCGGCCTGGGATGCGCAATCTCTGAACGAA
+CTGAAAGCGAAAGCGGGCCAGGACCCGGCGGCGAATATCCGTCCTGTGGCCCGTCAGGTG
+GAAGGGATGTTTGTGCAGATGATGCTGAAAAGTATGCGCGAGGCTTTACCCAAAGATGGT
+TTATTCAGCAGCGATCAGACGCGTCTGTATACCAGCATGTATGACCAGCAGATCGCCCAG
+CAGATGACCGCCGGTAAGGGATTGGGGCTGGCGGATATGATGGTTAAACAGATGACGGGC
+GGGCAGACGATGCCTGCAGATGATGCGCCGCAAGTACCGCTTAAATTCTCCCTGGAGACG
+GTAAACAGCTATCAAAATCAGGCGCTGACCCAACTGGTGCGCAAAGCCATACCGAAAACG
+CCGGACAGCAGCGATGCGCCGCTCTCCGGCGACAGTAAAGACTTTCTGGCCCGGCTTTCG
+CTCCCGGCGAGGCTGGCCAGCGAACAAAGCGGGGTGCCGCATCATCTGATTCTGGCGCAG
+GCGGCGCTGGAGTCCGGCTGGGGGCAGCGGCAAATCCTGCGGGAGAATGGCGAACCCAGC
+TATAACGTATTTGGCGTGAAAGCGACCGCCAGTTGGAAAGGGCCGGTGACGGAAATCACC
+ACCACTGAATACGAAAATGGCGAAGCGAAAAAAGTGAAAGCGAAATTCCGCGTCTATAGC
+TCGTATCTGGAGGCGTTATCGGATTATGTCGCGCTGTTAACGCGTAACCCACGCTACGCT
+GCCGTGACCACTGCCGCCACGGCAGAGCAGGGCGCAGTGGCTCTGCAAAACGCCGGATAC
+GCCACTGACCCGAATTACGCGCGTAAATTGGCCAGCATGATTCAGCAGTTGAAAGCGATG
+AGTGAAAAGGTCAGCAAAACCTACAGCGCGAATCTCGACAATCTCTTTTAA
+>11111_1#11_04151
+ATGTCCAGCTTGATTAATCACGCCATGAGCGGACTTAACGCCGCGCAGGCCGCGTTAAAT
+ACGGTCAGTAATAACATCAACAATTATAACGTTGCGGGTTATACCCGGCAGACAACTATT
+CTGGCGCAGGCAAACAGTACGTTAGGGGCTGGCGGCTGGATAGGTAATGGCGTTTACGTT
+TCAGGCGTACAGCGCGAATATGATGCGTTTATCACTAATCAGCTACGCGGCGCGCAAAAC
+CAGAGCAGCGGCTTAACCACGCGCTATGAACAAATGTCGAAAATCGACAACCTGCTGGCC
+GATAAATCCAGCTCACTGTCTGGCTCGCTGCAGAGTTTTTTTACCAGCCTGCAAACGTTA
+GTCAGTAATGCGGAAGATCCTGCGGCGCGTCAGGCGCTGATTGGTAAAGCGGAAGGGCTG
+GTAAACCAGTTCAAAACCACCGATCAGTATCTGCGCGATCAGGATAAACAGGTCAATATC
+GCGATTGGCTCCAGCGTGGCGCAAATCAACAATTACGCGAAGCAGATAGCTAACCTGAAC
+GATCAAATCTCCCGTATGACGGGCGTAGGCGCGGGCGCATCGCCGAACGACCTGCTCGAT
+CAACGTGATCAGTTGGTTAGCGAGCTTAACAAGATCGTTGGCGTCGAGGTGAGTGTACAG
+GACGGCGGCACCTATAACCTGACGATGGCCAATGGCTATACGCTGGTGCAGGGGTCGACG
+GCGCGTCAGTTGGCGGCGGTTCCCTCCAGCGCCGACCCGACGCGAACGACTGTCGCTTAT
+GTCGATGAGGCCGCCGGTAACATCGAAATTCCGGAAAAGTTGCTGAACACCGGTTCGCTC
+GGCGGGCTACTGACGTTCCGTTCTCAGGATCTGGATCAGACTCGTAATACGCTGGGCCAG
+TTGGCGTTGGCGTTTGCCGATGCGTTTAACGCGCAGCATACCAAAGGTTATGACGCCGAC
+GGCAATAAAGGGAAAGACTTCTTTAGCATTGGCTCGCCGGTGGTATATAGCAACAGTAAT
+AATGCCGATAAAACGGTATCGCTAACCGCTAAGGTGGTCGACAGCACGAAGGTTCAGGCG
+ACGGATTATAAGATTGTTTTTGACGGTACAGACTGGCAGGTTACTCGCACTGCGGATAAC
+ACCACCTTCACGGCAACAAAAGATGCTGACGGAAAACTGGAGATTGACGGTCTGAAAGTG
+ACGGTAGGGACTGGCGCACAGAAAAACGACAGTTTTCTTCTCAAGCCGGTCAGCAATGCT
+ATCGTCGACATGAACGTTAAAGTGACAAATGAAGCCGAGATTGCGATGGCGTCTGAGTCA
+AAACTCGATCCTGATGTGGATACCGGCGACAGCGATAACCGCAATGGTCAGGCATTGCTG
+GACTTACAAAACAGCAATGTAGTGGGCGGCAACAAAACCTTTAACGATGCTTACGCCACG
+TTGGTCAGCGATGTGGGTAACAAAACGTCAACGCTGAAAACCAGCAGCACCACGCAGGCG
+AATGTGGTTAAACAGCTTTATAAACAGCAACAGTCGGTTTCCGGCGTTAACCTCGACGAA
+GAGTACGGCAATTTGCAGCGTTATCAGCAGTATTATCTGGCGAATGCGCAAGTATTGCAG
+ACCGCGAATGCGCTGTTTGATGCGTTATTGAATATTCGCTAA
+>11111_1#11_04152
+ATGCGTATCAGTACCCAGATGATGTACGAACAAAATATGAGCGGCATCACTAATTCTCAG
+GCCGAATGGATGAAGCTGGGCGAGCAGATGTCTACCGGTAAGCGCGTTACCAACCCATCT
+GACGATCCGATCGCCGCGTCGCAGGCGGTAGTACTCTTTCAGGCGCAGGCGCAGAATAGC
+CAGTACGCCCTGGCGCGTACGTTTGCCACCCAAAAAGTGTCGCTGGAAGAAAGCGTACTC
+AGTCAGGTGACGACGGCGATTCAAACCGCGCAGGAAAAAATCGTCTATGCCGGAAACGGC
+ACGTTAAGCGACGATGACCGCGCGTCGCTGGCGACGGATTTACAGGGGATCCGCGATCAG
+CTGATGAACCTGGCAAACAGCACTGACGGCAATGGTCGCTATATCTTTGCCGGGTATAAA
+ACGGAAGCGGCGCCATTCGACCAGGCGACAGGTGGTTATCATGGCGGCGAGAAAAGTGTT
+ACCCAGCAGGTGGATTCCGCACGCACGATGGTAATTGGCCATACGGGAGCGCAAATTTTT
+AATAGCATCACCAGCAATGCGGTGCCGGAACCGGATGGCTCGGACTCCGAAAAGAATCTG
+TTTGTCATGCTCGATACGGCAATTGCCGCGCTCAAGACCCCGGTGGAAGGCAATGACGTG
+GAAAAAGAAAAAGCCGCTGCCGCCATTGATAAAACCAATCGCGGCTTAAAAAATTCGCTT
+AATAACGTCCTGACCGTTCGTGCGGAACTGGGAACGCAACTGAGCGAACTCAGTACGCTG
+GATTCACTGGGAAGCGACCGTGCGCTGGGACAGAAGCTACAGATGAGCAACCTGGTAGAT
+GTGGACTGGAACTCGGTCATTTCCTCCTACGTCATGCAACAGGCGGCATTACAGGCGTCC
+TATAAAACGTTTACCGACATGCAGGGAATGTCGCTTTTCCAGTTGAACCGGTAA
+>22222_2#22_04112
+ATGAAACTTTTAAGAGTGGCAGCATTCGCAGCAATCGTAGTTTCTGGCAGTGCTCTGGCT
+GGCGTCGTTCCACGATGGGGCGGCGGCGGTAATCATAACGGCGGCGGCAATAGTTCCGGG
+CCGGATTCCACGTGGAGCATTTATCAGTACGGTTCCGCTAACGCTGCGCTTGCTCTGCAA
+AGCGATGCCCGTAGATCTGAAACGACCATTACCCAGAGCGGTTATGGTAACGGCGCCGAT
+GTAGGCCAGGGTGGGGATAACAGTACTATTGAACTGACTCAGAATGGTTTCAGAAACAAT
+GCCACCATCGACCGGTGGAACGCTAAAAACTCCGATATTACTGTCGGTCAATACGGCGGT
+AATAACGCCGCGCGGGTTAATCAGACCGCATCTGATTCCAGCGTAATGGTGCGTCAGGTT
+GGTTTTGGCAACAGCGCCACGGCTAACCAGTATTAA
+>22222_2#22_04141
+ATGCAAACGTTAAAACNNGGATTCGCCGTGGCGGCTTTGCTGTTCAGCCCCCTGACAATG
+GCGCAGGACATCAACGNNCAGCTGACCACGTGGTTTTCCCAGCGTCTGGCTGGCTTCAGC
+GACGAGGTGGTTGTCANNCTCCGTTCACCGCCCAATCTGCTACCGAGCTGCGAGCAGCCA
+GCCTTCAGCATGACGGNNAGCGCGAAGCTGTGGGGCAACGTCAATGTGGTGGCGCGCTGC
+GCCAATGAAAAACGTTNNTTGCAGGTGAATGTGCAAGCGACCGGCAATTATGTCGCCGTA
+GCCGCGCCCGTCGCGCNNGGCGGAAAATTGACGCCGGCCAACGTCACGCTAAAACGTGGC
+AGACTGGATCAGTTACNNCCGCGGACGGTACTGGATATCCGTCAGATTCAGGATGCCGTC
+AGTTTGCGCGATCTCGNNCCCGGGCAGCCGGTACAGCTTACGATGATACGTCAGGCCTGG
+CGTGTCAAAGCGGGTCNNCGAGTACAGGTCATCGCCAATGGCGAGGGGTTTAGCGTCAAT
+GCGGAAGGTCAGGCGANNAATAATGCCGCCGTCGCGCAAAATGCGCGCGTGCGCATGACA
+TCGGGTCAAATCGTGANNGGAACCGTCGATTCTGATGGGAATATTCTTATTAACCTATAA
diff --git a/t/data/expected_prank_input.fa.aln b/t/data/expected_prank_input.fa.aln
new file mode 100644
index 0000000..d500851
--- /dev/null
+++ b/t/data/expected_prank_input.fa.aln
@@ -0,0 +1,66 @@
+>1111#5_04506
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+---------ATGGCAATCACCTTACGGGAGCTGGATGGCCTGAGCTATGAAGAGATAGCG
+GCTATCATGGATTGTCCGGTGGGGACGGTGCGTTCACGTATCTTCCGGGCGCGGGAAGCT
+ATTGATAATAAAGTTCAACCGCTTATCAGGCGT
+>1234_8#75_04759
+ATGAGCGAGCAGTTAACGGACCAGGTCCTGGTTGAACGGGTCCAGAAGGGAGATCAGAAA
+GCCTTTAACTTACTGGTAGTGCGCTACCAGCATAAAGTGGCGAGTCTGGTTTCCCGCTAT
+GTGCCATCGGGCGACGTTCCCGATGTCGTACAGGAATCATTTATTAAGGCCTATCGCGCG
+CTGGATTCTTTCCGGCGGGATAGTGCTTTTTATACCTGGTTGTATCGTATTGCGGTCAAT
+ACCGCGAAGAACTACCTGGTTGCGCAGGGGCGTCGTCCGCCTTCCAGTGATGTAGACGCG
+ATTGAAGCAGAAAACTTTGAAAGCGGCGGCGCGCTGAAAGAAATTTCGAACCCTGAGAAC
+TTAATGTTGTCAGAAGAACTGAGACAGATAGTTTTCCGAACTATTGAGTCCCTCCCGGAA
+GATTTACGTATGGCAATCACCTTACGGGAGCTGGATGGCCTGAGCTATGAAGAGATAGCG
+GCTATCATGGATTGTCCGGTGGGGACGGTGCGTTCACGTATCTTCCGGGCGCGGGAAGCT
+ATTGATAATAAAGTTCAACCGCTTATCAGGCGT
+>Salmonella_enterica_subsp_enterica_serovar_Typhimurium_DT104_v1_02853
+ATGAGCGAGCAGTTAACGGACCAGGTCCTGGTTGAACGGGTCCAGAAGGGAGATCAGAAA
+GCCTTTAACTTACTGGTAGTGCGCTACCAGCATAAAGTGGCGAGTCTGGTTTCCCGCTAT
+GTGCCATCGGGCGACGTTCCCGATGTCGTACAGGAATCATTTATTAAGGCCTATCGCGCG
+CTGGATTCTTTCCGGGGGGATAGTGCTTTTTATACCTGGTTGTATCGTATTGCGGTCAAT
+ACCGCGAAGAACTACCTGGTTGCGCAGGGGCGTCGTCCGCCTTCCAGTGATGTAGACGCG
+ATTGAAGCAGAAAACTTTGAAAGCGGCGGCGCGCTGAAAGAAATTTCGAACCCTGAGAAC
+TTAATGTTTTCAGAAGAACTGAGACAGATAGTTTTCCGAACTATTGAGTCCCTCCCGGAA
+GATTTACGTATGGCAATCACCTTACGGGAGCTGGATGGCCTGAGCTATGAAGAGATAGCG
+GCTATCATGGATTGTCCGGTGGGGACGGTGCGTTCACGTATCTTCCGGGCGCGGGAAGCT
+ATTGATAATAAAGTTCAACCGCTTATCAGGCGT
+>Salmonella_enterica_subsp_enterica_serovar_Typhimurium_SL1344_v2_02736
+ATGAGCGAGCAGTTAACGGACCAGGTCCTGGTTGAACGGGTCCAGAAGGGAGATCAGAAA
+GCCTTTAACTTACTGGTAGTGCGCTACCAGCATAAAGTGGCGAGTCTGGTTTCCCGCTAT
+GTGCCATCGGGCGACGTTCCCGATGTCGTACAGGAATCATTTATTAAGGCCTATCGCGCG
+CTGGATTCTTTCCGGGGGGATAGTGCTTTTTATACCTGGTTGTATCGTATTGCGGTCAAT
+ACCGCGAAGAACTACCTGGTTGCGCAGGGGCGTCGTCCGCCTTCCAGTGATGTAGACGCG
+ATTGAAGCAGAAAACTTTGAAAGCGGCGGCGCGCTGAAAGAAATTTCGAACCCTGAGAAC
+TTAATGTTGTCAGAAGAACTGAGACAGATAGTTTTCCGAACTATTGAGTCCCTCCCGGAA
+GATTTACGTATGGCAATCACCTTACGGGAGCTGGATGGCCTGAGCTGTGAAGAGATAGCG
+GCTATCATGGATTGTCCGGTGGGGACGGTGCGTTCACGTATCTTCCGGGCGCGGGAAGCT
+ATTGATAATAAAGTTCAACCGCTTATCAGGCGT
+>Salmonella_enterica_subsp_enterica_serovar_Typhimurium_str_D23580_v1_02783
+ATGAGCGAGCAGTTAACGGACCAGGTCCTGGTTGAACGGGTCCAGAAGGGAGATCAGAAA
+GCCTTTAACTTACTGGTAGTGCGCTACCAGCATAAAGTGGCGAGTCTGGTTTCCCGCTAT
+GTGCCATCGGGCGACGTTCCCGATGTCGTACAGGAATCATTTATTAAGGCCTATCGCGCG
+CTGGATTCTTTCCGGGGGGATAGTGCTTTTTATACCTGGTTGTATCGTATTGCGGTCAAT
+ACCGCGAAGAACTACCTGGTTGCGCAGGGGCGTCGTCCGCCTTCCAGTGATGTAGACGCG
+ATTGAAGCAGAAAACTTTGAAAGCGGCGGCGCGCTGAAAGAAATTTCGAACCCTGAGAAC
+TTAATGTTGTCAGAAGAACTGAGACAGATAGTTTTCCGAACTATTGAGTCCCTCCCGGAA
+GATTTACGTATGGCAATCACCTTACGGGAGCTGGATGGCCTGAGCTATGAAGAGATAGCG
+GCTATCATGGATTGTCCGGTGGGGACGGTGCGTTCACGTATCTTCCGGGCGCGGGAAGCT
+ATTGATAATAAAGTTCAACCGCTTATCAGGCGT
+>Salmonella_enterica_subsp_enterica_serovar_Typhimurium_str_DT2_v1_02741
+ATGAGCGAGCAGTTAACGGAC---GTCCTGGTTGAACGGGTCCAGAAGGGAGATCAGAAA
+GCCTTTAACTTACTGGTAGTGCGCTACCAGCATAAAGTGGCGAGTCTGGTTTCCCGCTAT
+GTGCCATCGGGCGACGTTCCCGATGTCGTACAGGAATCATTTATTAAGGCCTATCGCGCG
+CTGGATTCTTTCCGGGGGGATAGTGCTTTTTATACCTGGTTGTATCGTATTGCGGTCAAT
+ACCGCGAAGAACTACCTGGTTGCGCAGGGGCGTCGTCCGCCTTCCAGTGATGTAGACGCG
+ATTGAAGCAGAAAACTTTGAAAGCGGCGGCGCGCTGAAAGAAATTTCGAACCCTGAGAAC
+TTAATGTTGTCAGAAGAACTGAGACAGATAGTTTTCCGAACTATTGAGTCCCTCCCGGAA
+GATTTACGTATGGCAATCACCTTACGGGAGCTGGATGGCCTGAGCTATGAAGAGATAGCG
+GCTATCATGGATTGTCCGGTGGGGACGGTGCGTTCACGTATCTTCCGGGCGCGGGAAGCT
+ATTGATAATAAAGTTCAACCGCTTATCAGGCGT
diff --git a/t/data/expected_query_1.fna b/t/data/expected_query_1.fna
new file mode 100644
index 0000000..70ca9a0
--- /dev/null
+++ b/t/data/expected_query_1.fna
@@ -0,0 +1,252 @@
+>abc|SC|contig000001
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+ATACCCACCACATTTGGTGGAGAACCAAAAATTAGCCGAAAAACATCATTTCTGAAGTTA
+TCGGCTAAAGTTATAAATTATATTTATTTGTACATGAACAAATAATTTACATTAATTTGT
+CATTTCTTCTTTTTCCCAATCGATTTTATATCTTTCTGAAGAACGATCTGTCCATTTATC
+TTTAGTATTGGTACCTTTCCAATTTGTTGAAGTCCAATGCAATTGGTAGTCATCACGAAC
+TCGTTCGTATATTACATCTATATTTGTTTGTTGTTTGGATGCTTTTCTATCCATAGTAAT
+AACTGTAGCGAAGTCTGGTGAAAACCCTGAAGATAATAGAGAACTTGCTTTGTTAGGATC
+AAGGAAGTTCTCTGCTGCTTTCATAGAACCATTTCTAGTTTTCATGAAAAGTTGATTGCC
+ATATACCGGGTTCCAAGAATCTCTATCATATGGTCCCCAATTTTGATTCACCATATTGTT
+AAATATCACTTTCCAGCCTACTTTTTTATCAGTTGGGCTCTCTAAAATTGTTTTGAAATC
+AGGTTGAACATATTTCAGTGTATGACCAATCGAAACATTTGCACCAATAAGGCCGCCAAT
+TTTTCCTGTATCATCACCAGTAACATTACCGTTGAATCCATAAGTTAAAGTACTCATATA
+CTCTTTTGTATCAATCGAATTTCTTGGATAGTAATCAGATATTTGAGCTACTTCATTATC
+AGGTAGTTGCAACTGTACCTTAAAGGCTGAAGGCCAGGCTAAACCACTTTTGTTAGCACC
+TTCTTCGCTATAAACTCTATATTGACCAGCAATGGTACCTTTCGTTCTAATAACTAGCAG
+TTTTTTATTGTGATTTTTATCATCGATAAAACTATAAAATACTTTTTTGTGCATGCCATT
+TTCTTTATCATAAGTGACTAAATCACCTGTTTTTACTGTAGTATTGCTTCCAATATCTGT
+AGTACCGGTTTTAATATTAATATCAGAATCTGCGGCATTAGCGACAGGATTCATTAATAT
+GGAACCTAGCAATAGTGTTGTTGTTACTGAGCTGACTATACGTGTTTTCATTTTCATCAT
+CCTTCTATTTTTTTTAACGATTTGAGGAAACAATAATCAATATGTCAATTTAGAATATTG
+CAGTTGAGACATATCAAATATTTATAATCGTTTATATTAGTATATTTAATGTAGCAACTG
+ATAAATTACTGAGTGATGATGAGTGATTATTTTAAGAATATGTTTTTAACTTTTATTTAA
+AATTTGAAAGGAAGCATTTCAATTTCGAGGGTTAGTCAAAGTTGAATAAATTCTTTATGA
+AACAAGGAAAAGACATAGCTAATTTTATTGATTAATTTCTTTAAAACTAATGATTTGTTT
+GATTTAAAAATGTAATCGATTACAATATAAAAATACAAATATCTTAGAATTAAATCAATT
+AATTAACTATTAAATAAAAATTAAATATATATTAACTACTGTAAATTAATAAATAGAAAT
+AGAGAAAAAGGGTATTAATTATGTTTGGAATTTCGTCGGGAATAATTTTTGCCATTACAT
+AGAAATATCTAATAGATAATGAAAAAGTATCGTATGTATTTTTAATATAGTGTAAAATAT
+CATATGTAAAATAAAATGTAGATTTTTAGTTAGAGGCATTATAAGAAAATTTTGAGTATA
+GGTTAGCTTTTAATTATGAATCTTATTGAAATTTGATTAATAAATATATGATAGGGGATT
+AAAATGAAACTATTTTATATCGTATTTCTTATTATTATATGGCTGAATATATTTTTAGGA
+AATGAAATTATCCATACACTGACTGTTTTAATAACAACATTGTATATTGTTAATTCAAGA
+AAGGGGATTAAAAATGACAGAGTTGAATAATATTATAAACTCTCTTCAATCTTTGTTTGA
+GTCTGAATCAGGCTATAAAATTTCGAAAAATTCAGGAGTTCCATATCAAACAGTACAAGA
+TTTAAGAAATGGGAAAACCAAACTAGAAGATGCTAGATTTAGAACGATTATTAAACTTTA
+TAGTTACTATGTCTCATTAAAAGAACATTAATCATGGGACAATAAAGTATTGCTATAATA
+ATTGAATCATTAAATGAAGGAATAGAGCCTAAGACATAATAAATCAATGTCTTAGGCTCT
+ACAATATTATATTGGTAGTAGTTGACCGAATGAAAATGACCATGTAACAAGCATTTTTGG
+GGCCCCAACACAGAAGCTGACGAAAAGTCAGCTTACAATAATGTGCAAGTTGGGGATGGG
+CCCCAACACAGAGAATTTCAAAAAGAAATTCTACAGACAATGCAAGTTGGCGGGGCCCCA
+ACATAGAGAAATTCAAAAAGAAATTCTACAGACAATGCAAGTTGGGGAGTGACAACGAAA
+TAAATTTTATAAAATACCATTTCTGTCACATTCCTACTCCCAATTCCATTTAAATATATA
+GAAAATTTCACCTATTATATACATCATACACTTTAAAAATTATCATTTAAATAATCTGAT
+TTGGAATTAAAGTGCATGAAGTATAAGTCACCTTCATATACTAATCAAAGAGGACGTCAA
+CAGTTATTTTATTAGGATTTTTAACATAAACATTTGCTAGATCTGAATGTAATCTTTTGC
+TTAAATCAATAGTGTAGTTATTACCGCCACCGGTGATCTTAAGCTTACCTTTATTACGAT
+TTTCGGTATATAATATTTTATTTTTTATTAACGCTTCTCGTGCACGGAAATCGATTTCTT
+TCAATGTTAATACTGGTTTATTGCCTTGGTATATTTTATGCGCACCAATAATCGTTTGTA
+GTTTATCTTTGTATTGCACAAAAAGATTATAAGTTTTATCAGAAGGTTTTGCGGCTGGTG
+TAACGCCACCTGTAAATGTCTCTCTATAAGACCACCATAACTGATCAGTATCTTTGTCTT
+TTAGTCCAAACACATCTACGTAACGATCTTTTAACTGATTAATATTTCCCCAACTTTCAG
+CGCCCCATAAAGATATATGTGCTGACCATGAATATCTCTTAAGTTCAACATAAATGTTTC
+CATTATCATATTGATATAGCCATTTATTTGAAAATGAAAAATGAGGCTGTGTGTAATATT
+TAATTAATTCATTGATGTTAGTTTCATCTTGACCAATGCTATAAGCTTTAGCTTCAGAGT
+AAAAACTAAATACTGGTTTTTGATTTTGAGGTAATACAGTACCTAATAGTAATAATGTTG
+TCGTTAAAATTATATTTTTCGTGATGTTCTTACTCATTAGAACATCTCCTTTCAGAGGAA
+TCATGATACGAGGAATAAGAAATTTAAATGTGAGCGAAGTCAATATAGTATTTGCGATTA
+TTTTTATTAACTCCATTATTATTGTTAGTTTGATTTTTCGAGGATAACTTCAATTTTTGC
+ATTTTGAGGTTTTTTAACATATCTATTTGCATCAGTTGATGGCAACCTTTTACTTAAATC
+TATTGTGTAGTTATTGTCTGTACCTGTTATTTTAATTTGTCCTTTATTATAAGAATTATT
+ATATAATTTTTTACTTTTAATTAATGTTTGACGAATACGAAAATCTAATTCTTTTAAAGT
+TAAAACAGGCTTATTGCCTTCATAAACTGGAAATCCGCCAGTAAACGTTTCTGCTTTATC
+TTTATATGTTACATTCAGTTTATAGTGTTTATCGTTAGATGTTGCTGCAGGAGTAACACC
+ACCAGTAAACGTTTCTTGAGATAATGCAAAAGAATCAATGGTTTCTTGGTCTTTTATGCC
+AAAAATATCAACGCTTTTATTTCTTAATTGGTTGATATTGCCCCAACTTTCAGGTCCATA
+AACTTGAATATGACTATACCAAGAAAACTGTAACAACGTTGCATGAATCGTACCGTTATC
+TTTTTGCCATAACGTACTGTTAGAGAAGGTTAAATATTTTTGCGAGTAATATTTAGTTAA
+CTCATTAACGTTAGTTTCGTTTTGATTTATATAATAAGCTTTCGCTTCAGATGAAGAATT
+GATAGGTGTATTAGGAAATTGTGTAAATGCTGTACCTAATAGTAACAATGTTGTTGATAA
+AATAATTTTTTTCGTGATGTTATTGTTCATTCGAATTTCTCCTTTGAGTATTGTTGGAAT
+GTTTAATTATAAAAAGGGGTTAATTAGATAATTGAAATTATCCGCATTTACAAAAGGTAA
+TAGGTTAGTTAGATTTTTCGAGTATGACTTCAATTTGTGCATTTTTAGGATTTTTAACAT
+AACGGTTTGTGTCAGTTAATTTTAACTTTTTACTTAAATCAATCGTGTAATTATTTCCAT
+CAGCAGTTATCTTAATTTGACCTTTATTAAATTCTCCGTTATATAACTTTTTATTCTTTA
+TTAATGTTTGACGAATACGGAAATCTAATTCTTTTAAAGTTAATACTGGTTTATTTCCTT
+TGTAAAATTCATGTCCACCGATGATAGTTTGTTGTTTATCACTATATTTTAAAAATAGTC
+TATAAGGTTTATCAGATGAAGTAGCTGCTGGCGTAACACCACCAGTAAATGTTTCATCAT
+AAGTCCAGTAACCTTCAACTGTGTCCTCATCTTTAGTTCCAAATATATCAACGTATTTAT
+TTCTTAACTGATTAATGTTTCCCCAACTCTCGGATCCAAACACTTGAATATGACTATACC
+AAACCCACGTTTGCAATGTTGCATGAATGCTACCATTGGGCTTTTGCCATAGCCATTTTC
+CAGATAATGAAAAATGCGGCTGAGTGTAATATTTGATTAACTCATTGATATTAGTCTCGT
+TTTCACTGATATTATAGGCTTTTGCTTCAGATGAAAAACTGATTGGTGTTTTAGGAAGTT
+GTGTTGATGTGGTTCCTAAAAGTAACAATGCTGTTGATAAAACTAATTTATTCATGATGT
+TCTTTTTCATATGAAAATCTCCTTTGCGTGAATTACCCAAAGTATATAAGCTATTACACC
+GATTCGGAATTAAATAAAAGCTAAAACTATGTTAAATAAACTTAAACAGTTAGTAGTGTT
+ATTTAAGCAAAACTTATCATTTTTAAGTTTTGGACAGAAACAGTACTTAATAAAGTAGGC
+GGGAGTTATATATTAAAAACGACACGTTACAATTATTCTCTAATCAATTGCATTAAATTG
+TTTGATAATTGAATTTTCTAACTACTTGAAAAATAGTTATACTTTAAATGTAGTACTTAT
+TTTAATTATTTCCTACTACTTAAATTTAATATTAATAAAATGTTCATTTAATTATTGATA
+AAATATTACAAATTTTAATAGTAGGTTGTGTTTATTTTGTATGCGCTTACAATTTAGGTG
+TAACTAAAATAAAAGGAGTTGTTATTAATGAAAAATTTACGAAACAGAAGTTTTTTAACT
+TTATTAGACTTTTCACGACAAGAGGTAGAATTCTTATTAACACTCTCCGAGGATTTAAAA
+CGTGCTAAATATATTGGCACTGAAAAGCCTATGTTAAAAAATAAAAATATTGCACTGTTA
+TTTGAAAAAGATTCTACAAGAACGCGATGTGCATTTGAAGTTGCAGCGCATGATCAAGGT
+GCAAATGTAACTTATTTAGGCCCAACTGGATCACAAATGGGTAAAAAAGAAACAACTAAA
+GATACTGCACGTGTGCTTGGTGGAATGTATGATGGCATTGAATACCGTGGTTTTTCACAA
+AGAACAGTAGAAACTTTAGCTGAAAATTCAGGCGTACCAGTGTGGAATGGTTTAACTGAT
+GAAGATCATCCTACTCAAGTTCTTGCTGATTTCTTAACAGCAAAAGAAGTCTTAAAAAAA
+GATTATGCAGATATTAACTTTACATATGTTGGAGATGGTCGTAATAACGTTGCAAATGCA
+TTAATGCAAGGTGCTGCCATTATGGGTATGAACTTCCATTTAGTTTGTCCAAAAGAATTA
+AATCCAACAGATGAATTATTAAATCGCTGTAAAAATATTGCCGCTGAAAATGGTGGCAAC
+ATATTAATCACAGATGATATTGACCAAGGTGTAAAAGGTTCGGATGTAATTTACACTGAT
+GTTTGGGTATCAATGGGTGAACCTGATGAAGTATGGAAAGAACGACTTGAATTATTGAAA
+CCATATCAAGTAAATAAAGAAATGATGGATAAAACTGGTAATCCAAATGTTATTTTTGAG
+CATTGCTTACCATCTTTCCATAATGCTGATACGAAAATTGGTCAACAAATTTTTGAAAAA
+TATGGTATTCGAGAAATGGAAGTTACAGATGAAGTATTCGAAAGTAAAGCTTCAGTTGTA
+TTCCAAGAAGCTGAGAACAGAATGCATACAATCAAAGCAGTCATGGTTGCTACATTGGGT
+GAATTTTAAATGATATAAGGAAGTGAATATGATGGCGAAAATAGTAGTAGCATTAGGTGG
+TAATGCTTTAGGAAAATCACCTCAAGAACAACTCGAGCTTGTTAAAAATACTGCGAAATC
+ATTAGTAGGATTAATAACAAAAGGACATGAGATTGTTATTAGTCATGGTAATGGACCACA
+GGTTGGAAGCATTAATTTGGGACTTAACTATGCTGCAGAACATAACCAAGGTCCGGCATT
+TCCATTTGCTGAATGTGGCGCAATGAGTCAAGCTTACATCGGCTATCAATTACAAGAAAG
+CTTACAAAATGAATTGCATTCTATTGGAATGGATAAACAAGTGGTAACACTAGTGACACA
+AGTTGAAGTTGATGAAAATGATCCGGCATTTAACAATCCTTCAAAACCAATTGGGTTATT
+TTACAACAAAGAAGAAGCTGAACAAATTCAAAAAGAAAAAGGATTTATATTTGTTGAAGA
+TGCTGGAAGAGGATATAGACGCGTTGTTCCTTCACCACAACCCATCTCTATTATTGAATT
+AGAGAGTATTAAAACACTTATTAAAAATGATACACTCGTTATTGCTGCTGGTGGTGGAGG
+TATACCAGTAATTAGAGAGCAACATGATGGTTTTAAAGGTATTGATGCAGTTATAGACAA
+AGATAAAACAAGTGCGTTGTTGGGTGCTAATATTCAATGCGATCAATTGATTATTTTAAC
+AGCAATTGATTATGTATATATTAATTTTAACACTGAAAACCAACAGCCTTTGAAAACAAC
+AAATGTTGATGAATTAAAACGATATATAGACGAAAATCAATTTGCAAAAGGAAGTATGTT
+ACCAAAAATTGAAGCAGCCATATCATTTATTGAAAACAATCCAAAAGGAAGTGTGCTTAT
+AACATCATTAAATGAATTAGATGCTGCCTTAGAGGGTAAAGTAGGTACTGTGATTAAAAA
+GTAATTGAATTGAAACGCTTTTCAATTACTATATGTCAAATGCATGATTTTTATTATTTA
+TGTGCACCCCCTGAAAATAATGCCTCTATTTTGATGCGGGGTGCACTTTCTTAATTTATA
+TTTATAAAATCTTTAAGGTAGAAATGCTAGGTTAAATGTCGAAGGAGATGAAACCGTGGA
+AAATACAATTAATGAAAGTGAAAAGAAAAAACGATTTAAATTAAAAATGCCAGGTGCATT
+TATGATTTTATTCATTTTAACGGTTGTTGCAGTTATAGCAACATGGGTTATTCCTGCTGG
+TGCATATTCTAAACTTTCTTACGAACCTTCATCCCAAGAACTAAAGATAGTTAACCCTCA
+TAACCAAGTGAAAAAGGTTCCGGGTACGCAACAGGAACTAGACAAAATGGGGGTTAAAAT
+TAAGATTGAACAATTTAAATCAGGTGCAATTAATAAGCCAGTATCAATTCCGAATACTTA
+TGAAAGATTAAAGCAACATCCAGCTGGACCAGAACAAATAACAAGTAGCATGGTTGAAGG
+TACGATAGAAGCGGTCGATATCATGGTATTCATTCTTGTACTAGGGGGACTTATTGGCGT
+AGTTCAAGCCAGTGGTTCTTTTGAATCGGGATTGTTAGCTTTAACGAAGAAAACAAAAGG
+GCATGAATTTATGCTAATTGTGTTTGTATCAATACTAATGATTATCGGCGGGACGTTATG
+TGGTATTGAAGAAGAAGCTGTAGCATTCTATCCGATTTTAGTCCCTATATTTATAGCGTT
+AGGATACGATTCTATCGTTTCAGTTGGTGCCATATTCCTTGCCAGCTCTGTCGGTAGTAC
+ATTTTCAACTATTAACCCGTTCTCGGTTGTAATTGCCTCTAATGCCGCTGGTACAACTTT
+TACGGATGGCTTGTATTGGAGAATAGGTGCTTGTATTGTCGGTGCGATTTTTGTTATTAG
+TTATTTATATTGGTACTGTAAAAAAATTAAAAACGATCCTAAAGCGTCATATTCTTATGA
+AGACAAAGATGCTTTTGAACAGCAATGGTCTGTATTAAAAGATGACGATAGTGCCCATTT
+TACTTTGCGTAAGAAGATAATCCTTACATTATTTGTACTACCATTTCCAATTATGGTATG
+GGGAGTTATGACGCAAGGTTGGTGGTTCCCAGTTATGGCTTCAGCATTTTTAATATTTAC
+AATTATAATAATGTTTATTGCTGGGACAGGTAAATCTGGATTGGGAGAAAAAGGAACTGT
+AGATGCATTTGTCAATGGTGCATCAAGTTTAGTAGGTGTATCTTTAATTATTGGTTTAGC
+TCGAGGTATTAATTTAGTGTTGAATGAAGGTATGATTTCAGATACAATCTTACACTTTTC
+ATCATCTTTAGTTCAACATATGAGTGGACCATTATTTATCATCGTATTACTATTTATTTT
+CTTCTGTTTAGGTTTTATCGTGCCATCATCTTCTGGATTAGCGGTATTATCAATGCCTAT
+CTTTGCACCACTAGCTGATACAGTAGGTATACCAAGATTCGTCATCGTTACGACATATCA
+ATTTGGTCAATATGCGATGTTATTCTTAGCGCCGACTGGACTTGTTATGGCCACACTACA
+AATGTTAAACATGCGATATTCACATTGGTTCCGATTTGTATGGCCGGTAGTTGCTTTTGT
+ATTGATTTTCGGTGGCGGAGTACTAATTACGCAAGTACTAATTTATTCATAATTTGAAAT
+GCTATATTATAAAAATACTAATTGGGTTTTATGCATCTCGTAGGTTTGTAGAAATACTAA
+ACTAAGCGAGGTGCATTATTATTTTTGATTAAGAAAATAATGACGGTAATGATAACACTA
+GTAAGTAATTGATACAATGCTCTATTTAATAATGATATTTTTAAAATTTGTTTTTAATGT
+AATGTTAGATCTATGGTATATTATATTTATCGTGGTAAATATGTATTTGCTGTAAACAGT
+TTTTGTAACACATGACACATATGGTTGTGAAAATAGCATATATAAAGGATGGCTATAAAT
+GACACATTTGACAAAGGTTTTAGATACACTAACTGGAATATGCGTAGTATTATTATTTAG
+TAAATATTTTGTGGCGTATGCAAATATGGTGTTTGATTGGAATTTAAGATGGTATTTGCT
+AGAAAACATACCACATTTGCCAATTATATTATTTATTCTGATGTTTATTTTCGGAGTACC
+TTCTGAAATGATAAAAGATAGGCAAAGGAAAAATAACGGTGTTTAATTTATCGATATTTA
+GAGGTGATAAAATTTGTCAACTATTACAAAGACTAAAATAAAAATCTTTATTAATTATTT
+TATTGGTATATTAGCCTTGGCGTTTGCTGCTTATATATTATTTAAATAAAGGTTAGAGTG
+AAACGTGTTTATGAACTAGACGTTCTAGTATAGTTACTACAGCTTTAGTCAGAAGGTATC
+ATTGATAAGATCATATTAAATCAAAGAGGCATTGATATACACTAAAAAGAGGCAAGATTA
+CCTGCCTCTTTTTTAGTTATTAAATATGCGTGTTAATTCTTGGTAATAGTGTTACTGAGA
+CGCATTACGAAGAAATTTATCTTGATTTTCCTTTTTTAAAAAGAAGTGAAGATATCCTAT
+AAAGACTCTAAGTACTATTACAGTGGCTAATAACACTTCGATAAACAAAAGACCTTTCCA
+AATATCTGGAAACATAAGTACAGGCAAACTGTTCTTTAAAGCAGTTGCTGAGATTACTAA
+AGGGAATGTGAAAGCTGAAAATACGGGCGAAAAAGGTTCTTTTAGTAATTTAGGCAATTG
+TATAATGATATAAAAATAAAATATTTGAGCTAATATTAAAAATATAATTACGATAAAAGC
+ATTCGCCTTAGGAAAGGCTATAACATATGCTGCAGCGACTAAAGAAAATGGTGCACAAAT
+TGTCGATGTGTTCGGTTTGATTGACGTCTGCAATGGAAATGCTTTTAATCGCTTGAAAAC
+AATAGGTAAGACAATACAAGTAGCTACAAAGCCATATATTACTGTTAATTGACCTATGAA
+AAAACATCCGCTAACGGGTGCCGTCAATCCTGCGATAGCAATACCAATAAAAAGTACAGT
+CCACGAAGGATAAACATTTTCAAGTGAAAAATCTTTTAAATATTTTATTGAAAAAATAAT
+CATATGCGTCATAATTCCCACAAGGCATAAAATCCAAATAGGCGTTATTAAGCTATTGAT
+AAAAGTTATGTTACTAAAAAATGTATTTAAATAAGTAGTACCTAAAAAGCCAGACATGAA
+AAATGTTGTGAACACTGATGAAACTAGAGGACTGTTTAATTGTTCTTTCACATTATTAAA
+ATATTTGATCATAGTACATAAAAGGTGAATCCAAATCAAGAAAGCAAAGATTCCGCAAAC
+AGCGTTTAAAGTAAGAGATAAGTCTTTTAATAGATTACCCAGGCCTAATAATCCTAAGAC
+TAGTCCTGACGTTACTAGAGGTGCTTTTTGAAGTCTCATGATTTACAACCTTTCTTATGT
+GATTTTCTTCACTAATTATATCATGATAGCTATGGCCAATTAATAAGAAGAGTGTGTACT
+ATTACGTTATTAGAGTATGTATATTGGATTGATAGACACATAATTGACATTTAAATCTCA
+AAATTAATGATATTTGAGGTATGCTTTGCAACCTAAATATTGGAATATGTGGAAAAGTAA
+TTATTTAATTTAAATGATGATTGAAGATTTACAAGGGGGTGTACAAAATGAGAAATCAAA
+TTCAAAAACTATTAGACAGTGATTTGAGCAGTTTACATATATCGAAACAAACAGGAGTTC
+CACAAAGCACAATACACAGAATGAGAAAAAAAGAAAGATCATTAGACAATATGTCATTGA
+AAAACGCTGAACTACTTTATAAATTTGCCAATAGTATATTTAGCAATGAAAATTAAGAAG
+AATAATTAGTGAGTTAAATAATTAAAAAACCACGTCTCGATGGACGTGGTTATTTTTTTA
+AATGTATTTAATTATAGGATTTCGGGCCCCTGAAAAGTCCCTAAAATTTTGAATAGTATC
+AAAATCTATAGGAATAATATAATAATAAAAATCTCTACGCATGCAAATGGTGAGTGTTAA
+AAATCTTGGTGTAGTGTTGGTGTAGTTCAGGTGCAGTATAGATGAAATTTGAGCACAAAA
+TTACTTAATTAAAAATGTATAAACAAGTGTCAGACGCGTGTCAATTGTGTGTCAAAAAAT
+TCTATGACGAATAAGCATGCTTGTTTTAATTACTGTATTTTCGGGGCTTATGCGGGGGAC
+GTTTTGGTGACGCATCATACTATATTACTGAAATTCAAAACAAAAAGAGCCCCGTAATCA
+CGGAACTCTTTTGTTTGGTAATGCGTATAAAAATACCTATAAACGTCCTGGGAGGGATTC
+GAACCCCCGACCGATGGCTTAGAAGGCCATTGCTCTATCCAGCTGAGCTACCAGGACACG
+TTTAACAACACAAGAATTATTATATCTAAATGAACTTAAATTAGCAATACCTTGTAAATA
+AAAAATGTTTATATTTTTCACTATTATAGAGCTATTTATCTAAAAAGGTTCAATAAGACT
+TAAATGCGAATTCAGGCAACTTAATTGTGTTAAATACAGTTTTGAATGCCTAACTGTATT
+TCTTTTCTCTTTAAAATACAGTTAAGTACATTATAAGATGTTGTGCGGATAAACAAACTA
+ATTGCATCAAATTTATTTTAAAATAACAACAACAAAACGTTAAGCGAATAACATTTCGGT
+GATTTAAAAGCTACGCACGTTTTTGTTATCTTCAAATTTAAATTTTAAGGAGTGTTTTCA
+ATGGAAGGTTTATTTAACGCAATTAAAGATACCGTAACTGCAGCAATTAATAATGATGGC
+GCAAAATTAGGCACAAGCATTGTGAGCATCGTTGAAAATGGCGTAGGTTTATTAGGTAAA
+TTATTCGGATTCTAATTTCAATATGTTATGTAAGTAATCAGTATTATTTCAAAGGTGAGG
+GAGAGATTTAAATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAAC
+AACATGATAGTGTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTT
+TACTAGGTAAATTATTTGGATTCTAATATAATAACTAATATTCTTTAAAATAAACTGGGT
+GAGCATACTTTAATGTTATGCACTCAGTTTATTTTATTTGCAGAAATTTGAGCCTCTGTT
+AAGATTTAGATACATAGACAATATAGGAGATGGGGAAATTGGGATATAAAAATATTTTGA
+TAGACTTTGATGATACAATTGTTGATTTTTATGATGCAGAAGAATGGGCGTTTCACTATA
+TGGCGAATGTTTTTAATCATAAAGCAACAAAGGATGATTTTTTAACATTTAAAAAAATCA
+ATCACCAACATTGGGAAGCTTTTCAACAAAATAAATTAACGAAGTCTGAAGTATTATCAG
+AACGATTTGTGAATTACTTCAAACATCATCAAATGGAAGTTGATGGGCATCGTGCAGATG
+TGTTATTTAGAAATGGATTAGCAGAAGCTAAAGTTAAATACTTTGATCAAACATTAGAAA
+CAATTGTCGAATTATCGAAAAGACATGATTTATATATTGTTACTAATGGTGTAACCGAAA
+CGCAAAAGAGAAGGTTAAATCAGACGCCGTTGCATAAATATATTAAAAAGATATTTATAT
+CTGAGGAAACAGGATATCAAAAACCTAATCCGGAATTTTTTAATTATGTTTTTAATGATA
+TTGGTGAGGATGAAAGACAGCACTCGATTATAGTTGGAGATTCTTTAACATCTGACATTC
+TAGGTGGAATCAATGCGGGTATAGCTACTTGCTGGTTTAATTTTAGAGGATTTGATCATA
+ATCCAGGAATTATACCTGATTATGAAATTAATTCATGGAAACAACTAAATGATATTGTAC
+GTTAAGACAAATAATTTGAAAATGATTTATATTAGCTTAAGAAAGTAATTGTAAAAACGA
+CACTACGAAAAGATGCATTTTCATGCTCGTAGTGTCGAATATTATTATCTTATGGTTTCC
+ACAATAAGACATCATGTCCCTCTATATTTTTAGCTTCCACATCTGTAAATCCATGATGTA
+AAAAGAAATCCTTAGAATCATTTCTGCCAATGGCTTTAATTGGCATATTGAAACTCTTTG
+CAAAATCAATCAATTCTGAAGCGTACCCTCTGTTTTGATATTTTGGTAATACTTCTAACT
+TCCATAATAATATATAATCTTCAAAATCTGGGAAGTAGATTTCTTCGACATCACCTTTTT
+TTAGTAATGCCATTCTAGCTCCTAATTGATCTCCGACAAATATGCCATAAAATGGTGAAT
+CTGAACTTGCATCAATCATTTGACCGTTTAACTCATTGACCATGTATAAGTCTTTGTTGC
+CAAACGCTCTAAAGTTTTCGAATAATTCGTCAGTTTTGTAATTAATTTCAAGACGTTTGA
+TTTCACTCATATTTATCTACCCCTTATTTGTTTACTACTATTATTATACATTAAACCACT
+TGTGTTTTTCATTGATTTGAAAATGTAAAACAATGAACATTATAAATTTTCCATGATTAA
+ATTGATTTTCAAGCCGTAAAATTGTCAATCACCGGTATAGACTTTATACTATAGTTTGTA
+ATCGCGTACTTAAGGAGAAAAAACATGGACTGTAAAGTAGTTAGTTTAAATGAAAAAGAT
+CAGTTTATACCAAAAATAAAGAGCAGTGACCCTGTAATAACAGGATTATTTCAATATGAT
+GCAGCTCAACAAACTAGTTTTGAAAAAAGGATGTCTAAAGAAAATAATGGAAGAGAAGCG
+GCATTAGCGAATGTTATTCGTGAATATATGAGTGATTTAAAGCTTTCAAATGAACAAGAA
+TTAAACATACAACATTTAGCTAATGGTTCAAAAGTTGTGATTGGTGGACAACAAGCAGGG
+CTTTTCGGGGGACCATTGTATACATTCCATAAAATATTTTCAATCATTACTTTATCTAAG
+GAATTAACGGATACACATAAGCAACAAGTAGTACCAGTTTTTTGGATTGCAGGAGAAGAT
+CATGATTTCGATGAAGTGAATCATACATTTGTTTATAACGAAAATCATGGGTCGCTGCAT
+AAGGTTAAATATCATACAATGGAGATGCCAGAGACGACAGTCTCTAGATATTATCCTGAT
+AAGGCTGAGTTGAAACAAACTTTAAAAACGATGTTCATTCATATGAAAGAAACTGTTCAT
+ACACAAGGTCTACTGGAGATTTGTGACAGAATTATTGACCAATATGACTCGTGGACTGAT
+ATGTTTAAAGCACTACTGCATGAAACATTTAAAGCATATGGCGTTCTATTTATAGATGCG
+CAGTTTGAGCCGTTAAGAAAAATGGAAGCGCCTATGTTTAAAAAGATTTTGAAAAAACAT
+CAGTTGCTTGATGATGCTTTTAGAGCAACACAACAACGTACTCAAAATCAAGGCTTGAAT
+GCGATGATACAAACAGATACAAATGTTCATTTATTCTTACATGATGAAAATATGCGCCAA
+TTAGTTTCGTATGATGGTAAGCATTTTAAATTAAATAAAACAGATAAGACATATATAAAG
diff --git a/t/data/expected_query_2.fna b/t/data/expected_query_2.fna
new file mode 100644
index 0000000..70ca9a0
--- /dev/null
+++ b/t/data/expected_query_2.fna
@@ -0,0 +1,252 @@
+>abc|SC|contig000001
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+ATACCCACCACATTTGGTGGAGAACCAAAAATTAGCCGAAAAACATCATTTCTGAAGTTA
+TCGGCTAAAGTTATAAATTATATTTATTTGTACATGAACAAATAATTTACATTAATTTGT
+CATTTCTTCTTTTTCCCAATCGATTTTATATCTTTCTGAAGAACGATCTGTCCATTTATC
+TTTAGTATTGGTACCTTTCCAATTTGTTGAAGTCCAATGCAATTGGTAGTCATCACGAAC
+TCGTTCGTATATTACATCTATATTTGTTTGTTGTTTGGATGCTTTTCTATCCATAGTAAT
+AACTGTAGCGAAGTCTGGTGAAAACCCTGAAGATAATAGAGAACTTGCTTTGTTAGGATC
+AAGGAAGTTCTCTGCTGCTTTCATAGAACCATTTCTAGTTTTCATGAAAAGTTGATTGCC
+ATATACCGGGTTCCAAGAATCTCTATCATATGGTCCCCAATTTTGATTCACCATATTGTT
+AAATATCACTTTCCAGCCTACTTTTTTATCAGTTGGGCTCTCTAAAATTGTTTTGAAATC
+AGGTTGAACATATTTCAGTGTATGACCAATCGAAACATTTGCACCAATAAGGCCGCCAAT
+TTTTCCTGTATCATCACCAGTAACATTACCGTTGAATCCATAAGTTAAAGTACTCATATA
+CTCTTTTGTATCAATCGAATTTCTTGGATAGTAATCAGATATTTGAGCTACTTCATTATC
+AGGTAGTTGCAACTGTACCTTAAAGGCTGAAGGCCAGGCTAAACCACTTTTGTTAGCACC
+TTCTTCGCTATAAACTCTATATTGACCAGCAATGGTACCTTTCGTTCTAATAACTAGCAG
+TTTTTTATTGTGATTTTTATCATCGATAAAACTATAAAATACTTTTTTGTGCATGCCATT
+TTCTTTATCATAAGTGACTAAATCACCTGTTTTTACTGTAGTATTGCTTCCAATATCTGT
+AGTACCGGTTTTAATATTAATATCAGAATCTGCGGCATTAGCGACAGGATTCATTAATAT
+GGAACCTAGCAATAGTGTTGTTGTTACTGAGCTGACTATACGTGTTTTCATTTTCATCAT
+CCTTCTATTTTTTTTAACGATTTGAGGAAACAATAATCAATATGTCAATTTAGAATATTG
+CAGTTGAGACATATCAAATATTTATAATCGTTTATATTAGTATATTTAATGTAGCAACTG
+ATAAATTACTGAGTGATGATGAGTGATTATTTTAAGAATATGTTTTTAACTTTTATTTAA
+AATTTGAAAGGAAGCATTTCAATTTCGAGGGTTAGTCAAAGTTGAATAAATTCTTTATGA
+AACAAGGAAAAGACATAGCTAATTTTATTGATTAATTTCTTTAAAACTAATGATTTGTTT
+GATTTAAAAATGTAATCGATTACAATATAAAAATACAAATATCTTAGAATTAAATCAATT
+AATTAACTATTAAATAAAAATTAAATATATATTAACTACTGTAAATTAATAAATAGAAAT
+AGAGAAAAAGGGTATTAATTATGTTTGGAATTTCGTCGGGAATAATTTTTGCCATTACAT
+AGAAATATCTAATAGATAATGAAAAAGTATCGTATGTATTTTTAATATAGTGTAAAATAT
+CATATGTAAAATAAAATGTAGATTTTTAGTTAGAGGCATTATAAGAAAATTTTGAGTATA
+GGTTAGCTTTTAATTATGAATCTTATTGAAATTTGATTAATAAATATATGATAGGGGATT
+AAAATGAAACTATTTTATATCGTATTTCTTATTATTATATGGCTGAATATATTTTTAGGA
+AATGAAATTATCCATACACTGACTGTTTTAATAACAACATTGTATATTGTTAATTCAAGA
+AAGGGGATTAAAAATGACAGAGTTGAATAATATTATAAACTCTCTTCAATCTTTGTTTGA
+GTCTGAATCAGGCTATAAAATTTCGAAAAATTCAGGAGTTCCATATCAAACAGTACAAGA
+TTTAAGAAATGGGAAAACCAAACTAGAAGATGCTAGATTTAGAACGATTATTAAACTTTA
+TAGTTACTATGTCTCATTAAAAGAACATTAATCATGGGACAATAAAGTATTGCTATAATA
+ATTGAATCATTAAATGAAGGAATAGAGCCTAAGACATAATAAATCAATGTCTTAGGCTCT
+ACAATATTATATTGGTAGTAGTTGACCGAATGAAAATGACCATGTAACAAGCATTTTTGG
+GGCCCCAACACAGAAGCTGACGAAAAGTCAGCTTACAATAATGTGCAAGTTGGGGATGGG
+CCCCAACACAGAGAATTTCAAAAAGAAATTCTACAGACAATGCAAGTTGGCGGGGCCCCA
+ACATAGAGAAATTCAAAAAGAAATTCTACAGACAATGCAAGTTGGGGAGTGACAACGAAA
+TAAATTTTATAAAATACCATTTCTGTCACATTCCTACTCCCAATTCCATTTAAATATATA
+GAAAATTTCACCTATTATATACATCATACACTTTAAAAATTATCATTTAAATAATCTGAT
+TTGGAATTAAAGTGCATGAAGTATAAGTCACCTTCATATACTAATCAAAGAGGACGTCAA
+CAGTTATTTTATTAGGATTTTTAACATAAACATTTGCTAGATCTGAATGTAATCTTTTGC
+TTAAATCAATAGTGTAGTTATTACCGCCACCGGTGATCTTAAGCTTACCTTTATTACGAT
+TTTCGGTATATAATATTTTATTTTTTATTAACGCTTCTCGTGCACGGAAATCGATTTCTT
+TCAATGTTAATACTGGTTTATTGCCTTGGTATATTTTATGCGCACCAATAATCGTTTGTA
+GTTTATCTTTGTATTGCACAAAAAGATTATAAGTTTTATCAGAAGGTTTTGCGGCTGGTG
+TAACGCCACCTGTAAATGTCTCTCTATAAGACCACCATAACTGATCAGTATCTTTGTCTT
+TTAGTCCAAACACATCTACGTAACGATCTTTTAACTGATTAATATTTCCCCAACTTTCAG
+CGCCCCATAAAGATATATGTGCTGACCATGAATATCTCTTAAGTTCAACATAAATGTTTC
+CATTATCATATTGATATAGCCATTTATTTGAAAATGAAAAATGAGGCTGTGTGTAATATT
+TAATTAATTCATTGATGTTAGTTTCATCTTGACCAATGCTATAAGCTTTAGCTTCAGAGT
+AAAAACTAAATACTGGTTTTTGATTTTGAGGTAATACAGTACCTAATAGTAATAATGTTG
+TCGTTAAAATTATATTTTTCGTGATGTTCTTACTCATTAGAACATCTCCTTTCAGAGGAA
+TCATGATACGAGGAATAAGAAATTTAAATGTGAGCGAAGTCAATATAGTATTTGCGATTA
+TTTTTATTAACTCCATTATTATTGTTAGTTTGATTTTTCGAGGATAACTTCAATTTTTGC
+ATTTTGAGGTTTTTTAACATATCTATTTGCATCAGTTGATGGCAACCTTTTACTTAAATC
+TATTGTGTAGTTATTGTCTGTACCTGTTATTTTAATTTGTCCTTTATTATAAGAATTATT
+ATATAATTTTTTACTTTTAATTAATGTTTGACGAATACGAAAATCTAATTCTTTTAAAGT
+TAAAACAGGCTTATTGCCTTCATAAACTGGAAATCCGCCAGTAAACGTTTCTGCTTTATC
+TTTATATGTTACATTCAGTTTATAGTGTTTATCGTTAGATGTTGCTGCAGGAGTAACACC
+ACCAGTAAACGTTTCTTGAGATAATGCAAAAGAATCAATGGTTTCTTGGTCTTTTATGCC
+AAAAATATCAACGCTTTTATTTCTTAATTGGTTGATATTGCCCCAACTTTCAGGTCCATA
+AACTTGAATATGACTATACCAAGAAAACTGTAACAACGTTGCATGAATCGTACCGTTATC
+TTTTTGCCATAACGTACTGTTAGAGAAGGTTAAATATTTTTGCGAGTAATATTTAGTTAA
+CTCATTAACGTTAGTTTCGTTTTGATTTATATAATAAGCTTTCGCTTCAGATGAAGAATT
+GATAGGTGTATTAGGAAATTGTGTAAATGCTGTACCTAATAGTAACAATGTTGTTGATAA
+AATAATTTTTTTCGTGATGTTATTGTTCATTCGAATTTCTCCTTTGAGTATTGTTGGAAT
+GTTTAATTATAAAAAGGGGTTAATTAGATAATTGAAATTATCCGCATTTACAAAAGGTAA
+TAGGTTAGTTAGATTTTTCGAGTATGACTTCAATTTGTGCATTTTTAGGATTTTTAACAT
+AACGGTTTGTGTCAGTTAATTTTAACTTTTTACTTAAATCAATCGTGTAATTATTTCCAT
+CAGCAGTTATCTTAATTTGACCTTTATTAAATTCTCCGTTATATAACTTTTTATTCTTTA
+TTAATGTTTGACGAATACGGAAATCTAATTCTTTTAAAGTTAATACTGGTTTATTTCCTT
+TGTAAAATTCATGTCCACCGATGATAGTTTGTTGTTTATCACTATATTTTAAAAATAGTC
+TATAAGGTTTATCAGATGAAGTAGCTGCTGGCGTAACACCACCAGTAAATGTTTCATCAT
+AAGTCCAGTAACCTTCAACTGTGTCCTCATCTTTAGTTCCAAATATATCAACGTATTTAT
+TTCTTAACTGATTAATGTTTCCCCAACTCTCGGATCCAAACACTTGAATATGACTATACC
+AAACCCACGTTTGCAATGTTGCATGAATGCTACCATTGGGCTTTTGCCATAGCCATTTTC
+CAGATAATGAAAAATGCGGCTGAGTGTAATATTTGATTAACTCATTGATATTAGTCTCGT
+TTTCACTGATATTATAGGCTTTTGCTTCAGATGAAAAACTGATTGGTGTTTTAGGAAGTT
+GTGTTGATGTGGTTCCTAAAAGTAACAATGCTGTTGATAAAACTAATTTATTCATGATGT
+TCTTTTTCATATGAAAATCTCCTTTGCGTGAATTACCCAAAGTATATAAGCTATTACACC
+GATTCGGAATTAAATAAAAGCTAAAACTATGTTAAATAAACTTAAACAGTTAGTAGTGTT
+ATTTAAGCAAAACTTATCATTTTTAAGTTTTGGACAGAAACAGTACTTAATAAAGTAGGC
+GGGAGTTATATATTAAAAACGACACGTTACAATTATTCTCTAATCAATTGCATTAAATTG
+TTTGATAATTGAATTTTCTAACTACTTGAAAAATAGTTATACTTTAAATGTAGTACTTAT
+TTTAATTATTTCCTACTACTTAAATTTAATATTAATAAAATGTTCATTTAATTATTGATA
+AAATATTACAAATTTTAATAGTAGGTTGTGTTTATTTTGTATGCGCTTACAATTTAGGTG
+TAACTAAAATAAAAGGAGTTGTTATTAATGAAAAATTTACGAAACAGAAGTTTTTTAACT
+TTATTAGACTTTTCACGACAAGAGGTAGAATTCTTATTAACACTCTCCGAGGATTTAAAA
+CGTGCTAAATATATTGGCACTGAAAAGCCTATGTTAAAAAATAAAAATATTGCACTGTTA
+TTTGAAAAAGATTCTACAAGAACGCGATGTGCATTTGAAGTTGCAGCGCATGATCAAGGT
+GCAAATGTAACTTATTTAGGCCCAACTGGATCACAAATGGGTAAAAAAGAAACAACTAAA
+GATACTGCACGTGTGCTTGGTGGAATGTATGATGGCATTGAATACCGTGGTTTTTCACAA
+AGAACAGTAGAAACTTTAGCTGAAAATTCAGGCGTACCAGTGTGGAATGGTTTAACTGAT
+GAAGATCATCCTACTCAAGTTCTTGCTGATTTCTTAACAGCAAAAGAAGTCTTAAAAAAA
+GATTATGCAGATATTAACTTTACATATGTTGGAGATGGTCGTAATAACGTTGCAAATGCA
+TTAATGCAAGGTGCTGCCATTATGGGTATGAACTTCCATTTAGTTTGTCCAAAAGAATTA
+AATCCAACAGATGAATTATTAAATCGCTGTAAAAATATTGCCGCTGAAAATGGTGGCAAC
+ATATTAATCACAGATGATATTGACCAAGGTGTAAAAGGTTCGGATGTAATTTACACTGAT
+GTTTGGGTATCAATGGGTGAACCTGATGAAGTATGGAAAGAACGACTTGAATTATTGAAA
+CCATATCAAGTAAATAAAGAAATGATGGATAAAACTGGTAATCCAAATGTTATTTTTGAG
+CATTGCTTACCATCTTTCCATAATGCTGATACGAAAATTGGTCAACAAATTTTTGAAAAA
+TATGGTATTCGAGAAATGGAAGTTACAGATGAAGTATTCGAAAGTAAAGCTTCAGTTGTA
+TTCCAAGAAGCTGAGAACAGAATGCATACAATCAAAGCAGTCATGGTTGCTACATTGGGT
+GAATTTTAAATGATATAAGGAAGTGAATATGATGGCGAAAATAGTAGTAGCATTAGGTGG
+TAATGCTTTAGGAAAATCACCTCAAGAACAACTCGAGCTTGTTAAAAATACTGCGAAATC
+ATTAGTAGGATTAATAACAAAAGGACATGAGATTGTTATTAGTCATGGTAATGGACCACA
+GGTTGGAAGCATTAATTTGGGACTTAACTATGCTGCAGAACATAACCAAGGTCCGGCATT
+TCCATTTGCTGAATGTGGCGCAATGAGTCAAGCTTACATCGGCTATCAATTACAAGAAAG
+CTTACAAAATGAATTGCATTCTATTGGAATGGATAAACAAGTGGTAACACTAGTGACACA
+AGTTGAAGTTGATGAAAATGATCCGGCATTTAACAATCCTTCAAAACCAATTGGGTTATT
+TTACAACAAAGAAGAAGCTGAACAAATTCAAAAAGAAAAAGGATTTATATTTGTTGAAGA
+TGCTGGAAGAGGATATAGACGCGTTGTTCCTTCACCACAACCCATCTCTATTATTGAATT
+AGAGAGTATTAAAACACTTATTAAAAATGATACACTCGTTATTGCTGCTGGTGGTGGAGG
+TATACCAGTAATTAGAGAGCAACATGATGGTTTTAAAGGTATTGATGCAGTTATAGACAA
+AGATAAAACAAGTGCGTTGTTGGGTGCTAATATTCAATGCGATCAATTGATTATTTTAAC
+AGCAATTGATTATGTATATATTAATTTTAACACTGAAAACCAACAGCCTTTGAAAACAAC
+AAATGTTGATGAATTAAAACGATATATAGACGAAAATCAATTTGCAAAAGGAAGTATGTT
+ACCAAAAATTGAAGCAGCCATATCATTTATTGAAAACAATCCAAAAGGAAGTGTGCTTAT
+AACATCATTAAATGAATTAGATGCTGCCTTAGAGGGTAAAGTAGGTACTGTGATTAAAAA
+GTAATTGAATTGAAACGCTTTTCAATTACTATATGTCAAATGCATGATTTTTATTATTTA
+TGTGCACCCCCTGAAAATAATGCCTCTATTTTGATGCGGGGTGCACTTTCTTAATTTATA
+TTTATAAAATCTTTAAGGTAGAAATGCTAGGTTAAATGTCGAAGGAGATGAAACCGTGGA
+AAATACAATTAATGAAAGTGAAAAGAAAAAACGATTTAAATTAAAAATGCCAGGTGCATT
+TATGATTTTATTCATTTTAACGGTTGTTGCAGTTATAGCAACATGGGTTATTCCTGCTGG
+TGCATATTCTAAACTTTCTTACGAACCTTCATCCCAAGAACTAAAGATAGTTAACCCTCA
+TAACCAAGTGAAAAAGGTTCCGGGTACGCAACAGGAACTAGACAAAATGGGGGTTAAAAT
+TAAGATTGAACAATTTAAATCAGGTGCAATTAATAAGCCAGTATCAATTCCGAATACTTA
+TGAAAGATTAAAGCAACATCCAGCTGGACCAGAACAAATAACAAGTAGCATGGTTGAAGG
+TACGATAGAAGCGGTCGATATCATGGTATTCATTCTTGTACTAGGGGGACTTATTGGCGT
+AGTTCAAGCCAGTGGTTCTTTTGAATCGGGATTGTTAGCTTTAACGAAGAAAACAAAAGG
+GCATGAATTTATGCTAATTGTGTTTGTATCAATACTAATGATTATCGGCGGGACGTTATG
+TGGTATTGAAGAAGAAGCTGTAGCATTCTATCCGATTTTAGTCCCTATATTTATAGCGTT
+AGGATACGATTCTATCGTTTCAGTTGGTGCCATATTCCTTGCCAGCTCTGTCGGTAGTAC
+ATTTTCAACTATTAACCCGTTCTCGGTTGTAATTGCCTCTAATGCCGCTGGTACAACTTT
+TACGGATGGCTTGTATTGGAGAATAGGTGCTTGTATTGTCGGTGCGATTTTTGTTATTAG
+TTATTTATATTGGTACTGTAAAAAAATTAAAAACGATCCTAAAGCGTCATATTCTTATGA
+AGACAAAGATGCTTTTGAACAGCAATGGTCTGTATTAAAAGATGACGATAGTGCCCATTT
+TACTTTGCGTAAGAAGATAATCCTTACATTATTTGTACTACCATTTCCAATTATGGTATG
+GGGAGTTATGACGCAAGGTTGGTGGTTCCCAGTTATGGCTTCAGCATTTTTAATATTTAC
+AATTATAATAATGTTTATTGCTGGGACAGGTAAATCTGGATTGGGAGAAAAAGGAACTGT
+AGATGCATTTGTCAATGGTGCATCAAGTTTAGTAGGTGTATCTTTAATTATTGGTTTAGC
+TCGAGGTATTAATTTAGTGTTGAATGAAGGTATGATTTCAGATACAATCTTACACTTTTC
+ATCATCTTTAGTTCAACATATGAGTGGACCATTATTTATCATCGTATTACTATTTATTTT
+CTTCTGTTTAGGTTTTATCGTGCCATCATCTTCTGGATTAGCGGTATTATCAATGCCTAT
+CTTTGCACCACTAGCTGATACAGTAGGTATACCAAGATTCGTCATCGTTACGACATATCA
+ATTTGGTCAATATGCGATGTTATTCTTAGCGCCGACTGGACTTGTTATGGCCACACTACA
+AATGTTAAACATGCGATATTCACATTGGTTCCGATTTGTATGGCCGGTAGTTGCTTTTGT
+ATTGATTTTCGGTGGCGGAGTACTAATTACGCAAGTACTAATTTATTCATAATTTGAAAT
+GCTATATTATAAAAATACTAATTGGGTTTTATGCATCTCGTAGGTTTGTAGAAATACTAA
+ACTAAGCGAGGTGCATTATTATTTTTGATTAAGAAAATAATGACGGTAATGATAACACTA
+GTAAGTAATTGATACAATGCTCTATTTAATAATGATATTTTTAAAATTTGTTTTTAATGT
+AATGTTAGATCTATGGTATATTATATTTATCGTGGTAAATATGTATTTGCTGTAAACAGT
+TTTTGTAACACATGACACATATGGTTGTGAAAATAGCATATATAAAGGATGGCTATAAAT
+GACACATTTGACAAAGGTTTTAGATACACTAACTGGAATATGCGTAGTATTATTATTTAG
+TAAATATTTTGTGGCGTATGCAAATATGGTGTTTGATTGGAATTTAAGATGGTATTTGCT
+AGAAAACATACCACATTTGCCAATTATATTATTTATTCTGATGTTTATTTTCGGAGTACC
+TTCTGAAATGATAAAAGATAGGCAAAGGAAAAATAACGGTGTTTAATTTATCGATATTTA
+GAGGTGATAAAATTTGTCAACTATTACAAAGACTAAAATAAAAATCTTTATTAATTATTT
+TATTGGTATATTAGCCTTGGCGTTTGCTGCTTATATATTATTTAAATAAAGGTTAGAGTG
+AAACGTGTTTATGAACTAGACGTTCTAGTATAGTTACTACAGCTTTAGTCAGAAGGTATC
+ATTGATAAGATCATATTAAATCAAAGAGGCATTGATATACACTAAAAAGAGGCAAGATTA
+CCTGCCTCTTTTTTAGTTATTAAATATGCGTGTTAATTCTTGGTAATAGTGTTACTGAGA
+CGCATTACGAAGAAATTTATCTTGATTTTCCTTTTTTAAAAAGAAGTGAAGATATCCTAT
+AAAGACTCTAAGTACTATTACAGTGGCTAATAACACTTCGATAAACAAAAGACCTTTCCA
+AATATCTGGAAACATAAGTACAGGCAAACTGTTCTTTAAAGCAGTTGCTGAGATTACTAA
+AGGGAATGTGAAAGCTGAAAATACGGGCGAAAAAGGTTCTTTTAGTAATTTAGGCAATTG
+TATAATGATATAAAAATAAAATATTTGAGCTAATATTAAAAATATAATTACGATAAAAGC
+ATTCGCCTTAGGAAAGGCTATAACATATGCTGCAGCGACTAAAGAAAATGGTGCACAAAT
+TGTCGATGTGTTCGGTTTGATTGACGTCTGCAATGGAAATGCTTTTAATCGCTTGAAAAC
+AATAGGTAAGACAATACAAGTAGCTACAAAGCCATATATTACTGTTAATTGACCTATGAA
+AAAACATCCGCTAACGGGTGCCGTCAATCCTGCGATAGCAATACCAATAAAAAGTACAGT
+CCACGAAGGATAAACATTTTCAAGTGAAAAATCTTTTAAATATTTTATTGAAAAAATAAT
+CATATGCGTCATAATTCCCACAAGGCATAAAATCCAAATAGGCGTTATTAAGCTATTGAT
+AAAAGTTATGTTACTAAAAAATGTATTTAAATAAGTAGTACCTAAAAAGCCAGACATGAA
+AAATGTTGTGAACACTGATGAAACTAGAGGACTGTTTAATTGTTCTTTCACATTATTAAA
+ATATTTGATCATAGTACATAAAAGGTGAATCCAAATCAAGAAAGCAAAGATTCCGCAAAC
+AGCGTTTAAAGTAAGAGATAAGTCTTTTAATAGATTACCCAGGCCTAATAATCCTAAGAC
+TAGTCCTGACGTTACTAGAGGTGCTTTTTGAAGTCTCATGATTTACAACCTTTCTTATGT
+GATTTTCTTCACTAATTATATCATGATAGCTATGGCCAATTAATAAGAAGAGTGTGTACT
+ATTACGTTATTAGAGTATGTATATTGGATTGATAGACACATAATTGACATTTAAATCTCA
+AAATTAATGATATTTGAGGTATGCTTTGCAACCTAAATATTGGAATATGTGGAAAAGTAA
+TTATTTAATTTAAATGATGATTGAAGATTTACAAGGGGGTGTACAAAATGAGAAATCAAA
+TTCAAAAACTATTAGACAGTGATTTGAGCAGTTTACATATATCGAAACAAACAGGAGTTC
+CACAAAGCACAATACACAGAATGAGAAAAAAAGAAAGATCATTAGACAATATGTCATTGA
+AAAACGCTGAACTACTTTATAAATTTGCCAATAGTATATTTAGCAATGAAAATTAAGAAG
+AATAATTAGTGAGTTAAATAATTAAAAAACCACGTCTCGATGGACGTGGTTATTTTTTTA
+AATGTATTTAATTATAGGATTTCGGGCCCCTGAAAAGTCCCTAAAATTTTGAATAGTATC
+AAAATCTATAGGAATAATATAATAATAAAAATCTCTACGCATGCAAATGGTGAGTGTTAA
+AAATCTTGGTGTAGTGTTGGTGTAGTTCAGGTGCAGTATAGATGAAATTTGAGCACAAAA
+TTACTTAATTAAAAATGTATAAACAAGTGTCAGACGCGTGTCAATTGTGTGTCAAAAAAT
+TCTATGACGAATAAGCATGCTTGTTTTAATTACTGTATTTTCGGGGCTTATGCGGGGGAC
+GTTTTGGTGACGCATCATACTATATTACTGAAATTCAAAACAAAAAGAGCCCCGTAATCA
+CGGAACTCTTTTGTTTGGTAATGCGTATAAAAATACCTATAAACGTCCTGGGAGGGATTC
+GAACCCCCGACCGATGGCTTAGAAGGCCATTGCTCTATCCAGCTGAGCTACCAGGACACG
+TTTAACAACACAAGAATTATTATATCTAAATGAACTTAAATTAGCAATACCTTGTAAATA
+AAAAATGTTTATATTTTTCACTATTATAGAGCTATTTATCTAAAAAGGTTCAATAAGACT
+TAAATGCGAATTCAGGCAACTTAATTGTGTTAAATACAGTTTTGAATGCCTAACTGTATT
+TCTTTTCTCTTTAAAATACAGTTAAGTACATTATAAGATGTTGTGCGGATAAACAAACTA
+ATTGCATCAAATTTATTTTAAAATAACAACAACAAAACGTTAAGCGAATAACATTTCGGT
+GATTTAAAAGCTACGCACGTTTTTGTTATCTTCAAATTTAAATTTTAAGGAGTGTTTTCA
+ATGGAAGGTTTATTTAACGCAATTAAAGATACCGTAACTGCAGCAATTAATAATGATGGC
+GCAAAATTAGGCACAAGCATTGTGAGCATCGTTGAAAATGGCGTAGGTTTATTAGGTAAA
+TTATTCGGATTCTAATTTCAATATGTTATGTAAGTAATCAGTATTATTTCAAAGGTGAGG
+GAGAGATTTAAATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAAC
+AACATGATAGTGTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTT
+TACTAGGTAAATTATTTGGATTCTAATATAATAACTAATATTCTTTAAAATAAACTGGGT
+GAGCATACTTTAATGTTATGCACTCAGTTTATTTTATTTGCAGAAATTTGAGCCTCTGTT
+AAGATTTAGATACATAGACAATATAGGAGATGGGGAAATTGGGATATAAAAATATTTTGA
+TAGACTTTGATGATACAATTGTTGATTTTTATGATGCAGAAGAATGGGCGTTTCACTATA
+TGGCGAATGTTTTTAATCATAAAGCAACAAAGGATGATTTTTTAACATTTAAAAAAATCA
+ATCACCAACATTGGGAAGCTTTTCAACAAAATAAATTAACGAAGTCTGAAGTATTATCAG
+AACGATTTGTGAATTACTTCAAACATCATCAAATGGAAGTTGATGGGCATCGTGCAGATG
+TGTTATTTAGAAATGGATTAGCAGAAGCTAAAGTTAAATACTTTGATCAAACATTAGAAA
+CAATTGTCGAATTATCGAAAAGACATGATTTATATATTGTTACTAATGGTGTAACCGAAA
+CGCAAAAGAGAAGGTTAAATCAGACGCCGTTGCATAAATATATTAAAAAGATATTTATAT
+CTGAGGAAACAGGATATCAAAAACCTAATCCGGAATTTTTTAATTATGTTTTTAATGATA
+TTGGTGAGGATGAAAGACAGCACTCGATTATAGTTGGAGATTCTTTAACATCTGACATTC
+TAGGTGGAATCAATGCGGGTATAGCTACTTGCTGGTTTAATTTTAGAGGATTTGATCATA
+ATCCAGGAATTATACCTGATTATGAAATTAATTCATGGAAACAACTAAATGATATTGTAC
+GTTAAGACAAATAATTTGAAAATGATTTATATTAGCTTAAGAAAGTAATTGTAAAAACGA
+CACTACGAAAAGATGCATTTTCATGCTCGTAGTGTCGAATATTATTATCTTATGGTTTCC
+ACAATAAGACATCATGTCCCTCTATATTTTTAGCTTCCACATCTGTAAATCCATGATGTA
+AAAAGAAATCCTTAGAATCATTTCTGCCAATGGCTTTAATTGGCATATTGAAACTCTTTG
+CAAAATCAATCAATTCTGAAGCGTACCCTCTGTTTTGATATTTTGGTAATACTTCTAACT
+TCCATAATAATATATAATCTTCAAAATCTGGGAAGTAGATTTCTTCGACATCACCTTTTT
+TTAGTAATGCCATTCTAGCTCCTAATTGATCTCCGACAAATATGCCATAAAATGGTGAAT
+CTGAACTTGCATCAATCATTTGACCGTTTAACTCATTGACCATGTATAAGTCTTTGTTGC
+CAAACGCTCTAAAGTTTTCGAATAATTCGTCAGTTTTGTAATTAATTTCAAGACGTTTGA
+TTTCACTCATATTTATCTACCCCTTATTTGTTTACTACTATTATTATACATTAAACCACT
+TGTGTTTTTCATTGATTTGAAAATGTAAAACAATGAACATTATAAATTTTCCATGATTAA
+ATTGATTTTCAAGCCGTAAAATTGTCAATCACCGGTATAGACTTTATACTATAGTTTGTA
+ATCGCGTACTTAAGGAGAAAAAACATGGACTGTAAAGTAGTTAGTTTAAATGAAAAAGAT
+CAGTTTATACCAAAAATAAAGAGCAGTGACCCTGTAATAACAGGATTATTTCAATATGAT
+GCAGCTCAACAAACTAGTTTTGAAAAAAGGATGTCTAAAGAAAATAATGGAAGAGAAGCG
+GCATTAGCGAATGTTATTCGTGAATATATGAGTGATTTAAAGCTTTCAAATGAACAAGAA
+TTAAACATACAACATTTAGCTAATGGTTCAAAAGTTGTGATTGGTGGACAACAAGCAGGG
+CTTTTCGGGGGACCATTGTATACATTCCATAAAATATTTTCAATCATTACTTTATCTAAG
+GAATTAACGGATACACATAAGCAACAAGTAGTACCAGTTTTTTGGATTGCAGGAGAAGAT
+CATGATTTCGATGAAGTGAATCATACATTTGTTTATAACGAAAATCATGGGTCGCTGCAT
+AAGGTTAAATATCATACAATGGAGATGCCAGAGACGACAGTCTCTAGATATTATCCTGAT
+AAGGCTGAGTTGAAACAAACTTTAAAAACGATGTTCATTCATATGAAAGAAACTGTTCAT
+ACACAAGGTCTACTGGAGATTTGTGACAGAATTATTGACCAATATGACTCGTGGACTGAT
+ATGTTTAAAGCACTACTGCATGAAACATTTAAAGCATATGGCGTTCTATTTATAGATGCG
+CAGTTTGAGCCGTTAAGAAAAATGGAAGCGCCTATGTTTAAAAAGATTTTGAAAAAACAT
+CAGTTGCTTGATGATGCTTTTAGAGCAACACAACAACGTACTCAAAATCAAGGCTTGAAT
+GCGATGATACAAACAGATACAAATGTTCATTTATTCTTACATGATGAAAATATGCGCCAA
+TTAGTTTCGTATGATGGTAAGCATTTTAAATTAAATAAAACAGATAAGACATATATAAAG
diff --git a/t/data/expected_real_data_core_gene_alignment.aln b/t/data/expected_real_data_core_gene_alignment.aln
new file mode 100644
index 0000000..f2bc3fa
--- /dev/null
+++ b/t/data/expected_real_data_core_gene_alignment.aln
@@ -0,0 +1,2168 @@
+>real_data_1
+ATGAAAAAATCATTACTCGCTGTTGCTGTGGCAGGGGCTGTTTTGTTGTCATCCGCCGTA
+CAGGCGCAGACAACGCCGGAAGGTTATCAATTACAACAGGTGCTGATGATGAGCCGCCAT
+AATCTGCGGGCGCCGCTGGCGAATAATGGCAACGTACTGGCGCAGTCGACGCCGAACGCC
+TGGCCGGCGTGGGACGTTCCCGGCGGGCAACTGACGACGAAAGGCGGCGTGCTGGAAGTC
+TATATGGGACACTACACACGTGAATGGCTGGTCGCGCAGGGGCTGATACCGTCGGGAGAA
+TGTCCGGCGCCCGACACGGTATATGCCTATGCGAATAGTTTGCAGCGCACCGTCGCCACC
+GCGCAATTTTTCATTACCAGCGCTTTCCCCGGCTGTGATATTCCTGTTCATCATCAGGAA
+AAAATGGGCACTATGGACCCTACCTTCAATCCGGTGATTACCGATGATTCCGCCGCGTTC
+CGGCAACAGGCCGTACAGGCGATGGAAAAGGCGCGTAGTCAGCTACATCTTGATGAGAGT
+TATAAACTGCTTGAGCAGATAACGCATTATCAGGACTCGCCGTCCTGCAAAGAGAAGCAT
+CAGTGTTCGCTAATCGACGCGAAAGATACCTTCAGCGCGAACTATCAGCAAGAGCCTGGC
+GTGCAGGGGCCGCTGAAAGTAGGGAACTCGCTGGTGGATGCGTTTACCCTGCAATATTAC
+GAAGGCTTTCCGATGGATCAGGTCGCTTGGGGCGGGATCCACACCGATCGGCAGTGGAAG
+GTGCTGTCAAAACTGAAAAACGGCTACCAGGACAGCCTGTTTACCTCACCCACGGTGGCG
+CGCAATGTCGCTGCGCCGCTGGTAAAATATATCGATAAGGTGCTGGTTGCCGAGCGCGTT
+AGCGCGCCGAAGGTTACCGTGCTGGTGGGGCATGATTCCAATATCGCGTCGCTGCTGACG
+GCGCTGGATTTTAAACCCTATCAGCTCCATGACCAGTATGAGAGAACGCCGATTGGTGGT
+CAGCTTGTCTTCCAACGCTGGCATGACGGTAACGCTAACCGGGATTTGATGAAAATCGAG
+TATGTCTACCAGAGCGCCCGGCAGTTACGTAATGCGGAAGCGTTAACGCTCAAATCGCCT
+GCGCAAAGGGTAACGCTGGAACTGAAAGGATGTCCGGTGGATGCGAACGGCTTCTGTCCG
+CTGGATAAGTTCGATAACGTCATGAACACTGCTGCAAAATAGATGGAAAAGAATAATGAA
+GTCATTCAGACCCATCCGCTTGTAGGATGGGACATCAGCACCGTCGATAGCTATGATGCG
+CTGATGCTGCGTTTACACTACCAGACCCCAAATCGTCCGGAACCGGAAGGGACTGAAGTT
+GGTCAAACGCTCTGGTTAACGACAGATGTAGCCAGGCAATTTATTTCAATATTAGAAGCC
+GGCATCGCCAAAATAGAATCAGGCGATTACCAGGAAAACGAGTATCGTCACCATTAGATG
+GAACTTAAGGATTATTACGCCATTATGGGCGTGAAACCGACGGACGATCTCAAGACGATT
+AAGACCGCCTATCGCCGACTGGCCCGCAAGTACCATCCAGATGTCAGCAAAGAACCCGAT
+GCCGAAGCCCGTTTCAAAGAGGTTGCTGAAGCATGGGAAGTGCTGAGTGATGAGCAACGG
+CGCGCCGAGTATGACCAGTTATGGCAACACCGTAACGATCCACAATTTAATCGCCAGTTC
+CAGCAACACGAAGGCCAGCCGTATAACGCCGAAGATTTTGATGATATTTTCTCGTCTATT
+TTTGGTCAGCACGGTCGTCATTCGCACCACCGCCACGCCGCACGCGGTCATGATATCGAA
+ATTGAAGTGGCGGTATTCCTGGAAGAAACGCTGGAAGAGCACCAGCGTACGATTAGCTAT
+TCCGTCCCCGTTTATAACGCGTTCGGCCTGGTGGAGCGGGAAATTCCCAAAACATTGAAT
+GTGAAAATCCCGGCTGGCGTCAGCAACGGGCAACGAATCAGACTGAAAGGCCAGGGCACG
+CCGGGGGAAAACGGCGGACCTAATGGCGATTTATGGCTCGTTATCCATATTGCCCCGCAT
+CCGCTCTTTGATATCGTCAATCAGGATCTGGAAGTCGTCCTTCCGCTTGCCCCATGGGAG
+GCGGCGCTCGGCGCTAAGGTGTCTGTGCCAACGCTTAAAGAGCGTATTTTGCTGACCATT
+CCCCCCGGCAGCCAGGCAGGTCAGCGGCTGCGTATCAAAGGAAAAGGATTAGCCAGTAAA
+AAGCACACTGGCGATCTCTATGCCATCATCAAAATCGTTATGCCGCCGAAACCTGACGAG
+AAAACAGCTGCCCTGTGGCAACAACTGGCGGACGCGCAGTCGTCCTTTGACCCACGCCAG
+CAATGGGGGAAAGCATAAATGGCTAACATCACTGTCACCTTTACCATCACCGAATTTTGT
+TTGCACACCGGCGTGACGGAAGAGGAGCTAAACGAAATCGTCGGACTTGGCGTAATTGAG
+CCTTACGAAGACGATAACGCCGACTGGCAATTCGACGATCGCGCAGCGAGCGTGGTACAA
+CGCGCGCTACGCTTACGCGAGGAGCTGGCGCTCGACTGGCCAGGGATCGCGGTCGCGTTA
+ACGCTGCTGGAAGAGAATTCACGGCTGCGCGAAGAAAACCGGTTACTGCTGCAACGCCTT
+TCTCGCTTTATCTCGCATCCCTAAATGTCATCTTGTTGGAGATTTACGGATTCGCTAACA
+AGCCTATGGCATACTGCGTTGATGAAGATTTTATTGATTGAAGATAACCAGAAAACCATT
+GAGTGGGTACGTCAGGGACTCACGGAGGCAGGCTATGTGGTTGATTATGCCTGTGATGGA
+CGAGACGGATTACACCTAGCCCTTCAGGAACATTATTCATTGATTATTCTTGATATTATG
+CTGCCGGGGCTTGATGGATGGCAGGTTTTACGCGCGTTGCGCACTGCATATCAGCCCCCT
+GTTATTTGCCTGACGGCGCGCGACTCGGTTGAGGATCGCGTCAAAGGTCTTGAGGCGGGC
+GCTAATGATTACCTTGTTAAGCCTTTTTCCTTCGCCGAACTGCTGGCCCGGGTGAGAGCT
+CAACTCAGACAGCATGTCCCGGTCTTTACCCGACTGACGATCAATGGTCTGGACATGGAT
+GCCACAAAGCAATCGGTGTTACGAAATGGCAAACCGATTTCCCTGACCCGCAAAGAATTC
+CTGCTCCTCTGGTTACTGGCGTCCCGGGCAGGGGAAATCGTGCCCCGAACCGCGATCGCC
+AGCGAAGTTTGGGGAATTAACTTTGATAGTGAAACCAACACCGTTGATGTCGCGATTCGT
+CGGCTGCGCGCCAAAGTAGACGATCCATTTGAAAAGAAGCTCATTATGACCGTCCAGGGG
+ATGGGTTATCGATTACAGGCGGAAACGTCGCAGAATGGTTAAATGAAAAACAAATTGTTA
+TTTATGATGTTGACAATACTGGGTGCGCCTGGGATTGCAACCGCGACAAATTATGATCTG
+GCTCGTTCAGAGTATAATTTTGCGGTAAATGAATTAAGCAAGTCTTCATTTAATCAGGCG
+GCCATTATTGGTCAAGTCGGCACGGATAATAGTGCCAGAGTACGCCAGGAAGGATCAAAA
+CTATTGTCCGTTATTTCACAAGAAGGAGAAAATAATCGGGCGAAAGTCGACCAGGCAGGG
+AATTATAACTTTGCGTATATTGAGCAAACGGGCAATGCCAACGATGCCAGTATATCGCAA
+AGCGCTTACGGTAATAGTGCGGCTATTATCCAGAAAGGTTCTGGAAATAAGGCCAATATT
+ACCCAGTACGGTACGCAGAAAACAGCAGTTGTAGTGCAGAAACAGTCGCATATGGCTATT
+CGCGTCACCCAACGCATGCATACTTTATTGCTCCTTGCCGCACTTTCAAATCAGATTACG
+TTTACCACGACTCAGCAAGGCGATATTTACACGGTGATCCCTCAGGTCACATTAAACGAA
+CCCTGCGTCTGTCTGGTGCAAATTCTCTCTGTGCGCGACGGCGTCGGGGGACAAAGCCAT
+ACACAGCAAAAACAAACGCTATCTTTACCTGCTAATCAACCGATTGAGTTGTCTCGTCTT
+AGTGTAAATATATCTTCAGAGGACTCGGTTAAAATTATTGTTACTGTTTCGGACGGACAA
+TCACTGCATTTATCACAACAATGGCCGCCTTCTGCACAGATGTTTAATGAAGTCCATAGT
+AGTCATGGTCACACACTATTGTTGATCACAAAGCCATCTCTGCAAGCTACGGCATTATTG
+CAACATTTAAAGCAATCGCTGGCCATAACCGGAAAACTGCATAATATTCAACGTTCTCTG
+GAAGATATCTCAGCCGGTTGCATTGTTTTAATGGATATGATGGAAGCGGATAAGAAGCTT
+ATCCACTATTGGCAGGATAATTTAAGCCGCAAAAACAATAATATAAAAACATTATTGTTA
+AATACCCCTGACGATTATCCCTACCGTGAAATTGAAAACTGGCCTCATATTAACGGCGTG
+TTTTACGCCACTGAAGACCAGGAACACGTGGTCAGCGGATTACAGGGTATTCTGCGGGGC
+GAATGCTATTTTTCACAAAAATTAGCCAGTTACCTGATTACACACTCAGGAAATTACCGC
+TACAACAGCACCGAGTCCGCATTACTCACTCATCGCGAAAAAGAGATCCTCAATAAGTTA
+CGTATTGGTGCCTCTAATAATGAAATCGCCAGGTCGCTATTTATCAGCGAGAATACGGTT
+AAGACACATCTTTATAATCTTTTCAAAAAGATAGCTGTCAAAAATCGCACCCAGGCAGTT
+TCAATGAAACGCTATCTGACCTGGATTGTAGCAGCAGAGTTACTGTTCGCTACCGGAAAC
+CTGCATGCCAATGAAGTTGAAGTCGAGGTTCCCGGATTGTTAACCGACCATACCGTCTCT
+TCGATAGGACATGAATTCTATCGTGCATTCAGCGACAAATGGGAAAGCGAATACACCGGC
+AATCTGACCATTAATGAAAGACCCAGTGCGCGTTGGGGAAGCTGGATCACCATAACGGTA
+AATCAGGACGTTATTTTCCAGACCTTTTTATTTCCAATGAAAAGAGACTTCGAGAAAACC
+GTCGTCTTCGCATTAGCGCAAACAGAGGAAGCATTAAATCGCCGACAAATAGATCAAACG
+CTATTAAGTACGAGTGATTTAGCGCGTGATGAATTCATGCGTGTTAAACATGCAGTAGTG
+CTGCTCATGCTTTTTTCGCCATTAACCTGGGCTGGAAATATGACGTTCCAGTTCCGTAAT
+CCTAACTTTGGTGGAAACCCCAATAACGGTTCCTTTTTATTGAATAGCGCCCAGGCGCAA
+AATTCATATAAAGACCCCGCTTATGATAACGATTTTGGTATCGAGACCCCCTCAGCGTTG
+GATAACTTTACGCAGGCTATTCAATCGCAAATTCTGGGCGGCTTGTTGACCAATATTAAT
+ACCGGAAAACCAGGACGTATGGTGACCAATGATTTTATTATCGATATCGCTAATCGCGAC
+GGACAGCTCCAGCTCAACGTCACGGACAGAAAAACGGGAAGAACCTCGACCATCGAAGTG
+TCAGGTTTACAAACTCAGTCAACCGATTTTATGCCGCGCTTACTTATTTTGGTTGCCGTT
+TTATTGTTGAGCGGATGCTTAACTGCCCCGCCGAAACAAGCTGCGAAACCGACATTAATG
+CCCCGCGCACAAAGTTACAAAGATTTGACGCACTTACCTGCTCCCACCGGTAAGATCTTT
+GTTTCGGTATATAACATTCAGGATGAAACGGGCCAATTTAAACCTTACCCGGCAAGTAAC
+TTTTCCACGGCTGTGCCGCAGAGCGCCACCGCTATGTTGGTCACCGCGCTGAAAGATTCG
+CGCTGGTTTATCCCACTAGAACGACAAGGCTTACAGAATCTTTTGAATGAACGGAAAATT
+ATTCGCGCAGCCCAGGAAAACGGCACCGTGGCGATGAATAACCGTATCCCGCTTCAGTCG
+TTGACGGCGGCAAATATTATGGTGGAAGGTTCTATTATTGGTTATGAAAGTAACGTCAAA
+TCCGGCGGGGTCGGCGCAAGATATTTCGGTATTGGCGCCGATACGCAGTATCAGCTGGAT
+CAGATTGCTGTCAACCTGCGCGTGGTTAACGTCAGTACGGGCGAGATCCTTTCTTCGGTG
+AACACCAGTAAAACGATCCTTTCCTATGAAGTACAGGCAGGCGTGTTCCGTTTTATTGAT
+TACCAGCGCTTACTGGAAGGCGAAATCGGCTATACCTCGAACGAACCGGTGATGCTGTGT
+CTGATGTCAGCCATTGAAACCGGCGTTATCTTCCTCATTAATGATGGTATCGATCGCGGA
+CTGTGGGATTTGCAGAATAAAGCGGACAGGCAAAATGATATTCTGGTGAAATACCGTGAG
+CTGTCAGTACCGCCAGAATCCATGTCTATTGCCGTAAATATGAATGACCCGACCAACACG
+GGCGTCAAAACGACGACCGGCAGCGGGTCGATGACCGGAAGCAACGCTGCCGATCTGCAA
+AGCAGTTTCCTGACCTTACTGGTCGCGCAATTGAAGAACCAGGACCCGACTAACCCATTA
+CAAAATAATGAGTTAACGACACAGTTGGCGCAAATCAGTACCGTGAGCGGCATTGAAAAA
+CTGAATACGACGCTGGGGGCTATTTCCGGGCAAATCGATAATAGTCAGTCCCTACAGGCG
+ACCACGCTGATTGGACATGGCGTTATGGTGCCTGGCACCACAATTCTGGCGGGTAAAGGC
+GCGGAAGAAGGGGCCGTGACGTCCACGACGCCGTTTGGCGTGGAATTGCAACAGCCTGCG
+GACAAAGTGACGGCAACCATTACCGATAAAGATGGCCGGGTGGTACGGACGCTGGAGATC
+GGTGAGTTGCGAGCCGGGGTACACACCTTTACCTGGGATGGTAAGCAAACGGACGGAACA
+ACGGTACCGAATGGTTCTTACAACATTGCGATTACCGCCAGCAATGGCGGGACGCAACTG
+GTGGCGCAGCCGCTGCAATTCGCTCTGGTACAGGGCGTGACGAAGGGCAGTAACGGCAAC
+CTGTTGGATCTGGGTACCTACGGCACCACCACACTCGACGAAGTTCGGCAAATAATCTAA
+ATGTCTTTTTCTCAAGCGGTTAGCGGCCTGAACGCTGCGGCCACCAACCTTGATGTTATC
+GGTAATAACATCGCCAACTCCGCCACCTATGGCTTTAAGTCCGGTACGGCATCATTTGCC
+GATATGTTCGCCGGTTCCAAAGTGGGGTTGGGCGTAAAAGTGGCGGGGATTACCCAGGAT
+TTTACCGACGGTACGACAACGAACACCGGGCGCGGGCTGGATGTCGCGATTAGCCAGAAC
+GGTTTTTTCCGCCTGGTAGACAGCAACGGTTCCGTGTTCTATAGCCGCAACGGCCAGTTC
+AAACTGGACGAGAACCGTAACCTGGTCAATATGCAGGGGATGCAGTTGACCGGCTATCCG
+GCCACCGGTACGCCGCCGACCATTCAGCAGGGGGCGAATCCTGCGCCGATCACCATTCCG
+AACACGCTGATGGCGGCGAAATCGACCACCACCGCGTCAATGCAGATCAACCTGAACTCA
+ACGGACCCTGTACCGTCTAAAACGCCCTTTAGCGTGAGTGATGCGGATTCGTATAACAAA
+AAAGGCACCGTCACCGTTTATGACAGCCAGGGTAATGCCCATGACATGAACGTCTATTTT
+GTGAAAACCAAAGATAATGAATGGGCTGTGTACACCCATGACAGCAGCGATCCTGCAGCC
+ACTGCGCCAACAACGGCGTCCACTACGCTGAAATTCAATGAAAACGGGATTCTGGAGTCT
+GGCGGTACGGTGAACATCACCACCGGTACGATTAATGGCGCGACAGCGGCCACCTTCTCC
+CTCAGCTTCCTTAACTCCATGCAGCAGAACACCGGGGCTAATAACATCGTCGCCACCAAT
+CAAAACGGCTATAAGCCTGGCGACCTGGTGAGCTACCAGATTAACAATGATGGCACCGTG
+GTTGGCAACTACTCCAACGAGCAGGAGCAGGTGCTGGGGCAGATTGTGCTGGCTAACTTC
+GCCAACAACGAAGGTCTGGCATCCCAGGGCGATAACGTCTGGGCGGCGACGCAGGCCTCC
+GGGGTAGCGCTGCTGGGGACTGCCGGTTCCGGCAACTTCGGTAAGCTGACGAACGGCGCG
+CTGGAAGCCTCTAACGTGGATTTGAGTAAAGAGCTGGTGAATATGATCGTCGCGCAGCGT
+AACTACCAGTCGAATGCGCAGACCATCAAAACCCAGGACCAGATCCTCAATACGCTGGTT
+AACCTGCGCTAAATGGATCACGCAATTTATACCGCCATGGGGGCGGCCAGCCAGACGCTT
+AACCAGCAGGCGGTAACGGCCAGCAACCTGGCTAATGCCTCAACGCCGGGCTTTCGCGCG
+CAGCTTAACGCGCTACGCGCGGTGCCCGTTGATGGCCTCTCTTTAGCGACGCGCACGTTG
+GTTACGGCGTCGACGCCGGGGGCGGATATGACCCAGGGTCAGTTGGACTACACTTCCCGC
+CCGCTGGATGTTGCGTTACAGCAGGACGGCTGGCTGGTGGTGCAAGCGGCGGATGGCGCT
+GAAGGATATACCCGTAACGGGAATATCCAGGTGGGCCCGACCGGGCAGTTAACCATTCAG
+GGACATCCGGTTATCGGCGAAGGCGGCCCGATTACCGTTCCGGAAGGGTCGGAAATCACC
+ATTGCGGCAGACGGCACGATCTCCGCGCTCAATCCCGGCGACCCGCCAAACACGGTGGCG
+CCCGTTGGGCGGCTGAAGCTGGTCAAAGCGGAAGGCAATGAGGTGCAGCGGAGCGATGAC
+GGTTTATTCCGCCTTACCGCCGAGGCACAGGCTGAACGCGGGGCGGTACTGGCCGCCGAC
+CCGTCAATTCGCATTATGTCGGGCGTGCTGGAGGGCAGTAACGTCAAGCCGGTTGAAGCC
+ATGACCGACATGATCGCCAACGCACGTCGTTTTGAAATGCAGATGAAGGTTATCACCAGC
+GTAGATGAGAACGAAGGGCGAGCTAACCAACTGCTGTCGATGAGTTAAATGATCAGTTCA
+TTATGGATCGCCAAAACCGGTCTGGACGCGCAGCAAACCAATATGGATGTGATTGCCAAT
+AACCTGGCAAACGTCAGCACCAATGGTTTTAAGCGTCAGCGCGCGGTATTTGAAGATCTG
+TTGTATCAGACCATCCGCCAGCCGGGCGCGCAGTCGTCCGAGCAGACGACGCTGCCTTCC
+GGGCTGCAAATCGGTACCGGCGTGCGTCCGGTCGCCACGGAGCGCCTGCACAGTCAGGGG
+AACCTGTCGCAGACCAACAACAGTAAAGATGTGGCGATTAAAGGGCAGGGCTTTTTCCAG
+GTCATGCTGCCGGACGGTACGTCTGCCTATACCCGCGACGGCTCTTTCCAGGTGGATCAG
+AATGGTCAACTGGTGACGGCGGGCGGTTTTCAGGTGCAGCCGGCAATCACCATTCCGGCC
+AACGCGTTAAGCATCACGATTGGCCGCGACGGCGTGGTCAGCGTTACCCAGCAAGGGCAG
+GCCGCGCCGGTTCAGGTCGGGCAGCTTAACCTGACCACCTTTATGAACGACACCGGTCTG
+GAAAGCATCGGCGAGAACCTCTATATCGAAACGCAATCGTCCGGCGCGCCGAACGAAAGC
+ACGCCGGGGCTCAACGGCGCGGGGTTGTTGTATCAAGGGTATGTCGAAACGTCGAACGTT
+AACGTGGCGGAAGAGCTGGTGAACATGATTCAGGTTCAACGCGCCTATGAAATTAACAGT
+AAAGCAGTATCGACGACCGATCAGATGCTGCAGAAACTGACGCAACTCTAAATGGCCCTG
+ATGGTCGCGACGCTGACAGGATGCGCCTGGATACCCGCTAAACCGCTCGTGCAGGGGGCG
+ACCACGGCGCAGCCGATACCTGGCCCGGTACCGGTGGCGAATGGCTCCATATTTCAGTCT
+GCGCAGCCGATTAATTATGGCTATCAGCCGCTTTTTGAAGATCGTCGACCGCGTAATATC
+GGCGATACGCTCACGATTGTGTTACAGGAAAACGTCAGCGCCAGTAAAAGCTCGTCGGCA
+AATGCCAGCCGCGACGGCAAAACCAGCTTTGGTTTTGATACGGTACCGCGTTATCTGCAG
+GGATTATTCGGTAATTCCCGCGCGGATATGGAGGCCTCCGGCGGCAACTCTTTTAATGGT
+AAAGGCGGCGCGAATGCCAGCAATACCTTTAGCGGCACGCTGACCGTGACCGTCGATCAG
+GTTCTGGCCAATGGCAATTTACACGTCGTGGGGGAAAAACAGATCGCGATTAATCAGGGA
+ACGGAATTCATCCGCTTCTCCGGCGTGGTAAATCCACGCACCATCAGCGGTAGCAACTCT
+GTTCCCTCGACACAGGTGGCGGATGCGCGGATTGAATATGTCGGGAACGGCTATATTAAC
+GAAGCGCAAAATATGGGCTGGCTGCAACGTTTCTTCCTTAATTTGTCGCCGATGTAAGTG
+TTTAAAGCTCTTGCAGGAATCGTTCTGGCACTGGTTGCCACTCTGGCGCACGCCGAGCGT
+ATCCGGGATCTGACCAGTGTCCAGGGAGTACGGGAAAACTCGCTGATCGGCTACGGGCTG
+GTGGTCGGGCTGGACGGTACGGGCGACCAGACGACCCAGACGCCATTTACCACCCAGACG
+CTGAATAACATGCTGTCACAACTGGGGATTACGGTCCCCACCGGCACCAATATGCAGTTG
+AAAAACGTGGCGGCGGTGATGGTGACGGCGTCGTATCCGCCTTTTGCGCGACAGGGACAA
+ACGATCGATGTCGTCGTTTCCTCAATGGGGAACGCTAAAAGTCTGCGTGGCGGGACGTTA
+TTAATGACGCCGTTAAAAGGGGTGGACAGCCAGGTGTATGCTCTGGCGCAGGGCAATATT
+CTGGTCGGCGGCGCGGGCGCTTCCGCAGGCGGCAGTAGCGTGCAGGTTAACCAGCTTAAT
+GGCGGGCGCATCACTAATGGCGCGATTATCGAACGCGAGTTGCCGACTCAGTTCGGCGCT
+GGCAACACCATTAATCTGCAATTGAACGACGAAGATTTTACGATGGCGCAGCAAATTACC
+GACGCCATCAACCGCGCCCGCGGTTACGGCAGCGCCACTGCGCTTGATGCGCGAACGGTA
+CAGGTACGCGTGCCCAGCGGCAACAGCTCGCAGGTGCGTTTTCTGGCGGACATTCAAAAT
+ATGGAAGTCAACGTGACGCCGCAGGATGCAAAAGTCGTGATCAACTCGCGTACCGGTTCG
+GTGGTCATGAATCGGGAAGTCACGCTGGATAGCTGCGCTGTGGCGCAGGGCAATTTGTCA
+GTGACAGTCAATCGCCAACTCAACGTCAACCAGCCGAATACGCCATTTGGCGGCGGGCAG
+ACCGTGGTGACGCCACAGACTCAGATAGATTTGCGTCAGAGCGGCGGATCGCTACAGAGC
+GTGCGTTCCAGCGCCAATCTGAACAGCGTAGTGCGCGCGCTGAATGCGCTTGGCGCGACG
+CCGATGGATCTGATGTCGATTTTGCAGTCCATGCAGAGCGCGGGCTGTCTACGCGCCAAA
+CTGGAAATCATCTGAATGATCGGAGACGGTAAATTGCTGGCCAGCGCGGCCTGGGATGCG
+CAATCTCTGAACGAACTGAAAGCGAAAGCGGGCCAGGACCCGGCGGCGAATATCCGTCCT
+GTGGCCCGTCAGGTGGAAGGGATGTTTGTGCAGATGATGCTGAAAAGTATGCGCGAGGCT
+TTACCCAAAGATGGTTTATTCAGCAGCGATCAGACGCGTCTGTATACCAGCATGTATGAC
+CAGCAGATCGCCCAGCAGATGACCGCCGGTAAGGGATTGGGGCTGGCGGATATGATGGTT
+AAACAGATGACGGGCGGGCAGACGATGCCTGCAGATGATGCGCCGCAAGTACCGCTTAAA
+TTCTCCCTGGAGACGGTAAACAGCTATCAAAATCAGGCGCTGACCCAACTGGTGCGCAAA
+GCCATACCGAAAACGCCGGACAGCAGCGATGCGCCGCTCTCCGGCGACAGTAAAGACTTT
+CTGGCCCGGCTTTCGCTCCCGGCGAGGCTGGCCAGCGAACAAAGCGGGGTGCCGCATCAT
+CTGATTCTGGCGCAGGCGGCGCTGGAGTCCGGCTGGGGGCAGCGGCAAATCCTGCGGGAG
+AATGGCGAACCCAGCTATAACGTATTTGGCGTGAAAGCGACCGCCAGTTGGAAAGGGCCG
+GTGACGGAAATCACCACCACTGAATACGAAAATGGCGAAGCGAAAAAAGTGAAAGCGAAA
+TTCCGCGTCTATAGCTCGTATCTGGAGGCGTTATCGGATTATGTCGCGCTGTTAACGCGT
+AACCCACGCTACGCTGCCGTGACCACTGCCGCCACGGCAGAGCAGGGCGCAGTGGCTCTG
+CAAAACGCCGGATACGCCACTGACCCGAATTACGCGCGTAAATTGGCCAGCATGATTCAG
+CAGTTGAAAGCGATGAGTGAAAAGGTCAGCAAAACCTACAGCGCGAATCTCGACAATCTC
+TTTTAAATGTCCAGCTTGATTAATCACGCCATGAGCGGACTTAACGCCGCGCAGGCCGCG
+TTAAATACGGTCAGTAATAACATCAACAATTATAACGTTGCGGGTTATACCCGGCAGACA
+ACTATTCTGGCGCAGGCAAACAGTACGTTAGGGGCTGGCGGCTGGATAGGTAATGGCGTT
+TACGTTTCAGGCGTACAGCGCGAATATGATGCGTTTATCACTAATCAGCTACGCGGCGCG
+CAAAACCAGAGCAGCGGCTTAACCACGCGCTATGAACAAATGTCGAAAATCGACAACCTG
+CTGGCCGATAAATCCAGCTCACTGTCTGGCTCGCTGCAGAGTTTTTTTACCAGCCTGCAA
+ACGTTAGTCAGTAATGCGGAAGATCCTGCGGCGCGTCAGGCGCTGATTGGTAAAGCGGAA
+GGGCTGGTAAACCAGTTCAAAACCACCGATCAGTATCTGCGCGATCAGGATAAACAGGTC
+AATATCGCGATTGGCTCCAGCGTGGCGCAAATCAACAATTACGCGAAGCAGATAGCTAAC
+CTGAACGATCAAATCTCCCGTATGACGGGCGTAGGCGCGGGCGCATCGCCGAACGACCTG
+CTCGATCAACGTGATCAGTTGGTTAGCGAGCTTAACAAGATCGTTGGCGTCGAGGTGAGT
+GTACAGGACGGCGGCACCTATAACCTGACGATGGCCAATGGCTATACGCTGGTGCAGGGG
+TCGACGGCGCGTCAGTTGGCGGCGGTTCCCTCCAGCGCCGACCCGACGCGAACGACTGTC
+GCTTATGTCGATGAGGCCGCCGGTAACATCGAAATTCCGGAAAAGTTGCTGAACACCGGT
+TCGCTCGGCGGGCTACTGACGTTCCGTTCTCAGGATCTGGATCAGACTCGTAATACGCTG
+GGCCAGTTGGCGTTGGCGTTTGCCGATGCGTTTAACGCGCAGCATACCAAAGGTTATGAC
+GCCGACGGCAATAAAGGGAAAGACTTCTTTAGCATTGGCTCGCCGGTGGTATATAGCAAC
+AGTAATAATGCCGATAAAACGGTATCGCTAACCGCTAAGGTGGTCGACAGCACGAAGGTT
+CAGGCGACGGATTATAAGATTGTTTTTGACGGTACAGACTGGCAGGTTACTCGCACTGCG
+GATAACACCACCTTCACGGCAACAAAAGATGCTGACGGAAAACTGGAGATTGACGGTCTG
+AAAGTGACGGTAGGGACTGGCGCACAGAAAAACGACAGTTTTCTTCTCAAGCCGGTCAGC
+AATGCTATCGTCGACATGAACGTTAAAGTGACAAATGAAGCCGAGATTGCGATGGCGTCT
+GAGTCAAAACTCGATCCTGATGTGGATACCGGCGACAGCGATAACCGCAATGGTCAGGCA
+TTGCTGGACTTACAAAACAGCAATGTAGTGGGCGGCAACAAAACCTTTAACGATGCTTAC
+GCCACGTTGGTCAGCGATGTGGGTAACAAAACGTCAACGCTGAAAACCAGCAGCACCACG
+CAGGCGAATGTGGTTAAACAGCTTTATAAACAGCAACAGTCGGTTTCCGGCGTTAACCTC
+GACGAAGAGTACGGCAATTTGCAGCGTTATCAGCAGTATTATCTGGCGAATGCGCAAGTA
+TTGCAGACCGCGAATGCGCTGTTTGATGCGTTATTGAATATTCGCTAAATGCGTATCAGT
+ACCCAGATGATGTACGAACAAAATATGAGCGGCATCACTAATTCTCAGGCCGAATGGATG
+AAGCTGGGCGAGCAGATGTCTACCGGTAAGCGCGTTACCAACCCATCTGACGATCCGATC
+GCCGCGTCGCAGGCGGTAGTACTCTTTCAGGCGCAGGCGCAGAATAGCCAGTACGCCCTG
+GCGCGTACGTTTGCCACCCAAAAAGTGTCGCTGGAAGAAAGCGTACTCAGTCAGGTGACG
+ACGGCGATTCAAACCGCGCAGGAAAAAATCGTCTATGCCGGAAACGGCACGTTAAGCGAC
+GATGACCGCGCGTCGCTGGCGACGGATTTACAGGGGATCCGCGATCAGCTGATGAACCTG
+GCAAACAGCACTGACGGCAATGGTCGCTATATCTTTGCCGGGTATAAAACGGAAGCGGCG
+CCATTCGACCAGGCGACAGGTGGTTATCATGGCGGCGAGAAAAGTGTTACCCAGCAGGTG
+GATTCCGCACGCACGATGGTAATTGGCCATACGGGAGCGCAAATTTTTAATAGCATCACC
+AGCAATGCGGTGCCGGAACCGGATGGCTCGGACTCCGAAAAGAATCTGTTTGTCATGCTC
+GATACGGCAATTGCCGCGCTCAAGACCCCGGTGGAAGGCAATGACGTGGAAAAAGAAAAA
+GCCGCTGCCGCCATTGATAAAACCAATCGCGGCTTAAAAAATTCGCTTAATAACGTCCTG
+ACCGTTCGTGCGGAACTGGGAACGCAACTGAGCGAACTCAGTACGCTGGATTCACTGGGA
+AGCGACCGTGCGCTGGGACAGAAGCTACAGATGAGCAACCTGGTAGATGTGGACTGGAAC
+TCGGTCATTTCCTCCTACGTCATGCAACAGGCGGCATTACAGGCGTCCTATAAAACGTTT
+ACCGACATGCAGGGAATGTCGCTTTTCCAGTTGAACCGGTAAATGGAGATAATTTTTTAT
+CACCCGACATTTAACGCCGCCTGGTGGGTAAATGCGCTGGAGAAGGCTCTCCCACATGCG
+CGCGTTCGTGAATGGAAGGTCGGTGATAACAACCCCGCAGACTATGCGCTTGTATGGCAG
+CCCCCGGTTGAAATGCTGGCCGGAAGACGCTTAAAAGCCGTCTTTGTGCTGGGCGCGGGG
+GTGGATGCAATTCTGAGTAAATTAAATGCGCATCCGGAAATGCTGGACGCCTCCATTCCT
+CTATTCCGTCTGGAAGATACCGGAATGGGCCTGCAAATGCAGGAGTATGCCGCCAGCCAG
+GTATTACACTGGTTCCGTCGTTTCGATGATTATCAGGCGCTGAAAAATCAGGCGCTATGG
+AAACCGTTGCCGGAATATACCCGCGAAGAGTTTAGCGTCGGTATCATAGGCGCAGGGGTA
+CTGGGCGCAAAAGTGGCAGAAAGTCTACAGGCGTGGGGGTTCCCGTTACGTTGCTGGAGT
+CGTAGCCGCAAATCCTGGCCTGGCGTGGAAAGTTATGTAGGGCGTGAAGAACTGCGCGCT
+TTCCTGAACCAGACGCGGGTGCTGATTAATCTGCTGCCGAATACGGCCCAAACGGTAGGA
+ATTATTAATAGCGAATTGTTGGATCAATTGCCGGATGGCGCTTACGTGCTGAATCTCGCG
+CGCGGCGTTCATGTTCAGGAGGCGGATCTGCTGGCTGCGCTTGATAGCGGTAAGCTAAAA
+GGCGCGATGTTGGATGTCTTTAGCCAGGAACCGTTACCGCAGGAAAGTCCATTATGGCGC
+CATCCGCGAGTCGCCATGACGCCGCACATTGCGGCAGTCACCCGTCCGGCGGAAGCCATC
+GATTATATTAGCCGCACCATTACCCAGCTGGAGAAGGGAGAGCCGGTGACGGGGCAGGTG
+GATCGGGCGAGAGGATAT---ATGTCCGTAATCAAGAAAAATATCCCTGCCATAGGCCTG
+TGTATCTGCGCTTTTTTTATCCATTCTGCGGTAGGGCAACAAACGGTACAGGGCGGCGTT
+ATCCATTTTCGCGGCGCGATTGTTGAGCCACTGTGCGATATTTCTACTCACGCCGAAAAT
+ATTGATTTAACCTGCCTACGCGAAGGTAAAAAGCAAATGCACCGGATAGACCTTCGGCAG
+GCATCTGGATTACCGCAGGATATTCAGTCCATTGCGACGGTACGGCTGCATTATCTCGAT
+GCGCAAAAAAGCCTGGCGGTGATGAATATTGAGTACCGTTAAATGGCAAACCATCGTGGC
+GGTTCCGGTAATTTTGCGGAAGACCGCGAAAGAGCATCAGAAGCAGGTCGTAAAAGTGGT
+CAGCACAGCGGGGGCAATTTTAAGAATGACCCGCAGCGTGCATCCGAAGCAGGCAAAAAA
+GGGGGCAAAAGCAGTAACCGTAATCGCTAGATGGTAATGTCCGCACCAGGACACATTGTT
+TACAGTAGTTACAACACCCTGTACGGACATTCTCTCTCCGGTGGTGGTCTTGTCATCTTA
+AAAGCTCTCATCATTTCCCTTACTGTCCATACCCATGACGCCATATGTGGTGCGCGTAGC
+CGTGTGTGGCGTCGTTTCAAAAAGCAAGCTAAGGCTTACAAGGAAGCCAACCCTCAGATG
+TGTGTGCGCATAATCGCGTTCAAGAGAACGCGGGTGATGTATACCTACAACTCAAGGTGC
+TATCCATGGGAAGACAAAAAGCAGTGAATGAAACGAATTTTCCTTACCTGCGCGGCGTTG
+TTGTTCAGCAGTCAGGCGTTGGCCGATGAGTGTGCCAGCGCCAGTACGCAGCTGGAAATG
+AATCGCTGCGCCGCCGCGCAATACCAGGCGGCAGATAAAAAGCTGAACGAAACCTATCAA
+AGCGCGATTAAGCGTGCGCAACCGCCGCAGCGTGAGCTATTGCAAAAAGCGCAGGTGGCA
+TGGATTGCCCTGCGCGACGCCGATTGCGCGCTGATTCGCTCAGGTACGGAGGGCGGCAGC
+GTTCAACCCATGATCGCCAGCCAGTGCCTGACCGATAAAACGAACGAACGCGAAGCGTTT
+TTAGCCTCGCTGCTGCAATGTGAAGAGGGTGATTTGAGCTGCCCACTGCCGCCAGCCGGT
+TAAGTGCGTATATTCGCGGTGAGCATAATGGTGATTACCCTGAGCGGCTGCGGCAGTATT
+ATCAGCAGAACGATCCCCGGACAAGGACACGGCAACCAGTATTACCCTGGCGTGCAGTGG
+GATATGCGTGATTCCGCATGGCGCTATATCACTATCCTCGATCTGCCCTTCTCACTGATC
+TTCGATACACTGCTACTGCCGCTCGATATTCACCACGGGCCTTATGAGATGTGCCAACGT
+GCGATCGCCAATATTGATATCAGCAAAGAGTATGACGAAAGCATGGGCAGTAACGATGTG
+CATTATCAGTCGTTTGCTCGTATGGCGGATTTCTTTGGTCGTGATATGCAGGCGCATCGC
+CACGACCAGTTTTTTCAAATGCACTTTCTTGATACCGGGCAGATTGAGCTACAGCTCGAC
+GATCATCGCTATTCGGTGCAGGCGCCGCTATTTGTGCTAACGCCGCCCTCGGTGCCGCAT
+GCTTTTATTACCGAATCGGATAGCGATGGTCATGTTCTGACGGTACGCGAAGAGCTGGTT
+TGGCCGCTGCTGGAAGTGCTTTATCCCGGCACCAGAGAGGCCTTCGGCCTGCCGGGAATC
+TGCCTGTCGCTGGCGGATAAACTCAACGAGCTGGCGGCGCTCAAACATTACTGGCAGCTA
+ATTGAGCGGGAGTCCACGGAACAACTGGCTGGCTGCGAACATACCTTGGTACTACTGGCG
+CAGGCGGTATTTACCTTGCTGTTGCGTAATGCGAAGCTGGACGATCATGCCGCAACCGGG
+ATGCGCGGTGAACTGAAACTTTTTCAGCGCTTTACCCTGTTAATTGACAACCACTTCCAT
+CAGCACTGGACGGTGCCCGATTATGCCTGTGAGTTGCATATTACCGAATCTCGTTTGACC
+GATATTTGCCGACGTTTTGCTAATCGCCCGCCTAAACGCCTGATTTTTGATCGGCAATTA
+CGCGAGGCGAAACGACTGCTGCTTTTTTCCGACAATGCTGTCAACGAGATCGCCTGGCAA
+TTAGGTTTTAAAGATCCGGCTTATTTCGCCCGTTTCTTTAATCGCCTTGCTGGCTGTTCT
+CCTTCGCAGTTTCGCCAACGTGAAGTTCCTTCTTTTCTCAACTAAATGGGACGCACACCG
+GATTACAAAGCCGCCTTTGGCTGCGCTCTGGGCGCTAACCCAGCCTTCTACGGCCAGTTT
+GAGCAGAACGCCCGTAACTGGTACACCCGTATTCAGGAGACCGGCCTGTACTTTAACCAT
+GCAATCGTCAACCCGCCCATTGACCGCCACAAACCTGCCGACGAAGTGAAAGACGTCTAT
+ATCAAGCTGGAGAAAGAGACGGACGCCGGGATTATTGTCAGCGGGGCGAAAGTTGTCGCC
+ACTAACTCCGCCCTGACTCACTACAACATGATTGGTTTCGGCTCAGCCCAGGTGATGGGC
+GAAAACCCGGATTTTGCTCTGATGTTTGTCGCGCCAATGGATGCCGAAGGCGTAAAACTT
+ATTTCGCGCGCCTCGTATGAAATGGTCGCGGGCGCGACGGGCTCGCCGTTTGATTATCCC
+CTCTCCAGCCGTTTTGATGAAAACGATGCCATTCTGGTGATGGACAAGGTGCTGATCCCG
+TGGGAAAACGTATTAATTTACCGTGATTTCGATCGTTGTCGTCGCTGGACGATGGAAGGC
+GGCTTTGCCCGTATGTATCCACTGCAAGCCTGTGTTCGTCTGGCGGTAAAACTTGATTTC
+ATTACCGCGCTGCTGAAAAAATCGCTCGAATGTACGGGTACCGTAGAGTTCCGGGGCGTG
+CAGGCCGATCTCGGCGAAGTCGTGGCCTGGCGCAATATGTTCTGGGCATTGAGCGATTCT
+ATGTGTTCTGAAGCAACCCCGTGGGTAAACGGCGCCTGGCTACCGGACCACGCCGCGCTG
+CAAACCTATCGTGTGATGGCCCCAATGGCCTACGCGAAAATTAAAAATATTATTGAACGT
+AACGTTACCAGCGGCCTGATTTACCTGCCTTCCAGCGCCCGCGATCTGAATAATCCGCAA
+ATCGACCAGTACCTGGCGAAATACGTACGCGGCTCTAACGGAATGGACCATGTTGAACGT
+ATCAAAATTCTTAAATTGATGTGGGATGCCATCGGCAGCGAGTTTGGCGGTCGCCATGAG
+CTGTACGAGATTAACTACTCGGGCAGCCAGGATGAAATTCGTCTGCAGTGTCTGCGTCAG
+GCCCAGAGCTCCGGCAATATGGATAAGATGATGGCAATGGTCGATCGCTGCCTCTCCGAA
+TACGATCAGAATGGCTGGACGGTTTCGCATTTGCACAATAACGACGACATCAATCAACTG
+GATAAGCTGCTGAAATAAATGCAAGTAGATGAACAACGTCTGCGTTTTCGCGATGCGATG
+GCAAGTCTGGCGGCAGCGGTCAACATCGTAACCACGGCGGGTCACGCCGGACGCTGCGGT
+ATCACCGCAACAGCGGTTTGCTCAGTCACTGATACGCCGCCCTCCGTGATGGTATGTATT
+AATGCCAATAGCGCCATGAACCCCGTTTTTCAGGGCAACGGCAGGCTGTGCATTAATGTA
+CTTAACCATGAGCAGGAGCTGATGGCGCGCCACTTTGCCGGTATGACGGGGATGGCGATG
+GAGGAGCGTTTTCACCAGCCATGTTGGCAAAACGGGCCGCTGGGCCAGCCGGTACTTAAC
+GGCGCGCTGGCCAGTCTTGAAGGCGAGATCAGCGAGGTACAAACCATTGGCACGCATCTG
+GTGTATCTGGTGGCGATCAAAAATATTATTCTTAGCCAGGAGGGGCATGGCCTGATTTAT
+TTCAAACGCCGTTTTCATCCGGTCAGACTTGAGATGGAAGCGCCTGTTTAAATGAAGGGT
+ACTGTTTTCGCCGTTGCGTTAAACCATCGCAGCCAGCTTGATGCCTGGCAAGAGGCTTTC
+TCTCAGCCTCCCTATAATGCGCCGCCTAAAACCGCAGTGTGGTTCATCAAGCCGCGTAAT
+ACGGTGATTCGTCACGGCGAACCCATTCCTTATCCGCAGGGAGAAAAGGTACTGAGCGGC
+GCGACAGTGGCGCTCATTGTGGGGAAAACCGCCAGCCGGATACGCCCTGAAGCGGCGGCG
+GACTATATCGCCGGGTATGCGCTGGCTAACGAGGTCAGCCTGCCGGAAGAGAGCTTTTAT
+CGCCCGGCGATTAAAGCGAAATGTCGCGATGGCTTTTGCCCGCTGGGTGAAATGGCGCCG
+CTGAGTGATGTGGATAATCTCACCATTATCACTGAAATCAACGGACGAGAAGCGGACCAC
+TGGAATACTGCCGATTTACAGCGTAGCGCCGCACAACTGCTTAGCGCGTTAAGTGAGTTC
+GCTACACTTAACCCTGGCGATGCGATCTTACTTGGTACGCCGCAGAATCGCGTTGCGCTG
+CGTCCCGGCGATCGGGTGCGTATTCTGGCGAAAGGTTTACCCGCGCTGGAAAATCCGGTT
+GTCGCAGAAGATGAATTCGCCCGCCACCAGACGTTTACGTGGCCGCTGTCAGCGACGGGA
+ACGTTATTTGCGCTGGGGTTGAACTACGCCGATCACGCCAGCGAGCTGGCATTTACGCCG
+CCGAAAGAGCCGCTGGTATTTATCAAAGCGCCAAACACCTTTACCGAACATCACCAAACG
+TCGGTGCGCCCGAACAACGTCGAATATATGCACTACGAAGCCGAGCTGGTCGTGGTGATT
+GGCAAAACGGCGCGTAAGGTGAGCGAAGCCGAAGCCATGGAGTATGTGGCCGGTTACACC
+GTCTGTAACGACTACGCGATCCGCGACTATCTGGAAAACTACTACCGTCCGAATCTGCGG
+GTAAAAAGCCGCGACGGCCTGACGCCGATAGGCCCGTGGATTGTGGATAAAGAGGCGGTT
+TCTGATCCGCACAACCTGACGTTACGCACCTTTGTCAACGGTGAGCTGCGGCAGGAAGGG
+ACGACCGCCGATCTGATCTTCAGCATCCCGTTCCTGATTTCTTATCTGAGCGAATTTATG
+ACGTTGCAACCGGGCGACATGATTGCCACCGGTACGCCGAAAGGGCTGTCCGATGTGGTG
+CCGGGGGATGAAGTTGTCGTTGAAGTAGAAGGCGTGGGTCGCCTGGTTAACCGAATCGTC
+AGTGAGGAGAGCGCAAAATGAATGAAAAATGCTTTCAAAGACGCGTTAAAAGCGGGGCGC
+CCGCAAATCGGTTTGTGGCTGGGGCTTGCCAACAGTTACAGCGCTGAACTGTTAGCGGGC
+GCCGGCTTCGACTGGCTACTGATTGACGGTGAACACGCGCCAAACAACGTGCAGACGGTG
+TTGACCCAGTTGCAGGCGATTGCGCCTTATCCCAGCCAGCCGGTGGTGCGTCCGTCATGG
+AACGATCCGGTACAGATTAAGCAACTGCTCGACGTCGGCGCGCAAACGCTGCTGATACCG
+ATGGTGCAGAATGCCGATGAAGCGCGAAACGCCGTGGCGGCTACGCGTTATCCGCCTGCC
+GGTATTCGCGGCGTGGGCAGCGCGCTGGCGCGGGCATCGCGCTGGAATCGCATTCCGGAC
+TATCTCCACCAGGCCAACGACGCCATGTGCGTACTGGTGCAGATTGAAACGCGTGAGGCG
+ATGAGCAATCTGGCGTCAATTCTCGACGTGGATGGCATTGACGGCGTGTTTATTGGCCCG
+GCGGATCTCAGCGCCGATATGGGCTTTGCCGGCAATCCGCAGCACCCGGAAGTGCAGGCG
+GCGATTGAGAACGCCATCGTGCAGATACGCGCGGCGGGGAAAGCGCCGGGGATTCTGATG
+GCCAATGAAGCACTGGCGAAACGTTATCTGGAACTGGGGGCGCTATTTGTCGCCGTCGGC
+GTTGACACCACGCTGCTGGCGCGCGGAGCGGAGGCGCTGGCGGCGCGCTTTGGCGCAGAA
+AAAAAACTGTCCGGTGCGTCCGGCGTCTATTAAATGCATGATTCATTAACCATCGCCTTG
+CTTCAGGCGCGCGAAGCGGCAATGACCTATTTCCGCCCCATCGTTAAAAGCCACAATCTG
+ACCGACCAGCAATGGCGCATTGTGCGAATCCTGGCCGATAGCCCCTCTATGGATTTTCAC
+GAGCTGGCCTTTCGTACCTGTATTTTGCGTCCAAGTCTGACCGGAATATTGACGCGCATG
+GAGCGAGACGGACTGGTGTTGCGACTCAAGCCGGTTAACGATCAGCGTAAGTTATATGTC
+ATGTTGACGGAGCAGGGACAAACGTTGTACGCCCGTGCCCGGAGCGAGGTAGAAGAGGCT
+TATCGAAAAATTGAGGCCGATTTCACGCCCGAAAAAACACAGCAATTGATGCTGCTGCTG
+GACGATCTTATTGCTCTGGGGCGCCAGCATCCTGATAGCGAAGCGGAAGCATAGATGAGC
+GACACATCATCTGCACTTCCGGAAAGCCCCGAGTCTGTCGGTTCGCACAACGCGCTCAGC
+ACGGGTCAACAAACCGTCATAAATAAACTGTTCCGCCGACTGATCGTATTTTTATTCGTG
+TTGTTTATCTTCTCGTTTTTAGACCGTATCAACATCGGTTTTGCCGGGTTGACGATGGGG
+CAGGATCTGGGGTTAAGCGCCACCATGTTTGGTCTTGCCACGACGCTGTTTTACGCCACC
+TACGTCATTTTCGGCATTCCCAGCAACGTGATGTTGAGCATCGTCGGCGCCCGCCGCTGG
+ATTGCGACCATTATGGTGCTATGGGGCATTGCATCTACCGCCACGATGTTCGCGGTGGGA
+CCGAAAAGCCTGTATGTGCTGCGAATGCTGGTGGGCATTACCGAAGCGGGCTTTTTGCCA
+GGAATATTGCTCTATTTAACCTACTGGTTCCCGGCATTTTTCCGCGCCCGCGCCAACGCA
+TTATTTATGATTGCCATGCCGGCCACTACCGCGTTGGGGTCAATTGTCTCCGGCTATATT
+TTATCGCTGGACGGCATATTCAATCTGCATGGATGGCAGTGGTTATTCCTGTTGGAAGGA
+TTTCCGTCAGTTTTGTTAGGCATTATGGTCTGGTTTTACCTGGATGATACCCCGGCAAAA
+GCCAAATGGCTGACGGCAGAGGATAAAAAATGTTTGCAGGAGATGATGGATAATGATCGC
+CTGACGCTGGTTCAGCCTGAGGGGGCCATCAGCCATAACGCCATGCAGCAGCGTAGCCTG
+TGGCGCGAAGTATTCACGCCAATTGTACTGATGTATACGCTGGCCTATTTTTGCCTTACC
+AATACGCTTAGCGCCATTAGTATCTGGACGCCGCAAATCCTGAAAAGTTTTAATGAAGGC
+AGCAGCAATATCACCATCGGCCTGCTGGCGGCGATCCCGCAGATTTGTACTGTTCTGGGC
+ATGATTTACTGGAGCCGCCATTCGGACAAACATCAGGAGCGTAAACACCACACTGCGTTA
+CCGTTCCTGTTTGCCGCCGCGGGCTGGCTGCTGGCGTCGGCGACCGACCGTAACCTGATC
+CAGCTCCTGGGGATCGTGATGGCATCCACGGGTTCCTTTAGCGCGATGGCGATCTTCTGG
+ACCACGCCGGATCAGTCGATCAGTTTACGCGCCAGGGCGATAGGCATTGCGGTCATCAAT
+GCCACCGGCAATATTGGCTCCGCGCTCAGCCCGGTTATGATTGGCTGGCTAAAAGATATC
+ACTGGTAGCTTCAATAGCGGACTCTGGTTTGTCGCTTCTCTGTTAGTCGTCGGCGCCGCC
+ATTATCTGGCTCATTCCCATGAAAGCATCGCGTCCGCGCGCCACCCCTTGAATGGGCAAG
+TTAGCGTTAGCAGCAAAAATTACCCACGTGCCGTCGATGTATCTTTCTGAACTGCCAGGA
+AAAAATCACGGTTGTCGTCAGGCAGCCATTGATGGGCATATTGAAATTGGCAAGCGTTGC
+CGCGAAATGGGCGTTGACACCATTATCGTATTCGACACCCACTGGCTGGTGAATAGCGCT
+TACCACATTAATTGTGCCGACCATTTCCAGGGCGTCTATACCAGCAACGAATTGCCGCAC
+TTTATTCGCGACATGACCTATGACTATGACGGTAATCCGGCGCTCGGCCATCTGATCGCC
+GACGAGGCGGTCAAACTGGGCGTGCGCGCCAAAGCGCACAACATCCCGAGCCTGAAGCTG
+GAGTATGGCACGCTGGTGCCGATGCGCTACATGAACAGCGACAAGCACTTCAAAGTGGTC
+TCCATCTCGGCGTTCTGCACTGTGCATGATTTTGCCGACAGCCGCAAACTGGGCGAAGCC
+ATTCTCAAGGCGATTGAGAAATATGACGGTACCGTAGCGGTATTCGCCAGTGGTTCTCTG
+TCGCACCGTTTTATTGACGACCAACGGGCGGAAGAGGGGATGAACAGCTACACCCGCGAG
+TTCGATCATCAAATGGACGAGCGCGTGGTCAAGCTGTGGCGCGAAGGCAAATTCAAGGAG
+TTTTGCACCATGTTGCCGGAGTACGCCGACTACTGCTACGGCGAAGGCAACATGCACGAC
+ACGGTCATGCTACTGGGAATGCTGGGGTGGGACAAATACGACGGCAAGGTGGAGTTCATC
+ACCGACCTGTTCGCCAGCTCCGGTACCGGCCAGGTAAACGCTGTTTTCCCGCTGCCTGCG
+TAAATGAAGAAAATAAATCATTGGATTAACGGCAAAAACGTTGCAGGTAACGACTACTTC
+CAGACCACTAACCCGGCGACCGGTGATGTGCTGGCGGAAGTAGCCTCCGGCGGTGAAGCA
+GAAGTGAACCAGGCTGTCGCGGCGGCAAAAGAGGCGTTCCCGAAATGGGCCAACCTGCCG
+ATGAAAGAGCGCGCGCGCCTGATGCGCCGCCTTGGCGACCTGATTGACCAGCATGTGCCG
+GAAATCGCGGCGATGGAAACCGCCGACACCGGCCTGCCTATTCACCAGACTAAAACGTGC
+TGAGTGCTGATCCCGCGCGCCTCGCATAACTTCGAATTCTTCGCCGAAGTGTGCCAGCAG
+ATGAACGGCAAGACCTATCCGGTTGACGATAAAATGCTCAATTATACGCTGGTGCAGCCC
+GTCGGCGTCTGCGCGCTGGTGTCGCCGTGGAACGTGCCGTTTATGACCGCGACTTGGAAA
+GTTGCGCCGTGCCTGGCGCTGGGTAACACCGCGGTGCTCAAAATGTCCGAGCTGTCGCCG
+CTGACTGCCGACAGGCTGGGCGAGCTGGCACTGGAGGCAGGAATTCCGGCAGGCGTGCTG
+AACGTGGTGCAGGGCTACGGCGCGACGGCGGGCGATGCGCTGGTACGCCACCATGACGTG
+CGTGCGGTGTCGTTTACCGGCGGTACCGCCACCGGTCGCAATATCATGAAAAATGCCGGG
+CTGAAAAAATACTCGATGGAGCTGGGCGGCAAATCGCCGGTGCTGATTTTTGAAGACGCC
+GACATTGAGCGCGCGCTGGACGCCGCGCTGTTCACCATCTTCTCGATCAACGGCGAACGC
+TGCACCGCTGGGTCGCGCATCTTTATCCAGCAGAGCATTTACCCTGAGTTCGTGAAGCGC
+TTTGCCGAACGCGCGAATCGCCTGCGTGTCGGCGATCCGACCGACCCGAACACCCAGGTC
+GGCGCGCTGATTAGCCAACAGCACTGGGAGAAAGTCTCCGGTTATATCCGCCTCGGCATT
+GAAGAGGGGGCAACGCTGCTGGCGGGCGGTGCGGAAAAACCCACTGACCTGCCTGCGCAT
+CTGAAAGGCGGTAACTTCCTGCGCCCAACCGTGCTGGCCGATGTCGACAACCGTATGCGC
+GTTGCGCAGGAAGAGATCTTTGGGCCGGTCGCCTGCCTGCTGCCATTCAAAGACGAAGCG
+GAAGGGTTACGTTTGGCGAACGATGTGGAATACGGTCTGGCCTCTTATATCTGGACCCAG
+GACGTGAGCAAAGTGTTGCGCCTGGCGCGTGGGATTGAAGCCGGCATGGTCTTCGTCAAC
+ACCCAGAACGTCCGCGACCTGCGCCAGCCGTTCGGCGGCGTGAAAGCCTCCGGTACCGGG
+CGCGAAGGCGGCGAATATAGCTTCGAAGTGTTTGCGGAAATGAAAAACGTCTGCATCTCA
+ATGGGCGACCATCCTATCCCAAAATGGGGAGTTTGAATGCCGCACTTTATTGCTGAATGT
+ACTGAAAATATTCGCGAGCAGGCTGATTTACCAAGCCTGTTCAGCAAGGTAAACGAGGCG
+CTGGCCGCCACCGGGATTTTCCCCATCGGCGGTATCCGCAGTCGCGCCCACTGGCTGGAT
+ACCTGGCAGATGGCTGACGGTAAGCATGATTACGCGTTTGTGCATATGACGCTGAAAATC
+GGCGCCGGGCGCAGCCTGGAGAGCCGTCAGGAAGTCGGCGAAATGCTGTTTGGGCTGATT
+AAAGCCCACTTCGCCGACCTGATGGAGAACCGCTATCTGGCGCTGTCGTTTGAGATTGCC
+GAGTTACATCCAACGCTCAATTACAAACAAAACAACGTACACGCGTTATTTAAATAGATG
+CTCGATAAACAGACCCATACCCTGATCGCTCAGCGACTTAATCAGGCTGAAAAACAGCGT
+GAACAGATTCGCGCAGTGTCGCTGGATTATCCCAACATCACTATTGAAGATGCCTATGCC
+GTACAGCGTGAATGGGTCAATATCAAGATTGCCGAAGGGCGCACGCTCAAAGGCCACAAA
+ATCGGCCTGACCTCAAAAGCGATGCAGGCCAGCTCGCAAATCAGCGAACCGGATTACGGC
+GCGCTGCTTGACGATATGTTCTTCCATGACGGCGGAGATATCCCCACCGACCGTTTTATC
+GTCCCGCGTATTGAAGTGGAGCTGGCGTTCGTGCTGGCGAAACCGCTGCGCGGCCCTCAC
+TGCACGCTGTTCGACGTCTACAACGCCACGGATTATGTGATTCCGGCGCTGGAACTGATT
+GACGCCCGCAGCCACAACATCGACCCGGAAACCCAGCGCCCGCGCAAAGTGTTCGACACC
+ATTTCCGACAACGCCGCCAACGCCGGGGTGATCCTCGGTGGTCGCCCCATCAAACCAGAC
+GAGCTGGATCTGCGCTGGATCTCCGCGCTGCTCTATCGCAACGGCGTGATCGAAGAAACC
+GGCGTCGCCGCAGGCGTGCTGAATCATCCGGCCAACGGCGTGGCGTGGCTGGCGAACAAG
+CTTGCCCCCTACGATGTCCAGCTTGAAGCCGGGCAGATCATCCTCGGCGGCTCGTTCACC
+CGCCCGGTGCCGGCGCGCAAGGGCGACACCTTCCATGTCGATTACGGCAACATGGGCGCG
+ATCAGTTGCCGGTTTGTGTAAATGAGCTCTGTACCCGCGCCGCGTGAATATTTTCTTGAC
+TCTATCCGCGCATGGCTGATGTTGTTAGGGATTCCCTTTCATATCTCGTTGATCTATTCC
+ACTCACAGTTGGCATGTCAATAGCGCCGCGCCATCGTGGTGGCTAACCCTGTTTAACGAT
+TTTATCCACGCTTTTCGTATGCAGGTGTTTTTTGTTATTTCTGGTTATTTTTCGTACATG
+TTATTTTTACGTTATCCATTAAAACACTGGTGGAAAGTACGGGTAGAACGTGTGGGTATT
+CCCATGCTTACCGCAATCCCTTTGCTTACCTTGCCGCAATTTATCCTGTTGCAATATGTC
+AAAGAGAAAACAGAGAACTGGCCTACACTCTCTGCCTATGAAAAATATAATACGTTAGCG
+TGGGAACTCATTTCACATCTGTGGTTTTTACTGGTGCTGGTGATATTAACCACCGTCAGC
+ATCGGGATTTTTACCTGGTTCCAAAAAAGGCAGGAAACAAGCAAGCCTCGTCCCGCCGCT
+ATTTCGCTGGCCAAACTTTCGCTTATTTTTTTCCTGCTGGGGGTGGCGTACGCTGCTATC
+AGGCGCATTATATTCATCGTATATCCGGCAATCCTCAGTGACGGCATGTTCAATTTTATT
+GTGATGCAAACGCTATTTTATGTGCCGTTTTTTATTCTCGGCGCGTTGGCCTTCATTCAC
+CCCGATCTGAAAGCGCGCTTCACCACGCCCTCACGCGGATGCACTTTAGGCGCTGCCGTT
+GCTTTTATCGCGTATCTGCTGAATCAACGTTATGGGAGCGGCGACGCCTGGATGTACGAA
+ACCGAATCCGTGATTACGATGGTAATGGGGCTATGGATGGTGAACGTGGTATTTTCACTG
+GGGCATCGCTTGTTAAACTTTCAGTCCGCGCGTGTCACCTATTTCGTGAATGCTTCGCTG
+TTTATTTATCTGGTGCATCATCCCTTAACGCTTTTCTTTGGCGCGTATATTACACCGCAT
+ATCTCCTCCAACCTGATCGGGTTCTTGTGCGGGCTGATATTTGTTATGGGTATTGCGTTA
+ATTCTGTATGAAATTCATTTACGCATCCCGCTCCTGAAATTTCTCTTTTCAGGTAAACCG
+CCGGTAAAACAAGAAAGCCGCGCCGCGATCGGGTAGATGAAACATAAACGACAAATGATG
+AAAATGCGTTGGTTGGGCGCAGCTATTATGTTAACGCTCTACGCATCATCGAGCTGGGCG
+TTCAGTATTGATGACGTGGCAAAACAAGCTCAATCTTTAGCCGGGAAAGGCTATGAGGCG
+CCTAAAAGCAACTTGCCCTCCGTTTTCCGCGACATGAAATATGCGGATTATCAGCAGATC
+CAGTTTAACAGCGATAAAGCCTACTGGAACAACTTAAAGACCCCTTTTAAGCTCGAATTT
+TACCATCAGGGGATGTACTTCGATACGCCGGTCAAGATTAACGAAGTGACGGCGACGACG
+GTCAAAAGAATCAAATACAGCCCGGATTACTTCAATTTTGGCAATGTTCAGCACGATAAA
+GACACGGTAAAAGATTTAGGCTTCGCCGGGTTCAAAGTCCTGTACCCCATTAACAGTAAA
+GATAAGAACGACGAAATCGTCAGTATGCTTGGCGCCAGCTATTTCCGCGTTATCGGCGCA
+GGCCAGGTGTATGGCTTATCTGCGCGCGGCCTGGCGATTGATACCGCCTTACCATCTGGT
+GAAGAGTTTCCCCGCTTTCGCGAGTTCTGGATTGAGCGTCCAAAACCCACCGATAAGCGT
+TTGACCGTCTATGCATTACTGGATTCTCCGCGCGCGACCGGCGCTTACCGTTTTGTGATC
+ATTCCTGGCCGCGATACCGTGGTGGACGTGCAGTCAAAAGTCTATCTGCGCGATAAGGTG
+GGCAAGCTGGGCGTTGCGCCATTAACCAGTATGTTCCTGTTTGGGCCAAACCAGCCGTCG
+CCGACGACCAACTATCGTCCGGAATTGCATGACTCGAACGGCTTATCCATTCATGCGGGT
+AATGGCGAGTGGATTTGGCGTCCGCTGAACAATCCAAAACACCTCGCTGTGAGCAGCTAT
+GCGATGGAAAACCCTCAGGGATTCGGCCTGTTGCAGCGTGGTCGCGAGTTCTCGCGCTTT
+GAAGATTTAGACGATCGCTATGACCTGCGTCCAAGCGCCTGGATTACCCCGAAAGGCGAC
+TGGGGCAAAGGTAAGGTTGAACTGGTTGAAATTCCGACCAATGATGAAACCAACGATAAC
+ATCGTCGCTTACTGGACTCCGGATCAACTGCCGGAACCGGGTAAAGAGATGAACTTCAAG
+TACACTCTGACCTTCAGCCGCGATGAAGATAAACTTCATGCGCCGGATAATGCCTGGGTG
+CTGCAAACACGCCGCTCAACGGGCGACGTTAAACAGTCGAATCTGATTCGCCAGCCCGAC
+GGCACTATTGCCTTTGTGGTGGATTTCGTTGGCGCCGACATGAAAAAACTGCCGCCGGAT
+ACGCCCGTCGCTGCACAAACCAGCATTGGCGATAACGGTGAAATCGTTGACAGTAATGTA
+CGCTATAACCCAGTCACTAAAGGCTGGCGTTTAATGCTGCGCGTGAAAGTCAAAGACGCG
+AAGAAAACCACGGAAATGCGTGCCGCATTGGTGAATGCCGATCAGACGCTAAGTGAAACC
+TGGAGCTACCAGTTACCTGCCAATGAATAAATGAATAAAACAACTGAGTATATTGACGCA
+CTGCTGCTTTCTGAACGTGAGAAAGCGGCATTGCCGAAAACTGACATCCGCGCCGTGCAT
+CAGGCGCTGGATGCCGAGCATCGGACTTACTCGCGAGAAGACGATTCACCGCAGGGTTCC
+GTAAAAGCCCGCCTTGAACACGCCTGGCCGGATTCATTGGCGAAGGGGCAGTTAATTAAA
+GATGATGAAGGGCGCGATCAGTTGCAGGCTATGCCAAAAGCGACGCGCTCTTCGATGTTT
+CCTGATCCCTGGCGAACCAACCCGGTTGGCCGTTTCTGGGATCGCCTGCGTGGGCGGGAT
+GTAACGCCGCGCTATGTTTCTCGTCTGACAAAAGAAGAGCAGGCGAGTGAGCAAAAATGG
+CGTACCGTCGGCACTATACGCCGCTATATTTTGTTAATTTTGACTCTGGCGCAAACCGTC
+GTCGCGACCTGGTATATGAAGACCATTCTGCCCTATCAGGGATGGGCGCTCATCAATCCT
+ATGGATATGGTGGGGCAGGATATTTGGGTCTCCTTTATGCAGCTCCTGCCCTACATGCTG
+CAAACCGGTATCCTGATTTTGTTTGCCGTGCTGTTCTGCTGGGTGTCTGCCGGATTCTGG
+ACGGCGCTGATGGGCTTCCTGCAACTGCTTATCGGGCGCGATAAGTACAGTATCTCCGCG
+TCTACGGTTGGCGATGAGCCCCTCAATCCGGAACACCAGACGGCGCTGATCATGCCTATC
+TGTAATGAAGACGTTAGCCGCGTTTTCGCCGGTCTGCGCGCGACCTGGGAGTCCGTTAAA
+GCCACAGGCAACGCCGCGCATTTTGACGTCTATATCCTTAGCGATAGTTATAACCCGGAT
+ATCTGCGTGGCGGAGCAAAAGGCGTGGATGGAGCTCATCGCGGAAGTGCAGGGCGAAGGC
+CAAATTTTTTACCGTCGCCGCCGCCGCCGTATGAAACGCAAAAGCGGCAATATTGACGAT
+TTTTGCCGCCGCTGGGGCAATCAGTACAGCTATATGGTGGTGCTGGACGCGGACTCAGTG
+ATGAGCGGCGAGTGTCTGAGCGGGCTGGTGCGCCTGATGGAAGCGAACCCTAACGCCGGG
+ATTATCCAGTCTTCGCCGAAAGCGTCGGGGATGGATACTCTGTATGCCCGCTGCCAACAG
+TTCGCGACCCGTGTTTATGGACCGCTGTTTACCGCCGGGCTGCACTTCTGGCAGTTGGGG
+GAGTCGCACTACTGGGGGCACAATGCCATTATCCGCGTGAAGCCGTTTATCGAGCACTGC
+GCTCTGGCGCCGCTGCCGGGAGAAGGTTCGTTCGCCGGATCGATTCTTTCCCACGACTTT
+GTGGAGGCGGCGCTAATGCGTCGGGCAGGGTGGGGCGTCTGGATTGCCTACGATCTCCCC
+GGCTCCTATGAAGAGCTGCCGCCAAACCTGCTGGATGAGCTTAAACGCGACCGCCGCTGG
+TGTCACGGCAACCTGATGAACTTTCGTCTGTTCCTGGTGAAAGGAATGCACCCGGTGCAT
+CGCGCCGTGTTCCTGACCGGGGTAATGTCATACCTGTCCGCGCCGTTATGGTTTATGTTC
+CTCGCGCTTTCTACCGCGCTGCAGGTCGTTCATGCGTTAACAGAGCCGCAATATTTCCTT
+CAGCCGCGCCAGCTTTTTCCGGTCTGGCCGCAGTGGCGTCCGGAACTGGCAATCGCGCTG
+TTTGCGTCAACGATGGTGCTGCTGTTCCTGCCGAAGCTGCTCAGTATTATGCTGATCTGG
+TGTAAAGGCACCAAAGAGTATGGCGGTTTCTGGCGCGTTACGCTGTCGCTATTGCTGGAA
+GTGCTGTTCTCCGTGTTGCTGGCGCCGGTGCGTATGCTGTTTCATACCGTGTTTGTGGTC
+AGCGCGTTCCTCGGCTGGGAAGTGGTCTGGAACTCACCGCAACGCGACGATGATTCTACG
+CCGTGGGGAGAAGCCTTTATGCGTCACGGCTCTCAACTGCTGCTGGGGCTGGTCTGGGCG
+GTGGGTATGGCGTGGCTGGATTTACGCTTTCTGTTCTGGCTGGCGCCGATTGTCTTTTCG
+CTGATTCTGTCGCCATTTGTTTCGGTGATCTCCAGTCGTTCAACGGTAGGATTACGCACC
+AAACGCTGGAAGCTGTTCCTGATCCCGGAAGAGTATTCGCCGCCTCAGGTGTTGGTCGAT
+ACCGATAAATATCTGGAGATGAATCGCCGCCGTATTCTGGACGATGGCTTTATGCATGCG
+GTATTTAACCCGTCGCTTAATGCGCTGGCGACCGCGATGGCCACCGCGCGTCACCGCGCC
+AGTAAGGTGCTGGAAATAGCCCGCGATCGTCATGTGGAGCAGGCGCTAAACGAAACGCCG
+GAGAAACTGAACCGCGATCGGCGTCTGGTTTTGCTCAGCGATCCGGTGACGATGGCGCGT
+TTACACTATCGGGTCTGGAATGCGCCAGAGAGATACTCTTCCTGGGTAAACCATTATCAG
+TCTCTCGTCCTGAATCCGCAGGCGTTGCAGGGACGAACATCGTCAGCGGGAATGTCGCGC
+GTCTCGCAGGCGAGGAACCTGGGTAAATATTTTCTTCTCATCGATAACATGTTAGTGGTG
+CTGGGTTTTTTCGTCGTCTTCCCGCTCATCTCTATTCGCTTTGTCGATCAAATGGGGTGG
+GCTGCCGTAATGGTAGGGATCGCGCTCGGCCTGCGTCAGTTTATTCAACAAGGTCTGGGC
+ATTTTTGGCGGCGCCATCGCCGATCGCTTTGGCGCGAAACCGATGATTGTCACCGGTATG
+CTGATGCGCGCCGCAGGCTTTGCCACCATGGGTATCGCGCATGAGCCCTGGCTCTTGTGG
+TTTTCCTGCTTTCTTTCCGGTCTCGGCGGTACGCTTTTCGACCCGCCGCGTTCAGCGCTG
+GTGGTCAAATTAATTCGTCCGGAGCAACGGGGCCGCTTCTTCTCTCTGTTGATGATGCAG
+GACAGCGCGGGCGCGGTGATTGGCGCGCTGCTGGGAAGCTGGTTGCTACAATACGATTTT
+CGCCTGGTCTGCGCGACGGGCGCTATTTTGTTCATATTATGCGCCCTTTTCAACGCATGG
+CTGCTTCCGGCCTGGAAGCTATCAACGGCCAGAACGCCGGTGCGTGAAGGAATGCGCCGC
+GTCATGAGCAATAAAAGGTTTGTCACCTACGTGCTGACGCTGGCGGGCTACTATATGCTG
+GCGGTACAGGTCATGTTAATGCTGCCGATTATGGTAAACGATATCGCCGGTTCGCCTGCT
+GCCGTGAAATGGATGTACGCTATTGAGGCGTGTCTCTCGCTGACGTTGCTCTACCCGATT
+GCCCGCTGGAGCGAAAAGCGTTTTCGGCTGGAGCATCGGCTGATGGCCGGTTTGCTCGTC
+ATGTCGCTGAGCATGCTCCCCATCGGGATGGTGGGCAATTTACAGCAGCTTTTTACGCTT
+ATTTGCGCTTTCTACATCGGCTCGGTTATCGCCGAACCGGCGCGCGAAACGCTCAGCGCG
+TCGCCCGCGGACGCGAGGGCGCGGGGAAGCTATATGGGCTTTAGCCGTCTGGGATTAGCC
+ATTGGCGGCGCGATTAGTTATATCGGCGGCGGCTGGTTGTTTGATATGGGTAAAGCGCTT
+GCGCAGCCTGAACTACCGTGGATGATGCTCGGTATTATCGGCTTTATCACCTTTTTGGCT
+TTAGGCTGGCAATTTAGTCATAAGCGCACGCCGCGCCGGATGCTGGAACCCGGCGCCATG
+ACCATGTATGCCACGCTGGAAGAAGCTATCGATGCAGCCCGGGAAGAATTTCTGGCTGAC
+CATCCAGGCCTCGAACAAGACGAAGCGAATGTGCAGCAGTTCAACGTTCAGAAATATGTA
+CTGCAGGATGGGGACATCATGTGGCAGGTCGAATTTTTCGCCGATGAAGGTGAAGATGGC
+GAATGTCTGCCGATGCTGAGTGGTGAAGCCGCACAGAGCGTGTTTGACGGCGATTATGAT
+GAGATAGAGATCCGCCAGGAATGGCAGGAAGAGAATACTTTGCATGAATGGGATGAAGGG
+GAATTCCAGCTTGAACCCCCGCTTGATACCGAGGAAGGCCGTACTGCGGCAGACGAATGG
+GATGAGCGTATGTCACTATTAGCCAGGCTGGAACAAAGTGTACACGAAAACGGTGGGCTG
+ATTGTCTCATGCCAACCGGTACCAGGCAGCCCTATGGATAAACCTGAAATTGTGGCTGCA
+ATGGCACAGGCAGCGGCTTCGGCGGGTGCGGTCGCTGTGCGCATTGAAGGCATTGAGAAT
+CTGCGGACTGTTCGTCCCCATCTTTCTGTTCCTATTATTGGGATAATTAAACGTGACCTT
+ACAGGGTCGCCAGTCCGTATCACTCCATATTTACAGGATGTTGACGCCCTGGCGCAGGCA
+GGTGCCGATATTATCGCTTTTGATGCCTCATTCCGCTCTCGCCCGGTTGATATTGATAGT
+TTACTGACACGTATTCGCCTGCATGGATTACTGGCGATGGCAGACTGTTCAACCGTGAAT
+GAAGGCATAAGTTGCCATCAGAAAGGAATCGAATTCATTGGTACAACACTGTCTGGCTAT
+ACCGGTCCCATCACGCCGGTTGAGCCAGATTTGGCAATGGTGACACAACTGAGTCATGCA
+GGTTGTCGTGTTATTGCCGAGGGGCGCTATAACACGCCTGCACTGGCGGCCAATGCTATT
+GAGCATGGTGCCTGGGCAGTTACCGTTGGTTCCGCTATCACCCGTATCGAGCATATCTGT
+CAGTGGTTCAGTCACGCAGTAAAACGCTGAATGAAAAATTTTAAGAAAATGATGACGCTA
+ATGGCGCTATGTTTATCAGTTGCTATCACCACATCAGGATATGCAACCACGCTTCCTGAT
+ATACCAGAACCACTGAAAAATGGTACTGGCGCTATTGATAATAATGGCGTGATTTATGTC
+GGCTTAGGTACCGCAGGGACATCCTGGTATAAAATTGATCTTAAAAAGCAACATAAAGAC
+TGGGAGCGTATAAAGTCGTTTCCTGGTGGAGCTCGTGAGCAATCCGTGTCGGTATTTTTA
+AATGATAAGCTGTATGTTTTTGGTGGCGTAGGGAAAAAAAACAGTGAATCACCGTTGCAG
+GTTTATAGCGATGTGTACAAATACTCACCGGTGAAAAATACATGGCAAAAAGTTGATACT
+ATATCTCCAGTTGGATTAACAGGGCATACGGGAGTAAAATTAAACGAAACGATGGTACTT
+ATTACCGGAGGGGTTAATGAGCATATCTTTGATAAGTATTTTATTGATATAGCGGCTGCG
+GATGAAAGTGAAAAAAATAAAGTCATCTATAATTATTTTAATAAACCTGCCAAAGATTAT
+TTTTTTAATAAAATCGTATTTATCTACAATGCTAAAGAGAACACATGGAAGAATGCCGGT
+GAGCTGCCAGGCGCGGGGACGGCAGGATCGTCATCGGTAATGGAAAATAATTTCTTGATG
+CTGATTAATGGTGAGCTCAAACCGGGTTTACGTACCGATGTGATTTACCGCGCCATGTGG
+GATAACGATAAGCTAACATGGTTGAAGAACAGCCAGTTACCGCCATCGCCTGGAGAACAA
+CAGCAGGAAGGGTTGGCCGGAGCATTTTCGGGCTATAGCCACGGTGTCCTGCTTGTCGGT
+GGTGGCGCGAATTTTCCGGGAGCAAAACAAAATTATACTAATGGAAAGTTTTATTCCCAC
+GAAGGGATAAATAAAAAATGGCGAGATGAAGTCTATGGTTTGATTAATGGCCATTGGCAA
+TATATGGGTAAAATGAAACAACCTCTCGGCTATGGTGTATCAGTAAGTTATGGTGATGAA
+GTTTTCCTTATTGGTGGTGAAAATGCTAAAGGGAAACCTGTTTCGTCTGTAACCTCCTTT
+ACCATGCGTGATGGTAATTTATTAATAAAATAAGTGATAGCAAAATTCTTCCCGTGGTAT
+AGCGAGATAACACGTCCACAAAAAAATGCTTTATTTTCAGCATGGCTGGGTTACGTTTTT
+GATGGCTTCGACTTTATGCTGATTTTCTACATTATGTATCTGATCAAGGCTGACTTAGGA
+TTGACAGATATGGAGGGCGCATTCCTTGCCACAGCGGCCTTTATTGGGCGACCATTTGGC
+GGGGCGCTATTTGGTCTGCTGGCAGACAAATTTGGCCGTAAGCCGTTAATGATGTGGTCG
+ATAGTTGCCTATTCTGTAGGTACAGGGTTAAGTGGCCTGGCTTCCGGTGTAATTATGCTG
+ACGCTTAGTCGTTTTATTGTCGGTATGGGGATGGCGGGGAAGTATGCTTGCGCTTCTACT
+TATGCCGTGGAAAGTTGGCCAAAGCATTTAAAATCTAAAGCGAGCGCATTTCTGGTTTCA
+GGTTTCGGTATTGGTAACATCATAGCAGCCTATTTTATGCCGTCATTTGCCGAAGCGTAT
+GGTTGGCGTGCTGCTTTTTTTGTCGGTTTGCTACCCGTTCTTTTAGTAATCTACATCCGG
+GCCAGGGCTCCTGAATCTAAAGAGTGGGAAGAAGCCAAACTCAGTGGTCTCGGAAAGCAT
+TCACAAAGTGCCTGGTCAGTTTTCTCTTTGTCAATGAAAGGGCTATTTAATCGAGCTCAA
+TTTCCACTGACATTATGTGTATTTATTGTTCTGTTCTCTATTTTCGGCGCAAACTGGCCG
+ATCTTTGGTCTACTGCCTACATATTTGGCGGGAGAGGGCTTTGATACGGGCGTGGTCTCT
+AATTTAATGACGGCGGCGGCATTCGGCACTGTATTGGGAAATATCGTTTGGGGTCTGTGC
+GCAGATAGAATTGGTTTGAAGAAAACGTTCAGCATTGGTCTTCTCATGTCCTTTTTATTC
+ATTTTCCCGTTATTCAGAATTCCGCAAGATAATTATTTACTGCTGGGCGCATGTTTATTC
+GGTTTAATGGCGACTAACGTAGGTGTTGGCGGGCTGGTTCCCAAATTTCTCTACGACTAC
+TTTCCTCTTGAGGTTCGTGGTTTGGGTACCGGGCTTATTTATAATCTTGCTGCGACATCA
+GGCACATTCAATTCAATGGCGGCGACCTGGCTTGGAATAACAATGGGGCTAGGCGCTGCG
+CTAACGTTCATTGTTGCTTTCTGGACCGCAACAATTCTACTCATTATTGGCCTATCCATT
+CCGGATAGACTAAAAGCACGTCGTGAAAGGTTTCAGTCAACAAAAGAATTTTAAATGAAA
+AAGTATCTTGCTTTCGCCGTTACGCTGCTGGGTATGGGTAAAGTCATCGCCTGTACTACC
+CTTTTGGTAGGCAATCAGGCTTCGGCTGACGGCTCCTTTATTATCGCGCGCAACGAGGAT
+GGCTCGGCAAATAACGCCAAGCATAAGGTTATTCATCCCGTCGCGTTTCATCAACAAGGC
+GAGTATAAAGCACATCGCAACAATTTTAGCTGGCCGCTTCCGGAGACAGCGATGCGCTAT
+ACGGCGATTCATGACTTTGATACTAACGATAACGCCATGGGTGAAGCCGGTTTCAATTCG
+GCGGGCGTCGGAATGAGCGCAACGGAAACCATTTACAACGGCAGAGCGGCGCTGGCTGCC
+GATCCTTACGTGACAAAAACGGGAATCACGGAAGACGCCATTGAGTCCGTGATCCTGCCA
+GTGGCGCAATCGGCGCGTCAGGGCGCCAAATTACTGGGAGATATTATTGAACAAAAAGGC
+GCGGGCGAAGGTTTCGGCGTCGCGTTTATTGATAGCAAAGAGATATGGTATCTGGAGACG
+GGAAGCGGACATCAATGGCTGGCAGTACGACTTCCGGCAGATAGCTATTTCGTTTCCGCC
+AATCAGGGACGTTTACGCCATTACGATCCGAATGATAACGCGAATTATATGGCGTCACCA
+ACGTTAGTAAGCTTTGCGAAAAAGCAGGGATTATATGATCCGGCCCGCGGCGAATTCGAC
+TTTCATCAAGCCTATTCGCAGGATAACAAAAACGATACCACCTATAATTATCCGCGCGTC
+TGGACGCTACAACACCAGTTTAATCCGCATCTGGATACGGTCGTTAGCGAAGGGGAAACA
+TTTTCTGTTTTTTTAACGCCAATAACGAAGATCAGCGTGGCGGCAGTAAAAAACGCGTTA
+CGCAATCACTATCAGGGAACGTCGCACGACCCTTATGCCAGTCATAATCCACAAGAACCA
+TGGCGACCTATATCCGTTTTTCGTACCCAGGAGTCACATATTTTACAGGTCAGACCGAAA
+TTACCGCAGGCTATCGGCAACGTAGAATACATCGCCTATGGAATGCCATCTCTTAGCGTC
+TATCTCCCCTATTACCAGGGGATGCGTCATTATCAACCCGGAGATGATAAAGGAACCGAT
+CGGGCGAGCAACGACTCTACCTACTGGACATTCCGCACGCTGCAAACACTGGTTATGCAA
+GACTACAATACGTTTGCGCCAGATGTGCAACATGCCTGGAAAACATTTGAACAGCAAACA
+GCTAAGCAACAGTATAAGATGGAGCAGAGCTATCTGAGATTATATGCGTCGCATCCGAAA
+GAAGCACAACGCTTACTGCAAAATTTTGAAGATAAAACGATGCAAAATGCGCAGACGCTC
+GCCCGTCGCCTGACCAATAATATTATTACGACAATGACTTACCGCACAGATATGAAATAT
+CACTTTTCAAGTACGCAGCCATAAATGGGAAGACAAAAAGCAGTGATCAAAGCTCGTCGT
+GAAGCAAAGCGTGTGTTGAGACGAGATTCGCGTAGTCATAAGCAACGTGAAGAAGAATCG
+GTCACGTCACTGGTACAGATGGGCGGAGTAGAAGCCATTGGCATGGCGCGCGATAGTCGC
+GATACCTCTCCTGTTAAGGCGCGAAATGAAGCACAGGCGCATTATCTGAACGCTATCGAC
+AGTAAACAGCTTATTTTTGCGACCGGCGAAGCCGGCTGCGGAAAAACATGGATCAGTGCG
+GCAAAGGCGGCAGAAGCATTGATTCATAAGGACGTCGAGAGGATCATTGTGACGCGTCCG
+GTATTGCAGGCTGATGAAGATCTTGGTTTTTTGCCCGGTGATATCGCTGAAAAATTCGCG
+CCTTATTTTCGTCCCGTCTACGATGTCCTGCTTAAACGGTTGGGCGCGTCCTTTATGCAA
+TATTGTTTGCGCCCGGAAATCGGTAAGGTAGAAATTGCCCCGTTCGCCTATATGCGTGGG
+CGTACTTTTGAAAATGCGGTCGTGATCCTCGACGAGGCGCAAAATGTGACTGCGGCGCAA
+ATGAAAATGTTTTTGACGCGATTAGGCGAAAATGTCACGGTCATTGTCAATGGCGATATT
+ACGCAATGCGACCTGCCGCGCGGTGTGCGTTCCGGGTTGAGTGATGCGTTGGAACGCTTT
+GAAGAAGATGAAATGGTGGGGATTGTGCATTTCAACAAAGACGACTGCGTGCGCTCGGCG
+CTTTGTCAGCGAACGCTCCACGCATACAGCTAAATGGGAACCACCACGATGGGGGTTAAG
+CTGGACGACGCCACGCGCGAACGGATCAAAATGGCCGCGTCGCGTATCGATCGCACGCCG
+CACTGGTTAATAAAACAGGCAATCTTTAGCTATCTGGACAAGCTGGAAAATAGCGATACG
+CTACCGGAGCTACCTGCGCTGTTTGCCGGCGCGGCAAATGAAAGCGAGGAGCCGGTCGCG
+CCGCAGGATGAGCCGCATCAGCCCTTTCTGGAGTTTGCCGAACAGATTCTTCCCCAATCC
+GTCTCTCGCGCCGCCATCACCGCCGCCTGGCGCCGCCCGGAAACCGATGCGGTGTCAATG
+CTAATGGAACAGGCGCGCCTGTCGCCGCCTGTCGCTGAGCAGGCGCATAAACTGGCGTAT
+CAACTGGCGGAGAAATTGCGCAATCAAAAATCCGCCAGCGGTCGCGCGGGTATGGTGCAA
+GGCCTGTTGCAGGAGTTTTCCCTCTCTTCGCAAGAAGGCGTAGCGCTGATGTGTCTGGCG
+GAAGCGCTGCTGCGTATTCCCGACAAAGCTACGCGCGATGCGTTAATTCGCGACAAAATC
+AGTAATGGCAACTGGCAGTCGCATATTGGCCGTAGCCCGTCGCTGTTTGTAAACGCCGCC
+ACCTGGGGGCTGCTCTTTACCGGCCGACTGGTCTCAACGCATAACGAAGCCAATCTTTCG
+CGCTCGCTGAACCGCATTATCGGCAAGAGCGGCGAACCGTTAATCCGCAAAGGCGTCGAC
+ATGGCGATGCGTTTAATGGGCGAGCAGTTCGTGACTGGCGAAACCATTGCTCAGGCGCTG
+GCGAATGCCCGAAAACTGGAAGAGAAAGGGTTCCGCTATTCTTACGATATGCTGGGCGAA
+GCCGCGTTAACCGCCGCCGATGCGCAGGCCTATATGGTCTCTTACCAGCAAGCGATTCAT
+GCCATCGGCAAAGCGTCTAACGGTCGCGGTATTTACGAAGGGCCAGGCATCTCGATTAAG
+CTGTCCGCCCTGCATCCACGCTATAGTCGCGCGCAATACGATCGGGTAATGGAGGAGCTT
+TATCCGCGCCTGAAATCCCTGACGCTGCTGGCGCGCCAGTATGATATCGGTCTCAATATC
+GACGCCGAAGAGGCGGATCGTCTGGAGATCTCGCTTGATCTGCTGGAAAAACTCTGCTTC
+GAACCCGAACTGGCGGGCTGGAACGGCATTGGCTTTGTGATTCAGGCTTACCAGAAACGC
+TGCCCGCTGGTCATTGATTATTTAGTCGATCTGGCCTCCCGTAGCCGCCGTCGGCTGATG
+ATTCGTCTGGTGAAAGGCGCCTACTGGGATAGCGAGATCAAACGCGCGCAAATGGAAGGG
+CTGGAGGGCTATCCAGTTTATACCCGCAAAGTGTATACCGATGTCTCTTATCTGGCCTGC
+GCGAAAAAACTGCTCGCCGTCCCTAATCTGATCTACCCGCAGTTCGCGACCCATAACGCT
+CACACACTGGCGGCGATTTATCATCTGGCCGGGCAAAATTACTATCCGGGTCAGTACGAA
+TTCCAGTGCCTGCACGGCATGGGAGAACCGCTGTATGAACAGGTCACCGGTAAAGTGGGG
+GACGGAAAACTTAACCGTCCCTGCCGTATTTACGCGCCGGTGGGAACACACGAAACCCTG
+CTGGCCTATCTGGTACGACGCCTGCTGGAAAACGGCGCCAACACCTCTTTTGTCAACCGC
+ATCGCCGATGCCACCCTACCGCTCGATGAACTGGTGGCCGACCCGGTCGAGGCCGTGGAA
+AAACTGGCGCAGCAGGAAGGTCAGGCTGGCATACCGCATCCAAAAATTCCGCTGCCGCGC
+GATCTGTACGGCGAAGGTCGGATAAACTCCGCCGGACTTGATTTAGCGAATGAACATCGC
+CTCGCCTCGCTTTCTTCTGCCCTGTTAAGCAACGCCATGCAGAAATGGCAGGCCAAACCT
+GTGCTGGAACAACCGGTGGCCGACGGTGAGATGACGCCGGTTATCAACCCGGCGGAACCG
+AAAGATATTGTTGGCTGGGGACGCGAAGCGACAGAAAGCGAGGTTGAACAGGCGTTGCAA
+AACGCGGTCAATCAGGCGCCGGTTTGGTTTGCGACGCCGCCGCAAGAACGCGCCGCTATT
+TTGCAGCGGGCGGCGGTATTGATGGAAGACCAAATGCAGCAGTTGATTGGCCTGTTGGTG
+CGTGAAGCGGGGAAAACGTTCAGCAACGCCATTGCCGAAGTGCGCGAAGCGGTAGACTTC
+CTCCATTATTATGCCGGTCAAGTGCGTGACGATTTCGATAACGAAACGCATCGCCCGTTA
+GGGCCGGTGGTCTGTATCAGTCCGTGGAACTTTCCGCTGGCCATTTTCACTGGCCAAATC
+GCCGCCGCGCTGGCGGCAGGTAACAGCGTTCTGGCGAAACCGGCAGAGCAGACATCGCTG
+ATTGCCGCCCAGGGCATTGCCATTTTGCTGGAAGCGGGCGTACCGCCGGGCGTCGTGCAA
+CTGTTGCCGGGACGGGGAGAAACCGTCGGCGCCCAGCTTACCGCCGATGCGCGTGTACGC
+GGCGTGATGTTTACCGGTTCCACGGAGGTCGCGACGTTGTTGCAGCGCAACATCGCCACG
+CGTCTTGACGCCCAGGGGCGCCCTATTCCGTTGATTGCGGAAACCGGCGGTATGAACGCT
+ATGATTGTCGACTCTTCCGCGCTCACCGAGCAGGTGGTCGTGGATGTGCTGGCTTCCGCC
+TTCGACAGCGCCGGACAACGCTGTTCCGCGCTCCGCGTGCTGTGTTTGCAGGACGATATC
+GCCGAACATACGCTGAAAATGTTACGCGGCGCGATGGCGGAGTGTCGGATGGGGAATCCA
+GGCCGTCTGACGACCGATATCGGGCCGGTGATCGATAGCGAGGCCAAAGCCAACATTGAA
+CGTCATATCCAGACGATGCGCGCCAAAGGCCGCCCGGTTTTCCAGGCCGCGCGTGAAAAC
+AGCGATGACGCGCAGGAATGGCAGACCGGTACGTTTGTTATGCCCACGCTTATTGAGCTG
+GAAAACTTCGCAGAACTGGAAAAAGAGGTCTTCGGGCCCGTGCTGCACGTCGTGCGTTAT
+AACCGTAACCAACTGGCGGAGCTTATCGAACAGATTAACGCTTCCGGCTACGGGCTAACG
+CTGGGCGTACATACCCGTATTGATGAAACCATTGCGCAAGTCACCGGTTCCGCCCATGTC
+GGCAACCTGTACGTTAACCGTAATATGGTGGGCGCGGTCGTCGGCGTCCAGCCGTTTGGC
+GGCGAAGGCCTGTCCGGCACCGGGCCAAAAGCGGGAGGGCCGCTCTATCTCTACCGCCTG
+CTGGCACACCGCCCGCCCAATGCGCTCAATACGACGCTGACTCGTCAGGATGCGCGTTAC
+CCGGTGGATGCGCAGCTTAAAACCACGCTACTCGCGCCGTTGACCGCTCTGACGCAATGG
+GCGGCGGATCGCCCGGCGCTACAGACGCTCTGCCGACAATTCGCCGATCTGGCGCAGGCC
+GGCACGCAGCGCCTGCTACCGGGGCCGACCGGCGAGCGTAATACCTGGACGCTGTTGCCG
+CGTGAACGGGTGTTATGCCTGGCTGATGATGAACAGGACGCGTTGACGCAGCTTGCCGCC
+GTTCTCGCCGTCGGCAGTCAGGCGCTATGGTCAGACGACGCCTTCCACCGCGATCTGGCG
+AAACGTCTCCCCGCCGCCGTCGCGGCGCGTGTCCAGTTTGCGAAAGCGGAAACGCTGATG
+GCGCAGCCGTTTGACGCGGTGATTTTCCACGGCGACTCCGACAAGCTGCGAACCGTGTGC
+GAAGCCGTCGCCGCCCGCGAAGGCGCGATAGTGTCGGTACAGGGGTTCGCCCGCGGCGAA
+AGCAATATGCTGCTGGAACGGCTCTATATTGAACGTTCGCTGAGCGTAAACACTGCCGCC
+GCTGGCGGTAATGCCAGCCTGATGACAATTGGCTAAATGGCTATTAGCACACCGATGTTG
+GTGACATTCTGTGTCTATATTTTTGGCATGATATTGATTGGGTTTATCGCCTGGCGCTCA
+ACCAAAAACTTTGATGACTATATTCTTGGCGGTCGCAGCCTGGGGCCGTTTGTTACGGCT
+TTATCAGCCGGCGCGTCGGATATGAGCGGCTGGCTGTTAATGGGGCTGCCTGGCGCTATC
+TTTCTGTCGGGGATCTCTGAAAGCTGGATCGCCATTGGCCTGACGTTAGGCGCATGGATT
+AACTGGAAGCTGGTGGCCGGGCGCCTGCGCGTGCATACCGAATTTAACAATAACGCGCTC
+ACGCTGCCGGACTATTTTACCGGTCGGTTTGAGGATAAGAGCCGAGTCCTGCGTATTATT
+TCCGCGCTGGTCATTCTGCTGTTTTTCACTATCTATTGCGCATCAGGTATTGTCGCTGGG
+GCACGACTGTTCGAAAGCACCTTCGGTATGAGCTATGAAACCGCACTGTGGGCGGGGGCC
+GCGGCAACCATTATTTATACCTTTATCGGCGGGTTTCTTGCCGTTAGCTGGACGGATACC
+GTTCAGGCCAGCCTGATGATTTTTGCGTTAATCCTGACGCCGGTGATGGTTATTGTCGGC
+GTAGGCGGTTTTAGCGAGTCGCTGGAAGTGATCAAGCAAAAGAGCATCGAGAATGTCGAC
+ATGCTCAAGGGGCTGAATTTTGTCGCTATTATTTCTCTGATGGGCTGGGGGCTGGGTTAC
+TTCGGTCAGCCGCATATCCTGGCGCGCTTTATGGCGGCGGATTCCCATCACAGTATTGTT
+CATGCGCGTCGTATCAGTATGACCTGGATGATTCTGTGTCTGGCGGGCGCGGTGGCGGTG
+GGCTTCTTTGGCATTGCGTACTTTAACAATAACCCCGCGCTGGCCGGGGCGGTGAACCAA
+AACTCAGAACGCGTATTTATTGAACTGGCGCAGATCCTGTTTAACCCGTGGATTGCCGGT
+GTTCTGCTGTCTGCTATCCTGGCGGCGGTGATGTCGACGTTGAGCTGTCAGTTGCTGGTA
+TGCTCCAGCGCGATTACGGAAGATTTATATAAGGCTTTTCTGCGTAAAAGCGCCAGCCAG
+CAAGAGCTGGTATGGGTAGGGCGAGTGATGGTGCTGGTGGTAGCGCTGATCGCCATTGCG
+CTGGCGGCGAATCCTGATAACCGTGTGCTGGGGCTGGTGAGCTACGCCTGGGCTGGATTC
+GGCGCGGCATTTGGACCTGTTGTCCTGTTTTCTGTGATGTGGTCGCGTATGACACGTAAC
+GGCGCGCTGGCGGGAATGATTATTGGCGCGGTGACGGTTATCGTCTGGAAACAATATGGC
+TGGCTGGATCTGTATGAGATTATCCCTGGCTTCATTTTCGGCAGCCTGGGGATCGTAATC
+TTTAGCCTGCTTGGCAAAGCGCCGACAGCAACGATGCAGGAACGCTTTGCAAAAGCGGAC
+GCGCATTATCATTCCGCGCCGCCGTCGAAGCTACAGGCGGAATAAATGGCGGGTAAACTG
+CGGCGTTGGCTGCGTGAAGCCGCGGTTTTTCTGGCGCTCCTCATCGCGATAATGGTGGTC
+ATGGACGTCTGGCGCGCGCCGCAGGCGCCTCCGGCGTTTGCCGCGACACCATTACATACG
+CTGACGGGAGAGTCGACAACTCTGGCGACCTTGAGCGAGGAACGCCCCGTACTGCTCTAT
+TTTTGGGCCAGCTGGTGCGGGGTATGCCGCTTTACCACGCCTGCGGTCGCTCACCTGGCG
+GCGGAAGGGGAAAACGTCATGACCGTTGCGCTCCGCTCCGGCGGTGATGCTGAGGTTGCC
+CGCTGGCTGGCGCGCAAGGGCGTTGACTTCCCGGTCGTCAATGATGCTAACGGCGCCTTA
+TCCGCTGGCTGGGAAATCAGCGTGACGCCAACGCTGGTGGTGGTTTCACAAGGTCGGGTT
+GTGTTCACCACCAGCGGCTGGACCAGCTATTGGGGCATGAAGCTTCGGCTGTGGTGGGCA
+AAAACGTTCTGAATGATGAAAAAAAGCGTCGCTATGCTGGCGGTTTGTATGCTGGCGCAA
+AGCCACCTTGCCATTGCTGCCGGTGCTCCTGCGCCTCAAGAGATCAACATTGTTTTACTG
+GGCACCAAAGGCGGGCCTTCTTTGCTCAATACAGCCAGACTACCGCAAGCGACGGCGCTC
+ACTATCGGCGATAAGATATGGCTGATAGATGCCGGCTACGGCGCCAGTCTGCAACTGGTG
+AAAAATGGCATTCCACTGCGCAACATCAATACTATTTTGCTCACCCATCTGCACAGCGAC
+CACATACTGGATTATCCTTCCTTGCTGATGAATGCCTGGGCAAGTGGCCTGAAAGACCAT
+ACCATACAGGTTTATGGCCCGCCGGGAACCCAGGCGATGACGAAGGCTAGCTGGAAGGTC
+TTTGACAGGGATATCACGTTACGCATGGAAGAAGAGGGGAAACCCGATCCGCGCAACCTG
+GTTAAGGCGACCGATATCGGCCAGGGCGTCATCTATAAAGATGAACTGGTCACAATAAGC
+GCGCTGAAAGTGCCTCATTCCCCTTTCCCGGACGGTGAAGCGTTTGCTTACCGTTTTGAT
+ACTCAGGGTAAGCGAATCGTCTTCTCTGGCGATACGTCCTGGTTTCCTCCGCTTGCAACG
+TTTGCCCAGGGGGCGGATATCCTGGTACATGAGGCGGTACATGTCCCTTCGGTAGCAAAA
+CTGGCTAATAGTATTGGCAACGGAAAAACGCTGGCTGAAGCGATTGCGTCGCATCACACC
+ACGATTGAAGATGTCGGTAAGATTGCTCGCGAGGCCCACGTGAAAAAACTGGTGTTAAGT
+CATCTGGTGCCTGCGACGGTTGCGGATGACGTCTGGCAACAGGAAGCCATGAAAAATTAC
+CCGGGCCCTGTCATTGTCGGTCATGACAATATGACGATAAGCGTACCGTAAATGTCGCAA
+CGCACAGAGAAAAAAATCGGGAAACGTTCGCAGGCCACCGGTGCAAAACGGCAGCTTATC
+TTAACCGCCGCGCTTGCCGTTTTTTCCCAGTATGGCATTCATGGCGCGCGTCTTGAACAG
+GTCGCCGAGCGGGCAGGCGTCTCCAAAACCAATCTGCTTTATTATTATCCCTCGAAAGAG
+GCGCTGTATGTCGCGGTAATGCGACAGATTCTGGATGTCTGGTTGGCGCCGCTCAAGGCG
+TTTCGCGCAGAATTTTCCCCTCTGGAGGCCATCAAAGAGTATATCCGTCTCAAGCTGGAG
+GTTTCGCGTGATTATCCGCAGGCGTCGCGGCTCTTCTGCATGGAGATGCTGGCGGGCGCG
+CCGCTCTTAATGGATGAACTGACCGGCGATCTAAAAGCGTTGATAGATGAAAAATCCGCG
+CTGATTGCCGGATGGGTGCACAGCGGGAAACTCGCGCCCGTTTCTCCGCATCATTTGATC
+TTCATGATTTGGGCCGCCACGCAACATTACGCCGATTTCGCCCCTCAGGTTGAAGCGGTA
+ACCGGCGCGACGCTTCGCGATGAAGCCTTTTTCAACCAAACGGTCGAAAGCGTTCAGCGC
+ATTATTATTGAAGGGATTCGCGTGCGTTAAATGGCGAAACAACAACGGATGGGCTGGTGG
+TTTCTTTGCCTTGCATGTGTCGTGGTAATGGTTTGTACCGCGCAACGCATGGCGGGCCTG
+CACGCCTTGCAGATGCAGGCGACGGCCTCTGCTGCGGTGGTCAGCGCTCCCTCCTCGACA
+GATGACGGCTCGCCGGTCACTCCCTGCGAATTAAGCGCCAAGTCGCTGCTGGCGGCGCCT
+CCAGTACTCTTTGAAGGTGCTATCCTTGCGCTTTATCTACTGCTTTCCTTACTGGCGCCT
+GTCCGGGTCATGCGCCTGCCGTTTTCGCCTCCACGGGCTATTTCGCCGCCCACATTACGG
+GTACATCTACGATTTTGTGTCTTCCGTGAATGAATGATGATTTTATTCAGGCGGATACTG
+TTCTGCCTGTTATGGCTTTGGCTGCCCGTCTCCTGGGCGGCGGAAAGCGGCTGGCTGCGT
+TCGCCCGATAACGACCATGCCAGCATACGGCTACGTGCCGATACGTCCGCTAACAGTGAG
+ACCCGGCTGTTGCTGGATGTCAAACTGGAAAACGGCTGGAAAACCTACTGGCGCGCGCCG
+GGGGAAGGGGGCGTGGCACCCTCTATCGCCTGGAAAGGCGACATGCCTGAGGTAAGCTGG
+TTCTGGCCAACCCCCTCGCGCTTTGATGTGGCGAATATCACCACCCAGGGATATCACGAC
+GAGGTGACCTTTCCGATGATCGTGCGCGGTACGCCGCCGGCGACCTTGCGCGGTGTGTTG
+ACGTTATCAACCTGCAGCAATGTTTGTCTGTTGACCGATTACCCCTTTTCCGTGACGCCC
+ACTGTGCAGAATGCCGATTTTGCCCATGACTATGCGCGGGCGATGGGTAAAGTTCCGCTC
+CGCAGTGGGCTAACGGACTCGCTTGACGTTGGCTATCGCCCGGGAGAACTGGTGGTCACT
+GCTACGCGAGCGGCGGGCTGGTCATCGCCCGGGCTCTATCTTGACACCATAGATGACGTC
+GATTTTGCGAAGCCTCGCCTGCGCGTAGAGGGCGACAGGTTACAGGCGACGGTGCCGGTG
+ACGGACAGTTGGGGCGAAAAGGCGCCCGATTTGCGCGACAAATCGCTGACCCTCGTGTTA
+GCCGATGGCGCTATCGCCCAGGAGAGCACGCAAACCATTGGCGCTGCGCCAGCGCAAACG
+CCGGACAATGCGGCGCTACCTTTCTGGCAAGTTGTAATGATGGCGCTAATCGGCGGACTG
+ATTCTTAATTTAATGCCCTGCGTACTGCCTGTTCTGGGCATGAAACTTGGCTCTATTTTA
+TTGGTAGAGGAAAAAAGCCGCTCTCACATCAGGCGACAATTTTTGGCTTCGGTCGCCGGT
+ATCATTGCGTCATTTATGGCGCTGGCGGCGTTTATGACCCTCCTTCGCCTGTCAAACCAT
+GCGCTGGCCTGGGGAGTCCAGTTCCAGAATGCATGGTTTATTGGTTTTATGGCGCTGGTG
+ATGTTGTTGTTTAGCGCCAGCCTGTTCGGGCTTTTTGAGTTCAGGCTTCCCTCATCTATG
+ACCACGAAACTGGCCACTTACGGCGGTAACGGTATGTCGGGACATTTCTGGCAGGGGGCG
+TTCGCCACGCTGCTGGCGACGCCTTGTAGCGCGCCGTTTCTGGGCACGGCGGTCGCGGTG
+GCGCTCACGGCGTCGCTGCCGACGCTGTGGGGGCTGTTCCTTGCGCTTGGCCTGGGAATG
+AGCGCGCCGTGGCTACTGGTCGCGATACGACCAGGGCTTGCGCTACGTTTACCGCGCCCC
+GGGCGTTGGATGAATGTCCTGCGCAGGATCCTCGGTCTGATGATGCTGGGGTCGGCTATC
+TGGCTGGCGACGTTACTCCTGCCGCATTTCGGCTTCACTGCGTCAAAGAGCGCGCAAGAC
+ACGGTTCAGTGGCAACCGTTGAGTGAACAGGCAATCCAGTCGGCGCTGGCGCAGCATAAG
+CGGGTATTTGTCGATGTCACTGCGGACTGGTGTATTACCTGTAAAGTGAATAAATACAAC
+GTCCTGCAAAAAGAGGATGTGCAGGCCGCCTTGCAACAGCCGGATGTTGTGGCGCTGCGG
+GGAGACTGGACGCTGCCGTCCGATGCCATTACAGATTTTCTGAAAACGCGCGGCCAGGTC
+GCCGTGCCGTTTAATCAGGTATATGGCCCCGGTTTGCCGGAAGGGGAGGCACTGCCCACT
+TTGCTGACCCGCGATGCGGTATTACAAACGTTGAAAAAAGCGAAAGGAATAACCCAATGA
+ATGAAATACATGATTGTTTTACTGCTGGCGCTGTTTTCGACGCTGAGCATCGCGCAAGAA
+ACCGCTCCTTTTACGCCGGATCAGGAAAAGCAGATTAAAAATCTGATCCATGCGGCGTTG
+TTTAACGATCCTGCCAGCCCGCGGATAGGCGCTAAACACCCTAAGCTGACGCTGGTGAAC
+TTTACGGATTACAACTGCCCGTACTGCAAACAGCTCGATCCGATGCTGGAAAAGATTGTG
+CAGAAATATCCTGACGTTGCGGTCATTATTAAACCGCTGCCATTCAAAGGAGAGAGTTCC
+ATACTGGCGGCGCGTATTGCGCTGACCACCTGGCGCGATCATCCGCAACAGTTCCTCGCG
+CTACATGAAAAACTTATGCAAAAGCGCGGTTACCATACGGATGACAGTATTAAACAGGCC
+CAGCAGAAAGCAGGGGCGACGCCAGTGACGCTGGATGAAAAAAGCATGGAAACGATACGC
+ACTAATTTGCAGTTGGCAAGACTGGTCGACGTGCAAGGAACGCCAGCGACGATCATTGGC
+GACGAGCTGATTCCGGGCGCAGTGCCCTGGGATACGCTGGAAGCGGTGGTGAAAGAAAAA
+CTGGCGGCTGCCAATGGCGGGTAAATGATTACACATTCTTTCGGCATCGTTAATTATTTT
+GTATTATTTGGCTACCTCCTGGCCATGATGTTAGTCGGTGTCTATTTTTCCAGACGGCAA
+AAAACAGCAGACGATTATTTTCGCGGTGGTGGCCGGGTTCCTGGTTGGGCGGCTGGGGTC
+AGTGTATTTGCTACTACGTTAAGCTCAATTACATTTATGTCAATTCCTGCCAAAGCGTTT
+ACTTCCGACTGGACGTTTATCATTGGTCAGTATCTGGCTATCGCAATTTTACCGCTGGTT
+TTTTATTTCTATATTCCGTTTTTTCGGAAATTGAAAGTCACATCAGCCTATGAATATCTC
+GAAGCACGGTTCGATGTGCGCTGCCGTCTGTTCGCCAGCATGTCATTTATGTTGTTTCAT
+ATTGGACGTATCGCCATTATCACTTTCCTCACCGTGCTGGCCTTGCGCCCCTTCATCGCT
+ATAGACCCGGTGATTTTGGTACTGTTGATTAGTGTGATGTGTATCATTTATACCTGGATG
+GGGGGGAATATGGAAAGTCTATTAAATCGTTTATATGACGCGTTAGGCCTGGATGCGCCA
+GAAGATGAGCCACTGCTTATCATTGATGATGGGATACAGGTTTATTTTAATGAATCCGAT
+CATACACTGGAAATGTGCTGTCCCTTTATGCCACTGCCTGACGACACTCTGACTTTGCAG
+CATTTTTTACGTCTTAACTACGCCAGCGCCGTCACTATCGGCGCTGATGCAGACAATACT
+GCTTTAGTGGCGCTTTATCGCTTGCCGCAAACCAGTACCGAAGAAGAGGCGCTCACTGGT
+TTTGAATTATTCATTTCAAACGTGAAGCAATTGAAAGAGCATTATGCAATGAAATACGAC
+CTTATTATTATCGGCAGCGGTTCGGTTGGCGCCGCCGCTGGTTATTACGCCACCCGCGCC
+GGGCTAAAGGTCCTGATGACCGATGCGCATATGCCGCCTTATCAACAGGGCAGCCACCAC
+GGCGATACCCGTCTTATCCGCCACGCTTATGGTGAAGGCGAAAAATATGTCCCGCTGGTG
+CTTCGCGCCCAGACGCTTTGGGATGAGCTCTCCACACACAATGAAGAGCCTATTTTTGTC
+CGCTCCGGCGTCGTCAACCTCGGCCCGGCCGATTCCGCTTTCTTAGCCAACGTCGCACGA
+AGCGCGCAACAGTGGCAATTGAACGTCGAGCGCCTGGACGCGACGGCCCTCATGACGCGC
+TGGCCGGAAATTCGCGTGCCCGATAATTATATCGGGCTGTTTGAAGCTGACTCCGGTTTC
+CTGCGCAGCGAATTAGCCATTACCACATGGCTTCGTCTGGCCCGAGAGGCAGGCTGCGCA
+CAGCTATTCAACAGCCCGGTAAGCCATATTCACCATGATGATAACGGTGTGACGATAGAG
+ACGAGTGAAGGCTGCTACCACGCCAGCAAAGCGCTGATTAGCGCGGGCACCTGGGTCAAA
+ACGCTGGTACCGGAGCTGCCCGTTCAGCCCGTACGTAAAGTTTTTGCCTGGTTTAAGGCG
+GATGGACGTTACAGCACTAAAAACCGCTTTCCGGCCTTTACCGGCGAAATGCCCAACGGC
+GATCACTATTACGGTTTCCCGGCGGAGAACGACGAGTTAAAAATCGGCAAACACAATGGC
+GGGCAGCGAATACAGGCACCGGAAGAGCGCAAGCCCTTTGCCGCCGTTGCCAGCGATGGC
+GCGGAAGCATTTCCTTTCCTGCGTAACGTACTGCCGGGTATCGGCGGTTGTTTACATGGG
+GCGGCATGTACCTATGATAATTCGCCGGACGAGGATTTTATTATCGATACGCTGCCTGGC
+CATGAGAATACGCTTGTCATCACTGGACTCAGCGGACATGGTTTTAAATTCGCCCCGGTG
+TTAGGAGAAATCGCTGCGGATTTTGCGTTGGGAAAAACGCCCTCCTTTGATCTGACGCCG
+TTCCGGCTTTCCCGTTTTAGCCAATAAATGCAAATACAGAGCTTCTATCACTCAGCTTCA
+CTAAAAACCCAGGAGGCTTTTAAAAGCCTACAAAAAACCTTATACAACGGAATGCAGATT
+CTCTCAGGCCAGGGCAAAGCGCCGGCTAAAGCGCCCGACGCTCGCCCGGAAATTATTGTC
+CTGCGAGAACCTGGCGCGACATGGGGGAATTATCTACAGCATCAGAAGACGTCTAACCAC
+TCGCTGCATAACCTCTATAACTTACAGCGCGATCTTCTTACCGTCGCGGCAACCGTTCTG
+GGTAAACAAGACCCGGTTCTAACGTCAATGGCAAACCAAATGGAGTTAGCCAAAGTTAAA
+GCGGACCGGCCAGCAACAAAACAAGAAGAAGCTGCGGCAAAAGCATTGAAGAAAAATCTT
+ATCGAACTTATTGCAGCACGCACTCAGCAGCAAAATGGCTTACCTGCAAAAGAAGCTCAT
+CGCTTTGCGGCAGTAGCGTTTAGAGATGCTCAGGTCAAGCAGCTCAATAACCAGCCCTGG
+CAAACCATAAAAAATACACTCACGCATAACGGGCATCACTATACCAACACGCAGCTCCCT
+GCCGCAGAGATGAAAATCGGCGCAAAAGATATCTTTCCCAGTGCTTATGAGGGAAAGGGC
+GTATGCAGTTGGGATACCAAGAATATTCATCACGCCAATAATTTGTGGATGTCCACGGTG
+AGTGTGCATGAGGACGGTAAAGATAAAACGCTTTTTTGCGGGATACGTCATGGTGTGCTT
+TCCCCCTATCATGAAAAAGATCCGCTTCTGCGTCAGGCCGGCGCTGAAAACAAAGCCAAA
+GAAGTATTAGCTGCGGCACTTTTTAGTAAACCTGAGTTGCTTAACAGAGCCTTAGAGGGC
+GAAGCGGTAAGCCTGAAACTGGTATCCGTCGGGTTACTCACCGCGTCGAATATTTTCGGC
+AAAGAGGGAACTATGGTCGAGGATCAAATGCGCGCATGGCAATCGTTGACCCAGCCGGGA
+AAAATGATTCATTTAAAAATCCGCAATAAAGATGGCGATCTACAGACGGTAAAAATAAAA
+CCGGACGTCGCCGCATTTAATGTGGGTGTTAATGAGCTGGCGCTCAAGCTCGGCTTTGGC
+CTTAAAGCATCAGATAGCTATAATGCCGAAGCGCTACATCAGTTATTAGGCAATGATTTA
+CGCCCTGAAGCCAGACCAGGTGGCTGGGTTGGCGAATGGCTGGCGCAATACCCGGATAAT
+TATGAGGTCGTCAATACATTAGCGCGCCAGATTAAGGATATCTGGAAAAATAACCAACAT
+CATAAAGATGGCGGCGAACCCTATAAACTCGCACAACGCCTTGCCATGTTAGCCCATGAA
+ATTGACGCGGTGCCCGCCTGGAATTGTAAAAGCGGCAAAGATCGTACAGGGATGATGGAT
+TCAGAAATCAAGCGAGAGCTCATTTCTTTCCATCAGACCCATATGTTAAGTGCGCCTGGT
+AGTCTTCCGGATAGCGGTGGACAGAAAATTTTCCAAAAAGTATTACTGAATAGCGGTAAC
+CTGGAGATTCAGAAACAAAATACGGGCGGGGCGGGAAACAAAGTAATGAAAAATTTATCG
+CCAGAGGTGCTCAATCTTTCCTATCAAAAACGAGTTGGGGATGAAAATATTTGGCAGTCA
+GTAAAAGGTATTTCTTCATTAATCACATCTATGAAACGATATATACTGGCTACCGCGATA
+GCGTCTCTTGTTGCAGCCCCGGCAATGGCGCTGGCCGCTGGCAGCAATATTCTCAGCGTA
+CATATTCTCGATCAGCAAACAGGCAAACCAGCGCCCGGCGTGGAGGTGGTACTGGAGCAG
+AAAAAGGATAACGGATGGACGCAATTAAACACCGGGCATACCGACCAGGATGGACGAATT
+AAAGCACTGTGGCCCGAAAAAGCTGCCGCGCCGGGGGATTATCGCGTTATTTTTAAAACC
+GGCCAGTATTTTGAAAGTAAAAAACTGGACACGTTTTTCCCGGAGATTCCCGTCGAGTTT
+CATATCAGCAAAACGAATGAGCACTATCATGTGCCGCTGTTATTAAGTCAGTATGGTTAT
+TCAACCTATCGCGGGAGCTAAATGGCAAAGATTCTGGTGCTCTATTATTCCATGTACGGA
+CACATTGAAACCATGGCGCACGCGGTGGCGGAAGGGGCAAAGAAAGTCGACGGCGCAGAG
+GTCATTATAAAGCGTGTGCCAGAAACAATGCCGCCTGAAATCTTCGCAAAAGCTGGCGGT
+AAAACGCAAAACGCACCGGTTGCCACCCCACAGGAGCTGGCGGATTACGATGCCATTATT
+TTTGGTACGCCAACCCGGTTTGGCAATATGTCAGGCCAGATGCGTACCTTCCTGGACCAA
+ACCGGCGGACTGTGGGCATCCGGCGCGCTATACGGCAAGCTCGGCGGCGTGTTCAGTTCT
+ACCGGAACGGGCGGCGGCCAGGAGCAGACCATCACCTCGACCTGGACTACGCTTGCCCAT
+CATGGGATGGTGATTGTCCCGATAGGCTATTCCGCACAGGAACTGTTTGACGTCTCCCAG
+GTTCGCGGCGGTACGCCTTACGGCGCAACGACTATCGCTGGAGGCGACGGTTCACGTCAA
+CCAAGCCAGGAGGAACTCTCTATCGCTCGCTATCAGGGGGAATACGTCGCCGGTCTGGCA
+GTCAAACTCAACGGCTAAATGGAGCCTCAACCCCCACGTCTTAAACCCGGAAAAATCCTT
+GACACTCTGGGTGCTATGCAAAAAAGCCTGACACGTGCCTCCCAGCGTATTGCGCAATAT
+ATTTTAGCCTTCCCCAGACAGGTGACACAGTCATCTATTGCCGATTTGTCGCGCGACACA
+CAGGCCGGAGAAGCCACGGTTATTCGCTTTTGTCGCACCCTGGGCTATAAAGGTTTTCAG
+GATTTTAAAATGGACCTGGCCATTGAACTTGCCACTACCGAGTCTGATGACAGTAGTCCT
+CTACTGGATGCCGAAGTTAGCGAATCCGACGATGCCCACGCCATTGGTTTAAAATTGCAG
+AACACCATTAGTAATGTATTATCTGAAACGCTAAATCTGCTTGATATGCAACAGGTTCTC
+GGTGTCGTGGACGCCCTACGTCACTGTCACTCAGTTTATATCTTTGGTGTGGGCTCATCG
+GGGATCACGGCGCTGGATATGAAACACAAGCTAATGCGTATTGGTTTACGGGGCGATGCG
+GTAAGCAATAACCATTTTATGTACATGCAGGCTACGCTATTAAAAGCAGGCGATGTCGCG
+ATGGGTGTCAGTCACTCGGGCACATCGCCAGAAACAGTGCATTCACTCCGATTGGCCCGA
+CAGGCTGGCGCCACCACAGTCGCCATTACCCATAATCTGGGTTCTCCATTATGTGAAGAG
+GCCGATTTTTGCCTGATCAATGGTAATCGGCAAGGAATGTTGCAGGGTGACTCGATCGGT
+ACGAAAGCCGCGCAGCTTTTCGTCTTTGACCTGCTCTATACCCTTCTTGTACAGTCCTCG
+CCGGAACAGGCCCGAGAAAGCAAATTACGGACAATGAATGCCCTGGACATGACAAAAATG
+AAGAAACTGCCCGGCTTTACGCAAGATTACTTACTCAGCAAGGCGACGACCCTGCCTGAT
+AAAACACGCCTGGAGCGTGCCGTTGAACCGCTATGCGCGCGCCATCCCGGAGAGTGCGGC
+ATTCTTGCGCTGGATAACAGTCTGGACGCTTTTGCCGCCCGCTACCGCCTGACCGAAATG
+GCGGCGCGGACGCTGGATGTGCAGTATTATATTTGGGAAGACGATATGTCCGGGCGGCTG
+CTCTTTTCGGTTCTGCTGTCGGCGGCGAAGCGCGGCGTTCATGTTCGTCTGCTGCTGGAT
+GATAACAATACGCCTGGTCTGGATGATACGTTGCGCTTGCTGGATAGCCATCCTAATATC
+GAAGTTCGTCTGTTTAATCCTTTCTCTTTTCGTACGCTACGCGCGCTGGGATATTTGACG
+GATTTTGCGCGGCTGAATCGGCGGATGCACAATAAAAGTTACACTGCCGACGGCGTAGTG
+ACGCTGGTCGGTGGGCGCAACATCGGCGATGCCTATTTCGGCGCTGGCGAGGAGCCGCTA
+TTTTCCGATCTGGACGTGATGGCCATTGGCCCGGTGGTCAATGATGTCGCCAATGATTTT
+GAACGTTACTGGCGCTGTAGTTCAGTGTCGACATTGCAGCAAGTATTATCCCTTTCTGAG
+CAGGAACTGACGCAGCGTATCGAACTTCCCGAATCCTGGTATAACGATGAGATCACCCGC
+CGTTATCTGCATAAGCTGGAAACCAGCCAGTTTATGGCGGATCTCGATCGCGGAACGTTG
+CCGCTGATTTGGGCAAAAACACGCTTGCTTAGCGATGACCCTTCTAAAGGCGAGGGGAAG
+GCGCAGCGCCATTCGCTTCTTCCGCAGCGATTATTTGACGTGATGGGGTCGCCGACGGAG
+CGTATCGACATTATTTCCGCTTACTTTGTCCCTACGCGCGCAGGCGTGGCGCAGTTGCTT
+AATCTGGTCAGGAAAGGTGTGAAGATCGCCATCTTAACTAACTCTCTGGCGGCCAACGAT
+GTGGCGGTCGTTCACGCAGGGTACGCGCGCTGGCGCAAGAAATTACTGCGCTATGGCGTG
+GAGCTCTACGAACTGAAACCGACCCGCGAACATGAAACCGCCGTACATGATCGCGGACTC
+ACCGGGAACTCAGGTTCCAGCTTACATGCTAAAACGTTCAGTATTGATGGTAGTAAGGTG
+TTTATCGGGTCGCTTAATTTTGATCCCCGTTCAACGCTTTTAAATACCGAAATGGGCTTT
+GTCATTGAAAGTGAAACGCTGGCGACGCTTATTCATAAGCGTTTTACGCAGAGCCAACGC
+GATGCGGCCTGGCAACTGCGGCTGGATCGCTGGGGACGAATTAACTGGATCGATCGTCAG
+CAAGAAGAGGAAAAGGTGTTAAAGAAAGAACCCGCTACGCGTTTCTGGCAGCGAGTTCTG
+GTACGGTTGGCGGCAATTTTACCTGTGGAATGGTTGCTGTGAATGCCAACTCAAGAAGCA
+AAAGCGCACCGCGTCGGCGAATGGGCAAGCCTGCGTAATACGTCGCCGGAAATTGCCGAA
+GCCATTTTTGAAGTCGCTCACTATGACGAGAAACTGGCAGAAAAAATATGGGAAGAAGGT
+AGCGATGAGGTGCTGATCAAAGCCTTTGAGAAAACGGACAAAGACTCGCTCTTCTGGGGC
+GAACAAGTCATCGAACGTAAGAACGTATAAATGTATCCCGTTGACCTGCATATGCATACC
+GTCGCCAGCACTCATGCCTACAGTACTCTGAGCGATTATATCGCGGAAGCCAAACGCAAA
+GGCATTAAACTTTTTGCGATTACCGATCATGGTCCGGACATGGAAGATGCGCCGCATCAC
+TGGCATTTTATTAACATGCGCATCTGGCCGCGTCTGGTTGACGGCGTGGGGATACTGCGT
+GGCATTGAGGCGAATATCAAGAATATTAACGGTGAAATTGATTGTTCCGGAAAGATGTTC
+GACTCGCTGGATCTGATTATCGCAGGCTTTCATGAGCCCGTTTTTGCGCCGCATGATAAA
+GAAACCAATACTCAGGCGATGATCGCGACCATCGCCAGCGGCAAGGTGCATATAATTAGT
+CACCCCGGAAATCCAAAGTATCCAGTGGAGGTTAAAGCCATCGCGCAGGCGGCGGCGAAA
+CACCATGTAGCGCTGGAAATCAACAACTCTTCTTTTCTGCATTCGCGTAAAGGAAGCGAA
+GATAATTGCCGCGCGGTCGCTGCCGCCGTACGCGATGCGGGAGGCTGGGTAGCGTTAGGC
+TCTGATTCCCATACGGCCTTTACGCTTGGCGATTTCACCGAATGCCGGAAAATTCTGGAT
+GCGGTGAATTTTCCGGAAGATCGAATCCTGAACGTCTCTCCGCAGCGCTTACTGGCCTTT
+CTCGAATCACGCGGTATGGCGCCTGTACCGGAATTTGCCGAACTTATGAATGAGTTTTCA
+ATCCTGTGCCGTGTGCTGGGATCGTTGTTTTACCGCCAACCGCAAGATCCTTTACTGGTT
+CCGCTGTTTACGTTAATCCGTGAAGGTAAACTGGCGGCAAACTGGCCGCTGGAGCAGGAT
+GACATGCTGGCGCGTTTACAGAAAAGCTGCGATATCACGCAGATTTCCACTGATTACAAT
+GCGTTATTTGTTGGGGAAGAGTGCGCGGTAGCGCCATACCGCAGTGCGTGGGTCGAAGGC
+GCGGAAGAGTCTGAGGTGCGCGCTTTTTTAACGTCGCGAGGGATGCCGCTGGCCGATACG
+CCTGCCGATCACATTGGCACTTTATTGCTCGCGGCCTCCTGGCTGGAAGATCAGTCTGCC
+GAAGATGAAAGTGAAGCGCTGGAAACCTTATTTGCCGATTATCTGCTTCCCTGGTGCAAT
+ACCTTCCTCGGTAAAGTTGAAGCCCATGCCGTTACGCCATTCTGGCGCACTCTGGCGCCG
+CTAACGCGTGATGCGATAGGGGCCATGTGGGATGAACTTCAGGAAGAAGATGAAGAAATG
+ATGCGCGCCATGAACATACTTCTTTCTATTGCTATCACTACGGGCATCCTTTCTGGAATA
+TGGGGATGGGTGGCCGTCTCCCTGGGGTTACTAAGCTGGGCCGGTTTTTTAGGCTGTACG
+GCTTATTTCGCCTGTCCGCAGGGCGGCTTTAAGGGATTGTTGATTTCCGCCTGTACGCTG
+TTAAGCGGTATGGTGTGGGCGCTGGTCATTATTCACGGTAGCGCGTTGGCGCCGCATCTG
+GAAATTGTCAGTTACGTGTTGACGGGGATCGTGGCATTCCTGATGTGTATCCAGGCAAAG
+CAGCTATTGCTTTCTTTTGTTCCGGGAACATTTATCGGCGCCTGCGCGACATTTGCAGGG
+CAGGGTGACTGGCGGTTGGTATTACCGTCGCTGGCGCTGGGGCTAATCTTTGGCTATGCC
+ATGAAAAATAGTGGGCTATGGCTGGCATCACGCCGCGAGCAACATTCAGCGAATACGGCG
+GTCACAAAAATGAAAAAAAACCTGCTGGGATTCACCCTCGCATCCTTGTTATTCACGACC
+GGTTCCGCCGTGGCGGCGGAGTATAAAATTGATAAAGAAGGCCAACATGCGTTCGTCAAT
+TTCCGCATCCAGCATCTGGGCTACAGCTGGCTATACGGCACCTTTAAAGATTTCGACGGC
+ACGTTCACTTTTGACGAAAAAAATCCGTCAGCAGACAAAGTGAATGTGACCATTAACACC
+AATAGCGTCGACACTAACCATGCCGAACGTGACAAACACCTGCGTAGCGCGGAGTTTCTT
+AATGTTGCGAAATTCCCGCAGGCAACCTTCACCTCTACCAGCGTGAAAAAAGAGGGCGAT
+GAACTGGATATTACCGGCAATCTGACGCTCAATGGCGTGACTAAACCGGTGACGCTGGAA
+GCGAAGCTGATGGGCCAGGGCGACGATCCGTGGGGCGGTAAGCGCGCGGGCTTTGAGGCC
+GAAGGAAAAATTAAGCTGAAAGATTTCAATATAACTACCGATCTCGGCCCAGCCTCACAA
+GAGGTGGAGCTTATCATCTCAGTAGAAGGCGTTCAGCAGAAGTAAATGTTACTGATGATG
+GCGCTGATCGTGCGTATTATCTGGCGGCTTTATTCTCCGCCGCCCGTTGCGTTGACCAGC
+TATTCCCGTTTAACGCGCATTGGCGCCGCCGCGGGTCATATCCTTCTGTATCTCCTGCTC
+TTTGCGATAATCATTAGCGGCTACCTGATTTCCACCGCCGACGGTAAACCGATTAGCGTC
+TTTGGCTGGTTTGAGATTCCGGCCACGCTTACGGACGCGGGCGCGCAGGCTGACATCGCC
+GGAACACTGCATCTGTGGTTTGCCTGGTCGCTGGTCATTATCTCGCTCTCGCATGGGGTT
+ATGGCGCTAAAACACCATTTCATCGATAAAGACGACACACTGAAACGTATGACAGGAATG
+TCGTCATCTGACTATGGAGCTCAAAAATGAATGGTTAAGTTATCAATGACGCTGCGCCTG
+ACAATTTCTTTTATCGCCATACTTATCCTCGCCTGTACCGGCATTAGCTGGACGCTCTAT
+AACGCGCTGAGCAAAGAATTAACGTATCGGGATGATATGACGCTAATAAATCGGGCGGCG
+CAAATGCAGCAACTGTTACTGGATGGCGCCAGGCCGGAAAATCTGCCGCTCTATTTCAAT
+CGGATGGTGGATACGAAGCAGGATATCTTATTGATCCACTCAGCAACAGGCCATAATGTT
+GCGATTAATCATAGCGGCATCCCCGACCAACGCTTTAACGAGATTCCGCTGGCTAAAAAC
+ATCACCCGCGAAACCTTATTTCGCCAGGCGGTACAAGGCACGGAGCTGACCGCGGTACGA
+GTAAACGCCAGAAGCGGCGATAACCCGCTGACCCTTACTATTGCCAGGCTGGCGACGGAA
+AGGCGGCAAATGCTGGCGCAATATCGCCGCAACAGTTTGCTGATTAGCCTTATCGCGATC
+CTCGTCTGTTCGGCGCTCAGTCCATTAGTCATCAGAAACGGGCTGCGGGCCATTACGTCG
+CTCAGCCGACTCACCGCGGCGACAGATAGCGGCACACTTCGCCAGCCGCTGGCGGAACAG
+GCGTTACCCGTCGAGCTCAGGCCGCTTGGGCAAGCGCTAAATACCATGCGCCAGAAGCTT
+TCCGACGATTTTGAACGCCTGAACCAATTTGCCGACGATCTGGCGCATGAGCTGCGCACG
+CCGGTTAATATTTTACTGGGGAAGAATCAGGTTATGCTGAGTCAGGAACGCAGCGCCGAA
+GAGTATCAACAAGCCCTTGTCGATAATATTGAAGAGCTGGAGGGACTGTCGCGACTGACA
+GAAAATATTCTCTTTCTGGCACGCGCGGAGCACCAGAATATAGCGGTAAAAAAACAGCCT
+GTTTCGCTCAATGCGCTGGTCGAAAATATGCTGGATTATCTTAGCCCCCTTGCCGAAGAG
+AAGCACATCTGTTTTATAAATCAATGTCAGGGAACGGTATGGGCTGACGAAATATTATTA
+CAAAGAGTGCTCTCAAACCTGCTGACGAATGCCATCCGTTATTCTGATGAAAACGCCGTG
+ATACGTATTGAAAGCGCTTATGATGATAACGTTGCAGAAATTCGGGTCGCTAATCCGGGC
+AGCCCCACCGCCGATGCGGATAAGCTTTTCCGGCGTTTTTGGCGAGGAGATAATGCCCGC
+TACACTGCCGGTTTCGGCCTGGGGTTATCGTTAGTTAACGCGATTGCCCTATTGCACGGT
+GGCTCGGCATCTTACCGCTATGCCGATGAACATAATATCTTTTCGGTTCGTCTGCCTGAT
+AGCGGTGATAGCTAAGTGATATGTCTCAAAGTCCAGGGCGGCATTGGTGAAATTTTTACG
+GTGACGCAGCAGGCGGATAAATTCTTTCCGGCTACGCAGTTCCACTGGAGCTGGACGGAA
+AGCACAGTACCTGTATTGATGATTGGTTTTCTGTTTGCCAATATTCAGCAATTTACTGCC
+AGTCAGGATGTGGTCCAACGCTATATCGTGACTGACTCCATAGAGGAAACGAAGAAAACA
+TTACTTACAAATGCCAAACTGGTTGCTGTGATCCCTGTTTTCTTTTTTGCTATCGGCTCG
+GCATTATTTGTCTACTATCAGCAACATCCACAATTATTACCGGCGGGATTCAACACTGGC
+GGCATTTTGCCCTTATTCGTGGTCACCGAAATGCCAGTCGGCATTGCAGGGTTGATAATC
+TCCGCTATTTTCGCTGCCGCGCAGTCCAGCATCTCCAGCAGCTTAAACAGCATTTCCAGT
+TGTTTTAATTCCGATATCTATCAGCGTTTGAGTCATAAAAAAGGAACGCCAGAAAACCGT
+ATGAAAATAGCTAAGTTAGTTATTCTGGTCGCGGGCCTGATAAGTAGCGCGGCCTCGGTA
+TGGCTGGTCATGGCCGATGAATCAGAAATCTGGGATGCATTTAATAGTCTGATAGGTCTG
+ATGGGAGGGCCAATGACCGGTCTGTTCATGCTGGGCATTTTCTTTAAACGAGCAAATGCC
+GGGAGTGCGGTTTTAGGAATTATTATCAGCGTCATTACCGTGCTGGGCACACGCTATGCC
+ACTGACCTTAACTTCTTCTTTTATGGGGTCATTGGCTCGCTAAGCGTGGTGATCAGCGGC
+GTTATTTTCGCCCCGTTATTTGCCCCGGCACCGCCATTGACGCTGGATGAAAAACCTGAA
+CCAAAGGTGACATTAATGAAAATCAACAGATATCTTCTGGGTATGGTTTCGTTTATAGCA
+TTTTCATCATATCTACAAGCGGCAACCCTTGATTATCGGCATGAATATGCTGATAGAACC
+AGAATTAATAAAGACCGTATTGCTATAATTGAAAAGCTTCCTAACGGCATTGGTTTTTAT
+GTCGATGCCAGCGTTAAATCGGGAGGAGTAGATGGTGAGCAGGATAAGCATTTAAGCGAT
+CTCGTCGCAAACGCTATAGAACTGGGCGTAAGTTATAATTATAAAGTTACGGACCATTTT
+GTTTTGCAGCCTGGATTTATATTTGAAAGCGGTCCAGACACTTCAATTTATAAGCCTTAT
+TTAAGGGCGCAATATAATTTTGATTCTGGTGTTTATATGGCTGGTCGTTACCGTTATGAC
+TATGCAAGGAAGACAGCTAACTATAATGATGATGAGAAAACGAATAGATTTGATACTTAT
+ATAGGTTATGTTTTTGATGAGTTGAAATTGGAATATAAATTTACCTGGATGGATAGCGAT
+CAAATTAAATTTGATAACAAAAAAACAAACTATGAACATAATGTGGCTTTAGCCTGGAAA
+CTGAATAAGTCATTTACACCATACGTTGAGGTCGGAAATGTAGCGGTGAGAAATAATACC
+GATGAGAGACAGACCCGTTATCGCGTTGGATTACAATACCACTTTTGAATGACGAAATAC
+GGTGTTATAGGTACAGGTTATTTTGGCGCTGAACTGGCGCGATTTATGTCTAAGGTTGAA
+GGGGCGAAAATCACTGCGATTTACGATCCGGTAAATGCGGCTCCGATAGCGAAAGAGCTG
+AACTGTGTCGCCACTTCAACGATGGAGGCGCTTTGTACCCATCCTGATGTGGATTGCGTA
+ATTATTGCTTCACCAAATTACTTACATAAAGCGCCGGTCATTGCGGCGGCTAAAGCGGGT
+AAACACGTGTTTTGTGAAAAACCTATCGCCTTAAATTACCAGGATTGTAAGGATATGGTT
+GATGCCTGCAAAGAAGCTGGTGTTACCTTTATGGCGGGTCACGTTATGAACTTTTTTCAC
+GGGGTTCGCCACGCTAAAGCGCTCATCAAAGCCGGTGAAATCGGTGAAGTTACACAAGTT
+CACACTAAACGTAATGGTTTTGAAGACGTGCAGGATGAGATCTCATGGAAGAAGATTCGC
+GCAAAGTCAGGTGGGCATCTGTACCATCACATTCACGAGCTAGATTGTACACTGTTCATC
+ATGGATGAAACCCCATCCCTGGTTTCAATGGCGGCGGGGAATGTTGCGCACAAAGGTGAA
+AAATTTGGTGATGAAGATGATGTTGTCCTAATCACCCTTGAGTTTGAAAGCGGTCGTTTC
+GCGACACTTCAGTGGGGATCATCGTTCCACTACCCTGAGCACTATGTATTAATTGAGGGC
+ACGACAGGTGCAATTCTCATTGATATGCAAAACACGGCTGGTTATCTAATAAAAGCGGGC
+AAAAAAACACACTTTCTTGTGCATGAAAGCCAGGCGGAGGATGATGATCGTCGCAACGGT
+AACATATCCAGCGAGATGGATGGCGCAATCGCTTATGGTAAACCCGGTAAACGTACGCCG
+ATGTGGCTCTCATCAATTATGAAACTGGAGATGCAGTACTTGCATGATGTGATAAACGGT
+CTGGAGCCAGGCGAGGAGTTTGCTAAATTGCTAACGGGAGAAGCGGCGACAAATGCCATT
+GCTACCGCTGATGCTGCGACGCTTTCTTCAAACGAGGGGCGCAAAGTTAAACTCACTGAA
+ATTCTTGGCTAAATGACATCACGTCTTCAGGTCATACAGGGTGATATCACTCAACTTAGC
+GTCGATGCGATTGTGAATGCCGCTAACGCATCATTAATGGGCGGCGGTGGCGTAGACGGC
+GCAATTCATCGCGCGGCGGGGCCGGCATTGCTGGACGCCTGTAAACTCATCCGTCAGCAA
+CAGGGCGAATGTCAGACGGGACATGCGGTTATCACGCCTGCTGGCAAGCTTTCGGCAAAG
+GCGGTTATTCACACAGTGGGGCCCGTCTGGCGAGGCGGCGAACACCAGGAAGCTGAGCTA
+CTCGAAGAGGCATACCGGAATTGTTTGCTGCTTGCCGAGGCGAATCACTTTCGTTCCATC
+GCTTTTCCGGCAATCAGTACCGGCGTTTATGGCTATCCACGCGCCCAGGCCGCTGAAGTC
+GCCGTCAGGACGGTTTCAGATTTTATTACCCGTTACGCTCTGCCTGAACAGGTATACTTT
+GTCTGTTATGATGAAGAAACTGCCCGGCTTTACGCAAGATTACTTACTCAGCAAGGCGAC
+GACCCTGCCTGA
+>real_data_2
+ATGAAAAAATCATTACTCGCTGTTGCTGTGGCAGGGGCTGTTTTGTTGTCATCCGCCGTA
+CAGGCGCAGACAACGCCGGAAGGTTATCAATTACAACAGGTGCTGATGATGAGCCGCCAT
+AATCTGCGGGCGCCGCTGGCGAATAATGGCAACGTACTGGCGCAGTCGACGCCGAACGCC
+TGGCCGGCGTGGGACGTTCCCGGCGGGCAACTGACGACGAAAGGCGGCGTGCTGGAAGTC
+TATATGGGACACTACACACGTGAATGGCTGGTCGCGCAGGGGCTGATACCGTCGGGAGAA
+TGTCCGGCGCCCGACACGGTATATGCCTATGCGAATAGTTTGCAGCGCACCGTCGCCACC
+GCGCAATTTTTCATTACCAGCGCTTTCCCCGGCTGTGATATTCCTGTTCATCATCAGGAA
+AAAATGGGCACTATGGACCCTACCTTCAATCCGGTGATTACCGATGATTCCGCCGCGTTC
+CGGCAACAGGCCGTACAGGCGATGGAAAAGGCGCGTAGTCAGCTACATCTTGATGAGAGT
+TATAAACTGCTTGAGCAGATAACGCATTATCAGGACTCGCCGTCCTGCAAAGAGAAGCAT
+CAGTGTTCGCTAATCGACGCGAAAGATACCTTCAGCGCGAACTATCAGCAAGAGCCTGGC
+GTGCAGGGGCCGCTGAAAGTAGGGAACTCGCTGGTGGATGCGTTTACCCTGCAATATTAC
+GAAGGCTTTCCGATGGATCAGGTCGCTTGGGGCGGGATCCACACCGATCGGCAGTGGAAG
+GTGCTGTCAAAACTGAAAAACGGCTACCAGGACAGCCTGTTTACCTCACCCACGGTGGCG
+CGCAATGTCGCTGCGCCGCTGGTAAAATATATCGATAAGGTGCTGGTTGCCGAGCGCGTT
+AGCGCGCCGAAGGTTACCGTGCTGGTGGGGCATGATTCCAATATCGCGTCGCTGCTGACG
+GCGCTGGATTTTAAACCCTATCAGCTCCATGACCAGTATGAGAGAACGCCGATTGGTGGT
+CAGCTTGTCTTCCAACGCTGGCATGACGGTAACGCTAACCGGGATTTGATGAAAATCGAG
+TATGTCTACCAGAGCGCCCGGCAGTTACGTAATGCGGAAGCGTTAACGCTCAAATCGCCT
+GCGCAAAGGGTAACGCTGGAACTGAAAGGATGTCCGGTGGATGCGAACGGCTTCTGTCCG
+CTGGATAAGTTCGATAACGTCATGAACACTGCTGCAAAATAGATGGAAAAGAATAATGAA
+GTCATTCAGACCCATCCGCTTGTAGGATGGGACATCAGCACCGTCGATAGCTATGATGCG
+CTGATGCTGCGTTTACACTACCAGACCCCAAATCGTCCGGAACCGGAAGGGACTGAAGTT
+GGTCAAACGCTCTGGTTAACGACAGATGTAGCCAGGCAATTTATTTCAATATTAGAAGCC
+GGCATCGCCAAAATAGAATCAGGCGATTACCAGGAAAACGAGTATCGTCACCATTAGATG
+GAACTTAAGGATTATTACGCCATTATGGGCGTGAAACCGACGGACGATCTCAAGACGATT
+AAGACCGCCTATCGCCGACTGGCCCGCAAGTACCATCCAGATGTCAGCAAAGAACCCGAT
+GCCGAAGCCCGTTTCAAAGAGGTTGCTGAAGCATGGGAAGTGCTGAGTGATGAGCAACGG
+CGCGCCGAGTATGACCAGTTATGGCAACACCGTAACGATCCACAATTTAATCGCCAGTTC
+CAGCAACACGAAGGCCAGCCGTATAACGCCGAAGATTTTGATGATATTTTCTCGTCTATT
+TTTGGTCAGCACGGTCGTCATTCGCACCACCGCCACGCCGCACGCGGTCATGATATCGAA
+ATTGAAGTGGCGGTATTCCTGGAAGAAACGCTGGAAGAGCACCAGCGTACGATTAGCTAT
+TCCGTCCCCGTTTATAACGCGTTCGGCCTGGTGGAGCGGGAAATTCCCAAAACATTGAAT
+GTGAAAATCCCGGCTGGCGTCAGCAACGGGCAACGAATCAGACTGAAAGGCCAGGGCACG
+CCGGGGGAAAACGGCGGACCTAATGGCGATTTATGGCTCGTTATCCATATTGCCCCGCAT
+CCGCTCTTTGATATCGTCAATCAGGATCTGGAAGTCGTCCTTCCGCTTGCCCCATGGGAG
+GCGGCGCTCGGCGCTAAGGTGTCTGTGCCAACGCTTAAAGAGCGTATTTTGCTGACCATT
+CCCCCCGGCAGCCAGGCAGGTCAGCGGCTGCGTATCAAAGGAAAAGGATTAGCCAGTAAA
+AAGCACACTGGCGATCTCTATGCCATCATCAAAATCGTTATGCCGCCGAAACCTGACGAG
+AAAACAGCTGCCCTGTGGCAACAACTGGCGGACGCGCAGTCGTCCTTTGACCCACGCCAG
+CAATGGGGGAAAGCATAAATGGCTAACATCACTGTCACCTTTACCATCACCGAATTTTGT
+TTGCACACCGGCGTGACGGAAGAGGAGCTAAACGAAATCGTCGGACTTGGCGTAATTGAG
+CCTTACGAAGACGATAACGCCGACTGGCAATTCGACGATCGCGCAGCGAGCGTGGTACAA
+CGCGCGCTACGCTTACGCGAGGAGCTGGCGCTCGACTGGCCAGGGATCGCGGTCGCGTTA
+ACGCTGCTGGAAGAGAATTCACGGCTGCGCGAAGAAAACCGGTTACTGCTGCAACGCCTT
+TCTCGCTTTATCTCGCATCCCTAAATGTCATCTTGTTGGAGATTTACGGATTCGCTAACA
+AGCCTATGGCATACTGCGTTGATGAAGATTTTATTGATTGAAGATAACCAGAAAACCATT
+GAGTGGGTACGTCAGGGACTCACGGAGGCAGGCTATGTGGTTGATTATGCCTGTGATGGA
+CGAGACGGATTACACCTAGCCCTTCAGGAACATTATTCATTGATTATTCTTGATATTATG
+CTGCCGGGGCTTGATGGATGGCAGGTTTTACGCGCGTTGCGCACTGCATATCAGCCCCCT
+GTTATTTGCCTGACGGCGCGCGACTCGGTTGAGGATCGCGTCAAAGGTCTTGAGGCGGGC
+GCTAATGATTACCTTGTTAAGCCTTTTTCCTTCGCCGAACTGCTGGCCCGGGTGAGAGCT
+CAACTCAGACAGCATGTCCCGGTCTTTACCCGACTGACGATCAATGGTCTGGACATGGAT
+GCCACAAAGCAATCGGTGTTACGAAATGGCAAACCGATTTCCCTGACCCGCAAAGAATTC
+CTGCTCCTCTGGTTACTGGCGTCCCGGGCAGGGGAAATCGTGCCCCGAACCGCGATCGCC
+AGCGAAGTTTGGGGAATTAACTTTGATAGTGAAACCAACACCGTTGATGTCGCGATTCGT
+CGGCTGCGCGCCAAAGTAGACGATCCATTTGAAAAGAAGCTCATTATGACCGTCCAGGGG
+ATGGGTTATCGATTACAGGCGGAAACGTCGCAGAATGGTTAAATGAAAAACAAATTGTTA
+TTTATGATGTTGGCAATACTGGGTGCGCCTGGGATTGCAACCGCGACAAATTATGATCTG
+GCTCGTTCAGAGGATAATTTTGCGGTAAATGAATTAAGCAAGTCTTCATTTAATCAGGCG
+GCCATTATTGGTGAAGTCGGCACGGATAATAGTGCCAGAGTACGCCAGGAAGGATCAAAA
+CTATTGTCCGTTGTTTCACAAGAAGGAGAAAATAATCGGGCGAAAGTCGACCAGGCAGGG
+AATTATAACTTTGCGTATATTGAGCAAACGGGCAATGCCAACGATGCCAGTATATCGCAA
+AGCGCTTACGGTGATAGTGCGGCTATTATCCAGAAAGGTTCTGGAAATAAGGCCAATATT
+ACCCAGTACGGTGCGCAGAAAACAGCAGTTGTAGTGCAGAAACAGTCGCATATGGCTATT
+CGCGTCACCCAAGGCATGCATACTTTATTGCTCCTTGCCGCACTTTCAAATGAGATTACG
+TTTACCACGACTCAGCAAGGCGATATTTACACGGTGATCCCTCAGGTCACAGTAAACGAA
+CCCTGCGTCTGTCTGGTGCAAATTCTCTCTGTGCGCGACGGCGTCGGGGGAGAAAGCCAT
+ACACAGCAAAAACAAACGCTATCTTTACCTGCTAATCAACCGATTGAGTTGGCTCGTCTT
+AGTGTAAATATATCTTCAGAGGACTCGGTTAAAATTATTGTTACTGTTTCGGACGGACAA
+TCACTGCATTTATCACAACAATGGCCGCCTTCTGCACAGATGTTTAATGAAGTCCATAGT
+AGTCATGGTCACACACTATTGTTGATCACAAAGCCATCTCTGCAAGCTACGGCATTATTG
+CAACATTTAAAGCAATCGCTGGCCATAACCGGACAACTGCATAATATTCAACGTTCTCTG
+GAAGATATCTCAGCCGGTTGCATTGTTTTAATGCATATGATGGAAGCGGATAAGAAGCTT
+ATCCACTATTGGCAGGATAATTTAAGCCGCAAACACAATAATATAAAAACATTATTGTTA
+AATACCCCTGACGATTATCCCTACCGTGAAATTCAAAACTGGCCTCATATTAACGGCGTG
+TTTTACGCCACTGAAGACCAGGAACACGTGGTCCGCGGATTACAGGGTATTCTGCGGGGC
+GAATGCTATTTTTCACAAAAATTAGCCAGTTACCTGATTACACACTCAGGAAATTACCGC
+TACAACAGCACCGAGTCCGCATTACTCACTCATCGCGAAAAAGAGATCCTCAATAAGTTA
+CGTATTGGTGCCTCTAATAATGAAATCGCCAGGCCGCTATTTATCAGCGAGAATACGGTT
+AAGACACATCTTTATAATCTTTTCAAAAAGATACCTGTCAAAAATCGCACCCAGGCAGTT
+TCAATGAAACGCTATCTGACCTGGATTGTAGCAGCAGAGTTACTGTTCGCTACCGGAAAC
+CTCCATGCCAATGAAGTTGAAGTCGAGGTTCCCGGATTGTTAACCGACCATACCGTCTCT
+TCCATAGGACATGAATTCTATCGTGCATTCAGCGACAAATGGGAAAGCGAATACACCGGC
+AACCTGACCATTAATGAAAGACCCAGTGCGCGTTGGGGAAGCTGGATCACCATAACGGTA
+AACCAGGACGTTATTTTCCAGACCTTTTTATTTCCAATGAAAAGAGACTTCGAGAAAACC
+GTCGTCTTCGCATTAGCGCAAACAGAGGAAGCATTAAATCGCCGACAAATAGATCAAACG
+CTCTTAAGTACGAGTGATTTAGCGCGTGATGAATTCATGCGTGTTAAACATGCAGTAGTG
+CTGCTCATGCTTTTTTCGCCATTAACCTGGGCTCGAAATATGACGTTCCAGTTCCGTAAT
+CCTAACTTTGGTGGAAACCCCAATAACGGTTCCCTTTTATTGAATAGCGCCCAGGCGCAA
+AATTCATATAAAGACCCCGCTTATGATAACGATCTTGGTATCGAGACCCCCTCAGCGTTG
+GATAACTTTACGCAGGCTATTCAATCGCAAATTCTGGGCGGCTTGTTGACCAATATTAAT
+ACCGGAAAACCAGGACGTATGGTGACCAATGATCTTATTATCGATATCGCTAATCGCGAC
+GGACAGCTCCAGCTCAACGTCACGGACAGAAAACCGGGAAGAACCTCGACCATCGAAGTG
+TCAGGTTTACAAACTCAGTCAACCGATTTTATGCCGCGCTTACTTATTTTGGTTGCCGTT
+TTATCGTTGAGCGGATGCTTAACTGCCCCGCCGAAACAAGCTGCGAAACCGACATTAATG
+CCCCCCGCACAAAGTTACAAAGATTTGACGCACTTACCTGCTCCCACCGGTAAGATCTTT
+GTTTCGGTATATAACATTCAGGATGAAACGGGCCAATTTAAACCTTACCCGGCAAGTAAC
+TTTTCCACGGCTGTGCCGCAGAGCGCCACCGCTATGTTGGTCACCGCGCTGAAAGATTCG
+CGCTCGTTTATCCCACTAGAACGACAAGGCTTACAGAATCTTTTGAATGAACGGAAAATT
+ATTCCCGCAGCCCAGGAAAACGGCACCGTGGCGATGAATAACCGTATCCCGCTTCAGTCG
+TTGACGGCGGCAAATATTATGGTGGAAGGTTCTATTATTGGTTATGAAAGTAACGTCAAA
+TCCGCCGGGGTCGGCGCAAGATATTTCGGTATTGGCGCCGATACGCAGTATCAGCTGGAT
+CAGACTGCTGTCAACCTGCGCGTGGTTAACGTCAGTACGGGCGAGATCCTTTCTTCGGTG
+AACACCAGTAAAACGATCCTTTCCTATGAAGTACAGGCAGGCGTGTTCCGTTTTATTGAT
+TACCCGCGCTTACTGGAAGGCGAAATCGGCTATACCTCGAACGAACCGGTGATGCTGTGT
+CTGACGTCAGCCATTGAAACCGGCGTTATCTTCCTCATTAATGATGGTATCGATCGCGGA
+CTGTCGGATTTGCAGAATAAAGCGGACAGGCAAAATGATATTCTGGTGAAATACCGTGAG
+CTGTCAGTACCGCCAGAATCCATGTCTATTGCCGTAAATATGAATGACCCGACCAACACG
+GGCGTCAAAACGACGACCGGCAGCGGGTCGATGACCGGAAGCAACGCTGCCGATCTGCAA
+AGCAGTTTCCTGACCTTACTGGTCGCGCAATTGAAGAACCAGGACCCGACTAACCCATTA
+CAAAATAATGAGTTAACGACACAGTTGGCGCAAATCAGTACCGTGAGCGGCATTGAAAAA
+CTGAATACGACGCTGGGGGCTATTTCCGGGCAAATCGATAATAGTCAGTCCCTACAGGCG
+ACCACGCTGATTGGACATGGCGTTATGGTGCCTGGCACCACAATTCTGGCGGGTAAAGGC
+GCGGAAGAAGGGGCCGTGACGTCCACGACGCCGTTTGGCGTGGAATTGCAACAGCCTGCG
+GACAAAGTGACGGCAACCATTACCGATAAAGATGGCCGGGTGGTACGGACGCTGGAGATC
+GGTGAGTTGCGAGCCGGGGTACACACCTTTACCTGGGATGGTAAGCAAACGGACGGAACA
+ACGGTACCGAATGGTTCTTACAACATTGCGATTACCGCCAGCAATGGCGGGACGCAACTG
+GTGGCGCAGCCGCTGCAATTCGCTCTGGTACAGGGCGTGACGAAGGGCAGTAACGGCAAC
+CTGTTGGATCTGGGTACCTACGGCACCACCACACTCGACGAAGTTCGGCAAATAATCTAA
+ATGTCTTTTTCTCAAGCGGTTAGCGGCCTGAACGCTGCGGCCACCAACCTTGATGTTATC
+GGTAATAACATCGCCAACTCCGCCACCTATGGCTTTAAGTCCGGTACGGCATCATTTGCC
+GATATGTTCGCCGGTTCCAAAGTGGGGTTGGGCGTAAAAGTGGCGGGGATTACCCAGGAT
+TTTACCGACGGTACGACAACGAACACCGGGCGCGGGCTGGATGTCGCGATTAGCCAGAAC
+GGTTTTTTCCGCCTGGTAGACAGCAACGGTTCCGTGTTCTATAGCCGCAACGGCCAGTTC
+AAACTGGACGAGAACCGTAACCTGGTCAATATGCAGGGGATGCAGTTGACCGGCTATCCG
+GCCACCGGTACGCCGCCGACCATTCAGCAGGGGGCGAATCCTGCGCCGATCACCATTCCG
+AACACGCTGATGGCGGCGAAATCGACCACCACCGCGTCAATGCAGATCAACCTGAACTCA
+ACGGACCCTGTACCGTCTAAAACGCCCTTTAGCGTGAGTGATGCGGATTCGTATAACAAA
+AAAGGCACCGTCACCGTTTATGACAGCCAGGGTAATGCCCATGACATGAACGTCTATTTT
+GTGAAAACCAAAGATAATGAATGGGCTGTGTACACCCATGACAGCAGCGATCCTGCAGCC
+ACTGCGCCAACAACGGCGTCCACTACGCTGAAATTCAATGAAAACGGGATTCTGGAGTCT
+GGCGGTACGGTGAACATCACCACCGGTACGATTAATGGCGCGACAGCGGCCACCTTCTCC
+CTCAGCTTCCTTAACTCCATGCAGCAGAACACCGGGGCTAATAACATCGTCGCCACCAAT
+CAAAACGGCTATAAGCCTGGCGACCTGGTGAGCTACCAGATTAACAATGATGGCACCGTG
+GTTGGCAACTACTCCAACGAGCAGGAGCAGGTGCTGGGGCAGATTGTGCTGGCTAACTTC
+GCCAACAACGAAGGTCTGGCATCCCAGGGCGATAACGTCTGGGCGGCGACGCAGGCCTCC
+GGGGTAGCGCTGCTGGGGACTGCCGGTTCCGGCAACTTCGGTAAGCTGACGAACGGCGCG
+CTGGAAGCCTCTAACGTGGATTTGAGTAAAGAGCTGGTGAATATGATCGTCGCGCAGCGT
+AACTACCAGTCGAATGCGCAGACCATCAAAACCCAGGACCAGATCCTCAATACGCTGGTT
+AACCTGCGCTAAATGGATCACGCAATTTATACCGCCATGGGGGCGGCCAGCCAGACGCTT
+AACCAGCAGGCGGTAACGGCCAGCAACCTGGCTAATGCCTCAACGCCGGGCTTTCGCGCG
+CAGCTTAACGCGCTACGCGCGGTGCCCGTTGATGGCCTCTCTTTAGCGACGCGCACGTTG
+GTTACGGCGTCGACGCCGGGGGCGGATATGACCCAGGGTCAGTTGGACTACACTTCCCGC
+CCGCTGGATGTTGCGTTACAGCAGGACGGCTGGCTGGTGGTGCAAGCGGCGGATGGCGCT
+GAAGGATATACCCGTAACGGGAATATCCAGGTGGGCCCGACCGGGCAGTTAACCATTCAG
+GGACATCCGGTTATCGGCGAAGGCGGCCCGATTACCGTTCCGGAAGGGTCGGAAATCACC
+ATTGCGGCAGACGGCACGATCTCCGCGCTCAATCCCGGCGACCCGCCAAACACGGTGGCG
+CCCGTTGGGCGGCTGAAGCTGGTCAAAGCGGAAGGCAATGAGGTGCAGCGGAGCGATGAC
+GGTTTATTCCGCCTTACCGCCGAGGCACAGGCTGAACGCGGGGCGGTACTGGCCGCCGAC
+CCGTCAATTCGCATTATGTCGGGCGTGCTGGAGGGCAGTAACGTCAAGCCGGTTGAAGCC
+ATGACCGACATGATCGCCAACGCACGTCGTTTTGAAATGCAGATGAAGGTTATCACCAGC
+GTAGATGAGAACGAAGGGCGAGCTAACCAACTGCTGTCGATGAGTTAAATGATCAGTTCA
+TTATGGATCGCCAAAACCGGTCTGGACGCGCAGCAAACCAATATGGATGTGATTGCCAAT
+AACCTGGCAAACGTCAGCACCAATGGTTTTAAGCGTCAGCGCGCGGTATTTGAAGATCTG
+TTGTATCAGACCATCCGCCAGCCGGGCGCGCAGTCGTCCGAGCAGACGACGCTGCCTTCC
+GGGCTGCAAATCGGTACCGGCGTGCGTCCGGTCGCCACGGAGCGCCTGCACAGTCAGGGG
+AACCTGTCGCAGACCAACAACAGTAAAGATGTGGCGATTAAAGGGCAGGGCTTTTTCCAG
+GTCATGCTGCCGGACGGTACGTCTGCCTATACCCGCGACGGCTCTTTCCAGGTGGATCAG
+AATGGTCAACTGGTGACGGCGGGCGGTTTTCAGGTGCAGCCGGCAATCACCATTCCGGCC
+AACGCGTTAAGCATCACGATTGGCCGCGACGGCGTGGTCAGCGTTACCCAGCAAGGGCAG
+GCCGCGCCGGTTCAGGTCGGGCAGCTTAACCTGACCACCTTTATGAACGACACCGGTCTG
+GAAAGCATCGGCGAGAACCTCTATATCGAAACGCAATCGTCCGGCGCGCCGAACGAAAGC
+ACGCCGGGGCTCAACGGCGCGGGGTTGTTGTATCAAGGGTATGTCGAAACGTCGAACGTT
+AACGTGGCGGAAGAGCTGGTGAACATGATTCAGGTTCAACGCGCCTATGAAATTAACAGT
+AAAGCAGTATCGACGACCGATCAGATGCTGCAGAAACTGACGCAACTCTAAATGGCCCTG
+ATGGTCGCGACGCTGACAGGATGCGCCTGGATACCCGCTAAACCGCTCGTGCAGGGGGCG
+ACCACGGCGCAGCCGATACCTGGCCCGGTACCGGTGGCGAATGGCTCCATATTTCAGTCT
+GCGCAGCCGATTAATTATGGCTATCAGCCGCTTTTTGAAGATCGTCGACCGCGTAATATC
+GGCGATACGCTCACGATTGTGTTACAGGAAAACGTCAGCGCCAGTAAAAGCTCGTCGGCA
+AATGCCAGCCGCGACGGCAAAACCAGCTTTGGTTTTGATACGGTACCGCGTTATCTGCAG
+GGATTATTCGGTAATTCCCGCGCGGATATGGAGGCCTCCGGCGGCAACTCTTTTAATGGT
+AAAGGCGGCGCGAATGCCAGCAATACCTTTAGCGGCACGCTGACCGTGACCGTCGATCAG
+GTTCTGGCCAATGGCAATTTACACGTCGTGGGGGAAAAACAGATCGCGATTAATCAGGGA
+ACGGAATTCATCCGCTTCTCCGGCGTGGTAAATCCACGCACCATCAGCGGTAGCAACTCT
+GTTCCCTCGACACAGGTGGCGGATGCGCGGATTGAATATGTCGGGAACGGCTATATTAAC
+GAAGCGCAAAATATGGGCTGGCTGCAACGTTTCTTCCTTAATTTGTCGCCGATGTAAGTG
+TTTAAAGCTCTTGCAGGAATCGTTCTGGCACTGGTTGCCACTCTGGCGCACGCCGAGCGT
+ATCCGGGATCTGACCAGTGTCCAGGGAGTACGGGAAAACTCGCTGATCGGCTACGGGCTG
+GTGGTCGGGCTGGACGGTACGGGCGACCAGACGACCCAGACGCCATTTACCACCCAGACG
+CTGAATAACATGCTGTCACAACTGGGGATTACGGTCCCCACCGGCACCAATATGCAGTTG
+AAAAACGTGGCGGCGGTGATGGTGACGGCGTCGTATCCGCCTTTTGCGCGACAGGGACAA
+ACGATCGATGTCGTCGTTTCCTCAATGGGGAACGCTAAAAGTCTGCGTGGCGGGACGTTA
+TTAATGACGCCGTTAAAAGGGGTGGACAGCCAGGTGTATGCTCTGGCGCAGGGCAATATT
+CTGGTCGGCGGCGCGGGCGCTTCCGCAGGCGGCAGTAGCGTGCAGGTTAACCAGCTTAAT
+GGCGGGCGCATCACTAATGGCGCGATTATCGAACGCGAGTTGCCGACTCAGTTCGGCGCT
+GGCAACACCATTAATCTGCAATTGAACGACGAAGATTTTACGATGGCGCAGCAAATTACC
+GACGCCATCAACCGCGCCCGCGGTTACGGCAGCGCCACTGCGCTTGATGCGCGAACGGTA
+CAGGTACGCGTGCCCAGCGGCAACAGCTCGCAGGTGCGTTTTCTGGCGGACATTCAAAAT
+ATGGAAGTCAACGTGACGCCGCAGGATGCAAAAGTCGTGATCAACTCGCGTACCGGTTCG
+GTGGTCATGAATCGGGAAGTCACGCTGGATAGCTGCGCTGTGGCGCAGGGCAATTTGTCA
+GTGACAGTCAATCGCCAACTCAACGTCAACCAGCCGAATACGCCATTTGGCGGCGGGCAG
+ACCGTGGTGACGCCACAGACTCAGATAGATTTGCGTCAGAGCGGCGGATCGCTACAGAGC
+GTGCGTTCCAGCGCCAATCTGAACAGCGTAGTGCGCGCGCTGAATGCGCTTGGCGCGACG
+CCGATGGATCTGATGTCGATTTTGCAGTCCATGCAGAGCGCGGGCTGTCTACGCGCCAAA
+CTGGAAATCATCTGAATGATCGGAGACGGTAAATTGCTGGCCAGCGCGGCCTGGGATGCG
+CAATCTCTGAACGAACTGAAAGCGAAAGCGGGCCAGGACCCGGCGGCGAATATCCGTCCT
+GTGGCCCGTCAGGTGGAAGGGATGTTTGTGCAGATGATGCTGAAAAGTATGCGCGAGGCT
+TTACCCAAAGATGGTTTATTCAGCAGCGATCAGACGCGTCTGTATACCAGCATGTATGAC
+CAGCAGATCGCCCAGCAGATGACCGCCGGTAAGGGATTGGGGCTGGCGGATATGATGGTT
+AAACAGATGACGGGCGGGCAGACGATGCCTGCAGATGATGCGCCGCAAGTACCGCTTAAA
+TTCTCCCTGGAGACGGTAAACAGCTATCAAAATCAGGCGCTGACCCAACTGGTGCGCAAA
+GCCATACCGAAAACGCCGGACAGCAGCGATGCGCCGCTCTCCGGCGACAGTAAAGACTTT
+CTGGCCCGGCTTTCGCTCCCGGCGAGGCTGGCCAGCGAACAAAGCGGGGTGCCGCATCAT
+CTGATTCTGGCGCAGGCGGCGCTGGAGTCCGGCTGGGGGCAGCGGCAAATCCTGCGGGAG
+AATGGCGAACCCAGCTATAACGTATTTGGCGTGAAAGCGACCGCCAGTTGGAAAGGGCCG
+GTGACGGAAATCACCACCACTGAATACGAAAATGGCGAAGCGAAAAAAGTGAAAGCGAAA
+TTCCGCGTCTATAGCTCGTATCTGGAGGCGTTATCGGATTATGTCGCGCTGTTAACGCGT
+AACCCACGCTACGCTGCCGTGACCACTGCCGCCACGGCAGAGCAGGGCGCAGTGGCTCTG
+CAAAACGCCGGATACGCCACTGACCCGAATTACGCGCGTAAATTGGCCAGCATGATTCAG
+CAGTTGAAAGCGATGAGTGAAAAGGTCAGCAAAACCTACAGCGCGAATCTCGACAATCTC
+TTTTAAATGTCCAGCTTGATTAATCACGCCATGAGCGGACTTAACGCCGCGCAGGCCGCG
+TTAAATACGGTCAGTAATAACATCAACAATTATAACGTTGCGGGTTATACCCGGCAGACA
+ACTATTCTGGCGCAGGCAAACAGTACGTTAGGGGCTGGCGGCTGGATAGGTAATGGCGTT
+TACGTTTCAGGCGTACAGCGCGAATATGATGCGTTTATCACTAATCAGCTACGCGGCGCG
+CAAAACCAGAGCAGCGGCTTAACCACGCGCTATGAACAAATGTCGAAAATCGACAACCTG
+CTGGCCGATAAATCCAGCTCACTGTCTGGCTCGCTGCAGAGTTTTTTTACCAGCCTGCAA
+ACGTTAGTCAGTAATGCGGAAGATCCTGCGGCGCGTCAGGCGCTGATTGGTAAAGCGGAA
+GGGCTGGTAAACCAGTTCAAAACCACCGATCAGTATCTGCGCGATCAGGATAAACAGGTC
+AATATCGCGATTGGCTCCAGCGTGGCGCAAATCAACAATTACGCGAAGCAGATAGCTAAC
+CTGAACGATCAAATCTCCCGTATGACGGGCGTAGGCGCGGGCGCATCGCCGAACGACCTG
+CTCGATCAACGTGATCAGTTGGTTAGCGAGCTTAACAAGATCGTTGGCGTCGAGGTGAGT
+GTACAGGACGGCGGCACCTATAACCTGACGATGGCCAATGGCTATACGCTGGTGCAGGGG
+TCGACGGCGCGTCAGTTGGCGGCGGTTCCCTCCAGCGCCGACCCGACGCGAACGACTGTC
+GCTTATGTCGATGAGGCCGCCGGTAACATCGAAATTCCGGAAAAGTTGCTGAACACCGGT
+TCGCTCGGCGGGCTACTGACGTTCCGTTCTCAGGATCTGGATCAGACTCGTAATACGCTG
+GGCCAGTTGGCGTTGGCGTTTGCCGATGCGTTTAACGCGCAGCATACCAAAGGTTATGAC
+GCCGACGGCAATAAAGGGAAAGACTTCTTTAGCATTGGCTCGCCGGTGGTATATAGCAAC
+AGTAATAATGCCGATAAAACGGTATCGCTAACCGCTAAGGTGGTCGACAGCACGAAGGTT
+CAGGCGACGGATTATAAGATTGTTTTTGACGGTACAGACTGGCAGGTTACTCGCACTGCG
+GATAACACCACCTTCACGGCAACAAAAGATGCTGACGGAAAACTGGAGATTGACGGTCTG
+AAAGTGACGGTAGGGACTGGCGCACAGAAAAACGACAGTTTTCTTCTCAAGCCGGTCAGC
+AATGCTATCGTCGACATGAACGTTAAAGTGACAAATGAAGCCGAGATTGCGATGGCGTCT
+GAGTCAAAACTCGATCCTGATGTGGATACCGGCGACAGCGATAACCGCAATGGTCAGGCA
+TTGCTGGACTTACAAAACAGCAATGTAGTGGGCGGCAACAAAACCTTTAACGATGCTTAC
+GCCACGTTGGTCAGCGATGTGGGTAACAAAACGTCAACGCTGAAAACCAGCAGCACCACG
+CAGGCGAATGTGGTTAAACAGCTTTATAAACAGCAACAGTCGGTTTCCGGCGTTAACCTC
+GACGAAGAGTACGGCAATTTGCAGCGTTATCAGCAGTATTATCTGGCGAATGCGCAAGTA
+TTGCAGACCGCGAATGCGCTGTTTGATGCGTTATTGAATATTCGCTAAATGCGTATCAGT
+ACCCAGATGATGTACGAACAAAATATGAGCGGCATCACTAATTCTCAGGCCGAATGGATG
+AAGCTGGGCGAGCAGATGTCTACCGGTAAGCGCGTTACCAACCCATCTGACGATCCGATC
+GCCGCGTCGCAGGCGGTAGTACTCTTTCAGGCGCAGGCGCAGAATAGCCAGTACGCCCTG
+GCGCGTACGTTTGCCACCCAAAAAGTGTCGCTGGAAGAAAGCGTACTCAGTCAGGTGACG
+ACGGCGATTCAAACCGCGCAGGAAAAAATCGTCTATGCCGGAAACGGCACGTTAAGCGAC
+GATGACCGCGCGTCGCTGGCGACGGATTTACAGGGGATCCGCGATCAGCTGATGAACCTG
+GCAAACAGCACTGACGGCAATGGTCGCTATATCTTTGCCGGGTATAAAACGGAAGCGGCG
+CCATTCGACCAGGCGACAGGTGGTTATCATGGCGGCGAGAAAAGTGTTACCCAGCAGGTG
+GATTCCGCACGCACGATGGTAATTGGCCATACGGGAGCGCAAATTTTTAATAGCATCACC
+AGCAATGCGGTGCCGGAACCGGATGGCTCGGACTCCGAAAAGAATCTGTTTGTCATGCTC
+GATACGGCAATTGCCGCGCTCAAGACCCCGGTGGAAGGCAATGACGTGGAAAAAGAAAAA
+GCCGCTGCCGCCATTGATAAAACCAATCGCGGCTTAAAAAATTCGCTTAATAACGTCCTG
+ACCGTTCGTGCGGAACTGGGAACGCAACTGAGCGAACTCAGTACGCTGGATTCACTGGGA
+AGCGACCGTGCGCTGGGACAGAAGCTACAGATGAGCAACCTGGTAGATGTGGACTGGAAC
+TCGGTCATTTCCTCCTACGTCATGCAACAGGCGGCATTACAGGCGTCCTATAAAACGTTT
+ACCGACATGCAGGGAATGTCGCTTTTCCAGTTGAACCGGTAAATGGAGATAATTTTTTAT
+CACCCGACATTTAACGCCGCCTGGTGGGTAAATGCGCTGGAGAAGGCTCTCCCACATGCG
+CGCGTTCGTGAATGGAAGGTCGGTGATAACAACCCCGCAGACTATGCGCTTGTATGGCAG
+CCCCCGGTTGAAATGCTGGCCGGAAGACGCTTAAAAGCCGTCTTTGTGCTGGGCGCGGGG
+GTGGATGCAATTCTGAGTAAATTAAATGCGCATCCGGAAATGCTGGACGCCTCCATTCCT
+CTATTCCGTCTGGAAGATACCGGAATGGGCCTGCAAATGCAGGAGTATGCCGCCAGCCAG
+GTATTACACTGGTTCCGTCGTTTCGATGATTATCAGGCGCTGAAAAATCAGGCGCTATGG
+AAACCGTTGCCGGAATATACCCGCGAAGAGTTTAGCGTCGGTATCATAGGCGCAGGGGTA
+CTGGGCGCAAAAGTGGCAGAAAGTCTACAGGCGTGGGGGTTCCCGTTACGTTGCTGGAGT
+CGTAGCCGCAAATCCTGGCCTGGCGTGGAAAGTTATGTAGGGCGTGAAGAACTGCGCGCT
+TTCCTGAACCAGACGCGGGTGCTGATTAATCTGCTGCCGAATACGGCCCAAACGGTAGGA
+ATTATTAATAGCGAATTGTTGGATCAATTGCCGGATGGCGCTTACGTGCTGAATCTCGCG
+CGCGGCGTTCATGTTCAGGAGGCGGATCTGCTGGCTGCGCTTGATAGCGGTAAGCTAAAA
+GGCGCGATGTTGGATGTCTTTAGCCAGGAACCGTTACCGCAGGAAAGTCCATTATGGCGC
+CATCCGCGAGTCGCCATGACGCCGCACATTGCGGCAGTCACCCGTCCGGCGGAAGCCATC
+GATTATATTAGCCGCACCATTACCCAGCTGGAGAAGGGAGAGCCGGTGACGGGGCAGGTG
+GATCGGGCGAGAGGATATTGGATGTCCGTAATCAAGAAAAATATCCCTGCCATAGGCCTG
+TGTATCTGCGCTTTTTTTATCCATTCTGCGGTAGGGCAACAAACGGTACAGGGCGGCGTT
+ATCCATTTTCGCGGCGCGATTGTTGAGCCACTGTGCGATATTTCTACTCACGCCGAAAAT
+ATTGATTTAACCTGCCTACGCGAAGGTAAAAAGCAAATGCACCGGATAGACCTTCGGCAG
+GCATCTGGATTACCGCAGGATATTCAGTCCATTGCGACGGTACGGCTGCATTATCTCGAT
+GCGCAAAAAAGCCTGGCGGTGATGAATATTGAGTACCGTTAAATGGCAAACCATCGTGGC
+GGTTCCGGTAATTTTGCGGAAGACCGCGAAAGAGCATCAGAAGCAGGTCGTAAAAGTGGT
+CAGCACAGCGGGGGCAATTTTAAGAATGACCCGCAGCGTGCATCCGAAGCAGGCAAAAAA
+GGGGGCAAAAGCAGTAACCGTAATCGCTAGATGGTAATGTCCGCACCAGGACACATTGTT
+TACAGTAGTTACAACACCCTGTACGGACATTCTCTCTCCGGTGGTGGTCTTGTCATCTTA
+AAAGCTCTCATCATTTCCCTTACTGTCCATACCCATGACGCCATATGTGGTGCGCGTAGC
+CGTGTGTGGCGTCGTTTCAAAAAGCAAGCTAAGGCTTACAAGGAAGCCAACCCTCAGATG
+TGTGTGCGCATAATCGCGTTCAAGAGAACGCGGGTGATGTATACCTACAACTCAAGGTGC
+TATCCATGGGAAGACAAAAAGCAGTGAATGAAACGAATTTTCCTTACCTGCGCGGCGTTG
+TTGTTCAGCAGTCAGGCGTTGGCCGATGAGTGTGCCAGCGCCAGTACGCAGCTGGAAATG
+AATCGCTGCGCCGCCGCGCAATACCAGGCGGCAGATAAAAAGCTGAACGAAACCTATCAA
+AGCGCGATTAAGCGTGCGCAACCGCCGCAGCGTGAGCTATTGCAAAAAGCGCAGGTGGCA
+TGGATTGCCCTGCGCGACGCCGATTGCGCGCTGATTCGCTCAGGTACGGAGGGCGGCAGC
+GTTCAACCCATGATCGCCAGCCAGTGCCTGACCGATAAAACGAACGAACGCGAAGCGTTT
+TTAGCCTCGCTGCTGCAATGTGAAGAGGGTGATTTGAGCTGCCCACTGCCGCCAGCCGGT
+TAAGTGCGTATATTCGCGGTGAGCATAATGGTGATTACCCTGAGCGGCTGCGGCAGTATT
+ATCAGCAGAACGATCCCCGGACAAGGACACGGCAACCAGTATTACCCTGGCGTGCAGTTG
+GATATGCGTGATTCCGCATGGCGCTATATCACTATCCTCGATCTGCCCTTCTCACTGATC
+TTCGATACACTGCTACTGCCGCTCGATATTCACCACGGGCCTTATGAGATGTGCCAACGT
+GCGATCGCCAATATTGATATCAGCAAAGAGTATGACGAAAGCATGGGCAGTAACGATGTG
+CATTATCAGTCGTTTGCTCGTATGGCGGATTTCTTTGGTCGTGATATGCAGGCGCATCGC
+CACGACCAGTTTTTTCAAATGCACTTTCTTGATACCGGGCAGATTGAGCTACAGCTCGAC
+GATCATCGCTATTCGGTGCAGGCGCCGCTATTTGTGCTAACGCCGCCCTCGGTGCCGCAT
+GCTTTTATTACCGAATCGGATAGCGATGGTCATGTTCTGACGGTACGCGAAGAGCTGGTT
+TGGCCGCTGCTGGAAGTGCTTTATCCCGGCACCAGAGAGGCCTTCGGCCTGCCGGGAATC
+TGCCTGTCGCTGGCGGATAAACTCAACGAGCTGGCGGCGCTCAAACATTACTGGCAGCTA
+ATTGAGCGGGAGTCCACGGAACAACTGGCTGGCTGCGAACATACCTTGGTACTACTGGCG
+CAGGCGGTATTTACCTTGCTGTTGCGTAATGCGAAGCTGGACGATCATGCCGCAACCGGG
+ATGCGCGGTGAACTGAAACTTTTTCAGCGCTTTACCCTGTTAATTGACAACCACTTCCAT
+CAGCACTGGACGGTGCCCGATTATGCCTGTGAGTTGCATATTACCGAATCTCGTTTGACC
+GATATTTGCCGACGTTTTGCTAATCGCCCGCCTAAACGCCTGATTTTTGATCGGCAATTA
+CGCGAGGCGAAACGACTGCTGCTTTTTTCCGACAATGCTGTCAACGAGATCGCCTGGCAA
+TTAGGTTTTAAAGATCCGGCTTATTTCGCCCGTTTCTTTAATCGCCTTGCTGGCTGTTCT
+CCTTCGCAGTTTCGCCAACGTGAAGTTCCTTCTTTTCTCAACTAAATGGGACGCACACCG
+GATTACAAAGCCGCCTTTGGCTGCGCTCTGGGCGCTAACCCAGCCTTCTACGGCCAGTTT
+GAGCAGAACGCCCGTAACTGGTACACCCGTATTCAGGAGACCGGCCTGTACTTTAACCAT
+GCAATCGTCAACCCGCCCATTGACCGCCACAAACCTGCCGACGAAGTGAAAGACGTCTAT
+ATCAAGCTGGAGAAAGAGACGGACGCCGGGATTATTGTCAGCGGGGCGAAAGTTGTCGCC
+ACTAACTCCGCCCTGACTCACTACAACATGATTGGTTTCGGCTCAGCCCAGGTGATGGGC
+GAAAACCCGGATTTTGCTCTGATGTTTGTCGCGCCAATGGATGCCGAAGGCGTAAAACTT
+ATTTCGCGCGCCTCGTATGAAATGGTCGCGGGCGCGACGGGCTCGCCGTTTGATTATCCC
+CTCTCCAGCCGTTTTGATGAAAACGATGCCATTCTGGTGATGGACAAGGTGCTGATCCCG
+TGGGAAAACGTATTAATTTACCGTGATTTCGATCGTTGTCGTCGCTGGACGATGGAAGGC
+GGCTTTGCCCGTATGTATCCACTGCAAGCCTGTGTTCGTCTGGCGGTAAAACTTGATTTC
+ATTACCGCGCTGCTGAAAAAATCGCTCGAATGTACGGGTACCGTAGAGTTCCGGGGCGTG
+CAGGCCGATCTCGGCGAAGTCGTGGCCTGGCGCAATATGTTCTGGGCATTGAGCGATTCT
+ATGTGTTCTGAAGCAACCCCGTGGGTAAACGGCGCCTGGCTACCGGACCACGCCGCGCTG
+CAAACCTATCGTGTGATGGCCCCAATGGCCTACGCGAAAATTAAAAATATTATTGAACGT
+AACGTTACCAGCGGCCTGATTTACCTGCCTTCCAGCGCCCGCGATCTGAATAATCCGCAA
+ATCGACCAGTACCTGGCGAAATACGTACGCGGCTCTAACGGAATGGACCATGTTGAACGT
+ATCAAAATTCTTAAATTGATGTGGGATGCCATCGGCAGCGAGTTTGGCGGTCGCCATGAG
+CTGTACGAGATTAACTACTCGGGCAGCCAGGATGAAATTCGTCTGCAGTGTCTGCGTCAG
+GCCCAGAGCTCCGGCAATATGGATAAGATGATGGCAATGGTCGATCGCTGCCTCTCCGAA
+TACGATCAGAATGGCTGGACGGTTTCGCATTTGCACAATAACGACGACATCAATCAACTG
+GATAAGCTGCTGAAATAAATGCAAGTAGATGAACAACGTCTGCGTTTTCGCGATGCGATG
+GCAAGTCTGGCGGCAGCGGTCAACATCGTAACCACGGCGGGTCACGCCGGACGCTGCGGT
+ATCACCGCAACAGCGGTTTGCTCAGTCACTGATACGCCGCCCTCCGTGATGGTATGTATT
+AATGCCAATAGCGCCATGAACCCCGTTTTTCAGGGCAACGGCAGGCTGTGCATTAATGTA
+CTTAACCATGAGCAGGAGCTGATGGCGCGCCACTTTGCCGGTATGACGGGGATGGCGATG
+GAGGAGCGTTTTCACCAGCCATGTTGGCAAAACGGGCCGCTGGGCCAGCCGGTACTTAAC
+GGCGCGCTGGCCAGTCTTGAAGGCGAGATCAGCGAGGTACAAACCATTGGCACGCATCTG
+GTGTATCTGGTGGCGATCAAAAATATTATTCTTAGCCAGGAGGGGCATGGCCTGATTTAT
+TTCAAACGCCGTTTTCATCCGGTCAGACTTGAGATGGAAGCGCCTGTTTAAATGAAGGGT
+ACTGTTTTCGCCGTTGCGTTAAACCATCGCAGCCAGCTTGATGCCTGGCAAGAGGCTTTC
+TCTCAGCCTCCCTATAATGCGCCGCCTAAAACCGCAGTGTGGTTCATCAAGCCGCGTAAT
+ACGGTGATTCGTCACGGCGAACCCATTCCTTATCCGCAGGGAGAAAAGGTACTGAGCGGC
+GCGACAGTGGCGCTCATTGTGGGGAAAACCGCCAGCCGGATACGCCCTGAAGCGGCGGCG
+GACTATATCGCCGGGTATGCGCTGGCTAACGAGGTCAGCCTGCCGGAAGAGAGCTTTTAT
+CGCCCGGCGATTAAAGCGAAATGTCGCGATGGCTTTTGCCCGCTGGGTGAAATGGCGCCG
+CTGAGTGATGTGGATAATCTCACCATTATCACTGAAATCAACGGACGAGAAGCGGACCAC
+TGGAATACTGCCGATTTACAGCGTAGCGCCGCACAACTGCTTAGCGCGTTAAGTGAGTTC
+GCTACACTTAACCCTGGCGATGCGATCTTACTTGGTACGCCGCAGAATCGCGTTGCGCTG
+CGTCCCGGCGATCGGGTGCGTATTCTGGCGAAAGGTTTACCCGCGCTGGAAAATCCGGTT
+GTCGCAGAAGATGAATTCGCCCGCCACCAGACGTTTACGTGGCCGCTGTCAGCGACGGGA
+ACGTTATTTGCGCTGGGGTTGAACTACGCCGATCACGCCAGCGAGCTGGCATTTACGCCG
+CCGAAAGAGCCGCTGGTATTTATCAAAGCGCCAAACACCTTTACCGAACATCACCAAACG
+TCGGTGCGCCCGAACAACGTCGAATATATGCACTACGAAGCCGAGCTGGTCGTGGTGATT
+GGCAAAACGGCGCGTAAGGTGAGCGAAGCCGAAGCCATGGAGTATGTGGCCGGTTACACC
+GTCTGTAACGACTACGCGATCCGCGACTATCTGGAAAACTACTACCGTCCGAATCTGCGG
+GTAAAAAGCCGCGACGGCCTGACGCCGATAGGCCCGTGGATTGTGGATAAAGAGGCGGTT
+TCTGATCCGCACAACCTGACGTTACGCACCTTTGTCAACGGTGAGCTGCGGCAGGAAGGG
+ACGACCGCCGATCTGATCTTCAGCATCCCGTTCCTGATTTCTTATCTGAGCGAATTTATG
+ACGTTGCAACCGGGCGACATGATTGCCACCGGTACGCCGAAAGGGCTGTCCGATGTGGTG
+CCGGGGGATGAAGTTGTCGTTGAAGTAGAAGGCGTGGGTCGCCTGGTTAACCGAATCGTC
+AGTGAGGAGAGCGCAAAATGAATGAAAAATGCTTTCAAAGACGCGTTAAAAGCGGGGCGC
+CCGCAAATCGGTTTGTGGCTGGGGCTTGCCAACAGTTACAGCGCTGAACTGTTAGCGGGC
+GCCGGCTTCGACTGGCTACTGATTGACGGTGAACACGCGCCAAACAACGTGCAGACGGTG
+TTGACCCAGTTGCAGGCGATTGCGCCTTATCCCAGCCAGCCGGTGGTGCGTCCGTCATGG
+AACGATCCGGTACAGATTAAGCAACTGCTCGACGTCGGCGCGCAAACGCTGCTGATACCG
+ATGGTGCAGAATGCCGATGAAGCGCGAAACGCCGTGGCGGCTACGCGTTATCCGCCTGCC
+GGTATTCGCGGCGTGGGCAGCGCGCTGGCGCGGGCATCGCGCTGGAATCGCATTCCGGAC
+TATCTCCACCAGGCCAACGACGCCATGTGCGTACTGGTGCAGATTGAAACGCGTGAGGCG
+ATGAGCAATCTGGCGTCAATTCTCGACGTGGATGGCATTGACGGCGTGTTTATTGGCCCG
+GCGGATCTCAGCGCCGATATGGGCTTTGCCGGCAATCCGCAGCACCCGGAAGTGCAGGCG
+GCGATTGAGAACGCCATCGTGCAGATACGCGCGGCGGGGAAAGCGCCGGGGATTCTGATG
+GCCAATGAAGCACTGGCGAAACGTTATCTGGAACTGGGGGCGCTATTTGTCGCCGTCGGC
+GTTGACACCACGCTGCTGGCGCGCGGAGCGGAGGCGCTGGCGGCGCGCTTTGGCGCAGAA
+AAAAAACTGTCCGGTGCGTCCGGCGTCTATTAAATGCATGATTCATTAACCATCGCCTTG
+CTTCAGGCGCGCGAAGCGGCAATGACCTATTTCCGCCCCATCGTTAAAAGCCACAATCTG
+ACCGACCAGCAATGGCGCATTGTGCGAATCCTGGCCGATAGCCCCTCTATGGATTTTCAC
+GAGCTGGCCTTTCGTACCTGTATTTTGCGTCCAAGTCTGACCGGAATATTGACGCGCATG
+GAGCGAGACGGACTGGTGTTGCGACTCAAGCCGGTTAACGATCAGCGTAAGTTATATGTC
+ATGTTGACGGAGCAGGGACAAACGTTGTACGCCCGTGCCCGGAGCGAGGTAGAAGAGGCT
+TATCGAAAAATTGAGGCCGATTTCACGCCCGAAAAAACACAGCAATTGATGCTGCTGCTG
+GACGATCTTATTGCTCTGGGGCGCCAGCATCCTGATAGCGAAGCGGAAGCATAGATGAGC
+GACACATCATCTGCACTTCCGGAAAGCCCCGAGTCTGTCGGTTCGCACAACGCGCTCAGC
+ACGGGTCAACAAACCGTCATAAATAAACTGTTCCGCCGACTGATCGTATTTTTATTCGTG
+TTGTTTATCTTCTCGTTTTTAGACCGTATCAACATCGGTTTTGCCGGGTTGACGATGGGG
+CAGGATCTGGGGTTAAGCGCCACCATGTTTGGTCTTGCCACGACGCTGTTTTACGCCACC
+TACGTCATTTTCGGCATTCCCAGCAACGTGATGTTGAGCATCGTCGGCGCCCGCCGCTGG
+ATTGCGACCATTATGGTGCTATGGGGCATTGCATCTACCGCCACGATGTTCGCGGTGGGA
+CCGAAAAGCCTGTATGTGCTGCGAATGCTGGTGGGCATTACCGAAGCGGGCTTTTTGCCA
+GGAATATTGCTCTATTTAACCTACTGGTTCCCGGCATTTTTCCGCGCCCGCGCCAACGCA
+TTATTTATGATTGCCATGCCGGCCACTACCGCGTTGGGGTCAATTGTCTCCGGCTATATT
+TTATCGCTGGACGGCATATTCAATCTGCATGGATGGCAGTGGTTATTCCTGTTGGAAGGA
+TTTCCGTCAGTTTTGTTAGGCATTATGGTCTGGTTTTACCTGGATGATACCCCGGCAAAA
+GCCAAATGGCTGACGGCAGAGGATAAAAAATGTTTGCAGGAGATGATGGATAATGATCGC
+CTGACGCTGGTTCAGCCTGAGGGGGCCATCAGCCATAACGCCATGCAGCAGCGTAGCCTG
+TGGCGCGAAGTATTCACGCCAATTGTACTGATGTATACGCTGGCCTATTTTTGCCTTACC
+AATACGCTTAGCGCCATTAGTATCTGGACGCCGCAAATCCTGAAAAGTTTTAATGAAGGC
+AGCAGCAATATCACCATCGGCCTGCTGGCGGCGATCCCGCAGATTTGTACTGTTCTGGGC
+ATGATTTACTGGAGCCGCCATTCGGACAAACATCAGGAGCGTAAACACCACACTGCGTTA
+CCGTTCCTGTTTGCCGCCGCGGGCTGGCTGCTGGCGTCGGCGACCGACCGTAACCTGATC
+CAGCTCCTGGGGATCGTGATGGCATCCACGGGTTCCTTTAGCGCGATGGCGATCTTCTGG
+ACCACGCCGGATCAGTCGATCAGTTTACGCGCCAGGGCGATAGGCATTGCGGTCATCAAT
+GCCACCGGCAATATTGGCTCCGCGCTCAGCCCGGTTATGATTGGCTGGCTAAAAGATATC
+ACTGGTAGCTTCAATAGCGGACTCTGGTTTGTCGCTTCTCTGTTAGTCGTCGGCGCCGCC
+ATTATCTGGCTCATTCCCATGAAAGCATCGCGTCCGCGCGCCACCCCTTGAATGGGCAAG
+TTAGCGTTAGCAGCAAAAATTACCCACGTGCCGTCGATGTATCTTTCTGAACTGCCAGGA
+AAAAATCACGGTTGTCGTCAGGCAGCCATTGATGGGCATATTGAAATTGGCAAGCGTTGC
+CGCGAAATGGGCGTTGACACCATTATCGTATTCGACACCCACTGGCTGGTGAATAGCGCT
+TACCACATTAATTGTGCCGACCATTTCCAGGGCGTCTATACCAGCAACGAATTGCCGCAC
+TTTATTCGCGACATGACCTATGACTATGACGGTAATCCGGCGCTCGGCCATCTGATCGCC
+GACGAGGCGGTCAAACTGGGCGTGCGCGCCAAAGCGCACAACATCCCGAGCCTGAAGCTG
+GAGTATGGCACGCTGGTGCCGATGCGCTACATGAACAGCGACAAGCACTTCAAAGTGGTC
+TCCATCTCGGCGTTCTGCACTGTGCATGATTTTGCCGACAGCCGCAAACTGGGCGAAGCC
+ATTCTCAAGGCGATTGAGAAATATGACGGTACCGTAGCGGTATTCGCCAGTGGTTCTCTG
+TCGCACCGTTTTATTGACGACCAACGGGCGGAAGAGGGGATGAACAGCTACACCCGCGAG
+TTCGATCATCAAATGGACGAGCGCGTGGTCAAGCTGTGGCGCGAAGGCAAATTCAAGGAG
+TTTTGCACCATGTTGCCGGAGTACGCCGACTACTGCTACGGCGAAGGCAACATGCACGAC
+ACGGTCATGCTACTGGGAATGCTGGGGTGGGACAAATACGACGGCAAGGTGGAGTTCATC
+ACCGACCTGTTCGCCAGCTCCGGTACCGGCCAGGTAAACGCTGTTTTCCCGCTGCCTGCG
+TAAATGAAGAAAATAAATCATTGGATTAACGGCAAAAACGTTGCAGGTAACGACTACTTC
+CAGACCACTAACCCGGCGACCGGTGATGTGCTGGCGGAAGTAGCCTCCGGCGGTGAAGCA
+GAAGTGAACCAGGCTGTCGCGGCGGCAAAAGAGGCGTTCCCGAAATGGGCCAACCTGCCG
+ATGAAAGAGCGCGCGCGCCTGATGCGCCGCCTTGGCGACCTGATTGACCAGCATGTGCCG
+GAAATCGCGGCGATGGAAACCGCCGACACCGGCCTGCCTATTCACCAGACTAAAACGTGC
+TGAGTGCTGATCCCGCGCGCCTCGCATAACTTCGAATTCTTCGCCGAAGTGTGCCAGCAG
+ATGAACGGCAAGACCTATCCGGTTGACGATAAAATGCTCAATTATACGCTGGTGCAGCCC
+GTCGGCGTCTGCGCGCTGGTGTCGCCGTGGAACGTGCCGTTTATGACCGCGACTTGGAAA
+GTTGCGCCGTGCCTGGCGCTGGGTAACACCGCGGTGCTCAAAATGTCCGAGCTGTCGCCG
+CTGACTGCCGACAGGCTGGGCGAGCTGGCACTGGAGGCAGGAATTCCGGCAGGCGTGCTG
+AACGTGGTGCAGGGCTACGGCGCGACGGCGGGCGATGCGCTGGTACGCCACCATGACGTG
+CGTGCGGTGTCGTTTACCGGCGGTACCGCCACCGGTCGCAATATCATGAAAAATGCCGGG
+CTGAAAAAATACTCGATGGAGCTGGGCGGCAAATCGCCGGTGCTGATTTTTGAAGACGCC
+GACATTGAGCGCGCGCTGGACGCCGCGCTGTTCACCATCTTCTCGATCAACGGCGAACGC
+TGCACCGCTGGGTCGCGCATCTTTATCCAGCAGAGCATTTACCCTGAGTTCGTGAAGCGC
+TTTGCCGAACGCGCGAATCGCCTGCGTGTCGGCGATCCGACCGACCCGAACACCCAGGTC
+GGCGCGCTGATTAGCCAACAGCACTGGGAGAAAGTCTCCGGTTATATCCGCCTCGGCATT
+GAAGAGGGGGCAACGCTGCTGGCGGGCGGTGCGGAAAAACCCACTGACCTGCCTGCGCAT
+CTGAAAGGCGGTAACTTCCTGCGCCCAACCGTGCTGGCCGATGTCGACAACCGTATGCGC
+GTTGCGCAGGAAGAGATCTTTGGGCCGGTCGCCTGCCTGCTGCCATTCAAAGACGAAGCG
+GAAGGGTTACGTTTGGCGAACGATGTGGAATACGGTCTGGCCTCTTATATCTGGACCCAG
+GACGTGAGCAAAGTGTTGCGCCTGGCGCGTGGGATTGAAGCCGGCATGGTCTTCGTCAAC
+ACCCAGAACGTCCGCGACCTGCGCCAGCCGTTCGGCGGCGTGAAAGCCTCCGGTACCGGG
+CGCGAAGGCGGCGAATATAGCTTCGAAGTGTTTGCGGAAATGAAAAACGTCTGCATCTCA
+ATGGGCGACCATCCTATCCCAAAATGGGGAGTTTGAATGCCGCACTTTATTGCTGAATGT
+ACTGAAAATATTCGCGAGCAGGCTGATTTACCAAGCCTGTTCAGCAAGGTAAACGAGGCG
+CTGGCCGCCACCGGGATTTTCCCCATCGGCGGTATCCGCAGTCGCGCCCACTGGCTGGAT
+ACCTGGCAGATGGCTGACGGTAAGCATGATTACGCGTTTGTGCATATGACGCTGAAAATC
+GGCGCCGGGCGCAGCCTGGAGAGCCGTCAGGAAGTCGGCGAAATGCTGTTTGGGCTGATT
+AAAGCCCACTTCGCCGACCTGATGGAGAACCGCTATCTGGCGCTGTCGTTTGAGATTGCC
+GAGTTACATCCAACGCTCAATTACAAACAAAACAACGTACACGCGTTATTTAAATAGATG
+CTCGATAAACAGACCCATACCCTGATCGCTCAGCGACTTAATCAGGCTGAAAAACAGCGT
+GAACAGATTCGCGCAGTGTCGCTGGATTATCCCAACATCACTATTGAAGATGCCTATGCC
+GTACAGCGTGAATGGGTCAATATCAAGATTGCCGAAGGGCGCACGCTCAAAGGCCACAAA
+ATCGGCCTGACCTCAAAAGCGATGCAGGCCAGCTCGCAAATCAGCGAACCGGATTACGGC
+GCGCTGCTTGACGATATGTTCTTCCATGACGGCGGAGATATCCCCACCGACCGTTTTATC
+GTCCCGCGTATTGAAGTGGAGCTGGCGTTCGTGCTGGCGAAACCGCTGCGCGGCCCTCAC
+TGCACGCTGTTCGACGTCTACAACGCCACGGATTATGTGATTCCGGCGCTGGAACTGATT
+GACGCCCGCAGCCACAACATCGACCCGGAAACCCAGCGCCCGCGCAAAGTGTTCGACACC
+ATTTCCGACAACGCCGCCAACGCCGGGGTGATCCTCGGTGGTCGCCCCATCAAACCAGAC
+GAGCTGGATCTGCGCTGGATCTCCGCGCTGCTCTATCGCAACGGCGTGATCGAAGAAACC
+GGCGTCGCCGCAGGCGTGCTGAATCATCCGGCCAACGGCGTGGCGTGGCTGGCGAACAAG
+CTTGCCCCCTACGATGTCCAGCTTGAAGCCGGGCAGATCATCCTCGGCGGCTCGTTCACC
+CGCCCGGTGCCGGCGCGCAAGGGCGACACCTTCCATGTCGATTACGGCAACATGGGCGCG
+ATCAGTTGCCGGTTTGTGTAAATGAGCTCTGTACCCGCGCCGCGTGAATATTTTCTTGAC
+TCTATCCGCGCATGGCTGATGTTGTTAGGGATTCCCTTTCATATCTCGTTGATCTATTCC
+ACTCACAGTTGGCATGTCAATAGCGCCGCGCCATCGTGGTGGCTAACCCTGTTTAACGAT
+TTTATCCACGCTTTTCGTATGCAGGTGTTTTTTGTTATTTCTGGTTATTTTTCGTACATG
+TTATTTTTACGTTATCCATTAAAACACTGGTGGAAAGTACGGGTAGAACGTGTGGGTATT
+CCCATGCTTACCGCAATCCCTTTGCTTACCTTGCCGCAATTTATCCTGTTGCAATATGTC
+AAAGAGAAAACAGAGAACTGGCCTACACTCTCTGCCTATGAAAAATATAATACGTTAGCG
+TGGGAACTCATTTCACATCTGTGGTTTTTACTGGTGCTGGTGATATTAACCACCGTCAGC
+ATCGGGATTTTTACCTGGTTCCAAAAAAGGCAGGAAACAAGCAAGCCTCGTCCCGCCGCT
+ATTTCGCTGGCCAAACTTTCGCTTATTTTTTTCCTGCTGGGGGTGGCGTACGCTGCTATC
+AGGCGCATTATATTCATCGTATATCCGGCAATCCTCAGTGACGGCATGTTCAATTTTATT
+GTGATGCAAACGCTATTTTATGTGCCGTTTTTTATTCTCGGCGCGTTGGCCTTCATTCAC
+CCCGATCTGAAAGCGCGCTTCACCACGCCCTCACGCGGATGCACTTTAGGCGCTGCCGTT
+GCTTTTATCGCGTATCTGCTGAATCAACGTTATGGGAGCGGCGACGCCTGGATGTACGAA
+ACCGAATCCGTGATTACGATGGTAATGGGGCTATGGATGGTGAACGTGGTATTTTCACTG
+GGGCATCGCTTGTTAAACTTTCAGTCCGCGCGTGTCACCTATTTCGTGAATGCTTCGCTG
+TTTATTTATCTGGTGCATCATCCCTTAACGCTTTTCTTTGGCGCGTATATTACACCGCAT
+ATCTCCTCCAACCTGATCGGGTTCTTGTGCGGGCTGATATTTGTTATGGGTATTGCGTTA
+ATTCTGTATGAAATTCATTTACGCATCCCGCTCCTGAAATTTCTCTTTTCAGGTAAACCG
+CCGGTAAAACAAGAAAGCCGCGCCGCGATCGGGTAGATGAAACATAAACGACAAATGATG
+AAAATGCGTTGGTTGGGCGCAGCTATTATGTTAACGCTCTACGCATCATCGAGCTGGGCG
+TTCAGTATTGATGACGTGGCAAAACAAGCTCAATCTTTAGCCGGGAAAGGCTATGAGGCG
+CCTAAAAGCAACTTGCCCTCCGTTTTCCGCGACATGAAATATGCGGATTATCAGCAGATC
+CAGTTTAACAGCGATAAAGCCTACTGGAACAACTTAAAGACCCCTTTTAAGCTCGAATTT
+TACCATCAGGGGATGTACTTCGATACGCCGGTCAAGATTAACGAAGTGACGGCGACGACG
+GTCAAAAGAATCAAATACAGCCCGGATTACTTCAATTTTGGCAATGTTCAGCACGATAAA
+GACACGGTAAAAGATTTAGGCTTCGCCGGGTTCAAAGTCCTGTACCCCATTAACAGTAAA
+GATAAGAACGACGAAATCGTCAGTATGCTTGGCGCCAGCTATTTCCGCGTTATCGGCGCA
+GGCCAGGTGTATGGCTTATCTGCGCGCGGCCTGGCGATTGATACCGCCTTACCATCTGGT
+GAAGAGTTTCCCCGCTTTCGCGAGTTCTGGATTGAGCGTCCAAAACCCACCGATAAGCGT
+TTGACCGTCTATGCATTACTGGATTCTCCGCGCGCGACCGGCGCTTACCGTTTTGTGATC
+ATTCCTGGCCGCGATACCGTGGTGGACGTGCAGTCAAAAGTCTATCTGCGCGATAAGGTG
+GGCAAGCTGGGCGTTGCGCCATTAACCAGTATGTTCCTGTTTGGGCCAAACCAGCCGTCG
+CCGACGACCAACTATCGTCCGGAATTGCATGACTCGAACGGCTTATCCATTCATGCGGGT
+AATGGCGAGTGGATTTGGCGTCCGCTGAACAATCCAAAACACCTCGCTGTGAGCAGCTAT
+GCGATGGAAAACCCTCAGGGATTCGGCCTGTTGCAGCGTGGTCGCGAGTTCTCGCGCTTT
+GAAGATTTAGACGATCGCTATGACCTGCGTCCAAGCGCCTGGATTACCCCGAAAGGCGAC
+TGGGGCAAAGGTAAGGTTGAACTGGTTGAAATTCCGACCAATGATGAAACCAACGATAAC
+ATCGTCGCTTACTGGACTCCGGATCAACTGCCGGAACCGGGTAAAGAGATGAACTTCAAG
+TACACTCTGACCTTCAGCCGCGATGAAGATAAACTTCATGCGCCGGATAATGCCTGGGTG
+CTGCAAACACGCCGCTCAACGGGCGACGTTAAACAGTCGAATCTGATTCGCCAGCCCGAC
+GGCACTATTGCCTTTGTGGTGGATTTCGTTGGCGCCGACATGAAAAAACTGCCGCCGGAT
+ACGCCCGTCGCTGCACAAACCAGCATTGGCGATAACGGTGAAATCGTTGACAGTAATGTA
+CGCTATAACCCAGTCACTAAAGGCTGGCGTTTAATGCTGCGCGTGAAAGTCAAAGACGCG
+AAGAAAACCACGGAAATGCGTGCCGCATTGGTGAATGCCGATCAGACGCTAAGTGAAACC
+TGGAGCTACCAGTTACCTGCCAATGAATAAATGAATAAAACAACTGAGTATATTGACGCA
+CTGCTGCTTTCTGAACGTGAGAAAGCGGCATTGCCGAAAACTGACATCCGCGCCGTGCAT
+CAGGCGCTGGATGCCGAGCATCGGACTTACTCGCGAGAAGACGATTCACCGCAGGGTTCC
+GTAAAAGCCCGCCTTGAACACGCCTGGCCGGATTCATTGGCGAAGGGGCAGTTAATTAAA
+GATGATGAAGGGCGCGATCAGTTGCAGGCTATGCCAAAAGCGACGCGCTCTTCGATGTTT
+CCTGATCCCTGGCGAACCAACCCGGTTGGCCGTTTCTGGGATCGCCTGCGTGGGCGGGAT
+GTTACGCCGCGCTATGTTTCTCGTCTGACAAAAGAAGAGCAGGCGAGTGAGCAAAAATGG
+CGTACCGTCGGCACTATACGCCGCTATATTTTGTTAATTTTGACTCTGGCGCAAACCGTC
+GTTGCGACCTGGTATATGAAGACCATTCTGCCCTATCAGGGATGGGCGCTCATCAATCCT
+ATTGATATGGTGGGGCAGGATATTTGGGTCTCCTTTATGCAGCTCCTGCCCTACATGCTG
+CATACCGGTATCCTGATTTTGTTTGCCGTGCTGTTCTGCTGGGTGTCTGCCGGATTCTGG
+ACTGCGCTGATGGGCTTCCTGCAACTGCTTATCGGGCGCGATAAGTACAGTATCTCCGCG
+TCTACGGTTGGCGATGAGCCCCTCAATCCGGAACACCAGACGGCGCTGATCATGCCTATC
+TGTAATGAAGACGTTAGCCGCGTTTTCGCCGGTCTGCGCGCGACCTGGGAGTCCGTTAAA
+GCTACAGGCAACGCCGCGCATTTTGACGTCTATATCCTTAGCGATAGTTATAACCCGGAT
+ATTTGCGTGGCGGAGCAAAAGGCGTGGATGGAGCTCATCGCGGAAGTGCAGGGCGAAGGC
+CATATTTTTTACCGTCGCCGCCGCCGCCGTATGAAACGCAAAAGCGGCAATATTGACGAT
+TTTTGCCGCCGCTGGGGCAATCAGTACAGCTATATGGTGGTGCTGGACGCGGACTCAGTG
+ATTAGCGGCGAGTGTCTGAGCGGGCTGGTGCGCCTGATGGAAGCGAACCCTAACGCCGGG
+ATTATCCAGTCTTCGCCGAAAGCGTCGGGGATGGATACTCTGTATGCCCGCTGCCAACAG
+TTTGCGACCCGTGTTTATGGACCGCTGTTTACCGCCGGGCTGCACTTCTGGCAGTTGGGG
+GATTCGCACTACTGGGGGCACAATGCCATTATCCGCGTGAAGCCGTTTATCGAGCACTGC
+GCTCTGGCGCCGCTGCCGGGAGAAGGTTCGTTCGCCGGATCGATTCTTTCCCACGACTTT
+GTTGAGGCGGCGCTAATGCGTCGGGCAGGGTGGGGCGTCTGGATTGCCTACGATCTCCCC
+GGTTCCTATGAAGAGCTGCCGCCAAACCTGCTGGATGAGCTTAAACGCGACCGCCGCTGG
+TGTCACGGCAACCTGATGAACTTTCGTCTGTTCCTGGTGAAAGGAATGCACCCGGTGCAT
+CGTGCCGTGTTCCTGACCGGGGTAATGTCATACCTGTCCGCGCCGTTATGGTTTATGTTC
+CTTGCGCTTTCTACCGCGCTGCAGGTCGTTCATGCGTTAACAGAGCCGCAATATTTCCTT
+CATCCGCGCCAGCTTTTTCCGGTCTGGCCGCAGTGGCGTCCGGAACTGGCAATCGCGCTG
+TTTGCGTCAACGATGGTGCTGCTGTTCCTGCCGAAGCTGCTCAGTATTATGCTGATCTGG
+TGTAAAGGCACCAAAGAGTATGGCGGTTTCTGGCGCGTTACGCTGTCGCTATTGCTGGAA
+GTTCTGTTCTCCGTGTTGCTGGCGCCGGTGCGTATGCTGTTTCATACCGTGTTTGTGGTC
+AGTGCGTTCCTCGGCTGGGAAGTGGTCTGGAACTCACCGCAACGCGACGATGATTCTACG
+CCTTGGGGAGAAGCCTTTATGCGTCACGGCTCTCAACTGCTGCTGGGGCTGGTCTGGGCG
+GTTGGTATGGCGTGGCTGGATTTACGCTTTCTGTTCTGGCTGGCGCCGATTGTCTTTTCG
+CTTATTCTGTCGCCATTTGTTTCGGTGATCTCCAGTCGTTCAACGGTAGGATTACGCACC
+AATCGCTGGAAGCTGTTCCTGATCCCGGAAGAGTATTCGCCGCCTCAGGTGTTGGTCGAT
+ACTGATAAATATCTGGAGATGAATCGCCGCCGTATTCTGGACGATGGCTTTATGCATGCG
+GTTTTTAACCCGTCGCTTAATGCGCTGGCGACCGCGATGGCCACCGCGCGTCACCGCGCC
+AGTAAGGTGCTGGAAATAGCCCGCGATCGTCATGTGGAGCAGGCGCTAAACGAAACGCCG
+GATAAACTGAACCGCGATCGGCGTCTGGTTTTGCTCAGCGATCCGGTGACGATGGCGCGT
+TTTCACTATCGGGTCTGGAATGCGCCAGAGAGATACTCTTCCTGGGTAAACCATTATCAG
+TCTCTCGTCCTGAATCCGCAGGCGTTGCAGGGACGAACATCGTCAGCGGGAATGTCGCGC
+GTCTCGNNNGCGAGGAACCTGGGTAAATATTTTCTTCTCATCGATAACATGTTAGTGGTG
+CTGGGTNNNTTCGTCGTCTTCCCGCTCATCTCTATTCGCTTTGTCGATCAAATGGGGTGG
+GCTGCCNNNATGGTAGGGATCGCGCTCGGCCTGCGTCAGTTTATTCAACAAGGTCTGGGC
+ATTTTTNNNGGCGCCATCGCCGATCGCTTTGGCGCGAAACCGATGATTGTCACCGGTATG
+CTGATGNNNGCCGCAGGCTTTGCCACCATGGGTATCGCGCATGAGCCCTGGCTCTTGTGG
+TTTTCCNNNTTTCTTTCCGGTCTCGGCGGTACGCTTTTCGACCCGCCGCGTTCAGCGCTG
+GTGGTCNNNTTAATTCGTCCGGAGCAACGGGGCCGCTTCTTCTCTCTGTTGATGATGCAG
+GACAGCNNNGGCGCGGTGATTGGCGCGCTGCTGGGAAGCTGGTTGCTACAATACGATTTT
+CGCCTGNNNTGCGCGACGGGCGCTATTTTGTTCATATTATGCGCCCTTTTCAACGCATGG
+CTGCTTNNNGCCTGGAAGCTATCAACGGCCAGAACGCCGGTGCGTGAAGGAATGCGCCGC
+GTCATGNNNAATAAAAGGTTTGTCACCTACGTGCTGACGCTGGCGGGCTACTATATGCTG
+GCGGTANNNGTCATGTTAATGCTGCCGATTATGGTAAACGATATCGCCGGTTCGCCTGCT
+GCCGTGNNNTGGATGTACGCTATTGAGGCGTGTCTCTCGCTGACGTTGCTCTACCCGATT
+GCCCGCNNNAGCGAAAAGCGTTTTCGGCTGGAGCATCGGCTGATGGCCGGTTTGCTCGTC
+ATGTCGNNNAGCATGCTCCCCATCGGGATGGTGGGCAATTTACAGCAGCTTTTTACGCTT
+ATTTGCNNNTTCTACATCGGCTCGGTTATCGCCGAACCGGCGCGCGAAACGCTCAGCGCG
+TCGCCCNNNGACGCGAGGGCGCGGGGAAGCTATATGGGCTTTAGCCGTCTGGGATTAGCC
+ATTGGCNNNGCGATTAGTTATATCGGCGGCGGCTGGTTGTTTGATATGGGTAAAGCGCTT
+GCGCAGNNNGAACTACCGTGGATGATGCTCGGTATTATCGGCTTTATCACCTTTTTGGCT
+TTAGGCNNNCAATTTAGTCATAAGCGCACGCCGCGCCGGATGCTGGAACCCGGCGCCATG
+ACCAAGTATGCCACGCTGGAAGAAGCTATCGATGCAGCCCGGGAAGAATTTCTGGCTGAC
+CATCAAGGCCTCGAACAAGACGAAGCGAATGTGCAGCAGTTCAACGTTCAGAAATATGTA
+CTGCAGGATGGGGACATCATGTGGCAGGTCGAATTTTTCGCCGATGAAGGTGAAGATGGC
+GAATATCTGCCGATGCTGAGTGGTGAAGCCGCACAGAGCGTGTTTGACGGCGATTATGAT
+GAGAAAGAGATCCGCCAGGAATGGCAGGAAGAGAATACTTTGCATGAATGGGATGAAGGG
+GAATACCAGCTTGAACCCCCGCTTGATACCGAGGAAGGCCGTACTGCGGCAGACGAATGG
+GATGAGCGTATGTCACTATTAGCCAGGCTGGAACAAAGTGTACACGAAAACGGTGGGCTG
+ATTGTCTCATGCCAACCGGTACCAGGCAGCCCTATGGATAAACCTGAAATTGTGGCTGCA
+ATGGCACAGGCAGCGGCTTCGGCGGGTGCGGTCGCTGTGCGCATTGAAGGCATTGAGAAT
+CTGCGGACTGTTCGTCCCCATCTTTCTGTTCCTATTATTGGGATAATTAAACGTGACCTT
+ACAGGGTCGCCAGTCCGTATCACTCCATATTTACAGGATGTTGACGCCCTGGCGCAGGCA
+GGTGCCGATATTATCGCTTTTGATGCCTCATTCCGCTCTCGCCCGGTTGATATTGATAGT
+TTACTGACACGTATTCGCCTGCATGGATTACTGGCGATGGCAGACTGTTCAACCGTGAAT
+GAAGGCATAAGTTGCCATCAGAAAGGAATCGAATTCATTGGTACAACACTGTCTGGCTAT
+ACCGGTCCCATCACGCCGGTTGAGCCAGATTTGGCAATGGTGACACAACTGAGTCATGCA
+GGTTGTCGTGTTATTGCCGAGGGGCGCTATAACACGCCTGCACTGGCGGCCAATGCTATT
+GAGCATGGTGCCTGGGCAGTTACCGTTGGTTCCGCTATCACCCGTATCGAGCATATCTGT
+CAGTGGTTCAGTCACGCAGTAAAACGCTGAATGAAAAATTTTAAGAAAATGATGACGCTA
+ATGGCGCTATGTTTATCAGTTGCTATCACCACATCAGGATATGCAACCACGCTTCCTGAT
+ATACCAGAACCACTGAAAAATGGTACTGGCGCTATTGATAATAATGGCGTGATTTATGTC
+GGCTTAGGTACCGCAGGGACATCCTGGTATAAAATTGATCTTAAAAAGCAACATAAAGAC
+TGGGAGCGTATAAAGTCGTTTCCTGGTGGAGCTCGTGAGCAATCCGTGTCGGTATTTTTA
+AATGATAAGCTGTATGTTTTTGGTGGCGTAGGGAAAAAAAACAGTGAATCACCGTTGCAG
+GTTTATAGCGATGTGTACAAATACTCACCGGTGAAAAATACATGGCAAAAAGTTGATACT
+ATATCTCCAGTTGGATTAACAGGGCATACGGGAGTAAAATTAAACGAAACGATGGTACTT
+ATTACCGGAGGGGTTAATGAGCATATCTTTGATAAGTATTTTATTGATATAGCGGCTGCG
+GATGAAAGTGAAAAAAATAAAGTCATCTATAATTATTTTAATAAACCTGCCAAAGATTAT
+TTTTTTAATAAAATCGTATTTATCTACAATGCTAAAGAGAACACATGGAAGAATGCCGGT
+GAGCTGCCAGGCGCGGGGACGGCAGGATCGTCATCGGTAATGGAAAATAATTTCTTGATG
+CTGATTAATGGTGAGCTCAAACCGGGTTTACGTACCGATGTGATTTACCGCGCCATGTGG
+GATAACGATAAGCTAACATGGTTGAAGAACAGCCAGTTACCGCCATCGCCTGGAGAACAA
+CAGCAGGAAGGGTTGGCCGGAGCATTTTCGGGCTATAGCCACGGTGTCCTGCTTGTCGGT
+GGTGGCGCGAATTTTCCGGGAGCAAAACAAAATTATACTAATGGAAAGTTTTATTCCCAC
+GAAGGGATAAATAAAAAATGGCGAGATGAAGTCTATGGTTTGATTAATGGCCATTGGCAA
+TATATGGGTAAAATGAAACAACCTCTCGGCTATGGTGTATCAGTAAGTTATGGTGATGAA
+GTTTTCCTTATTGGTGGTGAAAATGCTAAAGGGAAACCTGTTTCGTCTGTAACCTCCTTT
+ACCATGCGTGATGGTAATTTATTAATAAAATAAGTGATAGCAAAATTCTTCCCGTGGTAT
+AGCGAGATAACACGTCCACAAAAAAATGCTTTATTTTCAGCATGGCTGGGTTACGTTTTT
+GATGGCTTCGACTTTATGCTGATTTTCTACATTATGTATCTGATCAAGGCTGACTTAGGA
+TTGACAGATATGGAGGGCGCATTCCTTGCCACAGCGGCCTTTATTGGGCGACCATTTGGC
+GGGGCGCTATTTGGTCTGCTGGCAGACAAATTTGGCCGTAAGCCGTTAATGATGTGGTCG
+ATAGTTGCCTATTCTGTAGGTACAGGGTTAAGTGGCCTGGCTTCCGGTGTAATTATGCTG
+ACGCTTAGTCGTTTTATTGTCGGTATGGGGATGGCGGGGAAGTATGCTTGCGCTTCTACT
+TATGCCGTGGAAAGTTGGCCAAAGCATTTAAAATCTAAAGCGAGCGCATTTCTGGTTTCA
+GGTTTCGGTATTGGTAACATCATAGCAGCCTATTTTATGCCGTCATTTGCCGAAGCGTAT
+GGTTGGCGTGCTGCTTTTTTTGTCGGTTTGCTACCCGTTCTTTTAGTAATCTACATCCGG
+GCCAGGGCTCCTGAATCTAAAGAGTGGGAAGAAGCCAAACTCAGTGGTCTCGGAAAGCAT
+TCACAAAGTGCCTGGTCAGTTTTCTCTTTGTCAATGAAAGGGCTATTTAATCGAGCTCAA
+TTTCCACTGACATTATGTGTATTTATTGTTCTGTTCTCTATTTTCGGCGCAAACTGGCCG
+ATCTTTGGTCTACTGCCTACATATTTGGCGGGAGAGGGCTTTGATACGGGCGTGGTCTCT
+AATTTAATGACGGCGGCGGCATTCGGCACTGTATTGGGAAATATCGTTTGGGGTCTGTGC
+GCAGATAGAATTGGTTTGAAGAAAACGTTCAGCATTGGTCTTCTCATGTCCTTTTTATTC
+ATTTTCCCGTTATTCAGAATTCCGCAAGATAATTATTTACTGCTGGGCGCATGTTTATTC
+GGTTTAATGGCGACTAACGTAGGTGTTGGCGGGCTGGTTCCCAAATTTCTCTACGACTAC
+TTTCCTCTTGAGGTTCGTGGTTTGGGTACCGGGCTTATTTATAATCTTGCTGCGACATCA
+GGCACATTCAATTCAATGGCGGCGACCTGGCTTGGAATAACAATGGGGCTAGGCGCTGCG
+CTAACGTTCATTGTTGCTTTCTGGACCGCAACAATTCTACTCATTATTGGCCTATCCATT
+CCGGATAGACTAAAAGCACGTCGTGAAAGGTTTCAGTCAACAAAAGAATTTTAAATGAAA
+AAGTATCTTGCTTTCGCCGTTACGCTGCTGGGTATGGGTAAAGTCATCGCCTGTACTACC
+CTTTTGGTAGGCAATCAGGCTTCGGCTGACGGCTCCTTTATTATCGCGCGCAACGAGGAT
+GGCTCGGCAAATAACGCCAAGCATAAGGTTATTCATCCCGTCGCGTTTCATCAACAAGGC
+GAGTATAAAGCACATCGCAACAATTTTAGCTGGCCGCTTCCGGAGACAGCGATGCGCTAT
+ACGGCGATTCATGACTTTGATACTAACGATAACGCCATGGGTGAAGCCGGTTTCAATTCG
+GCGGGCGTCGGAATGAGCGCAACGGAAACCATTTACAACGGCAGAGCGGCGCTGGCTGCC
+GATCCTTACGTGACAAAAACGGGAATCACGGAAGACGCCATTGAGTCCGTGATCCTGCCA
+GTGGCGCAATCGGCGCGTCAGGGCGCCAAATTACTGGGAGATATTATTGAACAAAAAGGC
+GCGGGCGAAGGTTTCGGCGTCGCGTTTATTGATAGCAAAGAGATATGGTATCTGGAGACG
+GGAAGCGGACATCAATGGCTGGCAGTACGACTTCCGGCAGATAGCTATTTCGTTTCCGCC
+AATCAGGGACGTTTACGCCATTACGATCCGAATGATAACGCGAATTATATGGCGTCACCA
+ACGTTAGTAAGCTTTGCGAAAAAGCAGGGATTATATGATCCGGCCCGCGGCGAATTCGAC
+TTTCATCAAGCCTATTCGCAGGATAACAAAAACGATACCACCTATAATTATCCGCGCGTC
+TGGACGCTACAACACCAGTTTAATCCGCATCTGGATACGGTCGTTAGCGAAGGGGAAACA
+TTTTCTGTTTTTTTAACGCCAATAACGAAGATCAGCGTGGCGGCAGTAAAAAACGCGTTA
+CGCAATCACTATCAGGGAACGTCGCACGACCCTTATGCCAGTCATAATCCACAAGAACCA
+TGGCGACCTATATCCGTTTTTCGTACCCAGGAGTCACATATTTTACAGGTCAGACCGAAA
+TTACCGCAGGCTATCGGCAACGTAGAATACATCGCCTATGGAATGCCATCTCTTAGCGTC
+TATCTCCCCTATTACCAGGGGATGCGTCATTATCAACCCGGAGATGATAAAGGAACCGAT
+CGGGCGAGCAACGACTCTACCTACTGGACATTCCGCACGCTGCAAACACTGGTTATGCAA
+GACTACAATACGTTTGCGCCAGATGTGCAACATGCCTGGAAAACATTTGAACAGCAAACA
+GCTAAGCAACAGTATAAGATGGAGCAGAGCTATCTGAGATTATATGCGTCGCATCCGAAA
+GAAGCACAACGCTTACTGCAAAATTTTGAAGATAAAACGATGCAAAATGCGCAGACGCTC
+GCCCGTCGCCTGACCAATAATATTATTACGACAATGACTTACCGCACAGATATGAAATAT
+CACTTTTCAAGTACGCAGCCATAAATGGGAAGACAAAAAGCAGTGATCAAAGCTCGTCGT
+GAAGCAAAGCGTGTGTTGAGACGAGATTCGCGTAGTCATAAGCAACGTGAAGAAGAATCG
+GTCACGTCACTGGTACAGATGGGCGGAGTAGAAGCCATTGGCATGGCGCGCGATAGTCGC
+GATACCTCTCCTGTTAAGGCGCGAAATGAAGCACAGGCGCATTATCTGAACGCTATCGAC
+AGTAAACAGCTTATTTTTGCGACCGGCGAAGCCGGCTGCGGAAAAACATGGATCAGTGCG
+GCAAAGGCGGCAGAAGCATTGATTCATAAGGACGTCGAGAGGATCATTGTGACGCGTCCG
+GTATTGCAGGCTGATGAAGATCTTGGTTTTTTGCCCGGTGATATCGCTGAAAAATTCGCG
+CCTTATTTTCGTCCCGTCTACGATGTCCTGCTTAAACGGTTGGGCGCGTCCTTTATGCAA
+TATTGTTTGCGCCCGGAAATCGGTAAGGTAGAAATTGCCCCGTTCGCCTATATGCGTGGG
+CGTACTTTTGAAAATGCGGTCGTGATCCTCGACGAGGCGCAAAATGTGACTGCGGCGCAA
+ATGAAAATGTTTTTGACGCGATTAGGCGAAAATGTCACGGTCATTGTCAATGGCGATATT
+ACGCAATGCGACCTGCCGCGCGGTGTGCGTTCCGGGTTGAGTGATGCGTTGGAACGCTTT
+GAAGAAGATGAAATGGTGGGGATTGTGCATTTCAACAAAGACGACTGCGTGCGCTCGGCG
+CTTTGTCAGCGAACGCTCCACGCATACAGCTAAATGGGAACCACCACGATGGGGGTTAAG
+CTGGACGACGCCACGCGCGAACGGATCAAAATGGCCGCGTCGCGTATCGATCGCACGCCG
+CACTGGTTAATAAAACAGGCAATCTTTAGCTATCTGGACAAGCTGGAAAATAGCGATACG
+CTACCGGAGCTACCTGCGCTGTTTGCCGGCGCGGCAAATGAAAGCGAGGAGCCGGTCGCG
+CCGCAGGATGAGCCGCATCAGCCCTTTCTGGAGTTTGCCGAACAGATTCTTCCCCAATCC
+GTCTCTCGCGCCGCCATCACCGCCGCCTGGCGCCGCCCGGAAACCGATGCGGTGTCAATG
+CTAATGGAACAGGCGCGCCTGTCGCCGCCTGTCGCTGAGCAGGCGCATAAACTGGCGTAT
+CAACTGGCGGAGAAATTGCGCAATCAAAAATCCGCCAGCGGTCGCGCGGGTATGGTGCAA
+GGCCTGTTGCAGGAGTTTTCCCTCTCTTCGCAAGAAGGCGTAGCGCTGATGTGTCTGGCG
+GAAGCGCTGCTGCGTATTCCCGACAAAGCTACGCGCGATGCGTTAATTCGCGACAAAATC
+AGTAATGGCAACTGGCAGTCGCATATTGGCCGTAGCCCGTCGCTGTTTGTAAACGCCGCC
+ACCTGGGGGCTGCTCTTTACCGGCCGACTGGTCTCAACGCATAACGAAGCCAATCTTTCG
+CGCTCGCTGAACCGCATTATCGGCAAGAGCGGCGAACCGTTAATCCGCAAAGGCGTCGAC
+ATGGCGATGCGTTTAATGGGCGAGCAGTTCGTGACTGGCGAAACCATTGCTCAGGCGCTG
+GCGAATGCCCGAAAACTGGAAGAGAAAGGGTTCCGCTATTCTTACGATATGCTGGGCGAA
+GCCGCGTTAACCGCCGCCGATGCGCAGGCCTATATGGTCTCTTACCAGCAAGCGATTCAT
+GCCATCGGCAAAGCGTCTAACGGTCGCGGTATTTACGAAGGGCCAGGCATCTCGATTAAG
+CTGTCCGCCCTGCATCCACGCTATAGTCGCGCGCAATACGATCGGGTAATGGAGGAGCTT
+TATCCGCGCCTGAAATCCCTGACGCTGCTGGCGCGCCAGTATGATATCGGTCTCAATATC
+GACGCCGAAGAGGCGGATCGTCTGGAGATCTCGCTTGATCTGCTGGAAAAACTCTGCTTC
+GAACCCGAACTGGCGGGCTGGAACGGCATTGGCTTTGTGATTCAGGCTTACCAGAAACGC
+TGCCCGCTGGTCATTGATTATTTAGTCGATCTGGCCTCCCGTAGCCGCCGTCGGCTGATG
+ATTCGTCTGGTGAAAGGCGCCTACTGGGATAGCGAGATCAAACGCGCGCAAATGGAAGGG
+CTGGAGGGCTATCCAGTTTATACCCGCAAAGTGTATACCGATGTCTCTTATCTGGCCTGC
+GCGAAAAAACTGCTCGCCGTCCCTAATCTGATCTACCCGCAGTTCGCGACCCATAACGCT
+CACACACTGGCGGCGATTTATCATCTGGCCGGGCAAAATTACTATCCGGGTCAGTACGAA
+TTCCAGTGCCTGCACGGCATGGGAGAACCGCTGTATGAACAGGTCACCGGTAAAGTGGGG
+GACGGAAAACTTAACCGTCCCTGCCGTATTTACGCGCCGGTGGGAACACACGAAACCCTG
+CTGGCCTATCTGGTACGACGCCTGCTGGAAAACGGCGCCAACACCTCTTTTGTCAACCGC
+ATCGCCGATGCCACCCTACCGCTCGATGAACTGGTGGCCGACCCGGTCGAGGCCGTGGAA
+AAACTGGCGCAGCAGGAAGGTCAGGCTGGCATACCGCATCCAAAAATTCCGCTGCCGCGC
+GATCTGTACGGCGAAGGTCGGATAAACTCCGCCGGACTTGATTTAGCGAATGAACATCGC
+CTCGCCTCGCTTTCTTCTGCCCTGTTAAGCAACGCCATGCAGAAATGGCAGGCCAAACCT
+GTGCTGGAACAACCGGTGGCCGACGGTGAGATGACGCCGGTTATCAACCCGGCGGAACCG
+AAAGATATTGTTGGCTGGGGACGCGAAGCGACAGAAAGCGAGGTTGAACAGGCGTTGCAA
+AACGCGGTCAATCAGGCGCCGGTTTGGTTTGCGACGCCGCCGCAAGAACGCGCCGCTATT
+TTGCAGCGGGCGGCGGTATTGATGGAAGACCAAATGCAGCAGTTGATTGGCCTGTTGGTG
+CGTGAAGCGGGGAAAACGTTCAGCAACGCCATTGCCGAAGTGCGCGAAGCGGTAGACTTC
+CTCCATTATTATGCCGGTCAAGTGCGTGACGATTTCGATAACGAAACGCATCGCCCGTTA
+GGGCCGGTGGTCTGTATCAGTCCGTGGAACTTTCCGCTGGCCATTTTCACTGGCCAAATC
+GCCGCCGCGCTGGCGGCAGGTAACAGCGTTCTGGCGAAACCGGCAGAGCAGACATCGCTG
+ATTGCCGCCCAGGGCATTGCCATTTTGCTGGAAGCGGGCGTACCGCCGGGCGTCGTGCAA
+CTGTTGCCGGGACGGGGAGAAACCGTCGGCGCCCAGCTTACCGCCGATGCGCGTGTACGC
+GGCGTGATGTTTACCGGTTCCACGGAGGTCGCGACGTTGTTGCAGCGCAACATCGCCACG
+CGTCTTGACGCCCAGGGGCGCCCTATTCCGTTGATTGCGGAAACCGGCGGTATGAACGCT
+ATGATTGTCGACTCTTCCGCGCTCACCGAGCAGGTGGTCGTGGATGTGCTGGCTTCCGCC
+TTCGACAGCGCCGGACAACGCTGTTCCGCGCTCCGCGTGCTGTGTTTGCAGGACGATATC
+GCCGAACATACGCTGAAAATGTTACGCGGCGCGATGGCGGAGTGTCGGATGGGGAATCCA
+GGCCGTCTGACGACCGATATCGGGCCGGTGATCGATAGCGAGGCCAAAGCCAACATTGAA
+CGTCATATCCAGACGATGCGCGCCAAAGGCCGCCCGGTTTTCCAGGCCGCGCGTGAAAAC
+AGCGATGACGCGCAGGAATGGCAGACCGGTACGTTTGTTATGCCCACGCTTATTGAGCTG
+GAAAACTTCGCAGAACTGGAAAAAGAGGTCTTCGGGCCCGTGCTGCACGTCGTGCGTTAT
+AACCGTAACCAACTGGCGGAGCTTATCGAACAGATTAACGCTTCCGGCTACGGGCTAACG
+CTGGGCGTACATACCCGTATTGATGAAACCATTGCGCAAGTCACCGGTTCCGCCCATGTC
+GGCAACCTGTACGTTAACCGTAATATGGTGGGCGCGGTCGTCGGCGTCCAGCCGTTTGGC
+GGCGAAGGCCTGTCCGGCACCGGGCCAAAAGCGGGAGGGCCGCTCTATCTCTACCGCCTG
+CTGGCACACCGCCCGCCCAATGCGCTCAATACGACGCTGACTCGTCAGGATGCGCGTTAC
+CCGGTGGATGCGCAGCTTAAAACCACGCTACTCGCGCCGTTGACCGCTCTGACGCAATGG
+GCGGCGGATCGCCCGGCGCTACAGACGCTCTGCCGACAATTCGCCGATCTGGCGCAGGCC
+GGCACGCAGCGCCTGCTACCGGGGCCGACCGGCGAGCGTAATACCTGGACGCTGTTGCCG
+CGTGAACGGGTGTTATGCCTGGCTGATGATGAACAGGACGCGTTGACGCAGCTTGCCGCC
+GTTCTCGCCGTCGGCAGTCAGGCGCTATGGTCAGACGACGCCTTCCACCGCGATCTGGCG
+AAACGTCTCCCCGCCGCCGTCGCGGCGCGTGTCCAGTTTGCGAAAGCGGAAACGCTGATG
+GCGCAGCCGTTTGACGCGGTGATTTTCCACGGCGACTCCGACAAGCTGCGAACCGTGTGC
+GAAGCCGTCGCCGCCCGCGAAGGCGCGATAGTGTCGGTACAGGGGTTCGCCCGCGGCGAA
+AGCAATATGCTGCTGGAACGGCTCTATATTGAACGTTCGCTGAGCGTAAACACTGCCGCC
+GCTGGCGGTAATGCCAGCCTGATGACAATTGGCTAAATGGCTATTAGCACACCGATGTTG
+GTGACATTCTGTGTCTATATTTTTGGCATGATATTGATTGGGTTTATCGCCTGGCGCTCA
+ACCAAAAACTTTGATGACTATATTCTTGGCGGTCGCAGCCTGGGGCCGTTTGTTACGGCT
+TTATCAGCCGGCGCGTCGGATATGAGCGGCTGGCTGTTAATGGGGCTGCCTGGCGCTATC
+TTTCTGTCGGGGATCTCTGAAAGCTGGATCGCCATTGGCCTGACGTTAGGCGCATGGATT
+AACTGGAAGCTGGTGGCCGGGCGCCTGCGCGTGCATACCGAATTTAACAATAACGCGCTC
+ACGCTGCCGGACTATTTTACCGGTCGGTTTGAGGATAAGAGCCGAGTCCTGCGTATTATT
+TCCGCGCTGGTCATTCTGCTGTTTTTCACTATCTATTGCGCATCAGGTATTGTCGCTGGG
+GCACGACTGTTCGAAAGCACCTTCGGTATGAGCTATGAAACCGCACTGTGGGCGGGGGCC
+GCGGCAACCATTATTTATACCTTTATCGGCGGGTTTCTTGCCGTTAGCTGGACGGATACC
+GTTCAGGCCAGCCTGATGATTTTTGCGTTAATCCTGACGCCGGTGATGGTTATTGTCGGC
+GTAGGCGGTTTTAGCGAGTCGCTGGAAGTGATCAAGCAAAAGAGCATCGAGAATGTCGAC
+ATGCTCAAGGGGCTGAATTTTGTCGCTATTATTTCTCTGATGGGCTGGGGGCTGGGTTAC
+TTCGGTCAGCCGCATATCCTGGCGCGCTTTATGGCGGCGGATTCCCATCACAGTATTGTT
+CATGCGCGTCGTATCAGTATGACCTGGATGATTCTGTGTCTGGCGGGCGCGGTGGCGGTG
+GGCTTCTTTGGCATTGCGTACTTTAACAATAACCCCGCGCTGGCCGGGGCGGTGAACCAA
+AACTCAGAACGCGTATTTATTGAACTGGCGCAGATCCTGTTTAACCCGTGGATTGCCGGT
+GTTCTGCTGTCTGCTATCCTGGCGGCGGTGATGTCGACGTTGAGCTGTCAGTTGCTGGTA
+TGCTCCAGCGCGATTACGGAAGATTTATATAAGGCTTTTCTGCGTAAAAGCGCCAGCCAG
+CAAGAGCTGGTATGGGTAGGGCGAGTGATGGTGCTGGTGGTAGCGCTGATCGCCATTGCG
+CTGGCGGCGAATCCTGATAACCGTGTGCTGGGGCTGGTGAGCTACGCCTGGGCTGGATTC
+GGCGCGGCATTTGGACCTGTTGTCCTGTTTTCTGTGATGTGGTCGCGTATGACACGTAAC
+GGCGCGCTGGCGGGAATGATTATTGGCGCGGTGACGGTTATCGTCTGGAAACAATATGGC
+TGGCTGGATCTGTATGAGATTATCCCTGGCTTCATTTTCGGCAGCCTGGGGATCGTAATC
+TTTAGCCTGCTTGGCAAAGCGCCGACAGCAACGATGCAGGAACGCTTTGCAAAAGCGGAC
+GCGCATTATCATTCCGCGCCGCCGTCGAAGCTACAGGCGGAATAAATGGCGGGTAAACTG
+CGGCGTTGGCTGCGTGAAGCCGCGGTTTTTCTGGCGCTCCTCATCGCGATAATGGTGGTC
+ATGGACGTCTGGCGCGCGCCGCAGGCGCCTCCGGCGTTTGCCGCGACACCATTACATACG
+CTGACGGGAGAGTCGACAACTCTGGCGACCTTGAGCGAGGAACGCCCCGTACTGCTCTAT
+TTTTGGGCCAGCTGGTGCGGGGTATGCCGCTTTACCACGCCTGCGGTCGCTCACCTGGCG
+GCGGAAGGGGAAAACGTCATGACCGTTGCGCTCCGCTCCGGCGGTGATGCTGAGGTTGCC
+CGCTGGCTGGCGCGCAAGGGCGTTGACTTCCCGGTCGTCAATGATGCTAACGGCGCCTTA
+TCCGCTGGCTGGGAAATCAGCGTGACGCCAACGCTGGTGGTGGTTTCACAAGGTCGGGTT
+GTGTTCACCACCAGCGGCTGGACCAGCTATTGGGGCATGAAGCTTCGGCTGTGGTGGGCA
+AAAACGTTCTGAATGATGAAAAAAAGCGTCGCTATGCTGGCGGTTTGTATGCTGGCGCAA
+AGCCACCTTGCCATTGCTGCCGGTGCTCCTGCGCCTCAAGAGATCAACATTGTTTTACTG
+GGCACCAAAGGCGGGCCTTCTTTGCTCAATACAGCCAGACTACCGCAAGCGACGGCGCTC
+ACTATCGGCGATAAGATATGGCTGATAGATGCCGGCTACGGCGCCAGTCTGCAACTGGTG
+AAAAATGGCATTCCACTGCGCAACATCAATACTATTTTGCTCACCCATCTGCACAGCGAC
+CACATACTGGATTATCCTTCCTTGCTGATGAATGCCTGGGCAAGTGGCCTGAAAGACCAT
+ACCATACAGGTTTATGGCCCGCCGGGAACCCAGGCGATGACGAAGGCTAGCTGGAAGGTC
+TTTGACAGGGATATCACGTTACGCATGGAAGAAGAGGGGAAACCCGATCCGCGCAACCTG
+GTTAAGGCGACCGATATCGGCCAGGGCGTCATCTATAAAGATGAACTGGTCACAATAAGC
+GCGCTGAAAGTGCCTCATTCCCCTTTCCCGGACGGTGAAGCGTTTGCTTACCGTTTTGAT
+ACTCAGGGTAAGCGAATCGTCTTCTCTGGCGATACGTCCTGGTTTCCTCCGCTTGCAACG
+TTTGCCCAGGGGGCGGATATCCTGGTACATGAGGCGGTACATGTCCCTTCGGTAGCAAAA
+CTGGCTAATAGTATTGGCAACGGAAAAACGCTGGCTGAAGCGATTGCGTCGCATCACACC
+ACGATTGAAGATGTCGGTAAGATTGCTCGCGAGGCCCACGTGAAAAAACTGGTGTTAAGT
+CATCTGGTGCCTGCGACGGTTGCGGATGACGTCTGGCAACAGGAAGCCATGAAAAATTAC
+CCGGGCCCTGTCATTGTCGGTCATGACAATATGACGATAAGCGTACCGTAAATGTCGCAA
+CGCACAGAGAAAAAAATCGGGAAACGTTCGCAGGCCACCGGTGCAAAACGGCAGCTTATC
+TTAACCGCCGCGCTTGCCGTTTTTTCCCAGTATGGCATTCATGGCGCGCGTCTTGAACAG
+GTCGCCGAGCGGGCAGGCGTCTCCAAAACCAATCTGCTTTATTATTATCCCTCGAAAGAG
+GCGCTGTATGTCGCGGTAATGCGACAGATTCTGGATGTCTGGTTGGCGCCGCTCAAGGCG
+TTTCGCGCAGAATTTTCCCCTCTGGAGGCCATCAAAGAGTATATCCGTCTCAAGCTGGAG
+GTTTCGCGTGATTATCCGCAGGCGTCGCGGCTCTTCTGCATGGAGATGCTGGCGGGCGCG
+CCGCTCTTAATGGATGAACTGACCGGCGATCTAAAAGCGTTGATAGATGAAAAATCCGCG
+CTGATTGCCGGATGGGTGCACAGCGGGAAACTCGCGCCCGTTTCTCCGCATCATTTGATC
+TTCATGATTTGGGCCGCCACGCAACATTACGCCGATTTCGCCCCTCAGGTTGAAGCGGTA
+ACCGGCGCGACGCTTCGCGATGAAGCCTTTTTCAACCAAACGGTCGAAAGCGTTCAGCGC
+ATTATTATTGAAGGGATTCGCGTGCGTTAAATGGCGAAACAACAACGGATGGGCTGGTGG
+TTTCTTTGCCTTGCATGTGTCGTGGTAATGGTTTGTACCGCGCAACGCATGGCGGGCCTG
+CACGCCTTGCAGATGCAGGCGACGGCCTCTGCTGCGGTGGTCAGCGCTCCCTCCTCGACA
+GATGACGGCTCGCCGGTCACTCCCTGCGAATTAAGCGCCAAGTCGCTGCTGGCGGCGCCT
+CCAGTACTCTTTGAAGGTGCTATCCTTGCGCTTTATCTACTGCTTTCCTTACTGGCGCCT
+GTCCGGGTCATGCGCCTGCCGTTTTCGCCTCCACGGGCTATTTCGCCGCCCACATTACGG
+GTACATCTACGATTTTGTGTCTTCCGTGAATGAATGATGATTTTATTCAGGCGGATACTG
+TTCTGCCTGTTATGGCTTTGGCTGCCCGTCTCCTGGGCGGCGGAAAGCGGCTGGCTGCGT
+TCGCCCGATAACGACCATGCCAGCATACGGCTACGTGCCGATACGTCCGCTAACAGTGAG
+ACCCGGCTGTTGCTGGATGTCAAACTGGAAAACGGCTGGAAAACCTACTGGCGCGCGCCG
+GGGGAAGGGGGCGTGGCACCCTCTATCGCCTGGAAAGGCGACATGCCTGAGGTAAGCTGG
+TTCTGGCCAACCCCCTCGCGCTTTGATGTGGCGAATATCACCACCCAGGGATATCACGAC
+GAGGTGACCTTTCCGATGATCGTGCGCGGTACGCCGCCGGCGACCTTGCGCGGTGTGTTG
+ACGTTATCAACCTGCAGCAATGTTTGTCTGTTGACCGATTACCCCTTTTCCGTGACGCCC
+ACTGTGCAGAATGCCGATTTTGCCCATGACTATGCGCGGGCGATGGGTAAAGTTCCGCTC
+CGCAGTGGGCTAACGGACTCGCTTGACGTTGGCTATCGCCCGGGAGAACTGGTGGTCACT
+GCTACGCGAGCGGCGGGCTGGTCATCGCCCGGGCTCTATCTTGACACCATAGATGACGTC
+GATTTTGCGAAGCCTCGCCTGCGCGTAGAGGGCGACAGGTTACAGGCGACGGTGCCGGTG
+ACGGACAGTTGGGGCGAAAAGGCGCCCGATTTGCGCGACAAATCGCTGACCCTCGTGTTA
+GCCGATGGCGCTATCGCCCAGGAGAGCACGCAAACCATTGGCGCTGCGCCAGCGCAAACG
+CCGGACAATGCGGCGCTACCTTTCTGGCAAGTTGTAATGATGGCGCTAATCGGCGGACTG
+ATTCTTAATTTAATGCCCTGCGTACTGCCTGTTCTGGGCATGAAACTTGGCTCTATTTTA
+TTGGTAGAGGAAAAAAGCCGCTCTCACATCAGGCGACAATTTTTGGCTTCGGTCGCCGGT
+ATCATTGCGTCATTTATGGCGCTGGCGGCGTTTATGACCCTCCTTCGCCTGTCAAACCAT
+GCGCTGGCCTGGGGAGTCCAGTTCCAGAATGCATGGTTTATTGGTTTTATGGCGCTGGTG
+ATGTTGTTGTTTAGCGCCAGCCTGTTCGGGCTTTTTGAGTTCAGGCTTCCCTCATCTATG
+ACCACGAAACTGGCCACTTACGGCGGTAACGGTATGTCGGGACATTTCTGGCAGGGGGCG
+TTCGCCACGCTGCTGGCGACGCCTTGTAGCGCGCCGTTTCTGGGCACGGCGGTCGCGGTG
+GCGCTCACGGCGTCGCTGCCGACGCTGTGGGGGCTGTTCCTTGCGCTTGGCCTGGGAATG
+AGCGCGCCGTGGCTACTGGTCGCGATACGACCAGGGCTTGCGCTACGTTTACCGCGCCCC
+GGGCGTTGGATGAATGTCCTGCGCAGGATCCTCGGTCTGATGATGCTGGGGTCGGCTATC
+TGGCTGGCGACGTTACTCCTGCCGCATTTCGGCTTCACTGCGTCAAAGAGCGCGCAAGAC
+ACGGTTCAGTGGCAACCGTTGAGTGAACAGGCAATCCAGTCGGCGCTGGCGCAGCATAAG
+CGGGTATTTGTCGATGTCACTGCGGACTGGTGTATTACCTGTAAAGTGAATAAATACAAC
+GTCCTGCAAAAAGAGGATGTGCAGGCCGCCTTGCAACAGCCGGATGTTGTGGCGCTGCGG
+GGAGACTGGACGCTGCCGTCCGATGCCATTACAGATTTTCTGAAAACGCGCGGCCAGGTC
+GCCGTGCCGTTTAATCAGGTATATGGCCCCGGTTTGCCGGAAGGGGAGGCACTGCCCACT
+TTGCTGACCCGCGATGCGGTATTACAAACGTTGAAAAAAGCGAAAGGAATAACCCAATGA
+ATGAAATACATGATTGTTTTACTGCTGGCGCTGTTTTCGACGCTGAGCATCGCGCAAGAA
+ACCGCTCCTTTTACGCCGGATCAGGAAAAGCAGATTAAAAATCTGATCCATGCGGCGTTG
+TTTAACGATCCTGCCAGCCCGCGGATAGGCGCTAAACACCCTAAGCTGACGCTGGTGAAC
+TTTACGGATTACAACTGCCCGTACTGCAAACAGCTCGATCCGATGCTGGAAAAGATTGTG
+CAGAAATATCCTGACGTTGCGGTCATTATTAAACCGCTGCCATTCAAAGGAGAGAGTTCC
+ATACTGGCGGCGCGTATTGCGCTGACCACCTGGCGCGATCATCCGCAACAGTTCCTCGCG
+CTACATGAAAAACTTATGCAAAAGCGCGGTTACCATACGGATGACAGTATTAAACAGGCC
+CAGCAGAAAGCAGGGGCGACGCCAGTGACGCTGGATGAAAAAAGCATGGAAACGATACGC
+ACTAATTTGCAGTTGGCAAGACTGGTCGACGTGCAAGGAACGCCAGCGACGATCATTGGC
+GACGAGCTGATTCCGGGCGCAGTGCCCTGGGATACGCTGGAAGCGGTGGTGAAAGAAAAA
+CTGGCGGCTGCCAATGGCGGGTAAATGATTGCACATTCTTTCGGCATCGTTAATTATTTT
+GTATTATTTGGCTACCTCCTGGCCATGATGGTAGTCGGTGTCTATTTTTCCAGACGGCAA
+AAAACAGCAGACGATTATTTTCGCGGTGGTGGCCGGGTTCCTGGTTGGGCGGCTGGGGTC
+AGTGTATTTGCTACTACGTTAAGCTCAATTGCATTTATGTCAATTCCTGCCAAAGCGTTT
+ACTTCCGACTGGACGTTTATCATTGGTCAGGATCTGGCTATCGCAATTTTACCGCTGGTT
+TTTTATTTCTATATTCCGTTTTTTCGGAAAGTGAAAGTCACATCAGCCTATGAATATCTC
+GAAGCACGGTTCGATGTGCGCTGCCGTCTGGTCGCCAGCATGTCATTTATGTTGTTTCAT
+ATTGGACGTATCGCCATTATCACTTTCCTCGCCGTGCTGGCCTTGCGCCCCTTCATCGCT
+ATAGACCCGGTGATTTTGGTACTGTTGATTGGTGTGATGTGTATCATTTATACCTGGATG
+GGGGGGAATATGGAAAGTCTATTAAATCGTTTATATGACGCGTTAGGCCTGGATGCGCCA
+GAAGTTGAGCCACTGCTTATCATTGATGATGGGATACAGGTTTATTTTAATGAATCCGAT
+CATATACTGGAAATGTGCTGTCCCTTTATGCCACTGCCTGACGACACTCTGACTTTGCAG
+CATTTTTTACGTCTTAACTACGCCAGCGCCGTCACTATCGGCGCTGATGCAGACAATACT
+GCTTTAGTGGCGCTTTATCGCTTGCCGCAAACCAGTACCGAAGAAGAGGCGCTCACTGGT
+TTTGTATTATTCATTTCAAACGTGAAGCAATTGAAAGAGCATTATGCAATGAAATACGAC
+CTTATTATTATCGGCAGCGGTTCGGTTGGCGCCGCCGCTGGTTATTACGCCACCCGCGCC
+GGGCTAAAGGTCCTGATGACCGATGCGCATATGCCGCCTTATCAACAGGGCAGCCACCAC
+GGCGATACCCGTCTTATCCGCCACGCTTATGGTGAAGGCGAAAAATATGTCCCGCTGGTG
+CTTCGCGCCCAGACGCTTTGGGATGAGCTCTCCACACACAATGAAGAGCCTATTTTTGTC
+CGCTCCGGCGTCGTCAACCTCGGCCCGGCCGATTCCGCTTTCTTAGCCAACGTCGCACGA
+AGCGCGCAACAGTGGCAATTGAACGTCGAGCGCCTGGACGCGACGGCCCTCATGACGCGC
+TGGCCGGAAATTCGCGTGCCCGATAATTATATCGGGCTGTTTGAAGCTGACTCCGGTTTC
+CTGCGCAGCGAATTAGCCATTACCACATGGCTTCGTCTGGCCCGAGAGGCAGGCTGCGCA
+CAGCTATTCAACAGCCCGGTAAGCCATATTCACCATGATGATAACGGTGTGACGATAGAG
+ACGAGTGAAGGCTGCTACCACGCCAGCAAAGCGCTGATTAGCGCGGGCACCTGGGTCAAA
+ACGCTGGTACCGGAGCTGCCCGTTCAGCCCGTACGTAAAGTTTTTGCCTGGTTTAAGGCG
+GATGGACGTTACAGCACTAAAAACCGCTTTCCGGCCTTTACCGGCGAAATGCCCAACGGC
+GATCACTATTACGGTTTCCCGGCGGAGAACGACGAGTTAAAAATCGGCAAACACAATGGC
+GGGCAGCGAATACAGGCACCGGAAGAGCGCAAGCCCTTTGCCGCCGTTGCCAGCGATGGC
+GCGGAAGCATTTCCTTTCCTGCGTAACGTACTGCCGGGTATCGGCGGTTGTTTACATGGG
+GCGGCATGTACCTATGATAATTCGCCGGACGAGGATTTTATTATCGATACGCTGCCTGGC
+CATGAGAATACGCTTGTCATCACTGGACTCAGCGGACATGGTTTTAAATTCGCCCCGGTG
+TTAGGAGAAATCGCTGCGGATTTTGCGTTGGGAAAAACGCCCTCCTTTGATCTGACGCCG
+TTCCGGCTTTCCCGTTTTAGCCAATAAATGCAAATACAGAGCTTCTATCACTCAGCTTCA
+CTAAAAACCCAGGAGGCTTTTAAAAGCCTACAAAAAACCTTATACAACGGAATGCAGATT
+CTCTCAGGCCAGGGCAAAGCGCCGGCTAAAGCGCCCGACGCTCGCCCGGAAATTATTGTC
+CTGCGAGAACCTGGCGCGACATGGGGGAATTATCTACAGCATCAGAAGACGTCTAACCAC
+TCGCTGCATAACCTCTATAACTTACAGCGCGATCTTCTTACCGTCGCGGCAACCGTTCTG
+GGTAAACAAGACCCGGTTCTAACGTCAATGGCAAACCAAATGGAGTTAGCCAAAGTTAAA
+GCGGACCGGCCAGCAACAAAACAAGAAGAAGCTGCGGCAAAAGCATTGAAGAAAAATCTT
+ATCGAACTTATTGCAGCACGCACTCAGCAGCAAAATGGCTTACCTGCAAAAGAAGCTCAT
+CGCTTTGCGGCAGTAGCGTTTAGAGATGCTCAGGTCAAGCAGCTCAATAACCAGCCCTGG
+CAAACCATAAAAAATACACTCACGCATAACGGGCATCACTATACCAACACGCAGCTCCCT
+GCCGCAGAGATGAAAATCGGCGCAAAAGATATCTTTCCCAGTGCTTATGAGGGAAAGGGC
+GTATGCAGTTGGGATACCAAGAATATTCATCACGCCAATAATTTGTGGATGTCCACGGTG
+AGTGTGCATGAGGACGGTAAAGATAAAACGCTTTTTTGCGGGATACGTCATGGTGTGCTT
+TCCCCCTATCATGAAAAAGATCCGCTTCTGCGTCAGGCCGGCGCTGAAAACAAAGCCAAA
+GAAGTATTAGCTGCGGCACTTTTTAGTAAACCTGAGTTGCTTAACAGAGCCTTAGAGGGC
+GAAGCGGTAAGCCTGAAACTGGTATCCGTCGGGTTACTCACCGCGTCGAATATTTTCGGC
+AAAGAGGGAACTATGGTCGAGGATCAAATGCGCGCATGGCAATCGTTGACCCAGCCGGGA
+AAAATGATTCATTTAAAAATCCGCAATAAAGATGGCGATCTACAGACGGTAAAAATAAAA
+CCGGACGTCGCCGCATTTAATGTGGGTGTTAATGAGCTGGCGCTCAAGCTCGGCTTTGGC
+CTTAAAGCATCAGATAGCTATAATGCCGAAGCGCTACATCAGTTATTAGGCAATGATTTA
+CGCCCTGAAGCCAGACCAGGTGGCTGGGTTGGCGAATGGCTGGCGCAATACCCGGATAAT
+TATGAGGTCGTCAATACATTAGCGCGCCAGATTAAGGATATCTGGAAAAATAACCAACAT
+CATAAAGATGGCGGCGAACCCTATAAACTCGCACAACGCCTTGCCATGTTAGCCCATGAA
+ATTGACGCGGTGCCCGCCTGGAATTGTAAAAGCGGCAAAGATCGTACAGGGATGATGGAT
+TCAGAAATCAAGCGAGAGCTCATTTCTTTCCATCAGACCCATATGTTAAGTGCGCCTGGT
+AGTCTTCCGGATAGCGGTGGACAGAAAATTTTCCAAAAAGTATTACTGAATAGCGGTAAC
+CTGGAGATTCAGAAACAAAATACGGGCGGGGCGGGAAACAAAGTAATGAAAAATTTATCG
+CCAGAGGTGCTCAATCTTTCCTATCAAAAACGAGTTGGGGATGATAATATTTGGCAGTCA
+GTAAAAGGTATTTCTTCATTAATCACATCTATGAAACGATATATACTGGCTACCGCGATA
+GCGTCTCTTGTTGCAGCCCCGGCAATGGCGCTGGCCGCTGGCAGCAATATTCTCAGCGTA
+CATATTCTCGATCAGCAAACAGGCAAACCAGCGCCCGGCGTGGAGGTGGTACTGGAGCAG
+AAAAAGGATAACGGATGGACGCAATTAAACACCGGGCATACCGACCAGGATGGACGAATT
+AAAGCACTGTGGCCCGAAAAAGCTGCCGCGCCGGGGGATTATCGCGTTATTTTTAAAACC
+GGCCAGTATTTTGAAAGTAAAAAACTGGACACGTTTTTCCCGGAGATTCCCGTCGAGTTT
+CATATCAGCAAAACGAATGAGCACTATCATGTGCCGCTGTTATTAAGTCAGTATGGTTAT
+TCAACCTATCGCGGGAGCTAAATGGCAAAGATTCTGGTGCTCTATTATTCCATGTACGGA
+CACATTGAAACCATGGCGCACGCGGTGGCGGAAGGGGCAAAGAAAGTCGACGGCGCAGAG
+GTCATTATAAAGCGTGTGCCAGAAACAATGCCGCCTGAAATCTTCGCAAAAGCTGGCGGT
+AAAACGCAAAACGCACCGGTTGCCACCCCACAGGAGCTGGCGGATTACGATGCCATTATT
+TTTGGTACGCCAACCCGGTTTGGCAATATGTCAGGCCAGATGCGTACCTTCCTGGACCAA
+ACCGGCGGACTGTGGGCATCCGGCGCGCTATACGGCAAGCTCGGCGGCGTGTTCAGTTCT
+ACCGGAACGGGCGGCGGCCAGGAGCAGACCATCACCTCGACCTGGACTACGCTTGCCCAT
+CATGGGATGGTGATTGTCCCGATAGGCTATTCCGCACAGGAACTGTTTGACGTCTCCCAG
+GTTCGCGGCGGTACGCCTTACGGCGCAACGACTATCGCTGGAGGCGACGGTTCACGTCAA
+CCAAGCCAGGAGGAACTCTCTATCGCTCGCTATCAGGGGGAATACGTCGCCGGTCTGGCA
+GTCAAACTCAACGGCTAAATGGAGCCTCAACCCCCACGTCTGAAACCCGGAAAAATCCTT
+GACACTCTGGGTGCTATGCAAAAAAGCCTGACACGTGCCTCGCAGCGTATTGCGCAATAT
+ATTTTAGCCTTCCCCAGACAGGTGACACAGTCATCTATTGCGGATTTGTCGCGCGACACA
+CAGGCCGGAGAAGCCACGGTTATTCGCTTTTGTCGCACCCTGGGCTATAAAGGTTTTCAG
+GATTTTAAAATGGACCTGGCCATTGAACTTGCCACTACCGAGTCTGATGACAGTAGTCCT
+CTACTGGATGCCGAAGTTAGCGAATCCGACGATGCCCACGCGATTGGTTTAAAATTGCAG
+AACACCATTAGTAATGTATTATCTGAAACGCTAAATCTGCTGGATATGCAACAGGTTCTC
+GGTGTCGTGGACGCCCTACGTCACTGTCACTCAGTTTATATGTTTGGTGTGGGCTCATCG
+GGGATCACGGCGCTGGATATGAAACACAAGCTAATGCGTATGGGTTTACGGGGCGATGCG
+GTAAGCAATAACCATTTTATGTACATGCAGGCTACGCTATTGAAAGCAGGCGATGTCGCG
+ATGGGTGTCAGTCACTCGGGCACATCGCCAGAAACAGTGCATTCACTCCGATTGGCCCGA
+CAGGCTGGCGCCACCACAGTCGCCATTACCCATAATCTGGGTTCTCCATTATGTGAAGAG
+GCCGATTTTTGCCTGATCAATGGTAATCGGCAAGGAATGTTGCAGGGTGACTCGATCGGT
+ACGAAAGCCGCGCAGCTTTTCGTCTTTGACCTGCTCTATACCCTTCTTGTACAGTCCTCG
+CCGGAACAGGCCCGAGAAAGCAAATTACGGACAATGAATGCCCTGGACATGACAAAAATG
+AAGAAACTGCCCGGCTTTACGCAAGATTACTTACTCAGCAAGGCGACGACCCTGCCTGAT
+AAAACACGCCTGGAGCGTGCCGTTGAACCGCTATGCGCGCGCCATCCCGGAGAGTGCGGC
+ATTCTTGCGCTGGATAACAGTCTGGACGCTTTTGCCGCCCGCTACCGCCTGACCGAAATG
+GCGGCGCGGACGCTGGATGTGCAGTATTATATTTGGGAAGACGATATGTCCGGGCGGCTG
+CTCTTTTCGGTTCTGCTGTCGGCGGCGAAGCGCGGCGTTCATGTTCGTCTGCTGCTGGAT
+GATAACAATACGCCTGGTCTGGATGATACGTTGCGCTTGCTGGATAGCCATCCTAATATC
+GAAGTTCGTCTGTTTAATCCTTTCTCTTTTCGTACGCTACGCGCGCTGGGATATTTGACG
+GATTTTGCGCGGCTGAATCGGCGGATGCACAATAAAAGTTACACTGCCGACGGCGTAGTG
+ACGCTGGTCGGTGGGCGCAACATCGGCGATGCCTATTTCGGCGCTGGCGAGGAGCCGCTA
+TTTTCCGATCTGGACGTGATGGCCATTGGCCCGGTGGTCAATGATGTCGCCAATGATTTT
+GAACGTTACTGGCGCTGTAGTTCAGTGTCGACATTGCAGCAAGTATTATCCCTTTCTGAG
+CAGGAACTGACGCAGCGTATCGAACTTCCCGAATCCTGGTATAACGATGAGATCACCCGC
+CGTTATCTGCATAAGCTGGAAACCAGCCAGTTTATGGCGGATCTCGATCGCGGAACGTTG
+CCGCTGATTTGGGCAAAAACACGCTTGCTTAGCGATGACCCTTCTAAAGGCGAGGGGAAG
+GCGCAGCGCCATTCGCTTCTTCCGCAGCGATTATTTGACGTGATGGGGTCGCCGACGGAG
+CGTATCGACATTATTTCCGCTTACTTTGTCCCTACGCGCGCAGGCGTGGCGCAGTTGCTT
+AATCTGGTCAGGAAAGGTGTGAAGATCGCCATCTTAACTAACTCTCTGGCGGCCAACGAT
+GTGGCGGTCGTTCACGCAGGGTACGCGCGCTGGCGCAAGAAATTACTGCGCTATGGCGTG
+GAGCTCTACGAACTGAAACCGACCCGCGAACATGAAACCGCCGTACATGATCGCGGACTC
+ACCGGGAACTCAGGTTCCAGCTTACATGCTAAAACGTTCAGTATTGATGGTAGTAAGGTG
+TTTATCGGGTCGCTTAATTTTGATCCCCGTTCAACGCTTTTAAATACCGAAATGGGCTTT
+GTCATTGAAAGTGAAACGCTGGCGACGCTTATTCATAAGCGTTTTACGCAGAGCCAACGC
+GATGCGGCCTGGCAACTGCGGCTGGATCGCTGGGGACGAATTAACTGGATCGATCGTCAG
+CAAGAAGAGGAAAAGGTGTTAAAGAAAGAACCCGCTACGCGTTTCTGGCAGCGAGTTCTG
+GTACGGTTGGCGGCAATTTTACCTGTGGAATGGTTGCTGTGAATGCCAACTCAAGAAGCA
+AAAGCGCACCGCGTCGGCGAATGGGCAAGCCTGCGTAATACGTCGCCGGAAATTGCCGAA
+GCCATTTTTGAAGTCGCTCACTATGACGAGAAACTGGCAGAAAAAATATGGGAAGAAGGT
+AGCGATGAGGTGCTGATCAAAGCCTTTGAGAAAACGGACAAAGACTCGCTCTTCTGGGGC
+GAACAAGTCATCGAACGTAAGAACGTATAAATGTATCCCGTTGACCTGCATATGCATACC
+GTCGCGAGCACTCATGCCTACAGTACTCTGAGCGATTATATCGCGGAAGCCAAACGCAAA
+GGCATGAAACTTTTTGCGATTACCGATCATGGTCCGGACATGGAAGATGCGCCGCATCAC
+TGGCAGTTTATTAACATGCGCATCTGGCCGCGTCTGGTTGACGGCGTGGGGATACTGCGT
+GGCATGGAGGCGAATATCAAGAATATTAACGGTGAAATTGATTGTTCCGGAAAGATGTTC
+GACTCGCTGGATCTGATTATCGCAGGCTTTCATGAGCCCGTTTTTGCGCCGCATGATAAA
+GAAACGAATACTCAGGCGATGATCGCGACCATCGCCAGCGGCAAGGTGCATATAATTAGT
+CACCCGGGAAATCCAAAGTATCCAGTGGAGGTTAAAGCCATCGCGCAGGCGGCGGCGAAA
+CACCAGGTAGCGCTGGAAATCAACAACTCTTCTTTTCTGCATTCGCGTAAAGGAAGCGAA
+GATAAGTGCCGCGCGGTCGCTGCCGCCGTACGCGATGCGGGAGGCTGGGTAGCGTTAGGC
+TCTGAGTCCCATACGGCCTTTACGCTTGGCGATTTCACCGAATGCCGGAAAATTCTGGAT
+GCGGTGAATTTTCCGGAAGATCGAATCCTGAACGTCTCTCCGCAGCGCTTACTGGCCTTT
+CTCGAGTCACGCGGTATGGCGCCTGTACCGGAATTTGCCGAACTTATGAATGAGTTTTCA
+ATCCTGTGCCGTGTGCTGGGATCGTTGTTTTACCGCCAAGCGCAAGATCCTTTACTGGTT
+CCGCTGTTTACGTTAATCCGTGAAGGTAAACTGGCGGCAGACTGGCCGCTGGAGCAGGAT
+GACATGCTGGCGCGTTTACAGAAAAGCTGCGATATCACGGAGATTTCCACTGATTACAAT
+GCGTTATTTGTTGGGGAAGAGTGCGCGGTAGCGCCATACGGCAGTGCGTGGGTCGAAGGC
+GCGGAAGAGTCTGAGGTGCGCGCTTTTTTAACGTCGCGAGGGATGCCGCTGGCCGATACG
+CCTGCCGATCACATTGGCACTTTATTGCTCGCGGCCTCCGGGCTGGAAGATCAGTCTGCC
+GAAGATGAAAGTGAAGCGCTGGAAACCTTATTTGCCGATGATCTGCTTCCCTGGTGCAAT
+ACCTTCCTCGGTAAAGTTGAAGCCCATGCCGTTACGCCAGTCTGGCGCACTCTGGCGCCG
+CTAACGCGTGATGCGATAGGGGCCATGTGGGATGAACTTGAGGAAGAAGATGAAGAAATG
+ATGCGCGCGATGAACATACTTCTTTCTATTGCTATCACTACGGGCATCCTTTCTGGAATA
+TGGGGATGGGTGGCCGTCTCCCTGGGGTTACTAAGCTGGGCCGGTTTTTTAGGCTGTACG
+GCTTATTTGGCCTGTCCGCAGGGCGGCTTTAAGGGATTGTTGATTTCCGCCTGTACGCTG
+TTAAGCGGGATGGTGTGGGCGCTGGTCATTATTCACGGTAGCGCGTTGGCGCCGCATCTG
+GAAATTGTGAGTTACGTGTTGACGGGGATCGTGGCATTCCTGATGTGTATCCAGGCAAAG
+CAGCTATTGCTTTCTTTTGTTCCGGGAACATTTATCGGCGCCTGCGCGACATTTGCAGGG
+CAGGGTGAGTGGCGGTTGGTATTACCGTCGCTGGCGCTGGGGCTAATCTTTGGCTATGCC
+ATGAAAAAGAGTGGGCTATGGCTGGCATCACGCCGCGAGCAACATTCAGCGAATACGGCG
+GTCACAAAGATGAAAAAAAACCTGCTGGGATTCACCCTCGCATCCTTGTTATTCACGACC
+GGTTCCGCCGTGGCGGCGGAGTATAAAATTGATAAAGAAGGCCAACATGCGTTCGTCAAT
+TTCCGCATCCAGCATCTGGGCTACAGCTGGCTATACGGCACCTTTAAAGATTTCGACGGC
+ACGTTCACTTTTGACGAAAAAAATCCGTCAGCAGACAAAGTGAATGTGACCATTAACACC
+AATAGCGTCGACACTAACCATGCCGAACGTGACAAACACCTGCGTAGCGCGGAGTTTCTT
+AATGTTGCGAAATTCCCGCAGGCAACCTTCACCTCTACCAGCGTGAAAAAAGAGGGCGAT
+GAACTGGATATTACCGGCAATCTGACGCTCAATGGCGTGACTAAACCGGTGACGCTGGAA
+GCGAAGCTGATGGGCCAGGGCGACGATCCGTGGGGCGGTAAGCGCGCGGGCTTTGAGGCC
+GAAGGAAAAATTAAGCTGAAAGATTTCAATATAACTACCGATCTCGGCCCAGCCTCACAA
+GAGGTGGAGCTTATCATCTCAGTAGAAGGCGTTCAGCAGAAGTAAATGTTACTGATGATG
+GCGCTGATCGTGCGTATTATCTGGCGGCTTTATTCTCCGCCGCCCGTTGCGTTGACCAGC
+TATTCCCGTTTAACGCGCATTGGCGCCGCCGCGGGTCATATCCTTCTGTATCTCCTGCTC
+TTTGCGATAATCATTAGCGGCTACCTGATTTCCACCGCCGACGGTAAACCGATTAGCGTC
+TTTGGCTGGTTTGAGATTCCGGCCACGCTTACGGACGCGGGCGCGCAGGCTGACATCGCC
+GGAACACTGCATCTGTGGTTTGCCTGGTCGCTGGTCATTATCTCGCTCTCGCATGGGGTT
+ATGGCGCTAAAACACCATTTCATCGATAAAGACGACACACTGAAACGTATGACAGGAATG
+TCGTCATCTGACTATGGAGCTCAAAAATGAATGGTTAAGTTATCAATGACGCTGCGCCTG
+ACAATTTCTTTTATCGCCATACTTATCCTCGCCTGTACCGGCATTAGCTGGACGCTCTAT
+AACGCGCTGAGCAAAGAATTAACGTATCGGGATGATATGACGCTAATAAATCGGGCGGCG
+CAAATGCAGCAACTGTTACTGGATGGCGCCAGGCCGGAAAATCTGCCGCTCTATTTCAAT
+CGGATGGTGGATACGAAGCAGGATATCTTATTGATCCACTCAGCAACAGGCCATAATGTT
+GCGATTAATCATAGCGGCATCCCCGACCAACGCTTTAACGAGATTCCGCTGGCTAAAAAC
+ATCACCCGCGAAACCTTATTTCGCCAGGCGGTACAAGGCACGGAGCTGACCGCGGTACGA
+GTAAACGCCAGAAGCGGCGATAACCCGCTGACCCTTACTATTGCCAGGCTGGCGACGGAA
+AGGCGGCAAATGCTGGCGCAATATCGCCGCAACAGTTTGCTGATTAGCCTTATCGCGATC
+CTCGTCTGTTCGGCGCTCAGTCCATTAGTCATCAGAAACGGGCTGCGGGCCATTACGTCG
+CTCAGCCGACTCACCGCGGCGACAGATAGCGGCACACTTCGCCAGCCGCTGGCGGAACAG
+GCGTTACCCGTCGAGCTCAGGCCGCTTGGGCAAGCGCTAAATACCATGCGCCAGAAGCTT
+TCCGACGATTTTGAACGCCTGAACCAATTTGCCGACGATCTGGCGCATGAGCTGCGCACG
+CCGGTTAATATTTTACTGGGGAAGAATCAGGTTATGCTGAGTCAGGAACGCAGCGCCGAA
+GAGTATCAACAAGCCCTTGTCGATAATATTGAAGAGCTGGAGGGACTGTCGCGACTGACA
+GAAAATATTCTCTTTCTGGCACGCGCGGAGCACCAGAATATAGCGGTAAAAAAACAGCCT
+GTTTCGCTCAATGCGCTGGTCGAAAATATGCTGGATTATCTTAGCCCCCTTGCCGAAGAG
+AAGCACATCTGTTTTATAAATCAATGTCAGGGAACGGTATGGGCTGACGAAATATTATTA
+CAAAGAGTGCTCTCAAACCTGCTGACGAATGCCATCCGTTATTCTGATGAAAACGCCGTG
+ATACGTATTGAAAGCGCTTATGATGATAACGTTGCAGAAATTCGGGTCGCTAATCCGGGC
+AGCCCCACCGCCGATGCGGATAAGCTTTTCCGGCGTTTTTGGCGAGGAGATAATGCCCGC
+TACACTGCCGGTTTCGGCCTGGGGTTATCGTTAGTTAACGCGATTGCCCTATTGCACGGT
+GGCTCGGCATCTTACCGCTATGCCGATGAACATAATATCTTTTCGGTTCGTCTGCCTGAT
+AGCGGTGATAGCTAAGTGATATGTCTGAAAGTCCAGGGCGGCATTGGTGAAATTTTTACG
+GTGACGCAGCAGGCGGATAAATTCTTGCCGGCTACGCAGTTCCACTGGAGCTGGACGGAA
+AGCACAGTACCTGTATTGATGATTGGGTTTCTGTTTGCCAATATTCAGCAATTTACTGCC
+AGTCAGGATGTGGTCCAACGCTATATGGTGACTGACTCCATAGAGGAAACGAAGAAAACA
+TTACTTACAAATGCCAAACTGGTTGCGGTGATCCCTGTTTTCTTTTTTGCTATCGGCTCG
+GCATTATTTGTCTACTATCAGCAACAGCCACAATTATTACCGGCGGGATTCAACACTGGC
+GGCATTTTGCCCTTATTCGTGGTCACGGAAATGCCAGTCGGCATTGCAGGGTTGATAATC
+TCCGCTATTTTCGCTGCCGCGCAGTCGAGCATCTCCAGCAGCTTAAACAGCATTTCCAGT
+TGTTTTAATTCCGATATCTATCAGCGGTTGAGTCATAAAAAAGGAACGCCAGAAAACCGT
+ATGAAAATAGCTAAGTTAGTTATTCTGGTCGCGGGCCTGATAAGTAGCGCGGCCTCGGTA
+TGGCTGGTCATGGCCGATGAATCAGAGATCTGGGATGCATTTAATAGTCTGATAGGTCTG
+ATGGGAGGGCCAATGACCGGTCTGTTGATGCTGGGCATTTTCTTTAAACGAGCAAATGCC
+GGGAGTGCGGTTTTAGGAATTATTATGAGCGTCATTACCGTGCTGGGCACACGCTATGCC
+ACTGACCTTAACTTCTTCTTTTATGGGGTCATTGGCTCGCTAAGCGTGGTGATCAGCGGC
+GTTATTTTCGCCCCGTTATTTGCCCCGGCACCGCCATTGACGCTGGATGAAAAACCTGAA
+CCAAAGGTGACATTAATGAAAATCAACAGATATCTTCTGGGTATGGTTTCGTTTATAGCA
+TTTTCATCATATCTACAAGCGGCAACCCTTGATTATCGGCATGAATATGCTGATAGAACC
+AGAATTAATAAAGACCGTATTGCTATAATTGAAAAGCTTCCTAACGGCATTGGTTTTTAT
+GTCGATGCCAGCGTTAAATCGGGAGGAGTAGATGGTGAGCAGGATAAGCATTTAAGCGAT
+CTCGTCGCAAACGCTATAGAACTGGGCGTAAGTTATAATTATAAAGTTACGGACCATTTT
+GTTTTGCAGCCTGGATTTATATTTGAAAGCGGTCCAGACACTTCAATTTATAAGCCTTAT
+TTAAGGGCGCAATATAATTTTGATTCTGGTGTTTATATGGCTGGTCGTTACCGTTATGAC
+TATGCAAGGAAGACAGCTAACTATAATGATGATGAGAAAACGAATAGATTTGATACTTAT
+ATAGGTTATGTTTTTGATGAGTTGAAATTGGAATATAAATTTACCTGGATGGATAGCGAT
+CAAATTAAATTTGATAACAAAAAAACAAACTATGAACATAATGTGGCTTTAGCCTGGAAA
+CTGAATAAGTCATTTACACCATACGTTGAGGTCGGAAATGTAGCGGTGAGAAATAATACC
+GATGAGAGACAGACCCGTTATCGCGTTGGATTACAATACCACTTTTGAATGACGAAATAC
+GGTGTTATAGGTACAGGTTATTTTGGCGCTGAACTGGCGCGATTTATGTCTAAGGTTGAA
+GGGGCGAAAATCACTGCGATTTACGATCCGGTAAATGCGGCTCCGATAGCGAAAGAGCTG
+AACTGTGTCGCCACTTCAACGATGGAGGCGCTTTGTACCCATCCTGATGTGGATTGCGTA
+ATTATTGCTTCACCAAATTACTTACATAAAGCGCCGGTCATTGCGGCGGCTAAAGCGGGT
+AAACACGTGTTTTGTGAAAAACCTATCGCCTTAAATTACCAGGATTGTAAGGATATGGTT
+GATGCCTGCAAAGAAGCTGGTGTTACCTTTATGGCGGGTCACGTTATGAACTTTTTTCAC
+GGGGTTCGCCACGCTAAAGCGCTCATCAAAGCCGGTGAAATCGGTGAAGTTACACAAGTT
+CACACTAAACGTAATGGTTTTGAAGACGTGCAGGATGAGATCTCATGGAAGAAGATTCGC
+GCAAAGTCAGGTGGGCATCTGTACCATCACATTCACGAGCTAGATTGTACACTGTTCATC
+ATGGATGAAACCCCATCCCTGGTTTCAATGGCGGCGGGGAATGTTGCGCACAAAGGTGAA
+AAATTTGGTGATGAAGATGATGTTGTCCTAATCACCCTTGAGTTTGAAAGCGGTCGTTTC
+GCGACACTTCAGTGGGGATCATCGTTCCACTACCCTGAGCACTATGTATTAATTGAGGGC
+ACGACAGGTGCAATTCTCATTGATATGCAAAACACGGCTGGTTATCTAATAAAAGCGGGC
+AAAAAAACACACTTTCTTGTGCATGAAAGCCAGGCGGAGGATGATGATCGTCGCAACGGT
+AACATATCCAGCGAGATGGATGGCGCAATCGCTTATGGTAAACCCGGTAAACGTACGCCG
+ATGTGGCTCTCATCAATTATGAAACTGGAGATGCAGTACTTGCATGATGTGATAAACGGT
+CTGGAGCCAGGCGAGGAGTTTGCTAAATTGCTAACGGGAGAAGCGGCGACAAATGCCATT
+GCTACCGCTGATGCTGCGACGCTTTCTTCAAACGAGGGGCGCAAAGTTAAACTCACTGAA
+ATTCTTGGCTAAATGACATCACGTCTTCAGGTCATACAGGGTGATATCACTCAACTTAGC
+GTCGATGCGATTGTGAATGCCGCTAACGCATCATTAATGGGCGGCGGTGGCGTAGACGGC
+GCAATTCATCGCGCGGCGGGGCCGGCATTGCTGGACGCCTGTAAACTCATCCGTCAGCAA
+CAGGGCGAATGTCAGACGGGACATGCGGTTATCACGCCTGCTGGCAAGCTTTCGGCAAAG
+GCGGTTATTCACACAGTGGGGCCCGTCTGGCGAGGCGGCGAACACCAGGAAGCTGAGCTA
+CTCGAAGAGGCATACCGGAATTGTTTGCTGCTTGCCGAGGCGAATCACTTTCGTTCCATC
+GCTTTTCCGGCAATCAGTACCGGCGTTTATGGCTATCCACGCGCCCAGGCCGCTGAAGTC
+GCCGTCAGGACGGTTTCAGATTTTATTACCCGTTACGCTCTGCCTGAACAGGTATACTTT
+GTCTGTTATGATGAAGAAACTGCCCGGCTTTACGCAAGATTACTTACTCAGCAAGGCGAC
+GACCCTGCCTGA
diff --git a/t/data/expected_reannotated_groups_file b/t/data/expected_reannotated_groups_file
new file mode 100644
index 0000000..aa08715
--- /dev/null
+++ b/t/data/expected_reannotated_groups_file
@@ -0,0 +1,7 @@
+hly: 1_1	2_1	3_1
+argF: 1_3	3_3
+group_4: 2_4	3_4
+speH: 1_2	2_2
+yfnB: 3_5
+group_7: 2_7
+group_6: 1_6
diff --git a/t/data/expected_sample_weights_accessory_graph.dot b/t/data/expected_sample_weights_accessory_graph.dot
new file mode 100644
index 0000000..d88fd4f
--- /dev/null
+++ b/t/data/expected_sample_weights_accessory_graph.dot
@@ -0,0 +1,15 @@
+digraph g
+{
+
+  /* list of nodes */
+  "group_X";
+  "group_X";
+  "group_X";
+  "group_X";
+  "group_X";
+
+  /* list of edges */
+  "group_X" -> "group_X" [weight = "2"];
+  "group_X" -> "group_X" [weight = "2"];
+  "group_X" -> "group_X" [weight = "2"];
+}
diff --git a/t/data/expected_sample_weights_core_accessory_graph.dot b/t/data/expected_sample_weights_core_accessory_graph.dot
new file mode 100644
index 0000000..6f187a6
--- /dev/null
+++ b/t/data/expected_sample_weights_core_accessory_graph.dot
@@ -0,0 +1,22 @@
+digraph g
+{
+
+  /* list of nodes */
+  "group_A";
+  "group_B";
+  "group_C";
+  "group_D";
+  "group_E";
+  "group_F";
+  "group_G";
+  "group_H";
+
+  /* list of edges */
+  "group_A" -> "group_B" [weight = "2"];
+  "group_C" -> "group_B" [weight = "2"];
+  "group_C" -> "group_D" [weight = "2"];
+  "group_E" -> "group_D" [weight = "0.625"];
+  "group_E" -> "group_F" [weight = "0.625"];
+  "group_F" -> "group_G" [weight = "2"];
+  "group_G" -> "group_H" [weight = "2"];
+}
diff --git a/t/data/expected_set_difference_common_set b/t/data/expected_set_difference_common_set
new file mode 100644
index 0000000..c38f7c4
--- /dev/null
+++ b/t/data/expected_set_difference_common_set
@@ -0,0 +1,3 @@
+group_1: 1_1	2_1	3_1
+group_3: 1_3	3_3
+group_2: 1_2	2_2
diff --git a/t/data/expected_set_difference_common_set_statistics.csv b/t/data/expected_set_difference_common_set_statistics.csv
new file mode 100644
index 0000000..bcf3868
--- /dev/null
+++ b/t/data/expected_set_difference_common_set_statistics.csv
@@ -0,0 +1,4 @@
+"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","Min group size nuc","Max group size nuc","Avg group size nuc","query_1.fa.tmp.filtered.fa","query_2.fa.tmp.filtered.fa","query_3.fa.tmp.filtered.fa"
+"group_1","","","3","3","1","","","","","","","","","1_1","2_1","3_1"
+"group_2","","","2","2","1","","","","","","","","","1_2","2_2",""
+"group_3","","","2","2","1","","","","","","","","","1_3","","3_3"
diff --git a/t/data/expected_set_difference_unique_set_one b/t/data/expected_set_difference_unique_set_one
new file mode 100644
index 0000000..d72c802
--- /dev/null
+++ b/t/data/expected_set_difference_unique_set_one
@@ -0,0 +1 @@
+group_6: 1_6
diff --git a/t/data/expected_set_difference_unique_set_one_statistics.csv b/t/data/expected_set_difference_unique_set_one_statistics.csv
new file mode 100644
index 0000000..9dbe73c
--- /dev/null
+++ b/t/data/expected_set_difference_unique_set_one_statistics.csv
@@ -0,0 +1,2 @@
+"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","Min group size nuc","Max group size nuc","Avg group size nuc","query_1.fa.tmp.filtered.fa","query_2.fa.tmp.filtered.fa","query_3.fa.tmp.filtered.fa"
+"group_6","","","1","1","1","","","","","","","","","1_6","",""
diff --git a/t/data/expected_set_difference_unique_set_two b/t/data/expected_set_difference_unique_set_two
new file mode 100644
index 0000000..9b9fe13
--- /dev/null
+++ b/t/data/expected_set_difference_unique_set_two
@@ -0,0 +1,3 @@
+group_4: 2_4	3_4
+group_7: 2_7
+group_5: 3_5
diff --git a/t/data/expected_set_difference_unique_set_two_statistics.csv b/t/data/expected_set_difference_unique_set_two_statistics.csv
new file mode 100644
index 0000000..3e0c956
--- /dev/null
+++ b/t/data/expected_set_difference_unique_set_two_statistics.csv
@@ -0,0 +1,4 @@
+"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","Min group size nuc","Max group size nuc","Avg group size nuc","query_1.fa.tmp.filtered.fa","query_2.fa.tmp.filtered.fa","query_3.fa.tmp.filtered.fa"
+"group_4","","","2","2","1","","","","","","","","","","2_4","3_4"
+"group_5","","","1","1","1","","","","","","","","","","","3_5"
+"group_7","","","1","1","1","","","","","","","","","","2_7",""
diff --git a/t/data/expected_some_different_output b/t/data/expected_some_different_output
new file mode 100644
index 0000000..b2fe49b
--- /dev/null
+++ b/t/data/expected_some_different_output
@@ -0,0 +1,95 @@
+csgE: 11111_1#11_04109	22222_2#22_04109
+flgJ: 11111_1#11_04150	22222_2#22_04150
+hpaA: 11111_1#11_04075	22222_2#22_04075
+sopB: 11111_1#11_04059	22222_2#22_04059
+csgG: 11111_1#11_04107	22222_2#22_04107
+hpcB: 11111_1#11_04070	22222_2#22_04070
+ycdZ: 11111_1#11_04106	22222_2#22_04106
+uraH: 11111_1#11_04063	22222_2#22_04063
+yceJ: 11111_1#11_04126	22222_2#22_04126
+flgL: 11111_1#11_04152	22222_2#22_04152
+ybhO_2: 11111_1#11_04116	22222_2#22_04116
+msyB: 11111_1#11_04121	22222_2#22_04121
+hpaI: 11111_1#11_04073	22222_2#22_04073
+yedV: 11111_1#11_04061	22222_2#22_04061
+mdoC: 11111_1#11_04117	22222_2#22_04117
+nanE: 11111_1#11_04097	22222_2#22_04097
+csgF: 11111_1#11_04108	22222_2#22_04108
+sigE: 11111_1#11_04058	22222_2#22_04058
+flgE: 11111_1#11_04145	22222_2#22_04145
+hpcD: 11111_1#11_04071	22222_2#22_04071
+hpaX: 11111_1#11_04074	22222_2#22_04074
+mdtH: 11111_1#11_04134	22222_2#22_04134
+yccJ: 11111_1#11_04084	22222_2#22_04084
+yjhC: 11111_1#11_04101	22222_2#22_04101
+ghrA: 22222_2#22_04103	11111_1#11_04103
+csgB: 11111_1#11_04111	22222_2#22_04111
+wrbA: 11111_1#11_04085	22222_2#22_04085
+sglT: 11111_1#11_04096	22222_2#22_04096
+cbpM: 11111_1#11_04077	22222_2#22_04077
+flgI: 11111_1#11_04149	22222_2#22_04149
+hpaR: 11111_1#11_04066	22222_2#22_04066
+hpcC_1: 11111_1#11_04068	22222_2#22_04068
+copR: 11111_1#11_04062	22222_2#22_04062
+agp: 11111_1#11_04083	22222_2#22_04083
+hpaB: 11111_1#11_04065	22222_2#22_04065
+group_33: 11111_1#11_04114	22222_2#22_04114
+mdoH: 11111_1#11_04119	22222_2#22_04119
+flgD: 11111_1#11_04144	22222_2#22_04144
+cbpA: 11111_1#11_04078	22222_2#22_04078
+putA: 11111_1#11_04089	22222_2#22_04089
+hpaC: 11111_1#11_04064	22222_2#22_04064
+flgG: 11111_1#11_04147	22222_2#22_04147
+rutR: 11111_1#11_04087	22222_2#22_04087
+flgK: 11111_1#11_04151	22222_2#22_04151
+group_56: 11111_1#11_04088	22222_2#22_04088
+pepD_2: 11111_1#11_04060	22222_2#22_04060
+csgC: 11111_1#11_04113	22222_2#22_04113
+csgD: 11111_1#11_04110	22222_2#22_04110
+hpcG: 11111_1#11_04072	22222_2#22_04072
+ycdY: 11111_1#11_04105	22222_2#22_04105
+scsC: 11111_1#11_04081	22222_2#22_04081
+flgF: 11111_1#11_04146	22222_2#22_04146
+phoH: 11111_1#11_04092	22222_2#22_04092
+flgH: 11111_1#11_04148	22222_2#22_04148
+group_52: 11111_1#11_04091	22222_2#22_04091
+resA: 11111_1#11_04082	22222_2#22_04082
+yceI_2: 11111_1#11_04125	22222_2#22_04125
+bssS: 11111_1#11_04129	22222_2#22_04129
+yidK: 11111_1#11_04095	22222_2#22_04095
+group_47: 11111_1#11_04086	22222_2#22_04086
+mdoG: 11111_1#11_04118	22222_2#22_04118
+yiiy: 11111_1#11_04099	22222_2#22_04099
+solA: 11111_1#11_04128	22222_2#22_04128
+group_93: 11111_1#11_04120	22222_2#22_04120
+scsB: 11111_1#11_04080	22222_2#22_04080
+ymdB: 11111_1#11_04115	22222_2#22_04115
+ybbH_2: 11111_1#11_04093	22222_2#22_04093
+putP: 11111_1#11_04090	22222_2#22_04090
+hpaG: 11111_1#11_04067	22222_2#22_04067
+nanM: 11111_1#11_04098	22222_2#22_04098
+scsA: 11111_1#11_04079	22222_2#22_04079
+rnz: 11111_1#11_04076	22222_2#22_04076
+ycdX: 11111_1#11_04104	22222_2#22_04104
+nanT_3: 11111_1#11_04100	22222_2#22_04100
+hpcC_2: 11111_1#11_04069	22222_2#22_04069
+flgA: 11111_1#11_04141
+grxB: 11111_1#11_04133
+mviN: 11111_1#11_04138
+group_12: 11111_1#11_04055
+yceH: 11111_1#11_04136
+pipB2_2: 11111_1#11_04056
+flgC: 11111_1#11_04143
+yceE: 11111_1#11_04122
+htrB_2: 11111_1#11_04123
+mviM: 11111_1#11_04137
+csgA: 22222_2#22_04112
+flgN: 11111_1#11_04139
+rimJ: 11111_1#11_04135
+flgB: 11111_1#11_04142
+group_31: 22222_2#22_04141
+group_14: 11111_1#11_04112
+yceB: 11111_1#11_04132
+pyrC: 11111_1#11_04131
+flgM: 11111_1#11_04140
+group_17: 11111_1#11_04124
diff --git a/t/data/expected_summary_statistics.txt b/t/data/expected_summary_statistics.txt
new file mode 100644
index 0000000..cd3d36c
--- /dev/null
+++ b/t/data/expected_summary_statistics.txt
@@ -0,0 +1,5 @@
+Core genes	(99% <= strains <= 100%)	1
+Soft core genes	(95% <= strains < 99%)	1
+Shell genes	(15% <= strains < 95%)	24
+Cloud genes	(0% <= strains < 15%)	4
+Total genes	(0% <= strains <= 100%)	30
diff --git a/t/data/expected_uneven_sequences.fa b/t/data/expected_uneven_sequences.fa
new file mode 100644
index 0000000..c9f0e0c
--- /dev/null
+++ b/t/data/expected_uneven_sequences.fa
@@ -0,0 +1,18 @@
+>1
+aNN
+>2
+aaN
+>3
+aaa
+>4
+aaaaNN
+>5
+aaaaaN
+>6
+aaaaaa
+>7
+aaaaaaaNN
+>8
+aaaaaaaaN
+>9
+aaaaaaaaa
diff --git a/t/data/expected_union_of_groups.gg b/t/data/expected_union_of_groups.gg
new file mode 100644
index 0000000..b6c4dd7
--- /dev/null
+++ b/t/data/expected_union_of_groups.gg
@@ -0,0 +1,7 @@
+group_1: 1_1	2_1	3_1
+group_3: 1_3	3_3
+group_4: 2_4	3_4
+group_2: 1_2	2_2
+group_7: 2_7
+group_5: 3_5
+group_6: 1_6
diff --git a/t/data/genbank_gbff/genbank1.gff b/t/data/genbank_gbff/genbank1.gff
new file mode 100644
index 0000000..d89711b
--- /dev/null
+++ b/t/data/genbank_gbff/genbank1.gff
@@ -0,0 +1,195 @@
+##gff-version 3
+##sequence-region CVAX01000001 1 489098
+# conversion-by bp_genbank2gff3.pl
+# organism Salmonella enterica subsp. enterica serovar Typhi
+# Note Salmonella enterica subsp. enterica serovar Typhi genome assembly 10426_1#28, scaffold ERS325254SCcontig000001, whole genome shotgun sequence.
+# date 02-APR-2015
+CVAX01000001	GenBank	region	1	489098	.	+	1	ID=CVAX01000001;Dbxref=BioProject:PRJEB3215,taxon:90370;Name=CVAX01000001;Note=Salmonella enterica subsp. enterica serovar Typhi genome assembly 10426_1#28%2C scaffold ERS325254SCcontig000001%2C whole genome shotgun sequence.;collection_date=1994;country=Viet Nam;date=02-APR-2015;isolation_source=Not known;mol_type=genomic DNA;organism=Salmonella enterica subsp. enterica serovar Typhi;serovar=H58;strain=dtc71
+CVAX01000001	GenBank	gene	358	1212	.	-	1	ID=ERS325254_00002;Name=kdsA;locus_tag=ERS325254_00002
+CVAX01000001	GenBank	mRNA	358	1212	.	-	1	ID=ERS325254_00002.t01;Parent=ERS325254_00002
+CVAX01000001	GenBank	CDS	358	1212	.	-	1	ID=ERS325254_00002.p01;Parent=ERS325254_00002.t01;Dbxref=GI:804224942;eC_number=2.5.1.55;Name=kdsA;codon_start=1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004730486.1;locus_tag=ERS325254_00002;product=2-dehydro-3-deoxyphosphooctonate aldolase;protein_id=CQU24961.1;transl_table=11;translation=length.284
+CVAX01000001	GenBank	exon	358	1212	.	-	1	Parent=ERS325254_00002.t01
+CVAX01000001	GenBank	gene	1250	2059	.	-	1	ID=ERS325254_00003;Name=ERS325254_00003
+CVAX01000001	GenBank	mRNA	1250	2059	.	-	1	ID=ERS325254_00003.t01;Parent=ERS325254_00003
+CVAX01000001	GenBank	CDS	1250	2059	.	-	1	ID=ERS325254_00003.p01;Parent=ERS325254_00003.t01;Dbxref=GI:804224943;Name=ERS325254_00003;codon_start=1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004730487.1;product=regulator;protein_id=CQU25012.1;transl_table=11;translation=length.269
+CVAX01000001	GenBank	exon	1250	2059	.	-	1	Parent=ERS325254_00003.t01
+CVAX01000001	GenBank	gene	2063	2452	.	-	1	ID=ERS325254_00004;Name=sirB2;locus_tag=ERS325254_00004
+CVAX01000001	GenBank	mRNA	2063	2452	.	-	1	ID=ERS325254_00004.t01;Parent=ERS325254_00004
+CVAX01000001	GenBank	CDS	2063	2452	.	-	1	ID=ERS325254_00004.p01;Parent=ERS325254_00004.t01;Dbxref=GI:804224944;Name=sirB2;codon_start=1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004730488.1;locus_tag=ERS325254_00004;product=regulator;protein_id=CQU25030.1;transl_table=11;translation=length.129
+CVAX01000001	GenBank	exon	2063	2452	.	-	1	Parent=ERS325254_00004.t01
+CVAX01000001	GenBank	gene	2449	3282	.	-	1	ID=ERS325254_00005;Name=prmC;locus_tag=ERS325254_00005
+CVAX01000001	GenBank	mRNA	2449	3282	.	-	1	ID=ERS325254_00005.t01;Parent=ERS325254_00005
+CVAX01000001	GenBank	CDS	2449	3282	.	-	1	ID=ERS325254_00005.p01;Parent=ERS325254_00005.t01;Dbxref=GI:804224945;eC_number=2.1.1.-;Name=prmC;codon_start=1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_001570225.1;locus_tag=ERS325254_00005;product=N5-glutamine S-adenosyl-L-methionine-dependent methyltransferase;protein_id=CQU25049.1;transl_table=11;translation=length.277
+CVAX01000001	GenBank	exon	2449	3282	.	-	1	Parent=ERS325254_00005.t01
+CVAX01000001	GenBank	gene	3282	4364	.	-	1	ID=ERS325254_00006;Name=prfA;locus_tag=ERS325254_00006
+CVAX01000001	GenBank	mRNA	3282	4364	.	-	1	ID=ERS325254_00006.t01;Parent=ERS325254_00006
+CVAX01000001	GenBank	CDS	3282	4364	.	-	1	ID=ERS325254_00006.p01;Parent=ERS325254_00006.t01;Dbxref=GI:804224946;Name=prfA;codon_start=1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004730490.1;locus_tag=ERS325254_00006;product=peptide chain release factor 1 (RF-1);protein_id=CQU25068.1;transl_table=11;translation=length.360
+CVAX01000001	GenBank	exon	3282	4364	.	-	1	Parent=ERS325254_00006.t01
+CVAX01000001	GenBank	gene	4405	5661	.	-	1	ID=ERS325254_00007;Name=hemA;locus_tag=ERS325254_00007
+CVAX01000001	GenBank	mRNA	4405	5661	.	-	1	ID=ERS325254_00007.t01;Parent=ERS325254_00007
+CVAX01000001	GenBank	CDS	4405	5661	.	-	1	ID=ERS325254_00007.p01;Parent=ERS325254_00007.t01;Dbxref=GI:804224947;eC_number=1.2.1.70;Name=hemA;codon_start=1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004730491.1;locus_tag=ERS325254_00007;product=glutamyl-tRNA reductase;protein_id=CQU25084.1;transl_table=11;translation=length.418
+CVAX01000001	GenBank	exon	4405	5661	.	-	1	Parent=ERS325254_00007.t01
+CVAX01000001	GenBank	gene	5975	6598	.	+	1	ID=ERS325254_00008;Name=hemM;locus_tag=ERS325254_00008
+CVAX01000001	GenBank	mRNA	5975	6598	.	+	1	ID=ERS325254_00008.t01;Parent=ERS325254_00008
+CVAX01000001	GenBank	CDS	5975	6598	.	+	1	ID=ERS325254_00008.p01;Parent=ERS325254_00008.t01;Dbxref=GI:804224948;Name=hemM;codon_start=1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004730492.1;locus_tag=ERS325254_00008;product=outer membrane lipoprotein;protein_id=CQU25103.1;transl_table=11;translation=length.207
+CVAX01000001	GenBank	exon	5975	6598	.	+	1	Parent=ERS325254_00008.t01
+CVAX01000001	GenBank	gene	6595	7446	.	+	1	ID=ERS325254_00009;Name=ipk;locus_tag=ERS325254_00009
+CVAX01000001	GenBank	mRNA	6595	7446	.	+	1	ID=ERS325254_00009.t01;Parent=ERS325254_00009
+CVAX01000001	GenBank	CDS	6595	7446	.	+	1	ID=ERS325254_00009.p01;Parent=ERS325254_00009.t01;Dbxref=GI:804224949;eC_number=2.7.1.-,2.7.1.148;Name=ipk;codon_start=1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004730493.1;locus_tag=ERS325254_00009;product=isopentenyl monophosphate kinase;protein_id=CQU25122.1;transl_table=11;translation=length.283
+CVAX01000001	GenBank	exon	6595	7446	.	+	1	Parent=ERS325254_00009.t01
+##FASTA
+>CVAX01000001
+GCTTCCAGAATTCGTACGTTTTTCCATTCACACAATTTTCTTAATATGCTGCCTACTGCC
+CTACGCTTCTCTCCATAGAACGCTTGTCTTCGGTATTTGGGCGCGAAAACTATGTGATAT
+TTACAGTTCCATCGGGTGTGCGCTAAGCTCTTTTCGTCCCCCATTGGGACCCCCTTTTGA
+TTTCTTGTTGAACTTTTGCAGTCGCCAGACCGCAAGATGTTTTAACAAATCAAAAGGGGT
+TTTAATAACTGGCTTAAAGCTGAAAGCTTTCCGGAACCCCCAGCCTAGCTGGGGGTTTTC
+CATAGACAAAAAAGCCGGATAGTATCTTATCCGGCCTACACAAGCTAAAAACCGCAATCA
+GTTCTCGGTATCCAGCTCGTCGAAGCTTTTCACCAAATCATCAATCGCTTTTATCTGGGT
+CAGGAACTGCTCCAGTTTCGCCAGCGGCAGCGCGGAAGGACCGTCGCACTTCGCGTTAGC
+CGGGTCCGGATGCGACTCCAGGAACAGGCCCGCCAGACCGACAGCCATACCGGCGCGCGC
+CAGCTCGGTCACCTGACCACGACGACCGCCCGAGGCAGCGCCAAATGGGTCGCGGCATTG
+CAGCGCATGGGTAACGTCGAAAATCACCGGCGAGTTGCCGGAGACCTTTTTCATCACGCT
+AAAGCCCAGCATATCCACCACCAGGTTGTCATAACCGAAGTTCGCGCCGCGATCGCACAG
+AATCACCTTATCGTTACCGCCCTCATGGAACTTATCCACGATATTGCCCATCTGACCCGG
+GCTTACAAACTGCGGCTTTTTCACGTTAATCACAGCGCCGGTTTTCGCCATCGCTTCCAC
+CAGGTCGGTCTGGCGCGCCAAAAACGCCGGGAGCTGAATCACGTCAACCACATCAGCCAC
+AGGCTGCGCCTGGCTGGCTTCATGGACGTCAGTGATCACTTTTACGCCAAATGTCTGTTT
+GAGTTCCTGGAAGATTTTCATCCCTTCTTCCAGACCCGGCCCACGGTAAGAGTGAATAGA
+GGAACGGTTGGCTTTATCAAAAGAGGCCTTGAACACGTAAGGAATACCCAGCTTCTGGGT
+AACGGTTACGTAGTGCTCACAAATGCGCATTGCCAGATCGCGTGACTCCAGCACGTTCAT
+ACCGCCAAACAGCACAAACGGCAGGTCATTTGCCACCTTAATGTCGCCAATGTTAACCAC
+TTTTTGTTTCATAGGATCGCCTTACACTTATAGGTAAAATGTCGGTTAATTAATGTAGAA
+CAATTTGCTTGTGCGCAATGGTGTTGATCTGCGCACGAATCATTTCGCTGATCGGGTCTT
+CCGGACACTGTTCAACGAAGTAGCTTAAATCAGTCAGCGCAACGTGTTCACACTCAAGCT
+GCGCGTAGATCAACCCGCGGTCACGGATTTCGTAAGGATCTTCCGGGTTAAATTGCAACA
+ATGCTTCGCTAACGCGCAGCGCCAGCTCCATTTGCCGTTCCTCCATCAGCGAGGATTTCA
+GCGTATCCAGTAGCTTACGAATCACTTCGGCGTTATCCGCTTCGTCCAAATCTTCGTTAA
+ACAACTCGGCGACCGGACTAATATTGCCTTTTAACCAGACTTCGAGGGTATGTTCATCAA
+GCGTTTCACCGTTGAACGGGTTAATCAGCCACATTTCTCCTTCCAGCGATTCAATACGCA
+AAATCAGCTGCGTTGGGAAGATAACGGGCACCAACGGCAAATCAAGACGGTTTGCTATCC
+ATAATAAAATAGCGCCCAACGATACAGCGCTCCCCTGGCGTTTTTTCAGGACTTTATCGA
+GCCATAAAGCATCAGAAAGACGGTAGACGCCGCGGGAATCCGTGAACCCCCACTCGCCGT
+AAAAAAGTGCCAGCAATTTTTCCAGTTGCTCATCCTGAGACAGGAGCTGGCTAATTTCTT
+CCTGCGCCAGACTGACCAGACGTTCCAGTTCGTCGTAGACAAACTGCGTGGGAAAATCCA
+GGCGAATCATCTCTGATGCCAGGATCATGCCATCACACAGCGGCGCATTGTTAAATTCGA
+AATCAGCTAACGACCTCATGACTTACCCCAGTAACGGTATTTTTGTGGTGGCGAGTTTAA
+TGATGATGTACAGCACCACCAGCGCCAGCGGGAAAGCGATAAAACGCGCCTGCTGGCTGC
+GCGCCTGACGATAATCAAGCGCAATAAAACCCAAAACGATGTAGATGATAACGCCAAACA
+GTTTTTCAGTCAGCCATGAGCCCGACTCCGTGAATGGCAGGATGTGCGTTTTAACGATTA
+ACCCAATACCGCTAAGCAGTAACAGGGTATCTATGACGGGAGGCACGATTCGTGTCCAGC
+GCGCGGCGGCCAACGCATGGCCGCAATAGCGCCACCAGTAACGAGCGACAAACAAACTGA
+CGGAGAGCGCAACACAAATAAGATGAAGCGTGAGCAACATTGCAATTGTCATGGCGTAAA
+ACGCCCGCAGGCCACGCGTTCATTGCCACCATAATCACGACACGTTTCTACATCCGTGTA
+ACCAGACCGCCTGAAAGCCGCTCTTACCGCTTCGCCCTGCCGCCAACCGTGTTCCAGCAG
+CAGGAAGCCGCCGGGCGTTAACATCTGGCGGGCATTATCGATAATATGCGTAAGATCCGC
+CATACCGTTTTCGTCCGCCACCAACGCTGAGCGCGGTTCAAAGCGTACATCGCCTTCGCT
+AAGGTGCGGGTCCTGCGCGTCAATGTAGGGCGGATTGCTGACAATCATGTCGAACTGCTG
+CCCCGATAGTGCGCTGAACCAACAACTTTGCAGTATACGCACATTCCGGATAGCCAAATG
+TTCCGCGTTGCGTATCGCCAGCGCAACCGCATCAGGCATACGATCGACCGCAGTCACCTC
+GCAATCCGGTCGCTCGCAGGCCAGCGCCAGCGCAATCGCGCCGGTTCCGGTGCCTAAATC
+CAGAATACGACAGGTTTTGACCGGCAATCGCGCCAACGCCTGTTCAACCAGACATTCGGT
+ATCCGGGCGCGGGATTAGCGTGGCGGGAGAAACGAAGAGCGGCAGCGACCAAAATTCGCG
+TAAGCCCGTCAGGTACGCAATGGGTTCGCCCTGTTTACGCCGCTGCAGCAGCTCCGCGAG
+CTGTTGTTGCTGGACATCGGTAAGCGGCGTTTCGCCAAAGGCCATGATATACGTCCGCCT
+CTTGCCCGTAACGTACTCAAGCAGGATCTCGGCGTCGCGCCGGGGGCTGTCGCTGTCCCG
+GAGCTGGTTTACCGCCTCATGTAGCCAGTGCTGAAAATCCATTATTCCTGCTCGGACAAC
+GCGGCTAACAGGTCAGCCTGGTGTTCCTGAACAATCGGCTCAATCAGCATATCCAGCTTA
+CCTTCCATCGTTTCATCAAGGCGATATAACGTCAGATTAATACGATGATCGGTCACGCGC
+CCCTGCGGGAAATTATAGGTCCGGTTACGATCGCTGCGATCGCCGCTGCCAAGCAGGTTG
+CGGCGCGTTGACGCCTCGGCCTGCTGGCGTTTTGCCGTTTCGGCGGCGTGAATGCGCGCC
+CCGAGCACCGAGAGCGCTTTCGCTTTGTTTTTATGCTGCGAACGCTCGTCCTGGCATTCC
+ACCACGATGCCGGTCGGCAAGTGGGTAATACGGATAGCGGAGTCGGTGGTGTTAACGTGC
+TGACCGCCCGCGCCGGAAGAACGAAACGTATCAATGCGCAGATCCGCCGGGTTAATATCC
+GGCAGCTCGGCTTCCGGCAGCTCCGGCATCACGGCGACGGTACAGGCGGAGGTATGGATA
+CGCCCCTGCGACTCGGTCGCCGGAACACGCTGTACGCGGTGTCCGCCGGACTCAAATTTC
+AGTCGGCCATACACGCCGTCGCCGCTGATTTTGGCGATGATCTCTTTATAACCGCCATGC
+TCGCCTTCGCTCATGCTCATGATCTCCACGCGCCAGCGGCGCGCTTCGGCATAACGGCTG
+TACATGCGGAACAGATCGCCGGCAAACAGCGCGGCTTCGTCGCCGCCAGTGCCGGCGCGA
+ACCTCAAGGAACGCGTTTCGTTCATCGTCCGGATCTTTCGGCAGCAGCAGTACCTGTAAC
+TGTTGCTCCAGTTGTTCGCTTTTTTCTTTCGCTTCGCGCAGTTCTTCCTGCGCCATTTCC
+CGCATTTCAGGATCGTCGAGCATCATCTGAGCCGTCTCGATATCGTCCTGAACCTGTTGC
+CAGTCCGTAAAACAGCGAGAAACGTCGCTTAATTGCGCATATTCGCGCGACAATGCGCGA
+AAGCGGTCCTGGTCGGCGATAATTCCCGCATCGCCCAGCAACGCCTGAACTTCCTCATGG
+CGTTCGTGCAGGGCTTCCAGTTTGGCAACGATAGAAGGCTTCATAGGCGTAAATTCACCC
+TGTAAAAAAGTGTGGTGTGTGCTGCTACTCCAGCCCGAGGCTGTCGCGCAGAATATTCAG
+GCGTTCGTCATCCCCGTCACGGGCAGCCTGTTGAAGTGATTTCGTTGGCGCATGAATCAG
+GCGGTTGGTCAGTTTCCATGCCAGATCCTGCAAGATGGCTTGCGCATCACCGCCCTGTTG
+AAGGGCCGACAGCGCTTTGGTAGTCAGTTCGTCACGAATCTGCTCCGACTGACTACGGTA
+TTCCCGAATGGTCTCGCTGGCCCCCTGGGCGCGTAGCCAGGCCATAAACTCGCTGGCTTC
+CTGCTCAACAATCGTTTCCGCTTCCACCGCCGCAGCCTGACGCTGCGCCAGATTATGCGA
+AATGATGCTCTGTAAATCATCGACGCTATAAAGATAAGCGTTCGCCAGTTTGCCGACTTC
+CGGCTCAACGTCGCGCGGTACGGCGATATCCACCAGCAGCATCGGCTGGTTGCGACGGCT
+TTTTAATGCGCGCTCCACCATGCCTTTACCGATAATCGGCAGCGGGCTGGCGGTCGAACT
+GATGATAATATCGGCATCCTGCAAGCGGGCGTCGATATCGCTGAGCGAGATAACCTCAGC
+GCCAACCTCATCCGCCAGGGCTTGCGCGCGCTCGCGGGTTCGGTTGGCGATAATCATCTT
+TTGTACTTTATGCTCGCGCAGGTGACGCGCCACCAGTTCAATAGTTTCGCCCGCGCCAAC
+TAACAGTACAGTGACCGTCGAGAGCGATTCAAAGATTTGGCGGGCGAGCGTACAGGCGGC
+AAACGCGACGGAGACGGCGCTGGCGCCGATATCGGTTTCAGTCCGCACTCGCTTGGCGAC
+GGAAAAAGACTTCTGAAACATTCGCTCCAGCGCGCTGGCGTTAAGGTGGCCTTTTTGCGA
+ATCCGCAAACGCTTTTTTCACCTGACCGAGGATTTGCGGTTCGCCCAGCACCAGTGAATC
+CAGACCGCTGGCGACGCGCATCAGGTGGCTGACGGCGTCATTGTCCTGATGCCAGTACAG
+ACTGTTGCGCAGATCGTCCTCGTTCAGGTTATGGTAATCGCATAACCAGCGGATCAGCGC
+TTCTTGCAGGTTATCCTGCTCTTCCACGCTCAAATACAGCTCTGTACGGTTACAGGTTGA
+CAGCACGACCCCGCCCTGCACCATTGGCTGCGCAAGCAGGCTGTCCAGCGCCTGATCAAG
+CGTGTCCGGCGAAAACGTTACGCGTTCTCGCAGCGATACAGGTGCCGTTTTATGGTTAAT
+ACCGAGTGCTAAAAGGGTCATGTCTGCGGGAGTAGTACCAGCGTTGATATGGTTAGTCTG
+CTTGCATCATACAGGATGCGCGTGGTCAATAAAAGAGAGAGCCCCCTTTTGGAGTAATTG
+GCAGCGCTCGCTAATTTGATGATTTAAGACAACTTGAAAGTAGACGATGTCACCAGGCGG
+CGCTAGCATTAAAGGCTATAACTGTAACCGATAGCAAAAATTTGCCGAATCGCGGCGCGA
+ACGTTTTTACAGCTTGCCGGTTCAGTTTTCAGGCCTGATAAGCGTAACGCCTTCAGGCAA
+TTATTGCACCGTATCACAAGGATTCGTCATCACTATGACCCTGCCCGATTTTCGCCTGAT
+TCGTCTACTGCCGTTAGCAAGCCTGGTTCTCACCGCCTGTACGCTTCCTGGGCATAAAGG
+CCCGGGCAAGAGCCCGGATTCCCCTCAGTGGCGCCAGCATCAGCAAGAGGTGCGTCATCT
+GAATCAATACCAGACGCGCGGCGCCTTTGCTTACATCTCAGATGATCAGAAAGTCTATGC
+GCGCTTCTTCTGGCAACAGACCGGACAGGATCGCTACCGCCTGCTGCTTACCAACCCGCT
+GGGCAGCACCGAGCTGGAGCTTAACGCCCAGCCGGGCAACGTCCAGTTGGTGGATAACAA
+AGGCCAGCGTTATACCGCCGATGACGCCGAAGAGATGATCGGCAAACTCACCGGGATGCC
+GATTCCGTTAAACAGCCTGCGGCAGTGGATCCTCGGTCTGCCTGGCGATGCCACCGACTA
+CAAACTGGACGACCAGTACCGCCTGAGCGAAGTGAACTACCACCAGGATGGTAAAAACTG
+GAAAGTGGTTTACGGTGGCTATGACAGCAAAACGCAGCCTGCCATGCCAGCCAATATGGA
+GCTTTCAGATGGCAGTCAGCGCATTAAGCTGAAAATGGATAACTGGATTGTGAAATGATG
+ACCCATTGGCCTTCTCCGGCAAAATTAAATCTGTTTTTATATATCACCGGACAGCGTGCA
+GACGGCTACCACACGCTGCAGACGCTGTTTCAGTTTCTGGATTATGGCGACACACTCCAC
+ATCGAACCGCGTCACGATGGCGAAATCCATTTATTAACGCCGGTAACCGGCGTTGAAAAT
+GAAGACAATCTGATCGTCCGCGCCGCGCGGCTGTTGATGAAAGTCGCCTCGGAGAGTGGA
+CGCCTGCCCGCCGGAAGCGGCGCGGATATCAGCATTGAGAAGCGCCTTCCCATGGGCGGC
+GGTCTGGGTGGCGGCTCGTCTAACGCCGCGACCGTTCTGGTGGCGCTCAATCATCTTTGG
+CAATGCGGGCTTTCCATTGATGAACTGGCGACGCTCGGCCTGACGCTCGGCGCCGACGTC
+CCGGTCTTTGTTCGTGGCCACGCCGCGTTTGCCGAAGGCGTAGGCGAAATATTAACGCCG
+GTGAATCCGCCGGAAAAATGGTATCTGGTCGCGCACCCTGGCGTAAGCATTCCGACGCCA
+GTTATCTTTAAAGATCCTCAATTGCCGCGTAATACGCCAAAAAGGTCAATAGATACGTTA
+CTAAAATGTGAATTCAGCAATGATTGCGAGGTTATCGCAAGAAAACGTTTTCGCGAGGTT
+GATGCGGCGCTTTCCTGGCTGTTAGAATACGCGCCGTCGCGCCTGACTGGGACAGGGGCC
+TGTGTCTTTGCTGAATTCGATACAGAGTCTTGTGCTCGCCAGGTGCTTGAGCAAGCCCCG
+GAATGGCTCAATGCTTTTGTGGCGAAGGGTGTTAACCTTTCCCCATTGCATCGAGAGTTA
+CTCTAACGCATTCGGGTTTCAGGAAGATGGCGAAATGACGAGTCGCCGGTCATATATGCC
+GTATTTGACCGGTGTGAGGCATTGATACCAGCGCACATGAAACGTGAATGAAGACGAGTA
+AGCCGGGCAAGCTGAGCTTCGGTGACAACGTCACCTTGTTCCAGACGTTGCATCGCGCTC
+TTTAATACACCGCCTGGATAGGATTTTGCCTGGCCCGCACAGTTTTCGGCAGATTCTTTC
+CACCAATGGACGCATGCCTGAGGTTCTTCTCGTGCCTGATATGAAGCTTTTTGCTGGTAA
+CGCCACCCCGGAACTAGCACAACGTATTGCCAACCGCCTGTACACTTCTCTCGGCGACGC
+CGCCGTAGGTCGCTTTAGCGACGGCGAAGTCAGCGTACAAATCAACGAAAATGTACGCGG
+TGGTGATATTTTCATCATCCAGTCCACTTGTGCCCCAACCAACGACAACCTGATGGAATT
+GGTCGTTATGGTTGATGCCCTGCGTCGTGCTTCCGCAGGTCGTATCACCGCCGTTATCCC
+CTACTTTGGCTATGCACGTCAGGACCGTCGCGTACGTTCCGCCCGTGTGCCGATTACCGC
+AAAAGTTGTCGCTGACTTCCTGTCCAGCGTCGGCGTTGACCGCGTTCTCACCGTAGATCT
+GCATGCTGAACAGATCCAGGGCTTCTTTGACGTTCCGGTTGATAACGTGTTCGGTAGCCC
+AATCCTGCTCGAAGATATGCTGCAACTGAATCTGGATAACCCAATCGTGGTTTCCCCGGA
+TATTGGCGGCGTGGTTCGTGCCCGCGCTATCGCTAAGCTGCTGAACGATACCGATATGGC
+TATCATTGATAAACGTCGTCCGCGCGCGAACGTTTCTCAGGTGATGCACATCATCGGCGA
+CGTCGCTGGCCGTGACTGCGTGCTGGTTGATGATATGATCGATACCGGCGGTACTCTGTG
+CAAAGCAGCAGAAGCATTGAAAGAACGTGGCGCTAAACGCGTGTTTGCCTACGCGACGCA
+CCCGATCTTCTCAGGCAATGCGGCAAACAACCTGCGCAACTCCGTTATTGACGAAGTCGT
+TGTCTGTGACACCATTCCGCTGACCGACGAAATCAAAGCGCTGCCGAACGTGCGTACTTT
+GACCCTGTCAGGTATGCTGGCCGAAGCGATTCGCCGTATCAGCAACGAAGAATCGATTTC
+CGCCATGTTCGAGCATTGATCGAACCCGGATCTGAAACCCGCTGCGGCGGGTTTTTTTGT
+CTGTAACACCCTTTTATATGACTTATGCCTCCTTCACCTGCCATTTAGTTGACAGATGAT
+GCGCTCATGGATGAAACATTATTGTGAACAAATTATTTTCCTCACATGTGATGCCTTTCC
+GCGCTCTCATCGATGCTTGCTGGAAAGAAAAATATACCGCCTCCCGGTTCACCCGTGATG
+TGATAGCCGGGATCACCGTCGGGATTATTGCTATCCCGCTGGCGATGGCGCTGGCAATTG
+GCAGTGGCGTTGCGCCGCAGTATGGCCTCTATACCTCCGCTGTCGCCGGGATCGTGATCG
+CGCTAACCGGCGGCTCGCGCTTTAGCGTTTCCGGCCCTACCGCCGCGTTTGTGGTGATTT
+TGTATCCGGTGTCGCAACAGTTTGGTCTGGCGGGCCTACTGGTCGCCACGCTGATGTCGG
+GCTTCTTCCTGATCCTTTTCGGCCTGGCGAGACTGGGGCGATTGATTGAATATATCCCGG
+TGTCGGTCACGTTGGGTTTTACCTCAGGGATTGGTATTACCATCGGTACCATGCAGATTA
\ No newline at end of file
diff --git a/t/data/genbank_gbff/genbank1.gff.proteome.faa.expected b/t/data/genbank_gbff/genbank1.gff.proteome.faa.expected
new file mode 100644
index 0000000..20955ec
--- /dev/null
+++ b/t/data/genbank_gbff/genbank1.gff.proteome.faa.expected
@@ -0,0 +1,49 @@
+>ERS325254_00002.p01
+MKQKVVNIGDIKVANDLPFVLFGGMNVLESRDLAMRICEHYVTVTQKLGIPYVFKASFDK
+ANRSSIHSYRGPGLEEGMKIFQELKQTFGVKVITDVHEASQAQPVADVVDVIQLPAFLAR
+QTDLVEAMAKTGAVINVKKPQFVSPGQMGNIVDKFHEGGNDKVILCDRGANFGYDNLVVD
+MLGFSVMKKVSGNSPVIFDVTHALQCRDPFGAASGGRRGQVTELARAGMAVGLAGLFLES
+HPDPANAKCDGPSALPLAKLEQFLTQIKAIDDLVKSFDELDTEN*
+>ERS325254_00003.p01
+MRSLADFEFNNAPLCDGMILASEMIRLDFPTQFVYDELERLVSLAQEEISQLLSQDEQLE
+KLLALFYGEWGFTDSRGVYRLSDALWLDKVLKKRQGSAVSLGAILLWIANRLDLPLVPVI
+FPTQLILRIESLEGEMWLINPFNGETLDEHTLEVWLKGNISPVAELFNEDLDEADNAEVI
+RKLLDTLKSSLMEERQMELALRVSEALLQFNPEDPYEIRDRGLIYAQLECEHVALTDLSY
+FVEQCPEDPISEMIRAQINTIAHKQIVLH*
+>ERS325254_00004.p01
+MTIAMLLTLHLICVALSVSLFVARYWWRYCGHALAAARWTRIVPPVIDTLLLLSGIGLIV
+KTHILPFTESGSWLTEKLFGVIIYIVLGFIALDYRQARSQQARFIAFPLALVVLYIIIKL
+ATTKIPLLG*
+>ERS325254_00005.p01
+MDFQHWLHEAVNQLRDSDSPRRDAEILLEYVTGKRRTYIMAFGETPLTDVQQQQLAELLQ
+RRKQGEPIAYLTGLREFWSLPLFVSPATLIPRPDTECLVEQALARLPVKTCRILDLGTGT
+GAIALALACERPDCEVTAVDRMPDAVALAIRNAEHLAIRNVRILQSCWFSALSGQQFDMI
+VSNPPYIDAQDPHLSEGDVRFEPRSALVADENGMADLTHIIDNARQMLTPGGFLLLEHGW
+RQGEAVRAAFRRSGYTDVETCRDYGGNERVACGRFTP*
+>ERS325254_00006.p01
+MKPSIVAKLEALHERHEEVQALLGDAGIIADQDRFRALSREYAQLSDVSRCFTDWQQVQD
+DIETAQMMLDDPEMREMAQEELREAKEKSEQLEQQLQVLLLPKDPDDERNAFLEVRAGTG
+GDEAALFAGDLFRMYSRYAEARRWRVEIMSMSEGEHGGYKEIIAKISGDGVYGRLKFESG
+GHRVQRVPATESQGRIHTSACTVAVMPELPEAELPDINPADLRIDTFRSSGAGGQHVNTT
+DSAIRITHLPTGIVVECQDERSQHKNKAKALSVLGARIHAAETAKRQQAEASTRRNLLGS
+GDRSDRNRTYNFPQGRVTDHRINLTLYRLDETMEGKLDMLIEPIVQEHQADLLAALSEQE
+*
+>ERS325254_00007.p01
+MTLLALGINHKTAPVSLRERVTFSPDTLDQALDSLLAQPMVQGGVVLSTCNRTELYLSVE
+EQDNLQEALIRWLCDYHNLNEDDLRNSLYWHQDNDAVSHLMRVASGLDSLVLGEPQILGQ
+VKKAFADSQKGHLNASALERMFQKSFSVAKRVRTETDIGASAVSVAFAACTLARQIFESL
+STVTVLLVGAGETIELVARHLREHKVQKMIIANRTRERAQALADEVGAEVISLSDIDARL
+QDADIIISSTASPLPIIGKGMVERALKSRRNQPMLLVDIAVPRDVEPEVGKLANAYLYSV
+DDLQSIISHNLAQRQAAAVEAETIVEQEASEFMAWLRAQGASETIREYRSQSEQIRDELT
+TKALSALQQGGDAQAILQDLAWKLTNRLIHAPTKSLQQAARDGDDERLNILRDSLGLE*
+>ERS325254_00008.p01
+MTLPDFRLIRLLPLASLVLTACTLPGHKGPGKSPDSPQWRQHQQEVRHLNQYQTRGAFAY
+ISDDQKVYARFFWQQTGQDRYRLLLTNPLGSTELELNAQPGNVQLVDNKGQRYTADDAEE
+MIGKLTGMPIPLNSLRQWILGLPGDATDYKLDDQYRLSEVNYHQDGKNWKVVYGGYDSKT
+QPAMPANMELSDGSQRIKLKMDNWIVK*
+>ERS325254_00009.p01
+MMTHWPSPAKLNLFLYITGQRADGYHTLQTLFQFLDYGDTLHIEPRHDGEIHLLTPVTGV
+ENEDNLIVRAARLLMKVASESGRLPAGSGADISIEKRLPMGGGLGGGSSNAATVLVALNH
+LWQCGLSIDELATLGLTLGADVPVFVRGHAAFAEGVGEILTPVNPPEKWYLVAHPGVSIP
+TPVIFKDPQLPRNTPKRSIDTLLKCEFSNDCEVIARKRFREVDAALSWLLEYAPSRLTGT
+GACVFAEFDTESCARQVLEQAPEWLNAFVAKGVNLSPLHRELL*
diff --git a/t/data/genbank_gbff/genbank2.gff b/t/data/genbank_gbff/genbank2.gff
new file mode 100644
index 0000000..2904c83
--- /dev/null
+++ b/t/data/genbank_gbff/genbank2.gff
@@ -0,0 +1,282 @@
+##gff-version 3
+##sequence-region CVBR01000001 1 488985
+# conversion-by bp_genbank2gff3.pl
+# organism Salmonella enterica subsp. enterica serovar Typhi
+# Note Salmonella enterica subsp. enterica serovar Typhi genome assembly 10426_1#57, scaffold ERS325340SCcontig000001, whole genome shotgun sequence.
+# date 02-APR-2015
+CVBR01000001	GenBank	region	1	488985	.	+	1	ID=CVBR01000001;Dbxref=BioProject:PRJEB3215,taxon:90370;Name=CVBR01000001;Note=Salmonella enterica subsp. enterica serovar Typhi genome assembly 10426_1#57%2C scaffold ERS325340SCcontig000001%2C whole genome shotgun sequence.;collection_date=1994;country=Viet Nam;date=02-APR-2015;isolation_source=Not known;mol_type=genomic DNA;organism=Salmonella enterica subsp. enterica serovar Typhi;serovar=H58;strain=ct1-65
+CVBR01000001	GenBank	gene	79	726	.	-	1	ID=ERS325340_00001;Name=narL_1;locus_tag=ERS325340_00001
+CVBR01000001	GenBank	mRNA	79	726	.	-	1	ID=ERS325340_00001.t01;Parent=ERS325340_00001
+CVBR01000001	GenBank	CDS	79	726	.	-	1	ID=ERS325340_00001.p01;Parent=ERS325340_00001.t01;Dbxref=GI:804333227;Name=narL_1;codon_start=1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002147204.1;locus_tag=ERS325340_00001;product=transcriptional regulator NarP;protein_id=CQU31315.1;transl_table=11;translation=length.215
+CVBR01000001	GenBank	exon	79	726	.	-	1	Parent=ERS325340_00001.t01
+CVBR01000001	GenBank	gene	2716	3507	.	+	1	ID=ERS325340_00003;Name=ERS325340_00003
+CVBR01000001	GenBank	mRNA	2716	3507	.	+	1	ID=ERS325340_00003.t01;Parent=ERS325340_00003
+CVBR01000001	GenBank	CDS	2716	3507	.	+	1	ID=ERS325340_00003.p01;Parent=ERS325340_00003.t01;Dbxref=GI:804333228;Name=ERS325340_00003;codon_start=1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002041506.1;product=side tail fiber protein;protein_id=CQU31351.1;transl_table=11;translation=length.263
+CVBR01000001	GenBank	exon	2716	3507	.	+	1	Parent=ERS325340_00003.t01
+CVBR01000001	GenBank	gene	4176	6542	.	+	1	ID=ERS325340_00004;Name=sspH2;locus_tag=ERS325340_00004
+CVBR01000001	GenBank	mRNA	4176	6542	.	+	1	ID=ERS325340_00004.t01;Parent=ERS325340_00004
+CVBR01000001	GenBank	CDS	4176	6542	.	+	1	ID=ERS325340_00004.p01;Parent=ERS325340_00004.t01;Dbxref=GI:804333229;eC_number=6.3.2.-;Name=sspH2;codon_start=1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002244314.1;locus_tag=ERS325340_00004;product=secreted effector protein;protein_id=CQU31395.1;transl_table=11;translation=length.788
+CVBR01000001	GenBank	exon	4176	6542	.	+	1	Parent=ERS325340_00004.t01
+CVBR01000001	GenBank	gene	7163	7239	.	-	1	ID=ERS325340_00005;Name=ERS325340_00005
+CVBR01000001	GenBank	tRNA	7163	7239	.	-	1	ID=ERS325340_00005.r01;Parent=ERS325340_00005;Name=ERS325340_00005;inference=COORDINATES: profile:Aragorn:1.2.36;product=tRNA-Pro
+CVBR01000001	GenBank	exon	7163	7239	.	-	1	Parent=ERS325340_00005.r01;Name=ERS325340_00005
+##FASTA
+>CVBR01000001
+TAGAGCGCAGTATCCGGCTGGCGCAGGAGAAATAAGACCCTGACGGCCCGGTGGCGTGGC
+GCCACCGGGCCTGGCTTGTTACTGCATTCCGCGTGTTTGTAAAAATAGAATGGTCGCCGC
+GACGCGCGAGCGCACGTTTAGCTTGCGAAGTAGATTACGAATGTGCACCTTAACCGTCTG
+CTCAGAAATATTCAGTACTGACGCAATCTGTTTGTTAGAAAGCCCCTGCGCCAGCTCATG
+CAAGACATCTAACTCTCGTTCGGTCAGGATACTAAAGGGATCTTCCTGCGCGCCAAACCG
+TTCACGCTCACGCAGATATTCATTGACCCGATCGCTAAAGACTTTACCGCCGTTAGCGCC
+TTTGCGAATGGCCTCCAGCAGCACTTCCGGATCGCTATCTTTGAGCAGATAACCGTCGGC
+GCCGGCGTCGATCAGCGCGTAGATGTCGCTGGCGGAATCCGAGACGGTAAGAATAATAAT
+TTGCGCCGTCACACCATCCCGGCGTAGCGCGTTCAGGGTATCTAACCCGCTCAGACCTTT
+CATATTCAGATCCAGCAGGATCAGATCGGGCTCAATGCGATTCGCCAGATCGATCGCACT
+CGCCCCGTCACCCGCTTCGGCAACGACATAAAAAGCCGGGTCCAGTTCCAGTAATTGACG
+AATACCTCGCCGCATAAGTGGATGGTCGTCCACAATAAGCACCTGAAAAGGTGTTACTTC
+AGGCATGCTATATCTCCTGAGTTTTATTAGAATGATTATTGTTTTCAGCGCGGAAATCGT
+CCAGATTTGCCACATTGTCAAGCGCGTAAATTTTACCTTAATTAGTGCATGGGTAAGTAG
+TAAAACCTGTGACAGGCGAGTCATTAGCTGAACGATAGAAAGGATAAAACAACCTGTTGT
+GAGAGAAAACAGAATACTCCCTGAAGTTGAATACTGGTCTGGACGCTTTTTTCGGACAAT
+GTGACATAACAGAAGTGTGGCATATGTAGCTGCGCCCTTGCGTCCGTAAGTCCAGACTAT
+CTTTACTGGCAGGAAATTCCTTATTAAAAGTCATAGCGTCTAAAGATTTTCATTATTAAC
+AAATTATTTTAATTGAATTTATTTATGGTTTTATTAATGCCAATGAAATACATCTGAGCA
+GAACGAATAAACCGCGACATCTGTTAAATTAAATATTAAGTCAATAGCAGAGGTATTTTT
+ATGCGATAAAGAGATTCTGACGATATTCTTATCATAAATGAAAATGTGGTCAGGAATGAG
+CGTCATTCACTTATAACTTATCCTGGATGTGCCTGAACAACACTCGCGGCGTTTTGAGCA
+CCCGTTGGGGCGACTCAAGGGGAGAACGTAGTGTGGATGCTATATCAGCCGTTTCTGTGA
+GCGTAAGCGTGGCGTAGAAAATTTTAAATATGTTAGCCGGTTAAAAATAACTATTGACAT
+TGAAATGGTGGTGGAGTGTATATGAAGAGAATATTTATATATCTATTATTACCTTGTGCA
+TTCGCATGTTCTGCTAATGATAATGTTTTTTTGGCAAGGGCAACAAGCATCAGATCTCTT
+TTGCTGCGGGAGAAAGTATAAGAAGAGGAGGGGTTGAGCACTTATATACGGCTTTTCTGA
+CATACAGTGAACCCAGCGATTTTTTCTTTTTACAGGCAAGAAATAATCTGGAGTTAGGAG
+GATTTAAGGCTAAGGGTAGCGATGATTGCAGTAAACATTCTGGCAGCGTTCCCTGTAATA
+AATATAACCAGGGCGTATTGGGTATCTCGAAGGATGTGGCGCTGGTTCATTTCGCTGGTA
+TCTATACCGGTATTGGTCTGGGGGCTTATATAAAATCTAAGTCGCGAGATGATATGCGTG
+TCAATTCTGCATTTACCTTTGGAGAAAAAGCGTTTCTTGGCTGGAACTTTGGGGCTTTTT
+CTACAGAAGCTTATATCCGGCATTTCTCGAATGGATCACTTACGGATAAAAATTCAGGGC
+ATAATTTTGTAGGTGCTTCAATTAGTTATAATTTCTGAAGTTGAATAACAATTAGCGAGT
+TGCTGGCTGAAGGAGTAATTAATCACCGTACGTTGTCTACAAAACGTGTTGTGAATAGCC
+GATCGTTATCGACCCTATGGCGACAGGGAGGCTAATTGTTAGCGACAAAGGCGTGGTAGG
+CCGTTTACCTTACCAGAACGTTTTATTATTGCTGCGACAGATTGCTTACTCATTTACCTG
+CCACTGCAAACATTTTTTCCAGTATAGGGTGTAGATGCGCTTTTTCTGTTTTAGTACAAA
+CATGGTTTACGTTAGTTGCCTGCATTGACTTTACTGTGACCACTGCACAGGAAAGATGTC
+CAATCAGTTCATGCAAAATCAGTTCATGCAAAATAACGTCTCTGGCATCAGCTAACTTTT
+TAATTGTAGGCATAAAAAAACCAGCCCTGACAGGCTGGTTTTCAAGGGGAATTTTGGTCG
+GCAGCTTCATCCGCAGCATCTGCGGAGAGCAGCGCAGGTACGGCGACCACAAAAGCCGGG
+GAGGCATCAGCCAGCGCGGCGTCGGCTGACACAGCCAGAACGGCGGCAGCCGCCTCGTCA
+TCAGCATCGTAGGCATCCACCCACGCCGCCGCATCTGATACCAGCGCATCACTGGCAGCG
+CAAAGCAGAGCTGCAGCCGGAGAATCGGCAACCAGAGCCGAAGAGGCCGCGAAACGGGCC
+GAGGATATCGCAGACGTGATTTCCCTGGAAGATGCAAGCCTGACGAAAAAAGGTATCGTT
+AAATTAAGCAGTGCCACGGACAGTGACAGCGAAGCGCTGGCGGCCACACCAAAGGCGGTA
+AAAACCGTTATGGGTGAGGTACGGACCAAAGCGCCGCTGGACAGCCCGGCATTCACTGGA
+ACGCCGACCACACCGACGCCGCCAGGCGATGCTAAAGGGCTTCAGACAACAAACGCGGAG
+TTTGTCCGCAAACTGATTGCCGCGCTGGTTGGTTCCGTACTGGAGCCACTGGACACCCTG
+CAGGAACTGGCTGACGCGTTGGGAAATGATCCGAACTTTGCCACCACGGTACTGAATAAA
+CTGGCGGGCAAGCAGCCGCTGGACGAAACCCTGACGGCGCTGTCAGGAAAAAGCGTTGAC
+GGTCTTATCGAATACGTTGGTTTGCGAGAAACCATAAGTCGTGCCGCCGATGCATTACAA
+AAATCACAGAATGGCGGCGATATTCCGGACAAGGATTTGTTTGTGCGTCGTATCGGTGCC
+GCGCGAGCGTTTGATGGCGCAGTTATTATCGGCTGTGATGATAATCCGTGGACGACGGCG
+GAGTTTATCGTCTGGCTGGAGTCTCAGGGCGCATTCAATCACCCTTACTGGATGTGTCGT
+GGCTCCTGGTCTTACGCTTATAACAAAATCATCACGGATACTGGCTGCGGTAATATCTGT
+CTCGCTGGCGCAGTGATTGAGGTAATGGGAGTGCGTGGCGCGATGACTATTCGGGTGACA
+ACGTCCCATTCAGTATCTGGTTGGTGATACGTGGGTGACAGCCCCAAGTGTATAAGAAGG
+AATAATTATGACAGCGGAAAAAAAATAAAAAGAACAAACAGTTTTTAAATATTAAAAATT
+TCATTCCGTATGCACCGGAACCAGATGACACATTATTCGCCGATGCGGCGTATCTTAAAT
+CAGAGGATGGTCAGGACTGGTATGGGTGCCAGCAATTATTTTCAGCAGACACGCTGAAAA
+TTACCTACGACGATAACGATGTTATTACGTGTATTACGCGTGATGTTTCCGGGCTGTGGC
+CTGCTGGCCAGAGCGTTGCAGAGTTGCCTGATACGGATGAAAACCGTCGCGCTGATATTC
+ATGCTGCTGGCAGTTTAAAGACGGTAAAGTCGTTCAAAGGGTTTATTCGCCGGAAGAGCT
+GCGCAGGCAGGCAGAATCGAAAATTGAACGCCCGGGCGTTGATACCGGATGATCTGGTCA
+TCGTGGAAAGCGACCCTGAAAAAATCGACACTTTAGCTGTAAAATGACAGTCCCGCCATC
+CGGTCATCATAACGGATTTTTCTTCTGCACCTTCTGAAGCCCGCCATGTCAGGACGACCA
+TGAATCCGCCGATAACCTTATTGTGAAATTAAGACCAGGAAGAGATGATGTCTGCCGGAC
+AGATACTATATGTAAATTTATAAAGGTTTTTTGTTATGCCCTTTCATATTGGAAGCGGAT
+GTCTTCCCGCCACCATCAGTAATCGCCGCATTTATCGTATTGCCTGGTCTGATACCCCCC
+CTGAAATGAGTTCCTGGGAAAAAATGAAGGAATTTTTTTGCTCAACGCACCAGACTGAAG
+CGCTGGAGTGCATCTGGACGATTTGTCACCCGCCGGCCGGAACGACGCGGGAGGATGTGA
+TCAACAGATTTGAACTGCTCAGGACGCTCGCGTATGCCGGATGGGAGGAAAGCATTCATT
+CCGGCCAGCACGGGGAAAATTACTTCTGTATTCTGGATGAAGACAGTCAGGAGATATTGT
+CAGTCACCCTTGATGATGCCGGGAACTATACCGTAAATTGCCAGGGGTACAGTGAAACAC
+ATCGCCTCACCCTGGACACAGCACAGGGTGAGGAGGGCACAGGACACGCGGAAGGGGCAT
+CCGGGACATTCAGGACATCCTTCCTCCCTGCCACAACGGCTCCACAGACGCCAGCAGAGT
+ATGATGCTGTCTGGTCAGCGTGGAGAAGGGCTGCACCCGCAGAAGAGTCACGCGGCCGTG
+CAGCAGCGGTACAGAAAATGCGTGCCTGCCTGAATAATGGCAATGCAGTGCTTAACGTGG
+GAGAATCAGGTCTTACCACCTTGCCAGACTGTTTACCCGCGCATATTACCACACTGGTTA
+TTCCTGATAATAATCTGACCAGCCTGCCGGCGCTGCCGCCAGAACTGCGGACGCTGGAGG
+TCTCTGGTAACCAGCTGACTAGCCTGCCGGTGCTGCCGCCAGGACTACTGGAACTGTCGA
+TCTTTAGTAACCCGCTGACCCACCTGCCGGCGCTGCCGTCAGGACTATGTAAGCTGTGGA
+TCTTTGGTAATCAACTGACCAGCCTGCCGGTGTTGCCGCCAGGGCTACAGGAGCTGTCGG
+TATCTGATAACCAACTGGCCAGCCTGCCGGCGCTGCCGTCAGAATTATGTAAGCTGTGGG
+CCTATAATAACCAGCTGACCAGCCTGCCGACGTTGCCGTCAGGGCTACAGGAGCTGTCGG
+TATCTGATAACCAACTGGCCAGCTTGCCGACGCTGCCGTCAGAATTATATAAGCTGTGGG
+CCTATAATAATCGGCTGACCAGCCTGCCGGCGTTGCCGTCAGGACTGAAGGAGCTGATTG
+TATCTGGTAACCGGCTGACCAGTCTGCCGGTGCTGCCGTCAGAACTGAAGGAGCTGATGG
+TATCTGGTAACCGGCTGACCAGCCTGCCGATGCTGCCGTCAGGACTACTGTCGCTGTCGG
+TCTATCGTAACCAGCTGACCCGCCTGCCGGAAAGTCTCATTCATCTGTCTTCAGAGACAA
+CCGTAAATCTGGAAGGGAACCCACTGTCTGAACGTACTTTGCAGGCGCTGCGGGAGATCA
+CCAGCGCGCCTGGCTATTCAGGCCCCATAATACGATTCGATATGGCGGGAGCCTCCGCCC
+CCCGGGAAACTCGGGCACTGCACCTGGCGGCCGCTGACTGGCTGGTGCCTGCCCGGGAGG
+GGGAACCGGCTCCTGCAGACAGATGGCATATGTTCGGACAGGAAGATAACGCCGACGCCT
+TCAGCCTCTTCCTGGACAGACTGAGTGAGACGGAAAACTTCATAAAGGACGCGGGGTTTA
+AGGCACAGATATCGTCCTGGCTGGCACAACTGGCTGAAGATGAGGCGTTGAGAGCAAACA
+CCTTTGCTATGGCAACAGAGGCAACCTCAAGCTGCGAGGACCGGGTCACATTTTTTTTGC
+ACCAGATGAAGAACGTACAGCTGGTACATAATGCAGAAAAAGGGCAATACGATAACGATC
+TCGCGGCGCTGGTTGCCACGGGGCGTGAGATGTTCCGTCTGGGAAAACTGGAACAGATTG
+CCCGGGAAAAGGTCAGAACGCTGGCTCTCGTTGATGAAATTGAGGTCTGGCTGGCGTATC
+AGAATAAGCTGAAGAAATCACTCGGGCTGACCAGCGTGACGTCAGAAATGCGTTTCTTTG
+ACGTATCCGGCGTGACGGTTACAGACCTTCAGGACGCGGAGCTTCAGGTGAAAGCCGCTG
+AAAAAAGCGAGTTCAGGGAGTGGATACTGCAGTGGGGGCCGTTACACAGAGTGCTGGAGC
+GCAAAGCGCCGAAACGCGTTAACGCGCTTCGTGAAAAGCAAATATCGGATTATGAGGAAA
+CGTACCGGATGCTGTCTGACACAGAGCTGAGACCGTCTGGGCTGGTCGGTAATACCGATG
+CAGAGCGCACTATCGGAGCAAGAGCGATGGAGAGCGCGAAAAAGACATTTTTGGATGGCC
+TGCGACCTCTTGTGGAGGAGATGCTGGGGAGCTATCTGAACGTTCAGTGGCGTCGTAACT
+GATGCACCAGGTGAATGAGGTGCGGTGCGACAAAGATATTCCCGGACGAACAACATCAGA
+CAGTACGGATGATGTACAGGTGAAATAGGGGAGACTTCTTCAGTCAGGGCGCGGCGCAAC
+TTTTTCGATGATAACGCGCCGCGCGCCGGTAGCGAGAAGCCGATGGAAGTACTGGATCAC
+CTGAATGCAAAGAAGGGCAGGGGAAAGCTGTACTTTGCCGGGGAGGGTATCCAGCAACAG
+TGGGCTATGAAGAGAAACATACTGTCATCCCGATATACCACCCGCTATGAAGACCTGCTT
+CAGGTTAAGTGACAGGTTTACCCTGATTTTCAAATTTCTGATGTGATGGTGTGCCGCAGC
+CACATTGTATGCAAGAACGGGCTGCGGCAAACTGGCGATCGTTCGATAGTGCGTGTATTG
+AATAGTTACCAGTCGTGGCGGATTCTACTGGTTAAGGATGACTAATCAATGTATTTAAGT
+CATCTGTTCAACTTTTCGCGTTGGAGGGAGCTTGAAGTCAATTTGCAGTGACCTTCGATA
+GCTATTTCACCATCATTTCACCATTTGCAAATTGGATATATAAAAAAACCAGCCCTGGCA
+GGCTGGCTTTTAAGGGGAATTTTGGTCGGCACGAGAGGATTTGAACCTCCGACCCCCGAC
+ACCCCATGACGGTGCGCTACCAGGCTGCGCTACGTGCCGACTTGTGGGTGCTAATACTAC
+CTCTTTCCAACGCGAATGCAAGGGGAAACGGTGCTAACTGATTTATAATTAATCAGTTAG
+CGATAAAACGTTTCTCTTCTGTTAGCACTTGCAGCAGCAGACTGAGCTGCGGCTTCTGAT
+CTTTGATCTTCTCGCCGTGTAAGTCATAGGTCTGATAATGACCGTTATTATTGAGCACCA
+GGGTCATTTGCGGCGTCGTGATTGCCAGCGTACTGCCGTCTGCCGCCGTTACCCAGTTAT
+GACGACGTGGAACGGTAAAGATGTCCTGGCCCTGTGAATATTCGTTCGCTGGCGTGCTGA
+CATGTAACAGGCGTTGCATCAACGTGGTCATCACATCGGTATGATCGGTAAGCACATTAA
+TACGCTGCGCAGGCGTCCCCGGCCAGTGGATCACCAGCGGTACTTGCAGATGACCTTGCG
+ACCAGTCGAAGCGATTTTCTTCCGGCGTCAACGGTATGCCGCGTCCTGCGGTAATGATCA
+CGACAGTATTGTCGAATTTACCGGCCTCTCGCAGCGCGTTCAGAACCCGATTGATTTGCG
+CATCCACGTCGCTGGCGGCACTGGCGTAACGCTTAACAAAATTTTTCTGGTTACTGTCAT
+CAATGTTAGTACCGTTAAACGATATCCATGAGAACCAGCGGTTATCTTCCTGTGCGTAGC
+GCCCAAGCCAGTCTATCCACTGGCTGGCCGTTTGCGCATCAGACTGCGTTTGCGCTGCCG
+GCATCGAGAAATCGGACAGTAATGCCTGACGATAAAGCGGGCTGGCGAAGCCATCCGAAG
+AAAACAGTCCCAGTTGGTAGCCTTGCTGATTCAACGCGGTAATGAGCGCCGCAGGCGTTC
+TGGTGGATAACACGCCATCCATATAGCCTGGCGATATGCCATAGAACAGGCCGAAAATAC
+CGTTATCAGTGGTATTCCCTGAACTCATATGACGGGTGAAGTCGATGTTTTGCTCGGCAA
+ATGTAGCCAGCTCCGGCATCTGCTTCTCAAAGCGAGAATAGTTCAGGCCGTCTACGGTAA
+TGAGCAGCACATTCTGACCGGTGCCCATATCGCGATAGTGCAAATTGCTGAGCGGGTACT
+GAACGGAGACCGCTTCCGGATTGCCTTGCTCTACCAGACGGCGCTGATATTCCTGCGCAT
+CCAGCAGACCGTGTTTTTCAAGAAAACGTCGCGCCGTCATCGGATAAGAGAGCGGCAGGT
+TTGCCCGCTGCATGGTAATCGGCCGGTAAAAATTAGCGTCTGCCCAGATGTAGATAAGAT
+GCGATGCGATAAAGGAGACGAAAAAGAATGCCGCAAGCGGCCTTGCGAAATGGCGGCGGC
+GCGTGAGACTGCGCAGCTTTTGCCAACTCCATGTCGCAAATAGCATCTCAATCAATAAGA
+TAACTGGCACGCTAATAAACATAAGCTGCCAGTCGCGCGCCATTTCGTTCTGGTCAGGGT
+TGATGACCAGTTCCCAGACAATGGGGTTAAGATGCAGGTGAAAACGGGTAAAGACTTCGC
+TGTCGATAAGCAACAACGTCATACCCGCGGTCGCCAGAATGGCTGATAAAAACCGCATCA
+GCCGCTGGGACATCACGATAAACGTGAGCGGAAAAAGAATAAGCAAATAGGTGGCGAACA
+CCAGAAAGCTAAAGTGCCCGACAATGCTCAGGTAGGAGTATATGCGACCCGCAAGCGTTG
+TCGGCCAGTCGGCGACAAACAGGTAACGGCTGCCGAGCAACGTAGCCAACAGTATATTGA
+ACAGAGCGAACCAGTGCCCCCAGCTAACCATCTGGGAGACTTTTTCACGGTAGCGCTGAC
+GATGAGTTACCATACCTGTCGCTTGTTTCTCCGGAGTCGGCTTTTTAGTGCGCGTTGTCT
+TCGCTAATCGAAGACTGCAAGGCGCGGGCAAAAGAGTTCGCAATCGCCTGGCGTTGAGCC
+GGCGCCACGCTGGTGTTGATAAGGTTGGTGACCATATTTCCCAGTACCATCAGGGAAAGA
+TCGGTCGGCGCCTTATGTTTTTCCAGTACACTGAGCAGTTCGCTCAGCAGTTGTTCAACA
+TGTTCATCACTATAGCGGGAGAGTTGTGGCATAAATCAAAATCTGTTTGTTCATGAAAGG
+GCAACATATTACCGTAGCAACAGTTTTTTTTCTGCATTTTTATCCCCTAAATCATTCGCG
+TTACGGATAACGCCTGAAATGACAGGGCGATTGCTTGCGTCTTCCGGCAGGCGGTGGTTG
+AATACCGCCCGGTCTTAAAGGAGAGTTTATCATGAGTCTGGATATCAACCAGATTGCCCT
+GCACCAGCTTATCAAGCGCGATGAGCAAAATCTTGAGCTGGTCTTGCGCGATTCATTGCT
+GGAGCCGACAACCACCGTTGTCGAGATGGTGGCTGAACTGCATCGGGTCTATAGCGCCAA
+GAATAAGGCGTATGGCCTGTTTAATGAAGAGAGTGAACTGGCGCAAGCGCTGCGGTTGCA
+ACGTCAGGGGGAAGAAGATTTTCTTGCCTTTAGCCGGGCGGCGACCGGACGCCTGCGTGA
+CGAACTGGCGAAATATCCCTTTGCGGACGGCGGCATTGTATTGTTCTGCCATTATCGTTA
+CCTGGCGGTGGAGTATCTGCTGGTTACGGTACTGAACAACCTGAGCAGTATGCGGGTCAA
+TGAAAATCTGGACATTAACCCGACGCATTATCTTGATATCAACCATGCGGATATCGTGGC
+GCGTATCGATCTTACCGAGTGGGAAACTAATCCGCAATCGACCCGCTACCTGACGTTCCT
+GAAAGGTCGGGTAGGGCGCAAGGTCGCTGACTTCTTTATGGATTTCCTCGGCGCCAGCGA
+AGGGTTGAACGCCAAAGCGCAGAATCGCGGCCTGTTGCAGGCAGTGGATGATTTCACCGC
+AGAAGCGCAGTTGGATAAAGCAGAACGTCAGAACGTGCGTCAGCAGGTGTACAGCTACTG
+CAACGAGCAGTTACAAGCCGGGGAAGAGATTGAGCTGGAATCGCTGTCTAAAGAGCTTTC
+CGGCGTCAGTGAAGTCAGCTTCAGCGAATTTACCGCCGAAAAAGGCTATGAGCTGGAAGA
+GAGCTTCCCGGCAGATCGCAGTACGCTACGCCAGTTAACCAAATATGCCGGCAGCGGCGG
+CGGGTTAACGATTAACTTTGATGCGATGCTACTGGGCGAGCGGATTTTCTGGGACCCGGC
+GACCGATACCCTGACTATCAAAGGGACGCCGCCGAATTTGCGCGATCAGTTGCAGCGACG
+CACGTCGGGCGGGAAATAAAGGCATACGACATAAAAGGACGCCGTAGTGAGTAAAGGAAT
+ACGTCAGAGGCCGCCTGTTAATTTCAGTTTATTTCGGCAGGTTTTGTATGCGCACATTGT
+CGCATTTTTGATGATGTTGATGCTCGGCATGGTGTTCACCGTTCTCTCGTTGGTACTGTT
+TTATACTTATGGCGCCAACTGGTTACTGAGCTTGTTCATATGCCCGCTGTTCTTGTTAAG
+TGGTTTATTCATTACTGGATTTGCTTTTAAATCAACCTGGTCCAGCATACGTTACTATTA
+CGACAAGGGGCAGTTGAAACGGTATGGACTCAATCTTGATGCAACTTTGACGCATAAAGA
+GAAGGTGGAAATACGCATCGATAATGCAAAGCGTCAAGTGCGTGTAGATGAACTGGAACT
+CCATGTCTTATTTGATTTTCAGTTTGATAGCAAGACATGGAGCTGCGGCGACTTACTGAC
+CAATGAAAAGGTATTTGATGCGTTGAATGATGGGCAAACGATACCAATACGAATTTTGCC
+ATGGAAACCGGAAAGTGCCAGCGTTCGTCAACGGGCATTATTCAATCGGCTTAAAGGTAT
+GAATACCGCGGCGGAAACTACAGATCCTCGATTAGGGGAGGCGCTAATTGAATGTGGCGA
+GGTGTAAGAAAGCAGAAAAGCAAAGTGGGTTCTCGTTGCTCTGCATGTCGTCAAATTCAA
+TTAAACGCATAAAAAAACCCCGCCGGGGCGGGGTTTTTCTTCAACTTCCAGGCGATTACG
+CGCGAACGAAGTCGATGTGAGTCAGCTTCGGTTTGTAAGCGTGACGCTGTACAGCCTGAG
+CTTTAACTTTTACTTCTTTACCGTCAACAACGAGGGTCAGAACTTCGCTGTAGAATTCAG
+CTTTAGCTTGCATGTTCATCACCTGGTCGTGGTCCAGTTCGATAGCAATCGGGGCTTCAG
+AACCGCCGTAGATGATTGCCGGGAACTTGTTAGCGGCGCGCAGGCGGCGGCTCGCACCCT
+TACCCTGCTCTTTACGTACTTCTGCGTTGATAGTAAACATTTAAATCTCTCTTTAATAAT
+TCCTGCTACAGGCGACCCAGCAACAGGTAAGTGATCTGCTTTGCGTATGCAAAGGCGGGC
+GGGATTCTATACTCAAATCGCCGTTACATCAATGAAAAGTACAATTAACCACGTAATTCG
+TGCGCCCGGCGGAAGCGGCCTTCATAGTCGAACACTTTTTCACGCACCTGCCAGTACTGG
+CCTTTCATCCGCGCGACCACAAAATCGGGATGCCGCAACAGCGCCTGCTGCGCGACAATA
+TCCGCCGCCGTGATCCAGCGTAAAGGAACGCCGGGCGTGCGCGTATGCGGGCGAATAAAT
+AGCTGTTCGAAAGCGGTACGCTGGGCGGGCGTGTGCAAGCGGAAGCGCTCACTGACATCC
+GCGCCGTCCTCGTCATAGTAAGTGATTTTCAGCCATTCGCCTTTCTCATCCTGCCCATGC
+TGCATCGTCATTCCGCTACAGCGCAGGACTAACGCATCCTTGAGCCTGAGCGCCGCTTTT
+AACATATCGTCCGGGTCGACCAGAATGGCGTCACATTCCCGGCAGCGTCGGGCGGCAATA
+TCGTTTTCGGCATTACACTGCGGGCAGTTTTTGAAGCGAAAGCGAAAATCGCACTGCTCG
+CGATGGCCGTCGTCATCCTCAAACCAGCCCTGGCAGCGACGGCCAAAGTGTTCAATCAGC
+GTGCCGTCGGCAGTGGTTTTCCCCCAGAAGGTGTTGGCAAAGCCGCAGGCCGGGCAAAAT
+ACCTGGACAGGGACGTTATCGCTTTTTCCCTTCGGGCTACCGACCTCCGGGGCATACAGG
+TCGTGCGGGTTGCCTGCGTAATCAAGAATCAGGCAATCGGTCTTTCCCGGCGCAAGGCGC
+AGACCACGCCCGACAATTTGTTGGTAAAGACTAACTGACTCCGTGGGACGTAGAATCGCG
+ATGAGATCAACGTGTGGGGCGTCAAAGCCGGTGGTCAGTACCGAGACGTTAACCAGATAG
+CGAAAACGCTGCGCCTTGAAATTATCAATCAGCGCGTCGCGCTCGGGCCCTGGCGTATCG
+CCGGTAATCAGCGCCGCGTCGTCCGCCGGAAGCAGACCGACAATCTCTTTCGCATGTTCG
+ACCGTGGCGGCGAAAATCATCACGCCTTTGCGCGTTTGCGCAAATTCCATAATCTGGCTG
+ATGATGTGCGGCGTAATCCGCTGCTGCTTTTTCAGCTCGCGGTTCAGGTCGGCTTCGCTG
+AACAGCCCATTGCTTTGGGCCTGCAGGCGGCTGAAATCGTATTGGACCACTGGCATATCA
+AGCCGCTCAGGCGGCGTCAGATAGCCGTGTTTAATCATATAGCGCAGCGGCAGTTCATAA
+ATACAGTCGCGAAACAGAGCGTTGTCGTTGCCGCGCACCATACCGTGATAATGAAATTGA
+TAAATCCAGCCTTTTCCGAGGCGAAAAGGCGTGGCGGTGAGTCCAAGCAGACGTAAGTGA
+GGATTAACTTTACTCAGGTGAGTGAGGATTTGCTGATACTGACTGTCTTCATCGTCACCG
+ATGCGGTGGCATTCATCGACAATCAACAGCGAAAACTCCTCCTGGAAGGCGTCAAGATTA
+CGCGCCACCGACTGTACGCTGCCGAACACGACTTTGCCCTGACTCTCTTTACGTTTGAGT
+CCGGCGGCGAAAATATCCGCTTCCAGCCCCAGCGCGCAATATTTGGCGTGGTTCTGCGCG
+ACCAGCTCTTTCACATGCGCCAGCACCAGTACCCGTCCGCGGGCGACGCGCGCCAGTTCG
+GCGATCACCAGGCTTTTACCTGCGCCGGTCGGCAGAACAATCACGGCGGGCGTACGGTGG
+CGGCGAAAGTGGCTGAGCGTGGCGTCTACGGCTTCTTGCTGGTAGGGGCGGAGTGTAAAA
+ATCATGGTCTCACTACGTTAAACGGTTCCGGGAATAGTATGCCATGAATCATTTCCCTTG
+AGGGATATAGTTAGCCCGCTATACTGAGCGGATAGCAATTCCCTTTTTTCGGGTAGAATG
+CCCGATTTCCGTATTATTACAGGCTAAATCACACACATGCGACTTGATAAATTTATCGCT
+CAGCAGCTTGGCGTCAGCCGCGCTATTGCCGGGCGTGAAATTCGTGGTAACCGCGTTACC
+GTCGATGGCGACATCATTAAAAATGCGGCCTTCAAACTGCTCCCGGAACATGCGGTTGCG
+TATGACGGCAATCCCTTAGCGCAGCAACACGGGCCACGCTATTTTATGCTTAACAAGCCG
+CAGGGATACGTTTGTTCAACCGATGATCCCGATCATCCAACGGTGCTGTATTTCCTGGAT
+GAGCCGGTGGCGTATAAGCTGCATGCCGCAGGACGTCTGGATATCGATACTACCGGTCTG
+GTGTTAATGACAGATGACGGTCAGTGGTCGCACCGCATTACGTCGCCGCGCCATCACTGT
+GAAAAAACCTATCTGGTGACCCTGGAGTCGCCGGTGGCCGACGATACGGCAGCGCAATTT
+GCGAAAGGCGTGCAGTTGCATAATGAAAAAGATCTCACTAAACCCGCTACGCTGGAGGTG
+ATAACACCTGTGCAGGTCCGTCTGACCATCAGCGAAGGCCGTTATCATCAGGTGAAGCGG
+ATGTTTGCCGCAGTAGGCAATCGCGTTGTGGAACTGCACCGCGAACGGATTGGCGCCATT
+ACGCTGGATGAGAATCTGGCTCCCGGCGAGTACCGCCCGTTGACTGAAGAAGAAATCGCC
+AGCGTCGGCTAACTATCTCGTTAAATTCAGGAGTTCGATGTGACCACCCGGCAGCACTCT
+TCCTTTGCCATTGTCTTTATTCTTGGCCTGTTGGCCATGTTAATGCCGCTGTCGATTGAT
+ATGTATCTTCCAGCGCTGCCGGTGATTTCTGCGCAATTCGGCGTGCCTGCCGGTAGCGCG
+CAGATGACGCTCAGCACCTATATTCTGGGGTTTGCGCTGGGTCAGCTTATCTATGGACCG
+ATGGCGGATAGCCTCGGGCGTAAGCCGGTCATCCTGGGCGGGACGCTGGTATTTGCCGCT
+GCGGCGGTCGCCTGTGCGTTGGCGCAGACTATCGATCAACTGATCGTGATGCGTTTCTTT
+CACGGTTTGGTGGCTGCGGCGGCAAGCGTCGTCATCAATGCGCTGATGCGGGATATTTAT
+CCAAAGGAAGAGTTTTCGCGCATGATGTCATTTGTCATGCTGGTCACTACGATAGCGCCG
+TTAATGGCGCCCATTGTTGGCGGCTGGGTGTTGGTATGGTTAAGCTGGCACTATATCTTC
+TGGATACTGGCTATTGCGGCGATTCTGGCGTCAGTCATGATCTTTGCTTTGATTAAAGAG
+ACGCTGCCCGTTGAGCGGCGTCAGCCTTTTCATATTCGTACCACAATAGGTAACTTTGCC
+GCGTTGTTTCGCCACAAACGCGTACTGAGCTATATGCTGGCGAGCGGGTTCAGTTTTGCC
+GGTATGTTCTCTTTTTTGAGCGCGGGGCCGTTTGTCTATATCGAAATTAATCATGTTCCG
+CCGCAGGATTTCGGCTACTACTTCGCGTTGAACATCGTATTTCTGTTTGTGATGACGATT
+ATCAACAGCCGTTTTGTCAGACGGGTAGGGGCGCTAAACATGTTTCGGGCCGGGCTATGG
+ATTCAGTTTGCGATGGCGGTGTGGATGGTTTTCAGCGCGCTGATGGGCATTGGATTCTGG
+GCGCTGGTGGTTGGCGTTGCGGCGTTTGTCGGCTGTGTGTCGATGGTTTCGTCCAATGCG
+ATGGCGGTCATTTTGGATGAGTTTCCGCATATGGCCGGAACGGCGTCTTCGTTGGCGGGC
+ACTTTCCGCTTTGGTATTGGCGCTATCGTCGGCGCGTTGCTGTCGCTGGCTACCTTTAAC
+AGTGCGTGGCCGATGATCTGGTCGATTGCGCTTTGCGCCGCCTGTTCCATTCTGTTTTAT
diff --git a/t/data/genbank_gbff/genbank2.gff.proteome.faa.expected b/t/data/genbank_gbff/genbank2.gff.proteome.faa.expected
new file mode 100644
index 0000000..7805e26
--- /dev/null
+++ b/t/data/genbank_gbff/genbank2.gff.proteome.faa.expected
@@ -0,0 +1,26 @@
+>ERS325340_00001.p01
+MPEVTPFQVLIVDDHPLMRRGIRQLLELDPAFYVVAEAGDGASAIDLANRIEPDLILLDL
+NMKGLSGLDTLNALRRDGVTAQIIILTVSDSASDIYALIDAGADGYLLKDSDPEVLLEAI
+RKGANGGKVFSDRVNEYLRERERFGAQEDPFSILTERELDVLHELAQGLSNKQIASVLNI
+SEQTVKVHIRNLLRKLNVRSRVAATILFLQTRGMQ*
+>ERS325340_00003.p01
+VISLEDASLTKKGIVKLSSATDSDSEALAATPKAVKTVMGEVRTKAPLDSPAFTGTPTTP
+TPPGDAKGLQTTNAEFVRKLIAALVGSVLEPLDTLQELADALGNDPNFATTVLNKLAGKQ
+PLDETLTALSGKSVDGLIEYVGLRETISRAADALQKSQNGGDIPDKDLFVRRIGAARAFD
+GAVIIGCDDNPWTTAEFIVWLESQGAFNHPYWMCRGSWSYAYNKIITDTGCGNICLAGAV
+IEVMGVRGAMTIRVTTSHSVSGW*
+>ERS325340_00004.p01
+MPFHIGSGCLPATISNRRIYRIAWSDTPPEMSSWEKMKEFFCSTHQTEALECIWTICHPP
+AGTTREDVINRFELLRTLAYAGWEESIHSGQHGENYFCILDEDSQEILSVTLDDAGNYTV
+NCQGYSETHRLTLDTAQGEEGTGHAEGASGTFRTSFLPATTAPQTPAEYDAVWSAWRRAA
+PAEESRGRAAAVQKMRACLNNGNAVLNVGESGLTTLPDCLPAHITTLVIPDNNLTSLPAL
+PPELRTLEVSGNQLTSLPVLPPGLLELSIFSNPLTHLPALPSGLCKLWIFGNQLTSLPVL
+PPGLQELSVSDNQLASLPALPSELCKLWAYNNQLTSLPTLPSGLQELSVSDNQLASLPTL
+PSELYKLWAYNNRLTSLPALPSGLKELIVSGNRLTSLPVLPSELKELMVSGNRLTSLPML
+PSGLLSLSVYRNQLTRLPESLIHLSSETTVNLEGNPLSERTLQALREITSAPGYSGPIIR
+FDMAGASAPRETRALHLAAADWLVPAREGEPAPADRWHMFGQEDNADAFSLFLDRLSETE
+NFIKDAGFKAQISSWLAQLAEDEALRANTFAMATEATSSCEDRVTFFLHQMKNVQLVHNA
+EKGQYDNDLAALVATGREMFRLGKLEQIAREKVRTLALVDEIEVWLAYQNKLKKSLGLTS
+VTSEMRFFDVSGVTVTDLQDAELQVKAAEKSEFREWILQWGPLHRVLERKAPKRVNALRE
+KQISDYEETYRMLSDTELRPSGLVGNTDAERTIGARAMESAKKTFLDGLRPLVEEMLGSY
+LNVQWRRN*
diff --git a/t/data/genbank_gbff/genbank3.gff b/t/data/genbank_gbff/genbank3.gff
new file mode 100644
index 0000000..a3ec219
--- /dev/null
+++ b/t/data/genbank_gbff/genbank3.gff
@@ -0,0 +1,282 @@
+##gff-version 3
+##sequence-region CVBT01000001 1 489157
+# conversion-by bp_genbank2gff3.pl
+# organism Salmonella enterica subsp. enterica serovar Typhi
+# Note Salmonella enterica subsp. enterica serovar Typhi genome assembly 10426_1#50, scaffold ERS325326SCcontig000001, whole genome shotgun sequence.
+# date 02-APR-2015
+CVBT01000001	GenBank	region	1	489157	.	+	1	ID=CVBT01000001;Dbxref=BioProject:PRJEB3215,taxon:90370;Name=CVBT01000001;Note=Salmonella enterica subsp. enterica serovar Typhi genome assembly 10426_1#50%2C scaffold ERS325326SCcontig000001%2C whole genome shotgun sequence.;collection_date=1997;country=Viet Nam;date=02-APR-2015;isolation_source=Not known;mol_type=genomic DNA;organism=Salmonella enterica subsp. enterica serovar Typhi;serovar=H58;strain=ipt76
+CVBT01000001	GenBank	gene	83	730	.	-	1	ID=ERS325326_00001;Name=narL_1;locus_tag=ERS325326_00001
+CVBT01000001	GenBank	mRNA	83	730	.	-	1	ID=ERS325326_00001.t01;Parent=ERS325326_00001
+CVBT01000001	GenBank	CDS	83	730	.	-	1	ID=ERS325326_00001.p01;Parent=ERS325326_00001.t01;Dbxref=GI:804342840;Name=narL_1;codon_start=1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002147204.1;locus_tag=ERS325326_00001;product=transcriptional regulator NarP;protein_id=CQU32483.1;transl_table=11;translation=length.215
+CVBT01000001	GenBank	exon	83	730	.	-	1	Parent=ERS325326_00001.t01
+CVBT01000001	GenBank	gene	2720	3511	.	+	1	ID=ERS325326_00003;Name=ERS325326_00003
+CVBT01000001	GenBank	mRNA	2720	3511	.	+	1	ID=ERS325326_00003.t01;Parent=ERS325326_00003
+CVBT01000001	GenBank	CDS	2720	3511	.	+	1	ID=ERS325326_00003.p01;Parent=ERS325326_00003.t01;Dbxref=GI:804342841;Name=ERS325326_00003;codon_start=1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002041506.1;product=side tail fiber protein;protein_id=CQU32522.1;transl_table=11;translation=length.263
+CVBT01000001	GenBank	exon	2720	3511	.	+	1	Parent=ERS325326_00003.t01
+CVBT01000001	GenBank	gene	4180	6546	.	+	1	ID=ERS325326_00004;Name=sspH2;locus_tag=ERS325326_00004
+CVBT01000001	GenBank	mRNA	4180	6546	.	+	1	ID=ERS325326_00004.t01;Parent=ERS325326_00004
+CVBT01000001	GenBank	CDS	4180	6546	.	+	1	ID=ERS325326_00004.p01;Parent=ERS325326_00004.t01;Dbxref=GI:804342842;eC_number=6.3.2.-;Name=sspH2;codon_start=1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002244314.1;locus_tag=ERS325326_00004;product=secreted effector protein;protein_id=CQU32561.1;transl_table=11;translation=length.788
+CVBT01000001	GenBank	exon	4180	6546	.	+	1	Parent=ERS325326_00004.t01
+CVBT01000001	GenBank	gene	7167	7243	.	-	1	ID=ERS325326_00005;Name=ERS325326_00005
+CVBT01000001	GenBank	tRNA	7167	7243	.	-	1	ID=ERS325326_00005.r01;Parent=ERS325326_00005;Name=ERS325326_00005;inference=COORDINATES: profile:Aragorn:1.2.36;product=tRNA-Pro
+CVBT01000001	GenBank	exon	7167	7243	.	-	1	Parent=ERS325326_00005.r01;Name=ERS325326_00005
+##FASTA
+>CVBT01000001
+GTGATAGAGCGCAGTATCCGGCTGGCGCAGGAGAAATAAGACCCTGACGGCCCGGTGGCG
+TGGCGCCACCGGGCCTGGCTTGTTACTGCATTCCGCGTGTTTGTAAAAATAGAATGGTCG
+CCGCGACGCGCGAGCGCACGTTTAGCTTGCGAAGTAGATTACGAATGTGCACCTTAACCG
+TCTGCTCAGAAATATTCAGTACTGACGCAATCTGTTTGTTAGAAAGCCCCTGCGCCAGCT
+CATGCAAGACATCTAACTCTCGTTCGGTCAGGATACTAAAGGGATCTTCCTGCGCGCCAA
+ACCGTTCACGCTCACGCAGATATTCATTGACCCGATCGCTAAAGACTTTACCGCCGTTAG
+CGCCTTTGCGAATGGCCTCCAGCAGCACTTCCGGATCGCTATCTTTGAGCAGATAACCGT
+CGGCGCCGGCGTCGATCAGCGCGTAGATGTCGCTGGCGGAATCCGAGACGGTAAGAATAA
+TAATTTGCGCCGTCACACCATCCCGGCGTAGCGCGTTCAGGGTATCTAACCCGCTCAGAC
+CTTTCATATTCAGATCCAGCAGGATCAGATCGGGCTCAATGCGATTCGCCAGATCGATCG
+CACTCGCCCCGTCACCCGCTTCGGCAACGACATAAAAAGCCGGGTCCAGTTCCAGTAATT
+GACGAATACCTCGCCGCATAAGTGGATGGTCGTCCACAATAAGCACCTGAAAAGGTGTTA
+CTTCAGGCATGCTATATCTCCTGAGTTTTATTAGAATGATTATTGTTTTCAGCGCGGAAA
+TCGTCCAGATTTGCCACATTGTCAAGCGCGTAAATTTTACCTTAATTAGTGCATGGGTAA
+GTAGTAAAACCTGTGACAGGCGAGTCATTAGCTGAACGATAGAAAGGATAAAACAACCTG
+TTGTGAGAGAAAACAGAATACTCCCTGAAGTTGAATACTGGTCTGGACGCTTTTTTCGGA
+CAATGTGACATAACAGAAGTGTGGCATATGTAGCTGCGCCCTTGCGTCCGTAAGTCCAGA
+CTATCTTTACTGGCAGGAAATTCCTTATTAAAAGTCATAGCGTCTAAAGATTTTCATTAT
+TAACAAATTATTTTAATTGAATTTATTTATGGTTTTATTAATGCCAATGAAATACATCTG
+AGCAGAACGAATAAACCGCGACATCTGTTAAATTAAATATTAAGTCAATAGCAGAGGTAT
+TTTTATGCGATAAAGAGATTCTGACGATATTCTTATCATAAATGAAAATGTGGTCAGGAA
+TGAGCGTCATTCACTTATAACTTATCCTGGATGTGCCTGAACAACACTCGCGGCGTTTTG
+AGCACCCGTTGGGGCGACTCAAGGGGAGAACGTAGTGTGGATGCTATATCAGCCGTTTCT
+GTGAGCGTAAGCGTGGCGTAGAAAATTTTAAATATGTTAGCCGGTTAAAAATAACTATTG
+ACATTGAAATGGTGGTGGAGTGTATATGAAGAGAATATTTATATATCTATTATTACCTTG
+TGCATTCGCATGTTCTGCTAATGATAATGTTTTTTTGGCAAGGGCAACAAGCATCAGATC
+TCTTTTGCTGCGGGAGAAAGTATAAGAAGAGGAGGGGTTGAGCACTTATATACGGCTTTT
+CTGACATACAGTGAACCCAGCGATTTTTTCTTTTTACAGGCAAGAAATAATCTGGAGTTA
+GGAGGATTTAAGGCTAAGGGTAGCGATGATTGCAGTAAACATTCTGGCAGCGTTCCCTGT
+AATAAATATAACCAGGGCGTATTGGGTATCTCGAAGGATGTGGCGCTGGTTCATTTCGCT
+GGTATCTATACCGGTATTGGTCTGGGGGCTTATATAAAATCTAAGTCGCGAGATGATATG
+CGTGTCAATTCTGCATTTACCTTTGGAGAAAAAGCGTTTCTTGGCTGGAACTTTGGGGCT
+TTTTCTACAGAAGCTTATATCCGGCATTTCTCGAATGGATCACTTACGGATAAAAATTCA
+GGGCATAATTTTGTAGGTGCTTCAATTAGTTATAATTTCTGAAGTTGAATAACAATTAGC
+GAGTTGCTGGCTGAAGGAGTAATTAATCACCGTACGTTGTCTACAAAACGTGTTGTGAAT
+AGCCGATCGTTATCGACCCTATGGCGACAGGGAGGCTAATTGTTAGCGACAAAGGCGTGG
+TAGGCCGTTTACCTTACCAGAACGTTTTATTATTGCTGCGACAGATTGCTTACTCATTTA
+CCTGCCACTGCAAACATTTTTTCCAGTATAGGGTGTAGATGCGCTTTTTCTGTTTTAGTA
+CAAACATGGTTTACGTTAGTTGCCTGCATTGACTTTACTGTGACCACTGCACAGGAAAGA
+TGTCCAATCAGTTCATGCAAAATCAGTTCATGCAAAATAACGTCTCTGGCATCAGCTAAC
+TTTTTAATTGTAGGCATAAAAAAACCAGCCCTGACAGGCTGGTTTTCAAGGGGAATTTTG
+GTCGGCAGCTTCATCCGCAGCATCTGCGGAGAGCAGCGCAGGTACGGCGACCACAAAAGC
+CGGGGAGGCATCAGCCAGCGCGGCGTCGGCTGACACAGCCAGAACGGCGGCAGCCGCCTC
+GTCATCAGCATCGTAGGCATCCACCCACGCCGCCGCATCTGATACCAGCGCATCACTGGC
+AGCGCAAAGCAGAGCTGCAGCCGGAGAATCGGCAACCAGAGCCGAAGAGGCCGCGAAACG
+GGCCGAGGATATCGCAGACGTGATTTCCCTGGAAGATGCAAGCCTGACGAAAAAAGGTAT
+CGTTAAATTAAGCAGTGCCACGGACAGTGACAGCGAAGCGCTGGCGGCCACACCAAAGGC
+GGTAAAAACCGTTATGGGTGAGGTACGGACCAAAGCGCCGCTGGACAGCCCGGCATTCAC
+TGGAACGCCGACCACACCGACGCCGCCAGGCGATGCTAAAGGGCTTCAGACAACAAACGC
+GGAGTTTGTCCGCAAACTGATTGCCGCGCTGGTTGGTTCCGTACTGGAGCCACTGGACAC
+CCTGCAGGAACTGGCTGACGCGTTGGGAAATGATCCGAACTTTGCCACCACGGTACTGAA
+TAAACTGGCGGGCAAGCAGCCGCTGGACGAAACCCTGACGGCGCTGTCAGGAAAAAGCGT
+TGACGGTCTTATCGAATACGTTGGTTTGCGAGAAACCATAAGTCGTGCCGCCGATGCATT
+ACAAAAATCACAGAATGGCGGCGATATTCCGGACAAGGATTTGTTTGTGCGTCGTATCGG
+TGCCGCGCGAGCGTTTGATGGCGCAGTTATTATCGGCTGTGATGATAATCCGTGGACGAC
+GGCGGAGTTTATCGTCTGGCTGGAGTCTCAGGGCGCATTCAATCACCCTTACTGGATGTG
+TCGTGGCTCCTGGTCTTACGCTTATAACAAAATCATCACGGATACTGGCTGCGGTAATAT
+CTGTCTCGCTGGCGCAGTGATTGAGGTAATGGGAGTGCGTGGCGCGATGACTATTCGGGT
+GACAACGTCCCATTCAGTATCTGGTTGGTGATACGTGGGTGACAGCCCCAAGTGTATAAG
+AAGGAATAATTATGACAGCGGAAAAAAAATAAAAAGAACAAACAGTTTTTAAATATTAAA
+AATTTCATTCCGTATGCACCGGAACCAGATGACACATTATTCGCCGATGCGGCGTATCTT
+AAATCAGAGGATGGTCAGGACTGGTATGGGTGCCAGCAATTATTTTCAGCAGACACGCTG
+AAAATTACCTACGACGATAACGATGTTATTACGTGTATTACGCGTGATGTTTCCGGGCTG
+TGGCCTGCTGGCCAGAGCGTTGCAGAGTTGCCTGATACGGATGAAAACCGTCGCGCTGAT
+ATTCATGCTGCTGGCAGTTTAAAGACGGTAAAGTCGTTCAAAGGGTTTATTCGCCGGAAG
+AGCTGCGCAGGCAGGCAGAATCGAAAATTGAACGCCCGGGCGTTGATACCGGATGATCTG
+GTCATCGTGGAAAGCGACCCTGAAAAAATCGACACTTTAGCTGTAAAATGACAGTCCCGC
+CATCCGGTCATCATAACGGATTTTTCTTCTGCACCTTCTGAAGCCCGCCATGTCAGGACG
+ACCATGAATCCGCCGATAACCTTATTGTGAAATTAAGACCAGGAAGAGATGATGTCTGCC
+GGACAGATACTATATGTAAATTTATAAAGGTTTTTTGTTATGCCCTTTCATATTGGAAGC
+GGATGTCTTCCCGCCACCATCAGTAATCGCCGCATTTATCGTATTGCCTGGTCTGATACC
+CCCCCTGAAATGAGTTCCTGGGAAAAAATGAAGGAATTTTTTTGCTCAACGCACCAGACT
+GAAGCGCTGGAGTGCATCTGGACGATTTGTCACCCGCCGGCCGGAACGACGCGGGAGGAT
+GTGATCAACAGATTTGAACTGCTCAGGACGCTCGCGTATGCCGGATGGGAGGAAAGCATT
+CATTCCGGCCAGCACGGGGAAAATTACTTCTGTATTCTGGATGAAGACAGTCAGGAGATA
+TTGTCAGTCACCCTTGATGATGCCGGGAACTATACCGTAAATTGCCAGGGGTACAGTGAA
+ACACATCGCCTCACCCTGGACACAGCACAGGGTGAGGAGGGCACAGGACACGCGGAAGGG
+GCATCCGGGACATTCAGGACATCCTTCCTCCCTGCCACAACGGCTCCACAGACGCCAGCA
+GAGTATGATGCTGTCTGGTCAGCGTGGAGAAGGGCTGCACCCGCAGAAGAGTCACGCGGC
+CGTGCAGCAGCGGTACAGAAAATGCGTGCCTGCCTGAATAATGGCAATGCAGTGCTTAAC
+GTGGGAGAATCAGGTCTTACCACCTTGCCAGACTGTTTACCCGCGCATATTACCACACTG
+GTTATTCCTGATAATAATCTGACCAGCCTGCCGGCGCTGCCGCCAGAACTGCGGACGCTG
+GAGGTCTCTGGTAACCAGCTGACTAGCCTGCCGGTGCTGCCGCCAGGACTACTGGAACTG
+TCGATCTTTAGTAACCCGCTGACCCACCTGCCGGCGCTGCCGTCAGGACTATGTAAGCTG
+TGGATCTTTGGTAATCAACTGACCAGCCTGCCGGTGTTGCCGCCAGGGCTACAGGAGCTG
+TCGGTATCTGATAACCAACTGGCCAGCCTGCCGGCGCTGCCGTCAGAATTATGTAAGCTG
+TGGGCCTATAATAACCAGCTGACCAGCCTGCCGACGTTGCCGTCAGGGCTACAGGAGCTG
+TCGGTATCTGATAACCAACTGGCCAGCTTGCCGACGCTGCCGTCAGAATTATATAAGCTG
+TGGGCCTATAATAATCGGCTGACCAGCCTGCCGGCGTTGCCGTCAGGACTGAAGGAGCTG
+ATTGTATCTGGTAACCGGCTGACCAGTCTGCCGGTGCTGCCGTCAGAACTGAAGGAGCTG
+ATGGTATCTGGTAACCGGCTGACCAGCCTGCCGATGCTGCCGTCAGGACTACTGTCGCTG
+TCGGTCTATCGTAACCAGCTGACCCGCCTGCCGGAAAGTCTCATTCATCTGTCTTCAGAG
+ACAACCGTAAATCTGGAAGGGAACCCACTGTCTGAACGTACTTTGCAGGCGCTGCGGGAG
+ATCACCAGCGCGCCTGGCTATTCAGGCCCCATAATACGATTCGATATGGCGGGAGCCTCC
+GCCCCCCGGGAAACTCGGGCACTGCACCTGGCGGCCGCTGACTGGCTGGTGCCTGCCCGG
+GAGGGGGAACCGGCTCCTGCAGACAGATGGCATATGTTCGGACAGGAAGATAACGCCGAC
+GCCTTCAGCCTCTTCCTGGACAGACTGAGTGAGACGGAAAACTTCATAAAGGACGCGGGG
+TTTAAGGCACAGATATCGTCCTGGCTGGCACAACTGGCTGAAGATGAGGCGTTGAGAGCA
+AACACCTTTGCTATGGCAACAGAGGCAACCTCAAGCTGCGAGGACCGGGTCACATTTTTT
+TTGCACCAGATGAAGAACGTACAGCTGGTACATAATGCAGAAAAAGGGCAATACGATAAC
+GATCTCGCGGCGCTGGTTGCCACGGGGCGTGAGATGTTCCGTCTGGGAAAACTGGAACAG
+ATTGCCCGGGAAAAGGTCAGAACGCTGGCTCTCGTTGATGAAATTGAGGTCTGGCTGGCG
+TATCAGAATAAGCTGAAGAAATCACTCGGGCTGACCAGCGTGACGTCAGAAATGCGTTTC
+TTTGACGTATCCGGCGTGACGGTTACAGACCTTCAGGACGCGGAGCTTCAGGTGAAAGCC
+GCTGAAAAAAGCGAGTTCAGGGAGTGGATACTGCAGTGGGGGCCGTTACACAGAGTGCTG
+GAGCGCAAAGCGCCGAAACGCGTTAACGCGCTTCGTGAAAAGCAAATATCGGATTATGAG
+GAAACGTACCGGATGCTGTCTGACACAGAGCTGAGACCGTCTGGGCTGGTCGGTAATACC
+GATGCAGAGCGCACTATCGGAGCAAGAGCGATGGAGAGCGCGAAAAAGACATTTTTGGAT
+GGCCTGCGACCTCTTGTGGAGGAGATGCTGGGGAGCTATCTGAACGTTCAGTGGCGTCGT
+AACTGATGCACCAGGTGAATGAGGTGCGGTGCGACAAAGATATTCCCGGACGAACAACAT
+CAGACAGTACGGATGATGTACAGGTGAAATAGGGGAGACTTCTTCAGTCAGGGCGCGGCG
+CAACTTTTTCGATGATAACGCGCCGCGCGCCGGTAGCGAGAAGCCGATGGAAGTACTGGA
+TCACCTGAATGCAAAGAAGGGCAGGGGAAAGCTGTACTTTGCCGGGGAGGGTATCCAGCA
+ACAGTGGGCTATGAAGAGAAACATACTGTCATCCCGATATACCACCCGCTATGAAGACCT
+GCTTCAGGTTAAGTGACAGGTTTACCCTGATTTTCAAATTTCTGATGTGATGGTGTGCCG
+CAGCCACATTGTATGCAAGAACGGGCTGCGGCAAACTGGCGATCGTTCGATAGTGCGTGT
+ATTGAATAGTTACCAGTCGTGGCGGATTCTACTGGTTAAGGATGACTAATCAATGTATTT
+AAGTCATCTGTTCAACTTTTCGCGTTGGAGGGAGCTTGAAGTCAATTTGCAGTGACCTTC
+GATAGCTATTTCACCATCATTTCACCATTTGCAAATTGGATATATAAAAAAACCAGCCCT
+GGCAGGCTGGCTTTTAAGGGGAATTTTGGTCGGCACGAGAGGATTTGAACCTCCGACCCC
+CGACACCCCATGACGGTGCGCTACCAGGCTGCGCTACGTGCCGACTTGTGGGTGCTAATA
+CTACCTCTTTCCAACGCGAATGCAAGGGGAAACGGTGCTAACTGATTTATAATTAATCAG
+TTAGCGATAAAACGTTTCTCTTCTGTTAGCACTTGCAGCAGCAGACTGAGCTGCGGCTTC
+TGATCTTTGATCTTCTCGCCGTGTAAGTCATAGGTCTGATAATGACCGTTATTATTGAGC
+ACCAGGGTCATTTGCGGCGTCGTGATTGCCAGCGTACTGCCGTCTGCCGCCGTTACCCAG
+TTATGACGACGTGGAACGGTAAAGATGTCCTGGCCCTGTGAATATTCGTTCGCTGGCGTG
+CTGACATGTAACAGGCGTTGCATCAACGTGGTCATCACATCGGTATGATCGGTAAGCACA
+TTAATACGCTGCGCAGGCGTCCCCGGCCAGTGGATCACCAGCGGTACTTGCAGATGACCT
+TGCGACCAGTCGAAGCGATTTTCTTCCGGCGTCAACGGTATGCCGCGTCCTGCGGTAATG
+ATCACGACAGTATTGTCGAATTTACCGGCCTCTCGCAGCGCGTTCAGAACCCGATTGATT
+TGCGCATCCACGTCGCTGGCGGCACTGGCGTAACGCTTAACAAAATTTTTCTGGTTACTG
+TCATCAATGTTAGTACCGTTAAACGATATCCATGAGAACCAGCGGTTATCTTCCTGTGCG
+TAGCGCCCAAGCCAGTCTATCCACTGGCTGGCCGTTTGCGCATCAGACTGCGTTTGCGCT
+GCCGGCATCGAGAAATCGGACAGTAATGCCTGACGATAAAGCGGGCTGGCGAAGCCATCC
+GAAGAAAACAGTCCCAGTTGGTAGCCTTGCTGATTCAACGCGGTAATGAGCGCCGCAGGC
+GTTCTGGTGGATAACACGCCATCCATATAGCCTGGCGATATGCCATAGAACAGGCCGAAA
+ATACCGTTATCAGTGGTATTCCCTGAACTCATATGACGGGTGAAGTCGATGTTTTGCTCG
+GCAAATGTAGCCAGCTCCGGCATCTGCTTCTCAAAGCGAGAATAGTTCAGGCCGTCTACG
+GTAATGAGCAGCACATTCTGACCGGTGCCCATATCGCGATAGTGCAAATTGCTGAGCGGG
+TACTGAACGGAGACCGCTTCCGGATTGCCTTGCTCTACCAGACGGCGCTGATATTCCTGC
+GCATCCAGCAGACCGTGTTTTTCAAGAAAACGTCGCGCCGTCATCGGATAAGAGAGCGGC
+AGGTTTGCCCGCTGCATGGTAATCGGCCGGTAAAAATTAGCGTCTGCCCAGATGTAGATA
+AGATGCGATGCGATAAAGGAGACGAAAAAGAATGCCGCAAGCGGCCTTGCGAAATGGCGG
+CGGCGCGTGAGACTGCGCAGCTTTTGCCAACTCCATGTCGCAAATAGCATCTCAATCAAT
+AAGATAACTGGCACGCTAATAAACATAAGCTGCCAGTCGCGCGCCATTTCGTTCTGGTCA
+GGGTTGATGACCAGTTCCCAGACAATGGGGTTAAGATGCAGGTGAAAACGGGTAAAGACT
+TCGCTGTCGATAAGCAACAACGTCATACCCGCGGTCGCCAGAATGGCTGATAAAAACCGC
+ATCAGCCGCTGGGACATCACGATAAACGTGAGCGGAAAAAGAATAAGCAAATAGGTGGCG
+AACACCAGAAAGCTAAAGTGCCCGACAATGCTCAGGTAGGAGTATATGCGACCCGCAAGC
+GTTGTCGGCCAGTCGGCGACAAACAGGTAACGGCTGCCGAGCAACGTAGCCAACAGTATA
+TTGAACAGAGCGAACCAGTGCCCCCAGCTAACCATCTGGGAGACTTTTTCACGGTAGCGC
+TGACGATGAGTTACCATACCTGTCGCTTGTTTCTCCGGAGTCGGCTTTTTAGTGCGCGTT
+GTCTTCGCTAATCGAAGACTGCAAGGCGCGGGCAAAAGAGTTCGCAATCGCCTGGCGTTG
+AGCCGGCGCCACGCTGGTGTTGATAAGGTTGGTGACCATATTTCCCAGTACCATCAGGGA
+AAGATCGGTCGGCGCCTTATGTTTTTCCAGTACACTGAGCAGTTCGCTCAGCAGTTGTTC
+AACATGTTCATCACTATAGCGGGAGAGTTGTGGCATAAATCAAAATCTGTTTGTTCATGA
+AAGGGCAACATATTACCGTAGCAACAGTTTTTTTTCTGCATTTTTATCCCCTAAATCATT
+CGCGTTACGGATAACGCCTGAAATGACAGGGCGATTGCTTGCGTCTTCCGGCAGGCGGTG
+GTTGAATACCGCCCGGTCTTAAAGGAGAGTTTATCATGAGTCTGGATATCAACCAGATTG
+CCCTGCACCAGCTTATCAAGCGCGATGAGCAAAATCTTGAGCTGGTCTTGCGCGATTCAT
+TGCTGGAGCCGACAACCACCGTTGTCGAGATGGTGGCTGAACTGCATCGGGTCTATAGCG
+CCAAGAATAAGGCGTATGGCCTGTTTAATGAAGAGAGTGAACTGGCGCAAGCGCTGCGGT
+TGCAACGTCAGGGGGAAGAAGATTTTCTTGCCTTTAGCCGGGCGGCGACCGGACGCCTGC
+GTGACGAACTGGCGAAATATCCCTTTGCGGACGGCGGCATTGTATTGTTCTGCCATTATC
+GTTACCTGGCGGTGGAGTATCTGCTGGTTACGGTACTGAACAACCTGAGCAGTATGCGGG
+TCAATGAAAATCTGGACATTAACCCGACGCATTATCTTGATATCAACCATGCGGATATCG
+TGGCGCGTATCGATCTTACCGAGTGGGAAACTAATCCGCAATCGACCCGCTACCTGACGT
+TCCTGAAAGGTCGGGTAGGGCGCAAGGTCGCTGACTTCTTTATGGATTTCCTCGGCGCCA
+GCGAAGGGTTGAACGCCAAAGCGCAGAATCGCGGCCTGTTGCAGGCAGTGGATGATTTCA
+CCGCAGAAGCGCAGTTGGATAAAGCAGAACGTCAGAACGTGCGTCAGCAGGTGTACAGCT
+ACTGCAACGAGCAGTTACAAGCCGGGGAAGAGATTGAGCTGGAATCGCTGTCTAAAGAGC
+TTTCCGGCGTCAGTGAAGTCAGCTTCAGCGAATTTACCGCCGAAAAAGGCTATGAGCTGG
+AAGAGAGCTTCCCGGCAGATCGCAGTACGCTACGCCAGTTAACCAAATATGCCGGCAGCG
+GCGGCGGGTTAACGATTAACTTTGATGCGATGCTACTGGGCGAGCGGATTTTCTGGGACC
+CGGCGACCGATACCCTGACTATCAAAGGGACGCCGCCGAATTTGCGCGATCAGTTGCAGC
+GACGCACGTCGGGCGGGAAATAAAGGCATACGACATAAAAGGACGCCGTAGTGAGTAAAG
+GAATACGTCAGAGGCCGCCTGTTAATTTCAGTTTATTTCGGCAGGTTTTGTATGCGCACA
+TTGTCGCATTTTTGATGATGTTGATGCTCGGCATGGTGTTCACCGTTCTCTCGTTGGTAC
+TGTTTTATACTTATGGCGCCAACTGGTTACTGAGCTTGTTCATATGCCCGCTGTTCTTGT
+TAAGTGGTTTATTCATTACTGGATTTGCTTTTAAATCAACCTGGTCCAGCATACGTTACT
+ATTACGACAAGGGGCAGTTGAAACGGTATGGACTCAATCTTGATGCAACTTTGACGCATA
+AAGAGAAGGTGGAAATACGCATCGATAATGCAAAGCGTCAAGTGCGTGTAGATGAACTGG
+AACTCCATGTCTTATTTGATTTTCAGTTTGATAGCAAGACATGGAGCTGCGGCGACTTAC
+TGACCAATGAAAAGGTATTTGATGCGTTGAATGATGGGCAAACGATACCAATACGAATTT
+TGCCATGGAAACCGGAAAGTGCCAGCGTTCGTCAACGGGCATTATTCAATCGGCTTAAAG
+GTATGAATACCGCGGCGGAAACTACAGATCCTCGATTAGGGGAGGCGCTAATTGAATGTG
+GCGAGGTGTAAGAAAGCAGAAAAGCAAAGTGGGTTCTCGTTGCTCTGCATGTCGTCAAAT
+TCAATTAAACGCATAAAAAAACCCCGCCGGGGCGGGGTTTTTCTTCAACTTCCAGGCGAT
+TACGCGCGAACGAAGTCGATGTGAGTCAGCTTCGGTTTGTAAGCGTGACGCTGTACAGCC
+TGAGCTTTAACTTTTACTTCTTTACCGTCAACAACGAGGGTCAGAACTTCGCTGTAGAAT
+TCAGCTTTAGCTTGCATGTTCATCACCTGGTCGTGGTCCAGTTCGATAGCAATCGGGGCT
+TCAGAACCGCCGTAGATGATTGCCGGGAACTTGTTAGCGGCGCGCAGGCGGCGGCTCGCA
+CCCTTACCCTGCTCTTTACGTACTTCTGCGTTGATAGTAAACATTTAAATCTCTCTTTAA
+TAATTCCTGCTACAGGCGACCCAGCAACAGGTAAGTGATCTGCTTTGCGTATGCAAAGGC
+GGGCGGGATTCTATACTCAAATCGCCGTTACATCAATGAAAAGTACAATTAACCACGTAA
+TTCGTGCGCCCGGCGGAAGCGGCCTTCATAGTCGAACACTTTTTCACGCACCTGCCAGTA
+CTGGCCTTTCATCCGCGCGACCACAAAATCGGGATGCCGCAACAGCGCCTGCTGCGCGAC
+AATATCCGCCGCCGTGATCCAGCGTAAAGGAACGCCGGGCGTGCGCGTATGCGGGCGAAT
+AAATAGCTGTTCGAAAGCGGTACGCTGGGCGGGCGTGTGCAAGCGGAAGCGCTCACTGAC
+ATCCGCGCCGTCCTCGTCATAGTAAGTGATTTTCAGCCATTCGCCTTTCTCATCCTGCCC
+ATGCTGCATCGTCATTCCGCTACAGCGCAGGACTAACGCATCCTTGAGCCTGAGCGCCGC
+TTTTAACATATCGTCCGGGTCGACCAGAATGGCGTCACATTCCCGGCAGCGTCGGGCGGC
+AATATCGTTTTCGGCATTACACTGCGGGCAGTTTTTGAAGCGAAAGCGAAAATCGCACTG
+CTCGCGATGGCCGTCGTCATCCTCAAACCAGCCCTGGCAGCGACGGCCAAAGTGTTCAAT
+CAGCGTGCCGTCGGCAGTGGTTTTCCCCCAGAAGGTGTTGGCAAAGCCGCAGGCCGGGCA
+AAATACCTGGACAGGGACGTTATCGCTTTTTCCCTTCGGGCTACCGACCTCCGGGGCATA
+CAGGTCGTGCGGGTTGCCTGCGTAATCAAGAATCAGGCAATCGGTCTTTCCCGGCGCAAG
+GCGCAGACCACGCCCGACAATTTGTTGGTAAAGACTAACTGACTCCGTGGGACGTAGAAT
+CGCGATGAGATCAACGTGTGGGGCGTCAAAGCCGGTGGTCAGTACCGAGACGTTAACCAG
+ATAGCGAAAACGCTGCGCCTTGAAATTATCAATCAGCGCGTCGCGCTCGGGCCCTGGCGT
+ATCGCCGGTAATCAGCGCCGCGTCGTCCGCCGGAAGCAGACCGACAATCTCTTTCGCATG
+TTCGACCGTGGCGGCGAAAATCATCACGCCTTTGCGCGTTTGCGCAAATTCCATAATCTG
+GCTGATGATGTGCGGCGTAATCCGCTGCTGCTTTTTCAGCTCGCGGTTCAGGTCGGCTTC
+GCTGAACAGCCCATTGCTTTGGGCCTGCAGGCGGCTGAAATCGTATTGGACCACTGGCAT
+ATCAAGCCGCTCAGGCGGCGTCAGATAGCCGTGTTTAATCATATAGCGCAGCGGCAGTTC
+ATAAATACAGTCGCGAAACAGAGCGTTGTCGTTGCCGCGCACCATACCGTGATAATGAAA
+TTGATAAATCCAGCCTTTTCCGAGGCGAAAAGGCGTGGCGGTGAGTCCAAGCAGACGTAA
+GTGAGGATTAACTTTACTCAGGTGAGTGAGGATTTGCTGATACTGACTGTCTTCATCGTC
+ACCGATGCGGTGGCATTCATCGACAATCAACAGCGAAAACTCCTCCTGGAAGGCGTCAAG
+ATTACGCGCCACCGACTGTACGCTGCCGAACACGACTTTGCCCTGACTCTCTTTACGTTT
+GAGTCCGGCGGCGAAAATATCCGCTTCCAGCCCCAGCGCGCAATATTTGGCGTGGTTCTG
+CGCGACCAGCTCTTTCACATGCGCCAGCACCAGTACCCGTCCGCGGGCGACGCGCGCCAG
+TTCGGCGATCACCAGGCTTTTACCTGCGCCGGTCGGCAGAACAATCACGGCGGGCGTACG
+GTGGCGGCGAAAGTGGCTGAGCGTGGCGTCTACGGCTTCTTGCTGGTAGGGGCGGAGTGT
+AAAAATCATGGTCTCACTACGTTAAACGGTTCCGGGAATAGTATGCCATGAATCATTTCC
+CTTGAGGGATATAGTTAGCCCGCTATACTGAGCGGATAGCAATTCCCTTTTTTCGGGTAG
+AATGCCCGATTTCCGTATTATTACAGGCTAAATCACACACATGCGACTTGATAAATTTAT
+CGCTCAGCAGCTTGGCGTCAGCCGCGCTATTGCCGGGCGTGAAATTCGTGGTAACCGCGT
+TACCGTCGATGGCGACATCATTAAAAATGCGGCCTTCAAACTGCTCCCGGAACATGCGGT
+TGCGTATGACGGCAATCCCTTAGCGCAGCAACACGGGCCACGCTATTTTATGCTTAACAA
+GCCGCAGGGATACGTTTGTTCAACCGATGATCCCGATCATCCAACGGTGCTGTATTTCCT
+GGATGAGCCGGTGGCGTATAAGCTGCATGCCGCAGGACGTCTGGATATCGATACTACCGG
+TCTGGTGTTAATGACAGATGACGGTCAGTGGTCGCACCGCATTACGTCGCCGCGCCATCA
+CTGTGAAAAAACCTATCTGGTGACCCTGGAGTCGCCGGTGGCCGACGATACGGCAGCGCA
+ATTTGCGAAAGGCGTGCAGTTGCATAATGAAAAAGATCTCACTAAACCCGCTACGCTGGA
+GGTGATAACACCTGTGCAGGTCCGTCTGACCATCAGCGAAGGCCGTTATCATCAGGTGAA
+GCGGATGTTTGCCGCAGTAGGCAATCGCGTTGTGGAACTGCACCGCGAACGGATTGGCGC
+CATTACGCTGGATGAGAATCTGGCTCCCGGCGAGTACCGCCCGTTGACTGAAGAAGAAAT
+CGCCAGCGTCGGCTAACTATCTCGTTAAATTCAGGAGTTCGATGTGACCACCCGGCAGCA
+CTCTTCCTTTGCCATTGTCTTTATTCTTGGCCTGTTGGCCATGTTAATGCCGCTGTCGAT
+TGATATGTATCTTCCAGCGCTGCCGGTGATTTCTGCGCAATTCGGCGTGCCTGCCGGTAG
+CGCGCAGATGACGCTCAGCACCTATATTCTGGGGTTTGCGCTGGGTCAGCTTATCTATGG
+ACCGATGGCGGATAGCCTCGGGCGTAAGCCGGTCATCCTGGGCGGGACGCTGGTATTTGC
+CGCTGCGGCGGTCGCCTGTGCGTTGGCGCAGACTATCGATCAACTGATCGTGATGCGTTT
+CTTTCACGGTTTGGTGGCTGCGGCGGCAAGCGTCGTCATCAATGCGCTGATGCGGGATAT
+TTATCCAAAGGAAGAGTTTTCGCGCATGATGTCATTTGTCATGCTGGTCACTACGATAGC
+GCCGTTAATGGCGCCCATTGTTGGCGGCTGGGTGTTGGTATGGTTAAGCTGGCACTATAT
+CTTCTGGATACTGGCTATTGCGGCGATTCTGGCGTCAGTCATGATCTTTGCTTTGATTAA
+AGAGACGCTGCCCGTTGAGCGGCGTCAGCCTTTTCATATTCGTACCACAATAGGTAACTT
+TGCCGCGTTGTTTCGCCACAAACGCGTACTGAGCTATATGCTGGCGAGCGGGTTCAGTTT
+TGCCGGTATGTTCTCTTTTTTGAGCGCGGGGCCGTTTGTCTATATCGAAATTAATCATGT
+TCCGCCGCAGGATTTCGGCTACTACTTCGCGTTGAACATCGTATTTCTGTTTGTGATGAC
+GATTATCAACAGCCGTTTTGTCAGACGGGTAGGGGCGCTAAACATGTTTCGGGCCGGGCT
+ATGGATTCAGTTTGCGATGGCGGTGTGGATGGTTTTCAGCGCGCTGATGGGCATTGGATT
+CTGGGCGCTGGTGGTTGGCGTTGCGGCGTTTGTCGGCTGTGTGTCGATGGTTTCGTCCAA
+TGCGATGGCGGTCATTTTGGATGAGTTTCCGCATATGGCCGGAACGGCGTCTTCGTTGGC
+GGGCACTTTCCGCTTTGGTATTGGCGCTATCGTCGGCGCGTTGCTGTCGCTGGCTACCTT
+TAACAGTGCGTGGCCGATGATCTGGTCGATTGCGCTTTGCGCCGCCTGTTCCATTCTGTT
\ No newline at end of file
diff --git a/t/data/genbank_gbff/genbank3.gff.proteome.faa.expected b/t/data/genbank_gbff/genbank3.gff.proteome.faa.expected
new file mode 100644
index 0000000..0a7d914
--- /dev/null
+++ b/t/data/genbank_gbff/genbank3.gff.proteome.faa.expected
@@ -0,0 +1,26 @@
+>ERS325326_00001.p01
+MPEVTPFQVLIVDDHPLMRRGIRQLLELDPAFYVVAEAGDGASAIDLANRIEPDLILLDL
+NMKGLSGLDTLNALRRDGVTAQIIILTVSDSASDIYALIDAGADGYLLKDSDPEVLLEAI
+RKGANGGKVFSDRVNEYLRERERFGAQEDPFSILTERELDVLHELAQGLSNKQIASVLNI
+SEQTVKVHIRNLLRKLNVRSRVAATILFLQTRGMQ*
+>ERS325326_00003.p01
+VISLEDASLTKKGIVKLSSATDSDSEALAATPKAVKTVMGEVRTKAPLDSPAFTGTPTTP
+TPPGDAKGLQTTNAEFVRKLIAALVGSVLEPLDTLQELADALGNDPNFATTVLNKLAGKQ
+PLDETLTALSGKSVDGLIEYVGLRETISRAADALQKSQNGGDIPDKDLFVRRIGAARAFD
+GAVIIGCDDNPWTTAEFIVWLESQGAFNHPYWMCRGSWSYAYNKIITDTGCGNICLAGAV
+IEVMGVRGAMTIRVTTSHSVSGW*
+>ERS325326_00004.p01
+MPFHIGSGCLPATISNRRIYRIAWSDTPPEMSSWEKMKEFFCSTHQTEALECIWTICHPP
+AGTTREDVINRFELLRTLAYAGWEESIHSGQHGENYFCILDEDSQEILSVTLDDAGNYTV
+NCQGYSETHRLTLDTAQGEEGTGHAEGASGTFRTSFLPATTAPQTPAEYDAVWSAWRRAA
+PAEESRGRAAAVQKMRACLNNGNAVLNVGESGLTTLPDCLPAHITTLVIPDNNLTSLPAL
+PPELRTLEVSGNQLTSLPVLPPGLLELSIFSNPLTHLPALPSGLCKLWIFGNQLTSLPVL
+PPGLQELSVSDNQLASLPALPSELCKLWAYNNQLTSLPTLPSGLQELSVSDNQLASLPTL
+PSELYKLWAYNNRLTSLPALPSGLKELIVSGNRLTSLPVLPSELKELMVSGNRLTSLPML
+PSGLLSLSVYRNQLTRLPESLIHLSSETTVNLEGNPLSERTLQALREITSAPGYSGPIIR
+FDMAGASAPRETRALHLAAADWLVPAREGEPAPADRWHMFGQEDNADAFSLFLDRLSETE
+NFIKDAGFKAQISSWLAQLAEDEALRANTFAMATEATSSCEDRVTFFLHQMKNVQLVHNA
+EKGQYDNDLAALVATGREMFRLGKLEQIAREKVRTLALVDEIEVWLAYQNKLKKSLGLTS
+VTSEMRFFDVSGVTVTDLQDAELQVKAAEKSEFREWILQWGPLHRVLERKAPKRVNALRE
+KQISDYEETYRMLSDTELRPSGLVGNTDAERTIGARAMESAKKTFLDGLRPLVEEMLGSY
+LNVQWRRN*
diff --git a/t/data/genbank_gbff/genbank_gene_presence_absence.csv b/t/data/genbank_gbff/genbank_gene_presence_absence.csv
new file mode 100644
index 0000000..a3e7826
--- /dev/null
+++ b/t/data/genbank_gbff/genbank_gene_presence_absence.csv
@@ -0,0 +1,12 @@
+"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","Min group size nuc","Max group size nuc","Avg group size nuc","genbank1","genbank2","genbank3"
+"group_10","","side tail fiber protein","2","2","1","2","3","2","2","","791","791","791","","ERS325340_00003.p01","ERS325326_00003.p01"
+"group_11","","secreted effector protein","2","2","1","2","1","2","1","","2366","2366","2366","","ERS325340_00004.p01","ERS325326_00004.p01"
+"group_9","","transcriptional regulator NarP","2","2","1","2","2","2","3","","647","647","647","","ERS325340_00001.p01","ERS325326_00001.p01"
+"group_1","","2-dehydro-3-deoxyphosphooctonate aldolase","1","1","1","1","1","1","1","","854","854","854","ERS325254_00002.p01","",""
+"group_2","","regulator","1","1","1","1","2","1","8","","809","809","809","ERS325254_00003.p01","",""
+"group_3","","regulator","1","1","1","1","8","1","7","","389","389","389","ERS325254_00004.p01","",""
+"group_4","","N5-glutamine S-adenosyl-L-methionine-dependent methyltransferase","1","1","1","1","7","1","6","","833","833","833","ERS325254_00005.p01","",""
+"group_5","","peptide chain release factor 1 (RF-1)","1","1","1","1","6","1","5","","1082","1082","1082","ERS325254_00006.p01","",""
+"group_6","","glutamyl-tRNA reductase","1","1","1","1","5","1","4","","1256","1256","1256","ERS325254_00007.p01","",""
+"group_7","","outer membrane lipoprotein","1","1","1","1","4","1","3","","623","623","623","ERS325254_00008.p01","",""
+"group_8","","isopentenyl monophosphate kinase","1","1","1","1","3","1","2","","851","851","851","ERS325254_00009.p01","",""
diff --git a/t/data/gene_category_count.csv b/t/data/gene_category_count.csv
new file mode 100644
index 0000000..ef92db9
--- /dev/null
+++ b/t/data/gene_category_count.csv
@@ -0,0 +1 @@
+Gene,Non-unique Gene name,Annotation,No. isolates,No. sequences,Avg sequences per isolate,Genome Fragment,Order within Fragment,Accessory Fragment,Accessory Order with Fragment,QC,"Min group size nuc","Max group size nuc","Avg group size nuc",sample1,sample2,sample3,sample4,sample5,sample6,sample7,sample8,sample9,sample10,sample11,sample12,sample13,sample14,sample15,sample16,sample17,sample18,sample19,sample20,sample21,sample22,sample23,sample24,sample25,sample26,sample27,sample28,sample [...]
bcsC_1,,cellulose synthase subunit BcsC,9,9,1,11,5692,,,,,,,sample1_001,sample2_001,sample3_001,sample4_001,sample5_001,sample6_001,sample7_001,sample8_001,sample9_001,sample10_001,sample11_001,sample12_001,sample13_001,sample14_001,sample15_001,sample16_001,sample17_001,sample18_001,sample19_001,sample20_001,sample21_001,sample22_001,sample23_001,sample24_001,sample25_001,sample26_001,sample27_001,sample28_001,sample29_001,sample30_001
betC_2,,sulfatase,9,9,1,11,1876,,,,,,,,sample2_002,sample3_002,sample4_002,sample5_002,sample6_002,sample7_002,sample8_002,sample9_002,sample10_002,sample11_002,sample12_002,sample13_002,sample14_002,sample15_002,sample16_002,sample17_002,sample18_002,sample19_002,sample20_002,sample21_002,sample22_002,sample23_002,sample24_002,sample25_002,sample26_002,sample27_002,sample28_002,sample29_002,sample30_002
comM_2,,putative ATP-dependent protease,9,9,1,11,6422,,,,,,,,,sample3_003,sample4_003,sample5_003,sample6_003,sample7_003,sample8_003,sample9_003,sample10_003,sample11_003,sample12_003,sample13_003,sample14_003,sample15_003,sample16_003,sample17_003,sample18_003,sample19_003,sample20_003,sample21_003,sample22_003,sample23_003,sample24_003,sample25_003,sample26_003,sample27_003,sample28_003,sample29_003,sample30_003
dmsA4_1,,anaerobic dimethyl sulfoxide reductase subunit A,9,9,1,11,540,,,,,,,,,,sample4_004,sample5_004,sample6_004,sample7_004,sample8_004,sample9_004,sample10_004,sample11_004,sample12_004,sample13_004,sample14_004,sample15_004,sample16_004,sample17_004,sample18_004,sample19_004,sample20_004,sample21_004,sample22_004,sample23_004,sample24_004,sample25_004,sample26_004,sample27_004,sample28_004,sample29_004,sample30_004
dosC,,diguanylate cylase,9,9,1,11,909,,,,,,,,,,,sample5_005,sample6_005,sample7_005,sample8_005,sample9_005,sample10_005,sample11_005,sample12_005,sample13_005,sample14_005,sample15_005,sample16_005,sample17_005,sample18_005,sample19_005,sample20_005,sample21_005,sample22_005,sample23_005,sample24_005,sample25_005,sample26_005,sample27_005,sample28_005,sample29_005,sample30_005
dsbA_3,,Thiol:disulfide interchange protein,9,9,1,11,1676,,,,,,,,,,,,sample6_006,sample7_006,sample8_006,sample9_006,sample10_006,sample11_006,sample12_006,sample13_006,sample14_006,sample15_006,sample16_006,sample17_006,sample18_006,sample19_006,sample20_006,sample21_006,sample22_006,sample23_006,sample24_006,sample25_006,sample26_006,sample27_006,sample28_006,sample29_006,sample30_006
fadH_1,,2 4-dienoyl-CoA reductase,9,9,1,11,5287,,,,,,,,,,,,,sample7_007,sample8_007,sample9_007,sample10_007,sample11_007,sample12_007,sample13_007,sample14_007,sample15_007,sample16_007,sample17_007,sample18_007,sample19_007,sample20_007,sample21_007,sample22_007,sample23_007,sample24_007,sample25_007,sample26_007,sample27_007,sample28_007,sample29_007,sample30_007
fimD_3,,outer membrane usher protein FimD,9,9,1,11,1813,,,,,,,,,,,,,,sample8_008,sample9_008,sample10_008,sample11_008,sample12_008,sample13_008,sample14_008,sample15_008,sample16_008,sample17_008,sample18_008,sample19_008,sample20_008,sample21_008,sample22_008,sample23_008,sample24_008,sample25_008,sample26_008,sample27_008,sample28_008,sample29_008,sample30_008
fliB_2,,lysine-N-methylase,9,9,1,11,1159,,,,,,,,,,,,,,,sample9_009,sample10_009,sample11_009,sample12_009,sample13_009,sample14_009,sample15_009,sample16_009,sample17_009,sample18_009,sample19_009,sample20_009,sample21_009,sample22_009,sample23_009,sample24_009,sample25_009,sample26_009,sample27_009,sample28_009,sample29_009,sample30_009
fliF,,flagellar MS-ring protein,9,9,1,11,1176,,,,,,,,,,,,,,,,sample10_010,sample11_010,sample12_010,sample13_010,sample14_010,sample15_010,sample16_010,sample17_010,sample18_010,sample19_010,sample20_010,sample21_010,sample22_010,sample23_010,sample24_010,sample25_010,sample26_010,sample27_010,sample28_010,sample29_010,sample30_010
ftsN,,cell division protein FtsN,9,9,1,11,6144,,,,,,,,,,,,,,,,,sample11_011,sample12_011,sample13_011,sample14_011,sample15_011,sample16_011,sample17_011,sample18_011,sample19_011,sample20_011,sample21_011,sample22_011,sample23_011,sample24_011,sample25_011,sample26_011,sample27_011,sample28_011,sample29_011,sample30_011
gatY_1,,fructose-bisphosphate aldolase,9,9,1,11,5872,,,,,,,,,,,,,,,,,,sample12_012,sample13_012,sample14_012,sample15_012,sample16_012,sample17_012,sample18_012,sample19_012,sample20_012,sample21_012,sample22_012,sample23_012,sample24_012,sample25_012,sample26_012,sample27_012,sample28_012,sample29_012,sample30_012
glfT2,,glycosyltransferase cell wall biogenesis,9,9,1,11,4176,,,,,,,,,,,,,,,,,,,sample13_013,sample14_013,sample15_013,sample16_013,sample17_013,sample18_013,sample19_013,sample20_013,sample21_013,sample22_013,sample23_013,sample24_013,sample25_013,sample26_013,sample27_013,sample28_013,sample29_013,sample30_013
group_1000,,membrane protein,9,9,1,11,593,,,,,,,,,,,,,,,,,,,,sample14_014,sample15_014,sample16_014,sample17_014,sample18_014,sample19_014,sample20_014,sample21_014,sample22_014,sample23_014,sample24_014,sample25_014,sample26_014,sample27_014,sample28_014,sample29_014,sample30_014
group_1001,,lipoprotein,9,9,1,11,597,,,,,,,,,,,,,,,,,,,,,sample15_015,sample16_015,sample17_015,sample18_015,sample19_015,sample20_015,sample21_015,sample22_015,sample23_015,sample24_015,sample25_015,sample26_015,sample27_015,sample28_015,sample29_015,sample30_015
group_1004,,arylsulfate sulfotransferase,9,9,1,11,2834,,,,,,,,,,,,,,,,,,,,,,sample16_016,sample17_016,sample18_016,sample19_016,sample20_016,sample21_016,sample22_016,sample23_016,sample24_016,sample25_016,sample26_016,sample27_016,sample28_016,sample29_016,sample30_016
group_1006,,putative inner membrane protein,9,9,1,11,2791,,,,,,,,,,,,,,,,,,,,,,,sample17_017,sample18_017,sample19_017,sample20_017,sample21_017,sample22_017,sample23_017,sample24_017,sample25_017,sample26_017,sample27_017,sample28_017,sample29_017,sample30_017
group_1009,,Phage-like protein,9,9,1,11,6416,,,,,,,,,,,,,,,,,,,,,,,,sample18_018,sample19_018,sample20_018,sample21_018,sample22_018,sample23_018,sample24_018,sample25_018,sample26_018,sample27_018,sample28_018,sample29_018,sample30_018
group_220,,putative glycosyl transferase,9,9,1,11,4174,,,,,,,,,,,,,,,,,,,,,,,,,sample19_019,sample20_019,sample21_019,sample22_019,sample23_019,sample24_019,sample25_019,sample26_019,sample27_019,sample28_019,sample29_019,sample30_019
group_277,,arylsulfate sulfotransferase,9,9,1,11,6149,,,,,,,,,,,,,,,,,,,,,,,,,,sample20_020,sample21_020,sample22_020,sample23_020,sample24_020,sample25_020,sample26_020,sample27_020,sample28_020,sample29_020,sample30_020
group_281,,transport system periplasmic binding protein,9,9,1,11,6046,,,,,,,,,,,,,,,,,,,,,,,,,,,sample21_021,sample22_021,sample23_021,sample24_021,sample25_021,sample26_021,sample27_021,sample28_021,sample29_021,sample30_021
group_283,,periplasmic protein,9,9,1,11,6091,,,,,,,,,,,,,,,,,,,,,,,,,,,,sample22_022,sample23_022,sample24_022,sample25_022,sample26_022,sample27_022,sample28_022,sample29_022,sample30_022
group_284,hemN_2,coproporphyrinogen III oxidase,9,9,1,11,6093,,,,,,,,,,,,,,,,,,,,,,,,,,,,,sample23_023,sample24_023,sample25_023,sample26_023,sample27_023,sample28_023,sample29_023,sample30_023
group_288,,permease,9,9,1,11,5969,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,sample24_024,sample25_024,sample26_024,sample27_024,sample28_024,sample29_024,sample30_024
hemD,,uroporphyrinogen III synthase,9,9,1,11,2788,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,sample25_025,sample26_025,sample27_025,sample28_025,sample29_025,sample30_025
hsrA_2,,Inner membrane transport protein YieO,9,9,1,11,5993,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,sample26_026,sample27_026,sample28_026,sample29_026,sample30_026
icsA,,autotransporter MisL,9,9,1,11,5845,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,sample27_027,sample28_027,sample29_027,sample30_027
kdpD,,sensor protein KdpD,9,9,1,11,4196,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,sample28_028,sample29_028,sample30_028
ligB_1,,NAD-dependent DNA ligase LigB,9,9,1,11,5819,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,sample29_029,sample30_029
marT_1,,putative transcriptional regulator MarT,9,9,1,11,5849,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,sample30_030
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
\ No newline at end of file
diff --git a/t/data/group_1.fa.aln b/t/data/group_1.fa.aln
new file mode 100644
index 0000000..6f6e678
--- /dev/null
+++ b/t/data/group_1.fa.aln
@@ -0,0 +1,84 @@
+>abc_00004
+ATGAGTAAGAACATCACGAAAAATATAATTTTAACGACAACATTATTACTATTAGGTACT
+GTATTACCTCAAAATCAAAAACCAGTATTTAGTTTTTACTCTGAAGCTAAAGCTTATAGC
+ATTGGTCAAGATGAAACTAACATCAATGAATTAATTAAATATTACACACAGCCTCATTTT
+TCATTTTCAAATAAATGGCTATATCAATATGATAATGGAAACATTTATGTTGAACTTAAG
+AGATATTCATGGTCAGCACATATATCTTTATGGGGCGCTGAAAGTTGGGGAAATATTAAT
+CAGTTAAAAGATCGTTACGTAGATGTGTTTGGACTAAAAGACAAAGATACTGATCAGTTA
+TGGTGGTCTTATAGAGAGACATTTACAGGTGGCGTTACACCAGCCGCAAAACCTTCTGAT
+AAAACTTATAATCTTTTTGTGCAATACAAAGATAAACTACAAACGATTATTGGTGCGCAT
+AAAATATACCAAGGCAATAAACCAGTATTAACATTGAAAGAAATCGATTTCCGTGCACGA
+GAAGCGTTAATAAAAAATAAAATATTATATACCGAAAATCGTAATAAAGGTAAGCTTAAG
+ATCACCGGTGGCGGTAATAACTACACTATTGATTTAAGCAAAAGATTACATTCAGATCTA
+GCAAATGTTTATGTTAAAAATCCTAATAAAATAACTGTTGACGTC---CTC------TTT
+GATTAG
+>abc_00004
+ATGAGTAAGAACATCACGAAAAATATAATTTTAACGACAACATTATTACTATTAGGTACT
+GTATTACCTCAAAATCAAAAACCAGTATTTAGTTTTTACTCTGAAGCTAAAGCTTATAGC
+ATTGGTCAAGATGAAACTAACATCAATGAATTAATTAAATATTACACACAGCCTCATTTT
+TCATTTTCAAATAAATGGCTATATCAATATGATAATGGAAACATTTATGTTGAACTTAAG
+AGATATTCATGGTCAGCACATATATCTTTATGGGGCGCTGAAAGTTGGGGAAATATTAAT
+CAGTTAAAAGATCGTTACGTAGATGTGTTTGGACTAAAAGACAAAGATACTGATCAGTTA
+TGGTGGTCTTATAGAGAGACATTTACAGGTGGCGTTACACCAGCCGCAAAACCTTCTGAT
+AAAACTTATAATCTTTTTGTGCAATACAAAGATAAACTACAAACGATTATTGGTGCGCAT
+AAAATATACCAAGGCAATAAACCAGTATTAACATTGAAAGAAATCGATTTCCGTGCACGA
+GAAGCGTTAATAAAAAATAAAATATTATATACCGAAAATCGTAATAAAGGTAAGCTTAAG
+ATCACCGGTGGCGGTAATAACTACACTATTGATTTAAGCAAAAGATTACATTCAGATCTA
+GCAAATGTTTATGTTAAAAATCCTAATAAAATAACTGTTGACGTC---CTC------TTT
+GATTAG
+>abc_00004
+ATGAGTAAGAACATCACGAAAAATATAATTTTAACGACAACATTATTACTATTAGGTACT
+GTATTACCTCAAAATCAAAAACCAGTATTTAGTTTTTACTCTGAAGCTAAAGCTTATAGC
+ATTGGTCAAGATGAAACTAACATCAATGAATTAATTAAATATTACACACAGCCTCATTTT
+TCATTTTCAAATAAATGGCTATATCAATATGATAATGGAAACATTTATGTTGAACTTAAG
+AGATATTCATGGTCAGCACATATATCTTTATGGGGCGCTGAAAGTTGGGGAAATATTAAT
+CAGTTAAAAGATCGTTACGTAGATGTGTTTGGACTAAAAGACAAAGATACTGATCAGTTA
+TGGTGGTCTTATAGAGAGACATTTACAGGTGGCGTTACACCAGCCGCAAAACCTTCTGAT
+AAAACTTATAATCTTTTTGTGCAATACAAAGATAAACTACAAACGATTATTGGTGCGCAT
+AAAATATACCAAGGCAATAAACCAGTATTAACATTGAAAGAAATCGATTTCCGTGCACGA
+GAAGCGTTAATAAAAAATAAAATATTATATACCGAAAATCGTAATAAAGGTAAGCTTAAG
+ATCACCGGTGGCGGTAATAACTACACTATTGATTTAAGCAAAAGATTACATTCAGATCTA
+GCAAATGTTTATGTTAAAAATCCTAATAAAATAACTGTTGACGTC---CTC------TTT
+GATTAG
+>abc_00006
+ATGAAAAAGAACATCATGAATAAATTAGTTTTATCAACAGCATTGTTACTTTTAGGAACC
+ACATCAACACAACTTCCTAAAACACCAATCAGTTTTTCATCTGAAGCAAAAGCCTATAAT
+ATCAGTGAAAACGAGACTAATATCAATGAGTTAATCAAATATTACACTCAGCCGCATTTT
+TCATTATCTGGAAAATGGCTATGGCAAAAGCCCAATGGTAGCATTCATGCAACATTGCAA
+ACGTGGGTTTGGTATAGTCATATTCAAGTGTTTGGATCCGAGAGTTGGGGAAACATTAAT
+CAGTTAAGAAATAAATACGTTGATATATTTGGAACTAAAGATGAGGACACAGTTGAAGGT
+TACTGGACTTATGATGAAACATTTACTGGTGGTGTTACGCCAGCAGCTACTTCATCTGAT
+AAACCTTATAGACTATTTTTAAAATATAGTGATAAACAACAAACTATCATCGGTGGACAT
+GAATTTTACAAAGGAAATAAACCAGTATTAACTTTAAAAGAATTAGATTTCCGTATTCGT
+CAAACATTAATAAAGAATAAAAAGTTATATAACGGAGAATTTAATAAAGGTCAAATTAAG
+ATAACTGCTGATGGAAATAATTACACGATTGATTTAAGTAAAAAGTTAAAATTAACTGAC
+ACAAACCGTTATGTTAAAAATCCTAAAAATGCACAAATTGAAGTCATACTCGAAAAATCT
+AACTAA
+>abc_00006
+ATGAAAAAGAACATCATGAATAAATTAGTTTTATCAACAGCATTGTTACTTTTAGGAACC
+ACATCAACACAACTTCCTAAAACACCAATCAGTTTTTCATCTGAAGCAAAAGCCTATAAT
+ATCAGTGAAAACGAGACTAATATCAATGAGTTAATCAAATATTACACTCAGCCGCATTTT
+TCATTATCTGGAAAATGGCTATGGCAAAAGCCCAATGGTAGCATTCATGCAACATTGCAA
+ACGTGGGTTTGGTATAGTCATATTCAAGTGTTTGGATCCGAGAGTTGGGGAAACATTAAT
+CAGTTAAGAAATAAATACGTTGATATATTTGGAACTAAAGATGAGGACACAGTTGAAGGT
+TACTGGACTTATGATGAAACATTTACTGGTGGTGTTACGCCAGCAGCTACTTCATCTGAT
+AAACCTTATAGACTATTTTTAAAATATAGTGATAAACAACAAACTATCATCGGTGGACAT
+GAATTTTACAAAGGAAATAAACCAGTATTAACTTTAAAAGAATTAGATTTCCGTATTCGT
+CAAACATTAATAAAGAATAAAAAGTTATATAACGGAGAATTTAATAAAGGTCAAATTAAG
+ATAACTGCTGATGGAAATAATTACACGATTGATTTAAGTAAAAAGTTAAAATTAACTGAC
+ACAAACCGTTATGTTAAAAATCCTAAAAATGCACAAATTGAAGTCATACTCGAAAAATCT
+AACTAA
+>abc_00006
+ATGAAAAAGAACATCATGAATAAATTAGTTTTATCAACAGCATTGTTACTTTTAGGAACC
+ACATCAACACAACTTCCTAAAACACCAATCAGTTTTTCATCTGAAGCAAAAGCCTATAAT
+ATCAGTGAAAACGAGACTAATATCAATGAGTTAATCAAATATTACACTCAGCCGCATTTT
+TCATTATCTGGAAAATGGCTATGGCAAAAGCCCAATGGTAGCATTCATGCAACATTGCAA
+ACGTGGGTTTGGTATAGTCATATTCAAGTGTTTGGATCCGAGAGTTGGGGAAACATTAAT
+CAGTTAAGAAATAAATACGTTGATATATTTGGAACTAAAGATGAGGACACAGTTGAAGGT
+TACTGGACTTATGATGAAACATTTACTGGTGGTGTTACGCCAGCAGCTACTTCATCTGAT
+AAACCTTATAGACTATTTTTAAAATATAGTGATAAACAACAAACTATCATCGGTGGACAT
+GAATTTTACAAAGGAAATAAACCAGTATTAACTTTAAAAGAATTAGATTTCCGTATTCGT
+CAAACATTAATAAAGAATAAAAAGTTATATAACGGAGAATTTAATAAAGGTCAAATTAAG
+ATAACTGCTGATGGAAATAATTACACGATTGATTTAAGTAAAAAGTTAAAATTAACTGAC
+ACAAACCGTTATGTTAAAAATCCTAAAAATGCACAAATTGAAGTCATACTCGAAAAATCT
+AACTAA
diff --git a/t/data/group_9.fa b/t/data/group_9.fa
new file mode 100644
index 0000000..f1e867b
--- /dev/null
+++ b/t/data/group_9.fa
@@ -0,0 +1,15 @@
+>abc_00010
+ATGACACATTTGACAAAGGTTTTAGATACACTAACTGGAATATGCGTAGTATTATTATTT
+AGTAAATATTTTGTGGCGTATGCAAATATGGTGTTTGATTGGAATTTAAGATGGTATTTG
+CTAGAAAACATACCACATTTGCCAATTATATTATTTATTCTGATGTTTATTTTCGGAGTA
+CCTTCTGAAATGATAAAAGATAGGCAAAGGAAAAATAACGGTGTTTAA
+>abc_00010
+ATGACACATTTGACAAAGGTTTTAGATACACTAACTGGAATATGCGTAGTATTATTATTT
+AGTAAATATTTTGTGGCGTATGCAAATATGGTGTTTGATTGGAATTTAAGATGGTATTTG
+CTAGAAAACATACCACATTTGCCAATTATATTATTTATTCTGATGTTTATTTTCGGAGTA
+CCTTCTGAAATGATAAAAGATAGGCAAAGGAAAAATAACGGTGTTTAA
+>abc_00010
+ATGACACATTTGACAAAGGTTTTAGATACACTAACTGGAATATGCGTAGTATTATTATTT
+AGTAAATATTTTGTGGCGTATGCAAATATGGTGTTTGATTGGAATTTAAGATGGTATTTG
+CTAGAAAACATACCACATTTGCCAATTATATTATTTATTCTGATGTTTATTTTCGGAGTA
+CCTTCTGAAATGATAAAAGATAGGCAAAGGAAAAATAACGGTGTTTAA
diff --git a/t/data/input_accessory_binary.fa b/t/data/input_accessory_binary.fa
new file mode 100644
index 0000000..ae3b208
--- /dev/null
+++ b/t/data/input_accessory_binary.fa
@@ -0,0 +1,20 @@
+>seq1
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+>seq2
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC
+>seq3
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACC
+>seq4
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACCC
+>seq5
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACCCC
+>seq6
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACCCCC
+>seq7
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACCCCCC
+>seq8
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACCCCCCC
+>seq9
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACCCCCCCC
+>seq10
+AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACCCCCCCCC
diff --git a/t/data/input_block_spreadsheet.csv b/t/data/input_block_spreadsheet.csv
new file mode 100644
index 0000000..1ebd7ca
--- /dev/null
+++ b/t/data/input_block_spreadsheet.csv
@@ -0,0 +1,52 @@
+"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","Min group size nuc","Max group size nuc","Avg group size nuc","oneblock","oneblockrev","contigwithgaps","nocontigs","threeblocks","threeblocksinversion"
+"bcsC_1","","cellulose synthase subunit BcsC","9","9","1","11","5692","","","",,,,"oneblock_0031","oneblockrev_0020","contigwithgaps_0121","nocontigs_0331","threeblocks_0131","threeblocksinversion_0031"
+"betC_2","","sulfatase","9","9","1","11","1876","","","",,,,"oneblock_0012","oneblockrev_0039","contigwithgaps_0045","nocontigs_0122","threeblocks_0112","threeblocksinversion_0029"
+"comM_2","","putative ATP-dependent protease","9","9","1","11","6422","","","",,,,"oneblock_0050","oneblockrev_0001","contigwithgaps_0197","nocontigs_0540","threeblocks_0250","threeblocksinversion_0050"
+"dmsA4_1","","anaerobic dimethyl sulfoxide reductase subunit A","9","9","1","11","540","","","",,,,"oneblock_0001","oneblockrev_0050","contigwithgaps_0001","nocontigs_0001","threeblocks_0001","threeblocksinversion_0001"
+"dosC","","diguanylate cylase","9","9","1","11","909","","","",,,,"oneblock_0005","oneblockrev_0046","contigwithgaps_0017","nocontigs_0045","threeblocks_0005","threeblocksinversion_0005"
+"dsbA_3","","Thiol:disulfide interchange protein","9","9","1","11","1676","","","",,,,"oneblock_0009","oneblockrev_0042","contigwithgaps_0033","nocontigs_0089","threeblocks_0009","threeblocksinversion_0009"
+"fadH_1","","2 4-dienoyl-CoA reductase","9","9","1","11","5287","","","",,,,"oneblock_0029","oneblockrev_0022","contigwithgaps_0113","nocontigs_0309","threeblocks_0129","threeblocksinversion_0012"
+"fimD_3","","outer membrane usher protein FimD","9","9","1","11","1813","","","",,,,"oneblock_0011","oneblockrev_0040","contigwithgaps_0041","nocontigs_0111","threeblocks_0111","threeblocksinversion_0030"
+"fliB_2","","lysine-N-methylase","9","9","1","11","1159","","","",,,,"oneblock_0007","oneblockrev_0044","contigwithgaps_0025","nocontigs_0067","threeblocks_0007","threeblocksinversion_0007"
+"fliF","","flagellar MS-ring protein","9","9","1","11","1176","","","",,,,"oneblock_0008","oneblockrev_0043","contigwithgaps_0029","nocontigs_0078","threeblocks_0008","threeblocksinversion_0008"
+"ftsN","","cell division protein FtsN","9","9","1","11","6144","","","",,,,"oneblock_0047","oneblockrev_0004","contigwithgaps_0185","nocontigs_0507","threeblocks_0247","threeblocksinversion_0047"
+"gatY_1","","fructose-bisphosphate aldolase","9","9","1","11","5872","","","",,,,"oneblock_0039","oneblockrev_0012","contigwithgaps_0153","nocontigs_0419","threeblocks_0239","threeblocksinversion_0039"
+"glfT2","","glycosyltransferase cell wall biogenesis","9","9","1","11","4176","","","",,,,"oneblock_0022","oneblockrev_0029","contigwithgaps_0085","nocontigs_0232","threeblocks_0122","threeblocksinversion_0019"
+"group_1000","","membrane protein","9","9","1","11","593","","","",,,,"oneblock_0002","oneblockrev_0049","contigwithgaps_0005","nocontigs_0012","threeblocks_0002","threeblocksinversion_0002"
+"group_1001","","lipoprotein","9","9","1","11","597","","","",,,,"oneblock_0003","oneblockrev_0048","contigwithgaps_0009","nocontigs_0023","threeblocks_0003","threeblocksinversion_0003"
+"group_1004","","arylsulfate sulfotransferase","9","9","1","11","2834","","","",,,,"oneblock_0019","oneblockrev_0032","contigwithgaps_0073","nocontigs_0199","threeblocks_0119","threeblocksinversion_0022"
+"group_1006","","putative inner membrane protein","9","9","1","11","2791","","","",,,,"oneblock_0017","oneblockrev_0034","contigwithgaps_0065","nocontigs_0177","threeblocks_0117","threeblocksinversion_0024"
+"group_1009","","Phage-like protein","9","9","1","11","6416","","","",,,,"oneblock_0049","oneblockrev_0002","contigwithgaps_0193","nocontigs_0529","threeblocks_0249","threeblocksinversion_0049"
+"group_220","","putative glycosyl transferase","9","9","1","11","4174","","","",,,,"oneblock_0021","oneblockrev_0030","contigwithgaps_0081","nocontigs_0221","threeblocks_0121","threeblocksinversion_0020"
+"group_277","","arylsulfate sulfotransferase","9","9","1","11","6149","","","",,,,"oneblock_0048","oneblockrev_0003","contigwithgaps_0189","nocontigs_0518","threeblocks_0248","threeblocksinversion_0048"
+"group_281","","transport system periplasmic binding protein","9","9","1","11","6046","","","",,,,"oneblock_0043","oneblockrev_0008","contigwithgaps_0169","nocontigs_0463","threeblocks_0243","threeblocksinversion_0043"
+"group_283","","periplasmic protein","9","9","1","11","6091","","","",,,,"oneblock_0044","oneblockrev_0007","contigwithgaps_0173","nocontigs_0474","threeblocks_0244","threeblocksinversion_0044"
+"group_284","hemN_2","coproporphyrinogen III oxidase","9","9","1","11","6093","","","",,,,"oneblock_0045","oneblockrev_0006","contigwithgaps_0177","nocontigs_0485","threeblocks_0245","threeblocksinversion_0045"
+"group_288","","permease","9","9","1","11","5969","","","",,,,"oneblock_0041","oneblockrev_0010","contigwithgaps_0161","nocontigs_0441","threeblocks_0241","threeblocksinversion_0041"
+"hemD","","uroporphyrinogen III synthase","9","9","1","11","2788","","","",,,,"oneblock_0016","oneblockrev_0035","contigwithgaps_0061","nocontigs_0166","threeblocks_0116","threeblocksinversion_0025"
+"hsrA_2","","Inner membrane transport protein YieO","9","9","1","11","5993","","","",,,,"oneblock_0042","oneblockrev_0009","contigwithgaps_0165","nocontigs_0452","threeblocks_0242","threeblocksinversion_0042"
+"icsA","","autotransporter MisL","9","9","1","11","5845","","","",,,,"oneblock_0035","oneblockrev_0016","contigwithgaps_0137","nocontigs_0375","threeblocks_0235","threeblocksinversion_0035"
+"kdpD","","sensor protein KdpD","9","9","1","11","4196","","","",,,,"oneblock_0023","oneblockrev_0028","contigwithgaps_0089","nocontigs_0243","threeblocks_0123","threeblocksinversion_0018"
+"ligB_1","","NAD-dependent DNA ligase LigB","9","9","1","11","5819","","","",,,,"oneblock_0034","oneblockrev_0017","contigwithgaps_0133","nocontigs_0364","threeblocks_0234","threeblocksinversion_0034"
+"marT_1","","putative transcriptional regulator MarT","9","9","1","11","5849","","","",,,,"oneblock_0036","oneblockrev_0015","contigwithgaps_0141","nocontigs_0386","threeblocks_0236","threeblocksinversion_0036"
+"nepI","","inner membrane transport protein","9","9","1","11","5866","","","",,,,"oneblock_0038","oneblockrev_0013","contigwithgaps_0149","nocontigs_0408","threeblocks_0238","threeblocksinversion_0038"
+"rffH","","glucose-1-phosphate thymidylyltransferase","9","9","1","11","2778","","","",,,,"oneblock_0015","oneblockrev_0036","contigwithgaps_0057","nocontigs_0155","threeblocks_0115","threeblocksinversion_0026"
+"rpoS","","RNA polymerase sigma factor RpoS","9","9","1","11","4905","","","",,,,"oneblock_0028","oneblockrev_0023","contigwithgaps_0109","nocontigs_0298","threeblocks_0128","threeblocksinversion_0013"
+"SBOV29371","","putative cytoplasmic protein","9","9","1","11","4890","","","",,,,"oneblock_0027","oneblockrev_0024","contigwithgaps_0105","nocontigs_0287","threeblocks_0127","threeblocksinversion_0014"
+"SBOV38871","","integral membrane protein","9","9","1","11","5890","","","",,,,"oneblock_0040","oneblockrev_0011","contigwithgaps_0157","nocontigs_0430","threeblocks_0240","threeblocksinversion_0040"
+"SBOV43201","","putative cytoplasmic protein","9","9","1","11","611","","","",,,,"oneblock_0004","oneblockrev_0047","contigwithgaps_0013","nocontigs_0034","threeblocks_0004","threeblocksinversion_0004"
+"selA_1","","transferase","9","9","1","11","5858","","","",,,,"oneblock_0037","oneblockrev_0014","contigwithgaps_0145","nocontigs_0397","threeblocks_0237","threeblocksinversion_0037"
+"speC_3","","Ornithine decarboxylase inducible","9","9","1","11","4201","","","",,,,"oneblock_0024","oneblockrev_0027","contigwithgaps_0093","nocontigs_0254","threeblocks_0124","threeblocksinversion_0017"
+"sptP","sptx","pathogenicity island 1 effector protein StpP","9","9","1","11","4859","","","",,,,"oneblock_0026","oneblockrev_0025","contigwithgaps_0101","nocontigs_0276","threeblocks_0126","threeblocksinversion_0015"
+"srgB","","putative outer membrane protein","9","9","1","11","1678","","","",,,,"oneblock_0010","oneblockrev_0041","contigwithgaps_0037","nocontigs_0100","threeblocks_0010","threeblocksinversion_0010"
+"stp","","export protein","9","9","1","11","1037","","","",,,,"oneblock_0006","oneblockrev_0045","contigwithgaps_0021","nocontigs_0056","threeblocks_0006","threeblocksinversion_0006"
+"STY3593","","putative regulatory protein","9","9","1","11","2820","","","",,,,"oneblock_0018","oneblockrev_0033","contigwithgaps_0069","nocontigs_0188","threeblocks_0118","threeblocksinversion_0023"
+"STY4162","","putative membrane protein","9","9","1","11","5712","","","",,,,"oneblock_0032","oneblockrev_0019","contigwithgaps_0125","nocontigs_0342","threeblocks_0232","threeblocksinversion_0032"
+"tmcA","","methionine tRNA cytidine acetyltransferase","9","9","1","11","2199","","","",,,,"oneblock_0013","oneblockrev_0038","contigwithgaps_0049","nocontigs_0133","threeblocks_0113","threeblocksinversion_0028"
+"tub","","permease","9","9","1","11","2418","","","",,,,"oneblock_0014","oneblockrev_0037","contigwithgaps_0053","nocontigs_0144","threeblocks_0114","threeblocksinversion_0027"
+"yadA","","membrane protein","9","9","1","11","5768","","","",,,,"oneblock_0033","oneblockrev_0018","contigwithgaps_0129","nocontigs_0353","threeblocks_0233","threeblocksinversion_0033"
+"ybbW_1","","allantoin permease","9","9","1","11","4372","","","",,,,"oneblock_0025","oneblockrev_0026","contigwithgaps_0097","nocontigs_0265","threeblocks_0125","threeblocksinversion_0016"
+"yhaO_2","","membrane protein","9","9","1","11","5310","","","",,,,"oneblock_0030","oneblockrev_0021","contigwithgaps_0117","nocontigs_0320","threeblocks_0130","threeblocksinversion_0011"
+"yicJ_1","","sodium:galactoside symporter","9","9","1","11","6117","","","",,,,"oneblock_0046","oneblockrev_0005","contigwithgaps_0181","nocontigs_0496","threeblocks_0246","threeblocksinversion_0046"
+"yigZ","","protein co-occurring with transport systems","9","9","1","11","2838","","","",,,,"oneblock_0020","oneblockrev_0031","contigwithgaps_0077","nocontigs_0210","threeblocks_0120","threeblocksinversion_0021"
+,
\ No newline at end of file
diff --git a/t/data/kraken_report.txt b/t/data/kraken_report.txt
new file mode 100644
index 0000000..d1c2053
--- /dev/null
+++ b/t/data/kraken_report.txt
@@ -0,0 +1,19 @@
+  0.39	86	86	U	0	unclassified
+ 99.61	21911	0	-	1	root
+ 99.61	21911	0	-	131567	  cellular organisms
+ 99.61	21911	3	D	2	    Bacteria
+ 99.60	21908	1	P	1224	      Proteobacteria
+ 99.59	21907	9	C	28211	        Alphaproteobacteria
+ 99.55	21898	11	O	356	          Rhizobiales
+ 99.50	21887	25	F	118882	            Brucellaceae
+ 99.39	21862	20467	G	234	              Brucella
+  6.28	1381	1349	S	120577	                Brucella ceti
+  0.10	21	21	-	1423891	                  Brucella ceti TE10759-12
+  0.05	11	11	-	1407053	                  Brucella ceti TE28753-12
+  0.04	9	0	S	120576	                Brucella pinnipedialis
+  0.04	9	9	-	520461	                  Brucella pinnipedialis B2/94
+  0.01	3	0	S	444163	                Brucella microti
+  0.01	3	3	-	568815	                  Brucella microti CCM 4915
+  0.00	1	0	S	236	                Brucella ovis
+  0.00	1	1	-	444178	                  Brucella ovis ATCC 25840
+  0.00	1	1	S	29461	                Brucella suis
\ No newline at end of file
diff --git a/t/data/kraken_test/database.idx b/t/data/kraken_test/database.idx
new file mode 100644
index 0000000..79a2451
Binary files /dev/null and b/t/data/kraken_test/database.idx differ
diff --git a/t/data/kraken_test/database.jdb b/t/data/kraken_test/database.jdb
new file mode 100644
index 0000000..f9ed7b5
Binary files /dev/null and b/t/data/kraken_test/database.jdb differ
diff --git a/t/data/kraken_test/database.kdb b/t/data/kraken_test/database.kdb
new file mode 100644
index 0000000..14a89d0
Binary files /dev/null and b/t/data/kraken_test/database.kdb differ
diff --git a/t/data/kraken_test/taxonomy/names.dmp b/t/data/kraken_test/taxonomy/names.dmp
new file mode 100644
index 0000000..120d91a
--- /dev/null
+++ b/t/data/kraken_test/taxonomy/names.dmp
@@ -0,0 +1,77 @@
+1	|	all	|		|	synonym	|
+1	|	root	|		|	scientific name	|
+2	|	Bacteria	|	Bacteria <prokaryote>	|	scientific name	|
+2	|	Monera	|	Monera <Bacteria>	|	in-part	|
+2	|	Procaryotae	|	Procaryotae <Bacteria>	|	in-part	|
+2	|	Prokaryota	|	Prokaryota <Bacteria>	|	in-part	|
+2	|	Prokaryotae	|	Prokaryotae <Bacteria>	|	in-part	|
+2	|	bacteria	|	bacteria <blast2>	|	blast name	|
+2	|	eubacteria	|		|	genbank common name	|
+2	|	not Bacteria Haeckel 1894	|		|	synonym	|
+2	|	prokaryote	|	prokaryote <Bacteria>	|	in-part	|
+2	|	prokaryotes	|	prokaryotes <Bacteria>	|	in-part	|
+1239	|	Bacillus/Clostridium group	|		|	synonym	|
+1239	|	Clostridium group firmicutes	|		|	synonym	|
+1239	|	Firmacutes	|		|	synonym	|
+1239	|	Firmicutes	|		|	scientific name	|
+1239	|	Firmicutes corrig. Gibbons and Murray 1978	|		|	authority	|
+1239	|	Gram positive bacteria	|		|	misspelling	|
+1239	|	Gram-positive bacteria	|		|	genbank common name	|
+1239	|	Low G+C firmicutes	|		|	synonym	|
+1239	|	clostridial firmicutes	|		|	synonym	|
+1239	|	firmicutes	|	firmicutes <blast1239>	|	blast name	|
+1239	|	low G+C Gram-positive bacteria	|		|	common name	|
+1239	|	low GC Gram+	|	low GC gram-positives<blast31968>	|	common name	|
+1279	|	"Aurococcus" Winslow and Rogers 1906	|		|	authority	|
+1279	|	Aurococcus	|		|	synonym	|
+1279	|	Staphylococcus	|		|	scientific name	|
+1279	|	Staphylococcus Rosenbach 1884	|		|	authority	|
+1280	|	"Micrococcus aureus" (Rosenbach 1884) Zopf 1885	|		|	authority	|
+1280	|	"Micrococcus pyogenes" Lehmann and Neumann 1896	|		|	authority	|
+1280	|	"Staphlococcus pyogenes citreus" Passet 1885	|		|	authority	|
+1280	|	"Staphylococcus pyogenes aureus" Rosenbach 1884	|		|	authority	|
+1280	|	ATCC 12600	|		|	type material	|
+1280	|	ATCC 12600-U	|		|	type material	|
+1280	|	CCM 885	|		|	type material	|
+1280	|	CCUG 1800	|		|	type material	|
+1280	|	CIP 65.8	|		|	type material	|
+1280	|	DSM 20231	|		|	type material	|
+1280	|	HAMBI 66	|		|	type material	|
+1280	|	JCM 20624	|		|	type material	|
+1280	|	Micrococcus aureus	|		|	synonym	|
+1280	|	Micrococcus pyogenes	|		|	synonym	|
+1280	|	NBRC 100910	|		|	type material	|
+1280	|	NCAIM B.01065	|		|	type material	|
+1280	|	NCCB 72047	|		|	type material	|
+1280	|	NCTC 8532	|		|	type material	|
+1280	|	Staphilococcus aureus	|		|	misspelling	|
+1280	|	Staphlococcus pyogenes citreus	|		|	synonym	|
+1280	|	Staphylococcus aureus	|		|	scientific name	|
+1280	|	Staphylococcus aureus Rosenbach 1884	|		|	authority	|
+1280	|	Staphylococcus pyogenes aureus	|		|	synonym	|
+1280	|	Staphylococus aureus	|		|	misspelling	|
+1280	|	Streptococcus aureus	|		|	misnomer	|
+1385	|	Bacillales	|		|	scientific name	|
+1385	|	Bacillales Prevot 1953	|		|	authority	|
+1385	|	Bacillus/Staphylococcus group	|		|	synonym	|
+46170	|	Staphylococcus aureus aureus	|		|	equivalent name	|
+46170	|	Staphylococcus aureus subsp. aureus	|		|	scientific name	|
+46170	|	Staphylococcus aureus subsp. aureus Rosenbach 1884	|		|	authority	|
+90964	|	Staphylococcaceae	|		|	scientific name	|
+90964	|	Staphylococcaceae Schleifer and Bell 2010	|		|	authority	|
+90964	|	Staphylococceae	|		|	includes	|
+90964	|	Staphylococceae Prevot 1940	|		|	includes	|
+90964	|	Staphylococcus group	|		|	synonym	|
+91061	|	Bacilli	|		|	scientific name	|
+91061	|	Bacilli Ludwig et al. 2010	|		|	authority	|
+91061	|	Bacillus/Lactobacillus/Streptococcus group	|		|	synonym	|
+91061	|	Firmibacteria	|		|	synonym	|
+91061	|	Firmibacteria Murray 1988	|		|	authority	|
+131567	|	biota	|		|	synonym	|
+131567	|	cellular organisms	|		|	scientific name	|
+663951	|	Staphylococcus aureus subsp. aureus 0528	|		|	misspelling	|
+663951	|	Staphylococcus aureus subsp. aureus 0582	|		|	synonym	|
+663951	|	Staphylococcus aureus subsp. aureus TW20	|		|	scientific name	|
+663951	|	Staphylococcus aureus subsp. aureus str. TW20	|		|	equivalent name	|
+663951	|	Staphylococcus aureus subsp. aureus strain TW20	|		|	equivalent name	|
+2000000000	|	Staphylococcus aureus	|		|	scientific name	|
diff --git a/t/data/kraken_test/taxonomy/nodes.dmp b/t/data/kraken_test/taxonomy/nodes.dmp
new file mode 100644
index 0000000..77545e1
--- /dev/null
+++ b/t/data/kraken_test/taxonomy/nodes.dmp
@@ -0,0 +1,12 @@
+1	|	1	|	no rank	|		|	8	|	0	|	1	|	0	|	0	|	0	|	0	|	0	|		|
+2	|	131567	|	superkingdom	|		|	0	|	0	|	11	|	0	|	0	|	0	|	0	|	0	|		|
+1239	|	2	|	phylum	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+1279	|	90964	|	genus	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+1280	|	1279	|	species	|	SA	|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+1385	|	91061	|	order	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+46170	|	1280	|	subspecies	|	SA	|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+90964	|	1385	|	family	|		|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+91061	|	1239	|	class	|		|	0	|	1	|	11	|	1	|	0	|	1	|	0	|	0	|		|
+131567	|	1	|	no rank	|		|	8	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
+663951	|	46170	|	no rank	|		|	0	|	1	|	11	|	1	|	0	|	1	|	1	|	0	|		|
+2000000000	|	663951	|	no rank	|	HI	|	9	|	1	|	1	|	1	|	0	|	1	|	1	|	0	|		|
diff --git a/t/data/large_accessory_binary_genes.fa b/t/data/large_accessory_binary_genes.fa
new file mode 100644
index 0000000..b5bf343
--- /dev/null
+++ b/t/data/large_accessory_binary_genes.fa
@@ -0,0 +1,94 @@
+>4976_2#9
+CCCCCCCCCCCCCCCACCCCCCAACCCCCCCCCCCCACCCACCCCCCCCCCCCCCCCCCCCCCAACAAAACACACCAACAACCACACAACAAACCAACAACACCCAAACAACACAACACACCAAAAACCCAAACCCAACACAACCAAACCACAACACACCCAACACCCACCAAACCCCCAACCACCCCCCACCCCCACACAACACCCACCAAACCCAACCCACCCCCACCCACCCAACCCCCCCAACACCCACAACACCCACCACCAACCCCACCCCCCCCCACCACCCCACACCCCCCCACCCCCCCCACCCCCCCCCCCCCACCCCCCCAAACCACCACAACACAACCACCACCCCAACACCACACCCCCCCCCCAAACCCCCCCCCCACCCCCACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCACCCCACCCACCCACACCCACCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCACCCC [...]
+>4976_3#1
+CCAACACACCACCCCCCCCCCCCACCCCCCCCCCCCAACCCACCCCCACCCCCCCCCCCCCCCAACAAAACCCCCACCCCACACCAACCACAAACAAAACCACCCAAACAAAAACACACACAACCAACACCAACCCCCCAACCCCAAACCACAACCAACACCACCCCAACCACACCCCCCCCCACCCCCCACCCCCACACAACACCCACCACCACCAACCCACCCCCACCCACCCCACCCACCCAACACCCACACAACCCACCCACCCACCACCACCCCCCCCCCACCCCACACCCCCCCCCCCCCCCAACCCCCCCCCCCCCCCCCCCCCCCACCCACACACAACAACCCCCCCCCCAACACCACACCCCCCCCCCCAACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCCCCCCAACCACCCACAACCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC [...]
+>4976_3#10
+CCCACACACAACCCCCCAACCCCCACCCCCCCCCCCACCCCACCCCCACCCCCCCCACCCCCCAACAAAACCCCCACCCCACCACAACCACAACCAACACCACCCAAACAACACCACAAACACCCAACACCAACCCCACACACCAAAACCACAAACAACCACACCCCCACCACACCCCCCCCCACCCCCCACCCCCACACAACACCCACCACCACCAACCCACCCCCACCCACCCCCCCCACCCAACACCCACACAACCCACCCCCACACCCACACCCCCCCCCCACCCCACACCCCCCCCCCCCCCCAACCCCCCCCCCCCCCCCCCCCCCCACCACCACACAACAACCCCCCCCCCAACACCACACCCACCCCCCCAACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCACCACCCCCCCCCCCCCCCACCCACCCACACCCCCCCCCCCACCCCCCCCCACCCCCCCCCCCCCCCCCCCCCCCCC [...]
+>4976_3#2
+ACACAACACACCCCCCCCCACCCACCCCCCCCCCCCACCCCACCCCCACCCCCCCCCCCCCCCAACAAAACCCCCACCCCACACCACACACAACCAACACCACCCAAACAAAACCACAAACAACCACAACCAACCCCCCACACCCAAAACACAACCAACACCACCCCAACCACACCCCCCCCCACCCCCCACCCCCACACAACACCCACCACCACCAACCCACCCCCACCCACCCCACCCACCCAACACCCACACAACCCACCCACCCACCACCACCCCCCCCCCACCCCACACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCCCCACCCACACACAACAACACCCCCCCCAACACCACACCCCCCCCCCCAACCCCCCCCCCCCCCACACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCCCCCCACCCACCCACACCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC [...]
+>4976_3#3
+CCCCCCCCCCCCCCCACAACCCAACAAAAACACCCCACCCACCCCCACCCCCCCCCCCCCCCCAACAACACCCCCCCCCAACCACACACCAAACCAACACAACCCAAACAACACAAAACACCAAAAACCCAAACCCAACACCACCAAACCACAACACACCCAACACCCACCAAACCCCCAACCACCCCCCACCCCCACACAACACCCACCAAACCCAACCCACCCCCACCCACCCCACCCCCCCACCCCCCCCCCCACCCACCACCAACCCCACCCCCCCCCCCCACCCCACACCCCCCCACCCCCCCCACCCCCCCCCCCCCACCCCCCCAAACCACCACAACACAACCACCACCCCAACACAACACCCCCCCCCCAAACCCCCCCCCCCCCCCAACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCACCCCACCCACCCACCACCACCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCACCCC [...]
+>4976_6#8
+CCCACACACCCCCACCCAACCCCCACCCCCCCCCCCACCCCACCCCCACCCCCCCCCCCCCCCAACAAAACCCCCACCCCACCACAACCACAACCAACACCAACCAAACAACACCACAAACACCCAACACCAACCCCACACACCCAAACCACAACCAACCACACCCCCACCACACCCCCCCCCACCCCCCACCCCCACACAACACCCACCACCACCAACCCACCCCCACCCACCCCCCCCACCCAACACCCACACAACCCACCCCCACACCCACACCCCCCCCCCACCCCACACCCCCCCCCCCCCCCAACCCCCCCCCCCCCCCCCCCCCCCACCACCACACAACAACCCCCCCCCCAACACCACACCCCCCCCCCCAACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCACCACCCCCCCCCCCCCCCACCCACCCACACCCCCCCCCCCACCCCCCCCCACCCCCCCCCCCCCCCCCCCCCCCCC [...]
+>4976_7#3
+CCCACAAACCCCACCCCCACCCCACCACACCCACAAAACCCACCCCCACCCCCCCCCCCCCCCAACAAAAACCCCACCCCACCACAACCACAACCAACACCACCCAAACAACAACACAAACCACCAACACCAACCCCACACACCCAAACCACAACCAACCCAACCCCCACCACACCCCCCCCCCACCCCCACCCCCACACAACACCCACCACCACCAACCCACCCCCACCCACCCCACCCCCCCAACACCCACACAACCCCCACCCCCACCCACACCCCCCCCCCACCCCACACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCCCCACCACAACAACAAAACACCCCCCCCAACACCACACCCCCCCCCCCAACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCCCCCCACCCACCCAAACCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC [...]
+>4976_7#4
+CCAACACACCCACCCCCCCCCACACCCCCCCCCCCCAAAACACCCCCACCCCCCCCCCCCCCCAACAAAACCCCCACCCCACCACAACCCAAACCAACACCACCCAAACAACACCACAAACCACCAACACAAACCCCAAACACCCAAACCACAAAACACCACACCCCAACCACACCCCCCCCCACCCCCCACCCCCACACAACACCCACCACACCCAACCCACCCCCACCCACCCCACCCCCCCAACACCCACACAACCCACCCCCACACCACCACCCCCCCCCCACCCCACACCCCCCCCCCCCCCCAACCCCCCCCCCCCCCCCCCCCCCCACCACCACACAACAACCCCCCCCCCAACACCACACCCCCCCCCCCAACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCACCACCCCCCCCCCCCCCCAACCCCCCACAACCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC [...]
+>4976_7#5
+ACCACAACACACCCCCCCACCCCCACCCCCCCACACAACCCACCCCCACCCCCCCCCCCCCCCAACAAAACCCCCACCCCACCACAACCACAACCAAAACCACCCAAACAACAACACACACCACCAACACCAACCCCACACACCCAAACCACAACAACAACCACCCCCACCACACCCCCCCCCACCCCCCACCCCCACACAACACCCACCACACCCAACCCACCCCCACCCACCCCACCCCCCCAACACCCACACAACCCACACCCCCACCACCACCCCCCCCCCACCCCACACCCCCCCCCCCCCCCAACCCCCCCCCCCCCCCCCCCCCCCACCCACACACAACAACACCCCCCCCAACACCACACCCCCCCCCCCAACCCCCCCCCCCCCCAACCCCCCCCCCCACCCCCCCACCACCCCCCCCCCCCCCCACCCACCCACAACCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC [...]
+>4976_7#6
+CCCCCCCCCCCCCCCCCAACCCAACCCCCCCCCCCCACCCACACCCCACCCCCCCCACCCCCCAACAACACACACCACAAACCACACACCAAACCAACAAAACCCAAACAACACAACACACCAAAAACCCAAACCCAACACAAACAAACCACAACACACCCAACACCCACCAAACCCCCAACCACCCCCCACCCCCACACAACACCCACCAAACCCAACCCACCCCCACCCACCCCACCCCCCCAACACCCACAACACCCACCACCAACCCACCCCCCCCCCCCCACCCCACACCACCCCACCCCCCCCACCCCCCCCCCCCCACCCCCCCCCACCACCACAACACAACCACCACCCCAACACCACACCCCCCCCCCAAACCCCCCCCCCACCCCAACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCACCCCACCCACCCACCACCACCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCACCCC [...]
+>4976_7#7
+CCCCCCCACACCCCCCCCCCCCCACCCCCAACCCCCACCCACCAAACAAAACACCCCCCCCCCAACAACACCCCCACCCCCCCACACACACAACCAACCCCACACAACAAACACAACACACCACCAACCCAAACCAACCACACACAAACCACAACACACCACACACCCACCAAACCCCCACCCACCCCCCACCCCCACACAACACCCACCACCACCAACCCCACCCCACCCACCCAACCACCCCAACACCCACACAACCCACCCACCCCCCACCCCCCCCCCACCACCCCACACCCCCCCCCCCCCCCCACCCCCCCCCCCCCACCCCCCCCCACCCACACAACACACCCCCCCACCCAACACCACACCCCCCCCCCCAACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCCCCCCACCAACACACCACCACCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCACCCC [...]
+>4976_7#8
+CCCCCCCACACCCCCCCCCCCCCACCAAACCCCCCCACCCACCCCACCAACCCACCCCCCCCCAACAAAACCCCCCCCCCCCACCACAAACAAACAACACCACCAAACAAACACAACACACCACCAACCCAAACCCAACACACACAAACCACAACACACCACACACCAACCAAACCCCCACCCAACCCCCACCCCCACACAACACCCACCACCACCAACCCCACCCCACCCACCCAACCCCCCCAACACCCACACAACCCACACCCCCCCCACACCCCCCCCCCCACCCCACACCCCCCCCCCCCCCCCACCCCCCCCCCCCCACCCCCCCCCACCACCACAACACACCCCCCCACCCAACACCACACCCCCCCCCCCAACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCCCCCCACCCACCCAACACCACCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCACCCC [...]
+>4976_7#9
+CCCCCCCCCCCCCCCCCCCCCCAACCAAACCCCCCCACCCACCACCCAACCCCCACACCCCCCAACAACACCCCCCCCCCACCACACACCAAACCAACAACACCCAAACAACACAACACACCAAAAACCCAAACCCAACACACACAAACCACAACACACCCAACACCCACCAAACCCCCAACCACCCCCCACCCCCACACAACACCCACCACACCCAACCCACCCCCACCCACCCAACCCCCCCAACACCCACAACACCCACCACCACACCCACCCCCCCCCCCCACCCCACACCCCCCCACCCCCCCCACCCCCCCCCCCCCACCCCCCCCCACCACCCAAACACAACCCCCACCCCAACACAACACCCCCCCCCCAAACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCACCCCACCCACCCACCACCACCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCACCCC [...]
+>5103_7#1
+CCCCCCCCCCCCCCCACCCAAAAAACCCCAAAAACCAAAAACCCCCCCCCCCCCCCCACCCCCAACAACACAAACCACACACCACACACCAAACCAACAACACCCAAAAACAACAACACACCAAAAACCCAAACCCAACACAACCAAACCACAACACAAAAAACACCAACAAAACCCCCAACCACCCCCCACCCCCACACAACAACCAACAAACCCAACCCACCCCCACCCACCCCACCCCCCCAACACCCACAACACCCACCAACAAACCCACCCCCCCCCCCCACCCCACACCCACCCACCCCCCCCACCCCCCCCCCCCCACCCCCCCCCACCACCCAAAAACAACCCCCACCACAACAACACCCCCCACCCCCAAACCCCCCCCCCACCCCCCCCCCCCCCCCACCCCCCCACAACCCCCCCCCCACCCCACCCACCCACCACCACCCCCCCACCCCCCCCCACCCCCCCCCCCCCCCCCCCCACCCC [...]
+>5103_7#10
+CCAACACACCCCACCCCCCCCACACCCCCCCCCCCCAAAACACCCCCACCCCCCCCCCCCCCCAACAAAACCCCCACCCCACCACAACCCAAACCAACACCACCCAAACAACACCACAAACCACCAACACAAACCCCAAACACCCAAACCACAACCAACCACACCCCACCCACACCCCCCCCCACCCCCCACCCCCACACAACACCCACACCACCCAACCCACCCCCACCCACCCCACCCCCCCAACACCCACACAACCCACCCCCACACCACCACCCCCCCCCCAACCCACACCCCCCCCCCCCCCCAACCCCCCCCCCCCCCCCCCCCCCCACCACCACACAACAACCCCCCCCCCAACACCACCCCCCACCCCCCAACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCACCACCCCCCCCCCCCCCCAACCCCCCACAACCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCC [...]
+>5103_7#8
+CCCACACCAACCCCACCAACCCCCACCCCCCCCCCCACCCCACCCCCACCCCCCCCCCCCCCCAACAAAACCCCCACCCCACCACAACCACAACCAACCCCAACCAAACAACACCACAAACACCCAACACCAACCCCACACACCCAAACCACAACCAACCACACCCCCACCACACCCCCCCCCACCCCCCACCCCCACACAACACCCACCACCACCAACCCACCCCCACCCACCCCCCCCACCCAACACCCACACAACCCACCCCCACACCCACACCCCCCCCCCACCCCACACCCCCCCCCCCCCCCAACCCCCCCCCCCCCCCCCCCCCCCACCACCACACAACAACCCCCCCCCCAACACCACACCCACCCCCCCAACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCACCACCCCCCCCCCCCCCCACCCACCCACACCCCCCCCCCCACCCCCCCCCACCCCCCCCCCCCCCCCCCCCCCCCC [...]
+>5103_7#9
+CAAAAACACCACCCCCCCCACCCACCAAACCCCCCCAACCCACCCCCACCCCCCCCCCCCCCCAACAAACACCCCACCCCACACCAACCACAAACAACACCACCCAAACAAAAACACACACAACCAACACCAACCCCCCACACCCAACACACAACCAACACCACCCCCACCACACCCCCCCCCACCCCCCACCCCCACACAACAACCACCACCACCAACCCACCCCCACCCACCCCACCCACCCAACACCCACACAACCCACCCACCCACCACCACCCCCCCCCCACCCCACACCCCCCCCCCCCCCCAACCCCCCCCCCCCCCCCCCCCCCCACCCACACACAACAACACCCCCCCCAACACAACACCCCCCCCCCCAACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCAACCCCCCCCCCCCCCCCACCCACCCACACCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC [...]
+>5116_2#1
+CCCACACCACCCCCACCAACCCCCACCCCCCCCCCCACCCCACCCCCACCCCCCCCACCCCCCAACAAAACCCCCACCCCACCACAACCACAACCAACACCACCCAAACAACACCACAAACACCCAACACCAACCCCACACACCAAAACCACAACCAACCACACCCCCACCACACCCCCCCCCACCCCCCACCCCCACACAACACCCACCACCACCAACCCACCCCCACCCACCCCCCCCACCCAACACCCACACAACCCACCCCCACACCCACACCCCCCCCCCACCCCACACCCCCCCCCCCCCCCAACCCCCCCCCCCCCCCCCCCCCCCACCACCACACAACAACCCCCCCCCCAACACCACACCCCCCCCCCCAACCCCCCCCCCCCCCCAACCCCCCCCCCCCCCCCCCACCACCCCCCCCCCCCCCCACCCACCCACACCCCCCCCCCCACCCCCCCCCACCCCCCCCCCCCCCCCCCCCCCCCC [...]
+>5116_2#11
+CCAACACACCCAACCCCCCCACCACCCCCCCCCCCCAAAACACCCCCACCCCCCCCCCCCCCCAACAAAACCCCCACCCCACCACAACCCAAACCAACACCACCCAAACAACACCACAAACCACCAACACAAACCCCAAACACCCAAACCACAACCAACCACACCCCAACCACACCCCCCCCCACCCCCCACCCCCACAAAACACCCACCACACCCAACCCACCCCCACCCACCCCACCCCCCCAACACCCACACAACCCACCCCCACACCACCACCCCCCCCCCACCCCACACCCCCCCCCCCCCCCAACCCCCCCCCCCCCCCCCCCCCCCACCACCACACAACAACCCCCCCCCCAACACCACACCCCCCCCCCCAACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCACCACCCCCCCCCCCCCCCAACCCCCCACAACCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCC [...]
+>5116_2#2
+CCCCCCCCCCCCCCCACCCCCCAACCCCCCCCCACCACCCACCCCCCACACCCCCCCCCCCCCAACAAAACAACCCAACCACCACACACCAAACCAACAACACCCAAACAACACAACACACCAAAAACCCAAACCCAACACAACCAAACCACAACACACCCAACACCAACCAAACCCCCAACCACCCCCCACCCCCACACAACACCCACCAAACCCAACCCACCCCCACCCACCCCACCACCCCAACACCCACAACACCCACCACCAACCCCACCCCCCCCCCCCACCCCACACCCCCCCACCCCCCCCACCCCCCCCCCCCCACCCCCCCAAACCACCACAACACAACCACCACCCCAACACCACACCCCCCCCCCAAACCCCCCCCCCACCCCCACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCACCCCACCCACCCACCACCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCC [...]
+>5116_2#3
+CCCCCCCCCCCCCCCCCCCACCAACCAAACCCCCCCACCCACACCCCAACCCCCAACCCCCCCAACAACACAACACAACCACCACACAACAAACCAACACCACCCAAACAACACCACACACCAAAAACCCAAACCCAACACAACCAAACCACAACACACCCAACACCCACCAAACCCCCAACCACCCCCCACCCCCACACAACAACCACCAAACCCAACCCACCCCCACCCACCCCACCCCCCCAACACCCCCAACACCCACCACCAACCCCACCCCCCCCCCCCACCCCACACCCCCCCACCCCCCCCACCCCCCCCCCCCCACCCCCCCCCACCACCACAACACAACCCCCCCCCCAACACCACACCCCCCCCCCAAACCCCCCCCCCACCCCCACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCCCCCCACCCACCCAACACCACCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCACCCC [...]
+>5116_2#4
+ACCCCCCCCCCCCCCACCCCCCAACACCCCCCCCCCACAAACCCCCCCCCCCCCCCCACCCCCAACAAAACACACCAACAACCACACACCAAACCAACAACACCCAAACAACACAACACACCAAAAACCCAAACCCAACACAACCAAACCACAACACACCCAACACCCACCAAACCCCCAACCACCCCCCACCCCCACACAACACCCACCAAACCCAACCCACCCCCACCCACCCCACCCCCCCAACACCCACAACACCCACCACCAACCACACCCCCCCCCACCACCCCACACCCCCCCACCCCCCCCACCCCCCCCCCCCCACCCCCCCAAACCACCACAACACAACCACCACCCCAACACCACACCCCCCCCCCAAACCCCCCCCCCACCCCCACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCACCCCACCCACCCACACACACCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCACCCC [...]
+>5116_3#7
+CCCCCCCACACCCCCCCCCCCCCACCCCCAACCCCCACCCACCAAACAAAACACCCCCCCCCCAACAACACCCCCACCCCCCCACACACACAACCAACCCCACACAACAAACACAACACACCACCAACCCAAACCAACCACACACAAACCACAACACACCACACACCCACCAAACCCCCACCCACCCCCCACCCCCACACAACACCCACCACCACCAACCCCACCCCACCCACCCAACCACCCCAACACCCACACAACCCACCCACCCCCCACCCCCCCCCCCCCACCCCACACCCCCCCCCCCCCCCCACCCCCCCCCCCCCACCCCCCCCCACCCACACAACACACCCCCCCACCCAACACCACACCCCCCCCCCCAACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCCCCCCACCAACACACCACCACCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCACCCC [...]
+>5116_3#8
+CCAACACACACCCCCCCCACCCCCACCCCCCCACCCAACCCACCCCCACCCCCCCCCCCCCCCAACAAAACCCCCACCCCACCACAACCACAACCAACACCACCCAAACAACAACACACACCACCAACACCAACCCCACACACCCAAACCACAACAACAACCACCCCCACCACACCCCCCCCCACCCCCCACCCCCACACAACACCCACCACCACCAACCCACCCCCACCCACCCCACCCCCCCAACACCCACACAACCCCCACCCCCACCCACACCCCCCCCCCACACCACACCCCCCCCCCCCCCCAACCCCCCCCCCCCCCCCCCCCCCCACCCACACACAACAACCCCCCCCCCAACACCACACCCCCCCCCCCAACCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCAAACCCCCCCCCCCCCCCCACCCACCCACACCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC [...]
+>5116_3#9
+CCAACACACCCACCCCCCCCCCCACCCCCCCCCCCCAACCCACCCCCACCCCCCCCCCCCCCCAACAAAACCCCCACCCCACACCAACCACAAACAACACCACCCAAACAAAAACACACACAACCAACCCAAACCCCCCACACCCAAACCAAAACCAACACCACCCCCACCACACCCCCCCCCACCCCCCACCCCCACACAACACCCACCACCACCAACCCACCCCCACCCACCCCACCCACCCAACACCCACACAACCCACCCACCCACCACCACCCCCCCCCCACCCCACACCCCCCCCCCCCCCCAACCCCCCCCCCCCCCCCCCCCCCCACCCACACACAACAACCCCCCCCCCAACACCACACCCCCCCCCCCAACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCCCCCCAACCACCCACAACCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC [...]
+>5139_5#3
+CCCCCCCCCCCCCCCCCCCACCCCCCCCCAACCCCCCCCCCCCCCCCCCCCCCACCCACCCCCCCCCCCCCCCCCCCCCCAAACCCCCCCCCCCACACCCCCCCCCCCCCCCCCCACCCCCCCCCCACCCCCCCCCACCCCCCCCCCACCCCCCCCCCCCCCACCCCACCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCAACCCCCCCCCCCCCCCCACCACCCCCCCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCACCCCCACCCCACCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCCCCCC [...]
+>5749_7#1
+CCCCCCCCCCCCCCCCCAACCCAACCCCCAACCCCCACAAACCCCCACCCCCCCCCCCCCCCCAACAACACACAACAACAAACACACACCAAACCAACAACACCCCAACAACACAACACACCAACAACCCAAACCCAACACAACCAAACCACAACACACCCAAAAAACAACAAAAACAAAAAAACAAAAAAAACAAACACAACACCCACCACACCCAACCCACCCCCACCCACCCCACCCCCCCAACACCCACAAAACCCACCACCAACCCCACCCCCCCCCCCCACCCCACACCCACCCACCCCCCCCACCCCCCCCCCCCCACCCCCCCCCACCACCACAACACAACCCCCCCCCCAACACCACACCCCCCCCCCAAACCCCCCCCCCACCCCAACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCCCCCCACCCACCCACCACCACCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCACCCC [...]
+>5749_7#2
+CCAACACACCAACCCCCCCCACCACCCCCCCCCCCAAAAACACCCCCACCCCCCCCACCCCCCAACAAAACCCCCACCCCACCACAACCCAAACCAACACCACCCAAACAACACCCAAAACCACCAACCACAACCCCAAACACCCAAACCACAACCAACCACACCCCAACCACACCCCCCCCCACCCCCCACCCCCAAAAAACACCCACACCACCCAACCCACCCCCACCCACCCCACCCCCCCAACACCCACACAACCCACCCCCACACCACCACCCCCCCCCCACCCCACACCCCCCCCCCCCCCCAACCCCCCCCCCCCCCCCCCCCCCCACCACCACACAACAACCCCCCCCCCAACACCACACCCCCCCCCCCAACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCACCACCCCCCCCCCCCCCCAACCCCCCACAACCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCC [...]
+>5749_7#3
+CCCCCCCCCCCCCCCCCCCCCCAACCCCCACCCCCCACCCACACCCCCACCCCCACACCCCCCAACAACACACAACACACACCACACACCAAACCAACACCACCCAAACAACACAACACACCAACAAACCAAACCCAACACAACCAAACCACAACACACCCAACACCCAACAAAACCCCAAAACAAAAACACCCCCACACACAACCAACCAAACAAAACAAACAAACAAAAAAAACAACCCACAAAAACCCACAACACCCACCACCAACCCCACCCCCCCCCCCCACCCCACACCCCCCCACCCCCCCCACCCCCCCCCCCCCACCCCCCCCCACCACCACAACACAACCCCCCCCCCAACACCACACCCCCCCCCCAAACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCACCCCACCCACCCACCACCACCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCACCCC [...]
+>5749_7#4
+CCCCCCCCCCCCCCCCCCCCCCAACACCCACACCCCACAAACCACCACCCCCCCCCCCCCCCCAACAACACACCACAAACACCACACACCAAACAAACAAAACCCCAACAACACAACACACCAACAACCCAAACCCAACACAACCAAACCACAACACACCCAAAAAACACCAAACACAAAACCACCCCCCACCCCCACACAACACCCACCACACCCAACCCACCCCCACCCACCCCACCCCCCCAACAAAAAAAACACCCACCACCAACCACACCCCCCCCCCCCACCCCACCCCCCCCCACCCCCCCCACCCCCCCCCCCCCACCCCCCCAAACCACCACAACACAACCCCCACCCCAACACCACACCCCCCCCCCAAACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCCCCCCACCCACCCACCAACACCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCACCCC [...]
+>5749_7#5
+CCCACACCAACCCACCCAACCCCCACCCCCCCCCCCACCCCACCCCCACCCCCCCCCCCCCCCAACAAAACCCCCACCCCACCACAACCACAACCAACACCAACCAAACAACACCACACACACCCAACACCAACACCACACACCCAAACCAAAACCAACCACACCCCCACCACACCCCCCCCCACCCCCCACCCCCACACAACACCCACCACCACCAACCCACCCCCACCCACCCCCCCCACCCAACACCCACACAACCCACCCCCACACCACCACCCCCCCCCCACCCCACACCCCCCCCCCCCCCCAACCCCCCCCCCCCCCCCCCCCCCCACCACCACACAACAACCCCCCCCCCAACACCACACCCCCCCCCCCAACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCACCACCCCCCCCCCCCCCCACCCACCCACACCCCCCCCCCCACCCCCCCCCACCCCCCCCCCCCCCCCCCCCCCCCC [...]
+>8016_2#61
+CCCCCCCCCCCCCCCCCAACCCAACCCCCCCCCACCACCCACCACCCAACCCCCCCCCCCCCCAACAACACCCCCCCCCAACCACACACCAAACCAACACCACCCAAACAACACCACACACCAACAACCCAAACCCAACACAACCAAACCACAACACACCCAACACCCACCAAACCCCCACCCACCCCACACCCCCACACAACACCAACCAAACCCAACCCACCCCCAAACACACCACCCCCCCAACACCCACAACAAAAAACACAAACCCCACCCCCCCCCCCCACCCCACACCCCCCCACCCCCCCCACCCCCCCCCCCCCACCCCCCCAAACCACCACAACACAACCACCACCCCAACACCACACCCCCCCCCCAAACCCCCCCCCCCCCCCAACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCACCCCACCCACCCACCACCACCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCACCCC [...]
+>8016_2#62
+CCCCCCCCCCCCCCCCCCCCCCAACAAAACCCCCCCACAAACCCCCCCACCCCCCAACCCCCCAACAACACACCACACACACCACACAACAAACCAACAAAACCCAAACAACACAACACACCAACAACCCAAACCCAACACAAACAAACCACAACACACCCAACACCCACCAAACCCCCAACCACCCCCCACCCCCACACAACACCCACCAAACCCAACCCACCCCCACCAAACCCACCCCCCCAACACCCAAAACACCCACCACAAACAACACCCCCCCCCCCCACCCCACACCACCCCACCCCCCCCACCCCCCCCCCCCCACCCCCCCCCACCACCACAACACAACCACCACCCCAACACAACACCCCCCCCCCAAACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCACCCCACCCACCCACCACCACCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCACCCC [...]
+>8016_2#65
+CCAACACACCACCCCCCCCCCACACCCCCCCCCCCCAAAACACCCCCACCCCCCCCCCCCCCCAACAAAACCCCCACCCCACCACAACCCAAACCAACACCACCCAAACAACACCACAAACCACCAACACACACCCCAAACACCCAAACCACAACCAACCACACCCCAACCACACCCCCCCCCACCCCCCACCCCCAAACAACACCCACACCACCCAACCCACCCCCACCCACCCCACCCCACCAACACCCACACAAAAAACCCCCACACCACCACAAAAAACAAAACCCACACCCCCCCCCCCCCCCAACCCCCCCCCCCCCCCCCCCCCCCACCACCACAACCAAACCCCCCCCCCAACACCACACCCCCCCCCCCAACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCACCACCCCCCCCCCCCCCCAACCCCCCACAACCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCCC [...]
+>8016_2#72
+CAAAAACCACACCCCCCCCACCCACCAAACCCCCCCAACCCACCCCCACCCCCCCCCCCCCCCAACAAACACCCCACCCCACACAAACCACAAACAACACCACCCAAACAAAAACACACACAACCAACACCAACCCCCCACACCCAAACCACAACCAACACCACCCCCACCACACCCCCCCCCACCCCCCACCCCCAAACAACAACCACCACCACCAACCCACCCCCACCCACCCCACCCACCCAACACCCACACAACCCACCCACCCACCACCACCCCCCCCCCACCCCACACCCCCCCCCCCCCCCAACCCCCCCCCCCCCCCCCCCCCCCACCCACACACAACAACACCCCCCCCAACACAACACCCCCCCCCCCAACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCACCACCCCCCCCCCCCCCCACCCACCCACACCCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC [...]
+>8016_2#73
+CCCACACCAACCCCCCCCCCCCCACCCCCCCCCCCCAACCCACCCCCACCCCCCCCCCCCCCCAACAAAACCCCCACCCCACACCAACCACAAACAACACCACCCAAACAAAAACACACACAACCAACACCAACCCCCCACACCCAAACCACAACCAACACCACCCCAACCACACCCCCCCCCACCCCCCACCCCCAAACAACACCCACCACCACCAACCCACCCCCACCCACCCCACCCACCCAACACCCACACAACCCACCCACCCACCACCACCCCCCCCCCACCAAACACCCCCCCCCCCCCCCAACCCCCCCCCCCCCCCCCCCCCCCACCCACACACAACAACCCCCCCCCCAACACCACACCCCCCCCCCCAACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCCCCCCAACCACCCACAACCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC [...]
+>8016_2#74
+ACAACACCAACCCCCCCCCCCCCACCCCCCCCCCCCAACCCACCCCCACCCCCCCCCCCCCCCAACAAAACCCCCACCCCACACCAACCACAAACAACACCACCCAAACAAAAACACACACAACCAACACCAACCCCCCACACCCAAACCACAACCAACACCACCCCCACCACACCCCCCCCCACCCCCCACCCCCAAACAACACCCACCACCACCAACCCACCCCCACCCACCCCACCCACCCAACACCCACACAACCCACCCACCCACCACCACCCCCCCCCCACCAAACACCCCCCCCCCCCCCCAACCCCCCCCCCCCCCCCCCCCCCCACCCACACACAACAACCCCCCCCCCAACACCACACCCCCCCCCCCAACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCCCCCCAACCACCCACAACCACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC [...]
+>8016_2#75
+CCCCCCCACACCCCCCCCCCCCCACCAAAAACCCCCACCCACCACACAAAAACACCCCCCCCCAACAACACCCCCACCCCCCCACACACACAACCACACCCACACAACAAACACAACACACCACCAACCCAAACCAACCACACACAAACCACAACACACCACACACCCACCAAACCCCCACCCACCCCCCACCCCCACACAAAACCCACCACCACCAACCCCACCCCACCCACCCAACCACCCCAACACCAACACAACCCACCCACCCCCCACACCCCCCCCCCCACCCCAAAAACAAAACAAAAAAACAAAAAAAACAAAAAAAAAAAAAAAACCACCACAACACACCCCCCCACCCAACACAACACCCCACCCCCCAACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCCCCCCACCCACACACCACCACCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCACCCC [...]
+>8016_2#76
+CCCCCCCACACCCCCCCCCCCCCACCCCCCCCCCCCACCCACCCCACACCCACACCCCCCCCCAACAAAACACACAACACCCACCACACACAACCAACCCCACCAAACAAACACAACACACCACCAACCCAAACCCAACACACACAAACCACAACACACCACACACCAACCAAACCCCCACCCACCCCCCACCCCCACACAACACCCACCACCACCAACCCCACCCCACCCACCCAACCCCCCCAACAAACACACAACCCACCCACCCCCCACCCACCCCCCCCCACCCCACACCCCCACCCCCCCCCCACCACAACCAAAAAAAAAAACACCAAAACCACAACACACACCAACAACAAACACCACACCCCCCCCCCCAACCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCCCCCCACCCACCCACCACCACCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCACCCC [...]
+>8016_2#77
+CCCCCCCACACCCCCCCAACCCCACCAAACCCCCCCACAAACCACACAAAACAACCCCCCCCCAACAACACCCCCCCCCCCCCAAACACCAAACCAACCCCACACAACAAACACAACACACCACCAACCCAAACCAAACACACACAAACCACAACACACCACACACCAACCAAACCCCCACCCACCCCCCACCCCCACACAACACCCACCACCACCAACCCCACCCCACCCACCCAACCACCCCAACACCCACAACACCCACCCACCCCAACAACCCCCCCCCCCACCCCACAAACCCAACAAAAACCCAAAAAAAACCAAAAAAAAAAAACCACCAACACAACACACCCCCCCACCCAAAACAAAAAAACCAAAAACAAAAAACCCCCCCCCCCAACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCCCCCCACCCACCCACACCCACCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCACCCC [...]
+>8016_2#78
+CCCCCCCACACCCCCCCAACCCCACCCCCCCCCCCCACAAACCAAACAAACCACCCCCCCCCCAACAAAACCCCCCCCCACCCAAACACACAACCACACCCACACAACAAACACAACACACCACCAACCCAAACCAACCACACACAAACCACAACACACCACACACCAACCAAACCCCCACCCACCCCCCACCCCCACACAACACCCACCACCACCAACCCCACCCCACCCACCCAACCACCCCAACACCCACACAACCCAACCACCCCCCACCCACCCCCCCCAACCCCAAACCCCACACCCCCAAACAAAACAACCCAAAAAAAAAAAACCAAAACCACAACACACACCACCACCAAAAACCAAACAACCAACAACAAAAACAAAAAACAAACAAAAAAAAAAAACAAAAAAAACACAAAAAAAAAACAAAAACAAAACAACCACAACCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCACCCC [...]
+>8016_2#79
+ACCCCCCACACCCCCCCCCCCCCACCAAACCCCCCCACAAACCACACAAAAAACCCCCCCCCCAACAAAACCCCCCCCCCCCCAAACAAAAAACCACACCCACACAACAAACACAACACACCACCAACCCAAACCAACCACACACAAACCACAACACACCACACACCCACCAAACCCCCACCCACCCCCCACCCCCACACAACACCCACCACCACCAACCCCACCCCACCCACCCAACCCCCCCAACACCCACACAACCCACACCCCCCCCACCCACCCCCCACCACCCCACACCCCACACCCCCAAACACCACAACCCAAAAAAAAAACACCACCAACACAACACACACCACCAAAAAAACACAAACAACCCAACCCAAAAAAAAAAACCCCCCCAAAAAAAAAAACAAAAAAAACACAAAAAAAAAACAAAAACAAAAAAAACAAAACCCCCCCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCACCCC [...]
+>8016_2#81
+ACAAAACCAACCCCCCCAACCCCAACACACCCCCCCACCCAACCCCCACCCCCCCCCCAAAAAAAAAAAACCCCCCCCCCACCAAAACCACAACCAACACCACCCAAACAAAAACACACACCACCAACACCACACCCCCAAACCCAAACCACAACCAACCCCCCCCCCCCCACACCCCCCCCCACCCCCCACCCCCACACAACACCCCACACAACCAACCCACCAAAACCCACCCCACACCCAACACACCCACACAACCCACCCCCACACCACCACAACAAACACACCCCACCCCCCCCCCCCCCCCCAACCCCCCCCCCCCCCCCCCCCCAAACCACAACAACCAAACACCCCCCCCAACACCACACCCACCCCCCCAACCCCCCCCCCCCCCCAACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCCCCCCACCCACCCAAAACCAAACACAACAAAAAAAAAACAAAAAAAAAAAAAAAAAAACAAAA [...]
+>8016_2#82
+ACCACACAAACCCCCCCAACCCCACACCCCCCCCCCACCCAACCCCCACCCCCCCCCCACCCCAACAAAACACACCACACACCAAACACACAAACAACACCACCCAAACAAAAACACACAACACCAACAACAAAACCCCAAACCCAAACCACAACCAACACCCCACCCCCAACACCACCCCCCACCCCCAAAAAAAACACCCAACACACAACACAAAAAAAAAAAAAACCCACCCCACACCCAAAAAACACACAACACCCACCCCCCCACCAACACAACAAACACACCCCACCCCCCCCCCCCCCCCCAACCCCCCCACCCCCCCCCCCCCAAACCACCACAACCAAACACCACCCCCAACACCACAACCAACCCCACAACCCCCCCCCCCCCACAACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCCCCCCACCCACCCACAACCACAAAAAACAAAACAAAAAACCCCCCCCCCCCCAACCCCCCCAC [...]
+>8016_2#83
+ACCACCACAACCCCCCCCCCCCCACACCCCCCCCCCACAAAACCCCCACCCCCCCCCCAACAAAACAAAACACACCACACACCAAACACACAAACAACACCACCCAAACAAAAACACACAACACCAACAACAAAACCCCAAACCCAAACAACAACCAACCCACCACCCCCCACACCACCCCCCACCCCCAAACAAAACACCCAACACACAACACAAAAAAAACAAAAACCCACCCCACACCCAAAAAACACACACAACCCACCCCCCCACCAACACAACAACCCCACCCCACCCCCCCCCCCCCCCCCAACCCCCCCACCCCCCCCCCCCCAAAAAACCAAAACCAAACACCACCCCCAACACCACACAAAACCCCCCAACCCCCCCCCCCCCACCACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCCCCCCACCCACCCACAACCACAAAAAACACAACAAAAAACCCCACCCCCCCCAACCCCACCCC [...]
+>8016_2#85
+ACAAAACAAACCCCCCACCCCCCAACACACCCCCCCACCCAACCCCCACCCCCCCCCCACCCCAAAAAAACCCCCCCCCCACCAAAAACACAACCAACACCACCCAAACAAAAACACACACCACCACAACCAACCCCAAAAACCCAAACCACCACCAACCCCACCCCCCCCACACCCCCCCCCACCCCCAAAACCAACACAACACACCACACAACCAACCCACCACAACCCACCACAACCCCCAAACACCCCCACAACCCACCCCCACACCACCACCAAAAACACACCCCACCCCCCCCCCCCCCCCCAACCCCCCCCCCCCCCCCCCCCCCCAAAACCACAACCAAACACCCCCCCCAACACCACACCCAACCCCCCAACCCCCCCCCACAACCCACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCCCCCCACCCACCCCACCCCCAACACAACAAAAACCCACCAAAAAAAAAAAAAAAAAAAAAACA [...]
+>8016_2#86
+ACCACACCACCCCCCCACCCCCCACACCCCCCACCCACACCACCCCCACCCCCCCCCCAAACAAACAAAACACACCACACACAAAAACCACAACCAACACCCCACAAACAAAACCACACCCCACCAACACCCCCACCCCAAACCCAAACAACAACCAACACCCCACCCACCACACCCCCCCCCACCCCCAAAACCAACACCACACACCACACACCCCACCAACACCAACCCACCACACACCCAAAACACACACACAACCCACCCCCCCACCACCACCCCCCACACCCACCACCCCCCCCCCCCCCCCCAACCCCCCCACCCCCCCCCCCCCAAACCACCACAACCAAACACCACCCCCAACACCACACCCAACCCCCCAACCCCCCCCCACAACCCACCCCCCCCCCCCCCCCCCACACCCCCCCCCCCCCCCCACCCACCCACAACCACAACACCCCCCCCCCCCACCCCCCCCCCCCCCCCCCCCCCCCC [...]
diff --git a/t/data/locus_tag_gffs/query_1.gff b/t/data/locus_tag_gffs/query_1.gff
new file mode 100644
index 0000000..2c5c8fb
--- /dev/null
+++ b/t/data/locus_tag_gffs/query_1.gff
@@ -0,0 +1,271 @@
+##gff-version 3
+##sequence-region abc|SC|contig000001 1 15000
+abc|SC|contig000001	Prodigal:2.60	CDS	172	1131	.	-	0	gene=different;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q2G1X0,protein motif:TIGRFAMs:TIGR01002,protein motif:Pfam:PF07968.6;locus_tag=abc_00001;product=Alpha-toxin,beta-channel forming cytolysin,Leukocidin/Hemolysin toxin family protein;protein_id=gnl|SC|abc_00001
+abc|SC|contig000001	Prodigal:2.60	CDS	1804	1950	.	+	0	inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00002;product=hypothetical protein;protein_id=gnl|SC|abc_00002
+abc|SC|contig000001	Prodigal:2.60	CDS	1934	2131	.	+	0	inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00003;product=hypothetical protein;protein_id=gnl|SC|abc_00003
+abc|SC|contig000001	Prodigal:2.60	CDS	2621	3337	.	-	0	inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13350,protein motif:Pfam:PF02876.11;locus_tag=abc_00004;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00004
+abc|SC|contig000001	Prodigal:2.60	CDS	3445	4170	.	-	0	gene=speH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0C0I6,protein motif:CLUSTERS:PRK13349,protein motif:Pfam:PF02876.11;locus_tag=abc_00005;note=SPE H;product=hypothetical protein,superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00005
+abc|SC|contig000001	Prodigal:2.60	CDS	4265	4990	.	-	0	inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13043,protein motif:Pfam:PF02876.11;locus_tag=abc_00006;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00006
+abc|SC|contig000001	Prodigal:2.60	CDS	5428	6429	.	+	0	eC_number=2.1.3.3;gene=argF;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P99073,protein motif:CLUSTERS:PRK04284,protein motif:Cdd:COG0078,protein motif:TIGRFAMs:TIGR00658,protein motif:Pfam:PF00185.1;locus_tag=abc_00007;product=Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Aspartate/ornithine carbamoyltransferase%2C Asp/Orn b [...]
+abc|SC|contig000001	Prodigal:2.60	CDS	6449	7384	.	+	0	eC_number=2.7.2.2;gene=arcC1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q7A627,protein motif:CLUSTERS:PRK12353,protein motif:TIGRFAMs:TIGR00746,protein motif:Pfam:PF00696.22;locus_tag=abc_00008;product=Carbamate kinase 1,putative amino acid kinase,carbamate kinase,Amino acid kinase family protein;protein_id=gnl|SC|abc_00008
+abc|SC|contig000001	RNAmmer:1.2	rRNA	7556	9112	.	+	0	inference=COORDINATES:profile:RNAmmer:1.2;locus_tag=abc_01705;product=16S ribosomal RNA
+abc|SC|contig000001	Prodigal:2.60	CDS	9419	9646	.	+	0	inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00010;product=hypothetical protein;protein_id=gnl|SC|abc_00010
+abc|SC|contig000001	Prodigal:2.60	CDS	9952	10899	.	-	0	inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF03595.11;locus_tag=abc_00011;product=C4-dicarboxylate transporter/malic acid transport protein;protein_id=gnl|SC|abc_00011
+abc|SC|contig000001	Prodigal:2.60	CDS	11148	11336	.	+	0	inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00012;product=hypothetical protein;protein_id=gnl|SC|abc_00012
+abc|SC|contig000001	Aragorn:1.2.34	tRNA	11803	11878	.	-	0	inference=COORDINATES:profile:Aragorn:1.2.34;locus_tag=abc_00013;product=tRNA-Arg(tct)
+abc|SC|contig000001	Prodigal:2.60	CDS	12241	12375	.	+	0	inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5;locus_tag=abc_00014;product=Gonococcal growth inhibitor III,Staphylococcus haemolytic protein;protein_id=gnl|SC|abc_00014
+abc|SC|contig000001	Prodigal:2.60	CDS	12432	12566	.	+	0	inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5;locus_tag=abc_00015;product=Gonococcal growth inhibitor III,Staphylococcus haemolytic protein;protein_id=gnl|SC|abc_00015
+abc|SC|contig000001	Prodigal:2.60	CDS	12699	13385	.	+	0	eC_number=3.-.-.-;gene=yfnB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O06480,protein motif:CLUSTERS:PRK09449,protein motif:TIGRFAMs:TIGR02254,protein motif:Pfam:PF00702.1;locus_tag=abc_00016;product=Putative HAD-hydrolase yfnB,dUMP phosphatase,HAD hydrolase,haloacid dehalogenase-like hydrolase;protein_id=gnl|SC|abc_00016
+##FASTA
+>abc|SC|contig000001
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+ATACCCACCACATTTGGTGGAGAACCAAAAATTAGCCGAAAAACATCATTTCTGAAGTTA
+TCGGCTAAAGTTATAAATTATATTTATTTGTACATGAACAAATAATTTACATTAATTTGT
+CATTTCTTCTTTTTCCCAATCGATTTTATATCTTTCTGAAGAACGATCTGTCCATTTATC
+TTTAGTATTGGTACCTTTCCAATTTGTTGAAGTCCAATGCAATTGGTAGTCATCACGAAC
+TCGTTCGTATATTACATCTATATTTGTTTGTTGTTTGGATGCTTTTCTATCCATAGTAAT
+AACTGTAGCGAAGTCTGGTGAAAACCCTGAAGATAATAGAGAACTTGCTTTGTTAGGATC
+AAGGAAGTTCTCTGCTGCTTTCATAGAACCATTTCTAGTTTTCATGAAAAGTTGATTGCC
+ATATACCGGGTTCCAAGAATCTCTATCATATGGTCCCCAATTTTGATTCACCATATTGTT
+AAATATCACTTTCCAGCCTACTTTTTTATCAGTTGGGCTCTCTAAAATTGTTTTGAAATC
+AGGTTGAACATATTTCAGTGTATGACCAATCGAAACATTTGCACCAATAAGGCCGCCAAT
+TTTTCCTGTATCATCACCAGTAACATTACCGTTGAATCCATAAGTTAAAGTACTCATATA
+CTCTTTTGTATCAATCGAATTTCTTGGATAGTAATCAGATATTTGAGCTACTTCATTATC
+AGGTAGTTGCAACTGTACCTTAAAGGCTGAAGGCCAGGCTAAACCACTTTTGTTAGCACC
+TTCTTCGCTATAAACTCTATATTGACCAGCAATGGTACCTTTCGTTCTAATAACTAGCAG
+TTTTTTATTGTGATTTTTATCATCGATAAAACTATAAAATACTTTTTTGTGCATGCCATT
+TTCTTTATCATAAGTGACTAAATCACCTGTTTTTACTGTAGTATTGCTTCCAATATCTGT
+AGTACCGGTTTTAATATTAATATCAGAATCTGCGGCATTAGCGACAGGATTCATTAATAT
+GGAACCTAGCAATAGTGTTGTTGTTACTGAGCTGACTATACGTGTTTTCATTTTCATCAT
+CCTTCTATTTTTTTTAACGATTTGAGGAAACAATAATCAATATGTCAATTTAGAATATTG
+CAGTTGAGACATATCAAATATTTATAATCGTTTATATTAGTATATTTAATGTAGCAACTG
+ATAAATTACTGAGTGATGATGAGTGATTATTTTAAGAATATGTTTTTAACTTTTATTTAA
+AATTTGAAAGGAAGCATTTCAATTTCGAGGGTTAGTCAAAGTTGAATAAATTCTTTATGA
+AACAAGGAAAAGACATAGCTAATTTTATTGATTAATTTCTTTAAAACTAATGATTTGTTT
+GATTTAAAAATGTAATCGATTACAATATAAAAATACAAATATCTTAGAATTAAATCAATT
+AATTAACTATTAAATAAAAATTAAATATATATTAACTACTGTAAATTAATAAATAGAAAT
+AGAGAAAAAGGGTATTAATTATGTTTGGAATTTCGTCGGGAATAATTTTTGCCATTACAT
+AGAAATATCTAATAGATAATGAAAAAGTATCGTATGTATTTTTAATATAGTGTAAAATAT
+CATATGTAAAATAAAATGTAGATTTTTAGTTAGAGGCATTATAAGAAAATTTTGAGTATA
+GGTTAGCTTTTAATTATGAATCTTATTGAAATTTGATTAATAAATATATGATAGGGGATT
+AAAATGAAACTATTTTATATCGTATTTCTTATTATTATATGGCTGAATATATTTTTAGGA
+AATGAAATTATCCATACACTGACTGTTTTAATAACAACATTGTATATTGTTAATTCAAGA
+AAGGGGATTAAAAATGACAGAGTTGAATAATATTATAAACTCTCTTCAATCTTTGTTTGA
+GTCTGAATCAGGCTATAAAATTTCGAAAAATTCAGGAGTTCCATATCAAACAGTACAAGA
+TTTAAGAAATGGGAAAACCAAACTAGAAGATGCTAGATTTAGAACGATTATTAAACTTTA
+TAGTTACTATGTCTCATTAAAAGAACATTAATCATGGGACAATAAAGTATTGCTATAATA
+ATTGAATCATTAAATGAAGGAATAGAGCCTAAGACATAATAAATCAATGTCTTAGGCTCT
+ACAATATTATATTGGTAGTAGTTGACCGAATGAAAATGACCATGTAACAAGCATTTTTGG
+GGCCCCAACACAGAAGCTGACGAAAAGTCAGCTTACAATAATGTGCAAGTTGGGGATGGG
+CCCCAACACAGAGAATTTCAAAAAGAAATTCTACAGACAATGCAAGTTGGCGGGGCCCCA
+ACATAGAGAAATTCAAAAAGAAATTCTACAGACAATGCAAGTTGGGGAGTGACAACGAAA
+TAAATTTTATAAAATACCATTTCTGTCACATTCCTACTCCCAATTCCATTTAAATATATA
+GAAAATTTCACCTATTATATACATCATACACTTTAAAAATTATCATTTAAATAATCTGAT
+TTGGAATTAAAGTGCATGAAGTATAAGTCACCTTCATATACTAATCAAAGAGGACGTCAA
+CAGTTATTTTATTAGGATTTTTAACATAAACATTTGCTAGATCTGAATGTAATCTTTTGC
+TTAAATCAATAGTGTAGTTATTACCGCCACCGGTGATCTTAAGCTTACCTTTATTACGAT
+TTTCGGTATATAATATTTTATTTTTTATTAACGCTTCTCGTGCACGGAAATCGATTTCTT
+TCAATGTTAATACTGGTTTATTGCCTTGGTATATTTTATGCGCACCAATAATCGTTTGTA
+GTTTATCTTTGTATTGCACAAAAAGATTATAAGTTTTATCAGAAGGTTTTGCGGCTGGTG
+TAACGCCACCTGTAAATGTCTCTCTATAAGACCACCATAACTGATCAGTATCTTTGTCTT
+TTAGTCCAAACACATCTACGTAACGATCTTTTAACTGATTAATATTTCCCCAACTTTCAG
+CGCCCCATAAAGATATATGTGCTGACCATGAATATCTCTTAAGTTCAACATAAATGTTTC
+CATTATCATATTGATATAGCCATTTATTTGAAAATGAAAAATGAGGCTGTGTGTAATATT
+TAATTAATTCATTGATGTTAGTTTCATCTTGACCAATGCTATAAGCTTTAGCTTCAGAGT
+AAAAACTAAATACTGGTTTTTGATTTTGAGGTAATACAGTACCTAATAGTAATAATGTTG
+TCGTTAAAATTATATTTTTCGTGATGTTCTTACTCATTAGAACATCTCCTTTCAGAGGAA
+TCATGATACGAGGAATAAGAAATTTAAATGTGAGCGAAGTCAATATAGTATTTGCGATTA
+TTTTTATTAACTCCATTATTATTGTTAGTTTGATTTTTCGAGGATAACTTCAATTTTTGC
+ATTTTGAGGTTTTTTAACATATCTATTTGCATCAGTTGATGGCAACCTTTTACTTAAATC
+TATTGTGTAGTTATTGTCTGTACCTGTTATTTTAATTTGTCCTTTATTATAAGAATTATT
+ATATAATTTTTTACTTTTAATTAATGTTTGACGAATACGAAAATCTAATTCTTTTAAAGT
+TAAAACAGGCTTATTGCCTTCATAAACTGGAAATCCGCCAGTAAACGTTTCTGCTTTATC
+TTTATATGTTACATTCAGTTTATAGTGTTTATCGTTAGATGTTGCTGCAGGAGTAACACC
+ACCAGTAAACGTTTCTTGAGATAATGCAAAAGAATCAATGGTTTCTTGGTCTTTTATGCC
+AAAAATATCAACGCTTTTATTTCTTAATTGGTTGATATTGCCCCAACTTTCAGGTCCATA
+AACTTGAATATGACTATACCAAGAAAACTGTAACAACGTTGCATGAATCGTACCGTTATC
+TTTTTGCCATAACGTACTGTTAGAGAAGGTTAAATATTTTTGCGAGTAATATTTAGTTAA
+CTCATTAACGTTAGTTTCGTTTTGATTTATATAATAAGCTTTCGCTTCAGATGAAGAATT
+GATAGGTGTATTAGGAAATTGTGTAAATGCTGTACCTAATAGTAACAATGTTGTTGATAA
+AATAATTTTTTTCGTGATGTTATTGTTCATTCGAATTTCTCCTTTGAGTATTGTTGGAAT
+GTTTAATTATAAAAAGGGGTTAATTAGATAATTGAAATTATCCGCATTTACAAAAGGTAA
+TAGGTTAGTTAGATTTTTCGAGTATGACTTCAATTTGTGCATTTTTAGGATTTTTAACAT
+AACGGTTTGTGTCAGTTAATTTTAACTTTTTACTTAAATCAATCGTGTAATTATTTCCAT
+CAGCAGTTATCTTAATTTGACCTTTATTAAATTCTCCGTTATATAACTTTTTATTCTTTA
+TTAATGTTTGACGAATACGGAAATCTAATTCTTTTAAAGTTAATACTGGTTTATTTCCTT
+TGTAAAATTCATGTCCACCGATGATAGTTTGTTGTTTATCACTATATTTTAAAAATAGTC
+TATAAGGTTTATCAGATGAAGTAGCTGCTGGCGTAACACCACCAGTAAATGTTTCATCAT
+AAGTCCAGTAACCTTCAACTGTGTCCTCATCTTTAGTTCCAAATATATCAACGTATTTAT
+TTCTTAACTGATTAATGTTTCCCCAACTCTCGGATCCAAACACTTGAATATGACTATACC
+AAACCCACGTTTGCAATGTTGCATGAATGCTACCATTGGGCTTTTGCCATAGCCATTTTC
+CAGATAATGAAAAATGCGGCTGAGTGTAATATTTGATTAACTCATTGATATTAGTCTCGT
+TTTCACTGATATTATAGGCTTTTGCTTCAGATGAAAAACTGATTGGTGTTTTAGGAAGTT
+GTGTTGATGTGGTTCCTAAAAGTAACAATGCTGTTGATAAAACTAATTTATTCATGATGT
+TCTTTTTCATATGAAAATCTCCTTTGCGTGAATTACCCAAAGTATATAAGCTATTACACC
+GATTCGGAATTAAATAAAAGCTAAAACTATGTTAAATAAACTTAAACAGTTAGTAGTGTT
+ATTTAAGCAAAACTTATCATTTTTAAGTTTTGGACAGAAACAGTACTTAATAAAGTAGGC
+GGGAGTTATATATTAAAAACGACACGTTACAATTATTCTCTAATCAATTGCATTAAATTG
+TTTGATAATTGAATTTTCTAACTACTTGAAAAATAGTTATACTTTAAATGTAGTACTTAT
+TTTAATTATTTCCTACTACTTAAATTTAATATTAATAAAATGTTCATTTAATTATTGATA
+AAATATTACAAATTTTAATAGTAGGTTGTGTTTATTTTGTATGCGCTTACAATTTAGGTG
+TAACTAAAATAAAAGGAGTTGTTATTAATGAAAAATTTACGAAACAGAAGTTTTTTAACT
+TTATTAGACTTTTCACGACAAGAGGTAGAATTCTTATTAACACTCTCCGAGGATTTAAAA
+CGTGCTAAATATATTGGCACTGAAAAGCCTATGTTAAAAAATAAAAATATTGCACTGTTA
+TTTGAAAAAGATTCTACAAGAACGCGATGTGCATTTGAAGTTGCAGCGCATGATCAAGGT
+GCAAATGTAACTTATTTAGGCCCAACTGGATCACAAATGGGTAAAAAAGAAACAACTAAA
+GATACTGCACGTGTGCTTGGTGGAATGTATGATGGCATTGAATACCGTGGTTTTTCACAA
+AGAACAGTAGAAACTTTAGCTGAAAATTCAGGCGTACCAGTGTGGAATGGTTTAACTGAT
+GAAGATCATCCTACTCAAGTTCTTGCTGATTTCTTAACAGCAAAAGAAGTCTTAAAAAAA
+GATTATGCAGATATTAACTTTACATATGTTGGAGATGGTCGTAATAACGTTGCAAATGCA
+TTAATGCAAGGTGCTGCCATTATGGGTATGAACTTCCATTTAGTTTGTCCAAAAGAATTA
+AATCCAACAGATGAATTATTAAATCGCTGTAAAAATATTGCCGCTGAAAATGGTGGCAAC
+ATATTAATCACAGATGATATTGACCAAGGTGTAAAAGGTTCGGATGTAATTTACACTGAT
+GTTTGGGTATCAATGGGTGAACCTGATGAAGTATGGAAAGAACGACTTGAATTATTGAAA
+CCATATCAAGTAAATAAAGAAATGATGGATAAAACTGGTAATCCAAATGTTATTTTTGAG
+CATTGCTTACCATCTTTCCATAATGCTGATACGAAAATTGGTCAACAAATTTTTGAAAAA
+TATGGTATTCGAGAAATGGAAGTTACAGATGAAGTATTCGAAAGTAAAGCTTCAGTTGTA
+TTCCAAGAAGCTGAGAACAGAATGCATACAATCAAAGCAGTCATGGTTGCTACATTGGGT
+GAATTTTAAATGATATAAGGAAGTGAATATGATGGCGAAAATAGTAGTAGCATTAGGTGG
+TAATGCTTTAGGAAAATCACCTCAAGAACAACTCGAGCTTGTTAAAAATACTGCGAAATC
+ATTAGTAGGATTAATAACAAAAGGACATGAGATTGTTATTAGTCATGGTAATGGACCACA
+GGTTGGAAGCATTAATTTGGGACTTAACTATGCTGCAGAACATAACCAAGGTCCGGCATT
+TCCATTTGCTGAATGTGGCGCAATGAGTCAAGCTTACATCGGCTATCAATTACAAGAAAG
+CTTACAAAATGAATTGCATTCTATTGGAATGGATAAACAAGTGGTAACACTAGTGACACA
+AGTTGAAGTTGATGAAAATGATCCGGCATTTAACAATCCTTCAAAACCAATTGGGTTATT
+TTACAACAAAGAAGAAGCTGAACAAATTCAAAAAGAAAAAGGATTTATATTTGTTGAAGA
+TGCTGGAAGAGGATATAGACGCGTTGTTCCTTCACCACAACCCATCTCTATTATTGAATT
+AGAGAGTATTAAAACACTTATTAAAAATGATACACTCGTTATTGCTGCTGGTGGTGGAGG
+TATACCAGTAATTAGAGAGCAACATGATGGTTTTAAAGGTATTGATGCAGTTATAGACAA
+AGATAAAACAAGTGCGTTGTTGGGTGCTAATATTCAATGCGATCAATTGATTATTTTAAC
+AGCAATTGATTATGTATATATTAATTTTAACACTGAAAACCAACAGCCTTTGAAAACAAC
+AAATGTTGATGAATTAAAACGATATATAGACGAAAATCAATTTGCAAAAGGAAGTATGTT
+ACCAAAAATTGAAGCAGCCATATCATTTATTGAAAACAATCCAAAAGGAAGTGTGCTTAT
+AACATCATTAAATGAATTAGATGCTGCCTTAGAGGGTAAAGTAGGTACTGTGATTAAAAA
+GTAATTGAATTGAAACGCTTTTCAATTACTATATGTCAAATGCATGATTTTTATTATTTA
+TGTGCACCCCCTGAAAATAATGCCTCTATTTTGATGCGGGGTGCACTTTCTTAATTTATA
+TTTATAAAATCTTTAAGGTAGAAATGCTAGGTTAAATGTCGAAGGAGATGAAACCGTGGA
+AAATACAATTAATGAAAGTGAAAAGAAAAAACGATTTAAATTAAAAATGCCAGGTGCATT
+TATGATTTTATTCATTTTAACGGTTGTTGCAGTTATAGCAACATGGGTTATTCCTGCTGG
+TGCATATTCTAAACTTTCTTACGAACCTTCATCCCAAGAACTAAAGATAGTTAACCCTCA
+TAACCAAGTGAAAAAGGTTCCGGGTACGCAACAGGAACTAGACAAAATGGGGGTTAAAAT
+TAAGATTGAACAATTTAAATCAGGTGCAATTAATAAGCCAGTATCAATTCCGAATACTTA
+TGAAAGATTAAAGCAACATCCAGCTGGACCAGAACAAATAACAAGTAGCATGGTTGAAGG
+TACGATAGAAGCGGTCGATATCATGGTATTCATTCTTGTACTAGGGGGACTTATTGGCGT
+AGTTCAAGCCAGTGGTTCTTTTGAATCGGGATTGTTAGCTTTAACGAAGAAAACAAAAGG
+GCATGAATTTATGCTAATTGTGTTTGTATCAATACTAATGATTATCGGCGGGACGTTATG
+TGGTATTGAAGAAGAAGCTGTAGCATTCTATCCGATTTTAGTCCCTATATTTATAGCGTT
+AGGATACGATTCTATCGTTTCAGTTGGTGCCATATTCCTTGCCAGCTCTGTCGGTAGTAC
+ATTTTCAACTATTAACCCGTTCTCGGTTGTAATTGCCTCTAATGCCGCTGGTACAACTTT
+TACGGATGGCTTGTATTGGAGAATAGGTGCTTGTATTGTCGGTGCGATTTTTGTTATTAG
+TTATTTATATTGGTACTGTAAAAAAATTAAAAACGATCCTAAAGCGTCATATTCTTATGA
+AGACAAAGATGCTTTTGAACAGCAATGGTCTGTATTAAAAGATGACGATAGTGCCCATTT
+TACTTTGCGTAAGAAGATAATCCTTACATTATTTGTACTACCATTTCCAATTATGGTATG
+GGGAGTTATGACGCAAGGTTGGTGGTTCCCAGTTATGGCTTCAGCATTTTTAATATTTAC
+AATTATAATAATGTTTATTGCTGGGACAGGTAAATCTGGATTGGGAGAAAAAGGAACTGT
+AGATGCATTTGTCAATGGTGCATCAAGTTTAGTAGGTGTATCTTTAATTATTGGTTTAGC
+TCGAGGTATTAATTTAGTGTTGAATGAAGGTATGATTTCAGATACAATCTTACACTTTTC
+ATCATCTTTAGTTCAACATATGAGTGGACCATTATTTATCATCGTATTACTATTTATTTT
+CTTCTGTTTAGGTTTTATCGTGCCATCATCTTCTGGATTAGCGGTATTATCAATGCCTAT
+CTTTGCACCACTAGCTGATACAGTAGGTATACCAAGATTCGTCATCGTTACGACATATCA
+ATTTGGTCAATATGCGATGTTATTCTTAGCGCCGACTGGACTTGTTATGGCCACACTACA
+AATGTTAAACATGCGATATTCACATTGGTTCCGATTTGTATGGCCGGTAGTTGCTTTTGT
+ATTGATTTTCGGTGGCGGAGTACTAATTACGCAAGTACTAATTTATTCATAATTTGAAAT
+GCTATATTATAAAAATACTAATTGGGTTTTATGCATCTCGTAGGTTTGTAGAAATACTAA
+ACTAAGCGAGGTGCATTATTATTTTTGATTAAGAAAATAATGACGGTAATGATAACACTA
+GTAAGTAATTGATACAATGCTCTATTTAATAATGATATTTTTAAAATTTGTTTTTAATGT
+AATGTTAGATCTATGGTATATTATATTTATCGTGGTAAATATGTATTTGCTGTAAACAGT
+TTTTGTAACACATGACACATATGGTTGTGAAAATAGCATATATAAAGGATGGCTATAAAT
+GACACATTTGACAAAGGTTTTAGATACACTAACTGGAATATGCGTAGTATTATTATTTAG
+TAAATATTTTGTGGCGTATGCAAATATGGTGTTTGATTGGAATTTAAGATGGTATTTGCT
+AGAAAACATACCACATTTGCCAATTATATTATTTATTCTGATGTTTATTTTCGGAGTACC
+TTCTGAAATGATAAAAGATAGGCAAAGGAAAAATAACGGTGTTTAATTTATCGATATTTA
+GAGGTGATAAAATTTGTCAACTATTACAAAGACTAAAATAAAAATCTTTATTAATTATTT
+TATTGGTATATTAGCCTTGGCGTTTGCTGCTTATATATTATTTAAATAAAGGTTAGAGTG
+AAACGTGTTTATGAACTAGACGTTCTAGTATAGTTACTACAGCTTTAGTCAGAAGGTATC
+ATTGATAAGATCATATTAAATCAAAGAGGCATTGATATACACTAAAAAGAGGCAAGATTA
+CCTGCCTCTTTTTTAGTTATTAAATATGCGTGTTAATTCTTGGTAATAGTGTTACTGAGA
+CGCATTACGAAGAAATTTATCTTGATTTTCCTTTTTTAAAAAGAAGTGAAGATATCCTAT
+AAAGACTCTAAGTACTATTACAGTGGCTAATAACACTTCGATAAACAAAAGACCTTTCCA
+AATATCTGGAAACATAAGTACAGGCAAACTGTTCTTTAAAGCAGTTGCTGAGATTACTAA
+AGGGAATGTGAAAGCTGAAAATACGGGCGAAAAAGGTTCTTTTAGTAATTTAGGCAATTG
+TATAATGATATAAAAATAAAATATTTGAGCTAATATTAAAAATATAATTACGATAAAAGC
+ATTCGCCTTAGGAAAGGCTATAACATATGCTGCAGCGACTAAAGAAAATGGTGCACAAAT
+TGTCGATGTGTTCGGTTTGATTGACGTCTGCAATGGAAATGCTTTTAATCGCTTGAAAAC
+AATAGGTAAGACAATACAAGTAGCTACAAAGCCATATATTACTGTTAATTGACCTATGAA
+AAAACATCCGCTAACGGGTGCCGTCAATCCTGCGATAGCAATACCAATAAAAAGTACAGT
+CCACGAAGGATAAACATTTTCAAGTGAAAAATCTTTTAAATATTTTATTGAAAAAATAAT
+CATATGCGTCATAATTCCCACAAGGCATAAAATCCAAATAGGCGTTATTAAGCTATTGAT
+AAAAGTTATGTTACTAAAAAATGTATTTAAATAAGTAGTACCTAAAAAGCCAGACATGAA
+AAATGTTGTGAACACTGATGAAACTAGAGGACTGTTTAATTGTTCTTTCACATTATTAAA
+ATATTTGATCATAGTACATAAAAGGTGAATCCAAATCAAGAAAGCAAAGATTCCGCAAAC
+AGCGTTTAAAGTAAGAGATAAGTCTTTTAATAGATTACCCAGGCCTAATAATCCTAAGAC
+TAGTCCTGACGTTACTAGAGGTGCTTTTTGAAGTCTCATGATTTACAACCTTTCTTATGT
+GATTTTCTTCACTAATTATATCATGATAGCTATGGCCAATTAATAAGAAGAGTGTGTACT
+ATTACGTTATTAGAGTATGTATATTGGATTGATAGACACATAATTGACATTTAAATCTCA
+AAATTAATGATATTTGAGGTATGCTTTGCAACCTAAATATTGGAATATGTGGAAAAGTAA
+TTATTTAATTTAAATGATGATTGAAGATTTACAAGGGGGTGTACAAAATGAGAAATCAAA
+TTCAAAAACTATTAGACAGTGATTTGAGCAGTTTACATATATCGAAACAAACAGGAGTTC
+CACAAAGCACAATACACAGAATGAGAAAAAAAGAAAGATCATTAGACAATATGTCATTGA
+AAAACGCTGAACTACTTTATAAATTTGCCAATAGTATATTTAGCAATGAAAATTAAGAAG
+AATAATTAGTGAGTTAAATAATTAAAAAACCACGTCTCGATGGACGTGGTTATTTTTTTA
+AATGTATTTAATTATAGGATTTCGGGCCCCTGAAAAGTCCCTAAAATTTTGAATAGTATC
+AAAATCTATAGGAATAATATAATAATAAAAATCTCTACGCATGCAAATGGTGAGTGTTAA
+AAATCTTGGTGTAGTGTTGGTGTAGTTCAGGTGCAGTATAGATGAAATTTGAGCACAAAA
+TTACTTAATTAAAAATGTATAAACAAGTGTCAGACGCGTGTCAATTGTGTGTCAAAAAAT
+TCTATGACGAATAAGCATGCTTGTTTTAATTACTGTATTTTCGGGGCTTATGCGGGGGAC
+GTTTTGGTGACGCATCATACTATATTACTGAAATTCAAAACAAAAAGAGCCCCGTAATCA
+CGGAACTCTTTTGTTTGGTAATGCGTATAAAAATACCTATAAACGTCCTGGGAGGGATTC
+GAACCCCCGACCGATGGCTTAGAAGGCCATTGCTCTATCCAGCTGAGCTACCAGGACACG
+TTTAACAACACAAGAATTATTATATCTAAATGAACTTAAATTAGCAATACCTTGTAAATA
+AAAAATGTTTATATTTTTCACTATTATAGAGCTATTTATCTAAAAAGGTTCAATAAGACT
+TAAATGCGAATTCAGGCAACTTAATTGTGTTAAATACAGTTTTGAATGCCTAACTGTATT
+TCTTTTCTCTTTAAAATACAGTTAAGTACATTATAAGATGTTGTGCGGATAAACAAACTA
+ATTGCATCAAATTTATTTTAAAATAACAACAACAAAACGTTAAGCGAATAACATTTCGGT
+GATTTAAAAGCTACGCACGTTTTTGTTATCTTCAAATTTAAATTTTAAGGAGTGTTTTCA
+ATGGAAGGTTTATTTAACGCAATTAAAGATACCGTAACTGCAGCAATTAATAATGATGGC
+GCAAAATTAGGCACAAGCATTGTGAGCATCGTTGAAAATGGCGTAGGTTTATTAGGTAAA
+TTATTCGGATTCTAATTTCAATATGTTATGTAAGTAATCAGTATTATTTCAAAGGTGAGG
+GAGAGATTTAAATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAAC
+AACATGATAGTGTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTT
+TACTAGGTAAATTATTTGGATTCTAATATAATAACTAATATTCTTTAAAATAAACTGGGT
+GAGCATACTTTAATGTTATGCACTCAGTTTATTTTATTTGCAGAAATTTGAGCCTCTGTT
+AAGATTTAGATACATAGACAATATAGGAGATGGGGAAATTGGGATATAAAAATATTTTGA
+TAGACTTTGATGATACAATTGTTGATTTTTATGATGCAGAAGAATGGGCGTTTCACTATA
+TGGCGAATGTTTTTAATCATAAAGCAACAAAGGATGATTTTTTAACATTTAAAAAAATCA
+ATCACCAACATTGGGAAGCTTTTCAACAAAATAAATTAACGAAGTCTGAAGTATTATCAG
+AACGATTTGTGAATTACTTCAAACATCATCAAATGGAAGTTGATGGGCATCGTGCAGATG
+TGTTATTTAGAAATGGATTAGCAGAAGCTAAAGTTAAATACTTTGATCAAACATTAGAAA
+CAATTGTCGAATTATCGAAAAGACATGATTTATATATTGTTACTAATGGTGTAACCGAAA
+CGCAAAAGAGAAGGTTAAATCAGACGCCGTTGCATAAATATATTAAAAAGATATTTATAT
+CTGAGGAAACAGGATATCAAAAACCTAATCCGGAATTTTTTAATTATGTTTTTAATGATA
+TTGGTGAGGATGAAAGACAGCACTCGATTATAGTTGGAGATTCTTTAACATCTGACATTC
+TAGGTGGAATCAATGCGGGTATAGCTACTTGCTGGTTTAATTTTAGAGGATTTGATCATA
+ATCCAGGAATTATACCTGATTATGAAATTAATTCATGGAAACAACTAAATGATATTGTAC
+GTTAAGACAAATAATTTGAAAATGATTTATATTAGCTTAAGAAAGTAATTGTAAAAACGA
+CACTACGAAAAGATGCATTTTCATGCTCGTAGTGTCGAATATTATTATCTTATGGTTTCC
+ACAATAAGACATCATGTCCCTCTATATTTTTAGCTTCCACATCTGTAAATCCATGATGTA
+AAAAGAAATCCTTAGAATCATTTCTGCCAATGGCTTTAATTGGCATATTGAAACTCTTTG
+CAAAATCAATCAATTCTGAAGCGTACCCTCTGTTTTGATATTTTGGTAATACTTCTAACT
+TCCATAATAATATATAATCTTCAAAATCTGGGAAGTAGATTTCTTCGACATCACCTTTTT
+TTAGTAATGCCATTCTAGCTCCTAATTGATCTCCGACAAATATGCCATAAAATGGTGAAT
+CTGAACTTGCATCAATCATTTGACCGTTTAACTCATTGACCATGTATAAGTCTTTGTTGC
+CAAACGCTCTAAAGTTTTCGAATAATTCGTCAGTTTTGTAATTAATTTCAAGACGTTTGA
+TTTCACTCATATTTATCTACCCCTTATTTGTTTACTACTATTATTATACATTAAACCACT
+TGTGTTTTTCATTGATTTGAAAATGTAAAACAATGAACATTATAAATTTTCCATGATTAA
+ATTGATTTTCAAGCCGTAAAATTGTCAATCACCGGTATAGACTTTATACTATAGTTTGTA
+ATCGCGTACTTAAGGAGAAAAAACATGGACTGTAAAGTAGTTAGTTTAAATGAAAAAGAT
+CAGTTTATACCAAAAATAAAGAGCAGTGACCCTGTAATAACAGGATTATTTCAATATGAT
+GCAGCTCAACAAACTAGTTTTGAAAAAAGGATGTCTAAAGAAAATAATGGAAGAGAAGCG
+GCATTAGCGAATGTTATTCGTGAATATATGAGTGATTTAAAGCTTTCAAATGAACAAGAA
+TTAAACATACAACATTTAGCTAATGGTTCAAAAGTTGTGATTGGTGGACAACAAGCAGGG
+CTTTTCGGGGGACCATTGTATACATTCCATAAAATATTTTCAATCATTACTTTATCTAAG
+GAATTAACGGATACACATAAGCAACAAGTAGTACCAGTTTTTTGGATTGCAGGAGAAGAT
+CATGATTTCGATGAAGTGAATCATACATTTGTTTATAACGAAAATCATGGGTCGCTGCAT
+AAGGTTAAATATCATACAATGGAGATGCCAGAGACGACAGTCTCTAGATATTATCCTGAT
+AAGGCTGAGTTGAAACAAACTTTAAAAACGATGTTCATTCATATGAAAGAAACTGTTCAT
+ACACAAGGTCTACTGGAGATTTGTGACAGAATTATTGACCAATATGACTCGTGGACTGAT
+ATGTTTAAAGCACTACTGCATGAAACATTTAAAGCATATGGCGTTCTATTTATAGATGCG
+CAGTTTGAGCCGTTAAGAAAAATGGAAGCGCCTATGTTTAAAAAGATTTTGAAAAAACAT
+CAGTTGCTTGATGATGCTTTTAGAGCAACACAACAACGTACTCAAAATCAAGGCTTGAAT
+GCGATGATACAAACAGATACAAATGTTCATTTATTCTTACATGATGAAAATATGCGCCAA
+TTAGTTTCGTATGATGGTAAGCATTTTAAATTAAATAAAACAGATAAGACATATATAAAG
\ No newline at end of file
diff --git a/t/data/locus_tag_gffs/query_1.gff.proteome.faa.expected b/t/data/locus_tag_gffs/query_1.gff.proteome.faa.expected
new file mode 100644
index 0000000..78f8a5d
--- /dev/null
+++ b/t/data/locus_tag_gffs/query_1.gff.proteome.faa.expected
@@ -0,0 +1,75 @@
+>abc_00001
+MKTRIVSSVTTTLLLGSILMNPVANAADSDINIKTGTTDIGSNTTVKTGDLVTYDKENGM
+HKKVFYSFIDDKNHNKKLLVIRTKGTIAGQYRVYSEEGANKSGLAWPSAFKVQLQLPDNE
+VAQISDYYPRNSIDTKEYMSTLTYGFNGNVTGDDTGKIGGLIGANVSIGHTLKYVQPDFK
+TILESPTDKKVGWKVIFNNMVNQNWGPYDRDSWNPVYGNQLFMKTRNGSMKAAENFLDPN
+KASSLLSSGFSPDFATVITMDRKASKQQTNIDVIYERVRDDYQLHWTSTNWKGTNTKDKW
+TDRSSERYKIDWEKEEMTN*
+>abc_00002
+MKLFYIVFLIIIWLNIFLGNEIIHTLTVLITTLYIVNSRKGIKNDRVE*
+>abc_00003
+MTELNNIINSLQSLFESESGYKISKNSGVPYQTVQDLRNGKTKLEDARFRTIIKLYSYYV
+SLKEH*
+>abc_00004
+MSKNITKNIILTTTLLLLGTVLPQNQKPVFSFYSEAKAYSIGQDETNINELIKYYTQPHF
+SFSNKWLYQYDNGNIYVELKRYSWSAHISLWGAESWGNINQLKDRYVDVFGLKDKDTDQL
+WWSYRETFTGGVTPAAKPSDKTYNLFVQYKDKLQTIIGAHKIYQGNKPVLTLKEIDFRAR
+EALIKNKILYTENRNKGKLKITGGGNNYTIDLSKRLHSDLANVYVKNPNKITVDVLFD*
+>abc_00005
+MNNNITKKIILSTTLLLLGTAFTQFPNTPINSSSEAKAYYINQNETNVNELTKYYSQKYL
+TFSNSTLWQKDNGTIHATLLQFSWYSHIQVYGPESWGNINQLRNKSVDIFGIKDQETIDS
+FALSQETFTGGVTPAATSNDKHYKLNVTYKDKAETFTGGFPVYEGNKPVLTLKELDFRIR
+QTLIKSKKLYNNSYNKGQIKITGTDNNYTIDLSKRLPSTDANRYVKKPQNAKIEVILEKS
+N*
+>abc_00006
+MKKNIMNKLVLSTALLLLGTTSTQLPKTPISFSSEAKAYNISENETNINELIKYYTQPHF
+SLSGKWLWQKPNGSIHATLQTWVWYSHIQVFGSESWGNINQLRNKYVDIFGTKDEDTVEG
+YWTYDETFTGGVTPAATSSDKPYRLFLKYSDKQQTIIGGHEFYKGNKPVLTLKELDFRIR
+QTLIKNKKLYNGEFNKGQIKITADGNNYTIDLSKKLKLTDTNRYVKNPKNAQIEVILEKS
+N*
+>abc_00007
+MKNLRNRSFLTLLDFSRQEVEFLLTLSEDLKRAKYIGTEKPMLKNKNIALLFEKDSTRTR
+CAFEVAAHDQGANVTYLGPTGSQMGKKETTKDTARVLGGMYDGIEYRGFSQRTVETLAEN
+SGVPVWNGLTDEDHPTQVLADFLTAKEVLKKDYADINFTYVGDGRNNVANALMQGAAIMG
+MNFHLVCPKELNPTDELLNRCKNIAAENGGNILITDDIDQGVKGSDVIYTDVWVSMGEPD
+EVWKERLELLKPYQVNKEMMDKTGNPNVIFEHCLPSFHNADTKIGQQIFEKYGIREMEVT
+DEVFESKASVVFQEAENRMHTIKAVMVATLGEF*
+>abc_00008
+MMAKIVVALGGNALGKSPQEQLELVKNTAKSLVGLITKGHEIVISHGNGPQVGSINLGLN
+YAAEHNQGPAFPFAECGAMSQAYIGYQLQESLQNELHSIGMDKQVVTLVTQVEVDENDPA
+FNNPSKPIGLFYNKEEAEQIQKEKGFIFVEDAGRGYRRVVPSPQPISIIELESIKTLIKN
+DTLVIAAGGGGIPVIREQHDGFKGIDAVIDKDKTSALLGANIQCDQLIILTAIDYVYINF
+NTENQQPLKTTNVDELKRYIDENQFAKGSMLPKIEAAISFIENNPKGSVLITSLNELDAA
+LEGKVGTVIKK*
+>abc_01705
+VENTINESEKKKRFKLKMPGAFMILFILTVVAVIATWVIPAGAYSKLSYEPSSQELKIVN
+PHNQVKKVPGTQQELDKMGVKIKIEQFKSGAINKPVSIPNTYERLKQHPAGPEQITSSMV
+EGTIEAVDIMVFILVLGGLIGVVQASGSFESGLLALTKKTKGHEFMLIVFVSILMIIGGT
+LCGIEEEAVAFYPILVPIFIALGYDSIVSVGAIFLASSVGSTFSTINPFSVVIASNAAGT
+TFTDGLYWRIGACIVGAIFVISYLYWYCKKIKNDPKASYSYEDKDAFEQQWSVLKDDDSA
+HFTLRKKIILTLFVLPFPIMVWGVMTQGWWFPVMASAFLIFTIIIMFIAGTGKSGLGEKG
+TVDAFVNGASSLVGVSLIIGLARGINLVLNEGMISDTILHFSSSLVQHMSGPLFIIVLLF
+IFFCLGFIVPSSSGLAVLSMPIFAPLADTVGIPRFVIVTTYQFGQYAMLFLAPTGLVMAT
+LQMLNMRYSHWFRFVWPVVAFVLIFGGGVLITQVLIYS*
+>abc_00010
+MTHLTKVLDTLTGICVVLLFSKYFVAYANMVFDWNLRWYLLENIPHLPIILFILMFIFGV
+PSEMIKDRQRKNNGV*
+>abc_00011
+MRLQKAPLVTSGLVLGLLGLGNLLKDLSLTLNAVCGIFAFLIWIHLLCTMIKYFNNVKEQ
+LNSPLVSSVFTTFFMSGFLGTTYLNTFFSNITFINSLITPIWILCLVGIMTHMIIFSIKY
+LKDFSLENVYPSWTVLFIGIAIAGLTAPVSGCFFIGQLTVIYGFVATCIVLPIVFKRLKA
+FPLQTSIKPNTSTICAPFSLVAAAYVIAFPKANAFIVIIFLILAQIFYFYIIIQLPKLLK
+EPFSPVFSAFTFPLVISATALKNSLPVLMFPDIWKGLLFIEVLLATVIVLRVFIGYLHFF
+LKKENQDKFLRNASQ*
+>abc_00012
+MRNQIQKLLDSDLSSLHISKQTGVPQSTIHRMRKKERSLDNMSLKNAELLYKFANSIFSN
+EN*
+>abc_00014
+MEGLFNAIKDTVTAAINNDGAKLGTSIVSIVENGVGLLGKLFGF*
+>abc_00015
+MTGLAEAIANTVQAAQQHDSVKLGTSIVDIVANGVGLLGKLFGF*
+>abc_00016
+LGYKNILIDFDDTIVDFYDAEEWAFHYMANVFNHKATKDDFLTFKKINHQHWEAFQQNKL
+TKSEVLSERFVNYFKHHQMEVDGHRADVLFRNGLAEAKVKYFDQTLETIVELSKRHDLYI
+VTNGVTETQKRRLNQTPLHKYIKKIFISEETGYQKPNPEFFNYVFNDIGEDERQHSIIVG
+DSLTSDILGGINAGIATCWFNFRGFDHNPGIIPDYEINSWKQLNDIVR*
diff --git a/t/data/locus_tag_gffs/query_2.gff b/t/data/locus_tag_gffs/query_2.gff
new file mode 100644
index 0000000..8428774
--- /dev/null
+++ b/t/data/locus_tag_gffs/query_2.gff
@@ -0,0 +1,271 @@
+##gff-version 3
+##sequence-region abc|SC|contig000001 1 15000
+abc|SC|contig000001	Prodigal:2.60	CDS	172	1131	.	-	0	gene=hly;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q2G1X0,protein motif:TIGRFAMs:TIGR01002,protein motif:Pfam:PF07968.6;locus_tag="zzz_00001";product=Alpha-toxin,beta-channel forming cytolysin,Leukocidin/Hemolysin toxin family protein;protein_id=gnl|SC|abc_00001
+abc|SC|contig000001	Prodigal:2.60	CDS	1804	1950	.	+	0	inference=ab initio prediction:Prodigal:2.60;locus_tag="zzz_00002";product=hypothetical protein;protein_id=gnl|SC|abc_00002
+abc|SC|contig000001	Prodigal:2.60	CDS	1934	2131	.	+	0	inference=ab initio prediction:Prodigal:2.60;locus_tag="zzz_00003";product=hypothetical protein;protein_id=gnl|SC|abc_00003
+abc|SC|contig000001	Prodigal:2.60	CDS	2621	3337	.	-	0	inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13350,protein motif:Pfam:PF02876.11;locus_tag="zzz_00004";product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00004
+abc|SC|contig000001	Prodigal:2.60	CDS	3445	4170	.	-	0	gene=speH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0C0I6,protein motif:CLUSTERS:PRK13349,protein motif:Pfam:PF02876.11;locus_tag=zzz_00005;note=SPE H;product=hypothetical protein,superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00005
+abc|SC|contig000001	Prodigal:2.60	CDS	4265	4990	.	-	0	inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13043,protein motif:Pfam:PF02876.11;locus_tag="zzz_00006"";product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00006
+abc|SC|contig000001	Prodigal:2.60	CDS	5428	6429	.	+	0	eC_number=2.1.3.3;gene=argF;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P99073,protein motif:CLUSTERS:PRK04284,protein motif:Cdd:COG0078,protein motif:TIGRFAMs:TIGR00658,protein motif:Pfam:PF00185.1;locus_tag=zzz_00007;product=Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Aspartate/ornithine carbamoyltransferase%2C Asp/Orn b [...]
+abc|SC|contig000001	Prodigal:2.60	CDS	6449	7384	.	+	0	eC_number=2.7.2.2;gene=arcC1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q7A627,protein motif:CLUSTERS:PRK12353,protein motif:TIGRFAMs:TIGR00746,protein motif:Pfam:PF00696.22;locus_tag=zzz_00008;product=Carbamate kinase 1,putative amino acid kinase,carbamate kinase,Amino acid kinase family protein;protein_id=gnl|SC|abc_00008
+abc|SC|contig000001	RNAmmer:1.2	rRNA	7556	9112	.	+	0	inference=COORDINATES:profile:RNAmmer:1.2;locus_tag="zzz_01705";product=16S ribosomal RNA
+abc|SC|contig000001	Prodigal:2.60	CDS	9419	9646	.	+	0	inference=ab initio prediction:Prodigal:2.60;locus_tag="zzz_00010";product=hypothetical protein;protein_id=gnl|SC|abc_00010
+abc|SC|contig000001	Prodigal:2.60	CDS	9952	10899	.	-	0	inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF03595.11;locus_tag="zzz_00011";product=C4-dicarboxylate transporter/malic acid transport protein;protein_id=gnl|SC|abc_00011
+abc|SC|contig000001	Prodigal:2.60	CDS	11148	11336	.	+	0	inference=ab initio prediction:Prodigal:2.60;locus_tag="zzz_00012";product=hypothetical protein;protein_id=gnl|SC|abc_00012
+abc|SC|contig000001	Aragorn:1.2.34	tRNA	11803	11878	.	-	0	inference=COORDINATES:profile:Aragorn:1.2.34;locus_tag="zzz_00013";product=tRNA-Arg(tct)
+abc|SC|contig000001	Prodigal:2.60	CDS	12241	12375	.	+	0	inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5;locus_tag="zzz_00014";product=Gonococcal growth inhibitor III,Staphylococcus haemolytic protein;protein_id=gnl|SC|abc_00014
+abc|SC|contig000001	Prodigal:2.60	CDS	12432	12566	.	+	0	inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5;locus_tag="zzz_00015";product=Gonococcal growth inhibitor III,Staphylococcus haemolytic protein;protein_id=gnl|SC|abc_00015
+abc|SC|contig000001	Prodigal:2.60	CDS	12699	13385	.	+	0	eC_number=3.-.-.-;gene=yfnB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O06480,protein motif:CLUSTERS:PRK09449,protein motif:TIGRFAMs:TIGR02254,protein motif:Pfam:PF00702.1;locus_tag="zzz_00016";product=Putative HAD-hydrolase yfnB,dUMP phosphatase,HAD hydrolase,haloacid dehalogenase-like hydrolase;protein_id=gnl|SC|abc_00016
+##FASTA
+>abc|SC|contig000001
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+ATACCCACCACATTTGGTGGAGAACCAAAAATTAGCCGAAAAACATCATTTCTGAAGTTA
+TCGGCTAAAGTTATAAATTATATTTATTTGTACATGAACAAATAATTTACATTAATTTGT
+CATTTCTTCTTTTTCCCAATCGATTTTATATCTTTCTGAAGAACGATCTGTCCATTTATC
+TTTAGTATTGGTACCTTTCCAATTTGTTGAAGTCCAATGCAATTGGTAGTCATCACGAAC
+TCGTTCGTATATTACATCTATATTTGTTTGTTGTTTGGATGCTTTTCTATCCATAGTAAT
+AACTGTAGCGAAGTCTGGTGAAAACCCTGAAGATAATAGAGAACTTGCTTTGTTAGGATC
+AAGGAAGTTCTCTGCTGCTTTCATAGAACCATTTCTAGTTTTCATGAAAAGTTGATTGCC
+ATATACCGGGTTCCAAGAATCTCTATCATATGGTCCCCAATTTTGATTCACCATATTGTT
+AAATATCACTTTCCAGCCTACTTTTTTATCAGTTGGGCTCTCTAAAATTGTTTTGAAATC
+AGGTTGAACATATTTCAGTGTATGACCAATCGAAACATTTGCACCAATAAGGCCGCCAAT
+TTTTCCTGTATCATCACCAGTAACATTACCGTTGAATCCATAAGTTAAAGTACTCATATA
+CTCTTTTGTATCAATCGAATTTCTTGGATAGTAATCAGATATTTGAGCTACTTCATTATC
+AGGTAGTTGCAACTGTACCTTAAAGGCTGAAGGCCAGGCTAAACCACTTTTGTTAGCACC
+TTCTTCGCTATAAACTCTATATTGACCAGCAATGGTACCTTTCGTTCTAATAACTAGCAG
+TTTTTTATTGTGATTTTTATCATCGATAAAACTATAAAATACTTTTTTGTGCATGCCATT
+TTCTTTATCATAAGTGACTAAATCACCTGTTTTTACTGTAGTATTGCTTCCAATATCTGT
+AGTACCGGTTTTAATATTAATATCAGAATCTGCGGCATTAGCGACAGGATTCATTAATAT
+GGAACCTAGCAATAGTGTTGTTGTTACTGAGCTGACTATACGTGTTTTCATTTTCATCAT
+CCTTCTATTTTTTTTAACGATTTGAGGAAACAATAATCAATATGTCAATTTAGAATATTG
+CAGTTGAGACATATCAAATATTTATAATCGTTTATATTAGTATATTTAATGTAGCAACTG
+ATAAATTACTGAGTGATGATGAGTGATTATTTTAAGAATATGTTTTTAACTTTTATTTAA
+AATTTGAAAGGAAGCATTTCAATTTCGAGGGTTAGTCAAAGTTGAATAAATTCTTTATGA
+AACAAGGAAAAGACATAGCTAATTTTATTGATTAATTTCTTTAAAACTAATGATTTGTTT
+GATTTAAAAATGTAATCGATTACAATATAAAAATACAAATATCTTAGAATTAAATCAATT
+AATTAACTATTAAATAAAAATTAAATATATATTAACTACTGTAAATTAATAAATAGAAAT
+AGAGAAAAAGGGTATTAATTATGTTTGGAATTTCGTCGGGAATAATTTTTGCCATTACAT
+AGAAATATCTAATAGATAATGAAAAAGTATCGTATGTATTTTTAATATAGTGTAAAATAT
+CATATGTAAAATAAAATGTAGATTTTTAGTTAGAGGCATTATAAGAAAATTTTGAGTATA
+GGTTAGCTTTTAATTATGAATCTTATTGAAATTTGATTAATAAATATATGATAGGGGATT
+AAAATGAAACTATTTTATATCGTATTTCTTATTATTATATGGCTGAATATATTTTTAGGA
+AATGAAATTATCCATACACTGACTGTTTTAATAACAACATTGTATATTGTTAATTCAAGA
+AAGGGGATTAAAAATGACAGAGTTGAATAATATTATAAACTCTCTTCAATCTTTGTTTGA
+GTCTGAATCAGGCTATAAAATTTCGAAAAATTCAGGAGTTCCATATCAAACAGTACAAGA
+TTTAAGAAATGGGAAAACCAAACTAGAAGATGCTAGATTTAGAACGATTATTAAACTTTA
+TAGTTACTATGTCTCATTAAAAGAACATTAATCATGGGACAATAAAGTATTGCTATAATA
+ATTGAATCATTAAATGAAGGAATAGAGCCTAAGACATAATAAATCAATGTCTTAGGCTCT
+ACAATATTATATTGGTAGTAGTTGACCGAATGAAAATGACCATGTAACAAGCATTTTTGG
+GGCCCCAACACAGAAGCTGACGAAAAGTCAGCTTACAATAATGTGCAAGTTGGGGATGGG
+CCCCAACACAGAGAATTTCAAAAAGAAATTCTACAGACAATGCAAGTTGGCGGGGCCCCA
+ACATAGAGAAATTCAAAAAGAAATTCTACAGACAATGCAAGTTGGGGAGTGACAACGAAA
+TAAATTTTATAAAATACCATTTCTGTCACATTCCTACTCCCAATTCCATTTAAATATATA
+GAAAATTTCACCTATTATATACATCATACACTTTAAAAATTATCATTTAAATAATCTGAT
+TTGGAATTAAAGTGCATGAAGTATAAGTCACCTTCATATACTAATCAAAGAGGACGTCAA
+CAGTTATTTTATTAGGATTTTTAACATAAACATTTGCTAGATCTGAATGTAATCTTTTGC
+TTAAATCAATAGTGTAGTTATTACCGCCACCGGTGATCTTAAGCTTACCTTTATTACGAT
+TTTCGGTATATAATATTTTATTTTTTATTAACGCTTCTCGTGCACGGAAATCGATTTCTT
+TCAATGTTAATACTGGTTTATTGCCTTGGTATATTTTATGCGCACCAATAATCGTTTGTA
+GTTTATCTTTGTATTGCACAAAAAGATTATAAGTTTTATCAGAAGGTTTTGCGGCTGGTG
+TAACGCCACCTGTAAATGTCTCTCTATAAGACCACCATAACTGATCAGTATCTTTGTCTT
+TTAGTCCAAACACATCTACGTAACGATCTTTTAACTGATTAATATTTCCCCAACTTTCAG
+CGCCCCATAAAGATATATGTGCTGACCATGAATATCTCTTAAGTTCAACATAAATGTTTC
+CATTATCATATTGATATAGCCATTTATTTGAAAATGAAAAATGAGGCTGTGTGTAATATT
+TAATTAATTCATTGATGTTAGTTTCATCTTGACCAATGCTATAAGCTTTAGCTTCAGAGT
+AAAAACTAAATACTGGTTTTTGATTTTGAGGTAATACAGTACCTAATAGTAATAATGTTG
+TCGTTAAAATTATATTTTTCGTGATGTTCTTACTCATTAGAACATCTCCTTTCAGAGGAA
+TCATGATACGAGGAATAAGAAATTTAAATGTGAGCGAAGTCAATATAGTATTTGCGATTA
+TTTTTATTAACTCCATTATTATTGTTAGTTTGATTTTTCGAGGATAACTTCAATTTTTGC
+ATTTTGAGGTTTTTTAACATATCTATTTGCATCAGTTGATGGCAACCTTTTACTTAAATC
+TATTGTGTAGTTATTGTCTGTACCTGTTATTTTAATTTGTCCTTTATTATAAGAATTATT
+ATATAATTTTTTACTTTTAATTAATGTTTGACGAATACGAAAATCTAATTCTTTTAAAGT
+TAAAACAGGCTTATTGCCTTCATAAACTGGAAATCCGCCAGTAAACGTTTCTGCTTTATC
+TTTATATGTTACATTCAGTTTATAGTGTTTATCGTTAGATGTTGCTGCAGGAGTAACACC
+ACCAGTAAACGTTTCTTGAGATAATGCAAAAGAATCAATGGTTTCTTGGTCTTTTATGCC
+AAAAATATCAACGCTTTTATTTCTTAATTGGTTGATATTGCCCCAACTTTCAGGTCCATA
+AACTTGAATATGACTATACCAAGAAAACTGTAACAACGTTGCATGAATCGTACCGTTATC
+TTTTTGCCATAACGTACTGTTAGAGAAGGTTAAATATTTTTGCGAGTAATATTTAGTTAA
+CTCATTAACGTTAGTTTCGTTTTGATTTATATAATAAGCTTTCGCTTCAGATGAAGAATT
+GATAGGTGTATTAGGAAATTGTGTAAATGCTGTACCTAATAGTAACAATGTTGTTGATAA
+AATAATTTTTTTCGTGATGTTATTGTTCATTCGAATTTCTCCTTTGAGTATTGTTGGAAT
+GTTTAATTATAAAAAGGGGTTAATTAGATAATTGAAATTATCCGCATTTACAAAAGGTAA
+TAGGTTAGTTAGATTTTTCGAGTATGACTTCAATTTGTGCATTTTTAGGATTTTTAACAT
+AACGGTTTGTGTCAGTTAATTTTAACTTTTTACTTAAATCAATCGTGTAATTATTTCCAT
+CAGCAGTTATCTTAATTTGACCTTTATTAAATTCTCCGTTATATAACTTTTTATTCTTTA
+TTAATGTTTGACGAATACGGAAATCTAATTCTTTTAAAGTTAATACTGGTTTATTTCCTT
+TGTAAAATTCATGTCCACCGATGATAGTTTGTTGTTTATCACTATATTTTAAAAATAGTC
+TATAAGGTTTATCAGATGAAGTAGCTGCTGGCGTAACACCACCAGTAAATGTTTCATCAT
+AAGTCCAGTAACCTTCAACTGTGTCCTCATCTTTAGTTCCAAATATATCAACGTATTTAT
+TTCTTAACTGATTAATGTTTCCCCAACTCTCGGATCCAAACACTTGAATATGACTATACC
+AAACCCACGTTTGCAATGTTGCATGAATGCTACCATTGGGCTTTTGCCATAGCCATTTTC
+CAGATAATGAAAAATGCGGCTGAGTGTAATATTTGATTAACTCATTGATATTAGTCTCGT
+TTTCACTGATATTATAGGCTTTTGCTTCAGATGAAAAACTGATTGGTGTTTTAGGAAGTT
+GTGTTGATGTGGTTCCTAAAAGTAACAATGCTGTTGATAAAACTAATTTATTCATGATGT
+TCTTTTTCATATGAAAATCTCCTTTGCGTGAATTACCCAAAGTATATAAGCTATTACACC
+GATTCGGAATTAAATAAAAGCTAAAACTATGTTAAATAAACTTAAACAGTTAGTAGTGTT
+ATTTAAGCAAAACTTATCATTTTTAAGTTTTGGACAGAAACAGTACTTAATAAAGTAGGC
+GGGAGTTATATATTAAAAACGACACGTTACAATTATTCTCTAATCAATTGCATTAAATTG
+TTTGATAATTGAATTTTCTAACTACTTGAAAAATAGTTATACTTTAAATGTAGTACTTAT
+TTTAATTATTTCCTACTACTTAAATTTAATATTAATAAAATGTTCATTTAATTATTGATA
+AAATATTACAAATTTTAATAGTAGGTTGTGTTTATTTTGTATGCGCTTACAATTTAGGTG
+TAACTAAAATAAAAGGAGTTGTTATTAATGAAAAATTTACGAAACAGAAGTTTTTTAACT
+TTATTAGACTTTTCACGACAAGAGGTAGAATTCTTATTAACACTCTCCGAGGATTTAAAA
+CGTGCTAAATATATTGGCACTGAAAAGCCTATGTTAAAAAATAAAAATATTGCACTGTTA
+TTTGAAAAAGATTCTACAAGAACGCGATGTGCATTTGAAGTTGCAGCGCATGATCAAGGT
+GCAAATGTAACTTATTTAGGCCCAACTGGATCACAAATGGGTAAAAAAGAAACAACTAAA
+GATACTGCACGTGTGCTTGGTGGAATGTATGATGGCATTGAATACCGTGGTTTTTCACAA
+AGAACAGTAGAAACTTTAGCTGAAAATTCAGGCGTACCAGTGTGGAATGGTTTAACTGAT
+GAAGATCATCCTACTCAAGTTCTTGCTGATTTCTTAACAGCAAAAGAAGTCTTAAAAAAA
+GATTATGCAGATATTAACTTTACATATGTTGGAGATGGTCGTAATAACGTTGCAAATGCA
+TTAATGCAAGGTGCTGCCATTATGGGTATGAACTTCCATTTAGTTTGTCCAAAAGAATTA
+AATCCAACAGATGAATTATTAAATCGCTGTAAAAATATTGCCGCTGAAAATGGTGGCAAC
+ATATTAATCACAGATGATATTGACCAAGGTGTAAAAGGTTCGGATGTAATTTACACTGAT
+GTTTGGGTATCAATGGGTGAACCTGATGAAGTATGGAAAGAACGACTTGAATTATTGAAA
+CCATATCAAGTAAATAAAGAAATGATGGATAAAACTGGTAATCCAAATGTTATTTTTGAG
+CATTGCTTACCATCTTTCCATAATGCTGATACGAAAATTGGTCAACAAATTTTTGAAAAA
+TATGGTATTCGAGAAATGGAAGTTACAGATGAAGTATTCGAAAGTAAAGCTTCAGTTGTA
+TTCCAAGAAGCTGAGAACAGAATGCATACAATCAAAGCAGTCATGGTTGCTACATTGGGT
+GAATTTTAAATGATATAAGGAAGTGAATATGATGGCGAAAATAGTAGTAGCATTAGGTGG
+TAATGCTTTAGGAAAATCACCTCAAGAACAACTCGAGCTTGTTAAAAATACTGCGAAATC
+ATTAGTAGGATTAATAACAAAAGGACATGAGATTGTTATTAGTCATGGTAATGGACCACA
+GGTTGGAAGCATTAATTTGGGACTTAACTATGCTGCAGAACATAACCAAGGTCCGGCATT
+TCCATTTGCTGAATGTGGCGCAATGAGTCAAGCTTACATCGGCTATCAATTACAAGAAAG
+CTTACAAAATGAATTGCATTCTATTGGAATGGATAAACAAGTGGTAACACTAGTGACACA
+AGTTGAAGTTGATGAAAATGATCCGGCATTTAACAATCCTTCAAAACCAATTGGGTTATT
+TTACAACAAAGAAGAAGCTGAACAAATTCAAAAAGAAAAAGGATTTATATTTGTTGAAGA
+TGCTGGAAGAGGATATAGACGCGTTGTTCCTTCACCACAACCCATCTCTATTATTGAATT
+AGAGAGTATTAAAACACTTATTAAAAATGATACACTCGTTATTGCTGCTGGTGGTGGAGG
+TATACCAGTAATTAGAGAGCAACATGATGGTTTTAAAGGTATTGATGCAGTTATAGACAA
+AGATAAAACAAGTGCGTTGTTGGGTGCTAATATTCAATGCGATCAATTGATTATTTTAAC
+AGCAATTGATTATGTATATATTAATTTTAACACTGAAAACCAACAGCCTTTGAAAACAAC
+AAATGTTGATGAATTAAAACGATATATAGACGAAAATCAATTTGCAAAAGGAAGTATGTT
+ACCAAAAATTGAAGCAGCCATATCATTTATTGAAAACAATCCAAAAGGAAGTGTGCTTAT
+AACATCATTAAATGAATTAGATGCTGCCTTAGAGGGTAAAGTAGGTACTGTGATTAAAAA
+GTAATTGAATTGAAACGCTTTTCAATTACTATATGTCAAATGCATGATTTTTATTATTTA
+TGTGCACCCCCTGAAAATAATGCCTCTATTTTGATGCGGGGTGCACTTTCTTAATTTATA
+TTTATAAAATCTTTAAGGTAGAAATGCTAGGTTAAATGTCGAAGGAGATGAAACCGTGGA
+AAATACAATTAATGAAAGTGAAAAGAAAAAACGATTTAAATTAAAAATGCCAGGTGCATT
+TATGATTTTATTCATTTTAACGGTTGTTGCAGTTATAGCAACATGGGTTATTCCTGCTGG
+TGCATATTCTAAACTTTCTTACGAACCTTCATCCCAAGAACTAAAGATAGTTAACCCTCA
+TAACCAAGTGAAAAAGGTTCCGGGTACGCAACAGGAACTAGACAAAATGGGGGTTAAAAT
+TAAGATTGAACAATTTAAATCAGGTGCAATTAATAAGCCAGTATCAATTCCGAATACTTA
+TGAAAGATTAAAGCAACATCCAGCTGGACCAGAACAAATAACAAGTAGCATGGTTGAAGG
+TACGATAGAAGCGGTCGATATCATGGTATTCATTCTTGTACTAGGGGGACTTATTGGCGT
+AGTTCAAGCCAGTGGTTCTTTTGAATCGGGATTGTTAGCTTTAACGAAGAAAACAAAAGG
+GCATGAATTTATGCTAATTGTGTTTGTATCAATACTAATGATTATCGGCGGGACGTTATG
+TGGTATTGAAGAAGAAGCTGTAGCATTCTATCCGATTTTAGTCCCTATATTTATAGCGTT
+AGGATACGATTCTATCGTTTCAGTTGGTGCCATATTCCTTGCCAGCTCTGTCGGTAGTAC
+ATTTTCAACTATTAACCCGTTCTCGGTTGTAATTGCCTCTAATGCCGCTGGTACAACTTT
+TACGGATGGCTTGTATTGGAGAATAGGTGCTTGTATTGTCGGTGCGATTTTTGTTATTAG
+TTATTTATATTGGTACTGTAAAAAAATTAAAAACGATCCTAAAGCGTCATATTCTTATGA
+AGACAAAGATGCTTTTGAACAGCAATGGTCTGTATTAAAAGATGACGATAGTGCCCATTT
+TACTTTGCGTAAGAAGATAATCCTTACATTATTTGTACTACCATTTCCAATTATGGTATG
+GGGAGTTATGACGCAAGGTTGGTGGTTCCCAGTTATGGCTTCAGCATTTTTAATATTTAC
+AATTATAATAATGTTTATTGCTGGGACAGGTAAATCTGGATTGGGAGAAAAAGGAACTGT
+AGATGCATTTGTCAATGGTGCATCAAGTTTAGTAGGTGTATCTTTAATTATTGGTTTAGC
+TCGAGGTATTAATTTAGTGTTGAATGAAGGTATGATTTCAGATACAATCTTACACTTTTC
+ATCATCTTTAGTTCAACATATGAGTGGACCATTATTTATCATCGTATTACTATTTATTTT
+CTTCTGTTTAGGTTTTATCGTGCCATCATCTTCTGGATTAGCGGTATTATCAATGCCTAT
+CTTTGCACCACTAGCTGATACAGTAGGTATACCAAGATTCGTCATCGTTACGACATATCA
+ATTTGGTCAATATGCGATGTTATTCTTAGCGCCGACTGGACTTGTTATGGCCACACTACA
+AATGTTAAACATGCGATATTCACATTGGTTCCGATTTGTATGGCCGGTAGTTGCTTTTGT
+ATTGATTTTCGGTGGCGGAGTACTAATTACGCAAGTACTAATTTATTCATAATTTGAAAT
+GCTATATTATAAAAATACTAATTGGGTTTTATGCATCTCGTAGGTTTGTAGAAATACTAA
+ACTAAGCGAGGTGCATTATTATTTTTGATTAAGAAAATAATGACGGTAATGATAACACTA
+GTAAGTAATTGATACAATGCTCTATTTAATAATGATATTTTTAAAATTTGTTTTTAATGT
+AATGTTAGATCTATGGTATATTATATTTATCGTGGTAAATATGTATTTGCTGTAAACAGT
+TTTTGTAACACATGACACATATGGTTGTGAAAATAGCATATATAAAGGATGGCTATAAAT
+GACACATTTGACAAAGGTTTTAGATACACTAACTGGAATATGCGTAGTATTATTATTTAG
+TAAATATTTTGTGGCGTATGCAAATATGGTGTTTGATTGGAATTTAAGATGGTATTTGCT
+AGAAAACATACCACATTTGCCAATTATATTATTTATTCTGATGTTTATTTTCGGAGTACC
+TTCTGAAATGATAAAAGATAGGCAAAGGAAAAATAACGGTGTTTAATTTATCGATATTTA
+GAGGTGATAAAATTTGTCAACTATTACAAAGACTAAAATAAAAATCTTTATTAATTATTT
+TATTGGTATATTAGCCTTGGCGTTTGCTGCTTATATATTATTTAAATAAAGGTTAGAGTG
+AAACGTGTTTATGAACTAGACGTTCTAGTATAGTTACTACAGCTTTAGTCAGAAGGTATC
+ATTGATAAGATCATATTAAATCAAAGAGGCATTGATATACACTAAAAAGAGGCAAGATTA
+CCTGCCTCTTTTTTAGTTATTAAATATGCGTGTTAATTCTTGGTAATAGTGTTACTGAGA
+CGCATTACGAAGAAATTTATCTTGATTTTCCTTTTTTAAAAAGAAGTGAAGATATCCTAT
+AAAGACTCTAAGTACTATTACAGTGGCTAATAACACTTCGATAAACAAAAGACCTTTCCA
+AATATCTGGAAACATAAGTACAGGCAAACTGTTCTTTAAAGCAGTTGCTGAGATTACTAA
+AGGGAATGTGAAAGCTGAAAATACGGGCGAAAAAGGTTCTTTTAGTAATTTAGGCAATTG
+TATAATGATATAAAAATAAAATATTTGAGCTAATATTAAAAATATAATTACGATAAAAGC
+ATTCGCCTTAGGAAAGGCTATAACATATGCTGCAGCGACTAAAGAAAATGGTGCACAAAT
+TGTCGATGTGTTCGGTTTGATTGACGTCTGCAATGGAAATGCTTTTAATCGCTTGAAAAC
+AATAGGTAAGACAATACAAGTAGCTACAAAGCCATATATTACTGTTAATTGACCTATGAA
+AAAACATCCGCTAACGGGTGCCGTCAATCCTGCGATAGCAATACCAATAAAAAGTACAGT
+CCACGAAGGATAAACATTTTCAAGTGAAAAATCTTTTAAATATTTTATTGAAAAAATAAT
+CATATGCGTCATAATTCCCACAAGGCATAAAATCCAAATAGGCGTTATTAAGCTATTGAT
+AAAAGTTATGTTACTAAAAAATGTATTTAAATAAGTAGTACCTAAAAAGCCAGACATGAA
+AAATGTTGTGAACACTGATGAAACTAGAGGACTGTTTAATTGTTCTTTCACATTATTAAA
+ATATTTGATCATAGTACATAAAAGGTGAATCCAAATCAAGAAAGCAAAGATTCCGCAAAC
+AGCGTTTAAAGTAAGAGATAAGTCTTTTAATAGATTACCCAGGCCTAATAATCCTAAGAC
+TAGTCCTGACGTTACTAGAGGTGCTTTTTGAAGTCTCATGATTTACAACCTTTCTTATGT
+GATTTTCTTCACTAATTATATCATGATAGCTATGGCCAATTAATAAGAAGAGTGTGTACT
+ATTACGTTATTAGAGTATGTATATTGGATTGATAGACACATAATTGACATTTAAATCTCA
+AAATTAATGATATTTGAGGTATGCTTTGCAACCTAAATATTGGAATATGTGGAAAAGTAA
+TTATTTAATTTAAATGATGATTGAAGATTTACAAGGGGGTGTACAAAATGAGAAATCAAA
+TTCAAAAACTATTAGACAGTGATTTGAGCAGTTTACATATATCGAAACAAACAGGAGTTC
+CACAAAGCACAATACACAGAATGAGAAAAAAAGAAAGATCATTAGACAATATGTCATTGA
+AAAACGCTGAACTACTTTATAAATTTGCCAATAGTATATTTAGCAATGAAAATTAAGAAG
+AATAATTAGTGAGTTAAATAATTAAAAAACCACGTCTCGATGGACGTGGTTATTTTTTTA
+AATGTATTTAATTATAGGATTTCGGGCCCCTGAAAAGTCCCTAAAATTTTGAATAGTATC
+AAAATCTATAGGAATAATATAATAATAAAAATCTCTACGCATGCAAATGGTGAGTGTTAA
+AAATCTTGGTGTAGTGTTGGTGTAGTTCAGGTGCAGTATAGATGAAATTTGAGCACAAAA
+TTACTTAATTAAAAATGTATAAACAAGTGTCAGACGCGTGTCAATTGTGTGTCAAAAAAT
+TCTATGACGAATAAGCATGCTTGTTTTAATTACTGTATTTTCGGGGCTTATGCGGGGGAC
+GTTTTGGTGACGCATCATACTATATTACTGAAATTCAAAACAAAAAGAGCCCCGTAATCA
+CGGAACTCTTTTGTTTGGTAATGCGTATAAAAATACCTATAAACGTCCTGGGAGGGATTC
+GAACCCCCGACCGATGGCTTAGAAGGCCATTGCTCTATCCAGCTGAGCTACCAGGACACG
+TTTAACAACACAAGAATTATTATATCTAAATGAACTTAAATTAGCAATACCTTGTAAATA
+AAAAATGTTTATATTTTTCACTATTATAGAGCTATTTATCTAAAAAGGTTCAATAAGACT
+TAAATGCGAATTCAGGCAACTTAATTGTGTTAAATACAGTTTTGAATGCCTAACTGTATT
+TCTTTTCTCTTTAAAATACAGTTAAGTACATTATAAGATGTTGTGCGGATAAACAAACTA
+ATTGCATCAAATTTATTTTAAAATAACAACAACAAAACGTTAAGCGAATAACATTTCGGT
+GATTTAAAAGCTACGCACGTTTTTGTTATCTTCAAATTTAAATTTTAAGGAGTGTTTTCA
+ATGGAAGGTTTATTTAACGCAATTAAAGATACCGTAACTGCAGCAATTAATAATGATGGC
+GCAAAATTAGGCACAAGCATTGTGAGCATCGTTGAAAATGGCGTAGGTTTATTAGGTAAA
+TTATTCGGATTCTAATTTCAATATGTTATGTAAGTAATCAGTATTATTTCAAAGGTGAGG
+GAGAGATTTAAATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAAC
+AACATGATAGTGTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTT
+TACTAGGTAAATTATTTGGATTCTAATATAATAACTAATATTCTTTAAAATAAACTGGGT
+GAGCATACTTTAATGTTATGCACTCAGTTTATTTTATTTGCAGAAATTTGAGCCTCTGTT
+AAGATTTAGATACATAGACAATATAGGAGATGGGGAAATTGGGATATAAAAATATTTTGA
+TAGACTTTGATGATACAATTGTTGATTTTTATGATGCAGAAGAATGGGCGTTTCACTATA
+TGGCGAATGTTTTTAATCATAAAGCAACAAAGGATGATTTTTTAACATTTAAAAAAATCA
+ATCACCAACATTGGGAAGCTTTTCAACAAAATAAATTAACGAAGTCTGAAGTATTATCAG
+AACGATTTGTGAATTACTTCAAACATCATCAAATGGAAGTTGATGGGCATCGTGCAGATG
+TGTTATTTAGAAATGGATTAGCAGAAGCTAAAGTTAAATACTTTGATCAAACATTAGAAA
+CAATTGTCGAATTATCGAAAAGACATGATTTATATATTGTTACTAATGGTGTAACCGAAA
+CGCAAAAGAGAAGGTTAAATCAGACGCCGTTGCATAAATATATTAAAAAGATATTTATAT
+CTGAGGAAACAGGATATCAAAAACCTAATCCGGAATTTTTTAATTATGTTTTTAATGATA
+TTGGTGAGGATGAAAGACAGCACTCGATTATAGTTGGAGATTCTTTAACATCTGACATTC
+TAGGTGGAATCAATGCGGGTATAGCTACTTGCTGGTTTAATTTTAGAGGATTTGATCATA
+ATCCAGGAATTATACCTGATTATGAAATTAATTCATGGAAACAACTAAATGATATTGTAC
+GTTAAGACAAATAATTTGAAAATGATTTATATTAGCTTAAGAAAGTAATTGTAAAAACGA
+CACTACGAAAAGATGCATTTTCATGCTCGTAGTGTCGAATATTATTATCTTATGGTTTCC
+ACAATAAGACATCATGTCCCTCTATATTTTTAGCTTCCACATCTGTAAATCCATGATGTA
+AAAAGAAATCCTTAGAATCATTTCTGCCAATGGCTTTAATTGGCATATTGAAACTCTTTG
+CAAAATCAATCAATTCTGAAGCGTACCCTCTGTTTTGATATTTTGGTAATACTTCTAACT
+TCCATAATAATATATAATCTTCAAAATCTGGGAAGTAGATTTCTTCGACATCACCTTTTT
+TTAGTAATGCCATTCTAGCTCCTAATTGATCTCCGACAAATATGCCATAAAATGGTGAAT
+CTGAACTTGCATCAATCATTTGACCGTTTAACTCATTGACCATGTATAAGTCTTTGTTGC
+CAAACGCTCTAAAGTTTTCGAATAATTCGTCAGTTTTGTAATTAATTTCAAGACGTTTGA
+TTTCACTCATATTTATCTACCCCTTATTTGTTTACTACTATTATTATACATTAAACCACT
+TGTGTTTTTCATTGATTTGAAAATGTAAAACAATGAACATTATAAATTTTCCATGATTAA
+ATTGATTTTCAAGCCGTAAAATTGTCAATCACCGGTATAGACTTTATACTATAGTTTGTA
+ATCGCGTACTTAAGGAGAAAAAACATGGACTGTAAAGTAGTTAGTTTAAATGAAAAAGAT
+CAGTTTATACCAAAAATAAAGAGCAGTGACCCTGTAATAACAGGATTATTTCAATATGAT
+GCAGCTCAACAAACTAGTTTTGAAAAAAGGATGTCTAAAGAAAATAATGGAAGAGAAGCG
+GCATTAGCGAATGTTATTCGTGAATATATGAGTGATTTAAAGCTTTCAAATGAACAAGAA
+TTAAACATACAACATTTAGCTAATGGTTCAAAAGTTGTGATTGGTGGACAACAAGCAGGG
+CTTTTCGGGGGACCATTGTATACATTCCATAAAATATTTTCAATCATTACTTTATCTAAG
+GAATTAACGGATACACATAAGCAACAAGTAGTACCAGTTTTTTGGATTGCAGGAGAAGAT
+CATGATTTCGATGAAGTGAATCATACATTTGTTTATAACGAAAATCATGGGTCGCTGCAT
+AAGGTTAAATATCATACAATGGAGATGCCAGAGACGACAGTCTCTAGATATTATCCTGAT
+AAGGCTGAGTTGAAACAAACTTTAAAAACGATGTTCATTCATATGAAAGAAACTGTTCAT
+ACACAAGGTCTACTGGAGATTTGTGACAGAATTATTGACCAATATGACTCGTGGACTGAT
+ATGTTTAAAGCACTACTGCATGAAACATTTAAAGCATATGGCGTTCTATTTATAGATGCG
+CAGTTTGAGCCGTTAAGAAAAATGGAAGCGCCTATGTTTAAAAAGATTTTGAAAAAACAT
+CAGTTGCTTGATGATGCTTTTAGAGCAACACAACAACGTACTCAAAATCAAGGCTTGAAT
+GCGATGATACAAACAGATACAAATGTTCATTTATTCTTACATGATGAAAATATGCGCCAA
+TTAGTTTCGTATGATGGTAAGCATTTTAAATTAAATAAAACAGATAAGACATATATAAAG
\ No newline at end of file
diff --git a/t/data/locus_tag_gffs/query_2.gff.proteome.faa.expected b/t/data/locus_tag_gffs/query_2.gff.proteome.faa.expected
new file mode 100644
index 0000000..4950fa7
--- /dev/null
+++ b/t/data/locus_tag_gffs/query_2.gff.proteome.faa.expected
@@ -0,0 +1,75 @@
+>zzz_00001
+MKTRIVSSVTTTLLLGSILMNPVANAADSDINIKTGTTDIGSNTTVKTGDLVTYDKENGM
+HKKVFYSFIDDKNHNKKLLVIRTKGTIAGQYRVYSEEGANKSGLAWPSAFKVQLQLPDNE
+VAQISDYYPRNSIDTKEYMSTLTYGFNGNVTGDDTGKIGGLIGANVSIGHTLKYVQPDFK
+TILESPTDKKVGWKVIFNNMVNQNWGPYDRDSWNPVYGNQLFMKTRNGSMKAAENFLDPN
+KASSLLSSGFSPDFATVITMDRKASKQQTNIDVIYERVRDDYQLHWTSTNWKGTNTKDKW
+TDRSSERYKIDWEKEEMTN*
+>zzz_00002
+MKLFYIVFLIIIWLNIFLGNEIIHTLTVLITTLYIVNSRKGIKNDRVE*
+>zzz_00003
+MTELNNIINSLQSLFESESGYKISKNSGVPYQTVQDLRNGKTKLEDARFRTIIKLYSYYV
+SLKEH*
+>zzz_00004
+MSKNITKNIILTTTLLLLGTVLPQNQKPVFSFYSEAKAYSIGQDETNINELIKYYTQPHF
+SFSNKWLYQYDNGNIYVELKRYSWSAHISLWGAESWGNINQLKDRYVDVFGLKDKDTDQL
+WWSYRETFTGGVTPAAKPSDKTYNLFVQYKDKLQTIIGAHKIYQGNKPVLTLKEIDFRAR
+EALIKNKILYTENRNKGKLKITGGGNNYTIDLSKRLHSDLANVYVKNPNKITVDVLFD*
+>zzz_00005
+MNNNITKKIILSTTLLLLGTAFTQFPNTPINSSSEAKAYYINQNETNVNELTKYYSQKYL
+TFSNSTLWQKDNGTIHATLLQFSWYSHIQVYGPESWGNINQLRNKSVDIFGIKDQETIDS
+FALSQETFTGGVTPAATSNDKHYKLNVTYKDKAETFTGGFPVYEGNKPVLTLKELDFRIR
+QTLIKSKKLYNNSYNKGQIKITGTDNNYTIDLSKRLPSTDANRYVKKPQNAKIEVILEKS
+N*
+>zzz_00006
+MKKNIMNKLVLSTALLLLGTTSTQLPKTPISFSSEAKAYNISENETNINELIKYYTQPHF
+SLSGKWLWQKPNGSIHATLQTWVWYSHIQVFGSESWGNINQLRNKYVDIFGTKDEDTVEG
+YWTYDETFTGGVTPAATSSDKPYRLFLKYSDKQQTIIGGHEFYKGNKPVLTLKELDFRIR
+QTLIKNKKLYNGEFNKGQIKITADGNNYTIDLSKKLKLTDTNRYVKNPKNAQIEVILEKS
+N*
+>zzz_00007
+MKNLRNRSFLTLLDFSRQEVEFLLTLSEDLKRAKYIGTEKPMLKNKNIALLFEKDSTRTR
+CAFEVAAHDQGANVTYLGPTGSQMGKKETTKDTARVLGGMYDGIEYRGFSQRTVETLAEN
+SGVPVWNGLTDEDHPTQVLADFLTAKEVLKKDYADINFTYVGDGRNNVANALMQGAAIMG
+MNFHLVCPKELNPTDELLNRCKNIAAENGGNILITDDIDQGVKGSDVIYTDVWVSMGEPD
+EVWKERLELLKPYQVNKEMMDKTGNPNVIFEHCLPSFHNADTKIGQQIFEKYGIREMEVT
+DEVFESKASVVFQEAENRMHTIKAVMVATLGEF*
+>zzz_00008
+MMAKIVVALGGNALGKSPQEQLELVKNTAKSLVGLITKGHEIVISHGNGPQVGSINLGLN
+YAAEHNQGPAFPFAECGAMSQAYIGYQLQESLQNELHSIGMDKQVVTLVTQVEVDENDPA
+FNNPSKPIGLFYNKEEAEQIQKEKGFIFVEDAGRGYRRVVPSPQPISIIELESIKTLIKN
+DTLVIAAGGGGIPVIREQHDGFKGIDAVIDKDKTSALLGANIQCDQLIILTAIDYVYINF
+NTENQQPLKTTNVDELKRYIDENQFAKGSMLPKIEAAISFIENNPKGSVLITSLNELDAA
+LEGKVGTVIKK*
+>zzz_01705
+VENTINESEKKKRFKLKMPGAFMILFILTVVAVIATWVIPAGAYSKLSYEPSSQELKIVN
+PHNQVKKVPGTQQELDKMGVKIKIEQFKSGAINKPVSIPNTYERLKQHPAGPEQITSSMV
+EGTIEAVDIMVFILVLGGLIGVVQASGSFESGLLALTKKTKGHEFMLIVFVSILMIIGGT
+LCGIEEEAVAFYPILVPIFIALGYDSIVSVGAIFLASSVGSTFSTINPFSVVIASNAAGT
+TFTDGLYWRIGACIVGAIFVISYLYWYCKKIKNDPKASYSYEDKDAFEQQWSVLKDDDSA
+HFTLRKKIILTLFVLPFPIMVWGVMTQGWWFPVMASAFLIFTIIIMFIAGTGKSGLGEKG
+TVDAFVNGASSLVGVSLIIGLARGINLVLNEGMISDTILHFSSSLVQHMSGPLFIIVLLF
+IFFCLGFIVPSSSGLAVLSMPIFAPLADTVGIPRFVIVTTYQFGQYAMLFLAPTGLVMAT
+LQMLNMRYSHWFRFVWPVVAFVLIFGGGVLITQVLIYS*
+>zzz_00010
+MTHLTKVLDTLTGICVVLLFSKYFVAYANMVFDWNLRWYLLENIPHLPIILFILMFIFGV
+PSEMIKDRQRKNNGV*
+>zzz_00011
+MRLQKAPLVTSGLVLGLLGLGNLLKDLSLTLNAVCGIFAFLIWIHLLCTMIKYFNNVKEQ
+LNSPLVSSVFTTFFMSGFLGTTYLNTFFSNITFINSLITPIWILCLVGIMTHMIIFSIKY
+LKDFSLENVYPSWTVLFIGIAIAGLTAPVSGCFFIGQLTVIYGFVATCIVLPIVFKRLKA
+FPLQTSIKPNTSTICAPFSLVAAAYVIAFPKANAFIVIIFLILAQIFYFYIIIQLPKLLK
+EPFSPVFSAFTFPLVISATALKNSLPVLMFPDIWKGLLFIEVLLATVIVLRVFIGYLHFF
+LKKENQDKFLRNASQ*
+>zzz_00012
+MRNQIQKLLDSDLSSLHISKQTGVPQSTIHRMRKKERSLDNMSLKNAELLYKFANSIFSN
+EN*
+>zzz_00014
+MEGLFNAIKDTVTAAINNDGAKLGTSIVSIVENGVGLLGKLFGF*
+>zzz_00015
+MTGLAEAIANTVQAAQQHDSVKLGTSIVDIVANGVGLLGKLFGF*
+>zzz_00016
+LGYKNILIDFDDTIVDFYDAEEWAFHYMANVFNHKATKDDFLTFKKINHQHWEAFQQNKL
+TKSEVLSERFVNYFKHHQMEVDGHRADVLFRNGLAEAKVKYFDQTLETIVELSKRHDLYI
+VTNGVTETQKRRLNQTPLHKYIKKIFISEETGYQKPNPEFFNYVFNDIGEDERQHSIIVG
+DSLTSDILGGINAGIATCWFNFRGFDHNPGIIPDYEINSWKQLNDIVR*
diff --git a/t/data/locus_tag_gffs/query_3.gff b/t/data/locus_tag_gffs/query_3.gff
new file mode 100644
index 0000000..b4b0cc0
--- /dev/null
+++ b/t/data/locus_tag_gffs/query_3.gff
@@ -0,0 +1,271 @@
+##gff-version 3
+##sequence-region abc|SC|contig000001 1 15000
+abc|SC|contig000001	Prodigal:2.60	CDS	172	1131	.	-	0	gene=hly;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q2G1X0,protein motif:TIGRFAMs:TIGR01002,protein motif:Pfam:PF07968.6;locus_tag=xxx_00001;product=Alpha-toxin,beta-channel forming cytolysin,Leukocidin/Hemolysin toxin family protein;protein_id=gnl|SC|abc_00001
+abc|SC|contig000001	Prodigal:2.60	CDS	1804	1950	.	+	0	inference=ab initio prediction:Prodigal:2.60;locus_tag=xxx_00002;product=hypothetical protein;protein_id=gnl|SC|abc_00002
+abc|SC|contig000001	Prodigal:2.60	CDS	1934	2131	.	+	0	inference=ab initio prediction:Prodigal:2.60;locus_tag=xxx_00003;product=hypothetical protein;protein_id=gnl|SC|abc_00003
+abc|SC|contig000001	Prodigal:2.60	CDS	2621	3337	.	-	0	inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13350,protein motif:Pfam:PF02876.11;locus_tag=xxx_00004;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00004
+abc|SC|contig000001	Prodigal:2.60	CDS	3445	4170	.	-	0	gene=speH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0C0I6,protein motif:CLUSTERS:PRK13349,protein motif:Pfam:PF02876.11;locus_tag=xxx_00005;note=SPE H;product=hypothetical protein,superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00005
+abc|SC|contig000001	Prodigal:2.60	CDS	4265	4990	.	-	0	inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13043,protein motif:Pfam:PF02876.11;locus_tag=xxx_00006;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00006
+abc|SC|contig000001	Prodigal:2.60	CDS	5428	6429	.	+	0	eC_number=2.1.3.3;gene=argF;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P99073,protein motif:CLUSTERS:PRK04284,protein motif:Cdd:COG0078,protein motif:TIGRFAMs:TIGR00658,protein motif:Pfam:PF00185.1;locus_tag=xxx_00007;product=Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Aspartate/ornithine carbamoyltransferase%2C Asp/Orn b [...]
+abc|SC|contig000001	Prodigal:2.60	CDS	6449	7384	.	+	0	eC_number=2.7.2.2;gene=arcC1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q7A627,protein motif:CLUSTERS:PRK12353,protein motif:TIGRFAMs:TIGR00746,protein motif:Pfam:PF00696.22;locus_tag=xxx_00008;product=Carbamate kinase 1,putative amino acid kinase,carbamate kinase,Amino acid kinase family protein;protein_id=gnl|SC|abc_00008
+abc|SC|contig000001	RNAmmer:1.2	rRNA	7556	9112	.	+	0	inference=COORDINATES:profile:RNAmmer:1.2;locus_tag=xxx_01705;product=16S ribosomal RNA
+abc|SC|contig000001	Prodigal:2.60	CDS	9419	9646	.	+	0	inference=ab initio prediction:Prodigal:2.60;locus_tag=xxx_00010;product=hypothetical protein;protein_id=gnl|SC|abc_00010
+abc|SC|contig000001	Prodigal:2.60	CDS	9952	10899	.	-	0	inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF03595.11;locus_tag=xxx_00011;product=C4-dicarboxylate transporter/malic acid transport protein;protein_id=gnl|SC|abc_00011
+abc|SC|contig000001	Prodigal:2.60	CDS	11148	11336	.	+	0	inference=ab initio prediction:Prodigal:2.60;locus_tag=xxx_00012;product=hypothetical protein;protein_id=gnl|SC|abc_00012
+abc|SC|contig000001	Aragorn:1.2.34	tRNA	11803	11878	.	-	0	inference=COORDINATES:profile:Aragorn:1.2.34;locus_tag=xxx_00013;product=tRNA-Arg(tct)
+abc|SC|contig000001	Prodigal:2.60	CDS	12241	12375	.	+	0	inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5;locus_tag=xxx_00014;product=Gonococcal growth inhibitor III,Staphylococcus haemolytic protein;protein_id=gnl|SC|abc_00014
+abc|SC|contig000001	Prodigal:2.60	CDS	12432	12566	.	+	0	inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5;locus_tag=xxx_00015;product=Gonococcal growth inhibitor III,Staphylococcus haemolytic protein;protein_id=gnl|SC|abc_00015
+abc|SC|contig000001	Prodigal:2.60	CDS	12699	13385	.	+	0	eC_number=3.-.-.-;gene=yfnB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O06480,protein motif:CLUSTERS:PRK09449,protein motif:TIGRFAMs:TIGR02254,protein motif:Pfam:PF00702.1;locus_tag=xxx_00016;product=Putative HAD-hydrolase yfnB,dUMP phosphatase,HAD hydrolase,haloacid dehalogenase-like hydrolase;protein_id=gnl|SC|abc_00016
+##FASTA
+>abc|SC|contig000001
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+ATACCCACCACATTTGGTGGAGAACCAAAAATTAGCCGAAAAACATCATTTCTGAAGTTA
+TCGGCTAAAGTTATAAATTATATTTATTTGTACATGAACAAATAATTTACATTAATTTGT
+CATTTCTTCTTTTTCCCAATCGATTTTATATCTTTCTGAAGAACGATCTGTCCATTTATC
+TTTAGTATTGGTACCTTTCCAATTTGTTGAAGTCCAATGCAATTGGTAGTCATCACGAAC
+TCGTTCGTATATTACATCTATATTTGTTTGTTGTTTGGATGCTTTTCTATCCATAGTAAT
+AACTGTAGCGAAGTCTGGTGAAAACCCTGAAGATAATAGAGAACTTGCTTTGTTAGGATC
+AAGGAAGTTCTCTGCTGCTTTCATAGAACCATTTCTAGTTTTCATGAAAAGTTGATTGCC
+ATATACCGGGTTCCAAGAATCTCTATCATATGGTCCCCAATTTTGATTCACCATATTGTT
+AAATATCACTTTCCAGCCTACTTTTTTATCAGTTGGGCTCTCTAAAATTGTTTTGAAATC
+AGGTTGAACATATTTCAGTGTATGACCAATCGAAACATTTGCACCAATAAGGCCGCCAAT
+TTTTCCTGTATCATCACCAGTAACATTACCGTTGAATCCATAAGTTAAAGTACTCATATA
+CTCTTTTGTATCAATCGAATTTCTTGGATAGTAATCAGATATTTGAGCTACTTCATTATC
+AGGTAGTTGCAACTGTACCTTAAAGGCTGAAGGCCAGGCTAAACCACTTTTGTTAGCACC
+TTCTTCGCTATAAACTCTATATTGACCAGCAATGGTACCTTTCGTTCTAATAACTAGCAG
+TTTTTTATTGTGATTTTTATCATCGATAAAACTATAAAATACTTTTTTGTGCATGCCATT
+TTCTTTATCATAAGTGACTAAATCACCTGTTTTTACTGTAGTATTGCTTCCAATATCTGT
+AGTACCGGTTTTAATATTAATATCAGAATCTGCGGCATTAGCGACAGGATTCATTAATAT
+GGAACCTAGCAATAGTGTTGTTGTTACTGAGCTGACTATACGTGTTTTCATTTTCATCAT
+CCTTCTATTTTTTTTAACGATTTGAGGAAACAATAATCAATATGTCAATTTAGAATATTG
+CAGTTGAGACATATCAAATATTTATAATCGTTTATATTAGTATATTTAATGTAGCAACTG
+ATAAATTACTGAGTGATGATGAGTGATTATTTTAAGAATATGTTTTTAACTTTTATTTAA
+AATTTGAAAGGAAGCATTTCAATTTCGAGGGTTAGTCAAAGTTGAATAAATTCTTTATGA
+AACAAGGAAAAGACATAGCTAATTTTATTGATTAATTTCTTTAAAACTAATGATTTGTTT
+GATTTAAAAATGTAATCGATTACAATATAAAAATACAAATATCTTAGAATTAAATCAATT
+AATTAACTATTAAATAAAAATTAAATATATATTAACTACTGTAAATTAATAAATAGAAAT
+AGAGAAAAAGGGTATTAATTATGTTTGGAATTTCGTCGGGAATAATTTTTGCCATTACAT
+AGAAATATCTAATAGATAATGAAAAAGTATCGTATGTATTTTTAATATAGTGTAAAATAT
+CATATGTAAAATAAAATGTAGATTTTTAGTTAGAGGCATTATAAGAAAATTTTGAGTATA
+GGTTAGCTTTTAATTATGAATCTTATTGAAATTTGATTAATAAATATATGATAGGGGATT
+AAAATGAAACTATTTTATATCGTATTTCTTATTATTATATGGCTGAATATATTTTTAGGA
+AATGAAATTATCCATACACTGACTGTTTTAATAACAACATTGTATATTGTTAATTCAAGA
+AAGGGGATTAAAAATGACAGAGTTGAATAATATTATAAACTCTCTTCAATCTTTGTTTGA
+GTCTGAATCAGGCTATAAAATTTCGAAAAATTCAGGAGTTCCATATCAAACAGTACAAGA
+TTTAAGAAATGGGAAAACCAAACTAGAAGATGCTAGATTTAGAACGATTATTAAACTTTA
+TAGTTACTATGTCTCATTAAAAGAACATTAATCATGGGACAATAAAGTATTGCTATAATA
+ATTGAATCATTAAATGAAGGAATAGAGCCTAAGACATAATAAATCAATGTCTTAGGCTCT
+ACAATATTATATTGGTAGTAGTTGACCGAATGAAAATGACCATGTAACAAGCATTTTTGG
+GGCCCCAACACAGAAGCTGACGAAAAGTCAGCTTACAATAATGTGCAAGTTGGGGATGGG
+CCCCAACACAGAGAATTTCAAAAAGAAATTCTACAGACAATGCAAGTTGGCGGGGCCCCA
+ACATAGAGAAATTCAAAAAGAAATTCTACAGACAATGCAAGTTGGGGAGTGACAACGAAA
+TAAATTTTATAAAATACCATTTCTGTCACATTCCTACTCCCAATTCCATTTAAATATATA
+GAAAATTTCACCTATTATATACATCATACACTTTAAAAATTATCATTTAAATAATCTGAT
+TTGGAATTAAAGTGCATGAAGTATAAGTCACCTTCATATACTAATCAAAGAGGACGTCAA
+CAGTTATTTTATTAGGATTTTTAACATAAACATTTGCTAGATCTGAATGTAATCTTTTGC
+TTAAATCAATAGTGTAGTTATTACCGCCACCGGTGATCTTAAGCTTACCTTTATTACGAT
+TTTCGGTATATAATATTTTATTTTTTATTAACGCTTCTCGTGCACGGAAATCGATTTCTT
+TCAATGTTAATACTGGTTTATTGCCTTGGTATATTTTATGCGCACCAATAATCGTTTGTA
+GTTTATCTTTGTATTGCACAAAAAGATTATAAGTTTTATCAGAAGGTTTTGCGGCTGGTG
+TAACGCCACCTGTAAATGTCTCTCTATAAGACCACCATAACTGATCAGTATCTTTGTCTT
+TTAGTCCAAACACATCTACGTAACGATCTTTTAACTGATTAATATTTCCCCAACTTTCAG
+CGCCCCATAAAGATATATGTGCTGACCATGAATATCTCTTAAGTTCAACATAAATGTTTC
+CATTATCATATTGATATAGCCATTTATTTGAAAATGAAAAATGAGGCTGTGTGTAATATT
+TAATTAATTCATTGATGTTAGTTTCATCTTGACCAATGCTATAAGCTTTAGCTTCAGAGT
+AAAAACTAAATACTGGTTTTTGATTTTGAGGTAATACAGTACCTAATAGTAATAATGTTG
+TCGTTAAAATTATATTTTTCGTGATGTTCTTACTCATTAGAACATCTCCTTTCAGAGGAA
+TCATGATACGAGGAATAAGAAATTTAAATGTGAGCGAAGTCAATATAGTATTTGCGATTA
+TTTTTATTAACTCCATTATTATTGTTAGTTTGATTTTTCGAGGATAACTTCAATTTTTGC
+ATTTTGAGGTTTTTTAACATATCTATTTGCATCAGTTGATGGCAACCTTTTACTTAAATC
+TATTGTGTAGTTATTGTCTGTACCTGTTATTTTAATTTGTCCTTTATTATAAGAATTATT
+ATATAATTTTTTACTTTTAATTAATGTTTGACGAATACGAAAATCTAATTCTTTTAAAGT
+TAAAACAGGCTTATTGCCTTCATAAACTGGAAATCCGCCAGTAAACGTTTCTGCTTTATC
+TTTATATGTTACATTCAGTTTATAGTGTTTATCGTTAGATGTTGCTGCAGGAGTAACACC
+ACCAGTAAACGTTTCTTGAGATAATGCAAAAGAATCAATGGTTTCTTGGTCTTTTATGCC
+AAAAATATCAACGCTTTTATTTCTTAATTGGTTGATATTGCCCCAACTTTCAGGTCCATA
+AACTTGAATATGACTATACCAAGAAAACTGTAACAACGTTGCATGAATCGTACCGTTATC
+TTTTTGCCATAACGTACTGTTAGAGAAGGTTAAATATTTTTGCGAGTAATATTTAGTTAA
+CTCATTAACGTTAGTTTCGTTTTGATTTATATAATAAGCTTTCGCTTCAGATGAAGAATT
+GATAGGTGTATTAGGAAATTGTGTAAATGCTGTACCTAATAGTAACAATGTTGTTGATAA
+AATAATTTTTTTCGTGATGTTATTGTTCATTCGAATTTCTCCTTTGAGTATTGTTGGAAT
+GTTTAATTATAAAAAGGGGTTAATTAGATAATTGAAATTATCCGCATTTACAAAAGGTAA
+TAGGTTAGTTAGATTTTTCGAGTATGACTTCAATTTGTGCATTTTTAGGATTTTTAACAT
+AACGGTTTGTGTCAGTTAATTTTAACTTTTTACTTAAATCAATCGTGTAATTATTTCCAT
+CAGCAGTTATCTTAATTTGACCTTTATTAAATTCTCCGTTATATAACTTTTTATTCTTTA
+TTAATGTTTGACGAATACGGAAATCTAATTCTTTTAAAGTTAATACTGGTTTATTTCCTT
+TGTAAAATTCATGTCCACCGATGATAGTTTGTTGTTTATCACTATATTTTAAAAATAGTC
+TATAAGGTTTATCAGATGAAGTAGCTGCTGGCGTAACACCACCAGTAAATGTTTCATCAT
+AAGTCCAGTAACCTTCAACTGTGTCCTCATCTTTAGTTCCAAATATATCAACGTATTTAT
+TTCTTAACTGATTAATGTTTCCCCAACTCTCGGATCCAAACACTTGAATATGACTATACC
+AAACCCACGTTTGCAATGTTGCATGAATGCTACCATTGGGCTTTTGCCATAGCCATTTTC
+CAGATAATGAAAAATGCGGCTGAGTGTAATATTTGATTAACTCATTGATATTAGTCTCGT
+TTTCACTGATATTATAGGCTTTTGCTTCAGATGAAAAACTGATTGGTGTTTTAGGAAGTT
+GTGTTGATGTGGTTCCTAAAAGTAACAATGCTGTTGATAAAACTAATTTATTCATGATGT
+TCTTTTTCATATGAAAATCTCCTTTGCGTGAATTACCCAAAGTATATAAGCTATTACACC
+GATTCGGAATTAAATAAAAGCTAAAACTATGTTAAATAAACTTAAACAGTTAGTAGTGTT
+ATTTAAGCAAAACTTATCATTTTTAAGTTTTGGACAGAAACAGTACTTAATAAAGTAGGC
+GGGAGTTATATATTAAAAACGACACGTTACAATTATTCTCTAATCAATTGCATTAAATTG
+TTTGATAATTGAATTTTCTAACTACTTGAAAAATAGTTATACTTTAAATGTAGTACTTAT
+TTTAATTATTTCCTACTACTTAAATTTAATATTAATAAAATGTTCATTTAATTATTGATA
+AAATATTACAAATTTTAATAGTAGGTTGTGTTTATTTTGTATGCGCTTACAATTTAGGTG
+TAACTAAAATAAAAGGAGTTGTTATTAATGAAAAATTTACGAAACAGAAGTTTTTTAACT
+TTATTAGACTTTTCACGACAAGAGGTAGAATTCTTATTAACACTCTCCGAGGATTTAAAA
+CGTGCTAAATATATTGGCACTGAAAAGCCTATGTTAAAAAATAAAAATATTGCACTGTTA
+TTTGAAAAAGATTCTACAAGAACGCGATGTGCATTTGAAGTTGCAGCGCATGATCAAGGT
+GCAAATGTAACTTATTTAGGCCCAACTGGATCACAAATGGGTAAAAAAGAAACAACTAAA
+GATACTGCACGTGTGCTTGGTGGAATGTATGATGGCATTGAATACCGTGGTTTTTCACAA
+AGAACAGTAGAAACTTTAGCTGAAAATTCAGGCGTACCAGTGTGGAATGGTTTAACTGAT
+GAAGATCATCCTACTCAAGTTCTTGCTGATTTCTTAACAGCAAAAGAAGTCTTAAAAAAA
+GATTATGCAGATATTAACTTTACATATGTTGGAGATGGTCGTAATAACGTTGCAAATGCA
+TTAATGCAAGGTGCTGCCATTATGGGTATGAACTTCCATTTAGTTTGTCCAAAAGAATTA
+AATCCAACAGATGAATTATTAAATCGCTGTAAAAATATTGCCGCTGAAAATGGTGGCAAC
+ATATTAATCACAGATGATATTGACCAAGGTGTAAAAGGTTCGGATGTAATTTACACTGAT
+GTTTGGGTATCAATGGGTGAACCTGATGAAGTATGGAAAGAACGACTTGAATTATTGAAA
+CCATATCAAGTAAATAAAGAAATGATGGATAAAACTGGTAATCCAAATGTTATTTTTGAG
+CATTGCTTACCATCTTTCCATAATGCTGATACGAAAATTGGTCAACAAATTTTTGAAAAA
+TATGGTATTCGAGAAATGGAAGTTACAGATGAAGTATTCGAAAGTAAAGCTTCAGTTGTA
+TTCCAAGAAGCTGAGAACAGAATGCATACAATCAAAGCAGTCATGGTTGCTACATTGGGT
+GAATTTTAAATGATATAAGGAAGTGAATATGATGGCGAAAATAGTAGTAGCATTAGGTGG
+TAATGCTTTAGGAAAATCACCTCAAGAACAACTCGAGCTTGTTAAAAATACTGCGAAATC
+ATTAGTAGGATTAATAACAAAAGGACATGAGATTGTTATTAGTCATGGTAATGGACCACA
+GGTTGGAAGCATTAATTTGGGACTTAACTATGCTGCAGAACATAACCAAGGTCCGGCATT
+TCCATTTGCTGAATGTGGCGCAATGAGTCAAGCTTACATCGGCTATCAATTACAAGAAAG
+CTTACAAAATGAATTGCATTCTATTGGAATGGATAAACAAGTGGTAACACTAGTGACACA
+AGTTGAAGTTGATGAAAATGATCCGGCATTTAACAATCCTTCAAAACCAATTGGGTTATT
+TTACAACAAAGAAGAAGCTGAACAAATTCAAAAAGAAAAAGGATTTATATTTGTTGAAGA
+TGCTGGAAGAGGATATAGACGCGTTGTTCCTTCACCACAACCCATCTCTATTATTGAATT
+AGAGAGTATTAAAACACTTATTAAAAATGATACACTCGTTATTGCTGCTGGTGGTGGAGG
+TATACCAGTAATTAGAGAGCAACATGATGGTTTTAAAGGTATTGATGCAGTTATAGACAA
+AGATAAAACAAGTGCGTTGTTGGGTGCTAATATTCAATGCGATCAATTGATTATTTTAAC
+AGCAATTGATTATGTATATATTAATTTTAACACTGAAAACCAACAGCCTTTGAAAACAAC
+AAATGTTGATGAATTAAAACGATATATAGACGAAAATCAATTTGCAAAAGGAAGTATGTT
+ACCAAAAATTGAAGCAGCCATATCATTTATTGAAAACAATCCAAAAGGAAGTGTGCTTAT
+AACATCATTAAATGAATTAGATGCTGCCTTAGAGGGTAAAGTAGGTACTGTGATTAAAAA
+GTAATTGAATTGAAACGCTTTTCAATTACTATATGTCAAATGCATGATTTTTATTATTTA
+TGTGCACCCCCTGAAAATAATGCCTCTATTTTGATGCGGGGTGCACTTTCTTAATTTATA
+TTTATAAAATCTTTAAGGTAGAAATGCTAGGTTAAATGTCGAAGGAGATGAAACCGTGGA
+AAATACAATTAATGAAAGTGAAAAGAAAAAACGATTTAAATTAAAAATGCCAGGTGCATT
+TATGATTTTATTCATTTTAACGGTTGTTGCAGTTATAGCAACATGGGTTATTCCTGCTGG
+TGCATATTCTAAACTTTCTTACGAACCTTCATCCCAAGAACTAAAGATAGTTAACCCTCA
+TAACCAAGTGAAAAAGGTTCCGGGTACGCAACAGGAACTAGACAAAATGGGGGTTAAAAT
+TAAGATTGAACAATTTAAATCAGGTGCAATTAATAAGCCAGTATCAATTCCGAATACTTA
+TGAAAGATTAAAGCAACATCCAGCTGGACCAGAACAAATAACAAGTAGCATGGTTGAAGG
+TACGATAGAAGCGGTCGATATCATGGTATTCATTCTTGTACTAGGGGGACTTATTGGCGT
+AGTTCAAGCCAGTGGTTCTTTTGAATCGGGATTGTTAGCTTTAACGAAGAAAACAAAAGG
+GCATGAATTTATGCTAATTGTGTTTGTATCAATACTAATGATTATCGGCGGGACGTTATG
+TGGTATTGAAGAAGAAGCTGTAGCATTCTATCCGATTTTAGTCCCTATATTTATAGCGTT
+AGGATACGATTCTATCGTTTCAGTTGGTGCCATATTCCTTGCCAGCTCTGTCGGTAGTAC
+ATTTTCAACTATTAACCCGTTCTCGGTTGTAATTGCCTCTAATGCCGCTGGTACAACTTT
+TACGGATGGCTTGTATTGGAGAATAGGTGCTTGTATTGTCGGTGCGATTTTTGTTATTAG
+TTATTTATATTGGTACTGTAAAAAAATTAAAAACGATCCTAAAGCGTCATATTCTTATGA
+AGACAAAGATGCTTTTGAACAGCAATGGTCTGTATTAAAAGATGACGATAGTGCCCATTT
+TACTTTGCGTAAGAAGATAATCCTTACATTATTTGTACTACCATTTCCAATTATGGTATG
+GGGAGTTATGACGCAAGGTTGGTGGTTCCCAGTTATGGCTTCAGCATTTTTAATATTTAC
+AATTATAATAATGTTTATTGCTGGGACAGGTAAATCTGGATTGGGAGAAAAAGGAACTGT
+AGATGCATTTGTCAATGGTGCATCAAGTTTAGTAGGTGTATCTTTAATTATTGGTTTAGC
+TCGAGGTATTAATTTAGTGTTGAATGAAGGTATGATTTCAGATACAATCTTACACTTTTC
+ATCATCTTTAGTTCAACATATGAGTGGACCATTATTTATCATCGTATTACTATTTATTTT
+CTTCTGTTTAGGTTTTATCGTGCCATCATCTTCTGGATTAGCGGTATTATCAATGCCTAT
+CTTTGCACCACTAGCTGATACAGTAGGTATACCAAGATTCGTCATCGTTACGACATATCA
+ATTTGGTCAATATGCGATGTTATTCTTAGCGCCGACTGGACTTGTTATGGCCACACTACA
+AATGTTAAACATGCGATATTCACATTGGTTCCGATTTGTATGGCCGGTAGTTGCTTTTGT
+ATTGATTTTCGGTGGCGGAGTACTAATTACGCAAGTACTAATTTATTCATAATTTGAAAT
+GCTATATTATAAAAATACTAATTGGGTTTTATGCATCTCGTAGGTTTGTAGAAATACTAA
+ACTAAGCGAGGTGCATTATTATTTTTGATTAAGAAAATAATGACGGTAATGATAACACTA
+GTAAGTAATTGATACAATGCTCTATTTAATAATGATATTTTTAAAATTTGTTTTTAATGT
+AATGTTAGATCTATGGTATATTATATTTATCGTGGTAAATATGTATTTGCTGTAAACAGT
+TTTTGTAACACATGACACATATGGTTGTGAAAATAGCATATATAAAGGATGGCTATAAAT
+GACACATTTGACAAAGGTTTTAGATACACTAACTGGAATATGCGTAGTATTATTATTTAG
+TAAATATTTTGTGGCGTATGCAAATATGGTGTTTGATTGGAATTTAAGATGGTATTTGCT
+AGAAAACATACCACATTTGCCAATTATATTATTTATTCTGATGTTTATTTTCGGAGTACC
+TTCTGAAATGATAAAAGATAGGCAAAGGAAAAATAACGGTGTTTAATTTATCGATATTTA
+GAGGTGATAAAATTTGTCAACTATTACAAAGACTAAAATAAAAATCTTTATTAATTATTT
+TATTGGTATATTAGCCTTGGCGTTTGCTGCTTATATATTATTTAAATAAAGGTTAGAGTG
+AAACGTGTTTATGAACTAGACGTTCTAGTATAGTTACTACAGCTTTAGTCAGAAGGTATC
+ATTGATAAGATCATATTAAATCAAAGAGGCATTGATATACACTAAAAAGAGGCAAGATTA
+CCTGCCTCTTTTTTAGTTATTAAATATGCGTGTTAATTCTTGGTAATAGTGTTACTGAGA
+CGCATTACGAAGAAATTTATCTTGATTTTCCTTTTTTAAAAAGAAGTGAAGATATCCTAT
+AAAGACTCTAAGTACTATTACAGTGGCTAATAACACTTCGATAAACAAAAGACCTTTCCA
+AATATCTGGAAACATAAGTACAGGCAAACTGTTCTTTAAAGCAGTTGCTGAGATTACTAA
+AGGGAATGTGAAAGCTGAAAATACGGGCGAAAAAGGTTCTTTTAGTAATTTAGGCAATTG
+TATAATGATATAAAAATAAAATATTTGAGCTAATATTAAAAATATAATTACGATAAAAGC
+ATTCGCCTTAGGAAAGGCTATAACATATGCTGCAGCGACTAAAGAAAATGGTGCACAAAT
+TGTCGATGTGTTCGGTTTGATTGACGTCTGCAATGGAAATGCTTTTAATCGCTTGAAAAC
+AATAGGTAAGACAATACAAGTAGCTACAAAGCCATATATTACTGTTAATTGACCTATGAA
+AAAACATCCGCTAACGGGTGCCGTCAATCCTGCGATAGCAATACCAATAAAAAGTACAGT
+CCACGAAGGATAAACATTTTCAAGTGAAAAATCTTTTAAATATTTTATTGAAAAAATAAT
+CATATGCGTCATAATTCCCACAAGGCATAAAATCCAAATAGGCGTTATTAAGCTATTGAT
+AAAAGTTATGTTACTAAAAAATGTATTTAAATAAGTAGTACCTAAAAAGCCAGACATGAA
+AAATGTTGTGAACACTGATGAAACTAGAGGACTGTTTAATTGTTCTTTCACATTATTAAA
+ATATTTGATCATAGTACATAAAAGGTGAATCCAAATCAAGAAAGCAAAGATTCCGCAAAC
+AGCGTTTAAAGTAAGAGATAAGTCTTTTAATAGATTACCCAGGCCTAATAATCCTAAGAC
+TAGTCCTGACGTTACTAGAGGTGCTTTTTGAAGTCTCATGATTTACAACCTTTCTTATGT
+GATTTTCTTCACTAATTATATCATGATAGCTATGGCCAATTAATAAGAAGAGTGTGTACT
+ATTACGTTATTAGAGTATGTATATTGGATTGATAGACACATAATTGACATTTAAATCTCA
+AAATTAATGATATTTGAGGTATGCTTTGCAACCTAAATATTGGAATATGTGGAAAAGTAA
+TTATTTAATTTAAATGATGATTGAAGATTTACAAGGGGGTGTACAAAATGAGAAATCAAA
+TTCAAAAACTATTAGACAGTGATTTGAGCAGTTTACATATATCGAAACAAACAGGAGTTC
+CACAAAGCACAATACACAGAATGAGAAAAAAAGAAAGATCATTAGACAATATGTCATTGA
+AAAACGCTGAACTACTTTATAAATTTGCCAATAGTATATTTAGCAATGAAAATTAAGAAG
+AATAATTAGTGAGTTAAATAATTAAAAAACCACGTCTCGATGGACGTGGTTATTTTTTTA
+AATGTATTTAATTATAGGATTTCGGGCCCCTGAAAAGTCCCTAAAATTTTGAATAGTATC
+AAAATCTATAGGAATAATATAATAATAAAAATCTCTACGCATGCAAATGGTGAGTGTTAA
+AAATCTTGGTGTAGTGTTGGTGTAGTTCAGGTGCAGTATAGATGAAATTTGAGCACAAAA
+TTACTTAATTAAAAATGTATAAACAAGTGTCAGACGCGTGTCAATTGTGTGTCAAAAAAT
+TCTATGACGAATAAGCATGCTTGTTTTAATTACTGTATTTTCGGGGCTTATGCGGGGGAC
+GTTTTGGTGACGCATCATACTATATTACTGAAATTCAAAACAAAAAGAGCCCCGTAATCA
+CGGAACTCTTTTGTTTGGTAATGCGTATAAAAATACCTATAAACGTCCTGGGAGGGATTC
+GAACCCCCGACCGATGGCTTAGAAGGCCATTGCTCTATCCAGCTGAGCTACCAGGACACG
+TTTAACAACACAAGAATTATTATATCTAAATGAACTTAAATTAGCAATACCTTGTAAATA
+AAAAATGTTTATATTTTTCACTATTATAGAGCTATTTATCTAAAAAGGTTCAATAAGACT
+TAAATGCGAATTCAGGCAACTTAATTGTGTTAAATACAGTTTTGAATGCCTAACTGTATT
+TCTTTTCTCTTTAAAATACAGTTAAGTACATTATAAGATGTTGTGCGGATAAACAAACTA
+ATTGCATCAAATTTATTTTAAAATAACAACAACAAAACGTTAAGCGAATAACATTTCGGT
+GATTTAAAAGCTACGCACGTTTTTGTTATCTTCAAATTTAAATTTTAAGGAGTGTTTTCA
+ATGGAAGGTTTATTTAACGCAATTAAAGATACCGTAACTGCAGCAATTAATAATGATGGC
+GCAAAATTAGGCACAAGCATTGTGAGCATCGTTGAAAATGGCGTAGGTTTATTAGGTAAA
+TTATTCGGATTCTAATTTCAATATGTTATGTAAGTAATCAGTATTATTTCAAAGGTGAGG
+GAGAGATTTAAATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAAC
+AACATGATAGTGTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTT
+TACTAGGTAAATTATTTGGATTCTAATATAATAACTAATATTCTTTAAAATAAACTGGGT
+GAGCATACTTTAATGTTATGCACTCAGTTTATTTTATTTGCAGAAATTTGAGCCTCTGTT
+AAGATTTAGATACATAGACAATATAGGAGATGGGGAAATTGGGATATAAAAATATTTTGA
+TAGACTTTGATGATACAATTGTTGATTTTTATGATGCAGAAGAATGGGCGTTTCACTATA
+TGGCGAATGTTTTTAATCATAAAGCAACAAAGGATGATTTTTTAACATTTAAAAAAATCA
+ATCACCAACATTGGGAAGCTTTTCAACAAAATAAATTAACGAAGTCTGAAGTATTATCAG
+AACGATTTGTGAATTACTTCAAACATCATCAAATGGAAGTTGATGGGCATCGTGCAGATG
+TGTTATTTAGAAATGGATTAGCAGAAGCTAAAGTTAAATACTTTGATCAAACATTAGAAA
+CAATTGTCGAATTATCGAAAAGACATGATTTATATATTGTTACTAATGGTGTAACCGAAA
+CGCAAAAGAGAAGGTTAAATCAGACGCCGTTGCATAAATATATTAAAAAGATATTTATAT
+CTGAGGAAACAGGATATCAAAAACCTAATCCGGAATTTTTTAATTATGTTTTTAATGATA
+TTGGTGAGGATGAAAGACAGCACTCGATTATAGTTGGAGATTCTTTAACATCTGACATTC
+TAGGTGGAATCAATGCGGGTATAGCTACTTGCTGGTTTAATTTTAGAGGATTTGATCATA
+ATCCAGGAATTATACCTGATTATGAAATTAATTCATGGAAACAACTAAATGATATTGTAC
+GTTAAGACAAATAATTTGAAAATGATTTATATTAGCTTAAGAAAGTAATTGTAAAAACGA
+CACTACGAAAAGATGCATTTTCATGCTCGTAGTGTCGAATATTATTATCTTATGGTTTCC
+ACAATAAGACATCATGTCCCTCTATATTTTTAGCTTCCACATCTGTAAATCCATGATGTA
+AAAAGAAATCCTTAGAATCATTTCTGCCAATGGCTTTAATTGGCATATTGAAACTCTTTG
+CAAAATCAATCAATTCTGAAGCGTACCCTCTGTTTTGATATTTTGGTAATACTTCTAACT
+TCCATAATAATATATAATCTTCAAAATCTGGGAAGTAGATTTCTTCGACATCACCTTTTT
+TTAGTAATGCCATTCTAGCTCCTAATTGATCTCCGACAAATATGCCATAAAATGGTGAAT
+CTGAACTTGCATCAATCATTTGACCGTTTAACTCATTGACCATGTATAAGTCTTTGTTGC
+CAAACGCTCTAAAGTTTTCGAATAATTCGTCAGTTTTGTAATTAATTTCAAGACGTTTGA
+TTTCACTCATATTTATCTACCCCTTATTTGTTTACTACTATTATTATACATTAAACCACT
+TGTGTTTTTCATTGATTTGAAAATGTAAAACAATGAACATTATAAATTTTCCATGATTAA
+ATTGATTTTCAAGCCGTAAAATTGTCAATCACCGGTATAGACTTTATACTATAGTTTGTA
+ATCGCGTACTTAAGGAGAAAAAACATGGACTGTAAAGTAGTTAGTTTAAATGAAAAAGAT
+CAGTTTATACCAAAAATAAAGAGCAGTGACCCTGTAATAACAGGATTATTTCAATATGAT
+GCAGCTCAACAAACTAGTTTTGAAAAAAGGATGTCTAAAGAAAATAATGGAAGAGAAGCG
+GCATTAGCGAATGTTATTCGTGAATATATGAGTGATTTAAAGCTTTCAAATGAACAAGAA
+TTAAACATACAACATTTAGCTAATGGTTCAAAAGTTGTGATTGGTGGACAACAAGCAGGG
+CTTTTCGGGGGACCATTGTATACATTCCATAAAATATTTTCAATCATTACTTTATCTAAG
+GAATTAACGGATACACATAAGCAACAAGTAGTACCAGTTTTTTGGATTGCAGGAGAAGAT
+CATGATTTCGATGAAGTGAATCATACATTTGTTTATAACGAAAATCATGGGTCGCTGCAT
+AAGGTTAAATATCATACAATGGAGATGCCAGAGACGACAGTCTCTAGATATTATCCTGAT
+AAGGCTGAGTTGAAACAAACTTTAAAAACGATGTTCATTCATATGAAAGAAACTGTTCAT
+ACACAAGGTCTACTGGAGATTTGTGACAGAATTATTGACCAATATGACTCGTGGACTGAT
+ATGTTTAAAGCACTACTGCATGAAACATTTAAAGCATATGGCGTTCTATTTATAGATGCG
+CAGTTTGAGCCGTTAAGAAAAATGGAAGCGCCTATGTTTAAAAAGATTTTGAAAAAACAT
+CAGTTGCTTGATGATGCTTTTAGAGCAACACAACAACGTACTCAAAATCAAGGCTTGAAT
+GCGATGATACAAACAGATACAAATGTTCATTTATTCTTACATGATGAAAATATGCGCCAA
+TTAGTTTCGTATGATGGTAAGCATTTTAAATTAAATAAAACAGATAAGACATATATAAAG
\ No newline at end of file
diff --git a/t/data/locus_tag_gffs/query_3.gff.proteome.faa.expected b/t/data/locus_tag_gffs/query_3.gff.proteome.faa.expected
new file mode 100644
index 0000000..adcbe48
--- /dev/null
+++ b/t/data/locus_tag_gffs/query_3.gff.proteome.faa.expected
@@ -0,0 +1,75 @@
+>xxx_00001
+MKTRIVSSVTTTLLLGSILMNPVANAADSDINIKTGTTDIGSNTTVKTGDLVTYDKENGM
+HKKVFYSFIDDKNHNKKLLVIRTKGTIAGQYRVYSEEGANKSGLAWPSAFKVQLQLPDNE
+VAQISDYYPRNSIDTKEYMSTLTYGFNGNVTGDDTGKIGGLIGANVSIGHTLKYVQPDFK
+TILESPTDKKVGWKVIFNNMVNQNWGPYDRDSWNPVYGNQLFMKTRNGSMKAAENFLDPN
+KASSLLSSGFSPDFATVITMDRKASKQQTNIDVIYERVRDDYQLHWTSTNWKGTNTKDKW
+TDRSSERYKIDWEKEEMTN*
+>xxx_00002
+MKLFYIVFLIIIWLNIFLGNEIIHTLTVLITTLYIVNSRKGIKNDRVE*
+>xxx_00003
+MTELNNIINSLQSLFESESGYKISKNSGVPYQTVQDLRNGKTKLEDARFRTIIKLYSYYV
+SLKEH*
+>xxx_00004
+MSKNITKNIILTTTLLLLGTVLPQNQKPVFSFYSEAKAYSIGQDETNINELIKYYTQPHF
+SFSNKWLYQYDNGNIYVELKRYSWSAHISLWGAESWGNINQLKDRYVDVFGLKDKDTDQL
+WWSYRETFTGGVTPAAKPSDKTYNLFVQYKDKLQTIIGAHKIYQGNKPVLTLKEIDFRAR
+EALIKNKILYTENRNKGKLKITGGGNNYTIDLSKRLHSDLANVYVKNPNKITVDVLFD*
+>xxx_00005
+MNNNITKKIILSTTLLLLGTAFTQFPNTPINSSSEAKAYYINQNETNVNELTKYYSQKYL
+TFSNSTLWQKDNGTIHATLLQFSWYSHIQVYGPESWGNINQLRNKSVDIFGIKDQETIDS
+FALSQETFTGGVTPAATSNDKHYKLNVTYKDKAETFTGGFPVYEGNKPVLTLKELDFRIR
+QTLIKSKKLYNNSYNKGQIKITGTDNNYTIDLSKRLPSTDANRYVKKPQNAKIEVILEKS
+N*
+>xxx_00006
+MKKNIMNKLVLSTALLLLGTTSTQLPKTPISFSSEAKAYNISENETNINELIKYYTQPHF
+SLSGKWLWQKPNGSIHATLQTWVWYSHIQVFGSESWGNINQLRNKYVDIFGTKDEDTVEG
+YWTYDETFTGGVTPAATSSDKPYRLFLKYSDKQQTIIGGHEFYKGNKPVLTLKELDFRIR
+QTLIKNKKLYNGEFNKGQIKITADGNNYTIDLSKKLKLTDTNRYVKNPKNAQIEVILEKS
+N*
+>xxx_00007
+MKNLRNRSFLTLLDFSRQEVEFLLTLSEDLKRAKYIGTEKPMLKNKNIALLFEKDSTRTR
+CAFEVAAHDQGANVTYLGPTGSQMGKKETTKDTARVLGGMYDGIEYRGFSQRTVETLAEN
+SGVPVWNGLTDEDHPTQVLADFLTAKEVLKKDYADINFTYVGDGRNNVANALMQGAAIMG
+MNFHLVCPKELNPTDELLNRCKNIAAENGGNILITDDIDQGVKGSDVIYTDVWVSMGEPD
+EVWKERLELLKPYQVNKEMMDKTGNPNVIFEHCLPSFHNADTKIGQQIFEKYGIREMEVT
+DEVFESKASVVFQEAENRMHTIKAVMVATLGEF*
+>xxx_00008
+MMAKIVVALGGNALGKSPQEQLELVKNTAKSLVGLITKGHEIVISHGNGPQVGSINLGLN
+YAAEHNQGPAFPFAECGAMSQAYIGYQLQESLQNELHSIGMDKQVVTLVTQVEVDENDPA
+FNNPSKPIGLFYNKEEAEQIQKEKGFIFVEDAGRGYRRVVPSPQPISIIELESIKTLIKN
+DTLVIAAGGGGIPVIREQHDGFKGIDAVIDKDKTSALLGANIQCDQLIILTAIDYVYINF
+NTENQQPLKTTNVDELKRYIDENQFAKGSMLPKIEAAISFIENNPKGSVLITSLNELDAA
+LEGKVGTVIKK*
+>xxx_01705
+VENTINESEKKKRFKLKMPGAFMILFILTVVAVIATWVIPAGAYSKLSYEPSSQELKIVN
+PHNQVKKVPGTQQELDKMGVKIKIEQFKSGAINKPVSIPNTYERLKQHPAGPEQITSSMV
+EGTIEAVDIMVFILVLGGLIGVVQASGSFESGLLALTKKTKGHEFMLIVFVSILMIIGGT
+LCGIEEEAVAFYPILVPIFIALGYDSIVSVGAIFLASSVGSTFSTINPFSVVIASNAAGT
+TFTDGLYWRIGACIVGAIFVISYLYWYCKKIKNDPKASYSYEDKDAFEQQWSVLKDDDSA
+HFTLRKKIILTLFVLPFPIMVWGVMTQGWWFPVMASAFLIFTIIIMFIAGTGKSGLGEKG
+TVDAFVNGASSLVGVSLIIGLARGINLVLNEGMISDTILHFSSSLVQHMSGPLFIIVLLF
+IFFCLGFIVPSSSGLAVLSMPIFAPLADTVGIPRFVIVTTYQFGQYAMLFLAPTGLVMAT
+LQMLNMRYSHWFRFVWPVVAFVLIFGGGVLITQVLIYS*
+>xxx_00010
+MTHLTKVLDTLTGICVVLLFSKYFVAYANMVFDWNLRWYLLENIPHLPIILFILMFIFGV
+PSEMIKDRQRKNNGV*
+>xxx_00011
+MRLQKAPLVTSGLVLGLLGLGNLLKDLSLTLNAVCGIFAFLIWIHLLCTMIKYFNNVKEQ
+LNSPLVSSVFTTFFMSGFLGTTYLNTFFSNITFINSLITPIWILCLVGIMTHMIIFSIKY
+LKDFSLENVYPSWTVLFIGIAIAGLTAPVSGCFFIGQLTVIYGFVATCIVLPIVFKRLKA
+FPLQTSIKPNTSTICAPFSLVAAAYVIAFPKANAFIVIIFLILAQIFYFYIIIQLPKLLK
+EPFSPVFSAFTFPLVISATALKNSLPVLMFPDIWKGLLFIEVLLATVIVLRVFIGYLHFF
+LKKENQDKFLRNASQ*
+>xxx_00012
+MRNQIQKLLDSDLSSLHISKQTGVPQSTIHRMRKKERSLDNMSLKNAELLYKFANSIFSN
+EN*
+>xxx_00014
+MEGLFNAIKDTVTAAINNDGAKLGTSIVSIVENGVGLLGKLFGF*
+>xxx_00015
+MTGLAEAIANTVQAAQQHDSVKLGTSIVDIVANGVGLLGKLFGF*
+>xxx_00016
+LGYKNILIDFDDTIVDFYDAEEWAFHYMANVFNHKATKDDFLTFKKINHQHWEAFQQNKL
+TKSEVLSERFVNYFKHHQMEVDGHRADVLFRNGLAEAKVKYFDQTLETIVELSKRHDLYI
+VTNGVTETQKRRLNQTPLHKYIKKIFISEETGYQKPNPEFFNYVFNDIGEDERQHSIIVG
+DSLTSDILGGINAGIATCWFNFRGFDHNPGIIPDYEINSWKQLNDIVR*
diff --git a/t/data/locus_tag_gffs/query_5.gff b/t/data/locus_tag_gffs/query_5.gff
new file mode 100644
index 0000000..ad5d755
--- /dev/null
+++ b/t/data/locus_tag_gffs/query_5.gff
@@ -0,0 +1,271 @@
+##gff-version 3
+##sequence-region abc|SC|contig000001 1 15000
+abc|SC|contig000001	Prodigal:2.60	CDS	172	1131	.	-	0	ID=3_1;gene=hly;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q2G1X0,protein motif:TIGRFAMs:TIGR01002,protein motif:Pfam:PF07968.6;locus_tag=ccc_00001;product=Alpha-toxin,beta-channel forming cytolysin,Leukocidin/Hemolysin toxin family protein;protein_id=gnl|SC|ccc_00001
+abc|SC|contig000001	Prodigal:2.60	CDS	1804	1950	.	+	0	ID="ccc_50002";inference=ab initio prediction:Prodigal:2.60;locus_tag=ccc_00002;product=hypothetical protein;protein_id=gnl|SC|ccc_00002
+abc|SC|contig000001	Prodigal:2.60	CDS	1934	2131	.	+	0	ID=ccc_50003;inference=ab initio prediction:Prodigal:2.60;locus_tag=ccc_00003;product=hypothetical protein;protein_id=gnl|SC|ccc_00003
+abc|SC|contig000001	Prodigal:2.60	CDS	2621	3337	.	-	0	ID=ccc_50004;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13350,protein motif:Pfam:PF02876.11;locus_tag=ccc_00004;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|ccc_00004
+abc|SC|contig000001	Prodigal:2.60	CDS	3445	4170	.	-	0	ID=3_2;gene=speH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0C0I6,protein motif:CLUSTERS:PRK13349,protein motif:Pfam:PF02876.11;locus_tag=ccc_00005;note=SPE H;product=hypothetical protein,superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|ccc_00005
+abc|SC|contig000001	Prodigal:2.60	CDS	4265	4990	.	-	0	ID="ccc_50006";inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13043,protein motif:Pfam:PF02876.11;locus_tag=ccc_00006;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|ccc_00006
+abc|SC|contig000001	Prodigal:2.60	CDS	5428	6429	.	+	0	ID=3_3;eC_number=2.1.3.3;gene=argF;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P99073,protein motif:CLUSTERS:PRK04284,protein motif:Cdd:COG0078,protein motif:TIGRFAMs:TIGR00658,protein motif:Pfam:PF00185.1;locus_tag=ccc_00007;product=Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Aspartate/ornithine carbamoyltransferase%2C As [...]
+abc|SC|contig000001	Prodigal:2.60	CDS	6449	7384	.	+	0	ID=ccc_50008;eC_number=2.7.2.2;gene=arcC1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q7A627,protein motif:CLUSTERS:PRK12353,protein motif:TIGRFAMs:TIGR00746,protein motif:Pfam:PF00696.22;locus_tag=ccc_00008;product=Carbamate kinase 1,putative amino acid kinase,carbamate kinase,Amino acid kinase family protein;protein_id=gnl|SC|ccc_00008
+abc|SC|contig000001	RNAmmer:1.2	rRNA	7556	9112	.	+	0	ID=ccc_51705;inference=COORDINATES:profile:RNAmmer:1.2;locus_tag=ccc_01705;product=16S ribosomal RNA
+abc|SC|contig000001	Prodigal:2.60	CDS	9419	9646	.	+	0	ID=ccc_50010;inference=ab initio prediction:Prodigal:2.60;locus_tag=ccc_00010;product=hypothetical protein;protein_id=gnl|SC|ccc_00010
+abc|SC|contig000001	Prodigal:2.60	CDS	9952	10899	.	-	0	ID=ccc_50011;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF03595.11;locus_tag=ccc_00011;product=C4-dicarboxylate transporter/malic acid transport protein;protein_id=gnl|SC|ccc_00011
+abc|SC|contig000001	Prodigal:2.60	CDS	11148	11336	.	+	0	ID=ccc_50012;inference=ab initio prediction:Prodigal:2.60;locus_tag=ccc_00012;product=hypothetical protein;protein_id=gnl|SC|ccc_00012
+abc|SC|contig000001	Aragorn:1.2.34	tRNA	11803	11878	.	-	0	ID=ccc_50013;inference=COORDINATES:profile:Aragorn:1.2.34;locus_tag=ccc_00013;product=tRNA-Arg(tct)
+abc|SC|contig000001	Prodigal:2.60	CDS	12241	12375	.	+	0	ID=ccc_50014;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5;locus_tag=ccc_00014;product=Gonococcal growth inhibitor III,Staphylococcus haemolytic protein;protein_id=gnl|SC|ccc_00014
+abc|SC|contig000001	Prodigal:2.60	CDS	12432	12566	.	+	0	ID="ccc_50015";inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5;locus_tag=ccc_00015;product=Gonococcal growth inhibitor III,Staphylococcus haemolytic protein;protein_id=gnl|SC|ccc_00015
+abc|SC|contig000001	Prodigal:2.60	CDS	12699	13385	.	+	0	ID=3_5;eC_number=3.-.-.-;gene=yfnB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O06480,protein motif:CLUSTERS:PRK09449,protein motif:TIGRFAMs:TIGR02254,protein motif:Pfam:PF00702.1;locus_tag=ccc_00016;product=Putative HAD-hydrolase yfnB,dUMP phosphatase,HAD hydrolase,haloacid dehalogenase-like hydrolase;protein_id=gnl|SC|ccc_00016
+##FASTA
+>abc|SC|contig000001
+ACTGGCCGCCTAATTTTAAAAACTCCCCAAGCTGTAATTTAAGGGGGTTCTTTAAATTAT
+ATACCCACCACATTTTTTGGAGAACCCCAAACTAGCCGAAAAGGGGCATTTCTGAAGTTA
+ACGGCTAAAGTTATTTTTTATATTTCCCTGTCCATGAACAAAGGGGTTACATTAATTTGT
+AATTTCTTCTTTTTTTTAATCGATTCCCTATCTTTCTGAAGAGGGGTCTGTCCATTTATC
+ATTAGTATTGGTACTTTTCCAATTTCCCGAACTCCAATGCAAGGGGTAGTCATCACGAAC
+ACGTTCGTATATTATTTCTATATTTCCCTGTCGTTTGGATGCGGGGCTATCCATAGTAAT
+AACTGTAGCGAAGTTTTGTGAAAACCCCGAACATAATAGAGAGGGGGCTTTGTTAGGATC
+AAGGAAGTTCTCTGTTTCTTTCATACCCCCACTTCTAGTTTTGGGGAAAAGTTGATTGCC
+ATATACCGGGTTCCTTTAATCTCTACCCTATCGTCCCCAATTGGGGTTCACCATATTGTT
+AAATATCACTTTCCTTTCTACTTTTCCCTCACTTGGGCTCTCGGGGATTGTTTTGAAATC
+AGGTTGAACATATTTTTGTGTATGACCCATCCAAACATTTGCGGGGATAAGGCCGCCAAT
+ATTTCCTGTATCATTTTCAGTAACACCCCCGCTGAATCCATAGGGGAAAGTACTCATATA
+ATCTTTTGTATCAATTTAATTTCTTCCCTAGCAATCAGATATGGGGGCTACTTCATTATC
+AGGTAGTTGCAACTTTTCCTTAAAGCCCGAACGCCAGGCTAAGGGGCTTTTGTTAGCACC
+ATCTTCGCTATAAATTTTATATTGACCCGCACTGGTACCTTTGGGGCTAATAACTAGCAG
+ATTTTTATTGTGATTTTTATCATCGCCCAAACTATAAAATACGGGGTTGTGCATGCCATT
+ATCTTTATCATAAGTTTCTAAATCACCCGTTCTTACTGTAGTGGGGCTTCCAATATCTGT
+AGTACCGGTTTTAATTTTAATATCACCCTCTCCGGCATTAGCGGGGGGATTCATTAATAT
+AGAACCTAGCAATATTTTTGTTGTTCCCGAGCTGACTATACGGGGGTTCATTTTCATCAT
+ACTTCTATTTTTTTTTTCGATTTGACCCAACCATAATCAATAGGGGAATTTAGAATATTG
+AAGTTGAGACATATTTTATATTTATCCCCGTCTATATTAGTAGGGGTAATGTAGCAACTG
+ATAAATTACTGAGTTTTGATGAGTGCCCATTCTAAGAATATGGGGGTAACTTTTATTTAA
+AATTTGAAAGGAAGTTTTTCAATTTCCCGGGCTAGTCAAAGTGGGGTAAATTCTTTATGA
+AACAAGGAAAAGACTTTGCTAATTTCCCTGACTAATTTCTTTGGGGCTAATGATTTGTTT
+AATTTAAAAATGTATTTGATTACAACCCAAACATACAAATATGGGGGAATTAAATCAATT
+AATTAACTATTAAATTTAAATTAAACCCATACTAACTACTGTGGGGTAATAAATAGAAAT
+AGAGAAAAAGGGTATTTATTATGTTCCCAATCTCGTCGGGAAGGGGTTTTGCCATTACAT
+AGAAATATCTAATATTTAATGAAAACCCATCCTATGTATTTTGGGGATAGTGTAAAATAT
+AATATGTAAAATAATTTGTAGATTTCCCGTTCGAGGCATTATGGGGAAATTTTGAGTATA
+AGTTAGCTTTTAATTTTGAATCTTACCCAAACTTGATTAATAGGGGTATGATAGGGGATT
+AAAATGAAACTATTTTTTATCGTATCCCTTACTATTATATGGGGGGATATATTTTTAGGA
+AATGAAATTATCCATTTACTGACTGCCCTAACAACAACATTGGGGGTTGTTAATTCAAGA
+AAGGGGATTAAAAATTTCAGAGTTGCCCAATCTTATAAACTCGGGGCAATCTTTGTTTGA
+ATCTGAATCAGGCTTTTAAATTTCGCCCAATCCAGGAGTTCCGGGGCAAACAGTACAAGA
+ATTAAGAAATGGGATTTCCAAACTACCCGATCCTAGATTTAGGGGGATTATTAAACTTTA
+AAGTTACTATGTCTTTTTAAAAGAACCCTAACCATGGGACAAGGGGGTATTGCTATAATA
+ATTGAATCATTAAATTTAGGAATAGCCCCTACGACATAATAAGGGGATGTCTTAGGCTCT
+ACAATATTATATTGTTTGTAGTTGACCCAATCAAAATGACCAGGGGACAAGCATTTTTGG
+AGCCCCAACACAGATTTTGACGAAACCCCAGCTTACAATAATGGGGAAGTTGGGGATGGG
+ACCCAACACAGAGATTTTCAAAAAGCCCTTCCACAGACAATGGGGGTTGGCGGGGCCCCA
+ACATAGAGAAATTCTTTAAGAAATTCCCCAGCCAATGCAAGTGGGGGAGTGACAACGAAA
+AAAATTTTATAAAATTTCATTTCTGCCCCATCCCTACTCCCAGGGGCATTTAAATATATA
+AAAAATTTCACCTATTTTATACATCCCCCACCTTAAAAATTAGGGGTTAAATAATCTGAT
+ATGGAATTAAAGTGTTTGAAGTATACCCCACCTTCATATACTGGGGAAAGAGGACGTCAA
+AAGTTATTTTATTATTTTTTTTAACCCCAACCTTTGCTAGATGGGGATGTAATCTTTTGC
+ATAAATCAATAGTGTTTTTATTACCCCCACCCGTGATCTTAAGGGGACCTTTATTACGAT
+ATTCGGTATATAATTTTTTATTTTTCCCTAACGCTTCTCGTGGGGGGAAATCGATTTCTT
+ACAATGTTAATACTTTTTTATTGCCCCCGTACATTTTATGCGGGGGAATAATCGTTTGTA
+ATTTATCTTTGTATTTTACAAAAAGCCCATACGTTTTATCAGGGGGTTTTGCGGCTGGTG
+AAACGCCACCTGTATTTGTCTCTCTCCCAGACCACCATAACTGGGGAGTATCTTTGTCTT
+ATAGTCCAAACACATTTACGTAACGCCCTTTCAACTGATTAAGGGGTCCCCAACTTTCAG
+AGCCCCATAAAGATTTTTGTGCTGACCCTGACTATCTCTTAAGGGGAACATAAATGTTTC
+AATTATCATATTGATTTAGCCATTTCCCTGACAATGAAAAATGGGGCTGTGTGTAATATT
+AAATTAATTCATTGTTTTTAGTTTCCCCTTGCCCAATGCTATGGGGTTTAGCTTCAGAGT
+AAAAACTAAATACTTTTTTTTGATTCCCAGGCAATACAGTACGGGGTAGTAATAATGTTG
+ACGTTAAAATTATATTTTTCGTGATCCCCTTCCTCATTAGAAGGGGTCCTTTCAGAGGAA
+ACATGATACGAGGATTTAGAAATTTCCCTGTCAGCGAAGTCAGGGGAGTATTTGCGATTA
+ATTTTATTAACTCCTTTATTATTGTCCCTTTCATTTTTCGAGGGGGACTTCAATTTTTGC
+ATTTTGAGGTTTTTTTTCATATCTACCCGCACCAGTTGATGGGGGGCTTTTACTTAAATC
+AATTGTGTAGTTATTTTCTGTACCTCCCATTCTAATTTGTCCGGGGTTATAAGAATTATT
+ATATAATTTTTTACTTTTAATTAATCCCTGACGAATACGAAAGGGGAATTCTTTTAAAGT
+AAAAACAGGCTTATTTTCTTCATAACCCGGACATCCGCCAGTGGGGGTTTCTGCTTTATC
+ATTATATGTTACATTTTGTTTATAGCCCTTACCGTTAGATGTGGGGGCAGGAGTAACACC
+ACCAGTAAACGTTTTTTGAGATAATCCCAAACAATCAATGGTGGGGTGGTCTTTTATGCC
+AAAAATATCAACGCTTTTATTTCTTCCCTGGCTGATATTGCCGGGGCTTTCAGGTCCATA
+AACTTGAATATGACTTTACCAAGAACCCTGTCACAACGTTGCGGGGATCGTACCGTTATC
+ATTTTGCCATAACGTTTTGTTAGAGCCCGTTCAATATTTTTGGGGGTAATATTTAGTTAA
+ATCATTAACGTTAGTTTCGTTTTGACCCATACAATAAGCTTTGGGGTCAGATGAAGAATT
+AATAGGTGTATTAGTTTATTGTGTACCCGCTCTACCTAATAGGGGGAATGTTGTTGATAA
+AATAATTTTTTTCGTTTTGTTATTGCCCATTCGAATTTCTCCGGGGAGTATTGTTGGAAT
+ATTTAATTATAAAATTTGGTTAATTCCCTAACTGAAATTATCGGGGTTTACAAAAGGTAA
+AAGGTTAGTTAGATTTTTCGAGTATCCCTTCCATTTGTGCATGGGGAGGATTTTTAACAT
+AACGGTTTGTGTCATTTAATTTTAACCCTTTCCTTAAATCAAGGGGGTAATTATTTCCAT
+AAGCAGTTATCTTATTTTGACCTTTCCCAAACTCTCCGTTATGGGGCTTTTTATTCTTTA
+ATAATGTTTGACGATTTCGGAAATCCCCTTCCTTTAAAGTTAGGGGTGGTTTATTTCCTT
+AGTAAAATTCATGTTTTCCGATGATCCCTTGCTGTTTATCACGGGGTTTTAAAAATAGTC
+AATAAGGTTTATCATTTGAAGTAGCCCCTGGCGTAACACCACGGGGAAATGTTTCATCAT
+AAGTCCAGTAACCTTTTACTGTGTCCCCATCCTTAGTTCCAAGGGGATCAACGTATTTAT
+ATCTTAACTGATTATTTTTTCCCCACCCCTCCGATCCAAACAGGGGAATATGACTATACC
+AAACCCACGTTTGCTTTGTTGCATGCCCGCTCCCATTGGGCTGGGGCCATAGCCATTTTC
+AAGATAATGAAAAATTTGGCTGAGTCCCATACTTGATTAACTGGGGGATATTAGTCTCGT
+ATTCACTGATATTATTTGCTTTTGCCCCAGACGAAAAACTGAGGGGTGTTTTAGGAAGTT
+ATGTTGATGTGGTTTTTAAAAGTAACCCTGCCGTTGATAAAAGGGGTTTATTCATGATGT
+ACTTTTTCATATGATTTTCTCCTTTCCCTGACTTACCCAAAGGGGGTAAGCTATTACACC
+AATTCGGAATTAAATTTAAGCTAAACCCATGCTAAATAAACTGGGGCAGTTAGTAGTGTT
+ATTTAAGCAAAACTTTTCATTTTTACCCTTTCGACAGAAACAGGGGTTAATAAAGTAGGC
+AGGAGTTATATATTTTTAACGACACCCCACACTTATTCTCTAGGGGATTGCATTAAATTG
+ATTGATAATTGAATTTTCTAACTACCCCAAACATAGTTATACGGGGAATGTAGTACTTAT
+ATTAATTATTTCCTTTTACTTAAATCCCATACTAATAAAATGGGGGTTTAATTATTGATA
+AAATATTACAAATTTTTATAGTAGGCCCTGTCTATTTTGTATGGGGTTACAATTTAGGTG
+AAACTAAAATAAAATTTGTTGTTATCCCTGACAAATTTACGAGGGGGAAGTTTTTTAACT
+ATATTAGACTTTTCTTTACAAGAGGCCCAATCCTTATTAACAGGGGCCGAGGATTTAAAA
+AGTGCTAAATATATTTTCACTGAAACCCCTACGTTAAAAAATGGGGATATTGCACTGTTA
+ATTGAAAAAGATTCTTTAAGAACGCCCCGTGCATTTGAAGTTGGGGCGCATGATCAAGGT
+ACAAATGTAACTTATTTAGGCCCAACCCGATCACAAATGGGTGGGGAAGAAACAACTAAA
+AATACTGCACGTGTTTTTGGTGGAACCCATGCTGGCATTGAAGGGGGTGGTTTTTCACAA
+AGAACAGTAGAAACTTTAGCTGAAACCCCAGCCGTACCAGTGGGGGATGGTTTAACTGAT
+AAAGATCATCCTACTTTAGTTCTTGCCCATTCCTTAACAGCAGGGGAAGTCTTAAAAAAA
+AATTATGCAGATATTTTCTTTACATCCCTTGCAGATGGTCGTGGGGACGTTGCAAATGCA
+ATAATGCAAGGTGCTTTCATTATGGCCCTGACCTTCCATTTAGGGGGTCCAAAAGAATTA
+AATCCAACAGATGATTTATTAAATCCCCGTACAAATATTGCCGGGGAAAATGGTGGCAAC
+ATATTAATCACAGATTTTATTGACCCCCGTGCAAAAGGTTCGGGGGTAATTTACACTGAT
+ATTTGGGTATCAATTTTTGAACCTGCCCAAGCATGGAAAGAAGGGGTTGAATTATTGAAA
+ACATATCAAGTAAATTTAGAAATGACCCATACAACTGGTAATGGGGATGTTATTTTTGAG
+AATTGCTTACCATCTTTCCATAATGCCCATACGAAAATTGGTGGGGAAATTTTTGAAAAA
+AATGGTATTCGAGATTTGGAAGTTACCCATGCAGTATTCGAAGGGGAAGCTTCAGTTGTA
+ATCCAAGAAGCTGATTTCAGAATGCCCCCAACCAAAGCAGTCGGGGTTGCTACATTGGGT
+AAATTTTAAATGATTTTAGGAAGTGCCCATGCTGGCGAAAATGGGGGTAGCATTAGGTGG
+AAATGCTTTAGGAATTTCACCTCAACCCCAACTCGAGCTTGTGGGGAATACTGCGAAATC
+ATTAGTAGGATTAATTTCAAAAGGACCCGAGCTTGTTATTAGGGGGGGTAATGGACCACA
+AGTTGGAAGCATTATTTTGGGACTTCCCTATCCTGCAGAACAGGGGCAAGGTCCGGCATT
+ACCATTTGCTGAATTTTGCGCAATGCCCCAACCTTACATCGGGGGGCAATTACAAGAAAG
+ATTACAAAATGAATTTTATTCTATTCCCATGCATAAACAAGTGGGGACACTAGTGACACA
+AGTTGAAGTTGATGTTTATGATCCGCCCTTTCACAATCCTTCGGGGCCAATTGGGTTATT
+ATACAACAAAGAAGTTTCTGAACAACCCCAACAAGAAAAAGGGGGGATATTTGTTGAAGA
+AGCTGGAAGAGGATTTTGACGCGTTCCCCCTCCACCACAACCGGGGTCTATTATTGAATT
+AGAGAGTATTAAAATTTTTATTAAACCCGATCCACTCGTTATGGGGGCTGGTGGTGGAGG
+AATACCAGTAATTATTTAGCAACATCCCGGTCTTAAAGGTATGGGGGCAGTTATAGACAA
+AGATAAAACAAGTGTTTTGTTGGGTCCCAATCTTCAATGCGAGGGGTTGATTATTTTAAC
+AGCAATTGATTATGTTTATATTAATCCCAACCCTGAAAACCAGGGGCCTTTGAAAACAAC
+AAATGTTGATGAATTTTAACGATATCCCGACCAAAATCAATTGGGGAAAGGAAGTATGTT
+ACCAAAAATTGAAGTTTCCATATCACCCATTCAAAACAATCCGGGGGGAAGTGTGCTTAT
+AACATCATTAAATGTTTTAGATGCTCCCTTACAGGGTAAAGTGGGGACTGTGATTAAAAA
+ATAATTGAATTGAATTTCTTTTCAACCCCTACATGTCAAATGGGGGATTTTTATTATTTA
+AGTGCACCCCCTGATTTTAATGCCTCCCTTTCGATGCGGGGTGGGGTTTCTTAATTTATA
+ATTATAAAATCTTTTTTGTAGAAATCCCAGGCTAAATGTCGAGGGGGATGAAACCGTGGA
+AAATACAATTAATGTTTGTGAAAAGCCCAAACGATTTAAATTGGGGATGCCAGGTGCATT
+AATGATTTTATTCATTTTAACGGTTCCCGCACTTATAGCAACGGGGGTTATTCCTGCTGG
+AGCATATTCTAAACTTTCTTACGAACCCTCACCCCAAGAACTGGGGATAGTTAACCCTCA
+AAACCAAGTGAAAATTTTTCCGGGTCCCCAACAGGAACTAGAGGGGATGGGGGTTAAAAT
+AAAGATTGAACAATTTTAATCAGGTCCCATTCATAAGCCAGTGGGGATTCCGAATACTTA
+AGAAAGATTAAAGCTTTATCCAGCTCCCCCACAACAAATAACGGGGAGCATGGTTGAAGG
+AACGATAGAAGCGGTTTATATCATGCCCTTCCTTCTTGTACTGGGGGGACTTATTGGCGT
+AGTTCAAGCCAGTGTTTCTTTTGAACCCGGACTGTTAGCTTTGGGGAAGAAAACAAAAGG
+ACATGAATTTATGCTTTTTGTGTTTCCCTCACTACTAATGATGGGGGGCGGGACGTTATG
+AGGTATTGAAGAAGTTTCTGTAGCACCCTATCCGATTTTAGTGGGGATATTTATAGCGTT
+AGGATACGATTCTATTTTTTCAGTTCCCGCCCTATTCCTTGCGGGGTCTGTCGGTAGTAC
+ATTTTCAACTATTATTTCGTTCTCGCCCGTACTTGCCTCTAAGGGGGCTGGTACAACTTT
+AACGGATGGCTTGTTTTGGAGAATACCCGCTCGTATTGTCGGGGGGATTTTTGTTATTAG
+ATATTTATATTGGTTTTGTAAAAAACCCAAACACGATCCTAAGGGGTCATATTCTTATGA
+AGACAAAGATGCTTTTTAACAGCAACCCTCTCTATTAAAAGAGGGGGATAGTGCCCATTT
+AACTTTGCGTAAGATTTTAATCCTTCCCTTACTTGTACTACCGGGGCCAATTATGGTATG
+AGGAGTTATGACGCTTTGTTGGTGGCCCCCACTTATGGCTTCGGGGTTTTTAATATTTAC
+AATTATAATAATGTTTTTTGCTGGGCCCGGTCAATCTGGATTGGGGGAAAAAGGAACTGT
+AGATGCATTTGTCATTTGTGCATCACCCTTACTAGGTGTATCGGGGATTATTGGTTTAGC
+ACGAGGTATTAATTTTTTGTTGAATCCCGGTCTGATTTCAGAGGGGATCTTACACTTTTC
+ATCATCTTTAGTTCTTTATATGAGTCCCCCACTATTTATCATGGGGTTACTATTTATTTT
+ATTCTGTTTAGGTTTTTTCGTGCCACCCTCTCCTGGATTAGCGGGGTTATCAATGCCTAT
+ATTTGCACCACTAGTTTATACAGTACCCATACCAAGATTCGTGGGGGTTACGACATATCA
+ATTTGGTCAATATGTTTTGTTATTCCCCGCGCCGACTGGACTGGGGATGGCCACACTACA
+AATGTTAAACATGCTTTATTCACATCCCTTCCGATTTGTATGGGGGGTAGTTGCTTTTGT
+ATTGATTTTCGGTGTTTGAGTACTACCCACGCAAGTACTAATGGGGTCATAATTTGAAAT
+ACTATATTATAAAATTTCTAATTGGCCCTTACGCATCTCGTAGGGGTGTAGAAATACTAA
+ACTAAGCGAGGTGCTTTATTATTTTCCCTTACGAAAATAATGGGGGTAATGATAACACTA
+ATAAGTAATTGATATTTTGCTCTATCCCATACTGATATTTTTGGGGTTTGTTTTTAATGT
+AATGTTAGATCTATTTTATATTATACCCATCCTGGTAAATATGGGGTTGCTGTAAACAGT
+ATTTGTAACACATGTTTCATATGGTCCCGAACATAGCATATAGGGGGGATGGCTATAAAT
+AACACATTTGACAATTTTTTTAGATCCCCTACCTGGAATATGGGGGGTATTATTATTTAG
+AAAATATTTTGTGGTTTATGCAAATCCCGTGCTTGATTGGAAGGGGAGATGGTATTTGCT
+AGAAAACATACCACTTTTGCCAATTCCCTTACTTATTCTGATGGGGATTTTCGGAGTACC
+ATCTGAAATGATAATTTATAGGCAACCCAAACATAACGGTGTGGGGTTTATCGATATTTA
+AAGGTGATAAAATTTTTCAACTATTCCCAAGCCTAAAATAAAGGGGTTTATTAATTATTT
+AATTGGTATATTAGTTTTGGCGTTTCCCGCTCATATATTATTGGGGTAAAGGTTAGAGTG
+AAACGTGTTTATGATTTAGACGTTCCCCTATCGTTACTACAGGGGGAGTCAGAAGGTATC
+ATTGATAAGATCATTTTAAATCAAACCCGCACTGATATACACGGGGAAGAGGCAAGATTA
+ACTGCCTCTTTTTTTTTTATTAAATCCCCGTCTTAATTCTTGGGGGTAGTGTTACTGAGA
+AGCATTACGAAGAATTTTATCTTGACCCTCCCTTTTTAAAAAGGGGTGAAGATATCCTAT
+AAAGACTCTAAGTATTTTTACAGTGCCCAATCACACTTCGATGGGGAAAAGACCTTTCCA
+AATATCTGGAAACATTTGTACAGGCCCCCTGCTCTTTAAAGCGGGGGCTGAGATTACTAA
+AGGGAATGTGAAAGTTTAAAATACGCCCGAACAAGGTTCTTTGGGGAATTTAGGCAATTG
+AATAATGATATAAATTTAAAATATTCCCGCTCATATTAAAAAGGGGATTACGATAAAAGC
+ATTCGCCTTAGGAATTTCTATAACACCCGCTCCAGCGACTAAGGGGAATGGTGCACAAAT
+AGTCGATGTGTTCGTTTTGATTGACCCCTGCCATGGAAATGCGGGGAATCGCTTGAAAAC
+AATAGGTAAGACAATTTAAGTAGCTCCCAAGCCATATATTACGGGGAATTGACCTATGAA
+AAAACATCCGCTAATTTGTGCCGTCCCCCCTCCGATAGCAATGGGGATAAAAAGTACAGT
+ACACGAAGGATAAATTTTTTCAAGTCCCAAACCTTTTAAATAGGGGATTGAAAAAATAAT
+AATATGCGTCATAATTTCCACAAGGCCCAAACTCCAAATAGGGGGGATTAAGCTATTGAT
+AAAAGTTATGTTACTTTAAAATGTACCCAAACAAGTAGTACCGGGGAAGCCAGACATGAA
+AAATGTTGTGAACATTTATGAAACTCCCGGACTGTTTAATTGGGGGTTCACATTATTAAA
+ATATTTGATCATAGTTTATAAAAGGCCCATCCAAATCAAGAAGGGGAAGATTCCGCAAAC
+AGCGTTTAAAGTAATTTATAAGTCTCCCAATCGATTACCCAGGGGGAATAATCCTAAGAC
+AAGTCCTGACGTTATTTGAGGTGCTCCCTGACGTCTCATGATGGGGAACCTTTCTTATGT
+AATTTTCTTCACTATTTATATCATGCCCGCTCTGGCCAATTAGGGGGAAGAGTGTGTACT
+ATTACGTTATTAGATTTTGTATATTCCCTTGCTAGACACATAGGGGACATTTAAATCTCA
+AAATTAATGATATTTTTGGTATGCTCCCCAACCTAAATATTGGGGGATGTGGAAAAGTAA
+ATATTTAATTTAAATTTTGATTGAACCCTTACAAGGGGGTGTGGGGAATGAGAAATCAAA
+ATCAAAAACTATTATTTAGTGATTTCCCCAGCTTACATATATGGGGACAAACAGGAGTTC
+AACAAAGCACAATATTTAGAATGAGCCCAAACGAAAGATCATGGGGCAATATGTCATTGA
+AAAACGCTGAACTATTTTATAAATTCCCCAACAGTATATTTAGGGGTGAAAATTAAGAAG
+AATAATTAGTGAGTTTTATAATTAACCCACCCCGTCTCGATGGGGGTGGTTATTTTTTTA
+AATGTATTTAATTATTTGATTTCGGCCCCCTCAAAAGTCCCTGGGGTTTTGAATAGTATC
+AAAATCTATAGGAATTTTATAATAACCCAAACCTCTACGCATGGGGATGGTGAGTGTTAA
+AAATCTTGGTGTAGTTTTGGTGTAGCCCAGGCGCAGTATAGAGGGGATTTGAGCACAAAA
+ATACTTAATTAAAATTTTATAAACACCCGTCCGACGCGTGTCGGGGGTGTGTCAAAAAAT
+ACTATGACGAATAATTTTGCTTGTTCCCATTCCTGTATTTTCGGGGCTTATGCGGGGGAC
+ATTTTGGTGACGCATTTTACTATATCCCTGACATTCAAAACAGGGGGAGCCCCGTAATCA
+AGGAACTCTTTTGTTTTGTAATGCGCCCAAACATACCTATAAGGGGCCTGGGAGGGATTC
+AAACCCCCGACCGATTTCTTAGAAGCCCATTCCTCTATCCAGGGGGGCTACCAGGACACG
+ATTAACAACACAAGTTTTATTATATCCCAATCAACTTAAATTGGGGATACCTTGTAAATA
+AAAAATGTTTATATTTTTCACTATTCCCGAGCTATTTATCTAGGGGGGTTCAATAAGACT
+AAAATGCGAATTCATTTAACTTAATCCCGTTCAATACAGTTTGGGGTGCCTAACTGTATT
+ACTTTTCTCTTTAATTTACAGTTAACCCCATCATAAGATGTTGGGGGGATAAACAAACTA
+ATTGCATCAAATTTTTTTTAAAATACCCACACCAAAACGTTAGGGGAATAACATTTCGGT
+AATTTAAAAGCTACTTTCGTTTTTGCCCTCTCCAAATTTAAAGGGGAAGGAGTGTTTTCA
+ATGGAAGGTTTATTTTTCGCAATTACCCATACCGTAACTGCAGGGGTTAATAATGATGGC
+ACAAAATTAGGCACTTTCATTGTGACCCTCGCTGAAAATGGCGGGGGTTTATTAGGTAAA
+ATATTCGGATTCTATTTTCAATATGCCCTGTCAGTAATCAGTGGGGTTTCAAAGGTGAGG
+AAGAGATTTAAATGTTTGGACTAGCCCCAGCCATCGCAAATAGGGGGCAAGCTGCACAAC
+AACATGATAGTGTGTTTTTAGGCACCCCTATCGTAGACATCGGGGGTAACGGTGTGGGTT
+AACTAGGTAAATTATTTGGATTCTACCCTAACAACTAATATTGGGGAAAATAAACTGGGT
+AAGCATACTTTAATTTTATGCACTCCCCTTACTTTATTTGCAGGGGTTTGAGCCTCTGTT
+AAGATTTAGATACATTTACAATATACCCGATCGGGAAATTGGGGGGTAAAAATATTTTGA
+AAGACTTTGATGATTTTATTGTTGACCCTTACGATGCAGAAGGGGGGGCGTTTCACTATA
+AGGCGAATGTTTTTTTTCATAAAGCCCCAAACGATGATTTTTGGGGATTTAAAAAAATCA
+ATCACCAACATTGGTTTGCTTTTCACCCAAACAAATTAACGAGGGGTGAAGTATTATCAG
+AACGATTTGTGAATTTTTTCAAACACCCTCACATGGAAGTTGGGGGGCATCGTGCAGATG
+AGTTATTTAGAAATTTTTTAGCAGACCCTAACGTTAAATACTGGGGTCAAACATTAGAAA
+AAATTGTCGAATTATTTAAAAGACACCCTTTCTATATTGTTAGGGGTGGTGTAACCGAAA
+AGCAAAAGAGAAGGTTTAATCAGACCCCGTTCCATAAATATAGGGGAAAGATATTTATAT
+ATGAGGAAACAGGATTTCAAAAACCCCCTCCCGAATTTTTTAGGGGTGTTTTTAATGATA
+ATGGTGAGGATGAATTTCAGCACTCCCCTATCGTTGGAGATTGGGGAACATCTGACATTC
+AAGGTGGAATCAATTTTGGTATAGCCCCTTGCTGGTTTAATTGGGGAGGATTTGATCATA
+ATCCAGGAATTATATTTGATTATGACCCTAACTCATGGAAACGGGGAAATGATATTGTAC
+ATTAAGACAAATAATTTGAAAATGACCCATACTAGCTTAAGAGGGGAATTGTAAAAACGA
+AACTACGAAAAGATTTTTTTTCATGCCCGTACTGTCGAATATGGGGATCTTATGGTTTCC
+ACAATAAGACATCATTTCCCTCTATCCCTTTCGCTTCCACATGGGGAAATCCATGATGTA
+AAAAGAAATCCTTATTTTCATTTCTCCCAATCGCTTTAATTGGGGGATTGAAACTCTTTG
+AAAAATCAATCAATTTTGAAGCGTACCCTCTCTTTTGATATTGGGGTAATACTTCTAACT
+ACCATAATAATATATTTTCTTCAAACCCTGGCAAGTAGATTTGGGGGACATCACCTTTTT
+ATAGTAATGCCATTTTTGCTCCTAACCCATCCCCGACAAATAGGGGATAAAATGGTGAAT
+ATGAACTTGCATCATTTATTTGACCCCCTAACTCATTGACCAGGGGTAAGTCTTTGTTGC
+AAAACGCTCTAAAGTTTTCGAATAACCCGTCCGTTTTGTAATGGGGTTCAAGACGTTTGA
+ATTCACTCATATTTTTTTACCCCTTCCCTGTCTACTACTATTGGGGTACATTAAACCACT
+AGTGTTTTTCATTGTTTTGAAAATGCCCAACCATGAACATTAGGGGTTTTCCATGATTAA
+ATTGATTTTCAAGCTTTAAAATTGTCCCTCACCGGTATAGACGGGGTACTATAGTTTGTA
+ATCGCGTACTTAAGTTTAAAAAACACCCACTCTAAAGTAGTTGGGGTAAATGAAAAAGAT
+AAGTTTATACCAAATTTAAAGAGCACCCACCCTGTAATAACAGGGGTATTTCAATATGAT
+ACAGCTCAACAAACTTTTTTTGAAACCCGGACGTCTAAAGAAGGGGATGGAAGAGAAGCG
+ACATTAGCGAATGTTTTTCGTGAATCCCTGACTGATTTAAAGGGGGCAAATGAACAAGAA
+ATAAACATACAACATTTAGCTAATGCCCCAACAGTTGTGATTGGGGGACAACAAGCAGGG
+ATTTTCGGGGGACCTTTGTATACATCCCATACAATATTTTCAGGGGTTACTTTATCTAAG
+AAATTAACGGATACTTTTAAGCAACCCCTAGCACCAGTTTTTGGGGTTGCAGGAGAAGAT
+AATGATTTCGATGATTTGAATCATACCCTTGCTTATAACGAAGGGGATGGGTCGCTGCAT
+AAGGTTAAATATCATTTAATGGAGACCCCAGCGACGACAGTCGGGGGATATTATCCTGAT
+AAGGCTGAGTTGAATTTAACTTTAACCCCGACGTTCATTCATGGGGAAGAAACTGTTCAT
+ACACAAGGTCTACTTTTGATTTGTGCCCGAACTATTGACCAAGGGGACTCGTGGACTGAT
+ATGTTTAAAGCACTTTTGCATGAAACCCTTACAGCATATGGCGGGGTATTTATAGATGCG
+AAGTTTGAGCCGTTTTTAAAAATGGCCCCGCCTATGTTTAAAGGGGTTTTGAAAAAACAT
+AAGTTGCTTGATGATTTTTTTAGAGCCCCACCACAACGTACTGGGGATCAAGGCTTGAAT
+ACGATGATACAAACTTTTACAAATGCCCATTCATTCTTACATGGGGAAAATATGCGCCAA
+ATAGTTTCGTATGATTTTAAGCATTCCCAATCAAATAAAACAGGGGAGACATATATAAAG
\ No newline at end of file
diff --git a/t/data/locus_tag_gffs/query_5.gff.proteome.faa.expected b/t/data/locus_tag_gffs/query_5.gff.proteome.faa.expected
new file mode 100644
index 0000000..653afc3
--- /dev/null
+++ b/t/data/locus_tag_gffs/query_5.gff.proteome.faa.expected
@@ -0,0 +1,75 @@
+>3_1
+MNPRIVSSGTTKILLGSILMNPPANAGEGDIKIKTGTTDIGSPTTVRTGDLETYDKDNGM
+HNPVFYSLGDDKNHNKNLLVISPKGTSAGQYKIYSEDGANKSPLAWRSGFKEKLQLPDNE
+VAPISDC*GRN*IDTKDYMSTFPYGFSGGVTENDTGNIGGLIPANVWMGHTQKYVQPDFK
+TIPESPSEGKVERKVIFNNMVNPNWGR*GRD*RNPVYGNQLFPKTRSGGMKETENFLDPN
+KAPSLLCSGFSQNFATVITMDSPASKRQGNIEIIYERVRDDYPLHWSSGNWKSTNTNDKW
+TDPSSER*GID*KKEEITN*
+>ccc_50002
+MKLFFIVSLTIIWGDIFLGNEIIHLLTALTTTLGVVNSRKGIKNFRVAQ
+>ccc_50003
+ISELPNLINSGQSLFESESGF*ISPNPGVPGQTVQELRNGISKLPDPRFRGIIKL*SYYV
+FLKEP*
+>ccc_50004
+MRKGITKNIILTSTLLLPRTVLPGNQKKVFSFYSEAKPHSIGQGETKNNELI*YYTQPHF
+SLSGKWLNQYDN*NIYVPLKR*SGSAQKSLWGSESWGPLNQLKGRYVNVFGL*DKDTPQL
+WWSGRETNTGGVSPAAKPPDKTYGLFVKYKDKLQTIIPPHKMYGGNKKVLTL*EIDFPPR
+EALGKNKKLYTEYRNKGPLKITGGGNKNTIDLCKRLHPHLAKVGVKKNNKITFDVLFPQ
+>3_2
+MGNNKTKKIILSTTFPLLGRAGTQ*TNTPINSSSDPKAYCMGQNETNVNDLTKYYPQKY*
+TGSNKTLWQNDNGTIPATL*QGSW*SHIQVYGPESPGNISQGRNKSVDIFGIKDHPTIDC
+LGLSQKTFTGGVTPAPTSNGKGYKQNVTYNDKAETPTGGCPGYEENKPVFTLKEFPFRIR
+QGLIKSKKLYNNSYNPGQIRMGGTENNYTIDLSKSPPSTGAGRYEKKPQNAKIEVPLEK*
+KG
+>ccc_50006
+MKKYIMNKPLLSTAGLLLKTTST*LPKTPLSFSSGAKANNISEYETNIPQLIKYGTQPNF
+SLS*KWLWPQPNGSGHATKQTWVWYSHIPLFGSEGWGKNNQLRYKYVDPLGTKDGDTVKG
+YWTYDETFPRGVTPGATSNDKPY*LFLKPRDKQQGIIGKHEFY*GNKPPLTLKEGDFRNR
+QTLLKNKKPHNGEFGKGQNKITAYGNNYPLDLRKGLKLNDTNRYVKNPPHAQMEGILEKS
+N*
+>3_3
+LTNLRGGSFLTILDFSLQEAQSLLTGAEDLKSAKYIFTETPTLKNGDIALLIEKDSLRTP
+RAFEVGAHDQGTNVTYLGPTRSQMGGEETTKNTARVFGGTHAGIEGGGFSQRTVETLAET
+PAVPVGDGLTDKDHPTLVLAHSLTAGEVLKKNYADIFFTSLADGRGDVANAIMQGAFIMA
+LTFHLGGPKELNPTDDLLNPRTNIAGENGGNILITDFIDPRAKGSGVIYTDIWVSIFEPA
+QAWKEGVELLKTYQVNLEMTHTTGNGDVIFENCLPSFHNAHTKIGGEIFEKNGIRDLEVT
+HAVFEGEASVVIQEADFRMPPTKAVGVATLGKF*
+>ccc_50008
+MLAKMGVALGGNALGISPQPQLELVGNTAKSLVGLISKGPELVIRGGNGPQVGSIILGLP
+YPAEQGQGPALPFAEFCAMPQPYIGGQLQERLQNEFYSIPMHKQVGTLVTQVEVDVYDPP
+FHNPSGPIGLLYNKEVSEQPQQEKGGIFVEEAGRGF*RVPPPPQPGSIIELESIKIFIKP
+DPLVMGAGGGGIPVII*QHPGLKGMGAVIDKDKTSVLLGPNLQCEGLIILTAIDYVYINP
+NPENQGPLKTTNVDEF*RYPDQNQLGKGSMLPKIEVSISPIQNNPGGSVLITSLNVLDAP
+LQGKVGTVIKK*
+>ccc_51705
+VENTINVCEKPKRFKLGMPGALMILFILTVPALIATGVIPAGAYSKLSYEPSPQELGIVN
+PQNQVKIFPGPQQELEGMGVKIKIEQF*SGPIHKPVGIPNT*ERLKLYPAPPQQITGSMV
+EGTIEAVYIMPFLLVLGGLIGVVQASVSFEPGLLALGKKTKGHEFMLFVFPSLLMMGGGT
+L*GIEEVSVAPYPILVGIFIALGYDSIFSVPALFLAGSVGSTFSTIISFSPVLASKGAGT
+TLTDGLFWRIPARIVGGIFVIRYLYWFCKKPKHDPKGSYSYEDKDAF*QQPSLLKEGDSA
+HLTLRKILILPLLVLPGPIMV*GVMTLCWWPPLMASGFLIFTIIIMFFAGPGQSGLGEKG
+TVDAFVICASPLLGVSGIIGLARGINFLLNPGLISEGILHFSSSLVLYMSPPLFIMGLLF
+ILFCLGFFVPPSPGLAGLSMPIFAPLVYTVPIPRFVGVTTYQFGQYVLLFPAPTGLGMAT
+LQMLNMLYSHPFRFVWGVVAFVLIFGV*VLPTQVLMGS*
+>ccc_50010
+ITHLTIFLDPLPGIWGVLLFRKYFVVYANPVLDWKGRWYLLENIPLLPIPLLILMGIFGV
+PSEMIIYRQPKHNGVG
+>ccc_50011
+MRRQGAPQITSGLVLGLFPLGNRLGDL*ITLNAVCGIFPFLIWMGLL*TMIKYFNNVNPQ
+LNSPGVS*MFTTFFMSGFPGTTCLGTF*SNITFINSLIPPIWSLGLVEIMTHIIIFSIPY
+LKGLGLEKIYPSCTVLFIPIAIGGGTAQISGCFFIGQFPVIYGLGAT*IVLPIVFKRFPA
+FPWQGSIKTNTSTICAPFPLVAGAGVIEIPKANAFIVIPFLI*AGIF*IYIIIQLPKFPK
+EPCSGVF*TFTFPLVISAPALKSRGPVQMFPDIWKGLFPIEV*LGTVKILRVFIGYLHPF
+LKREGQDKILRNASQ*
+>ccc_50012
+MRNQNQKLLFSDFPSLHIWGQTGVQQSTIFRMSPNERSWGNMSLKNAELFYKFPNSIFRG
+EN*
+>ccc_50014
+MEGLFFAITHTVTAGVNNDGTKLGTFIVTLAENGGGLLGKIFGFY
+>ccc_50015
+MFGLAPAIANRGQAAQQHDSVFLGTPIVDIGGNGVG*LGKLFGFY
+>3_5
+LGGKNILKDFDDFIVDPYDAEGGAFHYKANVFFHKAPNDDFWGFKKINHQHWFAFHPNKL
+TRGEVLSERFVNFFKHPHMEVGGHRADELFRNFLADPNVKYWGQTLEKIVELFKRHPFYI
+VRGGVTEKQKRRFNQTPFHKYRGKIFIYEETGFQKPPPEFFRGVFNDNGEDEFQHSPIVG
+DWGTSDIQGGINFGIAPCWFNWGGFDHNPGIIFDYDPNSWKRGNDIVH*
diff --git a/t/data/mafft_input.fa b/t/data/mafft_input.fa
new file mode 100644
index 0000000..59c5769
--- /dev/null
+++ b/t/data/mafft_input.fa
@@ -0,0 +1,59 @@
+>1234_8#75_04759
+ATGAGCGAGCAGTTAACGGACCAGGTCCTGGTTGAACGGGTCCAGAAGGGAGATCAGAAA
+GCCTTTAACTTACTGGTAGTGCGCTACCAGCATAAAGTGGCGAGTCTGGTTTCCCGCTAT
+GTGCCATCGGGCGACGTTCCCGATGTCGTACAGGAATCATTTATTAAGGCCTATCGCGCG
+CTGGATTCTTTCCGGCGGGATAGTGCTTTTTATACCTGGTTGTATCGTATTGCGGTCAAT
+ACCGCGAAGAACTACCTGGTTGCGCAGGGGCGTCGTCCGCCTTCCAGTGATGTAGACGCG
+ATTGAAGCAGAAAACTTTGAAAGCGGCGGCGCGCTGAAAGAAATTTCGAACCCTGAGAAC
+TTAATGTTGTCAGAAGAACTGAGACAGATAGTTTTCCGAACTATTGAGTCCCTCCCGGAA
+GATTTACGTATGGCAATCACCTTACGGGAGCTGGATGGCCTGAGCTATGAAGAGATAGCG
+GCTATCATGGATTGTCCGGTGGGGACGGTGCGTTCACGTATCTTCCGGGCGCGGGAAGCT
+ATTGATAATAAAGTTCAACCGCTTATCAGGCGTTGA
+>1111#5_04506
+ATGGCAATCACCTTACGGGAGCTGGATGGCCTGAGCTATGAAGAGATAGCGGCTATCATG
+GATTGTCCGGTGGGGACGGTGCGTTCACGTATCTTCCGGGCGCGGGAAGCTATTGATAAT
+AAAGTTCAACCGCTTATCAGGCGTTGA
+>Salmonella_enterica_subsp_enterica_serovar_Typhimurium_DT104_v1_02853
+ATGAGCGAGCAGTTAACGGACCAGGTCCTGGTTGAACGGGTCCAGAAGGGAGATCAGAAA
+GCCTTTAACTTACTGGTAGTGCGCTACCAGCATAAAGTGGCGAGTCTGGTTTCCCGCTAT
+GTGCCATCGGGCGACGTTCCCGATGTCGTACAGGAATCATTTATTAAGGCCTATCGCGCG
+CTGGATTCTTTCCGGGGGGATAGTGCTTTTTATACCTGGTTGTATCGTATTGCGGTCAAT
+ACCGCGAAGAACTACCTGGTTGCGCAGGGGCGTCGTCCGCCTTCCAGTGATGTAGACGCG
+ATTGAAGCAGAAAACTTTGAAAGCGGCGGCGCGCTGAAAGAAATTTCGAACCCTGAGAAC
+TTAATGTTTTCAGAAGAACTGAGACAGATAGTTTTCCGAACTATTGAGTCCCTCCCGGAA
+GATTTACGTATGGCAATCACCTTACGGGAGCTGGATGGCCTGAGCTATGAAGAGATAGCG
+GCTATCATGGATTGTCCGGTGGGGACGGTGCGTTCACGTATCTTCCGGGCGCGGGAAGCT
+ATTGATAATAAAGTTCAACCGCTTATCAGGCGTTGA
+>Salmonella_enterica_subsp_enterica_serovar_Typhimurium_SL1344_v2_02736
+ATGAGCGAGCAGTTAACGGACCAGGTCCTGGTTGAACGGGTCCAGAAGGGAGATCAGAAA
+GCCTTTAACTTACTGGTAGTGCGCTACCAGCATAAAGTGGCGAGTCTGGTTTCCCGCTAT
+GTGCCATCGGGCGACGTTCCCGATGTCGTACAGGAATCATTTATTAAGGCCTATCGCGCG
+CTGGATTCTTTCCGGGGGGATAGTGCTTTTTATACCTGGTTGTATCGTATTGCGGTCAAT
+ACCGCGAAGAACTACCTGGTTGCGCAGGGGCGTCGTCCGCCTTCCAGTGATGTAGACGCG
+ATTGAAGCAGAAAACTTTGAAAGCGGCGGCGCGCTGAAAGAAATTTCGAACCCTGAGAAC
+TTAATGTTGTCAGAAGAACTGAGACAGATAGTTTTCCGAACTATTGAGTCCCTCCCGGAA
+GATTTACGTATGGCAATCACCTTACGGGAGCTGGATGGCCTGAGCTGTGAAGAGATAGCG
+GCTATCATGGATTGTCCGGTGGGGACGGTGCGTTCACGTATCTTCCGGGCGCGGGAAGCT
+ATTGATAATAAAGTTCAACCGCTTATCAGGCGTTGA
+>Salmonella_enterica_subsp_enterica_serovar_Typhimurium_str_D23580_v1_02783
+ATGAGCGAGCAGTTAACGGACCAGGTCCTGGTTGAACGGGTCCAGAAGGGAGATCAGAAA
+GCCTTTAACTTACTGGTAGTGCGCTACCAGCATAAAGTGGCGAGTCTGGTTTCCCGCTAT
+GTGCCATCGGGCGACGTTCCCGATGTCGTACAGGAATCATTTATTAAGGCCTATCGCGCG
+CTGGATTCTTTCCGGGGGGATAGTGCTTTTTATACCTGGTTGTATCGTATTGCGGTCAAT
+ACCGCGAAGAACTACCTGGTTGCGCAGGGGCGTCGTCCGCCTTCCAGTGATGTAGACGCG
+ATTGAAGCAGAAAACTTTGAAAGCGGCGGCGCGCTGAAAGAAATTTCGAACCCTGAGAAC
+TTAATGTTGTCAGAAGAACTGAGACAGATAGTTTTCCGAACTATTGAGTCCCTCCCGGAA
+GATTTACGTATGGCAATCACCTTACGGGAGCTGGATGGCCTGAGCTATGAAGAGATAGCG
+GCTATCATGGATTGTCCGGTGGGGACGGTGCGTTCACGTATCTTCCGGGCGCGGGAAGCT
+ATTGATAATAAAGTTCAACCGCTTATCAGGCGTTGA
+>Salmonella_enterica_subsp_enterica_serovar_Typhimurium_str_DT2_v1_02741
+ATGAGCGAGCAGTTAACGGACGTCCTGGTTGAACGGGTCCAGAAGGGAGATCAGAAA
+GCCTTTAACTTACTGGTAGTGCGCTACCAGCATAAAGTGGCGAGTCTGGTTTCCCGCTAT
+GTGCCATCGGGCGACGTTCCCGATGTCGTACAGGAATCATTTATTAAGGCCTATCGCGCG
+CTGGATTCTTTCCGGGGGGATAGTGCTTTTTATACCTGGTTGTATCGTATTGCGGTCAAT
+ACCGCGAAGAACTACCTGGTTGCGCAGGGGCGTCGTCCGCCTTCCAGTGATGTAGACGCG
+ATTGAAGCAGAAAACTTTGAAAGCGGCGGCGCGCTGAAAGAAATTTCGAACCCTGAGAAC
+TTAATGTTGTCAGAAGAACTGAGACAGATAGTTTTCCGAACTATTGAGTCCCTCCCGGAA
+GATTTACGTATGGCAATCACCTTACGGGAGCTGGATGGCCTGAGCTATGAAGAGATAGCG
+GCTATCATGGATTGTCCGGTGGGGACGGTGCGTTCACGTATCTTCCGGGCGCGGGAAGCT
+ATTGATAATAAAGTTCAACCGCTTATCAGGCGTTGA
diff --git a/t/data/mcl_file b/t/data/mcl_file
new file mode 100644
index 0000000..253196c
--- /dev/null
+++ b/t/data/mcl_file
@@ -0,0 +1,5 @@
+6259_8#9_01142 6631_2#11_00851 6631_4#5_01901
+6664_1#21_00211
+6259_8#13_01077
+6630_4#9_00008 6259_5#1_00688 6259_5#8_00815
+6259_7#7_02078 6631_5#24_01032 6593_5#13_00457
\ No newline at end of file
diff --git a/t/data/mdoH.fa.aln b/t/data/mdoH.fa.aln
new file mode 100644
index 0000000..9cbdc63
--- /dev/null
+++ b/t/data/mdoH.fa.aln
@@ -0,0 +1,88 @@
+>11111_1#11_04119
+ATGAATAAAACAACTGAGTATATTGACGCACTGCTGCTTTCTGAACGTGAGAAAGCGGCA
+TTGCCGAAAACTGACATCCGCGCCGTGCATCAGGCGCTGGATGCCGAGCATCGGACTTAC
+TCGCGAGAAGACGATTCACCGCAGGGTTCCGTAAAAGCCCGCCTTGAACACGCCTGGCCG
+GATTCATTGGCGAAGGGGCAGTTAATTAAAGATGATGAAGGGCGCGATCAGTTGCAGGCT
+ATGCCAAAAGCGACGCGCTCTTCGATGTTTCCTGATCCCTGGCGAACCAACCCGGTTGGC
+CGTTTCTGGGATCGCCTGCGTGGGCGGGATGTAACGCCGCGCTATGTTTCTCGTCTGACA
+AAAGAAGAGCAGGCGAGTGAGCAAAAATGGCGTACCGTCGGCACTATACGCCGCTATATT
+TTGTTAATTTTGACTCTGGCGCAAACCGTCGTCGCGACCTGGTATATGAAGACCATTCTG
+CCCTATCAGGGATGGGCGCTCATCAATCCTATGGATATGGTGGGGCAGGATATTTGGGTC
+TCCTTTATGCAGCTCCTGCCCTACATGCTGCAAACCGGTATCCTGATTTTGTTTGCCGTG
+CTGTTCTGCTGGGTGTCTGCCGGATTCTGGACGGCGCTGATGGGCTTCCTGCAACTGCTT
+ATCGGGCGCGATAAGTACAGTATCTCCGCGTCTACGGTTGGCGATGAGCCCCTCAATCCG
+GAACACCAGACGGCGCTGATCATGCCTATCTGTAATGAAGACGTTAGCCGCGTTTTCGCC
+GGTCTGCGCGCGACCTGGGAGTCCGTTAAAGCCACAGGCAACGCCGCGCATTTTGACGTC
+TATATCCTTAGCGATAGTTATAACCCGGATATCTGCGTGGCGGAGCAAAAGGCGTGGATG
+GAGCTCATCGCGGAAGTGCAGGGCGAAGGCCAAATTTTTTACCGTCGCCGCCGCCGCCGT
+ATGAAACGCAAAAGCGGCAATATTGACGATTTTTGCCGCCGCTGGGGCAATCAGTACAGC
+TATATGGTGGTGCTGGACGCGGACTCAGTGATGAGCGGCGAGTGTCTGAGCGGGCTGGTG
+CGCCTGATGGAAGCGAACCCTAACGCCGGGATTATCCAGTCTTCGCCGAAAGCGTCGGGG
+ATGGATACTCTGTATGCCCGCTGCCAACAGTTCGCGACCCGTGTTTATGGACCGCTGTTT
+ACCGCCGGGCTGCACTTCTGGCAGTTGGGGGAGTCGCACTACTGGGGGCACAATGCCATT
+ATCCGCGTGAAGCCGTTTATCGAGCACTGCGCTCTGGCGCCGCTGCCGGGAGAAGGTTCG
+TTCGCCGGATCGATTCTTTCCCACGACTTTGTGGAGGCGGCGCTAATGCGTCGGGCAGGG
+TGGGGCGTCTGGATTGCCTACGATCTCCCCGGCTCCTATGAAGAGCTGCCGCCAAACCTG
+CTGGATGAGCTTAAACGCGACCGCCGCTGGTGTCACGGCAACCTGATGAACTTTCGTCTG
+TTCCTGGTGAAAGGAATGCACCCGGTGCATCGCGCCGTGTTCCTGACCGGGGTAATGTCA
+TACCTGTCCGCGCCGTTATGGTTTATGTTCCTCGCGCTTTCTACCGCGCTGCAGGTCGTT
+CATGCGTTAACAGAGCCGCAATATTTCCTTCAGCCGCGCCAGCTTTTTCCGGTCTGGCCG
+CAGTGGCGTCCGGAACTGGCAATCGCGCTGTTTGCGTCAACGATGGTGCTGCTGTTCCTG
+CCGAAGCTGCTCAGTATTATGCTGATCTGGTGTAAAGGCACCAAAGAGTATGGCGGTTTC
+TGGCGCGTTACGCTGTCGCTATTGCTGGAAGTGCTGTTCTCCGTGTTGCTGGCGCCGGTG
+CGTATGCTGTTTCATACCGTGTTTGTGGTCAGCGCGTTCCTCGGCTGGGAAGTGGTCTGG
+AACTCACCGCAACGCGACGATGATTCTACGCCGTGGGGAGAAGCCTTTATGCGTCACGGC
+TCTCAACTGCTGCTGGGGCTGGTCTGGGCGGTGGGTATGGCGTGGCTGGATTTACGCTTT
+CTGTTCTGGCTGGCGCCGATTGTCTTTTCGCTGATTCTGTCGCCATTTGTTTCGGTGATC
+TCCAGTCGTTCAACGGTAGGATTACGCACCAAACGCTGGAAGCTGTTCCTGATCCCGGAA
+GAGTATTCGCCGCCTCAGGTGTTGGTCGATACCGATAAATATCTGGAGATGAATCGCCGC
+CGTATTCTGGACGATGGCTTTATGCATGCGGTATTTAACCCGTCGCTTAATGCGCTGGCG
+ACCGCGATGGCCACCGCGCGTCACCGCGCCAGTAAGGTGCTGGAAATAGCCCGCGATCGT
+CATGTGGAGCAGGCGCTAAACGAAACGCCGGAGAAACTGAACCGCGATCGGCGTCTGGTT
+TTGCTCAGCGATCCGGTGACGATGGCGCGTTTACACTATCGGGTCTGGAATGCGCCAGAG
+AGATACTCTTCCTGGGTAAACCATTATCAGTCTCTCGTCCTGAATCCGCAGGCGTTGCAG
+GGACGAACATCGTCAGCGGGATAA
+>22222_2#22_04119
+ATGAATAAAACAACTGAGTATATTGACGCACTGCTGCTTTCTGAACGTGAGAAAGCGGCA
+TTGCCGAAAACTGACATCCGCGCCGTGCATCAGGCGCTGGATGCCGAGCATCGGACTTAC
+TCGCGAGAAGACGATTCACCGCAGGGTTCCGTAAAAGCCCGCCTTGAACACGCCTGGCCG
+GATTCATTGGCGAAGGGGCAGTTAATTAAAGATGATGAAGGGCGCGATCAGTTGCAGGCT
+ATGCCAAAAGCGACGCGCTCTTCGATGTTTCCTGATCCCTGGCGAACCAACCCGGTTGGC
+CGTTTCTGGGATCGCCTGCGTGGGCGGGATGTTACGCCGCGCTATGTTTCTCGTCTGACA
+AAAGAAGAGCAGGCGAGTGAGCAAAAATGGCGTACCGTCGGCACTATACGCCGCTATATT
+TTGTTAATTTTGACTCTGGCGCAAACCGTCGTTGCGACCTGGTATATGAAGACCATTCTG
+CCCTATCAGGGATGGGCGCTCATCAATCCTATTGATATGGTGGGGCAGGATATTTGGGTC
+TCCTTTATGCAGCTCCTGCCCTACATGCTGCATACCGGTATCCTGATTTTGTTTGCCGTG
+CTGTTCTGCTGGGTGTCTGCCGGATTCTGGACTGCGCTGATGGGCTTCCTGCAACTGCTT
+ATCGGGCGCGATAAGTACAGTATCTCCGCGTCTACGGTTGGCGATGAGCCCCTCAATCCG
+GAACACCAGACGGCGCTGATCATGCCTATCTGTAATGAAGACGTTAGCCGCGTTTTCGCC
+GGTCTGCGCGCGACCTGGGAGTCCGTTAAAGCTACAGGCAACGCCGCGCATTTTGACGTC
+TATATCCTTAGCGATAGTTATAACCCGGATATTTGCGTGGCGGAGCAAAAGGCGTGGATG
+GAGCTCATCGCGGAAGTGCAGGGCGAAGGCCATATTTTTTACCGTCGCCGCCGCCGCCGT
+ATGAAACGCAAAAGCGGCAATATTGACGATTTTTGCCGCCGCTGGGGCAATCAGTACAGC
+TATATGGTGGTGCTGGACGCGGACTCAGTGATTAGCGGCGAGTGTCTGAGCGGGCTGGTG
+CGCCTGATGGAAGCGAACCCTAACGCCGGGATTATCCAGTCTTCGCCGAAAGCGTCGGGG
+ATGGATACTCTGTATGCCCGCTGCCAACAGTTTGCGACCCGTGTTTATGGACCGCTGTTT
+ACCGCCGGGCTGCACTTCTGGCAGTTGGGGGATTCGCACTACTGGGGGCACAATGCCATT
+ATCCGCGTGAAGCCGTTTATCGAGCACTGCGCTCTGGCGCCGCTGCCGGGAGAAGGTTCG
+TTCGCCGGATCGATTCTTTCCCACGACTTTGTTGAGGCGGCGCTAATGCGTCGGGCAGGG
+TGGGGCGTCTGGATTGCCTACGATCTCCCCGGTTCCTATGAAGAGCTGCCGCCAAACCTG
+CTGGATGAGCTTAAACGCGACCGCCGCTGGTGTCACGGCAACCTGATGAACTTTCGTCTG
+TTCCTGGTGAAAGGAATGCACCCGGTGCATCGTGCCGTGTTCCTGACCGGGGTAATGTCA
+TACCTGTCCGCGCCGTTATGGTTTATGTTCCTTGCGCTTTCTACCGCGCTGCAGGTCGTT
+CATGCGTTAACAGAGCCGCAATATTTCCTTCATCCGCGCCAGCTTTTTCCGGTCTGGCCG
+CAGTGGCGTCCGGAACTGGCAATCGCGCTGTTTGCGTCAACGATGGTGCTGCTGTTCCTG
+CCGAAGCTGCTCAGTATTATGCTGATCTGGTGTAAAGGCACCAAAGAGTATGGCGGTTTC
+TGGCGCGTTACGCTGTCGCTATTGCTGGAAGTTCTGTTCTCCGTGTTGCTGGCGCCGGTG
+CGTATGCTGTTTCATACCGTGTTTGTGGTCAGTGCGTTCCTCGGCTGGGAAGTGGTCTGG
+AACTCACCGCAACGCGACGATGATTCTACGCCTTGGGGAGAAGCCTTTATGCGTCACGGC
+TCTCAACTGCTGCTGGGGCTGGTCTGGGCGGTTGGTATGGCGTGGCTGGATTTACGCTTT
+CTGTTCTGGCTGGCGCCGATTGTCTTTTCGCTTATTCTGTCGCCATTTGTTTCGGTGATC
+TCCAGTCGTTCAACGGTAGGATTACGCACCAATCGCTGGAAGCTGTTCCTGATCCCGGAA
+GAGTATTCGCCGCCTCAGGTGTTGGTCGATACTGATAAATATCTGGAGATGAATCGCCGC
+CGTATTCTGGACGATGGCTTTATGCATGCGGTTTTTAACCCGTCGCTTAATGCGCTGGCG
+ACCGCGATGGCCACCGCGCGTCACCGCGCCAGTAAGGTGCTGGAAATAGCCCGCGATCGT
+CATGTGGAGCAGGCGCTAAACGAAACGCCGGATAAACTGAACCGCGATCGGCGTCTGGTT
+TTGCTCAGCGATCCGGTGACGATGGCGCGTTTTCACTATCGGGTCTGGAATGCGCCAGAG
+AGATACTCTTCCTGGGTAAACCATTATCAGTCTCTCGTCCTGAATCCGCAGGCGTTGCAG
+GGACGAACATCGTCAGCGGGATAA
diff --git a/t/data/mdoH_mafft.fa.aln b/t/data/mdoH_mafft.fa.aln
new file mode 100644
index 0000000..9cbdc63
--- /dev/null
+++ b/t/data/mdoH_mafft.fa.aln
@@ -0,0 +1,88 @@
+>11111_1#11_04119
+ATGAATAAAACAACTGAGTATATTGACGCACTGCTGCTTTCTGAACGTGAGAAAGCGGCA
+TTGCCGAAAACTGACATCCGCGCCGTGCATCAGGCGCTGGATGCCGAGCATCGGACTTAC
+TCGCGAGAAGACGATTCACCGCAGGGTTCCGTAAAAGCCCGCCTTGAACACGCCTGGCCG
+GATTCATTGGCGAAGGGGCAGTTAATTAAAGATGATGAAGGGCGCGATCAGTTGCAGGCT
+ATGCCAAAAGCGACGCGCTCTTCGATGTTTCCTGATCCCTGGCGAACCAACCCGGTTGGC
+CGTTTCTGGGATCGCCTGCGTGGGCGGGATGTAACGCCGCGCTATGTTTCTCGTCTGACA
+AAAGAAGAGCAGGCGAGTGAGCAAAAATGGCGTACCGTCGGCACTATACGCCGCTATATT
+TTGTTAATTTTGACTCTGGCGCAAACCGTCGTCGCGACCTGGTATATGAAGACCATTCTG
+CCCTATCAGGGATGGGCGCTCATCAATCCTATGGATATGGTGGGGCAGGATATTTGGGTC
+TCCTTTATGCAGCTCCTGCCCTACATGCTGCAAACCGGTATCCTGATTTTGTTTGCCGTG
+CTGTTCTGCTGGGTGTCTGCCGGATTCTGGACGGCGCTGATGGGCTTCCTGCAACTGCTT
+ATCGGGCGCGATAAGTACAGTATCTCCGCGTCTACGGTTGGCGATGAGCCCCTCAATCCG
+GAACACCAGACGGCGCTGATCATGCCTATCTGTAATGAAGACGTTAGCCGCGTTTTCGCC
+GGTCTGCGCGCGACCTGGGAGTCCGTTAAAGCCACAGGCAACGCCGCGCATTTTGACGTC
+TATATCCTTAGCGATAGTTATAACCCGGATATCTGCGTGGCGGAGCAAAAGGCGTGGATG
+GAGCTCATCGCGGAAGTGCAGGGCGAAGGCCAAATTTTTTACCGTCGCCGCCGCCGCCGT
+ATGAAACGCAAAAGCGGCAATATTGACGATTTTTGCCGCCGCTGGGGCAATCAGTACAGC
+TATATGGTGGTGCTGGACGCGGACTCAGTGATGAGCGGCGAGTGTCTGAGCGGGCTGGTG
+CGCCTGATGGAAGCGAACCCTAACGCCGGGATTATCCAGTCTTCGCCGAAAGCGTCGGGG
+ATGGATACTCTGTATGCCCGCTGCCAACAGTTCGCGACCCGTGTTTATGGACCGCTGTTT
+ACCGCCGGGCTGCACTTCTGGCAGTTGGGGGAGTCGCACTACTGGGGGCACAATGCCATT
+ATCCGCGTGAAGCCGTTTATCGAGCACTGCGCTCTGGCGCCGCTGCCGGGAGAAGGTTCG
+TTCGCCGGATCGATTCTTTCCCACGACTTTGTGGAGGCGGCGCTAATGCGTCGGGCAGGG
+TGGGGCGTCTGGATTGCCTACGATCTCCCCGGCTCCTATGAAGAGCTGCCGCCAAACCTG
+CTGGATGAGCTTAAACGCGACCGCCGCTGGTGTCACGGCAACCTGATGAACTTTCGTCTG
+TTCCTGGTGAAAGGAATGCACCCGGTGCATCGCGCCGTGTTCCTGACCGGGGTAATGTCA
+TACCTGTCCGCGCCGTTATGGTTTATGTTCCTCGCGCTTTCTACCGCGCTGCAGGTCGTT
+CATGCGTTAACAGAGCCGCAATATTTCCTTCAGCCGCGCCAGCTTTTTCCGGTCTGGCCG
+CAGTGGCGTCCGGAACTGGCAATCGCGCTGTTTGCGTCAACGATGGTGCTGCTGTTCCTG
+CCGAAGCTGCTCAGTATTATGCTGATCTGGTGTAAAGGCACCAAAGAGTATGGCGGTTTC
+TGGCGCGTTACGCTGTCGCTATTGCTGGAAGTGCTGTTCTCCGTGTTGCTGGCGCCGGTG
+CGTATGCTGTTTCATACCGTGTTTGTGGTCAGCGCGTTCCTCGGCTGGGAAGTGGTCTGG
+AACTCACCGCAACGCGACGATGATTCTACGCCGTGGGGAGAAGCCTTTATGCGTCACGGC
+TCTCAACTGCTGCTGGGGCTGGTCTGGGCGGTGGGTATGGCGTGGCTGGATTTACGCTTT
+CTGTTCTGGCTGGCGCCGATTGTCTTTTCGCTGATTCTGTCGCCATTTGTTTCGGTGATC
+TCCAGTCGTTCAACGGTAGGATTACGCACCAAACGCTGGAAGCTGTTCCTGATCCCGGAA
+GAGTATTCGCCGCCTCAGGTGTTGGTCGATACCGATAAATATCTGGAGATGAATCGCCGC
+CGTATTCTGGACGATGGCTTTATGCATGCGGTATTTAACCCGTCGCTTAATGCGCTGGCG
+ACCGCGATGGCCACCGCGCGTCACCGCGCCAGTAAGGTGCTGGAAATAGCCCGCGATCGT
+CATGTGGAGCAGGCGCTAAACGAAACGCCGGAGAAACTGAACCGCGATCGGCGTCTGGTT
+TTGCTCAGCGATCCGGTGACGATGGCGCGTTTACACTATCGGGTCTGGAATGCGCCAGAG
+AGATACTCTTCCTGGGTAAACCATTATCAGTCTCTCGTCCTGAATCCGCAGGCGTTGCAG
+GGACGAACATCGTCAGCGGGATAA
+>22222_2#22_04119
+ATGAATAAAACAACTGAGTATATTGACGCACTGCTGCTTTCTGAACGTGAGAAAGCGGCA
+TTGCCGAAAACTGACATCCGCGCCGTGCATCAGGCGCTGGATGCCGAGCATCGGACTTAC
+TCGCGAGAAGACGATTCACCGCAGGGTTCCGTAAAAGCCCGCCTTGAACACGCCTGGCCG
+GATTCATTGGCGAAGGGGCAGTTAATTAAAGATGATGAAGGGCGCGATCAGTTGCAGGCT
+ATGCCAAAAGCGACGCGCTCTTCGATGTTTCCTGATCCCTGGCGAACCAACCCGGTTGGC
+CGTTTCTGGGATCGCCTGCGTGGGCGGGATGTTACGCCGCGCTATGTTTCTCGTCTGACA
+AAAGAAGAGCAGGCGAGTGAGCAAAAATGGCGTACCGTCGGCACTATACGCCGCTATATT
+TTGTTAATTTTGACTCTGGCGCAAACCGTCGTTGCGACCTGGTATATGAAGACCATTCTG
+CCCTATCAGGGATGGGCGCTCATCAATCCTATTGATATGGTGGGGCAGGATATTTGGGTC
+TCCTTTATGCAGCTCCTGCCCTACATGCTGCATACCGGTATCCTGATTTTGTTTGCCGTG
+CTGTTCTGCTGGGTGTCTGCCGGATTCTGGACTGCGCTGATGGGCTTCCTGCAACTGCTT
+ATCGGGCGCGATAAGTACAGTATCTCCGCGTCTACGGTTGGCGATGAGCCCCTCAATCCG
+GAACACCAGACGGCGCTGATCATGCCTATCTGTAATGAAGACGTTAGCCGCGTTTTCGCC
+GGTCTGCGCGCGACCTGGGAGTCCGTTAAAGCTACAGGCAACGCCGCGCATTTTGACGTC
+TATATCCTTAGCGATAGTTATAACCCGGATATTTGCGTGGCGGAGCAAAAGGCGTGGATG
+GAGCTCATCGCGGAAGTGCAGGGCGAAGGCCATATTTTTTACCGTCGCCGCCGCCGCCGT
+ATGAAACGCAAAAGCGGCAATATTGACGATTTTTGCCGCCGCTGGGGCAATCAGTACAGC
+TATATGGTGGTGCTGGACGCGGACTCAGTGATTAGCGGCGAGTGTCTGAGCGGGCTGGTG
+CGCCTGATGGAAGCGAACCCTAACGCCGGGATTATCCAGTCTTCGCCGAAAGCGTCGGGG
+ATGGATACTCTGTATGCCCGCTGCCAACAGTTTGCGACCCGTGTTTATGGACCGCTGTTT
+ACCGCCGGGCTGCACTTCTGGCAGTTGGGGGATTCGCACTACTGGGGGCACAATGCCATT
+ATCCGCGTGAAGCCGTTTATCGAGCACTGCGCTCTGGCGCCGCTGCCGGGAGAAGGTTCG
+TTCGCCGGATCGATTCTTTCCCACGACTTTGTTGAGGCGGCGCTAATGCGTCGGGCAGGG
+TGGGGCGTCTGGATTGCCTACGATCTCCCCGGTTCCTATGAAGAGCTGCCGCCAAACCTG
+CTGGATGAGCTTAAACGCGACCGCCGCTGGTGTCACGGCAACCTGATGAACTTTCGTCTG
+TTCCTGGTGAAAGGAATGCACCCGGTGCATCGTGCCGTGTTCCTGACCGGGGTAATGTCA
+TACCTGTCCGCGCCGTTATGGTTTATGTTCCTTGCGCTTTCTACCGCGCTGCAGGTCGTT
+CATGCGTTAACAGAGCCGCAATATTTCCTTCATCCGCGCCAGCTTTTTCCGGTCTGGCCG
+CAGTGGCGTCCGGAACTGGCAATCGCGCTGTTTGCGTCAACGATGGTGCTGCTGTTCCTG
+CCGAAGCTGCTCAGTATTATGCTGATCTGGTGTAAAGGCACCAAAGAGTATGGCGGTTTC
+TGGCGCGTTACGCTGTCGCTATTGCTGGAAGTTCTGTTCTCCGTGTTGCTGGCGCCGGTG
+CGTATGCTGTTTCATACCGTGTTTGTGGTCAGTGCGTTCCTCGGCTGGGAAGTGGTCTGG
+AACTCACCGCAACGCGACGATGATTCTACGCCTTGGGGAGAAGCCTTTATGCGTCACGGC
+TCTCAACTGCTGCTGGGGCTGGTCTGGGCGGTTGGTATGGCGTGGCTGGATTTACGCTTT
+CTGTTCTGGCTGGCGCCGATTGTCTTTTCGCTTATTCTGTCGCCATTTGTTTCGGTGATC
+TCCAGTCGTTCAACGGTAGGATTACGCACCAATCGCTGGAAGCTGTTCCTGATCCCGGAA
+GAGTATTCGCCGCCTCAGGTGTTGGTCGATACTGATAAATATCTGGAGATGAATCGCCGC
+CGTATTCTGGACGATGGCTTTATGCATGCGGTTTTTAACCCGTCGCTTAATGCGCTGGCG
+ACCGCGATGGCCACCGCGCGTCACCGCGCCAGTAAGGTGCTGGAAATAGCCCGCGATCGT
+CATGTGGAGCAGGCGCTAAACGAAACGCCGGATAAACTGAACCGCGATCGGCGTCTGGTT
+TTGCTCAGCGATCCGGTGACGATGGCGCGTTTTCACTATCGGGTCTGGAATGCGCCAGAG
+AGATACTCTTCCTGGGTAAACCATTATCAGTCTCTCGTCCTGAATCCGCAGGCGTTGCAG
+GGACGAACATCGTCAGCGGGATAA
diff --git a/t/data/multfasta1.aln b/t/data/multfasta1.aln
new file mode 100644
index 0000000..a72cb20
--- /dev/null
+++ b/t/data/multfasta1.aln
@@ -0,0 +1,8 @@
+>1111_1#1_00123
+AAAAA-
+>2222_2#2_00456
+CCCCC-
+>3333_3#3_00789
+TTTTT-
+>4444_4#4_00123
+GGGGG-
diff --git a/t/data/multfasta2.aln b/t/data/multfasta2.aln
new file mode 100644
index 0000000..db31669
--- /dev/null
+++ b/t/data/multfasta2.aln
@@ -0,0 +1,8 @@
+>1111_1#1_01123
+CCCC----
+>2222_2#2_01456
+AAAA----
+>3333_3#3_01789
+GGGG----
+>4444_4#4_01123
+TTTT----
diff --git a/t/data/multfasta3.aln b/t/data/multfasta3.aln
new file mode 100644
index 0000000..376ac47
--- /dev/null
+++ b/t/data/multfasta3.aln
@@ -0,0 +1,8 @@
+>1111_1#1_02123
+TTTTTTTTTTTTTTT
+>2222_2#2_02456
+AAAAAAAAAAAAAAA
+>3333_3#3_02789
+CCCCCCCCCCCCCCC
+>4444_4#4_02123
+GGGGGGGGGGGGGGG
diff --git a/t/data/multfasta4.aln b/t/data/multfasta4.aln
new file mode 100644
index 0000000..4946e2d
--- /dev/null
+++ b/t/data/multfasta4.aln
@@ -0,0 +1,7 @@
+>1111_1#1_0001
+-AAAA-
+>2222_2#2_0001
+-TTTT-
+>4444_4#4_0001
+-CCCC-
+
diff --git a/t/data/multfasta5.aln b/t/data/multfasta5.aln
new file mode 100644
index 0000000..650ca13
--- /dev/null
+++ b/t/data/multfasta5.aln
@@ -0,0 +1,7 @@
+>1111_1#1_0004
+-AAAA-
+>2222_2#2_0004
+-TTTT-
+>4444_4#4_0004
+-CCCC-
+
diff --git a/t/data/multifasta_files/expected_output.embl b/t/data/multifasta_files/expected_output.embl
new file mode 100644
index 0000000..32519f0
--- /dev/null
+++ b/t/data/multifasta_files/expected_output.embl
@@ -0,0 +1,19 @@
+ID   Genome standard; DNA; PRO; 1234 BP.
+XX
+FH   Key             Location/Qualifiers
+FH
+FT   feature         1..1
+FT                   /label=1
+FT                   /locus_tag=1
+FT   feature         2..11
+FT                   /label=outof_order
+FT                   /locus_tag=outof_order
+FT   feature         12..111
+FT                   /label=2
+FT                   /locus_tag=2
+FT   feature         112..1111
+FT                   /label=3
+FT                   /locus_tag=3
+XX
+SQ   Sequence 1234 BP; 789 A; 1717 C; 1693 G; 691 T; 0 other;
+//
diff --git a/t/data/nnn_at_end.fa b/t/data/nnn_at_end.fa
new file mode 100644
index 0000000..d13361d
--- /dev/null
+++ b/t/data/nnn_at_end.fa
@@ -0,0 +1,8 @@
+>1
+AAANNN
+>2
+AAACCCnNn
+>3
+AAACCCGGGnnn
+>4
+AAACCCGGGTTTNNN
\ No newline at end of file
diff --git a/t/data/nnn_at_end.fa.sorted.fa b/t/data/nnn_at_end.fa.sorted.fa
new file mode 100644
index 0000000..87b65fc
--- /dev/null
+++ b/t/data/nnn_at_end.fa.sorted.fa
@@ -0,0 +1,8 @@
+>1
+AAA
+>2
+AAACCC
+>3
+AAACCCGGG
+>4
+AAACCCGGGTTT
diff --git a/t/data/nuc_multifasta.fa b/t/data/nuc_multifasta.fa
new file mode 100644
index 0000000..d5bc8c5
--- /dev/null
+++ b/t/data/nuc_multifasta.fa
@@ -0,0 +1,112 @@
+>AAAA#74_01075
+TGTCAGTACCACTTATTCTGACCTTACTGGCGGGCGCTGCCACCTTTATTGGCGCGTTTC
+TTGGCGTTCTTGGCCAAAAACCGTCTAACCGCGTGCTGGCTTTTTCGCTGGGCTTCGCCG
+CAGGGATCATGCTGCTCATCTCGCTGATGGAGATGCTGCCCGCCGCGTTAGATACCGAGG
+GGATGTCGCCTGTACTGGGCTACGGGATGTTTATTATCGGCCTGTTGGGCTACTTCGGGC
+TGGATCGTCTGCTTCCTCACGCTCATCCGCAGGATCTGGTGCAAAAAAGGCAGCAGCCGC
+TTCCCGGCTCGATAAAACGCACTGCGATTTTATTGACGCTCGGCATTAGCCTGCACAACT
+TTCCGGAAGGAATCGCCACCTTTGTCACTGCCAGCAGCAATCTTGAACTGGGTTTCGGCA
+TCGCACTGGCGGTGGCGTTGCACAATATTCCTGAAGGGCTGGCGGTTGCCGGCCCGGTTT
+ATGCCGCGACGGGCTCAAAACGTACCGCGATTTTTTGGGCCGGTATCTCCGGCATGGCGG
+AAATTCTTGGCGGCGTGCTGGCGTGGCTGATTTTGGGCAGCCTGGTTTCACCGATCGTTA
+TGGCGGCTATCATGGCAGCAGTCGCCGGCATTATGGTGGCGCTCTCCGTCGATGAACTGA
+TGCCGTTGGCAAAAGAGATCGATCCTAACAATAACCCCAGCTATGGTGTGCTTTGCGGTA
+TGTCCATCATGGGGCTCAGTCTCGTCATTTTGCAGACGATAGGTATCGGTTAA
+>BBBB#75_01314
+ATGTCAGTACCACTTATTCTGACCTTACTGGCGGGCGCTGCCACCTTTATTGGCGCGTTT
+CTTGGCGTTCTTGGCCAAAAACCGTCTAACCGCGTGCTGGCTTTTTCGCTGGGCTTCGCC
+GCAGGGATCATGCTGCTCATCTCGCTGATGGATGCTGCCCGCCGCGTTAGATACCGAGGG
+GATGTCGCCTGTACTGGGCTACGGGATGTTTATTATCGGCCTGTTGGGCTACTTCGGGCT
+GGATCGTCTGCTTCCTCACGCTCATCCGCAGGATCTGGTGCAAAAAAGGCAGCAGCCGCT
+TCCCGGCTCGATAAAACGCACTGCGATTTTATTGACGCTCGGCATTAGCCTGCACAACTT
+TCCGGAAGGAATCGCCACCTTTGTCACTGCCAGCAGCAATCTTGAACTGGGTTTCGGCAT
+CGCACTGGCGGTGGCGTTGCACAATATTCCTGAAGGGCTGGCGGTTGCCGGCCCGGTTTA
+TGCCGCGACGGGCTCAAAACGTACCGCGATTTTTTGGGCCGGTATCTCCGGCATGGCGGA
+AATTCTTGGCGGCGTGCTGGCGTGGCTGATTTTGGGCAGCCTGGTTTCACCGATCGTTAT
+GGCGGCTATCATGGCAGCAGTCGCCGGCATTATGGTGGCGCTCTCCGTCGATGAACTGAT
+GCCGTTGGCAAAAGAGATCGATCCTAACAATAACCCCAGCTATGGTGTGCTTTGCGGTAT
+GTCCATCATGGGGCTCAGTCTCGTCATTTTGCAGACGATAGGTATCGGTTAA
+>CCCC#76_00877
+ATGTCAGTACCACTTATTCTGACCTTACTGGCGGGCGCTGCCACCTTTATTGGCGCGTTT
+CTTGGCGTTCTTGGCCAAAAACCGTCTAACCGCGTGCTGGCTTTTTCGCTGGGCTTCGCC
+GCAGGGATCATGCTGCTCATCTCGCTGATGGAGATGCTGCCCGCCGCGTTAGATACCGAG
+GGGATGTCGCCTGTACTGGGCTACGGGAGTTTATTATCGGCCTGTTGGGCTACTTCGGGC
+TGGATCGTCTGCTTCCTCACGCTCATCCGCAGGATCTGGTGCAAAAAAGGCAGCAGCCGC
+TTCCCGGCTCGATAAAACGCACTGCGATTTTATTGACGCTCGGCATTAGCCTGCACAACT
+TTCCGGAAGGAATCACCTTTGTCACTGCCAGCAGCAATCTTGAACTGGGTTTCGGCATCG
+CACTGGCGGTGGCGTTGCACAATATTCCTGAAGGGCTGGCGGTTGCCGGCCCGGTTTATG
+CCGCGACGGGCTCAAAACGTACCGCGATTTTTTGGGCCGGTATCTCCGGCATGGCGGAAA
+TTCTTGGCGGCGTGCTGGCGTGGCTGATTTTGGGCAGCCTGGTTTCACCGATCGTTATGG
+CGGCTATCATGGCAGCAGTCGCCGGCATTATGGTGGCGCTCTCCGTCGATGAACTGATGC
+CGTTGGCAAAAGAGATCGATCCTAACAATAACCCCAGCTATGGTGTGCTTTGCGGTATGT
+CCATCATGGGGCTCAGTCTCGTCATTTTGCAGACGATAGGTATCGGTTAA
+>DDDD#77_01105
+ATGTCAGTACCACTTATTCTGACCTTACTGGCGGGCGCTGCCACCTTTATTGGCGCGTTT
+CTTGGCGTTCTTGGCCAAAAACCGTCTAACCGCGTGCTGGCTTTTTCGCTGGGCTTCGCC
+GCAGGGATCATGCTGCTCATCTCGCTGATGGAGATGCTGCCCGCCGCGTTAGATACCGAG
+GGGATGTCGCCTGTACTGGGCTACGGGATGTTTATTATCGGCCTGTTGGGCTACTTCGGG
+CTGGATCGTCTGCTTCCTCACGCTCATCCGCAGGATCTGGTGCAAAAAAGGCAGCAGCCG
+CTTCCCGGCTCGATAAAACGCACTGCGATTTTATTGACGCTCGGCATTAGCCTGCACAAC
+TTTCCGGAAGGAATCGCCACCTTTGTCACTGCCAGCAGCAATCTTGAACTGGGTTTCGGC
+ATCGCACTGGCGGTGGCGTTGCACAATATTCCTGAAGGGCTGGCGGTTGCCGGCCCGGTT
+TATGCCGCGACGGGCTCAAAACGTACCGCGATTTTTTGGGCCGGTATCTCCGGCATGGCG
+GAAATTCTTGGCGGCGTGCTGGCGTGGCTGATTTTGGGCAGCCTGGTTTCACCGATCGTT
+ATGGCGGCTATCATGGCAGCAGTCGCCGGCATTATGGTGGCGCTCTCCGTCGATGAACTG
+ATGCCGTTGGCAAAAGAGATCGATCCTAACAATAACCCCAGCTATGGTGTGCTTTGCGGT
+ATGTCCATCATGGGGCTCAGTCTCGTCATTTTGCAGACGATAGGTATCGGTTAA
+>EEEE
+ATGTCAGTACCACTTATTCTGACCTTACTGGCGGGCGCTGCCACCTTTATTGGCGCGTTT
+CTTGGCGTTCTTGGCCAAAAACCGTCTAACCGCGTGCTGGCTTTTTCGCTGGGCTTCGCC
+GCAGGGATCATGCTGCTCATCTCGCTGATGGAGATGCTGCCCGCCGCGTTAGATACCGAG
+GGGATGTCGCCTGTACTGGGCTACGGGATGTTTATTATCGGCCTGTTGGGCTACTTCGGG
+CTGGATCGTCTGCTTCCTCACGCTCATCCGCAGGATCTGGTGCAAAAAAGGCAGCAGCCG
+CTTCCCGGCTCGATAAAACGCACTGCGATTTTATTGACGCTCGGCATTAGCCTGCACAAC
+TTTCCGGAAGGAATCGCCACCTTTGTCACTGCCAGCAGCAATCTTGAACTGGGTTTCGGC
+ATCGCACTGGCGGTGGCGTTGCACAATATTCCTGAAGGGCTGGCGGTTGCCGGCCCGGTT
+TATGCCGCGACGGGCTCAAAACGTACCGCGATTTTTTGGGCCGGTATCTCCGGCATGGCG
+GAAATTCTCGGCGTGCTGGCGTGGCTGATTTTGGGCAGCCTGGTTTCACCGATCGTTATG
+GCGGCTATCATGGCAGCAGTCGCCGGCATTATGGTGGCGCTCTCCGTCGATGAACTGATG
+CCGTTGGCAAAAGAGATCGATCCTAACAATAACCCCAGCTATGGTGTGCTTTGCGGTATG
+TCCATCATGGGGCTCAGTCTCGTCATTTTGCAGACGATAGGTATCGGTTAA
+>FFFF
+ATGTCAGTACCACTTATTCTGACCTTACTGGCGGGCGCTGCCACCTTTATTGGCGCGTTT
+CTTGGCGTTCTTGGCCAAAAACCGTCTAACCGCGTGCTGGCTTTTTCGCTGGGCTTCGCC
+GCAGGGATCATGCTGCTCATCTCGCTGATGGAGATGCTGCCCGCCGCGTTAGATACCGAG
+GGGATGTCGCCTGTACTGGGCTACGGGATGTTTATTATCGGCCTGTTGGGCTACTTCGGG
+CTGGATCGTCTGCTTCCTCACGCTCATCCGCAGGATCTGGTGCAAAAAAGGCAGCAGCCG
+CTTCCCGGCTCGATAAAACGCACTGCGATTTTATTGACGCTCGGCATTAGCCTGCACAAC
+TTTCCGGAAGGAATCGCCACCTTTGTCACTGCCAGCAGCAATCTTGAACTGGGTTTCGGC
+ATCGCACTGGCGGTGGCGTTGCACAATATTCCTGAAGGGCTGGCGGTTGCCGGCCCGGTT
+TATGCCGCGACGGGCTCAAAACGTACCGCGATTTTTTGGGCCGGTATCTCCGGCATGGCG
+GAAATTCTTGGCGGCGTGCTGGCGTGGCTGATTTTGGGCAGCCTGGCACCGATCGTTATG
+GCGGCTATCATGGCAGCAGTCGCCGGCATTATGGTGGCGCTCTCCGTCGATGAACTGATG
+CCGTTGGCAAAAGAGATCGATCCTAACAATAACCCCAGCTATGGTGTGCTTTGCGGTATG
+TCCATCATGGGGCTCAGTCTCGTCATTTTGCAGACGATAGGTATCGGTTAA
+>GGGG
+ATGTCAGTACCACTTATTCTGACCTTACTGGCGGGCGCTGCCACCTTTATTGGCGCGTTT
+CTTGGCGTTCTTGGCCAAAAACCGTCTAACCGCGTGCTGGCTTTTTCGCTGGGCTTCGCC
+GCAGGGATCATGCTGCTCATCTCGCTGATGGAGATGCTGCCCGCCGCGTTAGATACCGAG
+GGGATGTCGCCTGTACTGGGCTACGGGATGTTTATTATCGGCCTGTTGGGCTACTTCGGG
+CTGGATCGTCTGCTTCCTCACGCTCATCCGCAGGATCTGGTGCAAAAAAGGCAGCAGCCG
+CTTCCCGGCTCGATAAAACGCACTGCGATTTTATTGACGCTCGGCATTAGCCTGCACAAC
+TTTCCGGAAGGAATCGCCACCTTTGTCACTGCCAGCAGCAATCTTGAACTGGGTTTCGGC
+ATCGCACTGGGGTGGCGTTGCACAATATTCCTGAAGGGCTGGCGGTTGCCGGCCCGGTTT
+ATGCCGCGACGGGCTCAAAACGTACCGCGATTTTTTGGGCCGGTATCTCCGGCATGGCGG
+AAATTCTTGGCGGCGTGCTGGCGTGGCTGATTTTGGGCAGCCTGGTTTCACCGATCGTTA
+TGGCGGCTATCATGGCAGCAGTCGCCGGCATTATGGTGGCGCTCTCCGATGAACTGATGC
+CGTTGGCAAAAGAGATCGATCCTAACAATAACCCCAGCTATGGTGTGCTTTGCGGTATGT
+CCATCATGGGGCTCAGTCTCGTCATTTTGCAGACGATAGGTATCGGTTAA
+>HHHH
+ATGTCAGTACCACTTATTCTGACCTTACTGGCGGGCGCTGCCACCTTTATTGGCGCGTTT
+CTTGGCGTTCTTGGCCAAAAACCGTCTAACCGCGTGCTGGCTTTTTCGCTGGGCTTCGCC
+GCAGGGATCATGCTGCTCATCTCGCTGATGGAGATGCTGCCCGCCGCGTTAGATACCGAG
+GGGATGTCGCCTGTACTGGGCTACGGGATGTTTATTATCGGCCTGTTGGGCTACTTCGGG
+CTGGATCGTCTGCTTCCTCACGCTTCCGCAGGATCTGGTGCAAAAAAGGCAGCAGCCGCT
+TCCCGGCTCGATAAAACGCACTGCGATTTTATTGACGCTCGGCATTAGCCTGCACAACTT
+TCCGGAAGGAATCGCCACCTTTGTCACTGCCAGCAGCAATCTTGAACTGGGTTTCGGCAT
+CGCACTGGCGGTGGCGTTGCACAATATTCCTGAAGGGCTGGCGGTTGCCGGCCCGGTTTA
+TGCCGCGACGGGCTCAAAACGTACCGCGATTTTTTGGGCCGGTATCTCCGGCATGGCGGA
+AATTCTTGGCGGCGTGCTGGCGTGGCTGATTTTGGGCAGCCTGGTTTCACCGATCGTTAT
+GGCGGCTATCATGGCAGCAGTCGCCGGCATTATGGTGGCGCTCTCCGTCGATGAACTGAT
+GCCGTTGGCAAAAGAGATCGATCCTAACAATAACCCCAGCTATGGTGTGCTTTGCGGTAT
+GTCCATCATGGGGCTCAGTCTCGTCATTTTGCAGACGATAGGTATCGGTTAA
diff --git a/t/data/nuc_to_be_aligned.fa b/t/data/nuc_to_be_aligned.fa
new file mode 100644
index 0000000..a7e81a1
--- /dev/null
+++ b/t/data/nuc_to_be_aligned.fa
@@ -0,0 +1,90 @@
+>1
+ATGCAAATACAGAGCTTCTATCACTCAGCTTCACTAAAAACCCAGGAGGCTTTTAAAAGC
+CTACAAAAAACCTTATACAACGGAATGCAGATTCTCTCAGGCCAGGGCAAAGCGCCGGCT
+AAAGCGCCCGACGCTCGCCCGGAAATTATTGTCCTGCGAGAACCTGGCGCGACATGGGGG
+AATTATCTACAGCATCAGAAGACGTCTAACCACTCGCTGCATAACCTCTATAACTTACAG
+CGCGATCTTCTTACCGTCGCGGCAACCGTTCTGGGTAAACAAGACCCGGTTCTAACGTCA
+ATGGCAAACCAAATGGAGTTAGCCAAAGTTAAAGCGGACCGGCCAGCAACAAAACAAGAA
+GAAGCTGCGGCAAAAGCATTGAAGAAAAATCTTATCGAACTTATTGCAGCACGCACTCAG
+CAGCAAAATGGCTTACCTGCAAAAGAAGCTCATCGCTTTGCGGCAGTAGCGTTTAGAGAT
+GCTCAGGTCAAGCAGCTCAATAACCAGCCCTGGCAAACCATAAAAAATACACTCACGCAT
+AACGGGCATCACTATACCAACACGCAGCTCCCTGCCGCAGAGATGAAAATCGGCGCAAAA
+GATATCTTTCCCAGTGCTTATGAGGGAAAGGGCGTATGCAGTTGGGATACCAAGAATATT
+CATCACGCCAATAATTTGTGGATGTCCACGGTGAGTGTGCATGAGGACGGTAAAGATAAA
+ACGCTTTTTTGCGGGATACGTCATGGTGTGCTTTCCCCCTATCATGAAAAAGATCCGCTT
+CTGCGTCAGGCCGGCGCTGAAAACAAAGCCAAAGAAGTATTAGCTGCGGCACTTTTTAGT
+AAACCTGAGTTGCTTAACAGAGCCTTAGAGGGCGAAGCGGTAAGCCTGAAACTGGTATCC
+GTCGGGTTACTCACCGCGTCGAATATTTTCGGCAAAGAGGGAACTATGGTCGAGGATCAA
+ATGCGCGCATGGCAATCGTTGACCCAGCCGGGAAAAATGATTCATTTAAAAATCCGCAAT
+AAAGATGGCGATCTACAGACGGTAAAAATAAAACCGGACGTCGCCGCATTTAATGTGGGT
+GTTAATGAGCTGGCGCTCAAGCTCGGCTTTGGCCTTAAAGCATCAGATAGCTATAATGCC
+GAAGCGCTGCATCAGTTATTAGGCAATGATTTACGCCCTGAAGCCAGACCAGGTGGCTGG
+GTTGGCGAATGGCTGGCGCAATACCCGGATAATTATGAGGTCGTCAATACATTAGCGCGC
+CAGATTAAGGATATCTGGAAAAATAACCAACATCATAAAGATGGCGGCGAACCCTATAAA
+CTCGCACAACGCCTTGCCATGTTAGCCCATGAAATTGACGCGGTGCCCGCCTGGAATTGT
+AAAAGCGGCAAAGATCGTACAGGGATGATGGATTCAGAAATCAAGCGAGAGCTCATTTCT
+TTCCATCAGACCCATATGTTAAGTGCGCCTGGTAGTCTTCCGGATAGCGGTGGACAGAAA
+ATTTTCCAAAAAGTATTACTGAATAGCGGTAACCTGGAGATTCAGAAACAAAATACGGGC
+GGGGCGGGAAACAAAGTAATGAAAAATTTATCGCCAGAGGTGCTCAATCTTTCCTATCAA
+AAACGAGTTGGGGATGAAAATATTTGGCGTCAGTAAAATATTTCTTCATTAATCACA
+TCT
+>2
+ATGCAAATACAGAGCTTCTATCACTCAGCTTCACTAAAAACCCAGGAGGCTTTTAAAAGC
+CTACAAAAAACCTTATACAACGGAATGCAGATTCTCTCAGGCCAGGGCAAAGCGCCGGCT
+AAAGCGCCCGACGCTCGCCCGGAAATTATTGTCCTGCGAGAACCTGGCGCGACATGGGGG
+AATTATCTACAGCATCAGAAGACGTCTAACCACTCGCTGCATAACCTCTATAACTTACAG
+CGCGATCTTCTTACCGTCGCGGCAACCGTTCTGGGTAAACAAGACCCGGTTCTAACGTCA
+ATGGCAAACCAAATGGAGTTAGCCAAAGTTAAAGCGGACCGGCCAGCAACAAAACAAGAA
+GAAGCTGCGGCAAAAGCATTGAAGAAAAATCTTATCGAACTTATTGCAGCACGCACTCAG
+CAGCAAAATGGCTTACCTGCAAAAGAAGCTCATCGCTTTGCGGCAGTAGCGTTTAGAGAT
+GCTCAGGTCAAGCAGCTCAATAACCAGCCCTGGCAAACCATAAAAAATACACTCACGCAT
+AACGGGCATCACTATACCAACACGCAGCTCCCTGCAGAGATGAAAATCGGCGCAAAA
+GATATCTTTCCCAGTGCTTATGAGGGAAAGGGCGTATGCAGTTGGGATACCAAGAATATT
+CATCACGCCAATAATTTGTGGGTGTCCACGGTGAGTGTGCATGAGGACGGTAAAGATAAA
+ACGCTTTTTTGCGGGATACGTCATGGTGTGCTTTCCCCCTATCATGAAAAAGATCCGCTT
+CTGCGTCAGGCCGGCGCTGAAAACAAAGCCAAAGAAGTATTAGCTGCGGCACTTTTTAGT
+AAACCTGAGTTGCTTAACAGAGCCTTAGAGGGCGAAGCGGTAAGCCTGAAACTGGTATCC
+GTCGGGTTACTCACCGCGTCGAATATTTTCGGCAAAGAGGGAACTATGGTCGAGGATCAA
+ATGCGCGCATGGCAATCGTTGACCCAGCCGGGAAAAATGATTCATTTAAAAATCCGCAAT
+AAAGATGGCGATCTACAGACGGTAAAAATAAAACCGGACGTCGCCGCATTTAATGTGGGT
+GTTAATGAGCTGGCGCTCAAGCTCGGCTTTGGCCTTAAAGCATCAGATAGCTATAATGCC
+GAAGCGCTACATCAGTTATTAGGCAATGATTTACGCCCTGAAGCCAGACCAGGTGGCTGG
+GTTGGCGAATGGCTGGCGCAATACCCGGATAATTATGAGGTCGTCAATACATTAGCGCGC
+CAGATTAAGGATATCTGGAAAAATAACCAACATCATAAAGATGGCGGCGAACCCTATAAA
+CTCGCACAACGCCTTGCCATGTTAGCCCATGAAATTGACGCGGTGCCCGCCTGGAATTGT
+AAAAGCGGCAAAGATCGTACAGGGATGATGGATTCAGAAATCAAGCGAGAGCTCATTTCT
+TTCCATCAGACCCATATGTTAAGTGCGCCTGGTAGTCTTCCGGATAGCGGTGGACAGAAA
+ATTTTCCAAAAAGTATTACTGAATAGCGGTAACCTGGAGATTCAGAAACAAAATACGGGC
+GGGGCGGGAAACAAAGTAATGAAAAATTTATCGCCAGAGGTGCTCAATCTTTCCTATCAA
+AAACGAGTTGGGGATGAAAATATTTGGCAGTCAGTAAAAGGTATTTCTTCATTAATCACA
+TCT
+>3
+ATGCAAATACAGAGCTTCTATCACTCAGCTTCACTAAAAACCCAGGAGGCTTTTAAAAGC
+CTACAAAAAACCTTATACAACGGAATGCAGATTCTCTCAGGCCAGGGCAAAGCGCCGGCT
+AAAGCGCCCGACGCTCGCCCGGAAATTATTGTCCTGCGAGAACCTGGCGCGACATGGGGG
+AATTATCTACAGCATCAGAAGACGTCTAACCACTCGCTGCATAACCTCTATAACTTACAG
+CGCGATCTTCTTACCGTCGCGGCAACCGTTCTGGGTAAACAAGACCCGGTTCTAACGTCA
+ATGGCAAACCAAATGGAGTTAGCCAAGGTTAAAGCGGACCGGCCAGCAACAAAACAAGAA
+GAAGCTGCGGCAAAAGCATTGAAGAAAAATCTTATCGAACTTATTGCAGCACGCACTCAG
+CAGCAAAATGGCTTACCTGCAAAAGAAGCTCATCGCTTTGCGGCAGTAGCGTTTAGAGAT
+GCTCAGGTCAAGCAGCTCAATAACCAGCCCTGGCAAACCATAAAAAATACACTCACGCAT
+AACGGGCATCACTATACCAACACGCAGCTCCCTGCCGCAGAGATGAAAATCGGCGCAAAA
+GATATCTTTCCCAGTGCTTATGAGGGAAAGGGCGTATGCAGTTGGGATACCAAGAATATT
+CATCACGCCAATAATTTGTGGATGTCCACGGTGAGTGTGCATGAGGACGGTAAAGATAAA
+ACGCTTTTTTGCGGGATACGTCATGGTGTGCTTTCCCCCTATCATGAAAAAGATCCGCTT
+CTGCGTCAGGCCGGCGCTGAAAACAAAGCCAAAGAAGTATTAGCTGCGGCACTTTTTAGT
+AAACCTGAGTTGCTTAACAGAGCCTTAGAGGGCGAAGCGGTAAGCCTGAAACTGGTATCC
+GTCGGGTTACTCACCGCGTCGAATATTTTCGGCAAAGAGGGAACTATGGTCGAGGATCAA
+ATGCGCGCATGGCAATCGTTGACCCAGCCGGGAAAAATGATTCATTTAAAAATCCGCAAT
+AAAGATGGCGATCTACAGACGGTAAAAATAAAACCGGACGTCGCCGCATTTAATGTGGGT
+GTTAATGAGCTGGCGCTCAAGCTCGGCTTTGGCCTTAAAGCATCAGATAGCTATAATGCC
+GAAGCGCTACATCAGTTATTAGGCAATGATTTACGCCCTGAAGCCAGACCAGGTGGCTGG
+GTTGGCGAATGGCTGGCGCAATACCCGGATAATTATGAGGTCGTCAATACATTAGCGCGC
+CAGATTAAGGATATCTGGAAAAATAACCAACATCATAAAGATGGCGGCGAACCCTATAAA
+CTCGCACAACGCCTTGCCATGTTAGCCCATGAAATTGACGCGGTGCCCGCCTGGAATTGT
+AAAAGCGGCAAAGATCGTACAGGGATGATGGATTCAGAAATCAAGCGAGAGCTCATTTCT
+TTCCATCAGACCCATATGTTAAGTGCGCCTGGTAGTCTTCCGGATAGCGGTGGACAGAAA
+ATTTTCCAAAAAGTATTACTGAATAGCGGTAACCTGGAGATTCAGAAACAAAATACGGGC
+GGGGCGGGAAACAAAGTAATGAAAAATTTATCGCCAGAGGTGCTCAATCTTTCCTATCAA
+AAACGAGTTGGGGATGAAAATATTTGGCAGTCAGTAAAAGGTATTTCTTCATTAATCACA
+TCT
\ No newline at end of file
diff --git a/t/data/out_of_order_fasta.fa b/t/data/out_of_order_fasta.fa
new file mode 100644
index 0000000..13f02f9
--- /dev/null
+++ b/t/data/out_of_order_fasta.fa
@@ -0,0 +1,10 @@
+>5555
+AAAAAAA
+>3333
+GGGGGGG
+>4444
+CCCCCCC
+>2222
+TTTTTTT
+>1111
+AAAAAAA
diff --git a/t/data/out_of_order_fasta.fa.sorted.fa b/t/data/out_of_order_fasta.fa.sorted.fa
new file mode 100644
index 0000000..56ce040
--- /dev/null
+++ b/t/data/out_of_order_fasta.fa.sorted.fa
@@ -0,0 +1,10 @@
+>1111
+AAAAAAA
+>2222
+TTTTTTT
+>3333
+GGGGGGG
+>4444
+CCCCCCC
+>5555
+AAAAAAA
diff --git a/t/data/overall_gene_presence_absence.csv b/t/data/overall_gene_presence_absence.csv
new file mode 100644
index 0000000..d828265
--- /dev/null
+++ b/t/data/overall_gene_presence_absence.csv
@@ -0,0 +1,22 @@
+"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","Min group size nuc","Max group size nuc","Avg group size nuc","query_1","query_2","query_5"
+"hly","","Alpha-toxin","2","2","1","1","1","1","7","","959","959","959","1_1","2_1___1",""
+"group_10","","hypothetical protein","2","2","1","1","10","1","6","","227","227","227","abc_00010","abc_00010___10",""
+"group_11","","C4-dicarboxylate transporter/malic acid transport protein","2","2","1","1","11","1","5","","947","947","947","abc_00011","abc_00011___11",""
+"group_12","","hypothetical protein","2","2","1","1","12","1","4","","188","188","188","abc_00012","abc_00012___12",""
+"group_13","","Gonococcal growth inhibitor III","2","2","1","1","13","1","3","","134","134","134","abc_00014","abc_00014___14",""
+"group_14","","Gonococcal growth inhibitor III","2","2","1","1","14","1","2","","134","134","134","1_6","2_7___15",""
+"yfnB","","Putative HAD-hydrolase yfnB","2","2","1","1","15","1","1","","686","686","686","abc_00016","abc_00016___16",""
+"group_2","","hypothetical protein","2","2","1","1","2","1","8","","146","146","146","abc_00002","abc_00002___2",""
+"group_3","","hypothetical protein","2","2","1","1","3","1","9","","197","197","197","abc_00003","abc_00003___3",""
+"group_4","","superantigen-like protein","2","2","1","1","4","1","10","","716","716","716","abc_00004","abc_00004___4",""
+"speH","","hypothetical protein","2","2","1","1","5","1","11","","725","725","725","1_2","2_2___5",""
+"group_6","","superantigen-like protein","2","2","1","1","6","1","12","","725","725","725","abc_00006","abc_00006___6",""
+"argF","","Ornithine carbamoyltransferase","2","2","1","1","7","1","13","","1001","1001","1001","1_3","2_3___7",""
+"arcC1","","Carbamate kinase 1","2","2","1","1","8","1","14","","935","935","935","abc_00008","abc_00008___8",""
+"group_9","","16S ribosomal RNA","2","2","1","1","9","1","15","","1556","1556","1556","abc_01705","abc_01705___9",""
+"group_16","","hypothetical protein","1","1","1","2","1","2","6","","146","146","146","","","abc_50002"
+"group_17","argF","Ornithine carbamoyltransferase","1","1","1","2","6","2","5","","1001","1001","1001","","","3_3"
+"group_18","","hypothetical protein","1","1","1","2","5","2","4","","227","227","227","","","abc_50010"
+"group_19","","hypothetical protein","1","1","1","2","4","2","3","","188","188","188","","","abc_50012"
+"group_20","","Gonococcal growth inhibitor III","1","1","1","2","3","2","2","","134","134","134","","","abc_50014"
+"group_21","yfnB","Putative HAD-hydrolase yfnB","1","1","1","2","2","2","1","","686","686","686","","","3_5"
diff --git a/t/data/pan_genome_sequences/argF.fa b/t/data/pan_genome_sequences/argF.fa
new file mode 100644
index 0000000..fc60aab
--- /dev/null
+++ b/t/data/pan_genome_sequences/argF.fa
@@ -0,0 +1,36 @@
+>1_3
+ATGAAAAATTTACGAAACAGAAGTTTTTTAACTTTATTAGACTTTTCACGACAAGAGGTA
+GAATTCTTATTAACACTCTCCGAGGATTTAAAACGTGCTAAATATATTGGCACTGAAAAG
+CCTATGTTAAAAAATAAAAATATTGCACTGTTATTTGAAAAAGATTCTACAAGAACGCGA
+TGTGCATTTGAAGTTGCAGCGCATGATCAAGGTGCAAATGTAACTTATTTAGGCCCAACT
+GGATCACAAATGGGTAAAAAAGAAACAACTAAAGATACTGCACGTGTGCTTGGTGGAATG
+TATGATGGCATTGAATACCGTGGTTTTTCACAAAGAACAGTAGAAACTTTAGCTGAAAAT
+TCAGGCGTACCAGTGTGGAATGGTTTAACTGATGAAGATCATCCTACTCAAGTTCTTGCT
+GATTTCTTAACAGCAAAAGAAGTCTTAAAAAAAGATTATGCAGATATTAACTTTACATAT
+GTTGGAGATGGTCGTAATAACGTTGCAAATGCATTAATGCAAGGTGCTGCCATTATGGGT
+ATGAACTTCCATTTAGTTTGTCCAAAAGAATTAAATCCAACAGATGAATTATTAAATCGC
+TGTAAAAATATTGCCGCTGAAAATGGTGGCAACATATTAATCACAGATGATATTGACCAA
+GGTGTAAAAGGTTCGGATGTAATTTACACTGATGTTTGGGTATCAATGGGTGAACCTGAT
+GAAGTATGGAAAGAACGACTTGAATTATTGAAACCATATCAAGTAAATAAAGAAATGATG
+GATAAAACTGGTAATCCAAATGTTATTTTTGAGCATTGCTTACCATCTTTCCATAATGCT
+GATACGAAAATTGGTCAACAAATTTTTGAAAAATATGGTATTCGAGAAATGGAAGTTACA
+GATGAAGTATTCGAAAGTAAAGCTTCAGTTGTATTCCAAGAAGCTGAGAACAGAATGCAT
+ACAATCAAAGCAGTCATGGTTGCTACATTGGGTGAATTTTAA
+>3_3
+ATGAAAAATTTACGAAACAGAAGTTTTTTAACTTTATTAGACTTTTCACGACAAGAGGTA
+GAATTCTTATTAACACTCTCCGAGGATTTAAAACGTGCTAAATATATTGGCACTGAAAAG
+CCTATGTTAAAAAATAAAAATATTGCACTGTTATTTGAAAAAGATTCTACAAGAACGCGA
+TGTGCATTTGAAGTTGCAGCGCATGATCAAGGTGCAAATGTAACTTATTTAGGCCCAACT
+GGATCACAAATGGGTAAAAAAGAAACAACTAAAGATACTGCACGTGTGCTTGGTGGAATG
+TATGATGGCATTGAATACCGTGGTTTTTCACAAAGAACAGTAGAAACTTTAGCTGAAAAT
+TCAGGCGTACCAGTGTGGAATGGTTTAACTGATGAAGATCATCCTACTCAAGTTCTTGCT
+GATTTCTTAACAGCAAAAGAAGTCTTAAAAAAAGATTATGCAGATATTAACTTTACATAT
+GTTGGAGATGGTCGTAATAACGTTGCAAATGCATTAATGCAAGGTGCTGCCATTATGGGT
+ATGAACTTCCATTTAGTTTGTCCAAAAGAATTAAATCCAACAGATGAATTATTAAATCGC
+TGTAAAAATATTGCCGCTGAAAATGGTGGCAACATATTAATCACAGATGATATTGACCAA
+GGTGTAAAAGGTTCGGATGTAATTTACACTGATGTTTGGGTATCAATGGGTGAACCTGAT
+GAAGTATGGAAAGAACGACTTGAATTATTGAAACCATATCAAGTAAATAAAGAAATGATG
+GATAAAACTGGTAATCCAAATGTTATTTTTGAGCATTGCTTACCATCTTTCCATAATGCT
+GATACGAAAATTGGTCAACAAATTTTTGAAAAATATGGTATTCGAGAAATGGAAGTTACA
+GATGAAGTATTCGAAAGTAAAGCTTCAGTTGTATTCCAAGAAGCTGAGAACAGAATGCAT
+ACAATCAAAGCAGTCATGGTTGCTACATTGGGTGAATTTTAA
diff --git a/t/data/pan_genome_sequences/hly.fa b/t/data/pan_genome_sequences/hly.fa
new file mode 100644
index 0000000..777b60f
--- /dev/null
+++ b/t/data/pan_genome_sequences/hly.fa
@@ -0,0 +1,51 @@
+>1_1
+ATGAAAACACGTATAGTCAGCTCAGTAACAACAACACTATTGCTAGGTTCCATATTAATG
+AATCCTGTCGCTAATGCCGCAGATTCTGATATTAATATTAAAACCGGTACTACAGATATT
+GGAAGCAATACTACAGTAAAAACAGGTGATTTAGTCACTTATGATAAAGAAAATGGCATG
+CACAAAAAAGTATTTTATAGTTTTATCGATGATAAAAATCACAATAAAAAACTGCTAGTT
+ATTAGAACGAAAGGTACCATTGCTGGTCAATATAGAGTTTATAGCGAAGAAGGTGCTAAC
+AAAAGTGGTTTAGCCTGGCCTTCAGCCTTTAAGGTACAGTTGCAACTACCTGATAATGAA
+GTAGCTCAAATATCTGATTACTATCCAAGAAATTCGATTGATACAAAAGAGTATATGAGT
+ACTTTAACTTATGGATTCAACGGTAATGTTACTGGTGATGATACAGGAAAAATTGGCGGC
+CTTATTGGTGCAAATGTTTCGATTGGTCATACACTGAAATATGTTCAACCTGATTTCAAA
+ACAATTTTAGAGAGCCCAACTGATAAAAAAGTAGGCTGGAAAGTGATATTTAACAATATG
+GTGAATCAAAATTGGGGACCATATGATAGAGATTCTTGGAACCCGGTATATGGCAATCAA
+CTTTTCATGAAAACTAGAAATGGTTCTATGAAAGCAGCAGAGAACTTCCTTGATCCTAAC
+AAAGCAAGTTCTCTATTATCTTCAGGGTTTTCACCAGACTTCGCTACAGTTATTACTATG
+GATAGAAAAGCATCCAAACAACAAACAAATATAGATGTAATATACGAACGAGTTCGTGAT
+GACTACCAATTGCATTGGACTTCAACAAATTGGAAAGGTACCAATACTAAAGATAAATGG
+ACAGATCGTTCTTCAGAAAGATATAAAATCGATTGGGAAAAAGAAGAAATGACAAATTAA
+>2_1
+ATGAAAACACGTATAGTCAGCTCAGTAACAACAACACTATTGCTAGGTTCCATATTAATG
+AATCCTGTCGCTAATGCCGCAGATTCTGATATTAATATTAAAACCGGTACTACAGATATT
+GGAAGCAATACTACAGTAAAAACAGGTGATTTAGTCACTTATGATAAAGAAAATGGCATG
+CACAAAAAAGTATTTTATAGTTTTATCGATGATAAAAATCACAATAAAAAACTGCTAGTT
+ATTAGAACGAAAGGTACCATTGCTGGTCAATATAGAGTTTATAGCGAAGAAGGTGCTAAC
+AAAAGTGGTTTAGCCTGGCCTTCAGCCTTTAAGGTACAGTTGCAACTACCTGATAATGAA
+GTAGCTCAAATATCTGATTACTATCCAAGAAATTCGATTGATACAAAAGAGTATATGAGT
+ACTTTAACTTATGGATTCAACGGTAATGTTACTGGTGATGATACAGGAAAAATTGGCGGC
+CTTATTGGTGCAAATGTTTCGATTGGTCATACACTGAAATATGTTCAACCTGATTTCAAA
+ACAATTTTAGAGAGCCCAACTGATAAAAAAGTAGGCTGGAAAGTGATATTTAACAATATG
+GTGAATCAAAATTGGGGACCATATGATAGAGATTCTTGGAACCCGGTATATGGCAATCAA
+CTTTTCATGAAAACTAGAAATGGTTCTATGAAAGCAGCAGAGAACTTCCTTGATCCTAAC
+AAAGCAAGTTCTCTATTATCTTCAGGGTTTTCACCAGACTTCGCTACAGTTATTACTATG
+GATAGAAAAGCATCCAAACAACAAACAAATATAGATGTAATATACGAACGAGTTCGTGAT
+GACTACCAATTGCATTGGACTTCAACAAATTGGAAAGGTACCAATACTAAAGATAAATGG
+ACAGATCGTTCTTCAGAAAGATATAAAATCGATTGGGAAAAAGAAGAAATGACAAATTAA
+>3_1
+ATGAAAACACGTATAGTCAGCTCAGTAACAACAACACTATTGCTAGGTTCCATATTAATG
+AATCCTGTCGCTAATGCCGCAGATTCTGATATTAATATTAAAACCGGTACTACAGATATT
+GGAAGCAATACTACAGTAAAAACAGGTGATTTAGTCACTTATGATAAAGAAAATGGCATG
+CACAAAAAAGTATTTTATAGTTTTATCGATGATAAAAATCACAATAAAAAACTGCTAGTT
+ATTAGAACGAAAGGTACCATTGCTGGTCAATATAGAGTTTATAGCGAAGAAGGTGCTAAC
+AAAAGTGGTTTAGCCTGGCCTTCAGCCTTTAAGGTACAGTTGCAACTACCTGATAATGAA
+GTAGCTCAAATATCTGATTACTATCCAAGAAATTCGATTGATACAAAAGAGTATATGAGT
+ACTTTAACTTATGGATTCAACGGTAATGTTACTGGTGATGATACAGGAAAAATTGGCGGC
+CTTATTGGTGCAAATGTTTCGATTGGTCATACACTGAAATATGTTCAACCTGATTTCAAA
+ACAATTTTAGAGAGCCCAACTGATAAAAAAGTAGGCTGGAAAGTGATATTTAACAATATG
+GTGAATCAAAATTGGGGACCATATGATAGAGATTCTTGGAACCCGGTATATGGCAATCAA
+CTTTTCATGAAAACTAGAAATGGTTCTATGAAAGCAGCAGAGAACTTCCTTGATCCTAAC
+AAAGCAAGTTCTCTATTATCTTCAGGGTTTTCACCAGACTTCGCTACAGTTATTACTATG
+GATAGAAAAGCATCCAAACAACAAACAAATATAGATGTAATATACGAACGAGTTCGTGAT
+GACTACCAATTGCATTGGACTTCAACAAATTGGAAAGGTACCAATACTAAAGATAAATGG
+ACAGATCGTTCTTCAGAAAGATATAAAATCGATTGGGAAAAAGAAGAAATGACAAATTAA
diff --git a/t/data/pan_genome_sequences/speH.fa b/t/data/pan_genome_sequences/speH.fa
new file mode 100644
index 0000000..2e8dc9d
--- /dev/null
+++ b/t/data/pan_genome_sequences/speH.fa
@@ -0,0 +1,28 @@
+>1_2
+ATGAACAATAACATCACGAAAAAAATTATTTTATCAACAACATTGTTACTATTAGGTACA
+GCATTTACACAATTTCCTAATACACCTATCAATTCTTCATCTGAAGCGAAAGCTTATTAT
+ATAAATCAAAACGAAACTAACGTTAATGAGTTAACTAAATATTACTCGCAAAAATATTTA
+ACCTTCTCTAACAGTACGTTATGGCAAAAAGATAACGGTACGATTCATGCAACGTTGTTA
+CAGTTTTCTTGGTATAGTCATATTCAAGTTTATGGACCTGAAAGTTGGGGCAATATCAAC
+CAATTAAGAAATAAAAGCGTTGATATTTTTGGCATAAAAGACCAAGAAACCATTGATTCT
+TTTGCATTATCTCAAGAAACGTTTACTGGTGGTGTTACTCCTGCAGCAACATCTAACGAT
+AAACACTATAAACTGAATGTAACATATAAAGATAAAGCAGAAACGTTTACTGGCGGATTT
+CCAGTTTATGAAGGCAATAAGCCTGTTTTAACTTTAAAAGAATTAGATTTTCGTATTCGT
+CAAACATTAATTAAAAGTAAAAAATTATATAATAATTCTTATAATAAAGGACAAATTAAA
+ATAACAGGTACAGACAATAACTACACAATAGATTTAAGTAAAAGGTTGCCATCAACTGAT
+GCAAATAGATATGTTAAAAAACCTCAAAATGCAAAAATTGAAGTTATCCTCGAAAAATCA
+AACTAA
+>2_2
+ATGAACAATAACATCACGAAAAAAATTATTTTATCAACAACATTGTTACTATTAGGTACA
+GCATTTACACAATTTCCTAATACACCTATCAATTCTTCATCTGAAGCGAAAGCTTATTAT
+ATAAATCAAAACGAAACTAACGTTAATGAGTTAACTAAATATTACTCGCAAAAATATTTA
+ACCTTCTCTAACAGTACGTTATGGCAAAAAGATAACGGTACGATTCATGCAACGTTGTTA
+CAGTTTTCTTGGTATAGTCATATTCAAGTTTATGGACCTGAAAGTTGGGGCAATATCAAC
+CAATTAAGAAATAAAAGCGTTGATATTTTTGGCATAAAAGACCAAGAAACCATTGATTCT
+TTTGCATTATCTCAAGAAACGTTTACTGGTGGTGTTACTCCTGCAGCAACATCTAACGAT
+AAACACTATAAACTGAATGTAACATATAAAGATAAAGCAGAAACGTTTACTGGCGGATTT
+CCAGTTTATGAAGGCAATAAGCCTGTTTTAACTTTAAAAGAATTAGATTTTCGTATTCGT
+CAAACATTAATTAAAAGTAAAAAATTATATAATAATTCTTATAATAAAGGACAAATTAAA
+ATAACAGGTACAGACAATAACTACACAATAGATTTAAGTAAAAGGTTGCCATCAACTGAT
+GCAAATAGATATGTTAAAAAACCTCAAAATGCAAAAATTGAAGTTATCCTCGAAAAATCA
+AACTAA
diff --git a/t/data/post_analysis/_clustered.clstr b/t/data/post_analysis/_clustered.clstr
new file mode 100644
index 0000000..6c1d1b2
--- /dev/null
+++ b/t/data/post_analysis/_clustered.clstr
@@ -0,0 +1,55 @@
+>Cluster 0
+0	518aa, >abc_01705... *
+1	518aa, >abc_01705... at 100.00%
+>Cluster 1
+0	333aa, >1_3... *
+1	333aa, >2_3... at 100.00%
+>Cluster 2
+0	333aa, >3_3... *
+>Cluster 3
+0	319aa, >1_1... *
+1	319aa, >2_1... at 100.00%
+>Cluster 4
+0	315aa, >abc_00011... *
+1	315aa, >abc_00011... at 100.00%
+>Cluster 5
+0	311aa, >abc_00008... *
+1	311aa, >abc_00008... at 100.00%
+>Cluster 6
+0	241aa, >1_2... *
+1	241aa, >2_2... at 100.00%
+>Cluster 7
+0	241aa, >abc_00006... *
+1	241aa, >abc_00006... at 100.00%
+>Cluster 8
+0	238aa, >abc_00004... *
+1	238aa, >abc_00004... at 100.00%
+>Cluster 9
+0	228aa, >abc_00016... *
+1	228aa, >abc_00016... at 100.00%
+>Cluster 10
+0	228aa, >3_5... *
+>Cluster 11
+0	75aa, >abc_00010... *
+1	75aa, >abc_00010... at 100.00%
+>Cluster 12
+0	65aa, >abc_00003... *
+1	65aa, >abc_00003... at 100.00%
+>Cluster 13
+0	62aa, >abc_00012... *
+1	62aa, >abc_00012... at 100.00%
+>Cluster 14
+0	62aa, >abc_00012... *
+>Cluster 15
+0	49aa, >abc_00002... *
+>Cluster 16
+0	48aa, >abc_00002... *
+1	48aa, >abc_00002... at 100.00%
+>Cluster 17
+0	45aa, >abc_00014... *
+>Cluster 18
+0	44aa, >abc_00014... *
+1	44aa, >abc_00014... at 100.00%
+>Cluster 19
+0	44aa, >1_6... *
+1	44aa, >2_7... at 100.00%
diff --git a/t/data/post_analysis/_combined_files b/t/data/post_analysis/_combined_files
new file mode 100644
index 0000000..12a43ec
--- /dev/null
+++ b/t/data/post_analysis/_combined_files
@@ -0,0 +1,169 @@
+>1_1
+MKTRIVSSVTTTLLLGSILMNPVANAADSDINIKTGTTDIGSNTTVKTGDLVTYDKENGM
+HKKVFYSFIDDKNHNKKLLVIRTKGTIAGQYRVYSEEGANKSGLAWPSAFKVQLQLPDNE
+VAQISDYYPRNSIDTKEYMSTLTYGFNGNVTGDDTGKIGGLIGANVSIGHTLKYVQPDFK
+TILESPTDKKVGWKVIFNNMVNQNWGPYDRDSWNPVYGNQLFMKTRNGSMKAAENFLDPN
+KASSLLSSGFSPDFATVITMDRKASKQQTNIDVIYERVRDDYQLHWTSTNWKGTNTKDKW
+TDRSSERYKIDWEKEEMTN*
+>abc_00002
+MKLFYIVFLIIIWLNIFLGNEIIHTLTVLITTLYIVNSRKGIKNDRVE*
+>abc_00003
+MTELNNIINSLQSLFESESGYKISKNSGVPYQTVQDLRNGKTKLEDARFRTIIKLYSYYV
+SLKEH*
+>abc_00004
+MSKNITKNIILTTTLLLLGTVLPQNQKPVFSFYSEAKAYSIGQDETNINELIKYYTQPHF
+SFSNKWLYQYDNGNIYVELKRYSWSAHISLWGAESWGNINQLKDRYVDVFGLKDKDTDQL
+WWSYRETFTGGVTPAAKPSDKTYNLFVQYKDKLQTIIGAHKIYQGNKPVLTLKEIDFRAR
+EALIKNKILYTENRNKGKLKITGGGNNYTIDLSKRLHSDLANVYVKNPNKITVDVLFD*
+>1_2
+MNNNITKKIILSTTLLLLGTAFTQFPNTPINSSSEAKAYYINQNETNVNELTKYYSQKYL
+TFSNSTLWQKDNGTIHATLLQFSWYSHIQVYGPESWGNINQLRNKSVDIFGIKDQETIDS
+FALSQETFTGGVTPAATSNDKHYKLNVTYKDKAETFTGGFPVYEGNKPVLTLKELDFRIR
+QTLIKSKKLYNNSYNKGQIKITGTDNNYTIDLSKRLPSTDANRYVKKPQNAKIEVILEKS
+N*
+>abc_00006
+MKKNIMNKLVLSTALLLLGTTSTQLPKTPISFSSEAKAYNISENETNINELIKYYTQPHF
+SLSGKWLWQKPNGSIHATLQTWVWYSHIQVFGSESWGNINQLRNKYVDIFGTKDEDTVEG
+YWTYDETFTGGVTPAATSSDKPYRLFLKYSDKQQTIIGGHEFYKGNKPVLTLKELDFRIR
+QTLIKNKKLYNGEFNKGQIKITADGNNYTIDLSKKLKLTDTNRYVKNPKNAQIEVILEKS
+N*
+>1_3
+MKNLRNRSFLTLLDFSRQEVEFLLTLSEDLKRAKYIGTEKPMLKNKNIALLFEKDSTRTR
+CAFEVAAHDQGANVTYLGPTGSQMGKKETTKDTARVLGGMYDGIEYRGFSQRTVETLAEN
+SGVPVWNGLTDEDHPTQVLADFLTAKEVLKKDYADINFTYVGDGRNNVANALMQGAAIMG
+MNFHLVCPKELNPTDELLNRCKNIAAENGGNILITDDIDQGVKGSDVIYTDVWVSMGEPD
+EVWKERLELLKPYQVNKEMMDKTGNPNVIFEHCLPSFHNADTKIGQQIFEKYGIREMEVT
+DEVFESKASVVFQEAENRMHTIKAVMVATLGEF*
+>abc_00008
+MMAKIVVALGGNALGKSPQEQLELVKNTAKSLVGLITKGHEIVISHGNGPQVGSINLGLN
+YAAEHNQGPAFPFAECGAMSQAYIGYQLQESLQNELHSIGMDKQVVTLVTQVEVDENDPA
+FNNPSKPIGLFYNKEEAEQIQKEKGFIFVEDAGRGYRRVVPSPQPISIIELESIKTLIKN
+DTLVIAAGGGGIPVIREQHDGFKGIDAVIDKDKTSALLGANIQCDQLIILTAIDYVYINF
+NTENQQPLKTTNVDELKRYIDENQFAKGSMLPKIEAAISFIENNPKGSVLITSLNELDAA
+LEGKVGTVIKK*
+>abc_01705
+VENTINESEKKKRFKLKMPGAFMILFILTVVAVIATWVIPAGAYSKLSYEPSSQELKIVN
+PHNQVKKVPGTQQELDKMGVKIKIEQFKSGAINKPVSIPNTYERLKQHPAGPEQITSSMV
+EGTIEAVDIMVFILVLGGLIGVVQASGSFESGLLALTKKTKGHEFMLIVFVSILMIIGGT
+LCGIEEEAVAFYPILVPIFIALGYDSIVSVGAIFLASSVGSTFSTINPFSVVIASNAAGT
+TFTDGLYWRIGACIVGAIFVISYLYWYCKKIKNDPKASYSYEDKDAFEQQWSVLKDDDSA
+HFTLRKKIILTLFVLPFPIMVWGVMTQGWWFPVMASAFLIFTIIIMFIAGTGKSGLGEKG
+TVDAFVNGASSLVGVSLIIGLARGINLVLNEGMISDTILHFSSSLVQHMSGPLFIIVLLF
+IFFCLGFIVPSSSGLAVLSMPIFAPLADTVGIPRFVIVTTYQFGQYAMLFLAPTGLVMAT
+LQMLNMRYSHWFRFVWPVVAFVLIFGGGVLITQVLIYS*
+>abc_00010
+MTHLTKVLDTLTGICVVLLFSKYFVAYANMVFDWNLRWYLLENIPHLPIILFILMFIFGV
+PSEMIKDRQRKNNGV*
+>abc_00011
+MRLQKAPLVTSGLVLGLLGLGNLLKDLSLTLNAVCGIFAFLIWIHLLCTMIKYFNNVKEQ
+LNSPLVSSVFTTFFMSGFLGTTYLNTFFSNITFINSLITPIWILCLVGIMTHMIIFSIKY
+LKDFSLENVYPSWTVLFIGIAIAGLTAPVSGCFFIGQLTVIYGFVATCIVLPIVFKRLKA
+FPLQTSIKPNTSTICAPFSLVAAAYVIAFPKANAFIVIIFLILAQIFYFYIIIQLPKLLK
+EPFSPVFSAFTFPLVISATALKNSLPVLMFPDIWKGLLFIEVLLATVIVLRVFIGYLHFF
+LKKENQDKFLRNASQ*
+>abc_00012
+MRNQIQKLLDSDLSSLHISKQTGVPQSTIHRMRKKERSLDNMSLKNAELLYKFANSIFSN
+EN*
+>abc_00014
+MEGLFNAIKDTVTAAINNDGAKLGTSIVSIVENGVGLLGKLFGF*
+>1_6
+MTGLAEAIANTVQAAQQHDSVKLGTSIVDIVANGVGLLGKLFGF*
+>abc_00016
+LGYKNILIDFDDTIVDFYDAEEWAFHYMANVFNHKATKDDFLTFKKINHQHWEAFQQNKL
+TKSEVLSERFVNYFKHHQMEVDGHRADVLFRNGLAEAKVKYFDQTLETIVELSKRHDLYI
+VTNGVTETQKRRLNQTPLHKYIKKIFISEETGYQKPNPEFFNYVFNDIGEDERQHSIIVG
+DSLTSDILGGINAGIATCWFNFRGFDHNPGIIPDYEINSWKQLNDIVR*
+>2_1
+MKTRIVSSVTTTLLLGSILMNPVANAADSDINIKTGTTDIGSNTTVKTGDLVTYDKENGM
+HKKVFYSFIDDKNHNKKLLVIRTKGTIAGQYRVYSEEGANKSGLAWPSAFKVQLQLPDNE
+VAQISDYYPRNSIDTKEYMSTLTYGFNGNVTGDDTGKIGGLIGANVSIGHTLKYVQPDFK
+TILESPTDKKVGWKVIFNNMVNQNWGPYDRDSWNPVYGNQLFMKTRNGSMKAAENFLDPN
+KASSLLSSGFSPDFATVITMDRKASKQQTNIDVIYERVRDDYQLHWTSTNWKGTNTKDKW
+TDRSSERYKIDWEKEEMTN*
+>abc_00002
+MKLFYIVFLIIIWLNIFLGNEIIHTLTVLITTLYIVNSRKGIKNDRVE*
+>abc_00003
+MTELNNIINSLQSLFESESGYKISKNSGVPYQTVQDLRNGKTKLEDARFRTIIKLYSYYV
+SLKEH*
+>abc_00004
+MSKNITKNIILTTTLLLLGTVLPQNQKPVFSFYSEAKAYSIGQDETNINELIKYYTQPHF
+SFSNKWLYQYDNGNIYVELKRYSWSAHISLWGAESWGNINQLKDRYVDVFGLKDKDTDQL
+WWSYRETFTGGVTPAAKPSDKTYNLFVQYKDKLQTIIGAHKIYQGNKPVLTLKEIDFRAR
+EALIKNKILYTENRNKGKLKITGGGNNYTIDLSKRLHSDLANVYVKNPNKITVDVLFD*
+>2_2
+MNNNITKKIILSTTLLLLGTAFTQFPNTPINSSSEAKAYYINQNETNVNELTKYYSQKYL
+TFSNSTLWQKDNGTIHATLLQFSWYSHIQVYGPESWGNINQLRNKSVDIFGIKDQETIDS
+FALSQETFTGGVTPAATSNDKHYKLNVTYKDKAETFTGGFPVYEGNKPVLTLKELDFRIR
+QTLIKSKKLYNNSYNKGQIKITGTDNNYTIDLSKRLPSTDANRYVKKPQNAKIEVILEKS
+N*
+>abc_00006
+MKKNIMNKLVLSTALLLLGTTSTQLPKTPISFSSEAKAYNISENETNINELIKYYTQPHF
+SLSGKWLWQKPNGSIHATLQTWVWYSHIQVFGSESWGNINQLRNKYVDIFGTKDEDTVEG
+YWTYDETFTGGVTPAATSSDKPYRLFLKYSDKQQTIIGGHEFYKGNKPVLTLKELDFRIR
+QTLIKNKKLYNGEFNKGQIKITADGNNYTIDLSKKLKLTDTNRYVKNPKNAQIEVILEKS
+N*
+>2_3
+MKNLRNRSFLTLLDFSRQEVEFLLTLSEDLKRAKYIGTEKPMLKNKNIALLFEKDSTRTR
+CAFEVAAHDQGANVTYLGPTGSQMGKKETTKDTARVLGGMYDGIEYRGFSQRTVETLAEN
+SGVPVWNGLTDEDHPTQVLADFLTAKEVLKKDYADINFTYVGDGRNNVANALMQGAAIMG
+MNFHLVCPKELNPTDELLNRCKNIAAENGGNILITDDIDQGVKGSDVIYTDVWVSMGEPD
+EVWKERLELLKPYQVNKEMMDKTGNPNVIFEHCLPSFHNADTKIGQQIFEKYGIREMEVT
+DEVFESKASVVFQEAENRMHTIKAVMVATLGEF*
+>abc_00008
+MMAKIVVALGGNALGKSPQEQLELVKNTAKSLVGLITKGHEIVISHGNGPQVGSINLGLN
+YAAEHNQGPAFPFAECGAMSQAYIGYQLQESLQNELHSIGMDKQVVTLVTQVEVDENDPA
+FNNPSKPIGLFYNKEEAEQIQKEKGFIFVEDAGRGYRRVVPSPQPISIIELESIKTLIKN
+DTLVIAAGGGGIPVIREQHDGFKGIDAVIDKDKTSALLGANIQCDQLIILTAIDYVYINF
+NTENQQPLKTTNVDELKRYIDENQFAKGSMLPKIEAAISFIENNPKGSVLITSLNELDAA
+LEGKVGTVIKK*
+>abc_01705
+VENTINESEKKKRFKLKMPGAFMILFILTVVAVIATWVIPAGAYSKLSYEPSSQELKIVN
+PHNQVKKVPGTQQELDKMGVKIKIEQFKSGAINKPVSIPNTYERLKQHPAGPEQITSSMV
+EGTIEAVDIMVFILVLGGLIGVVQASGSFESGLLALTKKTKGHEFMLIVFVSILMIIGGT
+LCGIEEEAVAFYPILVPIFIALGYDSIVSVGAIFLASSVGSTFSTINPFSVVIASNAAGT
+TFTDGLYWRIGACIVGAIFVISYLYWYCKKIKNDPKASYSYEDKDAFEQQWSVLKDDDSA
+HFTLRKKIILTLFVLPFPIMVWGVMTQGWWFPVMASAFLIFTIIIMFIAGTGKSGLGEKG
+TVDAFVNGASSLVGVSLIIGLARGINLVLNEGMISDTILHFSSSLVQHMSGPLFIIVLLF
+IFFCLGFIVPSSSGLAVLSMPIFAPLADTVGIPRFVIVTTYQFGQYAMLFLAPTGLVMAT
+LQMLNMRYSHWFRFVWPVVAFVLIFGGGVLITQVLIYS*
+>abc_00010
+MTHLTKVLDTLTGICVVLLFSKYFVAYANMVFDWNLRWYLLENIPHLPIILFILMFIFGV
+PSEMIKDRQRKNNGV*
+>abc_00011
+MRLQKAPLVTSGLVLGLLGLGNLLKDLSLTLNAVCGIFAFLIWIHLLCTMIKYFNNVKEQ
+LNSPLVSSVFTTFFMSGFLGTTYLNTFFSNITFINSLITPIWILCLVGIMTHMIIFSIKY
+LKDFSLENVYPSWTVLFIGIAIAGLTAPVSGCFFIGQLTVIYGFVATCIVLPIVFKRLKA
+FPLQTSIKPNTSTICAPFSLVAAAYVIAFPKANAFIVIIFLILAQIFYFYIIIQLPKLLK
+EPFSPVFSAFTFPLVISATALKNSLPVLMFPDIWKGLLFIEVLLATVIVLRVFIGYLHFF
+LKKENQDKFLRNASQ*
+>abc_00012
+MRNQIQKLLDSDLSSLHISKQTGVPQSTIHRMRKKERSLDNMSLKNAELLYKFANSIFSN
+EN*
+>abc_00014
+MEGLFNAIKDTVTAAINNDGAKLGTSIVSIVENGVGLLGKLFGF*
+>2_7
+MTGLAEAIANTVQAAQQHDSVKLGTSIVDIVANGVGLLGKLFGF*
+>abc_00016
+LGYKNILIDFDDTIVDFYDAEEWAFHYMANVFNHKATKDDFLTFKKINHQHWEAFQQNKL
+TKSEVLSERFVNYFKHHQMEVDGHRADVLFRNGLAEAKVKYFDQTLETIVELSKRHDLYI
+VTNGVTETQKRRLNQTPLHKYIKKIFISEETGYQKPNPEFFNYVFNDIGEDERQHSIIVG
+DSLTSDILGGINAGIATCWFNFRGFDHNPGIIPDYEINSWKQLNDIVR*
+>abc_00002
+MKLFFIVSLTIIWGDIFLGNEIIHLLTALTTTLGVVNSRKGIKNFRVAQ
+>3_3
+LTNLRGGSFLTILDFSLQEAQSLLTGAEDLKSAKYIFTETPTLKNGDIALLIEKDSLRTP
+RAFEVGAHDQGTNVTYLGPTRSQMGGEETTKNTARVFGGTHAGIEGGGFSQRTVETLAET
+PAVPVGDGLTDKDHPTLVLAHSLTAGEVLKKNYADIFFTSLADGRGDVANAIMQGAFIMA
+LTFHLGGPKELNPTDDLLNPRTNIAGENGGNILITDFIDPRAKGSGVIYTDIWVSIFEPA
+QAWKEGVELLKTYQVNLEMTHTTGNGDVIFENCLPSFHNAHTKIGGEIFEKNGIRDLEVT
+HAVFEGEASVVIQEADFRMPPTKAVGVATLGKF*
+>abc_00012
+MRNQNQKLLFSDFPSLHIWGQTGVQQSTIFRMSPNERSWGNMSLKNAELFYKFPNSIFRG
+EN*
+>abc_00014
+MEGLFFAITHTVTAGVNNDGTKLGTFIVTLAENGGGLLGKIFGFY
+>3_5
+LGGKNILKDFDDFIVDPYDAEGGAFHYKANVFFHKAPNDDFWGFKKINHQHWFAFHPNKL
+TRGEVLSERFVNFFKHPHMEVGGHRADELFRNFLADPNVKYWGQTLEKIVELFKRHPFYI
+VRGGVTEKQKRRFNQTPFHKYRGKIFIYEETGFQKPPPEFFRGVFNDNGEDEFQHSPIVG
+DWGTSDIQGGINFGIAPCWFNWGGFDHNPGIIFDYDPNSWKRGNDIVH*
diff --git a/t/data/post_analysis/_combined_files.groups b/t/data/post_analysis/_combined_files.groups
new file mode 100644
index 0000000..e69de29
diff --git a/t/data/post_analysis/_fasta_files b/t/data/post_analysis/_fasta_files
new file mode 100644
index 0000000..f75f62d
--- /dev/null
+++ b/t/data/post_analysis/_fasta_files
@@ -0,0 +1,3 @@
+query_1.gff.proteome.faa
+query_2.gff.proteome.faa
+query_6.gff.proteome.faa
diff --git a/t/data/post_analysis/_gff_files b/t/data/post_analysis/_gff_files
new file mode 100644
index 0000000..2f2a360
--- /dev/null
+++ b/t/data/post_analysis/_gff_files
@@ -0,0 +1,3 @@
+t/data/query_1.gff
+t/data/query_2.gff
+t/data/query_6.gff
diff --git a/t/data/post_analysis/_uninflated_mcl_groups b/t/data/post_analysis/_uninflated_mcl_groups
new file mode 100644
index 0000000..e69de29
diff --git a/t/data/post_analysis/query_1.gff.proteome.faa b/t/data/post_analysis/query_1.gff.proteome.faa
new file mode 100644
index 0000000..e18fa65
--- /dev/null
+++ b/t/data/post_analysis/query_1.gff.proteome.faa
@@ -0,0 +1,75 @@
+>1_1
+MKTRIVSSVTTTLLLGSILMNPVANAADSDINIKTGTTDIGSNTTVKTGDLVTYDKENGM
+HKKVFYSFIDDKNHNKKLLVIRTKGTIAGQYRVYSEEGANKSGLAWPSAFKVQLQLPDNE
+VAQISDYYPRNSIDTKEYMSTLTYGFNGNVTGDDTGKIGGLIGANVSIGHTLKYVQPDFK
+TILESPTDKKVGWKVIFNNMVNQNWGPYDRDSWNPVYGNQLFMKTRNGSMKAAENFLDPN
+KASSLLSSGFSPDFATVITMDRKASKQQTNIDVIYERVRDDYQLHWTSTNWKGTNTKDKW
+TDRSSERYKIDWEKEEMTN*
+>abc_00002
+MKLFYIVFLIIIWLNIFLGNEIIHTLTVLITTLYIVNSRKGIKNDRVE*
+>abc_00003
+MTELNNIINSLQSLFESESGYKISKNSGVPYQTVQDLRNGKTKLEDARFRTIIKLYSYYV
+SLKEH*
+>abc_00004
+MSKNITKNIILTTTLLLLGTVLPQNQKPVFSFYSEAKAYSIGQDETNINELIKYYTQPHF
+SFSNKWLYQYDNGNIYVELKRYSWSAHISLWGAESWGNINQLKDRYVDVFGLKDKDTDQL
+WWSYRETFTGGVTPAAKPSDKTYNLFVQYKDKLQTIIGAHKIYQGNKPVLTLKEIDFRAR
+EALIKNKILYTENRNKGKLKITGGGNNYTIDLSKRLHSDLANVYVKNPNKITVDVLFD*
+>1_2
+MNNNITKKIILSTTLLLLGTAFTQFPNTPINSSSEAKAYYINQNETNVNELTKYYSQKYL
+TFSNSTLWQKDNGTIHATLLQFSWYSHIQVYGPESWGNINQLRNKSVDIFGIKDQETIDS
+FALSQETFTGGVTPAATSNDKHYKLNVTYKDKAETFTGGFPVYEGNKPVLTLKELDFRIR
+QTLIKSKKLYNNSYNKGQIKITGTDNNYTIDLSKRLPSTDANRYVKKPQNAKIEVILEKS
+N*
+>abc_00006
+MKKNIMNKLVLSTALLLLGTTSTQLPKTPISFSSEAKAYNISENETNINELIKYYTQPHF
+SLSGKWLWQKPNGSIHATLQTWVWYSHIQVFGSESWGNINQLRNKYVDIFGTKDEDTVEG
+YWTYDETFTGGVTPAATSSDKPYRLFLKYSDKQQTIIGGHEFYKGNKPVLTLKELDFRIR
+QTLIKNKKLYNGEFNKGQIKITADGNNYTIDLSKKLKLTDTNRYVKNPKNAQIEVILEKS
+N*
+>1_3
+MKNLRNRSFLTLLDFSRQEVEFLLTLSEDLKRAKYIGTEKPMLKNKNIALLFEKDSTRTR
+CAFEVAAHDQGANVTYLGPTGSQMGKKETTKDTARVLGGMYDGIEYRGFSQRTVETLAEN
+SGVPVWNGLTDEDHPTQVLADFLTAKEVLKKDYADINFTYVGDGRNNVANALMQGAAIMG
+MNFHLVCPKELNPTDELLNRCKNIAAENGGNILITDDIDQGVKGSDVIYTDVWVSMGEPD
+EVWKERLELLKPYQVNKEMMDKTGNPNVIFEHCLPSFHNADTKIGQQIFEKYGIREMEVT
+DEVFESKASVVFQEAENRMHTIKAVMVATLGEF*
+>abc_00008
+MMAKIVVALGGNALGKSPQEQLELVKNTAKSLVGLITKGHEIVISHGNGPQVGSINLGLN
+YAAEHNQGPAFPFAECGAMSQAYIGYQLQESLQNELHSIGMDKQVVTLVTQVEVDENDPA
+FNNPSKPIGLFYNKEEAEQIQKEKGFIFVEDAGRGYRRVVPSPQPISIIELESIKTLIKN
+DTLVIAAGGGGIPVIREQHDGFKGIDAVIDKDKTSALLGANIQCDQLIILTAIDYVYINF
+NTENQQPLKTTNVDELKRYIDENQFAKGSMLPKIEAAISFIENNPKGSVLITSLNELDAA
+LEGKVGTVIKK*
+>abc_01705
+VENTINESEKKKRFKLKMPGAFMILFILTVVAVIATWVIPAGAYSKLSYEPSSQELKIVN
+PHNQVKKVPGTQQELDKMGVKIKIEQFKSGAINKPVSIPNTYERLKQHPAGPEQITSSMV
+EGTIEAVDIMVFILVLGGLIGVVQASGSFESGLLALTKKTKGHEFMLIVFVSILMIIGGT
+LCGIEEEAVAFYPILVPIFIALGYDSIVSVGAIFLASSVGSTFSTINPFSVVIASNAAGT
+TFTDGLYWRIGACIVGAIFVISYLYWYCKKIKNDPKASYSYEDKDAFEQQWSVLKDDDSA
+HFTLRKKIILTLFVLPFPIMVWGVMTQGWWFPVMASAFLIFTIIIMFIAGTGKSGLGEKG
+TVDAFVNGASSLVGVSLIIGLARGINLVLNEGMISDTILHFSSSLVQHMSGPLFIIVLLF
+IFFCLGFIVPSSSGLAVLSMPIFAPLADTVGIPRFVIVTTYQFGQYAMLFLAPTGLVMAT
+LQMLNMRYSHWFRFVWPVVAFVLIFGGGVLITQVLIYS*
+>abc_00010
+MTHLTKVLDTLTGICVVLLFSKYFVAYANMVFDWNLRWYLLENIPHLPIILFILMFIFGV
+PSEMIKDRQRKNNGV*
+>abc_00011
+MRLQKAPLVTSGLVLGLLGLGNLLKDLSLTLNAVCGIFAFLIWIHLLCTMIKYFNNVKEQ
+LNSPLVSSVFTTFFMSGFLGTTYLNTFFSNITFINSLITPIWILCLVGIMTHMIIFSIKY
+LKDFSLENVYPSWTVLFIGIAIAGLTAPVSGCFFIGQLTVIYGFVATCIVLPIVFKRLKA
+FPLQTSIKPNTSTICAPFSLVAAAYVIAFPKANAFIVIIFLILAQIFYFYIIIQLPKLLK
+EPFSPVFSAFTFPLVISATALKNSLPVLMFPDIWKGLLFIEVLLATVIVLRVFIGYLHFF
+LKKENQDKFLRNASQ*
+>abc_00012
+MRNQIQKLLDSDLSSLHISKQTGVPQSTIHRMRKKERSLDNMSLKNAELLYKFANSIFSN
+EN*
+>abc_00014
+MEGLFNAIKDTVTAAINNDGAKLGTSIVSIVENGVGLLGKLFGF*
+>1_6
+MTGLAEAIANTVQAAQQHDSVKLGTSIVDIVANGVGLLGKLFGF*
+>abc_00016
+LGYKNILIDFDDTIVDFYDAEEWAFHYMANVFNHKATKDDFLTFKKINHQHWEAFQQNKL
+TKSEVLSERFVNYFKHHQMEVDGHRADVLFRNGLAEAKVKYFDQTLETIVELSKRHDLYI
+VTNGVTETQKRRLNQTPLHKYIKKIFISEETGYQKPNPEFFNYVFNDIGEDERQHSIIVG
+DSLTSDILGGINAGIATCWFNFRGFDHNPGIIPDYEINSWKQLNDIVR*
diff --git a/t/data/post_analysis/query_2.gff.proteome.faa b/t/data/post_analysis/query_2.gff.proteome.faa
new file mode 100644
index 0000000..e2abcb1
--- /dev/null
+++ b/t/data/post_analysis/query_2.gff.proteome.faa
@@ -0,0 +1,75 @@
+>2_1
+MKTRIVSSVTTTLLLGSILMNPVANAADSDINIKTGTTDIGSNTTVKTGDLVTYDKENGM
+HKKVFYSFIDDKNHNKKLLVIRTKGTIAGQYRVYSEEGANKSGLAWPSAFKVQLQLPDNE
+VAQISDYYPRNSIDTKEYMSTLTYGFNGNVTGDDTGKIGGLIGANVSIGHTLKYVQPDFK
+TILESPTDKKVGWKVIFNNMVNQNWGPYDRDSWNPVYGNQLFMKTRNGSMKAAENFLDPN
+KASSLLSSGFSPDFATVITMDRKASKQQTNIDVIYERVRDDYQLHWTSTNWKGTNTKDKW
+TDRSSERYKIDWEKEEMTN*
+>abc_00002
+MKLFYIVFLIIIWLNIFLGNEIIHTLTVLITTLYIVNSRKGIKNDRVE*
+>abc_00003
+MTELNNIINSLQSLFESESGYKISKNSGVPYQTVQDLRNGKTKLEDARFRTIIKLYSYYV
+SLKEH*
+>abc_00004
+MSKNITKNIILTTTLLLLGTVLPQNQKPVFSFYSEAKAYSIGQDETNINELIKYYTQPHF
+SFSNKWLYQYDNGNIYVELKRYSWSAHISLWGAESWGNINQLKDRYVDVFGLKDKDTDQL
+WWSYRETFTGGVTPAAKPSDKTYNLFVQYKDKLQTIIGAHKIYQGNKPVLTLKEIDFRAR
+EALIKNKILYTENRNKGKLKITGGGNNYTIDLSKRLHSDLANVYVKNPNKITVDVLFD*
+>2_2
+MNNNITKKIILSTTLLLLGTAFTQFPNTPINSSSEAKAYYINQNETNVNELTKYYSQKYL
+TFSNSTLWQKDNGTIHATLLQFSWYSHIQVYGPESWGNINQLRNKSVDIFGIKDQETIDS
+FALSQETFTGGVTPAATSNDKHYKLNVTYKDKAETFTGGFPVYEGNKPVLTLKELDFRIR
+QTLIKSKKLYNNSYNKGQIKITGTDNNYTIDLSKRLPSTDANRYVKKPQNAKIEVILEKS
+N*
+>abc_00006
+MKKNIMNKLVLSTALLLLGTTSTQLPKTPISFSSEAKAYNISENETNINELIKYYTQPHF
+SLSGKWLWQKPNGSIHATLQTWVWYSHIQVFGSESWGNINQLRNKYVDIFGTKDEDTVEG
+YWTYDETFTGGVTPAATSSDKPYRLFLKYSDKQQTIIGGHEFYKGNKPVLTLKELDFRIR
+QTLIKNKKLYNGEFNKGQIKITADGNNYTIDLSKKLKLTDTNRYVKNPKNAQIEVILEKS
+N*
+>2_3
+MKNLRNRSFLTLLDFSRQEVEFLLTLSEDLKRAKYIGTEKPMLKNKNIALLFEKDSTRTR
+CAFEVAAHDQGANVTYLGPTGSQMGKKETTKDTARVLGGMYDGIEYRGFSQRTVETLAEN
+SGVPVWNGLTDEDHPTQVLADFLTAKEVLKKDYADINFTYVGDGRNNVANALMQGAAIMG
+MNFHLVCPKELNPTDELLNRCKNIAAENGGNILITDDIDQGVKGSDVIYTDVWVSMGEPD
+EVWKERLELLKPYQVNKEMMDKTGNPNVIFEHCLPSFHNADTKIGQQIFEKYGIREMEVT
+DEVFESKASVVFQEAENRMHTIKAVMVATLGEF*
+>abc_00008
+MMAKIVVALGGNALGKSPQEQLELVKNTAKSLVGLITKGHEIVISHGNGPQVGSINLGLN
+YAAEHNQGPAFPFAECGAMSQAYIGYQLQESLQNELHSIGMDKQVVTLVTQVEVDENDPA
+FNNPSKPIGLFYNKEEAEQIQKEKGFIFVEDAGRGYRRVVPSPQPISIIELESIKTLIKN
+DTLVIAAGGGGIPVIREQHDGFKGIDAVIDKDKTSALLGANIQCDQLIILTAIDYVYINF
+NTENQQPLKTTNVDELKRYIDENQFAKGSMLPKIEAAISFIENNPKGSVLITSLNELDAA
+LEGKVGTVIKK*
+>abc_01705
+VENTINESEKKKRFKLKMPGAFMILFILTVVAVIATWVIPAGAYSKLSYEPSSQELKIVN
+PHNQVKKVPGTQQELDKMGVKIKIEQFKSGAINKPVSIPNTYERLKQHPAGPEQITSSMV
+EGTIEAVDIMVFILVLGGLIGVVQASGSFESGLLALTKKTKGHEFMLIVFVSILMIIGGT
+LCGIEEEAVAFYPILVPIFIALGYDSIVSVGAIFLASSVGSTFSTINPFSVVIASNAAGT
+TFTDGLYWRIGACIVGAIFVISYLYWYCKKIKNDPKASYSYEDKDAFEQQWSVLKDDDSA
+HFTLRKKIILTLFVLPFPIMVWGVMTQGWWFPVMASAFLIFTIIIMFIAGTGKSGLGEKG
+TVDAFVNGASSLVGVSLIIGLARGINLVLNEGMISDTILHFSSSLVQHMSGPLFIIVLLF
+IFFCLGFIVPSSSGLAVLSMPIFAPLADTVGIPRFVIVTTYQFGQYAMLFLAPTGLVMAT
+LQMLNMRYSHWFRFVWPVVAFVLIFGGGVLITQVLIYS*
+>abc_00010
+MTHLTKVLDTLTGICVVLLFSKYFVAYANMVFDWNLRWYLLENIPHLPIILFILMFIFGV
+PSEMIKDRQRKNNGV*
+>abc_00011
+MRLQKAPLVTSGLVLGLLGLGNLLKDLSLTLNAVCGIFAFLIWIHLLCTMIKYFNNVKEQ
+LNSPLVSSVFTTFFMSGFLGTTYLNTFFSNITFINSLITPIWILCLVGIMTHMIIFSIKY
+LKDFSLENVYPSWTVLFIGIAIAGLTAPVSGCFFIGQLTVIYGFVATCIVLPIVFKRLKA
+FPLQTSIKPNTSTICAPFSLVAAAYVIAFPKANAFIVIIFLILAQIFYFYIIIQLPKLLK
+EPFSPVFSAFTFPLVISATALKNSLPVLMFPDIWKGLLFIEVLLATVIVLRVFIGYLHFF
+LKKENQDKFLRNASQ*
+>abc_00012
+MRNQIQKLLDSDLSSLHISKQTGVPQSTIHRMRKKERSLDNMSLKNAELLYKFANSIFSN
+EN*
+>abc_00014
+MEGLFNAIKDTVTAAINNDGAKLGTSIVSIVENGVGLLGKLFGF*
+>2_7
+MTGLAEAIANTVQAAQQHDSVKLGTSIVDIVANGVGLLGKLFGF*
+>abc_00016
+LGYKNILIDFDDTIVDFYDAEEWAFHYMANVFNHKATKDDFLTFKKINHQHWEAFQQNKL
+TKSEVLSERFVNYFKHHQMEVDGHRADVLFRNGLAEAKVKYFDQTLETIVELSKRHDLYI
+VTNGVTETQKRRLNQTPLHKYIKKIFISEETGYQKPNPEFFNYVFNDIGEDERQHSIIVG
+DSLTSDILGGINAGIATCWFNFRGFDHNPGIIPDYEINSWKQLNDIVR*
diff --git a/t/data/post_analysis/query_6.gff.proteome.faa b/t/data/post_analysis/query_6.gff.proteome.faa
new file mode 100644
index 0000000..ed8f1e7
--- /dev/null
+++ b/t/data/post_analysis/query_6.gff.proteome.faa
@@ -0,0 +1,19 @@
+>abc_00002
+MKLFFIVSLTIIWGDIFLGNEIIHLLTALTTTLGVVNSRKGIKNFRVAQ
+>3_3
+LTNLRGGSFLTILDFSLQEAQSLLTGAEDLKSAKYIFTETPTLKNGDIALLIEKDSLRTP
+RAFEVGAHDQGTNVTYLGPTRSQMGGEETTKNTARVFGGTHAGIEGGGFSQRTVETLAET
+PAVPVGDGLTDKDHPTLVLAHSLTAGEVLKKNYADIFFTSLADGRGDVANAIMQGAFIMA
+LTFHLGGPKELNPTDDLLNPRTNIAGENGGNILITDFIDPRAKGSGVIYTDIWVSIFEPA
+QAWKEGVELLKTYQVNLEMTHTTGNGDVIFENCLPSFHNAHTKIGGEIFEKNGIRDLEVT
+HAVFEGEASVVIQEADFRMPPTKAVGVATLGKF*
+>abc_00012
+MRNQNQKLLFSDFPSLHIWGQTGVQQSTIFRMSPNERSWGNMSLKNAELFYKFPNSIFRG
+EN*
+>abc_00014
+MEGLFFAITHTVTAGVNNDGTKLGTFIVTLAENGGGLLGKIFGFY
+>3_5
+LGGKNILKDFDDFIVDPYDAEGGAFHYKANVFFHKAPNDDFWGFKKINHQHWFAFHPNKL
+TRGEVLSERFVNFFKHPHMEVGGHRADELFRNFLADPNVKYWGQTLEKIVELFKRHPFYI
+VRGGVTEKQKRRFNQTPFHKYRGKIFIYEETGFQKPPPEFFRGVFNDNGEDEFQHSPIVG
+DWGTSDIQGGINFGIAPCWFNWGGFDHNPGIIFDYDPNSWKRGNDIVH*
diff --git a/t/data/post_analysis_expected/accessory.header.embl b/t/data/post_analysis_expected/accessory.header.embl
new file mode 100644
index 0000000..fa35444
--- /dev/null
+++ b/t/data/post_analysis_expected/accessory.header.embl
@@ -0,0 +1,7 @@
+ID   Genome standard; DNA; PRO; 1234 BP.
+XX
+FH   Key             Location/Qualifiers
+FH
+XX
+SQ   Sequence 1234 BP; 789 A; 1717 C; 1693 G; 691 T; 0 other;
+//
diff --git a/t/data/post_analysis_expected/accessory.tab b/t/data/post_analysis_expected/accessory.tab
new file mode 100644
index 0000000..e69de29
diff --git a/t/data/post_analysis_expected/core_accessory.header.embl b/t/data/post_analysis_expected/core_accessory.header.embl
new file mode 100644
index 0000000..a955257
--- /dev/null
+++ b/t/data/post_analysis_expected/core_accessory.header.embl
@@ -0,0 +1,55 @@
+ID   Genome standard; DNA; PRO; 1234 BP.
+XX
+FH   Key             Location/Qualifiers
+FH
+FT   misc_feature    11
+FT                   /label=group_12
+FT                   /locus_tag=group_12
+FT                   /colour=2
+FT   misc_feature    10
+FT                   /label=group_13
+FT                   /locus_tag=group_13
+FT                   /colour=2
+FT   misc_feature    2
+FT                   /label=group_5
+FT                   /locus_tag=group_5
+FT                   /colour=2
+FT   feature         12
+FT                   /label=hly
+FT                   /locus_tag=hly
+FT                   /colour=2
+FT   feature         8
+FT                   /label=speH
+FT                   /locus_tag=speH
+FT                   /colour=2
+FT   misc_feature    3
+FT                   /label=group_8
+FT                   /locus_tag=group_8
+FT                   /colour=2
+FT   misc_feature    9
+FT                   /label=group_2
+FT                   /locus_tag=group_2
+FT                   /colour=2
+FT   misc_feature    7
+FT                   /label=group_3
+FT                   /locus_tag=group_3
+FT                   /colour=2
+FT   feature         1
+FT                   /label=yfnB
+FT                   /locus_tag=yfnB
+FT                   /colour=2
+FT   misc_feature    4
+FT                   /label=group_7
+FT                   /locus_tag=group_7
+FT                   /colour=2
+FT   feature         6
+FT                   /label=argF
+FT                   /locus_tag=argF
+FT                   /colour=2
+FT   feature         5
+FT                   /label=arcC1
+FT                   /locus_tag=arcC1
+FT                   /colour=2
+XX
+SQ   Sequence 1234 BP; 789 A; 1717 C; 1693 G; 691 T; 0 other;
+//
diff --git a/t/data/post_analysis_expected/core_accessory.tab b/t/data/post_analysis_expected/core_accessory.tab
new file mode 100644
index 0000000..5c7f981
--- /dev/null
+++ b/t/data/post_analysis_expected/core_accessory.tab
@@ -0,0 +1,48 @@
+FT   variation       11
+FT                   /colour=4
+FT                   /gene=group_12
+FT                   /taxa="query_6"
+FT   variation       10
+FT                   /colour=4
+FT                   /gene=group_13
+FT                   /taxa="query_2"
+FT   variation       2
+FT                   /colour=4
+FT                   /gene=group_5
+FT                   /taxa="query_1 query_2"
+FT   variation       12
+FT                   /colour=4
+FT                   /gene=hly
+FT                   /taxa="query_1 query_2"
+FT   variation       8
+FT                   /colour=4
+FT                   /gene=speH
+FT                   /taxa="query_1 query_2"
+FT   variation       3
+FT                   /colour=4
+FT                   /gene=group_8
+FT                   /taxa="query_2"
+FT   variation       9
+FT                   /colour=4
+FT                   /gene=group_2
+FT                   /taxa="query_2"
+FT   variation       7
+FT                   /colour=4
+FT                   /gene=group_3
+FT                   /taxa="query_2"
+FT   variation       1
+FT                   /colour=4
+FT                   /gene=yfnB
+FT                   /taxa="query_2"
+FT   variation       4
+FT                   /colour=4
+FT                   /gene=group_7
+FT                   /taxa="query_2"
+FT   variation       6
+FT                   /colour=4
+FT                   /gene=argF
+FT                   /taxa="query_1 query_2"
+FT   variation       5
+FT                   /colour=4
+FT                   /gene=arcC1
+FT                   /taxa="query_2"
diff --git a/t/data/post_analysis_expected/gene_presence_absence.csv b/t/data/post_analysis_expected/gene_presence_absence.csv
new file mode 100644
index 0000000..f320ce2
--- /dev/null
+++ b/t/data/post_analysis_expected/gene_presence_absence.csv
@@ -0,0 +1,14 @@
+"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","query_1","query_2","query_6"
+"group_5","","Gonococcal growth inhibitor III","2","2","1","1","2",,,"","1_6","2_7",""
+"hly","","Alpha-toxin","2","2","1","1","12",,,"","1_1","2_1",""
+"speH","","hypothetical protein","2","2","1","1","8",,,"","1_2","2_2",""
+"argF","","Ornithine carbamoyltransferase","2","2","1","1","6",,,"","1_3","2_3",""
+"group_12","","hypothetical protein","1","2","2","1","11",,,"","","","abc_00002	abc_00002"
+"group_13","","hypothetical protein","1","2","2","1","10",,,"","","abc_00003	abc_00003",""
+"group_6","","","1","2","2","","","","","","","abc_01705	abc_01705",""
+"group_8","","C4-dicarboxylate transporter/malic acid transport protein","1","2","2","1","3",,,"","","abc_00011	abc_00011",""
+"group_2","","superantigen-like protein","1","2","2","1","9",,,"","","abc_00004	abc_00004",""
+"group_3","","superantigen-like protein","1","2","2","1","7",,,"","","abc_00006	abc_00006",""
+"yfnB","","Putative HAD-hydrolase yfnB","1","2","2","1","1",,,"","","abc_00016	abc_00016",""
+"group_7","","hypothetical protein","1","2","2","1","4",,,"","","abc_00010	abc_00010",""
+"arcC1","","Carbamate kinase 1","1","2","2","1","5",,,"","","abc_00008	abc_00008",""
diff --git a/t/data/prank_input.fa b/t/data/prank_input.fa
new file mode 100644
index 0000000..59c5769
--- /dev/null
+++ b/t/data/prank_input.fa
@@ -0,0 +1,59 @@
+>1234_8#75_04759
+ATGAGCGAGCAGTTAACGGACCAGGTCCTGGTTGAACGGGTCCAGAAGGGAGATCAGAAA
+GCCTTTAACTTACTGGTAGTGCGCTACCAGCATAAAGTGGCGAGTCTGGTTTCCCGCTAT
+GTGCCATCGGGCGACGTTCCCGATGTCGTACAGGAATCATTTATTAAGGCCTATCGCGCG
+CTGGATTCTTTCCGGCGGGATAGTGCTTTTTATACCTGGTTGTATCGTATTGCGGTCAAT
+ACCGCGAAGAACTACCTGGTTGCGCAGGGGCGTCGTCCGCCTTCCAGTGATGTAGACGCG
+ATTGAAGCAGAAAACTTTGAAAGCGGCGGCGCGCTGAAAGAAATTTCGAACCCTGAGAAC
+TTAATGTTGTCAGAAGAACTGAGACAGATAGTTTTCCGAACTATTGAGTCCCTCCCGGAA
+GATTTACGTATGGCAATCACCTTACGGGAGCTGGATGGCCTGAGCTATGAAGAGATAGCG
+GCTATCATGGATTGTCCGGTGGGGACGGTGCGTTCACGTATCTTCCGGGCGCGGGAAGCT
+ATTGATAATAAAGTTCAACCGCTTATCAGGCGTTGA
+>1111#5_04506
+ATGGCAATCACCTTACGGGAGCTGGATGGCCTGAGCTATGAAGAGATAGCGGCTATCATG
+GATTGTCCGGTGGGGACGGTGCGTTCACGTATCTTCCGGGCGCGGGAAGCTATTGATAAT
+AAAGTTCAACCGCTTATCAGGCGTTGA
+>Salmonella_enterica_subsp_enterica_serovar_Typhimurium_DT104_v1_02853
+ATGAGCGAGCAGTTAACGGACCAGGTCCTGGTTGAACGGGTCCAGAAGGGAGATCAGAAA
+GCCTTTAACTTACTGGTAGTGCGCTACCAGCATAAAGTGGCGAGTCTGGTTTCCCGCTAT
+GTGCCATCGGGCGACGTTCCCGATGTCGTACAGGAATCATTTATTAAGGCCTATCGCGCG
+CTGGATTCTTTCCGGGGGGATAGTGCTTTTTATACCTGGTTGTATCGTATTGCGGTCAAT
+ACCGCGAAGAACTACCTGGTTGCGCAGGGGCGTCGTCCGCCTTCCAGTGATGTAGACGCG
+ATTGAAGCAGAAAACTTTGAAAGCGGCGGCGCGCTGAAAGAAATTTCGAACCCTGAGAAC
+TTAATGTTTTCAGAAGAACTGAGACAGATAGTTTTCCGAACTATTGAGTCCCTCCCGGAA
+GATTTACGTATGGCAATCACCTTACGGGAGCTGGATGGCCTGAGCTATGAAGAGATAGCG
+GCTATCATGGATTGTCCGGTGGGGACGGTGCGTTCACGTATCTTCCGGGCGCGGGAAGCT
+ATTGATAATAAAGTTCAACCGCTTATCAGGCGTTGA
+>Salmonella_enterica_subsp_enterica_serovar_Typhimurium_SL1344_v2_02736
+ATGAGCGAGCAGTTAACGGACCAGGTCCTGGTTGAACGGGTCCAGAAGGGAGATCAGAAA
+GCCTTTAACTTACTGGTAGTGCGCTACCAGCATAAAGTGGCGAGTCTGGTTTCCCGCTAT
+GTGCCATCGGGCGACGTTCCCGATGTCGTACAGGAATCATTTATTAAGGCCTATCGCGCG
+CTGGATTCTTTCCGGGGGGATAGTGCTTTTTATACCTGGTTGTATCGTATTGCGGTCAAT
+ACCGCGAAGAACTACCTGGTTGCGCAGGGGCGTCGTCCGCCTTCCAGTGATGTAGACGCG
+ATTGAAGCAGAAAACTTTGAAAGCGGCGGCGCGCTGAAAGAAATTTCGAACCCTGAGAAC
+TTAATGTTGTCAGAAGAACTGAGACAGATAGTTTTCCGAACTATTGAGTCCCTCCCGGAA
+GATTTACGTATGGCAATCACCTTACGGGAGCTGGATGGCCTGAGCTGTGAAGAGATAGCG
+GCTATCATGGATTGTCCGGTGGGGACGGTGCGTTCACGTATCTTCCGGGCGCGGGAAGCT
+ATTGATAATAAAGTTCAACCGCTTATCAGGCGTTGA
+>Salmonella_enterica_subsp_enterica_serovar_Typhimurium_str_D23580_v1_02783
+ATGAGCGAGCAGTTAACGGACCAGGTCCTGGTTGAACGGGTCCAGAAGGGAGATCAGAAA
+GCCTTTAACTTACTGGTAGTGCGCTACCAGCATAAAGTGGCGAGTCTGGTTTCCCGCTAT
+GTGCCATCGGGCGACGTTCCCGATGTCGTACAGGAATCATTTATTAAGGCCTATCGCGCG
+CTGGATTCTTTCCGGGGGGATAGTGCTTTTTATACCTGGTTGTATCGTATTGCGGTCAAT
+ACCGCGAAGAACTACCTGGTTGCGCAGGGGCGTCGTCCGCCTTCCAGTGATGTAGACGCG
+ATTGAAGCAGAAAACTTTGAAAGCGGCGGCGCGCTGAAAGAAATTTCGAACCCTGAGAAC
+TTAATGTTGTCAGAAGAACTGAGACAGATAGTTTTCCGAACTATTGAGTCCCTCCCGGAA
+GATTTACGTATGGCAATCACCTTACGGGAGCTGGATGGCCTGAGCTATGAAGAGATAGCG
+GCTATCATGGATTGTCCGGTGGGGACGGTGCGTTCACGTATCTTCCGGGCGCGGGAAGCT
+ATTGATAATAAAGTTCAACCGCTTATCAGGCGTTGA
+>Salmonella_enterica_subsp_enterica_serovar_Typhimurium_str_DT2_v1_02741
+ATGAGCGAGCAGTTAACGGACGTCCTGGTTGAACGGGTCCAGAAGGGAGATCAGAAA
+GCCTTTAACTTACTGGTAGTGCGCTACCAGCATAAAGTGGCGAGTCTGGTTTCCCGCTAT
+GTGCCATCGGGCGACGTTCCCGATGTCGTACAGGAATCATTTATTAAGGCCTATCGCGCG
+CTGGATTCTTTCCGGGGGGATAGTGCTTTTTATACCTGGTTGTATCGTATTGCGGTCAAT
+ACCGCGAAGAACTACCTGGTTGCGCAGGGGCGTCGTCCGCCTTCCAGTGATGTAGACGCG
+ATTGAAGCAGAAAACTTTGAAAGCGGCGGCGCGCTGAAAGAAATTTCGAACCCTGAGAAC
+TTAATGTTGTCAGAAGAACTGAGACAGATAGTTTTCCGAACTATTGAGTCCCTCCCGGAA
+GATTTACGTATGGCAATCACCTTACGGGAGCTGGATGGCCTGAGCTATGAAGAGATAGCG
+GCTATCATGGATTGTCCGGTGGGGACGGTGCGTTCACGTATCTTCCGGGCGCGGGAAGCT
+ATTGATAATAAAGTTCAACCGCTTATCAGGCGTTGA
diff --git a/t/data/proteome_with_and_without_descriptions.faa b/t/data/proteome_with_and_without_descriptions.faa
new file mode 100644
index 0000000..d2547e9
--- /dev/null
+++ b/t/data/proteome_with_and_without_descriptions.faa
@@ -0,0 +1,29 @@
+>1234#10_00001
+MSKHGFFQITQKLFLRKGDELLILRDRKSGLGDLPGGRMNENEFFEDWSLSMQREIEEEL
+GSQVQIRVSTKPLFIHKHKVNEGNFPCIIIAYHADYLGGDIILSDEHDYISWEKVQTYEP
+SPLFTEYMLDAVNLYLKEYAPLVH*
+>1234#10_00002
+MIIKLKIILNSYLIYYFLRGQNTLIRTLLFEFPLTTFFVFLMVATFFIVNVFLPEHLIRQ
+YFLNHPGQIQPLSWIGAVFYHGNLIHLFGNMFYLFFLGRAVEYKAGKGRWLLFFFMAALI
+SSLLDSFIRGVILHDPTPVVGASGAISGIAAVAALLSPFSLRFNQRNIPFPVFLVAWIMV
+YSDITNVFTEDGVARWAHLGGFISVIFAAYFLKPTERKQLHSGFILNLIFIILTLILAFF
+YSNRS*
+>1234#10_00003
+VKTTLSGEIEKLRYEVAVKIVNLQGEVLDLRAEMKINFSEVNSKILKLQFEFEMAKIRKE
+LKTEIADLRAETKTDFLELQKSIVDIYKTISTQTRWILGVATLFAAIGKVIN*
+>1234#10_00005 imidazoleglycerol-phosphate dehydratase or histidinol-phosphatase
+LTDKLIGFYDPVRMKAERKTSETEIKLEMNLRGTGQYQFDTEIPFFEHMLSHISKHGLID
+LNLWLRGDIEIDCHHSVEDTAILMGATIHKQLGDKAGIFRYGHFTLTMDEVLTTVAVDLG
+GRYFFKYTGPELTGKFGIYDAELSLEFLQKLALNAKMNLHVVVHYGDNKHHVHESIFKAL
+GKALRMAIAQDSAAAGAIPSTKGVLE*
+>1234#10_00006
+MIAILDYGMGNIHSCLKAVSLYTKDFVFTKDHSTIENSKALILPGDGHFDKAMENLNSTG
+LRKTIDKHVTSGKPLFGICIGFQILFESSEEIAQGSKKEQIEGLGYIKGKIKKFHGKDFK
+VPHIGWNRLQIRRKDKSVLLKGIGDQSFFYFIHSYRPTDAEGNAITGLCDYYQEKFPAVV
+EKNNIFGTQFHPEKSHTHGLKLLENFIRFI*
+>1234#10_00007 imidazole glycerol phosphate synthase subunit HisH
+MIVIPAIDLFDNCAVRLFKGNYEEKKIYSSEPWKLAESFAKNGATLLHLVDLNGARNQLG
+VNEDSILKIRETTSLKVQLGGGIRDKEKLAYYDKIGINRFILGTAAVTNPDLLKYALDNY
+GKERVVVAVDARDGIVKIAGWEKDSGIHYRDLLERLVKAGIEHIVFTDIAQDGTLAGPNL
+EAYREILNSYPFQVIASGGIASLKDLMDLSSLKTKISLYGVITGKALYEGKLDLAKAISS
+I*
\ No newline at end of file
diff --git a/t/data/query_1.fa b/t/data/query_1.fa
new file mode 100644
index 0000000..0b8bacd
--- /dev/null
+++ b/t/data/query_1.fa
@@ -0,0 +1,8 @@
+>1_1
+AAA
+>1_2
+BBB
+>1_3
+CCC
+>1_6
+FFF
\ No newline at end of file
diff --git a/t/data/query_1.gff b/t/data/query_1.gff
new file mode 100644
index 0000000..a45c73e
--- /dev/null
+++ b/t/data/query_1.gff
@@ -0,0 +1,271 @@
+##gff-version 3
+##sequence-region abc|SC|contig000001 1 15000
+abc|SC|contig000001	Prodigal:2.60	CDS	172	1131	.	-	0	ID=1_1;gene=different;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q2G1X0,protein motif:TIGRFAMs:TIGR01002,protein motif:Pfam:PF07968.6;locus_tag=abc_00001;product=Alpha-toxin,beta-channel forming cytolysin,Leukocidin/Hemolysin toxin family protein;protein_id=gnl|SC|abc_00001
+abc|SC|contig000001	Prodigal:2.60	CDS	1804	1950	.	+	0	ID=abc_00002;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00002;product=hypothetical protein;protein_id=gnl|SC|abc_00002
+abc|SC|contig000001	Prodigal:2.60	CDS	1934	2131	.	+	0	ID=abc_00003;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00003;product=hypothetical protein;protein_id=gnl|SC|abc_00003
+abc|SC|contig000001	Prodigal:2.60	CDS	2621	3337	.	-	0	ID=abc_00004;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13350,protein motif:Pfam:PF02876.11;locus_tag=abc_00004;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00004
+abc|SC|contig000001	Prodigal:2.60	CDS	3445	4170	.	-	0	ID=1_2;gene=speH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0C0I6,protein motif:CLUSTERS:PRK13349,protein motif:Pfam:PF02876.11;locus_tag=abc_00005;note=SPE H;product=hypothetical protein,superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00005
+abc|SC|contig000001	Prodigal:2.60	CDS	4265	4990	.	-	0	ID=abc_00006;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13043,protein motif:Pfam:PF02876.11;locus_tag=abc_00006;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00006
+abc|SC|contig000001	Prodigal:2.60	CDS	5428	6429	.	+	0	ID=1_3;eC_number=2.1.3.3;gene=argF;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P99073,protein motif:CLUSTERS:PRK04284,protein motif:Cdd:COG0078,protein motif:TIGRFAMs:TIGR00658,protein motif:Pfam:PF00185.1;locus_tag=abc_00007;product=Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Aspartate/ornithine carbamoyltransferase%2C As [...]
+abc|SC|contig000001	Prodigal:2.60	CDS	6449	7384	.	+	0	ID=abc_00008;eC_number=2.7.2.2;gene=arcC1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q7A627,protein motif:CLUSTERS:PRK12353,protein motif:TIGRFAMs:TIGR00746,protein motif:Pfam:PF00696.22;locus_tag=abc_00008;product=Carbamate kinase 1,putative amino acid kinase,carbamate kinase,Amino acid kinase family protein;protein_id=gnl|SC|abc_00008
+abc|SC|contig000001	RNAmmer:1.2	rRNA	7556	9112	.	+	0	ID=abc_01705;inference=COORDINATES:profile:RNAmmer:1.2;locus_tag=abc_01705;product=16S ribosomal RNA
+abc|SC|contig000001	Prodigal:2.60	CDS	9419	9646	.	+	0	ID=abc_00010;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00010;product=hypothetical protein;protein_id=gnl|SC|abc_00010
+abc|SC|contig000001	Prodigal:2.60	CDS	9952	10899	.	-	0	ID=abc_00011;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF03595.11;locus_tag=abc_00011;product=C4-dicarboxylate transporter/malic acid transport protein;protein_id=gnl|SC|abc_00011
+abc|SC|contig000001	Prodigal:2.60	CDS	11148	11336	.	+	0	ID=abc_00012;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00012;product=hypothetical protein;protein_id=gnl|SC|abc_00012
+abc|SC|contig000001	Aragorn:1.2.34	tRNA	11803	11878	.	-	0	ID=abc_00013;inference=COORDINATES:profile:Aragorn:1.2.34;locus_tag=abc_00013;product=tRNA-Arg(tct)
+abc|SC|contig000001	Prodigal:2.60	CDS	12241	12375	.	+	0	ID=abc_00014;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5;locus_tag=abc_00014;product=Gonococcal growth inhibitor III,Staphylococcus haemolytic protein;protein_id=gnl|SC|abc_00014
+abc|SC|contig000001	Prodigal:2.60	CDS	12432	12566	.	+	0	ID=1_6;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5;locus_tag=abc_00015;product=Gonococcal growth inhibitor III,Staphylococcus haemolytic protein;protein_id=gnl|SC|abc_00015
+abc|SC|contig000001	Prodigal:2.60	CDS	12699	13385	.	+	0	ID=abc_00016;eC_number=3.-.-.-;gene=yfnB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O06480,protein motif:CLUSTERS:PRK09449,protein motif:TIGRFAMs:TIGR02254,protein motif:Pfam:PF00702.1;locus_tag=abc_00016;product=Putative HAD-hydrolase yfnB,dUMP phosphatase,HAD hydrolase,haloacid dehalogenase-like hydrolase;protein_id=gnl|SC|abc_00016
+##FASTA
+>abc|SC|contig000001
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+ATACCCACCACATTTGGTGGAGAACCAAAAATTAGCCGAAAAACATCATTTCTGAAGTTA
+TCGGCTAAAGTTATAAATTATATTTATTTGTACATGAACAAATAATTTACATTAATTTGT
+CATTTCTTCTTTTTCCCAATCGATTTTATATCTTTCTGAAGAACGATCTGTCCATTTATC
+TTTAGTATTGGTACCTTTCCAATTTGTTGAAGTCCAATGCAATTGGTAGTCATCACGAAC
+TCGTTCGTATATTACATCTATATTTGTTTGTTGTTTGGATGCTTTTCTATCCATAGTAAT
+AACTGTAGCGAAGTCTGGTGAAAACCCTGAAGATAATAGAGAACTTGCTTTGTTAGGATC
+AAGGAAGTTCTCTGCTGCTTTCATAGAACCATTTCTAGTTTTCATGAAAAGTTGATTGCC
+ATATACCGGGTTCCAAGAATCTCTATCATATGGTCCCCAATTTTGATTCACCATATTGTT
+AAATATCACTTTCCAGCCTACTTTTTTATCAGTTGGGCTCTCTAAAATTGTTTTGAAATC
+AGGTTGAACATATTTCAGTGTATGACCAATCGAAACATTTGCACCAATAAGGCCGCCAAT
+TTTTCCTGTATCATCACCAGTAACATTACCGTTGAATCCATAAGTTAAAGTACTCATATA
+CTCTTTTGTATCAATCGAATTTCTTGGATAGTAATCAGATATTTGAGCTACTTCATTATC
+AGGTAGTTGCAACTGTACCTTAAAGGCTGAAGGCCAGGCTAAACCACTTTTGTTAGCACC
+TTCTTCGCTATAAACTCTATATTGACCAGCAATGGTACCTTTCGTTCTAATAACTAGCAG
+TTTTTTATTGTGATTTTTATCATCGATAAAACTATAAAATACTTTTTTGTGCATGCCATT
+TTCTTTATCATAAGTGACTAAATCACCTGTTTTTACTGTAGTATTGCTTCCAATATCTGT
+AGTACCGGTTTTAATATTAATATCAGAATCTGCGGCATTAGCGACAGGATTCATTAATAT
+GGAACCTAGCAATAGTGTTGTTGTTACTGAGCTGACTATACGTGTTTTCATTTTCATCAT
+CCTTCTATTTTTTTTAACGATTTGAGGAAACAATAATCAATATGTCAATTTAGAATATTG
+CAGTTGAGACATATCAAATATTTATAATCGTTTATATTAGTATATTTAATGTAGCAACTG
+ATAAATTACTGAGTGATGATGAGTGATTATTTTAAGAATATGTTTTTAACTTTTATTTAA
+AATTTGAAAGGAAGCATTTCAATTTCGAGGGTTAGTCAAAGTTGAATAAATTCTTTATGA
+AACAAGGAAAAGACATAGCTAATTTTATTGATTAATTTCTTTAAAACTAATGATTTGTTT
+GATTTAAAAATGTAATCGATTACAATATAAAAATACAAATATCTTAGAATTAAATCAATT
+AATTAACTATTAAATAAAAATTAAATATATATTAACTACTGTAAATTAATAAATAGAAAT
+AGAGAAAAAGGGTATTAATTATGTTTGGAATTTCGTCGGGAATAATTTTTGCCATTACAT
+AGAAATATCTAATAGATAATGAAAAAGTATCGTATGTATTTTTAATATAGTGTAAAATAT
+CATATGTAAAATAAAATGTAGATTTTTAGTTAGAGGCATTATAAGAAAATTTTGAGTATA
+GGTTAGCTTTTAATTATGAATCTTATTGAAATTTGATTAATAAATATATGATAGGGGATT
+AAAATGAAACTATTTTATATCGTATTTCTTATTATTATATGGCTGAATATATTTTTAGGA
+AATGAAATTATCCATACACTGACTGTTTTAATAACAACATTGTATATTGTTAATTCAAGA
+AAGGGGATTAAAAATGACAGAGTTGAATAATATTATAAACTCTCTTCAATCTTTGTTTGA
+GTCTGAATCAGGCTATAAAATTTCGAAAAATTCAGGAGTTCCATATCAAACAGTACAAGA
+TTTAAGAAATGGGAAAACCAAACTAGAAGATGCTAGATTTAGAACGATTATTAAACTTTA
+TAGTTACTATGTCTCATTAAAAGAACATTAATCATGGGACAATAAAGTATTGCTATAATA
+ATTGAATCATTAAATGAAGGAATAGAGCCTAAGACATAATAAATCAATGTCTTAGGCTCT
+ACAATATTATATTGGTAGTAGTTGACCGAATGAAAATGACCATGTAACAAGCATTTTTGG
+GGCCCCAACACAGAAGCTGACGAAAAGTCAGCTTACAATAATGTGCAAGTTGGGGATGGG
+CCCCAACACAGAGAATTTCAAAAAGAAATTCTACAGACAATGCAAGTTGGCGGGGCCCCA
+ACATAGAGAAATTCAAAAAGAAATTCTACAGACAATGCAAGTTGGGGAGTGACAACGAAA
+TAAATTTTATAAAATACCATTTCTGTCACATTCCTACTCCCAATTCCATTTAAATATATA
+GAAAATTTCACCTATTATATACATCATACACTTTAAAAATTATCATTTAAATAATCTGAT
+TTGGAATTAAAGTGCATGAAGTATAAGTCACCTTCATATACTAATCAAAGAGGACGTCAA
+CAGTTATTTTATTAGGATTTTTAACATAAACATTTGCTAGATCTGAATGTAATCTTTTGC
+TTAAATCAATAGTGTAGTTATTACCGCCACCGGTGATCTTAAGCTTACCTTTATTACGAT
+TTTCGGTATATAATATTTTATTTTTTATTAACGCTTCTCGTGCACGGAAATCGATTTCTT
+TCAATGTTAATACTGGTTTATTGCCTTGGTATATTTTATGCGCACCAATAATCGTTTGTA
+GTTTATCTTTGTATTGCACAAAAAGATTATAAGTTTTATCAGAAGGTTTTGCGGCTGGTG
+TAACGCCACCTGTAAATGTCTCTCTATAAGACCACCATAACTGATCAGTATCTTTGTCTT
+TTAGTCCAAACACATCTACGTAACGATCTTTTAACTGATTAATATTTCCCCAACTTTCAG
+CGCCCCATAAAGATATATGTGCTGACCATGAATATCTCTTAAGTTCAACATAAATGTTTC
+CATTATCATATTGATATAGCCATTTATTTGAAAATGAAAAATGAGGCTGTGTGTAATATT
+TAATTAATTCATTGATGTTAGTTTCATCTTGACCAATGCTATAAGCTTTAGCTTCAGAGT
+AAAAACTAAATACTGGTTTTTGATTTTGAGGTAATACAGTACCTAATAGTAATAATGTTG
+TCGTTAAAATTATATTTTTCGTGATGTTCTTACTCATTAGAACATCTCCTTTCAGAGGAA
+TCATGATACGAGGAATAAGAAATTTAAATGTGAGCGAAGTCAATATAGTATTTGCGATTA
+TTTTTATTAACTCCATTATTATTGTTAGTTTGATTTTTCGAGGATAACTTCAATTTTTGC
+ATTTTGAGGTTTTTTAACATATCTATTTGCATCAGTTGATGGCAACCTTTTACTTAAATC
+TATTGTGTAGTTATTGTCTGTACCTGTTATTTTAATTTGTCCTTTATTATAAGAATTATT
+ATATAATTTTTTACTTTTAATTAATGTTTGACGAATACGAAAATCTAATTCTTTTAAAGT
+TAAAACAGGCTTATTGCCTTCATAAACTGGAAATCCGCCAGTAAACGTTTCTGCTTTATC
+TTTATATGTTACATTCAGTTTATAGTGTTTATCGTTAGATGTTGCTGCAGGAGTAACACC
+ACCAGTAAACGTTTCTTGAGATAATGCAAAAGAATCAATGGTTTCTTGGTCTTTTATGCC
+AAAAATATCAACGCTTTTATTTCTTAATTGGTTGATATTGCCCCAACTTTCAGGTCCATA
+AACTTGAATATGACTATACCAAGAAAACTGTAACAACGTTGCATGAATCGTACCGTTATC
+TTTTTGCCATAACGTACTGTTAGAGAAGGTTAAATATTTTTGCGAGTAATATTTAGTTAA
+CTCATTAACGTTAGTTTCGTTTTGATTTATATAATAAGCTTTCGCTTCAGATGAAGAATT
+GATAGGTGTATTAGGAAATTGTGTAAATGCTGTACCTAATAGTAACAATGTTGTTGATAA
+AATAATTTTTTTCGTGATGTTATTGTTCATTCGAATTTCTCCTTTGAGTATTGTTGGAAT
+GTTTAATTATAAAAAGGGGTTAATTAGATAATTGAAATTATCCGCATTTACAAAAGGTAA
+TAGGTTAGTTAGATTTTTCGAGTATGACTTCAATTTGTGCATTTTTAGGATTTTTAACAT
+AACGGTTTGTGTCAGTTAATTTTAACTTTTTACTTAAATCAATCGTGTAATTATTTCCAT
+CAGCAGTTATCTTAATTTGACCTTTATTAAATTCTCCGTTATATAACTTTTTATTCTTTA
+TTAATGTTTGACGAATACGGAAATCTAATTCTTTTAAAGTTAATACTGGTTTATTTCCTT
+TGTAAAATTCATGTCCACCGATGATAGTTTGTTGTTTATCACTATATTTTAAAAATAGTC
+TATAAGGTTTATCAGATGAAGTAGCTGCTGGCGTAACACCACCAGTAAATGTTTCATCAT
+AAGTCCAGTAACCTTCAACTGTGTCCTCATCTTTAGTTCCAAATATATCAACGTATTTAT
+TTCTTAACTGATTAATGTTTCCCCAACTCTCGGATCCAAACACTTGAATATGACTATACC
+AAACCCACGTTTGCAATGTTGCATGAATGCTACCATTGGGCTTTTGCCATAGCCATTTTC
+CAGATAATGAAAAATGCGGCTGAGTGTAATATTTGATTAACTCATTGATATTAGTCTCGT
+TTTCACTGATATTATAGGCTTTTGCTTCAGATGAAAAACTGATTGGTGTTTTAGGAAGTT
+GTGTTGATGTGGTTCCTAAAAGTAACAATGCTGTTGATAAAACTAATTTATTCATGATGT
+TCTTTTTCATATGAAAATCTCCTTTGCGTGAATTACCCAAAGTATATAAGCTATTACACC
+GATTCGGAATTAAATAAAAGCTAAAACTATGTTAAATAAACTTAAACAGTTAGTAGTGTT
+ATTTAAGCAAAACTTATCATTTTTAAGTTTTGGACAGAAACAGTACTTAATAAAGTAGGC
+GGGAGTTATATATTAAAAACGACACGTTACAATTATTCTCTAATCAATTGCATTAAATTG
+TTTGATAATTGAATTTTCTAACTACTTGAAAAATAGTTATACTTTAAATGTAGTACTTAT
+TTTAATTATTTCCTACTACTTAAATTTAATATTAATAAAATGTTCATTTAATTATTGATA
+AAATATTACAAATTTTAATAGTAGGTTGTGTTTATTTTGTATGCGCTTACAATTTAGGTG
+TAACTAAAATAAAAGGAGTTGTTATTAATGAAAAATTTACGAAACAGAAGTTTTTTAACT
+TTATTAGACTTTTCACGACAAGAGGTAGAATTCTTATTAACACTCTCCGAGGATTTAAAA
+CGTGCTAAATATATTGGCACTGAAAAGCCTATGTTAAAAAATAAAAATATTGCACTGTTA
+TTTGAAAAAGATTCTACAAGAACGCGATGTGCATTTGAAGTTGCAGCGCATGATCAAGGT
+GCAAATGTAACTTATTTAGGCCCAACTGGATCACAAATGGGTAAAAAAGAAACAACTAAA
+GATACTGCACGTGTGCTTGGTGGAATGTATGATGGCATTGAATACCGTGGTTTTTCACAA
+AGAACAGTAGAAACTTTAGCTGAAAATTCAGGCGTACCAGTGTGGAATGGTTTAACTGAT
+GAAGATCATCCTACTCAAGTTCTTGCTGATTTCTTAACAGCAAAAGAAGTCTTAAAAAAA
+GATTATGCAGATATTAACTTTACATATGTTGGAGATGGTCGTAATAACGTTGCAAATGCA
+TTAATGCAAGGTGCTGCCATTATGGGTATGAACTTCCATTTAGTTTGTCCAAAAGAATTA
+AATCCAACAGATGAATTATTAAATCGCTGTAAAAATATTGCCGCTGAAAATGGTGGCAAC
+ATATTAATCACAGATGATATTGACCAAGGTGTAAAAGGTTCGGATGTAATTTACACTGAT
+GTTTGGGTATCAATGGGTGAACCTGATGAAGTATGGAAAGAACGACTTGAATTATTGAAA
+CCATATCAAGTAAATAAAGAAATGATGGATAAAACTGGTAATCCAAATGTTATTTTTGAG
+CATTGCTTACCATCTTTCCATAATGCTGATACGAAAATTGGTCAACAAATTTTTGAAAAA
+TATGGTATTCGAGAAATGGAAGTTACAGATGAAGTATTCGAAAGTAAAGCTTCAGTTGTA
+TTCCAAGAAGCTGAGAACAGAATGCATACAATCAAAGCAGTCATGGTTGCTACATTGGGT
+GAATTTTAAATGATATAAGGAAGTGAATATGATGGCGAAAATAGTAGTAGCATTAGGTGG
+TAATGCTTTAGGAAAATCACCTCAAGAACAACTCGAGCTTGTTAAAAATACTGCGAAATC
+ATTAGTAGGATTAATAACAAAAGGACATGAGATTGTTATTAGTCATGGTAATGGACCACA
+GGTTGGAAGCATTAATTTGGGACTTAACTATGCTGCAGAACATAACCAAGGTCCGGCATT
+TCCATTTGCTGAATGTGGCGCAATGAGTCAAGCTTACATCGGCTATCAATTACAAGAAAG
+CTTACAAAATGAATTGCATTCTATTGGAATGGATAAACAAGTGGTAACACTAGTGACACA
+AGTTGAAGTTGATGAAAATGATCCGGCATTTAACAATCCTTCAAAACCAATTGGGTTATT
+TTACAACAAAGAAGAAGCTGAACAAATTCAAAAAGAAAAAGGATTTATATTTGTTGAAGA
+TGCTGGAAGAGGATATAGACGCGTTGTTCCTTCACCACAACCCATCTCTATTATTGAATT
+AGAGAGTATTAAAACACTTATTAAAAATGATACACTCGTTATTGCTGCTGGTGGTGGAGG
+TATACCAGTAATTAGAGAGCAACATGATGGTTTTAAAGGTATTGATGCAGTTATAGACAA
+AGATAAAACAAGTGCGTTGTTGGGTGCTAATATTCAATGCGATCAATTGATTATTTTAAC
+AGCAATTGATTATGTATATATTAATTTTAACACTGAAAACCAACAGCCTTTGAAAACAAC
+AAATGTTGATGAATTAAAACGATATATAGACGAAAATCAATTTGCAAAAGGAAGTATGTT
+ACCAAAAATTGAAGCAGCCATATCATTTATTGAAAACAATCCAAAAGGAAGTGTGCTTAT
+AACATCATTAAATGAATTAGATGCTGCCTTAGAGGGTAAAGTAGGTACTGTGATTAAAAA
+GTAATTGAATTGAAACGCTTTTCAATTACTATATGTCAAATGCATGATTTTTATTATTTA
+TGTGCACCCCCTGAAAATAATGCCTCTATTTTGATGCGGGGTGCACTTTCTTAATTTATA
+TTTATAAAATCTTTAAGGTAGAAATGCTAGGTTAAATGTCGAAGGAGATGAAACCGTGGA
+AAATACAATTAATGAAAGTGAAAAGAAAAAACGATTTAAATTAAAAATGCCAGGTGCATT
+TATGATTTTATTCATTTTAACGGTTGTTGCAGTTATAGCAACATGGGTTATTCCTGCTGG
+TGCATATTCTAAACTTTCTTACGAACCTTCATCCCAAGAACTAAAGATAGTTAACCCTCA
+TAACCAAGTGAAAAAGGTTCCGGGTACGCAACAGGAACTAGACAAAATGGGGGTTAAAAT
+TAAGATTGAACAATTTAAATCAGGTGCAATTAATAAGCCAGTATCAATTCCGAATACTTA
+TGAAAGATTAAAGCAACATCCAGCTGGACCAGAACAAATAACAAGTAGCATGGTTGAAGG
+TACGATAGAAGCGGTCGATATCATGGTATTCATTCTTGTACTAGGGGGACTTATTGGCGT
+AGTTCAAGCCAGTGGTTCTTTTGAATCGGGATTGTTAGCTTTAACGAAGAAAACAAAAGG
+GCATGAATTTATGCTAATTGTGTTTGTATCAATACTAATGATTATCGGCGGGACGTTATG
+TGGTATTGAAGAAGAAGCTGTAGCATTCTATCCGATTTTAGTCCCTATATTTATAGCGTT
+AGGATACGATTCTATCGTTTCAGTTGGTGCCATATTCCTTGCCAGCTCTGTCGGTAGTAC
+ATTTTCAACTATTAACCCGTTCTCGGTTGTAATTGCCTCTAATGCCGCTGGTACAACTTT
+TACGGATGGCTTGTATTGGAGAATAGGTGCTTGTATTGTCGGTGCGATTTTTGTTATTAG
+TTATTTATATTGGTACTGTAAAAAAATTAAAAACGATCCTAAAGCGTCATATTCTTATGA
+AGACAAAGATGCTTTTGAACAGCAATGGTCTGTATTAAAAGATGACGATAGTGCCCATTT
+TACTTTGCGTAAGAAGATAATCCTTACATTATTTGTACTACCATTTCCAATTATGGTATG
+GGGAGTTATGACGCAAGGTTGGTGGTTCCCAGTTATGGCTTCAGCATTTTTAATATTTAC
+AATTATAATAATGTTTATTGCTGGGACAGGTAAATCTGGATTGGGAGAAAAAGGAACTGT
+AGATGCATTTGTCAATGGTGCATCAAGTTTAGTAGGTGTATCTTTAATTATTGGTTTAGC
+TCGAGGTATTAATTTAGTGTTGAATGAAGGTATGATTTCAGATACAATCTTACACTTTTC
+ATCATCTTTAGTTCAACATATGAGTGGACCATTATTTATCATCGTATTACTATTTATTTT
+CTTCTGTTTAGGTTTTATCGTGCCATCATCTTCTGGATTAGCGGTATTATCAATGCCTAT
+CTTTGCACCACTAGCTGATACAGTAGGTATACCAAGATTCGTCATCGTTACGACATATCA
+ATTTGGTCAATATGCGATGTTATTCTTAGCGCCGACTGGACTTGTTATGGCCACACTACA
+AATGTTAAACATGCGATATTCACATTGGTTCCGATTTGTATGGCCGGTAGTTGCTTTTGT
+ATTGATTTTCGGTGGCGGAGTACTAATTACGCAAGTACTAATTTATTCATAATTTGAAAT
+GCTATATTATAAAAATACTAATTGGGTTTTATGCATCTCGTAGGTTTGTAGAAATACTAA
+ACTAAGCGAGGTGCATTATTATTTTTGATTAAGAAAATAATGACGGTAATGATAACACTA
+GTAAGTAATTGATACAATGCTCTATTTAATAATGATATTTTTAAAATTTGTTTTTAATGT
+AATGTTAGATCTATGGTATATTATATTTATCGTGGTAAATATGTATTTGCTGTAAACAGT
+TTTTGTAACACATGACACATATGGTTGTGAAAATAGCATATATAAAGGATGGCTATAAAT
+GACACATTTGACAAAGGTTTTAGATACACTAACTGGAATATGCGTAGTATTATTATTTAG
+TAAATATTTTGTGGCGTATGCAAATATGGTGTTTGATTGGAATTTAAGATGGTATTTGCT
+AGAAAACATACCACATTTGCCAATTATATTATTTATTCTGATGTTTATTTTCGGAGTACC
+TTCTGAAATGATAAAAGATAGGCAAAGGAAAAATAACGGTGTTTAATTTATCGATATTTA
+GAGGTGATAAAATTTGTCAACTATTACAAAGACTAAAATAAAAATCTTTATTAATTATTT
+TATTGGTATATTAGCCTTGGCGTTTGCTGCTTATATATTATTTAAATAAAGGTTAGAGTG
+AAACGTGTTTATGAACTAGACGTTCTAGTATAGTTACTACAGCTTTAGTCAGAAGGTATC
+ATTGATAAGATCATATTAAATCAAAGAGGCATTGATATACACTAAAAAGAGGCAAGATTA
+CCTGCCTCTTTTTTAGTTATTAAATATGCGTGTTAATTCTTGGTAATAGTGTTACTGAGA
+CGCATTACGAAGAAATTTATCTTGATTTTCCTTTTTTAAAAAGAAGTGAAGATATCCTAT
+AAAGACTCTAAGTACTATTACAGTGGCTAATAACACTTCGATAAACAAAAGACCTTTCCA
+AATATCTGGAAACATAAGTACAGGCAAACTGTTCTTTAAAGCAGTTGCTGAGATTACTAA
+AGGGAATGTGAAAGCTGAAAATACGGGCGAAAAAGGTTCTTTTAGTAATTTAGGCAATTG
+TATAATGATATAAAAATAAAATATTTGAGCTAATATTAAAAATATAATTACGATAAAAGC
+ATTCGCCTTAGGAAAGGCTATAACATATGCTGCAGCGACTAAAGAAAATGGTGCACAAAT
+TGTCGATGTGTTCGGTTTGATTGACGTCTGCAATGGAAATGCTTTTAATCGCTTGAAAAC
+AATAGGTAAGACAATACAAGTAGCTACAAAGCCATATATTACTGTTAATTGACCTATGAA
+AAAACATCCGCTAACGGGTGCCGTCAATCCTGCGATAGCAATACCAATAAAAAGTACAGT
+CCACGAAGGATAAACATTTTCAAGTGAAAAATCTTTTAAATATTTTATTGAAAAAATAAT
+CATATGCGTCATAATTCCCACAAGGCATAAAATCCAAATAGGCGTTATTAAGCTATTGAT
+AAAAGTTATGTTACTAAAAAATGTATTTAAATAAGTAGTACCTAAAAAGCCAGACATGAA
+AAATGTTGTGAACACTGATGAAACTAGAGGACTGTTTAATTGTTCTTTCACATTATTAAA
+ATATTTGATCATAGTACATAAAAGGTGAATCCAAATCAAGAAAGCAAAGATTCCGCAAAC
+AGCGTTTAAAGTAAGAGATAAGTCTTTTAATAGATTACCCAGGCCTAATAATCCTAAGAC
+TAGTCCTGACGTTACTAGAGGTGCTTTTTGAAGTCTCATGATTTACAACCTTTCTTATGT
+GATTTTCTTCACTAATTATATCATGATAGCTATGGCCAATTAATAAGAAGAGTGTGTACT
+ATTACGTTATTAGAGTATGTATATTGGATTGATAGACACATAATTGACATTTAAATCTCA
+AAATTAATGATATTTGAGGTATGCTTTGCAACCTAAATATTGGAATATGTGGAAAAGTAA
+TTATTTAATTTAAATGATGATTGAAGATTTACAAGGGGGTGTACAAAATGAGAAATCAAA
+TTCAAAAACTATTAGACAGTGATTTGAGCAGTTTACATATATCGAAACAAACAGGAGTTC
+CACAAAGCACAATACACAGAATGAGAAAAAAAGAAAGATCATTAGACAATATGTCATTGA
+AAAACGCTGAACTACTTTATAAATTTGCCAATAGTATATTTAGCAATGAAAATTAAGAAG
+AATAATTAGTGAGTTAAATAATTAAAAAACCACGTCTCGATGGACGTGGTTATTTTTTTA
+AATGTATTTAATTATAGGATTTCGGGCCCCTGAAAAGTCCCTAAAATTTTGAATAGTATC
+AAAATCTATAGGAATAATATAATAATAAAAATCTCTACGCATGCAAATGGTGAGTGTTAA
+AAATCTTGGTGTAGTGTTGGTGTAGTTCAGGTGCAGTATAGATGAAATTTGAGCACAAAA
+TTACTTAATTAAAAATGTATAAACAAGTGTCAGACGCGTGTCAATTGTGTGTCAAAAAAT
+TCTATGACGAATAAGCATGCTTGTTTTAATTACTGTATTTTCGGGGCTTATGCGGGGGAC
+GTTTTGGTGACGCATCATACTATATTACTGAAATTCAAAACAAAAAGAGCCCCGTAATCA
+CGGAACTCTTTTGTTTGGTAATGCGTATAAAAATACCTATAAACGTCCTGGGAGGGATTC
+GAACCCCCGACCGATGGCTTAGAAGGCCATTGCTCTATCCAGCTGAGCTACCAGGACACG
+TTTAACAACACAAGAATTATTATATCTAAATGAACTTAAATTAGCAATACCTTGTAAATA
+AAAAATGTTTATATTTTTCACTATTATAGAGCTATTTATCTAAAAAGGTTCAATAAGACT
+TAAATGCGAATTCAGGCAACTTAATTGTGTTAAATACAGTTTTGAATGCCTAACTGTATT
+TCTTTTCTCTTTAAAATACAGTTAAGTACATTATAAGATGTTGTGCGGATAAACAAACTA
+ATTGCATCAAATTTATTTTAAAATAACAACAACAAAACGTTAAGCGAATAACATTTCGGT
+GATTTAAAAGCTACGCACGTTTTTGTTATCTTCAAATTTAAATTTTAAGGAGTGTTTTCA
+ATGGAAGGTTTATTTAACGCAATTAAAGATACCGTAACTGCAGCAATTAATAATGATGGC
+GCAAAATTAGGCACAAGCATTGTGAGCATCGTTGAAAATGGCGTAGGTTTATTAGGTAAA
+TTATTCGGATTCTAATTTCAATATGTTATGTAAGTAATCAGTATTATTTCAAAGGTGAGG
+GAGAGATTTAAATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAAC
+AACATGATAGTGTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTT
+TACTAGGTAAATTATTTGGATTCTAATATAATAACTAATATTCTTTAAAATAAACTGGGT
+GAGCATACTTTAATGTTATGCACTCAGTTTATTTTATTTGCAGAAATTTGAGCCTCTGTT
+AAGATTTAGATACATAGACAATATAGGAGATGGGGAAATTGGGATATAAAAATATTTTGA
+TAGACTTTGATGATACAATTGTTGATTTTTATGATGCAGAAGAATGGGCGTTTCACTATA
+TGGCGAATGTTTTTAATCATAAAGCAACAAAGGATGATTTTTTAACATTTAAAAAAATCA
+ATCACCAACATTGGGAAGCTTTTCAACAAAATAAATTAACGAAGTCTGAAGTATTATCAG
+AACGATTTGTGAATTACTTCAAACATCATCAAATGGAAGTTGATGGGCATCGTGCAGATG
+TGTTATTTAGAAATGGATTAGCAGAAGCTAAAGTTAAATACTTTGATCAAACATTAGAAA
+CAATTGTCGAATTATCGAAAAGACATGATTTATATATTGTTACTAATGGTGTAACCGAAA
+CGCAAAAGAGAAGGTTAAATCAGACGCCGTTGCATAAATATATTAAAAAGATATTTATAT
+CTGAGGAAACAGGATATCAAAAACCTAATCCGGAATTTTTTAATTATGTTTTTAATGATA
+TTGGTGAGGATGAAAGACAGCACTCGATTATAGTTGGAGATTCTTTAACATCTGACATTC
+TAGGTGGAATCAATGCGGGTATAGCTACTTGCTGGTTTAATTTTAGAGGATTTGATCATA
+ATCCAGGAATTATACCTGATTATGAAATTAATTCATGGAAACAACTAAATGATATTGTAC
+GTTAAGACAAATAATTTGAAAATGATTTATATTAGCTTAAGAAAGTAATTGTAAAAACGA
+CACTACGAAAAGATGCATTTTCATGCTCGTAGTGTCGAATATTATTATCTTATGGTTTCC
+ACAATAAGACATCATGTCCCTCTATATTTTTAGCTTCCACATCTGTAAATCCATGATGTA
+AAAAGAAATCCTTAGAATCATTTCTGCCAATGGCTTTAATTGGCATATTGAAACTCTTTG
+CAAAATCAATCAATTCTGAAGCGTACCCTCTGTTTTGATATTTTGGTAATACTTCTAACT
+TCCATAATAATATATAATCTTCAAAATCTGGGAAGTAGATTTCTTCGACATCACCTTTTT
+TTAGTAATGCCATTCTAGCTCCTAATTGATCTCCGACAAATATGCCATAAAATGGTGAAT
+CTGAACTTGCATCAATCATTTGACCGTTTAACTCATTGACCATGTATAAGTCTTTGTTGC
+CAAACGCTCTAAAGTTTTCGAATAATTCGTCAGTTTTGTAATTAATTTCAAGACGTTTGA
+TTTCACTCATATTTATCTACCCCTTATTTGTTTACTACTATTATTATACATTAAACCACT
+TGTGTTTTTCATTGATTTGAAAATGTAAAACAATGAACATTATAAATTTTCCATGATTAA
+ATTGATTTTCAAGCCGTAAAATTGTCAATCACCGGTATAGACTTTATACTATAGTTTGTA
+ATCGCGTACTTAAGGAGAAAAAACATGGACTGTAAAGTAGTTAGTTTAAATGAAAAAGAT
+CAGTTTATACCAAAAATAAAGAGCAGTGACCCTGTAATAACAGGATTATTTCAATATGAT
+GCAGCTCAACAAACTAGTTTTGAAAAAAGGATGTCTAAAGAAAATAATGGAAGAGAAGCG
+GCATTAGCGAATGTTATTCGTGAATATATGAGTGATTTAAAGCTTTCAAATGAACAAGAA
+TTAAACATACAACATTTAGCTAATGGTTCAAAAGTTGTGATTGGTGGACAACAAGCAGGG
+CTTTTCGGGGGACCATTGTATACATTCCATAAAATATTTTCAATCATTACTTTATCTAAG
+GAATTAACGGATACACATAAGCAACAAGTAGTACCAGTTTTTTGGATTGCAGGAGAAGAT
+CATGATTTCGATGAAGTGAATCATACATTTGTTTATAACGAAAATCATGGGTCGCTGCAT
+AAGGTTAAATATCATACAATGGAGATGCCAGAGACGACAGTCTCTAGATATTATCCTGAT
+AAGGCTGAGTTGAAACAAACTTTAAAAACGATGTTCATTCATATGAAAGAAACTGTTCAT
+ACACAAGGTCTACTGGAGATTTGTGACAGAATTATTGACCAATATGACTCGTGGACTGAT
+ATGTTTAAAGCACTACTGCATGAAACATTTAAAGCATATGGCGTTCTATTTATAGATGCG
+CAGTTTGAGCCGTTAAGAAAAATGGAAGCGCCTATGTTTAAAAAGATTTTGAAAAAACAT
+CAGTTGCTTGATGATGCTTTTAGAGCAACACAACAACGTACTCAAAATCAAGGCTTGAAT
+GCGATGATACAAACAGATACAAATGTTCATTTATTCTTACATGATGAAAATATGCGCCAA
+TTAGTTTCGTATGATGGTAAGCATTTTAAATTAAATAAAACAGATAAGACATATATAAAG
\ No newline at end of file
diff --git a/t/data/query_1_alternative_patterns.gff b/t/data/query_1_alternative_patterns.gff
new file mode 100644
index 0000000..99df45f
--- /dev/null
+++ b/t/data/query_1_alternative_patterns.gff
@@ -0,0 +1,262 @@
+##gff-version 3
+##sequence-region abc|SC|contig000001 1 15000
+abc|SC|contig000001	Prodigal:2.60	CDS	172	1131	.	-	0	ID=1_1
+abc|SC|contig000001	Prodigal:2.60	CDS	1804	1950	.	+	0	ID=abc_00002;
+abc|SC|contig000001	Prodigal:2.60	CDS	1934	2131	.	+	0	ID=abc_00003;inference=ab initio
+abc|SC|contig000001	Prodigal:2.60	CDS	2621	3337	.	-	0	ID="abc_00004";inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13350,protein motif:Pfam:PF02876.11;locus_tag=abc_00004;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain
+abc|SC|contig000001	Prodigal:2.60	CDS	3445	4170	.	-	0	ID='1_2';gene=speH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0C0I6,protein motif:CLUSTERS:PRK13349,protein motif:Pfam:PF02876.11;locus_tag=abc_00005;note=SPE H;product='hypothetical protein,superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain';protein_id=gnl|SC|abc_00005
+abc|SC|contig000001	Prodigal:2.60	CDS	4265	4990	.	-	0	id=abc_00006;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13043,protein motif:Pfam:PF02876.11;locus_tag=abc_00006;product="superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain";protein_id=gnl|SC|abc_00006
+
+##FASTA
+>abc|SC|contig000001
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+ATACCCACCACATTTGGTGGAGAACCAAAAATTAGCCGAAAAACATCATTTCTGAAGTTA
+TCGGCTAAAGTTATAAATTATATTTATTTGTACATGAACAAATAATTTACATTAATTTGT
+CATTTCTTCTTTTTCCCAATCGATTTTATATCTTTCTGAAGAACGATCTGTCCATTTATC
+TTTAGTATTGGTACCTTTCCAATTTGTTGAAGTCCAATGCAATTGGTAGTCATCACGAAC
+TCGTTCGTATATTACATCTATATTTGTTTGTTGTTTGGATGCTTTTCTATCCATAGTAAT
+AACTGTAGCGAAGTCTGGTGAAAACCCTGAAGATAATAGAGAACTTGCTTTGTTAGGATC
+AAGGAAGTTCTCTGCTGCTTTCATAGAACCATTTCTAGTTTTCATGAAAAGTTGATTGCC
+ATATACCGGGTTCCAAGAATCTCTATCATATGGTCCCCAATTTTGATTCACCATATTGTT
+AAATATCACTTTCCAGCCTACTTTTTTATCAGTTGGGCTCTCTAAAATTGTTTTGAAATC
+AGGTTGAACATATTTCAGTGTATGACCAATCGAAACATTTGCACCAATAAGGCCGCCAAT
+TTTTCCTGTATCATCACCAGTAACATTACCGTTGAATCCATAAGTTAAAGTACTCATATA
+CTCTTTTGTATCAATCGAATTTCTTGGATAGTAATCAGATATTTGAGCTACTTCATTATC
+AGGTAGTTGCAACTGTACCTTAAAGGCTGAAGGCCAGGCTAAACCACTTTTGTTAGCACC
+TTCTTCGCTATAAACTCTATATTGACCAGCAATGGTACCTTTCGTTCTAATAACTAGCAG
+TTTTTTATTGTGATTTTTATCATCGATAAAACTATAAAATACTTTTTTGTGCATGCCATT
+TTCTTTATCATAAGTGACTAAATCACCTGTTTTTACTGTAGTATTGCTTCCAATATCTGT
+AGTACCGGTTTTAATATTAATATCAGAATCTGCGGCATTAGCGACAGGATTCATTAATAT
+GGAACCTAGCAATAGTGTTGTTGTTACTGAGCTGACTATACGTGTTTTCATTTTCATCAT
+CCTTCTATTTTTTTTAACGATTTGAGGAAACAATAATCAATATGTCAATTTAGAATATTG
+CAGTTGAGACATATCAAATATTTATAATCGTTTATATTAGTATATTTAATGTAGCAACTG
+ATAAATTACTGAGTGATGATGAGTGATTATTTTAAGAATATGTTTTTAACTTTTATTTAA
+AATTTGAAAGGAAGCATTTCAATTTCGAGGGTTAGTCAAAGTTGAATAAATTCTTTATGA
+AACAAGGAAAAGACATAGCTAATTTTATTGATTAATTTCTTTAAAACTAATGATTTGTTT
+GATTTAAAAATGTAATCGATTACAATATAAAAATACAAATATCTTAGAATTAAATCAATT
+AATTAACTATTAAATAAAAATTAAATATATATTAACTACTGTAAATTAATAAATAGAAAT
+AGAGAAAAAGGGTATTAATTATGTTTGGAATTTCGTCGGGAATAATTTTTGCCATTACAT
+AGAAATATCTAATAGATAATGAAAAAGTATCGTATGTATTTTTAATATAGTGTAAAATAT
+CATATGTAAAATAAAATGTAGATTTTTAGTTAGAGGCATTATAAGAAAATTTTGAGTATA
+GGTTAGCTTTTAATTATGAATCTTATTGAAATTTGATTAATAAATATATGATAGGGGATT
+AAAATGAAACTATTTTATATCGTATTTCTTATTATTATATGGCTGAATATATTTTTAGGA
+AATGAAATTATCCATACACTGACTGTTTTAATAACAACATTGTATATTGTTAATTCAAGA
+AAGGGGATTAAAAATGACAGAGTTGAATAATATTATAAACTCTCTTCAATCTTTGTTTGA
+GTCTGAATCAGGCTATAAAATTTCGAAAAATTCAGGAGTTCCATATCAAACAGTACAAGA
+TTTAAGAAATGGGAAAACCAAACTAGAAGATGCTAGATTTAGAACGATTATTAAACTTTA
+TAGTTACTATGTCTCATTAAAAGAACATTAATCATGGGACAATAAAGTATTGCTATAATA
+ATTGAATCATTAAATGAAGGAATAGAGCCTAAGACATAATAAATCAATGTCTTAGGCTCT
+ACAATATTATATTGGTAGTAGTTGACCGAATGAAAATGACCATGTAACAAGCATTTTTGG
+GGCCCCAACACAGAAGCTGACGAAAAGTCAGCTTACAATAATGTGCAAGTTGGGGATGGG
+CCCCAACACAGAGAATTTCAAAAAGAAATTCTACAGACAATGCAAGTTGGCGGGGCCCCA
+ACATAGAGAAATTCAAAAAGAAATTCTACAGACAATGCAAGTTGGGGAGTGACAACGAAA
+TAAATTTTATAAAATACCATTTCTGTCACATTCCTACTCCCAATTCCATTTAAATATATA
+GAAAATTTCACCTATTATATACATCATACACTTTAAAAATTATCATTTAAATAATCTGAT
+TTGGAATTAAAGTGCATGAAGTATAAGTCACCTTCATATACTAATCAAAGAGGACGTCAA
+CAGTTATTTTATTAGGATTTTTAACATAAACATTTGCTAGATCTGAATGTAATCTTTTGC
+TTAAATCAATAGTGTAGTTATTACCGCCACCGGTGATCTTAAGCTTACCTTTATTACGAT
+TTTCGGTATATAATATTTTATTTTTTATTAACGCTTCTCGTGCACGGAAATCGATTTCTT
+TCAATGTTAATACTGGTTTATTGCCTTGGTATATTTTATGCGCACCAATAATCGTTTGTA
+GTTTATCTTTGTATTGCACAAAAAGATTATAAGTTTTATCAGAAGGTTTTGCGGCTGGTG
+TAACGCCACCTGTAAATGTCTCTCTATAAGACCACCATAACTGATCAGTATCTTTGTCTT
+TTAGTCCAAACACATCTACGTAACGATCTTTTAACTGATTAATATTTCCCCAACTTTCAG
+CGCCCCATAAAGATATATGTGCTGACCATGAATATCTCTTAAGTTCAACATAAATGTTTC
+CATTATCATATTGATATAGCCATTTATTTGAAAATGAAAAATGAGGCTGTGTGTAATATT
+TAATTAATTCATTGATGTTAGTTTCATCTTGACCAATGCTATAAGCTTTAGCTTCAGAGT
+AAAAACTAAATACTGGTTTTTGATTTTGAGGTAATACAGTACCTAATAGTAATAATGTTG
+TCGTTAAAATTATATTTTTCGTGATGTTCTTACTCATTAGAACATCTCCTTTCAGAGGAA
+TCATGATACGAGGAATAAGAAATTTAAATGTGAGCGAAGTCAATATAGTATTTGCGATTA
+TTTTTATTAACTCCATTATTATTGTTAGTTTGATTTTTCGAGGATAACTTCAATTTTTGC
+ATTTTGAGGTTTTTTAACATATCTATTTGCATCAGTTGATGGCAACCTTTTACTTAAATC
+TATTGTGTAGTTATTGTCTGTACCTGTTATTTTAATTTGTCCTTTATTATAAGAATTATT
+ATATAATTTTTTACTTTTAATTAATGTTTGACGAATACGAAAATCTAATTCTTTTAAAGT
+TAAAACAGGCTTATTGCCTTCATAAACTGGAAATCCGCCAGTAAACGTTTCTGCTTTATC
+TTTATATGTTACATTCAGTTTATAGTGTTTATCGTTAGATGTTGCTGCAGGAGTAACACC
+ACCAGTAAACGTTTCTTGAGATAATGCAAAAGAATCAATGGTTTCTTGGTCTTTTATGCC
+AAAAATATCAACGCTTTTATTTCTTAATTGGTTGATATTGCCCCAACTTTCAGGTCCATA
+AACTTGAATATGACTATACCAAGAAAACTGTAACAACGTTGCATGAATCGTACCGTTATC
+TTTTTGCCATAACGTACTGTTAGAGAAGGTTAAATATTTTTGCGAGTAATATTTAGTTAA
+CTCATTAACGTTAGTTTCGTTTTGATTTATATAATAAGCTTTCGCTTCAGATGAAGAATT
+GATAGGTGTATTAGGAAATTGTGTAAATGCTGTACCTAATAGTAACAATGTTGTTGATAA
+AATAATTTTTTTCGTGATGTTATTGTTCATTCGAATTTCTCCTTTGAGTATTGTTGGAAT
+GTTTAATTATAAAAAGGGGTTAATTAGATAATTGAAATTATCCGCATTTACAAAAGGTAA
+TAGGTTAGTTAGATTTTTCGAGTATGACTTCAATTTGTGCATTTTTAGGATTTTTAACAT
+AACGGTTTGTGTCAGTTAATTTTAACTTTTTACTTAAATCAATCGTGTAATTATTTCCAT
+CAGCAGTTATCTTAATTTGACCTTTATTAAATTCTCCGTTATATAACTTTTTATTCTTTA
+TTAATGTTTGACGAATACGGAAATCTAATTCTTTTAAAGTTAATACTGGTTTATTTCCTT
+TGTAAAATTCATGTCCACCGATGATAGTTTGTTGTTTATCACTATATTTTAAAAATAGTC
+TATAAGGTTTATCAGATGAAGTAGCTGCTGGCGTAACACCACCAGTAAATGTTTCATCAT
+AAGTCCAGTAACCTTCAACTGTGTCCTCATCTTTAGTTCCAAATATATCAACGTATTTAT
+TTCTTAACTGATTAATGTTTCCCCAACTCTCGGATCCAAACACTTGAATATGACTATACC
+AAACCCACGTTTGCAATGTTGCATGAATGCTACCATTGGGCTTTTGCCATAGCCATTTTC
+CAGATAATGAAAAATGCGGCTGAGTGTAATATTTGATTAACTCATTGATATTAGTCTCGT
+TTTCACTGATATTATAGGCTTTTGCTTCAGATGAAAAACTGATTGGTGTTTTAGGAAGTT
+GTGTTGATGTGGTTCCTAAAAGTAACAATGCTGTTGATAAAACTAATTTATTCATGATGT
+TCTTTTTCATATGAAAATCTCCTTTGCGTGAATTACCCAAAGTATATAAGCTATTACACC
+GATTCGGAATTAAATAAAAGCTAAAACTATGTTAAATAAACTTAAACAGTTAGTAGTGTT
+ATTTAAGCAAAACTTATCATTTTTAAGTTTTGGACAGAAACAGTACTTAATAAAGTAGGC
+GGGAGTTATATATTAAAAACGACACGTTACAATTATTCTCTAATCAATTGCATTAAATTG
+TTTGATAATTGAATTTTCTAACTACTTGAAAAATAGTTATACTTTAAATGTAGTACTTAT
+TTTAATTATTTCCTACTACTTAAATTTAATATTAATAAAATGTTCATTTAATTATTGATA
+AAATATTACAAATTTTAATAGTAGGTTGTGTTTATTTTGTATGCGCTTACAATTTAGGTG
+TAACTAAAATAAAAGGAGTTGTTATTAATGAAAAATTTACGAAACAGAAGTTTTTTAACT
+TTATTAGACTTTTCACGACAAGAGGTAGAATTCTTATTAACACTCTCCGAGGATTTAAAA
+CGTGCTAAATATATTGGCACTGAAAAGCCTATGTTAAAAAATAAAAATATTGCACTGTTA
+TTTGAAAAAGATTCTACAAGAACGCGATGTGCATTTGAAGTTGCAGCGCATGATCAAGGT
+GCAAATGTAACTTATTTAGGCCCAACTGGATCACAAATGGGTAAAAAAGAAACAACTAAA
+GATACTGCACGTGTGCTTGGTGGAATGTATGATGGCATTGAATACCGTGGTTTTTCACAA
+AGAACAGTAGAAACTTTAGCTGAAAATTCAGGCGTACCAGTGTGGAATGGTTTAACTGAT
+GAAGATCATCCTACTCAAGTTCTTGCTGATTTCTTAACAGCAAAAGAAGTCTTAAAAAAA
+GATTATGCAGATATTAACTTTACATATGTTGGAGATGGTCGTAATAACGTTGCAAATGCA
+TTAATGCAAGGTGCTGCCATTATGGGTATGAACTTCCATTTAGTTTGTCCAAAAGAATTA
+AATCCAACAGATGAATTATTAAATCGCTGTAAAAATATTGCCGCTGAAAATGGTGGCAAC
+ATATTAATCACAGATGATATTGACCAAGGTGTAAAAGGTTCGGATGTAATTTACACTGAT
+GTTTGGGTATCAATGGGTGAACCTGATGAAGTATGGAAAGAACGACTTGAATTATTGAAA
+CCATATCAAGTAAATAAAGAAATGATGGATAAAACTGGTAATCCAAATGTTATTTTTGAG
+CATTGCTTACCATCTTTCCATAATGCTGATACGAAAATTGGTCAACAAATTTTTGAAAAA
+TATGGTATTCGAGAAATGGAAGTTACAGATGAAGTATTCGAAAGTAAAGCTTCAGTTGTA
+TTCCAAGAAGCTGAGAACAGAATGCATACAATCAAAGCAGTCATGGTTGCTACATTGGGT
+GAATTTTAAATGATATAAGGAAGTGAATATGATGGCGAAAATAGTAGTAGCATTAGGTGG
+TAATGCTTTAGGAAAATCACCTCAAGAACAACTCGAGCTTGTTAAAAATACTGCGAAATC
+ATTAGTAGGATTAATAACAAAAGGACATGAGATTGTTATTAGTCATGGTAATGGACCACA
+GGTTGGAAGCATTAATTTGGGACTTAACTATGCTGCAGAACATAACCAAGGTCCGGCATT
+TCCATTTGCTGAATGTGGCGCAATGAGTCAAGCTTACATCGGCTATCAATTACAAGAAAG
+CTTACAAAATGAATTGCATTCTATTGGAATGGATAAACAAGTGGTAACACTAGTGACACA
+AGTTGAAGTTGATGAAAATGATCCGGCATTTAACAATCCTTCAAAACCAATTGGGTTATT
+TTACAACAAAGAAGAAGCTGAACAAATTCAAAAAGAAAAAGGATTTATATTTGTTGAAGA
+TGCTGGAAGAGGATATAGACGCGTTGTTCCTTCACCACAACCCATCTCTATTATTGAATT
+AGAGAGTATTAAAACACTTATTAAAAATGATACACTCGTTATTGCTGCTGGTGGTGGAGG
+TATACCAGTAATTAGAGAGCAACATGATGGTTTTAAAGGTATTGATGCAGTTATAGACAA
+AGATAAAACAAGTGCGTTGTTGGGTGCTAATATTCAATGCGATCAATTGATTATTTTAAC
+AGCAATTGATTATGTATATATTAATTTTAACACTGAAAACCAACAGCCTTTGAAAACAAC
+AAATGTTGATGAATTAAAACGATATATAGACGAAAATCAATTTGCAAAAGGAAGTATGTT
+ACCAAAAATTGAAGCAGCCATATCATTTATTGAAAACAATCCAAAAGGAAGTGTGCTTAT
+AACATCATTAAATGAATTAGATGCTGCCTTAGAGGGTAAAGTAGGTACTGTGATTAAAAA
+GTAATTGAATTGAAACGCTTTTCAATTACTATATGTCAAATGCATGATTTTTATTATTTA
+TGTGCACCCCCTGAAAATAATGCCTCTATTTTGATGCGGGGTGCACTTTCTTAATTTATA
+TTTATAAAATCTTTAAGGTAGAAATGCTAGGTTAAATGTCGAAGGAGATGAAACCGTGGA
+AAATACAATTAATGAAAGTGAAAAGAAAAAACGATTTAAATTAAAAATGCCAGGTGCATT
+TATGATTTTATTCATTTTAACGGTTGTTGCAGTTATAGCAACATGGGTTATTCCTGCTGG
+TGCATATTCTAAACTTTCTTACGAACCTTCATCCCAAGAACTAAAGATAGTTAACCCTCA
+TAACCAAGTGAAAAAGGTTCCGGGTACGCAACAGGAACTAGACAAAATGGGGGTTAAAAT
+TAAGATTGAACAATTTAAATCAGGTGCAATTAATAAGCCAGTATCAATTCCGAATACTTA
+TGAAAGATTAAAGCAACATCCAGCTGGACCAGAACAAATAACAAGTAGCATGGTTGAAGG
+TACGATAGAAGCGGTCGATATCATGGTATTCATTCTTGTACTAGGGGGACTTATTGGCGT
+AGTTCAAGCCAGTGGTTCTTTTGAATCGGGATTGTTAGCTTTAACGAAGAAAACAAAAGG
+GCATGAATTTATGCTAATTGTGTTTGTATCAATACTAATGATTATCGGCGGGACGTTATG
+TGGTATTGAAGAAGAAGCTGTAGCATTCTATCCGATTTTAGTCCCTATATTTATAGCGTT
+AGGATACGATTCTATCGTTTCAGTTGGTGCCATATTCCTTGCCAGCTCTGTCGGTAGTAC
+ATTTTCAACTATTAACCCGTTCTCGGTTGTAATTGCCTCTAATGCCGCTGGTACAACTTT
+TACGGATGGCTTGTATTGGAGAATAGGTGCTTGTATTGTCGGTGCGATTTTTGTTATTAG
+TTATTTATATTGGTACTGTAAAAAAATTAAAAACGATCCTAAAGCGTCATATTCTTATGA
+AGACAAAGATGCTTTTGAACAGCAATGGTCTGTATTAAAAGATGACGATAGTGCCCATTT
+TACTTTGCGTAAGAAGATAATCCTTACATTATTTGTACTACCATTTCCAATTATGGTATG
+GGGAGTTATGACGCAAGGTTGGTGGTTCCCAGTTATGGCTTCAGCATTTTTAATATTTAC
+AATTATAATAATGTTTATTGCTGGGACAGGTAAATCTGGATTGGGAGAAAAAGGAACTGT
+AGATGCATTTGTCAATGGTGCATCAAGTTTAGTAGGTGTATCTTTAATTATTGGTTTAGC
+TCGAGGTATTAATTTAGTGTTGAATGAAGGTATGATTTCAGATACAATCTTACACTTTTC
+ATCATCTTTAGTTCAACATATGAGTGGACCATTATTTATCATCGTATTACTATTTATTTT
+CTTCTGTTTAGGTTTTATCGTGCCATCATCTTCTGGATTAGCGGTATTATCAATGCCTAT
+CTTTGCACCACTAGCTGATACAGTAGGTATACCAAGATTCGTCATCGTTACGACATATCA
+ATTTGGTCAATATGCGATGTTATTCTTAGCGCCGACTGGACTTGTTATGGCCACACTACA
+AATGTTAAACATGCGATATTCACATTGGTTCCGATTTGTATGGCCGGTAGTTGCTTTTGT
+ATTGATTTTCGGTGGCGGAGTACTAATTACGCAAGTACTAATTTATTCATAATTTGAAAT
+GCTATATTATAAAAATACTAATTGGGTTTTATGCATCTCGTAGGTTTGTAGAAATACTAA
+ACTAAGCGAGGTGCATTATTATTTTTGATTAAGAAAATAATGACGGTAATGATAACACTA
+GTAAGTAATTGATACAATGCTCTATTTAATAATGATATTTTTAAAATTTGTTTTTAATGT
+AATGTTAGATCTATGGTATATTATATTTATCGTGGTAAATATGTATTTGCTGTAAACAGT
+TTTTGTAACACATGACACATATGGTTGTGAAAATAGCATATATAAAGGATGGCTATAAAT
+GACACATTTGACAAAGGTTTTAGATACACTAACTGGAATATGCGTAGTATTATTATTTAG
+TAAATATTTTGTGGCGTATGCAAATATGGTGTTTGATTGGAATTTAAGATGGTATTTGCT
+AGAAAACATACCACATTTGCCAATTATATTATTTATTCTGATGTTTATTTTCGGAGTACC
+TTCTGAAATGATAAAAGATAGGCAAAGGAAAAATAACGGTGTTTAATTTATCGATATTTA
+GAGGTGATAAAATTTGTCAACTATTACAAAGACTAAAATAAAAATCTTTATTAATTATTT
+TATTGGTATATTAGCCTTGGCGTTTGCTGCTTATATATTATTTAAATAAAGGTTAGAGTG
+AAACGTGTTTATGAACTAGACGTTCTAGTATAGTTACTACAGCTTTAGTCAGAAGGTATC
+ATTGATAAGATCATATTAAATCAAAGAGGCATTGATATACACTAAAAAGAGGCAAGATTA
+CCTGCCTCTTTTTTAGTTATTAAATATGCGTGTTAATTCTTGGTAATAGTGTTACTGAGA
+CGCATTACGAAGAAATTTATCTTGATTTTCCTTTTTTAAAAAGAAGTGAAGATATCCTAT
+AAAGACTCTAAGTACTATTACAGTGGCTAATAACACTTCGATAAACAAAAGACCTTTCCA
+AATATCTGGAAACATAAGTACAGGCAAACTGTTCTTTAAAGCAGTTGCTGAGATTACTAA
+AGGGAATGTGAAAGCTGAAAATACGGGCGAAAAAGGTTCTTTTAGTAATTTAGGCAATTG
+TATAATGATATAAAAATAAAATATTTGAGCTAATATTAAAAATATAATTACGATAAAAGC
+ATTCGCCTTAGGAAAGGCTATAACATATGCTGCAGCGACTAAAGAAAATGGTGCACAAAT
+TGTCGATGTGTTCGGTTTGATTGACGTCTGCAATGGAAATGCTTTTAATCGCTTGAAAAC
+AATAGGTAAGACAATACAAGTAGCTACAAAGCCATATATTACTGTTAATTGACCTATGAA
+AAAACATCCGCTAACGGGTGCCGTCAATCCTGCGATAGCAATACCAATAAAAAGTACAGT
+CCACGAAGGATAAACATTTTCAAGTGAAAAATCTTTTAAATATTTTATTGAAAAAATAAT
+CATATGCGTCATAATTCCCACAAGGCATAAAATCCAAATAGGCGTTATTAAGCTATTGAT
+AAAAGTTATGTTACTAAAAAATGTATTTAAATAAGTAGTACCTAAAAAGCCAGACATGAA
+AAATGTTGTGAACACTGATGAAACTAGAGGACTGTTTAATTGTTCTTTCACATTATTAAA
+ATATTTGATCATAGTACATAAAAGGTGAATCCAAATCAAGAAAGCAAAGATTCCGCAAAC
+AGCGTTTAAAGTAAGAGATAAGTCTTTTAATAGATTACCCAGGCCTAATAATCCTAAGAC
+TAGTCCTGACGTTACTAGAGGTGCTTTTTGAAGTCTCATGATTTACAACCTTTCTTATGT
+GATTTTCTTCACTAATTATATCATGATAGCTATGGCCAATTAATAAGAAGAGTGTGTACT
+ATTACGTTATTAGAGTATGTATATTGGATTGATAGACACATAATTGACATTTAAATCTCA
+AAATTAATGATATTTGAGGTATGCTTTGCAACCTAAATATTGGAATATGTGGAAAAGTAA
+TTATTTAATTTAAATGATGATTGAAGATTTACAAGGGGGTGTACAAAATGAGAAATCAAA
+TTCAAAAACTATTAGACAGTGATTTGAGCAGTTTACATATATCGAAACAAACAGGAGTTC
+CACAAAGCACAATACACAGAATGAGAAAAAAAGAAAGATCATTAGACAATATGTCATTGA
+AAAACGCTGAACTACTTTATAAATTTGCCAATAGTATATTTAGCAATGAAAATTAAGAAG
+AATAATTAGTGAGTTAAATAATTAAAAAACCACGTCTCGATGGACGTGGTTATTTTTTTA
+AATGTATTTAATTATAGGATTTCGGGCCCCTGAAAAGTCCCTAAAATTTTGAATAGTATC
+AAAATCTATAGGAATAATATAATAATAAAAATCTCTACGCATGCAAATGGTGAGTGTTAA
+AAATCTTGGTGTAGTGTTGGTGTAGTTCAGGTGCAGTATAGATGAAATTTGAGCACAAAA
+TTACTTAATTAAAAATGTATAAACAAGTGTCAGACGCGTGTCAATTGTGTGTCAAAAAAT
+TCTATGACGAATAAGCATGCTTGTTTTAATTACTGTATTTTCGGGGCTTATGCGGGGGAC
+GTTTTGGTGACGCATCATACTATATTACTGAAATTCAAAACAAAAAGAGCCCCGTAATCA
+CGGAACTCTTTTGTTTGGTAATGCGTATAAAAATACCTATAAACGTCCTGGGAGGGATTC
+GAACCCCCGACCGATGGCTTAGAAGGCCATTGCTCTATCCAGCTGAGCTACCAGGACACG
+TTTAACAACACAAGAATTATTATATCTAAATGAACTTAAATTAGCAATACCTTGTAAATA
+AAAAATGTTTATATTTTTCACTATTATAGAGCTATTTATCTAAAAAGGTTCAATAAGACT
+TAAATGCGAATTCAGGCAACTTAATTGTGTTAAATACAGTTTTGAATGCCTAACTGTATT
+TCTTTTCTCTTTAAAATACAGTTAAGTACATTATAAGATGTTGTGCGGATAAACAAACTA
+ATTGCATCAAATTTATTTTAAAATAACAACAACAAAACGTTAAGCGAATAACATTTCGGT
+GATTTAAAAGCTACGCACGTTTTTGTTATCTTCAAATTTAAATTTTAAGGAGTGTTTTCA
+ATGGAAGGTTTATTTAACGCAATTAAAGATACCGTAACTGCAGCAATTAATAATGATGGC
+GCAAAATTAGGCACAAGCATTGTGAGCATCGTTGAAAATGGCGTAGGTTTATTAGGTAAA
+TTATTCGGATTCTAATTTCAATATGTTATGTAAGTAATCAGTATTATTTCAAAGGTGAGG
+GAGAGATTTAAATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAAC
+AACATGATAGTGTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTT
+TACTAGGTAAATTATTTGGATTCTAATATAATAACTAATATTCTTTAAAATAAACTGGGT
+GAGCATACTTTAATGTTATGCACTCAGTTTATTTTATTTGCAGAAATTTGAGCCTCTGTT
+AAGATTTAGATACATAGACAATATAGGAGATGGGGAAATTGGGATATAAAAATATTTTGA
+TAGACTTTGATGATACAATTGTTGATTTTTATGATGCAGAAGAATGGGCGTTTCACTATA
+TGGCGAATGTTTTTAATCATAAAGCAACAAAGGATGATTTTTTAACATTTAAAAAAATCA
+ATCACCAACATTGGGAAGCTTTTCAACAAAATAAATTAACGAAGTCTGAAGTATTATCAG
+AACGATTTGTGAATTACTTCAAACATCATCAAATGGAAGTTGATGGGCATCGTGCAGATG
+TGTTATTTAGAAATGGATTAGCAGAAGCTAAAGTTAAATACTTTGATCAAACATTAGAAA
+CAATTGTCGAATTATCGAAAAGACATGATTTATATATTGTTACTAATGGTGTAACCGAAA
+CGCAAAAGAGAAGGTTAAATCAGACGCCGTTGCATAAATATATTAAAAAGATATTTATAT
+CTGAGGAAACAGGATATCAAAAACCTAATCCGGAATTTTTTAATTATGTTTTTAATGATA
+TTGGTGAGGATGAAAGACAGCACTCGATTATAGTTGGAGATTCTTTAACATCTGACATTC
+TAGGTGGAATCAATGCGGGTATAGCTACTTGCTGGTTTAATTTTAGAGGATTTGATCATA
+ATCCAGGAATTATACCTGATTATGAAATTAATTCATGGAAACAACTAAATGATATTGTAC
+GTTAAGACAAATAATTTGAAAATGATTTATATTAGCTTAAGAAAGTAATTGTAAAAACGA
+CACTACGAAAAGATGCATTTTCATGCTCGTAGTGTCGAATATTATTATCTTATGGTTTCC
+ACAATAAGACATCATGTCCCTCTATATTTTTAGCTTCCACATCTGTAAATCCATGATGTA
+AAAAGAAATCCTTAGAATCATTTCTGCCAATGGCTTTAATTGGCATATTGAAACTCTTTG
+CAAAATCAATCAATTCTGAAGCGTACCCTCTGTTTTGATATTTTGGTAATACTTCTAACT
+TCCATAATAATATATAATCTTCAAAATCTGGGAAGTAGATTTCTTCGACATCACCTTTTT
+TTAGTAATGCCATTCTAGCTCCTAATTGATCTCCGACAAATATGCCATAAAATGGTGAAT
+CTGAACTTGCATCAATCATTTGACCGTTTAACTCATTGACCATGTATAAGTCTTTGTTGC
+CAAACGCTCTAAAGTTTTCGAATAATTCGTCAGTTTTGTAATTAATTTCAAGACGTTTGA
+TTTCACTCATATTTATCTACCCCTTATTTGTTTACTACTATTATTATACATTAAACCACT
+TGTGTTTTTCATTGATTTGAAAATGTAAAACAATGAACATTATAAATTTTCCATGATTAA
+ATTGATTTTCAAGCCGTAAAATTGTCAATCACCGGTATAGACTTTATACTATAGTTTGTA
+ATCGCGTACTTAAGGAGAAAAAACATGGACTGTAAAGTAGTTAGTTTAAATGAAAAAGAT
+CAGTTTATACCAAAAATAAAGAGCAGTGACCCTGTAATAACAGGATTATTTCAATATGAT
+GCAGCTCAACAAACTAGTTTTGAAAAAAGGATGTCTAAAGAAAATAATGGAAGAGAAGCG
+GCATTAGCGAATGTTATTCGTGAATATATGAGTGATTTAAAGCTTTCAAATGAACAAGAA
+TTAAACATACAACATTTAGCTAATGGTTCAAAAGTTGTGATTGGTGGACAACAAGCAGGG
+CTTTTCGGGGGACCATTGTATACATTCCATAAAATATTTTCAATCATTACTTTATCTAAG
+GAATTAACGGATACACATAAGCAACAAGTAGTACCAGTTTTTTGGATTGCAGGAGAAGAT
+CATGATTTCGATGAAGTGAATCATACATTTGTTTATAACGAAAATCATGGGTCGCTGCAT
+AAGGTTAAATATCATACAATGGAGATGCCAGAGACGACAGTCTCTAGATATTATCCTGAT
+AAGGCTGAGTTGAAACAAACTTTAAAAACGATGTTCATTCATATGAAAGAAACTGTTCAT
+ACACAAGGTCTACTGGAGATTTGTGACAGAATTATTGACCAATATGACTCGTGGACTGAT
+ATGTTTAAAGCACTACTGCATGAAACATTTAAAGCATATGGCGTTCTATTTATAGATGCG
+CAGTTTGAGCCGTTAAGAAAAATGGAAGCGCCTATGTTTAAAAAGATTTTGAAAAAACAT
+CAGTTGCTTGATGATGCTTTTAGAGCAACACAACAACGTACTCAAAATCAAGGCTTGAAT
+GCGATGATACAAACAGATACAAATGTTCATTTATTCTTACATGATGAAAATATGCGCCAA
+TTAGTTTCGTATGATGGTAAGCATTTTAAATTAAATAAAACAGATAAGACATATATAAAG
\ No newline at end of file
diff --git a/t/data/query_2.fa b/t/data/query_2.fa
new file mode 100644
index 0000000..e80478e
--- /dev/null
+++ b/t/data/query_2.fa
@@ -0,0 +1,8 @@
+>2_1
+AAA
+>2_2
+BBB
+>2_4
+DDD
+>2_7
+GGG
\ No newline at end of file
diff --git a/t/data/query_2.gff b/t/data/query_2.gff
new file mode 100644
index 0000000..b93ffcc
--- /dev/null
+++ b/t/data/query_2.gff
@@ -0,0 +1,271 @@
+##gff-version 3
+##sequence-region abc|SC|contig000001 1 15000
+abc|SC|contig000001	Prodigal:2.60	CDS	172	1131	.	-	0	ID=2_1;gene=hly;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q2G1X0,protein motif:TIGRFAMs:TIGR01002,protein motif:Pfam:PF07968.6;locus_tag=abc_00001;product=Alpha-toxin,beta-channel forming cytolysin,Leukocidin/Hemolysin toxin family protein;protein_id=gnl|SC|abc_00001
+abc|SC|contig000001	Prodigal:2.60	CDS	1804	1950	.	+	0	ID=abc_00002;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00002;product=hypothetical protein;protein_id=gnl|SC|abc_00002
+abc|SC|contig000001	Prodigal:2.60	CDS	1934	2131	.	+	0	ID=abc_00003;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00003;product=hypothetical protein;protein_id=gnl|SC|abc_00003
+abc|SC|contig000001	Prodigal:2.60	CDS	2621	3337	.	-	0	ID=abc_00004;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13350,protein motif:Pfam:PF02876.11;locus_tag=abc_00004;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00004
+abc|SC|contig000001	Prodigal:2.60	CDS	3445	4170	.	-	0	ID=2_2;gene=speH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0C0I6,protein motif:CLUSTERS:PRK13349,protein motif:Pfam:PF02876.11;locus_tag=abc_00005;note=SPE H;product=hypothetical protein,superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00005
+abc|SC|contig000001	Prodigal:2.60	CDS	4265	4990	.	-	0	ID=abc_00006;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13043,protein motif:Pfam:PF02876.11;locus_tag=abc_00006;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00006
+abc|SC|contig000001	Prodigal:2.60	CDS	5428	6429	.	+	0	ID=2_3;eC_number=2.1.3.3;gene=argF;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P99073,protein motif:CLUSTERS:PRK04284,protein motif:Cdd:COG0078,protein motif:TIGRFAMs:TIGR00658,protein motif:Pfam:PF00185.1;locus_tag=abc_00007;product=Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Aspartate/ornithine carbamoyltransferase%2C As [...]
+abc|SC|contig000001	Prodigal:2.60	CDS	6449	7384	.	+	0	ID=abc_00008;eC_number=2.7.2.2;gene=arcC1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q7A627,protein motif:CLUSTERS:PRK12353,protein motif:TIGRFAMs:TIGR00746,protein motif:Pfam:PF00696.22;locus_tag=abc_00008;product=Carbamate kinase 1,putative amino acid kinase,carbamate kinase,Amino acid kinase family protein;protein_id=gnl|SC|abc_00008
+abc|SC|contig000001	RNAmmer:1.2	rRNA	7556	9112	.	+	0	ID=abc_01705;inference=COORDINATES:profile:RNAmmer:1.2;locus_tag=abc_01705;product=16S ribosomal RNA
+abc|SC|contig000001	Prodigal:2.60	CDS	9419	9646	.	+	0	ID=abc_00010;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00010;product=hypothetical protein;protein_id=gnl|SC|abc_00010
+abc|SC|contig000001	Prodigal:2.60	CDS	9952	10899	.	-	0	ID=abc_00011;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF03595.11;locus_tag=abc_00011;product=C4-dicarboxylate transporter/malic acid transport protein;protein_id=gnl|SC|abc_00011
+abc|SC|contig000001	Prodigal:2.60	CDS	11148	11336	.	+	0	ID=abc_00012;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00012;product=hypothetical protein;protein_id=gnl|SC|abc_00012
+abc|SC|contig000001	Aragorn:1.2.34	tRNA	11803	11878	.	-	0	ID=abc_00013;inference=COORDINATES:profile:Aragorn:1.2.34;locus_tag=abc_00013;product=tRNA-Arg(tct)
+abc|SC|contig000001	Prodigal:2.60	CDS	12241	12375	.	+	0	ID=abc_00014;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5;locus_tag=abc_00014;product=Gonococcal growth inhibitor III,Staphylococcus haemolytic protein;protein_id=gnl|SC|abc_00014
+abc|SC|contig000001	Prodigal:2.60	CDS	12432	12566	.	+	0	ID=2_7;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5;locus_tag=abc_00015;product=Gonococcal growth inhibitor III,Staphylococcus haemolytic protein;protein_id=gnl|SC|abc_00015
+abc|SC|contig000001	Prodigal:2.60	CDS	12699	13385	.	+	0	ID=abc_00016;eC_number=3.-.-.-;gene=yfnB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O06480,protein motif:CLUSTERS:PRK09449,protein motif:TIGRFAMs:TIGR02254,protein motif:Pfam:PF00702.1;locus_tag=abc_00016;product=Putative HAD-hydrolase yfnB,dUMP phosphatase,HAD hydrolase,haloacid dehalogenase-like hydrolase;protein_id=gnl|SC|abc_00016
+##FASTA
+>abc|SC|contig000001
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+ATACCCACCACATTTGGTGGAGAACCAAAAATTAGCCGAAAAACATCATTTCTGAAGTTA
+TCGGCTAAAGTTATAAATTATATTTATTTGTACATGAACAAATAATTTACATTAATTTGT
+CATTTCTTCTTTTTCCCAATCGATTTTATATCTTTCTGAAGAACGATCTGTCCATTTATC
+TTTAGTATTGGTACCTTTCCAATTTGTTGAAGTCCAATGCAATTGGTAGTCATCACGAAC
+TCGTTCGTATATTACATCTATATTTGTTTGTTGTTTGGATGCTTTTCTATCCATAGTAAT
+AACTGTAGCGAAGTCTGGTGAAAACCCTGAAGATAATAGAGAACTTGCTTTGTTAGGATC
+AAGGAAGTTCTCTGCTGCTTTCATAGAACCATTTCTAGTTTTCATGAAAAGTTGATTGCC
+ATATACCGGGTTCCAAGAATCTCTATCATATGGTCCCCAATTTTGATTCACCATATTGTT
+AAATATCACTTTCCAGCCTACTTTTTTATCAGTTGGGCTCTCTAAAATTGTTTTGAAATC
+AGGTTGAACATATTTCAGTGTATGACCAATCGAAACATTTGCACCAATAAGGCCGCCAAT
+TTTTCCTGTATCATCACCAGTAACATTACCGTTGAATCCATAAGTTAAAGTACTCATATA
+CTCTTTTGTATCAATCGAATTTCTTGGATAGTAATCAGATATTTGAGCTACTTCATTATC
+AGGTAGTTGCAACTGTACCTTAAAGGCTGAAGGCCAGGCTAAACCACTTTTGTTAGCACC
+TTCTTCGCTATAAACTCTATATTGACCAGCAATGGTACCTTTCGTTCTAATAACTAGCAG
+TTTTTTATTGTGATTTTTATCATCGATAAAACTATAAAATACTTTTTTGTGCATGCCATT
+TTCTTTATCATAAGTGACTAAATCACCTGTTTTTACTGTAGTATTGCTTCCAATATCTGT
+AGTACCGGTTTTAATATTAATATCAGAATCTGCGGCATTAGCGACAGGATTCATTAATAT
+GGAACCTAGCAATAGTGTTGTTGTTACTGAGCTGACTATACGTGTTTTCATTTTCATCAT
+CCTTCTATTTTTTTTAACGATTTGAGGAAACAATAATCAATATGTCAATTTAGAATATTG
+CAGTTGAGACATATCAAATATTTATAATCGTTTATATTAGTATATTTAATGTAGCAACTG
+ATAAATTACTGAGTGATGATGAGTGATTATTTTAAGAATATGTTTTTAACTTTTATTTAA
+AATTTGAAAGGAAGCATTTCAATTTCGAGGGTTAGTCAAAGTTGAATAAATTCTTTATGA
+AACAAGGAAAAGACATAGCTAATTTTATTGATTAATTTCTTTAAAACTAATGATTTGTTT
+GATTTAAAAATGTAATCGATTACAATATAAAAATACAAATATCTTAGAATTAAATCAATT
+AATTAACTATTAAATAAAAATTAAATATATATTAACTACTGTAAATTAATAAATAGAAAT
+AGAGAAAAAGGGTATTAATTATGTTTGGAATTTCGTCGGGAATAATTTTTGCCATTACAT
+AGAAATATCTAATAGATAATGAAAAAGTATCGTATGTATTTTTAATATAGTGTAAAATAT
+CATATGTAAAATAAAATGTAGATTTTTAGTTAGAGGCATTATAAGAAAATTTTGAGTATA
+GGTTAGCTTTTAATTATGAATCTTATTGAAATTTGATTAATAAATATATGATAGGGGATT
+AAAATGAAACTATTTTATATCGTATTTCTTATTATTATATGGCTGAATATATTTTTAGGA
+AATGAAATTATCCATACACTGACTGTTTTAATAACAACATTGTATATTGTTAATTCAAGA
+AAGGGGATTAAAAATGACAGAGTTGAATAATATTATAAACTCTCTTCAATCTTTGTTTGA
+GTCTGAATCAGGCTATAAAATTTCGAAAAATTCAGGAGTTCCATATCAAACAGTACAAGA
+TTTAAGAAATGGGAAAACCAAACTAGAAGATGCTAGATTTAGAACGATTATTAAACTTTA
+TAGTTACTATGTCTCATTAAAAGAACATTAATCATGGGACAATAAAGTATTGCTATAATA
+ATTGAATCATTAAATGAAGGAATAGAGCCTAAGACATAATAAATCAATGTCTTAGGCTCT
+ACAATATTATATTGGTAGTAGTTGACCGAATGAAAATGACCATGTAACAAGCATTTTTGG
+GGCCCCAACACAGAAGCTGACGAAAAGTCAGCTTACAATAATGTGCAAGTTGGGGATGGG
+CCCCAACACAGAGAATTTCAAAAAGAAATTCTACAGACAATGCAAGTTGGCGGGGCCCCA
+ACATAGAGAAATTCAAAAAGAAATTCTACAGACAATGCAAGTTGGGGAGTGACAACGAAA
+TAAATTTTATAAAATACCATTTCTGTCACATTCCTACTCCCAATTCCATTTAAATATATA
+GAAAATTTCACCTATTATATACATCATACACTTTAAAAATTATCATTTAAATAATCTGAT
+TTGGAATTAAAGTGCATGAAGTATAAGTCACCTTCATATACTAATCAAAGAGGACGTCAA
+CAGTTATTTTATTAGGATTTTTAACATAAACATTTGCTAGATCTGAATGTAATCTTTTGC
+TTAAATCAATAGTGTAGTTATTACCGCCACCGGTGATCTTAAGCTTACCTTTATTACGAT
+TTTCGGTATATAATATTTTATTTTTTATTAACGCTTCTCGTGCACGGAAATCGATTTCTT
+TCAATGTTAATACTGGTTTATTGCCTTGGTATATTTTATGCGCACCAATAATCGTTTGTA
+GTTTATCTTTGTATTGCACAAAAAGATTATAAGTTTTATCAGAAGGTTTTGCGGCTGGTG
+TAACGCCACCTGTAAATGTCTCTCTATAAGACCACCATAACTGATCAGTATCTTTGTCTT
+TTAGTCCAAACACATCTACGTAACGATCTTTTAACTGATTAATATTTCCCCAACTTTCAG
+CGCCCCATAAAGATATATGTGCTGACCATGAATATCTCTTAAGTTCAACATAAATGTTTC
+CATTATCATATTGATATAGCCATTTATTTGAAAATGAAAAATGAGGCTGTGTGTAATATT
+TAATTAATTCATTGATGTTAGTTTCATCTTGACCAATGCTATAAGCTTTAGCTTCAGAGT
+AAAAACTAAATACTGGTTTTTGATTTTGAGGTAATACAGTACCTAATAGTAATAATGTTG
+TCGTTAAAATTATATTTTTCGTGATGTTCTTACTCATTAGAACATCTCCTTTCAGAGGAA
+TCATGATACGAGGAATAAGAAATTTAAATGTGAGCGAAGTCAATATAGTATTTGCGATTA
+TTTTTATTAACTCCATTATTATTGTTAGTTTGATTTTTCGAGGATAACTTCAATTTTTGC
+ATTTTGAGGTTTTTTAACATATCTATTTGCATCAGTTGATGGCAACCTTTTACTTAAATC
+TATTGTGTAGTTATTGTCTGTACCTGTTATTTTAATTTGTCCTTTATTATAAGAATTATT
+ATATAATTTTTTACTTTTAATTAATGTTTGACGAATACGAAAATCTAATTCTTTTAAAGT
+TAAAACAGGCTTATTGCCTTCATAAACTGGAAATCCGCCAGTAAACGTTTCTGCTTTATC
+TTTATATGTTACATTCAGTTTATAGTGTTTATCGTTAGATGTTGCTGCAGGAGTAACACC
+ACCAGTAAACGTTTCTTGAGATAATGCAAAAGAATCAATGGTTTCTTGGTCTTTTATGCC
+AAAAATATCAACGCTTTTATTTCTTAATTGGTTGATATTGCCCCAACTTTCAGGTCCATA
+AACTTGAATATGACTATACCAAGAAAACTGTAACAACGTTGCATGAATCGTACCGTTATC
+TTTTTGCCATAACGTACTGTTAGAGAAGGTTAAATATTTTTGCGAGTAATATTTAGTTAA
+CTCATTAACGTTAGTTTCGTTTTGATTTATATAATAAGCTTTCGCTTCAGATGAAGAATT
+GATAGGTGTATTAGGAAATTGTGTAAATGCTGTACCTAATAGTAACAATGTTGTTGATAA
+AATAATTTTTTTCGTGATGTTATTGTTCATTCGAATTTCTCCTTTGAGTATTGTTGGAAT
+GTTTAATTATAAAAAGGGGTTAATTAGATAATTGAAATTATCCGCATTTACAAAAGGTAA
+TAGGTTAGTTAGATTTTTCGAGTATGACTTCAATTTGTGCATTTTTAGGATTTTTAACAT
+AACGGTTTGTGTCAGTTAATTTTAACTTTTTACTTAAATCAATCGTGTAATTATTTCCAT
+CAGCAGTTATCTTAATTTGACCTTTATTAAATTCTCCGTTATATAACTTTTTATTCTTTA
+TTAATGTTTGACGAATACGGAAATCTAATTCTTTTAAAGTTAATACTGGTTTATTTCCTT
+TGTAAAATTCATGTCCACCGATGATAGTTTGTTGTTTATCACTATATTTTAAAAATAGTC
+TATAAGGTTTATCAGATGAAGTAGCTGCTGGCGTAACACCACCAGTAAATGTTTCATCAT
+AAGTCCAGTAACCTTCAACTGTGTCCTCATCTTTAGTTCCAAATATATCAACGTATTTAT
+TTCTTAACTGATTAATGTTTCCCCAACTCTCGGATCCAAACACTTGAATATGACTATACC
+AAACCCACGTTTGCAATGTTGCATGAATGCTACCATTGGGCTTTTGCCATAGCCATTTTC
+CAGATAATGAAAAATGCGGCTGAGTGTAATATTTGATTAACTCATTGATATTAGTCTCGT
+TTTCACTGATATTATAGGCTTTTGCTTCAGATGAAAAACTGATTGGTGTTTTAGGAAGTT
+GTGTTGATGTGGTTCCTAAAAGTAACAATGCTGTTGATAAAACTAATTTATTCATGATGT
+TCTTTTTCATATGAAAATCTCCTTTGCGTGAATTACCCAAAGTATATAAGCTATTACACC
+GATTCGGAATTAAATAAAAGCTAAAACTATGTTAAATAAACTTAAACAGTTAGTAGTGTT
+ATTTAAGCAAAACTTATCATTTTTAAGTTTTGGACAGAAACAGTACTTAATAAAGTAGGC
+GGGAGTTATATATTAAAAACGACACGTTACAATTATTCTCTAATCAATTGCATTAAATTG
+TTTGATAATTGAATTTTCTAACTACTTGAAAAATAGTTATACTTTAAATGTAGTACTTAT
+TTTAATTATTTCCTACTACTTAAATTTAATATTAATAAAATGTTCATTTAATTATTGATA
+AAATATTACAAATTTTAATAGTAGGTTGTGTTTATTTTGTATGCGCTTACAATTTAGGTG
+TAACTAAAATAAAAGGAGTTGTTATTAATGAAAAATTTACGAAACAGAAGTTTTTTAACT
+TTATTAGACTTTTCACGACAAGAGGTAGAATTCTTATTAACACTCTCCGAGGATTTAAAA
+CGTGCTAAATATATTGGCACTGAAAAGCCTATGTTAAAAAATAAAAATATTGCACTGTTA
+TTTGAAAAAGATTCTACAAGAACGCGATGTGCATTTGAAGTTGCAGCGCATGATCAAGGT
+GCAAATGTAACTTATTTAGGCCCAACTGGATCACAAATGGGTAAAAAAGAAACAACTAAA
+GATACTGCACGTGTGCTTGGTGGAATGTATGATGGCATTGAATACCGTGGTTTTTCACAA
+AGAACAGTAGAAACTTTAGCTGAAAATTCAGGCGTACCAGTGTGGAATGGTTTAACTGAT
+GAAGATCATCCTACTCAAGTTCTTGCTGATTTCTTAACAGCAAAAGAAGTCTTAAAAAAA
+GATTATGCAGATATTAACTTTACATATGTTGGAGATGGTCGTAATAACGTTGCAAATGCA
+TTAATGCAAGGTGCTGCCATTATGGGTATGAACTTCCATTTAGTTTGTCCAAAAGAATTA
+AATCCAACAGATGAATTATTAAATCGCTGTAAAAATATTGCCGCTGAAAATGGTGGCAAC
+ATATTAATCACAGATGATATTGACCAAGGTGTAAAAGGTTCGGATGTAATTTACACTGAT
+GTTTGGGTATCAATGGGTGAACCTGATGAAGTATGGAAAGAACGACTTGAATTATTGAAA
+CCATATCAAGTAAATAAAGAAATGATGGATAAAACTGGTAATCCAAATGTTATTTTTGAG
+CATTGCTTACCATCTTTCCATAATGCTGATACGAAAATTGGTCAACAAATTTTTGAAAAA
+TATGGTATTCGAGAAATGGAAGTTACAGATGAAGTATTCGAAAGTAAAGCTTCAGTTGTA
+TTCCAAGAAGCTGAGAACAGAATGCATACAATCAAAGCAGTCATGGTTGCTACATTGGGT
+GAATTTTAAATGATATAAGGAAGTGAATATGATGGCGAAAATAGTAGTAGCATTAGGTGG
+TAATGCTTTAGGAAAATCACCTCAAGAACAACTCGAGCTTGTTAAAAATACTGCGAAATC
+ATTAGTAGGATTAATAACAAAAGGACATGAGATTGTTATTAGTCATGGTAATGGACCACA
+GGTTGGAAGCATTAATTTGGGACTTAACTATGCTGCAGAACATAACCAAGGTCCGGCATT
+TCCATTTGCTGAATGTGGCGCAATGAGTCAAGCTTACATCGGCTATCAATTACAAGAAAG
+CTTACAAAATGAATTGCATTCTATTGGAATGGATAAACAAGTGGTAACACTAGTGACACA
+AGTTGAAGTTGATGAAAATGATCCGGCATTTAACAATCCTTCAAAACCAATTGGGTTATT
+TTACAACAAAGAAGAAGCTGAACAAATTCAAAAAGAAAAAGGATTTATATTTGTTGAAGA
+TGCTGGAAGAGGATATAGACGCGTTGTTCCTTCACCACAACCCATCTCTATTATTGAATT
+AGAGAGTATTAAAACACTTATTAAAAATGATACACTCGTTATTGCTGCTGGTGGTGGAGG
+TATACCAGTAATTAGAGAGCAACATGATGGTTTTAAAGGTATTGATGCAGTTATAGACAA
+AGATAAAACAAGTGCGTTGTTGGGTGCTAATATTCAATGCGATCAATTGATTATTTTAAC
+AGCAATTGATTATGTATATATTAATTTTAACACTGAAAACCAACAGCCTTTGAAAACAAC
+AAATGTTGATGAATTAAAACGATATATAGACGAAAATCAATTTGCAAAAGGAAGTATGTT
+ACCAAAAATTGAAGCAGCCATATCATTTATTGAAAACAATCCAAAAGGAAGTGTGCTTAT
+AACATCATTAAATGAATTAGATGCTGCCTTAGAGGGTAAAGTAGGTACTGTGATTAAAAA
+GTAATTGAATTGAAACGCTTTTCAATTACTATATGTCAAATGCATGATTTTTATTATTTA
+TGTGCACCCCCTGAAAATAATGCCTCTATTTTGATGCGGGGTGCACTTTCTTAATTTATA
+TTTATAAAATCTTTAAGGTAGAAATGCTAGGTTAAATGTCGAAGGAGATGAAACCGTGGA
+AAATACAATTAATGAAAGTGAAAAGAAAAAACGATTTAAATTAAAAATGCCAGGTGCATT
+TATGATTTTATTCATTTTAACGGTTGTTGCAGTTATAGCAACATGGGTTATTCCTGCTGG
+TGCATATTCTAAACTTTCTTACGAACCTTCATCCCAAGAACTAAAGATAGTTAACCCTCA
+TAACCAAGTGAAAAAGGTTCCGGGTACGCAACAGGAACTAGACAAAATGGGGGTTAAAAT
+TAAGATTGAACAATTTAAATCAGGTGCAATTAATAAGCCAGTATCAATTCCGAATACTTA
+TGAAAGATTAAAGCAACATCCAGCTGGACCAGAACAAATAACAAGTAGCATGGTTGAAGG
+TACGATAGAAGCGGTCGATATCATGGTATTCATTCTTGTACTAGGGGGACTTATTGGCGT
+AGTTCAAGCCAGTGGTTCTTTTGAATCGGGATTGTTAGCTTTAACGAAGAAAACAAAAGG
+GCATGAATTTATGCTAATTGTGTTTGTATCAATACTAATGATTATCGGCGGGACGTTATG
+TGGTATTGAAGAAGAAGCTGTAGCATTCTATCCGATTTTAGTCCCTATATTTATAGCGTT
+AGGATACGATTCTATCGTTTCAGTTGGTGCCATATTCCTTGCCAGCTCTGTCGGTAGTAC
+ATTTTCAACTATTAACCCGTTCTCGGTTGTAATTGCCTCTAATGCCGCTGGTACAACTTT
+TACGGATGGCTTGTATTGGAGAATAGGTGCTTGTATTGTCGGTGCGATTTTTGTTATTAG
+TTATTTATATTGGTACTGTAAAAAAATTAAAAACGATCCTAAAGCGTCATATTCTTATGA
+AGACAAAGATGCTTTTGAACAGCAATGGTCTGTATTAAAAGATGACGATAGTGCCCATTT
+TACTTTGCGTAAGAAGATAATCCTTACATTATTTGTACTACCATTTCCAATTATGGTATG
+GGGAGTTATGACGCAAGGTTGGTGGTTCCCAGTTATGGCTTCAGCATTTTTAATATTTAC
+AATTATAATAATGTTTATTGCTGGGACAGGTAAATCTGGATTGGGAGAAAAAGGAACTGT
+AGATGCATTTGTCAATGGTGCATCAAGTTTAGTAGGTGTATCTTTAATTATTGGTTTAGC
+TCGAGGTATTAATTTAGTGTTGAATGAAGGTATGATTTCAGATACAATCTTACACTTTTC
+ATCATCTTTAGTTCAACATATGAGTGGACCATTATTTATCATCGTATTACTATTTATTTT
+CTTCTGTTTAGGTTTTATCGTGCCATCATCTTCTGGATTAGCGGTATTATCAATGCCTAT
+CTTTGCACCACTAGCTGATACAGTAGGTATACCAAGATTCGTCATCGTTACGACATATCA
+ATTTGGTCAATATGCGATGTTATTCTTAGCGCCGACTGGACTTGTTATGGCCACACTACA
+AATGTTAAACATGCGATATTCACATTGGTTCCGATTTGTATGGCCGGTAGTTGCTTTTGT
+ATTGATTTTCGGTGGCGGAGTACTAATTACGCAAGTACTAATTTATTCATAATTTGAAAT
+GCTATATTATAAAAATACTAATTGGGTTTTATGCATCTCGTAGGTTTGTAGAAATACTAA
+ACTAAGCGAGGTGCATTATTATTTTTGATTAAGAAAATAATGACGGTAATGATAACACTA
+GTAAGTAATTGATACAATGCTCTATTTAATAATGATATTTTTAAAATTTGTTTTTAATGT
+AATGTTAGATCTATGGTATATTATATTTATCGTGGTAAATATGTATTTGCTGTAAACAGT
+TTTTGTAACACATGACACATATGGTTGTGAAAATAGCATATATAAAGGATGGCTATAAAT
+GACACATTTGACAAAGGTTTTAGATACACTAACTGGAATATGCGTAGTATTATTATTTAG
+TAAATATTTTGTGGCGTATGCAAATATGGTGTTTGATTGGAATTTAAGATGGTATTTGCT
+AGAAAACATACCACATTTGCCAATTATATTATTTATTCTGATGTTTATTTTCGGAGTACC
+TTCTGAAATGATAAAAGATAGGCAAAGGAAAAATAACGGTGTTTAATTTATCGATATTTA
+GAGGTGATAAAATTTGTCAACTATTACAAAGACTAAAATAAAAATCTTTATTAATTATTT
+TATTGGTATATTAGCCTTGGCGTTTGCTGCTTATATATTATTTAAATAAAGGTTAGAGTG
+AAACGTGTTTATGAACTAGACGTTCTAGTATAGTTACTACAGCTTTAGTCAGAAGGTATC
+ATTGATAAGATCATATTAAATCAAAGAGGCATTGATATACACTAAAAAGAGGCAAGATTA
+CCTGCCTCTTTTTTAGTTATTAAATATGCGTGTTAATTCTTGGTAATAGTGTTACTGAGA
+CGCATTACGAAGAAATTTATCTTGATTTTCCTTTTTTAAAAAGAAGTGAAGATATCCTAT
+AAAGACTCTAAGTACTATTACAGTGGCTAATAACACTTCGATAAACAAAAGACCTTTCCA
+AATATCTGGAAACATAAGTACAGGCAAACTGTTCTTTAAAGCAGTTGCTGAGATTACTAA
+AGGGAATGTGAAAGCTGAAAATACGGGCGAAAAAGGTTCTTTTAGTAATTTAGGCAATTG
+TATAATGATATAAAAATAAAATATTTGAGCTAATATTAAAAATATAATTACGATAAAAGC
+ATTCGCCTTAGGAAAGGCTATAACATATGCTGCAGCGACTAAAGAAAATGGTGCACAAAT
+TGTCGATGTGTTCGGTTTGATTGACGTCTGCAATGGAAATGCTTTTAATCGCTTGAAAAC
+AATAGGTAAGACAATACAAGTAGCTACAAAGCCATATATTACTGTTAATTGACCTATGAA
+AAAACATCCGCTAACGGGTGCCGTCAATCCTGCGATAGCAATACCAATAAAAAGTACAGT
+CCACGAAGGATAAACATTTTCAAGTGAAAAATCTTTTAAATATTTTATTGAAAAAATAAT
+CATATGCGTCATAATTCCCACAAGGCATAAAATCCAAATAGGCGTTATTAAGCTATTGAT
+AAAAGTTATGTTACTAAAAAATGTATTTAAATAAGTAGTACCTAAAAAGCCAGACATGAA
+AAATGTTGTGAACACTGATGAAACTAGAGGACTGTTTAATTGTTCTTTCACATTATTAAA
+ATATTTGATCATAGTACATAAAAGGTGAATCCAAATCAAGAAAGCAAAGATTCCGCAAAC
+AGCGTTTAAAGTAAGAGATAAGTCTTTTAATAGATTACCCAGGCCTAATAATCCTAAGAC
+TAGTCCTGACGTTACTAGAGGTGCTTTTTGAAGTCTCATGATTTACAACCTTTCTTATGT
+GATTTTCTTCACTAATTATATCATGATAGCTATGGCCAATTAATAAGAAGAGTGTGTACT
+ATTACGTTATTAGAGTATGTATATTGGATTGATAGACACATAATTGACATTTAAATCTCA
+AAATTAATGATATTTGAGGTATGCTTTGCAACCTAAATATTGGAATATGTGGAAAAGTAA
+TTATTTAATTTAAATGATGATTGAAGATTTACAAGGGGGTGTACAAAATGAGAAATCAAA
+TTCAAAAACTATTAGACAGTGATTTGAGCAGTTTACATATATCGAAACAAACAGGAGTTC
+CACAAAGCACAATACACAGAATGAGAAAAAAAGAAAGATCATTAGACAATATGTCATTGA
+AAAACGCTGAACTACTTTATAAATTTGCCAATAGTATATTTAGCAATGAAAATTAAGAAG
+AATAATTAGTGAGTTAAATAATTAAAAAACCACGTCTCGATGGACGTGGTTATTTTTTTA
+AATGTATTTAATTATAGGATTTCGGGCCCCTGAAAAGTCCCTAAAATTTTGAATAGTATC
+AAAATCTATAGGAATAATATAATAATAAAAATCTCTACGCATGCAAATGGTGAGTGTTAA
+AAATCTTGGTGTAGTGTTGGTGTAGTTCAGGTGCAGTATAGATGAAATTTGAGCACAAAA
+TTACTTAATTAAAAATGTATAAACAAGTGTCAGACGCGTGTCAATTGTGTGTCAAAAAAT
+TCTATGACGAATAAGCATGCTTGTTTTAATTACTGTATTTTCGGGGCTTATGCGGGGGAC
+GTTTTGGTGACGCATCATACTATATTACTGAAATTCAAAACAAAAAGAGCCCCGTAATCA
+CGGAACTCTTTTGTTTGGTAATGCGTATAAAAATACCTATAAACGTCCTGGGAGGGATTC
+GAACCCCCGACCGATGGCTTAGAAGGCCATTGCTCTATCCAGCTGAGCTACCAGGACACG
+TTTAACAACACAAGAATTATTATATCTAAATGAACTTAAATTAGCAATACCTTGTAAATA
+AAAAATGTTTATATTTTTCACTATTATAGAGCTATTTATCTAAAAAGGTTCAATAAGACT
+TAAATGCGAATTCAGGCAACTTAATTGTGTTAAATACAGTTTTGAATGCCTAACTGTATT
+TCTTTTCTCTTTAAAATACAGTTAAGTACATTATAAGATGTTGTGCGGATAAACAAACTA
+ATTGCATCAAATTTATTTTAAAATAACAACAACAAAACGTTAAGCGAATAACATTTCGGT
+GATTTAAAAGCTACGCACGTTTTTGTTATCTTCAAATTTAAATTTTAAGGAGTGTTTTCA
+ATGGAAGGTTTATTTAACGCAATTAAAGATACCGTAACTGCAGCAATTAATAATGATGGC
+GCAAAATTAGGCACAAGCATTGTGAGCATCGTTGAAAATGGCGTAGGTTTATTAGGTAAA
+TTATTCGGATTCTAATTTCAATATGTTATGTAAGTAATCAGTATTATTTCAAAGGTGAGG
+GAGAGATTTAAATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAAC
+AACATGATAGTGTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTT
+TACTAGGTAAATTATTTGGATTCTAATATAATAACTAATATTCTTTAAAATAAACTGGGT
+GAGCATACTTTAATGTTATGCACTCAGTTTATTTTATTTGCAGAAATTTGAGCCTCTGTT
+AAGATTTAGATACATAGACAATATAGGAGATGGGGAAATTGGGATATAAAAATATTTTGA
+TAGACTTTGATGATACAATTGTTGATTTTTATGATGCAGAAGAATGGGCGTTTCACTATA
+TGGCGAATGTTTTTAATCATAAAGCAACAAAGGATGATTTTTTAACATTTAAAAAAATCA
+ATCACCAACATTGGGAAGCTTTTCAACAAAATAAATTAACGAAGTCTGAAGTATTATCAG
+AACGATTTGTGAATTACTTCAAACATCATCAAATGGAAGTTGATGGGCATCGTGCAGATG
+TGTTATTTAGAAATGGATTAGCAGAAGCTAAAGTTAAATACTTTGATCAAACATTAGAAA
+CAATTGTCGAATTATCGAAAAGACATGATTTATATATTGTTACTAATGGTGTAACCGAAA
+CGCAAAAGAGAAGGTTAAATCAGACGCCGTTGCATAAATATATTAAAAAGATATTTATAT
+CTGAGGAAACAGGATATCAAAAACCTAATCCGGAATTTTTTAATTATGTTTTTAATGATA
+TTGGTGAGGATGAAAGACAGCACTCGATTATAGTTGGAGATTCTTTAACATCTGACATTC
+TAGGTGGAATCAATGCGGGTATAGCTACTTGCTGGTTTAATTTTAGAGGATTTGATCATA
+ATCCAGGAATTATACCTGATTATGAAATTAATTCATGGAAACAACTAAATGATATTGTAC
+GTTAAGACAAATAATTTGAAAATGATTTATATTAGCTTAAGAAAGTAATTGTAAAAACGA
+CACTACGAAAAGATGCATTTTCATGCTCGTAGTGTCGAATATTATTATCTTATGGTTTCC
+ACAATAAGACATCATGTCCCTCTATATTTTTAGCTTCCACATCTGTAAATCCATGATGTA
+AAAAGAAATCCTTAGAATCATTTCTGCCAATGGCTTTAATTGGCATATTGAAACTCTTTG
+CAAAATCAATCAATTCTGAAGCGTACCCTCTGTTTTGATATTTTGGTAATACTTCTAACT
+TCCATAATAATATATAATCTTCAAAATCTGGGAAGTAGATTTCTTCGACATCACCTTTTT
+TTAGTAATGCCATTCTAGCTCCTAATTGATCTCCGACAAATATGCCATAAAATGGTGAAT
+CTGAACTTGCATCAATCATTTGACCGTTTAACTCATTGACCATGTATAAGTCTTTGTTGC
+CAAACGCTCTAAAGTTTTCGAATAATTCGTCAGTTTTGTAATTAATTTCAAGACGTTTGA
+TTTCACTCATATTTATCTACCCCTTATTTGTTTACTACTATTATTATACATTAAACCACT
+TGTGTTTTTCATTGATTTGAAAATGTAAAACAATGAACATTATAAATTTTCCATGATTAA
+ATTGATTTTCAAGCCGTAAAATTGTCAATCACCGGTATAGACTTTATACTATAGTTTGTA
+ATCGCGTACTTAAGGAGAAAAAACATGGACTGTAAAGTAGTTAGTTTAAATGAAAAAGAT
+CAGTTTATACCAAAAATAAAGAGCAGTGACCCTGTAATAACAGGATTATTTCAATATGAT
+GCAGCTCAACAAACTAGTTTTGAAAAAAGGATGTCTAAAGAAAATAATGGAAGAGAAGCG
+GCATTAGCGAATGTTATTCGTGAATATATGAGTGATTTAAAGCTTTCAAATGAACAAGAA
+TTAAACATACAACATTTAGCTAATGGTTCAAAAGTTGTGATTGGTGGACAACAAGCAGGG
+CTTTTCGGGGGACCATTGTATACATTCCATAAAATATTTTCAATCATTACTTTATCTAAG
+GAATTAACGGATACACATAAGCAACAAGTAGTACCAGTTTTTTGGATTGCAGGAGAAGAT
+CATGATTTCGATGAAGTGAATCATACATTTGTTTATAACGAAAATCATGGGTCGCTGCAT
+AAGGTTAAATATCATACAATGGAGATGCCAGAGACGACAGTCTCTAGATATTATCCTGAT
+AAGGCTGAGTTGAAACAAACTTTAAAAACGATGTTCATTCATATGAAAGAAACTGTTCAT
+ACACAAGGTCTACTGGAGATTTGTGACAGAATTATTGACCAATATGACTCGTGGACTGAT
+ATGTTTAAAGCACTACTGCATGAAACATTTAAAGCATATGGCGTTCTATTTATAGATGCG
+CAGTTTGAGCCGTTAAGAAAAATGGAAGCGCCTATGTTTAAAAAGATTTTGAAAAAACAT
+CAGTTGCTTGATGATGCTTTTAGAGCAACACAACAACGTACTCAAAATCAAGGCTTGAAT
+GCGATGATACAAACAGATACAAATGTTCATTTATTCTTACATGATGAAAATATGCGCCAA
+TTAGTTTCGTATGATGGTAAGCATTTTAAATTAAATAAAACAGATAAGACATATATAAAG
\ No newline at end of file
diff --git a/t/data/query_3.fa b/t/data/query_3.fa
new file mode 100644
index 0000000..fca5b3b
--- /dev/null
+++ b/t/data/query_3.fa
@@ -0,0 +1,8 @@
+>3_1
+AAA
+>3_3
+CCC
+>3_4
+DDD
+>3_5
+EEE
\ No newline at end of file
diff --git a/t/data/query_3.gff b/t/data/query_3.gff
new file mode 100644
index 0000000..4cc9dd9
--- /dev/null
+++ b/t/data/query_3.gff
@@ -0,0 +1,271 @@
+##gff-version 3
+##sequence-region abc|SC|contig000001 1 15000
+abc|SC|contig000001	Prodigal:2.60	CDS	172	1131	.	-	0	ID=3_1;gene=hly;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q2G1X0,protein motif:TIGRFAMs:TIGR01002,protein motif:Pfam:PF07968.6;locus_tag=abc_00001;product=Alpha-toxin,beta-channel forming cytolysin,Leukocidin/Hemolysin toxin family protein;protein_id=gnl|SC|abc_00001
+abc|SC|contig000001	Prodigal:2.60	CDS	1804	1950	.	+	0	ID=abc_00002;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00002;product=hypothetical protein;protein_id=gnl|SC|abc_00002
+abc|SC|contig000001	Prodigal:2.60	CDS	1934	2131	.	+	0	ID=abc_00003;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00003;product=hypothetical protein;protein_id=gnl|SC|abc_00003
+abc|SC|contig000001	Prodigal:2.60	CDS	2621	3337	.	-	0	ID=abc_00004;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13350,protein motif:Pfam:PF02876.11;locus_tag=abc_00004;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00004
+abc|SC|contig000001	Prodigal:2.60	CDS	3445	4170	.	-	0	ID=3_2;gene=speH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0C0I6,protein motif:CLUSTERS:PRK13349,protein motif:Pfam:PF02876.11;locus_tag=abc_00005;note=SPE H;product=hypothetical protein,superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00005
+abc|SC|contig000001	Prodigal:2.60	CDS	4265	4990	.	-	0	ID=abc_00006;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13043,protein motif:Pfam:PF02876.11;locus_tag=abc_00006;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00006
+abc|SC|contig000001	Prodigal:2.60	CDS	5428	6429	.	+	0	ID=3_3;eC_number=2.1.3.3;gene=argF;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P99073,protein motif:CLUSTERS:PRK04284,protein motif:Cdd:COG0078,protein motif:TIGRFAMs:TIGR00658,protein motif:Pfam:PF00185.1;locus_tag=abc_00007;product=Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Aspartate/ornithine carbamoyltransferase%2C As [...]
+abc|SC|contig000001	Prodigal:2.60	CDS	6449	7384	.	+	0	ID=abc_00008;eC_number=2.7.2.2;gene=arcC1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q7A627,protein motif:CLUSTERS:PRK12353,protein motif:TIGRFAMs:TIGR00746,protein motif:Pfam:PF00696.22;locus_tag=abc_00008;product=Carbamate kinase 1,putative amino acid kinase,carbamate kinase,Amino acid kinase family protein;protein_id=gnl|SC|abc_00008
+abc|SC|contig000001	RNAmmer:1.2	rRNA	7556	9112	.	+	0	ID=abc_01705;inference=COORDINATES:profile:RNAmmer:1.2;locus_tag=abc_01705;product=16S ribosomal RNA
+abc|SC|contig000001	Prodigal:2.60	CDS	9419	9646	.	+	0	ID=abc_00010;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00010;product=hypothetical protein;protein_id=gnl|SC|abc_00010
+abc|SC|contig000001	Prodigal:2.60	CDS	9952	10899	.	-	0	ID=abc_00011;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF03595.11;locus_tag=abc_00011;product=C4-dicarboxylate transporter/malic acid transport protein;protein_id=gnl|SC|abc_00011
+abc|SC|contig000001	Prodigal:2.60	CDS	11148	11336	.	+	0	ID=abc_00012;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00012;product=hypothetical protein;protein_id=gnl|SC|abc_00012
+abc|SC|contig000001	Aragorn:1.2.34	tRNA	11803	11878	.	-	0	ID=abc_00013;inference=COORDINATES:profile:Aragorn:1.2.34;locus_tag=abc_00013;product=tRNA-Arg(tct)
+abc|SC|contig000001	Prodigal:2.60	CDS	12241	12375	.	+	0	ID=abc_00014;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5;locus_tag=abc_00014;product=Gonococcal growth inhibitor III,Staphylococcus haemolytic protein;protein_id=gnl|SC|abc_00014
+abc|SC|contig000001	Prodigal:2.60	CDS	12432	12566	.	+	0	ID=abc_00015;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5;locus_tag=abc_00015;product=Gonococcal growth inhibitor III,Staphylococcus haemolytic protein;protein_id=gnl|SC|abc_00015
+abc|SC|contig000001	Prodigal:2.60	CDS	12699	13385	.	+	0	ID=3_5;eC_number=3.-.-.-;gene=yfnB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O06480,protein motif:CLUSTERS:PRK09449,protein motif:TIGRFAMs:TIGR02254,protein motif:Pfam:PF00702.1;locus_tag=abc_00016;product=Putative HAD-hydrolase yfnB,dUMP phosphatase,HAD hydrolase,haloacid dehalogenase-like hydrolase;protein_id=gnl|SC|abc_00016
+##FASTA
+>abc|SC|contig000001
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+ATACCCACCACATTTGGTGGAGAACCAAAAATTAGCCGAAAAACATCATTTCTGAAGTTA
+TCGGCTAAAGTTATAAATTATATTTATTTGTACATGAACAAATAATTTACATTAATTTGT
+CATTTCTTCTTTTTCCCAATCGATTTTATATCTTTCTGAAGAACGATCTGTCCATTTATC
+TTTAGTATTGGTACCTTTCCAATTTGTTGAAGTCCAATGCAATTGGTAGTCATCACGAAC
+TCGTTCGTATATTACATCTATATTTGTTTGTTGTTTGGATGCTTTTCTATCCATAGTAAT
+AACTGTAGCGAAGTCTGGTGAAAACCCTGAAGATAATAGAGAACTTGCTTTGTTAGGATC
+AAGGAAGTTCTCTGCTGCTTTCATAGAACCATTTCTAGTTTTCATGAAAAGTTGATTGCC
+ATATACCGGGTTCCAAGAATCTCTATCATATGGTCCCCAATTTTGATTCACCATATTGTT
+AAATATCACTTTCCAGCCTACTTTTTTATCAGTTGGGCTCTCTAAAATTGTTTTGAAATC
+AGGTTGAACATATTTCAGTGTATGACCAATCGAAACATTTGCACCAATAAGGCCGCCAAT
+TTTTCCTGTATCATCACCAGTAACATTACCGTTGAATCCATAAGTTAAAGTACTCATATA
+CTCTTTTGTATCAATCGAATTTCTTGGATAGTAATCAGATATTTGAGCTACTTCATTATC
+AGGTAGTTGCAACTGTACCTTAAAGGCTGAAGGCCAGGCTAAACCACTTTTGTTAGCACC
+TTCTTCGCTATAAACTCTATATTGACCAGCAATGGTACCTTTCGTTCTAATAACTAGCAG
+TTTTTTATTGTGATTTTTATCATCGATAAAACTATAAAATACTTTTTTGTGCATGCCATT
+TTCTTTATCATAAGTGACTAAATCACCTGTTTTTACTGTAGTATTGCTTCCAATATCTGT
+AGTACCGGTTTTAATATTAATATCAGAATCTGCGGCATTAGCGACAGGATTCATTAATAT
+GGAACCTAGCAATAGTGTTGTTGTTACTGAGCTGACTATACGTGTTTTCATTTTCATCAT
+CCTTCTATTTTTTTTAACGATTTGAGGAAACAATAATCAATATGTCAATTTAGAATATTG
+CAGTTGAGACATATCAAATATTTATAATCGTTTATATTAGTATATTTAATGTAGCAACTG
+ATAAATTACTGAGTGATGATGAGTGATTATTTTAAGAATATGTTTTTAACTTTTATTTAA
+AATTTGAAAGGAAGCATTTCAATTTCGAGGGTTAGTCAAAGTTGAATAAATTCTTTATGA
+AACAAGGAAAAGACATAGCTAATTTTATTGATTAATTTCTTTAAAACTAATGATTTGTTT
+GATTTAAAAATGTAATCGATTACAATATAAAAATACAAATATCTTAGAATTAAATCAATT
+AATTAACTATTAAATAAAAATTAAATATATATTAACTACTGTAAATTAATAAATAGAAAT
+AGAGAAAAAGGGTATTAATTATGTTTGGAATTTCGTCGGGAATAATTTTTGCCATTACAT
+AGAAATATCTAATAGATAATGAAAAAGTATCGTATGTATTTTTAATATAGTGTAAAATAT
+CATATGTAAAATAAAATGTAGATTTTTAGTTAGAGGCATTATAAGAAAATTTTGAGTATA
+GGTTAGCTTTTAATTATGAATCTTATTGAAATTTGATTAATAAATATATGATAGGGGATT
+AAAATGAAACTATTTTATATCGTATTTCTTATTATTATATGGCTGAATATATTTTTAGGA
+AATGAAATTATCCATACACTGACTGTTTTAATAACAACATTGTATATTGTTAATTCAAGA
+AAGGGGATTAAAAATGACAGAGTTGAATAATATTATAAACTCTCTTCAATCTTTGTTTGA
+GTCTGAATCAGGCTATAAAATTTCGAAAAATTCAGGAGTTCCATATCAAACAGTACAAGA
+TTTAAGAAATGGGAAAACCAAACTAGAAGATGCTAGATTTAGAACGATTATTAAACTTTA
+TAGTTACTATGTCTCATTAAAAGAACATTAATCATGGGACAATAAAGTATTGCTATAATA
+ATTGAATCATTAAATGAAGGAATAGAGCCTAAGACATAATAAATCAATGTCTTAGGCTCT
+ACAATATTATATTGGTAGTAGTTGACCGAATGAAAATGACCATGTAACAAGCATTTTTGG
+GGCCCCAACACAGAAGCTGACGAAAAGTCAGCTTACAATAATGTGCAAGTTGGGGATGGG
+CCCCAACACAGAGAATTTCAAAAAGAAATTCTACAGACAATGCAAGTTGGCGGGGCCCCA
+ACATAGAGAAATTCAAAAAGAAATTCTACAGACAATGCAAGTTGGGGAGTGACAACGAAA
+TAAATTTTATAAAATACCATTTCTGTCACATTCCTACTCCCAATTCCATTTAAATATATA
+GAAAATTTCACCTATTATATACATCATACACTTTAAAAATTATCATTTAAATAATCTGAT
+TTGGAATTAAAGTGCATGAAGTATAAGTCACCTTCATATACTAATCAAAGAGGACGTCAA
+CAGTTATTTTATTAGGATTTTTAACATAAACATTTGCTAGATCTGAATGTAATCTTTTGC
+TTAAATCAATAGTGTAGTTATTACCGCCACCGGTGATCTTAAGCTTACCTTTATTACGAT
+TTTCGGTATATAATATTTTATTTTTTATTAACGCTTCTCGTGCACGGAAATCGATTTCTT
+TCAATGTTAATACTGGTTTATTGCCTTGGTATATTTTATGCGCACCAATAATCGTTTGTA
+GTTTATCTTTGTATTGCACAAAAAGATTATAAGTTTTATCAGAAGGTTTTGCGGCTGGTG
+TAACGCCACCTGTAAATGTCTCTCTATAAGACCACCATAACTGATCAGTATCTTTGTCTT
+TTAGTCCAAACACATCTACGTAACGATCTTTTAACTGATTAATATTTCCCCAACTTTCAG
+CGCCCCATAAAGATATATGTGCTGACCATGAATATCTCTTAAGTTCAACATAAATGTTTC
+CATTATCATATTGATATAGCCATTTATTTGAAAATGAAAAATGAGGCTGTGTGTAATATT
+TAATTAATTCATTGATGTTAGTTTCATCTTGACCAATGCTATAAGCTTTAGCTTCAGAGT
+AAAAACTAAATACTGGTTTTTGATTTTGAGGTAATACAGTACCTAATAGTAATAATGTTG
+TCGTTAAAATTATATTTTTCGTGATGTTCTTACTCATTAGAACATCTCCTTTCAGAGGAA
+TCATGATACGAGGAATAAGAAATTTAAATGTGAGCGAAGTCAATATAGTATTTGCGATTA
+TTTTTATTAACTCCATTATTATTGTTAGTTTGATTTTTCGAGGATAACTTCAATTTTTGC
+ATTTTGAGGTTTTTTAACATATCTATTTGCATCAGTTGATGGCAACCTTTTACTTAAATC
+TATTGTGTAGTTATTGTCTGTACCTGTTATTTTAATTTGTCCTTTATTATAAGAATTATT
+ATATAATTTTTTACTTTTAATTAATGTTTGACGAATACGAAAATCTAATTCTTTTAAAGT
+TAAAACAGGCTTATTGCCTTCATAAACTGGAAATCCGCCAGTAAACGTTTCTGCTTTATC
+TTTATATGTTACATTCAGTTTATAGTGTTTATCGTTAGATGTTGCTGCAGGAGTAACACC
+ACCAGTAAACGTTTCTTGAGATAATGCAAAAGAATCAATGGTTTCTTGGTCTTTTATGCC
+AAAAATATCAACGCTTTTATTTCTTAATTGGTTGATATTGCCCCAACTTTCAGGTCCATA
+AACTTGAATATGACTATACCAAGAAAACTGTAACAACGTTGCATGAATCGTACCGTTATC
+TTTTTGCCATAACGTACTGTTAGAGAAGGTTAAATATTTTTGCGAGTAATATTTAGTTAA
+CTCATTAACGTTAGTTTCGTTTTGATTTATATAATAAGCTTTCGCTTCAGATGAAGAATT
+GATAGGTGTATTAGGAAATTGTGTAAATGCTGTACCTAATAGTAACAATGTTGTTGATAA
+AATAATTTTTTTCGTGATGTTATTGTTCATTCGAATTTCTCCTTTGAGTATTGTTGGAAT
+GTTTAATTATAAAAAGGGGTTAATTAGATAATTGAAATTATCCGCATTTACAAAAGGTAA
+TAGGTTAGTTAGATTTTTCGAGTATGACTTCAATTTGTGCATTTTTAGGATTTTTAACAT
+AACGGTTTGTGTCAGTTAATTTTAACTTTTTACTTAAATCAATCGTGTAATTATTTCCAT
+CAGCAGTTATCTTAATTTGACCTTTATTAAATTCTCCGTTATATAACTTTTTATTCTTTA
+TTAATGTTTGACGAATACGGAAATCTAATTCTTTTAAAGTTAATACTGGTTTATTTCCTT
+TGTAAAATTCATGTCCACCGATGATAGTTTGTTGTTTATCACTATATTTTAAAAATAGTC
+TATAAGGTTTATCAGATGAAGTAGCTGCTGGCGTAACACCACCAGTAAATGTTTCATCAT
+AAGTCCAGTAACCTTCAACTGTGTCCTCATCTTTAGTTCCAAATATATCAACGTATTTAT
+TTCTTAACTGATTAATGTTTCCCCAACTCTCGGATCCAAACACTTGAATATGACTATACC
+AAACCCACGTTTGCAATGTTGCATGAATGCTACCATTGGGCTTTTGCCATAGCCATTTTC
+CAGATAATGAAAAATGCGGCTGAGTGTAATATTTGATTAACTCATTGATATTAGTCTCGT
+TTTCACTGATATTATAGGCTTTTGCTTCAGATGAAAAACTGATTGGTGTTTTAGGAAGTT
+GTGTTGATGTGGTTCCTAAAAGTAACAATGCTGTTGATAAAACTAATTTATTCATGATGT
+TCTTTTTCATATGAAAATCTCCTTTGCGTGAATTACCCAAAGTATATAAGCTATTACACC
+GATTCGGAATTAAATAAAAGCTAAAACTATGTTAAATAAACTTAAACAGTTAGTAGTGTT
+ATTTAAGCAAAACTTATCATTTTTAAGTTTTGGACAGAAACAGTACTTAATAAAGTAGGC
+GGGAGTTATATATTAAAAACGACACGTTACAATTATTCTCTAATCAATTGCATTAAATTG
+TTTGATAATTGAATTTTCTAACTACTTGAAAAATAGTTATACTTTAAATGTAGTACTTAT
+TTTAATTATTTCCTACTACTTAAATTTAATATTAATAAAATGTTCATTTAATTATTGATA
+AAATATTACAAATTTTAATAGTAGGTTGTGTTTATTTTGTATGCGCTTACAATTTAGGTG
+TAACTAAAATAAAAGGAGTTGTTATTAATGAAAAATTTACGAAACAGAAGTTTTTTAACT
+TTATTAGACTTTTCACGACAAGAGGTAGAATTCTTATTAACACTCTCCGAGGATTTAAAA
+CGTGCTAAATATATTGGCACTGAAAAGCCTATGTTAAAAAATAAAAATATTGCACTGTTA
+TTTGAAAAAGATTCTACAAGAACGCGATGTGCATTTGAAGTTGCAGCGCATGATCAAGGT
+GCAAATGTAACTTATTTAGGCCCAACTGGATCACAAATGGGTAAAAAAGAAACAACTAAA
+GATACTGCACGTGTGCTTGGTGGAATGTATGATGGCATTGAATACCGTGGTTTTTCACAA
+AGAACAGTAGAAACTTTAGCTGAAAATTCAGGCGTACCAGTGTGGAATGGTTTAACTGAT
+GAAGATCATCCTACTCAAGTTCTTGCTGATTTCTTAACAGCAAAAGAAGTCTTAAAAAAA
+GATTATGCAGATATTAACTTTACATATGTTGGAGATGGTCGTAATAACGTTGCAAATGCA
+TTAATGCAAGGTGCTGCCATTATGGGTATGAACTTCCATTTAGTTTGTCCAAAAGAATTA
+AATCCAACAGATGAATTATTAAATCGCTGTAAAAATATTGCCGCTGAAAATGGTGGCAAC
+ATATTAATCACAGATGATATTGACCAAGGTGTAAAAGGTTCGGATGTAATTTACACTGAT
+GTTTGGGTATCAATGGGTGAACCTGATGAAGTATGGAAAGAACGACTTGAATTATTGAAA
+CCATATCAAGTAAATAAAGAAATGATGGATAAAACTGGTAATCCAAATGTTATTTTTGAG
+CATTGCTTACCATCTTTCCATAATGCTGATACGAAAATTGGTCAACAAATTTTTGAAAAA
+TATGGTATTCGAGAAATGGAAGTTACAGATGAAGTATTCGAAAGTAAAGCTTCAGTTGTA
+TTCCAAGAAGCTGAGAACAGAATGCATACAATCAAAGCAGTCATGGTTGCTACATTGGGT
+GAATTTTAAATGATATAAGGAAGTGAATATGATGGCGAAAATAGTAGTAGCATTAGGTGG
+TAATGCTTTAGGAAAATCACCTCAAGAACAACTCGAGCTTGTTAAAAATACTGCGAAATC
+ATTAGTAGGATTAATAACAAAAGGACATGAGATTGTTATTAGTCATGGTAATGGACCACA
+GGTTGGAAGCATTAATTTGGGACTTAACTATGCTGCAGAACATAACCAAGGTCCGGCATT
+TCCATTTGCTGAATGTGGCGCAATGAGTCAAGCTTACATCGGCTATCAATTACAAGAAAG
+CTTACAAAATGAATTGCATTCTATTGGAATGGATAAACAAGTGGTAACACTAGTGACACA
+AGTTGAAGTTGATGAAAATGATCCGGCATTTAACAATCCTTCAAAACCAATTGGGTTATT
+TTACAACAAAGAAGAAGCTGAACAAATTCAAAAAGAAAAAGGATTTATATTTGTTGAAGA
+TGCTGGAAGAGGATATAGACGCGTTGTTCCTTCACCACAACCCATCTCTATTATTGAATT
+AGAGAGTATTAAAACACTTATTAAAAATGATACACTCGTTATTGCTGCTGGTGGTGGAGG
+TATACCAGTAATTAGAGAGCAACATGATGGTTTTAAAGGTATTGATGCAGTTATAGACAA
+AGATAAAACAAGTGCGTTGTTGGGTGCTAATATTCAATGCGATCAATTGATTATTTTAAC
+AGCAATTGATTATGTATATATTAATTTTAACACTGAAAACCAACAGCCTTTGAAAACAAC
+AAATGTTGATGAATTAAAACGATATATAGACGAAAATCAATTTGCAAAAGGAAGTATGTT
+ACCAAAAATTGAAGCAGCCATATCATTTATTGAAAACAATCCAAAAGGAAGTGTGCTTAT
+AACATCATTAAATGAATTAGATGCTGCCTTAGAGGGTAAAGTAGGTACTGTGATTAAAAA
+GTAATTGAATTGAAACGCTTTTCAATTACTATATGTCAAATGCATGATTTTTATTATTTA
+TGTGCACCCCCTGAAAATAATGCCTCTATTTTGATGCGGGGTGCACTTTCTTAATTTATA
+TTTATAAAATCTTTAAGGTAGAAATGCTAGGTTAAATGTCGAAGGAGATGAAACCGTGGA
+AAATACAATTAATGAAAGTGAAAAGAAAAAACGATTTAAATTAAAAATGCCAGGTGCATT
+TATGATTTTATTCATTTTAACGGTTGTTGCAGTTATAGCAACATGGGTTATTCCTGCTGG
+TGCATATTCTAAACTTTCTTACGAACCTTCATCCCAAGAACTAAAGATAGTTAACCCTCA
+TAACCAAGTGAAAAAGGTTCCGGGTACGCAACAGGAACTAGACAAAATGGGGGTTAAAAT
+TAAGATTGAACAATTTAAATCAGGTGCAATTAATAAGCCAGTATCAATTCCGAATACTTA
+TGAAAGATTAAAGCAACATCCAGCTGGACCAGAACAAATAACAAGTAGCATGGTTGAAGG
+TACGATAGAAGCGGTCGATATCATGGTATTCATTCTTGTACTAGGGGGACTTATTGGCGT
+AGTTCAAGCCAGTGGTTCTTTTGAATCGGGATTGTTAGCTTTAACGAAGAAAACAAAAGG
+GCATGAATTTATGCTAATTGTGTTTGTATCAATACTAATGATTATCGGCGGGACGTTATG
+TGGTATTGAAGAAGAAGCTGTAGCATTCTATCCGATTTTAGTCCCTATATTTATAGCGTT
+AGGATACGATTCTATCGTTTCAGTTGGTGCCATATTCCTTGCCAGCTCTGTCGGTAGTAC
+ATTTTCAACTATTAACCCGTTCTCGGTTGTAATTGCCTCTAATGCCGCTGGTACAACTTT
+TACGGATGGCTTGTATTGGAGAATAGGTGCTTGTATTGTCGGTGCGATTTTTGTTATTAG
+TTATTTATATTGGTACTGTAAAAAAATTAAAAACGATCCTAAAGCGTCATATTCTTATGA
+AGACAAAGATGCTTTTGAACAGCAATGGTCTGTATTAAAAGATGACGATAGTGCCCATTT
+TACTTTGCGTAAGAAGATAATCCTTACATTATTTGTACTACCATTTCCAATTATGGTATG
+GGGAGTTATGACGCAAGGTTGGTGGTTCCCAGTTATGGCTTCAGCATTTTTAATATTTAC
+AATTATAATAATGTTTATTGCTGGGACAGGTAAATCTGGATTGGGAGAAAAAGGAACTGT
+AGATGCATTTGTCAATGGTGCATCAAGTTTAGTAGGTGTATCTTTAATTATTGGTTTAGC
+TCGAGGTATTAATTTAGTGTTGAATGAAGGTATGATTTCAGATACAATCTTACACTTTTC
+ATCATCTTTAGTTCAACATATGAGTGGACCATTATTTATCATCGTATTACTATTTATTTT
+CTTCTGTTTAGGTTTTATCGTGCCATCATCTTCTGGATTAGCGGTATTATCAATGCCTAT
+CTTTGCACCACTAGCTGATACAGTAGGTATACCAAGATTCGTCATCGTTACGACATATCA
+ATTTGGTCAATATGCGATGTTATTCTTAGCGCCGACTGGACTTGTTATGGCCACACTACA
+AATGTTAAACATGCGATATTCACATTGGTTCCGATTTGTATGGCCGGTAGTTGCTTTTGT
+ATTGATTTTCGGTGGCGGAGTACTAATTACGCAAGTACTAATTTATTCATAATTTGAAAT
+GCTATATTATAAAAATACTAATTGGGTTTTATGCATCTCGTAGGTTTGTAGAAATACTAA
+ACTAAGCGAGGTGCATTATTATTTTTGATTAAGAAAATAATGACGGTAATGATAACACTA
+GTAAGTAATTGATACAATGCTCTATTTAATAATGATATTTTTAAAATTTGTTTTTAATGT
+AATGTTAGATCTATGGTATATTATATTTATCGTGGTAAATATGTATTTGCTGTAAACAGT
+TTTTGTAACACATGACACATATGGTTGTGAAAATAGCATATATAAAGGATGGCTATAAAT
+GACACATTTGACAAAGGTTTTAGATACACTAACTGGAATATGCGTAGTATTATTATTTAG
+TAAATATTTTGTGGCGTATGCAAATATGGTGTTTGATTGGAATTTAAGATGGTATTTGCT
+AGAAAACATACCACATTTGCCAATTATATTATTTATTCTGATGTTTATTTTCGGAGTACC
+TTCTGAAATGATAAAAGATAGGCAAAGGAAAAATAACGGTGTTTAATTTATCGATATTTA
+GAGGTGATAAAATTTGTCAACTATTACAAAGACTAAAATAAAAATCTTTATTAATTATTT
+TATTGGTATATTAGCCTTGGCGTTTGCTGCTTATATATTATTTAAATAAAGGTTAGAGTG
+AAACGTGTTTATGAACTAGACGTTCTAGTATAGTTACTACAGCTTTAGTCAGAAGGTATC
+ATTGATAAGATCATATTAAATCAAAGAGGCATTGATATACACTAAAAAGAGGCAAGATTA
+CCTGCCTCTTTTTTAGTTATTAAATATGCGTGTTAATTCTTGGTAATAGTGTTACTGAGA
+CGCATTACGAAGAAATTTATCTTGATTTTCCTTTTTTAAAAAGAAGTGAAGATATCCTAT
+AAAGACTCTAAGTACTATTACAGTGGCTAATAACACTTCGATAAACAAAAGACCTTTCCA
+AATATCTGGAAACATAAGTACAGGCAAACTGTTCTTTAAAGCAGTTGCTGAGATTACTAA
+AGGGAATGTGAAAGCTGAAAATACGGGCGAAAAAGGTTCTTTTAGTAATTTAGGCAATTG
+TATAATGATATAAAAATAAAATATTTGAGCTAATATTAAAAATATAATTACGATAAAAGC
+ATTCGCCTTAGGAAAGGCTATAACATATGCTGCAGCGACTAAAGAAAATGGTGCACAAAT
+TGTCGATGTGTTCGGTTTGATTGACGTCTGCAATGGAAATGCTTTTAATCGCTTGAAAAC
+AATAGGTAAGACAATACAAGTAGCTACAAAGCCATATATTACTGTTAATTGACCTATGAA
+AAAACATCCGCTAACGGGTGCCGTCAATCCTGCGATAGCAATACCAATAAAAAGTACAGT
+CCACGAAGGATAAACATTTTCAAGTGAAAAATCTTTTAAATATTTTATTGAAAAAATAAT
+CATATGCGTCATAATTCCCACAAGGCATAAAATCCAAATAGGCGTTATTAAGCTATTGAT
+AAAAGTTATGTTACTAAAAAATGTATTTAAATAAGTAGTACCTAAAAAGCCAGACATGAA
+AAATGTTGTGAACACTGATGAAACTAGAGGACTGTTTAATTGTTCTTTCACATTATTAAA
+ATATTTGATCATAGTACATAAAAGGTGAATCCAAATCAAGAAAGCAAAGATTCCGCAAAC
+AGCGTTTAAAGTAAGAGATAAGTCTTTTAATAGATTACCCAGGCCTAATAATCCTAAGAC
+TAGTCCTGACGTTACTAGAGGTGCTTTTTGAAGTCTCATGATTTACAACCTTTCTTATGT
+GATTTTCTTCACTAATTATATCATGATAGCTATGGCCAATTAATAAGAAGAGTGTGTACT
+ATTACGTTATTAGAGTATGTATATTGGATTGATAGACACATAATTGACATTTAAATCTCA
+AAATTAATGATATTTGAGGTATGCTTTGCAACCTAAATATTGGAATATGTGGAAAAGTAA
+TTATTTAATTTAAATGATGATTGAAGATTTACAAGGGGGTGTACAAAATGAGAAATCAAA
+TTCAAAAACTATTAGACAGTGATTTGAGCAGTTTACATATATCGAAACAAACAGGAGTTC
+CACAAAGCACAATACACAGAATGAGAAAAAAAGAAAGATCATTAGACAATATGTCATTGA
+AAAACGCTGAACTACTTTATAAATTTGCCAATAGTATATTTAGCAATGAAAATTAAGAAG
+AATAATTAGTGAGTTAAATAATTAAAAAACCACGTCTCGATGGACGTGGTTATTTTTTTA
+AATGTATTTAATTATAGGATTTCGGGCCCCTGAAAAGTCCCTAAAATTTTGAATAGTATC
+AAAATCTATAGGAATAATATAATAATAAAAATCTCTACGCATGCAAATGGTGAGTGTTAA
+AAATCTTGGTGTAGTGTTGGTGTAGTTCAGGTGCAGTATAGATGAAATTTGAGCACAAAA
+TTACTTAATTAAAAATGTATAAACAAGTGTCAGACGCGTGTCAATTGTGTGTCAAAAAAT
+TCTATGACGAATAAGCATGCTTGTTTTAATTACTGTATTTTCGGGGCTTATGCGGGGGAC
+GTTTTGGTGACGCATCATACTATATTACTGAAATTCAAAACAAAAAGAGCCCCGTAATCA
+CGGAACTCTTTTGTTTGGTAATGCGTATAAAAATACCTATAAACGTCCTGGGAGGGATTC
+GAACCCCCGACCGATGGCTTAGAAGGCCATTGCTCTATCCAGCTGAGCTACCAGGACACG
+TTTAACAACACAAGAATTATTATATCTAAATGAACTTAAATTAGCAATACCTTGTAAATA
+AAAAATGTTTATATTTTTCACTATTATAGAGCTATTTATCTAAAAAGGTTCAATAAGACT
+TAAATGCGAATTCAGGCAACTTAATTGTGTTAAATACAGTTTTGAATGCCTAACTGTATT
+TCTTTTCTCTTTAAAATACAGTTAAGTACATTATAAGATGTTGTGCGGATAAACAAACTA
+ATTGCATCAAATTTATTTTAAAATAACAACAACAAAACGTTAAGCGAATAACATTTCGGT
+GATTTAAAAGCTACGCACGTTTTTGTTATCTTCAAATTTAAATTTTAAGGAGTGTTTTCA
+ATGGAAGGTTTATTTAACGCAATTAAAGATACCGTAACTGCAGCAATTAATAATGATGGC
+GCAAAATTAGGCACAAGCATTGTGAGCATCGTTGAAAATGGCGTAGGTTTATTAGGTAAA
+TTATTCGGATTCTAATTTCAATATGTTATGTAAGTAATCAGTATTATTTCAAAGGTGAGG
+GAGAGATTTAAATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAAC
+AACATGATAGTGTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTT
+TACTAGGTAAATTATTTGGATTCTAATATAATAACTAATATTCTTTAAAATAAACTGGGT
+GAGCATACTTTAATGTTATGCACTCAGTTTATTTTATTTGCAGAAATTTGAGCCTCTGTT
+AAGATTTAGATACATAGACAATATAGGAGATGGGGAAATTGGGATATAAAAATATTTTGA
+TAGACTTTGATGATACAATTGTTGATTTTTATGATGCAGAAGAATGGGCGTTTCACTATA
+TGGCGAATGTTTTTAATCATAAAGCAACAAAGGATGATTTTTTAACATTTAAAAAAATCA
+ATCACCAACATTGGGAAGCTTTTCAACAAAATAAATTAACGAAGTCTGAAGTATTATCAG
+AACGATTTGTGAATTACTTCAAACATCATCAAATGGAAGTTGATGGGCATCGTGCAGATG
+TGTTATTTAGAAATGGATTAGCAGAAGCTAAAGTTAAATACTTTGATCAAACATTAGAAA
+CAATTGTCGAATTATCGAAAAGACATGATTTATATATTGTTACTAATGGTGTAACCGAAA
+CGCAAAAGAGAAGGTTAAATCAGACGCCGTTGCATAAATATATTAAAAAGATATTTATAT
+CTGAGGAAACAGGATATCAAAAACCTAATCCGGAATTTTTTAATTATGTTTTTAATGATA
+TTGGTGAGGATGAAAGACAGCACTCGATTATAGTTGGAGATTCTTTAACATCTGACATTC
+TAGGTGGAATCAATGCGGGTATAGCTACTTGCTGGTTTAATTTTAGAGGATTTGATCATA
+ATCCAGGAATTATACCTGATTATGAAATTAATTCATGGAAACAACTAAATGATATTGTAC
+GTTAAGACAAATAATTTGAAAATGATTTATATTAGCTTAAGAAAGTAATTGTAAAAACGA
+CACTACGAAAAGATGCATTTTCATGCTCGTAGTGTCGAATATTATTATCTTATGGTTTCC
+ACAATAAGACATCATGTCCCTCTATATTTTTAGCTTCCACATCTGTAAATCCATGATGTA
+AAAAGAAATCCTTAGAATCATTTCTGCCAATGGCTTTAATTGGCATATTGAAACTCTTTG
+CAAAATCAATCAATTCTGAAGCGTACCCTCTGTTTTGATATTTTGGTAATACTTCTAACT
+TCCATAATAATATATAATCTTCAAAATCTGGGAAGTAGATTTCTTCGACATCACCTTTTT
+TTAGTAATGCCATTCTAGCTCCTAATTGATCTCCGACAAATATGCCATAAAATGGTGAAT
+CTGAACTTGCATCAATCATTTGACCGTTTAACTCATTGACCATGTATAAGTCTTTGTTGC
+CAAACGCTCTAAAGTTTTCGAATAATTCGTCAGTTTTGTAATTAATTTCAAGACGTTTGA
+TTTCACTCATATTTATCTACCCCTTATTTGTTTACTACTATTATTATACATTAAACCACT
+TGTGTTTTTCATTGATTTGAAAATGTAAAACAATGAACATTATAAATTTTCCATGATTAA
+ATTGATTTTCAAGCCGTAAAATTGTCAATCACCGGTATAGACTTTATACTATAGTTTGTA
+ATCGCGTACTTAAGGAGAAAAAACATGGACTGTAAAGTAGTTAGTTTAAATGAAAAAGAT
+CAGTTTATACCAAAAATAAAGAGCAGTGACCCTGTAATAACAGGATTATTTCAATATGAT
+GCAGCTCAACAAACTAGTTTTGAAAAAAGGATGTCTAAAGAAAATAATGGAAGAGAAGCG
+GCATTAGCGAATGTTATTCGTGAATATATGAGTGATTTAAAGCTTTCAAATGAACAAGAA
+TTAAACATACAACATTTAGCTAATGGTTCAAAAGTTGTGATTGGTGGACAACAAGCAGGG
+CTTTTCGGGGGACCATTGTATACATTCCATAAAATATTTTCAATCATTACTTTATCTAAG
+GAATTAACGGATACACATAAGCAACAAGTAGTACCAGTTTTTTGGATTGCAGGAGAAGAT
+CATGATTTCGATGAAGTGAATCATACATTTGTTTATAACGAAAATCATGGGTCGCTGCAT
+AAGGTTAAATATCATACAATGGAGATGCCAGAGACGACAGTCTCTAGATATTATCCTGAT
+AAGGCTGAGTTGAAACAAACTTTAAAAACGATGTTCATTCATATGAAAGAAACTGTTCAT
+ACACAAGGTCTACTGGAGATTTGTGACAGAATTATTGACCAATATGACTCGTGGACTGAT
+ATGTTTAAAGCACTACTGCATGAAACATTTAAAGCATATGGCGTTCTATTTATAGATGCG
+CAGTTTGAGCCGTTAAGAAAAATGGAAGCGCCTATGTTTAAAAAGATTTTGAAAAAACAT
+CAGTTGCTTGATGATGCTTTTAGAGCAACACAACAACGTACTCAAAATCAAGGCTTGAAT
+GCGATGATACAAACAGATACAAATGTTCATTTATTCTTACATGATGAAAATATGCGCCAA
+TTAGTTTCGTATGATGGTAAGCATTTTAAATTAAATAAAACAGATAAGACATATATAAAG
\ No newline at end of file
diff --git a/t/data/query_4_missing_genes.fa b/t/data/query_4_missing_genes.fa
new file mode 100644
index 0000000..0606cb2
--- /dev/null
+++ b/t/data/query_4_missing_genes.fa
@@ -0,0 +1,2 @@
+>4_1
+AAA
\ No newline at end of file
diff --git a/t/data/query_4_missing_genes.gff b/t/data/query_4_missing_genes.gff
new file mode 100644
index 0000000..318d94b
--- /dev/null
+++ b/t/data/query_4_missing_genes.gff
@@ -0,0 +1,256 @@
+##gff-version 3
+##sequence-region abc|SC|contig000001 1 15000
+abc|SC|contig000001	Prodigal:2.60	CDS	172	1131	.	-	0	ID=4_1;gene=hly;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q2G1X0,protein motif:TIGRFAMs:TIGR01002,protein motif:Pfam:PF07968.6;locus_tag=abc_00001;product=Alpha-toxin,beta-channel forming cytolysin,Leukocidin/Hemolysin toxin family protein;protein_id=gnl|SC|abc_00001
+##FASTA
+>abc|SC|contig000001
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+ATACCCACCACATTTGGTGGAGAACCAAAAATTAGCCGAAAAACATCATTTCTGAAGTTA
+TCGGCTAAAGTTATAAATTATATTTATTTGTACATGAACAAATAATTTACATTAATTTGT
+CATTTCTTCTTTTTCCCAATCGATTTTATATCTTTCTGAAGAACGATCTGTCCATTTATC
+TTTAGTATTGGTACCTTTCCAATTTGTTGAAGTCCAATGCAATTGGTAGTCATCACGAAC
+TCGTTCGTATATTACATCTATATTTGTTTGTTGTTTGGATGCTTTTCTATCCATAGTAAT
+AACTGTAGCGAAGTCTGGTGAAAACCCTGAAGATAATAGAGAACTTGCTTTGTTAGGATC
+AAGGAAGTTCTCTGCTGCTTTCATAGAACCATTTCTAGTTTTCATGAAAAGTTGATTGCC
+ATATACCGGGTTCCAAGAATCTCTATCATATGGTCCCCAATTTTGATTCACCATATTGTT
+AAATATCACTTTCCAGCCTACTTTTTTATCAGTTGGGCTCTCTAAAATTGTTTTGAAATC
+AGGTTGAACATATTTCAGTGTATGACCAATCGAAACATTTGCACCAATAAGGCCGCCAAT
+TTTTCCTGTATCATCACCAGTAACATTACCGTTGAATCCATAAGTTAAAGTACTCATATA
+CTCTTTTGTATCAATCGAATTTCTTGGATAGTAATCAGATATTTGAGCTACTTCATTATC
+AGGTAGTTGCAACTGTACCTTAAAGGCTGAAGGCCAGGCTAAACCACTTTTGTTAGCACC
+TTCTTCGCTATAAACTCTATATTGACCAGCAATGGTACCTTTCGTTCTAATAACTAGCAG
+TTTTTTATTGTGATTTTTATCATCGATAAAACTATAAAATACTTTTTTGTGCATGCCATT
+TTCTTTATCATAAGTGACTAAATCACCTGTTTTTACTGTAGTATTGCTTCCAATATCTGT
+AGTACCGGTTTTAATATTAATATCAGAATCTGCGGCATTAGCGACAGGATTCATTAATAT
+GGAACCTAGCAATAGTGTTGTTGTTACTGAGCTGACTATACGTGTTTTCATTTTCATCAT
+CCTTCTATTTTTTTTAACGATTTGAGGAAACAATAATCAATATGTCAATTTAGAATATTG
+CAGTTGAGACATATCAAATATTTATAATCGTTTATATTAGTATATTTAATGTAGCAACTG
+ATAAATTACTGAGTGATGATGAGTGATTATTTTAAGAATATGTTTTTAACTTTTATTTAA
+AATTTGAAAGGAAGCATTTCAATTTCGAGGGTTAGTCAAAGTTGAATAAATTCTTTATGA
+AACAAGGAAAAGACATAGCTAATTTTATTGATTAATTTCTTTAAAACTAATGATTTGTTT
+GATTTAAAAATGTAATCGATTACAATATAAAAATACAAATATCTTAGAATTAAATCAATT
+AATTAACTATTAAATAAAAATTAAATATATATTAACTACTGTAAATTAATAAATAGAAAT
+AGAGAAAAAGGGTATTAATTATGTTTGGAATTTCGTCGGGAATAATTTTTGCCATTACAT
+AGAAATATCTAATAGATAATGAAAAAGTATCGTATGTATTTTTAATATAGTGTAAAATAT
+CATATGTAAAATAAAATGTAGATTTTTAGTTAGAGGCATTATAAGAAAATTTTGAGTATA
+GGTTAGCTTTTAATTATGAATCTTATTGAAATTTGATTAATAAATATATGATAGGGGATT
+AAAATGAAACTATTTTATATCGTATTTCTTATTATTATATGGCTGAATATATTTTTAGGA
+AATGAAATTATCCATACACTGACTGTTTTAATAACAACATTGTATATTGTTAATTCAAGA
+AAGGGGATTAAAAATGACAGAGTTGAATAATATTATAAACTCTCTTCAATCTTTGTTTGA
+GTCTGAATCAGGCTATAAAATTTCGAAAAATTCAGGAGTTCCATATCAAACAGTACAAGA
+TTTAAGAAATGGGAAAACCAAACTAGAAGATGCTAGATTTAGAACGATTATTAAACTTTA
+TAGTTACTATGTCTCATTAAAAGAACATTAATCATGGGACAATAAAGTATTGCTATAATA
+ATTGAATCATTAAATGAAGGAATAGAGCCTAAGACATAATAAATCAATGTCTTAGGCTCT
+ACAATATTATATTGGTAGTAGTTGACCGAATGAAAATGACCATGTAACAAGCATTTTTGG
+GGCCCCAACACAGAAGCTGACGAAAAGTCAGCTTACAATAATGTGCAAGTTGGGGATGGG
+CCCCAACACAGAGAATTTCAAAAAGAAATTCTACAGACAATGCAAGTTGGCGGGGCCCCA
+ACATAGAGAAATTCAAAAAGAAATTCTACAGACAATGCAAGTTGGGGAGTGACAACGAAA
+TAAATTTTATAAAATACCATTTCTGTCACATTCCTACTCCCAATTCCATTTAAATATATA
+GAAAATTTCACCTATTATATACATCATACACTTTAAAAATTATCATTTAAATAATCTGAT
+TTGGAATTAAAGTGCATGAAGTATAAGTCACCTTCATATACTAATCAAAGAGGACGTCAA
+CAGTTATTTTATTAGGATTTTTAACATAAACATTTGCTAGATCTGAATGTAATCTTTTGC
+TTAAATCAATAGTGTAGTTATTACCGCCACCGGTGATCTTAAGCTTACCTTTATTACGAT
+TTTCGGTATATAATATTTTATTTTTTATTAACGCTTCTCGTGCACGGAAATCGATTTCTT
+TCAATGTTAATACTGGTTTATTGCCTTGGTATATTTTATGCGCACCAATAATCGTTTGTA
+GTTTATCTTTGTATTGCACAAAAAGATTATAAGTTTTATCAGAAGGTTTTGCGGCTGGTG
+TAACGCCACCTGTAAATGTCTCTCTATAAGACCACCATAACTGATCAGTATCTTTGTCTT
+TTAGTCCAAACACATCTACGTAACGATCTTTTAACTGATTAATATTTCCCCAACTTTCAG
+CGCCCCATAAAGATATATGTGCTGACCATGAATATCTCTTAAGTTCAACATAAATGTTTC
+CATTATCATATTGATATAGCCATTTATTTGAAAATGAAAAATGAGGCTGTGTGTAATATT
+TAATTAATTCATTGATGTTAGTTTCATCTTGACCAATGCTATAAGCTTTAGCTTCAGAGT
+AAAAACTAAATACTGGTTTTTGATTTTGAGGTAATACAGTACCTAATAGTAATAATGTTG
+TCGTTAAAATTATATTTTTCGTGATGTTCTTACTCATTAGAACATCTCCTTTCAGAGGAA
+TCATGATACGAGGAATAAGAAATTTAAATGTGAGCGAAGTCAATATAGTATTTGCGATTA
+TTTTTATTAACTCCATTATTATTGTTAGTTTGATTTTTCGAGGATAACTTCAATTTTTGC
+ATTTTGAGGTTTTTTAACATATCTATTTGCATCAGTTGATGGCAACCTTTTACTTAAATC
+TATTGTGTAGTTATTGTCTGTACCTGTTATTTTAATTTGTCCTTTATTATAAGAATTATT
+ATATAATTTTTTACTTTTAATTAATGTTTGACGAATACGAAAATCTAATTCTTTTAAAGT
+TAAAACAGGCTTATTGCCTTCATAAACTGGAAATCCGCCAGTAAACGTTTCTGCTTTATC
+TTTATATGTTACATTCAGTTTATAGTGTTTATCGTTAGATGTTGCTGCAGGAGTAACACC
+ACCAGTAAACGTTTCTTGAGATAATGCAAAAGAATCAATGGTTTCTTGGTCTTTTATGCC
+AAAAATATCAACGCTTTTATTTCTTAATTGGTTGATATTGCCCCAACTTTCAGGTCCATA
+AACTTGAATATGACTATACCAAGAAAACTGTAACAACGTTGCATGAATCGTACCGTTATC
+TTTTTGCCATAACGTACTGTTAGAGAAGGTTAAATATTTTTGCGAGTAATATTTAGTTAA
+CTCATTAACGTTAGTTTCGTTTTGATTTATATAATAAGCTTTCGCTTCAGATGAAGAATT
+GATAGGTGTATTAGGAAATTGTGTAAATGCTGTACCTAATAGTAACAATGTTGTTGATAA
+AATAATTTTTTTCGTGATGTTATTGTTCATTCGAATTTCTCCTTTGAGTATTGTTGGAAT
+GTTTAATTATAAAAAGGGGTTAATTAGATAATTGAAATTATCCGCATTTACAAAAGGTAA
+TAGGTTAGTTAGATTTTTCGAGTATGACTTCAATTTGTGCATTTTTAGGATTTTTAACAT
+AACGGTTTGTGTCAGTTAATTTTAACTTTTTACTTAAATCAATCGTGTAATTATTTCCAT
+CAGCAGTTATCTTAATTTGACCTTTATTAAATTCTCCGTTATATAACTTTTTATTCTTTA
+TTAATGTTTGACGAATACGGAAATCTAATTCTTTTAAAGTTAATACTGGTTTATTTCCTT
+TGTAAAATTCATGTCCACCGATGATAGTTTGTTGTTTATCACTATATTTTAAAAATAGTC
+TATAAGGTTTATCAGATGAAGTAGCTGCTGGCGTAACACCACCAGTAAATGTTTCATCAT
+AAGTCCAGTAACCTTCAACTGTGTCCTCATCTTTAGTTCCAAATATATCAACGTATTTAT
+TTCTTAACTGATTAATGTTTCCCCAACTCTCGGATCCAAACACTTGAATATGACTATACC
+AAACCCACGTTTGCAATGTTGCATGAATGCTACCATTGGGCTTTTGCCATAGCCATTTTC
+CAGATAATGAAAAATGCGGCTGAGTGTAATATTTGATTAACTCATTGATATTAGTCTCGT
+TTTCACTGATATTATAGGCTTTTGCTTCAGATGAAAAACTGATTGGTGTTTTAGGAAGTT
+GTGTTGATGTGGTTCCTAAAAGTAACAATGCTGTTGATAAAACTAATTTATTCATGATGT
+TCTTTTTCATATGAAAATCTCCTTTGCGTGAATTACCCAAAGTATATAAGCTATTACACC
+GATTCGGAATTAAATAAAAGCTAAAACTATGTTAAATAAACTTAAACAGTTAGTAGTGTT
+ATTTAAGCAAAACTTATCATTTTTAAGTTTTGGACAGAAACAGTACTTAATAAAGTAGGC
+GGGAGTTATATATTAAAAACGACACGTTACAATTATTCTCTAATCAATTGCATTAAATTG
+TTTGATAATTGAATTTTCTAACTACTTGAAAAATAGTTATACTTTAAATGTAGTACTTAT
+TTTAATTATTTCCTACTACTTAAATTTAATATTAATAAAATGTTCATTTAATTATTGATA
+AAATATTACAAATTTTAATAGTAGGTTGTGTTTATTTTGTATGCGCTTACAATTTAGGTG
+TAACTAAAATAAAAGGAGTTGTTATTAATGAAAAATTTACGAAACAGAAGTTTTTTAACT
+TTATTAGACTTTTCACGACAAGAGGTAGAATTCTTATTAACACTCTCCGAGGATTTAAAA
+CGTGCTAAATATATTGGCACTGAAAAGCCTATGTTAAAAAATAAAAATATTGCACTGTTA
+TTTGAAAAAGATTCTACAAGAACGCGATGTGCATTTGAAGTTGCAGCGCATGATCAAGGT
+GCAAATGTAACTTATTTAGGCCCAACTGGATCACAAATGGGTAAAAAAGAAACAACTAAA
+GATACTGCACGTGTGCTTGGTGGAATGTATGATGGCATTGAATACCGTGGTTTTTCACAA
+AGAACAGTAGAAACTTTAGCTGAAAATTCAGGCGTACCAGTGTGGAATGGTTTAACTGAT
+GAAGATCATCCTACTCAAGTTCTTGCTGATTTCTTAACAGCAAAAGAAGTCTTAAAAAAA
+GATTATGCAGATATTAACTTTACATATGTTGGAGATGGTCGTAATAACGTTGCAAATGCA
+TTAATGCAAGGTGCTGCCATTATGGGTATGAACTTCCATTTAGTTTGTCCAAAAGAATTA
+AATCCAACAGATGAATTATTAAATCGCTGTAAAAATATTGCCGCTGAAAATGGTGGCAAC
+ATATTAATCACAGATGATATTGACCAAGGTGTAAAAGGTTCGGATGTAATTTACACTGAT
+GTTTGGGTATCAATGGGTGAACCTGATGAAGTATGGAAAGAACGACTTGAATTATTGAAA
+CCATATCAAGTAAATAAAGAAATGATGGATAAAACTGGTAATCCAAATGTTATTTTTGAG
+CATTGCTTACCATCTTTCCATAATGCTGATACGAAAATTGGTCAACAAATTTTTGAAAAA
+TATGGTATTCGAGAAATGGAAGTTACAGATGAAGTATTCGAAAGTAAAGCTTCAGTTGTA
+TTCCAAGAAGCTGAGAACAGAATGCATACAATCAAAGCAGTCATGGTTGCTACATTGGGT
+GAATTTTAAATGATATAAGGAAGTGAATATGATGGCGAAAATAGTAGTAGCATTAGGTGG
+TAATGCTTTAGGAAAATCACCTCAAGAACAACTCGAGCTTGTTAAAAATACTGCGAAATC
+ATTAGTAGGATTAATAACAAAAGGACATGAGATTGTTATTAGTCATGGTAATGGACCACA
+GGTTGGAAGCATTAATTTGGGACTTAACTATGCTGCAGAACATAACCAAGGTCCGGCATT
+TCCATTTGCTGAATGTGGCGCAATGAGTCAAGCTTACATCGGCTATCAATTACAAGAAAG
+CTTACAAAATGAATTGCATTCTATTGGAATGGATAAACAAGTGGTAACACTAGTGACACA
+AGTTGAAGTTGATGAAAATGATCCGGCATTTAACAATCCTTCAAAACCAATTGGGTTATT
+TTACAACAAAGAAGAAGCTGAACAAATTCAAAAAGAAAAAGGATTTATATTTGTTGAAGA
+TGCTGGAAGAGGATATAGACGCGTTGTTCCTTCACCACAACCCATCTCTATTATTGAATT
+AGAGAGTATTAAAACACTTATTAAAAATGATACACTCGTTATTGCTGCTGGTGGTGGAGG
+TATACCAGTAATTAGAGAGCAACATGATGGTTTTAAAGGTATTGATGCAGTTATAGACAA
+AGATAAAACAAGTGCGTTGTTGGGTGCTAATATTCAATGCGATCAATTGATTATTTTAAC
+AGCAATTGATTATGTATATATTAATTTTAACACTGAAAACCAACAGCCTTTGAAAACAAC
+AAATGTTGATGAATTAAAACGATATATAGACGAAAATCAATTTGCAAAAGGAAGTATGTT
+ACCAAAAATTGAAGCAGCCATATCATTTATTGAAAACAATCCAAAAGGAAGTGTGCTTAT
+AACATCATTAAATGAATTAGATGCTGCCTTAGAGGGTAAAGTAGGTACTGTGATTAAAAA
+GTAATTGAATTGAAACGCTTTTCAATTACTATATGTCAAATGCATGATTTTTATTATTTA
+TGTGCACCCCCTGAAAATAATGCCTCTATTTTGATGCGGGGTGCACTTTCTTAATTTATA
+TTTATAAAATCTTTAAGGTAGAAATGCTAGGTTAAATGTCGAAGGAGATGAAACCGTGGA
+AAATACAATTAATGAAAGTGAAAAGAAAAAACGATTTAAATTAAAAATGCCAGGTGCATT
+TATGATTTTATTCATTTTAACGGTTGTTGCAGTTATAGCAACATGGGTTATTCCTGCTGG
+TGCATATTCTAAACTTTCTTACGAACCTTCATCCCAAGAACTAAAGATAGTTAACCCTCA
+TAACCAAGTGAAAAAGGTTCCGGGTACGCAACAGGAACTAGACAAAATGGGGGTTAAAAT
+TAAGATTGAACAATTTAAATCAGGTGCAATTAATAAGCCAGTATCAATTCCGAATACTTA
+TGAAAGATTAAAGCAACATCCAGCTGGACCAGAACAAATAACAAGTAGCATGGTTGAAGG
+TACGATAGAAGCGGTCGATATCATGGTATTCATTCTTGTACTAGGGGGACTTATTGGCGT
+AGTTCAAGCCAGTGGTTCTTTTGAATCGGGATTGTTAGCTTTAACGAAGAAAACAAAAGG
+GCATGAATTTATGCTAATTGTGTTTGTATCAATACTAATGATTATCGGCGGGACGTTATG
+TGGTATTGAAGAAGAAGCTGTAGCATTCTATCCGATTTTAGTCCCTATATTTATAGCGTT
+AGGATACGATTCTATCGTTTCAGTTGGTGCCATATTCCTTGCCAGCTCTGTCGGTAGTAC
+ATTTTCAACTATTAACCCGTTCTCGGTTGTAATTGCCTCTAATGCCGCTGGTACAACTTT
+TACGGATGGCTTGTATTGGAGAATAGGTGCTTGTATTGTCGGTGCGATTTTTGTTATTAG
+TTATTTATATTGGTACTGTAAAAAAATTAAAAACGATCCTAAAGCGTCATATTCTTATGA
+AGACAAAGATGCTTTTGAACAGCAATGGTCTGTATTAAAAGATGACGATAGTGCCCATTT
+TACTTTGCGTAAGAAGATAATCCTTACATTATTTGTACTACCATTTCCAATTATGGTATG
+GGGAGTTATGACGCAAGGTTGGTGGTTCCCAGTTATGGCTTCAGCATTTTTAATATTTAC
+AATTATAATAATGTTTATTGCTGGGACAGGTAAATCTGGATTGGGAGAAAAAGGAACTGT
+AGATGCATTTGTCAATGGTGCATCAAGTTTAGTAGGTGTATCTTTAATTATTGGTTTAGC
+TCGAGGTATTAATTTAGTGTTGAATGAAGGTATGATTTCAGATACAATCTTACACTTTTC
+ATCATCTTTAGTTCAACATATGAGTGGACCATTATTTATCATCGTATTACTATTTATTTT
+CTTCTGTTTAGGTTTTATCGTGCCATCATCTTCTGGATTAGCGGTATTATCAATGCCTAT
+CTTTGCACCACTAGCTGATACAGTAGGTATACCAAGATTCGTCATCGTTACGACATATCA
+ATTTGGTCAATATGCGATGTTATTCTTAGCGCCGACTGGACTTGTTATGGCCACACTACA
+AATGTTAAACATGCGATATTCACATTGGTTCCGATTTGTATGGCCGGTAGTTGCTTTTGT
+ATTGATTTTCGGTGGCGGAGTACTAATTACGCAAGTACTAATTTATTCATAATTTGAAAT
+GCTATATTATAAAAATACTAATTGGGTTTTATGCATCTCGTAGGTTTGTAGAAATACTAA
+ACTAAGCGAGGTGCATTATTATTTTTGATTAAGAAAATAATGACGGTAATGATAACACTA
+GTAAGTAATTGATACAATGCTCTATTTAATAATGATATTTTTAAAATTTGTTTTTAATGT
+AATGTTAGATCTATGGTATATTATATTTATCGTGGTAAATATGTATTTGCTGTAAACAGT
+TTTTGTAACACATGACACATATGGTTGTGAAAATAGCATATATAAAGGATGGCTATAAAT
+GACACATTTGACAAAGGTTTTAGATACACTAACTGGAATATGCGTAGTATTATTATTTAG
+TAAATATTTTGTGGCGTATGCAAATATGGTGTTTGATTGGAATTTAAGATGGTATTTGCT
+AGAAAACATACCACATTTGCCAATTATATTATTTATTCTGATGTTTATTTTCGGAGTACC
+TTCTGAAATGATAAAAGATAGGCAAAGGAAAAATAACGGTGTTTAATTTATCGATATTTA
+GAGGTGATAAAATTTGTCAACTATTACAAAGACTAAAATAAAAATCTTTATTAATTATTT
+TATTGGTATATTAGCCTTGGCGTTTGCTGCTTATATATTATTTAAATAAAGGTTAGAGTG
+AAACGTGTTTATGAACTAGACGTTCTAGTATAGTTACTACAGCTTTAGTCAGAAGGTATC
+ATTGATAAGATCATATTAAATCAAAGAGGCATTGATATACACTAAAAAGAGGCAAGATTA
+CCTGCCTCTTTTTTAGTTATTAAATATGCGTGTTAATTCTTGGTAATAGTGTTACTGAGA
+CGCATTACGAAGAAATTTATCTTGATTTTCCTTTTTTAAAAAGAAGTGAAGATATCCTAT
+AAAGACTCTAAGTACTATTACAGTGGCTAATAACACTTCGATAAACAAAAGACCTTTCCA
+AATATCTGGAAACATAAGTACAGGCAAACTGTTCTTTAAAGCAGTTGCTGAGATTACTAA
+AGGGAATGTGAAAGCTGAAAATACGGGCGAAAAAGGTTCTTTTAGTAATTTAGGCAATTG
+TATAATGATATAAAAATAAAATATTTGAGCTAATATTAAAAATATAATTACGATAAAAGC
+ATTCGCCTTAGGAAAGGCTATAACATATGCTGCAGCGACTAAAGAAAATGGTGCACAAAT
+TGTCGATGTGTTCGGTTTGATTGACGTCTGCAATGGAAATGCTTTTAATCGCTTGAAAAC
+AATAGGTAAGACAATACAAGTAGCTACAAAGCCATATATTACTGTTAATTGACCTATGAA
+AAAACATCCGCTAACGGGTGCCGTCAATCCTGCGATAGCAATACCAATAAAAAGTACAGT
+CCACGAAGGATAAACATTTTCAAGTGAAAAATCTTTTAAATATTTTATTGAAAAAATAAT
+CATATGCGTCATAATTCCCACAAGGCATAAAATCCAAATAGGCGTTATTAAGCTATTGAT
+AAAAGTTATGTTACTAAAAAATGTATTTAAATAAGTAGTACCTAAAAAGCCAGACATGAA
+AAATGTTGTGAACACTGATGAAACTAGAGGACTGTTTAATTGTTCTTTCACATTATTAAA
+ATATTTGATCATAGTACATAAAAGGTGAATCCAAATCAAGAAAGCAAAGATTCCGCAAAC
+AGCGTTTAAAGTAAGAGATAAGTCTTTTAATAGATTACCCAGGCCTAATAATCCTAAGAC
+TAGTCCTGACGTTACTAGAGGTGCTTTTTGAAGTCTCATGATTTACAACCTTTCTTATGT
+GATTTTCTTCACTAATTATATCATGATAGCTATGGCCAATTAATAAGAAGAGTGTGTACT
+ATTACGTTATTAGAGTATGTATATTGGATTGATAGACACATAATTGACATTTAAATCTCA
+AAATTAATGATATTTGAGGTATGCTTTGCAACCTAAATATTGGAATATGTGGAAAAGTAA
+TTATTTAATTTAAATGATGATTGAAGATTTACAAGGGGGTGTACAAAATGAGAAATCAAA
+TTCAAAAACTATTAGACAGTGATTTGAGCAGTTTACATATATCGAAACAAACAGGAGTTC
+CACAAAGCACAATACACAGAATGAGAAAAAAAGAAAGATCATTAGACAATATGTCATTGA
+AAAACGCTGAACTACTTTATAAATTTGCCAATAGTATATTTAGCAATGAAAATTAAGAAG
+AATAATTAGTGAGTTAAATAATTAAAAAACCACGTCTCGATGGACGTGGTTATTTTTTTA
+AATGTATTTAATTATAGGATTTCGGGCCCCTGAAAAGTCCCTAAAATTTTGAATAGTATC
+AAAATCTATAGGAATAATATAATAATAAAAATCTCTACGCATGCAAATGGTGAGTGTTAA
+AAATCTTGGTGTAGTGTTGGTGTAGTTCAGGTGCAGTATAGATGAAATTTGAGCACAAAA
+TTACTTAATTAAAAATGTATAAACAAGTGTCAGACGCGTGTCAATTGTGTGTCAAAAAAT
+TCTATGACGAATAAGCATGCTTGTTTTAATTACTGTATTTTCGGGGCTTATGCGGGGGAC
+GTTTTGGTGACGCATCATACTATATTACTGAAATTCAAAACAAAAAGAGCCCCGTAATCA
+CGGAACTCTTTTGTTTGGTAATGCGTATAAAAATACCTATAAACGTCCTGGGAGGGATTC
+GAACCCCCGACCGATGGCTTAGAAGGCCATTGCTCTATCCAGCTGAGCTACCAGGACACG
+TTTAACAACACAAGAATTATTATATCTAAATGAACTTAAATTAGCAATACCTTGTAAATA
+AAAAATGTTTATATTTTTCACTATTATAGAGCTATTTATCTAAAAAGGTTCAATAAGACT
+TAAATGCGAATTCAGGCAACTTAATTGTGTTAAATACAGTTTTGAATGCCTAACTGTATT
+TCTTTTCTCTTTAAAATACAGTTAAGTACATTATAAGATGTTGTGCGGATAAACAAACTA
+ATTGCATCAAATTTATTTTAAAATAACAACAACAAAACGTTAAGCGAATAACATTTCGGT
+GATTTAAAAGCTACGCACGTTTTTGTTATCTTCAAATTTAAATTTTAAGGAGTGTTTTCA
+ATGGAAGGTTTATTTAACGCAATTAAAGATACCGTAACTGCAGCAATTAATAATGATGGC
+GCAAAATTAGGCACAAGCATTGTGAGCATCGTTGAAAATGGCGTAGGTTTATTAGGTAAA
+TTATTCGGATTCTAATTTCAATATGTTATGTAAGTAATCAGTATTATTTCAAAGGTGAGG
+GAGAGATTTAAATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAAC
+AACATGATAGTGTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTT
+TACTAGGTAAATTATTTGGATTCTAATATAATAACTAATATTCTTTAAAATAAACTGGGT
+GAGCATACTTTAATGTTATGCACTCAGTTTATTTTATTTGCAGAAATTTGAGCCTCTGTT
+AAGATTTAGATACATAGACAATATAGGAGATGGGGAAATTGGGATATAAAAATATTTTGA
+TAGACTTTGATGATACAATTGTTGATTTTTATGATGCAGAAGAATGGGCGTTTCACTATA
+TGGCGAATGTTTTTAATCATAAAGCAACAAAGGATGATTTTTTAACATTTAAAAAAATCA
+ATCACCAACATTGGGAAGCTTTTCAACAAAATAAATTAACGAAGTCTGAAGTATTATCAG
+AACGATTTGTGAATTACTTCAAACATCATCAAATGGAAGTTGATGGGCATCGTGCAGATG
+TGTTATTTAGAAATGGATTAGCAGAAGCTAAAGTTAAATACTTTGATCAAACATTAGAAA
+CAATTGTCGAATTATCGAAAAGACATGATTTATATATTGTTACTAATGGTGTAACCGAAA
+CGCAAAAGAGAAGGTTAAATCAGACGCCGTTGCATAAATATATTAAAAAGATATTTATAT
+CTGAGGAAACAGGATATCAAAAACCTAATCCGGAATTTTTTAATTATGTTTTTAATGATA
+TTGGTGAGGATGAAAGACAGCACTCGATTATAGTTGGAGATTCTTTAACATCTGACATTC
+TAGGTGGAATCAATGCGGGTATAGCTACTTGCTGGTTTAATTTTAGAGGATTTGATCATA
+ATCCAGGAATTATACCTGATTATGAAATTAATTCATGGAAACAACTAAATGATATTGTAC
+GTTAAGACAAATAATTTGAAAATGATTTATATTAGCTTAAGAAAGTAATTGTAAAAACGA
+CACTACGAAAAGATGCATTTTCATGCTCGTAGTGTCGAATATTATTATCTTATGGTTTCC
+ACAATAAGACATCATGTCCCTCTATATTTTTAGCTTCCACATCTGTAAATCCATGATGTA
+AAAAGAAATCCTTAGAATCATTTCTGCCAATGGCTTTAATTGGCATATTGAAACTCTTTG
+CAAAATCAATCAATTCTGAAGCGTACCCTCTGTTTTGATATTTTGGTAATACTTCTAACT
+TCCATAATAATATATAATCTTCAAAATCTGGGAAGTAGATTTCTTCGACATCACCTTTTT
+TTAGTAATGCCATTCTAGCTCCTAATTGATCTCCGACAAATATGCCATAAAATGGTGAAT
+CTGAACTTGCATCAATCATTTGACCGTTTAACTCATTGACCATGTATAAGTCTTTGTTGC
+CAAACGCTCTAAAGTTTTCGAATAATTCGTCAGTTTTGTAATTAATTTCAAGACGTTTGA
+TTTCACTCATATTTATCTACCCCTTATTTGTTTACTACTATTATTATACATTAAACCACT
+TGTGTTTTTCATTGATTTGAAAATGTAAAACAATGAACATTATAAATTTTCCATGATTAA
+ATTGATTTTCAAGCCGTAAAATTGTCAATCACCGGTATAGACTTTATACTATAGTTTGTA
+ATCGCGTACTTAAGGAGAAAAAACATGGACTGTAAAGTAGTTAGTTTAAATGAAAAAGAT
+CAGTTTATACCAAAAATAAAGAGCAGTGACCCTGTAATAACAGGATTATTTCAATATGAT
+GCAGCTCAACAAACTAGTTTTGAAAAAAGGATGTCTAAAGAAAATAATGGAAGAGAAGCG
+GCATTAGCGAATGTTATTCGTGAATATATGAGTGATTTAAAGCTTTCAAATGAACAAGAA
+TTAAACATACAACATTTAGCTAATGGTTCAAAAGTTGTGATTGGTGGACAACAAGCAGGG
+CTTTTCGGGGGACCATTGTATACATTCCATAAAATATTTTCAATCATTACTTTATCTAAG
+GAATTAACGGATACACATAAGCAACAAGTAGTACCAGTTTTTTGGATTGCAGGAGAAGAT
+CATGATTTCGATGAAGTGAATCATACATTTGTTTATAACGAAAATCATGGGTCGCTGCAT
+AAGGTTAAATATCATACAATGGAGATGCCAGAGACGACAGTCTCTAGATATTATCCTGAT
+AAGGCTGAGTTGAAACAAACTTTAAAAACGATGTTCATTCATATGAAAGAAACTGTTCAT
+ACACAAGGTCTACTGGAGATTTGTGACAGAATTATTGACCAATATGACTCGTGGACTGAT
+ATGTTTAAAGCACTACTGCATGAAACATTTAAAGCATATGGCGTTCTATTTATAGATGCG
+CAGTTTGAGCCGTTAAGAAAAATGGAAGCGCCTATGTTTAAAAAGATTTTGAAAAAACAT
+CAGTTGCTTGATGATGCTTTTAGAGCAACACAACAACGTACTCAAAATCAAGGCTTGAAT
+GCGATGATACAAACAGATACAAATGTTCATTTATTCTTACATGATGAAAATATGCGCCAA
+TTAGTTTCGTATGATGGTAAGCATTTTAAATTAAATAAAACAGATAAGACATATATAAAG
\ No newline at end of file
diff --git a/t/data/query_5.gff b/t/data/query_5.gff
new file mode 100644
index 0000000..acb51e9
--- /dev/null
+++ b/t/data/query_5.gff
@@ -0,0 +1,271 @@
+##gff-version 3
+##sequence-region abc|SC|contig000001 1 15000
+abc|SC|contig000001	Prodigal:2.60	CDS	172	1131	.	-	0	ID=3_1;gene=hly;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q2G1X0,protein motif:TIGRFAMs:TIGR01002,protein motif:Pfam:PF07968.6;locus_tag=abc_00001;product=Alpha-toxin,beta-channel forming cytolysin,Leukocidin/Hemolysin toxin family protein;protein_id=gnl|SC|abc_00001
+abc|SC|contig000001	Prodigal:2.60	CDS	1804	1950	.	+	0	ID=abc_50002;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00002;product=hypothetical protein;protein_id=gnl|SC|abc_00002
+abc|SC|contig000001	Prodigal:2.60	CDS	1934	2131	.	+	0	ID=abc_50003;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00003;product=hypothetical protein;protein_id=gnl|SC|abc_00003
+abc|SC|contig000001	Prodigal:2.60	CDS	2621	3337	.	-	0	ID=abc_50004;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13350,protein motif:Pfam:PF02876.11;locus_tag=abc_00004;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00004
+abc|SC|contig000001	Prodigal:2.60	CDS	3445	4170	.	-	0	ID=3_2;gene=speH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0C0I6,protein motif:CLUSTERS:PRK13349,protein motif:Pfam:PF02876.11;locus_tag=abc_00005;note=SPE H;product=hypothetical protein,superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00005
+abc|SC|contig000001	Prodigal:2.60	CDS	4265	4990	.	-	0	ID=abc_50006;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13043,protein motif:Pfam:PF02876.11;locus_tag=abc_00006;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00006
+abc|SC|contig000001	Prodigal:2.60	CDS	5428	6429	.	+	0	ID=3_3;eC_number=2.1.3.3;gene=argF;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P99073,protein motif:CLUSTERS:PRK04284,protein motif:Cdd:COG0078,protein motif:TIGRFAMs:TIGR00658,protein motif:Pfam:PF00185.1;locus_tag=abc_00007;product=Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Aspartate/ornithine carbamoyltransferase%2C As [...]
+abc|SC|contig000001	Prodigal:2.60	CDS	6449	7384	.	+	0	ID=abc_50008;eC_number=2.7.2.2;gene=arcC1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q7A627,protein motif:CLUSTERS:PRK12353,protein motif:TIGRFAMs:TIGR00746,protein motif:Pfam:PF00696.22;locus_tag=abc_00008;product=Carbamate kinase 1,putative amino acid kinase,carbamate kinase,Amino acid kinase family protein;protein_id=gnl|SC|abc_00008
+abc|SC|contig000001	RNAmmer:1.2	rRNA	7556	9112	.	+	0	ID=abc_51705;inference=COORDINATES:profile:RNAmmer:1.2;locus_tag=abc_01705;product=16S ribosomal RNA
+abc|SC|contig000001	Prodigal:2.60	CDS	9419	9646	.	+	0	ID=abc_50010;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00010;product=hypothetical protein;protein_id=gnl|SC|abc_00010
+abc|SC|contig000001	Prodigal:2.60	CDS	9952	10899	.	-	0	ID=abc_50011;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF03595.11;locus_tag=abc_00011;product=C4-dicarboxylate transporter/malic acid transport protein;protein_id=gnl|SC|abc_00011
+abc|SC|contig000001	Prodigal:2.60	CDS	11148	11336	.	+	0	ID=abc_50012;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00012;product=hypothetical protein;protein_id=gnl|SC|abc_00012
+abc|SC|contig000001	Aragorn:1.2.34	tRNA	11803	11878	.	-	0	ID=abc_50013;inference=COORDINATES:profile:Aragorn:1.2.34;locus_tag=abc_00013;product=tRNA-Arg(tct)
+abc|SC|contig000001	Prodigal:2.60	CDS	12241	12375	.	+	0	ID=abc_50014;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5;locus_tag=abc_00014;product=Gonococcal growth inhibitor III,Staphylococcus haemolytic protein;protein_id=gnl|SC|abc_00014
+abc|SC|contig000001	Prodigal:2.60	CDS	12432	12566	.	+	0	ID=abc_50015;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5;locus_tag=abc_00015;product=Gonococcal growth inhibitor III,Staphylococcus haemolytic protein;protein_id=gnl|SC|abc_00015
+abc|SC|contig000001	Prodigal:2.60	CDS	12699	13385	.	+	0	ID=3_5;eC_number=3.-.-.-;gene=yfnB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O06480,protein motif:CLUSTERS:PRK09449,protein motif:TIGRFAMs:TIGR02254,protein motif:Pfam:PF00702.1;locus_tag=abc_00016;product=Putative HAD-hydrolase yfnB,dUMP phosphatase,HAD hydrolase,haloacid dehalogenase-like hydrolase;protein_id=gnl|SC|abc_00016
+##FASTA
+>abc|SC|contig000001
+ACTGGCCGCCTAATTTTAAAAACTCCCCAAGCTGTAATTTAAGGGGGTTCTTTAAATTAT
+ATACCCACCACATTTTTTGGAGAACCCCAAACTAGCCGAAAAGGGGCATTTCTGAAGTTA
+ACGGCTAAAGTTATTTTTTATATTTCCCTGTCCATGAACAAAGGGGTTACATTAATTTGT
+AATTTCTTCTTTTTTTTAATCGATTCCCTATCTTTCTGAAGAGGGGTCTGTCCATTTATC
+ATTAGTATTGGTACTTTTCCAATTTCCCGAACTCCAATGCAAGGGGTAGTCATCACGAAC
+ACGTTCGTATATTATTTCTATATTTCCCTGTCGTTTGGATGCGGGGCTATCCATAGTAAT
+AACTGTAGCGAAGTTTTGTGAAAACCCCGAACATAATAGAGAGGGGGCTTTGTTAGGATC
+AAGGAAGTTCTCTGTTTCTTTCATACCCCCACTTCTAGTTTTGGGGAAAAGTTGATTGCC
+ATATACCGGGTTCCTTTAATCTCTACCCTATCGTCCCCAATTGGGGTTCACCATATTGTT
+AAATATCACTTTCCTTTCTACTTTTCCCTCACTTGGGCTCTCGGGGATTGTTTTGAAATC
+AGGTTGAACATATTTTTGTGTATGACCCATCCAAACATTTGCGGGGATAAGGCCGCCAAT
+ATTTCCTGTATCATTTTCAGTAACACCCCCGCTGAATCCATAGGGGAAAGTACTCATATA
+ATCTTTTGTATCAATTTAATTTCTTCCCTAGCAATCAGATATGGGGGCTACTTCATTATC
+AGGTAGTTGCAACTTTTCCTTAAAGCCCGAACGCCAGGCTAAGGGGCTTTTGTTAGCACC
+ATCTTCGCTATAAATTTTATATTGACCCGCACTGGTACCTTTGGGGCTAATAACTAGCAG
+ATTTTTATTGTGATTTTTATCATCGCCCAAACTATAAAATACGGGGTTGTGCATGCCATT
+ATCTTTATCATAAGTTTCTAAATCACCCGTTCTTACTGTAGTGGGGCTTCCAATATCTGT
+AGTACCGGTTTTAATTTTAATATCACCCTCTCCGGCATTAGCGGGGGGATTCATTAATAT
+AGAACCTAGCAATATTTTTGTTGTTCCCGAGCTGACTATACGGGGGTTCATTTTCATCAT
+ACTTCTATTTTTTTTTTCGATTTGACCCAACCATAATCAATAGGGGAATTTAGAATATTG
+AAGTTGAGACATATTTTATATTTATCCCCGTCTATATTAGTAGGGGTAATGTAGCAACTG
+ATAAATTACTGAGTTTTGATGAGTGCCCATTCTAAGAATATGGGGGTAACTTTTATTTAA
+AATTTGAAAGGAAGTTTTTCAATTTCCCGGGCTAGTCAAAGTGGGGTAAATTCTTTATGA
+AACAAGGAAAAGACTTTGCTAATTTCCCTGACTAATTTCTTTGGGGCTAATGATTTGTTT
+AATTTAAAAATGTATTTGATTACAACCCAAACATACAAATATGGGGGAATTAAATCAATT
+AATTAACTATTAAATTTAAATTAAACCCATACTAACTACTGTGGGGTAATAAATAGAAAT
+AGAGAAAAAGGGTATTTATTATGTTCCCAATCTCGTCGGGAAGGGGTTTTGCCATTACAT
+AGAAATATCTAATATTTAATGAAAACCCATCCTATGTATTTTGGGGATAGTGTAAAATAT
+AATATGTAAAATAATTTGTAGATTTCCCGTTCGAGGCATTATGGGGAAATTTTGAGTATA
+AGTTAGCTTTTAATTTTGAATCTTACCCAAACTTGATTAATAGGGGTATGATAGGGGATT
+AAAATGAAACTATTTTTTATCGTATCCCTTACTATTATATGGGGGGATATATTTTTAGGA
+AATGAAATTATCCATTTACTGACTGCCCTAACAACAACATTGGGGGTTGTTAATTCAAGA
+AAGGGGATTAAAAATTTCAGAGTTGCCCAATCTTATAAACTCGGGGCAATCTTTGTTTGA
+ATCTGAATCAGGCTTTTAAATTTCGCCCAATCCAGGAGTTCCGGGGCAAACAGTACAAGA
+ATTAAGAAATGGGATTTCCAAACTACCCGATCCTAGATTTAGGGGGATTATTAAACTTTA
+AAGTTACTATGTCTTTTTAAAAGAACCCTAACCATGGGACAAGGGGGTATTGCTATAATA
+ATTGAATCATTAAATTTAGGAATAGCCCCTACGACATAATAAGGGGATGTCTTAGGCTCT
+ACAATATTATATTGTTTGTAGTTGACCCAATCAAAATGACCAGGGGACAAGCATTTTTGG
+AGCCCCAACACAGATTTTGACGAAACCCCAGCTTACAATAATGGGGAAGTTGGGGATGGG
+ACCCAACACAGAGATTTTCAAAAAGCCCTTCCACAGACAATGGGGGTTGGCGGGGCCCCA
+ACATAGAGAAATTCTTTAAGAAATTCCCCAGCCAATGCAAGTGGGGGAGTGACAACGAAA
+AAAATTTTATAAAATTTCATTTCTGCCCCATCCCTACTCCCAGGGGCATTTAAATATATA
+AAAAATTTCACCTATTTTATACATCCCCCACCTTAAAAATTAGGGGTTAAATAATCTGAT
+ATGGAATTAAAGTGTTTGAAGTATACCCCACCTTCATATACTGGGGAAAGAGGACGTCAA
+AAGTTATTTTATTATTTTTTTTAACCCCAACCTTTGCTAGATGGGGATGTAATCTTTTGC
+ATAAATCAATAGTGTTTTTATTACCCCCACCCGTGATCTTAAGGGGACCTTTATTACGAT
+ATTCGGTATATAATTTTTTATTTTTCCCTAACGCTTCTCGTGGGGGGAAATCGATTTCTT
+ACAATGTTAATACTTTTTTATTGCCCCCGTACATTTTATGCGGGGGAATAATCGTTTGTA
+ATTTATCTTTGTATTTTACAAAAAGCCCATACGTTTTATCAGGGGGTTTTGCGGCTGGTG
+AAACGCCACCTGTATTTGTCTCTCTCCCAGACCACCATAACTGGGGAGTATCTTTGTCTT
+ATAGTCCAAACACATTTACGTAACGCCCTTTCAACTGATTAAGGGGTCCCCAACTTTCAG
+AGCCCCATAAAGATTTTTGTGCTGACCCTGACTATCTCTTAAGGGGAACATAAATGTTTC
+AATTATCATATTGATTTAGCCATTTCCCTGACAATGAAAAATGGGGCTGTGTGTAATATT
+AAATTAATTCATTGTTTTTAGTTTCCCCTTGCCCAATGCTATGGGGTTTAGCTTCAGAGT
+AAAAACTAAATACTTTTTTTTGATTCCCAGGCAATACAGTACGGGGTAGTAATAATGTTG
+ACGTTAAAATTATATTTTTCGTGATCCCCTTCCTCATTAGAAGGGGTCCTTTCAGAGGAA
+ACATGATACGAGGATTTAGAAATTTCCCTGTCAGCGAAGTCAGGGGAGTATTTGCGATTA
+ATTTTATTAACTCCTTTATTATTGTCCCTTTCATTTTTCGAGGGGGACTTCAATTTTTGC
+ATTTTGAGGTTTTTTTTCATATCTACCCGCACCAGTTGATGGGGGGCTTTTACTTAAATC
+AATTGTGTAGTTATTTTCTGTACCTCCCATTCTAATTTGTCCGGGGTTATAAGAATTATT
+ATATAATTTTTTACTTTTAATTAATCCCTGACGAATACGAAAGGGGAATTCTTTTAAAGT
+AAAAACAGGCTTATTTTCTTCATAACCCGGACATCCGCCAGTGGGGGTTTCTGCTTTATC
+ATTATATGTTACATTTTGTTTATAGCCCTTACCGTTAGATGTGGGGGCAGGAGTAACACC
+ACCAGTAAACGTTTTTTGAGATAATCCCAAACAATCAATGGTGGGGTGGTCTTTTATGCC
+AAAAATATCAACGCTTTTATTTCTTCCCTGGCTGATATTGCCGGGGCTTTCAGGTCCATA
+AACTTGAATATGACTTTACCAAGAACCCTGTCACAACGTTGCGGGGATCGTACCGTTATC
+ATTTTGCCATAACGTTTTGTTAGAGCCCGTTCAATATTTTTGGGGGTAATATTTAGTTAA
+ATCATTAACGTTAGTTTCGTTTTGACCCATACAATAAGCTTTGGGGTCAGATGAAGAATT
+AATAGGTGTATTAGTTTATTGTGTACCCGCTCTACCTAATAGGGGGAATGTTGTTGATAA
+AATAATTTTTTTCGTTTTGTTATTGCCCATTCGAATTTCTCCGGGGAGTATTGTTGGAAT
+ATTTAATTATAAAATTTGGTTAATTCCCTAACTGAAATTATCGGGGTTTACAAAAGGTAA
+AAGGTTAGTTAGATTTTTCGAGTATCCCTTCCATTTGTGCATGGGGAGGATTTTTAACAT
+AACGGTTTGTGTCATTTAATTTTAACCCTTTCCTTAAATCAAGGGGGTAATTATTTCCAT
+AAGCAGTTATCTTATTTTGACCTTTCCCAAACTCTCCGTTATGGGGCTTTTTATTCTTTA
+ATAATGTTTGACGATTTCGGAAATCCCCTTCCTTTAAAGTTAGGGGTGGTTTATTTCCTT
+AGTAAAATTCATGTTTTCCGATGATCCCTTGCTGTTTATCACGGGGTTTTAAAAATAGTC
+AATAAGGTTTATCATTTGAAGTAGCCCCTGGCGTAACACCACGGGGAAATGTTTCATCAT
+AAGTCCAGTAACCTTTTACTGTGTCCCCATCCTTAGTTCCAAGGGGATCAACGTATTTAT
+ATCTTAACTGATTATTTTTTCCCCACCCCTCCGATCCAAACAGGGGAATATGACTATACC
+AAACCCACGTTTGCTTTGTTGCATGCCCGCTCCCATTGGGCTGGGGCCATAGCCATTTTC
+AAGATAATGAAAAATTTGGCTGAGTCCCATACTTGATTAACTGGGGGATATTAGTCTCGT
+ATTCACTGATATTATTTGCTTTTGCCCCAGACGAAAAACTGAGGGGTGTTTTAGGAAGTT
+ATGTTGATGTGGTTTTTAAAAGTAACCCTGCCGTTGATAAAAGGGGTTTATTCATGATGT
+ACTTTTTCATATGATTTTCTCCTTTCCCTGACTTACCCAAAGGGGGTAAGCTATTACACC
+AATTCGGAATTAAATTTAAGCTAAACCCATGCTAAATAAACTGGGGCAGTTAGTAGTGTT
+ATTTAAGCAAAACTTTTCATTTTTACCCTTTCGACAGAAACAGGGGTTAATAAAGTAGGC
+AGGAGTTATATATTTTTAACGACACCCCACACTTATTCTCTAGGGGATTGCATTAAATTG
+ATTGATAATTGAATTTTCTAACTACCCCAAACATAGTTATACGGGGAATGTAGTACTTAT
+ATTAATTATTTCCTTTTACTTAAATCCCATACTAATAAAATGGGGGTTTAATTATTGATA
+AAATATTACAAATTTTTATAGTAGGCCCTGTCTATTTTGTATGGGGTTACAATTTAGGTG
+AAACTAAAATAAAATTTGTTGTTATCCCTGACAAATTTACGAGGGGGAAGTTTTTTAACT
+ATATTAGACTTTTCTTTACAAGAGGCCCAATCCTTATTAACAGGGGCCGAGGATTTAAAA
+AGTGCTAAATATATTTTCACTGAAACCCCTACGTTAAAAAATGGGGATATTGCACTGTTA
+ATTGAAAAAGATTCTTTAAGAACGCCCCGTGCATTTGAAGTTGGGGCGCATGATCAAGGT
+ACAAATGTAACTTATTTAGGCCCAACCCGATCACAAATGGGTGGGGAAGAAACAACTAAA
+AATACTGCACGTGTTTTTGGTGGAACCCATGCTGGCATTGAAGGGGGTGGTTTTTCACAA
+AGAACAGTAGAAACTTTAGCTGAAACCCCAGCCGTACCAGTGGGGGATGGTTTAACTGAT
+AAAGATCATCCTACTTTAGTTCTTGCCCATTCCTTAACAGCAGGGGAAGTCTTAAAAAAA
+AATTATGCAGATATTTTCTTTACATCCCTTGCAGATGGTCGTGGGGACGTTGCAAATGCA
+ATAATGCAAGGTGCTTTCATTATGGCCCTGACCTTCCATTTAGGGGGTCCAAAAGAATTA
+AATCCAACAGATGATTTATTAAATCCCCGTACAAATATTGCCGGGGAAAATGGTGGCAAC
+ATATTAATCACAGATTTTATTGACCCCCGTGCAAAAGGTTCGGGGGTAATTTACACTGAT
+ATTTGGGTATCAATTTTTGAACCTGCCCAAGCATGGAAAGAAGGGGTTGAATTATTGAAA
+ACATATCAAGTAAATTTAGAAATGACCCATACAACTGGTAATGGGGATGTTATTTTTGAG
+AATTGCTTACCATCTTTCCATAATGCCCATACGAAAATTGGTGGGGAAATTTTTGAAAAA
+AATGGTATTCGAGATTTGGAAGTTACCCATGCAGTATTCGAAGGGGAAGCTTCAGTTGTA
+ATCCAAGAAGCTGATTTCAGAATGCCCCCAACCAAAGCAGTCGGGGTTGCTACATTGGGT
+AAATTTTAAATGATTTTAGGAAGTGCCCATGCTGGCGAAAATGGGGGTAGCATTAGGTGG
+AAATGCTTTAGGAATTTCACCTCAACCCCAACTCGAGCTTGTGGGGAATACTGCGAAATC
+ATTAGTAGGATTAATTTCAAAAGGACCCGAGCTTGTTATTAGGGGGGGTAATGGACCACA
+AGTTGGAAGCATTATTTTGGGACTTCCCTATCCTGCAGAACAGGGGCAAGGTCCGGCATT
+ACCATTTGCTGAATTTTGCGCAATGCCCCAACCTTACATCGGGGGGCAATTACAAGAAAG
+ATTACAAAATGAATTTTATTCTATTCCCATGCATAAACAAGTGGGGACACTAGTGACACA
+AGTTGAAGTTGATGTTTATGATCCGCCCTTTCACAATCCTTCGGGGCCAATTGGGTTATT
+ATACAACAAAGAAGTTTCTGAACAACCCCAACAAGAAAAAGGGGGGATATTTGTTGAAGA
+AGCTGGAAGAGGATTTTGACGCGTTCCCCCTCCACCACAACCGGGGTCTATTATTGAATT
+AGAGAGTATTAAAATTTTTATTAAACCCGATCCACTCGTTATGGGGGCTGGTGGTGGAGG
+AATACCAGTAATTATTTAGCAACATCCCGGTCTTAAAGGTATGGGGGCAGTTATAGACAA
+AGATAAAACAAGTGTTTTGTTGGGTCCCAATCTTCAATGCGAGGGGTTGATTATTTTAAC
+AGCAATTGATTATGTTTATATTAATCCCAACCCTGAAAACCAGGGGCCTTTGAAAACAAC
+AAATGTTGATGAATTTTAACGATATCCCGACCAAAATCAATTGGGGAAAGGAAGTATGTT
+ACCAAAAATTGAAGTTTCCATATCACCCATTCAAAACAATCCGGGGGGAAGTGTGCTTAT
+AACATCATTAAATGTTTTAGATGCTCCCTTACAGGGTAAAGTGGGGACTGTGATTAAAAA
+ATAATTGAATTGAATTTCTTTTCAACCCCTACATGTCAAATGGGGGATTTTTATTATTTA
+AGTGCACCCCCTGATTTTAATGCCTCCCTTTCGATGCGGGGTGGGGTTTCTTAATTTATA
+ATTATAAAATCTTTTTTGTAGAAATCCCAGGCTAAATGTCGAGGGGGATGAAACCGTGGA
+AAATACAATTAATGTTTGTGAAAAGCCCAAACGATTTAAATTGGGGATGCCAGGTGCATT
+AATGATTTTATTCATTTTAACGGTTCCCGCACTTATAGCAACGGGGGTTATTCCTGCTGG
+AGCATATTCTAAACTTTCTTACGAACCCTCACCCCAAGAACTGGGGATAGTTAACCCTCA
+AAACCAAGTGAAAATTTTTCCGGGTCCCCAACAGGAACTAGAGGGGATGGGGGTTAAAAT
+AAAGATTGAACAATTTTAATCAGGTCCCATTCATAAGCCAGTGGGGATTCCGAATACTTA
+AGAAAGATTAAAGCTTTATCCAGCTCCCCCACAACAAATAACGGGGAGCATGGTTGAAGG
+AACGATAGAAGCGGTTTATATCATGCCCTTCCTTCTTGTACTGGGGGGACTTATTGGCGT
+AGTTCAAGCCAGTGTTTCTTTTGAACCCGGACTGTTAGCTTTGGGGAAGAAAACAAAAGG
+ACATGAATTTATGCTTTTTGTGTTTCCCTCACTACTAATGATGGGGGGCGGGACGTTATG
+AGGTATTGAAGAAGTTTCTGTAGCACCCTATCCGATTTTAGTGGGGATATTTATAGCGTT
+AGGATACGATTCTATTTTTTCAGTTCCCGCCCTATTCCTTGCGGGGTCTGTCGGTAGTAC
+ATTTTCAACTATTATTTCGTTCTCGCCCGTACTTGCCTCTAAGGGGGCTGGTACAACTTT
+AACGGATGGCTTGTTTTGGAGAATACCCGCTCGTATTGTCGGGGGGATTTTTGTTATTAG
+ATATTTATATTGGTTTTGTAAAAAACCCAAACACGATCCTAAGGGGTCATATTCTTATGA
+AGACAAAGATGCTTTTTAACAGCAACCCTCTCTATTAAAAGAGGGGGATAGTGCCCATTT
+AACTTTGCGTAAGATTTTAATCCTTCCCTTACTTGTACTACCGGGGCCAATTATGGTATG
+AGGAGTTATGACGCTTTGTTGGTGGCCCCCACTTATGGCTTCGGGGTTTTTAATATTTAC
+AATTATAATAATGTTTTTTGCTGGGCCCGGTCAATCTGGATTGGGGGAAAAAGGAACTGT
+AGATGCATTTGTCATTTGTGCATCACCCTTACTAGGTGTATCGGGGATTATTGGTTTAGC
+ACGAGGTATTAATTTTTTGTTGAATCCCGGTCTGATTTCAGAGGGGATCTTACACTTTTC
+ATCATCTTTAGTTCTTTATATGAGTCCCCCACTATTTATCATGGGGTTACTATTTATTTT
+ATTCTGTTTAGGTTTTTTCGTGCCACCCTCTCCTGGATTAGCGGGGTTATCAATGCCTAT
+ATTTGCACCACTAGTTTATACAGTACCCATACCAAGATTCGTGGGGGTTACGACATATCA
+ATTTGGTCAATATGTTTTGTTATTCCCCGCGCCGACTGGACTGGGGATGGCCACACTACA
+AATGTTAAACATGCTTTATTCACATCCCTTCCGATTTGTATGGGGGGTAGTTGCTTTTGT
+ATTGATTTTCGGTGTTTGAGTACTACCCACGCAAGTACTAATGGGGTCATAATTTGAAAT
+ACTATATTATAAAATTTCTAATTGGCCCTTACGCATCTCGTAGGGGTGTAGAAATACTAA
+ACTAAGCGAGGTGCTTTATTATTTTCCCTTACGAAAATAATGGGGGTAATGATAACACTA
+ATAAGTAATTGATATTTTGCTCTATCCCATACTGATATTTTTGGGGTTTGTTTTTAATGT
+AATGTTAGATCTATTTTATATTATACCCATCCTGGTAAATATGGGGTTGCTGTAAACAGT
+ATTTGTAACACATGTTTCATATGGTCCCGAACATAGCATATAGGGGGGATGGCTATAAAT
+AACACATTTGACAATTTTTTTAGATCCCCTACCTGGAATATGGGGGGTATTATTATTTAG
+AAAATATTTTGTGGTTTATGCAAATCCCGTGCTTGATTGGAAGGGGAGATGGTATTTGCT
+AGAAAACATACCACTTTTGCCAATTCCCTTACTTATTCTGATGGGGATTTTCGGAGTACC
+ATCTGAAATGATAATTTATAGGCAACCCAAACATAACGGTGTGGGGTTTATCGATATTTA
+AAGGTGATAAAATTTTTCAACTATTCCCAAGCCTAAAATAAAGGGGTTTATTAATTATTT
+AATTGGTATATTAGTTTTGGCGTTTCCCGCTCATATATTATTGGGGTAAAGGTTAGAGTG
+AAACGTGTTTATGATTTAGACGTTCCCCTATCGTTACTACAGGGGGAGTCAGAAGGTATC
+ATTGATAAGATCATTTTAAATCAAACCCGCACTGATATACACGGGGAAGAGGCAAGATTA
+ACTGCCTCTTTTTTTTTTATTAAATCCCCGTCTTAATTCTTGGGGGTAGTGTTACTGAGA
+AGCATTACGAAGAATTTTATCTTGACCCTCCCTTTTTAAAAAGGGGTGAAGATATCCTAT
+AAAGACTCTAAGTATTTTTACAGTGCCCAATCACACTTCGATGGGGAAAAGACCTTTCCA
+AATATCTGGAAACATTTGTACAGGCCCCCTGCTCTTTAAAGCGGGGGCTGAGATTACTAA
+AGGGAATGTGAAAGTTTAAAATACGCCCGAACAAGGTTCTTTGGGGAATTTAGGCAATTG
+AATAATGATATAAATTTAAAATATTCCCGCTCATATTAAAAAGGGGATTACGATAAAAGC
+ATTCGCCTTAGGAATTTCTATAACACCCGCTCCAGCGACTAAGGGGAATGGTGCACAAAT
+AGTCGATGTGTTCGTTTTGATTGACCCCTGCCATGGAAATGCGGGGAATCGCTTGAAAAC
+AATAGGTAAGACAATTTAAGTAGCTCCCAAGCCATATATTACGGGGAATTGACCTATGAA
+AAAACATCCGCTAATTTGTGCCGTCCCCCCTCCGATAGCAATGGGGATAAAAAGTACAGT
+ACACGAAGGATAAATTTTTTCAAGTCCCAAACCTTTTAAATAGGGGATTGAAAAAATAAT
+AATATGCGTCATAATTTCCACAAGGCCCAAACTCCAAATAGGGGGGATTAAGCTATTGAT
+AAAAGTTATGTTACTTTAAAATGTACCCAAACAAGTAGTACCGGGGAAGCCAGACATGAA
+AAATGTTGTGAACATTTATGAAACTCCCGGACTGTTTAATTGGGGGTTCACATTATTAAA
+ATATTTGATCATAGTTTATAAAAGGCCCATCCAAATCAAGAAGGGGAAGATTCCGCAAAC
+AGCGTTTAAAGTAATTTATAAGTCTCCCAATCGATTACCCAGGGGGAATAATCCTAAGAC
+AAGTCCTGACGTTATTTGAGGTGCTCCCTGACGTCTCATGATGGGGAACCTTTCTTATGT
+AATTTTCTTCACTATTTATATCATGCCCGCTCTGGCCAATTAGGGGGAAGAGTGTGTACT
+ATTACGTTATTAGATTTTGTATATTCCCTTGCTAGACACATAGGGGACATTTAAATCTCA
+AAATTAATGATATTTTTGGTATGCTCCCCAACCTAAATATTGGGGGATGTGGAAAAGTAA
+ATATTTAATTTAAATTTTGATTGAACCCTTACAAGGGGGTGTGGGGAATGAGAAATCAAA
+ATCAAAAACTATTATTTAGTGATTTCCCCAGCTTACATATATGGGGACAAACAGGAGTTC
+AACAAAGCACAATATTTAGAATGAGCCCAAACGAAAGATCATGGGGCAATATGTCATTGA
+AAAACGCTGAACTATTTTATAAATTCCCCAACAGTATATTTAGGGGTGAAAATTAAGAAG
+AATAATTAGTGAGTTTTATAATTAACCCACCCCGTCTCGATGGGGGTGGTTATTTTTTTA
+AATGTATTTAATTATTTGATTTCGGCCCCCTCAAAAGTCCCTGGGGTTTTGAATAGTATC
+AAAATCTATAGGAATTTTATAATAACCCAAACCTCTACGCATGGGGATGGTGAGTGTTAA
+AAATCTTGGTGTAGTTTTGGTGTAGCCCAGGCGCAGTATAGAGGGGATTTGAGCACAAAA
+ATACTTAATTAAAATTTTATAAACACCCGTCCGACGCGTGTCGGGGGTGTGTCAAAAAAT
+ACTATGACGAATAATTTTGCTTGTTCCCATTCCTGTATTTTCGGGGCTTATGCGGGGGAC
+ATTTTGGTGACGCATTTTACTATATCCCTGACATTCAAAACAGGGGGAGCCCCGTAATCA
+AGGAACTCTTTTGTTTTGTAATGCGCCCAAACATACCTATAAGGGGCCTGGGAGGGATTC
+AAACCCCCGACCGATTTCTTAGAAGCCCATTCCTCTATCCAGGGGGGCTACCAGGACACG
+ATTAACAACACAAGTTTTATTATATCCCAATCAACTTAAATTGGGGATACCTTGTAAATA
+AAAAATGTTTATATTTTTCACTATTCCCGAGCTATTTATCTAGGGGGGTTCAATAAGACT
+AAAATGCGAATTCATTTAACTTAATCCCGTTCAATACAGTTTGGGGTGCCTAACTGTATT
+ACTTTTCTCTTTAATTTACAGTTAACCCCATCATAAGATGTTGGGGGGATAAACAAACTA
+ATTGCATCAAATTTTTTTTAAAATACCCACACCAAAACGTTAGGGGAATAACATTTCGGT
+AATTTAAAAGCTACTTTCGTTTTTGCCCTCTCCAAATTTAAAGGGGAAGGAGTGTTTTCA
+ATGGAAGGTTTATTTTTCGCAATTACCCATACCGTAACTGCAGGGGTTAATAATGATGGC
+ACAAAATTAGGCACTTTCATTGTGACCCTCGCTGAAAATGGCGGGGGTTTATTAGGTAAA
+ATATTCGGATTCTATTTTCAATATGCCCTGTCAGTAATCAGTGGGGTTTCAAAGGTGAGG
+AAGAGATTTAAATGTTTGGACTAGCCCCAGCCATCGCAAATAGGGGGCAAGCTGCACAAC
+AACATGATAGTGTGTTTTTAGGCACCCCTATCGTAGACATCGGGGGTAACGGTGTGGGTT
+AACTAGGTAAATTATTTGGATTCTACCCTAACAACTAATATTGGGGAAAATAAACTGGGT
+AAGCATACTTTAATTTTATGCACTCCCCTTACTTTATTTGCAGGGGTTTGAGCCTCTGTT
+AAGATTTAGATACATTTACAATATACCCGATCGGGAAATTGGGGGGTAAAAATATTTTGA
+AAGACTTTGATGATTTTATTGTTGACCCTTACGATGCAGAAGGGGGGGCGTTTCACTATA
+AGGCGAATGTTTTTTTTCATAAAGCCCCAAACGATGATTTTTGGGGATTTAAAAAAATCA
+ATCACCAACATTGGTTTGCTTTTCACCCAAACAAATTAACGAGGGGTGAAGTATTATCAG
+AACGATTTGTGAATTTTTTCAAACACCCTCACATGGAAGTTGGGGGGCATCGTGCAGATG
+AGTTATTTAGAAATTTTTTAGCAGACCCTAACGTTAAATACTGGGGTCAAACATTAGAAA
+AAATTGTCGAATTATTTAAAAGACACCCTTTCTATATTGTTAGGGGTGGTGTAACCGAAA
+AGCAAAAGAGAAGGTTTAATCAGACCCCGTTCCATAAATATAGGGGAAAGATATTTATAT
+ATGAGGAAACAGGATTTCAAAAACCCCCTCCCGAATTTTTTAGGGGTGTTTTTAATGATA
+ATGGTGAGGATGAATTTCAGCACTCCCCTATCGTTGGAGATTGGGGAACATCTGACATTC
+AAGGTGGAATCAATTTTGGTATAGCCCCTTGCTGGTTTAATTGGGGAGGATTTGATCATA
+ATCCAGGAATTATATTTGATTATGACCCTAACTCATGGAAACGGGGAAATGATATTGTAC
+ATTAAGACAAATAATTTGAAAATGACCCATACTAGCTTAAGAGGGGAATTGTAAAAACGA
+AACTACGAAAAGATTTTTTTTCATGCCCGTACTGTCGAATATGGGGATCTTATGGTTTCC
+ACAATAAGACATCATTTCCCTCTATCCCTTTCGCTTCCACATGGGGAAATCCATGATGTA
+AAAAGAAATCCTTATTTTCATTTCTCCCAATCGCTTTAATTGGGGGATTGAAACTCTTTG
+AAAAATCAATCAATTTTGAAGCGTACCCTCTCTTTTGATATTGGGGTAATACTTCTAACT
+ACCATAATAATATATTTTCTTCAAACCCTGGCAAGTAGATTTGGGGGACATCACCTTTTT
+ATAGTAATGCCATTTTTGCTCCTAACCCATCCCCGACAAATAGGGGATAAAATGGTGAAT
+ATGAACTTGCATCATTTATTTGACCCCCTAACTCATTGACCAGGGGTAAGTCTTTGTTGC
+AAAACGCTCTAAAGTTTTCGAATAACCCGTCCGTTTTGTAATGGGGTTCAAGACGTTTGA
+ATTCACTCATATTTTTTTACCCCTTCCCTGTCTACTACTATTGGGGTACATTAAACCACT
+AGTGTTTTTCATTGTTTTGAAAATGCCCAACCATGAACATTAGGGGTTTTCCATGATTAA
+ATTGATTTTCAAGCTTTAAAATTGTCCCTCACCGGTATAGACGGGGTACTATAGTTTGTA
+ATCGCGTACTTAAGTTTAAAAAACACCCACTCTAAAGTAGTTGGGGTAAATGAAAAAGAT
+AAGTTTATACCAAATTTAAAGAGCACCCACCCTGTAATAACAGGGGTATTTCAATATGAT
+ACAGCTCAACAAACTTTTTTTGAAACCCGGACGTCTAAAGAAGGGGATGGAAGAGAAGCG
+ACATTAGCGAATGTTTTTCGTGAATCCCTGACTGATTTAAAGGGGGCAAATGAACAAGAA
+ATAAACATACAACATTTAGCTAATGCCCCAACAGTTGTGATTGGGGGACAACAAGCAGGG
+ATTTTCGGGGGACCTTTGTATACATCCCATACAATATTTTCAGGGGTTACTTTATCTAAG
+AAATTAACGGATACTTTTAAGCAACCCCTAGCACCAGTTTTTGGGGTTGCAGGAGAAGAT
+AATGATTTCGATGATTTGAATCATACCCTTGCTTATAACGAAGGGGATGGGTCGCTGCAT
+AAGGTTAAATATCATTTAATGGAGACCCCAGCGACGACAGTCGGGGGATATTATCCTGAT
+AAGGCTGAGTTGAATTTAACTTTAACCCCGACGTTCATTCATGGGGAAGAAACTGTTCAT
+ACACAAGGTCTACTTTTGATTTGTGCCCGAACTATTGACCAAGGGGACTCGTGGACTGAT
+ATGTTTAAAGCACTTTTGCATGAAACCCTTACAGCATATGGCGGGGTATTTATAGATGCG
+AAGTTTGAGCCGTTTTTAAAAATGGCCCCGCCTATGTTTAAAGGGGTTTTGAAAAAACAT
+AAGTTGCTTGATGATTTTTTTAGAGCCCCACCACAACGTACTGGGGATCAAGGCTTGAAT
+ACGATGATACAAACTTTTACAAATGCCCATTCATTCTTACATGGGGAAAATATGCGCCAA
+ATAGTTTCGTATGATTTTAAGCATTCCCAATCAAATAAAACAGGGGAGACATATATAAAG
\ No newline at end of file
diff --git a/t/data/query_6.gff b/t/data/query_6.gff
new file mode 100644
index 0000000..135faac
--- /dev/null
+++ b/t/data/query_6.gff
@@ -0,0 +1,271 @@
+##gff-version 3
+##sequence-region abc|SC|contig000001 1 15000
+abc|SC|contig000001	Prodigal:2.60	CDS	172	1131	.	-	0	ID=3_1;gene=hly;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q2G1X0,protein motif:TIGRFAMs:TIGR01002,protein motif:Pfam:PF07968.6;locus_tag=abc_00001;product=Alpha-toxin,beta-channel forming cytolysin,Leukocidin/Hemolysin toxin family protein;protein_id=gnl|SC|abc_00001
+abc|SC|contig000001	Prodigal:2.60	CDS	1804	1950	.	+	0	ID=abc_00002;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00002;product=hypothetical protein;protein_id=gnl|SC|abc_00002
+abc|SC|contig000001	Prodigal:2.60	CDS	1934	2131	.	+	0	ID=abc_00003;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00003;product=hypothetical protein;protein_id=gnl|SC|abc_00003
+abc|SC|contig000001	Prodigal:2.60	CDS	2621	3337	.	-	0	ID=abc_00004;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13350,protein motif:Pfam:PF02876.11;locus_tag=abc_00004;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00004
+abc|SC|contig000001	Prodigal:2.60	CDS	3445	4170	.	-	0	ID=3_2;gene=speH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0C0I6,protein motif:CLUSTERS:PRK13349,protein motif:Pfam:PF02876.11;locus_tag=abc_00005;note=SPE H;product=hypothetical protein,superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00005
+abc|SC|contig000001	Prodigal:2.60	CDS	4265	4990	.	-	0	ID=abc_00006;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13043,protein motif:Pfam:PF02876.11;locus_tag=abc_00006;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00006
+abc|SC|contig000001	Prodigal:2.60	CDS	5428	6429	.	+	0	ID=3_3;eC_number=2.1.3.3;gene=argF;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P99073,protein motif:CLUSTERS:PRK04284,protein motif:Cdd:COG0078,protein motif:TIGRFAMs:TIGR00658,protein motif:Pfam:PF00185.1;locus_tag=abc_00007;product=Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Ornithine carbamoyltransferase,ornithine carbamoyltransferase,Aspartate/ornithine carbamoyltransferase%2C As [...]
+abc|SC|contig000001	Prodigal:2.60	CDS	6449	7384	.	+	0	ID=abc_00008;eC_number=2.7.2.2;gene=arcC1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q7A627,protein motif:CLUSTERS:PRK12353,protein motif:TIGRFAMs:TIGR00746,protein motif:Pfam:PF00696.22;locus_tag=abc_00008;product=Carbamate kinase 1,putative amino acid kinase,carbamate kinase,Amino acid kinase family protein;protein_id=gnl|SC|abc_00008
+abc|SC|contig000001	RNAmmer:1.2	rRNA	7556	9112	.	+	0	ID=abc_01705;inference=COORDINATES:profile:RNAmmer:1.2;locus_tag=abc_01705;product=16S ribosomal RNA
+abc|SC|contig000001	Prodigal:2.60	CDS	9419	9646	.	+	0	ID=abc_00010;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00010;product=hypothetical protein;protein_id=gnl|SC|abc_00010
+abc|SC|contig000001	Prodigal:2.60	CDS	9952	10899	.	-	0	ID=abc_00011;inference=ab initio prediction:Prodigal:2.60,protein motif:Pfam:PF03595.11;locus_tag=abc_00011;product=C4-dicarboxylate transporter/malic acid transport protein;protein_id=gnl|SC|abc_00011
+abc|SC|contig000001	Prodigal:2.60	CDS	11148	11336	.	+	0	ID=abc_00012;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00012;product=hypothetical protein;protein_id=gnl|SC|abc_00012
+abc|SC|contig000001	Aragorn:1.2.34	tRNA	11803	11878	.	-	0	ID=abc_00013;inference=COORDINATES:profile:Aragorn:1.2.34;locus_tag=abc_00013;product=tRNA-Arg(tct)
+abc|SC|contig000001	Prodigal:2.60	CDS	12241	12375	.	+	0	ID=abc_00014;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5;locus_tag=abc_00014;product=Gonococcal growth inhibitor III,Staphylococcus haemolytic protein;protein_id=gnl|SC|abc_00014
+abc|SC|contig000001	Prodigal:2.60	CDS	12432	12566	.	+	0	ID=abc_00015;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P11699,protein motif:Pfam:PF05480.5;locus_tag=abc_00015;product=Gonococcal growth inhibitor III,Staphylococcus haemolytic protein;protein_id=gnl|SC|abc_00015
+abc|SC|contig000001	Prodigal:2.60	CDS	12699	13385	.	+	0	ID=3_5;eC_number=3.-.-.-;gene=yfnB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:O06480,protein motif:CLUSTERS:PRK09449,protein motif:TIGRFAMs:TIGR02254,protein motif:Pfam:PF00702.1;locus_tag=abc_00016;product=Putative HAD-hydrolase yfnB,dUMP phosphatase,HAD hydrolase,haloacid dehalogenase-like hydrolase;protein_id=gnl|SC|abc_00016
+##FASTA
+>abc|SC|contig000001
+ACTGGCCGCCTAATTTTAAAAACTCCCCAAGCTGTAATTTAAGGGGGTTCTTTAAATTAT
+ATACCCACCACATTTTTTGGAGAACCCCAAACTAGCCGAAAAGGGGCATTTCTGAAGTTA
+ACGGCTAAAGTTATTTTTTATATTTCCCTGTCCATGAACAAAGGGGTTACATTAATTTGT
+AATTTCTTCTTTTTTTTAATCGATTCCCTATCTTTCTGAAGAGGGGTCTGTCCATTTATC
+ATTAGTATTGGTACTTTTCCAATTTCCCGAACTCCAATGCAAGGGGTAGTCATCACGAAC
+ACGTTCGTATATTATTTCTATATTTCCCTGTCGTTTGGATGCGGGGCTATCCATAGTAAT
+AACTGTAGCGAAGTTTTGTGAAAACCCCGAACATAATAGAGAGGGGGCTTTGTTAGGATC
+AAGGAAGTTCTCTGTTTCTTTCATACCCCCACTTCTAGTTTTGGGGAAAAGTTGATTGCC
+ATATACCGGGTTCCTTTAATCTCTACCCTATCGTCCCCAATTGGGGTTCACCATATTGTT
+AAATATCACTTTCCTTTCTACTTTTCCCTCACTTGGGCTCTCGGGGATTGTTTTGAAATC
+AGGTTGAACATATTTTTGTGTATGACCCATCCAAACATTTGCGGGGATAAGGCCGCCAAT
+ATTTCCTGTATCATTTTCAGTAACACCCCCGCTGAATCCATAGGGGAAAGTACTCATATA
+ATCTTTTGTATCAATTTAATTTCTTCCCTAGCAATCAGATATGGGGGCTACTTCATTATC
+AGGTAGTTGCAACTTTTCCTTAAAGCCCGAACGCCAGGCTAAGGGGCTTTTGTTAGCACC
+ATCTTCGCTATAAATTTTATATTGACCCGCACTGGTACCTTTGGGGCTAATAACTAGCAG
+ATTTTTATTGTGATTTTTATCATCGCCCAAACTATAAAATACGGGGTTGTGCATGCCATT
+ATCTTTATCATAAGTTTCTAAATCACCCGTTCTTACTGTAGTGGGGCTTCCAATATCTGT
+AGTACCGGTTTTAATTTTAATATCACCCTCTCCGGCATTAGCGGGGGGATTCATTAATAT
+AGAACCTAGCAATATTTTTGTTGTTCCCGAGCTGACTATACGGGGGTTCATTTTCATCAT
+ACTTCTATTTTTTTTTTCGATTTGACCCAACCATAATCAATAGGGGAATTTAGAATATTG
+AAGTTGAGACATATTTTATATTTATCCCCGTCTATATTAGTAGGGGTAATGTAGCAACTG
+ATAAATTACTGAGTTTTGATGAGTGCCCATTCTAAGAATATGGGGGTAACTTTTATTTAA
+AATTTGAAAGGAAGTTTTTCAATTTCCCGGGCTAGTCAAAGTGGGGTAAATTCTTTATGA
+AACAAGGAAAAGACTTTGCTAATTTCCCTGACTAATTTCTTTGGGGCTAATGATTTGTTT
+AATTTAAAAATGTATTTGATTACAACCCAAACATACAAATATGGGGGAATTAAATCAATT
+AATTAACTATTAAATTTAAATTAAACCCATACTAACTACTGTGGGGTAATAAATAGAAAT
+AGAGAAAAAGGGTATTTATTATGTTCCCAATCTCGTCGGGAAGGGGTTTTGCCATTACAT
+AGAAATATCTAATATTTAATGAAAACCCATCCTATGTATTTTGGGGATAGTGTAAAATAT
+AATATGTAAAATAATTTGTAGATTTCCCGTTCGAGGCATTATGGGGAAATTTTGAGTATA
+AGTTAGCTTTTAATTTTGAATCTTACCCAAACTTGATTAATAGGGGTATGATAGGGGATT
+AAAATGAAACTATTTTTTATCGTATCCCTTACTATTATATGGGGGGATATATTTTTAGGA
+AATGAAATTATCCATTTACTGACTGCCCTAACAACAACATTGGGGGTTGTTAATTCAAGA
+AAGGGGATTAAAAATTTCAGAGTTGCCCAATCTTATAAACTCGGGGCAATCTTTGTTTGA
+ATCTGAATCAGGCTTTTAAATTTCGCCCAATCCAGGAGTTCCGGGGCAAACAGTACAAGA
+ATTAAGAAATGGGATTTCCAAACTACCCGATCCTAGATTTAGGGGGATTATTAAACTTTA
+AAGTTACTATGTCTTTTTAAAAGAACCCTAACCATGGGACAAGGGGGTATTGCTATAATA
+ATTGAATCATTAAATTTAGGAATAGCCCCTACGACATAATAAGGGGATGTCTTAGGCTCT
+ACAATATTATATTGTTTGTAGTTGACCCAATCAAAATGACCAGGGGACAAGCATTTTTGG
+AGCCCCAACACAGATTTTGACGAAACCCCAGCTTACAATAATGGGGAAGTTGGGGATGGG
+ACCCAACACAGAGATTTTCAAAAAGCCCTTCCACAGACAATGGGGGTTGGCGGGGCCCCA
+ACATAGAGAAATTCTTTAAGAAATTCCCCAGCCAATGCAAGTGGGGGAGTGACAACGAAA
+AAAATTTTATAAAATTTCATTTCTGCCCCATCCCTACTCCCAGGGGCATTTAAATATATA
+AAAAATTTCACCTATTTTATACATCCCCCACCTTAAAAATTAGGGGTTAAATAATCTGAT
+ATGGAATTAAAGTGTTTGAAGTATACCCCACCTTCATATACTGGGGAAAGAGGACGTCAA
+AAGTTATTTTATTATTTTTTTTAACCCCAACCTTTGCTAGATGGGGATGTAATCTTTTGC
+ATAAATCAATAGTGTTTTTATTACCCCCACCCGTGATCTTAAGGGGACCTTTATTACGAT
+ATTCGGTATATAATTTTTTATTTTTCCCTAACGCTTCTCGTGGGGGGAAATCGATTTCTT
+ACAATGTTAATACTTTTTTATTGCCCCCGTACATTTTATGCGGGGGAATAATCGTTTGTA
+ATTTATCTTTGTATTTTACAAAAAGCCCATACGTTTTATCAGGGGGTTTTGCGGCTGGTG
+AAACGCCACCTGTATTTGTCTCTCTCCCAGACCACCATAACTGGGGAGTATCTTTGTCTT
+ATAGTCCAAACACATTTACGTAACGCCCTTTCAACTGATTAAGGGGTCCCCAACTTTCAG
+AGCCCCATAAAGATTTTTGTGCTGACCCTGACTATCTCTTAAGGGGAACATAAATGTTTC
+AATTATCATATTGATTTAGCCATTTCCCTGACAATGAAAAATGGGGCTGTGTGTAATATT
+AAATTAATTCATTGTTTTTAGTTTCCCCTTGCCCAATGCTATGGGGTTTAGCTTCAGAGT
+AAAAACTAAATACTTTTTTTTGATTCCCAGGCAATACAGTACGGGGTAGTAATAATGTTG
+ACGTTAAAATTATATTTTTCGTGATCCCCTTCCTCATTAGAAGGGGTCCTTTCAGAGGAA
+ACATGATACGAGGATTTAGAAATTTCCCTGTCAGCGAAGTCAGGGGAGTATTTGCGATTA
+ATTTTATTAACTCCTTTATTATTGTCCCTTTCATTTTTCGAGGGGGACTTCAATTTTTGC
+ATTTTGAGGTTTTTTTTCATATCTACCCGCACCAGTTGATGGGGGGCTTTTACTTAAATC
+AATTGTGTAGTTATTTTCTGTACCTCCCATTCTAATTTGTCCGGGGTTATAAGAATTATT
+ATATAATTTTTTACTTTTAATTAATCCCTGACGAATACGAAAGGGGAATTCTTTTAAAGT
+AAAAACAGGCTTATTTTCTTCATAACCCGGACATCCGCCAGTGGGGGTTTCTGCTTTATC
+ATTATATGTTACATTTTGTTTATAGCCCTTACCGTTAGATGTGGGGGCAGGAGTAACACC
+ACCAGTAAACGTTTTTTGAGATAATCCCAAACAATCAATGGTGGGGTGGTCTTTTATGCC
+AAAAATATCAACGCTTTTATTTCTTCCCTGGCTGATATTGCCGGGGCTTTCAGGTCCATA
+AACTTGAATATGACTTTACCAAGAACCCTGTCACAACGTTGCGGGGATCGTACCGTTATC
+ATTTTGCCATAACGTTTTGTTAGAGCCCGTTCAATATTTTTGGGGGTAATATTTAGTTAA
+ATCATTAACGTTAGTTTCGTTTTGACCCATACAATAAGCTTTGGGGTCAGATGAAGAATT
+AATAGGTGTATTAGTTTATTGTGTACCCGCTCTACCTAATAGGGGGAATGTTGTTGATAA
+AATAATTTTTTTCGTTTTGTTATTGCCCATTCGAATTTCTCCGGGGAGTATTGTTGGAAT
+ATTTAATTATAAAATTTGGTTAATTCCCTAACTGAAATTATCGGGGTTTACAAAAGGTAA
+AAGGTTAGTTAGATTTTTCGAGTATCCCTTCCATTTGTGCATGGGGAGGATTTTTAACAT
+AACGGTTTGTGTCATTTAATTTTAACCCTTTCCTTAAATCAAGGGGGTAATTATTTCCAT
+AAGCAGTTATCTTATTTTGACCTTTCCCAAACTCTCCGTTATGGGGCTTTTTATTCTTTA
+ATAATGTTTGACGATTTCGGAAATCCCCTTCCTTTAAAGTTAGGGGTGGTTTATTTCCTT
+AGTAAAATTCATGTTTTCCGATGATCCCTTGCTGTTTATCACGGGGTTTTAAAAATAGTC
+AATAAGGTTTATCATTTGAAGTAGCCCCTGGCGTAACACCACGGGGAAATGTTTCATCAT
+AAGTCCAGTAACCTTTTACTGTGTCCCCATCCTTAGTTCCAAGGGGATCAACGTATTTAT
+ATCTTAACTGATTATTTTTTCCCCACCCCTCCGATCCAAACAGGGGAATATGACTATACC
+AAACCCACGTTTGCTTTGTTGCATGCCCGCTCCCATTGGGCTGGGGCCATAGCCATTTTC
+AAGATAATGAAAAATTTGGCTGAGTCCCATACTTGATTAACTGGGGGATATTAGTCTCGT
+ATTCACTGATATTATTTGCTTTTGCCCCAGACGAAAAACTGAGGGGTGTTTTAGGAAGTT
+ATGTTGATGTGGTTTTTAAAAGTAACCCTGCCGTTGATAAAAGGGGTTTATTCATGATGT
+ACTTTTTCATATGATTTTCTCCTTTCCCTGACTTACCCAAAGGGGGTAAGCTATTACACC
+AATTCGGAATTAAATTTAAGCTAAACCCATGCTAAATAAACTGGGGCAGTTAGTAGTGTT
+ATTTAAGCAAAACTTTTCATTTTTACCCTTTCGACAGAAACAGGGGTTAATAAAGTAGGC
+AGGAGTTATATATTTTTAACGACACCCCACACTTATTCTCTAGGGGATTGCATTAAATTG
+ATTGATAATTGAATTTTCTAACTACCCCAAACATAGTTATACGGGGAATGTAGTACTTAT
+ATTAATTATTTCCTTTTACTTAAATCCCATACTAATAAAATGGGGGTTTAATTATTGATA
+AAATATTACAAATTTTTATAGTAGGCCCTGTCTATTTTGTATGGGGTTACAATTTAGGTG
+AAACTAAAATAAAATTTGTTGTTATCCCTGACAAATTTACGAGGGGGAAGTTTTTTAACT
+ATATTAGACTTTTCTTTACAAGAGGCCCAATCCTTATTAACAGGGGCCGAGGATTTAAAA
+AGTGCTAAATATATTTTCACTGAAACCCCTACGTTAAAAAATGGGGATATTGCACTGTTA
+ATTGAAAAAGATTCTTTAAGAACGCCCCGTGCATTTGAAGTTGGGGCGCATGATCAAGGT
+ACAAATGTAACTTATTTAGGCCCAACCCGATCACAAATGGGTGGGGAAGAAACAACTAAA
+AATACTGCACGTGTTTTTGGTGGAACCCATGCTGGCATTGAAGGGGGTGGTTTTTCACAA
+AGAACAGTAGAAACTTTAGCTGAAACCCCAGCCGTACCAGTGGGGGATGGTTTAACTGAT
+AAAGATCATCCTACTTTAGTTCTTGCCCATTCCTTAACAGCAGGGGAAGTCTTAAAAAAA
+AATTATGCAGATATTTTCTTTACATCCCTTGCAGATGGTCGTGGGGACGTTGCAAATGCA
+ATAATGCAAGGTGCTTTCATTATGGCCCTGACCTTCCATTTAGGGGGTCCAAAAGAATTA
+AATCCAACAGATGATTTATTAAATCCCCGTACAAATATTGCCGGGGAAAATGGTGGCAAC
+ATATTAATCACAGATTTTATTGACCCCCGTGCAAAAGGTTCGGGGGTAATTTACACTGAT
+ATTTGGGTATCAATTTTTGAACCTGCCCAAGCATGGAAAGAAGGGGTTGAATTATTGAAA
+ACATATCAAGTAAATTTAGAAATGACCCATACAACTGGTAATGGGGATGTTATTTTTGAG
+AATTGCTTACCATCTTTCCATAATGCCCATACGAAAATTGGTGGGGAAATTTTTGAAAAA
+AATGGTATTCGAGATTTGGAAGTTACCCATGCAGTATTCGAAGGGGAAGCTTCAGTTGTA
+ATCCAAGAAGCTGATTTCAGAATGCCCCCAACCAAAGCAGTCGGGGTTGCTACATTGGGT
+AAATTTTAAATGATTTTAGGAAGTGCCCATGCTGGCGAAAATGGGGGTAGCATTAGGTGG
+AAATGCTTTAGGAATTTCACCTCAACCCCAACTCGAGCTTGTGGGGAATACTGCGAAATC
+ATTAGTAGGATTAATTTCAAAAGGACCCGAGCTTGTTATTAGGGGGGGTAATGGACCACA
+AGTTGGAAGCATTATTTTGGGACTTCCCTATCCTGCAGAACAGGGGCAAGGTCCGGCATT
+ACCATTTGCTGAATTTTGCGCAATGCCCCAACCTTACATCGGGGGGCAATTACAAGAAAG
+ATTACAAAATGAATTTTATTCTATTCCCATGCATAAACAAGTGGGGACACTAGTGACACA
+AGTTGAAGTTGATGTTTATGATCCGCCCTTTCACAATCCTTCGGGGCCAATTGGGTTATT
+ATACAACAAAGAAGTTTCTGAACAACCCCAACAAGAAAAAGGGGGGATATTTGTTGAAGA
+AGCTGGAAGAGGATTTTGACGCGTTCCCCCTCCACCACAACCGGGGTCTATTATTGAATT
+AGAGAGTATTAAAATTTTTATTAAACCCGATCCACTCGTTATGGGGGCTGGTGGTGGAGG
+AATACCAGTAATTATTTAGCAACATCCCGGTCTTAAAGGTATGGGGGCAGTTATAGACAA
+AGATAAAACAAGTGTTTTGTTGGGTCCCAATCTTCAATGCGAGGGGTTGATTATTTTAAC
+AGCAATTGATTATGTTTATATTAATCCCAACCCTGAAAACCAGGGGCCTTTGAAAACAAC
+AAATGTTGATGAATTTTAACGATATCCCGACCAAAATCAATTGGGGAAAGGAAGTATGTT
+ACCAAAAATTGAAGTTTCCATATCACCCATTCAAAACAATCCGGGGGGAAGTGTGCTTAT
+AACATCATTAAATGTTTTAGATGCTCCCTTACAGGGTAAAGTGGGGACTGTGATTAAAAA
+ATAATTGAATTGAATTTCTTTTCAACCCCTACATGTCAAATGGGGGATTTTTATTATTTA
+AGTGCACCCCCTGATTTTAATGCCTCCCTTTCGATGCGGGGTGGGGTTTCTTAATTTATA
+ATTATAAAATCTTTTTTGTAGAAATCCCAGGCTAAATGTCGAGGGGGATGAAACCGTGGA
+AAATACAATTAATGTTTGTGAAAAGCCCAAACGATTTAAATTGGGGATGCCAGGTGCATT
+AATGATTTTATTCATTTTAACGGTTCCCGCACTTATAGCAACGGGGGTTATTCCTGCTGG
+AGCATATTCTAAACTTTCTTACGAACCCTCACCCCAAGAACTGGGGATAGTTAACCCTCA
+AAACCAAGTGAAAATTTTTCCGGGTCCCCAACAGGAACTAGAGGGGATGGGGGTTAAAAT
+AAAGATTGAACAATTTTAATCAGGTCCCATTCATAAGCCAGTGGGGATTCCGAATACTTA
+AGAAAGATTAAAGCTTTATCCAGCTCCCCCACAACAAATAACGGGGAGCATGGTTGAAGG
+AACGATAGAAGCGGTTTATATCATGCCCTTCCTTCTTGTACTGGGGGGACTTATTGGCGT
+AGTTCAAGCCAGTGTTTCTTTTGAACCCGGACTGTTAGCTTTGGGGAAGAAAACAAAAGG
+ACATGAATTTATGCTTTTTGTGTTTCCCTCACTACTAATGATGGGGGGCGGGACGTTATG
+AGGTATTGAAGAAGTTTCTGTAGCACCCTATCCGATTTTAGTGGGGATATTTATAGCGTT
+AGGATACGATTCTATTTTTTCAGTTCCCGCCCTATTCCTTGCGGGGTCTGTCGGTAGTAC
+ATTTTCAACTATTATTTCGTTCTCGCCCGTACTTGCCTCTAAGGGGGCTGGTACAACTTT
+AACGGATGGCTTGTTTTGGAGAATACCCGCTCGTATTGTCGGGGGGATTTTTGTTATTAG
+ATATTTATATTGGTTTTGTAAAAAACCCAAACACGATCCTAAGGGGTCATATTCTTATGA
+AGACAAAGATGCTTTTTAACAGCAACCCTCTCTATTAAAAGAGGGGGATAGTGCCCATTT
+AACTTTGCGTAAGATTTTAATCCTTCCCTTACTTGTACTACCGGGGCCAATTATGGTATG
+AGGAGTTATGACGCTTTGTTGGTGGCCCCCACTTATGGCTTCGGGGTTTTTAATATTTAC
+AATTATAATAATGTTTTTTGCTGGGCCCGGTCAATCTGGATTGGGGGAAAAAGGAACTGT
+AGATGCATTTGTCATTTGTGCATCACCCTTACTAGGTGTATCGGGGATTATTGGTTTAGC
+ACGAGGTATTAATTTTTTGTTGAATCCCGGTCTGATTTCAGAGGGGATCTTACACTTTTC
+ATCATCTTTAGTTCTTTATATGAGTCCCCCACTATTTATCATGGGGTTACTATTTATTTT
+ATTCTGTTTAGGTTTTTTCGTGCCACCCTCTCCTGGATTAGCGGGGTTATCAATGCCTAT
+ATTTGCACCACTAGTTTATACAGTACCCATACCAAGATTCGTGGGGGTTACGACATATCA
+ATTTGGTCAATATGTTTTGTTATTCCCCGCGCCGACTGGACTGGGGATGGCCACACTACA
+AATGTTAAACATGCTTTATTCACATCCCTTCCGATTTGTATGGGGGGTAGTTGCTTTTGT
+ATTGATTTTCGGTGTTTGAGTACTACCCACGCAAGTACTAATGGGGTCATAATTTGAAAT
+ACTATATTATAAAATTTCTAATTGGCCCTTACGCATCTCGTAGGGGTGTAGAAATACTAA
+ACTAAGCGAGGTGCTTTATTATTTTCCCTTACGAAAATAATGGGGGTAATGATAACACTA
+ATAAGTAATTGATATTTTGCTCTATCCCATACTGATATTTTTGGGGTTTGTTTTTAATGT
+AATGTTAGATCTATTTTATATTATACCCATCCTGGTAAATATGGGGTTGCTGTAAACAGT
+ATTTGTAACACATGTTTCATATGGTCCCGAACATAGCATATAGGGGGGATGGCTATAAAT
+AACACATTTGACAATTTTTTTAGATCCCCTACCTGGAATATGGGGGGTATTATTATTTAG
+AAAATATTTTGTGGTTTATGCAAATCCCGTGCTTGATTGGAAGGGGAGATGGTATTTGCT
+AGAAAACATACCACTTTTGCCAATTCCCTTACTTATTCTGATGGGGATTTTCGGAGTACC
+ATCTGAAATGATAATTTATAGGCAACCCAAACATAACGGTGTGGGGTTTATCGATATTTA
+AAGGTGATAAAATTTTTCAACTATTCCCAAGCCTAAAATAAAGGGGTTTATTAATTATTT
+AATTGGTATATTAGTTTTGGCGTTTCCCGCTCATATATTATTGGGGTAAAGGTTAGAGTG
+AAACGTGTTTATGATTTAGACGTTCCCCTATCGTTACTACAGGGGGAGTCAGAAGGTATC
+ATTGATAAGATCATTTTAAATCAAACCCGCACTGATATACACGGGGAAGAGGCAAGATTA
+ACTGCCTCTTTTTTTTTTATTAAATCCCCGTCTTAATTCTTGGGGGTAGTGTTACTGAGA
+AGCATTACGAAGAATTTTATCTTGACCCTCCCTTTTTAAAAAGGGGTGAAGATATCCTAT
+AAAGACTCTAAGTATTTTTACAGTGCCCAATCACACTTCGATGGGGAAAAGACCTTTCCA
+AATATCTGGAAACATTTGTACAGGCCCCCTGCTCTTTAAAGCGGGGGCTGAGATTACTAA
+AGGGAATGTGAAAGTTTAAAATACGCCCGAACAAGGTTCTTTGGGGAATTTAGGCAATTG
+AATAATGATATAAATTTAAAATATTCCCGCTCATATTAAAAAGGGGATTACGATAAAAGC
+ATTCGCCTTAGGAATTTCTATAACACCCGCTCCAGCGACTAAGGGGAATGGTGCACAAAT
+AGTCGATGTGTTCGTTTTGATTGACCCCTGCCATGGAAATGCGGGGAATCGCTTGAAAAC
+AATAGGTAAGACAATTTAAGTAGCTCCCAAGCCATATATTACGGGGAATTGACCTATGAA
+AAAACATCCGCTAATTTGTGCCGTCCCCCCTCCGATAGCAATGGGGATAAAAAGTACAGT
+ACACGAAGGATAAATTTTTTCAAGTCCCAAACCTTTTAAATAGGGGATTGAAAAAATAAT
+AATATGCGTCATAATTTCCACAAGGCCCAAACTCCAAATAGGGGGGATTAAGCTATTGAT
+AAAAGTTATGTTACTTTAAAATGTACCCAAACAAGTAGTACCGGGGAAGCCAGACATGAA
+AAATGTTGTGAACATTTATGAAACTCCCGGACTGTTTAATTGGGGGTTCACATTATTAAA
+ATATTTGATCATAGTTTATAAAAGGCCCATCCAAATCAAGAAGGGGAAGATTCCGCAAAC
+AGCGTTTAAAGTAATTTATAAGTCTCCCAATCGATTACCCAGGGGGAATAATCCTAAGAC
+AAGTCCTGACGTTATTTGAGGTGCTCCCTGACGTCTCATGATGGGGAACCTTTCTTATGT
+AATTTTCTTCACTATTTATATCATGCCCGCTCTGGCCAATTAGGGGGAAGAGTGTGTACT
+ATTACGTTATTAGATTTTGTATATTCCCTTGCTAGACACATAGGGGACATTTAAATCTCA
+AAATTAATGATATTTTTGGTATGCTCCCCAACCTAAATATTGGGGGATGTGGAAAAGTAA
+ATATTTAATTTAAATTTTGATTGAACCCTTACAAGGGGGTGTGGGGAATGAGAAATCAAA
+ATCAAAAACTATTATTTAGTGATTTCCCCAGCTTACATATATGGGGACAAACAGGAGTTC
+AACAAAGCACAATATTTAGAATGAGCCCAAACGAAAGATCATGGGGCAATATGTCATTGA
+AAAACGCTGAACTATTTTATAAATTCCCCAACAGTATATTTAGGGGTGAAAATTAAGAAG
+AATAATTAGTGAGTTTTATAATTAACCCACCCCGTCTCGATGGGGGTGGTTATTTTTTTA
+AATGTATTTAATTATTTGATTTCGGCCCCCTCAAAAGTCCCTGGGGTTTTGAATAGTATC
+AAAATCTATAGGAATTTTATAATAACCCAAACCTCTACGCATGGGGATGGTGAGTGTTAA
+AAATCTTGGTGTAGTTTTGGTGTAGCCCAGGCGCAGTATAGAGGGGATTTGAGCACAAAA
+ATACTTAATTAAAATTTTATAAACACCCGTCCGACGCGTGTCGGGGGTGTGTCAAAAAAT
+ACTATGACGAATAATTTTGCTTGTTCCCATTCCTGTATTTTCGGGGCTTATGCGGGGGAC
+ATTTTGGTGACGCATTTTACTATATCCCTGACATTCAAAACAGGGGGAGCCCCGTAATCA
+AGGAACTCTTTTGTTTTGTAATGCGCCCAAACATACCTATAAGGGGCCTGGGAGGGATTC
+AAACCCCCGACCGATTTCTTAGAAGCCCATTCCTCTATCCAGGGGGGCTACCAGGACACG
+ATTAACAACACAAGTTTTATTATATCCCAATCAACTTAAATTGGGGATACCTTGTAAATA
+AAAAATGTTTATATTTTTCACTATTCCCGAGCTATTTATCTAGGGGGGTTCAATAAGACT
+AAAATGCGAATTCATTTAACTTAATCCCGTTCAATACAGTTTGGGGTGCCTAACTGTATT
+ACTTTTCTCTTTAATTTACAGTTAACCCCATCATAAGATGTTGGGGGGATAAACAAACTA
+ATTGCATCAAATTTTTTTTAAAATACCCACACCAAAACGTTAGGGGAATAACATTTCGGT
+AATTTAAAAGCTACTTTCGTTTTTGCCCTCTCCAAATTTAAAGGGGAAGGAGTGTTTTCA
+ATGGAAGGTTTATTTTTCGCAATTACCCATACCGTAACTGCAGGGGTTAATAATGATGGC
+ACAAAATTAGGCACTTTCATTGTGACCCTCGCTGAAAATGGCGGGGGTTTATTAGGTAAA
+ATATTCGGATTCTATTTTCAATATGCCCTGTCAGTAATCAGTGGGGTTTCAAAGGTGAGG
+AAGAGATTTAAATGTTTGGACTAGCCCCAGCCATCGCAAATAGGGGGCAAGCTGCACAAC
+AACATGATAGTGTGTTTTTAGGCACCCCTATCGTAGACATCGGGGGTAACGGTGTGGGTT
+AACTAGGTAAATTATTTGGATTCTACCCTAACAACTAATATTGGGGAAAATAAACTGGGT
+AAGCATACTTTAATTTTATGCACTCCCCTTACTTTATTTGCAGGGGTTTGAGCCTCTGTT
+AAGATTTAGATACATTTACAATATACCCGATCGGGAAATTGGGGGGTAAAAATATTTTGA
+AAGACTTTGATGATTTTATTGTTGACCCTTACGATGCAGAAGGGGGGGCGTTTCACTATA
+AGGCGAATGTTTTTTTTCATAAAGCCCCAAACGATGATTTTTGGGGATTTAAAAAAATCA
+ATCACCAACATTGGTTTGCTTTTCACCCAAACAAATTAACGAGGGGTGAAGTATTATCAG
+AACGATTTGTGAATTTTTTCAAACACCCTCACATGGAAGTTGGGGGGCATCGTGCAGATG
+AGTTATTTAGAAATTTTTTAGCAGACCCTAACGTTAAATACTGGGGTCAAACATTAGAAA
+AAATTGTCGAATTATTTAAAAGACACCCTTTCTATATTGTTAGGGGTGGTGTAACCGAAA
+AGCAAAAGAGAAGGTTTAATCAGACCCCGTTCCATAAATATAGGGGAAAGATATTTATAT
+ATGAGGAAACAGGATTTCAAAAACCCCCTCCCGAATTTTTTAGGGGTGTTTTTAATGATA
+ATGGTGAGGATGAATTTCAGCACTCCCCTATCGTTGGAGATTGGGGAACATCTGACATTC
+AAGGTGGAATCAATTTTGGTATAGCCCCTTGCTGGTTTAATTGGGGAGGATTTGATCATA
+ATCCAGGAATTATATTTGATTATGACCCTAACTCATGGAAACGGGGAAATGATATTGTAC
+ATTAAGACAAATAATTTGAAAATGACCCATACTAGCTTAAGAGGGGAATTGTAAAAACGA
+AACTACGAAAAGATTTTTTTTCATGCCCGTACTGTCGAATATGGGGATCTTATGGTTTCC
+ACAATAAGACATCATTTCCCTCTATCCCTTTCGCTTCCACATGGGGAAATCCATGATGTA
+AAAAGAAATCCTTATTTTCATTTCTCCCAATCGCTTTAATTGGGGGATTGAAACTCTTTG
+AAAAATCAATCAATTTTGAAGCGTACCCTCTCTTTTGATATTGGGGTAATACTTCTAACT
+ACCATAATAATATATTTTCTTCAAACCCTGGCAAGTAGATTTGGGGGACATCACCTTTTT
+ATAGTAATGCCATTTTTGCTCCTAACCCATCCCCGACAAATAGGGGATAAAATGGTGAAT
+ATGAACTTGCATCATTTATTTGACCCCCTAACTCATTGACCAGGGGTAAGTCTTTGTTGC
+AAAACGCTCTAAAGTTTTCGAATAACCCGTCCGTTTTGTAATGGGGTTCAAGACGTTTGA
+ATTCACTCATATTTTTTTACCCCTTCCCTGTCTACTACTATTGGGGTACATTAAACCACT
+AGTGTTTTTCATTGTTTTGAAAATGCCCAACCATGAACATTAGGGGTTTTCCATGATTAA
+ATTGATTTTCAAGCTTTAAAATTGTCCCTCACCGGTATAGACGGGGTACTATAGTTTGTA
+ATCGCGTACTTAAGTTTAAAAAACACCCACTCTAAAGTAGTTGGGGTAAATGAAAAAGAT
+AAGTTTATACCAAATTTAAAGAGCACCCACCCTGTAATAACAGGGGTATTTCAATATGAT
+ACAGCTCAACAAACTTTTTTTGAAACCCGGACGTCTAAAGAAGGGGATGGAAGAGAAGCG
+ACATTAGCGAATGTTTTTCGTGAATCCCTGACTGATTTAAAGGGGGCAAATGAACAAGAA
+ATAAACATACAACATTTAGCTAATGCCCCAACAGTTGTGATTGGGGGACAACAAGCAGGG
+ATTTTCGGGGGACCTTTGTATACATCCCATACAATATTTTCAGGGGTTACTTTATCTAAG
+AAATTAACGGATACTTTTAAGCAACCCCTAGCACCAGTTTTTGGGGTTGCAGGAGAAGAT
+AATGATTTCGATGATTTGAATCATACCCTTGCTTATAACGAAGGGGATGGGTCGCTGCAT
+AAGGTTAAATATCATTTAATGGAGACCCCAGCGACGACAGTCGGGGGATATTATCCTGAT
+AAGGCTGAGTTGAATTTAACTTTAACCCCGACGTTCATTCATGGGGAAGAAACTGTTCAT
+ACACAAGGTCTACTTTTGATTTGTGCCCGAACTATTGACCAAGGGGACTCGTGGACTGAT
+ATGTTTAAAGCACTTTTGCATGAAACCCTTACAGCATATGGCGGGGTATTTATAGATGCG
+AAGTTTGAGCCGTTTTTAAAAATGGCCCCGCCTATGTTTAAAGGGGTTTTGAAAAAACAT
+AAGTTGCTTGATGATTTTTTTAGAGCCCCACCACAACGTACTGGGGATCAAGGCTTGAAT
+ACGATGATACAAACTTTTACAAATGCCCATTCATTCTTACATGGGGAAAATATGCGCCAA
+ATAGTTTCGTATGATTTTAAGCATTCCCAATCAAATAAAACAGGGGAGACATATATAAAG
\ No newline at end of file
diff --git a/t/data/query_groups b/t/data/query_groups
new file mode 100644
index 0000000..bdbb4a1
--- /dev/null
+++ b/t/data/query_groups
@@ -0,0 +1,7 @@
+group_1: 1_1	2_1	3_1
+group_2: 1_2	2_2
+group_3: 1_3	3_3
+group_4: 2_4	3_4
+group_5: 3_5
+group_6: 1_6
+group_7: 2_7
\ No newline at end of file
diff --git a/t/data/query_groups_all_merged b/t/data/query_groups_all_merged
new file mode 100644
index 0000000..c4815ee
--- /dev/null
+++ b/t/data/query_groups_all_merged
@@ -0,0 +1 @@
+group_1: 1_1	2_1	3_1	1_2	2_2	1_3	3_3	2_4	3_4	3_5	1_6	2_7
\ No newline at end of file
diff --git a/t/data/query_groups_missing_genes b/t/data/query_groups_missing_genes
new file mode 100644
index 0000000..616fa8b
--- /dev/null
+++ b/t/data/query_groups_missing_genes
@@ -0,0 +1,7 @@
+group_1: 1_1	2_1	3_1	4_1
+group_2: 1_2	2_2
+group_3: 1_3	3_3
+group_4: 2_4	3_4
+group_5: 3_5
+group_6: 1_6
+group_7: 2_7
\ No newline at end of file
diff --git a/t/data/query_groups_paralogs b/t/data/query_groups_paralogs
new file mode 100644
index 0000000..6661bde
--- /dev/null
+++ b/t/data/query_groups_paralogs
@@ -0,0 +1,6 @@
+group_1: 1_1	2_1	3_1 
+group_2: 1_2	2_2	2_7
+group_3: 1_3	3_3	2_4
+group_4: 3_4
+group_5: 3_5
+group_6: 1_6
diff --git a/t/data/query_groups_reference b/t/data/query_groups_reference
new file mode 100644
index 0000000..62c6032
--- /dev/null
+++ b/t/data/query_groups_reference
@@ -0,0 +1,6 @@
+group_1: 1_1	2_1	3_1
+group_2: 1_2	2_2
+group_3: 1_3	3_3
+group_5: 3_5
+group_6: 1_6
+group_7: 2_7
diff --git a/t/data/raxml.tre b/t/data/raxml.tre
new file mode 100644
index 0000000..ba35f18
--- /dev/null
+++ b/t/data/raxml.tre
@@ -0,0 +1 @@
+((efgh_7#3:0.02316815548247504186,(((((abcd_4#15:0.00000132226186997362,((abcd_3#9:0.00077142536151366802,abcd_4#17:0.00231509323793084891)66:0.00000132226186997362,abcd_3#20:0.00000132226186997362)69:0.00000132226186997362)100:0.00465923394294907780,abcd_3#96:0.00855943508912884905)97:0.00305706239572176106,abcd_3#7:0.00702776231555758951)100:0.01189241259037208259,((6753_5#30:0.00077274606169234225,((abcd_3#8:0.00232303086183083740,abcd_3#12:0.00000132226186997362)15:0.0000013222618699 [...]
diff --git a/t/data/real_data_1.gff b/t/data/real_data_1.gff
new file mode 100644
index 0000000..9b1b4c1
--- /dev/null
+++ b/t/data/real_data_1.gff
@@ -0,0 +1,1641 @@
+##gff-version 3
+##sequence-region ERS111111|SC|contig000020 1 92255
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	241	921	.	-	0	ID=11111_1#11_04055;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002145956.2,protein motif:Pfam:PF07108.5;locus_tag=11111_1#11_04055;product=pathogenicity island-encoded protein A,PipA protein;protein_id=gnl|SC|11111_1#11_04055
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	1144	2019	.	-	0	ID=11111_1#11_04056;gene=pipB2_2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002145957.1,similar to AA sequence:UniProtKB:Q8ZMM8,protein motif:CLUSTERS:PRK15197,protein motif:Cdd:COG5351,protein motif:Pfam:PF00805.16;locus_tag=11111_1#11_04056;product=secreted effector protein,Type III effector pipB2,secreted effector protein PipB,Uncharacterized protein conserved in bacteria,Pentapeptide repeat [...]
+ERS111111|SC|contig000020	Infernal:1.1	ncRNA	2139	2237	.	+	0	ID=11111_1#11_04057;inference=COORDINATES:profile:Infernal:1.1;locus_tag=11111_1#11_04057;product=isrI
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	2567	2908	.	-	0	ID=11111_1#11_04058;gene=sigE;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002145959.1,similar to AA sequence:UniProtKB:O30917,protein motif:CLUSTERS:PRK15202,protein motif:Pfam:PF07824.6;locus_tag=11111_1#11_04058;product=chaperone protein SigE,Chaperone protein sigE,type III secretion chaperone protein SigE,Type III secretion chaperone domain;protein_id=gnl|SC|11111_1#11_04058
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	2925	4610	.	-	0	ID=11111_1#11_04059;eC_number=3.1.3.-,3.1.3.-;gene=sopB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002145960.1,similar to AA sequence:UniProtKB:O30916,protein motif:CLUSTERS:PRK15378,protein motif:Pfam:PF05925.6;locus_tag=11111_1#11_04059;product=inositol phosphate phosphatase SopB,Inositol phosphate phosphatase sopB,inositol phosphate phosphatase SopB,Enterobacterial virulence protein IpgD;pro [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	5332	6801	.	-	0	ID=11111_1#11_04060;eC_number=3.4.-.-;gene=pepD_2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002215025.1,similar to AA sequence:UniProtKB:Q8G6Z9,protein motif:Pfam:PF03577.9;locus_tag=11111_1#11_04060;product=peptidase family C69,Dipeptidase,Peptidase family C69;protein_id=gnl|SC|11111_1#11_04060
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	6974	8338	.	-	0	ID=11111_1#11_04061;eC_number=2.7.13.3;gene=yedV;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002145964.1,similar to AA sequence:UniProtKB:P76339,protein motif:CLUSTERS:PRK09835,protein motif:Cdd:COG5278,protein motif:TIGRFAMs:TIGR01386,protein motif:Pfam:PF02518.20;locus_tag=11111_1#11_04061;product=heavy metal sensor kinase subfamily,Probable sensor-like histidine kinase YedV,sensor kinase CusS [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	8331	9068	.	-	0	ID=11111_1#11_04062;gene=copR;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002243090.1,similar to AA sequence:UniProtKB:P76340,protein motif:CLUSTERS:PRK11517,protein motif:Cdd:COG4565,protein motif:TIGRFAMs:TIGR01387,protein motif:Pfam:PF00072.18;locus_tag=11111_1#11_04062;product=transcriptional regulatory protein YedW,Probable transcriptional regulatory protein YedW,transcriptional regulatory  [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	9147	9557	.	+	0	ID=11111_1#11_04063;eC_number=3.5.2.17,3.5.2.17;gene=uraH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002145966.1,similar to AA sequence:UniProtKB:Q4VYA5,protein motif:CLUSTERS:PRK15036,protein motif:TIGRFAMs:TIGR02962,protein motif:Pfam:PF00576.15;locus_tag=11111_1#11_04063;product=hydroxyisourate hydrolase,5-hydroxyisourate hydrolase precursor,hydroxyisourate hydrolase,hydroxyisourate hydrolas [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	9890	10402	.	-	0	ID=11111_1#11_04064;eC_number=1.5.1.36;gene=hpaC;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729827.1,similar to AA sequence:UniProtKB:Q57501,protein motif:CLUSTERS:PRK15486,protein motif:TIGRFAMs:TIGR02296,protein motif:Pfam:PF01613.12;locus_tag=11111_1#11_04064;product=4-hydroxyphenylacetate 3-monooxygenase coupling protein,4-hydroxyphenylacetate 3-monooxygenase reductase component,4-hydro [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	10420	11652	.	-	0	ID=11111_1#11_04065;eC_number=1.14.14.9;gene=hpaB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002145968.1,similar to AA sequence:UniProtKB:Q57160,protein motif:TIGRFAMs:TIGR02310,protein motif:Pfam:PF03241.7;locus_tag=11111_1#11_04065;product=4-hydroxyphenylacetate 3-monooxygenase%2C oxygenase component,4-hydroxyphenylacetate 3-monooxygenase oxygenase component,4-hydroxyphenylacetate 3-monooxy [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	11833	12273	.	-	0	ID=11111_1#11_04066;gene=hpaR;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002145969.1,similar to AA sequence:UniProtKB:O07458,protein motif:TIGRFAMs:TIGR02337,protein motif:Pfam:PF01047.16;locus_tag=11111_1#11_04066;product=homoprotocatechuate degradation operon regulator%2C HpaR,Benzoate anaerobic degradation regulator,homoprotocatechuate degradation operon regulator%2C HpaR,MarR family;prote [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	12548	13837	.	+	0	ID=11111_1#11_04067;eC_number=5.3.3.-;gene=hpaG;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729830.1,similar to AA sequence:UniProtKB:P37352,protein motif:CLUSTERS:PRK15203,protein motif:Cdd:COG0179,protein motif:TIGRFAMs:TIGR02303,protein motif:Pfam:PF01557.12;locus_tag=11111_1#11_04067;product=4-hydroxyphenylacetate degradation bifunctional isomerase/decarboxylase,Homoprotocatechuate cata [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	13834	14133	.	+	0	ID=11111_1#11_04068;eC_number=1.2.1.8;gene=hpcC_1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729831.1,similar to AA sequence:UniProtKB:Q9HTJ1,protein motif:CLUSTERS:PRK13252,protein motif:Cdd:COG3191,protein motif:TIGRFAMs:TIGR02299,protein motif:Pfam:PF00171.1;locus_tag=11111_1#11_04068;product=5-carboxymethyl-2-hydroxymuconate semialdehyde dehydrogenase,Betaine aldehyde dehydrogenase,bet [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	14127	15299	.	+	0	ID=11111_1#11_04069;eC_number=1.2.1.-;gene=hpcC_2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729831.1,similar to AA sequence:UniProtKB:P23105,protein motif:CLUSTERS:PRK13252,protein motif:Cdd:COG4230,protein motif:TIGRFAMs:TIGR02299,protein motif:Pfam:PF00171.1;locus_tag=11111_1#11_04069;product=5-carboxymethyl-2-hydroxymuconate semialdehyde dehydrogenase,2-hydroxymuconic semialdehyde dehy [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	15301	16152	.	+	0	ID=11111_1#11_04070;eC_number=1.13.11.15,1.13.11.15;gene=hpcB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729832.1,similar to AA sequence:UniProtKB:Q05353,protein motif:CLUSTERS:PRK03881,protein motif:Cdd:COG0179,protein motif:TIGRFAMs:TIGR02298,protein motif:Pfam:PF02900.1;locus_tag=11111_1#11_04070;product=3%2C4-dihydroxyphenylacetate 2%2C3-dioxygenase,3%2C4-dihydroxyphenylacetate 2%2C3-d [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	16162	16542	.	+	0	ID=11111_1#11_04071;eC_number=5.3.3.10,5.3.3.10;gene=hpcD;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729833.1,similar to AA sequence:UniProtKB:Q05354,protein motif:CLUSTERS:PRK15031,protein motif:Cdd:COG3232,protein motif:Pfam:PF02962.1;locus_tag=11111_1#11_04071;product=5-carboxymethyl-2-hydroxymuconate delta-isomerase,5-carboxymethyl-2-hydroxymuconate Delta-isomerase,5-carboxymethyl-2-hy [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	16686	17489	.	+	0	ID=11111_1#11_04072;eC_number=4.2.-.-,4.2.1.80;gene=hpcG;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729834.1,similar to AA sequence:UniProtKB:Q9S156,protein motif:CLUSTERS:PRK11342,protein motif:Cdd:COG3971,protein motif:TIGRFAMs:TIGR02312,protein motif:Pfam:PF01557.12;locus_tag=11111_1#11_04072;product=2-oxo-hepta-3-ene-1%2C7-dioic acid hydratase,2-keto-4-pentenoate hydratase,2-keto-4-pen [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	17500	18291	.	+	0	ID=11111_1#11_04073;eC_number=4.1.2.-;gene=hpaI;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_216043.1,similar to AA sequence:UniProtKB:Q47098,protein motif:CLUSTERS:PRK10128,protein motif:Cdd:COG2301,protein motif:TIGRFAMs:TIGR02311,protein motif:Pfam:PF03328.8;locus_tag=11111_1#11_04073;product=4-hydroxyphenylacetate catabolism,4-hydroxy-2-oxo-heptane-1%2C7-dioate aldolase,2-keto-3-deoxy-L-rha [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	18363	19739	.	+	0	ID=11111_1#11_04074;gene=hpaX;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729836.1,similar to AA sequence:UniProtKB:P76470,protein motif:CLUSTERS:PRK11551,protein motif:Cdd:COG2814,protein motif:TIGRFAMs:TIGR02332,protein motif:Pfam:PF07690.10;locus_tag=11111_1#11_04074;product=4-hydroxyphenylacetate permease,Inner membrane transport protein RhmT,putative 3-hydroxyphenylpropionic transporte [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	19749	20645	.	+	0	ID=11111_1#11_04075;gene=hpaA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729837.1,similar to AA sequence:UniProtKB:P40408,protein motif:CLUSTERS:PRK10572,protein motif:Cdd:COG2169,protein motif:TIGRFAMs:TIGR02297,protein motif:Pfam:PF12833.1;locus_tag=11111_1#11_04075;product=4-hydroxyphenylacetate 3-monooxygenase operon regulatory protein,Bacillibactin transport regulator,DNA-binding tran [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	20659	21597	.	+	0	ID=11111_1#11_04076;eC_number=3.1.26.11,3.1.26.11;gene=rnz;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_006087376.1,similar to AA sequence:UniProtKB:P54548,protein motif:CLUSTERS:PRK00055,protein motif:Cdd:COG5212,protein motif:TIGRFAMs:TIGR02651,protein motif:Pfam:PF12706.1;locus_tag=11111_1#11_04076;product=Ribonuclease Z,Ribonuclease Z,ribonuclease Z,Low-affinity cAMP phosphodiesterase,ribon [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	23021	23326	.	-	0	ID=11111_1#11_04077;gene=cbpM;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_001570915.1,similar to AA sequence:UniProtKB:P63264,protein motif:CLUSTERS:PRK10265;locus_tag=11111_1#11_04077;product=chaperone-modulator protein CbpM,Chaperone modulatory protein CbpM,chaperone-modulator protein CbpM;protein_id=gnl|SC|11111_1#11_04077
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	23326	24246	.	-	0	ID=11111_1#11_04078;gene=cbpA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729840.1,similar to AA sequence:UniProtKB:P36659,protein motif:CLUSTERS:PRK10266,protein motif:Cdd:COG5407,protein motif:TIGRFAMs:TIGR02349,protein motif:Pfam:PF01556.12;locus_tag=11111_1#11_04078;product=curved DNA-binding protein,Curved DNA-binding protein,curved DNA-binding protein CbpA,Preprotein translocase subun [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	24482	24844	.	+	0	ID=11111_1#11_04079;gene=scsA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729841.1;locus_tag=11111_1#11_04079;product=copper-sensitivity suppressor membrane protein A;protein_id=gnl|SC|11111_1#11_04079
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	24893	26779	.	+	0	ID=11111_1#11_04080;eC_number=1.8.1.8;gene=scsB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729842.1,similar to AA sequence:UniProtKB:P36655,protein motif:CLUSTERS:PRK00293,protein motif:Cdd:COG4233,protein motif:Pfam:PF02683.9;locus_tag=11111_1#11_04080;product=copper-sensitivity suppressor membrane protein B,Thiol:disulfide interchange protein DsbD precursor,thiol:disulfide interchange pr [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	26776	27399	.	+	0	ID=11111_1#11_04081;gene=scsC;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729843.1,similar to AA sequence:UniProtKB:O32218,protein motif:Cdd:COG1651,protein motif:Pfam:PF01323.14;locus_tag=11111_1#11_04081;product=copper-sensitivity secreted suppressor protein C,Thiol-disulfide oxidoreductase D,Protein-disulfide isomerase,DSBA-like thioredoxin domain;protein_id=gnl|SC|11111_1#11_04081
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	27389	27895	.	+	0	ID=11111_1#11_04082;gene=resA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002145985.1,similar to AA sequence:UniProtKB:P35160,protein motif:CLUSTERS:PRK03147,protein motif:TIGRFAMs:TIGR00385,protein motif:Pfam:PF08534.4;locus_tag=11111_1#11_04082;product=suppressor for copper-sensitivity D,Thiol-disulfide oxidoreductase resA,thiol-disulfide oxidoreductase,periplasmic protein thiol:disulfide ox [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	28028	29269	.	+	0	ID=11111_1#11_04083;eC_number=3.1.3.10;gene=agp;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_001570908.1,similar to AA sequence:UniProtKB:P19926,protein motif:CLUSTERS:PRK10173,protein motif:Pfam:PF00328.1;locus_tag=11111_1#11_04083;product=glucose-1-phosphatase/inositol phosphatase,Glucose-1-phosphatase precursor,glucose-1-phosphatase/inositol phosphatase,Histidine phosphatase superfamily (bra [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	29303	29530	.	-	0	ID=11111_1#11_04084;gene=yccJ;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_006087385.1,similar to AA sequence:UniProtKB:P0AB14,protein motif:CLUSTERS:PRK10174;locus_tag=11111_1#11_04084;product=YccJ-like protein,hypothetical protein,hypothetical protein;protein_id=gnl|SC|11111_1#11_04084
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	29551	30147	.	-	0	ID=11111_1#11_04085;gene=wrbA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729847.1,similar to AA sequence:UniProtKB:P0A8G6,protein motif:CLUSTERS:PRK03767,protein motif:TIGRFAMs:TIGR01755,protein motif:Pfam:PF03358.9;locus_tag=11111_1#11_04085;product=trp repressor binding protein,Trp repressor-binding protein,NAD(P)H:quinone oxidoreductase,NAD(P)H:quinone oxidoreductase%2C type IV,NADPH-de [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	30532	30699	.	+	0	ID=11111_1#11_04086;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_006087756.1,protein motif:Pfam:PF10685.3;locus_tag=11111_1#11_04086;product=Conidiation-specific protein 10,Stress-induced bacterial acidophilic repeat motif;protein_id=gnl|SC|11111_1#11_04086
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	30836	31474	.	+	0	ID=11111_1#11_04087;gene=rutR;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729849.1,similar to AA sequence:UniProtKB:P0ACU2,protein motif:CLUSTERS:PRK15008,protein motif:TIGRFAMs:TIGR03613,protein motif:Pfam:PF08362.5;locus_tag=11111_1#11_04087;product=transcriptional regulator,Rut operon repressor,HTH-type transcriptional regulator RutR,pyrimidine utilization regulatory protein R,YcdC-like  [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	31471	31866	.	-	0	ID=11111_1#11_04088;inference=ab initio prediction:Prodigal:2.60,protein motif:Cdd:COG3755,protein motif:Pfam:PF07007.6;locus_tag=11111_1#11_04088;product=Uncharacterized protein conserved in bacteria,Protein of unknown function (DUF1311);protein_id=gnl|SC|11111_1#11_04088
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	31925	35887	.	-	0	ID=11111_1#11_04089;gene=putA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002243116.1,similar to AA sequence:UniProtKB:P09546,protein motif:CLUSTERS:PRK11809,protein motif:Cdd:COG4230,protein motif:TIGRFAMs:TIGR01238,protein motif:Pfam:PF01619.12;locus_tag=11111_1#11_04089;product=trifunctional transcriptional regulator/proline dehydrogenase/pyrroline-5-carboxylate dehydrogenase,Bifunctional p [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	36309	37817	.	+	0	ID=11111_1#11_04090;gene=putP;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002145994.1,similar to AA sequence:UniProtKB:P07117,protein motif:CLUSTERS:PRK15419,protein motif:Cdd:COG4145,protein motif:TIGRFAMs:TIGR02121,protein motif:Pfam:PF00474.11;locus_tag=11111_1#11_04090;product=sodium/proline symporter,Propionate transporter,proline:sodium symporter PutP,Na+/panthothenate symporter,sodium/p [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	38435	38731	.	+	0	ID=11111_1#11_04091;inference=ab initio prediction:Prodigal:2.60;locus_tag=11111_1#11_04091;product=hypothetical protein;protein_id=gnl|SC|11111_1#11_04091
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	38710	39498	.	+	0	ID=11111_1#11_04092;gene=phoH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729855.1,similar to AA sequence:UniProtKB:P0A9K1,protein motif:CLUSTERS:PRK10536,protein motif:Cdd:COG1875,protein motif:Pfam:PF02562.10;locus_tag=11111_1#11_04092;product=phosphate starvation-inducible protein PsiH,Phosphate starvation-inducible protein psiH,hypothetical protein,Predicted ATPase related to phosphate  [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	39604	40485	.	-	0	ID=11111_1#11_04093;gene=ybbH_2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_001570898.1,similar to AA sequence:UniProtKB:Q45581,protein motif:CLUSTERS:PRK11337,protein motif:Cdd:COG2222,protein motif:TIGRFAMs:TIGR03127,protein motif:Pfam:PF01418.11;locus_tag=11111_1#11_04093;product=putative transcriptional regulator,Uncharacterized HTH-type transcriptional regulator ybbH,DNA-binding transcrip [...]
+ERS111111|SC|contig000020	Infernal:1.1	ncRNA	40486	40770	.	+	0	ID=11111_1#11_04094;inference=COORDINATES:profile:Infernal:1.1;locus_tag=11111_1#11_04094;product=STnc500
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	40771	41673	.	-	0	ID=11111_1#11_04095;gene=yidK;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002215057.1,similar to AA sequence:UniProtKB:P31448,protein motif:CLUSTERS:PRK10484,protein motif:Cdd:COG4146,protein motif:TIGRFAMs:TIGR00813,protein motif:Pfam:PF00474.11;locus_tag=11111_1#11_04095;product=sodium-glucose/galactose cotransporter,Uncharacterized symporter yidK,putative transporter,Predicted symporter,tra [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	41741	42268	.	-	0	ID=11111_1#11_04096;gene=sglT;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002215057.1,similar to AA sequence:UniProtKB:P96169,protein motif:CLUSTERS:PRK10484,protein motif:Cdd:COG4146,protein motif:TIGRFAMs:TIGR00813,protein motif:Pfam:PF00474.11;locus_tag=11111_1#11_04096;product=sodium-glucose/galactose cotransporter,Na(+)/glucose symporter,putative transporter,Predicted symporter,transporte [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	42605	43285	.	-	0	ID=11111_1#11_04097;eC_number=5.1.3.9;gene=nanE;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_001570896.2,similar to AA sequence:UniProtKB:P60668,protein motif:CLUSTERS:PRK01130,protein motif:Pfam:PF04131.8;locus_tag=11111_1#11_04097;product=N-acetylmannosamine-6-phosphate 2-epimerase,Putative N-acetylmannosamine-6-phosphate 2-epimerase,N-acetylmannosamine-6-phosphate 2-epimerase,Putative N-acet [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	43804	44946	.	+	0	ID=11111_1#11_04098;eC_number=5.1.3.24;gene=nanM;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_001570895.1,similar to AA sequence:UniProtKB:P44544,protein motif:CLUSTERS:PRK14131,protein motif:TIGRFAMs:TIGR03547,protein motif:Pfam:PF01344.19;locus_tag=11111_1#11_04098;product=N-acetylneuraminic acid mutarotase,N-acetylneuraminate epimerase precursor,N-acetylneuraminic acid mutarotase,mutatrotase [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	44992	45684	.	+	0	ID=11111_1#11_04099;gene=yiiy;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_216068.1,similar to AA sequence:UniProtKB:Q934G3,protein motif:CLUSTERS:PRK09980,protein motif:Cdd:COG3203,protein motif:Pfam:PF06178.7;locus_tag=11111_1#11_04099;product=outer membrane protein,Oligogalacturonate-specific porin kdgM precursor,outer membrane porin L,Outer membrane protein (porin),Oligogalacturonate-specif [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	45967	47247	.	+	0	ID=11111_1#11_04100;gene=nanT_3;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_001570893.1,similar to AA sequence:UniProtKB:P41036,protein motif:CLUSTERS:PRK12307,protein motif:Cdd:COG2814,protein motif:TIGRFAMs:TIGR00891,protein motif:Pfam:PF00083.18;locus_tag=11111_1#11_04100;product=putative sialic acid transporter,Sialic acid permease,putative sialic acid transporter,Arabinose efflux permease [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	47261	48364	.	+	0	ID=11111_1#11_04101;eC_number=1.1.1.292;gene=yjhC;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_216069.1,similar to AA sequence:UniProtKB:Q2I8V6,protein motif:CLUSTERS:PRK11579,protein motif:Pfam:PF01408.16;locus_tag=11111_1#11_04101;product=dehydrogenase-like protein,1%2C5-anhydro-D-fructose reductase,putative oxidoreductase,Oxidoreductase family%2C NAD-binding Rossmann fold;protein_id=gnl|SC|1 [...]
+ERS111111|SC|contig000020	Aragorn:1.2.36	tRNA	48701	48788	.	-	0	ID=11111_1#11_04102;inference=COORDINATES:profile:Aragorn:1.2.36;locus_tag=11111_1#11_04102;product=tRNA-Ser(gga)
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	49024	49962	.	+	0	ID=11111_1#11_04103;eC_number=1.1.1.79;gene=ghrA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729856.1,similar to AA sequence:UniProtKB:Q8ZQ30,protein motif:CLUSTERS:PRK15469,protein motif:Cdd:COG1932,protein motif:TIGRFAMs:TIGR01327,protein motif:Pfam:PF02826.13;locus_tag=11111_1#11_04103;product=2-hydroxyacid dehydrogenase,Glyoxylate/hydroxypyruvate reductase A,bifunctional glyoxylate/hydr [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	50047	50784	.	+	0	ID=11111_1#11_04104;eC_number=3.1.3.-;gene=ycdX;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_001570887.1,similar to AA sequence:UniProtKB:P75914,protein motif:CLUSTERS:PRK09248,protein motif:Cdd:COG1387,protein motif:TIGRFAMs:TIGR01856,protein motif:Pfam:PF02811.13;locus_tag=11111_1#11_04104;product=putative hydrolase,Probable phosphatase YcdX,putative hydrolase,Histidinol phosphatase and relat [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	50808	51362	.	+	0	ID=11111_1#11_04105;gene=ycdY;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002146908.1,similar to AA sequence:UniProtKB:P75915,protein motif:CLUSTERS:PRK11621,protein motif:Cdd:COG3381,protein motif:Pfam:PF02613.9;locus_tag=11111_1#11_04105;product=chaperone%2C TorD family,Chaperone protein YcdY,twin-argninine leader-binding protein DmsD,Uncharacterized component of anaerobic dehydrogenases,Nit [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	51451	51945	.	+	0	ID=11111_1#11_04106;gene=ycdZ;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P75916,protein motif:Pfam:PF06496.5;locus_tag=11111_1#11_04106;product=Inner membrane protein ycdZ,Protein of unknown function (DUF1097);protein_id=gnl|SC|11111_1#11_04106
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	51983	52816	.	-	0	ID=11111_1#11_04107;gene=csgG;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729860.1,protein motif:CLUSTERS:PRK15184,protein motif:Pfam:PF03783.8;locus_tag=11111_1#11_04107;product=assembly/transport component in curli production,curli production assembly/transport protein CsgG,Curli production assembly/transport component CsgG;protein_id=gnl|SC|11111_1#11_04107
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	52843	53259	.	-	0	ID=11111_1#11_04108;gene=csgF;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729861.1,protein motif:CLUSTERS:PRK10050,protein motif:Pfam:PF10614.3;locus_tag=11111_1#11_04108;product=assembly/transport component in curli production,curli assembly protein CsgF,Curli production assembly/transport component CsgF;protein_id=gnl|SC|11111_1#11_04108
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	53286	53681	.	-	0	ID=11111_1#11_04109;gene=csgE;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729862.1,protein motif:CLUSTERS:PRK10386,protein motif:Pfam:PF10627.3;locus_tag=11111_1#11_04109;product=assembly/transport component in curli production,curli assembly protein CsgE,Curli assembly protein CsgE;protein_id=gnl|SC|11111_1#11_04109
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	53710	54336	.	-	0	ID=11111_1#11_04110;gene=csgD;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729863.1,similar to AA sequence:UniProtKB:P52106,protein motif:CLUSTERS:PRK10100,protein motif:Cdd:COG2909,protein motif:TIGRFAMs:TIGR03020,protein motif:Pfam:PF00196.13;locus_tag=11111_1#11_04110;product=regulatory protein,CsgBAC operon transcriptional regulatory protein,DNA-binding transcriptional regulator CsgD,ATP [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	55092	55547	.	+	0	ID=11111_1#11_04111;gene=csgB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729864.1,similar to AA sequence:UniProtKB:P0A1E9,protein motif:CLUSTERS:PRK10101,protein motif:Pfam:PF07012.6;locus_tag=11111_1#11_04111;product=nucleation component of curlin monomers,Fimbrin SEF17 minor subunit,curlin minor subunit CsgB,Curlin associated repeat;protein_id=gnl|SC|11111_1#11_04111
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	55589	56044	.	+	0	ID=11111_1#11_04112;gene=csgA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729865.1,similar to AA sequence:UniProtKB:P0A1E7,protein motif:CLUSTERS:PRK10051,protein motif:Pfam:PF07012.6;locus_tag=11111_1#11_04112;product=major curlin subunit,Fimbrin SEF17,major curlin subunit,Curlin associated repeat;protein_id=gnl|SC|11111_1#11_04112
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	56106	56432	.	+	0	ID=11111_1#11_04113;gene=csgC;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_007470940.1,similar to AA sequence:UniProtKB:P0A1Z9,protein motif:CLUSTERS:PRK10102,protein motif:Pfam:PF10610.3;locus_tag=11111_1#11_04113;product=curli assembly protein CsgC,Curli assembly protein CsgC precursor,curli assembly protein CsgC,Thin aggregative fimbriae synthesis protein;protein_id=gnl|SC|11111_1#11_04113
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	56564	56884	.	+	0	ID=11111_1#11_04114;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_006087410.1;locus_tag=11111_1#11_04114;product=Fimbrial protein;protein_id=gnl|SC|11111_1#11_04114
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	56972	57511	.	+	0	ID=11111_1#11_04115;eC_number=3.5.1.-;gene=ymdB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_006087411.1,similar to AA sequence:UniProtKB:P0A8D6,protein motif:CLUSTERS:PRK00431,protein motif:Pfam:PF01661.15;locus_tag=11111_1#11_04115;product=Macro domain%2C possibly ADP-ribose binding module,O-acetyl-ADP-ribose deacetylase,RNase III inhibitor,Macro domain;protein_id=gnl|SC|11111_1#11_04115
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	57450	58934	.	+	0	ID=11111_1#11_04116;eC_number=2.7.8.-;gene=ybhO_2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_001570877.2,similar to AA sequence:UniProtKB:P0AA84,protein motif:CLUSTERS:PRK01642,protein motif:TIGRFAMs:TIGR04265;locus_tag=11111_1#11_04116;product=phospholipase,Putative cardiolipin synthase YbhO,cardiolipin synthetase,cardiolipin synthase;protein_id=gnl|SC|11111_1#11_04116
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	58951	60105	.	-	0	ID=11111_1#11_04117;eC_number=2.1.-.-;gene=mdoC;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_001570876.1,similar to AA sequence:UniProtKB:P75920,protein motif:CLUSTERS:PRK03854,protein motif:Cdd:COG3274,protein motif:Pfam:PF01757.16;locus_tag=11111_1#11_04117;product=glucans biosynthesis protein,Glucans biosynthesis protein C,glucans biosynthesis protein,Uncharacterized protein conserved in bac [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	60359	61912	.	+	0	ID=11111_1#11_04118;gene=mdoG;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_001570875.1,similar to AA sequence:UniProtKB:P33136,protein motif:CLUSTERS:PRK13274,protein motif:Cdd:COG3131,protein motif:Pfam:PF04349.6;locus_tag=11111_1#11_04118;product=glucan biosynthesis protein G,Glucans biosynthesis protein G precursor,glucan biosynthesis protein G,Periplasmic glucans biosynthesis protein,Peripl [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	61905	64448	.	+	0	ID=11111_1#11_04119;eC_number=2.4.1.-;gene=mdoH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_216085.1,similar to AA sequence:UniProtKB:P62517,protein motif:CLUSTERS:PRK05454,protein motif:Pfam:PF00535.20;locus_tag=11111_1#11_04119;product=glucosyltransferase MdoH,Glucans biosynthesis glucosyltransferase H,glucosyltransferase MdoH,Glycosyl transferase family 2;protein_id=gnl|SC|11111_1#11_04119
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	64522	64749	.	+	0	ID=11111_1#11_04120;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729872.1,protein motif:CLUSTERS:PRK10175,protein motif:Cdd:COG5645,protein motif:Pfam:PF07119.6;locus_tag=11111_1#11_04120;product=lipoprotein,lipoprotein,Predicted periplasmic lipoprotein,Protein of unknown function (DUF1375);protein_id=gnl|SC|11111_1#11_04120
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	64750	65124	.	-	0	ID=11111_1#11_04121;gene=msyB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729873.1,protein motif:CLUSTERS:PRK11467;locus_tag=11111_1#11_04121;product=acidic protein MsyB,secY/secA suppressor protein;protein_id=gnl|SC|11111_1#11_04121
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	65206	66420	.	-	0	ID=11111_1#11_04122;gene=yceE;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729874.1,similar to AA sequence:UniProtKB:O31762,protein motif:CLUSTERS:PRK09874,protein motif:Cdd:COG2814,protein motif:TIGRFAMs:TIGR00880,protein motif:Pfam:PF07690.10;locus_tag=11111_1#11_04122;product=membrane transport protein,Bacillibactin exporter,drug efflux system protein MdtG,Arabinose efflux permease,multid [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	66575	67495	.	-	0	ID=11111_1#11_04123;eC_number=2.3.1.-;gene=htrB_2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729875.1,similar to AA sequence:UniProtKB:P0ACV0,protein motif:CLUSTERS:PRK06860,protein motif:Cdd:COG1560,protein motif:TIGRFAMs:TIGR02207,protein motif:Pfam:PF03279.7;locus_tag=11111_1#11_04123;product=lipid A biosynthesis lauroyl acyltransferase,Lipid A biosynthesis lauroyl acyltransferase,lipid [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	67715	68767	.	+	0	ID=11111_1#11_04124;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_006087420.1,protein motif:CLUSTERS:PRK00142,protein motif:Cdd:COG2210,protein motif:Pfam:PF00581.14;locus_tag=11111_1#11_04124;product=Rhodanese-like sulfurtransferase,putative rhodanese-related sulfurtransferase,Uncharacterized conserved protein,Rhodanese-like domain;protein_id=gnl|SC|11111_1#11_04124
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	68819	69394	.	-	0	ID=11111_1#11_04125;gene=yceI_2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_006087865.1,similar to AA sequence:UniProtKB:P0A8X2,protein motif:CLUSTERS:PRK03757,protein motif:Cdd:COG2353,protein motif:Pfam:PF04264.7;locus_tag=11111_1#11_04125;product=YceI protein,hypothetical protein,hypothetical protein,Uncharacterized conserved protein,YceI-like domain;protein_id=gnl|SC|11111_1#11_04125
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	69391	69795	.	-	0	ID=11111_1#11_04126;gene=yceJ;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_005212184.1,similar to AA sequence:UniProtKB:P75925,protein motif:CLUSTERS:PRK11513,protein motif:Pfam:PF00033.13;locus_tag=11111_1#11_04126;product=cytochrome b561-like protein 2,Cytochrome b561 homolog 2,cytochrome b561,Cytochrome b(N-terminal)/b6/petB;protein_id=gnl|SC|11111_1#11_04126
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	70226	70339	.	-	0	ID=11111_1#11_04127;gene=yceO;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_216094.1,similar to AA sequence:UniProtKB:P64442,protein motif:Pfam:PF10968.2;locus_tag=11111_1#11_04127;product=inner membrane protein,hypothetical protein,Protein of unknown function (DUF2770);protein_id=gnl|SC|11111_1#11_04127
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	70380	71498	.	-	0	ID=11111_1#11_04128;eC_number=1.5.3.1,1.5.3.-;gene=solA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729880.1,similar to AA sequence:UniProtKB:P40874,protein motif:CLUSTERS:PRK11259,protein motif:Cdd:COG4121,protein motif:TIGRFAMs:TIGR01377,protein motif:Pfam:PF01266.18;locus_tag=11111_1#11_04128;product=sarcosine oxidase,N-methyl-L-tryptophan oxidase,N-methyltryptophan oxidase,Uncharacteriz [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	71611	71865	.	-	0	ID=11111_1#11_04129;gene=bssS;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_001570863.1,protein motif:CLUSTERS:PRK12301;locus_tag=11111_1#11_04129;product=biofilm formation regulatory protein BssS,biofilm formation regulatory protein BssS;protein_id=gnl|SC|11111_1#11_04129
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	72155	72418	.	-	0	ID=11111_1#11_04130;gene=dinI_3;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729882.1,similar to AA sequence:UniProtKB:P0ABR1,protein motif:CLUSTERS:PRK10597,protein motif:Pfam:PF06183.7;locus_tag=11111_1#11_04130;product=damage-inducible protein,DNA-damage-inducible protein I,DNA damage-inducible protein I,DinI-like family;protein_id=gnl|SC|11111_1#11_04130
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	72492	73538	.	-	0	ID=11111_1#11_04131;eC_number=3.5.2.3,3.5.2.3;gene=pyrC;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729883.1,similar to AA sequence:UniProtKB:P06204,protein motif:CLUSTERS:PRK05451,protein motif:Cdd:COG0418,protein motif:TIGRFAMs:TIGR00856,protein motif:Pfam:PF01979.14;locus_tag=11111_1#11_04131;product=dihydroorotase,Dihydroorotase,dihydroorotase,Dihydroorotase,dihydroorotase%2C homodimeri [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	73642	74268	.	-	0	ID=11111_1#11_04132;gene=yceB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_007470922.1,similar to AA sequence:UniProtKB:P0AB26,protein motif:CLUSTERS:PRK10598,protein motif:Pfam:PF07273.6;locus_tag=11111_1#11_04132;product=lipoprotein,Uncharacterized lipoprotein yceB precursor,lipoprotein,Protein of unknown function (DUF1439);protein_id=gnl|SC|11111_1#11_04132
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	74326	74973	.	-	0	ID=11111_1#11_04133;gene=grxB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729885.1,similar to AA sequence:UniProtKB:P0AC59,protein motif:CLUSTERS:PRK10387,protein motif:TIGRFAMs:TIGR02182,protein motif:Pfam:PF04399.7;locus_tag=11111_1#11_04133;product=glutaredoxin,Glutaredoxin-2,glutaredoxin 2,glutaredoxin%2C GrxB family,Glutaredoxin 2%2C C terminal domain;protein_id=gnl|SC|11111_1#11_04133
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	75037	76245	.	-	0	ID=11111_1#11_04134;gene=mdtH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_001570858.1,similar to AA sequence:UniProtKB:P69367,protein motif:CLUSTERS:PRK11646,protein motif:Cdd:COG2814,protein motif:TIGRFAMs:TIGR00880,protein motif:Pfam:PF07690.10;locus_tag=11111_1#11_04134;product=multidrug resistance protein MdtH,Multidrug resistance protein MdtH,multidrug resistance protein MdtH,Arabinose ef [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	76482	77066	.	+	0	ID=11111_1#11_04135;eC_number=2.3.1.-;gene=rimJ;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729887.1,similar to AA sequence:UniProtKB:P96579,protein motif:CLUSTERS:PRK10809,protein motif:TIGRFAMs:TIGR03585,protein motif:Pfam:PF00583.18;locus_tag=11111_1#11_04135;product=ribosomal-protein-alanine acetyltransferase,Putative ribosomal N-acetyltransferase YdaF,ribosomal-protein-S5-alanine N-ace [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	77102	77749	.	+	0	ID=11111_1#11_04136;gene=yceH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P29217,protein motif:CLUSTERS:PRK11239,protein motif:Cdd:COG3132,protein motif:Pfam:PF04337.6;locus_tag=11111_1#11_04136;product=G20.3,hypothetical protein,Uncharacterized protein conserved in bacteria,Protein of unknown function%2C DUF480;protein_id=gnl|SC|11111_1#11_04136
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	77751	78674	.	+	0	ID=11111_1#11_04137;gene=mviM;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729889.1,similar to AA sequence:UniProtKB:P75931,protein motif:CLUSTERS:PRK11579,protein motif:Cdd:COG3132,protein motif:Pfam:PF01408.16;locus_tag=11111_1#11_04137;product=virulence factor MviM,Virulence factor mviM homolog,putative oxidoreductase,Uncharacterized protein conserved in bacteria,Oxidoreductase family%2C  [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	78939	80513	.	+	0	ID=11111_1#11_04138;gene=mviN;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002146873.1,similar to AA sequence:UniProtKB:P37169,protein motif:TIGRFAMs:TIGR01695,protein motif:Pfam:PF03023.8;locus_tag=11111_1#11_04138;product=integral membrane protein MviN,hypothetical protein,integral membrane protein MviN,MviN-like protein;protein_id=gnl|SC|11111_1#11_04138
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	80595	81017	.	-	0	ID=11111_1#11_04139;gene=flgN;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729891.1,protein motif:CLUSTERS:PRK15459,protein motif:Pfam:PF05130.6;locus_tag=11111_1#11_04139;product=flagella synthesis protein FlgN,flagella synthesis chaperone protein FlgN,FlgN protein;protein_id=gnl|SC|11111_1#11_04139
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	81022	81315	.	-	0	ID=11111_1#11_04140;gene=flgM;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729892.1,similar to AA sequence:UniProtKB:P26477,protein motif:CLUSTERS:PRK10810,protein motif:TIGRFAMs:TIGR03824,protein motif:Pfam:PF04316.7;locus_tag=11111_1#11_04140;product=negative regulator of flagellin synthesis (anti-sigma factor),Anti-sigma-28 factor,anti-sigma28 factor FlgM,flagellar biosynthesis anti-sigma [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	81407	82066	.	-	0	ID=11111_1#11_04141;gene=flgA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729893.1,protein motif:CLUSTERS:PRK07018,protein motif:TIGRFAMs:TIGR03170,protein motif:Pfam:PF08666.6;locus_tag=11111_1#11_04141;product=flagellar basal body P-ring protein FlgA,flagellar basal body P-ring biosynthesis protein FlgA,flagella basal body P-ring formation protein FlgA,SAF domain;protein_id=gnl|SC|11111_1 [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	82223	82639	.	+	0	ID=11111_1#11_04142;gene=flgB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729894.1,similar to AA sequence:UniProtKB:P16437,protein motif:CLUSTERS:PRK05680,protein motif:Cdd:COG1815,protein motif:TIGRFAMs:TIGR01396,protein motif:Pfam:PF00460.14;locus_tag=11111_1#11_04142;product=flagellar basal-body rod protein FlgB,Putative proximal rod protein,flagellar basal body rod protein FlgB,Flagella [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	82643	83047	.	+	0	ID=11111_1#11_04143;gene=flgC;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729895.1,similar to AA sequence:UniProtKB:P0A1I7,protein motif:CLUSTERS:PRK05681,protein motif:Cdd:COG4786,protein motif:TIGRFAMs:TIGR01395,protein motif:Pfam:PF06429.7;locus_tag=11111_1#11_04143;product=flagellar basal-body rod protein FlgC,Putative proximal rod protein,flagellar basal body rod protein FlgC,Flagellar [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	83059	83757	.	+	0	ID=11111_1#11_04144;gene=flgD;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729896.1,similar to AA sequence:UniProtKB:P0A1I9,protein motif:CLUSTERS:PRK06655,protein motif:Cdd:COG1843,protein motif:Pfam:PF03963.8;locus_tag=11111_1#11_04144;product=flagellar hook formation protein FlgD,Basal-body rod modification protein flgD,flagellar basal body rod modification protein,Flagellar hook capping  [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	83784	84995	.	+	0	ID=11111_1#11_04145;gene=flgE;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002146866.1,similar to AA sequence:UniProtKB:P0A1J1,protein motif:CLUSTERS:PRK05682,protein motif:Cdd:COG1749,protein motif:TIGRFAMs:TIGR03506,protein motif:Pfam:PF07559.8;locus_tag=11111_1#11_04145;product=flagellar hook protein FlgE,Flagellar hook protein flgE,flagellar hook protein FlgE,Flagellar hook protein FlgE,fla [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	85016	85771	.	+	0	ID=11111_1#11_04146;gene=flgF;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729898.1,similar to AA sequence:UniProtKB:P16323,protein motif:CLUSTERS:PRK12640,protein motif:Cdd:COG4787,protein motif:TIGRFAMs:TIGR03506,protein motif:Pfam:PF06429.7;locus_tag=11111_1#11_04146;product=flagellar basal-body rod protein FlgF,Putative proximal rod protein,flagellar basal body rod protein FlgF,Flagellar [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	85785	86567	.	+	0	ID=11111_1#11_04147;gene=flgG;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729899.1,similar to AA sequence:UniProtKB:P0A1J3,protein motif:CLUSTERS:PRK12694,protein motif:Cdd:COG4786,protein motif:TIGRFAMs:TIGR02488,protein motif:Pfam:PF06429.7;locus_tag=11111_1#11_04147;product=flagellar basal-body rod protein FlgG (distal rod protein),Distal rod protein,flagellar basal body rod protein FlgG [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	86655	87320	.	+	0	ID=11111_1#11_04148;gene=flgH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729900.1,similar to AA sequence:UniProtKB:Q9PPM0,protein motif:CLUSTERS:PRK00249,protein motif:Pfam:PF02107.10;locus_tag=11111_1#11_04148;product=flagellar L-ring protein,Basal body L-ring protein,flagellar basal body L-ring protein,Flagellar L-ring protein;protein_id=gnl|SC|11111_1#11_04148
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	87332	88429	.	+	0	ID=11111_1#11_04149;gene=flgI;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729901.1,similar to AA sequence:UniProtKB:Q9PMJ8,protein motif:CLUSTERS:PRK05303,protein motif:Cdd:COG1706,protein motif:Pfam:PF02119.10;locus_tag=11111_1#11_04149;product=flagellar P-ring protein,Basal body P-ring protein,flagellar basal body P-ring protein,Flagellar basal-body P-ring protein,Flagellar P-ring protein [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	88429	89379	.	+	0	ID=11111_1#11_04150;eC_number=3.2.1.-;gene=flgJ;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729902.1,similar to AA sequence:UniProtKB:P15931,protein motif:CLUSTERS:PRK05684,protein motif:Cdd:COG3951,protein motif:TIGRFAMs:TIGR02541,protein motif:Pfam:PF01832.14;locus_tag=11111_1#11_04150;product=flagellar protein FlgJ,Peptidoglycan hydrolase flgJ,flagellar rod assembly protein/muramidase Fl [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	89444	91105	.	+	0	ID=11111_1#11_04151;gene=flgK;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729903.1,similar to AA sequence:UniProtKB:P0A1J5,protein motif:CLUSTERS:PRK08147,protein motif:Cdd:COG1749,protein motif:TIGRFAMs:TIGR02492,protein motif:Pfam:PF06429.7;locus_tag=11111_1#11_04151;product=flagellar hook-associated protein 1,Flagellar hook-associated protein 1,flagellar hook-associated protein FlgK,Flag [...]
+ERS111111|SC|contig000020	Prodigal:2.60	CDS	91120	92073	.	+	0	ID=11111_1#11_04152;gene=flgL;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729904.1,similar to AA sequence:UniProtKB:P16326,protein motif:CLUSTERS:PRK08027,protein motif:Cdd:COG3951,protein motif:TIGRFAMs:TIGR02550,protein motif:Pfam:PF00669.14;locus_tag=11111_1#11_04152;product=flagellar hook-associated protein 3,Hook-filament junction protein,flagellar hook-associated protein FlgL,Rod bind [...]
+ERS111111|SC|contig000020	Infernal:1.1	ncRNA	92162	92255	.	+	0	ID=11111_1#11_04153;inference=COORDINATES:profile:Infernal:1.1;locus_tag=11111_1#11_04153;product=STnc490k
+##FASTA
+>ERS111111|SC|contig000020
+TATCCGGGCAGCCCGTTTACGGGCCGTAAGTAACGAAGTTTGATGCAAATGTCAGATCGT
+ATGCGCCTGTTAGGGCGCGGCTGGTAAGAGAGCCTTACAGGCGCATCAGAAAAACCTCCG
+GCTATGCCGGAGGATATTTATTTTTTGGGCGCTTAGAATATAGTTAGAAAATACTGCTAG
+AACATTTCCCGAAAAAACGGACTACGTGGGTTTTTAGTTTCTTTTCGTTTCTTGATGTGT
+CTATTTATTGAAGATGTAGACCATTCTGGGAGGTGAAGGATGCCCCATCTCTTTCAGAAT
+AATGTTGGTATATTCGACAACAGGGCCTCTTGGATGATTTTCTTCTTTATCCTGAAGATG
+GGTCAGTGCATGTACAACTTCATGAGTAAATGAACGTTTTGTGTCAAAAAGTTGTCTTCC
+TTCGTTACTTTCATAATGTTCGGTATATGAATCATCAGAATCGTCCAGATTGAGACAAAT
+AACTTTCCTGCCTTCTGAAAGTTTGAAGTGTTCCTGAGCCACGGTAGTTTCAAAGGCTTC
+GCCTGCCCCCGGTAGCCAGCGCTGCTCCACATCATGTAGTTCTTTTTCATATGCGTAATT
+CATCAGTCTGCGGAATGTTTCGCTTTGGGTATACGCATTTTGAAGTACGGAGGATAGTTC
+ATCGTAGCATTCGTCATAAGTGTCGTCATCAATTTCTGTATCAGGGTCTATTCCACCCGC
+GCCTGAGATAAGGTACTCCACCACACACTCTGGCTCCAGACGGAATTCACTGTTTATGGC
+AAGGCTGTCATGAGCAAGGCGTAGCCGTGAGGGGTTTGGTGCATGTTCGGGAATATCGGG
+GAAAACAGGTGTATCTGCGGTATTTAATCCATATGTGGATACTCCGCTTTGAGGTATTAA
+TCTGTAGGTGACCGGAAGCATAATTTCTTATTCCCTGACTGCAATTATGAATTATTTAAA
+TTAAACAATAATAGTGTTTTTAGTTAATGTGCCACATACAGATAACGCTATGATTCAGGC
+AAAACCAACAAGTAATACGCTGAAAATGTTATTTATAATGAAGCTTAGGGGCGGGGTTTG
+TTTAATAAAACAAGGAGGGCTTCTGTTTGAATACTTCTTGTTTATAAAATCCCTTTATCT
+CGACTAAAATATTGGATGGGGGAAAAGCGTTTTATCATTGTAATCCGGGAGTGGAGTAGG
+GGTATGTTGACTACCTGTCAGATCGGCTCCTGTTAATGCTTTCGCTAAAGTTAGAGCTAT
+CTTATCTAATTTTGCACCATTTAGTTTGGTGTCAGTTAAGTCTGAGCCGAATAGAATTGC
+AGCGGTTAAGTTTACACCGGACATGTTAGCGCAAGTCAGGTCTGCGTGAGTCAGGTTTGC
+TTTAGTCATGTTTGAACCCATTAGATTTACAGCGTGTAGATTTGCACAACACAAATTTGC
+ACCGACTAAATTTACATTAGATAGATTTGTTTCACGGAAATCTGCATTAGAAGCATCTAT
+GTCTGACAAATCTTCTCCCTGGAAATCTTTATGAGCTAGGTTTACTCCGCGCAGGTTCAG
+AAAACCATCCTCGGTTATAAGTGAATCAGGCTGTTGTATATTATGTTTACTCATCAGTTT
+TAACATTCGACATACATTCCAAAATCTCTCACTATCAACTTTATCCGTTACAGTTTTTCC
+ATTTTTGTTCACTTCAATAGTGACCATGGGATCCGTATTTTCGGATGCTGCTCCACATGA
+CAGACATATAGTACACCCGTTGACATCCTCCAGAAATATTTTATTACCATCGAAGAAAGC
+ATCTTTATTTACATATAATAATGATGTGGTCAGTTTTCCAATTACCTCCCAAAAGCATCT
+TTCATTGCTTCTTCTTACTCCACCACAGGTAAAAAAATTGACAAACCATTCCAGTATACC
+GCGTGGTGAAGTTGCACTTTTCATTGCTTCTTTCGTACCGGTACCGGCCGCATGCAAATA
+TCTTAATATATTTTCTGGGGACGCGTTAGTTATTGGCATTTTGATTCCTTCTTATGGAAG
+TGAGTCGATTCCTTATAGGTGTGAGTCAAGTCACATTTATACAAGGAATAATAAAAAAAG
+ATATAAGGTGATTTTTGTATTTAGAGCTTTGGTTGAGTATGAGTAGGTGTCTGGCATCTG
+ACAAGAGGCGATGATATCTGCCATAACGGTGAAGCAGAAGTAATATCACCCGTGAATAAT
+AAGATTATCCTTGCTATTGACTTCCTTCATTTCCAGCATAGCTTACGCCTCATCATCATT
+GAATAAACAATTAAGTTTGTTGAGCGAAAATTTACTTAAAGAAAAAATAATAAGCTTTAA
+TATTTTTTGCAGCATTTCAACTTATAGCAGAATAAATGTGCGTAGATGGCGTAAAAACCT
+GATGAGCAGGAATACTTACACACTAAAAATGCTCCCGCAAACAGAGCAAAATGAGAAAGA
+GGAACGTTTGCCCTTTACATTTCCACCAAAGATTCTGGTTTTGTCTCTTGTAAAGGGCAT
+ACGTATCGCGTTTTATCTCATTAAGAAAGTATGTTGACGTATTAAATTATGCATAATGCT
+CTTTCAATTGCTTCACGTTTGAAATGAATAATTCAAAACCAGTGAGCGCCTCTTCTTCGG
+TACTGGTTTGCGGCAAGCGATAAAGCGCCACTAAAGCAGTATTGTCTGCATCAGCGCCGA
+TAGTGACGGCGCTGGCGTAGTTAAGACGTAAAAAATGCTGCAAAGTCAGAGTGTCGTCAG
+GCAGTGGCATAAAGGGACAGCACATTTCCAGTGTATGATCGGATTCATTAAAATAAACCT
+GTATCCCATCATCAATGATAAGCAGTGGCTCATCTTCTGGCGCATCCAGGCCTAACGCGT
+CATATAAACGATTTAATAGACTTTCCATATAGTTACCTCAAGACTCAAGATGTGATTAAT
+GAAGAAATACCTTTTACTGACTGCCAAATATTTTCATCCCCAACTCGTTTTTGATAGGAA
+AGATTGAGCACCTCTGGCGATAAATTTTTCATTACTTTGTTTCCCGCCCCGCCCGTATTT
+TGTTTCTGAATCTCCAGGTTACCGCTATTCAGTAATACTTTTTGGAAAATTTTCTGTCCA
+CCGCTATCCGGAAGACTACCAGGCGCACTTAACATATGGGTCTGATGGAAAGAAATGAGC
+TCTCGCTTGATTTCTGAATCCATCATCCCTGTACGATCTTTGCCGCTTTTACAATTCCAG
+GCGGGCACCGCGTCAATTTCATGGGCTAACATGGCAAGGCGTTGTGCGAGTTTATAGGGT
+TCGCCGCCATCTTTATGATGTTGGTTATTTTTCCAGATATCCTTAATCTGGCGCGCTAAT
+GTATTGACGACCTCATAATTATCCGGGTATTGCGCCAGCCATTCGCCAACCCAGCCACCT
+GGTCTGGCTTCAGGGCGTAAATCATTGCCTAATAACTGATGTAGCGCTTCGGCATTATAG
+CTATCTGATGCTTTAAGGCCAAAGCCGAGCTTGAGCGCCAGCTCATTAACACCCACATTA
+AATGCGGCGACGTCCGGTTTTATTTTTACCGTCTGTAGATCGCCATCTTTATTGCGGATT
+TTTAAATGAATCATTTTTCCCGGCTGGGTCAACGATTGCCATGCGCGCATTTGATCCTCG
+ACCATAGTTCCCTCTTTGCCGAAAATATTCGACGCGGTGAGTAACCCGACGGATACCAGT
+TTCAGGCTTACCGCTTCGCCCTCTAAGGCTCTGTTAAGCAACTCAGGTTTACTAAAAAGT
+GCCGCAGCTAATACTTCTTTGGCTTTGTTTTCAGCGCCGGCCTGACGCAGAAGCGGATCT
+TTTTCATGATAGGGGGAAAGCACACCATGACGTATCCCGCAAAAAAGCGTTTTATCTTTA
+CCGTCCTCATGCACACTCACCGTGGACATCCACAAATTATTGGCGTGATGAATATTCTTG
+GTATCCCAACTGCATACGCCCTTTCCCTCATAAGCACTGGGAAAGATATCTTTTGCGCCG
+ATTTTCATCTCTGCGGCAGGGAGCTGCGTGTTGGTATAGTGATGCCCGTTATGCGTGAGT
+GTATTTTTTATGGTTTGCCAGGGCTGGTTATTGAGCTGCTTGACCTGAGCATCTCTAAAC
+GCTACTGCCGCAAAGCGATGAGCTTCTTTTGCAGGTAAGCCATTTTGCTGCTGAGTGCGT
+GCTGCAATAAGTTCGATAAGATTTTTCTTCAATGCTTTTGCCGCAGCTTCTTCTTGTTTT
+GTTGCTGGCCGGTCCGCTTTAACTTTGGCTAACTCCATTTGGTTTGCCATTGACGTTAGA
+ACCGGGTCTTGTTTACCCAGAACGGTTGCCGCGACGGTAAGAAGATCGCGCTGTAAGTTA
+TAGAGGTTATGCAGCGAGTGGTTAGACGTCTTCTGATGCTGTAGATAATTCCCCCATGTC
+GCGCCAGGTTCTCGCAGGACAATAATTTCCGGGCGAGCGTCGGGCGCTTTAGCCGGCGCT
+TTGCCCTGGCCTGAGAGAATCTGCATTCCGTTGTATAAGGTTTTTTGTAGGCTTTTAAAA
+GCCTCCTGGGTTTTTAGTGAAGCTGAGTGATAGAAGCTCTGTATTTGCATAGCGTTTTTA
+ATATTCCTGAATAGGGGAGTGGGAACATTCAACAGGGTTAACAATCTTTTAAAAGGATGT
+GACTTTTATGCACCAGGAACTTTAAAAAAATGTCCTTTTTAATGAGGAAACGCTTCTGAT
+CAGGCGGGGAGGCATCCTGACGTCCATACGTAGTGGCGATCATGGGGGAGATCGGTACAT
+CAAATGTTCATAAAATCTACAAACTTCACAATTTAGGGCACAACTTTTATTGAAAACCCA
+CCTTCTTGATTCAGATCAAATTCACCGATTTACTCCTGCGTAAAGTAACGGCGTTACATC
+AAATGTTATACAGGTGTGTGTATGCCAGCAAAGCCCCGGACGAGTAAGACCGTGACGAAG
+AATATTCGTTTTTCCTATTCCATGCTTGAACAGATAGAATTCGCGTTGAAATCTGAAAAG
+ACGCGGAATTTTTCAGCATGGGTAAAAGAGGCCTGTCGGGAAAAGTTATGTAACACGGGA
+CATAAGCTGTAGTTATCGTCTTATGTAAGAGGGGGCGGCCATTTTGAAAAGAACAACGTG
+CTTATATACCTCCTGGGTCTTTGCCGCTTTTGTCTCTCTGCTGATATTTGTCTGGAGTGT
+CATAAACTATCCTCTCTATGAATCCATAATAATTATTGTCTTTTATATCTGGCTAATTCT
+GGTACCGCTTTATCTTATTGTGTATGAGTGGCTAATAGATTGTCATTAAATTTATGGCTG
+CGTACTTGAAAAGTGATATTTCATATCTGTGCGGTAAGTCATTGTCGTAATAATATTATT
+GGTCAGGCGACGGGCGAGCGTCTGCGCATTTTGCATCGTTTTATCTTCAAAATTTTGCAG
+TAAGCGTTGTGCTTCTTTCGGATGCGACGCATATAATCTCAGATAGCTCTGCTCCATCTT
+ATACTGTTGCTTAGCTGTTTGCTGTTCAAATGTTTTCCAGGCATGTTGCACATCTGGCGC
+AAACGTATTGTAGTCTTGCATAACCAGTGTTTGCAGCGTGCGGAATGTCCAGTAGGTAGA
+GTCGTTGCTCGCCCGATCGGTTCCTTTATCATCTCCGGGTTGATAATGACGCATCCCCTG
+GTAATAGGGGAGATAGACGCTAAGAGATGGCATTCCATAGGCGATGTATTCTACGTTGCC
+GATAGCCTGCGGTAATTTCGGTCTGACCTGTAAAATATGTGACTCCTGGGTACGAAAAAC
+GGATATAGGTCGCCATGGTTCTTGTGGATTATGACTGGCATAAGGGTCGTGCGACGTTCC
+CTGATAGTGATTGCGTAACGCGTTTTTTACTGCCGCCACGCTGATCTTCGTTATTGGCGT
+TAAAAAAACAGAAAATGTTTCCCCTTCGCTAACGACCGTATCCAGATGCGGATTAAACTG
+GTGTTGTAGCGTCCAGACGCGCGGATAATTATAGGTGGTATCGTTTTTGTTATCCTGCGA
+ATAGGCTTGATGAAAGTCGAATTCGCCGCGGGCCGGATCATATAATCCCTGCTTTTTCGC
+AAAGCTTACTAACGTTGGTGACGCCATATAATTCGCGTTATCATTCGGATCGTAATGGCG
+TAAACGTCCCTGATTGGCGGAAACGAAATAGCTATCTGCCGGAAGTCGTACTGCCAGCCA
+TTGATGTCCGCTTCCCGTCTCCAGATACCATATCTCTTTGCTATCAATAAACGCGACGCC
+GAAACCTTCGCCCGCGCCTTTTTGTTCAATAATATCTCCCAGTAATTTGGCGCCCTGACG
+CGCCGATTGCGCCACTGGCAGGATCACGGACTCAATGGCGTCTTCCGTGATTCCCGTTTT
+TGTCACGTAAGGATCGGCAGCCAGCGCCGCTCTGCCGTTGTAAATGGTTTCCGTTGCGCT
+CATTCCGACGCCCGCCGAATTGAAACCGGCTTCACCCATGGCGTTATCGTTAGTATCAAA
+GTCATGAATCGCCGTATAGCGCATCGCTGTCTCCGGAAGCGGCCAGCTAAAATTGTTGCG
+ATGTGCTTTATACTCGCCTTGTTGATGAAACGCGACGGGATGAATAACCTTATGCTTGGC
+GTTATTTGCCGAGCCATCCTCGTTGCGCGCGATAATAAAGGAGCCGTCAGCCGAAGCCTG
+ATTGCCTACCAAAAGGGTAGTACAGGCGATGACTTTACCCATACCCAGCAGCGTAACGGC
+GAAAGCAAGATACTTTTTCATAAAGGTTCCCACTGAATAACGCGTTATGGGATGAATTGA
+CCCTGGATTGGAAACCGAGAAAGTGATCGAGCCAGCAATATTCTTTACCGGCATCCTTTA
+TTTTCTTTTTATTGAGGTTGTATTGATAACCACAGCCCTGTGGCAGGGAAGGGGAACAGA
+ACCTGTACTGACCTTAGCTATCACCGCTATCAGGCAGACGAACCGAAAAGATATTATGTT
+CATCGGCATAGCGGTAAGATGCCGAGCCACCGTGCAATAGGGCAATCGCGTTAACTAACG
+ATAACCCCAGGCCGAAACCGGCAGTGTAGCGGGCATTATCTCCTCGCCAAAAACGCCGGA
+AAAGCTTATCCGCATCGGCGGTGGGGCTGCCCGGATTAGCGACCCGAATTTCTGCAACGT
+TATCATCATAAGCGCTTTCAATACGTATCACGGCGTTTTCATCAGAATAACGGATGGCAT
+TCGTCAGCAGGTTTGAGAGCACTCTTTGTAATAATATTTCGTCAGCCCATACCGTTCCCT
+GACATTGATTTATAAAACAGATGTGCTTCTCTTCGGCAAGGGGGCTAAGATAATCCAGCA
+TATTTTCGACCAGCGCATTGAGCGAAACAGGCTGTTTTTTTACCGCTATATTCTGGTGCT
+CCGCGCGTGCCAGAAAGAGAATATTTTCTGTCAGTCGCGACAGTCCCTCCAGCTCTTCAA
+TATTATCGACAAGGGCTTGTTGATACTCTTCGGCGCTGCGTTCCTGACTCAGCATAACCT
+GATTCTTCCCCAGTAAAATATTAACCGGCGTGCGCAGCTCATGCGCCAGATCGTCGGCAA
+ATTGGTTCAGGCGTTCAAAATCGTCGGAAAGCTTCTGGCGCATGGTATTTAGCGCTTGCC
+CAAGCGGCCTGAGCTCGACGGGTAACGCCTGTTCCGCCAGCGGCTGGCGAAGTGTGCCGC
+TATCTGTCGCCGCGGTGAGTCGGCTGAGCGACGTAATGGCCCGCAGCCCGTTTCTGATGA
+CTAATGGACTGAGCGCCGAACAGACGAGGATCGCGATAAGGCTAATCAGCAAACTGTTGC
+GGCGATATTGCGCCAGCATTTGCCGCCTTTCCGTCGCCAGCCTGGCAATAGTAAGGGTCA
+GCGGGTTATCGCCGCTTCTGGCGTTTACTCGTACCGCGGTCAGCTCCGTGCCTTGTACCG
+CCTGGCGAAATAAGGTTTCGCGGGTGATGTTTTTAGCCAGCGGAATCTCGTTAAAGCGTT
+GGTCGGGGATGCCGCTATGATTAATCGCAACATTATGGCCTGTTGCTGAGTGGATCAATA
+AGATATCCTGCTTCGTATCCACCATCCGATTGAAATAGAGCGGCAGATTTTCCGGCCTGG
+CGCCATCCAGTAACAGTTGCTGCATTTGCGCCGCCCGATTTATTAGCGTCATATCATCCC
+GATACGTTAATTCTTTGCTCAGCGCGTTATAGAGCGTCCAGCTAATGCCGGTACAGGCGA
+GGATAAGTATGGCGATAAAAGAAATTGTCAGGCGCAGCGTCATTGATAACTTAACCATTC
+TGCGACGTTTCCGCCTGTAATCGATAACCCATCCCCTGGACGGTCATAATGAGCTTCTTT
+TCAAATGGATCGTCTACTTTGGCGCGCAGCCGACGAATCGCGACATCAACGGTGTTGGTT
+TCACTATCAAAGTTAATTCCCCAAACTTCGCTGGCGATCGCGGTTCGGGGCACGATTTCC
+CCTGCCCGGGACGCCAGTAACCAGAGGAGCAGGAATTCTTTGCGGGTCAGGGAAATCGGT
+TTGCCATTTCGTAACACCGATTGCTTTGTGGCATCCATGTCCAGACCATTGATCGTCAGT
+CGGGTAAAGACCGGGACATGCTGTCTGAGTTGAGCTCTCACCCGGGCCAGCAGTTCGGCG
+AAGGAAAAAGGCTTAACAAGGTAATCATTAGCGCCCGCCTCAAGACCTTTGACGCGATCC
+TCAACCGAGTCGCGCGCCGTCAGGCAAATAACAGGGGGCTGATATGCAGTGCGCAACGCG
+CGTAAAACCTGCCATCCATCAAGCCCCGGCAGCATAATATCAAGAATAATCAATGAATAA
+TGTTCCTGAAGGGCTAGGTGTAATCCGTCTCGTCCATCACAGGCATAATCAACCACATAG
+CCTGCCTCCGTGAGTCCCTGACGTACCCACTCAATGGTTTTCTGGTTATCTTCAATCAAT
+AAAATCTTCATCAACGCAGTATGCCATAGGCTTGTTAGCGAATCCGTAAATCTCCAACAA
+GATGACATAATTGTCATGTCCGTGTAAGGTTAATCCTGGACGCGCCAGATATATTTTCCT
+GCATCAATACCGTAAAGGAGTGAAGCATGAAACGATATATACTGGCTACCGCGATAGCGT
+CTCTTGTTGCAGCCCCGGCAATGGCGCTGGCCGCTGGCAGCAATATTCTCAGCGTACATA
+TTCTCGATCAGCAAACAGGCAAACCAGCGCCCGGCGTGGAGGTGGTACTGGAGCAGAAAA
+AGGATAACGGATGGACGCAATTAAACACCGGGCATACCGACCAGGATGGACGAATTAAAG
+CACTGTGGCCCGAAAAAGCTGCCGCGCCGGGGGATTATCGCGTTATTTTTAAAACCGGCC
+AGTATTTTGAAAGTAAAAAACTGGACACGTTTTTCCCGGAGATTCCCGTCGAGTTTCATA
+TCAGCAAAACGAATGAGCACTATCATGTGCCGCTGTTATTAAGTCAGTATGGTTATTCAA
+CCTATCGCGGGAGCTAATTTAGAGCCTATCCTATTAGGGCTATTTTACTTGCCATTTTGG
+TCCTGGGCAGTGCTCGCCAAAACGCGTTAGCGTTTTGAACGCCGCTTGCGGCGGCCCGAA
+GGGCGAGCGTAGCGAGTCAAACCTCACGTACAACGTGTACGCTCCGGTTTTTGCGCGCTG
+TCCGTGTCCAAACAGGCTGCGCCAATAACGCCTGGTGGGACAGGCTCTTAGATTTTATTA
+ATCGCCGGGATTTATGGCGAGCAAACATCGCGGCGGGAATAAAAATCTTCAGCGTATCGA
+CTCTCCTCCCACAAGCGGTGGTTATCCCGCTTGTGGGTATGAGTGACGGTTAAACAGGCG
+CTTCCATCTCAAGTCTGACCGGATGAAAACGGCGTTTGAAATAAATCAGGCCATGCCCCT
+CCTGGCTAAGAATAATATTTTTGATCGCCACCAGATACACCAGATGCGTGCCAATGGTTT
+GTACCTCGCTGATCTCGCCTTCAAGACTGGCCAGCGCGCCGTTAAGTACCGGCTGGCCCA
+GCGGCCCGTTTTGCCAACATGGCTGGTGAAAACGCTCCTCCATCGCCATCCCCGTCATAC
+CGGCAAAGTGGCGCGCCATCAGCTCCTGCTCATGGTTAAGTACATTAATGCACAGCCTGC
+CGTTGCCCTGAAAAACGGGGTTCATGGCGCTATTGGCATTAATACATACCATCACGGAGG
+GCGGCGTATCAGTGACTGAGCAAACCGCTGTTGCGGTGATACCGCAGCGTCCGGCGTGAC
+CCGCCGTGGTTACGATGTTGACCGCTGCCGCCAGACTTGCCATCGCATCGCGAAAACGCA
+GACGTTGTTCATCTACTTGCATGAGAACCTCCTGCCGCGTTATTTCAGCAGCTTATCCAG
+TTGATTGATGTCGTCGTTATTGTGCAAATGCGAAACCGTCCAGCCATTCTGATCGTATTC
+GGAGAGGCAGCGATCGACCATTGCCATCATCTTATCCATATTGCCGGAGCTCTGGGCCTG
+ACGCAGACACTGCAGACGAATTTCATCCTGGCTGCCCGAGTAGTTAATCTCGTACAGCTC
+ATGGCGACCGCCAAACTCGCTGCCGATGGCATCCCACATCAATTTAAGAATTTTGATACG
+TTCAACATGGTCCATTCCGTTAGAGCCGCGTACGTATTTCGCCAGGTACTGGTCGATTTG
+CGGATTATTCAGATCGCGGGCGCTGGAAGGCAGGTAAATCAGGCCGCTGGTAACGTTACG
+TTCAATAATATTTTTAATTTTCGCGTAGGCCATTGGGGCCATCACACGATAGGTTTGCAG
+CGCGGCGTGGTCCGGTAGCCAGGCGCCGTTTACCCACGGGGTTGCTTCAGAACACATAGA
+ATCGCTCAATGCCCAGAACATATTGCGCCAGGCCACGACTTCGCCGAGATCGGCCTGCAC
+GCCCCGGAACTCTACGGTACCCGTACATTCGAGCGATTTTTTCAGCAGCGCGGTAATGAA
+ATCAAGTTTTACCGCCAGACGAACACAGGCTTGCAGTGGATACATACGGGCAAAGCCGCC
+TTCCATCGTCCAGCGACGACAACGATCGAAATCACGGTAAATTAATACGTTTTCCCACGG
+GATCAGCACCTTGTCCATCACCAGAATGGCATCGTTTTCATCAAAACGGCTGGAGAGGGG
+ATAATCAAACGGCGAGCCCGTCGCGCCCGCGACCATTTCATACGAGGCGCGCGAAATAAG
+TTTTACGCCTTCGGCATCCATTGGCGCGACAAACATCAGAGCAAAATCCGGGTTTTCGCC
+CATCACCTGGGCTGAGCCGAAACCAATCATGTTGTAGTGAGTCAGGGCGGAGTTAGTGGC
+GACAACTTTCGCCCCGCTGACAATAATCCCGGCGTCCGTCTCTTTCTCCAGCTTGATATA
+GACGTCTTTCACTTCGTCGGCAGGTTTGTGGCGGTCAATGGGCGGGTTGACGATTGCATG
+GTTAAAGTACAGGCCGGTCTCCTGAATACGGGTGTACCAGTTACGGGCGTTCTGCTCAAA
+CTGGCCGTAGAAGGCTGGGTTAGCGCCCAGAGCGCAGCCAAAGGCGGCTTTGTAATCCGG
+TGTGCGTCCCATCCAGCCGTAACTCAGGCGTGACCACTCGGCGATAGCATCACGCTGTTG
+GCGCAGATCGTCTGCGCTTTTCGCCACGCGGAAAAATTTATGCGTATAACCGCCGCTGCC
+GGTATCGGTATTCCAGCACAGGGTGTAAGCGGGAGGGGATTCCCGTGTCGGCGGCGTGGG
+AAGGTAAGAATACTATGCTTCCGCTTCGCTATCAGGATGCTGGCGCCCCAGAGCAATAAG
+ATCGTCCAGCAGCAGCATCAATTGCTGTGTTTTTTCGGGCGTGAAATCGGCCTCAATTTT
+TCGATAAGCCTCTTCTACCTCGCTCCGGGCACGGGCGTACAACGTTTGTCCCTGCTCCGT
+CAACATGACATATAACTTACGCTGATCGTTAACCGGCTTGAGTCGCAACACCAGTCCGTC
+TCGCTCCATGCGCGTCAATATTCCGGTCAGACTTGGACGCAAAATACAGGTACGAAAGGC
+CAGCTCGTGAAAATCCATAGAGGGGCTATCGGCCAGGATTCGCACAATGCGCCATTGCTG
+GTCGGTCAGATTGTGGCTTTTAACGATGGGGCGGAAATAGGTCATTGCCGCTTCGCGCGC
+CTGAAGCAAGGCGATGGTTAATGAATCATGCATAAGCGTTTCTCTTTTGCAGAATTATTA
+ATACCTGAATAATCGTGTTTACCGATGTGAGCTATGACATCTGTTTTATGTCCAAAGTTT
+AATAGAAACAAGGGGTTTTATTTAACTATTTGATATATATGTATTTAATAATAAATTTGT
+GAAAATATTGTTAATCACATCATAAATACTTTACTTAAGCTTGCTAAATGTACAGCGAAA
+GCATAAATCTAATCATTAATATGTTAATGAAATCACAGCCCGTTAAATCGGCCTGAGGAG
+TTTATGTATGAAGGGTACTGTTTTCGCCGTTGCGTTAAACCATCGCAGCCAGCTTGATGC
+CTGGCAAGAGGCTTTCTCTCAGCCTCCCTATAATGCGCCGCCTAAAACCGCAGTGTGGTT
+CATCAAGCCGCGTAATACGGTGATTCGTCACGGCGAACCCATTCCTTATCCGCAGGGAGA
+AAAGGTACTGAGCGGCGCGACAGTGGCGCTCATTGTGGGGAAAACCGCCAGCCGGATACG
+CCCTGAAGCGGCGGCGGACTATATCGCCGGGTATGCGCTGGCTAACGAGGTCAGCCTGCC
+GGAAGAGAGCTTTTATCGCCCGGCGATTAAAGCGAAATGTCGCGATGGCTTTTGCCCGCT
+GGGTGAAATGGCGCCGCTGAGTGATGTGGATAATCTCACCATTATCACTGAAATCAACGG
+ACGAGAAGCGGACCACTGGAATACTGCCGATTTACAGCGTAGCGCCGCACAACTGCTTAG
+CGCGTTAAGTGAGTTCGCTACACTTAACCCTGGCGATGCGATCTTACTTGGTACGCCGCA
+GAATCGCGTTGCGCTGCGTCCCGGCGATCGGGTGCGTATTCTGGCGAAAGGTTTACCCGC
+GCTGGAAAATCCGGTTGTCGCAGAAGATGAATTCGCCCGCCACCAGACGTTTACGTGGCC
+GCTGTCAGCGACGGGAACGTTATTTGCGCTGGGGTTGAACTACGCCGATCACGCCAGCGA
+GCTGGCATTTACGCCGCCGAAAGAGCCGCTGGTATTTATCAAAGCGCCAAACACCTTTAC
+CGAACATCACCAAACGTCGGTGCGCCCGAACAACGTCGAATATATGCACTACGAAGCCGA
+GCTGGTCGTGGTGATTGGCAAAACGGCGCGTAAGGTGAGCGAAGCCGAAGCCATGGAGTA
+TGTGGCCGGTTACACCGTCTGTAACGACTACGCGATCCGCGACTATCTGGAAAACTACTA
+CCGTCCGAATCTGCGGGTAAAAAGCCGCGACGGCCTGACGCCGATAGGCCCGTGGATTGT
+GGATAAAGAGGCGGTTTCTGATCCGCACAACCTGACGTTACGCACCTTTGTCAACGGTGA
+GCTGCGGCAGGAAGGGACGACCGCCGATCTGATCTTCAGCATCCCGTTCCTGATTTCTTA
+TCTGAGCGAATTTATGACGTTGCAACCGGGCGACATGATTGCCACCGGTACGCCGAAAGG
+GCTGTCCGATGTGGTGCCGGGGGATGAAGTTGTCGTTGAAGTAGAAGGCGTGGGTCGCCT
+GGTTAACCGAATCGTCAGTGAGGAGAGCGCAAAATGAAGAAAATAAATCATTGGATTAAC
+GGCAAAAACGTTGCAGGTAACGACTACTTCCAGACCACTAACCCGGCGACCGGTGATGTG
+CTGGCGGAAGTAGCCTCCGGCGGTGAAGCAGAAGTGAACCAGGCTGTCGCGGCGGCAAAA
+GAGGCGTTCCCGAAATGGGCCAACCTGCCGATGAAAGAGCGCGCGCGCCTGATGCGCCGC
+CTTGGCGACCTGATTGACCAGCATGTGCCGGAAATCGCGGCGATGGAAACCGCCGACACC
+GGCCTGCCTATTCACCAGACTAAAACGTGCTGATCCCGCGCGCCTCGCATAACTTCGAAT
+TCTTCGCCGAAGTGTGCCAGCAGATGAACGGCAAGACCTATCCGGTTGACGATAAAATGC
+TCAATTATACGCTGGTGCAGCCCGTCGGCGTCTGCGCGCTGGTGTCGCCGTGGAACGTGC
+CGTTTATGACCGCGACTTGGAAAGTTGCGCCGTGCCTGGCGCTGGGTAACACCGCGGTGC
+TCAAAATGTCCGAGCTGTCGCCGCTGACTGCCGACAGGCTGGGCGAGCTGGCACTGGAGG
+CAGGAATTCCGGCAGGCGTGCTGAACGTGGTGCAGGGCTACGGCGCGACGGCGGGCGATG
+CGCTGGTACGCCACCATGACGTGCGTGCGGTGTCGTTTACCGGCGGTACCGCCACCGGTC
+GCAATATCATGAAAAATGCCGGGCTGAAAAAATACTCGATGGAGCTGGGCGGCAAATCGC
+CGGTGCTGATTTTTGAAGACGCCGACATTGAGCGCGCGCTGGACGCCGCGCTGTTCACCA
+TCTTCTCGATCAACGGCGAACGCTGCACCGCTGGGTCGCGCATCTTTATCCAGCAGAGCA
+TTTACCCTGAGTTCGTGAAGCGCTTTGCCGAACGCGCGAATCGCCTGCGTGTCGGCGATC
+CGACCGACCCGAACACCCAGGTCGGCGCGCTGATTAGCCAACAGCACTGGGAGAAAGTCT
+CCGGTTATATCCGCCTCGGCATTGAAGAGGGGGCAACGCTGCTGGCGGGCGGTGCGGAAA
+AACCCACTGACCTGCCTGCGCATCTGAAAGGCGGTAACTTCCTGCGCCCAACCGTGCTGG
+CCGATGTCGACAACCGTATGCGCGTTGCGCAGGAAGAGATCTTTGGGCCGGTCGCCTGCC
+TGCTGCCATTCAAAGACGAAGCGGAAGGGTTACGTTTGGCGAACGATGTGGAATACGGTC
+TGGCCTCTTATATCTGGACCCAGGACGTGAGCAAAGTGTTGCGCCTGGCGCGTGGGATTG
+AAGCCGGCATGGTCTTCGTCAACACCCAGAACGTCCGCGACCTGCGCCAGCCGTTCGGCG
+GCGTGAAAGCCTCCGGTACCGGGCGCGAAGGCGGCGAATATAGCTTCGAAGTGTTTGCGG
+AAATGAAAAACGTCTGCATCTCAATGGGCGACCATCCTATCCCAAAATGGGGAGTTTGAT
+ATGGGCAAGTTAGCGTTAGCAGCAAAAATTACCCACGTGCCGTCGATGTATCTTTCTGAA
+CTGCCAGGAAAAAATCACGGTTGTCGTCAGGCAGCCATTGATGGGCATATTGAAATTGGC
+AAGCGTTGCCGCGAAATGGGCGTTGACACCATTATCGTATTCGACACCCACTGGCTGGTG
+AATAGCGCTTACCACATTAATTGTGCCGACCATTTCCAGGGCGTCTATACCAGCAACGAA
+TTGCCGCACTTTATTCGCGACATGACCTATGACTATGACGGTAATCCGGCGCTCGGCCAT
+CTGATCGCCGACGAGGCGGTCAAACTGGGCGTGCGCGCCAAAGCGCACAACATCCCGAGC
+CTGAAGCTGGAGTATGGCACGCTGGTGCCGATGCGCTACATGAACAGCGACAAGCACTTC
+AAAGTGGTCTCCATCTCGGCGTTCTGCACTGTGCATGATTTTGCCGACAGCCGCAAACTG
+GGCGAAGCCATTCTCAAGGCGATTGAGAAATATGACGGTACCGTAGCGGTATTCGCCAGT
+GGTTCTCTGTCGCACCGTTTTATTGACGACCAACGGGCGGAAGAGGGGATGAACAGCTAC
+ACCCGCGAGTTCGATCATCAAATGGACGAGCGCGTGGTCAAGCTGTGGCGCGAAGGCAAA
+TTCAAGGAGTTTTGCACCATGTTGCCGGAGTACGCCGACTACTGCTACGGCGAAGGCAAC
+ATGCACGACACGGTCATGCTACTGGGAATGCTGGGGTGGGACAAATACGACGGCAAGGTG
+GAGTTCATCACCGACCTGTTCGCCAGCTCCGGTACCGGCCAGGTAAACGCTGTTTTCCCG
+CTGCCTGCGTAAGGGGGGTTTATGCCGCACTTTATTGCTGAATGTACTGAAAATATTCGC
+GAGCAGGCTGATTTACCAAGCCTGTTCAGCAAGGTAAACGAGGCGCTGGCCGCCACCGGG
+ATTTTCCCCATCGGCGGTATCCGCAGTCGCGCCCACTGGCTGGATACCTGGCAGATGGCT
+GACGGTAAGCATGATTACGCGTTTGTGCATATGACGCTGAAAATCGGCGCCGGGCGCAGC
+CTGGAGAGCCGTCAGGAAGTCGGCGAAATGCTGTTTGGGCTGATTAAAGCCCACTTCGCC
+GACCTGATGGAGAACCGCTATCTGGCGCTGTCGTTTGAGATTGCCGAGTTACATCCAACG
+CTCAATTACAAACAAAACAACGTACACGCGTTATTTAAATAGCACACTCTTTCGCCCGGT
+GGCGCTGCGCTGACCGGGCCTACAAAAAAACGATGCACGAAGCAGTACCGAACCGTAGGC
+CGGGTAAGACGCACCCGCGTCGCCACCCGGCGCAGCAGCACCGAATGAGGTAACAGGAAG
+CAACTATGCTCGATAAACAGACCCATACCCTGATCGCTCAGCGACTTAATCAGGCTGAAA
+AACAGCGTGAACAGATTCGCGCAGTGTCGCTGGATTATCCCAACATCACTATTGAAGATG
+CCTATGCCGTACAGCGTGAATGGGTCAATATCAAGATTGCCGAAGGGCGCACGCTCAAAG
+GCCACAAAATCGGCCTGACCTCAAAAGCGATGCAGGCCAGCTCGCAAATCAGCGAACCGG
+ATTACGGCGCGCTGCTTGACGATATGTTCTTCCATGACGGCGGAGATATCCCCACCGACC
+GTTTTATCGTCCCGCGTATTGAAGTGGAGCTGGCGTTCGTGCTGGCGAAACCGCTGCGCG
+GCCCTCACTGCACGCTGTTCGACGTCTACAACGCCACGGATTATGTGATTCCGGCGCTGG
+AACTGATTGACGCCCGCAGCCACAACATCGACCCGGAAACCCAGCGCCCGCGCAAAGTGT
+TCGACACCATTTCCGACAACGCCGCCAACGCCGGGGTGATCCTCGGTGGTCGCCCCATCA
+AACCAGACGAGCTGGATCTGCGCTGGATCTCCGCGCTGCTCTATCGCAACGGCGTGATCG
+AAGAAACCGGCGTCGCCGCAGGCGTGCTGAATCATCCGGCCAACGGCGTGGCGTGGCTGG
+CGAACAAGCTTGCCCCCTACGATGTCCAGCTTGAAGCCGGGCAGATCATCCTCGGCGGCT
+CGTTCACCCGCCCGGTGCCGGCGCGCAAGGGCGACACCTTCCATGTCGATTACGGCAACA
+TGGGCGCGATCAGTTGCCGGTTTGTGTAAGGAAAAAACGATGAAAAATGCTTTCAAAGAC
+GCGTTAAAAGCGGGGCGCCCGCAAATCGGTTTGTGGCTGGGGCTTGCCAACAGTTACAGC
+GCTGAACTGTTAGCGGGCGCCGGCTTCGACTGGCTACTGATTGACGGTGAACACGCGCCA
+AACAACGTGCAGACGGTGTTGACCCAGTTGCAGGCGATTGCGCCTTATCCCAGCCAGCCG
+GTGGTGCGTCCGTCATGGAACGATCCGGTACAGATTAAGCAACTGCTCGACGTCGGCGCG
+CAAACGCTGCTGATACCGATGGTGCAGAATGCCGATGAAGCGCGAAACGCCGTGGCGGCT
+ACGCGTTATCCGCCTGCCGGTATTCGCGGCGTGGGCAGCGCGCTGGCGCGGGCATCGCGC
+TGGAATCGCATTCCGGACTATCTCCACCAGGCCAACGACGCCATGTGCGTACTGGTGCAG
+ATTGAAACGCGTGAGGCGATGAGCAATCTGGCGTCAATTCTCGACGTGGATGGCATTGAC
+GGCGTGTTTATTGGCCCGGCGGATCTCAGCGCCGATATGGGCTTTGCCGGCAATCCGCAG
+CACCCGGAAGTGCAGGCGGCGATTGAGAACGCCATCGTGCAGATACGCGCGGCGGGGAAA
+GCGCCGGGGATTCTGATGGCCAATGAAGCACTGGCGAAACGTTATCTGGAACTGGGGGCG
+CTATTTGTCGCCGTCGGCGTTGACACCACGCTGCTGGCGCGCGGAGCGGAGGCGCTGGCG
+GCGCGCTTTGGCGCAGAAAAAAAACTGTCCGGTGCGTCCGGCGTCTATTAAGCCTGGGCC
+GGTAAGCGCAGCGCTACCGGGCAACCGTAGTACCCTACAAAATTCCCATCAGAGGAAAAA
+AAATGAGCGACACATCATCTGCACTTCCGGAAAGCCCCGAGTCTGTCGGTTCGCACAACG
+CGCTCAGCACGGGTCAACAAACCGTCATAAATAAACTGTTCCGCCGACTGATCGTATTTT
+TATTCGTGTTGTTTATCTTCTCGTTTTTAGACCGTATCAACATCGGTTTTGCCGGGTTGA
+CGATGGGGCAGGATCTGGGGTTAAGCGCCACCATGTTTGGTCTTGCCACGACGCTGTTTT
+ACGCCACCTACGTCATTTTCGGCATTCCCAGCAACGTGATGTTGAGCATCGTCGGCGCCC
+GCCGCTGGATTGCGACCATTATGGTGCTATGGGGCATTGCATCTACCGCCACGATGTTCG
+CGGTGGGACCGAAAAGCCTGTATGTGCTGCGAATGCTGGTGGGCATTACCGAAGCGGGCT
+TTTTGCCAGGAATATTGCTCTATTTAACCTACTGGTTCCCGGCATTTTTCCGCGCCCGCG
+CCAACGCATTATTTATGATTGCCATGCCGGCCACTACCGCGTTGGGGTCAATTGTCTCCG
+GCTATATTTTATCGCTGGACGGCATATTCAATCTGCATGGATGGCAGTGGTTATTCCTGT
+TGGAAGGATTTCCGTCAGTTTTGTTAGGCATTATGGTCTGGTTTTACCTGGATGATACCC
+CGGCAAAAGCCAAATGGCTGACGGCAGAGGATAAAAAATGTTTGCAGGAGATGATGGATA
+ATGATCGCCTGACGCTGGTTCAGCCTGAGGGGGCCATCAGCCATAACGCCATGCAGCAGC
+GTAGCCTGTGGCGCGAAGTATTCACGCCAATTGTACTGATGTATACGCTGGCCTATTTTT
+GCCTTACCAATACGCTTAGCGCCATTAGTATCTGGACGCCGCAAATCCTGAAAAGTTTTA
+ATGAAGGCAGCAGCAATATCACCATCGGCCTGCTGGCGGCGATCCCGCAGATTTGTACTG
+TTCTGGGCATGATTTACTGGAGCCGCCATTCGGACAAACATCAGGAGCGTAAACACCACA
+CTGCGTTACCGTTCCTGTTTGCCGCCGCGGGCTGGCTGCTGGCGTCGGCGACCGACCGTA
+ACCTGATCCAGCTCCTGGGGATCGTGATGGCATCCACGGGTTCCTTTAGCGCGATGGCGA
+TCTTCTGGACCACGCCGGATCAGTCGATCAGTTTACGCGCCAGGGCGATAGGCATTGCGG
+TCATCAATGCCACCGGCAATATTGGCTCCGCGCTCAGCCCGGTTATGATTGGCTGGCTAA
+AAGATATCACTGGTAGCTTCAATAGCGGACTCTGGTTTGTCGCTTCTCTGTTAGTCGTCG
+GCGCCGCCATTATCTGGCTCATTCCCATGAAAGCATCGCGTCCGCGCGCCACCCCTTGAG
+GAGAAACTATGTGCCAACGTGCGATCGCCAATATTGATATCAGCAAAGAGTATGACGAAA
+GCATGGGCAGTAACGATGTGCATTATCAGTCGTTTGCTCGTATGGCGGATTTCTTTGGTC
+GTGATATGCAGGCGCATCGCCACGACCAGTTTTTTCAAATGCACTTTCTTGATACCGGGC
+AGATTGAGCTACAGCTCGACGATCATCGCTATTCGGTGCAGGCGCCGCTATTTGTGCTAA
+CGCCGCCCTCGGTGCCGCATGCTTTTATTACCGAATCGGATAGCGATGGTCATGTTCTGA
+CGGTACGCGAAGAGCTGGTTTGGCCGCTGCTGGAAGTGCTTTATCCCGGCACCAGAGAGG
+CCTTCGGCCTGCCGGGAATCTGCCTGTCGCTGGCGGATAAACTCAACGAGCTGGCGGCGC
+TCAAACATTACTGGCAGCTAATTGAGCGGGAGTCCACGGAACAACTGGCTGGCTGCGAAC
+ATACCTTGGTACTACTGGCGCAGGCGGTATTTACCTTGCTGTTGCGTAATGCGAAGCTGG
+ACGATCATGCCGCAACCGGGATGCGCGGTGAACTGAAACTTTTTCAGCGCTTTACCCTGT
+TAATTGACAACCACTTCCATCAGCACTGGACGGTGCCCGATTATGCCTGTGAGTTGCATA
+TTACCGAATCTCGTTTGACCGATATTTGCCGACGTTTTGCTAATCGCCCGCCTAAACGCC
+TGATTTTTGATCGGCAATTACGCGAGGCGAAACGACTGCTGCTTTTTTCCGACAATGCTG
+TCAACGAGATCGCCTGGCAATTAGGTTTTAAAGATCCGGCTTATTTCGCCCGTTTCTTTA
+ATCGCCTTGCTGGCTGTTCTCCTTCGCAGTTTCGCCAACGTGAAGTTCCTTCTTTTCTCA
+ACTAAGAAGAGTAAAAACATGATGAAAAAAAGCGTCGCTATGCTGGCGGTTTGTATGCTG
+GCGCAAAGCCACCTTGCCATTGCTGCCGGTGCTCCTGCGCCTCAAGAGATCAACATTGTT
+TTACTGGGCACCAAAGGCGGGCCTTCTTTGCTCAATACAGCCAGACTACCGCAAGCGACG
+GCGCTCACTATCGGCGATAAGATATGGCTGATAGATGCCGGCTACGGCGCCAGTCTGCAA
+CTGGTGAAAAATGGCATTCCACTGCGCAACATCAATACTATTTTGCTCACCCATCTGCAC
+AGCGACCACATACTGGATTATCCTTCCTTGCTGATGAATGCCTGGGCAAGTGGCCTGAAA
+GACCATACCATACAGGTTTATGGCCCGCCGGGAACCCAGGCGATGACGAAGGCTAGCTGG
+AAGGTCTTTGACAGGGATATCACGTTACGCATGGAAGAAGAGGGGAAACCCGATCCGCGC
+AACCTGGTTAAGGCGACCGATATCGGCCAGGGCGTCATCTATAAAGATGAACTGGTCACA
+ATAAGCGCGCTGAAAGTGCCTCATTCCCCTTTCCCGGACGGTGAAGCGTTTGCTTACCGT
+TTTGATACTCAGGGTAAGCGAATCGTCTTCTCTGGCGATACGTCCTGGTTTCCTCCGCTT
+GCAACGTTTGCCCAGGGGGCGGATATCCTGGTACATGAGGCGGTACATGTCCCTTCGGTA
+GCAAAACTGGCTAATAGTATTGGCAACGGAAAAACGCTGGCTGAAGCGATTGCGTCGCAT
+CACACCACGATTGAAGATGTCGGTAAGATTGCTCGCGAGGCCCACGTGAAAAAACTGGTG
+TTAAGTCATCTGGTGCCTGCGACGGTTGCGGATGACGTCTGGCAACAGGAAGCCATGAAA
+AATTACCCGGGCCCTGTCATTGTCGGTCATGACAATATGACGATAAGCGTACCGTAAGCA
+TCAGGGCAGGGGTAAACCAACGGGCTGACATCATGTCAGCCCGTTATGCTTTTGGCGATC
+AGCATGACGCGCAGAGCTTTTGTCTAAAACATTATTTTTTCTAAACACTACTTGCTGATT
+TTTATGCACAATAAAGGTAAGATGATTGCTCTGGTGAAATATCACTAAATAAGAATGTCA
+CGGATGGAATTTATACAGTGTTAAAAGCAATGCGTTTCTTTGTCTGCCGGCCATCAGTAA
+TATCCTCTGTACCGGCTCCTCGTTGCCAAAGTGAAGGTTAGATTAAACGAATCAAAAAAT
+ATCCGAACTGTACAATTTTTATATCAAACATATTTAACTGCAAAATACTGCAAACGTGCG
+CGAATACACTTAATCAGTAGTATGGCTAATGCATACTTTATTTCCTAAACTTTCTTATTA
+TCTATTTGAATATTTCTGGTTAAACCAAATGGACAAACGGATACCAGACACTCGAAAGGA
+TAACTGCAAGCTTCTGGTTTAGGATTGCAGTTACCAGGACAGTGGCTATTTTCTTTAAGC
+GATTTCCATAGAACAGGATTAAACCTGGTAACGTTGTAAACAGTAACGGGATAAAGTTTA
+TCGTTTATTAAGACCCCCTTATTAAAGGGGATGATAATGCTACCTGGAGGCATGTAAACA
+TCTGCTTGATACCATAAGGTCAATCTAAGATTTTTTAGGCTATGTATGCATGAGAACGAC
+ACTCCGGTAGAGGGACTTATAACTGTATCAACGACCTTCCATTCCATTACAGAAACCTTT
+TAAGTGAAATAGTCAATGGTGTAAATTACATTTTCCGCAAGCCTATTCAGGATAGATGAA
+AATCTGAGAAACATAACTCGTTGAAAACGATCGTTTTTTATTGACCACTCTATTTATATT
+GATTTTTTAATAAGTTATTTATTTTATGCTCTTATTTTGTTTACATTCTATTACATTTTT
+GTATTTTATACGTGGTGTAAATATTGGGCGGTAGTGGATGTTATAACGCTCAATTATTAA
+ACTGTAAGTGACTGTTGTATTTCTTATTTATAAATACCGCAACCTGTAAGCGCAGCGGTA
+CATTTTACCCGGCATTGACAGGGAGAAGATACGTATCAACCAGAGAGTACAAAACAGTGA
+AAGAATATATTCTTCAGGCAATAAATGATCGCTAACGTGTAAAAATAACCGACGAATCTA
+AAAAATGTGAGCGTGTGGCTGGTCTTTATTTAAACGATGATCTTCGCAATGGCGCATTAA
+TGGCTGACATTGGCGGTTCGTTGATCGGCTTGTGCGCGGTGAAAGGCTATATTTCTTCCG
+TTGCGGGAAGTATGTCAGAGGCATTCTGGTATTATACCGGGTGTATGTAAATCCGGGGTG
+GGCAGAAAGCCCACCCTCGGTTTATTGCCTTTATCGTGGATTAGGGATGCGAGATAAAGC
+GAGAAAGGCGTTGCAGCAGTAACCGGTTTTCTTCGCGCAGCCGTGAATTCTCTTCCAGCA
+GCGTTAACGCGACCGCGATCCCTGGCCAGTCGAGCGCCAGCTCCTCGCGTAAGCGTAGCG
+CGCGTTGTACCACGCTCGCTGCGCGATCGTCGAATTGCCAGTCGGCGTTATCGTCTTCGT
+AAGGCTCAATTACGCCAAGTCCGACGATTTCGTTTAGCTCCTCTTCCGTCACGCCGGTGT
+GCAAACAAAATTCGGTGATGGTAAAGGTGACAGTGATGTTAGCCATTATGCTTTCCCCCA
+TTGCTGGCGTGGGTCAAAGGACGACTGCGCGTCCGCCAGTTGTTGCCACAGGGCAGCTGT
+TTTCTCGTCAGGTTTCGGCGGCATAACGATTTTGATGATGGCATAGAGATCGCCAGTGTG
+CTTTTTACTGGCTAATCCTTTTCCTTTGATACGCAGCCGCTGACCTGCCTGGCTGCCGGG
+GGGAATGGTCAGCAAAATACGCTCTTTAAGCGTTGGCACAGACACCTTAGCGCCGAGCGC
+CGCCTCCCATGGGGCAAGCGGAAGGACGACTTCCAGATCCTGATTGACGATATCAAAGAG
+CGGATGCGGGGCAATATGGATAACGAGCCATAAATCGCCATTAGGTCCGCCGTTTTCCCC
+CGGCGTGCCCTGGCCTTTCAGTCTGATTCGTTGCCCGTTGCTGACGCCAGCCGGGATTTT
+CACATTCAATGTTTTGGGAATTTCCCGCTCCACCAGGCCGAACGCGTTATAAACGGGGAC
+GGAATAGCTAATCGTACGCTGGTGCTCTTCCAGCGTTTCTTCCAGGAATACCGCCACTTC
+AATTTCGATATCATGACCGCGTGCGGCGTGGCGGTGGTGCGAATGACGACCGTGCTGACC
+AAAAATAGACGAGAAAATATCATCAAAATCTTCGGCGTTATACGGCTGGCCTTCGTGTTG
+CTGGAACTGGCGATTAAATTGTGGATCGTTACGGTGTTGCCATAACTGGTCATACTCGGC
+GCGCCGTTGCTCATCACTCAGCACTTCCCATGCTTCAGCAACCTCTTTGAAACGGGCTTC
+GGCATCGGGTTCTTTGCTGACATCTGGATGGTACTTGCGGGCCAGTCGGCGATAGGCGGT
+CTTAATCGTCTTGAGATCGTCCGTCGGTTTCACGCCCATAATGGCGTAATAATCCTTAAG
+TTCCATAGCATCATCTCGCTAAATCAATACATACAGAAGGGACCCCAAAAAGGTTTCTCC
+ACTAAGTGTAGGGTAAACCTGAAAAGTGCGTATGAAAACACCAGTTATATCATTAGTAAG
+AATAAATTACGTTGTTCGACTATCAGAAGGTTGCGCAGCGCGCCGACATAACTTTACAGG
+GGAAAGGTTGCCAAAACCGCGCCAGTGGCTAAGATAACTCGCGTTAAACAGTGAGGGCGC
+AATGGCGAAACAACAACGGATGGGCTGGTGGTTTCTTTGCCTTGCATGTGTCGTGGTAAT
+GGTTTGTACCGCGCAACGCATGGCGGGCCTGCACGCCTTGCAGATGCAGGCGACGGCCTC
+TGCTGCGGTGGTCAGCGCTCCCTCCTCGACAGATGACGGCTCGCCGGTCACTCCCTGCGA
+ATTAAGCGCCAAGTCGCTGCTGGCGGCGCCTCCAGTACTCTTTGAAGGTGCTATCCTTGC
+GCTTTATCTACTGCTTTCCTTACTGGCGCCTGTCCGGGTCATGCGCCTGCCGTTTTCGCC
+TCCACGGGCTATTTCGCCGCCCACATTACGGGTACATCTACGATTTTGTGTCTTCCGTGA
+ATGACAGACCGGTTATTACTAACGGTTAATTACTCATTCACGGAGAAAAAATATGATGAT
+TTTATTCAGGCGGATACTGTTCTGCCTGTTATGGCTTTGGCTGCCCGTCTCCTGGGCGGC
+GGAAAGCGGCTGGCTGCGTTCGCCCGATAACGACCATGCCAGCATACGGCTACGTGCCGA
+TACGTCCGCTAACAGTGAGACCCGGCTGTTGCTGGATGTCAAACTGGAAAACGGCTGGAA
+AACCTACTGGCGCGCGCCGGGGGAAGGGGGCGTGGCACCCTCTATCGCCTGGAAAGGCGA
+CATGCCTGAGGTAAGCTGGTTCTGGCCAACCCCCTCGCGCTTTGATGTGGCGAATATCAC
+CACCCAGGGATATCACGACGAGGTGACCTTTCCGATGATCGTGCGCGGTACGCCGCCGGC
+GACCTTGCGCGGTGTGTTGACGTTATCAACCTGCAGCAATGTTTGTCTGTTGACCGATTA
+CCCCTTTTCCGTGACGCCCACTGTGCAGAATGCCGATTTTGCCCATGACTATGCGCGGGC
+GATGGGTAAAGTTCCGCTCCGCAGTGGGCTAACGGACTCGCTTGACGTTGGCTATCGCCC
+GGGAGAACTGGTGGTCACTGCTACGCGAGCGGCGGGCTGGTCATCGCCCGGGCTCTATCT
+TGACACCATAGATGACGTCGATTTTGCGAAGCCTCGCCTGCGCGTAGAGGGCGACAGGTT
+ACAGGCGACGGTGCCGGTGACGGACAGTTGGGGCGAAAAGGCGCCCGATTTGCGCGACAA
+ATCGCTGACCCTCGTGTTAGCCGATGGCGCTATCGCCCAGGAGAGCACGCAAACCATTGG
+CGCTGCGCCAGCGCAAACGCCGGACAATGCGGCGCTACCTTTCTGGCAAGTTGTAATGAT
+GGCGCTAATCGGCGGACTGATTCTTAATTTAATGCCCTGCGTACTGCCTGTTCTGGGCAT
+GAAACTTGGCTCTATTTTATTGGTAGAGGAAAAAAGCCGCTCTCACATCAGGCGACAATT
+TTTGGCTTCGGTCGCCGGTATCATTGCGTCATTTATGGCGCTGGCGGCGTTTATGACCCT
+CCTTCGCCTGTCAAACCATGCGCTGGCCTGGGGAGTCCAGTTCCAGAATGCATGGTTTAT
+TGGTTTTATGGCGCTGGTGATGTTGTTGTTTAGCGCCAGCCTGTTCGGGCTTTTTGAGTT
+CAGGCTTCCCTCATCTATGACCACGAAACTGGCCACTTACGGCGGTAACGGTATGTCGGG
+ACATTTCTGGCAGGGGGCGTTCGCCACGCTGCTGGCGACGCCTTGTAGCGCGCCGTTTCT
+GGGCACGGCGGTCGCGGTGGCGCTCACGGCGTCGCTGCCGACGCTGTGGGGGCTGTTCCT
+TGCGCTTGGCCTGGGAATGAGCGCGCCGTGGCTACTGGTCGCGATACGACCAGGGCTTGC
+GCTACGTTTACCGCGCCCCGGGCGTTGGATGAATGTCCTGCGCAGGATCCTCGGTCTGAT
+GATGCTGGGGTCGGCTATCTGGCTGGCGACGTTACTCCTGCCGCATTTCGGCTTCACTGC
+GTCAAAGAGCGCGCAAGACACGGTTCAGTGGCAACCGTTGAGTGAACAGGCAATCCAGTC
+GGCGCTGGCGCAGCATAAGCGGGTATTTGTCGATGTCACTGCGGACTGGTGTATTACCTG
+TAAAGTGAATAAATACAACGTCCTGCAAAAAGAGGATGTGCAGGCCGCCTTGCAACAGCC
+GGATGTTGTGGCGCTGCGGGGAGACTGGACGCTGCCGTCCGATGCCATTACAGATTTTCT
+GAAAACGCGCGGCCAGGTCGCCGTGCCGTTTAATCAGGTATATGGCCCCGGTTTGCCGGA
+AGGGGAGGCACTGCCCACTTTGCTGACCCGCGATGCGGTATTACAAACGTTGAAAAAAGC
+GAAAGGAATAACCCAATGAAATACATGATTGTTTTACTGCTGGCGCTGTTTTCGACGCTG
+AGCATCGCGCAAGAAACCGCTCCTTTTACGCCGGATCAGGAAAAGCAGATTAAAAATCTG
+ATCCATGCGGCGTTGTTTAACGATCCTGCCAGCCCGCGGATAGGCGCTAAACACCCTAAG
+CTGACGCTGGTGAACTTTACGGATTACAACTGCCCGTACTGCAAACAGCTCGATCCGATG
+CTGGAAAAGATTGTGCAGAAATATCCTGACGTTGCGGTCATTATTAAACCGCTGCCATTC
+AAAGGAGAGAGTTCCATACTGGCGGCGCGTATTGCGCTGACCACCTGGCGCGATCATCCG
+CAACAGTTCCTCGCGCTACATGAAAAACTTATGCAAAAGCGCGGTTACCATACGGATGAC
+AGTATTAAACAGGCCCAGCAGAAAGCAGGGGCGACGCCAGTGACGCTGGATGAAAAAAGC
+ATGGAAACGATACGCACTAATTTGCAGTTGGCAAGACTGGTCGACGTGCAAGGAACGCCA
+GCGACGATCATTGGCGACGAGCTGATTCCGGGCGCAGTGCCCTGGGATACGCTGGAAGCG
+GTGGTGAAAGAAAAACTGGCGGCTGCCAATGGCGGGTAAACTGCGGCGTTGGCTGCGTGA
+AGCCGCGGTTTTTCTGGCGCTCCTCATCGCGATAATGGTGGTCATGGACGTCTGGCGCGC
+GCCGCAGGCGCCTCCGGCGTTTGCCGCGACACCATTACATACGCTGACGGGAGAGTCGAC
+AACTCTGGCGACCTTGAGCGAGGAACGCCCCGTACTGCTCTATTTTTGGGCCAGCTGGTG
+CGGGGTATGCCGCTTTACCACGCCTGCGGTCGCTCACCTGGCGGCGGAAGGGGAAAACGT
+CATGACCGTTGCGCTCCGCTCCGGCGGTGATGCTGAGGTTGCCCGCTGGCTGGCGCGCAA
+GGGCGTTGACTTCCCGGTCGTCAATGATGCTAACGGCGCCTTATCCGCTGGCTGGGAAAT
+CAGCGTGACGCCAACGCTGGTGGTGGTTTCACAAGGTCGGGTTGTGTTCACCACCAGCGG
+CTGGACCAGCTATTGGGGCATGAAGCTTCGGCTGTGGTGGGCAAAAACGTTCTGAATATG
+CGCCGGGGTTTCCCGGCGCTAACGCGTTTACTGTAAGAAAAACCTCCGTTTTGCGAAATC
+GTTCCCGGAAAAATGATCCATTTCTGTCACACTCAGAACGATTTGATAACAACAAGAGGT
+CATAGGGATGAAAAAATCATTACTCGCTGTTGCTGTGGCAGGGGCTGTTTTGTTGTCATC
+CGCCGTACAGGCGCAGACAACGCCGGAAGGTTATCAATTACAACAGGTGCTGATGATGAG
+CCGCCATAATCTGCGGGCGCCGCTGGCGAATAATGGCAACGTACTGGCGCAGTCGACGCC
+GAACGCCTGGCCGGCGTGGGACGTTCCCGGCGGGCAACTGACGACGAAAGGCGGCGTGCT
+GGAAGTCTATATGGGACACTACACACGTGAATGGCTGGTCGCGCAGGGGCTGATACCGTC
+GGGAGAATGTCCGGCGCCCGACACGGTATATGCCTATGCGAATAGTTTGCAGCGCACCGT
+CGCCACCGCGCAATTTTTCATTACCAGCGCTTTCCCCGGCTGTGATATTCCTGTTCATCA
+TCAGGAAAAAATGGGCACTATGGACCCTACCTTCAATCCGGTGATTACCGATGATTCCGC
+CGCGTTCCGGCAACAGGCCGTACAGGCGATGGAAAAGGCGCGTAGTCAGCTACATCTTGA
+TGAGAGTTATAAACTGCTTGAGCAGATAACGCATTATCAGGACTCGCCGTCCTGCAAAGA
+GAAGCATCAGTGTTCGCTAATCGACGCGAAAGATACCTTCAGCGCGAACTATCAGCAAGA
+GCCTGGCGTGCAGGGGCCGCTGAAAGTAGGGAACTCGCTGGTGGATGCGTTTACCCTGCA
+ATATTACGAAGGCTTTCCGATGGATCAGGTCGCTTGGGGCGGGATCCACACCGATCGGCA
+GTGGAAGGTGCTGTCAAAACTGAAAAACGGCTACCAGGACAGCCTGTTTACCTCACCCAC
+GGTGGCGCGCAATGTCGCTGCGCCGCTGGTAAAATATATCGATAAGGTGCTGGTTGCCGA
+GCGCGTTAGCGCGCCGAAGGTTACCGTGCTGGTGGGGCATGATTCCAATATCGCGTCGCT
+GCTGACGGCGCTGGATTTTAAACCCTATCAGCTCCATGACCAGTATGAGAGAACGCCGAT
+TGGTGGTCAGCTTGTCTTCCAACGCTGGCATGACGGTAACGCTAACCGGGATTTGATGAA
+AATCGAGTATGTCTACCAGAGCGCCCGGCAGTTACGTAATGCGGAAGCGTTAACGCTCAA
+ATCGCCTGCGCAAAGGGTAACGCTGGAACTGAAAGGATGTCCGGTGGATGCGAACGGCTT
+CTGTCCGCTGGATAAGTTCGATAACGTCATGAACACTGCTGCAAAATAGCCGTATGCCCC
+CGCGCAGGCGGGGGCGTTTGTGTTATACGTTCTTACGTTCGATGACTTGTTCGCCCCAGA
+AGAGCGAGTCTTTGTCCGTTTTCTCAAAGGCTTTGATCAGCACCTCATCGCTACCTTCTT
+CCCATATTTTTTCTGCCAGTTTCTCGTCATAGTGAGCGACTTCAAAAATGGCTTCGGCAA
+TTTCCGGCGACGTATTACGCAGGCTTGCCCATTCGCCGACGCGGTGCGCTTTTGCTTCTT
+GAGTTGGCATTCGAATCCTCCTGTTGAAGATTAGCCGTTGAGTTTGACTGCCAGACCGGC
+GACGTATTCCCCCTGATAGCGAGCGATAGAGAGTTCCTCCTGGCTTGGTTGACGTGAACC
+GTCGCCTCCAGCGATAGTCGTTGCGCCGTAAGGCGTACCGCCGCGAACCTGGGAGACGTC
+AAACAGTTCCTGTGCGGAATAGCCTATCGGGACAATCACCATCCCATGATGGGCAAGCGT
+AGTCCAGGTCGAGGTGATGGTCTGCTCCTGGCCGCCGCCCGTTCCGGTAGAACTGAACAC
+GCCGCCGAGCTTGCCGTATAGCGCGCCGGATGCCCACAGTCCGCCGGTTTGGTCCAGGAA
+GGTACGCATCTGGCCTGACATATTGCCAAACCGGGTTGGCGTACCAAAAATAATGGCATC
+GTAATCCGCCAGCTCCTGTGGGGTGGCAACCGGTGCGTTTTGCGTTTTACCGCCAGCTTT
+TGCGAAGATTTCAGGCGGCATTGTTTCTGGCACACGCTTTATAATGACCTCTGCGCCGTC
+GACTTTCTTTGCCCCTTCCGCCACCGCGTGCGCCATGGTTTCAATGTGTCCGTACATGGA
+ATAATAGAGCACCAGAATCTTTGCCATTTGTACTACTCCTCAGATTAATCAGGTGTGTAG
+CGATTCGCTACGTCTATTTAAAGATATGCCCTCCTGTGAAGAGTGCAAATTTCAGCGCCA
+TTTCTTTGATTTATAACAATAATTAATTTGGCGACCTTTGTTGCAAAATGATACATTTTT
+AAGCGCTTTGATTTTTCCAAGTATAAGAATAACTTATTTATTTCTCATGGTTATTATTCT
+GCATATTCGGCTTCTGATGTTGCAGAATATTTCGGTAAGCGGCCTACCACGGCGTTTTTC
+ACTATGCTTAATTTTACGCGGCGTTACTGATGATATCGTTCATACGCGGCGCGAGGAGAT
+ACTCCTCATTACCTATGCAATATGATGTCTAATCTATGACGGAGGTCAGTAATGGCAAAC
+CATCGTGGCGGTTCCGGTAATTTTGCGGAAGACCGCGAAAGAGCATCAGAAGCAGGTCGT
+AAAAGTGGTCAGCACAGCGGGGGCAATTTTAAGAATGACCCGCAGCGTGCATCCGAAGCA
+GGCAAAAAAGGGGGCAAAAGCAGTAACCGTAATCGCTAGCCTGCGTCACAACAGCAAGAC
+GCTGAGCGTTTCGCGCTAAAAAGCGTCATGCGCCACCGCCGCCGGATTTCCGGCGGTTTT
+TTTGTTTATAAGCCGGATTTGGTATGCTTGCGTCCTGACGAAAAGGAGAGGGCGCATGTC
+GCAACGCACAGAGAAAAAAATCGGGAAACGTTCGCAGGCCACCGGTGCAAAACGGCAGCT
+TATCTTAACCGCCGCGCTTGCCGTTTTTTCCCAGTATGGCATTCATGGCGCGCGTCTTGA
+ACAGGTCGCCGAGCGGGCAGGCGTCTCCAAAACCAATCTGCTTTATTATTATCCCTCGAA
+AGAGGCGCTGTATGTCGCGGTAATGCGACAGATTCTGGATGTCTGGTTGGCGCCGCTCAA
+GGCGTTTCGCGCAGAATTTTCCCCTCTGGAGGCCATCAAAGAGTATATCCGTCTCAAGCT
+GGAGGTTTCGCGTGATTATCCGCAGGCGTCGCGGCTCTTCTGCATGGAGATGCTGGCGGG
+CGCGCCGCTCTTAATGGATGAACTGACCGGCGATCTAAAAGCGTTGATAGATGAAAAATC
+CGCGCTGATTGCCGGATGGGTGCACAGCGGGAAACTCGCGCCCGTTTCTCCGCATCATTT
+GATCTTCATGATTTGGGCCGCCACGCAACATTACGCCGATTTCGCCCCTCAGGTTGAAGC
+GGTAACCGGCGCGACGCTTCGCGATGAAGCCTTTTTCAACCAAACGGTCGAAAGCGTTCA
+GCGCATTATTATTGAAGGGATTCGCGTGCGTTAACCGGCTGGCGGCAGTGGGCAGCTCAA
+ATCACCCTCTTCACATTGCAGCAGCGAGGCTAAAAACGCTTCGCGTTCGTTCGTTTTATC
+GGTCAGGCACTGGCTGGCGATCATGGGTTGAACGCTGCCGCCCTCCGTACCTGAGCGAAT
+CAGCGCGCAATCGGCGTCGCGCAGGGCAATCCATGCCACCTGCGCTTTTTGCAATAGCTC
+ACGCTGCGGCGGTTGCGCACGCTTAATCGCGCTTTGATAGGTTTCGTTCAGCTTTTTATC
+TGCCGCCTGGTATTGCGCGGCGGCGCAGCGATTCATTTCCAGCTGCGTACTGGCGCTGGC
+ACACTCATCGGCCAACGCCTGACTGCTGAACAACAACGCCGCGCAGGTAAGGAAAATTCG
+TTTCATACGGTCCTCTGACGCGAGTAAAAAGGCTCCATTGACGGAGCCTTAATTAACCAC
+AGTATTAGCCAATTGTCATCAGGCTGGCATTACCGCCAGCGGCGGCAGTGTTTACGCTCA
+GCGAACGTTCAATATAGAGCCGTTCCAGCAGCATATTGCTTTCGCCGCGGGCGAACCCCT
+GTACCGACACTATCGCGCCTTCGCGGGCGGCGACGGCTTCGCACACGGTTCGCAGCTTGT
+CGGAGTCGCCGTGGAAAATCACCGCGTCAAACGGCTGCGCCATCAGCGTTTCCGCTTTCG
+CAAACTGGACACGCGCCGCGACGGCGGCGGGGAGACGTTTCGCCAGATCGCGGTGGAAGG
+CGTCGTCTGACCATAGCGCCTGACTGCCGACGGCGAGAACGGCGGCAAGCTGCGTCAACG
+CGTCCTGTTCATCATCAGCCAGGCATAACACCCGTTCACGCGGCAACAGCGTCCAGGTAT
+TACGCTCGCCGGTCGGCCCCGGTAGCAGGCGCTGCGTGCCGGCCTGCGCCAGATCGGCGA
+ATTGTCGGCAGAGCGTCTGTAGCGCCGGGCGATCCGCCGCCCATTGCGTCAGAGCGGTCA
+ACGGCGCGAGTAGCGTGGTTTTAAGCTGCGCATCCACCGGGTAACGCGCATCCTGACGAG
+TCAGCGTCGTATTGAGCGCATTGGGCGGGCGGTGTGCCAGCAGGCGGTAGAGATAGAGCG
+GCCCTCCCGCTTTTGGCCCGGTGCCGGACAGGCCTTCGCCGCCAAACGGCTGGACGCCGA
+CGACCGCGCCCACCATATTACGGTTAACGTACAGGTTGCCGACATGGGCGGAACCGGTGA
+CTTGCGCAATGGTTTCATCAATACGGGTATGTACGCCCAGCGTTAGCCCGTAGCCGGAAG
+CGTTAATCTGTTCGATAAGCTCCGCCAGTTGGTTACGGTTATAACGCACGACGTGCAGCA
+CGGGCCCGAAGACCTCTTTTTCCAGTTCTGCGAAGTTTTCCAGCTCAATAAGCGTGGGCA
+TAACAAACGTACCGGTCTGCCATTCCTGCGCGTCATCGCTGTTTTCACGCGCGGCCTGGA
+AAACCGGGCGGCCTTTGGCGCGCATCGTCTGGATATGACGTTCAATGTTGGCTTTGGCCT
+CGCTATCGATCACCGGCCCGATATCGGTCGTCAGACGGCCTGGATTCCCCATCCGACACT
+CCGCCATCGCGCCGCGTAACATTTTCAGCGTATGTTCGGCGATATCGTCCTGCAAACACA
+GCACGCGGAGCGCGGAACAGCGTTGTCCGGCGCTGTCGAAGGCGGAAGCCAGCACATCCA
+CGACCACCTGCTCGGTGAGCGCGGAAGAGTCGACAATCATAGCGTTCATACCGCCGGTTT
+CCGCAATCAACGGAATAGGGCGCCCCTGGGCGTCAAGACGCGTGGCGATGTTGCGCTGCA
+ACAACGTCGCGACCTCCGTGGAACCGGTAAACATCACGCCGCGTACACGCGCATCGGCGG
+TAAGCTGGGCGCCGACGGTTTCTCCCCGTCCCGGCAACAGTTGCACGACGCCCGGCGGTA
+CGCCCGCTTCCAGCAAAATGGCAATGCCCTGGGCGGCAATCAGCGATGTCTGCTCTGCCG
+GTTTCGCCAGAACGCTGTTACCTGCCGCCAGCGCGGCGGCGATTTGGCCAGTGAAAATGG
+CCAGCGGAAAGTTCCACGGACTGATACAGACCACCGGCCCTAACGGGCGATGCGTTTCGT
+TATCGAAATCGTCACGCACTTGACCGGCATAATAATGGAGGAAGTCTACCGCTTCGCGCA
+CTTCGGCAATGGCGTTGCTGAACGTTTTCCCCGCTTCACGCACCAACAGGCCAATCAACT
+GCTGCATTTGGTCTTCCATCAATACCGCCGCCCGCTGCAAAATAGCGGCGCGTTCTTGCG
+GCGGCGTCGCAAACCAAACCGGCGCCTGATTGACCGCGTTTTGCAACGCCTGTTCAACCT
+CGCTTTCTGTCGCTTCGCGTCCCCAGCCAACAATATCTTTCGGTTCCGCCGGGTTGATAA
+CCGGCGTCATCTCACCGTCGGCCACCGGTTGTTCCAGCACAGGTTTGGCCTGCCATTTCT
+GCATGGCGTTGCTTAACAGGGCAGAAGAAAGCGAGGCGAGGCGATGTTCATTCGCTAAAT
+CAAGTCCGGCGGAGTTTATCCGACCTTCGCCGTACAGATCGCGCGGCAGCGGAATTTTTG
+GATGCGGTATGCCAGCCTGACCTTCCTGCTGCGCCAGTTTTTCCACGGCCTCGACCGGGT
+CGGCCACCAGTTCATCGAGCGGTAGGGTGGCATCGGCGATGCGGTTGACAAAAGAGGTGT
+TGGCGCCGTTTTCCAGCAGGCGTCGTACCAGATAGGCCAGCAGGGTTTCGTGTGTTCCCA
+CCGGCGCGTAAATACGGCAGGGACGGTTAAGTTTTCCGTCCCCCACTTTACCGGTGACCT
+GTTCATACAGCGGTTCTCCCATGCCGTGCAGGCACTGGAATTCGTACTGACCCGGATAGT
+AATTTTGCCCGGCCAGATGATAAATCGCCGCCAGTGTGTGAGCGTTATGGGTCGCGAACT
+GCGGGTAGATCAGATTAGGGACGGCGAGCAGTTTTTTCGCGCAGGCCAGATAAGAGACAT
+CGGTATACACTTTGCGGGTATAAACTGGATAGCCCTCCAGCCCTTCCATTTGCGCGCGTT
+TGATCTCGCTATCCCAGTAGGCGCCTTTCACCAGACGAATCATCAGCCGACGGCGGCTAC
+GGGAGGCCAGATCGACTAAATAATCAATGACCAGCGGGCAGCGTTTCTGGTAAGCCTGAA
+TCACAAAGCCAATGCCGTTCCAGCCCGCCAGTTCGGGTTCGAAGCAGAGTTTTTCCAGCA
+GATCAAGCGAGATCTCCAGACGATCCGCCTCTTCGGCGTCGATATTGAGACCGATATCAT
+ACTGGCGCGCCAGCAGCGTCAGGGATTTCAGGCGCGGATAAAGCTCCTCCATTACCCGAT
+CGTATTGCGCGCGACTATAGCGTGGATGCAGGGCGGACAGCTTAATCGAGATGCCTGGCC
+CTTCGTAAATACCGCGACCGTTAGACGCTTTGCCGATGGCATGAATCGCTTGCTGGTAAG
+AGACCATATAGGCCTGCGCATCGGCGGCGGTTAACGCGGCTTCGCCCAGCATATCGTAAG
+AATAGCGGAACCCTTTCTCTTCCAGTTTTCGGGCATTCGCCAGCGCCTGAGCAATGGTTT
+CGCCAGTCACGAACTGCTCGCCCATTAAACGCATCGCCATGTCGACGCCTTTGCGGATTA
+ACGGTTCGCCGCTCTTGCCGATAATGCGGTTCAGCGAGCGCGAAAGATTGGCTTCGTTAT
+GCGTTGAGACCAGTCGGCCGGTAAAGAGCAGCCCCCAGGTGGCGGCGTTTACAAACAGCG
+ACGGGCTACGGCCAATATGCGACTGCCAGTTGCCATTACTGATTTTGTCGCGAATTAACG
+CATCGCGCGTAGCTTTGTCGGGAATACGCAGCAGCGCTTCCGCCAGACACATCAGCGCTA
+CGCCTTCTTGCGAAGAGAGGGAAAACTCCTGCAACAGGCCTTGCACCATACCCGCGCGAC
+CGCTGGCGGATTTTTGATTGCGCAATTTCTCCGCCAGTTGATACGCCAGTTTATGCGCCT
+GCTCAGCGACAGGCGGCGACAGGCGCGCCTGTTCCATTAGCATTGACACCGCATCGGTTT
+CCGGGCGGCGCCAGGCGGCGGTGATGGCGGCGCGAGAGACGGATTGGGGAAGAATCTGTT
+CGGCAAACTCCAGAAAGGGCTGATGCGGCTCATCCTGCGGCGCGACCGGCTCCTCGCTTT
+CATTTGCCGCGCCGGCAAACAGCGCAGGTAGCTCCGGTAGCGTATCGCTATTTTCCAGCT
+TGTCCAGATAGCTAAAGATTGCCTGTTTTATTAACCAGTGCGGCGTGCGATCGATACGCG
+ACGCGGCCATTTTGATCCGTTCGCGCGTGGCGTCGTCCAGCTTAACCCCCATCGTGGTGG
+TTCCCATACCCTCTACTCCTGTTATTCGCTCTATCTGCTAACCAATAGTTAGCGGAAAAT
+ATCCACTATATTGCAACTTTGTGCAACCGCGTTAAATGTGACCTGCGTTGCAAGCTTAAA
+AATGAATAAATTGTTAAAAAAAGAAAGTGGGAGTCTGACGGGGAAAACCATCTGGATTTA
+TTTTCTCTGCGGTAGTTAACACTTTTAAAAGGTGCAACCGCAAAAAATGTGAGAGAGTGC
+AACCTGGAGAAAAATAGTATCCCTCTGCAATCAAATTTGATGTAAATGGTGTGTTAAATC
+GATTGTGAATAACCACCGATTCCGGCAGGATACGGTCGCCCTGGTAAACATAACACCCTT
+GCCACGTTCCGGCAGGGTACAAAACGGCACGCTACGGTAGTGCCAATAAATAAATTTGGA
+GAACCTTGATGGCTATTAGCACACCGATGTTGGTGACATTCTGTGTCTATATTTTTGGCA
+TGATATTGATTGGGTTTATCGCCTGGCGCTCAACCAAAAACTTTGATGACTATATTCTTG
+GCGGTCGCAGCCTGGGGCCGTTTGTTACGGCTTTATCAGCCGGCGCGTCGGATATGAGCG
+GCTGGCTGTTAATGGGGCTGCCTGGCGCTATCTTTCTGTCGGGGATCTCTGAAAGCTGGA
+TCGCCATTGGCCTGACGTTAGGCGCATGGATTAACTGGAAGCTGGTGGCCGGGCGCCTGC
+GCGTGCATACCGAATTTAACAATAACGCGCTCACGCTGCCGGACTATTTTACCGGTCGGT
+TTGAGGATAAGAGCCGAGTCCTGCGTATTATTTCCGCGCTGGTCATTCTGCTGTTTTTCA
+CTATCTATTGCGCATCAGGTATTGTCGCTGGGGCACGACTGTTCGAAAGCACCTTCGGTA
+TGAGCTATGAAACCGCACTGTGGGCGGGGGCCGCGGCAACCATTATTTATACCTTTATCG
+GCGGGTTTCTTGCCGTTAGCTGGACGGATACCGTTCAGGCCAGCCTGATGATTTTTGCGT
+TAATCCTGACGCCGGTGATGGTTATTGTCGGCGTAGGCGGTTTTAGCGAGTCGCTGGAAG
+TGATCAAGCAAAAGAGCATCGAGAATGTCGACATGCTCAAGGGGCTGAATTTTGTCGCTA
+TTATTTCTCTGATGGGCTGGGGGCTGGGTTACTTCGGTCAGCCGCATATCCTGGCGCGCT
+TTATGGCGGCGGATTCCCATCACAGTATTGTTCATGCGCGTCGTATCAGTATGACCTGGA
+TGATTCTGTGTCTGGCGGGCGCGGTGGCGGTGGGCTTCTTTGGCATTGCGTACTTTAACA
+ATAACCCCGCGCTGGCCGGGGCGGTGAACCAAAACTCAGAACGCGTATTTATTGAACTGG
+CGCAGATCCTGTTTAACCCGTGGATTGCCGGTGTTCTGCTGTCTGCTATCCTGGCGGCGG
+TGATGTCGACGTTGAGCTGTCAGTTGCTGGTATGCTCCAGCGCGATTACGGAAGATTTAT
+ATAAGGCTTTTCTGCGTAAAAGCGCCAGCCAGCAAGAGCTGGTATGGGTAGGGCGAGTGA
+TGGTGCTGGTGGTAGCGCTGATCGCCATTGCGCTGGCGGCGAATCCTGATAACCGTGTGC
+TGGGGCTGGTGAGCTACGCCTGGGCTGGATTCGGCGCGGCATTTGGACCTGTTGTCCTGT
+TTTCTGTGATGTGGTCGCGTATGACACGTAACGGCGCGCTGGCGGGAATGATTATTGGCG
+CGGTGACGGTTATCGTCTGGAAACAATATGGCTGGCTGGATCTGTATGAGATTATCCCTG
+GCTTCATTTTCGGCAGCCTGGGGATCGTAATCTTTAGCCTGCTTGGCAAAGCGCCGACAG
+CAACGATGCAGGAACGCTTTGCAAAAGCGGACGCGCATTATCATTCCGCGCCGCCGTCGA
+AGCTACAGGCGGAATAACCGACATGTCCGATAGCATTATTGCCATCGGATATATTTTACC
+AGGCGGCGTTATACGCCTGCCTGGTCCTCTGATAAGTCCCGGACCGATTGACTGAAGGTG
+TTCAGGTAAATGAGGACGCTCTCTCAAACTATTTTTAGCCCTTGAGCGCCGTTAACGCGG
+CCGTAATACGTCTCGAGTAATCACTAACTGACCATATATGAAATCGCCTGTTAATGGTAC
+CAATAGCCTTGACGCAATAGAGTAATGACAAAAATTAAAACGAGTCAGCGTTACTGGCGT
+AAGTATGCCGCACAAAATTTTGCATAAATAATGCCGTTTTAGCGATGGGAGAGAGGACAC
+GTTAATTACTCCGTTTTAATCTTTTATATGTTGAATATTCAATGGGTTATGGGTGTTTTC
+ACCATTAATACCCATAGTAGCTAATGATTATCTTTTTTAGTCTCCTGCCAATGAAATAAT
+TGTGTAATCTTTCTGTAAGAGACTGACAATGACGCAATAATGTTTGGTTAATGTTTGGTG
+AATATATTGTTGCATTATTGATGTTTTGTGTTGTACTTAGTAGTAATAGCGGTAGTTCCC
+CGGCAGTGATGGTCACTCACTATGGAGATCGCGAATGGTAATGTCCGCACCAGGACACAT
+TGTTTACAGTAGTTACAACACCCTGTACGGACATTCTCTCTCCGGTGGTGGTCTTGTCAT
+CTTAAAAGCTCTCATCATTTCCCTTACTGTCCATACCCATGACGCCATATGTGGTGCGCG
+TAGCCGTGTGTGGCGTCGTTTCAAAAAGCAAGCTAAGGCTTACAAGGAAGCCAACCCTCA
+GATGTGTGTGCGCATAATCGCGTTCAAGAGAACGCGGGTGATGTATACCTACAACTCAAG
+GTGCTATCCATGGGAAGACAAAAAGCAGTGATCAAAGCTCGTCGTGAAGCAAAGCGTGTG
+TTGAGACGAGATTCGCGTAGTCATAAGCAACGTGAAGAAGAATCGGTCACGTCACTGGTA
+CAGATGGGCGGAGTAGAAGCCATTGGCATGGCGCGCGATAGTCGCGATACCTCTCCTGTT
+AAGGCGCGAAATGAAGCACAGGCGCATTATCTGAACGCTATCGACAGTAAACAGCTTATT
+TTTGCGACCGGCGAAGCCGGCTGCGGAAAAACATGGATCAGTGCGGCAAAGGCGGCAGAA
+GCATTGATTCATAAGGACGTCGAGAGGATCATTGTGACGCGTCCGGTATTGCAGGCTGAT
+GAAGATCTTGGTTTTTTGCCCGGTGATATCGCTGAAAAATTCGCGCCTTATTTTCGTCCC
+GTCTACGATGTCCTGCTTAAACGGTTGGGCGCGTCCTTTATGCAATATTGTTTGCGCCCG
+GAAATCGGTAAGGTAGAAATTGCCCCGTTCGCCTATATGCGTGGGCGTACTTTTGAAAAT
+GCGGTCGTGATCCTCGACGAGGCGCAAAATGTGACTGCGGCGCAAATGAAAATGTTTTTG
+ACGCGATTAGGCGAAAATGTCACGGTCATTGTCAATGGCGATATTACGCAATGCGACCTG
+CCGCGCGGTGTGCGTTCCGGGTTGAGTGATGCGTTGGAACGCTTTGAAGAAGATGAAATG
+GTGGGGATTGTGCATTTCAACAAAGACGACTGCGTGCGCTCGGCGCTTTGTCAGCGAACG
+CTCCACGCATACAGCTAATATAGCATTGACTTTCAGAGCCCGGGAGACCGGGCTTTGTTG
+TCTATAAAGCGTCCAGCAGACTCATGGACAAAAAAGGAAAATGACGCCTGTACGTGAAGG
+GAATTATTTTGTCATGTCCAGGGCATTCATTGTCCGTAATTTGCTTTCTCGGGCCTGTTC
+CGGCGAGGACTGTACAAGAAGGGTATAGAGCAGGTCAAAGACGAAAAGCTGCGCGGCTTT
+CGTACCGATCGAGTCACCCTGCAACATTCCTTGCCGATTACCATTGATCAGGCAAAAATC
+GGCCTCTTCACATAATGGAGAACCCAGATTATGGGTAATGGCGACTGTGGTGGCGCCAGC
+CTGTCGGGCCAATCGGAGTGAATGCACTGTTTCTGGCGATGTGCCCGAGTGACTGACACC
+CATCGCGACATCGCCTGCTTTTAATAGCGTAGCCTGCATGTACATAAAATGGTTATTGCT
+TACCGCATCGCCCCGTAAACCAATACGCATTAGCTTGTGTTTCATATCCAGCGCCGTGAT
+CCCCGATGAGCCCACACCAAAGATATAAACTGAGTGACAGTGACGTAGGGCGTCCACGAC
+ACCGAGAACCTGTTGCATATCAAGCAGATTTAGCGTTTCAGATAATACATTACTAATGGT
+GTTCTGCAATTTTAAACCAATGGCGTGGGCATCGTCGGATTCGCTAACTTCGGCATCCAG
+TAGAGGACTACTGTCATCAGACTCGGTAGTGGCAAGTTCAATGGCCAGGTCCATTTTAAA
+ATCCTGAAAACCTTTATAGCCCAGGGTGCGACAAAAGCGAATAACCGTGGCTTCTCCGGC
+CTGTGTGTCGCGCGACAAATCGGCAATAGATGACTGTGTCACCTGTCTGGGGAAGGCTAA
+AATATATTGCGCAATACGCTGGGAGGCACGTGTCAGGCTTTTTTGCATAGCACCCAGAGT
+GTCAAGGATTTTTCCGGGTTTAAGACGTGGGGGTTGAGGCTCCATATAGTTCCTGCTCTT
+GAGGGCTGATTGCGGCACAAAGCCTGCAATATCGAACAGAGCAGAGCCTATCAAGTCTGC
+GGCATGAAAAAAAGCGCGATGTAAGTAAAGGAGGGGGAATATTTACATCGCGTTGCCAAC
+TTATGACAGAACATCACTGGAGAGTAATCTGTTTGCTCTTACGGTGTTTATAGCGCGCCA
+TCTGTCGGATGGTTTATATCTGGCCCGGTATTGTCGCCCCGCGCGTTGGCGTCACCGTGT
+GCTGTGTCGGTATAGCGTGGTATCATGAAATCATAATGTCACCTTTGGTTCAGGTTTTTC
+ATCCAGCGTCAATGGCGGTGCCGGGGCAAATAACGGGGCGAAAATAACGCCGCTGATCAC
+CACGCTTAGCGAGCCAATGACCCCATAAAAGAAGAAGTTAAGGTCAGTGGCATAGCGTGT
+GCCCAGCACGGTAATGACGCTGATAATAATTCCTAAAACCGCACTCCCGGCATTTGCTCG
+TTTAAAGAAAATGCCCAGCATGAACAGACCGGTCATTGGCCCTCCCATCAGACCTATCAG
+ACTATTAAATGCATCCCAGATTTCTGATTCATCGGCCATGACCAGCCATACCGAGGCCGC
+GCTACTTATCAGGCCCGCGACCAGAATAACTAACTTAGCTATTTTCATACGGTTTTCTGG
+CGTTCCTTTTTTATGACTCAAACGCTGATAGATATCGGAATTAAAACAACTGGAAATGCT
+GTTTAAGCTGCTGGAGATGCTGGACTGCGCGGCAGCGAAAATAGCGGAGATTATCAACCC
+TGCAATGCCGACTGGCATTTCGGTGACCACGAATAAGGGCAAAATGCCGCCAGTGTTGAA
+TCCCGCCGGTAATAATTGTGGATGTTGCTGATAGTAGACAAATAATGCCGAGCCGATAGC
+AAAAAAGAAAACAGGGATCACAGCAACCAGTTTGGCATTTGTAAGTAATGTTTTCTTCGT
+TTCCTCTATGGAGTCAGTCACGATATAGCGTTGGACCACATCCTGACTGGCAGTAAATTG
+CTGAATATTGGCAAACAGAAAACCAATCATCAATACAGGTACTGTGCTTTCCGTCCAGCT
+CCAGTGGAACTGCGTAGCCGGAAAGAATTTATCCGCCTGCTGCGTCACCGTAAAAATTTC
+ACCAATGCCGCCCTGGACTTTGAGACATATCACTATAAAAATCAGTATCGCGCTGCCAGA
+AAGTAAGAGACCTTGAATAACATCAGTCCATATTACTCCTTCAATTCCCCCCCATCCAGG
+TATAAATGATACACATCACACTAATCAACAGTACCAAAATCACCGGGTCTATAGCGATGA
+AGGGGCGCAAGGCCAGCACGGTGAGGAAAGTGATAATGGCGATACGTCCAATATGAAACA
+ACATAAATGACATGCTGGCGAACAGACGGCAGCGCACATCGAACCGTGCTTCGAGATATT
+CATAGGCTGATGTGACTTTCAATTTCCGAAAAAACGGAATATAGAAATAAAAAACCAGCG
+GTAAAATTGCGATAGCCAGATACTGACCAATGATAAACGTCCAGTCGGAAGTAAACGCTT
+TGGCAGGAATTGACATAAATGTAATTGAGCTTAACGTAGTAGCAAATACACTGACCCCAG
+CCGCCCAACCAGGAACCCGGCCACCACCGCGAAAATAATCGTCTGCTGTTTTTTGCCGTC
+TGGAAAAATAGACACCGACTAACATCATGGCCAGGAGGTAGCCAAATAATACAAAATAAT
+TAACGATGCCGAAAGAATGTGTAATCATAGTTCTTTCCTGCTTATGGTTTTGAGTAGCGT
+TGCGGTCATCGTCTGTGTATTTTTATCTTCCCATCATTCAGACGATACGATATTCAGATC
+AAGCAATAGTTTTTGTTAATCTCAATGACGCTAATCATGAGATGTAAAAGGCAGAGCATT
+TATGCTCCACCAATGGTGATGCCCAGACGTATGTCCTGCCGGATACCTGAAATAGCAGCT
+ATCCGATATTGCGTTCAAACAGGCTTCAGGCCTGGGTATCGTATTTGCCGATAGCATTCT
+TAACGTCGGATTATTCTCTGAGACATAAATATCAATAGTAAGCGTGTTCATTTTCTTTTC
+CTGTTCAGCGTTTTACTGCGTGACTGAACCACTGACAGATATGCTCGATACGGGTGATAG
+CGGAACCAACGGTAACTGCCCAGGCACCATGCTCAATAGCATTGGCCGCCAGTGCAGGCG
+TGTTATAGCGCCCCTCGGCAATAACACGACAACCTGCATGACTCAGTTGTGTCACCATTG
+CCAAATCTGGCTCAACCGGCGTGATGGGACCGGTATAGCCAGACAGTGTTGTACCAATGA
+ATTCGATTCCTTTCTGATGGCAACTTATGCCTTCATTCACGGTTGAACAGTCTGCCATCG
+CCAGTAATCCATGCAGGCGAATACGTGTCAGTAAACTATCAATATCAACCGGGCGAGAGC
+GGAATGAGGCATCAAAAGCGATAATATCGGCACCTGCCTGCGCCAGGGCGTCAACATCCT
+GTAAATATGGAGTGATACGGACTGGCGACCCTGTAAGGTCACGTTTAATTATCCCAATAA
+TAGGAACAGAAAGATGGGGACGAACAGTCCGCAGATTCTCAATGCCTTCAATGCGCACAG
+CGACCGCACCCGCCGAAGCCGCTGCCTGTGCCATTGCAGCCACAATTTCAGGTTTATCCA
+TAGGGCTGCCTGGTACCGGTTGGCATGAGACAATCAGCCCACCGTTTTCGTGTACACTTT
+GTTCCAGCCTGGCTAATAGTGACATCCAGTTCCGCCTTATATATTTTGAAGTTTTACTCC
+GTTTTATATTATGTTGTGGAGTGTTGCTGCATCAAGCAAAAAGATAGAAAAATGTGAACA
+CAGTCATAAGGTATGTTTGTCATTCAATTACTGCGCCAGCGGATTTGAGGCTATAACCTG
+AAGCTACAAGACAAATACAATTCATTCTGACGGCAGCATGCTGTATTTTATTAACTGCTT
+CCGGCATAAAGATAAAATGCTCAGAAGGAGCTTATACGCCACGGGATAGCCAGTATGAAA
+TGCATCACATAATTCTTGTTGTATTTATTCAGAATGCAATCACTAATGAGTTTGCCCATA
+ACAGGTCACCCATAATCACTTCTAACATCACCATAGTGTGCTTTTACCTCGCACATGGTA
+ACCCCATGTAATTATTTTTAGCGCTTGGTCACATTTTGTCATTTTCATTGTTGAAATTAT
+GCACCATAAGATCACTAATGATGAAGCTTTACTCCAGTTGTATTTCTTCGCATGGGGATG
+CAGATGAAAAATTTTAAGAAAATGATGACGCTAATGGCGCTATGTTTATCAGTTGCTATC
+ACCACATCAGGATATGCAACCACGCTTCCTGATATACCAGAACCACTGAAAAATGGTACT
+GGCGCTATTGATAATAATGGCGTGATTTATGTCGGCTTAGGTACCGCAGGGACATCCTGG
+TATAAAATTGATCTTAAAAAGCAACATAAAGACTGGGAGCGTATAAAGTCGTTTCCTGGT
+GGAGCTCGTGAGCAATCCGTGTCGGTATTTTTAAATGATAAGCTGTATGTTTTTGGTGGC
+GTAGGGAAAAAAAACAGTGAATCACCGTTGCAGGTTTATAGCGATGTGTACAAATACTCA
+CCGGTGAAAAATACATGGCAAAAAGTTGATACTATATCTCCAGTTGGATTAACAGGGCAT
+ACGGGAGTAAAATTAAACGAAACGATGGTACTTATTACCGGAGGGGTTAATGAGCATATC
+TTTGATAAGTATTTTATTGATATAGCGGCTGCGGATGAAAGTGAAAAAAATAAAGTCATC
+TATAATTATTTTAATAAACCTGCCAAAGATTATTTTTTTAATAAAATCGTATTTATCTAC
+AATGCTAAAGAGAACACATGGAAGAATGCCGGTGAGCTGCCAGGCGCGGGGACGGCAGGA
+TCGTCATCGGTAATGGAAAATAATTTCTTGATGCTGATTAATGGTGAGCTCAAACCGGGT
+TTACGTACCGATGTGATTTACCGCGCCATGTGGGATAACGATAAGCTAACATGGTTGAAG
+AACAGCCAGTTACCGCCATCGCCTGGAGAACAACAGCAGGAAGGGTTGGCCGGAGCATTT
+TCGGGCTATAGCCACGGTGTCCTGCTTGTCGGTGGTGGCGCGAATTTTCCGGGAGCAAAA
+CAAAATTATACTAATGGAAAGTTTTATTCCCACGAAGGGATAAATAAAAAATGGCGAGAT
+GAAGTCTATGGTTTGATTAATGGCCATTGGCAATATATGGGTAAAATGAAACAACCTCTC
+GGCTATGGTGTATCAGTAAGTTATGGTGATGAAGTTTTCCTTATTGGTGGTGAAAATGCT
+AAAGGGAAACCTGTTTCGTCTGTAACCTCCTTTACCATGCGTGATGGTAATTTATTAATA
+AAATAATTTTTTAAATACAAAAATAAAGTTAATTGATAAGCGGAGTATTTTATGAAAATC
+AACAGATATCTTCTGGGTATGGTTTCGTTTATAGCATTTTCATCATATCTACAAGCGGCA
+ACCCTTGATTATCGGCATGAATATGCTGATAGAACCAGAATTAATAAAGACCGTATTGCT
+ATAATTGAAAAGCTTCCTAACGGCATTGGTTTTTATGTCGATGCCAGCGTTAAATCGGGA
+GGAGTAGATGGTGAGCAGGATAAGCATTTAAGCGATCTCGTCGCAAACGCTATAGAACTG
+GGCGTAAGTTATAATTATAAAGTTACGGACCATTTTGTTTTGCAGCCTGGATTTATATTT
+GAAAGCGGTCCAGACACTTCAATTTATAAGCCTTATTTAAGGGCGCAATATAATTTTGAT
+TCTGGTGTTTATATGGCTGGTCGTTACCGTTATGACTATGCAAGGAAGACAGCTAACTAT
+AATGATGATGAGAAAACGAATAGATTTGATACTTATATAGGTTATGTTTTTGATGAGTTG
+AAATTGGAATATAAATTTACCTGGATGGATAGCGATCAAATTAAATTTGATAACAAAAAA
+ACAAACTATGAACATAATGTGGCTTTAGCCTGGAAACTGAATAAGTCATTTACACCATAC
+GTTGAGGTCGGAAATGTAGCGGTGAGAAATAATACCGATGAGAGACAGACCCGTTATCGC
+GTTGGATTACAATACCACTTTTGAGAAAGGTTAAAGAGTTACTGTACTAATCTTTGCTTA
+GTATTCATATCGATAATCTTATAGCCCGTAGCTACGTTACAGAACGTTAGACTTTGTCTT
+GAGTATTCACGTCCTTAACGTAGCTACGAACAAGGATGATAAACAATGAATATACGCCTT
+ACATAGCTACGATCTCAGGCGGTTGTCGGAAAGTGCCGGTTGATAGTGTCTTATTCGGCA
+ATTGATATGACTTAAAAATTAATTCCGTAAGCATTTCAGACGGTAACAGCAAATAAGGGT
+TTTATTGTGATAGCAAAATTCTTCCCGTGGTATAGCGAGATAACACGTCCACAAAAAAAT
+GCTTTATTTTCAGCATGGCTGGGTTACGTTTTTGATGGCTTCGACTTTATGCTGATTTTC
+TACATTATGTATCTGATCAAGGCTGACTTAGGATTGACAGATATGGAGGGCGCATTCCTT
+GCCACAGCGGCCTTTATTGGGCGACCATTTGGCGGGGCGCTATTTGGTCTGCTGGCAGAC
+AAATTTGGCCGTAAGCCGTTAATGATGTGGTCGATAGTTGCCTATTCTGTAGGTACAGGG
+TTAAGTGGCCTGGCTTCCGGTGTAATTATGCTGACGCTTAGTCGTTTTATTGTCGGTATG
+GGGATGGCGGGGAAGTATGCTTGCGCTTCTACTTATGCCGTGGAAAGTTGGCCAAAGCAT
+TTAAAATCTAAAGCGAGCGCATTTCTGGTTTCAGGTTTCGGTATTGGTAACATCATAGCA
+GCCTATTTTATGCCGTCATTTGCCGAAGCGTATGGTTGGCGTGCTGCTTTTTTTGTCGGT
+TTGCTACCCGTTCTTTTAGTAATCTACATCCGGGCCAGGGCTCCTGAATCTAAAGAGTGG
+GAAGAAGCCAAACTCAGTGGTCTCGGAAAGCATTCACAAAGTGCCTGGTCAGTTTTCTCT
+TTGTCAATGAAAGGGCTATTTAATCGAGCTCAATTTCCACTGACATTATGTGTATTTATT
+GTTCTGTTCTCTATTTTCGGCGCAAACTGGCCGATCTTTGGTCTACTGCCTACATATTTG
+GCGGGAGAGGGCTTTGATACGGGCGTGGTCTCTAATTTAATGACGGCGGCGGCATTCGGC
+ACTGTATTGGGAAATATCGTTTGGGGTCTGTGCGCAGATAGAATTGGTTTGAAGAAAACG
+TTCAGCATTGGTCTTCTCATGTCCTTTTTATTCATTTTCCCGTTATTCAGAATTCCGCAA
+GATAATTATTTACTGCTGGGCGCATGTTTATTCGGTTTAATGGCGACTAACGTAGGTGTT
+GGCGGGCTGGTTCCCAAATTTCTCTACGACTACTTTCCTCTTGAGGTTCGTGGTTTGGGT
+ACCGGGCTTATTTATAATCTTGCTGCGACATCAGGCACATTCAATTCAATGGCGGCGACC
+TGGCTTGGAATAACAATGGGGCTAGGCGCTGCGCTAACGTTCATTGTTGCTTTCTGGACC
+GCAACAATTCTACTCATTATTGGCCTATCCATTCCGGATAGACTAAAAGCACGTCGTGAA
+AGGTTTCAGTCAACAAAAGAATTTTAATAGAGGATAAATGATGACGAAATACGGTGTTAT
+AGGTACAGGTTATTTTGGCGCTGAACTGGCGCGATTTATGTCTAAGGTTGAAGGGGCGAA
+AATCACTGCGATTTACGATCCGGTAAATGCGGCTCCGATAGCGAAAGAGCTGAACTGTGT
+CGCCACTTCAACGATGGAGGCGCTTTGTACCCATCCTGATGTGGATTGCGTAATTATTGC
+TTCACCAAATTACTTACATAAAGCGCCGGTCATTGCGGCGGCTAAAGCGGGTAAACACGT
+GTTTTGTGAAAAACCTATCGCCTTAAATTACCAGGATTGTAAGGATATGGTTGATGCCTG
+CAAAGAAGCTGGTGTTACCTTTATGGCGGGTCACGTTATGAACTTTTTTCACGGGGTTCG
+CCACGCTAAAGCGCTCATCAAAGCCGGTGAAATCGGTGAAGTTACACAAGTTCACACTAA
+ACGTAATGGTTTTGAAGACGTGCAGGATGAGATCTCATGGAAGAAGATTCGCGCAAAGTC
+AGGTGGGCATCTGTACCATCACATTCACGAGCTAGATTGTACACTGTTCATCATGGATGA
+AACCCCATCCCTGGTTTCAATGGCGGCGGGGAATGTTGCGCACAAAGGTGAAAAATTTGG
+TGATGAAGATGATGTTGTCCTAATCACCCTTGAGTTTGAAAGCGGTCGTTTCGCGACACT
+TCAGTGGGGATCATCGTTCCACTACCCTGAGCACTATGTATTAATTGAGGGCACGACAGG
+TGCAATTCTCATTGATATGCAAAACACGGCTGGTTATCTAATAAAAGCGGGCAAAAAAAC
+ACACTTTCTTGTGCATGAAAGCCAGGCGGAGGATGATGATCGTCGCAACGGTAACATATC
+CAGCGAGATGGATGGCGCAATCGCTTATGGTAAACCCGGTAAACGTACGCCGATGTGGCT
+CTCATCAATTATGAAACTGGAGATGCAGTACTTGCATGATGTGATAAACGGTCTGGAGCC
+AGGCGAGGAGTTTGCTAAATTGCTAACGGGAGAAGCGGCGACAAATGCCATTGCTACCGC
+TGATGCTGCGACGCTTTCTTCAAACGAGGGGCGCAAAGTTAAACTCACTGAAATTCTTGG
+CTAAAATTTAAAGCCGGATGGTGGTGTTATTGGCCGGTGGCGCTGCGCTTATGTAGACTG
+GATAAGGCGCCCGTGAACTGTGCCGCCATCCGGCAATGGACGGGGGCTTAATGCGAAAAA
+AAGCCCGTACATTCGTACGAGCTCTTTCTTAAATATGGCGGTGAGGGGGGGATTGACTCG
+CTGCGCTCGCCCTTCGGGCAGCCCGTTCGCTGCGCGCCCGGTCTGTCCAACTGGCTGCGC
+CAGTTGTCGAACCCCGGTCGGTGGTTCTCATCCCCCCTTGGTTTGGGGGATACATATAAG
+CAAAAAGCCTGTACTTCTGTACAGGCTCTCAACTTGAAGATGGCGGTGAGGGGGGGATTC
+GAACCCCCGATACGTTGCCGTATACACACTTTCCAGGCGTGCTCCTTCAGCCACTCGGAC
+ACCTCACCAAATTGTCGCTCCAGCATTACTGGAACGGGCGCTAATGTAGGGAAATATCCT
+TTCTACGTCAATCAACTTTTTTAAAAAAAAGCGCTTTTATACAAACTTCCATCAATCTGT
+GGCTTTAATAAGCGAAAACTGCTTTTTTTGCCCGCGCCGGGAAATTTGCTATGCTGCACA
+TCCCGTTGAAAACGCTGATAACAGGCGCAATCACATTCCGCACAATACTGCTCAGGAGAT
+AACATGGAGATAATTTTTTATCACCCGACATTTAACGCCGCCTGGTGGGTAAATGCGCTG
+GAGAAGGCTCTCCCACATGCGCGCGTTCGTGAATGGAAGGTCGGTGATAACAACCCCGCA
+GACTATGCGCTTGTATGGCAGCCCCCGGTTGAAATGCTGGCCGGAAGACGCTTAAAAGCC
+GTCTTTGTGCTGGGCGCGGGGGTGGATGCAATTCTGAGTAAATTAAATGCGCATCCGGAA
+ATGCTGGACGCCTCCATTCCTCTATTCCGTCTGGAAGATACCGGAATGGGCCTGCAAATG
+CAGGAGTATGCCGCCAGCCAGGTATTACACTGGTTCCGTCGTTTCGATGATTATCAGGCG
+CTGAAAAATCAGGCGCTATGGAAACCGTTGCCGGAATATACCCGCGAAGAGTTTAGCGTC
+GGTATCATAGGCGCAGGGGTACTGGGCGCAAAAGTGGCAGAAAGTCTACAGGCGTGGGGG
+TTCCCGTTACGTTGCTGGAGTCGTAGCCGCAAATCCTGGCCTGGCGTGGAAAGTTATGTA
+GGGCGTGAAGAACTGCGCGCTTTCCTGAACCAGACGCGGGTGCTGATTAATCTGCTGCCG
+AATACGGCCCAAACGGTAGGAATTATTAATAGCGAATTGTTGGATCAATTGCCGGATGGC
+GCTTACGTGCTGAATCTCGCGCGCGGCGTTCATGTTCAGGAGGCGGATCTGCTGGCTGCG
+CTTGATAGCGGTAAGCTAAAAGGCGCGATGTTGGATGTCTTTAGCCAGGAACCGTTACCG
+CAGGAAAGTCCATTATGGCGCCATCCGCGAGTCGCCATGACGCCGCACATTGCGGCAGTC
+ACCCGTCCGGCGGAAGCCATCGATTATATTAGCCGCACCATTACCCAGCTGGAGAAGGGA
+GAGCCGGTGACGGGGCAGGTGGATCGGGCGAGAGGATATTGATATCAACCCGGCGCGGGC
+CGGGTTTCGCTAAAAAACGCTGGCGATACCTGCTATCCTTGTCGGAAATGACTACAGGAG
+AGAGCAATGTATCCCGTTGACCTGCATATGCATACCGTCGCCAGCACTCATGCCTACAGT
+ACTCTGAGCGATTATATCGCGGAAGCCAAACGCAAAGGCATTAAACTTTTTGCGATTACC
+GATCATGGTCCGGACATGGAAGATGCGCCGCATCACTGGCATTTTATTAACATGCGCATC
+TGGCCGCGTCTGGTTGACGGCGTGGGGATACTGCGTGGCATTGAGGCGAATATCAAGAAT
+ATTAACGGTGAAATTGATTGTTCCGGAAAGATGTTCGACTCGCTGGATCTGATTATCGCA
+GGCTTTCATGAGCCCGTTTTTGCGCCGCATGATAAAGAAACCAATACTCAGGCGATGATC
+GCGACCATCGCCAGCGGCAAGGTGCATATAATTAGTCACCCCGGAAATCCAAAGTATCCA
+GTGGAGGTTAAAGCCATCGCGCAGGCGGCGGCGAAACACCATGTAGCGCTGGAAATCAAC
+AACTCTTCTTTTCTGCATTCGCGTAAAGGAAGCGAAGATAATTGCCGCGCGGTCGCTGCC
+GCCGTACGCGATGCGGGAGGCTGGGTAGCGTTAGGCTCTGATTCCCATACGGCCTTTACG
+CTTGGCGATTTCACCGAATGCCGGAAAATTCTGGATGCGGTGAATTTTCCGGAAGATCGA
+ATCCTGAACGTCTCTCCGCAGCGCTTACTGGCCTTTCTCGAATCACGCGGTATGGCGCCT
+GTACCGGAATTTGCCGAACTTTAATCGTTATTTACGGGAAGATATCAATGAATGAGTTTT
+CAATCCTGTGCCGTGTGCTGGGATCGTTGTTTTACCGCCAACCGCAAGATCCTTTACTGG
+TTCCGCTGTTTACGTTAATCCGTGAAGGTAAACTGGCGGCAAACTGGCCGCTGGAGCAGG
+ATGACATGCTGGCGCGTTTACAGAAAAGCTGCGATATCACGCAGATTTCCACTGATTACA
+ATGCGTTATTTGTTGGGGAAGAGTGCGCGGTAGCGCCATACCGCAGTGCGTGGGTCGAAG
+GCGCGGAAGAGTCTGAGGTGCGCGCTTTTTTAACGTCGCGAGGGATGCCGCTGGCCGATA
+CGCCTGCCGATCACATTGGCACTTTATTGCTCGCGGCCTCCTGGCTGGAAGATCAGTCTG
+CCGAAGATGAAAGTGAAGCGCTGGAAACCTTATTTGCCGATTATCTGCTTCCCTGGTGCA
+ATACCTTCCTCGGTAAAGTTGAAGCCCATGCCGTTACGCCATTCTGGCGCACTCTGGCGC
+CGCTAACGCGTGATGCGATAGGGGCCATGTGGGATGAACTTCAGGAAGAAGATGAAGAAT
+AATGTGATGTAAATCACCATTAACTGCAACGGGTTTTGCATCATTGCATAAAATGTGTGC
+GTGATCTCATTAATGTGCCGCTTTTCTGTTATGATGCGCGCCATGAACATACTTCTTTCT
+ATTGCTATCACTACGGGCATCCTTTCTGGAATATGGGGATGGGTGGCCGTCTCCCTGGGG
+TTACTAAGCTGGGCCGGTTTTTTAGGCTGTACGGCTTATTTCGCCTGTCCGCAGGGCGGC
+TTTAAGGGATTGTTGATTTCCGCCTGTACGCTGTTAAGCGGTATGGTGTGGGCGCTGGTC
+ATTATTCACGGTAGCGCGTTGGCGCCGCATCTGGAAATTGTCAGTTACGTGTTGACGGGG
+ATCGTGGCATTCCTGATGTGTATCCAGGCAAAGCAGCTATTGCTTTCTTTTGTTCCGGGA
+ACATTTATCGGCGCCTGCGCGACATTTGCAGGGCAGGGTGACTGGCGGTTGGTATTACCG
+TCGCTGGCGCTGGGGCTAATCTTTGGCTATGCCATGAAAAATAGTGGGCTATGGCTGGCA
+TCACGCCGCGAGCAACATTCAGCGAATACGGCGGTCACAAAATAAAAAAGCGTGGGGTTT
+TCCCCACGCTTTGTCGTATTCATCAGGATTCTGGCGGTACTGACAGCTCACGGTATTTCA
+CCAGAATATCATTTTGCCTGTCCGCTTTATTCTGCAAATCCCACAGTCCGCGATCGATAC
+CATCATTAATGAGGAAGATAACGCCGGTTTCAATGGCTGACATCAGACACAGCATCACCG
+GTTCGTTCGAGGTATAGCCGATTTCGCCTTCCAGTAAGCGCTGGTAATCAATAAAACGGA
+ACACGCCTGCCTGTACTTCATAGGAAAGGATCGTTTTACTGGTGTTCACCGAAGAAAGGA
+TCTCGCCCGTACTGACGTTAACCACGCGCAGGTTGACAGCAATCTGATCCAGCTGATACT
+GCGTATCGGCGCCAATACCGAAATATCTTGCGCCGACCCCGCCGGATTTGACGTTACTTT
+CATAACCAATAATAGAACCTTCCACCATAATATTTGCCGCCGTCAACGACTGAAGCGGGA
+TACGGTTATTCATCGCCACGGTGCCGTTTTCCTGGGCTGCGCGAATAATTTTCCGTTCAT
+TCAAAAGATTCTGTAAGCCTTGTCGTTCTAGTGGGATAAACCAGCGCGAATCTTTCAGCG
+CGGTGACCAACATAGCGGTGGCGCTCTGCGGCACAGCCGTGGAAAAGTTACTTGCCGGGT
+AAGGTTTAAATTGGCCCGTTTCATCCTGAATGTTATATACCGAAACAAAGATCTTACCGG
+TGGGAGCAGGTAAGTGCGTCAAATCTTTGTAACTTTGTGCGCGGGGCATTAATGTCGGTT
+TCGCAGCTTGTTTCGGCGGGGCAGTTAAGCATCCGCTCAACAATAAAACGGCAACCAAAA
+TAAGTAAGCGCGGCATGATTTATATCCTTTAGTGACTGTAGCTTAAAAATCGGTTGACTG
+AGTTTGTAAACCTGACACTTCGATGGTCGAGGTTCTTCCCGTTTTTCTGTCCGTGACGTT
+GAGCTGGAGCTGTCCGTCGCGATTAGCGATATCGATAATAAAATCATTGGTCACCATACG
+TCCTGGTTTTCCGGTATTAATATTGGTCAACAAGCCGCCCAGAATTTGCGATTGAATAGC
+CTGCGTAAAGTTATCCAACGCTGAGGGGGTCTCGATACCAAAATCGTTATCATAAGCGGG
+GTCTTTATATGAATTTTGCGCCTGGGCGCTATTCAATAAAAAGGAACCGTTATTGGGGTT
+TCCACCAAAGTTAGGATTACGGAACTGGAACGTCATATTTCCAGCCCAGGTTAATGGCGA
+AAAAAGCATGAGCAGCACTACTGCATGTTTAACACGCATGACAGCCTCCGGATAAAAATC
+ATGTTTTAGAATTCATCACGCGCTAAATCACTCGTACTTAATAGCGTTTGATCTATTTGT
+CGGCGATTTAATGCTTCCTCTGTTTGCGCTAATGCGAAGACGACGGTTTTCTCGAAGTCT
+CTTTTCATTGGAAATAAAAAGGTCTGGAAAATAACGTCCTGATTTACCGTTATGGTGATC
+CAGCTTCCCCAACGCGCACTGGGTCTTTCATTAATGGTCAGATTGCCGGTGTATTCGCTT
+TCCCATTTGTCGCTGAATGCACGATAGAATTCATGTCCTATCGAAGAGACGGTATGGTCG
+GTTAACAATCCGGGAACCTCGACTTCAACTTCATTGGCATGCAGGTTTCCGGTAGCGAAC
+AGTAACTCTGCTGCTACAATCCAGGTCAGATAGCGTTTCATGGCCTTACCGCCTGAGATT
+ATCGTTTGCTCATGAAACTGCCTGGGTGCGATTTTTGACAGCTATCTTTTTGAAAAGATT
+ATAAAGATGTGTCTTAACCGTATTCTCGCTGATAAATAGCGACCTGGCGATTTCATTATT
+AGAGGCACCAATACGTAACTTATTGAGGATCTCTTTTTCGCGATGAGTGAGTAATGCGGA
+CTCGGTGCTGTTGTAGCGGTAATTTCCTGAGTGTGTAATCAGGTAACTGGCTAATTTTTG
+TGAAAAATAGCATTCGCCCCGCAGAATACCCTGTAATCCGCTGACCACGTGTTCCTGGTC
+TTCAGTGGCGTAAAACACGCCGTTAATATGAGGCCAGTTTTCAATTTCACGGTAGGGATA
+ATCGTCAGGGGTATTTAACAATAATGTTTTTATATTATTGTTTTTGCGGCTTAAATTATC
+CTGCCAATAGTGGATAAGCTTCTTATCCGCTTCCATCATATCCATTAAAACAATGCAACC
+GGCTGAGATATCTTCCAGAGAACGTTGAATATTATGCAGTTTTCCGGTTATGGCCAGCGA
+TTGCTTTAAATGTTGCAATAATGCCGTAGCTTGCAGAGATGGCTTTGTGATCAACAATAG
+TGTGTGACCATGACTACTATGGACTTCATTAAACATGATGAAACTCCACTTTTTTTAATC
+GCACATCTGACAGCTGCCCCCATAAAATAAAGGCACCAGAAGTACTGACAGATGTTGCAC
+TGCTGTGGGTTGAAATAGCCCATTATCCAGAAAGAGAAAAATATTTACGAAAATACTTTT
+AACTGTTTTCAATCTAGCCATTACAAATCTTAAAGCAAGTGTTAAACTTGTAACAAGATG
+TAAAAATATATATTAAAATGTTGTTTTTGGGTTTTTTTGAAGTTTAGATTTGATAGTAAA
+GTTGTACATTTCGCTGTTATTGCATAGATTTAAAAAATCATACAAATTATAATAATTCAT
+TGATTTTTAATCATTTTAATTATTGTATGTCATGTTTTGATTTTATTTTTTCTTAAAATT
+TGAGACGTGGCATTAACCTGGACAGCACAAAGACAAAAAAAAACGAAGTGTGTCACGTCT
+TGTGCGTATTGCCCCCCATGGGAAGCATAAGAACATCCCCATGGCGGCATAACACACACC
+AACACTTCATTTTTTAGGTGCGCGATACACTATCTTCTGTGGCCAAAAATCAATTATAAA
+AAATCACATGGCTATCGTTTTATTAGCACTTTGGTATGAGCTTAAATAACAAAATACCAC
+GCGTGGGTGAGTTATTAAAAATGTTTCCACGGACATACTCTTCATCGTAACGACGCGTTA
+ACAAAAAACGCATGTCGCTAACAAGGTAATAGATAATTTTCGCTATGTACGACCAGGTCC
+AGGGTGACAGCATGAAAAACAAATTGTTATTTATGATGTTGACAATACTGGGTGCGCCTG
+GGATTGCAACCGCGACAAATTATGATCTGGCTCGTTCAGAGTATAATTTTGCGGTAAATG
+AATTAAGCAAGTCTTCATTTAATCAGGCGGCCATTATTGGTCAAGTCGGCACGGATAATA
+GTGCCAGAGTACGCCAGGAAGGATCAAAACTATTGTCCGTTATTTCACAAGAAGGAGAAA
+ATAATCGGGCGAAAGTCGACCAGGCAGGGAATTATAACTTTGCGTATATTGAGCAAACGG
+GCAATGCCAACGATGCCAGTATATCGCAAAGCGCTTACGGTAATAGTGCGGCTATTATCC
+AGAAAGGTTCTGGAAATAAGGCCAATATTACCCAGTACGGTACGCAGAAAACAGCAGTTG
+TAGTGCAGAAACAGTCGCATATGGCTATTCGCGTCACCCAACGCTAATACCGTTACGACT
+TTTAAATCAATCCGATGGGGGTTTTACCATGAAACTTTTAAAAGTGGCAGCATTCGCAGC
+AATCGTAGTTTCTGGCAGTGCTCTGGCTGGCGTCGTTCCACAATGGGGCGGCGGCGGTAA
+TCATAACGGCGGCGGCAATAGTTCCGGGCCGGATTCCACGTTGAGCATTTATCAGTACGG
+TTCCGCTAACGCTGCGCTTGCTCTGCAAAGCGATGCCCGTAAATCTGAAACGACCATTAC
+CCAGAGCGGTTATGGTAACGGCGCCGATGTAGGCCAGGGTGCGGATAACAGTACTATTGA
+ACTGACTCAGAATGGTTTCAGAAACAATGCCACCATCGACCAGTGGAACGCTAAAAACTC
+CGATATTACTGTCGGTCAATACGGCGGTAATAACGCCGCGCTGGTTAATCAGACCGCATC
+TGATTCCAGCGTAATGGTGCGTCAGGTTGGTTTTGGCAACAACGCCACGGCTAACCAGTA
+TTAATTTAGCGTCTGCGCTAATAAAAAAACAGGGCGTAAGCCCTGTTTTTTTTCGGGAGG
+AAATTATGCATACTTTATTGCTCCTTGCCGCACTTTCAAATCAGATTACGTTTACCACGA
+CTCAGCAAGGCGATATTTACACGGTGATCCCTCAGGTCACATTAAACGAACCCTGCGTCT
+GTCTGGTGCAAATTCTCTCTGTGCGCGACGGCGTCGGGGGACAAAGCCATACACAGCAAA
+AACAAACGCTATCTTTACCTGCTAATCAACCGATTGAGTTGTCTCGTCTTAGTGTAAATA
+TATCTTCAGAGGACTCGGTTAAAATTATTGTTACTGTTTCGGACGGACAATCACTGCATT
+TATCACAACAATGGCCGCCTTCTGCACAGTAGTTTTTGATGGTGGCGGAAATGGATTGGC
+TGACCTGGGTATTAAAGAGGCGATAAAAGCGTCTCATCGTCTCGGCATGTCGCTAAAAGG
+TAACGCCGAACCCTCGAGGATGACTAATCATTGAGGAGTTAACATGTCCGTAATCAAGAA
+AAATATCCCTGCCATAGGCCTGTGTATCTGCGCTTTTTTTATCCATTCTGCGGTAGGGCA
+ACAAACGGTACAGGGCGGCGTTATCCATTTTCGCGGCGCGATTGTTGAGCCACTGTGCGA
+TATTTCTACTCACGCCGAAAATATTGATTTAACCTGCCTACGCGAAGGTAAAAAGCAAAT
+GCACCGGATAGACCTTCGGCAGGCATCTGGATTACCGCAGGATATTCAGTCCATTGCGAC
+GGTACGGCTGCATTATCTCGATGCGCAAAAAAGCCTGGCGGTGATGAATATTGAGTACCG
+TTAACGGCGGTGTCATAGATATAAAAAGAGCGACTCTGTTGAAAGCCCTGCTGTACACTT
+TGCAGATAAGGTGAGACAAAAGGGGGGGGTTATGACATCACGTCTTCAGGTCATACAGGG
+TGATATCACTCAACTTAGCGTCGATGCGATTGTGAATGCCGCTAACGCATCATTAATGGG
+CGGCGGTGGCGTAGACGGCGCAATTCATCGCGCGGCGGGGCCGGCATTGCTGGACGCCTG
+TAAACTCATCCGTCAGCAACAGGGCGAATGTCAGACGGGACATGCGGTTATCACGCCTGC
+TGGCAAGCTTTCGGCAAAGGCGGTTATTCACACAGTGGGGCCCGTCTGGCGAGGCGGCGA
+ACACCAGGAAGCTGAGCTACTCGAAGAGGCATACCGGAATTGTTTGCTGCTTGCCGAGGC
+GAATCACTTTCGTTCCATCGCTTTTCCGGCAATCAGTACCGGCGTTTATGGCTATCCACG
+CGCCCAGGCCGCTGAAGTCGCCGTCAGGACGGTTTCAGATTTTATTACCCGTTACGCTCT
+GCCTGAACAGGTATACTTTGTCTGTTATGATGAAGAAACTGCCCGGCTTTACGCAAGATT
+ACTTACTCAGCAAGGCGACGACCCTGCCTGATAAAACACGCCTGGAGCGTGCCGTTGAAC
+CGCTATGCGCGCGCCATCCCGGAGAGTGCGGCATTCTTGCGCTGGATAACAGTCTGGACG
+CTTTTGCCGCCCGCTACCGCCTGACCGAAATGGCGGCGCGGACGCTGGATGTGCAGTATT
+ATATTTGGGAAGACGATATGTCCGGGCGGCTGCTCTTTTCGGTTCTGCTGTCGGCGGCGA
+AGCGCGGCGTTCATGTTCGTCTGCTGCTGGATGATAACAATACGCCTGGTCTGGATGATA
+CGTTGCGCTTGCTGGATAGCCATCCTAATATCGAAGTTCGTCTGTTTAATCCTTTCTCTT
+TTCGTACGCTACGCGCGCTGGGATATTTGACGGATTTTGCGCGGCTGAATCGGCGGATGC
+ACAATAAAAGTTACACTGCCGACGGCGTAGTGACGCTGGTCGGTGGGCGCAACATCGGCG
+ATGCCTATTTCGGCGCTGGCGAGGAGCCGCTATTTTCCGATCTGGACGTGATGGCCATTG
+GCCCGGTGGTCAATGATGTCGCCAATGATTTTGAACGTTACTGGCGCTGTAGTTCAGTGT
+CGACATTGCAGCAAGTATTATCCCTTTCTGAGCAGGAACTGACGCAGCGTATCGAACTTC
+CCGAATCCTGGTATAACGATGAGATCACCCGCCGTTATCTGCATAAGCTGGAAACCAGCC
+AGTTTATGGCGGATCTCGATCGCGGAACGTTGCCGCTGATTTGGGCAAAAACACGCTTGC
+TTAGCGATGACCCTTCTAAAGGCGAGGGGAAGGCGCAGCGCCATTCGCTTCTTCCGCAGC
+GATTATTTGACGTGATGGGGTCGCCGACGGAGCGTATCGACATTATTTCCGCTTACTTTG
+TCCCTACGCGCGCAGGCGTGGCGCAGTTGCTTAATCTGGTCAGGAAAGGTGTGAAGATCG
+CCATCTTAACTAACTCTCTGGCGGCCAACGATGTGGCGGTCGTTCACGCAGGGTACGCGC
+GCTGGCGCAAGAAATTACTGCGCTATGGCGTGGAGCTCTACGAACTGAAACCGACCCGCG
+AACATGAAACCGCCGTACATGATCGCGGACTCACCGGGAACTCAGGTTCCAGCTTACATG
+CTAAAACGTTCAGTATTGATGGTAGTAAGGTGTTTATCGGGTCGCTTAATTTTGATCCCC
+GTTCAACGCTTTTAAATACCGAAATGGGCTTTGTCATTGAAAGTGAAACGCTGGCGACGC
+TTATTCATAAGCGTTTTACGCAGAGCCAACGCGATGCGGCCTGGCAACTGCGGCTGGATC
+GCTGGGGACGAATTAACTGGATCGATCGTCAGCAAGAAGAGGAAAAGGTGTTAAAGAAAG
+AACCCGCTACGCGTTTCTGGCAGCGAGTTCTGGTACGGTTGGCGGCAATTTTACCTGTGG
+AATGGTTGCTGTGAACCCGCGCGCGGAAAACTACCCGATCGCGGCGCGGCTTTCTTGTTT
+TACCGGCGGTTTACCTGAAAAGAGAAATTTCAGGAGCGGGATGCGTAAATGAATTTCATA
+CAGAATTAACGCAATACCCATAACAAATATCAGCCCGCACAAGAACCCGATCAGGTTGGA
+GGAGATATGCGGTGTAATATACGCGCCAAAGAAAAGCGTTAAGGGATGATGCACCAGATA
+AATAAACAGCGAAGCATTCACGAAATAGGTGACACGCGCGGACTGAAAGTTTAACAAGCG
+ATGCCCCAGTGAAAATACCACGTTCACCATCCATAGCCCCATTACCATCGTAATCACGGA
+TTCGGTTTCGTACATCCAGGCGTCGCCGCTCCCATAACGTTGATTCAGCAGATACGCGAT
+AAAAGCAACGGCAGCGCCTAAAGTGCATCCGCGTGAGGGCGTGGTGAAGCGCGCTTTCAG
+ATCGGGGTGAATGAAGGCCAACGCGCCGAGAATAAAAAACGGCACATAAAATAGCGTTTG
+CATCACAATAAAATTGAACATGCCGTCACTGAGGATTGCCGGATATACGATGAATATAAT
+GCGCCTGATAGCAGCGTACGCCACCCCCAGCAGGAAAAAAATAAGCGAAAGTTTGGCCAG
+CGAAATAGCGGCGGGACGAGGCTTGCTTGTTTCCTGCCTTTTTTGGAACCAGGTAAAAAT
+CCCGATGCTGACGGTGGTTAATATCACCAGCACCAGTAAAAACCACAGATGTGAAATGAG
+TTCCCACGCTAACGTATTATATTTTTCATAGGCAGAGAGTGTAGGCCAGTTCTCTGTTTT
+CTCTTTGACATATTGCAACAGGATAAATTGCGGCAAGGTAAGCAAAGGGATTGCGGTAAG
+CATGGGAATACCCACACGTTCTACCCGTACTTTCCACCAGTGTTTTAATGGATAACGTAA
+AAATAACATGTACGAAAAATAACCAGAAATAACAAAAAACACCTGCATACGAAAAGCGTG
+GATAAAATCGTTAAACAGGGTTAGCCACCACGATGGCGCGGCGCTATTGACATGCCAACT
+GTGAGTGGAATAGATCAACGAGATATGAAAGGGAATCCCTAACAACATCAGCCATGCGCG
+GATAGAGTCAAGAAAATATTCACGCGGCGCGGGTACAGAGCTCATATAAGGTCACGTATT
+CTCAGATTTTTCACCTTATCCATAAGGCGAATTATAGTTACATTCGGTAGCAACCCTACA
+CCAACTCCGACAACCTGTCTCCAGGATAAGCACGCAAAGTGAAAACAGGCGCGGGAGGTG
+CTTAATCCATGAGCCAGCGCGCTGAACAAAGCCTGGATTCAGTTGTCGTAATGCCTGATT
+ATCCATTAAAATGGATCGGATCGATATAAGCACACAAAGGGGGAAGTGCTTACTTATTAT
+GAAACATAAACGACAAATGATGAAAATGCGTTGGTTGGGCGCAGCTATTATGTTAACGCT
+CTACGCATCATCGAGCTGGGCGTTCAGTATTGATGACGTGGCAAAACAAGCTCAATCTTT
+AGCCGGGAAAGGCTATGAGGCGCCTAAAAGCAACTTGCCCTCCGTTTTCCGCGACATGAA
+ATATGCGGATTATCAGCAGATCCAGTTTAACAGCGATAAAGCCTACTGGAACAACTTAAA
+GACCCCTTTTAAGCTCGAATTTTACCATCAGGGGATGTACTTCGATACGCCGGTCAAGAT
+TAACGAAGTGACGGCGACGACGGTCAAAAGAATCAAATACAGCCCGGATTACTTCAATTT
+TGGCAATGTTCAGCACGATAAAGACACGGTAAAAGATTTAGGCTTCGCCGGGTTCAAAGT
+CCTGTACCCCATTAACAGTAAAGATAAGAACGACGAAATCGTCAGTATGCTTGGCGCCAG
+CTATTTCCGCGTTATCGGCGCAGGCCAGGTGTATGGCTTATCTGCGCGCGGCCTGGCGAT
+TGATACCGCCTTACCATCTGGTGAAGAGTTTCCCCGCTTTCGCGAGTTCTGGATTGAGCG
+TCCAAAACCCACCGATAAGCGTTTGACCGTCTATGCATTACTGGATTCTCCGCGCGCGAC
+CGGCGCTTACCGTTTTGTGATCATTCCTGGCCGCGATACCGTGGTGGACGTGCAGTCAAA
+AGTCTATCTGCGCGATAAGGTGGGCAAGCTGGGCGTTGCGCCATTAACCAGTATGTTCCT
+GTTTGGGCCAAACCAGCCGTCGCCGACGACCAACTATCGTCCGGAATTGCATGACTCGAA
+CGGCTTATCCATTCATGCGGGTAATGGCGAGTGGATTTGGCGTCCGCTGAACAATCCAAA
+ACACCTCGCTGTGAGCAGCTATGCGATGGAAAACCCTCAGGGATTCGGCCTGTTGCAGCG
+TGGTCGCGAGTTCTCGCGCTTTGAAGATTTAGACGATCGCTATGACCTGCGTCCAAGCGC
+CTGGATTACCCCGAAAGGCGACTGGGGCAAAGGTAAGGTTGAACTGGTTGAAATTCCGAC
+CAATGATGAAACCAACGATAACATCGTCGCTTACTGGACTCCGGATCAACTGCCGGAACC
+GGGTAAAGAGATGAACTTCAAGTACACTCTGACCTTCAGCCGCGATGAAGATAAACTTCA
+TGCGCCGGATAATGCCTGGGTGCTGCAAACACGCCGCTCAACGGGCGACGTTAAACAGTC
+GAATCTGATTCGCCAGCCCGACGGCACTATTGCCTTTGTGGTGGATTTCGTTGGCGCCGA
+CATGAAAAAACTGCCGCCGGATACGCCCGTCGCTGCACAAACCAGCATTGGCGATAACGG
+TGAAATCGTTGACAGTAATGTACGCTATAACCCAGTCACTAAAGGCTGGCGTTTAATGCT
+GCGCGTGAAAGTCAAAGACGCGAAGAAAACCACGGAAATGCGTGCCGCATTGGTGAATGC
+CGATCAGACGCTAAGTGAAACCTGGAGCTACCAGTTACCTGCCAATGAATAAAACAACTG
+AGTATATTGACGCACTGCTGCTTTCTGAACGTGAGAAAGCGGCATTGCCGAAAACTGACA
+TCCGCGCCGTGCATCAGGCGCTGGATGCCGAGCATCGGACTTACTCGCGAGAAGACGATT
+CACCGCAGGGTTCCGTAAAAGCCCGCCTTGAACACGCCTGGCCGGATTCATTGGCGAAGG
+GGCAGTTAATTAAAGATGATGAAGGGCGCGATCAGTTGCAGGCTATGCCAAAAGCGACGC
+GCTCTTCGATGTTTCCTGATCCCTGGCGAACCAACCCGGTTGGCCGTTTCTGGGATCGCC
+TGCGTGGGCGGGATGTAACGCCGCGCTATGTTTCTCGTCTGACAAAAGAAGAGCAGGCGA
+GTGAGCAAAAATGGCGTACCGTCGGCACTATACGCCGCTATATTTTGTTAATTTTGACTC
+TGGCGCAAACCGTCGTCGCGACCTGGTATATGAAGACCATTCTGCCCTATCAGGGATGGG
+CGCTCATCAATCCTATGGATATGGTGGGGCAGGATATTTGGGTCTCCTTTATGCAGCTCC
+TGCCCTACATGCTGCAAACCGGTATCCTGATTTTGTTTGCCGTGCTGTTCTGCTGGGTGT
+CTGCCGGATTCTGGACGGCGCTGATGGGCTTCCTGCAACTGCTTATCGGGCGCGATAAGT
+ACAGTATCTCCGCGTCTACGGTTGGCGATGAGCCCCTCAATCCGGAACACCAGACGGCGC
+TGATCATGCCTATCTGTAATGAAGACGTTAGCCGCGTTTTCGCCGGTCTGCGCGCGACCT
+GGGAGTCCGTTAAAGCCACAGGCAACGCCGCGCATTTTGACGTCTATATCCTTAGCGATA
+GTTATAACCCGGATATCTGCGTGGCGGAGCAAAAGGCGTGGATGGAGCTCATCGCGGAAG
+TGCAGGGCGAAGGCCAAATTTTTTACCGTCGCCGCCGCCGCCGTATGAAACGCAAAAGCG
+GCAATATTGACGATTTTTGCCGCCGCTGGGGCAATCAGTACAGCTATATGGTGGTGCTGG
+ACGCGGACTCAGTGATGAGCGGCGAGTGTCTGAGCGGGCTGGTGCGCCTGATGGAAGCGA
+ACCCTAACGCCGGGATTATCCAGTCTTCGCCGAAAGCGTCGGGGATGGATACTCTGTATG
+CCCGCTGCCAACAGTTCGCGACCCGTGTTTATGGACCGCTGTTTACCGCCGGGCTGCACT
+TCTGGCAGTTGGGGGAGTCGCACTACTGGGGGCACAATGCCATTATCCGCGTGAAGCCGT
+TTATCGAGCACTGCGCTCTGGCGCCGCTGCCGGGAGAAGGTTCGTTCGCCGGATCGATTC
+TTTCCCACGACTTTGTGGAGGCGGCGCTAATGCGTCGGGCAGGGTGGGGCGTCTGGATTG
+CCTACGATCTCCCCGGCTCCTATGAAGAGCTGCCGCCAAACCTGCTGGATGAGCTTAAAC
+GCGACCGCCGCTGGTGTCACGGCAACCTGATGAACTTTCGTCTGTTCCTGGTGAAAGGAA
+TGCACCCGGTGCATCGCGCCGTGTTCCTGACCGGGGTAATGTCATACCTGTCCGCGCCGT
+TATGGTTTATGTTCCTCGCGCTTTCTACCGCGCTGCAGGTCGTTCATGCGTTAACAGAGC
+CGCAATATTTCCTTCAGCCGCGCCAGCTTTTTCCGGTCTGGCCGCAGTGGCGTCCGGAAC
+TGGCAATCGCGCTGTTTGCGTCAACGATGGTGCTGCTGTTCCTGCCGAAGCTGCTCAGTA
+TTATGCTGATCTGGTGTAAAGGCACCAAAGAGTATGGCGGTTTCTGGCGCGTTACGCTGT
+CGCTATTGCTGGAAGTGCTGTTCTCCGTGTTGCTGGCGCCGGTGCGTATGCTGTTTCATA
+CCGTGTTTGTGGTCAGCGCGTTCCTCGGCTGGGAAGTGGTCTGGAACTCACCGCAACGCG
+ACGATGATTCTACGCCGTGGGGAGAAGCCTTTATGCGTCACGGCTCTCAACTGCTGCTGG
+GGCTGGTCTGGGCGGTGGGTATGGCGTGGCTGGATTTACGCTTTCTGTTCTGGCTGGCGC
+CGATTGTCTTTTCGCTGATTCTGTCGCCATTTGTTTCGGTGATCTCCAGTCGTTCAACGG
+TAGGATTACGCACCAAACGCTGGAAGCTGTTCCTGATCCCGGAAGAGTATTCGCCGCCTC
+AGGTGTTGGTCGATACCGATAAATATCTGGAGATGAATCGCCGCCGTATTCTGGACGATG
+GCTTTATGCATGCGGTATTTAACCCGTCGCTTAATGCGCTGGCGACCGCGATGGCCACCG
+CGCGTCACCGCGCCAGTAAGGTGCTGGAAATAGCCCGCGATCGTCATGTGGAGCAGGCGC
+TAAACGAAACGCCGGAGAAACTGAACCGCGATCGGCGTCTGGTTTTGCTCAGCGATCCGG
+TGACGATGGCGCGTTTACACTATCGGGTCTGGAATGCGCCAGAGAGATACTCTTCCTGGG
+TAAACCATTATCAGTCTCTCGTCCTGAATCCGCAGGCGTTGCAGGGACGAACATCGTCAG
+CGGGATAAGGTCTTCAGGTCTGGAGTGAGGTGAAAAATACCGGCGTGATGCCGGTATTTT
+TATAGTGAAATGAGGTATCAGGTGCGTATATTCGCGGTGAGCATAATGGTGATTACCCTG
+AGCGGCTGCGGCAGTATTATCAGCAGAACGATCCCCGGACAAGGACACGGCAACCAGTAT
+TACCCTGGCGTGCAGTGGGATATGCGTGATTCCGCATGGCGCTATATCACTATCCTCGAT
+CTGCCCTTCTCACTGATCTTCGATACACTGCTACTGCCGCTCGATATTCACCACGGGCCT
+TATGAGTAATTAACGCTCATCCCATTCGTCTGCCGCAGTACGGCCTTCCTCGGTATCAAG
+CGGGGGTTCAAGCTGGAATTCCCCTTCATCCCATTCATGCAAAGTATTCTCTTCCTGCCA
+TTCCTGGCGGATCTCTATCTCATCATAATCGCCGTCAAACACGCTCTGTGCGGCTTCACC
+ACTCAGCATCGGCAGACATTCGCCATCTTCACCTTCATCGGCGAAAAATTCGACCTGCCA
+CATGATGTCCCCATCCTGCAGTACATATTTCTGAACGTTGAACTGCTGCACATTCGCTTC
+GTCTTGTTCGAGGCCTGGATGGTCAGCCAGAAATTCTTCCCGGGCTGCATCGATAGCTTC
+TTCCAGCGTGGCATACATGGTCATCAGTGTCTCCCTTTGATTTGACGAGGTATTTAGGGA
+AAGAATAGCTGATTCTGTGATATTGCAAGTATGAAAGCGCAAAAATCATTCTATTGCCAG
+TCTGCGCCGTCGTAAGCTGTTCCATGAATAGATAGCATTGAACAACACCACGCCTGCCGT
+GACGCAGAATACGGCGCGGAAGCCATAGCTCGCGGAAATTGCGGCACCCATGAGAGGGCC
+GGTGACGTTGCCGATATCGCGGAAAGATTGGTTGTAACTGAATATGCGCCCGGCTATCTG
+GTTGGTAGAGTTGTAAACCAGCAGAGTTTGAACGGCTGGCAGCAGCGCGCCATCCGCCGC
+GCCAAGCAGAAACCGCAATAGCGCGAGCTGCCACGGCGTTTGCACAAAAGACATTGGAAT
+CAGCAGCAGTACGGAAATAATCAGCGCGACGATAAGGATCTTTTCCGGGCCAATTCTGTC
+GCCGAGCTTGCCGAGCCGGGGGGCGCTAAGTAATGCCGCCACGCCGGGAACGGACGCTAT
+CATCCCGCTAATGAACGCAATATTACTGACATTTCCCGCGAGTTCGCGCACATAGAGCGT
+CAGGATGGGCGCAATAGACCCCGTCGCCACCTGAATAATCAGCGTGGTGACAAACAGGCT
+TAACACCAGGCGGGGATTTTTTAACGAGGCGACCACTTCCCGAACGTGAAGCATCTCTTT
+TTTGCTCACCGGCAGGAAATTCTCGCGAATAAAAAAGAAGGTGAGCAGAAAACAGATAAA
+CAAAACGCTGGCGGTAATAAAAAAGACCGGGCGAAGGCCGTAATGGTCGGCGAGCAGACC
+GCCCGCAAGTGGGCCAAGCAGAGCGCCGCTGACGCCGCCAGTAGAGAGCGTCCCTAACGC
+CCAGCCGCTTTTGTGGCGCGGCACCTGAGTGGCGATGAGCGCATTGGCGTTGGGAATAAA
+TCCGCCCAGCAGGCCTAATAACGCGCGGAGGATCAGAAATTGCCAGATATTTTGCGCCAT
+GCCCATTAACAGCATCACAATGGCCATGCCGAGAGCGGAACGCAACAGTATGATCTTACG
+CCCTTTACGATCCGCCAGACCGCCCCAAAAAGGGGAAGCGATAGCTGAAAAGAGGAACGT
+AATACTAAAGACCAGCCCGGACCACATGTTGAGCGCGCTATGGCCTGTTACGCCGAGTTG
+CTCAACGTAGAGAGGGAGAAAAGGCATGACCAGACTGAATGCCGCGCCGGTTAAAAAACA
+GCCCAGCCAGGTAACGGTTAGATTGCGTTTCCAGTTTATGGGGACATCAGAGGGTGACAT
+AGTGTTCCACAGTATGATGCGCGTTTTGCGCTATCATCAATTTAATTATGAGCGTACTAA
+TTATAATTATGCGCCGACCTTACCAGCCTCGCAATGTGGGGAGCTTTTAAAGCTAAAAGA
+GGGGAAAAATTGCAGCCTGACGGCTGCAATCCTGTCAATAGCGCGACGGTACGCCTTCAG
+GGCGAGTTTTAAAGCGGCGATGCAGCCACATATACTGCTCTGGCGCCATCATAATGCACT
+GCTCAACAATCTTATTCATCCATGCGGCAGTCGCTTCTGCGCTCTCCAGCGGAGGCGAAT
+ATTCCGCAGGAAGGATGATCAGTTCGTAGCCTTTCCCGTCGGGTTTACGGCGAGGCACGG
+AGGGGATAATACAGGCTTTAGACATGCGCGCGAGCATCCAGGTACCGGAGGTCGTAGCGG
+CCTGGTCGACGGCGAATAACGGTACAAAGACGCTGGCGCGCGGGCCATAATCGTGATCCG
+GCGCATACCAGATCAATTCGCCGCTTTTCAATGCCTTAACCATACCTTTCAGATCTTTGC
+GATCCAACATCGATTTATTGGAGCGTAAGCGCCCCCAGGTCTGTAACCAGTCGAGCAACG
+GATTATCATTCGGGCGATAAACGCCGATACCCGGGTTATGCATACCAAACATGCGGGCGC
+CAAACTCAAGGGTAAGGAAATGTATTCCTACCAGAATGAACCCCAGCCCCTGCGCCTTAA
+CTTCACGGATATGCTCCAGACCGCTCGCTTCCATCCAGCGGTTCACTCGCCGATCGGGCC
+AGAACCAGGCCATGCCGGTTTCCATCACGCCCATACCGACGGATTCAAAGTTTTTGACCA
+CCATGGTGTGGCGTTCCTGCGCGCTCATTTCCGGAAAGCATAATTCAAGGTTGCGATAGG
+CGATTTTCGCGCGGCGTTTCATTACGCGTCGCGCCAGGTGGCCCAATGCGCAACCCAATT
+TATAGATGACCGGGTAGGGGAGTTGCACGACCAACCAAAGCGCGCCTATACCCAACCAGG
+TTAACCAATAGCGCGGGTGCAGTAAGGCCACGGAGAACTTAGGCAACTTCGTCATTTCTA
+TCCTGTCTTTCAACGAACAATTCTCCGTATTCTCGCATCTTTTTGCGTTGAGCAAAAATA
+TGTAGCAGGAGAGTGGCGATTAAATCGACAATTGTTGTTAATTATTTAGCGTAAAGCAGG
+AAATGTAGCGCAAAATGTGTGGATGTAAATTGGCGAGACTTGCCTTATCATGCCTGCCCA
+CTTTATTTTTTGCTGATTGCAGGATACGTACACCATGCCAGTGTTACACAACCGCATCTC
+TAATGACGAGCTGAAAGCCAAAATGCTGGCGGAAAGCGAGCCGCGTACGACAATTTCTTT
+TTATAAATATTTCACTATCGCCTCGCCGCAACAGACGCGGGACGCGTTGTATCAGGTGTT
+TACGGCGTTGGACGTTTTTGGTCGCGTTTACCTGGCGCATGAGGGCATCAATGCGCAAAT
+CAGCGTGCCGCAAAGCAAGGTTGAGACCTTTCGTCAACAGCTTTATACGTTCGACCCCGC
+GCTGGACGGGGTGCGTTTAAATATCGCGCTGGAGGATGACGGAAAGTCATTTTGGGTGCT
+GCGTATGAAAGTTCGCGACCGTATCGTCGCTGACGGTATTGACGATCCGAGTTTTGACGC
+CAGTAATGTCGGCGATTATCTGAAGGCGGCAGATGTGAATGCGATGCTGGACGATCCTGA
+CGCGGTCTTTATTGATATGCGCAACCACTATGAGTATGAAGTCGGCCATTTCGAAAATGC
+TCTGGAAATCCCGGCGGATACGTTTCGTGAACAGTTGCCAAAAGCGGTTGAAATGCTGCG
+GGAACATGCAGATAAAAAGATAGTGATGTACTGTACCGGCGGTATTCGTTGTGAGAAAGC
+CAGCGCCTGGATGAAACACAACGGTTTCAATAAAGTCTGGCATATTGAGGGTGGCATCAT
+TGAGTACGCCCGTCGCGCGCGCGAGCAGGGGCTTCCCGTTCGCTTTATCGGCAAAAACTT
+TGTATTTGATGAGCGAATGGGCGAGCGCATCTCGGATGAGGTTATCGCGCATTGCCATCA
+GTGCGGCGTGTCCTGCGATAGCCATACCAACTGCAAAAACGACGGTTGCCATCTGCTGTT
+TATCCAGTGTCCGCAGTGCGCCAGTAAATTTAACGGCTGCTGTAGTGAACAATGCTGTGA
+AGAGTTGGCCTTGCCGGAGGAAGAACAGCGCCGACGTCGCGCGGGTCGTGAGAACGGCAA
+CAAAATTTTTAATAAATCGCGGGGTCGGCTTAATAGCAAACTGAGCATTCCCGATCCGGC
+TGAGTAATATTTTGCCGGATAGCGGCGTAAAGGCTGCTATCCGGCATTTCGCCAGGCGTT
+ACTTCTGCTGAACGCCTTCTACTGAGATGATAAGCTCCACCTCTTGTGAGGCTGGGCCGA
+GATCGGTAGTTATATTGAAATCTTTCAGCTTAATTTTTCCTTCGGCCTCAAAGCCCGCGC
+GCTTACCGCCCCACGGATCGTCGCCCTGGCCCATCAGCTTCGCTTCCAGCGTCACCGGTT
+TAGTCACGCCATTGAGCGTCAGATTGCCGGTAATATCCAGTTCATCGCCCTCTTTTTTCA
+CGCTGGTAGAGGTGAAGGTTGCCTGCGGGAATTTCGCAACATTAAGAAACTCCGCGCTAC
+GCAGGTGTTTGTCACGTTCGGCATGGTTAGTGTCGACGCTATTGGTGTTAATGGTCACAT
+TCACTTTGTCTGCTGACGGATTTTTTTCGTCAAAAGTGAACGTGCCGTCGAAATCTTTAA
+AGGTGCCGTATAGCCAGCTGTAGCCCAGATGCTGGATGCGGAAATTGACGAACGCATGTT
+GGCCTTCTTTATCAATTTTATACTCCGCCGCCACGGCGGAACCGGTCGTGAATAACAAGG
+ATGCGAGGGTGAATCCCAGCAGGTTTTTTTTCATTTTTGAGCTCCATAGTCAGATGACGA
+CATTCCTGTCATACGTTTCAGTGTGTCGTCTTTATCGATGAAATGGTGTTTTAGCGCCAT
+AACCCCATGCGAGAGCGAGATAATGACCAGCGACCAGGCAAACCACAGATGCAGTGTTCC
+GGCGATGTCAGCCTGCGCGCCCGCGTCCGTAAGCGTGGCCGGAATCTCAAACCAGCCAAA
+GACGCTAATCGGTTTACCGTCGGCGGTGGAAATCAGGTAGCCGCTAATGATTATCGCAAA
+GAGCAGGAGATACAGAAGGATATGACCCGCGGCGGCGCCAATGCGCGTTAAACGGGAATA
+GCTGGTCAACGCAACGGGCGGCGGAGAATAAAGCCGCCAGATAATACGCACGATCAGCGC
+CATCATCAGTAACATGCCAATACTTTTATGTATTTCCGGCGCCTAGTGATACCAGCCGTC
+GTAATAACTGAGCGTGACCATCCATAAACCCAACGCAAACATGCCATAGACCACTAGGGC
+GGTCAGCCAGTGGAGGGCGGCGGAAACTACGCCATAACGTTGTGGAGTATTTTTAAATTG
+CATAAACACACCAATGAATATTTCACGAGAGAATGAAAATGGCGTGGAAAAGCGCCGAAT
+GCAACTTATAAATAAGAATTTGAATGATATTTATTTTTATTTCAATAATTTTGATGTTGT
+TTGCGATTCAGCTTCAGAAGTTTCGAGATATTTCACTCCTGTAACCGCACAACGGCAGGA
+GTTGGCATTATTGCCGTGCTTCAACATTATGAAATAAAAAGTGAGGAACTTTCAGGAAGT
+GTTAGTTAACGTCAATGAAAAGCAATCAGAAGAAAAGGAGATAAACAATATCCATCACCG
+CCAGCAGCGACCAAAGAATAACGTAAAGCATGAAATGTTCGCGAATATTATTCATCAGAA
+AATGAAAGAGACGACGCATAGCTTACCTTAATAAACAGCCCCTTTACGGGGCCGACAAAT
+TATTGGCTAAAACGGGAAAGCCGGAACGGCGTCAGATCAAAGGAGGGCGTTTTTCCCAAC
+GCAAAATCCGCAGCGATTTCTCCTAACACCGGGGCGAATTTAAAACCATGTCCGCTGAGT
+CCAGTGATGACAAGCGTATTCTCATGGCCAGGCAGCGTATCGATAATAAAATCCTCGTCC
+GGCGAATTATCATAGGTACATGCCGCCCCATGTAAACAACCGCCGATACCCGGCAGTACG
+TTACGCAGGAAAGGAAATGCTTCCGCGCCATCGCTGGCAACGGCGGCAAAGGGCTTGCGC
+TCTTCCGGTGCCTGTATTCGCTGCCCGCCATTGTGTTTGCCGATTTTTAACTCGTCGTTC
+TCCGCCGGGAAACCGTAATAGTGATCGCCGTTGGGCATTTCGCCGGTAAAGGCCGGAAAG
+CGGTTTTTAGTGCTGTAACGTCCATCCGCCTTAAACCAGGCAAAAACTTTACGTACGGGC
+TGAACGGGCAGCTCCGGTACCAGCGTTTTGACCCAGGTGCCCGCGCTAATCAGCGCTTTG
+CTGGCGTGGTAGCAGCCTTCACTCGTCTCTATCGTCACACCGTTATCATCATGGTGAATA
+TGGCTTACCGGGCTGTTGAATAGCTGTGCGCAGCCTGCCTCTCGGGCCAGACGAAGCCAT
+GTGGTAATGGCTAATTCGCTGCGCAGGAAACCGGAGTCAGCTTCAAACAGCCCGATATAA
+TTATCGGGCACGCGAATTTCCGGCCAGCGCGTCATGAGGGCCGTCGCGTCCAGGCGCTCG
+ACGTTCAATTGCCACTGTTGCGCGCTTCGTGCGACGTTGGCTAAGAAAGCGGAATCGGCC
+GGGCCGAGGTTGACGACGCCGGAGCGGACAAAAATAGGCTCTTCATTGTGTGTGGAGAGC
+TCATCCCAAAGCGTCTGGGCGCGAAGCACCAGCGGGACATATTTTTCGCCTTCACCATAA
+GCGTGGCGGATAAGACGGGTATCGCCGTGGTGGCTGCCCTGTTGATAAGGCGGCATATGC
+GCATCGGTCATCAGGACCTTTAGCCCGGCGCGGGTGGCGTAATAACCAGCGGCGGCGCCA
+ACCGAACCGCTGCCGATAATAATAAGGTCGTATTTCATCAGCTTCTCTCTGCTATCGCGA
+TGATTTCAAGGTAAATAACTGCGCTGAGATATACAAGCCAGAAATAAGTGAGGCACCTTA
+CGGTGCCTGAGAGAGGGGGAGCGCGTCACGCTAATGGTGACGATACTCGTTTTCCTGGTA
+ATCGCCTGATTCTATTTTGGCGATGCCGGCTTCTAATATTGAAATAAATTGCCTGGCTAC
+ATCTGTCGTTAACCAGAGCGTTTGACCAACTTCAGTCCCTTCCGGTTCCGGACGATTTGG
+GGTCTGGTAGTGTAAACGCAGCATCAGCGCATCATAGCTATCGACGGTGCTGATGTCCCA
+TCCTACAAGCGGATGGGTCTGAATGACTTCATTATTCTTTTCCATCATGCCCCCCTGGTA
+CGTGTTATAAGACAACGGTTCTCGAGGTTCAATGCGTGTTTTTCTTCTGAAGCAACTTCA
+GTATACCAATTAATAAGGCTATTCACTGCGTTTTTAAAGAGACCGGAGGATAAATTTCTC
+TTTTTAAGAATTATATGAACAATAAAGCGGCAGTTCATTCATATTTTTTTAGGATGTTGT
+GCAATTATTTTGACGGTCAGGCGAAATATTCATCAGTTGCGCAAATAAAAAAGCCGGGGC
+GACCCGGCAAACATACATCACTGCATATCATTTTTTATTCATTGATGAACCAGTCATCAG
+CGCTCTCCCAGGTCTCCTGGAGAATCTCGCTAATGCGTTCTTTATCCTCTTTTGACGCGC
+CAATGACGGACAAGTTGTTGGCGGTAGCGTAACGCACGGTGACGTTACCCAAATTCTCAG
+GAAAATGATGGCTAATACGGCGGGNNNNNNNNNNNNNNNNNNNNNNNNNNCCCGCCAGCG
+CACCAATCGCACCGGCAGGCAGAGGTGACGTTTTGGCTATAGTGACTTCAATACGCATAA
+TGGCCCCCTGTTGAATATACTGGATATATATACAGTTAAATCCAATATATAGCAACAGGT
+AAGCGCATTTTTTATTTTTTTACTGACCAGCGTACTGTTTCACCCGCTAAAAAAGGCACC
+AGCGAATCATCAGCCAGCGCGATATTTTCCGGTATCTGTTGTTCATCGCGAACCAGTTCC
+ACCCACCCCGTATTCACCGGCAGGCCATAGAATTGCGGGCCATTCAGTGAACAGAACGCT
+TCAAAGTGCGCCAGCGCGTTCATTTCCTCAAACACGGCGGCATAACTGCCAAGAGCGGAG
+GGGGCGTTGAAACAACCGGCGCAGCCGCAACGGGTCTCTTTACGATGACGTGAATGCGGC
+GCTGAATCCGTCCCCAGGAAGGCGCGCGTAAAACCACTGGCGACCAGGTCGCGTAACGCC
+TGCTGGTGAATATTGCGTTTCAGAATCGGCAGACAGTACAGGTGAGGACGAATGCCGCCA
+ACCAGCATATCATTACGGTTAAACATTAAATGTTGAGGCGTAATGGTCGCCGCCAGGTTG
+TAGCTGCCGTCACGTACATACTGCGCGGCATCTTTGGTTGTGATGTGTTCAAAGACCACT
+TTAAGCGCGGTCAGACGCTGGCGTAGCGGTTCCATTACGGTGTCGATAAAACGCGCTTCG
+CGATCGAAGATATCAACATCCGCATGGGTCACCTCACCGTGGACCAGCAATGGCATTCCG
+AGTTTTTCCATCCGCTCCAGTACCGGCATGATAGCGTCGACTGACGTTACGCCATGACTG
+GAGTTAGTGGTGGCATTGGCCGGGTAAAGCTTGGCCGCAGTAAACACGCCTTCATGGAAA
+CCACGCTCCAGTTCATCGGCATCGAGCGAATCCGTTAAATAGCAGGTCATTAACGGCGTG
+AAATCATGCCCGGCGGGCACCGCATCGAGAATACGCTGGCGGTAGGCGATCGCTGCATCA
+ACGGTCGTAATGGGGGACGCCAGGTTCGGCATCACGATAGCGCGACCATAAATTTCGCTG
+GTATAGGGTACGACCGTTTTTAACATGTCGCCATCGCGAAGGTGAACGTGCCAGTCGTCC
+GGGCGGCGGATCTTTAAAACCTGGGATGGTGCAGTCATTAATAAGCTCCGGCTGAGGAAT
+AGTCTTTTTGCCGGAAACAAAGGATAAGCGGAAACGTTTTCGTTTGCACGTAAAAAAAGG
+GCGCGAGCGCGCCCTCCGAAATCAATTGGTGAAAGGAATAACGATTTCACCAGGTTTAAC
+TTCAATGCCTTTCGCGAGTTTCTTCGCTAACGCTTCGCCTTTACTACTGTCCTCACGCAA
+CACGTAAGCGGGCCGCTGGTTAAAGTAGCTACGTAAAGACTGATTTAAATAGGGCAGGAG
+CGTTTGTAGCACTGATTGCATTTTCTCCGGCGTCACGGTGGCGTCTACTACTTCCATCTC
+CTGAAGATAGATGGCGCCTTTTTCTTTATCAAAGACCGGCAGGGCTTTTAGCTTGAGTTT
+CATCGTCGCTTTTTGACTGCCAAACAGGGAATTCATATCCAGCCTGGCATCGCCAGTAAG
+GGTGACTTTATTAGGCTCTTCCCGACCAATCTGGCTGGCAAGGTTAGTCAATACGATATG
+CGCGTCGGCAATGCCAGGCAGACCAATATCTTTTGAGAAATTATTCCGTTTTTCAAGCGC
+TTGATTGATTTCTTGTTCGCTAATGGTGTATTGCGTAAGTTGATTACAACCCACTAACAG
+GCCGCTAACTACCAGCGCAGCGGCAAAAAAAACTTTTTCATGGCGTTCCTTAGCATGTTG
+CCTGTGCCCTAATCTTGACACAAAGCAGCATGTCGCGCCAGCGGACATGGCGCCACTAAA
+AAAAGCTGAAAAAGGCGGCAAGAAAGGGTTGCCGCCTGGCGGGCGTTAGATAGCCATTGA
+GGAGAGTAAATTAATTTGCGTCTGCTTAGCCATATTATCGCGATAGTCCGCGACTTTTGT
+CGGCCAGTGAATACCGGCGACCAGCGTCAGATTACGCAGAAGCGGAAACAGATGAATATC
+ATCTTCCGATAATTCGCCGTTAACGGCGTTAGGCTGTACGATGAGTTTATCCAGCAAACG
+TAAATCATCGCCGATCTTTTTAATCAGTCCGGCAGAGTGCGCAAGATGGTTGTCAAAACT
+GCCAGATGAGGCCTCTTTTTTGCGGATAAAATACTGGCGCGCCGCAGGGGTAGAAAATTC
+ATCGAAAGCGGATTTTGCAAATCGCGGCAGCAGTAGCTGATTAACGTAACCGTTAACTTT
+GCGCAACCACTCTTCAATGGCTGGATTACGTTTCCCGGTTAACAGCGGTTTGCCGTCGAG
+GTTGTCGACATAATGTACAATATCCATACTTTCAGGAAGGTAGCGACTATCATCTTTTTG
+CAGGATGGGCACCATCTTTTGACCAATCATCCGGGTGGGCGTCGCCTCGTCGTCATTTTG
+TAACACGTTAAGTTCAACGGGGATGTTCTTCAGGCCGAAAATCATGCGGGCTTTAACGCA
+GAAAGGGCAATGATCGTAAATATAAAGCTTCACGTTTCTCCTCCATTTGACTGTCGGTTC
+CTGACCAGTATGGAGGAGATAGCGACAGGTATCAAATCAGGCGCCGGGTTCCAGCATCCG
+GCGCGGCGTGCGCTTATGACTAAATTGCCAGCCTAAAGCCAAAAAGGTGATAAAGCCGAT
+AATACCGAGCATCATCCACGGTAGTTCAGGCTGCGCAAGCGCTTTACCCATATCAAACAA
+CCAGCCGCCGCCGATATAACTAATCGCGCCGCCAATGGCTAATCCCAGACGGCTAAAGCC
+CATATAGCTTCCCCGCGCCCTCGCGTCCGCGGGCGACGCGCTGAGCGTTTCGCGCGCCGG
+TTCGGCGATAACCGAGCCGATGTAGAAAGCGCAAATAAGCGTAAAAAGCTGCTGTAAATT
+GCCCACCATCCCGATGGGGAGCATGCTCAGCGACATGACGAGCAAACCGGCCATCAGCCG
+ATGCTCCAGCCGAAAACGCTTTTCGCTCCAGCGGGCAATCGGGTAGAGCAACGTCAGCGA
+GAGACACGCCTCAATAGCGTACATCCATTTCACGGCAGCAGGCGAACCGGCGATATCGTT
+TACCATAATCGGCAGCATTAACATGACCTGTACCGCCAGCATATAGTAGCCCGCCAGCGT
+CAGCACGTAGGTGACAAACCTTTTATTGCTCATGACGCGGCGCATTCCTTCACGCACCGG
+CGTTCTGGCCGTTGATAGCTTCCAGGCCGGAAGCAGCCATGCGTTGAAAAGGGCGCATAA
+TATGAACAAAATAGCGCCCGTCGCGCAGACCAGGCGAAAATCGTATTGTAGCAACCAGCT
+TCCCAGCAGCGCGCCAATCACCGCGCCCGCGCTGTCCTGCATCATCAACAGAGAGAAGAA
+GCGGCCCCGTTGCTCCGGACGAATTAATTTGACCACCAGCGCTGAACGCGGCGGGTCGAA
+AAGCGTACCGCCGAGACCGGAAAGAAAGCAGGAAAACCACAAGAGCCAGGGCTCATGCGC
+GATACCCATGGTGGCAAAGCCTGCGGCGCGCATCAGCATACCGGTGACAATCATCGGTTT
+CGCGCCAAAGCGATCGGCGATGGCGCCGCCAAAAATGCCCAGACCTTGTTGAATAAACTG
+ACGCAGGCCGAGCGCGATCCCTACCATTACGGCAGCCCACCCCATTTGATCGACAAAGCG
+AATAGAGATGAGCGGGAAGACGACGAAAAAACCCAGCACCACTAACATGTTATCGATGAG
+AAGAAAATATTTACCCAGGTTCCTCGCCTGCGAGACGCGCGACATTTCCCCTCCCGGGAA
+ATAAAAGATGAGCGTCTTCTATTCTGCGGCGGCGTTTCGTTTTTTCCTACCGTTAGCGGG
+ACAATATTTTTTTATCAAAAGTCCTTTTTAATCGAGAGTTTTCATCAAAATGTGGCAGCA
+ATTCAAAAAATGACGATTTGCGCTTTTCACAGGGCCTGGTTGCGCAGGTATAGTAATGTT
+ACTGGCGTGCTGAAGACGTTACAGGAAGGAGTAGGTATAGAATGTTTGGCTATCGCAGTA
+ACGTGCCAAAAGTGCGCTTAACCACCGATCGTCTGGTGGTACGTTTAGTGCATGAGCGTG
+ATGCCTGGCGTCTGGCCGATTATTACGCGGAAAATCGTCATTTTTTAAAACCCTGGGAAC
+CGGTCCGTGATGAAAGTCATTGTTATCCTTCAGGATGGCAGGCGCGTCTGGGAATGATCG
+GTGAATTTCACAAACAGGGCTCCGCCTTCTATTTCGCGCTACTTGATCCGGAAGAAAAAG
+AAATTATCGGCGTGGCGAATTTTTCCAATGTGGTGCGCGGTTCTTTTCATGCCTGTTATC
+TGGGCTATTCCATTGCGCAAGAGTGGCAGGGGCAAGGGCTGATGTTTGAAGCCTTAACCG
+CTGCGATTCGCTATATGCAGCGCACTCAGCATATCCACCGTATCATGGCGAACTATATGC
+CGCACAACAAACGTAGCGGCGCGTTGCTGGCGCGGCTTGGCTTTGAAAAAGAAGGCTATG
+CGAAAGATTACCTGTTGATTGATGGACAATGGCGCGACCATGTCCTGACGGCGTTAACCA
+CGCCGTTATGGACGCCGGGGCGTTGAGCGGCTTACGGAATGAGAGGCAAAGGGAGAAAAC
+GATGAAATATGAATTAACCGCCACTGAAGCGCGAGTGATTGGCTGTCTGCTGGAAAAGCA
+GGTGACAACGCCGGAACAGTATCCGCTTTCCGTCAACGGGGTGGTGACAGCCTGTAATCA
+GAAAACCAACCGTGAACCGGTGATGAACCTGACGGAACAAGAGGTACAAGAACAGCTCGA
+TAACCTGGTGAAACGCCACTTTTTGCGTACGGTCAGCGGGTTTGGCAACCGCGTCACCAA
+ATATGAACAGCGCTTCTGTAATTCCGAATTTGGCGATCTGAAACTTAGCGCGGCGGAAGT
+GGCGCTCGTCACTACGTTGCTGCTGCGCGGCGCGCAAACGCCCGGCGAGTTGCGTAGCCG
+GGCGTCGCGGATGCATGAATTCAGCGATATGGCGGAAGTTGAATCCACGCTGGAACGGCT
+TGCCAGTCGTGAGGACGGCCCGTATGTCGTCCGTCTGGCGCGTGAACCGGGTAAGCGCGA
+AAGCCGCTATATGCACCTTTTTTGCGGCGACGTCGATGAACTGTCTCTCCAGACGTCTGC
+GCCGGAAAGTGCGTCGGGCGATCTTCAGTCGCGCGTCGAAGCGCTGGAAAGCGAAGTGGC
+GGAGTTAAAGCAGCGGCTGGATTCTTTGTTAGCTCACCTGGGAGAGTAATGTGAGAACAT
+TACGGATTGGCATTGTCGTGTTAGGTGGTATTGCGCAGAAGGCCTGGCTGCCGGTATTAA
+CCAACACCGCCGGATGGACGTTACAGGGCGCCTGGTCTCCTTCGCGGGATAAAGCCTTAC
+GTATTTGCGAAAGCTGGCGCATACCGTATGTGGATTCGCTGGCGAATTTAGCGTCCGGCT
+GCGATGCGGTCTTCGTCCACTCCAGTACCGCAAGCCATTATGCCGTGGTCAGCGAACTTC
+TCAACGCTGGCGTCCATGTCTGCGTGGATAAACCGCTGGCGGAAAATCTACGTGATGCCG
+AACGGCTGGTGGCGCTGGCGGCGCAAAAAAAATTGACGCTGATGGTTGGCTTTAATCGCC
+GTTTCGCGCCGCTGTACCGCGAACTGAAGACGCGCCTCGGCACTGCGGCGTCACTGCGTA
+TGGATAAACATCGTACCGATAGCATCGGGCCGCATGACTTACGTTTTACTTTGCTCGATG
+ACTATCTGCATGTCGTGGATACCGTTCTGTGGCTGGCGGGCGGCGAGGCGCGCCTTGCCA
+GCGGCACGTTGCTCACCAGCGAGTCCGGCGAAATGTGCTATGCGGAACATCATTTTTCCG
+CCGACAAATTACAAATTACCACCAGTATGCACCGGCGCGCCGGAAGTCAGCGTGAATCGG
+TCCAGGCCGTCACCGATGGCGGGCTGTATGACGTGACGGATATGCGTGAATGGCGCGAAG
+AGCGCGGGCAGGGTATTCTCATCAAACCCATTCCGGGTTGGCAAACAACGCTTGAGCAGC
+GTGGTTTTGTCGGATGCGCGCGGCATTTCATTGACTGCGTACAAAATCAGACGGTTCCGG
+AAACGGCGGGGGAGCAGGCGATTTTGGCCCAGCGCGTCGTGGAGGCGCTGTGGCGGGACG
+CCATCAGCGAATAATCCTCTGTAACATCTGGCGGTAGTAATTCATCGTAATCCAGGTACT
+ATACCCTCAATAATTCGAGTTGCAGAAAAGCTAACGCACATGCAGCTCGAAGTATGGCGG
+GTATATATGCCCACTCTACATAATACCTCTTTTCACAGTGAATAATGGCAAACCGTGGGG
+AGTCTGTAAGGCCTGATAAGACGTTTTGACGTCGCCGTCAGGCGCAGTGATACAGCCGGA
+TACGGCAGAAGTTGCGTTATCCGGTCGATGGATCAGCCATGCAGGAGTTTTACGCCAGGG
+TCTGGAATACAAAAGAAATGAATTTATTGAAATCGCTGGCTGCCGTCAGCTCGATGACTA
+TGTTTTCACGCGTGTTGGGCTTTGCCCGTGATGCGATTGTCGCCAGAATTTTTGGCGCAG
+GGATGGCGACCGACGCCTTTTTTGTGGCGTTTAAACTTCCCAATCTACTACGCCGGATCT
+TTGCCGAAGGCGCTTTTTCTCAGGCCTTTGTGCCTATCCTGGCGGAATATAAGAGCAAGC
+AGGGTGAAGAAGCGACGCGGATCTTTGTCGCTTACGTTTCCGGCCTGTTGACGCTGGCAT
+TGGCCGTCGTGACGGTGGCCGGTATGCTGGCCGCCCCGTGGGTGATTATGGTAACCGCGC
+CGGGTTTTGCCGATACTGCGGATAAATTTGCGCTGACGACGCAACTGCTGCGGATTACGT
+TTCCCTATATTCTGCTGATCTCGCTGGCTTCACTGGTTGGCGCCATTCTCAACACCTGGA
+ATCGCTTCTCTATTCCCGCTTTTGCGCCGACATTTCTTAATATCAGTATGATCGGTTTTG
+CATTATTCGCCGCGCCATACTTTAATCCGCCGGTGCTGGCGTTAGCCTGGGCAGTCACCG
+TCGGCGGCGTGCTGCAACTGGTGTATCAACTTCCGTATTTGAAAAAGATCGGTATGCTGG
+TGCTGCCGCGCATTAACTTTCACGACACCGGGGCGATGCGGGTGGTCAAACAGATGGGGC
+CGGCGATTTTGGGCGTTTCCGTCAGTCAGATCTCCCTTATCATCAATACCATTTTCGCCT
+CGTTTCTGGCCTCCGGCTCGGTCTCATGGTTGTACTATGCCGATCGGTTGATGGAGTTCC
+CGTCCGGCGTGCTGGGCGTGGCGTTGGGGACCATCCTGTTGCCGTCATTGTCGAAAAGCT
+TTGCCAGCGGCAATCATGATGAGTACTGCCGCCTGATGGACTGGGGGCTGCGTTTGTGCT
+TTTTACTGGCGTTGCCGAGCGCGGTAGCGCTAGGCATTCTGGCGAAGCCGCTGACGGTCT
+CGCTGTTTCAGTACGGTAAATTCACCGCCTTTGATGCGGCGATGACGCAGCGGGCGTTAA
+TCGCCTATTCGGTGGGGCTGATTGGCTTGATCGTCGTAAAAGTGCTGGCCCCGGGCTTCT
+ATTCTCGCCAGGATATTAAAACGCCGGTGAAAATCGCCATCGTGACGTTAATCATGACGC
+AGTTAATGAACCTGGCGTTTATTGGACCGCTGAAACACGCCGGGCTGTCGCTCTCTATTG
+GTCTGGCGGCATGTCTCAATGCGTCGCTGCTGTACTGGCAACTGCGCAAACAGAATATCT
+TTACGCCACAACCGGGGTGGATGTGGTTCCTGATGCGTCTGATCATTTCCGTACTGGTAA
+TGGCCGCCGTGTTGTTCGGCGTGTTGCATATTATGCCGGAGTGGTCGCAAGGGTCGATGC
+TATGGCGTTTGCTGCGTTTGATGGCGGTAGTGATCGCGGGTATCGCGGCCTATTTCGCCG
+CGCTTGCCGTGCTGGGCTTTAAAGTGAAAGAGTTTGTTCGCCGGACGGCGTAAATTCAGT
+GCCTGATAGCGCTGTGCTATCAGGCCTACAAGGCATTCAGGCCGGAAAGGCGCAACGTCG
+CCATCCGGCAATGATTAGATAGATATTTTTTTACCGCCGCGGTGAGAGACGGAAGTCTGA
+CCGTCAGCCCCGTACAAGGTCGGCTCCTGGTGAGGTTTCAGCACCTCCAGCGCCTGTTGA
+TTACGCTCGATTTGCCCTTCCAGCAGCCAGCCGTTGTGCTGGTTGAGGTCGCGCAGATGC
+TGCGTTTTTTCGGTAATCGCCTGCCAGCGCTCTGCAATGTCATCGTTTGCGCTACGCTGC
+GCGTTCTGCTCCAGACGGCGCTGTTGTTCCAGATAATCCAGCGTCGCCAGCAACGAGCTT
+TTTTCTTCTGTAATACGCTGTAGCTGGCTGCCGTTAATCTGGCCTACGGAAAGCTGTTGT
+TGCTCGGCGTCCATCACCGTCTTCAGGTCATTCAGGACGGTGGTCATCTGGTCAAGTATT
+TCTGACAAACGAGTCATACGCTTATTTATTCTGTAAGTAGCTCTGCGCCTCGCGAATGAG
+CGAGTCTGCTATTTTTCCCGTATCCATTTTTAACTCACCGTTACGGATAGCCGTTTTTAA
+TGCTTCGACGCGTTCCATATTAATGTCGCTGACGCCTGGCTGCATAAGCTTCGCTTGCGC
+GTCGCTTAACGTTACGCTGGCGCTCGTCGCGGCGGACGTTTTTTCCTGACGCGTTTTTTG
+TACCGGCGTGTCGCTGGTTTCGCGCGTCTGGACAGTGCTAACGGGTTTCAAAGGTGAGGT
+ACGGTCAATGCTCATTTATTTATCCTCATCGAGGGTTACGTTGTAGCGGCCAGCTACCAT
+CATGGTTGAATATCTCATCGGCAGCCGCGACAAAATCTTTACACAATTATAGGTTAATAA
+GAATATTCCCATCAGAATCGACGGTTCCACTCACGATTTGACCCGATGTCATGCGCACGC
+GCGCATTTTGCGCGACGGCGGCATTATTCATCGCCTGACCTTCCGCATTGACGCTAAACC
+CCTCGCCATTGGCGATGACCTGTACTCGTTGACCCGCTTTGACACGCCAGGCCTGACGTA
+TCATCGTAAGCTGTACCGGCTGCCCGGGAGCGAGATCGCGCAAACTGACGGCATCCTGAA
+TCTGACGGATATCCAGTACCGTCCGCGGCGGTAACTGATCCAGTCTGCCACGTTTTAGCG
+TGACGTTGGCCGGCGTCAATTTTCCGCCGCGCGCGACGGGCGCGGCTACGGCGACATAAT
+TGCCGGTCGCTTGCACATTCACCTGCAAATAACGTTTTTCATTGGCGCAGCGCGCCACCA
+CATTGACGTTGCCCCACAGCTTCGCGCTGCCCGTCATGCTGAAGGCTGGCTGCTCGCAGC
+TCGGTAGCAGATTGGGCGGTGAACGGAGCGTGACAACCACCTCGTCGCTGAAGCCAGCCA
+GACGCTGGGAAAACCACGTGGTCAGCTGGGCGTTGATGTCCTGCGCCATTGTCAGGGGGC
+TGAACAGCAAAGCCGCCACGGCGAATCCTCGTTTTAACGTTTGCATGGTACTTCCCCCTG
+GTTGATGTCATGACAGGATTCTACCCGTGTGAAGCAAGCATCAACGCAATAAATAGCGAC
+GCATTTTGCGTTTATTCCGGCGATAACGCGCGCGTGAAGGCATTTAAGCTGTCGGCTGAA
+TTTTGCCATTTGCGGAGGAGATATGCTCGACAGGCTCGATGCCGCCTTACGATTTCAGCA
+GGAAGCGCTAAATCTGCGCGCGCAACGTCAGGAAATATTAGCGGCGAATATCGCCAATGC
+CGATACGCCGGGGTATCAGGCGCGCGATATTGATTTTGCCAGTGAGTTAAAAAAAGTGAT
+GGTGCGCGGACGGGAAGAAACCGGCGGCGTCGCGTTGTCGTTGACTTCTTCTCGCCATAT
+TCCCGCCCAGGCGGTCTCTTCTCCCGCAGTGGATCTGCTTTACCGCGTACCCGATCAGCC
+TTCTTTGGATGGTAACACCGTAGATATGGACAGGGAACGTACGCAGTTTGCGGATAACAG
+TCTCAAATATCAGATGGGGCTTACCGTTCTGGGTAGCCAACTCAAAGGCATGATGAATGT
+GCTACAGGGAGGAAACTAATTCGTGGCGCTGTTAAACATTTTTGATATTGCCGGATCGGC
+GCTTGCCGCACAGTCCAAGCGGTTGAACGTTGCGGCCAGTAACCTTGCGAATGCGGATAG
+CGTCACCGGCCCGGACGGACAGCCTTATCGCGCCAAACAGGTGGTTTTTCAGGTGGACGC
+CGCGCCGGGTCAAGCCACTGGCGGGGTAAAGGTCGCCAGCGTGATTGAAAGTCAGGCACC
+GGAAAAGCTGGTTTATGAGCCAGGCAATCCGCTGGCGGACGCTAATGGTTACGTCAAAAT
+GCCCAACGTCGATGTGGTCGGCGAAATGGTCAACACGATGTCAGCCTCGCGCAGCTATCA
+GGCAAATATCGAAGTCCTGAATACCGTAAAAAGCATGATGCTTAAAACGCTGACATTAGG
+CCAGTAAAGGAGGCGCGTATGTCTATTGCCGTAAATATGAATGACCCGACCAACACGGGC
+GTCAAAACGACGACCGGCAGCGGGTCGATGACCGGAAGCAACGCTGCCGATCTGCAAAGC
+AGTTTCCTGACCTTACTGGTCGCGCAATTGAAGAACCAGGACCCGACTAACCCATTACAA
+AATAATGAGTTAACGACACAGTTGGCGCAAATCAGTACCGTGAGCGGCATTGAAAAACTG
+AATACGACGCTGGGGGCTATTTCCGGGCAAATCGATAATAGTCAGTCCCTACAGGCGACC
+ACGCTGATTGGACATGGCGTTATGGTGCCTGGCACCACAATTCTGGCGGGTAAAGGCGCG
+GAAGAAGGGGCCGTGACGTCCACGACGCCGTTTGGCGTGGAATTGCAACAGCCTGCGGAC
+AAAGTGACGGCAACCATTACCGATAAAGATGGCCGGGTGGTACGGACGCTGGAGATCGGT
+GAGTTGCGAGCCGGGGTACACACCTTTACCTGGGATGGTAAGCAAACGGACGGAACAACG
+GTACCGAATGGTTCTTACAACATTGCGATTACCGCCAGCAATGGCGGGACGCAACTGGTG
+GCGCAGCCGCTGCAATTCGCTCTGGTACAGGGCGTGACGAAGGGCAGTAACGGCAACCTG
+TTGGATCTGGGTACCTACGGCACCACCACACTCGACGAAGTTCGGCAAATAATCTAAGCC
+CTTACACTTATCAGGAGTCAGTCATGTCTTTTTCTCAAGCGGTTAGCGGCCTGAACGCTG
+CGGCCACCAACCTTGATGTTATCGGTAATAACATCGCCAACTCCGCCACCTATGGCTTTA
+AGTCCGGTACGGCATCATTTGCCGATATGTTCGCCGGTTCCAAAGTGGGGTTGGGCGTAA
+AAGTGGCGGGGATTACCCAGGATTTTACCGACGGTACGACAACGAACACCGGGCGCGGGC
+TGGATGTCGCGATTAGCCAGAACGGTTTTTTCCGCCTGGTAGACAGCAACGGTTCCGTGT
+TCTATAGCCGCAACGGCCAGTTCAAACTGGACGAGAACCGTAACCTGGTCAATATGCAGG
+GGATGCAGTTGACCGGCTATCCGGCCACCGGTACGCCGCCGACCATTCAGCAGGGGGCGA
+ATCCTGCGCCGATCACCATTCCGAACACGCTGATGGCGGCGAAATCGACCACCACCGCGT
+CAATGCAGATCAACCTGAACTCAACGGACCCTGTACCGTCTAAAACGCCCTTTAGCGTGA
+GTGATGCGGATTCGTATAACAAAAAAGGCACCGTCACCGTTTATGACAGCCAGGGTAATG
+CCCATGACATGAACGTCTATTTTGTGAAAACCAAAGATAATGAATGGGCTGTGTACACCC
+ATGACAGCAGCGATCCTGCAGCCACTGCGCCAACAACGGCGTCCACTACGCTGAAATTCA
+ATGAAAACGGGATTCTGGAGTCTGGCGGTACGGTGAACATCACCACCGGTACGATTAATG
+GCGCGACAGCGGCCACCTTCTCCCTCAGCTTCCTTAACTCCATGCAGCAGAACACCGGGG
+CTAATAACATCGTCGCCACCAATCAAAACGGCTATAAGCCTGGCGACCTGGTGAGCTACC
+AGATTAACAATGATGGCACCGTGGTTGGCAACTACTCCAACGAGCAGGAGCAGGTGCTGG
+GGCAGATTGTGCTGGCTAACTTCGCCAACAACGAAGGTCTGGCATCCCAGGGCGATAACG
+TCTGGGCGGCGACGCAGGCCTCCGGGGTAGCGCTGCTGGGGACTGCCGGTTCCGGCAACT
+TCGGTAAGCTGACGAACGGCGCGCTGGAAGCCTCTAACGTGGATTTGAGTAAAGAGCTGG
+TGAATATGATCGTCGCGCAGCGTAACTACCAGTCGAATGCGCAGACCATCAAAACCCAGG
+ACCAGATCCTCAATACGCTGGTTAACCTGCGCTAAGCGCCTGACGGGATAGCTTAATGGA
+TCACGCAATTTATACCGCCATGGGGGCGGCCAGCCAGACGCTTAACCAGCAGGCGGTAAC
+GGCCAGCAACCTGGCTAATGCCTCAACGCCGGGCTTTCGCGCGCAGCTTAACGCGCTACG
+CGCGGTGCCCGTTGATGGCCTCTCTTTAGCGACGCGCACGTTGGTTACGGCGTCGACGCC
+GGGGGCGGATATGACCCAGGGTCAGTTGGACTACACTTCCCGCCCGCTGGATGTTGCGTT
+ACAGCAGGACGGCTGGCTGGTGGTGCAAGCGGCGGATGGCGCTGAAGGATATACCCGTAA
+CGGGAATATCCAGGTGGGCCCGACCGGGCAGTTAACCATTCAGGGACATCCGGTTATCGG
+CGAAGGCGGCCCGATTACCGTTCCGGAAGGGTCGGAAATCACCATTGCGGCAGACGGCAC
+GATCTCCGCGCTCAATCCCGGCGACCCGCCAAACACGGTGGCGCCCGTTGGGCGGCTGAA
+GCTGGTCAAAGCGGAAGGCAATGAGGTGCAGCGGAGCGATGACGGTTTATTCCGCCTTAC
+CGCCGAGGCACAGGCTGAACGCGGGGCGGTACTGGCCGCCGACCCGTCAATTCGCATTAT
+GTCGGGCGTGCTGGAGGGCAGTAACGTCAAGCCGGTTGAAGCCATGACCGACATGATCGC
+CAACGCACGTCGTTTTGAAATGCAGATGAAGGTTATCACCAGCGTAGATGAGAACGAAGG
+GCGAGCTAACCAACTGCTGTCGATGAGTTAATACAGGACATTTTATGATCAGTTCATTAT
+GGATCGCCAAAACCGGTCTGGACGCGCAGCAAACCAATATGGATGTGATTGCCAATAACC
+TGGCAAACGTCAGCACCAATGGTTTTAAGCGTCAGCGCGCGGTATTTGAAGATCTGTTGT
+ATCAGACCATCCGCCAGCCGGGCGCGCAGTCGTCCGAGCAGACGACGCTGCCTTCCGGGC
+TGCAAATCGGTACCGGCGTGCGTCCGGTCGCCACGGAGCGCCTGCACAGTCAGGGGAACC
+TGTCGCAGACCAACAACAGTAAAGATGTGGCGATTAAAGGGCAGGGCTTTTTCCAGGTCA
+TGCTGCCGGACGGTACGTCTGCCTATACCCGCGACGGCTCTTTCCAGGTGGATCAGAATG
+GTCAACTGGTGACGGCGGGCGGTTTTCAGGTGCAGCCGGCAATCACCATTCCGGCCAACG
+CGTTAAGCATCACGATTGGCCGCGACGGCGTGGTCAGCGTTACCCAGCAAGGGCAGGCCG
+CGCCGGTTCAGGTCGGGCAGCTTAACCTGACCACCTTTATGAACGACACCGGTCTGGAAA
+GCATCGGCGAGAACCTCTATATCGAAACGCAATCGTCCGGCGCGCCGAACGAAAGCACGC
+CGGGGCTCAACGGCGCGGGGTTGTTGTATCAAGGGTATGTCGAAACGTCGAACGTTAACG
+TGGCGGAAGAGCTGGTGAACATGATTCAGGTTCAACGCGCCTATGAAATTAACAGTAAAG
+CAGTATCGACGACCGATCAGATGCTGCAGAAACTGACGCAACTCTAAGGGGCCGCCGGTG
+GGGGATACGCCACCGGCTCCCTGATTTTGAAGATGAAGGTAATGCAAAAATACGCGCTTC
+ACGCTTACCCAGTTATGGCCCTGATGGTCGCGACGCTGACAGGATGCGCCTGGATACCCG
+CTAAACCGCTCGTGCAGGGGGCGACCACGGCGCAGCCGATACCTGGCCCGGTACCGGTGG
+CGAATGGCTCCATATTTCAGTCTGCGCAGCCGATTAATTATGGCTATCAGCCGCTTTTTG
+AAGATCGTCGACCGCGTAATATCGGCGATACGCTCACGATTGTGTTACAGGAAAACGTCA
+GCGCCAGTAAAAGCTCGTCGGCAAATGCCAGCCGCGACGGCAAAACCAGCTTTGGTTTTG
+ATACGGTACCGCGTTATCTGCAGGGATTATTCGGTAATTCCCGCGCGGATATGGAGGCCT
+CCGGCGGCAACTCTTTTAATGGTAAAGGCGGCGCGAATGCCAGCAATACCTTTAGCGGCA
+CGCTGACCGTGACCGTCGATCAGGTTCTGGCCAATGGCAATTTACACGTCGTGGGGGAAA
+AACAGATCGCGATTAATCAGGGAACGGAATTCATCCGCTTCTCCGGCGTGGTAAATCCAC
+GCACCATCAGCGGTAGCAACTCTGTTCCCTCGACACAGGTGGCGGATGCGCGGATTGAAT
+ATGTCGGGAACGGCTATATTAACGAAGCGCAAAATATGGGCTGGCTGCAACGTTTCTTCC
+TTAATTTGTCGCCGATGTAAGCGAGGTGTATGTGTTTAAAGCTCTTGCAGGAATCGTTCT
+GGCACTGGTTGCCACTCTGGCGCACGCCGAGCGTATCCGGGATCTGACCAGTGTCCAGGG
+AGTACGGGAAAACTCGCTGATCGGCTACGGGCTGGTGGTCGGGCTGGACGGTACGGGCGA
+CCAGACGACCCAGACGCCATTTACCACCCAGACGCTGAATAACATGCTGTCACAACTGGG
+GATTACGGTCCCCACCGGCACCAATATGCAGTTGAAAAACGTGGCGGCGGTGATGGTGAC
+GGCGTCGTATCCGCCTTTTGCGCGACAGGGACAAACGATCGATGTCGTCGTTTCCTCAAT
+GGGGAACGCTAAAAGTCTGCGTGGCGGGACGTTATTAATGACGCCGTTAAAAGGGGTGGA
+CAGCCAGGTGTATGCTCTGGCGCAGGGCAATATTCTGGTCGGCGGCGCGGGCGCTTCCGC
+AGGCGGCAGTAGCGTGCAGGTTAACCAGCTTAATGGCGGGCGCATCACTAATGGCGCGAT
+TATCGAACGCGAGTTGCCGACTCAGTTCGGCGCTGGCAACACCATTAATCTGCAATTGAA
+CGACGAAGATTTTACGATGGCGCAGCAAATTACCGACGCCATCAACCGCGCCCGCGGTTA
+CGGCAGCGCCACTGCGCTTGATGCGCGAACGGTACAGGTACGCGTGCCCAGCGGCAACAG
+CTCGCAGGTGCGTTTTCTGGCGGACATTCAAAATATGGAAGTCAACGTGACGCCGCAGGA
+TGCAAAAGTCGTGATCAACTCGCGTACCGGTTCGGTGGTCATGAATCGGGAAGTCACGCT
+GGATAGCTGCGCTGTGGCGCAGGGCAATTTGTCAGTGACAGTCAATCGCCAACTCAACGT
+CAACCAGCCGAATACGCCATTTGGCGGCGGGCAGACCGTGGTGACGCCACAGACTCAGAT
+AGATTTGCGTCAGAGCGGCGGATCGCTACAGAGCGTGCGTTCCAGCGCCAATCTGAACAG
+CGTAGTGCGCGCGCTGAATGCGCTTGGCGCGACGCCGATGGATCTGATGTCGATTTTGCA
+GTCCATGCAGAGCGCGGGCTGTCTACGCGCCAAACTGGAAATCATCTGATGATCGGAGAC
+GGTAAATTGCTGGCCAGCGCGGCCTGGGATGCGCAATCTCTGAACGAACTGAAAGCGAAA
+GCGGGCCAGGACCCGGCGGCGAATATCCGTCCTGTGGCCCGTCAGGTGGAAGGGATGTTT
+GTGCAGATGATGCTGAAAAGTATGCGCGAGGCTTTACCCAAAGATGGTTTATTCAGCAGC
+GATCAGACGCGTCTGTATACCAGCATGTATGACCAGCAGATCGCCCAGCAGATGACCGCC
+GGTAAGGGATTGGGGCTGGCGGATATGATGGTTAAACAGATGACGGGCGGGCAGACGATG
+CCTGCAGATGATGCGCCGCAAGTACCGCTTAAATTCTCCCTGGAGACGGTAAACAGCTAT
+CAAAATCAGGCGCTGACCCAACTGGTGCGCAAAGCCATACCGAAAACGCCGGACAGCAGC
+GATGCGCCGCTCTCCGGCGACAGTAAAGACTTTCTGGCCCGGCTTTCGCTCCCGGCGAGG
+CTGGCCAGCGAACAAAGCGGGGTGCCGCATCATCTGATTCTGGCGCAGGCGGCGCTGGAG
+TCCGGCTGGGGGCAGCGGCAAATCCTGCGGGAGAATGGCGAACCCAGCTATAACGTATTT
+GGCGTGAAAGCGACCGCCAGTTGGAAAGGGCCGGTGACGGAAATCACCACCACTGAATAC
+GAAAATGGCGAAGCGAAAAAAGTGAAAGCGAAATTCCGCGTCTATAGCTCGTATCTGGAG
+GCGTTATCGGATTATGTCGCGCTGTTAACGCGTAACCCACGCTACGCTGCCGTGACCACT
+GCCGCCACGGCAGAGCAGGGCGCAGTGGCTCTGCAAAACGCCGGATACGCCACTGACCCG
+AATTACGCGCGTAAATTGGCCAGCATGATTCAGCAGTTGAAAGCGATGAGTGAAAAGGTC
+AGCAAAACCTACAGCGCGAATCTCGACAATCTCTTTTAAATTGCTCAAGTCCACGTAGTC
+GCTGCCGATAACAACGAGTATTGAAGGATTAAAAGGAACCATCATGTCCAGCTTGATTAA
+TCACGCCATGAGCGGACTTAACGCCGCGCAGGCCGCGTTAAATACGGTCAGTAATAACAT
+CAACAATTATAACGTTGCGGGTTATACCCGGCAGACAACTATTCTGGCGCAGGCAAACAG
+TACGTTAGGGGCTGGCGGCTGGATAGGTAATGGCGTTTACGTTTCAGGCGTACAGCGCGA
+ATATGATGCGTTTATCACTAATCAGCTACGCGGCGCGCAAAACCAGAGCAGCGGCTTAAC
+CACGCGCTATGAACAAATGTCGAAAATCGACAACCTGCTGGCCGATAAATCCAGCTCACT
+GTCTGGCTCGCTGCAGAGTTTTTTTACCAGCCTGCAAACGTTAGTCAGTAATGCGGAAGA
+TCCTGCGGCGCGTCAGGCGCTGATTGGTAAAGCGGAAGGGCTGGTAAACCAGTTCAAAAC
+CACCGATCAGTATCTGCGCGATCAGGATAAACAGGTCAATATCGCGATTGGCTCCAGCGT
+GGCGCAAATCAACAATTACGCGAAGCAGATAGCTAACCTGAACGATCAAATCTCCCGTAT
+GACGGGCGTAGGCGCGGGCGCATCGCCGAACGACCTGCTCGATCAACGTGATCAGTTGGT
+TAGCGAGCTTAACAAGATCGTTGGCGTCGAGGTGAGTGTACAGGACGGCGGCACCTATAA
+CCTGACGATGGCCAATGGCTATACGCTGGTGCAGGGGTCGACGGCGCGTCAGTTGGCGGC
+GGTTCCCTCCAGCGCCGACCCGACGCGAACGACTGTCGCTTATGTCGATGAGGCCGCCGG
+TAACATCGAAATTCCGGAAAAGTTGCTGAACACCGGTTCGCTCGGCGGGCTACTGACGTT
+CCGTTCTCAGGATCTGGATCAGACTCGTAATACGCTGGGCCAGTTGGCGTTGGCGTTTGC
+CGATGCGTTTAACGCGCAGCATACCAAAGGTTATGACGCCGACGGCAATAAAGGGAAAGA
+CTTCTTTAGCATTGGCTCGCCGGTGGTATATAGCAACAGTAATAATGCCGATAAAACGGT
+ATCGCTAACCGCTAAGGTGGTCGACAGCACGAAGGTTCAGGCGACGGATTATAAGATTGT
+TTTTGACGGTACAGACTGGCAGGTTACTCGCACTGCGGATAACACCACCTTCACGGCAAC
+AAAAGATGCTGACGGAAAACTGGAGATTGACGGTCTGAAAGTGACGGTAGGGACTGGCGC
+ACAGAAAAACGACAGTTTTCTTCTCAAGCCGGTCAGCAATGCTATCGTCGACATGAACGT
+TAAAGTGACAAATGAAGCCGAGATTGCGATGGCGTCTGAGTCAAAACTCGATCCTGATGT
+GGATACCGGCGACAGCGATAACCGCAATGGTCAGGCATTGCTGGACTTACAAAACAGCAA
+TGTAGTGGGCGGCAACAAAACCTTTAACGATGCTTACGCCACGTTGGTCAGCGATGTGGG
+TAACAAAACGTCAACGCTGAAAACCAGCAGCACCACGCAGGCGAATGTGGTTAAACAGCT
+TTATAAACAGCAACAGTCGGTTTCCGGCGTTAACCTCGACGAAGAGTACGGCAATTTGCA
+GCGTTATCAGCAGTATTATCTGGCGAATGCGCAAGTATTGCAGACCGCGAATGCGCTGTT
+TGATGCGTTATTGAATATTCGCTAAAGGAGAAGGATGACATGCGTATCAGTACCCAGATG
+ATGTACGAACAAAATATGAGCGGCATCACTAATTCTCAGGCCGAATGGATGAAGCTGGGC
+GAGCAGATGTCTACCGGTAAGCGCGTTACCAACCCATCTGACGATCCGATCGCCGCGTCG
+CAGGCGGTAGTACTCTTTCAGGCGCAGGCGCAGAATAGCCAGTACGCCCTGGCGCGTACG
+TTTGCCACCCAAAAAGTGTCGCTGGAAGAAAGCGTACTCAGTCAGGTGACGACGGCGATT
+CAAACCGCGCAGGAAAAAATCGTCTATGCCGGAAACGGCACGTTAAGCGACGATGACCGC
+GCGTCGCTGGCGACGGATTTACAGGGGATCCGCGATCAGCTGATGAACCTGGCAAACAGC
+ACTGACGGCAATGGTCGCTATATCTTTGCCGGGTATAAAACGGAAGCGGCGCCATTCGAC
+CAGGCGACAGGTGGTTATCATGGCGGCGAGAAAAGTGTTACCCAGCAGGTGGATTCCGCA
+CGCACGATGGTAATTGGCCATACGGGAGCGCAAATTTTTAATAGCATCACCAGCAATGCG
+GTGCCGGAACCGGATGGCTCGGACTCCGAAAAGAATCTGTTTGTCATGCTCGATACGGCA
+ATTGCCGCGCTCAAGACCCCGGTGGAAGGCAATGACGTGGAAAAAGAAAAAGCCGCTGCC
+GCCATTGATAAAACCAATCGCGGCTTAAAAAATTCGCTTAATAACGTCCTGACCGTTCGT
+GCGGAACTGGGAACGCAACTGAGCGAACTCAGTACGCTGGATTCACTGGGAAGCGACCGT
+GCGCTGGGACAGAAGCTACAGATGAGCAACCTGGTAGATGTGGACTGGAACTCGGTCATT
+TCCTCCTACGTCATGCAACAGGCGGCATTACAGGCGTCCTATAAAACGTTTACCGACATG
+CAGGGAATGTCGCTTTTCCAGTTGAACCGGTAACGCCTCTTTTTGAAACATATCACGAAA
+CTGGATATGTTTTGTCTGCCCGCGCCATCCACCCCGGCGCGGGCATTTTTTGTCTATGGA
+AAACCCCCAGCTAGGCTGGGGGTTCCGGAAAGCTTTCAGCTTTAAGCCAGTTATTAAAAC
+CCCTTTTGATTTGTTAAAACATCTTGCGGTCTGGC
\ No newline at end of file
diff --git a/t/data/real_data_2.gff b/t/data/real_data_2.gff
new file mode 100644
index 0000000..100910e
--- /dev/null
+++ b/t/data/real_data_2.gff
@@ -0,0 +1,1641 @@
+##gff-version 3
+##sequence-region ERS222222|SC|contig000020 1 92255
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	241	921	.	-	0	ID=22222_2#22_04055;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002145956.2,protein motif:Pfam:PF07108.5;locus_tag=22222_2#22_04055;product=pathogenicity island-encoded protein A,PipA protein;protein_id=gnl|SC|22222_2#22_04055
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	1144	2019	.	-	0	ID=22222_2#22_04056;gene=pipB2_2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002145957.1,similar to AA sequence:UniProtKB:Q8ZMM8,protein motif:CLUSTERS:PRK15197,protein motif:Cdd:COG5351,protein motif:Pfam:PF00805.16;locus_tag=22222_2#22_04056;product=secreted effector protein,Type III effector pipB2,secreted effector protein PipB,Uncharacterized protein conserved in bacteria,Pentapeptide repeat [...]
+ERS222222|SC|contig000020	Infernal:1.1	ncRNA	2139	2237	.	+	0	ID=22222_2#22_04057;inference=COORDINATES:profile:Infernal:1.1;locus_tag=22222_2#22_04057;product=isrI
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	2567	2908	.	-	0	ID=22222_2#22_04058;gene=sigE;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002145959.1,similar to AA sequence:UniProtKB:O30917,protein motif:CLUSTERS:PRK15202,protein motif:Pfam:PF07824.6;locus_tag=22222_2#22_04058;product=chaperone protein SigE,Chaperone protein sigE,type III secretion chaperone protein SigE,Type III secretion chaperone domain;protein_id=gnl|SC|22222_2#22_04058
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	2925	4610	.	-	0	ID=22222_2#22_04059;eC_number=3.1.3.-,3.1.3.-;gene=sopB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002145960.1,similar to AA sequence:UniProtKB:O30916,protein motif:CLUSTERS:PRK15378,protein motif:Pfam:PF05925.6;locus_tag=22222_2#22_04059;product=inositol phosphate phosphatase SopB,Inositol phosphate phosphatase sopB,inositol phosphate phosphatase SopB,Enterobacterial virulence protein IpgD;pro [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	5332	6801	.	-	0	ID=22222_2#22_04060;eC_number=3.4.-.-;gene=pepD_2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002215025.1,similar to AA sequence:UniProtKB:Q8G6Z9,protein motif:Pfam:PF03577.9;locus_tag=22222_2#22_04060;product=peptidase family C69,Dipeptidase,Peptidase family C69;protein_id=gnl|SC|22222_2#22_04060
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	6974	8338	.	-	0	ID=22222_2#22_04061;eC_number=2.7.13.3;gene=yedV;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002145964.1,similar to AA sequence:UniProtKB:P76339,protein motif:CLUSTERS:PRK09835,protein motif:Cdd:COG5278,protein motif:TIGRFAMs:TIGR01386,protein motif:Pfam:PF02518.20;locus_tag=22222_2#22_04061;product=heavy metal sensor kinase subfamily,Probable sensor-like histidine kinase YedV,sensor kinase CusS [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	8331	9068	.	-	0	ID=22222_2#22_04062;gene=copR;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002243090.1,similar to AA sequence:UniProtKB:P76340,protein motif:CLUSTERS:PRK11517,protein motif:Cdd:COG4565,protein motif:TIGRFAMs:TIGR01387,protein motif:Pfam:PF00072.18;locus_tag=22222_2#22_04062;product=transcriptional regulatory protein YedW,Probable transcriptional regulatory protein YedW,transcriptional regulatory  [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	9147	9557	.	+	0	ID=22222_2#22_04063;eC_number=3.5.2.17,3.5.2.17;gene=uraH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002145966.1,similar to AA sequence:UniProtKB:Q4VYA5,protein motif:CLUSTERS:PRK15036,protein motif:TIGRFAMs:TIGR02962,protein motif:Pfam:PF00576.15;locus_tag=22222_2#22_04063;product=hydroxyisourate hydrolase,5-hydroxyisourate hydrolase precursor,hydroxyisourate hydrolase,hydroxyisourate hydrolas [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	9890	10402	.	-	0	ID=22222_2#22_04064;eC_number=1.5.1.36;gene=hpaC;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729827.1,similar to AA sequence:UniProtKB:Q57501,protein motif:CLUSTERS:PRK15486,protein motif:TIGRFAMs:TIGR02296,protein motif:Pfam:PF01613.12;locus_tag=22222_2#22_04064;product=4-hydroxyphenylacetate 3-monooxygenase coupling protein,4-hydroxyphenylacetate 3-monooxygenase reductase component,4-hydro [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	10420	11652	.	-	0	ID=22222_2#22_04065;eC_number=1.14.14.9;gene=hpaB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002145968.1,similar to AA sequence:UniProtKB:Q57160,protein motif:TIGRFAMs:TIGR02310,protein motif:Pfam:PF03241.7;locus_tag=22222_2#22_04065;product=4-hydroxyphenylacetate 3-monooxygenase%2C oxygenase component,4-hydroxyphenylacetate 3-monooxygenase oxygenase component,4-hydroxyphenylacetate 3-monooxy [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	11833	12273	.	-	0	ID=22222_2#22_04066;gene=hpaR;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002145969.1,similar to AA sequence:UniProtKB:O07458,protein motif:TIGRFAMs:TIGR02337,protein motif:Pfam:PF01047.16;locus_tag=22222_2#22_04066;product=homoprotocatechuate degradation operon regulator%2C HpaR,Benzoate anaerobic degradation regulator,homoprotocatechuate degradation operon regulator%2C HpaR,MarR family;prote [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	12548	13837	.	+	0	ID=22222_2#22_04067;eC_number=5.3.3.-;gene=hpaG;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729830.1,similar to AA sequence:UniProtKB:P37352,protein motif:CLUSTERS:PRK15203,protein motif:Cdd:COG0179,protein motif:TIGRFAMs:TIGR02303,protein motif:Pfam:PF01557.12;locus_tag=22222_2#22_04067;product=4-hydroxyphenylacetate degradation bifunctional isomerase/decarboxylase,Homoprotocatechuate cata [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	13834	14133	.	+	0	ID=22222_2#22_04068;eC_number=1.2.1.8;gene=hpcC_1;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729831.1,similar to AA sequence:UniProtKB:Q9HTJ1,protein motif:CLUSTERS:PRK13252,protein motif:Cdd:COG3191,protein motif:TIGRFAMs:TIGR02299,protein motif:Pfam:PF00171.1;locus_tag=22222_2#22_04068;product=5-carboxymethyl-2-hydroxymuconate semialdehyde dehydrogenase,Betaine aldehyde dehydrogenase,bet [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	14127	15299	.	+	0	ID=22222_2#22_04069;eC_number=1.2.1.-;gene=hpcC_2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729831.1,similar to AA sequence:UniProtKB:P23105,protein motif:CLUSTERS:PRK13252,protein motif:Cdd:COG4230,protein motif:TIGRFAMs:TIGR02299,protein motif:Pfam:PF00171.1;locus_tag=22222_2#22_04069;product=5-carboxymethyl-2-hydroxymuconate semialdehyde dehydrogenase,2-hydroxymuconic semialdehyde dehy [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	15301	16152	.	+	0	ID=22222_2#22_04070;eC_number=1.13.11.15,1.13.11.15;gene=hpcB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729832.1,similar to AA sequence:UniProtKB:Q05353,protein motif:CLUSTERS:PRK03881,protein motif:Cdd:COG0179,protein motif:TIGRFAMs:TIGR02298,protein motif:Pfam:PF02900.1;locus_tag=22222_2#22_04070;product=3%2C4-dihydroxyphenylacetate 2%2C3-dioxygenase,3%2C4-dihydroxyphenylacetate 2%2C3-d [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	16162	16542	.	+	0	ID=22222_2#22_04071;eC_number=5.3.3.10,5.3.3.10;gene=hpcD;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729833.1,similar to AA sequence:UniProtKB:Q05354,protein motif:CLUSTERS:PRK15031,protein motif:Cdd:COG3232,protein motif:Pfam:PF02962.1;locus_tag=22222_2#22_04071;product=5-carboxymethyl-2-hydroxymuconate delta-isomerase,5-carboxymethyl-2-hydroxymuconate Delta-isomerase,5-carboxymethyl-2-hy [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	16686	17489	.	+	0	ID=22222_2#22_04072;eC_number=4.2.-.-,4.2.1.80;gene=hpcG;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729834.1,similar to AA sequence:UniProtKB:Q9S156,protein motif:CLUSTERS:PRK11342,protein motif:Cdd:COG3971,protein motif:TIGRFAMs:TIGR02312,protein motif:Pfam:PF01557.12;locus_tag=22222_2#22_04072;product=2-oxo-hepta-3-ene-1%2C7-dioic acid hydratase,2-keto-4-pentenoate hydratase,2-keto-4-pen [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	17500	18291	.	+	0	ID=22222_2#22_04073;eC_number=4.1.2.-;gene=hpaI;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_216043.1,similar to AA sequence:UniProtKB:Q47098,protein motif:CLUSTERS:PRK10128,protein motif:Cdd:COG2301,protein motif:TIGRFAMs:TIGR02311,protein motif:Pfam:PF03328.8;locus_tag=22222_2#22_04073;product=4-hydroxyphenylacetate catabolism,4-hydroxy-2-oxo-heptane-1%2C7-dioate aldolase,2-keto-3-deoxy-L-rha [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	18363	19739	.	+	0	ID=22222_2#22_04074;gene=hpaX;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729836.1,similar to AA sequence:UniProtKB:P76470,protein motif:CLUSTERS:PRK11551,protein motif:Cdd:COG2814,protein motif:TIGRFAMs:TIGR02332,protein motif:Pfam:PF07690.10;locus_tag=22222_2#22_04074;product=4-hydroxyphenylacetate permease,Inner membrane transport protein RhmT,putative 3-hydroxyphenylpropionic transporte [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	19749	20645	.	+	0	ID=22222_2#22_04075;gene=hpaA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729837.1,similar to AA sequence:UniProtKB:P40408,protein motif:CLUSTERS:PRK10572,protein motif:Cdd:COG2169,protein motif:TIGRFAMs:TIGR02297,protein motif:Pfam:PF12833.1;locus_tag=22222_2#22_04075;product=4-hydroxyphenylacetate 3-monooxygenase operon regulatory protein,Bacillibactin transport regulator,DNA-binding tran [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	20659	21597	.	+	0	ID=22222_2#22_04076;eC_number=3.1.26.11,3.1.26.11;gene=rnz;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_006087376.1,similar to AA sequence:UniProtKB:P54548,protein motif:CLUSTERS:PRK00055,protein motif:Cdd:COG5212,protein motif:TIGRFAMs:TIGR02651,protein motif:Pfam:PF12706.1;locus_tag=22222_2#22_04076;product=Ribonuclease Z,Ribonuclease Z,ribonuclease Z,Low-affinity cAMP phosphodiesterase,ribon [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	23021	23326	.	-	0	ID=22222_2#22_04077;gene=cbpM;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_001570915.1,similar to AA sequence:UniProtKB:P63264,protein motif:CLUSTERS:PRK10265;locus_tag=22222_2#22_04077;product=chaperone-modulator protein CbpM,Chaperone modulatory protein CbpM,chaperone-modulator protein CbpM;protein_id=gnl|SC|22222_2#22_04077
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	23326	24246	.	-	0	ID=22222_2#22_04078;gene=cbpA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729840.1,similar to AA sequence:UniProtKB:P36659,protein motif:CLUSTERS:PRK10266,protein motif:Cdd:COG5407,protein motif:TIGRFAMs:TIGR02349,protein motif:Pfam:PF01556.12;locus_tag=22222_2#22_04078;product=curved DNA-binding protein,Curved DNA-binding protein,curved DNA-binding protein CbpA,Preprotein translocase subun [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	24482	24844	.	+	0	ID=22222_2#22_04079;gene=scsA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729841.1;locus_tag=22222_2#22_04079;product=copper-sensitivity suppressor membrane protein A;protein_id=gnl|SC|22222_2#22_04079
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	24893	26779	.	+	0	ID=22222_2#22_04080;eC_number=1.8.1.8;gene=scsB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729842.1,similar to AA sequence:UniProtKB:P36655,protein motif:CLUSTERS:PRK00293,protein motif:Cdd:COG4233,protein motif:Pfam:PF02683.9;locus_tag=22222_2#22_04080;product=copper-sensitivity suppressor membrane protein B,Thiol:disulfide interchange protein DsbD precursor,thiol:disulfide interchange pr [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	26776	27399	.	+	0	ID=22222_2#22_04081;gene=scsC;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729843.1,similar to AA sequence:UniProtKB:O32218,protein motif:Cdd:COG1651,protein motif:Pfam:PF01323.14;locus_tag=22222_2#22_04081;product=copper-sensitivity secreted suppressor protein C,Thiol-disulfide oxidoreductase D,Protein-disulfide isomerase,DSBA-like thioredoxin domain;protein_id=gnl|SC|22222_2#22_04081
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	27389	27895	.	+	0	ID=22222_2#22_04082;gene=resA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002145985.1,similar to AA sequence:UniProtKB:P35160,protein motif:CLUSTERS:PRK03147,protein motif:TIGRFAMs:TIGR00385,protein motif:Pfam:PF08534.4;locus_tag=22222_2#22_04082;product=suppressor for copper-sensitivity D,Thiol-disulfide oxidoreductase resA,thiol-disulfide oxidoreductase,periplasmic protein thiol:disulfide ox [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	28028	29269	.	+	0	ID=22222_2#22_04083;eC_number=3.1.3.10;gene=agp;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_001570908.1,similar to AA sequence:UniProtKB:P19926,protein motif:CLUSTERS:PRK10173,protein motif:Pfam:PF00328.1;locus_tag=22222_2#22_04083;product=glucose-1-phosphatase/inositol phosphatase,Glucose-1-phosphatase precursor,glucose-1-phosphatase/inositol phosphatase,Histidine phosphatase superfamily (bra [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	29303	29530	.	-	0	ID=22222_2#22_04084;gene=yccJ;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_006087385.1,similar to AA sequence:UniProtKB:P0AB14,protein motif:CLUSTERS:PRK10174;locus_tag=22222_2#22_04084;product=YccJ-like protein,hypothetical protein,hypothetical protein;protein_id=gnl|SC|22222_2#22_04084
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	29551	30147	.	-	0	ID=22222_2#22_04085;gene=wrbA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729847.1,similar to AA sequence:UniProtKB:P0A8G6,protein motif:CLUSTERS:PRK03767,protein motif:TIGRFAMs:TIGR01755,protein motif:Pfam:PF03358.9;locus_tag=22222_2#22_04085;product=trp repressor binding protein,Trp repressor-binding protein,NAD(P)H:quinone oxidoreductase,NAD(P)H:quinone oxidoreductase%2C type IV,NADPH-de [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	30532	30699	.	+	0	ID=22222_2#22_04086;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_006087756.1,protein motif:Pfam:PF10685.3;locus_tag=22222_2#22_04086;product=Conidiation-specific protein 10,Stress-induced bacterial acidophilic repeat motif;protein_id=gnl|SC|22222_2#22_04086
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	30836	31474	.	+	0	ID=22222_2#22_04087;gene=rutR;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729849.1,similar to AA sequence:UniProtKB:P0ACU2,protein motif:CLUSTERS:PRK15008,protein motif:TIGRFAMs:TIGR03613,protein motif:Pfam:PF08362.5;locus_tag=22222_2#22_04087;product=transcriptional regulator,Rut operon repressor,HTH-type transcriptional regulator RutR,pyrimidine utilization regulatory protein R,YcdC-like  [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	31471	31866	.	-	0	ID=22222_2#22_04088;inference=ab initio prediction:Prodigal:2.60,protein motif:Cdd:COG3755,protein motif:Pfam:PF07007.6;locus_tag=22222_2#22_04088;product=Uncharacterized protein conserved in bacteria,Protein of unknown function (DUF1311);protein_id=gnl|SC|22222_2#22_04088
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	31925	35887	.	-	0	ID=22222_2#22_04089;gene=putA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002243116.1,similar to AA sequence:UniProtKB:P09546,protein motif:CLUSTERS:PRK11809,protein motif:Cdd:COG4230,protein motif:TIGRFAMs:TIGR01238,protein motif:Pfam:PF01619.12;locus_tag=22222_2#22_04089;product=trifunctional transcriptional regulator/proline dehydrogenase/pyrroline-5-carboxylate dehydrogenase,Bifunctional p [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	36309	37817	.	+	0	ID=22222_2#22_04090;gene=putP;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002145994.1,similar to AA sequence:UniProtKB:P07117,protein motif:CLUSTERS:PRK15419,protein motif:Cdd:COG4145,protein motif:TIGRFAMs:TIGR02121,protein motif:Pfam:PF00474.11;locus_tag=22222_2#22_04090;product=sodium/proline symporter,Propionate transporter,proline:sodium symporter PutP,Na+/panthothenate symporter,sodium/p [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	38435	38731	.	+	0	ID=22222_2#22_04091;inference=ab initio prediction:Prodigal:2.60;locus_tag=22222_2#22_04091;product=hypothetical protein;protein_id=gnl|SC|22222_2#22_04091
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	38710	39498	.	+	0	ID=22222_2#22_04092;gene=phoH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729855.1,similar to AA sequence:UniProtKB:P0A9K1,protein motif:CLUSTERS:PRK10536,protein motif:Cdd:COG1875,protein motif:Pfam:PF02562.10;locus_tag=22222_2#22_04092;product=phosphate starvation-inducible protein PsiH,Phosphate starvation-inducible protein psiH,hypothetical protein,Predicted ATPase related to phosphate  [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	39604	40485	.	-	0	ID=22222_2#22_04093;gene=ybbH_2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_001570898.1,similar to AA sequence:UniProtKB:Q45581,protein motif:CLUSTERS:PRK11337,protein motif:Cdd:COG2222,protein motif:TIGRFAMs:TIGR03127,protein motif:Pfam:PF01418.11;locus_tag=22222_2#22_04093;product=putative transcriptional regulator,Uncharacterized HTH-type transcriptional regulator ybbH,DNA-binding transcrip [...]
+ERS222222|SC|contig000020	Infernal:1.1	ncRNA	40486	40770	.	+	0	ID=22222_2#22_04094;inference=COORDINATES:profile:Infernal:1.1;locus_tag=22222_2#22_04094;product=STnc500
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	40771	41673	.	-	0	ID=22222_2#22_04095;gene=yidK;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002215057.1,similar to AA sequence:UniProtKB:P31448,protein motif:CLUSTERS:PRK10484,protein motif:Cdd:COG4146,protein motif:TIGRFAMs:TIGR00813,protein motif:Pfam:PF00474.11;locus_tag=22222_2#22_04095;product=sodium-glucose/galactose cotransporter,Uncharacterized symporter yidK,putative transporter,Predicted symporter,tra [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	41741	42268	.	-	0	ID=22222_2#22_04096;gene=sglT;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002215057.1,similar to AA sequence:UniProtKB:P96169,protein motif:CLUSTERS:PRK10484,protein motif:Cdd:COG4146,protein motif:TIGRFAMs:TIGR00813,protein motif:Pfam:PF00474.11;locus_tag=22222_2#22_04096;product=sodium-glucose/galactose cotransporter,Na(+)/glucose symporter,putative transporter,Predicted symporter,transporte [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	42605	43285	.	-	0	ID=22222_2#22_04097;eC_number=5.1.3.9;gene=nanE;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_001570896.2,similar to AA sequence:UniProtKB:P60668,protein motif:CLUSTERS:PRK01130,protein motif:Pfam:PF04131.8;locus_tag=22222_2#22_04097;product=N-acetylmannosamine-6-phosphate 2-epimerase,Putative N-acetylmannosamine-6-phosphate 2-epimerase,N-acetylmannosamine-6-phosphate 2-epimerase,Putative N-acet [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	43804	44946	.	+	0	ID=22222_2#22_04098;eC_number=5.1.3.24;gene=nanM;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_001570895.1,similar to AA sequence:UniProtKB:P44544,protein motif:CLUSTERS:PRK14131,protein motif:TIGRFAMs:TIGR03547,protein motif:Pfam:PF01344.19;locus_tag=22222_2#22_04098;product=N-acetylneuraminic acid mutarotase,N-acetylneuraminate epimerase precursor,N-acetylneuraminic acid mutarotase,mutatrotase [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	44992	45684	.	+	0	ID=22222_2#22_04099;gene=yiiy;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_216068.1,similar to AA sequence:UniProtKB:Q934G3,protein motif:CLUSTERS:PRK09980,protein motif:Cdd:COG3203,protein motif:Pfam:PF06178.7;locus_tag=22222_2#22_04099;product=outer membrane protein,Oligogalacturonate-specific porin kdgM precursor,outer membrane porin L,Outer membrane protein (porin),Oligogalacturonate-specif [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	45967	47247	.	+	0	ID=22222_2#22_04100;gene=nanT_3;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_001570893.1,similar to AA sequence:UniProtKB:P41036,protein motif:CLUSTERS:PRK12307,protein motif:Cdd:COG2814,protein motif:TIGRFAMs:TIGR00891,protein motif:Pfam:PF00083.18;locus_tag=22222_2#22_04100;product=putative sialic acid transporter,Sialic acid permease,putative sialic acid transporter,Arabinose efflux permease [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	47261	48364	.	+	0	ID=22222_2#22_04101;eC_number=1.1.1.292;gene=yjhC;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_216069.1,similar to AA sequence:UniProtKB:Q2I8V6,protein motif:CLUSTERS:PRK11579,protein motif:Pfam:PF01408.16;locus_tag=22222_2#22_04101;product=dehydrogenase-like protein,1%2C5-anhydro-D-fructose reductase,putative oxidoreductase,Oxidoreductase family%2C NAD-binding Rossmann fold;protein_id=gnl|SC|2 [...]
+ERS222222|SC|contig000020	Aragorn:1.2.36	tRNA	48701	48788	.	-	0	ID=22222_2#22_04102;inference=COORDINATES:profile:Aragorn:1.2.36;locus_tag=22222_2#22_04102;product=tRNA-Ser(gga)
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	49024	49962	.	+	0	ID=22222_2#22_04103;eC_number=1.1.1.79;gene=ghrA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729856.1,similar to AA sequence:UniProtKB:Q8ZQ30,protein motif:CLUSTERS:PRK15469,protein motif:Cdd:COG1932,protein motif:TIGRFAMs:TIGR01327,protein motif:Pfam:PF02826.13;locus_tag=22222_2#22_04103;product=2-hydroxyacid dehydrogenase,Glyoxylate/hydroxypyruvate reductase A,bifunctional glyoxylate/hydr [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	50047	50784	.	+	0	ID=22222_2#22_04104;eC_number=3.1.3.-;gene=ycdX;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_001570887.1,similar to AA sequence:UniProtKB:P75914,protein motif:CLUSTERS:PRK09248,protein motif:Cdd:COG1387,protein motif:TIGRFAMs:TIGR01856,protein motif:Pfam:PF02811.13;locus_tag=22222_2#22_04104;product=putative hydrolase,Probable phosphatase YcdX,putative hydrolase,Histidinol phosphatase and relat [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	50808	51362	.	+	0	ID=22222_2#22_04105;gene=ycdY;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002146908.1,similar to AA sequence:UniProtKB:P75915,protein motif:CLUSTERS:PRK11621,protein motif:Cdd:COG3381,protein motif:Pfam:PF02613.9;locus_tag=22222_2#22_04105;product=chaperone%2C TorD family,Chaperone protein YcdY,twin-argninine leader-binding protein DmsD,Uncharacterized component of anaerobic dehydrogenases,Nit [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	51451	51945	.	+	0	ID=22222_2#22_04106;gene=ycdZ;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P75916,protein motif:Pfam:PF06496.5;locus_tag=22222_2#22_04106;product=Inner membrane protein ycdZ,Protein of unknown function (DUF1097);protein_id=gnl|SC|22222_2#22_04106
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	51983	52816	.	-	0	ID=22222_2#22_04107;gene=csgG;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729860.1,protein motif:CLUSTERS:PRK15184,protein motif:Pfam:PF03783.8;locus_tag=22222_2#22_04107;product=assembly/transport component in curli production,curli production assembly/transport protein CsgG,Curli production assembly/transport component CsgG;protein_id=gnl|SC|22222_2#22_04107
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	52843	53259	.	-	0	ID=22222_2#22_04108;gene=csgF;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729861.1,protein motif:CLUSTERS:PRK10050,protein motif:Pfam:PF10614.3;locus_tag=22222_2#22_04108;product=assembly/transport component in curli production,curli assembly protein CsgF,Curli production assembly/transport component CsgF;protein_id=gnl|SC|22222_2#22_04108
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	53286	53681	.	-	0	ID=22222_2#22_04109;gene=csgE;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729862.1,protein motif:CLUSTERS:PRK10386,protein motif:Pfam:PF10627.3;locus_tag=22222_2#22_04109;product=assembly/transport component in curli production,curli assembly protein CsgE,Curli assembly protein CsgE;protein_id=gnl|SC|22222_2#22_04109
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	53710	54336	.	-	0	ID=22222_2#22_04110;gene=csgD;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729863.1,similar to AA sequence:UniProtKB:P52106,protein motif:CLUSTERS:PRK10100,protein motif:Cdd:COG2909,protein motif:TIGRFAMs:TIGR03020,protein motif:Pfam:PF00196.13;locus_tag=22222_2#22_04110;product=regulatory protein,CsgBAC operon transcriptional regulatory protein,DNA-binding transcriptional regulator CsgD,ATP [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	55092	55547	.	+	0	ID=22222_2#22_04111;gene=csgB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729864.1,similar to AA sequence:UniProtKB:P0A1E9,protein motif:CLUSTERS:PRK10101,protein motif:Pfam:PF07012.6;locus_tag=22222_2#22_04111;product=nucleation component of curlin monomers,Fimbrin SEF17 minor subunit,curlin minor subunit CsgB,Curlin associated repeat;protein_id=gnl|SC|22222_2#22_04111
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	55589	56044	.	+	0	ID=22222_2#22_04112;gene=csgA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729865.1,similar to AA sequence:UniProtKB:P0A1E7,protein motif:CLUSTERS:PRK10051,protein motif:Pfam:PF07012.6;locus_tag=22222_2#22_04112;product=major curlin subunit,Fimbrin SEF17,major curlin subunit,Curlin associated repeat;protein_id=gnl|SC|22222_2#22_04112
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	56106	56432	.	+	0	ID=22222_2#22_04113;gene=csgC;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_007470940.1,similar to AA sequence:UniProtKB:P0A1Z9,protein motif:CLUSTERS:PRK10102,protein motif:Pfam:PF10610.3;locus_tag=22222_2#22_04113;product=curli assembly protein CsgC,Curli assembly protein CsgC precursor,curli assembly protein CsgC,Thin aggregative fimbriae synthesis protein;protein_id=gnl|SC|22222_2#22_04113
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	56564	56884	.	+	0	ID=22222_2#22_04114;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_006087410.1;locus_tag=22222_2#22_04114;product=Fimbrial protein;protein_id=gnl|SC|22222_2#22_04114
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	56972	57511	.	+	0	ID=22222_2#22_04115;eC_number=3.5.1.-;gene=ymdB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_006087411.1,similar to AA sequence:UniProtKB:P0A8D6,protein motif:CLUSTERS:PRK00431,protein motif:Pfam:PF01661.15;locus_tag=22222_2#22_04115;product=Macro domain%2C possibly ADP-ribose binding module,O-acetyl-ADP-ribose deacetylase,RNase III inhibitor,Macro domain;protein_id=gnl|SC|22222_2#22_04115
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	57450	58934	.	+	0	ID=22222_2#22_04116;eC_number=2.7.8.-;gene=ybhO_2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_001570877.2,similar to AA sequence:UniProtKB:P0AA84,protein motif:CLUSTERS:PRK01642,protein motif:TIGRFAMs:TIGR04265;locus_tag=22222_2#22_04116;product=phospholipase,Putative cardiolipin synthase YbhO,cardiolipin synthetase,cardiolipin synthase;protein_id=gnl|SC|22222_2#22_04116
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	58951	60105	.	-	0	ID=22222_2#22_04117;eC_number=2.1.-.-;gene=mdoC;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_001570876.1,similar to AA sequence:UniProtKB:P75920,protein motif:CLUSTERS:PRK03854,protein motif:Cdd:COG3274,protein motif:Pfam:PF01757.16;locus_tag=22222_2#22_04117;product=glucans biosynthesis protein,Glucans biosynthesis protein C,glucans biosynthesis protein,Uncharacterized protein conserved in bac [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	60359	61912	.	+	0	ID=22222_2#22_04118;gene=mdoG;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_001570875.1,similar to AA sequence:UniProtKB:P33136,protein motif:CLUSTERS:PRK13274,protein motif:Cdd:COG3131,protein motif:Pfam:PF04349.6;locus_tag=22222_2#22_04118;product=glucan biosynthesis protein G,Glucans biosynthesis protein G precursor,glucan biosynthesis protein G,Periplasmic glucans biosynthesis protein,Peripl [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	61905	64448	.	+	0	ID=22222_2#22_04119;eC_number=2.4.1.-;gene=mdoH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_216085.1,similar to AA sequence:UniProtKB:P62517,protein motif:CLUSTERS:PRK05454,protein motif:Pfam:PF00535.20;locus_tag=22222_2#22_04119;product=glucosyltransferase MdoH,Glucans biosynthesis glucosyltransferase H,glucosyltransferase MdoH,Glycosyl transferase family 2;protein_id=gnl|SC|22222_2#22_04119
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	64522	64749	.	+	0	ID=22222_2#22_04120;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729872.1,protein motif:CLUSTERS:PRK10175,protein motif:Cdd:COG5645,protein motif:Pfam:PF07119.6;locus_tag=22222_2#22_04120;product=lipoprotein,lipoprotein,Predicted periplasmic lipoprotein,Protein of unknown function (DUF1375);protein_id=gnl|SC|22222_2#22_04120
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	64750	65124	.	-	0	ID=22222_2#22_04121;gene=msyB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729873.1,protein motif:CLUSTERS:PRK11467;locus_tag=22222_2#22_04121;product=acidic protein MsyB,secY/secA suppressor protein;protein_id=gnl|SC|22222_2#22_04121
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	65206	66420	.	-	0	ID=22222_2#22_04122;gene=yceE;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729874.1,similar to AA sequence:UniProtKB:O31762,protein motif:CLUSTERS:PRK09874,protein motif:Cdd:COG2814,protein motif:TIGRFAMs:TIGR00880,protein motif:Pfam:PF07690.10;locus_tag=22222_2#22_04122;product=membrane transport protein,Bacillibactin exporter,drug efflux system protein MdtG,Arabinose efflux permease,multid [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	66575	67495	.	-	0	ID=22222_2#22_04123;eC_number=2.3.1.-;gene=htrB_2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729875.1,similar to AA sequence:UniProtKB:P0ACV0,protein motif:CLUSTERS:PRK06860,protein motif:Cdd:COG1560,protein motif:TIGRFAMs:TIGR02207,protein motif:Pfam:PF03279.7;locus_tag=22222_2#22_04123;product=lipid A biosynthesis lauroyl acyltransferase,Lipid A biosynthesis lauroyl acyltransferase,lipid [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	67715	68767	.	+	0	ID=22222_2#22_04124;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_006087420.1,protein motif:CLUSTERS:PRK00142,protein motif:Cdd:COG2210,protein motif:Pfam:PF00581.14;locus_tag=22222_2#22_04124;product=Rhodanese-like sulfurtransferase,putative rhodanese-related sulfurtransferase,Uncharacterized conserved protein,Rhodanese-like domain;protein_id=gnl|SC|22222_2#22_04124
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	68819	69394	.	-	0	ID=22222_2#22_04125;gene=yceI_2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_006087865.1,similar to AA sequence:UniProtKB:P0A8X2,protein motif:CLUSTERS:PRK03757,protein motif:Cdd:COG2353,protein motif:Pfam:PF04264.7;locus_tag=22222_2#22_04125;product=YceI protein,hypothetical protein,hypothetical protein,Uncharacterized conserved protein,YceI-like domain;protein_id=gnl|SC|22222_2#22_04125
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	69391	69795	.	-	0	ID=22222_2#22_04126;gene=yceJ;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_005212184.1,similar to AA sequence:UniProtKB:P75925,protein motif:CLUSTERS:PRK11513,protein motif:Pfam:PF00033.13;locus_tag=22222_2#22_04126;product=cytochrome b561-like protein 2,Cytochrome b561 homolog 2,cytochrome b561,Cytochrome b(N-terminal)/b6/petB;protein_id=gnl|SC|22222_2#22_04126
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	70226	70339	.	-	0	ID=22222_2#22_04127;gene=yceO;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_216094.1,similar to AA sequence:UniProtKB:P64442,protein motif:Pfam:PF10968.2;locus_tag=22222_2#22_04127;product=inner membrane protein,hypothetical protein,Protein of unknown function (DUF2770);protein_id=gnl|SC|22222_2#22_04127
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	70380	71498	.	-	0	ID=22222_2#22_04128;eC_number=1.5.3.1,1.5.3.-;gene=solA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729880.1,similar to AA sequence:UniProtKB:P40874,protein motif:CLUSTERS:PRK11259,protein motif:Cdd:COG4121,protein motif:TIGRFAMs:TIGR01377,protein motif:Pfam:PF01266.18;locus_tag=22222_2#22_04128;product=sarcosine oxidase,N-methyl-L-tryptophan oxidase,N-methyltryptophan oxidase,Uncharacteriz [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	71611	71865	.	-	0	ID=22222_2#22_04129;gene=bssS;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_001570863.1,protein motif:CLUSTERS:PRK12301;locus_tag=22222_2#22_04129;product=biofilm formation regulatory protein BssS,biofilm formation regulatory protein BssS;protein_id=gnl|SC|22222_2#22_04129
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	72155	72418	.	-	0	ID=22222_2#22_04130;gene=dinI_3;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729882.1,similar to AA sequence:UniProtKB:P0ABR1,protein motif:CLUSTERS:PRK10597,protein motif:Pfam:PF06183.7;locus_tag=22222_2#22_04130;product=damage-inducible protein,DNA-damage-inducible protein I,DNA damage-inducible protein I,DinI-like family;protein_id=gnl|SC|22222_2#22_04130
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	72492	73538	.	-	0	ID=22222_2#22_04131;eC_number=3.5.2.3,3.5.2.3;gene=pyrC;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729883.1,similar to AA sequence:UniProtKB:P06204,protein motif:CLUSTERS:PRK05451,protein motif:Cdd:COG0418,protein motif:TIGRFAMs:TIGR00856,protein motif:Pfam:PF01979.14;locus_tag=22222_2#22_04131;product=dihydroorotase,Dihydroorotase,dihydroorotase,Dihydroorotase,dihydroorotase%2C homodimeri [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	73642	74268	.	-	0	ID=22222_2#22_04132;gene=yceB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_007470922.1,similar to AA sequence:UniProtKB:P0AB26,protein motif:CLUSTERS:PRK10598,protein motif:Pfam:PF07273.6;locus_tag=22222_2#22_04132;product=lipoprotein,Uncharacterized lipoprotein yceB precursor,lipoprotein,Protein of unknown function (DUF1439);protein_id=gnl|SC|22222_2#22_04132
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	74326	74973	.	-	0	ID=22222_2#22_04133;gene=grxB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729885.1,similar to AA sequence:UniProtKB:P0AC59,protein motif:CLUSTERS:PRK10387,protein motif:TIGRFAMs:TIGR02182,protein motif:Pfam:PF04399.7;locus_tag=22222_2#22_04133;product=glutaredoxin,Glutaredoxin-2,glutaredoxin 2,glutaredoxin%2C GrxB family,Glutaredoxin 2%2C C terminal domain;protein_id=gnl|SC|22222_2#22_04133
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	75037	76245	.	-	0	ID=22222_2#22_04134;gene=mdtH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_001570858.1,similar to AA sequence:UniProtKB:P69367,protein motif:CLUSTERS:PRK11646,protein motif:Cdd:COG2814,protein motif:TIGRFAMs:TIGR00880,protein motif:Pfam:PF07690.10;locus_tag=22222_2#22_04134;product=multidrug resistance protein MdtH,Multidrug resistance protein MdtH,multidrug resistance protein MdtH,Arabinose ef [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	76482	77066	.	+	0	ID=22222_2#22_04135;eC_number=2.3.1.-;gene=rimJ;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729887.1,similar to AA sequence:UniProtKB:P96579,protein motif:CLUSTERS:PRK10809,protein motif:TIGRFAMs:TIGR03585,protein motif:Pfam:PF00583.18;locus_tag=22222_2#22_04135;product=ribosomal-protein-alanine acetyltransferase,Putative ribosomal N-acetyltransferase YdaF,ribosomal-protein-S5-alanine N-ace [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	77102	77749	.	+	0	ID=22222_2#22_04136;gene=yceH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P29217,protein motif:CLUSTERS:PRK11239,protein motif:Cdd:COG3132,protein motif:Pfam:PF04337.6;locus_tag=22222_2#22_04136;product=G20.3,hypothetical protein,Uncharacterized protein conserved in bacteria,Protein of unknown function%2C DUF480;protein_id=gnl|SC|22222_2#22_04136
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	77751	78674	.	+	0	ID=22222_2#22_04137;gene=mviM;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729889.1,similar to AA sequence:UniProtKB:P75931,protein motif:CLUSTERS:PRK11579,protein motif:Cdd:COG3132,protein motif:Pfam:PF01408.16;locus_tag=22222_2#22_04137;product=virulence factor MviM,Virulence factor mviM homolog,putative oxidoreductase,Uncharacterized protein conserved in bacteria,Oxidoreductase family%2C  [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	78939	80513	.	+	0	ID=22222_2#22_04138;gene=mviN;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002146873.1,similar to AA sequence:UniProtKB:P37169,protein motif:TIGRFAMs:TIGR01695,protein motif:Pfam:PF03023.8;locus_tag=22222_2#22_04138;product=integral membrane protein MviN,hypothetical protein,integral membrane protein MviN,MviN-like protein;protein_id=gnl|SC|22222_2#22_04138
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	80595	81017	.	-	0	ID=22222_2#22_04139;gene=flgN;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729891.1,protein motif:CLUSTERS:PRK15459,protein motif:Pfam:PF05130.6;locus_tag=22222_2#22_04139;product=flagella synthesis protein FlgN,flagella synthesis chaperone protein FlgN,FlgN protein;protein_id=gnl|SC|22222_2#22_04139
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	81022	81315	.	-	0	ID=22222_2#22_04140;gene=flgM;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729892.1,similar to AA sequence:UniProtKB:P26477,protein motif:CLUSTERS:PRK10810,protein motif:TIGRFAMs:TIGR03824,protein motif:Pfam:PF04316.7;locus_tag=22222_2#22_04140;product=negative regulator of flagellin synthesis (anti-sigma factor),Anti-sigma-28 factor,anti-sigma28 factor FlgM,flagellar biosynthesis anti-sigma [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	81407	82066	.	-	0	ID=22222_2#22_04141;gene=flgA;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729893.1,protein motif:CLUSTERS:PRK07018,protein motif:TIGRFAMs:TIGR03170,protein motif:Pfam:PF08666.6;locus_tag=22222_2#22_04141;product=flagellar basal body P-ring protein FlgA,flagellar basal body P-ring biosynthesis protein FlgA,flagella basal body P-ring formation protein FlgA,SAF domain;protein_id=gnl|SC|22222_2 [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	82223	82639	.	+	0	ID=22222_2#22_04142;gene=flgB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729894.1,similar to AA sequence:UniProtKB:P16437,protein motif:CLUSTERS:PRK05680,protein motif:Cdd:COG1815,protein motif:TIGRFAMs:TIGR01396,protein motif:Pfam:PF00460.14;locus_tag=22222_2#22_04142;product=flagellar basal-body rod protein FlgB,Putative proximal rod protein,flagellar basal body rod protein FlgB,Flagella [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	82643	83047	.	+	0	ID=22222_2#22_04143;gene=flgC;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729895.1,similar to AA sequence:UniProtKB:P0A1I7,protein motif:CLUSTERS:PRK05681,protein motif:Cdd:COG4786,protein motif:TIGRFAMs:TIGR01395,protein motif:Pfam:PF06429.7;locus_tag=22222_2#22_04143;product=flagellar basal-body rod protein FlgC,Putative proximal rod protein,flagellar basal body rod protein FlgC,Flagellar [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	83059	83757	.	+	0	ID=22222_2#22_04144;gene=flgD;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729896.1,similar to AA sequence:UniProtKB:P0A1I9,protein motif:CLUSTERS:PRK06655,protein motif:Cdd:COG1843,protein motif:Pfam:PF03963.8;locus_tag=22222_2#22_04144;product=flagellar hook formation protein FlgD,Basal-body rod modification protein flgD,flagellar basal body rod modification protein,Flagellar hook capping  [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	83784	84995	.	+	0	ID=22222_2#22_04145;gene=flgE;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_002146866.1,similar to AA sequence:UniProtKB:P0A1J1,protein motif:CLUSTERS:PRK05682,protein motif:Cdd:COG1749,protein motif:TIGRFAMs:TIGR03506,protein motif:Pfam:PF07559.8;locus_tag=22222_2#22_04145;product=flagellar hook protein FlgE,Flagellar hook protein flgE,flagellar hook protein FlgE,Flagellar hook protein FlgE,fla [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	85016	85771	.	+	0	ID=22222_2#22_04146;gene=flgF;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729898.1,similar to AA sequence:UniProtKB:P16323,protein motif:CLUSTERS:PRK12640,protein motif:Cdd:COG4787,protein motif:TIGRFAMs:TIGR03506,protein motif:Pfam:PF06429.7;locus_tag=22222_2#22_04146;product=flagellar basal-body rod protein FlgF,Putative proximal rod protein,flagellar basal body rod protein FlgF,Flagellar [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	85785	86567	.	+	0	ID=22222_2#22_04147;gene=flgG;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729899.1,similar to AA sequence:UniProtKB:P0A1J3,protein motif:CLUSTERS:PRK12694,protein motif:Cdd:COG4786,protein motif:TIGRFAMs:TIGR02488,protein motif:Pfam:PF06429.7;locus_tag=22222_2#22_04147;product=flagellar basal-body rod protein FlgG (distal rod protein),Distal rod protein,flagellar basal body rod protein FlgG [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	86655	87320	.	+	0	ID=22222_2#22_04148;gene=flgH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729900.1,similar to AA sequence:UniProtKB:Q9PPM0,protein motif:CLUSTERS:PRK00249,protein motif:Pfam:PF02107.10;locus_tag=22222_2#22_04148;product=flagellar L-ring protein,Basal body L-ring protein,flagellar basal body L-ring protein,Flagellar L-ring protein;protein_id=gnl|SC|22222_2#22_04148
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	87332	88429	.	+	0	ID=22222_2#22_04149;gene=flgI;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729901.1,similar to AA sequence:UniProtKB:Q9PMJ8,protein motif:CLUSTERS:PRK05303,protein motif:Cdd:COG1706,protein motif:Pfam:PF02119.10;locus_tag=22222_2#22_04149;product=flagellar P-ring protein,Basal body P-ring protein,flagellar basal body P-ring protein,Flagellar basal-body P-ring protein,Flagellar P-ring protein [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	88429	89379	.	+	0	ID=22222_2#22_04150;eC_number=3.2.1.-;gene=flgJ;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729902.1,similar to AA sequence:UniProtKB:P15931,protein motif:CLUSTERS:PRK05684,protein motif:Cdd:COG3951,protein motif:TIGRFAMs:TIGR02541,protein motif:Pfam:PF01832.14;locus_tag=22222_2#22_04150;product=flagellar protein FlgJ,Peptidoglycan hydrolase flgJ,flagellar rod assembly protein/muramidase Fl [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	89444	91105	.	+	0	ID=22222_2#22_04151;gene=flgK;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729903.1,similar to AA sequence:UniProtKB:P0A1J5,protein motif:CLUSTERS:PRK08147,protein motif:Cdd:COG1749,protein motif:TIGRFAMs:TIGR02492,protein motif:Pfam:PF06429.7;locus_tag=22222_2#22_04151;product=flagellar hook-associated protein 1,Flagellar hook-associated protein 1,flagellar hook-associated protein FlgK,Flag [...]
+ERS222222|SC|contig000020	Prodigal:2.60	CDS	91120	92073	.	+	0	ID=22222_2#22_04152;gene=flgL;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_004729904.1,similar to AA sequence:UniProtKB:P16326,protein motif:CLUSTERS:PRK08027,protein motif:Cdd:COG3951,protein motif:TIGRFAMs:TIGR02550,protein motif:Pfam:PF00669.14;locus_tag=22222_2#22_04152;product=flagellar hook-associated protein 3,Hook-filament junction protein,flagellar hook-associated protein FlgL,Rod bind [...]
+ERS222222|SC|contig000020	Infernal:1.1	ncRNA	92162	92255	.	+	0	ID=22222_2#22_04153;inference=COORDINATES:profile:Infernal:1.1;locus_tag=22222_2#22_04153;product=STnc490k
+##FASTA
+>ERS222222|SC|contig000020
+TATCCGGGCAGCCCGTTTACGGGCCGTAAGTAACGAAGTTTGATGCAAATGTCAGATCGT
+ATGCGCCTGTTAGGGCGCGGCTGGTAAGAGAGACTTACAGGCGCATCAGAAAAACCTCCG
+GCTATGCCGGAGGATATTTATTTTTTGGGCGCATAGAATATAGTTAGAAAATACTGCTAG
+AACATTTCCCGAAAAAACGGACTACGTGGGTTATTAGTTTCTTTTCGTTTCTTGATGTGT
+CTATTTATTGAAGATGTAGACCATTCTGGGAGATGAAGGATGCCCCATCTCTTTCAGAAT
+AATGTTGGTATATTCGACAACAGGGCCTCTTGAATGATTTTCTTCTTTATCCTGAAGATG
+GGTCAGTGCATGTACAACTTCATGAGTAAATGAACGTTTTGTGTCAAAAAGTTGTCTTCC
+TTCGTTACTTTCATAATGTTCGGTATATGAATAATCAGAATCGTCCAGATTGAGACAAAT
+AACTTTCCTGCCTTCTGAAAGTTTGAAGTGTTACTGAGCCACGGTAGTTTCAAAGGCTTC
+GCCTGCCCCCGGTAGCCAGCGCTGCTCCACATAATGTAGTTCTTTTTCATATGCGTAATT
+CATCAGTCTGCGGAATGTTTCGCTTTGGGTATACGCATTTTGAAGTACGGAGGATAGTTC
+ATCGTAGCATTCGTCATAAGTGTCGTCATCAAATTCTGTATCAGGGTCTATTCCACCCGC
+GCCTGAGATAAGGTACTCCACCACACACTCTGACTCCAGACGGAATTCACTGTTTATGGC
+AAGGCTGTCATGAGCAAGGCGTAGCCGTGAGGAGTTTGGTGCATGTTCGGGAATATCGGG
+GAAAACAGGTGTATCTGCGGTATTTAATCCATATGTGGATACTCCGCTTTGAGGTATTAA
+TCTGTAGGTGACCGGAAGCATAATTTCTTATTACCTGACTGCAATTATGAATTATTTAAA
+TTAAACAATAATAGTGTTTTTAGTTAATGTGCAACATACAGATAACGCTATGATTCAGGC
+AAAACCAACAAGTAATACGCTGAAAATGTTATATATAATGAAGCTTAGGGGCGGGGTTTG
+TTTAATAAAACAAGGAGGGCTTCTGTTTGAATACTTCTTGTTTATAAAATCCCTTTATCT
+CGACTAAAATATTGGATGGGGGAAAAGCGTTTAATCATTGTAATCCGGGAGTGGAGTAGG
+GGTATGTTGACTACCTGTCAGATCGGCTCCTGATAATGCTTTCGCTAAAGTTAGAGCTAT
+CTTATCTAATTTTGCACCATTTAGTTTGGTGTAAGTTAAGTCTGAGCCGAATAGAATTGC
+AGCGGTTAAGTTTACACCGGACATGTTAGCGCAAGTCAGGTCTGCGTGAGTCAGGTTTGC
+TTTAGTCATGTTTGAACCCATTAGATTTACAGAGTGTAGATTTGCACAACACAAATTTGC
+ACCGACTAAATTTACATTAGATAGATTTGTTTAACGGAAATCTGCATTAGAAGCATCTAT
+GTCTGACAAATCTTCTCCCTGGAAATCTTTATAAGCTAGGTTTACTCCGCGCAGGTTCAG
+AAAACCATCCTCGGTTATAAGTGAATCAGGCTATTGTATATTATGTTTACTCATCAGTTT
+TAACATTCGACATACATTCCAAAATCTCTCACAATCAACTTTATCCGTTACAGTTTTTCC
+ATTTTTGTTCACTTCAATAGTGACCATGGGATACGTATTTTCGGATGCTGCTCCACATGA
+CAGACATATAGTACACCCGTTGACATCCTCCAAAAATATTTTATTACCATCGAAGAAAGC
+ATCTTTATTTACATATAATAATGATGTGGTCAATTTTCCAATTACCTCCCAAAAGCATCT
+TTCATTGCTTCTTCTTACTCCACCACAGGTAAAAAAATTGACAAACCATTCCAGTATACC
+GCGTGGTGAAGTTGCACTTTTCATTGCTTCTTACGTACCGGTACCGGCCGCATGCAAATA
+TCTTAATATATTTTCTGGGGACGCGTTAGTTAATGGCATTTTGATTCCTTCTTATGGAAG
+TGAGTCGATTCCTTATAGGTGTGAGTCAAGTCACATTTATACAAGGAATAATAAAAAAAG
+ATATAAGGTGATTTTTGTATTTAGAGCTTTGGATGAGTATGAGTAGGTGTCTGGCATCTG
+ACAAGAGGCGATGATATCTGCCATAACGGTGAAGCAGAAGTAATATCACCCGTGAATAAT
+AAGATTATCCTTGCTATTGACTTCCTTCATTTACAGCATAGCTTACGCCTCATCATCATT
+GAATAAACAATTAAGTTTGTTGAGCGAAAATTAACTTAAAGAAAAAATAATAAGCTTTAA
+TATTTTTTGCAGCATTTCAACTTATAGCAGAAAAAATGTGCGTAGATGGCGTAAAAACCT
+GATGAGCAGGAATACTTACACACTAAAAATGCACCCGCAAACAGAGCAAAATGAGAAAGA
+GGAACGTTTGCCCTTTACATTTCCACCAAAGAATCTGGTTTTGTCTCTTGTAAAGGGCAT
+ACGTATCGCGTTTTATCTCATTAAGAAAGTATATTGACGTATTAAATTATGCATAATGCT
+CTTTCAATTGCTTCACGTTTGAAATGAATAATACAAAACCAGTGAGCGCCTCTTCTTCGG
+TACTGGTTTGCGGCAAGCGATAAAGCGCCACTAAAGCAGTATTGTCTGCATCAGCGCCGA
+TAGTGACGGCGCTGGCGTAGTTAAGACGTAAAAAATGCTGCAAAGTCAGAGTGTCGTCAG
+GCAGTGGCATAAAGGGACAGCACATTTCCAGTATATGATCGGATTCATTAAAATAAACCT
+GTATCCCATCATCAATGATAAGCAGTGGCTCAACTTCTGGCGCATCCAGGCCTAACGCGT
+CATATAAACGATTTAATAGACTTTCCATATAGATACCTCAAGACTCAAGATGTGATTAAT
+GAAGAAATACCTTTTACTGACTGCCAAATATTATCATCCCCAACTCGTTTTTGATAGGAA
+AGATTGAGCACCTCTGGCGATAAATTTTTCATTACTTTGTTTCCCGCCCCGCCCGTATTT
+TGTTTCTGAATCTCCAGGTTACCGCTATTCAGTAATACTTTTTGGAAAATTTTCTGTCCA
+CCGCTATCCGGAAGACTACCAGGCGCACTTAACATATGGGTCTGATGGAAAGAAATGAGC
+TCTCGCTTGATTTCTGAATCCATCATCCCTGTACGATCTTTGCCGCTTTTACAATTCCAG
+GCGGGCACCGCGTCAATTTCATGGGCTAACATGGCAAGGCGTTGTGCGAGTTTATAGGGT
+TCGCCGCCATCTTTATGATGTTGGTTATTTTTCCAGATATCCTTAATCTGGCGCGCTAAT
+GTATTGACGACCTCATAATTATCCGGGTATTGCGCCAGCCATTCGCCAACCCAGCCACCT
+GGTCTGGCTTCAGGGCGTAAATCATTGCCTAATAACTGATGTAGCGCTTCGGCATTATAG
+CTATCTGATGCTTTAAGGCCAAAGCCGAGCTTGAGCGCCAGCTCATTAACACCCACATTA
+AATGCGGCGACGTCCGGTTTTATTTTTACCGTCTGTAGATCGCCATCTTTATTGCGGATT
+TTTAAATGAATCATTTTTCCCGGCTGGGTCAACGATTGCCATGCGCGCATTTGATCCTCG
+ACCATAGTTCCCTCTTTGCCGAAAATATTCGACGCGGTGAGTAACCCGACGGATACCAGT
+TTCAGGCTTACCGCTTCGCCCTCTAAGGCTCTGTTAAGCAACTCAGGTTTACTAAAAAGT
+GCCGCAGCTAATACTTCTTTGGCTTTGTTTTCAGCGCCGGCCTGACGCAGAAGCGGATCT
+TTTTCATGATAGGGGGAAAGCACACCATGACGTATCCCGCAAAAAAGCGTTTTATCTTTA
+CCGTCCTCATGCACACTCACCGTGGACATCCACAAATTATTGGCGTGATGAATATTCTTG
+GTATCCCAACTGCATACGCCCTTTCCCTCATAAGCACTGGGAAAGATATCTTTTGCGCCG
+ATTTTCATCTCTGCGGCAGGGAGCTGCGTGTTGGTATAGTGATGCCCGTTATGCGTGAGT
+GTATTTTTTATGGTTTGCCAGGGCTGGTTATTGAGCTGCTTGACCTGAGCATCTCTAAAC
+GCTACTGCCGCAAAGCGATGAGCTTCTTTTGCAGGTAAGCCATTTTGCTGCTGAGTGCGT
+GCTGCAATAAGTTCGATAAGATTTTTCTTCAATGCTTTTGCCGCAGCTTCTTCTTGTTTT
+GTTGCTGGCCGGTCCGCTTTAACTTTGGCTAACTCCATTTGGTTTGCCATTGACGTTAGA
+ACCGGGTCTTGTTTACCCAGAACGGTTGCCGCGACGGTAAGAAGATCGCGCTGTAAGTTA
+TAGAGGTTATGCAGCGAGTGGTTAGACGTCTTCTGATGCTGTAGATAATTCCCCCATGTC
+GCGCCAGGTTCTCGCAGGACAATAATTTCCGGGCGAGCGTCGGGCGCTTTAGCCGGCGCT
+TTGCCCTGGCCTGAGAGAATCTGCATTCCGTTGTATAAGGTTTTTTGTAGGCTTTTAAAA
+GCCTCCTGGGTTTTTAGTGAAGCTGAGTGATAGAAGCTCTGTATTTGCATAGCGTTTTTA
+ATATTCCTGAATAGGGGAGTGGGAACATTCAACAGGGTTAACAATCTTTTAAAAGGATGT
+GACTTTTATGCACCAGGAACTTTAAAAAAATGTCCTTTTTAATGAGGAAACGCTTCTGAT
+CAGGCGGGGAGGCATCCTGACGTCCATACGTAGTGGCGATCATGGGGGAGATCGGTACAT
+CAAATGTTCATAAAATCTACAAACTTCACAATTTAGGGCACAACTTTTATTGAAAACCCA
+CCTTCTTGATTCAGATCAAATTCACCGATTTACTCCTGCGTAAAGTAACGGCGTTACATC
+AAATGTTATACAGGTGTGTGTATGCCAGCAAAGCCCCGGACGAGTAAGACCGTGACGAAG
+AATATTCGTTTTTCCTATTCCATGCTTGAACAGATAGAATTCGCGTTGAAATCTGAAAAG
+ACGCGGAATTTTTCAGCATGGGTAAAAGAGGCCTGTCGGGAAAAGTTATGTAACACGGGA
+CATAAGCTGTAGTTATCGTCTTATGTAAGAGGGGGCGGCCATTTTGAAAAGAACAACGTG
+CTTATATACCTCCTGGGTCTTTGCCGCTTTTGTCTCTCTGCTGATATTTGTCTGGAGTGT
+CATAAACTATCCTCTCTATGAATCCATAATAATTATTGTCTTTTATATCTGGCTAATTCT
+GGTACCGCTTTATCTTATTGTGTATGAGTGGCTAATAGATTGTCATTAAATTTATGGCTG
+CGTACTTGAAAAGTGATATTTCATATCTGTGCGGTAAGTCATTGTCGTAATAATATTATT
+GGTCAGGCGACGGGCGAGCGTCTGCGCATTTTGCATCGTTTTATCTTCAAAATTTTGCAG
+TAAGCGTTGTGCTTCTTTCGGATGCGACGCATATAATCTCAGATAGCTCTGCTCCATCTT
+ATACTGTTGCTTAGCTGTTTGCTGTTCAAATGTTTTCCAGGCATGTTGCACATCTGGCGC
+AAACGTATTGTAGTCTTGCATAACCAGTGTTTGCAGCGTGCGGAATGTCCAGTAGGTAGA
+GTCGTTGCTCGCCCGATCGGTTCCTTTATCATCTCCGGGTTGATAATGACGCATCCCCTG
+GTAATAGGGGAGATAGACGCTAAGAGATGGCATTCCATAGGCGATGTATTCTACGTTGCC
+GATAGCCTGCGGTAATTTCGGTCTGACCTGTAAAATATGTGACTCCTGGGTACGAAAAAC
+GGATATAGGTCGCCATGGTTCTTGTGGATTATGACTGGCATAAGGGTCGTGCGACGTTCC
+CTGATAGTGATTGCGTAACGCGTTTTTTACTGCCGCCACGCTGATCTTCGTTATTGGCGT
+TAAAAAAACAGAAAATGTTTCCCCTTCGCTAACGACCGTATCCAGATGCGGATTAAACTG
+GTGTTGTAGCGTCCAGACGCGCGGATAATTATAGGTGGTATCGTTTTTGTTATCCTGCGA
+ATAGGCTTGATGAAAGTCGAATTCGCCGCGGGCCGGATCATATAATCCCTGCTTTTTCGC
+AAAGCTTACTAACGTTGGTGACGCCATATAATTCGCGTTATCATTCGGATCGTAATGGCG
+TAAACGTCCCTGATTGGCGGAAACGAAATAGCTATCTGCCGGAAGTCGTACTGCCAGCCA
+TTGATGTCCGCTTCCCGTCTCCAGATACCATATCTCTTTGCTATCAATAAACGCGACGCC
+GAAACCTTCGCCCGCGCCTTTTTGTTCAATAATATCTCCCAGTAATTTGGCGCCCTGACG
+CGCCGATTGCGCCACTGGCAGGATCACGGACTCAATGGCGTCTTCCGTGATTCCCGTTTT
+TGTCACGTAAGGATCGGCAGCCAGCGCCGCTCTGCCGTTGTAAATGGTTTCCGTTGCGCT
+CATTCCGACGCCCGCCGAATTGAAACCGGCTTCACCCATGGCGTTATCGTTAGTATCAAA
+GTCATGAATCGCCGTATAGCGCATCGCTGTCTCCGGAAGCGGCCAGCTAAAATTGTTGCG
+ATGTGCTTTATACTCGCCTTGTTGATGAAACGCGACGGGATGAATAACCTTATGCTTGGC
+GTTATTTGCCGAGCCATCCTCGTTGCGCGCGATAATAAAGGAGCCGTCAGCCGAAGCCTG
+ATTGCCTACCAAAAGGGTAGTACAGGCGATGACTTTACCCATACCCAGCAGCGTAACGGC
+GAAAGCAAGATACTTTTTCATAAAGGTTCCCACTGAATAACGCGTTATGGGATGAATTGA
+CCCTGGATTGGAAACCGAGAAAGTGATCGAGCCAGCAATATTCTTTACCGGCATCCTTTA
+TTTTCTTTTTATTGAGGTTGTATTGATAACCACAGCCCTGTGGCAGGGAAGGGGAACAGA
+ACCTGTACTGACCTTAGCTATCACCGCTATCAGGCAGACGAACCGAAAAGATATTATGTT
+CATCGGCATAGCGGTAAGATGCCGAGCCACCGTGCAATAGGGCAATCGCGTTAACTAACG
+ATAACCCCAGGCCGAAACCGGCAGTGTAGCGGGCATTATCTCCTCGCCAAAAACGCCGGA
+AAAGCTTATCCGCATCGGCGGTGGGGCTGCCCGGATTAGCGACCCGAATTTCTGCAACGT
+TATCATCATAAGCGCTTTCAATACGTATCACGGCGTTTTCATCAGAATAACGGATGGCAT
+TCGTCAGCAGGTTTGAGAGCACTCTTTGTAATAATATTTCGTCAGCCCATACCGTTCCCT
+GACATTGATTTATAAAACAGATGTGCTTCTCTTCGGCAAGGGGGCTAAGATAATCCAGCA
+TATTTTCGACCAGCGCATTGAGCGAAACAGGCTGTTTTTTTACCGCTATATTCTGGTGCT
+CCGCGCGTGCCAGAAAGAGAATATTTTCTGTCAGTCGCGACAGTCCCTCCAGCTCTTCAA
+TATTATCGACAAGGGCTTGTTGATACTCTTCGGCGCTGCGTTCCTGACTCAGCATAACCT
+GATTCTTCCCCAGTAAAATATTAACCGGCGTGCGCAGCTCATGCGCCAGATCGTCGGCAA
+ATTGGTTCAGGCGTTCAAAATCGTCGGAAAGCTTCTGGCGCATGGTATTTAGCGCTTGCC
+CAAGCGGCCTGAGCTCGACGGGTAACGCCTGTTCCGCCAGCGGCTGGCGAAGTGTGCCGC
+TATCTGTCGCCGCGGTGAGTCGGCTGAGCGACGTAATGGCCCGCAGCCCGTTTCTGATGA
+CTAATGGACTGAGCGCCGAACAGACGAGGATCGCGATAAGGCTAATCAGCAAACTGTTGC
+GGCGATATTGCGCCAGCATTTGCCGCCTTTCCGTCGCCAGCCTGGCAATAGTAAGGGTCA
+GCGGGTTATCGCCGCTTCTGGCGTTTACTCGTACCGCGGTCAGCTCCGTGCCTTGTACCG
+CCTGGCGAAATAAGGTTTCGCGGGTGATGTTTTTAGCCAGCGGAATCTCGTTAAAGCGTT
+GGTCGGGGATGCCGCTATGATTAATCGCAACATTATGGCCTGTTGCTGAGTGGATCAATA
+AGATATCCTGCTTCGTATCCACCATCCGATTGAAATAGAGCGGCAGATTTTCCGGCCTGG
+CGCCATCCAGTAACAGTTGCTGCATTTGCGCCGCCCGATTTATTAGCGTCATATCATCCC
+GATACGTTAATTCTTTGCTCAGCGCGTTATAGAGCGTCCAGCTAATGCCGGTACAGGCGA
+GGATAAGTATGGCGATAAAAGAAATTGTCAGGCGCAGCGTCATTGATAACTTAACCATTC
+TGCGACGTTTCCGCCTGTAATCGATAACCCATCCCCTGGACGGTCATAATGAGCTTCTTT
+TCAAATGGATCGTCTACTTTGGCGCGCAGCCGACGAATCGCGACATCAACGGTGTTGGTT
+TCACTATCAAAGTTAATTCCCCAAACTTCGCTGGCGATCGCGGTTCGGGGCACGATTTCC
+CCTGCCCGGGACGCCAGTAACCAGAGGAGCAGGAATTCTTTGCGGGTCAGGGAAATCGGT
+TTGCCATTTCGTAACACCGATTGCTTTGTGGCATCCATGTCCAGACCATTGATCGTCAGT
+CGGGTAAAGACCGGGACATGCTGTCTGAGTTGAGCTCTCACCCGGGCCAGCAGTTCGGCG
+AAGGAAAAAGGCTTAACAAGGTAATCATTAGCGCCCGCCTCAAGACCTTTGACGCGATCC
+TCAACCGAGTCGCGCGCCGTCAGGCAAATAACAGGGGGCTGATATGCAGTGCGCAACGCG
+CGTAAAACCTGCCATCCATCAAGCCCCGGCAGCATAATATCAAGAATAATCAATGAATAA
+TGTTCCTGAAGGGCTAGGTGTAATCCGTCTCGTCCATCACAGGCATAATCAACCACATAG
+CCTGCCTCCGTGAGTCCCTGACGTACCCACTCAATGGTTTTCTGGTTATCTTCAATCAAT
+AAAATCTTCATCAACGCAGTATGCCATAGGCTTGTTAGCGAATCCGTAAATCTCCAACAA
+GATGACATAATTGTCATGTCCGTGTAAGGTTAATCCTGGACGCGCCAGATATATTTTCCT
+GCATCAATACCGTAAAGGAGTGAAGCATGAAACGATATATACTGGCTACCGCGATAGCGT
+CTCTTGTTGCAGCCCCGGCAATGGCGCTGGCCGCTGGCAGCAATATTCTCAGCGTACATA
+TTCTCGATCAGCAAACAGGCAAACCAGCGCCCGGCGTGGAGGTGGTACTGGAGCAGAAAA
+AGGATAACGGATGGACGCAATTAAACACCGGGCATACCGACCAGGATGGACGAATTAAAG
+CACTGTGGCCCGAAAAAGCTGCCGCGCCGGGGGATTATCGCGTTATTTTTAAAACCGGCC
+AGTATTTTGAAAGTAAAAAACTGGACACGTTTTTCCCGGAGATTCCCGTCGAGTTTCATA
+TCAGCAAAACGAATGAGCACTATCATGTGCCGCTGTTATTAAGTCAGTATGGTTATTCAA
+CCTATCGCGGGAGCTAATTTAGAGCCTATCCTATTAGGGCTATTTTACTTGCCATTTTGG
+TCCTGGGCAGTGCTCGCCAAAACGCGTTAGCGTTTTGAACGCCGCTTGCGGCGGCCCGAA
+GGGCGAGCGTAGCGAGTCAAACCTCACGTACAACGTGTACGCTCCGGTTTTTGCGCGCTG
+TCCGTGTCCAAACAGGCTGCGCCAATAACGCCTGGTGGGACAGGCTCTTAGATTTTATTA
+ATCGCCGGGATTTATGGCGAGCAAACATCGCGGCGGGAATAAAAATCTTCAGCGTATCGA
+CTCTCCTCCCACAAGCGGTGGTTATCCCGCTTGTGGGTATGAGTGACGGTTAAACAGGCG
+CTTCCATCTCAAGTCTGACCGGATGAAAACGGCGTTTGAAATAAATCAGGCCATGCCCCT
+CCTGGCTAAGAATAATATTTTTGATCGCCACCAGATACACCAGATGCGTGCCAATGGTTT
+GTACCTCGCTGATCTCGCCTTCAAGACTGGCCAGCGCGCCGTTAAGTACCGGCTGGCCCA
+GCGGCCCGTTTTGCCAACATGGCTGGTGAAAACGCTCCTCCATCGCCATCCCCGTCATAC
+CGGCAAAGTGGCGCGCCATCAGCTCCTGCTCATGGTTAAGTACATTAATGCACAGCCTGC
+CGTTGCCCTGAAAAACGGGGTTCATGGCGCTATTGGCATTAATACATACCATCACGGAGG
+GCGGCGTATCAGTGACTGAGCAAACCGCTGTTGCGGTGATACCGCAGCGTCCGGCGTGAC
+CCGCCGTGGTTACGATGTTGACCGCTGCCGCCAGACTTGCCATCGCATCGCGAAAACGCA
+GACGTTGTTCATCTACTTGCATGAGAACCTCCTGCCGCGTTATTTCAGCAGCTTATCCAG
+TTGATTGATGTCGTCGTTATTGTGCAAATGCGAAACCGTCCAGCCATTCTGATCGTATTC
+GGAGAGGCAGCGATCGACCATTGCCATCATCTTATCCATATTGCCGGAGCTCTGGGCCTG
+ACGCAGACACTGCAGACGAATTTCATCCTGGCTGCCCGAGTAGTTAATCTCGTACAGCTC
+ATGGCGACCGCCAAACTCGCTGCCGATGGCATCCCACATCAATTTAAGAATTTTGATACG
+TTCAACATGGTCCATTCCGTTAGAGCCGCGTACGTATTTCGCCAGGTACTGGTCGATTTG
+CGGATTATTCAGATCGCGGGCGCTGGAAGGCAGGTAAATCAGGCCGCTGGTAACGTTACG
+TTCAATAATATTTTTAATTTTCGCGTAGGCCATTGGGGCCATCACACGATAGGTTTGCAG
+CGCGGCGTGGTCCGGTAGCCAGGCGCCGTTTACCCACGGGGTTGCTTCAGAACACATAGA
+ATCGCTCAATGCCCAGAACATATTGCGCCAGGCCACGACTTCGCCGAGATCGGCCTGCAC
+GCCCCGGAACTCTACGGTACCCGTACATTCGAGCGATTTTTTCAGCAGCGCGGTAATGAA
+ATCAAGTTTTACCGCCAGACGAACACAGGCTTGCAGTGGATACATACGGGCAAAGCCGCC
+TTCCATCGTCCAGCGACGACAACGATCGAAATCACGGTAAATTAATACGTTTTCCCACGG
+GATCAGCACCTTGTCCATCACCAGAATGGCATCGTTTTCATCAAAACGGCTGGAGAGGGG
+ATAATCAAACGGCGAGCCCGTCGCGCCCGCGACCATTTCATACGAGGCGCGCGAAATAAG
+TTTTACGCCTTCGGCATCCATTGGCGCGACAAACATCAGAGCAAAATCCGGGTTTTCGCC
+CATCACCTGGGCTGAGCCGAAACCAATCATGTTGTAGTGAGTCAGGGCGGAGTTAGTGGC
+GACAACTTTCGCCCCGCTGACAATAATCCCGGCGTCCGTCTCTTTCTCCAGCTTGATATA
+GACGTCTTTCACTTCGTCGGCAGGTTTGTGGCGGTCAATGGGCGGGTTGACGATTGCATG
+GTTAAAGTACAGGCCGGTCTCCTGAATACGGGTGTACCAGTTACGGGCGTTCTGCTCAAA
+CTGGCCGTAGAAGGCTGGGTTAGCGCCCAGAGCGCAGCCAAAGGCGGCTTTGTAATCCGG
+TGTGCGTCCCATCCAGCCGTAACTCAGGCGTGACCACTCGGCGATAGCATCACGCTGTTG
+GCGCAGATCGTCTGCGCTTTTCGCCACGCGGAAAAATTTATGCGTATAACCGCCGCTGCC
+GGTATCGGTATTCCAGCACAGGGTGTAAGCGGGAGGGGATTCCCGTGTCGGCGGCGTGGG
+AAGGTAAGAATACTATGCTTCCGCTTCGCTATCAGGATGCTGGCGCCCCAGAGCAATAAG
+ATCGTCCAGCAGCAGCATCAATTGCTGTGTTTTTTCGGGCGTGAAATCGGCCTCAATTTT
+TCGATAAGCCTCTTCTACCTCGCTCCGGGCACGGGCGTACAACGTTTGTCCCTGCTCCGT
+CAACATGACATATAACTTACGCTGATCGTTAACCGGCTTGAGTCGCAACACCAGTCCGTC
+TCGCTCCATGCGCGTCAATATTCCGGTCAGACTTGGACGCAAAATACAGGTACGAAAGGC
+CAGCTCGTGAAAATCCATAGAGGGGCTATCGGCCAGGATTCGCACAATGCGCCATTGCTG
+GTCGGTCAGATTGTGGCTTTTAACGATGGGGCGGAAATAGGTCATTGCCGCTTCGCGCGC
+CTGAAGCAAGGCGATGGTTAATGAATCATGCATAAGCGTTTCTCTTTTGCAGAATTATTA
+ATACCTGAATAATCGTGTTTACCGATGTGAGCTATGACATCTGTTTTATGTCCAAAGTTT
+AATAGAAACAAGGGGTTTTATTTAACTATTTGATATATATGTATTTAATAATAAATTTGT
+GAAAATATTGTTAATCACATCATAAATACTTTACTTAAGCTTGCTAAATGTACAGCGAAA
+GCATAAATCTAATCATTAATATGTTAATGAAATCACAGCCCGTTAAATCGGCCTGAGGAG
+TTTATGTATGAAGGGTACTGTTTTCGCCGTTGCGTTAAACCATCGCAGCCAGCTTGATGC
+CTGGCAAGAGGCTTTCTCTCAGCCTCCCTATAATGCGCCGCCTAAAACCGCAGTGTGGTT
+CATCAAGCCGCGTAATACGGTGATTCGTCACGGCGAACCCATTCCTTATCCGCAGGGAGA
+AAAGGTACTGAGCGGCGCGACAGTGGCGCTCATTGTGGGGAAAACCGCCAGCCGGATACG
+CCCTGAAGCGGCGGCGGACTATATCGCCGGGTATGCGCTGGCTAACGAGGTCAGCCTGCC
+GGAAGAGAGCTTTTATCGCCCGGCGATTAAAGCGAAATGTCGCGATGGCTTTTGCCCGCT
+GGGTGAAATGGCGCCGCTGAGTGATGTGGATAATCTCACCATTATCACTGAAATCAACGG
+ACGAGAAGCGGACCACTGGAATACTGCCGATTTACAGCGTAGCGCCGCACAACTGCTTAG
+CGCGTTAAGTGAGTTCGCTACACTTAACCCTGGCGATGCGATCTTACTTGGTACGCCGCA
+GAATCGCGTTGCGCTGCGTCCCGGCGATCGGGTGCGTATTCTGGCGAAAGGTTTACCCGC
+GCTGGAAAATCCGGTTGTCGCAGAAGATGAATTCGCCCGCCACCAGACGTTTACGTGGCC
+GCTGTCAGCGACGGGAACGTTATTTGCGCTGGGGTTGAACTACGCCGATCACGCCAGCGA
+GCTGGCATTTACGCCGCCGAAAGAGCCGCTGGTATTTATCAAAGCGCCAAACACCTTTAC
+CGAACATCACCAAACGTCGGTGCGCCCGAACAACGTCGAATATATGCACTACGAAGCCGA
+GCTGGTCGTGGTGATTGGCAAAACGGCGCGTAAGGTGAGCGAAGCCGAAGCCATGGAGTA
+TGTGGCCGGTTACACCGTCTGTAACGACTACGCGATCCGCGACTATCTGGAAAACTACTA
+CCGTCCGAATCTGCGGGTAAAAAGCCGCGACGGCCTGACGCCGATAGGCCCGTGGATTGT
+GGATAAAGAGGCGGTTTCTGATCCGCACAACCTGACGTTACGCACCTTTGTCAACGGTGA
+GCTGCGGCAGGAAGGGACGACCGCCGATCTGATCTTCAGCATCCCGTTCCTGATTTCTTA
+TCTGAGCGAATTTATGACGTTGCAACCGGGCGACATGATTGCCACCGGTACGCCGAAAGG
+GCTGTCCGATGTGGTGCCGGGGGATGAAGTTGTCGTTGAAGTAGAAGGCGTGGGTCGCCT
+GGTTAACCGAATCGTCAGTGAGGAGAGCGCAAAATGAAGAAAATAAATCATTGGATTAAC
+GGCAAAAACGTTGCAGGTAACGACTACTTCCAGACCACTAACCCGGCGACCGGTGATGTG
+CTGGCGGAAGTAGCCTCCGGCGGTGAAGCAGAAGTGAACCAGGCTGTCGCGGCGGCAAAA
+GAGGCGTTCCCGAAATGGGCCAACCTGCCGATGAAAGAGCGCGCGCGCCTGATGCGCCGC
+CTTGGCGACCTGATTGACCAGCATGTGCCGGAAATCGCGGCGATGGAAACCGCCGACACC
+GGCCTGCCTATTCACCAGACTAAAACGTGCTGATCCCGCGCGCCTCGCATAACTTCGAAT
+TCTTCGCCGAAGTGTGCCAGCAGATGAACGGCAAGACCTATCCGGTTGACGATAAAATGC
+TCAATTATACGCTGGTGCAGCCCGTCGGCGTCTGCGCGCTGGTGTCGCCGTGGAACGTGC
+CGTTTATGACCGCGACTTGGAAAGTTGCGCCGTGCCTGGCGCTGGGTAACACCGCGGTGC
+TCAAAATGTCCGAGCTGTCGCCGCTGACTGCCGACAGGCTGGGCGAGCTGGCACTGGAGG
+CAGGAATTCCGGCAGGCGTGCTGAACGTGGTGCAGGGCTACGGCGCGACGGCGGGCGATG
+CGCTGGTACGCCACCATGACGTGCGTGCGGTGTCGTTTACCGGCGGTACCGCCACCGGTC
+GCAATATCATGAAAAATGCCGGGCTGAAAAAATACTCGATGGAGCTGGGCGGCAAATCGC
+CGGTGCTGATTTTTGAAGACGCCGACATTGAGCGCGCGCTGGACGCCGCGCTGTTCACCA
+TCTTCTCGATCAACGGCGAACGCTGCACCGCTGGGTCGCGCATCTTTATCCAGCAGAGCA
+TTTACCCTGAGTTCGTGAAGCGCTTTGCCGAACGCGCGAATCGCCTGCGTGTCGGCGATC
+CGACCGACCCGAACACCCAGGTCGGCGCGCTGATTAGCCAACAGCACTGGGAGAAAGTCT
+CCGGTTATATCCGCCTCGGCATTGAAGAGGGGGCAACGCTGCTGGCGGGCGGTGCGGAAA
+AACCCACTGACCTGCCTGCGCATCTGAAAGGCGGTAACTTCCTGCGCCCAACCGTGCTGG
+CCGATGTCGACAACCGTATGCGCGTTGCGCAGGAAGAGATCTTTGGGCCGGTCGCCTGCC
+TGCTGCCATTCAAAGACGAAGCGGAAGGGTTACGTTTGGCGAACGATGTGGAATACGGTC
+TGGCCTCTTATATCTGGACCCAGGACGTGAGCAAAGTGTTGCGCCTGGCGCGTGGGATTG
+AAGCCGGCATGGTCTTCGTCAACACCCAGAACGTCCGCGACCTGCGCCAGCCGTTCGGCG
+GCGTGAAAGCCTCCGGTACCGGGCGCGAAGGCGGCGAATATAGCTTCGAAGTGTTTGCGG
+AAATGAAAAACGTCTGCATCTCAATGGGCGACCATCCTATCCCAAAATGGGGAGTTTGAT
+ATGGGCAAGTTAGCGTTAGCAGCAAAAATTACCCACGTGCCGTCGATGTATCTTTCTGAA
+CTGCCAGGAAAAAATCACGGTTGTCGTCAGGCAGCCATTGATGGGCATATTGAAATTGGC
+AAGCGTTGCCGCGAAATGGGCGTTGACACCATTATCGTATTCGACACCCACTGGCTGGTG
+AATAGCGCTTACCACATTAATTGTGCCGACCATTTCCAGGGCGTCTATACCAGCAACGAA
+TTGCCGCACTTTATTCGCGACATGACCTATGACTATGACGGTAATCCGGCGCTCGGCCAT
+CTGATCGCCGACGAGGCGGTCAAACTGGGCGTGCGCGCCAAAGCGCACAACATCCCGAGC
+CTGAAGCTGGAGTATGGCACGCTGGTGCCGATGCGCTACATGAACAGCGACAAGCACTTC
+AAAGTGGTCTCCATCTCGGCGTTCTGCACTGTGCATGATTTTGCCGACAGCCGCAAACTG
+GGCGAAGCCATTCTCAAGGCGATTGAGAAATATGACGGTACCGTAGCGGTATTCGCCAGT
+GGTTCTCTGTCGCACCGTTTTATTGACGACCAACGGGCGGAAGAGGGGATGAACAGCTAC
+ACCCGCGAGTTCGATCATCAAATGGACGAGCGCGTGGTCAAGCTGTGGCGCGAAGGCAAA
+TTCAAGGAGTTTTGCACCATGTTGCCGGAGTACGCCGACTACTGCTACGGCGAAGGCAAC
+ATGCACGACACGGTCATGCTACTGGGAATGCTGGGGTGGGACAAATACGACGGCAAGGTG
+GAGTTCATCACCGACCTGTTCGCCAGCTCCGGTACCGGCCAGGTAAACGCTGTTTTCCCG
+CTGCCTGCGTAAGGGGGGTTTATGCCGCACTTTATTGCTGAATGTACTGAAAATATTCGC
+GAGCAGGCTGATTTACCAAGCCTGTTCAGCAAGGTAAACGAGGCGCTGGCCGCCACCGGG
+ATTTTCCCCATCGGCGGTATCCGCAGTCGCGCCCACTGGCTGGATACCTGGCAGATGGCT
+GACGGTAAGCATGATTACGCGTTTGTGCATATGACGCTGAAAATCGGCGCCGGGCGCAGC
+CTGGAGAGCCGTCAGGAAGTCGGCGAAATGCTGTTTGGGCTGATTAAAGCCCACTTCGCC
+GACCTGATGGAGAACCGCTATCTGGCGCTGTCGTTTGAGATTGCCGAGTTACATCCAACG
+CTCAATTACAAACAAAACAACGTACACGCGTTATTTAAATAGCACACTCTTTCGCCCGGT
+GGCGCTGCGCTGACCGGGCCTACAAAAAAACGATGCACGAAGCAGTACCGAACCGTAGGC
+CGGGTAAGACGCACCCGCGTCGCCACCCGGCGCAGCAGCACCGAATGAGGTAACAGGAAG
+CAACTATGCTCGATAAACAGACCCATACCCTGATCGCTCAGCGACTTAATCAGGCTGAAA
+AACAGCGTGAACAGATTCGCGCAGTGTCGCTGGATTATCCCAACATCACTATTGAAGATG
+CCTATGCCGTACAGCGTGAATGGGTCAATATCAAGATTGCCGAAGGGCGCACGCTCAAAG
+GCCACAAAATCGGCCTGACCTCAAAAGCGATGCAGGCCAGCTCGCAAATCAGCGAACCGG
+ATTACGGCGCGCTGCTTGACGATATGTTCTTCCATGACGGCGGAGATATCCCCACCGACC
+GTTTTATCGTCCCGCGTATTGAAGTGGAGCTGGCGTTCGTGCTGGCGAAACCGCTGCGCG
+GCCCTCACTGCACGCTGTTCGACGTCTACAACGCCACGGATTATGTGATTCCGGCGCTGG
+AACTGATTGACGCCCGCAGCCACAACATCGACCCGGAAACCCAGCGCCCGCGCAAAGTGT
+TCGACACCATTTCCGACAACGCCGCCAACGCCGGGGTGATCCTCGGTGGTCGCCCCATCA
+AACCAGACGAGCTGGATCTGCGCTGGATCTCCGCGCTGCTCTATCGCAACGGCGTGATCG
+AAGAAACCGGCGTCGCCGCAGGCGTGCTGAATCATCCGGCCAACGGCGTGGCGTGGCTGG
+CGAACAAGCTTGCCCCCTACGATGTCCAGCTTGAAGCCGGGCAGATCATCCTCGGCGGCT
+CGTTCACCCGCCCGGTGCCGGCGCGCAAGGGCGACACCTTCCATGTCGATTACGGCAACA
+TGGGCGCGATCAGTTGCCGGTTTGTGTAAGGAAAAAACGATGAAAAATGCTTTCAAAGAC
+GCGTTAAAAGCGGGGCGCCCGCAAATCGGTTTGTGGCTGGGGCTTGCCAACAGTTACAGC
+GCTGAACTGTTAGCGGGCGCCGGCTTCGACTGGCTACTGATTGACGGTGAACACGCGCCA
+AACAACGTGCAGACGGTGTTGACCCAGTTGCAGGCGATTGCGCCTTATCCCAGCCAGCCG
+GTGGTGCGTCCGTCATGGAACGATCCGGTACAGATTAAGCAACTGCTCGACGTCGGCGCG
+CAAACGCTGCTGATACCGATGGTGCAGAATGCCGATGAAGCGCGAAACGCCGTGGCGGCT
+ACGCGTTATCCGCCTGCCGGTATTCGCGGCGTGGGCAGCGCGCTGGCGCGGGCATCGCGC
+TGGAATCGCATTCCGGACTATCTCCACCAGGCCAACGACGCCATGTGCGTACTGGTGCAG
+ATTGAAACGCGTGAGGCGATGAGCAATCTGGCGTCAATTCTCGACGTGGATGGCATTGAC
+GGCGTGTTTATTGGCCCGGCGGATCTCAGCGCCGATATGGGCTTTGCCGGCAATCCGCAG
+CACCCGGAAGTGCAGGCGGCGATTGAGAACGCCATCGTGCAGATACGCGCGGCGGGGAAA
+GCGCCGGGGATTCTGATGGCCAATGAAGCACTGGCGAAACGTTATCTGGAACTGGGGGCG
+CTATTTGTCGCCGTCGGCGTTGACACCACGCTGCTGGCGCGCGGAGCGGAGGCGCTGGCG
+GCGCGCTTTGGCGCAGAAAAAAAACTGTCCGGTGCGTCCGGCGTCTATTAAGCCTGGGCC
+GGTAAGCGCAGCGCTACCGGGCAACCGTAGTACCCTACAAAATTCCCATCAGAGGAAAAA
+AAATGAGCGACACATCATCTGCACTTCCGGAAAGCCCCGAGTCTGTCGGTTCGCACAACG
+CGCTCAGCACGGGTCAACAAACCGTCATAAATAAACTGTTCCGCCGACTGATCGTATTTT
+TATTCGTGTTGTTTATCTTCTCGTTTTTAGACCGTATCAACATCGGTTTTGCCGGGTTGA
+CGATGGGGCAGGATCTGGGGTTAAGCGCCACCATGTTTGGTCTTGCCACGACGCTGTTTT
+ACGCCACCTACGTCATTTTCGGCATTCCCAGCAACGTGATGTTGAGCATCGTCGGCGCCC
+GCCGCTGGATTGCGACCATTATGGTGCTATGGGGCATTGCATCTACCGCCACGATGTTCG
+CGGTGGGACCGAAAAGCCTGTATGTGCTGCGAATGCTGGTGGGCATTACCGAAGCGGGCT
+TTTTGCCAGGAATATTGCTCTATTTAACCTACTGGTTCCCGGCATTTTTCCGCGCCCGCG
+CCAACGCATTATTTATGATTGCCATGCCGGCCACTACCGCGTTGGGGTCAATTGTCTCCG
+GCTATATTTTATCGCTGGACGGCATATTCAATCTGCATGGATGGCAGTGGTTATTCCTGT
+TGGAAGGATTTCCGTCAGTTTTGTTAGGCATTATGGTCTGGTTTTACCTGGATGATACCC
+CGGCAAAAGCCAAATGGCTGACGGCAGAGGATAAAAAATGTTTGCAGGAGATGATGGATA
+ATGATCGCCTGACGCTGGTTCAGCCTGAGGGGGCCATCAGCCATAACGCCATGCAGCAGC
+GTAGCCTGTGGCGCGAAGTATTCACGCCAATTGTACTGATGTATACGCTGGCCTATTTTT
+GCCTTACCAATACGCTTAGCGCCATTAGTATCTGGACGCCGCAAATCCTGAAAAGTTTTA
+ATGAAGGCAGCAGCAATATCACCATCGGCCTGCTGGCGGCGATCCCGCAGATTTGTACTG
+TTCTGGGCATGATTTACTGGAGCCGCCATTCGGACAAACATCAGGAGCGTAAACACCACA
+CTGCGTTACCGTTCCTGTTTGCCGCCGCGGGCTGGCTGCTGGCGTCGGCGACCGACCGTA
+ACCTGATCCAGCTCCTGGGGATCGTGATGGCATCCACGGGTTCCTTTAGCGCGATGGCGA
+TCTTCTGGACCACGCCGGATCAGTCGATCAGTTTACGCGCCAGGGCGATAGGCATTGCGG
+TCATCAATGCCACCGGCAATATTGGCTCCGCGCTCAGCCCGGTTATGATTGGCTGGCTAA
+AAGATATCACTGGTAGCTTCAATAGCGGACTCTGGTTTGTCGCTTCTCTGTTAGTCGTCG
+GCGCCGCCATTATCTGGCTCATTCCCATGAAAGCATCGCGTCCGCGCGCCACCCCTTGAG
+GAGAAACTATGTGCCAACGTGCGATCGCCAATATTGATATCAGCAAAGAGTATGACGAAA
+GCATGGGCAGTAACGATGTGCATTATCAGTCGTTTGCTCGTATGGCGGATTTCTTTGGTC
+GTGATATGCAGGCGCATCGCCACGACCAGTTTTTTCAAATGCACTTTCTTGATACCGGGC
+AGATTGAGCTACAGCTCGACGATCATCGCTATTCGGTGCAGGCGCCGCTATTTGTGCTAA
+CGCCGCCCTCGGTGCCGCATGCTTTTATTACCGAATCGGATAGCGATGGTCATGTTCTGA
+CGGTACGCGAAGAGCTGGTTTGGCCGCTGCTGGAAGTGCTTTATCCCGGCACCAGAGAGG
+CCTTCGGCCTGCCGGGAATCTGCCTGTCGCTGGCGGATAAACTCAACGAGCTGGCGGCGC
+TCAAACATTACTGGCAGCTAATTGAGCGGGAGTCCACGGAACAACTGGCTGGCTGCGAAC
+ATACCTTGGTACTACTGGCGCAGGCGGTATTTACCTTGCTGTTGCGTAATGCGAAGCTGG
+ACGATCATGCCGCAACCGGGATGCGCGGTGAACTGAAACTTTTTCAGCGCTTTACCCTGT
+TAATTGACAACCACTTCCATCAGCACTGGACGGTGCCCGATTATGCCTGTGAGTTGCATA
+TTACCGAATCTCGTTTGACCGATATTTGCCGACGTTTTGCTAATCGCCCGCCTAAACGCC
+TGATTTTTGATCGGCAATTACGCGAGGCGAAACGACTGCTGCTTTTTTCCGACAATGCTG
+TCAACGAGATCGCCTGGCAATTAGGTTTTAAAGATCCGGCTTATTTCGCCCGTTTCTTTA
+ATCGCCTTGCTGGCTGTTCTCCTTCGCAGTTTCGCCAACGTGAAGTTCCTTCTTTTCTCA
+ACTAAGAAGAGTAAAAACATGATGAAAAAAAGCGTCGCTATGCTGGCGGTTTGTATGCTG
+GCGCAAAGCCACCTTGCCATTGCTGCCGGTGCTCCTGCGCCTCAAGAGATCAACATTGTT
+TTACTGGGCACCAAAGGCGGGCCTTCTTTGCTCAATACAGCCAGACTACCGCAAGCGACG
+GCGCTCACTATCGGCGATAAGATATGGCTGATAGATGCCGGCTACGGCGCCAGTCTGCAA
+CTGGTGAAAAATGGCATTCCACTGCGCAACATCAATACTATTTTGCTCACCCATCTGCAC
+AGCGACCACATACTGGATTATCCTTCCTTGCTGATGAATGCCTGGGCAAGTGGCCTGAAA
+GACCATACCATACAGGTTTATGGCCCGCCGGGAACCCAGGCGATGACGAAGGCTAGCTGG
+AAGGTCTTTGACAGGGATATCACGTTACGCATGGAAGAAGAGGGGAAACCCGATCCGCGC
+AACCTGGTTAAGGCGACCGATATCGGCCAGGGCGTCATCTATAAAGATGAACTGGTCACA
+ATAAGCGCGCTGAAAGTGCCTCATTCCCCTTTCCCGGACGGTGAAGCGTTTGCTTACCGT
+TTTGATACTCAGGGTAAGCGAATCGTCTTCTCTGGCGATACGTCCTGGTTTCCTCCGCTT
+GCAACGTTTGCCCAGGGGGCGGATATCCTGGTACATGAGGCGGTACATGTCCCTTCGGTA
+GCAAAACTGGCTAATAGTATTGGCAACGGAAAAACGCTGGCTGAAGCGATTGCGTCGCAT
+CACACCACGATTGAAGATGTCGGTAAGATTGCTCGCGAGGCCCACGTGAAAAAACTGGTG
+TTAAGTCATCTGGTGCCTGCGACGGTTGCGGATGACGTCTGGCAACAGGAAGCCATGAAA
+AATTACCCGGGCCCTGTCATTGTCGGTCATGACAATATGACGATAAGCGTACCGTAAGCA
+TCAGGGCAGGGGTAAACCAACGGGCTGACATCATGTCAGCCCGTTATGCTTTTGGCGATC
+AGCATGACGCGCAGAGCTTTTGTCTAAAACATTATTTTTTCTAAACACTACTTGCTGATT
+TTTATGCACAATAAAGGTAAGATGATTGCTCTGGTGAAATATCACTAAATAAGAATGTCA
+CGGATGGAATTTATACAGTGTTAAAAGCAATGCGTTTCTTTGTCTGCCGGCCATCAGTAA
+TATCCTCTGTACCGGCTCCTCGTTGCCAAAGTGAAGGTTAGATTAAACGAATCAAAAAAT
+ATCCGAACTGTACAATTTTTATATCAAACATATTTAACTGCAAAATACTGCAAACGTGCG
+CGAATACACTTAATCAGTAGTATGGCTAATGCATACTTTATTTCCTAAACTTTCTTATTA
+TCTATTTGAATATTTCTGGTTAAACCAAATGGACAAACGGATACCAGACACTCGAAAGGA
+TAACTGCAAGCTTCTGGTTTAGGATTGCAGTTACCAGGACAGTGGCTATTTTCTTTAAGC
+GATTTCCATAGAACAGGATTAAACCTGGTAACGTTGTAAACAGTAACGGGATAAAGTTTA
+TCGTTTATTAAGACCCCCTTATTAAAGGGGATGATAATGCTACCTGGAGGCATGTAAACA
+TCTGCTTGATACCATAAGGTCAATCTAAGATTTTTTAGGCTATGTATGCATGAGAACGAC
+ACTCCGGTAGAGGGACTTATAACTGTATCAACGACCTTCCATTCCATTACAGAAACCTTT
+TAAGTGAAATAGTCAATGGTGTAAATTACATTTTCCGCAAGCCTATTCAGGATAGATGAA
+AATCTGAGAAACATAACTCGTTGAAAACGATCGTTTTTTATTGACCACTCTATTTATATT
+GATTTTTTAATAAGTTATTTATTTTATGCTCTTATTTTGTTTACATTCTATTACATTTTT
+GTATTTTATACGTGGTGTAAATATTGGGCGGTAGTGGATGTTATAACGCTCAATTATTAA
+ACTGTAAGTGACTGTTGTATTTCTTATTTATAAATACCGCAACCTGTAAGCGCAGCGGTA
+CATTTTACCCGGCATTGACAGGGAGAAGATACGTATCAACCAGAGAGTACAAAACAGTGA
+AAGAATATATTCTTCAGGCAATAAATGATCGCTAACGTGTAAAAATAACCGACGAATCTA
+AAAAATGTGAGCGTGTGGCTGGTCTTTATTTAAACGATGATCTTCGCAATGGCGCATTAA
+TGGCTGACATTGGCGGTTCGTTGATCGGCTTGTGCGCGGTGAAAGGCTATATTTCTTCCG
+TTGCGGGAAGTATGTCAGAGGCATTCTGGTATTATACCGGGTGTATGTAAATCCGGGGTG
+GGCAGAAAGCCCACCCTCGGTTTATTGCCTTTATCGTGGATTAGGGATGCGAGATAAAGC
+GAGAAAGGCGTTGCAGCAGTAACCGGTTTTCTTCGCGCAGCCGTGAATTCTCTTCCAGCA
+GCGTTAACGCGACCGCGATCCCTGGCCAGTCGAGCGCCAGCTCCTCGCGTAAGCGTAGCG
+CGCGTTGTACCACGCTCGCTGCGCGATCGTCGAATTGCCAGTCGGCGTTATCGTCTTCGT
+AAGGCTCAATTACGCCAAGTCCGACGATTTCGTTTAGCTCCTCTTCCGTCACGCCGGTGT
+GCAAACAAAATTCGGTGATGGTAAAGGTGACAGTGATGTTAGCCATTATGCTTTCCCCCA
+TTGCTGGCGTGGGTCAAAGGACGACTGCGCGTCCGCCAGTTGTTGCCACAGGGCAGCTGT
+TTTCTCGTCAGGTTTCGGCGGCATAACGATTTTGATGATGGCATAGAGATCGCCAGTGTG
+CTTTTTACTGGCTAATCCTTTTCCTTTGATACGCAGCCGCTGACCTGCCTGGCTGCCGGG
+GGGAATGGTCAGCAAAATACGCTCTTTAAGCGTTGGCACAGACACCTTAGCGCCGAGCGC
+CGCCTCCCATGGGGCAAGCGGAAGGACGACTTCCAGATCCTGATTGACGATATCAAAGAG
+CGGATGCGGGGCAATATGGATAACGAGCCATAAATCGCCATTAGGTCCGCCGTTTTCCCC
+CGGCGTGCCCTGGCCTTTCAGTCTGATTCGTTGCCCGTTGCTGACGCCAGCCGGGATTTT
+CACATTCAATGTTTTGGGAATTTCCCGCTCCACCAGGCCGAACGCGTTATAAACGGGGAC
+GGAATAGCTAATCGTACGCTGGTGCTCTTCCAGCGTTTCTTCCAGGAATACCGCCACTTC
+AATTTCGATATCATGACCGCGTGCGGCGTGGCGGTGGTGCGAATGACGACCGTGCTGACC
+AAAAATAGACGAGAAAATATCATCAAAATCTTCGGCGTTATACGGCTGGCCTTCGTGTTG
+CTGGAACTGGCGATTAAATTGTGGATCGTTACGGTGTTGCCATAACTGGTCATACTCGGC
+GCGCCGTTGCTCATCACTCAGCACTTCCCATGCTTCAGCAACCTCTTTGAAACGGGCTTC
+GGCATCGGGTTCTTTGCTGACATCTGGATGGTACTTGCGGGCCAGTCGGCGATAGGCGGT
+CTTAATCGTCTTGAGATCGTCCGTCGGTTTCACGCCCATAATGGCGTAATAATCCTTAAG
+TTCCATAGCATCATCTCGCTAAATCAATACATACAGAAGGGACCCCAAAAAGGTTTCTCC
+ACTAAGTGTAGGGTAAACCTGAAAAGTGCGTATGAAAACACCAGTTATATCATTAGTAAG
+AATAAATTACGTTGTTCGACTATCAGAAGGTTGCGCAGCGCGCCGACATAACTTTACAGG
+GGAAAGGTTGCCAAAACCGCGCCAGTGGCTAAGATAACTCGCGTTAAACAGTGAGGGCGC
+AATGGCGAAACAACAACGGATGGGCTGGTGGTTTCTTTGCCTTGCATGTGTCGTGGTAAT
+GGTTTGTACCGCGCAACGCATGGCGGGCCTGCACGCCTTGCAGATGCAGGCGACGGCCTC
+TGCTGCGGTGGTCAGCGCTCCCTCCTCGACAGATGACGGCTCGCCGGTCACTCCCTGCGA
+ATTAAGCGCCAAGTCGCTGCTGGCGGCGCCTCCAGTACTCTTTGAAGGTGCTATCCTTGC
+GCTTTATCTACTGCTTTCCTTACTGGCGCCTGTCCGGGTCATGCGCCTGCCGTTTTCGCC
+TCCACGGGCTATTTCGCCGCCCACATTACGGGTACATCTACGATTTTGTGTCTTCCGTGA
+ATGACAGACCGGTTATTACTAACGGTTAATTACTCATTCACGGAGAAAAAATATGATGAT
+TTTATTCAGGCGGATACTGTTCTGCCTGTTATGGCTTTGGCTGCCCGTCTCCTGGGCGGC
+GGAAAGCGGCTGGCTGCGTTCGCCCGATAACGACCATGCCAGCATACGGCTACGTGCCGA
+TACGTCCGCTAACAGTGAGACCCGGCTGTTGCTGGATGTCAAACTGGAAAACGGCTGGAA
+AACCTACTGGCGCGCGCCGGGGGAAGGGGGCGTGGCACCCTCTATCGCCTGGAAAGGCGA
+CATGCCTGAGGTAAGCTGGTTCTGGCCAACCCCCTCGCGCTTTGATGTGGCGAATATCAC
+CACCCAGGGATATCACGACGAGGTGACCTTTCCGATGATCGTGCGCGGTACGCCGCCGGC
+GACCTTGCGCGGTGTGTTGACGTTATCAACCTGCAGCAATGTTTGTCTGTTGACCGATTA
+CCCCTTTTCCGTGACGCCCACTGTGCAGAATGCCGATTTTGCCCATGACTATGCGCGGGC
+GATGGGTAAAGTTCCGCTCCGCAGTGGGCTAACGGACTCGCTTGACGTTGGCTATCGCCC
+GGGAGAACTGGTGGTCACTGCTACGCGAGCGGCGGGCTGGTCATCGCCCGGGCTCTATCT
+TGACACCATAGATGACGTCGATTTTGCGAAGCCTCGCCTGCGCGTAGAGGGCGACAGGTT
+ACAGGCGACGGTGCCGGTGACGGACAGTTGGGGCGAAAAGGCGCCCGATTTGCGCGACAA
+ATCGCTGACCCTCGTGTTAGCCGATGGCGCTATCGCCCAGGAGAGCACGCAAACCATTGG
+CGCTGCGCCAGCGCAAACGCCGGACAATGCGGCGCTACCTTTCTGGCAAGTTGTAATGAT
+GGCGCTAATCGGCGGACTGATTCTTAATTTAATGCCCTGCGTACTGCCTGTTCTGGGCAT
+GAAACTTGGCTCTATTTTATTGGTAGAGGAAAAAAGCCGCTCTCACATCAGGCGACAATT
+TTTGGCTTCGGTCGCCGGTATCATTGCGTCATTTATGGCGCTGGCGGCGTTTATGACCCT
+CCTTCGCCTGTCAAACCATGCGCTGGCCTGGGGAGTCCAGTTCCAGAATGCATGGTTTAT
+TGGTTTTATGGCGCTGGTGATGTTGTTGTTTAGCGCCAGCCTGTTCGGGCTTTTTGAGTT
+CAGGCTTCCCTCATCTATGACCACGAAACTGGCCACTTACGGCGGTAACGGTATGTCGGG
+ACATTTCTGGCAGGGGGCGTTCGCCACGCTGCTGGCGACGCCTTGTAGCGCGCCGTTTCT
+GGGCACGGCGGTCGCGGTGGCGCTCACGGCGTCGCTGCCGACGCTGTGGGGGCTGTTCCT
+TGCGCTTGGCCTGGGAATGAGCGCGCCGTGGCTACTGGTCGCGATACGACCAGGGCTTGC
+GCTACGTTTACCGCGCCCCGGGCGTTGGATGAATGTCCTGCGCAGGATCCTCGGTCTGAT
+GATGCTGGGGTCGGCTATCTGGCTGGCGACGTTACTCCTGCCGCATTTCGGCTTCACTGC
+GTCAAAGAGCGCGCAAGACACGGTTCAGTGGCAACCGTTGAGTGAACAGGCAATCCAGTC
+GGCGCTGGCGCAGCATAAGCGGGTATTTGTCGATGTCACTGCGGACTGGTGTATTACCTG
+TAAAGTGAATAAATACAACGTCCTGCAAAAAGAGGATGTGCAGGCCGCCTTGCAACAGCC
+GGATGTTGTGGCGCTGCGGGGAGACTGGACGCTGCCGTCCGATGCCATTACAGATTTTCT
+GAAAACGCGCGGCCAGGTCGCCGTGCCGTTTAATCAGGTATATGGCCCCGGTTTGCCGGA
+AGGGGAGGCACTGCCCACTTTGCTGACCCGCGATGCGGTATTACAAACGTTGAAAAAAGC
+GAAAGGAATAACCCAATGAAATACATGATTGTTTTACTGCTGGCGCTGTTTTCGACGCTG
+AGCATCGCGCAAGAAACCGCTCCTTTTACGCCGGATCAGGAAAAGCAGATTAAAAATCTG
+ATCCATGCGGCGTTGTTTAACGATCCTGCCAGCCCGCGGATAGGCGCTAAACACCCTAAG
+CTGACGCTGGTGAACTTTACGGATTACAACTGCCCGTACTGCAAACAGCTCGATCCGATG
+CTGGAAAAGATTGTGCAGAAATATCCTGACGTTGCGGTCATTATTAAACCGCTGCCATTC
+AAAGGAGAGAGTTCCATACTGGCGGCGCGTATTGCGCTGACCACCTGGCGCGATCATCCG
+CAACAGTTCCTCGCGCTACATGAAAAACTTATGCAAAAGCGCGGTTACCATACGGATGAC
+AGTATTAAACAGGCCCAGCAGAAAGCAGGGGCGACGCCAGTGACGCTGGATGAAAAAAGC
+ATGGAAACGATACGCACTAATTTGCAGTTGGCAAGACTGGTCGACGTGCAAGGAACGCCA
+GCGACGATCATTGGCGACGAGCTGATTCCGGGCGCAGTGCCCTGGGATACGCTGGAAGCG
+GTGGTGAAAGAAAAACTGGCGGCTGCCAATGGCGGGTAAACTGCGGCGTTGGCTGCGTGA
+AGCCGCGGTTTTTCTGGCGCTCCTCATCGCGATAATGGTGGTCATGGACGTCTGGCGCGC
+GCCGCAGGCGCCTCCGGCGTTTGCCGCGACACCATTACATACGCTGACGGGAGAGTCGAC
+AACTCTGGCGACCTTGAGCGAGGAACGCCCCGTACTGCTCTATTTTTGGGCCAGCTGGTG
+CGGGGTATGCCGCTTTACCACGCCTGCGGTCGCTCACCTGGCGGCGGAAGGGGAAAACGT
+CATGACCGTTGCGCTCCGCTCCGGCGGTGATGCTGAGGTTGCCCGCTGGCTGGCGCGCAA
+GGGCGTTGACTTCCCGGTCGTCAATGATGCTAACGGCGCCTTATCCGCTGGCTGGGAAAT
+CAGCGTGACGCCAACGCTGGTGGTGGTTTCACAAGGTCGGGTTGTGTTCACCACCAGCGG
+CTGGACCAGCTATTGGGGCATGAAGCTTCGGCTGTGGTGGGCAAAAACGTTCTGAATATG
+CGCCGGGGTTTCCCGGCGCTAACGCGTTTACTGTAAGAAAAACCTCCGTTTTGCGAAATC
+GTTCCCGGAAAAATGATCCATTTCTGTCACACTCAGAACGATTTGATAACAACAAGAGGT
+CATAGGGATGAAAAAATCATTACTCGCTGTTGCTGTGGCAGGGGCTGTTTTGTTGTCATC
+CGCCGTACAGGCGCAGACAACGCCGGAAGGTTATCAATTACAACAGGTGCTGATGATGAG
+CCGCCATAATCTGCGGGCGCCGCTGGCGAATAATGGCAACGTACTGGCGCAGTCGACGCC
+GAACGCCTGGCCGGCGTGGGACGTTCCCGGCGGGCAACTGACGACGAAAGGCGGCGTGCT
+GGAAGTCTATATGGGACACTACACACGTGAATGGCTGGTCGCGCAGGGGCTGATACCGTC
+GGGAGAATGTCCGGCGCCCGACACGGTATATGCCTATGCGAATAGTTTGCAGCGCACCGT
+CGCCACCGCGCAATTTTTCATTACCAGCGCTTTCCCCGGCTGTGATATTCCTGTTCATCA
+TCAGGAAAAAATGGGCACTATGGACCCTACCTTCAATCCGGTGATTACCGATGATTCCGC
+CGCGTTCCGGCAACAGGCCGTACAGGCGATGGAAAAGGCGCGTAGTCAGCTACATCTTGA
+TGAGAGTTATAAACTGCTTGAGCAGATAACGCATTATCAGGACTCGCCGTCCTGCAAAGA
+GAAGCATCAGTGTTCGCTAATCGACGCGAAAGATACCTTCAGCGCGAACTATCAGCAAGA
+GCCTGGCGTGCAGGGGCCGCTGAAAGTAGGGAACTCGCTGGTGGATGCGTTTACCCTGCA
+ATATTACGAAGGCTTTCCGATGGATCAGGTCGCTTGGGGCGGGATCCACACCGATCGGCA
+GTGGAAGGTGCTGTCAAAACTGAAAAACGGCTACCAGGACAGCCTGTTTACCTCACCCAC
+GGTGGCGCGCAATGTCGCTGCGCCGCTGGTAAAATATATCGATAAGGTGCTGGTTGCCGA
+GCGCGTTAGCGCGCCGAAGGTTACCGTGCTGGTGGGGCATGATTCCAATATCGCGTCGCT
+GCTGACGGCGCTGGATTTTAAACCCTATCAGCTCCATGACCAGTATGAGAGAACGCCGAT
+TGGTGGTCAGCTTGTCTTCCAACGCTGGCATGACGGTAACGCTAACCGGGATTTGATGAA
+AATCGAGTATGTCTACCAGAGCGCCCGGCAGTTACGTAATGCGGAAGCGTTAACGCTCAA
+ATCGCCTGCGCAAAGGGTAACGCTGGAACTGAAAGGATGTCCGGTGGATGCGAACGGCTT
+CTGTCCGCTGGATAAGTTCGATAACGTCATGAACACTGCTGCAAAATAGCCGTATGCCCC
+CGCGCAGGCGGGGGCGTTTGTGTTATACGTTCTTACGTTCGATGACTTGTTCGCCCCAGA
+AGAGCGAGTCTTTGTCCGTTTTCTCAAAGGCTTTGATCAGCACCTCATCGCTACCTTCTT
+CCCATATTTTTTCTGCCAGTTTCTCGTCATAGTGAGCGACTTCAAAAATGGCTTCGGCAA
+TTTCCGGCGACGTATTACGCAGGCTTGCCCATTCGCCGACGCGGTGCGCTTTTGCTTCTT
+GAGTTGGCATTCGAATCCTCCTGTTGAAGATTAGCCGTTGAGTTTGACTGCCAGACCGGC
+GACGTATTCCCCCTGATAGCGAGCGATAGAGAGTTCCTCCTGGCTTGGTTGACGTGAACC
+GTCGCCTCCAGCGATAGTCGTTGCGCCGTAAGGCGTACCGCCGCGAACCTGGGAGACGTC
+AAACAGTTCCTGTGCGGAATAGCCTATCGGGACAATCACCATCCCATGATGGGCAAGCGT
+AGTCCAGGTCGAGGTGATGGTCTGCTCCTGGCCGCCGCCCGTTCCGGTAGAACTGAACAC
+GCCGCCGAGCTTGCCGTATAGCGCGCCGGATGCCCACAGTCCGCCGGTTTGGTCCAGGAA
+GGTACGCATCTGGCCTGACATATTGCCAAACCGGGTTGGCGTACCAAAAATAATGGCATC
+GTAATCCGCCAGCTCCTGTGGGGTGGCAACCGGTGCGTTTTGCGTTTTACCGCCAGCTTT
+TGCGAAGATTTCAGGCGGCATTGTTTCTGGCACACGCTTTATAATGACCTCTGCGCCGTC
+GACTTTCTTTGCCCCTTCCGCCACCGCGTGCGCCATGGTTTCAATGTGTCCGTACATGGA
+ATAATAGAGCACCAGAATCTTTGCCATTTGTACTACTCCTCAGATTAATCAGGTGTGTAG
+CGATTCGCTACGTCTATTTAAAGATATGCCCTCCTGTGAAGAGTGCAAATTTCAGCGCCA
+TTTCTTTGATTTATAACAATAATTAATTTGGCGACCTTTGTTGCAAAATGATACATTTTT
+AAGCGCTTTGATTTTTCCAAGTATAAGAATAACTTATTTATTTCTCATGGTTATTATTCT
+GCATATTCGGCTTCTGATGTTGCAGAATATTTCGGTAAGCGGCCTACCACGGCGTTTTTC
+ACTATGCTTAATTTTACGCGGCGTTACTGATGATATCGTTCATACGCGGCGCGAGGAGAT
+ACTCCTCATTACCTATGCAATATGATGTCTAATCTATGACGGAGGTCAGTAATGGCAAAC
+CATCGTGGCGGTTCCGGTAATTTTGCGGAAGACCGCGAAAGAGCATCAGAAGCAGGTCGT
+AAAAGTGGTCAGCACAGCGGGGGCAATTTTAAGAATGACCCGCAGCGTGCATCCGAAGCA
+GGCAAAAAAGGGGGCAAAAGCAGTAACCGTAATCGCTAGCCTGCGTCACAACAGCAAGAC
+GCTGAGCGTTTCGCGCTAAAAAGCGTCATGCGCCACCGCCGCCGGATTTCCGGCGGTTTT
+TTTGTTTATAAGCCGGATTTGGTATGCTTGCGTCCTGACGAAAAGGAGAGGGCGCATGTC
+GCAACGCACAGAGAAAAAAATCGGGAAACGTTCGCAGGCCACCGGTGCAAAACGGCAGCT
+TATCTTAACCGCCGCGCTTGCCGTTTTTTCCCAGTATGGCATTCATGGCGCGCGTCTTGA
+ACAGGTCGCCGAGCGGGCAGGCGTCTCCAAAACCAATCTGCTTTATTATTATCCCTCGAA
+AGAGGCGCTGTATGTCGCGGTAATGCGACAGATTCTGGATGTCTGGTTGGCGCCGCTCAA
+GGCGTTTCGCGCAGAATTTTCCCCTCTGGAGGCCATCAAAGAGTATATCCGTCTCAAGCT
+GGAGGTTTCGCGTGATTATCCGCAGGCGTCGCGGCTCTTCTGCATGGAGATGCTGGCGGG
+CGCGCCGCTCTTAATGGATGAACTGACCGGCGATCTAAAAGCGTTGATAGATGAAAAATC
+CGCGCTGATTGCCGGATGGGTGCACAGCGGGAAACTCGCGCCCGTTTCTCCGCATCATTT
+GATCTTCATGATTTGGGCCGCCACGCAACATTACGCCGATTTCGCCCCTCAGGTTGAAGC
+GGTAACCGGCGCGACGCTTCGCGATGAAGCCTTTTTCAACCAAACGGTCGAAAGCGTTCA
+GCGCATTATTATTGAAGGGATTCGCGTGCGTTAACCGGCTGGCGGCAGTGGGCAGCTCAA
+ATCACCCTCTTCACATTGCAGCAGCGAGGCTAAAAACGCTTCGCGTTCGTTCGTTTTATC
+GGTCAGGCACTGGCTGGCGATCATGGGTTGAACGCTGCCGCCCTCCGTACCTGAGCGAAT
+CAGCGCGCAATCGGCGTCGCGCAGGGCAATCCATGCCACCTGCGCTTTTTGCAATAGCTC
+ACGCTGCGGCGGTTGCGCACGCTTAATCGCGCTTTGATAGGTTTCGTTCAGCTTTTTATC
+TGCCGCCTGGTATTGCGCGGCGGCGCAGCGATTCATTTCCAGCTGCGTACTGGCGCTGGC
+ACACTCATCGGCCAACGCCTGACTGCTGAACAACAACGCCGCGCAGGTAAGGAAAATTCG
+TTTCATACGGTCCTCTGACGCGAGTAAAAAGGCTCCATTGACGGAGCCTTAATTAACCAC
+AGTATTAGCCAATTGTCATCAGGCTGGCATTACCGCCAGCGGCGGCAGTGTTTACGCTCA
+GCGAACGTTCAATATAGAGCCGTTCCAGCAGCATATTGCTTTCGCCGCGGGCGAACCCCT
+GTACCGACACTATCGCGCCTTCGCGGGCGGCGACGGCTTCGCACACGGTTCGCAGCTTGT
+CGGAGTCGCCGTGGAAAATCACCGCGTCAAACGGCTGCGCCATCAGCGTTTCCGCTTTCG
+CAAACTGGACACGCGCCGCGACGGCGGCGGGGAGACGTTTCGCCAGATCGCGGTGGAAGG
+CGTCGTCTGACCATAGCGCCTGACTGCCGACGGCGAGAACGGCGGCAAGCTGCGTCAACG
+CGTCCTGTTCATCATCAGCCAGGCATAACACCCGTTCACGCGGCAACAGCGTCCAGGTAT
+TACGCTCGCCGGTCGGCCCCGGTAGCAGGCGCTGCGTGCCGGCCTGCGCCAGATCGGCGA
+ATTGTCGGCAGAGCGTCTGTAGCGCCGGGCGATCCGCCGCCCATTGCGTCAGAGCGGTCA
+ACGGCGCGAGTAGCGTGGTTTTAAGCTGCGCATCCACCGGGTAACGCGCATCCTGACGAG
+TCAGCGTCGTATTGAGCGCATTGGGCGGGCGGTGTGCCAGCAGGCGGTAGAGATAGAGCG
+GCCCTCCCGCTTTTGGCCCGGTGCCGGACAGGCCTTCGCCGCCAAACGGCTGGACGCCGA
+CGACCGCGCCCACCATATTACGGTTAACGTACAGGTTGCCGACATGGGCGGAACCGGTGA
+CTTGCGCAATGGTTTCATCAATACGGGTATGTACGCCCAGCGTTAGCCCGTAGCCGGAAG
+CGTTAATCTGTTCGATAAGCTCCGCCAGTTGGTTACGGTTATAACGCACGACGTGCAGCA
+CGGGCCCGAAGACCTCTTTTTCCAGTTCTGCGAAGTTTTCCAGCTCAATAAGCGTGGGCA
+TAACAAACGTACCGGTCTGCCATTCCTGCGCGTCATCGCTGTTTTCACGCGCGGCCTGGA
+AAACCGGGCGGCCTTTGGCGCGCATCGTCTGGATATGACGTTCAATGTTGGCTTTGGCCT
+CGCTATCGATCACCGGCCCGATATCGGTCGTCAGACGGCCTGGATTCCCCATCCGACACT
+CCGCCATCGCGCCGCGTAACATTTTCAGCGTATGTTCGGCGATATCGTCCTGCAAACACA
+GCACGCGGAGCGCGGAACAGCGTTGTCCGGCGCTGTCGAAGGCGGAAGCCAGCACATCCA
+CGACCACCTGCTCGGTGAGCGCGGAAGAGTCGACAATCATAGCGTTCATACCGCCGGTTT
+CCGCAATCAACGGAATAGGGCGCCCCTGGGCGTCAAGACGCGTGGCGATGTTGCGCTGCA
+ACAACGTCGCGACCTCCGTGGAACCGGTAAACATCACGCCGCGTACACGCGCATCGGCGG
+TAAGCTGGGCGCCGACGGTTTCTCCCCGTCCCGGCAACAGTTGCACGACGCCCGGCGGTA
+CGCCCGCTTCCAGCAAAATGGCAATGCCCTGGGCGGCAATCAGCGATGTCTGCTCTGCCG
+GTTTCGCCAGAACGCTGTTACCTGCCGCCAGCGCGGCGGCGATTTGGCCAGTGAAAATGG
+CCAGCGGAAAGTTCCACGGACTGATACAGACCACCGGCCCTAACGGGCGATGCGTTTCGT
+TATCGAAATCGTCACGCACTTGACCGGCATAATAATGGAGGAAGTCTACCGCTTCGCGCA
+CTTCGGCAATGGCGTTGCTGAACGTTTTCCCCGCTTCACGCACCAACAGGCCAATCAACT
+GCTGCATTTGGTCTTCCATCAATACCGCCGCCCGCTGCAAAATAGCGGCGCGTTCTTGCG
+GCGGCGTCGCAAACCAAACCGGCGCCTGATTGACCGCGTTTTGCAACGCCTGTTCAACCT
+CGCTTTCTGTCGCTTCGCGTCCCCAGCCAACAATATCTTTCGGTTCCGCCGGGTTGATAA
+CCGGCGTCATCTCACCGTCGGCCACCGGTTGTTCCAGCACAGGTTTGGCCTGCCATTTCT
+GCATGGCGTTGCTTAACAGGGCAGAAGAAAGCGAGGCGAGGCGATGTTCATTCGCTAAAT
+CAAGTCCGGCGGAGTTTATCCGACCTTCGCCGTACAGATCGCGCGGCAGCGGAATTTTTG
+GATGCGGTATGCCAGCCTGACCTTCCTGCTGCGCCAGTTTTTCCACGGCCTCGACCGGGT
+CGGCCACCAGTTCATCGAGCGGTAGGGTGGCATCGGCGATGCGGTTGACAAAAGAGGTGT
+TGGCGCCGTTTTCCAGCAGGCGTCGTACCAGATAGGCCAGCAGGGTTTCGTGTGTTCCCA
+CCGGCGCGTAAATACGGCAGGGACGGTTAAGTTTTCCGTCCCCCACTTTACCGGTGACCT
+GTTCATACAGCGGTTCTCCCATGCCGTGCAGGCACTGGAATTCGTACTGACCCGGATAGT
+AATTTTGCCCGGCCAGATGATAAATCGCCGCCAGTGTGTGAGCGTTATGGGTCGCGAACT
+GCGGGTAGATCAGATTAGGGACGGCGAGCAGTTTTTTCGCGCAGGCCAGATAAGAGACAT
+CGGTATACACTTTGCGGGTATAAACTGGATAGCCCTCCAGCCCTTCCATTTGCGCGCGTT
+TGATCTCGCTATCCCAGTAGGCGCCTTTCACCAGACGAATCATCAGCCGACGGCGGCTAC
+GGGAGGCCAGATCGACTAAATAATCAATGACCAGCGGGCAGCGTTTCTGGTAAGCCTGAA
+TCACAAAGCCAATGCCGTTCCAGCCCGCCAGTTCGGGTTCGAAGCAGAGTTTTTCCAGCA
+GATCAAGCGAGATCTCCAGACGATCCGCCTCTTCGGCGTCGATATTGAGACCGATATCAT
+ACTGGCGCGCCAGCAGCGTCAGGGATTTCAGGCGCGGATAAAGCTCCTCCATTACCCGAT
+CGTATTGCGCGCGACTATAGCGTGGATGCAGGGCGGACAGCTTAATCGAGATGCCTGGCC
+CTTCGTAAATACCGCGACCGTTAGACGCTTTGCCGATGGCATGAATCGCTTGCTGGTAAG
+AGACCATATAGGCCTGCGCATCGGCGGCGGTTAACGCGGCTTCGCCCAGCATATCGTAAG
+AATAGCGGAACCCTTTCTCTTCCAGTTTTCGGGCATTCGCCAGCGCCTGAGCAATGGTTT
+CGCCAGTCACGAACTGCTCGCCCATTAAACGCATCGCCATGTCGACGCCTTTGCGGATTA
+ACGGTTCGCCGCTCTTGCCGATAATGCGGTTCAGCGAGCGCGAAAGATTGGCTTCGTTAT
+GCGTTGAGACCAGTCGGCCGGTAAAGAGCAGCCCCCAGGTGGCGGCGTTTACAAACAGCG
+ACGGGCTACGGCCAATATGCGACTGCCAGTTGCCATTACTGATTTTGTCGCGAATTAACG
+CATCGCGCGTAGCTTTGTCGGGAATACGCAGCAGCGCTTCCGCCAGACACATCAGCGCTA
+CGCCTTCTTGCGAAGAGAGGGAAAACTCCTGCAACAGGCCTTGCACCATACCCGCGCGAC
+CGCTGGCGGATTTTTGATTGCGCAATTTCTCCGCCAGTTGATACGCCAGTTTATGCGCCT
+GCTCAGCGACAGGCGGCGACAGGCGCGCCTGTTCCATTAGCATTGACACCGCATCGGTTT
+CCGGGCGGCGCCAGGCGGCGGTGATGGCGGCGCGAGAGACGGATTGGGGAAGAATCTGTT
+CGGCAAACTCCAGAAAGGGCTGATGCGGCTCATCCTGCGGCGCGACCGGCTCCTCGCTTT
+CATTTGCCGCGCCGGCAAACAGCGCAGGTAGCTCCGGTAGCGTATCGCTATTTTCCAGCT
+TGTCCAGATAGCTAAAGATTGCCTGTTTTATTAACCAGTGCGGCGTGCGATCGATACGCG
+ACGCGGCCATTTTGATCCGTTCGCGCGTGGCGTCGTCCAGCTTAACCCCCATCGTGGTGG
+TTCCCATACCCTCTACTCCTGTTATTCGCTCTATCTGCTAACCAATAGTTAGCGGAAAAT
+ATCCACTATATTGCAACTTTGTGCAACCGCGTTAAATGTGACCTGCGTTGCAAGCTTAAA
+AATGAATAAATTGTTAAAAAAAGAAAGTGGGAGTCTGACGGGGAAAACCATCTGGATTTA
+TTTTCTCTGCGGTAGTTAACACTTTTAAAAGGTGCAACCGCAAAAAATGTGAGAGAGTGC
+AACCTGGAGAAAAATAGTATCCCTCTGCAATCAAATTTGATGTAAATGGTGTGTTAAATC
+GATTGTGAATAACCACCGATTCCGGCAGGATACGGTCGCCCTGGTAAACATAACACCCTT
+GCCACGTTCCGGCAGGGTACAAAACGGCACGCTACGGTAGTGCCAATAAATAAATTTGGA
+GAACCTTGATGGCTATTAGCACACCGATGTTGGTGACATTCTGTGTCTATATTTTTGGCA
+TGATATTGATTGGGTTTATCGCCTGGCGCTCAACCAAAAACTTTGATGACTATATTCTTG
+GCGGTCGCAGCCTGGGGCCGTTTGTTACGGCTTTATCAGCCGGCGCGTCGGATATGAGCG
+GCTGGCTGTTAATGGGGCTGCCTGGCGCTATCTTTCTGTCGGGGATCTCTGAAAGCTGGA
+TCGCCATTGGCCTGACGTTAGGCGCATGGATTAACTGGAAGCTGGTGGCCGGGCGCCTGC
+GCGTGCATACCGAATTTAACAATAACGCGCTCACGCTGCCGGACTATTTTACCGGTCGGT
+TTGAGGATAAGAGCCGAGTCCTGCGTATTATTTCCGCGCTGGTCATTCTGCTGTTTTTCA
+CTATCTATTGCGCATCAGGTATTGTCGCTGGGGCACGACTGTTCGAAAGCACCTTCGGTA
+TGAGCTATGAAACCGCACTGTGGGCGGGGGCCGCGGCAACCATTATTTATACCTTTATCG
+GCGGGTTTCTTGCCGTTAGCTGGACGGATACCGTTCAGGCCAGCCTGATGATTTTTGCGT
+TAATCCTGACGCCGGTGATGGTTATTGTCGGCGTAGGCGGTTTTAGCGAGTCGCTGGAAG
+TGATCAAGCAAAAGAGCATCGAGAATGTCGACATGCTCAAGGGGCTGAATTTTGTCGCTA
+TTATTTCTCTGATGGGCTGGGGGCTGGGTTACTTCGGTCAGCCGCATATCCTGGCGCGCT
+TTATGGCGGCGGATTCCCATCACAGTATTGTTCATGCGCGTCGTATCAGTATGACCTGGA
+TGATTCTGTGTCTGGCGGGCGCGGTGGCGGTGGGCTTCTTTGGCATTGCGTACTTTAACA
+ATAACCCCGCGCTGGCCGGGGCGGTGAACCAAAACTCAGAACGCGTATTTATTGAACTGG
+CGCAGATCCTGTTTAACCCGTGGATTGCCGGTGTTCTGCTGTCTGCTATCCTGGCGGCGG
+TGATGTCGACGTTGAGCTGTCAGTTGCTGGTATGCTCCAGCGCGATTACGGAAGATTTAT
+ATAAGGCTTTTCTGCGTAAAAGCGCCAGCCAGCAAGAGCTGGTATGGGTAGGGCGAGTGA
+TGGTGCTGGTGGTAGCGCTGATCGCCATTGCGCTGGCGGCGAATCCTGATAACCGTGTGC
+TGGGGCTGGTGAGCTACGCCTGGGCTGGATTCGGCGCGGCATTTGGACCTGTTGTCCTGT
+TTTCTGTGATGTGGTCGCGTATGACACGTAACGGCGCGCTGGCGGGAATGATTATTGGCG
+CGGTGACGGTTATCGTCTGGAAACAATATGGCTGGCTGGATCTGTATGAGATTATCCCTG
+GCTTCATTTTCGGCAGCCTGGGGATCGTAATCTTTAGCCTGCTTGGCAAAGCGCCGACAG
+CAACGATGCAGGAACGCTTTGCAAAAGCGGACGCGCATTATCATTCCGCGCCGCCGTCGA
+AGCTACAGGCGGAATAACCGACATGTCCGATAGCATTATTGCCATCGGATATATTTTACC
+AGGCGGCGTTATACGCCTGCCTGGTCCTCTGATAAGTCCCGGACCGATTGACTGAAGGTG
+TTCAGGTAAATGAGGACGCTCTCTCAAACTATTTTTAGCCCTTGAGCGCCGTTAACGCGG
+CCGTAATACGTCTCGAGTAATCACTAACTGACCATATATGAAATCGCCTGTTAATGGTAC
+CAATAGCCTTGACGCAATAGAGTAATGACAAAAATTAAAACGAGTCAGCGTTACTGGCGT
+AAGTATGCCGCACAAAATTTTGCATAAATAATGCCGTTTTAGCGATGGGAGAGAGGACAC
+GTTAATTACTCCGTTTTAATCTTTTATATGTTGAATATTCAATGGGTTATGGGTGTTTTC
+ACCATTAATACCCATAGTAGCTAATGATTATCTTTTTTAGTCTCCTGCCAATGAAATAAT
+TGTGTAATCTTTCTGTAAGAGACTGACAATGACGCAATAATGTTTGGTTAATGTTTGGTG
+AATATATTGTTGCATTATTGATGTTTTGTGTTGTACTTAGTAGTAATAGCGGTAGTTCCC
+CGGCAGTGATGGTCACTCACTATGGAGATCGCGAATGGTAATGTCCGCACCAGGACACAT
+TGTTTACAGTAGTTACAACACCCTGTACGGACATTCTCTCTCCGGTGGTGGTCTTGTCAT
+CTTAAAAGCTCTCATCATTTCCCTTACTGTCCATACCCATGACGCCATATGTGGTGCGCG
+TAGCCGTGTGTGGCGTCGTTTCAAAAAGCAAGCTAAGGCTTACAAGGAAGCCAACCCTCA
+GATGTGTGTGCGCATAATCGCGTTCAAGAGAACGCGGGTGATGTATACCTACAACTCAAG
+GTGCTATCCATGGGAAGACAAAAAGCAGTGATCAAAGCTCGTCGTGAAGCAAAGCGTGTG
+TTGAGACGAGATTCGCGTAGTCATAAGCAACGTGAAGAAGAATCGGTCACGTCACTGGTA
+CAGATGGGCGGAGTAGAAGCCATTGGCATGGCGCGCGATAGTCGCGATACCTCTCCTGTT
+AAGGCGCGAAATGAAGCACAGGCGCATTATCTGAACGCTATCGACAGTAAACAGCTTATT
+TTTGCGACCGGCGAAGCCGGCTGCGGAAAAACATGGATCAGTGCGGCAAAGGCGGCAGAA
+GCATTGATTCATAAGGACGTCGAGAGGATCATTGTGACGCGTCCGGTATTGCAGGCTGAT
+GAAGATCTTGGTTTTTTGCCCGGTGATATCGCTGAAAAATTCGCGCCTTATTTTCGTCCC
+GTCTACGATGTCCTGCTTAAACGGTTGGGCGCGTCCTTTATGCAATATTGTTTGCGCCCG
+GAAATCGGTAAGGTAGAAATTGCCCCGTTCGCCTATATGCGTGGGCGTACTTTTGAAAAT
+GCGGTCGTGATCCTCGACGAGGCGCAAAATGTGACTGCGGCGCAAATGAAAATGTTTTTG
+ACGCGATTAGGCGAAAATGTCACGGTCATTGTCAATGGCGATATTACGCAATGCGACCTG
+CCGCGCGGTGTGCGTTCCGGGTTGAGTGATGCGTTGGAACGCTTTGAAGAAGATGAAATG
+GTGGGGATTGTGCATTTCAACAAAGACGACTGCGTGCGCTCGGCGCTTTGTCAGCGAACG
+CTCCACGCATACAGCTAATATAGCATTGACTTTCAGAGCCCGGGAGACCGGGCTTTGTTG
+TCTATAAAGCGTCCAGCAGACTCATGGACAAAAAAGGAAAATGACGCCTGTACGTGAAGG
+GAATTATTTTGTCATGTCCAGGGCATTCATTGTCCGTAATTTGCTTTCTCGGGCCTGTTC
+CGGCGAGGACTGTACAAGAAGGGTATAGAGCAGGTCAAAGACGAAAAGCTGCGCGGCTTT
+CGTACCGATCGAGTCACCCTGCAACATTCCTTGCCGATTACCATTGATCAGGCAAAAATC
+GGCCTCTTCACATAATGGAGAACCCAGATTATGGGTAATGGCGACTGTGGTGGCGCCAGC
+CTGTCGGGCCAATCGGAGTGAATGCACTGTTTCTGGCGATGTGCCCGAGTGACTGACACC
+CATCGCGACATCGCCTGCTTTCAATAGCGTAGCCTGCATGTACATAAAATGGTTATTGCT
+TACCGCATCGCCCCGTAAACCCATACGCATTAGCTTGTGTTTCATATCCAGCGCCGTGAT
+CCCCGATGAGCCCACACCAAACATATAAACTGAGTGACAGTGACGTAGGGCGTCCACGAC
+ACCGAGAACCTGTTGCATATCCAGCAGATTTAGCGTTTCAGATAATACATTACTAATGGT
+GTTCTGCAATTTTAAACCAATCGCGTGGGCATCGTCGGATTCGCTAACTTCGGCATCCAG
+TAGAGGACTACTGTCATCAGACTCGGTAGTGGCAAGTTCAATGGCCAGGTCCATTTTAAA
+ATCCTGAAAACCTTTATAGCCCAGGGTGCGACAAAAGCGAATAACCGTGGCTTCTCCGGC
+CTGTGTGTCGCGCGACAAATCCGCAATAGATGACTGTGTCACCTGTCTGGGGAAGGCTAA
+AATATATTGCGCAATACGCTGCGAGGCACGTGTCAGGCTTTTTTGCATAGCACCCAGAGT
+GTCAAGGATTTTTCCGGGTTTCAGACGTGGGGGTTGAGGCTCCATATAGTTCCTGCTCTT
+GAGGGCTGATTGCGGCACAAACCCTGCAATATCGAACAGAGCAGAGCCTATCAAGTCTGC
+GGCATGAAAAAAAGCGCGATGCAAGTAAAGGAGGGGGAATATTTACATCGCGTTGCCAAC
+TTATGACAGAACATCACTGGACAGTAATCTGTTTGCTCTTACGGTGTTTATAGCGCGCCA
+TCTGTCGGATGGTTTATATCTCGCCCGGTATTGTCGCCCCGCGCGTTGGCGTCACCGTGT
+GCTGTGTCGGTATAGCGTGGTCTCATGAAATCATAATGTCACCTTTGGTTCAGGTTTTTC
+ATCCAGCGTCAATGGCGGTGCCGGGGCAAATAACGGGGCGAAAATAACGCCGCTGATCAC
+CACGCTTAGCGAGCCAATGACCCCATAAAAGAAGAAGTTAAGGTCAGTGGCATAGCGTGT
+GCCCAGCACGGTAATGACGCTCATAATAATTCCTAAAACCGCACTCCCGGCATTTGCTCG
+TTTAAAGAAAATGCCCAGCATCAACAGACCGGTCATTGGCCCTCCCATCAGACCTATCAG
+ACTATTAAATGCATCCCAGATCTCTGATTCATCGGCCATGACCAGCCATACCGAGGCCGC
+GCTACTTATCAGGCCCGCGACCAGAATAACTAACTTAGCTATTTTCATACGGTTTTCTGG
+CGTTCCTTTTTTATGACTCAACCGCTGATAGATATCGGAATTAAAACAACTGGAAATGCT
+GTTTAAGCTGCTGGAGATGCTCGACTGCGCGGCAGCGAAAATAGCGGAGATTATCAACCC
+TGCAATGCCGACTGGCATTTCCGTGACCACGAATAAGGGCAAAATGCCGCCAGTGTTGAA
+TCCCGCCGGTAATAATTGTGGCTGTTGCTGATAGTAGACAAATAATGCCGAGCCGATAGC
+AAAAAAGAAAACAGGGATCACCGCAACCAGTTTGGCATTTGTAAGTAATGTTTTCTTCGT
+TTCCTCTATGGAGTCAGTCACCATATAGCGTTGGACCACATCCTGACTGGCAGTAAATTG
+CTGAATATTGGCAAACAGAAACCCAATCATCAATACAGGTACTGTGCTTTCCGTCCAGCT
+CCAGTGGAACTGCGTAGCCGGCAAGAATTTATCCGCCTGCTGCGTCACCGTAAAAATTTC
+ACCAATGCCGCCCTGGACTTTCAGACATATCACTATAAAAATCAGTATCGCGCTGCCAGA
+AAGTAAGAGACCTTGAATAACCTCAGTCCATATTACTCCTTCAATTCCCCCCCATCCAGG
+TATAAATGATACACATCACACCAATCAACAGTACCAAAATCACCGGGTCTATAGCGATGA
+AGGGGCGCAAGGCCAGCACGGCGAGGAAAGTGATAATGGCGATACGTCCAATATGAAACA
+ACATAAATGACATGCTGGCGACCAGACGGCAGCGCACATCGAACCGTGCTTCGAGATATT
+CATAGGCTGATGTGACTTTCACTTTCCGAAAAAACGGAATATAGAAATAAAAAACCAGCG
+GTAAAATTGCGATAGCCAGATCCTGACCAATGATAAACGTCCAGTCGGAAGTAAACGCTT
+TGGCAGGAATTGACATAAATGCAATTGAGCTTAACGTAGTAGCAAATACACTGACCCCAG
+CCGCCCAACCAGGAACCCGGCCACCACCGCGAAAATAATCGTCTGCTGTTTTTTGCCGTC
+TGGAAAAATAGACACCGACTACCATCATGGCCAGGAGGTAGCCAAATAATACAAAATAAT
+TAACGATGCCGAAAGAATGTGCAATCATAGTTCTTTCCTGCTTATGGTTTTGAGTAGCGT
+TGCGGTCATCGTCTGTGTATTTTTATCTTCCCATCATTCAGACGATACGATATTCAGATC
+AAGCAATAGTTTTTGTTAATCTCAATGACGCTAATCATGAGATGTAAAAGGCAGAGCATT
+TATGCTCCACCAATGGTGATGCCCAGACGTATGTCCTGCCGGATACCTGAAATAGCAGCT
+ATCCGATATTGCGTTCAAACAGGCTTCAGGCCTGGGTATCGTATTTGCCGATAGCATTCT
+TAACGTCGGATTATTCTCTGAGACATAAATATCAATAGTAAGCGTGTTCATTTTCTTTTC
+CTGTTCAGCGTTTTACTGCGTGACTGAACCACTGACAGATATGCTCGATACGGGTGATAG
+CGGAACCAACGGTAACTGCCCAGGCACCATGCTCAATAGCATTGGCCGCCAGTGCAGGCG
+TGTTATAGCGCCCCTCGGCAATAACACGACAACCTGCATGACTCAGTTGTGTCACCATTG
+CCAAATCTGGCTCAACCGGCGTGATGGGACCGGTATAGCCAGACAGTGTTGTACCAATGA
+ATTCGATTCCTTTCTGATGGCAACTTATGCCTTCATTCACGGTTGAACAGTCTGCCATCG
+CCAGTAATCCATGCAGGCGAATACGTGTCAGTAAACTATCAATATCAACCGGGCGAGAGC
+GGAATGAGGCATCAAAAGCGATAATATCGGCACCTGCCTGCGCCAGGGCGTCAACATCCT
+GTAAATATGGAGTGATACGGACTGGCGACCCTGTAAGGTCACGTTTAATTATCCCAATAA
+TAGGAACAGAAAGATGGGGACGAACAGTCCGCAGATTCTCAATGCCTTCAATGCGCACAG
+CGACCGCACCCGCCGAAGCCGCTGCCTGTGCCATTGCAGCCACAATTTCAGGTTTATCCA
+TAGGGCTGCCTGGTACCGGTTGGCATGAGACAATCAGCCCACCGTTTTCGTGTACACTTT
+GTTCCAGCCTGGCTAATAGTGACATCCAGTTCCGCCTTATATATTTTGAAGTTTTACTCC
+GTTTTATATTATGTTGTGGAGTGTTGCTGCATCAAGCAAAAAGATAGAAAAATGTGAACA
+CAGTCATAAGGTATGTTTGTCATTCAATTACTGCGCCAGCGGATTTGAGGCTATAACCTG
+AAGCTACAAGACAAATACAATTCATTCTGACGGCAGCATGCTGTATTTTATTAACTGCTT
+CCGGCATAAAGATAAAATGCTCAGAAGGAGCTTATACGCCACGGGATAGCCAGTATGAAA
+TGCATCACATAATTCTTGTTGTATTTATTCAGAATGCAATCACTAATGAGTTTGCCCATA
+ACAGGTCACCCATAATCACTTCTAACATCACCATAGTGTGCTTTTACCTCGCACATGGTA
+ACCCCATGTAATTATTTTTAGCGCTTGGTCACATTTTGTCATTTTCATTGTTGAAATTAT
+GCACCATAAGATCACTAATGATGAAGCTTTACTCCAGTTGTATTTCTTCGCATGGGGATG
+CAGATGAAAAATTTTAAGAAAATGATGACGCTAATGGCGCTATGTTTATCAGTTGCTATC
+ACCACATCAGGATATGCAACCACGCTTCCTGATATACCAGAACCACTGAAAAATGGTACT
+GGCGCTATTGATAATAATGGCGTGATTTATGTCGGCTTAGGTACCGCAGGGACATCCTGG
+TATAAAATTGATCTTAAAAAGCAACATAAAGACTGGGAGCGTATAAAGTCGTTTCCTGGT
+GGAGCTCGTGAGCAATCCGTGTCGGTATTTTTAAATGATAAGCTGTATGTTTTTGGTGGC
+GTAGGGAAAAAAAACAGTGAATCACCGTTGCAGGTTTATAGCGATGTGTACAAATACTCA
+CCGGTGAAAAATACATGGCAAAAAGTTGATACTATATCTCCAGTTGGATTAACAGGGCAT
+ACGGGAGTAAAATTAAACGAAACGATGGTACTTATTACCGGAGGGGTTAATGAGCATATC
+TTTGATAAGTATTTTATTGATATAGCGGCTGCGGATGAAAGTGAAAAAAATAAAGTCATC
+TATAATTATTTTAATAAACCTGCCAAAGATTATTTTTTTAATAAAATCGTATTTATCTAC
+AATGCTAAAGAGAACACATGGAAGAATGCCGGTGAGCTGCCAGGCGCGGGGACGGCAGGA
+TCGTCATCGGTAATGGAAAATAATTTCTTGATGCTGATTAATGGTGAGCTCAAACCGGGT
+TTACGTACCGATGTGATTTACCGCGCCATGTGGGATAACGATAAGCTAACATGGTTGAAG
+AACAGCCAGTTACCGCCATCGCCTGGAGAACAACAGCAGGAAGGGTTGGCCGGAGCATTT
+TCGGGCTATAGCCACGGTGTCCTGCTTGTCGGTGGTGGCGCGAATTTTCCGGGAGCAAAA
+CAAAATTATACTAATGGAAAGTTTTATTCCCACGAAGGGATAAATAAAAAATGGCGAGAT
+GAAGTCTATGGTTTGATTAATGGCCATTGGCAATATATGGGTAAAATGAAACAACCTCTC
+GGCTATGGTGTATCAGTAAGTTATGGTGATGAAGTTTTCCTTATTGGTGGTGAAAATGCT
+AAAGGGAAACCTGTTTCGTCTGTAACCTCCTTTACCATGCGTGATGGTAATTTATTAATA
+AAATAATTTTTTAAATACAAAAATAAAGTTAATTGATAAGCGGAGTATTTTATGAAAATC
+AACAGATATCTTCTGGGTATGGTTTCGTTTATAGCATTTTCATCATATCTACAAGCGGCA
+ACCCTTGATTATCGGCATGAATATGCTGATAGAACCAGAATTAATAAAGACCGTATTGCT
+ATAATTGAAAAGCTTCCTAACGGCATTGGTTTTTATGTCGATGCCAGCGTTAAATCGGGA
+GGAGTAGATGGTGAGCAGGATAAGCATTTAAGCGATCTCGTCGCAAACGCTATAGAACTG
+GGCGTAAGTTATAATTATAAAGTTACGGACCATTTTGTTTTGCAGCCTGGATTTATATTT
+GAAAGCGGTCCAGACACTTCAATTTATAAGCCTTATTTAAGGGCGCAATATAATTTTGAT
+TCTGGTGTTTATATGGCTGGTCGTTACCGTTATGACTATGCAAGGAAGACAGCTAACTAT
+AATGATGATGAGAAAACGAATAGATTTGATACTTATATAGGTTATGTTTTTGATGAGTTG
+AAATTGGAATATAAATTTACCTGGATGGATAGCGATCAAATTAAATTTGATAACAAAAAA
+ACAAACTATGAACATAATGTGGCTTTAGCCTGGAAACTGAATAAGTCATTTACACCATAC
+GTTGAGGTCGGAAATGTAGCGGTGAGAAATAATACCGATGAGAGACAGACCCGTTATCGC
+GTTGGATTACAATACCACTTTTGAGAAAGGTTAAAGAGTTACTGTACTAATCTTTGCTTA
+GTATTCATATCGATAATCTTATAGCCCGTAGCTACGTTACAGAACGTTAGACTTTGTCTT
+GAGTATTCACGTCCTTAACGTAGCTACGAACAAGGATGATAAACAATGAATATACGCCTT
+ACATAGCTACGATCTCAGGCGGTTGTCGGAAAGTGCCGGTTGATAGTGTCTTATTCGGCA
+ATTGATATGACTTAAAAATTAATTCCGTAAGCATTTCAGACGGTAACAGCAAATAAGGGT
+TTTATTGTGATAGCAAAATTCTTCCCGTGGTATAGCGAGATAACACGTCCACAAAAAAAT
+GCTTTATTTTCAGCATGGCTGGGTTACGTTTTTGATGGCTTCGACTTTATGCTGATTTTC
+TACATTATGTATCTGATCAAGGCTGACTTAGGATTGACAGATATGGAGGGCGCATTCCTT
+GCCACAGCGGCCTTTATTGGGCGACCATTTGGCGGGGCGCTATTTGGTCTGCTGGCAGAC
+AAATTTGGCCGTAAGCCGTTAATGATGTGGTCGATAGTTGCCTATTCTGTAGGTACAGGG
+TTAAGTGGCCTGGCTTCCGGTGTAATTATGCTGACGCTTAGTCGTTTTATTGTCGGTATG
+GGGATGGCGGGGAAGTATGCTTGCGCTTCTACTTATGCCGTGGAAAGTTGGCCAAAGCAT
+TTAAAATCTAAAGCGAGCGCATTTCTGGTTTCAGGTTTCGGTATTGGTAACATCATAGCA
+GCCTATTTTATGCCGTCATTTGCCGAAGCGTATGGTTGGCGTGCTGCTTTTTTTGTCGGT
+TTGCTACCCGTTCTTTTAGTAATCTACATCCGGGCCAGGGCTCCTGAATCTAAAGAGTGG
+GAAGAAGCCAAACTCAGTGGTCTCGGAAAGCATTCACAAAGTGCCTGGTCAGTTTTCTCT
+TTGTCAATGAAAGGGCTATTTAATCGAGCTCAATTTCCACTGACATTATGTGTATTTATT
+GTTCTGTTCTCTATTTTCGGCGCAAACTGGCCGATCTTTGGTCTACTGCCTACATATTTG
+GCGGGAGAGGGCTTTGATACGGGCGTGGTCTCTAATTTAATGACGGCGGCGGCATTCGGC
+ACTGTATTGGGAAATATCGTTTGGGGTCTGTGCGCAGATAGAATTGGTTTGAAGAAAACG
+TTCAGCATTGGTCTTCTCATGTCCTTTTTATTCATTTTCCCGTTATTCAGAATTCCGCAA
+GATAATTATTTACTGCTGGGCGCATGTTTATTCGGTTTAATGGCGACTAACGTAGGTGTT
+GGCGGGCTGGTTCCCAAATTTCTCTACGACTACTTTCCTCTTGAGGTTCGTGGTTTGGGT
+ACCGGGCTTATTTATAATCTTGCTGCGACATCAGGCACATTCAATTCAATGGCGGCGACC
+TGGCTTGGAATAACAATGGGGCTAGGCGCTGCGCTAACGTTCATTGTTGCTTTCTGGACC
+GCAACAATTCTACTCATTATTGGCCTATCCATTCCGGATAGACTAAAAGCACGTCGTGAA
+AGGTTTCAGTCAACAAAAGAATTTTAATAGAGGATAAATGATGACGAAATACGGTGTTAT
+AGGTACAGGTTATTTTGGCGCTGAACTGGCGCGATTTATGTCTAAGGTTGAAGGGGCGAA
+AATCACTGCGATTTACGATCCGGTAAATGCGGCTCCGATAGCGAAAGAGCTGAACTGTGT
+CGCCACTTCAACGATGGAGGCGCTTTGTACCCATCCTGATGTGGATTGCGTAATTATTGC
+TTCACCAAATTACTTACATAAAGCGCCGGTCATTGCGGCGGCTAAAGCGGGTAAACACGT
+GTTTTGTGAAAAACCTATCGCCTTAAATTACCAGGATTGTAAGGATATGGTTGATGCCTG
+CAAAGAAGCTGGTGTTACCTTTATGGCGGGTCACGTTATGAACTTTTTTCACGGGGTTCG
+CCACGCTAAAGCGCTCATCAAAGCCGGTGAAATCGGTGAAGTTACACAAGTTCACACTAA
+ACGTAATGGTTTTGAAGACGTGCAGGATGAGATCTCATGGAAGAAGATTCGCGCAAAGTC
+AGGTGGGCATCTGTACCATCACATTCACGAGCTAGATTGTACACTGTTCATCATGGATGA
+AACCCCATCCCTGGTTTCAATGGCGGCGGGGAATGTTGCGCACAAAGGTGAAAAATTTGG
+TGATGAAGATGATGTTGTCCTAATCACCCTTGAGTTTGAAAGCGGTCGTTTCGCGACACT
+TCAGTGGGGATCATCGTTCCACTACCCTGAGCACTATGTATTAATTGAGGGCACGACAGG
+TGCAATTCTCATTGATATGCAAAACACGGCTGGTTATCTAATAAAAGCGGGCAAAAAAAC
+ACACTTTCTTGTGCATGAAAGCCAGGCGGAGGATGATGATCGTCGCAACGGTAACATATC
+CAGCGAGATGGATGGCGCAATCGCTTATGGTAAACCCGGTAAACGTACGCCGATGTGGCT
+CTCATCAATTATGAAACTGGAGATGCAGTACTTGCATGATGTGATAAACGGTCTGGAGCC
+AGGCGAGGAGTTTGCTAAATTGCTAACGGGAGAAGCGGCGACAAATGCCATTGCTACCGC
+TGATGCTGCGACGCTTTCTTCAAACGAGGGGCGCAAAGTTAAACTCACTGAAATTCTTGG
+CTAAAATTTAAAGCCGGATGGTGGTGTTATTGGCCGGTGGCGCTGCGCTTATGTAGACTG
+GATAAGGCGCCCGTGAACTGTGCCGCCATCCGGCAATGGACGGGGGCTTAATGCGAAAAA
+AAGCCCGTACATTCGTACGAGCTCTTTCTTAAATATGGCGGTGAGGGGGGGATTGACTCG
+CTGCGCTCGCCCTTCGGGCAGCCCGTTCGCTGCGCGCCCGGTCTGTCCAACTGGCTGCGC
+CAGTTGTCGAACCCCGGTCGGTGGTTCTCATCCCCCCTTGGTTTGGGGGATACATATAAG
+CAAAAAGCCTGTACTTCTGTACAGGCTCTCAACTTGAAGATGGCGGTGAGGGGGGGATTC
+GAACCCCCGATACGTTGCCGTATACACACTTTCCAGGCGTGCTCCTTCAGCCACTCGGAC
+ACCTCACCAAATTGTCGCTCCAGCATTACTGGAACGGGCGCTAATGTAGGGAAATATCCT
+TTCTACGTCAATCAACTTTTTTAAAAAAAAGCGCTTTTATACAAACTTCCATCAATCTGT
+GGCTTTAATAAGCGAAAACTGCTTTTTTTGCCCGCGCCGGGAAATTTGCTATGCTGCACA
+TCCCGTTGAAAACGCTGATAACAGGCGCAATCACATTCCGCACAATACTGCTCAGGAGAT
+AACATGGAGATAATTTTTTATCACCCGACATTTAACGCCGCCTGGTGGGTAAATGCGCTG
+GAGAAGGCTCTCCCACATGCGCGCGTTCGTGAATGGAAGGTCGGTGATAACAACCCCGCA
+GACTATGCGCTTGTATGGCAGCCCCCGGTTGAAATGCTGGCCGGAAGACGCTTAAAAGCC
+GTCTTTGTGCTGGGCGCGGGGGTGGATGCAATTCTGAGTAAATTAAATGCGCATCCGGAA
+ATGCTGGACGCCTCCATTCCTCTATTCCGTCTGGAAGATACCGGAATGGGCCTGCAAATG
+CAGGAGTATGCCGCCAGCCAGGTATTACACTGGTTCCGTCGTTTCGATGATTATCAGGCG
+CTGAAAAATCAGGCGCTATGGAAACCGTTGCCGGAATATACCCGCGAAGAGTTTAGCGTC
+GGTATCATAGGCGCAGGGGTACTGGGCGCAAAAGTGGCAGAAAGTCTACAGGCGTGGGGG
+TTCCCGTTACGTTGCTGGAGTCGTAGCCGCAAATCCTGGCCTGGCGTGGAAAGTTATGTA
+GGGCGTGAAGAACTGCGCGCTTTCCTGAACCAGACGCGGGTGCTGATTAATCTGCTGCCG
+AATACGGCCCAAACGGTAGGAATTATTAATAGCGAATTGTTGGATCAATTGCCGGATGGC
+GCTTACGTGCTGAATCTCGCGCGCGGCGTTCATGTTCAGGAGGCGGATCTGCTGGCTGCG
+CTTGATAGCGGTAAGCTAAAAGGCGCGATGTTGGATGTCTTTAGCCAGGAACCGTTACCG
+CAGGAAAGTCCATTATGGCGCCATCCGCGAGTCGCCATGACGCCGCACATTGCGGCAGTC
+ACCCGTCCGGCGGAAGCCATCGATTATATTAGCCGCACCATTACCCAGCTGGAGAAGGGA
+GAGCCGGTGACGGGGCAGGTGGATCGGGCGAGAGGATATTGGTATCAACCCGGCGCGGGC
+CGGGTTTCGCTAAAAAACGCTGGCGATACCTGCTATCCTTGGCGGAAATGACTACAGGAG
+AGAGCAATGTATCCCGTTGACCTGCATATGCATACCGTCGCGAGCACTCATGCCTACAGT
+ACTCTGAGCGATTATATCGCGGAAGCCAAACGCAAAGGCATGAAACTTTTTGCGATTACC
+GATCATGGTCCGGACATGGAAGATGCGCCGCATCACTGGCAGTTTATTAACATGCGCATC
+TGGCCGCGTCTGGTTGACGGCGTGGGGATACTGCGTGGCATGGAGGCGAATATCAAGAAT
+ATTAACGGTGAAATTGATTGTTCCGGAAAGATGTTCGACTCGCTGGATCTGATTATCGCA
+GGCTTTCATGAGCCCGTTTTTGCGCCGCATGATAAAGAAACGAATACTCAGGCGATGATC
+GCGACCATCGCCAGCGGCAAGGTGCATATAATTAGTCACCCGGGAAATCCAAAGTATCCA
+GTGGAGGTTAAAGCCATCGCGCAGGCGGCGGCGAAACACCAGGTAGCGCTGGAAATCAAC
+AACTCTTCTTTTCTGCATTCGCGTAAAGGAAGCGAAGATAAGTGCCGCGCGGTCGCTGCC
+GCCGTACGCGATGCGGGAGGCTGGGTAGCGTTAGGCTCTGAGTCCCATACGGCCTTTACG
+CTTGGCGATTTCACCGAATGCCGGAAAATTCTGGATGCGGTGAATTTTCCGGAAGATCGA
+ATCCTGAACGTCTCTCCGCAGCGCTTACTGGCCTTTCTCGAGTCACGCGGTATGGCGCCT
+GTACCGGAATTTGCCGAACTTTAATCGTTATTTACGGGAAGGTATCAATGAATGAGTTTT
+CAATCCTGTGCCGTGTGCTGGGATCGTTGTTTTACCGCCAAGCGCAAGATCCTTTACTGG
+TTCCGCTGTTTACGTTAATCCGTGAAGGTAAACTGGCGGCAGACTGGCCGCTGGAGCAGG
+ATGACATGCTGGCGCGTTTACAGAAAAGCTGCGATATCACGGAGATTTCCACTGATTACA
+ATGCGTTATTTGTTGGGGAAGAGTGCGCGGTAGCGCCATACGGCAGTGCGTGGGTCGAAG
+GCGCGGAAGAGTCTGAGGTGCGCGCTTTTTTAACGTCGCGAGGGATGCCGCTGGCCGATA
+CGCCTGCCGATCACATTGGCACTTTATTGCTCGCGGCCTCCGGGCTGGAAGATCAGTCTG
+CCGAAGATGAAAGTGAAGCGCTGGAAACCTTATTTGCCGATGATCTGCTTCCCTGGTGCA
+ATACCTTCCTCGGTAAAGTTGAAGCCCATGCCGTTACGCCAGTCTGGCGCACTCTGGCGC
+CGCTAACGCGTGATGCGATAGGGGCCATGTGGGATGAACTTGAGGAAGAAGATGAAGAAT
+AATGTGATGTAAATCACCATTAACTGCAACGGGTTTTGCATGATTGCATAAAATGTGTGC
+GTGATCTCATTAATGTGCCGCTTTTCTGTTATGATGCGCGCGATGAACATACTTCTTTCT
+ATTGCTATCACTACGGGCATCCTTTCTGGAATATGGGGATGGGTGGCCGTCTCCCTGGGG
+TTACTAAGCTGGGCCGGTTTTTTAGGCTGTACGGCTTATTTGGCCTGTCCGCAGGGCGGC
+TTTAAGGGATTGTTGATTTCCGCCTGTACGCTGTTAAGCGGGATGGTGTGGGCGCTGGTC
+ATTATTCACGGTAGCGCGTTGGCGCCGCATCTGGAAATTGTGAGTTACGTGTTGACGGGG
+ATCGTGGCATTCCTGATGTGTATCCAGGCAAAGCAGCTATTGCTTTCTTTTGTTCCGGGA
+ACATTTATCGGCGCCTGCGCGACATTTGCAGGGCAGGGTGAGTGGCGGTTGGTATTACCG
+TCGCTGGCGCTGGGGCTAATCTTTGGCTATGCCATGAAAAAGAGTGGGCTATGGCTGGCA
+TCACGCCGCGAGCAACATTCAGCGAATACGGCGGTCACAAAGTAAAAAAGCGTGGGGTTT
+TCCCCACGCTTTGTCGTATTCATCAGGATTCTGGCGGTACTGACAGCTCACGGTATTTCA
+CCAGAATATCATTTTGCCTGTCCGCTTTATTCTGCAAATCCGACAGTCCGCGATCGATAC
+CATCATTAATGAGGAAGATAACGCCGGTTTCAATGGCTGACGTCAGACACAGCATCACCG
+GTTCGTTCGAGGTATAGCCGATTTCGCCTTCCAGTAAGCGCGGGTAATCAATAAAACGGA
+ACACGCCTGCCTGTACTTCATAGGAAAGGATCGTTTTACTGGTGTTCACCGAAGAAAGGA
+TCTCGCCCGTACTGACGTTAACCACGCGCAGGTTGACAGCAGTCTGATCCAGCTGATACT
+GCGTATCGGCGCCAATACCGAAATATCTTGCGCCGACCCCGGCGGATTTGACGTTACTTT
+CATAACCAATAATAGAACCTTCCACCATAATATTTGCCGCCGTCAACGACTGAAGCGGGA
+TACGGTTATTCATCGCCACGGTGCCGTTTTCCTGGGCTGCGGGAATAATTTTCCGTTCAT
+TCAAAAGATTCTGTAAGCCTTGTCGTTCTAGTGGGATAAACGAGCGCGAATCTTTCAGCG
+CGGTGACCAACATAGCGGTGGCGCTCTGCGGCACAGCCGTGGAAAAGTTACTTGCCGGGT
+AAGGTTTAAATTGGCCCGTTTCATCCTGAATGTTATATACCGAAACAAAGATCTTACCGG
+TGGGAGCAGGTAAGTGCGTCAAATCTTTGTAACTTTGTGCGGGGGGCATTAATGTCGGTT
+TCGCAGCTTGTTTCGGCGGGGCAGTTAAGCATCCGCTCAACGATAAAACGGCAACCAAAA
+TAAGTAAGCGCGGCATGATTTATATCCTTTAGTGACTGTAGGTTAAAAATCGGTTGACTG
+AGTTTGTAAACCTGACACTTCGATGGTCGAGGTTCTTCCCGGTTTTCTGTCCGTGACGTT
+GAGCTGGAGCTGTCCGTCGCGATTAGCGATATCGATAATAAGATCATTGGTCACCATACG
+TCCTGGTTTTCCGGTATTAATATTGGTCAACAAGCCGCCCAGAATTTGCGATTGAATAGC
+CTGCGTAAAGTTATCCAACGCTGAGGGGGTCTCGATACCAAGATCGTTATCATAAGCGGG
+GTCTTTATATGAATTTTGCGCCTGGGCGCTATTCAATAAAAGGGAACCGTTATTGGGGTT
+TCCACCAAAGTTAGGATTACGGAACTGGAACGTCATATTTCGAGCCCAGGTTAATGGCGA
+AAAAAGCATGAGCAGCACTACTGCATGTTTAACACGCATGAGAGCCTCCGGATAAAAATC
+ATGTTTTAGAATTCATCACGCGCTAAATCACTCGTACTTAAGAGCGTTTGATCTATTTGT
+CGGCGATTTAATGCTTCCTCTGTTTGCGCTAATGCGAAGACGACGGTTTTCTCGAAGTCT
+CTTTTCATTGGAAATAAAAAGGTCTGGAAAATAACGTCCTGGTTTACCGTTATGGTGATC
+CAGCTTCCCCAACGCGCACTGGGTCTTTCATTAATGGTCAGGTTGCCGGTGTATTCGCTT
+TCCCATTTGTCGCTGAATGCACGATAGAATTCATGTCCTATGGAAGAGACGGTATGGTCG
+GTTAACAATCCGGGAACCTCGACTTCAACTTCATTGGCATGGAGGTTTCCGGTAGCGAAC
+AGTAACTCTGCTGCTACAATCCAGGTCAGATAGCGTTTCATGGCCTTACCGCCTGAGATT
+ATCGTTTGCTCATGAAACTGCCTGGGTGCGATTTTTGACAGGTATCTTTTTGAAAAGATT
+ATAAAGATGTGTCTTAACCGTATTCTCGCTGATAAATAGCGGCCTGGCGATTTCATTATT
+AGAGGCACCAATACGTAACTTATTGAGGATCTCTTTTTCGCGATGAGTGAGTAATGCGGA
+CTCGGTGCTGTTGTAGCGGTAATTTCCTGAGTGTGTAATCAGGTAACTGGCTAATTTTTG
+TGAAAAATAGCATTCGCCCCGCAGAATACCCTGTAATCCGCGGACCACGTGTTCCTGGTC
+TTCAGTGGCGTAAAACACGCCGTTAATATGAGGCCAGTTTTGAATTTCACGGTAGGGATA
+ATCGTCAGGGGTATTTAACAATAATGTTTTTATATTATTGTGTTTGCGGCTTAAATTATC
+CTGCCAATAGTGGATAAGCTTCTTATCCGCTTCCATCATATGCATTAAAACAATGCAACC
+GGCTGAGATATCTTCCAGAGAACGTTGAATATTATGCAGTTGTCCGGTTATGGCCAGCGA
+TTGCTTTAAATGTTGCAATAATGCCGTAGCTTGCAGAGATGGCTTTGTGATCAACAATAG
+TGTGTGACCATGACTACTATGGACTTCATTAAACATGATGAGACTCCACTTTTTTTAATC
+GCACATCTGACAGCTGCCCCCATAAAATAAAGGCACCAGAAGTACTGACAGATGTTGCAC
+TGCTGTGGGTTGAAATAGCCCATTATCCAGAAAGAGAAAAAGATTTACGAAAATACTTTT
+AACTGTTTTCAATCTAGCCATTACAAATCTTAAAGCAAGTGGTAAACTTGTAACAAGATG
+TAAAAATATATATTAAAATGTTGTTTTTGGGTTTTTTTGAAGTTTAGATTTGATAGTAAA
+GTTGTACATTTCGCTGTTATTGCATAGATTTAAAAAATCATGCAAATTATAATAATTCAT
+TGATTTTTAATCATTTTAATTATTGTATGTCATGTTTTGATGTTATTTTTTCTTAAAATT
+TGAGACGTGGCATTAACCTGGACAGCACAAAGACAAAAAAAGACGAAGTGTGTCACGTCT
+TGTGCGTATTGCCCCCCATGGGAAGCATAAGAACATCCCCAGGGCGGCATAACACACACC
+AACACTTCATTTTTTAGGTGCGCGATACACTATCTTCTGTGGCCAAAAATCAATTATAAA
+AAATCACATGGCTATCGTTTTATTAGCACTTTGGTATGAGCGTAAATAACAAAATACCAC
+GCGTGGGTGAGTTATTAAAAATGTTTCCACGGACATACTCTGCATCGTAACGACGCGTTA
+ACAAAAAACGCATGTCGCTAACAAGGTAATAGATAATTTTCGCTATGTACGACCAGGTCC
+AGGGTGACAGCATGAAAAACAAATTGTTATTTATGATGTTGGCAATACTGGGTGCGCCTG
+GGATTGCAACCGCGACAAATTATGATCTGGCTCGTTCAGAGGATAATTTTGCGGTAAATG
+AATTAAGCAAGTCTTCATTTAATCAGGCGGCCATTATTGGTGAAGTCGGCACGGATAATA
+GTGCCAGAGTACGCCAGGAAGGATCAAAACTATTGTCCGTTGTTTCACAAGAAGGAGAAA
+ATAATCGGGCGAAAGTCGACCAGGCAGGGAATTATAACTTTGCGTATATTGAGCAAACGG
+GCAATGCCAACGATGCCAGTATATCGCAAAGCGCTTACGGTGATAGTGCGGCTATTATCC
+AGAAAGGTTCTGGAAATAAGGCCAATATTACCCAGTACGGTGCGCAGAAAACAGCAGTTG
+TAGTGCAGAAACAGTCGCATATGGCTATTCGCGTCACCCAAGGCTAATACCGTTACGACT
+TTTAAATCAATCCGATGGGGGTTTTACCATGAAACTTTTAAGAGTGGCAGCATTCGCAGC
+AATCGTAGTTTCTGGCAGTGCTCTGGCTGGCGTCGTTCCACGATGGGGCGGCGGCGGTAA
+TCATAACGGCGGCGGCAATAGTTCCGGGCCGGATTCCACGTGGAGCATTTATCAGTACGG
+TTCCGCTAACGCTGCGCTTGCTCTGCAAAGCGATGCCCGTAGATCTGAAACGACCATTAC
+CCAGAGCGGTTATGGTAACGGCGCCGATGTAGGCCAGGGTGGGGATAACAGTACTATTGA
+ACTGACTCAGAATGGTTTCAGAAACAATGCCACCATCGACCGGTGGAACGCTAAAAACTC
+CGATATTACTGTCGGTCAATACGGCGGTAATAACGCCGCGCGGGTTAATCAGACCGCATC
+TGATTCCAGCGTAATGGTGCGTCAGGTTGGTTTTGGCAACAGCGCCACGGCTAACCAGTA
+TTAATTTAGCGTCTGCGCTAATAAAAAAACAGGGCGTAAGCGCTGTTTTTTTTCGGGAGG
+AAATTATGCATACTTTATTGCTCCTTGCCGCACTTTCAAATGAGATTACGTTTACCACGA
+CTCAGCAAGGCGATATTTACACGGTGATCCCTCAGGTCACAGTAAACGAACCCTGCGTCT
+GTCTGGTGCAAATTCTCTCTGTGCGCGACGGCGTCGGGGGAGAAAGCCATACACAGCAAA
+AACAAACGCTATCTTTACCTGCTAATCAACCGATTGAGTTGGCTCGTCTTAGTGTAAATA
+TATCTTCAGAGGACTCGGTTAAAATTATTGTTACTGTTTCGGACGGACAATCACTGCATT
+TATCACAACAATGGCCGCCTTCTGCACAGTAGTTTTTGATGGTGGCGGAAATGGATTGGC
+TGACCTGGGTATTAAAGAGGCGATAAAAGCGTCTCATCGTCTCGGCATGTCGCTAAAAGG
+TAACGCCGAACCCTCGAGGATGACTAATCATTGAGGAGTTAACATGTCCGTAATCAAGAA
+AAATATCCCTGCCATAGGCCTGTGTATCTGCGCTTTTTTTATCCATTCTGCGGTAGGGCA
+ACAAACGGTACAGGGCGGCGTTATCCATTTTCGCGGCGCGATTGTTGAGCCACTGTGCGA
+TATTTCTACTCACGCCGAAAATATTGATTTAACCTGCCTACGCGAAGGTAAAAAGCAAAT
+GCACCGGATAGACCTTCGGCAGGCATCTGGATTACCGCAGGATATTCAGTCCATTGCGAC
+GGTACGGCTGCATTATCTCGATGCGCAAAAAAGCCTGGCGGTGATGAATATTGAGTACCG
+TTAACGGCGGTGTCATAGATATAAAAAGAGCGACTCTGTTGAAAGCCCTGCTGTACACTT
+TGCAGATAAGGTGAGACAAAAGGGGGGGGTTATGACATCACGTCTTCAGGTCATACAGGG
+TGATATCACTCAACTTAGCGTCGATGCGATTGTGAATGCCGCTAACGCATCATTAATGGG
+CGGCGGTGGCGTAGACGGCGCAATTCATCGCGCGGCGGGGCCGGCATTGCTGGACGCCTG
+TAAACTCATCCGTCAGCAACAGGGCGAATGTCAGACGGGACATGCGGTTATCACGCCTGC
+TGGCAAGCTTTCGGCAAAGGCGGTTATTCACACAGTGGGGCCCGTCTGGCGAGGCGGCGA
+ACACCAGGAAGCTGAGCTACTCGAAGAGGCATACCGGAATTGTTTGCTGCTTGCCGAGGC
+GAATCACTTTCGTTCCATCGCTTTTCCGGCAATCAGTACCGGCGTTTATGGCTATCCACG
+CGCCCAGGCCGCTGAAGTCGCCGTCAGGACGGTTTCAGATTTTATTACCCGTTACGCTCT
+GCCTGAACAGGTATACTTTGTCTGTTATGATGAAGAAACTGCCCGGCTTTACGCAAGATT
+ACTTACTCAGCAAGGCGACGACCCTGCCTGATAAAACACGCCTGGAGCGTGCCGTTGAAC
+CGCTATGCGCGCGCCATCCCGGAGAGTGCGGCATTCTTGCGCTGGATAACAGTCTGGACG
+CTTTTGCCGCCCGCTACCGCCTGACCGAAATGGCGGCGCGGACGCTGGATGTGCAGTATT
+ATATTTGGGAAGACGATATGTCCGGGCGGCTGCTCTTTTCGGTTCTGCTGTCGGCGGCGA
+AGCGCGGCGTTCATGTTCGTCTGCTGCTGGATGATAACAATACGCCTGGTCTGGATGATA
+CGTTGCGCTTGCTGGATAGCCATCCTAATATCGAAGTTCGTCTGTTTAATCCTTTCTCTT
+TTCGTACGCTACGCGCGCTGGGATATTTGACGGATTTTGCGCGGCTGAATCGGCGGATGC
+ACAATAAAAGTTACACTGCCGACGGCGTAGTGACGCTGGTCGGTGGGCGCAACATCGGCG
+ATGCCTATTTCGGCGCTGGCGAGGAGCCGCTATTTTCCGATCTGGACGTGATGGCCATTG
+GCCCGGTGGTCAATGATGTCGCCAATGATTTTGAACGTTACTGGCGCTGTAGTTCAGTGT
+CGACATTGCAGCAAGTATTATCCCTTTCTGAGCAGGAACTGACGCAGCGTATCGAACTTC
+CCGAATCCTGGTATAACGATGAGATCACCCGCCGTTATCTGCATAAGCTGGAAACCAGCC
+AGTTTATGGCGGATCTCGATCGCGGAACGTTGCCGCTGATTTGGGCAAAAACACGCTTGC
+TTAGCGATGACCCTTCTAAAGGCGAGGGGAAGGCGCAGCGCCATTCGCTTCTTCCGCAGC
+GATTATTTGACGTGATGGGGTCGCCGACGGAGCGTATCGACATTATTTCCGCTTACTTTG
+TCCCTACGCGCGCAGGCGTGGCGCAGTTGCTTAATCTGGTCAGGAAAGGTGTGAAGATCG
+CCATCTTAACTAACTCTCTGGCGGCCAACGATGTGGCGGTCGTTCACGCAGGGTACGCGC
+GCTGGCGCAAGAAATTACTGCGCTATGGCGTGGAGCTCTACGAACTGAAACCGACCCGCG
+AACATGAAACCGCCGTACATGATCGCGGACTCACCGGGAACTCAGGTTCCAGCTTACATG
+CTAAAACGTTCAGTATTGATGGTAGTAAGGTGTTTATCGGGTCGCTTAATTTTGATCCCC
+GTTCAACGCTTTTAAATACCGAAATGGGCTTTGTCATTGAAAGTGAAACGCTGGCGACGC
+TTATTCATAAGCGTTTTACGCAGAGCCAACGCGATGCGGCCTGGCAACTGCGGCTGGATC
+GCTGGGGACGAATTAACTGGATCGATCGTCAGCAAGAAGAGGAAAAGGTGTTAAAGAAAG
+AACCCGCTACGCGTTTCTGGCAGCGAGTTCTGGTACGGTTGGCGGCAATTTTACCTGTGG
+AATGGTTGCTGTGAACCCGCGCGCGGAAAACTACCCGATCGCGGCGCGGCTTTCTTGTTT
+TACCGGCGGTTTACCTGAAAAGAGAAATTTCAGGAGCGGGATGCGTAAATGAATTTCATA
+CAGAATTAACGCAATACCCATAACAAATATCAGCCCGCACAAGAACCCGATCAGGTTGGA
+GGAGATATGCGGTGTAATATACGCGCCAAAGAAAAGCGTTAAGGGATGATGCACCAGATA
+AATAAACAGCGAAGCATTCACGAAATAGGTGACACGCGCGGACTGAAAGTTTAACAAGCG
+ATGCCCCAGTGAAAATACCACGTTCACCATCCATAGCCCCATTACCATCGTAATCACGGA
+TTCGGTTTCGTACATCCAGGCGTCGCCGCTCCCATAACGTTGATTCAGCAGATACGCGAT
+AAAAGCAACGGCAGCGCCTAAAGTGCATCCGCGTGAGGGCGTGGTGAAGCGCGCTTTCAG
+ATCGGGGTGAATGAAGGCCAACGCGCCGAGAATAAAAAACGGCACATAAAATAGCGTTTG
+CATCACAATAAAATTGAACATGCCGTCACTGAGGATTGCCGGATATACGATGAATATAAT
+GCGCCTGATAGCAGCGTACGCCACCCCCAGCAGGAAAAAAATAAGCGAAAGTTTGGCCAG
+CGAAATAGCGGCGGGACGAGGCTTGCTTGTTTCCTGCCTTTTTTGGAACCAGGTAAAAAT
+CCCGATGCTGACGGTGGTTAATATCACCAGCACCAGTAAAAACCACAGATGTGAAATGAG
+TTCCCACGCTAACGTATTATATTTTTCATAGGCAGAGAGTGTAGGCCAGTTCTCTGTTTT
+CTCTTTGACATATTGCAACAGGATAAATTGCGGCAAGGTAAGCAAAGGGATTGCGGTAAG
+CATGGGAATACCCACACGTTCTACCCGTACTTTCCACCAGTGTTTTAATGGATAACGTAA
+AAATAACATGTACGAAAAATAACCAGAAATAACAAAAAACACCTGCATACGAAAAGCGTG
+GATAAAATCGTTAAACAGGGTTAGCCACCACGATGGCGCGGCGCTATTGACATGCCAACT
+GTGAGTGGAATAGATCAACGAGATATGAAAGGGAATCCCTAACAACATCAGCCATGCGCG
+GATAGAGTCAAGAAAATATTCACGCGGCGCGGGTACAGAGCTCATATAAGGTCACGTATT
+CTCAGATTTTTCACCTTATCCATAAGGCGAATTATAGTTACATTCGGTAGCAACCCTACA
+CCAACTCCGACAACCTGTCTCCAGGATAAGCACGCAAAGTGAAAACAGGCGCGGGAGGTG
+CTTAATCCATGAGCCAGCGCGCTGAACAAAGCCTGGATTCAGTTGTCGTAATGCCTGATT
+ATCCATTAAAATGGATCGGATCGATATAAGCACACAAAGGGGGAAGTGCTTACTTATTAT
+GAAACATAAACGACAAATGATGAAAATGCGTTGGTTGGGCGCAGCTATTATGTTAACGCT
+CTACGCATCATCGAGCTGGGCGTTCAGTATTGATGACGTGGCAAAACAAGCTCAATCTTT
+AGCCGGGAAAGGCTATGAGGCGCCTAAAAGCAACTTGCCCTCCGTTTTCCGCGACATGAA
+ATATGCGGATTATCAGCAGATCCAGTTTAACAGCGATAAAGCCTACTGGAACAACTTAAA
+GACCCCTTTTAAGCTCGAATTTTACCATCAGGGGATGTACTTCGATACGCCGGTCAAGAT
+TAACGAAGTGACGGCGACGACGGTCAAAAGAATCAAATACAGCCCGGATTACTTCAATTT
+TGGCAATGTTCAGCACGATAAAGACACGGTAAAAGATTTAGGCTTCGCCGGGTTCAAAGT
+CCTGTACCCCATTAACAGTAAAGATAAGAACGACGAAATCGTCAGTATGCTTGGCGCCAG
+CTATTTCCGCGTTATCGGCGCAGGCCAGGTGTATGGCTTATCTGCGCGCGGCCTGGCGAT
+TGATACCGCCTTACCATCTGGTGAAGAGTTTCCCCGCTTTCGCGAGTTCTGGATTGAGCG
+TCCAAAACCCACCGATAAGCGTTTGACCGTCTATGCATTACTGGATTCTCCGCGCGCGAC
+CGGCGCTTACCGTTTTGTGATCATTCCTGGCCGCGATACCGTGGTGGACGTGCAGTCAAA
+AGTCTATCTGCGCGATAAGGTGGGCAAGCTGGGCGTTGCGCCATTAACCAGTATGTTCCT
+GTTTGGGCCAAACCAGCCGTCGCCGACGACCAACTATCGTCCGGAATTGCATGACTCGAA
+CGGCTTATCCATTCATGCGGGTAATGGCGAGTGGATTTGGCGTCCGCTGAACAATCCAAA
+ACACCTCGCTGTGAGCAGCTATGCGATGGAAAACCCTCAGGGATTCGGCCTGTTGCAGCG
+TGGTCGCGAGTTCTCGCGCTTTGAAGATTTAGACGATCGCTATGACCTGCGTCCAAGCGC
+CTGGATTACCCCGAAAGGCGACTGGGGCAAAGGTAAGGTTGAACTGGTTGAAATTCCGAC
+CAATGATGAAACCAACGATAACATCGTCGCTTACTGGACTCCGGATCAACTGCCGGAACC
+GGGTAAAGAGATGAACTTCAAGTACACTCTGACCTTCAGCCGCGATGAAGATAAACTTCA
+TGCGCCGGATAATGCCTGGGTGCTGCAAACACGCCGCTCAACGGGCGACGTTAAACAGTC
+GAATCTGATTCGCCAGCCCGACGGCACTATTGCCTTTGTGGTGGATTTCGTTGGCGCCGA
+CATGAAAAAACTGCCGCCGGATACGCCCGTCGCTGCACAAACCAGCATTGGCGATAACGG
+TGAAATCGTTGACAGTAATGTACGCTATAACCCAGTCACTAAAGGCTGGCGTTTAATGCT
+GCGCGTGAAAGTCAAAGACGCGAAGAAAACCACGGAAATGCGTGCCGCATTGGTGAATGC
+CGATCAGACGCTAAGTGAAACCTGGAGCTACCAGTTACCTGCCAATGAATAAAACAACTG
+AGTATATTGACGCACTGCTGCTTTCTGAACGTGAGAAAGCGGCATTGCCGAAAACTGACA
+TCCGCGCCGTGCATCAGGCGCTGGATGCCGAGCATCGGACTTACTCGCGAGAAGACGATT
+CACCGCAGGGTTCCGTAAAAGCCCGCCTTGAACACGCCTGGCCGGATTCATTGGCGAAGG
+GGCAGTTAATTAAAGATGATGAAGGGCGCGATCAGTTGCAGGCTATGCCAAAAGCGACGC
+GCTCTTCGATGTTTCCTGATCCCTGGCGAACCAACCCGGTTGGCCGTTTCTGGGATCGCC
+TGCGTGGGCGGGATGTTACGCCGCGCTATGTTTCTCGTCTGACAAAAGAAGAGCAGGCGA
+GTGAGCAAAAATGGCGTACCGTCGGCACTATACGCCGCTATATTTTGTTAATTTTGACTC
+TGGCGCAAACCGTCGTTGCGACCTGGTATATGAAGACCATTCTGCCCTATCAGGGATGGG
+CGCTCATCAATCCTATTGATATGGTGGGGCAGGATATTTGGGTCTCCTTTATGCAGCTCC
+TGCCCTACATGCTGCATACCGGTATCCTGATTTTGTTTGCCGTGCTGTTCTGCTGGGTGT
+CTGCCGGATTCTGGACTGCGCTGATGGGCTTCCTGCAACTGCTTATCGGGCGCGATAAGT
+ACAGTATCTCCGCGTCTACGGTTGGCGATGAGCCCCTCAATCCGGAACACCAGACGGCGC
+TGATCATGCCTATCTGTAATGAAGACGTTAGCCGCGTTTTCGCCGGTCTGCGCGCGACCT
+GGGAGTCCGTTAAAGCTACAGGCAACGCCGCGCATTTTGACGTCTATATCCTTAGCGATA
+GTTATAACCCGGATATTTGCGTGGCGGAGCAAAAGGCGTGGATGGAGCTCATCGCGGAAG
+TGCAGGGCGAAGGCCATATTTTTTACCGTCGCCGCCGCCGCCGTATGAAACGCAAAAGCG
+GCAATATTGACGATTTTTGCCGCCGCTGGGGCAATCAGTACAGCTATATGGTGGTGCTGG
+ACGCGGACTCAGTGATTAGCGGCGAGTGTCTGAGCGGGCTGGTGCGCCTGATGGAAGCGA
+ACCCTAACGCCGGGATTATCCAGTCTTCGCCGAAAGCGTCGGGGATGGATACTCTGTATG
+CCCGCTGCCAACAGTTTGCGACCCGTGTTTATGGACCGCTGTTTACCGCCGGGCTGCACT
+TCTGGCAGTTGGGGGATTCGCACTACTGGGGGCACAATGCCATTATCCGCGTGAAGCCGT
+TTATCGAGCACTGCGCTCTGGCGCCGCTGCCGGGAGAAGGTTCGTTCGCCGGATCGATTC
+TTTCCCACGACTTTGTTGAGGCGGCGCTAATGCGTCGGGCAGGGTGGGGCGTCTGGATTG
+CCTACGATCTCCCCGGTTCCTATGAAGAGCTGCCGCCAAACCTGCTGGATGAGCTTAAAC
+GCGACCGCCGCTGGTGTCACGGCAACCTGATGAACTTTCGTCTGTTCCTGGTGAAAGGAA
+TGCACCCGGTGCATCGTGCCGTGTTCCTGACCGGGGTAATGTCATACCTGTCCGCGCCGT
+TATGGTTTATGTTCCTTGCGCTTTCTACCGCGCTGCAGGTCGTTCATGCGTTAACAGAGC
+CGCAATATTTCCTTCATCCGCGCCAGCTTTTTCCGGTCTGGCCGCAGTGGCGTCCGGAAC
+TGGCAATCGCGCTGTTTGCGTCAACGATGGTGCTGCTGTTCCTGCCGAAGCTGCTCAGTA
+TTATGCTGATCTGGTGTAAAGGCACCAAAGAGTATGGCGGTTTCTGGCGCGTTACGCTGT
+CGCTATTGCTGGAAGTTCTGTTCTCCGTGTTGCTGGCGCCGGTGCGTATGCTGTTTCATA
+CCGTGTTTGTGGTCAGTGCGTTCCTCGGCTGGGAAGTGGTCTGGAACTCACCGCAACGCG
+ACGATGATTCTACGCCTTGGGGAGAAGCCTTTATGCGTCACGGCTCTCAACTGCTGCTGG
+GGCTGGTCTGGGCGGTTGGTATGGCGTGGCTGGATTTACGCTTTCTGTTCTGGCTGGCGC
+CGATTGTCTTTTCGCTTATTCTGTCGCCATTTGTTTCGGTGATCTCCAGTCGTTCAACGG
+TAGGATTACGCACCAATCGCTGGAAGCTGTTCCTGATCCCGGAAGAGTATTCGCCGCCTC
+AGGTGTTGGTCGATACTGATAAATATCTGGAGATGAATCGCCGCCGTATTCTGGACGATG
+GCTTTATGCATGCGGTTTTTAACCCGTCGCTTAATGCGCTGGCGACCGCGATGGCCACCG
+CGCGTCACCGCGCCAGTAAGGTGCTGGAAATAGCCCGCGATCGTCATGTGGAGCAGGCGC
+TAAACGAAACGCCGGATAAACTGAACCGCGATCGGCGTCTGGTTTTGCTCAGCGATCCGG
+TGACGATGGCGCGTTTTCACTATCGGGTCTGGAATGCGCCAGAGAGATACTCTTCCTGGG
+TAAACCATTATCAGTCTCTCGTCCTGAATCCGCAGGCGTTGCAGGGACGAACATCGTCAG
+CGGGATAAGGTCTTCATGTCTGGAGTGAGGTGAAAAATACCGGCGTGATGCCGGTATTTT
+TATAGTGAAATGAGGTTTCAGGTGCGTATATTCGCGGTGAGCATAATGGTGATTACCCTG
+AGCGGCTGCGGCAGTATTATCAGCAGAACGATCCCCGGACAAGGACACGGCAACCAGTAT
+TACCCTGGCGTGCAGTTGGATATGCGTGATTCCGCATGGCGCTATATCACTATCCTCGAT
+CTGCCCTTCTCACTGATCTTCGATACACTGCTACTGCCGCTCGATATTCACCACGGGCCT
+TATGAGTAATTAACGCTCATCCCATTCGTCTGCCGCAGTACGGCCTTCCTCGGTATCAAG
+CGGGGGTTCAAGCTGGTATTCCCCTTCATCCCATTCATGCAAAGTATTCTCTTCCTGCCA
+TTCCTGGCGGATCTCTTTCTCATCATAATCGCCGTCAAACACGCTCTGTGCGGCTTCACC
+ACTCAGCATCGGCAGATATTCGCCATCTTCACCTTCATCGGCGAAAAATTCGACCTGCCA
+CATGATGTCCCCATCCTGCAGTACATATTTCTGAACGTTGAACTGCTGCACATTCGCTTC
+GTCTTGTTCGAGGCCTTGATGGTCAGCCAGAAATTCTTCCCGGGCTGCATCGATAGCTTC
+TTCCAGCGTGGCATACTTGGTCATCAGTGTCTCCCTTTGATTTGACGAGGTATTTAGGGA
+AAGAATAGCTGATTCTTTGATATTGCAAGTATGAAAGCGCAAAAATCATTCTATTGCCAG
+TCTGCGCCGTCGTAAGTTGTTCCATGAATAGATAGCATTGAACAACACCACGCCTGCCGT
+GACGCAGAATACGGCGTGGAAGCCATAGCTCGCGGAAATTGCGGCACCCATGAGAGGGCC
+GGTGACGTTGCCGATATCGCGGAAAGATTGGTTGTAACTGAATATGCGCCCGGCTATCTG
+GTTGGTAGAGTTGTAATCCAGCAGAGTTTGAACGGCTGGCAGCAGCGCGCCATCCGCCGC
+GCCAAGCAGAAACCGCTATAGCGCGAGCTGCCACGGCGTTTGCACAAAAGACATTGGAAT
+CAGCAGCAGTACGGAATTAATCAGCGCGACGATAAGGATCTTTTCCGGGCCAATTCTGTC
+GCCGAGCTTGCCGAGCTGGGGGGCGCTAAGTAATGCCGCCACGCCGGGAACGGACGCTAT
+CATCCCGCTAATGAACTCAATATTACTGACATTTCCCGCGAGTTCGCGCACATAGAGCGT
+CAGGATGGGCGCAATATACCCCGTCGCCACCTGAATAATCAGCGTGGTGACAAACAGGCT
+TAACACCAGGCGGGGATTTTTTAACGAGGCGACCACTTCCCGAACGTGAAGCATCTCTTT
+TTTGCTCACCGGCAGGTAATTCTCGCGAATAAAAAAGAAGGTGAGCAGAAAACAGATAAA
+CAAAACGCTGGCGGTATTAAAAAAGACCGGGCGAAGGCCGTAATGGTCGGCGAGCAGACC
+GCCCGCAAGTGGGCCATGCAGAGCGCCGCTGACGCCGCCAGTAGAGAGCGTCCCTAACGC
+CCAGCCGCTTTTGTGGTGCGGCACCTGAGTGGCGATGAGCGCATTGGCGTTGGGAATAAA
+TCCGCCCAGCAGGCCTTATAACGCGCGGAGGATCAGAAATTGCCAGATATTTTGCGCCAT
+GCCCATTAACAGCATCTCAATGGCCATGCCGAGAGCGGAACGCAACAGTATGATCTTACG
+CCCTTTACGATCCGCCTGACCGCCCCAAAAAGGGGAAGCGATAGCTGAAAAGAGGAACGT
+AATACTAAAGACCAGCTCGGACCACATGTTGAGCGCGCTATGGCCTGTTACGCCGAGTTG
+CTCAACGTAGAGAGGGTGAAAAGGCATGACCAGACTGAATGCCGCGCCGGTTAAAAAACA
+GCCCAGCCAGGTAACGTTTAGATTGCGTTTCCAGTTTATGGGGACATCAGAGGGTGACAT
+AGTGTTCCACAGTATGTTGCGCGTTTTGCGCTATCATCAATTTAATTATGAGCGTACTAA
+TTATAATTATGCGCCGTCCTTACCAGCCTCGCAATGTGGGGAGCTTTTAAAGCTAAAAGA
+GGGGAAAAATTGCAGCTTGACGGCTGCAATCCTGTCAATAGCGCGACGGTACGCCTTCAG
+GGCGAGTTTTAAAGCGTCGATGCAGCCACATATACTGCTCTGGCGCCATCATAATGCACT
+GCTCAACAATCTTATTTATCCATGCGGCAGTCGCTTCTGCGCTCTCCAGCGGAGGCGAAT
+ATTCCGCAGGAAGGATTATCAGTTCGTAGCCTTTCCCGTCGGGTTTACGGCGAGGCACGG
+AGGGGATAATACAGGCTTTAGACATGCGCGCGAGCATCCAGGTACCGGAGGTCGTAGCGG
+CCTGGTCGACGGCGAATAACGGTACAAAGACGCTGGCGCGCGGGCCATAATCGTGATCCG
+GCGCATACCAGATCAATTCGCCGCTTTTCAATGCCTTAACCATACCTTTCAGATCTTTGC
+GATCCAACATCGATTTTTTGGAGCGTAAGCGCCCCCAGGTCTGTAACCAGTCGAGCAACG
+GATTATCATTCGGGCGTTAAACGCCGATACCCGGGTTATGCATACCAAACATGCGGGCGC
+CAAACTCAAGGGTAAGTAAATGTATTCCTACCAGAATGAACCCCAGCCCCTGCGCCTTAA
+CTTCACGGATATGCTCTAGACCGCTCGCTTCCATCCAGCGGTTCACTCGCCGATCGGGCC
+AGAACCAGGCCATGCCTGTTTCCATCACGCCCATACCGACGGATTCAAAGTTTTTGACCA
+CCATGGTGTGGCGTTCTTGCGCGCTCATTTCCGGAAAGCATAATTCAAGGTTGCGATAGG
+CGATTTTCGCGCGGCGTTTCATTACGCGTCGCGCCAGGTGGCCCAATGCGCAACCCAATT
+TATAGATGACCGGGTATGGGAGTTGCACGACCAACCAAAGCGCGCCTATACCCAACCAGG
+TTAACCAATAGCGCGGTTGCAGTAAGGCCACGGAGAACTTAGGCAACTTCGTCATTTCTA
+TCCTGTCTTTCAACGATCAATTCTCCGTATTCTCGCATCTTTTTGCGTTGAGCAAAAATA
+TGTAGCAGGAGAGTGGTGATTAAATCGACAATTGTTGTTAATTATTTAGCGTAAAGCAGG
+AAATGTAGCGCAAAATTTGTGGATGTAAATTGGCGAGACTTGCCTTATCATGCCTGCCCA
+CTTTATTTTTTGCTGATTGCAGGATACGTACACCATGCCAGTGTTACACAACCGCATCTC
+TAATGACGAGCTGAAATCCAAAATGCTGGCGGAAAGCGAGCCGCGTACGACAATTTCTTT
+TTATAAATATTTCACTTTCGCCTCGCCGCAACAGACGCGGGACGCGTTGTATCAGGTGTT
+TACGGCGTTGGACGTTTTTGGTCGCGTTTACCTGGCGCATGAGGGCATCAATGCGCAAAT
+CAGCGTGCCGCAAAGCTAGGTTGAGACCTTTCGTCAACAGCTTTATACGTTCGACCCCGC
+GCTGGACGGGGTGCGTTTAAATATCGCGCTGGAGGATGACGGAAAGTCATTTTGGGTGCT
+GCGTATGAAAGTTCGCGACCGTATCGTCGCTGACGGTATTGACGATCCGAGTTTTGACGC
+CAGTAATGTCGGCGATTATCTGAAGGCGGCAGATGTGAATGCGATGCTGGACGATCCTGA
+CGCGGTCTTTATTGATATGCGCAACCACTATGAGTATGAAGTCGGCCATTTCGAAAATGC
+TCTGGAAATCCCGGCGGATACGTTTCGTGAACAGTTGCCAAAAGCGGTTGAAATGCTGCG
+GGAACATGCAGATAAAAAGATAGTGATGTACTGTACCGGCGGTATTCGTTGTGAGAAAGC
+CAGCGCCTGGATGAAACACAACGGTTTCAATAAAGTCTGGCATATTGAGGGTGGCATCAT
+TGAGTACGCCCGTCGCGCGCGCGAGCAGGGGCTTCCCGTTCGCTTTATCGGCAAAAACTT
+TGTATTTGATGAGCGAATGGGCGAGCGCATCTCGGATGAGGTTATCGCGCATTGCCATCA
+GTGCGGCGTGTCCTGCGATAGCCATACCAACTGCAAAAACGACGGTTGCCATCTGCTGTT
+TATCCAGTGTCCGCAGTGCGCCAGTAAATTTAACGGCTGCTGTAGTGAACAATGCTGTGA
+AGAGTTGGCCTTGCCGGAGGAAGAACAGCGCCGACGTCGCGCGGGTCGTGAGAACGGCAA
+CAAAATTTTTAATAAATCGCGGGGTCGGCTTAATAGCAAACTGAGCATTCCCGATCCGGC
+TGAGTAATATTTTGCCGGATAGCGGCGTAAAGGCTGCTATCCGGCATTTCGCCAGGCGTT
+ACTTCTGCTGAACGCCTTCTACTGAGATGATAAGCTCCACCTCTTGTGAGGCTGGGCCGA
+GATCGGTAGTTATATTGAAATCTTTCAGCTTAATTTTTCCTTCGGCCTCAAAGCCCGCGC
+GCTTACCGCCCCACGGATCGTCGCCCTGGCCCATCAGCTTCGCTTCCAGCGTCACCGGTT
+TAGTCACGCCATTGAGCGTCAGATTGCCGGTAATATCCAGTTCATCGCCCTCTTTTTTCA
+CGCTGGTAGAGGTGAAGGTTGCCTGCGGGAATTTCGCAACATTAAGAAACTCCGCGCTAC
+GCAGGTGTTTGTCACGTTCGGCATGGTTAGTGTCGACGCTATTGGTGTTAATGGTCACAT
+TCACTTTGTCTGCTGACGGATTTTTTTCGTCAAAAGTGAACGTGCCGTCGAAATCTTTAA
+AGGTGCCGTATAGCCAGCTGTAGCCCAGATGCTGGATGCGGAAATTGACGAACGCATGTT
+GGCCTTCTTTATCAATTTTATACTCCGCCGCCACGGCGGAACCGGTCGTGAATAACAAGG
+ATGCGAGGGTGAATCCCAGCAGGTTTTTTTTCATTTTTGAGCTCCATAGTCAGATGACGA
+CATTCCTGTCATACGTTTCAGTGTGTCGTCTTTATCGATGAAATGGTGTTTTAGCGCCAT
+AACCCCATGCGAGAGCGAGATAATGACCAGCGACCAGGCAAACCACAGATGCAGTGTTCC
+GGCGATGTCAGCCTGCGCGCCCGCGTCCGTAAGCGTGGCCGGAATCTCAAACCAGCCAAA
+GACGCTAATCGGTTTACCGTCGGCGGTGGAAATCAGGTAGCCGCTAATGATTATCGCAAA
+GAGCAGGAGATACAGAAGGATATGACCCGCGGCGGCGCCAATGCGCGTTAAACGGGAATA
+GCTGGTCAACGCAACGGGCGGCGGAGAATAAAGCCGCCAGATAATACGCACGATCAGCGC
+CATCATCAGTAACATGCCAATACTTTTATGTATTTCCGGCGCCTAGTGATACCAGCCGTC
+GTAATAACTGAGCGTGACCATCCATAAACCCAACGCAAACATGCCATAGACCACTAGGGC
+GGTCAGCCAGTGGAGGGCGGCGGAAACTACGCCATAACGTTGTGGAGTATTTTTAAATTG
+CATAAACACACCAATGAATATTTCACGAGAGAATGAAAATGGCGTGGAAAAGCGCCGAAT
+GCAACTTATAAATAAGAATTTGAATGATATTTATTTTTATTTCAATAATTTTGATGTTGT
+TTGCGATTCAGCTTCAGAAGTTTCGAGATATTTCACTCCTGTAACCGCACAACGGCAGGA
+GTTGGCATTATTGCCGTGCTTCAACATTATGAAATAAAAAGTGAGGAACTTTCAGGAAGT
+GTTAGTTAACGTCAATGAAAAGCAATCAGAAGAAAAGGAGATAAACAATATCCATCACCG
+CCAGCAGCGACCAAAGAATAACGTAAAGCATGAAATGTTCGCGAATATTATTCATCAGAA
+AATGAAAGAGACGACGCATAGCTTACCTTAATAAACAGCCCCTTTACGGGGCCGACAAAT
+TATTGGCTAAAACGGGAAAGCCGGAACGGCGTCAGATCAAAGGAGGGCGTTTTTCCCAAC
+GCAAAATCCGCAGCGATTTCTCCTAACACCGGGGCGAATTTAAAACCATGTCCGCTGAGT
+CCAGTGATGACAAGCGTATTCTCATGGCCAGGCAGCGTATCGATAATAAAATCCTCGTCC
+GGCGAATTATCATAGGTACATGCCGCCCCATGTAAACAACCGCCGATACCCGGCAGTACG
+TTACGCAGGAAAGGAAATGCTTCCGCGCCATCGCTGGCAACGGCGGCAAAGGGCTTGCGC
+TCTTCCGGTGCCTGTATTCGCTGCCCGCCATTGTGTTTGCCGATTTTTAACTCGTCGTTC
+TCCGCCGGGAAACCGTAATAGTGATCGCCGTTGGGCATTTCGCCGGTAAAGGCCGGAAAG
+CGGTTTTTAGTGCTGTAACGTCCATCCGCCTTAAACCAGGCAAAAACTTTACGTACGGGC
+TGAACGGGCAGCTCCGGTACCAGCGTTTTGACCCAGGTGCCCGCGCTAATCAGCGCTTTG
+CTGGCGTGGTAGCAGCCTTCACTCGTCTCTATCGTCACACCGTTATCATCATGGTGAATA
+TGGCTTACCGGGCTGTTGAATAGCTGTGCGCAGCCTGCCTCTCGGGCCAGACGAAGCCAT
+GTGGTAATGGCTAATTCGCTGCGCAGGAAACCGGAGTCAGCTTCAAACAGCCCGATATAA
+TTATCGGGCACGCGAATTTCCGGCCAGCGCGTCATGAGGGCCGTCGCGTCCAGGCGCTCG
+ACGTTCAATTGCCACTGTTGCGCGCTTCGTGCGACGTTGGCTAAGAAAGCGGAATCGGCC
+GGGCCGAGGTTGACGACGCCGGAGCGGACAAAAATAGGCTCTTCATTGTGTGTGGAGAGC
+TCATCCCAAAGCGTCTGGGCGCGAAGCACCAGCGGGACATATTTTTCGCCTTCACCATAA
+GCGTGGCGGATAAGACGGGTATCGCCGTGGTGGCTGCCCTGTTGATAAGGCGGCATATGC
+GCATCGGTCATCAGGACCTTTAGCCCGGCGCGGGTGGCGTAATAACCAGCGGCGGCGCCA
+ACCGAACCGCTGCCGATAATAATAAGGTCGTATTTCATCAGCTTCTCTCTGCTATCGCGA
+TGATTTCAAGGTAAATAACTGCGCTGAGATATACAAGCCAGAAATAAGTGAGGCACCTTA
+CGGTGCCTGAGAGAGGGGGAGCGCGTCACGCTAATGGTGACGATACTCGTTTTCCTGGTA
+ATCGCCTGATTCTATTTTGGCGATGCCGGCTTCTAATATTGAAATAAATTGCCTGGCTAC
+ATCTGTCGTTAACCAGAGCGTTTGACCAACTTCAGTCCCTTCCGGTTCCGGACGATTTGG
+GGTCTGGTAGTGTAAACGCAGCATCAGCGCATCATAGCTATCGACGGTGCTGATGTCCCA
+TCCTACAAGCGGATGGGTCTGAATGACTTCATTATTCTTTTCCATCATGCCCCCCTGGTA
+CGTGTTATAAGACAACGGTTCTCGAGGTTCAATGCGTGTTTTTCTTCTGAAGCAACTTCA
+GTATACCAATTAATAAGGCTATTCACTGCGTTTTTAAAGAGACCGGAGGATAAATTTCTC
+TTTTTAAGAATTATATGAACAATAAAGCGGCAGTTCATTCATATTTTTTTAGGATGTTGT
+GCAATTATTTTGACGGTCAGGCGAAATATTCATCAGTTGCGCAAATAAAAAAGCCGGGGC
+GACCCGGCAAACATACATCACTGCATATCATTTTTTATTCATTGATGAACCAGTCATCAG
+CGCTCTCCCAGGTCTCCTGGAGAATCTCGCTAATGCGTTCTTTATCCTCTTTTGACGCGC
+CAATGACGGACAAGTTGTTGGCGGTAGCGTAACGCACGGTGACGTTACCCAAATTCTCAG
+GAAAATGATGGCTAATACGGCGGGNNNNNNNNNNNNNNNNNNNNNNNNNNCCCGCCAGCG
+CACCAATCGCACCGGCAGGCAGAGGTGACGTTTTGGCTATAGTGACTTCAATACGCATAA
+TGGCCCCCTGTTGAATATACTGGATATATATACAGTTAAATCCAATATATAGCAACAGGT
+AAGCGCATTTTTTATTTTTTTACTGACCAGCGTACTGTTTCACCCGCTAAAAAAGGCACC
+AGCGAATCATCAGCCAGCGCGATATTTTNNGGTATCTGTTGTTCATCGCGAACCAGTTCC
+ACCCACCCCGTATTCACCGGCAGGCCATNNAATTGCGGGCCATTCAGTGAACAGAACGCT
+TCAAAGTGCGCCAGCGCGTTCATTTCCTNNAACACGGCGGCATAACTGCCAAGAGCGGAG
+GGGGCGTTGAAACAACCGGCGCAGCCGCNNCGGGTCTCTTTACGATGACGTGAATGCGGC
+GCTGAATCCGTCCCCAGGAAGGCGCGCGNNAAACCACTGGCGACCAGGTCGCGTAACGCC
+TGCTGGTGAATATTGCGTTTCAGAATCGNNAGACAGTACAGGTGAGGACGAATGCCGCCA
+ACCAGCATATCATTACGGTTAAACATTANNTGTTGAGGCGTAATGGTCGCCGCCAGGTTG
+TAGCTGCCGTCACGTACATACTGCGCGGNNTCTTTGGTTGTGATGTGTTCAAAGACCACT
+TTAAGCGCGGTCAGACGCTGGCGTAGCGNNTCCATTACGGTGTCGATAAAACGCGCTTCG
+CGATCGAAGATATCAACATCCGCATGGGNNACCTCACCGTGGACCAGCAATGGCATTCCG
+AGTTTTTCCATCCGCTCCAGTACCGGCANNATAGCGTCGACTGACGTTACGCCATGACTG
+GAGTTAGTGGTGGCATTGGCCGGGTAAANNTTGGCCGCAGTAAACACGCCTTCATGGAAA
+CCACGCTCCAGTTCATCGGCATCGAGCGNNTCCGTTAAATAGCAGGTCATTAACGGCGTG
+AAATCATGCCCGGCGGGCACCGCATCGANNATACGCTGGCGGTAGGCGATCGCTGCATCA
+ACGGTCGTAATGGGGGACGCCAGGTTCGNNATCACGATAGCGCGACCATAAATTTCGCTG
+GTATAGGGTACGACCGTTTTTAACATGTNNCCATCGCGAAGGTGAACGTGCCAGTCGTCC
+GGGCGGCGGATCTTTAAAACCTGGGATGNNGCAGTCATTAATAAGCTCCGGCTGAGGAAT
+AGTCTTTTTGCCGGAAACAAAGGATAAGNNGAAACGTTTTCGTTTGCACGTAAAAAAAGG
+GCGCGAGCGCGCCCTCCGAAATCAATTGNNGAAAGGAATAACGATTTCACCAGGTTTAAC
+TTCAATGCCTTTCGCGAGTTTCTTCGCTNNCGCTTCGCCTTTACTACTGTCCTCACGCAA
+CACGTAAGCGGGCCGCTGGTTAAAGTAGNNACGTAAAGACTGATTTAAATAGGGCAGGAG
+CGTTTGTAGCACTGATTGCATTTTCTCCNNCGTCACGGTGGCGTCTACTACTTCCATCTC
+CTGAAGATAGATGGCGCCTTTTTCTTTANNAAAGACCGGCAGGGCTTTTAGCTTGAGTTT
+CATCGTCGCTTTTTGACTGCCAAACAGGNNATTCATATCCAGCCTGGCATCGCCAGTAAG
+GGTGACTTTATTAGGCTCTTCCCGACCANNCTGGCTGGCAAGGTTAGTCAATACGATATG
+CGCGTCGGCAATGCCAGGCAGACCAATANNTTTTGAGAAATTATTCCGTTTTTCAAGCGC
+TTGATTGATTTCTTGTTCGCTAATGGTGNNTTGCGTAAGTTGATTACAACCCACTAACAG
+GCCGCTAACTACCAGCGCAGCGGCAAAANNAACTTTTTCATGGCGTTCCTTAGCATGTTG
+CCTGTGCCCTAATCTTGACACAAAGCAGNNTGTCGCGCCAGCGGACATGGCGCCACTAAA
+AAAAGCTGAAAAAGGCGGCAAGAAAGGGNNGCCGCCTGGCGGGCGTTAGATAGCCATTGA
+GGAGAGTAAATTAATTTGCGTCTGCTTANNCATATTATCGCGATAGTCCGCGACTTTTGT
+CGGCCAGTGAATACCGGCGACCAGCGTCNNATTACGCAGAAGCGGAAACAGATGAATATC
+ATCTTCCGATAATTCGCCGTTAACGGCGNNAGGCTGTACGATGAGTTTATCCAGCAAACG
+TAAATCATCGCCGATCTTTTTAATCAGTNNGGCAGAGTGCGCAAGATGGTTGTCAAAACT
+GCCAGATGAGGCCTCTTTTTTGCGGATANNATACTGGCGCGCCGCAGGGGTAGAAAATTC
+ATCGAAAGCGGATTTTGCAAATCGCGGCNNCAGTAGCTGATTAACGTAACCGTTAACTTT
+GCGCAACCACTCTTCAATGGCTGGATTANNTTTCCCGGTTAACAGCGGTTTGCCGTCGAG
+GTTGTCGACATAATGTACAATATCCATANNTTCAGGAAGGTAGCGACTATCATCTTTTTG
+CAGGATGGGCACCATCTTTTGACCAATCNNCCGGGTGGGCGTCGCCTCGTCGTCATTTTG
+TAACACGTTAAGTTCAACGGGGATGTTCNNCAGGCCGAAAATCATGCGGGCTTTAACGCA
+GAAAGGGCAATGATCGTAAATATAAAGCNNCACGTTTCTCCTCCATTTGACTGTCGGTTC
+CTGACCAGTATGGAGGAGATAGCGACAGNNATCAAATCAGGCGCCGGGTTCCAGCATCCG
+GCGCGGCGTGCGCTTATGACTAAATTGCNNGCCTAAAGCCAAAAAGGTGATAAAGCCGAT
+AATACCGAGCATCATCCACGGTAGTTCANNCTGCGCAAGCGCTTTACCCATATCAAACAA
+CCAGCCGCCGCCGATATAACTAATCGCGNNGCCAATGGCTAATCCCAGACGGCTAAAGCC
+CATATAGCTTCCCCGCGCCCTCGCGTCCNNGGGCGACGCGCTGAGCGTTTCGCGCGCCGG
+TTCGGCGATAACCGAGCCGATGTAGAAANNGCAAATAAGCGTAAAAAGCTGCTGTAAATT
+GCCCACCATCCCGATGGGGAGCATGCTCNNCGACATGACGAGCAAACCGGCCATCAGCCG
+ATGCTCCAGCCGAAAACGCTTTTCGCTCNNGCGGGCAATCGGGTAGAGCAACGTCAGCGA
+GAGACACGCCTCAATAGCGTACATCCATNNCACGGCAGCAGGCGAACCGGCGATATCGTT
+TACCATAATCGGCAGCATTAACATGACCNNTACCGCCAGCATATAGTAGCCCGCCAGCGT
+CAGCACGTAGGTGACAAACCTTTTATTGNNCATGACGCGGCGCATTCCTTCACGCACCGG
+CGTTCTGGCCGTTGATAGCTTCCAGGCCNNAAGCAGCCATGCGTTGAAAAGGGCGCATAA
+TATGAACAAAATAGCGCCCGTCGCGCAGNNCAGGCGAAAATCGTATTGTAGCAACCAGCT
+TCCCAGCAGCGCGCCAATCACCGCGCCCNNGCTGTCCTGCATCATCAACAGAGAGAAGAA
+GCGGCCCCGTTGCTCCGGACGAATTAATNNGACCACCAGCGCTGAACGCGGCGGGTCGAA
+AAGCGTACCGCCGAGACCGGAAAGAAAGNNGGAAAACCACAAGAGCCAGGGCTCATGCGC
+GATACCCATGGTGGCAAAGCCTGCGGCGNNCATCAGCATACCGGTGACAATCATCGGTTT
+CGCGCCAAAGCGATCGGCGATGGCGCCGNNAAAAATGCCCAGACCTTGTTGAATAAACTG
+ACGCAGGCCGAGCGCGATCCCTACCATTNNGGCAGCCCACCCCATTTGATCGACAAAGCG
+AATAGAGATGAGCGGGAAGACGACGAAANNACCCAGCACCACTAACATGTTATCGATGAG
+AAGAAAATATTTACCCAGGTTCCTCGCCNNCGAGACGCGCGACATTTCCCCTCCCGGGAA
+ATAAAAGATGAGCGTCTTCTATTCTGCGNNGGCGTTTCGTTTTTTCCTACCGTTAGCGGG
+ACAATATTTTTTTATCAAAAGTCCTTTTNNATCGAGAGTTTTCATCAAAATGTGGCAGCA
+ATTCAAAAAATGACGATTTGCGCTTTTCNNAGGGCCTGGTTGCGCAGGTATAGTAATGTT
+ACTGGCGTGCTGAAGACGTTACAGGAAGNNGTAGGTATAGAATGTTTGGCTATCGCAGTA
+ACGTGCCAAAAGTGCGCTTAACCACCGANNGTCTGGTGGTACGTTTAGTGCATGAGCGTG
+ATGCCTGGCGTCTGGCCGATTATTACGCNNAAAATCGTCATTTTTTAAAACCCTGGGAAC
+CGGTCCGTGATGAAAGTCATTGTTATCCNNCAGGATGGCAGGCGCGTCTGGGAATGATCG
+GTGAATTTCACAAACAGGGCTCCGCCTTNNATTTCGCGCTACTTGATCCGGAAGAAAAAG
+AAATTATCGGCGTGGCGAATTTTTCCAANNTGGTGCGCGGTTCTTTTCATGCCTGTTATC
+TGGGCTATTCCATTGCGCAAGAGTGGCANNGGCAAGGGCTGATGTTTGAAGCCTTAACCG
+CTGCGATTCGCTATATGCAGCGCACTCANNATATCCACCGTATCATGGCGAACTATATGC
+CGCACAACAAACGTAGCGGCGCGTTGCTNNCGCGGCTTGGCTTTGAAAAAGAAGGCTATG
+CGAAAGATTACCTGTTGATTGATGGACANNGGCGCGACCATGTCCTGACGGCGTTAACCA
+CGCCGTTATGGACGCCGGGGCGTTGAGCNNCTTACGGAATGAGAGGCAAAGGGAGAAAAC
+GATGAAATATGAATTAACCGCCACTGAANNGCGAGTGATTGGCTGTCTGCTGGAAAAGCA
+GGTGACAACGCCGGAACAGTATCCGCTTNNCGTCAACGGGGTGGTGACAGCCTGTAATCA
+GAAAACCAACCGTGAACCGGTGATGAACNNGACGGAACAAGAGGTACAAGAACAGCTCGA
+TAACCTGGTGAAACGCCACTTTTTGCGTNNGGTCAGCGGGTTTGGCAACCGCGTCACCAA
+ATATGAACAGCGCTTCTGTAATTCCGAANNTGGCGATCTGAAACTTAGCGCGGCGGAAGT
+GGCGCTCGTCACTACGTTGCTGCTGCGCNNCGCGCAAACGCCCGGCGAGTTGCGTAGCCG
+GGCGTCGCGGATGCATGAATTCAGCGATNNGGCGGAAGTTGAATCCACGCTGGAACGGCT
+TGCCAGTCGTGAGGACGGCCCGTATGTCNNCCGTCTGGCGCGTGAACCGGGTAAGCGCGA
+AAGCCGCTATATGCACCTTTTTTGCGGCNNCGTCGATGAACTGTCTCTCCAGACGTCTGC
+GCCGGAAAGTGCGTCGGGCGATCTTCAGNNGCGCGTCGAAGCGCTGGAAAGCGAAGTGGC
+GGAGTTAAAGCAGCGGCTGGATTCTTTGNNAGCTCACCTGGGAGAGTAATGTGAGAACAT
+TACGGATTGGCATTGTCGTGTTAGGTGGNNTTGCGCAGAAGGCCTGGCTGCCGGTATTAA
+CCAACACCGCCGGATGGACGTTACAGGGNNCCTGGTCTCCTTCGCGGGATAAAGCCTTAC
+GTATTTGCGAAAGCTGGCGCATACCGTANNTGGATTCGCTGGCGAATTTAGCGTCCGGCT
+GCGATGCGGTCTTCGTCCACTCCAGTACNNCAAGCCATTATGCCGTGGTCAGCGAACTTC
+TCAACGCTGGCGTCCATGTCTGCGTGGANNAACCGCTGGCGGAAAATCTACGTGATGCCG
+AACGGCTGGTGGCGCTGGCGGCGCAAAANNAATTGACGCTGATGGTTGGCTTTAATCGCC
+GTTTCGCGCCGCTGTACCGCGAACTGAANNCGCGCCTCGGCACTGCGGCGTCACTGCGTA
+TGGATAAACATCGTACCGATAGCATCGGNNCGCATGACTTACGTTTTACTTTGCTCGATG
+ACTATCTGCATGTCGTGGATACCGTTCTNNGGCTGGCGGGCGGCGAGGCGCGCCTTGCCA
+GCGGCACGTTGCTCACCAGCGAGTCCGGNNAAATGTGCTATGCGGAACATCATTTTTCCG
+CCGACAAATTACAAATTACCACCAGTATNNACCGGCGCGCCGGAAGTCAGCGTGAATCGG
+TCCAGGCCGTCACCGATGGCGGGCTGTANNACGTGACGGATATGCGTGAATGGCGCGAAG
+AGCGCGGGCAGGGTATTCTCATCAAACCNNTTCCGGGTTGGCAAACAACGCTTGAGCAGC
+GTGGTTTTGTCGGATGCGCGCGGCATTTNNTTGACTGCGTACAAAATCAGACGGTTCCGG
+AAACGGCGGGGGAGCAGGCGATTTTGGCNNAGCGCGTCGTGGAGGCGCTGTGGCGGGACG
+CCATCAGCGAATAATCCTCTGTAACATCNNGCGGTAGTAATTCATCGTAATCCAGGTACT
+ATACCCTCAATAATTCGAGTTGCAGAAANNCTAACGCACATGCAGCTCGAAGTATGGCGG
+GTATATATGCCCACTCTACATAATACCTNNTTTCACAGTGAATAATGGCAAACCGTGGGG
+AGTCTGTAAGGCCTGATAAGACGTTTTGNNGTCGCCGTCAGGCGCAGTGATACAGCCGGA
+TACGGCAGAAGTTGCGTTATCCGGTCGANNGATCAGCCATGCAGGAGTTTTACGCCAGGG
+TCTGGAATACAAAAGAAATGAATTTATTNNAATCGCTGGCTGCCGTCAGCTCGATGACTA
+TGTTTTCACGCGTGTTGGGCTTTGCCCGNNATGCGATTGTCGCCAGAATTTTTGGCGCAG
+GGATGGCGACCGACGCCTTTTTTGTGGCNNTTAAACTTCCCAATCTACTACGCCGGATCT
+TTGCCGAAGGCGCTTTTTCTCAGGCCTTNNTGCCTATCCTGGCGGAATATAAGAGCAAGC
+AGGGTGAAGAAGCGACGCGGATCTTTGTNNCTTACGTTTCCGGCCTGTTGACGCTGGCAT
+TGGCCGTCGTGACGGTGGCCGGTATGCTNNCCGCCCCGTGGGTGATTATGGTAACCGCGC
+CGGGTTTTGCCGATACTGCGGATAAATTNNCGCTGACGACGCAACTGCTGCGGATTACGT
+TTCCCTATATTCTGCTGATCTCGCTGGCNNCACTGGTTGGCGCCATTCTCAACACCTGGA
+ATCGCTTCTCTATTCCCGCTTTTGCGCCNNCATTTCTTAATATCAGTATGATCGGTTTTG
+CATTATTCGCCGCGCCATACTTTAATCCNNCGGTGCTGGCGTTAGCCTGGGCAGTCACCG
+TCGGCGGCGTGCTGCAACTGGTGTATCANNTTCCGTATTTGAAAAAGATCGGTATGCTGG
+TGCTGCCGCGCATTAACTTTCACGACACNNGGGCGATGCGGGTGGTCAAACAGATGGGGC
+CGGCGATTTTGGGCGTTTCCGTCAGTCANNTCTCCCTTATCATCAATACCATTTTCGCCT
+CGTTTCTGGCCTCCGGCTCGGTCTCATGNNTGTACTATGCCGATCGGTTGATGGAGTTCC
+CGTCCGGCGTGCTGGGCGTGGCGTTGGGNNCCATCCTGTTGCCGTCATTGTCGAAAAGCT
+TTGCCAGCGGCAATCATGATGAGTACTGNNGCCTGATGGACTGGGGGCTGCGTTTGTGCT
+TTTTACTGGCGTTGCCGAGCGCGGTAGCNNTAGGCATTCTGGCGAAGCCGCTGACGGTCT
+CGCTGTTTCAGTACGGTAAATTCACCGCNNTTGATGCGGCGATGACGCAGCGGGCGTTAA
+TCGCCTATTCGGTGGGGCTGATTGGCTTNNTCGTCGTAAAAGTGCTGGCCCCGGGCTTCT
+ATTCTCGCCAGGATATTAAAACGCCGGTNNAAATCGCCATCGTGACGTTAATCATGACGC
+AGTTAATGAACCTGGCGTTTATTGGACCNNTGAAACACGCCGGGCTGTCGCTCTCTATTG
+GTCTGGCGGCATGTCTCAATGCGTCGCTNNTGTACTGGCAACTGCGCAAACAGAATATCT
+TTACGCCACAACCGGGGTGGATGTGGTTNNTGATGCGTCTGATCATTTCCGTACTGGTAA
+TGGCCGCCGTGTTGTTCGGCGTGTTGCANNTTATGCCGGAGTGGTCGCAAGGGTCGATGC
+TATGGCGTTTGCTGCGTTTGATGGCGGTNNTGATCGCGGGTATCGCGGCCTATTTCGCCG
+CGCTTGCCGTGCTGGGCTTTAAAGTGAANNAGTTTGTTCGCCGGACGGCGTAAATTCAGT
+GCCTGATAGCGCTGTGCTATCAGGCCTANNAGGCATTCAGGCCGGAAAGGCGCAACGTCG
+CCATCCGGCAATGATTAGATAGATATTTNNTTACCGCCGCGGTGAGAGACGGAAGTCTGA
+CCGTCAGCCCCGTACAAGGTCGGCTCCTNNTGAGGTTTCAGCACCTCCAGCGCCTGTTGA
+TTACGCTCGATTTGCCCTTCCAGCAGCCNNCCGTTGTGCTGGTTGAGGTCGCGCAGATGC
+TGCGTTTTTTCGGTAATCGCCTGCCAGCNNTCTGCAATGTCATCGTTTGCGCTACGCTGC
+GCGTTCTGCTCCAGACGGCGCTGTTGTTNNAGATAATCCAGCGTCGCCAGCAACGAGCTT
+TTTTCTTCTGTAATACGCTGTAGCTGGCNNCCGTTAATCTGGCCTACGGAAAGCTGTTGT
+TGCTCGGCGTCCATCACCGTCTTCAGGTNNTTCAGGACGGTGGTCATCTGGTCAAGTATT
+TCTGACAAACGAGTCATACGCTTATTTANNCTGTAAGTAGCTCTGCGCCTCGCGAATGAG
+CGAGTCTGCTATTTTTCCCGTATCCATTNNTAACTCACCGTTACGGATAGCCGTTTTTAA
+TGCTTCGACGCGTTCCATATTAATGTCGNNGACGCCTGGCTGCATAAGCTTCGCTTGCGC
+GTCGCTTAACGTTACGCTGGCGCTCGTCNNGGCGGACGTTTTTTCCTGACGCGTTTTTTG
+TACCGGCGTGTCGCTGGTTTCGCGCGTCNNGACAGTGCTAACGGGTTTCAAAGGTGAGGT
+ACGGTCAATGCTCATTTATTTATCCTCANNGAGGGTTACGTTGTAGCGGCCAGCTACCAT
+CATGGTTGAATATCTCATCGGCAGCCGCNNCAAAATCTTTACACAATTATAGGTTAATAA
+GAATATTCCCATCAGAATCGACGGTTCCNNTCACGATTTGACCCGATGTCATGCGCACGC
+GCGCATTTTGCGCGACGGCGGCATTATTNNTCGCCTGACCTTCCGCATTGACGCTAAACC
+CCTCGCCATTGGCGATGACCTGTACTCGNNGACCCGCTTTGACACGCCAGGCCTGACGTA
+TCATCGTAAGCTGTACCGGCTGCCCGGGNNCGAGATCGCGCAAACTGACGGCATCCTGAA
+TCTGACGGATATCCAGTACCGTCCGCGGNNGTAACTGATCCAGTCTGCCACGTTTTAGCG
+TGACGTTGGCCGGCGTCAATTTTCCGCCNNGCGCGACGGGCGCGGCTACGGCGACATAAT
+TGCCGGTCGCTTGCACATTCACCTGCAANNAACGTTTTTCATTGGCGCAGCGCGCCACCA
+CATTGACGTTGCCCCACAGCTTCGCGCTNNCCGTCATGCTGAAGGCTGGCTGCTCGCAGC
+TCGGTAGCAGATTGGGCGGTGAACGGAGNNTGACAACCACCTCGTCGCTGAAGCCAGCCA
+GACGCTGGGAAAACCACGTGGTCAGCTGNNCGTTGATGTCCTGCGCCATTGTCAGGGGGC
+TGAACAGCAAAGCCGCCACGGCGAATCCNNGTTTTAACGTTTGCATGGTACTTCCCCCTG
+GTTGATGTCATGACAGGATTCTACCCGTNNGAAGCAAGCATCAACGCAATAAATAGCGAC
+GCATTTTGCGTTTATTCCGGCGATAACGNNCGCGTGAAGGCATTTAAGCTGTCGGCTGAA
+TTTTGCCATTTGCGGAGGAGATATGCTCNNCAGGCTCGATGCCGCCTTACGATTTCAGCA
+GGAAGCGCTAAATCTGCGCGCGCAACGTNNGGAAATATTAGCGGCGAATATCGCCAATGC
+CGATACGCCGGGGTATCAGGCGCGCGATNNTGATTTTGCCAGTGAGTTAAAAAAAGTGAT
+GGTGCGCGGACGGGAAGAAACCGGCGGCNNCGCGTTGTCGTTGACTTCTTCTCGCCATAT
+TCCCGCCCAGGCGGTCTCTTCTCCCGCANNGGATCTGCTTTACCGCGTACCCGATCAGCC
+TTCTTTGGATGGTAACACCGTAGATATGNNCAGGGAACGTACGCAGTTTGCGGATAACAG
+TCTCAAATATCAGATGGGGCTTACCGTTNNGGGTAGCCAACTCAAAGGCATGATGAATGT
+GCTACAGGGAGGAAACTAATTCGTGGCGNNGTTAAACATTTTTGATATTGCCGGATCGGC
+GCTTGCCGCACAGTCCAAGCGGTTGAACNNTGCGGCCAGTAACCTTGCGAATGCGGATAG
+CGTCACCGGCCCGGACGGACAGCCTTATNNCGCCAAACAGGTGGTTTTTCAGGTGGACGC
+CGCGCCGGGTCAAGCCACTGGCGGGGTANNGGTCGCCAGCGTGATTGAAAGTCAGGCACC
+GGAAAAGCTGGTTTATGAGCCAGGCAATNNGCTGGCGGACGCTAATGGTTACGTCAAAAT
+GCCCAACGTCGATGTGGTCGGCGAAATGNNCAACACGATGTCAGCCTCGCGCAGCTATCA
+GGCAAATATCGAAGTCCTGAATACCGTANNAAGCATGATGCTTAAAACGCTGACATTAGG
+CCAGTAAAGGAGGCGCGTATGTCTATTGCCGTAAATATGAATGACCCGACCAACACGGGC
+GTCAAAACGACGACCGGCAGCGGGTCGATGACCGGAAGCAACGCTGCCGATCTGCAAAGC
+AGTTTCCTGACCTTACTGGTCGCGCAATTGAAGAACCAGGACCCGACTAACCCATTACAA
+AATAATGAGTTAACGACACAGTTGGCGCAAATCAGTACCGTGAGCGGCATTGAAAAACTG
+AATACGACGCTGGGGGCTATTTCCGGGCAAATCGATAATAGTCAGTCCCTACAGGCGACC
+ACGCTGATTGGACATGGCGTTATGGTGCCTGGCACCACAATTCTGGCGGGTAAAGGCGCG
+GAAGAAGGGGCCGTGACGTCCACGACGCCGTTTGGCGTGGAATTGCAACAGCCTGCGGAC
+AAAGTGACGGCAACCATTACCGATAAAGATGGCCGGGTGGTACGGACGCTGGAGATCGGT
+GAGTTGCGAGCCGGGGTACACACCTTTACCTGGGATGGTAAGCAAACGGACGGAACAACG
+GTACCGAATGGTTCTTACAACATTGCGATTACCGCCAGCAATGGCGGGACGCAACTGGTG
+GCGCAGCCGCTGCAATTCGCTCTGGTACAGGGCGTGACGAAGGGCAGTAACGGCAACCTG
+TTGGATCTGGGTACCTACGGCACCACCACACTCGACGAAGTTCGGCAAATAATCTAAGCC
+CTTACACTTATCAGGAGTCAGTCATGTCTTTTTCTCAAGCGGTTAGCGGCCTGAACGCTG
+CGGCCACCAACCTTGATGTTATCGGTAATAACATCGCCAACTCCGCCACCTATGGCTTTA
+AGTCCGGTACGGCATCATTTGCCGATATGTTCGCCGGTTCCAAAGTGGGGTTGGGCGTAA
+AAGTGGCGGGGATTACCCAGGATTTTACCGACGGTACGACAACGAACACCGGGCGCGGGC
+TGGATGTCGCGATTAGCCAGAACGGTTTTTTCCGCCTGGTAGACAGCAACGGTTCCGTGT
+TCTATAGCCGCAACGGCCAGTTCAAACTGGACGAGAACCGTAACCTGGTCAATATGCAGG
+GGATGCAGTTGACCGGCTATCCGGCCACCGGTACGCCGCCGACCATTCAGCAGGGGGCGA
+ATCCTGCGCCGATCACCATTCCGAACACGCTGATGGCGGCGAAATCGACCACCACCGCGT
+CAATGCAGATCAACCTGAACTCAACGGACCCTGTACCGTCTAAAACGCCCTTTAGCGTGA
+GTGATGCGGATTCGTATAACAAAAAAGGCACCGTCACCGTTTATGACAGCCAGGGTAATG
+CCCATGACATGAACGTCTATTTTGTGAAAACCAAAGATAATGAATGGGCTGTGTACACCC
+ATGACAGCAGCGATCCTGCAGCCACTGCGCCAACAACGGCGTCCACTACGCTGAAATTCA
+ATGAAAACGGGATTCTGGAGTCTGGCGGTACGGTGAACATCACCACCGGTACGATTAATG
+GCGCGACAGCGGCCACCTTCTCCCTCAGCTTCCTTAACTCCATGCAGCAGAACACCGGGG
+CTAATAACATCGTCGCCACCAATCAAAACGGCTATAAGCCTGGCGACCTGGTGAGCTACC
+AGATTAACAATGATGGCACCGTGGTTGGCAACTACTCCAACGAGCAGGAGCAGGTGCTGG
+GGCAGATTGTGCTGGCTAACTTCGCCAACAACGAAGGTCTGGCATCCCAGGGCGATAACG
+TCTGGGCGGCGACGCAGGCCTCCGGGGTAGCGCTGCTGGGGACTGCCGGTTCCGGCAACT
+TCGGTAAGCTGACGAACGGCGCGCTGGAAGCCTCTAACGTGGATTTGAGTAAAGAGCTGG
+TGAATATGATCGTCGCGCAGCGTAACTACCAGTCGAATGCGCAGACCATCAAAACCCAGG
+ACCAGATCCTCAATACGCTGGTTAACCTGCGCTAAGCGCCTGACGGGATAGCTTAATGGA
+TCACGCAATTTATACCGCCATGGGGGCGGCCAGCCAGACGCTTAACCAGCAGGCGGTAAC
+GGCCAGCAACCTGGCTAATGCCTCAACGCCGGGCTTTCGCGCGCAGCTTAACGCGCTACG
+CGCGGTGCCCGTTGATGGCCTCTCTTTAGCGACGCGCACGTTGGTTACGGCGTCGACGCC
+GGGGGCGGATATGACCCAGGGTCAGTTGGACTACACTTCCCGCCCGCTGGATGTTGCGTT
+ACAGCAGGACGGCTGGCTGGTGGTGCAAGCGGCGGATGGCGCTGAAGGATATACCCGTAA
+CGGGAATATCCAGGTGGGCCCGACCGGGCAGTTAACCATTCAGGGACATCCGGTTATCGG
+CGAAGGCGGCCCGATTACCGTTCCGGAAGGGTCGGAAATCACCATTGCGGCAGACGGCAC
+GATCTCCGCGCTCAATCCCGGCGACCCGCCAAACACGGTGGCGCCCGTTGGGCGGCTGAA
+GCTGGTCAAAGCGGAAGGCAATGAGGTGCAGCGGAGCGATGACGGTTTATTCCGCCTTAC
+CGCCGAGGCACAGGCTGAACGCGGGGCGGTACTGGCCGCCGACCCGTCAATTCGCATTAT
+GTCGGGCGTGCTGGAGGGCAGTAACGTCAAGCCGGTTGAAGCCATGACCGACATGATCGC
+CAACGCACGTCGTTTTGAAATGCAGATGAAGGTTATCACCAGCGTAGATGAGAACGAAGG
+GCGAGCTAACCAACTGCTGTCGATGAGTTAATACAGGACATTTTATGATCAGTTCATTAT
+GGATCGCCAAAACCGGTCTGGACGCGCAGCAAACCAATATGGATGTGATTGCCAATAACC
+TGGCAAACGTCAGCACCAATGGTTTTAAGCGTCAGCGCGCGGTATTTGAAGATCTGTTGT
+ATCAGACCATCCGCCAGCCGGGCGCGCAGTCGTCCGAGCAGACGACGCTGCCTTCCGGGC
+TGCAAATCGGTACCGGCGTGCGTCCGGTCGCCACGGAGCGCCTGCACAGTCAGGGGAACC
+TGTCGCAGACCAACAACAGTAAAGATGTGGCGATTAAAGGGCAGGGCTTTTTCCAGGTCA
+TGCTGCCGGACGGTACGTCTGCCTATACCCGCGACGGCTCTTTCCAGGTGGATCAGAATG
+GTCAACTGGTGACGGCGGGCGGTTTTCAGGTGCAGCCGGCAATCACCATTCCGGCCAACG
+CGTTAAGCATCACGATTGGCCGCGACGGCGTGGTCAGCGTTACCCAGCAAGGGCAGGCCG
+CGCCGGTTCAGGTCGGGCAGCTTAACCTGACCACCTTTATGAACGACACCGGTCTGGAAA
+GCATCGGCGAGAACCTCTATATCGAAACGCAATCGTCCGGCGCGCCGAACGAAAGCACGC
+CGGGGCTCAACGGCGCGGGGTTGTTGTATCAAGGGTATGTCGAAACGTCGAACGTTAACG
+TGGCGGAAGAGCTGGTGAACATGATTCAGGTTCAACGCGCCTATGAAATTAACAGTAAAG
+CAGTATCGACGACCGATCAGATGCTGCAGAAACTGACGCAACTCTAAGGGGCCGCCGGTG
+GGGGATACGCCACCGGCTCCCTGATTTTGAAGATGAAGGTAATGCAAAAATACGCGCTTC
+ACGCTTACCCAGTTATGGCCCTGATGGTCGCGACGCTGACAGGATGCGCCTGGATACCCG
+CTAAACCGCTCGTGCAGGGGGCGACCACGGCGCAGCCGATACCTGGCCCGGTACCGGTGG
+CGAATGGCTCCATATTTCAGTCTGCGCAGCCGATTAATTATGGCTATCAGCCGCTTTTTG
+AAGATCGTCGACCGCGTAATATCGGCGATACGCTCACGATTGTGTTACAGGAAAACGTCA
+GCGCCAGTAAAAGCTCGTCGGCAAATGCCAGCCGCGACGGCAAAACCAGCTTTGGTTTTG
+ATACGGTACCGCGTTATCTGCAGGGATTATTCGGTAATTCCCGCGCGGATATGGAGGCCT
+CCGGCGGCAACTCTTTTAATGGTAAAGGCGGCGCGAATGCCAGCAATACCTTTAGCGGCA
+CGCTGACCGTGACCGTCGATCAGGTTCTGGCCAATGGCAATTTACACGTCGTGGGGGAAA
+AACAGATCGCGATTAATCAGGGAACGGAATTCATCCGCTTCTCCGGCGTGGTAAATCCAC
+GCACCATCAGCGGTAGCAACTCTGTTCCCTCGACACAGGTGGCGGATGCGCGGATTGAAT
+ATGTCGGGAACGGCTATATTAACGAAGCGCAAAATATGGGCTGGCTGCAACGTTTCTTCC
+TTAATTTGTCGCCGATGTAAGCGAGGTGTATGTGTTTAAAGCTCTTGCAGGAATCGTTCT
+GGCACTGGTTGCCACTCTGGCGCACGCCGAGCGTATCCGGGATCTGACCAGTGTCCAGGG
+AGTACGGGAAAACTCGCTGATCGGCTACGGGCTGGTGGTCGGGCTGGACGGTACGGGCGA
+CCAGACGACCCAGACGCCATTTACCACCCAGACGCTGAATAACATGCTGTCACAACTGGG
+GATTACGGTCCCCACCGGCACCAATATGCAGTTGAAAAACGTGGCGGCGGTGATGGTGAC
+GGCGTCGTATCCGCCTTTTGCGCGACAGGGACAAACGATCGATGTCGTCGTTTCCTCAAT
+GGGGAACGCTAAAAGTCTGCGTGGCGGGACGTTATTAATGACGCCGTTAAAAGGGGTGGA
+CAGCCAGGTGTATGCTCTGGCGCAGGGCAATATTCTGGTCGGCGGCGCGGGCGCTTCCGC
+AGGCGGCAGTAGCGTGCAGGTTAACCAGCTTAATGGCGGGCGCATCACTAATGGCGCGAT
+TATCGAACGCGAGTTGCCGACTCAGTTCGGCGCTGGCAACACCATTAATCTGCAATTGAA
+CGACGAAGATTTTACGATGGCGCAGCAAATTACCGACGCCATCAACCGCGCCCGCGGTTA
+CGGCAGCGCCACTGCGCTTGATGCGCGAACGGTACAGGTACGCGTGCCCAGCGGCAACAG
+CTCGCAGGTGCGTTTTCTGGCGGACATTCAAAATATGGAAGTCAACGTGACGCCGCAGGA
+TGCAAAAGTCGTGATCAACTCGCGTACCGGTTCGGTGGTCATGAATCGGGAAGTCACGCT
+GGATAGCTGCGCTGTGGCGCAGGGCAATTTGTCAGTGACAGTCAATCGCCAACTCAACGT
+CAACCAGCCGAATACGCCATTTGGCGGCGGGCAGACCGTGGTGACGCCACAGACTCAGAT
+AGATTTGCGTCAGAGCGGCGGATCGCTACAGAGCGTGCGTTCCAGCGCCAATCTGAACAG
+CGTAGTGCGCGCGCTGAATGCGCTTGGCGCGACGCCGATGGATCTGATGTCGATTTTGCA
+GTCCATGCAGAGCGCGGGCTGTCTACGCGCCAAACTGGAAATCATCTGATGATCGGAGAC
+GGTAAATTGCTGGCCAGCGCGGCCTGGGATGCGCAATCTCTGAACGAACTGAAAGCGAAA
+GCGGGCCAGGACCCGGCGGCGAATATCCGTCCTGTGGCCCGTCAGGTGGAAGGGATGTTT
+GTGCAGATGATGCTGAAAAGTATGCGCGAGGCTTTACCCAAAGATGGTTTATTCAGCAGC
+GATCAGACGCGTCTGTATACCAGCATGTATGACCAGCAGATCGCCCAGCAGATGACCGCC
+GGTAAGGGATTGGGGCTGGCGGATATGATGGTTAAACAGATGACGGGCGGGCAGACGATG
+CCTGCAGATGATGCGCCGCAAGTACCGCTTAAATTCTCCCTGGAGACGGTAAACAGCTAT
+CAAAATCAGGCGCTGACCCAACTGGTGCGCAAAGCCATACCGAAAACGCCGGACAGCAGC
+GATGCGCCGCTCTCCGGCGACAGTAAAGACTTTCTGGCCCGGCTTTCGCTCCCGGCGAGG
+CTGGCCAGCGAACAAAGCGGGGTGCCGCATCATCTGATTCTGGCGCAGGCGGCGCTGGAG
+TCCGGCTGGGGGCAGCGGCAAATCCTGCGGGAGAATGGCGAACCCAGCTATAACGTATTT
+GGCGTGAAAGCGACCGCCAGTTGGAAAGGGCCGGTGACGGAAATCACCACCACTGAATAC
+GAAAATGGCGAAGCGAAAAAAGTGAAAGCGAAATTCCGCGTCTATAGCTCGTATCTGGAG
+GCGTTATCGGATTATGTCGCGCTGTTAACGCGTAACCCACGCTACGCTGCCGTGACCACT
+GCCGCCACGGCAGAGCAGGGCGCAGTGGCTCTGCAAAACGCCGGATACGCCACTGACCCG
+AATTACGCGCGTAAATTGGCCAGCATGATTCAGCAGTTGAAAGCGATGAGTGAAAAGGTC
+AGCAAAACCTACAGCGCGAATCTCGACAATCTCTTTTAAATTGCTCAAGTCCACGTAGTC
+GCTGCCGATAACAACGAGTATTGAAGGATTAAAAGGAACCATCATGTCCAGCTTGATTAA
+TCACGCCATGAGCGGACTTAACGCCGCGCAGGCCGCGTTAAATACGGTCAGTAATAACAT
+CAACAATTATAACGTTGCGGGTTATACCCGGCAGACAACTATTCTGGCGCAGGCAAACAG
+TACGTTAGGGGCTGGCGGCTGGATAGGTAATGGCGTTTACGTTTCAGGCGTACAGCGCGA
+ATATGATGCGTTTATCACTAATCAGCTACGCGGCGCGCAAAACCAGAGCAGCGGCTTAAC
+CACGCGCTATGAACAAATGTCGAAAATCGACAACCTGCTGGCCGATAAATCCAGCTCACT
+GTCTGGCTCGCTGCAGAGTTTTTTTACCAGCCTGCAAACGTTAGTCAGTAATGCGGAAGA
+TCCTGCGGCGCGTCAGGCGCTGATTGGTAAAGCGGAAGGGCTGGTAAACCAGTTCAAAAC
+CACCGATCAGTATCTGCGCGATCAGGATAAACAGGTCAATATCGCGATTGGCTCCAGCGT
+GGCGCAAATCAACAATTACGCGAAGCAGATAGCTAACCTGAACGATCAAATCTCCCGTAT
+GACGGGCGTAGGCGCGGGCGCATCGCCGAACGACCTGCTCGATCAACGTGATCAGTTGGT
+TAGCGAGCTTAACAAGATCGTTGGCGTCGAGGTGAGTGTACAGGACGGCGGCACCTATAA
+CCTGACGATGGCCAATGGCTATACGCTGGTGCAGGGGTCGACGGCGCGTCAGTTGGCGGC
+GGTTCCCTCCAGCGCCGACCCGACGCGAACGACTGTCGCTTATGTCGATGAGGCCGCCGG
+TAACATCGAAATTCCGGAAAAGTTGCTGAACACCGGTTCGCTCGGCGGGCTACTGACGTT
+CCGTTCTCAGGATCTGGATCAGACTCGTAATACGCTGGGCCAGTTGGCGTTGGCGTTTGC
+CGATGCGTTTAACGCGCAGCATACCAAAGGTTATGACGCCGACGGCAATAAAGGGAAAGA
+CTTCTTTAGCATTGGCTCGCCGGTGGTATATAGCAACAGTAATAATGCCGATAAAACGGT
+ATCGCTAACCGCTAAGGTGGTCGACAGCACGAAGGTTCAGGCGACGGATTATAAGATTGT
+TTTTGACGGTACAGACTGGCAGGTTACTCGCACTGCGGATAACACCACCTTCACGGCAAC
+AAAAGATGCTGACGGAAAACTGGAGATTGACGGTCTGAAAGTGACGGTAGGGACTGGCGC
+ACAGAAAAACGACAGTTTTCTTCTCAAGCCGGTCAGCAATGCTATCGTCGACATGAACGT
+TAAAGTGACAAATGAAGCCGAGATTGCGATGGCGTCTGAGTCAAAACTCGATCCTGATGT
+GGATACCGGCGACAGCGATAACCGCAATGGTCAGGCATTGCTGGACTTACAAAACAGCAA
+TGTAGTGGGCGGCAACAAAACCTTTAACGATGCTTACGCCACGTTGGTCAGCGATGTGGG
+TAACAAAACGTCAACGCTGAAAACCAGCAGCACCACGCAGGCGAATGTGGTTAAACAGCT
+TTATAAACAGCAACAGTCGGTTTCCGGCGTTAACCTCGACGAAGAGTACGGCAATTTGCA
+GCGTTATCAGCAGTATTATCTGGCGAATGCGCAAGTATTGCAGACCGCGAATGCGCTGTT
+TGATGCGTTATTGAATATTCGCTAAAGGAGAAGGATGACATGCGTATCAGTACCCAGATG
+ATGTACGAACAAAATATGAGCGGCATCACTAATTCTCAGGCCGAATGGATGAAGCTGGGC
+GAGCAGATGTCTACCGGTAAGCGCGTTACCAACCCATCTGACGATCCGATCGCCGCGTCG
+CAGGCGGTAGTACTCTTTCAGGCGCAGGCGCAGAATAGCCAGTACGCCCTGGCGCGTACG
+TTTGCCACCCAAAAAGTGTCGCTGGAAGAAAGCGTACTCAGTCAGGTGACGACGGCGATT
+CAAACCGCGCAGGAAAAAATCGTCTATGCCGGAAACGGCACGTTAAGCGACGATGACCGC
+GCGTCGCTGGCGACGGATTTACAGGGGATCCGCGATCAGCTGATGAACCTGGCAAACAGC
+ACTGACGGCAATGGTCGCTATATCTTTGCCGGGTATAAAACGGAAGCGGCGCCATTCGAC
+CAGGCGACAGGTGGTTATCATGGCGGCGAGAAAAGTGTTACCCAGCAGGTGGATTCCGCA
+CGCACGATGGTAATTGGCCATACGGGAGCGCAAATTTTTAATAGCATCACCAGCAATGCG
+GTGCCGGAACCGGATGGCTCGGACTCCGAAAAGAATCTGTTTGTCATGCTCGATACGGCA
+ATTGCCGCGCTCAAGACCCCGGTGGAAGGCAATGACGTGGAAAAAGAAAAAGCCGCTGCC
+GCCATTGATAAAACCAATCGCGGCTTAAAAAATTCGCTTAATAACGTCCTGACCGTTCGT
+GCGGAACTGGGAACGCAACTGAGCGAACTCAGTACGCTGGATTCACTGGGAAGCGACCGT
+GCGCTGGGACAGAAGCTACAGATGAGCAACCTGGTAGATGTGGACTGGAACTCGGTCATT
+TCCTCCTACGTCATGCAACAGGCGGCATTACAGGCGTCCTATAAAACGTTTACCGACATG
+CAGGGAATGTCGCTTTTCCAGTTGAACCGGTAACGCCTCTTTTTGAAACATATCACGAAA
+CTGGATATGTTTTGTCTGCCCGCGCCATCCACCCCGGCGCGGGCATTTTTTGTCTATGGA
+AAACCCCCAGCTAGGCTGGGGGTTCCGGAAAGCTTTCAGCTTTAAGCCAGTTATTAAAAC
+CCCTTTTGATTTGTTAAAACATCTTGCGGTCTGGC
\ No newline at end of file
diff --git a/t/data/real_data_core_gene_alignment.aln b/t/data/real_data_core_gene_alignment.aln
new file mode 100644
index 0000000..e23823b
--- /dev/null
+++ b/t/data/real_data_core_gene_alignment.aln
@@ -0,0 +1,1950 @@
+>11111_1#11
+ATGCGTATCAGTACCCAGATGATGTACGAACAAAATATGAGCGGCATCACTAATTCTCAG
+GCCGAATGGATGAAGCTGGGCGAGCAGATGTCTACCGGTAAGCGCGTTACCAACCCATCT
+GACGATCCGATCGCCGCGTCGCAGGCGGTAGTACTCTTTCAGGCGCAGGCGCAGAATAGC
+CAGTACGCCCTGGCGCGTACGTTTGCCACCCAAAAAGTGTCGCTGGAAGAAAGCGTACTC
+AGTCAGGTGACGACGGCGATTCAAACCGCGCAGGAAAAAATCGTCTATGCCGGAAACGGC
+ACGTTAAGCGACGATGACCGCGCGTCGCTGGCGACGGATTTACAGGGGATCCGCGATCAG
+CTGATGAACCTGGCAAACAGCACTGACGGCAATGGTCGCTATATCTTTGCCGGGTATAAA
+ACGGAAGCGGCGCCATTCGACCAGGCGACAGGTGGTTATCATGGCGGCGAGAAAAGTGTT
+ACCCAGCAGGTGGATTCCGCACGCACGATGGTAATTGGCCATACGGGAGCGCAAATTTTT
+AATAGCATCACCAGCAATGCGGTGCCGGAACCGGATGGCTCGGACTCCGAAAAGAATCTG
+TTTGTCATGCTCGATACGGCAATTGCCGCGCTCAAGACCCCGGTGGAAGGCAATGACGTG
+GAAAAAGAAAAAGCCGCTGCCGCCATTGATAAAACCAATCGCGGCTTAAAAAATTCGCTT
+AATAACGTCCTGACCGTTCGTGCGGAACTGGGAACGCAACTGAGCGAACTCAGTACGCTG
+GATTCACTGGGAAGCGACCGTGCGCTGGGACAGAAGCTACAGATGAGCAACCTGGTAGAT
+GTGGACTGGAACTCGGTCATTTCCTCCTACGTCATGCAACAGGCGGCATTACAGGCGTCC
+TATAAAACGTTTACCGACATGCAGGGAATGTCGCTTTTCCAGTTGAACCGGATGTCCAGC
+TTGATTAATCACGCCATGAGCGGACTTAACGCCGCGCAGGCCGCGTTAAATACGGTCAGT
+AATAACATCAACAATTATAACGTTGCGGGTTATACCCGGCAGACAACTATTCTGGCGCAG
+GCAAACAGTACGTTAGGGGCTGGCGGCTGGATAGGTAATGGCGTTTACGTTTCAGGCGTA
+CAGCGCGAATATGATGCGTTTATCACTAATCAGCTACGCGGCGCGCAAAACCAGAGCAGC
+GGCTTAACCACGCGCTATGAACAAATGTCGAAAATCGACAACCTGCTGGCCGATAAATCC
+AGCTCACTGTCTGGCTCGCTGCAGAGTTTTTTTACCAGCCTGCAAACGTTAGTCAGTAAT
+GCGGAAGATCCTGCGGCGCGTCAGGCGCTGATTGGTAAAGCGGAAGGGCTGGTAAACCAG
+TTCAAAACCACCGATCAGTATCTGCGCGATCAGGATAAACAGGTCAATATCGCGATTGGC
+TCCAGCGTGGCGCAAATCAACAATTACGCGAAGCAGATAGCTAACCTGAACGATCAAATC
+TCCCGTATGACGGGCGTAGGCGCGGGCGCATCGCCGAACGACCTGCTCGATCAACGTGAT
+CAGTTGGTTAGCGAGCTTAACAAGATCGTTGGCGTCGAGGTGAGTGTACAGGACGGCGGC
+ACCTATAACCTGACGATGGCCAATGGCTATACGCTGGTGCAGGGGTCGACGGCGCGTCAG
+TTGGCGGCGGTTCCCTCCAGCGCCGACCCGACGCGAACGACTGTCGCTTATGTCGATGAG
+GCCGCCGGTAACATCGAAATTCCGGAAAAGTTGCTGAACACCGGTTCGCTCGGCGGGCTA
+CTGACGTTCCGTTCTCAGGATCTGGATCAGACTCGTAATACGCTGGGCCAGTTGGCGTTG
+GCGTTTGCCGATGCGTTTAACGCGCAGCATACCAAAGGTTATGACGCCGACGGCAATAAA
+GGGAAAGACTTCTTTAGCATTGGCTCGCCGGTGGTATATAGCAACAGTAATAATGCCGAT
+AAAACGGTATCGCTAACCGCTAAGGTGGTCGACAGCACGAAGGTTCAGGCGACGGATTAT
+AAGATTGTTTTTGACGGTACAGACTGGCAGGTTACTCGCACTGCGGATAACACCACCTTC
+ACGGCAACAAAAGATGCTGACGGAAAACTGGAGATTGACGGTCTGAAAGTGACGGTAGGG
+ACTGGCGCACAGAAAAACGACAGTTTTCTTCTCAAGCCGGTCAGCAATGCTATCGTCGAC
+ATGAACGTTAAAGTGACAAATGAAGCCGAGATTGCGATGGCGTCTGAGTCAAAACTCGAT
+CCTGATGTGGATACCGGCGACAGCGATAACCGCAATGGTCAGGCATTGCTGGACTTACAA
+AACAGCAATGTAGTGGGCGGCAACAAAACCTTTAACGATGCTTACGCCACGTTGGTCAGC
+GATGTGGGTAACAAAACGTCAACGCTGAAAACCAGCAGCACCACGCAGGCGAATGTGGTT
+AAACAGCTTTATAAACAGCAACAGTCGGTTTCCGGCGTTAACCTCGACGAAGAGTACGGC
+AATTTGCAGCGTTATCAGCAGTATTATCTGGCGAATGCGCAAGTATTGCAGACCGCGAAT
+GCGCTGTTTGATGCGTTATTGAATATTCGCATGATCGGAGACGGTAAATTGCTGGCCAGC
+GCGGCCTGGGATGCGCAATCTCTGAACGAACTGAAAGCGAAAGCGGGCCAGGACCCGGCG
+GCGAATATCCGTCCTGTGGCCCGTCAGGTGGAAGGGATGTTTGTGCAGATGATGCTGAAA
+AGTATGCGCGAGGCTTTACCCAAAGATGGTTTATTCAGCAGCGATCAGACGCGTCTGTAT
+ACCAGCATGTATGACCAGCAGATCGCCCAGCAGATGACCGCCGGTAAGGGATTGGGGCTG
+GCGGATATGATGGTTAAACAGATGACGGGCGGGCAGACGATGCCTGCAGATGATGCGCCG
+CAAGTACCGCTTAAATTCTCCCTGGAGACGGTAAACAGCTATCAAAATCAGGCGCTGACC
+CAACTGGTGCGCAAAGCCATACCGAAAACGCCGGACAGCAGCGATGCGCCGCTCTCCGGC
+GACAGTAAAGACTTTCTGGCCCGGCTTTCGCTCCCGGCGAGGCTGGCCAGCGAACAAAGC
+GGGGTGCCGCATCATCTGATTCTGGCGCAGGCGGCGCTGGAGTCCGGCTGGGGGCAGCGG
+CAAATCCTGCGGGAGAATGGCGAACCCAGCTATAACGTATTTGGCGTGAAAGCGACCGCC
+AGTTGGAAAGGGCCGGTGACGGAAATCACCACCACTGAATACGAAAATGGCGAAGCGAAA
+AAAGTGAAAGCGAAATTCCGCGTCTATAGCTCGTATCTGGAGGCGTTATCGGATTATGTC
+GCGCTGTTAACGCGTAACCCACGCTACGCTGCCGTGACCACTGCCGCCACGGCAGAGCAG
+GGCGCAGTGGCTCTGCAAAACGCCGGATACGCCACTGACCCGAATTACGCGCGTAAATTG
+GCCAGCATGATTCAGCAGTTGAAAGCGATGAGTGAAAAGGTCAGCAAAACCTACAGCGCG
+AATCTCGACAATCTCTTTGTGTTTAAAGCTCTTGCAGGAATCGTTCTGGCACTGGTTGCC
+ACTCTGGCGCACGCCGAGCGTATCCGGGATCTGACCAGTGTCCAGGGAGTACGGGAAAAC
+TCGCTGATCGGCTACGGGCTGGTGGTCGGGCTGGACGGTACGGGCGACCAGACGACCCAG
+ACGCCATTTACCACCCAGACGCTGAATAACATGCTGTCACAACTGGGGATTACGGTCCCC
+ACCGGCACCAATATGCAGTTGAAAAACGTGGCGGCGGTGATGGTGACGGCGTCGTATCCG
+CCTTTTGCGCGACAGGGACAAACGATCGATGTCGTCGTTTCCTCAATGGGGAACGCTAAA
+AGTCTGCGTGGCGGGACGTTATTAATGACGCCGTTAAAAGGGGTGGACAGCCAGGTGTAT
+GCTCTGGCGCAGGGCAATATTCTGGTCGGCGGCGCGGGCGCTTCCGCAGGCGGCAGTAGC
+GTGCAGGTTAACCAGCTTAATGGCGGGCGCATCACTAATGGCGCGATTATCGAACGCGAG
+TTGCCGACTCAGTTCGGCGCTGGCAACACCATTAATCTGCAATTGAACGACGAAGATTTT
+ACGATGGCGCAGCAAATTACCGACGCCATCAACCGCGCCCGCGGTTACGGCAGCGCCACT
+GCGCTTGATGCGCGAACGGTACAGGTACGCGTGCCCAGCGGCAACAGCTCGCAGGTGCGT
+TTTCTGGCGGACATTCAAAATATGGAAGTCAACGTGACGCCGCAGGATGCAAAAGTCGTG
+ATCAACTCGCGTACCGGTTCGGTGGTCATGAATCGGGAAGTCACGCTGGATAGCTGCGCT
+GTGGCGCAGGGCAATTTGTCAGTGACAGTCAATCGCCAACTCAACGTCAACCAGCCGAAT
+ACGCCATTTGGCGGCGGGCAGACCGTGGTGACGCCACAGACTCAGATAGATTTGCGTCAG
+AGCGGCGGATCGCTACAGAGCGTGCGTTCCAGCGCCAATCTGAACAGCGTAGTGCGCGCG
+CTGAATGCGCTTGGCGCGACGCCGATGGATCTGATGTCGATTTTGCAGTCCATGCAGAGC
+GCGGGCTGTCTACGCGCCAAACTGGAAATCATCATGGCCCTGATGGTCGCGACGCTGACA
+GGATGCGCCTGGATACCCGCTAAACCGCTCGTGCAGGGGGCGACCACGGCGCAGCCGATA
+CCTGGCCCGGTACCGGTGGCGAATGGCTCCATATTTCAGTCTGCGCAGCCGATTAATTAT
+GGCTATCAGCCGCTTTTTGAAGATCGTCGACCGCGTAATATCGGCGATACGCTCACGATT
+GTGTTACAGGAAAACGTCAGCGCCAGTAAAAGCTCGTCGGCAAATGCCAGCCGCGACGGC
+AAAACCAGCTTTGGTTTTGATACGGTACCGCGTTATCTGCAGGGATTATTCGGTAATTCC
+CGCGCGGATATGGAGGCCTCCGGCGGCAACTCTTTTAATGGTAAAGGCGGCGCGAATGCC
+AGCAATACCTTTAGCGGCACGCTGACCGTGACCGTCGATCAGGTTCTGGCCAATGGCAAT
+TTACACGTCGTGGGGGAAAAACAGATCGCGATTAATCAGGGAACGGAATTCATCCGCTTC
+TCCGGCGTGGTAAATCCACGCACCATCAGCGGTAGCAACTCTGTTCCCTCGACACAGGTG
+GCGGATGCGCGGATTGAATATGTCGGGAACGGCTATATTAACGAAGCGCAAAATATGGGC
+TGGCTGCAACGTTTCTTCCTTAATTTGTCGCCGATGATGATCAGTTCATTATGGATCGCC
+AAAACCGGTCTGGACGCGCAGCAAACCAATATGGATGTGATTGCCAATAACCTGGCAAAC
+GTCAGCACCAATGGTTTTAAGCGTCAGCGCGCGGTATTTGAAGATCTGTTGTATCAGACC
+ATCCGCCAGCCGGGCGCGCAGTCGTCCGAGCAGACGACGCTGCCTTCCGGGCTGCAAATC
+GGTACCGGCGTGCGTCCGGTCGCCACGGAGCGCCTGCACAGTCAGGGGAACCTGTCGCAG
+ACCAACAACAGTAAAGATGTGGCGATTAAAGGGCAGGGCTTTTTCCAGGTCATGCTGCCG
+GACGGTACGTCTGCCTATACCCGCGACGGCTCTTTCCAGGTGGATCAGAATGGTCAACTG
+GTGACGGCGGGCGGTTTTCAGGTGCAGCCGGCAATCACCATTCCGGCCAACGCGTTAAGC
+ATCACGATTGGCCGCGACGGCGTGGTCAGCGTTACCCAGCAAGGGCAGGCCGCGCCGGTT
+CAGGTCGGGCAGCTTAACCTGACCACCTTTATGAACGACACCGGTCTGGAAAGCATCGGC
+GAGAACCTCTATATCGAAACGCAATCGTCCGGCGCGCCGAACGAAAGCACGCCGGGGCTC
+AACGGCGCGGGGTTGTTGTATCAAGGGTATGTCGAAACGTCGAACGTTAACGTGGCGGAA
+GAGCTGGTGAACATGATTCAGGTTCAACGCGCCTATGAAATTAACAGTAAAGCAGTATCG
+ACGACCGATCAGATGCTGCAGAAACTGACGCAACTCATGGATCACGCAATTTATACCGCC
+ATGGGGGCGGCCAGCCAGACGCTTAACCAGCAGGCGGTAACGGCCAGCAACCTGGCTAAT
+GCCTCAACGCCGGGCTTTCGCGCGCAGCTTAACGCGCTACGCGCGGTGCCCGTTGATGGC
+CTCTCTTTAGCGACGCGCACGTTGGTTACGGCGTCGACGCCGGGGGCGGATATGACCCAG
+GGTCAGTTGGACTACACTTCCCGCCCGCTGGATGTTGCGTTACAGCAGGACGGCTGGCTG
+GTGGTGCAAGCGGCGGATGGCGCTGAAGGATATACCCGTAACGGGAATATCCAGGTGGGC
+CCGACCGGGCAGTTAACCATTCAGGGACATCCGGTTATCGGCGAAGGCGGCCCGATTACC
+GTTCCGGAAGGGTCGGAAATCACCATTGCGGCAGACGGCACGATCTCCGCGCTCAATCCC
+GGCGACCCGCCAAACACGGTGGCGCCCGTTGGGCGGCTGAAGCTGGTCAAAGCGGAAGGC
+AATGAGGTGCAGCGGAGCGATGACGGTTTATTCCGCCTTACCGCCGAGGCACAGGCTGAA
+CGCGGGGCGGTACTGGCCGCCGACCCGTCAATTCGCATTATGTCGGGCGTGCTGGAGGGC
+AGTAACGTCAAGCCGGTTGAAGCCATGACCGACATGATCGCCAACGCACGTCGTTTTGAA
+ATGCAGATGAAGGTTATCACCAGCGTAGATGAGAACGAAGGGCGAGCTAACCAACTGCTG
+TCGATGAGTATGTCTTTTTCTCAAGCGGTTAGCGGCCTGAACGCTGCGGCCACCAACCTT
+GATGTTATCGGTAATAACATCGCCAACTCCGCCACCTATGGCTTTAAGTCCGGTACGGCA
+TCATTTGCCGATATGTTCGCCGGTTCCAAAGTGGGGTTGGGCGTAAAAGTGGCGGGGATT
+ACCCAGGATTTTACCGACGGTACGACAACGAACACCGGGCGCGGGCTGGATGTCGCGATT
+AGCCAGAACGGTTTTTTCCGCCTGGTAGACAGCAACGGTTCCGTGTTCTATAGCCGCAAC
+GGCCAGTTCAAACTGGACGAGAACCGTAACCTGGTCAATATGCAGGGGATGCAGTTGACC
+GGCTATCCGGCCACCGGTACGCCGCCGACCATTCAGCAGGGGGCGAATCCTGCGCCGATC
+ACCATTCCGAACACGCTGATGGCGGCGAAATCGACCACCACCGCGTCAATGCAGATCAAC
+CTGAACTCAACGGACCCTGTACCGTCTAAAACGCCCTTTAGCGTGAGTGATGCGGATTCG
+TATAACAAAAAAGGCACCGTCACCGTTTATGACAGCCAGGGTAATGCCCATGACATGAAC
+GTCTATTTTGTGAAAACCAAAGATAATGAATGGGCTGTGTACACCCATGACAGCAGCGAT
+CCTGCAGCCACTGCGCCAACAACGGCGTCCACTACGCTGAAATTCAATGAAAACGGGATT
+CTGGAGTCTGGCGGTACGGTGAACATCACCACCGGTACGATTAATGGCGCGACAGCGGCC
+ACCTTCTCCCTCAGCTTCCTTAACTCCATGCAGCAGAACACCGGGGCTAATAACATCGTC
+GCCACCAATCAAAACGGCTATAAGCCTGGCGACCTGGTGAGCTACCAGATTAACAATGAT
+GGCACCGTGGTTGGCAACTACTCCAACGAGCAGGAGCAGGTGCTGGGGCAGATTGTGCTG
+GCTAACTTCGCCAACAACGAAGGTCTGGCATCCCAGGGCGATAACGTCTGGGCGGCGACG
+CAGGCCTCCGGGGTAGCGCTGCTGGGGACTGCCGGTTCCGGCAACTTCGGTAAGCTGACG
+AACGGCGCGCTGGAAGCCTCTAACGTGGATTTGAGTAAAGAGCTGGTGAATATGATCGTC
+GCGCAGCGTAACTACCAGTCGAATGCGCAGACCATCAAAACCCAGGACCAGATCCTCAAT
+ACGCTGGTTAACCTGCGCATGTCTATTGCCGTAAATATGAATGACCCGACCAACACGGGC
+GTCAAAACGACGACCGGCAGCGGGTCGATGACCGGAAGCAACGCTGCCGATCTGCAAAGC
+AGTTTCCTGACCTTACTGGTCGCGCAATTGAAGAACCAGGACCCGACTAACCCATTACAA
+AATAATGAGTTAACGACACAGTTGGCGCAAATCAGTACCGTGAGCGGCATTGAAAAACTG
+AATACGACGCTGGGGGCTATTTCCGGGCAAATCGATAATAGTCAGTCCCTACAGGCGACC
+ACGCTGATTGGACATGGCGTTATGGTGCCTGGCACCACAATTCTGGCGGGTAAAGGCGCG
+GAAGAAGGGGCCGTGACGTCCACGACGCCGTTTGGCGTGGAATTGCAACAGCCTGCGGAC
+AAAGTGACGGCAACCATTACCGATAAAGATGGCCGGGTGGTACGGACGCTGGAGATCGGT
+GAGTTGCGAGCCGGGGTACACACCTTTACCTGGGATGGTAAGCAAACGGACGGAACAACG
+GTACCGAATGGTTCTTACAACATTGCGATTACCGCCAGCAATGGCGGGACGCAACTGGTG
+GCGCAGCCGCTGCAATTCGCTCTGGTACAGGGCGTGACGAAGGGCAGTAACGGCAACCTG
+TTGGATCTGGGTACCTACGGCACCACCACACTCGACGAAGTTCGGCAAATAATCATGCAA
+ATACAGAGCTTCTATCACTCAGCTTCACTAAAAACCCAGGAGGCTTTTAAAAGCCTACAA
+AAAACCTTATACAACGGAATGCAGATTCTCTCAGGCCAGGGCAAAGCGCCGGCTAAAGCG
+CCCGACGCTCGCCCGGAAATTATTGTCCTGCGAGAACCTGGCGCGACATGGGGGAATTAT
+CTACAGCATCAGAAGACGTCTAACCACTCGCTGCATAACCTCTATAACTTACAGCGCGAT
+CTTCTTACCGTCGCGGCAACCGTTCTGGGTAAACAAGACCCGGTTCTAACGTCAATGGCA
+AACCAAATGGAGTTAGCCAAAGTTAAAGCGGACCGGCCAGCAACAAAACAAGAAGAAGCT
+GCGGCAAAAGCATTGAAGAAAAATCTTATCGAACTTATTGCAGCACGCACTCAGCAGCAA
+AATGGCTTACCTGCAAAAGAAGCTCATCGCTTTGCGGCAGTAGCGTTTAGAGATGCTCAG
+GTCAAGCAGCTCAATAACCAGCCCTGGCAAACCATAAAAAATACACTCACGCATAACGGG
+CATCACTATACCAACACGCAGCTCCCTGCCGCAGAGATGAAAATCGGCGCAAAAGATATC
+TTTCCCAGTGCTTATGAGGGAAAGGGCGTATGCAGTTGGGATACCAAGAATATTCATCAC
+GCCAATAATTTGTGGATGTCCACGGTGAGTGTGCATGAGGACGGTAAAGATAAAACGCTT
+TTTTGCGGGATACGTCATGGTGTGCTTTCCCCCTATCATGAAAAAGATCCGCTTCTGCGT
+CAGGCCGGCGCTGAAAACAAAGCCAAAGAAGTATTAGCTGCGGCACTTTTTAGTAAACCT
+GAGTTGCTTAACAGAGCCTTAGAGGGCGAAGCGGTAAGCCTGAAACTGGTATCCGTCGGG
+TTACTCACCGCGTCGAATATTTTCGGCAAAGAGGGAACTATGGTCGAGGATCAAATGCGC
+GCATGGCAATCGTTGACCCAGCCGGGAAAAATGATTCATTTAAAAATCCGCAATAAAGAT
+GGCGATCTACAGACGGTAAAAATAAAACCGGACGTCGCCGCATTTAATGTGGGTGTTAAT
+GAGCTGGCGCTCAAGCTCGGCTTTGGCCTTAAAGCATCAGATAGCTATAATGCCGAAGCG
+CTACATCAGTTATTAGGCAATGATTTACGCCCTGAAGCCAGACCAGGTGGCTGGGTTGGC
+GAATGGCTGGCGCAATACCCGGATAATTATGAGGTCGTCAATACATTAGCGCGCCAGATT
+AAGGATATCTGGAAAAATAACCAACATCATAAAGATGGCGGCGAACCCTATAAACTCGCA
+CAACGCCTTGCCATGTTAGCCCATGAAATTGACGCGGTGCCCGCCTGGAATTGTAAAAGC
+GGCAAAGATCGTACAGGGATGATGGATTCAGAAATCAAGCGAGAGCTCATTTCTTTCCAT
+CAGACCCATATGTTAAGTGCGCCTGGTAGTCTTCCGGATAGCGGTGGACAGAAAATTTTC
+CAAAAAGTATTACTGAATAGCGGTAACCTGGAGATTCAGAAACAAAATACGGGCGGGGCG
+GGAAACAAAGTAATGAAAAATTTATCGCCAGAGGTGCTCAATCTTTCCTATCAAAAACGA
+GTTGGGGATGAAAATATTTGGCAGTCAGTAAAAGGTATTTCTTCATTAATCACATCTATG
+AAAAAGTATCTTGCTTTCGCCGTTACGCTGCTGGGTATGGGTAAAGTCATCGCCTGTACT
+ACCCTTTTGGTAGGCAATCAGGCTTCGGCTGACGGCTCCTTTATTATCGCGCGCAACGAG
+GATGGCTCGGCAAATAACGCCAAGCATAAGGTTATTCATCCCGTCGCGTTTCATCAACAA
+GGCGAGTATAAAGCACATCGCAACAATTTTAGCTGGCCGCTTCCGGAGACAGCGATGCGC
+TATACGGCGATTCATGACTTTGATACTAACGATAACGCCATGGGTGAAGCCGGTTTCAAT
+TCGGCGGGCGTCGGAATGAGCGCAACGGAAACCATTTACAACGGCAGAGCGGCGCTGGCT
+GCCGATCCTTACGTGACAAAAACGGGAATCACGGAAGACGCCATTGAGTCCGTGATCCTG
+CCAGTGGCGCAATCGGCGCGTCAGGGCGCCAAATTACTGGGAGATATTATTGAACAAAAA
+GGCGCGGGCGAAGGTTTCGGCGTCGCGTTTATTGATAGCAAAGAGATATGGTATCTGGAG
+ACGGGAAGCGGACATCAATGGCTGGCAGTACGACTTCCGGCAGATAGCTATTTCGTTTCC
+GCCAATCAGGGACGTTTACGCCATTACGATCCGAATGATAACGCGAATTATATGGCGTCA
+CCAACGTTAGTAAGCTTTGCGAAAAAGCAGGGATTATATGATCCGGCCCGCGGCGAATTC
+GACTTTCATCAAGCCTATTCGCAGGATAACAAAAACGATACCACCTATAATTATCCGCGC
+GTCTGGACGCTACAACACCAGTTTAATCCGCATCTGGATACGGTCGTTAGCGAAGGGGAA
+ACATTTTCTGTTTTTTTAACGCCAATAACGAAGATCAGCGTGGCGGCAGTAAAAAACGCG
+TTACGCAATCACTATCAGGGAACGTCGCACGACCCTTATGCCAGTCATAATCCACAAGAA
+CCATGGCGACCTATATCCGTTTTTCGTACCCAGGAGTCACATATTTTACAGGTCAGACCG
+AAATTACCGCAGGCTATCGGCAACGTAGAATACATCGCCTATGGAATGCCATCTCTTAGC
+GTCTATCTCCCCTATTACCAGGGGATGCGTCATTATCAACCCGGAGATGATAAAGGAACC
+GATCGGGCGAGCAACGACTCTACCTACTGGACATTCCGCACGCTGCAAACACTGGTTATG
+CAAGACTACAATACGTTTGCGCCAGATGTGCAACATGCCTGGAAAACATTTGAACAGCAA
+ACAGCTAAGCAACAGTATAAGATGGAGCAGAGCTATCTGAGATTATATGCGTCGCATCCG
+AAAGAAGCACAACGCTTACTGCAAAATTTTGAAGATAAAACGATGCAAAATGCGCAGACG
+CTCGCCCGTCGCCTGACCAATAATATTATTACGACAATGACTTACCGCACAGATATGAAA
+TATCACTTTTCAAGTACGCAGCCAATGGTTAAGTTATCAATGACGCTGCGCCTGACAATT
+TCTTTTATCGCCATACTTATCCTCGCCTGTACCGGCATTAGCTGGACGCTCTATAACGCG
+CTGAGCAAAGAATTAACGTATCGGGATGATATGACGCTAATAAATCGGGCGGCGCAAATG
+CAGCAACTGTTACTGGATGGCGCCAGGCCGGAAAATCTGCCGCTCTATTTCAATCGGATG
+GTGGATACGAAGCAGGATATCTTATTGATCCACTCAGCAACAGGCCATAATGTTGCGATT
+AATCATAGCGGCATCCCCGACCAACGCTTTAACGAGATTCCGCTGGCTAAAAACATCACC
+CGCGAAACCTTATTTCGCCAGGCGGTACAAGGCACGGAGCTGACCGCGGTACGAGTAAAC
+GCCAGAAGCGGCGATAACCCGCTGACCCTTACTATTGCCAGGCTGGCGACGGAAAGGCGG
+CAAATGCTGGCGCAATATCGCCGCAACAGTTTGCTGATTAGCCTTATCGCGATCCTCGTC
+TGTTCGGCGCTCAGTCCATTAGTCATCAGAAACGGGCTGCGGGCCATTACGTCGCTCAGC
+CGACTCACCGCGGCGACAGATAGCGGCACACTTCGCCAGCCGCTGGCGGAACAGGCGTTA
+CCCGTCGAGCTCAGGCCGCTTGGGCAAGCGCTAAATACCATGCGCCAGAAGCTTTCCGAC
+GATTTTGAACGCCTGAACCAATTTGCCGACGATCTGGCGCATGAGCTGCGCACGCCGGTT
+AATATTTTACTGGGGAAGAATCAGGTTATGCTGAGTCAGGAACGCAGCGCCGAAGAGTAT
+CAACAAGCCCTTGTCGATAATATTGAAGAGCTGGAGGGACTGTCGCGACTGACAGAAAAT
+ATTCTCTTTCTGGCACGCGCGGAGCACCAGAATATAGCGGTAAAAAAACAGCCTGTTTCG
+CTCAATGCGCTGGTCGAAAATATGCTGGATTATCTTAGCCCCCTTGCCGAAGAGAAGCAC
+ATCTGTTTTATAAATCAATGTCAGGGAACGGTATGGGCTGACGAAATATTATTACAAAGA
+GTGCTCTCAAACCTGCTGACGAATGCCATCCGTTATTCTGATGAAAACGCCGTGATACGT
+ATTGAAAGCGCTTATGATGATAACGTTGCAGAAATTCGGGTCGCTAATCCGGGCAGCCCC
+ACCGCCGATGCGGATAAGCTTTTCCGGCGTTTTTGGCGAGGAGATAATGCCCGCTACACT
+GCCGGTTTCGGCCTGGGGTTATCGTTAGTTAACGCGATTGCCCTATTGCACGGTGGCTCG
+GCATCTTACCGCTATGCCGATGAACATAATATCTTTTCGGTTCGTCTGCCTGATAGCGGT
+GATAGCATGTCATCTTGTTGGAGATTTACGGATTCGCTAACAAGCCTATGGCATACTGCG
+TTGATGAAGATTTTATTGATTGAAGATAACCAGAAAACCATTGAGTGGGTACGTCAGGGA
+CTCACGGAGGCAGGCTATGTGGTTGATTATGCCTGTGATGGACGAGACGGATTACACCTA
+GCCCTTCAGGAACATTATTCATTGATTATTCTTGATATTATGCTGCCGGGGCTTGATGGA
+TGGCAGGTTTTACGCGCGTTGCGCACTGCATATCAGCCCCCTGTTATTTGCCTGACGGCG
+CGCGACTCGGTTGAGGATCGCGTCAAAGGTCTTGAGGCGGGCGCTAATGATTACCTTGTT
+AAGCCTTTTTCCTTCGCCGAACTGCTGGCCCGGGTGAGAGCTCAACTCAGACAGCATGTC
+CCGGTCTTTACCCGACTGACGATCAATGGTCTGGACATGGATGCCACAAAGCAATCGGTG
+TTACGAAATGGCAAACCGATTTCCCTGACCCGCAAAGAATTCCTGCTCCTCTGGTTACTG
+GCGTCCCGGGCAGGGGAAATCGTGCCCCGAACCGCGATCGCCAGCGAAGTTTGGGGAATT
+AACTTTGATAGTGAAACCAACACCGTTGATGTCGCGATTCGTCGGCTGCGCGCCAAAGTA
+GACGATCCATTTGAAAAGAAGCTCATTATGACCGTCCAGGGGATGGGTTATCGATTACAG
+GCGGAAACGTCGCAGAATGGTATGAAACGATATATACTGGCTACCGCGATAGCGTCTCTT
+GTTGCAGCCCCGGCAATGGCGCTGGCCGCTGGCAGCAATATTCTCAGCGTACATATTCTC
+GATCAGCAAACAGGCAAACCAGCGCCCGGCGTGGAGGTGGTACTGGAGCAGAAAAAGGAT
+AACGGATGGACGCAATTAAACACCGGGCATACCGACCAGGATGGACGAATTAAAGCACTG
+TGGCCCGAAAAAGCTGCCGCGCCGGGGGATTATCGCGTTATTTTTAAAACCGGCCAGTAT
+TTTGAAAGTAAAAAACTGGACACGTTTTTCCCGGAGATTCCCGTCGAGTTTCATATCAGC
+AAAACGAATGAGCACTATCATGTGCCGCTGTTATTAAGTCAGTATGGTTATTCAACCTAT
+CGCGGGAGCATGCAAGTAGATGAACAACGTCTGCGTTTTCGCGATGCGATGGCAAGTCTG
+GCGGCAGCGGTCAACATCGTAACCACGGCGGGTCACGCCGGACGCTGCGGTATCACCGCA
+ACAGCGGTTTGCTCAGTCACTGATACGCCGCCCTCCGTGATGGTATGTATTAATGCCAAT
+AGCGCCATGAACCCCGTTTTTCAGGGCAACGGCAGGCTGTGCATTAATGTACTTAACCAT
+GAGCAGGAGCTGATGGCGCGCCACTTTGCCGGTATGACGGGGATGGCGATGGAGGAGCGT
+TTTCACCAGCCATGTTGGCAAAACGGGCCGCTGGGCCAGCCGGTACTTAACGGCGCGCTG
+GCCAGTCTTGAAGGCGAGATCAGCGAGGTACAAACCATTGGCACGCATCTGGTGTATCTG
+GTGGCGATCAAAAATATTATTCTTAGCCAGGAGGGGCATGGCCTGATTTATTTCAAACGC
+CGTTTTCATCCGGTCAGACTTGAGATGGAAGCGCCTGTTATGGGACGCACACCGGATTAC
+AAAGCCGCCTTTGGCTGCGCTCTGGGCGCTAACCCAGCCTTCTACGGCCAGTTTGAGCAG
+AACGCCCGTAACTGGTACACCCGTATTCAGGAGACCGGCCTGTACTTTAACCATGCAATC
+GTCAACCCGCCCATTGACCGCCACAAACCTGCCGACGAAGTGAAAGACGTCTATATCAAG
+CTGGAGAAAGAGACGGACGCCGGGATTATTGTCAGCGGGGCGAAAGTTGTCGCCACTAAC
+TCCGCCCTGACTCACTACAACATGATTGGTTTCGGCTCAGCCCAGGTGATGGGCGAAAAC
+CCGGATTTTGCTCTGATGTTTGTCGCGCCAATGGATGCCGAAGGCGTAAAACTTATTTCG
+CGCGCCTCGTATGAAATGGTCGCGGGCGCGACGGGCTCGCCGTTTGATTATCCCCTCTCC
+AGCCGTTTTGATGAAAACGATGCCATTCTGGTGATGGACAAGGTGCTGATCCCGTGGGAA
+AACGTATTAATTTACCGTGATTTCGATCGTTGTCGTCGCTGGACGATGGAAGGCGGCTTT
+GCCCGTATGTATCCACTGCAAGCCTGTGTTCGTCTGGCGGTAAAACTTGATTTCATTACC
+GCGCTGCTGAAAAAATCGCTCGAATGTACGGGTACCGTAGAGTTCCGGGGCGTGCAGGCC
+GATCTCGGCGAAGTCGTGGCCTGGCGCAATATGTTCTGGGCATTGAGCGATTCTATGTGT
+TCTGAAGCAACCCCGTGGGTAAACGGCGCCTGGCTACCGGACCACGCCGCGCTGCAAACC
+TATCGTGTGATGGCCCCAATGGCCTACGCGAAAATTAAAAATATTATTGAACGTAACGTT
+ACCAGCGGCCTGATTTACCTGCCTTCCAGCGCCCGCGATCTGAATAATCCGCAAATCGAC
+CAGTACCTGGCGAAATACGTACGCGGCTCTAACGGAATGGACCATGTTGAACGTATCAAA
+ATTCTTAAATTGATGTGGGATGCCATCGGCAGCGAGTTTGGCGGTCGCCATGAGCTGTAC
+GAGATTAACTACTCGGGCAGCCAGGATGAAATTCGTCTGCAGTGTCTGCGTCAGGCCCAG
+AGCTCCGGCAATATGGATAAGATGATGGCAATGGTCGATCGCTGCCTCTCCGAATACGAT
+CAGAATGGCTGGACGGTTTCGCATTTGCACAATAACGACGACATCAATCAACTGGATAAG
+CTGCTGAAAATGCATGATTCATTAACCATCGCCTTGCTTCAGGCGCGCGAAGCGGCAATG
+ACCTATTTCCGCCCCATCGTTAAAAGCCACAATCTGACCGACCAGCAATGGCGCATTGTG
+CGAATCCTGGCCGATAGCCCCTCTATGGATTTTCACGAGCTGGCCTTTCGTACCTGTATT
+TTGCGTCCAAGTCTGACCGGAATATTGACGCGCATGGAGCGAGACGGACTGGTGTTGCGA
+CTCAAGCCGGTTAACGATCAGCGTAAGTTATATGTCATGTTGACGGAGCAGGGACAAACG
+TTGTACGCCCGTGCCCGGAGCGAGGTAGAAGAGGCTTATCGAAAAATTGAGGCCGATTTC
+ACGCCCGAAAAAACACAGCAATTGATGCTGCTGCTGGACGATCTTATTGCTCTGGGGCGC
+CAGCATCCTGATAGCGAAGCGGAAGCAATGAAGGGTACTGTTTTCGCCGTTGCGTTAAAC
+CATCGCAGCCAGCTTGATGCCTGGCAAGAGGCTTTCTCTCAGCCTCCCTATAATGCGCCG
+CCTAAAACCGCAGTGTGGTTCATCAAGCCGCGTAATACGGTGATTCGTCACGGCGAACCC
+ATTCCTTATCCGCAGGGAGAAAAGGTACTGAGCGGCGCGACAGTGGCGCTCATTGTGGGG
+AAAACCGCCAGCCGGATACGCCCTGAAGCGGCGGCGGACTATATCGCCGGGTATGCGCTG
+GCTAACGAGGTCAGCCTGCCGGAAGAGAGCTTTTATCGCCCGGCGATTAAAGCGAAATGT
+CGCGATGGCTTTTGCCCGCTGGGTGAAATGGCGCCGCTGAGTGATGTGGATAATCTCACC
+ATTATCACTGAAATCAACGGACGAGAAGCGGACCACTGGAATACTGCCGATTTACAGCGT
+AGCGCCGCACAACTGCTTAGCGCGTTAAGTGAGTTCGCTACACTTAACCCTGGCGATGCG
+ATCTTACTTGGTACGCCGCAGAATCGCGTTGCGCTGCGTCCCGGCGATCGGGTGCGTATT
+CTGGCGAAAGGTTTACCCGCGCTGGAAAATCCGGTTGTCGCAGAAGATGAATTCGCCCGC
+CACCAGACGTTTACGTGGCCGCTGTCAGCGACGGGAACGTTATTTGCGCTGGGGTTGAAC
+TACGCCGATCACGCCAGCGAGCTGGCATTTACGCCGCCGAAAGAGCCGCTGGTATTTATC
+AAAGCGCCAAACACCTTTACCGAACATCACCAAACGTCGGTGCGCCCGAACAACGTCGAA
+TATATGCACTACGAAGCCGAGCTGGTCGTGGTGATTGGCAAAACGGCGCGTAAGGTGAGC
+GAAGCCGAAGCCATGGAGTATGTGGCCGGTTACACCGTCTGTAACGACTACGCGATCCGC
+GACTATCTGGAAAACTACTACCGTCCGAATCTGCGGGTAAAAAGCCGCGACGGCCTGACG
+CCGATAGGCCCGTGGATTGTGGATAAAGAGGCGGTTTCTGATCCGCACAACCTGACGTTA
+CGCACCTTTGTCAACGGTGAGCTGCGGCAGGAAGGGACGACCGCCGATCTGATCTTCAGC
+ATCCCGTTCCTGATTTCTTATCTGAGCGAATTTATGACGTTGCAACCGGGCGACATGATT
+GCCACCGGTACGCCGAAAGGGCTGTCCGATGTGGTGCCGGGGGATGAAGTTGTCGTTGAA
+GTAGAAGGCGTGGGTCGCCTGGTTAACCGAATCGTCAGTGAGGAGAGCGCAAAAATGAAG
+AAAATAAATCATTGGATTAACGGCAAAAACGTTGCAGGTAACGACTACTTCCAGACCACT
+AACCCGGCGACCGGTGATGTGCTGGCGGAAGTAGCCTCCGGCGGTGAAGCAGAAGTGAAC
+CAGGCTGTCGCGGCGGCAAAAGAGGCGTTCCCGAAATGGGCCAACCTGCCGATGAAAGAG
+CGCGCGCGCCTGATGCGCCGCCTTGGCGACCTGATTGACCAGCATGTGCCGGAAATCGCG
+GCGATGGAAACCGCCGACACCGGCCTGCCTATTCACCAGACTAAAACGTGCGTGCTGATC
+CCGCGCGCCTCGCATAACTTCGAATTCTTCGCCGAAGTGTGCCAGCAGATGAACGGCAAG
+ACCTATCCGGTTGACGATAAAATGCTCAATTATACGCTGGTGCAGCCCGTCGGCGTCTGC
+GCGCTGGTGTCGCCGTGGAACGTGCCGTTTATGACCGCGACTTGGAAAGTTGCGCCGTGC
+CTGGCGCTGGGTAACACCGCGGTGCTCAAAATGTCCGAGCTGTCGCCGCTGACTGCCGAC
+AGGCTGGGCGAGCTGGCACTGGAGGCAGGAATTCCGGCAGGCGTGCTGAACGTGGTGCAG
+GGCTACGGCGCGACGGCGGGCGATGCGCTGGTACGCCACCATGACGTGCGTGCGGTGTCG
+TTTACCGGCGGTACCGCCACCGGTCGCAATATCATGAAAAATGCCGGGCTGAAAAAATAC
+TCGATGGAGCTGGGCGGCAAATCGCCGGTGCTGATTTTTGAAGACGCCGACATTGAGCGC
+GCGCTGGACGCCGCGCTGTTCACCATCTTCTCGATCAACGGCGAACGCTGCACCGCTGGG
+TCGCGCATCTTTATCCAGCAGAGCATTTACCCTGAGTTCGTGAAGCGCTTTGCCGAACGC
+GCGAATCGCCTGCGTGTCGGCGATCCGACCGACCCGAACACCCAGGTCGGCGCGCTGATT
+AGCCAACAGCACTGGGAGAAAGTCTCCGGTTATATCCGCCTCGGCATTGAAGAGGGGGCA
+ACGCTGCTGGCGGGCGGTGCGGAAAAACCCACTGACCTGCCTGCGCATCTGAAAGGCGGT
+AACTTCCTGCGCCCAACCGTGCTGGCCGATGTCGACAACCGTATGCGCGTTGCGCAGGAA
+GAGATCTTTGGGCCGGTCGCCTGCCTGCTGCCATTCAAAGACGAAGCGGAAGGGTTACGT
+TTGGCGAACGATGTGGAATACGGTCTGGCCTCTTATATCTGGACCCAGGACGTGAGCAAA
+GTGTTGCGCCTGGCGCGTGGGATTGAAGCCGGCATGGTCTTCGTCAACACCCAGAACGTC
+CGCGACCTGCGCCAGCCGTTCGGCGGCGTGAAAGCCTCCGGTACCGGGCGCGAAGGCGGC
+GAATATAGCTTCGAAGTGTTTGCGGAAATGAAAAACGTCTGCATCTCAATGGGCGACCAT
+CCTATCCCAAAATGGGGAGTTATGGGCAAGTTAGCGTTAGCAGCAAAAATTACCCACGTG
+CCGTCGATGTATCTTTCTGAACTGCCAGGAAAAAATCACGGTTGTCGTCAGGCAGCCATT
+GATGGGCATATTGAAATTGGCAAGCGTTGCCGCGAAATGGGCGTTGACACCATTATCGTA
+TTCGACACCCACTGGCTGGTGAATAGCGCTTACCACATTAATTGTGCCGACCATTTCCAG
+GGCGTCTATACCAGCAACGAATTGCCGCACTTTATTCGCGACATGACCTATGACTATGAC
+GGTAATCCGGCGCTCGGCCATCTGATCGCCGACGAGGCGGTCAAACTGGGCGTGCGCGCC
+AAAGCGCACAACATCCCGAGCCTGAAGCTGGAGTATGGCACGCTGGTGCCGATGCGCTAC
+ATGAACAGCGACAAGCACTTCAAAGTGGTCTCCATCTCGGCGTTCTGCACTGTGCATGAT
+TTTGCCGACAGCCGCAAACTGGGCGAAGCCATTCTCAAGGCGATTGAGAAATATGACGGT
+ACCGTAGCGGTATTCGCCAGTGGTTCTCTGTCGCACCGTTTTATTGACGACCAACGGGCG
+GAAGAGGGGATGAACAGCTACACCCGCGAGTTCGATCATCAAATGGACGAGCGCGTGGTC
+AAGCTGTGGCGCGAAGGCAAATTCAAGGAGTTTTGCACCATGTTGCCGGAGTACGCCGAC
+TACTGCTACGGCGAAGGCAACATGCACGACACGGTCATGCTACTGGGAATGCTGGGGTGG
+GACAAATACGACGGCAAGGTGGAGTTCATCACCGACCTGTTCGCCAGCTCCGGTACCGGC
+CAGGTAAACGCTGTTTTCCCGCTGCCTGCGATGCCGCACTTTATTGCTGAATGTACTGAA
+AATATTCGCGAGCAGGCTGATTTACCAAGCCTGTTCAGCAAGGTAAACGAGGCGCTGGCC
+GCCACCGGGATTTTCCCCATCGGCGGTATCCGCAGTCGCGCCCACTGGCTGGATACCTGG
+CAGATGGCTGACGGTAAGCATGATTACGCGTTTGTGCATATGACGCTGAAAATCGGCGCC
+GGGCGCAGCCTGGAGAGCCGTCAGGAAGTCGGCGAAATGCTGTTTGGGCTGATTAAAGCC
+CACTTCGCCGACCTGATGGAGAACCGCTATCTGGCGCTGTCGTTTGAGATTGCCGAGTTA
+CATCCAACGCTCAATTACAAACAAAACAACGTACACGCGTTATTTAAAATGCTCGATAAA
+CAGACCCATACCCTGATCGCTCAGCGACTTAATCAGGCTGAAAAACAGCGTGAACAGATT
+CGCGCAGTGTCGCTGGATTATCCCAACATCACTATTGAAGATGCCTATGCCGTACAGCGT
+GAATGGGTCAATATCAAGATTGCCGAAGGGCGCACGCTCAAAGGCCACAAAATCGGCCTG
+ACCTCAAAAGCGATGCAGGCCAGCTCGCAAATCAGCGAACCGGATTACGGCGCGCTGCTT
+GACGATATGTTCTTCCATGACGGCGGAGATATCCCCACCGACCGTTTTATCGTCCCGCGT
+ATTGAAGTGGAGCTGGCGTTCGTGCTGGCGAAACCGCTGCGCGGCCCTCACTGCACGCTG
+TTCGACGTCTACAACGCCACGGATTATGTGATTCCGGCGCTGGAACTGATTGACGCCCGC
+AGCCACAACATCGACCCGGAAACCCAGCGCCCGCGCAAAGTGTTCGACACCATTTCCGAC
+AACGCCGCCAACGCCGGGGTGATCCTCGGTGGTCGCCCCATCAAACCAGACGAGCTGGAT
+CTGCGCTGGATCTCCGCGCTGCTCTATCGCAACGGCGTGATCGAAGAAACCGGCGTCGCC
+GCAGGCGTGCTGAATCATCCGGCCAACGGCGTGGCGTGGCTGGCGAACAAGCTTGCCCCC
+TACGATGTCCAGCTTGAAGCCGGGCAGATCATCCTCGGCGGCTCGTTCACCCGCCCGGTG
+CCGGCGCGCAAGGGCGACACCTTCCATGTCGATTACGGCAACATGGGCGCGATCAGTTGC
+CGGTTTGTGATGAAAAATGCTTTCAAAGACGCGTTAAAAGCGGGGCGCCCGCAAATCGGT
+TTGTGGCTGGGGCTTGCCAACAGTTACAGCGCTGAACTGTTAGCGGGCGCCGGCTTCGAC
+TGGCTACTGATTGACGGTGAACACGCGCCAAACAACGTGCAGACGGTGTTGACCCAGTTG
+CAGGCGATTGCGCCTTATCCCAGCCAGCCGGTGGTGCGTCCGTCATGGAACGATCCGGTA
+CAGATTAAGCAACTGCTCGACGTCGGCGCGCAAACGCTGCTGATACCGATGGTGCAGAAT
+GCCGATGAAGCGCGAAACGCCGTGGCGGCTACGCGTTATCCGCCTGCCGGTATTCGCGGC
+GTGGGCAGCGCGCTGGCGCGGGCATCGCGCTGGAATCGCATTCCGGACTATCTCCACCAG
+GCCAACGACGCCATGTGCGTACTGGTGCAGATTGAAACGCGTGAGGCGATGAGCAATCTG
+GCGTCAATTCTCGACGTGGATGGCATTGACGGCGTGTTTATTGGCCCGGCGGATCTCAGC
+GCCGATATGGGCTTTGCCGGCAATCCGCAGCACCCGGAAGTGCAGGCGGCGATTGAGAAC
+GCCATCGTGCAGATACGCGCGGCGGGGAAAGCGCCGGGGATTCTGATGGCCAATGAAGCA
+CTGGCGAAACGTTATCTGGAACTGGGGGCGCTATTTGTCGCCGTCGGCGTTGACACCACG
+CTGCTGGCGCGCGGAGCGGAGGCGCTGGCGGCGCGCTTTGGCGCAGAAAAAAAACTGTCC
+GGTGCGTCCGGCGTCTATATGAGCGACACATCATCTGCACTTCCGGAAAGCCCCGAGTCT
+GTCGGTTCGCACAACGCGCTCAGCACGGGTCAACAAACCGTCATAAATAAACTGTTCCGC
+CGACTGATCGTATTTTTATTCGTGTTGTTTATCTTCTCGTTTTTAGACCGTATCAACATC
+GGTTTTGCCGGGTTGACGATGGGGCAGGATCTGGGGTTAAGCGCCACCATGTTTGGTCTT
+GCCACGACGCTGTTTTACGCCACCTACGTCATTTTCGGCATTCCCAGCAACGTGATGTTG
+AGCATCGTCGGCGCCCGCCGCTGGATTGCGACCATTATGGTGCTATGGGGCATTGCATCT
+ACCGCCACGATGTTCGCGGTGGGACCGAAAAGCCTGTATGTGCTGCGAATGCTGGTGGGC
+ATTACCGAAGCGGGCTTTTTGCCAGGAATATTGCTCTATTTAACCTACTGGTTCCCGGCA
+TTTTTCCGCGCCCGCGCCAACGCATTATTTATGATTGCCATGCCGGCCACTACCGCGTTG
+GGGTCAATTGTCTCCGGCTATATTTTATCGCTGGACGGCATATTCAATCTGCATGGATGG
+CAGTGGTTATTCCTGTTGGAAGGATTTCCGTCAGTTTTGTTAGGCATTATGGTCTGGTTT
+TACCTGGATGATACCCCGGCAAAAGCCAAATGGCTGACGGCAGAGGATAAAAAATGTTTG
+CAGGAGATGATGGATAATGATCGCCTGACGCTGGTTCAGCCTGAGGGGGCCATCAGCCAT
+AACGCCATGCAGCAGCGTAGCCTGTGGCGCGAAGTATTCACGCCAATTGTACTGATGTAT
+ACGCTGGCCTATTTTTGCCTTACCAATACGCTTAGCGCCATTAGTATCTGGACGCCGCAA
+ATCCTGAAAAGTTTTAATGAAGGCAGCAGCAATATCACCATCGGCCTGCTGGCGGCGATC
+CCGCAGATTTGTACTGTTCTGGGCATGATTTACTGGAGCCGCCATTCGGACAAACATCAG
+GAGCGTAAACACCACACTGCGTTACCGTTCCTGTTTGCCGCCGCGGGCTGGCTGCTGGCG
+TCGGCGACCGACCGTAACCTGATCCAGCTCCTGGGGATCGTGATGGCATCCACGGGTTCC
+TTTAGCGCGATGGCGATCTTCTGGACCACGCCGGATCAGTCGATCAGTTTACGCGCCAGG
+GCGATAGGCATTGCGGTCATCAATGCCACCGGCAATATTGGCTCCGCGCTCAGCCCGGTT
+ATGATTGGCTGGCTAAAAGATATCACTGGTAGCTTCAATAGCGGACTCTGGTTTGTCGCT
+TCTCTGTTAGTCGTCGGCGCCGCCATTATCTGGCTCATTCCCATGAAAGCATCGCGTCCG
+CGCGCCACCCCTATGTGCCAACGTGCGATCGCCAATATTGATATCAGCAAAGAGTATGAC
+GAAAGCATGGGCAGTAACGATGTGCATTATCAGTCGTTTGCTCGTATGGCGGATTTCTTT
+GGTCGTGATATGCAGGCGCATCGCCACGACCAGTTTTTTCAAATGCACTTTCTTGATACC
+GGGCAGATTGAGCTACAGCTCGACGATCATCGCTATTCGGTGCAGGCGCCGCTATTTGTG
+CTAACGCCGCCCTCGGTGCCGCATGCTTTTATTACCGAATCGGATAGCGATGGTCATGTT
+CTGACGGTACGCGAAGAGCTGGTTTGGCCGCTGCTGGAAGTGCTTTATCCCGGCACCAGA
+GAGGCCTTCGGCCTGCCGGGAATCTGCCTGTCGCTGGCGGATAAACTCAACGAGCTGGCG
+GCGCTCAAACATTACTGGCAGCTAATTGAGCGGGAGTCCACGGAACAACTGGCTGGCTGC
+GAACATACCTTGGTACTACTGGCGCAGGCGGTATTTACCTTGCTGTTGCGTAATGCGAAG
+CTGGACGATCATGCCGCAACCGGGATGCGCGGTGAACTGAAACTTTTTCAGCGCTTTACC
+CTGTTAATTGACAACCACTTCCATCAGCACTGGACGGTGCCCGATTATGCCTGTGAGTTG
+CATATTACCGAATCTCGTTTGACCGATATTTGCCGACGTTTTGCTAATCGCCCGCCTAAA
+CGCCTGATTTTTGATCGGCAATTACGCGAGGCGAAACGACTGCTGCTTTTTTCCGACAAT
+GCTGTCAACGAGATCGCCTGGCAATTAGGTTTTAAAGATCCGGCTTATTTCGCCCGTTTC
+TTTAATCGCCTTGCTGGCTGTTCTCCTTCGCAGTTTCGCCAACGTGAAGTTCCTTCTTTT
+CTCAACATGATGAAAAAAAGCGTCGCTATGCTGGCGGTTTGTATGCTGGCGCAAAGCCAC
+CTTGCCATTGCTGCCGGTGCTCCTGCGCCTCAAGAGATCAACATTGTTTTACTGGGCACC
+AAAGGCGGGCCTTCTTTGCTCAATACAGCCAGACTACCGCAAGCGACGGCGCTCACTATC
+GGCGATAAGATATGGCTGATAGATGCCGGCTACGGCGCCAGTCTGCAACTGGTGAAAAAT
+GGCATTCCACTGCGCAACATCAATACTATTTTGCTCACCCATCTGCACAGCGACCACATA
+CTGGATTATCCTTCCTTGCTGATGAATGCCTGGGCAAGTGGCCTGAAAGACCATACCATA
+CAGGTTTATGGCCCGCCGGGAACCCAGGCGATGACGAAGGCTAGCTGGAAGGTCTTTGAC
+AGGGATATCACGTTACGCATGGAAGAAGAGGGGAAACCCGATCCGCGCAACCTGGTTAAG
+GCGACCGATATCGGCCAGGGCGTCATCTATAAAGATGAACTGGTCACAATAAGCGCGCTG
+AAAGTGCCTCATTCCCCTTTCCCGGACGGTGAAGCGTTTGCTTACCGTTTTGATACTCAG
+GGTAAGCGAATCGTCTTCTCTGGCGATACGTCCTGGTTTCCTCCGCTTGCAACGTTTGCC
+CAGGGGGCGGATATCCTGGTACATGAGGCGGTACATGTCCCTTCGGTAGCAAAACTGGCT
+AATAGTATTGGCAACGGAAAAACGCTGGCTGAAGCGATTGCGTCGCATCACACCACGATT
+GAAGATGTCGGTAAGATTGCTCGCGAGGCCCACGTGAAAAAACTGGTGTTAAGTCATCTG
+GTGCCTGCGACGGTTGCGGATGACGTCTGGCAACAGGAAGCCATGAAAAATTACCCGGGC
+CCTGTCATTGTCGGTCATGACAATATGACGATAAGCGTACCGATGGCTAACATCACTGTC
+ACCTTTACCATCACCGAATTTTGTTTGCACACCGGCGTGACGGAAGAGGAGCTAAACGAA
+ATCGTCGGACTTGGCGTAATTGAGCCTTACGAAGACGATAACGCCGACTGGCAATTCGAC
+GATCGCGCAGCGAGCGTGGTACAACGCGCGCTACGCTTACGCGAGGAGCTGGCGCTCGAC
+TGGCCAGGGATCGCGGTCGCGTTAACGCTGCTGGAAGAGAATTCACGGCTGCGCGAAGAA
+AACCGGTTACTGCTGCAACGCCTTTCTCGCTTTATCTCGCATCCCATGGAACTTAAGGAT
+TATTACGCCATTATGGGCGTGAAACCGACGGACGATCTCAAGACGATTAAGACCGCCTAT
+CGCCGACTGGCCCGCAAGTACCATCCAGATGTCAGCAAAGAACCCGATGCCGAAGCCCGT
+TTCAAAGAGGTTGCTGAAGCATGGGAAGTGCTGAGTGATGAGCAACGGCGCGCCGAGTAT
+GACCAGTTATGGCAACACCGTAACGATCCACAATTTAATCGCCAGTTCCAGCAACACGAA
+GGCCAGCCGTATAACGCCGAAGATTTTGATGATATTTTCTCGTCTATTTTTGGTCAGCAC
+GGTCGTCATTCGCACCACCGCCACGCCGCACGCGGTCATGATATCGAAATTGAAGTGGCG
+GTATTCCTGGAAGAAACGCTGGAAGAGCACCAGCGTACGATTAGCTATTCCGTCCCCGTT
+TATAACGCGTTCGGCCTGGTGGAGCGGGAAATTCCCAAAACATTGAATGTGAAAATCCCG
+GCTGGCGTCAGCAACGGGCAACGAATCAGACTGAAAGGCCAGGGCACGCCGGGGGAAAAC
+GGCGGACCTAATGGCGATTTATGGCTCGTTATCCATATTGCCCCGCATCCGCTCTTTGAT
+ATCGTCAATCAGGATCTGGAAGTCGTCCTTCCGCTTGCCCCATGGGAGGCGGCGCTCGGC
+GCTAAGGTGTCTGTGCCAACGCTTAAAGAGCGTATTTTGCTGACCATTCCCCCCGGCAGC
+CAGGCAGGTCAGCGGCTGCGTATCAAAGGAAAAGGATTAGCCAGTAAAAAGCACACTGGC
+GATCTCTATGCCATCATCAAAATCGTTATGCCGCCGAAACCTGACGAGAAAACAGCTGCC
+CTGTGGCAACAACTGGCGGACGCGCAGTCGTCCTTTGACCCACGCCAGCAATGGGGGAAA
+GCAATGGCGAAACAACAACGGATGGGCTGGTGGTTTCTTTGCCTTGCATGTGTCGTGGTA
+ATGGTTTGTACCGCGCAACGCATGGCGGGCCTGCACGCCTTGCAGATGCAGGCGACGGCC
+TCTGCTGCGGTGGTCAGCGCTCCCTCCTCGACAGATGACGGCTCGCCGGTCACTCCCTGC
+GAATTAAGCGCCAAGTCGCTGCTGGCGGCGCCTCCAGTACTCTTTGAAGGTGCTATCCTT
+GCGCTTTATCTACTGCTTTCCTTACTGGCGCCTGTCCGGGTCATGCGCCTGCCGTTTTCG
+CCTCCACGGGCTATTTCGCCGCCCACATTACGGGTACATCTACGATTTTGTGTCTTCCGT
+GAAATGATGATTTTATTCAGGCGGATACTGTTCTGCCTGTTATGGCTTTGGCTGCCCGTC
+TCCTGGGCGGCGGAAAGCGGCTGGCTGCGTTCGCCCGATAACGACCATGCCAGCATACGG
+CTACGTGCCGATACGTCCGCTAACAGTGAGACCCGGCTGTTGCTGGATGTCAAACTGGAA
+AACGGCTGGAAAACCTACTGGCGCGCGCCGGGGGAAGGGGGCGTGGCACCCTCTATCGCC
+TGGAAAGGCGACATGCCTGAGGTAAGCTGGTTCTGGCCAACCCCCTCGCGCTTTGATGTG
+GCGAATATCACCACCCAGGGATATCACGACGAGGTGACCTTTCCGATGATCGTGCGCGGT
+ACGCCGCCGGCGACCTTGCGCGGTGTGTTGACGTTATCAACCTGCAGCAATGTTTGTCTG
+TTGACCGATTACCCCTTTTCCGTGACGCCCACTGTGCAGAATGCCGATTTTGCCCATGAC
+TATGCGCGGGCGATGGGTAAAGTTCCGCTCCGCAGTGGGCTAACGGACTCGCTTGACGTT
+GGCTATCGCCCGGGAGAACTGGTGGTCACTGCTACGCGAGCGGCGGGCTGGTCATCGCCC
+GGGCTCTATCTTGACACCATAGATGACGTCGATTTTGCGAAGCCTCGCCTGCGCGTAGAG
+GGCGACAGGTTACAGGCGACGGTGCCGGTGACGGACAGTTGGGGCGAAAAGGCGCCCGAT
+TTGCGCGACAAATCGCTGACCCTCGTGTTAGCCGATGGCGCTATCGCCCAGGAGAGCACG
+CAAACCATTGGCGCTGCGCCAGCGCAAACGCCGGACAATGCGGCGCTACCTTTCTGGCAA
+GTTGTAATGATGGCGCTAATCGGCGGACTGATTCTTAATTTAATGCCCTGCGTACTGCCT
+GTTCTGGGCATGAAACTTGGCTCTATTTTATTGGTAGAGGAAAAAAGCCGCTCTCACATC
+AGGCGACAATTTTTGGCTTCGGTCGCCGGTATCATTGCGTCATTTATGGCGCTGGCGGCG
+TTTATGACCCTCCTTCGCCTGTCAAACCATGCGCTGGCCTGGGGAGTCCAGTTCCAGAAT
+GCATGGTTTATTGGTTTTATGGCGCTGGTGATGTTGTTGTTTAGCGCCAGCCTGTTCGGG
+CTTTTTGAGTTCAGGCTTCCCTCATCTATGACCACGAAACTGGCCACTTACGGCGGTAAC
+GGTATGTCGGGACATTTCTGGCAGGGGGCGTTCGCCACGCTGCTGGCGACGCCTTGTAGC
+GCGCCGTTTCTGGGCACGGCGGTCGCGGTGGCGCTCACGGCGTCGCTGCCGACGCTGTGG
+GGGCTGTTCCTTGCGCTTGGCCTGGGAATGAGCGCGCCGTGGCTACTGGTCGCGATACGA
+CCAGGGCTTGCGCTACGTTTACCGCGCCCCGGGCGTTGGATGAATGTCCTGCGCAGGATC
+CTCGGTCTGATGATGCTGGGGTCGGCTATCTGGCTGGCGACGTTACTCCTGCCGCATTTC
+GGCTTCACTGCGTCAAAGAGCGCGCAAGACACGGTTCAGTGGCAACCGTTGAGTGAACAG
+GCAATCCAGTCGGCGCTGGCGCAGCATAAGCGGGTATTTGTCGATGTCACTGCGGACTGG
+TGTATTACCTGTAAAGTGAATAAATACAACGTCCTGCAAAAAGAGGATGTGCAGGCCGCC
+TTGCAACAGCCGGATGTTGTGGCGCTGCGGGGAGACTGGACGCTGCCGTCCGATGCCATT
+ACAGATTTTCTGAAAACGCGCGGCCAGGTCGCCGTGCCGTTTAATCAGGTATATGGCCCC
+GGTTTGCCGGAAGGGGAGGCACTGCCCACTTTGCTGACCCGCGATGCGGTATTACAAACG
+TTGAAAAAAGCGAAAGGAATAACCCAAATGAAATACATGATTGTTTTACTGCTGGCGCTG
+TTTTCGACGCTGAGCATCGCGCAAGAAACCGCTCCTTTTACGCCGGATCAGGAAAAGCAG
+ATTAAAAATCTGATCCATGCGGCGTTGTTTAACGATCCTGCCAGCCCGCGGATAGGCGCT
+AAACACCCTAAGCTGACGCTGGTGAACTTTACGGATTACAACTGCCCGTACTGCAAACAG
+CTCGATCCGATGCTGGAAAAGATTGTGCAGAAATATCCTGACGTTGCGGTCATTATTAAA
+CCGCTGCCATTCAAAGGAGAGAGTTCCATACTGGCGGCGCGTATTGCGCTGACCACCTGG
+CGCGATCATCCGCAACAGTTCCTCGCGCTACATGAAAAACTTATGCAAAAGCGCGGTTAC
+CATACGGATGACAGTATTAAACAGGCCCAGCAGAAAGCAGGGGCGACGCCAGTGACGCTG
+GATGAAAAAAGCATGGAAACGATACGCACTAATTTGCAGTTGGCAAGACTGGTCGACGTG
+CAAGGAACGCCAGCGACGATCATTGGCGACGAGCTGATTCCGGGCGCAGTGCCCTGGGAT
+ACGCTGGAAGCGGTGGTGAAAGAAAAACTGGCGGCTGCCAATGGCGGGATGGCGGGTAAA
+CTGCGGCGTTGGCTGCGTGAAGCCGCGGTTTTTCTGGCGCTCCTCATCGCGATAATGGTG
+GTCATGGACGTCTGGCGCGCGCCGCAGGCGCCTCCGGCGTTTGCCGCGACACCATTACAT
+ACGCTGACGGGAGAGTCGACAACTCTGGCGACCTTGAGCGAGGAACGCCCCGTACTGCTC
+TATTTTTGGGCCAGCTGGTGCGGGGTATGCCGCTTTACCACGCCTGCGGTCGCTCACCTG
+GCGGCGGAAGGGGAAAACGTCATGACCGTTGCGCTCCGCTCCGGCGGTGATGCTGAGGTT
+GCCCGCTGGCTGGCGCGCAAGGGCGTTGACTTCCCGGTCGTCAATGATGCTAACGGCGCC
+TTATCCGCTGGCTGGGAAATCAGCGTGACGCCAACGCTGGTGGTGGTTTCACAAGGTCGG
+GTTGTGTTCACCACCAGCGGCTGGACCAGCTATTGGGGCATGAAGCTTCGGCTGTGGTGG
+GCAAAAACGTTCATGAAAAAATCATTACTCGCTGTTGCTGTGGCAGGGGCTGTTTTGTTG
+TCATCCGCCGTACAGGCGCAGACAACGCCGGAAGGTTATCAATTACAACAGGTGCTGATG
+ATGAGCCGCCATAATCTGCGGGCGCCGCTGGCGAATAATGGCAACGTACTGGCGCAGTCG
+ACGCCGAACGCCTGGCCGGCGTGGGACGTTCCCGGCGGGCAACTGACGACGAAAGGCGGC
+GTGCTGGAAGTCTATATGGGACACTACACACGTGAATGGCTGGTCGCGCAGGGGCTGATA
+CCGTCGGGAGAATGTCCGGCGCCCGACACGGTATATGCCTATGCGAATAGTTTGCAGCGC
+ACCGTCGCCACCGCGCAATTTTTCATTACCAGCGCTTTCCCCGGCTGTGATATTCCTGTT
+CATCATCAGGAAAAAATGGGCACTATGGACCCTACCTTCAATCCGGTGATTACCGATGAT
+TCCGCCGCGTTCCGGCAACAGGCCGTACAGGCGATGGAAAAGGCGCGTAGTCAGCTACAT
+CTTGATGAGAGTTATAAACTGCTTGAGCAGATAACGCATTATCAGGACTCGCCGTCCTGC
+AAAGAGAAGCATCAGTGTTCGCTAATCGACGCGAAAGATACCTTCAGCGCGAACTATCAG
+CAAGAGCCTGGCGTGCAGGGGCCGCTGAAAGTAGGGAACTCGCTGGTGGATGCGTTTACC
+CTGCAATATTACGAAGGCTTTCCGATGGATCAGGTCGCTTGGGGCGGGATCCACACCGAT
+CGGCAGTGGAAGGTGCTGTCAAAACTGAAAAACGGCTACCAGGACAGCCTGTTTACCTCA
+CCCACGGTGGCGCGCAATGTCGCTGCGCCGCTGGTAAAATATATCGATAAGGTGCTGGTT
+GCCGAGCGCGTTAGCGCGCCGAAGGTTACCGTGCTGGTGGGGCATGATTCCAATATCGCG
+TCGCTGCTGACGGCGCTGGATTTTAAACCCTATCAGCTCCATGACCAGTATGAGAGAACG
+CCGATTGGTGGTCAGCTTGTCTTCCAACGCTGGCATGACGGTAACGCTAACCGGGATTTG
+ATGAAAATCGAGTATGTCTACCAGAGCGCCCGGCAGTTACGTAATGCGGAAGCGTTAACG
+CTCAAATCGCCTGCGCAAAGGGTAACGCTGGAACTGAAAGGATGTCCGGTGGATGCGAAC
+GGCTTCTGTCCGCTGGATAAGTTCGATAACGTCATGAACACTGCTGCAAAAATGCCAACT
+CAAGAAGCAAAAGCGCACCGCGTCGGCGAATGGGCAAGCCTGCGTAATACGTCGCCGGAA
+ATTGCCGAAGCCATTTTTGAAGTCGCTCACTATGACGAGAAACTGGCAGAAAAAATATGG
+GAAGAAGGTAGCGATGAGGTGCTGATCAAAGCCTTTGAGAAAACGGACAAAGACTCGCTC
+TTCTGGGGCGAACAAGTCATCGAACGTAAGAACGTAATGGCAAAGATTCTGGTGCTCTAT
+TATTCCATGTACGGACACATTGAAACCATGGCGCACGCGGTGGCGGAAGGGGCAAAGAAA
+GTCGACGGCGCAGAGGTCATTATAAAGCGTGTGCCAGAAACAATGCCGCCTGAAATCTTC
+GCAAAAGCTGGCGGTAAAACGCAAAACGCACCGGTTGCCACCCCACAGGAGCTGGCGGAT
+TACGATGCCATTATTTTTGGTACGCCAACCCGGTTTGGCAATATGTCAGGCCAGATGCGT
+ACCTTCCTGGACCAAACCGGCGGACTGTGGGCATCCGGCGCGCTATACGGCAAGCTCGGC
+GGCGTGTTCAGTTCTACCGGAACGGGCGGCGGCCAGGAGCAGACCATCACCTCGACCTGG
+ACTACGCTTGCCCATCATGGGATGGTGATTGTCCCGATAGGCTATTCCGCACAGGAACTG
+TTTGACGTCTCCCAGGTTCGCGGCGGTACGCCTTACGGCGCAACGACTATCGCTGGAGGC
+GACGGTTCACGTCAACCAAGCCAGGAGGAACTCTCTATCGCTCGCTATCAGGGGGAATAC
+GTCGCCGGTCTGGCAGTCAAACTCAACGGCATGGCAAACCATCGTGGCGGTTCCGGTAAT
+TTTGCGGAAGACCGCGAAAGAGCATCAGAAGCAGGTCGTAAAAGTGGTCAGCACAGCGGG
+GGCAATTTTAAGAATGACCCGCAGCGTGCATCCGAAGCAGGCAAAAAAGGGGGCAAAAGC
+AGTAACCGTAATCGCATGTCGCAACGCACAGAGAAAAAAATCGGGAAACGTTCGCAGGCC
+ACCGGTGCAAAACGGCAGCTTATCTTAACCGCCGCGCTTGCCGTTTTTTCCCAGTATGGC
+ATTCATGGCGCGCGTCTTGAACAGGTCGCCGAGCGGGCAGGCGTCTCCAAAACCAATCTG
+CTTTATTATTATCCCTCGAAAGAGGCGCTGTATGTCGCGGTAATGCGACAGATTCTGGAT
+GTCTGGTTGGCGCCGCTCAAGGCGTTTCGCGCAGAATTTTCCCCTCTGGAGGCCATCAAA
+GAGTATATCCGTCTCAAGCTGGAGGTTTCGCGTGATTATCCGCAGGCGTCGCGGCTCTTC
+TGCATGGAGATGCTGGCGGGCGCGCCGCTCTTAATGGATGAACTGACCGGCGATCTAAAA
+GCGTTGATAGATGAAAAATCCGCGCTGATTGCCGGATGGGTGCACAGCGGGAAACTCGCG
+CCCGTTTCTCCGCATCATTTGATCTTCATGATTTGGGCCGCCACGCAACATTACGCCGAT
+TTCGCCCCTCAGGTTGAAGCGGTAACCGGCGCGACGCTTCGCGATGAAGCCTTTTTCAAC
+CAAACGGTCGAAAGCGTTCAGCGCATTATTATTGAAGGGATTCGCGTGCGTATGAAACGA
+ATTTTCCTTACCTGCGCGGCGTTGTTGTTCAGCAGTCAGGCGTTGGCCGATGAGTGTGCC
+AGCGCCAGTACGCAGCTGGAAATGAATCGCTGCGCCGCCGCGCAATACCAGGCGGCAGAT
+AAAAAGCTGAACGAAACCTATCAAAGCGCGATTAAGCGTGCGCAACCGCCGCAGCGTGAG
+CTATTGCAAAAAGCGCAGGTGGCATGGATTGCCCTGCGCGACGCCGATTGCGCGCTGATT
+CGCTCAGGTACGGAGGGCGGCAGCGTTCAACCCATGATCGCCAGCCAGTGCCTGACCGAT
+AAAACGAACGAACGCGAAGCGTTTTTAGCCTCGCTGCTGCAATGTGAAGAGGGTGATTTG
+AGCTGCCCACTGCCGCCAGCCGGTATGGGAACCACCACGATGGGGGTTAAGCTGGACGAC
+GCCACGCGCGAACGGATCAAAATGGCCGCGTCGCGTATCGATCGCACGCCGCACTGGTTA
+ATAAAACAGGCAATCTTTAGCTATCTGGACAAGCTGGAAAATAGCGATACGCTACCGGAG
+CTACCTGCGCTGTTTGCCGGCGCGGCAAATGAAAGCGAGGAGCCGGTCGCGCCGCAGGAT
+GAGCCGCATCAGCCCTTTCTGGAGTTTGCCGAACAGATTCTTCCCCAATCCGTCTCTCGC
+GCCGCCATCACCGCCGCCTGGCGCCGCCCGGAAACCGATGCGGTGTCAATGCTAATGGAA
+CAGGCGCGCCTGTCGCCGCCTGTCGCTGAGCAGGCGCATAAACTGGCGTATCAACTGGCG
+GAGAAATTGCGCAATCAAAAATCCGCCAGCGGTCGCGCGGGTATGGTGCAAGGCCTGTTG
+CAGGAGTTTTCCCTCTCTTCGCAAGAAGGCGTAGCGCTGATGTGTCTGGCGGAAGCGCTG
+CTGCGTATTCCCGACAAAGCTACGCGCGATGCGTTAATTCGCGACAAAATCAGTAATGGC
+AACTGGCAGTCGCATATTGGCCGTAGCCCGTCGCTGTTTGTAAACGCCGCCACCTGGGGG
+CTGCTCTTTACCGGCCGACTGGTCTCAACGCATAACGAAGCCAATCTTTCGCGCTCGCTG
+AACCGCATTATCGGCAAGAGCGGCGAACCGTTAATCCGCAAAGGCGTCGACATGGCGATG
+CGTTTAATGGGCGAGCAGTTCGTGACTGGCGAAACCATTGCTCAGGCGCTGGCGAATGCC
+CGAAAACTGGAAGAGAAAGGGTTCCGCTATTCTTACGATATGCTGGGCGAAGCCGCGTTA
+ACCGCCGCCGATGCGCAGGCCTATATGGTCTCTTACCAGCAAGCGATTCATGCCATCGGC
+AAAGCGTCTAACGGTCGCGGTATTTACGAAGGGCCAGGCATCTCGATTAAGCTGTCCGCC
+CTGCATCCACGCTATAGTCGCGCGCAATACGATCGGGTAATGGAGGAGCTTTATCCGCGC
+CTGAAATCCCTGACGCTGCTGGCGCGCCAGTATGATATCGGTCTCAATATCGACGCCGAA
+GAGGCGGATCGTCTGGAGATCTCGCTTGATCTGCTGGAAAAACTCTGCTTCGAACCCGAA
+CTGGCGGGCTGGAACGGCATTGGCTTTGTGATTCAGGCTTACCAGAAACGCTGCCCGCTG
+GTCATTGATTATTTAGTCGATCTGGCCTCCCGTAGCCGCCGTCGGCTGATGATTCGTCTG
+GTGAAAGGCGCCTACTGGGATAGCGAGATCAAACGCGCGCAAATGGAAGGGCTGGAGGGC
+TATCCAGTTTATACCCGCAAAGTGTATACCGATGTCTCTTATCTGGCCTGCGCGAAAAAA
+CTGCTCGCCGTCCCTAATCTGATCTACCCGCAGTTCGCGACCCATAACGCTCACACACTG
+GCGGCGATTTATCATCTGGCCGGGCAAAATTACTATCCGGGTCAGTACGAATTCCAGTGC
+CTGCACGGCATGGGAGAACCGCTGTATGAACAGGTCACCGGTAAAGTGGGGGACGGAAAA
+CTTAACCGTCCCTGCCGTATTTACGCGCCGGTGGGAACACACGAAACCCTGCTGGCCTAT
+CTGGTACGACGCCTGCTGGAAAACGGCGCCAACACCTCTTTTGTCAACCGCATCGCCGAT
+GCCACCCTACCGCTCGATGAACTGGTGGCCGACCCGGTCGAGGCCGTGGAAAAACTGGCG
+CAGCAGGAAGGTCAGGCTGGCATACCGCATCCAAAAATTCCGCTGCCGCGCGATCTGTAC
+GGCGAAGGTCGGATAAACTCCGCCGGACTTGATTTAGCGAATGAACATCGCCTCGCCTCG
+CTTTCTTCTGCCCTGTTAAGCAACGCCATGCAGAAATGGCAGGCCAAACCTGTGCTGGAA
+CAACCGGTGGCCGACGGTGAGATGACGCCGGTTATCAACCCGGCGGAACCGAAAGATATT
+GTTGGCTGGGGACGCGAAGCGACAGAAAGCGAGGTTGAACAGGCGTTGCAAAACGCGGTC
+AATCAGGCGCCGGTTTGGTTTGCGACGCCGCCGCAAGAACGCGCCGCTATTTTGCAGCGG
+GCGGCGGTATTGATGGAAGACCAAATGCAGCAGTTGATTGGCCTGTTGGTGCGTGAAGCG
+GGGAAAACGTTCAGCAACGCCATTGCCGAAGTGCGCGAAGCGGTAGACTTCCTCCATTAT
+TATGCCGGTCAAGTGCGTGACGATTTCGATAACGAAACGCATCGCCCGTTAGGGCCGGTG
+GTCTGTATCAGTCCGTGGAACTTTCCGCTGGCCATTTTCACTGGCCAAATCGCCGCCGCG
+CTGGCGGCAGGTAACAGCGTTCTGGCGAAACCGGCAGAGCAGACATCGCTGATTGCCGCC
+CAGGGCATTGCCATTTTGCTGGAAGCGGGCGTACCGCCGGGCGTCGTGCAACTGTTGCCG
+GGACGGGGAGAAACCGTCGGCGCCCAGCTTACCGCCGATGCGCGTGTACGCGGCGTGATG
+TTTACCGGTTCCACGGAGGTCGCGACGTTGTTGCAGCGCAACATCGCCACGCGTCTTGAC
+GCCCAGGGGCGCCCTATTCCGTTGATTGCGGAAACCGGCGGTATGAACGCTATGATTGTC
+GACTCTTCCGCGCTCACCGAGCAGGTGGTCGTGGATGTGCTGGCTTCCGCCTTCGACAGC
+GCCGGACAACGCTGTTCCGCGCTCCGCGTGCTGTGTTTGCAGGACGATATCGCCGAACAT
+ACGCTGAAAATGTTACGCGGCGCGATGGCGGAGTGTCGGATGGGGAATCCAGGCCGTCTG
+ACGACCGATATCGGGCCGGTGATCGATAGCGAGGCCAAAGCCAACATTGAACGTCATATC
+CAGACGATGCGCGCCAAAGGCCGCCCGGTTTTCCAGGCCGCGCGTGAAAACAGCGATGAC
+GCGCAGGAATGGCAGACCGGTACGTTTGTTATGCCCACGCTTATTGAGCTGGAAAACTTC
+GCAGAACTGGAAAAAGAGGTCTTCGGGCCCGTGCTGCACGTCGTGCGTTATAACCGTAAC
+CAACTGGCGGAGCTTATCGAACAGATTAACGCTTCCGGCTACGGGCTAACGCTGGGCGTA
+CATACCCGTATTGATGAAACCATTGCGCAAGTCACCGGTTCCGCCCATGTCGGCAACCTG
+TACGTTAACCGTAATATGGTGGGCGCGGTCGTCGGCGTCCAGCCGTTTGGCGGCGAAGGC
+CTGTCCGGCACCGGGCCAAAAGCGGGAGGGCCGCTCTATCTCTACCGCCTGCTGGCACAC
+CGCCCGCCCAATGCGCTCAATACGACGCTGACTCGTCAGGATGCGCGTTACCCGGTGGAT
+GCGCAGCTTAAAACCACGCTACTCGCGCCGTTGACCGCTCTGACGCAATGGGCGGCGGAT
+CGCCCGGCGCTACAGACGCTCTGCCGACAATTCGCCGATCTGGCGCAGGCCGGCACGCAG
+CGCCTGCTACCGGGGCCGACCGGCGAGCGTAATACCTGGACGCTGTTGCCGCGTGAACGG
+GTGTTATGCCTGGCTGATGATGAACAGGACGCGTTGACGCAGCTTGCCGCCGTTCTCGCC
+GTCGGCAGTCAGGCGCTATGGTCAGACGACGCCTTCCACCGCGATCTGGCGAAACGTCTC
+CCCGCCGCCGTCGCGGCGCGTGTCCAGTTTGCGAAAGCGGAAACGCTGATGGCGCAGCCG
+TTTGACGCGGTGATTTTCCACGGCGACTCCGACAAGCTGCGAACCGTGTGCGAAGCCGTC
+GCCGCCCGCGAAGGCGCGATAGTGTCGGTACAGGGGTTCGCCCGCGGCGAAAGCAATATG
+CTGCTGGAACGGCTCTATATTGAACGTTCGCTGAGCGTAAACACTGCCGCCGCTGGCGGT
+AATGCCAGCCTGATGACAATTGGCATGGCTATTAGCACACCGATGTTGGTGACATTCTGT
+GTCTATATTTTTGGCATGATATTGATTGGGTTTATCGCCTGGCGCTCAACCAAAAACTTT
+GATGACTATATTCTTGGCGGTCGCAGCCTGGGGCCGTTTGTTACGGCTTTATCAGCCGGC
+GCGTCGGATATGAGCGGCTGGCTGTTAATGGGGCTGCCTGGCGCTATCTTTCTGTCGGGG
+ATCTCTGAAAGCTGGATCGCCATTGGCCTGACGTTAGGCGCATGGATTAACTGGAAGCTG
+GTGGCCGGGCGCCTGCGCGTGCATACCGAATTTAACAATAACGCGCTCACGCTGCCGGAC
+TATTTTACCGGTCGGTTTGAGGATAAGAGCCGAGTCCTGCGTATTATTTCCGCGCTGGTC
+ATTCTGCTGTTTTTCACTATCTATTGCGCATCAGGTATTGTCGCTGGGGCACGACTGTTC
+GAAAGCACCTTCGGTATGAGCTATGAAACCGCACTGTGGGCGGGGGCCGCGGCAACCATT
+ATTTATACCTTTATCGGCGGGTTTCTTGCCGTTAGCTGGACGGATACCGTTCAGGCCAGC
+CTGATGATTTTTGCGTTAATCCTGACGCCGGTGATGGTTATTGTCGGCGTAGGCGGTTTT
+AGCGAGTCGCTGGAAGTGATCAAGCAAAAGAGCATCGAGAATGTCGACATGCTCAAGGGG
+CTGAATTTTGTCGCTATTATTTCTCTGATGGGCTGGGGGCTGGGTTACTTCGGTCAGCCG
+CATATCCTGGCGCGCTTTATGGCGGCGGATTCCCATCACAGTATTGTTCATGCGCGTCGT
+ATCAGTATGACCTGGATGATTCTGTGTCTGGCGGGCGCGGTGGCGGTGGGCTTCTTTGGC
+ATTGCGTACTTTAACAATAACCCCGCGCTGGCCGGGGCGGTGAACCAAAACTCAGAACGC
+GTATTTATTGAACTGGCGCAGATCCTGTTTAACCCGTGGATTGCCGGTGTTCTGCTGTCT
+GCTATCCTGGCGGCGGTGATGTCGACGTTGAGCTGTCAGTTGCTGGTATGCTCCAGCGCG
+ATTACGGAAGATTTATATAAGGCTTTTCTGCGTAAAAGCGCCAGCCAGCAAGAGCTGGTA
+TGGGTAGGGCGAGTGATGGTGCTGGTGGTAGCGCTGATCGCCATTGCGCTGGCGGCGAAT
+CCTGATAACCGTGTGCTGGGGCTGGTGAGCTACGCCTGGGCTGGATTCGGCGCGGCATTT
+GGACCTGTTGTCCTGTTTTCTGTGATGTGGTCGCGTATGACACGTAACGGCGCGCTGGCG
+GGAATGATTATTGGCGCGGTGACGGTTATCGTCTGGAAACAATATGGCTGGCTGGATCTG
+TATGAGATTATCCCTGGCTTCATTTTCGGCAGCCTGGGGATCGTAATCTTTAGCCTGCTT
+GGCAAAGCGCCGACAGCAACGATGCAGGAACGCTTTGCAAAAGCGGACGCGCATTATCAT
+TCCGCGCCGCCGTCGAAGCTACAGGCGGAAATGGTAATGTCCGCACCAGGACACATTGTT
+TACAGTAGTTACAACACCCTGTACGGACATTCTCTCTCCGGTGGTGGTCTTGTCATCTTA
+AAAGCTCTCATCATTTCCCTTACTGTCCATACCCATGACGCCATATGTGGTGCGCGTAGC
+CGTGTGTGGCGTCGTTTCAAAAAGCAAGCTAAGGCTTACAAGGAAGCCAACCCTCAGATG
+TGTGTGCGCATAATCGCGTTCAAGAGAACGCGGGTGATGTATACCTACAACTCAAGGTGC
+TATCCATGGGAAGACAAAAAGCAGATGGGAAGACAAAAAGCAGTGATCAAAGCTCGTCGT
+GAAGCAAAGCGTGTGTTGAGACGAGATTCGCGTAGTCATAAGCAACGTGAAGAAGAATCG
+GTCACGTCACTGGTACAGATGGGCGGAGTAGAAGCCATTGGCATGGCGCGCGATAGTCGC
+GATACCTCTCCTGTTAAGGCGCGAAATGAAGCACAGGCGCATTATCTGAACGCTATCGAC
+AGTAAACAGCTTATTTTTGCGACCGGCGAAGCCGGCTGCGGAAAAACATGGATCAGTGCG
+GCAAAGGCGGCAGAAGCATTGATTCATAAGGACGTCGAGAGGATCATTGTGACGCGTCCG
+GTATTGCAGGCTGATGAAGATCTTGGTTTTTTGCCCGGTGATATCGCTGAAAAATTCGCG
+CCTTATTTTCGTCCCGTCTACGATGTCCTGCTTAAACGGTTGGGCGCGTCCTTTATGCAA
+TATTGTTTGCGCCCGGAAATCGGTAAGGTAGAAATTGCCCCGTTCGCCTATATGCGTGGG
+CGTACTTTTGAAAATGCGGTCGTGATCCTCGACGAGGCGCAAAATGTGACTGCGGCGCAA
+ATGAAAATGTTTTTGACGCGATTAGGCGAAAATGTCACGGTCATTGTCAATGGCGATATT
+ACGCAATGCGACCTGCCGCGCGGTGTGCGTTCCGGGTTGAGTGATGCGTTGGAACGCTTT
+GAAGAAGATGAAATGGTGGGGATTGTGCATTTCAACAAAGACGACTGCGTGCGCTCGGCG
+CTTTGTCAGCGAACGCTCCACGCATACAGCATGGAGCCTCAACCCCCACGTCTTAAACCC
+GGAAAAATCCTTGACACTCTGGGTGCTATGCAAAAAAGCCTGACACGTGCCTCCCAGCGT
+ATTGCGCAATATATTTTAGCCTTCCCCAGACAGGTGACACAGTCATCTATTGCCGATTTG
+TCGCGCGACACACAGGCCGGAGAAGCCACGGTTATTCGCTTTTGTCGCACCCTGGGCTAT
+AAAGGTTTTCAGGATTTTAAAATGGACCTGGCCATTGAACTTGCCACTACCGAGTCTGAT
+GACAGTAGTCCTCTACTGGATGCCGAAGTTAGCGAATCCGACGATGCCCACGCCATTGGT
+TTAAAATTGCAGAACACCATTAGTAATGTATTATCTGAAACGCTAAATCTGCTTGATATG
+CAACAGGTTCTCGGTGTCGTGGACGCCCTACGTCACTGTCACTCAGTTTATATCTTTGGT
+GTGGGCTCATCGGGGATCACGGCGCTGGATATGAAACACAAGCTAATGCGTATTGGTTTA
+CGGGGCGATGCGGTAAGCAATAACCATTTTATGTACATGCAGGCTACGCTATTAAAAGCA
+GGCGATGTCGCGATGGGTGTCAGTCACTCGGGCACATCGCCAGAAACAGTGCATTCACTC
+CGATTGGCCCGACAGGCTGGCGCCACCACAGTCGCCATTACCCATAATCTGGGTTCTCCA
+TTATGTGAAGAGGCCGATTTTTGCCTGATCAATGGTAATCGGCAAGGAATGTTGCAGGGT
+GACTCGATCGGTACGAAAGCCGCGCAGCTTTTCGTCTTTGACCTGCTCTATACCCTTCTT
+GTACAGTCCTCGCCGGAACAGGCCCGAGAAAGCAAATTACGGACAATGAATGCCCTGGAC
+ATGACAAAAGTGATATGTCTCAAAGTCCAGGGCGGCATTGGTGAAATTTTTACGGTGACG
+CAGCAGGCGGATAAATTCTTTCCGGCTACGCAGTTCCACTGGAGCTGGACGGAAAGCACA
+GTACCTGTATTGATGATTGGTTTTCTGTTTGCCAATATTCAGCAATTTACTGCCAGTCAG
+GATGTGGTCCAACGCTATATCGTGACTGACTCCATAGAGGAAACGAAGAAAACATTACTT
+ACAAATGCCAAACTGGTTGCTGTGATCCCTGTTTTCTTTTTTGCTATCGGCTCGGCATTA
+TTTGTCTACTATCAGCAACATCCACAATTATTACCGGCGGGATTCAACACTGGCGGCATT
+TTGCCCTTATTCGTGGTCACCGAAATGCCAGTCGGCATTGCAGGGTTGATAATCTCCGCT
+ATTTTCGCTGCCGCGCAGTCCAGCATCTCCAGCAGCTTAAACAGCATTTCCAGTTGTTTT
+AATTCCGATATCTATCAGCGTTTGAGTCATAAAAAAGGAACGCCAGAAAACCGTATGAAA
+ATAGCTAAGTTAGTTATTCTGGTCGCGGGCCTGATAAGTAGCGCGGCCTCGGTATGGCTG
+GTCATGGCCGATGAATCAGAAATCTGGGATGCATTTAATAGTCTGATAGGTCTGATGGGA
+GGGCCAATGACCGGTCTGTTCATGCTGGGCATTTTCTTTAAACGAGCAAATGCCGGGAGT
+GCGGTTTTAGGAATTATTATCAGCGTCATTACCGTGCTGGGCACACGCTATGCCACTGAC
+CTTAACTTCTTCTTTTATGGGGTCATTGGCTCGCTAAGCGTGGTGATCAGCGGCGTTATT
+TTCGCCCCGTTATTTGCCCCGGCACCGCCATTGACGCTGGATGAAAAACCTGAACCAAAG
+GTGACATTAATGTCACTATTAGCCAGGCTGGAACAAAGTGTACACGAAAACGGTGGGCTG
+ATTGTCTCATGCCAACCGGTACCAGGCAGCCCTATGGATAAACCTGAAATTGTGGCTGCA
+ATGGCACAGGCAGCGGCTTCGGCGGGTGCGGTCGCTGTGCGCATTGAAGGCATTGAGAAT
+CTGCGGACTGTTCGTCCCCATCTTTCTGTTCCTATTATTGGGATAATTAAACGTGACCTT
+ACAGGGTCGCCAGTCCGTATCACTCCATATTTACAGGATGTTGACGCCCTGGCGCAGGCA
+GGTGCCGATATTATCGCTTTTGATGCCTCATTCCGCTCTCGCCCGGTTGATATTGATAGT
+TTACTGACACGTATTCGCCTGCATGGATTACTGGCGATGGCAGACTGTTCAACCGTGAAT
+GAAGGCATAAGTTGCCATCAGAAAGGAATCGAATTCATTGGTACAACACTGTCTGGCTAT
+ACCGGTCCCATCACGCCGGTTGAGCCAGATTTGGCAATGGTGACACAACTGAGTCATGCA
+GGTTGTCGTGTTATTGCCGAGGGGCGCTATAACACGCCTGCACTGGCGGCCAATGCTATT
+GAGCATGGTGCCTGGGCAGTTACCGTTGGTTCCGCTATCACCCGTATCGAGCATATCTGT
+CAGTGGTTCAGTCACGCAGTAAAACGCATGAAAAATTTTAAGAAAATGATGACGCTAATG
+GCGCTATGTTTATCAGTTGCTATCACCACATCAGGATATGCAACCACGCTTCCTGATATA
+CCAGAACCACTGAAAAATGGTACTGGCGCTATTGATAATAATGGCGTGATTTATGTCGGC
+TTAGGTACCGCAGGGACATCCTGGTATAAAATTGATCTTAAAAAGCAACATAAAGACTGG
+GAGCGTATAAAGTCGTTTCCTGGTGGAGCTCGTGAGCAATCCGTGTCGGTATTTTTAAAT
+GATAAGCTGTATGTTTTTGGTGGCGTAGGGAAAAAAAACAGTGAATCACCGTTGCAGGTT
+TATAGCGATGTGTACAAATACTCACCGGTGAAAAATACATGGCAAAAAGTTGATACTATA
+TCTCCAGTTGGATTAACAGGGCATACGGGAGTAAAATTAAACGAAACGATGGTACTTATT
+ACCGGAGGGGTTAATGAGCATATCTTTGATAAGTATTTTATTGATATAGCGGCTGCGGAT
+GAAAGTGAAAAAAATAAAGTCATCTATAATTATTTTAATAAACCTGCCAAAGATTATTTT
+TTTAATAAAATCGTATTTATCTACAATGCTAAAGAGAACACATGGAAGAATGCCGGTGAG
+CTGCCAGGCGCGGGGACGGCAGGATCGTCATCGGTAATGGAAAATAATTTCTTGATGCTG
+ATTAATGGTGAGCTCAAACCGGGTTTACGTACCGATGTGATTTACCGCGCCATGTGGGAT
+AACGATAAGCTAACATGGTTGAAGAACAGCCAGTTACCGCCATCGCCTGGAGAACAACAG
+CAGGAAGGGTTGGCCGGAGCATTTTCGGGCTATAGCCACGGTGTCCTGCTTGTCGGTGGT
+GGCGCGAATTTTCCGGGAGCAAAACAAAATTATACTAATGGAAAGTTTTATTCCCACGAA
+GGGATAAATAAAAAATGGCGAGATGAAGTCTATGGTTTGATTAATGGCCATTGGCAATAT
+ATGGGTAAAATGAAACAACCTCTCGGCTATGGTGTATCAGTAAGTTATGGTGATGAAGTT
+TTCCTTATTGGTGGTGAAAATGCTAAAGGGAAACCTGTTTCGTCTGTAACCTCCTTTACC
+ATGCGTGATGGTAATTTATTAATAAAAATGAAAATCAACAGATATCTTCTGGGTATGGTT
+TCGTTTATAGCATTTTCATCATATCTACAAGCGGCAACCCTTGATTATCGGCATGAATAT
+GCTGATAGAACCAGAATTAATAAAGACCGTATTGCTATAATTGAAAAGCTTCCTAACGGC
+ATTGGTTTTTATGTCGATGCCAGCGTTAAATCGGGAGGAGTAGATGGTGAGCAGGATAAG
+CATTTAAGCGATCTCGTCGCAAACGCTATAGAACTGGGCGTAAGTTATAATTATAAAGTT
+ACGGACCATTTTGTTTTGCAGCCTGGATTTATATTTGAAAGCGGTCCAGACACTTCAATT
+TATAAGCCTTATTTAAGGGCGCAATATAATTTTGATTCTGGTGTTTATATGGCTGGTCGT
+TACCGTTATGACTATGCAAGGAAGACAGCTAACTATAATGATGATGAGAAAACGAATAGA
+TTTGATACTTATATAGGTTATGTTTTTGATGAGTTGAAATTGGAATATAAATTTACCTGG
+ATGGATAGCGATCAAATTAAATTTGATAACAAAAAAACAAACTATGAACATAATGTGGCT
+TTAGCCTGGAAACTGAATAAGTCATTTACACCATACGTTGAGGTCGGAAATGTAGCGGTG
+AGAAATAATACCGATGAGAGACAGACCCGTTATCGCGTTGGATTACAATACCACTTTGTG
+ATAGCAAAATTCTTCCCGTGGTATAGCGAGATAACACGTCCACAAAAAAATGCTTTATTT
+TCAGCATGGCTGGGTTACGTTTTTGATGGCTTCGACTTTATGCTGATTTTCTACATTATG
+TATCTGATCAAGGCTGACTTAGGATTGACAGATATGGAGGGCGCATTCCTTGCCACAGCG
+GCCTTTATTGGGCGACCATTTGGCGGGGCGCTATTTGGTCTGCTGGCAGACAAATTTGGC
+CGTAAGCCGTTAATGATGTGGTCGATAGTTGCCTATTCTGTAGGTACAGGGTTAAGTGGC
+CTGGCTTCCGGTGTAATTATGCTGACGCTTAGTCGTTTTATTGTCGGTATGGGGATGGCG
+GGGAAGTATGCTTGCGCTTCTACTTATGCCGTGGAAAGTTGGCCAAAGCATTTAAAATCT
+AAAGCGAGCGCATTTCTGGTTTCAGGTTTCGGTATTGGTAACATCATAGCAGCCTATTTT
+ATGCCGTCATTTGCCGAAGCGTATGGTTGGCGTGCTGCTTTTTTTGTCGGTTTGCTACCC
+GTTCTTTTAGTAATCTACATCCGGGCCAGGGCTCCTGAATCTAAAGAGTGGGAAGAAGCC
+AAACTCAGTGGTCTCGGAAAGCATTCACAAAGTGCCTGGTCAGTTTTCTCTTTGTCAATG
+AAAGGGCTATTTAATCGAGCTCAATTTCCACTGACATTATGTGTATTTATTGTTCTGTTC
+TCTATTTTCGGCGCAAACTGGCCGATCTTTGGTCTACTGCCTACATATTTGGCGGGAGAG
+GGCTTTGATACGGGCGTGGTCTCTAATTTAATGACGGCGGCGGCATTCGGCACTGTATTG
+GGAAATATCGTTTGGGGTCTGTGCGCAGATAGAATTGGTTTGAAGAAAACGTTCAGCATT
+GGTCTTCTCATGTCCTTTTTATTCATTTTCCCGTTATTCAGAATTCCGCAAGATAATTAT
+TTACTGCTGGGCGCATGTTTATTCGGTTTAATGGCGACTAACGTAGGTGTTGGCGGGCTG
+GTTCCCAAATTTCTCTACGACTACTTTCCTCTTGAGGTTCGTGGTTTGGGTACCGGGCTT
+ATTTATAATCTTGCTGCGACATCAGGCACATTCAATTCAATGGCGGCGACCTGGCTTGGA
+ATAACAATGGGGCTAGGCGCTGCGCTAACGTTCATTGTTGCTTTCTGGACCGCAACAATT
+CTACTCATTATTGGCCTATCCATTCCGGATAGACTAAAAGCACGTCGTGAAAGGTTTCAG
+TCAACAAAAGAATTTATGACGAAATACGGTGTTATAGGTACAGGTTATTTTGGCGCTGAA
+CTGGCGCGATTTATGTCTAAGGTTGAAGGGGCGAAAATCACTGCGATTTACGATCCGGTA
+AATGCGGCTCCGATAGCGAAAGAGCTGAACTGTGTCGCCACTTCAACGATGGAGGCGCTT
+TGTACCCATCCTGATGTGGATTGCGTAATTATTGCTTCACCAAATTACTTACATAAAGCG
+CCGGTCATTGCGGCGGCTAAAGCGGGTAAACACGTGTTTTGTGAAAAACCTATCGCCTTA
+AATTACCAGGATTGTAAGGATATGGTTGATGCCTGCAAAGAAGCTGGTGTTACCTTTATG
+GCGGGTCACGTTATGAACTTTTTTCACGGGGTTCGCCACGCTAAAGCGCTCATCAAAGCC
+GGTGAAATCGGTGAAGTTACACAAGTTCACACTAAACGTAATGGTTTTGAAGACGTGCAG
+GATGAGATCTCATGGAAGAAGATTCGCGCAAAGTCAGGTGGGCATCTGTACCATCACATT
+CACGAGCTAGATTGTACACTGTTCATCATGGATGAAACCCCATCCCTGGTTTCAATGGCG
+GCGGGGAATGTTGCGCACAAAGGTGAAAAATTTGGTGATGAAGATGATGTTGTCCTAATC
+ACCCTTGAGTTTGAAAGCGGTCGTTTCGCGACACTTCAGTGGGGATCATCGTTCCACTAC
+CCTGAGCACTATGTATTAATTGAGGGCACGACAGGTGCAATTCTCATTGATATGCAAAAC
+ACGGCTGGTTATCTAATAAAAGCGGGCAAAAAAACACACTTTCTTGTGCATGAAAGCCAG
+GCGGAGGATGATGATCGTCGCAACGGTAACATATCCAGCGAGATGGATGGCGCAATCGCT
+TATGGTAAACCCGGTAAACGTACGCCGATGTGGCTCTCATCAATTATGAAACTGGAGATG
+CAGTACTTGCATGATGTGATAAACGGTCTGGAGCCAGGCGAGGAGTTTGCTAAATTGCTA
+ACGGGAGAAGCGGCGACAAATGCCATTGCTACCGCTGATGCTGCGACGCTTTCTTCAAAC
+GAGGGGCGCAAAGTTAAACTCACTGAAATTCTTGGCATGGAGATAATTTTTTATCACCCG
+ACATTTAACGCCGCCTGGTGGGTAAATGCGCTGGAGAAGGCTCTCCCACATGCGCGCGTT
+CGTGAATGGAAGGTCGGTGATAACAACCCCGCAGACTATGCGCTTGTATGGCAGCCCCCG
+GTTGAAATGCTGGCCGGAAGACGCTTAAAAGCCGTCTTTGTGCTGGGCGCGGGGGTGGAT
+GCAATTCTGAGTAAATTAAATGCGCATCCGGAAATGCTGGACGCCTCCATTCCTCTATTC
+CGTCTGGAAGATACCGGAATGGGCCTGCAAATGCAGGAGTATGCCGCCAGCCAGGTATTA
+CACTGGTTCCGTCGTTTCGATGATTATCAGGCGCTGAAAAATCAGGCGCTATGGAAACCG
+TTGCCGGAATATACCCGCGAAGAGTTTAGCGTCGGTATCATAGGCGCAGGGGTACTGGGC
+GCAAAAGTGGCAGAAAGTCTACAGGCGTGGGGGTTCCCGTTACGTTGCTGGAGTCGTAGC
+CGCAAATCCTGGCCTGGCGTGGAAAGTTATGTAGGGCGTGAAGAACTGCGCGCTTTCCTG
+AACCAGACGCGGGTGCTGATTAATCTGCTGCCGAATACGGCCCAAACGGTAGGAATTATT
+AATAGCGAATTGTTGGATCAATTGCCGGATGGCGCTTACGTGCTGAATCTCGCGCGCGGC
+GTTCATGTTCAGGAGGCGGATCTGCTGGCTGCGCTTGATAGCGGTAAGCTAAAAGGCGCG
+ATGTTGGATGTCTTTAGCCAGGAACCGTTACCGCAGGAAAGTCCATTATGGCGCCATCCG
+CGAGTCGCCATGACGCCGCACATTGCGGCAGTCACCCGTCCGGCGGAAGCCATCGATTAT
+ATTAGCCGCACCATTACCCAGCTGGAGAAGGGAGAGCCGGTGACGGGGCAGGTGGATCGG
+GCGAGAGGATAT---ATGATGCGCGCCATGAACATACTTCTTTCTATTGCTATCACTACG
+GGCATCCTTTCTGGAATATGGGGATGGGTGGCCGTCTCCCTGGGGTTACTAAGCTGGGCC
+GGTTTTTTAGGCTGTACGGCTTATTTCGCCTGTCCGCAGGGCGGCTTTAAGGGATTGTTG
+ATTTCCGCCTGTACGCTGTTAAGCGGTATGGTGTGGGCGCTGGTCATTATTCACGGTAGC
+GCGTTGGCGCCGCATCTGGAAATTGTCAGTTACGTGTTGACGGGGATCGTGGCATTCCTG
+ATGTGTATCCAGGCAAAGCAGCTATTGCTTTCTTTTGTTCCGGGAACATTTATCGGCGCC
+TGCGCGACATTTGCAGGGCAGGGTGACTGGCGGTTGGTATTACCGTCGCTGGCGCTGGGG
+CTAATCTTTGGCTATGCCATGAAAAATAGTGGGCTATGGCTGGCATCACGCCGCGAGCAA
+CATTCAGCGAATACGGCGGTCACAAAAATGAAACGCTATCTGACCTGGATTGTAGCAGCA
+GAGTTACTGTTCGCTACCGGAAACCTGCATGCCAATGAAGTTGAAGTCGAGGTTCCCGGA
+TTGTTAACCGACCATACCGTCTCTTCGATAGGACATGAATTCTATCGTGCATTCAGCGAC
+AAATGGGAAAGCGAATACACCGGCAATCTGACCATTAATGAAAGACCCAGTGCGCGTTGG
+GGAAGCTGGATCACCATAACGGTAAATCAGGACGTTATTTTCCAGACCTTTTTATTTCCA
+ATGAAAAGAGACTTCGAGAAAACCGTCGTCTTCGCATTAGCGCAAACAGAGGAAGCATTA
+AATCGCCGACAAATAGATCAAACGCTATTAAGTACGAGTGATTTAGCGCGTGATGAATTC
+ATGTCCGTAATCAAGAAAAATATCCCTGCCATAGGCCTGTGTATCTGCGCTTTTTTTATC
+CATTCTGCGGTAGGGCAACAAACGGTACAGGGCGGCGTTATCCATTTTCGCGGCGCGATT
+GTTGAGCCACTGTGCGATATTTCTACTCACGCCGAAAATATTGATTTAACCTGCCTACGC
+GAAGGTAAAAAGCAAATGCACCGGATAGACCTTCGGCAGGCATCTGGATTACCGCAGGAT
+ATTCAGTCCATTGCGACGGTACGGCTGCATTATCTCGATGCGCAAAAAAGCCTGGCGGTG
+ATGAATATTGAGTACCGTATGACATCACGTCTTCAGGTCATACAGGGTGATATCACTCAA
+CTTAGCGTCGATGCGATTGTGAATGCCGCTAACGCATCATTAATGGGCGGCGGTGGCGTA
+GACGGCGCAATTCATCGCGCGGCGGGGCCGGCATTGCTGGACGCCTGTAAACTCATCCGT
+CAGCAACAGGGCGAATGTCAGACGGGACATGCGGTTATCACGCCTGCTGGCAAGCTTTCG
+GCAAAGGCGGTTATTCACACAGTGGGGCCCGTCTGGCGAGGCGGCGAACACCAGGAAGCT
+GAGCTACTCGAAGAGGCATACCGGAATTGTTTGCTGCTTGCCGAGGCGAATCACTTTCGT
+TCCATCGCTTTTCCGGCAATCAGTACCGGCGTTTATGGCTATCCACGCGCCCAGGCCGCT
+GAAGTCGCCGTCAGGACGGTTTCAGATTTTATTACCCGTTACGCTCTGCCTGAACAGGTA
+TACTTTGTCTGTTATGATGAAGAAACTGCCCGGCTTTACGCAAGATTACTTACTCAGCAA
+GGCGACGACCCTGCCATGAAGAAACTGCCCGGCTTTACGCAAGATTACTTACTCAGCAAG
+GCGACGACCCTGCCTGATAAAACACGCCTGGAGCGTGCCGTTGAACCGCTATGCGCGCGC
+CATCCCGGAGAGTGCGGCATTCTTGCGCTGGATAACAGTCTGGACGCTTTTGCCGCCCGC
+TACCGCCTGACCGAAATGGCGGCGCGGACGCTGGATGTGCAGTATTATATTTGGGAAGAC
+GATATGTCCGGGCGGCTGCTCTTTTCGGTTCTGCTGTCGGCGGCGAAGCGCGGCGTTCAT
+GTTCGTCTGCTGCTGGATGATAACAATACGCCTGGTCTGGATGATACGTTGCGCTTGCTG
+GATAGCCATCCTAATATCGAAGTTCGTCTGTTTAATCCTTTCTCTTTTCGTACGCTACGC
+GCGCTGGGATATTTGACGGATTTTGCGCGGCTGAATCGGCGGATGCACAATAAAAGTTAC
+ACTGCCGACGGCGTAGTGACGCTGGTCGGTGGGCGCAACATCGGCGATGCCTATTTCGGC
+GCTGGCGAGGAGCCGCTATTTTCCGATCTGGACGTGATGGCCATTGGCCCGGTGGTCAAT
+GATGTCGCCAATGATTTTGAACGTTACTGGCGCTGTAGTTCAGTGTCGACATTGCAGCAA
+GTATTATCCCTTTCTGAGCAGGAACTGACGCAGCGTATCGAACTTCCCGAATCCTGGTAT
+AACGATGAGATCACCCGCCGTTATCTGCATAAGCTGGAAACCAGCCAGTTTATGGCGGAT
+CTCGATCGCGGAACGTTGCCGCTGATTTGGGCAAAAACACGCTTGCTTAGCGATGACCCT
+TCTAAAGGCGAGGGGAAGGCGCAGCGCCATTCGCTTCTTCCGCAGCGATTATTTGACGTG
+ATGGGGTCGCCGACGGAGCGTATCGACATTATTTCCGCTTACTTTGTCCCTACGCGCGCA
+GGCGTGGCGCAGTTGCTTAATCTGGTCAGGAAAGGTGTGAAGATCGCCATCTTAACTAAC
+TCTCTGGCGGCCAACGATGTGGCGGTCGTTCACGCAGGGTACGCGCGCTGGCGCAAGAAA
+TTACTGCGCTATGGCGTGGAGCTCTACGAACTGAAACCGACCCGCGAACATGAAACCGCC
+GTACATGATCGCGGACTCACCGGGAACTCAGGTTCCAGCTTACATGCTAAAACGTTCAGT
+ATTGATGGTAGTAAGGTGTTTATCGGGTCGCTTAATTTTGATCCCCGTTCAACGCTTTTA
+AATACCGAAATGGGCTTTGTCATTGAAAGTGAAACGCTGGCGACGCTTATTCATAAGCGT
+TTTACGCAGAGCCAACGCGATGCGGCCTGGCAACTGCGGCTGGATCGCTGGGGACGAATT
+AACTGGATCGATCGTCAGCAAGAAGAGGAAAAGGTGTTAAAGAAAGAACCCGCTACGCGT
+TTCTGGCAGCGAGTTCTGGTACGGTTGGCGGCAATTTTACCTGTGGAATGGTTGCTGATG
+AGCTCTGTACCCGCGCCGCGTGAATATTTTCTTGACTCTATCCGCGCATGGCTGATGTTG
+TTAGGGATTCCCTTTCATATCTCGTTGATCTATTCCACTCACAGTTGGCATGTCAATAGC
+GCCGCGCCATCGTGGTGGCTAACCCTGTTTAACGATTTTATCCACGCTTTTCGTATGCAG
+GTGTTTTTTGTTATTTCTGGTTATTTTTCGTACATGTTATTTTTACGTTATCCATTAAAA
+CACTGGTGGAAAGTACGGGTAGAACGTGTGGGTATTCCCATGCTTACCGCAATCCCTTTG
+CTTACCTTGCCGCAATTTATCCTGTTGCAATATGTCAAAGAGAAAACAGAGAACTGGCCT
+ACACTCTCTGCCTATGAAAAATATAATACGTTAGCGTGGGAACTCATTTCACATCTGTGG
+TTTTTACTGGTGCTGGTGATATTAACCACCGTCAGCATCGGGATTTTTACCTGGTTCCAA
+AAAAGGCAGGAAACAAGCAAGCCTCGTCCCGCCGCTATTTCGCTGGCCAAACTTTCGCTT
+ATTTTTTTCCTGCTGGGGGTGGCGTACGCTGCTATCAGGCGCATTATATTCATCGTATAT
+CCGGCAATCCTCAGTGACGGCATGTTCAATTTTATTGTGATGCAAACGCTATTTTATGTG
+CCGTTTTTTATTCTCGGCGCGTTGGCCTTCATTCACCCCGATCTGAAAGCGCGCTTCACC
+ACGCCCTCACGCGGATGCACTTTAGGCGCTGCCGTTGCTTTTATCGCGTATCTGCTGAAT
+CAACGTTATGGGAGCGGCGACGCCTGGATGTACGAAACCGAATCCGTGATTACGATGGTA
+ATGGGGCTATGGATGGTGAACGTGGTATTTTCACTGGGGCATCGCTTGTTAAACTTTCAG
+TCCGCGCGTGTCACCTATTTCGTGAATGCTTCGCTGTTTATTTATCTGGTGCATCATCCC
+TTAACGCTTTTCTTTGGCGCGTATATTACACCGCATATCTCCTCCAACCTGATCGGGTTC
+TTGTGCGGGCTGATATTTGTTATGGGTATTGCGTTAATTCTGTATGAAATTCATTTACGC
+ATCCCGCTCCTGAAATTTCTCTTTTCAGGTAAACCGCCGGTAAAACAAGAAAGCCGCGCC
+GCGATCGGGATGAAACATAAACGACAAATGATGAAAATGCGTTGGTTGGGCGCAGCTATT
+ATGTTAACGCTCTACGCATCATCGAGCTGGGCGTTCAGTATTGATGACGTGGCAAAACAA
+GCTCAATCTTTAGCCGGGAAAGGCTATGAGGCGCCTAAAAGCAACTTGCCCTCCGTTTTC
+CGCGACATGAAATATGCGGATTATCAGCAGATCCAGTTTAACAGCGATAAAGCCTACTGG
+AACAACTTAAAGACCCCTTTTAAGCTCGAATTTTACCATCAGGGGATGTACTTCGATACG
+CCGGTCAAGATTAACGAAGTGACGGCGACGACGGTCAAAAGAATCAAATACAGCCCGGAT
+TACTTCAATTTTGGCAATGTTCAGCACGATAAAGACACGGTAAAAGATTTAGGCTTCGCC
+GGGTTCAAAGTCCTGTACCCCATTAACAGTAAAGATAAGAACGACGAAATCGTCAGTATG
+CTTGGCGCCAGCTATTTCCGCGTTATCGGCGCAGGCCAGGTGTATGGCTTATCTGCGCGC
+GGCCTGGCGATTGATACCGCCTTACCATCTGGTGAAGAGTTTCCCCGCTTTCGCGAGTTC
+TGGATTGAGCGTCCAAAACCCACCGATAAGCGTTTGACCGTCTATGCATTACTGGATTCT
+CCGCGCGCGACCGGCGCTTACCGTTTTGTGATCATTCCTGGCCGCGATACCGTGGTGGAC
+GTGCAGTCAAAAGTCTATCTGCGCGATAAGGTGGGCAAGCTGGGCGTTGCGCCATTAACC
+AGTATGTTCCTGTTTGGGCCAAACCAGCCGTCGCCGACGACCAACTATCGTCCGGAATTG
+CATGACTCGAACGGCTTATCCATTCATGCGGGTAATGGCGAGTGGATTTGGCGTCCGCTG
+AACAATCCAAAACACCTCGCTGTGAGCAGCTATGCGATGGAAAACCCTCAGGGATTCGGC
+CTGTTGCAGCGTGGTCGCGAGTTCTCGCGCTTTGAAGATTTAGACGATCGCTATGACCTG
+CGTCCAAGCGCCTGGATTACCCCGAAAGGCGACTGGGGCAAAGGTAAGGTTGAACTGGTT
+GAAATTCCGACCAATGATGAAACCAACGATAACATCGTCGCTTACTGGACTCCGGATCAA
+CTGCCGGAACCGGGTAAAGAGATGAACTTCAAGTACACTCTGACCTTCAGCCGCGATGAA
+GATAAACTTCATGCGCCGGATAATGCCTGGGTGCTGCAAACACGCCGCTCAACGGGCGAC
+GTTAAACAGTCGAATCTGATTCGCCAGCCCGACGGCACTATTGCCTTTGTGGTGGATTTC
+GTTGGCGCCGACATGAAAAAACTGCCGCCGGATACGCCCGTCGCTGCACAAACCAGCATT
+GGCGATAACGGTGAAATCGTTGACAGTAATGTACGCTATAACCCAGTCACTAAAGGCTGG
+CGTTTAATGCTGCGCGTGAAAGTCAAAGACGCGAAGAAAACCACGGAAATGCGTGCCGCA
+TTGGTGAATGCCGATCAGACGCTAAGTGAAACCTGGAGCTACCAGTTACCTGCCAATGAA
+ATGAATAAAACAACTGAGTATATTGACGCACTGCTGCTTTCTGAACGTGAGAAAGCGGCA
+TTGCCGAAAACTGACATCCGCGCCGTGCATCAGGCGCTGGATGCCGAGCATCGGACTTAC
+TCGCGAGAAGACGATTCACCGCAGGGTTCCGTAAAAGCCCGCCTTGAACACGCCTGGCCG
+GATTCATTGGCGAAGGGGCAGTTAATTAAAGATGATGAAGGGCGCGATCAGTTGCAGGCT
+ATGCCAAAAGCGACGCGCTCTTCGATGTTTCCTGATCCCTGGCGAACCAACCCGGTTGGC
+CGTTTCTGGGATCGCCTGCGTGGGCGGGATGTAACGCCGCGCTATGTTTCTCGTCTGACA
+AAAGAAGAGCAGGCGAGTGAGCAAAAATGGCGTACCGTCGGCACTATACGCCGCTATATT
+TTGTTAATTTTGACTCTGGCGCAAACCGTCGTCGCGACCTGGTATATGAAGACCATTCTG
+CCCTATCAGGGATGGGCGCTCATCAATCCTATGGATATGGTGGGGCAGGATATTTGGGTC
+TCCTTTATGCAGCTCCTGCCCTACATGCTGCAAACCGGTATCCTGATTTTGTTTGCCGTG
+CTGTTCTGCTGGGTGTCTGCCGGATTCTGGACGGCGCTGATGGGCTTCCTGCAACTGCTT
+ATCGGGCGCGATAAGTACAGTATCTCCGCGTCTACGGTTGGCGATGAGCCCCTCAATCCG
+GAACACCAGACGGCGCTGATCATGCCTATCTGTAATGAAGACGTTAGCCGCGTTTTCGCC
+GGTCTGCGCGCGACCTGGGAGTCCGTTAAAGCCACAGGCAACGCCGCGCATTTTGACGTC
+TATATCCTTAGCGATAGTTATAACCCGGATATCTGCGTGGCGGAGCAAAAGGCGTGGATG
+GAGCTCATCGCGGAAGTGCAGGGCGAAGGCCAAATTTTTTACCGTCGCCGCCGCCGCCGT
+ATGAAACGCAAAAGCGGCAATATTGACGATTTTTGCCGCCGCTGGGGCAATCAGTACAGC
+TATATGGTGGTGCTGGACGCGGACTCAGTGATGAGCGGCGAGTGTCTGAGCGGGCTGGTG
+CGCCTGATGGAAGCGAACCCTAACGCCGGGATTATCCAGTCTTCGCCGAAAGCGTCGGGG
+ATGGATACTCTGTATGCCCGCTGCCAACAGTTCGCGACCCGTGTTTATGGACCGCTGTTT
+ACCGCCGGGCTGCACTTCTGGCAGTTGGGGGAGTCGCACTACTGGGGGCACAATGCCATT
+ATCCGCGTGAAGCCGTTTATCGAGCACTGCGCTCTGGCGCCGCTGCCGGGAGAAGGTTCG
+TTCGCCGGATCGATTCTTTCCCACGACTTTGTGGAGGCGGCGCTAATGCGTCGGGCAGGG
+TGGGGCGTCTGGATTGCCTACGATCTCCCCGGCTCCTATGAAGAGCTGCCGCCAAACCTG
+CTGGATGAGCTTAAACGCGACCGCCGCTGGTGTCACGGCAACCTGATGAACTTTCGTCTG
+TTCCTGGTGAAAGGAATGCACCCGGTGCATCGCGCCGTGTTCCTGACCGGGGTAATGTCA
+TACCTGTCCGCGCCGTTATGGTTTATGTTCCTCGCGCTTTCTACCGCGCTGCAGGTCGTT
+CATGCGTTAACAGAGCCGCAATATTTCCTTCAGCCGCGCCAGCTTTTTCCGGTCTGGCCG
+CAGTGGCGTCCGGAACTGGCAATCGCGCTGTTTGCGTCAACGATGGTGCTGCTGTTCCTG
+CCGAAGCTGCTCAGTATTATGCTGATCTGGTGTAAAGGCACCAAAGAGTATGGCGGTTTC
+TGGCGCGTTACGCTGTCGCTATTGCTGGAAGTGCTGTTCTCCGTGTTGCTGGCGCCGGTG
+CGTATGCTGTTTCATACCGTGTTTGTGGTCAGCGCGTTCCTCGGCTGGGAAGTGGTCTGG
+AACTCACCGCAACGCGACGATGATTCTACGCCGTGGGGAGAAGCCTTTATGCGTCACGGC
+TCTCAACTGCTGCTGGGGCTGGTCTGGGCGGTGGGTATGGCGTGGCTGGATTTACGCTTT
+CTGTTCTGGCTGGCGCCGATTGTCTTTTCGCTGATTCTGTCGCCATTTGTTTCGGTGATC
+TCCAGTCGTTCAACGGTAGGATTACGCACCAAACGCTGGAAGCTGTTCCTGATCCCGGAA
+GAGTATTCGCCGCCTCAGGTGTTGGTCGATACCGATAAATATCTGGAGATGAATCGCCGC
+CGTATTCTGGACGATGGCTTTATGCATGCGGTATTTAACCCGTCGCTTAATGCGCTGGCG
+ACCGCGATGGCCACCGCGCGTCACCGCGCCAGTAAGGTGCTGGAAATAGCCCGCGATCGT
+CATGTGGAGCAGGCGCTAAACGAAACGCCGGAGAAACTGAACCGCGATCGGCGTCTGGTT
+TTGCTCAGCGATCCGGTGACGATGGCGCGTTTACACTATCGGGTCTGGAATGCGCCAGAG
+AGATACTCTTCCTGGGTAAACCATTATCAGTCTCTCGTCCTGAATCCGCAGGCGTTGCAG
+GGACGAACATCGTCAGCGGGAGTGCGTATATTCGCGGTGAGCATAATGGTGATTACCCTG
+AGCGGCTGCGGCAGTATTATCAGCAGAACGATCCCCGGACAAGGACACGGCAACCAGTAT
+TACCCTGGCGTGCAGTGGGATATGCGTGATTCCGCATGGCGCTATATCACTATCCTCGAT
+CTGCCCTTCTCACTGATCTTCGATACACTGCTACTGCCGCTCGATATTCACCACGGGCCT
+TATGAGATGAAAAAAAACCTGCTGGGATTCACCCTCGCATCCTTGTTATTCACGACCGGT
+TCCGCCGTGGCGGCGGAGTATAAAATTGATAAAGAAGGCCAACATGCGTTCGTCAATTTC
+CGCATCCAGCATCTGGGCTACAGCTGGCTATACGGCACCTTTAAAGATTTCGACGGCACG
+TTCACTTTTGACGAAAAAAATCCGTCAGCAGACAAAGTGAATGTGACCATTAACACCAAT
+AGCGTCGACACTAACCATGCCGAACGTGACAAACACCTGCGTAGCGCGGAGTTTCTTAAT
+GTTGCGAAATTCCCGCAGGCAACCTTCACCTCTACCAGCGTGAAAAAAGAGGGCGATGAA
+CTGGATATTACCGGCAATCTGACGCTCAATGGCGTGACTAAACCGGTGACGCTGGAAGCG
+AAGCTGATGGGCCAGGGCGACGATCCGTGGGGCGGTAAGCGCGCGGGCTTTGAGGCCGAA
+GGAAAAATTAAGCTGAAAGATTTCAATATAACTACCGATCTCGGCCCAGCCTCACAAGAG
+GTGGAGCTTATCATCTCAGTAGAAGGCGTTCAGCAGAAGATGTTACTGATGATGGCGCTG
+ATCGTGCGTATTATCTGGCGGCTTTATTCTCCGCCGCCCGTTGCGTTGACCAGCTATTCC
+CGTTTAACGCGCATTGGCGCCGCCGCGGGTCATATCCTTCTGTATCTCCTGCTCTTTGCG
+ATAATCATTAGCGGCTACCTGATTTCCACCGCCGACGGTAAACCGATTAGCGTCTTTGGC
+TGGTTTGAGATTCCGGCCACGCTTACGGACGCGGGCGCGCAGGCTGACATCGCCGGAACA
+CTGCATCTGTGGTTTGCCTGGTCGCTGGTCATTATCTCGCTCTCGCATGGGGTTATGGCG
+CTAAAACACCATTTCATCGATAAAGACGACACACTGAAACGTATGACAGGAATGTCGTCA
+TCTGACTATGGAGCTCAAAAAATGAAATACGACCTTATTATTATCGGCAGCGGTTCGGTT
+GGCGCCGCCGCTGGTTATTACGCCACCCGCGCCGGGCTAAAGGTCCTGATGACCGATGCG
+CATATGCCGCCTTATCAACAGGGCAGCCACCACGGCGATACCCGTCTTATCCGCCACGCT
+TATGGTGAAGGCGAAAAATATGTCCCGCTGGTGCTTCGCGCCCAGACGCTTTGGGATGAG
+CTCTCCACACACAATGAAGAGCCTATTTTTGTCCGCTCCGGCGTCGTCAACCTCGGCCCG
+GCCGATTCCGCTTTCTTAGCCAACGTCGCACGAAGCGCGCAACAGTGGCAATTGAACGTC
+GAGCGCCTGGACGCGACGGCCCTCATGACGCGCTGGCCGGAAATTCGCGTGCCCGATAAT
+TATATCGGGCTGTTTGAAGCTGACTCCGGTTTCCTGCGCAGCGAATTAGCCATTACCACA
+TGGCTTCGTCTGGCCCGAGAGGCAGGCTGCGCACAGCTATTCAACAGCCCGGTAAGCCAT
+ATTCACCATGATGATAACGGTGTGACGATAGAGACGAGTGAAGGCTGCTACCACGCCAGC
+AAAGCGCTGATTAGCGCGGGCACCTGGGTCAAAACGCTGGTACCGGAGCTGCCCGTTCAG
+CCCGTACGTAAAGTTTTTGCCTGGTTTAAGGCGGATGGACGTTACAGCACTAAAAACCGC
+TTTCCGGCCTTTACCGGCGAAATGCCCAACGGCGATCACTATTACGGTTTCCCGGCGGAG
+AACGACGAGTTAAAAATCGGCAAACACAATGGCGGGCAGCGAATACAGGCACCGGAAGAG
+CGCAAGCCCTTTGCCGCCGTTGCCAGCGATGGCGCGGAAGCATTTCCTTTCCTGCGTAAC
+GTACTGCCGGGTATCGGCGGTTGTTTACATGGGGCGGCATGTACCTATGATAATTCGCCG
+GACGAGGATTTTATTATCGATACGCTGCCTGGCCATGAGAATACGCTTGTCATCACTGGA
+CTCAGCGGACATGGTTTTAAATTCGCCCCGGTGTTAGGAGAAATCGCTGCGGATTTTGCG
+TTGGGAAAAACGCCCTCCTTTGATCTGACGCCGTTCCGGCTTTCCCGTTTTAGCCAAATG
+GAAAAGAATAATGAAGTCATTCAGACCCATCCGCTTGTAGGATGGGACATCAGCACCGTC
+GATAGCTATGATGCGCTGATGCTGCGTTTACACTACCAGACCCCAAATCGTCCGGAACCG
+GAAGGGACTGAAGTTGGTCAAACGCTCTGGTTAACGACAGATGTAGCCAGGCAATTTATT
+TCAATATTAGAAGCCGGCATCGCCAAAATAGAATCAGGCGATTACCAGGAAAACGAGTAT
+CGTCACCAT
+>22222_2#22
+ATGCGTATCAGTACCCAGATGATGTACGAACAAAATATGAGCGGCATCACTAATTCTCAG
+GCCGAATGGATGAAGCTGGGCGAGCAGATGTCTACCGGTAAGCGCGTTACCAACCCATCT
+GACGATCCGATCGCCGCGTCGCAGGCGGTAGTACTCTTTCAGGCGCAGGCGCAGAATAGC
+CAGTACGCCCTGGCGCGTACGTTTGCCACCCAAAAAGTGTCGCTGGAAGAAAGCGTACTC
+AGTCAGGTGACGACGGCGATTCAAACCGCGCAGGAAAAAATCGTCTATGCCGGAAACGGC
+ACGTTAAGCGACGATGACCGCGCGTCGCTGGCGACGGATTTACAGGGGATCCGCGATCAG
+CTGATGAACCTGGCAAACAGCACTGACGGCAATGGTCGCTATATCTTTGCCGGGTATAAA
+ACGGAAGCGGCGCCATTCGACCAGGCGACAGGTGGTTATCATGGCGGCGAGAAAAGTGTT
+ACCCAGCAGGTGGATTCCGCACGCACGATGGTAATTGGCCATACGGGAGCGCAAATTTTT
+AATAGCATCACCAGCAATGCGGTGCCGGAACCGGATGGCTCGGACTCCGAAAAGAATCTG
+TTTGTCATGCTCGATACGGCAATTGCCGCGCTCAAGACCCCGGTGGAAGGCAATGACGTG
+GAAAAAGAAAAAGCCGCTGCCGCCATTGATAAAACCAATCGCGGCTTAAAAAATTCGCTT
+AATAACGTCCTGACCGTTCGTGCGGAACTGGGAACGCAACTGAGCGAACTCAGTACGCTG
+GATTCACTGGGAAGCGACCGTGCGCTGGGACAGAAGCTACAGATGAGCAACCTGGTAGAT
+GTGGACTGGAACTCGGTCATTTCCTCCTACGTCATGCAACAGGCGGCATTACAGGCGTCC
+TATAAAACGTTTACCGACATGCAGGGAATGTCGCTTTTCCAGTTGAACCGGATGTCCAGC
+TTGATTAATCACGCCATGAGCGGACTTAACGCCGCGCAGGCCGCGTTAAATACGGTCAGT
+AATAACATCAACAATTATAACGTTGCGGGTTATACCCGGCAGACAACTATTCTGGCGCAG
+GCAAACAGTACGTTAGGGGCTGGCGGCTGGATAGGTAATGGCGTTTACGTTTCAGGCGTA
+CAGCGCGAATATGATGCGTTTATCACTAATCAGCTACGCGGCGCGCAAAACCAGAGCAGC
+GGCTTAACCACGCGCTATGAACAAATGTCGAAAATCGACAACCTGCTGGCCGATAAATCC
+AGCTCACTGTCTGGCTCGCTGCAGAGTTTTTTTACCAGCCTGCAAACGTTAGTCAGTAAT
+GCGGAAGATCCTGCGGCGCGTCAGGCGCTGATTGGTAAAGCGGAAGGGCTGGTAAACCAG
+TTCAAAACCACCGATCAGTATCTGCGCGATCAGGATAAACAGGTCAATATCGCGATTGGC
+TCCAGCGTGGCGCAAATCAACAATTACGCGAAGCAGATAGCTAACCTGAACGATCAAATC
+TCCCGTATGACGGGCGTAGGCGCGGGCGCATCGCCGAACGACCTGCTCGATCAACGTGAT
+CAGTTGGTTAGCGAGCTTAACAAGATCGTTGGCGTCGAGGTGAGTGTACAGGACGGCGGC
+ACCTATAACCTGACGATGGCCAATGGCTATACGCTGGTGCAGGGGTCGACGGCGCGTCAG
+TTGGCGGCGGTTCCCTCCAGCGCCGACCCGACGCGAACGACTGTCGCTTATGTCGATGAG
+GCCGCCGGTAACATCGAAATTCCGGAAAAGTTGCTGAACACCGGTTCGCTCGGCGGGCTA
+CTGACGTTCCGTTCTCAGGATCTGGATCAGACTCGTAATACGCTGGGCCAGTTGGCGTTG
+GCGTTTGCCGATGCGTTTAACGCGCAGCATACCAAAGGTTATGACGCCGACGGCAATAAA
+GGGAAAGACTTCTTTAGCATTGGCTCGCCGGTGGTATATAGCAACAGTAATAATGCCGAT
+AAAACGGTATCGCTAACCGCTAAGGTGGTCGACAGCACGAAGGTTCAGGCGACGGATTAT
+AAGATTGTTTTTGACGGTACAGACTGGCAGGTTACTCGCACTGCGGATAACACCACCTTC
+ACGGCAACAAAAGATGCTGACGGAAAACTGGAGATTGACGGTCTGAAAGTGACGGTAGGG
+ACTGGCGCACAGAAAAACGACAGTTTTCTTCTCAAGCCGGTCAGCAATGCTATCGTCGAC
+ATGAACGTTAAAGTGACAAATGAAGCCGAGATTGCGATGGCGTCTGAGTCAAAACTCGAT
+CCTGATGTGGATACCGGCGACAGCGATAACCGCAATGGTCAGGCATTGCTGGACTTACAA
+AACAGCAATGTAGTGGGCGGCAACAAAACCTTTAACGATGCTTACGCCACGTTGGTCAGC
+GATGTGGGTAACAAAACGTCAACGCTGAAAACCAGCAGCACCACGCAGGCGAATGTGGTT
+AAACAGCTTTATAAACAGCAACAGTCGGTTTCCGGCGTTAACCTCGACGAAGAGTACGGC
+AATTTGCAGCGTTATCAGCAGTATTATCTGGCGAATGCGCAAGTATTGCAGACCGCGAAT
+GCGCTGTTTGATGCGTTATTGAATATTCGCATGATCGGAGACGGTAAATTGCTGGCCAGC
+GCGGCCTGGGATGCGCAATCTCTGAACGAACTGAAAGCGAAAGCGGGCCAGGACCCGGCG
+GCGAATATCCGTCCTGTGGCCCGTCAGGTGGAAGGGATGTTTGTGCAGATGATGCTGAAA
+AGTATGCGCGAGGCTTTACCCAAAGATGGTTTATTCAGCAGCGATCAGACGCGTCTGTAT
+ACCAGCATGTATGACCAGCAGATCGCCCAGCAGATGACCGCCGGTAAGGGATTGGGGCTG
+GCGGATATGATGGTTAAACAGATGACGGGCGGGCAGACGATGCCTGCAGATGATGCGCCG
+CAAGTACCGCTTAAATTCTCCCTGGAGACGGTAAACAGCTATCAAAATCAGGCGCTGACC
+CAACTGGTGCGCAAAGCCATACCGAAAACGCCGGACAGCAGCGATGCGCCGCTCTCCGGC
+GACAGTAAAGACTTTCTGGCCCGGCTTTCGCTCCCGGCGAGGCTGGCCAGCGAACAAAGC
+GGGGTGCCGCATCATCTGATTCTGGCGCAGGCGGCGCTGGAGTCCGGCTGGGGGCAGCGG
+CAAATCCTGCGGGAGAATGGCGAACCCAGCTATAACGTATTTGGCGTGAAAGCGACCGCC
+AGTTGGAAAGGGCCGGTGACGGAAATCACCACCACTGAATACGAAAATGGCGAAGCGAAA
+AAAGTGAAAGCGAAATTCCGCGTCTATAGCTCGTATCTGGAGGCGTTATCGGATTATGTC
+GCGCTGTTAACGCGTAACCCACGCTACGCTGCCGTGACCACTGCCGCCACGGCAGAGCAG
+GGCGCAGTGGCTCTGCAAAACGCCGGATACGCCACTGACCCGAATTACGCGCGTAAATTG
+GCCAGCATGATTCAGCAGTTGAAAGCGATGAGTGAAAAGGTCAGCAAAACCTACAGCGCG
+AATCTCGACAATCTCTTTGTGTTTAAAGCTCTTGCAGGAATCGTTCTGGCACTGGTTGCC
+ACTCTGGCGCACGCCGAGCGTATCCGGGATCTGACCAGTGTCCAGGGAGTACGGGAAAAC
+TCGCTGATCGGCTACGGGCTGGTGGTCGGGCTGGACGGTACGGGCGACCAGACGACCCAG
+ACGCCATTTACCACCCAGACGCTGAATAACATGCTGTCACAACTGGGGATTACGGTCCCC
+ACCGGCACCAATATGCAGTTGAAAAACGTGGCGGCGGTGATGGTGACGGCGTCGTATCCG
+CCTTTTGCGCGACAGGGACAAACGATCGATGTCGTCGTTTCCTCAATGGGGAACGCTAAA
+AGTCTGCGTGGCGGGACGTTATTAATGACGCCGTTAAAAGGGGTGGACAGCCAGGTGTAT
+GCTCTGGCGCAGGGCAATATTCTGGTCGGCGGCGCGGGCGCTTCCGCAGGCGGCAGTAGC
+GTGCAGGTTAACCAGCTTAATGGCGGGCGCATCACTAATGGCGCGATTATCGAACGCGAG
+TTGCCGACTCAGTTCGGCGCTGGCAACACCATTAATCTGCAATTGAACGACGAAGATTTT
+ACGATGGCGCAGCAAATTACCGACGCCATCAACCGCGCCCGCGGTTACGGCAGCGCCACT
+GCGCTTGATGCGCGAACGGTACAGGTACGCGTGCCCAGCGGCAACAGCTCGCAGGTGCGT
+TTTCTGGCGGACATTCAAAATATGGAAGTCAACGTGACGCCGCAGGATGCAAAAGTCGTG
+ATCAACTCGCGTACCGGTTCGGTGGTCATGAATCGGGAAGTCACGCTGGATAGCTGCGCT
+GTGGCGCAGGGCAATTTGTCAGTGACAGTCAATCGCCAACTCAACGTCAACCAGCCGAAT
+ACGCCATTTGGCGGCGGGCAGACCGTGGTGACGCCACAGACTCAGATAGATTTGCGTCAG
+AGCGGCGGATCGCTACAGAGCGTGCGTTCCAGCGCCAATCTGAACAGCGTAGTGCGCGCG
+CTGAATGCGCTTGGCGCGACGCCGATGGATCTGATGTCGATTTTGCAGTCCATGCAGAGC
+GCGGGCTGTCTACGCGCCAAACTGGAAATCATCATGGCCCTGATGGTCGCGACGCTGACA
+GGATGCGCCTGGATACCCGCTAAACCGCTCGTGCAGGGGGCGACCACGGCGCAGCCGATA
+CCTGGCCCGGTACCGGTGGCGAATGGCTCCATATTTCAGTCTGCGCAGCCGATTAATTAT
+GGCTATCAGCCGCTTTTTGAAGATCGTCGACCGCGTAATATCGGCGATACGCTCACGATT
+GTGTTACAGGAAAACGTCAGCGCCAGTAAAAGCTCGTCGGCAAATGCCAGCCGCGACGGC
+AAAACCAGCTTTGGTTTTGATACGGTACCGCGTTATCTGCAGGGATTATTCGGTAATTCC
+CGCGCGGATATGGAGGCCTCCGGCGGCAACTCTTTTAATGGTAAAGGCGGCGCGAATGCC
+AGCAATACCTTTAGCGGCACGCTGACCGTGACCGTCGATCAGGTTCTGGCCAATGGCAAT
+TTACACGTCGTGGGGGAAAAACAGATCGCGATTAATCAGGGAACGGAATTCATCCGCTTC
+TCCGGCGTGGTAAATCCACGCACCATCAGCGGTAGCAACTCTGTTCCCTCGACACAGGTG
+GCGGATGCGCGGATTGAATATGTCGGGAACGGCTATATTAACGAAGCGCAAAATATGGGC
+TGGCTGCAACGTTTCTTCCTTAATTTGTCGCCGATGATGATCAGTTCATTATGGATCGCC
+AAAACCGGTCTGGACGCGCAGCAAACCAATATGGATGTGATTGCCAATAACCTGGCAAAC
+GTCAGCACCAATGGTTTTAAGCGTCAGCGCGCGGTATTTGAAGATCTGTTGTATCAGACC
+ATCCGCCAGCCGGGCGCGCAGTCGTCCGAGCAGACGACGCTGCCTTCCGGGCTGCAAATC
+GGTACCGGCGTGCGTCCGGTCGCCACGGAGCGCCTGCACAGTCAGGGGAACCTGTCGCAG
+ACCAACAACAGTAAAGATGTGGCGATTAAAGGGCAGGGCTTTTTCCAGGTCATGCTGCCG
+GACGGTACGTCTGCCTATACCCGCGACGGCTCTTTCCAGGTGGATCAGAATGGTCAACTG
+GTGACGGCGGGCGGTTTTCAGGTGCAGCCGGCAATCACCATTCCGGCCAACGCGTTAAGC
+ATCACGATTGGCCGCGACGGCGTGGTCAGCGTTACCCAGCAAGGGCAGGCCGCGCCGGTT
+CAGGTCGGGCAGCTTAACCTGACCACCTTTATGAACGACACCGGTCTGGAAAGCATCGGC
+GAGAACCTCTATATCGAAACGCAATCGTCCGGCGCGCCGAACGAAAGCACGCCGGGGCTC
+AACGGCGCGGGGTTGTTGTATCAAGGGTATGTCGAAACGTCGAACGTTAACGTGGCGGAA
+GAGCTGGTGAACATGATTCAGGTTCAACGCGCCTATGAAATTAACAGTAAAGCAGTATCG
+ACGACCGATCAGATGCTGCAGAAACTGACGCAACTCATGGATCACGCAATTTATACCGCC
+ATGGGGGCGGCCAGCCAGACGCTTAACCAGCAGGCGGTAACGGCCAGCAACCTGGCTAAT
+GCCTCAACGCCGGGCTTTCGCGCGCAGCTTAACGCGCTACGCGCGGTGCCCGTTGATGGC
+CTCTCTTTAGCGACGCGCACGTTGGTTACGGCGTCGACGCCGGGGGCGGATATGACCCAG
+GGTCAGTTGGACTACACTTCCCGCCCGCTGGATGTTGCGTTACAGCAGGACGGCTGGCTG
+GTGGTGCAAGCGGCGGATGGCGCTGAAGGATATACCCGTAACGGGAATATCCAGGTGGGC
+CCGACCGGGCAGTTAACCATTCAGGGACATCCGGTTATCGGCGAAGGCGGCCCGATTACC
+GTTCCGGAAGGGTCGGAAATCACCATTGCGGCAGACGGCACGATCTCCGCGCTCAATCCC
+GGCGACCCGCCAAACACGGTGGCGCCCGTTGGGCGGCTGAAGCTGGTCAAAGCGGAAGGC
+AATGAGGTGCAGCGGAGCGATGACGGTTTATTCCGCCTTACCGCCGAGGCACAGGCTGAA
+CGCGGGGCGGTACTGGCCGCCGACCCGTCAATTCGCATTATGTCGGGCGTGCTGGAGGGC
+AGTAACGTCAAGCCGGTTGAAGCCATGACCGACATGATCGCCAACGCACGTCGTTTTGAA
+ATGCAGATGAAGGTTATCACCAGCGTAGATGAGAACGAAGGGCGAGCTAACCAACTGCTG
+TCGATGAGTATGTCTTTTTCTCAAGCGGTTAGCGGCCTGAACGCTGCGGCCACCAACCTT
+GATGTTATCGGTAATAACATCGCCAACTCCGCCACCTATGGCTTTAAGTCCGGTACGGCA
+TCATTTGCCGATATGTTCGCCGGTTCCAAAGTGGGGTTGGGCGTAAAAGTGGCGGGGATT
+ACCCAGGATTTTACCGACGGTACGACAACGAACACCGGGCGCGGGCTGGATGTCGCGATT
+AGCCAGAACGGTTTTTTCCGCCTGGTAGACAGCAACGGTTCCGTGTTCTATAGCCGCAAC
+GGCCAGTTCAAACTGGACGAGAACCGTAACCTGGTCAATATGCAGGGGATGCAGTTGACC
+GGCTATCCGGCCACCGGTACGCCGCCGACCATTCAGCAGGGGGCGAATCCTGCGCCGATC
+ACCATTCCGAACACGCTGATGGCGGCGAAATCGACCACCACCGCGTCAATGCAGATCAAC
+CTGAACTCAACGGACCCTGTACCGTCTAAAACGCCCTTTAGCGTGAGTGATGCGGATTCG
+TATAACAAAAAAGGCACCGTCACCGTTTATGACAGCCAGGGTAATGCCCATGACATGAAC
+GTCTATTTTGTGAAAACCAAAGATAATGAATGGGCTGTGTACACCCATGACAGCAGCGAT
+CCTGCAGCCACTGCGCCAACAACGGCGTCCACTACGCTGAAATTCAATGAAAACGGGATT
+CTGGAGTCTGGCGGTACGGTGAACATCACCACCGGTACGATTAATGGCGCGACAGCGGCC
+ACCTTCTCCCTCAGCTTCCTTAACTCCATGCAGCAGAACACCGGGGCTAATAACATCGTC
+GCCACCAATCAAAACGGCTATAAGCCTGGCGACCTGGTGAGCTACCAGATTAACAATGAT
+GGCACCGTGGTTGGCAACTACTCCAACGAGCAGGAGCAGGTGCTGGGGCAGATTGTGCTG
+GCTAACTTCGCCAACAACGAAGGTCTGGCATCCCAGGGCGATAACGTCTGGGCGGCGACG
+CAGGCCTCCGGGGTAGCGCTGCTGGGGACTGCCGGTTCCGGCAACTTCGGTAAGCTGACG
+AACGGCGCGCTGGAAGCCTCTAACGTGGATTTGAGTAAAGAGCTGGTGAATATGATCGTC
+GCGCAGCGTAACTACCAGTCGAATGCGCAGACCATCAAAACCCAGGACCAGATCCTCAAT
+ACGCTGGTTAACCTGCGCATGTCTATTGCCGTAAATATGAATGACCCGACCAACACGGGC
+GTCAAAACGACGACCGGCAGCGGGTCGATGACCGGAAGCAACGCTGCCGATCTGCAAAGC
+AGTTTCCTGACCTTACTGGTCGCGCAATTGAAGAACCAGGACCCGACTAACCCATTACAA
+AATAATGAGTTAACGACACAGTTGGCGCAAATCAGTACCGTGAGCGGCATTGAAAAACTG
+AATACGACGCTGGGGGCTATTTCCGGGCAAATCGATAATAGTCAGTCCCTACAGGCGACC
+ACGCTGATTGGACATGGCGTTATGGTGCCTGGCACCACAATTCTGGCGGGTAAAGGCGCG
+GAAGAAGGGGCCGTGACGTCCACGACGCCGTTTGGCGTGGAATTGCAACAGCCTGCGGAC
+AAAGTGACGGCAACCATTACCGATAAAGATGGCCGGGTGGTACGGACGCTGGAGATCGGT
+GAGTTGCGAGCCGGGGTACACACCTTTACCTGGGATGGTAAGCAAACGGACGGAACAACG
+GTACCGAATGGTTCTTACAACATTGCGATTACCGCCAGCAATGGCGGGACGCAACTGGTG
+GCGCAGCCGCTGCAATTCGCTCTGGTACAGGGCGTGACGAAGGGCAGTAACGGCAACCTG
+TTGGATCTGGGTACCTACGGCACCACCACACTCGACGAAGTTCGGCAAATAATCATGCAA
+ATACAGAGCTTCTATCACTCAGCTTCACTAAAAACCCAGGAGGCTTTTAAAAGCCTACAA
+AAAACCTTATACAACGGAATGCAGATTCTCTCAGGCCAGGGCAAAGCGCCGGCTAAAGCG
+CCCGACGCTCGCCCGGAAATTATTGTCCTGCGAGAACCTGGCGCGACATGGGGGAATTAT
+CTACAGCATCAGAAGACGTCTAACCACTCGCTGCATAACCTCTATAACTTACAGCGCGAT
+CTTCTTACCGTCGCGGCAACCGTTCTGGGTAAACAAGACCCGGTTCTAACGTCAATGGCA
+AACCAAATGGAGTTAGCCAAAGTTAAAGCGGACCGGCCAGCAACAAAACAAGAAGAAGCT
+GCGGCAAAAGCATTGAAGAAAAATCTTATCGAACTTATTGCAGCACGCACTCAGCAGCAA
+AATGGCTTACCTGCAAAAGAAGCTCATCGCTTTGCGGCAGTAGCGTTTAGAGATGCTCAG
+GTCAAGCAGCTCAATAACCAGCCCTGGCAAACCATAAAAAATACACTCACGCATAACGGG
+CATCACTATACCAACACGCAGCTCCCTGCCGCAGAGATGAAAATCGGCGCAAAAGATATC
+TTTCCCAGTGCTTATGAGGGAAAGGGCGTATGCAGTTGGGATACCAAGAATATTCATCAC
+GCCAATAATTTGTGGATGTCCACGGTGAGTGTGCATGAGGACGGTAAAGATAAAACGCTT
+TTTTGCGGGATACGTCATGGTGTGCTTTCCCCCTATCATGAAAAAGATCCGCTTCTGCGT
+CAGGCCGGCGCTGAAAACAAAGCCAAAGAAGTATTAGCTGCGGCACTTTTTAGTAAACCT
+GAGTTGCTTAACAGAGCCTTAGAGGGCGAAGCGGTAAGCCTGAAACTGGTATCCGTCGGG
+TTACTCACCGCGTCGAATATTTTCGGCAAAGAGGGAACTATGGTCGAGGATCAAATGCGC
+GCATGGCAATCGTTGACCCAGCCGGGAAAAATGATTCATTTAAAAATCCGCAATAAAGAT
+GGCGATCTACAGACGGTAAAAATAAAACCGGACGTCGCCGCATTTAATGTGGGTGTTAAT
+GAGCTGGCGCTCAAGCTCGGCTTTGGCCTTAAAGCATCAGATAGCTATAATGCCGAAGCG
+CTACATCAGTTATTAGGCAATGATTTACGCCCTGAAGCCAGACCAGGTGGCTGGGTTGGC
+GAATGGCTGGCGCAATACCCGGATAATTATGAGGTCGTCAATACATTAGCGCGCCAGATT
+AAGGATATCTGGAAAAATAACCAACATCATAAAGATGGCGGCGAACCCTATAAACTCGCA
+CAACGCCTTGCCATGTTAGCCCATGAAATTGACGCGGTGCCCGCCTGGAATTGTAAAAGC
+GGCAAAGATCGTACAGGGATGATGGATTCAGAAATCAAGCGAGAGCTCATTTCTTTCCAT
+CAGACCCATATGTTAAGTGCGCCTGGTAGTCTTCCGGATAGCGGTGGACAGAAAATTTTC
+CAAAAAGTATTACTGAATAGCGGTAACCTGGAGATTCAGAAACAAAATACGGGCGGGGCG
+GGAAACAAAGTAATGAAAAATTTATCGCCAGAGGTGCTCAATCTTTCCTATCAAAAACGA
+GTTGGGGATGATAATATTTGGCAGTCAGTAAAAGGTATTTCTTCATTAATCACATCTATG
+AAAAAGTATCTTGCTTTCGCCGTTACGCTGCTGGGTATGGGTAAAGTCATCGCCTGTACT
+ACCCTTTTGGTAGGCAATCAGGCTTCGGCTGACGGCTCCTTTATTATCGCGCGCAACGAG
+GATGGCTCGGCAAATAACGCCAAGCATAAGGTTATTCATCCCGTCGCGTTTCATCAACAA
+GGCGAGTATAAAGCACATCGCAACAATTTTAGCTGGCCGCTTCCGGAGACAGCGATGCGC
+TATACGGCGATTCATGACTTTGATACTAACGATAACGCCATGGGTGAAGCCGGTTTCAAT
+TCGGCGGGCGTCGGAATGAGCGCAACGGAAACCATTTACAACGGCAGAGCGGCGCTGGCT
+GCCGATCCTTACGTGACAAAAACGGGAATCACGGAAGACGCCATTGAGTCCGTGATCCTG
+CCAGTGGCGCAATCGGCGCGTCAGGGCGCCAAATTACTGGGAGATATTATTGAACAAAAA
+GGCGCGGGCGAAGGTTTCGGCGTCGCGTTTATTGATAGCAAAGAGATATGGTATCTGGAG
+ACGGGAAGCGGACATCAATGGCTGGCAGTACGACTTCCGGCAGATAGCTATTTCGTTTCC
+GCCAATCAGGGACGTTTACGCCATTACGATCCGAATGATAACGCGAATTATATGGCGTCA
+CCAACGTTAGTAAGCTTTGCGAAAAAGCAGGGATTATATGATCCGGCCCGCGGCGAATTC
+GACTTTCATCAAGCCTATTCGCAGGATAACAAAAACGATACCACCTATAATTATCCGCGC
+GTCTGGACGCTACAACACCAGTTTAATCCGCATCTGGATACGGTCGTTAGCGAAGGGGAA
+ACATTTTCTGTTTTTTTAACGCCAATAACGAAGATCAGCGTGGCGGCAGTAAAAAACGCG
+TTACGCAATCACTATCAGGGAACGTCGCACGACCCTTATGCCAGTCATAATCCACAAGAA
+CCATGGCGACCTATATCCGTTTTTCGTACCCAGGAGTCACATATTTTACAGGTCAGACCG
+AAATTACCGCAGGCTATCGGCAACGTAGAATACATCGCCTATGGAATGCCATCTCTTAGC
+GTCTATCTCCCCTATTACCAGGGGATGCGTCATTATCAACCCGGAGATGATAAAGGAACC
+GATCGGGCGAGCAACGACTCTACCTACTGGACATTCCGCACGCTGCAAACACTGGTTATG
+CAAGACTACAATACGTTTGCGCCAGATGTGCAACATGCCTGGAAAACATTTGAACAGCAA
+ACAGCTAAGCAACAGTATAAGATGGAGCAGAGCTATCTGAGATTATATGCGTCGCATCCG
+AAAGAAGCACAACGCTTACTGCAAAATTTTGAAGATAAAACGATGCAAAATGCGCAGACG
+CTCGCCCGTCGCCTGACCAATAATATTATTACGACAATGACTTACCGCACAGATATGAAA
+TATCACTTTTCAAGTACGCAGCCAATGGTTAAGTTATCAATGACGCTGCGCCTGACAATT
+TCTTTTATCGCCATACTTATCCTCGCCTGTACCGGCATTAGCTGGACGCTCTATAACGCG
+CTGAGCAAAGAATTAACGTATCGGGATGATATGACGCTAATAAATCGGGCGGCGCAAATG
+CAGCAACTGTTACTGGATGGCGCCAGGCCGGAAAATCTGCCGCTCTATTTCAATCGGATG
+GTGGATACGAAGCAGGATATCTTATTGATCCACTCAGCAACAGGCCATAATGTTGCGATT
+AATCATAGCGGCATCCCCGACCAACGCTTTAACGAGATTCCGCTGGCTAAAAACATCACC
+CGCGAAACCTTATTTCGCCAGGCGGTACAAGGCACGGAGCTGACCGCGGTACGAGTAAAC
+GCCAGAAGCGGCGATAACCCGCTGACCCTTACTATTGCCAGGCTGGCGACGGAAAGGCGG
+CAAATGCTGGCGCAATATCGCCGCAACAGTTTGCTGATTAGCCTTATCGCGATCCTCGTC
+TGTTCGGCGCTCAGTCCATTAGTCATCAGAAACGGGCTGCGGGCCATTACGTCGCTCAGC
+CGACTCACCGCGGCGACAGATAGCGGCACACTTCGCCAGCCGCTGGCGGAACAGGCGTTA
+CCCGTCGAGCTCAGGCCGCTTGGGCAAGCGCTAAATACCATGCGCCAGAAGCTTTCCGAC
+GATTTTGAACGCCTGAACCAATTTGCCGACGATCTGGCGCATGAGCTGCGCACGCCGGTT
+AATATTTTACTGGGGAAGAATCAGGTTATGCTGAGTCAGGAACGCAGCGCCGAAGAGTAT
+CAACAAGCCCTTGTCGATAATATTGAAGAGCTGGAGGGACTGTCGCGACTGACAGAAAAT
+ATTCTCTTTCTGGCACGCGCGGAGCACCAGAATATAGCGGTAAAAAAACAGCCTGTTTCG
+CTCAATGCGCTGGTCGAAAATATGCTGGATTATCTTAGCCCCCTTGCCGAAGAGAAGCAC
+ATCTGTTTTATAAATCAATGTCAGGGAACGGTATGGGCTGACGAAATATTATTACAAAGA
+GTGCTCTCAAACCTGCTGACGAATGCCATCCGTTATTCTGATGAAAACGCCGTGATACGT
+ATTGAAAGCGCTTATGATGATAACGTTGCAGAAATTCGGGTCGCTAATCCGGGCAGCCCC
+ACCGCCGATGCGGATAAGCTTTTCCGGCGTTTTTGGCGAGGAGATAATGCCCGCTACACT
+GCCGGTTTCGGCCTGGGGTTATCGTTAGTTAACGCGATTGCCCTATTGCACGGTGGCTCG
+GCATCTTACCGCTATGCCGATGAACATAATATCTTTTCGGTTCGTCTGCCTGATAGCGGT
+GATAGCATGTCATCTTGTTGGAGATTTACGGATTCGCTAACAAGCCTATGGCATACTGCG
+TTGATGAAGATTTTATTGATTGAAGATAACCAGAAAACCATTGAGTGGGTACGTCAGGGA
+CTCACGGAGGCAGGCTATGTGGTTGATTATGCCTGTGATGGACGAGACGGATTACACCTA
+GCCCTTCAGGAACATTATTCATTGATTATTCTTGATATTATGCTGCCGGGGCTTGATGGA
+TGGCAGGTTTTACGCGCGTTGCGCACTGCATATCAGCCCCCTGTTATTTGCCTGACGGCG
+CGCGACTCGGTTGAGGATCGCGTCAAAGGTCTTGAGGCGGGCGCTAATGATTACCTTGTT
+AAGCCTTTTTCCTTCGCCGAACTGCTGGCCCGGGTGAGAGCTCAACTCAGACAGCATGTC
+CCGGTCTTTACCCGACTGACGATCAATGGTCTGGACATGGATGCCACAAAGCAATCGGTG
+TTACGAAATGGCAAACCGATTTCCCTGACCCGCAAAGAATTCCTGCTCCTCTGGTTACTG
+GCGTCCCGGGCAGGGGAAATCGTGCCCCGAACCGCGATCGCCAGCGAAGTTTGGGGAATT
+AACTTTGATAGTGAAACCAACACCGTTGATGTCGCGATTCGTCGGCTGCGCGCCAAAGTA
+GACGATCCATTTGAAAAGAAGCTCATTATGACCGTCCAGGGGATGGGTTATCGATTACAG
+GCGGAAACGTCGCAGAATGGTATGAAACGATATATACTGGCTACCGCGATAGCGTCTCTT
+GTTGCAGCCCCGGCAATGGCGCTGGCCGCTGGCAGCAATATTCTCAGCGTACATATTCTC
+GATCAGCAAACAGGCAAACCAGCGCCCGGCGTGGAGGTGGTACTGGAGCAGAAAAAGGAT
+AACGGATGGACGCAATTAAACACCGGGCATACCGACCAGGATGGACGAATTAAAGCACTG
+TGGCCCGAAAAAGCTGCCGCGCCGGGGGATTATCGCGTTATTTTTAAAACCGGCCAGTAT
+TTTGAAAGTAAAAAACTGGACACGTTTTTCCCGGAGATTCCCGTCGAGTTTCATATCAGC
+AAAACGAATGAGCACTATCATGTGCCGCTGTTATTAAGTCAGTATGGTTATTCAACCTAT
+CGCGGGAGCATGCAAGTAGATGAACAACGTCTGCGTTTTCGCGATGCGATGGCAAGTCTG
+GCGGCAGCGGTCAACATCGTAACCACGGCGGGTCACGCCGGACGCTGCGGTATCACCGCA
+ACAGCGGTTTGCTCAGTCACTGATACGCCGCCCTCCGTGATGGTATGTATTAATGCCAAT
+AGCGCCATGAACCCCGTTTTTCAGGGCAACGGCAGGCTGTGCATTAATGTACTTAACCAT
+GAGCAGGAGCTGATGGCGCGCCACTTTGCCGGTATGACGGGGATGGCGATGGAGGAGCGT
+TTTCACCAGCCATGTTGGCAAAACGGGCCGCTGGGCCAGCCGGTACTTAACGGCGCGCTG
+GCCAGTCTTGAAGGCGAGATCAGCGAGGTACAAACCATTGGCACGCATCTGGTGTATCTG
+GTGGCGATCAAAAATATTATTCTTAGCCAGGAGGGGCATGGCCTGATTTATTTCAAACGC
+CGTTTTCATCCGGTCAGACTTGAGATGGAAGCGCCTGTTATGGGACGCACACCGGATTAC
+AAAGCCGCCTTTGGCTGCGCTCTGGGCGCTAACCCAGCCTTCTACGGCCAGTTTGAGCAG
+AACGCCCGTAACTGGTACACCCGTATTCAGGAGACCGGCCTGTACTTTAACCATGCAATC
+GTCAACCCGCCCATTGACCGCCACAAACCTGCCGACGAAGTGAAAGACGTCTATATCAAG
+CTGGAGAAAGAGACGGACGCCGGGATTATTGTCAGCGGGGCGAAAGTTGTCGCCACTAAC
+TCCGCCCTGACTCACTACAACATGATTGGTTTCGGCTCAGCCCAGGTGATGGGCGAAAAC
+CCGGATTTTGCTCTGATGTTTGTCGCGCCAATGGATGCCGAAGGCGTAAAACTTATTTCG
+CGCGCCTCGTATGAAATGGTCGCGGGCGCGACGGGCTCGCCGTTTGATTATCCCCTCTCC
+AGCCGTTTTGATGAAAACGATGCCATTCTGGTGATGGACAAGGTGCTGATCCCGTGGGAA
+AACGTATTAATTTACCGTGATTTCGATCGTTGTCGTCGCTGGACGATGGAAGGCGGCTTT
+GCCCGTATGTATCCACTGCAAGCCTGTGTTCGTCTGGCGGTAAAACTTGATTTCATTACC
+GCGCTGCTGAAAAAATCGCTCGAATGTACGGGTACCGTAGAGTTCCGGGGCGTGCAGGCC
+GATCTCGGCGAAGTCGTGGCCTGGCGCAATATGTTCTGGGCATTGAGCGATTCTATGTGT
+TCTGAAGCAACCCCGTGGGTAAACGGCGCCTGGCTACCGGACCACGCCGCGCTGCAAACC
+TATCGTGTGATGGCCCCAATGGCCTACGCGAAAATTAAAAATATTATTGAACGTAACGTT
+ACCAGCGGCCTGATTTACCTGCCTTCCAGCGCCCGCGATCTGAATAATCCGCAAATCGAC
+CAGTACCTGGCGAAATACGTACGCGGCTCTAACGGAATGGACCATGTTGAACGTATCAAA
+ATTCTTAAATTGATGTGGGATGCCATCGGCAGCGAGTTTGGCGGTCGCCATGAGCTGTAC
+GAGATTAACTACTCGGGCAGCCAGGATGAAATTCGTCTGCAGTGTCTGCGTCAGGCCCAG
+AGCTCCGGCAATATGGATAAGATGATGGCAATGGTCGATCGCTGCCTCTCCGAATACGAT
+CAGAATGGCTGGACGGTTTCGCATTTGCACAATAACGACGACATCAATCAACTGGATAAG
+CTGCTGAAAATGCATGATTCATTAACCATCGCCTTGCTTCAGGCGCGCGAAGCGGCAATG
+ACCTATTTCCGCCCCATCGTTAAAAGCCACAATCTGACCGACCAGCAATGGCGCATTGTG
+CGAATCCTGGCCGATAGCCCCTCTATGGATTTTCACGAGCTGGCCTTTCGTACCTGTATT
+TTGCGTCCAAGTCTGACCGGAATATTGACGCGCATGGAGCGAGACGGACTGGTGTTGCGA
+CTCAAGCCGGTTAACGATCAGCGTAAGTTATATGTCATGTTGACGGAGCAGGGACAAACG
+TTGTACGCCCGTGCCCGGAGCGAGGTAGAAGAGGCTTATCGAAAAATTGAGGCCGATTTC
+ACGCCCGAAAAAACACAGCAATTGATGCTGCTGCTGGACGATCTTATTGCTCTGGGGCGC
+CAGCATCCTGATAGCGAAGCGGAAGCAATGAAGGGTACTGTTTTCGCCGTTGCGTTAAAC
+CATCGCAGCCAGCTTGATGCCTGGCAAGAGGCTTTCTCTCAGCCTCCCTATAATGCGCCG
+CCTAAAACCGCAGTGTGGTTCATCAAGCCGCGTAATACGGTGATTCGTCACGGCGAACCC
+ATTCCTTATCCGCAGGGAGAAAAGGTACTGAGCGGCGCGACAGTGGCGCTCATTGTGGGG
+AAAACCGCCAGCCGGATACGCCCTGAAGCGGCGGCGGACTATATCGCCGGGTATGCGCTG
+GCTAACGAGGTCAGCCTGCCGGAAGAGAGCTTTTATCGCCCGGCGATTAAAGCGAAATGT
+CGCGATGGCTTTTGCCCGCTGGGTGAAATGGCGCCGCTGAGTGATGTGGATAATCTCACC
+ATTATCACTGAAATCAACGGACGAGAAGCGGACCACTGGAATACTGCCGATTTACAGCGT
+AGCGCCGCACAACTGCTTAGCGCGTTAAGTGAGTTCGCTACACTTAACCCTGGCGATGCG
+ATCTTACTTGGTACGCCGCAGAATCGCGTTGCGCTGCGTCCCGGCGATCGGGTGCGTATT
+CTGGCGAAAGGTTTACCCGCGCTGGAAAATCCGGTTGTCGCAGAAGATGAATTCGCCCGC
+CACCAGACGTTTACGTGGCCGCTGTCAGCGACGGGAACGTTATTTGCGCTGGGGTTGAAC
+TACGCCGATCACGCCAGCGAGCTGGCATTTACGCCGCCGAAAGAGCCGCTGGTATTTATC
+AAAGCGCCAAACACCTTTACCGAACATCACCAAACGTCGGTGCGCCCGAACAACGTCGAA
+TATATGCACTACGAAGCCGAGCTGGTCGTGGTGATTGGCAAAACGGCGCGTAAGGTGAGC
+GAAGCCGAAGCCATGGAGTATGTGGCCGGTTACACCGTCTGTAACGACTACGCGATCCGC
+GACTATCTGGAAAACTACTACCGTCCGAATCTGCGGGTAAAAAGCCGCGACGGCCTGACG
+CCGATAGGCCCGTGGATTGTGGATAAAGAGGCGGTTTCTGATCCGCACAACCTGACGTTA
+CGCACCTTTGTCAACGGTGAGCTGCGGCAGGAAGGGACGACCGCCGATCTGATCTTCAGC
+ATCCCGTTCCTGATTTCTTATCTGAGCGAATTTATGACGTTGCAACCGGGCGACATGATT
+GCCACCGGTACGCCGAAAGGGCTGTCCGATGTGGTGCCGGGGGATGAAGTTGTCGTTGAA
+GTAGAAGGCGTGGGTCGCCTGGTTAACCGAATCGTCAGTGAGGAGAGCGCAAAAATGAAG
+AAAATAAATCATTGGATTAACGGCAAAAACGTTGCAGGTAACGACTACTTCCAGACCACT
+AACCCGGCGACCGGTGATGTGCTGGCGGAAGTAGCCTCCGGCGGTGAAGCAGAAGTGAAC
+CAGGCTGTCGCGGCGGCAAAAGAGGCGTTCCCGAAATGGGCCAACCTGCCGATGAAAGAG
+CGCGCGCGCCTGATGCGCCGCCTTGGCGACCTGATTGACCAGCATGTGCCGGAAATCGCG
+GCGATGGAAACCGCCGACACCGGCCTGCCTATTCACCAGACTAAAACGTGCGTGCTGATC
+CCGCGCGCCTCGCATAACTTCGAATTCTTCGCCGAAGTGTGCCAGCAGATGAACGGCAAG
+ACCTATCCGGTTGACGATAAAATGCTCAATTATACGCTGGTGCAGCCCGTCGGCGTCTGC
+GCGCTGGTGTCGCCGTGGAACGTGCCGTTTATGACCGCGACTTGGAAAGTTGCGCCGTGC
+CTGGCGCTGGGTAACACCGCGGTGCTCAAAATGTCCGAGCTGTCGCCGCTGACTGCCGAC
+AGGCTGGGCGAGCTGGCACTGGAGGCAGGAATTCCGGCAGGCGTGCTGAACGTGGTGCAG
+GGCTACGGCGCGACGGCGGGCGATGCGCTGGTACGCCACCATGACGTGCGTGCGGTGTCG
+TTTACCGGCGGTACCGCCACCGGTCGCAATATCATGAAAAATGCCGGGCTGAAAAAATAC
+TCGATGGAGCTGGGCGGCAAATCGCCGGTGCTGATTTTTGAAGACGCCGACATTGAGCGC
+GCGCTGGACGCCGCGCTGTTCACCATCTTCTCGATCAACGGCGAACGCTGCACCGCTGGG
+TCGCGCATCTTTATCCAGCAGAGCATTTACCCTGAGTTCGTGAAGCGCTTTGCCGAACGC
+GCGAATCGCCTGCGTGTCGGCGATCCGACCGACCCGAACACCCAGGTCGGCGCGCTGATT
+AGCCAACAGCACTGGGAGAAAGTCTCCGGTTATATCCGCCTCGGCATTGAAGAGGGGGCA
+ACGCTGCTGGCGGGCGGTGCGGAAAAACCCACTGACCTGCCTGCGCATCTGAAAGGCGGT
+AACTTCCTGCGCCCAACCGTGCTGGCCGATGTCGACAACCGTATGCGCGTTGCGCAGGAA
+GAGATCTTTGGGCCGGTCGCCTGCCTGCTGCCATTCAAAGACGAAGCGGAAGGGTTACGT
+TTGGCGAACGATGTGGAATACGGTCTGGCCTCTTATATCTGGACCCAGGACGTGAGCAAA
+GTGTTGCGCCTGGCGCGTGGGATTGAAGCCGGCATGGTCTTCGTCAACACCCAGAACGTC
+CGCGACCTGCGCCAGCCGTTCGGCGGCGTGAAAGCCTCCGGTACCGGGCGCGAAGGCGGC
+GAATATAGCTTCGAAGTGTTTGCGGAAATGAAAAACGTCTGCATCTCAATGGGCGACCAT
+CCTATCCCAAAATGGGGAGTTATGGGCAAGTTAGCGTTAGCAGCAAAAATTACCCACGTG
+CCGTCGATGTATCTTTCTGAACTGCCAGGAAAAAATCACGGTTGTCGTCAGGCAGCCATT
+GATGGGCATATTGAAATTGGCAAGCGTTGCCGCGAAATGGGCGTTGACACCATTATCGTA
+TTCGACACCCACTGGCTGGTGAATAGCGCTTACCACATTAATTGTGCCGACCATTTCCAG
+GGCGTCTATACCAGCAACGAATTGCCGCACTTTATTCGCGACATGACCTATGACTATGAC
+GGTAATCCGGCGCTCGGCCATCTGATCGCCGACGAGGCGGTCAAACTGGGCGTGCGCGCC
+AAAGCGCACAACATCCCGAGCCTGAAGCTGGAGTATGGCACGCTGGTGCCGATGCGCTAC
+ATGAACAGCGACAAGCACTTCAAAGTGGTCTCCATCTCGGCGTTCTGCACTGTGCATGAT
+TTTGCCGACAGCCGCAAACTGGGCGAAGCCATTCTCAAGGCGATTGAGAAATATGACGGT
+ACCGTAGCGGTATTCGCCAGTGGTTCTCTGTCGCACCGTTTTATTGACGACCAACGGGCG
+GAAGAGGGGATGAACAGCTACACCCGCGAGTTCGATCATCAAATGGACGAGCGCGTGGTC
+AAGCTGTGGCGCGAAGGCAAATTCAAGGAGTTTTGCACCATGTTGCCGGAGTACGCCGAC
+TACTGCTACGGCGAAGGCAACATGCACGACACGGTCATGCTACTGGGAATGCTGGGGTGG
+GACAAATACGACGGCAAGGTGGAGTTCATCACCGACCTGTTCGCCAGCTCCGGTACCGGC
+CAGGTAAACGCTGTTTTCCCGCTGCCTGCGATGCCGCACTTTATTGCTGAATGTACTGAA
+AATATTCGCGAGCAGGCTGATTTACCAAGCCTGTTCAGCAAGGTAAACGAGGCGCTGGCC
+GCCACCGGGATTTTCCCCATCGGCGGTATCCGCAGTCGCGCCCACTGGCTGGATACCTGG
+CAGATGGCTGACGGTAAGCATGATTACGCGTTTGTGCATATGACGCTGAAAATCGGCGCC
+GGGCGCAGCCTGGAGAGCCGTCAGGAAGTCGGCGAAATGCTGTTTGGGCTGATTAAAGCC
+CACTTCGCCGACCTGATGGAGAACCGCTATCTGGCGCTGTCGTTTGAGATTGCCGAGTTA
+CATCCAACGCTCAATTACAAACAAAACAACGTACACGCGTTATTTAAAATGCTCGATAAA
+CAGACCCATACCCTGATCGCTCAGCGACTTAATCAGGCTGAAAAACAGCGTGAACAGATT
+CGCGCAGTGTCGCTGGATTATCCCAACATCACTATTGAAGATGCCTATGCCGTACAGCGT
+GAATGGGTCAATATCAAGATTGCCGAAGGGCGCACGCTCAAAGGCCACAAAATCGGCCTG
+ACCTCAAAAGCGATGCAGGCCAGCTCGCAAATCAGCGAACCGGATTACGGCGCGCTGCTT
+GACGATATGTTCTTCCATGACGGCGGAGATATCCCCACCGACCGTTTTATCGTCCCGCGT
+ATTGAAGTGGAGCTGGCGTTCGTGCTGGCGAAACCGCTGCGCGGCCCTCACTGCACGCTG
+TTCGACGTCTACAACGCCACGGATTATGTGATTCCGGCGCTGGAACTGATTGACGCCCGC
+AGCCACAACATCGACCCGGAAACCCAGCGCCCGCGCAAAGTGTTCGACACCATTTCCGAC
+AACGCCGCCAACGCCGGGGTGATCCTCGGTGGTCGCCCCATCAAACCAGACGAGCTGGAT
+CTGCGCTGGATCTCCGCGCTGCTCTATCGCAACGGCGTGATCGAAGAAACCGGCGTCGCC
+GCAGGCGTGCTGAATCATCCGGCCAACGGCGTGGCGTGGCTGGCGAACAAGCTTGCCCCC
+TACGATGTCCAGCTTGAAGCCGGGCAGATCATCCTCGGCGGCTCGTTCACCCGCCCGGTG
+CCGGCGCGCAAGGGCGACACCTTCCATGTCGATTACGGCAACATGGGCGCGATCAGTTGC
+CGGTTTGTGATGAAAAATGCTTTCAAAGACGCGTTAAAAGCGGGGCGCCCGCAAATCGGT
+TTGTGGCTGGGGCTTGCCAACAGTTACAGCGCTGAACTGTTAGCGGGCGCCGGCTTCGAC
+TGGCTACTGATTGACGGTGAACACGCGCCAAACAACGTGCAGACGGTGTTGACCCAGTTG
+CAGGCGATTGCGCCTTATCCCAGCCAGCCGGTGGTGCGTCCGTCATGGAACGATCCGGTA
+CAGATTAAGCAACTGCTCGACGTCGGCGCGCAAACGCTGCTGATACCGATGGTGCAGAAT
+GCCGATGAAGCGCGAAACGCCGTGGCGGCTACGCGTTATCCGCCTGCCGGTATTCGCGGC
+GTGGGCAGCGCGCTGGCGCGGGCATCGCGCTGGAATCGCATTCCGGACTATCTCCACCAG
+GCCAACGACGCCATGTGCGTACTGGTGCAGATTGAAACGCGTGAGGCGATGAGCAATCTG
+GCGTCAATTCTCGACGTGGATGGCATTGACGGCGTGTTTATTGGCCCGGCGGATCTCAGC
+GCCGATATGGGCTTTGCCGGCAATCCGCAGCACCCGGAAGTGCAGGCGGCGATTGAGAAC
+GCCATCGTGCAGATACGCGCGGCGGGGAAAGCGCCGGGGATTCTGATGGCCAATGAAGCA
+CTGGCGAAACGTTATCTGGAACTGGGGGCGCTATTTGTCGCCGTCGGCGTTGACACCACG
+CTGCTGGCGCGCGGAGCGGAGGCGCTGGCGGCGCGCTTTGGCGCAGAAAAAAAACTGTCC
+GGTGCGTCCGGCGTCTATATGAGCGACACATCATCTGCACTTCCGGAAAGCCCCGAGTCT
+GTCGGTTCGCACAACGCGCTCAGCACGGGTCAACAAACCGTCATAAATAAACTGTTCCGC
+CGACTGATCGTATTTTTATTCGTGTTGTTTATCTTCTCGTTTTTAGACCGTATCAACATC
+GGTTTTGCCGGGTTGACGATGGGGCAGGATCTGGGGTTAAGCGCCACCATGTTTGGTCTT
+GCCACGACGCTGTTTTACGCCACCTACGTCATTTTCGGCATTCCCAGCAACGTGATGTTG
+AGCATCGTCGGCGCCCGCCGCTGGATTGCGACCATTATGGTGCTATGGGGCATTGCATCT
+ACCGCCACGATGTTCGCGGTGGGACCGAAAAGCCTGTATGTGCTGCGAATGCTGGTGGGC
+ATTACCGAAGCGGGCTTTTTGCCAGGAATATTGCTCTATTTAACCTACTGGTTCCCGGCA
+TTTTTCCGCGCCCGCGCCAACGCATTATTTATGATTGCCATGCCGGCCACTACCGCGTTG
+GGGTCAATTGTCTCCGGCTATATTTTATCGCTGGACGGCATATTCAATCTGCATGGATGG
+CAGTGGTTATTCCTGTTGGAAGGATTTCCGTCAGTTTTGTTAGGCATTATGGTCTGGTTT
+TACCTGGATGATACCCCGGCAAAAGCCAAATGGCTGACGGCAGAGGATAAAAAATGTTTG
+CAGGAGATGATGGATAATGATCGCCTGACGCTGGTTCAGCCTGAGGGGGCCATCAGCCAT
+AACGCCATGCAGCAGCGTAGCCTGTGGCGCGAAGTATTCACGCCAATTGTACTGATGTAT
+ACGCTGGCCTATTTTTGCCTTACCAATACGCTTAGCGCCATTAGTATCTGGACGCCGCAA
+ATCCTGAAAAGTTTTAATGAAGGCAGCAGCAATATCACCATCGGCCTGCTGGCGGCGATC
+CCGCAGATTTGTACTGTTCTGGGCATGATTTACTGGAGCCGCCATTCGGACAAACATCAG
+GAGCGTAAACACCACACTGCGTTACCGTTCCTGTTTGCCGCCGCGGGCTGGCTGCTGGCG
+TCGGCGACCGACCGTAACCTGATCCAGCTCCTGGGGATCGTGATGGCATCCACGGGTTCC
+TTTAGCGCGATGGCGATCTTCTGGACCACGCCGGATCAGTCGATCAGTTTACGCGCCAGG
+GCGATAGGCATTGCGGTCATCAATGCCACCGGCAATATTGGCTCCGCGCTCAGCCCGGTT
+ATGATTGGCTGGCTAAAAGATATCACTGGTAGCTTCAATAGCGGACTCTGGTTTGTCGCT
+TCTCTGTTAGTCGTCGGCGCCGCCATTATCTGGCTCATTCCCATGAAAGCATCGCGTCCG
+CGCGCCACCCCTATGTGCCAACGTGCGATCGCCAATATTGATATCAGCAAAGAGTATGAC
+GAAAGCATGGGCAGTAACGATGTGCATTATCAGTCGTTTGCTCGTATGGCGGATTTCTTT
+GGTCGTGATATGCAGGCGCATCGCCACGACCAGTTTTTTCAAATGCACTTTCTTGATACC
+GGGCAGATTGAGCTACAGCTCGACGATCATCGCTATTCGGTGCAGGCGCCGCTATTTGTG
+CTAACGCCGCCCTCGGTGCCGCATGCTTTTATTACCGAATCGGATAGCGATGGTCATGTT
+CTGACGGTACGCGAAGAGCTGGTTTGGCCGCTGCTGGAAGTGCTTTATCCCGGCACCAGA
+GAGGCCTTCGGCCTGCCGGGAATCTGCCTGTCGCTGGCGGATAAACTCAACGAGCTGGCG
+GCGCTCAAACATTACTGGCAGCTAATTGAGCGGGAGTCCACGGAACAACTGGCTGGCTGC
+GAACATACCTTGGTACTACTGGCGCAGGCGGTATTTACCTTGCTGTTGCGTAATGCGAAG
+CTGGACGATCATGCCGCAACCGGGATGCGCGGTGAACTGAAACTTTTTCAGCGCTTTACC
+CTGTTAATTGACAACCACTTCCATCAGCACTGGACGGTGCCCGATTATGCCTGTGAGTTG
+CATATTACCGAATCTCGTTTGACCGATATTTGCCGACGTTTTGCTAATCGCCCGCCTAAA
+CGCCTGATTTTTGATCGGCAATTACGCGAGGCGAAACGACTGCTGCTTTTTTCCGACAAT
+GCTGTCAACGAGATCGCCTGGCAATTAGGTTTTAAAGATCCGGCTTATTTCGCCCGTTTC
+TTTAATCGCCTTGCTGGCTGTTCTCCTTCGCAGTTTCGCCAACGTGAAGTTCCTTCTTTT
+CTCAACATGATGAAAAAAAGCGTCGCTATGCTGGCGGTTTGTATGCTGGCGCAAAGCCAC
+CTTGCCATTGCTGCCGGTGCTCCTGCGCCTCAAGAGATCAACATTGTTTTACTGGGCACC
+AAAGGCGGGCCTTCTTTGCTCAATACAGCCAGACTACCGCAAGCGACGGCGCTCACTATC
+GGCGATAAGATATGGCTGATAGATGCCGGCTACGGCGCCAGTCTGCAACTGGTGAAAAAT
+GGCATTCCACTGCGCAACATCAATACTATTTTGCTCACCCATCTGCACAGCGACCACATA
+CTGGATTATCCTTCCTTGCTGATGAATGCCTGGGCAAGTGGCCTGAAAGACCATACCATA
+CAGGTTTATGGCCCGCCGGGAACCCAGGCGATGACGAAGGCTAGCTGGAAGGTCTTTGAC
+AGGGATATCACGTTACGCATGGAAGAAGAGGGGAAACCCGATCCGCGCAACCTGGTTAAG
+GCGACCGATATCGGCCAGGGCGTCATCTATAAAGATGAACTGGTCACAATAAGCGCGCTG
+AAAGTGCCTCATTCCCCTTTCCCGGACGGTGAAGCGTTTGCTTACCGTTTTGATACTCAG
+GGTAAGCGAATCGTCTTCTCTGGCGATACGTCCTGGTTTCCTCCGCTTGCAACGTTTGCC
+CAGGGGGCGGATATCCTGGTACATGAGGCGGTACATGTCCCTTCGGTAGCAAAACTGGCT
+AATAGTATTGGCAACGGAAAAACGCTGGCTGAAGCGATTGCGTCGCATCACACCACGATT
+GAAGATGTCGGTAAGATTGCTCGCGAGGCCCACGTGAAAAAACTGGTGTTAAGTCATCTG
+GTGCCTGCGACGGTTGCGGATGACGTCTGGCAACAGGAAGCCATGAAAAATTACCCGGGC
+CCTGTCATTGTCGGTCATGACAATATGACGATAAGCGTACCGATGGCTAACATCACTGTC
+ACCTTTACCATCACCGAATTTTGTTTGCACACCGGCGTGACGGAAGAGGAGCTAAACGAA
+ATCGTCGGACTTGGCGTAATTGAGCCTTACGAAGACGATAACGCCGACTGGCAATTCGAC
+GATCGCGCAGCGAGCGTGGTACAACGCGCGCTACGCTTACGCGAGGAGCTGGCGCTCGAC
+TGGCCAGGGATCGCGGTCGCGTTAACGCTGCTGGAAGAGAATTCACGGCTGCGCGAAGAA
+AACCGGTTACTGCTGCAACGCCTTTCTCGCTTTATCTCGCATCCCATGGAACTTAAGGAT
+TATTACGCCATTATGGGCGTGAAACCGACGGACGATCTCAAGACGATTAAGACCGCCTAT
+CGCCGACTGGCCCGCAAGTACCATCCAGATGTCAGCAAAGAACCCGATGCCGAAGCCCGT
+TTCAAAGAGGTTGCTGAAGCATGGGAAGTGCTGAGTGATGAGCAACGGCGCGCCGAGTAT
+GACCAGTTATGGCAACACCGTAACGATCCACAATTTAATCGCCAGTTCCAGCAACACGAA
+GGCCAGCCGTATAACGCCGAAGATTTTGATGATATTTTCTCGTCTATTTTTGGTCAGCAC
+GGTCGTCATTCGCACCACCGCCACGCCGCACGCGGTCATGATATCGAAATTGAAGTGGCG
+GTATTCCTGGAAGAAACGCTGGAAGAGCACCAGCGTACGATTAGCTATTCCGTCCCCGTT
+TATAACGCGTTCGGCCTGGTGGAGCGGGAAATTCCCAAAACATTGAATGTGAAAATCCCG
+GCTGGCGTCAGCAACGGGCAACGAATCAGACTGAAAGGCCAGGGCACGCCGGGGGAAAAC
+GGCGGACCTAATGGCGATTTATGGCTCGTTATCCATATTGCCCCGCATCCGCTCTTTGAT
+ATCGTCAATCAGGATCTGGAAGTCGTCCTTCCGCTTGCCCCATGGGAGGCGGCGCTCGGC
+GCTAAGGTGTCTGTGCCAACGCTTAAAGAGCGTATTTTGCTGACCATTCCCCCCGGCAGC
+CAGGCAGGTCAGCGGCTGCGTATCAAAGGAAAAGGATTAGCCAGTAAAAAGCACACTGGC
+GATCTCTATGCCATCATCAAAATCGTTATGCCGCCGAAACCTGACGAGAAAACAGCTGCC
+CTGTGGCAACAACTGGCGGACGCGCAGTCGTCCTTTGACCCACGCCAGCAATGGGGGAAA
+GCAATGGCGAAACAACAACGGATGGGCTGGTGGTTTCTTTGCCTTGCATGTGTCGTGGTA
+ATGGTTTGTACCGCGCAACGCATGGCGGGCCTGCACGCCTTGCAGATGCAGGCGACGGCC
+TCTGCTGCGGTGGTCAGCGCTCCCTCCTCGACAGATGACGGCTCGCCGGTCACTCCCTGC
+GAATTAAGCGCCAAGTCGCTGCTGGCGGCGCCTCCAGTACTCTTTGAAGGTGCTATCCTT
+GCGCTTTATCTACTGCTTTCCTTACTGGCGCCTGTCCGGGTCATGCGCCTGCCGTTTTCG
+CCTCCACGGGCTATTTCGCCGCCCACATTACGGGTACATCTACGATTTTGTGTCTTCCGT
+GAAATGATGATTTTATTCAGGCGGATACTGTTCTGCCTGTTATGGCTTTGGCTGCCCGTC
+TCCTGGGCGGCGGAAAGCGGCTGGCTGCGTTCGCCCGATAACGACCATGCCAGCATACGG
+CTACGTGCCGATACGTCCGCTAACAGTGAGACCCGGCTGTTGCTGGATGTCAAACTGGAA
+AACGGCTGGAAAACCTACTGGCGCGCGCCGGGGGAAGGGGGCGTGGCACCCTCTATCGCC
+TGGAAAGGCGACATGCCTGAGGTAAGCTGGTTCTGGCCAACCCCCTCGCGCTTTGATGTG
+GCGAATATCACCACCCAGGGATATCACGACGAGGTGACCTTTCCGATGATCGTGCGCGGT
+ACGCCGCCGGCGACCTTGCGCGGTGTGTTGACGTTATCAACCTGCAGCAATGTTTGTCTG
+TTGACCGATTACCCCTTTTCCGTGACGCCCACTGTGCAGAATGCCGATTTTGCCCATGAC
+TATGCGCGGGCGATGGGTAAAGTTCCGCTCCGCAGTGGGCTAACGGACTCGCTTGACGTT
+GGCTATCGCCCGGGAGAACTGGTGGTCACTGCTACGCGAGCGGCGGGCTGGTCATCGCCC
+GGGCTCTATCTTGACACCATAGATGACGTCGATTTTGCGAAGCCTCGCCTGCGCGTAGAG
+GGCGACAGGTTACAGGCGACGGTGCCGGTGACGGACAGTTGGGGCGAAAAGGCGCCCGAT
+TTGCGCGACAAATCGCTGACCCTCGTGTTAGCCGATGGCGCTATCGCCCAGGAGAGCACG
+CAAACCATTGGCGCTGCGCCAGCGCAAACGCCGGACAATGCGGCGCTACCTTTCTGGCAA
+GTTGTAATGATGGCGCTAATCGGCGGACTGATTCTTAATTTAATGCCCTGCGTACTGCCT
+GTTCTGGGCATGAAACTTGGCTCTATTTTATTGGTAGAGGAAAAAAGCCGCTCTCACATC
+AGGCGACAATTTTTGGCTTCGGTCGCCGGTATCATTGCGTCATTTATGGCGCTGGCGGCG
+TTTATGACCCTCCTTCGCCTGTCAAACCATGCGCTGGCCTGGGGAGTCCAGTTCCAGAAT
+GCATGGTTTATTGGTTTTATGGCGCTGGTGATGTTGTTGTTTAGCGCCAGCCTGTTCGGG
+CTTTTTGAGTTCAGGCTTCCCTCATCTATGACCACGAAACTGGCCACTTACGGCGGTAAC
+GGTATGTCGGGACATTTCTGGCAGGGGGCGTTCGCCACGCTGCTGGCGACGCCTTGTAGC
+GCGCCGTTTCTGGGCACGGCGGTCGCGGTGGCGCTCACGGCGTCGCTGCCGACGCTGTGG
+GGGCTGTTCCTTGCGCTTGGCCTGGGAATGAGCGCGCCGTGGCTACTGGTCGCGATACGA
+CCAGGGCTTGCGCTACGTTTACCGCGCCCCGGGCGTTGGATGAATGTCCTGCGCAGGATC
+CTCGGTCTGATGATGCTGGGGTCGGCTATCTGGCTGGCGACGTTACTCCTGCCGCATTTC
+GGCTTCACTGCGTCAAAGAGCGCGCAAGACACGGTTCAGTGGCAACCGTTGAGTGAACAG
+GCAATCCAGTCGGCGCTGGCGCAGCATAAGCGGGTATTTGTCGATGTCACTGCGGACTGG
+TGTATTACCTGTAAAGTGAATAAATACAACGTCCTGCAAAAAGAGGATGTGCAGGCCGCC
+TTGCAACAGCCGGATGTTGTGGCGCTGCGGGGAGACTGGACGCTGCCGTCCGATGCCATT
+ACAGATTTTCTGAAAACGCGCGGCCAGGTCGCCGTGCCGTTTAATCAGGTATATGGCCCC
+GGTTTGCCGGAAGGGGAGGCACTGCCCACTTTGCTGACCCGCGATGCGGTATTACAAACG
+TTGAAAAAAGCGAAAGGAATAACCCAAATGAAATACATGATTGTTTTACTGCTGGCGCTG
+TTTTCGACGCTGAGCATCGCGCAAGAAACCGCTCCTTTTACGCCGGATCAGGAAAAGCAG
+ATTAAAAATCTGATCCATGCGGCGTTGTTTAACGATCCTGCCAGCCCGCGGATAGGCGCT
+AAACACCCTAAGCTGACGCTGGTGAACTTTACGGATTACAACTGCCCGTACTGCAAACAG
+CTCGATCCGATGCTGGAAAAGATTGTGCAGAAATATCCTGACGTTGCGGTCATTATTAAA
+CCGCTGCCATTCAAAGGAGAGAGTTCCATACTGGCGGCGCGTATTGCGCTGACCACCTGG
+CGCGATCATCCGCAACAGTTCCTCGCGCTACATGAAAAACTTATGCAAAAGCGCGGTTAC
+CATACGGATGACAGTATTAAACAGGCCCAGCAGAAAGCAGGGGCGACGCCAGTGACGCTG
+GATGAAAAAAGCATGGAAACGATACGCACTAATTTGCAGTTGGCAAGACTGGTCGACGTG
+CAAGGAACGCCAGCGACGATCATTGGCGACGAGCTGATTCCGGGCGCAGTGCCCTGGGAT
+ACGCTGGAAGCGGTGGTGAAAGAAAAACTGGCGGCTGCCAATGGCGGGATGGCGGGTAAA
+CTGCGGCGTTGGCTGCGTGAAGCCGCGGTTTTTCTGGCGCTCCTCATCGCGATAATGGTG
+GTCATGGACGTCTGGCGCGCGCCGCAGGCGCCTCCGGCGTTTGCCGCGACACCATTACAT
+ACGCTGACGGGAGAGTCGACAACTCTGGCGACCTTGAGCGAGGAACGCCCCGTACTGCTC
+TATTTTTGGGCCAGCTGGTGCGGGGTATGCCGCTTTACCACGCCTGCGGTCGCTCACCTG
+GCGGCGGAAGGGGAAAACGTCATGACCGTTGCGCTCCGCTCCGGCGGTGATGCTGAGGTT
+GCCCGCTGGCTGGCGCGCAAGGGCGTTGACTTCCCGGTCGTCAATGATGCTAACGGCGCC
+TTATCCGCTGGCTGGGAAATCAGCGTGACGCCAACGCTGGTGGTGGTTTCACAAGGTCGG
+GTTGTGTTCACCACCAGCGGCTGGACCAGCTATTGGGGCATGAAGCTTCGGCTGTGGTGG
+GCAAAAACGTTCATGAAAAAATCATTACTCGCTGTTGCTGTGGCAGGGGCTGTTTTGTTG
+TCATCCGCCGTACAGGCGCAGACAACGCCGGAAGGTTATCAATTACAACAGGTGCTGATG
+ATGAGCCGCCATAATCTGCGGGCGCCGCTGGCGAATAATGGCAACGTACTGGCGCAGTCG
+ACGCCGAACGCCTGGCCGGCGTGGGACGTTCCCGGCGGGCAACTGACGACGAAAGGCGGC
+GTGCTGGAAGTCTATATGGGACACTACACACGTGAATGGCTGGTCGCGCAGGGGCTGATA
+CCGTCGGGAGAATGTCCGGCGCCCGACACGGTATATGCCTATGCGAATAGTTTGCAGCGC
+ACCGTCGCCACCGCGCAATTTTTCATTACCAGCGCTTTCCCCGGCTGTGATATTCCTGTT
+CATCATCAGGAAAAAATGGGCACTATGGACCCTACCTTCAATCCGGTGATTACCGATGAT
+TCCGCCGCGTTCCGGCAACAGGCCGTACAGGCGATGGAAAAGGCGCGTAGTCAGCTACAT
+CTTGATGAGAGTTATAAACTGCTTGAGCAGATAACGCATTATCAGGACTCGCCGTCCTGC
+AAAGAGAAGCATCAGTGTTCGCTAATCGACGCGAAAGATACCTTCAGCGCGAACTATCAG
+CAAGAGCCTGGCGTGCAGGGGCCGCTGAAAGTAGGGAACTCGCTGGTGGATGCGTTTACC
+CTGCAATATTACGAAGGCTTTCCGATGGATCAGGTCGCTTGGGGCGGGATCCACACCGAT
+CGGCAGTGGAAGGTGCTGTCAAAACTGAAAAACGGCTACCAGGACAGCCTGTTTACCTCA
+CCCACGGTGGCGCGCAATGTCGCTGCGCCGCTGGTAAAATATATCGATAAGGTGCTGGTT
+GCCGAGCGCGTTAGCGCGCCGAAGGTTACCGTGCTGGTGGGGCATGATTCCAATATCGCG
+TCGCTGCTGACGGCGCTGGATTTTAAACCCTATCAGCTCCATGACCAGTATGAGAGAACG
+CCGATTGGTGGTCAGCTTGTCTTCCAACGCTGGCATGACGGTAACGCTAACCGGGATTTG
+ATGAAAATCGAGTATGTCTACCAGAGCGCCCGGCAGTTACGTAATGCGGAAGCGTTAACG
+CTCAAATCGCCTGCGCAAAGGGTAACGCTGGAACTGAAAGGATGTCCGGTGGATGCGAAC
+GGCTTCTGTCCGCTGGATAAGTTCGATAACGTCATGAACACTGCTGCAAAAATGCCAACT
+CAAGAAGCAAAAGCGCACCGCGTCGGCGAATGGGCAAGCCTGCGTAATACGTCGCCGGAA
+ATTGCCGAAGCCATTTTTGAAGTCGCTCACTATGACGAGAAACTGGCAGAAAAAATATGG
+GAAGAAGGTAGCGATGAGGTGCTGATCAAAGCCTTTGAGAAAACGGACAAAGACTCGCTC
+TTCTGGGGCGAACAAGTCATCGAACGTAAGAACGTAATGGCAAAGATTCTGGTGCTCTAT
+TATTCCATGTACGGACACATTGAAACCATGGCGCACGCGGTGGCGGAAGGGGCAAAGAAA
+GTCGACGGCGCAGAGGTCATTATAAAGCGTGTGCCAGAAACAATGCCGCCTGAAATCTTC
+GCAAAAGCTGGCGGTAAAACGCAAAACGCACCGGTTGCCACCCCACAGGAGCTGGCGGAT
+TACGATGCCATTATTTTTGGTACGCCAACCCGGTTTGGCAATATGTCAGGCCAGATGCGT
+ACCTTCCTGGACCAAACCGGCGGACTGTGGGCATCCGGCGCGCTATACGGCAAGCTCGGC
+GGCGTGTTCAGTTCTACCGGAACGGGCGGCGGCCAGGAGCAGACCATCACCTCGACCTGG
+ACTACGCTTGCCCATCATGGGATGGTGATTGTCCCGATAGGCTATTCCGCACAGGAACTG
+TTTGACGTCTCCCAGGTTCGCGGCGGTACGCCTTACGGCGCAACGACTATCGCTGGAGGC
+GACGGTTCACGTCAACCAAGCCAGGAGGAACTCTCTATCGCTCGCTATCAGGGGGAATAC
+GTCGCCGGTCTGGCAGTCAAACTCAACGGCATGGCAAACCATCGTGGCGGTTCCGGTAAT
+TTTGCGGAAGACCGCGAAAGAGCATCAGAAGCAGGTCGTAAAAGTGGTCAGCACAGCGGG
+GGCAATTTTAAGAATGACCCGCAGCGTGCATCCGAAGCAGGCAAAAAAGGGGGCAAAAGC
+AGTAACCGTAATCGCATGTCGCAACGCACAGAGAAAAAAATCGGGAAACGTTCGCAGGCC
+ACCGGTGCAAAACGGCAGCTTATCTTAACCGCCGCGCTTGCCGTTTTTTCCCAGTATGGC
+ATTCATGGCGCGCGTCTTGAACAGGTCGCCGAGCGGGCAGGCGTCTCCAAAACCAATCTG
+CTTTATTATTATCCCTCGAAAGAGGCGCTGTATGTCGCGGTAATGCGACAGATTCTGGAT
+GTCTGGTTGGCGCCGCTCAAGGCGTTTCGCGCAGAATTTTCCCCTCTGGAGGCCATCAAA
+GAGTATATCCGTCTCAAGCTGGAGGTTTCGCGTGATTATCCGCAGGCGTCGCGGCTCTTC
+TGCATGGAGATGCTGGCGGGCGCGCCGCTCTTAATGGATGAACTGACCGGCGATCTAAAA
+GCGTTGATAGATGAAAAATCCGCGCTGATTGCCGGATGGGTGCACAGCGGGAAACTCGCG
+CCCGTTTCTCCGCATCATTTGATCTTCATGATTTGGGCCGCCACGCAACATTACGCCGAT
+TTCGCCCCTCAGGTTGAAGCGGTAACCGGCGCGACGCTTCGCGATGAAGCCTTTTTCAAC
+CAAACGGTCGAAAGCGTTCAGCGCATTATTATTGAAGGGATTCGCGTGCGTATGAAACGA
+ATTTTCCTTACCTGCGCGGCGTTGTTGTTCAGCAGTCAGGCGTTGGCCGATGAGTGTGCC
+AGCGCCAGTACGCAGCTGGAAATGAATCGCTGCGCCGCCGCGCAATACCAGGCGGCAGAT
+AAAAAGCTGAACGAAACCTATCAAAGCGCGATTAAGCGTGCGCAACCGCCGCAGCGTGAG
+CTATTGCAAAAAGCGCAGGTGGCATGGATTGCCCTGCGCGACGCCGATTGCGCGCTGATT
+CGCTCAGGTACGGAGGGCGGCAGCGTTCAACCCATGATCGCCAGCCAGTGCCTGACCGAT
+AAAACGAACGAACGCGAAGCGTTTTTAGCCTCGCTGCTGCAATGTGAAGAGGGTGATTTG
+AGCTGCCCACTGCCGCCAGCCGGTATGGGAACCACCACGATGGGGGTTAAGCTGGACGAC
+GCCACGCGCGAACGGATCAAAATGGCCGCGTCGCGTATCGATCGCACGCCGCACTGGTTA
+ATAAAACAGGCAATCTTTAGCTATCTGGACAAGCTGGAAAATAGCGATACGCTACCGGAG
+CTACCTGCGCTGTTTGCCGGCGCGGCAAATGAAAGCGAGGAGCCGGTCGCGCCGCAGGAT
+GAGCCGCATCAGCCCTTTCTGGAGTTTGCCGAACAGATTCTTCCCCAATCCGTCTCTCGC
+GCCGCCATCACCGCCGCCTGGCGCCGCCCGGAAACCGATGCGGTGTCAATGCTAATGGAA
+CAGGCGCGCCTGTCGCCGCCTGTCGCTGAGCAGGCGCATAAACTGGCGTATCAACTGGCG
+GAGAAATTGCGCAATCAAAAATCCGCCAGCGGTCGCGCGGGTATGGTGCAAGGCCTGTTG
+CAGGAGTTTTCCCTCTCTTCGCAAGAAGGCGTAGCGCTGATGTGTCTGGCGGAAGCGCTG
+CTGCGTATTCCCGACAAAGCTACGCGCGATGCGTTAATTCGCGACAAAATCAGTAATGGC
+AACTGGCAGTCGCATATTGGCCGTAGCCCGTCGCTGTTTGTAAACGCCGCCACCTGGGGG
+CTGCTCTTTACCGGCCGACTGGTCTCAACGCATAACGAAGCCAATCTTTCGCGCTCGCTG
+AACCGCATTATCGGCAAGAGCGGCGAACCGTTAATCCGCAAAGGCGTCGACATGGCGATG
+CGTTTAATGGGCGAGCAGTTCGTGACTGGCGAAACCATTGCTCAGGCGCTGGCGAATGCC
+CGAAAACTGGAAGAGAAAGGGTTCCGCTATTCTTACGATATGCTGGGCGAAGCCGCGTTA
+ACCGCCGCCGATGCGCAGGCCTATATGGTCTCTTACCAGCAAGCGATTCATGCCATCGGC
+AAAGCGTCTAACGGTCGCGGTATTTACGAAGGGCCAGGCATCTCGATTAAGCTGTCCGCC
+CTGCATCCACGCTATAGTCGCGCGCAATACGATCGGGTAATGGAGGAGCTTTATCCGCGC
+CTGAAATCCCTGACGCTGCTGGCGCGCCAGTATGATATCGGTCTCAATATCGACGCCGAA
+GAGGCGGATCGTCTGGAGATCTCGCTTGATCTGCTGGAAAAACTCTGCTTCGAACCCGAA
+CTGGCGGGCTGGAACGGCATTGGCTTTGTGATTCAGGCTTACCAGAAACGCTGCCCGCTG
+GTCATTGATTATTTAGTCGATCTGGCCTCCCGTAGCCGCCGTCGGCTGATGATTCGTCTG
+GTGAAAGGCGCCTACTGGGATAGCGAGATCAAACGCGCGCAAATGGAAGGGCTGGAGGGC
+TATCCAGTTTATACCCGCAAAGTGTATACCGATGTCTCTTATCTGGCCTGCGCGAAAAAA
+CTGCTCGCCGTCCCTAATCTGATCTACCCGCAGTTCGCGACCCATAACGCTCACACACTG
+GCGGCGATTTATCATCTGGCCGGGCAAAATTACTATCCGGGTCAGTACGAATTCCAGTGC
+CTGCACGGCATGGGAGAACCGCTGTATGAACAGGTCACCGGTAAAGTGGGGGACGGAAAA
+CTTAACCGTCCCTGCCGTATTTACGCGCCGGTGGGAACACACGAAACCCTGCTGGCCTAT
+CTGGTACGACGCCTGCTGGAAAACGGCGCCAACACCTCTTTTGTCAACCGCATCGCCGAT
+GCCACCCTACCGCTCGATGAACTGGTGGCCGACCCGGTCGAGGCCGTGGAAAAACTGGCG
+CAGCAGGAAGGTCAGGCTGGCATACCGCATCCAAAAATTCCGCTGCCGCGCGATCTGTAC
+GGCGAAGGTCGGATAAACTCCGCCGGACTTGATTTAGCGAATGAACATCGCCTCGCCTCG
+CTTTCTTCTGCCCTGTTAAGCAACGCCATGCAGAAATGGCAGGCCAAACCTGTGCTGGAA
+CAACCGGTGGCCGACGGTGAGATGACGCCGGTTATCAACCCGGCGGAACCGAAAGATATT
+GTTGGCTGGGGACGCGAAGCGACAGAAAGCGAGGTTGAACAGGCGTTGCAAAACGCGGTC
+AATCAGGCGCCGGTTTGGTTTGCGACGCCGCCGCAAGAACGCGCCGCTATTTTGCAGCGG
+GCGGCGGTATTGATGGAAGACCAAATGCAGCAGTTGATTGGCCTGTTGGTGCGTGAAGCG
+GGGAAAACGTTCAGCAACGCCATTGCCGAAGTGCGCGAAGCGGTAGACTTCCTCCATTAT
+TATGCCGGTCAAGTGCGTGACGATTTCGATAACGAAACGCATCGCCCGTTAGGGCCGGTG
+GTCTGTATCAGTCCGTGGAACTTTCCGCTGGCCATTTTCACTGGCCAAATCGCCGCCGCG
+CTGGCGGCAGGTAACAGCGTTCTGGCGAAACCGGCAGAGCAGACATCGCTGATTGCCGCC
+CAGGGCATTGCCATTTTGCTGGAAGCGGGCGTACCGCCGGGCGTCGTGCAACTGTTGCCG
+GGACGGGGAGAAACCGTCGGCGCCCAGCTTACCGCCGATGCGCGTGTACGCGGCGTGATG
+TTTACCGGTTCCACGGAGGTCGCGACGTTGTTGCAGCGCAACATCGCCACGCGTCTTGAC
+GCCCAGGGGCGCCCTATTCCGTTGATTGCGGAAACCGGCGGTATGAACGCTATGATTGTC
+GACTCTTCCGCGCTCACCGAGCAGGTGGTCGTGGATGTGCTGGCTTCCGCCTTCGACAGC
+GCCGGACAACGCTGTTCCGCGCTCCGCGTGCTGTGTTTGCAGGACGATATCGCCGAACAT
+ACGCTGAAAATGTTACGCGGCGCGATGGCGGAGTGTCGGATGGGGAATCCAGGCCGTCTG
+ACGACCGATATCGGGCCGGTGATCGATAGCGAGGCCAAAGCCAACATTGAACGTCATATC
+CAGACGATGCGCGCCAAAGGCCGCCCGGTTTTCCAGGCCGCGCGTGAAAACAGCGATGAC
+GCGCAGGAATGGCAGACCGGTACGTTTGTTATGCCCACGCTTATTGAGCTGGAAAACTTC
+GCAGAACTGGAAAAAGAGGTCTTCGGGCCCGTGCTGCACGTCGTGCGTTATAACCGTAAC
+CAACTGGCGGAGCTTATCGAACAGATTAACGCTTCCGGCTACGGGCTAACGCTGGGCGTA
+CATACCCGTATTGATGAAACCATTGCGCAAGTCACCGGTTCCGCCCATGTCGGCAACCTG
+TACGTTAACCGTAATATGGTGGGCGCGGTCGTCGGCGTCCAGCCGTTTGGCGGCGAAGGC
+CTGTCCGGCACCGGGCCAAAAGCGGGAGGGCCGCTCTATCTCTACCGCCTGCTGGCACAC
+CGCCCGCCCAATGCGCTCAATACGACGCTGACTCGTCAGGATGCGCGTTACCCGGTGGAT
+GCGCAGCTTAAAACCACGCTACTCGCGCCGTTGACCGCTCTGACGCAATGGGCGGCGGAT
+CGCCCGGCGCTACAGACGCTCTGCCGACAATTCGCCGATCTGGCGCAGGCCGGCACGCAG
+CGCCTGCTACCGGGGCCGACCGGCGAGCGTAATACCTGGACGCTGTTGCCGCGTGAACGG
+GTGTTATGCCTGGCTGATGATGAACAGGACGCGTTGACGCAGCTTGCCGCCGTTCTCGCC
+GTCGGCAGTCAGGCGCTATGGTCAGACGACGCCTTCCACCGCGATCTGGCGAAACGTCTC
+CCCGCCGCCGTCGCGGCGCGTGTCCAGTTTGCGAAAGCGGAAACGCTGATGGCGCAGCCG
+TTTGACGCGGTGATTTTCCACGGCGACTCCGACAAGCTGCGAACCGTGTGCGAAGCCGTC
+GCCGCCCGCGAAGGCGCGATAGTGTCGGTACAGGGGTTCGCCCGCGGCGAAAGCAATATG
+CTGCTGGAACGGCTCTATATTGAACGTTCGCTGAGCGTAAACACTGCCGCCGCTGGCGGT
+AATGCCAGCCTGATGACAATTGGCATGGCTATTAGCACACCGATGTTGGTGACATTCTGT
+GTCTATATTTTTGGCATGATATTGATTGGGTTTATCGCCTGGCGCTCAACCAAAAACTTT
+GATGACTATATTCTTGGCGGTCGCAGCCTGGGGCCGTTTGTTACGGCTTTATCAGCCGGC
+GCGTCGGATATGAGCGGCTGGCTGTTAATGGGGCTGCCTGGCGCTATCTTTCTGTCGGGG
+ATCTCTGAAAGCTGGATCGCCATTGGCCTGACGTTAGGCGCATGGATTAACTGGAAGCTG
+GTGGCCGGGCGCCTGCGCGTGCATACCGAATTTAACAATAACGCGCTCACGCTGCCGGAC
+TATTTTACCGGTCGGTTTGAGGATAAGAGCCGAGTCCTGCGTATTATTTCCGCGCTGGTC
+ATTCTGCTGTTTTTCACTATCTATTGCGCATCAGGTATTGTCGCTGGGGCACGACTGTTC
+GAAAGCACCTTCGGTATGAGCTATGAAACCGCACTGTGGGCGGGGGCCGCGGCAACCATT
+ATTTATACCTTTATCGGCGGGTTTCTTGCCGTTAGCTGGACGGATACCGTTCAGGCCAGC
+CTGATGATTTTTGCGTTAATCCTGACGCCGGTGATGGTTATTGTCGGCGTAGGCGGTTTT
+AGCGAGTCGCTGGAAGTGATCAAGCAAAAGAGCATCGAGAATGTCGACATGCTCAAGGGG
+CTGAATTTTGTCGCTATTATTTCTCTGATGGGCTGGGGGCTGGGTTACTTCGGTCAGCCG
+CATATCCTGGCGCGCTTTATGGCGGCGGATTCCCATCACAGTATTGTTCATGCGCGTCGT
+ATCAGTATGACCTGGATGATTCTGTGTCTGGCGGGCGCGGTGGCGGTGGGCTTCTTTGGC
+ATTGCGTACTTTAACAATAACCCCGCGCTGGCCGGGGCGGTGAACCAAAACTCAGAACGC
+GTATTTATTGAACTGGCGCAGATCCTGTTTAACCCGTGGATTGCCGGTGTTCTGCTGTCT
+GCTATCCTGGCGGCGGTGATGTCGACGTTGAGCTGTCAGTTGCTGGTATGCTCCAGCGCG
+ATTACGGAAGATTTATATAAGGCTTTTCTGCGTAAAAGCGCCAGCCAGCAAGAGCTGGTA
+TGGGTAGGGCGAGTGATGGTGCTGGTGGTAGCGCTGATCGCCATTGCGCTGGCGGCGAAT
+CCTGATAACCGTGTGCTGGGGCTGGTGAGCTACGCCTGGGCTGGATTCGGCGCGGCATTT
+GGACCTGTTGTCCTGTTTTCTGTGATGTGGTCGCGTATGACACGTAACGGCGCGCTGGCG
+GGAATGATTATTGGCGCGGTGACGGTTATCGTCTGGAAACAATATGGCTGGCTGGATCTG
+TATGAGATTATCCCTGGCTTCATTTTCGGCAGCCTGGGGATCGTAATCTTTAGCCTGCTT
+GGCAAAGCGCCGACAGCAACGATGCAGGAACGCTTTGCAAAAGCGGACGCGCATTATCAT
+TCCGCGCCGCCGTCGAAGCTACAGGCGGAAATGGTAATGTCCGCACCAGGACACATTGTT
+TACAGTAGTTACAACACCCTGTACGGACATTCTCTCTCCGGTGGTGGTCTTGTCATCTTA
+AAAGCTCTCATCATTTCCCTTACTGTCCATACCCATGACGCCATATGTGGTGCGCGTAGC
+CGTGTGTGGCGTCGTTTCAAAAAGCAAGCTAAGGCTTACAAGGAAGCCAACCCTCAGATG
+TGTGTGCGCATAATCGCGTTCAAGAGAACGCGGGTGATGTATACCTACAACTCAAGGTGC
+TATCCATGGGAAGACAAAAAGCAGATGGGAAGACAAAAAGCAGTGATCAAAGCTCGTCGT
+GAAGCAAAGCGTGTGTTGAGACGAGATTCGCGTAGTCATAAGCAACGTGAAGAAGAATCG
+GTCACGTCACTGGTACAGATGGGCGGAGTAGAAGCCATTGGCATGGCGCGCGATAGTCGC
+GATACCTCTCCTGTTAAGGCGCGAAATGAAGCACAGGCGCATTATCTGAACGCTATCGAC
+AGTAAACAGCTTATTTTTGCGACCGGCGAAGCCGGCTGCGGAAAAACATGGATCAGTGCG
+GCAAAGGCGGCAGAAGCATTGATTCATAAGGACGTCGAGAGGATCATTGTGACGCGTCCG
+GTATTGCAGGCTGATGAAGATCTTGGTTTTTTGCCCGGTGATATCGCTGAAAAATTCGCG
+CCTTATTTTCGTCCCGTCTACGATGTCCTGCTTAAACGGTTGGGCGCGTCCTTTATGCAA
+TATTGTTTGCGCCCGGAAATCGGTAAGGTAGAAATTGCCCCGTTCGCCTATATGCGTGGG
+CGTACTTTTGAAAATGCGGTCGTGATCCTCGACGAGGCGCAAAATGTGACTGCGGCGCAA
+ATGAAAATGTTTTTGACGCGATTAGGCGAAAATGTCACGGTCATTGTCAATGGCGATATT
+ACGCAATGCGACCTGCCGCGCGGTGTGCGTTCCGGGTTGAGTGATGCGTTGGAACGCTTT
+GAAGAAGATGAAATGGTGGGGATTGTGCATTTCAACAAAGACGACTGCGTGCGCTCGGCG
+CTTTGTCAGCGAACGCTCCACGCATACAGCATGGAGCCTCAACCCCCACGTCTGAAACCC
+GGAAAAATCCTTGACACTCTGGGTGCTATGCAAAAAAGCCTGACACGTGCCTCGCAGCGT
+ATTGCGCAATATATTTTAGCCTTCCCCAGACAGGTGACACAGTCATCTATTGCGGATTTG
+TCGCGCGACACACAGGCCGGAGAAGCCACGGTTATTCGCTTTTGTCGCACCCTGGGCTAT
+AAAGGTTTTCAGGATTTTAAAATGGACCTGGCCATTGAACTTGCCACTACCGAGTCTGAT
+GACAGTAGTCCTCTACTGGATGCCGAAGTTAGCGAATCCGACGATGCCCACGCGATTGGT
+TTAAAATTGCAGAACACCATTAGTAATGTATTATCTGAAACGCTAAATCTGCTGGATATG
+CAACAGGTTCTCGGTGTCGTGGACGCCCTACGTCACTGTCACTCAGTTTATATGTTTGGT
+GTGGGCTCATCGGGGATCACGGCGCTGGATATGAAACACAAGCTAATGCGTATGGGTTTA
+CGGGGCGATGCGGTAAGCAATAACCATTTTATGTACATGCAGGCTACGCTATTGAAAGCA
+GGCGATGTCGCGATGGGTGTCAGTCACTCGGGCACATCGCCAGAAACAGTGCATTCACTC
+CGATTGGCCCGACAGGCTGGCGCCACCACAGTCGCCATTACCCATAATCTGGGTTCTCCA
+TTATGTGAAGAGGCCGATTTTTGCCTGATCAATGGTAATCGGCAAGGAATGTTGCAGGGT
+GACTCGATCGGTACGAAAGCCGCGCAGCTTTTCGTCTTTGACCTGCTCTATACCCTTCTT
+GTACAGTCCTCGCCGGAACAGGCCCGAGAAAGCAAATTACGGACAATGAATGCCCTGGAC
+ATGACAAAAGTGATATGTCTGAAAGTCCAGGGCGGCATTGGTGAAATTTTTACGGTGACG
+CAGCAGGCGGATAAATTCTTGCCGGCTACGCAGTTCCACTGGAGCTGGACGGAAAGCACA
+GTACCTGTATTGATGATTGGGTTTCTGTTTGCCAATATTCAGCAATTTACTGCCAGTCAG
+GATGTGGTCCAACGCTATATGGTGACTGACTCCATAGAGGAAACGAAGAAAACATTACTT
+ACAAATGCCAAACTGGTTGCGGTGATCCCTGTTTTCTTTTTTGCTATCGGCTCGGCATTA
+TTTGTCTACTATCAGCAACAGCCACAATTATTACCGGCGGGATTCAACACTGGCGGCATT
+TTGCCCTTATTCGTGGTCACGGAAATGCCAGTCGGCATTGCAGGGTTGATAATCTCCGCT
+ATTTTCGCTGCCGCGCAGTCGAGCATCTCCAGCAGCTTAAACAGCATTTCCAGTTGTTTT
+AATTCCGATATCTATCAGCGGTTGAGTCATAAAAAAGGAACGCCAGAAAACCGTATGAAA
+ATAGCTAAGTTAGTTATTCTGGTCGCGGGCCTGATAAGTAGCGCGGCCTCGGTATGGCTG
+GTCATGGCCGATGAATCAGAGATCTGGGATGCATTTAATAGTCTGATAGGTCTGATGGGA
+GGGCCAATGACCGGTCTGTTGATGCTGGGCATTTTCTTTAAACGAGCAAATGCCGGGAGT
+GCGGTTTTAGGAATTATTATGAGCGTCATTACCGTGCTGGGCACACGCTATGCCACTGAC
+CTTAACTTCTTCTTTTATGGGGTCATTGGCTCGCTAAGCGTGGTGATCAGCGGCGTTATT
+TTCGCCCCGTTATTTGCCCCGGCACCGCCATTGACGCTGGATGAAAAACCTGAACCAAAG
+GTGACATTAATGTCACTATTAGCCAGGCTGGAACAAAGTGTACACGAAAACGGTGGGCTG
+ATTGTCTCATGCCAACCGGTACCAGGCAGCCCTATGGATAAACCTGAAATTGTGGCTGCA
+ATGGCACAGGCAGCGGCTTCGGCGGGTGCGGTCGCTGTGCGCATTGAAGGCATTGAGAAT
+CTGCGGACTGTTCGTCCCCATCTTTCTGTTCCTATTATTGGGATAATTAAACGTGACCTT
+ACAGGGTCGCCAGTCCGTATCACTCCATATTTACAGGATGTTGACGCCCTGGCGCAGGCA
+GGTGCCGATATTATCGCTTTTGATGCCTCATTCCGCTCTCGCCCGGTTGATATTGATAGT
+TTACTGACACGTATTCGCCTGCATGGATTACTGGCGATGGCAGACTGTTCAACCGTGAAT
+GAAGGCATAAGTTGCCATCAGAAAGGAATCGAATTCATTGGTACAACACTGTCTGGCTAT
+ACCGGTCCCATCACGCCGGTTGAGCCAGATTTGGCAATGGTGACACAACTGAGTCATGCA
+GGTTGTCGTGTTATTGCCGAGGGGCGCTATAACACGCCTGCACTGGCGGCCAATGCTATT
+GAGCATGGTGCCTGGGCAGTTACCGTTGGTTCCGCTATCACCCGTATCGAGCATATCTGT
+CAGTGGTTCAGTCACGCAGTAAAACGCATGAAAAATTTTAAGAAAATGATGACGCTAATG
+GCGCTATGTTTATCAGTTGCTATCACCACATCAGGATATGCAACCACGCTTCCTGATATA
+CCAGAACCACTGAAAAATGGTACTGGCGCTATTGATAATAATGGCGTGATTTATGTCGGC
+TTAGGTACCGCAGGGACATCCTGGTATAAAATTGATCTTAAAAAGCAACATAAAGACTGG
+GAGCGTATAAAGTCGTTTCCTGGTGGAGCTCGTGAGCAATCCGTGTCGGTATTTTTAAAT
+GATAAGCTGTATGTTTTTGGTGGCGTAGGGAAAAAAAACAGTGAATCACCGTTGCAGGTT
+TATAGCGATGTGTACAAATACTCACCGGTGAAAAATACATGGCAAAAAGTTGATACTATA
+TCTCCAGTTGGATTAACAGGGCATACGGGAGTAAAATTAAACGAAACGATGGTACTTATT
+ACCGGAGGGGTTAATGAGCATATCTTTGATAAGTATTTTATTGATATAGCGGCTGCGGAT
+GAAAGTGAAAAAAATAAAGTCATCTATAATTATTTTAATAAACCTGCCAAAGATTATTTT
+TTTAATAAAATCGTATTTATCTACAATGCTAAAGAGAACACATGGAAGAATGCCGGTGAG
+CTGCCAGGCGCGGGGACGGCAGGATCGTCATCGGTAATGGAAAATAATTTCTTGATGCTG
+ATTAATGGTGAGCTCAAACCGGGTTTACGTACCGATGTGATTTACCGCGCCATGTGGGAT
+AACGATAAGCTAACATGGTTGAAGAACAGCCAGTTACCGCCATCGCCTGGAGAACAACAG
+CAGGAAGGGTTGGCCGGAGCATTTTCGGGCTATAGCCACGGTGTCCTGCTTGTCGGTGGT
+GGCGCGAATTTTCCGGGAGCAAAACAAAATTATACTAATGGAAAGTTTTATTCCCACGAA
+GGGATAAATAAAAAATGGCGAGATGAAGTCTATGGTTTGATTAATGGCCATTGGCAATAT
+ATGGGTAAAATGAAACAACCTCTCGGCTATGGTGTATCAGTAAGTTATGGTGATGAAGTT
+TTCCTTATTGGTGGTGAAAATGCTAAAGGGAAACCTGTTTCGTCTGTAACCTCCTTTACC
+ATGCGTGATGGTAATTTATTAATAAAAATGAAAATCAACAGATATCTTCTGGGTATGGTT
+TCGTTTATAGCATTTTCATCATATCTACAAGCGGCAACCCTTGATTATCGGCATGAATAT
+GCTGATAGAACCAGAATTAATAAAGACCGTATTGCTATAATTGAAAAGCTTCCTAACGGC
+ATTGGTTTTTATGTCGATGCCAGCGTTAAATCGGGAGGAGTAGATGGTGAGCAGGATAAG
+CATTTAAGCGATCTCGTCGCAAACGCTATAGAACTGGGCGTAAGTTATAATTATAAAGTT
+ACGGACCATTTTGTTTTGCAGCCTGGATTTATATTTGAAAGCGGTCCAGACACTTCAATT
+TATAAGCCTTATTTAAGGGCGCAATATAATTTTGATTCTGGTGTTTATATGGCTGGTCGT
+TACCGTTATGACTATGCAAGGAAGACAGCTAACTATAATGATGATGAGAAAACGAATAGA
+TTTGATACTTATATAGGTTATGTTTTTGATGAGTTGAAATTGGAATATAAATTTACCTGG
+ATGGATAGCGATCAAATTAAATTTGATAACAAAAAAACAAACTATGAACATAATGTGGCT
+TTAGCCTGGAAACTGAATAAGTCATTTACACCATACGTTGAGGTCGGAAATGTAGCGGTG
+AGAAATAATACCGATGAGAGACAGACCCGTTATCGCGTTGGATTACAATACCACTTTGTG
+ATAGCAAAATTCTTCCCGTGGTATAGCGAGATAACACGTCCACAAAAAAATGCTTTATTT
+TCAGCATGGCTGGGTTACGTTTTTGATGGCTTCGACTTTATGCTGATTTTCTACATTATG
+TATCTGATCAAGGCTGACTTAGGATTGACAGATATGGAGGGCGCATTCCTTGCCACAGCG
+GCCTTTATTGGGCGACCATTTGGCGGGGCGCTATTTGGTCTGCTGGCAGACAAATTTGGC
+CGTAAGCCGTTAATGATGTGGTCGATAGTTGCCTATTCTGTAGGTACAGGGTTAAGTGGC
+CTGGCTTCCGGTGTAATTATGCTGACGCTTAGTCGTTTTATTGTCGGTATGGGGATGGCG
+GGGAAGTATGCTTGCGCTTCTACTTATGCCGTGGAAAGTTGGCCAAAGCATTTAAAATCT
+AAAGCGAGCGCATTTCTGGTTTCAGGTTTCGGTATTGGTAACATCATAGCAGCCTATTTT
+ATGCCGTCATTTGCCGAAGCGTATGGTTGGCGTGCTGCTTTTTTTGTCGGTTTGCTACCC
+GTTCTTTTAGTAATCTACATCCGGGCCAGGGCTCCTGAATCTAAAGAGTGGGAAGAAGCC
+AAACTCAGTGGTCTCGGAAAGCATTCACAAAGTGCCTGGTCAGTTTTCTCTTTGTCAATG
+AAAGGGCTATTTAATCGAGCTCAATTTCCACTGACATTATGTGTATTTATTGTTCTGTTC
+TCTATTTTCGGCGCAAACTGGCCGATCTTTGGTCTACTGCCTACATATTTGGCGGGAGAG
+GGCTTTGATACGGGCGTGGTCTCTAATTTAATGACGGCGGCGGCATTCGGCACTGTATTG
+GGAAATATCGTTTGGGGTCTGTGCGCAGATAGAATTGGTTTGAAGAAAACGTTCAGCATT
+GGTCTTCTCATGTCCTTTTTATTCATTTTCCCGTTATTCAGAATTCCGCAAGATAATTAT
+TTACTGCTGGGCGCATGTTTATTCGGTTTAATGGCGACTAACGTAGGTGTTGGCGGGCTG
+GTTCCCAAATTTCTCTACGACTACTTTCCTCTTGAGGTTCGTGGTTTGGGTACCGGGCTT
+ATTTATAATCTTGCTGCGACATCAGGCACATTCAATTCAATGGCGGCGACCTGGCTTGGA
+ATAACAATGGGGCTAGGCGCTGCGCTAACGTTCATTGTTGCTTTCTGGACCGCAACAATT
+CTACTCATTATTGGCCTATCCATTCCGGATAGACTAAAAGCACGTCGTGAAAGGTTTCAG
+TCAACAAAAGAATTTATGACGAAATACGGTGTTATAGGTACAGGTTATTTTGGCGCTGAA
+CTGGCGCGATTTATGTCTAAGGTTGAAGGGGCGAAAATCACTGCGATTTACGATCCGGTA
+AATGCGGCTCCGATAGCGAAAGAGCTGAACTGTGTCGCCACTTCAACGATGGAGGCGCTT
+TGTACCCATCCTGATGTGGATTGCGTAATTATTGCTTCACCAAATTACTTACATAAAGCG
+CCGGTCATTGCGGCGGCTAAAGCGGGTAAACACGTGTTTTGTGAAAAACCTATCGCCTTA
+AATTACCAGGATTGTAAGGATATGGTTGATGCCTGCAAAGAAGCTGGTGTTACCTTTATG
+GCGGGTCACGTTATGAACTTTTTTCACGGGGTTCGCCACGCTAAAGCGCTCATCAAAGCC
+GGTGAAATCGGTGAAGTTACACAAGTTCACACTAAACGTAATGGTTTTGAAGACGTGCAG
+GATGAGATCTCATGGAAGAAGATTCGCGCAAAGTCAGGTGGGCATCTGTACCATCACATT
+CACGAGCTAGATTGTACACTGTTCATCATGGATGAAACCCCATCCCTGGTTTCAATGGCG
+GCGGGGAATGTTGCGCACAAAGGTGAAAAATTTGGTGATGAAGATGATGTTGTCCTAATC
+ACCCTTGAGTTTGAAAGCGGTCGTTTCGCGACACTTCAGTGGGGATCATCGTTCCACTAC
+CCTGAGCACTATGTATTAATTGAGGGCACGACAGGTGCAATTCTCATTGATATGCAAAAC
+ACGGCTGGTTATCTAATAAAAGCGGGCAAAAAAACACACTTTCTTGTGCATGAAAGCCAG
+GCGGAGGATGATGATCGTCGCAACGGTAACATATCCAGCGAGATGGATGGCGCAATCGCT
+TATGGTAAACCCGGTAAACGTACGCCGATGTGGCTCTCATCAATTATGAAACTGGAGATG
+CAGTACTTGCATGATGTGATAAACGGTCTGGAGCCAGGCGAGGAGTTTGCTAAATTGCTA
+ACGGGAGAAGCGGCGACAAATGCCATTGCTACCGCTGATGCTGCGACGCTTTCTTCAAAC
+GAGGGGCGCAAAGTTAAACTCACTGAAATTCTTGGCATGGAGATAATTTTTTATCACCCG
+ACATTTAACGCCGCCTGGTGGGTAAATGCGCTGGAGAAGGCTCTCCCACATGCGCGCGTT
+CGTGAATGGAAGGTCGGTGATAACAACCCCGCAGACTATGCGCTTGTATGGCAGCCCCCG
+GTTGAAATGCTGGCCGGAAGACGCTTAAAAGCCGTCTTTGTGCTGGGCGCGGGGGTGGAT
+GCAATTCTGAGTAAATTAAATGCGCATCCGGAAATGCTGGACGCCTCCATTCCTCTATTC
+CGTCTGGAAGATACCGGAATGGGCCTGCAAATGCAGGAGTATGCCGCCAGCCAGGTATTA
+CACTGGTTCCGTCGTTTCGATGATTATCAGGCGCTGAAAAATCAGGCGCTATGGAAACCG
+TTGCCGGAATATACCCGCGAAGAGTTTAGCGTCGGTATCATAGGCGCAGGGGTACTGGGC
+GCAAAAGTGGCAGAAAGTCTACAGGCGTGGGGGTTCCCGTTACGTTGCTGGAGTCGTAGC
+CGCAAATCCTGGCCTGGCGTGGAAAGTTATGTAGGGCGTGAAGAACTGCGCGCTTTCCTG
+AACCAGACGCGGGTGCTGATTAATCTGCTGCCGAATACGGCCCAAACGGTAGGAATTATT
+AATAGCGAATTGTTGGATCAATTGCCGGATGGCGCTTACGTGCTGAATCTCGCGCGCGGC
+GTTCATGTTCAGGAGGCGGATCTGCTGGCTGCGCTTGATAGCGGTAAGCTAAAAGGCGCG
+ATGTTGGATGTCTTTAGCCAGGAACCGTTACCGCAGGAAAGTCCATTATGGCGCCATCCG
+CGAGTCGCCATGACGCCGCACATTGCGGCAGTCACCCGTCCGGCGGAAGCCATCGATTAT
+ATTAGCCGCACCATTACCCAGCTGGAGAAGGGAGAGCCGGTGACGGGGCAGGTGGATCGG
+GCGAGAGGATATTGGATGATGCGCGCGATGAACATACTTCTTTCTATTGCTATCACTACG
+GGCATCCTTTCTGGAATATGGGGATGGGTGGCCGTCTCCCTGGGGTTACTAAGCTGGGCC
+GGTTTTTTAGGCTGTACGGCTTATTTGGCCTGTCCGCAGGGCGGCTTTAAGGGATTGTTG
+ATTTCCGCCTGTACGCTGTTAAGCGGGATGGTGTGGGCGCTGGTCATTATTCACGGTAGC
+GCGTTGGCGCCGCATCTGGAAATTGTGAGTTACGTGTTGACGGGGATCGTGGCATTCCTG
+ATGTGTATCCAGGCAAAGCAGCTATTGCTTTCTTTTGTTCCGGGAACATTTATCGGCGCC
+TGCGCGACATTTGCAGGGCAGGGTGAGTGGCGGTTGGTATTACCGTCGCTGGCGCTGGGG
+CTAATCTTTGGCTATGCCATGAAAAAGAGTGGGCTATGGCTGGCATCACGCCGCGAGCAA
+CATTCAGCGAATACGGCGGTCACAAAGATGAAACGCTATCTGACCTGGATTGTAGCAGCA
+GAGTTACTGTTCGCTACCGGAAACCTCCATGCCAATGAAGTTGAAGTCGAGGTTCCCGGA
+TTGTTAACCGACCATACCGTCTCTTCCATAGGACATGAATTCTATCGTGCATTCAGCGAC
+AAATGGGAAAGCGAATACACCGGCAACCTGACCATTAATGAAAGACCCAGTGCGCGTTGG
+GGAAGCTGGATCACCATAACGGTAAACCAGGACGTTATTTTCCAGACCTTTTTATTTCCA
+ATGAAAAGAGACTTCGAGAAAACCGTCGTCTTCGCATTAGCGCAAACAGAGGAAGCATTA
+AATCGCCGACAAATAGATCAAACGCTCTTAAGTACGAGTGATTTAGCGCGTGATGAATTC
+ATGTCCGTAATCAAGAAAAATATCCCTGCCATAGGCCTGTGTATCTGCGCTTTTTTTATC
+CATTCTGCGGTAGGGCAACAAACGGTACAGGGCGGCGTTATCCATTTTCGCGGCGCGATT
+GTTGAGCCACTGTGCGATATTTCTACTCACGCCGAAAATATTGATTTAACCTGCCTACGC
+GAAGGTAAAAAGCAAATGCACCGGATAGACCTTCGGCAGGCATCTGGATTACCGCAGGAT
+ATTCAGTCCATTGCGACGGTACGGCTGCATTATCTCGATGCGCAAAAAAGCCTGGCGGTG
+ATGAATATTGAGTACCGTATGACATCACGTCTTCAGGTCATACAGGGTGATATCACTCAA
+CTTAGCGTCGATGCGATTGTGAATGCCGCTAACGCATCATTAATGGGCGGCGGTGGCGTA
+GACGGCGCAATTCATCGCGCGGCGGGGCCGGCATTGCTGGACGCCTGTAAACTCATCCGT
+CAGCAACAGGGCGAATGTCAGACGGGACATGCGGTTATCACGCCTGCTGGCAAGCTTTCG
+GCAAAGGCGGTTATTCACACAGTGGGGCCCGTCTGGCGAGGCGGCGAACACCAGGAAGCT
+GAGCTACTCGAAGAGGCATACCGGAATTGTTTGCTGCTTGCCGAGGCGAATCACTTTCGT
+TCCATCGCTTTTCCGGCAATCAGTACCGGCGTTTATGGCTATCCACGCGCCCAGGCCGCT
+GAAGTCGCCGTCAGGACGGTTTCAGATTTTATTACCCGTTACGCTCTGCCTGAACAGGTA
+TACTTTGTCTGTTATGATGAAGAAACTGCCCGGCTTTACGCAAGATTACTTACTCAGCAA
+GGCGACGACCCTGCCATGAAGAAACTGCCCGGCTTTACGCAAGATTACTTACTCAGCAAG
+GCGACGACCCTGCCTGATAAAACACGCCTGGAGCGTGCCGTTGAACCGCTATGCGCGCGC
+CATCCCGGAGAGTGCGGCATTCTTGCGCTGGATAACAGTCTGGACGCTTTTGCCGCCCGC
+TACCGCCTGACCGAAATGGCGGCGCGGACGCTGGATGTGCAGTATTATATTTGGGAAGAC
+GATATGTCCGGGCGGCTGCTCTTTTCGGTTCTGCTGTCGGCGGCGAAGCGCGGCGTTCAT
+GTTCGTCTGCTGCTGGATGATAACAATACGCCTGGTCTGGATGATACGTTGCGCTTGCTG
+GATAGCCATCCTAATATCGAAGTTCGTCTGTTTAATCCTTTCTCTTTTCGTACGCTACGC
+GCGCTGGGATATTTGACGGATTTTGCGCGGCTGAATCGGCGGATGCACAATAAAAGTTAC
+ACTGCCGACGGCGTAGTGACGCTGGTCGGTGGGCGCAACATCGGCGATGCCTATTTCGGC
+GCTGGCGAGGAGCCGCTATTTTCCGATCTGGACGTGATGGCCATTGGCCCGGTGGTCAAT
+GATGTCGCCAATGATTTTGAACGTTACTGGCGCTGTAGTTCAGTGTCGACATTGCAGCAA
+GTATTATCCCTTTCTGAGCAGGAACTGACGCAGCGTATCGAACTTCCCGAATCCTGGTAT
+AACGATGAGATCACCCGCCGTTATCTGCATAAGCTGGAAACCAGCCAGTTTATGGCGGAT
+CTCGATCGCGGAACGTTGCCGCTGATTTGGGCAAAAACACGCTTGCTTAGCGATGACCCT
+TCTAAAGGCGAGGGGAAGGCGCAGCGCCATTCGCTTCTTCCGCAGCGATTATTTGACGTG
+ATGGGGTCGCCGACGGAGCGTATCGACATTATTTCCGCTTACTTTGTCCCTACGCGCGCA
+GGCGTGGCGCAGTTGCTTAATCTGGTCAGGAAAGGTGTGAAGATCGCCATCTTAACTAAC
+TCTCTGGCGGCCAACGATGTGGCGGTCGTTCACGCAGGGTACGCGCGCTGGCGCAAGAAA
+TTACTGCGCTATGGCGTGGAGCTCTACGAACTGAAACCGACCCGCGAACATGAAACCGCC
+GTACATGATCGCGGACTCACCGGGAACTCAGGTTCCAGCTTACATGCTAAAACGTTCAGT
+ATTGATGGTAGTAAGGTGTTTATCGGGTCGCTTAATTTTGATCCCCGTTCAACGCTTTTA
+AATACCGAAATGGGCTTTGTCATTGAAAGTGAAACGCTGGCGACGCTTATTCATAAGCGT
+TTTACGCAGAGCCAACGCGATGCGGCCTGGCAACTGCGGCTGGATCGCTGGGGACGAATT
+AACTGGATCGATCGTCAGCAAGAAGAGGAAAAGGTGTTAAAGAAAGAACCCGCTACGCGT
+TTCTGGCAGCGAGTTCTGGTACGGTTGGCGGCAATTTTACCTGTGGAATGGTTGCTGATG
+AGCTCTGTACCCGCGCCGCGTGAATATTTTCTTGACTCTATCCGCGCATGGCTGATGTTG
+TTAGGGATTCCCTTTCATATCTCGTTGATCTATTCCACTCACAGTTGGCATGTCAATAGC
+GCCGCGCCATCGTGGTGGCTAACCCTGTTTAACGATTTTATCCACGCTTTTCGTATGCAG
+GTGTTTTTTGTTATTTCTGGTTATTTTTCGTACATGTTATTTTTACGTTATCCATTAAAA
+CACTGGTGGAAAGTACGGGTAGAACGTGTGGGTATTCCCATGCTTACCGCAATCCCTTTG
+CTTACCTTGCCGCAATTTATCCTGTTGCAATATGTCAAAGAGAAAACAGAGAACTGGCCT
+ACACTCTCTGCCTATGAAAAATATAATACGTTAGCGTGGGAACTCATTTCACATCTGTGG
+TTTTTACTGGTGCTGGTGATATTAACCACCGTCAGCATCGGGATTTTTACCTGGTTCCAA
+AAAAGGCAGGAAACAAGCAAGCCTCGTCCCGCCGCTATTTCGCTGGCCAAACTTTCGCTT
+ATTTTTTTCCTGCTGGGGGTGGCGTACGCTGCTATCAGGCGCATTATATTCATCGTATAT
+CCGGCAATCCTCAGTGACGGCATGTTCAATTTTATTGTGATGCAAACGCTATTTTATGTG
+CCGTTTTTTATTCTCGGCGCGTTGGCCTTCATTCACCCCGATCTGAAAGCGCGCTTCACC
+ACGCCCTCACGCGGATGCACTTTAGGCGCTGCCGTTGCTTTTATCGCGTATCTGCTGAAT
+CAACGTTATGGGAGCGGCGACGCCTGGATGTACGAAACCGAATCCGTGATTACGATGGTA
+ATGGGGCTATGGATGGTGAACGTGGTATTTTCACTGGGGCATCGCTTGTTAAACTTTCAG
+TCCGCGCGTGTCACCTATTTCGTGAATGCTTCGCTGTTTATTTATCTGGTGCATCATCCC
+TTAACGCTTTTCTTTGGCGCGTATATTACACCGCATATCTCCTCCAACCTGATCGGGTTC
+TTGTGCGGGCTGATATTTGTTATGGGTATTGCGTTAATTCTGTATGAAATTCATTTACGC
+ATCCCGCTCCTGAAATTTCTCTTTTCAGGTAAACCGCCGGTAAAACAAGAAAGCCGCGCC
+GCGATCGGGATGAAACATAAACGACAAATGATGAAAATGCGTTGGTTGGGCGCAGCTATT
+ATGTTAACGCTCTACGCATCATCGAGCTGGGCGTTCAGTATTGATGACGTGGCAAAACAA
+GCTCAATCTTTAGCCGGGAAAGGCTATGAGGCGCCTAAAAGCAACTTGCCCTCCGTTTTC
+CGCGACATGAAATATGCGGATTATCAGCAGATCCAGTTTAACAGCGATAAAGCCTACTGG
+AACAACTTAAAGACCCCTTTTAAGCTCGAATTTTACCATCAGGGGATGTACTTCGATACG
+CCGGTCAAGATTAACGAAGTGACGGCGACGACGGTCAAAAGAATCAAATACAGCCCGGAT
+TACTTCAATTTTGGCAATGTTCAGCACGATAAAGACACGGTAAAAGATTTAGGCTTCGCC
+GGGTTCAAAGTCCTGTACCCCATTAACAGTAAAGATAAGAACGACGAAATCGTCAGTATG
+CTTGGCGCCAGCTATTTCCGCGTTATCGGCGCAGGCCAGGTGTATGGCTTATCTGCGCGC
+GGCCTGGCGATTGATACCGCCTTACCATCTGGTGAAGAGTTTCCCCGCTTTCGCGAGTTC
+TGGATTGAGCGTCCAAAACCCACCGATAAGCGTTTGACCGTCTATGCATTACTGGATTCT
+CCGCGCGCGACCGGCGCTTACCGTTTTGTGATCATTCCTGGCCGCGATACCGTGGTGGAC
+GTGCAGTCAAAAGTCTATCTGCGCGATAAGGTGGGCAAGCTGGGCGTTGCGCCATTAACC
+AGTATGTTCCTGTTTGGGCCAAACCAGCCGTCGCCGACGACCAACTATCGTCCGGAATTG
+CATGACTCGAACGGCTTATCCATTCATGCGGGTAATGGCGAGTGGATTTGGCGTCCGCTG
+AACAATCCAAAACACCTCGCTGTGAGCAGCTATGCGATGGAAAACCCTCAGGGATTCGGC
+CTGTTGCAGCGTGGTCGCGAGTTCTCGCGCTTTGAAGATTTAGACGATCGCTATGACCTG
+CGTCCAAGCGCCTGGATTACCCCGAAAGGCGACTGGGGCAAAGGTAAGGTTGAACTGGTT
+GAAATTCCGACCAATGATGAAACCAACGATAACATCGTCGCTTACTGGACTCCGGATCAA
+CTGCCGGAACCGGGTAAAGAGATGAACTTCAAGTACACTCTGACCTTCAGCCGCGATGAA
+GATAAACTTCATGCGCCGGATAATGCCTGGGTGCTGCAAACACGCCGCTCAACGGGCGAC
+GTTAAACAGTCGAATCTGATTCGCCAGCCCGACGGCACTATTGCCTTTGTGGTGGATTTC
+GTTGGCGCCGACATGAAAAAACTGCCGCCGGATACGCCCGTCGCTGCACAAACCAGCATT
+GGCGATAACGGTGAAATCGTTGACAGTAATGTACGCTATAACCCAGTCACTAAAGGCTGG
+CGTTTAATGCTGCGCGTGAAAGTCAAAGACGCGAAGAAAACCACGGAAATGCGTGCCGCA
+TTGGTGAATGCCGATCAGACGCTAAGTGAAACCTGGAGCTACCAGTTACCTGCCAATGAA
+ATGAATAAAACAACTGAGTATATTGACGCACTGCTGCTTTCTGAACGTGAGAAAGCGGCA
+TTGCCGAAAACTGACATCCGCGCCGTGCATCAGGCGCTGGATGCCGAGCATCGGACTTAC
+TCGCGAGAAGACGATTCACCGCAGGGTTCCGTAAAAGCCCGCCTTGAACACGCCTGGCCG
+GATTCATTGGCGAAGGGGCAGTTAATTAAAGATGATGAAGGGCGCGATCAGTTGCAGGCT
+ATGCCAAAAGCGACGCGCTCTTCGATGTTTCCTGATCCCTGGCGAACCAACCCGGTTGGC
+CGTTTCTGGGATCGCCTGCGTGGGCGGGATGTTACGCCGCGCTATGTTTCTCGTCTGACA
+AAAGAAGAGCAGGCGAGTGAGCAAAAATGGCGTACCGTCGGCACTATACGCCGCTATATT
+TTGTTAATTTTGACTCTGGCGCAAACCGTCGTTGCGACCTGGTATATGAAGACCATTCTG
+CCCTATCAGGGATGGGCGCTCATCAATCCTATTGATATGGTGGGGCAGGATATTTGGGTC
+TCCTTTATGCAGCTCCTGCCCTACATGCTGCATACCGGTATCCTGATTTTGTTTGCCGTG
+CTGTTCTGCTGGGTGTCTGCCGGATTCTGGACTGCGCTGATGGGCTTCCTGCAACTGCTT
+ATCGGGCGCGATAAGTACAGTATCTCCGCGTCTACGGTTGGCGATGAGCCCCTCAATCCG
+GAACACCAGACGGCGCTGATCATGCCTATCTGTAATGAAGACGTTAGCCGCGTTTTCGCC
+GGTCTGCGCGCGACCTGGGAGTCCGTTAAAGCTACAGGCAACGCCGCGCATTTTGACGTC
+TATATCCTTAGCGATAGTTATAACCCGGATATTTGCGTGGCGGAGCAAAAGGCGTGGATG
+GAGCTCATCGCGGAAGTGCAGGGCGAAGGCCATATTTTTTACCGTCGCCGCCGCCGCCGT
+ATGAAACGCAAAAGCGGCAATATTGACGATTTTTGCCGCCGCTGGGGCAATCAGTACAGC
+TATATGGTGGTGCTGGACGCGGACTCAGTGATTAGCGGCGAGTGTCTGAGCGGGCTGGTG
+CGCCTGATGGAAGCGAACCCTAACGCCGGGATTATCCAGTCTTCGCCGAAAGCGTCGGGG
+ATGGATACTCTGTATGCCCGCTGCCAACAGTTTGCGACCCGTGTTTATGGACCGCTGTTT
+ACCGCCGGGCTGCACTTCTGGCAGTTGGGGGATTCGCACTACTGGGGGCACAATGCCATT
+ATCCGCGTGAAGCCGTTTATCGAGCACTGCGCTCTGGCGCCGCTGCCGGGAGAAGGTTCG
+TTCGCCGGATCGATTCTTTCCCACGACTTTGTTGAGGCGGCGCTAATGCGTCGGGCAGGG
+TGGGGCGTCTGGATTGCCTACGATCTCCCCGGTTCCTATGAAGAGCTGCCGCCAAACCTG
+CTGGATGAGCTTAAACGCGACCGCCGCTGGTGTCACGGCAACCTGATGAACTTTCGTCTG
+TTCCTGGTGAAAGGAATGCACCCGGTGCATCGTGCCGTGTTCCTGACCGGGGTAATGTCA
+TACCTGTCCGCGCCGTTATGGTTTATGTTCCTTGCGCTTTCTACCGCGCTGCAGGTCGTT
+CATGCGTTAACAGAGCCGCAATATTTCCTTCATCCGCGCCAGCTTTTTCCGGTCTGGCCG
+CAGTGGCGTCCGGAACTGGCAATCGCGCTGTTTGCGTCAACGATGGTGCTGCTGTTCCTG
+CCGAAGCTGCTCAGTATTATGCTGATCTGGTGTAAAGGCACCAAAGAGTATGGCGGTTTC
+TGGCGCGTTACGCTGTCGCTATTGCTGGAAGTTCTGTTCTCCGTGTTGCTGGCGCCGGTG
+CGTATGCTGTTTCATACCGTGTTTGTGGTCAGTGCGTTCCTCGGCTGGGAAGTGGTCTGG
+AACTCACCGCAACGCGACGATGATTCTACGCCTTGGGGAGAAGCCTTTATGCGTCACGGC
+TCTCAACTGCTGCTGGGGCTGGTCTGGGCGGTTGGTATGGCGTGGCTGGATTTACGCTTT
+CTGTTCTGGCTGGCGCCGATTGTCTTTTCGCTTATTCTGTCGCCATTTGTTTCGGTGATC
+TCCAGTCGTTCAACGGTAGGATTACGCACCAATCGCTGGAAGCTGTTCCTGATCCCGGAA
+GAGTATTCGCCGCCTCAGGTGTTGGTCGATACTGATAAATATCTGGAGATGAATCGCCGC
+CGTATTCTGGACGATGGCTTTATGCATGCGGTTTTTAACCCGTCGCTTAATGCGCTGGCG
+ACCGCGATGGCCACCGCGCGTCACCGCGCCAGTAAGGTGCTGGAAATAGCCCGCGATCGT
+CATGTGGAGCAGGCGCTAAACGAAACGCCGGATAAACTGAACCGCGATCGGCGTCTGGTT
+TTGCTCAGCGATCCGGTGACGATGGCGCGTTTTCACTATCGGGTCTGGAATGCGCCAGAG
+AGATACTCTTCCTGGGTAAACCATTATCAGTCTCTCGTCCTGAATCCGCAGGCGTTGCAG
+GGACGAACATCGTCAGCGGGAGTGCGTATATTCGCGGTGAGCATAATGGTGATTACCCTG
+AGCGGCTGCGGCAGTATTATCAGCAGAACGATCCCCGGACAAGGACACGGCAACCAGTAT
+TACCCTGGCGTGCAGTTGGATATGCGTGATTCCGCATGGCGCTATATCACTATCCTCGAT
+CTGCCCTTCTCACTGATCTTCGATACACTGCTACTGCCGCTCGATATTCACCACGGGCCT
+TATGAGATGAAAAAAAACCTGCTGGGATTCACCCTCGCATCCTTGTTATTCACGACCGGT
+TCCGCCGTGGCGGCGGAGTATAAAATTGATAAAGAAGGCCAACATGCGTTCGTCAATTTC
+CGCATCCAGCATCTGGGCTACAGCTGGCTATACGGCACCTTTAAAGATTTCGACGGCACG
+TTCACTTTTGACGAAAAAAATCCGTCAGCAGACAAAGTGAATGTGACCATTAACACCAAT
+AGCGTCGACACTAACCATGCCGAACGTGACAAACACCTGCGTAGCGCGGAGTTTCTTAAT
+GTTGCGAAATTCCCGCAGGCAACCTTCACCTCTACCAGCGTGAAAAAAGAGGGCGATGAA
+CTGGATATTACCGGCAATCTGACGCTCAATGGCGTGACTAAACCGGTGACGCTGGAAGCG
+AAGCTGATGGGCCAGGGCGACGATCCGTGGGGCGGTAAGCGCGCGGGCTTTGAGGCCGAA
+GGAAAAATTAAGCTGAAAGATTTCAATATAACTACCGATCTCGGCCCAGCCTCACAAGAG
+GTGGAGCTTATCATCTCAGTAGAAGGCGTTCAGCAGAAGATGTTACTGATGATGGCGCTG
+ATCGTGCGTATTATCTGGCGGCTTTATTCTCCGCCGCCCGTTGCGTTGACCAGCTATTCC
+CGTTTAACGCGCATTGGCGCCGCCGCGGGTCATATCCTTCTGTATCTCCTGCTCTTTGCG
+ATAATCATTAGCGGCTACCTGATTTCCACCGCCGACGGTAAACCGATTAGCGTCTTTGGC
+TGGTTTGAGATTCCGGCCACGCTTACGGACGCGGGCGCGCAGGCTGACATCGCCGGAACA
+CTGCATCTGTGGTTTGCCTGGTCGCTGGTCATTATCTCGCTCTCGCATGGGGTTATGGCG
+CTAAAACACCATTTCATCGATAAAGACGACACACTGAAACGTATGACAGGAATGTCGTCA
+TCTGACTATGGAGCTCAAAAAATGAAATACGACCTTATTATTATCGGCAGCGGTTCGGTT
+GGCGCCGCCGCTGGTTATTACGCCACCCGCGCCGGGCTAAAGGTCCTGATGACCGATGCG
+CATATGCCGCCTTATCAACAGGGCAGCCACCACGGCGATACCCGTCTTATCCGCCACGCT
+TATGGTGAAGGCGAAAAATATGTCCCGCTGGTGCTTCGCGCCCAGACGCTTTGGGATGAG
+CTCTCCACACACAATGAAGAGCCTATTTTTGTCCGCTCCGGCGTCGTCAACCTCGGCCCG
+GCCGATTCCGCTTTCTTAGCCAACGTCGCACGAAGCGCGCAACAGTGGCAATTGAACGTC
+GAGCGCCTGGACGCGACGGCCCTCATGACGCGCTGGCCGGAAATTCGCGTGCCCGATAAT
+TATATCGGGCTGTTTGAAGCTGACTCCGGTTTCCTGCGCAGCGAATTAGCCATTACCACA
+TGGCTTCGTCTGGCCCGAGAGGCAGGCTGCGCACAGCTATTCAACAGCCCGGTAAGCCAT
+ATTCACCATGATGATAACGGTGTGACGATAGAGACGAGTGAAGGCTGCTACCACGCCAGC
+AAAGCGCTGATTAGCGCGGGCACCTGGGTCAAAACGCTGGTACCGGAGCTGCCCGTTCAG
+CCCGTACGTAAAGTTTTTGCCTGGTTTAAGGCGGATGGACGTTACAGCACTAAAAACCGC
+TTTCCGGCCTTTACCGGCGAAATGCCCAACGGCGATCACTATTACGGTTTCCCGGCGGAG
+AACGACGAGTTAAAAATCGGCAAACACAATGGCGGGCAGCGAATACAGGCACCGGAAGAG
+CGCAAGCCCTTTGCCGCCGTTGCCAGCGATGGCGCGGAAGCATTTCCTTTCCTGCGTAAC
+GTACTGCCGGGTATCGGCGGTTGTTTACATGGGGCGGCATGTACCTATGATAATTCGCCG
+GACGAGGATTTTATTATCGATACGCTGCCTGGCCATGAGAATACGCTTGTCATCACTGGA
+CTCAGCGGACATGGTTTTAAATTCGCCCCGGTGTTAGGAGAAATCGCTGCGGATTTTGCG
+TTGGGAAAAACGCCCTCCTTTGATCTGACGCCGTTCCGGCTTTCCCGTTTTAGCCAAATG
+GAAAAGAATAATGAAGTCATTCAGACCCATCCGCTTGTAGGATGGGACATCAGCACCGTC
+GATAGCTATGATGCGCTGATGCTGCGTTTACACTACCAGACCCCAAATCGTCCGGAACCG
+GAAGGGACTGAAGTTGGTCAAACGCTCTGGTTAACGACAGATGTAGCCAGGCAATTTATT
+TCAATATTAGAAGCCGGCATCGCCAAAATAGAATCAGGCGATTACCAGGAAAACGAGTAT
+CGTCACCAT
diff --git a/t/data/reformat_input_gffs/expected_fixed_query_2.gff b/t/data/reformat_input_gffs/expected_fixed_query_2.gff
new file mode 100644
index 0000000..1cabef6
--- /dev/null
+++ b/t/data/reformat_input_gffs/expected_fixed_query_2.gff
@@ -0,0 +1,220 @@
+##gff-version 3
+##sequence-region abc|SC|contig000001 1 15000
+abc|SC|contig000001	Prodigal:2.60	CDS	172	1131	.	-	0	ID=1_1___1;gene=different;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q2G1X0,protein motif:TIGRFAMs:TIGR01002,protein motif:Pfam:PF07968.6;locus_tag=abc_00001;product=Alpha-toxin,beta-channel forming cytolysin,Leukocidin/Hemolysin toxin family protein;protein_id=gnl|SC|abc_00001
+abc|SC|contig000001	Prodigal:2.60	CDS	1804	1950	.	+	0	ID=abc_00002___2;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00002;product=hypothetical protein;protein_id=gnl|SC|abc_00002
+abc|SC|contig000001	Prodigal:2.60	CDS	1934	2131	.	+	0	inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00003;ID=abc_00003___3;product=hypothetical protein;protein_id=gnl|SC|abc_00003
+abc|SC|contig000001	Prodigal:2.60	CDS	2621	3337	.	-	0	ID=abc_00004___4;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13350,protein motif:Pfam:PF02876.11;locus_tag=abc_00004;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00004
+abc|SC|contig000001	Prodigal:2.60	CDS	3445	4170	.	-	0	gene=speH;ID=1_2___5;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0C0I6,protein motif:CLUSTERS:PRK13349,protein motif:Pfam:PF02876.11;locus_tag=abc_00005;note=SPE H;product=hypothetical protein,superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00005
+##FASTA
+>abc|SC|contig000001
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+ATACCCACCACATTTGGTGGAGAACCAAAAATTAGCCGAAAAACATCATTTCTGAAGTTA
+TCGGCTAAAGTTATAAATTATATTTATTTGTACATGAACAAATAATTTACATTAATTTGT
+CATTTCTTCTTTTTCCCAATCGATTTTATATCTTTCTGAAGAACGATCTGTCCATTTATC
+TTTAGTATTGGTACCTTTCCAATTTGTTGAAGTCCAATGCAATTGGTAGTCATCACGAAC
+TCGTTCGTATATTACATCTATATTTGTTTGTTGTTTGGATGCTTTTCTATCCATAGTAAT
+AACTGTAGCGAAGTCTGGTGAAAACCCTGAAGATAATAGAGAACTTGCTTTGTTAGGATC
+AAGGAAGTTCTCTGCTGCTTTCATAGAACCATTTCTAGTTTTCATGAAAAGTTGATTGCC
+ATATACCGGGTTCCAAGAATCTCTATCATATGGTCCCCAATTTTGATTCACCATATTGTT
+AAATATCACTTTCCAGCCTACTTTTTTATCAGTTGGGCTCTCTAAAATTGTTTTGAAATC
+AGGTTGAACATATTTCAGTGTATGACCAATCGAAACATTTGCACCAATAAGGCCGCCAAT
+TTTTCCTGTATCATCACCAGTAACATTACCGTTGAATCCATAAGTTAAAGTACTCATATA
+CTCTTTTGTATCAATCGAATTTCTTGGATAGTAATCAGATATTTGAGCTACTTCATTATC
+AGGTAGTTGCAACTGTACCTTAAAGGCTGAAGGCCAGGCTAAACCACTTTTGTTAGCACC
+TTCTTCGCTATAAACTCTATATTGACCAGCAATGGTACCTTTCGTTCTAATAACTAGCAG
+TTTTTTATTGTGATTTTTATCATCGATAAAACTATAAAATACTTTTTTGTGCATGCCATT
+TTCTTTATCATAAGTGACTAAATCACCTGTTTTTACTGTAGTATTGCTTCCAATATCTGT
+AGTACCGGTTTTAATATTAATATCAGAATCTGCGGCATTAGCGACAGGATTCATTAATAT
+GGAACCTAGCAATAGTGTTGTTGTTACTGAGCTGACTATACGTGTTTTCATTTTCATCAT
+CCTTCTATTTTTTTTAACGATTTGAGGAAACAATAATCAATATGTCAATTTAGAATATTG
+CAGTTGAGACATATCAAATATTTATAATCGTTTATATTAGTATATTTAATGTAGCAACTG
+ATAAATTACTGAGTGATGATGAGTGATTATTTTAAGAATATGTTTTTAACTTTTATTTAA
+AATTTGAAAGGAAGCATTTCAATTTCGAGGGTTAGTCAAAGTTGAATAAATTCTTTATGA
+AACAAGGAAAAGACATAGCTAATTTTATTGATTAATTTCTTTAAAACTAATGATTTGTTT
+GATTTAAAAATGTAATCGATTACAATATAAAAATACAAATATCTTAGAATTAAATCAATT
+AATTAACTATTAAATAAAAATTAAATATATATTAACTACTGTAAATTAATAAATAGAAAT
+AGAGAAAAAGGGTATTAATTATGTTTGGAATTTCGTCGGGAATAATTTTTGCCATTACAT
+AGAAATATCTAATAGATAATGAAAAAGTATCGTATGTATTTTTAATATAGTGTAAAATAT
+CATATGTAAAATAAAATGTAGATTTTTAGTTAGAGGCATTATAAGAAAATTTTGAGTATA
+GGTTAGCTTTTAATTATGAATCTTATTGAAATTTGATTAATAAATATATGATAGGGGATT
+AAAATGAAACTATTTTATATCGTATTTCTTATTATTATATGGCTGAATATATTTTTAGGA
+AATGAAATTATCCATACACTGACTGTTTTAATAACAACATTGTATATTGTTAATTCAAGA
+AAGGGGATTAAAAATGACAGAGTTGAATAATATTATAAACTCTCTTCAATCTTTGTTTGA
+GTCTGAATCAGGCTATAAAATTTCGAAAAATTCAGGAGTTCCATATCAAACAGTACAAGA
+TTTAAGAAATGGGAAAACCAAACTAGAAGATGCTAGATTTAGAACGATTATTAAACTTTA
+TAGTTACTATGTCTCATTAAAAGAACATTAATCATGGGACAATAAAGTATTGCTATAATA
+ATTGAATCATTAAATGAAGGAATAGAGCCTAAGACATAATAAATCAATGTCTTAGGCTCT
+ACAATATTATATTGGTAGTAGTTGACCGAATGAAAATGACCATGTAACAAGCATTTTTGG
+GGCCCCAACACAGAAGCTGACGAAAAGTCAGCTTACAATAATGTGCAAGTTGGGGATGGG
+CCCCAACACAGAGAATTTCAAAAAGAAATTCTACAGACAATGCAAGTTGGCGGGGCCCCA
+ACATAGAGAAATTCAAAAAGAAATTCTACAGACAATGCAAGTTGGGGAGTGACAACGAAA
+TAAATTTTATAAAATACCATTTCTGTCACATTCCTACTCCCAATTCCATTTAAATATATA
+GAAAATTTCACCTATTATATACATCATACACTTTAAAAATTATCATTTAAATAATCTGAT
+TTGGAATTAAAGTGCATGAAGTATAAGTCACCTTCATATACTAATCAAAGAGGACGTCAA
+CAGTTATTTTATTAGGATTTTTAACATAAACATTTGCTAGATCTGAATGTAATCTTTTGC
+TTAAATCAATAGTGTAGTTATTACCGCCACCGGTGATCTTAAGCTTACCTTTATTACGAT
+TTTCGGTATATAATATTTTATTTTTTATTAACGCTTCTCGTGCACGGAAATCGATTTCTT
+TCAATGTTAATACTGGTTTATTGCCTTGGTATATTTTATGCGCACCAATAATCGTTTGTA
+GTTTATCTTTGTATTGCACAAAAAGATTATAAGTTTTATCAGAAGGTTTTGCGGCTGGTG
+TAACGCCACCTGTAAATGTCTCTCTATAAGACCACCATAACTGATCAGTATCTTTGTCTT
+TTAGTCCAAACACATCTACGTAACGATCTTTTAACTGATTAATATTTCCCCAACTTTCAG
+CGCCCCATAAAGATATATGTGCTGACCATGAATATCTCTTAAGTTCAACATAAATGTTTC
+CATTATCATATTGATATAGCCATTTATTTGAAAATGAAAAATGAGGCTGTGTGTAATATT
+TAATTAATTCATTGATGTTAGTTTCATCTTGACCAATGCTATAAGCTTTAGCTTCAGAGT
+AAAAACTAAATACTGGTTTTTGATTTTGAGGTAATACAGTACCTAATAGTAATAATGTTG
+TCGTTAAAATTATATTTTTCGTGATGTTCTTACTCATTAGAACATCTCCTTTCAGAGGAA
+TCATGATACGAGGAATAAGAAATTTAAATGTGAGCGAAGTCAATATAGTATTTGCGATTA
+TTTTTATTAACTCCATTATTATTGTTAGTTTGATTTTTCGAGGATAACTTCAATTTTTGC
+ATTTTGAGGTTTTTTAACATATCTATTTGCATCAGTTGATGGCAACCTTTTACTTAAATC
+TATTGTGTAGTTATTGTCTGTACCTGTTATTTTAATTTGTCCTTTATTATAAGAATTATT
+ATATAATTTTTTACTTTTAATTAATGTTTGACGAATACGAAAATCTAATTCTTTTAAAGT
+TAAAACAGGCTTATTGCCTTCATAAACTGGAAATCCGCCAGTAAACGTTTCTGCTTTATC
+TTTATATGTTACATTCAGTTTATAGTGTTTATCGTTAGATGTTGCTGCAGGAGTAACACC
+ACCAGTAAACGTTTCTTGAGATAATGCAAAAGAATCAATGGTTTCTTGGTCTTTTATGCC
+AAAAATATCAACGCTTTTATTTCTTAATTGGTTGATATTGCCCCAACTTTCAGGTCCATA
+AACTTGAATATGACTATACCAAGAAAACTGTAACAACGTTGCATGAATCGTACCGTTATC
+TTTTTGCCATAACGTACTGTTAGAGAAGGTTAAATATTTTTGCGAGTAATATTTAGTTAA
+CTCATTAACGTTAGTTTCGTTTTGATTTATATAATAAGCTTTCGCTTCAGATGAAGAATT
+GATAGGTGTATTAGGAAATTGTGTAAATGCTGTACCTAATAGTAACAATGTTGTTGATAA
+AATAATTTTTTTCGTGATGTTATTGTTCATTCGAATTTCTCCTTTGAGTATTGTTGGAAT
+GTTTAATTATAAAAAGGGGTTAATTAGATAATTGAAATTATCCGCATTTACAAAAGGTAA
+TAGGTTAGTTAGATTTTTCGAGTATGACTTCAATTTGTGCATTTTTAGGATTTTTAACAT
+AACGGTTTGTGTCAGTTAATTTTAACTTTTTACTTAAATCAATCGTGTAATTATTTCCAT
+CAGCAGTTATCTTAATTTGACCTTTATTAAATTCTCCGTTATATAACTTTTTATTCTTTA
+TTAATGTTTGACGAATACGGAAATCTAATTCTTTTAAAGTTAATACTGGTTTATTTCCTT
+TGTAAAATTCATGTCCACCGATGATAGTTTGTTGTTTATCACTATATTTTAAAAATAGTC
+TATAAGGTTTATCAGATGAAGTAGCTGCTGGCGTAACACCACCAGTAAATGTTTCATCAT
+AAGTCCAGTAACCTTCAACTGTGTCCTCATCTTTAGTTCCAAATATATCAACGTATTTAT
+TTCTTAACTGATTAATGTTTCCCCAACTCTCGGATCCAAACACTTGAATATGACTATACC
+AAACCCACGTTTGCAATGTTGCATGAATGCTACCATTGGGCTTTTGCCATAGCCATTTTC
+CAGATAATGAAAAATGCGGCTGAGTGTAATATTTGATTAACTCATTGATATTAGTCTCGT
+TTTCACTGATATTATAGGCTTTTGCTTCAGATGAAAAACTGATTGGTGTTTTAGGAAGTT
+GTGTTGATGTGGTTCCTAAAAGTAACAATGCTGTTGATAAAACTAATTTATTCATGATGT
+TCTTTTTCATATGAAAATCTCCTTTGCGTGAATTACCCAAAGTATATAAGCTATTACACC
+GATTCGGAATTAAATAAAAGCTAAAACTATGTTAAATAAACTTAAACAGTTAGTAGTGTT
+ATTTAAGCAAAACTTATCATTTTTAAGTTTTGGACAGAAACAGTACTTAATAAAGTAGGC
+GGGAGTTATATATTAAAAACGACACGTTACAATTATTCTCTAATCAATTGCATTAAATTG
+TTTGATAATTGAATTTTCTAACTACTTGAAAAATAGTTATACTTTAAATGTAGTACTTAT
+TTTAATTATTTCCTACTACTTAAATTTAATATTAATAAAATGTTCATTTAATTATTGATA
+AAATATTACAAATTTTAATAGTAGGTTGTGTTTATTTTGTATGCGCTTACAATTTAGGTG
+TAACTAAAATAAAAGGAGTTGTTATTAATGAAAAATTTACGAAACAGAAGTTTTTTAACT
+TTATTAGACTTTTCACGACAAGAGGTAGAATTCTTATTAACACTCTCCGAGGATTTAAAA
+CGTGCTAAATATATTGGCACTGAAAAGCCTATGTTAAAAAATAAAAATATTGCACTGTTA
+TTTGAAAAAGATTCTACAAGAACGCGATGTGCATTTGAAGTTGCAGCGCATGATCAAGGT
+GCAAATGTAACTTATTTAGGCCCAACTGGATCACAAATGGGTAAAAAAGAAACAACTAAA
+GATACTGCACGTGTGCTTGGTGGAATGTATGATGGCATTGAATACCGTGGTTTTTCACAA
+AGAACAGTAGAAACTTTAGCTGAAAATTCAGGCGTACCAGTGTGGAATGGTTTAACTGAT
+GAAGATCATCCTACTCAAGTTCTTGCTGATTTCTTAACAGCAAAAGAAGTCTTAAAAAAA
+GATTATGCAGATATTAACTTTACATATGTTGGAGATGGTCGTAATAACGTTGCAAATGCA
+TTAATGCAAGGTGCTGCCATTATGGGTATGAACTTCCATTTAGTTTGTCCAAAAGAATTA
+AATCCAACAGATGAATTATTAAATCGCTGTAAAAATATTGCCGCTGAAAATGGTGGCAAC
+ATATTAATCACAGATGATATTGACCAAGGTGTAAAAGGTTCGGATGTAATTTACACTGAT
+GTTTGGGTATCAATGGGTGAACCTGATGAAGTATGGAAAGAACGACTTGAATTATTGAAA
+CCATATCAAGTAAATAAAGAAATGATGGATAAAACTGGTAATCCAAATGTTATTTTTGAG
+CATTGCTTACCATCTTTCCATAATGCTGATACGAAAATTGGTCAACAAATTTTTGAAAAA
+TATGGTATTCGAGAAATGGAAGTTACAGATGAAGTATTCGAAAGTAAAGCTTCAGTTGTA
+TTCCAAGAAGCTGAGAACAGAATGCATACAATCAAAGCAGTCATGGTTGCTACATTGGGT
+GAATTTTAAATGATATAAGGAAGTGAATATGATGGCGAAAATAGTAGTAGCATTAGGTGG
+TAATGCTTTAGGAAAATCACCTCAAGAACAACTCGAGCTTGTTAAAAATACTGCGAAATC
+ATTAGTAGGATTAATAACAAAAGGACATGAGATTGTTATTAGTCATGGTAATGGACCACA
+GGTTGGAAGCATTAATTTGGGACTTAACTATGCTGCAGAACATAACCAAGGTCCGGCATT
+TCCATTTGCTGAATGTGGCGCAATGAGTCAAGCTTACATCGGCTATCAATTACAAGAAAG
+CTTACAAAATGAATTGCATTCTATTGGAATGGATAAACAAGTGGTAACACTAGTGACACA
+AGTTGAAGTTGATGAAAATGATCCGGCATTTAACAATCCTTCAAAACCAATTGGGTTATT
+TTACAACAAAGAAGAAGCTGAACAAATTCAAAAAGAAAAAGGATTTATATTTGTTGAAGA
+TGCTGGAAGAGGATATAGACGCGTTGTTCCTTCACCACAACCCATCTCTATTATTGAATT
+AGAGAGTATTAAAACACTTATTAAAAATGATACACTCGTTATTGCTGCTGGTGGTGGAGG
+TATACCAGTAATTAGAGAGCAACATGATGGTTTTAAAGGTATTGATGCAGTTATAGACAA
+AGATAAAACAAGTGCGTTGTTGGGTGCTAATATTCAATGCGATCAATTGATTATTTTAAC
+AGCAATTGATTATGTATATATTAATTTTAACACTGAAAACCAACAGCCTTTGAAAACAAC
+AAATGTTGATGAATTAAAACGATATATAGACGAAAATCAATTTGCAAAAGGAAGTATGTT
+ACCAAAAATTGAAGCAGCCATATCATTTATTGAAAACAATCCAAAAGGAAGTGTGCTTAT
+AACATCATTAAATGAATTAGATGCTGCCTTAGAGGGTAAAGTAGGTACTGTGATTAAAAA
+GTAATTGAATTGAAACGCTTTTCAATTACTATATGTCAAATGCATGATTTTTATTATTTA
+TGTGCACCCCCTGAAAATAATGCCTCTATTTTGATGCGGGGTGCACTTTCTTAATTTATA
+TTTATAAAATCTTTAAGGTAGAAATGCTAGGTTAAATGTCGAAGGAGATGAAACCGTGGA
+AAATACAATTAATGAAAGTGAAAAGAAAAAACGATTTAAATTAAAAATGCCAGGTGCATT
+TATGATTTTATTCATTTTAACGGTTGTTGCAGTTATAGCAACATGGGTTATTCCTGCTGG
+TGCATATTCTAAACTTTCTTACGAACCTTCATCCCAAGAACTAAAGATAGTTAACCCTCA
+TAACCAAGTGAAAAAGGTTCCGGGTACGCAACAGGAACTAGACAAAATGGGGGTTAAAAT
+TAAGATTGAACAATTTAAATCAGGTGCAATTAATAAGCCAGTATCAATTCCGAATACTTA
+TGAAAGATTAAAGCAACATCCAGCTGGACCAGAACAAATAACAAGTAGCATGGTTGAAGG
+TACGATAGAAGCGGTCGATATCATGGTATTCATTCTTGTACTAGGGGGACTTATTGGCGT
+AGTTCAAGCCAGTGGTTCTTTTGAATCGGGATTGTTAGCTTTAACGAAGAAAACAAAAGG
+GCATGAATTTATGCTAATTGTGTTTGTATCAATACTAATGATTATCGGCGGGACGTTATG
+TGGTATTGAAGAAGAAGCTGTAGCATTCTATCCGATTTTAGTCCCTATATTTATAGCGTT
+AGGATACGATTCTATCGTTTCAGTTGGTGCCATATTCCTTGCCAGCTCTGTCGGTAGTAC
+ATTTTCAACTATTAACCCGTTCTCGGTTGTAATTGCCTCTAATGCCGCTGGTACAACTTT
+TACGGATGGCTTGTATTGGAGAATAGGTGCTTGTATTGTCGGTGCGATTTTTGTTATTAG
+TTATTTATATTGGTACTGTAAAAAAATTAAAAACGATCCTAAAGCGTCATATTCTTATGA
+AGACAAAGATGCTTTTGAACAGCAATGGTCTGTATTAAAAGATGACGATAGTGCCCATTT
+TACTTTGCGTAAGAAGATAATCCTTACATTATTTGTACTACCATTTCCAATTATGGTATG
+GGGAGTTATGACGCAAGGTTGGTGGTTCCCAGTTATGGCTTCAGCATTTTTAATATTTAC
+AATTATAATAATGTTTATTGCTGGGACAGGTAAATCTGGATTGGGAGAAAAAGGAACTGT
+AGATGCATTTGTCAATGGTGCATCAAGTTTAGTAGGTGTATCTTTAATTATTGGTTTAGC
+TCGAGGTATTAATTTAGTGTTGAATGAAGGTATGATTTCAGATACAATCTTACACTTTTC
+ATCATCTTTAGTTCAACATATGAGTGGACCATTATTTATCATCGTATTACTATTTATTTT
+CTTCTGTTTAGGTTTTATCGTGCCATCATCTTCTGGATTAGCGGTATTATCAATGCCTAT
+CTTTGCACCACTAGCTGATACAGTAGGTATACCAAGATTCGTCATCGTTACGACATATCA
+ATTTGGTCAATATGCGATGTTATTCTTAGCGCCGACTGGACTTGTTATGGCCACACTACA
+AATGTTAAACATGCGATATTCACATTGGTTCCGATTTGTATGGCCGGTAGTTGCTTTTGT
+ATTGATTTTCGGTGGCGGAGTACTAATTACGCAAGTACTAATTTATTCATAATTTGAAAT
+GCTATATTATAAAAATACTAATTGGGTTTTATGCATCTCGTAGGTTTGTAGAAATACTAA
+ACTAAGCGAGGTGCATTATTATTTTTGATTAAGAAAATAATGACGGTAATGATAACACTA
+GTAAGTAATTGATACAATGCTCTATTTAATAATGATATTTTTAAAATTTGTTTTTAATGT
+AATGTTAGATCTATGGTATATTATATTTATCGTGGTAAATATGTATTTGCTGTAAACAGT
+TTTTGTAACACATGACACATATGGTTGTGAAAATAGCATATATAAAGGATGGCTATAAAT
+GACACATTTGACAAAGGTTTTAGATACACTAACTGGAATATGCGTAGTATTATTATTTAG
+TAAATATTTTGTGGCGTATGCAAATATGGTGTTTGATTGGAATTTAAGATGGTATTTGCT
+AGAAAACATACCACATTTGCCAATTATATTATTTATTCTGATGTTTATTTTCGGAGTACC
+TTCTGAAATGATAAAAGATAGGCAAAGGAAAAATAACGGTGTTTAATTTATCGATATTTA
+GAGGTGATAAAATTTGTCAACTATTACAAAGACTAAAATAAAAATCTTTATTAATTATTT
+TATTGGTATATTAGCCTTGGCGTTTGCTGCTTATATATTATTTAAATAAAGGTTAGAGTG
+AAACGTGTTTATGAACTAGACGTTCTAGTATAGTTACTACAGCTTTAGTCAGAAGGTATC
+ATTGATAAGATCATATTAAATCAAAGAGGCATTGATATACACTAAAAAGAGGCAAGATTA
+CCTGCCTCTTTTTTAGTTATTAAATATGCGTGTTAATTCTTGGTAATAGTGTTACTGAGA
+CGCATTACGAAGAAATTTATCTTGATTTTCCTTTTTTAAAAAGAAGTGAAGATATCCTAT
+AAAGACTCTAAGTACTATTACAGTGGCTAATAACACTTCGATAAACAAAAGACCTTTCCA
+AATATCTGGAAACATAAGTACAGGCAAACTGTTCTTTAAAGCAGTTGCTGAGATTACTAA
+AGGGAATGTGAAAGCTGAAAATACGGGCGAAAAAGGTTCTTTTAGTAATTTAGGCAATTG
+TATAATGATATAAAAATAAAATATTTGAGCTAATATTAAAAATATAATTACGATAAAAGC
+ATTCGCCTTAGGAAAGGCTATAACATATGCTGCAGCGACTAAAGAAAATGGTGCACAAAT
+TGTCGATGTGTTCGGTTTGATTGACGTCTGCAATGGAAATGCTTTTAATCGCTTGAAAAC
+AATAGGTAAGACAATACAAGTAGCTACAAAGCCATATATTACTGTTAATTGACCTATGAA
+AAAACATCCGCTAACGGGTGCCGTCAATCCTGCGATAGCAATACCAATAAAAAGTACAGT
+CCACGAAGGATAAACATTTTCAAGTGAAAAATCTTTTAAATATTTTATTGAAAAAATAAT
+CATATGCGTCATAATTCCCACAAGGCATAAAATCCAAATAGGCGTTATTAAGCTATTGAT
+AAAAGTTATGTTACTAAAAAATGTATTTAAATAAGTAGTACCTAAAAAGCCAGACATGAA
+AAATGTTGTGAACACTGATGAAACTAGAGGACTGTTTAATTGTTCTTTCACATTATTAAA
+ATATTTGATCATAGTACATAAAAGGTGAATCCAAATCAAGAAAGCAAAGATTCCGCAAAC
+AGCGTTTAAAGTAAGAGATAAGTCTTTTAATAGATTACCCAGGCCTAATAATCCTAAGAC
+TAGTCCTGACGTTACTAGAGGTGCTTTTTGAAGTCTCATGATTTACAACCTTTCTTATGT
+GATTTTCTTCACTAATTATATCATGATAGCTATGGCCAATTAATAAGAAGAGTGTGTACT
+ATTACGTTATTAGAGTATGTATATTGGATTGATAGACACATAATTGACATTTAAATCTCA
+AAATTAATGATATTTGAGGTATGCTTTGCAACCTAAATATTGGAATATGTGGAAAAGTAA
+TTATTTAATTTAAATGATGATTGAAGATTTACAAGGGGGTGTACAAAATGAGAAATCAAA
+TTCAAAAACTATTAGACAGTGATTTGAGCAGTTTACATATATCGAAACAAACAGGAGTTC
+CACAAAGCACAATACACAGAATGAGAAAAAAAGAAAGATCATTAGACAATATGTCATTGA
+AAAACGCTGAACTACTTTATAAATTTGCCAATAGTATATTTAGCAATGAAAATTAAGAAG
+AATAATTAGTGAGTTAAATAATTAAAAAACCACGTCTCGATGGACGTGGTTATTTTTTTA
+AATGTATTTAATTATAGGATTTCGGGCCCCTGAAAAGTCCCTAAAATTTTGAATAGTATC
+AAAATCTATAGGAATAATATAATAATAAAAATCTCTACGCATGCAAATGGTGAGTGTTAA
+AAATCTTGGTGTAGTGTTGGTGTAGTTCAGGTGCAGTATAGATGAAATTTGAGCACAAAA
+TTACTTAATTAAAAATGTATAAACAAGTGTCAGACGCGTGTCAATTGTGTGTCAAAAAAT
+TCTATGACGAATAAGCATGCTTGTTTTAATTACTGTATTTTCGGGGCTTATGCGGGGGAC
+GTTTTGGTGACGCATCATACTATATTACTGAAATTCAAAACAAAAAGAGCCCCGTAATCA
+CGGAACTCTTTTGTTTGGTAATGCGTATAAAAATACCTATAAACGTCCTGGGAGGGATTC
+GAACCCCCGACCGATGGCTTAGAAGGCCATTGCTCTATCCAGCTGAGCTACCAGGACACG
+TTTAACAACACAAGAATTATTATATCTAAATGAACTTAAATTAGCAATACCTTGTAAATA
+AAAAATGTTTATATTTTTCACTATTATAGAGCTATTTATCTAAAAAGGTTCAATAAGACT
+TAAATGCGAATTCAGGCAACTTAATTGTGTTAAATACAGTTTTGAATGCCTAACTGTATT
+TCTTTTCTCTTTAAAATACAGTTAAGTACATTATAAGATGTTGTGCGGATAAACAAACTA
+ATTGCATCAAATTTATTTTAAAATAACAACAACAAAACGTTAAGCGAATAACATTTCGGT
+GATTTAAAAGCTACGCACGTTTTTGTTATCTTCAAATTTAAATTTTAAGGAGTGTTTTCA
+ATGGAAGGTTTATTTAACGCAATTAAAGATACCGTAACTGCAGCAATTAATAATGATGGC
+GCAAAATTAGGCACAAGCATTGTGAGCATCGTTGAAAATGGCGTAGGTTTATTAGGTAAA
+TTATTCGGATTCTAATTTCAATATGTTATGTAAGTAATCAGTATTATTTCAAAGGTGAGG
+GAGAGATTTAAATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAAC
+AACATGATAGTGTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTT
+TACTAGGTAAATTATTTGGATTCTAATATAATAACTAATATTCTTTAAAATAAACTGGGT
+GAGCATACTTTAATGTTATGCACTCAGTTTATTTTATTTGCAGAAATTTGAGCCTCTGTT
\ No newline at end of file
diff --git a/t/data/reformat_input_gffs/expected_fixed_query_3.gff b/t/data/reformat_input_gffs/expected_fixed_query_3.gff
new file mode 100644
index 0000000..1d7dca1
--- /dev/null
+++ b/t/data/reformat_input_gffs/expected_fixed_query_3.gff
@@ -0,0 +1,220 @@
+##gff-version 3
+##sequence-region abc|SC|contig000001 1 15000
+abc|SC|contig000001	Prodigal:2.60	CDS	172	1131	.	-	0	ID=1_1___6;gene=different;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q2G1X0,protein motif:TIGRFAMs:TIGR01002,protein motif:Pfam:PF07968.6;locus_tag=abc_00001;product=Alpha-toxin,beta-channel forming cytolysin,Leukocidin/Hemolysin toxin family protein;protein_id=gnl|SC|abc_00001
+abc|SC|contig000001	Prodigal:2.60	CDS	1804	1950	.	+	0	ID=abc_00002___7;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00002;product=hypothetical protein;protein_id=gnl|SC|abc_00002
+abc|SC|contig000001	Prodigal:2.60	CDS	1934	2131	.	+	0	ID=abc_00003___8;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00003;product=hypothetical protein;protein_id=gnl|SC|abc_00003
+abc|SC|contig000001	Prodigal:2.60	CDS	2621	3337	.	-	0	ID=abc_00004___9;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13350,protein motif:Pfam:PF02876.11;locus_tag=abc_00004;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00004
+abc|SC|contig000001	Prodigal:2.60	CDS	3445	4170	.	-	0	ID=1_2___10;gene=speH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0C0I6,protein motif:CLUSTERS:PRK13349,protein motif:Pfam:PF02876.11;locus_tag=abc_00005;note=SPE H;product=hypothetical protein,superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00005
+##FASTA
+>abc|SC|contig000001
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+ATACCCACCACATTTGGTGGAGAACCAAAAATTAGCCGAAAAACATCATTTCTGAAGTTA
+TCGGCTAAAGTTATAAATTATATTTATTTGTACATGAACAAATAATTTACATTAATTTGT
+CATTTCTTCTTTTTCCCAATCGATTTTATATCTTTCTGAAGAACGATCTGTCCATTTATC
+TTTAGTATTGGTACCTTTCCAATTTGTTGAAGTCCAATGCAATTGGTAGTCATCACGAAC
+TCGTTCGTATATTACATCTATATTTGTTTGTTGTTTGGATGCTTTTCTATCCATAGTAAT
+AACTGTAGCGAAGTCTGGTGAAAACCCTGAAGATAATAGAGAACTTGCTTTGTTAGGATC
+AAGGAAGTTCTCTGCTGCTTTCATAGAACCATTTCTAGTTTTCATGAAAAGTTGATTGCC
+ATATACCGGGTTCCAAGAATCTCTATCATATGGTCCCCAATTTTGATTCACCATATTGTT
+AAATATCACTTTCCAGCCTACTTTTTTATCAGTTGGGCTCTCTAAAATTGTTTTGAAATC
+AGGTTGAACATATTTCAGTGTATGACCAATCGAAACATTTGCACCAATAAGGCCGCCAAT
+TTTTCCTGTATCATCACCAGTAACATTACCGTTGAATCCATAAGTTAAAGTACTCATATA
+CTCTTTTGTATCAATCGAATTTCTTGGATAGTAATCAGATATTTGAGCTACTTCATTATC
+AGGTAGTTGCAACTGTACCTTAAAGGCTGAAGGCCAGGCTAAACCACTTTTGTTAGCACC
+TTCTTCGCTATAAACTCTATATTGACCAGCAATGGTACCTTTCGTTCTAATAACTAGCAG
+TTTTTTATTGTGATTTTTATCATCGATAAAACTATAAAATACTTTTTTGTGCATGCCATT
+TTCTTTATCATAAGTGACTAAATCACCTGTTTTTACTGTAGTATTGCTTCCAATATCTGT
+AGTACCGGTTTTAATATTAATATCAGAATCTGCGGCATTAGCGACAGGATTCATTAATAT
+GGAACCTAGCAATAGTGTTGTTGTTACTGAGCTGACTATACGTGTTTTCATTTTCATCAT
+CCTTCTATTTTTTTTAACGATTTGAGGAAACAATAATCAATATGTCAATTTAGAATATTG
+CAGTTGAGACATATCAAATATTTATAATCGTTTATATTAGTATATTTAATGTAGCAACTG
+ATAAATTACTGAGTGATGATGAGTGATTATTTTAAGAATATGTTTTTAACTTTTATTTAA
+AATTTGAAAGGAAGCATTTCAATTTCGAGGGTTAGTCAAAGTTGAATAAATTCTTTATGA
+AACAAGGAAAAGACATAGCTAATTTTATTGATTAATTTCTTTAAAACTAATGATTTGTTT
+GATTTAAAAATGTAATCGATTACAATATAAAAATACAAATATCTTAGAATTAAATCAATT
+AATTAACTATTAAATAAAAATTAAATATATATTAACTACTGTAAATTAATAAATAGAAAT
+AGAGAAAAAGGGTATTAATTATGTTTGGAATTTCGTCGGGAATAATTTTTGCCATTACAT
+AGAAATATCTAATAGATAATGAAAAAGTATCGTATGTATTTTTAATATAGTGTAAAATAT
+CATATGTAAAATAAAATGTAGATTTTTAGTTAGAGGCATTATAAGAAAATTTTGAGTATA
+GGTTAGCTTTTAATTATGAATCTTATTGAAATTTGATTAATAAATATATGATAGGGGATT
+AAAATGAAACTATTTTATATCGTATTTCTTATTATTATATGGCTGAATATATTTTTAGGA
+AATGAAATTATCCATACACTGACTGTTTTAATAACAACATTGTATATTGTTAATTCAAGA
+AAGGGGATTAAAAATGACAGAGTTGAATAATATTATAAACTCTCTTCAATCTTTGTTTGA
+GTCTGAATCAGGCTATAAAATTTCGAAAAATTCAGGAGTTCCATATCAAACAGTACAAGA
+TTTAAGAAATGGGAAAACCAAACTAGAAGATGCTAGATTTAGAACGATTATTAAACTTTA
+TAGTTACTATGTCTCATTAAAAGAACATTAATCATGGGACAATAAAGTATTGCTATAATA
+ATTGAATCATTAAATGAAGGAATAGAGCCTAAGACATAATAAATCAATGTCTTAGGCTCT
+ACAATATTATATTGGTAGTAGTTGACCGAATGAAAATGACCATGTAACAAGCATTTTTGG
+GGCCCCAACACAGAAGCTGACGAAAAGTCAGCTTACAATAATGTGCAAGTTGGGGATGGG
+CCCCAACACAGAGAATTTCAAAAAGAAATTCTACAGACAATGCAAGTTGGCGGGGCCCCA
+ACATAGAGAAATTCAAAAAGAAATTCTACAGACAATGCAAGTTGGGGAGTGACAACGAAA
+TAAATTTTATAAAATACCATTTCTGTCACATTCCTACTCCCAATTCCATTTAAATATATA
+GAAAATTTCACCTATTATATACATCATACACTTTAAAAATTATCATTTAAATAATCTGAT
+TTGGAATTAAAGTGCATGAAGTATAAGTCACCTTCATATACTAATCAAAGAGGACGTCAA
+CAGTTATTTTATTAGGATTTTTAACATAAACATTTGCTAGATCTGAATGTAATCTTTTGC
+TTAAATCAATAGTGTAGTTATTACCGCCACCGGTGATCTTAAGCTTACCTTTATTACGAT
+TTTCGGTATATAATATTTTATTTTTTATTAACGCTTCTCGTGCACGGAAATCGATTTCTT
+TCAATGTTAATACTGGTTTATTGCCTTGGTATATTTTATGCGCACCAATAATCGTTTGTA
+GTTTATCTTTGTATTGCACAAAAAGATTATAAGTTTTATCAGAAGGTTTTGCGGCTGGTG
+TAACGCCACCTGTAAATGTCTCTCTATAAGACCACCATAACTGATCAGTATCTTTGTCTT
+TTAGTCCAAACACATCTACGTAACGATCTTTTAACTGATTAATATTTCCCCAACTTTCAG
+CGCCCCATAAAGATATATGTGCTGACCATGAATATCTCTTAAGTTCAACATAAATGTTTC
+CATTATCATATTGATATAGCCATTTATTTGAAAATGAAAAATGAGGCTGTGTGTAATATT
+TAATTAATTCATTGATGTTAGTTTCATCTTGACCAATGCTATAAGCTTTAGCTTCAGAGT
+AAAAACTAAATACTGGTTTTTGATTTTGAGGTAATACAGTACCTAATAGTAATAATGTTG
+TCGTTAAAATTATATTTTTCGTGATGTTCTTACTCATTAGAACATCTCCTTTCAGAGGAA
+TCATGATACGAGGAATAAGAAATTTAAATGTGAGCGAAGTCAATATAGTATTTGCGATTA
+TTTTTATTAACTCCATTATTATTGTTAGTTTGATTTTTCGAGGATAACTTCAATTTTTGC
+ATTTTGAGGTTTTTTAACATATCTATTTGCATCAGTTGATGGCAACCTTTTACTTAAATC
+TATTGTGTAGTTATTGTCTGTACCTGTTATTTTAATTTGTCCTTTATTATAAGAATTATT
+ATATAATTTTTTACTTTTAATTAATGTTTGACGAATACGAAAATCTAATTCTTTTAAAGT
+TAAAACAGGCTTATTGCCTTCATAAACTGGAAATCCGCCAGTAAACGTTTCTGCTTTATC
+TTTATATGTTACATTCAGTTTATAGTGTTTATCGTTAGATGTTGCTGCAGGAGTAACACC
+ACCAGTAAACGTTTCTTGAGATAATGCAAAAGAATCAATGGTTTCTTGGTCTTTTATGCC
+AAAAATATCAACGCTTTTATTTCTTAATTGGTTGATATTGCCCCAACTTTCAGGTCCATA
+AACTTGAATATGACTATACCAAGAAAACTGTAACAACGTTGCATGAATCGTACCGTTATC
+TTTTTGCCATAACGTACTGTTAGAGAAGGTTAAATATTTTTGCGAGTAATATTTAGTTAA
+CTCATTAACGTTAGTTTCGTTTTGATTTATATAATAAGCTTTCGCTTCAGATGAAGAATT
+GATAGGTGTATTAGGAAATTGTGTAAATGCTGTACCTAATAGTAACAATGTTGTTGATAA
+AATAATTTTTTTCGTGATGTTATTGTTCATTCGAATTTCTCCTTTGAGTATTGTTGGAAT
+GTTTAATTATAAAAAGGGGTTAATTAGATAATTGAAATTATCCGCATTTACAAAAGGTAA
+TAGGTTAGTTAGATTTTTCGAGTATGACTTCAATTTGTGCATTTTTAGGATTTTTAACAT
+AACGGTTTGTGTCAGTTAATTTTAACTTTTTACTTAAATCAATCGTGTAATTATTTCCAT
+CAGCAGTTATCTTAATTTGACCTTTATTAAATTCTCCGTTATATAACTTTTTATTCTTTA
+TTAATGTTTGACGAATACGGAAATCTAATTCTTTTAAAGTTAATACTGGTTTATTTCCTT
+TGTAAAATTCATGTCCACCGATGATAGTTTGTTGTTTATCACTATATTTTAAAAATAGTC
+TATAAGGTTTATCAGATGAAGTAGCTGCTGGCGTAACACCACCAGTAAATGTTTCATCAT
+AAGTCCAGTAACCTTCAACTGTGTCCTCATCTTTAGTTCCAAATATATCAACGTATTTAT
+TTCTTAACTGATTAATGTTTCCCCAACTCTCGGATCCAAACACTTGAATATGACTATACC
+AAACCCACGTTTGCAATGTTGCATGAATGCTACCATTGGGCTTTTGCCATAGCCATTTTC
+CAGATAATGAAAAATGCGGCTGAGTGTAATATTTGATTAACTCATTGATATTAGTCTCGT
+TTTCACTGATATTATAGGCTTTTGCTTCAGATGAAAAACTGATTGGTGTTTTAGGAAGTT
+GTGTTGATGTGGTTCCTAAAAGTAACAATGCTGTTGATAAAACTAATTTATTCATGATGT
+TCTTTTTCATATGAAAATCTCCTTTGCGTGAATTACCCAAAGTATATAAGCTATTACACC
+GATTCGGAATTAAATAAAAGCTAAAACTATGTTAAATAAACTTAAACAGTTAGTAGTGTT
+ATTTAAGCAAAACTTATCATTTTTAAGTTTTGGACAGAAACAGTACTTAATAAAGTAGGC
+GGGAGTTATATATTAAAAACGACACGTTACAATTATTCTCTAATCAATTGCATTAAATTG
+TTTGATAATTGAATTTTCTAACTACTTGAAAAATAGTTATACTTTAAATGTAGTACTTAT
+TTTAATTATTTCCTACTACTTAAATTTAATATTAATAAAATGTTCATTTAATTATTGATA
+AAATATTACAAATTTTAATAGTAGGTTGTGTTTATTTTGTATGCGCTTACAATTTAGGTG
+TAACTAAAATAAAAGGAGTTGTTATTAATGAAAAATTTACGAAACAGAAGTTTTTTAACT
+TTATTAGACTTTTCACGACAAGAGGTAGAATTCTTATTAACACTCTCCGAGGATTTAAAA
+CGTGCTAAATATATTGGCACTGAAAAGCCTATGTTAAAAAATAAAAATATTGCACTGTTA
+TTTGAAAAAGATTCTACAAGAACGCGATGTGCATTTGAAGTTGCAGCGCATGATCAAGGT
+GCAAATGTAACTTATTTAGGCCCAACTGGATCACAAATGGGTAAAAAAGAAACAACTAAA
+GATACTGCACGTGTGCTTGGTGGAATGTATGATGGCATTGAATACCGTGGTTTTTCACAA
+AGAACAGTAGAAACTTTAGCTGAAAATTCAGGCGTACCAGTGTGGAATGGTTTAACTGAT
+GAAGATCATCCTACTCAAGTTCTTGCTGATTTCTTAACAGCAAAAGAAGTCTTAAAAAAA
+GATTATGCAGATATTAACTTTACATATGTTGGAGATGGTCGTAATAACGTTGCAAATGCA
+TTAATGCAAGGTGCTGCCATTATGGGTATGAACTTCCATTTAGTTTGTCCAAAAGAATTA
+AATCCAACAGATGAATTATTAAATCGCTGTAAAAATATTGCCGCTGAAAATGGTGGCAAC
+ATATTAATCACAGATGATATTGACCAAGGTGTAAAAGGTTCGGATGTAATTTACACTGAT
+GTTTGGGTATCAATGGGTGAACCTGATGAAGTATGGAAAGAACGACTTGAATTATTGAAA
+CCATATCAAGTAAATAAAGAAATGATGGATAAAACTGGTAATCCAAATGTTATTTTTGAG
+CATTGCTTACCATCTTTCCATAATGCTGATACGAAAATTGGTCAACAAATTTTTGAAAAA
+TATGGTATTCGAGAAATGGAAGTTACAGATGAAGTATTCGAAAGTAAAGCTTCAGTTGTA
+TTCCAAGAAGCTGAGAACAGAATGCATACAATCAAAGCAGTCATGGTTGCTACATTGGGT
+GAATTTTAAATGATATAAGGAAGTGAATATGATGGCGAAAATAGTAGTAGCATTAGGTGG
+TAATGCTTTAGGAAAATCACCTCAAGAACAACTCGAGCTTGTTAAAAATACTGCGAAATC
+ATTAGTAGGATTAATAACAAAAGGACATGAGATTGTTATTAGTCATGGTAATGGACCACA
+GGTTGGAAGCATTAATTTGGGACTTAACTATGCTGCAGAACATAACCAAGGTCCGGCATT
+TCCATTTGCTGAATGTGGCGCAATGAGTCAAGCTTACATCGGCTATCAATTACAAGAAAG
+CTTACAAAATGAATTGCATTCTATTGGAATGGATAAACAAGTGGTAACACTAGTGACACA
+AGTTGAAGTTGATGAAAATGATCCGGCATTTAACAATCCTTCAAAACCAATTGGGTTATT
+TTACAACAAAGAAGAAGCTGAACAAATTCAAAAAGAAAAAGGATTTATATTTGTTGAAGA
+TGCTGGAAGAGGATATAGACGCGTTGTTCCTTCACCACAACCCATCTCTATTATTGAATT
+AGAGAGTATTAAAACACTTATTAAAAATGATACACTCGTTATTGCTGCTGGTGGTGGAGG
+TATACCAGTAATTAGAGAGCAACATGATGGTTTTAAAGGTATTGATGCAGTTATAGACAA
+AGATAAAACAAGTGCGTTGTTGGGTGCTAATATTCAATGCGATCAATTGATTATTTTAAC
+AGCAATTGATTATGTATATATTAATTTTAACACTGAAAACCAACAGCCTTTGAAAACAAC
+AAATGTTGATGAATTAAAACGATATATAGACGAAAATCAATTTGCAAAAGGAAGTATGTT
+ACCAAAAATTGAAGCAGCCATATCATTTATTGAAAACAATCCAAAAGGAAGTGTGCTTAT
+AACATCATTAAATGAATTAGATGCTGCCTTAGAGGGTAAAGTAGGTACTGTGATTAAAAA
+GTAATTGAATTGAAACGCTTTTCAATTACTATATGTCAAATGCATGATTTTTATTATTTA
+TGTGCACCCCCTGAAAATAATGCCTCTATTTTGATGCGGGGTGCACTTTCTTAATTTATA
+TTTATAAAATCTTTAAGGTAGAAATGCTAGGTTAAATGTCGAAGGAGATGAAACCGTGGA
+AAATACAATTAATGAAAGTGAAAAGAAAAAACGATTTAAATTAAAAATGCCAGGTGCATT
+TATGATTTTATTCATTTTAACGGTTGTTGCAGTTATAGCAACATGGGTTATTCCTGCTGG
+TGCATATTCTAAACTTTCTTACGAACCTTCATCCCAAGAACTAAAGATAGTTAACCCTCA
+TAACCAAGTGAAAAAGGTTCCGGGTACGCAACAGGAACTAGACAAAATGGGGGTTAAAAT
+TAAGATTGAACAATTTAAATCAGGTGCAATTAATAAGCCAGTATCAATTCCGAATACTTA
+TGAAAGATTAAAGCAACATCCAGCTGGACCAGAACAAATAACAAGTAGCATGGTTGAAGG
+TACGATAGAAGCGGTCGATATCATGGTATTCATTCTTGTACTAGGGGGACTTATTGGCGT
+AGTTCAAGCCAGTGGTTCTTTTGAATCGGGATTGTTAGCTTTAACGAAGAAAACAAAAGG
+GCATGAATTTATGCTAATTGTGTTTGTATCAATACTAATGATTATCGGCGGGACGTTATG
+TGGTATTGAAGAAGAAGCTGTAGCATTCTATCCGATTTTAGTCCCTATATTTATAGCGTT
+AGGATACGATTCTATCGTTTCAGTTGGTGCCATATTCCTTGCCAGCTCTGTCGGTAGTAC
+ATTTTCAACTATTAACCCGTTCTCGGTTGTAATTGCCTCTAATGCCGCTGGTACAACTTT
+TACGGATGGCTTGTATTGGAGAATAGGTGCTTGTATTGTCGGTGCGATTTTTGTTATTAG
+TTATTTATATTGGTACTGTAAAAAAATTAAAAACGATCCTAAAGCGTCATATTCTTATGA
+AGACAAAGATGCTTTTGAACAGCAATGGTCTGTATTAAAAGATGACGATAGTGCCCATTT
+TACTTTGCGTAAGAAGATAATCCTTACATTATTTGTACTACCATTTCCAATTATGGTATG
+GGGAGTTATGACGCAAGGTTGGTGGTTCCCAGTTATGGCTTCAGCATTTTTAATATTTAC
+AATTATAATAATGTTTATTGCTGGGACAGGTAAATCTGGATTGGGAGAAAAAGGAACTGT
+AGATGCATTTGTCAATGGTGCATCAAGTTTAGTAGGTGTATCTTTAATTATTGGTTTAGC
+TCGAGGTATTAATTTAGTGTTGAATGAAGGTATGATTTCAGATACAATCTTACACTTTTC
+ATCATCTTTAGTTCAACATATGAGTGGACCATTATTTATCATCGTATTACTATTTATTTT
+CTTCTGTTTAGGTTTTATCGTGCCATCATCTTCTGGATTAGCGGTATTATCAATGCCTAT
+CTTTGCACCACTAGCTGATACAGTAGGTATACCAAGATTCGTCATCGTTACGACATATCA
+ATTTGGTCAATATGCGATGTTATTCTTAGCGCCGACTGGACTTGTTATGGCCACACTACA
+AATGTTAAACATGCGATATTCACATTGGTTCCGATTTGTATGGCCGGTAGTTGCTTTTGT
+ATTGATTTTCGGTGGCGGAGTACTAATTACGCAAGTACTAATTTATTCATAATTTGAAAT
+GCTATATTATAAAAATACTAATTGGGTTTTATGCATCTCGTAGGTTTGTAGAAATACTAA
+ACTAAGCGAGGTGCATTATTATTTTTGATTAAGAAAATAATGACGGTAATGATAACACTA
+GTAAGTAATTGATACAATGCTCTATTTAATAATGATATTTTTAAAATTTGTTTTTAATGT
+AATGTTAGATCTATGGTATATTATATTTATCGTGGTAAATATGTATTTGCTGTAAACAGT
+TTTTGTAACACATGACACATATGGTTGTGAAAATAGCATATATAAAGGATGGCTATAAAT
+GACACATTTGACAAAGGTTTTAGATACACTAACTGGAATATGCGTAGTATTATTATTTAG
+TAAATATTTTGTGGCGTATGCAAATATGGTGTTTGATTGGAATTTAAGATGGTATTTGCT
+AGAAAACATACCACATTTGCCAATTATATTATTTATTCTGATGTTTATTTTCGGAGTACC
+TTCTGAAATGATAAAAGATAGGCAAAGGAAAAATAACGGTGTTTAATTTATCGATATTTA
+GAGGTGATAAAATTTGTCAACTATTACAAAGACTAAAATAAAAATCTTTATTAATTATTT
+TATTGGTATATTAGCCTTGGCGTTTGCTGCTTATATATTATTTAAATAAAGGTTAGAGTG
+AAACGTGTTTATGAACTAGACGTTCTAGTATAGTTACTACAGCTTTAGTCAGAAGGTATC
+ATTGATAAGATCATATTAAATCAAAGAGGCATTGATATACACTAAAAAGAGGCAAGATTA
+CCTGCCTCTTTTTTAGTTATTAAATATGCGTGTTAATTCTTGGTAATAGTGTTACTGAGA
+CGCATTACGAAGAAATTTATCTTGATTTTCCTTTTTTAAAAAGAAGTGAAGATATCCTAT
+AAAGACTCTAAGTACTATTACAGTGGCTAATAACACTTCGATAAACAAAAGACCTTTCCA
+AATATCTGGAAACATAAGTACAGGCAAACTGTTCTTTAAAGCAGTTGCTGAGATTACTAA
+AGGGAATGTGAAAGCTGAAAATACGGGCGAAAAAGGTTCTTTTAGTAATTTAGGCAATTG
+TATAATGATATAAAAATAAAATATTTGAGCTAATATTAAAAATATAATTACGATAAAAGC
+ATTCGCCTTAGGAAAGGCTATAACATATGCTGCAGCGACTAAAGAAAATGGTGCACAAAT
+TGTCGATGTGTTCGGTTTGATTGACGTCTGCAATGGAAATGCTTTTAATCGCTTGAAAAC
+AATAGGTAAGACAATACAAGTAGCTACAAAGCCATATATTACTGTTAATTGACCTATGAA
+AAAACATCCGCTAACGGGTGCCGTCAATCCTGCGATAGCAATACCAATAAAAAGTACAGT
+CCACGAAGGATAAACATTTTCAAGTGAAAAATCTTTTAAATATTTTATTGAAAAAATAAT
+CATATGCGTCATAATTCCCACAAGGCATAAAATCCAAATAGGCGTTATTAAGCTATTGAT
+AAAAGTTATGTTACTAAAAAATGTATTTAAATAAGTAGTACCTAAAAAGCCAGACATGAA
+AAATGTTGTGAACACTGATGAAACTAGAGGACTGTTTAATTGTTCTTTCACATTATTAAA
+ATATTTGATCATAGTACATAAAAGGTGAATCCAAATCAAGAAAGCAAAGATTCCGCAAAC
+AGCGTTTAAAGTAAGAGATAAGTCTTTTAATAGATTACCCAGGCCTAATAATCCTAAGAC
+TAGTCCTGACGTTACTAGAGGTGCTTTTTGAAGTCTCATGATTTACAACCTTTCTTATGT
+GATTTTCTTCACTAATTATATCATGATAGCTATGGCCAATTAATAAGAAGAGTGTGTACT
+ATTACGTTATTAGAGTATGTATATTGGATTGATAGACACATAATTGACATTTAAATCTCA
+AAATTAATGATATTTGAGGTATGCTTTGCAACCTAAATATTGGAATATGTGGAAAAGTAA
+TTATTTAATTTAAATGATGATTGAAGATTTACAAGGGGGTGTACAAAATGAGAAATCAAA
+TTCAAAAACTATTAGACAGTGATTTGAGCAGTTTACATATATCGAAACAAACAGGAGTTC
+CACAAAGCACAATACACAGAATGAGAAAAAAAGAAAGATCATTAGACAATATGTCATTGA
+AAAACGCTGAACTACTTTATAAATTTGCCAATAGTATATTTAGCAATGAAAATTAAGAAG
+AATAATTAGTGAGTTAAATAATTAAAAAACCACGTCTCGATGGACGTGGTTATTTTTTTA
+AATGTATTTAATTATAGGATTTCGGGCCCCTGAAAAGTCCCTAAAATTTTGAATAGTATC
+AAAATCTATAGGAATAATATAATAATAAAAATCTCTACGCATGCAAATGGTGAGTGTTAA
+AAATCTTGGTGTAGTGTTGGTGTAGTTCAGGTGCAGTATAGATGAAATTTGAGCACAAAA
+TTACTTAATTAAAAATGTATAAACAAGTGTCAGACGCGTGTCAATTGTGTGTCAAAAAAT
+TCTATGACGAATAAGCATGCTTGTTTTAATTACTGTATTTTCGGGGCTTATGCGGGGGAC
+GTTTTGGTGACGCATCATACTATATTACTGAAATTCAAAACAAAAAGAGCCCCGTAATCA
+CGGAACTCTTTTGTTTGGTAATGCGTATAAAAATACCTATAAACGTCCTGGGAGGGATTC
+GAACCCCCGACCGATGGCTTAGAAGGCCATTGCTCTATCCAGCTGAGCTACCAGGACACG
+TTTAACAACACAAGAATTATTATATCTAAATGAACTTAAATTAGCAATACCTTGTAAATA
+AAAAATGTTTATATTTTTCACTATTATAGAGCTATTTATCTAAAAAGGTTCAATAAGACT
+TAAATGCGAATTCAGGCAACTTAATTGTGTTAAATACAGTTTTGAATGCCTAACTGTATT
+TCTTTTCTCTTTAAAATACAGTTAAGTACATTATAAGATGTTGTGCGGATAAACAAACTA
+ATTGCATCAAATTTATTTTAAAATAACAACAACAAAACGTTAAGCGAATAACATTTCGGT
+GATTTAAAAGCTACGCACGTTTTTGTTATCTTCAAATTTAAATTTTAAGGAGTGTTTTCA
+ATGGAAGGTTTATTTAACGCAATTAAAGATACCGTAACTGCAGCAATTAATAATGATGGC
+GCAAAATTAGGCACAAGCATTGTGAGCATCGTTGAAAATGGCGTAGGTTTATTAGGTAAA
+TTATTCGGATTCTAATTTCAATATGTTATGTAAGTAATCAGTATTATTTCAAAGGTGAGG
+GAGAGATTTAAATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAAC
+AACATGATAGTGTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTT
+TACTAGGTAAATTATTTGGATTCTAATATAATAACTAATATTCTTTAAAATAAACTGGGT
+GAGCATACTTTAATGTTATGCACTCAGTTTATTTTATTTGCAGAAATTTGAGCCTCTGTT
\ No newline at end of file
diff --git a/t/data/reformat_input_gffs/expected_real_1.gff b/t/data/reformat_input_gffs/expected_real_1.gff
new file mode 100644
index 0000000..36af703
--- /dev/null
+++ b/t/data/reformat_input_gffs/expected_real_1.gff
@@ -0,0 +1,223 @@
+##gff-version 3
+##sequence-region 2 1 15000
+2	annotation	remark	1	15000	.	.	.	ID=id___1;accessions=REL607;comment=Source DNAEscherichia
+2	feature	source	1	4629812	.	+	.	ID=id___2;mol_type=genomic DNA;organism=Escherichia coli;strain=REL607
+2	feature	CDS	190	255	.	+	0	ID=id___3;codon_start=1;gene=thrL;locus_tag=ABC_00001;note=b0001;product=thr operon leader peptide;protein_id=kribb:ABC_00001;transl_table=11;translation=MKRISTTITTTITITTGNGAG
+2	feature	gene	190	255	.	+	.	ID=id___4;gene=thrL;locus_tag=ABC_00001
+2	feature	CDS	336	2798	.	+	0	ID=id___5;EC_number=2.7.2.4,1.1.1.3;codon_start=1;gene=thrA;locus_tag=ABC_00002;note=b0002;product=bifunctional aspartokinase I/homeserine dehydrogenase I;protein_id=kribb:ABC_00002;transl_table=11;translation=MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQE [...]
+2	feature	gene	336	2798	.	+	.	ID=id___6;gene=thrA
+2	feature	CDS	2800	3732	.	+	0	ID=id___7;EC_number=2.7.1.39;codon_start=1;gene=thrB;locus_tag=ABC_00003;note=b0003;product=homoserine kinase;protein_id=kribb:ABC_00003;transl_table=11;translation=MVKVYAPASSANMSVGFDVLGAAVTPVDGALLGDVVTVEAAETFSLNNLGRFADKLPSEPRENIVYQCWERFCQELGKQIPVAMTLEKNMPIGSGLGSSACSVVAALMAMNEHCGKPLNDTRLLALMGELEGRISGSIHYDNVAPCFLGGMQLMIEENDIISQQVPGFDEWLWVLAYPGIKVSTAEARAILPAQYRRQDCIAHGRHLAGFIHACYSRQPELAAKLMKDVIAEPYRERLLPGFRQARQAVAEIGAVASGISGSGPTLFALCDKPDTAQRVADWLGKNYLQNQEGFVHI [...]
+2	feature	gene	2800	3732	.	+	.	ID=id___8;gene=thrB;locus_tag=ABC_00003
+##FASTA
+>2
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+ATACCCACCACATTTGGTGGAGAACCAAAAATTAGCCGAAAAACATCATTTCTGAAGTTA
+TCGGCTAAAGTTATAAATTATATTTATTTGTACATGAACAAATAATTTACATTAATTTGT
+CATTTCTTCTTTTTCCCAATCGATTTTATATCTTTCTGAAGAACGATCTGTCCATTTATC
+TTTAGTATTGGTACCTTTCCAATTTGTTGAAGTCCAATGCAATTGGTAGTCATCACGAAC
+TCGTTCGTATATTACATCTATATTTGTTTGTTGTTTGGATGCTTTTCTATCCATAGTAAT
+AACTGTAGCGAAGTCTGGTGAAAACCCTGAAGATAATAGAGAACTTGCTTTGTTAGGATC
+AAGGAAGTTCTCTGCTGCTTTCATAGAACCATTTCTAGTTTTCATGAAAAGTTGATTGCC
+ATATACCGGGTTCCAAGAATCTCTATCATATGGTCCCCAATTTTGATTCACCATATTGTT
+AAATATCACTTTCCAGCCTACTTTTTTATCAGTTGGGCTCTCTAAAATTGTTTTGAAATC
+AGGTTGAACATATTTCAGTGTATGACCAATCGAAACATTTGCACCAATAAGGCCGCCAAT
+TTTTCCTGTATCATCACCAGTAACATTACCGTTGAATCCATAAGTTAAAGTACTCATATA
+CTCTTTTGTATCAATCGAATTTCTTGGATAGTAATCAGATATTTGAGCTACTTCATTATC
+AGGTAGTTGCAACTGTACCTTAAAGGCTGAAGGCCAGGCTAAACCACTTTTGTTAGCACC
+TTCTTCGCTATAAACTCTATATTGACCAGCAATGGTACCTTTCGTTCTAATAACTAGCAG
+TTTTTTATTGTGATTTTTATCATCGATAAAACTATAAAATACTTTTTTGTGCATGCCATT
+TTCTTTATCATAAGTGACTAAATCACCTGTTTTTACTGTAGTATTGCTTCCAATATCTGT
+AGTACCGGTTTTAATATTAATATCAGAATCTGCGGCATTAGCGACAGGATTCATTAATAT
+GGAACCTAGCAATAGTGTTGTTGTTACTGAGCTGACTATACGTGTTTTCATTTTCATCAT
+CCTTCTATTTTTTTTAACGATTTGAGGAAACAATAATCAATATGTCAATTTAGAATATTG
+CAGTTGAGACATATCAAATATTTATAATCGTTTATATTAGTATATTTAATGTAGCAACTG
+ATAAATTACTGAGTGATGATGAGTGATTATTTTAAGAATATGTTTTTAACTTTTATTTAA
+AATTTGAAAGGAAGCATTTCAATTTCGAGGGTTAGTCAAAGTTGAATAAATTCTTTATGA
+AACAAGGAAAAGACATAGCTAATTTTATTGATTAATTTCTTTAAAACTAATGATTTGTTT
+GATTTAAAAATGTAATCGATTACAATATAAAAATACAAATATCTTAGAATTAAATCAATT
+AATTAACTATTAAATAAAAATTAAATATATATTAACTACTGTAAATTAATAAATAGAAAT
+AGAGAAAAAGGGTATTAATTATGTTTGGAATTTCGTCGGGAATAATTTTTGCCATTACAT
+AGAAATATCTAATAGATAATGAAAAAGTATCGTATGTATTTTTAATATAGTGTAAAATAT
+CATATGTAAAATAAAATGTAGATTTTTAGTTAGAGGCATTATAAGAAAATTTTGAGTATA
+GGTTAGCTTTTAATTATGAATCTTATTGAAATTTGATTAATAAATATATGATAGGGGATT
+AAAATGAAACTATTTTATATCGTATTTCTTATTATTATATGGCTGAATATATTTTTAGGA
+AATGAAATTATCCATACACTGACTGTTTTAATAACAACATTGTATATTGTTAATTCAAGA
+AAGGGGATTAAAAATGACAGAGTTGAATAATATTATAAACTCTCTTCAATCTTTGTTTGA
+GTCTGAATCAGGCTATAAAATTTCGAAAAATTCAGGAGTTCCATATCAAACAGTACAAGA
+TTTAAGAAATGGGAAAACCAAACTAGAAGATGCTAGATTTAGAACGATTATTAAACTTTA
+TAGTTACTATGTCTCATTAAAAGAACATTAATCATGGGACAATAAAGTATTGCTATAATA
+ATTGAATCATTAAATGAAGGAATAGAGCCTAAGACATAATAAATCAATGTCTTAGGCTCT
+ACAATATTATATTGGTAGTAGTTGACCGAATGAAAATGACCATGTAACAAGCATTTTTGG
+GGCCCCAACACAGAAGCTGACGAAAAGTCAGCTTACAATAATGTGCAAGTTGGGGATGGG
+CCCCAACACAGAGAATTTCAAAAAGAAATTCTACAGACAATGCAAGTTGGCGGGGCCCCA
+ACATAGAGAAATTCAAAAAGAAATTCTACAGACAATGCAAGTTGGGGAGTGACAACGAAA
+TAAATTTTATAAAATACCATTTCTGTCACATTCCTACTCCCAATTCCATTTAAATATATA
+GAAAATTTCACCTATTATATACATCATACACTTTAAAAATTATCATTTAAATAATCTGAT
+TTGGAATTAAAGTGCATGAAGTATAAGTCACCTTCATATACTAATCAAAGAGGACGTCAA
+CAGTTATTTTATTAGGATTTTTAACATAAACATTTGCTAGATCTGAATGTAATCTTTTGC
+TTAAATCAATAGTGTAGTTATTACCGCCACCGGTGATCTTAAGCTTACCTTTATTACGAT
+TTTCGGTATATAATATTTTATTTTTTATTAACGCTTCTCGTGCACGGAAATCGATTTCTT
+TCAATGTTAATACTGGTTTATTGCCTTGGTATATTTTATGCGCACCAATAATCGTTTGTA
+GTTTATCTTTGTATTGCACAAAAAGATTATAAGTTTTATCAGAAGGTTTTGCGGCTGGTG
+TAACGCCACCTGTAAATGTCTCTCTATAAGACCACCATAACTGATCAGTATCTTTGTCTT
+TTAGTCCAAACACATCTACGTAACGATCTTTTAACTGATTAATATTTCCCCAACTTTCAG
+CGCCCCATAAAGATATATGTGCTGACCATGAATATCTCTTAAGTTCAACATAAATGTTTC
+CATTATCATATTGATATAGCCATTTATTTGAAAATGAAAAATGAGGCTGTGTGTAATATT
+TAATTAATTCATTGATGTTAGTTTCATCTTGACCAATGCTATAAGCTTTAGCTTCAGAGT
+AAAAACTAAATACTGGTTTTTGATTTTGAGGTAATACAGTACCTAATAGTAATAATGTTG
+TCGTTAAAATTATATTTTTCGTGATGTTCTTACTCATTAGAACATCTCCTTTCAGAGGAA
+TCATGATACGAGGAATAAGAAATTTAAATGTGAGCGAAGTCAATATAGTATTTGCGATTA
+TTTTTATTAACTCCATTATTATTGTTAGTTTGATTTTTCGAGGATAACTTCAATTTTTGC
+ATTTTGAGGTTTTTTAACATATCTATTTGCATCAGTTGATGGCAACCTTTTACTTAAATC
+TATTGTGTAGTTATTGTCTGTACCTGTTATTTTAATTTGTCCTTTATTATAAGAATTATT
+ATATAATTTTTTACTTTTAATTAATGTTTGACGAATACGAAAATCTAATTCTTTTAAAGT
+TAAAACAGGCTTATTGCCTTCATAAACTGGAAATCCGCCAGTAAACGTTTCTGCTTTATC
+TTTATATGTTACATTCAGTTTATAGTGTTTATCGTTAGATGTTGCTGCAGGAGTAACACC
+ACCAGTAAACGTTTCTTGAGATAATGCAAAAGAATCAATGGTTTCTTGGTCTTTTATGCC
+AAAAATATCAACGCTTTTATTTCTTAATTGGTTGATATTGCCCCAACTTTCAGGTCCATA
+AACTTGAATATGACTATACCAAGAAAACTGTAACAACGTTGCATGAATCGTACCGTTATC
+TTTTTGCCATAACGTACTGTTAGAGAAGGTTAAATATTTTTGCGAGTAATATTTAGTTAA
+CTCATTAACGTTAGTTTCGTTTTGATTTATATAATAAGCTTTCGCTTCAGATGAAGAATT
+GATAGGTGTATTAGGAAATTGTGTAAATGCTGTACCTAATAGTAACAATGTTGTTGATAA
+AATAATTTTTTTCGTGATGTTATTGTTCATTCGAATTTCTCCTTTGAGTATTGTTGGAAT
+GTTTAATTATAAAAAGGGGTTAATTAGATAATTGAAATTATCCGCATTTACAAAAGGTAA
+TAGGTTAGTTAGATTTTTCGAGTATGACTTCAATTTGTGCATTTTTAGGATTTTTAACAT
+AACGGTTTGTGTCAGTTAATTTTAACTTTTTACTTAAATCAATCGTGTAATTATTTCCAT
+CAGCAGTTATCTTAATTTGACCTTTATTAAATTCTCCGTTATATAACTTTTTATTCTTTA
+TTAATGTTTGACGAATACGGAAATCTAATTCTTTTAAAGTTAATACTGGTTTATTTCCTT
+TGTAAAATTCATGTCCACCGATGATAGTTTGTTGTTTATCACTATATTTTAAAAATAGTC
+TATAAGGTTTATCAGATGAAGTAGCTGCTGGCGTAACACCACCAGTAAATGTTTCATCAT
+AAGTCCAGTAACCTTCAACTGTGTCCTCATCTTTAGTTCCAAATATATCAACGTATTTAT
+TTCTTAACTGATTAATGTTTCCCCAACTCTCGGATCCAAACACTTGAATATGACTATACC
+AAACCCACGTTTGCAATGTTGCATGAATGCTACCATTGGGCTTTTGCCATAGCCATTTTC
+CAGATAATGAAAAATGCGGCTGAGTGTAATATTTGATTAACTCATTGATATTAGTCTCGT
+TTTCACTGATATTATAGGCTTTTGCTTCAGATGAAAAACTGATTGGTGTTTTAGGAAGTT
+GTGTTGATGTGGTTCCTAAAAGTAACAATGCTGTTGATAAAACTAATTTATTCATGATGT
+TCTTTTTCATATGAAAATCTCCTTTGCGTGAATTACCCAAAGTATATAAGCTATTACACC
+GATTCGGAATTAAATAAAAGCTAAAACTATGTTAAATAAACTTAAACAGTTAGTAGTGTT
+ATTTAAGCAAAACTTATCATTTTTAAGTTTTGGACAGAAACAGTACTTAATAAAGTAGGC
+GGGAGTTATATATTAAAAACGACACGTTACAATTATTCTCTAATCAATTGCATTAAATTG
+TTTGATAATTGAATTTTCTAACTACTTGAAAAATAGTTATACTTTAAATGTAGTACTTAT
+TTTAATTATTTCCTACTACTTAAATTTAATATTAATAAAATGTTCATTTAATTATTGATA
+AAATATTACAAATTTTAATAGTAGGTTGTGTTTATTTTGTATGCGCTTACAATTTAGGTG
+TAACTAAAATAAAAGGAGTTGTTATTAATGAAAAATTTACGAAACAGAAGTTTTTTAACT
+TTATTAGACTTTTCACGACAAGAGGTAGAATTCTTATTAACACTCTCCGAGGATTTAAAA
+CGTGCTAAATATATTGGCACTGAAAAGCCTATGTTAAAAAATAAAAATATTGCACTGTTA
+TTTGAAAAAGATTCTACAAGAACGCGATGTGCATTTGAAGTTGCAGCGCATGATCAAGGT
+GCAAATGTAACTTATTTAGGCCCAACTGGATCACAAATGGGTAAAAAAGAAACAACTAAA
+GATACTGCACGTGTGCTTGGTGGAATGTATGATGGCATTGAATACCGTGGTTTTTCACAA
+AGAACAGTAGAAACTTTAGCTGAAAATTCAGGCGTACCAGTGTGGAATGGTTTAACTGAT
+GAAGATCATCCTACTCAAGTTCTTGCTGATTTCTTAACAGCAAAAGAAGTCTTAAAAAAA
+GATTATGCAGATATTAACTTTACATATGTTGGAGATGGTCGTAATAACGTTGCAAATGCA
+TTAATGCAAGGTGCTGCCATTATGGGTATGAACTTCCATTTAGTTTGTCCAAAAGAATTA
+AATCCAACAGATGAATTATTAAATCGCTGTAAAAATATTGCCGCTGAAAATGGTGGCAAC
+ATATTAATCACAGATGATATTGACCAAGGTGTAAAAGGTTCGGATGTAATTTACACTGAT
+GTTTGGGTATCAATGGGTGAACCTGATGAAGTATGGAAAGAACGACTTGAATTATTGAAA
+CCATATCAAGTAAATAAAGAAATGATGGATAAAACTGGTAATCCAAATGTTATTTTTGAG
+CATTGCTTACCATCTTTCCATAATGCTGATACGAAAATTGGTCAACAAATTTTTGAAAAA
+TATGGTATTCGAGAAATGGAAGTTACAGATGAAGTATTCGAAAGTAAAGCTTCAGTTGTA
+TTCCAAGAAGCTGAGAACAGAATGCATACAATCAAAGCAGTCATGGTTGCTACATTGGGT
+GAATTTTAAATGATATAAGGAAGTGAATATGATGGCGAAAATAGTAGTAGCATTAGGTGG
+TAATGCTTTAGGAAAATCACCTCAAGAACAACTCGAGCTTGTTAAAAATACTGCGAAATC
+ATTAGTAGGATTAATAACAAAAGGACATGAGATTGTTATTAGTCATGGTAATGGACCACA
+GGTTGGAAGCATTAATTTGGGACTTAACTATGCTGCAGAACATAACCAAGGTCCGGCATT
+TCCATTTGCTGAATGTGGCGCAATGAGTCAAGCTTACATCGGCTATCAATTACAAGAAAG
+CTTACAAAATGAATTGCATTCTATTGGAATGGATAAACAAGTGGTAACACTAGTGACACA
+AGTTGAAGTTGATGAAAATGATCCGGCATTTAACAATCCTTCAAAACCAATTGGGTTATT
+TTACAACAAAGAAGAAGCTGAACAAATTCAAAAAGAAAAAGGATTTATATTTGTTGAAGA
+TGCTGGAAGAGGATATAGACGCGTTGTTCCTTCACCACAACCCATCTCTATTATTGAATT
+AGAGAGTATTAAAACACTTATTAAAAATGATACACTCGTTATTGCTGCTGGTGGTGGAGG
+TATACCAGTAATTAGAGAGCAACATGATGGTTTTAAAGGTATTGATGCAGTTATAGACAA
+AGATAAAACAAGTGCGTTGTTGGGTGCTAATATTCAATGCGATCAATTGATTATTTTAAC
+AGCAATTGATTATGTATATATTAATTTTAACACTGAAAACCAACAGCCTTTGAAAACAAC
+AAATGTTGATGAATTAAAACGATATATAGACGAAAATCAATTTGCAAAAGGAAGTATGTT
+ACCAAAAATTGAAGCAGCCATATCATTTATTGAAAACAATCCAAAAGGAAGTGTGCTTAT
+AACATCATTAAATGAATTAGATGCTGCCTTAGAGGGTAAAGTAGGTACTGTGATTAAAAA
+GTAATTGAATTGAAACGCTTTTCAATTACTATATGTCAAATGCATGATTTTTATTATTTA
+TGTGCACCCCCTGAAAATAATGCCTCTATTTTGATGCGGGGTGCACTTTCTTAATTTATA
+TTTATAAAATCTTTAAGGTAGAAATGCTAGGTTAAATGTCGAAGGAGATGAAACCGTGGA
+AAATACAATTAATGAAAGTGAAAAGAAAAAACGATTTAAATTAAAAATGCCAGGTGCATT
+TATGATTTTATTCATTTTAACGGTTGTTGCAGTTATAGCAACATGGGTTATTCCTGCTGG
+TGCATATTCTAAACTTTCTTACGAACCTTCATCCCAAGAACTAAAGATAGTTAACCCTCA
+TAACCAAGTGAAAAAGGTTCCGGGTACGCAACAGGAACTAGACAAAATGGGGGTTAAAAT
+TAAGATTGAACAATTTAAATCAGGTGCAATTAATAAGCCAGTATCAATTCCGAATACTTA
+TGAAAGATTAAAGCAACATCCAGCTGGACCAGAACAAATAACAAGTAGCATGGTTGAAGG
+TACGATAGAAGCGGTCGATATCATGGTATTCATTCTTGTACTAGGGGGACTTATTGGCGT
+AGTTCAAGCCAGTGGTTCTTTTGAATCGGGATTGTTAGCTTTAACGAAGAAAACAAAAGG
+GCATGAATTTATGCTAATTGTGTTTGTATCAATACTAATGATTATCGGCGGGACGTTATG
+TGGTATTGAAGAAGAAGCTGTAGCATTCTATCCGATTTTAGTCCCTATATTTATAGCGTT
+AGGATACGATTCTATCGTTTCAGTTGGTGCCATATTCCTTGCCAGCTCTGTCGGTAGTAC
+ATTTTCAACTATTAACCCGTTCTCGGTTGTAATTGCCTCTAATGCCGCTGGTACAACTTT
+TACGGATGGCTTGTATTGGAGAATAGGTGCTTGTATTGTCGGTGCGATTTTTGTTATTAG
+TTATTTATATTGGTACTGTAAAAAAATTAAAAACGATCCTAAAGCGTCATATTCTTATGA
+AGACAAAGATGCTTTTGAACAGCAATGGTCTGTATTAAAAGATGACGATAGTGCCCATTT
+TACTTTGCGTAAGAAGATAATCCTTACATTATTTGTACTACCATTTCCAATTATGGTATG
+GGGAGTTATGACGCAAGGTTGGTGGTTCCCAGTTATGGCTTCAGCATTTTTAATATTTAC
+AATTATAATAATGTTTATTGCTGGGACAGGTAAATCTGGATTGGGAGAAAAAGGAACTGT
+AGATGCATTTGTCAATGGTGCATCAAGTTTAGTAGGTGTATCTTTAATTATTGGTTTAGC
+TCGAGGTATTAATTTAGTGTTGAATGAAGGTATGATTTCAGATACAATCTTACACTTTTC
+ATCATCTTTAGTTCAACATATGAGTGGACCATTATTTATCATCGTATTACTATTTATTTT
+CTTCTGTTTAGGTTTTATCGTGCCATCATCTTCTGGATTAGCGGTATTATCAATGCCTAT
+CTTTGCACCACTAGCTGATACAGTAGGTATACCAAGATTCGTCATCGTTACGACATATCA
+ATTTGGTCAATATGCGATGTTATTCTTAGCGCCGACTGGACTTGTTATGGCCACACTACA
+AATGTTAAACATGCGATATTCACATTGGTTCCGATTTGTATGGCCGGTAGTTGCTTTTGT
+ATTGATTTTCGGTGGCGGAGTACTAATTACGCAAGTACTAATTTATTCATAATTTGAAAT
+GCTATATTATAAAAATACTAATTGGGTTTTATGCATCTCGTAGGTTTGTAGAAATACTAA
+ACTAAGCGAGGTGCATTATTATTTTTGATTAAGAAAATAATGACGGTAATGATAACACTA
+GTAAGTAATTGATACAATGCTCTATTTAATAATGATATTTTTAAAATTTGTTTTTAATGT
+AATGTTAGATCTATGGTATATTATATTTATCGTGGTAAATATGTATTTGCTGTAAACAGT
+TTTTGTAACACATGACACATATGGTTGTGAAAATAGCATATATAAAGGATGGCTATAAAT
+GACACATTTGACAAAGGTTTTAGATACACTAACTGGAATATGCGTAGTATTATTATTTAG
+TAAATATTTTGTGGCGTATGCAAATATGGTGTTTGATTGGAATTTAAGATGGTATTTGCT
+AGAAAACATACCACATTTGCCAATTATATTATTTATTCTGATGTTTATTTTCGGAGTACC
+TTCTGAAATGATAAAAGATAGGCAAAGGAAAAATAACGGTGTTTAATTTATCGATATTTA
+GAGGTGATAAAATTTGTCAACTATTACAAAGACTAAAATAAAAATCTTTATTAATTATTT
+TATTGGTATATTAGCCTTGGCGTTTGCTGCTTATATATTATTTAAATAAAGGTTAGAGTG
+AAACGTGTTTATGAACTAGACGTTCTAGTATAGTTACTACAGCTTTAGTCAGAAGGTATC
+ATTGATAAGATCATATTAAATCAAAGAGGCATTGATATACACTAAAAAGAGGCAAGATTA
+CCTGCCTCTTTTTTAGTTATTAAATATGCGTGTTAATTCTTGGTAATAGTGTTACTGAGA
+CGCATTACGAAGAAATTTATCTTGATTTTCCTTTTTTAAAAAGAAGTGAAGATATCCTAT
+AAAGACTCTAAGTACTATTACAGTGGCTAATAACACTTCGATAAACAAAAGACCTTTCCA
+AATATCTGGAAACATAAGTACAGGCAAACTGTTCTTTAAAGCAGTTGCTGAGATTACTAA
+AGGGAATGTGAAAGCTGAAAATACGGGCGAAAAAGGTTCTTTTAGTAATTTAGGCAATTG
+TATAATGATATAAAAATAAAATATTTGAGCTAATATTAAAAATATAATTACGATAAAAGC
+ATTCGCCTTAGGAAAGGCTATAACATATGCTGCAGCGACTAAAGAAAATGGTGCACAAAT
+TGTCGATGTGTTCGGTTTGATTGACGTCTGCAATGGAAATGCTTTTAATCGCTTGAAAAC
+AATAGGTAAGACAATACAAGTAGCTACAAAGCCATATATTACTGTTAATTGACCTATGAA
+AAAACATCCGCTAACGGGTGCCGTCAATCCTGCGATAGCAATACCAATAAAAAGTACAGT
+CCACGAAGGATAAACATTTTCAAGTGAAAAATCTTTTAAATATTTTATTGAAAAAATAAT
+CATATGCGTCATAATTCCCACAAGGCATAAAATCCAAATAGGCGTTATTAAGCTATTGAT
+AAAAGTTATGTTACTAAAAAATGTATTTAAATAAGTAGTACCTAAAAAGCCAGACATGAA
+AAATGTTGTGAACACTGATGAAACTAGAGGACTGTTTAATTGTTCTTTCACATTATTAAA
+ATATTTGATCATAGTACATAAAAGGTGAATCCAAATCAAGAAAGCAAAGATTCCGCAAAC
+AGCGTTTAAAGTAAGAGATAAGTCTTTTAATAGATTACCCAGGCCTAATAATCCTAAGAC
+TAGTCCTGACGTTACTAGAGGTGCTTTTTGAAGTCTCATGATTTACAACCTTTCTTATGT
+GATTTTCTTCACTAATTATATCATGATAGCTATGGCCAATTAATAAGAAGAGTGTGTACT
+ATTACGTTATTAGAGTATGTATATTGGATTGATAGACACATAATTGACATTTAAATCTCA
+AAATTAATGATATTTGAGGTATGCTTTGCAACCTAAATATTGGAATATGTGGAAAAGTAA
+TTATTTAATTTAAATGATGATTGAAGATTTACAAGGGGGTGTACAAAATGAGAAATCAAA
+TTCAAAAACTATTAGACAGTGATTTGAGCAGTTTACATATATCGAAACAAACAGGAGTTC
+CACAAAGCACAATACACAGAATGAGAAAAAAAGAAAGATCATTAGACAATATGTCATTGA
+AAAACGCTGAACTACTTTATAAATTTGCCAATAGTATATTTAGCAATGAAAATTAAGAAG
+AATAATTAGTGAGTTAAATAATTAAAAAACCACGTCTCGATGGACGTGGTTATTTTTTTA
+AATGTATTTAATTATAGGATTTCGGGCCCCTGAAAAGTCCCTAAAATTTTGAATAGTATC
+AAAATCTATAGGAATAATATAATAATAAAAATCTCTACGCATGCAAATGGTGAGTGTTAA
+AAATCTTGGTGTAGTGTTGGTGTAGTTCAGGTGCAGTATAGATGAAATTTGAGCACAAAA
+TTACTTAATTAAAAATGTATAAACAAGTGTCAGACGCGTGTCAATTGTGTGTCAAAAAAT
+TCTATGACGAATAAGCATGCTTGTTTTAATTACTGTATTTTCGGGGCTTATGCGGGGGAC
+GTTTTGGTGACGCATCATACTATATTACTGAAATTCAAAACAAAAAGAGCCCCGTAATCA
+CGGAACTCTTTTGTTTGGTAATGCGTATAAAAATACCTATAAACGTCCTGGGAGGGATTC
+GAACCCCCGACCGATGGCTTAGAAGGCCATTGCTCTATCCAGCTGAGCTACCAGGACACG
+TTTAACAACACAAGAATTATTATATCTAAATGAACTTAAATTAGCAATACCTTGTAAATA
+AAAAATGTTTATATTTTTCACTATTATAGAGCTATTTATCTAAAAAGGTTCAATAAGACT
+TAAATGCGAATTCAGGCAACTTAATTGTGTTAAATACAGTTTTGAATGCCTAACTGTATT
+TCTTTTCTCTTTAAAATACAGTTAAGTACATTATAAGATGTTGTGCGGATAAACAAACTA
+ATTGCATCAAATTTATTTTAAAATAACAACAACAAAACGTTAAGCGAATAACATTTCGGT
+GATTTAAAAGCTACGCACGTTTTTGTTATCTTCAAATTTAAATTTTAAGGAGTGTTTTCA
+ATGGAAGGTTTATTTAACGCAATTAAAGATACCGTAACTGCAGCAATTAATAATGATGGC
+GCAAAATTAGGCACAAGCATTGTGAGCATCGTTGAAAATGGCGTAGGTTTATTAGGTAAA
+TTATTCGGATTCTAATTTCAATATGTTATGTAAGTAATCAGTATTATTTCAAAGGTGAGG
+GAGAGATTTAAATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAAC
+AACATGATAGTGTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTT
+TACTAGGTAAATTATTTGGATTCTAATATAATAACTAATATTCTTTAAAATAAACTGGGT
+GAGCATACTTTAATGTTATGCACTCAGTTTATTTTATTTGCAGAAATTTGAGCCTCTGTT
\ No newline at end of file
diff --git a/t/data/reformat_input_gffs/query_1.gff b/t/data/reformat_input_gffs/query_1.gff
new file mode 100644
index 0000000..c8a0672
--- /dev/null
+++ b/t/data/reformat_input_gffs/query_1.gff
@@ -0,0 +1,220 @@
+##gff-version 3
+##sequence-region abc|SC|contig000001 1 15000
+abc|SC|contig000001	Prodigal:2.60	CDS	172	1131	.	-	0	ID=1_1;gene=different;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q2G1X0,protein motif:TIGRFAMs:TIGR01002,protein motif:Pfam:PF07968.6;locus_tag=abc_00001;product=Alpha-toxin,beta-channel forming cytolysin,Leukocidin/Hemolysin toxin family protein;protein_id=gnl|SC|abc_00001
+abc|SC|contig000001	Prodigal:2.60	CDS	1804	1950	.	+	0	ID=abc_00002;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00002;product=hypothetical protein;protein_id=gnl|SC|abc_00002
+abc|SC|contig000001	Prodigal:2.60	CDS	1934	2131	.	+	0	ID=abc_00003;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00003;product=hypothetical protein;protein_id=gnl|SC|abc_00003
+abc|SC|contig000001	Prodigal:2.60	CDS	2621	3337	.	-	0	ID=abc_00004;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13350,protein motif:Pfam:PF02876.11;locus_tag=abc_00004;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00004
+abc|SC|contig000001	Prodigal:2.60	CDS	3445	4170	.	-	0	ID=1_2;gene=speH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0C0I6,protein motif:CLUSTERS:PRK13349,protein motif:Pfam:PF02876.11;locus_tag=abc_00005;note=SPE H;product=hypothetical protein,superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00005
+##FASTA
+>abc|SC|contig000001
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+ATACCCACCACATTTGGTGGAGAACCAAAAATTAGCCGAAAAACATCATTTCTGAAGTTA
+TCGGCTAAAGTTATAAATTATATTTATTTGTACATGAACAAATAATTTACATTAATTTGT
+CATTTCTTCTTTTTCCCAATCGATTTTATATCTTTCTGAAGAACGATCTGTCCATTTATC
+TTTAGTATTGGTACCTTTCCAATTTGTTGAAGTCCAATGCAATTGGTAGTCATCACGAAC
+TCGTTCGTATATTACATCTATATTTGTTTGTTGTTTGGATGCTTTTCTATCCATAGTAAT
+AACTGTAGCGAAGTCTGGTGAAAACCCTGAAGATAATAGAGAACTTGCTTTGTTAGGATC
+AAGGAAGTTCTCTGCTGCTTTCATAGAACCATTTCTAGTTTTCATGAAAAGTTGATTGCC
+ATATACCGGGTTCCAAGAATCTCTATCATATGGTCCCCAATTTTGATTCACCATATTGTT
+AAATATCACTTTCCAGCCTACTTTTTTATCAGTTGGGCTCTCTAAAATTGTTTTGAAATC
+AGGTTGAACATATTTCAGTGTATGACCAATCGAAACATTTGCACCAATAAGGCCGCCAAT
+TTTTCCTGTATCATCACCAGTAACATTACCGTTGAATCCATAAGTTAAAGTACTCATATA
+CTCTTTTGTATCAATCGAATTTCTTGGATAGTAATCAGATATTTGAGCTACTTCATTATC
+AGGTAGTTGCAACTGTACCTTAAAGGCTGAAGGCCAGGCTAAACCACTTTTGTTAGCACC
+TTCTTCGCTATAAACTCTATATTGACCAGCAATGGTACCTTTCGTTCTAATAACTAGCAG
+TTTTTTATTGTGATTTTTATCATCGATAAAACTATAAAATACTTTTTTGTGCATGCCATT
+TTCTTTATCATAAGTGACTAAATCACCTGTTTTTACTGTAGTATTGCTTCCAATATCTGT
+AGTACCGGTTTTAATATTAATATCAGAATCTGCGGCATTAGCGACAGGATTCATTAATAT
+GGAACCTAGCAATAGTGTTGTTGTTACTGAGCTGACTATACGTGTTTTCATTTTCATCAT
+CCTTCTATTTTTTTTAACGATTTGAGGAAACAATAATCAATATGTCAATTTAGAATATTG
+CAGTTGAGACATATCAAATATTTATAATCGTTTATATTAGTATATTTAATGTAGCAACTG
+ATAAATTACTGAGTGATGATGAGTGATTATTTTAAGAATATGTTTTTAACTTTTATTTAA
+AATTTGAAAGGAAGCATTTCAATTTCGAGGGTTAGTCAAAGTTGAATAAATTCTTTATGA
+AACAAGGAAAAGACATAGCTAATTTTATTGATTAATTTCTTTAAAACTAATGATTTGTTT
+GATTTAAAAATGTAATCGATTACAATATAAAAATACAAATATCTTAGAATTAAATCAATT
+AATTAACTATTAAATAAAAATTAAATATATATTAACTACTGTAAATTAATAAATAGAAAT
+AGAGAAAAAGGGTATTAATTATGTTTGGAATTTCGTCGGGAATAATTTTTGCCATTACAT
+AGAAATATCTAATAGATAATGAAAAAGTATCGTATGTATTTTTAATATAGTGTAAAATAT
+CATATGTAAAATAAAATGTAGATTTTTAGTTAGAGGCATTATAAGAAAATTTTGAGTATA
+GGTTAGCTTTTAATTATGAATCTTATTGAAATTTGATTAATAAATATATGATAGGGGATT
+AAAATGAAACTATTTTATATCGTATTTCTTATTATTATATGGCTGAATATATTTTTAGGA
+AATGAAATTATCCATACACTGACTGTTTTAATAACAACATTGTATATTGTTAATTCAAGA
+AAGGGGATTAAAAATGACAGAGTTGAATAATATTATAAACTCTCTTCAATCTTTGTTTGA
+GTCTGAATCAGGCTATAAAATTTCGAAAAATTCAGGAGTTCCATATCAAACAGTACAAGA
+TTTAAGAAATGGGAAAACCAAACTAGAAGATGCTAGATTTAGAACGATTATTAAACTTTA
+TAGTTACTATGTCTCATTAAAAGAACATTAATCATGGGACAATAAAGTATTGCTATAATA
+ATTGAATCATTAAATGAAGGAATAGAGCCTAAGACATAATAAATCAATGTCTTAGGCTCT
+ACAATATTATATTGGTAGTAGTTGACCGAATGAAAATGACCATGTAACAAGCATTTTTGG
+GGCCCCAACACAGAAGCTGACGAAAAGTCAGCTTACAATAATGTGCAAGTTGGGGATGGG
+CCCCAACACAGAGAATTTCAAAAAGAAATTCTACAGACAATGCAAGTTGGCGGGGCCCCA
+ACATAGAGAAATTCAAAAAGAAATTCTACAGACAATGCAAGTTGGGGAGTGACAACGAAA
+TAAATTTTATAAAATACCATTTCTGTCACATTCCTACTCCCAATTCCATTTAAATATATA
+GAAAATTTCACCTATTATATACATCATACACTTTAAAAATTATCATTTAAATAATCTGAT
+TTGGAATTAAAGTGCATGAAGTATAAGTCACCTTCATATACTAATCAAAGAGGACGTCAA
+CAGTTATTTTATTAGGATTTTTAACATAAACATTTGCTAGATCTGAATGTAATCTTTTGC
+TTAAATCAATAGTGTAGTTATTACCGCCACCGGTGATCTTAAGCTTACCTTTATTACGAT
+TTTCGGTATATAATATTTTATTTTTTATTAACGCTTCTCGTGCACGGAAATCGATTTCTT
+TCAATGTTAATACTGGTTTATTGCCTTGGTATATTTTATGCGCACCAATAATCGTTTGTA
+GTTTATCTTTGTATTGCACAAAAAGATTATAAGTTTTATCAGAAGGTTTTGCGGCTGGTG
+TAACGCCACCTGTAAATGTCTCTCTATAAGACCACCATAACTGATCAGTATCTTTGTCTT
+TTAGTCCAAACACATCTACGTAACGATCTTTTAACTGATTAATATTTCCCCAACTTTCAG
+CGCCCCATAAAGATATATGTGCTGACCATGAATATCTCTTAAGTTCAACATAAATGTTTC
+CATTATCATATTGATATAGCCATTTATTTGAAAATGAAAAATGAGGCTGTGTGTAATATT
+TAATTAATTCATTGATGTTAGTTTCATCTTGACCAATGCTATAAGCTTTAGCTTCAGAGT
+AAAAACTAAATACTGGTTTTTGATTTTGAGGTAATACAGTACCTAATAGTAATAATGTTG
+TCGTTAAAATTATATTTTTCGTGATGTTCTTACTCATTAGAACATCTCCTTTCAGAGGAA
+TCATGATACGAGGAATAAGAAATTTAAATGTGAGCGAAGTCAATATAGTATTTGCGATTA
+TTTTTATTAACTCCATTATTATTGTTAGTTTGATTTTTCGAGGATAACTTCAATTTTTGC
+ATTTTGAGGTTTTTTAACATATCTATTTGCATCAGTTGATGGCAACCTTTTACTTAAATC
+TATTGTGTAGTTATTGTCTGTACCTGTTATTTTAATTTGTCCTTTATTATAAGAATTATT
+ATATAATTTTTTACTTTTAATTAATGTTTGACGAATACGAAAATCTAATTCTTTTAAAGT
+TAAAACAGGCTTATTGCCTTCATAAACTGGAAATCCGCCAGTAAACGTTTCTGCTTTATC
+TTTATATGTTACATTCAGTTTATAGTGTTTATCGTTAGATGTTGCTGCAGGAGTAACACC
+ACCAGTAAACGTTTCTTGAGATAATGCAAAAGAATCAATGGTTTCTTGGTCTTTTATGCC
+AAAAATATCAACGCTTTTATTTCTTAATTGGTTGATATTGCCCCAACTTTCAGGTCCATA
+AACTTGAATATGACTATACCAAGAAAACTGTAACAACGTTGCATGAATCGTACCGTTATC
+TTTTTGCCATAACGTACTGTTAGAGAAGGTTAAATATTTTTGCGAGTAATATTTAGTTAA
+CTCATTAACGTTAGTTTCGTTTTGATTTATATAATAAGCTTTCGCTTCAGATGAAGAATT
+GATAGGTGTATTAGGAAATTGTGTAAATGCTGTACCTAATAGTAACAATGTTGTTGATAA
+AATAATTTTTTTCGTGATGTTATTGTTCATTCGAATTTCTCCTTTGAGTATTGTTGGAAT
+GTTTAATTATAAAAAGGGGTTAATTAGATAATTGAAATTATCCGCATTTACAAAAGGTAA
+TAGGTTAGTTAGATTTTTCGAGTATGACTTCAATTTGTGCATTTTTAGGATTTTTAACAT
+AACGGTTTGTGTCAGTTAATTTTAACTTTTTACTTAAATCAATCGTGTAATTATTTCCAT
+CAGCAGTTATCTTAATTTGACCTTTATTAAATTCTCCGTTATATAACTTTTTATTCTTTA
+TTAATGTTTGACGAATACGGAAATCTAATTCTTTTAAAGTTAATACTGGTTTATTTCCTT
+TGTAAAATTCATGTCCACCGATGATAGTTTGTTGTTTATCACTATATTTTAAAAATAGTC
+TATAAGGTTTATCAGATGAAGTAGCTGCTGGCGTAACACCACCAGTAAATGTTTCATCAT
+AAGTCCAGTAACCTTCAACTGTGTCCTCATCTTTAGTTCCAAATATATCAACGTATTTAT
+TTCTTAACTGATTAATGTTTCCCCAACTCTCGGATCCAAACACTTGAATATGACTATACC
+AAACCCACGTTTGCAATGTTGCATGAATGCTACCATTGGGCTTTTGCCATAGCCATTTTC
+CAGATAATGAAAAATGCGGCTGAGTGTAATATTTGATTAACTCATTGATATTAGTCTCGT
+TTTCACTGATATTATAGGCTTTTGCTTCAGATGAAAAACTGATTGGTGTTTTAGGAAGTT
+GTGTTGATGTGGTTCCTAAAAGTAACAATGCTGTTGATAAAACTAATTTATTCATGATGT
+TCTTTTTCATATGAAAATCTCCTTTGCGTGAATTACCCAAAGTATATAAGCTATTACACC
+GATTCGGAATTAAATAAAAGCTAAAACTATGTTAAATAAACTTAAACAGTTAGTAGTGTT
+ATTTAAGCAAAACTTATCATTTTTAAGTTTTGGACAGAAACAGTACTTAATAAAGTAGGC
+GGGAGTTATATATTAAAAACGACACGTTACAATTATTCTCTAATCAATTGCATTAAATTG
+TTTGATAATTGAATTTTCTAACTACTTGAAAAATAGTTATACTTTAAATGTAGTACTTAT
+TTTAATTATTTCCTACTACTTAAATTTAATATTAATAAAATGTTCATTTAATTATTGATA
+AAATATTACAAATTTTAATAGTAGGTTGTGTTTATTTTGTATGCGCTTACAATTTAGGTG
+TAACTAAAATAAAAGGAGTTGTTATTAATGAAAAATTTACGAAACAGAAGTTTTTTAACT
+TTATTAGACTTTTCACGACAAGAGGTAGAATTCTTATTAACACTCTCCGAGGATTTAAAA
+CGTGCTAAATATATTGGCACTGAAAAGCCTATGTTAAAAAATAAAAATATTGCACTGTTA
+TTTGAAAAAGATTCTACAAGAACGCGATGTGCATTTGAAGTTGCAGCGCATGATCAAGGT
+GCAAATGTAACTTATTTAGGCCCAACTGGATCACAAATGGGTAAAAAAGAAACAACTAAA
+GATACTGCACGTGTGCTTGGTGGAATGTATGATGGCATTGAATACCGTGGTTTTTCACAA
+AGAACAGTAGAAACTTTAGCTGAAAATTCAGGCGTACCAGTGTGGAATGGTTTAACTGAT
+GAAGATCATCCTACTCAAGTTCTTGCTGATTTCTTAACAGCAAAAGAAGTCTTAAAAAAA
+GATTATGCAGATATTAACTTTACATATGTTGGAGATGGTCGTAATAACGTTGCAAATGCA
+TTAATGCAAGGTGCTGCCATTATGGGTATGAACTTCCATTTAGTTTGTCCAAAAGAATTA
+AATCCAACAGATGAATTATTAAATCGCTGTAAAAATATTGCCGCTGAAAATGGTGGCAAC
+ATATTAATCACAGATGATATTGACCAAGGTGTAAAAGGTTCGGATGTAATTTACACTGAT
+GTTTGGGTATCAATGGGTGAACCTGATGAAGTATGGAAAGAACGACTTGAATTATTGAAA
+CCATATCAAGTAAATAAAGAAATGATGGATAAAACTGGTAATCCAAATGTTATTTTTGAG
+CATTGCTTACCATCTTTCCATAATGCTGATACGAAAATTGGTCAACAAATTTTTGAAAAA
+TATGGTATTCGAGAAATGGAAGTTACAGATGAAGTATTCGAAAGTAAAGCTTCAGTTGTA
+TTCCAAGAAGCTGAGAACAGAATGCATACAATCAAAGCAGTCATGGTTGCTACATTGGGT
+GAATTTTAAATGATATAAGGAAGTGAATATGATGGCGAAAATAGTAGTAGCATTAGGTGG
+TAATGCTTTAGGAAAATCACCTCAAGAACAACTCGAGCTTGTTAAAAATACTGCGAAATC
+ATTAGTAGGATTAATAACAAAAGGACATGAGATTGTTATTAGTCATGGTAATGGACCACA
+GGTTGGAAGCATTAATTTGGGACTTAACTATGCTGCAGAACATAACCAAGGTCCGGCATT
+TCCATTTGCTGAATGTGGCGCAATGAGTCAAGCTTACATCGGCTATCAATTACAAGAAAG
+CTTACAAAATGAATTGCATTCTATTGGAATGGATAAACAAGTGGTAACACTAGTGACACA
+AGTTGAAGTTGATGAAAATGATCCGGCATTTAACAATCCTTCAAAACCAATTGGGTTATT
+TTACAACAAAGAAGAAGCTGAACAAATTCAAAAAGAAAAAGGATTTATATTTGTTGAAGA
+TGCTGGAAGAGGATATAGACGCGTTGTTCCTTCACCACAACCCATCTCTATTATTGAATT
+AGAGAGTATTAAAACACTTATTAAAAATGATACACTCGTTATTGCTGCTGGTGGTGGAGG
+TATACCAGTAATTAGAGAGCAACATGATGGTTTTAAAGGTATTGATGCAGTTATAGACAA
+AGATAAAACAAGTGCGTTGTTGGGTGCTAATATTCAATGCGATCAATTGATTATTTTAAC
+AGCAATTGATTATGTATATATTAATTTTAACACTGAAAACCAACAGCCTTTGAAAACAAC
+AAATGTTGATGAATTAAAACGATATATAGACGAAAATCAATTTGCAAAAGGAAGTATGTT
+ACCAAAAATTGAAGCAGCCATATCATTTATTGAAAACAATCCAAAAGGAAGTGTGCTTAT
+AACATCATTAAATGAATTAGATGCTGCCTTAGAGGGTAAAGTAGGTACTGTGATTAAAAA
+GTAATTGAATTGAAACGCTTTTCAATTACTATATGTCAAATGCATGATTTTTATTATTTA
+TGTGCACCCCCTGAAAATAATGCCTCTATTTTGATGCGGGGTGCACTTTCTTAATTTATA
+TTTATAAAATCTTTAAGGTAGAAATGCTAGGTTAAATGTCGAAGGAGATGAAACCGTGGA
+AAATACAATTAATGAAAGTGAAAAGAAAAAACGATTTAAATTAAAAATGCCAGGTGCATT
+TATGATTTTATTCATTTTAACGGTTGTTGCAGTTATAGCAACATGGGTTATTCCTGCTGG
+TGCATATTCTAAACTTTCTTACGAACCTTCATCCCAAGAACTAAAGATAGTTAACCCTCA
+TAACCAAGTGAAAAAGGTTCCGGGTACGCAACAGGAACTAGACAAAATGGGGGTTAAAAT
+TAAGATTGAACAATTTAAATCAGGTGCAATTAATAAGCCAGTATCAATTCCGAATACTTA
+TGAAAGATTAAAGCAACATCCAGCTGGACCAGAACAAATAACAAGTAGCATGGTTGAAGG
+TACGATAGAAGCGGTCGATATCATGGTATTCATTCTTGTACTAGGGGGACTTATTGGCGT
+AGTTCAAGCCAGTGGTTCTTTTGAATCGGGATTGTTAGCTTTAACGAAGAAAACAAAAGG
+GCATGAATTTATGCTAATTGTGTTTGTATCAATACTAATGATTATCGGCGGGACGTTATG
+TGGTATTGAAGAAGAAGCTGTAGCATTCTATCCGATTTTAGTCCCTATATTTATAGCGTT
+AGGATACGATTCTATCGTTTCAGTTGGTGCCATATTCCTTGCCAGCTCTGTCGGTAGTAC
+ATTTTCAACTATTAACCCGTTCTCGGTTGTAATTGCCTCTAATGCCGCTGGTACAACTTT
+TACGGATGGCTTGTATTGGAGAATAGGTGCTTGTATTGTCGGTGCGATTTTTGTTATTAG
+TTATTTATATTGGTACTGTAAAAAAATTAAAAACGATCCTAAAGCGTCATATTCTTATGA
+AGACAAAGATGCTTTTGAACAGCAATGGTCTGTATTAAAAGATGACGATAGTGCCCATTT
+TACTTTGCGTAAGAAGATAATCCTTACATTATTTGTACTACCATTTCCAATTATGGTATG
+GGGAGTTATGACGCAAGGTTGGTGGTTCCCAGTTATGGCTTCAGCATTTTTAATATTTAC
+AATTATAATAATGTTTATTGCTGGGACAGGTAAATCTGGATTGGGAGAAAAAGGAACTGT
+AGATGCATTTGTCAATGGTGCATCAAGTTTAGTAGGTGTATCTTTAATTATTGGTTTAGC
+TCGAGGTATTAATTTAGTGTTGAATGAAGGTATGATTTCAGATACAATCTTACACTTTTC
+ATCATCTTTAGTTCAACATATGAGTGGACCATTATTTATCATCGTATTACTATTTATTTT
+CTTCTGTTTAGGTTTTATCGTGCCATCATCTTCTGGATTAGCGGTATTATCAATGCCTAT
+CTTTGCACCACTAGCTGATACAGTAGGTATACCAAGATTCGTCATCGTTACGACATATCA
+ATTTGGTCAATATGCGATGTTATTCTTAGCGCCGACTGGACTTGTTATGGCCACACTACA
+AATGTTAAACATGCGATATTCACATTGGTTCCGATTTGTATGGCCGGTAGTTGCTTTTGT
+ATTGATTTTCGGTGGCGGAGTACTAATTACGCAAGTACTAATTTATTCATAATTTGAAAT
+GCTATATTATAAAAATACTAATTGGGTTTTATGCATCTCGTAGGTTTGTAGAAATACTAA
+ACTAAGCGAGGTGCATTATTATTTTTGATTAAGAAAATAATGACGGTAATGATAACACTA
+GTAAGTAATTGATACAATGCTCTATTTAATAATGATATTTTTAAAATTTGTTTTTAATGT
+AATGTTAGATCTATGGTATATTATATTTATCGTGGTAAATATGTATTTGCTGTAAACAGT
+TTTTGTAACACATGACACATATGGTTGTGAAAATAGCATATATAAAGGATGGCTATAAAT
+GACACATTTGACAAAGGTTTTAGATACACTAACTGGAATATGCGTAGTATTATTATTTAG
+TAAATATTTTGTGGCGTATGCAAATATGGTGTTTGATTGGAATTTAAGATGGTATTTGCT
+AGAAAACATACCACATTTGCCAATTATATTATTTATTCTGATGTTTATTTTCGGAGTACC
+TTCTGAAATGATAAAAGATAGGCAAAGGAAAAATAACGGTGTTTAATTTATCGATATTTA
+GAGGTGATAAAATTTGTCAACTATTACAAAGACTAAAATAAAAATCTTTATTAATTATTT
+TATTGGTATATTAGCCTTGGCGTTTGCTGCTTATATATTATTTAAATAAAGGTTAGAGTG
+AAACGTGTTTATGAACTAGACGTTCTAGTATAGTTACTACAGCTTTAGTCAGAAGGTATC
+ATTGATAAGATCATATTAAATCAAAGAGGCATTGATATACACTAAAAAGAGGCAAGATTA
+CCTGCCTCTTTTTTAGTTATTAAATATGCGTGTTAATTCTTGGTAATAGTGTTACTGAGA
+CGCATTACGAAGAAATTTATCTTGATTTTCCTTTTTTAAAAAGAAGTGAAGATATCCTAT
+AAAGACTCTAAGTACTATTACAGTGGCTAATAACACTTCGATAAACAAAAGACCTTTCCA
+AATATCTGGAAACATAAGTACAGGCAAACTGTTCTTTAAAGCAGTTGCTGAGATTACTAA
+AGGGAATGTGAAAGCTGAAAATACGGGCGAAAAAGGTTCTTTTAGTAATTTAGGCAATTG
+TATAATGATATAAAAATAAAATATTTGAGCTAATATTAAAAATATAATTACGATAAAAGC
+ATTCGCCTTAGGAAAGGCTATAACATATGCTGCAGCGACTAAAGAAAATGGTGCACAAAT
+TGTCGATGTGTTCGGTTTGATTGACGTCTGCAATGGAAATGCTTTTAATCGCTTGAAAAC
+AATAGGTAAGACAATACAAGTAGCTACAAAGCCATATATTACTGTTAATTGACCTATGAA
+AAAACATCCGCTAACGGGTGCCGTCAATCCTGCGATAGCAATACCAATAAAAAGTACAGT
+CCACGAAGGATAAACATTTTCAAGTGAAAAATCTTTTAAATATTTTATTGAAAAAATAAT
+CATATGCGTCATAATTCCCACAAGGCATAAAATCCAAATAGGCGTTATTAAGCTATTGAT
+AAAAGTTATGTTACTAAAAAATGTATTTAAATAAGTAGTACCTAAAAAGCCAGACATGAA
+AAATGTTGTGAACACTGATGAAACTAGAGGACTGTTTAATTGTTCTTTCACATTATTAAA
+ATATTTGATCATAGTACATAAAAGGTGAATCCAAATCAAGAAAGCAAAGATTCCGCAAAC
+AGCGTTTAAAGTAAGAGATAAGTCTTTTAATAGATTACCCAGGCCTAATAATCCTAAGAC
+TAGTCCTGACGTTACTAGAGGTGCTTTTTGAAGTCTCATGATTTACAACCTTTCTTATGT
+GATTTTCTTCACTAATTATATCATGATAGCTATGGCCAATTAATAAGAAGAGTGTGTACT
+ATTACGTTATTAGAGTATGTATATTGGATTGATAGACACATAATTGACATTTAAATCTCA
+AAATTAATGATATTTGAGGTATGCTTTGCAACCTAAATATTGGAATATGTGGAAAAGTAA
+TTATTTAATTTAAATGATGATTGAAGATTTACAAGGGGGTGTACAAAATGAGAAATCAAA
+TTCAAAAACTATTAGACAGTGATTTGAGCAGTTTACATATATCGAAACAAACAGGAGTTC
+CACAAAGCACAATACACAGAATGAGAAAAAAAGAAAGATCATTAGACAATATGTCATTGA
+AAAACGCTGAACTACTTTATAAATTTGCCAATAGTATATTTAGCAATGAAAATTAAGAAG
+AATAATTAGTGAGTTAAATAATTAAAAAACCACGTCTCGATGGACGTGGTTATTTTTTTA
+AATGTATTTAATTATAGGATTTCGGGCCCCTGAAAAGTCCCTAAAATTTTGAATAGTATC
+AAAATCTATAGGAATAATATAATAATAAAAATCTCTACGCATGCAAATGGTGAGTGTTAA
+AAATCTTGGTGTAGTGTTGGTGTAGTTCAGGTGCAGTATAGATGAAATTTGAGCACAAAA
+TTACTTAATTAAAAATGTATAAACAAGTGTCAGACGCGTGTCAATTGTGTGTCAAAAAAT
+TCTATGACGAATAAGCATGCTTGTTTTAATTACTGTATTTTCGGGGCTTATGCGGGGGAC
+GTTTTGGTGACGCATCATACTATATTACTGAAATTCAAAACAAAAAGAGCCCCGTAATCA
+CGGAACTCTTTTGTTTGGTAATGCGTATAAAAATACCTATAAACGTCCTGGGAGGGATTC
+GAACCCCCGACCGATGGCTTAGAAGGCCATTGCTCTATCCAGCTGAGCTACCAGGACACG
+TTTAACAACACAAGAATTATTATATCTAAATGAACTTAAATTAGCAATACCTTGTAAATA
+AAAAATGTTTATATTTTTCACTATTATAGAGCTATTTATCTAAAAAGGTTCAATAAGACT
+TAAATGCGAATTCAGGCAACTTAATTGTGTTAAATACAGTTTTGAATGCCTAACTGTATT
+TCTTTTCTCTTTAAAATACAGTTAAGTACATTATAAGATGTTGTGCGGATAAACAAACTA
+ATTGCATCAAATTTATTTTAAAATAACAACAACAAAACGTTAAGCGAATAACATTTCGGT
+GATTTAAAAGCTACGCACGTTTTTGTTATCTTCAAATTTAAATTTTAAGGAGTGTTTTCA
+ATGGAAGGTTTATTTAACGCAATTAAAGATACCGTAACTGCAGCAATTAATAATGATGGC
+GCAAAATTAGGCACAAGCATTGTGAGCATCGTTGAAAATGGCGTAGGTTTATTAGGTAAA
+TTATTCGGATTCTAATTTCAATATGTTATGTAAGTAATCAGTATTATTTCAAAGGTGAGG
+GAGAGATTTAAATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAAC
+AACATGATAGTGTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTT
+TACTAGGTAAATTATTTGGATTCTAATATAATAACTAATATTCTTTAAAATAAACTGGGT
+GAGCATACTTTAATGTTATGCACTCAGTTTATTTTATTTGCAGAAATTTGAGCCTCTGTT
\ No newline at end of file
diff --git a/t/data/reformat_input_gffs/query_2.gff b/t/data/reformat_input_gffs/query_2.gff
new file mode 100644
index 0000000..d429f62
--- /dev/null
+++ b/t/data/reformat_input_gffs/query_2.gff
@@ -0,0 +1,220 @@
+##gff-version 3
+##sequence-region abc|SC|contig000001 1 15000
+abc|SC|contig000001	Prodigal:2.60	CDS	172	1131	.	-	0	ID=1_1;gene=different;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q2G1X0,protein motif:TIGRFAMs:TIGR01002,protein motif:Pfam:PF07968.6;locus_tag=abc_00001;product=Alpha-toxin,beta-channel forming cytolysin,Leukocidin/Hemolysin toxin family protein;protein_id=gnl|SC|abc_00001
+abc|SC|contig000001	Prodigal:2.60	CDS	1804	1950	.	+	0	ID=abc_00002;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00002;product=hypothetical protein;protein_id=gnl|SC|abc_00002
+abc|SC|contig000001	Prodigal:2.60	CDS	1934	2131	.	+	0	inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00003;ID=abc_00003;product=hypothetical protein;protein_id=gnl|SC|abc_00003
+abc|SC|contig000001	Prodigal:2.60	CDS	2621	3337	.	-	0	ID=abc_00004;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13350,protein motif:Pfam:PF02876.11;locus_tag=abc_00004;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00004
+abc|SC|contig000001	Prodigal:2.60	CDS	3445	4170	.	-	0	gene=speH;ID=1_2;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0C0I6,protein motif:CLUSTERS:PRK13349,protein motif:Pfam:PF02876.11;locus_tag=abc_00005;note=SPE H;product=hypothetical protein,superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00005
+##FASTA
+>abc|SC|contig000001
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+ATACCCACCACATTTGGTGGAGAACCAAAAATTAGCCGAAAAACATCATTTCTGAAGTTA
+TCGGCTAAAGTTATAAATTATATTTATTTGTACATGAACAAATAATTTACATTAATTTGT
+CATTTCTTCTTTTTCCCAATCGATTTTATATCTTTCTGAAGAACGATCTGTCCATTTATC
+TTTAGTATTGGTACCTTTCCAATTTGTTGAAGTCCAATGCAATTGGTAGTCATCACGAAC
+TCGTTCGTATATTACATCTATATTTGTTTGTTGTTTGGATGCTTTTCTATCCATAGTAAT
+AACTGTAGCGAAGTCTGGTGAAAACCCTGAAGATAATAGAGAACTTGCTTTGTTAGGATC
+AAGGAAGTTCTCTGCTGCTTTCATAGAACCATTTCTAGTTTTCATGAAAAGTTGATTGCC
+ATATACCGGGTTCCAAGAATCTCTATCATATGGTCCCCAATTTTGATTCACCATATTGTT
+AAATATCACTTTCCAGCCTACTTTTTTATCAGTTGGGCTCTCTAAAATTGTTTTGAAATC
+AGGTTGAACATATTTCAGTGTATGACCAATCGAAACATTTGCACCAATAAGGCCGCCAAT
+TTTTCCTGTATCATCACCAGTAACATTACCGTTGAATCCATAAGTTAAAGTACTCATATA
+CTCTTTTGTATCAATCGAATTTCTTGGATAGTAATCAGATATTTGAGCTACTTCATTATC
+AGGTAGTTGCAACTGTACCTTAAAGGCTGAAGGCCAGGCTAAACCACTTTTGTTAGCACC
+TTCTTCGCTATAAACTCTATATTGACCAGCAATGGTACCTTTCGTTCTAATAACTAGCAG
+TTTTTTATTGTGATTTTTATCATCGATAAAACTATAAAATACTTTTTTGTGCATGCCATT
+TTCTTTATCATAAGTGACTAAATCACCTGTTTTTACTGTAGTATTGCTTCCAATATCTGT
+AGTACCGGTTTTAATATTAATATCAGAATCTGCGGCATTAGCGACAGGATTCATTAATAT
+GGAACCTAGCAATAGTGTTGTTGTTACTGAGCTGACTATACGTGTTTTCATTTTCATCAT
+CCTTCTATTTTTTTTAACGATTTGAGGAAACAATAATCAATATGTCAATTTAGAATATTG
+CAGTTGAGACATATCAAATATTTATAATCGTTTATATTAGTATATTTAATGTAGCAACTG
+ATAAATTACTGAGTGATGATGAGTGATTATTTTAAGAATATGTTTTTAACTTTTATTTAA
+AATTTGAAAGGAAGCATTTCAATTTCGAGGGTTAGTCAAAGTTGAATAAATTCTTTATGA
+AACAAGGAAAAGACATAGCTAATTTTATTGATTAATTTCTTTAAAACTAATGATTTGTTT
+GATTTAAAAATGTAATCGATTACAATATAAAAATACAAATATCTTAGAATTAAATCAATT
+AATTAACTATTAAATAAAAATTAAATATATATTAACTACTGTAAATTAATAAATAGAAAT
+AGAGAAAAAGGGTATTAATTATGTTTGGAATTTCGTCGGGAATAATTTTTGCCATTACAT
+AGAAATATCTAATAGATAATGAAAAAGTATCGTATGTATTTTTAATATAGTGTAAAATAT
+CATATGTAAAATAAAATGTAGATTTTTAGTTAGAGGCATTATAAGAAAATTTTGAGTATA
+GGTTAGCTTTTAATTATGAATCTTATTGAAATTTGATTAATAAATATATGATAGGGGATT
+AAAATGAAACTATTTTATATCGTATTTCTTATTATTATATGGCTGAATATATTTTTAGGA
+AATGAAATTATCCATACACTGACTGTTTTAATAACAACATTGTATATTGTTAATTCAAGA
+AAGGGGATTAAAAATGACAGAGTTGAATAATATTATAAACTCTCTTCAATCTTTGTTTGA
+GTCTGAATCAGGCTATAAAATTTCGAAAAATTCAGGAGTTCCATATCAAACAGTACAAGA
+TTTAAGAAATGGGAAAACCAAACTAGAAGATGCTAGATTTAGAACGATTATTAAACTTTA
+TAGTTACTATGTCTCATTAAAAGAACATTAATCATGGGACAATAAAGTATTGCTATAATA
+ATTGAATCATTAAATGAAGGAATAGAGCCTAAGACATAATAAATCAATGTCTTAGGCTCT
+ACAATATTATATTGGTAGTAGTTGACCGAATGAAAATGACCATGTAACAAGCATTTTTGG
+GGCCCCAACACAGAAGCTGACGAAAAGTCAGCTTACAATAATGTGCAAGTTGGGGATGGG
+CCCCAACACAGAGAATTTCAAAAAGAAATTCTACAGACAATGCAAGTTGGCGGGGCCCCA
+ACATAGAGAAATTCAAAAAGAAATTCTACAGACAATGCAAGTTGGGGAGTGACAACGAAA
+TAAATTTTATAAAATACCATTTCTGTCACATTCCTACTCCCAATTCCATTTAAATATATA
+GAAAATTTCACCTATTATATACATCATACACTTTAAAAATTATCATTTAAATAATCTGAT
+TTGGAATTAAAGTGCATGAAGTATAAGTCACCTTCATATACTAATCAAAGAGGACGTCAA
+CAGTTATTTTATTAGGATTTTTAACATAAACATTTGCTAGATCTGAATGTAATCTTTTGC
+TTAAATCAATAGTGTAGTTATTACCGCCACCGGTGATCTTAAGCTTACCTTTATTACGAT
+TTTCGGTATATAATATTTTATTTTTTATTAACGCTTCTCGTGCACGGAAATCGATTTCTT
+TCAATGTTAATACTGGTTTATTGCCTTGGTATATTTTATGCGCACCAATAATCGTTTGTA
+GTTTATCTTTGTATTGCACAAAAAGATTATAAGTTTTATCAGAAGGTTTTGCGGCTGGTG
+TAACGCCACCTGTAAATGTCTCTCTATAAGACCACCATAACTGATCAGTATCTTTGTCTT
+TTAGTCCAAACACATCTACGTAACGATCTTTTAACTGATTAATATTTCCCCAACTTTCAG
+CGCCCCATAAAGATATATGTGCTGACCATGAATATCTCTTAAGTTCAACATAAATGTTTC
+CATTATCATATTGATATAGCCATTTATTTGAAAATGAAAAATGAGGCTGTGTGTAATATT
+TAATTAATTCATTGATGTTAGTTTCATCTTGACCAATGCTATAAGCTTTAGCTTCAGAGT
+AAAAACTAAATACTGGTTTTTGATTTTGAGGTAATACAGTACCTAATAGTAATAATGTTG
+TCGTTAAAATTATATTTTTCGTGATGTTCTTACTCATTAGAACATCTCCTTTCAGAGGAA
+TCATGATACGAGGAATAAGAAATTTAAATGTGAGCGAAGTCAATATAGTATTTGCGATTA
+TTTTTATTAACTCCATTATTATTGTTAGTTTGATTTTTCGAGGATAACTTCAATTTTTGC
+ATTTTGAGGTTTTTTAACATATCTATTTGCATCAGTTGATGGCAACCTTTTACTTAAATC
+TATTGTGTAGTTATTGTCTGTACCTGTTATTTTAATTTGTCCTTTATTATAAGAATTATT
+ATATAATTTTTTACTTTTAATTAATGTTTGACGAATACGAAAATCTAATTCTTTTAAAGT
+TAAAACAGGCTTATTGCCTTCATAAACTGGAAATCCGCCAGTAAACGTTTCTGCTTTATC
+TTTATATGTTACATTCAGTTTATAGTGTTTATCGTTAGATGTTGCTGCAGGAGTAACACC
+ACCAGTAAACGTTTCTTGAGATAATGCAAAAGAATCAATGGTTTCTTGGTCTTTTATGCC
+AAAAATATCAACGCTTTTATTTCTTAATTGGTTGATATTGCCCCAACTTTCAGGTCCATA
+AACTTGAATATGACTATACCAAGAAAACTGTAACAACGTTGCATGAATCGTACCGTTATC
+TTTTTGCCATAACGTACTGTTAGAGAAGGTTAAATATTTTTGCGAGTAATATTTAGTTAA
+CTCATTAACGTTAGTTTCGTTTTGATTTATATAATAAGCTTTCGCTTCAGATGAAGAATT
+GATAGGTGTATTAGGAAATTGTGTAAATGCTGTACCTAATAGTAACAATGTTGTTGATAA
+AATAATTTTTTTCGTGATGTTATTGTTCATTCGAATTTCTCCTTTGAGTATTGTTGGAAT
+GTTTAATTATAAAAAGGGGTTAATTAGATAATTGAAATTATCCGCATTTACAAAAGGTAA
+TAGGTTAGTTAGATTTTTCGAGTATGACTTCAATTTGTGCATTTTTAGGATTTTTAACAT
+AACGGTTTGTGTCAGTTAATTTTAACTTTTTACTTAAATCAATCGTGTAATTATTTCCAT
+CAGCAGTTATCTTAATTTGACCTTTATTAAATTCTCCGTTATATAACTTTTTATTCTTTA
+TTAATGTTTGACGAATACGGAAATCTAATTCTTTTAAAGTTAATACTGGTTTATTTCCTT
+TGTAAAATTCATGTCCACCGATGATAGTTTGTTGTTTATCACTATATTTTAAAAATAGTC
+TATAAGGTTTATCAGATGAAGTAGCTGCTGGCGTAACACCACCAGTAAATGTTTCATCAT
+AAGTCCAGTAACCTTCAACTGTGTCCTCATCTTTAGTTCCAAATATATCAACGTATTTAT
+TTCTTAACTGATTAATGTTTCCCCAACTCTCGGATCCAAACACTTGAATATGACTATACC
+AAACCCACGTTTGCAATGTTGCATGAATGCTACCATTGGGCTTTTGCCATAGCCATTTTC
+CAGATAATGAAAAATGCGGCTGAGTGTAATATTTGATTAACTCATTGATATTAGTCTCGT
+TTTCACTGATATTATAGGCTTTTGCTTCAGATGAAAAACTGATTGGTGTTTTAGGAAGTT
+GTGTTGATGTGGTTCCTAAAAGTAACAATGCTGTTGATAAAACTAATTTATTCATGATGT
+TCTTTTTCATATGAAAATCTCCTTTGCGTGAATTACCCAAAGTATATAAGCTATTACACC
+GATTCGGAATTAAATAAAAGCTAAAACTATGTTAAATAAACTTAAACAGTTAGTAGTGTT
+ATTTAAGCAAAACTTATCATTTTTAAGTTTTGGACAGAAACAGTACTTAATAAAGTAGGC
+GGGAGTTATATATTAAAAACGACACGTTACAATTATTCTCTAATCAATTGCATTAAATTG
+TTTGATAATTGAATTTTCTAACTACTTGAAAAATAGTTATACTTTAAATGTAGTACTTAT
+TTTAATTATTTCCTACTACTTAAATTTAATATTAATAAAATGTTCATTTAATTATTGATA
+AAATATTACAAATTTTAATAGTAGGTTGTGTTTATTTTGTATGCGCTTACAATTTAGGTG
+TAACTAAAATAAAAGGAGTTGTTATTAATGAAAAATTTACGAAACAGAAGTTTTTTAACT
+TTATTAGACTTTTCACGACAAGAGGTAGAATTCTTATTAACACTCTCCGAGGATTTAAAA
+CGTGCTAAATATATTGGCACTGAAAAGCCTATGTTAAAAAATAAAAATATTGCACTGTTA
+TTTGAAAAAGATTCTACAAGAACGCGATGTGCATTTGAAGTTGCAGCGCATGATCAAGGT
+GCAAATGTAACTTATTTAGGCCCAACTGGATCACAAATGGGTAAAAAAGAAACAACTAAA
+GATACTGCACGTGTGCTTGGTGGAATGTATGATGGCATTGAATACCGTGGTTTTTCACAA
+AGAACAGTAGAAACTTTAGCTGAAAATTCAGGCGTACCAGTGTGGAATGGTTTAACTGAT
+GAAGATCATCCTACTCAAGTTCTTGCTGATTTCTTAACAGCAAAAGAAGTCTTAAAAAAA
+GATTATGCAGATATTAACTTTACATATGTTGGAGATGGTCGTAATAACGTTGCAAATGCA
+TTAATGCAAGGTGCTGCCATTATGGGTATGAACTTCCATTTAGTTTGTCCAAAAGAATTA
+AATCCAACAGATGAATTATTAAATCGCTGTAAAAATATTGCCGCTGAAAATGGTGGCAAC
+ATATTAATCACAGATGATATTGACCAAGGTGTAAAAGGTTCGGATGTAATTTACACTGAT
+GTTTGGGTATCAATGGGTGAACCTGATGAAGTATGGAAAGAACGACTTGAATTATTGAAA
+CCATATCAAGTAAATAAAGAAATGATGGATAAAACTGGTAATCCAAATGTTATTTTTGAG
+CATTGCTTACCATCTTTCCATAATGCTGATACGAAAATTGGTCAACAAATTTTTGAAAAA
+TATGGTATTCGAGAAATGGAAGTTACAGATGAAGTATTCGAAAGTAAAGCTTCAGTTGTA
+TTCCAAGAAGCTGAGAACAGAATGCATACAATCAAAGCAGTCATGGTTGCTACATTGGGT
+GAATTTTAAATGATATAAGGAAGTGAATATGATGGCGAAAATAGTAGTAGCATTAGGTGG
+TAATGCTTTAGGAAAATCACCTCAAGAACAACTCGAGCTTGTTAAAAATACTGCGAAATC
+ATTAGTAGGATTAATAACAAAAGGACATGAGATTGTTATTAGTCATGGTAATGGACCACA
+GGTTGGAAGCATTAATTTGGGACTTAACTATGCTGCAGAACATAACCAAGGTCCGGCATT
+TCCATTTGCTGAATGTGGCGCAATGAGTCAAGCTTACATCGGCTATCAATTACAAGAAAG
+CTTACAAAATGAATTGCATTCTATTGGAATGGATAAACAAGTGGTAACACTAGTGACACA
+AGTTGAAGTTGATGAAAATGATCCGGCATTTAACAATCCTTCAAAACCAATTGGGTTATT
+TTACAACAAAGAAGAAGCTGAACAAATTCAAAAAGAAAAAGGATTTATATTTGTTGAAGA
+TGCTGGAAGAGGATATAGACGCGTTGTTCCTTCACCACAACCCATCTCTATTATTGAATT
+AGAGAGTATTAAAACACTTATTAAAAATGATACACTCGTTATTGCTGCTGGTGGTGGAGG
+TATACCAGTAATTAGAGAGCAACATGATGGTTTTAAAGGTATTGATGCAGTTATAGACAA
+AGATAAAACAAGTGCGTTGTTGGGTGCTAATATTCAATGCGATCAATTGATTATTTTAAC
+AGCAATTGATTATGTATATATTAATTTTAACACTGAAAACCAACAGCCTTTGAAAACAAC
+AAATGTTGATGAATTAAAACGATATATAGACGAAAATCAATTTGCAAAAGGAAGTATGTT
+ACCAAAAATTGAAGCAGCCATATCATTTATTGAAAACAATCCAAAAGGAAGTGTGCTTAT
+AACATCATTAAATGAATTAGATGCTGCCTTAGAGGGTAAAGTAGGTACTGTGATTAAAAA
+GTAATTGAATTGAAACGCTTTTCAATTACTATATGTCAAATGCATGATTTTTATTATTTA
+TGTGCACCCCCTGAAAATAATGCCTCTATTTTGATGCGGGGTGCACTTTCTTAATTTATA
+TTTATAAAATCTTTAAGGTAGAAATGCTAGGTTAAATGTCGAAGGAGATGAAACCGTGGA
+AAATACAATTAATGAAAGTGAAAAGAAAAAACGATTTAAATTAAAAATGCCAGGTGCATT
+TATGATTTTATTCATTTTAACGGTTGTTGCAGTTATAGCAACATGGGTTATTCCTGCTGG
+TGCATATTCTAAACTTTCTTACGAACCTTCATCCCAAGAACTAAAGATAGTTAACCCTCA
+TAACCAAGTGAAAAAGGTTCCGGGTACGCAACAGGAACTAGACAAAATGGGGGTTAAAAT
+TAAGATTGAACAATTTAAATCAGGTGCAATTAATAAGCCAGTATCAATTCCGAATACTTA
+TGAAAGATTAAAGCAACATCCAGCTGGACCAGAACAAATAACAAGTAGCATGGTTGAAGG
+TACGATAGAAGCGGTCGATATCATGGTATTCATTCTTGTACTAGGGGGACTTATTGGCGT
+AGTTCAAGCCAGTGGTTCTTTTGAATCGGGATTGTTAGCTTTAACGAAGAAAACAAAAGG
+GCATGAATTTATGCTAATTGTGTTTGTATCAATACTAATGATTATCGGCGGGACGTTATG
+TGGTATTGAAGAAGAAGCTGTAGCATTCTATCCGATTTTAGTCCCTATATTTATAGCGTT
+AGGATACGATTCTATCGTTTCAGTTGGTGCCATATTCCTTGCCAGCTCTGTCGGTAGTAC
+ATTTTCAACTATTAACCCGTTCTCGGTTGTAATTGCCTCTAATGCCGCTGGTACAACTTT
+TACGGATGGCTTGTATTGGAGAATAGGTGCTTGTATTGTCGGTGCGATTTTTGTTATTAG
+TTATTTATATTGGTACTGTAAAAAAATTAAAAACGATCCTAAAGCGTCATATTCTTATGA
+AGACAAAGATGCTTTTGAACAGCAATGGTCTGTATTAAAAGATGACGATAGTGCCCATTT
+TACTTTGCGTAAGAAGATAATCCTTACATTATTTGTACTACCATTTCCAATTATGGTATG
+GGGAGTTATGACGCAAGGTTGGTGGTTCCCAGTTATGGCTTCAGCATTTTTAATATTTAC
+AATTATAATAATGTTTATTGCTGGGACAGGTAAATCTGGATTGGGAGAAAAAGGAACTGT
+AGATGCATTTGTCAATGGTGCATCAAGTTTAGTAGGTGTATCTTTAATTATTGGTTTAGC
+TCGAGGTATTAATTTAGTGTTGAATGAAGGTATGATTTCAGATACAATCTTACACTTTTC
+ATCATCTTTAGTTCAACATATGAGTGGACCATTATTTATCATCGTATTACTATTTATTTT
+CTTCTGTTTAGGTTTTATCGTGCCATCATCTTCTGGATTAGCGGTATTATCAATGCCTAT
+CTTTGCACCACTAGCTGATACAGTAGGTATACCAAGATTCGTCATCGTTACGACATATCA
+ATTTGGTCAATATGCGATGTTATTCTTAGCGCCGACTGGACTTGTTATGGCCACACTACA
+AATGTTAAACATGCGATATTCACATTGGTTCCGATTTGTATGGCCGGTAGTTGCTTTTGT
+ATTGATTTTCGGTGGCGGAGTACTAATTACGCAAGTACTAATTTATTCATAATTTGAAAT
+GCTATATTATAAAAATACTAATTGGGTTTTATGCATCTCGTAGGTTTGTAGAAATACTAA
+ACTAAGCGAGGTGCATTATTATTTTTGATTAAGAAAATAATGACGGTAATGATAACACTA
+GTAAGTAATTGATACAATGCTCTATTTAATAATGATATTTTTAAAATTTGTTTTTAATGT
+AATGTTAGATCTATGGTATATTATATTTATCGTGGTAAATATGTATTTGCTGTAAACAGT
+TTTTGTAACACATGACACATATGGTTGTGAAAATAGCATATATAAAGGATGGCTATAAAT
+GACACATTTGACAAAGGTTTTAGATACACTAACTGGAATATGCGTAGTATTATTATTTAG
+TAAATATTTTGTGGCGTATGCAAATATGGTGTTTGATTGGAATTTAAGATGGTATTTGCT
+AGAAAACATACCACATTTGCCAATTATATTATTTATTCTGATGTTTATTTTCGGAGTACC
+TTCTGAAATGATAAAAGATAGGCAAAGGAAAAATAACGGTGTTTAATTTATCGATATTTA
+GAGGTGATAAAATTTGTCAACTATTACAAAGACTAAAATAAAAATCTTTATTAATTATTT
+TATTGGTATATTAGCCTTGGCGTTTGCTGCTTATATATTATTTAAATAAAGGTTAGAGTG
+AAACGTGTTTATGAACTAGACGTTCTAGTATAGTTACTACAGCTTTAGTCAGAAGGTATC
+ATTGATAAGATCATATTAAATCAAAGAGGCATTGATATACACTAAAAAGAGGCAAGATTA
+CCTGCCTCTTTTTTAGTTATTAAATATGCGTGTTAATTCTTGGTAATAGTGTTACTGAGA
+CGCATTACGAAGAAATTTATCTTGATTTTCCTTTTTTAAAAAGAAGTGAAGATATCCTAT
+AAAGACTCTAAGTACTATTACAGTGGCTAATAACACTTCGATAAACAAAAGACCTTTCCA
+AATATCTGGAAACATAAGTACAGGCAAACTGTTCTTTAAAGCAGTTGCTGAGATTACTAA
+AGGGAATGTGAAAGCTGAAAATACGGGCGAAAAAGGTTCTTTTAGTAATTTAGGCAATTG
+TATAATGATATAAAAATAAAATATTTGAGCTAATATTAAAAATATAATTACGATAAAAGC
+ATTCGCCTTAGGAAAGGCTATAACATATGCTGCAGCGACTAAAGAAAATGGTGCACAAAT
+TGTCGATGTGTTCGGTTTGATTGACGTCTGCAATGGAAATGCTTTTAATCGCTTGAAAAC
+AATAGGTAAGACAATACAAGTAGCTACAAAGCCATATATTACTGTTAATTGACCTATGAA
+AAAACATCCGCTAACGGGTGCCGTCAATCCTGCGATAGCAATACCAATAAAAAGTACAGT
+CCACGAAGGATAAACATTTTCAAGTGAAAAATCTTTTAAATATTTTATTGAAAAAATAAT
+CATATGCGTCATAATTCCCACAAGGCATAAAATCCAAATAGGCGTTATTAAGCTATTGAT
+AAAAGTTATGTTACTAAAAAATGTATTTAAATAAGTAGTACCTAAAAAGCCAGACATGAA
+AAATGTTGTGAACACTGATGAAACTAGAGGACTGTTTAATTGTTCTTTCACATTATTAAA
+ATATTTGATCATAGTACATAAAAGGTGAATCCAAATCAAGAAAGCAAAGATTCCGCAAAC
+AGCGTTTAAAGTAAGAGATAAGTCTTTTAATAGATTACCCAGGCCTAATAATCCTAAGAC
+TAGTCCTGACGTTACTAGAGGTGCTTTTTGAAGTCTCATGATTTACAACCTTTCTTATGT
+GATTTTCTTCACTAATTATATCATGATAGCTATGGCCAATTAATAAGAAGAGTGTGTACT
+ATTACGTTATTAGAGTATGTATATTGGATTGATAGACACATAATTGACATTTAAATCTCA
+AAATTAATGATATTTGAGGTATGCTTTGCAACCTAAATATTGGAATATGTGGAAAAGTAA
+TTATTTAATTTAAATGATGATTGAAGATTTACAAGGGGGTGTACAAAATGAGAAATCAAA
+TTCAAAAACTATTAGACAGTGATTTGAGCAGTTTACATATATCGAAACAAACAGGAGTTC
+CACAAAGCACAATACACAGAATGAGAAAAAAAGAAAGATCATTAGACAATATGTCATTGA
+AAAACGCTGAACTACTTTATAAATTTGCCAATAGTATATTTAGCAATGAAAATTAAGAAG
+AATAATTAGTGAGTTAAATAATTAAAAAACCACGTCTCGATGGACGTGGTTATTTTTTTA
+AATGTATTTAATTATAGGATTTCGGGCCCCTGAAAAGTCCCTAAAATTTTGAATAGTATC
+AAAATCTATAGGAATAATATAATAATAAAAATCTCTACGCATGCAAATGGTGAGTGTTAA
+AAATCTTGGTGTAGTGTTGGTGTAGTTCAGGTGCAGTATAGATGAAATTTGAGCACAAAA
+TTACTTAATTAAAAATGTATAAACAAGTGTCAGACGCGTGTCAATTGTGTGTCAAAAAAT
+TCTATGACGAATAAGCATGCTTGTTTTAATTACTGTATTTTCGGGGCTTATGCGGGGGAC
+GTTTTGGTGACGCATCATACTATATTACTGAAATTCAAAACAAAAAGAGCCCCGTAATCA
+CGGAACTCTTTTGTTTGGTAATGCGTATAAAAATACCTATAAACGTCCTGGGAGGGATTC
+GAACCCCCGACCGATGGCTTAGAAGGCCATTGCTCTATCCAGCTGAGCTACCAGGACACG
+TTTAACAACACAAGAATTATTATATCTAAATGAACTTAAATTAGCAATACCTTGTAAATA
+AAAAATGTTTATATTTTTCACTATTATAGAGCTATTTATCTAAAAAGGTTCAATAAGACT
+TAAATGCGAATTCAGGCAACTTAATTGTGTTAAATACAGTTTTGAATGCCTAACTGTATT
+TCTTTTCTCTTTAAAATACAGTTAAGTACATTATAAGATGTTGTGCGGATAAACAAACTA
+ATTGCATCAAATTTATTTTAAAATAACAACAACAAAACGTTAAGCGAATAACATTTCGGT
+GATTTAAAAGCTACGCACGTTTTTGTTATCTTCAAATTTAAATTTTAAGGAGTGTTTTCA
+ATGGAAGGTTTATTTAACGCAATTAAAGATACCGTAACTGCAGCAATTAATAATGATGGC
+GCAAAATTAGGCACAAGCATTGTGAGCATCGTTGAAAATGGCGTAGGTTTATTAGGTAAA
+TTATTCGGATTCTAATTTCAATATGTTATGTAAGTAATCAGTATTATTTCAAAGGTGAGG
+GAGAGATTTAAATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAAC
+AACATGATAGTGTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTT
+TACTAGGTAAATTATTTGGATTCTAATATAATAACTAATATTCTTTAAAATAAACTGGGT
+GAGCATACTTTAATGTTATGCACTCAGTTTATTTTATTTGCAGAAATTTGAGCCTCTGTT
\ No newline at end of file
diff --git a/t/data/reformat_input_gffs/query_3.gff b/t/data/reformat_input_gffs/query_3.gff
new file mode 100644
index 0000000..c8a0672
--- /dev/null
+++ b/t/data/reformat_input_gffs/query_3.gff
@@ -0,0 +1,220 @@
+##gff-version 3
+##sequence-region abc|SC|contig000001 1 15000
+abc|SC|contig000001	Prodigal:2.60	CDS	172	1131	.	-	0	ID=1_1;gene=different;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:Q2G1X0,protein motif:TIGRFAMs:TIGR01002,protein motif:Pfam:PF07968.6;locus_tag=abc_00001;product=Alpha-toxin,beta-channel forming cytolysin,Leukocidin/Hemolysin toxin family protein;protein_id=gnl|SC|abc_00001
+abc|SC|contig000001	Prodigal:2.60	CDS	1804	1950	.	+	0	ID=abc_00002;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00002;product=hypothetical protein;protein_id=gnl|SC|abc_00002
+abc|SC|contig000001	Prodigal:2.60	CDS	1934	2131	.	+	0	ID=abc_00003;inference=ab initio prediction:Prodigal:2.60;locus_tag=abc_00003;product=hypothetical protein;protein_id=gnl|SC|abc_00003
+abc|SC|contig000001	Prodigal:2.60	CDS	2621	3337	.	-	0	ID=abc_00004;inference=ab initio prediction:Prodigal:2.60,protein motif:CLUSTERS:PRK13350,protein motif:Pfam:PF02876.11;locus_tag=abc_00004;product=superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00004
+abc|SC|contig000001	Prodigal:2.60	CDS	3445	4170	.	-	0	ID=1_2;gene=speH;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:UniProtKB:P0C0I6,protein motif:CLUSTERS:PRK13349,protein motif:Pfam:PF02876.11;locus_tag=abc_00005;note=SPE H;product=hypothetical protein,superantigen-like protein,Staphylococcal/Streptococcal toxin%2C beta-grasp domain;protein_id=gnl|SC|abc_00005
+##FASTA
+>abc|SC|contig000001
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+ATACCCACCACATTTGGTGGAGAACCAAAAATTAGCCGAAAAACATCATTTCTGAAGTTA
+TCGGCTAAAGTTATAAATTATATTTATTTGTACATGAACAAATAATTTACATTAATTTGT
+CATTTCTTCTTTTTCCCAATCGATTTTATATCTTTCTGAAGAACGATCTGTCCATTTATC
+TTTAGTATTGGTACCTTTCCAATTTGTTGAAGTCCAATGCAATTGGTAGTCATCACGAAC
+TCGTTCGTATATTACATCTATATTTGTTTGTTGTTTGGATGCTTTTCTATCCATAGTAAT
+AACTGTAGCGAAGTCTGGTGAAAACCCTGAAGATAATAGAGAACTTGCTTTGTTAGGATC
+AAGGAAGTTCTCTGCTGCTTTCATAGAACCATTTCTAGTTTTCATGAAAAGTTGATTGCC
+ATATACCGGGTTCCAAGAATCTCTATCATATGGTCCCCAATTTTGATTCACCATATTGTT
+AAATATCACTTTCCAGCCTACTTTTTTATCAGTTGGGCTCTCTAAAATTGTTTTGAAATC
+AGGTTGAACATATTTCAGTGTATGACCAATCGAAACATTTGCACCAATAAGGCCGCCAAT
+TTTTCCTGTATCATCACCAGTAACATTACCGTTGAATCCATAAGTTAAAGTACTCATATA
+CTCTTTTGTATCAATCGAATTTCTTGGATAGTAATCAGATATTTGAGCTACTTCATTATC
+AGGTAGTTGCAACTGTACCTTAAAGGCTGAAGGCCAGGCTAAACCACTTTTGTTAGCACC
+TTCTTCGCTATAAACTCTATATTGACCAGCAATGGTACCTTTCGTTCTAATAACTAGCAG
+TTTTTTATTGTGATTTTTATCATCGATAAAACTATAAAATACTTTTTTGTGCATGCCATT
+TTCTTTATCATAAGTGACTAAATCACCTGTTTTTACTGTAGTATTGCTTCCAATATCTGT
+AGTACCGGTTTTAATATTAATATCAGAATCTGCGGCATTAGCGACAGGATTCATTAATAT
+GGAACCTAGCAATAGTGTTGTTGTTACTGAGCTGACTATACGTGTTTTCATTTTCATCAT
+CCTTCTATTTTTTTTAACGATTTGAGGAAACAATAATCAATATGTCAATTTAGAATATTG
+CAGTTGAGACATATCAAATATTTATAATCGTTTATATTAGTATATTTAATGTAGCAACTG
+ATAAATTACTGAGTGATGATGAGTGATTATTTTAAGAATATGTTTTTAACTTTTATTTAA
+AATTTGAAAGGAAGCATTTCAATTTCGAGGGTTAGTCAAAGTTGAATAAATTCTTTATGA
+AACAAGGAAAAGACATAGCTAATTTTATTGATTAATTTCTTTAAAACTAATGATTTGTTT
+GATTTAAAAATGTAATCGATTACAATATAAAAATACAAATATCTTAGAATTAAATCAATT
+AATTAACTATTAAATAAAAATTAAATATATATTAACTACTGTAAATTAATAAATAGAAAT
+AGAGAAAAAGGGTATTAATTATGTTTGGAATTTCGTCGGGAATAATTTTTGCCATTACAT
+AGAAATATCTAATAGATAATGAAAAAGTATCGTATGTATTTTTAATATAGTGTAAAATAT
+CATATGTAAAATAAAATGTAGATTTTTAGTTAGAGGCATTATAAGAAAATTTTGAGTATA
+GGTTAGCTTTTAATTATGAATCTTATTGAAATTTGATTAATAAATATATGATAGGGGATT
+AAAATGAAACTATTTTATATCGTATTTCTTATTATTATATGGCTGAATATATTTTTAGGA
+AATGAAATTATCCATACACTGACTGTTTTAATAACAACATTGTATATTGTTAATTCAAGA
+AAGGGGATTAAAAATGACAGAGTTGAATAATATTATAAACTCTCTTCAATCTTTGTTTGA
+GTCTGAATCAGGCTATAAAATTTCGAAAAATTCAGGAGTTCCATATCAAACAGTACAAGA
+TTTAAGAAATGGGAAAACCAAACTAGAAGATGCTAGATTTAGAACGATTATTAAACTTTA
+TAGTTACTATGTCTCATTAAAAGAACATTAATCATGGGACAATAAAGTATTGCTATAATA
+ATTGAATCATTAAATGAAGGAATAGAGCCTAAGACATAATAAATCAATGTCTTAGGCTCT
+ACAATATTATATTGGTAGTAGTTGACCGAATGAAAATGACCATGTAACAAGCATTTTTGG
+GGCCCCAACACAGAAGCTGACGAAAAGTCAGCTTACAATAATGTGCAAGTTGGGGATGGG
+CCCCAACACAGAGAATTTCAAAAAGAAATTCTACAGACAATGCAAGTTGGCGGGGCCCCA
+ACATAGAGAAATTCAAAAAGAAATTCTACAGACAATGCAAGTTGGGGAGTGACAACGAAA
+TAAATTTTATAAAATACCATTTCTGTCACATTCCTACTCCCAATTCCATTTAAATATATA
+GAAAATTTCACCTATTATATACATCATACACTTTAAAAATTATCATTTAAATAATCTGAT
+TTGGAATTAAAGTGCATGAAGTATAAGTCACCTTCATATACTAATCAAAGAGGACGTCAA
+CAGTTATTTTATTAGGATTTTTAACATAAACATTTGCTAGATCTGAATGTAATCTTTTGC
+TTAAATCAATAGTGTAGTTATTACCGCCACCGGTGATCTTAAGCTTACCTTTATTACGAT
+TTTCGGTATATAATATTTTATTTTTTATTAACGCTTCTCGTGCACGGAAATCGATTTCTT
+TCAATGTTAATACTGGTTTATTGCCTTGGTATATTTTATGCGCACCAATAATCGTTTGTA
+GTTTATCTTTGTATTGCACAAAAAGATTATAAGTTTTATCAGAAGGTTTTGCGGCTGGTG
+TAACGCCACCTGTAAATGTCTCTCTATAAGACCACCATAACTGATCAGTATCTTTGTCTT
+TTAGTCCAAACACATCTACGTAACGATCTTTTAACTGATTAATATTTCCCCAACTTTCAG
+CGCCCCATAAAGATATATGTGCTGACCATGAATATCTCTTAAGTTCAACATAAATGTTTC
+CATTATCATATTGATATAGCCATTTATTTGAAAATGAAAAATGAGGCTGTGTGTAATATT
+TAATTAATTCATTGATGTTAGTTTCATCTTGACCAATGCTATAAGCTTTAGCTTCAGAGT
+AAAAACTAAATACTGGTTTTTGATTTTGAGGTAATACAGTACCTAATAGTAATAATGTTG
+TCGTTAAAATTATATTTTTCGTGATGTTCTTACTCATTAGAACATCTCCTTTCAGAGGAA
+TCATGATACGAGGAATAAGAAATTTAAATGTGAGCGAAGTCAATATAGTATTTGCGATTA
+TTTTTATTAACTCCATTATTATTGTTAGTTTGATTTTTCGAGGATAACTTCAATTTTTGC
+ATTTTGAGGTTTTTTAACATATCTATTTGCATCAGTTGATGGCAACCTTTTACTTAAATC
+TATTGTGTAGTTATTGTCTGTACCTGTTATTTTAATTTGTCCTTTATTATAAGAATTATT
+ATATAATTTTTTACTTTTAATTAATGTTTGACGAATACGAAAATCTAATTCTTTTAAAGT
+TAAAACAGGCTTATTGCCTTCATAAACTGGAAATCCGCCAGTAAACGTTTCTGCTTTATC
+TTTATATGTTACATTCAGTTTATAGTGTTTATCGTTAGATGTTGCTGCAGGAGTAACACC
+ACCAGTAAACGTTTCTTGAGATAATGCAAAAGAATCAATGGTTTCTTGGTCTTTTATGCC
+AAAAATATCAACGCTTTTATTTCTTAATTGGTTGATATTGCCCCAACTTTCAGGTCCATA
+AACTTGAATATGACTATACCAAGAAAACTGTAACAACGTTGCATGAATCGTACCGTTATC
+TTTTTGCCATAACGTACTGTTAGAGAAGGTTAAATATTTTTGCGAGTAATATTTAGTTAA
+CTCATTAACGTTAGTTTCGTTTTGATTTATATAATAAGCTTTCGCTTCAGATGAAGAATT
+GATAGGTGTATTAGGAAATTGTGTAAATGCTGTACCTAATAGTAACAATGTTGTTGATAA
+AATAATTTTTTTCGTGATGTTATTGTTCATTCGAATTTCTCCTTTGAGTATTGTTGGAAT
+GTTTAATTATAAAAAGGGGTTAATTAGATAATTGAAATTATCCGCATTTACAAAAGGTAA
+TAGGTTAGTTAGATTTTTCGAGTATGACTTCAATTTGTGCATTTTTAGGATTTTTAACAT
+AACGGTTTGTGTCAGTTAATTTTAACTTTTTACTTAAATCAATCGTGTAATTATTTCCAT
+CAGCAGTTATCTTAATTTGACCTTTATTAAATTCTCCGTTATATAACTTTTTATTCTTTA
+TTAATGTTTGACGAATACGGAAATCTAATTCTTTTAAAGTTAATACTGGTTTATTTCCTT
+TGTAAAATTCATGTCCACCGATGATAGTTTGTTGTTTATCACTATATTTTAAAAATAGTC
+TATAAGGTTTATCAGATGAAGTAGCTGCTGGCGTAACACCACCAGTAAATGTTTCATCAT
+AAGTCCAGTAACCTTCAACTGTGTCCTCATCTTTAGTTCCAAATATATCAACGTATTTAT
+TTCTTAACTGATTAATGTTTCCCCAACTCTCGGATCCAAACACTTGAATATGACTATACC
+AAACCCACGTTTGCAATGTTGCATGAATGCTACCATTGGGCTTTTGCCATAGCCATTTTC
+CAGATAATGAAAAATGCGGCTGAGTGTAATATTTGATTAACTCATTGATATTAGTCTCGT
+TTTCACTGATATTATAGGCTTTTGCTTCAGATGAAAAACTGATTGGTGTTTTAGGAAGTT
+GTGTTGATGTGGTTCCTAAAAGTAACAATGCTGTTGATAAAACTAATTTATTCATGATGT
+TCTTTTTCATATGAAAATCTCCTTTGCGTGAATTACCCAAAGTATATAAGCTATTACACC
+GATTCGGAATTAAATAAAAGCTAAAACTATGTTAAATAAACTTAAACAGTTAGTAGTGTT
+ATTTAAGCAAAACTTATCATTTTTAAGTTTTGGACAGAAACAGTACTTAATAAAGTAGGC
+GGGAGTTATATATTAAAAACGACACGTTACAATTATTCTCTAATCAATTGCATTAAATTG
+TTTGATAATTGAATTTTCTAACTACTTGAAAAATAGTTATACTTTAAATGTAGTACTTAT
+TTTAATTATTTCCTACTACTTAAATTTAATATTAATAAAATGTTCATTTAATTATTGATA
+AAATATTACAAATTTTAATAGTAGGTTGTGTTTATTTTGTATGCGCTTACAATTTAGGTG
+TAACTAAAATAAAAGGAGTTGTTATTAATGAAAAATTTACGAAACAGAAGTTTTTTAACT
+TTATTAGACTTTTCACGACAAGAGGTAGAATTCTTATTAACACTCTCCGAGGATTTAAAA
+CGTGCTAAATATATTGGCACTGAAAAGCCTATGTTAAAAAATAAAAATATTGCACTGTTA
+TTTGAAAAAGATTCTACAAGAACGCGATGTGCATTTGAAGTTGCAGCGCATGATCAAGGT
+GCAAATGTAACTTATTTAGGCCCAACTGGATCACAAATGGGTAAAAAAGAAACAACTAAA
+GATACTGCACGTGTGCTTGGTGGAATGTATGATGGCATTGAATACCGTGGTTTTTCACAA
+AGAACAGTAGAAACTTTAGCTGAAAATTCAGGCGTACCAGTGTGGAATGGTTTAACTGAT
+GAAGATCATCCTACTCAAGTTCTTGCTGATTTCTTAACAGCAAAAGAAGTCTTAAAAAAA
+GATTATGCAGATATTAACTTTACATATGTTGGAGATGGTCGTAATAACGTTGCAAATGCA
+TTAATGCAAGGTGCTGCCATTATGGGTATGAACTTCCATTTAGTTTGTCCAAAAGAATTA
+AATCCAACAGATGAATTATTAAATCGCTGTAAAAATATTGCCGCTGAAAATGGTGGCAAC
+ATATTAATCACAGATGATATTGACCAAGGTGTAAAAGGTTCGGATGTAATTTACACTGAT
+GTTTGGGTATCAATGGGTGAACCTGATGAAGTATGGAAAGAACGACTTGAATTATTGAAA
+CCATATCAAGTAAATAAAGAAATGATGGATAAAACTGGTAATCCAAATGTTATTTTTGAG
+CATTGCTTACCATCTTTCCATAATGCTGATACGAAAATTGGTCAACAAATTTTTGAAAAA
+TATGGTATTCGAGAAATGGAAGTTACAGATGAAGTATTCGAAAGTAAAGCTTCAGTTGTA
+TTCCAAGAAGCTGAGAACAGAATGCATACAATCAAAGCAGTCATGGTTGCTACATTGGGT
+GAATTTTAAATGATATAAGGAAGTGAATATGATGGCGAAAATAGTAGTAGCATTAGGTGG
+TAATGCTTTAGGAAAATCACCTCAAGAACAACTCGAGCTTGTTAAAAATACTGCGAAATC
+ATTAGTAGGATTAATAACAAAAGGACATGAGATTGTTATTAGTCATGGTAATGGACCACA
+GGTTGGAAGCATTAATTTGGGACTTAACTATGCTGCAGAACATAACCAAGGTCCGGCATT
+TCCATTTGCTGAATGTGGCGCAATGAGTCAAGCTTACATCGGCTATCAATTACAAGAAAG
+CTTACAAAATGAATTGCATTCTATTGGAATGGATAAACAAGTGGTAACACTAGTGACACA
+AGTTGAAGTTGATGAAAATGATCCGGCATTTAACAATCCTTCAAAACCAATTGGGTTATT
+TTACAACAAAGAAGAAGCTGAACAAATTCAAAAAGAAAAAGGATTTATATTTGTTGAAGA
+TGCTGGAAGAGGATATAGACGCGTTGTTCCTTCACCACAACCCATCTCTATTATTGAATT
+AGAGAGTATTAAAACACTTATTAAAAATGATACACTCGTTATTGCTGCTGGTGGTGGAGG
+TATACCAGTAATTAGAGAGCAACATGATGGTTTTAAAGGTATTGATGCAGTTATAGACAA
+AGATAAAACAAGTGCGTTGTTGGGTGCTAATATTCAATGCGATCAATTGATTATTTTAAC
+AGCAATTGATTATGTATATATTAATTTTAACACTGAAAACCAACAGCCTTTGAAAACAAC
+AAATGTTGATGAATTAAAACGATATATAGACGAAAATCAATTTGCAAAAGGAAGTATGTT
+ACCAAAAATTGAAGCAGCCATATCATTTATTGAAAACAATCCAAAAGGAAGTGTGCTTAT
+AACATCATTAAATGAATTAGATGCTGCCTTAGAGGGTAAAGTAGGTACTGTGATTAAAAA
+GTAATTGAATTGAAACGCTTTTCAATTACTATATGTCAAATGCATGATTTTTATTATTTA
+TGTGCACCCCCTGAAAATAATGCCTCTATTTTGATGCGGGGTGCACTTTCTTAATTTATA
+TTTATAAAATCTTTAAGGTAGAAATGCTAGGTTAAATGTCGAAGGAGATGAAACCGTGGA
+AAATACAATTAATGAAAGTGAAAAGAAAAAACGATTTAAATTAAAAATGCCAGGTGCATT
+TATGATTTTATTCATTTTAACGGTTGTTGCAGTTATAGCAACATGGGTTATTCCTGCTGG
+TGCATATTCTAAACTTTCTTACGAACCTTCATCCCAAGAACTAAAGATAGTTAACCCTCA
+TAACCAAGTGAAAAAGGTTCCGGGTACGCAACAGGAACTAGACAAAATGGGGGTTAAAAT
+TAAGATTGAACAATTTAAATCAGGTGCAATTAATAAGCCAGTATCAATTCCGAATACTTA
+TGAAAGATTAAAGCAACATCCAGCTGGACCAGAACAAATAACAAGTAGCATGGTTGAAGG
+TACGATAGAAGCGGTCGATATCATGGTATTCATTCTTGTACTAGGGGGACTTATTGGCGT
+AGTTCAAGCCAGTGGTTCTTTTGAATCGGGATTGTTAGCTTTAACGAAGAAAACAAAAGG
+GCATGAATTTATGCTAATTGTGTTTGTATCAATACTAATGATTATCGGCGGGACGTTATG
+TGGTATTGAAGAAGAAGCTGTAGCATTCTATCCGATTTTAGTCCCTATATTTATAGCGTT
+AGGATACGATTCTATCGTTTCAGTTGGTGCCATATTCCTTGCCAGCTCTGTCGGTAGTAC
+ATTTTCAACTATTAACCCGTTCTCGGTTGTAATTGCCTCTAATGCCGCTGGTACAACTTT
+TACGGATGGCTTGTATTGGAGAATAGGTGCTTGTATTGTCGGTGCGATTTTTGTTATTAG
+TTATTTATATTGGTACTGTAAAAAAATTAAAAACGATCCTAAAGCGTCATATTCTTATGA
+AGACAAAGATGCTTTTGAACAGCAATGGTCTGTATTAAAAGATGACGATAGTGCCCATTT
+TACTTTGCGTAAGAAGATAATCCTTACATTATTTGTACTACCATTTCCAATTATGGTATG
+GGGAGTTATGACGCAAGGTTGGTGGTTCCCAGTTATGGCTTCAGCATTTTTAATATTTAC
+AATTATAATAATGTTTATTGCTGGGACAGGTAAATCTGGATTGGGAGAAAAAGGAACTGT
+AGATGCATTTGTCAATGGTGCATCAAGTTTAGTAGGTGTATCTTTAATTATTGGTTTAGC
+TCGAGGTATTAATTTAGTGTTGAATGAAGGTATGATTTCAGATACAATCTTACACTTTTC
+ATCATCTTTAGTTCAACATATGAGTGGACCATTATTTATCATCGTATTACTATTTATTTT
+CTTCTGTTTAGGTTTTATCGTGCCATCATCTTCTGGATTAGCGGTATTATCAATGCCTAT
+CTTTGCACCACTAGCTGATACAGTAGGTATACCAAGATTCGTCATCGTTACGACATATCA
+ATTTGGTCAATATGCGATGTTATTCTTAGCGCCGACTGGACTTGTTATGGCCACACTACA
+AATGTTAAACATGCGATATTCACATTGGTTCCGATTTGTATGGCCGGTAGTTGCTTTTGT
+ATTGATTTTCGGTGGCGGAGTACTAATTACGCAAGTACTAATTTATTCATAATTTGAAAT
+GCTATATTATAAAAATACTAATTGGGTTTTATGCATCTCGTAGGTTTGTAGAAATACTAA
+ACTAAGCGAGGTGCATTATTATTTTTGATTAAGAAAATAATGACGGTAATGATAACACTA
+GTAAGTAATTGATACAATGCTCTATTTAATAATGATATTTTTAAAATTTGTTTTTAATGT
+AATGTTAGATCTATGGTATATTATATTTATCGTGGTAAATATGTATTTGCTGTAAACAGT
+TTTTGTAACACATGACACATATGGTTGTGAAAATAGCATATATAAAGGATGGCTATAAAT
+GACACATTTGACAAAGGTTTTAGATACACTAACTGGAATATGCGTAGTATTATTATTTAG
+TAAATATTTTGTGGCGTATGCAAATATGGTGTTTGATTGGAATTTAAGATGGTATTTGCT
+AGAAAACATACCACATTTGCCAATTATATTATTTATTCTGATGTTTATTTTCGGAGTACC
+TTCTGAAATGATAAAAGATAGGCAAAGGAAAAATAACGGTGTTTAATTTATCGATATTTA
+GAGGTGATAAAATTTGTCAACTATTACAAAGACTAAAATAAAAATCTTTATTAATTATTT
+TATTGGTATATTAGCCTTGGCGTTTGCTGCTTATATATTATTTAAATAAAGGTTAGAGTG
+AAACGTGTTTATGAACTAGACGTTCTAGTATAGTTACTACAGCTTTAGTCAGAAGGTATC
+ATTGATAAGATCATATTAAATCAAAGAGGCATTGATATACACTAAAAAGAGGCAAGATTA
+CCTGCCTCTTTTTTAGTTATTAAATATGCGTGTTAATTCTTGGTAATAGTGTTACTGAGA
+CGCATTACGAAGAAATTTATCTTGATTTTCCTTTTTTAAAAAGAAGTGAAGATATCCTAT
+AAAGACTCTAAGTACTATTACAGTGGCTAATAACACTTCGATAAACAAAAGACCTTTCCA
+AATATCTGGAAACATAAGTACAGGCAAACTGTTCTTTAAAGCAGTTGCTGAGATTACTAA
+AGGGAATGTGAAAGCTGAAAATACGGGCGAAAAAGGTTCTTTTAGTAATTTAGGCAATTG
+TATAATGATATAAAAATAAAATATTTGAGCTAATATTAAAAATATAATTACGATAAAAGC
+ATTCGCCTTAGGAAAGGCTATAACATATGCTGCAGCGACTAAAGAAAATGGTGCACAAAT
+TGTCGATGTGTTCGGTTTGATTGACGTCTGCAATGGAAATGCTTTTAATCGCTTGAAAAC
+AATAGGTAAGACAATACAAGTAGCTACAAAGCCATATATTACTGTTAATTGACCTATGAA
+AAAACATCCGCTAACGGGTGCCGTCAATCCTGCGATAGCAATACCAATAAAAAGTACAGT
+CCACGAAGGATAAACATTTTCAAGTGAAAAATCTTTTAAATATTTTATTGAAAAAATAAT
+CATATGCGTCATAATTCCCACAAGGCATAAAATCCAAATAGGCGTTATTAAGCTATTGAT
+AAAAGTTATGTTACTAAAAAATGTATTTAAATAAGTAGTACCTAAAAAGCCAGACATGAA
+AAATGTTGTGAACACTGATGAAACTAGAGGACTGTTTAATTGTTCTTTCACATTATTAAA
+ATATTTGATCATAGTACATAAAAGGTGAATCCAAATCAAGAAAGCAAAGATTCCGCAAAC
+AGCGTTTAAAGTAAGAGATAAGTCTTTTAATAGATTACCCAGGCCTAATAATCCTAAGAC
+TAGTCCTGACGTTACTAGAGGTGCTTTTTGAAGTCTCATGATTTACAACCTTTCTTATGT
+GATTTTCTTCACTAATTATATCATGATAGCTATGGCCAATTAATAAGAAGAGTGTGTACT
+ATTACGTTATTAGAGTATGTATATTGGATTGATAGACACATAATTGACATTTAAATCTCA
+AAATTAATGATATTTGAGGTATGCTTTGCAACCTAAATATTGGAATATGTGGAAAAGTAA
+TTATTTAATTTAAATGATGATTGAAGATTTACAAGGGGGTGTACAAAATGAGAAATCAAA
+TTCAAAAACTATTAGACAGTGATTTGAGCAGTTTACATATATCGAAACAAACAGGAGTTC
+CACAAAGCACAATACACAGAATGAGAAAAAAAGAAAGATCATTAGACAATATGTCATTGA
+AAAACGCTGAACTACTTTATAAATTTGCCAATAGTATATTTAGCAATGAAAATTAAGAAG
+AATAATTAGTGAGTTAAATAATTAAAAAACCACGTCTCGATGGACGTGGTTATTTTTTTA
+AATGTATTTAATTATAGGATTTCGGGCCCCTGAAAAGTCCCTAAAATTTTGAATAGTATC
+AAAATCTATAGGAATAATATAATAATAAAAATCTCTACGCATGCAAATGGTGAGTGTTAA
+AAATCTTGGTGTAGTGTTGGTGTAGTTCAGGTGCAGTATAGATGAAATTTGAGCACAAAA
+TTACTTAATTAAAAATGTATAAACAAGTGTCAGACGCGTGTCAATTGTGTGTCAAAAAAT
+TCTATGACGAATAAGCATGCTTGTTTTAATTACTGTATTTTCGGGGCTTATGCGGGGGAC
+GTTTTGGTGACGCATCATACTATATTACTGAAATTCAAAACAAAAAGAGCCCCGTAATCA
+CGGAACTCTTTTGTTTGGTAATGCGTATAAAAATACCTATAAACGTCCTGGGAGGGATTC
+GAACCCCCGACCGATGGCTTAGAAGGCCATTGCTCTATCCAGCTGAGCTACCAGGACACG
+TTTAACAACACAAGAATTATTATATCTAAATGAACTTAAATTAGCAATACCTTGTAAATA
+AAAAATGTTTATATTTTTCACTATTATAGAGCTATTTATCTAAAAAGGTTCAATAAGACT
+TAAATGCGAATTCAGGCAACTTAATTGTGTTAAATACAGTTTTGAATGCCTAACTGTATT
+TCTTTTCTCTTTAAAATACAGTTAAGTACATTATAAGATGTTGTGCGGATAAACAAACTA
+ATTGCATCAAATTTATTTTAAAATAACAACAACAAAACGTTAAGCGAATAACATTTCGGT
+GATTTAAAAGCTACGCACGTTTTTGTTATCTTCAAATTTAAATTTTAAGGAGTGTTTTCA
+ATGGAAGGTTTATTTAACGCAATTAAAGATACCGTAACTGCAGCAATTAATAATGATGGC
+GCAAAATTAGGCACAAGCATTGTGAGCATCGTTGAAAATGGCGTAGGTTTATTAGGTAAA
+TTATTCGGATTCTAATTTCAATATGTTATGTAAGTAATCAGTATTATTTCAAAGGTGAGG
+GAGAGATTTAAATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAAC
+AACATGATAGTGTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTT
+TACTAGGTAAATTATTTGGATTCTAATATAATAACTAATATTCTTTAAAATAAACTGGGT
+GAGCATACTTTAATGTTATGCACTCAGTTTATTTTATTTGCAGAAATTTGAGCCTCTGTT
\ No newline at end of file
diff --git a/t/data/reformat_input_gffs/real_1.gff b/t/data/reformat_input_gffs/real_1.gff
new file mode 100644
index 0000000..3242220
--- /dev/null
+++ b/t/data/reformat_input_gffs/real_1.gff
@@ -0,0 +1,223 @@
+##gff-version 3
+##sequence-region 2 1 15000
+2	annotation	remark	1	15000	.	.	.	accessions=REL607;comment=Source DNAEscherichia
+2	feature	source	1	4629812	.	+	.	mol_type=genomic DNA;organism=Escherichia coli;strain=REL607
+2	feature	CDS	190	255	.	+	0	codon_start=1;gene=thrL;locus_tag=ABC_00001;note=b0001;product=thr operon leader peptide;protein_id=kribb:ABC_00001;transl_table=11;translation=MKRISTTITTTITITTGNGAG
+2	feature	gene	190	255	.	+	.	gene=thrL;locus_tag=ABC_00001
+2	feature	CDS	336	2798	.	+	0	EC_number=2.7.2.4,1.1.1.3;codon_start=1;gene=thrA;locus_tag=ABC_00002;note=b0002;product=bifunctional aspartokinase I/homeserine dehydrogenase I;protein_id=kribb:ABC_00002;transl_table=11;translation=MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAK [...]
+2	feature	gene	336	2798	.	+	.	gene=thrA
+2	feature	CDS	2800	3732	.	+	0	EC_number=2.7.1.39;codon_start=1;gene=thrB;locus_tag=ABC_00003;note=b0003;product=homoserine kinase;protein_id=kribb:ABC_00003;transl_table=11;translation=MVKVYAPASSANMSVGFDVLGAAVTPVDGALLGDVVTVEAAETFSLNNLGRFADKLPSEPRENIVYQCWERFCQELGKQIPVAMTLEKNMPIGSGLGSSACSVVAALMAMNEHCGKPLNDTRLLALMGELEGRISGSIHYDNVAPCFLGGMQLMIEENDIISQQVPGFDEWLWVLAYPGIKVSTAEARAILPAQYRRQDCIAHGRHLAGFIHACYSRQPELAAKLMKDVIAEPYRERLLPGFRQARQAVAEIGAVASGISGSGPTLFALCDKPDTAQRVADWLGKNYLQNQEGFVHICRLDTAGARVLEN
+2	feature	gene	2800	3732	.	+	.	gene=thrB;locus_tag=ABC_00003
+##FASTA
+>2
+ACTGGCCGCCTAATAATAAAAACTCTAAAAGTTGTAATTTAAAATAGTTCTTTAAATTAT
+ATACCCACCACATTTGGTGGAGAACCAAAAATTAGCCGAAAAACATCATTTCTGAAGTTA
+TCGGCTAAAGTTATAAATTATATTTATTTGTACATGAACAAATAATTTACATTAATTTGT
+CATTTCTTCTTTTTCCCAATCGATTTTATATCTTTCTGAAGAACGATCTGTCCATTTATC
+TTTAGTATTGGTACCTTTCCAATTTGTTGAAGTCCAATGCAATTGGTAGTCATCACGAAC
+TCGTTCGTATATTACATCTATATTTGTTTGTTGTTTGGATGCTTTTCTATCCATAGTAAT
+AACTGTAGCGAAGTCTGGTGAAAACCCTGAAGATAATAGAGAACTTGCTTTGTTAGGATC
+AAGGAAGTTCTCTGCTGCTTTCATAGAACCATTTCTAGTTTTCATGAAAAGTTGATTGCC
+ATATACCGGGTTCCAAGAATCTCTATCATATGGTCCCCAATTTTGATTCACCATATTGTT
+AAATATCACTTTCCAGCCTACTTTTTTATCAGTTGGGCTCTCTAAAATTGTTTTGAAATC
+AGGTTGAACATATTTCAGTGTATGACCAATCGAAACATTTGCACCAATAAGGCCGCCAAT
+TTTTCCTGTATCATCACCAGTAACATTACCGTTGAATCCATAAGTTAAAGTACTCATATA
+CTCTTTTGTATCAATCGAATTTCTTGGATAGTAATCAGATATTTGAGCTACTTCATTATC
+AGGTAGTTGCAACTGTACCTTAAAGGCTGAAGGCCAGGCTAAACCACTTTTGTTAGCACC
+TTCTTCGCTATAAACTCTATATTGACCAGCAATGGTACCTTTCGTTCTAATAACTAGCAG
+TTTTTTATTGTGATTTTTATCATCGATAAAACTATAAAATACTTTTTTGTGCATGCCATT
+TTCTTTATCATAAGTGACTAAATCACCTGTTTTTACTGTAGTATTGCTTCCAATATCTGT
+AGTACCGGTTTTAATATTAATATCAGAATCTGCGGCATTAGCGACAGGATTCATTAATAT
+GGAACCTAGCAATAGTGTTGTTGTTACTGAGCTGACTATACGTGTTTTCATTTTCATCAT
+CCTTCTATTTTTTTTAACGATTTGAGGAAACAATAATCAATATGTCAATTTAGAATATTG
+CAGTTGAGACATATCAAATATTTATAATCGTTTATATTAGTATATTTAATGTAGCAACTG
+ATAAATTACTGAGTGATGATGAGTGATTATTTTAAGAATATGTTTTTAACTTTTATTTAA
+AATTTGAAAGGAAGCATTTCAATTTCGAGGGTTAGTCAAAGTTGAATAAATTCTTTATGA
+AACAAGGAAAAGACATAGCTAATTTTATTGATTAATTTCTTTAAAACTAATGATTTGTTT
+GATTTAAAAATGTAATCGATTACAATATAAAAATACAAATATCTTAGAATTAAATCAATT
+AATTAACTATTAAATAAAAATTAAATATATATTAACTACTGTAAATTAATAAATAGAAAT
+AGAGAAAAAGGGTATTAATTATGTTTGGAATTTCGTCGGGAATAATTTTTGCCATTACAT
+AGAAATATCTAATAGATAATGAAAAAGTATCGTATGTATTTTTAATATAGTGTAAAATAT
+CATATGTAAAATAAAATGTAGATTTTTAGTTAGAGGCATTATAAGAAAATTTTGAGTATA
+GGTTAGCTTTTAATTATGAATCTTATTGAAATTTGATTAATAAATATATGATAGGGGATT
+AAAATGAAACTATTTTATATCGTATTTCTTATTATTATATGGCTGAATATATTTTTAGGA
+AATGAAATTATCCATACACTGACTGTTTTAATAACAACATTGTATATTGTTAATTCAAGA
+AAGGGGATTAAAAATGACAGAGTTGAATAATATTATAAACTCTCTTCAATCTTTGTTTGA
+GTCTGAATCAGGCTATAAAATTTCGAAAAATTCAGGAGTTCCATATCAAACAGTACAAGA
+TTTAAGAAATGGGAAAACCAAACTAGAAGATGCTAGATTTAGAACGATTATTAAACTTTA
+TAGTTACTATGTCTCATTAAAAGAACATTAATCATGGGACAATAAAGTATTGCTATAATA
+ATTGAATCATTAAATGAAGGAATAGAGCCTAAGACATAATAAATCAATGTCTTAGGCTCT
+ACAATATTATATTGGTAGTAGTTGACCGAATGAAAATGACCATGTAACAAGCATTTTTGG
+GGCCCCAACACAGAAGCTGACGAAAAGTCAGCTTACAATAATGTGCAAGTTGGGGATGGG
+CCCCAACACAGAGAATTTCAAAAAGAAATTCTACAGACAATGCAAGTTGGCGGGGCCCCA
+ACATAGAGAAATTCAAAAAGAAATTCTACAGACAATGCAAGTTGGGGAGTGACAACGAAA
+TAAATTTTATAAAATACCATTTCTGTCACATTCCTACTCCCAATTCCATTTAAATATATA
+GAAAATTTCACCTATTATATACATCATACACTTTAAAAATTATCATTTAAATAATCTGAT
+TTGGAATTAAAGTGCATGAAGTATAAGTCACCTTCATATACTAATCAAAGAGGACGTCAA
+CAGTTATTTTATTAGGATTTTTAACATAAACATTTGCTAGATCTGAATGTAATCTTTTGC
+TTAAATCAATAGTGTAGTTATTACCGCCACCGGTGATCTTAAGCTTACCTTTATTACGAT
+TTTCGGTATATAATATTTTATTTTTTATTAACGCTTCTCGTGCACGGAAATCGATTTCTT
+TCAATGTTAATACTGGTTTATTGCCTTGGTATATTTTATGCGCACCAATAATCGTTTGTA
+GTTTATCTTTGTATTGCACAAAAAGATTATAAGTTTTATCAGAAGGTTTTGCGGCTGGTG
+TAACGCCACCTGTAAATGTCTCTCTATAAGACCACCATAACTGATCAGTATCTTTGTCTT
+TTAGTCCAAACACATCTACGTAACGATCTTTTAACTGATTAATATTTCCCCAACTTTCAG
+CGCCCCATAAAGATATATGTGCTGACCATGAATATCTCTTAAGTTCAACATAAATGTTTC
+CATTATCATATTGATATAGCCATTTATTTGAAAATGAAAAATGAGGCTGTGTGTAATATT
+TAATTAATTCATTGATGTTAGTTTCATCTTGACCAATGCTATAAGCTTTAGCTTCAGAGT
+AAAAACTAAATACTGGTTTTTGATTTTGAGGTAATACAGTACCTAATAGTAATAATGTTG
+TCGTTAAAATTATATTTTTCGTGATGTTCTTACTCATTAGAACATCTCCTTTCAGAGGAA
+TCATGATACGAGGAATAAGAAATTTAAATGTGAGCGAAGTCAATATAGTATTTGCGATTA
+TTTTTATTAACTCCATTATTATTGTTAGTTTGATTTTTCGAGGATAACTTCAATTTTTGC
+ATTTTGAGGTTTTTTAACATATCTATTTGCATCAGTTGATGGCAACCTTTTACTTAAATC
+TATTGTGTAGTTATTGTCTGTACCTGTTATTTTAATTTGTCCTTTATTATAAGAATTATT
+ATATAATTTTTTACTTTTAATTAATGTTTGACGAATACGAAAATCTAATTCTTTTAAAGT
+TAAAACAGGCTTATTGCCTTCATAAACTGGAAATCCGCCAGTAAACGTTTCTGCTTTATC
+TTTATATGTTACATTCAGTTTATAGTGTTTATCGTTAGATGTTGCTGCAGGAGTAACACC
+ACCAGTAAACGTTTCTTGAGATAATGCAAAAGAATCAATGGTTTCTTGGTCTTTTATGCC
+AAAAATATCAACGCTTTTATTTCTTAATTGGTTGATATTGCCCCAACTTTCAGGTCCATA
+AACTTGAATATGACTATACCAAGAAAACTGTAACAACGTTGCATGAATCGTACCGTTATC
+TTTTTGCCATAACGTACTGTTAGAGAAGGTTAAATATTTTTGCGAGTAATATTTAGTTAA
+CTCATTAACGTTAGTTTCGTTTTGATTTATATAATAAGCTTTCGCTTCAGATGAAGAATT
+GATAGGTGTATTAGGAAATTGTGTAAATGCTGTACCTAATAGTAACAATGTTGTTGATAA
+AATAATTTTTTTCGTGATGTTATTGTTCATTCGAATTTCTCCTTTGAGTATTGTTGGAAT
+GTTTAATTATAAAAAGGGGTTAATTAGATAATTGAAATTATCCGCATTTACAAAAGGTAA
+TAGGTTAGTTAGATTTTTCGAGTATGACTTCAATTTGTGCATTTTTAGGATTTTTAACAT
+AACGGTTTGTGTCAGTTAATTTTAACTTTTTACTTAAATCAATCGTGTAATTATTTCCAT
+CAGCAGTTATCTTAATTTGACCTTTATTAAATTCTCCGTTATATAACTTTTTATTCTTTA
+TTAATGTTTGACGAATACGGAAATCTAATTCTTTTAAAGTTAATACTGGTTTATTTCCTT
+TGTAAAATTCATGTCCACCGATGATAGTTTGTTGTTTATCACTATATTTTAAAAATAGTC
+TATAAGGTTTATCAGATGAAGTAGCTGCTGGCGTAACACCACCAGTAAATGTTTCATCAT
+AAGTCCAGTAACCTTCAACTGTGTCCTCATCTTTAGTTCCAAATATATCAACGTATTTAT
+TTCTTAACTGATTAATGTTTCCCCAACTCTCGGATCCAAACACTTGAATATGACTATACC
+AAACCCACGTTTGCAATGTTGCATGAATGCTACCATTGGGCTTTTGCCATAGCCATTTTC
+CAGATAATGAAAAATGCGGCTGAGTGTAATATTTGATTAACTCATTGATATTAGTCTCGT
+TTTCACTGATATTATAGGCTTTTGCTTCAGATGAAAAACTGATTGGTGTTTTAGGAAGTT
+GTGTTGATGTGGTTCCTAAAAGTAACAATGCTGTTGATAAAACTAATTTATTCATGATGT
+TCTTTTTCATATGAAAATCTCCTTTGCGTGAATTACCCAAAGTATATAAGCTATTACACC
+GATTCGGAATTAAATAAAAGCTAAAACTATGTTAAATAAACTTAAACAGTTAGTAGTGTT
+ATTTAAGCAAAACTTATCATTTTTAAGTTTTGGACAGAAACAGTACTTAATAAAGTAGGC
+GGGAGTTATATATTAAAAACGACACGTTACAATTATTCTCTAATCAATTGCATTAAATTG
+TTTGATAATTGAATTTTCTAACTACTTGAAAAATAGTTATACTTTAAATGTAGTACTTAT
+TTTAATTATTTCCTACTACTTAAATTTAATATTAATAAAATGTTCATTTAATTATTGATA
+AAATATTACAAATTTTAATAGTAGGTTGTGTTTATTTTGTATGCGCTTACAATTTAGGTG
+TAACTAAAATAAAAGGAGTTGTTATTAATGAAAAATTTACGAAACAGAAGTTTTTTAACT
+TTATTAGACTTTTCACGACAAGAGGTAGAATTCTTATTAACACTCTCCGAGGATTTAAAA
+CGTGCTAAATATATTGGCACTGAAAAGCCTATGTTAAAAAATAAAAATATTGCACTGTTA
+TTTGAAAAAGATTCTACAAGAACGCGATGTGCATTTGAAGTTGCAGCGCATGATCAAGGT
+GCAAATGTAACTTATTTAGGCCCAACTGGATCACAAATGGGTAAAAAAGAAACAACTAAA
+GATACTGCACGTGTGCTTGGTGGAATGTATGATGGCATTGAATACCGTGGTTTTTCACAA
+AGAACAGTAGAAACTTTAGCTGAAAATTCAGGCGTACCAGTGTGGAATGGTTTAACTGAT
+GAAGATCATCCTACTCAAGTTCTTGCTGATTTCTTAACAGCAAAAGAAGTCTTAAAAAAA
+GATTATGCAGATATTAACTTTACATATGTTGGAGATGGTCGTAATAACGTTGCAAATGCA
+TTAATGCAAGGTGCTGCCATTATGGGTATGAACTTCCATTTAGTTTGTCCAAAAGAATTA
+AATCCAACAGATGAATTATTAAATCGCTGTAAAAATATTGCCGCTGAAAATGGTGGCAAC
+ATATTAATCACAGATGATATTGACCAAGGTGTAAAAGGTTCGGATGTAATTTACACTGAT
+GTTTGGGTATCAATGGGTGAACCTGATGAAGTATGGAAAGAACGACTTGAATTATTGAAA
+CCATATCAAGTAAATAAAGAAATGATGGATAAAACTGGTAATCCAAATGTTATTTTTGAG
+CATTGCTTACCATCTTTCCATAATGCTGATACGAAAATTGGTCAACAAATTTTTGAAAAA
+TATGGTATTCGAGAAATGGAAGTTACAGATGAAGTATTCGAAAGTAAAGCTTCAGTTGTA
+TTCCAAGAAGCTGAGAACAGAATGCATACAATCAAAGCAGTCATGGTTGCTACATTGGGT
+GAATTTTAAATGATATAAGGAAGTGAATATGATGGCGAAAATAGTAGTAGCATTAGGTGG
+TAATGCTTTAGGAAAATCACCTCAAGAACAACTCGAGCTTGTTAAAAATACTGCGAAATC
+ATTAGTAGGATTAATAACAAAAGGACATGAGATTGTTATTAGTCATGGTAATGGACCACA
+GGTTGGAAGCATTAATTTGGGACTTAACTATGCTGCAGAACATAACCAAGGTCCGGCATT
+TCCATTTGCTGAATGTGGCGCAATGAGTCAAGCTTACATCGGCTATCAATTACAAGAAAG
+CTTACAAAATGAATTGCATTCTATTGGAATGGATAAACAAGTGGTAACACTAGTGACACA
+AGTTGAAGTTGATGAAAATGATCCGGCATTTAACAATCCTTCAAAACCAATTGGGTTATT
+TTACAACAAAGAAGAAGCTGAACAAATTCAAAAAGAAAAAGGATTTATATTTGTTGAAGA
+TGCTGGAAGAGGATATAGACGCGTTGTTCCTTCACCACAACCCATCTCTATTATTGAATT
+AGAGAGTATTAAAACACTTATTAAAAATGATACACTCGTTATTGCTGCTGGTGGTGGAGG
+TATACCAGTAATTAGAGAGCAACATGATGGTTTTAAAGGTATTGATGCAGTTATAGACAA
+AGATAAAACAAGTGCGTTGTTGGGTGCTAATATTCAATGCGATCAATTGATTATTTTAAC
+AGCAATTGATTATGTATATATTAATTTTAACACTGAAAACCAACAGCCTTTGAAAACAAC
+AAATGTTGATGAATTAAAACGATATATAGACGAAAATCAATTTGCAAAAGGAAGTATGTT
+ACCAAAAATTGAAGCAGCCATATCATTTATTGAAAACAATCCAAAAGGAAGTGTGCTTAT
+AACATCATTAAATGAATTAGATGCTGCCTTAGAGGGTAAAGTAGGTACTGTGATTAAAAA
+GTAATTGAATTGAAACGCTTTTCAATTACTATATGTCAAATGCATGATTTTTATTATTTA
+TGTGCACCCCCTGAAAATAATGCCTCTATTTTGATGCGGGGTGCACTTTCTTAATTTATA
+TTTATAAAATCTTTAAGGTAGAAATGCTAGGTTAAATGTCGAAGGAGATGAAACCGTGGA
+AAATACAATTAATGAAAGTGAAAAGAAAAAACGATTTAAATTAAAAATGCCAGGTGCATT
+TATGATTTTATTCATTTTAACGGTTGTTGCAGTTATAGCAACATGGGTTATTCCTGCTGG
+TGCATATTCTAAACTTTCTTACGAACCTTCATCCCAAGAACTAAAGATAGTTAACCCTCA
+TAACCAAGTGAAAAAGGTTCCGGGTACGCAACAGGAACTAGACAAAATGGGGGTTAAAAT
+TAAGATTGAACAATTTAAATCAGGTGCAATTAATAAGCCAGTATCAATTCCGAATACTTA
+TGAAAGATTAAAGCAACATCCAGCTGGACCAGAACAAATAACAAGTAGCATGGTTGAAGG
+TACGATAGAAGCGGTCGATATCATGGTATTCATTCTTGTACTAGGGGGACTTATTGGCGT
+AGTTCAAGCCAGTGGTTCTTTTGAATCGGGATTGTTAGCTTTAACGAAGAAAACAAAAGG
+GCATGAATTTATGCTAATTGTGTTTGTATCAATACTAATGATTATCGGCGGGACGTTATG
+TGGTATTGAAGAAGAAGCTGTAGCATTCTATCCGATTTTAGTCCCTATATTTATAGCGTT
+AGGATACGATTCTATCGTTTCAGTTGGTGCCATATTCCTTGCCAGCTCTGTCGGTAGTAC
+ATTTTCAACTATTAACCCGTTCTCGGTTGTAATTGCCTCTAATGCCGCTGGTACAACTTT
+TACGGATGGCTTGTATTGGAGAATAGGTGCTTGTATTGTCGGTGCGATTTTTGTTATTAG
+TTATTTATATTGGTACTGTAAAAAAATTAAAAACGATCCTAAAGCGTCATATTCTTATGA
+AGACAAAGATGCTTTTGAACAGCAATGGTCTGTATTAAAAGATGACGATAGTGCCCATTT
+TACTTTGCGTAAGAAGATAATCCTTACATTATTTGTACTACCATTTCCAATTATGGTATG
+GGGAGTTATGACGCAAGGTTGGTGGTTCCCAGTTATGGCTTCAGCATTTTTAATATTTAC
+AATTATAATAATGTTTATTGCTGGGACAGGTAAATCTGGATTGGGAGAAAAAGGAACTGT
+AGATGCATTTGTCAATGGTGCATCAAGTTTAGTAGGTGTATCTTTAATTATTGGTTTAGC
+TCGAGGTATTAATTTAGTGTTGAATGAAGGTATGATTTCAGATACAATCTTACACTTTTC
+ATCATCTTTAGTTCAACATATGAGTGGACCATTATTTATCATCGTATTACTATTTATTTT
+CTTCTGTTTAGGTTTTATCGTGCCATCATCTTCTGGATTAGCGGTATTATCAATGCCTAT
+CTTTGCACCACTAGCTGATACAGTAGGTATACCAAGATTCGTCATCGTTACGACATATCA
+ATTTGGTCAATATGCGATGTTATTCTTAGCGCCGACTGGACTTGTTATGGCCACACTACA
+AATGTTAAACATGCGATATTCACATTGGTTCCGATTTGTATGGCCGGTAGTTGCTTTTGT
+ATTGATTTTCGGTGGCGGAGTACTAATTACGCAAGTACTAATTTATTCATAATTTGAAAT
+GCTATATTATAAAAATACTAATTGGGTTTTATGCATCTCGTAGGTTTGTAGAAATACTAA
+ACTAAGCGAGGTGCATTATTATTTTTGATTAAGAAAATAATGACGGTAATGATAACACTA
+GTAAGTAATTGATACAATGCTCTATTTAATAATGATATTTTTAAAATTTGTTTTTAATGT
+AATGTTAGATCTATGGTATATTATATTTATCGTGGTAAATATGTATTTGCTGTAAACAGT
+TTTTGTAACACATGACACATATGGTTGTGAAAATAGCATATATAAAGGATGGCTATAAAT
+GACACATTTGACAAAGGTTTTAGATACACTAACTGGAATATGCGTAGTATTATTATTTAG
+TAAATATTTTGTGGCGTATGCAAATATGGTGTTTGATTGGAATTTAAGATGGTATTTGCT
+AGAAAACATACCACATTTGCCAATTATATTATTTATTCTGATGTTTATTTTCGGAGTACC
+TTCTGAAATGATAAAAGATAGGCAAAGGAAAAATAACGGTGTTTAATTTATCGATATTTA
+GAGGTGATAAAATTTGTCAACTATTACAAAGACTAAAATAAAAATCTTTATTAATTATTT
+TATTGGTATATTAGCCTTGGCGTTTGCTGCTTATATATTATTTAAATAAAGGTTAGAGTG
+AAACGTGTTTATGAACTAGACGTTCTAGTATAGTTACTACAGCTTTAGTCAGAAGGTATC
+ATTGATAAGATCATATTAAATCAAAGAGGCATTGATATACACTAAAAAGAGGCAAGATTA
+CCTGCCTCTTTTTTAGTTATTAAATATGCGTGTTAATTCTTGGTAATAGTGTTACTGAGA
+CGCATTACGAAGAAATTTATCTTGATTTTCCTTTTTTAAAAAGAAGTGAAGATATCCTAT
+AAAGACTCTAAGTACTATTACAGTGGCTAATAACACTTCGATAAACAAAAGACCTTTCCA
+AATATCTGGAAACATAAGTACAGGCAAACTGTTCTTTAAAGCAGTTGCTGAGATTACTAA
+AGGGAATGTGAAAGCTGAAAATACGGGCGAAAAAGGTTCTTTTAGTAATTTAGGCAATTG
+TATAATGATATAAAAATAAAATATTTGAGCTAATATTAAAAATATAATTACGATAAAAGC
+ATTCGCCTTAGGAAAGGCTATAACATATGCTGCAGCGACTAAAGAAAATGGTGCACAAAT
+TGTCGATGTGTTCGGTTTGATTGACGTCTGCAATGGAAATGCTTTTAATCGCTTGAAAAC
+AATAGGTAAGACAATACAAGTAGCTACAAAGCCATATATTACTGTTAATTGACCTATGAA
+AAAACATCCGCTAACGGGTGCCGTCAATCCTGCGATAGCAATACCAATAAAAAGTACAGT
+CCACGAAGGATAAACATTTTCAAGTGAAAAATCTTTTAAATATTTTATTGAAAAAATAAT
+CATATGCGTCATAATTCCCACAAGGCATAAAATCCAAATAGGCGTTATTAAGCTATTGAT
+AAAAGTTATGTTACTAAAAAATGTATTTAAATAAGTAGTACCTAAAAAGCCAGACATGAA
+AAATGTTGTGAACACTGATGAAACTAGAGGACTGTTTAATTGTTCTTTCACATTATTAAA
+ATATTTGATCATAGTACATAAAAGGTGAATCCAAATCAAGAAAGCAAAGATTCCGCAAAC
+AGCGTTTAAAGTAAGAGATAAGTCTTTTAATAGATTACCCAGGCCTAATAATCCTAAGAC
+TAGTCCTGACGTTACTAGAGGTGCTTTTTGAAGTCTCATGATTTACAACCTTTCTTATGT
+GATTTTCTTCACTAATTATATCATGATAGCTATGGCCAATTAATAAGAAGAGTGTGTACT
+ATTACGTTATTAGAGTATGTATATTGGATTGATAGACACATAATTGACATTTAAATCTCA
+AAATTAATGATATTTGAGGTATGCTTTGCAACCTAAATATTGGAATATGTGGAAAAGTAA
+TTATTTAATTTAAATGATGATTGAAGATTTACAAGGGGGTGTACAAAATGAGAAATCAAA
+TTCAAAAACTATTAGACAGTGATTTGAGCAGTTTACATATATCGAAACAAACAGGAGTTC
+CACAAAGCACAATACACAGAATGAGAAAAAAAGAAAGATCATTAGACAATATGTCATTGA
+AAAACGCTGAACTACTTTATAAATTTGCCAATAGTATATTTAGCAATGAAAATTAAGAAG
+AATAATTAGTGAGTTAAATAATTAAAAAACCACGTCTCGATGGACGTGGTTATTTTTTTA
+AATGTATTTAATTATAGGATTTCGGGCCCCTGAAAAGTCCCTAAAATTTTGAATAGTATC
+AAAATCTATAGGAATAATATAATAATAAAAATCTCTACGCATGCAAATGGTGAGTGTTAA
+AAATCTTGGTGTAGTGTTGGTGTAGTTCAGGTGCAGTATAGATGAAATTTGAGCACAAAA
+TTACTTAATTAAAAATGTATAAACAAGTGTCAGACGCGTGTCAATTGTGTGTCAAAAAAT
+TCTATGACGAATAAGCATGCTTGTTTTAATTACTGTATTTTCGGGGCTTATGCGGGGGAC
+GTTTTGGTGACGCATCATACTATATTACTGAAATTCAAAACAAAAAGAGCCCCGTAATCA
+CGGAACTCTTTTGTTTGGTAATGCGTATAAAAATACCTATAAACGTCCTGGGAGGGATTC
+GAACCCCCGACCGATGGCTTAGAAGGCCATTGCTCTATCCAGCTGAGCTACCAGGACACG
+TTTAACAACACAAGAATTATTATATCTAAATGAACTTAAATTAGCAATACCTTGTAAATA
+AAAAATGTTTATATTTTTCACTATTATAGAGCTATTTATCTAAAAAGGTTCAATAAGACT
+TAAATGCGAATTCAGGCAACTTAATTGTGTTAAATACAGTTTTGAATGCCTAACTGTATT
+TCTTTTCTCTTTAAAATACAGTTAAGTACATTATAAGATGTTGTGCGGATAAACAAACTA
+ATTGCATCAAATTTATTTTAAAATAACAACAACAAAACGTTAAGCGAATAACATTTCGGT
+GATTTAAAAGCTACGCACGTTTTTGTTATCTTCAAATTTAAATTTTAAGGAGTGTTTTCA
+ATGGAAGGTTTATTTAACGCAATTAAAGATACCGTAACTGCAGCAATTAATAATGATGGC
+GCAAAATTAGGCACAAGCATTGTGAGCATCGTTGAAAATGGCGTAGGTTTATTAGGTAAA
+TTATTCGGATTCTAATTTCAATATGTTATGTAAGTAATCAGTATTATTTCAAAGGTGAGG
+GAGAGATTTAAATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAAC
+AACATGATAGTGTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTT
+TACTAGGTAAATTATTTGGATTCTAATATAATAACTAATATTCTTTAAAATAAACTGGGT
+GAGCATACTTTAATGTTATGCACTCAGTTTATTTTATTTGCAGAAATTTGAGCCTCTGTT
\ No newline at end of file
diff --git a/t/data/reorder_isolates.tre b/t/data/reorder_isolates.tre
new file mode 100644
index 0000000..0119bd7
--- /dev/null
+++ b/t/data/reorder_isolates.tre
@@ -0,0 +1 @@
+(query_1:6.0,(query_3:5.0,query_4:3.0):5.0,query_2:11.0);
\ No newline at end of file
diff --git a/t/data/reorder_isolates_expected_output.csv b/t/data/reorder_isolates_expected_output.csv
new file mode 100644
index 0000000..57e20f7
--- /dev/null
+++ b/t/data/reorder_isolates_expected_output.csv
@@ -0,0 +1,8 @@
+"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","query_1","query_2","query_3"
+"hly","","Alpha-toxin","3","3","1","","","","","","1_1","2_1","3_1"
+"argF","","Ornithine carbamoyltransferase","2","2","1","","","","","","1_3","","3_3"
+"group_4","","","2","2","1","","","","","","","2_4","3_4"
+"speH","","hypothetical protein","2","2","1","","","","","","1_2","2_2",""
+"group_7","","Gonococcal growth inhibitor III","1","1","1","","","","","","","2_7",""
+"yfnB","","Putative HAD-hydrolase yfnB","1","1","1","","","","","","","","3_5"
+"group_6","","Gonococcal growth inhibitor III","1","1","1","","","","","","1_6","",""
diff --git a/t/data/reorder_isolates_expected_output_breadth_alpha.csv b/t/data/reorder_isolates_expected_output_breadth_alpha.csv
new file mode 100644
index 0000000..57e20f7
--- /dev/null
+++ b/t/data/reorder_isolates_expected_output_breadth_alpha.csv
@@ -0,0 +1,8 @@
+"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","query_1","query_2","query_3"
+"hly","","Alpha-toxin","3","3","1","","","","","","1_1","2_1","3_1"
+"argF","","Ornithine carbamoyltransferase","2","2","1","","","","","","1_3","","3_3"
+"group_4","","","2","2","1","","","","","","","2_4","3_4"
+"speH","","hypothetical protein","2","2","1","","","","","","1_2","2_2",""
+"group_7","","Gonococcal growth inhibitor III","1","1","1","","","","","","","2_7",""
+"yfnB","","Putative HAD-hydrolase yfnB","1","1","1","","","","","","","","3_5"
+"group_6","","Gonococcal growth inhibitor III","1","1","1","","","","","","1_6","",""
diff --git a/t/data/reorder_isolates_expected_output_breadth_creation.csv b/t/data/reorder_isolates_expected_output_breadth_creation.csv
new file mode 100644
index 0000000..57e20f7
--- /dev/null
+++ b/t/data/reorder_isolates_expected_output_breadth_creation.csv
@@ -0,0 +1,8 @@
+"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","query_1","query_2","query_3"
+"hly","","Alpha-toxin","3","3","1","","","","","","1_1","2_1","3_1"
+"argF","","Ornithine carbamoyltransferase","2","2","1","","","","","","1_3","","3_3"
+"group_4","","","2","2","1","","","","","","","2_4","3_4"
+"speH","","hypothetical protein","2","2","1","","","","","","1_2","2_2",""
+"group_7","","Gonococcal growth inhibitor III","1","1","1","","","","","","","2_7",""
+"yfnB","","Putative HAD-hydrolase yfnB","1","1","1","","","","","","","","3_5"
+"group_6","","Gonococcal growth inhibitor III","1","1","1","","","","","","1_6","",""
diff --git a/t/data/reorder_isolates_expected_output_breadth_height.csv b/t/data/reorder_isolates_expected_output_breadth_height.csv
new file mode 100644
index 0000000..57e20f7
--- /dev/null
+++ b/t/data/reorder_isolates_expected_output_breadth_height.csv
@@ -0,0 +1,8 @@
+"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","query_1","query_2","query_3"
+"hly","","Alpha-toxin","3","3","1","","","","","","1_1","2_1","3_1"
+"argF","","Ornithine carbamoyltransferase","2","2","1","","","","","","1_3","","3_3"
+"group_4","","","2","2","1","","","","","","","2_4","3_4"
+"speH","","hypothetical protein","2","2","1","","","","","","1_2","2_2",""
+"group_7","","Gonococcal growth inhibitor III","1","1","1","","","","","","","2_7",""
+"yfnB","","Putative HAD-hydrolase yfnB","1","1","1","","","","","","","","3_5"
+"group_6","","Gonococcal growth inhibitor III","1","1","1","","","","","","1_6","",""
diff --git a/t/data/reorder_isolates_expected_output_breadth_revalpha.csv b/t/data/reorder_isolates_expected_output_breadth_revalpha.csv
new file mode 100644
index 0000000..57e20f7
--- /dev/null
+++ b/t/data/reorder_isolates_expected_output_breadth_revalpha.csv
@@ -0,0 +1,8 @@
+"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","query_1","query_2","query_3"
+"hly","","Alpha-toxin","3","3","1","","","","","","1_1","2_1","3_1"
+"argF","","Ornithine carbamoyltransferase","2","2","1","","","","","","1_3","","3_3"
+"group_4","","","2","2","1","","","","","","","2_4","3_4"
+"speH","","hypothetical protein","2","2","1","","","","","","1_2","2_2",""
+"group_7","","Gonococcal growth inhibitor III","1","1","1","","","","","","","2_7",""
+"yfnB","","Putative HAD-hydrolase yfnB","1","1","1","","","","","","","","3_5"
+"group_6","","Gonococcal growth inhibitor III","1","1","1","","","","","","1_6","",""
diff --git a/t/data/reorder_isolates_expected_output_depth_alpha.csv b/t/data/reorder_isolates_expected_output_depth_alpha.csv
new file mode 100644
index 0000000..57e20f7
--- /dev/null
+++ b/t/data/reorder_isolates_expected_output_depth_alpha.csv
@@ -0,0 +1,8 @@
+"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","query_1","query_2","query_3"
+"hly","","Alpha-toxin","3","3","1","","","","","","1_1","2_1","3_1"
+"argF","","Ornithine carbamoyltransferase","2","2","1","","","","","","1_3","","3_3"
+"group_4","","","2","2","1","","","","","","","2_4","3_4"
+"speH","","hypothetical protein","2","2","1","","","","","","1_2","2_2",""
+"group_7","","Gonococcal growth inhibitor III","1","1","1","","","","","","","2_7",""
+"yfnB","","Putative HAD-hydrolase yfnB","1","1","1","","","","","","","","3_5"
+"group_6","","Gonococcal growth inhibitor III","1","1","1","","","","","","1_6","",""
diff --git a/t/data/reorder_isolates_expected_output_depth_creation.csv b/t/data/reorder_isolates_expected_output_depth_creation.csv
new file mode 100644
index 0000000..57e20f7
--- /dev/null
+++ b/t/data/reorder_isolates_expected_output_depth_creation.csv
@@ -0,0 +1,8 @@
+"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","query_1","query_2","query_3"
+"hly","","Alpha-toxin","3","3","1","","","","","","1_1","2_1","3_1"
+"argF","","Ornithine carbamoyltransferase","2","2","1","","","","","","1_3","","3_3"
+"group_4","","","2","2","1","","","","","","","2_4","3_4"
+"speH","","hypothetical protein","2","2","1","","","","","","1_2","2_2",""
+"group_7","","Gonococcal growth inhibitor III","1","1","1","","","","","","","2_7",""
+"yfnB","","Putative HAD-hydrolase yfnB","1","1","1","","","","","","","","3_5"
+"group_6","","Gonococcal growth inhibitor III","1","1","1","","","","","","1_6","",""
diff --git a/t/data/reorder_isolates_expected_output_depth_height.csv b/t/data/reorder_isolates_expected_output_depth_height.csv
new file mode 100644
index 0000000..57e20f7
--- /dev/null
+++ b/t/data/reorder_isolates_expected_output_depth_height.csv
@@ -0,0 +1,8 @@
+"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","query_1","query_2","query_3"
+"hly","","Alpha-toxin","3","3","1","","","","","","1_1","2_1","3_1"
+"argF","","Ornithine carbamoyltransferase","2","2","1","","","","","","1_3","","3_3"
+"group_4","","","2","2","1","","","","","","","2_4","3_4"
+"speH","","hypothetical protein","2","2","1","","","","","","1_2","2_2",""
+"group_7","","Gonococcal growth inhibitor III","1","1","1","","","","","","","2_7",""
+"yfnB","","Putative HAD-hydrolase yfnB","1","1","1","","","","","","","","3_5"
+"group_6","","Gonococcal growth inhibitor III","1","1","1","","","","","","1_6","",""
diff --git a/t/data/reorder_isolates_expected_output_depth_revalpha.csv b/t/data/reorder_isolates_expected_output_depth_revalpha.csv
new file mode 100644
index 0000000..57e20f7
--- /dev/null
+++ b/t/data/reorder_isolates_expected_output_depth_revalpha.csv
@@ -0,0 +1,8 @@
+"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","query_1","query_2","query_3"
+"hly","","Alpha-toxin","3","3","1","","","","","","1_1","2_1","3_1"
+"argF","","Ornithine carbamoyltransferase","2","2","1","","","","","","1_3","","3_3"
+"group_4","","","2","2","1","","","","","","","2_4","3_4"
+"speH","","hypothetical protein","2","2","1","","","","","","1_2","2_2",""
+"group_7","","Gonococcal growth inhibitor III","1","1","1","","","","","","","2_7",""
+"yfnB","","Putative HAD-hydrolase yfnB","1","1","1","","","","","","","","3_5"
+"group_6","","Gonococcal growth inhibitor III","1","1","1","","","","","","1_6","",""
diff --git a/t/data/reorder_isolates_input.csv b/t/data/reorder_isolates_input.csv
new file mode 100644
index 0000000..4747e68
--- /dev/null
+++ b/t/data/reorder_isolates_input.csv
@@ -0,0 +1,8 @@
+"Gene","Non-unique Gene name","Annotation","No. isolates","No. sequences","Avg sequences per isolate","Genome Fragment","Order within Fragment","Accessory Fragment","Accessory Order with Fragment","QC","query_1","query_2","query_3"
+"hly","","Alpha-toxin","3","3","1","","","","","","1_1","2_1","3_1"
+"argF","","Ornithine carbamoyltransferase","2","2","1","","","","","","1_3","","3_3"
+"group_4","","","2","2","1","","","","","","","2_4","3_4"
+"speH","","hypothetical protein","2","2","1","","","","","","1_2","2_2",""
+"group_7","","Gonococcal growth inhibitor III","1","1","1","","","","","","","2_7",""
+"yfnB","","Putative HAD-hydrolase yfnB","1","1","1","","","","","","","","3_5"
+"group_6","","Gonococcal growth inhibitor III","1","1","1","","","","","","1_6","",""
\ No newline at end of file
diff --git a/t/data/sequences_with_unknowns.faa b/t/data/sequences_with_unknowns.faa
new file mode 100644
index 0000000..9169433
--- /dev/null
+++ b/t/data/sequences_with_unknowns.faa
@@ -0,0 +1,26 @@
+>1234_00186 GGDEF family protein
+MNFENEYDLEKLVNNSLDLLTIVDLSGNVLLVNPAFERTLGWKKEDLVGKDPFHLLHPED
+KESTYKEFEKLNQGLLTLSFQNRYICADGQYRYFSWTASPDLVSGLVYVTGRDITDVIES
+NRKISQLAVKLKETNDRLFEQASTDPLTKLKNRRMFNEELNNLIHACDKESHPLSLLMID
+ADHFKDYNDKFGHIAGDKVLVELASILTKTFRKKDVLARYGGEEFIAALPNTSEPEANQI
+AERLVQTVREFSWEKRSVTISVGITTYNFNPTSKSINSEYLLNLIEQADKALYCSKVSGR
+XXXXXXXXXXXXXXXXXXXYYI*
+>1234_00325 only a few unknowns so keep
+MAFDPSVPQQQAQAPAGTLLFPEGSSANTLNVLHSGTVRYLTEVPGGRKLELFKLNGANL
+TPGSVALFTSGRYPFHLQAEEACVISTYAMNRDTISKSVGSRVSLGLMVARTLLREITEL
+FKKSNQIRKITSEIEKVNDNLSILYYQFNPSVFPDIKPGSPIPEVSADVVDPVMRLCREN
+LKLFFDNGGILPDRPSPQFLEEEHESQLTRLYPEEIDFQDGEFNFIRKLVMQDPKILNVL
+FTADPSMLAYVCSKLANVLDQISGILKTCLTDLDEAFRIFFIGENSLVEKFYLILDITSS
+GYGTAPAEFVIPVLGAFAGKIEKYKNGHQALFGVPVANISPNTQAFQSKAVTLAKKMEET
+APKVQAPVTSSATAGVDVDAIRKELDNSASVIIQFSGLGAEQIKEFSALMVKVKSLKNPL
+DPEGDNRKVRRTLGRHYWDMYQECFTKYMNSNRNVPKPVELMLKYGYFDETLVDDSQIAF
+MYTQKDPANFTSNVPISLGTEWLEKVFKREVPTSLDEMGQNFFEKVKLENRNIVIKKESD
+IPPELDNPDTRLKFEFASLYEANVRLTSGSPATHFPILTKFHSQMAIDKSYVSKKILEEV
+VHELMAVDYSIFHREVIYNNNELGITKEFIQKCVIPDFILVPSIGTKVMMWQDLSIHRGA
+GSKESPGRIVLPIFAQGDLKTMVADALAAFRWELTKSILGAEWNNVGNPSITADYTDYIQ
+FFKKNKDLSMEIKEKLASDFKRFRNDRDIFANDYQLWMKYEADGVQRLNKVVRGIFYRHI
+PFSKQVRDKVAXXXXXXXXXXXXXLLPLQRSITDLLILEIENILK*
+>1234_00377 Uncharacterized protein conserved in bacteria
+MTLNEFAKNVLFGSGLEDKLFSPPVHPVDIRSFDFLNVPSLPAREKKIQISEQKSKIPRL
+EQLFNEENRIITLHHFANHELMAIELFAWAILKFQDAPSSIRFGLYRTLLEEQTHLKMYL
+SEMKKGGMELGDRPLNFIFWKQVPKMQTLEKFXXXXXXXXXXXXXXXXXF*
diff --git a/t/data/shred1.gff b/t/data/shred1.gff
new file mode 100644
index 0000000..1df7f57
--- /dev/null
+++ b/t/data/shred1.gff
@@ -0,0 +1,9 @@
+##gff-version 3
+##sequence-region shredA 1 40
+##sequence-region shredB 1 40
+shredA	Prodigal:2.60	CDS	179	1471	.	-	0	ID=11983_1#21_00001;eC_number=2.7.13.3;gene=regB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_005150533.1,similar to AA sequence:UniProtKB:Q3J6C1,protein motif:CLUSTERS:PRK11360,protein motif:Cdd:COG4191,protein motif:TIGRFAMs:TIGR02966,protein motif:Pfam:PF02518.20;locus_tag=11983_1#21_00001;product=integral membrane sensor signal transduction histidine kinase,Sensor histidine kinase regB,sensory histidine kinase A [...]
+##FASTA
+>shredA
+AAAAAAAAAACCCCCCCCCCGGGGGGGGGGTTTTTTTTTT
+>shredB
+AAAAAAAAAACCCCCCCCCCGGGGGGGGGGTTTTTTTTTT
diff --git a/t/data/shred1.shred.fa b/t/data/shred1.shred.fa
new file mode 100644
index 0000000..6cc6a07
--- /dev/null
+++ b/t/data/shred1.shred.fa
@@ -0,0 +1,16 @@
+>shred1_1
+AAAAAAAAAA
+>shred1_2
+CCCCCCCCCC
+>shred1_3
+GGGGGGGGGG
+>shred1_4
+TTTTTTTTTT
+>shred1_5
+AAAAAAAAAA
+>shred1_6
+CCCCCCCCCC
+>shred1_7
+GGGGGGGGGG
+>shred1_8
+TTTTTTTTTT
diff --git a/t/data/shred2.gff b/t/data/shred2.gff
new file mode 100644
index 0000000..142a41e
--- /dev/null
+++ b/t/data/shred2.gff
@@ -0,0 +1,9 @@
+##gff-version 3
+##sequence-region shredA 1 40
+##sequence-region shredB 1 40
+shredA	Prodigal:2.60	CDS	179	1471	.	-	0	ID=11983_1#21_00001;eC_number=2.7.13.3;gene=regB;inference=ab initio prediction:Prodigal:2.60,similar to AA sequence:RefSeq:YP_005150533.1,similar to AA sequence:UniProtKB:Q3J6C1,protein motif:CLUSTERS:PRK11360,protein motif:Cdd:COG4191,protein motif:TIGRFAMs:TIGR02966,protein motif:Pfam:PF02518.20;locus_tag=11983_1#21_00001;product=integral membrane sensor signal transduction histidine kinase,Sensor histidine kinase regB,sensory histidine kinase A [...]
+##FASTA
+>shredA
+AAAAAAAAAAGGGGGGGGGGTTTTTTTTTTCCCCCCCCCC
+>shredB
+AAAAAAAAAACCCCCCCCCCTTTTTTTTTTGGGGGGGGGG
diff --git a/t/data/shred2.shred.fa b/t/data/shred2.shred.fa
new file mode 100644
index 0000000..1d474dd
--- /dev/null
+++ b/t/data/shred2.shred.fa
@@ -0,0 +1,16 @@
+>shred2_1
+AAAAAAAAAA
+>shred2_2
+GGGGGGGGGG
+>shred2_3
+TTTTTTTTTT
+>shred2_4
+CCCCCCCCCC
+>shred2_5
+AAAAAAAAAA
+>shred2_6
+CCCCCCCCCC
+>shred2_7
+TTTTTTTTTT
+>shred2_8
+GGGGGGGGGG
diff --git a/t/data/sopB.fa.aln b/t/data/sopB.fa.aln
new file mode 100644
index 0000000..b73d36b
--- /dev/null
+++ b/t/data/sopB.fa.aln
@@ -0,0 +1,60 @@
+>11111_1#11_04059
+ATGCAAATACAGAGCTTCTATCACTCAGCTTCACTAAAAACCCAGGAGGCTTTTAAAAGC
+CTACAAAAAACCTTATACAACGGAATGCAGATTCTCTCAGGCCAGGGCAAAGCGCCGGCT
+AAAGCGCCCGACGCTCGCCCGGAAATTATTGTCCTGCGAGAACCTGGCGCGACATGGGGG
+AATTATCTACAGCATCAGAAGACGTCTAACCACTCGCTGCATAACCTCTATAACTTACAG
+CGCGATCTTCTTACCGTCGCGGCAACCGTTCTGGGTAAACAAGACCCGGTTCTAACGTCA
+ATGGCAAACCAAATGGAGTTAGCCAAAGTTAAAGCGGACCGGCCAGCAACAAAACAAGAA
+GAAGCTGCGGCAAAAGCATTGAAGAAAAATCTTATCGAACTTATTGCAGCACGCACTCAG
+CAGCAAAATGGCTTACCTGCAAAAGAAGCTCATCGCTTTGCGGCAGTAGCGTTTAGAGAT
+GCTCAGGTCAAGCAGCTCAATAACCAGCCCTGGCAAACCATAAAAAATACACTCACGCAT
+AACGGGCATCACTATACCAACACGCAGCTCCCTGCCGCAGAGATGAAAATCGGCGCAAAA
+GATATCTTTCCCAGTGCTTATGAGGGAAAGGGCGTATGCAGTTGGGATACCAAGAATATT
+CATCACGCCAATAATTTGTGGATGTCCACGGTGAGTGTGCATGAGGACGGTAAAGATAAA
+ACGCTTTTTTGCGGGATACGTCATGGTGTGCTTTCCCCCTATCATGAAAAAGATCCGCTT
+CTGCGTCAGGCCGGCGCTGAAAACAAAGCCAAAGAAGTATTAGCTGCGGCACTTTTTAGT
+AAACCTGAGTTGCTTAACAGAGCCTTAGAGGGCGAAGCGGTAAGCCTGAAACTGGTATCC
+GTCGGGTTACTCACCGCGTCGAATATTTTCGGCAAAGAGGGAACTATGGTCGAGGATCAA
+ATGCGCGCATGGCAATCGTTGACCCAGCCGGGAAAAATGATTCATTTAAAAATCCGCAAT
+AAAGATGGCGATCTACAGACGGTAAAAATAAAACCGGACGTCGCCGCATTTAATGTGGGT
+GTTAATGAGCTGGCGCTCAAGCTCGGCTTTGGCCTTAAAGCATCAGATAGCTATAATGCC
+GAAGCGCTACATCAGTTATTAGGCAATGATTTACGCCCTGAAGCCAGACCAGGTGGCTGG
+GTTGGCGAATGGCTGGCGCAATACCCGGATAATTATGAGGTCGTCAATACATTAGCGCGC
+CAGATTAAGGATATCTGGAAAAATAACCAACATCATAAAGATGGCGGCGAACCCTATAAA
+CTCGCACAACGCCTTGCCATGTTAGCCCATGAAATTGACGCGGTGCCCGCCTGGAATTGT
+AAAAGCGGCAAAGATCGTACAGGGATGATGGATTCAGAAATCAAGCGAGAGCTCATTTCT
+TTCCATCAGACCCATATGTTAAGTGCGCCTGGTAGTCTTCCGGATAGCGGTGGACAGAAA
+ATTTTCCAAAAAGTATTACTGAATAGCGGTAACCTGGAGATTCAGAAACAAAATACGGGC
+GGGGCGGGAAACAAAGTAATGAAAAATTTATCGCCAGAGGTGCTCAATCTTTCCTATCAA
+AAACGAGTTGGGGATGAAAATATTTGGCAGTCAGTAAAAGGTATTTCTTCATTAATCACA
+TCT
+>22222_2#22_04059
+ATGCAAATACAGAGCTTCTATCACTCAGCTTCACTAAAAACCCAGGAGGCTTTTAAAAGC
+CTACAAAAAACCTTATACAACGGAATGCAGATTCTCTCAGGCCAGGGCAAAGCGCCGGCT
+AAAGCGCCCGACGCTCGCCCGGAAATTATTGTCCTGCGAGAACCTGGCGCGACATGGGGG
+AATTATCTACAGCATCAGAAGACGTCTAACCACTCGCTGCATAACCTCTATAACTTACAG
+CGCGATCTTCTTACCGTCGCGGCAACCGTTCTGGGTAAACAAGACCCGGTTCTAACGTCA
+ATGGCAAACCAAATGGAGTTAGCCAAAGTTAAAGCGGACCGGCCAGCAACAAAACAAGAA
+GAAGCTGCGGCAAAAGCATTGAAGAAAAATCTTATCGAACTTATTGCAGCACGCACTCAG
+CAGCAAAATGGCTTACCTGCAAAAGAAGCTCATCGCTTTGCGGCAGTAGCGTTTAGAGAT
+GCTCAGGTCAAGCAGCTCAATAACCAGCCCTGGCAAACCATAAAAAATACACTCACGCAT
+AACGGGCATCACTATACCAACACGCAGCTCCCTGCCGCAGAGATGAAAATCGGCGCAAAA
+GATATCTTTCCCAGTGCTTATGAGGGAAAGGGCGTATGCAGTTGGGATACCAAGAATATT
+CATCACGCCAATAATTTGTGGATGTCCACGGTGAGTGTGCATGAGGACGGTAAAGATAAA
+ACGCTTTTTTGCGGGATACGTCATGGTGTGCTTTCCCCCTATCATGAAAAAGATCCGCTT
+CTGCGTCAGGCCGGCGCTGAAAACAAAGCCAAAGAAGTATTAGCTGCGGCACTTTTTAGT
+AAACCTGAGTTGCTTAACAGAGCCTTAGAGGGCGAAGCGGTAAGCCTGAAACTGGTATCC
+GTCGGGTTACTCACCGCGTCGAATATTTTCGGCAAAGAGGGAACTATGGTCGAGGATCAA
+ATGCGCGCATGGCAATCGTTGACCCAGCCGGGAAAAATGATTCATTTAAAAATCCGCAAT
+AAAGATGGCGATCTACAGACGGTAAAAATAAAACCGGACGTCGCCGCATTTAATGTGGGT
+GTTAATGAGCTGGCGCTCAAGCTCGGCTTTGGCCTTAAAGCATCAGATAGCTATAATGCC
+GAAGCGCTACATCAGTTATTAGGCAATGATTTACGCCCTGAAGCCAGACCAGGTGGCTGG
+GTTGGCGAATGGCTGGCGCAATACCCGGATAATTATGAGGTCGTCAATACATTAGCGCGC
+CAGATTAAGGATATCTGGAAAAATAACCAACATCATAAAGATGGCGGCGAACCCTATAAA
+CTCGCACAACGCCTTGCCATGTTAGCCCATGAAATTGACGCGGTGCCCGCCTGGAATTGT
+AAAAGCGGCAAAGATCGTACAGGGATGATGGATTCAGAAATCAAGCGAGAGCTCATTTCT
+TTCCATCAGACCCATATGTTAAGTGCGCCTGGTAGTCTTCCGGATAGCGGTGGACAGAAA
+ATTTTCCAAAAAGTATTACTGAATAGCGGTAACCTGGAGATTCAGAAACAAAATACGGGC
+GGGGCGGGAAACAAAGTAATGAAAAATTTATCGCCAGAGGTGCTCAATCTTTCCTATCAA
+AAACGAGTTGGGGATGATAATATTTGGCAGTCAGTAAAAGGTATTTCTTCATTAATCACA
+TCT
diff --git a/t/data/speH.fa.aln b/t/data/speH.fa.aln
new file mode 100644
index 0000000..f6502a9
--- /dev/null
+++ b/t/data/speH.fa.aln
@@ -0,0 +1,32 @@
+>2_2 
+ATGAACAATAACATCACGAAAAAAATTATTTTATCAACAACATTGTTACT
+ATTAGGTACAGCATTTACACAATTTCCTAATACACCTATCAATTCTTCAT
+CTGAAGCGAAAGCTTATTATATAAATCAAAACGAAACTAACGTTAATGAG
+TTAACTAAATATTACTCGCAAAAATATTTAACCTTCTCTAACAGTACGTT
+ATGGCAAAAAGATAACGGTACGATTCATGCAACGTTGTTACAGTTTTCTT
+GGTATAGTCATATTCAAGTTTATGGACCTGAAAGTTGGGGCAATATCAAC
+CAATTAAGAAATAAAAGCGTTGATATTTTTGGCATAAAAGACCAAGAAAC
+CATTGATTCTTTTGCATTATCTCAAGAAACGTTTACTGGTGGTGTTACTC
+CTGCAGCAACATCTAACGATAAACACTATAAACTGAATGTAACATATAAA
+GATAAAGCAGAAACGTTTACTGGCGGATTTCCAGTTTATGAAGGCAATAA
+GCCTGTTTTAACTTTAAAAGAATTAGATTTTCGTATTCGTCAAACATTAA
+TTAAAAGTAAAAAATTATATAATAATTCTTATAATAAAGGACAAATTAAA
+ATAACAGGTACAGACAATAACTACACAATAGATTTAAGTAAAAGGTTGCC
+ATCAACTGATGCAAATAGATATGTTAAAAAACCTCAAAATGCAAAAATTG
+AAGTTATCCTCGAAAAATCAAAC
+>1_2 
+ATGAACAATAACATCACGAAAAAAATTATTTTATCAACAACATTGTTACT
+ATTAGGTACAGCATTTACACAATTTCCTAATACACCTATCAATTCTTCAT
+CTGAAGCGAAAGCTTATTATATAAATCAAAACGAAACTAACGTTAATGAG
+TTAACTAAATATTACTCGCAAAAATATTTAACCTTCTCTAACAGTACGTT
+ATGGCAAAAAGATAACGGTACGATTCATGCAACGTTGTTACAGTTTTCTT
+GGTATAGTCATATTCAAGTTTATGGACCTGAAAGTTGGGGCAATATCAAC
+CAATTAAGAAATAAAAGCGTTGATATTTTTGGCATAAAAGACCAAGAAAC
+CATTGATTCTTTTGCATTATCTCAAGAAACGTTTACTGGTGGTGTTACTC
+CTGCAGCAACATCTAACGATAAACACTATAAACTGAATGTAACATATAAA
+GATAAAGCAGAAACGTTTACTGGCGGATTTCCAGTTTATGAAGGCAATAA
+GCCTGTTTTAACTTTAAAAGAATTAGATTTTCGTATTCGTCAAACATTAA
+TTAAAAGTAAAAAATTATATAATAATTCTTATAATAAAGGACAAATTAAA
+ATAACAGGTACAGACAATAACTACACAATAGATTTAAGTAAAAGGTTGCC
+ATCAACTGATGCAAATAGATATGTTAAAAAACCTCAAAATGCAAAAATTG
+AAGTTATCCTCGAAAAATCAAAC
diff --git a/t/data/split_groups/paralog_clusters1 b/t/data/split_groups/paralog_clusters1
new file mode 100644
index 0000000..a8dc985
--- /dev/null
+++ b/t/data/split_groups/paralog_clusters1
@@ -0,0 +1,13 @@
+abc_02	def_02
+abc_03	def_03
+abc_04	def_04
+abc_05	def_05	abc_12	def_12
+abc_06	def_06
+abc_07	def_07
+abc_08	def_08
+abc_09	def_09
+abc_10	def_10
+abc_11	def_11
+abc_13	def_13
+abc_14	def_14
+abc_15	def_15
\ No newline at end of file
diff --git a/t/data/split_groups/paralog_clusters2 b/t/data/split_groups/paralog_clusters2
new file mode 100644
index 0000000..256f184
--- /dev/null
+++ b/t/data/split_groups/paralog_clusters2
@@ -0,0 +1,11 @@
+abc_02	def_03
+abc_03	def_04
+abc_05	def_05	abc_12	def_12
+abc_06	def_06
+abc_07	def_07
+abc_08	def_08
+abc_09	def_09
+abc_10	def_10
+abc_13	def_13
+abc_14	def_14
+abc_15	def_15
\ No newline at end of file
diff --git a/t/data/split_groups/paralog_clusters3 b/t/data/split_groups/paralog_clusters3
new file mode 100644
index 0000000..93a44e3
--- /dev/null
+++ b/t/data/split_groups/paralog_clusters3
@@ -0,0 +1,13 @@
+abc_02	def_02
+abc_03	def_03
+abc_04	def_04
+abc_05	def_05	abc_12	def_12 abc_20
+abc_06	def_06
+abc_07	def_07
+abc_08	def_08
+abc_09	def_09
+abc_10	def_10
+abc_11	def_11
+abc_13	def_13
+abc_14	def_14
+abc_15	def_15
\ No newline at end of file
diff --git a/t/data/split_groups/paralog_clusters4 b/t/data/split_groups/paralog_clusters4
new file mode 100644
index 0000000..9693ce7
--- /dev/null
+++ b/t/data/split_groups/paralog_clusters4
@@ -0,0 +1,14 @@
+abc_02	def_02
+abc_03	def_03
+abc_04	def_04
+abc_05	def_05	ghi_05	abc_12	def_12 abc_20 abc_13	def_14	def_25 ghi_23
+abc_06	def_06	ghi_06
+abc_07	def_07	ghi_07
+abc_08	def_08
+abc_09	def_09
+abc_10	def_10
+abc_11	def_11
+abc_14	def_13
+abc_15	def_15
+abc_22	def_22	ghi_22
+abc_23	def_23	ghi_23
\ No newline at end of file
diff --git a/t/data/split_groups/paralog_exp_clusters1 b/t/data/split_groups/paralog_exp_clusters1
new file mode 100644
index 0000000..511036d
--- /dev/null
+++ b/t/data/split_groups/paralog_exp_clusters1
@@ -0,0 +1,14 @@
+abc_02	def_02
+abc_03	def_03
+abc_04	def_04
+abc_05	def_05
+abc_12	def_12
+abc_06	def_06
+abc_07	def_07
+abc_08	def_08
+abc_09	def_09
+abc_10	def_10
+abc_11	def_11
+abc_13	def_13
+abc_14	def_14
+abc_15	def_15
diff --git a/t/data/split_groups/paralog_exp_clusters2 b/t/data/split_groups/paralog_exp_clusters2
new file mode 100644
index 0000000..767d19b
--- /dev/null
+++ b/t/data/split_groups/paralog_exp_clusters2
@@ -0,0 +1,12 @@
+abc_02	def_03
+abc_03	def_04
+abc_05	def_05
+abc_12	def_12
+abc_06	def_06
+abc_07	def_07
+abc_08	def_08
+abc_09	def_09
+abc_10	def_10
+abc_13	def_13
+abc_14	def_14
+abc_15	def_15
diff --git a/t/data/split_groups/paralog_exp_clusters3 b/t/data/split_groups/paralog_exp_clusters3
new file mode 100644
index 0000000..009a6f0
--- /dev/null
+++ b/t/data/split_groups/paralog_exp_clusters3
@@ -0,0 +1,15 @@
+abc_02	def_02
+abc_03	def_03
+abc_04	def_04
+abc_05	def_05
+abc_12	def_12
+abc_20
+abc_06	def_06
+abc_07	def_07
+abc_08	def_08
+abc_09	def_09
+abc_10	def_10
+abc_11	def_11
+abc_13	def_13
+abc_14	def_14
+abc_15	def_15
diff --git a/t/data/split_groups/paralog_exp_clusters4 b/t/data/split_groups/paralog_exp_clusters4
new file mode 100644
index 0000000..ed93be4
--- /dev/null
+++ b/t/data/split_groups/paralog_exp_clusters4
@@ -0,0 +1,17 @@
+abc_02	def_02
+abc_03	def_03
+abc_04	def_04
+abc_05	def_05	ghi_05
+abc_12	def_12
+abc_20	def_25	ghi_23
+abc_13	def_14
+abc_06	def_06	ghi_06
+abc_07	def_07	ghi_07
+abc_08	def_08
+abc_09	def_09
+abc_10	def_10
+abc_11	def_11
+abc_14	def_13
+abc_15	def_15
+abc_22	def_22	ghi_22
+abc_23	def_23	ghi_23
diff --git a/t/data/split_groups/paralogs1.fa b/t/data/split_groups/paralogs1.fa
new file mode 100644
index 0000000..e098cb0
--- /dev/null
+++ b/t/data/split_groups/paralogs1.fa
@@ -0,0 +1,50 @@
+>abc_01
+AAAAAAAAAAAAAAAAAAA
+>abc_02
+AAAAAAAAAAAAAAAAAAA
+>abc_03
+AAAAAAAAAAAAAAAAAAA
+>abc_04
+AAAAAAAAAAAAAAAAAAA
+>abc_05
+AAAAAAAAAAAAAAAAAAA
+>abc_06
+AAAAAAAAAAAAAAAAAAA
+>abc_07
+AAAAAAAAAAAAAAAAAAA
+>abc_08
+AAAAAAAAAAAAAAAAAAA
+>abc_09
+AAAAAAAAAAAAAAAAAAA
+>abc_10
+AAAAAAAAAAAAAAAAAAA
+>abc_11
+AAAAAAAAAAAAAAAAAAA
+>abc_12
+AAAAAAAAAAAAAAAAAAA
+>abc_13
+AAAAAAAAAAAAAAAAAAA
+>abc_14
+AAAAAAAAAAAAAAAAAAA
+>abc_15
+AAAAAAAAAAAAAAAAAAA
+>abc_16
+AAAAAAAAAAAAAAAAAAA
+>abc_17
+AAAAAAAAAAAAAAAAAAA
+>abc_18
+AAAAAAAAAAAAAAAAAAA
+>abc_19
+AAAAAAAAAAAAAAAAAAA
+>abc_20
+AAAAAAAAAAAAAAAAAAA
+>abc_21
+AAAAAAAAAAAAAAAAAAA
+>abc_22
+AAAAAAAAAAAAAAAAAAA
+>abc_23
+AAAAAAAAAAAAAAAAAAA
+>abc_24
+AAAAAAAAAAAAAAAAAAA
+>abc_25
+AAAAAAAAAAAAAAAAAAA
\ No newline at end of file
diff --git a/t/data/split_groups/paralogs2.fa b/t/data/split_groups/paralogs2.fa
new file mode 100644
index 0000000..62c3182
--- /dev/null
+++ b/t/data/split_groups/paralogs2.fa
@@ -0,0 +1,50 @@
+>def_01
+AAAAAAAAAAAAAAAAAAA
+>def_02
+AAAAAAAAAAAAAAAAAAA
+>def_03
+AAAAAAAAAAAAAAAAAAA
+>def_04
+AAAAAAAAAAAAAAAAAAA
+>def_05
+AAAAAAAAAAAAAAAAAAA
+>def_06
+AAAAAAAAAAAAAAAAAAA
+>def_07
+AAAAAAAAAAAAAAAAAAA
+>def_08
+AAAAAAAAAAAAAAAAAAA
+>def_09
+AAAAAAAAAAAAAAAAAAA
+>def_10
+AAAAAAAAAAAAAAAAAAA
+>def_11
+AAAAAAAAAAAAAAAAAAA
+>def_12
+AAAAAAAAAAAAAAAAAAA
+>def_13
+AAAAAAAAAAAAAAAAAAA
+>def_14
+AAAAAAAAAAAAAAAAAAA
+>def_15
+AAAAAAAAAAAAAAAAAAA
+>def_16
+AAAAAAAAAAAAAAAAAAA
+>def_17
+AAAAAAAAAAAAAAAAAAA
+>def_18
+AAAAAAAAAAAAAAAAAAA
+>def_19
+AAAAAAAAAAAAAAAAAAA
+>def_20
+AAAAAAAAAAAAAAAAAAA
+>def_21
+AAAAAAAAAAAAAAAAAAA
+>def_22
+AAAAAAAAAAAAAAAAAAA
+>def_23
+AAAAAAAAAAAAAAAAAAA
+>def_24
+AAAAAAAAAAAAAAAAAAA
+>def_25
+AAAAAAAAAAAAAAAAAAA
\ No newline at end of file
diff --git a/t/data/split_groups/paralogs3.fa b/t/data/split_groups/paralogs3.fa
new file mode 100644
index 0000000..d5dd3c6
--- /dev/null
+++ b/t/data/split_groups/paralogs3.fa
@@ -0,0 +1,50 @@
+>ghi_01
+AAAAAAAAAAAAAAAAAAA
+>ghi_02
+AAAAAAAAAAAAAAAAAAA
+>ghi_03
+AAAAAAAAAAAAAAAAAAA
+>ghi_04
+AAAAAAAAAAAAAAAAAAA
+>ghi_05
+AAAAAAAAAAAAAAAAAAA
+>ghi_06
+AAAAAAAAAAAAAAAAAAA
+>ghi_07
+AAAAAAAAAAAAAAAAAAA
+>ghi_08
+AAAAAAAAAAAAAAAAAAA
+>ghi_09
+AAAAAAAAAAAAAAAAAAA
+>ghi_10
+AAAAAAAAAAAAAAAAAAA
+>ghi_11
+AAAAAAAAAAAAAAAAAAA
+>ghi_12
+AAAAAAAAAAAAAAAAAAA
+>ghi_13
+AAAAAAAAAAAAAAAAAAA
+>ghi_14
+AAAAAAAAAAAAAAAAAAA
+>ghi_15
+AAAAAAAAAAAAAAAAAAA
+>ghi_16
+AAAAAAAAAAAAAAAAAAA
+>ghi_17
+AAAAAAAAAAAAAAAAAAA
+>ghi_18
+AAAAAAAAAAAAAAAAAAA
+>ghi_19
+AAAAAAAAAAAAAAAAAAA
+>ghi_20
+AAAAAAAAAAAAAAAAAAA
+>ghi_21
+AAAAAAAAAAAAAAAAAAA
+>ghi_22
+AAAAAAAAAAAAAAAAAAA
+>ghi_23
+AAAAAAAAAAAAAAAAAAA
+>ghi_24
+AAAAAAAAAAAAAAAAAAA
+>ghi_25
+AAAAAAAAAAAAAAAAAAA
\ No newline at end of file
diff --git a/t/data/split_pan_genome_sequences/argF.fa b/t/data/split_pan_genome_sequences/argF.fa
new file mode 100644
index 0000000..fc60aab
--- /dev/null
+++ b/t/data/split_pan_genome_sequences/argF.fa
@@ -0,0 +1,36 @@
+>1_3
+ATGAAAAATTTACGAAACAGAAGTTTTTTAACTTTATTAGACTTTTCACGACAAGAGGTA
+GAATTCTTATTAACACTCTCCGAGGATTTAAAACGTGCTAAATATATTGGCACTGAAAAG
+CCTATGTTAAAAAATAAAAATATTGCACTGTTATTTGAAAAAGATTCTACAAGAACGCGA
+TGTGCATTTGAAGTTGCAGCGCATGATCAAGGTGCAAATGTAACTTATTTAGGCCCAACT
+GGATCACAAATGGGTAAAAAAGAAACAACTAAAGATACTGCACGTGTGCTTGGTGGAATG
+TATGATGGCATTGAATACCGTGGTTTTTCACAAAGAACAGTAGAAACTTTAGCTGAAAAT
+TCAGGCGTACCAGTGTGGAATGGTTTAACTGATGAAGATCATCCTACTCAAGTTCTTGCT
+GATTTCTTAACAGCAAAAGAAGTCTTAAAAAAAGATTATGCAGATATTAACTTTACATAT
+GTTGGAGATGGTCGTAATAACGTTGCAAATGCATTAATGCAAGGTGCTGCCATTATGGGT
+ATGAACTTCCATTTAGTTTGTCCAAAAGAATTAAATCCAACAGATGAATTATTAAATCGC
+TGTAAAAATATTGCCGCTGAAAATGGTGGCAACATATTAATCACAGATGATATTGACCAA
+GGTGTAAAAGGTTCGGATGTAATTTACACTGATGTTTGGGTATCAATGGGTGAACCTGAT
+GAAGTATGGAAAGAACGACTTGAATTATTGAAACCATATCAAGTAAATAAAGAAATGATG
+GATAAAACTGGTAATCCAAATGTTATTTTTGAGCATTGCTTACCATCTTTCCATAATGCT
+GATACGAAAATTGGTCAACAAATTTTTGAAAAATATGGTATTCGAGAAATGGAAGTTACA
+GATGAAGTATTCGAAAGTAAAGCTTCAGTTGTATTCCAAGAAGCTGAGAACAGAATGCAT
+ACAATCAAAGCAGTCATGGTTGCTACATTGGGTGAATTTTAA
+>3_3
+ATGAAAAATTTACGAAACAGAAGTTTTTTAACTTTATTAGACTTTTCACGACAAGAGGTA
+GAATTCTTATTAACACTCTCCGAGGATTTAAAACGTGCTAAATATATTGGCACTGAAAAG
+CCTATGTTAAAAAATAAAAATATTGCACTGTTATTTGAAAAAGATTCTACAAGAACGCGA
+TGTGCATTTGAAGTTGCAGCGCATGATCAAGGTGCAAATGTAACTTATTTAGGCCCAACT
+GGATCACAAATGGGTAAAAAAGAAACAACTAAAGATACTGCACGTGTGCTTGGTGGAATG
+TATGATGGCATTGAATACCGTGGTTTTTCACAAAGAACAGTAGAAACTTTAGCTGAAAAT
+TCAGGCGTACCAGTGTGGAATGGTTTAACTGATGAAGATCATCCTACTCAAGTTCTTGCT
+GATTTCTTAACAGCAAAAGAAGTCTTAAAAAAAGATTATGCAGATATTAACTTTACATAT
+GTTGGAGATGGTCGTAATAACGTTGCAAATGCATTAATGCAAGGTGCTGCCATTATGGGT
+ATGAACTTCCATTTAGTTTGTCCAAAAGAATTAAATCCAACAGATGAATTATTAAATCGC
+TGTAAAAATATTGCCGCTGAAAATGGTGGCAACATATTAATCACAGATGATATTGACCAA
+GGTGTAAAAGGTTCGGATGTAATTTACACTGATGTTTGGGTATCAATGGGTGAACCTGAT
+GAAGTATGGAAAGAACGACTTGAATTATTGAAACCATATCAAGTAAATAAAGAAATGATG
+GATAAAACTGGTAATCCAAATGTTATTTTTGAGCATTGCTTACCATCTTTCCATAATGCT
+GATACGAAAATTGGTCAACAAATTTTTGAAAAATATGGTATTCGAGAAATGGAAGTTACA
+GATGAAGTATTCGAAAGTAAAGCTTCAGTTGTATTCCAAGAAGCTGAGAACAGAATGCAT
+ACAATCAAAGCAGTCATGGTTGCTACATTGGGTGAATTTTAA
diff --git a/t/data/split_pan_genome_sequences/different.fa b/t/data/split_pan_genome_sequences/different.fa
new file mode 100644
index 0000000..eb960ac
--- /dev/null
+++ b/t/data/split_pan_genome_sequences/different.fa
@@ -0,0 +1,38 @@
+>1_1
+ATGAAAACACGTATAGTCAGCTCAGTAACAACAACACTATTGCTAGGTTCCATATTAATG
+AATCCTGTCGCTAATGCCGCAGATTCTGATATTAATATTAAAACCGGTACTACAGATATT
+GGAAGCAATACTACAGTAAAAACAGGTGATTTAGTCACTTATGATAAAGAAAATGGCATG
+CACAAAAAAGTATTTTATAGTTTTATCGATGATAAAAATCACAATAAAAAACTGCTAGTT
+ATTAGAACGAAAGGTACCATTGCTGGTCAATATAGAGTTTATAGCGAAGAAGGTGCTAAC
+AAAAGTGGTTTAGCCTGGCCTTCAGCCTTTAAGGTACAGTTGCAACTACCTGATAATGAA
+GTAGCTCAAATATCTGATTACTATCCAAGAAATTCGATTGATACAAAAGAGTATATGAGT
+ACTTTAACTTATGGATTCAACGGTAATGTTACTGGTGATGATACAGGAAAAATTGGCGGC
+CTTATTGGTGCAAATGTTTCGATTGGTCATACACTGAAATATGTTCAACCTGATTTCAAA
+ACAATTTTAGAGAGCCCAACTGATAAAAAAGTAGGCTGGAAAGTGATATTTAACAATATG
+GTGAATCAAAATTGGGGACCATATGATAGAGATTCTTGGAACCCGGTATATGGCAATCAA
+CTTTTCATGAAAACTAGAAATGGTTCTATGAAAGCAGCAGAGAACTTCCTTGATCCTAAC
+AAAGCAAGTTCTCTATTATCTTCAGGGTTTTCACCAGACTTCGCTACAGTTATTACTATG
+GATAGAAAAGCATCCAAACAACAAACAAATATAGATGTAATATACGAACGAGTTCGTGAT
+GACTACCAATTGCATTGGACTTCAACAAATTGGAAAGGTACCAATACTAAAGATAAATGG
+ACAGATCGTTCTTCAGAAAGATATAAAATCGATTGGGAAAAAGAAGAAATGACAAATTAA
+>1_6
+ATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAACAACATGATAGT
+GTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTTTACTAGGTAAA
+TTATTTGGATTCTAA
+>2_7
+ATGACTGGACTAGCAGAAGCAATCGCAAATACTGTGCAAGCTGCACAACAACATGATAGT
+GTGAAATTAGGCACAAGTATCGTAGACATCGTTGCTAACGGTGTGGGTTTACTAGGTAAA
+TTATTTGGATTCTAA
+>3_5
+TTGGGATATAAAAATATTTTGATAGACTTTGATGATACAATTGTTGATTTTTATGATGCA
+GAAGAATGGGCGTTTCACTATATGGCGAATGTTTTTAATCATAAAGCAACAAAGGATGAT
+TTTTTAACATTTAAAAAAATCAATCACCAACATTGGGAAGCTTTTCAACAAAATAAATTA
+ACGAAGTCTGAAGTATTATCAGAACGATTTGTGAATTACTTCAAACATCATCAAATGGAA
+GTTGATGGGCATCGTGCAGATGTGTTATTTAGAAATGGATTAGCAGAAGCTAAAGTTAAA
+TACTTTGATCAAACATTAGAAACAATTGTCGAATTATCGAAAAGACATGATTTATATATT
+GTTACTAATGGTGTAACCGAAACGCAAAAGAGAAGGTTAAATCAGACGCCGTTGCATAAA
+TATATTAAAAAGATATTTATATCTGAGGAAACAGGATATCAAAAACCTAATCCGGAATTT
+TTTAATTATGTTTTTAATGATATTGGTGAGGATGAAAGACAGCACTCGATTATAGTTGGA
+GATTCTTTAACATCTGACATTCTAGGTGGAATCAATGCGGGTATAGCTACTTGCTGGTTT
+AATTTTAGAGGATTTGATCATAATCCAGGAATTATACCTGATTATGAAATTAATTCATGG
+AAACAACTAAATGATATTGTACGTTAA
diff --git a/t/data/split_pan_genome_sequences/hly.fa b/t/data/split_pan_genome_sequences/hly.fa
new file mode 100644
index 0000000..42a7fdf
--- /dev/null
+++ b/t/data/split_pan_genome_sequences/hly.fa
@@ -0,0 +1,34 @@
+>2_1
+ATGAAAACACGTATAGTCAGCTCAGTAACAACAACACTATTGCTAGGTTCCATATTAATG
+AATCCTGTCGCTAATGCCGCAGATTCTGATATTAATATTAAAACCGGTACTACAGATATT
+GGAAGCAATACTACAGTAAAAACAGGTGATTTAGTCACTTATGATAAAGAAAATGGCATG
+CACAAAAAAGTATTTTATAGTTTTATCGATGATAAAAATCACAATAAAAAACTGCTAGTT
+ATTAGAACGAAAGGTACCATTGCTGGTCAATATAGAGTTTATAGCGAAGAAGGTGCTAAC
+AAAAGTGGTTTAGCCTGGCCTTCAGCCTTTAAGGTACAGTTGCAACTACCTGATAATGAA
+GTAGCTCAAATATCTGATTACTATCCAAGAAATTCGATTGATACAAAAGAGTATATGAGT
+ACTTTAACTTATGGATTCAACGGTAATGTTACTGGTGATGATACAGGAAAAATTGGCGGC
+CTTATTGGTGCAAATGTTTCGATTGGTCATACACTGAAATATGTTCAACCTGATTTCAAA
+ACAATTTTAGAGAGCCCAACTGATAAAAAAGTAGGCTGGAAAGTGATATTTAACAATATG
+GTGAATCAAAATTGGGGACCATATGATAGAGATTCTTGGAACCCGGTATATGGCAATCAA
+CTTTTCATGAAAACTAGAAATGGTTCTATGAAAGCAGCAGAGAACTTCCTTGATCCTAAC
+AAAGCAAGTTCTCTATTATCTTCAGGGTTTTCACCAGACTTCGCTACAGTTATTACTATG
+GATAGAAAAGCATCCAAACAACAAACAAATATAGATGTAATATACGAACGAGTTCGTGAT
+GACTACCAATTGCATTGGACTTCAACAAATTGGAAAGGTACCAATACTAAAGATAAATGG
+ACAGATCGTTCTTCAGAAAGATATAAAATCGATTGGGAAAAAGAAGAAATGACAAATTAA
+>3_1
+ATGAAAACACGTATAGTCAGCTCAGTAACAACAACACTATTGCTAGGTTCCATATTAATG
+AATCCTGTCGCTAATGCCGCAGATTCTGATATTAATATTAAAACCGGTACTACAGATATT
+GGAAGCAATACTACAGTAAAAACAGGTGATTTAGTCACTTATGATAAAGAAAATGGCATG
+CACAAAAAAGTATTTTATAGTTTTATCGATGATAAAAATCACAATAAAAAACTGCTAGTT
+ATTAGAACGAAAGGTACCATTGCTGGTCAATATAGAGTTTATAGCGAAGAAGGTGCTAAC
+AAAAGTGGTTTAGCCTGGCCTTCAGCCTTTAAGGTACAGTTGCAACTACCTGATAATGAA
+GTAGCTCAAATATCTGATTACTATCCAAGAAATTCGATTGATACAAAAGAGTATATGAGT
+ACTTTAACTTATGGATTCAACGGTAATGTTACTGGTGATGATACAGGAAAAATTGGCGGC
+CTTATTGGTGCAAATGTTTCGATTGGTCATACACTGAAATATGTTCAACCTGATTTCAAA
+ACAATTTTAGAGAGCCCAACTGATAAAAAAGTAGGCTGGAAAGTGATATTTAACAATATG
+GTGAATCAAAATTGGGGACCATATGATAGAGATTCTTGGAACCCGGTATATGGCAATCAA
+CTTTTCATGAAAACTAGAAATGGTTCTATGAAAGCAGCAGAGAACTTCCTTGATCCTAAC
+AAAGCAAGTTCTCTATTATCTTCAGGGTTTTCACCAGACTTCGCTACAGTTATTACTATG
+GATAGAAAAGCATCCAAACAACAAACAAATATAGATGTAATATACGAACGAGTTCGTGAT
+GACTACCAATTGCATTGGACTTCAACAAATTGGAAAGGTACCAATACTAAAGATAAATGG
+ACAGATCGTTCTTCAGAAAGATATAAAATCGATTGGGAAAAAGAAGAAATGACAAATTAA
diff --git a/t/data/split_pan_genome_sequences/reannotated_groups_file b/t/data/split_pan_genome_sequences/reannotated_groups_file
new file mode 100644
index 0000000..9cbf9e6
--- /dev/null
+++ b/t/data/split_pan_genome_sequences/reannotated_groups_file
@@ -0,0 +1,4 @@
+different: 1_1	2_4	3_4	3_5	1_6	2_7
+speH: 1_2	2_2
+hly: 2_1	3_1
+argF: 1_3	3_3
diff --git a/t/data/split_pan_genome_sequences/speH.fa b/t/data/split_pan_genome_sequences/speH.fa
new file mode 100644
index 0000000..2e8dc9d
--- /dev/null
+++ b/t/data/split_pan_genome_sequences/speH.fa
@@ -0,0 +1,28 @@
+>1_2
+ATGAACAATAACATCACGAAAAAAATTATTTTATCAACAACATTGTTACTATTAGGTACA
+GCATTTACACAATTTCCTAATACACCTATCAATTCTTCATCTGAAGCGAAAGCTTATTAT
+ATAAATCAAAACGAAACTAACGTTAATGAGTTAACTAAATATTACTCGCAAAAATATTTA
+ACCTTCTCTAACAGTACGTTATGGCAAAAAGATAACGGTACGATTCATGCAACGTTGTTA
+CAGTTTTCTTGGTATAGTCATATTCAAGTTTATGGACCTGAAAGTTGGGGCAATATCAAC
+CAATTAAGAAATAAAAGCGTTGATATTTTTGGCATAAAAGACCAAGAAACCATTGATTCT
+TTTGCATTATCTCAAGAAACGTTTACTGGTGGTGTTACTCCTGCAGCAACATCTAACGAT
+AAACACTATAAACTGAATGTAACATATAAAGATAAAGCAGAAACGTTTACTGGCGGATTT
+CCAGTTTATGAAGGCAATAAGCCTGTTTTAACTTTAAAAGAATTAGATTTTCGTATTCGT
+CAAACATTAATTAAAAGTAAAAAATTATATAATAATTCTTATAATAAAGGACAAATTAAA
+ATAACAGGTACAGACAATAACTACACAATAGATTTAAGTAAAAGGTTGCCATCAACTGAT
+GCAAATAGATATGTTAAAAAACCTCAAAATGCAAAAATTGAAGTTATCCTCGAAAAATCA
+AACTAA
+>2_2
+ATGAACAATAACATCACGAAAAAAATTATTTTATCAACAACATTGTTACTATTAGGTACA
+GCATTTACACAATTTCCTAATACACCTATCAATTCTTCATCTGAAGCGAAAGCTTATTAT
+ATAAATCAAAACGAAACTAACGTTAATGAGTTAACTAAATATTACTCGCAAAAATATTTA
+ACCTTCTCTAACAGTACGTTATGGCAAAAAGATAACGGTACGATTCATGCAACGTTGTTA
+CAGTTTTCTTGGTATAGTCATATTCAAGTTTATGGACCTGAAAGTTGGGGCAATATCAAC
+CAATTAAGAAATAAAAGCGTTGATATTTTTGGCATAAAAGACCAAGAAACCATTGATTCT
+TTTGCATTATCTCAAGAAACGTTTACTGGTGGTGTTACTCCTGCAGCAACATCTAACGAT
+AAACACTATAAACTGAATGTAACATATAAAGATAAAGCAGAAACGTTTACTGGCGGATTT
+CCAGTTTATGAAGGCAATAAGCCTGTTTTAACTTTAAAAGAATTAGATTTTCGTATTCGT
+CAAACATTAATTAAAAGTAAAAAATTATATAATAATTCTTATAATAAAGGACAAATTAAA
+ATAACAGGTACAGACAATAACTACACAATAGATTTAAGTAAAAGGTTGCCATCAACTGAT
+GCAAATAGATATGTTAAAAAACCTCAAAATGCAAAAATTGAAGTTATCCTCGAAAAATCA
+AACTAA
diff --git a/t/data/uneven_sequences.fa b/t/data/uneven_sequences.fa
new file mode 100644
index 0000000..ea59566
--- /dev/null
+++ b/t/data/uneven_sequences.fa
@@ -0,0 +1,18 @@
+>1
+a
+>2
+aa
+>3
+aaa
+>4
+aaaa
+>5
+aaaaa
+>6
+aaaaaa
+>7
+aaaaaaa
+>8
+aaaaaaaa
+>9
+aaaaaaaaa
\ No newline at end of file
diff --git a/t/data/uneven_sequences.fa.sorted.fa b/t/data/uneven_sequences.fa.sorted.fa
new file mode 100644
index 0000000..c9f0e0c
--- /dev/null
+++ b/t/data/uneven_sequences.fa.sorted.fa
@@ -0,0 +1,18 @@
+>1
+aNN
+>2
+aaN
+>3
+aaa
+>4
+aaaaNN
+>5
+aaaaaN
+>6
+aaaaaa
+>7
+aaaaaaaNN
+>8
+aaaaaaaaN
+>9
+aaaaaaaaa
diff --git a/t/data/variable_core/gene_1.fa.aln b/t/data/variable_core/gene_1.fa.aln
new file mode 100644
index 0000000..6f25d27
--- /dev/null
+++ b/t/data/variable_core/gene_1.fa.aln
@@ -0,0 +1,10 @@
+>111_00001
+AAAA
+>222_00001
+AAAA
+>333_00001
+AAAA
+>444_00001
+AAAA
+>555_00001
+AAAA
\ No newline at end of file
diff --git a/t/data/variable_core/gene_2.fa.aln b/t/data/variable_core/gene_2.fa.aln
new file mode 100644
index 0000000..0bf355e
--- /dev/null
+++ b/t/data/variable_core/gene_2.fa.aln
@@ -0,0 +1,8 @@
+>333_00002
+CCCCCCC
+>444_00002
+CCCCCCC
+>555_00002
+CCCCCCC
+>222_00002
+CCCCCCC
\ No newline at end of file
diff --git a/t/data/variable_core/gene_3.fa.aln b/t/data/variable_core/gene_3.fa.aln
new file mode 100644
index 0000000..2f7c1f3
--- /dev/null
+++ b/t/data/variable_core/gene_3.fa.aln
@@ -0,0 +1,8 @@
+>111_00003
+GG
+>333_00003
+GG
+>222_00003
+GG
+>444_00003
+GG
\ No newline at end of file
diff --git a/t/data/variable_core/gene_4.fa.aln b/t/data/variable_core/gene_4.fa.aln
new file mode 100644
index 0000000..5a62879
--- /dev/null
+++ b/t/data/variable_core/gene_4.fa.aln
@@ -0,0 +1,4 @@
+>222_00004
+TTTTT
+>444_00004
+TTTTT
\ No newline at end of file
diff --git a/t/data/variable_core/gene_5.fa.aln b/t/data/variable_core/gene_5.fa.aln
new file mode 100644
index 0000000..c2e36a0
--- /dev/null
+++ b/t/data/variable_core/gene_5.fa.aln
@@ -0,0 +1,10 @@
+>555_00005
+XXXXXXX
+>111_00005
+XXXXXXX
+>222_00005
+XXXXXXX
+>333_00005
+XXXXXXX
+>444_00005
+XXXXXXX
\ No newline at end of file
diff --git a/t/dummy_blastp b/t/dummy_blastp
new file mode 100755
index 0000000..1e374d4
--- /dev/null
+++ b/t/dummy_blastp
@@ -0,0 +1,3 @@
+#!/usr/bin/env perl
+system( 'touch', 'results.out' );    # dummy blastp: its only action is to touch results.out
+1;
\ No newline at end of file
diff --git a/t/dummy_cd-hit b/t/dummy_cd-hit
new file mode 100755
index 0000000..c3e21e3
--- /dev/null
+++ b/t/dummy_cd-hit
@@ -0,0 +1,5 @@
+#!/usr/bin/env perl
+# Dummy cd-hit: its only action is to touch the three files named below.
+my @touched_files = qw(output output.clstr output.bak.clstr);
+system( 'touch', $_ ) for @touched_files;
+1;
\ No newline at end of file
diff --git a/t/dummy_makeblastdb b/t/dummy_makeblastdb
new file mode 100755
index 0000000..0ccbc2a
--- /dev/null
+++ b/t/dummy_makeblastdb
@@ -0,0 +1,5 @@
+#!/usr/bin/env perl
+# Dummy makeblastdb: its only action is to touch the three files named below.
+my @touched_files = qw(output_contigs.phr output_contigs.pin output_contigs.psq);
+system( 'touch', $_ ) for @touched_files;
+1;
\ No newline at end of file
diff --git a/t/lib/TestHelper.pm b/t/lib/TestHelper.pm
new file mode 100644
index 0000000..aae17f1
--- /dev/null
+++ b/t/lib/TestHelper.pm
@@ -0,0 +1,323 @@
+package TestHelper;
+use Moose::Role;
+use Test::Most;
+use Data::Dumper;
+use File::Slurper qw(read_lines read_text);
+use Test::Files;
+use Test::Output;
+
+$ENV{PATH} .= ":./bin";    # append ./bin (relative to the current directory) to the executable search path
+
+# Pass when both files contain the same lines, regardless of line order.
+sub compare_files {
+    my ( $actual_file, $expected_file, $comment ) = @_;
+    my ( $actual, $expected ) = map { [ sort { $a cmp $b } read_lines($_) ] } ( $actual_file, $expected_file );
+    is_deeply( $actual, $expected, $comment );
+}
+
+# Compare two files while ignoring the order of their lines and of the
+# single-space-separated fields on each line; the first field of every line
+# (the text before the first space) is dropped before comparing.
+sub compare_groups_files {
+    my ( $actual_file, $expected_file, $comment ) = @_;
+
+    # Sort the lines of a file, then reduce each line to its sorted fields.
+    my $sorted_fields_per_line = sub {
+        my ($file_name) = @_;
+        my @rows;
+        for my $line ( sort { $a cmp $b } read_lines($file_name) ) {
+            my ( undef, @fields ) = split( / /, $line );
+            push( @rows, [ sort { $a cmp $b } @fields ] );
+        }
+        return \@rows;
+    };
+
+    # The actual file is read before the expected one.
+    my $actual_rows   = $sorted_fields_per_line->($actual_file);
+    my $expected_rows = $sorted_fields_per_line->($expected_file);
+    is_deeply( $actual_rows, $expected_rows, $comment );
+}
+
+
+# Run $script_name->new(...)->run with $parameters split on whitespace as args
+# and assert that what it prints to STDOUT matches /$expected/; STDERR is muted.
+sub stdout_should_have {
+    my ( $script_name, $parameters, $expected ) = @_;
+    my @input_args = split( " ", $parameters );
+    open( my $saved_stderr, '>&', \*STDERR ) or die "Can't save stderr: $!";
+    eval("use $script_name ; 1") or warn $@;    # surface a module that fails to load
+    {
+        local *STDERR;
+        open STDERR, '>/dev/null' or warn "Can't open /dev/null: $!";
+        stdout_like { eval { $script_name->new( args => \@input_args, script_name => $script_name )->run; }; } qr/$expected/, "got expected text $expected for $parameters";
+        close STDERR;
+    }
+    open( STDERR, '>&', $saved_stderr ) or die "Can't restore stderr: $!";
+    close($saved_stderr) or die "Can't close saved stderr: $!";
+}
+
+
+# Run $script_name->new(...)->run with $parameters split on whitespace as args
+# and assert that its STDOUT does NOT match /$expected/; STDERR is muted.
+sub stdout_should_not_have {
+    my ( $script_name, $parameters, $expected ) = @_;
+    my @input_args = split( " ", $parameters );
+    open( my $saved_stderr, '>&', \*STDERR ) or die "Can't save stderr: $!";
+    eval("use $script_name ; 1") or warn $@;    # surface a module that fails to load
+    {
+        local *STDERR;
+        open STDERR, '>/dev/null' or warn "Can't open /dev/null: $!";
+        stdout_unlike { eval { $script_name->new( args => \@input_args, script_name => $script_name )->run; }; } qr/$expected/, "did not get text $expected for $parameters";
+        close STDERR;
+    }
+    open( STDERR, '>&', $saved_stderr ) or die "Can't restore stderr: $!";
+    close($saved_stderr) or die "Can't close saved stderr: $!";
+}
+
+
+
+# Run $script_name->new(...)->run with $parameters split on whitespace as args
+# and assert that its STDERR does NOT match /$expected/; STDOUT is muted.
+sub stderr_should_not_have {
+    my ( $script_name, $parameters, $expected ) = @_;
+    my @input_args = split( " ", $parameters );
+    open( my $saved_stdout, '>&', \*STDOUT ) or die "Can't save stdout: $!";
+    eval("use $script_name ; 1") or warn $@;    # surface a module that fails to load
+    {
+        local *STDOUT;
+        open STDOUT, '>/dev/null' or warn "Can't open /dev/null: $!";
+        stderr_unlike { eval { $script_name->new( args => \@input_args, script_name => $script_name )->run; }; } qr/$expected/, "did not get text $expected for $parameters";
+        close STDOUT;
+    }
+    open( STDOUT, '>&', $saved_stdout ) or die "Can't restore stdout: $!";
+    close($saved_stdout) or die "Can't close saved stdout: $!";
+}
+
+# Run $script_name->new(...)->run with $parameters split on whitespace as args
+# and assert that what it prints to STDERR matches /$expected/; STDOUT is muted.
+sub stderr_should_have {
+    my ( $script_name, $parameters, $expected ) = @_;
+    my @input_args = split( " ", $parameters );
+    open( my $saved_stdout, '>&', \*STDOUT ) or die "Can't save stdout: $!";
+    eval("use $script_name ; 1") or warn $@;    # surface a module that fails to load
+    {
+        local *STDOUT;
+        open STDOUT, '>/dev/null' or warn "Can't open /dev/null: $!";
+        stderr_like { eval { $script_name->new( args => \@input_args, script_name => $script_name )->run; }; } qr/$expected/, "got expected text $expected for $parameters";
+        close STDOUT;
+    }
+    open( STDOUT, '>&', $saved_stdout ) or die "Can't restore stdout: $!";
+    close($saved_stdout) or die "Can't close saved stdout: $!";
+}
+
+
+# For every parameter string (a key of %$scripts_and_expected_files), run
+# $script_name->new(...)->run with STDOUT and STDERR sent to /dev/null, then
+# compare the actual output file (value ->[0]) with the expected one (->[1]).
+# If $columns_to_exclude is defined, both files are compared after blanking
+# those CSV columns; otherwise compare_ok is used. The actual file is deleted.
+sub mock_execute_script_and_check_output {
+    my ( $script_name, $scripts_and_expected_files, $columns_to_exclude ) = @_;
+
+    system('touch empty_file');
+
+    # Duplicate the real handles first; a failed dup must not go unnoticed.
+    open( my $saved_stdout, '>&', \*STDOUT ) or die "Can't save stdout: $!";
+    open( my $saved_stderr, '>&', \*STDERR ) or die "Can't save stderr: $!";
+    eval("use $script_name ; 1") or warn $@;    # surface a module that fails to load
+    {
+        local *STDOUT;
+        open STDOUT, '>/dev/null' or warn "Can't open /dev/null: $!";
+        local *STDERR;
+        open STDERR, '>/dev/null' or warn "Can't open /dev/null: $!";
+
+        for my $script_parameters ( sort keys %$scripts_and_expected_files ) {
+            my @input_args = split( " ", $script_parameters );
+
+            eval { $script_name->new( args => \@input_args, script_name => $script_name )->run; };
+            warn $@ if $@;
+
+            my $actual_output_file_name   = $scripts_and_expected_files->{$script_parameters}->[0];
+            my $expected_output_file_name = $scripts_and_expected_files->{$script_parameters}->[1];
+            ok( -e $actual_output_file_name, "Actual output file exists $actual_output_file_name  $script_parameters" );
+            if ( defined($columns_to_exclude) ) {
+                is(
+                    _exclude_variable_columns_from_spreadsheet( $actual_output_file_name,   $columns_to_exclude ),
+                    _exclude_variable_columns_from_spreadsheet( $expected_output_file_name, $columns_to_exclude ),
+                    'Actual and expected match output excluding variable columns'
+                );
+            }
+            else {
+                compare_ok( $actual_output_file_name, $expected_output_file_name,
+                    "Actual and expected output match for '$script_parameters'" );
+            }
+            unlink($actual_output_file_name);
+        }
+        close STDOUT;
+        close STDERR;
+    }
+
+    # Restore the real handles, then close the duplicates to avoid leaks.
+    open( STDOUT, '>&', $saved_stdout ) or die "Can't restore stdout: $!";
+    open( STDERR, '>&', $saved_stderr ) or die "Can't restore stderr: $!";
+    close($saved_stdout) or die "Can't close saved stdout: $!";
+    close($saved_stderr) or die "Can't close saved stderr: $!";
+    unlink('empty_file');
+}
+
+# For every parameter string (a key of %$scripts_and_expected_files), run
+# $script_name->new(...)->run with STDOUT and STDERR sent to /dev/null, then
+# compare the actual output file (value ->[0]) with the expected one (->[1]):
+# as sorted lines with the $columns_to_exclude CSV columns blanked when that
+# is defined, otherwise through compare_groups_files. The actual file is deleted.
+sub mock_execute_script_and_check_output_sorted_groups {
+    my ( $script_name, $scripts_and_expected_files, $columns_to_exclude ) = @_;
+
+    system('touch empty_file');
+
+    # Duplicate the real handles first; a failed dup must not go unnoticed.
+    open( my $saved_stdout, '>&', \*STDOUT ) or die "Can't save stdout: $!";
+    open( my $saved_stderr, '>&', \*STDERR ) or die "Can't save stderr: $!";
+    eval("use $script_name ; 1") or warn $@;    # surface a module that fails to load
+    {
+        local *STDOUT;
+        open STDOUT, '>/dev/null' or warn "Can't open /dev/null: $!";
+        local *STDERR;
+        open STDERR, '>/dev/null' or warn "Can't open /dev/null: $!";
+
+        for my $script_parameters ( sort keys %$scripts_and_expected_files ) {
+            my @input_args = split( " ", $script_parameters );
+
+            eval { $script_name->new( args => \@input_args, script_name => $script_name )->run; };
+            warn $@ if $@;
+
+            my $actual_output_file_name   = $scripts_and_expected_files->{$script_parameters}->[0];
+            my $expected_output_file_name = $scripts_and_expected_files->{$script_parameters}->[1];
+            ok( -e $actual_output_file_name, "Actual output file exists $actual_output_file_name  $script_parameters" );
+            if ( defined($columns_to_exclude) ) {
+                my @actual_content_sorted =
+                  sort( split( /\n/, _exclude_variable_columns_from_spreadsheet( $actual_output_file_name, $columns_to_exclude ) ) );
+                my @expected_content_sorted =
+                  sort( split( /\n/, _exclude_variable_columns_from_spreadsheet( $expected_output_file_name, $columns_to_exclude ) ) );
+                is_deeply( \@actual_content_sorted, \@expected_content_sorted,
+                    'Actual and expected match output excluding variable columns' );
+            }
+            else {
+                compare_groups_files( $actual_output_file_name, $expected_output_file_name,
+                    "Actual and expected sorted output match for '$script_parameters'" );
+            }
+            unlink($actual_output_file_name);
+        }
+        close STDOUT;
+        close STDERR;
+    }
+
+    # Restore the real handles, then close the duplicates to avoid leaks.
+    open( STDOUT, '>&', $saved_stdout ) or die "Can't restore stdout: $!";
+    open( STDERR, '>&', $saved_stderr ) or die "Can't restore stderr: $!";
+    close($saved_stdout) or die "Can't close saved stdout: $!";
+    close($saved_stderr) or die "Can't close saved stderr: $!";
+    unlink('empty_file');
+}
+
+# For every parameter string (a key of %$scripts_and_expected_files), run
+# $script_name->new(...)->run with STDOUT and STDERR sent to /dev/null, then
+# compare the actual output file (value ->[0]) with the expected one (->[1]):
+# as sorted lines with the $columns_to_exclude CSV columns blanked when that
+# is defined, otherwise through compare_groups_files. The actual file is deleted.
+sub mock_execute_script_and_check_output_sorted {
+    my ( $script_name, $scripts_and_expected_files, $columns_to_exclude ) = @_;
+
+    system('touch empty_file');
+
+    # NOTE(review): performs the same checks as mock_execute_script_and_check_output_sorted_groups.
+    open( my $saved_stdout, '>&', \*STDOUT ) or die "Can't save stdout: $!";
+    open( my $saved_stderr, '>&', \*STDERR ) or die "Can't save stderr: $!";
+    eval("use $script_name ; 1") or warn $@;    # surface a module that fails to load
+    {
+        local *STDOUT;
+        open STDOUT, '>/dev/null' or warn "Can't open /dev/null: $!";
+        local *STDERR;
+        open STDERR, '>/dev/null' or warn "Can't open /dev/null: $!";
+
+        for my $script_parameters ( sort keys %$scripts_and_expected_files ) {
+            my @input_args = split( " ", $script_parameters );
+
+            eval { $script_name->new( args => \@input_args, script_name => $script_name )->run; };
+            warn $@ if $@;
+
+            my $actual_output_file_name   = $scripts_and_expected_files->{$script_parameters}->[0];
+            my $expected_output_file_name = $scripts_and_expected_files->{$script_parameters}->[1];
+            ok( -e $actual_output_file_name, "Actual output file exists $actual_output_file_name  $script_parameters" );
+            if ( defined($columns_to_exclude) ) {
+                my @actual_content_sorted =
+                  sort( split( /\n/, _exclude_variable_columns_from_spreadsheet( $actual_output_file_name, $columns_to_exclude ) ) );
+                my @expected_content_sorted =
+                  sort( split( /\n/, _exclude_variable_columns_from_spreadsheet( $expected_output_file_name, $columns_to_exclude ) ) );
+                is_deeply( \@actual_content_sorted, \@expected_content_sorted,
+                    'Actual and expected match output excluding variable columns' );
+            }
+            else {
+                compare_groups_files( $actual_output_file_name, $expected_output_file_name,
+                    "Actual and expected sorted output match for '$script_parameters'" );
+            }
+            unlink($actual_output_file_name);
+        }
+        close STDOUT;
+        close STDERR;
+    }
+
+    # Restore the real handles, then close the duplicates to avoid leaks.
+    open( STDOUT, '>&', $saved_stdout ) or die "Can't restore stdout: $!";
+    open( STDERR, '>&', $saved_stderr ) or die "Can't restore stderr: $!";
+    close($saved_stdout) or die "Can't close saved stdout: $!";
+    close($saved_stderr) or die "Can't close saved stderr: $!";
+    unlink('empty_file');
+}
+
+# Assert that $actual_file exists and that it matches $expected_file once both
+# have been normalised by _filter_coordinates_from_string.
+sub compare_tab_files_with_variable_coordinates {
+    my ( $actual_file, $expected_file ) = @_;
+    ok( -e $actual_file, "File exists$actual_file" );
+
+    my $actual_contents   = _filter_coordinates_from_string($actual_file);
+    my $expected_contents = _filter_coordinates_from_string($expected_file);
+    is( $actual_contents, $expected_contents, "file contents the same for $actual_file" );
+}
+
+# Return the text of $file_name with its lines sorted, lines mentioning
+# variation/misc_feature/feature removed, and group_<digits> ids masked.
+sub _filter_coordinates_from_string {
+    my ($file_name) = @_;
+
+    my @kept_lines =
+      grep { $_ !~ /(variation|misc_feature|feature)/ }
+      sort { $a cmp $b } split( /\n/, read_text($file_name) );
+    s!group_[\d]+!group_XX!gi for @kept_lines;
+
+    return join( '', map { "$_\n" } @kept_lines );
+}
+
+# Return the comma-separated text of $file_name with the cells at the array
+# indexes listed in @$columns_to_exclude blanked on every line; a cell that
+# is not defined on a line is left alone.
+sub _exclude_variable_columns_from_spreadsheet {
+    my ( $file_name, $columns_to_exclude ) = @_;
+    my @output_rows;
+
+    for my $row ( split( /\n/, read_text($file_name) ) ) {
+        my @cells = split( /,/, $row );
+
+        # Only blank cells that exist, so short rows are not padded out.
+        for my $column ( grep { defined $cells[$_] } @{$columns_to_exclude} ) {
+            $cells[$column] = '';
+        }
+        push( @output_rows, join( ',', @cells ) . "\n" );
+    }
+    return join( '', @output_rows );
+}
+
+no Moose;    # NOTE(review): this file loads Moose::Role, so "no Moose::Role" may be what is intended - confirm
+1;    # true value so that loading the module succeeds
+

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/roary.git



More information about the debian-med-commit mailing list