[med-svn] [ugene] 01/03: New upstream version 1.24.2+dfsg
Sascha Steinbiss
satta at debian.org
Sat Sep 17 16:53:36 UTC 2016
This is an automated email from the git hooks/post-receive script.
satta pushed a commit to branch master
in repository ugene.
commit adee81f160c39905559bb05b83f6278f8445f834
Author: Sascha Steinbiss <satta at debian.org>
Date: Sat Sep 17 16:21:25 2016 +0000
New upstream version 1.24.2+dfsg
---
.../{hmm3-search.uwl => hmm3-build-and-search.uwl} | 96 +-
data/workflow_samples/NGS/call_variants_full.uwl | 413 +++
data/workflow_samples/NGS/fastqc.uwl | 2 +-
.../NGS/raw_ngs/chipseq/chipseq_paired.uwl | 97 +-
.../NGS/raw_ngs/chipseq/chipseq_single.uwl | 66 +-
.../NGS/raw_ngs/dnaseq/dna_paired.uwl | 91 +-
.../NGS/raw_ngs/dnaseq/dna_single.uwl | 60 +-
.../NGS/raw_ngs/rnaseq/rnaseq_paired.uwl | 79 +-
.../NGS/raw_ngs/rnaseq/rnaseq_single.uwl | 42 +-
data/workflow_samples/NGS/variation_annotation.uwl | 38 +-
data/workflow_samples/Scenarios/length_filter.uwl | 131 +
data/workflow_samples/users/LengthMarker.usa | 14 +
.../U2Algorithm/src/misc/DnaAssemblyMultiTask.cpp | 10 +-
.../U2Algorithm/src/misc/DnaAssemblyMultiTask.h | 5 +-
.../U2Algorithm/src/registry/DnaAssemblyTask.cpp | 2 +-
.../U2Algorithm/src/registry/DnaAssemblyTask.h | 7 +-
src/corelibs/U2Core/U2Core.pro | 16 +-
src/corelibs/U2Core/src/datatype/Annotation.cpp | 9 +-
src/corelibs/U2Core/src/datatype/Annotation.h | 4 +-
src/corelibs/U2Core/src/datatype/U2Assembly.h | 24 +-
.../src/datatype/U2Qualifier.cpp} | 40 +-
src/corelibs/U2Core/src/datatype/U2Qualifier.h | 15 +-
.../U2Core/src/datatype/U2Variant.cpp} | 46 +-
src/corelibs/U2Core/src/datatype/U2Variant.h | 29 +-
src/corelibs/U2Core/src/dbi/U2DbiUtils.cpp | 89 +-
src/corelibs/U2Core/src/dbi/U2DbiUtils.h | 6 +
.../U2Core/src/globals/ExternalToolRegistry.h | 2 +
src/corelibs/U2Core/src/globals/L10n.h | 3 +
.../U2Core/src/gobjects/VariantTrackObject.cpp | 20 +-
.../U2Core/src/gobjects/VariantTrackObject.h | 1 +
src/corelibs/U2Core/src/models/DocumentModel.cpp | 4 +-
src/corelibs/U2Core/src/models/DocumentModel.h | 2 +
.../src/tasks/AddSequencesToAlignmentTask.cpp | 2 +-
.../U2Core/src/tasks/RemoveDocumentTask.cpp | 6 +-
src/corelibs/U2Core/src/tasks/SaveDocumentTask.cpp | 18 +-
.../U2Core/src/tasks/SequenceDbiWalkerTask.cpp | 5 +-
.../U2Core/src/util/AnnotationCreationPattern.cpp} | 15 +-
.../U2Core/src/util/AnnotationCreationPattern.h} | 22 +-
src/corelibs/U2Core/src/util/GenbankFeatures.cpp | 2 +-
src/corelibs/U2Core/src/util/SnpeffDictionary.cpp | 162 ++
.../U2Core/src/util/SnpeffDictionary.h} | 34 +-
src/corelibs/U2Core/src/util/U1AnnotationUtils.cpp | 18 +
src/corelibs/U2Core/src/util/U1AnnotationUtils.h | 3 +
src/corelibs/U2Core/src/util/U2AlphabetUtils.h | 2 +-
src/corelibs/U2Core/src/util/U2AssemblyUtils.cpp | 91 +-
src/corelibs/U2Core/src/util/U2AssemblyUtils.h | 11 +
src/corelibs/U2Core/src/util/U2AttributeUtils.cpp | 7 +-
src/corelibs/U2Core/src/util/U2AttributeUtils.h | 3 +-
src/corelibs/U2Core/src/util/U2FeatureUtils.cpp | 2 +-
src/corelibs/U2Core/src/util/U2VariationUtils.cpp | 4 +-
src/corelibs/U2Core/transl/english.ts | 7 +-
src/corelibs/U2Core/transl/russian.ts | 7 +-
.../U2Designer/src/BreakpointHitCountDialog.cpp | 2 +-
src/corelibs/U2Designer/src/DelegateEditors.cpp | 4 +
src/corelibs/U2Designer/src/DelegateEditors.h | 2 +
.../U2Designer/src/EditBreakpointLabelsDialog.cpp | 2 +-
.../U2Designer/src/EditMarkerGroupDialog.cpp | 4 +-
.../U2Designer/src/NewBreakpointDialog.cpp | 2 +-
.../U2Designer/src/NewGrouperSlotDialog.cpp | 10 +-
.../U2Designer/src/dashboard/StatisticsWidget.cpp | 5 +-
.../U2Designer/src/support/URLLineEdit.cpp | 11 +-
src/corelibs/U2Formats/U2Formats.pro | 12 +-
.../U2Formats/src/AbstractVariationFormat.cpp | 320 ++-
.../U2Formats/src/AbstractVariationFormat.h | 10 +-
src/corelibs/U2Formats/src/BAMUtils.cpp | 8 +-
src/corelibs/U2Formats/src/BAMUtils.h | 2 +-
src/corelibs/U2Formats/src/SAMFormat.cpp | 2 +-
src/corelibs/U2Formats/src/StockholmFormat.cpp | 4 +-
src/corelibs/U2Formats/src/VCF4VariationFormat.cpp | 2 +-
.../U2Formats/src/ace/ConvertAceToSqliteTask.cpp | 4 +-
src/corelibs/U2Formats/src/mysql_dbi/MysqlDbi.cpp | 2 +
.../U2Formats/src/mysql_dbi/MysqlVariantDbi.cpp | 4 +-
.../src/mysql_dbi/util/MysqlAssemblyUtils.cpp | 8 +-
.../upgraders/MysqlUpgraderFrom_1_16_To_1_24.cpp | 217 ++
.../upgraders/MysqlUpgraderFrom_1_16_To_1_24.h | 54 +
.../U2Formats/src/sqlite_dbi/SQLiteAssemblyDbi.cpp | 8 +-
.../U2Formats/src/sqlite_dbi/SQLiteVariantDbi.cpp | 4 +-
src/corelibs/U2Formats/src/tasks/ConvertFileTask.h | 1 +
.../ConvertSnpeffVariationsToAnnotationsTask.cpp | 233 ++
.../ConvertSnpeffVariationsToAnnotationsTask.h | 89 +
.../U2Formats/src/util/SnpeffInfoParser.cpp | 277 ++
src/corelibs/U2Formats/src/util/SnpeffInfoParser.h | 147 +
src/corelibs/U2Formats/transl/russian.ts | 2 +-
src/corelibs/U2Gui/src/ToolsMenu.cpp | 2 +
src/corelibs/U2Gui/src/ToolsMenu.h | 1 +
.../U2Gui/src/util/AddNewDocumentDialogImpl.cpp | 2 +-
.../U2Gui/src/util/AuthenticationDialog.cpp | 2 +-
.../src/util/BreakpointConditionEditDialog.cpp | 2 +-
.../U2Gui/src/util/CreateAnnotationDialog.cpp | 2 +-
.../src/util/CreateAnnotationWidgetController.cpp | 11 +-
.../src/util/CreateAnnotationWidgetController.h | 2 +
.../CreateDocumentFromTextDialogController.cpp | 2 +-
.../U2Gui/src/util/DownloadRemoteFileDialog.cpp | 4 +-
.../U2Gui/src/util/EditQualifierDialog.cpp | 2 +-
.../src/util/EditSequenceDialogController.cpp | 2 +-
.../U2Gui/src/util/ExportAnnotationsDialog.cpp | 2 +-
.../src/util/ExportDocumentDialogController.cpp | 10 +-
src/corelibs/U2Gui/src/util/ExportImageDialog.cpp | 17 +-
src/corelibs/U2Gui/src/util/ExportImageDialog.ui | 2 +-
.../src/util/ImportDialogs/AceImportDialog.cpp | 2 +-
src/corelibs/U2Gui/src/util/RangeSelector.cpp | 2 +-
.../RemovePartFromSequenceDialogController.cpp | 2 +-
.../U2Gui/src/util/SaveDocumentController.cpp | 19 +-
src/corelibs/U2Gui/src/util/ScriptEditorDialog.cpp | 2 +-
.../util/SearchGenbankSequenceDialogController.cpp | 2 +-
.../util/shared_db/CommonImportOptionsDialog.cpp | 2 +-
.../src/util/shared_db/EditConnectionDialog.cpp | 2 +-
.../src/util/shared_db/ImportToDatabaseDialog.cpp | 2 +-
.../src/util/shared_db/ItemToImportEditDialog.cpp | 2 +-
.../src/util/shared_db/SharedConnectionsDialog.cpp | 4 +-
src/corelibs/U2Gui/transl/english.ts | 16 +-
src/corelibs/U2Gui/transl/russian.ts | 18 +-
.../U2Lang/src/library/BaseOneOneWorker.cpp | 2 +
.../U2Lang/src/model/ConfigurationEditor.cpp | 2 +
.../U2Lang/src/model/ConfigurationEditor.h | 3 +
.../U2Private/src/DocumentFormatRegistryImpl.cpp | 2 +-
src/corelibs/U2Private/transl/english.ts | 35 +-
src/corelibs/U2Private/transl/russian.ts | 35 +-
.../U2View/src/ov_assembly/AssemblyBrowser.cpp | 6 +-
.../U2View/src/ov_assembly/AssemblyInfoWidget.cpp | 2 +-
.../U2View/src/ov_assembly/AssemblyModel.cpp | 24 +-
.../src/ov_assembly/AssemblyNavigationWidget.cpp | 2 +-
.../U2View/src/ov_assembly/AssemblyReadsArea.cpp | 4 +
.../src/ov_assembly/AssemblySettingsWidget.cpp | 2 +-
.../ov_assembly/CalculateCoveragePerBaseTask.cpp | 79 +-
.../src/ov_assembly/CalculateCoveragePerBaseTask.h | 13 +-
.../U2View/src/ov_assembly/CoverageInfo.cpp | 48 +-
src/corelibs/U2View/src/ov_assembly/CoverageInfo.h | 5 +-
.../src/ov_assembly/ExportConsensusDialog.cpp | 2 +-
.../ExportConsensusVariationsDialog.cpp | 2 +-
.../src/ov_assembly/ExportCoverageDialog.cpp | 2 +-
.../U2View/src/ov_assembly/ExportCoverageTask.h | 1 +
.../U2View/src/ov_assembly/ExportReadsDialog.cpp | 2 +-
.../ov_assembly/ExtractAssemblyRegionDialog.cpp | 2 +-
.../src/ov_msa/ColorSchemaDialogController.cpp | 4 +-
.../src/ov_msa/ColorSchemaSettingsController.cpp | 2 +-
.../ov_msa/CreateSubalignmentDialogController.cpp | 2 +-
.../U2View/src/ov_msa/DeleteGapsDialog.cpp | 2 +-
.../MSAExportConsensusTabFactory.cpp | 2 +-
.../ov_msa/ExportHighlightedDialogController.cpp | 2 +-
.../src/ov_msa/General/MSAGeneralTabFactory.cpp | 2 +-
.../Highlighting/MSAHighlightingTabFactory.cpp | 2 +-
.../U2View/src/ov_msa/MSAEditorSequenceArea.cpp | 4 +-
.../src/ov_msa/MSASelectSubalignmentDialog.cpp | 2 +-
.../src/ov_msa/Overview/MSAGraphOverview.cpp | 2 +
.../U2View/src/ov_msa/Overview/MSAGraphOverview.h | 4 +-
.../src/ov_msa/Overview/MSAOverviewContextMenu.cpp | 1 +
.../ov_msa/Overview/MSAOverviewImageExportTask.cpp | 8 +-
.../src/ov_msa/PairAlign/PairAlignFactory.cpp | 2 +-
...SaveSelectedSequenceFromMSADialogController.cpp | 2 +-
.../SeqStatistics/SeqStatisticsWidgetFactory.cpp | 2 +-
.../TreeOptions/TreeOptionsWidgetFactory.cpp | 6 +-
.../ov_sequence/CreateRulerDialogController.cpp | 2 +-
.../src/ov_sequence/GraphLabelsSelectDialog.cpp | 2 +-
.../U2View/src/ov_sequence/GraphSettingsDialog.cpp | 2 +-
.../U2View/src/ov_sequence/PanViewRows.cpp | 27 +-
.../SaveGraphCutoffsDialogController.cpp | 2 +-
.../src/ov_sequence/SearchQualifierDialog.cpp | 3 +-
.../AnnotHighlightWidgetFactory.cpp | 2 +-
.../ov_sequence/find_pattern/FindPatternWidget.cpp | 147 +-
.../find_pattern/FindPatternWidgetFactory.cpp | 2 +-
.../sequence_info/SequenceInfoFactory.cpp | 2 +-
.../U2View/src/phyltree/BranchSettingsDialog.cpp | 4 +-
.../src/phyltree/CreatePhyTreeDialogController.cpp | 6 +-
.../U2View/src/phyltree/TextSettingsDialog.cpp | 2 +-
.../U2View/src/phyltree/TreeSettingsDialog.cpp | 2 +-
.../src/util_dna_assembly/BuildIndexDialog.cpp | 12 +-
.../ConvertAssemblyToSamDialog.cpp | 2 +-
.../src/util_dna_assembly/DnaAssemblyDialog.cpp | 12 +-
.../src/util_dna_assembly/DnaAssemblyUtils.cpp | 9 +-
.../src/util_dna_assembly/DnaAssemblyUtils.h | 5 +-
.../src/util_dna_assembly/GenomeAssemblyDialog.cpp | 2 +-
.../util_sec_struct_predict/SecStructDialog.cpp | 8 +-
.../util_smith_waterman/SmithWatermanDialog.cpp | 2 +-
src/corelibs/U2View/transl/english.ts | 64 +-
src/corelibs/U2View/transl/russian.ts | 92 +-
src/include/U2Core/AnnotationCreationPattern.h | 1 +
src/include/U2Core/SnpeffDictionary.h | 1 +
.../ConvertSnpeffVariationsToAnnotationsTask.h | 1 +
src/include/U2Formats/SnpeffInfoParser.h | 1 +
.../GUITestBase/src/GTUtilsWorkflowDesigner.cpp | 10 +-
.../GUITestBase/src/GTUtilsWorkflowDesigner.h | 3 +-
src/plugins/GUITestBase/src/GUITestBasePlugin.cpp | 14 +-
.../GTTestsRegressionScenarios_2001_3000.cpp | 21 +
.../GTTestsRegressionScenarios_2001_3000.h | 1 +
.../GTTestsRegressionScenarios_4001_5000.cpp | 66 +-
.../GTTestsRegressionScenarios_4001_5000.h | 2 +
.../GTTestsRegressionScenarios_5001_6000.cpp | 272 +-
.../GTTestsRegressionScenarios_5001_6000.h | 11 +-
.../annotator/src/CollocationsDialogController.cpp | 2 +-
.../annotator/src/CustomAutoAnnotationDialog.cpp | 2 +-
.../datatype/annotations/AnnotationUnitTests.cpp | 2 +-
.../src/core/dbi/assembly/AssemblyDbiUnitTests.cpp | 4 +-
.../biostruct3d_view/src/SelectModelsDialog.cpp | 2 +-
.../biostruct3d_view/src/SettingsDialog.cpp | 2 +-
.../src/StructuralAlignmentDialog.cpp | 2 +-
.../src/deprecated/SelectModelsDialog.cpp | 2 +-
.../src/deprecated/SettingsDialog.cpp | 2 +-
.../src/deprecated/StructuralAlignmentDialog.cpp | 2 +-
src/plugins/biostruct3d_view/transl/english.ts | 80 +-
src/plugins/biostruct3d_view/transl/russian.ts | 80 +-
.../src/CircularViewSettingsWidgetFactory.cpp | 2 +-
src/plugins/dbi_bam/src/BAMFormat.cpp | 2 +-
src/plugins/dbi_bam/src/ConvertToSQLiteDialog.cpp | 2 +-
src/plugins/dbi_bam/src/ConvertToSQLiteDialog.ui | 65 +-
src/plugins/dbi_bam/src/ConvertToSQLiteTask.cpp | 7 +-
.../src/CSVColumnConfigurationDialog.cpp | 2 +-
.../dna_export/src/DNASequenceGeneratorDialog.cpp | 4 +-
.../dna_export/src/ExportBlastResultDialog.cpp | 2 +-
.../dna_export/src/ExportChromatogramDialog.cpp | 2 +-
src/plugins/dna_export/src/ExportMSA2MSADialog.cpp | 2 +-
.../dna_export/src/ExportMSA2SequencesDialog.cpp | 2 +-
.../dna_export/src/ExportSequences2MSADialog.cpp | 2 +-
.../dna_export/src/ExportSequencesDialog.cpp | 2 +-
.../dna_export/src/GetSequenceByIdDialog.cpp | 2 +-
.../src/ImportAnnotationsFromCSVDialog.cpp | 2 +-
src/plugins/dna_export/transl/russian.ts | 2 +-
src/plugins/dna_flexibility/src/DNAFlexDialog.cpp | 2 +-
.../dna_stat/src/DNAStatMSAProfileDialog.cpp | 2 +-
.../src/DistanceMatrixMSAProfileDialog.cpp | 2 +-
src/plugins/dotplot/src/DotPlotDialog.cpp | 2 +-
src/plugins/dotplot/src/DotPlotDialog.ui | 5 +-
src/plugins/dotplot/src/DotPlotFilesDialog.cpp | 2 +-
src/plugins/dotplot/src/DotPlotFilterDialog.cpp | 2 +-
.../enzymes/src/ConstructMoleculeDialog.cpp | 4 +-
src/plugins/enzymes/src/CreateFragmentDialog.cpp | 4 +-
src/plugins/enzymes/src/DNAFragment.cpp | 11 +-
src/plugins/enzymes/src/DNAFragment.h | 2 +-
src/plugins/enzymes/src/DigestSequenceDialog.cpp | 2 +-
src/plugins/enzymes/src/EditFragmentDialog.cpp | 2 +-
src/plugins/enzymes/src/EnzymesQuery.cpp | 2 +-
src/plugins/enzymes/src/FindEnzymesDialog.cpp | 2 +-
src/plugins/enzymes/src/FindEnzymesTask.cpp | 30 +-
src/plugins/enzymes/transl/english.ts | 24 +-
src/plugins/enzymes/transl/russian.ts | 24 +-
.../external_tool_support.pro | 38 +
.../external_tool_support.qrc | 1 +
.../external_tool_support/images/hmmer.png} | Bin
.../src/ExternalToolSupportPlugin.cpp | 224 +-
.../src/ExternalToolSupportPlugin.h | 3 +
.../src/ExternalToolSupportSettingsController.cpp | 2 +-
.../src/bigWigTools/BedGraphToBigWigTask.cpp | 3 +-
.../src/bigWigTools/BedGraphToBigWigTask.h | 2 +-
.../src/bigWigTools/BedGraphToBigWigWorker.cpp | 3 +-
.../src/blast/FormatDBSupport.h | 1 +
.../src/blast/FormatDBSupportRunDialog.cpp | 2 +-
.../src/blast/FormatDBSupportTask.cpp | 143 +-
.../src/blast/FormatDBSupportTask.h | 8 +
.../src/blast_plus/BlastDBCmdDialog.cpp | 2 +-
.../src/bowtie/BowtieTask.cpp | 8 +-
.../external_tool_support/src/bowtie/BowtieTask.h | 4 +-
.../src/bowtie/BowtieWorker.cpp | 4 -
.../src/bowtie/BowtieWorker.h | 1 -
.../src/bowtie2/Bowtie2Task.cpp | 8 +-
.../src/bowtie2/Bowtie2Task.h | 4 +-
.../src/bowtie2/Bowtie2Worker.cpp | 4 -
.../src/bowtie2/Bowtie2Worker.h | 1 -
.../external_tool_support/src/bwa/BwaMemWorker.cpp | 4 -
.../external_tool_support/src/bwa/BwaMemWorker.h | 1 -
.../external_tool_support/src/bwa/BwaTask.cpp | 26 +-
.../external_tool_support/src/bwa/BwaTask.h | 10 +-
.../external_tool_support/src/bwa/BwaWorker.cpp | 4 -
.../external_tool_support/src/bwa/BwaWorker.h | 1 -
.../src/cap3/CAP3SupportDialog.cpp | 2 +-
.../src/clustalo/ClustalOSupportRunDialog.cpp | 4 +-
.../src/clustalw/ClustalWSupportRunDialog.cpp | 4 +-
.../src/cutadapt/CutadaptWorker.cpp | 5 +-
.../src/hmmer/ConvertAlignment2StockholmTask.cpp | 121 +
.../src/hmmer/ConvertAlignment2StockholmTask.h} | 36 +-
.../src/hmmer/HmmerBuildDialog.cpp | 243 ++
.../src/hmmer/HmmerBuildDialog.h} | 62 +-
.../src/hmmer/HmmerBuildDialog.ui} | 4 +-
.../src/hmmer/HmmerBuildFromFileTask.cpp | 105 +
.../src/hmmer/HmmerBuildFromFileTask.h} | 54 +-
.../src/hmmer/HmmerBuildFromMsaTask.cpp | 116 +
.../src/hmmer/HmmerBuildFromMsaTask.h} | 37 +-
.../src/hmmer/HmmerBuildSettings.cpp | 71 +
.../src/hmmer/HmmerBuildSettings.h | 83 +
.../src/hmmer/HmmerBuildTask.cpp | 204 ++
.../src/hmmer/HmmerBuildTask.h} | 37 +-
.../src/hmmer/HmmerBuildWorker.cpp} | 167 +-
.../src/hmmer/HmmerBuildWorker.h} | 46 +-
.../src/hmmer/HmmerParseSearchResultsTask.cpp | 101 +
.../src/hmmer/HmmerParseSearchResultsTask.h | 79 +
.../src/hmmer/HmmerSearchDialog.cpp | 248 ++
.../src/hmmer/HmmerSearchDialog.h} | 54 +-
.../src/hmmer/HmmerSearchDialog.ui} | 8 +-
.../src/hmmer/HmmerSearchSettings.cpp | 64 +
.../src/hmmer/HmmerSearchSettings.h | 79 +
.../src/hmmer/HmmerSearchTask.cpp | 242 ++
.../src/hmmer/HmmerSearchTask.h | 68 +
.../src/hmmer/HmmerSearchWorker.cpp | 332 +++
.../src/hmmer/HmmerSearchWorker.h} | 57 +-
.../src/hmmer/HmmerSupport.cpp | 380 +++
.../external_tool_support/src/hmmer/HmmerSupport.h | 107 +
.../src/hmmer/PhmmerSearchDialog.cpp | 223 ++
.../src/hmmer/PhmmerSearchDialog.h} | 73 +-
.../src/hmmer/PhmmerSearchDialog.ui} | 45 +-
.../src/hmmer/PhmmerSearchSettings.cpp | 81 +
.../src/hmmer/PhmmerSearchSettings.h | 78 +
.../src/hmmer/PhmmerSearchTask.cpp | 228 ++
.../src/hmmer/PhmmerSearchTask.h | 67 +
.../external_tool_support/src/java/JavaSupport.cpp | 12 +-
.../external_tool_support/src/java/JavaSupport.h | 9 +
.../src/mafft/MAFFTSupportRunDialog.cpp | 4 +-
.../external_tool_support/src/phyml/PhyMLDialog.ui | 52 +-
.../src/snpeff/SnpEffSupport.cpp | 10 +-
.../src/snpeff/SnpEffTask.cpp | 7 +
.../src/snpeff/SnpEffWorker.cpp | 58 +-
.../src/tcoffee/TCoffeeSupportRunDialog.cpp | 4 +-
.../src/utils/BaseShortReadsAlignerWorker.cpp | 6 +-
.../src/utils/BaseShortReadsAlignerWorker.h | 1 -
.../src/utils/BlastRunCommonDialog.cpp | 2 +-
.../src/utils/ExportTasks.cpp | 88 +-
.../external_tool_support/src/utils/ExportTasks.h | 29 +-
.../src/utils/ExternalToolValidateTask.cpp | 4 +-
.../src/utils/ExternalToolValidateTask.h | 2 +
.../external_tool_support/transl/english.ts | 4 +-
.../external_tool_support/transl/russian.ts | 4 +-
.../src/GenomeAlignerSettingsController.cpp | 2 +-
.../opencl_support/src/OpenCLSupportPlugin.cpp | 2 +-
.../src/OpenCLSupportSettingsController.cpp | 2 +-
src/plugins/opencl_support/transl/english.ts | 23 +-
src/plugins/opencl_support/transl/russian.ts | 23 +-
src/plugins/orf_marker/src/ORFDialog.cpp | 2 +-
src/plugins/pcr/src/EditPrimerDialog.cpp | 2 +-
src/plugins/pcr/src/InSilicoPcrOPWidgetFactory.cpp | 2 +-
src/plugins/pcr/src/PrimerLibrarySelector.cpp | 2 +-
src/plugins/pcr/src/PrimerLibraryWidget.cpp | 2 +-
src/plugins/pcr/src/PrimersDetailsDialog.cpp | 2 +-
src/plugins/pcr/src/export/ExportPrimersDialog.cpp | 2 +-
src/plugins/pcr/src/import/ImportPrimersDialog.cpp | 2 +-
src/plugins/pcr/transl/russian.ts | 8 +-
src/plugins/query_designer/src/QDRunDialog.cpp | 4 +-
.../query_designer/src/QueryViewController.cpp | 2 +-
.../remote_blast/src/SendSelectionDialog.cpp | 2 +-
.../repeat_finder/src/FindRepeatsDialog.cpp | 2 +-
.../repeat_finder/src/FindTandemsDialog.cpp | 2 +-
.../weight_matrix/src/PWMBuildDialogController.cpp | 2 +-
.../src/PWMJASPARDialogController.cpp | 2 +-
.../src/PWMSearchDialogController.cpp | 2 +-
.../src/SetParametersDialogController.cpp | 2 +-
.../src/ViewMatrixDialogController.cpp | 2 +-
.../workflow_designer/src/CreateScriptWorker.cpp | 2 +-
.../src/DashboardsManagerDialog.cpp | 2 +-
.../workflow_designer/src/ImportSchemaDialog.cpp | 2 +-
.../src/PortAliasesConfigurationDialog.cpp | 2 +-
.../src/SchemaAliasesConfigurationDialogImpl.cpp | 2 +-
.../workflow_designer/src/StartupDialog.cpp | 2 +-
.../src/WorkflowDesignerPlugin.cpp | 3 +
.../workflow_designer/src/WorkflowMetaDialog.cpp | 2 +-
.../src/WorkflowSettingsController.cpp | 2 +-
.../src/WorkflowViewController.cpp | 1 +
.../src/library/BaseDocWriter.cpp | 1 +
.../ConvertSnpeffVariationsToAnnotationsWorker.cpp | 177 ++
.../ConvertSnpeffVariationsToAnnotationsWorker.h} | 143 +-
.../workflow_designer/src/library/CoreLib.cpp | 52 +-
.../src/library/FASTQWorkersLibrary.cpp | 15 +-
.../src/library/FilterBamWorker.cpp | 3 +-
.../src/library/PassFilterWorker.cpp | 2 +-
.../src/library/WriteAnnotationsWorker.cpp | 1 +
src/plugins/workflow_designer/transl/english.ts | 4 +-
src/plugins/workflow_designer/transl/russian.ts | 4 +-
.../workflow_designer/workflow_designer.pro | 5 +
src/plugins_3rdparty/gor4/src/gor.cpp | 2 +-
.../hmm2/src/u_build/HMMBuildDialogController.cpp | 2 +-
.../u_calibrate/HMMCalibrateDialogController.cpp | 2 +-
.../src/u_search/HMMSearchDialogController.cpp | 2 +-
src/plugins_3rdparty/hmm3/CMakeLists.txt | 5 -
src/plugins_3rdparty/hmm3/hmm3.license | 674 -----
src/plugins_3rdparty/hmm3/hmm3.pri | 22 -
src/plugins_3rdparty/hmm3/hmm3.pro | 152 --
src/plugins_3rdparty/hmm3/hmm3.qrc | 5 -
.../hmm3/src/build/uHMM3BuildDialogImpl.cpp | 243 --
.../hmm3/src/build/uHMM3BuildTask.cpp | 331 ---
.../hmm3/src/build/uHMM3BuildTask.h | 100 -
src/plugins_3rdparty/hmm3/src/build/uhmm3build.cpp | 120 -
.../hmm3/src/format/uHMMFormat.cpp | 337 ---
src/plugins_3rdparty/hmm3/src/format/uHMMFormat.h | 68 -
.../hmm3/src/format/uHMMFormatReader.cpp | 924 -------
.../hmm3/src/format/uHMMFormatReader.h | 71 -
.../hmm3/src/gobject/uHMMObject.cpp | 65 -
src/plugins_3rdparty/hmm3/src/gobject/uHMMObject.h | 60 -
src/plugins_3rdparty/hmm3/src/hmmer3/COPYRIGHT | 62 -
src/plugins_3rdparty/hmm3/src/hmmer3/build.cpp | 357 ---
.../hmm3/src/hmmer3/easel/COPYRIGHT | 27 -
src/plugins_3rdparty/hmm3/src/hmmer3/easel/LICENSE | 32 -
.../hmm3/src/hmmer3/easel/easel.cpp | 1490 ----------
src/plugins_3rdparty/hmm3/src/hmmer3/easel/easel.h | 377 ---
.../hmm3/src/hmmer3/easel/esl_alphabet.cpp | 1583 -----------
.../hmm3/src/hmmer3/easel/esl_alphabet.h | 133 -
.../hmm3/src/hmmer3/easel/esl_cluster.cpp | 197 --
.../hmm3/src/hmmer3/easel/esl_cluster.h | 12 -
.../hmm3/src/hmmer3/easel/esl_config.h | 142 -
.../hmm3/src/hmmer3/easel/esl_dirichlet.cpp | 1014 -------
.../hmm3/src/hmmer3/easel/esl_dirichlet.h | 75 -
.../hmm3/src/hmmer3/easel/esl_distance.cpp | 894 ------
.../hmm3/src/hmmer3/easel/esl_distance.h | 76 -
.../hmm3/src/hmmer3/easel/esl_dmatrix.cpp | 1271 ---------
.../hmm3/src/hmmer3/easel/esl_dmatrix.h | 90 -
.../hmm3/src/hmmer3/easel/esl_exponential.cpp | 442 ---
.../hmm3/src/hmmer3/easel/esl_exponential.h | 56 -
.../hmm3/src/hmmer3/easel/esl_gumbel.cpp | 920 -------
.../hmm3/src/hmmer3/easel/esl_gumbel.h | 60 -
.../hmm3/src/hmmer3/easel/esl_histogram.cpp | 1684 ------------
.../hmm3/src/hmmer3/easel/esl_histogram.h | 130 -
.../hmm3/src/hmmer3/easel/esl_hmm.cpp | 470 ----
.../hmm3/src/hmmer3/easel/esl_hmm.h | 62 -
.../hmm3/src/hmmer3/easel/esl_keyhash.cpp | 813 ------
.../hmm3/src/hmmer3/easel/esl_keyhash.h | 70 -
.../hmm3/src/hmmer3/easel/esl_minimizer.cpp | 661 -----
.../hmm3/src/hmmer3/easel/esl_minimizer.h | 35 -
.../hmm3/src/hmmer3/easel/esl_msa.cpp | 2886 --------------------
.../hmm3/src/hmmer3/easel/esl_msa.h | 213 --
.../hmm3/src/hmmer3/easel/esl_msacluster.cpp | 237 --
.../hmm3/src/hmmer3/easel/esl_msacluster.h | 22 -
.../hmm3/src/hmmer3/easel/esl_msaweight.cpp | 525 ----
.../hmm3/src/hmmer3/easel/esl_msaweight.h | 28 -
.../hmm3/src/hmmer3/easel/esl_random.cpp | 736 -----
.../hmm3/src/hmmer3/easel/esl_random.h | 59 -
.../hmm3/src/hmmer3/easel/esl_randomseq.cpp | 1154 --------
.../hmm3/src/hmmer3/easel/esl_randomseq.h | 51 -
.../hmm3/src/hmmer3/easel/esl_ratematrix.cpp | 552 ----
.../hmm3/src/hmmer3/easel/esl_ratematrix.h | 36 -
.../hmm3/src/hmmer3/easel/esl_rootfinder.cpp | 348 ---
.../hmm3/src/hmmer3/easel/esl_rootfinder.h | 60 -
.../hmm3/src/hmmer3/easel/esl_scorematrix.cpp | 974 -------
.../hmm3/src/hmmer3/easel/esl_scorematrix.h | 79 -
.../hmm3/src/hmmer3/easel/esl_sq.cpp | 1848 -------------
.../hmm3/src/hmmer3/easel/esl_sq.h | 198 --
.../hmm3/src/hmmer3/easel/esl_sse.cpp | 276 --
.../hmm3/src/hmmer3/easel/esl_sse.h | 224 --
.../hmm3/src/hmmer3/easel/esl_stack.cpp | 396 ---
.../hmm3/src/hmmer3/easel/esl_stack.h | 61 -
.../hmm3/src/hmmer3/easel/esl_stats.cpp | 556 ----
.../hmm3/src/hmmer3/easel/esl_stats.h | 30 -
.../hmm3/src/hmmer3/easel/esl_tree.cpp | 1218 ---------
.../hmm3/src/hmmer3/easel/esl_tree.h | 130 -
.../hmm3/src/hmmer3/easel/esl_vectorops.cpp | 1144 --------
.../hmm3/src/hmmer3/easel/esl_vectorops.h | 121 -
.../hmm3/src/hmmer3/easel/esl_wuss.cpp | 429 ---
.../hmm3/src/hmmer3/easel/esl_wuss.h | 28 -
src/plugins_3rdparty/hmm3/src/hmmer3/emit.cpp | 383 ---
src/plugins_3rdparty/hmm3/src/hmmer3/errors.cpp | 59 -
src/plugins_3rdparty/hmm3/src/hmmer3/evalues.cpp | 500 ----
src/plugins_3rdparty/hmm3/src/hmmer3/eweight.cpp | 98 -
src/plugins_3rdparty/hmm3/src/hmmer3/hmmer.cpp | 177 --
src/plugins_3rdparty/hmm3/src/hmmer3/hmmer.h | 1149 --------
.../hmm3/src/hmmer3/hmmer3_funcs.cpp | 47 -
.../hmm3/src/hmmer3/hmmer3_funcs.h | 31 -
.../hmm3/src/hmmer3/impl_sse/decoding.cpp | 204 --
.../hmm3/src/hmmer3/impl_sse/fwdback.cpp | 762 ------
.../hmm3/src/hmmer3/impl_sse/impl_sse.h | 410 ---
.../hmm3/src/hmmer3/impl_sse/msvfilter.cpp | 221 --
.../hmm3/src/hmmer3/impl_sse/null2.cpp | 230 --
.../hmm3/src/hmmer3/impl_sse/optacc.cpp | 431 ---
.../hmm3/src/hmmer3/impl_sse/p7_omx.cpp | 632 -----
.../hmm3/src/hmmer3/impl_sse/p7_oprofile.cpp | 1453 ----------
.../hmm3/src/hmmer3/impl_sse/stotrace.cpp | 301 --
.../hmm3/src/hmmer3/impl_sse/vitfilter.cpp | 265 --
src/plugins_3rdparty/hmm3/src/hmmer3/logsum.cpp | 116 -
.../hmm3/src/hmmer3/modelconfig.cpp | 567 ----
.../hmm3/src/hmmer3/modelstats.cpp | 233 --
.../hmm3/src/hmmer3/p7_alidisplay.cpp | 385 ---
src/plugins_3rdparty/hmm3/src/hmmer3/p7_bg.cpp | 302 --
.../hmm3/src/hmmer3/p7_builder.cpp | 668 -----
src/plugins_3rdparty/hmm3/src/hmmer3/p7_config.h | 150 -
.../hmm3/src/hmmer3/p7_domaindef.cpp | 783 ------
src/plugins_3rdparty/hmm3/src/hmmer3/p7_gmx.cpp | 305 ---
src/plugins_3rdparty/hmm3/src/hmmer3/p7_hmm.cpp | 1222 ---------
.../hmm3/src/hmmer3/p7_pipeline.cpp | 794 ------
src/plugins_3rdparty/hmm3/src/hmmer3/p7_prior.cpp | 443 ---
.../hmm3/src/hmmer3/p7_profile.cpp | 608 -----
.../hmm3/src/hmmer3/p7_spensemble.cpp | 459 ----
.../hmm3/src/hmmer3/p7_tophits.cpp | 787 ------
src/plugins_3rdparty/hmm3/src/hmmer3/p7_trace.cpp | 1538 -----------
src/plugins_3rdparty/hmm3/src/hmmer3/seqmodel.cpp | 111 -
.../hmm3/src/hmmer3/tracealign.cpp | 816 ------
.../hmm3/src/phmmer/uHMM3PhmmerDialogImpl.cpp | 227 --
.../hmm3/src/phmmer/uhmm3PhmmerTask.cpp | 474 ----
.../hmm3/src/phmmer/uhmm3PhmmerTask.h | 153 --
.../hmm3/src/phmmer/uhmm3phmmer.cpp | 262 --
src/plugins_3rdparty/hmm3/src/phmmer/uhmm3phmmer.h | 95 -
.../hmm3/src/search/Hmmer3SearchWorkflowTask.cpp | 134 -
.../hmm3/src/search/Hmmer3SearchWorkflowTask.h | 63 -
.../hmm3/src/search/uHMM3SearchDialogImpl.cpp | 246 --
.../hmm3/src/search/uHMM3SearchTask.cpp | 550 ----
.../hmm3/src/search/uHMM3SearchTask.h | 171 --
.../hmm3/src/search/uhmm3QDActor.cpp | 294 --
.../hmm3/src/search/uhmm3QDActor.h | 60 -
.../hmm3/src/search/uhmm3SearchResult.cpp | 106 -
.../hmm3/src/search/uhmm3SearchResult.h | 79 -
.../hmm3/src/search/uhmm3search.cpp | 157 --
.../task_local_storage/uHMMSearchTaskLocalData.h | 37 -
.../uHMMSearchTaskLocalStorage.cpp | 73 -
.../hmm3/src/tests/uhmmer3BuildTests.cpp | 429 ---
.../hmm3/src/tests/uhmmer3BuildTests.h | 104 -
.../hmm3/src/tests/uhmmer3PhmmerTests.cpp | 260 --
.../hmm3/src/tests/uhmmer3PhmmerTests.h | 104 -
.../hmm3/src/tests/uhmmer3SearchTests.cpp | 752 -----
.../hmm3/src/tests/uhmmer3SearchTests.h | 146 -
.../hmm3/src/tests/uhmmer3Tests.cpp | 41 -
src/plugins_3rdparty/hmm3/src/uHMM3Plugin.cpp | 293 --
src/plugins_3rdparty/hmm3/src/uHMM3Plugin.h | 88 -
.../hmm3/src/util/uhmm3Utilities.cpp | 246 --
.../hmm3/src/util/uhmm3Utilities.h | 59 -
.../hmm3/src/workers/HMM3IOWorker.cpp | 322 ---
.../hmm3/src/workers/HMM3IOWorker.h | 137 -
.../hmm3/src/workers/HMM3SearchWorker.cpp | 355 ---
src/plugins_3rdparty/hmm3/transl/english.ts | 2209 ---------------
src/plugins_3rdparty/hmm3/transl/russian.ts | 2206 ---------------
.../kalign/src/KalignDialogController.cpp | 4 +-
src/plugins_3rdparty/primer3/src/Primer3Dialog.cpp | 2 +-
.../sitecon/src/SiteconBuildDialogController.cpp | 2 +-
.../sitecon/src/SiteconSearchDialogController.cpp | 2 +-
.../umuscle/src/MuscleAlignDialogController.cpp | 4 +-
src/ugene_version.pri | 8 +-
src/ugenem/src/SendReportDialog.cpp | 1 -
src/ugeneui/html/version_news.html | 25 +-
.../DirectoriesSettingsGUIController.cpp | 2 +-
.../FormatSettingsGUIController.cpp | 2 +-
.../logview_settings/LogSettingsGUIController.cpp | 2 +-
.../NetworkSettingsGUIController.cpp | 2 +-
.../ResourceSettingsGUIController.cpp | 2 +-
.../UserApplicationsSettingsGUIController.cpp | 2 +-
src/ugeneui/src/main_window/ShutdownTask.cpp | 131 +-
src/ugeneui/src/main_window/ShutdownTask.h | 24 +-
.../DocumentFormatSelectorController.cpp | 2 +-
.../DocumentProviderSelectorController.cpp | 2 +-
.../DocumentReadingModeSelectorController.cpp | 2 +-
.../ExportProjectDialogController.cpp | 2 +-
...tipleDocumentsReadingModeSelectorController.cpp | 2 +-
.../src/project_support/ProjectLoaderImpl.cpp | 2 +-
.../src/project_support/ProjectTasksGui.cpp | 10 +-
src/ugeneui/src/project_support/ProjectTasksGui.h | 1 -
ugene.pro | 4 -
536 files changed, 8534 insertions(+), 61694 deletions(-)
diff --git a/data/cmdline/hmm3-search.uwl b/data/cmdline/hmm3-build-and-search.uwl
similarity index 57%
rename from data/cmdline/hmm3-search.uwl
rename to data/cmdline/hmm3-build-and-search.uwl
index 97e45cb..cc02426 100644
--- a/data/cmdline/hmm3-search.uwl
+++ b/data/cmdline/hmm3-build-and-search.uwl
@@ -1,18 +1,23 @@
#@UGENE_WORKFLOW
-#The workflow searches for HMMER signals in a sequence.
-#The output is a file with signal annotations.
+#Builds a new profile HMM from an input alignment, calibrates the HMM and saves it to a file. Then runs a test HMM search over a sample sequence and saves the test results to a Genbank file. To run this workflow, you need to specify appropriate locations for input/output files. This is achieved by selecting a task and editing the relevant parameters in the Property Inspector panel. Optionally, fine-tune the build/search parameters as you see fit. Then schedule the workflow for execution by pressing CTRL+R s [...]
-workflow "HMMER 3 search"{
+workflow "New workflow"{
+ hmm3-build {
+ type:hmm3-build;
+ name:"HMM3 Build";
+ }
hmm3-search {
type:hmm3-search;
name:"HMM3 Search";
}
- hmm3-read-profile {
- type:hmm3-read-profile;
- name:"Read HMM3 Profile";
+ write-annotations {
+ type:write-annotations;
+ name:"Write Annotations";
+ url-out:"";
+ write-mode:0;
}
read-sequence {
type:read-sequence;
@@ -21,49 +26,33 @@ workflow "HMMER 3 search"{
dataset:"Dataset 1";
}
}
- write-annotations {
- type:write-annotations;
- name:"Write Annotations";
- write-mode:1;
+ read-msa {
+ type:read-msa;
+ name:"Read Alignment";
+ url-in {
+ dataset:"Dataset 1";
+ }
}
.actor-bindings {
- read-sequence.out-sequence->hmm3-search.in-sequence
+ read-msa.out-msa->hmm3-build.in-msa
+ hmm3-build.out-hmm3->hmm3-search.in-hmm3
hmm3-search.out-annotations->write-annotations.in-annotations
- hmm3-read-profile.out-hmm3->hmm3-search.in-hmm3
+ read-sequence.out-sequence->hmm3-search.in-sequence
}
- hmm3-read-profile.hmm3-profile->hmm3-search.in-hmm3.hmm3-profile
+ read-msa.msa->hmm3-build.in-msa.msa
+ hmm3-build.url->hmm3-search.in-hmm3.url
read-sequence.sequence->hmm3-search.in-sequence.sequence
hmm3-search.annotations->write-annotations.in-annotations.annotations
- read-sequence.annotations->write-annotations.in-annotations.annotations
- read-sequence.sequence->write-annotations.in-annotations.sequence
.meta {
parameter-aliases {
- hmm3-search.F1 {
- alias:F1;
- }
- hmm3-search.F2 {
- alias:F2;
- }
- hmm3-search.F3 {
- alias:F3;
- }
- hmm3-search.domZ {
- alias:domZ;
- }
hmm3-search.e-val {
alias:domE;
}
- hmm3-search.max {
- alias:max;
- }
- hmm3-search.nobias {
- alias:nobias;
- }
- hmm3-search.nonull2 {
- alias:nonull2;
+ hmm3-search.filter-by {
+ alias:filter_by;
}
hmm3-search.result-name {
alias:annotation-name;
@@ -74,55 +63,62 @@ workflow "HMMER 3 search"{
hmm3-search.seed {
alias:seed;
}
- hmm3-search.threshold-type {
- alias:threshold-type;
- }
- hmm3-read-profile.url-in {
- alias:hmm;
- }
- read-sequence.url-in {
- alias:in;
- }
write-annotations.document-format {
alias:format;
}
write-annotations.url-out {
alias:out;
}
+ read-sequence.url-in {
+ alias:in_sequence;
+ }
+ read-msa.url-in {
+ alias:in_alignment;
+ }
}
visual {
- hmm3-read-profile {
- pos:"-960 -690";
+ hmm3-build {
+ pos:"-780 -570";
style:ext;
bg-color-ext:"0 128 128 64";
+ in-msa.angle:180;
out-hmm3.angle:360;
}
hmm3-search {
- pos:"-720 -600";
+ pos:"-600 -435";
style:ext;
bg-color-ext:"0 128 128 64";
in-hmm3.angle:150;
in-sequence.angle:210;
out-annotations.angle:360;
}
+ read-msa {
+ pos:"-975 -570";
+ style:ext;
+ bg-color-ext:"0 128 128 64";
+ out-msa.angle:360;
+ }
read-sequence {
- pos:"-960 -540";
+ pos:"-780 -420";
style:ext;
bg-color-ext:"0 128 128 64";
out-sequence.angle:360;
}
write-annotations {
- pos:"-420 -600";
+ pos:"-360 -435";
style:ext;
bg-color-ext:"0 128 128 64";
in-annotations.angle:180;
}
- hmm3-read-profile.out-hmm3->hmm3-search.in-hmm3 {
+ hmm3-build.out-hmm3->hmm3-search.in-hmm3 {
text-pos:"-35 -24";
}
hmm3-search.out-annotations->write-annotations.in-annotations {
text-pos:"-45 -37";
}
+ read-msa.out-msa->hmm3-build.in-msa {
+ text-pos:"-45 -50";
+ }
read-sequence.out-sequence->hmm3-search.in-sequence {
text-pos:"-27.5 -24";
}
diff --git a/data/workflow_samples/NGS/call_variants_full.uwl b/data/workflow_samples/NGS/call_variants_full.uwl
new file mode 100644
index 0000000..316aa38
--- /dev/null
+++ b/data/workflow_samples/NGS/call_variants_full.uwl
@@ -0,0 +1,413 @@
+#@UGENE_WORKFLOW
+#Call variants for an input assembly and a reference sequence using SAMtools mpileup and bcftools.
+#Predict effects of the found variants using SnpEff.
+
+
+
+workflow "Variant calling and effects prediction"{
+
+ read-assembly {
+ type:get-file-list;
+ name:"Read Assembly (BAM/SAM)";
+ url-in {
+ dataset:Dataset;
+ }
+ }
+ read-sequence {
+ type:get-file-list;
+ name:"Read Sequence";
+ url-in {
+ dataset:Dataset;
+ }
+ }
+ call_variants {
+ type:call_variants;
+ name:"Call Variants";
+ max-dep:100;
+ reference-source:port;
+ variants-url:variants.vcf;
+ }
+ files-conversion-1 {
+ type:files-conversion;
+ name:"To FASTA";
+ document-format:fasta;
+ }
+ files-conversion-2 {
+ type:files-conversion;
+ name:"To BAM";
+ document-format:bam;
+ }
+ rename-chromosome-in-variation {
+ type:rename-chromosome-in-variation;
+ name:"Chr Notation";
+ prefix-replace-with:chr;
+ prefixes-to-replace:"NC_00000;NC_0000";
+ }
+ seff {
+ type:seff;
+ name:"Annotate and Predict Effects with SnpEff";
+ }
+ convert-snpeff-variations-to-annotations {
+ type:convert-snpeff-variations-to-annotations;
+ name:"To Annotations";
+ }
+
+ .actor-bindings {
+ call_variants.out-variations->rename-chromosome-in-variation.in-file
+ files-conversion-2.out-file->call_variants.in-assembly
+ read-sequence.out-url->files-conversion-1.in-file
+ files-conversion-1.out-file->call_variants.in-sequence
+ seff.out-file->convert-snpeff-variations-to-annotations.in-variations-url
+ read-assembly.out-url->files-conversion-2.in-file
+ rename-chromosome-in-variation.out-file->seff.in-file
+ }
+
+ read-assembly.dataset->call_variants.in-assembly.dataset
+ files-conversion-2.url->call_variants.in-assembly.url
+ files-conversion-1.url->call_variants.in-sequence.url
+ read-sequence.url->files-conversion-1.in-file.url
+ read-assembly.url->files-conversion-2.in-file.url
+ call_variants.url->rename-chromosome-in-variation.in-file.url
+ rename-chromosome-in-variation.url->seff.in-file.url
+ seff.url->convert-snpeff-variations-to-annotations.in-variations-url.url
+
+ .meta {
+ parameter-aliases {
+ read-assembly.url-in {
+ alias:assembly_in;
+ }
+ read-sequence.url-in {
+ alias:seq_in;
+ }
+ call_variants.acgt_only {
+ alias:a_c_g_t;
+ }
+ call_variants.bcf_bed {
+ alias:bcf_bed;
+ }
+ call_variants.bed {
+ alias:bed_pos_list_file;
+ }
+ call_variants.disable_baq {
+ alias:BAQ;
+ }
+ call_variants.keepalt {
+ alias:retain_all_pos_alt;
+ }
+ call_variants.max_depth {
+ alias:max_reads_num;
+ }
+ call_variants.no_indel {
+ alias:no_indels;
+ }
+ call_variants.ptype {
+ alias:allele_freq_spect;
+ }
+ call_variants.theta {
+ alias:theta;
+ }
+ call_variants.variants-url {
+ alias:out_file;
+ }
+ }
+ visual {
+ call_variants {
+ pos:"-465 -510";
+ style:ext;
+ bg-color-ext:"0 129 94 64";
+ bounds:"-30 -30 123.125 72";
+ in-assembly.angle:176.73;
+ in-sequence.angle:235.098;
+ out-variations.angle:351.674;
+ }
+ convert-snpeff-variations-to-annotations {
+ pos:"-450 -285";
+ style:simple;
+ bg-color-simple:"234 223 240 255";
+ in-variations-url.angle:3.25195;
+ }
+ files-conversion-1 {
+ pos:"-615 -450";
+ style:simple;
+ bg-color-simple:"84 84 84 255";
+ in-file.angle:196.858;
+ out-file.angle:6.5198;
+ }
+ files-conversion-2 {
+ pos:"-615 -555";
+ style:simple;
+ bg-color-simple:"84 84 84 255";
+ in-file.angle:170.538;
+ out-file.angle:334.799;
+ }
+ read-assembly {
+ pos:"-870 -600";
+ style:ext;
+ bg-color-ext:"24 102 175 64";
+ bounds:"-30 -30 111.375 66";
+ out-url.angle:360;
+ }
+ read-sequence {
+ pos:"-885 -435";
+ style:ext;
+ bg-color-ext:"24 102 175 64";
+ bounds:"-30 -30 110.25 62";
+ out-url.angle:349.061;
+ }
+ rename-chromosome-in-variation {
+ pos:"-210 -495";
+ style:simple;
+ bg-color-simple:"232 227 230 255";
+ in-file.angle:180;
+ out-file.angle:270.785;
+ }
+ seff {
+ pos:"-270 -285";
+ style:ext;
+ bg-color-ext:"0 129 94 64";
+ in-file.angle:28.6105;
+ out-file.angle:181.848;
+ }
+ call_variants.out-variations->rename-chromosome-in-variation.in-file {
+ text-pos:"-45 -43";
+ }
+ files-conversion-1.out-file->call_variants.in-sequence {
+ text-pos:"-19 -31";
+ }
+ files-conversion-2.out-file->call_variants.in-assembly {
+ text-pos:"-12 -24";
+ }
+ read-assembly.out-url->files-conversion-2.in-file {
+ text-pos:"-37 -11";
+ }
+ read-sequence.out-url->files-conversion-1.in-file {
+ text-pos:"-31 -21";
+ }
+ rename-chromosome-in-variation.out-file->seff.in-file {
+ text-pos:"-45 -43";
+ }
+ seff.out-file->convert-snpeff-variations-to-annotations.in-variations-url {
+ text-pos:"-45 -43";
+ }
+ }
+ wizard {
+ name:"Call Variants Wizard";
+ page {
+ id:1;
+ next:2;
+ title:"Input reference sequence and assembly";
+ parameters-area {
+ group {
+ title:"Input files";
+ lineedit-and-dataset {
+ read-sequence.url-in {
+ label:"Reference sequence file";
+ }
+ read-assembly.url-in {
+ label:"BAM/SAM file";
+ }
+ }
+ }
+ }
+ }
+ page {
+ id:2;
+ next:3;
+ title:"SAMtools <i>mpileup</i> parameters";
+ parameters-area {
+ group {
+ title:Parameters;
+ label-size:200;
+ call_variants.use_orphan {
+ }
+ call_variants.disable_baq {
+ }
+ call_variants.capq_thres {
+ }
+ call_variants.max_depth {
+ }
+ call_variants.ext_baq {
+ }
+ call_variants.bed {
+ }
+ call_variants.reg {
+ }
+ call_variants.min_mq {
+ }
+ call_variants.min_baseq {
+ }
+ }
+ group {
+ title:Additional;
+ label-size:200;
+ type:hideable;
+ call_variants.illumina13-encoding {
+ }
+ call_variants.extQ {
+ }
+ call_variants.tandemQ {
+ }
+ call_variants.no_indel {
+ }
+ call_variants.max_indel_depth {
+ }
+ call_variants.openQ {
+ }
+ call_variants.pl_list {
+ }
+ }
+ }
+ }
+ page {
+ id:3;
+ next:4;
+ title:"SAMtools <i>bcftools view</i> parameters";
+ parameters-area {
+ group {
+ title:Parameters;
+ label-size:200;
+ call_variants.keepalt {
+ }
+ call_variants.fix_pl {
+ }
+ call_variants.no_geno {
+ }
+ call_variants.acgt_only {
+ }
+ call_variants.bcf_bed {
+ }
+ call_variants.qcall {
+ }
+ call_variants.samples {
+ }
+ call_variants.min_smpl_frac {
+ }
+ call_variants.call_gt {
+ }
+ }
+ group {
+ title:Additional;
+ label-size:200;
+ type:hideable;
+ call_variants.indel_frac {
+ }
+ call_variants.pref {
+ }
+ call_variants.ptype {
+ }
+ call_variants.theta {
+ }
+ call_variants.ccall {
+ }
+ call_variants.n1 {
+ }
+ call_variants.n_perm {
+ }
+ call_variants.min_perm_p {
+ }
+ }
+ }
+ }
+ page {
+ id:4;
+ next:5;
+ title:"SAMtools <i>vcfutils varFilter</i> parameters";
+ parameters-area {
+ group {
+ title:Parameters;
+ label-size:200;
+ call_variants.print-filtered {
+ }
+ call_variants.min-qual {
+ }
+ call_variants.min-dep {
+ }
+ call_variants.max-dep {
+ }
+ call_variants.min-alt-bases {
+ }
+ call_variants.gap-size {
+ }
+ call_variants.window {
+ }
+ }
+ group {
+ title:Additional;
+ label-size:200;
+ type:hideable;
+ call_variants.min-strand {
+ }
+ call_variants.min-baseQ {
+ }
+ call_variants.min-mapQ {
+ }
+ call_variants.min-end-distance {
+ }
+ call_variants.min-hwe {
+ }
+ }
+ }
+ }
+ page {
+ id:5;
+ next:6;
+ title:"Change chromosome notation for variations";
+ parameters-area {
+ group {
+ title:Parameters;
+ label-size:150;
+ rename-chromosome-in-variation.prefixes-to-replace {
+ }
+ rename-chromosome-in-variation.prefix-replace-with {
+ }
+ }
+ }
+ }
+ page {
+ id:6;
+ next:7;
+ title:"SnpEff parameters";
+ parameters-area {
+ group {
+ title:Parameters;
+ label-size:200;
+ seff.genome {
+ }
+ }
+ group {
+ title:Additional;
+ label-size:200;
+ type:hideable;
+ seff.canon {
+ }
+ seff.hgvs {
+ }
+ seff.lof {
+ }
+ seff.motif {
+ }
+ seff.updown-length {
+ }
+ }
+ }
+ }
+ page {
+ id:7;
+ title:"Output files";
+ parameters-area {
+ group {
+ title:"Output variants";
+ label-size:110;
+ call_variants.variants-url {
+ label:"Output VCF file"
+ }
+ convert-snpeff-variations-to-annotations.url-out {
+ label:"Output GenBank file"
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
diff --git a/data/workflow_samples/NGS/fastqc.uwl b/data/workflow_samples/NGS/fastqc.uwl
index 4305555..40ab17d 100644
--- a/data/workflow_samples/NGS/fastqc.uwl
+++ b/data/workflow_samples/NGS/fastqc.uwl
@@ -68,7 +68,7 @@ workflow "Quality control by FastQC"{
title:"Input parameters";
label-size:120;
get-file-list.url-in {
- label:"FASTQ URL(s)";
+ label:"Input file(s)";
}
}
group {
diff --git a/data/workflow_samples/NGS/raw_ngs/chipseq/chipseq_paired.uwl b/data/workflow_samples/NGS/raw_ngs/chipseq/chipseq_paired.uwl
index 127e242..2c700df 100644
--- a/data/workflow_samples/NGS/raw_ngs/chipseq/chipseq_paired.uwl
+++ b/data/workflow_samples/NGS/raw_ngs/chipseq/chipseq_paired.uwl
@@ -27,14 +27,13 @@ workflow "Processing of raw ChIP-Seq paired-end reads"{
type:CASAVAFilter;
name:"Filter Reads by CASAVA Header";
custom-dir:filtered_fastq;
- out-mode:1;
}
QualityTrim {
type:QualityTrim;
name:"Trim Reads by Quality";
custom-dir:filtered_fastq;
- out-mode:0;
len-id:1;
+ out-mode:0;
qual-id:20;
}
get-file-list {
@@ -53,8 +52,8 @@ workflow "Processing of raw ChIP-Seq paired-end reads"{
files-conversion {
type:files-conversion;
name:"Convert to BAM";
- out-mode:0;
document-format:bam;
+ out-mode:0;
}
merge-bam {
type:merge-bam;
@@ -65,8 +64,8 @@ workflow "Processing of raw ChIP-Seq paired-end reads"{
type:filter-bam;
name:"Filter Merged BAM File with SAMtools";
flag:"The read is unmapped";
- out-mode:0;
mapq:1;
+ out-mode:0;
}
rmdup-bam {
type:rmdup-bam;
@@ -77,8 +76,8 @@ workflow "Processing of raw ChIP-Seq paired-end reads"{
files-conversion-1 {
type:files-conversion;
name:"Convert to BED Format";
- out-mode:0;
document-format:bed;
+ out-mode:0;
}
Sort-bam {
type:Sort-bam;
@@ -101,14 +100,13 @@ workflow "Processing of raw ChIP-Seq paired-end reads"{
type:CASAVAFilter;
name:"Filter Reads by CASAVA Header";
custom-dir:filtered_fastq;
- out-mode:1;
}
QualityTrim-1 {
type:QualityTrim;
name:"Trim Reads by Quality";
custom-dir:filtered_fastq;
- out-mode:0;
len-id:1;
+ out-mode:0;
qual-id:20;
}
CutAdaptFastq {
@@ -124,44 +122,45 @@ workflow "Processing of raw ChIP-Seq paired-end reads"{
fastqc {
type:fastqc;
name:"FastQC Quality Control";
- out-mode:1;
}
fastqc-1 {
type:fastqc;
name:"FastQC Quality Control";
- out-mode:1;
}
fastqc-2 {
type:fastqc;
name:"FastQC Quality Control";
- out-mode:1;
}
fastqc-2-1 {
type:fastqc;
name:"FastQC Quality Control";
- out-mode:1;
+ }
+ fastqc-1-1 {
+ type:fastqc;
+ name:"FastQC Quality Control";
}
.actor-bindings {
+ get-file-list.out-url->CASAVAFilter.in-file
+ get-file-list.out-url->fastqc.in-file
+ CASAVAFilter-1.out-file->CutAdaptFastq-1.in-file
QualityTrim.out-file->bwamem-id.in-data
QualityTrim.out-file->fastqc-2.in-file
- bwamem-id.out-data->files-conversion.in-file
- QualityTrim-1.out-file->bwamem-id.in-data-paired
- QualityTrim-1.out-file->fastqc-2-1.in-file
- CutAdaptFastq.out-file->QualityTrim.in-file
CutAdaptFastq-1.out-file->QualityTrim-1.in-file
- merge-bam.out-file->filter-bam.in-file
- get-file-list-1.out-url->CASAVAFilter-1.in-file
- get-file-list-1.out-url->fastqc-1.in-file
- CASAVAFilter-1.out-file->CutAdaptFastq-1.in-file
- CASAVAFilter.out-file->CutAdaptFastq.in-file
Sort-bam.out-file->files-conversion-1.in-file
- files-conversion.out-file->merge-bam.in-file
- get-file-list.out-url->CASAVAFilter.in-file
- get-file-list.out-url->fastqc.in-file
+ Sort-bam.out-file->fastqc-1-1.in-file
+ rmdup-bam.out-file->Sort-bam.in-file
filter-bam.out-file->Sort-bam-1.in-file
Sort-bam-1.out-file->rmdup-bam.in-file
- rmdup-bam.out-file->Sort-bam.in-file
+ merge-bam.out-file->filter-bam.in-file
+ QualityTrim-1.out-file->bwamem-id.in-data-paired
+ QualityTrim-1.out-file->fastqc-2-1.in-file
+ files-conversion.out-file->merge-bam.in-file
+ CASAVAFilter.out-file->CutAdaptFastq.in-file
+ get-file-list-1.out-url->CASAVAFilter-1.in-file
+ get-file-list-1.out-url->fastqc-1.in-file
+ CutAdaptFastq.out-file->QualityTrim.in-file
+ bwamem-id.out-data->files-conversion.in-file
}
get-file-list.url->CASAVAFilter.in-file.url
@@ -183,6 +182,7 @@ workflow "Processing of raw ChIP-Seq paired-end reads"{
get-file-list-1.url->fastqc-1.in-file.url
QualityTrim.url->fastqc-2.in-file.url
QualityTrim-1.url->fastqc-2-1.in-file.url
+ Sort-bam.url->fastqc-1-1.in-file.url
.meta {
parameter-aliases {
@@ -252,14 +252,14 @@ workflow "Processing of raw ChIP-Seq paired-end reads"{
}
visual {
CASAVAFilter {
- pos:"-840 -645";
+ pos:"-810 -615";
style:ext;
bg-color-ext:"194 0 0 64";
in-file.angle:186.911;
out-file.angle:356.112;
}
CASAVAFilter-1 {
- pos:"-840 -465";
+ pos:"-810 -435";
style:ext;
bg-color-ext:"194 0 0 64";
bounds:"-30 -30 146.375 97";
@@ -267,7 +267,7 @@ workflow "Processing of raw ChIP-Seq paired-end reads"{
out-file.angle:356.112;
}
CutAdaptFastq {
- pos:"-570 -630";
+ pos:"-540 -600";
style:ext;
bg-color-ext:"236 177 178 64";
bounds:"-30 -30 99.375 89";
@@ -275,7 +275,7 @@ workflow "Processing of raw ChIP-Seq paired-end reads"{
out-file.angle:360;
}
CutAdaptFastq-1 {
- pos:"-570 -450";
+ pos:"-540 -420";
style:ext;
bg-color-ext:"236 177 178 64";
bounds:"-30 -30 96.5 92";
@@ -283,14 +283,14 @@ workflow "Processing of raw ChIP-Seq paired-end reads"{
out-file.angle:360;
}
QualityTrim {
- pos:"-330 -645";
+ pos:"-300 -615";
style:ext;
bg-color-ext:"204 68 102 64";
in-file.angle:190.305;
out-file.angle:356.558;
}
QualityTrim-1 {
- pos:"-330 -465";
+ pos:"-300 -435";
style:ext;
bg-color-ext:"204 68 102 64";
bounds:"-30 -30 149.375 87";
@@ -298,21 +298,21 @@ workflow "Processing of raw ChIP-Seq paired-end reads"{
out-file.angle:356.558;
}
Sort-bam {
- pos:"-120 -45";
+ pos:"-90 -15";
style:simple;
bg-color-simple:"84 84 84 255";
in-file.angle:176.82;
out-file.angle:360;
}
Sort-bam-1 {
- pos:"-570 -45";
+ pos:"-540 -15";
style:simple;
bg-color-simple:"84 84 84 255";
in-file.angle:88.5312;
out-file.angle:2.38594;
}
bwamem-id {
- pos:"-30 -540";
+ pos:"0 -510";
style:ext;
bg-color-ext:"0 128 0 64";
bounds:"-30 -30 105 92";
@@ -321,73 +321,79 @@ workflow "Processing of raw ChIP-Seq paired-end reads"{
out-data.angle:296.565;
}
fastqc {
- pos:"-840 -825";
+ pos:"-810 -795";
style:simple;
bg-color-simple:"0 128 128 255";
in-file.angle:231.34;
}
fastqc-1 {
- pos:"-840 -270";
+ pos:"-810 -240";
style:simple;
bg-color-simple:"0 128 128 255";
in-file.angle:147.529;
}
+ fastqc-1-1 {
+ pos:"-15 105";
+ style:simple;
+ bg-color-simple:"0 128 128 255";
+ in-file.angle:108.435;
+ }
fastqc-2 {
- pos:"-30 -750";
+ pos:"0 -720";
style:simple;
bg-color-simple:"0 128 128 255";
in-file.angle:207.699;
}
fastqc-2-1 {
- pos:"-75 -345";
+ pos:"-45 -315";
style:simple;
bg-color-simple:"0 128 128 255";
in-file.angle:149.621;
}
files-conversion {
- pos:"0 -270";
+ pos:"0 -240";
style:simple;
bg-color-simple:"84 84 84 255";
in-file.angle:88.2643;
out-file.angle:183.576;
}
files-conversion-1 {
- pos:"60 -60";
+ pos:"60 -30";
style:ext;
bg-color-ext:"64 26 96 64";
in-file.angle:214.778;
out-file.angle:360;
}
filter-bam {
- pos:"-600 -270";
+ pos:"-570 -240";
style:ext;
bg-color-ext:"194 0 0 64";
in-file.angle:356.285;
out-file.angle:290.63;
}
get-file-list {
- pos:"-1035 -645";
+ pos:"-1005 -615";
style:ext;
bg-color-ext:"24 102 175 64";
bounds:"-30 -30 74 94";
out-url.angle:354.447;
}
get-file-list-1 {
- pos:"-1035 -465";
+ pos:"-1005 -435";
style:ext;
bg-color-ext:"24 102 175 64";
bounds:"-30 -30 75 94";
out-url.angle:354.447;
}
merge-bam {
- pos:"-225 -255";
+ pos:"-195 -225";
style:simple;
bg-color-simple:"78 151 184 255";
in-file.angle:1.7357;
out-file.angle:175.486;
}
rmdup-bam {
- pos:"-345 -45";
+ pos:"-315 -15";
style:simple;
bg-color-simple:"218 98 98 255";
in-file.angle:180;
@@ -420,6 +426,9 @@ workflow "Processing of raw ChIP-Seq paired-end reads"{
Sort-bam-1.out-file->rmdup-bam.in-file {
text-pos:"-41 -24";
}
+ Sort-bam.out-file->fastqc-1-1.in-file {
+ text-pos:"-105 6";
+ }
Sort-bam.out-file->files-conversion-1.in-file {
text-pos:"-47 -24";
}
diff --git a/data/workflow_samples/NGS/raw_ngs/chipseq/chipseq_single.uwl b/data/workflow_samples/NGS/raw_ngs/chipseq/chipseq_single.uwl
index 0f75da6..c9df00e 100644
--- a/data/workflow_samples/NGS/raw_ngs/chipseq/chipseq_single.uwl
+++ b/data/workflow_samples/NGS/raw_ngs/chipseq/chipseq_single.uwl
@@ -27,14 +27,13 @@ workflow "Processing of raw ChIP-Seq single-end reads"{
type:CASAVAFilter;
name:"Filter Reads by CASAVA Header";
custom-dir:filtered_fastq;
- out-mode:1;
}
QualityTrim {
type:QualityTrim;
name:"Trim Reads by Quality";
custom-dir:filtered_fastq;
- out-mode:0;
len-id:1;
+ out-mode:0;
qual-id:20;
}
get-file-list {
@@ -52,8 +51,8 @@ workflow "Processing of raw ChIP-Seq single-end reads"{
files-conversion {
type:files-conversion;
name:"Convert to BAM";
- out-mode:0;
document-format:bam;
+ out-mode:0;
}
merge-bam {
type:merge-bam;
@@ -64,8 +63,8 @@ workflow "Processing of raw ChIP-Seq single-end reads"{
type:filter-bam;
name:"Filter Merged BAM File with SAMtools";
flag:"The read is unmapped";
- out-mode:0;
mapq:1;
+ out-mode:0;
}
rmdup-bam {
type:rmdup-bam;
@@ -76,8 +75,8 @@ workflow "Processing of raw ChIP-Seq single-end reads"{
files-conversion-1 {
type:files-conversion;
name:"Convert to BED Format";
- out-mode:0;
document-format:bed;
+ out-mode:0;
}
Sort-bam {
type:Sort-bam;
@@ -97,27 +96,30 @@ workflow "Processing of raw ChIP-Seq single-end reads"{
fastqc {
type:fastqc;
name:"FastQC Quality Control";
- out-mode:1;
}
fastqc-1 {
type:fastqc;
name:"FastQC Quality Control";
- out-mode:1;
+ }
+ fastqc-1-1 {
+ type:fastqc;
+ name:"FastQC Quality Control";
}
.actor-bindings {
- bwamem-id.out-data->files-conversion.in-file
+ merge-bam.out-file->filter-bam.in-file
+ rmdup-bam.out-file->Sort-bam.in-file
CASAVAFilter.out-file->CutAdaptFastq.in-file
+ CutAdaptFastq.out-file->QualityTrim.in-file
get-file-list.out-url->CASAVAFilter.in-file
get-file-list.out-url->fastqc.in-file
+ Sort-bam-1.out-file->rmdup-bam.in-file
+ files-conversion.out-file->merge-bam.in-file
Sort-bam.out-file->files-conversion-1.in-file
- CutAdaptFastq.out-file->QualityTrim.in-file
+ Sort-bam.out-file->fastqc-1-1.in-file
+ bwamem-id.out-data->files-conversion.in-file
QualityTrim.out-file->bwamem-id.in-data
QualityTrim.out-file->fastqc-1.in-file
- Sort-bam-1.out-file->rmdup-bam.in-file
- merge-bam.out-file->filter-bam.in-file
- rmdup-bam.out-file->Sort-bam.in-file
- files-conversion.out-file->merge-bam.in-file
filter-bam.out-file->Sort-bam-1.in-file
}
@@ -134,6 +136,7 @@ workflow "Processing of raw ChIP-Seq single-end reads"{
CASAVAFilter.url->CutAdaptFastq.in-file.url
get-file-list.url->fastqc.in-file.url
QualityTrim.url->fastqc-1.in-file.url
+ Sort-bam.url->fastqc-1-1.in-file.url
.meta {
parameter-aliases {
@@ -176,14 +179,14 @@ workflow "Processing of raw ChIP-Seq single-end reads"{
}
visual {
CASAVAFilter {
- pos:"-825 -525";
+ pos:"-795 -495";
style:ext;
bg-color-ext:"194 0 0 64";
in-file.angle:192.529;
out-file.angle:356.112;
}
CutAdaptFastq {
- pos:"-555 -510";
+ pos:"-525 -480";
style:ext;
bg-color-ext:"236 177 178 64";
bounds:"-30 -30 69.375 106";
@@ -191,28 +194,28 @@ workflow "Processing of raw ChIP-Seq single-end reads"{
out-file.angle:360;
}
QualityTrim {
- pos:"-375 -525";
+ pos:"-345 -495";
style:ext;
bg-color-ext:"204 68 102 64";
in-file.angle:190.305;
out-file.angle:356.558;
}
Sort-bam {
- pos:"-120 -105";
+ pos:"-90 -75";
style:simple;
bg-color-simple:"84 84 84 255";
in-file.angle:180;
out-file.angle:360;
}
Sort-bam-1 {
- pos:"-585 -105";
+ pos:"-555 -75";
style:simple;
bg-color-simple:"84 84 84 255";
in-file.angle:88.5312;
out-file.angle:2.38594;
}
bwamem-id {
- pos:"-120 -510";
+ pos:"-90 -480";
style:ext;
bg-color-ext:"0 128 0 64";
bounds:"-30 -30 105 92";
@@ -221,54 +224,60 @@ workflow "Processing of raw ChIP-Seq single-end reads"{
out-data.angle:297.474;
}
fastqc {
- pos:"-840 -720";
+ pos:"-810 -690";
style:simple;
bg-color-simple:"0 128 128 255";
in-file.angle:241.928;
}
fastqc-1 {
- pos:"-120 -705";
+ pos:"-90 -675";
style:simple;
bg-color-simple:"0 128 128 255";
in-file.angle:235.561;
}
+ fastqc-1-1 {
+ pos:"30 30";
+ style:simple;
+ bg-color-simple:"0 128 128 255";
+ in-file.angle:133.531;
+ }
files-conversion {
- pos:"-75 -285";
+ pos:"-45 -255";
style:simple;
bg-color-simple:"84 84 84 255";
in-file.angle:88.2643;
out-file.angle:183.576;
}
files-conversion-1 {
- pos:"120 -120";
+ pos:"120 -90";
style:ext;
bg-color-ext:"64 26 96 64";
in-file.angle:214.778;
out-file.angle:360;
}
filter-bam {
- pos:"-615 -330";
+ pos:"-585 -300";
style:ext;
bg-color-ext:"194 0 0 64";
in-file.angle:346.95;
out-file.angle:290.63;
}
get-file-list {
- pos:"-1035 -525";
+ pos:"-1005 -495";
style:ext;
bg-color-ext:"24 102 175 64";
bounds:"-30 -30 74 94";
out-url.angle:354.447;
}
merge-bam {
- pos:"-270 -285";
+ pos:"-240 -255";
style:simple;
bg-color-simple:"78 151 184 255";
in-file.angle:0;
out-file.angle:180;
}
rmdup-bam {
- pos:"-345 -105";
+ pos:"-315 -75";
style:simple;
bg-color-simple:"218 98 98 255";
in-file.angle:181.736;
@@ -289,6 +298,9 @@ workflow "Processing of raw ChIP-Seq single-end reads"{
Sort-bam-1.out-file->rmdup-bam.in-file {
text-pos:"-41 -24";
}
+ Sort-bam.out-file->fastqc-1-1.in-file {
+ text-pos:"-88 17";
+ }
Sort-bam.out-file->files-conversion-1.in-file {
text-pos:"-47 -24";
}
diff --git a/data/workflow_samples/NGS/raw_ngs/dnaseq/dna_paired.uwl b/data/workflow_samples/NGS/raw_ngs/dnaseq/dna_paired.uwl
index b482c42..c1127ee 100644
--- a/data/workflow_samples/NGS/raw_ngs/dnaseq/dna_paired.uwl
+++ b/data/workflow_samples/NGS/raw_ngs/dnaseq/dna_paired.uwl
@@ -26,14 +26,13 @@ workflow "Processing of raw DNA-Seq paired-end reads"{
type:CASAVAFilter;
name:"Filter Reads by CASAVA Header";
custom-dir:filtered_fastq;
- out-mode:1;
}
QualityTrim {
type:QualityTrim;
name:"Trim Reads by Quality";
custom-dir:filtered_fastq;
- out-mode:0;
len-id:1;
+ out-mode:0;
qual-id:20;
}
get-file-list {
@@ -52,8 +51,8 @@ workflow "Processing of raw DNA-Seq paired-end reads"{
files-conversion {
type:files-conversion;
name:"Convert to BAM";
- out-mode:0;
document-format:bam;
+ out-mode:0;
}
merge-bam {
type:merge-bam;
@@ -63,9 +62,9 @@ workflow "Processing of raw DNA-Seq paired-end reads"{
filter-bam {
type:filter-bam;
name:"Filter Merged BAM File with SAMtools";
- out-mode:0;
flag:"The read is unmapped";
mapq:1;
+ out-mode:0;
}
rmdup-bam {
type:rmdup-bam;
@@ -87,7 +86,6 @@ workflow "Processing of raw DNA-Seq paired-end reads"{
type:CASAVAFilter;
name:"Filter Reads by CASAVA Header";
custom-dir:filtered_fastq;
- out-mode:1;
}
get-file-list-1 {
type:get-file-list;
@@ -100,8 +98,8 @@ workflow "Processing of raw DNA-Seq paired-end reads"{
type:QualityTrim;
name:"Trim Reads by Quality";
custom-dir:filtered_fastq;
- out-mode:0;
len-id:1;
+ out-mode:0;
qual-id:20;
}
CutAdaptFastq {
@@ -117,43 +115,44 @@ workflow "Processing of raw DNA-Seq paired-end reads"{
fastqc {
type:fastqc;
name:"FastQC Quality Control";
- out-mode:1;
}
fastqc-1 {
type:fastqc;
name:"FastQC Quality Control";
- out-mode:1;
}
fastqc-1-1 {
type:fastqc;
name:"FastQC Quality Control";
- out-mode:1;
}
fastqc-1-2 {
type:fastqc;
name:"FastQC Quality Control";
- out-mode:1;
+ }
+ fastqc-1-1-1 {
+ type:fastqc;
+ name:"FastQC Quality Control";
}
.actor-bindings {
+ get-file-list-1.out-url->CASAVAFilter-1.in-file
+ get-file-list-1.out-url->fastqc-1.in-file
CutAdaptFastq.out-file->QualityTrim.in-file
- CutAdaptFastq-1.out-file->QualityTrim-1.in-file
- CASAVAFilter-1.out-file->CutAdaptFastq-1.in-file
- get-file-list.out-url->CASAVAFilter.in-file
- get-file-list.out-url->fastqc.in-file
- CASAVAFilter.out-file->CutAdaptFastq.in-file
filter-bam.out-file->Sort-bam-1.in-file
- QualityTrim.out-file->bwamem-id.in-data
- QualityTrim.out-file->fastqc-1-1.in-file
files-conversion.out-file->merge-bam.in-file
+ CASAVAFilter.out-file->CutAdaptFastq.in-file
Sort-bam-1.out-file->rmdup-bam.in-file
- bwamem-id.out-data->files-conversion.in-file
- merge-bam.out-file->filter-bam.in-file
- rmdup-bam.out-file->Sort-bam.in-file
- get-file-list-1.out-url->CASAVAFilter-1.in-file
- get-file-list-1.out-url->fastqc-1.in-file
QualityTrim-1.out-file->bwamem-id.in-data-paired
QualityTrim-1.out-file->fastqc-1-2.in-file
+ Sort-bam.out-file->fastqc-1-1-1.in-file
+ merge-bam.out-file->filter-bam.in-file
+ bwamem-id.out-data->files-conversion.in-file
+ QualityTrim.out-file->bwamem-id.in-data
+ QualityTrim.out-file->fastqc-1-1.in-file
+ CASAVAFilter-1.out-file->CutAdaptFastq-1.in-file
+ get-file-list.out-url->CASAVAFilter.in-file
+ get-file-list.out-url->fastqc.in-file
+ rmdup-bam.out-file->Sort-bam.in-file
+ CutAdaptFastq-1.out-file->QualityTrim-1.in-file
}
get-file-list.url->CASAVAFilter.in-file.url
@@ -174,6 +173,7 @@ workflow "Processing of raw DNA-Seq paired-end reads"{
get-file-list-1.url->fastqc-1.in-file.url
QualityTrim.url->fastqc-1-1.in-file.url
QualityTrim-1.url->fastqc-1-2.in-file.url
+ Sort-bam.url->fastqc-1-1-1.in-file.url
.meta {
parameter-aliases {
@@ -243,7 +243,7 @@ workflow "Processing of raw DNA-Seq paired-end reads"{
}
visual {
CASAVAFilter {
- pos:"-960 -795";
+ pos:"-915 -750";
style:ext;
bg-color-ext:"194 0 0 64";
bounds:"-30 -30 123.875 80";
@@ -251,7 +251,7 @@ workflow "Processing of raw DNA-Seq paired-end reads"{
out-file.angle:360;
}
CASAVAFilter-1 {
- pos:"-960 -630";
+ pos:"-915 -585";
style:ext;
bg-color-ext:"194 0 0 64";
bounds:"-30 -30 122.875 82";
@@ -259,49 +259,49 @@ workflow "Processing of raw DNA-Seq paired-end reads"{
out-file.angle:360;
}
CutAdaptFastq {
- pos:"-690 -795";
+ pos:"-645 -750";
style:ext;
bg-color-ext:"236 177 178 64";
in-file.angle:180;
out-file.angle:360;
}
CutAdaptFastq-1 {
- pos:"-690 -630";
+ pos:"-645 -585";
style:ext;
bg-color-ext:"236 177 178 64";
in-file.angle:180;
out-file.angle:360;
}
QualityTrim {
- pos:"-405 -795";
+ pos:"-360 -750";
style:ext;
bg-color-ext:"204 68 102 64";
in-file.angle:180;
out-file.angle:360;
}
QualityTrim-1 {
- pos:"-405 -630";
+ pos:"-360 -585";
style:ext;
bg-color-ext:"204 68 102 64";
in-file.angle:180;
out-file.angle:360;
}
Sort-bam {
- pos:"0 -225";
+ pos:"-75 -180";
style:simple;
bg-color-simple:"84 84 84 255";
in-file.angle:180;
out-file.angle:360;
}
Sort-bam-1 {
- pos:"-480 -225";
+ pos:"-435 -180";
style:simple;
bg-color-simple:"84 84 84 255";
in-file.angle:90;
out-file.angle:360;
}
bwamem-id {
- pos:"-30 -735";
+ pos:"0 -690";
style:ext;
bg-color-ext:"0 128 0 64";
bounds:"-30 -30 108 135";
@@ -310,35 +310,41 @@ workflow "Processing of raw DNA-Seq paired-end reads"{
out-data.angle:287.162;
}
fastqc {
- pos:"-900 -975";
+ pos:"-855 -930";
style:simple;
in-file.angle:180;
}
fastqc-1 {
- pos:"-915 -450";
+ pos:"-870 -405";
style:simple;
in-file.angle:180;
}
fastqc-1-1 {
- pos:"-135 -945";
+ pos:"-90 -900";
style:simple;
bg-color-simple:"0 128 128 255";
in-file.angle:232.815;
}
+ fastqc-1-1-1 {
+ pos:"105 -180";
+ style:simple;
+ bg-color-simple:"0 128 128 255";
+ in-file.angle:178.636;
+ }
fastqc-1-2 {
- pos:"-135 -510";
+ pos:"-90 -465";
style:simple;
in-file.angle:138.18;
}
files-conversion {
- pos:"0 -405";
+ pos:"0 -360";
style:simple;
bg-color-simple:"84 84 84 255";
in-file.angle:83.2902;
out-file.angle:176.186;
}
filter-bam {
- pos:"-525 -435";
+ pos:"-480 -390";
style:ext;
bg-color-ext:"194 0 0 64";
bounds:"-30 -30 121.875 79";
@@ -346,28 +352,28 @@ workflow "Processing of raw DNA-Seq paired-end reads"{
out-file.angle:298.996;
}
get-file-list {
- pos:"-1200 -795";
+ pos:"-1155 -750";
style:ext;
bg-color-ext:"24 102 175 64";
bounds:"-30 -30 63 87";
out-url.angle:360;
}
get-file-list-1 {
- pos:"-1200 -630";
+ pos:"-1155 -585";
style:ext;
bg-color-ext:"24 102 175 64";
bounds:"-30 -30 61 92";
out-url.angle:360;
}
merge-bam {
- pos:"-210 -405";
+ pos:"-165 -360";
style:simple;
bg-color-simple:"78 151 184 255";
in-file.angle:1.7357;
out-file.angle:180;
}
rmdup-bam {
- pos:"-225 -225";
+ pos:"-255 -180";
style:simple;
bg-color-simple:"218 98 98 255";
in-file.angle:180;
@@ -400,6 +406,9 @@ workflow "Processing of raw DNA-Seq paired-end reads"{
Sort-bam-1.out-file->rmdup-bam.in-file {
text-pos:"-45 -26";
}
+ Sort-bam.out-file->fastqc-1-1-1.in-file {
+ text-pos:"-46 -46";
+ }
bwamem-id.out-data->files-conversion.in-file {
text-pos:"-38 -24";
}
diff --git a/data/workflow_samples/NGS/raw_ngs/dnaseq/dna_single.uwl b/data/workflow_samples/NGS/raw_ngs/dnaseq/dna_single.uwl
index eaf77af..b1d894c 100644
--- a/data/workflow_samples/NGS/raw_ngs/dnaseq/dna_single.uwl
+++ b/data/workflow_samples/NGS/raw_ngs/dnaseq/dna_single.uwl
@@ -26,14 +26,13 @@ workflow "Processing of raw DNA-Seq single-end reads"{
type:CASAVAFilter;
name:"Filter Reads by CASAVA Header";
custom-dir:filtered_fastq;
- out-mode:1;
}
QualityTrim {
type:QualityTrim;
name:"Trim Reads by Quality";
custom-dir:filtered_fastq;
- out-mode:0;
len-id:1;
+ out-mode:0;
qual-id:20;
}
get-file-list {
@@ -51,8 +50,8 @@ workflow "Processing of raw DNA-Seq single-end reads"{
files-conversion {
type:files-conversion;
name:"Convert to BAM";
- out-mode:0;
document-format:bam;
+ out-mode:0;
}
merge-bam {
type:merge-bam;
@@ -62,9 +61,9 @@ workflow "Processing of raw DNA-Seq single-end reads"{
filter-bam {
type:filter-bam;
name:"Filter Merged BAM File with SAMtools";
- out-mode:0;
flag:"The read is unmapped";
mapq:1;
+ out-mode:0;
}
rmdup-bam {
type:rmdup-bam;
@@ -90,27 +89,30 @@ workflow "Processing of raw DNA-Seq single-end reads"{
fastqc {
type:fastqc;
name:"FastQC Quality Control";
- out-mode:1;
}
fastqc-1 {
type:fastqc;
name:"FastQC Quality Control";
- out-mode:1;
+ }
+ fastqc-1-1 {
+ type:fastqc;
+ name:"FastQC Quality Control";
}
.actor-bindings {
+ merge-bam.out-file->filter-bam.in-file
+ Sort-bam-1.out-file->rmdup-bam.in-file
filter-bam.out-file->Sort-bam-1.in-file
- CutAdaptFastq.out-file->QualityTrim.in-file
+ rmdup-bam.out-file->Sort-bam.in-file
CASAVAFilter.out-file->CutAdaptFastq.in-file
get-file-list.out-url->CASAVAFilter.in-file
get-file-list.out-url->fastqc.in-file
- Sort-bam-1.out-file->rmdup-bam.in-file
- rmdup-bam.out-file->Sort-bam.in-file
- bwamem-id.out-data->files-conversion.in-file
- merge-bam.out-file->filter-bam.in-file
QualityTrim.out-file->bwamem-id.in-data
QualityTrim.out-file->fastqc-1.in-file
files-conversion.out-file->merge-bam.in-file
+ Sort-bam.out-file->fastqc-1-1.in-file
+ bwamem-id.out-data->files-conversion.in-file
+ CutAdaptFastq.out-file->QualityTrim.in-file
}
get-file-list.url->CASAVAFilter.in-file.url
@@ -125,6 +127,7 @@ workflow "Processing of raw DNA-Seq single-end reads"{
CASAVAFilter.url->CutAdaptFastq.in-file.url
get-file-list.url->fastqc.in-file.url
QualityTrim.url->fastqc-1.in-file.url
+ Sort-bam.url->fastqc-1-1.in-file.url
.meta {
parameter-aliases {
@@ -167,7 +170,7 @@ workflow "Processing of raw DNA-Seq single-end reads"{
}
visual {
CASAVAFilter {
- pos:"-840 -795";
+ pos:"-795 -750";
style:ext;
bg-color-ext:"194 0 0 64";
bounds:"-30 -30 110.875 92";
@@ -175,35 +178,35 @@ workflow "Processing of raw DNA-Seq single-end reads"{
out-file.angle:360;
}
CutAdaptFastq {
- pos:"-600 -795";
+ pos:"-555 -750";
style:ext;
bg-color-ext:"236 177 178 64";
in-file.angle:180;
out-file.angle:360;
}
QualityTrim {
- pos:"-315 -795";
+ pos:"-270 -750";
style:ext;
bg-color-ext:"204 68 102 64";
in-file.angle:180;
out-file.angle:360;
}
Sort-bam {
- pos:"-30 -360";
+ pos:"-105 -315";
style:simple;
bg-color-simple:"84 84 84 255";
in-file.angle:180;
out-file.angle:360;
}
Sort-bam-1 {
- pos:"-540 -360";
+ pos:"-495 -315";
style:simple;
bg-color-simple:"84 84 84 255";
in-file.angle:93.4682;
out-file.angle:360;
}
bwamem-id {
- pos:"0 -795";
+ pos:"0 -750";
style:ext;
bg-color-ext:"0 128 0 64";
in-data.angle:180;
@@ -211,26 +214,32 @@ workflow "Processing of raw DNA-Seq single-end reads"{
out-data.angle:297.532;
}
fastqc {
- pos:"-900 -960";
+ pos:"-855 -915";
style:simple;
bg-color-simple:"0 128 128 255";
in-file.angle:213.69;
}
fastqc-1 {
- pos:"-105 -945";
+ pos:"-60 -900";
style:simple;
bg-color-simple:"0 128 128 255";
in-file.angle:231.843;
}
+ fastqc-1-1 {
+ pos:"75 -315";
+ style:simple;
+ bg-color-simple:"0 128 128 255";
+ in-file.angle:178.264;
+ }
files-conversion {
- pos:"45 -540";
+ pos:"45 -495";
style:simple;
bg-color-simple:"84 84 84 255";
in-file.angle:90;
out-file.angle:181.591;
}
filter-bam {
- pos:"-570 -555";
+ pos:"-525 -510";
style:ext;
bg-color-ext:"194 0 0 64";
bounds:"-30 -30 142.875 61";
@@ -238,21 +247,21 @@ workflow "Processing of raw DNA-Seq single-end reads"{
out-file.angle:297.235;
}
get-file-list {
- pos:"-1080 -795";
+ pos:"-1035 -750";
style:ext;
bg-color-ext:"24 102 175 64";
bounds:"-30 -30 102.25 86";
out-url.angle:360;
}
merge-bam {
- pos:"-180 -540";
+ pos:"-135 -495";
style:simple;
bg-color-simple:"78 151 184 255";
in-file.angle:1.59114;
out-file.angle:176.634;
}
rmdup-bam {
- pos:"-255 -360";
+ pos:"-300 -315";
style:simple;
bg-color-simple:"218 98 98 255";
in-file.angle:180;
@@ -273,6 +282,9 @@ workflow "Processing of raw DNA-Seq single-end reads"{
Sort-bam-1.out-file->rmdup-bam.in-file {
text-pos:"-53 -25";
}
+ Sort-bam.out-file->fastqc-1-1.in-file {
+ text-pos:"-44 -25";
+ }
bwamem-id.out-data->files-conversion.in-file {
text-pos:"-45 -37";
}
diff --git a/data/workflow_samples/NGS/raw_ngs/rnaseq/rnaseq_paired.uwl b/data/workflow_samples/NGS/raw_ngs/rnaseq/rnaseq_paired.uwl
index 5669080..73f3a97 100644
--- a/data/workflow_samples/NGS/raw_ngs/rnaseq/rnaseq_paired.uwl
+++ b/data/workflow_samples/NGS/raw_ngs/rnaseq/rnaseq_paired.uwl
@@ -24,8 +24,8 @@ workflow "Processing of raw RNA-Seq paired-end reads with mapping"{
type:QualityTrim;
name:"Trim Reads by Quality";
custom-dir:filtered_fastq;
- out-mode:0;
len-id:10;
+ out-mode:0;
qual-id:20;
}
get-file-list {
@@ -66,8 +66,8 @@ workflow "Processing of raw RNA-Seq paired-end reads with mapping"{
type:QualityTrim;
name:"Trim Reads by Quality";
custom-dir:filtered_fastq;
- out-mode:0;
len-id:10;
+ out-mode:0;
qual-id:20;
}
MergeFastq-1 {
@@ -94,38 +94,39 @@ workflow "Processing of raw RNA-Seq paired-end reads with mapping"{
fastqc {
type:fastqc;
name:"FastQC Quality Control";
- out-mode:1;
}
fastqc-1 {
type:fastqc;
name:"FastQC Quality Control";
- out-mode:1;
}
fastqc-2 {
type:fastqc;
name:"FastQC Quality Control";
- out-mode:1;
}
fastqc-3 {
type:fastqc;
name:"FastQC Quality Control";
- out-mode:1;
+ }
+ fastqc-2-1 {
+ type:fastqc;
+ name:"FastQC Quality Control";
}
.actor-bindings {
CutAdaptFastq-1.out-file->QualityTrim.in-file
+ tophat.out-assembly->fastqc-2-1.in-file
CASAVAFilter-1.out-file->CutAdaptFastq.in-file
- CutAdaptFastq.out-file->QualityTrim-1.in-file
- MergeFastq.out-file->multiplexer.input-data-1
- MergeFastq.out-file->fastqc-3.in-file
MergeFastq-1.out-file->multiplexer.input-data-2
MergeFastq-1.out-file->fastqc-2.in-file
+ MergeFastq.out-file->multiplexer.input-data-1
+ MergeFastq.out-file->fastqc-3.in-file
+ QualityTrim.out-file->MergeFastq.in-file
+ CutAdaptFastq.out-file->QualityTrim-1.in-file
QualityTrim-1.out-file->MergeFastq-1.in-file
- get-file-list-1.out-url->CASAVAFilter-1.in-file
- get-file-list-1.out-url->fastqc.in-file
CASAVAFilter.out-file->CutAdaptFastq-1.in-file
multiplexer.output-data->tophat.in-sequence
- QualityTrim.out-file->MergeFastq.in-file
+ get-file-list-1.out-url->CASAVAFilter-1.in-file
+ get-file-list-1.out-url->fastqc.in-file
get-file-list.out-url->CASAVAFilter.in-file
get-file-list.out-url->fastqc-1.in-file
}
@@ -144,6 +145,7 @@ workflow "Processing of raw RNA-Seq paired-end reads with mapping"{
get-file-list.url->fastqc-1.in-file.url
MergeFastq-1.url->fastqc-2.in-file.url
MergeFastq.url->fastqc-3.in-file.url
+ tophat.hits-url->fastqc-2-1.in-file.url
.meta {
parameter-aliases {
@@ -153,12 +155,6 @@ workflow "Processing of raw RNA-Seq paired-end reads with mapping"{
CASAVAFilter.out-mode {
alias:casava1_out_dir_type;
}
- CASAVAFilter-1.custom-dir {
- alias:casava2_out_dir;
- }
- CASAVAFilter-1.out-mode {
- alias:casava2_out_dir_type;
- }
QualityTrim.custom-dir {
alias:trim_reads2_out_dir;
}
@@ -180,6 +176,12 @@ workflow "Processing of raw RNA-Seq paired-end reads with mapping"{
get-file-list-1.url-in {
alias:in_mate;
}
+ CASAVAFilter-1.custom-dir {
+ alias:casava2_out_dir;
+ }
+ CASAVAFilter-1.out-mode {
+ alias:casava2_out_dir_type;
+ }
QualityTrim-1.custom-dir {
alias:trim_reads1_out_dir;
}
@@ -213,7 +215,7 @@ workflow "Processing of raw RNA-Seq paired-end reads with mapping"{
}
visual {
CASAVAFilter {
- pos:"-1275 -600";
+ pos:"-1245 -570";
style:ext;
bg-color-ext:"194 0 0 64";
bounds:"-30 -30 123.875 82";
@@ -221,7 +223,7 @@ workflow "Processing of raw RNA-Seq paired-end reads with mapping"{
out-file.angle:268.768;
}
CASAVAFilter-1 {
- pos:"-765 -975";
+ pos:"-735 -945";
style:ext;
bg-color-ext:"194 0 0 64";
bounds:"-30 -30 119.875 81";
@@ -229,42 +231,42 @@ workflow "Processing of raw RNA-Seq paired-end reads with mapping"{
out-file.angle:359.076;
}
CutAdaptFastq {
- pos:"-540 -870";
+ pos:"-510 -840";
style:ext;
bg-color-ext:"236 177 178 64";
in-file.angle:49.8991;
out-file.angle:344.434;
}
CutAdaptFastq-1 {
- pos:"-1155 -375";
+ pos:"-1125 -345";
style:ext;
bg-color-ext:"236 177 178 64";
in-file.angle:34.1145;
out-file.angle:351.588;
}
MergeFastq {
- pos:"-480 -255";
+ pos:"-450 -225";
style:simple;
bg-color-simple:"78 151 184 255";
in-file.angle:171.87;
out-file.angle:1.4688;
}
MergeFastq-1 {
- pos:"-210 -480";
+ pos:"-180 -450";
style:simple;
bg-color-simple:"78 151 184 255";
in-file.angle:91.9092;
out-file.angle:271.432;
}
QualityTrim {
- pos:"-870 -300";
+ pos:"-840 -270";
style:ext;
bg-color-ext:"204 68 102 64";
in-file.angle:211.827;
out-file.angle:347.391;
}
QualityTrim-1 {
- pos:"-255 -750";
+ pos:"-225 -720";
style:ext;
bg-color-ext:"204 68 102 64";
bounds:"-30 -30 107.125 96";
@@ -272,45 +274,51 @@ workflow "Processing of raw RNA-Seq paired-end reads with mapping"{
out-file.angle:292.859;
}
fastqc {
- pos:"-915 -825";
+ pos:"-885 -795";
style:simple;
bg-color-simple:"0 128 128 255";
in-file.angle:90;
}
fastqc-1 {
- pos:"-1080 -750";
+ pos:"-1050 -720";
style:simple;
bg-color-simple:"0 128 128 255";
in-file.angle:180;
}
fastqc-2 {
- pos:"-15 -435";
+ pos:"0 -405";
+ style:simple;
+ bg-color-simple:"0 128 128 255";
+ in-file.angle:180;
+ }
+ fastqc-2-1 {
+ pos:"-330 -555";
style:simple;
bg-color-simple:"0 128 128 255";
in-file.angle:180;
}
fastqc-3 {
- pos:"-450 -120";
+ pos:"-420 -90";
style:simple;
bg-color-simple:"0 128 128 255";
in-file.angle:88.0251;
}
get-file-list {
- pos:"-1335 -840";
+ pos:"-1305 -810";
style:ext;
bg-color-ext:"24 102 175 64";
bounds:"-30 -30 87.25 90";
out-url.angle:296.095;
}
get-file-list-1 {
- pos:"-1005 -1020";
+ pos:"-975 -990";
style:ext;
bg-color-ext:"24 102 175 64";
bounds:"-30 -30 86 92";
out-url.angle:328.062;
}
multiplexer {
- pos:"-210 -255";
+ pos:"-180 -225";
style:simple;
bg-color-simple:"84 84 84 255";
input-data-1.angle:186.911;
@@ -318,7 +326,7 @@ workflow "Processing of raw RNA-Seq paired-end reads with mapping"{
output-data.angle:145.62;
}
tophat {
- pos:"-615 -555";
+ pos:"-585 -525";
style:ext;
bg-color-ext:"0 128 0 64";
bounds:"-30 -30 140.375 90";
@@ -370,6 +378,9 @@ workflow "Processing of raw RNA-Seq paired-end reads with mapping"{
multiplexer.output-data->tophat.in-sequence {
text-pos:"-111 -64";
}
+ tophat.out-assembly->fastqc-2-1.in-file {
+ text-pos:"-44.2266 -24";
+ }
}
wizard {
name:"Raw RNA-Seq Data Processing Wizard";
diff --git a/data/workflow_samples/NGS/raw_ngs/rnaseq/rnaseq_single.uwl b/data/workflow_samples/NGS/raw_ngs/rnaseq/rnaseq_single.uwl
index c2290fc..3d0b421 100644
--- a/data/workflow_samples/NGS/raw_ngs/rnaseq/rnaseq_single.uwl
+++ b/data/workflow_samples/NGS/raw_ngs/rnaseq/rnaseq_single.uwl
@@ -19,14 +19,13 @@ workflow "Processing of raw RNA-Seq single-end reads with mapping"{
type:CASAVAFilter;
name:"Filter Reads by CASAVA Header";
custom-dir:filtered_fastq;
- out-mode:1;
}
QualityTrim {
type:QualityTrim;
name:"Trim Reads by Quality";
custom-dir:filtered_fastq;
- out-mode:0;
len-id:10;
+ out-mode:0;
qual-id:20;
}
get-file-list {
@@ -59,22 +58,25 @@ workflow "Processing of raw RNA-Seq single-end reads with mapping"{
fastqc {
type:fastqc;
name:"FastQC Quality Control";
- out-mode:1;
}
fastqc-1 {
type:fastqc;
name:"FastQC Quality Control";
- out-mode:1;
+ }
+ fastqc-1-1 {
+ type:fastqc;
+ name:"FastQC Quality Control";
}
.actor-bindings {
- CutAdaptFastq.out-file->QualityTrim.in-file
- get-file-list.out-url->CASAVAFilter.in-file
- get-file-list.out-url->fastqc.in-file
CASAVAFilter.out-file->CutAdaptFastq.in-file
+ tophat.out-assembly->fastqc-1-1.in-file
MergeFastq.out-file->tophat.in-sequence
MergeFastq.out-file->fastqc-1.in-file
QualityTrim.out-file->MergeFastq.in-file
+ get-file-list.out-url->CASAVAFilter.in-file
+ get-file-list.out-url->fastqc.in-file
+ CutAdaptFastq.out-file->QualityTrim.in-file
}
get-file-list.url->CASAVAFilter.in-file.url
@@ -84,6 +86,7 @@ workflow "Processing of raw RNA-Seq single-end reads with mapping"{
CASAVAFilter.url->CutAdaptFastq.in-file.url
get-file-list.url->fastqc.in-file.url
MergeFastq.url->fastqc-1.in-file.url
+ tophat.hits-url->fastqc-1-1.in-file.url
.meta {
parameter-aliases {
@@ -123,7 +126,7 @@ workflow "Processing of raw RNA-Seq single-end reads with mapping"{
}
visual {
CASAVAFilter {
- pos:"-795 -510";
+ pos:"-765 -480";
style:ext;
bg-color-ext:"194 0 0 64";
bounds:"-30 -30 181.875 92";
@@ -131,21 +134,21 @@ workflow "Processing of raw RNA-Seq single-end reads with mapping"{
out-file.angle:360;
}
CutAdaptFastq {
- pos:"-435 -510";
+ pos:"-405 -480";
style:ext;
bg-color-ext:"236 177 178 64";
in-file.angle:180;
out-file.angle:360;
}
MergeFastq {
- pos:"-90 -270";
+ pos:"-60 -240";
style:simple;
bg-color-simple:"78 151 184 255";
in-file.angle:90;
out-file.angle:270;
}
QualityTrim {
- pos:"-135 -510";
+ pos:"-105 -480";
style:ext;
bg-color-ext:"204 68 102 64";
bounds:"-30 -30 106 96";
@@ -153,26 +156,32 @@ workflow "Processing of raw RNA-Seq single-end reads with mapping"{
out-file.angle:290.925;
}
fastqc {
- pos:"-960 -285";
+ pos:"-930 -255";
style:simple;
bg-color-simple:"0 128 128 255";
in-file.angle:90;
}
fastqc-1 {
- pos:"-345 -240";
+ pos:"-315 -210";
+ style:simple;
+ bg-color-simple:"0 128 128 255";
+ in-file.angle:358.091;
+ }
+ fastqc-1-1 {
+ pos:"-315 -60";
style:simple;
bg-color-simple:"0 128 128 255";
in-file.angle:358.091;
}
get-file-list {
- pos:"-1065 -510";
+ pos:"-1035 -480";
style:ext;
bg-color-ext:"24 102 175 64";
bounds:"-30 -30 102.25 92";
out-url.angle:360;
}
tophat {
- pos:"-165 -105";
+ pos:"-135 -75";
style:ext;
bg-color-ext:"0 128 0 64";
in-sequence.angle:21.9745;
@@ -199,6 +208,9 @@ workflow "Processing of raw RNA-Seq single-end reads with mapping"{
get-file-list.out-url->fastqc.in-file {
text-pos:"-49.7578 25";
}
+ tophat.out-assembly->fastqc-1-1.in-file {
+ text-pos:"-44.2266 -24";
+ }
}
wizard {
name:"Raw RNA-Seq Data Processing Wizard";
diff --git a/data/workflow_samples/NGS/variation_annotation.uwl b/data/workflow_samples/NGS/variation_annotation.uwl
index 8009276..63df098 100644
--- a/data/workflow_samples/NGS/variation_annotation.uwl
+++ b/data/workflow_samples/NGS/variation_annotation.uwl
@@ -1,11 +1,13 @@
#@UGENE_WORKFLOW
#The workflow uses the SnpEff tool to annotate and predict the effects of genetic variations (such as amino acid changes).
#
-#To use the workflow you're required to input at least one file with variations (SNPs, insertions, deletions, and MNPs). The input file is usually obtained as a result of a sequencing experiment, and it is usually in Variant Call Format (VCF).
+#To use the workflow you're required to input at least one file with variations (SNPs, insertions, deletions, and MNPs). The input file is usually obtained as a result of a sequencing experiment, and it is usually in the Variant Call Format (VCF).
#
#To analyze the input variations SnpEff requires the variations file to have a certain chromosome notation. For example, "chr1" specified for a variation works fine, but "NC_000001" will result in an error. Therefore, the workflow contains an automated way for changing the chromosomes notation, which can be tweaked, if required.
#
#SnpEff outputs the annotated variations file and a detailed HTML report.
+#
+#Finally, the information added by SnpEff to the variations is converted to standard annotations (in GenBank format by default).
@@ -21,7 +23,6 @@ workflow "Variation annotation with SnpEff"{
seff {
type:seff;
name:"Annotate and Predict Effects with SnpEff";
- out-mode:1;
}
rename-chromosome-in-variation {
type:rename-chromosome-in-variation;
@@ -29,14 +30,20 @@ workflow "Variation annotation with SnpEff"{
prefix-replace-with:chr;
prefixes-to-replace:"NC_00000;NC_0000";
}
+ convert-snpeff-variations-to-annotations {
+ type:convert-snpeff-variations-to-annotations;
+ name:"Convert SnpEff Variations to Annotations";
+ }
.actor-bindings {
- get-file-list.out-url->rename-chromosome-in-variation.in-file
rename-chromosome-in-variation.out-file->seff.in-file
+ seff.out-file->convert-snpeff-variations-to-annotations.in-variations-url
+ get-file-list.out-url->rename-chromosome-in-variation.in-file
}
rename-chromosome-in-variation.url->seff.in-file.url
get-file-list.url->rename-chromosome-in-variation.in-file.url
+ seff.url->convert-snpeff-variations-to-annotations.in-variations-url.url
.meta {
parameter-aliases {
@@ -69,15 +76,22 @@ workflow "Variation annotation with SnpEff"{
}
}
visual {
+ convert-snpeff-variations-to-annotations {
+ pos:"-85 -512";
+ style:ext;
+ bg-color-ext:"120 47 165 64";
+ bounds:"-30 -30 151 125";
+ in-variations-url.angle:228.27;
+ }
get-file-list {
- pos:"-873 -501";
+ pos:"-981 -502";
style:ext;
bg-color-ext:"42 108 251 64";
bounds:"-30 -30 108.25 97";
out-url.angle:349.38;
}
rename-chromosome-in-variation {
- pos:"-608 -513";
+ pos:"-716 -514";
style:ext;
bg-color-ext:"128 104 117 64";
bounds:"-30 -30 160 136";
@@ -85,7 +99,7 @@ workflow "Variation annotation with SnpEff"{
out-file.angle:347.905;
}
seff {
- pos:"-268 -514";
+ pos:"-376 -515";
style:ext;
bg-color-ext:"0 128 64 64";
bounds:"-30 -30 141 130";
@@ -98,10 +112,12 @@ workflow "Variation annotation with SnpEff"{
rename-chromosome-in-variation.out-file->seff.in-file {
text-pos:"-45 -43";
}
+ seff.out-file->convert-snpeff-variations-to-annotations.in-variations-url {
+ text-pos:"-45 -43";
+ }
}
wizard {
name:"Variation Annotation with SnpEff Wizard";
- help-page-id:16122730;
page {
id:1;
next:2;
@@ -163,6 +179,14 @@ workflow "Variation annotation with SnpEff"{
title:Output;
parameters-area {
group {
+ title:"Convert SnpEff Variations to Annotations";
+ label-size:100;
+ convert-snpeff-variations-to-annotations.url-out {
+ }
+ convert-snpeff-variations-to-annotations.document-format {
+ }
+ }
+ group {
title:"Output folder";
label-size:100;
seff.out-mode {
diff --git a/data/workflow_samples/Scenarios/length_filter.uwl b/data/workflow_samples/Scenarios/length_filter.uwl
new file mode 100644
index 0000000..19dd74b
--- /dev/null
+++ b/data/workflow_samples/Scenarios/length_filter.uwl
@@ -0,0 +1,131 @@
+#@UGENE_WORKFLOW
+#To use this workflow, input a set of sequences and set a minimum sequence length.
+#All sequences shorter than the specified value will be filtered out. The result will be written into a FASTA file by default.
+
+include "LengthMarker.usa" as "Script-LengthMarker"
+
+workflow "Filter out short sequences"{
+
+ read-sequence {
+ type:read-sequence;
+ name:"Read Sequence";
+ url-in {
+ dataset:"Dataset 1";
+ }
+ }
+ filter-by-values {
+ type:filter-by-values;
+ name:Filter;
+ text:long;
+ }
+ Script-LengthMarker {
+ type:Script-LengthMarker;
+ name:"Length Marker";
+ script {
+var seqObject = new Sequence(in_sequence);
+if (seqObject.length() < SequenceLength) {
+ out_text = 'short';
+} else {
+ out_text = 'long';
+};
+ };
+ SequenceLength:10000;
+ }
+ write-sequence {
+ type:write-sequence;
+ name:"Write Long Sequences";
+ }
+
+ .actor-bindings {
+ filter-by-values.filtered-data->write-sequence.in-sequence
+ Script-LengthMarker.out->filter-by-values.in-data
+ read-sequence.out-sequence->Script-LengthMarker.in
+ }
+
+ Script-LengthMarker.text->filter-by-values.in-data.text
+ read-sequence.sequence->Script-LengthMarker.in.sequence
+ read-sequence.sequence->write-sequence.in-sequence.sequence {
+ path-through:filter-by-values;
+ }
+
+ .meta {
+ visual {
+ Script-LengthMarker {
+ pos:"-615 -465";
+ style:simple;
+ bg-color-simple:"0 128 128 64";
+ in.angle:180;
+ out.angle:360;
+ }
+ filter-by-values {
+ pos:"-450 -465";
+ style:ext;
+ bg-color-ext:"0 129 94 64";
+ filtered-data.angle:360;
+ in-data.angle:180;
+ }
+ read-sequence {
+ pos:"-810 -465";
+ style:ext;
+ bg-color-ext:"255 170 127 64";
+ out-sequence.angle:360;
+ }
+ write-sequence {
+ pos:"-195 -465";
+ style:ext;
+ bg-color-ext:"170 170 0 64";
+ in-sequence.angle:180;
+ }
+ Script-LengthMarker.out->filter-by-values.in-data {
+ text-pos:"-40.5 -37";
+ }
+ filter-by-values.filtered-data->write-sequence.in-sequence {
+ text-pos:"-39 -36";
+ }
+ read-sequence.out-sequence->Script-LengthMarker.in {
+ text-pos:"-30.5 -39";
+ }
+ }
+ wizard {
+ name:"Filter short sequences";
+ page {
+ id:1;
+ next:2;
+ title:"Input sequence(s)";
+ parameters-area {
+ read-sequence.url-in {
+ type:datasets;
+ }
+ }
+ }
+ page {
+ id:2;
+ next:3;
+ title:Filtering;
+ parameters-area {
+ group {
+ title:"Sequence Length Threshold";
+ label-size:150;
+ Script-LengthMarker.SequenceLength {
+ label:"Minimum length";
+ }
+ }
+ }
+ }
+ page {
+ id:3;
+ title:"Output data";
+ parameters-area {
+ group {
+ title:"Output data";
+ label-size:120;
+ write-sequence.url-out {
+ label:"Result file";
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
diff --git a/data/workflow_samples/users/LengthMarker.usa b/data/workflow_samples/users/LengthMarker.usa
new file mode 100644
index 0000000..969842a
--- /dev/null
+++ b/data/workflow_samples/users/LengthMarker.usa
@@ -0,0 +1,14 @@
+<!DOCTYPE GB2WORKFLOW>
+<Actor>
+ <Input-port>
+ <In-Slots Slot="seq"/>
+ </Input-port>
+ <Output-port>
+ <Out-Slots Slot="string"/>
+ </Output-port>
+ <Attributes>
+ <Attribute Name="SequenceLength" Type="number"/>
+ </Attributes>
+ <Element-name Name="LengthMarker"/>
+ <Element-description Description="Identifies an input sequence as long or short"/>
+</Actor>
diff --git a/src/corelibs/U2Algorithm/src/misc/DnaAssemblyMultiTask.cpp b/src/corelibs/U2Algorithm/src/misc/DnaAssemblyMultiTask.cpp
index c129059..f323af6 100644
--- a/src/corelibs/U2Algorithm/src/misc/DnaAssemblyMultiTask.cpp
+++ b/src/corelibs/U2Algorithm/src/misc/DnaAssemblyMultiTask.cpp
@@ -41,12 +41,9 @@
namespace U2 {
-DnaAssemblyMultiTask::DnaAssemblyMultiTask( const DnaAssemblyToRefTaskSettings& s, bool view, bool _justBuildIndex )
-: Task("DnaAssemblyMultiTask", TaskFlags_NR_FOSE_COSC | TaskFlag_ReportingIsSupported | TaskFlag_ReportingIsEnabled), settings(s),
-assemblyToRefTask(NULL), shortReadSets(s.shortReadSets), openView(view), justBuildIndex(_justBuildIndex)
-{
-
-}
+DnaAssemblyMultiTask::DnaAssemblyMultiTask(const DnaAssemblyToRefTaskSettings& s, bool view, bool _justBuildIndex)
+: ExternalToolSupportTask("DnaAssemblyMultiTask", TaskFlags_NR_FOSE_COSC | TaskFlag_ReportingIsSupported | TaskFlag_ReportingIsEnabled), settings(s),
+assemblyToRefTask(NULL), shortReadSets(s.shortReadSets), openView(view), justBuildIndex(_justBuildIndex) {}
void DnaAssemblyMultiTask::prepare() {
// perform assembly
@@ -58,6 +55,7 @@ void DnaAssemblyMultiTask::prepare() {
return;
}
assemblyToRefTask = env->getTaskFactory()->createTaskInstance(settings, justBuildIndex);
+ assemblyToRefTask->addListeners(getListeners());
addSubTask(assemblyToRefTask);
}
diff --git a/src/corelibs/U2Algorithm/src/misc/DnaAssemblyMultiTask.h b/src/corelibs/U2Algorithm/src/misc/DnaAssemblyMultiTask.h
index f4b7b34..94e5091 100644
--- a/src/corelibs/U2Algorithm/src/misc/DnaAssemblyMultiTask.h
+++ b/src/corelibs/U2Algorithm/src/misc/DnaAssemblyMultiTask.h
@@ -23,14 +23,15 @@
#define _U2_DNA_ASSEMBLY_MULTI_TASK_
#include <U2Algorithm/DnaAssemblyTask.h>
-#include <U2Core/Task.h>
+
+#include <U2Core/ExternalToolRunTask.h>
#include <U2Core/GUrl.h>
namespace U2 {
class Document;
-class U2ALGORITHM_EXPORT DnaAssemblyMultiTask : public Task {
+class U2ALGORITHM_EXPORT DnaAssemblyMultiTask : public ExternalToolSupportTask {
Q_OBJECT
public:
DnaAssemblyMultiTask(const DnaAssemblyToRefTaskSettings& settings, bool viewResult = false, bool justBuildIndex = false);
diff --git a/src/corelibs/U2Algorithm/src/registry/DnaAssemblyTask.cpp b/src/corelibs/U2Algorithm/src/registry/DnaAssemblyTask.cpp
index 12541ed..5630806 100644
--- a/src/corelibs/U2Algorithm/src/registry/DnaAssemblyTask.cpp
+++ b/src/corelibs/U2Algorithm/src/registry/DnaAssemblyTask.cpp
@@ -27,7 +27,7 @@
namespace U2 {
DnaAssemblyToReferenceTask::DnaAssemblyToReferenceTask(const DnaAssemblyToRefTaskSettings &settings, TaskFlags flags, bool justBuildIndex)
-: Task(tr("Align short reads"), flags), settings(settings), justBuildIndex(justBuildIndex) {
+: ExternalToolSupportTask(tr("Align short reads"), flags), settings(settings), justBuildIndex(justBuildIndex) {
}
void DnaAssemblyToReferenceTask::setUpIndexBuilding(const QStringList &indexSuffixes) {
diff --git a/src/corelibs/U2Algorithm/src/registry/DnaAssemblyTask.h b/src/corelibs/U2Algorithm/src/registry/DnaAssemblyTask.h
index af83cf2..9346183 100644
--- a/src/corelibs/U2Algorithm/src/registry/DnaAssemblyTask.h
+++ b/src/corelibs/U2Algorithm/src/registry/DnaAssemblyTask.h
@@ -22,10 +22,11 @@
#ifndef _U2_DNA_ASSEMBLY_TASK_H_
#define _U2_DNA_ASSEMBLY_TASK_H_
-#include <U2Core/Task.h>
+#include <U2Core/ExternalToolRunTask.h>
+#include <U2Core/DNASequence.h>
#include <U2Core/GUrl.h>
#include <U2Core/MAlignment.h>
-#include <U2Core/DNASequence.h>
+#include <U2Core/Task.h>
namespace U2 {
@@ -77,7 +78,7 @@ private:
QMap<QString, QVariant> customSettings;
};
-class U2ALGORITHM_EXPORT DnaAssemblyToReferenceTask : public Task {
+class U2ALGORITHM_EXPORT DnaAssemblyToReferenceTask : public ExternalToolSupportTask {
Q_OBJECT
public:
DnaAssemblyToReferenceTask(const DnaAssemblyToRefTaskSettings &settings, TaskFlags flags = TaskFlags_FOSCOE, bool justBuildIndex = false);
diff --git a/src/corelibs/U2Core/U2Core.pro b/src/corelibs/U2Core/U2Core.pro
index ac7cb98..c8226f6 100644
--- a/src/corelibs/U2Core/U2Core.pro
+++ b/src/corelibs/U2Core/U2Core.pro
@@ -54,6 +54,7 @@ HEADERS += src/cmdline/CMDLineCoreOptions.h \
src/datatype/UdrSchemaRegistry.h \
src/datatype/Vector3D.h \
src/datatype/udr/RawDataUdrSchema.h \
+ src/dbi/DbiConnection.h \
src/dbi/DbiDocumentFormat.h \
src/dbi/U2AbstractDbi.h \
src/dbi/U2AssemblyDbi.h \
@@ -201,6 +202,7 @@ HEADERS += src/cmdline/CMDLineCoreOptions.h \
src/tasks/shared_db/ImportFileToDatabaseTask.h \
src/tasks/shared_db/ImportObjectToDatabaseTask.h \
src/tasks/shared_db/ImportToDatabaseTask.h \
+ src/util/AnnotationCreationPattern.h \
src/util/AssemblyImporter.h \
src/util/DatatypeSerializeUtils.h \
src/util/FileAndDirectoryUtils.h \
@@ -222,6 +224,7 @@ HEADERS += src/cmdline/CMDLineCoreOptions.h \
src/util/QObjectScopedPointer.h \
src/util/QVariantUtils.h \
src/util/SequenceUtils.h \
+ src/util/SnpeffDictionary.h \
src/util/SyncHttp.h \
src/util/TextUtils.h \
src/util/TaskWatchdog.h \
@@ -237,8 +240,8 @@ HEADERS += src/cmdline/CMDLineCoreOptions.h \
src/util/U2OpStatusUtils.h \
src/util/U2SequenceUtils.h \
src/util/U2VariationUtils.h \
- src/util/VariationPropertiesUtils.h \
- src/dbi/DbiConnection.h
+ src/util/VariationPropertiesUtils.h
+
SOURCES += src/cmdline/CMDLineCoreOptions.cpp \
src/cmdline/CMDLineRegistry.cpp \
src/cmdline/CMDLineUtils.cpp \
@@ -270,13 +273,16 @@ SOURCES += src/cmdline/CMDLineCoreOptions.cpp \
src/datatype/U2CoreAttributes.cpp \
src/datatype/U2FeatureType.cpp \
src/datatype/U2Mod.cpp \
+ src/datatype/U2Qualifier.cpp \
src/datatype/U2Region.cpp \
src/datatype/U2Type.cpp \
+ src/datatype/U2Variant.cpp \
src/datatype/UdrRecord.cpp \
src/datatype/UdrSchema.cpp \
src/datatype/UdrSchemaRegistry.cpp \
src/datatype/Vector3D.cpp \
src/datatype/udr/RawDataUdrSchema.cpp \
+ src/dbi/DbiConnection.cpp \
src/dbi/DbiDocumentFormat.cpp \
src/dbi/U2Dbi.cpp \
src/dbi/U2DbiPackUtils.cpp \
@@ -399,6 +405,7 @@ SOURCES += src/cmdline/CMDLineCoreOptions.cpp \
src/tasks/shared_db/ImportFileToDatabaseTask.cpp \
src/tasks/shared_db/ImportObjectToDatabaseTask.cpp \
src/tasks/shared_db/ImportToDatabaseTask.cpp \
+ src/util/AnnotationCreationPattern.cpp \
src/util/AssemblyImporter.cpp \
src/util/DatatypeSerializeUtils.cpp \
src/util/FileAndDirectoryUtils.cpp \
@@ -418,6 +425,7 @@ SOURCES += src/cmdline/CMDLineCoreOptions.cpp \
src/util/MSAUtils.cpp \
src/util/QVariantUtils.cpp \
src/util/SequenceUtils.cpp \
+ src/util/SnpeffDictionary.cpp \
src/util/SyncHttp.cpp \
src/util/TextUtils.cpp \
src/util/TaskWatchdog.cpp \
@@ -431,7 +439,7 @@ SOURCES += src/cmdline/CMDLineCoreOptions.cpp \
src/util/U2FeatureUtils.cpp \
src/util/U2ObjectTypeUtils.cpp \
src/util/U2SequenceUtils.cpp \
- src/util/U2VariationUtils.cpp \
- src/dbi/DbiConnection.cpp
+ src/util/U2VariationUtils.cpp
+
TRANSLATIONS += transl/english.ts \
transl/russian.ts
diff --git a/src/corelibs/U2Core/src/datatype/Annotation.cpp b/src/corelibs/U2Core/src/datatype/Annotation.cpp
index f0feb38..27e0bd9 100644
--- a/src/corelibs/U2Core/src/datatype/Annotation.cpp
+++ b/src/corelibs/U2Core/src/datatype/Annotation.cpp
@@ -324,13 +324,12 @@ bool Annotation::annotationLessThanByRegion(Annotation *first, Annotation *secon
return r1 < r2;
}
-bool Annotation::isValidQualifierName(const QString &n) {
- return !n.isEmpty() && 20 > n.length() && TextUtils::fits(TextUtils::QUALIFIER_NAME_CHARS, n.toLocal8Bit().data(), n.length());
+bool Annotation::isValidQualifierName(const QString &name) {
+ return U2Qualifier::isValidQualifierName(name);
}
-bool Annotation::isValidQualifierValue(const QString & /*v*/) {
- // todo: check whitespaces!
- return true;
+bool Annotation::isValidQualifierValue(const QString &value) {
+ return U2Qualifier::isValidQualifierValue(value);
}
namespace {
diff --git a/src/corelibs/U2Core/src/datatype/Annotation.h b/src/corelibs/U2Core/src/datatype/Annotation.h
index 8f360b4..6c7910e 100644
--- a/src/corelibs/U2Core/src/datatype/Annotation.h
+++ b/src/corelibs/U2Core/src/datatype/Annotation.h
@@ -110,9 +110,9 @@ public:
static bool isValidAnnotationName(const QString &n);
- static bool isValidQualifierName(const QString &n);
+ static bool isValidQualifierName(const QString &name);
- static bool isValidQualifierValue(const QString &v);
+ static bool isValidQualifierValue(const QString &value);
static QString produceValidAnnotationName(const QString &name);
diff --git a/src/corelibs/U2Core/src/datatype/U2Assembly.h b/src/corelibs/U2Core/src/datatype/U2Assembly.h
index 0d6daa9..200b4a1 100644
--- a/src/corelibs/U2Core/src/datatype/U2Assembly.h
+++ b/src/corelibs/U2Core/src/datatype/U2Assembly.h
@@ -238,11 +238,29 @@ public:
};
/** Statistics information collected during the reads packing algorithm */
-class U2AssemblyCoverageStat {
+class U2CORE_EXPORT U2AssemblyCoverageStat {
public:
- U2AssemblyCoverageStat() {}
+ U2AssemblyCoverageStat() {
+ coverage = new QVector<U2Range <int> >();
+ }
+
+ U2AssemblyCoverageStat(const U2AssemblyCoverageStat& toCopy) {
+ this->coverage = new QVector<U2Range<int> >(*toCopy.coverage);
+ }
+
+ ~U2AssemblyCoverageStat() {
+ delete coverage;
+ }
+ const U2AssemblyCoverageStat& operator=(const U2AssemblyCoverageStat& obj) {
+ if (this == &obj) {
+ return *this;
+ }
+ delete this->coverage;
+ this->coverage = new QVector< U2Range<int> > (*obj.coverage);
+ return *this;
+ }
- QVector< U2Range<int> > coverage;
+ QVector< U2Range<int> > *coverage;
};
} //namespace
diff --git a/src/corelibs/U2Gui/src/util/shared_db/CommonImportOptionsDialog.cpp b/src/corelibs/U2Core/src/datatype/U2Qualifier.cpp
similarity index 50%
copy from src/corelibs/U2Gui/src/util/shared_db/CommonImportOptionsDialog.cpp
copy to src/corelibs/U2Core/src/datatype/U2Qualifier.cpp
index 1467e34..9c9f0f9 100644
--- a/src/corelibs/U2Gui/src/util/shared_db/CommonImportOptionsDialog.cpp
+++ b/src/corelibs/U2Core/src/datatype/U2Qualifier.cpp
@@ -19,31 +19,43 @@
* MA 02110-1301, USA.
*/
-#include <U2Gui/HelpButton.h>
+#include <U2Core/TextUtils.h>
+#include <U2Core/U2SafePoints.h>
-#include "CommonImportOptionsDialog.h"
-#include "ImportOptionsWidget.h"
+#include "U2Qualifier.h"
namespace U2 {
-CommonImportOptionsDialog::CommonImportOptionsDialog(const QString& baseFolder, const ImportToDatabaseOptions& options, QWidget *parent) :
- QDialog(parent)
+U2Qualifier::U2Qualifier()
{
- setupUi(this);
- new HelpButton(this, buttonBox, "17470800");
- init(baseFolder, options);
+
+}
+
+U2Qualifier::U2Qualifier(const QString &name, const QString &value)
+ : name(name),
+ value(value)
+{
+ SAFE_POINT(isValid(), "An attempt to create an invalid qualifier", );
+}
+
+bool U2Qualifier::isValid() const {
+ return isValidQualifierName(name) && isValidQualifierValue(value);
+}
+
+bool U2Qualifier::operator==(const U2Qualifier &q) const {
+ return q.name == name && q.value == value;
}
-QString CommonImportOptionsDialog::getBaseFolder() const {
- return optionsWidget->getFolder();
+bool U2Qualifier::operator!=(const U2Qualifier &q) const {
+ return !(*this == q);
}
-ImportToDatabaseOptions CommonImportOptionsDialog::getOptions() const {
- return optionsWidget->getOptions();
+bool U2Qualifier::isValidQualifierName(const QString &name) {
+ return !name.isEmpty() && name.length() <= 20 && TextUtils::fits(TextUtils::QUALIFIER_NAME_CHARS, name.toLocal8Bit().data(), name.length());
}
-void CommonImportOptionsDialog::init(const QString& baseFolder, const ImportToDatabaseOptions& options) {
- optionsWidget->init(baseFolder, options);
+bool U2Qualifier::isValidQualifierValue(const QString &) {
+ return true;
}
} // namespace U2
diff --git a/src/corelibs/U2Core/src/datatype/U2Qualifier.h b/src/corelibs/U2Core/src/datatype/U2Qualifier.h
index d7d2e59..dbbac02 100644
--- a/src/corelibs/U2Core/src/datatype/U2Qualifier.h
+++ b/src/corelibs/U2Core/src/datatype/U2Qualifier.h
@@ -22,6 +22,8 @@
#ifndef _U2_QUALIFIER_H_
#define _U2_QUALIFIER_H_
+#include <U2Core/global.h>
+
namespace U2 {
/**
@@ -37,19 +39,22 @@ public:
/** Constructs new empty (and invalid) qualifier */
- U2Qualifier(){}
+ U2Qualifier();
/** Constructs new qualifier instance with name and value set */
- U2Qualifier(const QString& _name, const QString& _value) : name(_name), value(_value){}
+ U2Qualifier(const QString &name, const QString &value);
/** U2Qualifier is valid if its name is not empty */
- bool isValid() const {return !name.isEmpty();}
+ bool isValid() const;
/** Any two qualifiers are equal if their names & values are equal */
- bool operator== ( const U2Qualifier & q ) const { return q.name == name && q.value == value; }
+ bool operator==(const U2Qualifier &q) const;
/** Any two qualifiers are not equal if either their names or values are not equal */
- bool operator!= ( const U2Qualifier & q ) const { return !(*this == q); }
+ bool operator!=(const U2Qualifier &q) const;
+
+ static bool isValidQualifierName(const QString &name);
+ static bool isValidQualifierValue(const QString &value);
};
} // namespace
diff --git a/src/ugeneui/src/main_window/ShutdownTask.h b/src/corelibs/U2Core/src/datatype/U2Variant.cpp
similarity index 55%
copy from src/ugeneui/src/main_window/ShutdownTask.h
copy to src/corelibs/U2Core/src/datatype/U2Variant.cpp
index 4c8596e..902ce60 100644
--- a/src/ugeneui/src/main_window/ShutdownTask.h
+++ b/src/corelibs/U2Core/src/datatype/U2Variant.cpp
@@ -19,35 +19,39 @@
* MA 02110-1301, USA.
*/
-#ifndef _U2_SHUTDOWN_TASK_H_
-#define _U2_SHUTDOWN_TASK_H_
-
-#include <U2Core/Task.h>
+#include "U2Variant.h"
namespace U2 {
-class Document;
-class MainWindowImpl;
+const QString U2VariantTrack::META_INFO_ATTIBUTE = "meta-info";
+const QString U2VariantTrack::HEADER_ATTIBUTE = "header";
+
+U2VariantTrack::U2VariantTrack()
+ : trackType(TrackType_All)
+{
-class ShutdownTask : public Task {
- Q_OBJECT
-public:
- ShutdownTask(MainWindowImpl* mw);
+}
- void prepare();
+U2VariantTrack::U2VariantTrack(const U2DataId &id, const QString &dbId, VariantTrackType trackType, qint64 version)
+ : U2Object(id, dbId, version),
+ trackType(trackType)
+{
- ReportResult report();
+}
-protected:
- virtual QList<Task*> onSubTaskFinished(Task* subTask);
+U2DataType U2VariantTrack::getType() const {
+ return U2Type::VariantTrack;
+}
-private:
- MainWindowImpl* mw;
- bool docsToRemoveAreFetched;
- QList<Document *> docsToRemove;
-};
+const QString U2Variant::VCF4_QUAL = "QUAL";
+const QString U2Variant::VCF4_FILTER = "FILTER";
+const QString U2Variant::VCF4_INFO = "INFO";
+U2Variant::U2Variant()
+ : startPos(0),
+ endPos(0)
+{
-}//namespace
+}
-#endif
+} // namespace U2
diff --git a/src/corelibs/U2Core/src/datatype/U2Variant.h b/src/corelibs/U2Core/src/datatype/U2Variant.h
index 954967f..e0eaffa 100644
--- a/src/corelibs/U2Core/src/datatype/U2Variant.h
+++ b/src/corelibs/U2Core/src/datatype/U2Variant.h
@@ -26,15 +26,11 @@
namespace U2 {
-/**return SNP region in case endPos == 0, else return variation region*/
-#define VARIATION_REGION(var) \
- U2Region((var).startPos, (var).endPos == 0 ? 1 : (var).endPos - (var).startPos)
-
/**
Representation for set of genomic variations.
*/
-enum VariantTrackType{
+enum VariantTrackType {
TrackType_All = 1,
TrackType_Perspective = 2,
TrackType_Discarded = 3,
@@ -47,9 +43,8 @@ enum VariantTrackType{
class U2CORE_EXPORT U2VariantTrack : public U2Object {
public:
- U2VariantTrack():
- trackType(TrackType_All){}
- U2VariantTrack(const U2DataId& id, const QString& dbId, VariantTrackType _trackType, qint64 version) : U2Object(id, dbId, version), trackType(_trackType){}
+ U2VariantTrack();
+ U2VariantTrack(const U2DataId &id, const QString &dbId, VariantTrackType trackType, qint64 version);
/** Sequence id */
U2DataId sequence;
@@ -63,23 +58,29 @@ public:
/** File header */
QString fileHeader;
- U2DataType getType() const { return U2Type::VariantTrack; }
+ U2DataType getType() const;
+
+ static const QString META_INFO_ATTIBUTE;
+ static const QString HEADER_ATTIBUTE;
};
/** Database representation of genomic variations such as snps, indels, etc. */
-class U2Variant : public U2Entity {
+class U2CORE_EXPORT U2Variant : public U2Entity {
public:
- U2Variant() : startPos(0), endPos(0) {}
+ U2Variant();
qint64 startPos;
qint64 endPos;
QByteArray refData;
QByteArray obsData;
QString publicId;
- QString additionalInfo;
+ QStrStrMap additionalInfo;
+ static const QString VCF4_QUAL;
+ static const QString VCF4_FILTER;
+ static const QString VCF4_INFO;
};
-} // namespace
+} // namespace U2
-#endif
+#endif // _U2_VARIANT_H_
diff --git a/src/corelibs/U2Core/src/dbi/U2DbiUtils.cpp b/src/corelibs/U2Core/src/dbi/U2DbiUtils.cpp
index b4413e6..b429d82 100644
--- a/src/corelibs/U2Core/src/dbi/U2DbiUtils.cpp
+++ b/src/corelibs/U2Core/src/dbi/U2DbiUtils.cpp
@@ -19,7 +19,10 @@
* MA 02110-1301, USA.
*/
-#include <QtCore/QFile>
+#include <cmath>
+
+#include <QBitArray>
+#include <QFile>
#include <U2Core/AppContext.h>
#include <U2Core/U2DbiRegistry.h>
@@ -43,7 +46,7 @@ static U2DataId emptyId;
const QString U2DbiUtils::PUBLIC_DATABASE_NAME = QObject::tr("UGENE public database");
const QString U2DbiUtils::PUBLIC_DATABASE_LOGIN = "public";
const QString U2DbiUtils::PUBLIC_DATABASE_PASSWORD = "public";
-const QString U2DbiUtils::PUBLIC_DATABASE_URL = U2DbiUtils::createFullDbiUrl(PUBLIC_DATABASE_LOGIN, "5.9.139.103", 3306, "public_ugene_1_16");
+const QString U2DbiUtils::PUBLIC_DATABASE_URL = U2DbiUtils::createFullDbiUrl(PUBLIC_DATABASE_LOGIN, "5.9.139.103", 3306, "public_ugene_1_24");
void U2DbiUtils::logNotSupported(U2DbiFeature f, U2Dbi* dbi, U2OpStatus& os) {
QString msg = tr("Feature is not supported: %1, dbi: %2").arg(int(f)).arg(dbi == NULL ? QString("<unknown>") : dbi->getDbiId());
@@ -236,6 +239,88 @@ bool U2DbiUtils::isDatabaseTooOld(const U2DbiRef &dbiRef, const Version &ugeneVe
return minRequiredVersion < ugeneVersion;
}
+namespace {
+
+// Separator placed between the quoted items of a packed string list
+const QString LIST_SEPARATOR = ",";
+// Separator placed between the key/value pairs of a packed map
+const QString MAP_SEPARATOR = ";";
+// Connector placed between the quoted key and the quoted value of one pair
+const QString PAIR_CONNECTOR = "=";
+// Matches a leading quote, a trailing quote, or the quote-separator-quote sequence between list items.
+// NOTE(review): "(?!\\)" is a negative lookahead located right before a literal quote, so it looks like
+// it can never reject a match; presumably a "not preceded by a backslash" check was intended - confirm.
+const QRegExp listSeparatorRegExp(QString("^\\\"|(?!\\\\)\\\"%1\\\"|\\\"$").arg(LIST_SEPARATOR));
+// Matches only the quote-separator-quote sequence between two pairs; the outer quotes of the whole string are left in place
+const QRegExp mapSeparatorRegExp(QString("(?!\\\\)\\\"%1\\\"").arg(MAP_SEPARATOR));
+// Matches a leading quote, a trailing quote, or the quote-connector-quote sequence between a key and its value
+const QRegExp pairSeparatorRegExp(QString("^\\\"|(?!\\\\)\\\"%1\\\"|\\\"$").arg(PAIR_CONNECTOR));
+
+/**
+ * Builds a lookup table indexed by character code.
+ * A set bit marks a character that has to be prefixed with a backslash when a string is packed.
+ */
+QBitArray initCharactersToEscape() {
+    // One bit per possible 'char' value. The size is computed with integer arithmetic:
+    // there is no need to go through floating-point pow() and a double-to-int conversion.
+    QBitArray map(1 << (8 * sizeof(char)));
+    map[(int)'\\'] = true;
+    map[(int)'\"'] = true;
+    return map;
+}
+
+const QBitArray charactersToEscape = initCharactersToEscape();
+
+/**
+ * Returns a copy of @string where every character marked in 'charactersToEscape'
+ * is prefixed with a single backslash.
+ *
+ * The string is processed in one pass. Replacing the characters one after another
+ * (in ascending code order) escaped the quote first and then doubled the backslash
+ * that had just been inserted, so a quote was packed as \\" and could not be restored.
+ */
+QString escapeCharacters(QString string) {
+    QString escaped;
+    escaped.reserve(string.size());
+    for (int i = 0; i < string.size(); i++) {
+        const QChar symbol = string.at(i);
+        const int code = symbol.unicode();
+        // Characters outside of the table range never need escaping
+        if (code < charactersToEscape.size() && charactersToEscape[code]) {
+            escaped += QLatin1Char('\\');
+        }
+        escaped += symbol;
+    }
+
+    return escaped;
+}
+
+/**
+ * Replaces every backslash-prefixed occurrence of a character marked in
+ * 'charactersToEscape' with the bare character. The characters are handled
+ * one after another, in ascending order of their codes.
+ */
+QString unescapeCharacters(QString string) {
+    const QString escapePrefix("\\");
+    for (int code = 0; code < charactersToEscape.size(); code++) {
+        if (!charactersToEscape[code]) {
+            continue;
+        }
+        const char symbol = (char)code;
+        string.replace(escapePrefix + symbol, QString(1, symbol));
+    }
+
+    return string;
+}
+
+/** Returns @string enclosed in double quotes; the content itself is not modified or escaped here. */
+QString wrapString(const QString &string) {
+ return "\"" + string + "\"";
+}
+
+}
+
+/**
+ * Serializes @list into a single string: every item is escaped, enclosed in
+ * double quotes and the items are joined with LIST_SEPARATOR.
+ */
+QString U2DbiUtils::packStringList(const QStringList &list) {
+    QString result;
+    for (QStringList::const_iterator item = list.constBegin(); item != list.constEnd(); ++item) {
+        result.append(wrapString(escapeCharacters(*item)));
+        result.append(LIST_SEPARATOR);
+    }
+    // Drop the separator that was appended after the last item
+    result.chop(LIST_SEPARATOR.size());
+    return result;
+}
+
+/**
+ * Splits a packed @string with 'listSeparatorRegExp' and unescapes every part.
+ * Empty parts are skipped by the split, so empty items are not present in the result.
+ */
+QStringList U2DbiUtils::unpackStringList(const QString &string) {
+    const QStringList escapedItems = string.split(listSeparatorRegExp, QString::SkipEmptyParts);
+
+    QStringList result;
+    foreach (const QString &escapedItem, escapedItems) {
+        result.append(unescapeCharacters(escapedItem));
+    }
+    return result;
+}
+
+/**
+ * Serializes @map into a single string. Every pair is written as an escaped and
+ * quoted key, PAIR_CONNECTOR, an escaped and quoted value; the pairs are joined
+ * with MAP_SEPARATOR.
+ */
+QString U2DbiUtils::packMap(const QStrStrMap &map) {
+    QString packedMap;
+    foreach (const QString &key, map.keys()) {
+        const QString packedKey = wrapString(escapeCharacters(key));
+        const QString packedValue = wrapString(escapeCharacters(map[key]));
+        packedMap += packedKey + PAIR_CONNECTOR + packedValue + MAP_SEPARATOR;
+    }
+    // Drop the separator that was appended after the last pair
+    packedMap.chop(MAP_SEPARATOR.size());
+    return packedMap;
+}
+
+/**
+ * Restores a map from a string produced by packMap().
+ *
+ * The string is split into pairs with 'mapSeparatorRegExp', every pair is split
+ * into a key and a value with 'pairSeparatorRegExp'. A pair without a value part
+ * gets an empty value. Keys and values are unescaped, because packMap() escapes them.
+ */
+QStrStrMap U2DbiUtils::unpackMap(const QString &string) {
+    QStrStrMap map;
+    foreach (const QString &pair, string.split(mapSeparatorRegExp, QString::SkipEmptyParts)) {
+        const QStringList splittedPair = pair.split(pairSeparatorRegExp, QString::SkipEmptyParts);
+        Q_ASSERT(splittedPair.size() <= 2);
+        // A pair that consists of quotes and the connector only (e.g. ""="") leaves nothing after
+        // the split: skip it instead of calling first() on an empty list
+        if (splittedPair.isEmpty()) {
+            continue;
+        }
+        const QString key = unescapeCharacters(splittedPair.first());
+        const QString value = splittedPair.size() > 1 ? unescapeCharacters(splittedPair[1]) : QString("");
+        map.insert(key, value);
+    }
+    return map;
+}
+
//////////////////////////////////////////////////////////////////////////
// TmpDbiHandle
diff --git a/src/corelibs/U2Core/src/dbi/U2DbiUtils.h b/src/corelibs/U2Core/src/dbi/U2DbiUtils.h
index 62eaa42..656642f 100644
--- a/src/corelibs/U2Core/src/dbi/U2DbiUtils.h
+++ b/src/corelibs/U2Core/src/dbi/U2DbiUtils.h
@@ -164,6 +164,12 @@ public:
static bool isDatabaseTooNew(const U2DbiRef &dbiRef, const Version &ugeneVersion, QString &minRequiredVersionString, U2OpStatus &os);
static bool isDatabaseTooOld(const U2DbiRef &dbiRef, const Version &ugeneVersion, U2OpStatus &os);
+ static QString packStringList(const QStringList &list);
+ static QStringList unpackStringList(const QString &string);
+
+ static QString packMap(const QStrStrMap &map);
+ static QStrStrMap unpackMap(const QString &string);
+
static const QString PUBLIC_DATABASE_NAME;
static const QString PUBLIC_DATABASE_URL;
static const QString PUBLIC_DATABASE_LOGIN;
diff --git a/src/corelibs/U2Core/src/globals/ExternalToolRegistry.h b/src/corelibs/U2Core/src/globals/ExternalToolRegistry.h
index c58c2bd..cc27f96 100644
--- a/src/corelibs/U2Core/src/globals/ExternalToolRegistry.h
+++ b/src/corelibs/U2Core/src/globals/ExternalToolRegistry.h
@@ -78,6 +78,8 @@ public:
const QString& getToolKitName() const { return toolKitName; }
const QStrStrMap& getErrorDescriptions() const { return errorDescriptions; }
+ /** The default implementation does nothing with the passed string; the method is virtual, so subclasses can override it. */
+ virtual void getAdditionalParameters(const QString& /*output*/) {}
+
ExternalToolValidation getToolValidation();
const QList<ExternalToolValidation>& getToolAdditionalValidations() const { return additionalValidators; }
const QStringList& getDependencies() const { return dependencies; }
diff --git a/src/corelibs/U2Core/src/globals/L10n.h b/src/corelibs/U2Core/src/globals/L10n.h
index cc9b364..e295b4f 100644
--- a/src/corelibs/U2Core/src/globals/L10n.h
+++ b/src/corelibs/U2Core/src/globals/L10n.h
@@ -80,7 +80,10 @@ public:
static QString errorColorLabelStr() { return "rgb(166, 57, 46)"; }
static QString errorColorLabelHtmlStr() { return "#A6392E"; } // the same as errorColorLabelStr()
+ static QString warningColorLabelHtmlStr() { return "#FF8B19"; }
+
static QColor infoHintColor() { return QColor("green"); }
+ static QString infoColorLabelHtmlStr() { return "#218F20"; }
static QString infoHintStyleSheet() { return QString("color: %1; font: bold").arg(infoHintColor().name()); }
static QColor successColor() { return QColor("green"); }
diff --git a/src/corelibs/U2Core/src/gobjects/VariantTrackObject.cpp b/src/corelibs/U2Core/src/gobjects/VariantTrackObject.cpp
index 3ff5604..51237b7 100644
--- a/src/corelibs/U2Core/src/gobjects/VariantTrackObject.cpp
+++ b/src/corelibs/U2Core/src/gobjects/VariantTrackObject.cpp
@@ -22,6 +22,7 @@
#include <U2Core/AppContext.h>
#include <U2Core/DocumentModel.h>
#include <U2Core/GHints.h>
+#include <U2Core/U2AttributeUtils.h>
#include <U2Core/U2DbiUtils.h>
#include <U2Core/U2ObjectDbi.h>
#include <U2Core/U2OpStatusUtils.h>
@@ -51,17 +52,27 @@ U2DbiIterator<U2Variant>* VariantTrackObject::getVariants( const U2Region& reg,
CHECK_OP(os, NULL);
U2VariantDbi* vdbi = con.dbi->getVariantDbi();
- SAFE_POINT(vdbi != NULL, "Varian DBI is NULL", NULL);
+ SAFE_POINT(vdbi != NULL, "Variant DBI is NULL", NULL);
return vdbi->getVariants(entityRef.entityId, reg, os);
}
+/**
+ * Asks the variant DBI of the object's database for the variant count of this track.
+ * 0 is passed to CHECK_OP and SAFE_POINT as the fallback result.
+ */
+int VariantTrackObject::getVariantCount(U2OpStatus &os) const {
+    DbiConnection connection(entityRef.dbiRef, os);
+    CHECK_OP(os, 0);
+
+    U2VariantDbi *variantDbi = connection.dbi->getVariantDbi();
+    SAFE_POINT(variantDbi != NULL, "Variant DBI is NULL", 0);
+
+    const int variantCount = variantDbi->getVariantCount(entityRef.entityId, os);
+    return variantCount;
+}
+
U2VariantTrack VariantTrackObject::getVariantTrack(U2OpStatus &os) const {
DbiConnection con(entityRef.dbiRef, os);
CHECK_OP(os, U2VariantTrack());
U2VariantDbi* vdbi = con.dbi->getVariantDbi();
- SAFE_POINT(vdbi != NULL, "Varian DBI is NULL", U2VariantTrack());
+ SAFE_POINT(vdbi != NULL, "Variant DBI is NULL", U2VariantTrack());
return vdbi->getVariantTrack(entityRef.entityId, os);
}
@@ -75,7 +86,6 @@ GObject* VariantTrackObject::clone(const U2DbiRef &dstDbiRef, U2OpStatus &os, co
CHECK_OP(os, NULL);
DbiConnection dstCon(dstDbiRef, true, os);
CHECK_OP(os, NULL);
- Q_UNUSED(srcCon);
GHintsDefaultImpl gHints(getGHintsMap());
gHints.setAll(hints);
@@ -95,6 +105,8 @@ GObject* VariantTrackObject::clone(const U2DbiRef &dstDbiRef, U2OpStatus &os, co
dstVDbi->addVariantsToTrack(clonedTrack, varsIter.data(), os);
CHECK_OP(os, NULL);
+ U2AttributeUtils::copyObjectAttributes(entityRef.entityId, clonedTrack.id, srcCon.dbi->getAttributeDbi(), dstCon.dbi->getAttributeDbi(), os);
+
U2EntityRef clonedTrackRef(dstDbiRef, clonedTrack.id);
VariantTrackObject *clonedObj = new VariantTrackObject(getGObjectName(), clonedTrackRef, gHints.getMap());
return clonedObj;
@@ -105,7 +117,7 @@ void VariantTrackObject::addVariants( const QList<U2Variant>& variants, U2OpStat
CHECK_OP(os, );
U2VariantDbi* vdbi = con.dbi->getVariantDbi();
- SAFE_POINT(vdbi != NULL, "Varian DBI is NULL", );
+ SAFE_POINT(vdbi != NULL, "Variant DBI is NULL", );
U2VariantTrack track = vdbi->getVariantTrack(entityRef.entityId, os);
CHECK_OP(os, );
diff --git a/src/corelibs/U2Core/src/gobjects/VariantTrackObject.h b/src/corelibs/U2Core/src/gobjects/VariantTrackObject.h
index 66e0fbc..137dbc7 100644
--- a/src/corelibs/U2Core/src/gobjects/VariantTrackObject.h
+++ b/src/corelibs/U2Core/src/gobjects/VariantTrackObject.h
@@ -42,6 +42,7 @@ public:
/**U2_REGION_MAX to get all variants*/
U2DbiIterator<U2Variant>* getVariants(const U2Region& reg, U2OpStatus& os ) const;
+ int getVariantCount(U2OpStatus &os) const;
void addVariants(const QList<U2Variant>& variants, U2OpStatus& os);
diff --git a/src/corelibs/U2Core/src/models/DocumentModel.cpp b/src/corelibs/U2Core/src/models/DocumentModel.cpp
index 4f06d55..6f05964 100644
--- a/src/corelibs/U2Core/src/models/DocumentModel.cpp
+++ b/src/corelibs/U2Core/src/models/DocumentModel.cpp
@@ -682,7 +682,9 @@ bool Document::unload(bool deleteObjects) {
// deallocate objects
if (deleteObjects) {
- removeObjectsDataFromDbi(tmpObjects);
+ if (isDocumentOwnsDbiResources()) {
+ removeObjectsDataFromDbi(tmpObjects);
+ }
qDeleteAll(tmpObjects);
}
diff --git a/src/corelibs/U2Core/src/models/DocumentModel.h b/src/corelibs/U2Core/src/models/DocumentModel.h
index 5619ab2..57192fe 100644
--- a/src/corelibs/U2Core/src/models/DocumentModel.h
+++ b/src/corelibs/U2Core/src/models/DocumentModel.h
@@ -75,6 +75,8 @@ enum DocumentFormatFlag {
DocumentFormatFlag_DirectWriteOperations = 1<<9,
// Document can be locked if created not by UGENE
DocumentFormatFlag_LockedIfNotCreatedByUGENE = 1<<10,
+
+ DocumentFormatFlag_CannotBeCompressed = 1<<11
};
diff --git a/src/corelibs/U2Core/src/tasks/AddSequencesToAlignmentTask.cpp b/src/corelibs/U2Core/src/tasks/AddSequencesToAlignmentTask.cpp
index 12f0561..387f073 100644
--- a/src/corelibs/U2Core/src/tasks/AddSequencesToAlignmentTask.cpp
+++ b/src/corelibs/U2Core/src/tasks/AddSequencesToAlignmentTask.cpp
@@ -55,7 +55,7 @@ void AddSequenceObjectsToAlignmentTask::prepare() {
return;
}
- stateLock = new StateLock("Adding_files_to_alignment", StateLockFlag_LiveLock);
+ stateLock = new StateLock("Adding_files_to_alignment");
maObj->lockState(stateLock);
processObjectsAndSetResultingAlphabet();
diff --git a/src/corelibs/U2Core/src/tasks/RemoveDocumentTask.cpp b/src/corelibs/U2Core/src/tasks/RemoveDocumentTask.cpp
index 9f1caee..2b77f12 100644
--- a/src/corelibs/U2Core/src/tasks/RemoveDocumentTask.cpp
+++ b/src/corelibs/U2Core/src/tasks/RemoveDocumentTask.cpp
@@ -31,7 +31,7 @@
namespace U2 {
RemoveMultipleDocumentsTask::RemoveMultipleDocumentsTask(Project* _p, const QList<Document*>& _docs, bool _saveModifiedDocs, bool _useGUI)
-: Task(tr("Remove document"), TaskFlag_NoRun), p(_p), saveModifiedDocs(_saveModifiedDocs), useGUI(_useGUI)
+: Task(tr("Remove document"), TaskFlags(TaskFlag_NoRun) | TaskFlag_CancelOnSubtaskCancel), p(_p), saveModifiedDocs(_saveModifiedDocs), useGUI(_useGUI)
{
assert(!_docs.empty());
assert(p!=NULL);
@@ -87,6 +87,10 @@ Task::ReportResult RemoveMultipleDocumentsTask::report() {
return Task::ReportResult_CallMeAgain;
}
+ if (isCanceled()) {
+ return ReportResult_Finished;
+ }
+
foreach(Document* doc, docPtrs) {
if ( doc != NULL ) {
// check for "stay-alive" locked objects
diff --git a/src/corelibs/U2Core/src/tasks/SaveDocumentTask.cpp b/src/corelibs/U2Core/src/tasks/SaveDocumentTask.cpp
index 54b6ded..9a4fa5b 100644
--- a/src/corelibs/U2Core/src/tasks/SaveDocumentTask.cpp
+++ b/src/corelibs/U2Core/src/tasks/SaveDocumentTask.cpp
@@ -204,14 +204,19 @@ SaveMultipleDocuments::SaveMultipleDocuments(const QList<Document*>& docs, bool
foreach(Document* doc, docs) {
bool save=true;
if (askBeforeSave) {
- QMessageBox::StandardButtons buttons = QMessageBox::StandardButtons(QMessageBox::Yes) | QMessageBox::No;
+ QMessageBox::StandardButtons buttons = QMessageBox::StandardButtons(QMessageBox::Yes) | QMessageBox::No | QMessageBox::Cancel;
if (docs.size() > 1) {
buttons = buttons | QMessageBox::YesToAll | QMessageBox::NoToAll;
}
- QMessageBox::StandardButton res = saveAll ? QMessageBox::YesToAll : QMessageBox::question(QApplication::activeWindow(),
- tr("Question?"), tr("Save document: %1").arg(doc->getURLString()),
- buttons, QMessageBox::Yes);
+ QObjectScopedPointer<QMessageBox> messageBox(new QMessageBox(QMessageBox::Question,
+ tr("Question?"),
+ tr("Save document: %1").arg(doc->getURLString()),
+ buttons,
+ QApplication::activeWindow()));
+ messageBox->button(QMessageBox::Cancel)->hide();
+
+ int res = saveAll ? QMessageBox::YesToAll : messageBox->exec();
if (res == QMessageBox::NoToAll) {
break;
@@ -222,6 +227,11 @@ SaveMultipleDocuments::SaveMultipleDocuments(const QList<Document*>& docs, bool
if (res == QMessageBox::No) {
save = false;
}
+ if (res == QMessageBox::Cancel) {
+ save = false;
+ cancel();
+ break;
+ }
}
if (save) {
GUrl url = doc->getURL();
diff --git a/src/corelibs/U2Core/src/tasks/SequenceDbiWalkerTask.cpp b/src/corelibs/U2Core/src/tasks/SequenceDbiWalkerTask.cpp
index 4eb32ca..5db17d9 100644
--- a/src/corelibs/U2Core/src/tasks/SequenceDbiWalkerTask.cpp
+++ b/src/corelibs/U2Core/src/tasks/SequenceDbiWalkerTask.cpp
@@ -21,9 +21,10 @@
#include "SequenceDbiWalkerTask.h"
+#include <U2Core/DNASequenceObject.h>
#include <U2Core/DNATranslation.h>
#include <U2Core/TextUtils.h>
-#include <U2Core/DNASequenceObject.h>
+#include <U2Core/U2SafePoints.h>
namespace U2 {
@@ -51,7 +52,7 @@ QList<SequenceDbiWalkerSubtask*> SequenceDbiWalkerTask::prepareSubtasks() {
} else {
U2Region wholeSeqReg(0, sequenceObject.getSequenceLength());
if (!config.walkCircular) {
- assert(wholeSeqReg.contains(config.range));
+ SAFE_POINT_EXT(wholeSeqReg.contains(config.range), stateInfo.setError(tr("Target region out of sequence range")), res);
}
}
diff --git a/src/plugins_3rdparty/hmm3/src/task_local_storage/uHMMSearchTaskLocalData.cpp b/src/corelibs/U2Core/src/util/AnnotationCreationPattern.cpp
similarity index 77%
rename from src/plugins_3rdparty/hmm3/src/task_local_storage/uHMMSearchTaskLocalData.cpp
rename to src/corelibs/U2Core/src/util/AnnotationCreationPattern.cpp
index c476e84..efc2d82 100644
--- a/src/plugins_3rdparty/hmm3/src/task_local_storage/uHMMSearchTaskLocalData.cpp
+++ b/src/corelibs/U2Core/src/util/AnnotationCreationPattern.cpp
@@ -19,17 +19,14 @@
* MA 02110-1301, USA.
*/
-#include <cmath>
-
-#include "uHMMSearchTaskLocalData.h"
+#include "AnnotationCreationPattern.h"
namespace U2 {
-UHMM3SearchTaskLocalData::UHMM3SearchTaskLocalData() {
- int i = 0;
- for( i = 0; i < p7_LOGSUM_TBL; ++i ) {
- flogsum_lookup[i] = log(1. + exp((double) -i / p7_INTSCALE));
- }
+AnnotationCreationPattern::AnnotationCreationPattern()
+ : type(U2FeatureTypes::MiscFeature)
+{
+
}
-} // U2
+} // namespace U2
diff --git a/src/plugins_3rdparty/hmm3/src/tests/uhmmer3Tests.h b/src/corelibs/U2Core/src/util/AnnotationCreationPattern.h
similarity index 70%
rename from src/plugins_3rdparty/hmm3/src/tests/uhmmer3Tests.h
rename to src/corelibs/U2Core/src/util/AnnotationCreationPattern.h
index 528fb62..cb661b3 100644
--- a/src/plugins_3rdparty/hmm3/src/tests/uhmmer3Tests.h
+++ b/src/corelibs/U2Core/src/util/AnnotationCreationPattern.h
@@ -19,19 +19,23 @@
* MA 02110-1301, USA.
*/
-#ifndef _GB2_UHMMER3_TESTS_H_
-#define _GB2_UHMMER3_TESTS_H_
+#ifndef _U2_ANNOTATION_CREATION_PATTERN_H_
+#define _U2_ANNOTATION_CREATION_PATTERN_H_
-#include <QtCore/QList>
-#include <U2Test/XMLTestFormat.h>
+#include <U2Core/U2FeatureType.h>
namespace U2 {
-class UHMMER3Tests {
+class U2CORE_EXPORT AnnotationCreationPattern {
public:
- static QList<XMLTestFactory*> createTestFactories();
-}; // UHMMER3Tests
+ AnnotationCreationPattern();
-} // GB2
+ QString annotationName;
+ QString groupName;
+ QString description;
+ U2FeatureType type;
+};
-#endif // _GB2_UHMMER3_TESTS_H_
+} // namespace U2
+
+#endif // _U2_ANNOTATION_CREATION_PATTERN_H_
diff --git a/src/corelibs/U2Core/src/util/GenbankFeatures.cpp b/src/corelibs/U2Core/src/util/GenbankFeatures.cpp
index 186ffec..90d83b0 100644
--- a/src/corelibs/U2Core/src/util/GenbankFeatures.cpp
+++ b/src/corelibs/U2Core/src/util/GenbankFeatures.cpp
@@ -143,7 +143,7 @@ const QVector<GBFeatureKeyInfo>& GBFeatureUtils::allKeys() {
FK(GBFeatureKey_unsure, U2FeatureTypes::Unsure, "unsure", cl("000000"), false, QObject::tr("Authors are unsure about the sequence in this region"));
FK(GBFeatureKey_V_region, U2FeatureTypes::VRegion, "V_region", cl("000000"), false, QObject::tr("Span of the V immunological feature"));
FK(GBFeatureKey_V_segment, U2FeatureTypes::VSegment, "V_segment", cl("000000"), false, QObject::tr("Variable segment of immunoglobulin light and heavy chains, and T-cell receptor alpha, beta, and gamma chains; codes for most of the variable region (V_region) and the last few amino acids of the leader peptide"));
- FK(GBFeatureKey_variation, U2FeatureTypes::Variation, "variation", cl("ffff9b"), false, QObject::tr("A related population contains stable mutation"));
+ FK(GBFeatureKey_variation, U2FeatureTypes::Variation, "variation", cl("e32636"), false, QObject::tr("A related population contains stable mutation"));
FK(GBFeatureKey__10_signal, U2FeatureTypes::Minus10Signal, "-10_signal", cl("000000"), false, QObject::tr("`Pribnow box' in prokaryotic promoters"));
FK(GBFeatureKey__35_signal, U2FeatureTypes::Minus35Signal, "-35_signal", cl("000000"), false, QObject::tr("`-35 box' in prokaryotic promoters"));
FK(GBFeatureKey_3_clip, U2FeatureTypes::ThreePrimeClip, "3'clip", cl("000000"), false, QObject::tr("3'-most region of a precursor transcript removed in processing"));
diff --git a/src/corelibs/U2Core/src/util/SnpeffDictionary.cpp b/src/corelibs/U2Core/src/util/SnpeffDictionary.cpp
new file mode 100644
index 0000000..1bb7fbc
--- /dev/null
+++ b/src/corelibs/U2Core/src/util/SnpeffDictionary.cpp
@@ -0,0 +1,162 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include "SnpeffDictionary.h"
+
+namespace U2 {
+
+const QMap<QString, QString> SnpeffDictionary::impactDescriptions = SnpeffDictionary::initImpactDescriptions();
+const QMap<QString, QString> SnpeffDictionary::effectDescriptions = SnpeffDictionary::initEffectDescriptions();
+const QMap<QString, QString> SnpeffDictionary::messageDescriptions = SnpeffDictionary::initMessageDescriptions();
+
+/** Builds the map from an impact identifier ("HIGH", "MODERATE", "LOW", "MODIFIER") to its description. */
+QMap<QString, QString> SnpeffDictionary::initImpactDescriptions() {
+    QMap<QString, QString> descriptions;
+    descriptions["HIGH"] = "The variant is assumed to have high (disruptive) impact in the protein, probably causing protein truncation, loss of function or triggering nonsense mediated decay.";
+    descriptions["MODERATE"] = "A non-disruptive variant that might change protein effectiveness.";
+    descriptions["LOW"] = "Assumed to be mostly harmless or unlikely to change protein behavior.";
+    descriptions["MODIFIER"] = "Usually non-coding variants or variants affecting non-coding genes, where predictions are difficult or there is no evidence of impact.";
+    return descriptions;
+}
+
+/**
+ * Builds the map from an effect identifier to its description.
+ * Two families of identifiers are registered: the sequence ontology terms and the "classic" names.
+ * Every key is inserted exactly once: QMap::insert() replaces the value of an existing key,
+ * so a repeated key would silently drop the description that was inserted first.
+ */
+QMap<QString, QString> SnpeffDictionary::initEffectDescriptions() {
+    QMap<QString, QString> result;
+
+    // Seq. Ontology effects
+    result.insert("coding_sequence_variant", "The variant hits a CDS. / One or many codons are changed. E.g.: An MNP of size multiple of 3.");
+    result.insert("chromosome", "A large part (over 1%) of the chromosome was deleted.");
+    result.insert("inframe_insertion", "One or many codons are inserted. E.g.: An insert multiple of three in a codon boundary.");
+    result.insert("disruptive_inframe_insertion", "One codon is changed and one or many codons are inserted. E.g.: An insert of size multiple of three, not at codon boundary.");
+    result.insert("inframe_deletion", "One or many codons are deleted. E.g.: A deletion multiple of three at codon boundary.");
+    result.insert("disruptive_inframe_deletion", "One codon is changed and one or more codons are deleted. E.g.: A deletion of size multiple of three, not at codon boundary.");
+    result.insert("downstream_gene_variant", "Downstream of a gene (default length: 5K bases).");
+    result.insert("exon_variant", "The variant hits an exon (from a non-coding transcript) or a retained intron.");
+    result.insert("exon_loss_variant", "A deletion removes the whole exon.");
+    result.insert("frameshift_variant", "Insertion or deletion causes a frame shift. E.g.: An indel size is not multiple of 3.");
+    result.insert("gene_variant", "The variant hits a gene.");
+    result.insert("intergenic_region", "The variant is in an intergenic region.");
+    result.insert("conserved_intergenic_variant", "The variant is in a highly conserved intergenic region.");
+    result.insert("intragenic_variant", "The variant hits a gene, but no transcripts within the gene.");
+    result.insert("intron_variant", "Variant hits an intron. Technically, hits no exon in the transcript.");
+    result.insert("conserved_intron_variant", "The variant is in a highly conserved intronic region.");
+    result.insert("miRNA", "Variant affects an miRNA.");
+    result.insert("missense_variant", "Variant causes a codon that produces a different amino acid. E.g.: Tgg/Cgg, W/R.");
+    result.insert("initiator_codon_variant", "Variant causes start codon to be mutated into another start codon (the new codon produces a different AA). E.g.: Atg/Ctg, M/L (ATG and CTG can be START codons).");
+    result.insert("rare_amino_acid_variant", "The variant hits a rare amino acid thus is likely to produce protein loss of function.");
+    result.insert("splice_acceptor_variant", "The variant hits a splice acceptor site (defined as two bases before exon start, except for the first exon).");
+    result.insert("splice_donor_variant", "The variant hits a Splice donor site (defined as two bases after coding exon end, except for the last exon).");
+    result.insert("splice_region_variant", "A sequence variant in which a change has occurred within the region of the splice site, either within 1-3 bases of the exon or 3-8 bases of the intron. / "
+                                           "A variant affecting putative (Lariat) branch point, located in the intron. / "
+                                           "A variant affecting putative (Lariat) branch point from U12 splicing machinery, located in the intron.");
+    result.insert("stop_lost", "Variant causes stop codon to be mutated into a non-stop codon. E.g.: Tga/Cga, */R.");
+    result.insert("5_prime_UTR_premature_start_codon_gain_variant", "A variant in 5'UTR region produces a three base sequence that can be a START codon.");
+    result.insert("start_lost", "Variant causes start codon to be mutated into a non-start codon. E.g.: aTg/aGg, M/R.");
+    result.insert("stop_gained", "Variant causes a STOP codon. E.g.: Cag/Tag, Q/*.");
+    result.insert("synonymous_variant", "Variant causes a codon that produces the same amino acid. E.g.: Ttg/Ctg, L/L.");
+    result.insert("start_retained", "Variant causes start codon to be mutated into another start codon. E.g.: Ttg/Ctg, L/L (TTG and CTG can be START codons).");
+    result.insert("stop_retained_variant", "Variant causes stop codon to be mutated into another stop codon. E.g.: taA/taG, */*.");
+    result.insert("transcript_variant", "The variant hits a transcript.");
+    result.insert("regulatory_region_variant", "The variant hits a known regulatory feature (non-coding).");
+    result.insert("upstream_gene_variant", "Upstream of a gene (default length: 5K bases).");
+    result.insert("3_prime_UTR_variant", "Variant hits 3'UTR region.");
+    result.insert("3_prime_UTR_truncation", "The variant deletes an exon which is in the 3'UTR of the transcript.");
+    result.insert("5_prime_UTR_variant", "Variant hits 5'UTR region.");
+    result.insert("5_prime_UTR_truncation", "The variant deletes an exon which is in the 5'UTR of the transcript.");
+    result.insert("sequence_feature", "A 'NextProt' based annotation. Details are provided in the 'feature type' sub-field (ANN), or in the effect details (EFF).");
+
+    // Classic effects
+    result.insert("CDS", "The variant hits a CDS.");
+    result.insert("CHROMOSOME_LARGE_DELETION", "A large part (over 1%) of the chromosome was deleted.");
+    result.insert("CODON_CHANGE", "One or many codons are changed. E.g.: An MNP of size multiple of 3.");
+    result.insert("CODON_INSERTION", "One or many codons are inserted. E.g.: An insert multiple of three in a codon boundary.");
+    result.insert("CODON_CHANGE_PLUS_CODON_INSERTION", "One codon is changed and one or many codons are inserted. E.g.: An insert of size multiple of three, not at codon boundary.");
+    result.insert("CODON_DELETION", "One or many codons are deleted. E.g.: A deletion multiple of three at codon boundary.");
+    result.insert("CODON_CHANGE_PLUS_CODON_DELETION", "One codon is changed and one or more codons are deleted. E.g.: A deletion of size multiple of three, not at codon boundary.");
+    result.insert("DOWNSTREAM", "Downstream of a gene (default length: 5K bases).");
+    result.insert("EXON", "The variant hits an exon (from a non-coding transcript) or a retained intron.");
+    result.insert("EXON_DELETED", "A deletion removes the whole exon.");
+    result.insert("FRAME_SHIFT", "Insertion or deletion causes a frame shift. E.g.: An indel size is not multiple of 3.");
+    result.insert("GENE", "The variant hits a gene.");
+    result.insert("INTERGENIC", "The variant is in an intergenic region.");
+    result.insert("INTERGENIC_CONSERVED", "The variant is in a highly conserved intergenic region.");
+    result.insert("INTRAGENIC", "The variant hits a gene, but no transcripts within the gene.");
+    result.insert("INTRON", "Variant hits an intron. Technically, hits no exon in the transcript.");
+    result.insert("INTRON_CONSERVED", "The variant is in a highly conserved intronic region.");
+    result.insert("MICRO_RNA", "Variant affects an miRNA.");
+    result.insert("NON_SYNONYMOUS_CODING", "Variant causes a codon that produces a different amino acid. E.g.: Tgg/Cgg, W/R.");
+    result.insert("NON_SYNONYMOUS_START", "Variant causes start codon to be mutated into another start codon (the new codon produces a different AA). E.g.: Atg/Ctg, M/L (ATG and CTG can be START codons).");
+    // NOTE(review): the example below repeats the start codon example of NON_SYNONYMOUS_START - confirm the intended text
+    result.insert("NON_SYNONYMOUS_STOP", "Variant causes stop codon to be mutated into another stop codon (the new codon produces a different AA). E.g.: Atg/Ctg, M/L (ATG and CTG can be START codons).");
+    result.insert("RARE_AMINO_ACID", "The variant hits a rare amino acid thus is likely to produce protein loss of function.");
+    result.insert("SPLICE_SITE_ACCEPTOR", "The variant hits a splice acceptor site (defined as two bases before exon start, except for the first exon).");
+    result.insert("SPLICE_SITE_DONOR", "The variant hits a Splice donor site (defined as two bases after coding exon end, except for the last exon).");
+    result.insert("SPLICE_SITE_REGION", "A sequence variant in which a change has occurred within the region of the splice site, either within 1-3 bases of the exon or 3-8 bases of the intron.");
+    result.insert("SPLICE_SITE_BRANCH", "A variant affecting putative (Lariat) branch point, located in the intron.");
+    result.insert("SPLICE_SITE_BRANCH_U12", "A variant affecting putative (Lariat) branch point from U12 splicing machinery, located in the intron.");
+    result.insert("STOP_LOST", "Variant causes stop codon to be mutated into a non-stop codon. E.g.: Tga/Cga, */R.");
+    result.insert("START_GAINED", "A variant in 5'UTR region produces a three base sequence that can be a START codon.");
+    result.insert("START_LOST", "Variant causes start codon to be mutated into a non-start codon. E.g.: aTg/aGg, M/R.");
+    result.insert("STOP_GAINED", "Variant causes a STOP codon. E.g.: Cag/Tag, Q/*.");
+    result.insert("SYNONYMOUS_CODING", "Variant causes a codon that produces the same amino acid. E.g.: Ttg/Ctg, L/L.");
+    result.insert("SYNONYMOUS_START", "Variant causes start codon to be mutated into another start codon. E.g.: Ttg/Ctg, L/L (TTG and CTG can be START codons).");
+    result.insert("SYNONYMOUS_STOP", "Variant causes stop codon to be mutated into another stop codon. E.g.: taA/taG, */*.");
+    result.insert("TRANSCRIPT", "The variant hits a transcript.");
+    result.insert("REGULATION", "The variant hits a known regulatory feature (non-coding).");
+    result.insert("UPSTREAM", "Upstream of a gene (default length: 5K bases).");
+    result.insert("UTR_3_PRIME", "Variant hits 3'UTR region.");
+    result.insert("UTR_3_DELETED", "The variant deletes an exon which is in the 3'UTR of the transcript.");
+    result.insert("UTR_5_PRIME", "Variant hits 5'UTR region.");
+    result.insert("UTR_5_DELETED", "The variant deletes an exon which is in the 5'UTR of the transcript.");
+    result.insert("NEXT_PROT", "A 'NextProt' based annotation. Details are provided in the 'feature type' sub-field (ANN), or in the effect details (EFF).");
+
+    return result;
+}
+
+/**
+ * Builds the map from a message identifier to its description.
+ * Every description is registered twice: under the short code (E1, W1, I1, ...)
+ * and under the corresponding message type name.
+ * The texts contain ASCII characters only, so the result does not depend on the
+ * codec that is used to convert narrow string literals to QString.
+ */
+QMap<QString, QString> SnpeffDictionary::initMessageDescriptions() {
+    QMap<QString, QString> result;
+
+    // code
+    result.insert("E1", "Chromosome does not exist in reference genome database. Typically indicates a mismatch between the chromosome names in the input file and the chromosome names used in the reference genome.");
+    result.insert("E2", "The variant's genomic coordinate is greater than chromosome's length.");
+    result.insert("W1", "This means that the 'REF' field in the input VCF file does not match the reference genome. This warning may indicate a conflict between input data and data from reference genome (for instance if the input VCF was aligned to a different reference genome).");
+    result.insert("W2", "Reference sequence is not available, thus no inference could be performed.");
+    result.insert("W3", "A protein coding transcript having a non-multiple of 3 length. It indicates that the reference genome has missing information about this particular transcript.");
+    result.insert("W4", "A protein coding transcript has two or more STOP codons in the middle of the coding sequence (CDS). This should not happen and it usually means the reference genome may have an error in this transcript.");
+    result.insert("W5", "A protein coding transcript does not have a proper START codon. It is rare that a real transcript does not have a START codon, so this probably indicates an error or missing information in the reference genome.");
+    result.insert("I1", "Variant has been realigned to the most 3-prime position within the transcript. This is usually done to comply with HGVS specification to always report the most 3-prime annotation.");
+    result.insert("I2", "This effect is a result of combining more than one variant (e.g. two consecutive SNPs that conform an MNP, or two consecutive frame_shift variants that compensate frame).");
+    result.insert("I3", "An alternative reference sequence was used to calculate this annotation (e.g. cancer sample comparing somatic vs. germline).");
+
+    // Message type: value() returns a copy, so no reference into the map is held while it is modified
+    result.insert("ERROR_CHROMOSOME_NOT_FOUND", result.value("E1"));
+    result.insert("ERROR_OUT_OF_CHROMOSOME_RANGE", result.value("E2"));
+    result.insert("WARNING_REF_DOES_NOT_MATCH_GENOME", result.value("W1"));
+    result.insert("WARNING_SEQUENCE_NOT_AVAILABLE", result.value("W2"));
+    result.insert("WARNING_TRANSCRIPT_INCOMPLETE", result.value("W3"));
+    result.insert("WARNING_TRANSCRIPT_MULTIPLE_STOP_CODONS", result.value("W4"));
+    result.insert("WARNING_TRANSCRIPT_NO_START_CODON", result.value("W5"));
+    result.insert("INFO_REALIGN_3_PRIME", result.value("I1"));
+    result.insert("INFO_COMPOUND_ANNOTATION", result.value("I2"));
+    result.insert("INFO_NON_REFERENCE_ANNOTATION", result.value("I3"));
+
+    return result;
+}
+
+} // namespace U2
diff --git a/src/plugins_3rdparty/hmm3/src/search/uhmm3search.h b/src/corelibs/U2Core/src/util/SnpeffDictionary.h
similarity index 61%
rename from src/plugins_3rdparty/hmm3/src/search/uhmm3search.h
rename to src/corelibs/U2Core/src/util/SnpeffDictionary.h
index 4f2deb2..bf84db3 100644
--- a/src/plugins_3rdparty/hmm3/src/search/uhmm3search.h
+++ b/src/corelibs/U2Core/src/util/SnpeffDictionary.h
@@ -19,29 +19,25 @@
* MA 02110-1301, USA.
*/
-#ifndef _GB2_UHMM3_SEARCH_H_
-#define _GB2_UHMM3_SEARCH_H_
+#ifndef _U2_SNPEFF_DICTIONARY_H_
+#define _U2_SNPEFF_DICTIONARY_H_
-#include <QtCore/QObject>
-#include <QtCore/QList>
-#include <QtCore/QString>
-
-#include <U2Core/Task.h>
-
-#include <hmmer3/hmmer.h>
-
-#include "uhmm3SearchResult.h"
+#include <U2Core/global.h>
namespace U2 {
-class UHMM3Search : public QObject {
- Q_OBJECT
+class U2CORE_EXPORT SnpeffDictionary {
public:
- static UHMM3SearchResult search( const P7_HMM* ahmm, const char* sq, int sqLen,
- const UHMM3SearchSettings& set, TaskStateInfo& si, int wholeSeqSz );
-
-}; // UHMM3Search
+ static const QMap<QString, QString> impactDescriptions;
+ static const QMap<QString, QString> effectDescriptions;
+ static const QMap<QString, QString> messageDescriptions;
+
+private:
+ static QMap<QString, QString> initImpactDescriptions();
+ static QMap<QString, QString> initEffectDescriptions();
+ static QMap<QString, QString> initMessageDescriptions();
+};
-} // U2
+} // namespace U2
-#endif // _GB2_UHMM3_SEARCH_H_
+#endif // _U2_SNPEFF_DICTIONARY_H_
diff --git a/src/corelibs/U2Core/src/util/U1AnnotationUtils.cpp b/src/corelibs/U2Core/src/util/U1AnnotationUtils.cpp
index 6e53f96..5c93d23 100644
--- a/src/corelibs/U2Core/src/util/U1AnnotationUtils.cpp
+++ b/src/corelibs/U2Core/src/util/U1AnnotationUtils.cpp
@@ -425,6 +425,24 @@ void U1AnnotationUtils::addDescriptionQualifier(SharedAnnotationData &annotation
annotationData->qualifiers << U2Qualifier(GBFeatureUtils::QUALIFIER_NOTE, description);
}
+/**
+ * Tells whether @qualifiers holds at least one qualifier whose name equals @qualifierName.
+ */
+bool U1AnnotationUtils::containsQualifier(const QList<U2Qualifier> &qualifiers, const QString &qualifierName) {
+    for (QList<U2Qualifier>::const_iterator it = qualifiers.constBegin(); it != qualifiers.constEnd(); ++it) {
+        if (it->name == qualifierName) {
+            return true;
+        }
+    }
+    return false;
+}
+
+/**
+ * Erases every qualifier named @qualifierName from @annotationData; the remaining
+ * qualifiers keep their relative order.
+ */
+void U1AnnotationUtils::removeAllQualifier(SharedAnnotationData &annotationData, const QString &qualifierName) {
+    QVector<U2Qualifier> &storedQualifiers = annotationData->qualifiers;
+    // Walk from the tail so that a removal never shifts the elements still to be visited
+    for (int idx = storedQualifiers.size() - 1; idx >= 0; --idx) {
+        if (storedQualifiers.at(idx).name == qualifierName) {
+            storedQualifiers.remove(idx);
+        }
+    }
+}
+
QString U1AnnotationUtils::buildLocationString(const U2LocationData &location) {
bool complement = location.strand.isCompementary();
bool multi = location.regions.size() > 1;
diff --git a/src/corelibs/U2Core/src/util/U1AnnotationUtils.h b/src/corelibs/U2Core/src/util/U1AnnotationUtils.h
index ab1146f..a379458 100644
--- a/src/corelibs/U2Core/src/util/U1AnnotationUtils.h
+++ b/src/corelibs/U2Core/src/util/U1AnnotationUtils.h
@@ -101,6 +101,9 @@ public:
static void addDescriptionQualifier(QList<SharedAnnotationData> &annotations, const QString &description);
static void addDescriptionQualifier(SharedAnnotationData &annotationData, const QString &description);
+ static bool containsQualifier(const QList<U2Qualifier> &qualifiers, const QString &qualifierName);
+ static void removeAllQualifier(SharedAnnotationData &annotationData, const QString &qualifierName);
+
static QString buildLocationString(const SharedAnnotationData &a);
static QString buildLocationString(const U2LocationData &location);
static QString buildLocationString(const QVector<U2Region> &regions);
diff --git a/src/corelibs/U2Core/src/util/U2AlphabetUtils.h b/src/corelibs/U2Core/src/util/U2AlphabetUtils.h
index b9711fc..1f0b330 100644
--- a/src/corelibs/U2Core/src/util/U2AlphabetUtils.h
+++ b/src/corelibs/U2Core/src/util/U2AlphabetUtils.h
@@ -98,7 +98,7 @@ private:
int ExtendedDNAlphabetComparator::getMatchMask(char c) const {
int i = c - ' ';
- assert(i>=0 && i<DNA_AL_EX_INDEX_SIZE);
+ SAFE_POINT(i >= 0 && i<DNA_AL_EX_INDEX_SIZE, QObject::tr("Symbol is not belong to alphabet"), 0);
return index[i];
}
diff --git a/src/corelibs/U2Core/src/util/U2AssemblyUtils.cpp b/src/corelibs/U2Core/src/util/U2AssemblyUtils.cpp
index 8a910c2..ea01dce 100644
--- a/src/corelibs/U2Core/src/util/U2AssemblyUtils.cpp
+++ b/src/corelibs/U2Core/src/util/U2AssemblyUtils.cpp
@@ -20,9 +20,12 @@
*/
#include "U2AssemblyUtils.h"
+#include "dbi/U2AssemblyDbi.h"
+#include <U2Core/DbiConnection.h>
#include <U2Core/TextUtils.h>
#include <U2Core/U2OpStatus.h>
+#include <U2Core/U2SafePoints.h>
#include <QtCore/QRegExp>
@@ -140,23 +143,23 @@ QByteArray U2AssemblyUtils::getCigarAlphabetChars() {
QByteArray U2AssemblyUtils::serializeCoverageStat(const U2AssemblyCoverageStat& coverageStat) {
QByteArray data;
- for(int index = 0;index < coverageStat.coverage.size();index++) {
+ for(int index = 0;index < coverageStat.coverage->size();index++) {
for(int i = 0;i < 4;i++) {
- data.append((char)(coverageStat.coverage[index].maxValue >> (i*8)));
+ data.append((char)(coverageStat.coverage->at(index).maxValue >> (i*8)));
}
}
return data;
}
void U2AssemblyUtils::deserializeCoverageStat(QByteArray data, U2AssemblyCoverageStat& res, U2OpStatus &os) {
- res.coverage.clear();
+ res.coverage->clear();
if(!data.isEmpty() && 0 == (data.size() % 4)) {
for(int index = 0;index < data.size()/4;index++) {
int value = 0;
for(int i = 0;i < 4;i++) {
value |= ((int)data[index*4 + i] & 0xff) << (i*8);
}
- res.coverage.append(U2Range<int>(value, value));
+ res.coverage->append(U2Range<int>(value, value));
}
} else {
os.setError("Invalid attribute size");
@@ -164,12 +167,88 @@ void U2AssemblyUtils::deserializeCoverageStat(QByteArray data, U2AssemblyCoverag
}
QVector<qint64> U2AssemblyUtils::coverageStatToVector(const U2AssemblyCoverageStat &coverageStat) {
- int size = coverageStat.coverage.size();
+ int size = coverageStat.coverage->size();
QVector<qint64> res(size);
for(int i = 0; i < size; ++i) {
- res[i] = coverageStat.coverage[i].maxValue;
+ res[i] = coverageStat.coverage->at(i).maxValue;
}
return res;
}
+/**
+ * Reads operations from the expanded (one character per operation) @cigarString starting at @index
+ * and returns the first one that is not I, S or P. Every I or S that is passed over increments
+ * @insertionsCount; @index is left just behind the returned operation.
+ * If @index runs past the string or a character can't be converted, the error is set to @os
+ * and U2CigarOp_Invalid is returned.
+ */
+U2CigarOp nextCigarOp(const QByteArray &cigarString, int &index, int &insertionsCount, U2OpStatus &os) {
+    QString conversionError;
+    U2CigarOp currentOp = U2CigarOp_Invalid;
+
+    for (;;) {
+        SAFE_POINT_EXT(index < cigarString.length(), os.setError(U2AssemblyUtils::tr("Cigar string: out of bounds")), U2CigarOp_Invalid);
+        currentOp = U2AssemblyUtils::char2Cigar(cigarString[index], conversionError);
+        SAFE_POINT_EXT(conversionError.isEmpty(), os.setError(conversionError), U2CigarOp_Invalid);
+
+        index++;
+
+        const bool isInsertion = (U2CigarOp_I == currentOp || U2CigarOp_S == currentOp);
+        if (isInsertion) {
+            insertionsCount++;
+        }
+
+        if (!isInsertion && U2CigarOp_P != currentOp) {
+            break;
+        }
+    }
+
+    return currentOp;
+}
+
+/**
+ * Calculates the coverage and the per-base counts for every position of @region.
+ *
+ * @param dbiRef     database that stores the assembly
+ * @param assemblyId assembly whose reads are processed
+ * @param region     reference region to calculate the statistics for
+ * @param results    output vector, must not be NULL; it is resized to region.length and
+ *                   element i accumulates the data of the position region.startPos + i
+ * @param os         operation status, an error is set if the calculation fails
+ */
+void U2AssemblyUtils::calculateCoveragePerBase(const U2DbiRef &dbiRef, const U2DataId &assemblyId, const U2Region &region, QVector<CoveragePerBaseInfo> *results, U2OpStatus &os) {
+    SAFE_POINT_EXT(NULL != results, os.setError("Coverage results vector is NULL"), );
+
+    DbiConnection con(dbiRef, os);
+    CHECK_OP(os, );
+    U2AssemblyDbi *assemblyDbi = con.dbi->getAssemblyDbi();
+    SAFE_POINT_EXT(NULL != assemblyDbi, os.setError(tr("Assembly DBI is NULL")), );
+
+    results->resize(region.length);
+
+    QScopedPointer<U2DbiIterator<U2AssemblyRead> > readsIterator(assemblyDbi->getReads(assemblyId, region, os));
+    // Do not touch the iterator if the reads request has failed
+    CHECK_OP(os, );
+    CHECK(!readsIterator.isNull(), );
+
+    while (readsIterator->hasNext()) {
+        const U2AssemblyRead read = readsIterator->next();
+        CHECK_OP(os, );
+
+        const qint64 readStartPos = read->leftmostPos;
+        // we have used effective length of the read, so insertions/deletions are already taken into account
+        const qint64 endPos = qMin(readStartPos + read->effectiveLen, region.endPos());
+
+        // one character per cigar operation; cigarString can be longer than needed
+        QByteArray cigarString;
+        foreach (const U2CigarToken &cigar, read->cigar) {
+            cigarString += QByteArray(cigar.count, U2AssemblyUtils::cigar2Char(cigar.op));
+        }
+
+        // The read is always walked from its own start, even if it begins before the region:
+        // this keeps the cigar offset and the read sequence index consistent with each other.
+        // The positions located before the region are just not counted.
+        int cigarOffset = 0;
+        int deletionsCount = 0;
+        int insertionsCount = 0;
+        for (int positionOffset = 0; readStartPos + positionOffset < endPos; positionOffset++) {
+            // nextCigarOp passes I, S and P operations over itself and counts I and S in insertionsCount
+            const U2CigarOp cigarOp = nextCigarOp(cigarString, cigarOffset, insertionsCount, os);
+            CHECK_OP(os, );
+
+            if (U2CigarOp_D == cigarOp || U2CigarOp_N == cigarOp) {
+                // a deletion or a skipped region: the reference position has no base in the read
+                deletionsCount++;
+                continue;
+            }
+
+            const qint64 referencePos = readStartPos + positionOffset;
+            if (referencePos < region.startPos) {
+                continue;
+            }
+
+            // 'N' is counted if the read has no stored base for this position
+            char currentBase = 'N';
+            const int sequenceIndex = positionOffset - deletionsCount + insertionsCount;
+            if (sequenceIndex < read->readSequence.size()) {
+                currentBase = read->readSequence[sequenceIndex];
+            }
+
+            CoveragePerBaseInfo &info = (*results)[referencePos - region.startPos];
+            info.basesCount[currentBase]++;
+            info.coverage++;
+        }
+    }
+}
+
} //namespace
diff --git a/src/corelibs/U2Core/src/util/U2AssemblyUtils.h b/src/corelibs/U2Core/src/util/U2AssemblyUtils.h
index 4ef7d63..7b663e1 100644
--- a/src/corelibs/U2Core/src/util/U2AssemblyUtils.h
+++ b/src/corelibs/U2Core/src/util/U2AssemblyUtils.h
@@ -38,6 +38,15 @@ namespace U2 {
/**
U2Assembly and related structures utility functions
*/
+/**
+ * Coverage record of a single position, filled by U2AssemblyUtils::calculateCoveragePerBase()
+ */
+class U2CORE_EXPORT CoveragePerBaseInfo {
+public:
+    /** Creates a record with zero coverage and without counted bases */
+    CoveragePerBaseInfo()
+        : coverage(0) {
+    }
+
+    /** Number of read bases counted at the position */
+    int coverage;
+    /** Number of occurrences of each base character at the position */
+    QMap<char, int> basesCount;
+};
+
class U2CORE_EXPORT U2AssemblyUtils : public QObject {
Q_OBJECT
private:
@@ -96,6 +105,8 @@ public:
*/
static QVector<qint64> coverageStatToVector(const U2AssemblyCoverageStat &coverageStat);
+ static void calculateCoveragePerBase(const U2DbiRef &dbiRef, const U2DataId &assemblyId, const U2Region &region, QVector<CoveragePerBaseInfo> *results, U2OpStatus &os);
+
/**
Size of array of cached coverage
*/
diff --git a/src/corelibs/U2Core/src/util/U2AttributeUtils.cpp b/src/corelibs/U2Core/src/util/U2AttributeUtils.cpp
index af0ff15..1548533 100644
--- a/src/corelibs/U2Core/src/util/U2AttributeUtils.cpp
+++ b/src/corelibs/U2Core/src/util/U2AttributeUtils.cpp
@@ -19,8 +19,8 @@
* MA 02110-1301, USA.
*/
-#include <U2Core/U2AttributeUtils.h>
#include <U2Core/U2AttributeDbi.h>
+#include <U2Core/U2AttributeUtils.h>
#include <U2Core/U2OpStatus.h>
#include <U2Core/U2OpStatusUtils.h>
#include <U2Core/U2SafePoints.h>
@@ -28,8 +28,7 @@
namespace U2 {
-void U2AttributeUtils::init(U2Attribute &attr, U2Object & obj, const QString& name)
-{
+void U2AttributeUtils::init(U2Attribute &attr, const U2Object &obj, const QString &name) {
attr.objectId = obj.id;
attr.version = obj.version;
attr.name = name;
@@ -151,4 +150,4 @@ void U2AttributeUtils::copyObjectAttributes(const U2DataId &srcObjId, const U2Da
}
}
-} //namespace
+} // namespace U2
diff --git a/src/corelibs/U2Core/src/util/U2AttributeUtils.h b/src/corelibs/U2Core/src/util/U2AttributeUtils.h
index ccfe063..a3b6a4d 100644
--- a/src/corelibs/U2Core/src/util/U2AttributeUtils.h
+++ b/src/corelibs/U2Core/src/util/U2AttributeUtils.h
@@ -50,14 +50,13 @@ public:
static U2StringAttribute findStringAttribute(U2AttributeDbi* adbi, const U2DataId& objectId, const QString& name, U2OpStatus& os);
- static void init(U2Attribute &attr, U2Object & obj, const QString& name);
+ static void init(U2Attribute &attr, const U2Object &obj, const QString &name);
static void removeAttribute(U2AttributeDbi* adbi, const U2DataId& attrId, U2OpStatus& os);
static void copyObjectAttributes(const U2DataId &srcObjId, const U2DataId &dstObjId,
U2AttributeDbi *srcAttributeDbi, U2AttributeDbi *dstAttributeDbi,
U2OpStatus &os);
-
};
} //namespace
diff --git a/src/corelibs/U2Core/src/util/U2FeatureUtils.cpp b/src/corelibs/U2Core/src/util/U2FeatureUtils.cpp
index e768a2e..060282c 100644
--- a/src/corelibs/U2Core/src/util/U2FeatureUtils.cpp
+++ b/src/corelibs/U2Core/src/util/U2FeatureUtils.cpp
@@ -520,8 +520,8 @@ void U2FeatureUtils::createFeatureEntityFromAnnotationData(const SharedAnnotatio
resFeature.featureType = annotation->type;
resFeature.parentFeatureId = parentFeatureId;
resFeature.rootFeatureId = rootFeatureId;
+ resFeature.location.strand = annotation->location->strand;
if (annotation->location->isSingleRegion()) {
- resFeature.location.strand = annotation->location->strand;
resFeature.location.region = annotation->location->regions.first();
}
diff --git a/src/corelibs/U2Core/src/util/U2VariationUtils.cpp b/src/corelibs/U2Core/src/util/U2VariationUtils.cpp
index 3dbd1a7..7dc9b43 100644
--- a/src/corelibs/U2Core/src/util/U2VariationUtils.cpp
+++ b/src/corelibs/U2Core/src/util/U2VariationUtils.cpp
@@ -66,7 +66,7 @@ AnnotationData U2VariationUtils::variantToAnnotation( const U2Variant &var ) {
U2Region varRegion;
varRegion.startPos = var.startPos;
- varRegion.length = var.endPos == 0 ? 1 : ( var.endPos - var.startPos );
+ varRegion.length = var.endPos - var.startPos + 1;
d.location->regions << varRegion;
d.qualifiers << U2Qualifier( "public_id", var.publicId );
@@ -82,7 +82,7 @@ U2Feature U2VariationUtils::variantToFeature( const U2Variant& var ){
res.id = var.id;
res.name = "variation";
- res.location.region = U2Region(var.startPos, var.endPos == 0 ? 1 : var.endPos - var.startPos);
+ res.location.region = U2Region(var.startPos, var.endPos - var.startPos + 1);
return res;
}
diff --git a/src/corelibs/U2Core/transl/english.ts b/src/corelibs/U2Core/transl/english.ts
index c29f4a2..94765c9 100644
--- a/src/corelibs/U2Core/transl/english.ts
+++ b/src/corelibs/U2Core/transl/english.ts
@@ -794,7 +794,7 @@ The session database file is removed after closing of UGENE.</translation>
<context>
<name>U2::CmdlineTaskRunner</name>
<message>
- <location filename="../src/tasks/CmdlineTaskRunner.cpp" line="99"/>
+ <location filename="../src/tasks/CmdlineTaskRunner.cpp" line="98"/>
<source>Run UGENE command line: %1</source>
<translation type="unfinished"></translation>
</message>
@@ -823,6 +823,11 @@ The session database file is removed after closing of UGENE.</translation>
<source>Unknown error occurred</source>
<translation type="unfinished"></translation>
</message>
+ <message>
+ <location filename="../src/tasks/CmdlineTaskRunner.cpp" line="262"/>
+ <source>An error occurred. Process is not finished successfully.</source>
+ <translation type="unfinished"></translation>
+ </message>
</context>
<context>
<name>U2::ComplementSequenceTask</name>
diff --git a/src/corelibs/U2Core/transl/russian.ts b/src/corelibs/U2Core/transl/russian.ts
index 3df1eed..924622e 100644
--- a/src/corelibs/U2Core/transl/russian.ts
+++ b/src/corelibs/U2Core/transl/russian.ts
@@ -794,7 +794,7 @@ The session database file is removed after closing of UGENE.</source>
<context>
<name>U2::CmdlineTaskRunner</name>
<message>
- <location filename="../src/tasks/CmdlineTaskRunner.cpp" line="99"/>
+ <location filename="../src/tasks/CmdlineTaskRunner.cpp" line="98"/>
<source>Run UGENE command line: %1</source>
<translation type="unfinished"></translation>
</message>
@@ -823,6 +823,11 @@ The session database file is removed after closing of UGENE.</source>
<source>Unknown error occurred</source>
<translation type="unfinished"></translation>
</message>
+ <message>
+ <location filename="../src/tasks/CmdlineTaskRunner.cpp" line="262"/>
+ <source>An error occurred. Process is not finished successfully.</source>
+ <translation type="unfinished"></translation>
+ </message>
</context>
<context>
<name>U2::ComplementSequenceTask</name>
diff --git a/src/corelibs/U2Designer/src/BreakpointHitCountDialog.cpp b/src/corelibs/U2Designer/src/BreakpointHitCountDialog.cpp
index e349e58..8b3b2f1 100644
--- a/src/corelibs/U2Designer/src/BreakpointHitCountDialog.cpp
+++ b/src/corelibs/U2Designer/src/BreakpointHitCountDialog.cpp
@@ -57,7 +57,7 @@ BreakpointHitCountDialog::BreakpointHitCountDialog(const QStringList &hitCountCo
{
ui = new Ui_BreakpointHitCountDialog();
ui->setupUi(this);
- new HelpButton(this, ui->buttonBox, "17471051");
+ new HelpButton(this, ui->buttonBox, "18220911");
ui->hitConditionCombo->addItems(hitCountConditions);
ui->hitConditionCombo->setCurrentIndex(hitCountConditions.indexOf(conditionOnLaunch));
diff --git a/src/corelibs/U2Designer/src/DelegateEditors.cpp b/src/corelibs/U2Designer/src/DelegateEditors.cpp
index 0b33c6f..c8bc91d 100644
--- a/src/corelibs/U2Designer/src/DelegateEditors.cpp
+++ b/src/corelibs/U2Designer/src/DelegateEditors.cpp
@@ -467,6 +467,10 @@ URLDelegate::URLDelegate(const DelegateTags &_tags, const QString &type, bool mu
*tags() = _tags;
}
+/**
+ * Returns @v as is if its string form is not empty, otherwise the placeholder text
+ * taken from the delegate tags.
+ */
+QVariant URLDelegate::getDisplayValue(const QVariant &v) const {
+    if (!v.toString().isEmpty()) {
+        return v;
+    }
+    return QVariant(DelegateTags::getString(tags(), DelegateTags::PLACEHOLDER_TEXT));
+}
+
URLWidget * URLDelegate::createWidget(QWidget *parent) const {
URLWidget *result;
if (noFilesMode) {
diff --git a/src/corelibs/U2Designer/src/DelegateEditors.h b/src/corelibs/U2Designer/src/DelegateEditors.h
index c151084..5c1bb6a 100644
--- a/src/corelibs/U2Designer/src/DelegateEditors.h
+++ b/src/corelibs/U2Designer/src/DelegateEditors.h
@@ -90,6 +90,8 @@ public:
URLDelegate(const QString& filter, const QString& type, bool multi = false, bool isPath = false, bool saveFile = true, QObject *parent = 0, const QString &format = "", bool noFilesMode = false);
URLDelegate(const DelegateTags& tags, const QString& type, bool multi = false, bool isPath = false, bool saveFile = true, QObject *parent = 0, bool noFilesMode = false);
+ QVariant getDisplayValue(const QVariant &v) const;
+
virtual QWidget *createEditor(QWidget *parent, const QStyleOptionViewItem &option,
const QModelIndex &index) const;
virtual PropertyWidget * createWizardWidget(U2OpStatus &os, QWidget *parent) const;
diff --git a/src/corelibs/U2Designer/src/EditBreakpointLabelsDialog.cpp b/src/corelibs/U2Designer/src/EditBreakpointLabelsDialog.cpp
index 0f7324c..bce6790 100644
--- a/src/corelibs/U2Designer/src/EditBreakpointLabelsDialog.cpp
+++ b/src/corelibs/U2Designer/src/EditBreakpointLabelsDialog.cpp
@@ -44,7 +44,7 @@ EditBreakpointLabelsDialog::EditBreakpointLabelsDialog(const QStringList &existi
ui(new Ui_EditBreakpointLabelsDialog())
{
ui->setupUi(this);
- new HelpButton(this, ui->buttonBox, "17471051");
+ new HelpButton(this, ui->buttonBox, "18220911");
ui->buttonBox->button(QDialogButtonBox::Ok)->setText(tr("OK"));
ui->buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/corelibs/U2Designer/src/EditMarkerGroupDialog.cpp b/src/corelibs/U2Designer/src/EditMarkerGroupDialog.cpp
index 123cc7d..1b4c014 100644
--- a/src/corelibs/U2Designer/src/EditMarkerGroupDialog.cpp
+++ b/src/corelibs/U2Designer/src/EditMarkerGroupDialog.cpp
@@ -42,7 +42,7 @@ EditMarkerGroupDialog::EditMarkerGroupDialog(bool isNew, Marker *marker, Workflo
: QDialog(parent), isNew(isNew), marker(NULL), allModel(_allModel)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470946");
+ new HelpButton(this, buttonBox, "18220806");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("OK"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
{
@@ -397,7 +397,7 @@ EditMarkerDialog::EditMarkerDialog(bool isNew, const QString &type, const QStrin
: QDialog(parent), isNew(isNew), type(type), name(name), values(values), editWidget(NULL)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470946");
+ new HelpButton(this, buttonBox, "18220806");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("OK"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
if (!isNew) {
diff --git a/src/corelibs/U2Designer/src/NewBreakpointDialog.cpp b/src/corelibs/U2Designer/src/NewBreakpointDialog.cpp
index c0ffc4c..cf0568a 100644
--- a/src/corelibs/U2Designer/src/NewBreakpointDialog.cpp
+++ b/src/corelibs/U2Designer/src/NewBreakpointDialog.cpp
@@ -30,7 +30,7 @@ NewBreakpointDialog::NewBreakpointDialog(const QStringList &elementsNames, QWidg
Qt::WindowFlags f) : QDialog(parent, f), ui(new Ui_NewBreakpointDialog())
{
ui->setupUi(this);
- new HelpButton(this, ui->buttonBox, "17471050");
+ new HelpButton(this, ui->buttonBox, "18220910");
ui->buttonBox->button(QDialogButtonBox::Ok)->setText(tr("OK"));
ui->buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
ui->elementCombo->addItems(elementsNames);
diff --git a/src/corelibs/U2Designer/src/NewGrouperSlotDialog.cpp b/src/corelibs/U2Designer/src/NewGrouperSlotDialog.cpp
index dcedadc..7035723 100644
--- a/src/corelibs/U2Designer/src/NewGrouperSlotDialog.cpp
+++ b/src/corelibs/U2Designer/src/NewGrouperSlotDialog.cpp
@@ -35,7 +35,7 @@ NewGrouperSlotDialog::NewGrouperSlotDialog(QWidget *parent, QList<Descriptor> &i
: QDialog(parent), inSlots(inSlots), names(names)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470944");
+ new HelpButton(this, buttonBox, "18220804");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("OK"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
@@ -105,7 +105,7 @@ AnnsActionDialog::AnnsActionDialog(QWidget *parent, GrouperSlotAction *action, Q
: ActionDialog(parent)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470944");
+ new HelpButton(this, buttonBox, "18220804");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("OK"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
@@ -152,7 +152,7 @@ SequeceActionDialog::SequeceActionDialog(QWidget *parent, GrouperSlotAction *act
: ActionDialog(parent)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470944");
+ new HelpButton(this, buttonBox, "18220804");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("OK"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
@@ -217,7 +217,7 @@ MsaActionDialog::MsaActionDialog(QWidget *parent, GrouperSlotAction *action)
: ActionDialog(parent)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470944");
+ new HelpButton(this, buttonBox, "18220804");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("OK"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
if (NULL != action) {
@@ -251,7 +251,7 @@ StringActionDialog::StringActionDialog(QWidget *parent, GrouperSlotAction *actio
: ActionDialog(parent)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470944");
+ new HelpButton(this, buttonBox, "18220804");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("OK"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
if (NULL != action) {
diff --git a/src/corelibs/U2Designer/src/dashboard/StatisticsWidget.cpp b/src/corelibs/U2Designer/src/dashboard/StatisticsWidget.cpp
index 4a8425f..fdd3414 100644
--- a/src/corelibs/U2Designer/src/dashboard/StatisticsWidget.cpp
+++ b/src/corelibs/U2Designer/src/dashboard/StatisticsWidget.cpp
@@ -62,7 +62,10 @@ QStringList StatisticsWidget::header() {
}
inline static QString timeStr(qint64 timeMks) {
- return QTime().addMSecs(timeMks/1000).toString("hh:mm:ss");
+ QDateTime t;
+ t.setTimeSpec(Qt::UTC);
+ t.setMSecsSinceEpoch(timeMks / 1000);
+ return t.toString("hh:mm:ss.zzz");
}
QList<QStringList> StatisticsWidget::data() {
diff --git a/src/corelibs/U2Designer/src/support/URLLineEdit.cpp b/src/corelibs/U2Designer/src/support/URLLineEdit.cpp
index 5ea0daf..70a338e 100644
--- a/src/corelibs/U2Designer/src/support/URLLineEdit.cpp
+++ b/src/corelibs/U2Designer/src/support/URLLineEdit.cpp
@@ -135,11 +135,18 @@ URLLineEdit::URLLineEdit(const QString &type,
bool isPath,
bool saveFile,
URLWidget *_parent)
-: QLineEdit(_parent), schemaConfig(NULL), type(type), multi(multi),
-isPath(isPath), saveFile(saveFile), parent(_parent) {
+ : QLineEdit(_parent),
+ schemaConfig(NULL),
+ type(type),
+ multi(multi),
+ isPath(isPath),
+ saveFile(saveFile),
+ parent(_parent)
+{
if (saveFile && NULL != parent) {
new BaseCompleter(new FilenameCompletionFiller(parent), this);
}
+ setPlaceholderText(DelegateTags::getString(parent->tags(), DelegateTags::PLACEHOLDER_TEXT));
}
CompletionFiller * URLLineEdit::getCompletionFillerInstance() {
diff --git a/src/corelibs/U2Formats/U2Formats.pro b/src/corelibs/U2Formats/U2Formats.pro
index 5fc82e3..4814a8d 100644
--- a/src/corelibs/U2Formats/U2Formats.pro
+++ b/src/corelibs/U2Formats/U2Formats.pro
@@ -74,6 +74,7 @@ HEADERS += src/ABIFormat.h \
src/mysql_dbi/util/upgraders/MysqlUpgraderFrom_1_14_To_1_15.h \
src/mysql_dbi/util/upgraders/MysqlUpgraderFrom_1_15_To_1_16.h \
src/mysql_dbi/util/upgraders/MysqlUpgraderFrom_1_16_To_1_17.h \
+ src/mysql_dbi/util/upgraders/MysqlUpgraderFrom_1_16_To_1_24.h \
src/sqlite_dbi/SQLiteAssemblyDbi.h \
src/sqlite_dbi/SQLiteAttributeDbi.h \
src/sqlite_dbi/SQLiteBlobInputStream.h \
@@ -95,10 +96,13 @@ HEADERS += src/ABIFormat.h \
src/tasks/BgzipTask.h \
src/tasks/ConvertAssemblyToSamTask.h \
src/tasks/ConvertFileTask.h \
+ src/tasks/ConvertSnpeffVariationsToAnnotationsTask.h \
src/tasks/MergeBamTask.h \
src/tasks/MysqlUpgradeTask.h \
src/util/AssemblyAdapter.h \
- src/util/AssemblyPackAlgorithm.h
+ src/util/AssemblyPackAlgorithm.h \
+ src/util/SnpeffInfoParser.h
+
SOURCES += src/ABIFormat.cpp \
src/AbstractVariationFormat.cpp \
src/ASNFormat.cpp \
@@ -170,6 +174,7 @@ SOURCES += src/ABIFormat.cpp \
src/mysql_dbi/util/upgraders/MysqlUpgraderFrom_1_14_To_1_15.cpp \
src/mysql_dbi/util/upgraders/MysqlUpgraderFrom_1_15_To_1_16.cpp \
src/mysql_dbi/util/upgraders/MysqlUpgraderFrom_1_16_To_1_17.cpp \
+ src/mysql_dbi/util/upgraders/MysqlUpgraderFrom_1_16_To_1_24.cpp \
src/sqlite_dbi/SQLiteAssemblyDbi.cpp \
src/sqlite_dbi/SQLiteAttributeDbi.cpp \
src/sqlite_dbi/SQLiteBlobInputStream.cpp \
@@ -191,9 +196,12 @@ SOURCES += src/ABIFormat.cpp \
src/tasks/BgzipTask.cpp \
src/tasks/ConvertAssemblyToSamTask.cpp \
src/tasks/ConvertFileTask.cpp \
+ src/tasks/ConvertSnpeffVariationsToAnnotationsTask.cpp \
src/tasks/MergeBamTask.cpp \
src/tasks/MysqlUpgradeTask.cpp \
- src/util/AssemblyPackAlgorithm.cpp
+ src/util/AssemblyPackAlgorithm.cpp \
+ src/util/SnpeffInfoParser.cpp
+
RESOURCES += U2Formats.qrc
TRANSLATIONS += transl/english.ts \
transl/russian.ts
diff --git a/src/corelibs/U2Formats/src/AbstractVariationFormat.cpp b/src/corelibs/U2Formats/src/AbstractVariationFormat.cpp
index aa99860..4c30d21 100644
--- a/src/corelibs/U2Formats/src/AbstractVariationFormat.cpp
+++ b/src/corelibs/U2Formats/src/AbstractVariationFormat.cpp
@@ -19,9 +19,12 @@
* MA 02110-1301, USA.
*/
+#include <U2Core/GAutoDeleteList.h>
#include <U2Core/IOAdapter.h>
#include <U2Core/L10n.h>
#include <U2Core/TextUtils.h>
+#include <U2Core/U2AttributeDbi.h>
+#include <U2Core/U2AttributeUtils.h>
#include <U2Core/U2DbiUtils.h>
#include <U2Core/U2ObjectDbi.h>
#include <U2Core/U2OpStatus.h>
@@ -33,11 +36,11 @@
#include "AbstractVariationFormat.h"
-#include <QtCore/QStringList>
-
namespace U2 {
-const QString AbstractVariationFormat::COMMENT_START("#");
+const QString AbstractVariationFormat::META_INFO_START = "##";
+const QString AbstractVariationFormat::HEADER_START = "#";
+const QString AbstractVariationFormat::COLUMNS_SEPARATOR = "\t";
QList<U2Variant> splitVariants(const U2Variant& v, const QList<QString>& altAllel){
QList<U2Variant> res;
@@ -55,7 +58,8 @@ QList<U2Variant> splitVariants(const U2Variant& v, const QList<QString>& altAlle
AbstractVariationFormat::AbstractVariationFormat(QObject *p, const QStringList &fileExts, bool _isSupportHeader)
-: DocumentFormat(p, DocumentFormatFlags_SW, fileExts), isSupportHeader(_isSupportHeader), sep(QString())
+ : DocumentFormat(p, DocumentFormatFlags_SW, fileExts),
+ isSupportHeader(_isSupportHeader)
{
supportedObjectTypes += GObjectTypes::VARIANT_TRACK;
formatDescription = tr("SNP formats are used to store single-nucleotide polymorphism data");
@@ -77,6 +81,18 @@ namespace {
}
}
+namespace {
+
+/**
+ * Creates a string attribute @name with the value @value for @variantTrack in the attribute DBI of @dbi.
+ * Nothing is created if @value is empty.
+ */
+void addStringAttribute(U2OpStatus &os, U2Dbi *dbi, const U2VariantTrack &variantTrack, const QString &name, const QString &value) {
+    if (value.isEmpty()) {
+        return;
+    }
+
+    U2StringAttribute stringAttribute;
+    U2AttributeUtils::init(stringAttribute, variantTrack, name);
+    stringAttribute.value = value;
+
+    U2AttributeDbi *attributeDbi = dbi->getAttributeDbi();
+    attributeDbi->createStringAttribute(stringAttribute, os);
+}
+
+}
+
#define CHR_PREFIX "chr"
Document *AbstractVariationFormat::loadDocument(IOAdapter *io, const U2DbiRef &dbiRef, const QVariantMap &fs, U2OpStatus &os) {
@@ -96,7 +112,8 @@ Document *AbstractVariationFormat::loadDocument(IOAdapter *io, const U2DbiRef &d
//TODO: load snps with chunks of fixed size to avoid memory consumption
QMap<QString, QList<U2Variant> > snpsMap;
- QString headerText;
+ QString metaInfo;
+ QStringList header;
int lineNumber = 0;
do {
@@ -107,13 +124,17 @@ Document *AbstractVariationFormat::loadDocument(IOAdapter *io, const U2DbiRef &d
continue;
}
- // skip comments
- if (line.startsWith(COMMENT_START)) {
- headerText += line + "\n";
+ if (line.startsWith(META_INFO_START)) {
+ metaInfo += line + "\n";
+ continue;
+ }
+
+ if (line.startsWith(HEADER_START)) {
+ header = line.split(COLUMNS_SEPARATOR);
continue;
}
- QStringList columns = sep.isEmpty() ? line.split(QRegExp("\\s+")) : line.split(sep);
+ QStringList columns = line.split(COLUMNS_SEPARATOR);
if (columns.size() < maxColumnNumber) {
os.addWarning(tr("Line %1: There are too few columns in this line. The line was skipped.").arg(lineNumber));
@@ -125,59 +146,53 @@ Document *AbstractVariationFormat::loadDocument(IOAdapter *io, const U2DbiRef &d
U2Variant v;
QString seqName;
- bool skipVariation = false;
- foreach (int columnNumber, columnRoles.keys()) {
- if (columns.size() <= columnNumber) {
- skipVariation = true;
- os.addWarning(tr("Line %1: There are too few columns in this line. The line was skipped.").arg(lineNumber));
+ for (int columnNumber = 0; columnNumber < columns.size(); columnNumber++) {
+ const ColumnRole columnRole = columnRoles.value(columnNumber, ColumnRole_Unknown);
+ const QString &columnData = columns[columnNumber];
+ switch (columnRole) {
+ case ColumnRole_ChromosomeId:
+ seqName = columnData;
+ break;
+ case ColumnRole_StartPos:
+ v.startPos = columnData.toInt();
+ if (indexing == AbstractVariationFormat::OneBased){
+ v.startPos -= 1;
+ }
+ break;
+ case ColumnRole_EndPos:
+ v.endPos = columnData.toInt();
+ if (indexing == AbstractVariationFormat::OneBased){
+ v.endPos -= 1;
+ }
+ break;
+ case ColumnRole_RefData:
+ v.refData = columnData.toLatin1();
+ break;
+ case ColumnRole_ObsData:
+ if (splitting == AbstractVariationFormat::Split){
+ altAllele = columnData.trimmed().split(',');
+ }else{
+ v.obsData = columnData.toLatin1();
+ }
+ break;
+ case ColumnRole_PublicId:
+ v.publicId = columnData.toLatin1();
+ break;
+ case ColumnRole_Info:
+ v.additionalInfo.insert(U2Variant::VCF4_INFO, columnData);
+ break;
+ case ColumnRole_Unknown:
+ v.additionalInfo.insert(columnNumber < header.size() ? header[columnNumber] : QString::number(columnNumber), columnData);
+ break;
+ default:
+ assert(0);
+ coreLog.trace(QString("Warning: unknown column role %1 (line %2, column %3)").arg(columnRole).arg(line).arg(columnNumber));
break;
- }
- const QString& columnData = columns.at(columnNumber);
- ColumnRole role = columnRoles.value(columnNumber);
- switch (role) {
- case ColumnRole_ChromosomeId:
- seqName = columnData;
- break;
- case ColumnRole_StartPos:
- v.startPos = columnData.toInt();
- if (indexing == AbstractVariationFormat::OneBased){
- v.startPos -= 1;
- }
- break;
- case ColumnRole_EndPos:
- v.endPos = columnData.toInt();
- if (indexing == AbstractVariationFormat::OneBased){
- v.startPos -= 1;
- }
- break;
- case ColumnRole_RefData:
- v.refData = columnData.toLatin1();
- break;
- case ColumnRole_ObsData:
- if (splitting == AbstractVariationFormat::Split){
- altAllele = columnData.trimmed().split(',');
- }else{
- v.obsData = columnData.toLatin1();
- }
- break;
- case ColumnRole_PublicId:
- v.publicId = columnData.toLatin1();
- break;
- case ColumnRole_AdditionalInfo:
- v.additionalInfo = columnData.toLatin1();
- for(int i = columnNumber + 1; i < columns.size(); i++) {
- v.additionalInfo += "\t" + columns.at(i);
- }
- break;
- default:
- assert(0);
- coreLog.trace(QString("Warning: unknown column role %1 (line %2, column %3)").arg(role).arg(line).arg(columnNumber));
- break;
}
}
- if (skipVariation) {
- continue;
+ if (!columnRoles.values().contains(ColumnRole_EndPos)) {
+ v.endPos = v.startPos + v.refData.size() - 1;
}
if (v.publicId.isEmpty()) {
@@ -199,7 +214,7 @@ Document *AbstractVariationFormat::loadDocument(IOAdapter *io, const U2DbiRef &d
} while (!io->isEof());
- QList<GObject*> objects;
+ GAutoDeleteList<GObject> objects;
QSet<QString> names;
const QString folder = fs.value(DBI_FOLDER_HINT, U2ObjectDbi::ROOT_FOLDER).toString();
@@ -207,36 +222,48 @@ Document *AbstractVariationFormat::loadDocument(IOAdapter *io, const U2DbiRef &d
if (snpsMap.isEmpty()){
U2VariantTrack track;
track.sequenceName = "unknown";
- track.fileHeader = headerText;
dbi->getVariantDbi()->createVariantTrack(track, TrackType_All, folder, os);
+ CHECK_OP(os, NULL);
+
+ addStringAttribute(os, dbi, track, U2VariantTrack::META_INFO_ATTIBUTE, metaInfo);
+ CHECK_OP(os, NULL);
+ addStringAttribute(os, dbi, track, U2VariantTrack::HEADER_ATTIBUTE, U2DbiUtils::packStringList(header));
+ CHECK_OP(os, NULL);
U2EntityRef trackRef(dbiRef, track.id);
QString objName = TextUtils::variate(track.sequenceName, "_", names);
names.insert(objName);
VariantTrackObject *trackObj = new VariantTrackObject(objName, trackRef);
- objects << trackObj;
+ objects.qlist << trackObj;
}
foreach (const QString &seqName, snpsMap.keys().toSet()) {
U2VariantTrack track;
track.visualName = "Variant track";
track.sequenceName = seqName;
- track.fileHeader = headerText;
dbi->getVariantDbi()->createVariantTrack(track, TrackType_All, folder, os);
+ CHECK_OP(os, NULL);
+
+ addStringAttribute(os, dbi, track, U2VariantTrack::META_INFO_ATTIBUTE, metaInfo);
+ CHECK_OP(os, NULL);
+ addStringAttribute(os, dbi, track, U2VariantTrack::HEADER_ATTIBUTE, U2DbiUtils::packStringList(header));
+ CHECK_OP(os, NULL);
const QList<U2Variant>& vars = snpsMap.value(seqName);
BufferedDbiIterator<U2Variant> bufIter(vars);
dbi->getVariantDbi()->addVariantsToTrack(track, &bufIter, os);
+ CHECK_OP(os, NULL);
U2EntityRef trackRef(dbiRef, track.id);
QString objName = TextUtils::variate(track.sequenceName, "_", names);
names.insert(objName);
VariantTrackObject *trackObj = new VariantTrackObject(objName, trackRef);
- objects << trackObj;
+ objects.qlist << trackObj;
}
QString lockReason;
- Document* doc = new Document(this, io->getFactory(), io->getURL(), dbiRef, objects, fs, lockReason);
+ Document* doc = new Document(this, io->getFactory(), io->getURL(), dbiRef, objects.qlist, fs, lockReason);
+ objects.qlist.clear();
return doc;
}
@@ -251,9 +278,9 @@ FormatCheckResult AbstractVariationFormat::checkRawData(const QByteArray &dataPr
continue;
}
- QString line = l.simplified();
+ QString line = l.trimmed();
idx++;
- if (line.startsWith(COMMENT_START)) {
+ if (line.startsWith(META_INFO_START)) {
bool isFormatMatched = line.contains("format=" + formatName);
if(isFormatMatched) {
return FormatDetection_Matched;
@@ -261,7 +288,7 @@ FormatCheckResult AbstractVariationFormat::checkRawData(const QByteArray &dataPr
continue;
}
- QStringList cols = line.split(QRegExp("\\s"), QString::SkipEmptyParts);
+ QStringList cols = line.split(COLUMNS_SEPARATOR, QString::SkipEmptyParts);
if (!this->checkFormatByColumnCount(cols.size())) {
return FormatDetection_NotMatched;
}
@@ -308,14 +335,14 @@ FormatCheckResult AbstractVariationFormat::checkRawData(const QByteArray &dataPr
}
void AbstractVariationFormat::storeDocument(Document *doc, IOAdapter *io, U2OpStatus &os) {
- if(!doc->getObjects().isEmpty()) {
- storeHeader(doc->getObjects().at(0), io, os);
+ const QList<GObject *> variantTrackObjects = doc->findGObjectByType(GObjectTypes::VARIANT_TRACK);
+ if(!variantTrackObjects.isEmpty()) {
+ storeHeader(variantTrackObjects.first(), io, os);
}
- foreach (GObject *obj, doc->getObjects()) {
- if (GObjectTypes::VARIANT_TRACK != obj->getGObjectType()) {
- continue;
- }
- VariantTrackObject *trackObj = dynamic_cast<VariantTrackObject*>(obj);
+
+ foreach (GObject *obj, variantTrackObjects) {
+ VariantTrackObject *trackObj = qobject_cast<VariantTrackObject *>(obj);
+ SAFE_POINT_EXT(NULL != trackObj, os.setError("Can't cast GObject to VariantTrackObject"), );
storeTrack(io, trackObj, os);
}
}
@@ -335,7 +362,10 @@ void AbstractVariationFormat::storeTrack(IOAdapter *io, const VariantTrackObject
CHECK(NULL != trackObj, );
U2VariantTrack track = trackObj->getVariantTrack(os);
CHECK_OP(os, );
- QScopedPointer< U2DbiIterator<U2Variant> > varsIter(trackObj->getVariants(U2_REGION_MAX, os));
+ QScopedPointer<U2DbiIterator<U2Variant> > varsIter(trackObj->getVariants(U2_REGION_MAX, os));
+ CHECK_OP(os, );
+
+ const QStringList header = getHeader(trackObj, os);
CHECK_OP(os, );
QByteArray snpString;
@@ -343,54 +373,74 @@ void AbstractVariationFormat::storeTrack(IOAdapter *io, const VariantTrackObject
U2Variant variant = varsIter->next();
snpString.clear();
- bool first = true;
- foreach (int columnNumber, columnRoles.keys()) {
- if (first) {
- first = false;
- } else {
- snpString += "\t";
+ for (int columnNumber = 0; columnNumber <= maxColumnNumber; columnNumber++) {
+ if (columnNumber != 0) {
+ snpString += COLUMNS_SEPARATOR;
}
- ColumnRole role = columnRoles.value(columnNumber);
+
+ ColumnRole role = columnRoles.value(columnNumber, ColumnRole_Unknown);
switch (role) {
- case ColumnRole_ChromosomeId:
- snpString += track.sequenceName;
- break;
- case ColumnRole_StartPos:
- if (indexing == AbstractVariationFormat::OneBased){
- snpString += QByteArray::number(variant.startPos + 1);
- }else if (indexing == AbstractVariationFormat::ZeroBased){
- snpString += QByteArray::number(variant.startPos);
- }else{
- assert(0);
- }
-
- break;
- case ColumnRole_EndPos:
- if (indexing == AbstractVariationFormat::OneBased){
- snpString += QByteArray::number(variant.endPos + 1);
- }else if (indexing == AbstractVariationFormat::ZeroBased){
- snpString += QByteArray::number(variant.endPos);
- }else{
- assert(0);
- }
- break;
- case ColumnRole_RefData:
- snpString += variant.refData;
- break;
- case ColumnRole_ObsData:
- snpString += variant.obsData;
- break;
- case ColumnRole_PublicId:
- snpString += variant.publicId;
- break;
- case ColumnRole_AdditionalInfo:
- snpString += variant.additionalInfo;
- break;
- default:
- coreLog.trace("Warning: unknown column role (%, line %, column %)");
- break;
+ case ColumnRole_ChromosomeId:
+ snpString += track.sequenceName;
+ break;
+ case ColumnRole_StartPos:
+ switch (indexing) {
+ case AbstractVariationFormat::OneBased:
+ snpString += QByteArray::number(variant.startPos + 1);
+ break;
+ case AbstractVariationFormat::ZeroBased:
+ snpString += QByteArray::number(variant.startPos);
+ break;
+ default:
+ assert(0);
+ }
+ break;
+ case ColumnRole_EndPos:
+ switch (indexing) {
+ case AbstractVariationFormat::OneBased:
+ snpString += QByteArray::number(variant.endPos + 1);
+ break;
+ case AbstractVariationFormat::ZeroBased:
+ snpString += QByteArray::number(variant.endPos);
+ break;
+ default:
+ assert(0);
+ }
+ break;
+ case ColumnRole_RefData:
+ snpString += variant.refData;
+ break;
+ case ColumnRole_ObsData:
+ snpString += variant.obsData;
+ break;
+ case ColumnRole_PublicId:
+ snpString += variant.publicId;
+ break;
+ case ColumnRole_Info:
+ snpString += variant.additionalInfo.value(U2Variant::VCF4_INFO, ".");
+ break;
+ case ColumnRole_Unknown: {
+ const QString columnTitle = columnNumber < header.size() ? header[columnNumber] : QString::number(columnNumber);
+ snpString += variant.additionalInfo.value(columnTitle, ".");
+ break;
+ }
+ default:
+ coreLog.trace("Warning: unknown column role (%, line %, column %)");
+ break;
+ }
+ }
+
+ for (int i = maxColumnNumber + 1; i < header.size(); i++) {
+ snpString += COLUMNS_SEPARATOR + variant.additionalInfo.value(header[i], ".").toLatin1();
+ }
+
+ for (int i = qMax(maxColumnNumber + 1, header.size()); i <= maxColumnNumber + variant.additionalInfo.size(); i++) {
+ if (!variant.additionalInfo.contains(QString::number(i))) {
+ break;
}
+ snpString += COLUMNS_SEPARATOR + variant.additionalInfo[QString::number(i)].toLatin1();
}
+
snpString += "\n";
io->writeBlock(snpString);
}
@@ -398,17 +448,37 @@ void AbstractVariationFormat::storeTrack(IOAdapter *io, const VariantTrackObject
void AbstractVariationFormat::storeHeader(GObject *obj, IOAdapter *io, U2OpStatus &os) {
CHECK(isSupportHeader, );
- CHECK(NULL != obj, );
+ SAFE_POINT_EXT(NULL != obj, os.setError("NULL object"), );
- CHECK(GObjectTypes::VARIANT_TRACK == obj->getGObjectType(), );
+ SAFE_POINT_EXT(GObjectTypes::VARIANT_TRACK == obj->getGObjectType(), os.setError("Invalid GObjectType"), );
- VariantTrackObject *trackObj = dynamic_cast<VariantTrackObject*>(obj);
- CHECK(NULL != trackObj, );
+ VariantTrackObject *trackObj = qobject_cast<VariantTrackObject*>(obj);
+ SAFE_POINT_EXT(NULL != trackObj, os.setError("Can't cast GObject to VariantTrackObject"), );
- U2VariantTrack track = trackObj->getVariantTrack(os);
+ const QString metaInfo = getMetaInfo(trackObj, os);
+ CHECK_OP(os, );
+ if (!metaInfo.isEmpty()) {
+ io->writeBlock(metaInfo.toLatin1());
+ }
+
+ const QStringList header = getHeader(trackObj, os);
CHECK_OP(os, );
+ if (!header.isEmpty()) {
+ io->writeBlock(header.join(COLUMNS_SEPARATOR).toLatin1() + "\n");
+ }
+}
+
+QString AbstractVariationFormat::getMetaInfo(const VariantTrackObject *variantTrackObject, U2OpStatus &os) {
+ DbiConnection connection(variantTrackObject->getEntityRef().dbiRef, os);
+ CHECK_OP(os, "");
+ return U2AttributeUtils::findStringAttribute(connection.dbi->getAttributeDbi(), variantTrackObject->getEntityRef().entityId, U2VariantTrack::META_INFO_ATTIBUTE, os).value;
+}
- io->writeBlock(track.fileHeader.toLatin1());
+QStringList AbstractVariationFormat::getHeader(const VariantTrackObject *variantTrackObject, U2OpStatus &os) {
+ DbiConnection connection(variantTrackObject->getEntityRef().dbiRef, os);
+ CHECK_OP(os, QStringList());
+ const QString packedHeader = U2AttributeUtils::findStringAttribute(connection.dbi->getAttributeDbi(), variantTrackObject->getEntityRef().entityId, U2VariantTrack::HEADER_ATTIBUTE, os).value;
+ return U2DbiUtils::unpackStringList(packedHeader);
}
} // U2
diff --git a/src/corelibs/U2Formats/src/AbstractVariationFormat.h b/src/corelibs/U2Formats/src/AbstractVariationFormat.h
index 0ae2b64..893ba0d 100644
--- a/src/corelibs/U2Formats/src/AbstractVariationFormat.h
+++ b/src/corelibs/U2Formats/src/AbstractVariationFormat.h
@@ -39,7 +39,7 @@ public:
ColumnRole_PublicId,
ColumnRole_ChromosomeId,
ColumnRole_Comment,
- ColumnRole_AdditionalInfo
+ ColumnRole_Info
};
enum PositionIndexing{
@@ -69,7 +69,6 @@ protected:
bool isSupportHeader;
QMap<int,ColumnRole> columnRoles;
- QString sep;
int maxColumnNumber;
PositionIndexing indexing;
@@ -77,10 +76,15 @@ protected:
virtual Document *loadDocument(IOAdapter *io, const U2DbiRef &dbiRef, const QVariantMap &fs, U2OpStatus &os);
virtual bool checkFormatByColumnCount(int columnCount) const = 0;
- static const QString COMMENT_START;
+ static const QString META_INFO_START;
+ static const QString HEADER_START;
+ static const QString COLUMNS_SEPARATOR;
private:
void storeTrack(IOAdapter *io, const VariantTrackObject *trackObj, U2OpStatus &os);
+
+ static QString getMetaInfo(const VariantTrackObject *variantTrackObject, U2OpStatus &os);
+ static QStringList getHeader(const VariantTrackObject *variantTrackObject, U2OpStatus &os);
};
} // U2
diff --git a/src/corelibs/U2Formats/src/BAMUtils.cpp b/src/corelibs/U2Formats/src/BAMUtils.cpp
index 3b8a8e0..e02f03b 100644
--- a/src/corelibs/U2Formats/src/BAMUtils.cpp
+++ b/src/corelibs/U2Formats/src/BAMUtils.cpp
@@ -767,10 +767,14 @@ KSEQ_INIT(gzFile, gzread)
#pragma GCC diagnostic pop
#endif
-FASTQIterator::FASTQIterator(const QString &fileUrl)
+FASTQIterator::FASTQIterator(const QString &fileUrl, U2OpStatus &os)
: seq(NULL)
{
- fp = gzopen(fileUrl.toLatin1().constData(), "r");
+ fp = gzopen(fileUrl.toLocal8Bit().constData(), "r");
+ if (fp == NULL) {
+ os.setError(QObject::tr("Can't open file with given url: %1.").arg(fileUrl));
+ return;
+ }
seq = kseq_init(static_cast<gzFile>(fp));
fetchNext();
}
diff --git a/src/corelibs/U2Formats/src/BAMUtils.h b/src/corelibs/U2Formats/src/BAMUtils.h
index 2effc64..a92fe6e 100644
--- a/src/corelibs/U2Formats/src/BAMUtils.h
+++ b/src/corelibs/U2Formats/src/BAMUtils.h
@@ -89,7 +89,7 @@ public:
//iterates over a FASTQ file (including zipped) with kseq from samtools
class U2FORMATS_EXPORT FASTQIterator {
public:
- FASTQIterator(const QString& fileUrl);
+ FASTQIterator(const QString& fileUrl, U2OpStatus &os);
virtual ~FASTQIterator();
DNASequence next();
diff --git a/src/corelibs/U2Formats/src/SAMFormat.cpp b/src/corelibs/U2Formats/src/SAMFormat.cpp
index a45d26e..0a789be 100644
--- a/src/corelibs/U2Formats/src/SAMFormat.cpp
+++ b/src/corelibs/U2Formats/src/SAMFormat.cpp
@@ -87,7 +87,7 @@ bool SAMFormat::validateField(int num, QByteArray &field, U2OpStatus *ti) {
return true;
}
-SAMFormat::SAMFormat( QObject* p ): DocumentFormat(p, DocumentFormatFlag_SupportWriting, QStringList()<< "sam")
+SAMFormat::SAMFormat( QObject* p ): DocumentFormat(p, DocumentFormatFlags(DocumentFormatFlag_SupportWriting | DocumentFormatFlag_CannotBeCompressed), QStringList()<< "sam")
{
formatName = tr("SAM");
formatDescription = tr("The Sequence Alignment/Map (SAM) format is a generic alignment format for"
diff --git a/src/corelibs/U2Formats/src/StockholmFormat.cpp b/src/corelibs/U2Formats/src/StockholmFormat.cpp
index daed886..392d021 100644
--- a/src/corelibs/U2Formats/src/StockholmFormat.cpp
+++ b/src/corelibs/U2Formats/src/StockholmFormat.cpp
@@ -621,7 +621,7 @@ static QByteArray getNameSeqGap( int diff ) {
return ret;
}
-static void save( IOAdapter* io, const MAlignment& msa, const QString& name, U2OpStatus &os ) {
+static void save( IOAdapter* io, const MAlignment& msa, QString name, U2OpStatus &os ) {
assert( NULL != io );
assert( msa.getNumRows() );
int ret = 0;
@@ -632,7 +632,7 @@ static void save( IOAdapter* io, const MAlignment& msa, const QString& name, U2O
QByteArray unimark = StockholmFormat::UNI_ANNOTATION_MARK + "\n\n";
ret = io->writeBlock( unimark );
checkValThrowException<int>( true, unimark.size(), ret, StockholmFormat::WriteError(io->getURL()) );
- QByteArray idAnn = StockholmFormat::FILE_ANNOTATION_ID + " " + name.toLatin1() + "\n\n";
+ QByteArray idAnn = StockholmFormat::FILE_ANNOTATION_ID + " " + name.replace(QRegExp("\\s"), "_").toLatin1() + "\n\n";
ret = io->writeBlock( idAnn );
checkValThrowException<int>( true, idAnn.size(), ret, StockholmFormat::WriteError(io->getURL()) );
diff --git a/src/corelibs/U2Formats/src/VCF4VariationFormat.cpp b/src/corelibs/U2Formats/src/VCF4VariationFormat.cpp
index 20bae88..eae31cc 100644
--- a/src/corelibs/U2Formats/src/VCF4VariationFormat.cpp
+++ b/src/corelibs/U2Formats/src/VCF4VariationFormat.cpp
@@ -33,7 +33,7 @@ VCF4VariationFormat::VCF4VariationFormat(QObject *p)
columnRoles.insert(2, ColumnRole_PublicId);
columnRoles.insert(3, ColumnRole_RefData);
columnRoles.insert(4, ColumnRole_ObsData);
- columnRoles.insert(5, ColumnRole_AdditionalInfo);
+ columnRoles.insert(7, ColumnRole_Info);
maxColumnNumber = columnRoles.keys().last();
diff --git a/src/corelibs/U2Formats/src/ace/ConvertAceToSqliteTask.cpp b/src/corelibs/U2Formats/src/ace/ConvertAceToSqliteTask.cpp
index 9e5cc5b..e360616 100644
--- a/src/corelibs/U2Formats/src/ace/ConvertAceToSqliteTask.cpp
+++ b/src/corelibs/U2Formats/src/ace/ConvertAceToSqliteTask.cpp
@@ -285,7 +285,8 @@ void ConvertAceToSqliteTask::updateAttributeDbi() {
attrDbi->createIntegerAttribute(countReadsAttr, stateInfo);
CHECK_OP(stateInfo, );
}
- if (!coverageStat.coverage.isEmpty()) {
+ /*
+ if (!coverageStat.coverage->isEmpty()) {
U2ByteArrayAttribute attribute;
attribute.objectId = assembly.id;
attribute.name = U2BaseAttributeName::coverage_statistics;
@@ -294,6 +295,7 @@ void ConvertAceToSqliteTask::updateAttributeDbi() {
attrDbi->createByteArrayAttribute(attribute, stateInfo);
CHECK_OP(stateInfo, );
}
+ */
stateInfo.setProgress(stateInfo.getProgress() + progressStep);
}
}
diff --git a/src/corelibs/U2Formats/src/mysql_dbi/MysqlDbi.cpp b/src/corelibs/U2Formats/src/mysql_dbi/MysqlDbi.cpp
index 61972ab..abaeca1 100644
--- a/src/corelibs/U2Formats/src/mysql_dbi/MysqlDbi.cpp
+++ b/src/corelibs/U2Formats/src/mysql_dbi/MysqlDbi.cpp
@@ -42,6 +42,7 @@
#include "util/upgraders/MysqlUpgraderFrom_1_14_To_1_15.h"
#include "util/upgraders/MysqlUpgraderFrom_1_15_To_1_16.h"
#include "util/upgraders/MysqlUpgraderFrom_1_16_To_1_17.h"
+#include "util/upgraders/MysqlUpgraderFrom_1_16_To_1_24.h"
namespace U2 {
@@ -67,6 +68,7 @@ MysqlDbi::MysqlDbi()
upgraders << new MysqlUpgraderFrom_1_14_To_1_15(this);
upgraders << new MysqlUpgraderFrom_1_15_To_1_16(this);
upgraders << new MysqlUpgraderFrom_1_16_To_1_17(this);
+ upgraders << new MysqlUpgraderFrom_1_16_To_1_24(this);
}
MysqlDbi::~MysqlDbi() {
diff --git a/src/corelibs/U2Formats/src/mysql_dbi/MysqlVariantDbi.cpp b/src/corelibs/U2Formats/src/mysql_dbi/MysqlVariantDbi.cpp
index 3a01606..742d49d 100644
--- a/src/corelibs/U2Formats/src/mysql_dbi/MysqlVariantDbi.cpp
+++ b/src/corelibs/U2Formats/src/mysql_dbi/MysqlVariantDbi.cpp
@@ -42,7 +42,7 @@ public:
res.refData = q->getBlob(3);
res.obsData = q->getBlob(4);
res.publicId = q->getString(5);
- res.additionalInfo = q->getString(6);
+ res.additionalInfo = U2DbiUtils::unpackMap(q->getString(6));
return res;
}
@@ -199,7 +199,7 @@ void MysqlVariantDbi::addVariantsToTrack(const U2VariantTrack& track, U2DbiItera
q.bindBlob(":refData", var.refData);
q.bindBlob(":obsData", var.obsData);
q.bindString(":publicId", var.publicId);
- q.bindString(":additionalInfo", var.additionalInfo);
+ q.bindString(":additionalInfo", U2DbiUtils::packMap(var.additionalInfo));
var.id = q.insert(U2Type::VariantType);
CHECK_OP(os, );
diff --git a/src/corelibs/U2Formats/src/mysql_dbi/util/MysqlAssemblyUtils.cpp b/src/corelibs/U2Formats/src/mysql_dbi/util/MysqlAssemblyUtils.cpp
index 5c34482..0de8276 100644
--- a/src/corelibs/U2Formats/src/mysql_dbi/util/MysqlAssemblyUtils.cpp
+++ b/src/corelibs/U2Formats/src/mysql_dbi/util/MysqlAssemblyUtils.cpp
@@ -207,10 +207,10 @@ void MysqlAssemblyUtils::unpackData(const QByteArray& packedData, U2AssemblyRead
}
void MysqlAssemblyUtils::calculateCoverage(U2SqlQuery& q, const U2Region& r, U2AssemblyCoverageStat& c, U2OpStatus& os) {
- int csize = c.coverage.size();
+ int csize = c.coverage->size();
SAFE_POINT(csize > 0, "illegal coverage vector size!", );
- U2Range<int>* cdata = c.coverage.data();
+ U2Range<int>* cdata = c.coverage->data();
double basesPerRange = double(r.length) / csize;
while (q.step() && !os.isCoR()) {
qint64 startPos = q.getInt64(0);
@@ -236,7 +236,7 @@ void MysqlAssemblyUtils::addToCoverage(U2AssemblyCoverageImportInfo& ii, const U
return;
}
- int csize = ii.coverage.coverage.size();
+ int csize = ii.coverage.coverage->size();
int startPos = (int)(read->leftmostPos / ii.coverageBasesPerPoint);
int endPos = (int)((read->leftmostPos + read->effectiveLen - 1) / ii.coverageBasesPerPoint);
@@ -244,7 +244,7 @@ void MysqlAssemblyUtils::addToCoverage(U2AssemblyCoverageImportInfo& ii, const U
coreLog.trace(QString("addToCoverage: endPos > csize - 1: %1 > %2").arg(endPos).arg(csize-1));
endPos = csize - 1;
}
- U2Range<int>* coverageData = ii.coverage.coverage.data();
+ U2Range<int>* coverageData = ii.coverage.coverage->data();
for (int i = startPos; i <= endPos && i < csize; i++) {
coverageData[i].minValue++;
coverageData[i].maxValue++;
diff --git a/src/corelibs/U2Formats/src/mysql_dbi/util/upgraders/MysqlUpgraderFrom_1_16_To_1_24.cpp b/src/corelibs/U2Formats/src/mysql_dbi/util/upgraders/MysqlUpgraderFrom_1_16_To_1_24.cpp
new file mode 100644
index 0000000..e0de2cb
--- /dev/null
+++ b/src/corelibs/U2Formats/src/mysql_dbi/util/upgraders/MysqlUpgraderFrom_1_16_To_1_24.cpp
@@ -0,0 +1,217 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <U2Core/U2AttributeUtils.h>
+
+#include "MysqlUpgraderFrom_1_16_To_1_24.h"
+#include "mysql_dbi/MysqlDbi.h"
+#include "mysql_dbi/MysqlObjectDbi.h"
+#include "mysql_dbi/util/MysqlHelpers.h"
+
+namespace U2 {
+
+// Prefix of the file header lines that splitFileHeader() collects as meta information.
+const QString MysqlUpgraderFrom_1_16_To_1_24::META_INFO_MARKER("##");
+// Prefix of the column titles line. Meta information lines start with it too,
+// so META_INFO_MARKER has to be tested first.
+const QString MysqlUpgraderFrom_1_16_To_1_24::HEADER_MARKER("#");
+// Separator between the column titles of the header line.
+const QString MysqlUpgraderFrom_1_16_To_1_24::COLUMN_SEPARATOR("\t");
+
+/**
+ * Creates the upgrader for the given database interface.
+ * The base class receives the versions "1.16.0" and "1.24.0"
+ * (presumably the versions the database is upgraded from and to).
+ */
+MysqlUpgraderFrom_1_16_To_1_24::MysqlUpgraderFrom_1_16_To_1_24(MysqlDbi *dbi)
+    : MysqlUpgrader(Version::parseVersion("1.16.0"),
+                    Version::parseVersion("1.24.0"),
+                    dbi) {
+}
+
+/**
+ * Entry point of the upgrade: converts the variant data and then stores the target
+ * version as the minimum compatible application version of the database.
+ *
+ * @param os operation status; the version property is not touched if the variant
+ *           data conversion reports a problem through it
+ */
+void MysqlUpgraderFrom_1_16_To_1_24::upgrade(U2OpStatus &os) const {
+    MysqlTransaction transaction(dbi->getDbRef(), os);
+    Q_UNUSED(transaction);
+
+    upgradeVariantDbi(os);
+    CHECK_OP(os, );
+
+    // Written last, so it is reached only after the variant data has been converted.
+    dbi->setProperty(U2DbiOptions::APP_MIN_COMPATIBLE_VERSION, versionTo.text, os);
+}
+
+/**
+ * Converts the variant related data in three steps:
+ *  1. the file header of every variant track is turned into string attributes;
+ *  2. the additional info of every variant is repacked using the extracted column titles;
+ *  3. the 'fileHeader' column is removed from the VariantTrack table.
+ * The steps stop at the first one that reports a problem through 'os'.
+ */
+void MysqlUpgraderFrom_1_16_To_1_24::upgradeVariantDbi(U2OpStatus &os) const {
+    coreLog.trace("Variant DBI upgrading");
+
+    MysqlTransaction transaction(dbi->getDbRef(), os);
+    Q_UNUSED(transaction);
+
+    // Column titles of every track, filled by the first step and consumed by the second one.
+    QMap<U2DataId, QStringList> headersByTrackId;
+
+    // Has to run before updateScheme(): it reads the file header that updateScheme() drops.
+    extractAttributes(os, headersByTrackId);
+    CHECK_OP(os, );
+
+    repackInfo(os, headersByTrackId);
+    CHECK_OP(os, );
+
+    updateScheme(os);
+}
+
+namespace {
+
+/**
+ * Converts the old tab-separated 'additionalInfo' string of a variant into a packed key/value map.
+ *
+ * The first three items are stored as the QUAL, FILTER and INFO values. The following items are
+ * assigned, in order, to the header columns located after the eight mandatory VCF4 columns; a header
+ * column without a value gets ".". Items that are still left are joined back with tabulations and
+ * stored as a single value under the number of the first column that has no title.
+ *
+ * @param additionalInfo the old additional info string of one variant
+ * @param header         column titles of the track that owns the variant (may be empty)
+ * @return the result of U2DbiUtils::packMap, or an empty string if there is nothing to convert
+ */
+QString convertInfo(const QString &additionalInfo, const QStringList &header) {
+    QStrStrMap convertedInfoMap;
+    CHECK(!additionalInfo.isEmpty(), QString());
+    QStringList splittedInfo = additionalInfo.split("\t", QString::SkipEmptyParts);
+    CHECK(!splittedInfo.isEmpty(), QString());
+
+    convertedInfoMap.insert(U2Variant::VCF4_QUAL, splittedInfo.takeFirst());
+
+    if (!splittedInfo.isEmpty()) {
+        convertedInfoMap.insert(U2Variant::VCF4_FILTER, splittedInfo.takeFirst());
+    }
+
+    if (!splittedInfo.isEmpty()) {
+        convertedInfoMap.insert(U2Variant::VCF4_INFO, splittedInfo.takeFirst());
+    }
+
+    static const int maxVcf4MandatoryColumnNumber = 7;    // VCF4 format supposes 8 mandatory columns
+    for (int i = maxVcf4MandatoryColumnNumber + 1; i < header.size(); i++) {
+        convertedInfoMap.insert(header[i], splittedInfo.isEmpty() ? "." : splittedInfo.takeFirst());
+    }
+
+    if (!splittedInfo.isEmpty()) {
+        // There is no possibility to split the data correctly, because it was split by spaces, not by tabulations.
+        // Untitled columns are keyed by their zero-based column number, and AbstractVariationFormat::storeTrack()
+        // starts looking for them at qMax(maxColumnNumber + 1, header.size()). The key must be exactly that
+        // number: the previous "qMax(7, header.size()) + 1" was one too big for headers with 8 or more columns,
+        // so the value was never written back to a file.
+        const int firstUntitledColumnNumber = qMax(maxVcf4MandatoryColumnNumber + 1, header.size());
+        convertedInfoMap.insert(QString::number(firstUntitledColumnNumber), splittedInfo.join("\t"));
+    }
+
+    return U2DbiUtils::packMap(convertedInfoMap);
+}
+
+}
+
+/**
+ * Replaces the 'additionalInfo' value of every row of the Variant table with its converted form
+ * (see convertInfo()) and increments the version of every track that owns at least one variant.
+ *
+ * @param os             operation status; the method returns as soon as it reports a problem
+ * @param trackId2header column titles of the variant tracks, keyed by the track id; a track that
+ *                       is missing in the map is processed with an empty header
+ */
+void MysqlUpgraderFrom_1_16_To_1_24::repackInfo(U2OpStatus &os, const QMap<U2DataId, QStringList> &trackId2header) const {
+    coreLog.trace("Additional info repacking");
+
+    MysqlTransaction t(dbi->getDbRef(), os);
+    Q_UNUSED(t);
+
+    // The total number is used in the progress messages only.
+    const qint64 variantsCount = U2SqlQuery("SELECT count(*) from Variant", dbi->getDbRef(), os).selectInt64();
+    // Do not start the conversion if the table can't be queried (extractAttributes() does the same check).
+    CHECK_OP(os, );
+
+    static QString getQueryString ("SELECT id, track, additionalInfo FROM Variant");
+    static QString setQueryString ("UPDATE Variant SET additionalInfo = :additionalInfo WHERE id = :id");
+    U2SqlQuery getQuery(getQueryString, dbi->getDbRef(), os);
+    U2SqlQuery setQuery(setQueryString, dbi->getDbRef(), os);
+
+    // Tracks that own at least one variant: their versions are incremented afterwards.
+    QSet<U2DataId> trackIds;
+
+    qint64 number = 0;
+    while (getQuery.step()) {
+        CHECK_OP(os, );
+        const qint64 dbiId = getQuery.getInt64(0);
+        const QString additionalInfo = getQuery.getString(2);
+        const U2DataId trackId = getQuery.getDataId(1, U2Type::VariantTrack);
+        trackIds << trackId;
+
+        // value() makes the "empty header for an unknown track" fallback explicit.
+        const QString convertedInfo = convertInfo(additionalInfo, trackId2header.value(trackId));
+
+        setQuery.bindString(":additionalInfo", convertedInfo);
+        setQuery.bindInt64(":id", dbiId);
+        setQuery.execute();
+        CHECK_OP(os, );
+
+        number++;
+        if (number % 100 == 0) {
+            coreLog.trace(QString("Variants processed: %1/%2").arg(number).arg(variantsCount));
+        }
+    }
+
+    // Report the final count unless the loop has just reported it.
+    if (number % 100 != 0) {
+        coreLog.trace(QString("Variants processed: %1/%2").arg(number).arg(variantsCount));
+    }
+
+    number = 0;
+    foreach (const U2DataId &trackId, trackIds) {
+        MysqlObjectDbi::incrementVersion(trackId, dbi->getDbRef(), os);
+        CHECK_OP(os, );
+
+        number++;
+        if (number % 10 == 0) {
+            coreLog.trace(QString("Object versions processed: %1/%2").arg(number).arg(trackIds.size()));
+        }
+    }
+
+    if (number % 10 != 0) {
+        coreLog.trace(QString("Object versions processed: %1/%2").arg(number).arg(trackIds.size()));
+    }
+}
+
+/**
+ * Splits the file header of every variant track into the meta information and the column titles,
+ * saves both as string attributes of the track and remembers the column titles of each track.
+ *
+ * @param os             operation status; the method returns as soon as it reports a problem
+ * @param trackId2header output: column titles of every processed track, keyed by the track id
+ */
+void MysqlUpgraderFrom_1_16_To_1_24::extractAttributes(U2OpStatus &os, QMap<U2DataId, QStringList> &trackId2header) const {
+    coreLog.trace("Attributes extracting");
+
+    // The total number is used in the progress messages only.
+    const qint64 tracksCount = U2SqlQuery("SELECT count(*) from VariantTrack", dbi->getDbRef(), os).selectInt64();
+    CHECK_OP(os, );
+
+    QScopedPointer<U2DbiIterator<U2VariantTrack> > tracksIterator(dbi->getVariantDbi()->getVariantTracks(TrackType_All, os));
+    CHECK_OP(os, );
+
+    qint64 processedCount = 0;
+    while (tracksIterator->hasNext()) {
+        U2VariantTrack track = tracksIterator->next();
+        CHECK_OP(os, );
+
+        QString trackMetaInfo;
+        QStringList trackHeader;
+        splitFileHeader(track.fileHeader, trackMetaInfo, trackHeader);
+
+        trackId2header.insert(track.id, trackHeader);
+
+        addStringAttribute(os, track, U2VariantTrack::META_INFO_ATTIBUTE, trackMetaInfo);
+        CHECK_OP(os, );
+        addStringAttribute(os, track, U2VariantTrack::HEADER_ATTIBUTE, U2DbiUtils::packStringList(trackHeader));
+        CHECK_OP(os, );
+
+        processedCount++;
+        if (processedCount % 10 == 0) {
+            coreLog.trace(QString("Variant tracks processed: %1/%2").arg(processedCount).arg(tracksCount));
+        }
+    }
+
+    // Report the final count unless the loop has just reported it.
+    if (processedCount % 10 != 0) {
+        coreLog.trace(QString("Variant tracks processed: %1/%2").arg(processedCount).arg(tracksCount));
+    }
+}
+
+/**
+ * Removes the 'fileHeader' column from the VariantTrack table.
+ * The file header has to be converted into attributes before this call (see extractAttributes()).
+ */
+void MysqlUpgraderFrom_1_16_To_1_24::updateScheme(U2OpStatus &os) const {
+    coreLog.trace("Scheme updating");
+    U2SqlQuery dropColumnQuery("ALTER TABLE VariantTrack DROP COLUMN fileHeader;", dbi->getDbRef(), os);
+    dropColumnQuery.execute();
+}
+
+/**
+ * Creates a string attribute of the variant track in the database.
+ * Nothing is created if the value is empty.
+ *
+ * @param os             operation status, passed to the attribute DBI
+ * @param variantTrack   the track the attribute is initialized for
+ * @param attributeName  name of the new attribute
+ * @param attributeValue value of the new attribute
+ */
+void MysqlUpgraderFrom_1_16_To_1_24::addStringAttribute(U2OpStatus &os, const U2VariantTrack &variantTrack, const QString &attributeName, const QString &attributeValue) const {
+    CHECK(!attributeValue.isEmpty(), );
+
+    U2StringAttribute stringAttribute;
+    U2AttributeUtils::init(stringAttribute, variantTrack, attributeName);
+    stringAttribute.value = attributeValue;
+
+    dbi->getAttributeDbi()->createStringAttribute(stringAttribute, os);
+}
+
+/**
+ * Splits the stored file header into the meta information and the column titles.
+ *
+ * @param fileHeader the whole header text of a variant track
+ * @param metaInfo   output: every line that starts with META_INFO_MARKER is appended to it,
+ *                   followed by "\n"
+ * @param header     output: the line that starts with HEADER_MARKER (the last one, if there are
+ *                   several) split by COLUMN_SEPARATOR; left untouched if there is no such line
+ */
+void MysqlUpgraderFrom_1_16_To_1_24::splitFileHeader(const QString &fileHeader, QString &metaInfo, QStringList &header) {
+    // "\r?\n\r?" accepts LF, CRLF and LF-CR line ends. The previous "\n\r?" pattern left the '\r' of a
+    // CRLF pair at the end of every line, so it got into the meta information and the last column title.
+    const QStringList lines = fileHeader.split(QRegExp("\\r?\\n\\r?"), QString::SkipEmptyParts);
+    foreach (const QString &line, lines) {
+        // The order matters: a meta information line starts with HEADER_MARKER too.
+        if (line.startsWith(META_INFO_MARKER)) {
+            metaInfo += line + "\n";
+        } else if (line.startsWith(HEADER_MARKER)) {
+            header = line.split(COLUMN_SEPARATOR);
+        }
+    }
+}
+
+} // namespace U2
diff --git a/src/corelibs/U2Formats/src/mysql_dbi/util/upgraders/MysqlUpgraderFrom_1_16_To_1_24.h b/src/corelibs/U2Formats/src/mysql_dbi/util/upgraders/MysqlUpgraderFrom_1_16_To_1_24.h
new file mode 100644
index 0000000..938a27f
--- /dev/null
+++ b/src/corelibs/U2Formats/src/mysql_dbi/util/upgraders/MysqlUpgraderFrom_1_16_To_1_24.h
@@ -0,0 +1,54 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+// The guard name follows the file name (1.16 -> 1.24); it used to say 1_17, which would
+// clash with the guard of a header for an upgrader starting from 1.17.
+#ifndef _U2_MYSQL_UPGRADER_FROM_1_16_TO_1_24_H_
+#define _U2_MYSQL_UPGRADER_FROM_1_16_TO_1_24_H_
+
+#include "MysqlUpgrader.h"
+
+namespace U2 {
+
+class MysqlDbRef;
+class U2VariantTrack;
+
+/**
+ * Upgrades the variant data of a MySQL database: the file header of every variant track is moved
+ * to string attributes, the additional info of every variant is repacked into a key/value map and
+ * the 'fileHeader' column is removed from the VariantTrack table.
+ */
+class MysqlUpgraderFrom_1_16_To_1_24 : public MysqlUpgrader {
+public:
+    MysqlUpgraderFrom_1_16_To_1_24(MysqlDbi *dbi);
+
+    /** Performs the upgrade and stores the target version in the database properties. */
+    void upgrade(U2OpStatus &os) const;
+
+private:
+    /** Runs extractAttributes(), repackInfo() and updateScheme() one after another. */
+    void upgradeVariantDbi(U2OpStatus &os) const;
+    /** Converts the additional info of every variant using the column titles of its track. */
+    void repackInfo(U2OpStatus &os, const QMap<U2DataId, QStringList> &trackId2header) const;
+    /** Saves the file header parts of every track as attributes and fills 'trackId2header'. */
+    void extractAttributes(U2OpStatus &os, QMap<U2DataId, QStringList> &trackId2header) const;
+    /** Drops the 'fileHeader' column of the VariantTrack table. */
+    void updateScheme(U2OpStatus &os) const;
+    /** Creates a string attribute of the track; an empty value is skipped. */
+    void addStringAttribute(U2OpStatus &os, const U2VariantTrack &variantTrack, const QString &attributeName, const QString &attributeValue) const;
+
+    /** Splits the file header text into the meta information lines and the column titles. */
+    static void splitFileHeader(const QString &fileHeader, QString &metaInfo, QStringList &header);
+
+    static const QString META_INFO_MARKER;
+    static const QString HEADER_MARKER;
+    static const QString COLUMN_SEPARATOR;
+};
+
+}   // namespace U2
+
+#endif // _U2_MYSQL_UPGRADER_FROM_1_16_TO_1_24_H_
diff --git a/src/corelibs/U2Formats/src/sqlite_dbi/SQLiteAssemblyDbi.cpp b/src/corelibs/U2Formats/src/sqlite_dbi/SQLiteAssemblyDbi.cpp
index e727097..cb5457f 100644
--- a/src/corelibs/U2Formats/src/sqlite_dbi/SQLiteAssemblyDbi.cpp
+++ b/src/corelibs/U2Formats/src/sqlite_dbi/SQLiteAssemblyDbi.cpp
@@ -544,10 +544,10 @@ void SQLiteAssemblyUtils::unpackData(const QByteArray& packedData, U2AssemblyRea
}
void SQLiteAssemblyUtils::calculateCoverage(SQLiteQuery& q, const U2Region& r, U2AssemblyCoverageStat& c, U2OpStatus& os) {
- int csize = c.coverage.size();
+ int csize = c.coverage->size();
SAFE_POINT(csize > 0, "illegal coverage vector size!", );
- U2Range<int>* cdata = c.coverage.data();
+ U2Range<int>* cdata = c.coverage->data();
double basesPerRange = double(r.length) / csize;
while (q.step() && !os.isCoR()) {
qint64 startPos = q.getInt64(0);
@@ -572,7 +572,7 @@ void SQLiteAssemblyUtils::addToCoverage(U2AssemblyCoverageImportInfo& ii, const
if (!ii.computeCoverage) {
return;
}
- int csize = ii.coverage.coverage.size();
+ int csize = ii.coverage.coverage->size();
int startPos = (int)(read->leftmostPos / ii.coverageBasesPerPoint);
int endPos = (int)((read->leftmostPos + read->effectiveLen - 1) / ii.coverageBasesPerPoint);
@@ -580,7 +580,7 @@ void SQLiteAssemblyUtils::addToCoverage(U2AssemblyCoverageImportInfo& ii, const
coreLog.trace(QString("addToCoverage: endPos > csize - 1: %1 > %2").arg(endPos).arg(csize-1));
endPos = csize - 1;
}
- U2Range<int>* coverageData = ii.coverage.coverage.data();
+ U2Range<int>* coverageData = ii.coverage.coverage->data();
for (int i = startPos; i <= endPos && i < csize; i++) {
coverageData[i].minValue++;
coverageData[i].maxValue++;
diff --git a/src/corelibs/U2Formats/src/sqlite_dbi/SQLiteVariantDbi.cpp b/src/corelibs/U2Formats/src/sqlite_dbi/SQLiteVariantDbi.cpp
index c2a7149..dddf9f9 100644
--- a/src/corelibs/U2Formats/src/sqlite_dbi/SQLiteVariantDbi.cpp
+++ b/src/corelibs/U2Formats/src/sqlite_dbi/SQLiteVariantDbi.cpp
@@ -110,7 +110,7 @@ void SQLiteVariantDbi::addVariantsToTrack(const U2VariantTrack& track, U2DbiIter
q2->bindBlob(4, var.refData);
q2->bindBlob(5, var.obsData);
q2->bindString(6, var.publicId);
- q2->bindString(7, var.additionalInfo);
+ q2->bindString(7, U2DbiUtils::packMap(var.additionalInfo));
var.id = q2->insert(U2Type::VariantType);
SAFE_POINT_OP(os,);
@@ -175,7 +175,7 @@ public:
res.refData = q->getBlob(3);
res.obsData = q->getBlob(4);
res.publicId = q->getString(5);
- res.additionalInfo = q->getString(6);
+ res.additionalInfo = U2DbiUtils::unpackMap(q->getString(6));
return res;
}
};
diff --git a/src/corelibs/U2Formats/src/tasks/ConvertFileTask.h b/src/corelibs/U2Formats/src/tasks/ConvertFileTask.h
index 898e113..a464eca 100644
--- a/src/corelibs/U2Formats/src/tasks/ConvertFileTask.h
+++ b/src/corelibs/U2Formats/src/tasks/ConvertFileTask.h
@@ -32,6 +32,7 @@ class SaveDocumentTask;
//
class U2FORMATS_EXPORT ConvertFileTask : public Task {
+ Q_OBJECT
public:
ConvertFileTask(const GUrl &sourceURL, const QString &detectedFormat, const QString &targetFormat, const QString &dir);
diff --git a/src/corelibs/U2Formats/src/tasks/ConvertSnpeffVariationsToAnnotationsTask.cpp b/src/corelibs/U2Formats/src/tasks/ConvertSnpeffVariationsToAnnotationsTask.cpp
new file mode 100644
index 0000000..4da5ac4
--- /dev/null
+++ b/src/corelibs/U2Formats/src/tasks/ConvertSnpeffVariationsToAnnotationsTask.cpp
@@ -0,0 +1,233 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <U2Core/AppContext.h>
+#include <U2Core/CreateAnnotationTask.h>
+#include <U2Core/DeleteObjectsTask.h>
+#include <U2Core/DocumentModel.h>
+#include <U2Core/GenbankFeatures.h>
+#include <U2Core/IOAdapterUtils.h>
+#include <U2Core/L10n.h>
+#include <U2Core/LoadDocumentTask.h>
+#include <U2Core/SaveDocumentTask.h>
+#include <U2Core/U1AnnotationUtils.h>
+#include <U2Core/U2OpStatusUtils.h>
+#include <U2Core/U2SafePoints.h>
+#include <U2Core/VariantTrackObject.h>
+
+#include <U2Formats/SnpeffInfoParser.h>
+
+#include "ConvertSnpeffVariationsToAnnotationsTask.h"
+
+namespace U2 {
+
+const QString ConvertSnpeffVariationsToAnnotationsTask::CHROM_QUALIFIER_NAME = "chrom";
+const QString ConvertSnpeffVariationsToAnnotationsTask::REFERENCE_QUALIFIER_NAME = "Reference_bases";
+const QString ConvertSnpeffVariationsToAnnotationsTask::ALTERNATE_QUALIFIER_NAME = "Alternate_bases";
+const QString ConvertSnpeffVariationsToAnnotationsTask::ALLELE_QUALIFIER_NAME = "Allele";
+const QString ConvertSnpeffVariationsToAnnotationsTask::ID_QUALIFIER_NAME = "ID";
+
+ConvertSnpeffVariationsToAnnotationsTask::ConvertSnpeffVariationsToAnnotationsTask(const QList<VariantTrackObject *> &variantTrackObjects)
+ : Task(tr("Convert SnpEff variations to annotations task"), TaskFlag_None),
+ variantTrackObjects(variantTrackObjects)
+{
+
+}
+
+const QMap<QString, QList<SharedAnnotationData> > & ConvertSnpeffVariationsToAnnotationsTask::getAnnotationsData() const {
+ return annotationTablesData;
+}
+
+void ConvertSnpeffVariationsToAnnotationsTask::run() {
+ foreach (VariantTrackObject *variantTrackObject, variantTrackObjects) {
+ QList<SharedAnnotationData> annotationTableData;
+ // Each track produces one annotation list, stored under the track's sequence name at the end of the iteration.
+ const U2VariantTrack variantTrack = variantTrackObject->getVariantTrack(stateInfo);
+ CHECK_OP(stateInfo, );
+
+ QScopedPointer<U2DbiIterator<U2Variant> > variantsIterator(variantTrackObject->getVariants(U2_REGION_MAX, stateInfo));
+ CHECK_OP(stateInfo, );
+ // Template copied for every variant of this track: it carries the "chrom" qualifier and the Variation feature type.
+ SharedAnnotationData tableAnnotationData(new AnnotationData);
+ tableAnnotationData->qualifiers << U2Qualifier(CHROM_QUALIFIER_NAME, variantTrack.sequenceName);
+ tableAnnotationData->type = U2FeatureTypes::Variation;
+
+ SnpeffInfoParser infoParser;
+ while (variantsIterator.data()->hasNext()) {
+ const U2Variant variant = variantsIterator.data()->next();
+ // The entry gets the variation key name, a region of length endPos - startPos + 1 and the reference/observed bases; the public ID only when present.
+ SharedAnnotationData entryAnnotationData = tableAnnotationData;
+ entryAnnotationData->name = GBFeatureUtils::getKeyInfo(GBFeatureKey_variation).text;
+ entryAnnotationData->location->regions << U2Region(variant.startPos, variant.endPos - variant.startPos + 1);
+ entryAnnotationData->qualifiers << U2Qualifier(REFERENCE_QUALIFIER_NAME, variant.refData);
+ entryAnnotationData->qualifiers << U2Qualifier(ALTERNATE_QUALIFIER_NAME, variant.obsData);
+ if (!variant.publicId.isEmpty()) {
+ entryAnnotationData->qualifiers << U2Qualifier(ID_QUALIFIER_NAME, variant.publicId);
+ }
+ // Parser warnings go to a local status and are forwarded to the task. NOTE(review): if `os` ever held an error, CHECK_OP(os, ) would presumably return without recording it in stateInfo - confirm.
+ U2OpStatusImpl os;
+ const QList<QList<U2Qualifier> > qualifiersList = infoParser.parse(os, variant.additionalInfo[U2Variant::VCF4_INFO]);
+ CHECK_OP(os, );
+ CHECK_OP(stateInfo, );
+ stateInfo.addWarnings(os.getWarnings());
+ // Each non-empty qualifier list returned by the parser becomes its own annotation built on top of the entry data.
+ foreach (const QList<U2Qualifier> &qualifiers, qualifiersList) {
+ if (qualifiers.isEmpty()) {
+ continue;
+ }
+ // When the parsed qualifiers include "Allele", removeAllQualifier() is applied to "Alternate_bases" for this annotation.
+ SharedAnnotationData parsedAnnotationData = entryAnnotationData;
+ parsedAnnotationData->qualifiers << qualifiers.toVector();
+ if (U1AnnotationUtils::containsQualifier(qualifiers, ALLELE_QUALIFIER_NAME)) {
+ U1AnnotationUtils::removeAllQualifier(parsedAnnotationData, ALTERNATE_QUALIFIER_NAME);
+ }
+ annotationTableData << parsedAnnotationData;
+ }
+ // The bare entry is kept only if the parser returned nothing and raised no warnings. NOTE(review): a variant with warnings but no parsed qualifiers yields no annotation - confirm this is intended.
+ if (!os.hasWarnings() && qualifiersList.isEmpty()) {
+ annotationTableData << entryAnnotationData;
+ }
+ }
+ annotationTablesData.insert(variantTrack.sequenceName, annotationTableData);
+ }
+}
+
+LoadConvertAndSaveSnpeffVariationsToAnnotationsTask::LoadConvertAndSaveSnpeffVariationsToAnnotationsTask(const QString &variationsUrl,
+ const U2DbiRef &dstDbiRef,
+ const QString &dstUrl,
+ const QString &formatId)
+ : Task(tr("Load file and convert SnpEff variations to annotations task"), TaskFlags_NR_FOSE_COSC | TaskFlag_CollectChildrenWarnings),
+ variationsUrl(variationsUrl),
+ dstDbiRef(dstDbiRef),
+ dstUrl(dstUrl),
+ formatId(formatId),
+ loadTask(NULL),
+ convertTask(NULL),
+ saveTask(NULL),
+ loadedVariationsDocument(NULL),
+ annotationsDocument(NULL)
+{
+ SAFE_POINT_EXT(!variationsUrl.isEmpty(), setError("Source VCF file URL is empty"), );
+ SAFE_POINT_EXT(dstDbiRef.isValid(), setError("Destination DBI reference is invalid"), );
+ SAFE_POINT_EXT(!dstUrl.isEmpty(), setError("Destination file URL is empty"), );
+ SAFE_POINT_EXT(!formatId.isEmpty(), setError("Destination file format is empty"), );
+}
+
+LoadConvertAndSaveSnpeffVariationsToAnnotationsTask::~LoadConvertAndSaveSnpeffVariationsToAnnotationsTask() {
+ qDeleteAll(annotationTableObjects);
+ delete loadedVariationsDocument;
+ delete annotationsDocument;
+}
+
+const QString & LoadConvertAndSaveSnpeffVariationsToAnnotationsTask::getResultUrl() const {
+ return dstUrl;
+}
+
+void LoadConvertAndSaveSnpeffVariationsToAnnotationsTask::prepare() {
+ QVariantMap hints;
+ hints[DocumentFormat::DBI_REF_HINT] = QVariant::fromValue<U2DbiRef>(dstDbiRef);
+ loadTask = LoadDocumentTask::getDefaultLoadDocTask(variationsUrl, hints);
+ addSubTask(loadTask);
+}
+
+QList<Task *> LoadConvertAndSaveSnpeffVariationsToAnnotationsTask::onSubTaskFinished(Task *subTask) {
+ QList<Task *> newSubtasks;
+ CHECK_OP(stateInfo, newSubtasks);
+ // Stage 1 - loading finished: keep the loaded document (deleted later by this task) and start converting its variant tracks.
+ if (loadTask == subTask) {
+ loadedVariationsDocument = loadTask->takeDocument();
+ CHECK_EXT(NULL != loadedVariationsDocument, setError(tr("'%1' load failed, the result document is NULL").arg(variationsUrl)), newSubtasks);
+ loadedVariationsDocument->setDocumentOwnsDbiResources(false);
+
+ QList<GObject *> objects = loadedVariationsDocument->findGObjectByType(GObjectTypes::VARIANT_TRACK);
+ CHECK_EXT(!objects.isEmpty(), setError(tr("File '%1' doesn't contain variation tracks").arg(variationsUrl)), newSubtasks);
+
+ QList<VariantTrackObject *> variantTrackObjects;
+ foreach (GObject *object, objects) {
+ VariantTrackObject *variantTrackObject = qobject_cast<VariantTrackObject *>(object);
+ SAFE_POINT_EXT(NULL != variantTrackObject, setError("Can't cast GObject to VariantTrackObject"), newSubtasks);
+ variantTrackObjects << variantTrackObject;
+ }
+
+ convertTask = new ConvertSnpeffVariationsToAnnotationsTask(variantTrackObjects);
+ newSubtasks << convertTask;
+ }
+ // Stage 2 - conversion finished: create one annotation table object and one CreateAnnotationsTask (group "Variations") per key of the result map.
+ if (convertTask == subTask) {
+ QMap<QString, QList<SharedAnnotationData> > annotationsData = convertTask->getAnnotationsData();
+ foreach (const QString &chromosome, annotationsData.keys()) {
+ AnnotationTableObject *annotationTableObject = new AnnotationTableObject(chromosome, dstDbiRef);
+ annotationTableObjects << annotationTableObject;
+
+ createAnnotationsTasks << new CreateAnnotationsTask(annotationTableObject, annotationsData[chromosome], "Variations");
+ }
+ newSubtasks << createAnnotationsTasks;
+ }
+ // Stage 3 - after the last CreateAnnotationsTask: schedule saving and drop the loaded variations. NOTE(review): the object list is handed to DeleteObjectsTask right before its document is deleted; presumably the task copies what it needs in its constructor - verify.
+ if (createAnnotationsTasks.contains(subTask)) {
+ createAnnotationsTasks.removeAll(subTask);
+ if (createAnnotationsTasks.isEmpty()) {
+ prepareSaveTask();
+ CHECK_OP(stateInfo, newSubtasks);
+ newSubtasks << saveTask;
+ newSubtasks << new DeleteObjectsTask(loadedVariationsDocument->getObjects());
+ delete loadedVariationsDocument;
+ loadedVariationsDocument = NULL;
+ }
+ }
+ // Stage 4 - saving finished: schedule deletion of the annotation objects and free the annotations document.
+ if (saveTask == subTask) {
+ newSubtasks << new DeleteObjectsTask(annotationsDocument->getObjects());
+ delete annotationsDocument;
+ annotationsDocument = NULL;
+ }
+
+ return newSubtasks;
+}
+
+Document * LoadConvertAndSaveSnpeffVariationsToAnnotationsTask::prepareDocument() {
+ DocumentFormat *format = AppContext::getDocumentFormatRegistry()->getFormatById(formatId);
+ SAFE_POINT_EXT(NULL != format, setError(QString("Document format '%1' not found in the registry").arg(formatId)), NULL);
+ IOAdapterFactory *ioAdapterFactory = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(IOAdapterUtils::url2io(dstUrl));
+ SAFE_POINT_EXT(NULL != ioAdapterFactory, setError(L10N::nullPointerError("ioAdapterFactory")), NULL);
+ // The destination DBI reference is passed to the new document through the DBI_REF_HINT hint.
+ QVariantMap hints;
+ hints[DocumentFormat::DBI_REF_HINT] = QVariant::fromValue<U2DbiRef>(dstDbiRef);
+
+ Document *document = format->createNewLoadedDocument(ioAdapterFactory, dstUrl, stateInfo, hints);
+ CHECK_OP(stateInfo, NULL);
+ document->setDocumentOwnsDbiResources(false);
+ // Move the annotation table objects into the document (presumably it takes ownership); the list is cleared so the destructor's qDeleteAll() no longer touches them.
+ foreach (AnnotationTableObject *annotationTableObject, annotationTableObjects) {
+ document->addObject(annotationTableObject);
+ }
+ annotationTableObjects.clear();
+
+ return document;
+}
+
+void LoadConvertAndSaveSnpeffVariationsToAnnotationsTask::prepareSaveTask() {
+ annotationsDocument = prepareDocument();
+ CHECK_OP(stateInfo, );
+ saveTask = new SaveDocumentTask(annotationsDocument);
+}
+
+} // namespace U2
diff --git a/src/corelibs/U2Formats/src/tasks/ConvertSnpeffVariationsToAnnotationsTask.h b/src/corelibs/U2Formats/src/tasks/ConvertSnpeffVariationsToAnnotationsTask.h
new file mode 100644
index 0000000..8ba59d9
--- /dev/null
+++ b/src/corelibs/U2Formats/src/tasks/ConvertSnpeffVariationsToAnnotationsTask.h
@@ -0,0 +1,89 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef _U2_CONVERT_SNPEFF_VARIATIONS_TO_ANNOTATIONS_TASK_H_
+#define _U2_CONVERT_SNPEFF_VARIATIONS_TO_ANNOTATIONS_TASK_H_
+
+#include <U2Core/AnnotationData.h>
+#include <U2Core/Task.h>
+
+namespace U2 {
+
+class AnnotationTableObject;
+class CreateAnnotationsTask;
+class Document;
+class LoadDocumentTask;
+class SaveDocumentTask;
+class VariantTrackObject;
+
+class ConvertSnpeffVariationsToAnnotationsTask : public Task {
+ Q_OBJECT
+public:
+ ConvertSnpeffVariationsToAnnotationsTask(const QList<VariantTrackObject *> &variantTrackObjects);
+
+ const QMap<QString, QList<SharedAnnotationData> > & getAnnotationsData() const;
+
+private:
+ void run();
+
+ const QList<VariantTrackObject *> variantTrackObjects;
+ QMap<QString, QList<SharedAnnotationData> > annotationTablesData;
+
+ static const QString CHROM_QUALIFIER_NAME;
+ static const QString REFERENCE_QUALIFIER_NAME;
+ static const QString ALTERNATE_QUALIFIER_NAME;
+ static const QString ALLELE_QUALIFIER_NAME;
+ static const QString ID_QUALIFIER_NAME;
+};
+
+class U2FORMATS_EXPORT LoadConvertAndSaveSnpeffVariationsToAnnotationsTask : public Task {
+ Q_OBJECT
+public:
+ LoadConvertAndSaveSnpeffVariationsToAnnotationsTask(const QString &variationsUrl, const U2DbiRef &dstDbiRef, const QString &dstUrl, const QString &formatId);
+ ~LoadConvertAndSaveSnpeffVariationsToAnnotationsTask();
+
+ const QString & getResultUrl() const;
+
+private:
+ void prepare();
+ QList<Task *> onSubTaskFinished(Task *subTask);
+
+ Document * prepareDocument();
+ void prepareSaveTask();
+
+ const QString variationsUrl;
+ const U2DbiRef dstDbiRef;
+ const QString dstUrl;
+ const QString formatId;
+
+ LoadDocumentTask *loadTask;
+ ConvertSnpeffVariationsToAnnotationsTask *convertTask;
+ QList<Task *> createAnnotationsTasks;
+ SaveDocumentTask *saveTask;
+
+ Document *loadedVariationsDocument;
+ Document *annotationsDocument;
+ QList<AnnotationTableObject *> annotationTableObjects;
+};
+
+} // namespace U2
+
+#endif // _U2_CONVERT_SNPEFF_VARIATIONS_TO_ANNOTATIONS_TASK_H_
diff --git a/src/corelibs/U2Formats/src/util/SnpeffInfoParser.cpp b/src/corelibs/U2Formats/src/util/SnpeffInfoParser.cpp
new file mode 100644
index 0000000..833eb21
--- /dev/null
+++ b/src/corelibs/U2Formats/src/util/SnpeffInfoParser.cpp
@@ -0,0 +1,277 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <U2Core/SnpeffDictionary.h>
+#include <U2Core/U2OpStatus.h>
+#include <U2Core/U2SafePoints.h>
+
+#include "SnpeffInfoParser.h"
+
+namespace U2 {
+
+const QString SnpeffInfoParser::PAIRS_SEPARATOR = ";";
+const QString SnpeffInfoParser::KEY_VALUE_SEPARATOR = "=";
+
+SnpeffInfoParser::SnpeffInfoParser()
+{
+ initPartParsers();
+}
+
+SnpeffInfoParser::~SnpeffInfoParser() {
+ qDeleteAll(partParsers.values());
+}
+
+QList<QList<U2Qualifier> > SnpeffInfoParser::parse(U2OpStatus &os, const QString &snpeffInfo) const {
+ QList<QList<U2Qualifier> > qualifiers;
+ const QStringList keyValuePairs = snpeffInfo.split(PAIRS_SEPARATOR, QString::SkipEmptyParts);
+ foreach (const QString &keyValuePair, keyValuePairs) {
+ const QStringList splittedKeyValuePair = keyValuePair.split(KEY_VALUE_SEPARATOR);
+ if (splittedKeyValuePair.size() > 2) {
+ os.addWarning(tr("Can't parse the next INFO part: '%1'").arg(keyValuePair));
+ continue;
+ }
+ // Parts with more than one '=' were rejected above with a warning; parts without '=' have no value to parse and are skipped silently.
+ if (splittedKeyValuePair.size() == 1) {
+ continue;
+ }
+ // Dispatch on the key: only the keys registered in initPartParsers() have a parser, value(..., NULL) yields NULL for the rest.
+ InfoPartParser *partParser = partParsers.value(splittedKeyValuePair.first(), NULL);
+ if (NULL == partParser) {
+ // This INFO part is not added by SnpEff
+ continue;
+ }
+ qualifiers << partParser->parse(os, splittedKeyValuePair.last());
+ CHECK_OP(os, qualifiers);
+ }
+
+ return qualifiers;
+}
+
+void SnpeffInfoParser::initPartParsers() {
+ partParsers.insert(AnnParser::KEY_WORD, new AnnParser);
+ partParsers.insert(EffParser::KEY_WORD, new EffParser);
+ partParsers.insert(LofParser::KEY_WORD, new LofParser);
+ partParsers.insert(NmdParser::KEY_WORD, new NmdParser);
+}
+
+const QString InfoPartParser::ERROR = "error";
+const QString InfoPartParser::WARNING = "warning";
+const QString InfoPartParser::INFO = "info";
+const QString InfoPartParser::MESSAGE = "message";
+const QString InfoPartParser::MESSAGE_DESCRIPTION = "message_desc";
+const QString InfoPartParser::ANNOTATION_SEPARATOR = ",";
+const QString InfoPartParser::SNPEFF_TAG = "SnpEff_tag";
+
+InfoPartParser::InfoPartParser(const QString &keyWord, bool canStoreMessages)
+ : keyWord(keyWord),
+ canStoreMessages(canStoreMessages)
+{
+
+}
+
+const QString & InfoPartParser::getKeyWord() const {
+ return keyWord;
+}
+
+QList<QList<U2Qualifier> > InfoPartParser::parse(U2OpStatus &os, const QString &infoPart) const {
+ QList<QList<U2Qualifier> > qualifiers;
+ const QStringList entries = infoPart.split(ANNOTATION_SEPARATOR);
+ foreach (const QString &entry, entries) {
+ qualifiers << parseEntry(os, entry);
+ CHECK_OP(os, qualifiers);
+ }
+ return qualifiers;
+}
+
+QList<U2Qualifier> InfoPartParser::processValue(const QString &qualifierName, const QString &value) const {
+ QList<U2Qualifier> qualifiers;
+ qualifiers << U2Qualifier(qualifierName, value);
+ return qualifiers;
+}
+
+QList<U2Qualifier> InfoPartParser::parseEntry(U2OpStatus &os, const QString &entry) const {
+ QList<U2Qualifier> qualifiers;
+ const QStringList qualifierNames = getQualifierNames();
+ const QStringList values = getValues(entry);
+ CHECK_EXT(values.size() >= qualifierNames.size(), os.addWarning(tr("Too few values in the entry: '%1'. Expected at least %2 values.").arg(entry).arg(qualifierNames.size())), qualifiers);
+ // Every parsed entry starts with a "SnpEff_tag" qualifier holding the key word of this parser.
+ qualifiers << U2Qualifier(SNPEFF_TAG, keyWord);
+ // Values map positionally onto the qualifier names (empty values are skipped); `i` then continues over any extra values: stored as messages if canStoreMessages is set, otherwise only a warning is added.
+ int i = 0;
+ for (i = 0; i < qualifierNames.size(); i++) {
+ if (!values[i].isEmpty()) {
+ qualifiers << processValue(qualifierNames[i], values[i]);
+ }
+ }
+ if (canStoreMessages) {
+ for (; i < values.size(); i++) {
+ if (!values[i].isEmpty()) {
+ qualifiers << U2Qualifier(MESSAGE, values[i]);
+ if (SnpeffDictionary::messageDescriptions.contains(values[i])) {
+ qualifiers << U2Qualifier(MESSAGE_DESCRIPTION, SnpeffDictionary::messageDescriptions[values[i]]);
+ }
+ }
+ }
+ } else if (i < values.size()) {
+ os.addWarning(tr("Too many values in the entry '%1', extra entries are ignored").arg(entry));
+ }
+ return qualifiers;
+}
+
+const QString AnnParser::KEY_WORD = "ANN";
+const QString AnnParser::VALUES_SEPARATOR = "|";
+const QString AnnParser::EFFECTS_SEPARATOR = "&";
+const QString AnnParser::EFFECT = "Effect";
+const QString AnnParser::EFFECT_DESCRIPTION = "Effect_desc";
+const QString AnnParser::PUTATIVE_IMPACT = "Putative_impact";
+const QString AnnParser::PUTATIVE_IMPACT_DESCRIPTION = "Putative_imp_desc";
+
+AnnParser::AnnParser()
+ : InfoPartParser(KEY_WORD, true)
+{
+
+}
+
+QStringList AnnParser::getQualifierNames() const {
+ return QStringList() << "Allele"
+ << EFFECT
+ << PUTATIVE_IMPACT
+ << "Gene_name"
+ << "Gene_ID"
+ << "Feature_type"
+ << "Feature_ID"
+ << "Transcript_biotype"
+ << "Rank_total"
+ << "HGVS_c"
+ << "HGVS_p"
+ << "cDNA_pos_len"
+ << "CDS_pos_len"
+ << "Protein_pos_len"
+ << "Distance_to_feature";
+}
+
+QStringList AnnParser::getValues(const QString &entry) const {
+ return entry.split(VALUES_SEPARATOR);
+}
+
+// Returns the plain qualifier plus a description for a known putative impact, or one description per known '&'-separated effect.
+QList<U2Qualifier> AnnParser::processValue(const QString &qualifierName, const QString &value) const {
+    QList<U2Qualifier> qualifiers = InfoPartParser::processValue(qualifierName, value);
+    if (qualifierName == PUTATIVE_IMPACT && SnpeffDictionary::impactDescriptions.contains(value)) {
+        qualifiers << U2Qualifier(PUTATIVE_IMPACT_DESCRIPTION, SnpeffDictionary::impactDescriptions[value]);
+    } else if (qualifierName == EFFECT) {
+        foreach (const QString &effect, value.split(EFFECTS_SEPARATOR, QString::SkipEmptyParts)) {
+            if (SnpeffDictionary::effectDescriptions.contains(effect)) {  // index by the single effect, not by the joined value
+                qualifiers << U2Qualifier(EFFECT_DESCRIPTION, effect + ": " + SnpeffDictionary::effectDescriptions[effect]);
+            }
+        }
+    }
+    return qualifiers;
+}
+
+const QString EffParser::KEY_WORD = "EFF";
+const QString EffParser::EFFECT_DATA_SEPARATOR = "|";
+const QString EffParser::EFFECT = "Effect";
+const QString EffParser::EFFECT_DESCRIPTION = "Effect_desc";
+const QString EffParser::EFFECT_IMPACT = "Effect_impact";
+const QString EffParser::EFFECT_IMPACT_DESCRIPTION = "Effect_impact_desc";
+
+EffParser::EffParser()
+ : InfoPartParser(KEY_WORD, true)
+{
+
+}
+
+QStringList EffParser::getQualifierNames() const {
+ return QStringList() << EFFECT
+ << EFFECT_IMPACT
+ << "Functional_class"
+ << "Codon_change_dist"
+ << "Amino_acid_change"
+ << "Amino_acid_length"
+ << "Gene_name"
+ << "Transcript_biotype"
+ << "Gene_coding"
+ << "Transcript_ID"
+ << "Exon_intron_rank"
+ << "Genotype_number";
+}
+
+QStringList EffParser::getValues(const QString &entry) const {
+ QRegExp regexp("^(\\w+)\\((.*)\\)$"); // an entry has the form "Name(data)": cap(1) is the name, cap(2) the text between the outer parentheses
+ QStringList values;
+ regexp.indexIn(entry); // the match result is not checked; NOTE(review): on a mismatch the captures are presumably empty - confirm
+ values << regexp.cap(1);
+ values << regexp.cap(2).split(EFFECT_DATA_SEPARATOR); // the data part is split on '|'
+ return values;
+}
+
+QList<U2Qualifier> EffParser::processValue(const QString &qualifierName, const QString &value) const {
+ QList<U2Qualifier> qualifiers = InfoPartParser::processValue(qualifierName, value);
+ if (qualifierName == EFFECT && SnpeffDictionary::effectDescriptions.contains(value)) {
+ qualifiers << U2Qualifier(EFFECT_DESCRIPTION, SnpeffDictionary::effectDescriptions[value]);
+ } else if (qualifierName == EFFECT_IMPACT && SnpeffDictionary::impactDescriptions.contains(value)) {
+ qualifiers << U2Qualifier(EFFECT_IMPACT_DESCRIPTION, SnpeffDictionary::impactDescriptions[value]);
+ }
+ return qualifiers;
+}
+
+const QString LofParser::KEY_WORD = "LOF";
+const QString LofParser::VALUES_SEPARATOR = "|";
+
+LofParser::LofParser()
+ : InfoPartParser(KEY_WORD)
+{
+
+}
+
+QStringList LofParser::getQualifierNames() const {
+ return QStringList() << "Gene"
+ << "ID"
+ << "Num_transcripts"
+ << "percent_affected";
+}
+
+QStringList LofParser::getValues(const QString &entry) const {
+ return entry.mid(1, entry.length() - 2).split(VALUES_SEPARATOR); // drops the first and last character (presumably the enclosing parentheses - confirm), then splits on '|'
+}
+
+const QString NmdParser::KEY_WORD = "NMD";
+const QString NmdParser::VALUES_SEPARATOR = "|";
+
+NmdParser::NmdParser()
+ : InfoPartParser(KEY_WORD)
+{
+
+}
+
+QStringList NmdParser::getQualifierNames() const {
+ return QStringList() << "Gene"
+ << "ID"
+ << "Num_transcripts"
+ << "percent_affected";
+}
+
+QStringList NmdParser::getValues(const QString &entry) const {
+ return entry.mid(1, entry.length() - 2).split(VALUES_SEPARATOR); // drops the first and last character (presumably the enclosing parentheses - confirm), then splits on '|'
+}
+
+} // namespace U2
diff --git a/src/corelibs/U2Formats/src/util/SnpeffInfoParser.h b/src/corelibs/U2Formats/src/util/SnpeffInfoParser.h
new file mode 100644
index 0000000..93d8a8e
--- /dev/null
+++ b/src/corelibs/U2Formats/src/util/SnpeffInfoParser.h
@@ -0,0 +1,147 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef _U2_SNPEFF_INFO_PARSER_H_
+#define _U2_SNPEFF_INFO_PARSER_H_
+
+#include <U2Core/AnnotationData.h>
+
+namespace U2 {
+
+class AnnotationCreationPattern;
+class InfoPartParser;
+class U2OpStatus;
+
+class U2FORMATS_EXPORT SnpeffInfoParser : public QObject {
+public:
+ SnpeffInfoParser();
+ ~SnpeffInfoParser();
+
+ /**
+ * Each @snpeffInfo string can contain data for several annotations.
+ * For each annotation a U2Qualifier list will be produced.
+ * So, @return will contain data for several annotations that are produced from one variation.
+ */
+ QList<QList<U2Qualifier> > parse(U2OpStatus &os, const QString &snpeffInfo) const;
+
+private:
+ void initPartParsers();
+
+ QMap<QString, InfoPartParser *> partParsers;
+
+ static const QString PAIRS_SEPARATOR;
+ static const QString KEY_VALUE_SEPARATOR;
+};
+
+class InfoPartParser : public QObject {
+public:
+ InfoPartParser(const QString &keyWord, bool canStoreMessages = false);
+
+ const QString & getKeyWord() const;
+ QList<QList<U2Qualifier> > parse(U2OpStatus &os, const QString &infoPart) const;
+
+protected:
+ virtual QStringList getQualifierNames() const = 0;
+ virtual QStringList getValues(const QString &entry) const = 0;
+ virtual QList<U2Qualifier> processValue(const QString &qualifierName, const QString &value) const;
+
+ static const QString ERROR;
+ static const QString WARNING;
+ static const QString INFO;
+ static const QString MESSAGE;
+ static const QString MESSAGE_DESCRIPTION;
+
+private:
+ QList<U2Qualifier> parseEntry(U2OpStatus &os, const QString &entry) const;
+
+ const QString keyWord;
+ const bool canStoreMessages;
+
+ static const QString ANNOTATION_SEPARATOR;
+ static const QString SNPEFF_TAG;
+};
+
+class AnnParser : public InfoPartParser {
+public:
+ AnnParser();
+
+ static const QString KEY_WORD;
+
+private:
+ QStringList getQualifierNames() const;
+ QStringList getValues(const QString &entry) const;
+ QList<U2Qualifier> processValue(const QString &qualifierName, const QString &value) const;
+
+ static const QString VALUES_SEPARATOR;
+ static const QString EFFECTS_SEPARATOR;
+ static const QString EFFECT;
+ static const QString EFFECT_DESCRIPTION;
+ static const QString PUTATIVE_IMPACT;
+ static const QString PUTATIVE_IMPACT_DESCRIPTION;
+};
+
+class EffParser : public InfoPartParser {
+public:
+ EffParser();
+
+ static const QString KEY_WORD;
+
+private:
+ QStringList getQualifierNames() const;
+ QStringList getValues(const QString &entry) const;
+ QList<U2Qualifier> processValue(const QString &qualifierName, const QString &value) const;
+
+ static const QString EFFECT_DATA_SEPARATOR;
+ static const QString EFFECT;
+ static const QString EFFECT_DESCRIPTION;
+ static const QString EFFECT_IMPACT;
+ static const QString EFFECT_IMPACT_DESCRIPTION;
+};
+
+class LofParser : public InfoPartParser {
+public:
+ LofParser();
+
+ static const QString KEY_WORD;
+
+private:
+ QStringList getQualifierNames() const;
+ QStringList getValues(const QString &entry) const;
+
+ static const QString VALUES_SEPARATOR;
+};
+
+class NmdParser : public InfoPartParser {
+public:
+ NmdParser();
+
+ static const QString KEY_WORD;
+
+private:
+ QStringList getQualifierNames() const;
+ QStringList getValues(const QString &entry) const;
+
+ static const QString VALUES_SEPARATOR;
+};
+
+} // namespace U2
+
+#endif // _U2_SNPEFF_INFO_PARSER_H_
diff --git a/src/corelibs/U2Formats/transl/russian.ts b/src/corelibs/U2Formats/transl/russian.ts
index d9f39b8..1e6e846 100644
--- a/src/corelibs/U2Formats/transl/russian.ts
+++ b/src/corelibs/U2Formats/transl/russian.ts
@@ -2739,7 +2739,7 @@ The following sequences are empty:</translation>
<message>
<location filename="../src/VectorNtiSequenceFormat.cpp" line="46"/>
<source>Vector NTI sequence</source>
- <translation>Последовательномть Vector NTI</translation>
+ <translation>Последовательность Vector NTI</translation>
</message>
<message>
<location filename="../src/VectorNtiSequenceFormat.cpp" line="47"/>
diff --git a/src/corelibs/U2Gui/src/ToolsMenu.cpp b/src/corelibs/U2Gui/src/ToolsMenu.cpp
index 8dd7c8b..20428c9 100644
--- a/src/corelibs/U2Gui/src/ToolsMenu.cpp
+++ b/src/corelibs/U2Gui/src/ToolsMenu.cpp
@@ -45,6 +45,7 @@ const QString ToolsMenu::NGS_MAP = "NGS_MAP";
const QString ToolsMenu::NGS_INDEX = "NGS_INDEX";
const QString ToolsMenu::NGS_RAW_DNA = "NGS_RAW_DNA";
const QString ToolsMenu::NGS_CALL_VARIANTS = "NGS_CALL_VARIANTS";
+const QString ToolsMenu::NGS_CALL_VARIANTS_AND_EFFECT = "NGS_CALL_VARIANTS_AND_EFFECT";
const QString ToolsMenu::NGS_VARIANT_EFFECT = "NGS_VARIANT_EFFECT";
const QString ToolsMenu::NGS_RAW_RNA = "NGS_RAW_RNA";
const QString ToolsMenu::NGS_RNA = "NGS_RNA";
@@ -126,6 +127,7 @@ void ToolsMenu::init() {
subMenuAction[NGS_MENU] << LINE;
subMenuAction[NGS_MENU] << NGS_RAW_DNA;
subMenuAction[NGS_MENU] << NGS_CALL_VARIANTS;
+ subMenuAction[NGS_MENU] << NGS_CALL_VARIANTS_AND_EFFECT;
subMenuAction[NGS_MENU] << NGS_VARIANT_EFFECT;
subMenuAction[NGS_MENU] << LINE;
subMenuAction[NGS_MENU] << NGS_RAW_RNA;
diff --git a/src/corelibs/U2Gui/src/ToolsMenu.h b/src/corelibs/U2Gui/src/ToolsMenu.h
index a479481..865874d 100644
--- a/src/corelibs/U2Gui/src/ToolsMenu.h
+++ b/src/corelibs/U2Gui/src/ToolsMenu.h
@@ -53,6 +53,7 @@ public:
static const QString NGS_INDEX;
static const QString NGS_RAW_DNA;
static const QString NGS_CALL_VARIANTS;
+ static const QString NGS_CALL_VARIANTS_AND_EFFECT;
static const QString NGS_VARIANT_EFFECT;
static const QString NGS_RAW_RNA;
static const QString NGS_RNA;
diff --git a/src/corelibs/U2Gui/src/util/AddNewDocumentDialogImpl.cpp b/src/corelibs/U2Gui/src/util/AddNewDocumentDialogImpl.cpp
index 20bf5b7..aaae206 100644
--- a/src/corelibs/U2Gui/src/util/AddNewDocumentDialogImpl.cpp
+++ b/src/corelibs/U2Gui/src/util/AddNewDocumentDialogImpl.cpp
@@ -50,7 +50,7 @@ AddNewDocumentDialogImpl::AddNewDocumentDialogImpl(QWidget* p, AddNewDocumentDia
model(m)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470528");
+ new HelpButton(this, buttonBox, "18220388");
if (model.format.isEmpty()) {
model.format = AppContext::getSettings()->getValue(SETTINGS_LASTFORMAT, QString("")).toString();
}
diff --git a/src/corelibs/U2Gui/src/util/AuthenticationDialog.cpp b/src/corelibs/U2Gui/src/util/AuthenticationDialog.cpp
index 2c3ae6d..ac05874 100644
--- a/src/corelibs/U2Gui/src/util/AuthenticationDialog.cpp
+++ b/src/corelibs/U2Gui/src/util/AuthenticationDialog.cpp
@@ -39,7 +39,7 @@ AuthenticationDialog::AuthenticationDialog(const QString &text, QWidget* parent)
authenticationWidget(new AuthenticationWidget)
{
ui->setupUi(this);
- new HelpButton(this, ui->buttonBox, "17470798");
+ new HelpButton(this, ui->buttonBox, "18220658");
ui->mainLayout->insertWidget(1, authenticationWidget);
if (text.isEmpty()) {
diff --git a/src/corelibs/U2Gui/src/util/BreakpointConditionEditDialog.cpp b/src/corelibs/U2Gui/src/util/BreakpointConditionEditDialog.cpp
index b4cc5d9..bd3b69c 100644
--- a/src/corelibs/U2Gui/src/util/BreakpointConditionEditDialog.cpp
+++ b/src/corelibs/U2Gui/src/util/BreakpointConditionEditDialog.cpp
@@ -35,7 +35,7 @@ BreakpointConditionEditDialog::BreakpointConditionEditDialog(QWidget *parent,
{
ui = new Ui_BreakpointConditionEditDialog();
ui->setupUi(this);
- new HelpButton(this, ui->buttonBox, "17471051");
+ new HelpButton(this, ui->buttonBox, "18220911");
ui->buttonBox->button(QDialogButtonBox::Ok)->setText(tr("OK"));
ui->buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/corelibs/U2Gui/src/util/CreateAnnotationDialog.cpp b/src/corelibs/U2Gui/src/util/CreateAnnotationDialog.cpp
index 0e082ff..2d75b48 100644
--- a/src/corelibs/U2Gui/src/util/CreateAnnotationDialog.cpp
+++ b/src/corelibs/U2Gui/src/util/CreateAnnotationDialog.cpp
@@ -46,7 +46,7 @@ CreateAnnotationDialog::CreateAnnotationDialog(QWidget* p, CreateAnnotationModel
ui->setupUi(this);
annWidgetController = new CreateAnnotationWidgetController(m, this, CreateAnnotationWidgetController::Full);
- helpButton = new HelpButton(this, ui->buttonBox, "17470494");
+ helpButton = new HelpButton(this, ui->buttonBox, "18220354");
ui->buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Create"));
ui->mainLayout->insertWidget(0, annWidgetController->getWidget());
diff --git a/src/corelibs/U2Gui/src/util/CreateAnnotationWidgetController.cpp b/src/corelibs/U2Gui/src/util/CreateAnnotationWidgetController.cpp
index c4f21e7..8b23be8 100644
--- a/src/corelibs/U2Gui/src/util/CreateAnnotationWidgetController.cpp
+++ b/src/corelibs/U2Gui/src/util/CreateAnnotationWidgetController.cpp
@@ -249,7 +249,7 @@ QString CreateAnnotationWidgetController::validate() {
}
if (!w->isUsePatternNamesChecked() && !model.hideAnnotationName && !Annotation::isValidAnnotationName(model.data->name)) {
- return tr("Illegal annotation name");
+ return tr("Illegal annotation name! ");
}
if (model.groupName.isEmpty()) {
@@ -456,6 +456,15 @@ QWidget *CreateAnnotationWidgetController::getWidget() const {
return w;
}
+AnnotationCreationPattern CreateAnnotationWidgetController::getAnnotationPattern() const {
+ AnnotationCreationPattern pattern;
+ pattern.annotationName = model.data->name;
+ pattern.type = model.data->type;
+ pattern.groupName = model.groupName;
+ pattern.description = model.description;
+ return pattern;
+}
+
QPair<QWidget*, QWidget*> CreateAnnotationWidgetController::getTaborderEntryAndExitPoints() const {
return w->getTabOrderEntryAndExitPoints();
}
diff --git a/src/corelibs/U2Gui/src/util/CreateAnnotationWidgetController.h b/src/corelibs/U2Gui/src/util/CreateAnnotationWidgetController.h
index b7306dc..b0acbde 100644
--- a/src/corelibs/U2Gui/src/util/CreateAnnotationWidgetController.h
+++ b/src/corelibs/U2Gui/src/util/CreateAnnotationWidgetController.h
@@ -22,6 +22,7 @@
#ifndef _U2_CREATE_ANNOTATION_WIDGET_CONTROLLER_H_
#define _U2_CREATE_ANNOTATION_WIDGET_CONTROLLER_H_
+#include <U2Core/AnnotationCreationPattern.h>
#include <U2Core/AnnotationData.h>
#include <U2Core/GObjectReference.h>
@@ -110,6 +111,7 @@ public:
QWidget* getWidget() const;
const CreateAnnotationModel& getModel() const {return model;}
+ AnnotationCreationPattern getAnnotationPattern() const;
void updateWidgetForAnnotationModel(const CreateAnnotationModel& model);
diff --git a/src/corelibs/U2Gui/src/util/CreateDocumentFromTextDialogController.cpp b/src/corelibs/U2Gui/src/util/CreateDocumentFromTextDialogController.cpp
index d7f5aea..38f1ab8 100644
--- a/src/corelibs/U2Gui/src/util/CreateDocumentFromTextDialogController.cpp
+++ b/src/corelibs/U2Gui/src/util/CreateDocumentFromTextDialogController.cpp
@@ -49,7 +49,7 @@ CreateDocumentFromTextDialogController::CreateDocumentFromTextDialogController(Q
saveController(NULL) {
ui = new Ui_CreateDocumentFromTextDialog();
ui->setupUi(this);
- new HelpButton(this, ui->buttonBox, "17470429");
+ new HelpButton(this, ui->buttonBox, "18220289");
initSaveController();
diff --git a/src/corelibs/U2Gui/src/util/DownloadRemoteFileDialog.cpp b/src/corelibs/U2Gui/src/util/DownloadRemoteFileDialog.cpp
index c27508f..7a9b2ad 100644
--- a/src/corelibs/U2Gui/src/util/DownloadRemoteFileDialog.cpp
+++ b/src/corelibs/U2Gui/src/util/DownloadRemoteFileDialog.cpp
@@ -60,7 +60,7 @@ QString DownloadRemoteFileDialog::defaultDB("");
DownloadRemoteFileDialog::DownloadRemoteFileDialog(QWidget *p):QDialog(p), isQueryDB(false) {
ui = new Ui_DownloadRemoteFileDialog;
ui->setupUi(this);
- new HelpButton(this, ui->buttonBox, "17470443");
+ new HelpButton(this, ui->buttonBox, "18220303");
ui->buttonBox->button(QDialogButtonBox::Ok)->setText(tr("OK"));
ui->buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
@@ -97,7 +97,7 @@ DownloadRemoteFileDialog::DownloadRemoteFileDialog( const QString& id, const QSt
{
ui = new Ui_DownloadRemoteFileDialog;
ui->setupUi(this);
- new HelpButton(this, ui->buttonBox, "17470456");
+ new HelpButton(this, ui->buttonBox, "18220316");
ui->formatBox->addItem(GENBANK_FORMAT);
ui->formatBox->addItem(FASTA_FORMAT);
diff --git a/src/corelibs/U2Gui/src/util/EditQualifierDialog.cpp b/src/corelibs/U2Gui/src/util/EditQualifierDialog.cpp
index 3f2a5be..f51ff39 100644
--- a/src/corelibs/U2Gui/src/util/EditQualifierDialog.cpp
+++ b/src/corelibs/U2Gui/src/util/EditQualifierDialog.cpp
@@ -41,7 +41,7 @@ EditQualifierDialog::EditQualifierDialog(QWidget* p, const U2Qualifier& q, bool
{
ui = new Ui_EditQualifierDialog;
ui->setupUi(this);
- new HelpButton(this, ui->buttonBox, "17470501");
+ new HelpButton(this, ui->buttonBox, "18220361");
ui->buttonBox->button(QDialogButtonBox::Ok)->setText(tr("OK"));
ui->buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/corelibs/U2Gui/src/util/EditSequenceDialogController.cpp b/src/corelibs/U2Gui/src/util/EditSequenceDialogController.cpp
index 1ad4be1..bf09a51 100644
--- a/src/corelibs/U2Gui/src/util/EditSequenceDialogController.cpp
+++ b/src/corelibs/U2Gui/src/util/EditSequenceDialogController.cpp
@@ -69,7 +69,7 @@ EditSequenceDialogController::EditSequenceDialogController(const EditSequencDial
{
ui = new Ui_EditSequenceDialog;
ui->setupUi(this);
- new HelpButton(this, ui->buttonBox, "17470484");
+ new HelpButton(this, ui->buttonBox, "18220344");
ui->buttonBox->button(QDialogButtonBox::Ok)->setText(tr("OK"));
ui->buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/corelibs/U2Gui/src/util/ExportAnnotationsDialog.cpp b/src/corelibs/U2Gui/src/util/ExportAnnotationsDialog.cpp
index 5c9f4dd..ddf32ba 100644
--- a/src/corelibs/U2Gui/src/util/ExportAnnotationsDialog.cpp
+++ b/src/corelibs/U2Gui/src/util/ExportAnnotationsDialog.cpp
@@ -43,7 +43,7 @@ ExportAnnotationsDialog::ExportAnnotationsDialog( const QString &filename, QWidg
: QDialog( parent ), ui( new Ui_ExportAnnotationsDialog( ) )
{
ui->setupUi( this );
- new HelpButton(this, ui->buttonBox, "17470506");
+ new HelpButton(this, ui->buttonBox, "18220366");
ui->buttonBox->button(QDialogButtonBox::Ok)->setText(tr("OK"));
ui->buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/corelibs/U2Gui/src/util/ExportDocumentDialogController.cpp b/src/corelibs/U2Gui/src/util/ExportDocumentDialogController.cpp
index 157c968..594e7f2 100644
--- a/src/corelibs/U2Gui/src/util/ExportDocumentDialogController.cpp
+++ b/src/corelibs/U2Gui/src/util/ExportDocumentDialogController.cpp
@@ -44,7 +44,7 @@ ExportDocumentDialogController::ExportDocumentDialogController(Document* d, QWid
{
ui = new Ui_ExportDocumentDialog();
ui->setupUi(this);
- new HelpButton(this, ui->buttonBox, "17470430");
+ new HelpButton(this, ui->buttonBox, "18220290");
ui->buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Export"));
ui->buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
@@ -62,7 +62,7 @@ ExportDocumentDialogController::ExportDocumentDialogController(GObject *object,
QList<GObject *> objectList = QList<GObject *>() << sourceObject;
initSaveController(objectList, initUrl);
- new HelpButton(this, ui->buttonBox, "17470430");
+ new HelpButton(this, ui->buttonBox, "18220290");
}
void ExportDocumentDialogController::initSaveController(const QList<GObject *> &objects, const QString &fileUrl) {
@@ -71,7 +71,7 @@ void ExportDocumentDialogController::initSaveController(const QList<GObject *> &
config.fileDialogButton = ui->browseButton;
config.fileNameEdit = ui->fileNameEdit;
config.formatCombo = ui->formatCombo;
- config.compressCheckbox= ui->compressCheck;
+ config.compressCheckbox = ui->compressCheck;
config.parentWidget = this;
config.rollOutProjectUrls = true;
config.rollSuffix = "_copy";
@@ -109,11 +109,11 @@ DocumentFormatConstraints ExportDocumentDialogController::getAcceptableConstrain
QString ExportDocumentDialogController::getDocumentURL() const {
QString path = saveController->getSaveFileName();
- if(ui->compressCheck->isChecked()) {
+ if (ui->compressCheck != NULL && ui->compressCheck->isChecked() && ui->compressCheck->isEnabled()) {
QString suffix = path.split(".").last();
if(suffix != "gz") {
return path + ".gz";
- }
+ }
}
return path;
}
diff --git a/src/corelibs/U2Gui/src/util/ExportImageDialog.cpp b/src/corelibs/U2Gui/src/util/ExportImageDialog.cpp
index 9473d4e..7616a8b 100644
--- a/src/corelibs/U2Gui/src/util/ExportImageDialog.cpp
+++ b/src/corelibs/U2Gui/src/util/ExportImageDialog.cpp
@@ -144,31 +144,32 @@ void ExportImageDialog::init() {
ui = new Ui_ImageExportForm;
ui->setupUi(this);
ui->buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Export"));
+ ui->buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
switch (source) {
case WD:
- new HelpButton(this, ui->buttonBox, "17470833");
+ new HelpButton(this, ui->buttonBox, "18220693");
break;
case CircularView:
- new HelpButton(this, ui->buttonBox, "17470511");
+ new HelpButton(this, ui->buttonBox, "18220371");
break;
case MSA:
- new HelpButton(this, ui->buttonBox, "17470584");
+ new HelpButton(this, ui->buttonBox, "18220444");
break;
case SequenceView:
- new HelpButton(this, ui->buttonBox, "17470471");
+ new HelpButton(this, ui->buttonBox, "18220331");
break;
case AssemblyView:
- new HelpButton(this, ui->buttonBox, "17470620");
+ new HelpButton(this, ui->buttonBox, "18220480");
break;
case PHYTreeView:
- new HelpButton(this, ui->buttonBox, "17470649");
+ new HelpButton(this, ui->buttonBox, "18220509");
break;
case DotPlot:
- new HelpButton(this, ui->buttonBox, "17470542");
+ new HelpButton(this, ui->buttonBox, "18220402");
break;
case MolView:
- new HelpButton(this, ui->buttonBox, "17470525");
+ new HelpButton(this, ui->buttonBox, "18220385");
break;
default:
FAIL("Can't find help Id",);
diff --git a/src/corelibs/U2Gui/src/util/ExportImageDialog.ui b/src/corelibs/U2Gui/src/util/ExportImageDialog.ui
index 84c1dc2..5f08a96 100644
--- a/src/corelibs/U2Gui/src/util/ExportImageDialog.ui
+++ b/src/corelibs/U2Gui/src/util/ExportImageDialog.ui
@@ -38,7 +38,7 @@
</sizepolicy>
</property>
<property name="title">
- <string notr="true">Export settings</string>
+ <string>Export settings</string>
</property>
<layout class="QVBoxLayout" name="settingsLayout">
<property name="spacing">
diff --git a/src/corelibs/U2Gui/src/util/ImportDialogs/AceImportDialog.cpp b/src/corelibs/U2Gui/src/util/ImportDialogs/AceImportDialog.cpp
index 5dcbf97..3142f50 100644
--- a/src/corelibs/U2Gui/src/util/ImportDialogs/AceImportDialog.cpp
+++ b/src/corelibs/U2Gui/src/util/ImportDialogs/AceImportDialog.cpp
@@ -48,7 +48,7 @@ AceImportDialog::AceImportDialog(const QVariantMap& _settings) :
saveController(NULL)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470632");
+ new HelpButton(this, buttonBox, "18220492");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("OK"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/corelibs/U2Gui/src/util/RangeSelector.cpp b/src/corelibs/U2Gui/src/util/RangeSelector.cpp
index 580ed56..fefc789 100644
--- a/src/corelibs/U2Gui/src/util/RangeSelector.cpp
+++ b/src/corelibs/U2Gui/src/util/RangeSelector.cpp
@@ -240,7 +240,7 @@ MultipleRangeSelector::MultipleRangeSelector(QWidget* _parent, const QVector<U2R
ui = new Ui_RangeSelectionDialog;
ui->setupUi(this);
- new HelpButton(this, ui->buttonBox, "17470476");
+ new HelpButton(this, ui->buttonBox, "18220336");
ui->buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Go"));
ui->buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/corelibs/U2Gui/src/util/RemovePartFromSequenceDialogController.cpp b/src/corelibs/U2Gui/src/util/RemovePartFromSequenceDialogController.cpp
index 6db5f22..1c13017 100644
--- a/src/corelibs/U2Gui/src/util/RemovePartFromSequenceDialogController.cpp
+++ b/src/corelibs/U2Gui/src/util/RemovePartFromSequenceDialogController.cpp
@@ -55,7 +55,7 @@ RemovePartFromSequenceDialogController::RemovePartFromSequenceDialogController(U
saveController(NULL)
{
ui->setupUi(this);
- new HelpButton(this, ui->buttonBox, "17470484");
+ new HelpButton(this, ui->buttonBox, "18220344");
ui->buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Remove"));
ui->buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/corelibs/U2Gui/src/util/SaveDocumentController.cpp b/src/corelibs/U2Gui/src/util/SaveDocumentController.cpp
index fd3fedb..3aaed9c 100644
--- a/src/corelibs/U2Gui/src/util/SaveDocumentController.cpp
+++ b/src/corelibs/U2Gui/src/util/SaveDocumentController.cpp
@@ -31,6 +31,7 @@
#include <U2Core/DocumentUtils.h>
#include <U2Core/FormatUtils.h>
#include <U2Core/GUrlUtils.h>
+#include <U2Core/L10n.h>
#include <U2Core/U2SafePoints.h>
#include <U2Gui/LastUsedDirHelper.h>
@@ -185,6 +186,15 @@ void SaveDocumentController::sl_fileDialogButtonClicked() {
void SaveDocumentController::sl_formatChanged(const QString &newFormat) {
currentFormat = newFormat;
+ if (conf.compressCheckbox != NULL) {
+ DocumentFormatRegistry* fr = AppContext::getDocumentFormatRegistry();
+ SAFE_POINT(fr != NULL, L10N::nullPointerError("DocumentFormatRegistry"), );
+ DocumentFormat* format = fr->getFormatById(formatsInfo.getIdByName(newFormat));
+ if (format != NULL) { // custom format names without DocumentFormat class can be added into the formats combobox (e.g. ExportCoverageDialog)
+ conf.compressCheckbox->setDisabled(format->checkFlags(DocumentFormatFlag_CannotBeCompressed));
+ }
+ }
+
if (!conf.fileNameEdit->text().isEmpty()) {
QString oldPath = conf.fileNameEdit->text();
cutGzExtension(oldPath);
@@ -202,7 +212,7 @@ void SaveDocumentController::sl_formatChanged(const QString &newFormat) {
}
void SaveDocumentController::sl_compressToggled(bool enable) {
- CHECK(NULL != conf.compressCheckbox, );
+ CHECK(NULL != conf.compressCheckbox && conf.compressCheckbox->isEnabled(), );
QString path = conf.fileNameEdit->text();
if (enable) {
addGzExtension(path);
@@ -246,7 +256,6 @@ void SaveDocumentController::initSimpleFormatInfo(const QList<DocumentFormatId>
fr->getFormatById(id)->getFormatName(),
fr->getFormatById(id)->getSupportedDocumentFileExtensions());
}
-
}
void SaveDocumentController::initFormatComboBox() {
@@ -278,7 +287,7 @@ bool SaveDocumentController::cutGzExtension(QString &path) const {
}
void SaveDocumentController::addGzExtension(QString &path) const {
- CHECK(NULL != conf.compressCheckbox && conf.compressCheckbox->isChecked(), );
+ CHECK(NULL != conf.compressCheckbox && conf.compressCheckbox->isChecked() && conf.compressCheckbox->isEnabled(), );
CHECK(!path.endsWith(".gz"), );
path += ".gz";
}
@@ -298,7 +307,7 @@ void SaveDocumentController::addFormatExtension(QString &path) const {
QString SaveDocumentController::prepareDefaultFileFilter() const {
QStringList extraExtensions;
- if (NULL != conf.compressCheckbox) {
+ if (NULL != conf.compressCheckbox && conf.compressCheckbox->isEnabled()) {
extraExtensions << ".gz";
}
@@ -312,7 +321,7 @@ QString SaveDocumentController::prepareFileFilter() const {
}
QStringList extraExtensions;
- if (NULL != conf.compressCheckbox) {
+ if (NULL != conf.compressCheckbox && conf.compressCheckbox->isEnabled()) {
extraExtensions << ".gz";
}
diff --git a/src/corelibs/U2Gui/src/util/ScriptEditorDialog.cpp b/src/corelibs/U2Gui/src/util/ScriptEditorDialog.cpp
index 4f4b723..27e8f3d 100644
--- a/src/corelibs/U2Gui/src/util/ScriptEditorDialog.cpp
+++ b/src/corelibs/U2Gui/src/util/ScriptEditorDialog.cpp
@@ -48,7 +48,7 @@ ScriptEditorDialog::ScriptEditorDialog(QWidget* w, const QString& roHeaderText,
: QDialog(w), ui(new Ui_ScriptEditorDialog())
{
ui->setupUi(this);
- new HelpButton(this, ui->buttonBox, "17470841");
+ new HelpButton(this, ui->buttonBox, "18220701");
ui->buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Done"));
ui->buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/corelibs/U2Gui/src/util/SearchGenbankSequenceDialogController.cpp b/src/corelibs/U2Gui/src/util/SearchGenbankSequenceDialogController.cpp
index 19478fa..2c907d2 100644
--- a/src/corelibs/U2Gui/src/util/SearchGenbankSequenceDialogController.cpp
+++ b/src/corelibs/U2Gui/src/util/SearchGenbankSequenceDialogController.cpp
@@ -44,7 +44,7 @@ SearchGenbankSequenceDialogController::SearchGenbankSequenceDialogController(QWi
{
ui = new Ui_SearchGenbankSequenceDialog();
ui->setupUi(this);
- new HelpButton(this, ui->buttonBox, "17470456");
+ new HelpButton(this, ui->buttonBox, "18220316");
ui->buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Download"));
ui->buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Close"));
diff --git a/src/corelibs/U2Gui/src/util/shared_db/CommonImportOptionsDialog.cpp b/src/corelibs/U2Gui/src/util/shared_db/CommonImportOptionsDialog.cpp
index 1467e34..9f3e07c 100644
--- a/src/corelibs/U2Gui/src/util/shared_db/CommonImportOptionsDialog.cpp
+++ b/src/corelibs/U2Gui/src/util/shared_db/CommonImportOptionsDialog.cpp
@@ -30,7 +30,7 @@ CommonImportOptionsDialog::CommonImportOptionsDialog(const QString& baseFolder,
QDialog(parent)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470800");
+ new HelpButton(this, buttonBox, "18220660");
init(baseFolder, options);
}
diff --git a/src/corelibs/U2Gui/src/util/shared_db/EditConnectionDialog.cpp b/src/corelibs/U2Gui/src/util/shared_db/EditConnectionDialog.cpp
index 5607d17..6e53d1e 100644
--- a/src/corelibs/U2Gui/src/util/shared_db/EditConnectionDialog.cpp
+++ b/src/corelibs/U2Gui/src/util/shared_db/EditConnectionDialog.cpp
@@ -45,7 +45,7 @@ EditConnectionDialog::EditConnectionDialog(QWidget *parent, const QString &dbiUr
ui(new Ui_EditConnectionDialog)
{
ui->setupUi(this);
- new HelpButton(this, ui->buttonBox, "17470798");
+ new HelpButton(this, ui->buttonBox, "18220658");
adjustSize();
init(dbiUrl, connectionName, userName);
}
diff --git a/src/corelibs/U2Gui/src/util/shared_db/ImportToDatabaseDialog.cpp b/src/corelibs/U2Gui/src/util/shared_db/ImportToDatabaseDialog.cpp
index cf228e1..4048b7e 100644
--- a/src/corelibs/U2Gui/src/util/shared_db/ImportToDatabaseDialog.cpp
+++ b/src/corelibs/U2Gui/src/util/shared_db/ImportToDatabaseDialog.cpp
@@ -61,7 +61,7 @@ ImportToDatabaseDialog::ImportToDatabaseDialog(Document *dbConnection, const QSt
baseFolder(U2DbiUtils::makeFolderCanonical(defaultFolder))
{
ui->setupUi(this);
- new HelpButton(this, ui->buttonBox, "17470800");
+ new HelpButton(this, ui->buttonBox, "18220660");
init();
connectSignals();
updateState();
diff --git a/src/corelibs/U2Gui/src/util/shared_db/ItemToImportEditDialog.cpp b/src/corelibs/U2Gui/src/util/shared_db/ItemToImportEditDialog.cpp
index 24f33e5..8bb561e 100644
--- a/src/corelibs/U2Gui/src/util/shared_db/ItemToImportEditDialog.cpp
+++ b/src/corelibs/U2Gui/src/util/shared_db/ItemToImportEditDialog.cpp
@@ -32,7 +32,7 @@ ItemToImportEditDialog::ItemToImportEditDialog(const QString &item, const QStrin
ui(new Ui_ItemToImportEditDialog)
{
ui->setupUi(this);
- new HelpButton(this, ui->buttonBox, "17470800");
+ new HelpButton(this, ui->buttonBox, "18220660");
init(item, folder, options);
}
diff --git a/src/corelibs/U2Gui/src/util/shared_db/SharedConnectionsDialog.cpp b/src/corelibs/U2Gui/src/util/shared_db/SharedConnectionsDialog.cpp
index d11d9ac..3a82a76 100644
--- a/src/corelibs/U2Gui/src/util/shared_db/SharedConnectionsDialog.cpp
+++ b/src/corelibs/U2Gui/src/util/shared_db/SharedConnectionsDialog.cpp
@@ -84,7 +84,7 @@ SharedConnectionsDialog::SharedConnectionsDialog(QWidget *parent) :
ui(new Ui_SharedConnectionsDialog)
{
ui->setupUi(this);
- new HelpButton(this, ui->buttonBox, "17470798");
+ new HelpButton(this, ui->buttonBox, "18220658");
init();
connectSignals();
@@ -506,7 +506,7 @@ bool SharedConnectionsDialog::checkDbShouldBeUpgraded(const U2DbiRef &ref) {
if (upgradeDatabase) {
QObjectScopedPointer<QMessageBox> question = new QMessageBox(QMessageBox::Question, tr(DATABASE_UPGRADE_TITLE), tr(DATABASE_UPGRADE_TEXT), QMessageBox::Ok | QMessageBox::Cancel| QMessageBox::Help, this);
question->button(QMessageBox::Ok)->setText(tr("Upgrade"));
- HelpButton(question.data(), question->button(QMessageBox::Help), "17470798");
+ HelpButton(question.data(), question->button(QMessageBox::Help), "18220658");
question->setDefaultButton(QMessageBox::Cancel);
const int dialogResult = question->exec();
CHECK(!question.isNull(), true);
diff --git a/src/corelibs/U2Gui/transl/english.ts b/src/corelibs/U2Gui/transl/english.ts
index 442ff31..eaa76fb 100644
--- a/src/corelibs/U2Gui/transl/english.ts
+++ b/src/corelibs/U2Gui/transl/english.ts
@@ -795,6 +795,11 @@
<translation>...</translation>
</message>
<message>
+ <location filename="../src/util/ExportImageDialog.ui" line="41"/>
+ <source>Export settings</source>
+ <translation type="unfinished"></translation>
+ </message>
+ <message>
<location filename="../src/util/ExportImageDialog.ui" line="56"/>
<source>Export to file</source>
<translation>Export to file</translation>
@@ -1742,8 +1747,8 @@ To append data to existing file press 'Append'.</translation>
</message>
<message>
<location filename="../src/util/CreateAnnotationWidgetController.cpp" line="252"/>
- <source>Illegal annotation name</source>
- <translation>Illegal annotation name</translation>
+ <source>Illegal annotation name! </source>
+ <translation>Illegal annotation name! </translation>
</message>
<message>
<location filename="../src/util/CreateAnnotationWidgetController.cpp" line="257"/>
@@ -2164,7 +2169,12 @@ complement(5..15)</translation>
<translation>Saving image to '%1'...</translation>
</message>
<message>
- <location filename="../src/util/ExportImageDialog.cpp" line="226"/>
+ <location filename="../src/util/ExportImageDialog.cpp" line="147"/>
+ <source>Cancel</source>
+ <translation>Cancel</translation>
+ </message>
+ <message>
+ <location filename="../src/util/ExportImageDialog.cpp" line="227"/>
<source>Save Image As</source>
<translation>Save Image As</translation>
</message>
diff --git a/src/corelibs/U2Gui/transl/russian.ts b/src/corelibs/U2Gui/transl/russian.ts
index 6ad19e2..a840380 100644
--- a/src/corelibs/U2Gui/transl/russian.ts
+++ b/src/corelibs/U2Gui/transl/russian.ts
@@ -795,6 +795,11 @@
<translation>...</translation>
</message>
<message>
+ <location filename="../src/util/ExportImageDialog.ui" line="41"/>
+ <source>Export settings</source>
+ <translation>Настройки экспорта</translation>
+ </message>
+ <message>
<location filename="../src/util/ExportImageDialog.ui" line="56"/>
<source>Export to file</source>
<translation>Экспортировать в файл</translation>
@@ -1742,8 +1747,8 @@ To append data to existing file press 'Append'.</source>
</message>
<message>
<location filename="../src/util/CreateAnnotationWidgetController.cpp" line="252"/>
- <source>Illegal annotation name</source>
- <translation>Нелегальные символы в имени аннотации. Допустимы только цифры, буквы и подчёркивания</translation>
+ <source>Illegal annotation name! </source>
+ <translation>Нелегальные символы в имени аннотации. Допустимы только цифры, буквы и подчёркивания! </translation>
</message>
<message>
<location filename="../src/util/CreateAnnotationWidgetController.cpp" line="257"/>
@@ -2164,7 +2169,12 @@ complement(5..15)</translation>
<translation>Сохранение изображения в '%1'...</translation>
</message>
<message>
- <location filename="../src/util/ExportImageDialog.cpp" line="226"/>
+ <location filename="../src/util/ExportImageDialog.cpp" line="147"/>
+ <source>Cancel</source>
+ <translation>Отмена</translation>
+ </message>
+ <message>
+ <location filename="../src/util/ExportImageDialog.cpp" line="227"/>
<source>Save Image As</source>
<translation>Сохранить изображение как</translation>
</message>
@@ -3247,7 +3257,7 @@ check connection settings</translation>
<message>
<location filename="../src/ToolsMenu.cpp" line="178"/>
<source>Primer</source>
- <translation>Подбор праймеров</translation>
+ <translation>Олигонуклеотиды</translation>
</message>
<message>
<location filename="../src/ToolsMenu.cpp" line="184"/>
diff --git a/src/corelibs/U2Lang/src/library/BaseOneOneWorker.cpp b/src/corelibs/U2Lang/src/library/BaseOneOneWorker.cpp
index 0bfb15e..7303a15 100644
--- a/src/corelibs/U2Lang/src/library/BaseOneOneWorker.cpp
+++ b/src/corelibs/U2Lang/src/library/BaseOneOneWorker.cpp
@@ -36,6 +36,8 @@ BaseOneOneWorker::BaseOneOneWorker(Actor *a, bool autoTransitBus, const QString
void BaseOneOneWorker::init() {
input = ports.value(inPortId);
output = ports.value(outPortId);
+ SAFE_POINT(NULL != input, QString("Input port '%1' is NULL").arg(inPortId), );
+ SAFE_POINT(NULL != output, QString("Output port '%1' is NULL").arg(outPortId), );
}
Task * BaseOneOneWorker::tick() {
diff --git a/src/corelibs/U2Lang/src/model/ConfigurationEditor.cpp b/src/corelibs/U2Lang/src/model/ConfigurationEditor.cpp
index db310d0..865b3e7 100644
--- a/src/corelibs/U2Lang/src/model/ConfigurationEditor.cpp
+++ b/src/corelibs/U2Lang/src/model/ConfigurationEditor.cpp
@@ -111,6 +111,8 @@ void PropertyDelegate::setSchemaConfig(SchemaConfig *value) {
schemaConfig = value;
}
+const QString DelegateTags::PLACEHOLDER_TEXT = "placeholder_text";
+
DelegateTags::DelegateTags(QObject *parent)
: QObject(parent)
{
diff --git a/src/corelibs/U2Lang/src/model/ConfigurationEditor.h b/src/corelibs/U2Lang/src/model/ConfigurationEditor.h
index 1d8b830..33ad851 100644
--- a/src/corelibs/U2Lang/src/model/ConfigurationEditor.h
+++ b/src/corelibs/U2Lang/src/model/ConfigurationEditor.h
@@ -165,6 +165,9 @@ public:
static QStringList getStringList(const DelegateTags *tags, const QString &name);
const DelegateTags &operator = (const DelegateTags &other);
+
+ static const QString PLACEHOLDER_TEXT;
+
private:
QVariantMap tags;
};
diff --git a/src/corelibs/U2Private/src/DocumentFormatRegistryImpl.cpp b/src/corelibs/U2Private/src/DocumentFormatRegistryImpl.cpp
index 53327e1..3cefff7 100644
--- a/src/corelibs/U2Private/src/DocumentFormatRegistryImpl.cpp
+++ b/src/corelibs/U2Private/src/DocumentFormatRegistryImpl.cpp
@@ -221,7 +221,7 @@ void DocumentFormatRegistryImpl::init() {
AppContext::getDbiRegistry()->registerDbiFactory(new SQLiteDbiFactory());
AppContext::getDbiRegistry()->registerDbiFactory(new MysqlDbiFactory());
- DocumentFormatFlags flags(DocumentFormatFlag_SupportWriting);
+ DocumentFormatFlags flags(DocumentFormatFlag_SupportWriting | DocumentFormatFlag_CannotBeCompressed);
DbiDocumentFormat* sdbi = new DbiDocumentFormat(SQLiteDbiFactory::ID, BaseDocumentFormats::UGENEDB, tr("UGENE Database"), QStringList()<<"ugenedb", flags);
registerFormat(sdbi);
}
diff --git a/src/corelibs/U2Private/transl/english.ts b/src/corelibs/U2Private/transl/english.ts
index 1797d39..afb9bcf 100644
--- a/src/corelibs/U2Private/transl/english.ts
+++ b/src/corelibs/U2Private/transl/english.ts
@@ -40,37 +40,47 @@
<context>
<name>U2::AddPluginTask</name>
<message>
- <location filename="../src/PluginSupportImpl.cpp" line="489"/>
+ <location filename="../src/PluginSupportImpl.cpp" line="419"/>
<source>Add plugin task: %1</source>
<translation>Add plugin task: %1</translation>
</message>
<message>
- <location filename="../src/PluginSupportImpl.cpp" line="496"/>
+ <location filename="../src/PluginSupportImpl.cpp" line="434"/>
<source>Plugin is already loaded: %1</source>
<translation>Plugin is already loaded: %1</translation>
</message>
<message>
- <location filename="../src/PluginSupportImpl.cpp" line="504"/>
+ <location filename="../src/PluginSupportImpl.cpp" line="442"/>
<source>Plugin %1 depends on %2 which is not loaded</source>
<translation>Plugin %1 depends on %2 which is not loaded</translation>
</message>
<message>
- <location filename="../src/PluginSupportImpl.cpp" line="508"/>
+ <location filename="../src/PluginSupportImpl.cpp" line="446"/>
<source>Plugin %1 depends on %2 which is available, but the version is too old</source>
<translation>Plugin %1 depends on %2 which is available, but the version is too old</translation>
</message>
<message>
- <location filename="../src/PluginSupportImpl.cpp" line="519"/>
+ <location filename="../src/PluginSupportImpl.cpp" line="457"/>
<source>Plugin loading error: %1, Error string %2</source>
<translation>Plugin loading error: %1, Error string %2</translation>
</message>
<message>
- <location filename="../src/PluginSupportImpl.cpp" line="526"/>
+ <location filename="../src/PluginSupportImpl.cpp" line="462"/>
+ <source>Settings is NULL</source>
+ <translation type="unfinished"></translation>
+ </message>
+ <message>
+ <location filename="../src/PluginSupportImpl.cpp" line="490"/>
+ <source>Plugin loading error: %1. Verification failed.</source>
+ <translation type="unfinished"></translation>
+ </message>
+ <message>
+ <location filename="../src/PluginSupportImpl.cpp" line="513"/>
<source>Plugin initialization routine was not found: %1</source>
<translation>Plugin initialization routine was not found: %1</translation>
</message>
<message>
- <location filename="../src/PluginSupportImpl.cpp" line="532"/>
+ <location filename="../src/PluginSupportImpl.cpp" line="519"/>
<source>Plugin initialization failed: %1</source>
<translation>Plugin initialization failed: %1</translation>
</message>
@@ -202,22 +212,21 @@ By default, loglevel="ERROR".</translation>
<context>
<name>U2::LoadAllPluginsTask</name>
<message>
- <location filename="../src/PluginSupportImpl.cpp" line="112"/>
+ <location filename="../src/PluginSupportImpl.cpp" line="83"/>
<source>Loading start up plugins</source>
<translation>Loading start up plugins</translation>
</message>
<message>
- <location filename="../src/PluginSupportImpl.cpp" line="163"/>
<source>Problem occurred loading the OpenCL driver. Please try to update drivers if you're going to make calculations on your video card. For details see this page: <a href="%1">%1</a></source>
- <translation>Problem occurred loading the OpenCL driver. Please try to update drivers if you're going to make calculations on your video card. For details see this page: <a href="%1">%1</a></translation>
+ <translation type="vanished">Problem occurred loading the OpenCL driver. Please try to update drivers if you're going to make calculations on your video card. For details see this page: <a href="%1">%1</a></translation>
</message>
<message>
- <location filename="../src/PluginSupportImpl.cpp" line="174"/>
+ <location filename="../src/PluginSupportImpl.cpp" line="115"/>
<source>File not found: %1</source>
<translation>File not found: %1</translation>
</message>
<message>
- <location filename="../src/PluginSupportImpl.cpp" line="179"/>
+ <location filename="../src/PluginSupportImpl.cpp" line="120"/>
<source>Invalid file format: %1</source>
<translation>Invalid file format: %1</translation>
</message>
@@ -454,7 +463,7 @@ By default, loglevel="ERROR".</translation>
<context>
<name>U2::VerifyPluginTask</name>
<message>
- <location filename="../src/PluginSupportImpl.cpp" line="560"/>
+ <location filename="../src/PluginSupportImpl.cpp" line="548"/>
<source>Verify plugin task: %1</source>
<translation>Verify plugin task: %1</translation>
</message>
diff --git a/src/corelibs/U2Private/transl/russian.ts b/src/corelibs/U2Private/transl/russian.ts
index 886ea8c..1fa7262 100644
--- a/src/corelibs/U2Private/transl/russian.ts
+++ b/src/corelibs/U2Private/transl/russian.ts
@@ -39,37 +39,47 @@
<context>
<name>U2::AddPluginTask</name>
<message>
- <location filename="../src/PluginSupportImpl.cpp" line="489"/>
+ <location filename="../src/PluginSupportImpl.cpp" line="419"/>
<source>Add plugin task: %1</source>
<translation>Загрузка внешнего модуля: %1</translation>
</message>
<message>
- <location filename="../src/PluginSupportImpl.cpp" line="496"/>
+ <location filename="../src/PluginSupportImpl.cpp" line="434"/>
<source>Plugin is already loaded: %1</source>
<translation>Модуль уже загружен: %1</translation>
</message>
<message>
- <location filename="../src/PluginSupportImpl.cpp" line="504"/>
+ <location filename="../src/PluginSupportImpl.cpp" line="442"/>
<source>Plugin %1 depends on %2 which is not loaded</source>
<translation>Модуль %1 зависит от модуля %2, который не был загружен</translation>
</message>
<message>
- <location filename="../src/PluginSupportImpl.cpp" line="508"/>
+ <location filename="../src/PluginSupportImpl.cpp" line="446"/>
<source>Plugin %1 depends on %2 which is available, but the version is too old</source>
<translation>Модуль %1 зависит от модуля %2, который доступен, но версия которого устарела</translation>
</message>
<message>
- <location filename="../src/PluginSupportImpl.cpp" line="519"/>
+ <location filename="../src/PluginSupportImpl.cpp" line="457"/>
<source>Plugin loading error: %1, Error string %2</source>
<translation>Ошибка загрузки модуля: %1, строка %2</translation>
</message>
<message>
- <location filename="../src/PluginSupportImpl.cpp" line="526"/>
+ <location filename="../src/PluginSupportImpl.cpp" line="462"/>
+ <source>Settings is NULL</source>
+ <translation type="unfinished"></translation>
+ </message>
+ <message>
+ <location filename="../src/PluginSupportImpl.cpp" line="490"/>
+ <source>Plugin loading error: %1. Verification failed.</source>
+ <translation type="unfinished"></translation>
+ </message>
+ <message>
+ <location filename="../src/PluginSupportImpl.cpp" line="513"/>
<source>Plugin initialization routine was not found: %1</source>
<translation>Не найдена входная функция библиотеки модуля: %1</translation>
</message>
<message>
- <location filename="../src/PluginSupportImpl.cpp" line="532"/>
+ <location filename="../src/PluginSupportImpl.cpp" line="519"/>
<source>Plugin initialization failed: %1</source>
<translation>Ошибка инициализации модуля: %1</translation>
</message>
@@ -201,22 +211,21 @@ By default, loglevel="ERROR".</source>
<context>
<name>U2::LoadAllPluginsTask</name>
<message>
- <location filename="../src/PluginSupportImpl.cpp" line="112"/>
+ <location filename="../src/PluginSupportImpl.cpp" line="83"/>
<source>Loading start up plugins</source>
<translation>Загрузка подключаемых модулей</translation>
</message>
<message>
- <location filename="../src/PluginSupportImpl.cpp" line="163"/>
<source>Problem occurred loading the OpenCL driver. Please try to update drivers if you're going to make calculations on your video card. For details see this page: <a href="%1">%1</a></source>
- <translation>Problem occurred loading the OpenCL driver. Please try to update drivers if you're going to make calculations on your video card. For details see this page: <a href="%1">%1</a></translation>
+ <translation type="vanished">Problem occurred loading the OpenCL driver. Please try to update drivers if you're going to make calculations on your video card. For details see this page: <a href="%1">%1</a></translation>
</message>
<message>
- <location filename="../src/PluginSupportImpl.cpp" line="174"/>
+ <location filename="../src/PluginSupportImpl.cpp" line="115"/>
<source>File not found: %1</source>
<translation>Не найден внешний модуль: %1</translation>
</message>
<message>
- <location filename="../src/PluginSupportImpl.cpp" line="179"/>
+ <location filename="../src/PluginSupportImpl.cpp" line="120"/>
<source>Invalid file format: %1</source>
<translation>Некорректный файл библиотеки: %1</translation>
</message>
@@ -453,7 +462,7 @@ By default, loglevel="ERROR".</source>
<context>
<name>U2::VerifyPluginTask</name>
<message>
- <location filename="../src/PluginSupportImpl.cpp" line="560"/>
+ <location filename="../src/PluginSupportImpl.cpp" line="548"/>
<source>Verify plugin task: %1</source>
<translation>Задача верификации модуля: %1</translation>
</message>
diff --git a/src/corelibs/U2View/src/ov_assembly/AssemblyBrowser.cpp b/src/corelibs/U2View/src/ov_assembly/AssemblyBrowser.cpp
index 018e4d4..b551658 100644
--- a/src/corelibs/U2View/src/ov_assembly/AssemblyBrowser.cpp
+++ b/src/corelibs/U2View/src/ov_assembly/AssemblyBrowser.cpp
@@ -376,7 +376,7 @@ void AssemblyBrowser::setGlobalCoverageInfo(CoverageInfo newInfo) {
if(model->hasCachedCoverageStat()) {
U2OpStatus2Log status;
U2AssemblyCoverageStat coverageStat = model->getCoverageStat(status);
- if(!status.isCoR() && coverageStat.coverage.size() > newInfo.coverageInfo.size()) {
+ if(!status.isCoR() && coverageStat.coverage->size() > newInfo.coverageInfo.size()) {
newInfo.coverageInfo = U2AssemblyUtils::coverageStatToVector(coverageStat);
newInfo.updateStats();
}
@@ -414,9 +414,9 @@ qint64 AssemblyBrowser::getCoverageAtPos(qint64 pos) {
} else {
U2OpStatus2Log status;
U2AssemblyCoverageStat coverageStat;
- coverageStat.coverage.resize(1);
+ coverageStat.coverage->resize(1);
model->calculateCoverageStat(U2Region(pos, 1), coverageStat, status);
- return coverageStat.coverage.first().maxValue;
+ return coverageStat.coverage->first().maxValue;
}
}
diff --git a/src/corelibs/U2View/src/ov_assembly/AssemblyInfoWidget.cpp b/src/corelibs/U2View/src/ov_assembly/AssemblyInfoWidget.cpp
index e8c606b..ba097fe 100644
--- a/src/corelibs/U2View/src/ov_assembly/AssemblyInfoWidget.cpp
+++ b/src/corelibs/U2View/src/ov_assembly/AssemblyInfoWidget.cpp
@@ -118,7 +118,7 @@ AssemblyInfoWidget::AssemblyInfoWidget(AssemblyBrowser *browser, QWidget *p)
const QString AssemblyInfoWidgetFactory::GROUP_ID = "OP_ASS_INFO";
const QString AssemblyInfoWidgetFactory::GROUP_ICON_STR = ":core/images/chart_bar.png";
const QString AssemblyInfoWidgetFactory::GROUP_TITLE = QString(QObject::tr("Assembly Statistics"));
-const QString AssemblyInfoWidgetFactory::GROUP_DOC_PAGE = "17470627";
+const QString AssemblyInfoWidgetFactory::GROUP_DOC_PAGE = "18220487";
AssemblyInfoWidgetFactory::AssemblyInfoWidgetFactory()
diff --git a/src/corelibs/U2View/src/ov_assembly/AssemblyModel.cpp b/src/corelibs/U2View/src/ov_assembly/AssemblyModel.cpp
index fdd9d20..6ac3360 100644
--- a/src/corelibs/U2View/src/ov_assembly/AssemblyModel.cpp
+++ b/src/corelibs/U2View/src/ov_assembly/AssemblyModel.cpp
@@ -122,7 +122,7 @@ void AssemblyModel::calculateCoverageStat(const U2Region & r, U2AssemblyCoverage
}
bool AssemblyModel::hasCachedCoverageStat() {
- if(!cachedCoverageStat.coverage.isEmpty()) {
+ if(!cachedCoverageStat.coverage->isEmpty()) {
return true;
}
U2AttributeDbi * attributeDbi = dbiHandle.dbi->getAttributeDbi();
@@ -140,21 +140,28 @@ bool AssemblyModel::hasCachedCoverageStat() {
const U2AssemblyCoverageStat &AssemblyModel::getCoverageStat(U2OpStatus & os) {
QMutexLocker mutexLocker(&mutex);
Q_UNUSED(mutexLocker);
- if(cachedCoverageStat.coverage.isEmpty()) {
+ if(cachedCoverageStat.coverage->isEmpty()) {
U2AttributeDbi * attributeDbi = dbiHandle.dbi->getAttributeDbi();
if(NULL != attributeDbi) {
U2ByteArrayAttribute attr = U2AttributeUtils::findByteArrayAttribute(attributeDbi, assembly.id, COVERAGE_STAT_ATTRIBUTE_NAME, os);
if(!os.isCoR()) {
+ /*
if(attr.hasValidId()) {
// TODO: check version
- U2AssemblyUtils::deserializeCoverageStat(attr.value, cachedCoverageStat, os);
+ U2AssemblyUtils::deserializeCoverageStat(attr.value, *cachedCoverageStat, os);
} else {
+ */
qint64 length = getModelLength(os);
if(!os.isCoR()) {
- static const qint64 MAX_COVERAGE_CACHE_SIZE = 1000*1000;
- int coverageCacheSize = (int)qMin(MAX_COVERAGE_CACHE_SIZE, length);
- cachedCoverageStat.coverage.resize(coverageCacheSize);
- calculateCoverageStat(U2Region(0, length), cachedCoverageStat, os);
+ QScopedPointer<QVector<CoveragePerBaseInfo> > results(new QVector<CoveragePerBaseInfo>(length));
+ cachedCoverageStat.coverage->clear();
+ cachedCoverageStat.coverage->reserve(length);
+ U2AssemblyUtils::calculateCoveragePerBase(getDbiConnection().dbi->getDbiRef(), getAssembly().id, U2Region(0, length), results.data(), os);
+ for (int i = 0; i < results->size(); i++) {
+ cachedCoverageStat.coverage->append(U2Range<int>(results->at(i).coverage, results->at(i).coverage));
+ }
+
+ /*
if(!os.isCoR()) {
U2ByteArrayAttribute attribute;
attribute.objectId = assembly.id;
@@ -165,8 +172,9 @@ const U2AssemblyCoverageStat &AssemblyModel::getCoverageStat(U2OpStatus & os) {
attributeDbi->createByteArrayAttribute(attribute, opStatus);
LOG_OP(opStatus);
}
+ */
}
- }
+ //}
}
} else {
os.setError("Attribute DBI is not supported");
diff --git a/src/corelibs/U2View/src/ov_assembly/AssemblyNavigationWidget.cpp b/src/corelibs/U2View/src/ov_assembly/AssemblyNavigationWidget.cpp
index 2af3267..0cd75c3 100644
--- a/src/corelibs/U2View/src/ov_assembly/AssemblyNavigationWidget.cpp
+++ b/src/corelibs/U2View/src/ov_assembly/AssemblyNavigationWidget.cpp
@@ -132,7 +132,7 @@ void CoveredRegionsLabel::sl_updateContent() {
const QString AssemblyNavigationWidgetFactory::GROUP_ID = "OP_ASS_NAVIGATION";
const QString AssemblyNavigationWidgetFactory::GROUP_ICON_STR = ":core/images/goto.png";
const QString AssemblyNavigationWidgetFactory::GROUP_TITLE = QString(QObject::tr("Navigation"));
-const QString AssemblyNavigationWidgetFactory::GROUP_DOC_PAGE = "17470625";
+const QString AssemblyNavigationWidgetFactory::GROUP_DOC_PAGE = "18220485";
AssemblyNavigationWidgetFactory::AssemblyNavigationWidgetFactory() {
objectViewOfWidget = ObjViewType_AssemblyBrowser;
diff --git a/src/corelibs/U2View/src/ov_assembly/AssemblyReadsArea.cpp b/src/corelibs/U2View/src/ov_assembly/AssemblyReadsArea.cpp
index a0f1e81..8ee8fa7 100644
--- a/src/corelibs/U2View/src/ov_assembly/AssemblyReadsArea.cpp
+++ b/src/corelibs/U2View/src/ov_assembly/AssemblyReadsArea.cpp
@@ -24,6 +24,7 @@
#include <QApplication>
#include <QCursor>
+#include <QMessageBox>
#include <QPainter>
#include <QResizeEvent>
#include <QVBoxLayout>
@@ -922,6 +923,9 @@ void AssemblyReadsArea::sl_onExportRead() {
void AssemblyReadsArea::sl_onExportReadsOnScreen() {
if(!cachedReads.data.isEmpty()) {
exportReads(cachedReads.data);
+ } else {
+ QMessageBox::warning(QApplication::activeWindow(), AssemblyReadsArea::tr("Export visible reads as sequence."),
+ AssemblyReadsArea::tr("There are no reads in the current area. Nothing to export!"), QMessageBox::Ok);
}
}
diff --git a/src/corelibs/U2View/src/ov_assembly/AssemblySettingsWidget.cpp b/src/corelibs/U2View/src/ov_assembly/AssemblySettingsWidget.cpp
index 4d7fe8e..7d7e192 100644
--- a/src/corelibs/U2View/src/ov_assembly/AssemblySettingsWidget.cpp
+++ b/src/corelibs/U2View/src/ov_assembly/AssemblySettingsWidget.cpp
@@ -231,7 +231,7 @@ QWidget * AssemblySettingsWidget::createRulerSettings() {
const QString AssemblySettingsWidgetFactory::GROUP_ID = "OP_ASS_SETTINGS";
const QString AssemblySettingsWidgetFactory::GROUP_ICON_STR = ":core/images/settings2.png";
const QString AssemblySettingsWidgetFactory::GROUP_TITLE = QString(QObject::tr("Assembly Browser Settings"));
-const QString AssemblySettingsWidgetFactory::GROUP_DOC_PAGE = "17470626";
+const QString AssemblySettingsWidgetFactory::GROUP_DOC_PAGE = "18220486";
AssemblySettingsWidgetFactory::AssemblySettingsWidgetFactory()
diff --git a/src/corelibs/U2View/src/ov_assembly/CalculateCoveragePerBaseTask.cpp b/src/corelibs/U2View/src/ov_assembly/CalculateCoveragePerBaseTask.cpp
index 49cc846..511591f 100644
--- a/src/corelibs/U2View/src/ov_assembly/CalculateCoveragePerBaseTask.cpp
+++ b/src/corelibs/U2View/src/ov_assembly/CalculateCoveragePerBaseTask.cpp
@@ -56,19 +56,7 @@ CalculateCoveragePerBaseOnRegionTask::~CalculateCoveragePerBaseOnRegionTask() {
}
void CalculateCoveragePerBaseOnRegionTask::run() {
- DbiConnection con(dbiRef, stateInfo);
- CHECK_OP(stateInfo, );
- U2AssemblyDbi *assemblyDbi = con.dbi->getAssemblyDbi();
- SAFE_POINT_EXT(NULL != assemblyDbi, setError(tr("Assembly DBI is NULL")), );
-
- results->resize(region.length);
-
- QScopedPointer<U2DbiIterator<U2AssemblyRead> > readsIterator(assemblyDbi->getReads(assemblyId, region, stateInfo));
- while (readsIterator->hasNext()) {
- const U2AssemblyRead read = readsIterator->next();
- processRead(read);
- CHECK_OP(stateInfo, );
- }
+ U2AssemblyUtils::calculateCoveragePerBase(dbiRef, assemblyId, region, results, stateInfo);
}
const U2Region &CalculateCoveragePerBaseOnRegionTask::getRegion() const {
@@ -81,71 +69,6 @@ QVector<CoveragePerBaseInfo> *CalculateCoveragePerBaseOnRegionTask::takeResult()
return result;
}
-void CalculateCoveragePerBaseOnRegionTask::processRead(const U2AssemblyRead &read) {
- const qint64 startPos = qMax(read->leftmostPos, region.startPos);
- const qint64 endPos = qMin(read->leftmostPos + read->effectiveLen, region.endPos());
- const U2Region regionToProcess = U2Region(startPos, endPos - startPos);
-
- // we have used effective length of the read, so insertions/deletions are already taken into account
- // cigarString can be longer than needed
- QByteArray cigarString;
- foreach (const U2CigarToken &cigar, read->cigar) {
- cigarString += QByteArray(cigar.count, U2AssemblyUtils::cigar2Char(cigar.op));
- }
-
- if (read->leftmostPos < regionToProcess.startPos) {
- cigarString = cigarString.mid(regionToProcess.startPos - read->leftmostPos);
- }
-
- for (int positionOffset = 0, cigarOffset = 0, deletionsCount = 0, insertionsCount = 0; regionToProcess.startPos + positionOffset < regionToProcess.endPos(); positionOffset++) {
- char currentBase = 'N';
- CoveragePerBaseInfo &info = (*results)[regionToProcess.startPos + positionOffset - region.startPos];
- const U2CigarOp cigarOp = nextCigarOp(cigarString, cigarOffset, insertionsCount);
- CHECK_OP(stateInfo, );
-
- switch(cigarOp) {
- case U2CigarOp_I:
- case U2CigarOp_S:
- // skip the insertion
- continue;
- case U2CigarOp_D:
- // skip the deletion
- deletionsCount++;
- continue;
- case U2CigarOp_N:
- // skip the deletion
- deletionsCount++;
- break;
- default:
- currentBase = read->readSequence[positionOffset - deletionsCount + insertionsCount];
- break;
- }
- info.basesCount[currentBase] = info.basesCount[currentBase] + 1;
- info.coverage++;
- }
-}
-
-U2CigarOp CalculateCoveragePerBaseOnRegionTask::nextCigarOp(const QByteArray &cigarString, int &index, int &insertionsCount) {
- QString errString;
- U2CigarOp cigarOp = U2CigarOp_Invalid;
-
- do {
- SAFE_POINT_EXT(index < cigarString.length(), setError(tr("Cigar string: out of bounds")), U2CigarOp_Invalid);
- cigarOp = U2AssemblyUtils::char2Cigar(cigarString[index], errString);
- if (Q_UNLIKELY(!errString.isEmpty() && !hasError())) {
- setError(errString);
- }
- CHECK_OP(stateInfo, U2CigarOp_Invalid);
- index++;
-
- if (U2CigarOp_I == cigarOp || U2CigarOp_S == cigarOp) {
- insertionsCount++;
- }
- } while (U2CigarOp_I == cigarOp || U2CigarOp_S == cigarOp || U2CigarOp_P == cigarOp);
-
- return cigarOp;
-}
-
CalculateCoveragePerBaseTask::CalculateCoveragePerBaseTask(const U2DbiRef &dbiRef, const U2DataId &assemblyId) :
Task(tr("Calculate coverage per base for assembly %1"), TaskFlags_NR_FOSE_COSC),
dbiRef(dbiRef),
diff --git a/src/corelibs/U2View/src/ov_assembly/CalculateCoveragePerBaseTask.h b/src/corelibs/U2View/src/ov_assembly/CalculateCoveragePerBaseTask.h
index 26c9731..478a2d9 100644
--- a/src/corelibs/U2View/src/ov_assembly/CalculateCoveragePerBaseTask.h
+++ b/src/corelibs/U2View/src/ov_assembly/CalculateCoveragePerBaseTask.h
@@ -25,18 +25,10 @@
#include <U2Core/U2Assembly.h>
#include <U2Core/Task.h>
#include <U2Core/U2Type.h>
+#include <U2Core/U2AssemblyUtils.h>
namespace U2 {
-class CoveragePerBaseInfo {
-public:
- CoveragePerBaseInfo() :
- coverage(0) {}
-
- int coverage;
- QMap<char, int> basesCount;
-};
-
class CalculateCoveragePerBaseOnRegionTask : public Task {
Q_OBJECT
public:
@@ -49,9 +41,6 @@ public:
QVector<CoveragePerBaseInfo> *takeResult();
private:
- void processRead(const U2AssemblyRead &read);
- U2CigarOp nextCigarOp(const QByteArray &cigarString, int &index, int &insertionsCount);
-
const U2DbiRef dbiRef;
const U2DataId assemblyId;
const U2Region region;
diff --git a/src/corelibs/U2View/src/ov_assembly/CoverageInfo.cpp b/src/corelibs/U2View/src/ov_assembly/CoverageInfo.cpp
index 54b73c4..10a9a74 100644
--- a/src/corelibs/U2View/src/ov_assembly/CoverageInfo.cpp
+++ b/src/corelibs/U2View/src/ov_assembly/CoverageInfo.cpp
@@ -19,10 +19,11 @@
* MA 02110-1301, USA.
*/
+#include "AssemblyModel.h"
#include "CoverageInfo.h"
#include <U2Core/U2OpStatusUtils.h>
-#include "AssemblyModel.h"
+#include <U2Core/U2SafePoints.h>
#include <algorithm>
#include <limits>
@@ -44,13 +45,10 @@ void CoverageInfo::updateStats() {
}
CalcCoverageInfoTask::CalcCoverageInfoTask(const CalcCoverageInfoTaskSettings & settings_) :
-BackgroundTask<CoverageInfo>("Calculate assembly coverage", TaskFlag_None), settings(settings_)
-{
- tpm = Progress_Manual;
-};
+BackgroundTask<CoverageInfo>("Calculate assembly coverage", TaskFlags_NR_FOSE_COSC), settings(settings_), calculateTask(NULL) {}
-void CalcCoverageInfoTask::run() {
+void CalcCoverageInfoTask::prepare() {
U2AssemblyCoverageStat cachedCoverageStat;
{
cachedCoverageStat = settings.model->getCoverageStat(stateInfo);
@@ -69,35 +67,41 @@ void CalcCoverageInfoTask::run() {
}
}
double basesPerRegion = (double)settings.visibleRange.length/settings.regions;
- double coverageStatBasesPerRegion = (double)modelLength/cachedCoverageStat.coverage.size();
+ double coverageStatBasesPerRegion = (double)modelLength/cachedCoverageStat.coverage->size();
result.coverageInfo.resize(settings.regions);
result.region = settings.visibleRange;
- if(cachedCoverageStat.coverage.isEmpty() || (coverageStatBasesPerRegion > basesPerRegion)) {
- U2AssemblyCoverageStat coverageStat;
- coverageStat.coverage.resize(settings.regions);
- {
- settings.model->calculateCoverageStat(settings.visibleRange, coverageStat, stateInfo);
- if(stateInfo.isCoR()) {
- return;
- }
- }
- assert(coverageStat.coverage.size() == settings.regions);
- for(int regionIndex = 0;regionIndex < settings.regions;regionIndex++) {
- result.coverageInfo[regionIndex] = coverageStat.coverage[regionIndex].maxValue;
- }
+ if(cachedCoverageStat.coverage->isEmpty() || (coverageStatBasesPerRegion > basesPerRegion)) {
+ calculateTask = new CalculateCoveragePerBaseOnRegionTask(settings.model->getDbiConnection().dbi->getDbiRef(), settings.model->getAssembly().id, settings.visibleRange);
+ addSubTask(calculateTask);
} else {
for(int regionIndex = 0;regionIndex < settings.regions;regionIndex++) {
int startPosition = qRound((settings.visibleRange.startPos + basesPerRegion*regionIndex)/coverageStatBasesPerRegion);
int endPosition = qRound((settings.visibleRange.startPos + basesPerRegion*(regionIndex + 1))/coverageStatBasesPerRegion);
result.coverageInfo[regionIndex] = 0;
for(int i = startPosition;i < endPosition;i++) {
- result.coverageInfo[regionIndex] = std::max(result.coverageInfo[regionIndex], (qint64)cachedCoverageStat.coverage[i].maxValue);
+ result.coverageInfo[regionIndex] = std::max(result.coverageInfo[regionIndex], (qint64)cachedCoverageStat.coverage->at(i).maxValue);
}
+ result.updateStats();
}
}
- result.updateStats();
+}
+
+QList<Task*> CalcCoverageInfoTask::onSubTaskFinished(Task* subTask) {
+ QList<Task*> tasks;
+ CHECK(!subTask->hasError(), tasks);
+ CHECK(!hasError(), tasks);
+ if (subTask == calculateTask) {
+ QVector<CoveragePerBaseInfo> *info = calculateTask->takeResult();
+ for (int i = 0; i < info->size(); i++) {
+ result.coverageInfo[i] = info->at(i).coverage;
+ }
+
+ delete info;
+ result.updateStats();
+ }
+ return tasks;
}
}
diff --git a/src/corelibs/U2View/src/ov_assembly/CoverageInfo.h b/src/corelibs/U2View/src/ov_assembly/CoverageInfo.h
index 2aae9f7..ef962d7 100644
--- a/src/corelibs/U2View/src/ov_assembly/CoverageInfo.h
+++ b/src/corelibs/U2View/src/ov_assembly/CoverageInfo.h
@@ -23,6 +23,7 @@
#define __COVERAGE_INFO_H__
#include "AssemblyModel.h"
+#include "CalculateCoveragePerBaseTask.h"
#include <U2Core/BackgroundTaskRunner.h>
#include <U2Core/U2Region.h>
@@ -57,9 +58,11 @@ struct CalcCoverageInfoTaskSettings {
class CalcCoverageInfoTask : public BackgroundTask<CoverageInfo> {
public:
CalcCoverageInfoTask(const CalcCoverageInfoTaskSettings & settings);
- virtual void run();
+ void prepare();
+ QList<Task*> onSubTaskFinished(Task *subTask);
private:
CalcCoverageInfoTaskSettings settings;
+ CalculateCoveragePerBaseOnRegionTask *calculateTask;
};
}
diff --git a/src/corelibs/U2View/src/ov_assembly/ExportConsensusDialog.cpp b/src/corelibs/U2View/src/ov_assembly/ExportConsensusDialog.cpp
index 49d0f75..4c21fa0 100644
--- a/src/corelibs/U2View/src/ov_assembly/ExportConsensusDialog.cpp
+++ b/src/corelibs/U2View/src/ov_assembly/ExportConsensusDialog.cpp
@@ -42,7 +42,7 @@ ExportConsensusDialog::ExportConsensusDialog(QWidget *p, const ExportConsensusTa
saveController(NULL)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470619");
+ new HelpButton(this, buttonBox, "18220479");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Export"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
//hide for this dialog
diff --git a/src/corelibs/U2View/src/ov_assembly/ExportConsensusVariationsDialog.cpp b/src/corelibs/U2View/src/ov_assembly/ExportConsensusVariationsDialog.cpp
index 45d0173..ad2eef8 100644
--- a/src/corelibs/U2View/src/ov_assembly/ExportConsensusVariationsDialog.cpp
+++ b/src/corelibs/U2View/src/ov_assembly/ExportConsensusVariationsDialog.cpp
@@ -41,7 +41,7 @@ ExportConsensusVariationsDialog::ExportConsensusVariationsDialog(QWidget *p, con
{
setupUi(this);
setWindowTitle(tr("Export Consensus Variations"));
- new HelpButton(this, buttonBox, "17470621");
+ new HelpButton(this, buttonBox, "18220481");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Export"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
//hide for this dialog
diff --git a/src/corelibs/U2View/src/ov_assembly/ExportCoverageDialog.cpp b/src/corelibs/U2View/src/ov_assembly/ExportCoverageDialog.cpp
index d469200..19ff798 100644
--- a/src/corelibs/U2View/src/ov_assembly/ExportCoverageDialog.cpp
+++ b/src/corelibs/U2View/src/ov_assembly/ExportCoverageDialog.cpp
@@ -97,7 +97,7 @@ void ExportCoverageDialog::sl_formatChanged(const QString &format) {
void ExportCoverageDialog::initLayout() {
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Export"));
- new HelpButton(this, buttonBox, "17470622");
+ new HelpButton(this, buttonBox, "18220482");
gbAdditionalOptions->hide();
adjustSize();
}
diff --git a/src/corelibs/U2View/src/ov_assembly/ExportCoverageTask.h b/src/corelibs/U2View/src/ov_assembly/ExportCoverageTask.h
index afc6c74..3abae57 100644
--- a/src/corelibs/U2View/src/ov_assembly/ExportCoverageTask.h
+++ b/src/corelibs/U2View/src/ov_assembly/ExportCoverageTask.h
@@ -22,6 +22,7 @@
#ifndef _U2_EXPORT_COVERAGE_TASK_H_
#define _U2_EXPORT_COVERAGE_TASK_H_
+#include <U2Core/U2AssemblyUtils.h>
#include <U2Core/GUrl.h>
#include <U2Core/IOAdapter.h>
#include <U2Core/Task.h>
diff --git a/src/corelibs/U2View/src/ov_assembly/ExportReadsDialog.cpp b/src/corelibs/U2View/src/ov_assembly/ExportReadsDialog.cpp
index d05294b..b024e98 100644
--- a/src/corelibs/U2View/src/ov_assembly/ExportReadsDialog.cpp
+++ b/src/corelibs/U2View/src/ov_assembly/ExportReadsDialog.cpp
@@ -40,7 +40,7 @@ namespace U2 {
ExportReadsDialog::ExportReadsDialog(QWidget * p, const QList<DocumentFormatId> & formats) : QDialog(p) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470617");
+ new HelpButton(this, buttonBox, "18220477");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Export"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/corelibs/U2View/src/ov_assembly/ExtractAssemblyRegionDialog.cpp b/src/corelibs/U2View/src/ov_assembly/ExtractAssemblyRegionDialog.cpp
index 379f959..c3d131b 100644
--- a/src/corelibs/U2View/src/ov_assembly/ExtractAssemblyRegionDialog.cpp
+++ b/src/corelibs/U2View/src/ov_assembly/ExtractAssemblyRegionDialog.cpp
@@ -42,7 +42,7 @@ ExtractAssemblyRegionDialog::ExtractAssemblyRegionDialog(QWidget * p, ExtractAss
, settings(settings) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470623");
+ new HelpButton(this, buttonBox, "18220483");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Export"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/corelibs/U2View/src/ov_msa/ColorSchemaDialogController.cpp b/src/corelibs/U2View/src/ov_msa/ColorSchemaDialogController.cpp
index 9c03323..5380ae9 100644
--- a/src/corelibs/U2View/src/ov_msa/ColorSchemaDialogController.cpp
+++ b/src/corelibs/U2View/src/ov_msa/ColorSchemaDialogController.cpp
@@ -48,7 +48,7 @@ ColorSchemaDialogController::ColorSchemaDialogController(QMap<char, QColor>& col
int ColorSchemaDialogController::adjustAlphabetColors(){
setupUi(this);
- new HelpButton(this, buttonBox, "17470553");
+ new HelpButton(this, buttonBox, "18220413");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("OK"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
alphabetColorsView = new QPixmap(alphabetColorsFrame->size());
@@ -178,7 +178,7 @@ void ColorSchemaDialogController::mouseReleaseEvent(QMouseEvent * event){
CreateColorSchemaDialog::CreateColorSchemaDialog(ColorSchemeData* _newSchema, QStringList _usedNames) : usedNames(_usedNames), newSchema(_newSchema) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470553");
+ new HelpButton(this, buttonBox, "18220413");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Create"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/corelibs/U2View/src/ov_msa/ColorSchemaSettingsController.cpp b/src/corelibs/U2View/src/ov_msa/ColorSchemaSettingsController.cpp
index 50be7d2..72c554a 100644
--- a/src/corelibs/U2View/src/ov_msa/ColorSchemaSettingsController.cpp
+++ b/src/corelibs/U2View/src/ov_msa/ColorSchemaSettingsController.cpp
@@ -93,7 +93,7 @@ static void setSchemaColors(const ColorSchemeData& customSchema){
}
-const QString ColorSchemaSettingsPageController::helpPageId = QString("17470454");
+const QString ColorSchemaSettingsPageController::helpPageId = QString("18220314");
ColorSchemaSettingsPageController::ColorSchemaSettingsPageController(MsaColorSchemeRegistry* mcsr, QObject* p)
: AppSettingsGUIPageController(tr("Alignment Color Scheme"), ColorSchemaSettingsPageId, p) {
diff --git a/src/corelibs/U2View/src/ov_msa/CreateSubalignmentDialogController.cpp b/src/corelibs/U2View/src/ov_msa/CreateSubalignmentDialogController.cpp
index 756d295..9376ee0 100644
--- a/src/corelibs/U2View/src/ov_msa/CreateSubalignmentDialogController.cpp
+++ b/src/corelibs/U2View/src/ov_msa/CreateSubalignmentDialogController.cpp
@@ -48,7 +48,7 @@ namespace U2{
CreateSubalignmentDialogController::CreateSubalignmentDialogController(MAlignmentObject *_mobj, const QRect& selection, QWidget *p)
: QDialog(p), mobj(_mobj), saveController(NULL){
setupUi(this);
- new HelpButton(this, buttonBox, "17470583");
+ new HelpButton(this, buttonBox, "18220443");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Extract"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/corelibs/U2View/src/ov_msa/DeleteGapsDialog.cpp b/src/corelibs/U2View/src/ov_msa/DeleteGapsDialog.cpp
index c8f932e..5400ec8 100644
--- a/src/corelibs/U2View/src/ov_msa/DeleteGapsDialog.cpp
+++ b/src/corelibs/U2View/src/ov_msa/DeleteGapsDialog.cpp
@@ -30,7 +30,7 @@ namespace U2 {
DeleteGapsDialog::DeleteGapsDialog(QWidget* parent, int rowNum): QDialog(parent), ui(new Ui_DeleteGapsDialog()) {
ui->setupUi(this);
- new HelpButton(this, ui->buttonBox, "17470566");
+ new HelpButton(this, ui->buttonBox, "18220426");
ui->buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Remove"));
ui->buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/corelibs/U2View/src/ov_msa/ExportConsensus/MSAExportConsensusTabFactory.cpp b/src/corelibs/U2View/src/ov_msa/ExportConsensus/MSAExportConsensusTabFactory.cpp
index 456438c..38ffb8b 100644
--- a/src/corelibs/U2View/src/ov_msa/ExportConsensus/MSAExportConsensusTabFactory.cpp
+++ b/src/corelibs/U2View/src/ov_msa/ExportConsensus/MSAExportConsensusTabFactory.cpp
@@ -34,7 +34,7 @@ namespace U2 {
const QString MSAExportConsensusFactoryTab::GROUP_ID = "OP_EXPORT_CONSENSUS";
const QString MSAExportConsensusFactoryTab::GROUP_ICON_STR = ":core/images/consensus.png";
const QString MSAExportConsensusFactoryTab::GROUP_TITLE = QObject::tr("Export Consensus");
-const QString MSAExportConsensusFactoryTab::GROUP_DOC_PAGE = "17470557";
+const QString MSAExportConsensusFactoryTab::GROUP_DOC_PAGE = "18220417";
MSAExportConsensusFactoryTab::MSAExportConsensusFactoryTab() {
objectViewOfWidget = ObjViewType_AlignmentEditor;
diff --git a/src/corelibs/U2View/src/ov_msa/ExportHighlightedDialogController.cpp b/src/corelibs/U2View/src/ov_msa/ExportHighlightedDialogController.cpp
index 743ccdd..c32cad0 100644
--- a/src/corelibs/U2View/src/ov_msa/ExportHighlightedDialogController.cpp
+++ b/src/corelibs/U2View/src/ov_msa/ExportHighlightedDialogController.cpp
@@ -44,7 +44,7 @@ ExportHighligtingDialogController::ExportHighligtingDialogController(MSAEditorUI
ui(new Ui_ExportHighlightedDialog())
{
ui->setupUi(this);
- new HelpButton(this, ui->buttonBox, "17470558");
+ new HelpButton(this, ui->buttonBox, "18220418");
ui->buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Export"));
ui->buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/corelibs/U2View/src/ov_msa/General/MSAGeneralTabFactory.cpp b/src/corelibs/U2View/src/ov_msa/General/MSAGeneralTabFactory.cpp
index b81901c..9d9e5ed 100644
--- a/src/corelibs/U2View/src/ov_msa/General/MSAGeneralTabFactory.cpp
+++ b/src/corelibs/U2View/src/ov_msa/General/MSAGeneralTabFactory.cpp
@@ -34,7 +34,7 @@ namespace U2 {
const QString MSAGeneralTabFactory::GROUP_ID = "OP_MSA_GENERAL";
const QString MSAGeneralTabFactory::GROUP_ICON_STR = ":core/images/settings2.png";
const QString MSAGeneralTabFactory::GROUP_TITLE = QString(QObject::tr("General"));
-const QString MSAGeneralTabFactory::GROUP_DOC_PAGE = "17470556";
+const QString MSAGeneralTabFactory::GROUP_DOC_PAGE = "18220416";
MSAGeneralTabFactory::MSAGeneralTabFactory() {
objectViewOfWidget = ObjViewType_AlignmentEditor;
diff --git a/src/corelibs/U2View/src/ov_msa/Highlighting/MSAHighlightingTabFactory.cpp b/src/corelibs/U2View/src/ov_msa/Highlighting/MSAHighlightingTabFactory.cpp
index bf53925..587f010 100644
--- a/src/corelibs/U2View/src/ov_msa/Highlighting/MSAHighlightingTabFactory.cpp
+++ b/src/corelibs/U2View/src/ov_msa/Highlighting/MSAHighlightingTabFactory.cpp
@@ -32,7 +32,7 @@ namespace U2 {
const QString MSAHighlightingFactory::GROUP_ID = "OP_MSA_HIGHLIGHTING";
const QString MSAHighlightingFactory::GROUP_ICON_STR = ":core/images/highlight.png";
const QString MSAHighlightingFactory::GROUP_TITLE = QString(QObject::tr("Highlighting"));
-const QString MSAHighlightingFactory::GROUP_DOC_PAGE = "17470558";
+const QString MSAHighlightingFactory::GROUP_DOC_PAGE = "18220418";
MSAHighlightingFactory::MSAHighlightingFactory() {
objectViewOfWidget = ObjViewType_AlignmentEditor;
diff --git a/src/corelibs/U2View/src/ov_msa/MSAEditorSequenceArea.cpp b/src/corelibs/U2View/src/ov_msa/MSAEditorSequenceArea.cpp
index a101bb7..32fadfe 100644
--- a/src/corelibs/U2View/src/ov_msa/MSAEditorSequenceArea.cpp
+++ b/src/corelibs/U2View/src/ov_msa/MSAEditorSequenceArea.cpp
@@ -142,11 +142,11 @@ MSAEditorSequenceArea::MSAEditorSequenceArea(MSAEditorUI* _ui, GScrollBar* hb, G
addAction(insSymAction);
- createSubaligniment = new QAction(tr("Save subalignment"), this);
+ createSubaligniment = new QAction(tr("Save subalignment..."), this);
createSubaligniment->setObjectName("Save subalignment");
connect(createSubaligniment, SIGNAL(triggered()), SLOT(sl_createSubaligniment()));
- saveSequence = new QAction(tr("Save sequence"), this);
+ saveSequence = new QAction(tr("Save sequence..."), this);
saveSequence->setObjectName("Save sequence");
connect(saveSequence, SIGNAL(triggered()), SLOT(sl_saveSequence()));
diff --git a/src/corelibs/U2View/src/ov_msa/MSASelectSubalignmentDialog.cpp b/src/corelibs/U2View/src/ov_msa/MSASelectSubalignmentDialog.cpp
index d3ba77e..64228b5 100644
--- a/src/corelibs/U2View/src/ov_msa/MSASelectSubalignmentDialog.cpp
+++ b/src/corelibs/U2View/src/ov_msa/MSASelectSubalignmentDialog.cpp
@@ -120,7 +120,7 @@ void SelectSubalignmentDialog::init() {
SAFE_POINT(ui->getEditor() != NULL, tr("MSA Editor is NULL"), );
setupUi(this);
- new HelpButton(this, buttonBox, "17470584");
+ new HelpButton(this, buttonBox, "18220444");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Select"));
connect(allButton, SIGNAL(clicked()), SLOT(sl_allButtonClicked()));
diff --git a/src/corelibs/U2View/src/ov_msa/Overview/MSAGraphOverview.cpp b/src/corelibs/U2View/src/ov_msa/Overview/MSAGraphOverview.cpp
index 2d588de..66e9f32 100644
--- a/src/corelibs/U2View/src/ov_msa/Overview/MSAGraphOverview.cpp
+++ b/src/corelibs/U2View/src/ov_msa/Overview/MSAGraphOverview.cpp
@@ -257,10 +257,12 @@ void MSAGraphOverview::sl_calculationMethodChanged(MSAGraphCalculationMethod _me
void MSAGraphOverview::sl_startRendering() {
isRendering = true;
+ emit si_renderingStateChanged(isRendering);
}
void MSAGraphOverview::sl_stopRendering() {
isRendering = false;
+ emit si_renderingStateChanged(isRendering);
}
void MSAGraphOverview::sl_blockRendering() {
diff --git a/src/corelibs/U2View/src/ov_msa/Overview/MSAGraphOverview.h b/src/corelibs/U2View/src/ov_msa/Overview/MSAGraphOverview.h
index d857140..8c83cf3 100644
--- a/src/corelibs/U2View/src/ov_msa/Overview/MSAGraphOverview.h
+++ b/src/corelibs/U2View/src/ov_msa/Overview/MSAGraphOverview.h
@@ -76,7 +76,6 @@ public:
const static int FIXED_HEIGHT = 70;
void cancelRendering();
- bool isReadyToClose() { return !isRendering; }
QColor getCurrentColor() const { return displaySettings->color; }
MSAGraphOverviewDisplaySettings::GraphType getCurrentGraphType() const { return displaySettings->type; }
@@ -84,6 +83,9 @@ public:
{ return displaySettings->orientation; }
MSAGraphCalculationMethod getCurrentCalculationMethod() const { return method; }
+signals:
+ void si_renderingStateChanged(bool isRendering);
+
public slots:
void sl_visibleRangeChanged();
void sl_redraw();
diff --git a/src/corelibs/U2View/src/ov_msa/Overview/MSAOverviewContextMenu.cpp b/src/corelibs/U2View/src/ov_msa/Overview/MSAOverviewContextMenu.cpp
index 3d0e28a..1d05bd2 100644
--- a/src/corelibs/U2View/src/ov_msa/Overview/MSAOverviewContextMenu.cpp
+++ b/src/corelibs/U2View/src/ov_msa/Overview/MSAOverviewContextMenu.cpp
@@ -61,6 +61,7 @@ void MSAOverviewContextMenu::connectSlots() {
connect(showSimpleOverviewAction, SIGNAL(toggled(bool)), simpleOverview, SLOT(setVisible(bool)));
connect(exportAsImage, SIGNAL(triggered()), SLOT(sl_exportAsImageTriggered()));
+ connect(graphOverview, SIGNAL(si_renderingStateChanged(bool)), exportAsImage, SLOT(setDisabled(bool)));
connect(graphTypeActionGroup, SIGNAL(triggered(QAction*)), SLOT(sl_graphTypeActionTriggered(QAction*)));
diff --git a/src/corelibs/U2View/src/ov_msa/Overview/MSAOverviewImageExportTask.cpp b/src/corelibs/U2View/src/ov_msa/Overview/MSAOverviewImageExportTask.cpp
index acd6d3f..7169a43 100644
--- a/src/corelibs/U2View/src/ov_msa/Overview/MSAOverviewImageExportTask.cpp
+++ b/src/corelibs/U2View/src/ov_msa/Overview/MSAOverviewImageExportTask.cpp
@@ -59,13 +59,13 @@ void MSAOverviewImageExportToBitmapTask::run() {
QPainter p(&pixmap);
if (overviewSettings.exportSimpleOverview) {
- p.drawPixmap(simpleOverview->rect(),
- simpleOverview->getView());
+ const QPixmap pixmap = simpleOverview->getView();
+ p.drawPixmap(simpleOverview->rect(), pixmap);
p.translate(0, simpleOverview->height());
}
if (overviewSettings.exportGraphOverview) {
- p.drawPixmap(graphOverview->rect(),
- graphOverview->getView());
+ const QPixmap pixmap = graphOverview->getView();
+ p.drawPixmap(graphOverview->rect(), pixmap);
}
p.end();
diff --git a/src/corelibs/U2View/src/ov_msa/PairAlign/PairAlignFactory.cpp b/src/corelibs/U2View/src/ov_msa/PairAlign/PairAlignFactory.cpp
index cdc1644..a5cf3fe 100644
--- a/src/corelibs/U2View/src/ov_msa/PairAlign/PairAlignFactory.cpp
+++ b/src/corelibs/U2View/src/ov_msa/PairAlign/PairAlignFactory.cpp
@@ -30,7 +30,7 @@ namespace U2 {
const QString PairAlignFactory::GROUP_ID = "OP_PAIRALIGN";
const QString PairAlignFactory::GROUP_ICON_STR = ":core/images/pairwise.png";
const QString PairAlignFactory::GROUP_TITLE = QString(QObject::tr("Pairwise Alignment"));
-const QString PairAlignFactory::GROUP_DOC_PAGE = "17470587";
+const QString PairAlignFactory::GROUP_DOC_PAGE = "18220447";
PairAlignFactory::PairAlignFactory() {
diff --git a/src/corelibs/U2View/src/ov_msa/SaveSelectedSequenceFromMSADialogController.cpp b/src/corelibs/U2View/src/ov_msa/SaveSelectedSequenceFromMSADialogController.cpp
index becbf2b..ae32ea5 100644
--- a/src/corelibs/U2View/src/ov_msa/SaveSelectedSequenceFromMSADialogController.cpp
+++ b/src/corelibs/U2View/src/ov_msa/SaveSelectedSequenceFromMSADialogController.cpp
@@ -41,7 +41,7 @@ SaveSelectedSequenceFromMSADialogController::SaveSelectedSequenceFromMSADialogCo
ui(new Ui_SaveSelectedSequenceFromMSADialog())
{
ui->setupUi(this);
- new HelpButton(this, ui->buttonBox, "17470585");
+ new HelpButton(this, ui->buttonBox, "18220445");
ui->buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Export"));
ui->buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/corelibs/U2View/src/ov_msa/SeqStatistics/SeqStatisticsWidgetFactory.cpp b/src/corelibs/U2View/src/ov_msa/SeqStatistics/SeqStatisticsWidgetFactory.cpp
index 884dfa2..09f6ea3 100644
--- a/src/corelibs/U2View/src/ov_msa/SeqStatistics/SeqStatisticsWidgetFactory.cpp
+++ b/src/corelibs/U2View/src/ov_msa/SeqStatistics/SeqStatisticsWidgetFactory.cpp
@@ -34,7 +34,7 @@ namespace U2 {
const QString SeqStatisticsWidgetFactory::GROUP_ID = "OP_SEQ_STATISTICS_WIDGET";
const QString SeqStatisticsWidgetFactory::GROUP_ICON_STR = ":core/images/chart_bar.png";
const QString SeqStatisticsWidgetFactory::GROUP_TITLE = QString(QObject::tr("Statistics"));
-const QString SeqStatisticsWidgetFactory::GROUP_DOC_PAGE = "17470596";
+const QString SeqStatisticsWidgetFactory::GROUP_DOC_PAGE = "18220456";
SeqStatisticsWidgetFactory::SeqStatisticsWidgetFactory() {
diff --git a/src/corelibs/U2View/src/ov_msa/TreeOptions/TreeOptionsWidgetFactory.cpp b/src/corelibs/U2View/src/ov_msa/TreeOptions/TreeOptionsWidgetFactory.cpp
index 165e8d7..db21e6f 100644
--- a/src/corelibs/U2View/src/ov_msa/TreeOptions/TreeOptionsWidgetFactory.cpp
+++ b/src/corelibs/U2View/src/ov_msa/TreeOptions/TreeOptionsWidgetFactory.cpp
@@ -34,7 +34,7 @@ namespace U2 {
const QString MSATreeOptionsWidgetFactory::GROUP_ID = "OP_MSA_TREES_WIDGET";
const QString MSATreeOptionsWidgetFactory::GROUP_ICON_STR = ":core/images/tree.png";
const QString MSATreeOptionsWidgetFactory::GROUP_TITLE = QString(QObject::tr("Tree Settings"));
-const QString MSATreeOptionsWidgetFactory::GROUP_DOC_PAGE = "17470634";
+const QString MSATreeOptionsWidgetFactory::GROUP_DOC_PAGE = "18220494";
MSATreeOptionsWidgetFactory::MSATreeOptionsWidgetFactory()
@@ -75,7 +75,7 @@ void MSATreeOptionsWidgetFactory::sl_onWidgetViewSaved(const TreeOpWidgetViewSet
const QString TreeOptionsWidgetFactory::GROUP_ID = "OP_TREES_WIDGET";
const QString TreeOptionsWidgetFactory::GROUP_ICON_STR = ":core/images/tree.png";
const QString TreeOptionsWidgetFactory::GROUP_TITLE = QString(QObject::tr("Tree Settings"));
-const QString TreeOptionsWidgetFactory::GROUP_DOC_PAGE = "17470634";
+const QString TreeOptionsWidgetFactory::GROUP_DOC_PAGE = "18220494";
TreeOptionsWidgetFactory::TreeOptionsWidgetFactory()
: viewSettings(new TreeOpWidgetViewSettings)
@@ -117,7 +117,7 @@ void TreeOptionsWidgetFactory::sl_onWidgetViewSaved(const TreeOpWidgetViewSettin
const QString AddTreeWidgetFactory::GROUP_ID = "OP_MSA_ADD_TREE_WIDGET";
const QString AddTreeWidgetFactory::GROUP_ICON_STR = ":core/images/tree.png";
const QString AddTreeWidgetFactory::GROUP_TITLE = QString(QObject::tr("Tree Settings"));
-const QString AddTreeWidgetFactory::GROUP_DOC_PAGE = "17470634";
+const QString AddTreeWidgetFactory::GROUP_DOC_PAGE = "18220494";
AddTreeWidgetFactory::AddTreeWidgetFactory() {
objectViewOfWidget = ObjViewType_AlignmentEditor;
diff --git a/src/corelibs/U2View/src/ov_sequence/CreateRulerDialogController.cpp b/src/corelibs/U2View/src/ov_sequence/CreateRulerDialogController.cpp
index 0f6c63f..aeea45b 100644
--- a/src/corelibs/U2View/src/ov_sequence/CreateRulerDialogController.cpp
+++ b/src/corelibs/U2View/src/ov_sequence/CreateRulerDialogController.cpp
@@ -50,7 +50,7 @@ CreateRulerDialogController::CreateRulerDialogController(const QSet<QString>& na
: QDialog(p)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470473");
+ new HelpButton(this, buttonBox, "18220333");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Create"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/corelibs/U2View/src/ov_sequence/GraphLabelsSelectDialog.cpp b/src/corelibs/U2View/src/ov_sequence/GraphLabelsSelectDialog.cpp
index d23e62f..e8dcb4e 100644
--- a/src/corelibs/U2View/src/ov_sequence/GraphLabelsSelectDialog.cpp
+++ b/src/corelibs/U2View/src/ov_sequence/GraphLabelsSelectDialog.cpp
@@ -71,7 +71,7 @@ GraphLabelsSelectDialog::GraphLabelsSelectDialog(int maxWindowSize, QWidget* par
QPushButton* okButton = buttonBox->button(QDialogButtonBox::Ok);
QPushButton* cancelButton = buttonBox->button(QDialogButtonBox::Cancel);
- new HelpButton(this, buttonBox, "17470531");
+ new HelpButton(this, buttonBox, "18220391");
mainLayout->addLayout(spinLayout);
mainLayout->addWidget(usedIntervalsCheck);
diff --git a/src/corelibs/U2View/src/ov_sequence/GraphSettingsDialog.cpp b/src/corelibs/U2View/src/ov_sequence/GraphSettingsDialog.cpp
index edad110..ae36f53 100644
--- a/src/corelibs/U2View/src/ov_sequence/GraphSettingsDialog.cpp
+++ b/src/corelibs/U2View/src/ov_sequence/GraphSettingsDialog.cpp
@@ -88,7 +88,7 @@ GraphSettingsDialog::GraphSettingsDialog( GSequenceGraphDrawer* d, const U2Regio
QPushButton* okButton = buttonBox->button(QDialogButtonBox::Ok);
QPushButton* cancelButton = buttonBox->button(QDialogButtonBox::Cancel);
- new HelpButton(this, buttonBox, "17470533");
+ new HelpButton(this, buttonBox, "18220393");
connect(cancelButton, SIGNAL(clicked()), SLOT(sl_onCancelClicked()));
connect(okButton, SIGNAL(clicked()), SLOT(sl_onOkClicked()));
diff --git a/src/corelibs/U2View/src/ov_sequence/PanViewRows.cpp b/src/corelibs/U2View/src/ov_sequence/PanViewRows.cpp
index 29655e6..d5a3fc7 100644
--- a/src/corelibs/U2View/src/ov_sequence/PanViewRows.cpp
+++ b/src/corelibs/U2View/src/ov_sequence/PanViewRows.cpp
@@ -89,25 +89,39 @@ void PVRowsManager::addAnnotation(Annotation *a) {
const SharedAnnotationData &data = a->getData();
const QVector<U2Region> location = data->getRegions();
- if (rowByName.contains(data->name)) {
- foreach (PVRowData *row, rowByName[data->name]) {
- if (row->fitToRow(location)) {
+ QString name;
+ bool isRestrictionSite = data->type == U2FeatureTypes::RestrictionSite;
+ if (isRestrictionSite) {
+ name = QObject::tr("Restriction Site");
+ } else {
+ name = data->name;
+ }
+
+ if (rowByName.contains(name)) {
+ foreach (PVRowData *row, rowByName[name]) {
+ if (row->fitToRow(location) || isRestrictionSite) {
row->annotations.append(a);
rowByAnnotation[a] = row;
+ if (name != data->name) {
+ rowByName[data->name].append(row);
+ }
return;
}
}
}
- PVRowData *row = new PVRowData(data->name);
+ PVRowData *row = new PVRowData(name);
+
row->ranges << location;
row->annotations.append(a);
-
rowByAnnotation[a] = row;
QList<PVRowData *>::iterator i = std::upper_bound(rows.begin(), rows.end(), row, compare_rows);
rows.insert(i, row);
- rowByName[data->name].append(row);
+ rowByName[name].append(row);
+ if (name != data->name) {
+ rowByName[data->name].append(row);
+ }
}
namespace {
@@ -128,6 +142,7 @@ void PVRowsManager::removeAnnotation(Annotation *a) {
PVRowData *row = rowByAnnotation.value(a, NULL);
CHECK(NULL != row,); // annotation may present in a DB, but has not been added to the panview yet
rowByAnnotation.remove(a);
+ rowByName.remove(a->getName());
row->annotations.removeOne(a);
substractRegions(row->ranges, a->getRegions());
if (row->annotations.isEmpty()) {
diff --git a/src/corelibs/U2View/src/ov_sequence/SaveGraphCutoffsDialogController.cpp b/src/corelibs/U2View/src/ov_sequence/SaveGraphCutoffsDialogController.cpp
index e667fed..bab79cc 100644
--- a/src/corelibs/U2View/src/ov_sequence/SaveGraphCutoffsDialogController.cpp
+++ b/src/corelibs/U2View/src/ov_sequence/SaveGraphCutoffsDialogController.cpp
@@ -40,7 +40,7 @@ SaveGraphCutoffsDialogController::SaveGraphCutoffsDialogController(GSequenceGrap
:QDialog(parent), ctx(_ctx), d(_d), gd(_gd)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470534");
+ new HelpButton(this, buttonBox, "18220394");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Save"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/corelibs/U2View/src/ov_sequence/SearchQualifierDialog.cpp b/src/corelibs/U2View/src/ov_sequence/SearchQualifierDialog.cpp
index f489b6c..5a61d70 100644
--- a/src/corelibs/U2View/src/ov_sequence/SearchQualifierDialog.cpp
+++ b/src/corelibs/U2View/src/ov_sequence/SearchQualifierDialog.cpp
@@ -44,9 +44,10 @@ SearchQualifierDialog::SearchQualifierDialog(QWidget* p, AnnotationsTreeView *tr
indexOfPrevResult(-1)
{
ui->setupUi(this);
- new HelpButton(this, ui->buttonBox, "17470507");
+ new HelpButton(this, ui->buttonBox, "18220367");
ui->buttonBox->button(QDialogButtonBox::Yes)->setText(tr("Select all"));
ui->buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Next"));
+ ui->buttonBox->button(QDialogButtonBox::Close)->setText(tr("Close"));
ui->valueEdit->installEventFilter(this);
diff --git a/src/corelibs/U2View/src/ov_sequence/annot_highlight/AnnotHighlightWidgetFactory.cpp b/src/corelibs/U2View/src/ov_sequence/annot_highlight/AnnotHighlightWidgetFactory.cpp
index 2e31ab0..e5a6c46 100644
--- a/src/corelibs/U2View/src/ov_sequence/annot_highlight/AnnotHighlightWidgetFactory.cpp
+++ b/src/corelibs/U2View/src/ov_sequence/annot_highlight/AnnotHighlightWidgetFactory.cpp
@@ -34,7 +34,7 @@ namespace U2 {
const QString AnnotHighlightWidgetFactory::GROUP_ID = "OP_ANNOT_HIGHLIGHT";
const QString AnnotHighlightWidgetFactory::GROUP_ICON_STR = ":core/images/annotation_settings.png";
const QString AnnotHighlightWidgetFactory::GROUP_TITLE = QObject::tr("Annotations Highlighting");
-const QString AnnotHighlightWidgetFactory::GROUP_DOC_PAGE = "17470496";
+const QString AnnotHighlightWidgetFactory::GROUP_DOC_PAGE = "18220356";
AnnotHighlightWidgetFactory::AnnotHighlightWidgetFactory() {
objectViewOfWidget = ObjViewType_SequenceView;
diff --git a/src/corelibs/U2View/src/ov_sequence/find_pattern/FindPatternWidget.cpp b/src/corelibs/U2View/src/ov_sequence/find_pattern/FindPatternWidget.cpp
index 4686042..cadeac7 100644
--- a/src/corelibs/U2View/src/ov_sequence/find_pattern/FindPatternWidget.cpp
+++ b/src/corelibs/U2View/src/ov_sequence/find_pattern/FindPatternWidget.cpp
@@ -298,9 +298,7 @@ void FindPatternWidget::showCurrentResultAndStopProgress(const int current, cons
}
void FindPatternWidget::initLayout() {
- lblErrorMessage->setStyleSheet(
- "color: " + L10N::errorColorLabelStr() + ";"
- "font: bold;");
+ lblErrorMessage->setStyleSheet("font: bold;");
lblErrorMessage->setText("");
initAlgorithmLayout();
initStrandSelection();
@@ -692,10 +690,7 @@ void FindPatternWidget::showHideMessage( bool show, MessageFlag messageFlag, con
}
if (!messageFlags.isEmpty()) {
- static QString storedTextColor = currentColorOfMessageText();
- if (storedTextColor != currentColorOfMessageText()) {
- changeColorOfMessageText(storedTextColor);
- }
+
#ifndef Q_OS_MAC
const QString lineBreakShortcut = "Ctrl+Enter";
@@ -706,89 +701,96 @@ void FindPatternWidget::showHideMessage( bool show, MessageFlag messageFlag, con
foreach (MessageFlag flag, messageFlags) {
switch (flag) {
case PatternIsTooLong:
- if (!text.isEmpty()) {
- text += "\n";
- }
- text += QString(tr("The value is longer than the search region."
- " Please input a shorter value or select another region!"));
+ {
+ const QString message = tr("The value is longer than the search region."
+ " Please input a shorter value or select another region!");
+ text = tr("<b><font color=%1>%2</font><br></br></b>").arg(L10N::errorColorLabelHtmlStr()).arg(message);
break;
- case PatternAlphabetDoNotMatch:
- if (!text.isEmpty()) {
- text += "\n";
}
- text += QString(tr("Warning: input value contains characters that"
- " do not match the active alphabet!"));
+ case PatternAlphabetDoNotMatch:
+ {
+ const QString message = tr("Warning: input value contains characters that"
+ " do not match the active alphabet!");
+ text += tr("<b><font color=%1>%2</font><br></br></b>").arg(L10N::warningColorLabelHtmlStr()).arg(message);
highlightBackground(textPattern);
break;
- case PatternsWithBadAlphabetInFile:
- if (!text.isEmpty()) {
- text += "\n";
}
- text += QString(tr("Warning: file contains patterns that"
- " do not match the active alphabet! Those patterns were ignored "));
+ case PatternsWithBadAlphabetInFile:
+ {
+ const QString message = tr("Warning: file contains patterns that"
+ " do not match the active alphabet! Those patterns were ignored ");
+ text += tr("<b><font color=%1>%2</font><br></br></b>").arg(L10N::warningColorLabelHtmlStr()).arg(message);
break;
- case PatternsWithBadRegionInFile:
- if (!text.isEmpty()) {
- text += "\n";
}
- text += QString(tr("Warning: file contains patterns that"
- " longer than the search region! Those patterns were ignored. Please input a shorter value or select another region! "));
+ case PatternsWithBadRegionInFile:
+ {
+ const QString message = tr("Warning: file contains patterns that"
+ " longer than the search region! Those patterns were ignored. Please input a shorter value or select another region! ");
+ text += tr("<b><font color=%1>%2</font><br></br></b>").arg(L10N::warningColorLabelHtmlStr()).arg(message);
break;
- case UseMultiplePatternsTip:
- if (!text.isEmpty()) {
- text += "\n";
}
- text += QString(tr("Info: please input at least one sequence pattern to search for. Use %1 to input multiple patterns. Alternatively, load patterns from a FASTA file.").arg(lineBreakShortcut));
- changeColorOfMessageText(L10N::infoHintColor().name());
+ case UseMultiplePatternsTip:
+ {
+ const QString message = tr("Info: please input at least one sequence pattern to search for. Use %1 to input multiple patterns. Alternatively, load patterns from a FASTA file.").arg(lineBreakShortcut);
+ text = tr("<b><font color=%1>%2</font><br></br></b>").arg(L10N::infoColorLabelHtmlStr()).arg(message);
break;
- case AnnotationNotValidName:
- if (!text.isEmpty()) {
- text += "\n";
}
- text += QString(tr("Warning: annotation name or annotation group name are invalid. "));
+ case AnnotationNotValidName:
+ {
+ const QString message = tr("Warning: annotation name or annotation group name are invalid. ");
+ text += tr("<b><font color=%1>%2</font><br></br></b>").arg(L10N::errorColorLabelHtmlStr()).arg(message);
if (!additionalMsg.isEmpty()){
- text += QString(tr("Reason: "));
- text += additionalMsg;
+ const QString message = tr("Reason: ");
+ text += tr("<b><font color=%1>%2</font></b>").arg(L10N::errorColorLabelHtmlStr()).arg(message);
+ text += tr("<b><font color=%1>%2</font><br></br></b>").arg(L10N::errorColorLabelHtmlStr()).arg(additionalMsg);
}
- text += QString(tr(" Please input valid annotation names "));
+ const QString msg = tr(" Please input valid annotation names. ");
+ text += tr("<b><font color=%1>%2</font><br></br></b>").arg(L10N::errorColorLabelHtmlStr()).arg(msg);
break;
- case AnnotationNotValidFastaParsedName:
- if (!text.isEmpty()) {
- text += "\n";
}
- text += QString(tr("Warning: annotation names are invalid. "));
+ case AnnotationNotValidFastaParsedName:
+ {
+ const QString message = tr("Warning: annotation names are invalid. ");
+ text += tr("<b><font color=%1>%2</font><br></br></b>").arg(L10N::errorColorLabelHtmlStr()).arg(message);
if (!additionalMsg.isEmpty()){
- text += QString(tr("Reason: "));
- text += additionalMsg;
+ const QString message = tr("Reason: ");
+ text += tr("<b><font color=%1>%2</font></b>").arg(L10N::errorColorLabelHtmlStr()).arg(message);
+ text += tr("<b><font color=%1>%2</font><br></br></b>").arg(L10N::errorColorLabelHtmlStr()).arg(additionalMsg);
}
- text += QString(tr(" It will be automatically changed to acceptable name if 'Get annotations' button is pressed. "));
+ const QString msg = tr(" It will be automatically changed to acceptable name if 'Get annotations' button is pressed. ");
+ text += tr("<b><font color=%1>%2</font><br></br></b>").arg(L10N::errorColorLabelHtmlStr()).arg(msg);
break;
- case NoPatternToSearch:
- if (!text.isEmpty()) {
- text += "\n";
}
- text += QString(tr("Warning: there is no pattern to search. "));
- text += QString(tr(" Please input a valid pattern or choose a file with patterns "));
+ case NoPatternToSearch:
+ {
+ const QString message = tr("Warning: there is no pattern to search. ");
+ text += tr("<b><font color=%1>%2</font></b>").arg(L10N::errorColorLabelHtmlStr()).arg(message);
+ const QString msg = tr(" Please input a valid pattern or choose a file with patterns ");
+ text += tr("<b><font color=%1>%2</font><br></br></b>").arg(L10N::errorColorLabelHtmlStr()).arg(msg);
break;
- case SearchRegionIncorrect:
- if (!text.isEmpty()) {
- text += "\n";
}
- text += QString(tr("Warning: there is no pattern to search. "));
- text += QString(tr(" Please input a valid pattern or choose a file with patterns "));
+ case SearchRegionIncorrect:
+ {
+ const QString message = tr("Warning: there is no pattern to search. ");
+ text += tr("<b><font color=%1>%2</font></b>").arg(L10N::errorColorLabelHtmlStr()).arg(message);
+ const QString msg = tr(" Please input a valid pattern or choose a file with patterns ");
+ text += tr("<b><font color=%1>%2</font><br></br></b>").arg(L10N::errorColorLabelHtmlStr()).arg(msg);
break;
- case PatternWrongRegExp:
- if (!text.isEmpty()) {
- text += "\n";
}
- text += QString(tr("Warning: invalid regexp. "));
+ case PatternWrongRegExp:
+ {
+ const QString message = tr("Warning: invalid regexp. ");
+ text += tr("<b><font color=%1>%2</font><br></br></b>").arg(L10N::errorColorLabelHtmlStr()).arg(message);
highlightBackground(textPattern);
break;
+ }
case SequenceIsTooBig:
+ {
text.clear(); // the search is blocked at all -- any other messages are meaningless
- text += QString(tr("Warning: current sequence is too long to search in."));
- changeColorOfMessageText(L10N::errorColorLabelStr());
+ const QString message = tr("Warning: current sequence is too long to search in.");
+ text += tr("<b><font color=%1>%2</font><br></br></b>").arg(L10N::errorColorLabelHtmlStr()).arg(message);
break;
+ }
default:
FAIL("Unexpected value of the error flag in show/hide error message for pattern!",);
}
@@ -801,27 +803,6 @@ void FindPatternWidget::showHideMessage( bool show, MessageFlag messageFlag, con
}
}
-void FindPatternWidget::changeColorOfMessageText(const QString &newColorName)
-{
- QString currentStyleSheet = lblErrorMessage->styleSheet();
- currentStyleSheet.replace(currentColorOfMessageText(), newColorName);
- lblErrorMessage->setStyleSheet(currentStyleSheet);
-}
-
-QString FindPatternWidget::currentColorOfMessageText() const
-{
- const QString currentStyleSheet = lblErrorMessage->styleSheet();
- const int startOfColorDefinitionPosition = currentStyleSheet.indexOf(STYLESHEET_COLOR_DEFINITION);
- const int endOfColorDefinitionPosition = currentStyleSheet.indexOf(STYLESHEET_DEFINITIONS_SEPARATOR,
- startOfColorDefinitionPosition);
- const QString currentMessageTextColor = currentStyleSheet.mid(startOfColorDefinitionPosition
- + STYLESHEET_COLOR_DEFINITION.length(),
- endOfColorDefinitionPosition
- - startOfColorDefinitionPosition
- - STYLESHEET_COLOR_DEFINITION.length());
- return currentMessageTextColor;
-}
-
void FindPatternWidget::sl_onSearchPatternChanged()
{
static QString patterns = "";
diff --git a/src/corelibs/U2View/src/ov_sequence/find_pattern/FindPatternWidgetFactory.cpp b/src/corelibs/U2View/src/ov_sequence/find_pattern/FindPatternWidgetFactory.cpp
index b89f692..b6e03de 100644
--- a/src/corelibs/U2View/src/ov_sequence/find_pattern/FindPatternWidgetFactory.cpp
+++ b/src/corelibs/U2View/src/ov_sequence/find_pattern/FindPatternWidgetFactory.cpp
@@ -34,7 +34,7 @@ namespace U2 {
const QString FindPatternWidgetFactory::GROUP_ID = "OP_FIND_PATTERN";
const QString FindPatternWidgetFactory::GROUP_ICON_STR = ":core/images/find_dialog.png";
const QString FindPatternWidgetFactory::GROUP_TITLE = QString(QObject::tr("Search in Sequence"));
-const QString FindPatternWidgetFactory::GROUP_DOC_PAGE = "17470478";
+const QString FindPatternWidgetFactory::GROUP_DOC_PAGE = "18220338";
FindPatternWidgetFactory::FindPatternWidgetFactory() {
objectViewOfWidget = ObjViewType_SequenceView;
diff --git a/src/corelibs/U2View/src/ov_sequence/sequence_info/SequenceInfoFactory.cpp b/src/corelibs/U2View/src/ov_sequence/sequence_info/SequenceInfoFactory.cpp
index 8579900..b9beb68 100644
--- a/src/corelibs/U2View/src/ov_sequence/sequence_info/SequenceInfoFactory.cpp
+++ b/src/corelibs/U2View/src/ov_sequence/sequence_info/SequenceInfoFactory.cpp
@@ -34,7 +34,7 @@ namespace U2 {
const QString SequenceInfoFactory::GROUP_ID = "OP_SEQ_INFO";
const QString SequenceInfoFactory::GROUP_ICON_STR = ":core/images/chart_bar.png";
const QString SequenceInfoFactory::GROUP_TITLE = QObject::tr("Statistics");
-const QString SequenceInfoFactory::GROUP_DOC_PAGE = "17470467";
+const QString SequenceInfoFactory::GROUP_DOC_PAGE = "18220327";
SequenceInfoFactory::SequenceInfoFactory() {
objectViewOfWidget = ObjViewType_SequenceView;
diff --git a/src/corelibs/U2View/src/phyltree/BranchSettingsDialog.cpp b/src/corelibs/U2View/src/phyltree/BranchSettingsDialog.cpp
index 5970d3c..fcc1a17 100644
--- a/src/corelibs/U2View/src/phyltree/BranchSettingsDialog.cpp
+++ b/src/corelibs/U2View/src/phyltree/BranchSettingsDialog.cpp
@@ -39,7 +39,9 @@ BranchSettingsDialog::BranchSettingsDialog(QWidget *parent, const OptionsMap& se
changedSettings[BRANCH_COLOR] = settings[BRANCH_COLOR];
changedSettings[BRANCH_THICKNESS] = settings[BRANCH_THICKNESS];
setupUi(this);
- new HelpButton(this, buttonBox, "17470635");
+ new HelpButton(this, buttonBox, "18220495");
+ buttonBox->button(QDialogButtonBox::Ok)->setText(tr("OK"));
+ buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
thicknessSpinBox->setValue(changedSettings[BRANCH_THICKNESS].toInt());
diff --git a/src/corelibs/U2View/src/phyltree/CreatePhyTreeDialogController.cpp b/src/corelibs/U2View/src/phyltree/CreatePhyTreeDialogController.cpp
index fd9d3c8..326635e 100644
--- a/src/corelibs/U2View/src/phyltree/CreatePhyTreeDialogController.cpp
+++ b/src/corelibs/U2View/src/phyltree/CreatePhyTreeDialogController.cpp
@@ -62,9 +62,9 @@ CreatePhyTreeDialogController::CreatePhyTreeDialogController(QWidget* parent, co
ui->setupUi(this);
QMap<QString, QString> helpPagesMap;
- helpPagesMap.insert("PHYLIP Neighbor Joining","17470593");
- helpPagesMap.insert("MrBayes","17470594");
- helpPagesMap.insert("PhyML Maximum Likelihood","17470592");
+ helpPagesMap.insert("PHYLIP Neighbor Joining","18220453");
+ helpPagesMap.insert("MrBayes","18220454");
+ helpPagesMap.insert("PhyML Maximum Likelihood","18220452");
new ComboboxDependentHelpButton(this, ui->buttonBox, ui->algorithmBox, helpPagesMap);
ui->buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Build"));
diff --git a/src/corelibs/U2View/src/phyltree/TextSettingsDialog.cpp b/src/corelibs/U2View/src/phyltree/TextSettingsDialog.cpp
index edc3238..26dc95d 100644
--- a/src/corelibs/U2View/src/phyltree/TextSettingsDialog.cpp
+++ b/src/corelibs/U2View/src/phyltree/TextSettingsDialog.cpp
@@ -37,7 +37,7 @@ TextSettingsDialog::TextSettingsDialog(QWidget *parent, const OptionsMap& settin
: BaseSettingsDialog(parent) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470640");
+ new HelpButton(this, buttonBox, "18220500");
curColor = qvariant_cast<QColor>(settings[LABEL_COLOR]);
diff --git a/src/corelibs/U2View/src/phyltree/TreeSettingsDialog.cpp b/src/corelibs/U2View/src/phyltree/TreeSettingsDialog.cpp
index 7314d5f..a4f7f0a 100644
--- a/src/corelibs/U2View/src/phyltree/TreeSettingsDialog.cpp
+++ b/src/corelibs/U2View/src/phyltree/TreeSettingsDialog.cpp
@@ -30,7 +30,7 @@ namespace U2 {
TreeSettingsDialog::TreeSettingsDialog(QWidget *parent, const OptionsMap &settings, bool isRectLayout)
: BaseSettingsDialog(parent) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470634");
+ new HelpButton(this, buttonBox, "18220494");
heightSlider->setValue(settings[HEIGHT_COEF].toUInt());
widthlSlider->setValue(settings[WIDTH_COEF].toUInt());
diff --git a/src/corelibs/U2View/src/util_dna_assembly/BuildIndexDialog.cpp b/src/corelibs/U2View/src/util_dna_assembly/BuildIndexDialog.cpp
index 0b9b434..d6172ae 100644
--- a/src/corelibs/U2View/src/util_dna_assembly/BuildIndexDialog.cpp
+++ b/src/corelibs/U2View/src/util_dna_assembly/BuildIndexDialog.cpp
@@ -48,12 +48,12 @@ BuildIndexDialog::BuildIndexDialog(const DnaAssemblyAlgRegistry* registry, QWidg
{
setupUi(this);
QMap<QString,QString> helpPagesMap;
- helpPagesMap.insert("BWA","17470711");
- helpPagesMap.insert("BWA-MEM","17470745");
- helpPagesMap.insert("BWA-SW","17470739");
- helpPagesMap.insert("Bowtie","17470708");
- helpPagesMap.insert("Bowtie2","17470736");
- helpPagesMap.insert("UGENE Genome Aligner","17470714");
+ helpPagesMap.insert("BWA","18220571");
+ helpPagesMap.insert("BWA-MEM","18220605");
+ helpPagesMap.insert("BWA-SW","18220599");
+ helpPagesMap.insert("Bowtie","18220568");
+ helpPagesMap.insert("Bowtie2","18220596");
+ helpPagesMap.insert("UGENE Genome Aligner","18220574");
new ComboboxDependentHelpButton(this, buttonBox, methodNamesBox, helpPagesMap);
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Start"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/corelibs/U2View/src/util_dna_assembly/ConvertAssemblyToSamDialog.cpp b/src/corelibs/U2View/src/util_dna_assembly/ConvertAssemblyToSamDialog.cpp
index fbe0366..cacde30 100644
--- a/src/corelibs/U2View/src/util_dna_assembly/ConvertAssemblyToSamDialog.cpp
+++ b/src/corelibs/U2View/src/util_dna_assembly/ConvertAssemblyToSamDialog.cpp
@@ -44,7 +44,7 @@ ConvertAssemblyToSamDialog::ConvertAssemblyToSamDialog(QWidget* parent, QString
saveController(NULL)
{
ui->setupUi(this);
- new HelpButton(this, ui->buttonBox, "17470715");
+ new HelpButton(this, ui->buttonBox, "18220575");
ui->buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Convert"));
ui->buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/corelibs/U2View/src/util_dna_assembly/DnaAssemblyDialog.cpp b/src/corelibs/U2View/src/util_dna_assembly/DnaAssemblyDialog.cpp
index 0bfc22a..9b0d963 100644
--- a/src/corelibs/U2View/src/util_dna_assembly/DnaAssemblyDialog.cpp
+++ b/src/corelibs/U2View/src/util_dna_assembly/DnaAssemblyDialog.cpp
@@ -67,12 +67,12 @@ DnaAssemblyDialog::DnaAssemblyDialog(QWidget* p, const QStringList& shortReadsUr
{
setupUi(this);
QMap<QString,QString> helpPagesMap;
- helpPagesMap.insert("BWA","17470710");
- helpPagesMap.insert("BWA-MEM","17470744");
- helpPagesMap.insert("BWA-SW","17470738");
- helpPagesMap.insert("Bowtie","17470707");
- helpPagesMap.insert("Bowtie2","17470735");
- helpPagesMap.insert("UGENE Genome Aligner","17470713");
+ helpPagesMap.insert("BWA","18220570");
+ helpPagesMap.insert("BWA-MEM","18220604");
+ helpPagesMap.insert("BWA-SW","18220598");
+ helpPagesMap.insert("Bowtie","18220567");
+ helpPagesMap.insert("Bowtie2","18220595");
+ helpPagesMap.insert("UGENE Genome Aligner","18220573");
new ComboboxDependentHelpButton(this, buttonBox, methodNamesBox, helpPagesMap);
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Start"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/corelibs/U2View/src/util_dna_assembly/DnaAssemblyUtils.cpp b/src/corelibs/U2View/src/util_dna_assembly/DnaAssemblyUtils.cpp
index b22f4e3..7d6d436 100644
--- a/src/corelibs/U2View/src/util_dna_assembly/DnaAssemblyUtils.cpp
+++ b/src/corelibs/U2View/src/util_dna_assembly/DnaAssemblyUtils.cpp
@@ -263,12 +263,12 @@ QString DnaAssemblySupport::unknownText(const QList<GUrl> &unknownFormatFiles) {
/* DnaAssemblyTaskWithConversions */
/************************************************************************/
DnaAssemblyTaskWithConversions::DnaAssemblyTaskWithConversions(const DnaAssemblyToRefTaskSettings &settings, bool viewResult, bool justBuildIndex)
-: Task("Dna assembly task", TaskFlags_NR_FOSCOE), settings(settings), viewResult(viewResult),
- justBuildIndex(justBuildIndex), conversionTasksCount(0), assemblyTask(NULL)
-{
+ : ExternalToolSupportTask("Dna assembly task", TaskFlags_NR_FOSCOE), settings(settings), viewResult(viewResult),
+ justBuildIndex(justBuildIndex), conversionTasksCount(0), assemblyTask(NULL) {}
+const DnaAssemblyToRefTaskSettings& DnaAssemblyTaskWithConversions::getSettings() const {
+ return settings;
}
-
void DnaAssemblyTaskWithConversions::prepare() {
DnaAssemblyAlgorithmEnv *env= AppContext::getDnaAssemblyAlgRegistry()->getAlgorithm(settings.algName);
if (env == NULL) {
@@ -288,6 +288,7 @@ void DnaAssemblyTaskWithConversions::prepare() {
if (0 == conversionTasksCount) {
assemblyTask = new DnaAssemblyMultiTask(settings, viewResult, justBuildIndex);
+ assemblyTask->addListeners(getListeners());
addSubTask(assemblyTask);
}
}
diff --git a/src/corelibs/U2View/src/util_dna_assembly/DnaAssemblyUtils.h b/src/corelibs/U2View/src/util_dna_assembly/DnaAssemblyUtils.h
index 8b825f0..fb5ca13 100644
--- a/src/corelibs/U2View/src/util_dna_assembly/DnaAssemblyUtils.h
+++ b/src/corelibs/U2View/src/util_dna_assembly/DnaAssemblyUtils.h
@@ -23,6 +23,7 @@
#define _U2_DNA_ASSEMBLEY_UTILS_H_
#include <U2Algorithm/DnaAssemblyMultiTask.h>
+#include <U2Core/ExternalToolRunTask.h>
namespace U2 {
@@ -45,12 +46,14 @@ private slots:
};
-class U2VIEW_EXPORT DnaAssemblyTaskWithConversions : public Task {
+class U2VIEW_EXPORT DnaAssemblyTaskWithConversions : public ExternalToolSupportTask {
+ Q_OBJECT
public:
DnaAssemblyTaskWithConversions(const DnaAssemblyToRefTaskSettings &settings, bool viewResult = false, bool justBuildIndex = false);
void prepare();
QList<Task*> onSubTaskFinished(Task *subTask);
+ const DnaAssemblyToRefTaskSettings& getSettings() const;
private:
DnaAssemblyToRefTaskSettings settings;
diff --git a/src/corelibs/U2View/src/util_dna_assembly/GenomeAssemblyDialog.cpp b/src/corelibs/U2View/src/util_dna_assembly/GenomeAssemblyDialog.cpp
index b2ab9e9..c901602 100644
--- a/src/corelibs/U2View/src/util_dna_assembly/GenomeAssemblyDialog.cpp
+++ b/src/corelibs/U2View/src/util_dna_assembly/GenomeAssemblyDialog.cpp
@@ -66,7 +66,7 @@ GenomeAssemblyDialog::GenomeAssemblyDialog(QWidget* p)
setupUi(this);
QMap<QString,QString> helpPagesMap;
- //helpPagesMap.insert("SPAdes","");
+ helpPagesMap.insert("SPAdes","18220606");
new ComboboxDependentHelpButton(this, buttonBox, methodNamesBox, helpPagesMap);
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Start"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/corelibs/U2View/src/util_sec_struct_predict/SecStructDialog.cpp b/src/corelibs/U2View/src/util_sec_struct_predict/SecStructDialog.cpp
index e3ecc06..bc8e5eb 100644
--- a/src/corelibs/U2View/src/util_sec_struct_predict/SecStructDialog.cpp
+++ b/src/corelibs/U2View/src/util_sec_struct_predict/SecStructDialog.cpp
@@ -53,14 +53,18 @@ namespace U2 {
SecStructDialog::SecStructDialog( ADVSequenceObjectContext* _ctx, QWidget *p ) : QDialog(p), rangeStart(0), rangeEnd(0), ctx(_ctx), task(NULL)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470686");
+ new HelpButton(this, buttonBox, "18220546");
sspr = AppContext::getSecStructPredictAlgRegistry();
algorithmComboBox->addItems(sspr->getAlgNameList());
+ startButton = buttonBox->button(QDialogButtonBox::Ok);
saveAnnotationButton = buttonBox->button(QDialogButtonBox::Save);
cancelButton = buttonBox->button(QDialogButtonBox::Cancel);
- startButton = buttonBox->button(QDialogButtonBox::Ok);
+
+ buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Predict"));
+ buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
+ buttonBox->button(QDialogButtonBox::Save)->setText(tr("Save"));
saveAnnotationButton->setDisabled(true);
diff --git a/src/corelibs/U2View/src/util_smith_waterman/SmithWatermanDialog.cpp b/src/corelibs/U2View/src/util_smith_waterman/SmithWatermanDialog.cpp
index 0025df1..420880b 100644
--- a/src/corelibs/U2View/src/util_smith_waterman/SmithWatermanDialog.cpp
+++ b/src/corelibs/U2View/src/util_smith_waterman/SmithWatermanDialog.cpp
@@ -72,7 +72,7 @@ SmithWatermanDialog::SmithWatermanDialog(QWidget* w, ADVSequenceObjectContext* c
dialogConfig = _dialogConfig;
setupUi(this);
- new HelpButton(this, buttonBox, "17470693");
+ new HelpButton(this, buttonBox, "18220553");
buttonBox->button(QDialogButtonBox::Yes)->setText(tr("Remote run"));
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Search"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/corelibs/U2View/transl/english.ts b/src/corelibs/U2View/transl/english.ts
index cc2fc8f..37a2dec 100644
--- a/src/corelibs/U2View/transl/english.ts
+++ b/src/corelibs/U2View/transl/english.ts
@@ -3476,7 +3476,17 @@ You don't have enough rights to change file</translation>
<context>
<name>U2::BranchSettingsDialog</name>
<message>
- <location filename="../src/phyltree/BranchSettingsDialog.cpp" line="75"/>
+ <location filename="../src/phyltree/BranchSettingsDialog.cpp" line="44"/>
+ <source>Cancel</source>
+ <translation type="unfinished">Cancel</translation>
+ </message>
+ <message>
+ <location filename="../src/phyltree/BranchSettingsDialog.cpp" line="43"/>
+ <source>OK</source>
+ <translation type="unfinished">OK</translation>
+ </message>
+ <message>
+ <location filename="../src/phyltree/BranchSettingsDialog.cpp" line="77"/>
<source>Select Color</source>
<translation>Select Color</translation>
</message>
@@ -4771,8 +4781,8 @@ Please, check external tools in the settings.</translation>
</message>
<message>
<location filename="../src/ov_sequence/find_pattern/FindPatternWidget.cpp" line="753"/>
- <source> Please input valid annotation names </source>
- <translation> Please input valid annotation names </translation>
+ <source> Please input valid annotation names. </source>
+ <translation> Please input valid annotation names. </translation>
</message>
<message>
<location filename="../src/ov_sequence/find_pattern/FindPatternWidget.cpp" line="759"/>
@@ -5360,13 +5370,13 @@ Please, check external tools in the settings.</translation>
</message>
<message>
<location filename="../src/ov_msa/MSAEditorSequenceArea.cpp" line="145"/>
- <source>Save subalignment</source>
- <translation>Save subalignment</translation>
+ <source>Save subalignment...</source>
+ <translation>Save subalignment...</translation>
</message>
<message>
<location filename="../src/ov_msa/MSAEditorSequenceArea.cpp" line="149"/>
- <source>Save sequence</source>
- <translation>Save sequence</translation>
+ <source>Save sequence...</source>
+ <translation>Save sequence...</translation>
</message>
<message>
<location filename="../src/ov_msa/MSAEditorSequenceArea.cpp" line="153"/>
@@ -6626,34 +6636,39 @@ Simple overview is unavailable.</translation>
<translation>Next</translation>
</message>
<message>
- <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="128"/>
- <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="137"/>
+ <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="50"/>
+ <source>Close</source>
+ <translation>Close</translation>
+ </message>
+ <message>
+ <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="129"/>
+ <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="138"/>
<source>Search Complete</source>
<translation>Search Complete</translation>
</message>
<message>
- <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="129"/>
+ <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="130"/>
<source>The end of the annotation tree has been reached. Would you like to start the search from the beginning?</source>
<translation>The end of the annotation tree has been reached. Would you like to start the search from the beginning?</translation>
</message>
<message>
- <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="138"/>
+ <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="139"/>
<source>No results found</source>
<translation>No results found</translation>
</message>
<message>
- <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="155"/>
- <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="159"/>
+ <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="156"/>
+ <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="160"/>
<source>Error!</source>
<translation>Error!</translation>
</message>
<message>
- <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="155"/>
+ <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="156"/>
<source>Illegal qualifier name</source>
<translation>Illegal qualifier name</translation>
</message>
<message>
- <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="159"/>
+ <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="160"/>
<source>Illegal qualifier value</source>
<translation>Illegal qualifier value</translation>
</message>
@@ -6661,12 +6676,27 @@ Simple overview is unavailable.</translation>
<context>
<name>U2::SecStructDialog</name>
<message>
- <location filename="../src/util_sec_struct_predict/SecStructDialog.cpp" line="82"/>
+ <location filename="../src/util_sec_struct_predict/SecStructDialog.cpp" line="65"/>
+ <source>OK</source>
+ <translation>OK</translation>
+ </message>
+ <message>
+ <location filename="../src/util_sec_struct_predict/SecStructDialog.cpp" line="66"/>
+ <source>Cancel</source>
+ <translation>Cancel</translation>
+ </message>
+ <message>
+ <location filename="../src/util_sec_struct_predict/SecStructDialog.cpp" line="67"/>
+ <source>Save</source>
+ <translation>Save</translation>
+ </message>
+ <message>
+ <location filename="../src/util_sec_struct_predict/SecStructDialog.cpp" line="86"/>
<source>Region</source>
<translation>Region</translation>
</message>
<message>
- <location filename="../src/util_sec_struct_predict/SecStructDialog.cpp" line="83"/>
+ <location filename="../src/util_sec_struct_predict/SecStructDialog.cpp" line="87"/>
<source>Structure Type</source>
<translation>Structure Type</translation>
</message>
diff --git a/src/corelibs/U2View/transl/russian.ts b/src/corelibs/U2View/transl/russian.ts
index 13d6e9e..fd5c721 100644
--- a/src/corelibs/U2View/transl/russian.ts
+++ b/src/corelibs/U2View/transl/russian.ts
@@ -354,7 +354,7 @@ Probably the data is too big.</source>
<message>
<location filename="../src/ov_msa/CreateSubalignmentDialog.ui" line="14"/>
<source>Extract Selected as MSA</source>
- <translation>Сохранить участок выравнивания</translation>
+ <translation>Экспортировать область выравнивания</translation>
</message>
<message>
<location filename="../src/ov_msa/CreateSubalignmentDialog.ui" line="22"/>
@@ -953,7 +953,7 @@ Probably the data is too big.</source>
<message>
<location filename="../src/LicenseDialog.ui" line="52"/>
<source>Cancel</source>
- <translation>Отменить</translation>
+ <translation>Отмена</translation>
</message>
</context>
<context>
@@ -2617,7 +2617,7 @@ Double-click to collapse the branch</source>
<message>
<location filename="../src/ov_sequence/AnnotatedDNAView.cpp" line="585"/>
<source>Edit</source>
- <translation>Редактирование</translation>
+ <translation>Редактировать</translation>
</message>
<message>
<location filename="../src/ov_sequence/AnnotatedDNAView.cpp" line="1095"/>
@@ -2741,17 +2741,17 @@ Double-click to collapse the branch</source>
<message>
<location filename="../src/ov_sequence/AnnotationsTreeView.cpp" line="1773"/>
<source>Edit Group</source>
- <translation type="unfinished"></translation>
+ <translation>Редактировать группу</translation>
</message>
<message>
<location filename="../src/ov_sequence/AnnotationsTreeView.cpp" line="1820"/>
<source>Edit Annotation</source>
- <translation type="unfinished">Редактирование аннотации</translation>
+ <translation>Редактирование аннотации</translation>
</message>
<message>
<location filename="../src/ov_sequence/AnnotationsTreeView.cpp" line="1820"/>
<source>Edit</source>
- <translation type="unfinished">Редактирование</translation>
+ <translation>Редактировать</translation>
</message>
<message>
<location filename="../src/ov_sequence/AnnotationsTreeView.cpp" line="176"/>
@@ -3474,7 +3474,17 @@ You don't have enough rights to change file</source>
<context>
<name>U2::BranchSettingsDialog</name>
<message>
- <location filename="../src/phyltree/BranchSettingsDialog.cpp" line="75"/>
+ <location filename="../src/phyltree/BranchSettingsDialog.cpp" line="44"/>
+ <source>Cancel</source>
+ <translation type="unfinished">Отмена</translation>
+ </message>
+ <message>
+ <location filename="../src/phyltree/BranchSettingsDialog.cpp" line="43"/>
+ <source>OK</source>
+ <translation type="unfinished">OK</translation>
+ </message>
+ <message>
+ <location filename="../src/phyltree/BranchSettingsDialog.cpp" line="77"/>
<source>Select Color</source>
<translation>Выбрать цвет</translation>
</message>
@@ -4449,7 +4459,7 @@ Please, check external tools in the settings.</source>
<message>
<location filename="../src/ov_msa/ExportHighlightedDialogController.cpp" line="50"/>
<source>Cancel</source>
- <translation type="unfinished"></translation>
+ <translation>Отмена</translation>
</message>
<message>
<location filename="../src/ov_msa/ExportHighlightedDialogController.cpp" line="96"/>
@@ -4474,10 +4484,6 @@ Please, check external tools in the settings.</source>
<translation type="vanished">Экспорт</translation>
</message>
<message>
- <source>Cancel</source>
- <translation type="vanished">Отмена</translation>
- </message>
- <message>
<source>Warning</source>
<translation type="vanished">Внимание</translation>
</message>
@@ -4566,7 +4572,7 @@ Please, check external tools in the settings.</source>
<message>
<location filename="../src/ov_assembly/ExtractAssemblyRegionDialog.cpp" line="47"/>
<source>Cancel</source>
- <translation type="unfinished"></translation>
+ <translation>Отмена</translation>
</message>
<message>
<location filename="../src/ov_assembly/ExtractAssemblyRegionDialog.cpp" line="51"/>
@@ -4722,7 +4728,7 @@ Please, check external tools in the settings.</source>
<message>
<location filename="../src/ov_sequence/find_pattern/FindPatternWidget.cpp" line="712"/>
<source>The value is longer than the search region. Please input a shorter value or select another region!</source>
- <translation>Значение длинее чем регион поиска. Ввведите более короткое значение или выберите другой регион!</translation>
+ <translation>Значение длиннее чем регион поиска. Ввведите более короткое значение или выберите другой регион!</translation>
</message>
<message>
<location filename="../src/ov_sequence/find_pattern/FindPatternWidget.cpp" line="719"/>
@@ -4763,13 +4769,13 @@ Please, check external tools in the settings.</source>
<message>
<location filename="../src/ov_sequence/find_pattern/FindPatternWidget.cpp" line="741"/>
<source>Info: please input at least one sequence pattern to search for. Use %1 to input multiple patterns. Alternatively, load patterns from a FASTA file.</source>
- <translation>Инфрмация: добавьте как минимум одну подстроку для поиска. Используйте %1, чтобы ввести несколько подстрок. Также вы можете загрузить подстроки из FASTA файла.
+ <translation>Информация: добавьте как минимум одну подстроку для поиска. Используйте %1, чтобы ввести несколько подстрок. Также вы можете загрузить подстроки из FASTA файла.
</translation>
</message>
<message>
<location filename="../src/ov_sequence/find_pattern/FindPatternWidget.cpp" line="753"/>
- <source> Please input valid annotation names </source>
- <translation>Введите корректные имена аннотаций </translation>
+ <source> Please input valid annotation names. </source>
+ <translation>Введите корректные имена аннотаций. </translation>
</message>
<message>
<location filename="../src/ov_sequence/find_pattern/FindPatternWidget.cpp" line="759"/>
@@ -4779,7 +4785,7 @@ Please, check external tools in the settings.</source>
<message>
<location filename="../src/ov_sequence/find_pattern/FindPatternWidget.cpp" line="764"/>
<source> It will be automatically changed to acceptable name if 'Get annotations' button is pressed. </source>
- <translation> Они будут автоматически изменены на доступные имена после того как кнопка 'Получить аннотации' будет нажата. </translation>
+ <translation> Они будут автоматически изменены на доступные имена после того как кнопка 'Создать аннотации' будет нажата. </translation>
</message>
<message>
<location filename="../src/ov_sequence/find_pattern/FindPatternWidget.cpp" line="770"/>
@@ -5610,13 +5616,13 @@ Please, check external tools in the settings.</source>
</message>
<message>
<location filename="../src/ov_msa/MSAEditorSequenceArea.cpp" line="145"/>
- <source>Save subalignment</source>
- <translation>Сохранить область выравнивания</translation>
+ <source>Save subalignment...</source>
+ <translation>Экспортировать область выравнивания...</translation>
</message>
<message>
<location filename="../src/ov_msa/MSAEditorSequenceArea.cpp" line="149"/>
- <source>Save sequence</source>
- <translation>Сохранить последовательность</translation>
+ <source>Save sequence...</source>
+ <translation>Экспортировать последовательность...</translation>
</message>
<message>
<location filename="../src/ov_msa/MSAEditorSequenceArea.cpp" line="176"/>
@@ -5776,7 +5782,7 @@ All phylogenetic tree(s), opened in the same view, will be no more synchronized
<message>
<location filename="../src/ov_msa/PhyTrees/MSAEditorTreeViewer.cpp" line="231"/>
<source>Cancel</source>
- <translation>Отменить</translation>
+ <translation>Отмена</translation>
</message>
</context>
<context>
@@ -6623,34 +6629,39 @@ Simple overview is unavailable.</source>
<translation>Следующий</translation>
</message>
<message>
- <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="128"/>
- <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="137"/>
+ <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="50"/>
+ <source>Close</source>
+ <translation>Закрыть</translation>
+ </message>
+ <message>
+ <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="129"/>
+ <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="138"/>
<source>Search Complete</source>
<translation>Поиск завершен</translation>
</message>
<message>
- <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="129"/>
+ <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="130"/>
<source>The end of the annotation tree has been reached. Would you like to start the search from the beginning?</source>
<translation>Поиск завершен. Хотите начать сначала?</translation>
</message>
<message>
- <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="138"/>
+ <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="139"/>
<source>No results found</source>
<translation>Результатов не найдено</translation>
</message>
<message>
- <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="155"/>
- <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="159"/>
+ <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="156"/>
+ <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="160"/>
<source>Error!</source>
<translation>Ошибка!</translation>
</message>
<message>
- <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="155"/>
+ <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="156"/>
<source>Illegal qualifier name</source>
<translation>Некорректное имя квалификатора</translation>
</message>
<message>
- <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="159"/>
+ <location filename="../src/ov_sequence/SearchQualifierDialog.cpp" line="160"/>
<source>Illegal qualifier value</source>
<translation>Некорректное значение квалификатора</translation>
</message>
@@ -6658,12 +6669,27 @@ Simple overview is unavailable.</source>
<context>
<name>U2::SecStructDialog</name>
<message>
- <location filename="../src/util_sec_struct_predict/SecStructDialog.cpp" line="82"/>
+ <location filename="../src/util_sec_struct_predict/SecStructDialog.cpp" line="65"/>
+ <source>OK</source>
+ <translation>OK</translation>
+ </message>
+ <message>
+ <location filename="../src/util_sec_struct_predict/SecStructDialog.cpp" line="66"/>
+ <source>Cancel</source>
+ <translation>Отмена</translation>
+ </message>
+ <message>
+ <location filename="../src/util_sec_struct_predict/SecStructDialog.cpp" line="67"/>
+ <source>Save</source>
+ <translation>Сохранить</translation>
+ </message>
+ <message>
+ <location filename="../src/util_sec_struct_predict/SecStructDialog.cpp" line="86"/>
<source>Region</source>
<translation>Регион</translation>
</message>
<message>
- <location filename="../src/util_sec_struct_predict/SecStructDialog.cpp" line="83"/>
+ <location filename="../src/util_sec_struct_predict/SecStructDialog.cpp" line="87"/>
<source>Structure Type</source>
<translation>Тип структуры</translation>
</message>
diff --git a/src/include/U2Core/AnnotationCreationPattern.h b/src/include/U2Core/AnnotationCreationPattern.h
new file mode 100644
index 0000000..f5d415d
--- /dev/null
+++ b/src/include/U2Core/AnnotationCreationPattern.h
@@ -0,0 +1 @@
+#include "../../corelibs/U2Core/src/util/AnnotationCreationPattern.h"
diff --git a/src/include/U2Core/SnpeffDictionary.h b/src/include/U2Core/SnpeffDictionary.h
new file mode 100644
index 0000000..5e78acd
--- /dev/null
+++ b/src/include/U2Core/SnpeffDictionary.h
@@ -0,0 +1 @@
+#include "../../corelibs/U2Core/src/util/SnpeffDictionary.h"
\ No newline at end of file
diff --git a/src/include/U2Formats/ConvertSnpeffVariationsToAnnotationsTask.h b/src/include/U2Formats/ConvertSnpeffVariationsToAnnotationsTask.h
new file mode 100644
index 0000000..85e7660
--- /dev/null
+++ b/src/include/U2Formats/ConvertSnpeffVariationsToAnnotationsTask.h
@@ -0,0 +1 @@
+#include "../../corelibs/U2Formats/src/tasks/ConvertSnpeffVariationsToAnnotationsTask.h"
diff --git a/src/include/U2Formats/SnpeffInfoParser.h b/src/include/U2Formats/SnpeffInfoParser.h
new file mode 100644
index 0000000..c1fa5b7
--- /dev/null
+++ b/src/include/U2Formats/SnpeffInfoParser.h
@@ -0,0 +1 @@
+#include "../../corelibs/U2Formats/src/util/SnpeffInfoParser.h"
diff --git a/src/plugins/GUITestBase/src/GTUtilsWorkflowDesigner.cpp b/src/plugins/GUITestBase/src/GTUtilsWorkflowDesigner.cpp
index 7f96c01..396a23d 100644
--- a/src/plugins/GUITestBase/src/GTUtilsWorkflowDesigner.cpp
+++ b/src/plugins/GUITestBase/src/GTUtilsWorkflowDesigner.cpp
@@ -760,7 +760,11 @@ QList<WorkflowProcessItem*> GTUtilsWorkflowDesigner::getWorkers(HI::GUITestOpSta
}
#define GT_METHOD_NAME "setDatasetInputFile"
-void GTUtilsWorkflowDesigner::setDatasetInputFile(HI::GUITestOpStatus &os, QString filePath, QString fileName){
+void GTUtilsWorkflowDesigner::setDatasetInputFile(HI::GUITestOpStatus &os, const QString &folderPath, const QString &fileName) {
+ setDatasetInputFile(os, folderPath + "/" + fileName);
+}
+
+void GTUtilsWorkflowDesigner::setDatasetInputFile(GUITestOpStatus &os, const QString &filePath, bool pastePath) {
GTGlobals::sleep(200);
QWidget* DatasetWidget = GTWidget::findWidget(os, "DatasetWidget");
GT_CHECK(DatasetWidget, "DatasetWidget not found");
@@ -768,7 +772,9 @@ void GTUtilsWorkflowDesigner::setDatasetInputFile(HI::GUITestOpStatus &os, QStri
QWidget* addFileButton = GTWidget::findWidget(os, "addFileButton", DatasetWidget);
GT_CHECK(addFileButton, "addFileButton not found");
- GTFileDialogUtils *ob = new GTFileDialogUtils(os, filePath, fileName, GTFileDialogUtils::Open, GTGlobals::UseMouse);
+ GTFileDialogUtils::TextInput t = pastePath ? GTFileDialogUtils::CopyPaste : GTFileDialogUtils::Typing;
+
+ GTFileDialogUtils *ob = new GTFileDialogUtils(os, filePath, GTGlobals::UseMouse, GTFileDialogUtils::Open, t);
GTUtilsDialog::waitForDialog(os, ob);
GTWidget::click(os, addFileButton);
diff --git a/src/plugins/GUITestBase/src/GTUtilsWorkflowDesigner.h b/src/plugins/GUITestBase/src/GTUtilsWorkflowDesigner.h
index f436edb..24a6bd3 100644
--- a/src/plugins/GUITestBase/src/GTUtilsWorkflowDesigner.h
+++ b/src/plugins/GUITestBase/src/GTUtilsWorkflowDesigner.h
@@ -120,7 +120,8 @@ public:
static void createDataset(HI::GUITestOpStatus& os, QString datasetName = "");
//sets input file with path "filePath" and name "filename" dataset
//this method should be called after selecting worker which contains dataset on scene
- static void setDatasetInputFile(HI::GUITestOpStatus &os, QString filePath, QString fileName);
+ static void setDatasetInputFile(HI::GUITestOpStatus &os, const QString &folderPath, const QString &fileName);
+ static void setDatasetInputFile(HI::GUITestOpStatus &os, const QString &filePath, bool pastePath = false);
static void addInputFile(HI::GUITestOpStatus &os, const QString &elementName, const QString &url);
diff --git a/src/plugins/GUITestBase/src/GUITestBasePlugin.cpp b/src/plugins/GUITestBase/src/GUITestBasePlugin.cpp
index 5ffadaa..d8dce51 100644
--- a/src/plugins/GUITestBase/src/GUITestBasePlugin.cpp
+++ b/src/plugins/GUITestBase/src/GUITestBasePlugin.cpp
@@ -793,6 +793,7 @@ void GUITestBasePlugin::registerTests(UGUITestBase *guiTestBase) {
REGISTER_TEST(GUITest_regression_scenarios::test_2910_3);
REGISTER_TEST(GUITest_regression_scenarios::test_2923);
REGISTER_TEST(GUITest_regression_scenarios::test_2924);
+ REGISTER_TEST(GUITest_regression_scenarios::test_2927);
REGISTER_TEST(GUITest_regression_scenarios::test_2929);
REGISTER_TEST(GUITest_regression_scenarios::test_2930);
REGISTER_TEST(GUITest_regression_scenarios::test_2931);
@@ -1168,6 +1169,7 @@ void GUITestBasePlugin::registerTests(UGUITestBase *guiTestBase) {
REGISTER_TEST(GUITest_regression_scenarios::test_4689_2);
REGISTER_TEST(GUITest_regression_scenarios::test_4694);
+ REGISTER_TEST(GUITest_regression_scenarios::test_4700);
REGISTER_TEST(GUITest_regression_scenarios::test_4702_1);
REGISTER_TEST(GUITest_regression_scenarios::test_4702_2);
REGISTER_TEST(GUITest_regression_scenarios::test_4702_3);
@@ -1233,6 +1235,7 @@ void GUITestBasePlugin::registerTests(UGUITestBase *guiTestBase) {
REGISTER_TEST(GUITest_regression_scenarios::test_4936);
REGISTER_TEST(GUITest_regression_scenarios::test_4938);
REGISTER_TEST(GUITest_regression_scenarios::test_4938_1);
+ REGISTER_TEST(GUITest_regression_scenarios::test_4965);
REGISTER_TEST(GUITest_regression_scenarios::test_4966);
REGISTER_TEST(GUITest_regression_scenarios::test_4969_1);
REGISTER_TEST(GUITest_regression_scenarios::test_4969_2);
@@ -1249,8 +1252,9 @@ if(QSysInfo::WordSize == 64){
}
REGISTER_TEST(GUITest_regression_scenarios::test_5027_2);
REGISTER_TEST(GUITest_regression_scenarios::test_5029);
+ REGISTER_TEST(GUITest_regression_scenarios::test_5039);
REGISTER_TEST(GUITest_regression_scenarios::test_5052);
- REGISTER_TEST(GUITest_regression_scenarios::test_5079);
+ REGISTER_TEST(GUITest_regression_scenarios::test_5069);
REGISTER_TEST(GUITest_regression_scenarios::test_5082);
REGISTER_TEST(GUITest_regression_scenarios::test_5090);
@@ -1270,6 +1274,14 @@ if (QSysInfo::WordSize == 32) {
REGISTER_TEST(GUITest_regression_scenarios::test_5268);
REGISTER_TEST(GUITest_regression_scenarios::test_5278);
REGISTER_TEST(GUITest_regression_scenarios::test_5295);
+ REGISTER_TEST(GUITest_regression_scenarios::test_5352);
+ REGISTER_TEST(GUITest_regression_scenarios::test_5356);
+ REGISTER_TEST(GUITest_regression_scenarios::test_5360);
+ REGISTER_TEST(GUITest_regression_scenarios::test_5363_1);
+ REGISTER_TEST(GUITest_regression_scenarios::test_5363_2);
+ REGISTER_TEST(GUITest_regression_scenarios::test_5367);
+
+ REGISTER_TEST(GUITest_regression_scenarios::test_5314);
//////////////////////////////////////////////////////////////////////////
// Common scenarios/project/
diff --git a/src/plugins/GUITestBase/src/tests/regression_scenarios/GTTestsRegressionScenarios_2001_3000.cpp b/src/plugins/GUITestBase/src/tests/regression_scenarios/GTTestsRegressionScenarios_2001_3000.cpp
index 7b99bd5..00f1e67 100644
--- a/src/plugins/GUITestBase/src/tests/regression_scenarios/GTTestsRegressionScenarios_2001_3000.cpp
+++ b/src/plugins/GUITestBase/src/tests/regression_scenarios/GTTestsRegressionScenarios_2001_3000.cpp
@@ -5916,6 +5916,27 @@ GUI_TEST_CLASS_DEFINITION(test_2924) {
}
}
+GUI_TEST_CLASS_DEFINITION(test_2927) {
+// 1. Open file "data/samples/PDB/1CF7.PDB".
+ GTFileDialog::openFile(os, dataDir + "samples/PDB/1CF7.PDB");
+ GTUtilsTaskTreeView::waitTaskFinished(os);
+
+// 2. Open file "data/samples/PDB/1CRN.PDB".
+ GTFileDialog::openFile(os, dataDir + "samples/PDB/1CRN.PDB");
+ GTUtilsTaskTreeView::waitTaskFinished(os);
+
+// 3. Remove first document with delete key.
+ GTUtilsProjectTreeView::click(os, "1CF7.PDB");
+ GTKeyboardDriver::keyClick(Qt::Key_Delete);
+ GTUtilsTaskTreeView::waitTaskFinished(os);
+
+// 4. Press "Open" toolBar button.
+// Expected state: UGENE doesn't crash.
+ GTUtilsDialog::waitForDialog(os, new GTFileDialogUtils(os, dataDir + "samples/PDB/1CF7.PDB"));
+ GTToolbar::clickButtonByTooltipOnToolbar(os, MWTOOLBAR_MAIN, "Open");
+ GTGlobals::sleep();
+}
+
GUI_TEST_CLASS_DEFINITION(test_2929){
// 1. Open "human_T1.fa".
// 2. Click the "Find TFBS with SITECON" button on the main toolbar
diff --git a/src/plugins/GUITestBase/src/tests/regression_scenarios/GTTestsRegressionScenarios_2001_3000.h b/src/plugins/GUITestBase/src/tests/regression_scenarios/GTTestsRegressionScenarios_2001_3000.h
index 3803753..b6798ee 100644
--- a/src/plugins/GUITestBase/src/tests/regression_scenarios/GTTestsRegressionScenarios_2001_3000.h
+++ b/src/plugins/GUITestBase/src/tests/regression_scenarios/GTTestsRegressionScenarios_2001_3000.h
@@ -231,6 +231,7 @@ GUI_TEST_CLASS_DECLARATION(test_2910_2)
GUI_TEST_CLASS_DECLARATION(test_2910_3)
GUI_TEST_CLASS_DECLARATION(test_2923)
GUI_TEST_CLASS_DECLARATION(test_2924)
+GUI_TEST_CLASS_DECLARATION(test_2927)
GUI_TEST_CLASS_DECLARATION(test_2929)
GUI_TEST_CLASS_DECLARATION(test_2930)
GUI_TEST_CLASS_DECLARATION(test_2931)
diff --git a/src/plugins/GUITestBase/src/tests/regression_scenarios/GTTestsRegressionScenarios_4001_5000.cpp b/src/plugins/GUITestBase/src/tests/regression_scenarios/GTTestsRegressionScenarios_4001_5000.cpp
index 169958d..2a230a5 100644
--- a/src/plugins/GUITestBase/src/tests/regression_scenarios/GTTestsRegressionScenarios_4001_5000.cpp
+++ b/src/plugins/GUITestBase/src/tests/regression_scenarios/GTTestsRegressionScenarios_4001_5000.cpp
@@ -3727,6 +3727,26 @@ GUI_TEST_CLASS_DEFINITION(test_4694) {
CHECK_SET_ERR(!undo->isEnabled(), "Button should be disabled");
}
+GUI_TEST_CLASS_DEFINITION(test_4700) {
+ //1. Open assembly
+ GTFileDialog::openFile(os, testDir + "_common_data/scenarios/_regression/4700/", "almost-empty.ugenedb");
+ GTUtilsTaskTreeView::waitTaskFinished(os);
+ GTWidget::click(os, GTUtilsMdi::activeWindow(os));
+ //2. Find area without reads
+ for (int i = 0;i < 24;i++) {
+ GTKeyboardDriver::keyClick('=', Qt::ShiftModifier);
+ GTGlobals::sleep(100);
+ }
+ GTGlobals::sleep(2000);
+
+ GTKeyboardDriver::keyClick(Qt::Key_Home);
+ GTGlobals::sleep(2000);
+ //3. Export visible reads
+ GTUtilsDialog::waitForDialog(os, new PopupChooserByText(os, QStringList() << "Export" << "Visible reads as sequences"));
+ GTUtilsDialog::waitForDialog(os, new MessageBoxDialogFiller(os, QMessageBox::Ok));
+ GTUtilsAssemblyBrowser::callContextMenu(os, GTUtilsAssemblyBrowser::Reads);
+}
+
GUI_TEST_CLASS_DEFINITION(test_4702_1) {
// 1. Open "samples/Genbank/NC_014267.1.gb"
GTFileDialog::openFile(os, dataDir + "samples/Genbank/NC_014267.1.gb");
@@ -4291,12 +4311,12 @@ GUI_TEST_CLASS_DEFINITION(test_4764_1) {
//3. Select region with edited sequences, one of sequences should starts with gap
GTUtilsMSAEditorSequenceArea::selectArea(os, QPoint(5,5), QPoint(16, 9));
-
+
//4. Copy this subalignment
GTUtilsDialog::waitForDialog(os, new PopupChooserByText(os, QStringList() << "Copy/Paste" << "Copy formatted"));
GTUtilsMSAEditorSequenceArea::callContextMenu(os);
GTGlobals::sleep();
-
+
QMainWindow* mw = AppContext::getMainWindow()->getQMainWindow();
MSAEditor* editor = mw->findChild<MSAEditor*>();
QWidget *nameListWidget = editor->getUI()->getEditorNameList();
@@ -4334,7 +4354,7 @@ GUI_TEST_CLASS_DEFINITION(test_4764_2) {
QMainWindow* mw = AppContext::getMainWindow()->getQMainWindow();
MSAEditor* editor = mw->findChild<MSAEditor*>();
QWidget *sequenceAreaWidget = editor->getUI()->getSequenceArea();
-
+
GTUtilsMSAEditorSequenceArea::selectArea(os, QPoint(0, 0), QPoint(15, 0));
GTUtilsDialog::waitForDialog(os, new PopupChooserByText(os, QStringList() << "Copy/Paste" << "Copy selection"));
GTWidget::click(os, sequenceAreaWidget, Qt::RightButton);
@@ -5264,6 +5284,46 @@ GUI_TEST_CLASS_DEFINITION(test_4938_1) {
CHECK_SET_ERR(!annTree->findItems("CDS", Qt::MatchExactly).isEmpty(), "CDS item is missing in OP_ANNOT_HIGHLIGHT_TREE");
}
+GUI_TEST_CLASS_DEFINITION(test_4965) {
+ // 1. Open an assembly (the test uses "_common_data/ugenedb/toy.sam.bam.ugenedb")
+ // 2. Right button click on the assembly object in the project view (the test clicks the item named "ref")
+ // 3. Export/Import --> Export object
+ // Expected state: 'Compress' checkbox is disabled for all assembly formats (checked: BAM, SAM, UGENE Database)
+
+ GTFileDialog::openFile(os, testDir + "_common_data/ugenedb/toy.sam.bam.ugenedb");
+ GTUtilsTaskTreeView::waitTaskFinished(os);
+
+ class CheckCompressFiller : public Filler { // scenario executed for the dialog registered as "ExportDocumentDialog"
+ public:
+ CheckCompressFiller(GUITestOpStatus &os)
+ : Filler(os, "ExportDocumentDialog") {}
+ virtual void run() {
+ QWidget *dialog = QApplication::activeModalWidget(); // the dialog under test is taken as the active modal widget
+ CHECK_SET_ERR(dialog != NULL, "dialog not found");
+
+ QComboBox *comboBox = dialog->findChild<QComboBox*>("formatCombo");
+ CHECK_SET_ERR(comboBox != NULL, "ComboBox not found");
+
+ QCheckBox *compressCheckBox = dialog->findChild<QCheckBox*>(QString::fromUtf8("compressCheck"));
+ CHECK_SET_ERR(compressCheckBox != NULL, "Check box not found");
+
+ QStringList checkFormats;
+ checkFormats << "BAM" << "SAM" << "UGENE Database";
+ foreach (const QString& format, checkFormats) { // select each format in turn and verify the checkbox is not enabled
+ GTComboBox::setIndexWithText(os, comboBox, format, true);
+ CHECK_SET_ERR(!compressCheckBox->isEnabled(),
+ QString("Compress checkbox is unexpectedly enabled for '%1' format!").arg(format));
+ }
+
+ GTUtilsDialog::clickButtonBox(os, dialog, QDialogButtonBox::Cancel); // dismiss the dialog via its Cancel button
+ }
+
+ };
+
+ GTUtilsDialog::waitForDialog(os, new CheckCompressFiller(os)); // both fillers are registered before the click that triggers them
+ GTUtilsDialog::waitForDialog(os, new PopupChooserByText(os, QStringList() << "Export/Import" << "Export object..."));
+ GTUtilsProjectTreeView::click(os, "ref", Qt::RightButton);
+}
GUI_TEST_CLASS_DEFINITION(test_4966) {
//GTLogTracer l;
diff --git a/src/plugins/GUITestBase/src/tests/regression_scenarios/GTTestsRegressionScenarios_4001_5000.h b/src/plugins/GUITestBase/src/tests/regression_scenarios/GTTestsRegressionScenarios_4001_5000.h
index 20229b8..47605d1 100644
--- a/src/plugins/GUITestBase/src/tests/regression_scenarios/GTTestsRegressionScenarios_4001_5000.h
+++ b/src/plugins/GUITestBase/src/tests/regression_scenarios/GTTestsRegressionScenarios_4001_5000.h
@@ -158,6 +158,7 @@ GUI_TEST_CLASS_DECLARATION(test_4689_1)
GUI_TEST_CLASS_DECLARATION(test_4689_2)
GUI_TEST_CLASS_DECLARATION(test_4694)
+GUI_TEST_CLASS_DECLARATION(test_4700)
GUI_TEST_CLASS_DECLARATION(test_4702_1)
GUI_TEST_CLASS_DECLARATION(test_4702_2)
GUI_TEST_CLASS_DECLARATION(test_4702_3)
@@ -224,6 +225,7 @@ GUI_TEST_CLASS_DECLARATION(test_4934)
GUI_TEST_CLASS_DECLARATION(test_4936)
GUI_TEST_CLASS_DECLARATION(test_4938)
GUI_TEST_CLASS_DECLARATION(test_4938_1)
+GUI_TEST_CLASS_DECLARATION(test_4965)
GUI_TEST_CLASS_DECLARATION(test_4966)
GUI_TEST_CLASS_DECLARATION(test_4969_1)
GUI_TEST_CLASS_DECLARATION(test_4969_2)
diff --git a/src/plugins/GUITestBase/src/tests/regression_scenarios/GTTestsRegressionScenarios_5001_6000.cpp b/src/plugins/GUITestBase/src/tests/regression_scenarios/GTTestsRegressionScenarios_5001_6000.cpp
index 48c6563..0d3cd35 100644
--- a/src/plugins/GUITestBase/src/tests/regression_scenarios/GTTestsRegressionScenarios_5001_6000.cpp
+++ b/src/plugins/GUITestBase/src/tests/regression_scenarios/GTTestsRegressionScenarios_5001_6000.cpp
@@ -91,18 +91,23 @@
#include "GTUtilsTaskTreeView.h"
#include "GTUtilsWizard.h"
#include "GTUtilsWorkflowDesigner.h"
+#include "runnables/ugene/corelibs/U2Gui/ImportBAMFileDialogFiller.h"
#include "runnables/ugene/corelibs/U2Gui/PredictSecondaryStructureDialogFiller.h"
+#include "runnables/ugene/corelibs/U2View/ov_assembly/ExportCoverageDialogFiller.h"
#include "runnables/ugene/corelibs/U2View/ov_msa/DistanceMatrixDialogFiller.h"
#include "runnables/ugene/corelibs/U2View/ov_msa/GenerateAlignmentProfileDialogFiller.h"
#include "runnables/ugene/corelibs/U2View/ov_msa/LicenseAgreementDialogFiller.h"
#include "runnables/ugene/plugins/dna_export/ExportSequencesDialogFiller.h"
#include "runnables/ugene/plugins/enzymes/DigestSequenceDialogFiller.h"
#include "runnables/ugene/plugins/enzymes/FindEnzymesDialogFiller.h"
+#include "runnables/ugene/plugins/external_tools/FormatDBDialogFiller.h"
+#include "runnables/ugene/plugins/external_tools/BlastAllSupportDialogFiller.h"
#include "runnables/ugene/plugins/pcr/ImportPrimersDialogFiller.h"
#include "runnables/ugene/plugins_3rdparty/umuscle/MuscleDialogFiller.h"
#include "runnables/ugene/ugeneui/SaveProjectDialogFiller.h"
#include "runnables/ugene/ugeneui/SequenceReadingModeSelectorDialogFiller.h"
+
namespace U2 {
namespace GUITest_regression_scenarios {
@@ -332,23 +337,7 @@ GUI_TEST_CLASS_DEFINITION(test_5029) {
CHECK_SET_ERR( numPlugins > 10, QString("Not all plugins were loaded. Loaded %1 plugins").arg(numPlugins));
}
-GUI_TEST_CLASS_DEFINITION(test_5052) {
- //1. Open "samples/Genbank/murine.gb".
- GTFileDialog::openFile(os, dataDir + "samples/Genbank/murine.gb");
- //2. Close the opened sequence view.
- GTGlobals::FindOptions findOptions;
- findOptions.matchPolicy = Qt::MatchContains;
- GTUtilsMdi::closeWindow(os, "NC_", findOptions);
- //3. Click "murine.gb" on Start Page.
- GTUtilsStartPage::clickResentDocument(os, "murine.gb");
- //Expected: The file is loaded, the view is opened.
- GTUtilsTaskTreeView::waitTaskFinished(os);
- CHECK_SET_ERR(GTUtilsDocument::isDocumentLoaded(os, "murine.gb"), "The file is not loaded");
- QString title = GTUtilsMdi::activeWindowTitle(os);
- CHECK_SET_ERR(title.contains("NC_"), "Wrong MDI window is active");
-}
-
-GUI_TEST_CLASS_DEFINITION(test_5079) {
+GUI_TEST_CLASS_DEFINITION(test_5039) {
//1. Open "COI.aln".
GTFileDialog::openFile(os, dataDir + "samples/CLUSTALW", "COI.aln");
GTUtilsTaskTreeView::waitTaskFinished(os);
@@ -381,6 +370,41 @@ GUI_TEST_CLASS_DEFINITION(test_5079) {
CHECK_SET_ERR(!l.hasError(), "unexpected errors in log");
}
+GUI_TEST_CLASS_DEFINITION(test_5052) {
+ // 1. Open "samples/Genbank/murine.gb".
+ GTFileDialog::openFile(os, dataDir + "samples/Genbank/murine.gb");
+ // 2. Close the opened sequence view (its MDI window is looked up by the "NC_" title fragment).
+ GTGlobals::FindOptions findOptions;
+ findOptions.matchPolicy = Qt::MatchContains;
+ GTUtilsMdi::closeWindow(os, "NC_", findOptions);
+ // 3. Click "murine.gb" on Start Page.
+ GTUtilsStartPage::clickResentDocument(os, "murine.gb");
+ // Expected: the file is loaded and the active MDI window title contains "NC_" again.
+ GTUtilsTaskTreeView::waitTaskFinished(os);
+ CHECK_SET_ERR(GTUtilsDocument::isDocumentLoaded(os, "murine.gb"), "The file is not loaded");
+ QString title = GTUtilsMdi::activeWindowTitle(os);
+ CHECK_SET_ERR(title.contains("NC_"), "Wrong MDI window is active");
+}
+
+GUI_TEST_CLASS_DEFINITION(test_5069) {
+// 1. Open Workflow Designer and load workflow "_common_data/regression/5069/crash.uwl".
+ GTUtilsWorkflowDesigner::openWorkflowDesigner(os);
+ GTUtilsWorkflowDesigner::loadWorkflow(os, testDir + "_common_data/regression/5069/crash.uwl");
+ GTUtilsTaskTreeView::waitTaskFinished(os);
+
+// 2. Set "samples/Genbank/murine.gb" (from the data directory) as input of the "Read Sequence" element.
+ GTUtilsWorkflowDesigner::click(os, "Read Sequence");
+ GTUtilsWorkflowDesigner::setDatasetInputFile(os, dataDir + "samples/Genbank/murine.gb");
+
+// 3. Launch workflow.
+// Expected state: UGENE doesn't crash and the dashboard reports no problems.
+ GTUtilsWorkflowDesigner::runWorkflow(os);
+ GTUtilsTaskTreeView::waitTaskFinished(os);
+
+ const bool areThereProblems = GTUtilsDashboard::areThereProblems(os);
+ CHECK_SET_ERR(!areThereProblems, "Workflow has finished with problems");
+}
+
GUI_TEST_CLASS_DEFINITION(test_5082) {
GTLogTracer l;
// 1. Open "_common_data/clustal/big.aln".
@@ -728,6 +752,220 @@ GUI_TEST_CLASS_DEFINITION(test_5295) {
CHECK_SET_ERR(colors.size() > 1, "Biostruct was not drawn after renderer change");
}
+GUI_TEST_CLASS_DEFINITION(test_5314) {
+ // 1. Open "_common_data/genbank/CVU55762.gb" (from the test data directory).
+ // 2. Search for the "ClaI" enzyme via the "Find restriction sites" context menu item.
+ // 3. Open "_common_data/abif/A01.abi". Expected state: no errors appear in the log.
+ GTFileDialog::openFile(os, testDir + "_common_data/genbank/CVU55762.gb");
+ GTUtilsTaskTreeView::waitTaskFinished(os);
+
+ const QStringList defaultEnzymes = QStringList() << "ClaI";
+ GTUtilsDialog::waitForDialog(os, new PopupChooser(os, QStringList() << "ADV_MENU_ANALYSE" << "Find restriction sites"));
+ GTUtilsDialog::waitForDialog(os, new FindEnzymesDialogFiller(os, defaultEnzymes));
+ GTMenu::showContextMenu(os, GTUtilsSequenceView::getSeqWidgetByNumber(os));
+ GTUtilsTaskTreeView::waitTaskFinished(os);
+ GTLogTracer lt; // created only after the enzyme search has finished; presumably records errors from this point on
+ GTFileDialog::openFile(os, testDir + "_common_data/abif/A01.abi");
+ GTUtilsTaskTreeView::waitTaskFinished(os);
+ GTGlobals::sleep();
+ CHECK_SET_ERR(!lt.hasError(), "Log shouldn't contain errors");
+}
+
+GUI_TEST_CLASS_DEFINITION(test_5352) {
+// 1. Open WD
+// 2. Open a sample (the test uses "Align sequences with MUSCLE")
+// 3. Remove the "Align with MUSCLE" element, connect "Read alignment" to "Write alignment" and set input data (COI.aln)
+// 4. Run the workflow
+// 5. Click "Load dashboard workflow" (located on the dashboard as a BUTTON element with empty text)
+// Expected state: message box about workflow modification appears
+// 6. Click "Close without saving"
+// Expected state: the launched workflow is loaded successfully, no errors
+
+ GTLogTracer l;
+
+ GTUtilsWorkflowDesigner::openWorkflowDesigner(os);
+ GTUtilsWorkflowDesigner::addSample(os, "Align sequences with MUSCLE");
+ GTUtilsTaskTreeView::waitTaskFinished(os);
+
+ GTUtilsWorkflowDesigner::removeItem(os, "Align with MUSCLE");
+
+ WorkflowProcessItem* read = GTUtilsWorkflowDesigner::getWorker(os, "Read alignment");
+ WorkflowProcessItem* write = GTUtilsWorkflowDesigner::getWorker(os, "Write alignment");
+ GTUtilsWorkflowDesigner::connect(os, read, write);
+
+ GTUtilsWorkflowDesigner::click(os, "Read alignment");
+ GTUtilsWorkflowDesigner::addInputFile(os, "Read alignment", dataDir + "samples/CLUSTALW/COI.aln");
+
+ GTUtilsWorkflowDesigner::runWorkflow(os);
+ GTUtilsTaskTreeView::waitTaskFinished(os);
+
+ GTUtilsDialog::waitForDialog(os, new MessageBoxDialogFiller(os, "Close without Saving")); // registered before the click that raises the message box
+ HIWebElement element = GTUtilsDashboard::findElement(os, "", "BUTTON");
+ GTUtilsDashboard::click(os, element);
+
+ CHECK_SET_ERR(!l.hasError(), "There is an error in the log");
+}
+
+GUI_TEST_CLASS_DEFINITION(test_5356) {
+// 1. Open WD
+// 2. Create workflow: "Read FASTQ" --> "Cut Adapter" --> "FastQC"
+// (open _common_data/regression/5356/cutadapt_and_trim.uwl)
+// 3. Set input data:
+// reads - _common_data/regression/5356/reads.fastq
+// adapter file - _common_data/regression/5356/adapter.fa
+// (the "Cut Adapter" output directory is set to "Custom" and points at the sandbox directory)
+// 4. Run the workflow
+// Expected state: no errors in the log (empty sequences were skipped by CutAdapter)
+
+ GTLogTracer l;
+
+ GTUtilsWorkflowDesigner::openWorkflowDesigner(os);
+ GTUtilsWorkflowDesigner::loadWorkflow(os, testDir + "_common_data/regression/5356/cutadapt_and_trim.uwl");
+ GTUtilsTaskTreeView::waitTaskFinished(os);
+
+ GTUtilsWorkflowDesigner::addInputFile(os, "Read FASTQ Files with Reads 1", testDir + "_common_data/regression/5356/reads.fastq");
+
+ GTUtilsWorkflowDesigner::click(os, "Cut Adapter");
+ GTUtilsWorkflowDesigner::setParameter(os, "FASTA file with adapters", QDir(testDir + "_common_data/regression/5356/adapter.fa").absolutePath(), GTUtilsWorkflowDesigner::textValue);
+ GTUtilsWorkflowDesigner::setParameter(os, "Output directory", "Custom", GTUtilsWorkflowDesigner::comboValue);
+ GTUtilsWorkflowDesigner::setParameter(os, "Custom directory", QDir(sandBoxDir).absolutePath(), GTUtilsWorkflowDesigner::textValue);
+
+ GTUtilsWorkflowDesigner::runWorkflow(os);
+ GTUtilsTaskTreeView::waitTaskFinished(os);
+
+ CHECK_SET_ERR(!l.hasError(), "There is an error in the log");
+}
+
+GUI_TEST_CLASS_DEFINITION(test_5360) {
+ // 1. Open the scheme "_common_data/scenarios/_regression/5360/5360.uwl".
+ //
+ // 2. Set an input FASTQ file whose path contains non-ASCII symbols.
+ //
+ // 3. Run the workflow.
+ // Expected state: the workflow runs without errors in the log.
+ GTUtilsWorkflowDesigner::openWorkflowDesigner(os);
+ GTUtilsTaskTreeView::waitTaskFinished(os);
+ GTUtilsWorkflowDesigner::loadWorkflow(os, testDir + "_common_data/scenarios/_regression/5360/5360.uwl");
+ GTUtilsTaskTreeView::waitTaskFinished(os);
+
+ GTUtilsWorkflowDesigner::click(os, "Read FASTQ Files with Reads");
+ GTUtilsWorkflowDesigner::setDatasetInputFile(os, testDir + QString::fromLocal8Bit("_common_data/scenarios/_regression/5360/папка/риды.fastq"), true); // NOTE(review): fromLocal8Bit decodes the literal with the system locale encoding - confirm it matches this source file's encoding on every test platform
+
+ GTLogTracer lt; // created right before the run; presumably only errors logged from here on are checked
+ GTUtilsWorkflowDesigner::runWorkflow(os);
+ GTUtilsTaskTreeView::waitTaskFinished(os);
+ CHECK_SET_ERR(!lt.hasError(), "There is an error in the log");
+}
+
+GUI_TEST_CLASS_DEFINITION(test_5363_1) {
+// 1. {Tools --> BLAST --> BLAST make database}
+// 2. Set murine.gb as input file (the output directory is the sandbox directory)
+// 3. Check nucleotide radiobutton
+// 4. Create database
+// Expected state: database was successfully created
+// 5. Open murine.gb
+// 6. {Analyze --> Query with local BLAST}
+// 7. Select the created database and accept the dialog
+// Expected state: blast annotations were found and the annotations locations are equal to 'hit-from' and 'hit-to' qualifier values
+
+ FormatDBSupportRunDialogFiller::Parameters parametersDB;
+ parametersDB.inputFilePath = dataDir + "/samples/Genbank/murine.gb";
+ parametersDB.outputDirPath = sandBoxDir;
+ GTUtilsDialog::waitForDialog(os, new FormatDBSupportRunDialogFiller(os, parametersDB));
+ GTMenu::clickMainMenuItem(os, QStringList() << "Tools" << "BLAST" << "BLAST make database...");
+
+ GTUtilsTaskTreeView::waitTaskFinished(os);
+
+ GTFileDialog::openFile(os, dataDir + "/samples/Genbank/murine.gb");
+ GTUtilsTaskTreeView::waitTaskFinished(os);
+
+ BlastAllSupportDialogFiller::Parameters parametersSearch;
+ parametersSearch.runBlast = true;
+ parametersSearch.dbPath = sandBoxDir + "/murine.nin";
+
+ GTUtilsDialog::waitForDialog(os, new BlastAllSupportDialogFiller(parametersSearch, os));
+ GTMenu::clickMainMenuItem(os, QStringList() << "Actions" << "Analyze" << "Query with local BLAST...");
+ GTUtilsTaskTreeView::waitTaskFinished(os);
+
+ QTreeWidgetItem* treeItem = GTUtilsAnnotationsTreeView::findItem(os, "blast result");
+ CHECK_SET_ERR(treeItem != NULL, "blast result annotations not found");
+ bool ok;
+ int hitFrom = GTUtilsAnnotationsTreeView::getQualifierValue(os, "hit-to", treeItem).toInt(&ok); // NOTE(review): 'hitFrom' is read from the "hit-to" qualifier - confirm the swap is intentional
+ CHECK_SET_ERR(ok, "Cannot get hit-to qualifier value");
+
+ int hitTo = GTUtilsAnnotationsTreeView::getQualifierValue(os, "hit-from", treeItem).toInt(&ok); // NOTE(review): 'hitTo' is read from the "hit-from" qualifier - see the note on 'hitFrom'
+ CHECK_SET_ERR(ok, "Cannot get hit-from qualifier value");
+
+ CHECK_SET_ERR(GTUtilsAnnotationsTreeView::findRegion(os, "blast result", U2Region(hitFrom, hitTo - hitFrom)), // region start = "hit-to" value, length = "hit-from" - "hit-to"
+ QString("Cannot find blast result [%1, %2]").arg(hitFrom).arg(hitTo));
+}
+
+GUI_TEST_CLASS_DEFINITION(test_5363_2) {
+// 1. {Tools --> BLAST --> BLAST+ make database}
+// 2. Set murine.gb as input file (the output directory is the sandbox directory)
+// 3. Check nucleotide radiobutton
+// 4. Create database
+// Expected state: database was successfully created
+// 5. Open murine.gb
+// 6. {Analyze --> Query with local BLAST+}
+// 7. Select the created database and accept the dialog
+// Expected state: blast annotations were found and the annotations locations are equal to 'hit-from' and 'hit-to' qualifier values
+
+ FormatDBSupportRunDialogFiller::Parameters parametersDB;
+ parametersDB.inputFilePath = dataDir + "/samples/Genbank/murine.gb";
+ parametersDB.outputDirPath = sandBoxDir;
+ GTUtilsDialog::waitForDialog(os, new FormatDBSupportRunDialogFiller(os, parametersDB));
+ GTMenu::clickMainMenuItem(os, QStringList() << "Tools" << "BLAST" << "BLAST+ make database...");
+
+ GTUtilsTaskTreeView::waitTaskFinished(os);
+
+ GTFileDialog::openFile(os, dataDir + "/samples/Genbank/murine.gb");
+ GTUtilsTaskTreeView::waitTaskFinished(os);
+
+ BlastAllSupportDialogFiller::Parameters parametersSearch;
+ parametersSearch.runBlast = true;
+ parametersSearch.dbPath = sandBoxDir + "/murine.nin";
+
+ GTUtilsDialog::waitForDialog(os, new BlastAllSupportDialogFiller(parametersSearch, os));
+ GTMenu::clickMainMenuItem(os, QStringList() << "Actions" << "Analyze" << "Query with local BLAST+...");
+ GTUtilsTaskTreeView::waitTaskFinished(os);
+
+ QTreeWidgetItem* treeItem = GTUtilsAnnotationsTreeView::findItem(os, "blast result");
+ CHECK_SET_ERR(treeItem != NULL, "blast result annotations not found");
+ bool ok;
+ int hitFrom = GTUtilsAnnotationsTreeView::getQualifierValue(os, "hit-to", treeItem).toInt(&ok); // NOTE(review): 'hitFrom' is read from the "hit-to" qualifier - confirm the swap is intentional
+ CHECK_SET_ERR(ok, "Cannot get hit-to qualifier value");
+
+ int hitTo = GTUtilsAnnotationsTreeView::getQualifierValue(os, "hit-from", treeItem).toInt(&ok); // NOTE(review): 'hitTo' is read from the "hit-from" qualifier - see the note on 'hitFrom'
+ CHECK_SET_ERR(ok, "Cannot get hit-from qualifier value");
+
+ CHECK_SET_ERR(GTUtilsAnnotationsTreeView::findRegion(os, "blast result", U2Region(hitFrom, hitTo - hitFrom)), // region start = "hit-to" value, length = "hit-from" - "hit-to"
+ QString("Cannot find blast result [%1, %2]").arg(hitFrom).arg(hitTo));
+}
+
+GUI_TEST_CLASS_DEFINITION(test_5367) {
+// 1. Open "_common_data/bam/accepted_hits_with_gaps.bam" (imported into "test_5367.ugenedb" in the sandbox directory)
+// 2. Export coverage in 'Per base' format
+// Expected state: gaps are not considered as coverage, the result file is equal to "_common_data/bam/accepted_hits_with_gaps_coverage.txt"
+
+ GTUtilsDialog::waitForDialog(os, new ImportBAMFileFiller(os, sandBoxDir + "/test_5367.ugenedb"));
+ GTFileDialog::openFile(os, testDir + "_common_data/bam/accepted_hits_with_gaps.bam");
+
+ GTUtilsTaskTreeView::waitTaskFinished(os);
+
+ QList<ExportCoverageDialogFiller::Action> actions; // dialog actions, listed in the order they are appended: set format, enter output path, click OK
+ actions << ExportCoverageDialogFiller::Action(ExportCoverageDialogFiller::SetFormat, "Per base");
+ actions << ExportCoverageDialogFiller::Action(ExportCoverageDialogFiller::EnterFilePath, QDir(sandBoxDir).absolutePath() + "/test_5367_coverage.txt");
+ actions << ExportCoverageDialogFiller::Action(ExportCoverageDialogFiller::ClickOk, QVariant());
+
+ GTUtilsDialog::waitForDialog(os, new ExportCoverageDialogFiller(os, actions) );
+ GTUtilsDialog::waitForDialog(os, new PopupChooserByText(os, QStringList() << "Export coverage..."));
+ GTUtilsAssemblyBrowser::callContextMenu(os);
+
+ GTUtilsTaskTreeView::waitTaskFinished(os);
+
+ CHECK_SET_ERR(GTFile::equals(os, sandBoxDir + "/test_5367_coverage.txt", testDir + "/_common_data/bam/accepted_hits_with_gaps_coverage.txt"), "Exported coverage is wrong!");
+}
+
} // namespace GUITest_regression_scenarios
} // namespace U2
diff --git a/src/plugins/GUITestBase/src/tests/regression_scenarios/GTTestsRegressionScenarios_5001_6000.h b/src/plugins/GUITestBase/src/tests/regression_scenarios/GTTestsRegressionScenarios_5001_6000.h
index 7b47cac..1437695 100644
--- a/src/plugins/GUITestBase/src/tests/regression_scenarios/GTTestsRegressionScenarios_5001_6000.h
+++ b/src/plugins/GUITestBase/src/tests/regression_scenarios/GTTestsRegressionScenarios_5001_6000.h
@@ -39,8 +39,9 @@ GUI_TEST_CLASS_DECLARATION(test_5018)
GUI_TEST_CLASS_DECLARATION(test_5027_1)
GUI_TEST_CLASS_DECLARATION(test_5027_2)
GUI_TEST_CLASS_DECLARATION(test_5029)
+GUI_TEST_CLASS_DECLARATION(test_5039)
GUI_TEST_CLASS_DECLARATION(test_5052)
-GUI_TEST_CLASS_DECLARATION(test_5079)
+GUI_TEST_CLASS_DECLARATION(test_5069)
GUI_TEST_CLASS_DECLARATION(test_5082)
GUI_TEST_CLASS_DECLARATION(test_5090)
@@ -58,6 +59,14 @@ GUI_TEST_CLASS_DECLARATION(test_5227)
GUI_TEST_CLASS_DECLARATION(test_5268)
GUI_TEST_CLASS_DECLARATION(test_5278)
GUI_TEST_CLASS_DECLARATION(test_5295)
+GUI_TEST_CLASS_DECLARATION(test_5352)
+GUI_TEST_CLASS_DECLARATION(test_5356)
+GUI_TEST_CLASS_DECLARATION(test_5360)
+GUI_TEST_CLASS_DECLARATION(test_5363_1)
+GUI_TEST_CLASS_DECLARATION(test_5363_2)
+GUI_TEST_CLASS_DECLARATION(test_5367)
+
+GUI_TEST_CLASS_DECLARATION(test_5314)
#undef GUI_TEST_SUITE
diff --git a/src/plugins/annotator/src/CollocationsDialogController.cpp b/src/plugins/annotator/src/CollocationsDialogController.cpp
index 1bc240c..9fd8ea7 100644
--- a/src/plugins/annotator/src/CollocationsDialogController.cpp
+++ b/src/plugins/annotator/src/CollocationsDialogController.cpp
@@ -57,7 +57,7 @@ CollocationsDialogController::CollocationsDialogController(QStringList _names, A
task = NULL;
qSort(allNames);
setupUi(this);
- new HelpButton(this, buttonBox, "17470653");
+ new HelpButton(this, buttonBox, "18220513");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Search"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins/annotator/src/CustomAutoAnnotationDialog.cpp b/src/plugins/annotator/src/CustomAutoAnnotationDialog.cpp
index 3b1dc8a..1643fe7 100644
--- a/src/plugins/annotator/src/CustomAutoAnnotationDialog.cpp
+++ b/src/plugins/annotator/src/CustomAutoAnnotationDialog.cpp
@@ -43,7 +43,7 @@ CustomAutoAnnotationDialog::CustomAutoAnnotationDialog(ADVSequenceObjectContext*
: QDialog(ctx->getAnnotatedDNAView()->getWidget()), seqCtx(ctx)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470732");
+ new HelpButton(this, buttonBox, "18220592");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Annotate"));
loadSettings();
diff --git a/src/plugins/api_tests/src/core/datatype/annotations/AnnotationUnitTests.cpp b/src/plugins/api_tests/src/core/datatype/annotations/AnnotationUnitTests.cpp
index 7196a2e..15ed502 100644
--- a/src/plugins/api_tests/src/core/datatype/annotations/AnnotationUnitTests.cpp
+++ b/src/plugins/api_tests/src/core/datatype/annotations/AnnotationUnitTests.cpp
@@ -216,7 +216,7 @@ IMPLEMENT_TEST(AnnotationUnitTest, getSet_Qualifiers) {
CHECK_TRUE(annotation->getQualifiers() == initQuals, "Unexpected annotation's qualifiers");
- U2Qualifier supplementedQual("New Key", "New Value");
+ U2Qualifier supplementedQual("New_Key", "New Value");
annotation->addQualifier(supplementedQual);
CHECK_TRUE(annotation->getQualifiers() == (QVector<U2Qualifier>(initQuals) << supplementedQual), "Unexpected annotation's qualifiers");
diff --git a/src/plugins/api_tests/src/core/dbi/assembly/AssemblyDbiUnitTests.cpp b/src/plugins/api_tests/src/core/dbi/assembly/AssemblyDbiUnitTests.cpp
index 1df60bc..d80b81b 100644
--- a/src/plugins/api_tests/src/core/dbi/assembly/AssemblyDbiUnitTests.cpp
+++ b/src/plugins/api_tests/src/core/dbi/assembly/AssemblyDbiUnitTests.cpp
@@ -555,10 +555,10 @@ void AssemblyDbiUnitTests_calculateCoverage::Test() {
const U2DataId& id = AssemblyTestData::getAssemblyIds()->first();
const U2Region& region = U2Region(20, 1);
U2AssemblyCoverageStat c;
- c.coverage.resize(1);
+ c.coverage->resize(1);
assemblyDbi->calculateCoverage(id, region, c, os);
CHECK_NO_ERROR(os);
- int res = c.coverage.first().maxValue;
+ int res = c.coverage->first().maxValue;
CHECK_TRUE(res == 1, "incorrect calculate Coverage");
}
diff --git a/src/plugins/biostruct3d_view/src/SelectModelsDialog.cpp b/src/plugins/biostruct3d_view/src/SelectModelsDialog.cpp
index a80f122..9116773 100644
--- a/src/plugins/biostruct3d_view/src/SelectModelsDialog.cpp
+++ b/src/plugins/biostruct3d_view/src/SelectModelsDialog.cpp
@@ -36,7 +36,7 @@ SelectModelsDialog::SelectModelsDialog(const QList<int> &_modelIds, const QList<
: QDialog(parent), Ui_SelectModelsDialog()
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470524");
+ new HelpButton(this, buttonBox, "18220384");
buttonBox_1->button(QDialogButtonBox::Cancel)->setText(::U2::SelectModelsDialog::tr("All"));
buttonBox_1->button(QDialogButtonBox::No)->setText(::U2::SelectModelsDialog::tr("Invert"));
buttonBox->button(QDialogButtonBox::Ok)->setText(::U2::SelectModelsDialog::tr("OK"));
diff --git a/src/plugins/biostruct3d_view/src/SettingsDialog.cpp b/src/plugins/biostruct3d_view/src/SettingsDialog.cpp
index efb49e2..d83a474 100644
--- a/src/plugins/biostruct3d_view/src/SettingsDialog.cpp
+++ b/src/plugins/biostruct3d_view/src/SettingsDialog.cpp
@@ -32,7 +32,7 @@ BioStruct3DSettingsDialog::BioStruct3DSettingsDialog()
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470521");
+ new HelpButton(this, buttonBox, "18220381");
glWidget = NULL;
initColorSchemes();
diff --git a/src/plugins/biostruct3d_view/src/StructuralAlignmentDialog.cpp b/src/plugins/biostruct3d_view/src/StructuralAlignmentDialog.cpp
index e6b23d1..1aed531 100644
--- a/src/plugins/biostruct3d_view/src/StructuralAlignmentDialog.cpp
+++ b/src/plugins/biostruct3d_view/src/StructuralAlignmentDialog.cpp
@@ -56,7 +56,7 @@ StructuralAlignmentDialog::StructuralAlignmentDialog(const BioStruct3DObject *fi
: QDialog(parent), task(0)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470527");
+ new HelpButton(this, buttonBox, "18220387");
StructuralAlignmentAlgorithmRegistry *reg = AppContext::getStructuralAlignmentAlgorithmRegistry();
foreach (const QString &id, reg->getFactoriesIds()) {
diff --git a/src/plugins/biostruct3d_view/src/deprecated/SelectModelsDialog.cpp b/src/plugins/biostruct3d_view/src/deprecated/SelectModelsDialog.cpp
index a80f122..9116773 100644
--- a/src/plugins/biostruct3d_view/src/deprecated/SelectModelsDialog.cpp
+++ b/src/plugins/biostruct3d_view/src/deprecated/SelectModelsDialog.cpp
@@ -36,7 +36,7 @@ SelectModelsDialog::SelectModelsDialog(const QList<int> &_modelIds, const QList<
: QDialog(parent), Ui_SelectModelsDialog()
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470524");
+ new HelpButton(this, buttonBox, "18220384");
buttonBox_1->button(QDialogButtonBox::Cancel)->setText(::U2::SelectModelsDialog::tr("All"));
buttonBox_1->button(QDialogButtonBox::No)->setText(::U2::SelectModelsDialog::tr("Invert"));
buttonBox->button(QDialogButtonBox::Ok)->setText(::U2::SelectModelsDialog::tr("OK"));
diff --git a/src/plugins/biostruct3d_view/src/deprecated/SettingsDialog.cpp b/src/plugins/biostruct3d_view/src/deprecated/SettingsDialog.cpp
index efb49e2..d83a474 100644
--- a/src/plugins/biostruct3d_view/src/deprecated/SettingsDialog.cpp
+++ b/src/plugins/biostruct3d_view/src/deprecated/SettingsDialog.cpp
@@ -32,7 +32,7 @@ BioStruct3DSettingsDialog::BioStruct3DSettingsDialog()
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470521");
+ new HelpButton(this, buttonBox, "18220381");
glWidget = NULL;
initColorSchemes();
diff --git a/src/plugins/biostruct3d_view/src/deprecated/StructuralAlignmentDialog.cpp b/src/plugins/biostruct3d_view/src/deprecated/StructuralAlignmentDialog.cpp
index e6b23d1..1aed531 100644
--- a/src/plugins/biostruct3d_view/src/deprecated/StructuralAlignmentDialog.cpp
+++ b/src/plugins/biostruct3d_view/src/deprecated/StructuralAlignmentDialog.cpp
@@ -56,7 +56,7 @@ StructuralAlignmentDialog::StructuralAlignmentDialog(const BioStruct3DObject *fi
: QDialog(parent), task(0)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470527");
+ new HelpButton(this, buttonBox, "18220387");
StructuralAlignmentAlgorithmRegistry *reg = AppContext::getStructuralAlignmentAlgorithmRegistry();
foreach (const QString &id, reg->getFactoriesIds()) {
diff --git a/src/plugins/biostruct3d_view/transl/english.ts b/src/plugins/biostruct3d_view/transl/english.ts
index c8c795c..d97b32d 100644
--- a/src/plugins/biostruct3d_view/transl/english.ts
+++ b/src/plugins/biostruct3d_view/transl/english.ts
@@ -199,92 +199,92 @@
<context>
<name>U2::BioStruct3DGLWidget</name>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="767"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="771"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="764"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="776"/>
<source>Spin</source>
<translation>Spin</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="756"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="760"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="753"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="765"/>
<source>Off</source>
<translation>Off</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="763"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="767"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="760"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="772"/>
<source>Models..</source>
<translation>Models..</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="771"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="775"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="768"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="780"/>
<source>Settings...</source>
<translation>Settings...</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="774"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="778"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="771"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="783"/>
<source>Close</source>
<translation>Close</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="777"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="781"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="774"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="786"/>
<source>Export Image...</source>
<translation>Export Image...</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="786"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="790"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="783"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="795"/>
<source>Align With...</source>
<translation>Align With...</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="790"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="794"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="787"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="799"/>
<source>Reset</source>
<translation>Reset</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="797"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="801"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="794"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="806"/>
<source>Render Style</source>
<translation>Render Style</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="802"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="806"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="799"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="811"/>
<source>Coloring Scheme</source>
<translation>Coloring Scheme</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="807"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="811"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="804"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="816"/>
<source>Molecular Surface Render Style</source>
<translation>Molecular Surface Render Style</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="811"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="815"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="808"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="820"/>
<source>Molecular Surface</source>
<translation>Molecular Surface</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="836"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="840"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="833"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="845"/>
<source>Structural Alignment</source>
<translation>Structural Alignment</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="584"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="588"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="581"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="593"/>
<source>Error</source>
<translation>Error</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="584"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="588"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="581"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="593"/>
<source>Unable to open file %1 for writing</source>
<translation>Unable to open file %1 for writing</translation>
</message>
@@ -337,8 +337,8 @@
<context>
<name>U2::BioStruct3DViewContext</name>
<message>
- <location filename="../src/BioStruct3DViewPlugin.cpp" line="183"/>
- <location filename="../src/deprecated/BioStruct3DViewPlugin.cpp" line="183"/>
+ <location filename="../src/BioStruct3DViewPlugin.cpp" line="194"/>
+ <location filename="../src/deprecated/BioStruct3DViewPlugin.cpp" line="194"/>
<source>Close 3D Structure Viewer</source>
<translation>Close 3D Structure Viewer</translation>
</message>
@@ -346,14 +346,22 @@
<context>
<name>U2::BioStruct3DViewPlugin</name>
<message>
- <location filename="../src/BioStruct3DViewPlugin.cpp" line="87"/>
- <location filename="../src/deprecated/BioStruct3DViewPlugin.cpp" line="87"/>
+ <location filename="../src/BioStruct3DViewPlugin.cpp" line="92"/>
+ <location filename="../src/deprecated/BioStruct3DViewPlugin.cpp" line="92"/>
+ <source>Unfortunately, your system does not have OpenGL Support.
+The 3D Structure Viewer is not available.
+You may try to upgrade your system by updating the video card driver.</source>
+ <translation type="unfinished"></translation>
+ </message>
+ <message>
+ <location filename="../src/BioStruct3DViewPlugin.cpp" line="98"/>
+ <location filename="../src/deprecated/BioStruct3DViewPlugin.cpp" line="98"/>
<source>3D Structure Viewer</source>
<translation>3D Structure Viewer</translation>
</message>
<message>
- <location filename="../src/BioStruct3DViewPlugin.cpp" line="87"/>
- <location filename="../src/deprecated/BioStruct3DViewPlugin.cpp" line="87"/>
+ <location filename="../src/BioStruct3DViewPlugin.cpp" line="98"/>
+ <location filename="../src/deprecated/BioStruct3DViewPlugin.cpp" line="98"/>
<source>Visualizes 3D structures of biological molecules.</source>
<translation>Visualizes 3D structures of biological molecules.</translation>
</message>
diff --git a/src/plugins/biostruct3d_view/transl/russian.ts b/src/plugins/biostruct3d_view/transl/russian.ts
index bc672c8..6494e69 100644
--- a/src/plugins/biostruct3d_view/transl/russian.ts
+++ b/src/plugins/biostruct3d_view/transl/russian.ts
@@ -199,92 +199,92 @@
<context>
<name>U2::BioStruct3DGLWidget</name>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="767"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="771"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="764"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="776"/>
<source>Spin</source>
<translation>Вращение</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="756"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="760"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="753"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="765"/>
<source>Off</source>
<translation>Нет</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="763"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="767"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="760"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="772"/>
<source>Models..</source>
<translation>Модели..</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="786"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="790"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="783"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="795"/>
<source>Align With...</source>
<translation>Выровнять с...</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="807"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="811"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="804"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="816"/>
<source>Molecular Surface Render Style</source>
<translation>Стиль отрисовки поверхности</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="811"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="815"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="808"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="820"/>
<source>Molecular Surface</source>
<translation>Молекулярная поверхность</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="584"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="588"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="581"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="593"/>
<source>Error</source>
<translation>Ошибка</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="584"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="588"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="581"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="593"/>
<source>Unable to open file %1 for writing</source>
<translation>Невозможно открыть файл %1 для записи</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="771"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="775"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="768"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="780"/>
<source>Settings...</source>
<translation>Настройки...</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="774"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="778"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="771"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="783"/>
<source>Close</source>
<translation>Закрыть</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="777"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="781"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="774"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="786"/>
<source>Export Image...</source>
<translation>Экспортировать изображение...</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="790"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="794"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="787"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="799"/>
<source>Reset</source>
<translation>Сбросить</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="797"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="801"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="794"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="806"/>
<source>Render Style</source>
<translation>Стиль отрисовки</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="802"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="806"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="799"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="811"/>
<source>Coloring Scheme</source>
<translation>Цветовая схема</translation>
</message>
<message>
- <location filename="../src/BioStruct3DGLWidget.cpp" line="836"/>
- <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="840"/>
+ <location filename="../src/BioStruct3DGLWidget.cpp" line="833"/>
+ <location filename="../src/deprecated/BioStruct3DGLWidget.cpp" line="845"/>
<source>Structural Alignment</source>
<translation>Структурное выравнивание</translation>
</message>
@@ -337,8 +337,8 @@
<context>
<name>U2::BioStruct3DViewContext</name>
<message>
- <location filename="../src/BioStruct3DViewPlugin.cpp" line="183"/>
- <location filename="../src/deprecated/BioStruct3DViewPlugin.cpp" line="183"/>
+ <location filename="../src/BioStruct3DViewPlugin.cpp" line="194"/>
+ <location filename="../src/deprecated/BioStruct3DViewPlugin.cpp" line="194"/>
<source>Close 3D Structure Viewer</source>
<translation>Закрыть визуализатор 3D структур</translation>
</message>
@@ -346,14 +346,22 @@
<context>
<name>U2::BioStruct3DViewPlugin</name>
<message>
- <location filename="../src/BioStruct3DViewPlugin.cpp" line="87"/>
- <location filename="../src/deprecated/BioStruct3DViewPlugin.cpp" line="87"/>
+ <location filename="../src/BioStruct3DViewPlugin.cpp" line="92"/>
+ <location filename="../src/deprecated/BioStruct3DViewPlugin.cpp" line="92"/>
+ <source>Unfortunately, your system does not have OpenGL Support.
+The 3D Structure Viewer is not available.
+You may try to upgrade your system by updating the video card driver.</source>
+ <translation type="unfinished"></translation>
+ </message>
+ <message>
+ <location filename="../src/BioStruct3DViewPlugin.cpp" line="98"/>
+ <location filename="../src/deprecated/BioStruct3DViewPlugin.cpp" line="98"/>
<source>3D Structure Viewer</source>
<translation>Визуализатор 3D структур</translation>
</message>
<message>
- <location filename="../src/BioStruct3DViewPlugin.cpp" line="87"/>
- <location filename="../src/deprecated/BioStruct3DViewPlugin.cpp" line="87"/>
+ <location filename="../src/BioStruct3DViewPlugin.cpp" line="98"/>
+ <location filename="../src/deprecated/BioStruct3DViewPlugin.cpp" line="98"/>
<source>Visualizes 3D structures of biological molecules.</source>
<translation>Визуализирует 3D структуры биологических молекул.</translation>
</message>
diff --git a/src/plugins/circular_view/src/CircularViewSettingsWidgetFactory.cpp b/src/plugins/circular_view/src/CircularViewSettingsWidgetFactory.cpp
index a3d40e7..257a4f3 100644
--- a/src/plugins/circular_view/src/CircularViewSettingsWidgetFactory.cpp
+++ b/src/plugins/circular_view/src/CircularViewSettingsWidgetFactory.cpp
@@ -31,7 +31,7 @@ namespace U2 {
const QString CircularViewSettingsWidgetFactory::GROUP_ID = "OP_CV_SETTINGS";
const QString CircularViewSettingsWidgetFactory::GROUP_ICON_STR = ":circular_view/images/circular.png";
const QString CircularViewSettingsWidgetFactory::GROUP_TITLE = QObject::tr("Circular View Settings");
-const QString CircularViewSettingsWidgetFactory::GROUP_DOC_PAGE = "17470512";
+const QString CircularViewSettingsWidgetFactory::GROUP_DOC_PAGE = "18220372";
CircularViewSettingsWidgetFactory::CircularViewSettingsWidgetFactory(CircularViewContext* context)
: ctx(context) {
diff --git a/src/plugins/dbi_bam/src/BAMFormat.cpp b/src/plugins/dbi_bam/src/BAMFormat.cpp
index 59d4f39..54c5cd1 100644
--- a/src/plugins/dbi_bam/src/BAMFormat.cpp
+++ b/src/plugins/dbi_bam/src/BAMFormat.cpp
@@ -47,7 +47,7 @@ BAMFormat::BAMFormat()
tr("BAM"),
QStringList("bam"),
DocumentFormatFlags(DocumentFormatFlag_NoPack) | DocumentFormatFlag_NoFullMemoryLoad
- | DocumentFormatFlag_Hidden | DocumentFormatFlag_SupportWriting)
+ | DocumentFormatFlag_Hidden | DocumentFormatFlag_SupportWriting | DocumentFormatFlag_CannotBeCompressed)
{
}
diff --git a/src/plugins/dbi_bam/src/ConvertToSQLiteDialog.cpp b/src/plugins/dbi_bam/src/ConvertToSQLiteDialog.cpp
index c52b49a..1111277 100644
--- a/src/plugins/dbi_bam/src/ConvertToSQLiteDialog.cpp
+++ b/src/plugins/dbi_bam/src/ConvertToSQLiteDialog.cpp
@@ -57,7 +57,7 @@ ConvertToSQLiteDialog::ConvertToSQLiteDialog(const GUrl& _sourceUrl, BAMInfo& _b
sourceUrl(_sourceUrl),
bamInfo(_bamInfo) {
ui.setupUi(this);
- new HelpButton(this, ui.buttonBox, "17470600");
+ new HelpButton(this, ui.buttonBox, "18220460");
ui.buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Import"));
ui.buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins/dbi_bam/src/ConvertToSQLiteDialog.ui b/src/plugins/dbi_bam/src/ConvertToSQLiteDialog.ui
index 3ee5ecc..b217e05 100644
--- a/src/plugins/dbi_bam/src/ConvertToSQLiteDialog.ui
+++ b/src/plugins/dbi_bam/src/ConvertToSQLiteDialog.ui
@@ -2,36 +2,22 @@
<ui version="4.0">
<class>ConvertToSQLiteDialog</class>
<widget class="QDialog" name="ConvertToSQLiteDialog">
- <property name="geometry">
- <rect>
- <x>0</x>
- <y>0</y>
- <width>515</width>
- <height>318</height>
- </rect>
- </property>
- <property name="sizePolicy">
- <sizepolicy hsizetype="Preferred" vsizetype="Maximum">
- <horstretch>0</horstretch>
- <verstretch>0</verstretch>
- </sizepolicy>
- </property>
- <property name="minimumSize">
- <size>
- <width>0</width>
- <height>0</height>
- </size>
- </property>
<property name="windowTitle">
<string>Import BAM File</string>
</property>
<layout class="QVBoxLayout" name="verticalLayout">
+ <property name="sizeConstraint">
+ <enum>QLayout::SetFixedSize</enum>
+ </property>
<item>
- <layout class="QGridLayout" name="gridLayout_2">
- <item row="0" column="0">
- <widget class="QLabel" name="sourceUrlLabel">
+ <layout class="QGridLayout" name="gridLayout_3">
+ <property name="sizeConstraint">
+ <enum>QLayout::SetFixedSize</enum>
+ </property>
+ <item row="1" column="0">
+ <widget class="QLabel" name="refUrlLabel">
<property name="text">
- <string>Source URL:</string>
+ <string>Reference:</string>
</property>
</widget>
</item>
@@ -42,24 +28,24 @@
</property>
</widget>
</item>
- <item row="0" column="2">
- <widget class="QPushButton" name="bamInfoButton">
- <property name="text">
- <string>Info</string>
+ <item row="1" column="1">
+ <widget class="QLineEdit" name="refUrlEdit">
+ <property name="readOnly">
+ <bool>true</bool>
</property>
</widget>
</item>
- <item row="1" column="0">
- <widget class="QLabel" name="refUrlLabel">
+ <item row="0" column="0">
+ <widget class="QLabel" name="sourceUrlLabel">
<property name="text">
- <string>Reference:</string>
+ <string>Source URL:</string>
</property>
</widget>
</item>
- <item row="1" column="1">
- <widget class="QLineEdit" name="refUrlEdit">
- <property name="readOnly">
- <bool>true</bool>
+ <item row="0" column="2">
+ <widget class="QPushButton" name="bamInfoButton">
+ <property name="text">
+ <string>Info</string>
</property>
</widget>
</item>
@@ -111,6 +97,9 @@
<item>
<widget class="QWidget" name="selectionButtons" native="true">
<layout class="QHBoxLayout" name="horizontalLayout_3">
+ <property name="sizeConstraint">
+ <enum>QLayout::SetFixedSize</enum>
+ </property>
<property name="leftMargin">
<number>0</number>
</property>
@@ -169,6 +158,9 @@
</item>
<item>
<layout class="QGridLayout" name="gridLayout">
+ <property name="sizeConstraint">
+ <enum>QLayout::SetFixedSize</enum>
+ </property>
<item row="1" column="0">
<widget class="QLabel" name="label_2">
<property name="text">
@@ -190,6 +182,9 @@
</item>
<item>
<layout class="QHBoxLayout" name="horizontalLayout">
+ <property name="sizeConstraint">
+ <enum>QLayout::SetFixedSize</enum>
+ </property>
<item>
<widget class="QCheckBox" name="addToProjectBox">
<property name="text">
diff --git a/src/plugins/dbi_bam/src/ConvertToSQLiteTask.cpp b/src/plugins/dbi_bam/src/ConvertToSQLiteTask.cpp
index 45717ce..c7ed996 100644
--- a/src/plugins/dbi_bam/src/ConvertToSQLiteTask.cpp
+++ b/src/plugins/dbi_bam/src/ConvertToSQLiteTask.cpp
@@ -64,7 +64,7 @@ static void enableCoverageOnImport(U2AssemblyCoverageImportInfo &cii, int refere
cii.computeCoverage = true;
int coverageInfoSize = qMin(U2AssemblyUtils::MAX_COVERAGE_VECTOR_SIZE, referenceLength);
cii.coverageBasesPerPoint = qMax(1.0, ((double)referenceLength)/coverageInfoSize);
- cii.coverage.coverage.resize(coverageInfoSize);
+ cii.coverage.coverage->resize(coverageInfoSize);
}
namespace {
@@ -616,7 +616,8 @@ qint64 ConvertToSQLiteTask::importMappedSortedReads(BamReader *bamReader, Reader
.arg(references.size()));
U2AssemblyReadsImportInfo &importInfo = importInfos[referenceId];
- enableCoverageOnImport(importInfo.coverageInfo, references[referenceId].getLength());
+ //workaround for UGENE-5366, uncomment after fixing coverage on import
+ //enableCoverageOnImport(importInfo.coverageInfo, references[referenceId].getLength());
QScopedPointer<DbiIterator> dbiIterator;
if(bamInfo.hasIndex()) {
@@ -884,7 +885,7 @@ void ConvertToSQLiteTask::updateImportInfoReadsCountAttribute(const U2AssemblyRe
void ConvertToSQLiteTask::updateImportInfoCoverageStatAttribute(const U2AssemblyReadsImportInfo &importInfo, const U2Assembly &assembly, U2AttributeDbi *attributeDbi) {
const U2AssemblyCoverageStat &coverageStat = importInfo.coverageInfo.coverage;
- CHECK(!coverageStat.coverage.isEmpty(), );
+ CHECK(!coverageStat.coverage->isEmpty(), );
U2ByteArrayAttribute attribute;
attribute.objectId = assembly.id;
diff --git a/src/plugins/dna_export/src/CSVColumnConfigurationDialog.cpp b/src/plugins/dna_export/src/CSVColumnConfigurationDialog.cpp
index b8337d6..fb68a46 100644
--- a/src/plugins/dna_export/src/CSVColumnConfigurationDialog.cpp
+++ b/src/plugins/dna_export/src/CSVColumnConfigurationDialog.cpp
@@ -37,7 +37,7 @@ CSVColumnConfigurationDialog::CSVColumnConfigurationDialog(QWidget* w, const Col
: QDialog(w), config(_config)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470505");
+ new HelpButton(this, buttonBox, "18220365");
connect(complMarkRB, SIGNAL(toggled(bool)), SLOT(sl_complMarkToggle(bool)));
connect(startRB, SIGNAL(toggled(bool)), SLOT(sl_startToggle(bool)));
diff --git a/src/plugins/dna_export/src/DNASequenceGeneratorDialog.cpp b/src/plugins/dna_export/src/DNASequenceGeneratorDialog.cpp
index b2b3800..cd7731f 100644
--- a/src/plugins/dna_export/src/DNASequenceGeneratorDialog.cpp
+++ b/src/plugins/dna_export/src/DNASequenceGeneratorDialog.cpp
@@ -57,7 +57,7 @@ DNASequenceGeneratorDialog::DNASequenceGeneratorDialog(QWidget* p)
: QDialog(p),
saveController(NULL) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470733");
+ new HelpButton(this, buttonBox, "18220593");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Generate"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
@@ -167,7 +167,7 @@ void DNASequenceGeneratorDialog::sl_refButtonToggled(bool checked) {
BaseContentDialog::BaseContentDialog(QMap<char, qreal>& percentMap_, QWidget* p)
: QDialog(p), percentMap(percentMap_) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470733");
+ new HelpButton(this, buttonBox, "18220593");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Save"));
percentASpin->setValue(percentMap.value('A')*100.0);
diff --git a/src/plugins/dna_export/src/ExportBlastResultDialog.cpp b/src/plugins/dna_export/src/ExportBlastResultDialog.cpp
index e727919..04b38f6 100644
--- a/src/plugins/dna_export/src/ExportBlastResultDialog.cpp
+++ b/src/plugins/dna_export/src/ExportBlastResultDialog.cpp
@@ -39,7 +39,7 @@ ExportBlastResultDialog::ExportBlastResultDialog(QWidget* p, const QString& defa
: QDialog(p),
saveController(NULL) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470661");
+ new HelpButton(this, buttonBox, "18220521");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Export"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins/dna_export/src/ExportChromatogramDialog.cpp b/src/plugins/dna_export/src/ExportChromatogramDialog.cpp
index c170d1a..9d4ffbc 100644
--- a/src/plugins/dna_export/src/ExportChromatogramDialog.cpp
+++ b/src/plugins/dna_export/src/ExportChromatogramDialog.cpp
@@ -45,7 +45,7 @@ ExportChromatogramDialog::ExportChromatogramDialog(QWidget* p, const GUrl& fileU
saveController(NULL)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470529");
+ new HelpButton(this, buttonBox, "18220389");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Export"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins/dna_export/src/ExportMSA2MSADialog.cpp b/src/plugins/dna_export/src/ExportMSA2MSADialog.cpp
index 96487de..f814093 100644
--- a/src/plugins/dna_export/src/ExportMSA2MSADialog.cpp
+++ b/src/plugins/dna_export/src/ExportMSA2MSADialog.cpp
@@ -42,7 +42,7 @@ ExportMSA2MSADialog::ExportMSA2MSADialog(const QString& defaultFileName, const D
: QDialog(p),
saveController(NULL) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470437");
+ new HelpButton(this, buttonBox, "18220297");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Export"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins/dna_export/src/ExportMSA2SequencesDialog.cpp b/src/plugins/dna_export/src/ExportMSA2SequencesDialog.cpp
index e1af67e..b656cec 100644
--- a/src/plugins/dna_export/src/ExportMSA2SequencesDialog.cpp
+++ b/src/plugins/dna_export/src/ExportMSA2SequencesDialog.cpp
@@ -39,7 +39,7 @@ ExportMSA2SequencesDialog::ExportMSA2SequencesDialog(QWidget* p)
: QDialog(p),
saveController(NULL) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470436");
+ new HelpButton(this, buttonBox, "18220296");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Export"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins/dna_export/src/ExportSequences2MSADialog.cpp b/src/plugins/dna_export/src/ExportSequences2MSADialog.cpp
index 5c5ea25..1e61c03 100644
--- a/src/plugins/dna_export/src/ExportSequences2MSADialog.cpp
+++ b/src/plugins/dna_export/src/ExportSequences2MSADialog.cpp
@@ -39,7 +39,7 @@ ExportSequences2MSADialog::ExportSequences2MSADialog(QWidget* p, const QString&
: QDialog(p),
saveController(NULL) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470435");
+ new HelpButton(this, buttonBox, "18220295");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Export"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
okButton = buttonBox->button(QDialogButtonBox::Ok);
diff --git a/src/plugins/dna_export/src/ExportSequencesDialog.cpp b/src/plugins/dna_export/src/ExportSequencesDialog.cpp
index e408f58..372dcb5 100644
--- a/src/plugins/dna_export/src/ExportSequencesDialog.cpp
+++ b/src/plugins/dna_export/src/ExportSequencesDialog.cpp
@@ -50,7 +50,7 @@ ExportSequencesDialog::ExportSequencesDialog( bool m, bool allowComplement, bool
saveController(NULL),
defaultFileName(defaultFileName) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470434");
+ new HelpButton(this, buttonBox, "18220294");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Export"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins/dna_export/src/GetSequenceByIdDialog.cpp b/src/plugins/dna_export/src/GetSequenceByIdDialog.cpp
index c315a40..e1d9ce1 100644
--- a/src/plugins/dna_export/src/GetSequenceByIdDialog.cpp
+++ b/src/plugins/dna_export/src/GetSequenceByIdDialog.cpp
@@ -40,7 +40,7 @@ namespace U2 {
GetSequenceByIdDialog::GetSequenceByIdDialog(QWidget *w): QDialog(w) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470660");
+ new HelpButton(this, buttonBox, "18220520");
connect(toolButton, SIGNAL(clicked()), SLOT(sl_saveFilenameButtonClicked()));
QString defaultPath = AppContext::getAppSettings()->getUserAppsSettings()->getDownloadDirPath();
diff --git a/src/plugins/dna_export/src/ImportAnnotationsFromCSVDialog.cpp b/src/plugins/dna_export/src/ImportAnnotationsFromCSVDialog.cpp
index d0f03ab..935df6f 100644
--- a/src/plugins/dna_export/src/ImportAnnotationsFromCSVDialog.cpp
+++ b/src/plugins/dna_export/src/ImportAnnotationsFromCSVDialog.cpp
@@ -62,7 +62,7 @@ ImportAnnotationsFromCSVDialog::ImportAnnotationsFromCSVDialog(QWidget* w)
: QDialog (w),
saveController(NULL) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470505");
+ new HelpButton(this, buttonBox, "18220365");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Run"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins/dna_export/transl/russian.ts b/src/plugins/dna_export/transl/russian.ts
index bb0fdab..a0b676e 100644
--- a/src/plugins/dna_export/transl/russian.ts
+++ b/src/plugins/dna_export/transl/russian.ts
@@ -1959,7 +1959,7 @@ You can use this worker to convert .fasta and .qual pair to fastq format.</sourc
<message>
<location filename="../src/ExportAlignmentViewItems.cpp" line="81"/>
<source>Amino translation...</source>
- <translation>Сохранить транслированное выравнивание...</translation>
+ <translation>Экспортировать транслированное выравнивание...</translation>
</message>
<message>
<location filename="../src/ExportAlignmentViewItems.cpp" line="110"/>
diff --git a/src/plugins/dna_flexibility/src/DNAFlexDialog.cpp b/src/plugins/dna_flexibility/src/DNAFlexDialog.cpp
index 3ffccbc..6b72f8a 100644
--- a/src/plugins/dna_flexibility/src/DNAFlexDialog.cpp
+++ b/src/plugins/dna_flexibility/src/DNAFlexDialog.cpp
@@ -47,7 +47,7 @@ DNAFlexDialog::DNAFlexDialog(ADVSequenceObjectContext* _ctx)
: QDialog(_ctx->getAnnotatedDNAView()->getWidget())
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470654");
+ new HelpButton(this, buttonBox, "18220514");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Search"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins/dna_stat/src/DNAStatMSAProfileDialog.cpp b/src/plugins/dna_stat/src/DNAStatMSAProfileDialog.cpp
index a360f3e..00b254f 100644
--- a/src/plugins/dna_stat/src/DNAStatMSAProfileDialog.cpp
+++ b/src/plugins/dna_stat/src/DNAStatMSAProfileDialog.cpp
@@ -46,7 +46,7 @@ DNAStatMSAProfileDialog::DNAStatMSAProfileDialog(QWidget* p, MSAEditor* _c)
ctx(_c),
saveController(NULL) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470597");
+ new HelpButton(this, buttonBox, "18220457");
initSaveController();
}
diff --git a/src/plugins/dna_stat/src/DistanceMatrixMSAProfileDialog.cpp b/src/plugins/dna_stat/src/DistanceMatrixMSAProfileDialog.cpp
index 3b160b8..c96a1f4 100644
--- a/src/plugins/dna_stat/src/DistanceMatrixMSAProfileDialog.cpp
+++ b/src/plugins/dna_stat/src/DistanceMatrixMSAProfileDialog.cpp
@@ -52,7 +52,7 @@ DistanceMatrixMSAProfileDialog::DistanceMatrixMSAProfileDialog(QWidget* p, MSAEd
ctx(_c),
saveController(NULL) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470598");
+ new HelpButton(this, buttonBox, "18220458");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Generate"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins/dotplot/src/DotPlotDialog.cpp b/src/plugins/dotplot/src/DotPlotDialog.cpp
index 05a7ad8..5936dcd 100644
--- a/src/plugins/dotplot/src/DotPlotDialog.cpp
+++ b/src/plugins/dotplot/src/DotPlotDialog.cpp
@@ -53,7 +53,7 @@ DotPlotDialog::DotPlotDialog(QWidget *parent, AnnotatedDNAView* currentADV, int
,openSequenceTask(NULL), curURL("")
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470536");
+ new HelpButton(this, buttonBox, "18220396");
startButton = buttonBox->button(QDialogButtonBox::Ok);
SAFE_POINT(adv != NULL, "DotPlotDialog called without view context!", );
diff --git a/src/plugins/dotplot/src/DotPlotDialog.ui b/src/plugins/dotplot/src/DotPlotDialog.ui
index 140bc9d..d74f9c4 100644
--- a/src/plugins/dotplot/src/DotPlotDialog.ui
+++ b/src/plugins/dotplot/src/DotPlotDialog.ui
@@ -5,12 +5,15 @@
<property name="windowModality">
<enum>Qt::ApplicationModal</enum>
</property>
+ <property name="enabled">
+ <bool>true</bool>
+ </property>
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>547</width>
- <height>328</height>
+ <height>374</height>
</rect>
</property>
<property name="windowTitle">
diff --git a/src/plugins/dotplot/src/DotPlotFilesDialog.cpp b/src/plugins/dotplot/src/DotPlotFilesDialog.cpp
index 7735f00..efe5b1a 100644
--- a/src/plugins/dotplot/src/DotPlotFilesDialog.cpp
+++ b/src/plugins/dotplot/src/DotPlotFilesDialog.cpp
@@ -40,7 +40,7 @@ DotPlotFilesDialog::DotPlotFilesDialog(QWidget *parent)
: QDialog(parent)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470536");
+ new HelpButton(this, buttonBox, "18220396");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Next"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins/dotplot/src/DotPlotFilterDialog.cpp b/src/plugins/dotplot/src/DotPlotFilterDialog.cpp
index 8d05c7c..ca50a35 100644
--- a/src/plugins/dotplot/src/DotPlotFilterDialog.cpp
+++ b/src/plugins/dotplot/src/DotPlotFilterDialog.cpp
@@ -47,7 +47,7 @@ DotPlotFilterDialog::DotPlotFilterDialog(QWidget *parent, ADVSequenceObjectConte
,seqYItem(NULL)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470546");
+ new HelpButton(this, buttonBox, "18220406");
{
diff --git a/src/plugins/enzymes/src/ConstructMoleculeDialog.cpp b/src/plugins/enzymes/src/ConstructMoleculeDialog.cpp
index 5c8cd5f..4414c94 100644
--- a/src/plugins/enzymes/src/ConstructMoleculeDialog.cpp
+++ b/src/plugins/enzymes/src/ConstructMoleculeDialog.cpp
@@ -47,7 +47,7 @@ ConstructMoleculeDialog::ConstructMoleculeDialog(const QList<DNAFragment>& fragm
fragments(fragmentList),
saveController(NULL) {
setupUi(this);
- new HelpButton(this, buttonBox, "17468951");
+ new HelpButton(this, buttonBox, "18220536");
tabWidget->setCurrentIndex(0);
const QString coreLengthStr = ConstructMoleculeDialog::tr("core length");
@@ -58,7 +58,7 @@ ConstructMoleculeDialog::ConstructMoleculeDialog(const QList<DNAFragment>& fragm
.arg(frag.getSequenceDocName())
.arg(frag.getName())
.arg(coreLengthStr)
- .arg(frag.getLength(true));
+ .arg(frag.getLength());
fragmentListWidget->addItem(fragItem);
}
diff --git a/src/plugins/enzymes/src/CreateFragmentDialog.cpp b/src/plugins/enzymes/src/CreateFragmentDialog.cpp
index f761b82..6799c9b 100644
--- a/src/plugins/enzymes/src/CreateFragmentDialog.cpp
+++ b/src/plugins/enzymes/src/CreateFragmentDialog.cpp
@@ -49,7 +49,7 @@ CreateFragmentDialog::CreateFragmentDialog(ADVSequenceObjectContext* ctx, QWidg
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470675");
+ new HelpButton(this, buttonBox, "18220535");
tabWidget->setCurrentIndex(0);
@@ -69,7 +69,7 @@ CreateFragmentDialog::CreateFragmentDialog(U2SequenceObject* obj, const U2Region
: QDialog(p)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470675");
+ new HelpButton(this, buttonBox, "18220535");
seqObj = obj;
QList<GObject*> aObjects = GObjectUtils::findAllObjects(UOF_LoadedOnly,GObjectTypes::ANNOTATION_TABLE);
diff --git a/src/plugins/enzymes/src/DNAFragment.cpp b/src/plugins/enzymes/src/DNAFragment.cpp
index 495ce73..37e73c2 100644
--- a/src/plugins/enzymes/src/DNAFragment.cpp
+++ b/src/plugins/enzymes/src/DNAFragment.cpp
@@ -243,22 +243,13 @@ void DNAFragment::setRightOverhang(const QByteArray& overhang)
setOverhang(qName, buf);
}
-int DNAFragment::getLength(bool coreLength) const {
+int DNAFragment::getLength() const {
assert(!isEmpty());
QVector<U2Region> regions = getFragmentRegions();
int len = 0;
foreach(const U2Region& r, regions) {
len += r.length;
}
- if (!coreLength) {
- if (annotatedFragment->findFirstQualifierValue(QUALIFIER_RIGHT_STRAND) == OVERHANG_STRAND_DIRECT) {
- len += annotatedFragment->findFirstQualifierValue(QUALIFIER_RIGHT_OVERHANG).length();
- }
- if (annotatedFragment->findFirstQualifierValue(QUALIFIER_LEFT_STRAND) == OVERHANG_STRAND_DIRECT) {
- len += annotatedFragment->findFirstQualifierValue(QUALIFIER_LEFT_OVERHANG).length();
- }
- }
-
return len;
}
diff --git a/src/plugins/enzymes/src/DNAFragment.h b/src/plugins/enzymes/src/DNAFragment.h
index c559d00..5fd4711 100644
--- a/src/plugins/enzymes/src/DNAFragment.h
+++ b/src/plugins/enzymes/src/DNAFragment.h
@@ -85,7 +85,7 @@ public:
QString getSequenceDocName() const;
QVector<U2Region> getFragmentRegions() const;
QByteArray getSequence(U2OpStatus &os) const;
- int getLength(bool coreLength = false) const;
+ int getLength() const;
bool isInverted() const { return reverseCompl; }
const DNAAlphabet* getAlphabet() const;
QByteArray getSourceSequence(U2OpStatus &os) const;
diff --git a/src/plugins/enzymes/src/DigestSequenceDialog.cpp b/src/plugins/enzymes/src/DigestSequenceDialog.cpp
index 02597a7..cb0f67c 100644
--- a/src/plugins/enzymes/src/DigestSequenceDialog.cpp
+++ b/src/plugins/enzymes/src/DigestSequenceDialog.cpp
@@ -58,7 +58,7 @@ DigestSequenceDialog::DigestSequenceDialog(ADVSequenceObjectContext* ctx, QWidge
: QDialog(p),seqCtx(ctx), timer(NULL), animationCounter(0)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470674");
+ new HelpButton(this, buttonBox, "18220534");
okButton = buttonBox->button(QDialogButtonBox::Ok);
tabWidget->setCurrentIndex(0);
diff --git a/src/plugins/enzymes/src/EditFragmentDialog.cpp b/src/plugins/enzymes/src/EditFragmentDialog.cpp
index 1b2d91d..6285e7b 100644
--- a/src/plugins/enzymes/src/EditFragmentDialog.cpp
+++ b/src/plugins/enzymes/src/EditFragmentDialog.cpp
@@ -49,7 +49,7 @@ EditFragmentDialog::EditFragmentDialog( DNAFragment& fragment, QWidget* p )
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470681");
+ new HelpButton(this, buttonBox, "18220541");
connect(lBluntButton, SIGNAL(toggled(bool)), SLOT(sl_onLeftBluntButtonToogled(bool)));
connect(rBluntButton, SIGNAL(toggled(bool)), SLOT(sl_onRightBluntButtonToggled(bool)));
diff --git a/src/plugins/enzymes/src/EnzymesQuery.cpp b/src/plugins/enzymes/src/EnzymesQuery.cpp
index ba6be1e..04a8670 100644
--- a/src/plugins/enzymes/src/EnzymesQuery.cpp
+++ b/src/plugins/enzymes/src/EnzymesQuery.cpp
@@ -145,7 +145,7 @@ QString EnzymesSelectorDialogHandler::getSelectedString(QDialog* dlg) {
EnzymesSelectorDialog::EnzymesSelectorDialog(EnzymesSelectorDialogHandler* parent)
: factory(parent) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470666");
+ new HelpButton(this, buttonBox, "18220526");
QVBoxLayout* vl = new QVBoxLayout();
enzSel = new EnzymesSelectorWidget();
diff --git a/src/plugins/enzymes/src/FindEnzymesDialog.cpp b/src/plugins/enzymes/src/FindEnzymesDialog.cpp
index 89fdeb4..0f06202 100644
--- a/src/plugins/enzymes/src/FindEnzymesDialog.cpp
+++ b/src/plugins/enzymes/src/FindEnzymesDialog.cpp
@@ -516,7 +516,7 @@ void EnzymesSelectorWidget::sl_saveEnzymesFile()
FindEnzymesDialog::FindEnzymesDialog(ADVSequenceObjectContext* sctx)
: QDialog(sctx->getAnnotatedDNAView()->getWidget()), seqCtx(sctx) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470666");
+ new HelpButton(this, buttonBox, "18220526");
maxHitSB->setMaximum(INT_MAX);
minHitSB->setMaximum(INT_MAX);
diff --git a/src/plugins/enzymes/src/FindEnzymesTask.cpp b/src/plugins/enzymes/src/FindEnzymesTask.cpp
index c7188b8..7236e5f 100644
--- a/src/plugins/enzymes/src/FindEnzymesTask.cpp
+++ b/src/plugins/enzymes/src/FindEnzymesTask.cpp
@@ -109,11 +109,12 @@ FindEnzymesTask::FindEnzymesTask(const U2EntityRef& seqRef, const U2Region& regi
: Task(tr("Find Enzymes"), TaskFlags_NR_FOSCOE),
maxResults(mr),
excludedRegions(excludedRegions),
- circular(_circular)
+ circular(_circular),
+ seqlen(0)
{
U2SequenceObject seq("sequence", seqRef);
- assert(seq.getAlphabet()->isNucleic());
+ SAFE_POINT(seq.getAlphabet()->isNucleic(), tr("Alphabet is not nucleic."), );
seqlen = seq.getSequenceLength();
//for every enzymes in selection create FindSingleEnzymeTask
foreach(const SEnzymeData& e, enzymes) {
@@ -239,7 +240,7 @@ FindSingleEnzymeTask::FindSingleEnzymeTask(const U2EntityRef& _seqRef, const U2R
{
U2SequenceObject dnaSeq("sequence", dnaSeqRef);
- assert(dnaSeq.getAlphabet()->isNucleic());
+ SAFE_POINT(dnaSeq.getAlphabet()->isNucleic(), tr("Alphabet is not nucleic."), );
if (resultListener == NULL) {
resultListener = this;
}
@@ -333,9 +334,8 @@ FindEnzymesAutoAnnotationUpdater::FindEnzymesAutoAnnotationUpdater()
}
-Task* FindEnzymesAutoAnnotationUpdater::createAutoAnnotationsUpdateTask( const AutoAnnotationObject* aa )
-{
- const QList<SEnzymeData> enzymeList = EnzymesIO::getDefaultEnzymesList();
+Task* FindEnzymesAutoAnnotationUpdater::createAutoAnnotationsUpdateTask(const AutoAnnotationObject* aa) {
+ const QList<SEnzymeData> enzymeList = EnzymesIO::getDefaultEnzymesList();
QString selStr = AppContext::getSettings()->getValue(EnzymeSettings::LAST_SELECTION).toString();
if (selStr.isEmpty()) {
selStr = EnzymeSettings::COMMON_ENZYMES;
@@ -343,8 +343,8 @@ Task* FindEnzymesAutoAnnotationUpdater::createAutoAnnotationsUpdateTask( const A
QStringList lastSelection = selStr.split(ENZYME_LIST_SEPARATOR);
QList<SEnzymeData> selectedEnzymes;
- foreach (const QString id, lastSelection) {
- foreach (const SEnzymeData& enzyme, enzymeList) {
+ foreach(const QString id, lastSelection) {
+ foreach(const SEnzymeData& enzyme, enzymeList) {
if (id == enzyme->id) {
selectedEnzymes.append(enzyme);
}
@@ -358,10 +358,18 @@ Task* FindEnzymesAutoAnnotationUpdater::createAutoAnnotationsUpdateTask( const A
cfg.minHitCount = AppContext::getSettings()->getValue(EnzymeSettings::MIN_HIT_VALUE, 1).toInt();
cfg.maxHitCount = AppContext::getSettings()->getValue(EnzymeSettings::MAX_HIT_VALUE, INT_MAX).toInt();
cfg.maxResults = AppContext::getSettings()->getValue(EnzymeSettings::MAX_RESULTS, 500000).toInt();
- cfg.searchRegion = AppContext::getSettings()->getValue(EnzymeSettings::SEARCH_REGION, QVariant::fromValue(U2Region())).value<U2Region>();
+
+ U2Region savedSearchRegion = AppContext::getSettings()->getValue(EnzymeSettings::SEARCH_REGION, QVariant::fromValue(U2Region())).value<U2Region>();
+
+ U2SequenceObject* dnaObj = aa->getSeqObject();
+ const U2Region wholeSequenceRegion = U2Region(0, dnaObj->getSequenceLength());
+ cfg.searchRegion = savedSearchRegion.intersect(wholeSequenceRegion);
+ if (cfg.searchRegion.isEmpty()) {
+ cfg.searchRegion = wholeSequenceRegion;
+ }
QVector<U2Region> excludedRegions =
- AppContext::getSettings()->getValue(EnzymeSettings::EXCLUDED_REGION, QVariant::fromValue(QVector<U2Region>()) ).value< QVector<U2Region> >();
+ AppContext::getSettings()->getValue(EnzymeSettings::EXCLUDED_REGION, QVariant::fromValue(QVector<U2Region>())).value< QVector<U2Region> >();
if (!excludedRegions.isEmpty()) {
cfg.excludedRegions = excludedRegions;
@@ -369,7 +377,7 @@ Task* FindEnzymesAutoAnnotationUpdater::createAutoAnnotationsUpdateTask( const A
AnnotationTableObject *aObj = aa->getAnnotationObject();
const U2EntityRef& dnaRef = aa->getSeqObject()->getEntityRef();
- Task* task = new FindEnzymesToAnnotationsTask(aObj, dnaRef, selectedEnzymes, cfg );
+ Task* task = new FindEnzymesToAnnotationsTask(aObj, dnaRef, selectedEnzymes, cfg);
return task;
}
diff --git a/src/plugins/enzymes/transl/english.ts b/src/plugins/enzymes/transl/english.ts
index f753566..17402e3 100644
--- a/src/plugins/enzymes/transl/english.ts
+++ b/src/plugins/enzymes/transl/english.ts
@@ -723,42 +723,42 @@ Choose another region.</translation>
<context>
<name>U2::DigestSequenceTask</name>
<message>
- <location filename="../src/CloningUtilTasks.cpp" line="136"/>
+ <location filename="../src/CloningUtilTasks.cpp" line="131"/>
<source>Can't use restriction site %1 for digestion, cleavage site is unknown </source>
<translation>Can't use restriction site %1 for digestion, cleavage site is unknown </translation>
</message>
<message>
- <location filename="../src/CloningUtilTasks.cpp" line="218"/>
+ <location filename="../src/CloningUtilTasks.cpp" line="213"/>
<source>Unable to digest into fragments: intersecting restriction sites %1 (%2..%3) and %4 (%5..%6)</source>
<translation>Unable to digest into fragments: intersecting restriction sites %1 (%2..%3) and %4 (%5..%6)</translation>
</message>
<message>
- <location filename="../src/CloningUtilTasks.cpp" line="324"/>
+ <location filename="../src/CloningUtilTasks.cpp" line="319"/>
<source>circular</source>
<translation>circular</translation>
</message>
<message>
- <location filename="../src/CloningUtilTasks.cpp" line="324"/>
+ <location filename="../src/CloningUtilTasks.cpp" line="319"/>
<source>linear</source>
<translation>linear</translation>
</message>
<message>
- <location filename="../src/CloningUtilTasks.cpp" line="325"/>
+ <location filename="../src/CloningUtilTasks.cpp" line="320"/>
<source><h3><br>Digest into fragments %1 (%2)</h3></source>
<translation><h3><br>Digest into fragments %1 (%2)</h3></translation>
</message>
<message>
- <location filename="../src/CloningUtilTasks.cpp" line="326"/>
+ <location filename="../src/CloningUtilTasks.cpp" line="321"/>
<source><br>Generated %1 fragments.</source>
<translation><br>Generated %1 fragments.</translation>
</message>
<message>
- <location filename="../src/CloningUtilTasks.cpp" line="335"/>
+ <location filename="../src/CloningUtilTasks.cpp" line="330"/>
<source><br><br> %1: From %3 (%2) To %5 (%4) - %6 bp </source>
<translation><br><br> %1: From %3 (%2) To %5 (%4) - %6 bp </translation>
</message>
<message>
- <location filename="../src/CloningUtilTasks.cpp" line="361"/>
+ <location filename="../src/CloningUtilTasks.cpp" line="356"/>
<source>Conserved annotation %1 (%2) is disrupted by the digestion. Try changing the restriction sites.</source>
<translation>Conserved annotation %1 (%2) is disrupted by the digestion. Try changing the restriction sites.</translation>
</message>
@@ -1149,22 +1149,22 @@ To start ligation create a project or open an existing.</translation>
<context>
<name>U2::LigateFragmentsTask</name>
<message>
- <location filename="../src/CloningUtilTasks.cpp" line="383"/>
+ <location filename="../src/CloningUtilTasks.cpp" line="378"/>
<source>Fragments %1 and %2 are inconsistent. Blunt and sticky ends incompatibility</source>
<translation>Fragments %1 and %2 are inconsistent. Blunt and sticky ends incompatibility</translation>
</message>
<message>
- <location filename="../src/CloningUtilTasks.cpp" line="393"/>
+ <location filename="../src/CloningUtilTasks.cpp" line="388"/>
<source>Right overhang from %1 and left overhang from %2 are inconsistent.</source>
<translation>Right overhang from %1 and left overhang from %2 are inconsistent.</translation>
</message>
<message>
- <location filename="../src/CloningUtilTasks.cpp" line="439"/>
+ <location filename="../src/CloningUtilTasks.cpp" line="434"/>
<source>Unknown DNA alphabet in fragment %1 of %2</source>
<translation>Unknown DNA alphabet in fragment %1 of %2</translation>
</message>
<message>
- <location filename="../src/CloningUtilTasks.cpp" line="506"/>
+ <location filename="../src/CloningUtilTasks.cpp" line="501"/>
<source>Add constructed molecule</source>
<translation>Add constructed molecule</translation>
</message>
diff --git a/src/plugins/enzymes/transl/russian.ts b/src/plugins/enzymes/transl/russian.ts
index 58a0b34..15337f7 100644
--- a/src/plugins/enzymes/transl/russian.ts
+++ b/src/plugins/enzymes/transl/russian.ts
@@ -721,42 +721,42 @@ Choose another region.</translation>
<context>
<name>U2::DigestSequenceTask</name>
<message>
- <location filename="../src/CloningUtilTasks.cpp" line="136"/>
+ <location filename="../src/CloningUtilTasks.cpp" line="131"/>
<source>Can't use restriction site %1 for digestion, cleavage site is unknown </source>
<translation>Невозможно использовать сайт %1 для разбиения на фрагменты, точка разреза неопределена</translation>
</message>
<message>
- <location filename="../src/CloningUtilTasks.cpp" line="218"/>
+ <location filename="../src/CloningUtilTasks.cpp" line="213"/>
<source>Unable to digest into fragments: intersecting restriction sites %1 (%2..%3) and %4 (%5..%6)</source>
<translation>Не удалость разбить на фрагменты перескающиеся сайты рестрикции %1 (%2..%3) и %4 (%5..%6)</translation>
</message>
<message>
- <location filename="../src/CloningUtilTasks.cpp" line="324"/>
+ <location filename="../src/CloningUtilTasks.cpp" line="319"/>
<source>circular</source>
<translation>круговая</translation>
</message>
<message>
- <location filename="../src/CloningUtilTasks.cpp" line="324"/>
+ <location filename="../src/CloningUtilTasks.cpp" line="319"/>
<source>linear</source>
<translation>линейная</translation>
</message>
<message>
- <location filename="../src/CloningUtilTasks.cpp" line="325"/>
+ <location filename="../src/CloningUtilTasks.cpp" line="320"/>
<source><h3><br>Digest into fragments %1 (%2)</h3></source>
<translation><h3><br>Разбиение на фрагменты %1 (%2)</h3></translation>
</message>
<message>
- <location filename="../src/CloningUtilTasks.cpp" line="326"/>
+ <location filename="../src/CloningUtilTasks.cpp" line="321"/>
<source><br>Generated %1 fragments.</source>
<translation><br>Создано %1 фрагментов.</translation>
</message>
<message>
- <location filename="../src/CloningUtilTasks.cpp" line="335"/>
+ <location filename="../src/CloningUtilTasks.cpp" line="330"/>
<source><br><br> %1: From %3 (%2) To %5 (%4) - %6 bp </source>
<translation><br><br> %1: От %3 (%2) До %5 (%4) - %6 bp </translation>
</message>
<message>
- <location filename="../src/CloningUtilTasks.cpp" line="361"/>
+ <location filename="../src/CloningUtilTasks.cpp" line="356"/>
<source>Conserved annotation %1 (%2) is disrupted by the digestion. Try changing the restriction sites.</source>
<translation>Conserved annotation %1 (%2) is disrupted by the digestion. Try changing the restriction sites.</translation>
</message>
@@ -1143,22 +1143,22 @@ To start ligation create a project or open an existing.</source>
<context>
<name>U2::LigateFragmentsTask</name>
<message>
- <location filename="../src/CloningUtilTasks.cpp" line="383"/>
+ <location filename="../src/CloningUtilTasks.cpp" line="378"/>
<source>Fragments %1 and %2 are inconsistent. Blunt and sticky ends incompatibility</source>
<translation>Фрагменты %1 и %2 несопоставимы. Липкий конец и срез невозможно соединить</translation>
</message>
<message>
- <location filename="../src/CloningUtilTasks.cpp" line="393"/>
+ <location filename="../src/CloningUtilTasks.cpp" line="388"/>
<source>Right overhang from %1 and left overhang from %2 are inconsistent.</source>
<translation>Правый липкий конец фрагмента %1 несовместим с левым липким концом фрагмента %2.</translation>
</message>
<message>
- <location filename="../src/CloningUtilTasks.cpp" line="439"/>
+ <location filename="../src/CloningUtilTasks.cpp" line="434"/>
<source>Unknown DNA alphabet in fragment %1 of %2</source>
<translation>Неизвестный алфавит у фрагмента %1 последовательности %2</translation>
</message>
<message>
- <location filename="../src/CloningUtilTasks.cpp" line="506"/>
+ <location filename="../src/CloningUtilTasks.cpp" line="501"/>
<source>Add constructed molecule</source>
<translation>Добавить созданную молекулу</translation>
</message>
diff --git a/src/plugins/external_tool_support/external_tool_support.pro b/src/plugins/external_tool_support/external_tool_support.pro
index 12ddf68..7b59d6d 100644
--- a/src/plugins/external_tool_support/external_tool_support.pro
+++ b/src/plugins/external_tool_support/external_tool_support.pro
@@ -90,6 +90,22 @@ HEADERS += src/ETSProjectViewItemsContoller.h \
src/fastqc/FastqcSupport.h \
src/fastqc/FastqcTask.h \
src/fastqc/FastqcWorker.h \
+ src/hmmer/ConvertAlignment2StockholmTask.h \
+ src/hmmer/HmmerBuildDialog.h \
+ src/hmmer/HmmerBuildFromFileTask.h \
+ src/hmmer/HmmerBuildFromMsaTask.h \
+ src/hmmer/HmmerBuildSettings.h \
+ src/hmmer/HmmerBuildTask.h \
+ src/hmmer/HmmerBuildWorker.h \
+ src/hmmer/HmmerParseSearchResultsTask.h \
+ src/hmmer/HmmerSearchDialog.h \
+ src/hmmer/HmmerSearchSettings.h \
+ src/hmmer/HmmerSearchTask.h \
+ src/hmmer/HmmerSearchWorker.h \
+ src/hmmer/HmmerSupport.h \
+ src/hmmer/PhmmerSearchDialog.h \
+ src/hmmer/PhmmerSearchSettings.h \
+ src/hmmer/PhmmerSearchTask.h \
src/java/JavaSupport.h \
src/macs/MACSSettings.h \
src/macs/MACSSupport.h \
@@ -158,6 +174,7 @@ HEADERS += src/ETSProjectViewItemsContoller.h \
src/vcftools/VcfConsensusSupportTask.h \
src/vcftools/VcfConsensusWorker.h \
src/vcfutils/VcfutilsSupport.h
+
FORMS += src/blast/FormatDBSupportRunDialog.ui \
src/blast_plus/BlastDBCmdDialog.ui \
src/bowtie/BowtieBuildSettings.ui \
@@ -171,6 +188,9 @@ FORMS += src/blast/FormatDBSupportRunDialog.ui \
src/clustalo/ClustalOSupportRunDialog.ui \
src/clustalw/ClustalWSupportRunDialog.ui \
src/ETSSettingsWidget.ui \
+ src/hmmer/HmmerBuildDialog.ui \
+ src/hmmer/HmmerSearchDialog.ui \
+ src/hmmer/PhmmerSearchDialog.ui \
src/mafft/MAFFTSupportRunDialog.ui \
src/mrbayes/MrBayesDialog.ui \
src/phyml/PhyMLDialog.ui \
@@ -178,6 +198,7 @@ FORMS += src/blast/FormatDBSupportRunDialog.ui \
src/spades/SpadesSettings.ui \
src/utils/BlastAllSupportDialog.ui \
src/utils/BlastDBSelectorWidget.ui
+
SOURCES += src/ETSProjectViewItemsContoller.cpp \
src/ExternalToolManager.cpp \
src/ExternalToolSupportPlugin.cpp \
@@ -266,6 +287,22 @@ SOURCES += src/ETSProjectViewItemsContoller.cpp \
src/fastqc/FastqcSupport.cpp \
src/fastqc/FastqcTask.cpp \
src/fastqc/FastqcWorker.cpp \
+ src/hmmer/ConvertAlignment2StockholmTask.cpp \
+ src/hmmer/HmmerBuildDialog.cpp \
+ src/hmmer/HmmerBuildFromFileTask.cpp \
+ src/hmmer/HmmerBuildFromMsaTask.cpp \
+ src/hmmer/HmmerBuildSettings.cpp \
+ src/hmmer/HmmerBuildTask.cpp \
+ src/hmmer/HmmerBuildWorker.cpp \
+ src/hmmer/HmmerParseSearchResultsTask.cpp \
+ src/hmmer/HmmerSearchDialog.cpp \
+ src/hmmer/HmmerSearchSettings.cpp \
+ src/hmmer/HmmerSearchTask.cpp \
+ src/hmmer/HmmerSearchWorker.cpp \
+ src/hmmer/HmmerSupport.cpp \
+ src/hmmer/PhmmerSearchDialog.cpp \
+ src/hmmer/PhmmerSearchSettings.cpp \
+ src/hmmer/PhmmerSearchTask.cpp \
src/java/JavaSupport.cpp \
src/macs/MACSSettings.cpp \
src/macs/MACSSupport.cpp \
@@ -334,6 +371,7 @@ SOURCES += src/ETSProjectViewItemsContoller.cpp \
src/vcftools/VcfConsensusSupportTask.cpp \
src/vcftools/VcfConsensusWorker.cpp \
src/vcfutils/VcfutilsSupport.cpp
+
RESOURCES += external_tool_support.qrc
TRANSLATIONS += transl/english.ts \
transl/russian.ts
diff --git a/src/plugins/external_tool_support/external_tool_support.qrc b/src/plugins/external_tool_support/external_tool_support.qrc
index e3ce29b..a707a13 100644
--- a/src/plugins/external_tool_support/external_tool_support.qrc
+++ b/src/plugins/external_tool_support/external_tool_support.qrc
@@ -28,5 +28,6 @@
<file>images/R_gray.png</file>
<file>images/R.png</file>
<file>images/R_warn.png</file>
+ <file>images/hmmer.png</file>
</qresource>
</RCC>
diff --git a/src/plugins_3rdparty/hmm3/images/hmmer_16.png b/src/plugins/external_tool_support/images/hmmer.png
similarity index 100%
rename from src/plugins_3rdparty/hmm3/images/hmmer_16.png
rename to src/plugins/external_tool_support/images/hmmer.png
diff --git a/src/plugins/external_tool_support/src/ExternalToolSupportPlugin.cpp b/src/plugins/external_tool_support/src/ExternalToolSupportPlugin.cpp
index 941ca51..4766282 100644
--- a/src/plugins/external_tool_support/src/ExternalToolSupportPlugin.cpp
+++ b/src/plugins/external_tool_support/src/ExternalToolSupportPlugin.cpp
@@ -19,144 +19,132 @@
* MA 02110-1301, USA.
*/
-#include <U2Core/AppContext.h>
+#include <QCoreApplication>
+#include <QDirIterator>
+#include <QMenu>
+
+#include <U2Algorithm/CDSearchTaskFactoryRegistry.h>
+#include <U2Algorithm/DnaAssemblyAlgRegistry.h>
+#include <U2Algorithm/GenomeAssemblyRegistry.h>
+#include <U2Core/AppContext.h>
#include <U2Core/DNAAlphabet.h>
+#include <U2Core/DNASequenceObject.h>
+#include <U2Core/DNASequenceSelection.h>
#include <U2Core/DataBaseRegistry.h>
#include <U2Core/ExternalToolRegistry.h>
-#include <U2Core/MultiTask.h>
-
-#include <U2View/MSAEditor.h>
-#include <U2View/MSAEditorFactory.h>
-#include <U2View/DnaAssemblyUtils.h>
-#include <U2Core/MAlignmentObject.h>
-
#include <U2Core/GAutoDeleteList.h>
-#include <U2View/AnnotatedDNAView.h>
-#include <U2View/ADVSequenceObjectContext.h>
-#include <U2View/ADVConstants.h>
-#include <U2View/ADVUtils.h>
-
-#include <U2Core/DNASequenceSelection.h>
-#include <U2Core/DNASequenceObject.h>
-#include <U2Core/U2SafePoints.h>
+#include <U2Core/MAlignmentObject.h>
+#include <U2Core/MultiTask.h>
#include <U2Core/ScriptingToolRegistry.h>
+#include <U2Core/U2SafePoints.h>
#include <U2Gui/GUIUtils.h>
#include <U2Gui/ToolsMenu.h>
-#include <U2Test/XMLTestFormat.h>
+#include <U2View/ADVConstants.h>
+#include <U2View/ADVSequenceObjectContext.h>
+#include <U2View/ADVUtils.h>
+#include <U2View/AnnotatedDNAView.h>
+#include <U2View/DnaAssemblyUtils.h>
+#include <U2View/MSAEditor.h>
+#include <U2View/MSAEditorFactory.h>
+
#include <U2Test/GTest.h>
#include <U2Test/GTestFrameworkComponents.h>
+#include <U2Test/XMLTestFormat.h>
-#if (QT_VERSION < 0x050000) //Qt 5
-#include <QtGui/QMenu>
-#else
-#include <QtWidgets/QMenu>
-#endif
-#include <QtCore/QCoreApplication>
-#include <QtCore/QDirIterator>
-
+#include "ETSProjectViewItemsContoller.h"
#include "ExternalToolSupportPlugin.h"
#include "ExternalToolSupportSettings.h"
#include "ExternalToolSupportSettingsController.h"
-#include "utils/ExternalToolSupportAction.h"
-#include "utils/ExternalToolValidateTask.h"
-#include "ETSProjectViewItemsContoller.h"
-
-#include "clustalw/ClustalWSupport.h"
-#include "clustalw/ClustalWWorker.h"
-#include "clustalo/ClustalOSupport.h"
-#include "clustalo/ClustalOWorker.h"
-#include "mafft/MAFFTSupport.h"
-#include "mafft/MAFFTWorker.h"
-#include "tcoffee/TCoffeeSupport.h"
-#include "tcoffee/TCoffeeWorker.h"
-#include "mrbayes/MrBayesSupport.h"
-#include "mrbayes/MrBayesTests.h"
-#include "blast/FormatDBSupport.h"
+#include "R/RSupport.h"
+#include "R/RSupport.h"
+#include "bedtools/BedToolsWorkersLibrary.h"
+#include "bedtools/BedtoolsSupport.h"
+#include "bigWigTools/BedGraphToBigWigWorker.h"
+#include "bigWigTools/BigWigSupport.h"
#include "blast/BlastAllSupport.h"
#include "blast/BlastAllWorker.h"
+#include "blast/FormatDBSupport.h"
+#include "blast_plus/BlastDBCmdSupport.h"
#include "blast_plus/BlastPlusSupport.h"
#include "blast_plus/BlastPlusWorker.h"
-#include "blast_plus/BlastDBCmdSupport.h"
-#include "cap3/CAP3Support.h"
-#include "cap3/CAP3Worker.h"
+#include "blast_plus/RPSBlastSupportTask.h"
+#include "bowtie/BowtieSettingsWidget.h"
#include "bowtie/BowtieSupport.h"
#include "bowtie/BowtieTask.h"
-#include "bowtie/BowtieSettingsWidget.h"
-#include "bowtie/bowtie_tests/bowtieTests.h"
#include "bowtie/BowtieWorker.h"
+#include "bowtie/bowtie_tests/bowtieTests.h"
+#include "bowtie2/Bowtie2SettingsWidget.h"
#include "bowtie2/Bowtie2Support.h"
#include "bowtie2/Bowtie2Task.h"
-#include "bowtie2/Bowtie2SettingsWidget.h"
#include "bowtie2/Bowtie2Worker.h"
+#include "bwa/BwaMemWorker.h"
+#include "bwa/BwaSettingsWidget.h"
#include "bwa/BwaSupport.h"
#include "bwa/BwaTask.h"
-#include "bwa/BwaSettingsWidget.h"
+#include "bwa/BwaWorker.h"
#include "bwa/bwa_tests/bwaTests.h"
-#include "samtools/SamToolsExtToolSupport.h"
-#include "samtools/TabixSupport.h"
-#include "vcftools/VcfConsensusWorker.h"
-#include "vcftools/VcfConsensusSupport.h"
-#include "spidey/SpideySupport.h"
-#include "spidey/SpideySupportTask.h"
+#include "cap3/CAP3Support.h"
+#include "cap3/CAP3Worker.h"
+#include "ceas/CEASReportWorker.h"
+#include "ceas/CEASSupport.h"
+#include "clustalo/ClustalOSupport.h"
+#include "clustalo/ClustalOWorker.h"
+#include "clustalw/ClustalWSupport.h"
+#include "clustalw/ClustalWWorker.h"
+#include "conduct_go/ConductGOSupport.h"
+#include "conduct_go/ConductGOWorker.h"
+#include "conservation_plot/ConservationPlotSupport.h"
+#include "conservation_plot/ConservationPlotWorker.h"
#include "cufflinks/CuffdiffWorker.h"
#include "cufflinks/CufflinksSupport.h"
#include "cufflinks/CufflinksWorker.h"
#include "cufflinks/CuffmergeWorker.h"
#include "cufflinks/GffreadWorker.h"
-#include "tophat/TopHatSupport.h"
-#include "tophat/TopHatWorker.h"
-#include "ceas/CEASReportWorker.h"
-#include "ceas/CEASSupport.h"
-#include "macs/MACSWorker.h"
+#include "cutadapt/CutadaptSupport.h"
+#include "cutadapt/CutadaptWorker.h"
+#include "fastqc/FastqcSupport.h"
+#include "fastqc/FastqcWorker.h"
+#include "hmmer/HmmerBuildWorker.h"
+#include "hmmer/HmmerSearchWorker.h"
+#include "hmmer/HmmerSupport.h"
+#include "java/JavaSupport.h"
#include "macs/MACSSupport.h"
-#include "peak2gene/Peak2GeneWorker.h"
+#include "macs/MACSWorker.h"
+#include "mafft/MAFFTSupport.h"
+#include "mafft/MAFFTWorker.h"
+#include "mrbayes/MrBayesSupport.h"
+#include "mrbayes/MrBayesTests.h"
#include "peak2gene/Peak2GeneSupport.h"
-#include "conservation_plot/ConservationPlotWorker.h"
-#include "conservation_plot/ConservationPlotSupport.h"
-#include "seqpos/SeqPosWorker.h"
-#include "seqpos/SeqPosSupport.h"
-#include "conduct_go/ConductGOWorker.h"
-#include "conduct_go/ConductGOSupport.h"
-#include "python/PythonSupport.h"
+#include "peak2gene/Peak2GeneWorker.h"
#include "perl/PerlSupport.h"
-#include "java/JavaSupport.h"
-#include "R/RSupport.h"
-#include "vcfutils/VcfutilsSupport.h"
-#include "samtools/BcfToolsSupport.h"
-#include "R/RSupport.h"
#include "phyml/PhyMLSupport.h"
#include "phyml/PhyMLTests.h"
-
-#include "bwa/BwaMemWorker.h"
-#include "bwa/BwaWorker.h"
-#include "bedtools/BedtoolsSupport.h"
-#include "bedtools/BedToolsWorkersLibrary.h"
-#include "bigWigTools/BigWigSupport.h"
-#include "bigWigTools/BedGraphToBigWigWorker.h"
-
-#include "cutadapt/CutadaptSupport.h"
-#include "cutadapt/CutadaptWorker.h"
-
-#include "spades/SpadesSupport.h"
+#include "python/PythonSupport.h"
+#include "samtools/BcfToolsSupport.h"
+#include "samtools/SamToolsExtToolSupport.h"
+#include "samtools/TabixSupport.h"
+#include "seqpos/SeqPosSupport.h"
+#include "seqpos/SeqPosWorker.h"
+#include "snpeff/SnpEffSupport.h"
+#include "snpeff/SnpEffWorker.h"
#include "spades/SpadesSettingsWidget.h"
+#include "spades/SpadesSupport.h"
#include "spades/SpadesTask.h"
#include "spades/SpadesWorker.h"
-
-#include "snpeff/SnpEffSupport.h"
-#include "snpeff/SnpEffWorker.h"
-
-#include "fastqc/FastqcSupport.h"
-#include "fastqc/FastqcWorker.h"
-
-
-#include <U2Algorithm/CDSearchTaskFactoryRegistry.h>
-#include <U2Algorithm/DnaAssemblyAlgRegistry.h>
-#include <U2Algorithm/GenomeAssemblyRegistry.h>
-#include <blast_plus/RPSBlastSupportTask.h>
-
+#include "spidey/SpideySupport.h"
+#include "spidey/SpideySupportTask.h"
+#include "tcoffee/TCoffeeSupport.h"
+#include "tcoffee/TCoffeeWorker.h"
+#include "tophat/TopHatSupport.h"
+#include "tophat/TopHatWorker.h"
+#include "utils/ExternalToolSupportAction.h"
+#include "utils/ExternalToolValidateTask.h"
+#include "vcftools/VcfConsensusSupport.h"
+#include "vcftools/VcfConsensusWorker.h"
+#include "vcfutils/VcfutilsSupport.h"
#define EXTERNAL_TOOL_SUPPORT_FACTORY_ID "ExternalToolSupport"
#define TOOLS "tools"
@@ -324,10 +312,6 @@ ExternalToolSupportPlugin::ExternalToolSupportPlugin() :
FormatDBSupport* makeBLASTDBTool = new FormatDBSupport(ET_MAKEBLASTDB);
etRegistry->registerEntry(makeBLASTDBTool);
- //MakeBLASTDB from GPU-BLAST+
-// FormatDBSupport* gpuMakeBLASTDBTool = new FormatDBSupport(GPU_MAKEBLASTDB_TOOL_NAME); // https://ugene.unipro.ru/tracker/browse/UGENE-945
-// etRegistry->registerEntry(gpuMakeBLASTDBTool);
-
//BlastAll
BlastAllSupport* blastallTool = new BlastAllSupport(ET_BLASTALL);
etRegistry->registerEntry(blastallTool);
@@ -336,8 +320,6 @@ ExternalToolSupportPlugin::ExternalToolSupportPlugin() :
etRegistry->registerEntry(blastNPlusTool);
BlastPlusSupport* blastPPlusTool = new BlastPlusSupport(ET_BLASTP);
etRegistry->registerEntry(blastPPlusTool);
-// BlastPlusSupport* gpuBlastPPlusTool = new BlastPlusSupport(ET_GPU_BLASTP); // https://ugene.unipro.ru/tracker/browse/UGENE-945
-// etRegistry->registerEntry(gpuBlastPPlusTool);
BlastPlusSupport* blastXPlusTool = new BlastPlusSupport(ET_BLASTX);
etRegistry->registerEntry(blastXPlusTool);
BlastPlusSupport* tBlastNPlusTool = new BlastPlusSupport(ET_TBLASTN);
@@ -460,6 +442,10 @@ ExternalToolSupportPlugin::ExternalToolSupportPlugin() :
FastQCSupport *fastqc = new FastQCSupport(ET_FASTQC);
etRegistry->registerEntry(fastqc);
+ etRegistry->registerEntry(new HmmerSupport(HmmerSupport::BUILD_TOOL));
+ etRegistry->registerEntry(new HmmerSupport(HmmerSupport::SEARCH_TOOL));
+ etRegistry->registerEntry(new HmmerSupport(HmmerSupport::PHMMER_TOOL));
+
if (AppContext::getMainWindow()) {
ExternalToolSupportAction* formatDBAction= new ExternalToolSupportAction(tr("BLAST make database..."), this, QStringList(ET_FORMATDB));
formatDBAction->setObjectName(ToolsMenu::BLAST_DB);
@@ -506,6 +492,9 @@ ExternalToolSupportPlugin::ExternalToolSupportPlugin() :
GObjectViewWindowContext* spideyCtx = spideySupport->getViewContext();
spideyCtx->setParent(this);
spideyCtx->init();
+
+ HmmerContext *hmmerContext = new HmmerContext(this);
+ hmmerContext->init();
}
AppContext::getCDSFactoryRegistry()->registerFactory(new CDSearchLocalTaskFactory(), CDSearchFactoryRegistry::LocalSearch);
@@ -600,11 +589,27 @@ ExternalToolSupportPlugin::ExternalToolSupportPlugin() :
etRegistry->setManager(&validationManager);
validationManager.start();
- //Add viewer for settings
+ registerSettingsController();
+
+ registerWorkers();
+
if (AppContext::getMainWindow()) {
+ //Add project view service
+ services.push_back(new ExternalToolSupportService());
+ }
+}
+
+ExternalToolSupportPlugin::~ExternalToolSupportPlugin(){
+ ExternalToolSupportSettings::setExternalTools();
+}
+
+void ExternalToolSupportPlugin::registerSettingsController() {
+ if (NULL != AppContext::getMainWindow()) {
AppContext::getAppSettingsGUI()->registerPage(new ExternalToolSupportSettingsPageController());
}
- //Add new workers to WD
+}
+
+void ExternalToolSupportPlugin::registerWorkers() {
LocalWorkflow::ClustalWWorkerFactory::init();
LocalWorkflow::ClustalOWorkerFactory::init();
LocalWorkflow::MAFFTWorkerFactory::init();
@@ -636,15 +641,8 @@ ExternalToolSupportPlugin::ExternalToolSupportPlugin() :
LocalWorkflow::FastQCFactory::init();
LocalWorkflow::CutAdaptFastqWorkerFactory::init();
LocalWorkflow::BedtoolsIntersectWorkerFactory::init();
-
- if (AppContext::getMainWindow()) {
- //Add project view service
- services.push_back(new ExternalToolSupportService());
- }
-}
-
-ExternalToolSupportPlugin::~ExternalToolSupportPlugin(){
- ExternalToolSupportSettings::setExternalTools();
+ LocalWorkflow::HmmerBuildWorkerFactory::init();
+ LocalWorkflow::HmmerSearchWorkerFactory::init();
}
//////////////////////////////////////////////////////////////////////////
diff --git a/src/plugins/external_tool_support/src/ExternalToolSupportPlugin.h b/src/plugins/external_tool_support/src/ExternalToolSupportPlugin.h
index 47616f8..c81e1bd 100644
--- a/src/plugins/external_tool_support/src/ExternalToolSupportPlugin.h
+++ b/src/plugins/external_tool_support/src/ExternalToolSupportPlugin.h
@@ -37,6 +37,9 @@ public:
~ExternalToolSupportPlugin();
private:
+ void registerSettingsController();
+ void registerWorkers();
+
ExternalToolManagerImpl validationManager;
};
diff --git a/src/plugins/external_tool_support/src/ExternalToolSupportSettingsController.cpp b/src/plugins/external_tool_support/src/ExternalToolSupportSettingsController.cpp
index a0df444..4bdf75a 100644
--- a/src/plugins/external_tool_support/src/ExternalToolSupportSettingsController.cpp
+++ b/src/plugins/external_tool_support/src/ExternalToolSupportSettingsController.cpp
@@ -69,7 +69,7 @@ AppSettingsGUIPageWidget* ExternalToolSupportSettingsPageController::createWidge
return r;
}
-const QString ExternalToolSupportSettingsPageController::helpPageId = QString("17470453");
+const QString ExternalToolSupportSettingsPageController::helpPageId = QString("18220313");
/////////////////////////////////////////////
////ExternalToolSupportSettingsPageWidget
diff --git a/src/plugins/external_tool_support/src/bigWigTools/BedGraphToBigWigTask.cpp b/src/plugins/external_tool_support/src/bigWigTools/BedGraphToBigWigTask.cpp
index c4c95d0..f8ea68e 100644
--- a/src/plugins/external_tool_support/src/bigWigTools/BedGraphToBigWigTask.cpp
+++ b/src/plugins/external_tool_support/src/bigWigTools/BedGraphToBigWigTask.cpp
@@ -58,7 +58,7 @@ void BedGraphToBigWigParser::parseErrOutput( const QString& partOfLog ){
//////////////////////////////////////////////////////////////////////////
//BedGraphToBigWigTask
BedGraphToBigWigTask::BedGraphToBigWigTask(const BedGraphToBigWigSetting &settings)
-:Task(QString("bedGrapthToBigWig for %1").arg(settings.inputUrl), TaskFlags_FOSE_COSC)
+:ExternalToolSupportTask(QString("bedGrapthToBigWig for %1").arg(settings.inputUrl), TaskFlags_FOSE_COSC)
,settings(settings)
{
@@ -85,6 +85,7 @@ void BedGraphToBigWigTask::prepare(){
CHECK_OP(stateInfo, );
ExternalToolRunTask* etTask = new ExternalToolRunTask(ET_BIGWIG, args, new BedGraphToBigWigParser(), settings.outDir);
+ setListenerForTask(etTask);
addSubTask(etTask);
}
diff --git a/src/plugins/external_tool_support/src/bigWigTools/BedGraphToBigWigTask.h b/src/plugins/external_tool_support/src/bigWigTools/BedGraphToBigWigTask.h
index 583c05d..0ba8d6f 100644
--- a/src/plugins/external_tool_support/src/bigWigTools/BedGraphToBigWigTask.h
+++ b/src/plugins/external_tool_support/src/bigWigTools/BedGraphToBigWigTask.h
@@ -40,7 +40,7 @@ public:
bool uncompressed;
};
-class BedGraphToBigWigTask : public Task {
+class BedGraphToBigWigTask : public ExternalToolSupportTask {
Q_OBJECT
public:
BedGraphToBigWigTask(const BedGraphToBigWigSetting &settings);
diff --git a/src/plugins/external_tool_support/src/bigWigTools/BedGraphToBigWigWorker.cpp b/src/plugins/external_tool_support/src/bigWigTools/BedGraphToBigWigWorker.cpp
index f6eef67..a7d4863 100644
--- a/src/plugins/external_tool_support/src/bigWigTools/BedGraphToBigWigWorker.cpp
+++ b/src/plugins/external_tool_support/src/bigWigTools/BedGraphToBigWigWorker.cpp
@@ -230,7 +230,8 @@ Task * BedGraphToBigWigWorker::tick() {
setting.uncompressed = getValue<bool>(UNCOMPRESSED);
setting.genomePath = getValue<QString>(GENOME);
- Task *t = new BedGraphToBigWigTask (setting);
+ ExternalToolSupportTask *t = new BedGraphToBigWigTask (setting);
+ t->addListeners(createLogListeners());
connect(new TaskSignalMapper(t), SIGNAL(si_taskFinished(Task*)), SLOT(sl_taskFinished(Task*)));
return t;
}
diff --git a/src/plugins/external_tool_support/src/blast/FormatDBSupport.h b/src/plugins/external_tool_support/src/blast/FormatDBSupport.h
index 36fd0b4..fdd4630 100644
--- a/src/plugins/external_tool_support/src/blast/FormatDBSupport.h
+++ b/src/plugins/external_tool_support/src/blast/FormatDBSupport.h
@@ -28,6 +28,7 @@
#define ET_FORMATDB "FormatDB"
#define ET_MAKEBLASTDB "MakeBLASTDB"
#define ET_GPU_MAKEBLASTDB "GPU-MakeBLASTDB"
+#define FORMATDB_TMP_DIR "FormatDB"
namespace U2 {
class FormatDBSupport : public ExternalTool {
diff --git a/src/plugins/external_tool_support/src/blast/FormatDBSupportRunDialog.cpp b/src/plugins/external_tool_support/src/blast/FormatDBSupportRunDialog.cpp
index 883c2fd..86cae35 100644
--- a/src/plugins/external_tool_support/src/blast/FormatDBSupportRunDialog.cpp
+++ b/src/plugins/external_tool_support/src/blast/FormatDBSupportRunDialog.cpp
@@ -44,7 +44,7 @@ FormatDBSupportRunDialog::FormatDBSupportRunDialog(const QString &_name, FormatD
QDialog(_parent), name(_name), settings(_settings)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470726");
+ new HelpButton(this, buttonBox, "18220586");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Format"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
formatButton = buttonBox->button(QDialogButtonBox::Ok);
diff --git a/src/plugins/external_tool_support/src/blast/FormatDBSupportTask.cpp b/src/plugins/external_tool_support/src/blast/FormatDBSupportTask.cpp
index 7c8b2fd..10b9f7a 100644
--- a/src/plugins/external_tool_support/src/blast/FormatDBSupportTask.cpp
+++ b/src/plugins/external_tool_support/src/blast/FormatDBSupportTask.cpp
@@ -22,17 +22,24 @@
#include "FormatDBSupportTask.h"
#include "FormatDBSupport.h"
+#include <QCoreApplication>
#include <QtCore/QDir>
#include <U2Core/AppContext.h>
+#include <U2Core/AddDocumentTask.h>
+#include <U2Core/AppSettings.h>
+#include <U2Core/UserApplicationsSettings.h>
#include <U2Core/Counter.h>
#include <U2Core/DocumentModel.h>
+#include <U2Core/DocumentUtils.h>
#include <U2Core/ExternalToolRegistry.h>
-#include <U2Core/ProjectModel.h>
+#include <U2Core/GUrlUtils.h>
+#include <U2Core/Log.h>
#include <U2Core/MAlignmentObject.h>
+#include <U2Core/ProjectModel.h>
+
+#include <U2Formats/ConvertFileTask.h>
-#include <U2Core/AddDocumentTask.h>
-#include <U2Core/Log.h>
namespace U2 {
@@ -45,47 +52,50 @@ void FormatDBSupportTaskSettings::reset() {
FormatDBSupportTask::FormatDBSupportTask(const QString& name, const FormatDBSupportTaskSettings& _settings) :
Task(tr("Run NCBI FormatDB task"), TaskFlags_NR_FOSCOE | TaskFlag_ReportingIsSupported | TaskFlag_ReportingIsEnabled), toolName(name),
- settings(_settings)
+ settings(_settings),
+ convertSubTaskCounter(0)
{
GCOUNTER( cvar, tvar, "FormatDBSupportTask" );
- formatDBTask=NULL;
+ formatDBTask = NULL;
}
void FormatDBSupportTask::prepare(){
- QStringList arguments;
- assert((toolName == ET_FORMATDB)||(toolName == ET_MAKEBLASTDB));
- if(toolName == ET_FORMATDB){
- for(int i=0; i< settings.inputFilesPath.length(); i++){
- if(settings.inputFilesPath[i].contains(" ")){
- stateInfo.setError(tr("Input files paths contain space characters."));
- return;
- }
- }
- arguments <<"-i"<< settings.inputFilesPath.join(" ");
- arguments <<"-l"<< settings.outputPath + "formatDB.log";
- arguments <<"-n"<< settings.outputPath;
- arguments <<"-p"<< (settings.isInputAmino ? "T" : "F");
- externalToolLog = settings.outputPath + "formatDB.log";
- }else if (toolName == ET_MAKEBLASTDB){
- for(int i=0; i< settings.inputFilesPath.length(); i++){
- settings.inputFilesPath[i]="\""+settings.inputFilesPath[i]+"\"";
- }
- arguments <<"-in"<< settings.inputFilesPath.join(" ");
- arguments <<"-logfile"<< settings.outputPath + "MakeBLASTDB.log";
- externalToolLog = settings.outputPath + "MakeBLASTDB.log";
- if(settings.outputPath.contains(" ")){
- stateInfo.setError(tr("Output database path contain space characters."));
- return;
+ prepareInputFastaFiles();
+ if (convertSubTaskCounter == 0) {
+ createFormatDbTask();
+ addSubTask(formatDBTask);
+ }
+}
+
+QList<Task*> FormatDBSupportTask::onSubTaskFinished(Task *subTask) {
+ QList<Task*> result;
+ CHECK(subTask != NULL, result);
+ CHECK(!subTask->isCanceled() && !subTask->hasError(), result);
+
+ DefaultConvertFileTask* convertTask = qobject_cast<DefaultConvertFileTask*>(subTask);
+ if (convertTask != NULL) {
+ convertSubTaskCounter--;
+ inputFastaFiles << convertTask->getResult();
+ fastaTmpFiles << convertTask->getResult();
+
+ if (convertSubTaskCounter == 0) {
+ createFormatDbTask();
+ result << formatDBTask;
}
- arguments <<"-out"<< settings.outputPath;
- arguments <<"-dbtype"<< (settings.isInputAmino ? "prot" : "nucl");
}
- formatDBTask = new ExternalToolRunTask(toolName, arguments, new ExternalToolLogParser());
- formatDBTask->setSubtaskProgressWeight(95);
- addSubTask(formatDBTask);
+ return result;
}
+
Task::ReportResult FormatDBSupportTask::report(){
+ // remove tmp files
+ if (!fastaTmpFiles.isEmpty()) {
+ QDir dir(QFileInfo(fastaTmpFiles.first()).absoluteDir());
+ if (!dir.removeRecursively()) {
+ stateInfo.addWarning(tr("Can not remove directory for temporary files."));
+ emit si_stateChanged();
+ }
+ }
return ReportResult_Finished;
}
@@ -125,6 +135,35 @@ QString FormatDBSupportTask::generateReport() const {
return res;
}
+void FormatDBSupportTask::prepareInputFastaFiles() {
+ QString tmpDirName = "FormatDB_"+QString::number(this->getTaskId())+"_"+
+ QDate::currentDate().toString("dd.MM.yyyy")+"_"+
+ QTime::currentTime().toString("hh.mm.ss.zzz")+"_"+
+ QString::number(QCoreApplication::applicationPid())+"/";
+ QString tmpDir = GUrlUtils::prepareDirLocation(AppContext::getAppSettings()->getUserAppsSettings()->getCurrentProcessTemporaryDirPath(FORMATDB_TMP_DIR) + "/"+ tmpDirName,
+ stateInfo);
+ CHECK_OP(stateInfo, );
+ CHECK_EXT(!tmpDir.isEmpty(), setError(tr("Cannot create temp directory")), );
+
+ for(int i = 0; i < settings.inputFilesPath.length(); i++){
+ GUrl url(settings.inputFilesPath[i]);
+
+ QList<FormatDetectionResult> formats = DocumentUtils::detectFormat(url);
+ if (formats.isEmpty()) {
+ stateInfo.addWarning(tr("File '%1' was skipped. Cannot detect file format.").arg(url.getURLString()));
+ continue;
+ }
+ QString firstFormat = formats.first().format->getFormatId();
+ if (firstFormat != BaseDocumentFormats::FASTA) {
+ DefaultConvertFileTask* convertTask = new DefaultConvertFileTask(url, firstFormat, BaseDocumentFormats::FASTA, tmpDir);
+ addSubTask(convertTask);
+ convertSubTaskCounter++;
+ } else {
+ inputFastaFiles << url.getURLString();
+ }
+ }
+}
+
QString FormatDBSupportTask::prepareLink( const QString &path ) const {
QString preparedPath = path;
if(preparedPath.startsWith("'") || preparedPath.startsWith("\"")) {
@@ -137,4 +176,40 @@ QString FormatDBSupportTask::prepareLink( const QString &path ) const {
QDir::toNativeSeparators(preparedPath) + "</a><br>";
}
+void FormatDBSupportTask::createFormatDbTask() {
+ SAFE_POINT_EXT(formatDBTask == NULL, setError(tr("Trying to initialize Format DB task second time")), );
+
+ QStringList arguments;
+ assert((toolName == ET_FORMATDB)||(toolName == ET_MAKEBLASTDB));
+ if(toolName == ET_FORMATDB){
+ for (int i = 0; i < inputFastaFiles.length(); i++){
+ if (inputFastaFiles[i].contains(" ")) {
+ stateInfo.setError(tr("Input files paths contain space characters."));
+ return;
+ }
+ }
+ arguments <<"-i"<< inputFastaFiles.join(" ");
+ arguments <<"-l"<< settings.outputPath + "formatDB.log";
+ arguments <<"-n"<< settings.outputPath;
+ arguments <<"-p"<< (settings.isInputAmino ? "T" : "F");
+ externalToolLog = settings.outputPath + "formatDB.log";
+ }else if (toolName == ET_MAKEBLASTDB){
+ for (int i = 0; i < inputFastaFiles.length(); i++){
+ inputFastaFiles[i]="\""+inputFastaFiles[i]+"\"";
+ }
+ arguments <<"-in"<< inputFastaFiles.join(" ");
+ arguments <<"-logfile"<< settings.outputPath + "MakeBLASTDB.log";
+ externalToolLog = settings.outputPath + "MakeBLASTDB.log";
+ if(settings.outputPath.contains(" ")){
+ stateInfo.setError(tr("Output database path contain space characters."));
+ return;
+ }
+ arguments <<"-out"<< settings.outputPath;
+ arguments <<"-dbtype"<< (settings.isInputAmino ? "prot" : "nucl");
+ }
+
+ formatDBTask = new ExternalToolRunTask(toolName, arguments, new ExternalToolLogParser());
+ formatDBTask->setSubtaskProgressWeight(95);
+}
+
}//namespace
diff --git a/src/plugins/external_tool_support/src/blast/FormatDBSupportTask.h b/src/plugins/external_tool_support/src/blast/FormatDBSupportTask.h
index a834f72..16adf66 100644
--- a/src/plugins/external_tool_support/src/blast/FormatDBSupportTask.h
+++ b/src/plugins/external_tool_support/src/blast/FormatDBSupportTask.h
@@ -93,14 +93,22 @@ class FormatDBSupportTask : public Task {
public:
FormatDBSupportTask(const QString& name, const FormatDBSupportTaskSettings& settings);
void prepare();
+ QList<Task*> onSubTaskFinished(Task *subTask);
Task::ReportResult report();
QString generateReport() const;
private:
+ void prepareInputFastaFiles();
QString prepareLink(const QString &path) const;
+ void createFormatDbTask();
+
QString externalToolLog;
ExternalToolRunTask* formatDBTask;
QString toolName;
FormatDBSupportTaskSettings settings;
+ int convertSubTaskCounter;
+
+ QStringList inputFastaFiles;
+ QStringList fastaTmpFiles;
};
}//namespace
diff --git a/src/plugins/external_tool_support/src/blast_plus/BlastDBCmdDialog.cpp b/src/plugins/external_tool_support/src/blast_plus/BlastDBCmdDialog.cpp
index 4c7cf53..6561fbc 100644
--- a/src/plugins/external_tool_support/src/blast_plus/BlastDBCmdDialog.cpp
+++ b/src/plugins/external_tool_support/src/blast_plus/BlastDBCmdDialog.cpp
@@ -37,7 +37,7 @@ BlastDBCmdDialog::BlastDBCmdDialog(BlastDBCmdSupportTaskSettings &_settings, QWi
settings(_settings)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470728");
+ new HelpButton(this, buttonBox, "18220588");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Fetch"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins/external_tool_support/src/bowtie/BowtieTask.cpp b/src/plugins/external_tool_support/src/bowtie/BowtieTask.cpp
index e1ec344..3c10b29 100644
--- a/src/plugins/external_tool_support/src/bowtie/BowtieTask.cpp
+++ b/src/plugins/external_tool_support/src/bowtie/BowtieTask.cpp
@@ -33,7 +33,7 @@ namespace U2 {
// BowtieBuildIndexTask
BowtieBuildIndexTask::BowtieBuildIndexTask(const QString &referencePath, const QString &indexPath, bool colorspace):
- Task("Build Bowtie index", TaskFlags_NR_FOSCOE),
+ ExternalToolSupportTask("Build Bowtie index", TaskFlags_NR_FOSCOE),
referencePath(referencePath),
indexPath(indexPath),
colorspace(colorspace)
@@ -60,6 +60,7 @@ void BowtieBuildIndexTask::prepare() {
}
ExternalToolRunTask *task = new ExternalToolRunTask(ET_BOWTIE_BUILD, arguments, new LogParser());
+ setListenerForTask(task);
addSubTask(task);
}
@@ -127,7 +128,7 @@ int BowtieBuildIndexTask::LogParser::getProgress() {
// BowtieAssembleTask
BowtieAssembleTask::BowtieAssembleTask(const DnaAssemblyToRefTaskSettings &settings):
- Task("Bowtie reads assembly", TaskFlags_NR_FOSCOE),
+ ExternalToolSupportTask("Bowtie reads assembly", TaskFlags_NR_FOSCOE),
logParser(NULL),
settings(settings)
{
@@ -285,6 +286,7 @@ void BowtieAssembleTask::prepare() {
arguments.append(settings.resultFileName.getURLString());
logParser = new LogParser();
ExternalToolRunTask *task = new ExternalToolRunTask(ET_BOWTIE, arguments, logParser, NULL);
+ setListenerForTask(task);
addSubTask(task);
}
@@ -378,9 +380,11 @@ void BowtieTask::prepare() {
}
buildIndexTask = new BowtieBuildIndexTask(settings.refSeqUrl.getURLString(), indexFileName,
settings.getCustomValue(BowtieTask::OPTION_COLORSPACE, false).toBool());
+ buildIndexTask->addListeners(QList <ExternalToolListener*>() << getListener(0));
}
if(!justBuildIndex) {
assembleTask = new BowtieAssembleTask(settings);
+ assembleTask->addListeners(QList <ExternalToolListener*>() << getListener(1));
}
if (unzipTask != NULL) {
diff --git a/src/plugins/external_tool_support/src/bowtie/BowtieTask.h b/src/plugins/external_tool_support/src/bowtie/BowtieTask.h
index f4c34ab..dd5f069 100644
--- a/src/plugins/external_tool_support/src/bowtie/BowtieTask.h
+++ b/src/plugins/external_tool_support/src/bowtie/BowtieTask.h
@@ -29,7 +29,7 @@
namespace U2 {
-class BowtieBuildIndexTask : public Task {
+class BowtieBuildIndexTask : public ExternalToolSupportTask {
Q_OBJECT
public:
BowtieBuildIndexTask(const QString &referencePath, const QString &indexPath, bool colorspace);
@@ -68,7 +68,7 @@ private:
bool colorspace;
};
-class BowtieAssembleTask : public Task {
+class BowtieAssembleTask : public ExternalToolSupportTask {
Q_OBJECT
public:
BowtieAssembleTask(const DnaAssemblyToRefTaskSettings &settings);
diff --git a/src/plugins/external_tool_support/src/bowtie/BowtieWorker.cpp b/src/plugins/external_tool_support/src/bowtie/BowtieWorker.cpp
index ba0c728..27d3947 100644
--- a/src/plugins/external_tool_support/src/bowtie/BowtieWorker.cpp
+++ b/src/plugins/external_tool_support/src/bowtie/BowtieWorker.cpp
@@ -100,10 +100,6 @@ QString BowtieWorker::getBaseSubdir() const {
return BASE_Bowtie_SUBDIR;
}
-DnaAssemblyToReferenceTask* BowtieWorker::getTask(const DnaAssemblyToRefTaskSettings &settings) const {
- return new BowtieTask(settings);
-}
-
void BowtieWorker::setGenomeIndex(DnaAssemblyToRefTaskSettings& settings){
settings.refSeqUrl = getValue<QString>(REFERENCE_GENOME);
diff --git a/src/plugins/external_tool_support/src/bowtie/BowtieWorker.h b/src/plugins/external_tool_support/src/bowtie/BowtieWorker.h
index 79fe66d..39a0bc3 100644
--- a/src/plugins/external_tool_support/src/bowtie/BowtieWorker.h
+++ b/src/plugins/external_tool_support/src/bowtie/BowtieWorker.h
@@ -39,7 +39,6 @@ protected:
QVariantMap getCustomParameters() const;
QString getDefaultFileName() const;
QString getBaseSubdir() const;
- DnaAssemblyToReferenceTask* getTask(const DnaAssemblyToRefTaskSettings &settings) const;
void setGenomeIndex(DnaAssemblyToRefTaskSettings& settings);
}; // BowtieWorker
diff --git a/src/plugins/external_tool_support/src/bowtie2/Bowtie2Task.cpp b/src/plugins/external_tool_support/src/bowtie2/Bowtie2Task.cpp
index b5c5808..f8b016b 100644
--- a/src/plugins/external_tool_support/src/bowtie2/Bowtie2Task.cpp
+++ b/src/plugins/external_tool_support/src/bowtie2/Bowtie2Task.cpp
@@ -33,7 +33,7 @@ namespace U2 {
// Bowtie2BuildIndexTask
Bowtie2BuildIndexTask::Bowtie2BuildIndexTask(const QString &referencePath, const QString &indexPath):
- Task("Build Bowtie2 index", TaskFlags_NR_FOSE_COSC),
+ ExternalToolSupportTask("Build Bowtie2 index", TaskFlags_NR_FOSE_COSC),
referencePath(referencePath),
indexPath(indexPath)
{
@@ -53,13 +53,14 @@ void Bowtie2BuildIndexTask::prepare() {
arguments.append(indexPath);
ExternalToolRunTask *task = new ExternalToolRunTask(ET_BOWTIE2_BUILD, arguments, new ExternalToolLogParser());
+ setListenerForTask(task);
addSubTask(task);
}
// Bowtie2AlignTask
Bowtie2AlignTask::Bowtie2AlignTask(const DnaAssemblyToRefTaskSettings &settings):
- Task("Bowtie2 reads assembly", TaskFlags_NR_FOSE_COSC),
+ ExternalToolSupportTask("Bowtie2 reads assembly", TaskFlags_NR_FOSE_COSC),
settings(settings)
{
}
@@ -194,6 +195,7 @@ void Bowtie2AlignTask::prepare() {
arguments.append(settings.resultFileName.getURLString());
ExternalToolRunTask *task = new ExternalToolRunTask(ET_BOWTIE2_ALIGN, arguments, new ExternalToolLogParser());
+ setListenerForTask(task);
addSubTask(task);
}
@@ -252,9 +254,11 @@ void Bowtie2Task::prepare() {
}
}
buildIndexTask = new Bowtie2BuildIndexTask(settings.refSeqUrl.getURLString(), indexFileName);
+ buildIndexTask->addListeners(QList <ExternalToolListener*>() << getListener(0));
}
if(!justBuildIndex) {
alignTask = new Bowtie2AlignTask(settings);
+ alignTask->addListeners(QList <ExternalToolListener*>() << getListener(1));
}
if (unzipTask != NULL) {
diff --git a/src/plugins/external_tool_support/src/bowtie2/Bowtie2Task.h b/src/plugins/external_tool_support/src/bowtie2/Bowtie2Task.h
index 8aded47..96dba4f 100644
--- a/src/plugins/external_tool_support/src/bowtie2/Bowtie2Task.h
+++ b/src/plugins/external_tool_support/src/bowtie2/Bowtie2Task.h
@@ -28,7 +28,7 @@
namespace U2 {
-class Bowtie2BuildIndexTask : public Task {
+class Bowtie2BuildIndexTask : public ExternalToolSupportTask {
Q_OBJECT
public:
Bowtie2BuildIndexTask(const QString &referencePath, const QString &indexPath);
@@ -39,7 +39,7 @@ private:
QString indexPath;
};
-class Bowtie2AlignTask : public Task {
+class Bowtie2AlignTask : public ExternalToolSupportTask {
Q_OBJECT
public:
Bowtie2AlignTask(const DnaAssemblyToRefTaskSettings &settings);
diff --git a/src/plugins/external_tool_support/src/bowtie2/Bowtie2Worker.cpp b/src/plugins/external_tool_support/src/bowtie2/Bowtie2Worker.cpp
index 5134151..c9ab98e 100644
--- a/src/plugins/external_tool_support/src/bowtie2/Bowtie2Worker.cpp
+++ b/src/plugins/external_tool_support/src/bowtie2/Bowtie2Worker.cpp
@@ -92,10 +92,6 @@ QString Bowtie2Worker::getBaseSubdir() const {
return BASE_Bowtie2_SUBDIR;
}
-DnaAssemblyToReferenceTask* Bowtie2Worker::getTask(const DnaAssemblyToRefTaskSettings &settings) const {
- return new Bowtie2Task(settings);
-}
-
void Bowtie2Worker::setGenomeIndex(DnaAssemblyToRefTaskSettings& settings){
settings.refSeqUrl = getValue<QString>(REFERENCE_GENOME);
diff --git a/src/plugins/external_tool_support/src/bowtie2/Bowtie2Worker.h b/src/plugins/external_tool_support/src/bowtie2/Bowtie2Worker.h
index fb31bba..f2cb255 100644
--- a/src/plugins/external_tool_support/src/bowtie2/Bowtie2Worker.h
+++ b/src/plugins/external_tool_support/src/bowtie2/Bowtie2Worker.h
@@ -40,7 +40,6 @@ protected :
QVariantMap getCustomParameters() const;
QString getDefaultFileName() const;
QString getBaseSubdir() const;
- DnaAssemblyToReferenceTask* getTask(const DnaAssemblyToRefTaskSettings &settings) const;
void setGenomeIndex(DnaAssemblyToRefTaskSettings& settings);
}; // Bowtie2Worker
diff --git a/src/plugins/external_tool_support/src/bwa/BwaMemWorker.cpp b/src/plugins/external_tool_support/src/bwa/BwaMemWorker.cpp
index b8a3d83..cc046a6 100644
--- a/src/plugins/external_tool_support/src/bwa/BwaMemWorker.cpp
+++ b/src/plugins/external_tool_support/src/bwa/BwaMemWorker.cpp
@@ -111,10 +111,6 @@ QString BwaMemWorker::getBaseSubdir() const {
return BASE_BWA_SUBDIR;
}
-DnaAssemblyToReferenceTask* BwaMemWorker::getTask(const DnaAssemblyToRefTaskSettings &settings) const {
- return new BwaTask(settings);
-}
-
void BwaMemWorker::setGenomeIndex(DnaAssemblyToRefTaskSettings& settings) {
settings.refSeqUrl = getValue<QString>(REFERENCE_GENOME);
settings.prebuiltIndex = DnaAssemblyToReferenceTask::isIndexUrl(settings.refSeqUrl.getURLString(), BwaTask::indexSuffixes);
diff --git a/src/plugins/external_tool_support/src/bwa/BwaMemWorker.h b/src/plugins/external_tool_support/src/bwa/BwaMemWorker.h
index 40740ca..9c717c4 100644
--- a/src/plugins/external_tool_support/src/bwa/BwaMemWorker.h
+++ b/src/plugins/external_tool_support/src/bwa/BwaMemWorker.h
@@ -40,7 +40,6 @@ protected:
QVariantMap getCustomParameters() const;
QString getDefaultFileName() const;
QString getBaseSubdir() const;
- DnaAssemblyToReferenceTask* getTask(const DnaAssemblyToRefTaskSettings &settings) const;
void setGenomeIndex(DnaAssemblyToRefTaskSettings& settings);
}; // BwaMemWorker
diff --git a/src/plugins/external_tool_support/src/bwa/BwaTask.cpp b/src/plugins/external_tool_support/src/bwa/BwaTask.cpp
index 20b1f7d..7345276 100644
--- a/src/plugins/external_tool_support/src/bwa/BwaTask.cpp
+++ b/src/plugins/external_tool_support/src/bwa/BwaTask.cpp
@@ -41,7 +41,7 @@ namespace U2 {
// BwaBuildIndexTask
BwaBuildIndexTask::BwaBuildIndexTask(const QString &referencePath, const QString &indexPath, const DnaAssemblyToRefTaskSettings &settings):
- Task("Build Bwa index", TaskFlags_NR_FOSCOE),
+ ExternalToolSupportTask("Build Bwa index", TaskFlags_NR_FOSCOE),
referencePath(referencePath),
indexPath(indexPath),
settings(settings)
@@ -60,6 +60,7 @@ void BwaBuildIndexTask::prepare() {
arguments.append(indexPath);
arguments.append(referencePath);
ExternalToolRunTask *task = new ExternalToolRunTask(ET_BWA, arguments, new LogParser());
+ setListenerForTask(task);
addSubTask(task);
}
@@ -87,7 +88,7 @@ void cleanupTempDir(const QStringList &tempDirFiles) {
}
BwaAlignTask::BwaAlignTask(const QString &indexPath, const QList<ShortReadSet>& shortReadSets, const QString &resultPath, const DnaAssemblyToRefTaskSettings &settings):
- Task("Bwa reads assembly", TaskFlags_NR_FOSCOE),
+ ExternalToolSupportTask("Bwa reads assembly", TaskFlags_NR_FOSCOE),
indexPath(indexPath),
readSets(shortReadSets),
resultPath(resultPath),
@@ -188,7 +189,8 @@ void BwaAlignTask::prepare() {
arguments.append( getSAIPath(currentReadSet.url.getURLString()));
arguments.append(indexPath);
arguments.append(currentReadSet.url.getURLString());
- Task* alignTask = new ExternalToolRunTask(ET_BWA, arguments, new LogParser(), NULL);
+ ExternalToolRunTask* alignTask = new ExternalToolRunTask(ET_BWA, arguments, new LogParser(), NULL);
+ setListenerForTask(alignTask);
alignTasks.append(alignTask);
}
alignMultiTask = new MultiTask(tr("Align reads with BWA Multitask"), alignTasks);
@@ -230,6 +232,7 @@ QList<Task *> BwaAlignTask::onSubTaskFinished(Task *subTask) {
arguments.append(currentReadsSet.url.getURLString());
}
ExternalToolRunTask *task = new ExternalToolRunTask(ET_BWA, arguments, new LogParser(), NULL);
+ setListenerForTask(task);
samTasks.append(task);
}
samMultiTask = new MultiTask(tr("Saming reads with BWA Multitask"), samTasks);
@@ -241,9 +244,10 @@ QList<Task *> BwaAlignTask::onSubTaskFinished(Task *subTask) {
}
//converting SAM -> BAM
QStringList bamUrlstoMerge;
+ int i = 0;
foreach(const QString &url, urlsToMerge) {
QFileInfo urlToConvertFileInfo(url);
- QString convertedBamUrl = urlToConvertFileInfo.dir().canonicalPath() + "/" + urlToConvertFileInfo.baseName() + ".bam";
+ QString convertedBamUrl = tmpDirPath + "/" + resultPathFileInfo.baseName() + "_" + QString::number(i) + ".bam";
BAMUtils::ConvertOption options(true);
BAMUtils::convertToSamOrBam(url, convertedBamUrl, options, stateInfo);
bamUrlstoMerge.append(convertedBamUrl);
@@ -251,6 +255,7 @@ QList<Task *> BwaAlignTask::onSubTaskFinished(Task *subTask) {
cleanupTempDir(urlsToMerge);
return result;
}
+ i++;
}
mergeTask = new MergeBamTask(bamUrlstoMerge, resultPathFileInfo.dir().canonicalPath(), resultPathFileInfo.baseName() + ".bam", true);
result.append(mergeTask);
@@ -300,7 +305,7 @@ void BwaAlignTask::LogParser::parseErrOutput(const QString &partOfLog) {
// BwaMemAlignTask
BwaMemAlignTask::BwaMemAlignTask(const QString &indexPath, const DnaAssemblyToRefTaskSettings &settings):
- Task("BWA MEM reads assembly", TaskFlags_NR_FOSCOE),
+ ExternalToolSupportTask("BWA MEM reads assembly", TaskFlags_NR_FOSCOE),
indexPath(indexPath),
resultPath(settings.resultFileName.getURLString()),
settings(settings)
@@ -402,6 +407,7 @@ void BwaMemAlignTask::prepare() {
QString::number(pairedReadsCounter++) + "." + resultFileInfo.completeSuffix();
alignTask->setStandartOutputFile(resultFilePathWithpartNumber);
}
+ setListenerForTask(alignTask);
alignTasks.append(alignTask);
} else if (settings.shortReadSets.size() > 1) {
arguments.append(currentReadSet.url.getURLString());
@@ -409,11 +415,13 @@ void BwaMemAlignTask::prepare() {
QString resultFilePathWithpartNumber = resultFileInfo.dir().canonicalPath() + "/" + resultFileInfo.baseName() + "_" +
QString::number(resultPartsCounter) + "." + resultFileInfo.completeSuffix();
alignTask->setStandartOutputFile(resultFilePathWithpartNumber);
+ setListenerForTask(alignTask);
alignTasks.append(alignTask);
} else {
arguments.append(currentReadSet.url.getURLString());
ExternalToolRunTask* alignTask = new ExternalToolRunTask(ET_BWA, arguments, new BwaAlignTask::LogParser(), NULL);
alignTask->setStandartOutputFile(settings.resultFileName.getURLString());
+ setListenerForTask(alignTask);
alignTasks.append(alignTask);
}
}
@@ -434,7 +442,7 @@ QList<Task *> BwaMemAlignTask::onSubTaskFinished(Task *subTask) {
for (int i = 0; i < partsCounter; i++) {
QString resultFilePathWithpartNumber = resultFileInfo.dir().canonicalPath() + "/" + resultFileInfo.baseName() + "_" +
QString::number(i) + "." + resultFileInfo.completeSuffix();
- QString bamFilePath = tmpDirPath + "/" + resultFileInfo.baseName() + ".bam";
+ QString bamFilePath = tmpDirPath + "/" + resultFileInfo.baseName() + "_" + QString::number(i) + ".bam";
BAMUtils::ConvertOption options(true);
BAMUtils::convertToSamOrBam(resultFilePathWithpartNumber, bamFilePath, options, stateInfo);
bamUrlstoMerge.append(bamFilePath);
@@ -459,7 +467,7 @@ QList<Task *> BwaMemAlignTask::onSubTaskFinished(Task *subTask) {
// BwaSwAlignTask
BwaSwAlignTask::BwaSwAlignTask(const QString &indexPath, const DnaAssemblyToRefTaskSettings &settings):
- Task("BWA SW reads assembly", TaskFlags_NR_FOSCOE),
+ ExternalToolSupportTask("BWA SW reads assembly", TaskFlags_NR_FOSCOE),
indexPath(indexPath),
settings(settings)
{
@@ -604,6 +612,7 @@ void BwaTask::prepare() {
}
if(!settings.prebuiltIndex) {
buildIndexTask = new BwaBuildIndexTask(settings.refSeqUrl.getURLString(), indexFileName, settings);
+ buildIndexTask->addListeners(QList <ExternalToolListener*>() << getListener(0));
}
int upStreamCount = 0;
int downStreamCount = 0;
@@ -621,6 +630,7 @@ void BwaTask::prepare() {
return;
}
alignTask = new BwaSwAlignTask(indexFileName, settings);
+ alignTask->addListeners(QList <ExternalToolListener*>() << getListener(1));
} else if (settings.getCustomValue(OPTION_MEM_ALIGNMENT, false) == true) {
if (downStreamCount != upStreamCount && settings.pairedReads) {
setError(tr("Please, provide same number of files with downstream and upstream reads."));
@@ -628,8 +638,10 @@ void BwaTask::prepare() {
}
alignTask = new BwaMemAlignTask(indexFileName, settings);
+ alignTask->addListeners(QList <ExternalToolListener*>() << getListener(1));
}else{
alignTask = new BwaAlignTask(indexFileName, settings.shortReadSets, settings.resultFileName.getURLString(), settings);
+ alignTask->addListeners(QList <ExternalToolListener*>() << getListener(1));
}
}
diff --git a/src/plugins/external_tool_support/src/bwa/BwaTask.h b/src/plugins/external_tool_support/src/bwa/BwaTask.h
index 46d3ec2..7a661af 100644
--- a/src/plugins/external_tool_support/src/bwa/BwaTask.h
+++ b/src/plugins/external_tool_support/src/bwa/BwaTask.h
@@ -31,7 +31,7 @@ namespace U2 {
class MultiTask;
-class BwaBuildIndexTask : public Task {
+class BwaBuildIndexTask : public ExternalToolSupportTask {
Q_OBJECT
public:
BwaBuildIndexTask(const QString &referencePath, const QString &indexPath, const DnaAssemblyToRefTaskSettings &settings);
@@ -51,7 +51,7 @@ private:
DnaAssemblyToRefTaskSettings settings;
};
-class BwaAlignTask : public Task {
+class BwaAlignTask : public ExternalToolSupportTask {
Q_OBJECT
public:
BwaAlignTask(const QString &indexPath, const QList<ShortReadSet>& shortReadSets, const QString &resultPath, const DnaAssemblyToRefTaskSettings &settings);
@@ -80,7 +80,7 @@ private:
inline QString getSAIPath(const QString& pathToReads);
};
-class BwaSwAlignTask : public Task {
+class BwaSwAlignTask : public ExternalToolSupportTask {
Q_OBJECT
public:
BwaSwAlignTask(const QString &indexPath, const DnaAssemblyToRefTaskSettings &settings);
@@ -91,7 +91,7 @@ private:
DnaAssemblyToRefTaskSettings settings;
};
-class BwaMemAlignTask : public Task {
+class BwaMemAlignTask : public ExternalToolSupportTask {
Q_OBJECT
public:
BwaMemAlignTask(const QString &indexPath, const DnaAssemblyToRefTaskSettings &settings);
@@ -194,7 +194,7 @@ public:
private:
BwaBuildIndexTask *buildIndexTask;
- Task *alignTask;
+ ExternalToolSupportTask *alignTask;
};
class BwaTaskFactory : public DnaAssemblyToRefTaskFactory {
diff --git a/src/plugins/external_tool_support/src/bwa/BwaWorker.cpp b/src/plugins/external_tool_support/src/bwa/BwaWorker.cpp
index 538d551..a902b88 100644
--- a/src/plugins/external_tool_support/src/bwa/BwaWorker.cpp
+++ b/src/plugins/external_tool_support/src/bwa/BwaWorker.cpp
@@ -112,10 +112,6 @@ QString BwaWorker::getBaseSubdir() const {
return BASE_BWA_SUBDIR;
}
-DnaAssemblyToReferenceTask* BwaWorker::getTask(const DnaAssemblyToRefTaskSettings &settings) const {
- return new BwaTask(settings);
-}
-
void BwaWorker::setGenomeIndex(DnaAssemblyToRefTaskSettings& settings) {
settings.refSeqUrl = getValue<QString>(REFERENCE_GENOME);
settings.prebuiltIndex = DnaAssemblyToReferenceTask::isIndexUrl(settings.refSeqUrl.getURLString(), BwaTask::indexSuffixes);
diff --git a/src/plugins/external_tool_support/src/bwa/BwaWorker.h b/src/plugins/external_tool_support/src/bwa/BwaWorker.h
index e5475c9..f4ac3e8 100644
--- a/src/plugins/external_tool_support/src/bwa/BwaWorker.h
+++ b/src/plugins/external_tool_support/src/bwa/BwaWorker.h
@@ -39,7 +39,6 @@ protected :
QVariantMap getCustomParameters() const;
QString getDefaultFileName() const;
QString getBaseSubdir() const;
- DnaAssemblyToReferenceTask* getTask(const DnaAssemblyToRefTaskSettings &settings) const;
void setGenomeIndex(DnaAssemblyToRefTaskSettings& settings);
}; // BwaWorker
diff --git a/src/plugins/external_tool_support/src/cap3/CAP3SupportDialog.cpp b/src/plugins/external_tool_support/src/cap3/CAP3SupportDialog.cpp
index fea7695..6af18d9 100644
--- a/src/plugins/external_tool_support/src/cap3/CAP3SupportDialog.cpp
+++ b/src/plugins/external_tool_support/src/cap3/CAP3SupportDialog.cpp
@@ -40,7 +40,7 @@ CAP3SupportDialog::CAP3SupportDialog(CAP3SupportTaskSettings& s, QWidget* parent
saveController(NULL)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470716");
+ new HelpButton(this, buttonBox, "18220576");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Run"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins/external_tool_support/src/clustalo/ClustalOSupportRunDialog.cpp b/src/plugins/external_tool_support/src/clustalo/ClustalOSupportRunDialog.cpp
index 9f54518..aacd3fb 100644
--- a/src/plugins/external_tool_support/src/clustalo/ClustalOSupportRunDialog.cpp
+++ b/src/plugins/external_tool_support/src/clustalo/ClustalOSupportRunDialog.cpp
@@ -45,7 +45,7 @@ ClustalOSupportRunDialog::ClustalOSupportRunDialog(const MAlignment& _ma, Clusta
QDialog(_parent), ma(_ma), settings(_settings)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470740");
+ new HelpButton(this, buttonBox, "18220600");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Align"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
@@ -79,7 +79,7 @@ ClustalOWithExtFileSpecifySupportRunDialog::ClustalOWithExtFileSpecifySupportRun
saveController(NULL)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470722");
+ new HelpButton(this, buttonBox, "18220582");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Align"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins/external_tool_support/src/clustalw/ClustalWSupportRunDialog.cpp b/src/plugins/external_tool_support/src/clustalw/ClustalWSupportRunDialog.cpp
index 74b4b60..55ea6b0 100644
--- a/src/plugins/external_tool_support/src/clustalw/ClustalWSupportRunDialog.cpp
+++ b/src/plugins/external_tool_support/src/clustalw/ClustalWSupportRunDialog.cpp
@@ -42,7 +42,7 @@ ClustalWSupportRunDialog::ClustalWSupportRunDialog(const MAlignment& _ma, Clusta
QDialog(_parent), ma(_ma), settings(_settings)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470729");
+ new HelpButton(this, buttonBox, "18220589");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Align"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
@@ -119,7 +119,7 @@ ClustalWWithExtFileSpecifySupportRunDialog::ClustalWWithExtFileSpecifySupportRun
saveController(NULL)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470729");
+ new HelpButton(this, buttonBox, "18220589");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Align"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins/external_tool_support/src/cutadapt/CutadaptWorker.cpp b/src/plugins/external_tool_support/src/cutadapt/CutadaptWorker.cpp
index 518dbcf..4e9dace 100644
--- a/src/plugins/external_tool_support/src/cutadapt/CutadaptWorker.cpp
+++ b/src/plugins/external_tool_support/src/cutadapt/CutadaptWorker.cpp
@@ -140,7 +140,7 @@ void CutAdaptFastqWorkerFactory::init() {
"the adapter itself is trimmed."));
- Descriptor anywhere(ANYWHERE_URL, CutAdaptFastqWorker::tr("FASTA File with any end adapters"),
+ Descriptor anywhere(ANYWHERE_URL, CutAdaptFastqWorker::tr("FASTA file with 5' and 3' adapters"),
CutAdaptFastqWorker::tr("A FASTA file with one or multiple sequences of adapters that were ligated to the 5' end or 3' end."));
@@ -288,6 +288,7 @@ QStringList CutAdaptFastqTask::getParameters(U2OpStatus &/*os*/) {
res << "-f";
res << "fastq";
}
+ res << "-m" << "1";
res << settings.inputUrl;
@@ -301,7 +302,7 @@ void CutAdaptParser::parseErrOutput( const QString& partOfLog ) {
lastPartOfLog.first() = lastErrLine + lastPartOfLog.first();
lastErrLine = lastPartOfLog.takeLast();
QString error = parseTextForErrors(lastPartOfLog);
- if (error.isEmpty()) {
+ if (!error.isEmpty()) {
setLastError(error);
}
}
diff --git a/src/plugins/external_tool_support/src/hmmer/ConvertAlignment2StockholmTask.cpp b/src/plugins/external_tool_support/src/hmmer/ConvertAlignment2StockholmTask.cpp
new file mode 100644
index 0000000..18980c1
--- /dev/null
+++ b/src/plugins/external_tool_support/src/hmmer/ConvertAlignment2StockholmTask.cpp
@@ -0,0 +1,121 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <QCoreApplication>
+#include <QDir>
+#include <QFileInfo>
+
+#include <U2Core/AppContext.h>
+#include <U2Core/AppSettings.h>
+#include <U2Core/ExternalToolRunTask.h>
+#include <U2Core/LoadDocumentTask.h>
+#include <U2Core/MAlignmentObject.h>
+#include <U2Core/U2SafePoints.h>
+#include <U2Core/UserApplicationsSettings.h>
+
+#include "ConvertAlignment2StockholmTask.h"
+#include "utils/ExportTasks.h"
+
+namespace U2 {
+
+/**
+ * Creates a task that loads the alignment at msaUrl and saves it as a Stockholm file.
+ * workingDir may be empty; prepareResultUrl() then chooses a temporary directory.
+ */
+ConvertAlignment2Stockholm::ConvertAlignment2Stockholm(const QString &msaUrl, const QString &workingDir)
+ : Task(tr("Convert alignment to Stockholm format"), TaskFlags_NR_FOSE_COSC),
+ loadTask(NULL),
+ saveTask(NULL),
+ msaUrl(msaUrl),
+ workingDir(workingDir)
+{
+ // Presumably SAFE_POINT_EXT runs the setError() action when the URL is empty - confirm in U2SafePoints.h.
+ SAFE_POINT_EXT(!msaUrl.isEmpty(), setError("Msa URL is empty"), );
+}
+
+/** Returns the URL of the Stockholm file; it stays empty until prepareResultUrl() has assigned it. */
+const QString & ConvertAlignment2Stockholm::getResultUrl() const {
+    return this->resultUrl;
+}
+
+/**
+ * Schedules loading of the input file as the first sub-task.
+ * The reading hints are handed to the load task; the original code filled the
+ * map but never passed it on, so the "sequence as alignment" hint had no effect.
+ */
+void ConvertAlignment2Stockholm::prepare() {
+    QVariantMap hints;
+    hints[DocumentReadingMode_SequenceAsAlignmentHint] = true;
+    loadTask = LoadDocumentTask::getDefaultLoadDocTask(msaUrl, hints);
+    // NOTE(review): the factory is believed to return NULL when the file format cannot be detected;
+    // report that as an error instead of registering a NULL sub-task.
+    CHECK_EXT(NULL != loadTask, setError(tr("Cannot load the alignment file: %1").arg(msaUrl)), );
+    addSubTask(loadTask);
+}
+
+/**
+ * Once the load sub-task has finished, prepares the output location and
+ * returns the save sub-task. Returns an empty list for any other sub-task
+ * or when CHECK_OP bails out on the task state.
+ */
+QList<Task *> ConvertAlignment2Stockholm::onSubTaskFinished(Task *subTask) {
+    QList<Task *> followUps;
+    CHECK_OP(stateInfo, followUps);
+
+    if (loadTask != subTask) {
+        return followUps;
+    }
+
+    prepareResultUrl();
+    CHECK_OP(stateInfo, followUps);
+
+    prepareSaveTask();
+    CHECK_OP(stateInfo, followUps);
+
+    followUps << saveTask;
+    return followUps;
+}
+
+namespace {
+
+// Sub-directory name requested from the user application settings for this task's temporary files.
+const QString TEMP_DIR = "convert";
+
+// Builds a directory name from the prefix, the task id, the current date and time, and the process id.
+QString getTaskTempDirName(const QString &prefix, Task *task) {
+    const QString taskId = QString::number(task->getTaskId());
+    const QString datePart = QDate::currentDate().toString("dd.MM.yyyy");
+    const QString timePart = QTime::currentTime().toString("hh.mm.ss.zzz");
+    const QString pidPart = QString::number(QCoreApplication::applicationPid());
+    return prefix + taskId + "_" + datePart + "_" + timePart + "_" + pidPart;
+}
+
+}
+
+/**
+ * Chooses the working directory (a generated temporary one when none was given),
+ * derives the ".sto" result URL from the input file's base name and (re)creates
+ * the directory. An already existing directory is removed first.
+ */
+void ConvertAlignment2Stockholm::prepareResultUrl() {
+    if (workingDir.isEmpty()) {
+        const QString generatedName = getTaskTempDirName("convert_", this);
+        const QString tempRoot = AppContext::getAppSettings()->getUserAppsSettings()->getCurrentProcessTemporaryDirPath(TEMP_DIR);
+        workingDir = tempRoot + "/" + generatedName;
+    }
+    const QString inputBaseName = QFileInfo(msaUrl).baseName();
+    resultUrl = workingDir + "/" + inputBaseName + ".sto";
+
+    QDir dir(workingDir);
+    if (dir.exists()) {
+        ExternalToolSupportUtils::removeTmpDir(workingDir, stateInfo);
+        CHECK_OP(stateInfo, );
+    }
+    CHECK_EXT(dir.mkpath(workingDir), setError(tr("Cannot create a directory for temporary files.")), );
+}
+
+/**
+ * Creates the sub-task that writes the first multiple alignment of the loaded
+ * document to resultUrl in Stockholm format. Sets a task error when the document
+ * holds no alignment and adds a warning when it holds more than one.
+ */
+void ConvertAlignment2Stockholm::prepareSaveTask() {
+    // Only borrow the document. The original called takeDocument(), which hands ownership
+    // to the caller, and then never deleted it. The alignment is passed to the save task by
+    // value below, so the load task can stay the owner.
+    Document *document = loadTask->getDocument();
+    SAFE_POINT_EXT(NULL != document, setError("Loaded document is NULL"), );
+
+    QList<GObject *> objects = document->findGObjectByType(GObjectTypes::MULTIPLE_ALIGNMENT);
+    CHECK_EXT(!objects.isEmpty(), setError(tr("File doesn't contain any multiple alignments.")), );
+
+    if (1 < objects.size()) {
+        stateInfo.addWarning(tr("File contains several multiple alignments. Only the first one is saved to the result file."));
+    }
+
+    // qobject_cast yields NULL on a type mismatch; do not dereference it blindly.
+    MAlignmentObject *maObject = qobject_cast<MAlignmentObject *>(objects.first());
+    SAFE_POINT_EXT(NULL != maObject, setError("Alignment object is NULL"), );
+
+    saveTask = new SaveAlignmentTask(maObject->getMAlignment(), resultUrl, BaseDocumentFormats::STOCKHOLM);
+    saveTask->setSubtaskProgressWeight(50);
+}
+
+} // namespace U2
diff --git a/src/ugeneui/src/main_window/ShutdownTask.h b/src/plugins/external_tool_support/src/hmmer/ConvertAlignment2StockholmTask.h
similarity index 59%
copy from src/ugeneui/src/main_window/ShutdownTask.h
copy to src/plugins/external_tool_support/src/hmmer/ConvertAlignment2StockholmTask.h
index 4c8596e..972c89d 100644
--- a/src/ugeneui/src/main_window/ShutdownTask.h
+++ b/src/plugins/external_tool_support/src/hmmer/ConvertAlignment2StockholmTask.h
@@ -19,35 +19,37 @@
* MA 02110-1301, USA.
*/
-#ifndef _U2_SHUTDOWN_TASK_H_
-#define _U2_SHUTDOWN_TASK_H_
+#ifndef _U2_CONVERT_ALIGNMENT_2_STOCKHOLM_TASK_H_
+#define _U2_CONVERT_ALIGNMENT_2_STOCKHOLM_TASK_H_
#include <U2Core/Task.h>
namespace U2 {
-class Document;
-class MainWindowImpl;
+class LoadDocumentTask;
+class SaveAlignmentTask;
-class ShutdownTask : public Task {
- Q_OBJECT
+class ConvertAlignment2Stockholm : public Task {
public:
- ShutdownTask(MainWindowImpl* mw);
+ ConvertAlignment2Stockholm(const QString &msaUrl, const QString &workingDir);
+ const QString & getResultUrl() const;
+
+private:
void prepare();
+ QList<Task *> onSubTaskFinished(Task *subTask);
- ReportResult report();
+ void prepareResultUrl();
+ void prepareSaveTask();
-protected:
- virtual QList<Task*> onSubTaskFinished(Task* subTask);
+ LoadDocumentTask *loadTask;
+ SaveAlignmentTask *saveTask;
-private:
- MainWindowImpl* mw;
- bool docsToRemoveAreFetched;
- QList<Document *> docsToRemove;
+ const QString msaUrl;
+ QString workingDir;
+ QString resultUrl;
};
+} // namespace U2
-}//namespace
-
-#endif
+#endif // _U2_CONVERT_ALIGNMENT_2_STOCKHOLM_TASK_H_
diff --git a/src/plugins/external_tool_support/src/hmmer/HmmerBuildDialog.cpp b/src/plugins/external_tool_support/src/hmmer/HmmerBuildDialog.cpp
new file mode 100644
index 0000000..fba756e
--- /dev/null
+++ b/src/plugins/external_tool_support/src/hmmer/HmmerBuildDialog.cpp
@@ -0,0 +1,243 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <QMessageBox>
+#include <QPushButton>
+
+#include <U2Core/AppContext.h>
+
+#include <U2Gui/DialogUtils.h>
+#include <U2Gui/HelpButton.h>
+#include <U2Gui/LastUsedDirHelper.h>
+#include <U2Gui/SaveDocumentController.h>
+#include <U2Gui/U2FileDialog.h>
+
+#include "HmmerBuildDialog.h"
+#include "HmmerBuildFromFileTask.h"
+#include "HmmerBuildFromMsaTask.h"
+
+namespace U2 {
+
+// Domain keys: MA_FILES_DIR_ID is given to LastUsedDirHelper when picking the input alignment,
+// HMM_FILES_DIR_ID is used as the save controller's default domain for the output profile.
+const QString HmmerBuildDialog::MA_FILES_DIR_ID = "uhmmer3_build_ma_files_dir";
+const QString HmmerBuildDialog::HMM_FILES_DIR_ID = "uhmmer3_build_hmm_files_dir";
+
+/** Wires the dialog's buttons and radio buttons to their slots and hides the fragment-threshold controls. */
+void HmmerBuildDialog::setSignalsAndSlots() {
+    // File selection and the Ok/Cancel buttons of the button box.
+    connect(maOpenFileButton, SIGNAL(clicked()), SLOT(sl_maOpenFileButtonClicked()));
+    connect(buttonBox->button(QDialogButtonBox::Ok), SIGNAL(clicked()), SLOT(sl_buildButtonClicked()));
+    connect(buttonBox->button(QDialogButtonBox::Cancel), SIGNAL(clicked()), SLOT(sl_cancelButtonClicked()));
+
+    // Radio buttons that enable or disable their dependent controls.
+    connect(mcFastRadioButton, SIGNAL(toggled(bool)), SLOT(sl_fastMCRadioButtonChanged(bool)));
+    connect(wblosumRSWRadioButton, SIGNAL(toggled(bool)), SLOT(sl_wblosumRSWRadioButtonChanged(bool)));
+    connect(eentESWRadioButton, SIGNAL(toggled(bool)), SLOT(sl_eentESWRadioButtonChanged(bool)));
+    connect(eclustESWRadioButton, SIGNAL(toggled(bool)), SLOT(sl_eclustESWRadioButtonChanged(bool)));
+    connect(esetESWRadioButton, SIGNAL(toggled(bool)), SLOT(sl_esetESWRadioButtonChanged(bool)));
+
+    // The fragment-threshold label and spin box are temporarily hidden.
+    fragThreshDoubleSpinBox->setVisible(false);
+    fragthreshLabel->setVisible(false);
+}
+
+/**
+ * One-time dialog setup: builds the generated UI, adds the help button, renames the
+ * standard buttons, creates the save controller, shows the default build settings
+ * in the widgets and connects the signals.
+ */
+void HmmerBuildDialog::initialize() {
+ // Must run first: the widgets used below are created here.
+ setupUi(this);
+ new HelpButton(this, buttonBox, "18220559");
+ buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Build"));
+ buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
+
+ initSaveController();
+ setModelValues(); // build settings are default here
+ setSignalsAndSlots();
+}
+
+/** Creates the controller that manages the output HMM file edit and its browse button. */
+void HmmerBuildDialog::initSaveController() {
+    // Only one output format is offered: an HMM profile with the "hmm" extension.
+    SaveDocumentController::SimpleFormatsInfo formats;
+    formats.addFormat("hmm", "HMM profile", QStringList() << "hmm");
+
+    SaveDocumentControllerConfig cfg;
+    cfg.parentWidget = this;
+    cfg.fileNameEdit = outHmmfileEdit;
+    cfg.fileDialogButton = outHmmfileToolButton;
+    cfg.defaultDomain = HMM_FILES_DIR_ID;
+    cfg.defaultFormatId = "hmm";
+    cfg.saveTitle = tr("Select hmm file to create");
+
+    saveController = new SaveDocumentController(cfg, formats, this);
+}
+
+/**
+ * Builds the dialog. When a non-empty alignment is supplied it is used as the
+ * input, and the widgets for choosing an input file are hidden.
+ */
+HmmerBuildDialog::HmmerBuildDialog(const MAlignment &ma, QWidget * parent)
+    : QDialog(parent),
+      saveController(NULL)
+{
+    initialize();
+    model.alignment = ma;
+    model.alignmentUsing = !model.alignment.isEmpty();
+
+    if (!model.alignmentUsing) {
+        return;
+    }
+
+    // The alignment is already in memory, so no input file is needed.
+    maLoadFromFileLabel->hide();
+    maLoadFromFileEdit->hide();
+    maOpenFileButton->hide();
+}
+
+/** Copies the numeric build settings from the model into the corresponding spin boxes. */
+void HmmerBuildDialog::setModelValues() {
+    const HmmerBuildSettings &current = model.buildSettings;
+
+    symfracDoubleSpinBox->setValue(current.symfrac);
+    widRSWDoubleSpinBox->setValue(current.wid);
+    eidESWDoubleSpinBox->setValue(current.eid);
+    esetESWDoubleSpinBox->setValue(current.eset);
+    emlSpinBox->setValue(current.eml);
+    emnSpinBox->setValue(current.emn);
+    evlSpinBox->setValue(current.evl);
+    evnSpinBox->setValue(current.evn);
+    eflSpinBox->setValue(current.efl);
+    efnSpinBox->setValue(current.efn);
+    eftDoubleSpinBox->setValue(current.eft);
+    seedSpinBox->setValue(current.seed);
+    esigmaDoubleSpinBox->setValue(current.esigma);
+    fragThreshDoubleSpinBox->setValue(current.fragtresh);
+}
+
+/** Lets the user pick an alignment file and puts the chosen path into the input edit. */
+void HmmerBuildDialog::sl_maOpenFileButtonClicked() {
+    LastUsedDirHelper helper(MA_FILES_DIR_ID);
+    const QString filter = DialogUtils::prepareDocumentsFileFilterByObjType(GObjectTypes::MULTIPLE_ALIGNMENT, true);
+    helper.url = U2FileDialog::getOpenFileName(this, tr("Select multiple alignment file"), helper, filter);
+    if (helper.url.isEmpty()) {
+        // Nothing was selected: keep the current text.
+        return;
+    }
+    maLoadFromFileEdit->setText(helper.url);
+}
+
+/**
+ * Reads the widgets back into the model: numeric settings, the three strategy
+ * choices selected by radio buttons, the output profile URL and the input file path.
+ */
+void HmmerBuildDialog::getModelValues() {
+    HmmerBuildSettings &target = model.buildSettings;
+
+    target.symfrac = symfracDoubleSpinBox->value();
+    target.wid = widRSWDoubleSpinBox->value();
+    target.eid = eidESWDoubleSpinBox->value();
+    target.eset = esetESWDoubleSpinBox->value();
+    target.eml = emlSpinBox->value();
+    target.emn = emnSpinBox->value();
+    target.evl = evlSpinBox->value();
+    target.evn = evnSpinBox->value();
+    target.efl = eflSpinBox->value();
+    target.efn = efnSpinBox->value();
+    target.eft = eftDoubleSpinBox->value();
+    target.seed = seedSpinBox->value();
+    target.esigma = esigmaDoubleSpinBox->value();
+    target.fragtresh = fragThreshDoubleSpinBox->value();
+    // A zero in the "ere" spin box leaves the setting untouched.
+    if (0 != ereESWDoubleSpinBox->value()) {
+        target.ere = ereESWDoubleSpinBox->value();
+    }
+
+    // Model construction strategy.
+    if (mcFastRadioButton->isChecked()) {
+        target.modelConstructionStrategy = HmmerBuildSettings::p7_ARCH_FAST;
+    } else {
+        target.modelConstructionStrategy = HmmerBuildSettings::p7_ARCH_HAND;
+    }
+
+    // Relative sequence weighting strategy.
+    if (wgscRSWRadioButton->isChecked()) {
+        target.relativeSequenceWeightingStrategy = HmmerBuildSettings::p7_WGT_GSC;
+    } else if (wblosumRSWRadioButton->isChecked()) {
+        target.relativeSequenceWeightingStrategy = HmmerBuildSettings::p7_WGT_BLOSUM;
+    } else if (wpbRSWRadioButton->isChecked()) {
+        target.relativeSequenceWeightingStrategy = HmmerBuildSettings::p7_WGT_PB;
+    } else if (wnoneRSWRadioButton->isChecked()) {
+        target.relativeSequenceWeightingStrategy = HmmerBuildSettings::p7_WGT_NONE;
+    } else if (wgivenRSWRadioButton->isChecked()) {
+        target.relativeSequenceWeightingStrategy = HmmerBuildSettings::p7_WGT_GIVEN;
+    } else {
+        assert(false);
+    }
+
+    // Effective sequence weighting strategy.
+    if (eentESWRadioButton->isChecked()) {
+        target.effectiveSequenceWeightingStrategy = HmmerBuildSettings::p7_EFFN_ENTROPY;
+    } else if (eclustESWRadioButton->isChecked()) {
+        target.effectiveSequenceWeightingStrategy = HmmerBuildSettings::p7_EFFN_CLUST;
+    } else if (enoneESWRadioButton->isChecked()) {
+        target.effectiveSequenceWeightingStrategy = HmmerBuildSettings::p7_EFFN_NONE;
+    } else if (esetESWRadioButton->isChecked()) {
+        target.effectiveSequenceWeightingStrategy = HmmerBuildSettings::p7_EFFN_SET;
+    } else {
+        assert(false);
+    }
+
+    target.profileUrl = saveController->getSaveFileName();
+    model.inputFile = maLoadFromFileEdit->text();
+}
+
+QString HmmerBuildDialog::checkModel() {
+// assert(checkUHMM3BuildSettings(&model.buildSettings.inner));
+ if (!model.alignmentUsing && model.inputFile.isEmpty()) {
+ return tr("input file is empty");
+ }
+ if (model.buildSettings.profileUrl.isEmpty()) {
+ return tr("output hmm file is empty");
+ }
+ return QString();
+}
+
+/**
+ * Collects the settings, validates them and, when they are valid, registers the
+ * matching build task as a top-level task and accepts the dialog. On a validation
+ * error a message box is shown and the dialog stays open.
+ */
+void HmmerBuildDialog::sl_buildButtonClicked() {
+    getModelValues();
+
+    const QString problem = checkModel();
+    if (!problem.isEmpty()) {
+        QMessageBox::critical(this, tr("Error: bad arguments!"), problem);
+        return;
+    }
+
+    // Build from the in-memory alignment when there is one, otherwise from the input file.
+    Task *task = NULL;
+    if (!model.alignmentUsing) {
+        task = new HmmerBuildFromFileTask(model.buildSettings, model.inputFile);
+    } else {
+        task = new HmmerBuildFromMsaTask(model.buildSettings, model.alignment);
+    }
+    assert(NULL != task);
+
+    AppContext::getTaskScheduler()->registerTopLevelTask(task);
+    QDialog::accept();
+}
+
+/** Slot connected to the Cancel button: rejects the dialog. */
+void HmmerBuildDialog::sl_cancelButtonClicked() {
+ reject();
+}
+
+/** Enables the symfrac label and spin box only while mcFastRadioButton is checked. */
+void HmmerBuildDialog::sl_fastMCRadioButtonChanged(bool checked) {
+ mcFastSymfracLabel->setEnabled(checked);
+ symfracDoubleSpinBox->setEnabled(checked);
+}
+
+/** Enables the "wid" label and spin box only while wblosumRSWRadioButton is checked. */
+void HmmerBuildDialog::sl_wblosumRSWRadioButtonChanged(bool checked) {
+ widRSWLabel->setEnabled(checked);
+ widRSWDoubleSpinBox->setEnabled(checked);
+}
+
+/** Enables the "ere" and "esigma" controls and their labels only while eentESWRadioButton is checked. */
+void HmmerBuildDialog::sl_eentESWRadioButtonChanged(bool checked) {
+ ereESWDoubleSpinBox->setEnabled(checked);
+ esigmaDoubleSpinBox->setEnabled(checked);
+ esigmaLabel->setEnabled(checked);
+ ereLabel->setEnabled(checked);
+}
+
+/** Enables the "eid" label and spin box only while eclustESWRadioButton is checked. */
+void HmmerBuildDialog::sl_eclustESWRadioButtonChanged(bool checked) {
+ eidESWLabel->setEnabled(checked);
+ eidESWDoubleSpinBox->setEnabled(checked);
+}
+
+/** Enables the "eset" spin box only while esetESWRadioButton is checked. */
+void HmmerBuildDialog::sl_esetESWRadioButtonChanged(bool checked) {
+ esetESWDoubleSpinBox->setEnabled(checked);
+}
+
+} // namespace U2
diff --git a/src/plugins_3rdparty/hmm3/src/build/uHMM3BuildDialogImpl.h b/src/plugins/external_tool_support/src/hmmer/HmmerBuildDialog.h
similarity index 64%
rename from src/plugins_3rdparty/hmm3/src/build/uHMM3BuildDialogImpl.h
rename to src/plugins/external_tool_support/src/hmmer/HmmerBuildDialog.h
index 7826257..6f7f757 100644
--- a/src/plugins_3rdparty/hmm3/src/build/uHMM3BuildDialogImpl.h
+++ b/src/plugins/external_tool_support/src/hmmer/HmmerBuildDialog.h
@@ -19,60 +19,60 @@
* MA 02110-1301, USA.
*/
-#ifndef _GB2_UHMM3_BUILD_DLG_IMPL_H_
-#define _GB2_UHMM3_BUILD_DLG_IMPL_H_
+#ifndef _U2_HMMER_BUILD_DIALOG_H_
+#define _U2_HMMER_BUILD_DIALOG_H_
#include <QDialog>
-#include "uHMM3BuildTask.h"
-#include <ui_UHMM3BuildDialog.h>
+
+#include <U2Core/MAlignment.h>
+
+#include "HmmerBuildTask.h"
+#include "ui_HmmerBuildDialog.h"
namespace U2 {
class SaveDocumentController;
-struct UHMM3BuildDialogModel {
- UHMM3BuildTaskSettings buildSettings;
+class UHMM3BuildDialogModel {
+public:
+ HmmerBuildSettings buildSettings;
/* one of this is used */
QString inputFile;
MAlignment alignment;
bool alignmentUsing;
-
-}; // UHMM3BuildDialogModel
+};
-class UHMM3BuildDialogImpl : public QDialog, public Ui_UHMM3BuildDialog {
-Q_OBJECT
+class HmmerBuildDialog : public QDialog, public Ui_HmmerBuildDialog {
+ Q_OBJECT
public:
+ HmmerBuildDialog(const MAlignment &ma, QWidget *parent = NULL);
+
static const QString MA_FILES_DIR_ID;
static const QString HMM_FILES_DIR_ID;
-
-public:
- UHMM3BuildDialogImpl( const MAlignment & ma, QWidget * p = NULL );
-
+
+private slots:
+ void sl_maOpenFileButtonClicked();
+ void sl_buildButtonClicked();
+ void sl_cancelButtonClicked();
+ void sl_fastMCRadioButtonChanged(bool checked);
+ void sl_wblosumRSWRadioButtonChanged(bool checked);
+ void sl_eentESWRadioButtonChanged(bool checked);
+ void sl_eclustESWRadioButtonChanged(bool changed);
+ void sl_esetESWRadioButtonChanged(bool checked);
+
private:
void setModelValues();
void getModelValues();
- QString checkModel(); /* return error or empty string */
+ QString checkModel(); // returns error or empty string
void setSignalsAndSlots();
void initialize();
void initSaveController();
-
-private slots:
- void sl_maOpenFileButtonClicked();
- void sl_buildButtonClicked();
- void sl_cancelButtonClicked();
- void sl_fastMCRadioButtonChanged( bool checked );
- void sl_wblosumRSWRadioButtonChanged( bool checked );
- void sl_eentESWRadioButtonChanged( bool checked );
- void sl_eclustESWRadioButtonChanged( bool changed );
- void sl_esetESWRadioButtonChanged( bool checked );
-
-private:
+
UHMM3BuildDialogModel model;
SaveDocumentController *saveController;
-
-}; // UHMM3BuildDialogImpl
+};
-} // U2
+} // namespace U2
-#endif // _GB2_UHMM3_BUILD_DLG_IMPL_H_
+#endif // _U2_HMMER_BUILD_DIALOG_H_
diff --git a/src/plugins_3rdparty/hmm3/src/build/UHMM3BuildDialog.ui b/src/plugins/external_tool_support/src/hmmer/HmmerBuildDialog.ui
similarity index 99%
rename from src/plugins_3rdparty/hmm3/src/build/UHMM3BuildDialog.ui
rename to src/plugins/external_tool_support/src/hmmer/HmmerBuildDialog.ui
index 0188fa4..423dc18 100644
--- a/src/plugins_3rdparty/hmm3/src/build/UHMM3BuildDialog.ui
+++ b/src/plugins/external_tool_support/src/hmmer/HmmerBuildDialog.ui
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
- <class>UHMM3BuildDialog</class>
- <widget class="QDialog" name="UHMM3BuildDialog">
+ <class>HmmerBuildDialog</class>
+ <widget class="QDialog" name="HmmerBuildDialog">
<property name="geometry">
<rect>
<x>0</x>
diff --git a/src/plugins/external_tool_support/src/hmmer/HmmerBuildFromFileTask.cpp b/src/plugins/external_tool_support/src/hmmer/HmmerBuildFromFileTask.cpp
new file mode 100644
index 0000000..5fe0158
--- /dev/null
+++ b/src/plugins/external_tool_support/src/hmmer/HmmerBuildFromFileTask.cpp
@@ -0,0 +1,105 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <U2Core/BaseDocumentFormats.h>
+#include <U2Core/DocumentUtils.h>
+#include <U2Core/U2OpStatusUtils.h>
+#include <U2Core/U2SafePoints.h>
+
+#include "ConvertAlignment2StockholmTask.h"
+#include "HmmerBuildFromFileTask.h"
+
+namespace U2 {
+
+/**
+ * Creates a task that builds an HMM profile from the alignment file at msaUrl.
+ * The file is converted to Stockholm format first when it is in another format.
+ */
+HmmerBuildFromFileTask::HmmerBuildFromFileTask(const HmmerBuildSettings &settings, const QString &msaUrl)
+    : ExternalToolSupportTask(tr("Build HMMER profile from file"), TaskFlags_NR_FOSE_COSC | TaskFlag_ReportingIsEnabled | TaskFlag_ReportingIsSupported),
+      convertTask(NULL),
+      buildTask(NULL),
+      settings(settings),
+      msaUrl(msaUrl)
+{
+    // The action must call setError(); a bare tr(...) expression only builds a string
+    // and throws it away, so the task would carry on without any error set.
+    SAFE_POINT_EXT(!msaUrl.isEmpty(), setError(tr("Msa URL is empty")), );
+}
+
+/** Returns the profile URL stored in the build settings this task was created with. */
+const QString & HmmerBuildFromFileTask::getHmmProfileUrl() const {
+ return settings.profileUrl;
+}
+
+/**
+ * Schedules the first sub-task: the build itself when the input is already
+ * a Stockholm file, otherwise the conversion to Stockholm.
+ */
+void HmmerBuildFromFileTask::prepare() {
+    if (isStockholm()) {
+        prepareBuildTask(msaUrl);
+        addSubTask(buildTask);
+        return;
+    }
+
+    prepareConvertTask();
+    addSubTask(convertTask);
+}
+
+/**
+ * After the conversion sub-task, returns the build sub-task for the converted file.
+ * After the build sub-task, triggers removeTempDir(). Returns an empty list
+ * when CHECK_OP bails out on the task state.
+ */
+QList<Task *> HmmerBuildFromFileTask::onSubTaskFinished(Task *subTask) {
+    QList<Task *> nextTasks;
+    CHECK_OP(stateInfo, nextTasks);
+
+    if (convertTask == subTask) {
+        prepareBuildTask(convertTask->getResultUrl());
+        nextTasks.append(buildTask);
+        return nextTasks;
+    }
+
+    if (buildTask == subTask) {
+        removeTempDir();
+    }
+    return nextTasks;
+}
+
+/** Deletes the intermediate Stockholm file when a conversion sub-task was created; always reports "finished". */
+Task::ReportResult HmmerBuildFromFileTask::report() {
+    if (NULL == convertTask) {
+        return ReportResult_Finished;
+    }
+
+    QFile convertedFile(convertTask->getResultUrl());
+    convertedFile.remove();
+    return ReportResult_Finished;
+}
+
+/** Delegates report generation to HmmerBuildTask::getReport(), passing this task, its settings and the input URL. */
+QString HmmerBuildFromFileTask::generateReport() const {
+ return HmmerBuildTask::getReport(this, settings, msaUrl);
+}
+
+/** Returns true when format detection on the input URL yields the Stockholm format id. */
+bool HmmerBuildFromFileTask::isStockholm() {
+    QString detectedFormatId;
+    // The return value of detectFormat() is not inspected; only the reported id is compared.
+    DocumentUtils::detectFormat(msaUrl, detectedFormatId);
+    return BaseDocumentFormats::STOCKHOLM == detectedFormatId;
+}
+
+/** Creates the conversion sub-task for the input file, with a progress weight of 10. */
+void HmmerBuildFromFileTask::prepareConvertTask() {
+ convertTask = new ConvertAlignment2Stockholm(msaUrl, settings.workingDir);
+ convertTask->setSubtaskProgressWeight(10);
+}
+
+/**
+ * Creates the build sub-task for the given Stockholm file, attaches this task's
+ * listener to it and gives it a progress weight of 90.
+ */
+void HmmerBuildFromFileTask::prepareBuildTask(const QString &stockholmMsaUrl) {
+ buildTask = new HmmerBuildTask(settings, stockholmMsaUrl);
+ setListenerForTask(buildTask);
+ buildTask->setSubtaskProgressWeight(90);
+}
+
+/**
+ * Removes the temporary directory that the conversion sub-task generated for itself.
+ *
+ * Nothing is removed when the caller supplied a working directory or when no
+ * conversion took place. The original code passed the (empty) settings.workingDir
+ * to removeTmpDir(), so the generated directory was never cleaned up.
+ */
+void HmmerBuildFromFileTask::removeTempDir() {
+    if (!settings.workingDir.isEmpty() || NULL == convertTask) {
+        return;
+    }
+
+    // ConvertAlignment2Stockholm builds its result URL as "<workingDir>/<name>.sto",
+    // so the directory is everything before the last slash.
+    const QString convertedUrl = convertTask->getResultUrl();
+    const int lastSlash = convertedUrl.lastIndexOf("/");
+    if (lastSlash <= 0) {
+        return;
+    }
+
+    // Cleanup is best effort: a failure here is deliberately not reported to the task.
+    U2OpStatusImpl os;
+    ExternalToolSupportUtils::removeTmpDir(convertedUrl.left(lastSlash), os);
+}
+
+} // namespace U2
diff --git a/src/plugins_3rdparty/hmm3/src/task_local_storage/uHMMSearchTaskLocalStorage.h b/src/plugins/external_tool_support/src/hmmer/HmmerBuildFromFileTask.h
similarity index 51%
rename from src/plugins_3rdparty/hmm3/src/task_local_storage/uHMMSearchTaskLocalStorage.h
rename to src/plugins/external_tool_support/src/hmmer/HmmerBuildFromFileTask.h
index ca8b845..162d2bb 100644
--- a/src/plugins_3rdparty/hmm3/src/task_local_storage/uHMMSearchTaskLocalStorage.h
+++ b/src/plugins/external_tool_support/src/hmmer/HmmerBuildFromFileTask.h
@@ -19,38 +19,40 @@
* MA 02110-1301, USA.
*/
-#ifndef _GB2_HMMER3_SEARCH_TLS_H_
-#define _GB2_HMMER3_SEARCH_TLS_H_
+#ifndef _U2_HMMER_BUILD_FROM_FILE_TASK_H_
+#define _U2_HMMER_BUILD_FROM_FILE_TASK_H_
-#include <QtCore/QHash>
-#include <QtCore/QMutex>
-#include <QtCore/QThreadStorage>
-
-#include "uHMMSearchTaskLocalData.h"
+#include "HmmerBuildTask.h"
namespace U2 {
-struct ContextId {
- ContextId( qint64 what ) : id( what ) {}
- qint64 id;
-}; // ContextId
+class ConvertAlignment2Stockholm;
-class UHMM3SearchTaskLocalStorage {
+class HmmerBuildFromFileTask : public ExternalToolSupportTask {
public:
- static const UHMM3SearchTaskLocalData* current();
-
- static UHMM3SearchTaskLocalData* createTaskContext( qint64 ctxId );
-
- static void freeTaskContext( qint64 ctxId );
-
+ HmmerBuildFromFileTask(const HmmerBuildSettings &settigngs, const QString &msaUrl);
+
+ const QString & getHmmProfileUrl() const;
+
private:
- static QHash< qint64, UHMM3SearchTaskLocalData* > data;
- static QThreadStorage< ContextId* > tls;
- static QMutex mutex;
- static const UHMM3SearchTaskLocalData defaultData;
-
-}; // UHMMSearchTaskLocalStorage
+ void prepare();
+ QList<Task *> onSubTaskFinished(Task *subTask);
+ ReportResult report();
+ QString generateReport() const;
+
+ bool isStockholm();
+ void prepareConvertTask();
+ void prepareBuildTask(const QString &stockholmMsaUrl);
+
+ void removeTempDir();
+
+ ConvertAlignment2Stockholm *convertTask;
+ HmmerBuildTask *buildTask;
+
+ const HmmerBuildSettings settings;
+ const QString msaUrl;
+};
-} // U2
+} // namespace U2
-#endif // _GB2_HMMER3_SEARCH_TLS_H_
+#endif // _U2_HMMER_BUILD_FROM_FILE_TASK_H_
diff --git a/src/plugins/external_tool_support/src/hmmer/HmmerBuildFromMsaTask.cpp b/src/plugins/external_tool_support/src/hmmer/HmmerBuildFromMsaTask.cpp
new file mode 100644
index 0000000..bc200de
--- /dev/null
+++ b/src/plugins/external_tool_support/src/hmmer/HmmerBuildFromMsaTask.cpp
@@ -0,0 +1,116 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <QCoreApplication>
+#include <QDir>
+
+#include <U2Core/AppContext.h>
+#include <U2Core/AppSettings.h>
+#include <U2Core/GUrlUtils.h>
+#include <U2Core/L10n.h>
+#include <U2Core/U2OpStatusUtils.h>
+#include <U2Core/UserApplicationsSettings.h>
+
+#include "HmmerBuildFromMsaTask.h"
+#include "utils/ExportTasks.h"
+
+namespace U2 {
+
+/**
+ * Creates a task that builds an HMM profile from an in-memory alignment.
+ * The alignment is first saved to a Stockholm file in the working directory (see prepare()).
+ */
+HmmerBuildFromMsaTask::HmmerBuildFromMsaTask(const HmmerBuildSettings &settings, const MAlignment &msa)
+ : ExternalToolSupportTask(tr("Build HMMER profile from msa"), TaskFlags_NR_FOSE_COSC | TaskFlag_ReportingIsEnabled | TaskFlag_ReportingIsSupported),
+ settings(settings),
+ msa(msa),
+ saveTask(NULL),
+ hmmerTask(NULL),
+ removeWorkingDir(false)
+{
+ // Presumably SAFE_POINT_EXT runs the setError() action when the profile URL is empty - confirm in U2SafePoints.h.
+ SAFE_POINT_EXT(!settings.profileUrl.isEmpty(), setError(tr("HMM profile URL is empty")), );
+}
+
+/** Returns the profile URL stored in this task's build settings. */
+const QString & HmmerBuildFromMsaTask::getHmmUrl() const {
+ return settings.profileUrl;
+}
+
+namespace {
+
+// Sub-directory name requested from the user application settings for this task's temporary files.
+const QString PHMMER_TEMP_DIR = "hmmer";
+
+// Builds a directory name from the prefix, the task id, the current date and time, and the process id.
+QString getTaskTempDirName(const QString &prefix, Task *task) {
+    const QString taskId = QString::number(task->getTaskId());
+    const QString datePart = QDate::currentDate().toString("dd.MM.yyyy");
+    const QString timePart = QTime::currentTime().toString("hh.mm.ss.zzz");
+    const QString pidPart = QString::number(QCoreApplication::applicationPid());
+    return prefix + taskId + "_" + datePart + "_" + timePart + "_" + pidPart;
+}
+
+}
+
+/**
+ * Prepares the working directory and schedules a sub-task that saves the
+ * alignment there as "<fixed alignment name>.sto" in Stockholm format.
+ */
+void HmmerBuildFromMsaTask::prepare() {
+    prepareWorkingDir();
+    CHECK_OP(stateInfo, );
+
+    const QString fileName = GUrlUtils::fixFileName(msa.getName()) + ".sto";
+    QString msaUrl = settings.workingDir + "/" + fileName;
+
+    saveTask = new SaveAlignmentTask(msa, msaUrl, BaseDocumentFormats::STOCKHOLM);
+    saveTask->setSubtaskProgressWeight(5);
+    addSubTask(saveTask);
+}
+
+/**
+ * After the save sub-task, returns the build sub-task for the saved file.
+ * After the build sub-task, removes the auto-created working directory.
+ *
+ * The original never called removeTempDir(), so a generated working directory
+ * stayed on disk after every run. Cleanup happens before the CHECK_OP so that
+ * it also runs when the build sub-task failed or was cancelled.
+ */
+QList<Task *> HmmerBuildFromMsaTask::onSubTaskFinished(Task *subTask) {
+    QList<Task*> result;
+
+    if (hmmerTask == subTask) {
+        // Last step: removeTempDir() only acts on a directory this task generated itself.
+        removeTempDir();
+        return result;
+    }
+
+    CHECK_OP(stateInfo, result);
+    if (saveTask == subTask) {
+        hmmerTask = new HmmerBuildTask(settings, saveTask->getUrl());
+        setListenerForTask(hmmerTask);
+        hmmerTask->setSubtaskProgressWeight(95);
+        result << hmmerTask;
+    }
+    return result;
+}
+
+/** Delegates report generation to HmmerBuildTask::getReport(); the input URL argument is an empty string here. */
+QString HmmerBuildFromMsaTask::generateReport() const {
+ return HmmerBuildTask::getReport(this, settings, "");
+}
+
+/**
+ * Ensures a fresh working directory exists. When the settings name none, a
+ * temporary one is generated and flagged for removal. An already existing
+ * directory is removed before it is created again.
+ */
+void HmmerBuildFromMsaTask::prepareWorkingDir() {
+    if (settings.workingDir.isEmpty()) {
+        const QString generatedName = getTaskTempDirName("hmmer_build_", this);
+        const QString tempRoot = AppContext::getAppSettings()->getUserAppsSettings()->getCurrentProcessTemporaryDirPath(PHMMER_TEMP_DIR);
+        settings.workingDir = tempRoot + "/" + generatedName;
+        removeWorkingDir = true;
+    }
+
+    QDir dir(settings.workingDir);
+    if (dir.exists()) {
+        ExternalToolSupportUtils::removeTmpDir(settings.workingDir, stateInfo);
+        CHECK_OP(stateInfo, );
+    }
+
+    const bool created = dir.mkpath(settings.workingDir);
+    if (!created) {
+        setError(tr("Cannot create a directory for temporary files."));
+    }
+}
+
+/** Removes the working directory, but only when this task generated it itself. */
+void HmmerBuildFromMsaTask::removeTempDir() {
+    if (!removeWorkingDir) {
+        return;
+    }
+
+    // The status object is local, so a removal failure is not propagated to the task.
+    U2OpStatusImpl removalStatus;
+    ExternalToolSupportUtils::removeTmpDir(settings.workingDir, removalStatus);
+}
+
+} // namespace U2
diff --git a/src/plugins_3rdparty/hmm3/src/build/uhmm3build.h b/src/plugins/external_tool_support/src/hmmer/HmmerBuildFromMsaTask.h
similarity index 57%
rename from src/plugins_3rdparty/hmm3/src/build/uhmm3build.h
rename to src/plugins/external_tool_support/src/hmmer/HmmerBuildFromMsaTask.h
index 4a465c9..dff57a2 100644
--- a/src/plugins_3rdparty/hmm3/src/build/uhmm3build.h
+++ b/src/plugins/external_tool_support/src/hmmer/HmmerBuildFromMsaTask.h
@@ -19,25 +19,38 @@
* MA 02110-1301, USA.
*/
-#ifndef _GB2_UHMM3_BUILD_H_
-#define _GB2_UHMM3_BUILD_H_
-
-#include <QtCore/QObject>
+#ifndef _U2_HMMER_BUILD_FROM_MSA_TASK_H_
+#define _U2_HMMER_BUILD_FROM_MSA_TASK_H_
#include <U2Core/MAlignment.h>
-#include <U2Core/Task.h>
-#include <hmmer3/hmmer.h>
+#include "HmmerBuildTask.h"
namespace U2 {
-class UHMM3Build : public QObject {
+class HmmerBuildFromMsaTask : public ExternalToolSupportTask {
Q_OBJECT
public:
- static P7_HMM * build( const MAlignment & msa, const UHMM3BuildSettings & settings ,TaskStateInfo & ti );
-
-}; // UHMM3Build
+ HmmerBuildFromMsaTask(const HmmerBuildSettings &settings, const MAlignment &msa);
+
+ const QString & getHmmUrl() const;
+
+private:
+ void prepare();
+ QList<Task *> onSubTaskFinished(Task *subTask);
+ QString generateReport() const;
+
+ void prepareWorkingDir();
+ void removeTempDir();
+
+ HmmerBuildSettings settings;
+ const MAlignment msa;
+
+ SaveAlignmentTask *saveTask;
+ HmmerBuildTask *hmmerTask;
+ bool removeWorkingDir;
+};
-} // U2
+} // namespace U2
-#endif // _GB2_UHMM3_BUILD_H_
+#endif // _U2_HMMER_BUILD_FROM_MSA_TASK_H_
diff --git a/src/plugins/external_tool_support/src/hmmer/HmmerBuildSettings.cpp b/src/plugins/external_tool_support/src/hmmer/HmmerBuildSettings.cpp
new file mode 100644
index 0000000..94bb48e
--- /dev/null
+++ b/src/plugins/external_tool_support/src/hmmer/HmmerBuildSettings.cpp
@@ -0,0 +1,71 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <U2Core/U2SafePoints.h>
+
+#include "HmmerBuildSettings.h"
+
+namespace U2 {
+
+HmmerBuildSettings::HmmerBuildSettings() // initializes every numeric/enum option; workingDir and profileUrl stay empty
+ : modelConstructionStrategy(p7_ARCH_FAST),
+ relativeSequenceWeightingStrategy(p7_WGT_PB),
+ effectiveSequenceWeightingStrategy(p7_EFFN_ENTROPY),
+ eset(-1.0), // not positive on purpose: validate() requires eset > 0 only when p7_EFFN_SET is selected
+ seed(42),
+ symfrac(0.5),
+ fragtresh(0.5),
+ wid(0.62),
+ ere(-1.0), // -1 is the one non-positive value validate() accepts for ere
+ esigma(45.0),
+ eid(0.62),
+ eml(200),
+ emn(200),
+ evl(200),
+ evn(200),
+ efl(100),
+ efn(200),
+ eft(0.04)
+{
+
+}
+
+// Checks that every numeric option lies inside its accepted range.
+// Returns true when all constraints hold and false on the first violation.
+bool HmmerBuildSettings::validate() const {
+    CHECK(0 <= symfrac && symfrac <= 1, false);
+    CHECK(0 <= wid && wid <= 1, false);
+    // eset only matters (and must be positive) when the "set" strategy is chosen.
+    CHECK(0 < eset || effectiveSequenceWeightingStrategy != p7_EFFN_SET, false);
+    // -1 means "no explicit target relative entropy"; otherwise it must be positive.
+    CHECK(-1 == ere || 0 < ere, false);
+    CHECK(0 <= fragtresh && fragtresh <= 1, false);
+    CHECK(0 < esigma, false);
+    CHECK(0 <= eid && eid <= 1, false);
+    CHECK(0 < eml, false);
+    CHECK(0 < emn, false);
+    CHECK(0 < evl, false);
+    CHECK(0 < evn, false);
+    CHECK(0 < efl, false);
+    CHECK(0 < efn, false);
+    // eft is a tail mass, so it has to lie strictly inside (0, 1).
+    CHECK(0 < eft && eft < 1, false);
+    CHECK(0 <= seed, false);
+
+    return true;
+}
+
+} // namespace U2
diff --git a/src/plugins/external_tool_support/src/hmmer/HmmerBuildSettings.h b/src/plugins/external_tool_support/src/hmmer/HmmerBuildSettings.h
new file mode 100644
index 0000000..adfe17d
--- /dev/null
+++ b/src/plugins/external_tool_support/src/hmmer/HmmerBuildSettings.h
@@ -0,0 +1,83 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef _U2_HMMER_BUILD_SETTINGS_H_
+#define _U2_HMMER_BUILD_SETTINGS_H_
+
+#include <QString>
+
+namespace U2 {
+
+class HmmerBuildSettings {
+public:
+ enum p7_archchoice_e { // model construction strategies
+ p7_ARCH_FAST, // --fast: assign cols >= symfrac residues as consensus
+ p7_ARCH_HAND // --hand: manual construction ( requires reference annotation )
+ };
+
+ enum p7_wgtchoice_e { // relative sequence weighting strategies
+ p7_WGT_NONE, // --wnone: don't do any relative weighting ( set all to 1 )
+ p7_WGT_GIVEN, // --wgiven: use weights as given in msa file
+ p7_WGT_GSC, // --wgsc: Gerstein/Sonnhammer/Chothia tree weights
+ p7_WGT_PB, // --wpb: Henikoff position-based weights
+ p7_WGT_BLOSUM // --wblosum: Henikoff simple filter weights
+ };
+
+ enum p7_effnchoice_e { // effective sequence weighting strategies
+ p7_EFFN_NONE, // --enone: no effective seq # weighting: just use nseq
+ p7_EFFN_SET, // --eset: set eff seq # for all models
+ p7_EFFN_CLUST, // --eclust: eff seq # is # of single linkage clusters
+ p7_EFFN_ENTROPY // --eent: adjust eff seq # to achieve relative entropy target
+ };
+
+ HmmerBuildSettings();
+
+ bool validate() const;
+
+ p7_archchoice_e modelConstructionStrategy;
+ p7_wgtchoice_e relativeSequenceWeightingStrategy;
+ p7_effnchoice_e effectiveSequenceWeightingStrategy;
+
+ double eset; // --eset argument
+
+ int seed; // --seed argument
+
+ float symfrac; // --symfrac. sets sym fraction controlling --fast construction
+ float fragtresh; // --fragthresh. if L < x<L>, tag sequence as a fragment
+ double wid; // --wid. for --wblosum: set identity cutoff
+ double ere; // --ere. for --eent: set target relative entropy
+ double esigma; // --esigma. for --eent: set sigma param to <x>
+ double eid; // --eid. for --eclust: set fractional identity cutoff
+ int eml; // --EmL. length of sequences for MSV Gumbel mu fit
+ int emn; // --EmN. number of sequences for MSV Gumbel mu fit
+ int evl; // --EvL. length of sequences for Viterbi Gumbel mu fit
+ int evn; // --EvN. number of sequences for Viterbi Gumbel mu fit
+ int efl; // --EfL. length of sequences for forward exp tail mu fit
+ int efn; // --EfN. number of sequences for forward exp tail mu fit
+ double eft; // --Eft. tail mass for forward exponential tail mu fit
+
+ QString workingDir;
+ QString profileUrl;
+};
+
+} // namespace U2
+
+#endif // _U2_HMMER_BUILD_SETTINGS_H_
diff --git a/src/plugins/external_tool_support/src/hmmer/HmmerBuildTask.cpp b/src/plugins/external_tool_support/src/hmmer/HmmerBuildTask.cpp
new file mode 100644
index 0000000..76e8ce5
--- /dev/null
+++ b/src/plugins/external_tool_support/src/hmmer/HmmerBuildTask.cpp
@@ -0,0 +1,204 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <U2Core/AppContext.h>
+#include <U2Core/AppSettings.h>
+#include <U2Core/AppResources.h>
+#include <U2Core/Counter.h>
+#include <U2Core/GUrlUtils.h>
+
+#include "HmmerSupport.h"
+#include "HmmerBuildTask.h"
+
+namespace U2 {
+
+HmmerBuildTask::HmmerBuildTask(const HmmerBuildSettings &settings, const QString &msaUrl)
+ : ExternalToolRunTask(HmmerSupport::BUILD_TOOL, getArguments(settings, msaUrl), new Hmmer3LogParser()), // the tool's argument list is assembled here, from the settings and the MSA URL
+ settings(settings),
+ stockholmMsaUrl(msaUrl)
+{
+ GCOUNTER(cvar, tvar, "UHMM3BuildTask");
+
+ SAFE_POINT_EXT(settings.validate(), setError("Settings are invalid"), ); // note: runs after the argument list above has already been built from these settings
+
+ setReportingSupported(true);
+ setReportingEnabled(true);
+}
+
+const QString &HmmerBuildTask::getHmmProfileUrl() const { // accessor for the profile URL stored in the settings
+ return settings.profileUrl;
+}
+
+QString HmmerBuildTask::getReport(const Task *task, const HmmerBuildSettings &settings, const QString &msaUrl) { // builds an HTML table describing the build options and, if present, the task error
+ QString res;
+
+ res += "<table>";
+ if (!msaUrl.isEmpty()) { // the source-alignment row is skipped when no MSA URL is given
+ res += "<tr><td><b>" + tr("Source alignment") + "</b></td><td>" + msaUrl + "</td></tr>";
+ }
+ res += "<tr><td><b>" + tr("Profile name") + "</b></td><td>" + settings.profileUrl + "</td></tr>";
+
+ res += "<tr><td><b>" + tr("Options:") + "</b></td></tr>";
+ res += "<tr><td><b>" + tr("Model construction strategies") + "</b></td><td>";
+ switch (settings.modelConstructionStrategy) {
+ case HmmerBuildSettings::p7_ARCH_FAST:
+ res += "fast";
+ break;
+ case HmmerBuildSettings::p7_ARCH_HAND:
+ res += "hand";
+ break;
+ default:
+ assert(false);
+ }
+ res += "</td></tr>";
+
+ res += "<tr><td><b>" + tr("Relative model construction strategies") + "</b></td><td>"; // NOTE(review): this row reports relativeSequenceWeightingStrategy, so the label looks wrong - confirm before changing the translated string
+ switch (settings.relativeSequenceWeightingStrategy) {
+ case HmmerBuildSettings::p7_WGT_GSC:
+ res += tr("Gerstein/Sonnhammer/Chothia tree weights");
+ break;
+ case HmmerBuildSettings::p7_WGT_BLOSUM:
+ res += tr("Henikoff simple filter weights" );
+ break;
+ case HmmerBuildSettings::p7_WGT_PB:
+ res += tr("Henikoff position-based weights" );
+ break;
+ case HmmerBuildSettings::p7_WGT_NONE:
+ res += tr("No relative weighting; set all to 1" );
+ break;
+ case HmmerBuildSettings::p7_WGT_GIVEN:
+ res += tr("Weights given in MSA file" );
+ break;
+ default:
+ assert(false);
+ }
+ res += "</td></tr>";
+
+ res += "<tr><td><b>" + tr("Effective sequence weighting strategies") + "</b></td><td>";
+ switch (settings.effectiveSequenceWeightingStrategy) {
+ case HmmerBuildSettings::p7_EFFN_ENTROPY:
+ res += tr("adjust effective sequence number to achieve relative entropy target");
+ break;
+ case HmmerBuildSettings::p7_EFFN_CLUST:
+ res += tr("effective sequence number is number of single linkage clusters");
+ break;
+ case HmmerBuildSettings::p7_EFFN_NONE:
+ res += tr("no effective sequence number weighting: just use number of sequences");
+ break;
+ case HmmerBuildSettings::p7_EFFN_SET:
+ res += tr("set effective sequence number for all models to: %1" ).arg(settings.eset);
+ break;
+ default:
+ assert(false);
+ }
+ res += "</td></tr>";
+
+ if (task->hasError()) { // the error row is appended only for a failed task
+ res += "<tr><td><b>" + tr("Task finished with error: '%1'").arg(task->getError()) + "</b></td><td></td></tr>";
+ }
+ res += "</table>";
+
+ return res;
+}
+
+void HmmerBuildTask::prepare() { // prepares the location of the output profile; problems are reported through stateInfo
+ GUrlUtils::prepareFileLocation(settings.profileUrl, stateInfo);
+}
+
+QString HmmerBuildTask::generateReport() const { // report for this task, including the source alignment URL
+ return getReport(this, settings, stockholmMsaUrl);
+}
+
+QStringList HmmerBuildTask::getArguments(const HmmerBuildSettings &settings, const QString &msaUrl) { // translates the settings into the tool's argument list; output profile and input MSA come last
+ QStringList arguments;
+
+ switch (settings.modelConstructionStrategy) {
+ case HmmerBuildSettings::p7_ARCH_FAST:
+ arguments << "--fast";
+ arguments << "--symfrac" << QString::number(settings.symfrac); // symfrac is only passed together with --fast
+ break;
+ case HmmerBuildSettings::p7_ARCH_HAND:
+ arguments << "--hand";
+ break;
+ default:
+ FAIL(tr("Unknown model construction strategy"), arguments);
+ }
+
+ switch (settings.relativeSequenceWeightingStrategy) {
+ case HmmerBuildSettings::p7_WGT_NONE:
+ arguments << "--wnone";
+ break;
+ case HmmerBuildSettings::p7_WGT_GIVEN:
+ arguments << "--wgiven";
+ break;
+ case HmmerBuildSettings::p7_WGT_GSC:
+ arguments << "--wgsc";
+ break;
+ case HmmerBuildSettings::p7_WGT_PB:
+ arguments << "--wpb";
+ break;
+ case HmmerBuildSettings::p7_WGT_BLOSUM:
+ arguments << "--wblosum";
+ arguments << "--wid" << QString::number(settings.wid); // wid is only passed together with --wblosum
+ break;
+ default:
+ FAIL(tr("Unknown relative sequence weighting strategy"), arguments);
+ }
+
+ switch (settings.effectiveSequenceWeightingStrategy) {
+ case HmmerBuildSettings::p7_EFFN_NONE:
+ arguments << "--enone";
+ break;
+ case HmmerBuildSettings::p7_EFFN_SET:
+ arguments << "--eset" << QString::number(settings.eset);
+ break;
+ case HmmerBuildSettings::p7_EFFN_CLUST:
+ arguments << "--eclust";
+ arguments << "--eid" << QString::number(settings.eid); // eid is only passed together with --eclust
+ break;
+ case HmmerBuildSettings::p7_EFFN_ENTROPY:
+ arguments << "--eent";
+ if (settings.ere > 0) { // a non-positive ere means the option is left out entirely
+ arguments << "--ere" << QString::number(settings.ere);
+ }
+ arguments << "--esigma" << QString::number(settings.esigma);
+ break;
+ default:
+ FAIL(tr("Unknown effective sequence weighting strategy"), arguments);
+ }
+
+ arguments << "--cpu" << QString::number(AppContext::getAppSettings()->getAppResourcePool()->getIdealThreadCount());
+ arguments << "--seed" << QString::number(settings.seed);
+ arguments << "--fragthresh" << QString::number(settings.fragtresh);
+ arguments << "--EmL" << QString::number(settings.eml);
+ arguments << "--EmN" << QString::number(settings.emn);
+ arguments << "--EvL" << QString::number(settings.evl);
+ arguments << "--EvN" << QString::number(settings.evn);
+ arguments << "--EfL" << QString::number(settings.efl);
+ arguments << "--EfN" << QString::number(settings.efn);
+ arguments << "--Eft" << QString::number(settings.eft);
+
+ arguments << settings.profileUrl << msaUrl; // positional arguments: output profile first, then the input alignment
+
+ return arguments;
+}
+
+} // namespace U2
diff --git a/src/ugeneui/src/main_window/ShutdownTask.h b/src/plugins/external_tool_support/src/hmmer/HmmerBuildTask.h
similarity index 56%
copy from src/ugeneui/src/main_window/ShutdownTask.h
copy to src/plugins/external_tool_support/src/hmmer/HmmerBuildTask.h
index 4c8596e..514eb2b 100644
--- a/src/ugeneui/src/main_window/ShutdownTask.h
+++ b/src/plugins/external_tool_support/src/hmmer/HmmerBuildTask.h
@@ -19,35 +19,34 @@
* MA 02110-1301, USA.
*/
-#ifndef _U2_SHUTDOWN_TASK_H_
-#define _U2_SHUTDOWN_TASK_H_
+#ifndef _U2_HMMER_BUILD_TASK_H_
+#define _U2_HMMER_BUILD_TASK_H_
-#include <U2Core/Task.h>
+#include <U2Core/ExternalToolRunTask.h>
+
+#include "HmmerBuildSettings.h"
namespace U2 {
-class Document;
-class MainWindowImpl;
+class SaveAlignmentTask;
-class ShutdownTask : public Task {
- Q_OBJECT
+class HmmerBuildTask : public ExternalToolRunTask {
public:
- ShutdownTask(MainWindowImpl* mw);
+ HmmerBuildTask(const HmmerBuildSettings &settings, const QString &stockholmMsaUrl);
- void prepare();
+ const QString & getHmmProfileUrl() const;
+ static QString getReport(const Task *task, const HmmerBuildSettings &settings, const QString &msaUrl);
- ReportResult report();
+private:
+ void prepare();
+ QString generateReport() const;
-protected:
- virtual QList<Task*> onSubTaskFinished(Task* subTask);
+ static QStringList getArguments(const HmmerBuildSettings &settings, const QString &stockholmMsaUrl);
-private:
- MainWindowImpl* mw;
- bool docsToRemoveAreFetched;
- QList<Document *> docsToRemove;
+ HmmerBuildSettings settings;
+ const QString stockholmMsaUrl;
};
+} // namespace U2
-}//namespace
-
-#endif
+#endif // _U2_HMMER_BUILD_TASK_H_
diff --git a/src/plugins_3rdparty/hmm3/src/workers/HMM3BuildWorker.cpp b/src/plugins/external_tool_support/src/hmmer/HmmerBuildWorker.cpp
similarity index 50%
rename from src/plugins_3rdparty/hmm3/src/workers/HMM3BuildWorker.cpp
rename to src/plugins/external_tool_support/src/hmmer/HmmerBuildWorker.cpp
index 03cc57d..ae0cc6a 100644
--- a/src/plugins_3rdparty/hmm3/src/workers/HMM3BuildWorker.cpp
+++ b/src/plugins/external_tool_support/src/hmmer/HmmerBuildWorker.cpp
@@ -19,121 +19,144 @@
* MA 02110-1301, USA.
*/
-#include "HMM3BuildWorker.h"
-#include "HMM3IOWorker.h"
-#include "build/uHMM3BuildTask.h"
+#include <U2Core/AppContext.h>
+#include <U2Core/FailTask.h>
+#include <U2Core/Log.h>
+#include <U2Core/MAlignment.h>
+#include <U2Core/TaskSignalMapper.h>
+
+#include <U2Designer/DelegateEditors.h>
-#include <U2Lang/Datatype.h>
-#include <U2Lang/IntegralBusModel.h>
-#include <U2Lang/WorkflowEnv.h>
#include <U2Lang/ActorPrototypeRegistry.h>
-#include <U2Lang/BaseTypes.h>
-#include <U2Lang/BaseSlots.h>
-#include <U2Lang/BasePorts.h>
#include <U2Lang/BaseActorCategories.h>
-#include <U2Designer/DelegateEditors.h>
+#include <U2Lang/BasePorts.h>
+#include <U2Lang/BaseSlots.h>
+#include <U2Lang/BaseTypes.h>
#include <U2Lang/CoreLibConstants.h>
+#include <U2Lang/Datatype.h>
+#include <U2Lang/IntegralBusModel.h>
+#include <U2Lang/WorkflowEnv.h>
+#include <U2Lang/WorkflowMonitor.h>
-#include <U2Core/MAlignment.h>
-#include <U2Core/AppContext.h>
-#include <U2Core/Log.h>
-#include <U2Core/TaskSignalMapper.h>
-#include <U2Core/FailTask.h>
-
-/* TRANSLATOR U2::LocalWorkflow::HMM3BuildWorker */
+#include "HmmerBuildWorker.h"
+#include "HmmerBuildFromMsaTask.h"
+#include "HmmerSupport.h"
namespace U2 {
namespace LocalWorkflow {
/******************************
- * HMM3BuildWorkerFactory
+ * HmmerBuildWorkerFactory
******************************/
-const QString HMM3BuildWorkerFactory::ACTOR("hmm3-build");
-static const QString OUT_HMM3_PORT_ID("out-hmm3");
+const QString HmmerBuildWorkerFactory::ACTOR("hmm3-build");
+static const QString OUT_HMM_URL_PORT_ID("out-hmm3");
static const QString SEED_ATTR("seed");
static const QString HMM3_PROFILE_DEFAULT_NAME("hmm3_profile");
-void HMM3BuildWorkerFactory::init() {
- QList<PortDescriptor*> p; QList<Attribute*> a;
+void HmmerBuildWorkerFactory::init() {
+ QList<PortDescriptor *> p;
+ QList<Attribute *> a;
{
- Descriptor id(BasePorts::IN_MSA_PORT_ID(), HMM3BuildWorker::tr("Input MSA"),
- HMM3BuildWorker::tr("Input multiple sequence alignment for building statistical model."));
- Descriptor od(OUT_HMM3_PORT_ID, HMM3BuildWorker::tr("HMM3 profile"), HMM3BuildWorker::tr("Produced HMM3 profile"));
+ Descriptor id(BasePorts::IN_MSA_PORT_ID(), HmmerBuildWorker::tr("Input MSA"),
+ HmmerBuildWorker::tr("Input multiple sequence alignment for building statistical model."));
+ Descriptor od(OUT_HMM_URL_PORT_ID, HmmerBuildWorker::tr("HMM3 profile"), HmmerBuildWorker::tr("Produced HMM3 profile URL"));
QMap<Descriptor, DataTypePtr> inM;
inM[BaseSlots::MULTIPLE_ALIGNMENT_SLOT()] = BaseTypes::MULTIPLE_ALIGNMENT_TYPE();
p << new PortDescriptor(id, DataTypePtr(new MapDataType("hmm3.build.in", inM)), true /*input*/);
QMap<Descriptor, DataTypePtr> outM;
- outM[HMM3Lib::HMM3_SLOT] = HMM3Lib::HMM3_PROFILE_TYPE();
+ outM[BaseSlots::URL_SLOT()] = BaseTypes::STRING_TYPE();
p << new PortDescriptor(od, DataTypePtr(new MapDataType("hmm3.build", outM)), false /*input*/, true /*multi*/);
}
- Descriptor sed(SEED_ATTR, HMM3BuildWorker::tr("Random seed"), HMM3BuildWorker::tr("Random generator seed. 0 - means that one-time arbitrary seed will be used."));
+ Descriptor sed(SEED_ATTR, HmmerBuildWorker::tr("Random seed"), HmmerBuildWorker::tr("Random generator seed. 0 - means that one-time arbitrary seed will be used."));
- a << new Attribute(sed, BaseTypes::NUM_TYPE(), false, QVariant(0));
+ a << new Attribute(sed, BaseTypes::NUM_TYPE(), false, QVariant(42));
- Descriptor desc(HMM3BuildWorkerFactory::ACTOR, HMM3BuildWorker::tr("HMM3 Build"), HMM3BuildWorker::tr("Builds a HMM3 profile from a multiple sequence alignment."
+ Descriptor desc(HmmerBuildWorkerFactory::ACTOR, HmmerBuildWorker::tr("HMM3 Build"), HmmerBuildWorker::tr("Builds a HMM3 profile from a multiple sequence alignment."
"<p>The HMM3 profile is a statistical model which captures position-specific information"
" about how conserved each column of the alignment is, and which residues are likely."));
ActorPrototype* proto = new IntegralBusActorPrototype(desc, p, a);
- QMap<QString, PropertyDelegate*> delegates;
+ QMap<QString, PropertyDelegate *> delegates;
{
- QVariantMap m; m["minimum"] = 0; m["maximum"] = INT_MAX;
+ QVariantMap m;
+ m["minimum"] = 0;
+ m["maximum"] = INT_MAX;
delegates[SEED_ATTR] = new SpinBoxDelegate(m);
}
proto->setEditor(new DelegateEditor(delegates));
- proto->setIconPath( ":/hmm3/images/hmmer_16.png" );
- proto->setPrompter(new HMM3BuildPrompter());
- WorkflowEnv::getProtoRegistry()->registerProto(HMM3Lib::HMM3_CATEGORY(), proto);
+ proto->setIconPath(":/external_tool_support/images/hmmer.png");
+ proto->setPrompter(new HmmerBuildPrompter());
+ proto->addExternalTool(HmmerSupport::BUILD_TOOL);
+ WorkflowEnv::getProtoRegistry()->registerProto(Descriptor("hmmer3", HmmerBuildWorker::tr("HMMER3 Tools"), ""), proto);
DomainFactory* localDomain = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
- localDomain->registerEntry(new HMM3BuildWorkerFactory());
+ localDomain->registerEntry(new HmmerBuildWorkerFactory());
}
-void HMM3BuildWorkerFactory::cleanup() {
+void HmmerBuildWorkerFactory::cleanup() {
delete WorkflowEnv::getProtoRegistry()->unregisterProto(ACTOR);
- DomainFactory* localDomain = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
+ DomainFactory *localDomain = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
delete localDomain->unregisterEntry(ACTOR);
}
+HmmerBuildWorkerFactory::HmmerBuildWorkerFactory()
+ : DomainFactory(ACTOR)
+{
+
+}
+
+Worker * HmmerBuildWorkerFactory::createWorker(Actor *a) {
+ return new HmmerBuildWorker(a);
+}
+
/******************************
- * HMM3BuildPrompter
+ * HmmerBuildPrompter
******************************/
-QString HMM3BuildPrompter::composeRichDoc() {
- IntegralBusPort* input = qobject_cast<IntegralBusPort*>(target->getPort(BasePorts::IN_MSA_PORT_ID()));
- Actor* msaProducer = input->getProducer(BasePorts::IN_MSA_PORT_ID());
+HmmerBuildPrompter::HmmerBuildPrompter(Actor *p)
+ : PrompterBase<HmmerBuildPrompter>(p)
+{
+
+}
- QString msaName = msaProducer ? tr("For each MSA from <u>%1</u>,").arg(msaProducer->getLabel()) : "";
+QString HmmerBuildPrompter::composeRichDoc() {
+ IntegralBusPort *input = qobject_cast<IntegralBusPort *>(target->getPort(BasePorts::IN_MSA_PORT_ID()));
+ Actor *msaProducer = input->getProducer(BasePorts::IN_MSA_PORT_ID());
- QString doc = tr("%1 builds a HMM3 profile.")
- .arg(msaName);
+ QString msaName = (msaProducer ? tr("For each MSA from <u>%1</u>,").arg(msaProducer->getLabel()) : "");
+
+ QString doc = tr("%1 builds a HMMER profile.").arg(msaName);
return doc;
}
/******************************
-* HMM3BuildWorker
+* HmmerBuildWorker
******************************/
-HMM3BuildWorker::HMM3BuildWorker(Actor* a) : BaseWorker(a), input(NULL), output(NULL){
+HmmerBuildWorker::HmmerBuildWorker(Actor *a)
+ : BaseWorker(a),
+ input(NULL),
+ output(NULL)
+{
}
-void HMM3BuildWorker::init() {
+void HmmerBuildWorker::init() {
input = ports.value(BasePorts::IN_MSA_PORT_ID());
- output = ports.value(OUT_HMM3_PORT_ID);
- setDefaultUHMM3BuildSettings(&cfg);
+ output = ports.value(OUT_HMM_URL_PORT_ID);
+ cfg = HmmerBuildSettings();
}
-bool HMM3BuildWorker::isReady() const {
+bool HmmerBuildWorker::isReady() const {
if (isDone()) {
return false;
}
return input->hasMessage() || input->isEnded();
}
-Task* HMM3BuildWorker::tick() {
+Task * HmmerBuildWorker::tick() {
if (input->hasMessage()) {
Message inputMessage = getMessageAndSetupScriptValues(input);
if (inputMessage.isEmpty()) {
@@ -148,9 +171,11 @@ Task* HMM3BuildWorker::tick() {
SAFE_POINT(!msaObj.isNull(), "NULL MSA Object!", NULL);
const MAlignment &msa = msaObj->getMAlignment();
- Task* t = new UHMM3BuildTask(cfg, msa);
- connect(new TaskSignalMapper(t), SIGNAL(si_taskFinished(Task*)), SLOT(sl_taskFinished(Task*)));
- return t;
+ cfg.profileUrl = monitor()->outputDir() + "hmmer_build/" + QFileInfo(context->getMetadataStorage().get(inputMessage.getMetadataId()).getFileUrl()).baseName() + ".hmm";
+ HmmerBuildFromMsaTask *task = new HmmerBuildFromMsaTask(cfg, msa);
+ task->addListeners(createLogListeners());
+ connect(new TaskSignalMapper(task), SIGNAL(si_taskFinished(Task *)), SLOT(sl_taskFinished(Task *)));
+ return task;
} else if (input->isEnded()) {
setDone();
output->setEnded();
@@ -158,33 +183,21 @@ Task* HMM3BuildWorker::tick() {
return NULL;
}
-void HMM3BuildWorker::sl_taskFinished() {
- Task * t = qobject_cast<Task*>(sender());
- SAFE_POINT( NULL != t, "Invalid task is encountered", );
- if ( t->isCanceled( ) ) {
- return;
- }
- if( t->getState() != Task::State_Finished ) {
- return;
- }
- sl_taskFinished(t);
-}
-
-void HMM3BuildWorker::sl_taskFinished(Task* t) {
- UHMM3BuildTask* build = qobject_cast<UHMM3BuildTask*>(t);
- SAFE_POINT( NULL != t, "Invalid task is encountered", );
- if ( t->isCanceled( ) ) {
+// Called when a build task finishes: unless the task was canceled, registers
+// the produced profile file with the monitor and sends its URL to the output.
+void HmmerBuildWorker::sl_taskFinished(Task* task) {
+ HmmerBuildFromMsaTask *buildTask = qobject_cast<HmmerBuildFromMsaTask *>(task);
+ // Check the cast result, since that is the pointer dereferenced below; it is
+ // NULL both for a NULL task and for a task of an unexpected type.
+ SAFE_POINT(NULL != buildTask, "Invalid task is encountered", );
+ if (task->isCanceled()) {
 return;
 }
- const P7_HMM* hmm = NULL;
- hmm = build->takeHMM();
- output->put(Message(HMM3Lib::HMM3_PROFILE_TYPE(), qVariantFromValue<const P7_HMM*>(hmm)));
- algoLog.info(tr("Built HMM3 profile"));
+ const QString hmmUrl = buildTask->getHmmUrl();
+ monitor()->addOutputFile(hmmUrl, actor->getId(), true);
+ output->put(Message(BaseTypes::STRING_TYPE(), hmmUrl));
+ algoLog.info(tr("Built HMMER profile"));
 }
-void HMM3BuildWorker::cleanup() {
+void HmmerBuildWorker::cleanup() {
}
-} //namespace LocalWorkflow
-} //namespace U2
+} // namespace LocalWorkflow
+} // namespace U2
diff --git a/src/plugins_3rdparty/hmm3/src/workers/HMM3BuildWorker.h b/src/plugins/external_tool_support/src/hmmer/HmmerBuildWorker.h
similarity index 58%
copy from src/plugins_3rdparty/hmm3/src/workers/HMM3BuildWorker.h
copy to src/plugins/external_tool_support/src/hmmer/HmmerBuildWorker.h
index 24563a3..70f24f8 100644
--- a/src/plugins_3rdparty/hmm3/src/workers/HMM3BuildWorker.h
+++ b/src/plugins/external_tool_support/src/hmmer/HmmerBuildWorker.h
@@ -19,55 +19,57 @@
* MA 02110-1301, USA.
*/
-#ifndef _U2_HMM3_BUILD_WORKER_H_
-#define _U2_HMM3_BUILD_WORKER_H_
+#ifndef _U2_HMMER_BUILD_WORKER_H_
+#define _U2_HMMER_BUILD_WORKER_H_
#include <U2Lang/LocalDomain.h>
#include <U2Lang/WorkflowUtils.h>
-#include <hmmer3/hmmer.h>
+#include "HmmerBuildSettings.h"
namespace U2 {
namespace LocalWorkflow {
-class HMM3BuildPrompter : public PrompterBase<HMM3BuildPrompter> {
+class HmmerBuildPrompter : public PrompterBase<HmmerBuildPrompter> {
Q_OBJECT
public:
- HMM3BuildPrompter(Actor* p = 0) : PrompterBase<HMM3BuildPrompter>(p) {}
+ HmmerBuildPrompter(Actor *p = 0);
+
protected:
QString composeRichDoc();
};
-class HMM3BuildWorker : public BaseWorker {
+class HmmerBuildWorker : public BaseWorker {
Q_OBJECT
public:
- HMM3BuildWorker(Actor* a);
+ HmmerBuildWorker(Actor *a);
- virtual void init();
- virtual bool isReady() const;
- virtual Task* tick();
- virtual void cleanup();
+ void init();
+ bool isReady() const;
+ Task * tick();
+ void cleanup();
private slots:
- void sl_taskFinished(Task*);
- void sl_taskFinished();
+ void sl_taskFinished(Task *task);
protected:
- IntegralBus *input, *output;
- UHMM3BuildSettings cfg;
-};
+ IntegralBus *input;
+ IntegralBus *output;
+ HmmerBuildSettings cfg;
+};
-class HMM3BuildWorkerFactory : public DomainFactory {
+class HmmerBuildWorkerFactory : public DomainFactory {
public:
static const QString ACTOR;
+
static void init();
static void cleanup();
- HMM3BuildWorkerFactory() : DomainFactory(ACTOR) {}
- virtual Worker* createWorker(Actor* a) {return new HMM3BuildWorker(a);}
+ HmmerBuildWorkerFactory();
+ Worker * createWorker(Actor *a);
};
-} // Workflow namespace
-} // U2 namespace
+} // namespace LocalWorkflow
+} // namespace U2
-#endif
+#endif // _U2_HMMER_BUILD_WORKER_H_
diff --git a/src/plugins/external_tool_support/src/hmmer/HmmerParseSearchResultsTask.cpp b/src/plugins/external_tool_support/src/hmmer/HmmerParseSearchResultsTask.cpp
new file mode 100644
index 0000000..1d9c84e
--- /dev/null
+++ b/src/plugins/external_tool_support/src/hmmer/HmmerParseSearchResultsTask.cpp
@@ -0,0 +1,101 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <U2Core/AnnotationTableObject.h>
+#include <U2Core/AppContext.h>
+#include <U2Core/AppResources.h>
+#include <U2Core/AppSettings.h>
+#include <U2Core/Counter.h>
+#include <U2Core/IOAdapterUtils.h>
+#include <U2Core/U1AnnotationUtils.h>
+#include <U2Core/U2FeatureUtils.h>
+#include <U2Core/U2SafePoints.h>
+
+#include "HmmerParseSearchResultsTask.h"
+
+namespace U2 {
+
+const qint64 HmmerParseSearchResultsTask::BUFF_SIZE = 4096;
+
+HmmerParseSearchResultsTask::HmmerParseSearchResultsTask(const QString &resultUrl, const AnnotationCreationPattern &pattern)
+ : Task("Parse HMMER search result", TaskFlag_None),
+ resultUrl(resultUrl),
+ pattern(pattern)
+{
+ SAFE_POINT_EXT(!resultUrl.isEmpty(), setError("Result file URL is empty"), ); // an empty URL puts the task into the error state right away
+}
+
+const QList<SharedAnnotationData> & HmmerParseSearchResultsTask::getAnnotations() const { // annotations collected by processHit(), in file order
+ return annotations;
+}
+
+// Reads the HMMER result file line by line. Lines recognised by isComment()
+// are skipped; every other line is split on whitespace and handed to
+// processHit(). Parsing stops as soon as stateInfo reports a problem or a
+// read fails.
+void HmmerParseSearchResultsTask::run() {
+    QScopedPointer<IOAdapter> ioAdapter(IOAdapterUtils::open(resultUrl, stateInfo));
+    CHECK_OP(stateInfo, );
+
+    // The buffer is sized, not merely reserved: readLine() writes through
+    // data(), and only the first size() bytes of a QByteArray may be written.
+    QByteArray buff(static_cast<int>(BUFF_SIZE + 1), '\0');
+    qint64 lineNumber = 1;
+
+    while (!ioAdapter->isEof()) {
+        const qint64 bytesRead = ioAdapter->readLine(buff.data(), BUFF_SIZE);
+        if (bytesRead < 0) {
+            // A failed read would otherwise be parsed as a line and the loop
+            // could keep running on a broken adapter.
+            setError(tr("Can't read the file: %1").arg(resultUrl));
+            return;
+        }
+        assert(bytesRead < BUFF_SIZE);
+
+        // Convert exactly the bytes of this line. This does not depend on the
+        // adapter NUL-terminating the buffer, so leftovers of a previous,
+        // longer line can never leak into the current one.
+        const QByteArray lineBytes = buff.left(static_cast<int>(bytesRead));
+        const QString readData(lineBytes);
+
+        if (!isComment(readData)) {
+            processHit(readData.split(QRegExp("\\s+"), QString::SkipEmptyParts), lineNumber);
+            CHECK_OP(stateInfo, );
+        }
+        lineNumber++;
+    }
+}
+
+// Tells whether a result line is a comment, i.e. begins with '#'.
+bool HmmerParseSearchResultsTask::isComment(const QString &line) {
+    const bool beginsWithHash = line.startsWith("#");
+    return beginsWithHash;
+}
+
+// Turns one tokenised result line into an annotation and appends it to
+// 'annotations'.
+//   tokens     - whitespace-separated columns of the line, indexed by TOKENS
+//   lineNumber - number of the line in the file, used in warnings only
+// A line with too few columns, non-numeric alignment coordinates or an
+// inverted alignment range is skipped with a warning, not turned into a
+// bogus region.
+void HmmerParseSearchResultsTask::processHit(const QStringList &tokens, qint64 lineNumber) {
+    CHECK_EXT(23 <= tokens.size(), stateInfo.addWarning(tr("Can't parse line %1").arg(lineNumber)), );
+
+    // toLongLong() returns 0 on failure, so the conversion flags are checked.
+    bool startOk = false;
+    bool endOk = false;
+    const qint64 start = tokens[ALI_FROM].toLongLong(&startOk);
+    const qint64 end = tokens[ALI_TO].toLongLong(&endOk);
+    CHECK_EXT(startOk && endOk && start <= end, stateInfo.addWarning(tr("Can't parse line %1").arg(lineNumber)), );
+
+    SharedAnnotationData annotation(new AnnotationData);
+    annotation->qualifiers << U2Qualifier("Accuracy_per_residue", tokens[ACC]);
+    annotation->qualifiers << U2Qualifier("Bias", tokens[BIAS]);
+    annotation->qualifiers << U2Qualifier("Conditional_e-value", tokens[C_EVALUE]);
+    annotation->qualifiers << U2Qualifier("Env_of_domain_loc", tokens[ENV_FROM] + ".." + tokens[ENV_TO]);
+    annotation->qualifiers << U2Qualifier("HMM_model", tokens[QUERY_NAME]);
+    annotation->qualifiers << U2Qualifier("HMM_region", tokens[HMM_FROM] + ".." + tokens[HMM_TO]);
+    annotation->qualifiers << U2Qualifier("Independent_e-value", tokens[I_EVALUE]);
+    annotation->qualifiers << U2Qualifier("Score", tokens[SCORE]);
+    U1AnnotationUtils::addDescriptionQualifier(annotation, pattern.description);
+
+    // NOTE(review): 'start' is used as the region start unchanged; confirm the
+    // coordinate base of the result file matches what U2Region expects.
+    annotation->location->regions << U2Region(start, end - start + 1);
+    annotation->name = pattern.annotationName;
+    annotation->type = pattern.type;
+
+    annotations << annotation;
+}
+
+} // namespace U2
diff --git a/src/plugins/external_tool_support/src/hmmer/HmmerParseSearchResultsTask.h b/src/plugins/external_tool_support/src/hmmer/HmmerParseSearchResultsTask.h
new file mode 100644
index 0000000..bca4820
--- /dev/null
+++ b/src/plugins/external_tool_support/src/hmmer/HmmerParseSearchResultsTask.h
@@ -0,0 +1,79 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef _U2_HMMER_PARSE_SEARCH_RESULTS_TASK_H_
+#define _U2_HMMER_PARSE_SEARCH_RESULTS_TASK_H_
+
+#include <U2Core/Annotation.h>
+#include <U2Core/Task.h>
+
+#include "HmmerSearchTask.h"
+
+namespace U2 {
+
+// Task that parses a table of HMMER search hits and converts its rows into annotations.
+// The table location is given by 'resultUrl'; the created annotations take their name, type and
+// description from 'pattern' (see processHit()).
+class HmmerParseSearchResultsTask : public Task {
+public:
+ HmmerParseSearchResultsTask(const QString &resultUrl, const AnnotationCreationPattern &pattern);
+
+ // Accessor for the parsed results (presumably the 'annotations' member; the definition is outside this header)
+ const QList<SharedAnnotationData> & getAnnotations() const;
+
+private:
+ // Indexes of the whitespace-separated columns in one row of the hits table.
+ // processHit() requires at least 23 tokens, i.e. every column up to DESCRIPTION.
+ enum TOKENS {
+ TARGET_NAME = 0,
+ TARGET_ACCESSION = 1,
+ TLEN = 2,
+ QUERY_NAME = 3,
+ QUERY_ACCESSION = 4,
+ QLEN = 5,
+ FULL_SEQ_E_VALUE = 6,
+ FULL_SEQ_SCORE = 7,
+ FULL_SEQ_BIAS = 8,
+ NUMBER = 9,
+ TOTAL_COUNT = 10,
+ C_EVALUE = 11,
+ I_EVALUE = 12,
+ SCORE = 13,
+ BIAS = 14,
+ HMM_FROM = 15,
+ HMM_TO = 16,
+ ALI_FROM = 17,
+ ALI_TO = 18,
+ ENV_FROM = 19,
+ ENV_TO = 20,
+ ACC = 21,
+ DESCRIPTION = 22
+ };
+
+ void run();
+
+ // True for lines that start with '#'
+ static bool isComment(const QString &line);
+ // Builds an annotation from a tokenized row; 'lineNumber' is used for warnings only
+ void processHit(const QStringList &tokens, qint64 lineNumber);
+
+ const QString resultUrl;
+ const AnnotationCreationPattern pattern;
+ // Filled by processHit(), one item per accepted row
+ QList<SharedAnnotationData> annotations;
+
+ // Size of the buffer that run() passes to IOAdapter::readLine()
+ static const qint64 BUFF_SIZE;
+};
+
+} // namespace U2
+
+#endif // _U2_HMMER_PARSE_SEARCH_RESULTS_TASK_H_
diff --git a/src/plugins/external_tool_support/src/hmmer/HmmerSearchDialog.cpp b/src/plugins/external_tool_support/src/hmmer/HmmerSearchDialog.cpp
new file mode 100644
index 0000000..ca002ad
--- /dev/null
+++ b/src/plugins/external_tool_support/src/hmmer/HmmerSearchDialog.cpp
@@ -0,0 +1,248 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <math.h>
+
+#include <QMessageBox>
+#include <QPushButton>
+
+#include <U2Core/AppContext.h>
+#include <U2Core/DNAAlphabet.h>
+#include <U2Core/DNASequenceObject.h>
+#include <U2Core/GObjectTypes.h>
+#include <U2Core/L10n.h>
+#include <U2Core/U2OpStatusUtils.h>
+#include <U2Core/U2SafePoints.h>
+
+#include <U2Gui/DialogUtils.h>
+#include <U2Gui/HelpButton.h>
+#include <U2Gui/LastUsedDirHelper.h>
+#include <U2Gui/U2FileDialog.h>
+
+#include "HmmerSearchDialog.h"
+#include "HmmerSearchTask.h"
+
+namespace U2 {
+
+// Prefixes shown in the domain e-value spin box: the box holds the exponent only,
+// see sl_domESpinBoxChanged(). The minus sign of a negative exponent comes from the value itself.
+const QString HmmerSearchDialog::DOM_E_PLUS_PREFIX = "1E+";
+const QString HmmerSearchDialog::DOM_E_MINUS_PREFIX = "1E";
+// Key passed to LastUsedDirHelper in sl_queryHmmFileToolButtonClicked()
+const QString HmmerSearchDialog::HMM_FILES_DIR_ID = "uhmmer3_search_dlg_impl_hmm_dir";
+// Initial annotation name offered by the annotations widget
+const QString HmmerSearchDialog::ANNOTATIONS_DEFAULT_NAME = "hmm_signal";
+
+// Builds the search dialog for the given sequence object: sets up the generated UI, fills the widgets
+// with the default settings, embeds the annotation creation widget into the first tab and connects the slots.
+// 'seqObj' must not be NULL.
+HmmerSearchDialog::HmmerSearchDialog(U2SequenceObject *seqObj, QWidget *parent)
+ : QDialog(parent)
+{
+ setupUi(this);
+ SAFE_POINT(NULL != seqObj, L10N::nullPointerError("sequence object"), );
+
+ new HelpButton(this, buttonBox, "18220560");
+ buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Run"));
+ buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
+
+ // The four score threshold modes are mutually exclusive; the explicit value is the initial choice
+ useScoreTresholdGroup.addButton(useExplicitScoreTresholdButton);
+ useScoreTresholdGroup.addButton(useGATresholdsButton);
+ useScoreTresholdGroup.addButton(useNCTresholdsButton);
+ useScoreTresholdGroup.addButton(useTCTresholdsButton);
+ useExplicitScoreTresholdButton->setChecked(true);
+
+ model.sequence = QPointer<U2SequenceObject>(seqObj);
+ setModelValues(); // default settings here
+
+ // Annotations widget
+ CreateAnnotationModel annModel;
+ annModel.hideLocation = true;
+ annModel.sequenceObjectRef = seqObj;
+ annModel.useAminoAnnotationTypes = seqObj->getAlphabet()->isAmino();
+ annModel.data->type = U2FeatureTypes::MiscSignal;
+ annModel.data->name = ANNOTATIONS_DEFAULT_NAME;
+ annModel.sequenceLen = seqObj->getSequenceLength();
+ annotationsWidgetController = new CreateAnnotationWidgetController(annModel, this);
+
+ // The annotations widget is inserted at position 1 of the first tab's vertical layout
+ QWidget *firstTab = tabWidget->widget(0);
+ assert(NULL != firstTab);
+ QVBoxLayout *curLayout = qobject_cast<QVBoxLayout *>(firstTab->layout());
+ assert(NULL != curLayout);
+ QWidget *aw = annotationsWidgetController->getWidget();
+ curLayout->insertWidget(1, aw);
+
+ QPushButton *searchButton = buttonBox->button(QDialogButtonBox::Ok);
+ QPushButton *cancelButton = buttonBox->button(QDialogButtonBox::Cancel);
+
+ // Connections are made after setModelValues(), so the initial widget setup above does not trigger the slots
+ connect(cancelButton, SIGNAL(clicked()), SLOT(reject()));
+ connect(searchButton, SIGNAL(clicked()), SLOT(sl_okButtonClicked()));
+ connect(useEvalTresholdsButton, SIGNAL(toggled(bool)), SLOT(sl_useEvalTresholdsButtonChanged(bool)));
+ connect(useScoreTresholdsButton, SIGNAL(toggled(bool)), SLOT(sl_useScoreTresholdsButtonChanged(bool)));
+ connect(useExplicitScoreTresholdButton, SIGNAL(toggled(bool)), SLOT(sl_useExplicitScoreTresholdButton(bool)));
+ connect(maxCheckBox, SIGNAL(stateChanged(int)), SLOT(sl_maxCheckBoxChanged(int)));
+ connect(domESpinBox, SIGNAL(valueChanged(int)), SLOT(sl_domESpinBoxChanged(int)));
+ connect(queryHmmFileToolButton, SIGNAL(clicked()), SLOT(sl_queryHmmFileToolButtonClicked()));
+ connect(domZCheckBox, SIGNAL(stateChanged(int)), SLOT(sl_domZCheckBoxChanged(int)));
+}
+
+// Initializes the widgets from the settings stored in the dialog model
+void HmmerSearchDialog::setModelValues() {
+    const HmmerSearchSettings &currentSettings = model.searchSettings;
+
+    // Thresholds: the model keeps OPTION_NOT_SET for the score and domZ values, so the widgets get fixed numbers
+    domESpinBox->setValue(1);
+    scoreTresholdDoubleSpin->setValue(0);
+    domZDoubleSpinBox->setValue(0);
+
+    // Filter switches
+    nobiasCheckBox->setChecked(currentSettings.noBiasFilter);
+    nonull2CheckBox->setChecked(currentSettings.noNull2);
+    maxCheckBox->setChecked(currentSettings.doMax);
+
+    // Acceleration pipeline thresholds
+    f1DoubleSpinBox->setValue(currentSettings.f1);
+    f2DoubleSpinBox->setValue(currentSettings.f2);
+    f3DoubleSpinBox->setValue(currentSettings.f3);
+
+    seedSpinBox->setValue(currentSettings.seed);
+}
+
+// Copies the current state of the widgets into model.searchSettings.
+// Exactly one domain threshold kind is stored: an e-value, an explicit score or one of the profile bit cutoffs.
+void HmmerSearchDialog::getModelValues() {
+    HmmerSearchSettings &searchSettings = model.searchSettings;
+
+    // The dialog stays open when validation fails, so this method may run several times on the same model.
+    // Reset all threshold options first: otherwise a value from a previous attempt (e.g. an explicit score
+    // or a bit cutoff mode) would survive after the user switches to another threshold kind.
+    searchSettings.domE = HmmerSearchSettings::OPTION_NOT_SET;
+    searchSettings.domT = HmmerSearchSettings::OPTION_NOT_SET;
+    searchSettings.useBitCutoffs = HmmerSearchSettings::None;
+
+    if (useEvalTresholdsButton->isChecked()) {
+        // The spin box holds the decimal exponent of the e-value
+        searchSettings.domE = pow(10.0, domESpinBox->value());
+    } else if (useScoreTresholdsButton->isChecked()) {
+        if (useExplicitScoreTresholdButton->isChecked()) {
+            searchSettings.domT = scoreTresholdDoubleSpin->value();
+        } else if (useGATresholdsButton->isChecked()) {
+            searchSettings.useBitCutoffs = HmmerSearchSettings::p7H_GA;
+        } else if (useNCTresholdsButton->isChecked()) {
+            searchSettings.useBitCutoffs = HmmerSearchSettings::p7H_NC;
+        } else if (useTCTresholdsButton->isChecked()) {
+            searchSettings.useBitCutoffs = HmmerSearchSettings::p7H_TC;
+        } else {
+            assert(false);
+        }
+    } else {
+        assert(false);
+    }
+
+    if (domZCheckBox->isChecked()) {
+        searchSettings.domZ = domZDoubleSpinBox->value();
+    } else {
+        searchSettings.domZ = HmmerSearchSettings::OPTION_NOT_SET;
+    }
+
+    searchSettings.noBiasFilter = nobiasCheckBox->isChecked();
+    searchSettings.noNull2 = nonull2CheckBox->isChecked();
+    searchSettings.doMax = maxCheckBox->isChecked();
+
+    searchSettings.f1 = f1DoubleSpinBox->value();
+    searchSettings.f2 = f2DoubleSpinBox->value();
+    searchSettings.f3 = f3DoubleSpinBox->value();
+
+    searchSettings.seed = seedSpinBox->value();
+
+    const CreateAnnotationModel &annModel = annotationsWidgetController->getModel();
+    searchSettings.pattern = annotationsWidgetController->getAnnotationPattern();
+    searchSettings.hmmProfileUrl = queryHmmFileEdit->text();
+    searchSettings.sequence = model.sequence;
+    searchSettings.annotationTable = annModel.getAnnotationObject();
+}
+
+// Validates the collected settings.
+// Returns an empty string when everything is fine, a translated error message otherwise.
+QString HmmerSearchDialog::checkModel() {
+    const HmmerSearchSettings &searchSettings = model.searchSettings;
+
+    if (searchSettings.hmmProfileUrl.isEmpty()) {
+        // Point the user to the field that has to be filled
+        queryHmmFileEdit->setFocus();
+        return tr("HMM profile is not set");
+    }
+
+    if (!searchSettings.validate()) {
+        return tr("Settings are invalid");
+    }
+
+    // The last word belongs to the annotations widget
+    return annotationsWidgetController->validate();
+}
+
+// "Run" button handler: prepares the annotation object, gathers and checks the settings,
+// then registers the search task and closes the dialog
+void HmmerSearchDialog::sl_okButtonClicked() {
+    if (!annotationsWidgetController->prepareAnnotationObject()) {
+        QMessageBox::warning(this, tr("Error"), tr("Cannot create an annotation object. Please check settings"));
+        return;
+    }
+
+    SAFE_POINT(!model.sequence.isNull(), L10N::nullPointerError("sequence object"), );
+    getModelValues();
+
+    const QString errorMessage = checkModel();
+    if (!errorMessage.isEmpty()) {
+        QMessageBox::critical(this, tr("Error: bad arguments!"), errorMessage);
+        return;
+    }
+
+    // The task is handed over to the scheduler
+    AppContext::getTaskScheduler()->registerTopLevelTask(new HmmerSearchTask(model.searchSettings));
+
+    QDialog::accept();
+}
+
+// The e-value exponent is editable only while the e-value threshold mode is selected
+void HmmerSearchDialog::sl_useEvalTresholdsButtonChanged(bool checked) {
+    const bool isEvalueEditable = checked;
+    domESpinBox->setEnabled(isEvalueEditable);
+}
+
+// Enables or disables the whole group of score threshold controls
+void HmmerSearchDialog::sl_useScoreTresholdsButtonChanged(bool checked) {
+    useExplicitScoreTresholdButton->setEnabled(checked);
+    useGATresholdsButton->setEnabled(checked);
+    useNCTresholdsButton->setEnabled(checked);
+    useTCTresholdsButton->setEnabled(checked);
+
+    // The explicit score value is editable only if its own radio button is selected too
+    const bool isExplicitScoreEditable = checked && useExplicitScoreTresholdButton->isChecked();
+    scoreTresholdDoubleSpin->setEnabled(isExplicitScoreEditable);
+}
+
+// The score spin box follows the state of the "explicit score threshold" radio button
+void HmmerSearchDialog::sl_useExplicitScoreTresholdButton(bool checked) {
+    const bool isScoreEditable = checked;
+    scoreTresholdDoubleSpin->setEnabled(isScoreEditable);
+}
+
+// The F1/F2/F3 controls are available only while the "max" check box is unchecked
+void HmmerSearchDialog::sl_maxCheckBoxChanged(int state) {
+    assert(Qt::PartiallyChecked != state);
+    const bool areFilterThresholdsEditable = (Qt::Unchecked == state);
+
+    f1Label->setEnabled(areFilterThresholdsEditable);
+    f1DoubleSpinBox->setEnabled(areFilterThresholdsEditable);
+
+    f2Label->setEnabled(areFilterThresholdsEditable);
+    f2DoubleSpinBox->setEnabled(areFilterThresholdsEditable);
+
+    f3Label->setEnabled(areFilterThresholdsEditable);
+    f3DoubleSpinBox->setEnabled(areFilterThresholdsEditable);
+}
+
+// Picks the prefix of the e-value spin box depending on the sign of the exponent
+void HmmerSearchDialog::sl_domESpinBoxChanged(int newVal) {
+    if (0 <= newVal) {
+        domESpinBox->setPrefix(DOM_E_PLUS_PREFIX);
+    } else {
+        domESpinBox->setPrefix(DOM_E_MINUS_PREFIX);
+    }
+}
+
+// Lets the user choose an HMM profile file and puts the selected path into the line edit
+void HmmerSearchDialog::sl_queryHmmFileToolButtonClicked() {
+    LastUsedDirHelper helper(HMM_FILES_DIR_ID);
+
+    const QStringList hmmExtensions = QStringList() << "hmm";
+    const QString fileFilter = DialogUtils::prepareFileFilter(tr("HMM profile"), hmmExtensions, true, QStringList());
+
+    helper.url = U2FileDialog::getOpenFileName(this, tr("Select query HMM profile"), helper, fileFilter);
+    if (helper.url.isEmpty()) {
+        // Nothing was selected: keep the current text
+        return;
+    }
+    queryHmmFileEdit->setText(helper.url);
+}
+
+// The domZ value is editable only while its check box is checked
+void HmmerSearchDialog::sl_domZCheckBoxChanged(int state) {
+    assert(Qt::PartiallyChecked != state);
+    domZDoubleSpinBox->setEnabled(Qt::Checked == state);
+}
+
+} // namespace U2
diff --git a/src/plugins_3rdparty/hmm3/src/search/uHMM3SearchDialogImpl.h b/src/plugins/external_tool_support/src/hmmer/HmmerSearchDialog.h
similarity index 68%
rename from src/plugins_3rdparty/hmm3/src/search/uHMM3SearchDialogImpl.h
rename to src/plugins/external_tool_support/src/hmmer/HmmerSearchDialog.h
index b5506c3..3212ee7 100644
--- a/src/plugins_3rdparty/hmm3/src/search/uHMM3SearchDialogImpl.h
+++ b/src/plugins/external_tool_support/src/hmmer/HmmerSearchDialog.h
@@ -19,49 +19,34 @@
* MA 02110-1301, USA.
*/
-#ifndef _GB2_UHMM3_SEARCH_DLG_IMPL_H_
-#define _GB2_UHMM3_SEARCH_DLG_IMPL_H_
+#ifndef _U2_HMMER_SEARCH_DIALOG_H_
+#define _U2_HMMER_SEARCH_DIALOG_H_
-#include <qglobal.h>
-#if (QT_VERSION < 0x050000) //Qt 5
-#include <QtGui/QDialog>
-#include <QtGui/QButtonGroup>
-#else
-#include <QtWidgets/QDialog>
-#include <QtWidgets/QButtonGroup>
-#endif
-
-#include <U2Core/DNASequence.h>
#include <U2Gui/CreateAnnotationWidgetController.h>
-#include <U2Core/DNASequenceObject.h>
-#include <ui_UHMM3SearchDialog.h>
-#include <search/uHMM3SearchTask.h>
+#include "HmmerSearchTask.h"
+#include "ui_HmmerSearchDialog.h"
namespace U2 {
-struct UHMM3SearchDialogModel {
- UHMM3SearchTaskSettings searchSettings;
- QString hmmfile;
+class U2SequenceObject;
+
+class HmmerSearchDialogModel {
+public:
+ HmmerSearchSettings searchSettings;
QPointer<U2SequenceObject> sequence;
-}; // UHMM3SearchDialogModel
+};
-class UHMM3SearchDialogImpl : public QDialog, public Ui_UHMM3SearchDialog {
+class HmmerSearchDialog : public QDialog, public Ui_HmmerSearchDialog {
Q_OBJECT
public:
+ HmmerSearchDialog(U2SequenceObject *seqObj, QWidget *parent = NULL);
+
static const QString DOM_E_PLUS_PREFIX;
static const QString DOM_E_MINUS_PREFIX;
static const QString HMM_FILES_DIR_ID;
static const QString ANNOTATIONS_DEFAULT_NAME;
-public:
- UHMM3SearchDialogImpl(U2SequenceObject *seqObj, QWidget *p = NULL);
-
-private:
- void setModelValues();
- void getModelValues();
- QString checkModel();
-
private slots:
void sl_okButtonClicked();
void sl_useEvalTresholdsButtonChanged(bool checked);
@@ -73,12 +58,15 @@ private slots:
void sl_domZCheckBoxChanged(int state);
private:
+ void setModelValues();
+ void getModelValues();
+ QString checkModel();
+
QButtonGroup useScoreTresholdGroup;
CreateAnnotationWidgetController * annotationsWidgetController;
- UHMM3SearchDialogModel model;
-
-}; // UHMM3SearchDialogImpl
+ HmmerSearchDialogModel model;
+};
-} // U2
+} // namespace U2
-#endif // _GB2_UHMM3_SEARCH_DLG_IMPL_H_
+#endif // _U2_HMMER_SEARCH_DIALOG_H_
diff --git a/src/plugins_3rdparty/hmm3/src/search/UHMM3SearchDialog.ui b/src/plugins/external_tool_support/src/hmmer/HmmerSearchDialog.ui
similarity index 99%
rename from src/plugins_3rdparty/hmm3/src/search/UHMM3SearchDialog.ui
rename to src/plugins/external_tool_support/src/hmmer/HmmerSearchDialog.ui
index cf22e05..1b4782a 100644
--- a/src/plugins_3rdparty/hmm3/src/search/UHMM3SearchDialog.ui
+++ b/src/plugins/external_tool_support/src/hmmer/HmmerSearchDialog.ui
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
- <class>UHMM3SearchDialog</class>
- <widget class="QDialog" name="UHMM3SearchDialog">
+ <class>HmmerSearchDialog</class>
+ <widget class="QDialog" name="HmmerSearchDialog">
<property name="geometry">
<rect>
<x>0</x>
@@ -33,8 +33,8 @@
</property>
<property name="sizeHint" stdset="0">
<size>
- <width>601</width>
- <height>320</height>
+ <width>587</width>
+ <height>309</height>
</size>
</property>
<property name="tabShape">
diff --git a/src/plugins/external_tool_support/src/hmmer/HmmerSearchSettings.cpp b/src/plugins/external_tool_support/src/hmmer/HmmerSearchSettings.cpp
new file mode 100644
index 0000000..8fbf0af
--- /dev/null
+++ b/src/plugins/external_tool_support/src/hmmer/HmmerSearchSettings.cpp
@@ -0,0 +1,64 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <U2Core/U2SafePoints.h>
+
+#include "HmmerSearchSettings.h"
+
+namespace U2 {
+
+// Sentinel for optional numeric settings: validate() accepts it in place of a positive value
+const double HmmerSearchSettings::OPTION_NOT_SET = -1.0;
+
+// Default settings: only the sequence e-value threshold (10.0) is set, all other thresholds are
+// OPTION_NOT_SET, bit cutoffs are disabled, all filters are on and the seed is fixed to 42.
+// The URLs, the sequence and the annotation pattern are left empty.
+HmmerSearchSettings::HmmerSearchSettings()
+ : e(10.0),
+ t(OPTION_NOT_SET),
+ z(OPTION_NOT_SET),
+ domE(OPTION_NOT_SET),
+ domT(OPTION_NOT_SET),
+ domZ(OPTION_NOT_SET),
+ useBitCutoffs(None),
+ f1(0.02),
+ f2(1e-3),
+ f3(1e-5),
+ doMax(false),
+ noBiasFilter(false),
+ noNull2(false),
+ seed(42),
+ annotationTable(NULL)
+{
+
+}
+
+namespace {
+
+// An optional numeric setting is acceptable when it is strictly positive or left unset
+bool isPositiveOrNotSet(double value) {
+    return 0 < value || HmmerSearchSettings::OPTION_NOT_SET == value;
+}
+
+}
+
+// Checks that the numeric options are in range and that both the profile and the sequence are specified
+bool HmmerSearchSettings::validate() const {
+    // The sequence e-value threshold is mandatory
+    CHECK(0 < e, false);
+
+    CHECK(isPositiveOrNotSet(t), false);
+    CHECK(isPositiveOrNotSet(z), false);
+    CHECK(isPositiveOrNotSet(domE), false);
+    CHECK(isPositiveOrNotSet(domT), false);
+    CHECK(isPositiveOrNotSet(domZ), false);
+
+    CHECK(0 <= seed, false);
+
+    // Input data: a profile file plus a sequence given either as a file or as an object
+    CHECK(!hmmProfileUrl.isEmpty(), false);
+    CHECK(!sequenceUrl.isEmpty() || NULL != sequence, false);
+
+    return true;
+}
+
+} // namespace U2
diff --git a/src/plugins/external_tool_support/src/hmmer/HmmerSearchSettings.h b/src/plugins/external_tool_support/src/hmmer/HmmerSearchSettings.h
new file mode 100644
index 0000000..117983d
--- /dev/null
+++ b/src/plugins/external_tool_support/src/hmmer/HmmerSearchSettings.h
@@ -0,0 +1,79 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef _U2_HMMER_SEARCH_SETTINGS_H_
+#define _U2_HMMER_SEARCH_SETTINGS_H_
+
+#include <U2Core/AnnotationCreationPattern.h>
+#include <U2Core/AnnotationTableObject.h>
+#include <U2Core/DNASequenceObject.h>
+
+namespace U2 {
+
+class AnnotationTableObject;
+
+// Settings of a HMMER search run. The option named in each member comment is the command line
+// argument produced from that member by HmmerSearchTask::getArguments().
+class HmmerSearchSettings {
+public:
+ enum BitCutoffs {
+ None, // disabled
+ p7H_GA, // gathering thresholds available
+ p7H_TC, // trusted cutoffs available
+ p7H_NC // noise cutoffs available
+ };
+
+ HmmerSearchSettings();
+
+ // Returns true if the numeric options are in range and the profile and the sequence are specified
+ bool validate() const;
+
+ double e; // -E: report sequences <= this e-value threshold in output
+ double t; // -T: report sequences >= this score threshold in output
+ double z; // -Z: set # of comparisons done, for e-value calculation
+ double domE; // --domE: report domains <= this e-value threshold in output
+ double domT; // --domT: report domains >= this score cutoff in output
+ double domZ; // --domZ: set number of significant seqs, for domain e-value calibration
+ BitCutoffs useBitCutoffs; // --cut_ga: use profile's GA gathering cutoffs to set -T, --domT
+ // --cut_nc: use profile's NC noise cutoffs to set -T, --domT
+ // --cut_tc: use profile's TC trusted cutoffs to set -T, --domT
+
+ double f1; // --F1: Stage 1 (MSV) threshold: promote hits w/ P <= F1
+ double f2; // --F2: Stage 2 (Vit) threshold: promote hits w/ P <= F2
+ double f3; // --F3: Stage 3 (Fwd) threshold: promote hits w/ P <= F3
+
+ bool doMax; // --max: Turn all heuristic filters off ( less speed more power )
+ bool noBiasFilter; // --nobias: turn off composition bias filter
+ bool noNull2; // --nonull2: turn off biased composition score corrections
+
+ int seed; // --seed : set RNG seed ( if 0: one-time arbitrary seed )
+
+ // Directory for intermediate files; the search task generates a temporary one when this is empty
+ QString workingDir;
+ QString hmmProfileUrl;
+ // Either 'sequenceUrl' or 'sequence' has to be set, see validate()
+ QString sequenceUrl;
+ QPointer<U2SequenceObject> sequence;
+
+ // Destination of the created annotations and the pattern (name, type, group, description) they are built from
+ QPointer<AnnotationTableObject> annotationTable;
+ AnnotationCreationPattern pattern;
+
+ // Marks an optional numeric setting as "not specified"
+ static const double OPTION_NOT_SET;
+};
+
+} // namespace U2
+
+#endif // _U2_HMMER_SEARCH_SETTINGS_H_
diff --git a/src/plugins/external_tool_support/src/hmmer/HmmerSearchTask.cpp b/src/plugins/external_tool_support/src/hmmer/HmmerSearchTask.cpp
new file mode 100644
index 0000000..fdc933c
--- /dev/null
+++ b/src/plugins/external_tool_support/src/hmmer/HmmerSearchTask.cpp
@@ -0,0 +1,242 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <QCoreApplication>
+#include <QDir>
+
+#include <U2Core/AppContext.h>
+#include <U2Core/AppResources.h>
+#include <U2Core/AppSettings.h>
+#include <U2Core/Counter.h>
+#include <U2Core/CreateAnnotationTask.h>
+#include <U2Core/DNASequenceObject.h>
+#include <U2Core/IOAdapterUtils.h>
+#include <U2Core/L10n.h>
+#include <U2Core/U2OpStatusUtils.h>
+#include <U2Core/U2SafePoints.h>
+#include <U2Core/UserApplicationsSettings.h>
+
+#include "HmmerParseSearchResultsTask.h"
+#include "HmmerSearchTask.h"
+#include "HmmerSupport.h"
+#include "utils/ExportTasks.h"
+
+namespace U2 {
+
+// Names of the files created inside the working directory: the exported input sequence
+// and the table passed to the tool with "--domtblout"
+const QString HmmerSearchTask::INPUT_SEQUENCE_FILENAME = "input_sequence.fa";
+const QString HmmerSearchTask::PER_DOMAIN_HITS_FILENAME = "per_domain_hits.txt";
+
+// Creates the task with a private copy of the settings. No subtask exists yet: they are created
+// in prepare() and onSubTaskFinished(). Invalid settings put the task into the error state.
+HmmerSearchTask::HmmerSearchTask(const HmmerSearchSettings &settings)
+ : ExternalToolSupportTask(tr("HMMER search"), TaskFlags_NR_FOSE_COSC | TaskFlag_ReportingIsEnabled | TaskFlag_ReportingIsSupported),
+ settings(settings),
+ saveSequenceTask(NULL),
+ hmmerTask(NULL),
+ parseTask(NULL),
+ removeWorkingDir(false)
+{
+ GCOUNTER(cvar, tvar, "HMMER Search");
+ SAFE_POINT_EXT(settings.validate(), setError("Settings are invalid"), );
+}
+
+// Returns the parsed hits; the list is empty until the parse subtask has been created
+QList<SharedAnnotationData> HmmerSearchTask::getAnnotations() const {
+    if (NULL == parseTask) {
+        return QList<SharedAnnotationData>();
+    }
+    return parseTask->getAnnotations();
+}
+
+// Creates the working directory and schedules the first subtask: the sequence export if the sequence
+// is given as an object, the HMMER run itself if the sequence is already available as a file
+void HmmerSearchTask::prepare() {
+    prepareWorkingDir();
+    // Nothing can be run without a working directory: stop here instead of scheduling subtasks that are bound to fail
+    CHECK_OP(stateInfo, );
+
+    if (settings.sequenceUrl.isEmpty()) {
+        SAFE_POINT_EXT(NULL != settings.sequence, setError(L10N::nullPointerError("sequence object")), );
+        prepareSequenceSaveTask();
+        addSubTask(saveSequenceTask);
+    } else {
+        prepareHmmerTask();
+        addSubTask(hmmerTask);
+    }
+}
+
+// Chains the pipeline steps: sequence export -> HMMER run -> results parsing -> annotations creation.
+// Returns the subtask to run next (an empty list if there is none or the task has failed or was canceled).
+QList<Task *> HmmerSearchTask::onSubTaskFinished(Task *subTask) {
+    QList<Task *> nextTasks;
+    CHECK_OP(stateInfo, nextTasks);
+
+    if (saveSequenceTask == subTask) {
+        prepareHmmerTask();
+        nextTasks << hmmerTask;
+        return nextTasks;
+    }
+
+    if (hmmerTask == subTask) {
+        prepareParseTask();
+        nextTasks << parseTask;
+        return nextTasks;
+    }
+
+    if (parseTask == subTask) {
+        // The intermediate files are not needed once the hits table has been parsed
+        removeTempDir();
+        Task *annotationsTask = new CreateAnnotationsTask(settings.annotationTable, parseTask->getAnnotations(), settings.pattern.groupName);
+        annotationsTask->setSubtaskProgressWeight(5);
+        nextTasks << annotationsTask;
+    }
+
+    return nextTasks;
+}
+
+namespace {
+
+// One row of the report table: a bold caption cell followed by a value cell
+QString makeReportRow(const QString &caption, const QString &value) {
+    return "<tr><td><b>" + caption + "</b></td><td>" + value + "</td></tr>";
+}
+
+}
+
+// Builds the HTML report of the task: the profile used and, on success, where the results went and how many were found
+QString HmmerSearchTask::generateReport() const {
+    QString report = "<table>";
+    report += makeReportRow(tr("HMM profile used: "), QFileInfo(settings.hmmProfileUrl).absoluteFilePath());
+
+    if (hasError() || isCanceled()) {
+        report += makeReportRow(tr("Task was not finished"), "");
+        report += "</table>";
+        return report;
+    }
+
+    const bool hasResultDocument = NULL != settings.annotationTable && NULL != settings.annotationTable->getDocument();
+    if (hasResultDocument) {
+        report += makeReportRow(tr("Result annotation table: "), settings.annotationTable->getDocument()->getName());
+    }
+    report += makeReportRow(tr("Result annotation group: "), settings.pattern.groupName);
+    report += makeReportRow(tr("Result annotation name: "), settings.pattern.annotationName);
+
+    report += makeReportRow(tr("Results count: "), QString::number(getAnnotations().size()));
+    report += "</table>";
+    return report;
+}
+
+namespace {
+
+// Subdirectory of the process temporary directory used by the search task
+const QString HMMER_TEMP_DIR = "hmmer";
+
+// Composes a directory name from the prefix, the task id, the current date and time and the process id
+QString getTaskTempDirName(const QString &prefix, Task *task) {
+    const QString taskId = QString::number(task->getTaskId());
+    const QString dateStamp = QDate::currentDate().toString("dd.MM.yyyy");
+    const QString timeStamp = QTime::currentTime().toString("hh.mm.ss.zzz");
+    const QString processId = QString::number(QCoreApplication::applicationPid());
+
+    return prefix + taskId + "_" + dateStamp + "_" + timeStamp + "_" + processId;
+}
+
+}
+
+// Makes sure that an empty working directory exists. If the directory is not specified in the settings,
+// a temporary one is chosen and marked for removal at the end of the task.
+void HmmerSearchTask::prepareWorkingDir() {
+    if (settings.workingDir.isEmpty()) {
+        const QString tempRoot = AppContext::getAppSettings()->getUserAppsSettings()->getCurrentProcessTemporaryDirPath(HMMER_TEMP_DIR);
+        settings.workingDir = tempRoot + "/" + getTaskTempDirName("hmmer_search_", this);
+        removeWorkingDir = true;
+    }
+
+    QDir workingDir(settings.workingDir);
+    if (workingDir.exists()) {
+        // Leftovers of a previous run are cleaned up first
+        ExternalToolSupportUtils::removeTmpDir(settings.workingDir, stateInfo);
+        CHECK_OP(stateInfo, );
+    }
+
+    CHECK_EXT(workingDir.mkpath(settings.workingDir), setError(tr("Cannot create a directory for temporary files.")), );
+}
+
+// Deletes the working directory if it was generated by the task itself; removal errors are ignored
+void HmmerSearchTask::removeTempDir() const {
+    if (!removeWorkingDir) {
+        return;
+    }
+    U2OpStatusImpl ignoredStatus;
+    ExternalToolSupportUtils::removeTmpDir(settings.workingDir, ignoredStatus);
+}
+
+// Builds the command line of the HMMER search tool from the task settings.
+// Optional numeric thresholds are passed only if they differ from HmmerSearchSettings::OPTION_NOT_SET.
+// The per-domain hits table is requested with "--domtblout" and written into the working directory;
+// the profile and the sequence files are the last two arguments.
+QStringList HmmerSearchTask::getArguments() const {
+    QStringList arguments;
+
+    arguments << "-E" << QString::number(settings.e);
+    if (HmmerSearchSettings::OPTION_NOT_SET != settings.t) {
+        arguments << "-T" << QString::number(settings.t);
+    }
+
+    if (HmmerSearchSettings::OPTION_NOT_SET != settings.z) {
+        arguments << "-Z" << QString::number(settings.z);
+    }
+
+    if (HmmerSearchSettings::OPTION_NOT_SET != settings.domE) {
+        arguments << "--domE" << QString::number(settings.domE);
+    }
+
+    if (HmmerSearchSettings::OPTION_NOT_SET != settings.domT) {
+        arguments << "--domT" << QString::number(settings.domT);
+    }
+
+    if (HmmerSearchSettings::OPTION_NOT_SET != settings.domZ) {
+        arguments << "--domZ" << QString::number(settings.domZ);
+    }
+
+    // Each cutoff mode has to be mapped to the option of the same name:
+    // TC are the trusted cutoffs ("--cut_tc"), NC are the noise cutoffs ("--cut_nc")
+    switch (settings.useBitCutoffs) {
+    case HmmerSearchSettings::None:
+        break;
+    case HmmerSearchSettings::p7H_GA:
+        arguments << "--cut_ga";
+        break;
+    case HmmerSearchSettings::p7H_TC:
+        arguments << "--cut_tc";
+        break;
+    case HmmerSearchSettings::p7H_NC:
+        arguments << "--cut_nc";
+        break;
+    default:
+        FAIL(tr("Unknown option controlling model-specific thresholding"), arguments);
+    }
+
+    arguments << "--F1" << QString::number(settings.f1);
+    arguments << "--F2" << QString::number(settings.f2);
+    arguments << "--F3" << QString::number(settings.f3);
+
+    if (settings.doMax) {
+        arguments << "--max";
+    }
+
+    if (settings.noBiasFilter) {
+        arguments << "--nobias";
+    }
+
+    if (settings.noNull2) {
+        arguments << "--nonull2";
+    }
+
+    arguments << "--seed" << QString::number(settings.seed);
+    arguments << "--cpu" << QString::number(AppContext::getAppSettings()->getAppResourcePool()->getIdealThreadCount());
+
+    // Alignments are not requested: only the hits table is read afterwards
+    arguments << "--noali";
+    arguments << "--domtblout" << settings.workingDir + "/" + PER_DOMAIN_HITS_FILENAME;
+
+    arguments << settings.hmmProfileUrl;
+    arguments << settings.sequenceUrl;
+
+    return arguments;
+}
+
+// Creates the subtask that exports the sequence object to a FASTA file in the working directory;
+// the file becomes the sequence input of the search
+void HmmerSearchTask::prepareSequenceSaveTask() {
+    const QString exportedSequenceUrl = settings.workingDir + "/" + INPUT_SEQUENCE_FILENAME;
+    settings.sequenceUrl = exportedSequenceUrl;
+
+    saveSequenceTask = new SaveSequenceTask(settings.sequence, settings.sequenceUrl, BaseDocumentFormats::FASTA);
+    saveSequenceTask->setSubtaskProgressWeight(5);
+}
+
+// Creates the subtask that runs the external search tool; it takes the main share of the task progress
+void HmmerSearchTask::prepareHmmerTask() {
+    const QStringList toolArguments = getArguments();
+    hmmerTask = new ExternalToolRunTask(HmmerSupport::SEARCH_TOOL, toolArguments, new Hmmer3LogParser());
+    setListenerForTask(hmmerTask);
+    hmmerTask->setSubtaskProgressWeight(85);
+}
+
+// Creates the subtask that reads the per-domain hits table produced by the tool
+void HmmerSearchTask::prepareParseTask() {
+    const QString hitsTableUrl = settings.workingDir + "/" + PER_DOMAIN_HITS_FILENAME;
+    parseTask = new HmmerParseSearchResultsTask(hitsTableUrl, settings.pattern);
+    parseTask->setSubtaskProgressWeight(5);
+}
+
+} // namespace U2
diff --git a/src/plugins/external_tool_support/src/hmmer/HmmerSearchTask.h b/src/plugins/external_tool_support/src/hmmer/HmmerSearchTask.h
new file mode 100644
index 0000000..07a003a
--- /dev/null
+++ b/src/plugins/external_tool_support/src/hmmer/HmmerSearchTask.h
@@ -0,0 +1,68 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef _U2_HMMER_SEARCH_TASK_H_
+#define _U2_HMMER_SEARCH_TASK_H_
+
+#include <U2Core/ExternalToolRunTask.h>
+
+#include "HmmerSearchSettings.h"
+
+namespace U2 {
+
+class AnnotationTableObject;
+class HmmerParseSearchResultsTask;
+class SaveSequenceTask;
+
+// Task that searches a sequence with an HMM profile using the external HMMER search tool.
+// Pipeline of subtasks: export of the sequence to a file (only if it is given as an object) ->
+// run of the tool -> parsing of the per-domain hits table -> creation of the annotations.
+class HmmerSearchTask : public ExternalToolSupportTask {
+ Q_OBJECT
+public:
+ HmmerSearchTask(const HmmerSearchSettings &settings);
+
+ // Parsed hits; empty until the parse subtask has been created
+ QList<SharedAnnotationData> getAnnotations() const;
+
+private:
+ void prepare();
+ QList<Task *> onSubTaskFinished(Task *subTask);
+ QString generateReport() const;
+
+ // Creates the (empty) working directory; generates a temporary one if the settings do not specify it
+ void prepareWorkingDir();
+ // Removes the working directory if it was generated by the task
+ void removeTempDir() const;
+ // Command line arguments of the search tool built from the settings
+ QStringList getArguments() const;
+
+ void prepareSequenceSaveTask();
+ void prepareHmmerTask();
+ void prepareParseTask();
+
+ // Private copy: workingDir and sequenceUrl may be filled in by the task itself
+ HmmerSearchSettings settings;
+
+ // Subtasks, NULL until the corresponding prepare*Task() method is called
+ SaveSequenceTask *saveSequenceTask;
+ ExternalToolRunTask *hmmerTask;
+ HmmerParseSearchResultsTask *parseTask;
+ // True if the working directory was generated by the task and has to be deleted afterwards
+ bool removeWorkingDir;
+
+ static const QString INPUT_SEQUENCE_FILENAME;
+ static const QString PER_DOMAIN_HITS_FILENAME;
+};
+
+} // namespace U2
+
+#endif // _U2_HMMER_SEARCH_TASK_H_
diff --git a/src/plugins/external_tool_support/src/hmmer/HmmerSearchWorker.cpp b/src/plugins/external_tool_support/src/hmmer/HmmerSearchWorker.cpp
new file mode 100644
index 0000000..cd39412
--- /dev/null
+++ b/src/plugins/external_tool_support/src/hmmer/HmmerSearchWorker.cpp
@@ -0,0 +1,332 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <U2Core/AnnotationData.h>
+#include <U2Core/AppContext.h>
+#include <U2Core/DNAAlphabet.h>
+#include <U2Core/DNASequence.h>
+#include <U2Core/DNASequenceObject.h>
+#include <U2Core/DNATranslation.h>
+#include <U2Core/FailTask.h>
+#include <U2Core/Log.h>
+#include <U2Core/MultiTask.h>
+#include <U2Core/TaskSignalMapper.h>
+#include <U2Core/U2OpStatusUtils.h>
+
+#include <U2Designer/DelegateEditors.h>
+
+#include <U2Lang/ActorPrototypeRegistry.h>
+#include <U2Lang/BaseActorCategories.h>
+#include <U2Lang/BasePorts.h>
+#include <U2Lang/BaseSlots.h>
+#include <U2Lang/BaseTypes.h>
+#include <U2Lang/CoreLibConstants.h>
+#include <U2Lang/Datatype.h>
+#include <U2Lang/IntegralBusModel.h>
+#include <U2Lang/WorkflowEnv.h>
+#include <U2Lang/WorkflowMonitor.h>
+
+#include "HmmerSearchTask.h"
+#include "HmmerSearchWorker.h"
+#include "HmmerSupport.h"
+
+namespace U2 {
+namespace LocalWorkflow {
+
+/*******************************
+ * HmmerSearchWorkerFactory
+ *******************************/
+// Identifier of the input port that delivers HMM profile URLs.
+static const QString HMM_URL_PORT("in-hmm3");
+
+// Identifiers of the element's attributes.
+static const QString NAME_ATTR("result-name");
+static const QString DOM_E_ATTR("e-val");
+static const QString DOM_T_ATTR("score");
+static const QString SEED_ATTR("seed");
+static const QString FILTER_BY_ATTR("filter-by");
+
+// Labels offered for the "filter-by" attribute by its combo box delegate.
+static const QString FILTER_BY_E_VALUE_STRING ("E-value");
+static const QString FILTER_BY_SCORE_STRING ("Score");
+static const QString FILTER_BY_NONE_STRING("Do not filter results");
+
+// Values stored in the "filter-by" attribute, one per label above.
+static const QString FILTER_BY_E_VALUE("evalue");
+static const QString FILTER_BY_SCORE("score");
+static const QString FILTER_BY_NONE("none");
+
+// Identifier used for both the element prototype descriptor and the DomainFactory base.
+const QString HmmerSearchWorkerFactory::ACTOR("hmm3-search");
+
+// Builds the "HMM3 Search" workflow element prototype (ports, attributes and
+// property delegates), registers it in the prototype registry and registers a
+// new HmmerSearchWorkerFactory in the local workflow domain.
+void HmmerSearchWorkerFactory::init() {
+ QList<PortDescriptor*> p;
+ QList<Attribute*> a;
+ {
+ Descriptor filterByDesc(FILTER_BY_ATTR,
+ HmmerSearchWorker::tr("Filter by"),
+ HmmerSearchWorker::tr("Parameter to filter results by."));
+ Descriptor hd(HMM_URL_PORT, HmmerSearchWorker::tr("HMMER profile"), HmmerSearchWorker::tr("HMMER profile(s) URL(s) to search with."));
+ Descriptor sd(BasePorts::IN_SEQ_PORT_ID(), HmmerSearchWorker::tr("Input sequence"),
+ HmmerSearchWorker::tr("An input sequence (nucleotide or protein) to search in."));
+ Descriptor od(BasePorts::OUT_ANNOTATIONS_PORT_ID(), HmmerSearchWorker::tr("HMMER annotations"),
+ HmmerSearchWorker::tr("Annotations marking found similar sequence regions."));
+
+ // Two input ports (profile URL as a string, sequence) and one output port (annotation table).
+ QMap<Descriptor, DataTypePtr> hmmM;
+ hmmM[BaseSlots::URL_SLOT()] = BaseTypes::STRING_TYPE();
+ p << new PortDescriptor(hd, DataTypePtr(new MapDataType("hmm.search.hmm", hmmM)), true /*input*/, false, IntegralBusPort::BLIND_INPUT);
+ QMap<Descriptor, DataTypePtr> seqM;
+ seqM[BaseSlots::DNA_SEQUENCE_SLOT()] = BaseTypes::DNA_SEQUENCE_TYPE();
+ p << new PortDescriptor(sd, DataTypePtr(new MapDataType("hmm.search.sequence", seqM)), true /*input*/);
+ QMap<Descriptor, DataTypePtr> outM;
+ outM[BaseSlots::ANNOTATION_TABLE_SLOT()] = BaseTypes::ANNOTATION_TABLE_TYPE();
+ p << new PortDescriptor(od, DataTypePtr(new MapDataType("hmm.search.out", outM)), false /*input*/, true);
+
+ Descriptor nd(NAME_ATTR, HmmerSearchWorker::tr("Result annotation"), HmmerSearchWorker::tr("A name of the result annotations."));
+ Descriptor nsd(SEED_ATTR, HmmerSearchWorker::tr("Seed"), HmmerSearchWorker::tr("Random generator seed. 0 - means that one-time arbitrary seed will be used."));
+ Descriptor ded(DOM_E_ATTR, HmmerSearchWorker::tr("Filter by high E-value"), HmmerSearchWorker::tr("Report domains with e-value less than."));
+ Descriptor dtd(DOM_T_ATTR, HmmerSearchWorker::tr("Filter by low score"), HmmerSearchWorker::tr("Report domains with score greater than."));
+
+ // Threshold attributes with default values 10.0 (E-value) and 0.0 (score).
+ Attribute *evalue = new Attribute(ded, BaseTypes::NUM_TYPE(), false, QVariant((double)10.0));
+ Attribute *score = new Attribute(dtd, BaseTypes::NUM_TYPE(), false, QVariant((double)0.0));
+
+ // Attribute order in the list: result name, filter mode, seed, E-value, score.
+ Attribute *filterBy = new Attribute(filterByDesc, BaseTypes::STRING_TYPE(), true, FILTER_BY_NONE);
+ a << new Attribute(nd, BaseTypes::STRING_TYPE(), true, QVariant("hmm_signal"));
+ a << filterBy;
+ a << new Attribute(nsd, BaseTypes::NUM_TYPE(), false, QVariant(42));
+ a << evalue;
+ a << score;
+
+
+ // Each threshold is tied to one value of the "filter-by" attribute
+ // (presumably it is shown only while that value is selected - confirm VisibilityRelation semantics).
+ evalue->addRelation(new VisibilityRelation(FILTER_BY_ATTR, FILTER_BY_E_VALUE));
+ score->addRelation(new VisibilityRelation(FILTER_BY_ATTR, FILTER_BY_SCORE));
+ }
+
+ Descriptor desc(HmmerSearchWorkerFactory::ACTOR, HmmerSearchWorker::tr("HMM3 Search"),
+ HmmerSearchWorker::tr("Searches each input sequence for significantly similar sequence matches to all specified HMM profiles."
+ " In case several profiles were supplied, searches with all profiles one by one and outputs united set of annotations for each sequence."));
+ ActorPrototype *proto = new IntegralBusActorPrototype(desc, p, a);
+ QMap<QString, PropertyDelegate *> delegates;
+
+ // Combo box for the filter mode: label -> stored value.
+ {
+ QVariantMap filterByValues;
+ filterByValues[FILTER_BY_E_VALUE_STRING] = FILTER_BY_E_VALUE;
+ filterByValues[FILTER_BY_SCORE_STRING] = FILTER_BY_SCORE;
+ filterByValues[FILTER_BY_NONE_STRING] = FILTER_BY_NONE;
+ delegates[FILTER_BY_ATTR] = new ComboBoxDelegate(filterByValues);
+ }
+
+ // Spin box settings for the E-value threshold.
+ {
+ QVariantMap eMap;
+ eMap["decimals"]= (2);
+ eMap["minimum"] = (1e-99);
+ eMap["maximum"] = (1e+99);
+ eMap["singleStep"] = (0.1);
+ delegates[DOM_E_ATTR] = new DoubleSpinBoxDelegate(eMap);
+ }
+ // Spin box settings for the seed: integers from 0 to INT_MAX.
+ {
+ QVariantMap nMap;
+ nMap["maximum"] = (INT_MAX);
+ nMap["minimum"] = (0);
+ delegates[SEED_ATTR] = new SpinBoxDelegate(nMap);
+ }
+ // Spin box settings for the score threshold.
+ {
+ QVariantMap tMap;
+ tMap["decimals"]= (2);
+ tMap["minimum"] = (-1e+09);
+ tMap["maximum"] = (1e+09);
+ tMap["singleStep"] = (0.1);
+ delegates[DOM_T_ATTR] = new DoubleSpinBoxDelegate(tMap);
+ }
+
+ proto->setEditor(new DelegateEditor(delegates));
+ proto->setIconPath(":/external_tool_support/images/hmmer.png");
+ proto->setPrompter(new HmmerSearchPrompter());
+ // The element is declared to use the "HMMER search" external tool.
+ proto->addExternalTool(HmmerSupport::SEARCH_TOOL);
+ WorkflowEnv::getProtoRegistry()->registerProto(Descriptor("hmmer3", HmmerSearchWorker::tr("HMMER3 Tools"), ""), proto);
+
+ DomainFactory *localDomain = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
+ localDomain->registerEntry(new HmmerSearchWorkerFactory());
+}
+
+// Constructs the factory, passing the ACTOR identifier to the DomainFactory base.
+HmmerSearchWorkerFactory::HmmerSearchWorkerFactory() : DomainFactory(ACTOR) {}
+
+// Returns a newly allocated HmmerSearchWorker for the given actor.
+Worker * HmmerSearchWorkerFactory::createWorker(Actor *a) {
+    Worker *searchWorker = new HmmerSearchWorker(a);
+    return searchWorker;
+}
+
+/*******************************
+ * HmmerSearchPrompter
+ *******************************/
+// Forwards the actor to the PrompterBase base class; no other state is set up.
+HmmerSearchPrompter::HmmerSearchPrompter(Actor *p) : PrompterBase<HmmerSearchPrompter>(p) {}
+
+// Composes the rich-text description of the element: which element supplies the
+// sequences, which one supplies the HMM profiles, and the name of the result annotations.
+// A part is left empty when the corresponding input has no producer.
+QString HmmerSearchPrompter::composeRichDoc() {
+    IntegralBusPort *hmmInput = qobject_cast<IntegralBusPort *>(target->getPort(HMM_URL_PORT));
+    Actor *hmmProducer = hmmInput->getProducer(HMM_URL_PORT);
+    IntegralBusPort *seqInput = qobject_cast<IntegralBusPort *>(target->getPort(BasePorts::IN_SEQ_PORT_ID()));
+    Actor *seqProducer = seqInput->getProducer(BasePorts::IN_SEQ_PORT_ID());
+
+    QString seqName("");
+    if (seqProducer) {
+        seqName = tr("For each sequence from <u>%1</u>,").arg(seqProducer->getLabel());
+    }
+
+    QString hmmName("");
+    if (hmmProducer) {
+        hmmName = tr("using all profiles provided by <u>%1</u>,").arg(hmmProducer->getLabel());
+    }
+
+    const QString resultName = getHyperlink(NAME_ATTR, getRequiredParam(NAME_ATTR));
+
+    // QString::arg() fills the lowest-numbered remaining marker, so the third
+    // argument ends up in %4 even though the template has no %3.
+    return tr("%1 search HMMER signals %2. "
+              "<br>Output the list of found regions annotated as <u>%4</u>.")
+        .arg(seqName)
+        .arg(hmmName)
+        .arg(resultName);
+}
+
+/*******************************
+ * HmmerSearchWorker
+ *******************************/
+// Creates the worker with all port pointers cleared; they are assigned in init().
+HmmerSearchWorker::HmmerSearchWorker(Actor *a)
+    : BaseWorker(a, false), hmmPort(NULL), seqPort(NULL), output(NULL) {}
+
+// Looks up the worker's ports and reads the element attributes into the
+// search configuration (thresholds, seed) and the result annotation name.
+void HmmerSearchWorker::init() {
+    cfg = HmmerSearchSettings();
+
+    hmmPort = ports.value(HMM_URL_PORT);
+    seqPort = ports.value(BasePorts::IN_SEQ_PORT_ID());
+    output = ports.value(BasePorts::OUT_ANNOTATIONS_PORT_ID());
+    seqPort->addComplement(output);
+    output->addComplement(seqPort);
+
+    // Both thresholds start out unset; the selected filter mode overrides at most one of them.
+    cfg.domE = HmmerSearchSettings::OPTION_NOT_SET;
+    cfg.domT = HmmerSearchSettings::OPTION_NOT_SET;
+    const QString filterMode = actor->getParameter(FILTER_BY_ATTR)->getAttributeValue<QString>(context);
+    if (FILTER_BY_E_VALUE == filterMode) {
+        cfg.domE = actor->getParameter(DOM_E_ATTR)->getAttributeValue<double>(context);
+    } else if (FILTER_BY_SCORE == filterMode) {
+        cfg.domT = actor->getParameter(DOM_T_ATTR)->getAttributeValue<double>(context);
+    }
+
+    cfg.seed = actor->getParameter(SEED_ATTR)->getAttributeValue<int>(context);
+
+    // An empty annotation name falls back to the default one.
+    resultName = actor->getParameter(NAME_ATTR)->getAttributeValue<QString>(context);
+    if (resultName.isEmpty()) {
+        algoLog.details(tr("Value for attribute name is empty, default name used"));
+        resultName = "hmm_signal";
+    }
+}
+
+// The worker is ready when a profile message is waiting, or when the profile
+// port has ended and the sequence port either has a message or has ended too.
+// A worker that is already done is never ready.
+bool HmmerSearchWorker::isReady() const {
+    if (isDone()) {
+        return false;
+    }
+    const bool seqFinished = seqPort->isEnded();
+    const bool hmmFinished = hmmPort->isEnded();
+    const int pendingSeq = seqPort->hasMessage();
+    const int pendingHmm = hmmPort->hasMessage();
+    if (pendingHmm) {
+        return true;
+    }
+    return hmmFinished && (pendingSeq || seqFinished);
+}
+
+// One scheduling step of the worker.
+// First collects every pending profile URL; nothing else happens until the
+// profile port has ended. Then, for one pending sequence message, returns a
+// MultiTask holding one HmmerSearchTask per collected profile, or a FailTask
+// when the sequence alphabet is RAW. Returns NULL when there is nothing to run.
+Task * HmmerSearchWorker::tick() {
+ // Drain the profile port, remembering the URL slot of each message.
+ while (hmmPort->hasMessage()) {
+ hmms << hmmPort->get().getData().toMap().value(BaseSlots::URL_SLOT().getId()).toString();
+ }
+ if (!hmmPort->isEnded()) { // || hmms.isEmpty() || !seqPort->hasMessage()
+ return NULL;
+ }
+
+ if (seqPort->hasMessage()) {
+ Message inputMessage = getMessageAndSetupScriptValues(seqPort);
+ // Without a sequence or without profiles there is nothing to search.
+ if (inputMessage.isEmpty() || hmms.isEmpty()) {
+ output->transit();
+ return NULL;
+ }
+ SharedDbiDataHandler seqId = inputMessage.getData().toMap().value(BaseSlots::DNA_SEQUENCE_SLOT().getId()).value<SharedDbiDataHandler>();
+ // The scoped pointer deletes the sequence object on every return path that does not hand it over below.
+ QScopedPointer<U2SequenceObject> seqObj(StorageUtils::getSequenceObject(context->getDataStorage(), seqId));
+ // NOTE(review): an unresolved sequence is dropped without any message or error.
+ if (NULL == seqObj) {
+ return NULL;
+ }
+
+ if (seqObj->getAlphabet()->getType() != DNAAlphabet_RAW) {
+ QList<Task *> subtasks;
+ HmmerSearchSettings settings = cfg;
+ // One search task per profile; all of them use the same working directory and sequence object.
+ foreach (const QString &hmmProfileUrl, hmms) {
+ settings.workingDir = monitor()->outputDir() + "hmmer_search";
+ settings.hmmProfileUrl = hmmProfileUrl;
+ settings.sequence = seqObj.data();
+ settings.pattern.annotationName = resultName;
+ // Every task gets its own annotation table, which is made a QObject child of that task.
+ settings.annotationTable = new AnnotationTableObject("Annotation table", context->getDataStorage()->getDbiRef());
+ HmmerSearchTask *searchTask = new HmmerSearchTask(settings);
+ settings.annotationTable->setParent(searchTask);
+ searchTask->addListeners(createLogListeners());
+ subtasks << searchTask;
+ }
+
+ Task *multiTask = new MultiTask(tr("Find HMMER signals in %1").arg(seqObj->getGObjectName()), subtasks);
+ connect(new TaskSignalMapper(multiTask), SIGNAL(si_taskFinished(Task *)), SLOT(sl_taskFinished(Task *)));
+ // The scoped pointer releases the sequence object, which becomes a QObject child of the multi task.
+ seqObj.take()->setParent(multiTask);
+ return multiTask;
+ }
+ QString err = tr("Bad sequence supplied to input: %1").arg(seqObj->getGObjectName());
+ return new FailTask(err);
+ // The branch above always returns, so this check runs only when no sequence message was pending.
+ } if (seqPort->isEnded()) {
+ setDone();
+ output->setEnded();
+ }
+ return NULL;
+}
+
+// Collects the annotations of every HmmerSearchTask subtask of the finished
+// task and puts them to the output port as one annotation table.
+// Nothing is sent when the task was canceled, when there is no output port,
+// or when no annotations were collected.
+void HmmerSearchWorker::sl_taskFinished(Task *task) {
+    SAFE_POINT(NULL != task, "Invalid task is encountered", );
+    if (task->isCanceled()) {
+        return;
+    }
+    if (NULL == output) {
+        return;
+    }
+
+    QList<SharedAnnotationData> annotations;
+    foreach(Task *subTask, task->getSubtasks()) {
+        HmmerSearchTask *searchTask = qobject_cast<HmmerSearchTask *>(subTask);
+        // Subtasks of any other type are skipped.
+        if (NULL != searchTask) {
+            annotations << searchTask->getAnnotations();
+        }
+    }
+
+    CHECK(!annotations.isEmpty(), );
+
+    const SharedDbiDataHandler tableId = context->getDataStorage()->putAnnotationTable(annotations);
+    const QVariant tableValue = qVariantFromValue<SharedDbiDataHandler>(tableId);
+    output->put(Message(BaseTypes::ANNOTATION_TABLE_TYPE(), tableValue));
+    algoLog.info(tr("Found %1 HMMER signals").arg(annotations.size()));
+}
+
+// Intentionally empty: this worker performs no cleanup.
+void HmmerSearchWorker::cleanup() {}
+
+} // namespace LocalWorkflow
+} // namespace U2
diff --git a/src/plugins_3rdparty/hmm3/src/workers/HMM3SearchWorker.h b/src/plugins/external_tool_support/src/hmmer/HmmerSearchWorker.h
similarity index 50%
rename from src/plugins_3rdparty/hmm3/src/workers/HMM3SearchWorker.h
rename to src/plugins/external_tool_support/src/hmmer/HmmerSearchWorker.h
index f02f46d..fda32b3 100644
--- a/src/plugins_3rdparty/hmm3/src/workers/HMM3SearchWorker.h
+++ b/src/plugins/external_tool_support/src/hmmer/HmmerSearchWorker.h
@@ -19,64 +19,59 @@
* MA 02110-1301, USA.
*/
-#ifndef _U2_HMM3SEARCH_WORKER_H_
-#define _U2_HMM3SEARCH_WORKER_H_
+#ifndef _U2_HMMER_SEARCH_WORKER_H_
+#define _U2_HMMER_SEARCH_WORKER_H_
#include <U2Lang/LocalDomain.h>
#include <U2Lang/WorkflowUtils.h>
-#include "hmmer3/hmmer.h"
+#include "HmmerSearchSettings.h"
namespace U2 {
namespace LocalWorkflow {
-class HMM3SearchPrompter : public PrompterBase<HMM3SearchPrompter> {
+// Prompter of the HMMER search workflow element; composeRichDoc() produces its rich-text description.
+class HmmerSearchPrompter : public PrompterBase<HmmerSearchPrompter> {
Q_OBJECT
public:
- HMM3SearchPrompter(Actor* p = 0) : PrompterBase<HMM3SearchPrompter>(p) {}
+ HmmerSearchPrompter(Actor *p = NULL);
+
protected:
QString composeRichDoc();
};
-class HMM3SearchWorker : public BaseWorker {
+// Workflow worker of the HMMER search element: it has two input buses (HMM profiles
+// and sequences) and one output bus, and stores the profiles it receives as strings.
+class HmmerSearchWorker : public BaseWorker {
Q_OBJECT
public:
- HMM3SearchWorker(Actor* a);
- virtual void init();
- virtual bool isReady() const;
- virtual Task* tick();
- virtual void cleanup();
+ HmmerSearchWorker(Actor *a);
- static const QString E_THRESHOLD;
- static const QString T_THRESHOLD;
- static const QString CUT_GA_THRESHOLD;
- static const QString CUT_NC_THRESHOLD;
- static const QString CUT_TC_THRESHOLD;;
+ // Worker interface (presumably overriding the BaseWorker virtuals, as the removed declarations did).
+ void init();
+ bool isReady() const;
+ Task * tick();
+ void cleanup();
private slots:
- void sl_taskFinished(Task*);
-
-private:
- void initConfig();
+ void sl_taskFinished(Task *task);
protected:
- IntegralBus *hmmPort, *seqPort, *output;
+ IntegralBus *hmmPort;
+ IntegralBus *seqPort;
+ IntegralBus *output;
QString resultName;
- UHMM3SearchSettings cfg;
- QList<const P7_HMM*> hmms;
-
-};
+ HmmerSearchSettings cfg;
+ // Profiles are now kept as strings instead of parsed P7_HMM structures.
+ QStringList hmms;
+};
-class HMM3SearchWorkerFactory : public DomainFactory {
+// Domain factory of the HMMER search element: init() is the static set-up entry point
+// and createWorker() produces the worker for an actor.
+class HmmerSearchWorkerFactory : public DomainFactory {
public:
static const QString ACTOR;
+
static void init();
- HMM3SearchWorkerFactory() : DomainFactory(ACTOR) {}
- virtual Worker* createWorker(Actor* a) {return new HMM3SearchWorker(a);}
+ HmmerSearchWorkerFactory();
+ Worker * createWorker(Actor *a);
};
-} // Workflow namespace
-} // U2 namespace
+} // namespace LocalWorkflow
+} // namespace U2
-#endif
+#endif //_U2_HMMER_SEARCH_WORKER_H_
diff --git a/src/plugins/external_tool_support/src/hmmer/HmmerSupport.cpp b/src/plugins/external_tool_support/src/hmmer/HmmerSupport.cpp
new file mode 100644
index 0000000..e842304
--- /dev/null
+++ b/src/plugins/external_tool_support/src/hmmer/HmmerSupport.cpp
@@ -0,0 +1,380 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <QMainWindow>
+#include <QMessageBox>
+
+#include <U2Core/AppContext.h>
+#include <U2Core/DNASequenceObject.h>
+#include <U2Core/GObjectSelection.h>
+#include <U2Core/QObjectScopedPointer.h>
+
+#include <U2Gui/AppSettingsGUI.h>
+#include <U2Gui/GUIUtils.h>
+#include <U2Gui/ProjectView.h>
+#include <U2Gui/ToolsMenu.h>
+
+#include <U2View/ADVSequenceObjectContext.h>
+#include <U2View/ADVUtils.h>
+#include <U2View/AnnotatedDNAView.h>
+#include <U2View/AnnotatedDNAViewFactory.h>
+#include <U2View/MSAEditor.h>
+#include <U2View/MSAEditorFactory.h>
+
+#include "ExternalToolSupportSettingsController.h"
+#include "HmmerBuildDialog.h"
+#include "HmmerSearchDialog.h"
+#include "HmmerSupport.h"
+#include "PhmmerSearchDialog.h"
+
+namespace U2 {
+
+// Names of the three HMMER external tools; the HmmerSupport constructor
+// compares its `name` argument against them to pick the matching set-up routine.
+const QString HmmerSupport::BUILD_TOOL = "HMMER build";
+const QString HmmerSupport::SEARCH_TOOL = "HMMER search";
+const QString HmmerSupport::PHMMER_TOOL = "PHMMER search";
+
+// Configures one of the HMMER external tools. `name` selects the tool-specific
+// set-up: BUILD_TOOL, SEARCH_TOOL or PHMMER_TOOL; any other name gets only the
+// common settings.
+HmmerSupport::HmmerSupport(const QString &name)
+    : ExternalTool(name)
+{
+    // Icons are assigned only when a main window exists.
+    const bool hasMainWindow = AppContext::getMainWindow();
+    if (hasMainWindow) {
+        icon = QIcon(":external_tool_support/images/cmdline.png");
+        grayIcon = QIcon(":external_tool_support/images/cmdline_gray.png");
+        warnIcon = QIcon(":external_tool_support/images/cmdline_warn.png");
+    }
+
+    toolKitName = "HMMER";
+    // The dots in the pattern are not escaped, so each of them matches any single character.
+    // NOTE(review): this may be relied upon for versions such as "3.1b2" - do not escape without checking.
+    versionRegExp = QRegExp("HMMER (\\d+.\\d+.\\d+\\w?)");
+
+    // The tool names are distinct, so at most one branch applies.
+    if (BUILD_TOOL == name) {
+        initBuild();
+    } else if (SEARCH_TOOL == name) {
+        initSearch();
+    } else if (PHMMER_TOOL == name) {
+        initPhmmer();
+    }
+}
+
+// Opens the HMMER build dialog. The dialog receives the alignment of the MSA
+// editor in the active MDI window, or an empty alignment when the active window
+// is not an MSA editor with an alignment object. Does nothing when the build
+// tool is not configured.
+void HmmerSupport::sl_buildProfile() {
+    CHECK(isToolSet(BUILD_TOOL), );
+
+    MAlignment ma;
+    MWMDIWindow *activeWindow = AppContext::getMainWindow()->getMDIManager()->getActiveWindow();
+    // qobject_cast yields NULL for a NULL or mismatching object, so each step only needs one check.
+    GObjectViewWindow *viewWindow = qobject_cast<GObjectViewWindow *>(activeWindow);
+    MSAEditor *msaEditor = (NULL != viewWindow ? qobject_cast<MSAEditor *>(viewWindow->getObjectView()) : NULL);
+    MAlignmentObject *maObj = (NULL != msaEditor ? msaEditor->getMSAObject() : NULL);
+    if (NULL != maObj) {
+        ma = maObj->getMAlignment();
+    }
+
+    QWidget *dialogParent = AppContext::getMainWindow()->getQMainWindow();
+    QObjectScopedPointer<HmmerBuildDialog> buildDialog = new HmmerBuildDialog(ma, dialogParent);
+    buildDialog->exec();
+}
+
+namespace {
+
+// Picks the sequence object the HMMER search dialogs should work on.
+// The sequence in focus of the active annotated DNA view is preferred; when it
+// is unavailable, the object selected in the project view is used, provided
+// exactly one object is selected and it is a sequence object.
+// Returns NULL when neither source yields a sequence.
+U2SequenceObject * getDnaSequenceObject() {
+    U2SequenceObject *seqObj = NULL;
+    GObjectViewWindow *activeWindow = qobject_cast<GObjectViewWindow *>(AppContext::getMainWindow()->getMDIManager()->getActiveWindow());
+    if (NULL != activeWindow) {
+        AnnotatedDNAView *dnaView = qobject_cast<AnnotatedDNAView *>(activeWindow->getObjectView());
+        // The view may have no sequence in focus, so the context is checked
+        // before it is dereferenced (the original code dereferenced it blindly).
+        ADVSequenceObjectContext *seqCtx = (NULL != dnaView ? dnaView->getSequenceInFocus() : NULL);
+        if (NULL != seqCtx) {
+            seqObj = seqCtx->getSequenceObject();
+        }
+    }
+
+    if (NULL == seqObj) {
+        ProjectView *projectView = AppContext::getProjectView();
+        if (NULL != projectView) {
+            const GObjectSelection *objSelection = projectView->getGObjectSelection();
+            GObject *obj = (objSelection->getSelectedObjects().size() == 1 ? objSelection->getSelectedObjects().first() : NULL);
+            // qobject_cast yields NULL when nothing suitable is selected.
+            seqObj = qobject_cast<U2SequenceObject *>(obj);
+        }
+    }
+
+    return seqObj;
+}
+
+}
+
+// Opens the HMMER search dialog for the sequence returned by getDnaSequenceObject().
+// Does nothing when the search tool is not configured; shows an error box when
+// no sequence is available.
+void HmmerSupport::sl_search() {
+    CHECK(isToolSet(SEARCH_TOOL), );
+
+    U2SequenceObject *targetSequence = getDnaSequenceObject();
+    if (NULL != targetSequence) {
+        QWidget *dialogParent = AppContext::getMainWindow()->getQMainWindow();
+        QObjectScopedPointer<HmmerSearchDialog> searchDlg = new HmmerSearchDialog(targetSequence, dialogParent);
+        searchDlg->exec();
+    } else {
+        QMessageBox::critical(NULL, tr("Error!"), tr("Target sequence not selected: no opened annotated dna view"));
+    }
+}
+
+// Opens the phmmer search dialog for the sequence returned by getDnaSequenceObject().
+// Does nothing when the phmmer tool is not configured; shows an error box when
+// no sequence is available.
+void HmmerSupport::sl_phmmerSearch() {
+    CHECK(isToolSet(PHMMER_TOOL), );
+
+    U2SequenceObject *targetSequence = getDnaSequenceObject();
+    if (NULL != targetSequence) {
+        QWidget *dialogParent = AppContext::getMainWindow()->getQMainWindow();
+        QObjectScopedPointer<PhmmerSearchDialog> phmmerDialog = new PhmmerSearchDialog(targetSequence, dialogParent);
+        phmmerDialog->exec();
+    } else {
+        QMessageBox::critical(NULL, tr("Error!"), tr("Target sequence not selected: no opened annotated dna view"));
+    }
+}
+
+// Tool-specific set-up for "HMMER build": executable name, validation
+// arguments and message, description and, when a main window exists, the
+// menu action that triggers sl_buildProfile().
+void HmmerSupport::initBuild() {
+#ifdef Q_OS_WIN
+    executableFileName = "hmmbuild.exe";
+#elif defined(Q_OS_UNIX)
+    executableFileName = "hmmbuild";
+#endif
+
+    validationArguments.append("-h");
+    validMessage = "hmmbuild";
+    description = tr("<i>HMMER build</i> constructs HMM profiles from multiple sequence alignments.");
+
+    // Without a main window no menu action is created.
+    if (NULL == AppContext::getMainWindow()) {
+        return;
+    }
+    QAction *menuAction = new QAction(tr("Build HMM3 profile..."), this);
+    menuAction->setObjectName(ToolsMenu::HMMER_BUILD3);
+    connect(menuAction, SIGNAL(triggered()), SLOT(sl_buildProfile()));
+    ToolsMenu::addAction(ToolsMenu::HMMER_MENU, menuAction);
+}
+
+// Tool-specific set-up for "HMMER search": executable name, validation
+// arguments and message, description and, when a main window exists, the
+// menu action that triggers sl_search().
+void HmmerSupport::initSearch() {
+#ifdef Q_OS_WIN
+    executableFileName = "hmmsearch.exe";
+#elif defined(Q_OS_UNIX)
+    executableFileName = "hmmsearch";
+#endif
+
+    validationArguments.append("-h");
+    validMessage = "hmmsearch";
+    description = tr("<i>HMMER search</i> searches profile(s) against a sequence database.");
+
+    // Without a main window no menu action is created.
+    if (NULL == AppContext::getMainWindow()) {
+        return;
+    }
+    QAction *menuAction = new QAction(tr("Search with HMMER3..."), this);
+    menuAction->setObjectName(ToolsMenu::HMMER_SEARCH3);
+    connect(menuAction, SIGNAL(triggered()), SLOT(sl_search()));
+    ToolsMenu::addAction(ToolsMenu::HMMER_MENU, menuAction);
+}
+
+// Tool-specific set-up for "PHMMER search": executable name, validation
+// arguments and message, description and, when a main window exists, the
+// menu action that triggers sl_phmmerSearch().
+void HmmerSupport::initPhmmer() {
+#ifdef Q_OS_WIN
+    executableFileName = "phmmer.exe";
+#elif defined(Q_OS_UNIX)
+    executableFileName = "phmmer";
+#endif
+
+    validationArguments.append("-h");
+    validMessage = "phmmer";
+    description = tr("<i>PHMMER search</i> searches a protein sequence against a protein database.");
+
+    // Without a main window no menu action is created.
+    if (NULL == AppContext::getMainWindow()) {
+        return;
+    }
+    QAction *menuAction = new QAction(tr("Search with phmmer..."), this);
+    menuAction->setObjectName(ToolsMenu::HMMER_SEARCH3P);
+    connect(menuAction, SIGNAL(triggered()), SLOT(sl_phmmerSearch()));
+    ToolsMenu::addAction(ToolsMenu::HMMER_MENU, menuAction);
+}
+
+// Tells whether the tool path is set. When it is empty the user is asked
+// whether to select it now; on "Yes" the external tools settings page is shown
+// and the path is checked again afterwards. `name` is used only for the
+// message box title and text.
+bool HmmerSupport::isToolSet(const QString &name) const {
+    if (!path.isEmpty()) {
+        return true;
+    }
+
+    QObjectScopedPointer<QMessageBox> msgBox = new QMessageBox;
+    msgBox->setWindowTitle(name);
+    msgBox->setText(tr("Path for %1 tool not selected.").arg(name));
+    msgBox->setInformativeText(tr("Do you want to select it now?"));
+    msgBox->setStandardButtons(QMessageBox::Yes | QMessageBox::No);
+    msgBox->setDefaultButton(QMessageBox::Yes);
+    const int answer = msgBox->exec();
+    // The message box may have been destroyed while its event loop was running.
+    CHECK(!msgBox.isNull(), false);
+
+    if (QMessageBox::No == answer) {
+        return false;
+    }
+    if (QMessageBox::Yes == answer) {
+        AppContext::getAppSettingsGUI()->showSettingsDialog(ExternalToolSupportSettingsPageId);
+    } else {
+        assert(false);
+    }
+
+    // The settings dialog may or may not have provided a path.
+    return !path.isEmpty();
+}
+
+// Creates a view context bound to the MSA editor factory id.
+HmmerMsaEditorContext::HmmerMsaEditorContext(QObject *parent)
+    : GObjectViewWindowContext(parent, MSAEditorFactory::ID) {}
+
+// Adds the "Build HMMER3 profile" action to an MSA editor view.
+// Views without an alignment object get no action.
+void HmmerMsaEditorContext::initViewContext(GObjectView *view) {
+    MSAEditor *editor = qobject_cast<MSAEditor *>(view);
+    SAFE_POINT(NULL != editor, "Msa Editor is NULL", );
+    CHECK(NULL != editor->getMSAObject(), );
+
+    GObjectViewAction *buildAction = new GObjectViewAction(this, view, tr("Build HMMER3 profile"));
+    buildAction->setObjectName("Build HMMER3 profile");
+    const QIcon hmmerIcon(":/external_tool_support/images/hmmer.png");
+    buildAction->setIcon(hmmerIcon);
+    connect(buildAction, SIGNAL(triggered()), SLOT(sl_build()));
+    addViewAction(buildAction);
+}
+
+// Puts the single action registered for the view into the MSA editor's
+// "Advanced" submenu. Views without an alignment object are left untouched.
+void HmmerMsaEditorContext::buildMenu(GObjectView *view, QMenu *menu) {
+    MSAEditor *editor = qobject_cast<MSAEditor *>(view);
+    SAFE_POINT(NULL != editor, "Msa Editor is NULL", );
+    SAFE_POINT(NULL != menu, "Menu is NULL", );
+    CHECK(NULL != editor->getMSAObject(), );
+
+    // Exactly one action per view is expected.
+    QList<GObjectViewAction *> viewActions = getViewActions(view);
+    SAFE_POINT(1 == viewActions.size(), "List size is incorrect", );
+
+    QMenu *advancedMenu = GUIUtils::findSubMenu(menu, MSAE_MENU_ADVANCED);
+    SAFE_POINT(advancedMenu != NULL, "menu 'Advanced' is NULL", );
+    advancedMenu->addAction(viewActions.first());
+}
+
+// Opens the HMMER build dialog for the alignment of the MSA editor whose
+// action emitted the signal. Nothing happens when the editor has no alignment object.
+void HmmerMsaEditorContext::sl_build() {
+    GObjectViewAction *viewAction = qobject_cast<GObjectViewAction *>(sender());
+    SAFE_POINT(NULL != viewAction, "action is NULL", );
+    MSAEditor *editor = qobject_cast<MSAEditor *>(viewAction->getObjectView());
+    SAFE_POINT(NULL != editor, "Msa Editor is NULL", );
+
+    MAlignmentObject *alignmentObject = editor->getMSAObject();
+    if (NULL == alignmentObject) {
+        return;
+    }
+
+    QObjectScopedPointer<HmmerBuildDialog> buildDlg = new HmmerBuildDialog(alignmentObject->getMAlignment());
+    buildDlg->exec();
+    CHECK(!buildDlg.isNull(), );
+}
+
+// Creates a view context bound to the annotated DNA view factory id.
+HmmerAdvContext::HmmerAdvContext(QObject *parent)
+    : GObjectViewWindowContext(parent, AnnotatedDNAViewFactory::ID) {}
+
+// Creates the "Find HMM signals with HMMER3..." global action for an
+// annotated DNA view and connects it to sl_search().
+void HmmerAdvContext::initViewContext(GObjectView *view) {
+    AnnotatedDNAView *dnaView = qobject_cast<AnnotatedDNAView *>(view);
+    SAFE_POINT(NULL != dnaView, "AnnotatedDNAView is NULL", );
+
+    const QIcon hmmerIcon(":/external_tool_support/images/hmmer.png");
+    // NOTE(review): 70 is the fourth constructor argument, presumably the action's position - confirm.
+    ADVGlobalAction *findAction = new ADVGlobalAction(dnaView, hmmerIcon, tr("Find HMM signals with HMMER3..."), 70);
+    findAction->setObjectName("Find HMM signals with HMMER3");
+    connect(findAction, SIGNAL(triggered()), SLOT(sl_search()));
+}
+
+// Opens the HMMER search dialog for the sequence in focus of the view whose
+// action emitted the signal; shows an error box when no sequence is in focus.
+void HmmerAdvContext::sl_search() {
+    QWidget *dialogParent = getParentWidget(sender());
+    assert(NULL != dialogParent);
+    U2SequenceObject *focusedSequence = getSequenceInFocus(sender());
+    if (NULL != focusedSequence) {
+        QObjectScopedPointer<HmmerSearchDialog> searchDlg = new HmmerSearchDialog(focusedSequence, dialogParent);
+        searchDlg->exec();
+    } else {
+        QMessageBox::critical(dialogParent, tr("error"), tr("No sequence in focus found"));
+    }
+}
+
+// Returns the widget of the annotated DNA view that owns the sending action,
+// or the application main window when the view has no widget.
+// Returns NULL when `sender` is not a view action of an annotated DNA view.
+QWidget * HmmerAdvContext::getParentWidget(QObject *sender) {
+    GObjectViewAction *viewAction = qobject_cast<GObjectViewAction *>(sender);
+    SAFE_POINT(NULL != viewAction, "action is NULL", NULL);
+    AnnotatedDNAView *dnaView = qobject_cast<AnnotatedDNAView *>(viewAction->getObjectView());
+    SAFE_POINT(NULL != dnaView, "AnnotatedDNAView is NULL", NULL);
+
+    if (!dnaView->getWidget()) {
+        return AppContext::getMainWindow()->getQMainWindow();
+    }
+    return dnaView->getWidget();
+}
+
+// Returns the sequence object in focus of the annotated DNA view that owns
+// the sending action, or NULL when the view has no sequence in focus or
+// `sender` is not a view action of an annotated DNA view.
+U2SequenceObject * HmmerAdvContext::getSequenceInFocus(QObject *sender) {
+    GObjectViewAction *viewAction = qobject_cast<GObjectViewAction *>(sender);
+    SAFE_POINT(NULL != viewAction, "action is NULL", NULL);
+    AnnotatedDNAView *dnaView = qobject_cast<AnnotatedDNAView *>(viewAction->getObjectView());
+    SAFE_POINT(NULL != dnaView, "AnnotatedDNAView is NULL", NULL);
+
+    ADVSequenceObjectContext *focusedContext = dnaView->getSequenceInFocus();
+    return (NULL != focusedContext ? focusedContext->getSequenceObject() : NULL);
+}
+
+// Creates the context with both view contexts unset; they are created in init().
+HmmerContext::HmmerContext(QObject *parent)
+    : QObject(parent), msaEditorContext(NULL), advContext(NULL) {}
+
+// Creates the MSA editor and annotated DNA view contexts as QObject children
+// of this object, then initializes each of them.
+void HmmerContext::init() {
+ msaEditorContext = new HmmerMsaEditorContext(this);
+ advContext = new HmmerAdvContext(this);
+
+ msaEditorContext->init();
+ advContext->init();
+}
+
+// Nothing to set up beyond the default-constructed members.
+Hmmer3LogParser::Hmmer3LogParser() {}
+
+// Splits a chunk of the tool's error output into lines and reports every
+// non-empty complete line as an error. The text after the last line break is
+// kept in lastErrLine and prepended to the first line of the next chunk.
+void Hmmer3LogParser::parseErrOutput(const QString& partOfLog) {
+    lastPartOfLog = partOfLog.split(QRegExp("(\n|\r)"));
+    // Complete the line that was cut off at the end of the previous chunk.
+    lastPartOfLog.first().prepend(lastErrLine);
+    // The last piece has no terminating line break yet, so it is held back.
+    lastErrLine = lastPartOfLog.takeLast();
+
+    foreach(const QString &line, lastPartOfLog) {
+        if (line.isEmpty()) {
+            continue;
+        }
+        algoLog.error("Hmmer3: " + line);
+        setLastError(line);
+    }
+}
+
+} // namespace U2
diff --git a/src/plugins/external_tool_support/src/hmmer/HmmerSupport.h b/src/plugins/external_tool_support/src/hmmer/HmmerSupport.h
new file mode 100644
index 0000000..f8e9fc0
--- /dev/null
+++ b/src/plugins/external_tool_support/src/hmmer/HmmerSupport.h
@@ -0,0 +1,107 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef _U2_HMMER_SUPPORT_H_
+#define _U2_HMMER_SUPPORT_H_
+
+#include <U2Core/ExternalToolRegistry.h>
+#include <U2Core/ExternalToolRunTask.h>
+
+#include <U2Gui/ObjectViewModel.h>
+
+namespace U2 {
+
+class U2SequenceObject;
+
+// External tool description shared by the three HMMER tools; the tool is
+// chosen by the name passed to the constructor (one of the constants below).
+class HmmerSupport : public ExternalTool {
+ Q_OBJECT
+public:
+ HmmerSupport(const QString &name);
+
+ // Names of the supported tools.
+ static const QString BUILD_TOOL;
+ static const QString SEARCH_TOOL;
+ static const QString PHMMER_TOOL;
+
+private slots:
+ // One slot per tool.
+ void sl_buildProfile();
+ void sl_search();
+ void sl_phmmerSearch();
+
+private:
+ // Tool-specific set-up routines, one per tool.
+ void initBuild();
+ void initSearch();
+ void initPhmmer();
+
+ bool isToolSet(const QString &name) const;
+};
+
+// View context, derived from GObjectViewWindowContext, that provides the sl_build() slot.
+class HmmerMsaEditorContext : public GObjectViewWindowContext {
+ Q_OBJECT
+public:
+ HmmerMsaEditorContext(QObject *parent);
+
+private slots:
+ void sl_build();
+
+private:
+ // Presumably overrides of the GObjectViewWindowContext hooks - confirm.
+ void initViewContext(GObjectView *view);
+ void buildMenu(GObjectView *view, QMenu *menu);
+};
+
+// View context, derived from GObjectViewWindowContext, that provides the sl_search() slot.
+class HmmerAdvContext : public GObjectViewWindowContext {
+ Q_OBJECT
+public:
+ HmmerAdvContext(QObject *parent);
+
+private slots:
+ void sl_search();
+
+private:
+ // Presumably an override of the GObjectViewWindowContext hook - confirm.
+ void initViewContext(GObjectView *view);
+
+ // Helpers that take the signal sender as their argument.
+ QWidget * getParentWidget(QObject *sender);
+ U2SequenceObject * getSequenceInFocus(QObject *sender);
+};
+
+// Holds the two HMMER view contexts (MSA editor and annotated DNA view).
+// The class declares no Q_OBJECT macro, signals or slots.
+class HmmerContext : public QObject {
+public:
+ HmmerContext(QObject *parent);
+
+ void init();
+
+private:
+ HmmerMsaEditorContext *msaEditorContext;
+ HmmerAdvContext *advContext;
+};
+
+// Log parser for HMMER tools with its own handling of the error output; not copyable.
+class Hmmer3LogParser : public ExternalToolLogParser {
+ Q_OBJECT
+ Q_DISABLE_COPY(Hmmer3LogParser)
+public:
+ Hmmer3LogParser();
+ void parseErrOutput(const QString& partOfLog);
+private:
+ // Presumably the unfinished tail of the previously parsed output chunk - confirm against the .cpp.
+ QString lastErrLine;
+};
+
+} // namespace U2
+
+#endif // _U2_HMMER_SUPPORT_H_
diff --git a/src/plugins/external_tool_support/src/hmmer/PhmmerSearchDialog.cpp b/src/plugins/external_tool_support/src/hmmer/PhmmerSearchDialog.cpp
new file mode 100644
index 0000000..706d1df
--- /dev/null
+++ b/src/plugins/external_tool_support/src/hmmer/PhmmerSearchDialog.cpp
@@ -0,0 +1,223 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <math.h>
+
+#include <QMessageBox>
+#include <QPushButton>
+
+#include <U2Core/AppContext.h>
+#include <U2Core/DNAAlphabet.h>
+#include <U2Core/GObjectTypes.h>
+#include <U2Core/L10n.h>
+#include <U2Core/U2OpStatusUtils.h>
+#include <U2Core/U2SafePoints.h>
+
+#include <U2Gui/DialogUtils.h>
+#include <U2Gui/HelpButton.h>
+#include <U2Gui/LastUsedDirHelper.h>
+#include <U2Gui/U2FileDialog.h>
+
+#include "PhmmerSearchTask.h"
+#include "PhmmerSearchDialog.h"
+
+namespace U2 {
+
+const QString PhmmerSearchDialog::QUERY_FILES_DIR = "uhmm3_phmmer_query_files_dir";
+const QString PhmmerSearchDialog::DOM_E_PLUS_PREFIX = "1E+";
+const QString PhmmerSearchDialog::DOM_E_MINUS_PREFIX = "1E";
+const QString PhmmerSearchDialog::ANNOTATIONS_DEFAULT_NAME = "signal";
+
+// Builds the phmmer search dialog for the given sequence object: sets up the
+// generated UI, fills the widgets with the default settings, embeds the
+// annotation-creation widget into the first tab and connects widget signals.
+PhmmerSearchDialog::PhmmerSearchDialog(U2SequenceObject *seqObj, QWidget *parent)
+ : QDialog(parent)
+{
+ // Debug-only check: seqObj is dereferenced below without a runtime null check.
+ assert(NULL != seqObj);
+ setupUi(this);
+
+ new HelpButton(this, buttonBox, "18220561");
+ buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Search"));
+ buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
+
+ // NOTE(review): nothing visible here ever writes to `os`, so the SAFE_POINT_EXT
+ // below looks like it can never fire - confirm whether a call taking `os` was dropped.
+ U2OpStatusImpl os;
+ model.dbSequence = seqObj;
+ SAFE_POINT_EXT(!os.hasError(), QMessageBox::critical(QApplication::activeWindow(), L10N::errorTitle(), os.getError()), );
+ // Runs before the connect() calls below, so the slots are not triggered by these initial values.
+ setModelValues(); // default model here
+
+ // Annotations widget
+ CreateAnnotationModel annModel;
+ annModel.hideLocation = true;
+ annModel.sequenceObjectRef = seqObj;
+ annModel.useAminoAnnotationTypes = seqObj->getAlphabet()->isAmino();
+ annModel.data->type = U2FeatureTypes::MiscSignal;
+ annModel.data->name = ANNOTATIONS_DEFAULT_NAME;
+ annModel.sequenceLen = seqObj->getSequenceLength();
+ annotationsWidgetController = new CreateAnnotationWidgetController(annModel, this);
+
+ // The annotation widget is inserted into the first tab's vertical layout
+ // at index ANNOTATIONS_WIDGET_LOCATION.
+ QWidget *firstTab = mainTabWidget->widget(0);
+ assert(NULL != firstTab);
+ QVBoxLayout *curLayout = qobject_cast<QVBoxLayout *>(firstTab->layout());
+ assert(NULL != curLayout);
+ curLayout->insertWidget(ANNOTATIONS_WIDGET_LOCATION, annotationsWidgetController->getWidget());
+
+ connect(queryToolButton, SIGNAL(clicked()), SLOT(sl_queryToolButtonClicked()));
+ connect(useEvalTresholdsButton, SIGNAL(toggled(bool)), SLOT(sl_useEvalTresholdsButtonChanged(bool)));
+ connect(useScoreTresholdsButton, SIGNAL(toggled(bool)), SLOT(sl_useScoreTresholdsButtonChanged(bool)));
+ connect(domZCheckBox, SIGNAL(stateChanged(int)), SLOT(sl_domZCheckBoxChanged(int)));
+ connect(maxCheckBox, SIGNAL(stateChanged(int)), SLOT(sl_maxCheckBoxChanged(int)));
+ connect(domESpinBox, SIGNAL(valueChanged(int)), SLOT(sl_domESpinBoxChanged(int)));
+
+ adjustSize();
+}
+
+// Copies the current (default) phmmer settings from the model into the widgets.
+void PhmmerSearchDialog::setModelValues() {
+    const PhmmerSearchSettings &defaults = model.phmmerSettings;
+
+    // The spin box stores the exponent of the domain e-value: 10^1 == 10.0.
+    domESpinBox->setValue(1);
+    assert(10.0 == defaults.domE);
+    scoreTresholdDoubleSpin->setValue(defaults.domT);
+
+    // Acceleration heuristics thresholds.
+    f1DoubleSpinBox->setValue(defaults.f1);
+    f2DoubleSpinBox->setValue(defaults.f2);
+    f3DoubleSpinBox->setValue(defaults.f3);
+
+    seedSpinBox->setValue(defaults.seed);
+
+    // E-value calibration parameters.
+    emlSpinBox->setValue(defaults.eml);
+    emnSpinBox->setValue(defaults.emn);
+    evlSpinBox->setValue(defaults.evl);
+    evnSpinBox->setValue(defaults.evn);
+    eflSpinBox->setValue(defaults.efl);
+    efnSpinBox->setValue(defaults.efn);
+    eftDoubleSpinBox->setValue(defaults.eft);
+
+    // Gap probabilities.
+    popenDoubleSpinBox->setValue(defaults.popen);
+    pextendDoubleSpinBox->setValue(defaults.pextend);
+}
+
+// Lets the user pick the query sequence file and puts the chosen path into the line edit.
+void PhmmerSearchDialog::sl_queryToolButtonClicked() {
+    LastUsedDirHelper dirHelper(QUERY_FILES_DIR);
+    dirHelper.url = U2FileDialog::getOpenFileName(this,
+                                                  tr("Select query sequence file"),
+                                                  dirHelper,
+                                                  DialogUtils::prepareDocumentsFileFilterByObjType(GObjectTypes::SEQUENCE, true));
+    if (dirHelper.url.isEmpty()) {
+        // Nothing was selected: keep the current line edit text.
+        return;
+    }
+    queryLineEdit->setText(dirHelper.url);
+}
+
+// Reads the widget values back into model.phmmerSettings.
+void PhmmerSearchDialog::getModelValues() {
+    PhmmerSearchSettings &target = model.phmmerSettings;
+
+    // Exactly one of the two threshold radio buttons is expected to be checked.
+    const bool useEvalue = useEvalTresholdsButton->isChecked();
+    if (useEvalue) {
+        // The spin box holds the exponent of the e-value threshold.
+        target.domE = pow(10.0, domESpinBox->value());
+        target.domT = PhmmerSearchSettings::OPTION_NOT_SET;
+    } else if (useScoreTresholdsButton->isChecked()) {
+        target.domT = scoreTresholdDoubleSpin->value();
+    } else {
+        assert(false);
+    }
+
+    // Gap probabilities.
+    target.popen = popenDoubleSpinBox->value();
+    target.pextend = pextendDoubleSpinBox->value();
+
+    // Filters and acceleration thresholds.
+    target.noBiasFilter = nobiasCheckBox->isChecked();
+    target.noNull2 = nonull2CheckBox->isChecked();
+    target.doMax = maxCheckBox->isChecked();
+    target.f1 = f1DoubleSpinBox->value();
+    target.f2 = f2DoubleSpinBox->value();
+    target.f3 = f3DoubleSpinBox->value();
+
+    // E-value calibration and RNG seed.
+    target.eml = emlSpinBox->value();
+    target.emn = emnSpinBox->value();
+    target.evl = evlSpinBox->value();
+    target.evn = evnSpinBox->value();
+    target.efl = eflSpinBox->value();
+    target.efn = efnSpinBox->value();
+    target.eft = eftDoubleSpinBox->value();
+    target.seed = seedSpinBox->value();
+
+    // Input/output: the group name is written after the pattern is assigned,
+    // so it overrides whatever group name the pattern carried.
+    const CreateAnnotationModel &annotationModel = annotationsWidgetController->getModel();
+    target.pattern = annotationsWidgetController->getAnnotationPattern();
+    target.annotationTable = annotationModel.getAnnotationObject();
+    target.querySequenceUrl = queryLineEdit->text();
+    target.targetSequence = model.dbSequence;
+    target.pattern.groupName = annotationModel.groupName;
+}
+
+// Validates the collected settings; returns an error message or an empty string if all is fine.
+QString PhmmerSearchDialog::checkModel() {
+    if (model.phmmerSettings.querySequenceUrl.isEmpty()) {
+        const QString emptyPathError = tr("Query sequence file path is empty");
+        queryLineEdit->setFocus();
+        return emptyPathError;
+    }
+
+    const QString annotationsError = annotationsWidgetController->validate();
+    if (annotationsError.isEmpty()) {
+        // Debug-only sanity check of the remaining settings.
+        assert(model.phmmerSettings.validate());
+    }
+
+    return annotationsError;
+}
+
+// Prepares the annotation object, validates the settings and, if both succeed,
+// registers a phmmer search task and closes the dialog.
+void PhmmerSearchDialog::accept() {
+    if (!annotationsWidgetController->prepareAnnotationObject()) {
+        QMessageBox::warning(this, tr("Error"), tr("Cannot create an annotation object. Please check settings"));
+        return;
+    }
+
+    getModelValues();
+    const QString modelError = checkModel();
+    if (!modelError.isEmpty()) {
+        QMessageBox::critical(this, tr("Error: bad arguments!"), modelError);
+        return;
+    }
+
+    PhmmerSearchTask *searchTask = new PhmmerSearchTask(model.phmmerSettings);
+    AppContext::getTaskScheduler()->registerTopLevelTask(searchTask);
+
+    QDialog::accept();
+}
+
+// The e-value exponent spin box is editable only while the e-value threshold mode is selected.
+void PhmmerSearchDialog::sl_useEvalTresholdsButtonChanged(bool checked) {
+ domESpinBox->setEnabled(checked);
+}
+
+// The score threshold spin box is editable only while the score threshold mode is selected.
+void PhmmerSearchDialog::sl_useScoreTresholdsButtonChanged(bool checked) {
+ scoreTresholdDoubleSpin->setEnabled(checked);
+}
+
+// Enables the domZ spin box only while its check box is checked.
+void PhmmerSearchDialog::sl_domZCheckBoxChanged(int state) {
+    // The check box is two-state, so a partially checked state is not expected.
+    assert(Qt::PartiallyChecked != state);
+    domZDoubleSpinBox->setEnabled(Qt::Checked == state);
+}
+
+// The F1/F2/F3 labels and spin boxes are enabled only while the "max" check box is unchecked.
+void PhmmerSearchDialog::sl_maxCheckBoxChanged(int state) {
+    assert(Qt::PartiallyChecked != state);
+    const bool filtersEnabled = (Qt::Unchecked == state);
+
+    QWidget *const filterWidgets[] = {
+        f1Label, f2Label, f3Label,
+        f1DoubleSpinBox, f2DoubleSpinBox, f3DoubleSpinBox
+    };
+    const int widgetsCount = sizeof(filterWidgets) / sizeof(filterWidgets[0]);
+    for (int i = 0; i < widgetsCount; ++i) {
+        filterWidgets[i]->setEnabled(filtersEnabled);
+    }
+}
+
+// Keeps the spin box prefix in sync with the sign of the exponent, so that
+// the widget reads as "1E+<n>" for non-negative values and "1E<-n>" otherwise.
+void PhmmerSearchDialog::sl_domESpinBoxChanged(int newVal) {
+    if (newVal < 0) {
+        domESpinBox->setPrefix(DOM_E_MINUS_PREFIX);
+    } else {
+        domESpinBox->setPrefix(DOM_E_PLUS_PREFIX);
+    }
+}
+
+} // namespace U2
diff --git a/src/plugins_3rdparty/hmm3/src/phmmer/uHMM3PhmmerDialogImpl.h b/src/plugins/external_tool_support/src/hmmer/PhmmerSearchDialog.h
similarity index 56%
rename from src/plugins_3rdparty/hmm3/src/phmmer/uHMM3PhmmerDialogImpl.h
rename to src/plugins/external_tool_support/src/hmmer/PhmmerSearchDialog.h
index 7950ff9..e0b3784 100644
--- a/src/plugins_3rdparty/hmm3/src/phmmer/uHMM3PhmmerDialogImpl.h
+++ b/src/plugins/external_tool_support/src/hmmer/PhmmerSearchDialog.h
@@ -19,63 +19,54 @@
* MA 02110-1301, USA.
*/
-#ifndef _GB2_UHMM3_PHMMER_DIALOG_IMPL_H_
-#define _GB2_UHMM3_PHMMER_DIALOG_IMPL_H_
+#ifndef _U2_PHMMER_SEARCH_DIALOG_H_
+#define _U2_PHMMER_SEARCH_DIALOG_H_
#include <U2Core/DNASequenceObject.h>
#include <U2Core/DNASequence.h>
-#include <U2Gui/CreateAnnotationWidgetController.h>
-#include <phmmer/uhmm3phmmer.h>
-#include <ui_UHMM3PhmmerDialog.h>
+#include <U2Gui/CreateAnnotationWidgetController.h>
-#if (QT_VERSION < 0x050000) //Qt 5
-#include <QtGui/QDialog>
-#else
-#include <QtWidgets/QDialog>
-#endif
+#include "PhmmerSearchSettings.h"
+#include "ui_PhmmerSearchDialog.h"
namespace U2 {
-class UHMM3PhmmerDialogModel {
+class PhmmerSearchDialogModel {
public:
- UHMM3PhmmerSettings phmmerSettings;
- QString queryfile;
- DNASequence dbSequence;
-}; // UHMM3PhmmerDialogModel
+ PhmmerSearchSettings phmmerSettings;
+ QPointer<U2SequenceObject> dbSequence;
+};
-class UHMM3PhmmerDialogImpl : public QDialog, public Ui_UHMM3PhmmerDialog {
+class PhmmerSearchDialog : public QDialog, public Ui_PhmmerSearchDialog {
Q_OBJECT
-private:
- static const QString QUERY_FILES_DIR;
- static const QString DOM_E_PLUS_PREFIX;
- static const QString DOM_E_MINUS_PREFIX;
- static const QString ANNOTATIONS_DEFAULT_NAME;
- static const int ANNOTATIONS_WIDGET_LOCATION = 1;
public:
- UHMM3PhmmerDialogImpl( const U2SequenceObject * seqObj, QWidget * p = NULL );
-
-private:
- void setModelValues();
- void getModelValues();
- QString checkModel();
+ PhmmerSearchDialog(U2SequenceObject *seqObj, QWidget *parent = NULL);
private slots:
+ void accept();
void sl_queryToolButtonClicked();
- void sl_cancelButtonClicked();
- void sl_okButtonClicked();
- void sl_useEvalTresholdsButtonChanged( bool checked );
- void sl_useScoreTresholdsButtonChanged( bool checked );
- void sl_domZCheckBoxChanged( int state );
- void sl_maxCheckBoxChanged( int state );
- void sl_domESpinBoxChanged( int newVal );
+ void sl_useEvalTresholdsButtonChanged(bool checked);
+ void sl_useScoreTresholdsButtonChanged(bool checked);
+ void sl_domZCheckBoxChanged(int state);
+ void sl_maxCheckBoxChanged(int state);
+ void sl_domESpinBoxChanged(int newVal);
private:
- UHMM3PhmmerDialogModel model;
- CreateAnnotationWidgetController * annotationsWidgetController;
-
-}; // UHMM3PhmmerDialogImpl
+ void setModelValues();
+ void getModelValues();
+ QString checkModel();
+
+ PhmmerSearchDialogModel model;
+ CreateAnnotationWidgetController *annotationsWidgetController;
+
+ static const QString QUERY_FILES_DIR;
+ static const QString DOM_E_PLUS_PREFIX;
+ static const QString DOM_E_MINUS_PREFIX;
+ static const QString ANNOTATIONS_DEFAULT_NAME;
+ static const int ANNOTATIONS_WIDGET_LOCATION = 1;
+};
-} // U2
+} // namespace U2
-#endif // _GB2_UHMM3_PHMMER_DIALOG_IMPL_H_
+#endif // _U2_PHMMER_SEARCH_DIALOG_H_
diff --git a/src/plugins_3rdparty/hmm3/src/phmmer/UHMM3PhmmerDialog.ui b/src/plugins/external_tool_support/src/hmmer/PhmmerSearchDialog.ui
similarity index 96%
rename from src/plugins_3rdparty/hmm3/src/phmmer/UHMM3PhmmerDialog.ui
rename to src/plugins/external_tool_support/src/hmmer/PhmmerSearchDialog.ui
index 68463c4..c4eae79 100644
--- a/src/plugins_3rdparty/hmm3/src/phmmer/UHMM3PhmmerDialog.ui
+++ b/src/plugins/external_tool_support/src/hmmer/PhmmerSearchDialog.ui
@@ -1,13 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
- <class>UHMM3PhmmerDialog</class>
- <widget class="QDialog" name="UHMM3PhmmerDialog">
+ <class>PhmmerSearchDialog</class>
+ <widget class="QDialog" name="PhmmerSearchDialog">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
- <width>587</width>
- <height>290</height>
+ <width>593</width>
+ <height>295</height>
</rect>
</property>
<property name="minimumSize">
@@ -29,7 +29,7 @@
</size>
</property>
<property name="currentIndex">
- <number>5</number>
+ <number>0</number>
</property>
<widget class="QWidget" name="inputAndOutputTab">
<attribute name="title">
@@ -777,5 +777,38 @@
</layout>
</widget>
<resources/>
- <connections/>
+ <connections>
+ <connection>
+ <sender>buttonBox</sender>
+ <signal>accepted()</signal>
+ <receiver>PhmmerSearchDialog</receiver>
+ <slot>accept()</slot>
+ <hints>
+ <hint type="sourcelabel">
+ <x>296</x>
+ <y>274</y>
+ </hint>
+ <hint type="destinationlabel">
+ <x>296</x>
+ <y>147</y>
+ </hint>
+ </hints>
+ </connection>
+ <connection>
+ <sender>buttonBox</sender>
+ <signal>rejected()</signal>
+ <receiver>PhmmerSearchDialog</receiver>
+ <slot>reject()</slot>
+ <hints>
+ <hint type="sourcelabel">
+ <x>296</x>
+ <y>274</y>
+ </hint>
+ <hint type="destinationlabel">
+ <x>296</x>
+ <y>147</y>
+ </hint>
+ </hints>
+ </connection>
+ </connections>
</ui>
diff --git a/src/plugins/external_tool_support/src/hmmer/PhmmerSearchSettings.cpp b/src/plugins/external_tool_support/src/hmmer/PhmmerSearchSettings.cpp
new file mode 100644
index 0000000..08f2269
--- /dev/null
+++ b/src/plugins/external_tool_support/src/hmmer/PhmmerSearchSettings.cpp
@@ -0,0 +1,81 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <U2Core/U2SafePoints.h>
+
+#include "PhmmerSearchSettings.h"
+
+namespace U2 {
+
+const double PhmmerSearchSettings::OPTION_NOT_SET = -1.0;
+
+// Initializes every numeric/boolean option with its default value.
+// Options set to OPTION_NOT_SET (t, z, domT, domZ) are omitted from the
+// phmmer command line by PhmmerSearchTask::getArguments().
+// String members and targetSequence are left default-constructed.
+PhmmerSearchSettings::PhmmerSearchSettings()
+ : e(10.0),
+ t(OPTION_NOT_SET),
+ z(OPTION_NOT_SET),
+ domE(10.0),
+ domT(OPTION_NOT_SET),
+ domZ(OPTION_NOT_SET),
+ f1(0.02),
+ f2(1e-3),
+ f3(1e-5),
+ doMax(false),
+ noBiasFilter(false),
+ noNull2(false),
+ eml(200),
+ emn(200),
+ evl(200),
+ evn(200),
+ efl(100),
+ efn(200),
+ eft(0.04),
+ popen(0.02),
+ pextend(0.4),
+ seed(42),
+ annotationTable(NULL)
+{
+
+}
+
+// Returns true if every option is inside its accepted range and the inputs are set:
+// a query sequence URL is required, and the target must be given either as a URL
+// or as a live sequence object. The first failed check makes the function return false.
+bool PhmmerSearchSettings::validate() const {
+ CHECK(0 < e, false);
+ // Optional thresholds: either strictly positive or left as OPTION_NOT_SET.
+ // NOTE(review): zero or negative score thresholds (t, domT) are rejected here -
+ // confirm that the dialog's score spin box cannot produce such values.
+ CHECK(0 < t || OPTION_NOT_SET == t, false);
+ CHECK(0 < z || OPTION_NOT_SET == z, false);
+ CHECK(0 < domE, false);
+ CHECK(0 < domT || OPTION_NOT_SET == domT, false);
+ CHECK(0 < domZ || OPTION_NOT_SET == domZ, false);
+ CHECK(0 < eml, false);
+ CHECK(0 < emn, false);
+ CHECK(0 < evl, false);
+ CHECK(0 < evn, false);
+ CHECK(0 < efl, false);
+ CHECK(0 < efn, false);
+ CHECK(0 < eft && eft < 1, false);
+ CHECK(0 <= popen && popen < 0.5, false);
+ CHECK(0 <= pextend && pextend < 1, false);
+ CHECK(0 <= seed, false);
+ CHECK(!querySequenceUrl.isEmpty(), false);
+ CHECK(!targetSequenceUrl.isEmpty() || NULL != targetSequence, false);
+
+ return true;
+}
+
+} // namespace U2
diff --git a/src/plugins/external_tool_support/src/hmmer/PhmmerSearchSettings.h b/src/plugins/external_tool_support/src/hmmer/PhmmerSearchSettings.h
new file mode 100644
index 0000000..8961a94
--- /dev/null
+++ b/src/plugins/external_tool_support/src/hmmer/PhmmerSearchSettings.h
@@ -0,0 +1,78 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef _U2_PHMMER_SEARCH_SETTINGS_H_
+#define _U2_PHMMER_SEARCH_SETTINGS_H_
+
+#include <U2Core/AnnotationCreationPattern.h>
+#include <U2Core/AnnotationTableObject.h>
+#include <U2Core/DNASequenceObject.h>
+
+namespace U2 {
+
+// Plain settings holder for a phmmer run: command-line options, input/output
+// locations and the annotation parameters used for the results.
+class PhmmerSearchSettings {
+public:
+ PhmmerSearchSettings();
+
+ // Checks option ranges and that the query/target inputs are specified.
+ bool validate() const;
+
+ double e; // -E: report sequences <= this e-value threshold in output
+ double t; // -T: report sequences >= this score threshold in output
+ double z; // -Z: set # of comparisons done, for e-value calculation
+ double domE; // --domE: report domains <= this e-value threshold in output
+ double domT; // --domT: report domains >= this score cutoff in output
+ double domZ; // --domZ: set number of significant seqs, for domain e-value calibration
+
+ double f1; // --F1: Stage 1 (MSV) threshold: promote hits w/ P <= F1
+ double f2; // --F2: Stage 2 (Vit) threshold: promote hits w/ P <= F2
+ double f3; // --F3: Stage 3 (Fwd) threshold: promote hits w/ P <= F3
+
+ bool doMax; // --max: Turn all heuristic filters off ( less speed more power )
+ bool noBiasFilter; // --nobias: turn off composition bias filter
+ bool noNull2; // --nonull2: turn off biased composition score corrections
+
+ int eml; // --EmL. length of sequences for MSV Gumbel mu fit
+ int emn; // --EmN. number of sequences for MSV Gumbel mu fit
+ int evl; // --EvL. length of sequences for Viterbi Gumbel mu fit
+ int evn; // --EvN. number of sequences for Viterbi Gumbel mu fit
+ int efl; // --EfL. length of sequences for forward exp tail mu fit
+ int efn; // --EfN. number of sequences for forward exp tail mu fit
+ double eft; // --Eft. tail mass for forward exponential tail mu fit
+
+ double popen; // --popen: gap open probability
+ double pextend; // --pextend: gap extend probability
+
+ int seed; // --seed : set RNG seed ( if 0: one-time arbitrary seed )
+
+ QString workingDir; // if empty, the task creates (and later removes) a temporary directory
+ QString querySequenceUrl; // query sequence file, required
+ QString targetSequenceUrl; // target sequence file; if empty, targetSequence is saved to a file first
+ QPointer<U2SequenceObject> targetSequence;
+
+ QPointer<AnnotationTableObject> annotationTable; // table that receives the result annotations
+ AnnotationCreationPattern pattern;
+
+ // Sentinel for optional numeric options (t, z, domT, domZ) that are not passed to the tool.
+ static const double OPTION_NOT_SET;
+};
+
+} // namespace U2
+
+#endif // _U2_PHMMER_SEARCH_SETTINGS_H_
diff --git a/src/plugins/external_tool_support/src/hmmer/PhmmerSearchTask.cpp b/src/plugins/external_tool_support/src/hmmer/PhmmerSearchTask.cpp
new file mode 100644
index 0000000..790e485
--- /dev/null
+++ b/src/plugins/external_tool_support/src/hmmer/PhmmerSearchTask.cpp
@@ -0,0 +1,228 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <QCoreApplication>
+#include <QDir>
+
+#include <U2Core/AnnotationTableObject.h>
+#include <U2Core/AppContext.h>
+#include <U2Core/AppResources.h>
+#include <U2Core/AppSettings.h>
+#include <U2Core/BaseDocumentFormats.h>
+#include <U2Core/Counter.h>
+#include <U2Core/CreateAnnotationTask.h>
+#include <U2Core/L10n.h>
+#include <U2Core/U2OpStatusUtils.h>
+#include <U2Core/U2SafePoints.h>
+#include <U2Core/UserApplicationsSettings.h>
+
+#include "HmmerParseSearchResultsTask.h"
+#include "HmmerSupport.h"
+#include "PhmmerSearchTask.h"
+#include "utils/ExportTasks.h"
+
+namespace U2 {
+
+const QString PhmmerSearchTask::INPUT_SEQUENCE_FILENAME = "input_sequence.fa";
+const QString PhmmerSearchTask::PER_DOMAIN_HITS_FILENAME = "per_domain_hits.txt";
+
+// Creates the top-level phmmer search task for the given settings.
+// All subtask pointers start as NULL: onSubTaskFinished() compares the finished
+// subtask against each of them, including saveSequenceTask, which is never
+// created when the target sequence is already available as a file.
+// Invalid settings put the task into the error state.
+PhmmerSearchTask::PhmmerSearchTask(const PhmmerSearchSettings &settings)
+    : ExternalToolSupportTask(tr("Search with phmmer"), TaskFlags_NR_FOSE_COSC | TaskFlag_ReportingIsEnabled | TaskFlag_ReportingIsSupported),
+      settings(settings),
+      saveSequenceTask(NULL),
+      phmmerTask(NULL),
+      parseTask(NULL),
+      removeWorkingDir(false)
+{
+    GCOUNTER(cvar, tvar, "HMMER Search");
+    SAFE_POINT_EXT(settings.validate(), setError("Settings are invalid"), );
+}
+
+// Returns the annotations produced by the parse subtask, or an empty list
+// if that subtask has not been created.
+QList<SharedAnnotationData> PhmmerSearchTask::getAnnotations() const {
+    if (NULL == parseTask) {
+        return QList<SharedAnnotationData>();
+    }
+    return parseTask->getAnnotations();
+}
+
+// Prepares the working directory and schedules the first subtask:
+// either saving the in-memory target sequence to a file, or, if a target
+// file URL is already set, running phmmer right away.
+void PhmmerSearchTask::prepare() {
+    prepareWorkingDir();
+    // Do not build subtasks that would write into a directory that could not be prepared.
+    CHECK_OP(stateInfo, );
+
+    if (settings.targetSequenceUrl.isEmpty()) {
+        SAFE_POINT_EXT(NULL != settings.targetSequence, setError(L10N::nullPointerError("sequence object")), );
+        prepareSequenceSaveTask();
+        addSubTask(saveSequenceTask);
+    } else {
+        preparePhmmerTask();
+        addSubTask(phmmerTask);
+    }
+}
+
+// Chains the pipeline stages: save sequence -> run phmmer -> parse hits -> create annotations.
+// Returns the next subtask to run (if any) for the subtask that has just finished.
+QList<Task *> PhmmerSearchTask::onSubTaskFinished(Task *subTask) {
+    QList<Task *> nextTasks;
+    CHECK_OP(stateInfo, nextTasks);
+
+    if (saveSequenceTask == subTask) {
+        // The target sequence is on disk now, phmmer can be started.
+        preparePhmmerTask();
+        nextTasks.append(phmmerTask);
+        return nextTasks;
+    }
+
+    if (phmmerTask == subTask) {
+        // Parse the per-domain hits table written by phmmer.
+        const QString hitsFilePath = settings.workingDir + "/" + PER_DOMAIN_HITS_FILENAME;
+        parseTask = new HmmerParseSearchResultsTask(hitsFilePath, settings.pattern);
+        parseTask->setSubtaskProgressWeight(5);
+        nextTasks.append(parseTask);
+        return nextTasks;
+    }
+
+    if (parseTask == subTask) {
+        // Temporary files are not needed once the results are parsed.
+        removeTempDir();
+        Task *annotationsTask = new CreateAnnotationsTask(settings.annotationTable, parseTask->getAnnotations(), settings.pattern.groupName);
+        annotationsTask->setSubtaskProgressWeight(5);
+        nextTasks.append(annotationsTask);
+    }
+
+    return nextTasks;
+}
+
+// Builds the HTML table shown in the task report: the query file, and for
+// a successfully finished task also the result table/group/name and hit count.
+QString PhmmerSearchTask::generateReport() const {
+    QString report("<table>");
+    report.append("<tr><td><b>" + tr("Query sequence: ") + "</b></td><td>" + QFileInfo(settings.querySequenceUrl).absoluteFilePath() + "</td></tr>");
+
+    const bool notFinished = hasError() || isCanceled();
+    if (notFinished) {
+        report.append("<tr><td><b>" + tr("Task was not finished") + "</b></td><td></td></tr>");
+        report.append("</table>");
+        return report;
+    }
+
+    // The annotation table may be gone or may not belong to a document.
+    const bool hasTableDocument = NULL != settings.annotationTable && NULL != settings.annotationTable->getDocument();
+    if (hasTableDocument) {
+        report.append("<tr><td><b>" + tr("Result annotation table: ") + "</b></td><td>" + settings.annotationTable->getDocument()->getName() + "</td></tr>");
+    }
+    report.append("<tr><td><b>" + tr("Result annotation group: ") + "</b></td><td>" + settings.pattern.groupName + "</td></tr>");
+    report.append("<tr><td><b>" + tr("Result annotation name: ") + "</b></td><td>" + settings.pattern.annotationName + "</td></tr>");
+
+    report.append("<tr><td><b>" + tr("Results count: ") + "</b></td><td>" + QString::number(getAnnotations().size()) + "</td></tr>");
+    report.append("</table>");
+    return report;
+}
+
+namespace {
+
+const QString PHMMER_TEMP_DIR = "phmmer";
+
+// Composes a directory name out of the prefix, the task id, the current
+// date and time and the id of the application process.
+QString getTaskTempDirName(const QString &prefix, Task *task) {
+    const QString taskIdPart = QString::number(task->getTaskId());
+    const QString datePart = QDate::currentDate().toString("dd.MM.yyyy");
+    const QString timePart = QTime::currentTime().toString("hh.mm.ss.zzz");
+    const QString pidPart = QString::number(QCoreApplication::applicationPid());
+
+    return prefix + taskIdPart + "_" + datePart + "_" + timePart + "_" + pidPart;
+}
+
+}
+
+// Makes sure that an empty working directory exists. If no directory is set
+// in the settings, a task-specific temporary one is chosen and marked for
+// removal after the search.
+void PhmmerSearchTask::prepareWorkingDir() {
+    if (settings.workingDir.isEmpty()) {
+        const QString uniqueDirName = getTaskTempDirName("phmmer_search_", this);
+        const QString tempRoot = AppContext::getAppSettings()->getUserAppsSettings()->getCurrentProcessTemporaryDirPath(PHMMER_TEMP_DIR);
+        settings.workingDir = tempRoot + "/" + uniqueDirName;
+        removeWorkingDir = true;
+    }
+
+    QDir workingDir(settings.workingDir);
+
+    // Leftovers of a previous run are removed first.
+    if (workingDir.exists()) {
+        ExternalToolSupportUtils::removeTmpDir(settings.workingDir, stateInfo);
+        CHECK_OP(stateInfo, );
+    }
+
+    const bool created = workingDir.mkpath(settings.workingDir);
+    if (!created) {
+        setError(tr("Cannot create a directory for temporary files."));
+    }
+}
+
+// Removes the working directory if it was created by this task.
+// Removal errors go to a local status object and do not affect the task state.
+void PhmmerSearchTask::removeTempDir() const {
+    if (!removeWorkingDir) {
+        return;
+    }
+    U2OpStatusImpl removalStatus;
+    ExternalToolSupportUtils::removeTmpDir(settings.workingDir, removalStatus);
+}
+
+// Translates the settings into the phmmer command line. Optional numeric
+// options are added only if they differ from OPTION_NOT_SET; the query and
+// the target sequence files are the last two arguments.
+QStringList PhmmerSearchTask::getArguments() const {
+    const double notSet = PhmmerSearchSettings::OPTION_NOT_SET;
+    QStringList args;
+
+    // Per-sequence reporting thresholds.
+    args << "-E" << QString::number(settings.e);
+    if (notSet != settings.t) {
+        args << "-T" << QString::number(settings.t);
+    }
+    if (notSet != settings.z) {
+        args << "-Z" << QString::number(settings.z);
+    }
+
+    // Per-domain reporting thresholds.
+    args << "--domE" << QString::number(settings.domE);
+    if (notSet != settings.domT) {
+        args << "--domT" << QString::number(settings.domT);
+    }
+    if (notSet != settings.domZ) {
+        args << "--domZ" << QString::number(settings.domZ);
+    }
+
+    // Acceleration heuristics.
+    args << "--F1" << QString::number(settings.f1)
+         << "--F2" << QString::number(settings.f2)
+         << "--F3" << QString::number(settings.f3);
+    if (settings.doMax) {
+        args << "--max";
+    }
+    if (settings.noBiasFilter) {
+        args << "--nobias";
+    }
+    if (settings.noNull2) {
+        args << "--nonull2";
+    }
+
+    // E-value calibration.
+    args << "--EmL" << QString::number(settings.eml)
+         << "--EmN" << QString::number(settings.emn)
+         << "--EvL" << QString::number(settings.evl)
+         << "--EvN" << QString::number(settings.evn)
+         << "--EfL" << QString::number(settings.efl)
+         << "--EfN" << QString::number(settings.efn)
+         << "--Eft" << QString::number(settings.eft);
+
+    // Gap probabilities.
+    args << "--popen" << QString::number(settings.popen)
+         << "--pextend" << QString::number(settings.pextend);
+
+    // Run control.
+    args << "--seed" << QString::number(settings.seed);
+    args << "--cpu" << QString::number(AppContext::getAppSettings()->getAppResourcePool()->getIdealThreadCount());
+
+    // Output: no alignments, per-domain hits table into the working directory.
+    args << "--noali";
+    args << "--domtblout" << settings.workingDir + "/" + PER_DOMAIN_HITS_FILENAME;
+
+    // Positional arguments: query first, target second.
+    args << settings.querySequenceUrl << settings.targetSequenceUrl;
+
+    return args;
+}
+
+// Creates the subtask that writes the in-memory target sequence as FASTA into
+// the working directory; targetSequenceUrl is pointed at that file so that
+// getArguments() passes it to phmmer.
+void PhmmerSearchTask::prepareSequenceSaveTask() {
+ settings.targetSequenceUrl = settings.workingDir + "/" + INPUT_SEQUENCE_FILENAME;
+ saveSequenceTask = new SaveSequenceTask(settings.targetSequence, settings.targetSequenceUrl, BaseDocumentFormats::FASTA);
+ saveSequenceTask->setSubtaskProgressWeight(5);
+}
+
+// Creates the subtask that runs the phmmer tool with the arguments built by getArguments().
+// NOTE(review): a generic ExternalToolLogParser is used here although HmmerSupport.h
+// declares Hmmer3LogParser - confirm that this is intentional.
+void PhmmerSearchTask::preparePhmmerTask() {
+ phmmerTask = new ExternalToolRunTask(HmmerSupport::PHMMER_TOOL, getArguments(), new ExternalToolLogParser);
+ phmmerTask->setSubtaskProgressWeight(85);
+}
+
+} // namespace U2
diff --git a/src/plugins/external_tool_support/src/hmmer/PhmmerSearchTask.h b/src/plugins/external_tool_support/src/hmmer/PhmmerSearchTask.h
new file mode 100644
index 0000000..29e969e
--- /dev/null
+++ b/src/plugins/external_tool_support/src/hmmer/PhmmerSearchTask.h
@@ -0,0 +1,67 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef _U2_PHMMER_SEARCH_TASK_H_
+#define _U2_PHMMER_SEARCH_TASK_H_
+
+#include <U2Core/AnnotationCreationPattern.h>
+#include <U2Core/ExternalToolRunTask.h>
+
+#include "PhmmerSearchSettings.h"
+
+namespace U2 {
+
+class AnnotationTableObject;
+class HmmerParseSearchResultsTask;
+class SaveSequenceTask;
+
+/**
+ * Top-level task that searches a target sequence with the external phmmer tool.
+ *
+ * Stages (see onSubTaskFinished()): optionally save the in-memory target
+ * sequence to a file, run phmmer, parse the per-domain hits table and create
+ * annotations from the hits.
+ *
+ * Q_OBJECT is required because the implementation calls tr(): without it the
+ * strings are looked up in the translation context of the base class.
+ */
+class PhmmerSearchTask : public ExternalToolSupportTask {
+    Q_OBJECT
+public:
+    PhmmerSearchTask(const PhmmerSearchSettings &settings);
+
+    // Annotations parsed from the phmmer output; empty until the parse subtask exists.
+    QList<SharedAnnotationData> getAnnotations() const;
+
+private:
+    void prepare();
+    QList<Task *> onSubTaskFinished(Task *subTask);
+    QString generateReport() const;
+
+    void prepareWorkingDir();
+    void removeTempDir() const;
+    QStringList getArguments() const;
+
+    void prepareSequenceSaveTask();
+    void preparePhmmerTask();
+
+    // Own copy of the settings: workingDir and targetSequenceUrl may be filled in by the task.
+    PhmmerSearchSettings settings;
+
+    // Subtasks, created lazily as the pipeline advances.
+    SaveSequenceTask *saveSequenceTask;
+    ExternalToolRunTask *phmmerTask;
+    HmmerParseSearchResultsTask *parseTask;
+    // True if the working directory was generated by the task and has to be removed by it.
+    bool removeWorkingDir;
+
+    static const QString INPUT_SEQUENCE_FILENAME;
+    static const QString PER_DOMAIN_HITS_FILENAME;
+};
+
+} // namespace U2
+
+#endif // _U2_PHMMER_SEARCH_TASK_H_
diff --git a/src/plugins/external_tool_support/src/java/JavaSupport.cpp b/src/plugins/external_tool_support/src/java/JavaSupport.cpp
index 8d89b79..bb070cc 100644
--- a/src/plugins/external_tool_support/src/java/JavaSupport.cpp
+++ b/src/plugins/external_tool_support/src/java/JavaSupport.cpp
@@ -27,7 +27,7 @@
namespace U2 {
JavaSupport::JavaSupport(const QString &name, const QString &path)
-: ExternalTool(name, path)
+: ExternalTool(name, path), architecture(x32)
{
if (AppContext::getMainWindow()) {
icon = QIcon(":external_tool_support/images/cmdline.png");
@@ -54,6 +54,16 @@ JavaSupport::JavaSupport(const QString &name, const QString &path)
connect(this, SIGNAL(si_toolValidationStatusChanged(bool)), SLOT(sl_toolValidationStatusChanged(bool)));
}
+// Switches the detected architecture to x64 if the given tool output mentions "64-Bit".
+// NOTE(review): the architecture is never switched back to x32 here, so a value
+// detected for one Java installation may survive a later path change - confirm
+// how often and with which output chunks this method is called before changing it.
+void JavaSupport::getAdditionalParameters(const QString& output) {
+ if (output.contains("64-Bit")) {
+ architecture = x64;
+ }
+}
+
+// Returns the architecture recorded so far: x32 unless "64-Bit" has been seen in the tool output.
+U2::JavaSupport::Architecture JavaSupport::getArchitecture() const {
+ return architecture;
+}
+
void JavaSupport::sl_toolValidationStatusChanged(bool isValid) {
Q_UNUSED(isValid);
ScriptingTool::onPathChanged(this, QStringList() << "-jar");
diff --git a/src/plugins/external_tool_support/src/java/JavaSupport.h b/src/plugins/external_tool_support/src/java/JavaSupport.h
index bf9a7cb..be191d6 100644
--- a/src/plugins/external_tool_support/src/java/JavaSupport.h
+++ b/src/plugins/external_tool_support/src/java/JavaSupport.h
@@ -31,10 +31,19 @@ namespace U2 {
class JavaSupport : public ExternalTool {
Q_OBJECT
public:
+ enum Architecture {
+ x32,
+ x64
+ };
JavaSupport(const QString &name, const QString &path = "");
+ void getAdditionalParameters(const QString& output);
+ Architecture getArchitecture() const;
private slots:
void sl_toolValidationStatusChanged(bool isValid);
+
+private:
+ Architecture architecture;
};
} // U2
diff --git a/src/plugins/external_tool_support/src/mafft/MAFFTSupportRunDialog.cpp b/src/plugins/external_tool_support/src/mafft/MAFFTSupportRunDialog.cpp
index 14ea05b..3126558 100644
--- a/src/plugins/external_tool_support/src/mafft/MAFFTSupportRunDialog.cpp
+++ b/src/plugins/external_tool_support/src/mafft/MAFFTSupportRunDialog.cpp
@@ -42,7 +42,7 @@ MAFFTSupportRunDialog::MAFFTSupportRunDialog(MAFFTSupportTaskSettings& _settings
QDialog(_parent), settings(_settings)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470730");
+ new HelpButton(this, buttonBox, "18220590");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Align"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
@@ -71,7 +71,7 @@ MAFFTWithExtFileSpecifySupportRunDialog::MAFFTWithExtFileSpecifySupportRunDialog
saveController(NULL)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470730");
+ new HelpButton(this, buttonBox, "18220590");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Align"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins/external_tool_support/src/phyml/PhyMLDialog.ui b/src/plugins/external_tool_support/src/phyml/PhyMLDialog.ui
index 0aee672..dcf7be9 100644
--- a/src/plugins/external_tool_support/src/phyml/PhyMLDialog.ui
+++ b/src/plugins/external_tool_support/src/phyml/PhyMLDialog.ui
@@ -30,7 +30,7 @@
</property>
<layout class="QVBoxLayout" name="verticalLayout">
<property name="spacing">
- <number>-1</number>
+ <number>6</number>
</property>
<property name="sizeConstraint">
<enum>QLayout::SetMinAndMaxSize</enum>
@@ -310,25 +310,13 @@ Uncheck to get the maximum likelihood estimate.</string>
<enum>QFormLayout::ExpandingFieldsGrow</enum>
</property>
<item row="0" column="0">
- <widget class="QCheckBox" name="fastMethodCheckbox">
- <property name="sizePolicy">
- <sizepolicy hsizetype="Minimum" vsizetype="MinimumExpanding">
- <horstretch>0</horstretch>
- <verstretch>0</verstretch>
- </sizepolicy>
- </property>
+ <widget class="QRadioButton" name="fastMethodCheckbox">
<property name="text">
<string>Use fast likelihood-based method</string>
</property>
<property name="checked">
<bool>true</bool>
</property>
- <property name="autoRepeat">
- <bool>false</bool>
- </property>
- <property name="autoExclusive">
- <bool>true</bool>
- </property>
</widget>
</item>
<item row="0" column="1">
@@ -342,22 +330,10 @@ Uncheck to get the maximum likelihood estimate.</string>
</widget>
</item>
<item row="1" column="0">
- <widget class="QCheckBox" name="bootstrapCheckBox">
- <property name="sizePolicy">
- <sizepolicy hsizetype="Minimum" vsizetype="Expanding">
- <horstretch>0</horstretch>
- <verstretch>0</verstretch>
- </sizepolicy>
- </property>
+ <widget class="QRadioButton" name="bootstrapCheckBox">
<property name="text">
<string>Perform bootstrap</string>
</property>
- <property name="checked">
- <bool>false</bool>
- </property>
- <property name="autoExclusive">
- <bool>true</bool>
- </property>
</widget>
</item>
<item row="1" column="1">
@@ -368,6 +344,9 @@ Uncheck to get the maximum likelihood estimate.</string>
<property name="toolTip">
<string>It is the number of bootstrap replicates.</string>
</property>
+ <property name="wrapping">
+ <bool>false</bool>
+ </property>
<property name="maximum">
<number>990000</number>
</property>
@@ -583,5 +562,22 @@ Uncheck to get the maximum likelihood estimate.</string>
</customwidget>
</customwidgets>
<resources/>
- <connections/>
+ <connections>
+ <connection>
+ <sender>bootstrapCheckBox</sender>
+ <signal>toggled(bool)</signal>
+ <receiver>bootstrapSpinBox</receiver>
+ <slot>setEnabled(bool)</slot>
+ <hints>
+ <hint type="sourcelabel">
+ <x>88</x>
+ <y>85</y>
+ </hint>
+ <hint type="destinationlabel">
+ <x>311</x>
+ <y>88</y>
+ </hint>
+ </hints>
+ </connection>
+ </connections>
</ui>
diff --git a/src/plugins/external_tool_support/src/snpeff/SnpEffSupport.cpp b/src/plugins/external_tool_support/src/snpeff/SnpEffSupport.cpp
index 533af97..d2e347a 100644
--- a/src/plugins/external_tool_support/src/snpeff/SnpEffSupport.cpp
+++ b/src/plugins/external_tool_support/src/snpeff/SnpEffSupport.cpp
@@ -62,7 +62,15 @@ const QStringList SnpEffSupport::getToolRunnerAdditionalOptions() {
CHECK(s != NULL, result);
//java VM can't allocate whole free memory, Xmx size should be lesser than free memory
int memSize = s->getMaxMemorySizeInMB();
- result << "-Xmx" + QString::number( memSize > 150 ? memSize - 150 : memSize) + "M";
+#if (defined(Q_OS_WIN) || defined(Q_OS_LINUX))
+ ExternalToolRegistry* etRegistry = AppContext::getExternalToolRegistry();
+ JavaSupport* java = qobject_cast<JavaSupport*>(etRegistry->getByName(ET_JAVA));
+ CHECK(java != NULL, result);
+ if (java->getArchitecture() == JavaSupport::x32) {
+ memSize = memSize > 1212 ? 1212 : memSize;
+ }
+#endif // windows or linux
+ result << "-Xmx" + QString::number(memSize > 150 ? memSize - 150 : memSize) + "M";
return result;
}
diff --git a/src/plugins/external_tool_support/src/snpeff/SnpEffTask.cpp b/src/plugins/external_tool_support/src/snpeff/SnpEffTask.cpp
index 3d22f2f..6ebc1ba 100644
--- a/src/plugins/external_tool_support/src/snpeff/SnpEffTask.cpp
+++ b/src/plugins/external_tool_support/src/snpeff/SnpEffTask.cpp
@@ -181,6 +181,13 @@ QString SnpEffTask::getDataPath() const{
CHECK(NULL != AppContext::getAppSettings()->getUserAppsSettings(), QString());
CHECK(NULL != AppContext::getExternalToolRegistry(), QString());
CHECK(NULL != AppContext::getExternalToolRegistry()->getByName(ET_SNPEFF), QString());
+
+ // The next part is for the VEME conference.
+ // It is a temporary workaround used instead of resolving UGENE-5318.
+ // Remove it after the 1.24 release.
+ if (settings.genome == "NC_002549") {
+ return QFileInfo(AppContext::getExternalToolRegistry()->getByName(ET_SNPEFF)->getPath()).dir().absolutePath() + "/data";
+ }
return AppContext::getAppSettings()->getUserAppsSettings()->getDownloadDirPath() + "/" + "snpeff_data_" + AppContext::getExternalToolRegistry()->getByName(ET_SNPEFF)->getVersion();
}
diff --git a/src/plugins/external_tool_support/src/snpeff/SnpEffWorker.cpp b/src/plugins/external_tool_support/src/snpeff/SnpEffWorker.cpp
index 0bded76..4111906 100644
--- a/src/plugins/external_tool_support/src/snpeff/SnpEffWorker.cpp
+++ b/src/plugins/external_tool_support/src/snpeff/SnpEffWorker.cpp
@@ -21,36 +21,38 @@
#include <U2Core/AppContext.h>
#include <U2Core/BaseDocumentFormats.h>
+#include <U2Core/DataPathRegistry.h>
#include <U2Core/DocumentImport.h>
#include <U2Core/DocumentModel.h>
#include <U2Core/DocumentUtils.h>
#include <U2Core/FailTask.h>
+#include <U2Core/FileAndDirectoryUtils.h>
#include <U2Core/GObject.h>
#include <U2Core/GObjectTypes.h>
#include <U2Core/GUrlUtils.h>
#include <U2Core/IOAdapter.h>
#include <U2Core/IOAdapterUtils.h>
+#include <U2Core/SnpeffDictionary.h>
#include <U2Core/TaskSignalMapper.h>
-#include <U2Core/DataPathRegistry.h>
#include <U2Core/U2OpStatusUtils.h>
+
#include <U2Designer/DelegateEditors.h>
+
#include <U2Formats/BAMUtils.h>
-#include <U2Core/FileAndDirectoryUtils.h>
+
#include <U2Lang/ActorPrototypeRegistry.h>
+#include <U2Lang/BaseActorCategories.h>
#include <U2Lang/BaseAttributes.h>
-#include <U2Lang/BaseTypes.h>
#include <U2Lang/BaseSlots.h>
-#include <U2Lang/BaseActorCategories.h>
+#include <U2Lang/BaseTypes.h>
#include <U2Lang/IntegralBusModel.h>
#include <U2Lang/WorkflowEnv.h>
#include <U2Lang/WorkflowMonitor.h>
-#include "java/JavaSupport.h"
-
#include "SnpEffSupport.h"
#include "SnpEffTask.h"
-
#include "SnpEffWorker.h"
+#include "java/JavaSupport.h"
namespace U2 {
namespace LocalWorkflow {
@@ -209,6 +211,7 @@ void SnpEffFactory::init() {
genomeMap["Homo sapiens (hg38)"] = "hg38";
genomeMap["Ecoli K12 MG1655 (NC_000913)"] = "NC_000913";
genomeMap["C. elegans (WS241)"] = "WS241";
+ genomeMap["Ebola Zaire Virus (NC_002549)"] = "NC_002549";
delegates[SnpEffWorker::GENOME] = new ComboBoxEditableDelegate(genomeMap);
}
@@ -358,46 +361,7 @@ void SnpEffLogProcessor::addNotification(const QString &key, int count) {
}
QStrStrMap SnpEffLogProcessor::initWellKnownMessages() {
- QStrStrMap result;
-
- result["ERROR_CHROMOSOME_NOT_FOUND"] = "Chromosome does not exists in reference genome database. "
- "Typically indicates a mismatch between the chromosome names "
- "in the input file and the chromosome names used in the reference genome";
-
- result["ERROR_OUT_OF_CHROMOSOME_RANGE"] = "The variant’s genomic coordinate "
- "is greater than chromosome's length";
-
- result["E1"] = result["ERROR_CHROMOSOME_NOT_FOUND"];
- result["E2"] = result["ERROR_OUT_OF_CHROMOSOME_RANGE"];
-
- result["WARNING_REF_DOES_NOT_MATCH_GENOME"] = "This means that the ‘REF’ field "
- "in the input VCF file does not match the reference genome. This warning may indicate "
- "a conflict between input data and data from reference genome "
- "(for instance is the input VCF was aligned to a different reference genome)";
-
- result["WARNING_SEQUENCE_NOT_AVAILABLE"] = "Reference sequence is not available, "
- "thus no inference could be performed";
-
- result["WARNING_TRANSCRIPT_INCOMPLETE"] = "A protein coding transcript having "
- "a non-multiple of 3 length. It indicates that the reference "
- "genome has missing information about this particular transcript";
-
- result["WARNING_TRANSCRIPT_MULTIPLE_STOP_CODONS"] = "A protein coding transcript has "
- "two or more STOP codons in the middle of the coding sequence (CDS). "
- "This should not happen and it usually means the reference genome "
- "may have an error in this transcript";
-
- result["WARNING_TRANSCRIPT_NO_START_CODON"] = "A protein coding transcript does not have "
- "a proper START codon. It is rare that a real transcript does not have a START codon, "
- "so this probably indicates an error or missing information in the reference genome";
-
- result["W1"] = result["WARNING_REF_DOES_NOT_MATCH_GENOME"];
- result["W2"] = result["WARNING_SEQUENCE_NOT_AVAILABLE"];
- result["W3"] = result["WARNING_TRANSCRIPT_INCOMPLETE"];
- result["W4"] = result["WARNING_TRANSCRIPT_MULTIPLE_STOP_CODONS"];
- result["W5"] = result["WARNING_TRANSCRIPT_NO_START_CODON"];
-
- return result;
+ // The message table is no longer built here; it is returned from SnpeffDictionary.
+ return SnpeffDictionary::messageDescriptions;
}
QMap<QString, QRegExp> SnpEffLogProcessor::initWellKnownCatchers() {
diff --git a/src/plugins/external_tool_support/src/tcoffee/TCoffeeSupportRunDialog.cpp b/src/plugins/external_tool_support/src/tcoffee/TCoffeeSupportRunDialog.cpp
index 817ae66..87511dc 100644
--- a/src/plugins/external_tool_support/src/tcoffee/TCoffeeSupportRunDialog.cpp
+++ b/src/plugins/external_tool_support/src/tcoffee/TCoffeeSupportRunDialog.cpp
@@ -42,7 +42,7 @@ TCoffeeSupportRunDialog::TCoffeeSupportRunDialog(TCoffeeSupportTaskSettings& _se
QDialog(_parent), settings(_settings)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470731");
+ new HelpButton(this, buttonBox, "18220591");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Align"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
@@ -71,7 +71,7 @@ TCoffeeWithExtFileSpecifySupportRunDialog::TCoffeeWithExtFileSpecifySupportRunDi
saveController(NULL)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470731");
+ new HelpButton(this, buttonBox, "18220591");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Align"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins/external_tool_support/src/utils/BaseShortReadsAlignerWorker.cpp b/src/plugins/external_tool_support/src/utils/BaseShortReadsAlignerWorker.cpp
index c87ba1d..f5c540c 100644
--- a/src/plugins/external_tool_support/src/utils/BaseShortReadsAlignerWorker.cpp
+++ b/src/plugins/external_tool_support/src/utils/BaseShortReadsAlignerWorker.cpp
@@ -48,6 +48,7 @@
#include <U2Lang/IntegralBusModel.h>
#include <U2Lang/WorkflowEnv.h>
#include <U2Lang/WorkflowMonitor.h>
+#include <U2View/DnaAssemblyUtils.h>
#include "BaseShortReadsAlignerWorker.h"
@@ -149,7 +150,8 @@ Task *BaseShortReadsAlignerWorker::tick() {
settings.shortReadSets << toUrls(readsFetcher.takeFullDataset(), READS_URL_SLOT_ID, ShortReadSet::SingleEndReads, ShortReadSet::UpstreamMate);
}
- DnaAssemblyToReferenceTask* t = getTask(settings);
+ DnaAssemblyTaskWithConversions *t = new DnaAssemblyTaskWithConversions(settings);
+ t->addListeners(createLogListeners(2));
connect(t, SIGNAL(si_stateChanged()), SLOT(sl_taskFinished()));
return t;
}
@@ -198,7 +200,7 @@ bool BaseShortReadsAlignerWorker::isReady() const {
}
void BaseShortReadsAlignerWorker::sl_taskFinished() {
- DnaAssemblyToReferenceTask *t = dynamic_cast<DnaAssemblyToReferenceTask*>(sender());
+ DnaAssemblyTaskWithConversions *t = qobject_cast<DnaAssemblyTaskWithConversions*>(sender());
if (!t->isFinished() || t->hasError() || t->isCanceled()) {
return;
}
diff --git a/src/plugins/external_tool_support/src/utils/BaseShortReadsAlignerWorker.h b/src/plugins/external_tool_support/src/utils/BaseShortReadsAlignerWorker.h
index 0d9e693..9eec43e 100644
--- a/src/plugins/external_tool_support/src/utils/BaseShortReadsAlignerWorker.h
+++ b/src/plugins/external_tool_support/src/utils/BaseShortReadsAlignerWorker.h
@@ -53,7 +53,6 @@ protected:
virtual void setGenomeIndex(DnaAssemblyToRefTaskSettings& settings) = 0;
virtual QString getDefaultFileName() const = 0;
virtual QString getBaseSubdir() const = 0;
- virtual DnaAssemblyToReferenceTask* getTask(const DnaAssemblyToRefTaskSettings &settings) const = 0;
QList<ShortReadSet> toUrls(const QList<Message> &messages, const QString &urlSlotId, ShortReadSet::LibraryType libType, ShortReadSet::MateOrder order) const;
bool isReadyToRun() const;
bool dataFinished() const;
diff --git a/src/plugins/external_tool_support/src/utils/BlastRunCommonDialog.cpp b/src/plugins/external_tool_support/src/utils/BlastRunCommonDialog.cpp
index e04a55a..d933325 100644
--- a/src/plugins/external_tool_support/src/utils/BlastRunCommonDialog.cpp
+++ b/src/plugins/external_tool_support/src/utils/BlastRunCommonDialog.cpp
@@ -60,7 +60,7 @@ BlastRunCommonDialog::BlastRunCommonDialog(QWidget *parent, BlastType blastType,
: QDialog(parent), ca_c(NULL), useCompValues(useCompValues), compValues(compValues)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470727");
+ new HelpButton(this, buttonBox, "18220587");
buttonBox->button(QDialogButtonBox::Yes)->setText(tr("Restore to default"));
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Search"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins/external_tool_support/src/utils/ExportTasks.cpp b/src/plugins/external_tool_support/src/utils/ExportTasks.cpp
index 9f336a1..60e2298 100644
--- a/src/plugins/external_tool_support/src/utils/ExportTasks.cpp
+++ b/src/plugins/external_tool_support/src/utils/ExportTasks.cpp
@@ -19,29 +19,33 @@
* MA 02110-1301, USA.
*/
-#include <QtCore/QFileInfo>
+#include <QFileInfo>
-#include <U2Core/DNAAlphabet.h>
-#include <U2Core/DocumentModel.h>
-#include <U2Core/IOAdapter.h>
-#include <U2Core/IOAdapterUtils.h>
+#include <U2Core/AddDocumentTask.h>
#include <U2Core/AppContext.h>
+#include <U2Core/CloneObjectTask.h>
+#include <U2Core/Counter.h>
+#include <U2Core/DNAAlphabet.h>
+#include <U2Core/DNAChromatogramObject.h>
+#include <U2Core/DNASequenceObject.h>
#include <U2Core/DNATranslation.h>
#include <U2Core/DNATranslationImpl.h>
-#include <U2Formats/SCFFormat.h>
-#include <U2Core/Counter.h>
+#include <U2Core/DocumentModel.h>
#include <U2Core/GHints.h>
-#include <U2Core/DNAChromatogramObject.h>
#include <U2Core/GObjectRelationRoles.h>
-#include <U2Core/AddDocumentTask.h>
+#include <U2Core/IOAdapter.h>
+#include <U2Core/IOAdapterUtils.h>
+#include <U2Core/MAlignmentImporter.h>
+#include <U2Core/MAlignmentObject.h>
#include <U2Core/MSAUtils.h>
+#include <U2Core/SaveDocumentTask.h>
#include <U2Core/TextUtils.h>
+#include <U2Core/U2DbiRegistry.h>
+#include <U2Core/U2ObjectDbi.h>
#include <U2Core/U2SafePoints.h>
#include <U2Core/U2SequenceUtils.h>
-#include <U2Core/DNASequenceObject.h>
-#include <U2Core/MAlignmentImporter.h>
-#include <U2Core/MAlignmentObject.h>
+#include <U2Formats/SCFFormat.h>
#include "ExportTasks.h"
@@ -83,6 +87,18 @@ void SaveAlignmentTask::run() {
f->storeDocument(doc.data(), stateInfo);
}
+// Returns the Document pointer held in `doc` (via doc.data()).
+Document * SaveAlignmentTask::getDocument() const {
+ return doc.data();
+}
+
+// Returns a read-only reference to the alignment stored in `ma`.
+const MAlignment & SaveAlignmentTask::getMAlignment() const {
+ return ma;
+}
+
+// Returns a read-only reference to `fileName`, which this task exposes as its URL.
+const QString &SaveAlignmentTask::getUrl() const {
+ return fileName;
+}
+
//////////////////////////////////////////////////////////////////////////
// export alignment 2 sequence format
@@ -118,5 +134,51 @@ void SaveMSA2SequencesTask::run() {
f->storeDocument(doc.data(), stateInfo);
}
-}//namespace
+// Creates a "Save sequence" task for `sequence`, targeting `url` in the
+// document format `formatId`. `locker` and `cloneTask` start as NULL and
+// are created later in prepare().
+SaveSequenceTask::SaveSequenceTask(const QPointer<U2SequenceObject> &sequence, const QString &url, const DocumentFormatId &formatId):
+ Task(tr("Save sequence"), TaskFlags_NR_FOSE_COSC),
+ sequence(sequence),
+ url(url),
+ formatId(formatId),
+ locker(NULL),
+ cloneTask(NULL)
+{
+ // Invalid arguments are reported through setError() instead of being silently accepted.
+ SAFE_POINT_EXT(NULL != sequence, setError("Sequence is NULL"), );
+ SAFE_POINT_EXT(!url.isEmpty(), setError("URL is empty"), );
+}
+
+// Locks the source sequence and schedules a subtask that clones it into the
+// session's temporary DBI (root folder). The clone subtask is given a
+// progress weight of 50.
+void SaveSequenceTask::prepare() {
+    // Resolve the temporary DBI before allocating anything: if stateInfo
+    // reports a problem we return without leaving the sequence locked or an
+    // orphaned CloneObjectTask behind (neither would ever be released,
+    // because onSubTaskFinished() is not reached for a subtask that was
+    // never added).
+    const U2DbiRef tmpDbiRef = AppContext::getDbiRegistry()->getSessionTmpDbiRef(stateInfo);
+    CHECK_OP(stateInfo, );
+
+    locker = new StateLocker(sequence);
+    cloneTask = new CloneObjectTask(sequence, tmpDbiRef, U2ObjectDbi::ROOT_FOLDER);
+    cloneTask->setSubtaskProgressWeight(50);
+    addSubTask(cloneTask);
+}
+
+// Reacts to a finished subtask. When the clone subtask is done, the lock on
+// the source sequence is released; then, unless stateInfo reports a problem,
+// the cloned object is put into a new document at `url` and a
+// SaveDocumentTask for that document is returned. Any other subtask yields
+// an empty list.
+QList<Task *> SaveSequenceTask::onSubTaskFinished(Task *subTask) {
+    QList<Task *> result;
+
+    if (subTask != cloneTask) {
+        // Only the clone subtask triggers follow-up work.
+        return result;
+    }
+
+    // Release the lock before checking for errors, so it is freed whether or
+    // not the clone succeeded.
+    delete locker;
+    locker = NULL;
+
+    CHECK_OP(stateInfo, result);
+
+    DocumentFormat *format = AppContext::getDocumentFormatRegistry()->getFormatById(formatId);
+    // "%1" is the place marker arg() substitutes; the previous bare "%" meant
+    // the format id never made it into the error message.
+    SAFE_POINT_EXT(NULL != format, setError(tr("'%1' format is not registered").arg(formatId)), result);
+
+    Document *document = format->createNewLoadedDocument(IOAdapterUtils::get(BaseIOAdapters::LOCAL_FILE), url, stateInfo);
+    CHECK_OP(stateInfo, result);
+    document->setDocumentOwnsDbiResources(true);
+    document->addObject(cloneTask->takeResult());
+
+    // SaveDoc_DestroyAfter hands the document over to the save task.
+    SaveDocumentTask *saveTask = new SaveDocumentTask(document, NULL, GUrl(), SaveDocFlags(SaveDoc_Overwrite) | SaveDoc_DestroyAfter);
+    saveTask->setSubtaskProgressWeight(50);
+    result << saveTask;
+
+    return result;
+}
+} // namespace U2
diff --git a/src/plugins/external_tool_support/src/utils/ExportTasks.h b/src/plugins/external_tool_support/src/utils/ExportTasks.h
index 11c6912..f0de224 100644
--- a/src/plugins/external_tool_support/src/utils/ExportTasks.h
+++ b/src/plugins/external_tool_support/src/utils/ExportTasks.h
@@ -30,6 +30,8 @@
namespace U2 {
+class CloneObjectTask;
+
/** Save Alignment Task (to CLUSTAL, NEXUS, ...) */
class SaveAlignmentTask : public Task {
Q_OBJECT
@@ -38,9 +40,9 @@ public:
void run();
- virtual Document* getDocument() const {return doc.data();}
-
- MAlignment& getMAlignment() {return ma;}
+ virtual Document* getDocument() const;
+ const MAlignment & getMAlignment() const;
+ const QString & getUrl() const;
private:
MAlignment ma;
@@ -69,6 +71,23 @@ private:
QScopedPointer<Document> doc;
};
-}//namespace
+// Task that saves a sequence object to `url` in the document format `formatId`.
+// NOTE(review): no destructor is declared, so `locker` is only freed in
+// onSubTaskFinished() - confirm it cannot leak when that path is not reached.
+class SaveSequenceTask : public Task {
+ Q_OBJECT
+public:
+ SaveSequenceTask(const QPointer<U2SequenceObject> &sequence, const QString &url, const DocumentFormatId &formatId);
+
+private:
+ // Locks the sequence and schedules the clone subtask.
+ void prepare();
+ // Schedules the save once the clone subtask has finished.
+ QList<Task *> onSubTaskFinished(Task *subTask);
+
+ // Source object, destination URL and output format given to the constructor.
+ QPointer<U2SequenceObject> sequence;
+ const QString url;
+ const DocumentFormatId formatId;
+
+ // Both are NULL until prepare() creates them.
+ StateLocker *locker;
+ CloneObjectTask *cloneTask;
+};
+
+} // namespace U2
-#endif
+#endif // _U2_EXPORT_PLUGIN_TASKS_H_
diff --git a/src/plugins/external_tool_support/src/utils/ExternalToolValidateTask.cpp b/src/plugins/external_tool_support/src/utils/ExternalToolValidateTask.cpp
index ef41bc4..7dab121 100644
--- a/src/plugins/external_tool_support/src/utils/ExternalToolValidateTask.cpp
+++ b/src/plugins/external_tool_support/src/utils/ExternalToolValidateTask.cpp
@@ -59,7 +59,7 @@ ExternalToolJustValidateTask::~ExternalToolJustValidateTask() {
void ExternalToolJustValidateTask::run() {
ExternalToolRegistry* etRegistry = AppContext::getExternalToolRegistry();
SAFE_POINT(etRegistry, "An external tool registry is NULL", );
- ExternalTool* tool = etRegistry->getByName(toolName);
+ tool = etRegistry->getByName(toolName);
SAFE_POINT(tool, QString("External tool '%1' isn't found in the registry").arg(toolName), );
QFileInfo info(toolPath);
@@ -209,6 +209,7 @@ bool ExternalToolJustValidateTask::parseLog(const ExternalToolValidation& valida
if (errLog.contains(QRegExp(validation.expectedMsg))) {
isValid = true;
checkVersion(errLog);
+ tool->getAdditionalParameters(errLog);
} else {
isValid = false;
foreach (const QString& errStr, validation.possibleErrorsDescr.keys()) {
@@ -225,6 +226,7 @@ bool ExternalToolJustValidateTask::parseLog(const ExternalToolValidation& valida
if (log.contains(QRegExp(validation.expectedMsg))) {
isValid = true;
checkVersion(log);
+ tool->getAdditionalParameters(log);
} else {
isValid = false;
foreach (const QString& errStr, validation.possibleErrorsDescr.keys()) {
diff --git a/src/plugins/external_tool_support/src/utils/ExternalToolValidateTask.h b/src/plugins/external_tool_support/src/utils/ExternalToolValidateTask.h
index 35a46a1..ad8c46d 100644
--- a/src/plugins/external_tool_support/src/utils/ExternalToolValidateTask.h
+++ b/src/plugins/external_tool_support/src/utils/ExternalToolValidateTask.h
@@ -81,6 +81,8 @@ private:
QString lastOutLine;
QProcess* externalToolProcess;
+
+ ExternalTool* tool;
static const int CHECK_PERIOD_MS = 1000;
static const int TIMEOUT_MS = 30000;
};
diff --git a/src/plugins/external_tool_support/transl/english.ts b/src/plugins/external_tool_support/transl/english.ts
index cbf5bfb..f96a948 100644
--- a/src/plugins/external_tool_support/transl/english.ts
+++ b/src/plugins/external_tool_support/transl/english.ts
@@ -7246,8 +7246,8 @@ Default: 64.</translation>
</message>
<message>
<location filename="../src/cutadapt/CutadaptWorker.cpp" line="143"/>
- <source>FASTA File with any end adapters</source>
- <translation>FASTA File with any end adapters</translation>
+ <source>FASTA file with 5' and 3' adapters</source>
+ <translation>FASTA file with 5' and 3' adapters</translation>
</message>
<message>
<location filename="../src/cutadapt/CutadaptWorker.cpp" line="144"/>
diff --git a/src/plugins/external_tool_support/transl/russian.ts b/src/plugins/external_tool_support/transl/russian.ts
index b73a3c6..eebbe37 100644
--- a/src/plugins/external_tool_support/transl/russian.ts
+++ b/src/plugins/external_tool_support/transl/russian.ts
@@ -7258,8 +7258,8 @@ bowtie медленнее, когда указан режим --best.</translati
</message>
<message>
<location filename="../src/cutadapt/CutadaptWorker.cpp" line="143"/>
- <source>FASTA File with any end adapters</source>
- <translation>FASTA файл с любыми концевыми адаптерами</translation>
+ <source>FASTA file with 5' and 3' adapters</source>
+ <translation>FASTA файл с 5' и 3' концевыми адаптерами</translation>
</message>
<message>
<location filename="../src/cutadapt/CutadaptWorker.cpp" line="144"/>
diff --git a/src/plugins/genome_aligner/src/GenomeAlignerSettingsController.cpp b/src/plugins/genome_aligner/src/GenomeAlignerSettingsController.cpp
index 56bd95a..d9b8821 100644
--- a/src/plugins/genome_aligner/src/GenomeAlignerSettingsController.cpp
+++ b/src/plugins/genome_aligner/src/GenomeAlignerSettingsController.cpp
@@ -73,7 +73,7 @@ AppSettingsGUIPageWidget* GenomeAlignerSettingsPageController::createWidget(AppS
return r;
}
-const QString GenomeAlignerSettingsPageController::helpPageId = QString("17470452");
+const QString GenomeAlignerSettingsPageController::helpPageId = QString("18220312");
GenomeAlignerSettingsPageWidget::GenomeAlignerSettingsPageWidget(GenomeAlignerSettingsPageController* ) {
setupUi(this);
diff --git a/src/plugins/opencl_support/src/OpenCLSupportPlugin.cpp b/src/plugins/opencl_support/src/OpenCLSupportPlugin.cpp
index cc5fa3a..16474c1 100644
--- a/src/plugins/opencl_support/src/OpenCLSupportPlugin.cpp
+++ b/src/plugins/opencl_support/src/OpenCLSupportPlugin.cpp
@@ -42,7 +42,7 @@ extern "C" Q_DECL_EXPORT Plugin * U2_PLUGIN_INIT_FUNC() {
extern "C" Q_DECL_EXPORT bool U2_PLUGIN_VERIFY_FUNC() {
{
- volatile OpenCLSupportPlugin plug;
+ volatile OpenCLSupportPlugin plug();
Q_UNUSED(plug);
}
return true;
diff --git a/src/plugins/opencl_support/src/OpenCLSupportSettingsController.cpp b/src/plugins/opencl_support/src/OpenCLSupportSettingsController.cpp
index 65204c1..7a9ebe0 100644
--- a/src/plugins/opencl_support/src/OpenCLSupportSettingsController.cpp
+++ b/src/plugins/opencl_support/src/OpenCLSupportSettingsController.cpp
@@ -74,7 +74,7 @@ AppSettingsGUIPageWidget * OpenCLSupportSettingsPageController::createWidget( Ap
return w;
}
-const QString OpenCLSupportSettingsPageController::helpPageId = QString("17470450");
+const QString OpenCLSupportSettingsPageController::helpPageId = QString("18220310");
OpenCLSupportSettingsPageState::OpenCLSupportSettingsPageState( int num_gpus ) {
assert( num_gpus >= 0 );
diff --git a/src/plugins/opencl_support/transl/english.ts b/src/plugins/opencl_support/transl/english.ts
index 26e8481..6b71ff3 100644
--- a/src/plugins/opencl_support/transl/english.ts
+++ b/src/plugins/opencl_support/transl/english.ts
@@ -4,47 +4,52 @@
<context>
<name>U2::OpenCLSupportPlugin</name>
<message>
- <location filename="../src/OpenCLSupportPlugin.cpp" line="45"/>
+ <location filename="../src/OpenCLSupportPlugin.cpp" line="51"/>
+ <source>Problem occurred loading the OpenCL driver. Please try to update drivers if you're going to make calculations on your video card. For details see this page: <a href="%1">%1</a></source>
+ <translation type="unfinished"></translation>
+ </message>
+ <message>
+ <location filename="../src/OpenCLSupportPlugin.cpp" line="58"/>
<source>OpenCL Support</source>
<translation>OpenCL Support</translation>
</message>
<message>
- <location filename="../src/OpenCLSupportPlugin.cpp" line="46"/>
+ <location filename="../src/OpenCLSupportPlugin.cpp" line="59"/>
<source>Plugin provides support for OpenCL-enabled GPUs.</source>
<translation>Plugin provides support for OpenCL-enabled GPUs.</translation>
</message>
<message>
- <location filename="../src/OpenCLSupportPlugin.cpp" line="88"/>
+ <location filename="../src/OpenCLSupportPlugin.cpp" line="105"/>
<source>Cannot load OpenCL driver dynamic library.<p> Install the latest video GPU driver.</source>
<translation>Cannot load OpenCL driver dynamic library.<p> Install the latest video GPU driver.</translation>
</message>
<message>
- <location filename="../src/OpenCLSupportPlugin.cpp" line="92"/>
+ <location filename="../src/OpenCLSupportPlugin.cpp" line="109"/>
<source>An error has occurred while obtaining information about installed OpenCL GPUs.<br> See OpenCL Support plugin log for details.</source>
<translation>An error has occurred while obtaining information about installed OpenCL GPUs.<br> See OpenCL Support plugin log for details.</translation>
</message>
<message>
- <location filename="../src/OpenCLSupportPlugin.cpp" line="110"/>
+ <location filename="../src/OpenCLSupportPlugin.cpp" line="127"/>
<source>Initializing OpenCL</source>
<translation>Initializing OpenCL</translation>
</message>
<message>
- <location filename="../src/OpenCLSupportPlugin.cpp" line="121"/>
+ <location filename="../src/OpenCLSupportPlugin.cpp" line="138"/>
<source>Number of OpenCL platforms: %1</source>
<translation>Number of OpenCL platforms: %1</translation>
</message>
<message>
- <location filename="../src/OpenCLSupportPlugin.cpp" line="136"/>
+ <location filename="../src/OpenCLSupportPlugin.cpp" line="153"/>
<source>Number of OpenCL devices: %1</source>
<translation>Number of OpenCL devices: %1</translation>
</message>
<message>
- <location filename="../src/OpenCLSupportPlugin.cpp" line="237"/>
+ <location filename="../src/OpenCLSupportPlugin.cpp" line="254"/>
<source>Registering OpenCL-enabled GPU: %1, global mem: %2 Mb, local mem: %3 Kb, max compute units: %4, max work group size: %5, max frequency: %6 Hz</source>
<translation>Registering OpenCL-enabled GPU: %1, global mem: %2 Mb, local mem: %3 Kb, max compute units: %4, max work group size: %5, max frequency: %6 Hz</translation>
</message>
<message>
- <location filename="../src/OpenCLSupportPlugin.cpp" line="255"/>
+ <location filename="../src/OpenCLSupportPlugin.cpp" line="272"/>
<source>OpenCL error code (%1)</source>
<translation>OpenCL error code (%1)</translation>
</message>
diff --git a/src/plugins/opencl_support/transl/russian.ts b/src/plugins/opencl_support/transl/russian.ts
index 406c12b..05978a9 100644
--- a/src/plugins/opencl_support/transl/russian.ts
+++ b/src/plugins/opencl_support/transl/russian.ts
@@ -4,47 +4,52 @@
<context>
<name>U2::OpenCLSupportPlugin</name>
<message>
- <location filename="../src/OpenCLSupportPlugin.cpp" line="45"/>
+ <location filename="../src/OpenCLSupportPlugin.cpp" line="51"/>
+ <source>Problem occurred loading the OpenCL driver. Please try to update drivers if you're going to make calculations on your video card. For details see this page: <a href="%1">%1</a></source>
+ <translation type="unfinished"></translation>
+ </message>
+ <message>
+ <location filename="../src/OpenCLSupportPlugin.cpp" line="58"/>
<source>OpenCL Support</source>
<translation>Поддержка OpenCL</translation>
</message>
<message>
- <location filename="../src/OpenCLSupportPlugin.cpp" line="46"/>
+ <location filename="../src/OpenCLSupportPlugin.cpp" line="59"/>
<source>Plugin provides support for OpenCL-enabled GPUs.</source>
<translation>Плагин поддерживает OpenCL GPUs.</translation>
</message>
<message>
- <location filename="../src/OpenCLSupportPlugin.cpp" line="88"/>
+ <location filename="../src/OpenCLSupportPlugin.cpp" line="105"/>
<source>Cannot load OpenCL driver dynamic library.<p> Install the latest video GPU driver.</source>
<translation>Невозможно загрузить драйвер для OpenCL.<p> Установите последний GPU драйвер.</translation>
</message>
<message>
- <location filename="../src/OpenCLSupportPlugin.cpp" line="92"/>
+ <location filename="../src/OpenCLSupportPlugin.cpp" line="109"/>
<source>An error has occurred while obtaining information about installed OpenCL GPUs.<br> See OpenCL Support plugin log for details.</source>
<translation>Произошла ошибка в процессе получения информации об установленных OpenCL GPU.<br> Посмотрите лог поддержки OpenCL чтобы получить детали.</translation>
</message>
<message>
- <location filename="../src/OpenCLSupportPlugin.cpp" line="110"/>
+ <location filename="../src/OpenCLSupportPlugin.cpp" line="127"/>
<source>Initializing OpenCL</source>
<translation>Инициализация OpenCL</translation>
</message>
<message>
- <location filename="../src/OpenCLSupportPlugin.cpp" line="121"/>
+ <location filename="../src/OpenCLSupportPlugin.cpp" line="138"/>
<source>Number of OpenCL platforms: %1</source>
<translation>Число OpenCL платформ: %1</translation>
</message>
<message>
- <location filename="../src/OpenCLSupportPlugin.cpp" line="136"/>
+ <location filename="../src/OpenCLSupportPlugin.cpp" line="153"/>
<source>Number of OpenCL devices: %1</source>
<translation>Число OpenCL устройств: %1</translation>
</message>
<message>
- <location filename="../src/OpenCLSupportPlugin.cpp" line="237"/>
+ <location filename="../src/OpenCLSupportPlugin.cpp" line="254"/>
<source>Registering OpenCL-enabled GPU: %1, global mem: %2 Mb, local mem: %3 Kb, max compute units: %4, max work group size: %5, max frequency: %6 Hz</source>
<translation>Зарегистрирован OpenCL GPU: %1, глобальная память: %2 Mb, локальная память: %3 Kb, максимальное количество юнитов: %4, максимальный размер рабочей группы: %5, максимальная частота: %6 Hz</translation>
</message>
<message>
- <location filename="../src/OpenCLSupportPlugin.cpp" line="255"/>
+ <location filename="../src/OpenCLSupportPlugin.cpp" line="272"/>
<source>OpenCL error code (%1)</source>
<translation>Код ошибки OpenCL (%1)</translation>
</message>
diff --git a/src/plugins/orf_marker/src/ORFDialog.cpp b/src/plugins/orf_marker/src/ORFDialog.cpp
index 32dfa55..b863109 100644
--- a/src/plugins/orf_marker/src/ORFDialog.cpp
+++ b/src/plugins/orf_marker/src/ORFDialog.cpp
@@ -79,7 +79,7 @@ ORFDialog::ORFDialog(ADVSequenceObjectContext* _ctx)
: QDialog(_ctx->getAnnotatedDNAView()->getWidget()), aaUpdateTask(NULL)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470658");
+ new HelpButton(this, buttonBox, "18220518");
tabWidget->setCurrentIndex(0);
diff --git a/src/plugins/pcr/src/EditPrimerDialog.cpp b/src/plugins/pcr/src/EditPrimerDialog.cpp
index 760a670..91fec33 100644
--- a/src/plugins/pcr/src/EditPrimerDialog.cpp
+++ b/src/plugins/pcr/src/EditPrimerDialog.cpp
@@ -50,7 +50,7 @@ EditPrimerDialog::EditPrimerDialog(QWidget *parent, const Primer &editToPrimer)
void EditPrimerDialog::init() {
GCOUNTER(cvar, tvar, "Add primer in library");
setupUi(this);
- new HelpButton(this, buttonBox, "17470749");
+ new HelpButton(this, buttonBox, "18220609");
primerEdit->setValidator(new PrimerValidator(this));
diff --git a/src/plugins/pcr/src/InSilicoPcrOPWidgetFactory.cpp b/src/plugins/pcr/src/InSilicoPcrOPWidgetFactory.cpp
index bb66096..bd7164b 100644
--- a/src/plugins/pcr/src/InSilicoPcrOPWidgetFactory.cpp
+++ b/src/plugins/pcr/src/InSilicoPcrOPWidgetFactory.cpp
@@ -30,7 +30,7 @@
namespace U2 {
-const QString InSilicoPcrOPWidgetFactory::GROUP_DOC_PAGE = "17470747";
+const QString InSilicoPcrOPWidgetFactory::GROUP_DOC_PAGE = "18220607";
InSilicoPcrOPWidgetFactory::InSilicoPcrOPWidgetFactory()
: OPWidgetFactory()
diff --git a/src/plugins/pcr/src/PrimerLibrarySelector.cpp b/src/plugins/pcr/src/PrimerLibrarySelector.cpp
index 5953dbe..e067c1c 100644
--- a/src/plugins/pcr/src/PrimerLibrarySelector.cpp
+++ b/src/plugins/pcr/src/PrimerLibrarySelector.cpp
@@ -36,7 +36,7 @@ PrimerLibrarySelector::PrimerLibrarySelector(QWidget *parent)
{
GCOUNTER(cvar, tvar, "PrimerLibrarySelector");
setupUi(this);
- new HelpButton(this, buttonBox, "17470747");
+ new HelpButton(this, buttonBox, "18220607");
connect(primerTable, SIGNAL(doubleClicked(const QModelIndex &)), SLOT(accept()));
connect(primerTable->selectionModel(), SIGNAL(selectionChanged(const QItemSelection &, const QItemSelection &)), SLOT(sl_selectionChanged()));
diff --git a/src/plugins/pcr/src/PrimerLibraryWidget.cpp b/src/plugins/pcr/src/PrimerLibraryWidget.cpp
index 6b00961..67d3e70 100644
--- a/src/plugins/pcr/src/PrimerLibraryWidget.cpp
+++ b/src/plugins/pcr/src/PrimerLibraryWidget.cpp
@@ -52,7 +52,7 @@ PrimerLibraryWidget::PrimerLibraryWidget(QWidget *parent)
: QWidget(parent), editPrimerButton(NULL), removePrimersButton(NULL)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470749");
+ new HelpButton(this, buttonBox, "18220609");
QPushButton *newPrimerButton = buttonBox->addButton(tr("New primer"), QDialogButtonBox::ActionRole);
connect(newPrimerButton, SIGNAL(clicked()), SLOT(sl_newPrimer()));
diff --git a/src/plugins/pcr/src/PrimersDetailsDialog.cpp b/src/plugins/pcr/src/PrimersDetailsDialog.cpp
index 5836636..4a00a55 100644
--- a/src/plugins/pcr/src/PrimersDetailsDialog.cpp
+++ b/src/plugins/pcr/src/PrimersDetailsDialog.cpp
@@ -32,7 +32,7 @@ PrimersDetailsDialog::PrimersDetailsDialog(QWidget *parent, const QString &detai
{
GCOUNTER(cvar, tvar, "PrimersDetailsDialog");
setupUi(this);
- new HelpButton(this, buttonBox, "17470748");
+ new HelpButton(this, buttonBox, "18220608");
textEdit->setText(details);
}
diff --git a/src/plugins/pcr/src/export/ExportPrimersDialog.cpp b/src/plugins/pcr/src/export/ExportPrimersDialog.cpp
index 8a73ccb..bacd267 100644
--- a/src/plugins/pcr/src/export/ExportPrimersDialog.cpp
+++ b/src/plugins/pcr/src/export/ExportPrimersDialog.cpp
@@ -61,7 +61,7 @@ ExportPrimersDialog::ExportPrimersDialog(const QList<Primer> &primers, QWidget *
primers(primers)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470749");
+ new HelpButton(this, buttonBox, "18220609");
init();
connectSignals();
diff --git a/src/plugins/pcr/src/import/ImportPrimersDialog.cpp b/src/plugins/pcr/src/import/ImportPrimersDialog.cpp
index f7253eb..799e762 100644
--- a/src/plugins/pcr/src/import/ImportPrimersDialog.cpp
+++ b/src/plugins/pcr/src/import/ImportPrimersDialog.cpp
@@ -49,7 +49,7 @@ ImportPrimersDialog::ImportPrimersDialog(QWidget *parent) :
waitForConnection(false)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470749");
+ new HelpButton(this, buttonBox, "18220609");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Import"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
init();
diff --git a/src/plugins/pcr/transl/russian.ts b/src/plugins/pcr/transl/russian.ts
index 23344b3..6032066 100644
--- a/src/plugins/pcr/transl/russian.ts
+++ b/src/plugins/pcr/transl/russian.ts
@@ -926,7 +926,7 @@
<message>
<location filename="../src/PcrPlugin.cpp" line="62"/>
<source>Primer library</source>
- <translation>Библиотека праймеров</translation>
+ <translation>Библиотека олигонуклеотидов</translation>
</message>
</context>
<context>
@@ -970,7 +970,7 @@
<message>
<location filename="../src/PrimerLibraryMdiWindow.cpp" line="38"/>
<source>Primer Library</source>
- <translation>Библиотека праймеров</translation>
+ <translation>Библиотека олигонуклеотидов</translation>
</message>
</context>
<context>
@@ -1022,8 +1022,8 @@
<location filename="../src/PrimerLibraryTable.cpp" line="215"/>
<source>Your primer library is empty.
Use "Tools -> Primer -> Primer Library" for managing the library.</source>
- <translation>Ваша библиотека праймеров пуста.
-Используйте "Инструменты -> Праймер -> Библиотека праймеров" для управления библиотекой праймеров.</translation>
+ <translation>Ваша Библиотека олигонуклеотидов пуста.
+Используйте "Инструменты -> Праймер -> Библиотека олигонуклеотидов" для управления библиотекой праймеров.</translation>
</message>
</context>
<context>
diff --git a/src/plugins/query_designer/src/QDRunDialog.cpp b/src/plugins/query_designer/src/QDRunDialog.cpp
index 2b622b3..e4bc09b 100644
--- a/src/plugins/query_designer/src/QDRunDialog.cpp
+++ b/src/plugins/query_designer/src/QDRunDialog.cpp
@@ -79,7 +79,7 @@ QDRunDialog::QDRunDialog(QDScheme* _scheme, QWidget* parent, const QString& defa
saveController(NULL)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470365");
+ new HelpButton(this, buttonBox, "18220225");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Run"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
@@ -293,7 +293,7 @@ QList<Task*> QDRunDialogTask::onSubTaskFinished(Task* subTask) {
QDDialog::QDDialog(ADVSequenceObjectContext* _ctx)
: QDialog(_ctx->getAnnotatedDNAView()->getWidget()), ctx(_ctx), scheme(NULL), txtDoc(NULL) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470366");
+ new HelpButton(this, buttonBox, "18220226");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Search"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins/query_designer/src/QueryViewController.cpp b/src/plugins/query_designer/src/QueryViewController.cpp
index a22e71c..7360f6d 100644
--- a/src/plugins/query_designer/src/QueryViewController.cpp
+++ b/src/plugins/query_designer/src/QueryViewController.cpp
@@ -1147,7 +1147,7 @@ AddConstraintDialog::AddConstraintDialog(QueryScene* _scene, QDDistanceType _kin
QDElement* defSrc, QDElement* defDst)
: scene(_scene), kind(_kind) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470350");
+ new HelpButton(this, buttonBox, "18220210");
QString title = "Add %1 Constraint";
switch (kind)
diff --git a/src/plugins/remote_blast/src/SendSelectionDialog.cpp b/src/plugins/remote_blast/src/SendSelectionDialog.cpp
index ea6e391..6943bcf 100644
--- a/src/plugins/remote_blast/src/SendSelectionDialog.cpp
+++ b/src/plugins/remote_blast/src/SendSelectionDialog.cpp
@@ -137,7 +137,7 @@ SendSelectionDialog::SendSelectionDialog(const U2SequenceObject* dnaso, bool _is
ca_m.sequenceLen = dnaso->getSequenceLength();
ca_c = new CreateAnnotationWidgetController(ca_m, this);
setupUi(this);
- new HelpButton(this, buttonBox, "17470659");
+ new HelpButton(this, buttonBox, "18220519");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Search"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins/repeat_finder/src/FindRepeatsDialog.cpp b/src/plugins/repeat_finder/src/FindRepeatsDialog.cpp
index 44e7515..9981e93 100644
--- a/src/plugins/repeat_finder/src/FindRepeatsDialog.cpp
+++ b/src/plugins/repeat_finder/src/FindRepeatsDialog.cpp
@@ -78,7 +78,7 @@ FindRepeatsDialog::FindRepeatsDialog(ADVSequenceObjectContext* _sc)
{
sc = _sc;
setupUi(this);
- new HelpButton(this, buttonBox, "17470663");
+ new HelpButton(this, buttonBox, "18220523");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Start"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins/repeat_finder/src/FindTandemsDialog.cpp b/src/plugins/repeat_finder/src/FindTandemsDialog.cpp
index e5fab6f..911666d 100644
--- a/src/plugins/repeat_finder/src/FindTandemsDialog.cpp
+++ b/src/plugins/repeat_finder/src/FindTandemsDialog.cpp
@@ -64,7 +64,7 @@ FindTandemsDialog::FindTandemsDialog(ADVSequenceObjectContext* _sc)
{
sc = _sc;
setupUi(this);
- new HelpButton(this, buttonBox, "17470664");
+ new HelpButton(this, buttonBox, "18220524");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Start"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins/weight_matrix/src/PWMBuildDialogController.cpp b/src/plugins/weight_matrix/src/PWMBuildDialogController.cpp
index 9ecc13c..884f0d7 100644
--- a/src/plugins/weight_matrix/src/PWMBuildDialogController.cpp
+++ b/src/plugins/weight_matrix/src/PWMBuildDialogController.cpp
@@ -64,7 +64,7 @@ PWMBuildDialogController::PWMBuildDialogController(QWidget* w)
logoArea(NULL) {
task = NULL;
setupUi(this);
- new HelpButton(this, buttonBox, "17470719");
+ new HelpButton(this, buttonBox, "18220579");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Start"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins/weight_matrix/src/PWMJASPARDialogController.cpp b/src/plugins/weight_matrix/src/PWMJASPARDialogController.cpp
index c3ff50c..db16f85 100644
--- a/src/plugins/weight_matrix/src/PWMJASPARDialogController.cpp
+++ b/src/plugins/weight_matrix/src/PWMJASPARDialogController.cpp
@@ -38,7 +38,7 @@ namespace U2 {
PWMJASPARDialogController::PWMJASPARDialogController(QWidget *w)
: QDialog(w) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470718");
+ new HelpButton(this, buttonBox, "18220578");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Select"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins/weight_matrix/src/PWMSearchDialogController.cpp b/src/plugins/weight_matrix/src/PWMSearchDialogController.cpp
index 4fdbac5..fd8a21c 100644
--- a/src/plugins/weight_matrix/src/PWMSearchDialogController.cpp
+++ b/src/plugins/weight_matrix/src/PWMSearchDialogController.cpp
@@ -108,7 +108,7 @@ public:
PWMSearchDialogController::PWMSearchDialogController(ADVSequenceObjectContext* _ctx, QWidget *p):QDialog(p) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470717");
+ new HelpButton(this, buttonBox, "18220577");
buttonBox->button(QDialogButtonBox::Yes)->setText(tr("Add to queue"));
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Search"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins/weight_matrix/src/SetParametersDialogController.cpp b/src/plugins/weight_matrix/src/SetParametersDialogController.cpp
index c1a5a2b..aad27f0 100644
--- a/src/plugins/weight_matrix/src/SetParametersDialogController.cpp
+++ b/src/plugins/weight_matrix/src/SetParametersDialogController.cpp
@@ -37,7 +37,7 @@ SetParametersDialogController::SetParametersDialogController(QWidget *w)
: QDialog(w) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470717");
+ new HelpButton(this, buttonBox, "18220577");
QStringList algo = AppContext::getPWMConversionAlgorithmRegistry()->getAlgorithmIds();
algorithmComboBox->addItems(algo);
diff --git a/src/plugins/weight_matrix/src/ViewMatrixDialogController.cpp b/src/plugins/weight_matrix/src/ViewMatrixDialogController.cpp
index 7cfd32a..5d7b2e9 100644
--- a/src/plugins/weight_matrix/src/ViewMatrixDialogController.cpp
+++ b/src/plugins/weight_matrix/src/ViewMatrixDialogController.cpp
@@ -183,7 +183,7 @@ ViewMatrixDialogController::ViewMatrixDialogController(PFMatrix matrix, QWidget
ViewMatrixDialogController::ViewMatrixDialogController(PWMatrix matrix, QWidget *w): QDialog(w){
setupUi(this);
- new HelpButton(this, buttonBox, "17470719");
+ new HelpButton(this, buttonBox, "18220579");
buttonBox->button(QDialogButtonBox::Close)->setText(tr("Close"));
ml = new MatrixAndLogoController(matrix, this);
diff --git a/src/plugins/workflow_designer/src/CreateScriptWorker.cpp b/src/plugins/workflow_designer/src/CreateScriptWorker.cpp
index d6cf5c5..2272c15 100644
--- a/src/plugins/workflow_designer/src/CreateScriptWorker.cpp
+++ b/src/plugins/workflow_designer/src/CreateScriptWorker.cpp
@@ -358,7 +358,7 @@ private:
CreateScriptElementDialog::CreateScriptElementDialog(QWidget *p, ActorPrototype* proto): QDialog(p), editing(false) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470841");
+ new HelpButton(this, buttonBox, "18220701");
inputList->setModel(new CfgListModel());
inputList->setItemDelegate(new ProxyDelegate());
diff --git a/src/plugins/workflow_designer/src/DashboardsManagerDialog.cpp b/src/plugins/workflow_designer/src/DashboardsManagerDialog.cpp
index 04bfa8d..71232e7 100644
--- a/src/plugins/workflow_designer/src/DashboardsManagerDialog.cpp
+++ b/src/plugins/workflow_designer/src/DashboardsManagerDialog.cpp
@@ -45,7 +45,7 @@ DashboardsManagerDialog::DashboardsManagerDialog(ScanDashboardsDirTask *_task, Q
: QDialog(parent), task(_task)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470839");
+ new HelpButton(this, buttonBox, "18220699");
setupList();
diff --git a/src/plugins/workflow_designer/src/ImportSchemaDialog.cpp b/src/plugins/workflow_designer/src/ImportSchemaDialog.cpp
index 944d896..a2bc1f1 100644
--- a/src/plugins/workflow_designer/src/ImportSchemaDialog.cpp
+++ b/src/plugins/workflow_designer/src/ImportSchemaDialog.cpp
@@ -31,7 +31,7 @@ namespace U2 {
ImportSchemaDialog::ImportSchemaDialog(QWidget* p) : QDialog(p) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470853");
+ new HelpButton(this, buttonBox, "18220713");
}
void ImportSchemaDialog::accept() {
diff --git a/src/plugins/workflow_designer/src/PortAliasesConfigurationDialog.cpp b/src/plugins/workflow_designer/src/PortAliasesConfigurationDialog.cpp
index 661c663..b89e966 100644
--- a/src/plugins/workflow_designer/src/PortAliasesConfigurationDialog.cpp
+++ b/src/plugins/workflow_designer/src/PortAliasesConfigurationDialog.cpp
@@ -39,7 +39,7 @@ namespace Workflow {
PortAliasesConfigurationDialog::PortAliasesConfigurationDialog( const Schema & schema, QWidget * p )
: QDialog(p), portNameMaxSz(0), currentRow(-1) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470853");
+ new HelpButton(this, buttonBox, "18220713");
QPushButton* cancelPushButton = buttonBox->button(QDialogButtonBox::Cancel);
QPushButton* okPushButton = buttonBox->button(QDialogButtonBox::Ok);
diff --git a/src/plugins/workflow_designer/src/SchemaAliasesConfigurationDialogImpl.cpp b/src/plugins/workflow_designer/src/SchemaAliasesConfigurationDialogImpl.cpp
index 33d6bf8..ed55b33 100644
--- a/src/plugins/workflow_designer/src/SchemaAliasesConfigurationDialogImpl.cpp
+++ b/src/plugins/workflow_designer/src/SchemaAliasesConfigurationDialogImpl.cpp
@@ -37,7 +37,7 @@ namespace Workflow {
SchemaAliasesConfigurationDialogImpl::SchemaAliasesConfigurationDialogImpl( const Schema & schema, QWidget * p )
: QDialog(p), procNameMaxSz(0) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470853");
+ new HelpButton(this, buttonBox, "18220713");
QPushButton* cancelPushButton = buttonBox->button(QDialogButtonBox::Cancel);
QPushButton* okPushButton = buttonBox->button(QDialogButtonBox::Ok);
diff --git a/src/plugins/workflow_designer/src/StartupDialog.cpp b/src/plugins/workflow_designer/src/StartupDialog.cpp
index 1fef5f3..46814fc 100644
--- a/src/plugins/workflow_designer/src/StartupDialog.cpp
+++ b/src/plugins/workflow_designer/src/StartupDialog.cpp
@@ -36,7 +36,7 @@ StartupDialog::StartupDialog(QWidget *parent)
: QDialog(parent)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470810");
+ new HelpButton(this, buttonBox, "18220670");
label->setStyleSheet(L10N::infoHintStyleSheet());
diff --git a/src/plugins/workflow_designer/src/WorkflowDesignerPlugin.cpp b/src/plugins/workflow_designer/src/WorkflowDesignerPlugin.cpp
index 896b418..7b78a37 100644
--- a/src/plugins/workflow_designer/src/WorkflowDesignerPlugin.cpp
+++ b/src/plugins/workflow_designer/src/WorkflowDesignerPlugin.cpp
@@ -343,6 +343,8 @@ void WorkflowDesignerService::initSampleActions() {
ngsRawDna.requiredPlugins << externalToolsPlugin;
SampleAction ngsVariants(ToolsMenu::NGS_CALL_VARIANTS, ToolsMenu::NGS_MENU, "NGS/call_variants.uwl", SampleAction::Select, tr("Variant calling"));
ngsVariants.requiredPlugins << externalToolsPlugin;
+ SampleAction ngsVariantsAndEffect(ToolsMenu::NGS_CALL_VARIANTS_AND_EFFECT, ToolsMenu::NGS_MENU, "NGS/call_variants_full.uwl", SampleAction::Select, tr("Variant calling and effects prediction"));
+ ngsVariantsAndEffect.requiredPlugins << externalToolsPlugin;
SampleAction ngsEffect(ToolsMenu::NGS_VARIANT_EFFECT, ToolsMenu::NGS_MENU, "NGS/variation_annotation.uwl", SampleAction::Select, tr("Annotate variants and predict effects"));
ngsEffect.requiredPlugins << externalToolsPlugin;
SampleAction ngsRawRna(ToolsMenu::NGS_RAW_RNA, ToolsMenu::NGS_MENU, "NGS/raw_rna.uwl", SampleAction::Select, tr("Raw RNA-Seq data processing"));
@@ -369,6 +371,7 @@ void WorkflowDesignerService::initSampleActions() {
samples->registerAction(ngsControl);
samples->registerAction(ngsRawDna);
samples->registerAction(ngsVariants);
+ samples->registerAction(ngsVariantsAndEffect);
samples->registerAction(ngsEffect);
samples->registerAction(ngsRawRna);
samples->registerAction(ngsRna);
diff --git a/src/plugins/workflow_designer/src/WorkflowMetaDialog.cpp b/src/plugins/workflow_designer/src/WorkflowMetaDialog.cpp
index bbe193e..23dfc1d 100644
--- a/src/plugins/workflow_designer/src/WorkflowMetaDialog.cpp
+++ b/src/plugins/workflow_designer/src/WorkflowMetaDialog.cpp
@@ -42,7 +42,7 @@ WorkflowMetaDialog::WorkflowMetaDialog(QWidget * p, const Metadata& meta)
meta(meta),
saveController(NULL) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470832");
+ new HelpButton(this, buttonBox, "18220692");
cancelButton = buttonBox->button(QDialogButtonBox::Cancel);
okButton = buttonBox->button(QDialogButtonBox::Ok);
diff --git a/src/plugins/workflow_designer/src/WorkflowSettingsController.cpp b/src/plugins/workflow_designer/src/WorkflowSettingsController.cpp
index 5c89896..42d2eab 100644
--- a/src/plugins/workflow_designer/src/WorkflowSettingsController.cpp
+++ b/src/plugins/workflow_designer/src/WorkflowSettingsController.cpp
@@ -75,7 +75,7 @@ AppSettingsGUIPageWidget* WorkflowSettingsPageController::createWidget(AppSettin
return r;
}
-const QString WorkflowSettingsPageController::helpPageId = QString("17470451");
+const QString WorkflowSettingsPageController::helpPageId = QString("18220311");
WorkflowSettingsPageWidget::WorkflowSettingsPageWidget(WorkflowSettingsPageController* ) {
setupUi(this);
diff --git a/src/plugins/workflow_designer/src/WorkflowViewController.cpp b/src/plugins/workflow_designer/src/WorkflowViewController.cpp
index 5a61fa9..31fdbf2 100644
--- a/src/plugins/workflow_designer/src/WorkflowViewController.cpp
+++ b/src/plugins/workflow_designer/src/WorkflowViewController.cpp
@@ -1063,6 +1063,7 @@ void WorkflowView::removeProcessItem(WorkflowProcessItem *item) {
scene->setModified();
schema->removeProcess(actor);
+ meta.removeActorMeta(actor->getId());
delete actor;
removeWizards();
diff --git a/src/plugins/workflow_designer/src/library/BaseDocWriter.cpp b/src/plugins/workflow_designer/src/library/BaseDocWriter.cpp
index ab57b13..811c4f8 100644
--- a/src/plugins/workflow_designer/src/library/BaseDocWriter.cpp
+++ b/src/plugins/workflow_designer/src/library/BaseDocWriter.cpp
@@ -302,6 +302,7 @@ Document * BaseDocWriter::getDocument(IOAdapter *io, U2OpStatus &os) {
Document *doc = format->createNewLoadedDocument(io->getFactory(), io->getURL(), os, hints);
CHECK_OP(os, NULL);
+ doc->setDocumentOwnsDbiResources(false);
docs[io] = doc;
return doc;
}
diff --git a/src/plugins/workflow_designer/src/library/ConvertSnpeffVariationsToAnnotationsWorker.cpp b/src/plugins/workflow_designer/src/library/ConvertSnpeffVariationsToAnnotationsWorker.cpp
new file mode 100644
index 0000000..7054092
--- /dev/null
+++ b/src/plugins/workflow_designer/src/library/ConvertSnpeffVariationsToAnnotationsWorker.cpp
@@ -0,0 +1,177 @@
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <U2Core/AppContext.h>
+#include <U2Core/AnnotationTableObject.h>
+#include <U2Core/BaseDocumentFormats.h>
+#include <U2Core/GUrlUtils.h>
+#include <U2Core/L10n.h>
+#include <U2Core/TaskSignalMapper.h>
+
+#include <U2Designer/DelegateEditors.h>
+
+#include <U2Formats/ConvertSnpeffVariationsToAnnotationsTask.h>
+
+#include <U2Lang/ActorPrototypeRegistry.h>
+#include <U2Lang/BaseActorCategories.h>
+#include <U2Lang/BaseAttributes.h>
+#include <U2Lang/BasePorts.h>
+#include <U2Lang/BaseSlots.h>
+#include <U2Lang/BaseTypes.h>
+#include <U2Lang/URLAttribute.h>
+#include <U2Lang/WorkflowEnv.h>
+#include <U2Lang/WorkflowMonitor.h>
+
+#include "ConvertSnpeffVariationsToAnnotationsWorker.h"
+
+namespace U2 {
+namespace LocalWorkflow {
+
+static const QString IN_VARIATIONS_URL_PORT_ID = "in-variations-url"; // Id of the element's input port: used to declare it in Factory::init() and to look it up in the prompter and the worker.
+
+ConvertSnpeffVariationsToAnnotationsPrompter::ConvertSnpeffVariationsToAnnotationsPrompter(Actor *actor)
+ : PrompterBase<ConvertSnpeffVariationsToAnnotationsPrompter>(actor)
+{
+ // Nothing to set up here: the actor is simply forwarded to PrompterBase.
+}
+
+QString ConvertSnpeffVariationsToAnnotationsPrompter::composeRichDoc() { // Builds the element description, naming whatever produces the input URLs.
+    IntegralBusPort *urlPort = qobject_cast<IntegralBusPort *>(target->getPort(IN_VARIATIONS_URL_PORT_ID));
+    SAFE_POINT(NULL != urlPort, "No input port", "");
+    const QString unsetMarker = "<font color='red'>" + tr("unset") + "</font>"; // shown while no producer is connected
+    const Actor *urlProducer = urlPort->getProducer(BaseSlots::URL_SLOT().getId());
+    const QString sourceLabel = (NULL == urlProducer) ? unsetMarker : urlProducer->getLabel();
+    return tr("Parses information in variations from <u>%1</u> into annotations.").arg(sourceLabel);
+}
+
+const QString ConvertSnpeffVariationsToAnnotationsFactory::ACTOR_ID = "convert-snpeff-variations-to-annotations"; // Id passed to DomainFactory by the constructor and used for the prototype descriptor in init().
+
+ConvertSnpeffVariationsToAnnotationsFactory::ConvertSnpeffVariationsToAnnotationsFactory()
+ : DomainFactory(ACTOR_ID)
+{
+ // The factory is identified by ACTOR_ID; no further state is set up.
+}
+
+Worker * ConvertSnpeffVariationsToAnnotationsFactory::createWorker(Actor *actor) { // Creates a new conversion worker for the given actor.
+ return new ConvertSnpeffVariationsToAnnotationsWorker(actor);
+}
+
+void ConvertSnpeffVariationsToAnnotationsFactory::init() { // Registers the element's prototype, its property editor and a worker factory in the local domain.
+    QList<PortDescriptor *> ports;
+    { // single input port: a map type with one string slot that carries the variations file URL
+        Descriptor inDesc(IN_VARIATIONS_URL_PORT_ID,
+                          ConvertSnpeffVariationsToAnnotationsWorker::tr("Input URL"),
+                          ConvertSnpeffVariationsToAnnotationsWorker::tr("Input variation file URL."));
+
+        QMap<Descriptor, DataTypePtr> inType;
+        inType[BaseSlots::URL_SLOT()] = BaseTypes::STRING_TYPE();
+
+        ports << new PortDescriptor(inDesc, DataTypePtr(new MapDataType(ACTOR_ID + "-in", inType)), true /*input*/);
+    }
+    // Candidate output formats: annotation-table object type, writing flag required, "cannot be created" flag excluded.
+    DocumentFormatConstraints constraints;
+    constraints.supportedObjectTypes.insert(GObjectTypes::ANNOTATION_TABLE);
+    constraints.addFlagToSupport(DocumentFormatFlag_SupportWriting);
+    constraints.addFlagToExclude(DocumentFormatFlag_CannotBeCreated);
+    QList<DocumentFormatId> supportedFormats = AppContext::getDocumentFormatRegistry()->selectFormats(constraints);
+
+    QList<Attribute *> attributes;
+    { // two parameters: output URL (empty by default) and output document format
+        attributes << new Attribute(BaseAttributes::URL_OUT_ATTRIBUTE(), BaseTypes::STRING_TYPE(), false, "");
+        // Default format is GenBank; first() is only consulted on a non-empty list because QList::first() requires one.
+        const DocumentFormatId format = ((supportedFormats.isEmpty() || supportedFormats.contains(BaseDocumentFormats::PLAIN_GENBANK)) ? BaseDocumentFormats::PLAIN_GENBANK : supportedFormats.first());
+        Attribute *documentFormatAttribute = new Attribute(BaseAttributes::DOCUMENT_FORMAT_ATTRIBUTE(), BaseTypes::STRING_TYPE(), false, format);
+        documentFormatAttribute->addRelation(new FileExtensionRelation(BaseAttributes::URL_OUT_ATTRIBUTE().getId()));
+        attributes << documentFormatAttribute;
+    }
+
+    Descriptor desc(ACTOR_ID,
+                    ConvertSnpeffVariationsToAnnotationsWorker::tr("Convert SnpEff Variations to Annotations"),
+                    ConvertSnpeffVariationsToAnnotationsWorker::tr("Parses information, added to variations by SnpEff, into standard annotations."));
+    ActorPrototype *proto = new IntegralBusActorPrototype(desc, ports, attributes);
+    proto->setPrompter(new ConvertSnpeffVariationsToAnnotationsPrompter(NULL));
+    WorkflowEnv::getProtoRegistry()->registerProto(BaseActorCategories::CATEGORY_VARIATION_ANALYSIS(), proto);
+
+    QMap<QString, PropertyDelegate *> delegates;
+    { // editors for the two parameters declared above
+        DelegateTags tags;
+        tags.set(DelegateTags::PLACEHOLDER_TEXT, ConvertSnpeffVariationsToAnnotationsWorker::tr("Produced from the input file name"));
+        delegates[BaseAttributes::URL_OUT_ATTRIBUTE().getId()] = new URLDelegate(tags, "", "");
+        // The format combo box lists every selected format id, keyed by the id itself.
+        QVariantMap map;
+        foreach (const DocumentFormatId &formatId, supportedFormats) {
+            map[formatId] = formatId;
+        }
+        delegates[BaseAttributes::DOCUMENT_FORMAT_ATTRIBUTE().getId()] = new ComboBoxDelegate(map);
+    }
+    proto->setEditor(new DelegateEditor(delegates));
+
+    DomainFactory *localDomain = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
+    localDomain->registerEntry(new ConvertSnpeffVariationsToAnnotationsFactory());
+}
+
+ConvertSnpeffVariationsToAnnotationsWorker::ConvertSnpeffVariationsToAnnotationsWorker(Actor *actor)
+    : BaseWorker(actor), input(NULL)
+{
+    // The input port is resolved later in init(); start from NULL instead of an indeterminate pointer.
+}
+
+void ConvertSnpeffVariationsToAnnotationsWorker::init() { // Resolves the input port used by tick().
+ input = ports.value(IN_VARIATIONS_URL_PORT_ID); // 'ports' is not declared in this class - presumably inherited from BaseWorker; TODO confirm
+}
+
+Task * ConvertSnpeffVariationsToAnnotationsWorker::tick() { // Handles at most one input message per call.
+    const bool hasPending = input->hasMessage();
+    if (!hasPending && input->isEnded()) {
+        setDone(); // nothing queued and the input reports it has ended
+    }
+    // NULL is returned whenever no message was available on this call.
+    return hasPending ? createTask(getMessageAndSetupScriptValues(input)) : NULL;
+}
+
+void ConvertSnpeffVariationsToAnnotationsWorker::cleanup() {
+ // Empty: this implementation performs no cleanup work.
+}
+
+void ConvertSnpeffVariationsToAnnotationsWorker::sl_taskFinished(Task *task) { // Reports the written file to the workflow monitor after a successful conversion.
+    LoadConvertAndSaveSnpeffVariationsToAnnotationsTask *finishedTask = qobject_cast<LoadConvertAndSaveSnpeffVariationsToAnnotationsTask *>(task);
+    SAFE_POINT(NULL != finishedTask, L10N::nullPointerError("LoadConvertAndSaveSnpeffVariationsToAnnotationsTask"), );
+    CHECK(!(finishedTask->hasError() || finishedTask->isCanceled()), ); // failed or cancelled tasks register no output file
+    monitor()->addOutputFile(finishedTask->getResultUrl(), getActorId());
+}
+
+Task * ConvertSnpeffVariationsToAnnotationsWorker::createTask(const Message &message) { // Builds the load-convert-save task for one incoming URL message.
+    const QVariantMap messageData = message.getData().toMap();
+    const QString inputUrl = messageData.value(BaseSlots::URL_SLOT().getId()).toString();
+    const QString outputFormatId = actor->getParameter(BaseAttributes::DOCUMENT_FORMAT_ATTRIBUTE().getId())->getAttributeValue<QString>(context);
+    QString outputUrl = actor->getParameter(BaseAttributes::URL_OUT_ATTRIBUTE().getId())->getAttributeValue<QString>(context);
+    if (outputUrl.isEmpty()) { // no explicit output path: derive one from the file URL stored in the message metadata
+        const QString metadataUrl = context->getMetadataStorage().get(message.getMetadataId()).getFileUrl();
+        const GUrl renamedUrl = GUrlUtils::changeFileExt(metadataUrl, outputFormatId);
+        outputUrl = GUrlUtils::rollFileName(context->workingDir() + renamedUrl.baseFileName() + "_variants." + renamedUrl.completeFileSuffix(), "_");
+    }
+    Task *conversionTask = new LoadConvertAndSaveSnpeffVariationsToAnnotationsTask(inputUrl, context->getDataStorage()->getDbiRef(), outputUrl, outputFormatId);
+    connect(new TaskSignalMapper(conversionTask), SIGNAL(si_taskFinished(Task *)), SLOT(sl_taskFinished(Task *))); // sl_taskFinished() records the result file
+    return conversionTask;
+}
+
+} // namespace LocalWorkflow
+} // namespace U2
diff --git a/src/plugins_3rdparty/hmm3/src/workers/HMM3BuildWorker.h b/src/plugins/workflow_designer/src/library/ConvertSnpeffVariationsToAnnotationsWorker.h
similarity index 53%
rename from src/plugins_3rdparty/hmm3/src/workers/HMM3BuildWorker.h
rename to src/plugins/workflow_designer/src/library/ConvertSnpeffVariationsToAnnotationsWorker.h
index 24563a3..23741e3 100644
--- a/src/plugins_3rdparty/hmm3/src/workers/HMM3BuildWorker.h
+++ b/src/plugins/workflow_designer/src/library/ConvertSnpeffVariationsToAnnotationsWorker.h
@@ -1,73 +1,70 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#ifndef _U2_HMM3_BUILD_WORKER_H_
-#define _U2_HMM3_BUILD_WORKER_H_
-
-#include <U2Lang/LocalDomain.h>
-#include <U2Lang/WorkflowUtils.h>
-
-#include <hmmer3/hmmer.h>
-
-namespace U2 {
-
-namespace LocalWorkflow {
-
-class HMM3BuildPrompter : public PrompterBase<HMM3BuildPrompter> {
- Q_OBJECT
-public:
- HMM3BuildPrompter(Actor* p = 0) : PrompterBase<HMM3BuildPrompter>(p) {}
-protected:
- QString composeRichDoc();
-};
-
-class HMM3BuildWorker : public BaseWorker {
- Q_OBJECT
-public:
- HMM3BuildWorker(Actor* a);
-
- virtual void init();
- virtual bool isReady() const;
- virtual Task* tick();
- virtual void cleanup();
-
-private slots:
- void sl_taskFinished(Task*);
- void sl_taskFinished();
-
-protected:
- IntegralBus *input, *output;
- UHMM3BuildSettings cfg;
-};
-
-class HMM3BuildWorkerFactory : public DomainFactory {
-public:
- static const QString ACTOR;
- static void init();
- static void cleanup();
- HMM3BuildWorkerFactory() : DomainFactory(ACTOR) {}
- virtual Worker* createWorker(Actor* a) {return new HMM3BuildWorker(a);}
-};
-
-} // Workflow namespace
-} // U2 namespace
-
-#endif
+/**
+ * UGENE - Integrated Bioinformatics Tools.
+ * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
+ * http://ugene.unipro.ru
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef _U2_CONVERT_SNPEFF_VARIATIONS_TO_ANNOTATIONS_WORKER_H_
+#define _U2_CONVERT_SNPEFF_VARIATIONS_TO_ANNOTATIONS_WORKER_H_
+
+#include <U2Lang/LocalDomain.h>
+#include <U2Lang/WorkflowUtils.h>
+
+namespace U2 {
+namespace LocalWorkflow {
+
+class ConvertSnpeffVariationsToAnnotationsPrompter : public PrompterBase<ConvertSnpeffVariationsToAnnotationsPrompter> { // Prompter of the SnpEff-variations-to-annotations workflow element.
+ Q_OBJECT
+public:
+ ConvertSnpeffVariationsToAnnotationsPrompter(Actor *actor); // forwards the actor to PrompterBase
+
+private:
+ QString composeRichDoc(); // returns the HTML-formatted description naming the producer of the input URLs
+};
+
+class ConvertSnpeffVariationsToAnnotationsFactory : public DomainFactory { // Factory that creates ConvertSnpeffVariationsToAnnotationsWorker instances.
+public:
+ ConvertSnpeffVariationsToAnnotationsFactory(); // registers itself under ACTOR_ID via the DomainFactory constructor
+ Worker * createWorker(Actor *actor); // returns a newly allocated worker for the actor
+
+ static void init(); // registers the actor prototype, its editor and a factory instance
+ static const QString ACTOR_ID; // id of the workflow element
+};
+
+class ConvertSnpeffVariationsToAnnotationsWorker : public BaseWorker { // Worker that turns each incoming variations file URL into a conversion task.
+ Q_OBJECT
+public:
+ ConvertSnpeffVariationsToAnnotationsWorker(Actor *actor);
+
+ void init(); // looks up the input port
+ Task * tick(); // returns a task for the next message, or NULL when there is none
+ void cleanup(); // empty in this implementation
+
+private slots:
+ void sl_taskFinished(Task *task); // adds the result file to the workflow monitor unless the task failed or was cancelled
+
+private:
+ Task * createTask(const Message &message); // builds the conversion task from the URL carried by the message
+
+ IntegralBus *input; // input port carrying variation file URLs; assigned in init()
+};
+
+} // namespace LocalWorkflow
+} // namespace U2
+
+#endif // _U2_CONVERT_SNPEFF_VARIATIONS_TO_ANNOTATIONS_WORKER_H_
diff --git a/src/plugins/workflow_designer/src/library/CoreLib.cpp b/src/plugins/workflow_designer/src/library/CoreLib.cpp
index 23663b5..e297095 100644
--- a/src/plugins/workflow_designer/src/library/CoreLib.cpp
+++ b/src/plugins/workflow_designer/src/library/CoreLib.cpp
@@ -50,49 +50,50 @@
#include <U2Lang/WorkflowManager.h>
#include <U2Lang/WorkflowSettings.h>
+#include "AlignToReferenceWorker.h"
+#include "AminoTranslationWorker.h"
+#include "AssemblyToSequenceWorker.h"
#include "BaseDocWriter.h"
#include "CDSearchWorker.h"
+#include "ConvertFilesFormatWorker.h"
+#include "ConvertSnpeffVariationsToAnnotationsWorker.h"
#include "CoreLib.h"
#include "DocActors.h"
#include "DocWorkers.h"
#include "ExternalProcessWorker.h"
+#include "ExtractAssemblyCoverageWorker.h"
+#include "ExtractConsensusWorker.h"
+#include "ExtractMSAConsensusWorker.h"
+#include "FASTQWorkersLibrary.h"
#include "FilterAnnotationsByQualifierWorker.h"
#include "FilterAnnotationsWorker.h"
+#include "FilterBamWorker.h"
#include "FindWorker.h"
#include "GenericReadActor.h"
+#include "GetFileListWorker.h"
+#include "GroupWorker.h"
#include "ImportAnnotationsWorker.h"
#include "MSA2SequenceWorker.h"
+#include "MarkSequenceWorker.h"
+#include "MergeBamWorker.h"
+#include "MultiplexerWorker.h"
+#include "PassFilterWorker.h"
+#include "ReadAnnotationsWorker.h"
+#include "ReadAssemblyWorker.h"
+#include "ReadVariationWorker.h"
+#include "RemoteDBFetcherWorker.h"
+#include "RenameChromosomeInVariationWorker.h"
#include "ReverseComplementWorker.h"
+#include "RmdupBamWorker.h"
#include "ScriptWorker.h"
#include "SequenceSplitWorker.h"
#include "SequencesToMSAWorker.h"
+#include "SortBamWorker.h"
#include "StatisticWorkers.h"
#include "Text2SequenceWorker.h"
-#include "library/AlignToReferenceWorker.h"
-#include "library/AminoTranslationWorker.h"
-#include "library/AssemblyToSequenceWorker.h"
-#include "library/ConvertFilesFormatWorker.h"
-#include "library/ExtractAssemblyCoverageWorker.h"
-#include "library/ExtractConsensusWorker.h"
-#include "library/ExtractMSAConsensusWorker.h"
-#include "library/FASTQWorkersLibrary.h"
-#include "library/FilterBamWorker.h"
-#include "library/GetFileListWorker.h"
-#include "library/GroupWorker.h"
-#include "library/MarkSequenceWorker.h"
-#include "library/MergeBamWorker.h"
-#include "library/MultiplexerWorker.h"
-#include "library/PassFilterWorker.h"
-#include "library/ReadAnnotationsWorker.h"
-#include "library/ReadAssemblyWorker.h"
-#include "library/ReadVariationWorker.h"
-#include "library/RemoteDBFetcherWorker.h"
-#include "library/RenameChromosomeInVariationWorker.h"
-#include "library/RmdupBamWorker.h"
-#include "library/SortBamWorker.h"
-#include "library/WriteAnnotationsWorker.h"
-#include "library/WriteAssemblyWorkers.h"
-#include "library/WriteVariationWorker.h"
+#include "WriteAnnotationsWorker.h"
+#include "WriteAssemblyWorkers.h"
+#include "WriteVariationWorker.h"
#include "util/WriteSequenceValidator.h"
namespace U2 {
@@ -289,6 +290,7 @@ void CoreLib::init() {
CASAVAFilterWorkerFactory::init();
CDSearchWorkerFactory::init();
ConvertFilesFormatWorkerFactory::init();
+ ConvertSnpeffVariationsToAnnotationsFactory::init();
DNAStatWorkerFactory::init();
DataWorkerFactory::init();
ExtractAssemblyCoverageWorkerFactory::init();
diff --git a/src/plugins/workflow_designer/src/library/FASTQWorkersLibrary.cpp b/src/plugins/workflow_designer/src/library/FASTQWorkersLibrary.cpp
index 69ef73d..3c454c3 100644
--- a/src/plugins/workflow_designer/src/library/FASTQWorkersLibrary.cpp
+++ b/src/plugins/workflow_designer/src/library/FASTQWorkersLibrary.cpp
@@ -172,7 +172,10 @@ void CASAVAFilterTask::runStep(){
//1:N:0:TAAGGG
QRegExp pattern (":Y:[^:]:");
- FASTQIterator iter(settings.inputUrl);
+ FASTQIterator iter(settings.inputUrl, stateInfo);
+ if (stateInfo.hasError()) {
+ return;
+ }
while(iter.hasNext()){
if(stateInfo.isCoR()){
return;
@@ -346,7 +349,10 @@ void QualityTrimTask::runStep(){
int minLen = settings.customParameters.value(LEN_ID, 0).toInt();
bool bothEnds = settings.customParameters.value(BOTH_ID, false).toInt();
- FASTQIterator iter(settings.inputUrl);
+ FASTQIterator iter(settings.inputUrl, stateInfo);
+ if (stateInfo.hasError()) {
+ return;
+ }
while(iter.hasNext()){
if(stateInfo.isCoR()){
return;
@@ -553,7 +559,10 @@ void MergeFastqTask::runStep(){
qint64 numberOfFiles = 0;
foreach (QString url, urls){
- FASTQIterator iter(url);
+ FASTQIterator iter(url, stateInfo);
+ if (stateInfo.hasError()) {
+ return;
+ }
while(iter.hasNext()){
if(stateInfo.isCoR()){
return;
diff --git a/src/plugins/workflow_designer/src/library/FilterBamWorker.cpp b/src/plugins/workflow_designer/src/library/FilterBamWorker.cpp
index 0cf4a8b..dce2aee 100644
--- a/src/plugins/workflow_designer/src/library/FilterBamWorker.cpp
+++ b/src/plugins/workflow_designer/src/library/FilterBamWorker.cpp
@@ -256,7 +256,8 @@ Task * FilterBamWorker::tick() {
setting.skipFilter = getHexValueByFilterString(getValue<QString>(FLAG_ID), getFilterCodes());
setting.regionFilter = getValue<QString>(REGION_ID);
- Task *t = new SamtoolsViewFilterTask(setting);
+ ExternalToolSupportTask *t = new SamtoolsViewFilterTask(setting);
+ t->addListeners(createLogListeners());
connect(new TaskSignalMapper(t), SIGNAL(si_taskFinished(Task*)), SLOT(sl_taskFinished(Task*)));
return t;
}
diff --git a/src/plugins/workflow_designer/src/library/PassFilterWorker.cpp b/src/plugins/workflow_designer/src/library/PassFilterWorker.cpp
index c475aa3..f265286 100644
--- a/src/plugins/workflow_designer/src/library/PassFilterWorker.cpp
+++ b/src/plugins/workflow_designer/src/library/PassFilterWorker.cpp
@@ -98,7 +98,7 @@ void PassFilterWorkerFactory::init() {
Descriptor passVals(BaseSlots::TEXT_SLOT().getId(),
PassFilterWorker::tr("Filter by value(s)"),
- PassFilterWorker::tr("Semicolon-separated list of values used to filter the input data."));
+ PassFilterWorker::tr("Comma-separated list of values used to filter the input data."));
attrs << new Attribute(passVals, BaseTypes::STRING_TYPE(), true);
Descriptor protoDesc(PassFilterWorkerFactory::ACTOR_ID,
diff --git a/src/plugins/workflow_designer/src/library/WriteAnnotationsWorker.cpp b/src/plugins/workflow_designer/src/library/WriteAnnotationsWorker.cpp
index 380fe4d..162a141 100644
--- a/src/plugins/workflow_designer/src/library/WriteAnnotationsWorker.cpp
+++ b/src/plugins/workflow_designer/src/library/WriteAnnotationsWorker.cpp
@@ -314,6 +314,7 @@ Task * WriteAnnotationsWorker::getSaveDocTask(const QString &formatId, SaveDocFl
hints[DocumentRemovalMode_Synchronous] = QString();
Document * doc = df->createNewLoadedDocument(iof, filepath, os, hints);
CHECK_OP(os, new FailTask(os.getError()));
+ doc->setDocumentOwnsDbiResources(false);
QSet<QString> usedNames;
foreach (AnnotationTableObject *annTable, annTables) {
diff --git a/src/plugins/workflow_designer/transl/english.ts b/src/plugins/workflow_designer/transl/english.ts
index 27725be..da843a4 100644
--- a/src/plugins/workflow_designer/transl/english.ts
+++ b/src/plugins/workflow_designer/transl/english.ts
@@ -3669,8 +3669,8 @@ TCCTTACTGTCTGAGCAATGGGATTCCATCTTTTACGATCTAGACATGGCT
</message>
<message>
<location filename="../src/library/PassFilterWorker.cpp" line="101"/>
- <source>Semicolon-separated list of values used to filter the input data.</source>
- <translation>Semicolon-separated list of values used to filter the input data.</translation>
+ <source>Comma-separated list of values used to filter the input data.</source>
+ <translation>Comma-separated list of values used to filter the input data.</translation>
</message>
<message>
<location filename="../src/library/PassFilterWorker.cpp" line="105"/>
diff --git a/src/plugins/workflow_designer/transl/russian.ts b/src/plugins/workflow_designer/transl/russian.ts
index c0a765e..5433ea0 100644
--- a/src/plugins/workflow_designer/transl/russian.ts
+++ b/src/plugins/workflow_designer/transl/russian.ts
@@ -3383,8 +3383,8 @@ TCCTTACTGTCTGAGCAATGGGATTCCATCTTTTACGATCTAGACATGGCT
</message>
<message>
<location filename="../src/library/PassFilterWorker.cpp" line="101"/>
- <source>Semicolon-separated list of values used to filter the input data.</source>
- <translation>принимает на вход список значений, разделенных точкой с запятой, который будет использован для фильтрации входных данных.</translation>
+ <source>Comma-separated list of values used to filter the input data.</source>
+ <translation>принимает на вход список значений, разделенных запятой, который будет использован для фильтрации входных данных.</translation>
</message>
<message>
<location filename="../src/library/PassFilterWorker.cpp" line="105"/>
diff --git a/src/plugins/workflow_designer/workflow_designer.pro b/src/plugins/workflow_designer/workflow_designer.pro
index b590940..8a6b9e9 100644
--- a/src/plugins/workflow_designer/workflow_designer.pro
+++ b/src/plugins/workflow_designer/workflow_designer.pro
@@ -47,6 +47,7 @@ HEADERS += src/ActorCfgFilterProxyModel.h \
src/library/CDSearchWorker.h \
src/library/CfgExternalToolModel.h \
src/library/ConvertFilesFormatWorker.h \
+ src/library/ConvertSnpeffVariationsToAnnotationsWorker.h \
src/library/CoreLib.h \
src/library/CreateExternalProcessDialog.h \
src/library/DocActors.h \
@@ -97,6 +98,7 @@ HEADERS += src/ActorCfgFilterProxyModel.h \
src/util/SaveSchemaImageUtils.h \
src/util/WorkerNameValidator.h \
src/util/WriteSequenceValidator.h
+
FORMS += src/ChooseItemDialog.ui \
src/CreateScriptBlockDialog.ui \
src/DashboardsManagerDialog.ui \
@@ -110,6 +112,7 @@ FORMS += src/ChooseItemDialog.ui \
src/WorkflowEditorWidget.ui \
src/WorkflowMetaDialog.ui \
src/WorkflowSettingsWidget.ui
+
SOURCES += src/ActorCfgFilterProxyModel.cpp \
src/ActorCfgModel.cpp \
src/BreakpointManagerView.cpp \
@@ -156,6 +159,7 @@ SOURCES += src/ActorCfgFilterProxyModel.cpp \
src/library/CDSearchWorker.cpp \
src/library/CfgExternalToolModel.cpp \
src/library/ConvertFilesFormatWorker.cpp \
+ src/library/ConvertSnpeffVariationsToAnnotationsWorker.cpp \
src/library/CoreLib.cpp \
src/library/CreateExternalProcessDialog.cpp \
src/library/DocActors.cpp \
@@ -206,5 +210,6 @@ SOURCES += src/ActorCfgFilterProxyModel.cpp \
src/util/SaveSchemaImageUtils.cpp \
src/util/WorkerNameValidator.cpp \
src/util/WriteSequenceValidator.cpp
+
RESOURCES += workflow_designer.qrc
TRANSLATIONS += transl/english.ts transl/russian.ts
diff --git a/src/plugins_3rdparty/gor4/src/gor.cpp b/src/plugins_3rdparty/gor4/src/gor.cpp
index 39568b5..72a5a5b 100644
--- a/src/plugins_3rdparty/gor4/src/gor.cpp
+++ b/src/plugins_3rdparty/gor4/src/gor.cpp
@@ -213,7 +213,7 @@ void readFile(QFile& file, int nprot, char **obs, char **title, int *pnter)
if (c == '@') {
break;
}
- if(c == '\n' || c == ' ' || c =='\t') continue;
+ if(c == '\n' || c == ' ' || c =='\t' || c == '\r') continue;
nres++;
if(nres > MAXRES) {
printf("The value of MAXRES should be increased: %d",MAXRES);
diff --git a/src/plugins_3rdparty/hmm2/src/u_build/HMMBuildDialogController.cpp b/src/plugins_3rdparty/hmm2/src/u_build/HMMBuildDialogController.cpp
index a1dcd62..1089195 100644
--- a/src/plugins_3rdparty/hmm2/src/u_build/HMMBuildDialogController.cpp
+++ b/src/plugins_3rdparty/hmm2/src/u_build/HMMBuildDialogController.cpp
@@ -51,7 +51,7 @@ HMMBuildDialogController::HMMBuildDialogController(const QString& _pn, const MAl
profileName(_pn),
saveController(NULL) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470695");
+ new HelpButton(this, buttonBox, "18220555");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Build"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Close"));
diff --git a/src/plugins_3rdparty/hmm2/src/u_calibrate/HMMCalibrateDialogController.cpp b/src/plugins_3rdparty/hmm2/src/u_calibrate/HMMCalibrateDialogController.cpp
index 4d6ead6..c812832 100644
--- a/src/plugins_3rdparty/hmm2/src/u_calibrate/HMMCalibrateDialogController.cpp
+++ b/src/plugins_3rdparty/hmm2/src/u_calibrate/HMMCalibrateDialogController.cpp
@@ -42,7 +42,7 @@ HMMCalibrateDialogController::HMMCalibrateDialogController(QWidget* w)
saveController(NULL) {
task = NULL;
setupUi(this);
- new HelpButton(this, buttonBox, "17470696");
+ new HelpButton(this, buttonBox, "18220556");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Calibrate"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Close"));
diff --git a/src/plugins_3rdparty/hmm2/src/u_search/HMMSearchDialogController.cpp b/src/plugins_3rdparty/hmm2/src/u_search/HMMSearchDialogController.cpp
index aa633cc..bc8a3ee 100644
--- a/src/plugins_3rdparty/hmm2/src/u_search/HMMSearchDialogController.cpp
+++ b/src/plugins_3rdparty/hmm2/src/u_search/HMMSearchDialogController.cpp
@@ -57,7 +57,7 @@ HMMSearchDialogController::HMMSearchDialogController(const DNASequence& sequence
searchTask = NULL;
setupUi(this);
- new HelpButton(this, buttonBox, "17470697");
+ new HelpButton(this, buttonBox, "18220557");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Search"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Close"));
diff --git a/src/plugins_3rdparty/hmm3/CMakeLists.txt b/src/plugins_3rdparty/hmm3/CMakeLists.txt
deleted file mode 100644
index cfceb75..0000000
--- a/src/plugins_3rdparty/hmm3/CMakeLists.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-set(UGENE_PLUGIN_NAME hmm3)
-
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-write-strings -Wno-format")
-
-include(../../Plugin.cmake)
diff --git a/src/plugins_3rdparty/hmm3/hmm3.license b/src/plugins_3rdparty/hmm3/hmm3.license
deleted file mode 100644
index 94a9ed0..0000000
--- a/src/plugins_3rdparty/hmm3/hmm3.license
+++ /dev/null
@@ -1,674 +0,0 @@
- GNU GENERAL PUBLIC LICENSE
- Version 3, 29 June 2007
-
- Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
- Preamble
-
- The GNU General Public License is a free, copyleft license for
-software and other kinds of works.
-
- The licenses for most software and other practical works are designed
-to take away your freedom to share and change the works. By contrast,
-the GNU General Public License is intended to guarantee your freedom to
-share and change all versions of a program--to make sure it remains free
-software for all its users. We, the Free Software Foundation, use the
-GNU General Public License for most of our software; it applies also to
-any other work released this way by its authors. You can apply it to
-your programs, too.
-
- When we speak of free software, we are referring to freedom, not
-price. Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-them if you wish), that you receive source code or can get it if you
-want it, that you can change the software or use pieces of it in new
-free programs, and that you know you can do these things.
-
- To protect your rights, we need to prevent others from denying you
-these rights or asking you to surrender the rights. Therefore, you have
-certain responsibilities if you distribute copies of the software, or if
-you modify it: responsibilities to respect the freedom of others.
-
- For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must pass on to the recipients the same
-freedoms that you received. You must make sure that they, too, receive
-or can get the source code. And you must show them these terms so they
-know their rights.
-
- Developers that use the GNU GPL protect your rights with two steps:
-(1) assert copyright on the software, and (2) offer you this License
-giving you legal permission to copy, distribute and/or modify it.
-
- For the developers' and authors' protection, the GPL clearly explains
-that there is no warranty for this free software. For both users' and
-authors' sake, the GPL requires that modified versions be marked as
-changed, so that their problems will not be attributed erroneously to
-authors of previous versions.
-
- Some devices are designed to deny users access to install or run
-modified versions of the software inside them, although the manufacturer
-can do so. This is fundamentally incompatible with the aim of
-protecting users' freedom to change the software. The systematic
-pattern of such abuse occurs in the area of products for individuals to
-use, which is precisely where it is most unacceptable. Therefore, we
-have designed this version of the GPL to prohibit the practice for those
-products. If such problems arise substantially in other domains, we
-stand ready to extend this provision to those domains in future versions
-of the GPL, as needed to protect the freedom of users.
-
- Finally, every program is threatened constantly by software patents.
-States should not allow patents to restrict development and use of
-software on general-purpose computers, but in those that do, we wish to
-avoid the special danger that patents applied to a free program could
-make it effectively proprietary. To prevent this, the GPL assures that
-patents cannot be used to render the program non-free.
-
- The precise terms and conditions for copying, distribution and
-modification follow.
-
- TERMS AND CONDITIONS
-
- 0. Definitions.
-
- "This License" refers to version 3 of the GNU General Public License.
-
- "Copyright" also means copyright-like laws that apply to other kinds of
-works, such as semiconductor masks.
-
- "The Program" refers to any copyrightable work licensed under this
-License. Each licensee is addressed as "you". "Licensees" and
-"recipients" may be individuals or organizations.
-
- To "modify" a work means to copy from or adapt all or part of the work
-in a fashion requiring copyright permission, other than the making of an
-exact copy. The resulting work is called a "modified version" of the
-earlier work or a work "based on" the earlier work.
-
- A "covered work" means either the unmodified Program or a work based
-on the Program.
-
- To "propagate" a work means to do anything with it that, without
-permission, would make you directly or secondarily liable for
-infringement under applicable copyright law, except executing it on a
-computer or modifying a private copy. Propagation includes copying,
-distribution (with or without modification), making available to the
-public, and in some countries other activities as well.
-
- To "convey" a work means any kind of propagation that enables other
-parties to make or receive copies. Mere interaction with a user through
-a computer network, with no transfer of a copy, is not conveying.
-
- An interactive user interface displays "Appropriate Legal Notices"
-to the extent that it includes a convenient and prominently visible
-feature that (1) displays an appropriate copyright notice, and (2)
-tells the user that there is no warranty for the work (except to the
-extent that warranties are provided), that licensees may convey the
-work under this License, and how to view a copy of this License. If
-the interface presents a list of user commands or options, such as a
-menu, a prominent item in the list meets this criterion.
-
- 1. Source Code.
-
- The "source code" for a work means the preferred form of the work
-for making modifications to it. "Object code" means any non-source
-form of a work.
-
- A "Standard Interface" means an interface that either is an official
-standard defined by a recognized standards body, or, in the case of
-interfaces specified for a particular programming language, one that
-is widely used among developers working in that language.
-
- The "System Libraries" of an executable work include anything, other
-than the work as a whole, that (a) is included in the normal form of
-packaging a Major Component, but which is not part of that Major
-Component, and (b) serves only to enable use of the work with that
-Major Component, or to implement a Standard Interface for which an
-implementation is available to the public in source code form. A
-"Major Component", in this context, means a major essential component
-(kernel, window system, and so on) of the specific operating system
-(if any) on which the executable work runs, or a compiler used to
-produce the work, or an object code interpreter used to run it.
-
- The "Corresponding Source" for a work in object code form means all
-the source code needed to generate, install, and (for an executable
-work) run the object code and to modify the work, including scripts to
-control those activities. However, it does not include the work's
-System Libraries, or general-purpose tools or generally available free
-programs which are used unmodified in performing those activities but
-which are not part of the work. For example, Corresponding Source
-includes interface definition files associated with source files for
-the work, and the source code for shared libraries and dynamically
-linked subprograms that the work is specifically designed to require,
-such as by intimate data communication or control flow between those
-subprograms and other parts of the work.
-
- The Corresponding Source need not include anything that users
-can regenerate automatically from other parts of the Corresponding
-Source.
-
- The Corresponding Source for a work in source code form is that
-same work.
-
- 2. Basic Permissions.
-
- All rights granted under this License are granted for the term of
-copyright on the Program, and are irrevocable provided the stated
-conditions are met. This License explicitly affirms your unlimited
-permission to run the unmodified Program. The output from running a
-covered work is covered by this License only if the output, given its
-content, constitutes a covered work. This License acknowledges your
-rights of fair use or other equivalent, as provided by copyright law.
-
- You may make, run and propagate covered works that you do not
-convey, without conditions so long as your license otherwise remains
-in force. You may convey covered works to others for the sole purpose
-of having them make modifications exclusively for you, or provide you
-with facilities for running those works, provided that you comply with
-the terms of this License in conveying all material for which you do
-not control copyright. Those thus making or running the covered works
-for you must do so exclusively on your behalf, under your direction
-and control, on terms that prohibit them from making any copies of
-your copyrighted material outside their relationship with you.
-
- Conveying under any other circumstances is permitted solely under
-the conditions stated below. Sublicensing is not allowed; section 10
-makes it unnecessary.
-
- 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
-
- No covered work shall be deemed part of an effective technological
-measure under any applicable law fulfilling obligations under article
-11 of the WIPO copyright treaty adopted on 20 December 1996, or
-similar laws prohibiting or restricting circumvention of such
-measures.
-
- When you convey a covered work, you waive any legal power to forbid
-circumvention of technological measures to the extent such circumvention
-is effected by exercising rights under this License with respect to
-the covered work, and you disclaim any intention to limit operation or
-modification of the work as a means of enforcing, against the work's
-users, your or third parties' legal rights to forbid circumvention of
-technological measures.
-
- 4. Conveying Verbatim Copies.
-
- You may convey verbatim copies of the Program's source code as you
-receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy an appropriate copyright notice;
-keep intact all notices stating that this License and any
-non-permissive terms added in accord with section 7 apply to the code;
-keep intact all notices of the absence of any warranty; and give all
-recipients a copy of this License along with the Program.
-
- You may charge any price or no price for each copy that you convey,
-and you may offer support or warranty protection for a fee.
-
- 5. Conveying Modified Source Versions.
-
- You may convey a work based on the Program, or the modifications to
-produce it from the Program, in the form of source code under the
-terms of section 4, provided that you also meet all of these conditions:
-
- a) The work must carry prominent notices stating that you modified
- it, and giving a relevant date.
-
- b) The work must carry prominent notices stating that it is
- released under this License and any conditions added under section
- 7. This requirement modifies the requirement in section 4 to
- "keep intact all notices".
-
- c) You must license the entire work, as a whole, under this
- License to anyone who comes into possession of a copy. This
- License will therefore apply, along with any applicable section 7
- additional terms, to the whole of the work, and all its parts,
- regardless of how they are packaged. This License gives no
- permission to license the work in any other way, but it does not
- invalidate such permission if you have separately received it.
-
- d) If the work has interactive user interfaces, each must display
- Appropriate Legal Notices; however, if the Program has interactive
- interfaces that do not display Appropriate Legal Notices, your
- work need not make them do so.
-
- A compilation of a covered work with other separate and independent
-works, which are not by their nature extensions of the covered work,
-and which are not combined with it such as to form a larger program,
-in or on a volume of a storage or distribution medium, is called an
-"aggregate" if the compilation and its resulting copyright are not
-used to limit the access or legal rights of the compilation's users
-beyond what the individual works permit. Inclusion of a covered work
-in an aggregate does not cause this License to apply to the other
-parts of the aggregate.
-
- 6. Conveying Non-Source Forms.
-
- You may convey a covered work in object code form under the terms
-of sections 4 and 5, provided that you also convey the
-machine-readable Corresponding Source under the terms of this License,
-in one of these ways:
-
- a) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by the
- Corresponding Source fixed on a durable physical medium
- customarily used for software interchange.
-
- b) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by a
- written offer, valid for at least three years and valid for as
- long as you offer spare parts or customer support for that product
- model, to give anyone who possesses the object code either (1) a
- copy of the Corresponding Source for all the software in the
- product that is covered by this License, on a durable physical
- medium customarily used for software interchange, for a price no
- more than your reasonable cost of physically performing this
- conveying of source, or (2) access to copy the
- Corresponding Source from a network server at no charge.
-
- c) Convey individual copies of the object code with a copy of the
- written offer to provide the Corresponding Source. This
- alternative is allowed only occasionally and noncommercially, and
- only if you received the object code with such an offer, in accord
- with subsection 6b.
-
- d) Convey the object code by offering access from a designated
- place (gratis or for a charge), and offer equivalent access to the
- Corresponding Source in the same way through the same place at no
- further charge. You need not require recipients to copy the
- Corresponding Source along with the object code. If the place to
- copy the object code is a network server, the Corresponding Source
- may be on a different server (operated by you or a third party)
- that supports equivalent copying facilities, provided you maintain
- clear directions next to the object code saying where to find the
- Corresponding Source. Regardless of what server hosts the
- Corresponding Source, you remain obligated to ensure that it is
- available for as long as needed to satisfy these requirements.
-
- e) Convey the object code using peer-to-peer transmission, provided
- you inform other peers where the object code and Corresponding
- Source of the work are being offered to the general public at no
- charge under subsection 6d.
-
- A separable portion of the object code, whose source code is excluded
-from the Corresponding Source as a System Library, need not be
-included in conveying the object code work.
-
- A "User Product" is either (1) a "consumer product", which means any
-tangible personal property which is normally used for personal, family,
-or household purposes, or (2) anything designed or sold for incorporation
-into a dwelling. In determining whether a product is a consumer product,
-doubtful cases shall be resolved in favor of coverage. For a particular
-product received by a particular user, "normally used" refers to a
-typical or common use of that class of product, regardless of the status
-of the particular user or of the way in which the particular user
-actually uses, or expects or is expected to use, the product. A product
-is a consumer product regardless of whether the product has substantial
-commercial, industrial or non-consumer uses, unless such uses represent
-the only significant mode of use of the product.
-
- "Installation Information" for a User Product means any methods,
-procedures, authorization keys, or other information required to install
-and execute modified versions of a covered work in that User Product from
-a modified version of its Corresponding Source. The information must
-suffice to ensure that the continued functioning of the modified object
-code is in no case prevented or interfered with solely because
-modification has been made.
-
- If you convey an object code work under this section in, or with, or
-specifically for use in, a User Product, and the conveying occurs as
-part of a transaction in which the right of possession and use of the
-User Product is transferred to the recipient in perpetuity or for a
-fixed term (regardless of how the transaction is characterized), the
-Corresponding Source conveyed under this section must be accompanied
-by the Installation Information. But this requirement does not apply
-if neither you nor any third party retains the ability to install
-modified object code on the User Product (for example, the work has
-been installed in ROM).
-
- The requirement to provide Installation Information does not include a
-requirement to continue to provide support service, warranty, or updates
-for a work that has been modified or installed by the recipient, or for
-the User Product in which it has been modified or installed. Access to a
-network may be denied when the modification itself materially and
-adversely affects the operation of the network or violates the rules and
-protocols for communication across the network.
-
- Corresponding Source conveyed, and Installation Information provided,
-in accord with this section must be in a format that is publicly
-documented (and with an implementation available to the public in
-source code form), and must require no special password or key for
-unpacking, reading or copying.
-
- 7. Additional Terms.
-
- "Additional permissions" are terms that supplement the terms of this
-License by making exceptions from one or more of its conditions.
-Additional permissions that are applicable to the entire Program shall
-be treated as though they were included in this License, to the extent
-that they are valid under applicable law. If additional permissions
-apply only to part of the Program, that part may be used separately
-under those permissions, but the entire Program remains governed by
-this License without regard to the additional permissions.
-
- When you convey a copy of a covered work, you may at your option
-remove any additional permissions from that copy, or from any part of
-it. (Additional permissions may be written to require their own
-removal in certain cases when you modify the work.) You may place
-additional permissions on material, added by you to a covered work,
-for which you have or can give appropriate copyright permission.
-
- Notwithstanding any other provision of this License, for material you
-add to a covered work, you may (if authorized by the copyright holders of
-that material) supplement the terms of this License with terms:
-
- a) Disclaiming warranty or limiting liability differently from the
- terms of sections 15 and 16 of this License; or
-
- b) Requiring preservation of specified reasonable legal notices or
- author attributions in that material or in the Appropriate Legal
- Notices displayed by works containing it; or
-
- c) Prohibiting misrepresentation of the origin of that material, or
- requiring that modified versions of such material be marked in
- reasonable ways as different from the original version; or
-
- d) Limiting the use for publicity purposes of names of licensors or
- authors of the material; or
-
- e) Declining to grant rights under trademark law for use of some
- trade names, trademarks, or service marks; or
-
- f) Requiring indemnification of licensors and authors of that
- material by anyone who conveys the material (or modified versions of
- it) with contractual assumptions of liability to the recipient, for
- any liability that these contractual assumptions directly impose on
- those licensors and authors.
-
- All other non-permissive additional terms are considered "further
-restrictions" within the meaning of section 10. If the Program as you
-received it, or any part of it, contains a notice stating that it is
-governed by this License along with a term that is a further
-restriction, you may remove that term. If a license document contains
-a further restriction but permits relicensing or conveying under this
-License, you may add to a covered work material governed by the terms
-of that license document, provided that the further restriction does
-not survive such relicensing or conveying.
-
- If you add terms to a covered work in accord with this section, you
-must place, in the relevant source files, a statement of the
-additional terms that apply to those files, or a notice indicating
-where to find the applicable terms.
-
- Additional terms, permissive or non-permissive, may be stated in the
-form of a separately written license, or stated as exceptions;
-the above requirements apply either way.
-
- 8. Termination.
-
- You may not propagate or modify a covered work except as expressly
-provided under this License. Any attempt otherwise to propagate or
-modify it is void, and will automatically terminate your rights under
-this License (including any patent licenses granted under the third
-paragraph of section 11).
-
- However, if you cease all violation of this License, then your
-license from a particular copyright holder is reinstated (a)
-provisionally, unless and until the copyright holder explicitly and
-finally terminates your license, and (b) permanently, if the copyright
-holder fails to notify you of the violation by some reasonable means
-prior to 60 days after the cessation.
-
- Moreover, your license from a particular copyright holder is
-reinstated permanently if the copyright holder notifies you of the
-violation by some reasonable means, this is the first time you have
-received notice of violation of this License (for any work) from that
-copyright holder, and you cure the violation prior to 30 days after
-your receipt of the notice.
-
- Termination of your rights under this section does not terminate the
-licenses of parties who have received copies or rights from you under
-this License. If your rights have been terminated and not permanently
-reinstated, you do not qualify to receive new licenses for the same
-material under section 10.
-
- 9. Acceptance Not Required for Having Copies.
-
- You are not required to accept this License in order to receive or
-run a copy of the Program. Ancillary propagation of a covered work
-occurring solely as a consequence of using peer-to-peer transmission
-to receive a copy likewise does not require acceptance. However,
-nothing other than this License grants you permission to propagate or
-modify any covered work. These actions infringe copyright if you do
-not accept this License. Therefore, by modifying or propagating a
-covered work, you indicate your acceptance of this License to do so.
-
- 10. Automatic Licensing of Downstream Recipients.
-
- Each time you convey a covered work, the recipient automatically
-receives a license from the original licensors, to run, modify and
-propagate that work, subject to this License. You are not responsible
-for enforcing compliance by third parties with this License.
-
- An "entity transaction" is a transaction transferring control of an
-organization, or substantially all assets of one, or subdividing an
-organization, or merging organizations. If propagation of a covered
-work results from an entity transaction, each party to that
-transaction who receives a copy of the work also receives whatever
-licenses to the work the party's predecessor in interest had or could
-give under the previous paragraph, plus a right to possession of the
-Corresponding Source of the work from the predecessor in interest, if
-the predecessor has it or can get it with reasonable efforts.
-
- You may not impose any further restrictions on the exercise of the
-rights granted or affirmed under this License. For example, you may
-not impose a license fee, royalty, or other charge for exercise of
-rights granted under this License, and you may not initiate litigation
-(including a cross-claim or counterclaim in a lawsuit) alleging that
-any patent claim is infringed by making, using, selling, offering for
-sale, or importing the Program or any portion of it.
-
- 11. Patents.
-
- A "contributor" is a copyright holder who authorizes use under this
-License of the Program or a work on which the Program is based. The
-work thus licensed is called the contributor's "contributor version".
-
- A contributor's "essential patent claims" are all patent claims
-owned or controlled by the contributor, whether already acquired or
-hereafter acquired, that would be infringed by some manner, permitted
-by this License, of making, using, or selling its contributor version,
-but do not include claims that would be infringed only as a
-consequence of further modification of the contributor version. For
-purposes of this definition, "control" includes the right to grant
-patent sublicenses in a manner consistent with the requirements of
-this License.
-
- Each contributor grants you a non-exclusive, worldwide, royalty-free
-patent license under the contributor's essential patent claims, to
-make, use, sell, offer for sale, import and otherwise run, modify and
-propagate the contents of its contributor version.
-
- In the following three paragraphs, a "patent license" is any express
-agreement or commitment, however denominated, not to enforce a patent
-(such as an express permission to practice a patent or covenant not to
-sue for patent infringement). To "grant" such a patent license to a
-party means to make such an agreement or commitment not to enforce a
-patent against the party.
-
- If you convey a covered work, knowingly relying on a patent license,
-and the Corresponding Source of the work is not available for anyone
-to copy, free of charge and under the terms of this License, through a
-publicly available network server or other readily accessible means,
-then you must either (1) cause the Corresponding Source to be so
-available, or (2) arrange to deprive yourself of the benefit of the
-patent license for this particular work, or (3) arrange, in a manner
-consistent with the requirements of this License, to extend the patent
-license to downstream recipients. "Knowingly relying" means you have
-actual knowledge that, but for the patent license, your conveying the
-covered work in a country, or your recipient's use of the covered work
-in a country, would infringe one or more identifiable patents in that
-country that you have reason to believe are valid.
-
- If, pursuant to or in connection with a single transaction or
-arrangement, you convey, or propagate by procuring conveyance of, a
-covered work, and grant a patent license to some of the parties
-receiving the covered work authorizing them to use, propagate, modify
-or convey a specific copy of the covered work, then the patent license
-you grant is automatically extended to all recipients of the covered
-work and works based on it.
-
- A patent license is "discriminatory" if it does not include within
-the scope of its coverage, prohibits the exercise of, or is
-conditioned on the non-exercise of one or more of the rights that are
-specifically granted under this License. You may not convey a covered
-work if you are a party to an arrangement with a third party that is
-in the business of distributing software, under which you make payment
-to the third party based on the extent of your activity of conveying
-the work, and under which the third party grants, to any of the
-parties who would receive the covered work from you, a discriminatory
-patent license (a) in connection with copies of the covered work
-conveyed by you (or copies made from those copies), or (b) primarily
-for and in connection with specific products or compilations that
-contain the covered work, unless you entered into that arrangement,
-or that patent license was granted, prior to 28 March 2007.
-
- Nothing in this License shall be construed as excluding or limiting
-any implied license or other defenses to infringement that may
-otherwise be available to you under applicable patent law.
-
- 12. No Surrender of Others' Freedom.
-
- If conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot convey a
-covered work so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you may
-not convey it at all. For example, if you agree to terms that obligate you
-to collect a royalty for further conveying from those to whom you convey
-the Program, the only way you could satisfy both those terms and this
-License would be to refrain entirely from conveying the Program.
-
- 13. Use with the GNU Affero General Public License.
-
- Notwithstanding any other provision of this License, you have
-permission to link or combine any covered work with a work licensed
-under version 3 of the GNU Affero General Public License into a single
-combined work, and to convey the resulting work. The terms of this
-License will continue to apply to the part which is the covered work,
-but the special requirements of the GNU Affero General Public License,
-section 13, concerning interaction through a network will apply to the
-combination as such.
-
- 14. Revised Versions of this License.
-
- The Free Software Foundation may publish revised and/or new versions of
-the GNU General Public License from time to time. Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
- Each version is given a distinguishing version number. If the
-Program specifies that a certain numbered version of the GNU General
-Public License "or any later version" applies to it, you have the
-option of following the terms and conditions either of that numbered
-version or of any later version published by the Free Software
-Foundation. If the Program does not specify a version number of the
-GNU General Public License, you may choose any version ever published
-by the Free Software Foundation.
-
- If the Program specifies that a proxy can decide which future
-versions of the GNU General Public License can be used, that proxy's
-public statement of acceptance of a version permanently authorizes you
-to choose that version for the Program.
-
- Later license versions may give you additional or different
-permissions. However, no additional obligations are imposed on any
-author or copyright holder as a result of your choosing to follow a
-later version.
-
- 15. Disclaimer of Warranty.
-
- THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
-APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
-HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
-OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
-IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
- 16. Limitation of Liability.
-
- IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGES.
-
- 17. Interpretation of Sections 15 and 16.
-
- If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
-
- END OF TERMS AND CONDITIONS
-
- How to Apply These Terms to Your New Programs
-
- If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
- To do so, attach the following notices to the program. It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
- <one line to give the program's name and a brief idea of what it does.>
- Copyright (C) <year> <name of author>
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-Also add information on how to contact you by electronic and paper mail.
-
- If the program does terminal interaction, make it output a short
-notice like this when it starts in an interactive mode:
-
- <program> Copyright (C) <year> <name of author>
- This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
- This is free software, and you are welcome to redistribute it
- under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License. Of course, your program's commands
-might be different; for a GUI interface, you would use an "about box".
-
- You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU GPL, see
-<http://www.gnu.org/licenses/>.
-
- The GNU General Public License does not permit incorporating your program
-into proprietary programs. If your program is a subroutine library, you
-may consider it more useful to permit linking proprietary applications with
-the library. If this is what you want to do, use the GNU Lesser General
-Public License instead of this License. But first, please read
-<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/src/plugins_3rdparty/hmm3/hmm3.pri b/src/plugins_3rdparty/hmm3/hmm3.pri
deleted file mode 100644
index 5c69546..0000000
--- a/src/plugins_3rdparty/hmm3/hmm3.pri
+++ /dev/null
@@ -1,22 +0,0 @@
-# include (hmm3.pri)
-
-PLUGIN_ID=hmm3
-PLUGIN_NAME=HMM3
-PLUGIN_VENDOR=Unipro
-CONFIG += warn_off
-include( ../../ugene_plugin_common.pri )
-
-win32 {
- QMAKE_CXXFLAGS+=/wd4244 /wd4305
- DEFINES += _CRT_SECURE_NO_WARNINGS
-}
-
-
-#adding SSE2 gcc compiler flag if building on SSE2 capable CPU
-use_sse2() {
- !win32 {
- QMAKE_CXXFLAGS += -msse2
- QMAKE_CFLAGS_DEBUG += -msse2
- QMAKE_CFLAGS_RELEASE += -msse2
- }
-}
diff --git a/src/plugins_3rdparty/hmm3/hmm3.pro b/src/plugins_3rdparty/hmm3/hmm3.pro
deleted file mode 100644
index 0352b94..0000000
--- a/src/plugins_3rdparty/hmm3/hmm3.pro
+++ /dev/null
@@ -1,152 +0,0 @@
-include (hmm3.pri)
-
-# Input
-HEADERS += src/uHMM3Plugin.h \
- src/build/uhmm3build.h \
- src/build/uHMM3BuildDialogImpl.h \
- src/build/uHMM3BuildTask.h \
- src/format/uHMMFormat.h \
- src/format/uHMMFormatReader.h \
- src/gobject/uHMMObject.h \
- src/hmmer3/hmmer.h \
- src/hmmer3/hmmer3_funcs.h \
- src/hmmer3/p7_config.h \
- src/phmmer/uhmm3phmmer.h \
- src/phmmer/uHMM3PhmmerDialogImpl.h \
- src/phmmer/uhmm3PhmmerTask.h \
- src/search/Hmmer3SearchWorkflowTask.h \
- src/search/uhmm3search.h \
- src/search/uHMM3SearchDialogImpl.h \
- src/search/uhmm3SearchResult.h \
- src/search/uHMM3SearchTask.h \
- src/search/uhmm3QDActor.h \
- src/task_local_storage/uHMMSearchTaskLocalData.h \
- src/task_local_storage/uHMMSearchTaskLocalStorage.h \
- src/tests/uhmmer3BuildTests.h \
- src/tests/uhmmer3PhmmerTests.h \
- src/tests/uhmmer3SearchTests.h \
- src/tests/uhmmer3Tests.h \
- src/util/uhmm3Utilities.h \
- src/hmmer3/easel/easel.h \
- src/hmmer3/easel/esl_alphabet.h \
- src/hmmer3/easel/esl_cluster.h \
- src/hmmer3/easel/esl_config.h \
- src/hmmer3/easel/esl_dirichlet.h \
- src/hmmer3/easel/esl_distance.h \
- src/hmmer3/easel/esl_dmatrix.h \
- src/hmmer3/easel/esl_exponential.h \
- src/hmmer3/easel/esl_gumbel.h \
- src/hmmer3/easel/esl_histogram.h \
- src/hmmer3/easel/esl_hmm.h \
- src/hmmer3/easel/esl_keyhash.h \
- src/hmmer3/easel/esl_minimizer.h \
- src/hmmer3/easel/esl_msa.h \
- src/hmmer3/easel/esl_msacluster.h \
- src/hmmer3/easel/esl_msaweight.h \
- src/hmmer3/easel/esl_random.h \
- src/hmmer3/easel/esl_randomseq.h \
- src/hmmer3/easel/esl_ratematrix.h \
- src/hmmer3/easel/esl_rootfinder.h \
- src/hmmer3/easel/esl_scorematrix.h \
- src/hmmer3/easel/esl_sq.h \
- src/hmmer3/easel/esl_sse.h \
- src/hmmer3/easel/esl_stack.h \
- src/hmmer3/easel/esl_stats.h \
- src/hmmer3/easel/esl_tree.h \
- src/hmmer3/easel/esl_vectorops.h \
- src/hmmer3/easel/esl_wuss.h \
- src/hmmer3/impl_sse/impl_sse.h \
- src/workers/HMM3IOWorker.h \
- src/workers/HMM3BuildWorker.h \
- src/workers/HMM3SearchWorker.h
-FORMS += src/build/UHMM3BuildDialog.ui \
- src/phmmer/UHMM3PhmmerDialog.ui \
- src/search/UHMM3SearchDialog.ui
-SOURCES += src/uHMM3Plugin.cpp \
- src/build/uhmm3build.cpp \
- src/build/uHMM3BuildDialogImpl.cpp \
- src/build/uHMM3BuildTask.cpp \
- src/format/uHMMFormat.cpp \
- src/format/uHMMFormatReader.cpp \
- src/gobject/uHMMObject.cpp \
- src/hmmer3/build.cpp \
- src/hmmer3/emit.cpp \
- src/hmmer3/errors.cpp \
- src/hmmer3/evalues.cpp \
- src/hmmer3/eweight.cpp \
- src/hmmer3/hmmer.cpp \
- src/hmmer3/hmmer3_funcs.cpp \
- src/hmmer3/logsum.cpp \
- src/hmmer3/modelconfig.cpp \
- src/hmmer3/modelstats.cpp \
- src/hmmer3/p7_alidisplay.cpp \
- src/hmmer3/p7_bg.cpp \
- src/hmmer3/p7_builder.cpp \
- src/hmmer3/p7_domaindef.cpp \
- src/hmmer3/p7_gmx.cpp \
- src/hmmer3/p7_hmm.cpp \
- src/hmmer3/p7_pipeline.cpp \
- src/hmmer3/p7_prior.cpp \
- src/hmmer3/p7_profile.cpp \
- src/hmmer3/p7_spensemble.cpp \
- src/hmmer3/p7_tophits.cpp \
- src/hmmer3/p7_trace.cpp \
- src/hmmer3/seqmodel.cpp \
- src/hmmer3/tracealign.cpp \
- src/phmmer/uhmm3phmmer.cpp \
- src/phmmer/uHMM3PhmmerDialogImpl.cpp \
- src/phmmer/uhmm3PhmmerTask.cpp \
- src/search/Hmmer3SearchWorkflowTask.cpp \
- src/search/uhmm3search.cpp \
- src/search/uHMM3SearchDialogImpl.cpp \
- src/search/uhmm3SearchResult.cpp \
- src/search/uHMM3SearchTask.cpp \
- src/search/uhmm3QDActor.cpp \
- src/task_local_storage/uHMMSearchTaskLocalData.cpp \
- src/task_local_storage/uHMMSearchTaskLocalStorage.cpp \
- src/tests/uhmmer3BuildTests.cpp \
- src/tests/uhmmer3PhmmerTests.cpp \
- src/tests/uhmmer3SearchTests.cpp \
- src/tests/uhmmer3Tests.cpp \
- src/util/uhmm3Utilities.cpp \
- src/hmmer3/easel/easel.cpp \
- src/hmmer3/easel/esl_alphabet.cpp \
- src/hmmer3/easel/esl_cluster.cpp \
- src/hmmer3/easel/esl_dirichlet.cpp \
- src/hmmer3/easel/esl_distance.cpp \
- src/hmmer3/easel/esl_dmatrix.cpp \
- src/hmmer3/easel/esl_exponential.cpp \
- src/hmmer3/easel/esl_gumbel.cpp \
- src/hmmer3/easel/esl_histogram.cpp \
- src/hmmer3/easel/esl_hmm.cpp \
- src/hmmer3/easel/esl_keyhash.cpp \
- src/hmmer3/easel/esl_minimizer.cpp \
- src/hmmer3/easel/esl_msa.cpp \
- src/hmmer3/easel/esl_msacluster.cpp \
- src/hmmer3/easel/esl_msaweight.cpp \
- src/hmmer3/easel/esl_random.cpp \
- src/hmmer3/easel/esl_randomseq.cpp \
- src/hmmer3/easel/esl_ratematrix.cpp \
- src/hmmer3/easel/esl_rootfinder.cpp \
- src/hmmer3/easel/esl_scorematrix.cpp \
- src/hmmer3/easel/esl_sq.cpp \
- src/hmmer3/easel/esl_sse.cpp \
- src/hmmer3/easel/esl_stack.cpp \
- src/hmmer3/easel/esl_stats.cpp \
- src/hmmer3/easel/esl_tree.cpp \
- src/hmmer3/easel/esl_vectorops.cpp \
- src/hmmer3/easel/esl_wuss.cpp \
- src/hmmer3/impl_sse/decoding.cpp \
- src/hmmer3/impl_sse/fwdback.cpp \
- src/hmmer3/impl_sse/msvfilter.cpp \
- src/hmmer3/impl_sse/null2.cpp \
- src/hmmer3/impl_sse/optacc.cpp \
- src/hmmer3/impl_sse/p7_omx.cpp \
- src/hmmer3/impl_sse/p7_oprofile.cpp \
- src/hmmer3/impl_sse/stotrace.cpp \
- src/hmmer3/impl_sse/vitfilter.cpp \
- src/workers/HMM3IOWorker.cpp \
- src/workers/HMM3BuildWorker.cpp \
- src/workers/HMM3SearchWorker.cpp
-RESOURCES += hmm3.qrc
-TRANSLATIONS += transl/english.ts transl/russian.ts
diff --git a/src/plugins_3rdparty/hmm3/hmm3.qrc b/src/plugins_3rdparty/hmm3/hmm3.qrc
deleted file mode 100644
index 7ffb755..0000000
--- a/src/plugins_3rdparty/hmm3/hmm3.qrc
+++ /dev/null
@@ -1,5 +0,0 @@
-<RCC>
- <qresource prefix="hmm3/" >
- <file>images/hmmer_16.png</file>
- </qresource>
-</RCC>
diff --git a/src/plugins_3rdparty/hmm3/src/build/uHMM3BuildDialogImpl.cpp b/src/plugins_3rdparty/hmm3/src/build/uHMM3BuildDialogImpl.cpp
deleted file mode 100644
index 93fb009..0000000
--- a/src/plugins_3rdparty/hmm3/src/build/uHMM3BuildDialogImpl.cpp
+++ /dev/null
@@ -1,243 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#include <QMessageBox>
-#include <QPushButton>
-
-#include <U2Core/AppContext.h>
-
-#include <U2Gui/DialogUtils.h>
-#include <U2Gui/HelpButton.h>
-#include <U2Gui/LastUsedDirHelper.h>
-#include <U2Gui/SaveDocumentController.h>
-#include <U2Gui/U2FileDialog.h>
-
-#include "uHMM3BuildDialogImpl.h"
-#include "gobject/uHMMObject.h"
-
-namespace U2 {
-
-const QString UHMM3BuildDialogImpl::MA_FILES_DIR_ID = "uhmmer3_build_ma_files_dir";
-const QString UHMM3BuildDialogImpl::HMM_FILES_DIR_ID = "uhmmer3_build_hmm_files_dir";
-
-void UHMM3BuildDialogImpl::setSignalsAndSlots() {
-
- QPushButton* okButton = buttonBox->button(QDialogButtonBox::Ok);
- QPushButton* cancelButton = buttonBox->button(QDialogButtonBox::Cancel);
-
- connect( maOpenFileButton, SIGNAL( clicked() ), SLOT( sl_maOpenFileButtonClicked() ) );
- connect( okButton, SIGNAL( clicked() ), SLOT( sl_buildButtonClicked() ) );
- connect( cancelButton, SIGNAL( clicked() ), SLOT( sl_cancelButtonClicked() ) );
- connect( mcFastRadioButton, SIGNAL( toggled( bool ) ), SLOT( sl_fastMCRadioButtonChanged( bool ) ) );
- connect( wblosumRSWRadioButton, SIGNAL( toggled( bool ) ), SLOT( sl_wblosumRSWRadioButtonChanged( bool ) ) );
- connect( eentESWRadioButton, SIGNAL( toggled( bool ) ), SLOT( sl_eentESWRadioButtonChanged( bool ) ) );
- connect( eclustESWRadioButton, SIGNAL( toggled( bool ) ), SLOT( sl_eclustESWRadioButtonChanged( bool ) ) );
- connect( esetESWRadioButton, SIGNAL( toggled( bool ) ), SLOT( sl_esetESWRadioButtonChanged( bool ) ) );
-
- //temporary disabling of strange label/spinbox
- fragThreshDoubleSpinBox->setVisible(false);
- fragthreshLabel->setVisible(false);
-}
-
-void UHMM3BuildDialogImpl::initialize() {
- setupUi( this );
- new HelpButton(this, buttonBox, "17470699");
- buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Build"));
- buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
-
- initSaveController();
- setModelValues(); // build settings are default here
- setSignalsAndSlots();
-}
-
-void UHMM3BuildDialogImpl::initSaveController() {
- SaveDocumentControllerConfig config;
- config.defaultDomain = HMM_FILES_DIR_ID;
- config.defaultFormatId = UHMMFormat::UHHMER_FORMAT_ID;
- config.fileDialogButton = outHmmfileToolButton;
- config.fileNameEdit = outHmmfileEdit;
- config.parentWidget = this;
- config.saveTitle = tr("Select hmm file to create");
-
- const QList<DocumentFormatId> formats = QList<DocumentFormatId>() << UHMMFormat::UHHMER_FORMAT_ID;
-
- saveController = new SaveDocumentController(config, formats, this);
-}
-
-UHMM3BuildDialogImpl::UHMM3BuildDialogImpl( const MAlignment & ma, QWidget * p )
- : QDialog(p),
- saveController(NULL) {
- initialize();
- model.alignment = ma;
- model.alignmentUsing = !model.alignment.isEmpty();
-
- if( model.alignmentUsing ) {
- maLoadFromFileEdit->hide();
- maLoadFromFileLabel->hide();
- maOpenFileButton->hide();
- }
-}
-
-void UHMM3BuildDialogImpl::setModelValues() {
- const UHMM3BuildSettings & settings = model.buildSettings.inner;
- symfracDoubleSpinBox->setValue( settings.symfrac );
- widRSWDoubleSpinBox->setValue( settings.wid );
- eidESWDoubleSpinBox->setValue( settings.eid );
- esetESWDoubleSpinBox->setValue( settings.eset );
- emlSpinBox->setValue( settings.eml );
- emnSpinBox->setValue( settings.emn );
- evlSpinBox->setValue( settings.evl );
- evnSpinBox->setValue( settings.evn );
- eflSpinBox->setValue( settings.efl );
- efnSpinBox->setValue( settings.efn );
- eftDoubleSpinBox->setValue( settings.eft );
- seedSpinBox->setValue( settings.seed );
- esigmaDoubleSpinBox->setValue( settings.esigma );
- fragThreshDoubleSpinBox->setValue( settings.fragtresh );
-}
-
-void UHMM3BuildDialogImpl::sl_maOpenFileButtonClicked() {
- LastUsedDirHelper helper( MA_FILES_DIR_ID );
- helper.url = U2FileDialog::getOpenFileName( this, tr( "Select multiple alignment file" ),
- helper, DialogUtils::prepareDocumentsFileFilterByObjType(GObjectTypes::MULTIPLE_ALIGNMENT, true));
- if( !helper.url.isEmpty() ) {
- maLoadFromFileEdit->setText( helper.url );
- }
-}
-
-void UHMM3BuildDialogImpl::getModelValues() {
- UHMM3BuildSettings & bldSettings = model.buildSettings.inner;
- bldSettings.symfrac = symfracDoubleSpinBox->value();
- bldSettings.wid = widRSWDoubleSpinBox->value();
- bldSettings.eid = eidESWDoubleSpinBox->value();
- bldSettings.eset = esetESWDoubleSpinBox->value();
- bldSettings.eml = emlSpinBox->value();
- bldSettings.emn = emnSpinBox->value();
- bldSettings.evl = evlSpinBox->value();
- bldSettings.evn = evnSpinBox->value();
- bldSettings.efl = eflSpinBox->value();
- bldSettings.efn = efnSpinBox->value();
- bldSettings.eft = eftDoubleSpinBox->value();
- bldSettings.seed = seedSpinBox->value();
- bldSettings.esigma = esigmaDoubleSpinBox->value();
- bldSettings.fragtresh = fragThreshDoubleSpinBox->value();
- if( 0 != ereESWDoubleSpinBox->value() ) {
- bldSettings.ere = ereESWDoubleSpinBox->value();
- }
-
- if( mcFastRadioButton->isChecked() ) {
- bldSettings.archStrategy = p7_ARCH_FAST;
- } else {
- bldSettings.archStrategy = p7_ARCH_HAND;
- }
-
- if( wgscRSWRadioButton->isChecked() ) {
- bldSettings.wgtStrategy = p7_WGT_GSC;
- } else if( wblosumRSWRadioButton->isChecked() ) {
- bldSettings.wgtStrategy = p7_WGT_BLOSUM;
- } else if( wpbRSWRadioButton->isChecked() ) {
- bldSettings.wgtStrategy = p7_WGT_PB;
- } else if( wnoneRSWRadioButton->isChecked() ) {
- bldSettings.wgtStrategy = p7_WGT_NONE;
- } else if( wgivenRSWRadioButton->isChecked() ) {
- bldSettings.wgtStrategy = p7_WGT_GIVEN;
- } else {
- assert( false );
- }
-
- if( eentESWRadioButton->isChecked() ) {
- bldSettings.effnStrategy = p7_EFFN_ENTROPY;
- } else if( eclustESWRadioButton->isChecked() ) {
- bldSettings.effnStrategy = p7_EFFN_CLUST;
- } else if( enoneESWRadioButton->isChecked() ) {
- bldSettings.effnStrategy = p7_EFFN_NONE;
- } else if( esetESWRadioButton->isChecked() ) {
- bldSettings.effnStrategy = p7_EFFN_SET;
- } else {
- assert( false );
- }
-
- model.buildSettings.outFile = saveController->getSaveFileName();
- model.inputFile = maLoadFromFileEdit->text();
-}
-
-QString UHMM3BuildDialogImpl::checkModel() {
- assert( checkUHMM3BuildSettings( &model.buildSettings.inner ) );
- if( !model.alignmentUsing && model.inputFile.isEmpty() ) {
- return tr( "input file is empty" );
- }
- if( model.buildSettings.outFile.isEmpty() ) {
- return tr( "output hmm file is empty" );
- }
- return QString();
-}
-
-void UHMM3BuildDialogImpl::sl_buildButtonClicked() {
- getModelValues();
- QString err = checkModel();
- if( !err.isEmpty() ) {
- QMessageBox::critical( this, tr( "Error: bad arguments!" ), err );
- return;
- }
-
- Task * buildTask = NULL;
- if( model.alignmentUsing ) {
- buildTask = new UHMM3BuildToFileTask( model.buildSettings, model.alignment );
- } else {
- buildTask = new UHMM3BuildToFileTask( model.buildSettings, model.inputFile );
- }
- assert( NULL != buildTask );
-
- AppContext::getTaskScheduler()->registerTopLevelTask( buildTask );
- QDialog::accept();
-}
-
-void UHMM3BuildDialogImpl::sl_cancelButtonClicked() {
- reject();
-}
-
-void UHMM3BuildDialogImpl::sl_fastMCRadioButtonChanged( bool checked ) {
- mcFastSymfracLabel->setEnabled( checked );
- symfracDoubleSpinBox->setEnabled( checked );
-}
-
-void UHMM3BuildDialogImpl::sl_wblosumRSWRadioButtonChanged( bool checked ) {
- widRSWLabel->setEnabled( checked );
- widRSWDoubleSpinBox->setEnabled( checked );
-}
-
-void UHMM3BuildDialogImpl::sl_eentESWRadioButtonChanged( bool checked ) {
- ereESWDoubleSpinBox->setEnabled( checked );
- esigmaDoubleSpinBox->setEnabled( checked );
- esigmaLabel->setEnabled( checked );
- ereLabel->setEnabled( checked );
-}
-
-void UHMM3BuildDialogImpl::sl_eclustESWRadioButtonChanged( bool checked ) {
- eidESWLabel->setEnabled( checked );
- eidESWDoubleSpinBox->setEnabled( checked );
-}
-
-void UHMM3BuildDialogImpl::sl_esetESWRadioButtonChanged( bool checked ) {
- esetESWDoubleSpinBox->setEnabled( checked );
-}
-
-} // GB2
diff --git a/src/plugins_3rdparty/hmm3/src/build/uHMM3BuildTask.cpp b/src/plugins_3rdparty/hmm3/src/build/uHMM3BuildTask.cpp
deleted file mode 100644
index 9279b1d..0000000
--- a/src/plugins_3rdparty/hmm3/src/build/uHMM3BuildTask.cpp
+++ /dev/null
@@ -1,331 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#include <QtCore/QFileInfo>
-
-#include <U2Core/AppContext.h>
-#include <U2Core/AppResources.h>
-#include <U2Core/DocumentModel.h>
-#include <U2Core/IOAdapter.h>
-#include <U2Core/IOAdapterUtils.h>
-#include <U2Core/Counter.h>
-#include <U2Core/LoadDocumentTask.h>
-#include <U2Core/Log.h>
-#include <U2Core/GObjectTypes.h>
-#include <U2Core/MAlignmentObject.h>
-#include <U2Core/U2OpStatusUtils.h>
-#include <U2Core/U2SafePoints.h>
-
-#include <gobject/uHMMObject.h>
-#include <util/uhmm3Utilities.h>
-
-#include "uhmm3build.h"
-#include "uHMM3BuildTask.h"
-
-#define UHMM3_BUILD_LOG_CAT "hmm3_build_log_category"
-
-using namespace U2;
-
-
-namespace U2 {
-
-/**********************************
- * UHMM3BuildTask
- ***********************************/
-
-UHMM3BuildTask::UHMM3BuildTask( const UHMM3BuildSettings& aset, const MAlignment & amsa )
-: Task("", TaskFlag_None), settings( aset ), msa( amsa ), hmm( NULL ) {
- GCOUNTER( cvar, tvar, "UHMM3BuildTask" );
- setTaskName( tr( "Build HMM profile from %1 alignment" ).arg( msa.getName() ) );
- checkMsa();
-
- // work with task resources
- float msaSzInMB = ( msa.getLength() * msa.getNumRows() ) / ( 1024.0 * 1024 );
- int power = ( 0 <= msaSzInMB && msaSzInMB <= 0.5 ) ? 20 : ( 0.5 < msaSzInMB && msaSzInMB <= 1 ) ? 10 :
- ( 1 < msaSzInMB && msaSzInMB <= 10 ) ? 7 : ( 10 < msaSzInMB && msaSzInMB <= 30 ) ? 5 : 4;
- int howManyMem = qMax( 1, (int)( power * msaSzInMB ) );
- addTaskResource(TaskResourceUsage( RESOURCE_MEMORY, howManyMem ));
- algoLog.trace( QString( "%1 requires %2 of memory" ).arg( getTaskName() ).arg( howManyMem ) );
-}
-
-bool UHMM3BuildTask::checkMsa() {
- if( msa.getNumRows() == 0 ) {
- stateInfo.setError( tr("Given multiple alignment has no sequences") );
- return false;
- } else if ( msa.getLength() == 0 ) {
- stateInfo.setError( tr("Given multiple alignment is empty") );
- return false;
- }
- return true;
-}
-
-void UHMM3BuildTask::delHmm() {
- if( NULL != hmm ) {
- p7_hmm_Destroy( hmm );
- }
- hmm = NULL;
-}
-
-UHMM3BuildTask::~UHMM3BuildTask() {
- delHmm();
-}
-
-P7_HMM * UHMM3BuildTask::getHMM() const {
- return hmm;
-}
-
-P7_HMM * UHMM3BuildTask::takeHMM() {
- P7_HMM * ret = hmm;
- hmm = NULL;
- return ret;
-}
-
-void UHMM3BuildTask::run() {
- hmm = UHMM3Build::build( msa, settings, stateInfo );
- if( stateInfo.hasError() ) {
- delHmm();
- }
-}
-
-/**********************************
-* UHMM3BuildTaskSettings
-***********************************/
-
-UHMM3BuildTaskSettings::UHMM3BuildTaskSettings( const QString& out ) : outFile( out ) {
- setDefaultUHMM3BuildSettings( &inner );
-}
-
-/**********************************
-* UHMM3BuildToFileTask
-***********************************/
-
-static QList< MAlignment > getMalignments( const QList< GObject* >& objList );
-
-UHMM3BuildToFileTask::UHMM3BuildToFileTask( const UHMM3BuildTaskSettings& s, const QList< MAlignment >& m )
-: Task( "", TaskFlags_NR_FOSCOE | TaskFlag_ReportingIsSupported | TaskFlag_ReportingIsEnabled ),
- settings( s ), msas( m ), loadTask( NULL ), saveHmmFileTask( NULL ), savingDocument( NULL ) {
-
- setTaskName( tr( "Build HMM profile to '%1'" ).arg( QFileInfo( settings.outFile ).fileName() ) );
-
- if( settings.outFile.isEmpty() ) {
- stateInfo.setError( tr( "Output file is not given" ) );
- return;
- }
-
- if( msas.isEmpty() ) {
- stateInfo.setError( tr( "No multiple alignments given" ) );
- return;
- }
-
- createBuildSubtasks();
- addBuildSubTasks();
-}
-
-UHMM3BuildToFileTask::UHMM3BuildToFileTask( const UHMM3BuildTaskSettings& set, const MAlignment& ma )
-: Task( "", TaskFlags_NR_FOSCOE | TaskFlag_ReportingIsSupported | TaskFlag_ReportingIsEnabled ),
- settings( set ), loadTask( NULL ), saveHmmFileTask( NULL ), savingDocument( NULL ) {
-
- setTaskName( tr( "Build HMM profile to '%1'" ).arg( QFileInfo( settings.outFile ).fileName() ) );
-
- if( settings.outFile.isEmpty() ) {
- stateInfo.setError( tr( "Output file is not given" ) );
- return;
- }
-
- msas.append( ma );
- createBuildSubtasks();
- addBuildSubTasks();
-}
-
-
-UHMM3BuildToFileTask::UHMM3BuildToFileTask( const UHMM3BuildTaskSettings& set, const QString& _inFile )
-: Task( "", TaskFlags_NR_FOSCOE | TaskFlag_ReportingIsSupported | TaskFlag_ReportingIsEnabled ),
-settings( set ), inFile( _inFile ), loadTask( NULL ), saveHmmFileTask( NULL ), savingDocument( NULL ) {
-
- setTaskName( tr( "Build HMM profile '%1' -> '%2'" ).arg( QFileInfo( inFile ).fileName() ).arg( QFileInfo( settings.outFile ).fileName() ) );
-
- if( inFile.isEmpty() ) {
- stateInfo.setError( tr( "Input file is not given" ) );
- return;
- }
- if( settings.outFile.isEmpty() ) {
- stateInfo.setError( tr( "Output file is not given" ) );
- return;
- }
-
- DocumentFormatConstraints constr;
- constr.supportedObjectTypes+=GObjectTypes::MULTIPLE_ALIGNMENT;
- constr.checkRawData = true;
- constr.rawData = IOAdapterUtils::readFileHeader( inFile );
- constr.addFlagToExclude(DocumentFormatFlag_CannotBeCreated);
- QList<DocumentFormatId> formats = AppContext::getDocumentFormatRegistry()->selectFormats( constr );
- if( formats.isEmpty() ) {
- stateInfo.setError( tr( "Unrecognized input alignment file format" ) );
- return;
- }
- DocumentFormatId alnFormat = formats.first();
- IOAdapterFactory* iof = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById( IOAdapterUtils::url2io( inFile ) );
-
- if( NULL == iof ) {
- stateInfo.setError( tr( "Error opening '%1' file" ).arg( inFile ) );
- return;
- }
-
- loadTask = new LoadDocumentTask( alnFormat, inFile, iof, QVariantMap() );
- addSubTask( loadTask );
-}
-
-void UHMM3BuildToFileTask::createBuildSubtasks() {
- foreach( const MAlignment & ma, msas ) {
- UHMM3BuildTask * curTask = new UHMM3BuildTask( settings.inner, ma );
- buildTasks << curTask;
- }
-}
-
-void UHMM3BuildToFileTask::addBuildSubTasks() {
- assert( !buildTasks.isEmpty() );
- foreach( UHMM3BuildTask * cur, buildTasks ) {
- addSubTask( cur );
- }
-}
-
-QList< Task* > UHMM3BuildToFileTask::onSubTaskFinished( Task* sub ) {
- QMutexLocker locker( &mtx );
- QList< Task* > res;
-
- assert( NULL != sub );
- if( hasError() ) {
- return res;
- }
- if( sub->hasError() ) {
- stateInfo.setError( sub->getError() );
- return res;
- }
-
- if( loadTask == sub ) {
- assert( msas.isEmpty() );
-
- Document* doc = loadTask->getDocument();
- QList< GObject* > msaObjs = doc->findGObjectByType( GObjectTypes::MULTIPLE_ALIGNMENT );
- if( msaObjs.isEmpty() ) {
- stateInfo.setError( tr( "No multiple alignments found in input file" ) );
- return res;
- }
-
- msas = getMalignments( msaObjs );
- createBuildSubtasks();
- foreach( UHMM3BuildTask * cur, buildTasks ) {
- assert( NULL != cur );
- res << cur;
- }
- return res;
- } else if( buildTasks.contains( qobject_cast< UHMM3BuildTask* >(sub) ) ) {
- UHMM3BuildTask * curBuildTask = qobject_cast< UHMM3BuildTask* >( sub );
- assert( NULL != curBuildTask );
- int howMany = buildTasks.removeAll( curBuildTask );
- assert( 1 == howMany );
-
- if( curBuildTask->hasError() || curBuildTask->isCanceled() ) {
- return res; /* nothing to do */
- }
-
- P7_HMM * hmm = curBuildTask->takeHMM();
- assert( NULL != hmm );
- hmms.append( hmm );
-
- if( buildTasks.isEmpty() ) { /* all build tasks had finished */
- assert( !hmms.isEmpty() );
- savingDocument = UHMM3Utilities::getSavingDocument( hmms, settings.outFile );
- saveHmmFileTask = new SaveDocumentTask( savingDocument );
- res << saveHmmFileTask;
- }
- return res;
- } else if( saveHmmFileTask == sub ) {
- assert( NULL != savingDocument );
- delete savingDocument;
- } else {
- assert( 0 );
- }
-
- return res;
-}
-
-QString UHMM3BuildToFileTask::generateReport() const {
- QString res;
-
- res += "<table>";
- if( !inFile.isEmpty() ) {
- res += "<tr><td width=200><b>" + tr("Source alignment") + "</b></td><td>" + inFile + "</td></tr>";
- }
- res += "<tr><td><b>" + tr("Profile name") + "</b></td><td>" + settings.outFile + "</td></tr>";
-
- const UHMM3BuildSettings & bldSettings = settings.inner;
-
- res += "<tr><td><b>" + tr( "Options:" ) + "</b></td></tr>";
- res += "<tr><td><b>" + tr( "Model construction strategies" ) + "</b></td><td>";
- switch( bldSettings.archStrategy ) {
- case p7_ARCH_FAST: res += "fast"; break;
- case p7_ARCH_HAND: res += "hand"; break;
- default: assert( false );
- }
- res += "</td></tr>";
-
- res += "<tr><td><b>" + tr( "Relative model construction strategies" ) + "</b></td><td>";
- switch( bldSettings.wgtStrategy ) {
- case p7_WGT_GSC: res += tr("Gerstein/Sonnhammer/Chothia tree weights"); break;
- case p7_WGT_BLOSUM: res += tr("Henikoff simple filter weights" ); break;
- case p7_WGT_PB: res += tr("Henikoff position-based weights" ); break;
- case p7_WGT_NONE: res += tr("No relative weighting; set all to 1" ); break;
- case p7_WGT_GIVEN: res += tr("Weights given in MSA file" ); break;
- default: assert( false );
- }
- res += "</td></tr>";
-
- res += "<tr><td><b>" + tr( "Effective sequence weighting strategies" ) + "</b></td><td>";
- switch( bldSettings.effnStrategy ) {
- case p7_EFFN_ENTROPY: res += tr( "adjust effective sequence number to achieve relative entropy target" ); break;
- case p7_EFFN_CLUST: res += tr( "effective sequence number is number of single linkage clusters" ); break;
- case p7_EFFN_NONE: res += tr( "no effective sequence number weighting: just use number of sequences" ); break;
- case p7_EFFN_SET: res += tr( "set effective sequence number for all models to: %1" ).arg( bldSettings.eset ); break;
- default: assert( false );
- }
- res += "</td></tr>";
-
- if( hasError() ) {
- res += "<tr><td width=200><b>" + tr( "Task finished with error: '%1'" ).arg( getError() ) + "</b></td><td></td></tr>";
- }
- res += "</table>";
-
- return res;
-}
-
-static QList< MAlignment > getMalignments( const QList< GObject* >& objList ) {
- QList< MAlignment > res;
-
- foreach( GObject* obj, objList ) {
- MAlignmentObject* msaObj = qobject_cast< MAlignmentObject* >( obj );
- assert( NULL != msaObj );
- res << msaObj->getMAlignment();
- }
- return res;
-}
-
-} // U2
diff --git a/src/plugins_3rdparty/hmm3/src/build/uHMM3BuildTask.h b/src/plugins_3rdparty/hmm3/src/build/uHMM3BuildTask.h
deleted file mode 100644
index 9ee29e3..0000000
--- a/src/plugins_3rdparty/hmm3/src/build/uHMM3BuildTask.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#ifndef _UHMMER3_BUILD_TASK_H_
-#define _UHMMER3_BUILD_TASK_H_
-
-#include <QtCore/QMutex>
-
-#include <U2Core/Task.h>
-#include <U2Core/DocumentModel.h>
-#include <U2Core/SaveDocumentTask.h>
-#include <U2Core/MAlignment.h>
-
-#include <format/uHMMFormat.h>
-
-#include <hmmer3/hmmer.h>
-
-namespace U2 {
-
-class LoadDocumentTask;
-
-/* if we build multi-malignments file and error occurs while building -> we will return empty hmm list */
-class UHMM3BuildTask : public Task {
- Q_OBJECT
-public:
- UHMM3BuildTask( const UHMM3BuildSettings& settings, const MAlignment & msa );
- ~UHMM3BuildTask();
- virtual void run();
- P7_HMM * getHMM() const;
- P7_HMM * takeHMM();
-
-private:
- bool checkMsa();
- void delHmm();
-
-private:
- UHMM3BuildSettings settings;
- MAlignment msa;
- P7_HMM * hmm;
-
-}; // UHMM3BuildTask
-
-class UHMM3BuildTaskSettings {
-public:
- UHMM3BuildSettings inner;
- QString outFile;
-
- UHMM3BuildTaskSettings( const QString& outFile = QString() );
-
-}; // UHMM3BuildTaskSettings
-
-class UHMM3BuildToFileTask : public Task {
- Q_OBJECT
-public:
- UHMM3BuildToFileTask( const UHMM3BuildTaskSettings& settings, const QList< MAlignment >& msas);
- UHMM3BuildToFileTask( const UHMM3BuildTaskSettings& settings, const MAlignment& ma );
- UHMM3BuildToFileTask( const UHMM3BuildTaskSettings& settings, const QString& in );
-
- virtual QList< Task* > onSubTaskFinished( Task* sub );
-
- QString generateReport() const;
-
-private:
- void createBuildSubtasks();
- void addBuildSubTasks();
-
-private:
- UHMM3BuildTaskSettings settings;
- QString inFile;
- QList< MAlignment > msas;
- QList<const P7_HMM* > hmms;
- LoadDocumentTask* loadTask;
- QList< UHMM3BuildTask* > buildTasks;
- SaveDocumentTask* saveHmmFileTask;
- Document* savingDocument;
- QMutex mtx;
-
-}; // UHMM3BuildToFileTask
-
-} // U2
-
-#endif // _UHMMER3_BUILD_TASK_H_
diff --git a/src/plugins_3rdparty/hmm3/src/build/uhmm3build.cpp b/src/plugins_3rdparty/hmm3/src/build/uhmm3build.cpp
deleted file mode 100644
index 64f05cd..0000000
--- a/src/plugins_3rdparty/hmm3/src/build/uhmm3build.cpp
+++ /dev/null
@@ -1,120 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <U2Core/DNAAlphabet.h>
-#include <U2Core/MAlignmentInfo.h>
-
-#include <hmmer3/easel/esl_msa.h>
-#include <hmmer3/hmmer.h>
-#include <hmmer3/p7_config.h>
-
-#include <util/uhmm3Utilities.h>
-
-#include "uhmm3build.h"
-
-using namespace U2;
-
-static void destoryAllIfYouCan( ESL_ALPHABET* abc, P7_BG* bg, P7_BUILDER* bld, ESL_MSA* msa, P7_HMM* hmm ) {
- if( NULL != hmm ) p7_hmm_Destroy( hmm );
- if( NULL != bld ) p7_builder_Destroy( bld );
- if( NULL != bg ) p7_bg_Destroy( bg );
- if( NULL != msa ) esl_msa_Destroy( msa );
- if( NULL != abc ) esl_alphabet_Destroy( abc );
-}
-
-namespace U2 {
-
-/* if error occurs we return hmms that are already created, so caller should delete them anyway
- Note, that we do not destroy hmm->abc here, caller should delete it too.
- Don't worry, on exception we'll catch it here
- */
-P7_HMM * UHMM3Build::build( const MAlignment & malignment, const UHMM3BuildSettings & settings ,TaskStateInfo & ti ) {
- ESL_ALPHABET* abc = NULL;
- P7_BG* bg = NULL;
- P7_BUILDER* bld = NULL;
- ESL_MSA* msa = NULL;
- P7_HMM* hmm = NULL;
- QByteArray errStr;
-
- ti.progress = 0;
- try {
- int alType = UHMM3Utilities::convertAlphabetType( malignment.getAlphabet() );
- if( UHMM3Utilities::BAD_ALPHABET == alType ) {
- errStr = tr( "UGENE cannot determine alphabet of alignment" ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- }
- ESL_ALPHABET* abc = esl_alphabet_Create( alType );
- if( NULL == abc ) {
- errStr = tr( "Run out of memory (creating alphabet failed)" ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- }
-
- P7_BG* bg = p7_bg_Create( abc );
- if( NULL == bg ) {
- errStr = tr( "Run out of memory (creating null model failed)" ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- }
- P7_BUILDER* bld = p7_builder_Create( &settings, abc );
- if( NULL == bld ) {
- errStr = tr( "Run out of memory (creating builder failed)" ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- }
-
- ESL_MSA* msa = UHMM3Utilities::convertMSA( malignment );
- if( NULL == msa ) {
- errStr = tr( "Run out of memory (creating multiple alignment failed)" ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- }
- int ret = esl_msa_Digitize( abc, msa, NULL );
- if( eslOK != ret ) {
- errStr = tr( "Run out of memory (digitizing of alignment failed)" ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- }
- ret = p7_Builder( bld, msa, bg, &hmm, NULL, NULL, NULL, NULL, ti );
- if ( eslOK != ret ) {
- if( eslCANCELED == ret ) {
- errStr = tr( HMMER3_CANCELED_ERROR ).toLatin1();
- } else {
- errStr = tr( "Model building failed" ).toLatin1();
- }
- assert( !errStr.isEmpty() );
- throwUHMMER3Exception( errStr.data() );
- }
-
- destoryAllIfYouCan( abc, bg, bld, msa, NULL );
- } catch( const UHMMER3Exception& ex ) {
- ti.setError( ex.msg );
- destoryAllIfYouCan( abc, bg, bld, msa, hmm );
- return NULL;
- } catch(...) {
- ti.setError( tr( HMMER3_UNKNOWN_ERROR ) );
- destoryAllIfYouCan( abc, bg, bld, msa, hmm );
- return NULL;
- }
-
- return hmm;
-}
-
-} // GB2
diff --git a/src/plugins_3rdparty/hmm3/src/format/uHMMFormat.cpp b/src/plugins_3rdparty/hmm3/src/format/uHMMFormat.cpp
deleted file mode 100644
index 95b05f7..0000000
--- a/src/plugins_3rdparty/hmm3/src/format/uHMMFormat.cpp
+++ /dev/null
@@ -1,337 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#include <math.h>
-
-#include <QtCore/QList>
-#include <QtCore/QTextStream>
-#include <QtCore/QtAlgorithms>
-
-#include <U2Core/U2SafePoints.h>
-#include <U2Core/L10n.h>
-#include <U2Core/Task.h>
-#include <U2Core/TextUtils.h>
-#include <U2Core/U2SafePoints.h>
-#include <U2Core/U2DbiUtils.h>
-
-#include <gobject/uHMMObject.h>
-#include "uHMMFormat.h"
-#include "uHMMFormatReader.h"
-
-using namespace U2;
-
-const QString UHMMFormat::WRITE_FAILED = QObject::tr("Writing HMM profile file failed");
-
-static void loadOne( IOAdapter* io, QList< GObject* >& objects, U2OpStatus& os) {
- CHECK_OP(os, );
-
- UHMMFormatReader reader( io, os);
- P7_HMM * hmm = reader.getNextHmm();
-
- CHECK_OP(os, );
-
- assert(hmm != NULL);
- QString objName( hmm->name );
- UHMMObject* obj = new UHMMObject( hmm, objName );
- objects.append( obj );
-}
-
-static void loadAll( IOAdapter* io, QList< GObject* >& objects, U2OpStatus& os) {
- assert( NULL != io && io->isOpen() );
-
- while( !io->isEof() && !os.isCoR()) {
- loadOne( io, objects, os);
- os.setProgress(io->getProgress());
- }
-}
-
-static void checkBytesWrittenThrowException( qint64 wantedToWrite, qint64 written, const QString& msg ) {
- if( wantedToWrite != written ) {
- throw UHMMFormat::UHMMWriteException( msg );
- }
-}
-
-static void writeHMMASCIIStr( IOAdapter* io, const QByteArray& str ) {
- assert( NULL != io && io->isOpen() );
- qint64 bytesWritten = io->writeBlock( str );
- checkBytesWrittenThrowException( str.size(), bytesWritten, UHMMFormat::WRITE_FAILED );
-}
-
-static void writeHMMASCIIStr( IOAdapter* io, const char* str, int num ) {
- assert( NULL != io && io->isOpen() );
- assert( NULL != str && 0 < num );
- qint64 bytesWritten = io->writeBlock( str, num );
- checkBytesWrittenThrowException( num, bytesWritten, UHMMFormat::WRITE_FAILED );
-}
-
-static void writeHMMASCIIStr( IOAdapter* io, const char* s1, const char* s2 ) {
- QString str;
- QTextStream txtStream( &str );
- txtStream << s1 << s2 << "\n";
- writeHMMASCIIStr( io, str.toLatin1() );
-}
-
-static void writeHMMASCIIStr( IOAdapter* io, const char* s1, const QString& s2 ) {
- QString str;
- QTextStream txtStream( &str );
- txtStream << s1 << s2 << "\n";
- writeHMMASCIIStr( io, str.toLatin1() );
-}
-
-static void writeHMMHeaderASCII( IOAdapter* io ) {
- QTextStream txtStream;
- QString headerStr;
- txtStream.setString( &headerStr );
- txtStream << "HMMER3/b [" << HMMER_VERSION << " | " << HMMER_DATE << "]\n";
- writeHMMASCIIStr( io, headerStr.toLatin1() );
-}
-
-static void writeHMMMultiLine( IOAdapter *io, const char *pfx, char *s ) {
- QString res;
- char *sptr = s;
- char *end = NULL;
- int n = 0;
- int nline = 1;
-
- do {
- end = strchr(sptr, '\n');
- if (end != NULL) { /* if there's no \n left, end == NULL */
- n = end - sptr; /* n chars exclusive of \n */
-
- res = QString().sprintf( "%s [%d] ", pfx, nline++ );
- writeHMMASCIIStr( io, res.toLatin1() );
- writeHMMASCIIStr( io, sptr, n );
- writeHMMASCIIStr( io, QByteArray( "\n" ) );
- sptr += n + 1; /* +1 to get past \n */
- } else {
- res = QString().sprintf( "%s [%d] %s\n", pfx, nline++, sptr );
- writeHMMASCIIStr( io, res.toLatin1() );
- }
- } while (end != NULL && *sptr != '\0'); /* *sptr == 0 if <s> terminates with a \n */
-}
-
-static void writeHMMProb( IOAdapter* io, int fieldwidth, float p ) {
- assert( NULL != io && io->isOpen() );
- QString res;
- if( p == 0.0 ) {
- res = QString().sprintf( " %*s", fieldwidth, "*" );
- writeHMMASCIIStr( io, res.toLatin1() );
- } else if( p == 1.0 ) {
- res = QString().sprintf( " %*.5f", fieldwidth, 0.0 );
- writeHMMASCIIStr( io, res.toLatin1() );
- } else {
- res = QString().sprintf( " %*.5f", fieldwidth, -logf(p) );
- writeHMMASCIIStr( io, res.toLatin1() );
- }
-}
-
-static void saveOne( IOAdapter* io, const P7_HMM* hmm, U2OpStatus& os) {
- assert( NULL != hmm );
- assert( NULL != io && io->isOpen() );
- assert( !os.hasError() );
-
- try {
- int k = 0;
- int x = 0;
- QString res;
-
- writeHMMHeaderASCII( io );
- writeHMMASCIIStr( io, "NAME ", hmm->name );
-
- if (hmm->flags & p7H_ACC) {
- writeHMMASCIIStr( io, "ACC ", hmm->acc );
- }
- if (hmm->flags & p7H_DESC) {
- writeHMMASCIIStr( io, "DESC ", hmm->desc );
- }
- writeHMMASCIIStr( io, "LENG ", QString::number( hmm->M ) );
- writeHMMASCIIStr( io, "ALPH ", esl_abc_DecodeType( hmm->abc->type ) );
- writeHMMASCIIStr( io, "RF ", ( hmm->flags & p7H_RF )? "yes" : "no" );
- writeHMMASCIIStr( io, "CS ", ( hmm->flags & p7H_CS )? "yes" : "no" );
- writeHMMASCIIStr( io, "MAP ", ( hmm->flags & p7H_MAP )? "yes" : "no" );
-
- if (hmm->ctime != NULL) {
- writeHMMASCIIStr( io, "DATE ", hmm->ctime );
- }
- if (hmm->comlog != NULL) {
- writeHMMMultiLine( io, "COM ", hmm->comlog );
- }
- if (hmm->nseq >= 0) {
- writeHMMASCIIStr( io, "NSEQ ", QString::number( hmm->nseq ) );
- }
- if (hmm->eff_nseq >= 0) {
- res = QString().sprintf( "EFFN %f\n", hmm->eff_nseq );
- writeHMMASCIIStr( io, res.toLatin1() );
- }
- if (hmm->flags & p7H_CHKSUM) {
- writeHMMASCIIStr( io, "CKSUM ", QString::number( hmm->checksum ) );
- }
-
- if (hmm->flags & p7H_GA) {
- res = QString().sprintf( "GA %.2f %.2f\n", hmm->cutoff[p7_GA1], hmm->cutoff[p7_GA2] );
- writeHMMASCIIStr( io, res.toLatin1() );
- }
- if (hmm->flags & p7H_TC) {
- res = QString().sprintf( "TC %.2f %.2f\n", hmm->cutoff[p7_TC1], hmm->cutoff[p7_TC2] );
- writeHMMASCIIStr( io, res.toLatin1() );
- }
- if (hmm->flags & p7H_NC) {
- res = QString().sprintf( "NC %.2f %.2f\n", hmm->cutoff[p7_NC1], hmm->cutoff[p7_NC2] );
- writeHMMASCIIStr( io, res.toLatin1() );
- }
-
- if (hmm->flags & p7H_STATS) {
- res = QString().sprintf( "STATS LOCAL MSV %8.4f %8.5f\n", hmm->evparam[p7_MMU], hmm->evparam[p7_MLAMBDA] );
- writeHMMASCIIStr( io, res.toLatin1() );
- res = QString().sprintf( "STATS LOCAL VITERBI %8.4f %8.5f\n", hmm->evparam[p7_VMU], hmm->evparam[p7_VLAMBDA] );
- writeHMMASCIIStr( io, res.toLatin1() );
- res = QString().sprintf( "STATS LOCAL FORWARD %8.4f %8.5f\n", hmm->evparam[p7_FTAU], hmm->evparam[p7_FLAMBDA] );
- writeHMMASCIIStr( io, res.toLatin1() );
- }
- writeHMMASCIIStr( io, QByteArray( "HMM " ) );
-
- for (x = 0; x < hmm->abc->K; x++) {
- res = QString().sprintf( " %c ", hmm->abc->sym[x] );
- writeHMMASCIIStr( io, res.toLatin1() );
- }
- writeHMMASCIIStr( io, QByteArray( "\n" ) );
- res = QString().sprintf( " %8s %8s %8s %8s %8s %8s %8s\n", "m->m", "m->i", "m->d", "i->m", "i->i", "d->m", "d->d" );
- writeHMMASCIIStr( io, res.toLatin1() );
-
- if (hmm->flags & p7H_COMPO) {
- writeHMMASCIIStr( io, QByteArray( " COMPO " ) );
-
- for (x = 0; x < hmm->abc->K; x++) {
- writeHMMProb( io, 8, hmm->compo[x] );
- }
- writeHMMASCIIStr( io, QByteArray( "\n" ) );
- }
-
- /* node 0 is special: insert emissions, and B-> transitions */
- writeHMMASCIIStr( io, QByteArray( " " ) );
-
- for (x = 0; x < hmm->abc->K; x++) {
- writeHMMProb( io, 8, hmm->ins[0][x] );
- }
- writeHMMASCIIStr( io, QByteArray( "\n" ) );
- writeHMMASCIIStr( io, QByteArray( " " ) );
-
- for (x = 0; x < p7H_NTRANSITIONS; x++) {
- writeHMMProb( io, 8, hmm->t[0][x] );
- }
- writeHMMASCIIStr( io, QByteArray( "\n" ) );
-
- for (k = 1; k <= hmm->M; k++) {
- /* Line 1: k; match emissions; optional map, RF, CS */
- res = QString().sprintf( " %6d ", k );
- writeHMMASCIIStr( io, res.toLatin1() );
-
- for (x = 0; x < hmm->abc->K; x++) {
- writeHMMProb( io, 8, hmm->mat[k][x] );
- }
- if (hmm->flags & p7H_MAP) {
- res = QString().sprintf( " %6d", hmm->map[k] );
- writeHMMASCIIStr( io, res.toLatin1() );
- } else {
- res = QString().sprintf( " %6s", "-" );
- writeHMMASCIIStr( io, res.toLatin1() );
- }
- res = QString().sprintf( " %c", (hmm->flags & p7H_RF) ? hmm->rf[k] : '-' );
- writeHMMASCIIStr( io, res.toLatin1() );
- res = QString().sprintf( " %c\n", (hmm->flags & p7H_CS) ? hmm->cs[k] : '-' );
- writeHMMASCIIStr( io, res.toLatin1() );
-
- /* Line 2: insert emissions */
- writeHMMASCIIStr( io, QByteArray( " " ) );
- for (x = 0; x < hmm->abc->K; x++) {
- writeHMMProb( io, 8, hmm->ins[k][x] );
- }
-
- /* Line 3: transitions */
- writeHMMASCIIStr( io, QByteArray( "\n " ) );
- for (x = 0; x < p7H_NTRANSITIONS; x++) {
- writeHMMProb( io, 8, hmm->t[k][x] );
- }
- writeHMMASCIIStr( io, QByteArray( "\n" ) );
- }
- writeHMMASCIIStr( io, QByteArray( "//\n" ) );
- } catch( const UHMMFormat::UHMMWriteException& ex ) {
- os.setError( ex.what );
- } catch(...) {
- os.setError( UHMMFormat::tr( "Unknown error occurred" ) );
- }
-}
-
-static void saveAll( IOAdapter* io, const QList< GObject* >& objects, U2OpStatus& os ) {
- assert( NULL != io && io->isOpen() );
- QList< const P7_HMM* > hmms;
-
- foreach( const GObject* obj, objects ) {
- const UHMMObject* hmmObj = qobject_cast< const UHMMObject* >(obj);
- CHECK_EXT(hmmObj != NULL, os.setError(L10N::badArgument("Objects in document")), );
- hmms.append( hmmObj->getHMM() );
- }
-
- foreach( const P7_HMM* hmm, hmms ) {
- saveOne(io, hmm, os);
- CHECK_OP(os, );
- }
-}
-
-namespace U2 {
-
-const DocumentFormatId UHMMFormat::UHHMER_FORMAT_ID = "hmmer_document_format";
-const QString UHMMFormat::WRITE_LOCK_REASON = QObject::tr("HMM files are read only");
-
-UHMMFormat::UHMMFormat( QObject* obj ) : DocumentFormat( obj, DocumentFormatFlags_SW | DocumentFormatFlag_LockedIfNotCreatedByUGENE, QStringList("hmm")) {
- formatName = tr( "Profile HMM format" );
- formatDescription = tr("hmm is a format for storing hmm profiles");
- supportedObjectTypes+=UHMMObject::UHMM_OT;
-}
-
-DocumentFormatId UHMMFormat::getFormatId() const {
- return UHHMER_FORMAT_ID;
-}
-
-const QString& UHMMFormat::getFormatName() const {
- return formatName;
-}
-
-Document* UHMMFormat::loadDocument(IOAdapter* io, const U2DbiRef& /*targetDb*/, const QVariantMap& hints, U2OpStatus& os) {
- QList< GObject* > objects;
- loadAll( io, objects, os );
- CHECK_OP_EXT(os, qDeleteAll(objects), NULL);
- return new Document( this, io->getFactory(), io->getURL(), U2DbiRef(), objects, hints, WRITE_LOCK_REASON );
-}
-
-
-void UHMMFormat::storeDocument( Document* doc, IOAdapter* io, U2OpStatus& os) {
- saveAll(io, doc->getObjects(), os);
-}
-
-FormatCheckResult UHMMFormat::checkRawData( const QByteArray& data, const GUrl&) const {
- bool result = data.startsWith( UHMMFormatReader::HMMER2_VERSION_HEADER.toLatin1() )
- || data.startsWith( UHMMFormatReader::HMMER3_VERSION_HEADER.toLatin1() );
-
- return result ? FormatDetection_Matched : FormatDetection_NotMatched;
-}
-
-} // U2
diff --git a/src/plugins_3rdparty/hmm3/src/format/uHMMFormat.h b/src/plugins_3rdparty/hmm3/src/format/uHMMFormat.h
deleted file mode 100644
index ffc3a19..0000000
--- a/src/plugins_3rdparty/hmm3/src/format/uHMMFormat.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#ifndef _GB2_HMMER3_HMM_FORMAT_H_
-#define _GB2_HMMER3_HMM_FORMAT_H_
-
-#include <QtCore/QByteArray>
-#include <QtCore/QString>
-#include <QtCore/QStringList>
-
-#include <U2Core/global.h>
-#include <U2Core/DocumentModel.h>
-#include <U2Core/IOAdapter.h>
-
-namespace U2 {
-
-class UHMMFormat : public DocumentFormat {
- Q_OBJECT
-public:
- static const DocumentFormatId UHHMER_FORMAT_ID;
- static const QString WRITE_LOCK_REASON;
- static const QString WRITE_FAILED;
-
-public:
- UHMMFormat( QObject* obj );
-
- virtual DocumentFormatId getFormatId() const;
-
- virtual const QString& getFormatName() const;
-
-
- virtual void storeDocument( Document* d, IOAdapter* io, U2OpStatus& os );
-
- virtual FormatCheckResult checkRawData( const QByteArray& data, const GUrl& = GUrl()) const;
-
- struct UHMMWriteException {
- QString what;
- UHMMWriteException( const QString& msg ) : what( msg ) {}
- }; // UHMMWriteException
-
-protected:
- Document* loadDocument(IOAdapter* io, const U2DbiRef& targetDb, const QVariantMap& hints, U2OpStatus& os);
-private:
- QString formatName;
-
-}; // UHMMFormat
-
-} // U2
-
-#endif // _GB2_HMMER3_HMM_FORMAT_H_
diff --git a/src/plugins_3rdparty/hmm3/src/format/uHMMFormatReader.cpp b/src/plugins_3rdparty/hmm3/src/format/uHMMFormatReader.cpp
deleted file mode 100644
index 15c2e97..0000000
--- a/src/plugins_3rdparty/hmm3/src/format/uHMMFormatReader.cpp
+++ /dev/null
@@ -1,924 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#include <QtCore/QByteArray>
-
-#include <U2Core/L10n.h>
-#include <U2Core/TextUtils.h>
-#include <U2Core/DNAAlphabet.h>
-#include <U2Core/AppContext.h>
-#include <U2Core/U2AlphabetUtils.h>
-#include <U2Core/U2OpStatus.h>
-
-#include "uHMMFormatReader.h"
-
-#include <math.h>
-
-using namespace U2;
-
-const int BUF_SZ = 1024;
-const int BAD_READ = -1;
-const int EMPTY_READ = 0;
-const char TERM_SYM = '\0';
-
-enum HMMERHeaderTags {
- HMM3_BAD_TAG = -1,
- HMM3_NAME,
- HMM3_ACC,
- HMM3_DESC,
- HMM3_LENG,
- HMM3_ALPH,
- HMM3_RF,
- HMM3_CS,
- HMM3_MAP,
- HMM3_DATE,
- HMM3_COM,
- HMM3_NSEQ,
- HMM3_EFFN,
- HMM3_CKSUM,
- HMM3_STATS,
- HMM3_GA,
- HMM3_TC,
- HMM3_NC,
- HMM3_NULE, // in HMMER2 only
- HMM3_HMM,
- HMM3_BM,
- HMM3_SM,
-
- //HMMER 3.1
- HMM3_MAXL,
- HMM3_MM,
- HMM3_CONS,
- HMM3_COMPO
-}; // HMMERHeaderTags
-
-static QMap< QByteArray, HMMERHeaderTags > getHeaderTagsMap() {
- static QMap< QByteArray, HMMERHeaderTags > ret;
- if (ret.isEmpty()) {
- ret["NAME"] = HMM3_NAME;
- ret["ACC"] = HMM3_ACC;
- ret["DESC"] = HMM3_DESC;
- ret["LENG"] = HMM3_LENG;
- ret["ALPH"] = HMM3_ALPH;
- ret["RF"] = HMM3_RF;
- ret["CS"] = HMM3_CS;
- ret["MAP"] = HMM3_MAP;
- ret["DATE"] = HMM3_DATE;
- ret["COM"] = HMM3_COM;
- ret["NSEQ"] = HMM3_NSEQ;
- ret["EFFN"] = HMM3_EFFN;
- ret["CKSUM"] = HMM3_CKSUM;
- ret["STATS"] = HMM3_STATS;
- ret["GA"] = HMM3_GA;
- ret["TC"] = HMM3_TC;
- ret["NC"] = HMM3_NC;
- ret["NULE"] = HMM3_NULE;
- ret["HMM"] = HMM3_HMM;
- ret["BM"] = HMM3_BM;
- ret["SM"] = HMM3_SM;
-
- //HMMER3.1
- ret["MAXL"] = HMM3_MAXL;
- ret["MM"] = HMM3_MM;
- ret["CONS"] = HMM3_CONS;
- ret["COMPO"] = HMM3_COMPO;
-
- }
- return ret;
-}
-
-static float h2ascii2prob( const QByteArray & curToken , float null ) {
- float ret = .0;
- if( '*' == curToken.at( 0 ) ) {
- ret = 0.0;
- } else {
- bool ok = false;
- int num = curToken.toInt( &ok );
- if( !ok ) {
- throw UHMMFormatReader::UHMMFormatReaderException(
- UHMMFormatReader::tr( "Number expected in NULE line. %1 found" ).arg( QString( curToken ) ) );
- }
- ret = ( null ) * exp( num * 0.00069314718 );
- }
- return ret;
-}
-
-static void checkReadThrowException( int readBytes ) {
- if( BAD_READ == readBytes ) {
- throw UHMMFormatReader::UHMMFormatReaderException(
- UHMMFormatReader::HMM_FORMAT_READER_ERROR_PREFIX + UHMMFormatReader::READ_FAILED );
- }
-}
-
-static void getTagValue( const QByteArray& ln, QByteArray& tag, QByteArray& val ) {
- QString line( ln.trimmed() );
- QStringList words = line.split( QRegExp( "\\s+" ), QString::SkipEmptyParts );
-
- if( 1 >= words.size() ) {
- if( words.size() == 1 && getHeaderTagsMap().value(words.at(0).toLatin1(), HMM3_BAD_TAG) == HMM3_NAME ) {
- tag = words.at(0).toLatin1();
- val = "hmm_profile";
- } else {
- throw UHMMFormatReader::UHMMFormatReaderException(UHMMFormatReader::tr("Bad line in header section: '%1'").arg(QString(ln)));
- }
- } else {
- tag = words.first().toLatin1();
- val = ln.mid( tag.size() ).trimmed();
- assert( !tag.isEmpty() );
- if( val.isEmpty() ) {
- throw UHMMFormatReader::UHMMFormatReaderException(UHMMFormatReader::tr("Empty value in header line:%1").arg(QString(ln)));
- }
- }
-}
-
-static void setInteger( int& num, const QByteArray& numStr ) {
- bool ok = false;
- int ret = numStr.toInt( &ok );
- if( !ok ) {
- throw UHMMFormatReader::UHMMFormatReaderException(
- UHMMFormatReader::tr( "Cannot parse integer from string: '%1'" ).arg( QString(numStr) ) );
- } else {
- num = ret;
- }
-}
-
-static void setFloat( float& num, const QByteArray& numStr ) {
- bool ok = false;
- float ret = numStr.toFloat( &ok );
- if( !ok ) {
- throw UHMMFormatReader::UHMMFormatReaderException(
- UHMMFormatReader::tr( "Cannot parse float number from string: '%1'" ).arg( QString(numStr) ) );
- } else {
- num = ret;
- }
-}
-
-static void set2Floats( float& f1, float& f2, const QByteArray& str ) {
- QString line(str.trimmed());
- if (line.endsWith(";")) { //PFAM compatibility fix
- line = line.mid(0, line.length()-1);
- }
-
- QStringList words = line.split( QRegExp( "\\s+" ), QString::SkipEmptyParts );
-
- if( 2 != words.size() ) {
- throw UHMMFormatReader::UHMMFormatReaderException(
- UHMMFormatReader::tr( "Cannot parse 2 float numbers from string: '%1'" ).arg( QString(str) ) );
- }
- setFloat( f1, words.at( 0 ).toLatin1() );
- setFloat( f2, words.at( 1 ).toLatin1() );
-}
-
-static void setUInteger( uint32_t& num, const QByteArray& numStr ) {
- bool ok = false;
- uint32_t ret = numStr.toUInt( &ok );
- if( !ok ) {
- throw UHMMFormatReader::UHMMFormatReaderException(
- UHMMFormatReader::tr( "Cannot parse unsigned integer from string: '%1'" ).arg( QString(numStr) ) );
- } else {
- num = ret;
- }
-}
-
-static void setYesNoValue( int& flags, int val, const QByteArray& s ) {
- QByteArray str = s.toLower();
- if( "yes" == str ) {
- flags |= val;
- } else if( "no" != str ) {
- throw UHMMFormatReader::UHMMFormatReaderException(
- UHMMFormatReader::tr( "Cannot parse y/n value from string: '%1'" ).arg( QString(str) ) );
- }
-}
-
-static void allocAndCopyStr( const QByteArray& from, char** to ) {
- assert( !from.isEmpty() && NULL != to );
- int sz = from.size();
- *to = (char*)calloc( sizeof( char ), sz + 1 );
- if( NULL == *to ) {
- throw UHMMFormatReader::UHMMFormatReaderException( UHMMFormatReader::tr( "Run out of memory (date allocation failed)" ) );
- }
- qCopy( from.data(), from.data() + sz, *to );
- (*to)[sz] = TERM_SYM;
-}
-
-const int ALPHA_VERSION_STATS_FIELDS_NUM = 3;
-const int BETA_VERSION_STATS_FIELDS_NUM = 4;
-
-static void setHmmStats( float* params, const QByteArray& s, uint32_t& statstracker ) {
- assert( NULL != params );
- QString str( s );
- QStringList words = str.split( QRegExp( "\\s+" ), QString::SkipEmptyParts );
- int wordsSz = words.size();
- if( ALPHA_VERSION_STATS_FIELDS_NUM != wordsSz && BETA_VERSION_STATS_FIELDS_NUM != wordsSz ) {
- throw UHMMFormatReader::UHMMFormatReaderException( UHMMFormatReader::tr( "Bad STATS line: '%1'" ).arg( QString(str) ) );
- }
- if( "LOCAL" != words.at( 0 ).toUpper() ) {
- throw UHMMFormatReader::UHMMFormatReaderException(
- UHMMFormatReader::tr( "Bad STATS line: '%1'/ LOCAL word was not found" ).arg( QString(str) ) );
- }
-
- if( ALPHA_VERSION_STATS_FIELDS_NUM == wordsSz ) {
-
- } else if( BETA_VERSION_STATS_FIELDS_NUM == wordsSz ) {
-
- } else {
- assert( false );
- }
- switch( wordsSz ) {
- case ALPHA_VERSION_STATS_FIELDS_NUM: // this one is for backward compatibility with hmmer3 alpha version
- {
- QByteArray numStr = words.at( 2 ).toLatin1();
- QString tagStr = words.at( 1 ).toUpper();
- if( "VLAMBDA" == tagStr ) {
- setFloat( params[p7_MLAMBDA], numStr );
- setFloat( params[p7_VLAMBDA], numStr );
- setFloat( params[p7_FLAMBDA], numStr );
- statstracker |= 0x1;
- } else if( "VMU" == tagStr ) {
- setFloat( params[p7_MMU], numStr );
- setFloat( params[p7_VMU], numStr );
- statstracker |= 0x2;
- } else if( "FTAU" == tagStr ) {
- setFloat( params[p7_FTAU], numStr );
- statstracker |= 0x4;
- } else {
- throw UHMMFormatReader::UHMMFormatReaderException( UHMMFormatReader::tr( "Bad STATS line: %1. %2 not recognized" ).
- arg( str ).arg( tagStr ) );
- }
- }
- break;
- case BETA_VERSION_STATS_FIELDS_NUM:
- {
- QString tagStr = words.at( 1 ).toUpper();
- QByteArray num1Str = words.at( 2 ).toLatin1();
- QByteArray num2Str = words.at( 3 ).toLatin1();
- if( "MSV" == tagStr ) {
- setFloat( params[p7_MMU], num1Str );
- setFloat( params[p7_MLAMBDA], num2Str );
- statstracker |= 0x1;
- } else if( "VITERBI" == tagStr ) {
- setFloat( params[p7_VMU], num1Str );
- setFloat( params[p7_VLAMBDA], num2Str );
- statstracker |= 0x2;
- } else if( "FORWARD" == tagStr ) {
- setFloat( params[p7_FTAU], num1Str );
- setFloat( params[p7_FLAMBDA], num2Str );
- statstracker |= 0x4;
- } else {
- throw UHMMFormatReader::UHMMFormatReaderException( UHMMFormatReader::tr( "Bad STATS line: %1. %2 not recognized" ).
- arg( str ).arg( tagStr ) );
- }
- }
- break;
- default:
- assert( false );
- }
-
-}
-
-static QByteArray getNextToken( QStringList& tokens ) {
- if( tokens.isEmpty() ) {
- throw UHMMFormatReader::UHMMFormatReaderException( UHMMFormatReader::tr( "Unexpected end of file" ) );
- }
- return tokens.takeFirst().toLatin1();
-}
-
-static void setMainModelFloatVal( float& num, const QByteArray& str ) {
- if( "*" == str ) {
- num = 0.0f;
- } else {
- float tmp = 0;
- setFloat( tmp, str );
- num = expf( -1.0 * tmp );
- }
-}
-
-static void skipBlankLines( IOAdapter* io ) {
- assert( NULL != io && io->isOpen() );
- char c = 0;
- bool skip = true;
- while( skip ) {
- int ret = io->readBlock( &c, 1 );
- checkReadThrowException( ret );
- if( 0 == ret || -1 == ret) { return; }
- skip = TextUtils::LINE_BREAKS[(uchar)c] || TextUtils::WHITES[(uchar)c];
- }
- io->skip( -1 );
-}
-
-static void readLine( IOAdapter* io, QByteArray& to, QStringList* tokens = NULL ) {
- assert( NULL != io );
- to.clear();
- QByteArray buf( BUF_SZ, TERM_SYM );
- bool there = false;
- while( !there ) {
- int bytes = io->readUntil( buf.data(), BUF_SZ, TextUtils::LINE_BREAKS, IOAdapter::Term_Include, &there );
- checkReadThrowException( bytes );
- if( EMPTY_READ == bytes ) {
- break;
- }
- to.append( QByteArray( buf.data(), bytes ) );
- }
- to = to.trimmed();
-
- if( NULL != tokens ) {
- *tokens = QString( to ).split( QRegExp( "\\s+" ), QString::SkipEmptyParts );
- }
- skipBlankLines( io );
-}
-
-namespace U2 {
-
-const QString UHMMFormatReader::HMM_FORMAT_READER_ERROR_PREFIX = QObject::tr("HMM reader error occurred: ");
-const QString UHMMFormatReader::READ_FAILED = QObject::tr("Reading file failed");
-const QString UHMMFormatReader::HMMER2_VERSION_HEADER = "HMMER2";
-const QString UHMMFormatReader::HMMER3_VERSION_HEADER = "HMMER3";
-
-UHMMFormatReader::UHMMFormatReader( IOAdapter * i, U2OpStatus& _os) : io( i ), os( _os ) {
- CHECK_OP(os, );
- CHECK_EXT(io!=NULL && io->isOpen(), os.setError( HMM_FORMAT_READER_ERROR_PREFIX + L10N::badArgument( "io adapter" ) ), );
-}
-
-UHMMFormatReader::HMMER_VERSIONS UHMMFormatReader::getVersion( const QByteArray & str ) const {
- QString header( str );
- if( header.startsWith( HMMER2_VERSION_HEADER ) ) {
- return HMMER2_VERSION;
- } else if( header.startsWith( HMMER3_VERSION_HEADER ) ) {
- return HMMER3_VERSION;
- }
-
- return UNKNOWN_VERSION;
-}
-
-P7_HMM * UHMMFormatReader::getNextHmm() {
- CHECK_OP(os, NULL);
-
- QByteArray header;
- readLine( io, header );
- HMMER_VERSIONS version = getVersion( header );
-
- P7_HMM * ret = NULL;
- switch( version ) {
- case HMMER2_VERSION:
- ret = readHMMER2ASCII();
- break;
- case HMMER3_VERSION:
- ret = readHMMER3ASCII();
- break;
- case UNKNOWN_VERSION:
- os.setError( tr( "Input file made by unknown version of HMMER or is not HMM profile file" ) );
- break;
- default:
- assert( false );
- }
-
- return ret;
-}
-
-const int MAX_HMM_HEADER_TAGS = 300;
-
-P7_HMM * UHMMFormatReader::readHMMER3ASCII() {
- assert( NULL != io && io->isOpen() );
-
- ESL_ALPHABET *abc = NULL;
- P7_HMM *hmm = NULL;
- uint32_t statstracker = 0;
- int x = 0;
- bool ok = false;
-
- try {
- if ((hmm = p7_hmm_CreateShell()) == NULL) {
- throw UHMMFormatReader::UHMMFormatReaderException( UHMMFormatReader::tr( "Run out of memory (allocation of HMM shell failed)" ) );
- }
-
- /* Header section */
- QMap< QByteArray, HMMERHeaderTags > headerTagsMap = getHeaderTagsMap();
- bool isHeaderSection = true;
- int tagsNum = 0;
- while( isHeaderSection && ++tagsNum < MAX_HMM_HEADER_TAGS ) {
- QByteArray line;
- QByteArray tagStr;
- QByteArray valueStr;
- HMMERHeaderTags tag;
-
- readLine( io, line );
- getTagValue( line, tagStr, valueStr );
- tag = headerTagsMap.value( tagStr, HMM3_BAD_TAG );
- switch( tag ) {
- case HMM3_NAME:
- p7_hmm_SetName( hmm, valueStr.data() );
- break;
- case HMM3_ACC:
- p7_hmm_SetAccession( hmm, valueStr.data() );
- break;
- case HMM3_DESC:
- p7_hmm_SetDescription( hmm, valueStr.data() );
- break;
- case HMM3_LENG:
- setInteger( hmm->M, valueStr );
- if( 0 >= hmm->M ) {
- throw UHMMFormatReader::UHMMFormatReaderException(
- UHMMFormatReader::tr( "Length of HMM model should be positive. Found: %1" ).arg( hmm->M ) );
- }
- break;
- case HMM3_ALPH:
- {
- int abcType = esl_abc_EncodeType( valueStr.data() );
- if( eslUNKNOWN == abcType ) {
- throw UHMMFormatReader::UHMMFormatReaderException(
- UHMMFormatReader::tr( "Unrecognized alphabet type: %1" ).arg( QString(valueStr) ) );
- }
- abc = esl_alphabet_Create( abcType );
- }
- break;
- case HMM3_RF:
- setYesNoValue( hmm->flags, p7H_RF, valueStr );
- break;
- case HMM3_CS:
- setYesNoValue( hmm->flags, p7H_CS, valueStr );
- break;
- case HMM3_MAP:
- setYesNoValue( hmm->flags, p7H_MAP, valueStr );
- break;
- case HMM3_DATE:
- allocAndCopyStr( valueStr, &(hmm->ctime) );
- break;
- case HMM3_COM: // COM is command line that was used to create this hmm. we don't need it
- break;
- case HMM3_NSEQ:
- setInteger( hmm->nseq, valueStr );
- if( 0 >= hmm->nseq ) {
- throw UHMMFormatReader::UHMMFormatReaderException(
- UHMMFormatReader::tr( "Number of sequences should be positive. Found: %1" ).arg( hmm->nseq ) );
- }
- break;
- case HMM3_EFFN:
- setFloat( hmm->eff_nseq, valueStr );
- if( 0 >= hmm->eff_nseq ) {
- throw UHMMFormatReader::UHMMFormatReaderException(
- UHMMFormatReader::tr( "EFFN shoold be positive. Found: %1" ).arg( hmm->eff_nseq ) );
- }
- break;
- case HMM3_CKSUM:
- setUInteger( hmm->checksum, valueStr );
- hmm->flags |= p7H_CHKSUM;
- break;
- case HMM3_STATS:
- setHmmStats( hmm->evparam, valueStr, statstracker );
- break;
- case HMM3_GA:
- set2Floats( hmm->cutoff[p7_GA1], hmm->cutoff[p7_GA2], valueStr );
- hmm->flags |= p7H_GA;
- break;
- case HMM3_TC:
- set2Floats( hmm->cutoff[p7_TC1], hmm->cutoff[p7_TC2], valueStr );
- hmm->flags |= p7H_TC;
- break;
- case HMM3_NC:
- set2Floats( hmm->cutoff[p7_NC1], hmm->cutoff[p7_NC2], valueStr );
- hmm->flags |= p7H_NC;
- break;
- case HMM3_HMM:
- isHeaderSection = false;
- continue;
- case HMM3_BM:
- case HMM3_SM:
- case HMM3_MAXL:
- case HMM3_MM:
- case HMM3_CONS:
- case HMM3_COMPO:
- //TODO: update HMMER and start use these fields too
- break;
- case HMM3_BAD_TAG:
- throw UHMMFormatReader::UHMMFormatReaderException(
- UHMMFormatReader::tr( "Unrecognized tag in header section: '%1'" ).arg( QString(tagStr) ) );
- default:
- assert( 0 );
- }
- } /* end, loop over possible header tags */
-
- /* If we saw one STATS line, we needed all 3 */
- if( statstracker == 0x7 ) {
- hmm->flags |= p7H_STATS;
- } else if( statstracker != 0x0 ) {
- throw UHMMFormatReader::UHMMFormatReaderException(
- UHMMFormatReader::tr( "Missing one or more STATS parameters" ) );
- }
-
- if( NULL == abc ) {
- throw UHMMFormatReader::UHMMFormatReaderException(
- UHMMFormatReader::tr( "Run out of memory (failed to create alphabet)" ) );
- }
-
- if( p7_hmm_CreateBody( hmm, hmm->M, abc->type ) != eslOK ) {
- throw UHMMFormatReader::UHMMFormatReaderException(
- UHMMFormatReader::tr( "Run out of memory (allocation of HMM body failed)" ) );
- }
-
- QByteArray line;
- QStringList tokens;
- readLine( io, line );
- readLine( io, line, &tokens );
- QByteArray curToken = getNextToken( tokens );
-
- /* Optional model composition (filter null model) may immediately follow headers */
- if( "COMPO" == curToken.toUpper() ) {
- for( x = 0; x < abc->K; x++ ) {
- curToken = getNextToken( tokens );
- setMainModelFloatVal( hmm->compo[x], curToken );
- }
- hmm->flags |= p7H_COMPO;
- readLine( io, line, &tokens );
- curToken = getNextToken( tokens );
- }
-
- /* First two lines are node 0: insert emissions, then transitions from node 0 (begin) */
- setMainModelFloatVal( hmm->ins[0][0], curToken );
- for( x = 1; x < abc->K; x++ ) {
- curToken = getNextToken( tokens );
- setMainModelFloatVal( hmm->ins[0][x], curToken );
- }
- readLine( io, line, &tokens );
- for( x = 0; x < p7H_NTRANSITIONS; x++ ) {
- curToken = getNextToken( tokens );
- setMainModelFloatVal( hmm->t[0][x], curToken );
- }
-
- /* The main model section. */
- int k = 0;
- for( k = 1; k <= hmm->M; k++ ) {
- int n = 0;
- readLine( io, line, &tokens );
- curToken = getNextToken( tokens );
- setInteger( n, curToken );
- if( k != n ) {
- throw UHMMFormatReader::UHMMFormatReaderException(
- UHMMFormatReader::tr( "Line was excpected to start with %1. Found: %2" ).arg( k ).arg( QString(curToken) ) );
- }
- for( x = 0; x < abc->K; x++ ) {
- curToken = getNextToken( tokens );
- setMainModelFloatVal( hmm->mat[k][x], curToken );
- }
- curToken = getNextToken( tokens );
- if( hmm->flags & p7H_MAP ) {
- int num = 0;
- setInteger( num, curToken );
- hmm->map[k] = num;
- }
- curToken = getNextToken( tokens );
- if (hmm->flags & p7H_RF) {
- assert( !curToken.isEmpty() );
- hmm->rf[k] = curToken.at( 0 );
- }
-
- curToken = getNextToken( tokens );
- if( hmm->flags & p7H_CS ) {
- assert( !curToken.isEmpty() );
- hmm->cs[k] = curToken.at( 0 );
- }
-
- readLine( io, line, &tokens );
- for( x = 0; x < abc->K; x++ ) {
- curToken = getNextToken( tokens );
- setMainModelFloatVal( hmm->ins[k][x], curToken );
- }
-
- readLine( io, line, &tokens );
- for( x = 0; x < p7H_NTRANSITIONS; x++ ) {
- curToken = getNextToken( tokens );
- setMainModelFloatVal( hmm->t[k][x], curToken );
- }
- }
-
- /* The closing // */
- readLine( io, line, &tokens );
- curToken = getNextToken( tokens );
- if( "//" != curToken ) {
- throw UHMMFormatReader::UHMMFormatReaderException(
- UHMMFormatReader::tr( "Excpected to find closing '//'. Found %1 instead" ).arg( QString(curToken) ) );
- }
- skipBlankLines( io );
-
- /* Finish up. */
- if( hmm->flags & p7H_RF ) {
- hmm->rf[0] = ' ';
- hmm->rf[hmm->M+1] = '\0';
- }
- if( hmm->flags & p7H_CS ) {
- hmm->cs[0] = ' ';
- hmm->cs[hmm->M+1] = '\0';
- }
- if( hmm->flags & p7H_MAP ) {
- hmm->map[0] = 0;
- }
- if( hmm->name == NULL ) {
- throw UHMMFormatReader::UHMMFormatReaderException( UHMMFormatReader::tr( "HMM name not found" ) );
- }
- assert( 0 <= hmm->M );
- assert( NULL != hmm->abc );
- } catch( const UHMMFormatReaderException & ex ) {
- os.setError( ex.what );
- } catch(...) {
- os.setError( HMMER3_UNKNOWN_ERROR );
- }
- esl_alphabet_Destroy( abc );
-
- CHECK_OP_EXT(os, p7_hmm_Destroy( hmm ), NULL);
- return hmm;
-}
-
-P7_HMM * UHMMFormatReader::readHMMER2ASCII() {
- assert( NULL != io && io->isOpen() );
-
- ESL_ALPHABET *abc = NULL;
- P7_HMM *hmm = NULL;
- P7_BG *bg = NULL;
- float null[p7_MAXABET];
- int k,x;
-
- try {
- if( ( hmm = p7_hmm_CreateShell()) == NULL ) {
- throw UHMMFormatReaderException( tr( "Allocation failure, HMM shell" ) );
- }
-
- /* Header */
- /* H2 save files have no EFFN;
- * COM lines don't have number tags like [1];
- * they have CKSUM but we ignore it because it uses different algorithm;
- * have EVD line, we ignore it, H3 stats are different;
- * XT, NULT lines are ignored; algorithm-dependent config is all internal in H3
- */
- QMap< QByteArray, HMMERHeaderTags > headerTagsMap = getHeaderTagsMap();
- bool isHeaderSection = true;
- bool triedToSetAlph = false;
- int tagsNum = 0;
- while( isHeaderSection && ++tagsNum < MAX_HMM_HEADER_TAGS ) {
- QByteArray line;
- QStringList tokens;
- QByteArray tagStr;
- QByteArray valueStr;
- HMMERHeaderTags tag;
-
- readLine( io, line, &tokens );
- getTagValue( line, tagStr, valueStr );
- tag = headerTagsMap.value( tagStr, HMM3_BAD_TAG );
- switch( tag ) {
- case HMM3_NAME:
- p7_hmm_SetName( hmm, valueStr.data() );
- break;
- case HMM3_ACC:
- p7_hmm_SetAccession( hmm, valueStr.data() );
- break;
- case HMM3_DESC:
- p7_hmm_SetDescription( hmm, valueStr.data() );
- break;
- case HMM3_LENG:
- setInteger( hmm->M, valueStr );
- if( 0 >= hmm->M ) {
- throw UHMMFormatReader::UHMMFormatReaderException(
- UHMMFormatReader::tr( "Length of HMM model should be positive. Found: %1" ).arg( hmm->M ) );
- }
- break;
- case HMM3_ALPH:
- {
- triedToSetAlph = true;
- if(valueStr.toLower() != "nucleic") { // try to encode abc in hmm section
- int abcType = esl_abc_EncodeType( valueStr.data() );
- if( eslUNKNOWN == abcType ) {
- throw UHMMFormatReader::UHMMFormatReaderException(
- tr( "Unrecognized alphabet type: %1" ).arg( QString(valueStr) ) );
- }
- abc = esl_alphabet_Create( abcType );
- }
- }
- break;
- case HMM3_RF:
- setYesNoValue( hmm->flags, p7H_RF, valueStr );
- break;
- case HMM3_CS:
- setYesNoValue( hmm->flags, p7H_CS, valueStr );
- break;
- case HMM3_MAP:
- setYesNoValue( hmm->flags, p7H_MAP, valueStr );
- break;
- case HMM3_DATE:
- allocAndCopyStr( valueStr, &(hmm->ctime) );
- break;
- case HMM3_COM: // we don't need it
- break;
- case HMM3_NSEQ:
- setInteger( hmm->nseq, valueStr );
- if( 0 >= hmm->nseq ) {
- throw UHMMFormatReader::UHMMFormatReaderException(
- UHMMFormatReader::tr( "Number of sequences should be positive. Found: %1" ).arg( hmm->nseq ) );
- }
- break;
- case HMM3_GA:
- set2Floats( hmm->cutoff[p7_GA1], hmm->cutoff[p7_GA2], valueStr );
- hmm->flags |= p7H_GA;
- break;
- case HMM3_TC:
- set2Floats( hmm->cutoff[p7_TC1], hmm->cutoff[p7_TC2], valueStr );
- hmm->flags |= p7H_TC;
- break;
- case HMM3_NC:
- set2Floats( hmm->cutoff[p7_NC1], hmm->cutoff[p7_NC2], valueStr );
- hmm->flags |= p7H_NC;
- break;
- case HMM3_NULE:
- {
- int K = 0;
- if( !triedToSetAlph) {
- throw UHMMFormatReaderException( tr( "ALPH must precede NULE in HMMER2 save files" ) );
- } else if(abc != NULL){
- K = abc->K;
- } else {
- K = 4; // because it is dna or rna
- }
-
- getNextToken( tokens ); // skip NULE word
- for (x = 0; x < K; x++) {
- QByteArray curToken = getNextToken( tokens );
- assert( 1 <= curToken.size() );
- null[x] = h2ascii2prob( curToken, 1.0/(float)K );
- }
- }
- break;
- case HMM3_HMM:
- {
- if(abc == NULL) {
- if(!triedToSetAlph) {
- throw UHMMFormatReaderException(tr("ALPH section must precede HMM"));
- } else {
- QString val(valueStr);
- const DNAAlphabet* al = U2AlphabetUtils::findBestAlphabet(val.remove(QRegExp("\\s+")).toLatin1());
- if(al == NULL || !al->isNucleic()) {
- throw UHMMFormatReaderException(tr("Unknown alphabet"));
- } else {
- int abcT = al->getId() == BaseDNAAlphabetIds::NUCL_DNA_DEFAULT() ||
- al->getId() == BaseDNAAlphabetIds::NUCL_DNA_EXTENDED() ? eslDNA : eslRNA;
- abc = esl_alphabet_Create(abcT);
- }
- }
- }
- isHeaderSection = false;
- }
- continue;
- default:
- continue;
- }
- } // over header section
-
- /* Skip main model header lines; allocate body of HMM now that K,M are known */
- QByteArray line;
- QStringList tokens;
- readLine( io, line );
- readLine( io, line, &tokens );
-
- if( p7_hmm_CreateBody(hmm, hmm->M, abc->type ) != eslOK ) {
- throw UHMMFormatReaderException( tr( "Failed to allocate body of the new HMM" ) );
- }
- if( (bg = p7_bg_Create( abc )) == NULL ) {
- throw UHMMFormatReaderException( tr( "Failed to create background model" ) );
- }
-
- /* H2's tbd1 line ==> translated to H3's node 0 */
- QByteArray tok1;
- QByteArray tok2;
- QByteArray tok3;
- tok1 = getNextToken( tokens );
- tok2 = getNextToken( tokens );
- tok3 = getNextToken( tokens );
-
- hmm->t[0][p7H_MM] = h2ascii2prob(tok1, 1.0); /* B->M1 */
- hmm->t[0][p7H_MI] = 0.0; /* B->I0 */
- hmm->t[0][p7H_MD] = h2ascii2prob(tok3, 1.0); /* B->D1 */
- hmm->t[0][p7H_IM] = 1.0;
- hmm->t[0][p7H_II] = 0.0;
- hmm->t[0][p7H_DM] = 1.0;
- hmm->t[0][p7H_DD] = 0.0;
- for (x = 0; x < abc->K; x++) {
- hmm->ins[0][x] = bg->f[x];
- }
-
- /* The main model section. */
- for (k = 1; k <= hmm->M; k++) {
- int n = 0;
- readLine( io, line, &tokens );
- tok1 = getNextToken( tokens );
- setInteger( n, tok1 );
-
- if( n != k ) {
- throw UHMMFormatReaderException( tr( "Expected match line to start with %1. saw %2" ).arg( k ).arg( QString( tok1 ) ) );
- }
-
- /* Line 1: match emissions; optional map info */
- for (x = 0; x < abc->K; x++) {
- tok1 = getNextToken( tokens );
- hmm->mat[k][x] = h2ascii2prob(tok1, null[x]);
- }
- if (hmm->flags & p7H_MAP) {
- int n = 0;
- tok1 = getNextToken( tokens );
- setInteger( n, tok1 );
- hmm->map[k] = n;
- }
-
- /* Line 2: optional RF; then we ignore insert emissions */
- readLine( io, line, &tokens );
- tok1 = getNextToken( tokens );
- if (hmm->flags & p7H_RF) {
- assert( 1 <= tok1.size() );
- hmm->rf[k] = tok1.at( 0 );
- }
- for (x = 0; x < abc->K; x++) {
- hmm->ins[k][x] = bg->f[x];
- }
-
- /* Line 3: optional CS, then transitions (ignoring last 2, which are entry/exit */
- readLine( io, line, &tokens );
- tok1 = getNextToken( tokens );
- if (hmm->flags & p7H_CS) {
- assert( 1 <= tok1.size() );
- hmm->cs[k] = tok1.at( 0 );
- }
- if (k < hmm->M) { /* ignore last insert transition line; H3/H2 not compatible there */
- for (x = 0; x < p7H_NTRANSITIONS; x++) {
- tok1 = getNextToken( tokens );
- hmm->t[k][x] = h2ascii2prob(tok1, 1.0);
- }
- }
- }
-
- /* node M transitions: H2 doesn't have an I_M state */
- hmm->t[hmm->M][p7H_MM] = 1.0;
- hmm->t[hmm->M][p7H_MI] = 0.0;
- hmm->t[hmm->M][p7H_MD] = 0.0;
- hmm->t[hmm->M][p7H_IM] = 1.0;
- hmm->t[hmm->M][p7H_II] = 0.0;
- hmm->t[hmm->M][p7H_DM] = 1.0;
- hmm->t[hmm->M][p7H_DD] = 0.0;
-
- /* The closing // */
- readLine( io, line, &tokens );
- tok1 = getNextToken( tokens );
- if( "//" != tok1 ) {
- throw UHMMFormatReaderException( tr( "Expected closing //. found %1 instead" ).arg( QString( tok1 ) ) );
- }
-
- /* Tidy up. */
- if (hmm->flags & p7H_RF) {
- hmm->rf[0] = ' ';
- hmm->rf[hmm->M+1] = '\0';
- }
- if (hmm->flags & p7H_CS) {
- hmm->cs[0] = ' ';
- hmm->cs[hmm->M+1] = '\0';
- }
- if (hmm->name == NULL) {
- throw UHMMFormatReaderException( tr( "No NAME found for HMM" ) );
- }
- if (hmm->M <= 0) {
- throw UHMMFormatReaderException( tr( "No LENG found for HMM" ) );
- }
- if (abc == NULL) {
- throw UHMMFormatReaderException( tr( "No ALPH found for HMM" ) );
- }
-
- /* Calibrate the model: cfg rng bg gm om */
- TaskStateInfo tmpTsi;
- if( p7_Calibrate(hmm, NULL, NULL, &bg, NULL, NULL, 0, tmpTsi ) != eslOK ) {
- throw UHMMFormatReaderException( tr( "Failed to calibrate HMMER2 model after input conversion" ) );
- }
- } catch( const UHMMFormatReaderException & ex ) {
- os.setError( ex.what );
- } catch(...) {
- os.setError( HMMER3_UNKNOWN_ERROR );
- }
-
- p7_bg_Destroy(bg);
- esl_alphabet_Destroy( abc );
-
- CHECK_OP_EXT(os, p7_hmm_Destroy( hmm ), NULL);
- return hmm;
-}
-
-} // U2
diff --git a/src/plugins_3rdparty/hmm3/src/format/uHMMFormatReader.h b/src/plugins_3rdparty/hmm3/src/format/uHMMFormatReader.h
deleted file mode 100644
index 383c5c9..0000000
--- a/src/plugins_3rdparty/hmm3/src/format/uHMMFormatReader.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#ifndef _GB2_HMMER3_HMM_FORMAT_READER_H_
-#define _GB2_HMMER3_HMM_FORMAT_READER_H_
-
-#include <QtCore/QObject>
-#include <QtCore/QString>
-
-#include <U2Core/IOAdapter.h>
-#include <U2Core/Task.h>
-
-#include <hmmer3/hmmer.h>
-
-namespace U2 {
-
-class UHMMFormatReader : public QObject {
- Q_OBJECT
-public:
- static const QString HMM_FORMAT_READER_ERROR_PREFIX;
- static const QString READ_FAILED;
-
- enum HMMER_VERSIONS {
- UNKNOWN_VERSION = -1,
- HMMER2_VERSION,
- HMMER3_VERSION
- }; // HMMER_VERSIONS
- static const QString HMMER2_VERSION_HEADER;
- static const QString HMMER3_VERSION_HEADER;
-
-public:
- UHMMFormatReader( IOAdapter * i, U2OpStatus & os);
- P7_HMM * getNextHmm();
-
- struct UHMMFormatReaderException {
- QString what;
- UHMMFormatReaderException( const QString& msg ) : what( msg ) {}
- }; // UHMMFormatReaderException
-
-private:
- P7_HMM * readHMMER3ASCII();
- P7_HMM * readHMMER2ASCII(); /* for backward compatibility */
- HMMER_VERSIONS getVersion( const QByteArray & header ) const;
-
-private:
- IOAdapter * io; // opened io adapter
- U2OpStatus& os;
-
-}; // UHMMFormatReader
-
-} // U2
-
-#endif // _GB2_HMMER3_HMM_FORMAT_READER_H_
diff --git a/src/plugins_3rdparty/hmm3/src/gobject/uHMMObject.cpp b/src/plugins_3rdparty/hmm3/src/gobject/uHMMObject.cpp
deleted file mode 100644
index 00e5080..0000000
--- a/src/plugins_3rdparty/hmm3/src/gobject/uHMMObject.cpp
+++ /dev/null
@@ -1,65 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#include <U2Core/GObjectTypes.h>
-
-#include "uHMMObject.h"
-
-namespace U2 {
-
-const QString UHMMObject::OT_ID = "OT_HMM";
-const QString UHMMObject::OT_NAME = "Hidden Markov model";
-const QString UHMMObject::OT_PNAME = "Hidden Markov models";
-const QString UHMMObject::OT_SIGN = "hmm";
-const QString UHMMObject::OT_ICON = "";
-const QString UHMMObject::OT_LOCKED_ICON = "";
-const GObjectType UHMMObject::UHMM_OT =
- GObjectTypes::registerTypeInfo( GObjectTypeInfo( OT_ID, OT_NAME, OT_PNAME, OT_SIGN, OT_ICON, OT_LOCKED_ICON ) );
-
-
-UHMMObject::UHMMObject( P7_HMM* ahmm, const QString& name ) : GObject( UHMM_OT, name ), hmm( ahmm ) {
-}
-
-UHMMObject::~UHMMObject() {
- if( NULL != hmm ) {
- p7_hmm_Destroy( hmm );
- }
-}
-
-const P7_HMM* UHMMObject::getHMM() const {
- return hmm;
-}
-
-P7_HMM* UHMMObject::takeHMM() {
- P7_HMM* ret = hmm;
- hmm = NULL;
- return ret;
-}
-
-GObject* UHMMObject::clone(const U2DbiRef&, U2OpStatus&, const QVariantMap &) const {
- P7_HMM* newHMM = p7_hmm_Clone( hmm );
- UHMMObject* cln = new UHMMObject( newHMM, getGObjectName() );
- cln->setIndexInfo( getIndexInfo() );
-
- return cln;
-}
-
-} // U2
diff --git a/src/plugins_3rdparty/hmm3/src/gobject/uHMMObject.h b/src/plugins_3rdparty/hmm3/src/gobject/uHMMObject.h
deleted file mode 100644
index d754c84..0000000
--- a/src/plugins_3rdparty/hmm3/src/gobject/uHMMObject.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#ifndef _GB2_HMMER3_UHMM_OBJECT_H_
-#define _GB2_HMMER3_UHMM_OBJECT_H_
-
-#include <QtCore/QString>
-
-#include <U2Core/global.h>
-#include <U2Core/GObject.h>
-
-#include <hmmer3/hmmer.h>
-
-namespace U2 {
-
-class UHMMObject : public GObject {
- Q_OBJECT
-public:
- static const QString OT_ID;
- static const QString OT_NAME;
- static const QString OT_PNAME;
- static const QString OT_SIGN;
- static const QString OT_ICON;
- static const QString OT_LOCKED_ICON;
- static const GObjectType UHMM_OT;
-
- UHMMObject( P7_HMM* hmm, const QString& name );
- ~UHMMObject();
-
- const P7_HMM* getHMM() const;
- P7_HMM* takeHMM();
-
- virtual GObject* clone(const U2DbiRef& ref, U2OpStatus& os, const QVariantMap &hints = QVariantMap()) const;
-
-private:
- P7_HMM* hmm;
-
-}; // UHMMObject
-
-} // U2
-
-#endif // _GB2_HMMER3_UHMM_OBJECT_H_
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/COPYRIGHT b/src/plugins_3rdparty/hmm3/src/hmmer3/COPYRIGHT
deleted file mode 100644
index d2b5f28..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/COPYRIGHT
+++ /dev/null
@@ -1,62 +0,0 @@
-HMMER - Biological sequence analysis with profile hidden Markov models
-Copyright (C) 2009 Howard Hughes Medical Institute.
-------------------------------------------------------------------
-
-Additional copyrights apply to the entirety of the package:
-Copyright (C) 1992-2009 Sean R. Eddy
-Copyright (C) 2000-2009 Howard Hughes Medical Institute
-Copyright (C) 1992-2004 Washington University School of Medicine
-Copyright (C) 1992-1994 MRC Laboratory of Molecular Biology
-
-HMMER also includes contributions from others, including copyrighted
-and licensed code and technology. These are noted in the appropriate
-places in source files. Contributors include:
-
- Jeremy Buhler (Washington University)
- Michael Farrar (Amherst, New Hampshire)
- Ian Holmes (UC Berkeley)
- Erik Lindahl (Stanford University)
- Graeme Mitchison (Cambridge University)
- Apple Computer (Cupertino, CA)
- Free Software Foundation, Inc. (Cambridge, MA)
- IBM TJ Watson Research Center (Yorktown Heights, NY)
- X Consortium (Cambridge, MA)
-
-HMMER uses the Easel software library, which has its own license and
-copyright information. See easel/COPYRIGHT and easel/LICENSE.
-
-HMMER includes patent-pending SIMD technology under nonexclusive
-license from Michael Farrar. You are sublicensed to use this
-technology specifically for the use, modification, and redistribution
-of HMMER3, under the terms of the GPLv3 patent clause.
-
-This source code is distributed under the terms of the GNU General
-Public License. See the file LICENSE for details.
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation, either version 3 of the License, or (at
-your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-A copy of the GNU General Public License is in the file LICENSE. You
-may also obtain a copy from <http://www.gnu.org/licenses/>.
-
-"HMMER" is a trademark of the Howard Hughes Medical Institute.
-Although the code is freely available as open source, we intend that
-only the version at http://hmmer.org/ is known as HMMER. You are free
-to create a modified version and distribute it (subject to the terms
-of the GPL), but we ask that you name it something that would not be
-confused with HMMER itself.
-
-------------------------------------------------------------------
-
-Sean Eddy
-HHMI Janelia Farm Research Campus
-
-
-
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/build.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/build.cpp
deleted file mode 100644
index 9110476..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/build.cpp
+++ /dev/null
@@ -1,357 +0,0 @@
-/* Construction of new HMMs from multiple alignments.
-*
-* Two versions:
-* p7_Handmodelmaker() -- use #=RF annotation to indicate match columns
-* p7_Fastmodelmaker() -- Krogh/Haussler heuristic
-*
-* The maximum likelihood model construction algorithm that was in previous
-* HMMER versions has been deprecated, at least for the moment.
-*
-* The meat of the model construction code is in matassign2hmm().
-* The two model construction strategies simply label which columns
-* are supposed to be match states, and then hand this info to
-* matassign2hmm().
-*
-*
-* Contents:
-* 1. Exported API: model construction routines.
-* 2. Private functions used in constructing models.
-* 5. Copyright and license.
-*
-* SRE, Tue Jan 2 2007 [Casa de Gatos]
- * SVN $Id: build.c 3041 2009-11-12 12:58:09Z eddys $
-*/
-
-#include <hmmer3/p7_config.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_alphabet.h>
-#include <hmmer3/easel/esl_msa.h>
-
-#include <hmmer3/hmmer.h>
-
-
-static int matassign2hmm(ESL_MSA *msa, int *matassign, P7_HMM **ret_hmm, P7_TRACE ***opt_tr);
-static int annotate_model(P7_HMM *hmm, int *matassign, ESL_MSA *msa);
-
-/*****************************************************************
-* 1. Exported API: model construction routines.
-*****************************************************************/
-
-/* Function: p7_Handmodelmaker()
-*
-* Purpose: Manual model construction.
-* Construct an HMM from a digital alignment, where the
-* <#=RF> line of the alignment file is used to indicate the
-* columns assigned to matches vs. inserts.
-*
-* The <msa> must be in digital mode, and it must have
-* a reference annotation line.
-*
-* NOTE: <p7_Handmodelmaker()> will slightly revise the
-* alignment if necessary, if the assignment of columns
-* implies DI and ID transitions.
-*
-* Returns both the HMM in counts form (ready for applying
-* Dirichlet priors as the next step), and fake tracebacks
-* for each aligned sequence.
-*
-* Models must have at least one node, so if the <msa> defined
-* no consensus columns, a <eslENORESULT> error is returned.
-*
-* Args: msa - multiple sequence alignment
-* ret_hmm - RETURN: counts-form HMM
-* opt_tr - optRETURN: array of tracebacks for aseq's
-*
-* Return: <eslOK> on success. <ret_hmm> and <opt_tr> are allocated
-* here, and must be free'd by caller.
-*
-* Returns <eslENORESULT> if no consensus columns were annotated;
-* in this case, <ret_hmm> and <opt_tr> are returned NULL.
-*
-* Returns <eslEFORMAT> if the <msa> doesn't have a reference
-* annotation line.
-*
-* Throws: <eslEMEM> on allocation failure. Throws <eslEINVAL> if the <msa>
-* isn't in digital mode.
-*/
-int
-p7_Handmodelmaker(ESL_MSA *msa, P7_HMM **ret_hmm, P7_TRACE ***opt_tr)
-{
- int status;
- int *matassign = NULL; /* MAT state assignments if 1; 1..alen */
- int apos; /* counter for aligned columns */
-
- if (! (msa->flags & eslMSA_DIGITAL)) ESL_XEXCEPTION(eslEINVAL, "need a digital msa");
- if (msa->rf == NULL) return eslEFORMAT;
-
- ESL_ALLOC_WITH_TYPE(matassign, int*, sizeof(int) * (msa->alen+1));
-
- /* Watch for off-by-one. rf is [0..alen-1]; matassign is [1..alen] */
- for (apos = 1; apos <= msa->alen; apos++)
- matassign[apos] = (esl_abc_CIsGap(msa->abc, msa->rf[apos-1])? FALSE : TRUE);
-
- /* matassign2hmm leaves ret_hmm, opt_tr in their proper state: */
- if ((status = matassign2hmm(msa, matassign, ret_hmm, opt_tr)) != eslOK) goto ERROR;
-
- free(matassign);
- return eslOK;
-
-ERROR:
- if (matassign != NULL) free(matassign);
- return status;
-}
-
-/* Function: p7_Fastmodelmaker()
-*
-* Purpose: Heuristic model construction.
-* Construct an HMM from an alignment by a simple rule,
-* based on the fractional occupancy of each columns w/
-* residues vs gaps. Any column w/ a fractional
-* occupancy of $\geq$ <symfrac> is assigned as a MATCH column;
-* for instance, if thresh = 0.5, columns w/ $\geq$ 50\%
-* residues are assigned to match... roughly speaking.
-*
-* "Roughly speaking" because sequences may be weighted
-* in the input <msa>, and because missing data symbols are
-* ignored, in order to deal with sequence fragments.
-*
-* The <msa> must be in digital mode.
-*
-* If the caller wants to designate any sequences as
-* fragments, it does so by converting all N-terminal and
-* C-terminal flanking gap symbols to missing data symbols.
-*
-* NOTE: p7_Fastmodelmaker() will slightly revise the
-* alignment if the assignment of columns implies
-* DI and ID transitions.
-*
-* Returns the HMM in counts form (ready for applying Dirichlet
-* priors as the next step). Also returns fake traceback
-* for each training sequence.
-*
-* Models must have at least one node, so if the <msa> defined
-* no consensus columns, a <eslENORESULT> error is returned.
-*
-* Args: msa - multiple sequence alignment
-* symfrac - threshold for residue occupancy; >= assigns MATCH
-* ret_hmm - RETURN: counts-form HMM
-* opt_tr - optRETURN: array of tracebacks for aseq's
-*
-* Return: <eslOK> on success. ret_hmm and opt_tr allocated here,
-* and must be free'd by the caller (FreeTrace(tr[i]), free(tr),
-* FreeHMM(hmm)).
-*
-* Returns <eslENORESULT> if no consensus columns were annotated;
-* in this case, <ret_hmm> and <opt_tr> are returned NULL.
-*
-* Throws: <eslEMEM> on allocation failure; <eslEINVAL> if the
-* <msa> isn't in digital mode.
-*/
-int
-p7_Fastmodelmaker(ESL_MSA *msa, float symfrac, P7_HMM **ret_hmm, P7_TRACE ***opt_tr)
-{
- int status; /* return status flag */
- int *matassign = NULL; /* MAT state assignments if 1; 1..alen */
- int idx; /* counter over sequences */
- int apos; /* counter for aligned columns */
- float r; /* weighted residue count */
- float totwgt; /* weighted residue+gap count */
-
- if (! (msa->flags & eslMSA_DIGITAL)) ESL_XEXCEPTION(eslEINVAL, "need digital MSA");
-
- /* Allocations: matassign is 1..alen array of bit flags.
- */
- ESL_ALLOC_WITH_TYPE(matassign, int*, sizeof(int) * (msa->alen+1));
-
- /* Determine weighted sym freq in each column, set matassign[] accordingly.
- */
- for (apos = 1; apos <= msa->alen; apos++)
- {
- r = totwgt = 0.;
- for (idx = 0; idx < msa->nseq; idx++)
- {
- if (esl_abc_XIsResidue(msa->abc, msa->ax[idx][apos])) { r += msa->wgt[idx]; totwgt += msa->wgt[idx]; }
- else if (esl_abc_XIsGap(msa->abc, msa->ax[idx][apos])) { totwgt += msa->wgt[idx]; }
- else if (esl_abc_XIsMissing(msa->abc, msa->ax[idx][apos])) continue;
- }
- if (r > 0. && r / totwgt >= symfrac) matassign[apos] = TRUE;
- else matassign[apos] = FALSE;
- }
-
- /* Once we have matassign calculated, modelmakers behave
- * the same; matassign2hmm() does this stuff (traceback construction,
- * trace counting) and sets up ret_hmm and opt_tr.
- */
- if ((status = matassign2hmm(msa, matassign, ret_hmm, opt_tr)) != eslOK) goto ERROR;
-
- free(matassign);
- return eslOK;
-
-ERROR:
- if (matassign != NULL) free(matassign);
- return status;
-}
-
-/*-------------------- end, exported API -------------------------*/
-
-
-
-
-/*****************************************************************
-* 2. Private functions used in constructing models.
-*****************************************************************/
-
-/* Function: matassign2hmm()
-*
-* Purpose: Given an assignment of alignment columns to match vs.
-* insert, finish the final part of the model construction
-* calculation that is constant between model construction
-* algorithms.
-*
-* Args: msa - multiple sequence alignment
-* matassign - 1..alen bit flags for column assignments
-* ret_hmm - RETURN: counts-form HMM
-* opt_tr - optRETURN: array of tracebacks for aseq's
-*
-* Return: <eslOK> on success.
-* <eslENORESULT> if no consensus columns are identified.
-*
-* ret_hmm and opt_tr alloc'ed here.
-*/
-static int
-matassign2hmm(ESL_MSA *msa, int *matassign, P7_HMM **ret_hmm, P7_TRACE ***opt_tr)
-{
- int status; /* return status */
- P7_HMM *hmm = NULL; /* RETURN: new hmm */
- P7_TRACE **tr = NULL; /* RETURN: 0..nseq-1 fake traces */
- int M; /* length of new model in match states */
- int idx; /* counter over sequences */
- int apos; /* counter for aligned columns */
- char errbuf[eslERRBUFSIZE];
-
- /* How many match states in the HMM? */
- for (M = 0, apos = 1; apos <= msa->alen; apos++)
- if (matassign[apos]) M++;
- if (M == 0) { status = eslENORESULT; goto ERROR; }
-
- /* Make fake tracebacks for each seq */
- ESL_ALLOC_WITH_TYPE(tr, P7_TRACE**, sizeof(P7_TRACE *) * msa->nseq);
- if ((status = p7_trace_FauxFromMSA(msa, matassign, p7_MSA_COORDS, tr)) != eslOK) goto ERROR;
- for (idx = 0; idx < msa->nseq; idx++)
- {
- if ((status = p7_trace_Doctor(tr[idx], NULL, NULL)) != eslOK) goto ERROR;
- if ((status = p7_trace_Validate(tr[idx], msa->abc, msa->ax[idx], errbuf)) != eslOK)
- ESL_XEXCEPTION(eslFAIL, "validation failed: %s", errbuf);
- }
-
- /* Build count model from tracebacks */
- if ((hmm = p7_hmm_Create(M, msa->abc)) == NULL) { status = eslEMEM; goto ERROR; }
- if ((status = p7_hmm_Zero(hmm)) != eslOK) goto ERROR;
- for (idx = 0; idx < msa->nseq; idx++) {
- if (tr[idx] == NULL) continue; /* skip rare examples of empty sequences */
- if ((status = p7_trace_Count(hmm, msa->ax[idx], msa->wgt[idx], tr[idx])) != eslOK) goto ERROR;
- }
- hmm->nseq = msa->nseq;
- hmm->eff_nseq = msa->nseq;
-
- /* Transfer annotation from the MSA to the new model
- */
- if ((status = annotate_model(hmm, matassign, msa)) != eslOK) goto ERROR;
-
- /* Reset #=RF line of alignment to reflect our assignment
- * of match, delete. matassign is valid from 1..alen and is off
- * by one from msa->rf.
- */
- if (msa->rf == NULL) ESL_ALLOC_WITH_TYPE(msa->rf, char*, sizeof(char) * (msa->alen + 1));
- for (apos = 1; apos <= msa->alen; apos++)
- msa->rf[apos-1] = matassign[apos] ? 'x' : '.';
- msa->rf[msa->alen] = '\0';
-
- if (opt_tr != NULL) *opt_tr = tr;
- else p7_trace_DestroyArray(tr, msa->nseq);
- *ret_hmm = hmm;
- return eslOK;
-
-ERROR:
- if (tr != NULL) p7_trace_DestroyArray(tr, msa->nseq);
- if (hmm != NULL) p7_hmm_Destroy(hmm);
- if (opt_tr != NULL) *opt_tr = NULL;
- *ret_hmm = NULL;
- return status;
-}
-
-
-
-/* Function: annotate_model()
-*
-* Purpose: Transfer rf, cs, and other optional annotation from the alignment
-* to the new model.
-*
-* Args: hmm - [M] new model to annotate
-* matassign - which alignment columns are MAT; [1..alen]
-* msa - alignment, including annotation to transfer
-*
-* Return: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation error.
-*/
-static int
-annotate_model(P7_HMM *hmm, int *matassign, ESL_MSA *msa)
-{
- int apos; /* position in matassign, 1.alen */
- int k; /* position in model, 1.M */
- int status;
-
- /* Reference coord annotation */
- if (msa->rf != NULL) {
- ESL_ALLOC_WITH_TYPE(hmm->rf, char*, sizeof(char) * (hmm->M+2));
- hmm->rf[0] = ' ';
- for (apos = k = 1; apos <= msa->alen; apos++)
- if (matassign[apos]) hmm->rf[k++] = msa->rf[apos-1]; /* watch off-by-one in msa's rf */
- hmm->rf[k] = '\0';
- hmm->flags |= p7H_RF;
- }
-
- /* Consensus structure annotation */
- if (msa->ss_cons != NULL) {
- ESL_ALLOC_WITH_TYPE(hmm->cs, char*, sizeof(char) * (hmm->M+2));
- hmm->cs[0] = ' ';
- for (apos = k = 1; apos <= msa->alen; apos++)
- if (matassign[apos]) hmm->cs[k++] = msa->ss_cons[apos-1];
- hmm->cs[k] = '\0';
- hmm->flags |= p7H_CS;
- }
-
- /* Surface accessibility annotation */
- if (msa->sa_cons != NULL) {
- ESL_ALLOC_WITH_TYPE(hmm->ca, char*, sizeof(char) * (hmm->M+2));
- hmm->ca[0] = ' ';
- for (apos = k = 1; apos <= msa->alen; apos++)
- if (matassign[apos]) hmm->ca[k++] = msa->sa_cons[apos-1];
- hmm->ca[k] = '\0';
- hmm->flags |= p7H_CA;
- }
-
- /* The alignment map (1..M in model, 1..alen in alignment) */
- ESL_ALLOC_WITH_TYPE(hmm->map, int*, sizeof(int) * (hmm->M+1));
- hmm->map[0] = 0;
- for (apos = k = 1; apos <= msa->alen; apos++)
- if (matassign[apos]) hmm->map[k++] = apos;
- hmm->flags |= p7H_MAP;
-
- return eslOK;
-
-ERROR:
- return status;
-}
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/COPYRIGHT b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/COPYRIGHT
deleted file mode 100644
index 15e80b7..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/COPYRIGHT
+++ /dev/null
@@ -1,27 +0,0 @@
-Easel - a library of C functions for biological sequence analysis
-Copyright (C) 2009 Howard Hughes Medical Institute.
-
-Additional copyrights apply to the package:
-Copyright (C) 2004-2009 Sean R. Eddy
-Copyright (C) 2006-2009 Howard Hughes Medical Institute
-
-Easel also includes pieces of copyrighted and licensed code from a
-variety of sources and contributors, as noted in the appropriate
-places in individual files. Copyright holders and contributors
-include:
-
- Free Software Foundation, Inc. configure
- Matteo Frigo aclocal.m4
- Steven G. Johnson aclocal.m4
- Stephen Moshier esl_sse.c,esl_vmx.c
- Julien Pommier esl_sse.c,esl_vmx.c
- Christophe Tournayre aclocal.m4
- David Wheeler easel.c::esl_tmpfile()
- University of Toronto esl_regexp.c
-
-The Easel library is freely modifiable and redistributable under the
-Janelia Farm Software License, a BSD license. See the file LICENSE for
-details.
-
-
-
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/LICENSE b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/LICENSE
deleted file mode 100644
index 49eacb7..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/LICENSE
+++ /dev/null
@@ -1,32 +0,0 @@
-The Janelia Farm Software License
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
- 1. Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in
- the documentation and/or other materials provided with the
- distribution.
-
- 3. Neither the name of the Howard Hughes Medical Institute nor the
- names of its contributors may be used to endorse or promote
- products derived from this software without specific prior written
- permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, ANY IMPLIED WARRANTIES OF MERCHANTABILITY,
-NON-INFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-REASONABLE ROYALTIES; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
-TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
-USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGE.
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/easel.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/easel.cpp
deleted file mode 100644
index 23faf87..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/easel.cpp
+++ /dev/null
@@ -1,1490 +0,0 @@
-/* Easel's foundation.
-*
-* Contents:
-* 1. Error handling conventions.
-* 2. Memory allocation/deallocation conventions.
-* 3. Standard banner for Easel miniapplications.
-* 4. Replacements for some C library functions.
-* 5. File path/name manipulation, including tmpfiles.
-* 6. Typed comparison functions.
-* 7. Commonly used background composition (iid) frequencies.
-* 11. Copyright and license.
-*
-* SRE, Tue Oct 28 08:29:17 2003 [St. Louis]
-* SVN $Id: easel.c 339 2009-05-25 15:21:48Z eddys $
-*/
-
-#include <hmmer3/easel/esl_config.h>
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-#include <ctype.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-
-#include <hmmer3/easel/easel.h>
-
-/*****************************************************************
-* 1. Error handling.
-*****************************************************************/
-
-void
-esl_exception(int code, char *file, int line, char *format, ...)
-{
- va_list argp;
- char buf[EXCEPTION_MSG_SZ];
- va_start( argp, format );
- vsnprintf( buf, EXCEPTION_MSG_SZ, format, argp );
- va_end( argp );
- throwUHMMER3Exception( buf );
-}
-
-void
-esl_fatal(const char *format, ...)
-{
- va_list argp;
- char buf[EXCEPTION_MSG_SZ];
- va_start( argp, format );
- vsnprintf( buf, EXCEPTION_MSG_SZ, format, argp );
- va_end( argp );
- throwUHMMER3Exception( buf );
-}
-/*---------------- end, error handling conventions --------------*/
-
-/*****************************************************************
-* 2. Memory allocation/deallocation conventions.
-*****************************************************************/
-
-/* Function: esl_Free2D()
-* Incept: squid's Free2DArray(), 1999.
-*
-* Purpose: Free a 2D pointer array <p>, where first dimension is
-* <dim1>. (That is, the array is <p[0..dim1-1][]>.)
-* Tolerates any of the pointers being NULL, to allow
-* sparse arrays.
-*
-* Returns: void.
-*/
-void
-esl_Free2D(void **p, int dim1)
-{
- int i;
- if (p != NULL) {
- for (i = 0; i < dim1; i++)
- if (p[i] != NULL) free(p[i]);
- free(p);
- }
- return;
-}
-
-/* Function: esl_Free3D()
-* Incept: squid's Free3DArray(), 1999.
-*
-* Purpose: Free a 3D pointer array <p>, where first and second
-* dimensions are <dim1>,<dim2>. (That is, the array is
-* <p[0..dim1-1][0..dim2-1][]>.) Tolerates any of the
-* pointers being NULL, to allow sparse arrays.
-*
-* Returns: void.
-*/
-void
-esl_Free3D(void ***p, int dim1, int dim2)
-{
- int i, j;
-
- if (p != NULL) {
- for (i = 0; i < dim1; i++)
- if (p[i] != NULL) {
- for (j = 0; j < dim2; j++)
- if (p[i][j] != NULL) free(p[i][j]);
- free(p[i]);
- }
- free(p);
- }
-}
-/*------------- end, memory allocation conventions --------------*/
-
-
-
-/*****************************************************************
-* 3. Standard banner for Easel miniapplications.
-*****************************************************************/
-
-/* Function: esl_banner()
-* Synopsis: print standard Easel application output header
-* Incept: SRE, Mon Feb 14 11:26:56 2005 [St. Louis]
-*
-* Purpose: Print the standard Easel command line application banner
-* to <fp>, constructing it from <progname> (the name of the
-* program) and a short one-line description <banner>.
-* For example,
-* <esl_banner(stdout, "compstruct", "compare RNA structures");>
-* might result in:
-*
-* \begin{cchunk}
-* # compstruct :: compare RNA structures
-* # Easel 0.1 (February 2005)
-* # Copyright (C) 2004-2007 HHMI Janelia Farm Research Campus
-* # Freely licensed under the Janelia Software License.
-* # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-* \end{cchunk}
-*
-* <progname> would typically be an application's
-* <argv[0]>, rather than a fixed string. This allows the
-* program to be renamed, or called under different names
-* via symlinks. Any path in the <progname> is discarded;
-* for instance, if <progname> is "/usr/local/bin/esl-compstruct",
-* "esl-compstruct" is used as the program name.
-*
-* Note:
-* Needs to pick up preprocessor #define's from easel.h,
-* as set by ./configure:
-*
-* symbol example
-* ------ ----------------
-* EASEL_VERSION "0.1"
-* EASEL_DATE "May 2007"
-* EASEL_COPYRIGHT "Copyright (C) 2004-2007 HHMI Janelia Farm Research Campus"
-* EASEL_LICENSE "Freely licensed under the Janelia Software License."
-*
-* Returns: (void)
-*/
-//void
-//esl_banner(FILE *fp, char *progname, char *banner)
-//{
-// char *appname = NULL;
-//
-// if (esl_FileTail(progname, FALSE, &appname) != eslOK) appname = progname;
-//
-// fprintf(fp, "# %s :: %s\n", appname, banner);
-// fprintf(fp, "# Easel %s (%s)\n", EASEL_VERSION, EASEL_DATE);
-// fprintf(fp, "# %s\n", EASEL_COPYRIGHT);
-// fprintf(fp, "# %s\n", EASEL_LICENSE);
-// fprintf(fp, "# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n");
-//
-// if (appname != NULL) free(appname);
-// return;
-//}
-//
-
-/* Function: esl_usage()
-* Synopsis: print standard Easel application usage help line
-* Incept: SRE, Wed May 16 09:04:42 2007 [Janelia]
-*
-* Purpose: Given a usage string <usage> and the name of the program
-* <progname>, output a standardized usage/help
-* message. <usage> is minimally a one line synopsis like
-* "[options] <filename>", but it may extend to multiple
-* lines to explain the command line arguments in more
-* detail. It should not describe the options; that's the
-* job of the getopts module, and its <esl_opt_DisplayHelp()>
-* function.
-*
-* This is used by the Easel miniapps, and may be useful in
-* other applications as well.
-*
-* As in <esl_banner()>, the <progname> is typically passed
-* as <argv[0]>, and any path prefix is ignored.
-*
-* For example, if <argv[0]> is </usr/local/bin/esl-compstruct>,
-* then
-*
-* \begin{cchunk}
-* esl_usage(stdout, argv[0], "[options] <trusted file> <test file>">
-* \end{cchunk}
-*
-* produces
-*
-* \begin{cchunk}
-* Usage: esl-compstruct [options] <trusted file> <test file>
-* \end{cchunk}
-*
-* Returns: (void).
-*/
-//void
-//esl_usage(FILE *fp, char *progname, char *usage)
-//{
-// char *appname = NULL;
-//
-// if (esl_FileTail(progname, FALSE, &appname) != eslOK) appname = progname;
-// fprintf(fp, "Usage: %s %s\n", appname, usage);
-// if (appname != NULL) free(appname);
-// return;
-//}
-
-
-/*-------------------- end, standard miniapp banner --------------------------*/
-
-
-
-
-/******************************************************************************
-* 4. Replacements for C library functions
-* fgets() -> esl_fgets() fgets() with dynamic allocation
-* strdup() -> esl_strdup() strdup() is not ANSI
-* strcat() -> esl_strcat() strcat() with dynamic allocation
-* strtok() -> esl_strtok() threadsafe strtok()
-* sprintf() -> esl_sprintf() sprintf() with dynamic allocation
-* strcmp() -> esl_strcmp() strcmp() tolerant of NULL strings
-* free() -> esl_free() free() tolerant of NULL pointers
-*****************************************************************************/
-
-/* Function: esl_fgets()
-* Date: SRE, Thu May 13 10:56:28 1999 [St. Louis]
-*
-* Purpose: Dynamic allocation version of fgets(),
-* capable of reading almost unlimited line lengths.
-*
-* Args: buf - ptr to a string (may be reallocated)
-* n - ptr to current allocated length of buf,
-* (may be changed)
-* fp - open file ptr for reading
-*
-* Before the first call to esl_parse_fgets(),
-* initialize buf to NULL and n to 0.
-* They're a linked pair, so don't muck with the
-* allocation of buf or the value of n while
-* you're still doing esl_parse_fgets() calls with them.
-*
-* Returns: <eslOK> on success.
-* Returns <eslEOF> on normal end-of-file.
-*
-* When <eslOK>:
-* <*buf> points to a <NUL>-terminated line from the file.
-* <*n> contains the current allocated length for <*buf>.
-*
-* Caller must free <*buf> eventually.
-*
-* Throws: <eslEMEM> on an allocation failure.
-*
-* Example: char *buf;
-* int n;
-* FILE *fp;
-*
-* fp = fopen("my_file", "r");
-* buf = NULL;
-* n = 0;
-* while (esl_fgets(&buf, &n, fp) == eslOK)
-* {
-* do stuff with buf;
-* }
-* if (buf != NULL) free(buf);
-*/
-//int
-//esl_fgets(char **buf, int *n, FILE *fp)
-//{
-// int status;
-// void *p;
-// char *s;
-// int len;
-// int pos;
-//
-// if (*n == 0)
-// {
-// ESL_ALLOC(*buf, sizeof(char) * 128);
-// *n = 128;
-// }
-//
-// /* Simple case 1. We're sitting at EOF, or there's an error.
-// * fgets() returns NULL, so we return EOF.
-// */
-// if (fgets(*buf, *n, fp) == NULL) return eslEOF;
-//
-// /* Simple case 2. fgets() got a string, and it reached EOF doing it.
-// * return success status, so caller can use
-// * the last line; on the next call we'll
-// * return the 0 for the EOF.
-// */
-// if (feof(fp)) return eslOK;
-//
-// /* Simple case 3. We got a complete string, with \n,
-// * and don't need to extend the buffer.
-// */
-// len = strlen(*buf);
-// if ((*buf)[len-1] == '\n') return eslOK;
-//
-// /* The case we're waiting for. We have an incomplete string,
-// * and we have to extend the buffer one or more times. Make
-// * sure we overwrite the previous fgets's \0 (hence +(n-1)
-// * in first step, rather than 128, and reads of 129, not 128).
-// */
-// pos = (*n)-1;
-// while (1) {
-// ESL_RALLOC(*buf, p, sizeof(char) * (*n+128));
-// *n += 128;
-// s = *buf + pos;
-// if (fgets(s, 129, fp) == NULL) return eslOK;
-// len = strlen(s);
-// if (s[len-1] == '\n') return eslOK;
-// pos += 128;
-// }
-// /*NOTREACHED*/
-// return eslOK;
-//
-// ERROR:
-// if (*buf != NULL) free(*buf);
-// *buf = NULL;
-// *n = 0;
-// return status;
-//}
-
-/* Function: esl_strdup()
-* Date: SRE, Wed May 19 17:57:28 1999 [St. Louis]
-*
-* Purpose: Makes a duplicate of string <s>, puts it in <ret_dup>.
-* Caller can pass string length <n>, if it's known,
-* to save a strlen() call; else pass -1 to have the string length
-* determined.
-*
-* Tolerates <s> being <NULL>; in which case,
-* returns <eslOK> with <*ret_dup> set to <NULL>.
-*
-* Args: s - string to duplicate (NUL-terminated)
-* n - length of string, if known; -1 if unknown.
-* ret_dup - RETURN: duplicate of <s>.
-*
-* Returns: <eslOK> on success, and <ret_dup> is valid.
-*
-* Throws: <eslEMEM> on allocation failure.
-*/
-int
-esl_strdup(const char *s, int64_t n, char **ret_dup)
-{
- int status;
- char *newStr = NULL;
-
- if (ret_dup != NULL) *ret_dup = NULL;
- if (s == NULL) return eslOK;
- if (n < 0) n = strlen(s);
-
- ESL_ALLOC_WITH_TYPE(newStr, char*, sizeof(char) * (n+1));
- strncpy(newStr, s, n);
-
- if (ret_dup != NULL) *ret_dup = newStr; else free(newStr);
- return eslOK;
-
-ERROR:
- if (newStr != NULL) free(newStr);
- if (ret_dup != NULL) *ret_dup = NULL;
- return status;
-}
-
-
-/* Function: esl_strcat()
-* Date: SRE, Thu May 13 09:36:32 1999 [St. Louis]
-*
-* Purpose: Dynamic memory version of strcat().
-* Appends <src> to the string that <dest> points to,
-* extending allocation for dest if necessary. Caller
-* can optionally provide the length of <*dest> in
-* <ldest>, and the length of <src> in <lsrc>; if
-* either of these is -1, <esl_strcat()> calls <strlen()>
-* to determine the length. Providing length information,
-* if known, accelerates the routine.
-*
-* <*dest> may be <NULL>, in which case this is equivalent
-* to a <strdup()> of <src> (that is, <*dest> is allocated
-* rather than reallocated).
-*
-* <src> may be <NULL>, in which case <dest> is unmodified.
-*
-* Note: One timing experiment (100 successive appends of
-* 1-255 char) shows esl_strcat() has about a 20%
-* overhead relative to strcat(). If optional
-* length info is passed, esl_strcat() is about 30%
-* faster than strcat().
-*
-* Args: dest - ptr to string (char **), '\0' terminated
-* ldest - length of dest, if known; or -1 if length unknown.
-* src - string to append to dest, '\0' terminated
-* lsrc - length of src, if known; or -1 if length unknown.
-*
-* Returns: <eslOK> on success; <*dest> is (probably) reallocated,
-* modified, and nul-terminated.
-*
-* Throws: <eslEMEM> on allocation failure; initial state of <dest>
-* is unaffected.
-*/
-int
-esl_strcat(char **dest, int64_t ldest, const char *src, int64_t lsrc)
-{
- void *p;
- int status;
- int64_t len1, len2;
-
- if (ldest < 0) len1 = ((*dest == NULL) ? 0 : strlen(*dest));
- else len1 = ldest;
-
- if (lsrc < 0) len2 = (( src == NULL) ? 0 : strlen(src));
- else len2 = lsrc;
-
- if (len2 == 0) return eslOK;
-
- if (*dest == NULL) ESL_ALLOC_WITH_TYPE(*dest, char*, sizeof(char) * (len2+1));
- else ESL_RALLOC_WITH_TYPE(*dest, char*, p, sizeof(char) * (len1+len2+1));
-
- memcpy((*dest)+len1, src, len2+1);
- return eslOK;
-
-ERROR:
- return status;
-}
-
-
-/* Function: esl_strtok()
-* Synopsis: Threadsafe version of C's <strtok()>
-* Date: SRE, Wed May 19 16:30:20 1999 [St. Louis]
-*
-* Purpose: Thread-safe version of <strtok()> for parsing next token in
-* a string.
-*
-* Increments <*s> while <**s> is a character in <delim>,
-* then stops; the first non-<delim> character defines the
-* beginning of a token. Increments <*s> until it reaches
-* the next delim character (or \verb+\0+); this defines the end
-* of the token, and this character is replaced with
-* \verb+\0+. <*s> is then reset to point to the next character
-* after the \verb+\0+ that was written, so successive calls can
-* extract tokens in succession. Sets <*ret_tok> to point at
-* the beginning of the token, and returns <eslOK>.
-*
-* If a token is not found -- if <*s> already points to
-* \verb+\0+, or to a string composed entirely of characters in
-* <delim> -- then returns <eslEOL>, with <*ret_tok> set to
-* <NULL>.
-*
-* <*s> cannot be a constant string, since we write \verb+\0+'s
-* to it; caller must be willing to have this string
-* modified. And since we walk <*s> through the string as we
-* parse, the caller wants to use a tmp pointer <*s>, not
-* the original string itself.
-*
-* Example:
-* char *tok;
-* char *s;
-* char buf[50] = "This is a sentence.";
-*
-* s = buf;
-* esl_strtok(&s, " ", &tok);
-* tok is "This"; s is "is a sentence."
-* esl_strtok(&s, " ", &tok);
-* tok is "is"; s is " a sentence.".
-* esl_strtok(&s, " ", &tok);
-* tok is "a"; s is "sentence.".
-* esl_strtok(&s, " ", &tok, &len);
-* tok is "sentence."; s is "\0".
-* esl_strtok(&s, " ", &tok, &len);
-* returned eslEOL; tok is NULL; s is "\0".
-*
-* Args: s - a tmp, modifiable ptr to a string
-* delim - characters that delimits tokens
-* ret_tok - RETURN: ptr to \0-terminated token
-*
-* Returns: <eslOK> on success, <*ret_tok> points to next token, and
-* <*s> points to next character following the token.
-*
-* Returns <eslEOL> on end of line; in which case <*s>
-* points to the terminal \verb+\0+ on the line, and <*ret_tok>
-* is <NULL>.
-*/
-//int
-//esl_strtok(char **s, char *delim, char **ret_tok)
-//{
-// return esl_strtok_adv(s, delim, ret_tok, NULL, NULL);
-//}
-
-
-/* Function: esl_strtok_adv()
-* Synopsis: More advanced interface to <esl_strtok()>
-* Date: SRE, Mon Oct 13 10:16:26 2008
-*
-* Purpose: Same as <esl_strtok()>, except the caller may also
-* optionally retrieve the length of the token in <*opt_toklen>,
-* and the token-ending character that was replaced by \verb+\0+
-* in <*opt_endchar>.
-*
-* Args: s - a tmp, modifiable ptr to string
-* delim - characters that delimits tokens
-* ret_tok - RETURN: ptr to \0-terminated token string
-* opt_toklen - optRETURN: length of token; pass NULL if not wanted
-* opt_endchar - optRETURN: character that was replaced by <\0>.
-*
-* Returns: <eslOK> on success, <*ret_tok> points to next token, <*s>
-* points to next character following the token,
-* <*opt_toklen> is the <strlen()> length of the token in
-* characters (excluding its terminal \verb+\0+), and <*opt_endchar>
-* is the character that got replaced by \verb+\0+ to form the token.
-*
-* Returns <eslEOL> if no token is found (end of line); in
-* which case <*s> points to the terminal \verb+\0+ on the line,
-* <*ret_tok> is <NULL>, <*opt_toklen> is 0 and
-* <*opt_endchar> is \verb+\0+.
-*/
-//int
-//esl_strtok_adv(char **s, char *delim, char **ret_tok, int *opt_toklen, char *opt_endchar)
-//{
-// char *end;
-// char *tok = *s;
-// char c = '\0';
-// int n = 0;
-// int status = eslEOL; /* unless proven otherwise */
-//
-// tok += strspn(tok, delim);
-// if (! *tok) tok = NULL; /* if *tok = 0, EOL, no token left */
-// else
-// {
-// n = strcspn(tok, delim);
-// end = tok + n;
-// if (*end == '\0') *s = end; /* a final token that extends to end of string */
-// else
-// {
-// c = *end; /* internal token: terminate with \0 */
-// *end = '\0';
-// *s = end+1;
-// }
-// status = eslOK;
-// }
-//
-// *ret_tok = tok;
-// if (opt_toklen != NULL) *opt_toklen = n;
-// if (opt_endchar != NULL) *opt_endchar = c;
-// return status;
-//}
-
-//
-///* Function: esl_sprintf()
-// * Synopsis: Dynamic allocation version of sprintf().
-// * Incept: SRE, Mon Oct 20 09:35:57 2008 [Janelia]
-// *
-// * Purpose: Like ANSI C's <sprintf()>, except the string
-// * result is dynamically allocated, and returned
-// * through <*ret_s>.
-// *
-// * Caller is responsible for free'ing <*ret_s>.
-// *
-// * As a special case to facilitate some optional string
-// * initializations, if <format> is <NULL>, <*ret_s> is set
-// * to <NULL>.
-// *
-// * Returns: <eslOK> on success, and <*ret_s> is the resulting
-// * string.
-// *
-// * Throws: <eslEMEM> on allocation failure.
-// * <eslESYS> if a <*printf()> library call fails.
-// */
-//int
-//esl_sprintf(char **ret_s, const char *format, ...)
-//{
-// va_list ap;
-// int status;
-//
-// va_start(ap, format);
-// status = esl_vsprintf(ret_s, format, &ap);
-// va_end(ap);
-// return status;
-//}
-//
-///* Function: esl_vsprintf()
-// * Synopsis: Dynamic allocation version of vsprintf()
-// * Incept: SRE, Wed Oct 22 14:48:44 2008 [Janelia]
-// *
-// * Purpose: Like ANSI C's <vsprintf>, except the string
-// * result is dynamically allocated, and returned
-// * through <*ret_s>.
-// *
-// * Caller is responsible for free'ing <*ret_s>.
-// *
-// * As a special case to facilitate some optional string
-// * initializations, if <format> is <NULL>, <*ret_s> is set
-// * to <NULL>.
-// *
-// * Returns: <eslOK> on success, and <*ret_s> is the resulting
-// * string.
-// *
-// * Throws: <eslEMEM> on allocation failure.
-// * <eslESYS> if a <*printf()> library call fails.
-// */
-int
-esl_vsprintf(char **ret_s, const char *format, va_list *ap)
-{
- char *s = NULL;
- char *p = NULL;
- va_list ap2;
- int n1, n2;
- int status;
-
- if (format == NULL) { *ret_s = NULL; return eslOK; }
-
- va_copy(ap2, *ap);
- n1 = strlen(format) * 2; /* initial guess at string size needed */
- ESL_ALLOC_WITH_TYPE(s, char*, sizeof(char) * (n1+1));
- if ((n2 = vsnprintf(s, n1+1, format, *ap)) >= n1)
- {
- ESL_RALLOC_WITH_TYPE(s, char*, p, sizeof(char) * (n2+1));
- if (vsnprintf(s, n2+1, format, ap2) == -1) ESL_EXCEPTION(eslESYS, "vsnprintf() failed");
- }
- else if (n2 == -1) ESL_EXCEPTION(eslESYS, "vsnprintf() failed");
-
- va_end(ap2);
- *ret_s = s;
- return eslOK;
-
-ERROR:
- if (s != NULL) free(s);
- va_end(ap2);
- *ret_s = NULL;
- return status;
-}
-
-
-/* Function: esl_strcmp()
-* Synopsis: a strcmp() that treats NULL as empty string.
-* Incept: SRE, Wed Jan 21 14:11:48 2009 [Janelia]
-*
-* Purpose: A version of <strcmp()> that accepts <NULL>
-* strings. If both <s1> and <s2> are non-<NULL>
-* they are compared by <strcmp()>. If both are
-* <NULL>, return 0 (as if they are identical
-* strings). If only <s1> (or <s2>) is non-<NULL>,
-* return 1 (or -1), corresponding to ordering
-* any non-<NULL> string as greater than a <NULL>
-* string.
-*
-* (Easel routinely uses NULL to mean an unset optional
-* string, and often needs to compare two strings for
-* equality.)
-*
-* Returns: 0 if <s1 == s2>; 1 if <s1 > s2>; -1 if <s1 < s2>.
-*/
-int
-esl_strcmp(const char *s1, const char *s2)
-{
- if (s1 && s2) return strcmp(s1, s2);
- else if (s1) return 1;
- else if (s2) return -1;
- else return 0;
-}
-
-
-
-/*****************************************************************
-* Easel's optional replacements for common but non-ANSI C functions.
-* These alternatives are only compiled in when we need them,
-* and their inclusion is controlled by #define's in easel.h.
-* strcasecmp() -> may be define'd to be esl_strcasecmp()
-*/
-
-#ifndef HAVE_STRCASECMP
-/* Function: esl_strcasecmp()
-* Incept: SRE, Sat Dec 10 09:44:13 2005 [St. Louis]
-*
-* Purpose: Compare strings <s1> and <s2>. Return -1 if
-* <s1> is alphabetically less than <s2>, 0 if they
-* match, and 1 if <s1> is alphabetically greater
-* than <s2>. All matching is case-insensitive.
-*
-* Args: s1 - string 1, \0 terminated
-* s2 - string 2, \0 terminated
-*
-* Returns: -1, 0, or 1, if <s1> is less than, equal, or
-* greater than <s2>, case-insensitively.
-*
-* Throws: (no abnormal error conditions)
-*/
-int
-esl_strcasecmp(const char *s1, const char *s2)
-{
- int i, c1, c2;
-
- for (i = 0; s1[i] != '\0' && s2[i] != '\0'; i++)
- {
- c1 = s1[i]; /* total paranoia. don't trust toupper() to */
- c2 = s2[i]; /* leave the original unmodified; make a copy. */
-
- if (islower(c1)) c1 = toupper(c1);
- if (islower(c2)) c2 = toupper(c2);
-
- if (c1 < c2) return -1;
- else if (c1 > c2) return 1;
- }
-
- if (s1[i] != '\0') return 1; /* prefixes match, but s1 is longer */
- else if (s2[i] != '\0') return -1; /* prefixes match, s2 is longer */
-
- return 0; /* else, a case-insensitive match. */
-}
-#endif /* ! HAVE_STRCASECMP */
-
-
-/*****************************************************************
-* and some extra str*() functions...
-*****************************************************************/
-
-/* Function: esl_strchop()
-* Incept: SRE, Mon Apr 3 10:24:14 2006 [St. Louis]
-*
-* Purpose: Chops trailing whitespace off of a string <s> (or if <s>
-* is NULL, do nothing).
-* <n> is the length of the input string, if known; or pass <n=-1>
-* if length is unknown.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: (no abnormal error conditions)
-*
-* Xref: from squid's StringChop().
-*/
-int
-esl_strchop(char *s, int64_t n)
-{
- int i;
- if (s == NULL) return eslOK;
- if (n < 0) n = strlen(s);
- for (i = n-1; i>=0 && isspace((int) s[i]); i--);
- s[i+1] = '\0';
- return eslOK;
-}
-
-
-/* Function: esl_strdealign()
-* Synopsis: Dealign a string according to gaps in a reference aseq.
-* Incept: SRE, Thu Feb 17 15:12:26 2005 [St. Louis]
-*
-* Purpose: Dealign string <s> in place, by removing any characters
-* aligned to gaps in <aseq>. Gap characters are defined in the
-* string <gapstring>; for example, <-_.>. Optionally return the
-* unaligned length of <s> in characters in <*opt_rlen>.
-*
-* By providing a reference <aseq> to dealign against, this
-* function can dealign aligned annotation strings, such as
-* secondary structure or surface accessibility strings.
-* If <s> is the same as <aseq>, then the aligned sequence
-* itself is dealigned in place.
-*
-* To dealign both annotations and sequence, do the
-* sequence last, since you need it as the reference <aseq>
-* when doing the annotations.
-*
-* It is safe to pass a <NULL> <s> (an unset optional
-* annotation), in which case the function no-ops and
-* returns <eslOK>.
-*
-* Args: s - string to dealign
-* aseq - reference aligned sequence seq
-* gapchars - definition of gap characters ("-_." for example)
-* opt_rlen - optRETURN: number of residues in <s> after dealign
-*
-* Returns: <eslOK> on success.
-*/
-int
-esl_strdealign(char *s, const char *aseq, const char *gapchars, int64_t *opt_rlen)
-{
- int64_t n = 0;
- int64_t apos;
-
- if (s == NULL) return eslOK;
-
- for (apos = 0; aseq[apos] != '\0'; apos++)
- if (strchr(gapchars, aseq[apos]) == NULL)
- s[n++] = s[apos];
- s[n] = '\0';
-
- if (opt_rlen != NULL) *opt_rlen = n;
- return eslOK;
-}
-
-
-/*----------------- end, C library replacements -------------------------*/
-
-//
-//
-//
-///******************************************************************************
-// * 5. File path/name manipulation functions, including tmpfiles
-// *
-// * Sufficiently widespread in the modules that we make them core.
-// * (Should be moved to their own module eventually)
-// *****************************************************************************/
-//
-///* Function: esl_FileExists()
-// * Incept: SRE, Sat Jan 22 09:07:24 2005 [St. Louis]
-// *
-// * Purpose: Returns TRUE if <filename> exists, else FALSE.
-// *
-// * Note: Testing a read-only fopen() is the only portable ANSI C
-// * I'm aware of. We could also use a POSIX func here, since
-// * we have a ESL_POSIX_AUGMENTATION flag in the code.
-// *
-// * Xref: squid's FileExists().
-// */
-//int
-//esl_FileExists(const char *filename)
-//{
-// FILE *fp;
-// if ((fp = fopen(filename, "r"))) { fclose(fp); return TRUE; }
-// return FALSE;
-//}
-//
-//
-///* Function: esl_FileTail()
-// * Incept: SRE, Tue Mar 7 08:30:00 2006 [St. Louis]
-// *
-// * Purpose: Given a full pathname <path>, extract the filename
-// * without the directory path; return it via
-// * <ret_filename>. <ret_filename> space is allocated
-// * here, and must be free'd by the caller.
-// * For example:
-// * </foo/bar/baz.1> becomes <baz.1>;
-// * <foo/bar> becomes <bar>;
-// * <foo> becomes <foo>; and
-// * </> becomes the empty string.
-// *
-// * If <nosuffix> is <TRUE>, the rightmost trailing ".foo" extension
-// * is removed too. The suffix is defined as everything following
-// * the rightmost period in the filename in <path>:
-// * with <nosuffix> <TRUE>,
-// * <foo.2/bar.idx> becomes <bar>,
-// * <foo.2/bar> becomes <bar>, and
-// * <foo.2/bar.1.3> becomes <bar.1>.
-// *
-// * Args: path - full pathname to process, "/foo/bar/baz.1"
-// * nosuffix - TRUE to remove rightmost suffix from the filename
-// * ret_file - RETURN: filename portion of the path.
-// *
-// * Returns: <eslOK> on success, and <ret_file> points to a newly
-// * allocated string containing the filename.
-// *
-// * Throws: <eslEMEM> on allocation failure.
-// */
-//int
-//esl_FileTail(const char *path, int nosuffix, char **ret_file)
-//{
-// int status;
-// char *tail = NULL;
-// char *lastslash;
-// char *lastdot;
-// /* remove directory prefix */
-// lastslash = strrchr(path, eslDIRSLASH);
-// ESL_ALLOC(tail, sizeof(char) * (strlen(path)+1)); /* a little overkill */
-// if (lastslash == NULL) strcpy(tail, path);
-// else strcpy(tail, lastslash+1);
-// /* remove trailing suffix */
-// if (nosuffix) {
-// if ((lastdot = strrchr(tail, '.')) != NULL)
-// *lastdot = '\0';
-// }
-// *ret_file = tail;
-// return eslOK;
-//
-// ERROR:
-// if (tail != NULL) free(tail);
-// *ret_file = NULL;
-// return status;
-//}
-//
-///* Function: esl_FileConcat()
-// * Incept: SRE, Sat Jan 22 07:28:46 2005 [St. Louis]
-// *
-// * Purpose: Concatenates directory path prefix <dir> and a filename
-// * <file>, and returns the new full pathname through
-// * <ret_path>. If <dir> does not already end in the
-// * appropriate delimiter (e.g. / for UNIX), one is added.
-// *
-// * If <dir> is NULL, then <ret_path> is just the same as
-// * <file>. Similarly, if <file> already appears to be a
-// * full path (because its first character is a /), then
-// * <dir> is ignored and <ret_path> is the same as
-// * <file>. It wouldn't normally make sense for a caller to
-// * call this function with such arguments.
-// *
-// * <file> may be a relative path. For example,
-// * if <dir> is "/usr/local" and <file> is "lib/myapp/data",
-// * <ret_path> will be "/usr/local/lib/myapp/data".
-// *
-// * Returns: <eslOK> on success, and puts the path
-// * in <ret_path>; this string is allocated here,
-// * and must be free'd by caller with <free()>.
-// *
-// * Throws: <eslEMEM> on allocation failure.
-// * <eslEINVAL> on bad argument.
-// * In either case, <ret_path> is returned NULL.
-// *
-// * Xref: squid's FileConcat().
-// */
-//int
-//esl_FileConcat(const char *dir, const char *file, char **ret_path)
-//{
-// char *path = NULL;
-// int nd, nf;
-// int status;
-//
-// if (ret_path != NULL) *ret_path = NULL;
-// if (file == NULL) ESL_EXCEPTION(eslEINVAL, "null file");
-//
-// nd = (dir != NULL)? strlen(dir) : 0;
-// nf = strlen(file);
-// ESL_ALLOC(path, sizeof(char) * (nd+nf+2));
-//
-// if (dir == NULL) /* 1. silly caller didn't give a path */
-// strcpy(path, file);
-// else if (*file == eslDIRSLASH) /* 2. <file> is already a path? */
-// strcpy(path, file);
-// else if (dir[nd-1] == eslDIRSLASH) /* 3. <dir><file> (dir is / terminated) */
-// sprintf(path, "%s%s", dir, file);
-// else /* 4. <dir>/<file> (usual case) */
-// sprintf(path, "%s%c%s", dir, eslDIRSLASH, file);
-//
-// *ret_path = path;
-// return eslOK;
-//
-// ERROR:
-// if (path != NULL) free(path);
-// if (ret_path != NULL) *ret_path = NULL;
-// return status;
-//}
-//
-//
-///* Function: esl_FileNewSuffix()
-// * Incept: SRE, Sat Jan 22 10:04:08 2005 [St. Louis]
-// *
-// * Purpose: Add a file suffix <sfx> to <filename>; or if <filename>
-// * already has a suffix, replace it with <sfx>. A suffix is
-// * usually 2-4 letters following a '.' character. Returns
-// * an allocated string containing the result in <ret_newpath>.
-// *
-// * For example, if <filename> is "foo" and <sfx> is "ssi",
-// * returns "foo.ssi". If <filename> is "foo.db" and <sfx>
-// * is "idx", returns "foo.idx".
-// *
-// * Returns: <eslOK> on success, and <ret_newpath> is set
-// * string "<base_filename>.<sfx>". Caller must <free()>
-// * this string.
-// *
-// * Throws: <eslEMEM> on allocation failure.
-// *
-// * Xref: squid's FileAddSuffix().
-// */
-//int
-//esl_FileNewSuffix(const char *filename, const char *sfx, char **ret_newpath)
-//{
-// char *newStr = NULL;
-// char *lastdot;
-// int nf;
-// int status;
-//
-// if (ret_newpath != NULL) *ret_newpath = NULL;
-//
-// lastdot = strrchr(filename, '.'); /* check for suffix to replace */
-// if (lastdot != NULL &&
-// strchr(lastdot, eslDIRSLASH) != NULL)
-// lastdot = NULL; /*foo.1/filename case - don't be fooled.*/
-// nf = (lastdot == NULL)? strlen(filename) : lastdot-filename;
-//
-// ESL_ALLOC(newStr, sizeof(char) * (nf+strlen(sfx)+2)); /* '.' too */
-// strncpy(newStr, filename, nf);
-// *(newStr+nf) = '.';
-// strcpy(newStr+nf+1, sfx);
-//
-// if (ret_newpath != NULL) *ret_newpath = newStr; else free(newStr);
-// return eslOK;
-//
-// ERROR:
-// if (newStr != NULL) free(newStr);
-// if (ret_newpath != NULL) *ret_newpath = NULL;
-// return status;
-//}
-//
-//
-//
-///* Function: esl_FileEnvOpen()
-// * Incept: SRE, Sat Jan 22 08:41:48 2005 [St. Louis]
-// *
-// * Purpose: Looks for a file <fname> in a colon-separated list of
-// * directories that is configured in an environment variable
-// * <env>. The first occurrence of file <fname> in this directory
-// * list is opened read-only. The open file ptr is returned
-// * through <opt_fp>, and the full path name to the file
-// * that was opened is returned through <opt_path>.
-// * Caller can pass NULL in place of <opt_fp> or <opt_path>
-// * if it is not interested in one or both of these.
-// *
-// * Does not look in the current directory unless "." is
-// * explicitly in the directory list provided by <env>.
-// *
-// * Note: One reason to pass <opt_path> back to the caller is that
-// * sometimes we're opening the first in a group of files
-// * (for example, a database and its SSI index), and we want
-// * to make sure that after we find the main file, the
-// * caller can look for the auxiliary file(s) in exactly the
-// * same directory.
-// *
-// * Examples: % setenv BLASTDB /nfs/databases/blast-db:/nfs/databases/nr/
-// *
-// * FILE *fp;
-// * char *path;
-// * int status;
-// * status = esl_FileEnvOpen("swiss42", "BLASTDB", &fp, &path);
-// *
-// * Returns: <eslOK> on success, and provides <opt_fp> and <opt_path>;
-// * <opt_fp> is opened here, and must be <fclose()>'d by caller;
-// * <opt_path> is allocated here, and must be <free()>'d by caller.
-// *
-// * Returns <eslENOTFOUND> if the file not found in any directory,
-// * or if <env> does not contain any directories to look in.
-// *
-// * Throws: <eslEMEM> on allocation error.
-// *
-// * Xref: squid's EnvFileOpen().
-// */
-//int
-//esl_FileEnvOpen(const char *fname, const char *env, FILE **opt_fp, char **opt_path)
-//{
-// FILE *fp;
-// char *dirlist; /* :-separated list of directories */
-// char *s, *s2; /* ptrs into elems in env list */
-// char *path = NULL;
-// int np;
-// int status;
-//
-// fp = NULL;
-// if (opt_fp != NULL) *opt_fp = NULL;
-// if (opt_path != NULL) *opt_path = NULL;
-//
-// if (env == NULL) return eslENOTFOUND;
-// if ((s = getenv(env)) == NULL) return eslENOTFOUND;
-// if (esl_strdup(s, -1, &dirlist) != eslOK) return eslEMEM;
-//
-// np = strlen(fname) + strlen(s) + 2; /* upper bound on full path len */
-// ESL_ALLOC(path, sizeof(char) * np);
-//
-// s = dirlist;
-// while (s != NULL)
-// {
-// if ((s2 = strchr(s, ':')) != NULL) { *s2 = '\0'; s2++;} /* ~=strtok() */
-// sprintf(path, "%s%c%s", s, eslDIRSLASH, fname); /* // won't hurt */
-// if ((fp = fopen(path, "r")) != NULL) break;
-// s = s2;
-// }
-// if (fp == NULL) { free(path); free(dirlist); return eslENOTFOUND; }
-//
-// if (opt_path != NULL) { *opt_path = path; } else free(path);
-// if (opt_fp != NULL) { *opt_fp = fp; } else fclose(fp);
-// free(dirlist);
-// return eslOK;
-//
-// ERROR:
-// if (path != NULL) free(path);
-// if (fp != NULL) fclose(fp);
-// if (dirlist != NULL) free(dirlist);
-// if (opt_path != NULL) *opt_path = NULL;
-// if (opt_fp != NULL) *opt_fp = NULL;
-// return status;
-//}
-//
-///* Function: esl_tmpfile()
-// * Incept: SRE, Wed Sep 6 08:15:15 2006 [Janelia]
-// *
-// * Purpose: Open a secure temporary <FILE *> handle and return it in
-// * <ret_fp>. The file is opened in read-write mode (<w+b>)
-// * with permissions 0600, as an atomic operation using the
-// * POSIX <mkstemp()> function.
-// *
-// * The <basename6X> argument is a modifiable string that must
-// * end in "XXXXXX" (for example, "esltmpXXXXXX"). The
-// * <basename6X> is used to construct a unique tmpfile name.
-// *
-// * The file is opened in a standard temporary file
-// * directory. The path is obtained from the environment
-// * variable <TMPDIR>; failing that, from the environment
-// * variable <TMP>; and failing that, </tmp> is used. If the
-// * process is running <setuid> or <setgid>, then the
-// * environment variables are ignored, and the temp file is
-// * always created in </tmp>.
-// *
-// * The created tmpfile is not persistent and is not visible
-// * to a directory listing. The caller may <rewind()> the
-// * <ret_fp> and do cycles of reading and/or writing, but
-// * once the <ret_fp> is closed, the file disappears. The
-// * caller does not need to <remove()> or <unlink()> it (and
-// * in fact, cannot do so, because it does not know the
-// * tmpfile's name).
-// *
-// * This function is a secure replacement for ANSI C
-// * <tmpfile()>, which is said to be insecurely implemented on
-// * some platforms.
-// *
-// * Returns: <eslOK> on success, and now <ret_fp> points to a new <FILE *>
-// * stream for the opened tempfile.
-// *
-// * Throws: <eslESYS> if a system call (including the <mkstemp()> call)
-// * fails, and and <ret_fp> is returned NULL. One possible
-// * problem is if the temporary directory doesn't exist or
-// * is not writable. This is considered to be a system
-// * error, not a user error, so Easel handles it as an exception.
-// *
-// * Xref: STL11/85. Substantially copied from David Wheeler,
-// * "Secure Programming for Linux and Unix HOWTO",
-// * http://www.dwheeler.com/secure-programs/Secure-Programs-HOWTO/introduction.html.
-// * Copyright (C) 1999-2001 David A. Wheeler.
-// * Licensed under the MIT license; see Appendix C of the HOWTO.
-// * Thanks, David, for the clearest explanation of the issues
-// * that I've seen.
-// *
-// * I also referred to H. Chen, D. Dean, and D. Wagner,
-// * "Model checking one million lines of C code",
-// * In: Network and Distributed System Security Symposium, pp 171-185,
-// * San Diego, CA, February 2004;
-// * http://www.cs.ucdavis.edu/~hchen/paper/ndss04.pdf.
-// * Wheeler's implementation obeys Chen et al's "Property 5",
-// * governing secure use of tempfiles.
-// */
-//int
-//esl_tmpfile(char *basename6X, FILE **ret_fp)
-//{
-// char *tmpdir = NULL;
-// char *path = NULL;
-// FILE *fp = NULL;
-// int fd;
-// int status;
-// mode_t old_mode;
-//
-// /* Determine what tmp directory to use, and construct the
-// * file name.
-// */
-// if (getuid() == geteuid() && getgid() == getegid())
-// {
-// tmpdir = getenv("TMPDIR");
-// if (tmpdir == NULL) tmpdir = getenv("TMP");
-// }
-// if (tmpdir == NULL) tmpdir = "/tmp";
-// if ((status = esl_FileConcat(tmpdir, basename6X, &path)) != eslOK) goto ERROR;
-//
-// old_mode = umask(077);
-// if ((fd = mkstemp(path)) < 0) ESL_XEXCEPTION(eslESYS, "mkstemp() failed.");
-// umask(old_mode);
-// if ((fp = fdopen(fd, "w+b")) == NULL) ESL_XEXCEPTION(eslESYS, "fdopen() failed.");
-// if (unlink(path) < 0) ESL_XEXCEPTION(eslESYS, "unlink() failed.");
-//
-// *ret_fp = fp;
-// free(path);
-// return eslOK;
-//
-// ERROR:
-// if (path != NULL) free(path);
-// if (fp != NULL) fclose(fp);
-// *ret_fp = NULL;
-// return status;
-//}
-//
-///* Function: esl_tmpfile_named()
-// * Incept: SRE, Sat Nov 11 09:13:25 2006 [Janelia]
-// *
-// * Purpose: Open a persistent temporary file relative to the current
-// * working directory. The file name is constructed from the
-// * <basename6X> argument, which must be a modifiable string
-// * ending in the six characters "XXXXXX". These are
-// * replaced by a unique character string by a call to POSIX
-// * <mkstemp()>. For example, <basename6X> might be
-// * <esltmpXXXXXX> on input, and <esltmp12ab34> on return; or, to
-// * put the tmp file in a subdirectory under the current
-// * working directory, something like <my_subdir/esltmpXXXXXX>
-// * on input resulting in something like
-// * <my_subdir/esltmp12ab34> on return. The tmpfile is opened
-// * for reading and writing (in mode <w+b> with permissions
-// * 0600) and the opened <FILE *> handle is returned through
-// * <ret_fp>.
-// *
-// * The created tmpfile is persistent: it will be visible in
-// * a directory listing, and will remain after program
-// * termination unless the caller explicitly removes it by a
-// * <remove()> or <unlink()> call.
-// *
-// * To use this function securely, if you reopen the
-// * tmpfile, you must only reopen it for reading, not
-// * writing, and you must not trust the contents.
-// *
-// * Because the <basename6X> will be modified, it cannot be
-// * a string constant (especially on a picky compiler like
-// * gcc). You have to declare it with something like
-// * <char tmpfile[32] = "esltmpXXXXXX";>
-// * not
-// * <char *tmpfile = "esltmpXXXXXX";>
-// * because a compiler is allowed to make the <*tmpfile> version
-// * a constant.
-// *
-// * Returns: <eslOK> on success, <basename6X> contains the name of the
-// * tmpfile, and <ret_fp> contains a new <FILE *> stream for the
-// * opened file.
-// *
-// * <eslFAIL> on failure, and <ret_fp> is returned NULL and
-// * the contents of <basename6X> are undefined. The most
-// * common reason for a failure will be that the caller does
-// * not have write permission for the directory that
-// * <basename6X> is in. Easel handles this as a normal (user)
-// * failure, not an exception, because these permissions are
-// * most likely in the user's control (in contrast to
-// * <esl_tmpfile()>, which always uses a system <TMPDIR>
-// * that should always be user-writable on a properly
-// * configured POSIX system).
-// *
-// * Xref: STL11/85.
-// */
-//int
-//esl_tmpfile_named(char *basename6X, FILE **ret_fp)
-//{
-// FILE *fp;
-// mode_t old_mode;
-// int fd;
-//
-// *ret_fp = NULL;
-// old_mode = umask(077);
-// if ((fd = mkstemp(basename6X)) < 0) return eslFAIL;
-// umask(old_mode);
-// if ((fp = fdopen(fd, "w+b")) == NULL) return eslFAIL;
-//
-// *ret_fp = fp;
-// return eslOK;
-//}
-//
-//
-///*----------------- end of file path/name functions ------------------------*/
-
-
-/*****************************************************************
-* 6. Typed comparison routines.
-*****************************************************************/
-
-/* Function: esl_DCompare()
-* Incept: SRE, Mon Nov 6 10:11:47 2006 [Janelia]
-*
-* Purpose: Compare two floating point scalars <a> and <b> for approximate equality.
-* Return <eslOK> if equal, <eslFAIL> if not.
-*
-* Equality is defined by being within a relative
-* epsilon <tol>, as <2*fabs(a-b)/(a+b)> $\leq$ <tol>.
-* Additionally, we catch the special cases where <a>
-* and/or <b> are 0 or -0. If both are, return <eslOK>; if
-* one is, check that the absolute value of the other is
-* $\leq$ <tol>.
-*
-* <esl_DCompare()> and <esl_FCompare()> work on <double> and <float>
-* scalars, respectively.
-*/
-int
-esl_DCompare(double a, double b, double tol)
-{
- if (std::isinf(a) && std::isinf(b)) return eslOK;
- if (std::isnan(a) && std::isnan(b)) return eslOK;
- if (!isfin( a ) || !isfin( b ) ) return eslFAIL;
- if (a == b) return eslOK;
- if (fabs(a) == 0. && fabs(b) <= tol) return eslOK;
- if (fabs(b) == 0. && fabs(a) <= tol) return eslOK;
- if (2.*fabs(a-b) / fabs(a+b) <= tol) return eslOK;
- return eslFAIL;
-}
-int
-esl_FCompare(float a, float b, float tol)
-{
- if (std::isinf(a) && std::isinf(b)) return eslOK;
- if (std::isnan(a) && std::isnan(b)) return eslOK;
- if (!isfin( a ) || !isfin( b ) ) return eslFAIL;
- if (a == b) return eslOK;
- if (fabs(a) == 0. && fabs(b) <= tol) return eslOK;
- if (fabs(b) == 0. && fabs(a) <= tol) return eslOK;
- if (2.*fabs(a-b) / fabs(a+b) <= tol) return eslOK;
- return eslFAIL;
-}
-
-/* Function: esl_CCompare()
-* Synopsis: Compare two optional strings for equality.
-* Incept: SRE, Wed Jun 13 10:25:06 2007 [Janelia]
-*
-* Purpose: Compare two optional strings <s1> and <s2>
-* for equality.
-*
-* If they're non-<NULL> and identical up to their
-* <NUL>-terminator, return <eslOK>.
-*
-* If they're both <NULL> (unset), return <eslOK>.
-*
-* Otherwise, they're not identical; return <eslFAIL>.
-*/
-int
-esl_CCompare(char *s1, char *s2)
-{
- if (s1 == NULL && s2 == NULL) return eslOK;
- if (s1 == NULL || s2 == NULL) return eslFAIL;
- if (strcmp(s1, s2) != 0) return eslFAIL;
- return eslOK;
-}
-
-
-/*-------------- end, typed comparison routines --------------------*/
-
-
-
-
-/*****************************************************************
-* 7. Commonly used background composition (iid) frequencies.
-*****************************************************************/
-
-/* Function: esl_composition_BL62()
-* Incept: SRE, Fri Apr 13 16:00:34 2007 [Janelia]
-*
-* Purpose: Sets <f> to the background frequencies used in
-* \citep{Henikoff92} to calculate the BLOSUM62
-* substitution matrix. Caller provides space in <f>
-* allocated for at least 20 doubles. The entries are in
-* alphabetic order A..Y, same as the standard Easel amino
-* acid alphabet order.
-*
-* Returns: <eslOK> on success.
-*/
-int
-esl_composition_BL62(double *f)
-{
- f[0] = 0.074;
- f[1] = 0.025;
- f[2] = 0.054;
- f[3] = 0.054;
- f[4] = 0.047;
- f[5] = 0.074;
- f[6] = 0.026;
- f[7] = 0.068;
- f[8] = 0.058;
- f[9] = 0.099;
- f[10] = 0.025;
- f[11] = 0.045;
- f[12] = 0.039;
- f[13] = 0.034;
- f[14] = 0.052;
- f[15] = 0.057;
- f[16] = 0.051;
- f[17] = 0.073;
- f[18] = 0.013;
- f[19] = 0.032;
- return eslOK;
-}
-
-/* Function: esl_composition_WAG()
-* Incept: SRE, Fri Apr 13 16:02:48 2007 [Janelia]
-*
-* Purpose: Sets <f> to the background frequencies used in
-* \citep{WhelanGoldman01} to calculate the WAG rate
-* matrix. Caller provides space in <f> allocated for at
-* least 20 doubles. The entries are in alphabetic order
-* A..Y, same as the standard Easel amino acid alphabet
-* order.
-*
-* Returns: <eslOK> on success.
-*/
-int
-esl_composition_WAG(double *f)
-{
- f[0] = 0.086628; /* A */
- f[1] = 0.019308; /* C */
- f[2] = 0.057045; /* D */
- f[3] = 0.058059; /* E */
- f[4] = 0.038432; /* F */
- f[5] = 0.083252; /* G */
- f[6] = 0.024431; /* H */
- f[7] = 0.048466; /* I */
- f[8] = 0.062029; /* K */
- f[9] = 0.086209; /* L */
- f[10] = 0.019503; /* M */
- f[11] = 0.039089; /* N */
- f[12] = 0.045763; /* P */
- f[13] = 0.036728; /* Q */
- f[14] = 0.043972; /* R */
- f[15] = 0.069518; /* S */
- f[16] = 0.061013; /* T */
- f[17] = 0.070896; /* V */
- f[18] = 0.014386; /* W */
- f[19] = 0.035274; /* Y */
- return eslOK;
-}
-
-/* Function: esl_composition_SW34()
-* Incept: SRE, Fri Apr 13 16:03:46 2007 [Janelia]
-*
-* Purpose: Sets <f> to the background frequencies observed in
-* Swissprot release 34 (21.2M residues). Caller provides
-* space in <f> allocated for at least 20 doubles. The
-* entries are in alphabetic order A..Y, same as the
-* standard Easel amino acid alphabet order.
-*
-* Returns: <eslOK> on success.
-*/
-int
-esl_composition_SW34(double *f)
-{
- f[0] = 0.075520; /* A */
- f[1] = 0.016973; /* C */
- f[2] = 0.053029; /* D */
- f[3] = 0.063204; /* E */
- f[4] = 0.040762; /* F */
- f[5] = 0.068448; /* G */
- f[6] = 0.022406; /* H */
- f[7] = 0.057284; /* I */
- f[8] = 0.059398; /* K */
- f[9] = 0.093399; /* L */
- f[10] = 0.023569; /* M */
- f[11] = 0.045293; /* N */
- f[12] = 0.049262; /* P */
- f[13] = 0.040231; /* Q */
- f[14] = 0.051573; /* R */
- f[15] = 0.072214; /* S */
- f[16] = 0.057454; /* T */
- f[17] = 0.065252; /* V */
- f[18] = 0.012513; /* W */
- f[19] = 0.031985; /* Y */
- return eslOK;
-}
-
-
-/* Function: esl_composition_SW50()
-* Incept: SRE, Tue Aug 26 08:42:04 2008 [Janelia]
-*
-* Purpose: Sets <f> to the background frequencies observed in
-* Swissprot release 50.8 (86.0M residues; Oct 2006).
-*
-* Returns: <eslOK> on success.
-*/
-int
-esl_composition_SW50(double *f)
-{
- f[0] = 0.0787945; /* A */
- f[1] = 0.0151600; /* C */
- f[2] = 0.0535222; /* D */
- f[3] = 0.0668298; /* E */
- f[4] = 0.0397062; /* F */
- f[5] = 0.0695071; /* G */
- f[6] = 0.0229198; /* H */
- f[7] = 0.0590092; /* I */
- f[8] = 0.0594422; /* K */
- f[9] = 0.0963728; /* L */
- f[10]= 0.0237718; /* M */
- f[11]= 0.0414386; /* N */
- f[12]= 0.0482904; /* P */
- f[13]= 0.0395639; /* Q */
- f[14]= 0.0540978; /* R */
- f[15]= 0.0683364; /* S */
- f[16]= 0.0540687; /* T */
- f[17]= 0.0673417; /* V */
- f[18]= 0.0114135; /* W */
- f[19]= 0.0304133; /* Y */
- return eslOK;
-}
-/*-------------- end, background compositions -------------------*/
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/easel.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/easel.h
deleted file mode 100644
index 4dab06e..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/easel.h
+++ /dev/null
@@ -1,377 +0,0 @@
-/* Easel's foundation.
-*
-* Core functionality of easel: errors, memory allocations, constants,
-* and configuration for portability.
-*
-* SRE, Wed Jul 7 09:43:28 2004 [St. Louis]
- * SVN $Id: easel.h 326 2009-02-28 15:49:07Z eddys $
-*/
-#ifndef eslEASEL_INCLUDED
-#define eslEASEL_INCLUDED
-
-#include "esl_config.h"
-
-#include <stdlib.h>
-#include <float.h>
-#include <stdarg.h> /* for va_list */
-#include <hmmer3/hmmer3_funcs.h>
-
-// ! CODE ADDED: compatibility with windows !
-#ifdef _WINDOWS
-typedef signed char int8_t;
-typedef unsigned char uint8_t;
-typedef signed short int16_t;
-typedef unsigned short uint16_t;
-typedef signed int int32_t;
-typedef unsigned int uint32_t;
-typedef long long int int64_t;
-typedef unsigned long long int uint64_t;
-#else
-#include <stdint.h>
-#endif // _WINDOWS
-
-#ifndef va_copy
-/* WARNING - DANGER - ASSUMES TYPICAL STACK MACHINE */
-#define va_copy(dst, src) ((void)((dst) = (src)))
-#endif // va_copy
-
-/*****************************************************************
-* Macros implementing Easel's error handling conventions
-*****************************************************************/
-/* Many objects contain a fixed length "errbuf" for failure
-* diagnostics: ESL_FAIL() and ESL_XFAIL() fill this buffer.
-*/
-#define eslERRBUFSIZE 128
-
-/* ESL_FAIL() - return an error message, without cleanup.
-* ESL_XFAIL() - return an error message, with cleanup.
-* ESL_EXCEPTION() - throwing an exception, without cleanup.
-* ESL_XEXCEPTION() - throwing an exception, with cleanup.
-*
-* The X versions (with cleanup) require the caller to have an
-* <int status> variable and a <ERROR:> goto target in scope.
-*
-* Wrapping these macros in <while(0)> loops allows a statement:
-* if (something) ESL_XEXCEPTION(code,mesg);
-* without the trailing semicolon becoming a null statement after
-* macro expansion.
-*/
-/*::cexcerpt::error_macros::begin::*/
-
-#define ESL_FAIL(code, errbuf, ...) do {\
- if (errbuf != NULL) snprintf(errbuf, eslERRBUFSIZE, __VA_ARGS__);\
- return code; }\
- while (0)
-
-#define ESL_XFAIL(code, errbuf, ...) do {\
- status = code;\
- if (errbuf != NULL) snprintf(errbuf, eslERRBUFSIZE, __VA_ARGS__);\
- goto ERROR; }\
- while (0)
-
-#define ESL_EXCEPTION(code, ...) do {\
- esl_exception(code, __FILE__, __LINE__, __VA_ARGS__);\
- return code; }\
- while (0)
-
-#define ESL_XEXCEPTION(code, ...) do {\
- status = code;\
- esl_exception(code, __FILE__, __LINE__, __VA_ARGS__);\
- goto ERROR; }\
- while (0)
-/*::cexcerpt::error_macros::end::*/
-
-
-/* Return codes for error handler
-*/
-/*::cexcerpt::statuscodes::begin::*/
-#define eslOK 0 /* no error/success */
-#define eslFAIL 1 /* failure */
-#define eslEOL 2 /* end-of-line (often normal) */
-#define eslEOF 3 /* end-of-file (often normal) */
-#define eslEOD 4 /* end-of-data (often normal) */
-#define eslEMEM 5 /* malloc or realloc failed */
-#define eslENOTFOUND 6 /* file or key not found */
-#define eslEFORMAT 7 /* file format not correct */
-#define eslEAMBIGUOUS 8 /* an ambiguity of some sort */
-#define eslEDIVZERO 9 /* attempted div by zero */
-#define eslEINCOMPAT 10 /* incompatible parameters */
-#define eslEINVAL 11 /* invalid argument/parameter */
-#define eslESYS 12 /* generic system call failure */
-#define eslECORRUPT 13 /* unexpected data corruption */
-#define eslEINCONCEIVABLE 14 /* "can't happen" error */
-#define eslESYNTAX 15 /* invalid user input syntax */
-#define eslERANGE 16 /* value out of allowed range */
-#define eslEDUP 17 /* saw a duplicate of something */
-#define eslENOHALT 18 /* a failure to converge */
-#define eslENORESULT 19 /* no result was obtained */
-#define eslENODATA 20 /* no data provided, file empty */
-#define eslETYPE 21 /* invalid type of argument */
-#define eslEOVERWRITE 22 /* attempted to overwrite data */
-#define eslENOSPACE 23 /* ran out of some resource */
-#define eslEUNIMPLEMENTED 24 /* feature is unimplemented */
-
-// ! CODE ADDED: ugene's tasks can be canceled !
-#define eslCANCELED 25 /* task is canceled */
-/*::cexcerpt::statuscodes::end::*/
-
-/*****************************************************************
-* Macros implementing Easel's memory allocation conventions
-*****************************************************************/
-/* ESL_ALLOC(), ESL_RALLOC():
-*
-* Allocation and reallocation wrappers.
-* Both require <int status> in scope, and <ERROR:> goto target.
-* ESL_RALLOC() also requires <void *> ptr to be provided as <tmp>.
-*/
-/*::cexcerpt::alloc_macros::begin::*/
-//#define ESL_ALLOC(p, size) do {\
-// if (((p) = calloc(1, size)) == NULL) {\
-// status = eslEMEM;\
-// esl_exception(eslEMEM, __FILE__, __LINE__, "calloc of size %d failed", size);\
-// goto ERROR;\
-// }} while (0)
-
-#define ESL_ALLOC_WITH_TYPE(p, ptype, size) do {\
- if (((p) = (ptype)calloc(1, size)) == NULL) {\
- status = eslEMEM;\
- esl_exception(eslEMEM, __FILE__, __LINE__, "calloc of size %d failed", size);\
- goto ERROR;\
- }} while (0)
-
-//#define ESL_RALLOC(p, tmp, newsize) do {\
-// if ((p) == NULL) { (tmp) = malloc(newsize); }\
-// else { (tmp) = realloc((p), (newsize)); }\
-// if ((tmp) != NULL) (p) = (tmp);\
-// else {\
-// status = eslEMEM;\
-// esl_exception(eslEMEM, __FILE__, __LINE__, "realloc for size %d failed", newsize);\
-// goto ERROR;\
-//}} while (0)
-
-#define ESL_RALLOC_WITH_TYPE(p, ptype, tmp, newsize) do {\
- if ((p) == NULL) { (tmp) = (ptype)malloc(newsize); }\
- else { (tmp) = (ptype)realloc((p), (newsize)); }\
- if ((tmp) != NULL) (p) = (ptype)(tmp);\
- else {\
- status = eslEMEM;\
- esl_exception(eslEMEM, __FILE__, __LINE__, "realloc for size %d failed", newsize);\
- goto ERROR;\
- }} while (0)
-
-/*::cexcerpt::alloc_macros::end::*/
-
-/*****************************************************************
-* Macros implementing Easel's function argument conventions
-*****************************************************************/
-
-#define esl_byp_IsInternal(p) ((p) == NULL)
-#define esl_byp_IsReturned(p) ((p) != NULL && (*p) == NULL)
-#define esl_byp_IsProvided(p) ((p) != NULL && (*p) != NULL)
-
-
-/*****************************************************************
-* Macros implementing Easel's debugging output conventions
-*****************************************************************/
-/* Debugging hooks, w/ three levels (1-3).
-*/
-#if eslDEBUGLEVEL >= 1 /* for ESL_DASSERT() macros */
-#include <assert.h>
-#endif
-
-#if (eslDEBUGLEVEL >= 1)
-#define ESL_DPRINTF1(x) printf x
-#define ESL_DASSERT1(x) assert x
-#else
-#define ESL_DPRINTF1(x)
-#define ESL_DASSERT1(x)
-#endif
-#if (eslDEBUGLEVEL >= 2)
-#define ESL_DPRINTF2(x) printf x
-#define ESL_DASSERT2(x) assert x
-#else
-#define ESL_DPRINTF2(x)
-#define ESL_DASSERT2(x)
-#endif
-#if (eslDEBUGLEVEL >= 3)
-#define ESL_DPRINTF3(x) printf x
-#define ESL_DASSERT3(x) assert x
-#else
-#define ESL_DPRINTF3(x)
-#define ESL_DASSERT3(x)
-#endif
-
-
-
-
-/*****************************************************************
-* Defined constants
-*****************************************************************/
-/* Making sure TRUE/FALSE are defined, for convenience */
-#ifndef TRUE
-#define TRUE 1
-#endif
-#ifndef FALSE
-#define FALSE 0
-#endif
-
-/* Some basic mathematical constants.
-* Assuming IEEE754 math with 64-bit doubles (53-bit mantissas), we
-* want 17 significant decimal digits in our constants. More is
-* a waste (but we do it for some anyway).
-*/
-#define eslCONST_E 2.71828182845904523536028747135
-#define eslCONST_PI 3.14159265358979323846264338328
-#define eslCONST_EULER 0.57721566490153286060651209008
-#define eslCONST_GOLD 1.61803398874989484820458683437
-#define eslCONST_LOG2 0.69314718055994529
-#define eslCONST_LOG2R 1.44269504088896341
-
-/* Define <eslINFINITY>, <eslNaN> portably. Harder than it looks.
-* We assume we're in an IEEE 754 environment.
-* We assume that HUGE_VAL in a IEEE754 environment is infinity.
-* If we don't have HUGE_VAL set, we assume we can get infinity
-* by division by zero. (But if we don't have HUGE_VAL, we probably
-* have other problems; HUGE_VAL is required by ANSI C spec.)
-* We can't portably get infinity by overflow (e.g. 1e9999);
-* some compilers (Microsoft) will complain.
-*/
-//#ifdef HUGE_VAL
-//#define eslINFINITY HUGE_VAL /* assume IEEE754 HUGE_VAL = infinity. ok? */
-//#else
-//#define eslINFINITY (1.0/0.0) /* portable? */
-//#endif
-//#define eslNaN (eslINFINITY/eslINFINITY) /* portably make a IEEE754 NaN */
-
-#define eslINFINITY ( infinity() )
-
-/* Define crossovers for numerical approximations.
-*/
-/* log(1+x) ~ x and 1-e^x = -x approximation.
-* Same threshold appears to be optimal for float or double x. xref STL9/138.
-*/
-#define eslSMALLX1 5e-9
-
-
-
-
-/*****************************************************************
-* Basic support for Easel's digitized biosequences.
-*****************************************************************/
-
-/* Most of this support is in the alphabet module, but we externalize
-* some into the easel foundation because ESL_INMAP is used in unaugmented
-* sqio, msa modules.
-*
-* A digital sequence residue (ESL_DSQ) is an unsigned 8-bit type
-* (0..255). A valid digital residue has a value in the range 0..127
-* (Easel can represent alphabets of up to 128 different characters).
-* Values 128..255 are reserved for flags.
-*
-* An "inmap" is ESL_DSQ[128], or *ESL_DSQ allocated for 128 values;
-* it is a many-to-one construct for mapping 7-bit ASCII chars (in
-* range 0..127) either to new ASCII chars (in the case of raw
-* sequence input in sqio, msa) or to digital codes (in the alphabet
-* module). Valid mapped values are 0..127; any value in range
-* 128..255 is some kind of flag.
-*/
-typedef uint8_t ESL_DSQ;
-#define eslDSQ_SENTINEL 255 /* sentinel bytes 0,L+1 in a dsq */
-#define eslDSQ_ILLEGAL 254 /* input symbol is unmapped and unexpected */
-#define eslDSQ_IGNORED 253 /* input symbol is unmapped and ignored */
-#define eslDSQ_EOL 252 /* input symbol marks end of a line */
-#define eslDSQ_EOD 251 /* input symbol marks end of a seq record */
-
-/* If you try to test sym > 0 && sym <= 127 below, instead of isascii(sym),
-* you'll get a compiler warning for an always-successful test regardless
-* of whether a char is signed or unsigned. So we trust that isascii() is
-* doing the Right Thing.
-*/
-#define esl_inmap_IsValid(inmap, sym) (isascii(sym) && (inmap)[(int)sym] <= 127)
-
-
-/*****************************************************************
-* Miscellaneous.
-*****************************************************************/
-/* A placeholder for helping w/ portability of filenames/paths.
-* I think, but have not tested, that:
-* VMS: #define DIRSLASH ']'
-* MacOS: #define DIRSLASH ':'
-* DOS: #define DIRSLASH '\\'
-* Setting DIRSLASH correctly is probably not the only thing
-* that would need to be done to port to other OS's, but it's
-* probably a start.
-*
-* The code assumes that '.' is used for file name extensions,
-* such as "foo.bar".
-*
-* This gets used in easel.c's *_File*() functions.
-*/
-#define eslDIRSLASH '/' /* UNIX directory paths have /foo/bar */
-
-/* Some generic macros for swapping, min, and max.
-*/
-#define ESL_SWAP(x, y, type) do { type tmpxyz = (x); (x) = (y); (y) = tmpxyz; } while (0)
-#define ESL_MIN(a,b) (((a)<(b))?(a):(b))
-#define ESL_MAX(a,b) (((a)>(b))?(a):(b))
-
-
-
-/*****************************************************************
-* The API declarations for easel.c
-*****************************************************************/
-
-/* 1. Error handling. */
-//typedef void (*esl_exception_handler_f)(int code, char *file, int line,
-// char *format, va_list argp);
-extern void esl_exception(int code, char *file, int line, char *format, ...);
-//extern void esl_exception_SetHandler(esl_exception_handler_f);
-//extern void esl_exception_ResetDefaultHandler(void);
-extern void esl_fatal(const char *format, ...);
-//extern void esl_nonfatal_handler(int code, char *file, int line, char *format, va_list argp);
-
-/* 2. Memory allocation/deallocation conventions. */
-extern void esl_Free2D(void **p, int dim1);
-extern void esl_Free3D(void ***p, int dim1, int dim2);
-
-/* 3. Standard banner for Easel miniapplications. */
-//extern void esl_banner(FILE *fp, char *progname, char *banner);
-//extern void esl_usage (FILE *fp, char *progname, char *usage);
-
-/* 4. Replacements, additions to C library functions */
-extern int esl_strdup(const char *s, int64_t n, char **ret_dup);
-extern int esl_strcat(char **dest, int64_t ldest, const char *src, int64_t lsrc);
-//extern int esl_fgets(char **buf, int *n, FILE *fp);
-//extern int esl_strtok (char **s, char *delim, char **ret_tok);
-//extern int esl_strtok_adv(char **s, char *delim, char **ret_tok, int *opt_toklen, char *opt_endchar);
-//extern int esl_sprintf (char **ret_s, const char *format, ...);
-extern int esl_vsprintf(char **ret_s, const char *format, va_list *ap);
-extern int esl_strcmp(const char *s1, const char *s2);
-extern int esl_strchop(char *s, int64_t n);
-extern int esl_strdealign(char *s, const char *aseq, const char *gapchars, int64_t *opt_rlen);
-
-extern int esl_strcasecmp(const char *s1, const char *s2);
-
-/* 5. File path/name manipulation functions, including tmpfiles */
-//extern int esl_FileExists(const char *filename);
-//extern int esl_FileTail(const char *path, int nosuffix, char **ret_file);
-//extern int esl_FileConcat(const char *dir, const char *file, char **ret_path);
-//extern int esl_FileNewSuffix(const char *filename, const char *sfx, char **ret_newpath);
-//extern int esl_FileEnvOpen(const char *fname, const char *env,
-// FILE **ret_fp, char **ret_path);
-//extern int esl_tmpfile(char *basename6X, FILE **ret_fp);
-//extern int esl_tmpfile_named(char *basename6X, FILE **ret_fp);
-
-/* 6. Typed comparison routines. */
-extern int esl_DCompare(double a, double b, double tol);
-extern int esl_FCompare(float a, float b, float tol);
-//extern int esl_CCompare(char *s1, char *s2);
-
-/* 7. Commonly used background composition (iid) frequencies. */
-//extern int esl_composition_BL62(double *f);
-extern int esl_composition_WAG (double *f);
-//extern int esl_composition_SW34(double *f);
-//extern int esl_composition_SW50(double *f);
-
-
-#endif /*eslEASEL_INCLUDED*/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_alphabet.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_alphabet.cpp
deleted file mode 100644
index d33725e..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_alphabet.cpp
+++ /dev/null
@@ -1,1583 +0,0 @@
-/* Implements the standard digitized alphabets for biosequences.
-*
-* 1. ESL_ALPHABET object for digital alphabets.
-* 2. Digitized sequences (ESL_DSQ *).
-* 3. Other routines in the API.
-* 7. Copyright notice and license.
-*
- * SVN $Id: esl_alphabet.c 393 2009-09-27 12:04:55Z eddys $
-* SRE, Tue Dec 7 13:49:43 2004
-*/
-
-#include <hmmer3/easel/esl_config.h>
-
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <math.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_alphabet.h>
-#include <hmmer3/hmmer.h>
-
-/*****************************************************************
-* 1. The ESL_ALPHABET object
-*****************************************************************/
-
-static ESL_ALPHABET *create_rna(void);
-static ESL_ALPHABET *create_dna(void);
-static ESL_ALPHABET *create_amino(void);
-static ESL_ALPHABET *create_coins(void);
-static ESL_ALPHABET *create_dice(void);
-
-/* Function: esl_alphabet_Create()
-* Synopsis: Create alphabet of a standard type.
-* Incept: SRE, Mon Dec 20 10:21:54 2004 [Zaragoza]
-*
-* Purpose: Creates one of the three standard bio alphabets:
-* <eslDNA>, <eslRNA>, or <eslAMINO>, and returns
-* a pointer to it.
-*
-* Args: type - <eslDNA>, <eslRNA>, or <eslAMINO>.
-*
-* Returns: pointer to the new alphabet.
-*
-* Throws: <NULL> if any allocation or initialization fails.
-*/
-ESL_ALPHABET *
-esl_alphabet_Create(int type)
-{
- int status;
- ESL_ALPHABET *a;
-
- switch(type) {
- case eslRNA: a = create_rna(); break;
- case eslDNA: a = create_dna(); break;
- case eslAMINO: a = create_amino(); break;
- case eslCOINS: a = create_coins(); break;
- case eslDICE: a = create_dice(); break;
- default:
- ESL_XEXCEPTION(eslEINVAL, "bad alphabet type: unrecognized");
- }
- return a;
-
-ERROR:
- return NULL;
-}
-
-/* Function: esl_alphabet_CreateCustom()
-* Synopsis: Create a custom alphabet.
-* Incept: SRE, Mon Dec 20 09:18:28 2004 [Zaragoza]
-*
-* Purpose: Creates a customized biosequence alphabet,
-* and returns a ptr to it. The alphabet type is set
-* to <eslNONSTANDARD>.
-*
-* <alphabet> is the internal alphabet string;
-* <K> is the size of the base alphabet;
-* <Kp> is the total size of the alphabet string.
-*
-* In the alphabet string, residues <0..K-1> are the base alphabet;
-* residue <K> is the canonical gap (indel) symbol;
-* residues <K+1..Kp-4> are additional degeneracy symbols (possibly 0 of them);
-* residue <Kp-3> is an "any" symbol (such as N or X);
-* residue <Kp-2> is a "nonresidue" symbol (such as *);
-* and residue <Kp-1> is a "missing data" gap symbol.
-*
-* The two gap symbols, the nonresidue, and the "any"
-* symbol are mandatory even for nonstandard alphabets, so
-* <Kp> $\geq$ <K+4>.
-*
-* Args: alphabet - internal alphabet; example "ACGT-RYMKSWHBVDN*~"
-* K - base size; example 4
-* Kp - total size, including gap, degeneracies; example 18
-*
-* Returns: pointer to new <ESL_ALPHABET> structure.
-*
-* Throws: <NULL> if any allocation or initialization fails.
-*/
-ESL_ALPHABET *
-esl_alphabet_CreateCustom(const char *alphabet, int K, int Kp)
-{
- ESL_ALPHABET *a;
- int c,x,y;
- int status;
-
- /* Argument checks.
- */
- if (strlen(alphabet) != Kp) ESL_XEXCEPTION(eslEINVAL, "alphabet length != Kp");
- if (Kp < K+4) ESL_XEXCEPTION(eslEINVAL, "Kp too small in alphabet");
-
- /* Allocation/init, level 1.
- */
- ESL_ALLOC_WITH_TYPE(a, ESL_ALPHABET*, sizeof(ESL_ALPHABET));
- a->sym = NULL;
- a->degen = NULL;
- a->ndegen = NULL;
-
- /* Allocation/init, level 2.
- */
- ESL_ALLOC_WITH_TYPE(a->sym, char*, sizeof(char) * (Kp+1));
- ESL_ALLOC_WITH_TYPE(a->degen, char**, sizeof(char *) * Kp);
- ESL_ALLOC_WITH_TYPE(a->ndegen, int*, sizeof(int) * Kp);
- a->degen[0] = NULL;
-
- /* Allocation/init, level 3.
- */
- ESL_ALLOC_WITH_TYPE(a->degen[0], char*, sizeof(char) * (Kp*K));
- for (x = 1; x < Kp; x++)
- a->degen[x] = a->degen[0]+(K*x);
-
- /* Initialize the internal alphabet:
- */
- a->type = eslNONSTANDARD;
- a->K = K;
- a->Kp = Kp;
- strcpy(a->sym, alphabet);
-
- /* Initialize the input map, mapping ASCII seq chars to digital codes,
- * and eslDSQ_ILLEGAL for everything else.
- */
- for (c = 0; c < 128; c++) a->inmap[c] = eslDSQ_ILLEGAL;
- for (x = 0; x < a->Kp; x++) a->inmap[(int) a->sym[x]] = x;
-
- /* Initialize the degeneracy map:
- * Base alphabet (first K syms) are automatically
- * mapped uniquely; (Kp-3) is assumed to be
- * the "any" character; other degen chars (K+1..Kp-4) are
- * unset; gap, nonresidue, missing character are unmapped (ndegen=0)
- */
- for (x = 0; x < a->Kp; x++) /* clear everything */
- {
- a->ndegen[x] = 0;
- for (y = 0; y < a->K; y++) a->degen[x][y] = 0;
- }
- for (x = 0; x < a->K; x++) /* base alphabet */
- {
- a->ndegen[x] = 1;
- a->degen[x][x] = 1;
- }
- /* "any" character */
- a->ndegen[Kp-3] = K;
- for (x = 0; x < a->K; x++) a->degen[Kp-3][x] = 1;
-
- a->complement = NULL;
- return a;
-
-ERROR:
- esl_alphabet_Destroy(a);
- return NULL;
-}
-
-
-/* define_complementarity()
-* Builds the "complement" lookup table for DNA, RNA alphabets.
-*/
-static int
-define_complementarity(ESL_ALPHABET *a)
-{
- int status;
-
- ESL_ALLOC_WITH_TYPE(a->complement, ESL_DSQ*, sizeof(ESL_DSQ) * a->Kp);
- a->complement[0] = 3; /* A->T */
- a->complement[1] = 2; /* C->G */
- a->complement[2] = 1; /* G->C */
- a->complement[3] = 0; /* T->A */
- a->complement[4] = 4; /* - - */
- a->complement[5] = 6; /* R->Y */
- a->complement[6] = 5; /* Y->R */
- a->complement[7] = 8; /* M->K */
- a->complement[8] = 7; /* K->M */
- a->complement[9] = 9; /* S S */
- a->complement[10]= 10; /* W W */
- a->complement[11]= 14; /* H->D */
- a->complement[12]= 13; /* B->V */
- a->complement[13]= 12; /* V->B */
- a->complement[14]= 11; /* D->H */
- a->complement[15]= 15; /* N N */
- a->complement[16]= 16; /* ~ ~ */
- return eslOK;
-
-ERROR:
- return status;
-}
-
-/* create_rna():
-* Creates a standard RNA alphabet.
-*/
-static ESL_ALPHABET *
-create_rna(void)
-{
- ESL_ALPHABET *a = NULL;
-
- /* Create the fundamental alphabet
- */
- if ((a = esl_alphabet_CreateCustom("ACGU-RYMKSWHBVDN*~", 4, 18)) == NULL) return NULL;
- a->type = eslRNA;
-
- /* Add desired synonyms in the input map.
- */
- esl_alphabet_SetEquiv(a, 'T', 'U'); /* read T as a U */
- esl_alphabet_SetEquiv(a, 'X', 'N'); /* read X as an N (many seq maskers use X) */
- esl_alphabet_SetEquiv(a, '_', '-'); /* allow _ as a gap too */
- esl_alphabet_SetEquiv(a, '.', '-'); /* allow . as a gap too */
- esl_alphabet_SetCaseInsensitive(a); /* allow lower case input */
-
- /* Define degenerate symbols.
- */
- esl_alphabet_SetDegeneracy(a, 'R', "AG");
- esl_alphabet_SetDegeneracy(a, 'Y', "CU");
- esl_alphabet_SetDegeneracy(a, 'M', "AC");
- esl_alphabet_SetDegeneracy(a, 'K', "GU");
- esl_alphabet_SetDegeneracy(a, 'S', "CG");
- esl_alphabet_SetDegeneracy(a, 'W', "AU");
- esl_alphabet_SetDegeneracy(a, 'H', "ACU");
- esl_alphabet_SetDegeneracy(a, 'B', "CGU");
- esl_alphabet_SetDegeneracy(a, 'V', "ACG");
- esl_alphabet_SetDegeneracy(a, 'D', "AGU");
-
- if (define_complementarity(a) != eslOK) return NULL;
-
- return a;
-}
-
-
-/* create_dna():
-* creates and returns a standard DNA alphabet.
-*/
-static ESL_ALPHABET *
-create_dna(void)
-{
- ESL_ALPHABET *a = NULL;
-
- /* Create the fundamental alphabet.
- */
- if ((a = esl_alphabet_CreateCustom("ACGT-RYMKSWHBVDN*~", 4, 18)) == NULL) return NULL;
- a->type = eslDNA;
-
- /* Add desired synonyms in the input map.
- */
- esl_alphabet_SetEquiv(a, 'U', 'T'); /* read U as a T */
- esl_alphabet_SetEquiv(a, 'X', 'N'); /* read X as an N (many seq maskers use X) */
- esl_alphabet_SetEquiv(a, '_', '-'); /* allow _ as a gap too */
- esl_alphabet_SetEquiv(a, '.', '-'); /* allow . as a gap too */
- esl_alphabet_SetCaseInsensitive(a); /* allow lower case input */
-
- /* Define IUBMB degenerate symbols other than the N.
- */
- esl_alphabet_SetDegeneracy(a, 'R', "AG");
- esl_alphabet_SetDegeneracy(a, 'Y', "CT");
- esl_alphabet_SetDegeneracy(a, 'M', "AC");
- esl_alphabet_SetDegeneracy(a, 'K', "GT");
- esl_alphabet_SetDegeneracy(a, 'S', "CG");
- esl_alphabet_SetDegeneracy(a, 'W', "AT");
- esl_alphabet_SetDegeneracy(a, 'H', "ACT");
- esl_alphabet_SetDegeneracy(a, 'B', "CGT");
- esl_alphabet_SetDegeneracy(a, 'V', "ACG");
- esl_alphabet_SetDegeneracy(a, 'D', "AGT");
-
- if (define_complementarity(a) != eslOK) return NULL;
- return a;
-}
-
-
-/* create_amino():
-* Creates a new standard amino acid alphabet.
-*/
-static ESL_ALPHABET *
-create_amino(void)
-{
- ESL_ALPHABET *a = NULL;
-
- /* Create the internal alphabet
- */
- if ((a = esl_alphabet_CreateCustom("ACDEFGHIKLMNPQRSTVWY-BJZOUX*~", 20, 29)) == NULL) return NULL;
- a->type = eslAMINO;
-
- /* Add desired synonyms in the input map.
- */
- esl_alphabet_SetEquiv(a, '_', '-'); /* allow _ as a gap too */
- esl_alphabet_SetEquiv(a, '.', '-'); /* allow . as a gap too */
- esl_alphabet_SetCaseInsensitive(a); /* allow lower case input */
-
- /* Define IUPAC degenerate symbols other than the X.
- */
- esl_alphabet_SetDegeneracy(a, 'B', "ND");
- esl_alphabet_SetDegeneracy(a, 'J', "IL");
- esl_alphabet_SetDegeneracy(a, 'Z', "QE");
-
- /* Define unusual residues as one-to-one degeneracies.
- */
- esl_alphabet_SetDegeneracy(a, 'U', "C"); /* selenocysteine is scored as cysteine */
- esl_alphabet_SetDegeneracy(a, 'O', "K"); /* pyrrolysine is scored as lysine */
-
- return a;
-}
-
-
-/* create_coins():
-* Creates a toy alphabet for coin examples
-*/
-static ESL_ALPHABET *
-create_coins(void)
-{
- ESL_ALPHABET *a = NULL;
-
- /* Create the internal alphabet
- */
- if ((a = esl_alphabet_CreateCustom("HT-X*~", 2, 6)) == NULL) return NULL;
- a->type = eslCOINS;
-
- /* Add desired synonyms in the input map.
- */
- esl_alphabet_SetEquiv(a, '_', '-'); /* allow _ as a gap too */
- esl_alphabet_SetEquiv(a, '.', '-'); /* allow . as a gap too */
- esl_alphabet_SetCaseInsensitive(a); /* allow lower case input */
-
- /* There are no degeneracies in the coin alphabet. */
-
- return a;
-}
-
-/* create_dice():
-* Creates a toy alphabet for dice examples
-*/
-static ESL_ALPHABET *
-create_dice(void)
-{
- ESL_ALPHABET *a = NULL;
-
- /* Create the internal alphabet
- */
- if ((a = esl_alphabet_CreateCustom("123456-X*~", 6, 10)) == NULL) return NULL;
- a->type = eslCOINS;
-
- /* Add desired synonyms in the input map.
- */
- esl_alphabet_SetEquiv(a, '_', '-'); /* allow _ as a gap too */
- esl_alphabet_SetEquiv(a, '.', '-'); /* allow . as a gap too */
- esl_alphabet_SetCaseInsensitive(a); /* allow lower case input */
-
- /* There are no degeneracies in the dice alphabet. */
-
- return a;
-}
-
-
-
-/* Function: esl_alphabet_SetEquiv()
-* Synopsis: Define an equivalent symbol.
-* Incept: SRE, Mon Dec 20 10:40:33 2004 [Zaragoza]
-*
-* Purpose: Maps an additional input alphabetic symbol <sym> to
-* an internal alphabet symbol <c>; for example,
-* we might map T to U for an RNA alphabet, so that we
-* allow for reading input DNA sequences.
-*
-* Args: sym - symbol to allow in the input alphabet; 'T' for example
-* c - symbol to map <sym> to in the internal alphabet; 'U' for example
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEINVAL> if <c> is not in the internal alphabet, or if <sym> is.
-*/
-int
-esl_alphabet_SetEquiv(ESL_ALPHABET *a, char sym, char c)
-{
- char *sp = NULL;
- ESL_DSQ x;
-
- /* Contract checks */
- if ((sp = strchr(a->sym, sym)) != NULL)
- ESL_EXCEPTION(eslEINVAL, "symbol %c is already in internal alphabet, can't equivalence it", sym);
- if ((sp = strchr(a->sym, c)) == NULL)
- ESL_EXCEPTION(eslEINVAL, "char %c not in the alphabet, can't map to it", c);
-
- x = sp - a->sym;
- a->inmap[(int) sym] = x;
- return eslOK;
-}
-
-/* Function: esl_alphabet_SetCaseInsensitive()
-* Synopsis: Make an alphabet's input map case-insensitive.
-* Incept: SRE, Mon Dec 20 15:31:12 2004 [Zaragoza]
-*
-* Purpose: Given a custom alphabet <a>, with all equivalences set,
-* make the input map case-insensitive: for every
-* letter that is mapped in either lower or upper
-* case, map the other case to the same internal
-* residue.
-*
-* For the standard alphabets, this is done automatically.
-*
-* Args: a - alphabet to make case-insensitive.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslECORRUPT> if any lower/uppercase symbol pairs
-* are already both mapped to different symbols.
-*/
-int
-esl_alphabet_SetCaseInsensitive(ESL_ALPHABET *a)
-{
- int lc, uc;
-
- for (lc = 'a'; lc <= 'z'; lc++)
- {
- uc = toupper(lc);
-
- if (esl_abc_CIsValid(a, lc) && ! esl_abc_CIsValid(a, uc)) a->inmap[uc] = a->inmap[lc];
- else if (esl_abc_CIsValid(a, uc) && ! esl_abc_CIsValid(a, lc)) a->inmap[lc] = a->inmap[uc];
- else if (esl_abc_CIsValid(a, lc) && esl_abc_CIsValid(a, uc) && a->inmap[uc] != a->inmap[lc])
- ESL_EXCEPTION(eslECORRUPT, "symbols %c and %c map differently already (%c vs. %c)",
- lc, uc, a->inmap[lc], a->inmap[uc]);
- }
- return eslOK;
-}
-
-/* Function: esl_alphabet_SetDegeneracy()
-* Synopsis: Define degenerate symbol in custom alphabet.
-* Incept: SRE, Mon Dec 20 15:42:23 2004 [Zaragoza]
-*
-* Purpose: Given an alphabet under construction,
-* define the degenerate character <c> to mean
-* any of the characters in the string <ds>.
-*
-* <c> must exist in the digital alphabet, as
-* one of the optional degenerate residues (<K+1>..<Kp-3>).
-* All the characters in the <ds> string must exist
-* in the canonical alphabet (<0>..<K-1>).
-*
-* You may not redefine the mandatory all-degenerate character
-* (typically <N> or <X>; <Kp-3> in the digital alphabet).
-* It is defined automatically in all alphabets.
-*
-* Args: a - an alphabet under construction.
-* c - degenerate character code; example: 'R'
-* ds - string of base characters for c; example: "AG"
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEINVAL> if <c> or <ds> arguments aren't valid.
-*/
-int
-esl_alphabet_SetDegeneracy(ESL_ALPHABET *a, char c, char *ds)
-{
- char *sp;
- ESL_DSQ x,y;
-
- if ((sp = strchr(a->sym, c)) == NULL)
- ESL_EXCEPTION(eslEINVAL, "no such degenerate character");
- x = sp - a->sym;
-
- /* A degenerate character must have code K+1..Kp-4.
- * Kp-3, the all-degenerate character, is automatically
- * created, and can't be remapped.
- */
- if (x == a->Kp-3)
- ESL_EXCEPTION(eslEINVAL, "can't redefine all-degenerate char %c", c);
- if (x < a->K+1 || x >= a->Kp-2)
- ESL_EXCEPTION(eslEINVAL, "char %c isn't in expected position in alphabet", c);
-
- while (*ds != '\0') {
- if ((sp = strchr(a->sym, *ds)) == NULL) ESL_EXCEPTION(eslEINVAL, "no such base character");
- y = sp - a->sym;
- if (! esl_abc_XIsCanonical(a, y)) ESL_EXCEPTION(eslEINVAL, "can't map degeneracy to noncanonical character");
-
- a->degen[x][y] = 1;
- a->ndegen[x]++;
- ds++;
- }
- return eslOK;
-}
-
-
-/* Function: esl_alphabet_SetIgnored()
-* Synopsis: Define a set of characters to be ignored in input.
-* Incept: SRE, Tue Sep 19 15:08:27 2006 [Janelia]
-*
-* Purpose: Given an alphabet <a> (either standard or custom), define
-* all the characters in string <ignoredchars> to be
-* unmapped: valid, but ignored when converting input text.
-*
-* By default, the standard alphabets do not define any
-* ignored characters.
-*
-* The most common ignored characters would be space, tab,
-* and digits, to skip silently over whitespace and
-* sequence coordinates when parsing loosely-defined
-* sequence file formats.
-*
-* Args: a - alphabet to modify
-* ignoredchars - string listing characters to ignore; i.e. " \t"
-*
-* Returns: <eslOK> on success.
-*/
-int
-esl_alphabet_SetIgnored(ESL_ALPHABET *a, const char *ignoredchars)
-{
- int i;
- for (i = 0; ignoredchars[i] != '\0'; i++) a->inmap[(int)ignoredchars[i]] = eslDSQ_IGNORED;
- return eslOK;
-}
-
-
-/* Function: esl_alphabet_Destroy()
-* Synopsis: Frees an alphabet object.
-* Incept: SRE, Mon Dec 20 10:27:23 2004 [Zaragoza]
-*
-* Purpose: Free's an <ESL_ALPHABET> structure.
-*
-* Args: a - the <ESL_ALPHABET> to free.
-*
-* Returns: (void).
-*/
-void
-esl_alphabet_Destroy(ESL_ALPHABET *a)
-{
- if (a == NULL) return;
-
- if (a->sym != NULL) free(a->sym);
- if (a->ndegen != NULL) free(a->ndegen);
- if (a->degen != NULL)
- {
- if (a->degen[0] != NULL) free(a->degen[0]);
- free(a->degen);
- }
- if (a->complement != NULL) free(a->complement);
- free(a);
-}
-/*--------------- end, ESL_ALPHABET object ----------------------*/
-
-
-
-
-
-/*****************************************************************
-* 2. Digitized sequences (ESL_DSQ *)
-*****************************************************************/
-/* Design note: SRE, Mon Sep 18 09:11:41 2006
-*
-* An ESL_DSQ is considered to a special string type, equivalent to
-* <char *>, and is not considered to be an Easel "object". Thus it
-* does not have a standard object API. Rather, the caller deals with
-* an ESL_DSQ directly: allocate for <(L+2)*sizeof(ESL_DSQ)> to leave
-* room for sentinels at <0> and <L+1>.
-*
-* Additionally, an ESL_DSQ is considered to be "trusted"
-* data: we're 'guaranteed' that anything in an ESL_DSQ is a valid
-* symbol, so we don't need to error-check. Anything else is a programming
-* error.
-*/
-
-/* Function: esl_abc_CreateDsq()
-* Synopsis: Digitizes a sequence into new space.
-* Incept: SRE, Mon Sep 18 09:15:02 2006 [Janelia]
-*
-* Purpose: Given an alphabet <a> and an ASCII sequence <seq>,
-* digitize the sequence into newly allocated space, and
-* return a pointer to that space in <ret_dsq>.
-*
-* Args: a - internal alphabet
-* seq - text sequence to be digitized
-* ret_dsq - RETURN: the new digital sequence
-*
-* Returns: <eslOK> on success, and <ret_dsq> contains the digitized
-* sequence; caller is responsible for free'ing this
-* memory. Returns <eslEINVAL> if <seq> contains
-* one or more characters that are not in the input map of
-* alphabet <a>. If this happens, <ret_dsq> is still valid upon
-* return: invalid characters are replaced by full ambiguities
-* (typically X or N).
-*
-* Throws: <eslEMEM> on allocation failure.
-*
-* Xref: STL11/63
-*/
-int
-esl_abc_CreateDsq(const ESL_ALPHABET *a, const char *seq, ESL_DSQ **ret_dsq)
-{
- ESL_DSQ *dsq = NULL;
- int status;
- int64_t L;
-
- L = strlen(seq);
- ESL_ALLOC_WITH_TYPE(dsq, ESL_DSQ*, sizeof(ESL_DSQ) * (L+2));
- status = esl_abc_Digitize(a, seq, dsq);
-
- if (ret_dsq != NULL) *ret_dsq = dsq; else free(dsq);
- return status;
-
-ERROR:
- if (dsq != NULL) free(dsq);
- if (ret_dsq != NULL) *ret_dsq = NULL;
- return status;
-}
-
-
-/* Function: esl_abc_Digitize()
-* Synopsis: Digitizes a sequence into existing space.
-* Incept: SRE, Sun Aug 27 11:18:56 2006 [Leesburg]
-*
-* Purpose: Given an alphabet <a> and a nul-terminated ASCII sequence
-* <seq>, digitize the sequence and put it in <dsq>. Caller
-* provides space in <dsq> allocated for at least <L+2>
-* <ESL_DSQ> residues, where <L> is the length of <seq>.
-*
-* Args: a - internal alphabet
-* seq - text sequence to be digitized (\0-terminated)
-* dsq - RETURN: the new digital sequence (caller allocates,
-* at least <(L+2) * sizeof(ESL_DSQ)>).
-*
-* Returns: <eslOK> on success.
-* Returns <eslEINVAL> if <seq> contains one or more characters
-* that are not recognized in the alphabet <a>. (This is classed
-* as a normal error, because the <seq> may be untrusted user input.)
-* If this happens, the digital sequence <dsq> is still valid upon
-* return; invalid ASCII characters are replaced by ambiguities
-* (X or N).
-*/
-int
-esl_abc_Digitize(const ESL_ALPHABET *a, const char *seq, ESL_DSQ *dsq)
-{
- int status;
- int64_t i; /* position in seq */
- int64_t j; /* position in dsq */
- ESL_DSQ x;
-
- status = eslOK;
- dsq[0] = eslDSQ_SENTINEL;
- for (i = 0, j = 1; seq[i] != '\0'; i++)
- {
- x = a->inmap[(int) seq[i]];
- if (x == eslDSQ_IGNORED) continue;
-
- if (esl_abc_XIsValid(a, x))
- dsq[j] = x;
- else
- {
- status = eslEINVAL;
- dsq[j] = esl_abc_XGetUnknown(a);
- }
- j++;
- }
- dsq[j] = eslDSQ_SENTINEL;
- return status;
-}
-
-/* Function: esl_abc_Textize()
-* Synopsis: Convert digital sequence to text.
-* Incept: SRE, Sun Aug 27 11:14:58 2006 [Leesburg]
-*
-* Purpose: Make an ASCII sequence <seq> by converting a digital
-* sequence <dsq> of length <L> back to text, according to
-* the digital alphabet <a>.
-*
-* Caller provides space in <seq> allocated for at least
-* <L+1> bytes (<(L+1) * sizeof(char)>).
-*
-* Args: a - internal alphabet
-* dsq - digital sequence to be converted (1..L)
-* L - length of dsq
-* seq - RETURN: the new text sequence (caller allocated
-* space, at least <(L+1) * sizeof(char)>).
-*
-* Returns: <eslOK> on success.
-*/
-int
-esl_abc_Textize(const ESL_ALPHABET *a, const ESL_DSQ *dsq, int64_t L, char *seq)
-{
- int64_t i;
-
- for (i = 0; i < L; i++)
- seq[i] = a->sym[dsq[i+1]];
- seq[i] = '\0';
- return eslOK;
-}
-
-
-/* Function: esl_abc_TextizeN()
-* Synopsis: Convert subsequence from digital to text.
-* Incept: SRE, Tue Sep 5 09:28:38 2006 [Janelia] STL11/54.
-*
-* Purpose: Similar in semantics to <strncpy()>, this procedure takes
-* a window of <L> residues in a digitized sequence
-* starting at the residue pointed to by <dptr>,
-* converts them to ASCII text representation, and
-* copies them into the buffer <buf>.
-*
-* <buf> must be at least <L> residues long; <L+1>, if the
-* caller needs to NUL-terminate it.
-*
-* If a sentinel byte is encountered in the digitized
-* sequence before <L> residues have been copied, <buf> is
-* NUL-terminated there. Otherwise, like <strncpy()>, <buf>
-* will not be NUL-terminated.
-*
-* Note that because digital sequences are indexed <1..N>,
-* not <0..N-1>, the caller must be careful about
-* off-by-one errors in <dptr>. For example, to copy from
-* the first residue of a digital sequence <dsq>, you must
-* pass <dptr=dsq+1>, not <dptr=dsq>. The text in <buf>
-* on the other hand is a normal C string indexed <0..L-1>.
-*
-* Args: a - reference to an internal alphabet
-* dptr - ptr to starting residue in a digital sequence
-* L - number of residues to convert and copy
-* buf - text buffer to store the <L> converted residues in
-*
-* Returns: <eslOK> on success.
-*/
-int
-esl_abc_TextizeN(const ESL_ALPHABET *a, const ESL_DSQ *dptr, int64_t L, char *buf)
-{
- int64_t i;
-
- for (i = 0; i < L; i++)
- {
- if (dptr[i] == eslDSQ_SENTINEL)
- {
- buf[i] = '\0';
- return eslOK;
- }
- buf[i] = a->sym[dptr[i]];
- }
- return eslOK;
-}
-
-
-/* Function: esl_abc_dsqcpy()
-* Incept: SRE, Fri Feb 23 08:45:10 2007 [Casa de Gatos]
-*
-* Purpose: Given a digital sequence <dsq> of length <L>,
-* make a copy of it in <dcopy>. Caller provides
-* storage in <dcopy> for at least <L+2> <ESL_DSQ>
-* residues.
-*
-* Returns: <eslOK> on success.
-*/
-int
-esl_abc_dsqcpy(const ESL_DSQ *dsq, int64_t L, ESL_DSQ *dcopy)
-{
- memcpy(dcopy, dsq, sizeof(ESL_DSQ) * (L+2));
- return eslOK;
-}
-
-
-/* Function: esl_abc_dsqdup()
-* Synopsis: Duplicate a digital sequence.
-* Incept: SRE, Tue Aug 29 13:51:05 2006 [Janelia]
-*
-* Purpose: Like <esl_strdup()>, but for digitized sequences:
-* make a duplicate of <dsq> and leave it in <ret_dup>.
-* Caller can pass the string length <L> if it's known, saving
-* some overhead; else pass <-1> and the length will be
-* determined for you.
-*
-* Tolerates <dsq> being <NULL>; in which case, returns
-* <eslOK> with <*ret_dup> set to <NULL>.
-*
-* Args: dsq - digital sequence to duplicate (w/ sentinels at 0,L+1)
-* L - length of dsq in residues, if known; -1 if unknown
-* ret_dup - RETURN: allocated duplicate of <dsq>, which caller will
-* free.
-*
-* Returns: <eslOK> on success, and leaves a pointer in <ret_dup>.
-*
-* Throws: <eslEMEM> on allocation failure.
-*
-* Xref: STL11/48
-*/
-int
-esl_abc_dsqdup(const ESL_DSQ *dsq, int64_t L, ESL_DSQ **ret_dup)
-{
- int status;
- ESL_DSQ *newSq = NULL;
-
- if (ret_dup == NULL) return eslOK; /* no-op. */
-
- *ret_dup = NULL;
- if (dsq == NULL) return eslOK;
- if (L < 0) L = esl_abc_dsqlen(dsq);
-
- ESL_ALLOC_WITH_TYPE(newSq, ESL_DSQ*, sizeof(ESL_DSQ) * (L+2));
- memcpy(newSq, dsq, sizeof(ESL_DSQ) * (L+2));
-
- *ret_dup = newSq;
- return eslOK;
-
-ERROR:
- if (newSq != NULL) free(newSq);
- if (ret_dup != NULL) *ret_dup = NULL;
- return status;
-}
-
-
-/* Function: esl_abc_dsqcat()
-* Synopsis: Concatenate digital sequences.
-* Incept: SRE, Tue Aug 29 14:01:59 2006 [Janelia]
-*
-* Purpose: Like <esl_strcat()>, except specialized for digitizing a
-* biosequence text string and appending it to a growing
-* digital sequence. The growing digital sequence is <dsq>,
-* currently of length <L> residues; we append <s> to it,
-* of length <n> symbols, after digitization. Upon return,
-* <dsq> has been reallocated and <L> is set to the new
-* length (which is why both must be passed by reference).
-*
-* Note that the final <L> is not necessarily the initial
-* <L> plus <n>, because the text string <s> may contain
-* symbols that are defined to be ignored
-* (<eslDSQ_IGNORED>) in the input map of this alphabet.
-* (The final <L> is guaranteed to be $\leq$ <L+n> though.>
-*
-* If the initial <L> is unknown, pass <-1>, and it will be
-* determined by counting the residues in <dsq>.
-*
-* Similarly, if <n> is unknown, pass <-1> and it will be
-* determined by counting the symbols in <s>
-*
-* <dsq> may be <NULL>, in which case this call is
-* equivalent to an allocation and digitization just of
-* <s>.
-*
-* <s> may also be <NULL>, in which case <dsq> is
-* unmodified; <L> would be set to the correct length of
-* <dsq> if it was passed as <-1> (unknown).
-*
-* Args: abc - digital alphabet to use
-* dsq - reference to the current digital seq to append to
-* (with sentinel bytes at 0,L+1); may be <NULL>.
-* Upon return, this will probably have
-* been reallocated, and it will contain the original
-* <dsq> with <s> digitized and appended.
-* L - reference to the current length of <dsq> in residues;
-* may be <-1> if unknown. Upon return, <L> is set to
-* the new length of <dsq>, after <s> is appended.
-* s - NUL-terminated ASCII text sequence to append. May
-* contain ignored text characters (flagged with
-* <eslDSQ_IGNORED> in the input map of alphabet <abc>).
-* n - Length of <s> in characters, if known; or <-1> if
-* unknown.
-*
-* Returns: <eslOK> on success; <dsq> contains the result of digitizing
-* and appending <s> to the original <dsq>; and <L> contains
-* the new length of the <dsq> result in residues.
-*
-* If any of the characters in <s> are illegal in the alphabet
-* <abc>, these characters are digitized as unknown residues,
-* and the function returns <eslEINVAL>. The caller might want
-* to call <esl_abc_ValidateSeq()> on <s> if it wants to figure
-* out where digitization goes awry and get a more informative
-* error report. This is a normal error, because the string <s>
-* might be user input.
-*
-* Throws: <eslEMEM> on allocation or reallocation failure;
-*
-* Xref: STL11/48.
-*/
-int
-esl_abc_dsqcat(const ESL_ALPHABET *a, ESL_DSQ **dsq, int64_t *L, const char *s, int64_t n)
-{
- int status;
- void *p;
- int64_t newL;
- int64_t xpos, cpos;
- ESL_DSQ x;
-
- if (*L < 0) newL = ((*dsq == NULL) ? 0 : esl_abc_dsqlen(*dsq));
- else newL = *L;
-
- if (n < 0) n = ((s == NULL) ? 0 : strlen(s));
-
- /* below handles weird case of empty s (including empty dsq and empty s):
- * just hand dsq and its length right back to the caller.
- */
- if (n == 0) { *L = newL; return eslOK; }
-
- if (*dsq == NULL) { /* an entirely new dsq must be allocated *and* initialized with left sentinel. */
- ESL_ALLOC_WITH_TYPE(*dsq, ESL_DSQ*, sizeof(ESL_DSQ) * (n+2));
- (*dsq)[0] = eslDSQ_SENTINEL;
- } else /* else, existing dsq is just reallocated; left sentinel already in place. */
- ESL_RALLOC_WITH_TYPE(*dsq, ESL_DSQ*, p, sizeof(ESL_DSQ) * (newL+n+2)); /* most we'll need */
-
- /* Watch these coords. Start in the 0..n-1 text string at 0;
- * start in the 1..L dsq at L+1, overwriting its terminal
- * sentinel byte.
- */
- status = eslOK;
- for (xpos = newL+1, cpos = 0; s[cpos] != '\0'; cpos++)
- {
- x = a->inmap[(int) s[cpos]];
- if (esl_abc_XIsValid(a, x))
- (*dsq)[xpos++] = x;
- else if (x == eslDSQ_IGNORED)
- ;
- else
- {
- (*dsq)[xpos++] = esl_abc_XGetUnknown(a);
- status = eslEINVAL;
- }
- }
- (*dsq)[xpos] = eslDSQ_SENTINEL;
- *L = xpos-1;
- return status;
-
-ERROR:
- *L = newL;
- return status;
-}
-
-/* Function: esl_abc_dsqlen()
-* Synopsis: Returns the length of a digital sequence.
-* Incept: SRE, Tue Aug 29 13:49:02 2006 [Janelia]
-*
-* Purpose: Returns the length of digitized sequence <dsq> in
-* positions (including gaps, if any). The <dsq> must be
-* properly terminated by a sentinel byte
-* (<eslDSQ_SENTINEL>).
-*/
-int64_t
-esl_abc_dsqlen(const ESL_DSQ *dsq)
-{
- int64_t n = 0;
- while (dsq[n+1] != eslDSQ_SENTINEL) n++;
- return n;
-}
-
-/* Function: esl_abc_dsqrlen()
-* Synopsis: Returns the number of residues in a digital seq.
-* Incept: SRE, Sat Nov 4 09:41:40 2006 [Janelia]
-*
-* Purpose: Returns the unaligned length of digitized sequence
-* <dsq>, in residues, not counting any gaps or
-* missing data symbols.
-*/
-int64_t
-esl_abc_dsqrlen(const ESL_ALPHABET *abc, const ESL_DSQ *dsq)
-{
- int64_t n = 0;
- int64_t i;
-
- for (i = 1; dsq[i] != eslDSQ_SENTINEL; i++)
- if (esl_abc_XIsResidue(abc, dsq[i])) n++;
- return n;
-}
-
-/* Function: esl_abc_CDealign()
-* Synopsis: Dealigns a text string, using a reference digital aseq.
-* Incept: SRE, Sun Mar 30 13:14:05 2008 [Casa de Gatos]
-*
-* Purpose: Dealigns <s> in place by removing characters aligned to
-* gaps (or missing data symbols) in the reference digital
-* aligned sequence <ref_ax>. Gaps and missing data symbols
-* in <ref_ax> are defined by its digital alphabet <abc>.
-*
-* <s> is typically going to be some kind of textual
-* annotation string (secondary structure, consensus, or
-* surface accessibility).
-*
-* Be supercareful of off-by-one errors here! The <ref_ax>
-* is a digital sequence that is indexed <1..L>. The
-* annotation string <s> is assumed to be <0..L-1> (a
-* normal C string), off by one with respect to <ref_ax>.
-* In a sequence object, ss annotation is actually stored
-* <1..L> -- so if you're going to <esl_abc_CDealign()> a
-* <sq->ss>, pass <sq->ss+1> as the argument <s>.
-*
-* Returns: Returns <eslOK> on success; optionally returns the number
-* of characters in the dealigned <s> in <*opt_rlen>.
-*
-* Throws: (no abnormal error conditions)
-*/
-int
-esl_abc_CDealign(const ESL_ALPHABET *abc, char *s, const ESL_DSQ *ref_ax, int64_t *opt_rlen)
-{
- int64_t apos;
- int64_t n = 0;
-
- if (s == NULL) return eslOK;
-
- for (n=0, apos=1; ref_ax[apos] != eslDSQ_SENTINEL; apos++)
- if (! esl_abc_XIsGap(abc, ref_ax[apos]) && ! esl_abc_XIsMissing(abc, ref_ax[apos]) )
- s[n++] = s[apos-1]; /* apos-1 because we assume s was 0..alen-1, whereas ref_ax was 1..alen */
- s[n] = '\0';
-
- if (opt_rlen != NULL) *opt_rlen = n;
- return eslOK;
-}
-
-/* Function: esl_abc_XDealign()
-* Synopsis: Dealigns a digital string, using a reference digital aseq.
-* Incept: SRE, Sun Mar 30 13:19:16 2008 [Casa de Gatos]
-*
-* Purpose: Dealigns <x> in place by removing characters aligned to
-* gaps (or missing data) in the reference digital aligned
-* sequence <ref_ax>. Gaps and missing data symbols in
-* <ref_ax> are defined by its digital alphabet <abc>.
-*
-* Returns: Returns <eslOK> on success; optionally returns the number
-* of characters in the dealigned <x> in <*opt_rlen>.
-*
-* Throws: (no abnormal error conditions)
-*/
-int
-esl_abc_XDealign(const ESL_ALPHABET *abc, ESL_DSQ *x, const ESL_DSQ *ref_ax, int64_t *opt_rlen)
-{
- int64_t apos;
- int64_t n = 0;
-
- if (x == NULL) return eslOK;
-
- x[0] = eslDSQ_SENTINEL;
- for (n=1, apos=1; ref_ax[apos] != eslDSQ_SENTINEL; apos++)
- if (! esl_abc_XIsGap(abc, ref_ax[apos]) && ! esl_abc_XIsMissing(abc, ref_ax[apos]) )
- x[n++] = x[apos];
- x[n] = eslDSQ_SENTINEL;
-
- if (opt_rlen != NULL) *opt_rlen = n-1;
- return eslOK;
-}
-
-/*-------------- end, digital sequences (ESL_DSQ) ---------------*/
-
-
-/*****************************************************************
-* 3. Other routines in the API.
-*****************************************************************/
-
-/* Function: esl_abc_GuessAlphabet()
-* Synopsis: Guess alphabet type from residue composition.
-* Incept: SRE, Wed May 16 11:08:29 2007 [Janelia]
-*
-* Purpose: Guess the alphabet type from a residue composition.
-* The input <ct[0..25]> array contains observed counts of
-* the letters A..Z, case-insensitive.
-*
-* Provided that the compositions contains more than 10
-* residues, the composition is called <eslDNA> if it
-* consists only of the residues ACGTN and all four of ACGT
-* occur (and analogously for <eslRNA>, ACGU$+$N); and it
-* calls the sequence <eslAMINO> either if it contains an
-* amino-specific letter (EFIJLOPQZ), or if it contains at
-* least 15 of the 20 canonical amino acids and consists
-* only of canonical amino acids or X.
-*
-*
-* Returns: <eslOK> on success, and <*ret_type> is set to
-* <eslAMINO>, <eslRNA>, or <eslDNA>.
-*
-* Returns <eslEAMBIGUOUS> if unable to determine the
-* alphabet type; in this case, <*ret_type> is set to
-* <eslUNKNOWN>.
-*/
-int
-esl_abc_GuessAlphabet(const int64_t *ct, int *ret_type)
-{
- int type = eslUNKNOWN;
- char aaonly[] = "EFIJLOPQZ";
- char allcanon[] = "ACG";
- char aacanon[] = "DHKMRSVWY";
- int64_t n1, n2, n3, nn, nt, nu, nx, n; /* n's are counts */
- int x1, x2, x3, xn, xt, xu; /* x's are how many different residues are represented */
- int i, x;
-
- x1 = x2 = x3 = xn = xt = xu = 0;
- n1 = n2 = n3 = n = 0;
- for (i = 0; i < 26; i++) n += ct[i];
- for (i = 0; aaonly[i] != '\0'; i++) { x = ct[aaonly[i] - 'A']; if (x > 0) { n1 += x; x1++; } }
- for (i = 0; allcanon[i] != '\0'; i++) { x = ct[allcanon[i] - 'A']; if (x > 0) { n2 += x; x2++; } }
- for (i = 0; aacanon[i] != '\0'; i++) { x = ct[aacanon[i] - 'A']; if (x > 0) { n3 += x; x3++; } }
- nt = ct['T' - 'A']; xt = (nt > 0) ? 1 : 0;
- nu = ct['U' - 'A']; xu = (nu > 0) ? 1 : 0;
- nx = ct['X' - 'A'];
- nn = ct['N' - 'A'];
-
- if (n <= 10) type = eslUNKNOWN;
- else if (n1 > 0) type = eslAMINO; /* contains giveaway, aa-only chars */
- else if (n2+nt+nn == n && x2+xt == 4) type = eslDNA; /* all DNA canon (or N), all four seen */
- else if (n2+nu+nn == n && x2+xu == 4) type = eslRNA; /* all RNA canon (or N), all four seen */
- else if (n1+n2+n3+nn+nt+nx == n && n3>n2 && x1+x2+x3+xn+xt >= 15) type = eslAMINO; /* all aa canon (or X); more aa canon than ambig; all 20 seen */
-
- *ret_type = type;
- if (type == eslUNKNOWN) return eslEAMBIGUOUS;
- else return eslOK;
-}
-
-
-
-/* Function: esl_abc_Match()
-* Synopsis: Returns the probability that two symbols match.
-* Incept: SRE, Sun Sep 17 11:46:32 2006 [Janelia]
-*
-* Purpose: Given two digital symbols <x> and <y> in alphabet
-* <abc>, calculate and return the probability that
-* <x> and <y> match, taking degenerate residue codes
-* into account.
-*
-* If <p> residue probability vector is NULL, the
-* calculation is a simple average. For example, for DNA,
-* R/A gives 0.5, C/N gives 0.25, N/R gives 0.25, R/R gives
-* 0.5.
-*
-* If <p> residue probability vector is non-NULL, it gives
-* a 0..K-1 array of background frequencies, and the
-* returned match probability is an expectation (weighted
-* average) given those residue frequencies.
-*
-* <x> and <y> should only be residue codes. Any other
-* comparison, including comparisons involving gap or
-* missing data characters, or even comparisons involving
-* illegal digital codes, returns 0.0.
-*
-* Note that comparison of residues from "identical"
-* sequences (even a self-comparison) will not result in an
-* identity of 1.0, if the sequence(s) contain degenerate
-* residue codes.
-*
-* Args: abc - digtal alphabet to use
-* x,y - two symbols to compare
-* p - NULL, or background probabilities of the
-* canonical residues in this alphabet [0..K-1]
-*
-* Returns: the probability of an identity (match) between
-* residues <x> and <y>.
-*/
-double
-esl_abc_Match(const ESL_ALPHABET *abc, ESL_DSQ x, ESL_DSQ y, double *p)
-{
- int i;
- double prob;
- double sx, sy;
-
- /* Easy cases */
- if (esl_abc_XIsCanonical(abc, x) && esl_abc_XIsCanonical(abc, y))
- {
- if (x==y) return 1.0; else return 0.0;
- }
- if ( ! esl_abc_XIsResidue(abc, x) || ! esl_abc_XIsResidue(abc, x)) return 0.0;
-
- /* Else, we have at least one degenerate residue, so calc an average or expectation.
- */
- if (p != NULL)
- {
- prob = sx = sy = 0.;
- for (i = 0; i < abc->K; i++)
- {
- if (abc->degen[(int)x][i]) sx += p[i];
- if (abc->degen[(int)y][i]) sy += p[i];
- if (abc->degen[(int)x][i] && abc->degen[(int)x][i]) prob += p[i] * p[i];
- }
- prob = prob / (sx*sy);
- }
- else
- {
- double uniformp = 1. / (double) abc->K;
- prob = sx = sy = 0.;
- for (i = 0; i < abc->K; i++)
- {
- if (abc->degen[(int)x][i]) sx += uniformp;
- if (abc->degen[(int)y][i]) sy += uniformp;
- if (abc->degen[(int)x][i] && abc->degen[(int)x][i]) prob += uniformp * uniformp;
- }
- prob = prob / (sx*sy);
- }
- return prob;
-}
-
-
-
-/* Function: esl_abc_IAvgScore()
-* Synopsis: Returns average score for degenerate residue.
-* Incept: SRE, Tue Dec 21 10:53:57 2004 [Zaragoza]
-*
-* Purpose: Given a residue code <x> in alphabet <a>, and an array of
-* integer scores <sc> for the residues in the base
-* alphabet, calculate and return the average score
-* (rounded to nearest integer).
-*
-* <x> would usually be a degeneracy code, but it
-* may also be a canonical residue. It must not
-* be a gap, missing data, or illegal symbol; if it
-* is, these functions return a score of 0 without
-* raising an error.
-*
-* <esl_abc_FAvgScore()> and <esl_abc_DAvgScore()> do the
-* same, but for float and double scores instead of integers
-* (and for real-valued scores, no rounding is done).
-*
-* Args: a - digital alphabet to use
-* x - a symbol to score
-* sc - score vector for canonical residues [0..K-1]
-*
-* Returns: average score for symbol <x>
-*/
-int
-esl_abc_IAvgScore(const ESL_ALPHABET *a, ESL_DSQ x, const int *sc)
-{
- float result = 0.;
- int i;
-
- if (! esl_abc_XIsResidue(a, x)) return 0;
- for (i = 0; i < a->K; i++)
- if (a->degen[(int) x][i]) result += (float) sc[i];
- result /= (float) a->ndegen[(int) x];
- if (result < 0) return (int) (result - 0.5);
- else return (int) (result + 0.5);
-}
-float
-esl_abc_FAvgScore(const ESL_ALPHABET *a, ESL_DSQ x, const float *sc)
-{
- float result = 0.;
- int i;
-
- if (! esl_abc_XIsResidue(a, x)) return 0.;
- for (i = 0; i < a->K; i++)
- if (a->degen[(int) x][i]) result += sc[i];
- result /= (float) a->ndegen[(int) x];
- return result;
-}
-double
-esl_abc_DAvgScore(const ESL_ALPHABET *a, ESL_DSQ x, const double *sc)
-{
- double result = 0.;
- int i;
-
- if (! esl_abc_XIsResidue(a, x)) return 0.;
- for (i = 0; i < a->K; i++)
- if (a->degen[(int) x][i]) result += sc[i];
- result /= (double) a->ndegen[(int) x];
- return result;
-}
-
-
-/* Function: esl_abc_IExpectScore()
-* Synopsis: Returns expected score for degenerate residue.
-* Incept: SRE, Tue Dec 21 11:02:46 2004 [Zaragoza]
-*
-* Purpose: Given a residue code <x> in alphabet <a>, an
-* array of integer scores <sc> for the residues in the base
-* alphabet, and background frequencies <p> for the
-* occurrence frequencies of the residues in the base
-* alphabet, calculate and return the expected score
-* (weighted by the occurrence frequencies <p>).
-*
-* <x> would usually be a degeneracy code, but it
-* may also be a canonical residue. It must not
-* be a gap, missing data, or illegal symbol; if it
-* is, these functions return a score of 0 without
-* raising an error.
-*
-* <esl_abc_FExpectScore()> and <esl_abc_DExpectScore()> do the
-* same, but for float and double scores instead of integers
-* (for real-valued scores, no rounding is done).
-*
-* Args: a - digital alphabet to use
-* x - a symbol to score
-* sc - score vector for canonical residues [0..K-1]
-* p - background prob's of canonicals [0..K-1]
-*
-* Returns: average score for symbol <x>
-*/
-int
-esl_abc_IExpectScore(const ESL_ALPHABET *a, ESL_DSQ x, const int *sc, const float *p)
-{
- float result = 0.;
- float denom = 0.;
- int i;
-
- if (! esl_abc_XIsResidue(a, x)) return 0;
- for (i = 0; i < a->K; i++)
- if (a->degen[(int) x][i]) {
- result += (float) sc[i] * p[i];
- denom += p[i];
- }
- result /= denom;
- if (result < 0) return (int) (result - 0.5);
- else return (int) (result + 0.5);
-}
-float
-esl_abc_FExpectScore(const ESL_ALPHABET *a, ESL_DSQ x, const float *sc, const float *p)
-{
- float result = 0.;
- float denom = 0.;
- int i;
-
- if (! esl_abc_XIsResidue(a, x)) return 0.;
- for (i = 0; i < a->K; i++)
- if (a->degen[(int) x][i]) {
- result += sc[i] * p[i];
- denom += p[i];
- }
- result /= denom;
- return result;
-}
-double
-esl_abc_DExpectScore(const ESL_ALPHABET *a, ESL_DSQ x, const double *sc, const double *p)
-{
- double result = 0.;
- double denom = 0.;
- int i;
-
- if (! esl_abc_XIsResidue(a, x)) return 0.;
- for (i = 0; i < a->K; i++)
- if (a->degen[(int) x][i]) {
- result += sc[i] * p[i];
- denom += p[i];
- }
- result /= denom;
- return result;
-}
-
-/* Function: esl_abc_IAvgScVec()
-* Synopsis: Fill out score vector with average degenerate scores.
-* Incept: SRE, Thu Apr 6 12:12:25 2006 [AA890 enroute to Boston]
-*
-* Purpose: Given an alphabet <a> and a score vector <sc> of length
-* <a->Kp> that contains integer scores for the base
-* alphabet (<0..a->K-1>), fill out the rest of the score
-* vector, calculating average scores for
-* degenerate residues using <esl_abc_IAvgScore()>.
-*
-* The score, if any, for a gap character <K>, the
-* nonresidue <Kp-2>, and the missing data character <Kp-1>
-* are untouched by this function. Only the degenerate
-* scores <K+1..Kp-3> are filled in.
-*
-* <esl_abc_FAvgScVec()> and <esl_abc_DAvgScVec()> do
-* the same, but for score vectors of floats or doubles,
-* respectively.
-*
-* Returns: <eslOK> on success.
-*/
-int
-esl_abc_IAvgScVec(const ESL_ALPHABET *a, int *sc)
-{
- ESL_DSQ x;
- for (x = a->K+1; x <= a->Kp-3; x++)
- sc[x] = esl_abc_IAvgScore(a, x, sc);
- return eslOK;
-}
-int
-esl_abc_FAvgScVec(const ESL_ALPHABET *a, float *sc)
-{
- ESL_DSQ x;
- for (x = a->K+1; x <= a->Kp-3; x++)
- sc[x] = esl_abc_FAvgScore(a, x, sc);
- return eslOK;
-}
-int
-esl_abc_DAvgScVec(const ESL_ALPHABET *a, double *sc)
-{
- ESL_DSQ x;
- for (x = a->K+1; x <= a->Kp-3; x++)
- sc[x] = esl_abc_DAvgScore(a, x, sc);
- return eslOK;
-}
-
-/* Function: esl_abc_IExpectScVec()
-* Synopsis: Fill out score vector with average expected scores.
-* Incept: SRE, Thu Apr 6 12:23:52 2006 [AA 890 enroute to Boston]
-*
-* Purpose: Given an alphabet <a>, a score vector <sc> of length
-* <a->Kp> that contains integer scores for the base
-* alphabet (<0..a->K-1>), and residue occurrence probabilities
-* <p[0..a->K-1]>; fill in the scores for the
-* degenerate residues <K+1..Kp-3> using <esl_abc_IExpectScore()>.
-*
-* The score, if any, for a gap character <K>, the
-* nonresidue <Kp-2>, and the missing data character <Kp-1>
-* are untouched by this function. Only the degenerate
-* scores <K+1..Kp-3> are filled in.
-*
-* <esl_abc_FExpectScVec()> and <esl_abc_DExpectScVec()> do
-* the same, but for score vectors of floats or doubles,
-* respectively. The probabilities <p> are floats for the
-* integer and float versions, and doubles for the double
-* version.
-*
-* Returns: <eslOK> on success.
-*/
-int
-esl_abc_IExpectScVec(const ESL_ALPHABET *a, int *sc, const float *p)
-{
- ESL_DSQ x;
- for (x = a->K+1; x <= a->Kp-3; x++)
- sc[x] = esl_abc_IExpectScore(a, x, sc, p);
- return eslOK;
-}
-int
-esl_abc_FExpectScVec(const ESL_ALPHABET *a, float *sc, const float *p)
-{
- ESL_DSQ x;
- for (x = a->K+1; x <= a->Kp-3; x++)
- sc[x] = esl_abc_FExpectScore(a, x, sc, p);
- return eslOK;
-}
-int
-esl_abc_DExpectScVec(const ESL_ALPHABET *a, double *sc, const double *p)
-{
- ESL_DSQ x;
- for (x = a->K+1; x <= a->Kp-3; x++)
- sc[x] = esl_abc_DExpectScore(a, x, sc, p);
- return eslOK;
-}
-
-
-/* Function: esl_abc_FCount()
-* Synopsis: Count a degenerate symbol into a count vector.
-* Incept: SRE, Wed Apr 12 17:16:35 2006 [St. Louis]
-*
-* Purpose: Count a possibly degenerate digital symbol <x> (0..Kp-1)
-* into a counts array <ct> for base symbols (0..K-1).
-* Assign the symbol a weight of <wt> (often just 1.0).
-* The count weight of a degenerate symbol is divided equally
-* across the possible base symbols.
-*
-* <x> can be a gap; if so, <ct> must be allocated 0..K,
-* not 0..K-1. If <x> is a missing data symbol, or a nonresidue
-* data symbol, nothing is done.
-*
-* <esl_abc_DCount()> does the same, but for double-precision
-* count vectors and weights.
-*
-* Returns: <eslOK> on success.
-*/
-int
-esl_abc_FCount(const ESL_ALPHABET *abc, float *ct, ESL_DSQ x, float wt)
-{
- ESL_DSQ y;
-
- if (esl_abc_XIsCanonical(abc, x) || esl_abc_XIsGap(abc, x))
- ct[x] += wt;
- else if (esl_abc_XIsMissing(abc, x) || esl_abc_XIsNonresidue(abc, x))
- return eslOK;
- else
- for (y = 0; y < abc->K; y++) {
- if (abc->degen[x][y])
- ct[y] += wt / (float) abc->ndegen[x];
- }
- return eslOK;
-}
-int
-esl_abc_DCount(const ESL_ALPHABET *abc, double *ct, ESL_DSQ x, double wt)
-{
- ESL_DSQ y;
-
- if (esl_abc_XIsCanonical(abc, x) || esl_abc_XIsGap(abc, x))
- ct[x] += wt;
- else if (esl_abc_XIsMissing(abc, x) || esl_abc_XIsNonresidue(abc, x))
- return eslOK;
- else
- for (y = 0; y < abc->K; y++) {
- if (abc->degen[x][y])
- ct[y] += wt / (double) abc->ndegen[x];
- }
- return eslOK;
-}
-
-/* Function: esl_abc_EncodeType()
-* Synopsis: Convert descriptive string to alphabet type code.
-* Incept: SRE, Mon Oct 13 14:52:18 2008 [Janelia]
-*
-* Purpose: Convert a string like "amino" or "DNA" to the
-* corresponding Easel internal alphabet type code
-* such as <eslAMINO> or <eslDNA>; return the code.
-*
-* Returns: the code, such as <eslAMINO>; if <type> isn't
-* recognized, returns <eslUNKNOWN>.
-*/
-int
-esl_abc_EncodeType(char *type)
-{
- if (esl_strcasecmp(type, "amino") == 0) return eslAMINO;
- else if (esl_strcasecmp(type, "rna") == 0) return eslRNA;
- else if (esl_strcasecmp(type, "dna") == 0) return eslDNA;
- else if (esl_strcasecmp(type, "coins") == 0) return eslCOINS;
- else if (esl_strcasecmp(type, "dice") == 0) return eslDICE;
- else if (esl_strcasecmp(type, "custom")== 0) return eslNONSTANDARD;
- else return eslUNKNOWN;
-}
-
-/* Function: esl_abc_DecodeType()
-* Synopsis: Returns descriptive string for alphabet type code.
-* Incept: SRE, Wed Apr 12 12:23:24 2006 [St. Louis]
-*
-* Purpose: For diagnostics and other output: given an internal
-* alphabet code <type> (<eslRNA>, for example), return
-* pointer to an internal string ("RNA", for example).
-*/
-char *
-esl_abc_DecodeType(int type)
-{
- switch (type) {
- case eslUNKNOWN: return "unknown";
- case eslRNA: return "RNA";
- case eslDNA: return "DNA";
- case eslAMINO: return "amino";
- case eslCOINS: return "coins";
- case eslDICE: return "dice";
- case eslNONSTANDARD: return "custom";
- default: break;
- }
- esl_exception(eslEINVAL, __FILE__, __LINE__, "no such alphabet type code %d\n", type);
- return NULL;
-}
-
-
-/* Function: esl_abc_ValidateSeq()
-* Synopsis: Assure that a text sequence can be digitized.
-* Incept: SRE, Sat Aug 26 17:40:00 2006 [St. Louis]
-*
-* Purpose: Check that sequence <seq> of length <L> can be digitized
-* without error; all its symbols are valid in alphabet
-* <a>. If so, return <eslOK>. If not, return <eslEINVAL>.
-*
-* <errbuf> is either passed as <NULL>, or a pointer to an
-* error string buffer allocated by the caller for
-* <eslERRBUFSIZE> characters. If <errbuf> is non-NULL, and
-* the sequence is invalid, an error message is placed in
-* <errbuf>.
-*
-* Args: a - digital alphabet
-* seq - sequence to validate [0..L-1]; NUL-termination unnecessary
-* L - length of <seq>
-* errbuf - NULL, or ptr to <eslERRBUFSIZE> chars of allocated space
-* for an error message.
-*
-* Returns: <eslOK> if <seq> is valid; <eslEINVAL> if not.
-*
-* Throws: (no abnormal error conditions).
-*/
-int
-esl_abc_ValidateSeq(const ESL_ALPHABET *a, const char *seq, int64_t L, char *errbuf)
-{
- int status;
- int64_t i;
- int64_t firstpos = -1;
- int64_t nbad = 0;
-
- if (errbuf) *errbuf = 0;
- for (i = 0; i < L; i++) {
- if (! esl_abc_CIsValid(a, seq[i])) {
- if (firstpos == -1) firstpos = i;
- nbad++;
- }
- }
- if (nbad > 0) ESL_XFAIL( eslEINVAL, errbuf, "bad chars found in sequence" );
- return eslOK;
-
- ERROR:
- return status;
-}
-/*---------------- end, other API functions ---------------------*/
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_alphabet.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_alphabet.h
deleted file mode 100644
index 9b9fbdf..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_alphabet.h
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
-* Digital representation of biosequence symbols in Easel.
-* SVN $Id: esl_alphabet.h 337 2009-05-12 02:13:02Z eddys $
-* SRE, Tue Nov 23 19:44:01 2004 [St. Louis]
-*/
-#ifndef ESL_ALPHABET_INCLUDED
-#define ESL_ALPHABET_INCLUDED
-
-#include <ctype.h> /* isascii() */
-#include "easel.h"
-
-/* Flags for alphabet types.
-* These are the values stored in the <type> field of ESL_ALPHABET.
-*/
-#define eslUNKNOWN 0 /* 0=unknown is easel-wide convention; don't change */
-#define eslRNA 1
-#define eslDNA 2
-#define eslAMINO 3
-#define eslCOINS 4 /* for toy examples */
-#define eslDICE 5 /* also for toy examples */
-#define eslNONSTANDARD 6
-
-/* Structure: ESL_ALPHABET
-*
-* Describes one digital alphabet. Digital codes run 0..Kp-1; the
-* esl_abc_X*() and esl_abc_C*() macros later in this header encode how
-* that range is divided (canonical residues, gap, degeneracies, ...).
-*
-* <inmap> has exactly 128 entries, so it can only be indexed with
-* character codes 0..127.
-*/
-typedef struct {
- int type; /* eslDNA, eslRNA, eslAMINO, or eslNONSTANDARD */
- int K; /* uniq alphabet size: 4 or 20 */
- int Kp; /* total size: alphabet + degen + gap + missing */
- char *sym; /* "ACGT-RYMKSWHBVDN~", for instance [0..Kp-1] */
- ESL_DSQ inmap[128]; /* inmap['A'] = 0, etc: dsq[] index for a symbol */
- char **degen; /* 1/0, which syms inc which res [0..Kp-1][0..K-1] */
- int *ndegen; /* # of degenerate residues per code [0..Kp-1] */
- ESL_DSQ *complement; /* map a digital symbol to its complement [0..Kp-1]*/
-} ESL_ALPHABET;
-
-
-
-
-/* 1. An ESL_ALPHABET object.
-* Creation, customization and destruction of alphabets.
-*/
-extern ESL_ALPHABET *esl_alphabet_Create(int type);
-extern ESL_ALPHABET *esl_alphabet_CreateCustom(const char *alphabet, int K, int Kp);
-extern int esl_alphabet_SetEquiv(ESL_ALPHABET *a, char sym, char c);
-extern int esl_alphabet_SetCaseInsensitive(ESL_ALPHABET *a);
-extern int esl_alphabet_SetDegeneracy(ESL_ALPHABET *a, char c, char *ds);
-extern int esl_alphabet_SetIgnored(ESL_ALPHABET *a, const char *ignoredchars);
-extern void esl_alphabet_Destroy(ESL_ALPHABET *a);
-
-/* 2. Digitized sequences.
-* Routines operating on ESL_DSQ arrays. Functions that take <L> receive
-* the sequence length as an int64_t.
-*/
-extern int esl_abc_CreateDsq(const ESL_ALPHABET *a, const char *seq, ESL_DSQ **ret_dsq);
-extern int esl_abc_Digitize (const ESL_ALPHABET *a, const char *seq, ESL_DSQ *dsq);
-extern int esl_abc_Textize (const ESL_ALPHABET *a, const ESL_DSQ *dsq, int64_t L, char *seq);
-extern int esl_abc_TextizeN (const ESL_ALPHABET *a, const ESL_DSQ *dptr, int64_t L, char *buf);
-extern int esl_abc_dsqcpy(const ESL_DSQ *dsq, int64_t L, ESL_DSQ *dcopy);
-extern int esl_abc_dsqdup(const ESL_DSQ *dsq, int64_t L, ESL_DSQ **ret_dup);
-extern int esl_abc_dsqcat(const ESL_ALPHABET *a, ESL_DSQ **dsq, int64_t *L, const char *s, int64_t n);
-extern int64_t esl_abc_dsqlen(const ESL_DSQ *dsq);
-extern int64_t esl_abc_dsqrlen(const ESL_ALPHABET *a, const ESL_DSQ *dsq);
-extern int esl_abc_CDealign(const ESL_ALPHABET *abc, char *s, const ESL_DSQ *ref_ax, int64_t *opt_rlen);
-extern int esl_abc_XDealign(const ESL_ALPHABET *abc, ESL_DSQ *x, const ESL_DSQ *ref_ax, int64_t *opt_rlen);
-
-
-/* 3. Other routines in the API.
-* The I/F/D letter in a name matches the element type of its score or
-* count argument (int, float, double), as the signatures below show.
-*/
-extern int esl_abc_GuessAlphabet(const int64_t *ct, int *ret_type);
-extern double esl_abc_Match (const ESL_ALPHABET *a, ESL_DSQ x, ESL_DSQ y, double *p);
-extern int esl_abc_IAvgScore (const ESL_ALPHABET *a, ESL_DSQ x, const int *sc);
-extern float esl_abc_FAvgScore (const ESL_ALPHABET *a, ESL_DSQ x, const float *sc);
-extern double esl_abc_DAvgScore (const ESL_ALPHABET *a, ESL_DSQ x, const double *sc);
-extern int esl_abc_IExpectScore(const ESL_ALPHABET *a, ESL_DSQ x, const int *sc, const float *p);
-extern float esl_abc_FExpectScore(const ESL_ALPHABET *a, ESL_DSQ x, const float *sc, const float *p);
-extern double esl_abc_DExpectScore(const ESL_ALPHABET *a, ESL_DSQ x, const double *sc, const double *p);
-
-extern int esl_abc_IAvgScVec (const ESL_ALPHABET *a, int *sc);
-extern int esl_abc_FAvgScVec (const ESL_ALPHABET *a, float *sc);
-extern int esl_abc_DAvgScVec (const ESL_ALPHABET *a, double *sc);
-extern int esl_abc_IExpectScVec(const ESL_ALPHABET *a, int *sc, const float *p);
-extern int esl_abc_FExpectScVec(const ESL_ALPHABET *a, float *sc, const float *p);
-extern int esl_abc_DExpectScVec(const ESL_ALPHABET *a, double *sc, const double *p);
-extern int esl_abc_FCount (const ESL_ALPHABET *a, float *ct, ESL_DSQ x, float wt);
-extern int esl_abc_DCount (const ESL_ALPHABET *a, double *ct, ESL_DSQ x, double wt);
-extern int esl_abc_EncodeType (char *typestring);
-extern char *esl_abc_DecodeType (int type);
-/* Returns eslOK if seq[0..L-1] is valid in <a>, eslEINVAL otherwise;
-* <errbuf> may be NULL, else it receives an error message on failure. */
-extern int esl_abc_ValidateSeq(const ESL_ALPHABET *a, const char *seq, int64_t L, char *errbuf);
-
-/* In the tests below, remember the rules of order in internal alphabets:
-*   Canonical alphabet     Gap   Degeneracies   Any     None    Missing
-*   0..K-1                 K     K+1..Kp-4      (Kp-3)  (Kp-2)  (Kp-1)
-*   ACGT                   -     RYMKSWHBVD     N       *       ~        DNA:     K=4  Kp=18
-*   ACDEFGHIKLMNPQRSTVWY   -     BJZOU          X       *       ~        protein: K=20 Kp=29
-*
-* ESL_DSQ is an unsigned 8-bit type, so don't test for >= 0 or compilers will complain.
-*
-* esl_abc_X*() macros take a digital symbol <x>; esl_abc_C*() macros take
-* a text character <c> and look it up through a->inmap[].
-*
-* Macro hygiene: every parameter is parenthesized at each use, so an
-* expression argument is cast and indexed as a whole. Several macros
-* evaluate <c> or <x> more than once; do not pass arguments with side
-* effects.
-*
-* Precondition: only esl_abc_CIsValid() guards the lookup with isascii().
-* All other esl_abc_C*() tests, and esl_abc_DigitizeSymbol(), index the
-* 128-entry inmap[] directly, so <c> must be in 0..127 when they are used.
-*/
-#define esl_abc_DigitizeSymbol(a, c) ((a)->inmap[(int)(c)])
-#define esl_abc_XIsValid(a, x) ((x) < (a)->Kp)
-#define esl_abc_XIsResidue(a, x) ((x) < (a)->K || ((x) > (a)->K && (x) < (a)->Kp-2))
-#define esl_abc_XIsCanonical(a, x) ((x) < (a)->K)
-#define esl_abc_XIsGap(a, x) ((x) == (a)->K)
-#define esl_abc_XIsDegenerate(a, x) ((x) > (a)->K && (x) < (a)->Kp-2)
-#define esl_abc_XIsUnknown(a, x) ((x) == (a)->Kp-3)
-#define esl_abc_XIsNonresidue(a, x) ((x) == (a)->Kp-2)
-#define esl_abc_XIsMissing(a, x) ((x) == (a)->Kp-1)
-#define esl_abc_XGetGap(a) ((a)->K)
-#define esl_abc_XGetUnknown(a) ((a)->Kp-3)
-#define esl_abc_XGetNonresidue(a) ((a)->Kp-2)
-#define esl_abc_XGetMissing(a) ((a)->Kp-1)
-
-
-#define esl_abc_CIsValid(a, c) (isascii(c) && (a)->inmap[(int)(c)] < (a)->Kp)
-#define esl_abc_CIsResidue(a, c) ((a)->inmap[(int)(c)] < (a)->K || ((a)->inmap[(int)(c)] > (a)->K && (a)->inmap[(int)(c)] < (a)->Kp-2))
-#define esl_abc_CIsCanonical(a, c) ((a)->inmap[(int)(c)] < (a)->K)
-#define esl_abc_CIsGap(a, c) ((a)->inmap[(int)(c)] == (a)->K)
-#define esl_abc_CIsDegenerate(a, c) ((a)->inmap[(int)(c)] > (a)->K && (a)->inmap[(int)(c)] < (a)->Kp-2)
-#define esl_abc_CIsUnknown(a, c) ((a)->inmap[(int)(c)] == (a)->Kp-3)
-#define esl_abc_CIsNonresidue(a, c) ((a)->inmap[(int)(c)] == (a)->Kp-2)
-#define esl_abc_CIsMissing(a, c) ((a)->inmap[(int)(c)] == (a)->Kp-1)
-#define esl_abc_CGetGap(a) ((a)->sym[(int)(a)->K])
-#define esl_abc_CGetUnknown(a) ((a)->sym[(int)(a)->Kp-3])
-#define esl_abc_CGetNonresidue(a) ((a)->sym[(int)(a)->Kp-2])
-#define esl_abc_CGetMissing(a) ((a)->sym[(int)(a)->Kp-1])
-
-
-#endif /*!ESL_ALPHABET_INCLUDED*/
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_cluster.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_cluster.cpp
deleted file mode 100644
index 2e5e4b7..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_cluster.cpp
+++ /dev/null
@@ -1,197 +0,0 @@
-/* Generalized single linkage clustering.
-*
-* Table of contents:
-* 1. Single linkage clustering, generalized
-* 2. Copyright and license
-*
-* SRE, Mon Jan 7 09:21:56 2008 [Janelia] [HHGTTG]
-* SVN $Id: esl_cluster.c 269 2008-06-19 13:47:41Z eddys $
-*/
-
-#include <hmmer3/easel/esl_config.h>
-
-#include <stdlib.h>
-
-#include <hmmer3/easel/easel.h>
-#include "esl_cluster.h"
-
-
-/*****************************************************************
-* 1. Single linkage clustering, generalized
-*****************************************************************/
-
-/* Function: esl_cluster_SingleLinkage()
-* Synopsis: Generalized single linkage clustering.
-* Incept: SRE, Mon Jan 7 08:35:10 2008 [Janelia]
-*
-* Purpose: Given a set of vertices, cluster them by single-linkage
-* clustering.
-*
-* The data describing each vertex is provided in an array
-* starting at <base>, consisting of <n> vertices. Each
-* vertex can be of any type (structure, scalar, pointer)
-* so long as each vertex element is of fixed size <size>
-* bytes.
-*
-* A pointer to the clustering function is provided in
-* <(*linkfunc)()>, and a pointer to any necessary
-* parameters for that function (for example, any
-* thresholds) is provided in <param>.
-*
-* The <int (*linkfunc)()> must be written by the
-* caller. It takes arguments <(void *v1, void *v2, void
-* *param, int *ret_link)>: pointers to two vertices to
-* test for linkage and a pointer to any necessary
-* parameters, and it passes the answer <TRUE> (1) or
-* <FALSE> (0) back in <*ret_link>. The <(*linkfunc)()>
-* returns <eslOK> (0) on success, and a nonzero error code
-* on failure (see <easel.h> for a list of Easel's error
-* codes).
-*
-* The caller provides an allocated <workspace> with space
-* for at least <2n> integers. (Allocation in the caller
-* allows the caller to reuse memory and save
-* allocation/free cycles, if it has many rounds of
-* clustering to do.)
-*
-* The caller also provides allocated space in
-* <assignments> for <n> integers which, upon successful
-* return, contains assignments of the <0..n-1> vertices to
-* <0..C-1> clusters. That is, if <assignments[42] = 1>,
-* that means vertex 42 is assigned to cluster 1. The
-* total number of clusters is returned in <ret_C>.
-*
-* The algorithm runs in $O(N)$ memory; importantly, it
-* does not require a $O(N^2)$ adjacency matrix. Worst case
-* time complexity is $O(N^2)$ (multiplied by any
-* additional complexity in the <(*linkfunc)()> itself), but
-* the worst case (no links at all; <C=n> clusters) should
-* be unusual. More typically, time scales as about $N \log
-* N$. Best case is $N$, for a completely connected graph
-* in which all vertices group into one cluster. (More
-* precisely, best case complexity arises when vertex 0 is
-* connected to all other <n-1> vertices.)
-*
-* Notes: I don't know if this algorithm is published. I
-* haven't seen it in graph theory books, but that might
-* be because it's so obvious that nobody's bothered.
-*
-* In brief, we're going to do a breadth-first search of the
-* graph, and we're going to calculate links on the fly
-* rather than precalculating them into a standard adjacency
-* matrix.
-*
-* While working, we keep two stacks of maximum length N:
-* a : list of vertices that are still unconnected.
-* b : list of vertices that we've connected to
-* in our current breadth level, but we haven't
-* yet tested for other connections to a.
-* The current length (number of elements in) a and b are
-* kept in na, nb.
-*
-* We store our results in an array of length N:
-* c : assigns each vertex to a component. for example
-* c[4] = 1 means that vertex 4 is in component 1.
-* nc is the number of components. Components
-* are numbered from 0 to nc-1. We return c and nc
-* to our caller.
-*
-* The algorithm is:
-*
-* Initialisation:
-* a <-- all the vertices
-* na <-- N
-* b <-- empty set
-* nb <-- 0
-* nc <-- 0
-*
-* Then:
-* while (a is not empty)
-* pop a vertex off a, push onto b
-* while (b is not empty)
-* pop vertex v off b
-* assign c[v] = nc
-* for each vertex w in a:
-* compare v,w. If w is linked to v, remove w
-* from a, push onto b.
-* nc++
-* q.e.d.
-*
-* Args: base - pointer to array of n fixed-size vertices to be clustered.
-* n - number of vertices
-* size - size of each vertex element
-* linkfunc - pointer to caller's function for defining linked pairs
-* param - pointer to any data that needs to be provided to <(*linkfunc)>
-* workspace - caller provides at least 2n*sizeof(int) of workspace
-* assignments - RETURN: assignments to clusters (caller provides n*sizeof(int) space)
-* ret_C - RETURN: number of clusters
-*
-* Returns: <eslOK> on success; <assignments[0..n-1]> contains cluster assignments
-* <0..C-1> for each vertex, and <*ret_C> contains the number of clusters
-* <C>
-*
-* Throws: status codes from the caller's <(*linkfunc)> on failure; in this case,
-* the contents of <*assignments> is undefined, and <*ret_C> is 0.
-*/
-int
-esl_cluster_SingleLinkage(void *base, size_t n, size_t size,
-                          int (*linkfunc)(const void *, const void *, const void *, int *), void *param,
-                          int *workspace, int *assignments, int *ret_C)
-{
-  /* The caller's workspace is split into two stacks of n ints each. */
-  int *avail     = workspace;       /* vertices not yet placed in any cluster      */
-  int *pending   = workspace + n;   /* vertices placed, but not yet extended       */
-  int  navail    = n;               /* current depth of <avail>                    */
-  int  npending  = 0;               /* current depth of <pending>                  */
-  int  nclusters = 0;               /* clusters completed so far                   */
-  int  cur;                         /* vertex currently being extended             */
-  int  k;                           /* index into <avail>                          */
-  int  linked;                      /* answer handed back by (*linkfunc)()         */
-  int  status;
-
-  /* Load the available stack in reverse, so vertex 0 ends up on top. */
-  for (k = 0; k < (int)n; k++) avail[k] = n-k-1;
-
-  while (navail > 0)
-    {
-      /* Seed the next cluster with whatever is on top of <avail>. */
-      pending[npending++] = avail[--navail];
-
-      while (npending > 0)
-        {
-          cur = pending[--npending];
-          assignments[cur] = nclusters;
-
-          /* Scan <avail> from the top down. A linked entry is removed by
-           * overwriting it with the current top entry, which this scan
-           * has already visited, so nothing is skipped or tested twice. */
-          k = navail;
-          while (--k >= 0)
-            {
-              status = (*linkfunc)( (char *) base + cur*size, (char *) base + avail[k]*size, param, &linked);
-              if (status != eslOK) goto ERROR;
-              if (! linked) continue;
-
-              pending[npending++] = avail[k];   /* queue the neighbour for extension */
-              avail[k] = avail[--navail];       /* and drop it from the available set */
-            }
-        }
-      nclusters++;
-    }
-
-  *ret_C = nclusters;
-  return eslOK;
-
- ERROR:
-  *ret_C = 0;
-  return status;
-}
-/*------------------ end, single linkage clustering -------------*/
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_cluster.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_cluster.h
deleted file mode 100644
index 5f8baa2..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_cluster.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* Generalized single linkage clustering.
- *
- * SRE, Mon Jan 7 09:40:06 2008 [Janelia]
- * SVN $Id: esl_cluster.h 231 2008-03-25 14:43:57Z eddys $
- */
-#ifndef ESL_CLUSTER_INCLUDED
-#define ESL_CLUSTER_INCLUDED
-
-/* Single-linkage clustering of <n> fixed-size vertices stored at <base>,
- * each <size> bytes wide. <linkfunc> is called on pairs of vertices (plus
- * <param>) and reports through its last argument whether they are linked.
- * The caller supplies <workspace> (at least 2n ints) and <assignments>
- * (n ints). On success returns eslOK with the cluster index of each
- * vertex in <assignments> and the cluster count in <*ret_C>; if
- * <linkfunc> fails, its status is returned and <*ret_C> is set to 0.
- */
-extern int esl_cluster_SingleLinkage(void *base, size_t n, size_t size,
- int (*linkfunc)(const void *, const void *, const void *, int *), void *param,
- int *workspace, int *assignments, int *ret_C);
-#endif /*ESL_CLUSTER_INCLUDED*/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_config.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_config.h
deleted file mode 100644
index ac1e587..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_config.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/* easel/esl_config.h. Generated from esl_config.h.in by configure. */
-/* esl_config.h.in [input to configure]
-*
-* System-dependent configuration of Easel, by autoconf.
-*
-* This file should be included in all Easel .c files before
-* anything else, because it may set #define's that control
-* behaviour of system includes and system libraries. An example
-* is large file support.
-*
-* SVN $Id$
-* SRE, Fri Mar 3 08:03:32 2006 [St. Louis]
-*/
-#ifndef ESL_CONFIG_INCLUDED
-#define ESL_CONFIG_INCLUDED
-
-/* Version info.
-* String constants identifying this Easel snapshot.
-*/
-#define EASEL_VERSION "h3.0"
-#define EASEL_DATE "March 2010"
-#define EASEL_COPYRIGHT "Copyright (C) 2010 Howard Hughes Medical Institute."
-#define EASEL_LICENSE "Freely distributed under the Janelia Farm Software License."
-
-/* Large file support
-* Must precede any header file inclusion.
-* None of these macros is defined in this configuration.
-*/
-/* #undef _FILE_OFFSET_BITS */
-/* #undef _LARGE_FILES */
-/* #undef _LARGEFILE_SOURCE */
-
-/* Debugging verbosity (0=none;3=most verbose)
-*/
-#define eslDEBUGLEVEL 0
-
-/* System headers
-* All HAVE_*_H switches are commented out here, so none of them is
-* defined by this header.
-*/
-//#define HAVE_UNISTD_H 1
-//#define HAVE_STDINT_H 1
-//#define HAVE_INTTYPES_H 1
-//#define HAVE_SYS_TYPES_H 1
-
-/* Types
-* No replacement typedefs or endianness flag are set in this configuration.
-*/
-/* #undef WORDS_BIGENDIAN */
-/* #undef int8_t */
-/* #undef int16_t */
-/* #undef int32_t */
-/* #undef int64_t */
-/* #undef uint8_t */
-/* #undef uint16_t */
-/* #undef uint32_t */
-/* #undef uint64_t */
-/* #undef off_t */
-
-/* Optional packages
-*/
-/* #undef HAVE_LIBGSL */
-
-/* Optional parallel implementation support
-* Only the SSE2 switches are defined; VMX and MPI are left undefined.
-*/
-#define HAVE_SSE2 1
-/* #undef HAVE_VMX */
-/* #undef HAVE_MPI */
-
-#define HAVE_SSE2_CAST 1
-
-/* Functions
-* All HAVE_<function> switches are commented out here, so none of them
-* is defined by this header.
-*/
-//#define HAVE_MKSTEMP 1
-//#define HAVE_POPEN 1
-//#define HAVE_STRCASECMP 1
-//#define HAVE_TIMES 1
-//#define HAVE_GETPID 1
-//#define HAVE_FSEEKO 1
-
-/*****************************************************************
-* Available augmentations.
-*
-* If you grab a single module from Easel to use it by itself,
-* leave all these #undef'd; you have no augmentations.
-*
-* If you grab additional Easel .c files, you can enable any
-* augmentations they provide to other modules by #defining the
-* modules you have below. Alternatively, you can -D them on
-* the compile line, as in cc -DeslAUGMENT_SSI -DeslAUGMENT_MSA.
-*
-* If you compile and install the complete Easel library, all of these
-* get #defined automatically by ./configure, plus the eslLIBRARY flag
-* which means the full library with all augmentations is
-* available. So, if you steal files from an installed library, just
-* set these all back to #undef (depending on which files you have).
-*****************************************************************/
-/* eslLIBRARY is defined unconditionally, so the #ifndef branch below is
-* never compiled and the #ifdef branch always is. */
-#define eslLIBRARY 1
-
-#ifndef eslLIBRARY
-/* #undef eslAUGMENT_ALPHABET */
-/* #undef eslAUGMENT_DMATRIX */
-/* #undef eslAUGMENT_FILEPARSER */
-/* #undef eslAUGMENT_GEV */
-/* #undef eslAUGMENT_GUMBEL */
-/* #undef eslAUGMENT_HISTOGRAM */
-/* #undef eslAUGMENT_KEYHASH */
-/* #undef eslAUGMENT_MINIMIZER */
-/* #undef eslAUGMENT_MSA */
-/* #undef eslAUGMENT_RANDOM */
-/* #undef eslAUGMENT_SSI */
-/* #undef eslAUGMENT_STATS */
-#endif
-
-/* Augmentations enabled in this build: ALPHABET, DMATRIX, HISTOGRAM,
-* KEYHASH, MINIMIZER, MSA and RANDOM. FILEPARSER, GEV, GUMBEL, SSI and
-* STATS are deliberately left undefined (their #define lines are
-* commented out below). */
-#ifdef eslLIBRARY
-#define eslAUGMENT_ALPHABET
-#define eslAUGMENT_DMATRIX
-
-// ! we don't need FILEPARSER module !
-//#define eslAUGMENT_FILEPARSER
-// ! we don't need GEV module !
-//#define eslAUGMENT_GEV
-// ! we don't need this define !
-//#define eslAUGMENT_GUMBEL
-#define eslAUGMENT_HISTOGRAM
-#define eslAUGMENT_KEYHASH
-#define eslAUGMENT_MINIMIZER
-#define eslAUGMENT_MSA
-#define eslAUGMENT_RANDOM
-
-// ! we don't need SSI module !
-//#define eslAUGMENT_SSI
-// ! we don't need this define !
-//#define eslAUGMENT_STATS
-#endif
-
-
-#endif /*ESL_CONFIG_INCLUDED*/
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_dirichlet.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_dirichlet.cpp
deleted file mode 100644
index 2cc8e9f..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_dirichlet.cpp
+++ /dev/null
@@ -1,1014 +0,0 @@
-/* esl_dirichlet.c
-* Functions relevant to Beta, Gamma, and Dirichlet densities,
-* and simple and mixture Dirichlet priors.
-*
-* SRE, Tue Nov 2 13:42:59 2004 [St. Louis]
- * SVN $Id: esl_dirichlet.c 440 2009-11-13 17:02:49Z eddys $
-*/
-
-#include <hmmer3/easel/esl_config.h>
-#include <assert.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_random.h>
-
-#include <hmmer3/easel/esl_vectorops.h>
-#include <hmmer3/easel/esl_stats.h>
-#include <hmmer3/easel/esl_dirichlet.h>
-
-/*****************************************************************
-*# 1. The <ESL_MIXDCHLET> object for mixture Dirichlet priors
-*****************************************************************/
-
-/* Function: esl_mixdchlet_Create()
-* Incept: SRE, Fri Apr 8 10:44:34 2005 [St. Louis]
-*
-* Purpose: Create a new mixture Dirichlet prior with <N> components,
-* each with <K> parameters.
-*
-* Returns: initialized <ESL_MIXDCHLET *> on success.
-*
-* Throws: NULL on allocation failure.
-*/
-ESL_MIXDCHLET *
-esl_mixdchlet_Create(int N, int K)
-{
-  /* Allocates a mixture with N components of K parameters each.
-   *
-   * pq[0..N-1] and alpha[0..N-1][0..K-1] are allocated but NOT
-   * initialized; the caller fills them in. All alpha rows live in one
-   * contiguous N*K block owned by alpha[0]; alpha[q] points into it.
-   *
-   * Returns the new object, or NULL if N or K is not positive or an
-   * allocation fails.
-   */
-  int            status;
-  ESL_MIXDCHLET *pri = NULL;
-  int            q;
-
-  /* A zero-sized request may or may not yield a usable pointer depending
-   * on the platform's allocator, and with N == 0 the store to alpha[0]
-   * below would land outside the allocation. Refuse such sizes up front. */
-  if (N <= 0 || K <= 0) return NULL;
-
-  ESL_ALLOC_WITH_TYPE(pri, ESL_MIXDCHLET*, sizeof(ESL_MIXDCHLET));
-  pri->pq    = NULL;   /* keep the object safe to destroy at every failure point */
-  pri->alpha = NULL;
-
-  ESL_ALLOC_WITH_TYPE(pri->pq, double*, sizeof(double) * N);
-  ESL_ALLOC_WITH_TYPE(pri->alpha, double**, sizeof(double *) * N);
-  pri->alpha[0] = NULL;
-
-  ESL_ALLOC_WITH_TYPE(pri->alpha[0], double*, sizeof(double) * N * K);
-  for (q = 1; q < N; q++)
-    pri->alpha[q] = pri->alpha[0] + q*K;
-
-  pri->N = N;
-  pri->K = K;
-  return pri;
-
- ERROR:
-  esl_mixdchlet_Destroy(pri);
-  return NULL;
-}
-
-/* Function: esl_mixdchlet_PerfectBipartiteMatchExists()
-* Synopsis: Given a 2D table representing presence of edges between vertices represented by
-* the rows and columns, test whether a perfect matching exists.
-* Note 1: this doesn't find a perfect matching, just checks if one exists.
-* Note 2: written as a private function for use by esl_mixdchlet_Compare
-* Incept: TW, Fri Nov 6 14:23:23 EST 2009 [janelia]
-*
-* Args: A - 2-dimensional square table representing presence of edges between vertices
-* N - size of that table
-*
-* Returns: <eslOK> if a perfect matching exists; <eslFAIL> otherwise.
-*/
-int
-esl_mixdchlet_PerfectBipartiteMatchExists(int **A, int N )
-{
-  /*
-   Strategy (a heuristic; the final step is unproven, as the original author notes):
-     - Alternate between a row pass and a column pass over the N x N table.
-     - In a pass, any unmatched line (row or column) with exactly one eslOK
-       cell is forced to pair with that cell's partner; the partner's whole
-       line is then overwritten with eslFAIL so nothing else can claim it.
-     - An unmatched line with no eslOK cell at all means no perfect matching
-       can exist: answer eslFAIL.
-     - A pass that forces no pairing means every remaining line has several
-       candidates; this is taken to mean a matching exists: answer eslOK.
-
-   The table A is modified in place. Worst case O(N^3).
-  */
-  int *row_done = new int[N];   /* 1 once row i has been paired    */
-  int *col_done = new int[N];   /* 1 once column j has been paired */
-  esl_vec_ISet(row_done, N, 0);
-  esl_vec_ISet(col_done, N, 0);
-
-  int  verdict   = eslOK;
-  int  remaining = N;           /* pairs still to be found                  */
-  bool scan_rows = true;        /* false: the current pass scans columns    */
-  bool dead_end  = false;       /* set when a line has no candidate at all  */
-
-  while (remaining > 0) {
-    int *done_here  = scan_rows ? row_done : col_done;
-    int *done_other = scan_rows ? col_done : row_done;
-    int  npaired    = 0;
-
-    for (int i = 0; i < N; i++) {
-      if (done_here[i] == 1) continue;
-
-      /* Count the candidates of line i and remember the last one seen. */
-      int ncand = 0;
-      int cand  = -1;
-      for (int j = 0; j < N; j++) {
-        const int cell = scan_rows ? A[i][j] : A[j][i];
-        if (cell == eslOK) {
-          ncand++;
-          cand = j;
-        }
-      }
-
-      if (ncand == 0) {         /* mixtures can't possibly match */
-        dead_end = true;
-        break;
-      }
-      if (ncand != 1) continue; /* several candidates: leave it for a later pass */
-
-      /* Forced pairing: lock both ends and retire the partner's line. */
-      npaired++;
-      done_here[i]     = 1;
-      done_other[cand] = 1;
-      for (int k = 0; k < N; k++) {
-        if (scan_rows) A[k][cand] = eslFAIL;   /* column <cand> is taken */
-        else           A[cand][k] = eslFAIL;   /* row <cand> is taken    */
-      }
-    }
-
-    if (dead_end) {
-      verdict = eslFAIL;
-      break;
-    }
-
-    remaining -= npaired;
-    if (npaired == 0) break;    /* only multi-candidate lines remain: assumed matchable */
-    scan_rows = !scan_rows;
-  }
-
-  delete[] row_done;
-  delete[] col_done;
-  return verdict;
-}
-
-/* Function: esl_mixdchlet_Compare()
-* Synopsis: Compare two mixture Dirichlets for equality.
-* Incept: SRE, Sat May 30 09:37:40 2009 [Stockholm]
- * Modified: TW, Fri Nov 6 10:55:20 EST 2009 [janelia]
- * (the original comparison assumed that component order
- * was in agreement between the two mixtures. This need
- * not be the case for mixtures to be isomorphic)
-*
-* Purpose: Compare mixture Dirichlet objects <d1> and <d2>
-* for equality. For real numbered values, equality
-* is defined by <esl_DCompare()> with a fractional
-* tolerance <tol>.
-*
-* Returns: <eslOK> on equality; <eslFAIL> otherwise.
-*/
-int
-esl_mixdchlet_Compare(ESL_MIXDCHLET *d1, ESL_MIXDCHLET *d2, double tol)
-{
-  /* Compares two mixtures without assuming their components are listed
-   * in the same order.
-   *
-   * Returns eslOK if they are judged equal within <tol>, eslFAIL if not
-   * (including when N or K differ), or the status set by
-   * ESL_ALLOC_WITH_TYPE() if the scratch table cannot be allocated.
-   */
-  int **A = NULL;   /* A[i][j] == eslOK: component i of d1 may pair with component j of d2 */
-  int   i,j;
-  int   status;
-
-  if (d1->N != d2->N) return eslFAIL;
-  if (d1->K != d2->K) return eslFAIL;
-
-  /* Allocate the N x N table. The row pointers are cleared before any
-   * row is allocated, so the cleanup code below never frees an
-   * uninitialized pointer if an allocation fails part-way. */
-  ESL_ALLOC_WITH_TYPE(A, int**, d1->N * sizeof(int*));
-  for (i=0; i<d1->N; i++)
-    A[i] = NULL;
-  for (i=0; i<d1->N; i++) {
-    ESL_ALLOC_WITH_TYPE(A[i], int*, d1->N * sizeof(int) );
-  }
-
-  /* Fill in the table: a pair is viable when both the mixture
-   * coefficients and the parameter vectors agree within <tol>. */
-  for (i=0; i<d1->N; i++)
-    {
-      for (j=0; j<d1->N; j++)
-        {
-          A[i][j] = esl_DCompare( d1->pq[i], d2->pq[j], tol);
-          if (A[i][j] == eslOK)
-            A[i][j] = esl_vec_DCompare(d1->alpha[i], d2->alpha[j], d1->K, tol) ;
-        }
-    }
-
-  /* In most cases, there should be only a one-to-one mapping (if any), which is easy to test.
-     But in the unlikely case of a many-to-one mapping, we need to do a little more.
-     The problem amounts to asking whether there exists a perfect bipartite matching (aka the marriage problem)
-  */
-  status = esl_mixdchlet_PerfectBipartiteMatchExists( A, d1->N);
-
-  /* Shared exit for the normal path and for allocation failures. */
- ERROR:
-  if (A != NULL) {
-    for (i=0; i<d1->N; i++)
-      free (A[i]);
-    free (A);
-  }
-  return status;
-}
-
-/* Function: esl_mixdchlet_Copy()
-* Synopsis: Copies mixture dirichlet object <d> to <d_dst>.
-* Both objects are of size <N> and <K>.
-* <d> is unchanged.
-* Incept: ER, Thu Jun 18 10:30:06 2009 [Janelia]
-*
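-* Purpose: Copy the mixture coefficients <pq> and every <alpha> parameter
-* vector of <d> into the already allocated <d_dst>.
-*
-* Returns: <eslOK> on success; <eslFAIL> if <d> and <d_dst> differ in <N> or <K>.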
-*/
-int
-esl_mixdchlet_Copy(ESL_MIXDCHLET *d, ESL_MIXDCHLET *d_dst)
-{
-  /* Source and destination must already have identical dimensions;
-   * nothing is copied otherwise. */
-  if (d->N != d_dst->N || d->K != d_dst->K) return eslFAIL;
-
-  /* Mixture coefficients first, then one parameter vector per component. */
-  esl_vec_DCopy(d->pq, d->N, d_dst->pq);
-
-  int comp = 0;
-  while (comp < d->N) {
-    esl_vec_DCopy(d->alpha[comp], d->K, d_dst->alpha[comp]);
-    comp++;
-  }
-
-  return eslOK;
-}
-
-
-/* Function: esl_mixdchlet_Destroy()
-* Incept: SRE, Fri Apr 8 11:00:19 2005 [St. Louis]
-*
-* Purpose: Free's the mixture Dirichlet <pri>.
-*/
-void
-esl_mixdchlet_Destroy(ESL_MIXDCHLET *pri)
-{
-  /* Releases a mixture, tolerating NULL and partially built objects
-   * (pq or alpha still NULL). free(NULL) is a no-op, so pq and alpha[0]
-   * need no individual guards. */
-  if (pri != NULL)
-    {
-      free(pri->pq);
-      if (pri->alpha != NULL)
-        {
-          free(pri->alpha[0]);   /* the parameter block hangs off the first row pointer */
-          free(pri->alpha);
-        }
-      free(pri);
-    }
-}
-
-
-/* Function: esl_mixdchlet_Dump()
-* Incept: ER, Fri Apr 8 11:00:19 2005 [Janelia]
-*
-* Purpose: Dump the mixture Dirichlet <d>.
-*/
-int
-esl_mixdchlet_Dump(FILE *fp, ESL_MIXDCHLET *d)
-{
-  /* Writes a human-readable listing of <d> to <fp>: a header line with N
-   * and K, then for each component its coefficient pq[q] followed by its
-   * K alpha parameters. Always returns eslOK. */
-  int q;  /* counter over mixture components */
-  int i;  /* counter over params */
-
-  fprintf(fp, "Mixture Dirichlet: N=%d K=%d\n", d->N, d->K);
-  for (q = 0; q < d->N; q++) {
-    /* Must go to <fp> like everything else; this line used to be sent to
-     * stdout with printf(), splitting the dump across two streams. */
-    fprintf(fp, "q[%d] %f\n", q, d->pq[q]);
-    for (i = 0; i < d->K; i++)
-      fprintf(fp, "alpha[%d][%d] %f\n", q, i, d->alpha[q][i]);
-  }
-
-  return eslOK;
-}
-
-
-/* Function: esl_mixdchlet_MPParameters()
-* Incept: SRE, Sat Apr 9 14:28:26 2005 [St. Louis]
-*
-* Purpose: Parameter estimation for a count vector <c> of cardinality
-* <K>, and a mixture Dirichlet prior <pri>. Calculates
-* mean posterior estimates for probability parameters, and
-* returns them in <p>. Also returns the posterior probabilities
-* of each Dirichlet mixture component, $P(q \mid c)$, in <mix>.
-* Caller must provide allocated space for <mix> (one entry per
-* mixture component, i.e. length <pri->N>) and for <p> (length <K>).
-*
-* Returns: <eslOK> on success; <mix> contains posterior probabilities of
-* the Dirichlet components, and <p> contains mean posterior
-* probability parameter estimates.
-*
-* Throws: <eslEINCOMPAT> if <pri> has different cardinality than <c>.
-*/
-int
-esl_mixdchlet_MPParameters(double *c, int K, ESL_MIXDCHLET *pri, double *mix, double *p)
-{
-  /* Mean posterior parameter estimation under a mixture Dirichlet prior.
-   *
-   *   c   - observed counts, [0..K-1]
-   *   K   - length of <c>; must equal pri->K
-   *   pri - the mixture prior
-   *   mix - RETURN: P(q | c) for each component, [0..pri->N-1]
-   *   p   - RETURN: estimated probabilities, [0..K-1]
-   *
-   * Returns eslOK on success; raises eslEINCOMPAT through ESL_EXCEPTION()
-   * when K != pri->K.
-   */
-  int    q;      /* counter over mixture components */
-  int    x;      /* counter over parameters         */
-  double val;
-  double totc;   /* sum of the counts               */
-  double tota;   /* sum of alpha for one component  */
-
-  if (K != pri->K) {
-    ESL_EXCEPTION(eslEINCOMPAT, "cvec's K != mixture Dirichlet's K");
-  }
-
-  /* Calculate mix[], the posterior probability
-   * P(q | c) of mixture component q given the count vector c.
-   * Components with zero prior weight get log-probability -infinity.
-   */
-  for (q = 0; q < pri->N; q++)
-    {
-      if (pri->pq[q] > 0.0)
-        {
-          esl_dirichlet_LogProbData(c, pri->alpha[q], K, &val);
-          mix[q] = val + log(pri->pq[q]);
-        }
-      else
-        mix[q] = -HUGE_VAL;
-    }
-  esl_vec_DLogNorm(mix, pri->N); /* mix[q] is now P(q|c) */
-
-  /* Accumulate the posterior-weighted estimates. The alpha total depends
-   * only on q, so the component loop is outermost and the total is
-   * computed once per component rather than once per (x,q) pair
-   * (assumes esl_vec_DSum() is a pure sum - confirm). Each p[x] still
-   * receives its terms in the order q = 0..N-1. */
-  totc = esl_vec_DSum(c, K);
-  esl_vec_DSet(p, K, 0.);
-  for (q = 0; q < pri->N; q++)
-    {
-      tota = esl_vec_DSum(pri->alpha[q], K);
-      for (x = 0; x < K; x++)
-        p[x] += mix[q] * (c[x] + pri->alpha[q][x]) / (totc + tota);
-    }
-  /* should be normalized already, but for good measure: */
-  esl_vec_DNorm(p, K);
-  return eslOK;
-}
-/*---------------- end, ESL_MIXDCHLET ---------------------------*/
-
-
-/*****************************************************************
-*# 2. Dirichlet likelihood functions
-*****************************************************************/
-
-/* Function: esl_dirichlet_LogProbData()
-* Incept: SRE, Tue Nov 2 14:22:37 2004 [St. Louis]
-*
-* Purpose: Given an observed count vector $c[0..K-1]$,
-* and a simple Dirichlet density parameterized by
-* $\alpha[0..K-1]$;
-* calculate $\log P(c \mid \alpha)$.
-*
-* This is $\int P(c \mid p) P(p \mid \alpha) dp$,
-* an integral that can be solved analytically.
-*
-* Args: c - count vector, [0..K-1]
-* alpha - Dirichlet parameters, [0..K-1]
-* K - size of c, alpha vectors
-* ret_answer - RETURN: log P(c | \alpha)
-*
-* Returns: <eslOK> on success, and puts result $\log P(c \mid \alpha)$
-* in <ret_answer>.
-*/
-int
-esl_dirichlet_LogProbData(double *c, double *alpha, int K, double *ret_answer)
-{
-  /* Computes log P(c | alpha) for a single Dirichlet and stores it in
-   * <*ret_answer>. The result is assembled from log-gamma terms: one
-   * group per residue inside the loop, and one group for the three
-   * vector totals afterwards. Always returns eslOK. */
-  double logp       = 0.0;
-  double tot_post   = 0.0;   /* sum of (c[x] + alpha[x]) */
-  double tot_alpha  = 0.0;   /* sum of alpha[x]          */
-  double tot_counts = 0.0;   /* sum of c[x]              */
-  double lg_a, lg_b, lg_c;   /* scratch for log-gamma results */
-  int    idx;
-
-  for (idx = 0; idx < K; idx++)
-    {
-      tot_post   += c[idx] + alpha[idx];
-      tot_alpha  += alpha[idx];
-      tot_counts += c[idx];
-
-      esl_stats_LogGamma(alpha[idx] + c[idx], &lg_a);
-      esl_stats_LogGamma(c[idx] + 1.,         &lg_b);
-      esl_stats_LogGamma(alpha[idx],          &lg_c);
-      logp += lg_a - lg_b - lg_c;
-    }
-
-  esl_stats_LogGamma(tot_post,        &lg_a);
-  esl_stats_LogGamma(tot_alpha,       &lg_b);
-  esl_stats_LogGamma(tot_counts + 1., &lg_c);
-  logp += lg_b + lg_c - lg_a;
-
-  *ret_answer = logp;
-  return eslOK;
-}
-
-/* Function: esl_dirichlet_LogProbData_Mixture()
-* Incept: ER, Wed Jun 17 14:41:23 EDT 2009 [janelia]
-*
-* Purpose: Given an observed count vector $c[0..K-1]$,
-* and a mixture Dirichlet density parameterized by
-* $\alpha_1[0..K-1]$ ... $\alpha_N[0..K-1]$,
-* calculate $\log \sum_i pq_i * P(c \mid \alpha_i)$.
-*
-*
-* Args: c - count vector, [0..K-1]
-* d - Dirichlet parameters, [0..K-1]
-* ret_answer - RETURN: log P(c | \alpha)
-*
-* Returns: <eslOK> on success, and puts result $\log P(c \mid \alpha)$
-* in <ret_answer>.
-*/
-int
-esl_dirichlet_LogProbData_Mixture(double *c, ESL_MIXDCHLET *d, double *ret_answer)
-{
-  /* Computes log sum_q pq[q] * P(c | alpha_q) and stores it in
-   * <*ret_answer>. Returns eslOK, or the status set by
-   * ESL_ALLOC_WITH_TYPE() if the temporary vector cannot be allocated
-   * (in which case <*ret_answer> is not written). */
-  double *logterm = NULL;   /* logterm[q] = log pq[q] + log P(c | alpha_q) */
-  double  loglik;
-  int     comp;
-  int     status;
-
-  ESL_ALLOC_WITH_TYPE(logterm,double*, sizeof(double)*d->N);
-
-  for (comp = 0; comp < d->N; comp++)
-    {
-      esl_dirichlet_LogProbData(c, d->alpha[comp], d->K, &loglik);
-      logterm[comp] = loglik + log(d->pq[comp]);
-    }
-
-  /* Combine the per-component terms in log space. */
-  *ret_answer = esl_vec_DLogSum(logterm, d->N);
-
-  free(logterm);
-  return eslOK;
-
- ERROR:
-  free(logterm);
-  return status;
-}
-
-/* esl_dirichlet_LogProbDataSet_Mixture()
-* Incept: TW, Wed Nov 4 14:10:22 EST 2009 [janelia]
-*
-* Purpose: Given an observed set of count vectors $c[0..N-1][0..K-1]$,
-* and a mixture Dirichlet density parameterized by
-* $\alpha_1[0..K-1]$ ... $\alpha_N[0..K-1]$,
-* calculate $ \sum_n \log \sum_i pq_i * P(c[n] \mid \alpha_i)$.
-* This is a convenience function, which simply wraps
-* esl_dirichlet_LogProbData_Mixture
-*
-*
-* Args: ntrials - number of count vectors (aka N)
-* counts - count vector set, [0..N-1][0..K-1]
-* md - Dirichlet parameters
-* ret_answer - RETURN: log P(c | \alpha)
-*
-* Returns: <eslOK> on success, and puts result $\log P(c \mid \alpha)$
-* in <ret_answer>.
-*/
-static int
-esl_dirichlet_LogProbDataSet_Mixture(int ntrials, double** counts, ESL_MIXDCHLET* md, double *ret_answer)
-{
- double val;
- int i;
-
- *ret_answer = 0;
- for (i = 0; i < ntrials; i++) {
- esl_dirichlet_LogProbData_Mixture(counts[i], md, &val);
- *ret_answer += val;
- }
- return eslOK;
-}
-
-/* Function: esl_dirichlet_LogProbProbs()
-* Incept: SRE, Sat Apr 9 14:35:17 2005 [St. Louis]
-*
-* Purpose: Given Dirichlet parameter vector <alpha> and a probability
-* vector <p>, both of cardinality <K>; return
-* $\log P(p \mid alpha)$.
-*
-* Returns: <eslOK> on success, and the result is in <ret_answer>.
-*
-* Xref: Sjolander (1996) appendix, lemma 2.
-*/
-int
-esl_dirichlet_LogProbProbs(double *p, double *alpha, int K, double *ret_answer)
-{
- double sum; /* for Gammln(|alpha|) in Z */
- double logp; /* RETURN: log P(p|alpha) */
- double val;
- int x;
-
- sum = logp = 0.0;
- for (x = 0; x < K; x++)
- if (p[x] > 0.0) /* any param that is == 0.0 doesn't exist */
- {
- esl_stats_LogGamma(alpha[x], &val);
- logp -= val;
- logp += (alpha[x]-1.0) * log(p[x]);
- sum += alpha[x];
- }
- esl_stats_LogGamma(sum, &val);
- logp += val;
- *ret_answer = logp;
- return eslOK;
-}
-/*----------- end, Dirichlet likelihood functions ---------------*/
-
-/*****************************************************************
-* Dirichlet Maximum likelihood fit from counts
-*****************************************************************/
-
-#ifdef eslAUGMENT_MINIMIZER
-/* This structure is used to sneak the data into minimizer's generic
-* (void *) API for all aux data
-*/
-struct mixdchlet_data {
- ESL_MIXDCHLET *d; /* the dirichlet mixture parameters */
- double **c; /* count vector array [0..nc-1][0..alphabet_size(d->K)] */
- int nc; /* number of count samples */
-};
-
-/*****************************************************************
-* Parameter vector packing/unpacking
-*
-* The conjugate gradient code takes a single parameter vector <p>,
-* where the values are unconstrained real numbers.
-*
-* We have a mixture Dirichlet with two kinds of parameters.
-* pq_i are mixture coefficients, constrained to be >= 0 and
-* \sum_i pq_i = 1. alpha^i_x are the Dirichlet parameters
-* for component i, constrained to be > 0.
-*
-* Our p's are therefore not only packed into a single vector;
-* they're reparameterized to implement the constraints:
-* for a Dirichlet parameter:
-* alpha = exp(p) p = log(alpha)
-* (thus, alpha > 0 for all real p)
-*
-* for a mixture coefficient:
-* pq = exp(-exp(p)) / \sum_a exp(-exp(p_a))
-* (thus, 0 < pq < 1 and \sum_a pq_a = 1, for all real p)
-*
-* In my hands (ER), this parametrization works better that
-* pq = exp(p) / \sum_a exp(p_a)
-*
-* Conjugate gradients optimizes the <p> parameter vector,
-* but we can convert that back out into a Dirichlet answer.
-*
-* The packing order is: the first N terms of a parameter vector are
-* the mixture coefficients pq_i. N different alpha_i vectors follow.
-*
-* [0 ... N-1] [0 ... K-1] [0 ... K-1] ...
-* q's alpha_0 alpha_1 ...
-*
-* In both functions below, p, pq, and alpha are all allocated
-* and free'd by the caller.
-* p : length N + N*K = N*(K+1) [0.. N*(K+1)-1]
-* pq : length N, [0..N-1]
-* alpha : length NxK, [0..N-1][0..K-1].
-*
- * Special cases:
- *
- * - For (N >= 1 && K == 1) there is nothing to optimize.
- *
- * - For (N == 1 && K > 1) the only variables to optimize are the K alphas
- *
-* [0 ... K-1]
-* alpha
-*
-* p : length N*K = N*K [0.. N*K-1]
-* alpha : length NxK, [0][0..K-1].
-*
-*/
-static void
-mixdchlet_pack_paramvector(double *p, int np, ESL_MIXDCHLET *d)
-{
- int nq; /* number the mixture components to optimize */
- int q; /* counter over mixture components */
- int x; /* counter in alphabet size */
-
- nq = (d->N > 1)? d->N : 0;
-
- /* the mixture coeficients */
- for (q = 0; q < nq; q++)
- p[q] = log(d->pq[q]);
- //p[q] = log(-log(d->pq[q])); TW changed to the above; this was causing fit to fail
-
- /* the dirichlet parameters */
- for (q = 0; q < d->N; q++)
- for (x = 0; x < d->K; x++)
- p[nq + q*d->K + x] = log(d->alpha[q][x]);
-
-}
-
-/* Same as above but in reverse: given parameter vector <p>,
-* do appropriate c.o.v. back to desired parameter space, and
-* update the mixdchlet <d>.
-*/
-static void
-mixdchlet_unpack_paramvector(double *p, int np, ESL_MIXDCHLET *d)
-{
- int nq; /* number the mixture components to optimize */
- int q; /* counter over mixture components */
- int x; /* counter in alphabet size */
-
- nq = (d->N > 1)? d->N : 0;
-
- /* the mixture coeficients */
- for (q = 0; q < nq; q++)
- d->pq[q] = exp(p[q]);
- //d->pq[q] = exp(-exp(p[q])); TW changed to the above; this was causing fit to fail
- esl_vec_DNorm(d->pq, d->N);
-
- /* the dirichlet parameters */
- for (q = 0; q < d->N; q++)
- for (x = 0; x < d->K; x++)
- d->alpha[q][x] = exp(p[nq + q*d->K + x]);
-
- /*esl_mixdchlet_Dump(stdout, d);*/
-
-}
-
-/* The log likelihood function to be optimized by ML fitting:
-* This needs to be careful of a case where a lambda = inf.
-*/
-static double
-mixdchlet_complete_func(double *p, int np, void *dptr)
-{
- struct mixdchlet_data *data = (struct mixdchlet_data *) dptr;
- ESL_MIXDCHLET *d = data->d;
- double logPsample;
- double logP = 0.;
- int m; /* counter over count samples */
-
- mixdchlet_unpack_paramvector(p, np, d);
-
- for (m = 0; m < data->nc; m++) {
- esl_dirichlet_LogProbData_Mixture(data->c[m], d, &logPsample);
- logP += logPsample;
- }
-
- if (std::isnan(logP)) esl_fatal("logP is NaN");
- return -logP;
-}
-
-/* The gradient of the NLL w.r.t. each free parameter in p.
- * Modified by ER 11/03/09 to compute derivative of log(alpha) instead of alpha
- * (committed by TW)
-*/
-static void
-mixdchlet_complete_gradient(double *p, int np, void *dptr, double *dp)
-{
- struct mixdchlet_data *data = (struct mixdchlet_data *) dptr;
- ESL_MIXDCHLET *d = data->d;
- double sum_alpha; /* \sum_x alpha[q][x] */
- double sum_c; /* \sum_x c[m][x] */
- double val; /* val is p_q * P(c_m | alpha_q) */
- double *valsum; /* valsum is sum_q [p_q * P(c_m | alpha_q)] */
- double term; /* term is q * P(alpha_q | c_m) */
- double psi1; /* Psi(sum_alpha[q]) */
- double psi2; /* Psi(sum_alpha[q] + sum_c[m]) */
- double psi3; /* Psi(sum_alpha[q][x]+ c[m][x]) */
- double psi4; /* Psi(sum_alpha[q][x]) */
- int nq; /* number the mixture components to optimize */
- int m; /* counter over count samples */
- int q; /* counter over mixture components */
- int x; /* counter in alphabet size */
-
- nq = (d->N > 1)? d->N : 0;
-
- mixdchlet_unpack_paramvector(p, np, d);
-
- /* initialize */
- valsum = (double*)malloc(sizeof(double) * data->nc);
- esl_vec_DSet(dp, np, 0.0);
-
- /* Some precalculation of sums for efficiency.
- * valsum is sum_q [p_q * P(c_m | alpha_q)]
- */
- for (m = 0; m < data->nc; m++)
- esl_dirichlet_LogProbData_Mixture(data->c[m], d, &(valsum[m]));
-
- for (q = 0; q < d->N; q++) {
-
- sum_alpha = esl_vec_DSum(d->alpha[q], d->K);
- esl_stats_Psi(sum_alpha, &psi1); /* psi1 = Psi(sum_alpha[q]) */
-
- for (m = 0; m < data->nc; m++) {
- sum_c = esl_vec_DSum(data->c[m], d->K);
- esl_stats_Psi(sum_alpha+sum_c, &psi2); /* psi2 = Psi(sum_alpha[q] + sum_c[m]) */
-
- /* val is pq * P(c_m | alpha_q) */
- esl_dirichlet_LogProbData(data->c[m], d->alpha[q], d->K, &val);
-
-
- /* derivative respect to the mixture coeficients */
- /* term is pq * P(alpha_q | c_m) */
- term = exp(val - valsum[m] + log(d->pq[q]));
- if (nq > 0) dp[q] += term - d->pq[q];
-
-
- /* derivative respect to the dirichlet parameters */
- for (x = 0; x < d->K; x++) {
- esl_stats_Psi(d->alpha[q][x]+data->c[m][x], &psi3); /* psi3 = Psi(sum_alpha[q][x]+ c[m][x]) */
- esl_stats_Psi(d->alpha[q][x], &psi4); /* psi4 = Psi(sum_alpha[q][x]+ c[m][x]) */
-
- dp[nq + q*d->K + x] += term * d->alpha[q][x] * (psi1 - psi2 + psi3 - psi4);
-
-
- }
- }
- }
-
-
-
- /* Return the negative, because we're minimizing the NLP, not maximizing.
- */
- for (q = 0; q < nq; q++) {
- if (std::isnan(dp[q])) esl_fatal("dp for pq[%d] is NaN", q);
- dp[q] *= -1.;
- }
- for (q = 0; q < d->N; q++)
- for (x = 0; x < d->K; x++) {
- if(std::isnan(dp[nq + q*d->K + x])) esl_fatal("dp for alpha[%d][%d] is NaN", q, x);
- dp[nq + q*d->K + x] *= -1.0;
- }
-
- free(valsum);
-}
-
-/* Function: esl_mixdchlet_Fit()
-* Incept: ER, Wed Jun 17 10:58:50 2009 [Janelia]
-*
-* Purpose: Given a count vector <c>, and an initial guess <d> for
-* a mixdchlet, find maximum likelihood parameters
-* by conjugate gradient descent optimization, starting
-* from <d> and leaving the final optimized solution in
-* <d>.
-*
-* Returns: <eslOK> on success, and <d> contains the fitted
-* mixdchlet parameters.
-*
-* Throws: <eslEMEM> on allocation error, and <d> is left in
-* in its initial state.
-*/
-int
-esl_mixdchlet_Fit(double **c, int nc, ESL_MIXDCHLET *d, int be_verbose)
-{
- struct mixdchlet_data data;
- double *p = NULL;
- double *u = NULL;
- double *wrk = NULL;
- double tol;
- double fx;
- int np; /* number of parameters to optimize */
- int nq; /* number the mixture components to optimize */
- int i;
- int status;
-
- /* nothing to optimize for a dirichlet of K = 1 (alphabet size = 1)*/
- if (d->K == 1) return eslOK;
-
- tol = 1e-6;
-
- /* Allocate parameters
- */
- nq = (d->N > 1)? d->N : 0;
- np = nq + d->N*d->K;
- ESL_ALLOC_WITH_TYPE(p, double*, sizeof(double) * np);
- ESL_ALLOC_WITH_TYPE(u, double*, sizeof(double) * np);
- ESL_ALLOC_WITH_TYPE(wrk, double*, sizeof(double) * np * 4);
-
- /* Copy shared info into the "data" structure
- */
- data.d = d;
- data.c = c;
- data.nc = nc;
-
- /* From d, create the parameter vector.
- */
- mixdchlet_pack_paramvector(p, np, d);
-
- /* Define the step size vector u.
- */
- for (i = 0; i < np; i++) u[i] = 0.1;
-
- /* Feed it all to the mighty optimizer.
- */
- status = esl_min_ConjugateGradientDescent(p, u, np,
- &mixdchlet_complete_func,
- &mixdchlet_complete_gradient,
- (void *) (&data), tol, wrk, &fx);
- if (status != eslOK && status != eslENOHALT) // eslENOHALT? Then take what we've got - it's probably pretty good
- goto ERROR;
-
- /* Convert the final parameter vector back to a mixdchlet
- */
- mixdchlet_unpack_paramvector(p, np, d);
-
- free(p);
- free(u);
- free(wrk);
- return eslOK;
-
-ERROR:
- if (p != NULL) free(p);
- if (u != NULL) free(u);
- if (wrk != NULL) free(wrk);
- return status;
-}
-
-
-/* Function: esl_mixdchlet_Fit_Multipass()
- * Incept: TW, Wed Nov 4 15:00:02 EST 2009 [Janelia]
- *
- * Purpose: Given a set of count vectors <c>, find maximum
- * likelihood mixdchlet parameters. A number <reps>
- * of initial guesses <d> for a mixdchlet are used,
- * with conjugate gradient descent performed for
- * each guess. The mixdchlet returned is the one
- * among these multiple local searches with
- * best likelihood. This is a convenience
- * function, which simply wraps esl_mixdchlet_Fit
- * for multiple start points.
- *
- * Args: r - pointer to random generator
- * c - set of count vectors, [0..M-1][0..N-1]
- * nc - number of count samples
- * reps - number of random starting points
- * best_md - an initialized mixdchlet, which will
- * contain the correct q and alpha values
- * at completion
- * verbose - if >0, output is verbose
- *
- * Returns: <eslOK> on success, and <best_md> contains the fitted
- * mixdchlet parameters with best likelihood.
- *
- * Throws: <eslEMEM> on allocation error, and <d> is left in
- * in its initial state.
- */
-int
-esl_mixdchlet_Fit_Multipass(ESL_RANDOMNESS *r, double **c, int nc, int reps, ESL_MIXDCHLET *best_md, int verbose)
-{
- int i, q, k, status;
- double best_lk = -eslINFINITY;
- double lk;
- ESL_MIXDCHLET *md = esl_mixdchlet_Create(best_md->N, best_md->K);
-
- int err_cnt = 0;
-
- for (i=0; i<reps; i++) {
-
- //for each pass, establish a new random starting point
- for (q = 0; q < md->N; q++) {
- md->pq[q] = esl_rnd_UniformPositive(r);
-
- for (k = 0; k < md->K; k++)
- md->alpha[q][k] = 10.0*esl_rnd_UniformPositive(r);
- }
- esl_vec_DNorm(md->pq, md->N);
-
- //then use Fit to do local search
- status = esl_mixdchlet_Fit(c, nc, md, 0);
- if (status != eslOK) {
- err_cnt++;
- if (err_cnt==2*reps) {
- goto ERROR;
- } else {
- i--; // try another starting point
- continue;
- }
- }
-
- esl_dirichlet_LogProbDataSet_Mixture (nc, c, md, &lk);
-
- if (verbose>0) {
- fprintf(stderr, "Repetition # %d\n------------\n", i);
- esl_mixdchlet_Dump(stderr, md);
- fprintf(stderr, "llk = %.3f (vs best = %.3f)\n", lk, best_lk);
- }
-
- if (lk > best_lk) {
- if (verbose>0)
- fprintf(stderr, "... so copy md -> best_md\n");
- best_lk = lk;
- esl_mixdchlet_Copy(md, best_md);
- }
- }
-
- if (verbose>0) {
- fprintf(stdout, "\n\n----------------\nbest mixture:\n");
- esl_mixdchlet_Dump(stdout, best_md);
- fprintf(stdout, "llk = %.3f", best_lk);
- }
-
-
- ERROR:
- if (md != NULL) free(md);
- return status;
-
-}
-
-#endif /*eslAUGMENT_MINIMIZER*/
-/*----------- end, Dirichlet Maximum likelihood fit from counts ---------------*/
-
-
-/*****************************************************************
-*# 3. Sampling from Dirichlets: requires <esl_random>
-*****************************************************************/
-#ifdef eslAUGMENT_RANDOM
-/* Function: esl_dirichlet_DSample()
-* Incept: SRE, Tue Nov 2 14:30:31 2004 [St. Louis]
-*
-* Purpose: Given a Dirichlet density parameterized by $\alpha[0..K-1]$,
-* sample a probability vector $p[0..K-1]$ from
-* $P(p \mid \alpha)$.
-*
-* Args: r - random number generation object
-* alpha - parameters of Dirichlet density [0..K-1]
-* K - vector size
-* p - RETURN: sampled probability vector
-* (caller allocates 0..K-1).
-*
-* Returns: <eslOK>, and <p> will contain the sampled vector.
-*/
-int
-esl_dirichlet_DSample(ESL_RANDOMNESS *r, double *alpha, int K, double *p)
-{
- int x;
-
- for (x = 0; x < K; x++)
- p[x] = esl_rnd_Gamma(r, alpha[x]);
- esl_vec_DNorm(p, K);
- return eslOK;
-}
-
-/* Function: esl_dirichlet_FSample()
-* Incept: SRE, Sat Jan 6 17:09:05 2007 [Casa de Gatos]
-*
-* Purpose: Same as <esl_dirichlet_DSample()>, except it
-* works in single-precision floats, not doubles.
-*/
-int
-esl_dirichlet_FSample(ESL_RANDOMNESS *r, float *alpha, int K, float *p)
-{
- int x;
-
- for (x = 0; x < K; x++)
- p[x] = (float) esl_rnd_Gamma(r, (double) alpha[x]);
- esl_vec_FNorm(p, K);
- return eslOK;
-}
-
-/* Function: esl_dirichlet_DSampleUniform()
-* Incept: SRE, Thu Aug 11 10:12:49 2005 [St. Louis]
-*
-* Purpose: Sample a probability vector $p[0..K-1]$ uniformly, by
-* sampling from a Dirichlet of $\alpha_i = 1.0 \forall i$.
-*
-* Args: r - source of random numbers
-* K - vector size
-* p - RETURN: sampled prob vector, caller alloc'ed 0..K-1
-*
-* Returns: <eslOK>, and <p> will contain the sampled vector.
-*
-* Throws: (no abnormal error conditions)
-*/
-int
-esl_dirichlet_DSampleUniform(ESL_RANDOMNESS *r, int K, double *p)
-{
- int x;
- for (x = 0; x < K; x++)
- p[x] = esl_rnd_Gamma(r, 1.0);
- esl_vec_DNorm(p, K);
- return eslOK;
-}
-
-/* Function: esl_dirichlet_FSampleUniform()
-* Incept: SRE, Sat Jan 6 17:10:54 2007 [Casa de Gatos]
-*
-* Purpose: Same as <esl_dirichlet_DSampleUniform()>, except it
-* works in single-precision floats, not doubles.
-*/
-int
-esl_dirichlet_FSampleUniform(ESL_RANDOMNESS *r, int K, float *p)
-{
- int x;
- for (x = 0; x < K; x++)
- p[x] = (float) esl_rnd_Gamma(r, 1.0);
- esl_vec_FNorm(p, K);
- return eslOK;
-}
-
-
-/* Function: esl_dirichlet_SampleBeta()
-* Incept: SRE, Sat Oct 25 12:20:31 2003 [Stanford]
-*
-* Purpose: Samples from a Beta(theta1, theta2) density, leaves answer
-* in <ret_answer>. (Special case of sampling Dirichlet.)
-*
-* Returns: <eslOK>.
-*/
-int
-esl_dirichlet_SampleBeta(ESL_RANDOMNESS *r, double theta1, double theta2, double *ret_answer)
-{
- double p, q;
-
- p = esl_rnd_Gamma(r, theta1);
- q = esl_rnd_Gamma(r, theta2);
- *ret_answer = p / (p+q);
- return eslOK;
-}
-#endif /*eslAUGMENT_RANDOM*/
-/*---------------- end, Dirichlet sampling ----------------------*/
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_dirichlet.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_dirichlet.h
deleted file mode 100644
index aa18334..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_dirichlet.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/* Functions relevant to Beta, gamma, and Dirichlet densities,
-* and simple and mixture Dirichlet priors.
-*
-* SRE, Tue Nov 2 14:35:06 2004 [St. Louis]
-* SVN $Id: esl_dirichlet.h 348 2009-06-19 19:09:26Z rivase $
-*/
-#ifndef ESL_DIRICHLET_INCLUDED
-#define ESL_DIRICHLET_INCLUDED
-
-#include <stdio.h>
-
-/* Structure: MIXDCHLET
-*
-* A mixture Dirichlet density, usually used as a prior
-* for a multinomial model (turning count vectors into probability
-* parameters).
-*/
-typedef struct {
- /*::cexcerpt::dirichlet_mixdchlet::begin::*/
- double *pq; /* mixture coefficients pq[0..N-1] */
- double **alpha; /* Dirichlet params alpha[0..N-1][0..K-1] */
- int N; /* number of mixtures, e.g. 9 for Sjolander */
- int K; /* alphabet size, e.g. 20 */
- /*::cexcerpt::dirichlet_mixdchlet::end::*/
-} ESL_MIXDCHLET;
-
-extern ESL_MIXDCHLET *esl_mixdchlet_Create(int N, int K);
-extern int esl_mixdchlet_Compare(ESL_MIXDCHLET *d1, ESL_MIXDCHLET *d2, double tol);
-extern int esl_mixdchlet_Copy(ESL_MIXDCHLET *d, ESL_MIXDCHLET *d_dst);
-extern int esl_mixdchlet_Dump(FILE *fp, ESL_MIXDCHLET *d);
-extern void esl_mixdchlet_Destroy(ESL_MIXDCHLET *pri);
-extern int esl_mixdchlet_MPParameters(double *c, int K,
- ESL_MIXDCHLET *pri, double *mix,
- double *p);
-
-extern int esl_dirichlet_LogProbData(double *c, double *alpha, int K,
- double *ret_answer);
-extern int esl_dirichlet_LogProbData_Mixture(double *c, ESL_MIXDCHLET *d,
- double *ret_answer);
-extern int esl_dirichlet_LogProbProbs(double *p, double *alpha, int K,
- double *ret_answer);
-
-/* Optional fitting code, when augmented by minimizing module.
-*/
-#ifdef eslAUGMENT_MINIMIZER
-#include "esl_minimizer.h"
-extern int esl_mixdchlet_Fit(double **c, int nc, ESL_MIXDCHLET *d, int be_verbose);
-
-#endif /*eslAUGMENT_MINIMIZER*/
-
-/* Optional sampling code, when augmented by random module.
-*/
-#ifdef eslAUGMENT_RANDOM
-#include <hmmer3/easel/esl_random.h>
-
-extern int esl_dirichlet_DSample(ESL_RANDOMNESS *r, double *alpha, int K, double *p);
-extern int esl_dirichlet_FSample(ESL_RANDOMNESS *r, float *alpha, int K, float *p);
-extern int esl_dirichlet_DSampleUniform(ESL_RANDOMNESS *r, int K, double *p);
-extern int esl_dirichlet_FSampleUniform(ESL_RANDOMNESS *r, int K, float *p);
-extern int esl_dirichlet_SampleBeta(ESL_RANDOMNESS *r, double theta1,
- double theta2, double *ret_answer);
-#endif /*eslAUGMENT_RANDOM*/
-
-
-
-#endif /*!ESL_DIRICHLET_INCLUDED*/
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_distance.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_distance.cpp
deleted file mode 100644
index abf156f..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_distance.cpp
+++ /dev/null
@@ -1,894 +0,0 @@
-/* Pairwise identities, distances, and distance matrices.
-*
-* Contents:
-* 1. Pairwise distances for aligned text sequences.
-* 2. Pairwise distances for aligned digital seqs. [alphabet]
-* 3. Distance matrices for aligned text sequences. [dmatrix]
-* 4. Distance matrices for aligned digital sequences. [alphabet,dmatrix]
-* 5. Average pairwise identity for multiple alignments. [alphabet,random]
-* 6. Private (static) functions.
-* 10. Copyright notice and license.
-*
- * SVN $Id: esl_distance.c 389 2009-09-11 17:53:26Z eddys $
-* SRE, Mon Apr 17 20:05:43 2006 [St. Louis]
-*/
-
-#include <hmmer3/easel/esl_config.h>
-
-#include <ctype.h>
-#include <string.h>
-#include <math.h>
-
-#include <hmmer3/easel/easel.h>
-#ifdef eslAUGMENT_ALPHABET
-#include <hmmer3/easel/esl_alphabet.h>
-#endif
-#ifdef eslAUGMENT_DMATRIX
-#include <hmmer3/easel/esl_dmatrix.h>
-#endif
-#ifdef eslAUGMENT_RANDOM
-#include <hmmer3/easel/esl_random.h>
-#endif
-#include "esl_distance.h"
-
-/* Forward declaration of our static functions.
-*/
-static int jukescantor(int n1, int n2, int alphabet_size, double *opt_distance, double *opt_variance);
-
-
-/*****************************************************************
-* 1. Pairwise distances for aligned text sequences.
-*****************************************************************/
-
-/* Function: esl_dst_CPairId()
-* Synopsis: Pairwise identity of two aligned text strings.
-* Incept: SRE, Mon Apr 17 20:06:07 2006 [St. Louis]
-*
-* Purpose: Calculates pairwise fractional identity between two
-* aligned character strings <asq1> and <asq2>.
-* Return this distance in <opt_pid>; return the
-* number of identities counted in <opt_nid>; and
-* return the denominator <MIN(len1,len2)> in
-* <opt_n>.
-*
-* Alphabetic symbols <[a-zA-Z]> are compared
-* case-insensitively for identity. Any nonalphabetic
-* character is assumed to be a gap symbol.
-*
-* This simple comparison rule is unaware of synonyms and
-* degeneracies in biological alphabets. For a more
-* sophisticated and biosequence-aware comparison, use
-* digitized sequences and the <esl_dst_XPairId()> function
-* instead.
-*
-* Args: asq1 - aligned character string 1
-* asq2 - aligned character string 2
-* opt_pid - optRETURN: pairwise identity, 0<=x<=1
-* opt_nid - optRETURN: # of identities
-* opt_n - optRETURN: denominator MIN(len1,len2)
-*
-* Returns: <eslOK> on success. <opt_pid>, <opt_nid>, <opt_n>
-* contain the answers (for whichever were passed non-NULL).
-*
-* Throws: <eslEINVAL> if the strings are different lengths
-* (not aligned).
-*/
-int
-esl_dst_CPairId(const char *asq1, const char *asq2,
- double *opt_pid, int *opt_nid, int *opt_n)
-{
- int status;
- int idents; /* total identical positions */
- int len1, len2; /* lengths of seqs */
- int i; /* position in aligned seqs */
-
- idents = len1 = len2 = 0;
- for (i = 0; asq1[i] != '\0' && asq2[i] != '\0'; i++)
- {
- if (isalpha(asq1[i])) len1++;
- if (isalpha(asq2[i])) len2++;
- if (isalpha(asq1[i]) && isalpha(asq2[i])
- && toupper(asq1[i]) == toupper(asq2[i]))
- idents++;
- }
- if (asq1[i] != '\0' || asq2[i] != '\0')
- ESL_XEXCEPTION(eslEINVAL, "strings not same length, not aligned");
-
- if (opt_pid != NULL) *opt_pid = ( len1==0 ? 0. : (double) idents / (double) ESL_MIN(len1,len2));
- if (opt_nid != NULL) *opt_nid = idents;
- if (opt_n != NULL) *opt_n = len1;
- return eslOK;
-
-ERROR:
- if (opt_pid != NULL) *opt_pid = 0.;
- if (opt_nid != NULL) *opt_nid = 0;
- if (opt_n != NULL) *opt_n = 0;
- return status;
-}
-
-
-/* Function: esl_dst_CJukesCantor()
-* Synopsis: Jukes-Cantor distance for two aligned strings.
-* Incept: SRE, Tue Apr 18 14:00:37 2006 [St. Louis]
-*
-* Purpose: Calculate the generalized Jukes-Cantor distance between
-* two aligned character strings <as1> and <as2>, in
-* substitutions/site, for an alphabet of <K> residues
-* (<K=4> for nucleic acid, <K=20> for proteins). The
-* maximum likelihood estimate for the distance is
-* optionally returned in <opt_distance>. The large-sample
-* variance for the distance estimate is
-* optionally returned in <opt_variance>.
-*
-* Alphabetic symbols <[a-zA-Z]> are compared
-* case-insensitively to count the number of identities
-* (<n1>) and mismatches (<n2>>). Any nonalphabetic
-* character is assumed to be a gap symbol, and aligned
-* columns containing gap symbols are ignored. The
-* fractional difference <D> used to calculate the
-* Jukes/Cantor distance is <n2/n1+n2>.
-*
-* Args: K - size of the alphabet (4 or 20)
-* as1 - 1st aligned seq, 0..L-1, \0-terminated
-* as2 - 2nd aligned seq, 0..L-1, \0-terminated
-* opt_distance - optRETURN: ML estimate of distance d
-* opt_variance - optRETURN: large-sample variance of d
-*
-* Returns: <eslOK> on success.
-*
-* Infinite distances are possible, in which case distance
-* and variance are both <HUGE_VAL>. Caller has to deal
-* with this case as it sees fit, perhaps by enforcing
-* an arbitrary maximum distance.
-*
-* Throws: <eslEINVAL> if the two strings aren't the same length (and
-* thus can't have been properly aligned).
-* <eslEDIVZERO> if no aligned residues were counted.
-* On either failure, distance and variance are both returned
-* as <HUGE_VAL>.
-*/
-int
-esl_dst_CJukesCantor(int K, const char *as1, const char *as2,
- double *opt_distance, double *opt_variance)
-{
- int status;
- int n1, n2; /* number of observed identities, substitutions */
- int i; /* position in aligned seqs */
-
- /* 1. Count identities, mismatches.
- */
- n1 = n2 = 0;
- for (i = 0; as1[i] != '\0' && as2[i] != '\0'; i++)
- {
- if (isalpha(as1[i]) && isalpha(as2[i]))
- {
- if (toupper(as1[i]) == toupper(as2[i])) n1++; else n2++;
- }
- }
- if (as1[i] != '\0' || as2[i] != '\0')
- ESL_XEXCEPTION(eslEINVAL, "strings not same length, not aligned");
-
- return jukescantor(n1, n2, K, opt_distance, opt_variance); /* can throw eslEDIVZERO */
-
-ERROR:
- if (opt_distance != NULL) *opt_distance = HUGE_VAL;
- if (opt_variance != NULL) *opt_variance = HUGE_VAL;
- return status;
-}
-
-/*------- end, pairwise distances for aligned text seqs ---------*/
-
-
-
-
-
-/*****************************************************************
-* 2. Pairwise distances for aligned digitized sequences. [alphabet]
-*****************************************************************/
-#ifdef eslAUGMENT_ALPHABET
-
-/* Function: esl_dst_XPairId()
-* Synopsis: Pairwise identity of two aligned digital seqs.
-* Incept: SRE, Tue Apr 18 09:24:05 2006 [St. Louis]
-*
-* Purpose: Digital version of <esl_dst_PairId()>: <adsq1> and
-* <adsq2> are digitized aligned sequences, in alphabet
-* <abc>. Otherwise, same as <esl_dst_PairId()>.
-*
-* Args: abc - digital alphabet in use
-* ax1 - aligned digital seq 1
-* ax2 - aligned digital seq 2
-* opt_pid - optRETURN: pairwise identity, 0<=x<=1
-* opt_nid - optRETURN: # of identities
-* opt_n - optRETURN: denominator MIN(len1,len2)
-*
-* Returns: <eslOK> on success. <opt_distance>, <opt_nid>, <opt_n>
-* contain the answers, for any of these that were passed
-* non-<NULL> pointers.
-*
-* Throws: <eslEINVAL> if the strings are different lengths (not aligned).
-*/
-int
-esl_dst_XPairId(const ESL_ALPHABET *abc, const ESL_DSQ *ax1, const ESL_DSQ *ax2,
- double *opt_distance, int *opt_nid, int *opt_n)
-{
- int status;
- int idents; /* total identical positions */
- int len1, len2; /* lengths of seqs */
- int i; /* position in aligned seqs */
-
- idents = len1 = len2 = 0;
- for (i = 1; ax1[i] != eslDSQ_SENTINEL && ax2[i] != eslDSQ_SENTINEL; i++)
- {
- if (esl_abc_XIsCanonical(abc, ax1[i])) len1++;
- if (esl_abc_XIsCanonical(abc, ax2[i])) len2++;
-
- if (esl_abc_XIsCanonical(abc, ax1[i]) && esl_abc_XIsCanonical(abc, ax2[i])
- && ax1[i] == ax2[i])
- idents++;
- }
- if (len2 < len1) len1 = len2;
-
- if (ax1[i] != eslDSQ_SENTINEL || ax2[i] != eslDSQ_SENTINEL)
- ESL_XEXCEPTION(eslEINVAL, "strings not same length, not aligned");
-
- if (opt_distance != NULL) *opt_distance = ( len1==0 ? 0. : (double) idents / (double) len1 );
- if (opt_nid != NULL) *opt_nid = idents;
- if (opt_n != NULL) *opt_n = len1;
- return eslOK;
-
-ERROR:
- if (opt_distance != NULL) *opt_distance = 0.;
- if (opt_nid != NULL) *opt_nid = 0;
- if (opt_n != NULL) *opt_n = 0;
- return status;
-}
-
-
-/* Function: esl_dst_XJukesCantor()
-* Synopsis: Jukes-Cantor distance for two aligned digitized seqs.
-* Incept: SRE, Tue Apr 18 15:26:51 2006 [St. Louis]
-*
-* Purpose: Calculate the generalized Jukes-Cantor distance between two
-* aligned digital strings <ax> and <ay>, in substitutions/site,
-* using alphabet <abc> to evaluate identities and differences.
-* The maximum likelihood estimate for the distance is optionally returned in
-* <opt_distance>. The large-sample variance for the distance
-* estimate is optionally returned in <opt_variance>.
-*
-* Identical to <esl_dst_CJukesCantor()>, except that it takes
-* digital sequences instead of character strings.
-*
-* Args: abc - bioalphabet to use for comparisons
-* ax - 1st digital aligned seq
-* ay - 2nd digital aligned seq
-* opt_distance - optRETURN: ML estimate of distance d
-* opt_variance - optRETURN: large-sample variance of d
-*
-* Returns: <eslOK> on success. As in <esl_dst_CJukesCantor()>, the
-* distance and variance may be infinite, in which case they
-* are returned as <HUGE_VAL>.
-*
-* Throws: <eslEINVAL> if the two strings aren't the same length (and
-* thus can't have been properly aligned).
-* <eslEDIVZERO> if no aligned residues were counted.
-* On either failure, the distance and variance are set
-* to <HUGE_VAL>.
-*/
-int
-esl_dst_XJukesCantor(const ESL_ALPHABET *abc, const ESL_DSQ *ax, const ESL_DSQ *ay,
- double *opt_distance, double *opt_variance)
-{
- int status;
- int n1, n2; /* number of observed identities, substitutions */
- int i; /* position in aligned seqs */
-
- n1 = n2 = 0;
- for (i = 1; ax[i] != eslDSQ_SENTINEL && ay[i] != eslDSQ_SENTINEL; i++)
- {
- if (esl_abc_XIsCanonical(abc, ax[i]) && esl_abc_XIsCanonical(abc, ay[i]))
- {
- if (ax[i] == ay[i]) n1++;
- else n2++;
- }
- }
- if (ax[i] != eslDSQ_SENTINEL || ay[i] != eslDSQ_SENTINEL)
- ESL_XEXCEPTION(eslEINVAL, "strings not same length, not aligned");
-
- return jukescantor(n1, n2, abc->K, opt_distance, opt_variance);
-
-ERROR:
- if (opt_distance != NULL) *opt_distance = HUGE_VAL;
- if (opt_variance != NULL) *opt_variance = HUGE_VAL;
- return status;
-}
-
-#endif /*eslAUGMENT_ALPHABET*/
-/*---------- end pairwise distances, digital seqs --------------*/
-
-
-
-
-/*****************************************************************
-* 3. Distance matrices for aligned text sequences.
-*****************************************************************/
-#ifdef eslAUGMENT_DMATRIX
-
-/* Function:  esl_dst_CPairIdMx()
- * Synopsis:  NxN identity matrix for N aligned text sequences.
- * Incept:    SRE, Thu Apr 27 08:46:08 2006 [New York]
- *
- * Purpose:   Given a multiple sequence alignment <as>, consisting
- *            of <N> aligned character strings; calculate
- *            a symmetric fractional pairwise identity matrix by $N(N-1)/2$
- *            calls to <esl_dst_CPairId()>, and return it in <ret_S>.
- *            The diagonal is set to 1.
- *
- * Args:      as     - aligned seqs (all same length), [0..N-1]
- *            N      - # of aligned sequences
- *            ret_S  - RETURN: symmetric fractional identity matrix.
- *                     If passed as NULL, the matrix is computed and freed.
- *
- * Returns:   <eslOK> on success, and <ret_S> contains the fractional
- *            identity matrix. Caller frees <S> with
- *            <esl_dmatrix_Destroy()>.
- *
- * Throws:    <eslEMEM> if the matrix cannot be allocated.
- *            <eslEINVAL> if a seq has a different length than others
- *            (more generally: whatever non-OK status <esl_dst_CPairId()>
- *            reports is passed through). On failure, <ret_S> is returned
- *            <NULL> and state of inputs is unchanged.
- */
-int
-esl_dst_CPairIdMx(char **as, int N, ESL_DMATRIX **ret_S)
-{
-  ESL_DMATRIX *S      = NULL;
-  int          status = eslOK;
-  int          i,j;
-
-  /* A failed allocation has to carry an error code to ERROR; leaving
-   * <status> at its initial value would report success with a NULL matrix.
-   */
-  if (( S = esl_dmatrix_Create(N,N) ) == NULL) { status = eslEMEM; goto ERROR; }
-
-  for (i = 0; i < N; i++)
-    {
-      S->mx[i][i] = 1.;                 /* diagonal: full identity */
-      for (j = i+1; j < N; j++)
-        {
-          status = esl_dst_CPairId(as[i], as[j], &(S->mx[i][j]), NULL, NULL);
-          if (status != eslOK)
-            ESL_XEXCEPTION(status, "Pairwise identity calculation failed at seqs %d,%d\n", i,j);
-          S->mx[j][i] = S->mx[i][j];    /* mirror into the lower triangle */
-        }
-    }
-  if (ret_S != NULL) *ret_S = S; else esl_dmatrix_Destroy(S);
-  return eslOK;
-
-ERROR:
-  if (S     != NULL) esl_dmatrix_Destroy(S);
-  if (ret_S != NULL) *ret_S = NULL;
-  return status;
-}
-
-
-/* Function:  esl_dst_CDiffMx()
- * Synopsis:  NxN difference matrix for N aligned text sequences.
- * Incept:    SRE, Fri Apr 28 06:27:20 2006 [New York]
- *
- * Purpose:   Builds the fractional identity matrix with
- *            <esl_dst_CPairIdMx()>, then converts every off-diagonal
- *            identity <s> into a fractional difference <d=1-s> in place.
- *            The diagonal is set to 0.
- *
- * Args:      as     - aligned seqs (all same length), [0..N-1]
- *            N      - # of aligned sequences
- *            ret_D  - RETURN: symmetric fractional difference matrix.
- *                     If passed as NULL, the matrix is computed and freed.
- *
- * Returns:   <eslOK> on success, and <ret_D> contains the
- *            fractional difference matrix. Caller frees <D> with
- *            <esl_dmatrix_Destroy()>.
- *
- * Throws:    Any non-OK status from <esl_dst_CPairIdMx()> is returned
- *            unchanged (its documentation names <eslEINVAL> for seqs of
- *            differing length). On failure, <ret_D> is returned <NULL>
- *            and state of inputs is unchanged.
- */
-int
-esl_dst_CDiffMx(char **as, int N, ESL_DMATRIX **ret_D)
-{
-  ESL_DMATRIX *mat = NULL;
-  int          rc;
-  int          row, col;
-
-  rc = esl_dst_CPairIdMx(as, N, &mat);
-  if (rc != eslOK)
-    {
-      if (mat   != NULL) esl_dmatrix_Destroy(mat);
-      if (ret_D != NULL) *ret_D = NULL;
-      return rc;
-    }
-
-  for (row = 0; row < N; row++)
-    {
-      mat->mx[row][row] = 0.;
-      for (col = row+1; col < N; col++)
-        {
-          /* identity -> difference, written to both triangles */
-          mat->mx[row][col] = 1. - mat->mx[row][col];
-          mat->mx[col][row] = mat->mx[row][col];
-        }
-    }
-
-  if (ret_D == NULL) esl_dmatrix_Destroy(mat);
-  else               *ret_D = mat;
-  return eslOK;
-}
-
-/* Function:  esl_dst_CJukesCantorMx()
- * Synopsis:  NxN Jukes/Cantor distance matrix for N aligned text seqs.
- * Incept:    SRE, Tue Apr 18 16:00:16 2006 [St. Louis]
- *
- * Purpose:   Given a multiple sequence alignment <aseq>, consisting of
- *            <nseq> aligned character sequences in an alphabet of
- *            <K> letters (usually 4 for DNA, 20 for protein);
- *            calculate a symmetric Jukes/Cantor pairwise distance
- *            matrix for all sequence pairs, and optionally return the
- *            distance matrix in <opt_D>, and optionally return a
- *            symmetric matrix of the large-sample variances for those
- *            ML distance estimates in <opt_V>.
- *
- *            Infinite distances (and variances) are possible; they
- *            are represented as <HUGE_VAL> in <D> and <V>. Caller must
- *            be prepared to deal with them as appropriate.
- *
- * Args:      K      - size of the alphabet (usually 4 or 20)
- *            aseq   - aligned sequences [0..nseq-1][0..L-1]
- *            nseq   - number of aseqs
- *            opt_D  - optRETURN: [0..nseq-1]x[0..nseq-1] symmetric distance mx
- *            opt_V  - optRETURN: matrix of variances.
- *
- * Returns:   <eslOK> on success. <D> and <V> contain the
- *            distance matrix (and variances); caller frees these with
- *            <esl_dmatrix_Destroy()>. A matrix whose output pointer is
- *            NULL is freed here instead.
- *
- * Throws:    <eslEMEM> if either matrix cannot be allocated.
- *            <eslEINVAL> if any pair of sequences have differing lengths
- *            (and thus cannot have been properly aligned).
- *            <eslEDIVZERO> if some pair of sequences had no aligned
- *            residues. On failure, <D> and <V> are both returned <NULL>
- *            and state of inputs is unchanged.
- */
-int
-esl_dst_CJukesCantorMx(int K, char **aseq, int nseq,
-                       ESL_DMATRIX **opt_D, ESL_DMATRIX **opt_V)
-{
-  int          status;
-  ESL_DMATRIX *D = NULL;
-  ESL_DMATRIX *V = NULL;
-  int          i,j;
-
-  /* <status> must be assigned before any jump to ERROR: the cleanup
-   * path returns it, and it would otherwise be read uninitialized.
-   */
-  if (( D = esl_dmatrix_Create(nseq, nseq) ) == NULL) { status = eslEMEM; goto ERROR; }
-  if (( V = esl_dmatrix_Create(nseq, nseq) ) == NULL) { status = eslEMEM; goto ERROR; }
-
-  for (i = 0; i < nseq; i++)
-    {
-      D->mx[i][i] = 0.;
-      V->mx[i][i] = 0.;
-      for (j = i+1; j < nseq; j++)
-        {
-          status = esl_dst_CJukesCantor(K, aseq[i], aseq[j],
-                                        &(D->mx[i][j]), &(V->mx[i][j]));
-          if (status != eslOK)
-            ESL_XEXCEPTION(status, "J/C calculation failed at seqs %d,%d", i,j);
-
-          /* mirror into the lower triangle */
-          D->mx[j][i] = D->mx[i][j];
-          V->mx[j][i] = V->mx[i][j];
-        }
-    }
-  if (opt_D != NULL) *opt_D = D; else esl_dmatrix_Destroy(D);
-  if (opt_V != NULL) *opt_V = V; else esl_dmatrix_Destroy(V);
-  return eslOK;
-
-ERROR:
-  if (D     != NULL) esl_dmatrix_Destroy(D);
-  if (V     != NULL) esl_dmatrix_Destroy(V);
-  if (opt_D != NULL) *opt_D = NULL;
-  if (opt_V != NULL) *opt_V = NULL;
-  return status;
-}
-
-#endif /*eslAUGMENT_DMATRIX*/
-/*----------- end, distance matrices for aligned text seqs ---------*/
-
-
-
-
-/*****************************************************************
-* 4. Distance matrices for aligned digital sequences.
-*****************************************************************/
-#if defined(eslAUGMENT_ALPHABET) && defined(eslAUGMENT_DMATRIX)
-
-
-/* Function:  esl_dst_XPairIdMx()
- * Synopsis:  NxN identity matrix for N aligned digital seqs.
- * Incept:    SRE, Thu Apr 27 09:08:11 2006 [New York]
- *
- * Purpose:   Given a digitized multiple sequence alignment <ax>, consisting
- *            of <N> aligned digital sequences in alphabet <abc>; calculate
- *            a symmetric pairwise fractional identity matrix by $N(N-1)/2$
- *            calls to <esl_dst_XPairId()>, and return it in <ret_S>.
- *            The diagonal is set to 1.
- *
- * Args:      abc   - digital alphabet in use
- *            ax    - aligned dsq's, [0..N-1][1..alen]
- *            N     - number of aligned sequences
- *            ret_S - RETURN: NxN matrix of fractional identities.
- *                    If passed as NULL, the matrix is computed and freed.
- *
- * Returns:   <eslOK> on success, and <ret_S> contains the fractional
- *            identity matrix. Caller is obligated to free <S> with
- *            <esl_dmatrix_Destroy()>.
- *
- * Throws:    <eslEMEM> if the matrix cannot be allocated.
- *            <eslEINVAL> if a seq has a different length than others
- *            (more generally: whatever non-OK status <esl_dst_XPairId()>
- *            reports is passed through). On failure, <ret_S> is returned
- *            <NULL> and state of inputs is unchanged.
- */
-int
-esl_dst_XPairIdMx(const ESL_ALPHABET *abc, ESL_DSQ **ax, int N, ESL_DMATRIX **ret_S)
-{
-  int          status = eslOK;
-  ESL_DMATRIX *S      = NULL;
-  int          i,j;
-
-  /* A failed allocation has to carry an error code to ERROR; leaving
-   * <status> at its initial value would report success with a NULL matrix.
-   */
-  if (( S = esl_dmatrix_Create(N,N) ) == NULL) { status = eslEMEM; goto ERROR; }
-
-  for (i = 0; i < N; i++)
-    {
-      S->mx[i][i] = 1.;                 /* diagonal: full identity */
-      for (j = i+1; j < N; j++)
-        {
-          status = esl_dst_XPairId(abc, ax[i], ax[j], &(S->mx[i][j]), NULL, NULL);
-          if (status != eslOK)
-            ESL_XEXCEPTION(status, "Pairwise identity calculation failed at seqs %d,%d\n", i,j);
-          S->mx[j][i] = S->mx[i][j];    /* mirror into the lower triangle */
-        }
-    }
-  if (ret_S != NULL) *ret_S = S; else esl_dmatrix_Destroy(S);
-  return eslOK;
-
-ERROR:
-  if (S     != NULL) esl_dmatrix_Destroy(S);
-  if (ret_S != NULL) *ret_S = NULL;
-  return status;
-}
-
-
-/* Function:  esl_dst_XDiffMx()
- * Synopsis:  NxN difference matrix for N aligned digital seqs.
- * Incept:    SRE, Fri Apr 28 06:37:29 2006 [New York]
- *
- * Purpose:   Builds the fractional identity matrix with
- *            <esl_dst_XPairIdMx()>, then converts every off-diagonal
- *            identity <s> into a fractional difference <1-s> in place.
- *            The diagonal is set to 0.
- *
- * Args:      abc   - digital alphabet in use
- *            ax    - aligned dsq's, [0..N-1][1..alen]
- *            N     - number of aligned sequences
- *            ret_D - RETURN: NxN matrix of fractional differences.
- *                    If passed as NULL, the matrix is computed and freed.
- *
- * Returns:   <eslOK> on success, and <ret_D> contains the difference
- *            matrix; caller is obligated to free <D> with
- *            <esl_dmatrix_Destroy()>.
- *
- * Throws:    Any non-OK status from <esl_dst_XPairIdMx()> is returned
- *            unchanged (its documentation names <eslEINVAL> for seqs of
- *            differing length). On failure, <ret_D> is returned <NULL>
- *            and state of inputs is unchanged.
- */
-int
-esl_dst_XDiffMx(const ESL_ALPHABET *abc, ESL_DSQ **ax, int N, ESL_DMATRIX **ret_D)
-{
-  ESL_DMATRIX *mat = NULL;
-  int          rc;
-  int          row, col;
-
-  rc = esl_dst_XPairIdMx(abc, ax, N, &mat);
-  if (rc != eslOK)
-    {
-      if (mat   != NULL) esl_dmatrix_Destroy(mat);
-      if (ret_D != NULL) *ret_D = NULL;
-      return rc;
-    }
-
-  for (row = 0; row < N; row++)
-    {
-      mat->mx[row][row] = 0.;
-      for (col = row+1; col < N; col++)
-        {
-          /* identity -> difference, written to both triangles */
-          mat->mx[row][col] = 1. - mat->mx[row][col];
-          mat->mx[col][row] = mat->mx[row][col];
-        }
-    }
-
-  if (ret_D == NULL) esl_dmatrix_Destroy(mat);
-  else               *ret_D = mat;
-  return eslOK;
-}
-
-/* Function:  esl_dst_XJukesCantorMx()
- * Synopsis:  NxN Jukes/Cantor distance matrix for N aligned digital seqs.
- * Incept:    SRE, Thu Apr 27 08:38:08 2006 [New York City]
- *
- * Purpose:   Given a digitized multiple sequence alignment <ax>,
- *            consisting of <nseq> aligned digital sequences in
- *            bioalphabet <abc>, calculate a symmetric Jukes/Cantor
- *            pairwise distance matrix for all sequence pairs;
- *            optionally return the distance matrix in <opt_D> and
- *            a matrix of the large-sample variances for those ML distance
- *            estimates in <opt_V>.
- *
- *            Infinite distances (and variances) are possible. They
- *            are represented as <HUGE_VAL> in <D> and <V>. Caller must
- *            be prepared to deal with them as appropriate.
- *
- * Args:      abc    - bioalphabet for <ax>
- *            ax     - aligned digital sequences [0..nseq-1][1..L]
- *            nseq   - number of aseqs
- *            opt_D  - optRETURN: [0..nseq-1]x[0..nseq-1] symmetric distance mx
- *            opt_V  - optRETURN: matrix of variances.
- *
- * Returns:   <eslOK> on success. <D> (and optionally <V>) contain the
- *            distance matrix (and variances). Caller frees these with
- *            <esl_dmatrix_Destroy()>. A matrix whose output pointer is
- *            NULL is freed here instead.
- *
- * Throws:    <eslEMEM> if either matrix cannot be allocated.
- *            <eslEINVAL> if any pair of sequences have differing lengths
- *            (and thus cannot have been properly aligned).
- *            <eslEDIVZERO> if some pair of sequences had no aligned
- *            residues. On failure, <D> and <V> are both returned <NULL>
- *            and state of inputs is unchanged.
- */
-int
-esl_dst_XJukesCantorMx(const ESL_ALPHABET *abc, ESL_DSQ **ax, int nseq,
-                       ESL_DMATRIX **opt_D, ESL_DMATRIX **opt_V)
-{
-  ESL_DMATRIX *D = NULL;
-  ESL_DMATRIX *V = NULL;
-  int          status;
-  int          i,j;
-
-  /* <status> must be assigned before any jump to ERROR: the cleanup
-   * path returns it, and it would otherwise be read uninitialized.
-   */
-  if (( D = esl_dmatrix_Create(nseq, nseq) ) == NULL) { status = eslEMEM; goto ERROR; }
-  if (( V = esl_dmatrix_Create(nseq, nseq) ) == NULL) { status = eslEMEM; goto ERROR; }
-
-  for (i = 0; i < nseq; i++)
-    {
-      D->mx[i][i] = 0.;
-      V->mx[i][i] = 0.;
-      for (j = i+1; j < nseq; j++)
-        {
-          status = esl_dst_XJukesCantor(abc, ax[i], ax[j],
-                                        &(D->mx[i][j]), &(V->mx[i][j]));
-          if (status != eslOK)
-            ESL_XEXCEPTION(status, "J/C calculation failed at digital aseqs %d,%d", i,j);
-
-          /* mirror into the lower triangle */
-          D->mx[j][i] = D->mx[i][j];
-          V->mx[j][i] = V->mx[i][j];
-        }
-    }
-  if (opt_D != NULL) *opt_D = D; else esl_dmatrix_Destroy(D);
-  if (opt_V != NULL) *opt_V = V; else esl_dmatrix_Destroy(V);
-  return eslOK;
-
-ERROR:
-  if (D     != NULL) esl_dmatrix_Destroy(D);
-  if (V     != NULL) esl_dmatrix_Destroy(V);
-  if (opt_D != NULL) *opt_D = NULL;
-  if (opt_V != NULL) *opt_V = NULL;
-  return status;
-}
-#endif /*eslAUGMENT_ALPHABET && eslAUGMENT_DMATRIX*/
-/*------- end, distance matrices for digital alignments ---------*/
-
-
-
-/*****************************************************************
-* 5. Average pairwise identity for multiple alignments
-*****************************************************************/
-
-#ifdef eslAUGMENT_RANDOM
-/* Function:  esl_dst_CAverageId()
- * Synopsis:  Calculate avg identity for multiple alignment
- * Incept:    SRE, Fri May 18 15:02:38 2007 [Janelia]
- *
- * Purpose:   Calculates the average pairwise fractional identity in
- *            a multiple sequence alignment <as>, consisting of <N>
- *            aligned character sequences of identical length.
- *
- *            If an exhaustive calculation would require more than
- *            <max_comparisons> pairwise comparisons, then instead of
- *            looking at all pairs, calculate the average over a
- *            stochastic sample of <max_comparisons> random pairs.
- *            This allows the routine to work efficiently even on very
- *            deep MSAs.
- *
- *            Each fractional pairwise identity (range $[0..$ pid $..1]$)
- *            is calculated using <esl_dst_CPairId()>.
- *
- *            For <N> of 0 or 1 there are no pairs; the average is
- *            reported as 1.
- *
- * Returns:   <eslOK> on success, and <*ret_id> contains the average
- *            fractional identity.
- *
- * Throws:    <eslEMEM> on allocation failure.
- *            <eslEINVAL> if any of the aligned sequence pairs aren't
- *            of the same length.
- *            In either case, <*ret_id> is set to 0.
- */
-int
-esl_dst_CAverageId(char **as, int N, int max_comparisons, double *ret_id)
-{
-  int    status;
-  double id;
-  double sum = .0;
-  double npairs;
-  int    i,j,n;
-
-  if (N <= 1) { *ret_id = 1.; return eslOK; }
-  *ret_id = 0.;
-
-  /* Count the pairs in floating point: N*(N-1) overflows an int for
-   * deep alignments. The value is exact for any N that can pass the test.
-   */
-  npairs = 0.5 * (double) N * (double) (N-1);
-
-  /* Is nseq small enough that we can average over all pairwise comparisons? */
-  if (npairs <= (double) max_comparisons)
-    {
-      for (i = 0; i < N; i++)
-        for (j = i+1; j < N; j++)
-          {
-            if ((status = esl_dst_CPairId(as[i], as[j], &id, NULL, NULL)) != eslOK) return status;
-            sum += id;
-          }
-      sum /= npairs;   /* average the accumulated total, not the last pair */
-    }
-
-  /* If nseq is large, calculate average over a stochastic sample. */
-  else
-    {
-      ESL_RANDOMNESS *r = esl_randomness_Create(0);
-
-      if (r == NULL) return eslEMEM;
-
-      for (n = 0; n < max_comparisons; n++)
-        {
-          do { i = esl_rnd_Roll(r, N); j = esl_rnd_Roll(r, N); } while (j == i); /* make sure j != i */
-          status = esl_dst_CPairId(as[i], as[j], &id, NULL, NULL);
-          if (status != eslOK)
-            {
-              esl_randomness_Destroy(r);   /* don't leak the generator on failure */
-              return status;
-            }
-          sum += id;
-        }
-      sum /= (double) max_comparisons;
-      esl_randomness_Destroy(r);
-    }
-
-  *ret_id = sum;
-  return eslOK;
-}
-#endif /* eslAUGMENT_RANDOM */
-
-#if defined(eslAUGMENT_RANDOM) && defined(eslAUGMENT_ALPHABET)
-/* Function:  esl_dst_XAverageId()
- * Synopsis:  Calculate avg identity for digital MSA
- * Incept:    SRE, Fri May 18 15:19:14 2007 [Janelia]
- *
- * Purpose:   Calculates the average pairwise fractional identity in
- *            a digital multiple sequence alignment <ax>, consisting of <N>
- *            aligned digital sequences of identical length.
- *
- *            If an exhaustive calculation would require more than
- *            <max_comparisons> pairwise comparisons, then instead of
- *            looking at all pairs, calculate the average over a
- *            stochastic sample of <max_comparisons> random pairs.
- *            This allows the routine to work efficiently even on very
- *            deep MSAs.
- *
- *            Each fractional pairwise identity (range $[0..$ pid $..1]$)
- *            is calculated using <esl_dst_XPairId()>.
- *
- *            For <N> of 0 or 1 there are no pairs; the average is
- *            reported as 1.
- *
- * Returns:   <eslOK> on success, and <*ret_id> contains the average
- *            fractional identity.
- *
- * Throws:    <eslEMEM> on allocation failure.
- *            <eslEINVAL> if any of the aligned sequence pairs aren't
- *            of the same length.
- *            In either case, <*ret_id> is set to 0.
- */
-int
-esl_dst_XAverageId(const ESL_ALPHABET *abc, ESL_DSQ **ax, int N, int max_comparisons, double *ret_id)
-{
-  int    status;
-  double id;
-  double sum = .0;
-  int    i,j,n;
-
-  if (N <= 1) { *ret_id = 1.; return eslOK; }
-  *ret_id = 0.;
-
-  /* Is N small enough that we can average over all pairwise comparisons?
-     watch out for numerical overflow in this: Pfam N's easily overflow when squared.
-     The first two tests short-circuit before N*(N-1) is ever formed for a large N.
-   */
-  if (N <= max_comparisons &&
-      N <= sqrt(2. * max_comparisons) &&
-      (N * (N-1) / 2) <= max_comparisons)
-    {
-      for (i = 0; i < N; i++)
-        for (j = i+1; j < N; j++)
-          {
-            if ((status = esl_dst_XPairId(abc, ax[i], ax[j], &id, NULL, NULL)) != eslOK) return status;
-            sum += id;
-          }
-      sum /= (double) (N * (N-1) / 2);
-    }
-
-  /* If nseq is large, calculate average over a stochastic sample. */
-  else
-    {
-      ESL_RANDOMNESS *r = esl_randomness_Create(0);
-
-      if (r == NULL) return eslEMEM;
-
-      for (n = 0; n < max_comparisons; n++)
-        {
-          do { i = esl_rnd_Roll(r, N); j = esl_rnd_Roll(r, N); } while (j == i); /* make sure j != i */
-          status = esl_dst_XPairId(abc, ax[i], ax[j], &id, NULL, NULL);
-          if (status != eslOK)
-            {
-              esl_randomness_Destroy(r);   /* don't leak the generator on failure */
-              return status;
-            }
-          sum += id;
-        }
-      sum /= (double) max_comparisons;
-      esl_randomness_Destroy(r);
-    }
-
-  *ret_id = sum;
-  return eslOK;
-}
-#endif /* eslAUGMENT_RANDOM && eslAUGMENT_ALPHABET */
-
-
-
-
-
-/*****************************************************************
-* 6. Private (static) functions
-*****************************************************************/
-
-/* jukescantor()
- *
- * Generalized Jukes/Cantor distance from pair counts.
- *
- * Inputs are <n1> identities and <n2> differences observed between two
- * aligned sequences, over a base alphabet of <alphabet_size> letters
- * (4 or 20). The distance, in substitutions/site, goes to
- * <opt_distance>; its large-sample variance goes to <opt_variance>.
- * Either output pointer may be NULL, in which case that result is
- * simply not stored.
- *
- * When the observed difference fraction is too large for the model
- * (the log argument is <= 0), both results are <HUGE_VAL> and the
- * return code is still <eslOK>.
- *
- * Returns <eslEDIVZERO> if there are no data (<n1+n2=0>); both outputs
- * are set to <HUGE_VAL> in that case too.
- */
-static int
-jukescantor(int n1, int n2, int alphabet_size, double *opt_distance, double *opt_variance)
-{
-  /* Start from the "no answer" state; only a successful calculation
-   * below overwrites it. */
-  double dist = HUGE_VAL;
-  double var  = HUGE_VAL;
-  int    rc   = eslEDIVZERO;
-
-  ESL_DASSERT1( (n1 >= 0) );
-  ESL_DASSERT1( (n2 >= 0) );
-  ESL_DASSERT1( (alphabet_size >= 0) );
-
-  if (n1+n2 != 0)
-    {
-      const double K = (double) alphabet_size;
-      const double D = (double) n2 / (double) (n1+n2);   /* fraction of differing sites */
-      const double N = (double) (n1+n2);                 /* number of compared sites    */
-      const double x = 1. - D * K/(K-1.);                /* argument of the log         */
-
-      if (!(x <= 0.))
-        {
-          dist = -log(x) * K/(K-1);
-          var  = exp( 2.*K*dist/(K-1) ) * D * (1.-D) / N;
-        }
-      rc = eslOK;
-    }
-
-  if (opt_distance != NULL) *opt_distance = dist;
-  if (opt_variance != NULL) *opt_variance = var;
-  return rc;
-}
-/*--------------- end of private functions ----------------------*/
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_distance.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_distance.h
deleted file mode 100644
index fb75a4b..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_distance.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/* esl_distance.h
-* Distances between aligned sequences, including both
-* probabilistic evolutionary models and ad hoc measures.
-*
-* SVN $Id: esl_distance.h 192 2007-06-12 15:06:22Z eddys $
-* SRE, Fri Apr 28 06:41:13 2006 [New York]
-*/
-#ifndef ESL_DISTANCE_INCLUDED
-#define ESL_DISTANCE_INCLUDED
-
-#include "easel.h" /* ESL_DSQ declaration */
-#ifdef eslAUGMENT_ALPHABET
-#include "esl_alphabet.h" /* ESL_ALPHABET declaration */
-#endif
-#ifdef eslAUGMENT_DMATRIX
-#include "esl_dmatrix.h" /* ESL_DMATRIX declaration */
-#endif
-#ifdef eslAUGMENT_RANDOM
-#include "esl_random.h"
-#endif
-
-/* 1. Pairwise distances for aligned text sequences.
-* Declared unconditionally: these two do not depend on any
-* eslAUGMENT_* option.
-*/
-extern int esl_dst_CPairId(const char *asq1, const char *asq2,
- double *opt_pid, int *opt_nid, int *opt_n);
-extern int esl_dst_CJukesCantor(int K, const char *as1, const char *as2,
- double *opt_distance, double *opt_variance);
-
-/* 2. Pairwise distances for aligned digital seqs.
-* Declared only when eslAUGMENT_ALPHABET is defined.
-*/
-#ifdef eslAUGMENT_ALPHABET
-extern int esl_dst_XPairId(const ESL_ALPHABET *abc, const ESL_DSQ *ax1, const ESL_DSQ *ax2,
- double *opt_pid, int *opt_nid, int *opt_n);
-extern int esl_dst_XJukesCantor(const ESL_ALPHABET *abc, const ESL_DSQ *ax, const ESL_DSQ *ay,
- double *opt_distance, double *opt_variance);
-#endif
-
-
-/* 3. Distance matrices for aligned text sequences.
-* Declared only when eslAUGMENT_DMATRIX is defined. Each result matrix
-* is handed back through an ESL_DMATRIX ** output argument.
-*/
-#ifdef eslAUGMENT_DMATRIX
-extern int esl_dst_CPairIdMx (char **as, int N, ESL_DMATRIX **ret_S);
-extern int esl_dst_CDiffMx (char **as, int N, ESL_DMATRIX **ret_D);
-extern int esl_dst_CJukesCantorMx(int K, char **as, int N, ESL_DMATRIX **opt_D, ESL_DMATRIX **opt_V);
-#endif
-
-/* 4. Distance matrices for aligned digital sequences.
-* Declared only when both eslAUGMENT_DMATRIX and eslAUGMENT_ALPHABET
-* are defined.
-*/
-#if defined(eslAUGMENT_DMATRIX) && defined(eslAUGMENT_ALPHABET)
-extern int esl_dst_XPairIdMx(const ESL_ALPHABET *abc, ESL_DSQ **ax, int N, ESL_DMATRIX **ret_S);
-extern int esl_dst_XDiffMx (const ESL_ALPHABET *abc, ESL_DSQ **ax, int N, ESL_DMATRIX **ret_D);
-
-extern int esl_dst_XJukesCantorMx(const ESL_ALPHABET *abc, ESL_DSQ **ax, int nseq,
- ESL_DMATRIX **opt_D, ESL_DMATRIX **opt_V);
-#endif
-
-/* 5. Average pairwise identity for multiple alignments.
-* The text version needs eslAUGMENT_RANDOM; the digital version needs
-* eslAUGMENT_ALPHABET as well. The result is stored through <ret_id>.
-*/
-#ifdef eslAUGMENT_RANDOM
-extern int esl_dst_CAverageId(char **as, int nseq, int max_comparisons, double *ret_id);
-#endif
-#if defined(eslAUGMENT_RANDOM) && defined(eslAUGMENT_ALPHABET)
-extern int esl_dst_XAverageId(const ESL_ALPHABET *abc, ESL_DSQ **ax, int N, int max_comparisons, double *ret_id);
-#endif
-
-
-#endif /*ESL_DISTANCE_INCLUDED*/
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_dmatrix.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_dmatrix.cpp
deleted file mode 100644
index 27ca0c1..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_dmatrix.cpp
+++ /dev/null
@@ -1,1271 +0,0 @@
-/* Linear algebra operations in double-precision matrices.
-*
-* Implements ESL_DMATRIX (double-precision matrix) and
-* ESL_PERMUTATION (permutation matrix) objects.
-*
-* Table of contents:
-* 1. The ESL_DMATRIX object
-* 2. Debugging/validation code for ESL_DMATRIX
-* 3. The ESL_PERMUTATION object
-* 4. Debugging/validation code for ESL_PERMUTATION
-* 5. The rest of the dmatrix API
-* 6. Optional: Interoperability with GSL
-* 7. Optional: Interfaces to LAPACK
-* 11. Copyright and license
-*
-* SRE, Tue Jul 13 14:42:14 2004 [St. Louis]
-* SVN $Id: esl_dmatrix.c 342 2009-06-03 12:32:42Z eddys $
-*/
-
-#include <hmmer3/easel/esl_config.h>
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_vectorops.h>
-
-#include "esl_dmatrix.h"
-
-
-/*****************************************************************
-* 1. The ESL_DMATRIX object.
-*****************************************************************/
-
-/* Function:  esl_dmatrix_Create()
- *
- * Purpose:   Allocates a general (fully stored) matrix of <n> rows by
- *            <m> columns. All cells live in one contiguous buffer
- *            anchored at <mx[0]>; the other row pointers index into it,
- *            so <mx[i][j]> addressing works. Cell contents are not
- *            initialized.
- *
- * Args:      <n> - number of rows; $>= 1$
- *            <m> - number of columns; $>= 1$
- *
- * Returns:   a pointer to a new <ESL_DMATRIX> object. Caller frees
- *            with <esl_dmatrix_Destroy()>.
- *
- * Throws:    <NULL> if an allocation failed.
- */
-ESL_DMATRIX *
-esl_dmatrix_Create(int n, int m)
-{
-  ESL_DMATRIX *mat = NULL;
-  double      *rowstart;
-  int          row;
-  int          status;     /* written by the ESL_ALLOC_WITH_TYPE macro */
-
-  ESL_ALLOC_WITH_TYPE(mat, ESL_DMATRIX*, sizeof(ESL_DMATRIX));
-  mat->n  = n;
-  mat->m  = m;
-  mat->mx = NULL;          /* keeps the ERROR path safe if the next alloc fails */
-
-  ESL_ALLOC_WITH_TYPE(mat->mx, double**, sizeof(double *) * n);
-  mat->mx[0] = NULL;
-
-  ESL_ALLOC_WITH_TYPE(mat->mx[0], double*, sizeof(double) * n * m);
-
-  /* each subsequent row begins <m> cells after the previous one */
-  rowstart = mat->mx[0];
-  for (row = 1; row < n; row++)
-    {
-      rowstart     += m;
-      mat->mx[row]  = rowstart;
-    }
-
-  mat->ncells = n * m;
-  mat->type   = ESL_DMATRIX::eslGENERAL;
-  return mat;
-
-ERROR:
-  esl_dmatrix_Destroy(mat);
-  return NULL;
-}
-
-
-/* Function:  esl_dmatrix_CreateUpper()
- * Incept:    SRE, Wed Feb 28 08:45:45 2007 [Janelia]
- *
- * Purpose:   Allocates a packed upper triangular matrix of <n> rows and
- *            <n> columns. Only the $n(n+1)/2$ cells with $i \leq j$ are
- *            stored; cells $i > j$ are implicitly 0 and must not be
- *            accessed by the caller.
- *
- *            Not all matrix operations in Easel can work on packed
- *            upper triangular matrices.
- *
- * Returns:   a pointer to a new <ESL_DMATRIX> object of type
- *            <eslUPPER>. Caller frees with <esl_dmatrix_Destroy()>.
- *
- * Throws:    <NULL> if allocation fails.
- *
- * Xref:      J1/10
- */
-ESL_DMATRIX *
-esl_dmatrix_CreateUpper(int n)
-{
-  ESL_DMATRIX *mat = NULL;
-  int          row;
-  int          offset;     /* cells from mx[0] to the (virtual) column 0 of a row */
-  int          status;     /* written by the ESL_ALLOC_WITH_TYPE macro */
-
-  /* the matrix shell */
-  ESL_ALLOC_WITH_TYPE(mat, ESL_DMATRIX*, sizeof(ESL_DMATRIX));
-  mat->n  = n;
-  mat->m  = n;
-  mat->mx = NULL;
-
-  /* one pointer per row */
-  ESL_ALLOC_WITH_TYPE(mat->mx, double**, sizeof(double *) * n);
-  mat->mx[0] = NULL;
-
-  /* packed storage for the upper triangle only */
-  ESL_ALLOC_WITH_TYPE(mat->mx[0], double*, sizeof(double) * n * (n+1) / 2);
-
-  /* Row r holds n-r stored cells, but its pointer is shifted back by r
-   * so that mx[r][j] still addresses column j. Consecutive row pointers
-   * are therefore n-r cells apart and overlap the tail of the previous
-   * row; only i<=j is valid. xref J1/10.
-   */
-  offset = 0;
-  for (row = 1; row < n; row++)
-    {
-      offset       += n - row;
-      mat->mx[row]  = mat->mx[0] + offset;
-    }
-
-  mat->ncells = n * (n+1) / 2;
-  mat->type   = ESL_DMATRIX::eslUPPER;
-  return mat;
-
-ERROR:
-  esl_dmatrix_Destroy(mat);
-  return NULL;
-}
-
-
-/* Function:  esl_dmatrix_Destroy()
- *
- * Purpose:   Releases an <ESL_DMATRIX> object <A>: the cell buffer at
- *            <mx[0]>, the row-pointer array, then the object itself.
- *            <A> may be NULL or partially constructed (NULL <mx> or
- *            NULL <mx[0]>); missing pieces are skipped.
- *
- * Returns:   <eslOK>, always.
- */
-int
-esl_dmatrix_Destroy(ESL_DMATRIX *A)
-{
-  if (A == NULL) return eslOK;
-
-  if (A->mx != NULL)
-    {
-      if (A->mx[0] != NULL) free(A->mx[0]);
-      free(A->mx);
-    }
-  free(A);
-  return eslOK;
-}
-
-
-/* Function:  esl_dmatrix_Copy()
- *
- * Purpose:   Copies the contents of <src> into <dest>, which the caller
- *            has already allocated with the same dimensions.
- *
- *            The two may differ in storage type if the copy makes
- *            sense. Packing a general <src> into an upper triangular
- *            <dest> requires every below-diagonal cell of <src> to be
- *            zero; this is checked before anything is written.
- *            Unpacking an upper triangular <src> into a general <dest>
- *            writes explicit zeros below the diagonal.
- *
- * Returns:   <eslOK> on success.
- *
- * Throws:    <eslEINCOMPAT> if <src>, <dest> are different sizes,
- *            or if their types differ and <dest> cannot represent
- *            <src>.
- */
-int
-esl_dmatrix_Copy(const ESL_DMATRIX *src, ESL_DMATRIX *dest)
-{
-  int row, col;
-  int packing;      /* general -> packed upper */
-  int unpacking;    /* packed upper -> general */
-
-  if (dest->n != src->n || dest->m != src->m)
-    ESL_EXCEPTION(eslEINCOMPAT, "matrices of different size");
-
-  /* identical layouts: one flat copy of the cell buffer */
-  if (src->type == dest->type)
-    {
-      memcpy(dest->mx[0], src->mx[0], src->ncells * sizeof(double));
-      return eslOK;
-    }
-
-  packing   = (src->type == ESL_DMATRIX::eslGENERAL && dest->type == ESL_DMATRIX::eslUPPER);
-  unpacking = (src->type == ESL_DMATRIX::eslUPPER   && dest->type == ESL_DMATRIX::eslGENERAL);
-  if (!packing && !unpacking) return eslOK;   /* no other conversion is handled */
-
-  /* pass over the strict lower triangle: verify it (packing) or clear it (unpacking) */
-  for (row = 1; row < src->n; row++)
-    for (col = 0; col < row; col++)
-      {
-        if (unpacking)
-          dest->mx[row][col] = 0.;
-        else if (src->mx[row][col] != 0.)
-          ESL_EXCEPTION(eslEINCOMPAT, "general matrix isn't upper triangular, can't be copied/packed");
-      }
-
-  /* the diagonal and upper triangle transfer cell by cell in both directions */
-  for (row = 0; row < src->n; row++)
-    for (col = row; col < src->m; col++)
-      dest->mx[row][col] = src->mx[row][col];
-
-  return eslOK;
-}
-
-
-/* Function:  esl_dmatrix_Clone()
- * Incept:    SRE, Tue May 2 14:38:45 2006 [St. Louis]
- *
- * Purpose:   Makes a newly allocated duplicate of matrix <A>. The copy
- *            is packed upper triangular if <A> is; any other type is
- *            cloned as a general matrix.
- *
- * Returns:   a pointer to the copy. Caller frees with
- *            <esl_dmatrix_Destroy()>.
- *
- * Throws:    <NULL> on allocation failure.
- */
-ESL_DMATRIX *
-esl_dmatrix_Clone(const ESL_DMATRIX *A)
-{
-  ESL_DMATRIX *dup;
-
-  if (A->type == ESL_DMATRIX::eslUPPER) dup = esl_dmatrix_CreateUpper(A->n);
-  else                                  dup = esl_dmatrix_Create(A->n, A->m);
-
-  if (dup == NULL) return NULL;
-
-  esl_dmatrix_Copy(A, dup);
-  return dup;
-}
-
-/* Function:  esl_dmatrix_Compare()
- *
- * Purpose:   Tests matrices <A> and <B> for element-wise equality,
- *            calling <esl_DCompare()> on each pair of cognate elements
- *            with fractional tolerance <tol>.
- *
- *            <A> and <B> may be of different types; for example,
- *            a packed upper triangular matrix A is compared to
- *            a general matrix B by assuming <A->mx[i][j] = 0.> for
- *            all $i>j$.
- *
- * Returns:   <eslOK> if the dimensions match and all elements are
- *            equal; <eslFAIL> if the dimensions or any element differ.
- */
-int
-esl_dmatrix_Compare(const ESL_DMATRIX *A, const ESL_DMATRIX *B, double tol)
-{
-  int    row, col, k;
-  double va, vb;
-
-  if (A->n != B->n || A->m != B->m) return eslFAIL;
-
-  /* Same storage layout: walk the flat cell buffers, which covers
-   * packed and unpacked storage alike. */
-  if (A->type == B->type)
-    {
-      for (k = 0; k < A->ncells; k++)
-        if (esl_DCompare(A->mx[0][k], B->mx[0][k], tol) == eslFAIL) return eslFAIL;
-      return eslOK;
-    }
-
-  /* Mixed layouts: go by coordinates, substituting the implicit zero for
-   * below-diagonal cells that a packed matrix does not store. */
-  for (row = 0; row < A->n; row++)
-    for (col = 0; col < A->m; col++)
-      {
-        va = (A->type == ESL_DMATRIX::eslUPPER && row > col) ? 0. : A->mx[row][col];
-        vb = (B->type == ESL_DMATRIX::eslUPPER && row > col) ? 0. : B->mx[row][col];
-
-        if (esl_DCompare(va, vb, tol) == eslFAIL) return eslFAIL;
-      }
-  return eslOK;
-}
-
-
-/* Function:  esl_dmatrix_Set()
- *
- * Purpose:   Assigns the value <x> to every stored element $a_{ij}$ of
- *            matrix <A> (all <ncells> cells of its flat buffer), and
- *            returns <eslOK>.
- */
-int
-esl_dmatrix_Set(ESL_DMATRIX *A, double x)
-{
-  int k;
-
-  for (k = A->ncells - 1; k >= 0; k--)
-    A->mx[0][k] = x;
-  return eslOK;
-}
-
-
-/* Function:  esl_dmatrix_SetZero()
- *
- * Purpose:   Clears matrix <A>: every stored element $a_{ij}$ becomes 0.
- *            Returns <eslOK>.
- */
-int
-esl_dmatrix_SetZero(ESL_DMATRIX *A)
-{
-  /* zero-fill is just the constant fill with x = 0 */
-  return esl_dmatrix_Set(A, 0.);
-}
-
-
-/* Function:  esl_dmatrix_SetIdentity()
- *
- * Purpose:   Turns the square matrix <A> into an identity matrix:
- *            everything is zeroed, then each diagonal element
- *            $a_{ii}$ is set to 1. Returns <eslOK> on success.
- *
- * Throws:    <eslEINVAL> if the matrix isn't square.
- */
-int
-esl_dmatrix_SetIdentity(ESL_DMATRIX *A)
-{
-  int d;
-
-  if (A->n != A->m) ESL_EXCEPTION(eslEINVAL, "matrix isn't square");
-
-  esl_dmatrix_SetZero(A);
-  d = A->n;
-  while (d-- > 0)
-    A->mx[d][d] = 1.;
-  return eslOK;
-}
-
-
-
-/* Function:  esl_dmatrix_Dump()
- * Incept:    SRE, Mon Nov 29 19:21:20 2004 [St. Louis]
- *
- * Purpose:   Writes matrix <A> to output stream <ofp> as a human-readable
- *            table: one header line of column labels, then one line per
- *            row, each starting with its row label.
- *
- *            If <rowlabel> or <collabel> are non-NULL, they specify a
- *            string of single-character labels to put on the rows and
- *            columns, respectively. (For example, these might be a
- *            sequence alphabet for a 4x4 or 20x20 rate matrix or
- *            substitution matrix.) Numbers <1..ncols> or <1..nrows> are
- *            used if <collabel> or <rowlabel> are passed as <NULL>.
- *
- *            For a packed upper triangular matrix, the cells below the
- *            diagonal are printed as blanks.
- *
- * Args:      ofp      - output file pointer; stdout, for example.
- *            A        - matrix to dump.
- *            rowlabel - optional: NULL, or character labels for rows
- *            collabel - optional: NULL, or character labels for cols
- *
- * Returns:   <eslOK> on success.
- */
-int
-esl_dmatrix_Dump(FILE *ofp, const ESL_DMATRIX *A, const char *rowlabel, const char *collabel)
-{
-  int row, col;
-
-  /* header line: corner padding, then one label per column */
-  fprintf(ofp, " ");
-  for (col = 0; col < A->m; col++)
-    {
-      if (collabel == NULL) fprintf(ofp, "%8d ", col+1);
-      else                  fprintf(ofp, " %c ", collabel[col]);
-    }
-  fprintf(ofp, "\n");
-
-  for (row = 0; row < A->n; row++)
-    {
-      if (rowlabel == NULL) fprintf(ofp, "%5d ", row+1);
-      else                  fprintf(ofp, " %c ", rowlabel[row]);
-
-      for (col = 0; col < A->m; col++)
-        {
-          /* a packed upper matrix has no stored value below the diagonal */
-          if (A->type == ESL_DMATRIX::eslUPPER && row > col)
-            fprintf(ofp, "%8s ", "");
-          else
-            fprintf(ofp, "%8.4f ", A->mx[row][col]);
-        }
-      fprintf(ofp, "\n");
-    }
-  return eslOK;
-}
-
-/*****************************************************************
-* 3. The ESL_PERMUTATION object.
-*****************************************************************/
-
-/* Function:  esl_permutation_Create()
- *
- * Purpose:   Allocates a permutation "matrix" of size <n>, for
- *            permuting <n> x <n> square matrices, and returns a
- *            pointer to it.
- *
- *            A permutation matrix consists of 1's and 0's such that
- *            any given row or column contains only one 1. It is stored
- *            compactly as a vector: each value $p_i$ is the column $j$
- *            that holds the 1 in row $i$. The new object starts out as
- *            the identity, $p_i = i$ for all $i = 0..n-1$.
- *
- * Returns:   a pointer to a new <ESL_PERMUTATION> object. Free with
- *            <esl_permutation_Destroy()>.
- *
- * Throws:    <NULL> if allocation fails.
- */
-ESL_PERMUTATION *
-esl_permutation_Create(int n)
-{
-  ESL_PERMUTATION *perm = NULL;
-  int              k;
-  int              status;   /* written by the ESL_ALLOC_WITH_TYPE macro */
-
-  ESL_ALLOC_WITH_TYPE(perm, ESL_PERMUTATION*, sizeof(ESL_PERMUTATION));
-  perm->n  = n;
-  perm->pi = NULL;           /* keeps the ERROR path safe if the next alloc fails */
-  ESL_ALLOC_WITH_TYPE(perm->pi, int*, sizeof(int) * n);
-
-  /* start as the identity permutation */
-  for (k = 0; k < n; k++)
-    perm->pi[k] = k;
-  return perm;
-
-ERROR:
-  esl_permutation_Destroy(perm);
-  return NULL;
-}
-
-/* Function:  esl_permutation_Destroy()
- *
- * Purpose:   Releases an <ESL_PERMUTATION> object <P>: its index vector
- *            (if allocated), then the object itself. <P> may be NULL.
- *
- * Returns:   <eslOK>, always.
- */
-int
-esl_permutation_Destroy(ESL_PERMUTATION *P)
-{
-  if (P == NULL) return eslOK;
-
-  if (P->pi != NULL) free(P->pi);
-  free(P);
-  return eslOK;
-}
-
-/* Function:  esl_permutation_Reuse()
- *
- * Purpose:   Puts permutation matrix <P> back into its identity state,
- *            $p_i = i$ for all $i = 0..n-1$.
- *
- * Returns:   <eslOK> on success.
- */
-int
-esl_permutation_Reuse(ESL_PERMUTATION *P)
-{
-  int k = P->n;
-
-  while (k-- > 0)
-    P->pi[k] = k;
-  return eslOK;
-}
-
-
-/*****************************************************************
-* 4. Debugging/validation for ESL_PERMUTATION.
-*****************************************************************/
-
-/* Function: esl_permutation_Dump()
-*
-* Purpose: Given a permutation matrix <P>, dump it to output stream <ofp>
-* in human-readable format.
-*
-* If <rowlabel> or <collabel> are non-NULL, they represent
-* single-character labels to put on the rows and columns,
-* respectively. (For example, these might be a sequence
-* alphabet for a 4x4 or 20x20 rate matrix or substitution
-* matrix.) Numbers 1..ncols or 1..nrows are used if
-* <collabel> or <rowlabel> are NULL.
-*
-* Args: ofp - output file pointer; stdout, for example
-* P - permutation matrix to dump
-* rowlabel - optional: NULL, or character labels for rows
-* collabel - optional: NULL, or character labels for cols
-*
-* Returns: <eslOK> on success.
-*/
-int
-esl_permutation_Dump(FILE *ofp, const ESL_PERMUTATION *P, const char *rowlabel, const char *collabel)
-{
- int i,j;
-
- fprintf(ofp, " ");
- if (collabel != NULL)
- for (j = 0; j < P->n; j++) fprintf(ofp, " %c ", collabel[j]);
- else
- for (j = 0; j < P->n; j++) fprintf(ofp, "%3d ", j+1);
- fprintf(ofp, "\n");
-
- for (i = 0; i < P->n; i++) {
- if (rowlabel != NULL) fprintf(ofp, " %c ", rowlabel[i]);
- else fprintf(ofp, "%3d ", i+1);
-
- for (j = 0; j < P->n; j++)
- fprintf(ofp, "%3d ", (j == P->pi[i]) ? 1 : 0);
- fprintf(ofp, "\n");
- }
- return eslOK;
-}
-
-/*****************************************************************
-* 5. The rest of the dmatrix API.
-*****************************************************************/
-
-
-
-/* Function: esl_dmx_Max()
-* Incept: SRE, Thu Mar 1 14:46:48 2007 [Janelia]
-*
-* Purpose: Returns the maximum value of all the elements $a_{ij}$ in matrix <A>.
-*/
-double
-esl_dmx_Max(const ESL_DMATRIX *A)
-{
- int i;
- double best;
-
- best = A->mx[0][0];
- for (i = 0; i < A->ncells; i++)
- if (A->mx[0][i] > best) best = A->mx[0][i];
- return best;
-}
-
-/* Function: esl_dmx_Min()
-* Incept: SRE, Thu Mar 1 14:49:29 2007 [Janelia]
-*
-* Purpose: Returns the minimum value of all the elements $a_{ij}$ in matrix <A>.
-*/
-double
-esl_dmx_Min(const ESL_DMATRIX *A)
-{
- int i;
- double best;
-
- best = A->mx[0][0];
- for (i = 0; i < A->ncells; i++)
- if (A->mx[0][i] < best) best = A->mx[0][i];
- return best;
-}
-
-
-/* Function: esl_dmx_MinMax()
-* Incept: SRE, Wed Mar 14 16:58:03 2007 [Janelia]
-*
-* Purpose: Finds the maximum and minimum values of the
-* elements $a_{ij}$ in matrix <A>, and returns
-* them in <ret_min> and <ret_max>.
-*
-* Returns: <eslOK> on success.
-*
-*/
-int
-esl_dmx_MinMax(const ESL_DMATRIX *A, double *ret_min, double *ret_max)
-{
- double min, max;
- int i;
-
- min = max = A->mx[0][0];
- for (i = 0; i < A->ncells; i++) {
- if (A->mx[0][i] < min) min = A->mx[0][i];
- if (A->mx[0][i] > max) max = A->mx[0][i];
- }
- *ret_min = min;
- *ret_max = max;
- return eslOK;
-}
-
-
-
-/* Function: esl_dmx_Sum()
-* Incept: SRE, Thu Mar 1 16:45:16 2007
-*
-* Purpose: Returns the scalar sum of all the elements $a_{ij}$ in matrix <A>,
-* $\sum_{ij} a_{ij}$.
-*/
-double
-esl_dmx_Sum(const ESL_DMATRIX *A)
-{
- int i;
- double sum = 0.;
-
- for (i = 0; i < A->ncells; i++)
- sum += A->mx[0][i];
- return sum;
-}
-
-
-/* Function: esl_dmx_FrobeniusNorm()
-* Incept: SRE, Thu Mar 15 17:59:35 2007 [Janelia]
-*
-* Purpose: Calculates the Frobenius norm of a matrix, which
-* is the element-wise equivalent of a
-* Euclidean vector norm:
-* $\|A\|_F = \sqrt{\sum_{ij} a_{ij}^2}$
-*
-* Args: A - matrix
-* ret_fnorm - Frobenius norm.
-*
-* Returns: <eslOK> on success, and the Frobenius norm
-* is in <ret_fnorm>.
-*/
-int
-esl_dmx_FrobeniusNorm(const ESL_DMATRIX *A, double *ret_fnorm)
-{
- double F = 0.;
- int i;
-
- for (i = 0; i < A->ncells; i++)
- F += A->mx[0][i] * A->mx[0][i];
- *ret_fnorm = sqrt(F);
- return eslOK;
-}
-
-
-/* Function: esl_dmx_Multiply()
-*
-* Purpose: Matrix multiplication: calculate <AB>, store result in <C>.
-* <A> is $n \times m$; <B> is $m \times p$; <C> is $n \times p$.
-* Matrix <C> must be allocated appropriately by the caller.
-*
-* Not supported for anything but general (<eslGENERAL>)
-* matrix type, at present.
-*
-* Throws: <eslEINVAL> if matrices don't have compatible dimensions,
-* or if any of them isn't a general (<eslGENERAL>) matrix.
-*/
-int
-esl_dmx_Multiply(const ESL_DMATRIX *A, const ESL_DMATRIX *B, ESL_DMATRIX *C)
-{
- int i, j, k;
-
- if (A->m != B->n) ESL_EXCEPTION(eslEINVAL, "can't multiply A,B");
- if (A->n != C->n) ESL_EXCEPTION(eslEINVAL, "A,C # of rows not equal");
- if (B->m != C->m) ESL_EXCEPTION(eslEINVAL, "B,C # of cols not equal");
- if (A->type != ESL_DMATRIX::eslGENERAL) ESL_EXCEPTION(eslEINVAL, "A isn't of type eslGENERAL");
- if (B->type != ESL_DMATRIX::eslGENERAL) ESL_EXCEPTION(eslEINVAL, "B isn't of type eslGENERAL");
- if (C->type != ESL_DMATRIX::eslGENERAL) ESL_EXCEPTION(eslEINVAL, "B isn't of type eslGENERAL");
-
- /* i,k,j order should optimize stride, relative to a more textbook
- * order for the indices
- */
- esl_dmatrix_SetZero(C);
- for (i = 0; i < A->n; i++)
- for (k = 0; k < A->m; k++)
- for (j = 0; j < B->m; j++)
- C->mx[i][j] += A->mx[i][k] * B->mx[k][j];
-
- return eslOK;
-}
-
-
-/*::cexcerpt::function_comment_example::begin::*/
-/* Function: esl_dmx_Exp()
-* Synopsis: Calculates matrix exponential $\mathbf{P} = e^{t\mathbf{Q}}$.
-* Incept: SRE, Thu Mar 8 18:41:38 2007 [Janelia]
-*
-* Purpose: Calculates the matrix exponential $\mathbf{P} = e^{t\mathbf{Q}}$,
-* using a scaling and squaring algorithm with
-* the Taylor series approximation \citep{MolerVanLoan03}.
-*
-* <Q> must be a square matrix of type <eslGENERAL>.
-* Caller provides an allocated <P> matrix of the same size and type as <Q>.
-*
-* A typical use of this function is to calculate a
-* conditional substitution probability matrix $\mathbf{P}$
-* (whose elements $P_{xy}$ are conditional substitution
-* probabilities $\mathrm{Prob}(y \mid x, t)$) from time $t$
-* and instantaneous rate matrix $\mathbf{Q}$.
-*
-* Args: Q - matrix to exponentiate (an instantaneous rate matrix)
-* t - time units
-* P - RESULT: $e^{tQ}$.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation error.
-*
-* Xref: J1/19.
-*/
-int
-esl_dmx_Exp(const ESL_DMATRIX *Q, double t, ESL_DMATRIX *P)
-{
- /*::cexcerpt::function_comment_example::end::*/
- int status = eslOK;
- ESL_DMATRIX *Qz = NULL; /* Q/2^z rescaled matrix*/
- ESL_DMATRIX *Qpow = NULL; /* keeps running product Q^k */
- ESL_DMATRIX *C = NULL; /* tmp storage for matrix multiply result */
- double factor = 1.0;
- double fnorm;
- int z;
- double zfac;
- int k;
-
- /* Contract checks */
- if (Q->type != ESL_DMATRIX::eslGENERAL) ESL_EXCEPTION(eslEINVAL, "Q isn't general");
- if (Q->n != Q->m) ESL_EXCEPTION(eslEINVAL, "Q isn't square");
- if (P->type != Q->type) ESL_EXCEPTION(eslEINVAL, "P isn't of same type as Q");
- if (P->n != P->m) ESL_EXCEPTION(eslEINVAL, "P isn't square");
- if (P->n != Q->n) ESL_EXCEPTION(eslEINVAL, "P isn't same size as Q");
-
- /* Allocation of working space */
- if ((Qz = esl_dmatrix_Create(Q->n, Q->n)) == NULL) goto ERROR;
- if ((Qpow = esl_dmatrix_Create(Q->n, Q->n)) == NULL) goto ERROR;
- if ((C = esl_dmatrix_Create(Q->n, Q->n)) == NULL) goto ERROR;
-
- /* Figure out how much to scale the matrix down by. This is not
- * magical; we're just knocking its magnitude down in an ad hoc way.
- */
- esl_dmx_FrobeniusNorm(Q, &fnorm);
- zfac = 1.;
- z = 0;
- while (t*fnorm*zfac > 0.1) { zfac /= 2.; z++; }
-
- /* Make a scaled-down copy of Q in Qz.
- */
- esl_dmatrix_Copy(Q, Qz);
- esl_dmx_Scale(Qz, zfac);
-
- /* Calculate e^{t Q_z} by the Taylor series, to complete convergence. */
- esl_dmatrix_SetIdentity(P);
- esl_dmatrix_Copy(Qz, Qpow); /* Qpow is now Qz^1 */
- for (k = 1; k < 100; k++)
- {
- factor *= t/k;
- esl_dmatrix_Copy(P, C); /* C now holds the previous P */
- esl_dmx_AddScale(P, factor, Qpow); /* P += factor*Qpow */
- if (esl_dmatrix_Compare(C, P, 0.) == eslOK) break;
-
- esl_dmx_Multiply(Qpow, Qz, C); /* C = Q^{k+1} */
- esl_dmatrix_Copy(C, Qpow); /* Qpow = C = Q^{k+1} */
- }
-
- /* Now square it back up: e^{tQ} = [e^{tQ_z}]^{2^z} */
- while (z--) {
- esl_dmx_Multiply(P, P, C);
- esl_dmatrix_Copy(C, P);
- }
-
- esl_dmatrix_Destroy(Qz);
- esl_dmatrix_Destroy(Qpow);
- esl_dmatrix_Destroy(C);
- return eslOK;
-
-ERROR:
- if (Qz != NULL) esl_dmatrix_Destroy(Qz);
- if (Qpow != NULL) esl_dmatrix_Destroy(Qpow);
- if (C != NULL) esl_dmatrix_Destroy(C);
- return status;
-}
-
-
-/* Function: esl_dmx_Transpose()
-*
-* Purpose: Transpose a square matrix <A> in place.
-*
-* <A> must be a general (<eslGENERAL>) matrix type.
-*
-* Throws: <eslEINVAL> if <A> isn't square, or if it isn't
-* of type <eslGENERAL>.
-*/
-int
-esl_dmx_Transpose(ESL_DMATRIX *A)
-{
- int i,j;
- double swap;
-
- if (A->n != A->m) ESL_EXCEPTION(eslEINVAL, "matrix isn't square");
- if (A->type != ESL_DMATRIX::eslGENERAL) ESL_EXCEPTION(eslEINVAL, "A isn't of type eslGENERAL");
-
- for (i = 0; i < A->n; i++)
- for (j = i+1; j < A->m; j++)
- { swap = A->mx[i][j]; A->mx[i][j] = A->mx[j][i]; A->mx[j][i] = swap; }
- return eslOK;
-}
-
-
-/* Function: esl_dmx_Add()
-*
-* Purpose: <A = A+B>; adds matrix <B> to matrix <A> and leaves result
-* in matrix <A>.
-*
-* <A> and <B> may be of any type. However, if <A> is a
-* packed upper triangular matrix (type
-* <eslUPPER>), all values $i>j$ in <B> must be
-* zero (i.e. <B> must also be upper triangular, though
-* not necessarily packed upper triangular).
-*
-* Throws: <eslEINVAL> if matrices aren't the same dimensions, or
-* if <A> is <eslUPPER> and any cell $i>j$ in
-* <B> is nonzero.
-*/
-int
-esl_dmx_Add(ESL_DMATRIX *A, const ESL_DMATRIX *B)
-{
- int i,j;
-
- if (A->n != B->n) ESL_EXCEPTION(eslEINVAL, "matrices of different size");
- if (A->m != B->m) ESL_EXCEPTION(eslEINVAL, "matrices of different size");
-
- if (A->type == B->type) /* in this case, can just add cell by cell */
- {
- for (i = 0; i < A->ncells; i++)
- A->mx[0][i] += B->mx[0][i];
- }
- else if (A->type == ESL_DMATRIX::eslUPPER || B->type == ESL_DMATRIX::eslUPPER)
- {
- /* Logic is: if either matrix is upper triangular, then the operation is
- * to add upper triangles only. If we try to add a general matrix <B>
- * to packed UT <A>, make sure all lower triangle entries in <B> are zero.
- */
- if (B->type != ESL_DMATRIX::eslUPPER) {
- for (i = 1; i < A->n; i++)
- for (j = 0; j < i; j++)
- if (B->mx[i][j] != 0.) ESL_EXCEPTION(eslEINVAL, "<B> has nonzero cells in lower triangle");
- }
- for (i = 0; i < A->n; i++)
- for (j = i; j < A->m; j++)
- A->mx[i][j] += B->mx[i][j];
- }
- return eslOK;
-}
-
-/* Function: esl_dmx_Scale()
-*
-* Purpose: Calculates <A = kA>: multiply matrix <A> by scalar
-* <k> and leave answer in <A>.
-*/
-int
-esl_dmx_Scale(ESL_DMATRIX *A, double k)
-{
- int i;
-
- for (i = 0; i < A->ncells; i++) A->mx[0][i] *= k;
- return eslOK;
-}
-
-
-/* Function: esl_dmx_AddScale()
-*
-* Purpose: Calculates <A + kB>, leaves answer in <A>.
-*
-* Only defined for matrices of the same type (<eslGENERAL>
-* or <eslUPPER>).
-*
-* Throws: <eslEINVAL> if matrices aren't the same dimensions, or
-* of different types.
-*/
-int
-esl_dmx_AddScale(ESL_DMATRIX *A, double k, const ESL_DMATRIX *B)
-{
- int i;
-
- if (A->n != B->n) ESL_EXCEPTION(eslEINVAL, "matrices of different size");
- if (A->m != B->m) ESL_EXCEPTION(eslEINVAL, "matrices of different size");
- if (A->type != A->type) ESL_EXCEPTION(eslEINVAL, "matrices of different type");
-
- for (i = 0; i < A->ncells; i++) A->mx[0][i] += k * B->mx[0][i];
- return eslOK;
-}
-
-
-/* Function: esl_dmx_Permute_PA()
-*
-* Purpose: Computes <B = PA>: do a row-wise permutation of a square
-* matrix <A>, using the permutation matrix <P>, and put
-* the result in a square matrix <B> that the caller has
-* allocated.
-*
-* Throws: <eslEINVAL> if <A>, <B>, <P> do not have compatible dimensions,
-* or if <A> or <B> is not of type <eslGENERAL>.
-*/
-int
-esl_dmx_Permute_PA(const ESL_PERMUTATION *P, const ESL_DMATRIX *A, ESL_DMATRIX *B)
-{
- int i,ip,j;
-
- if (A->n != P->n) ESL_EXCEPTION(eslEINVAL, "matrix dimensions not compatible");
- if (A->n != B->n) ESL_EXCEPTION(eslEINVAL, "matrix dimensions not compatible");
- if (A->n != A->m) ESL_EXCEPTION(eslEINVAL, "matrix dimensions not compatible");
- if (B->n != B->m) ESL_EXCEPTION(eslEINVAL, "matrix dimensions not compatible");
- if (A->type != ESL_DMATRIX::eslGENERAL) ESL_EXCEPTION(eslEINVAL, "matrix A not of type eslGENERAL");
- if (B->type != ESL_DMATRIX::eslGENERAL) ESL_EXCEPTION(eslEINVAL, "matrix B not of type eslGENERAL");
-
- for (i = 0; i < A->n; i++)
- {
- ip = P->pi[i];
- for (j = 0; j < A->m; j++)
- B->mx[i][j] = A->mx[ip][j];
- }
- return eslOK;
-}
-
-/* Function: esl_dmx_LUP_decompose()
-*
-* Purpose: Calculates a permuted LU decomposition of square matrix
-* <A>; upon return, <A> is replaced by this decomposition,
-* where <U> is in the upper triangle (inclusive of the
-* diagonal) and <L> is the lower triangle (exclusive of
-* diagonal, which is 1's by definition), and <P> is the
-* permutation matrix. Caller provides an allocated
-* permutation matrix <P> compatible with the square matrix
-* <A>.
-*
-* Implements Gaussian elimination with pivoting
-* \citep[p.~759]{Cormen99}.
-*
-* Throws: <eslEINVAL> if <A> isn't square, or if <P> isn't the right
-* size for <A>, or if <A> isn't of general type.
-*/
-int
-esl_dmx_LUP_decompose(ESL_DMATRIX *A, ESL_PERMUTATION *P)
-{
- int i,j,k,kpiv;
- double max;
- double swap;
-
- if (A->n != A->m) ESL_EXCEPTION(eslEINVAL, "matrix isn't square");
- if (P->n != A->n) ESL_EXCEPTION(eslEINVAL, "permutation isn't the right size");
- if (A->type != ESL_DMATRIX::eslGENERAL) ESL_EXCEPTION(eslEINVAL, "matrix isn't of general type");
- esl_permutation_Reuse(P);
-
- for (k = 0; k < A->n-1; k++)
- {
- /* Identify our pivot;
- * find row with maximum value in col[k].
- */
- max = 0.;
- for (i = k; i < A->n; i++)
- if (fabs(A->mx[i][k]) > max) {
- max = fabs(A->mx[i][k]);
- kpiv = i;
- }
- if (max == 0.) ESL_EXCEPTION(eslEDIVZERO, "matrix is singular");
-
- /* Swap those rows (k and kpiv);
- * and keep track of that permutation in P. (misuse j for swapping integers)
- */
- j = P->pi[k]; P->pi[k] = P->pi[kpiv]; P->pi[kpiv] = j;
- for (j = 0; j < A->m; j++)
- { swap = A->mx[k][j]; A->mx[k][j] = A->mx[kpiv][j]; A->mx[kpiv][j] = swap; }
-
- /* Gaussian elimination for all rows k+1..n.
- */
- for (i = k+1; i < A->n; i++)
- {
- A->mx[i][k] /= A->mx[k][k];
- for (j = k+1; j < A->m; j++)
- A->mx[i][j] -= A->mx[i][k] * A->mx[k][j];
- }
- }
- return eslOK;
-}
-
-
-/* Function: esl_dmx_LU_separate()
-*
-* Purpose: Separate a square <LU> decomposition matrix into its two
-* triangular matrices <L> and <U>. Caller provides two
-* allocated <L> and <U> matrices of same size as <LU> for
-* storing the results.
-*
-* <U> may be an upper triangular matrix in either unpacked
-* (<eslGENERAL>) or packed (<eslUPPER>) form.
-* <LU> and <L> must be of <eslGENERAL> type.
-*
-* Throws: <eslEINVAL> if <LU>, <L>, <U> are not of compatible dimensions,
-* or if <LU> or <L> aren't of general type.
-*/
-int
-esl_dmx_LU_separate(const ESL_DMATRIX *LU, ESL_DMATRIX *L, ESL_DMATRIX *U)
-{
- int i,j;
-
- if (LU->n != LU->m) ESL_EXCEPTION(eslEINVAL, "LU isn't square");
- if (L->n != L->m) ESL_EXCEPTION(eslEINVAL, "L isn't square");
- if (U->n != U->m) ESL_EXCEPTION(eslEINVAL, "U isn't square");
- if (LU->n != L->n) ESL_EXCEPTION(eslEINVAL, "LU, L have incompatible dimensions");
- if (LU->n != U->n) ESL_EXCEPTION(eslEINVAL, "LU, U have incompatible dimensions");
- if (LU->type != ESL_DMATRIX::eslGENERAL) ESL_EXCEPTION(eslEINVAL, "matrix isn't of general type");
- if (L->type != ESL_DMATRIX::eslGENERAL) ESL_EXCEPTION(eslEINVAL, "matrix isn't of general type");
-
- esl_dmatrix_SetZero(L);
- esl_dmatrix_SetZero(U);
-
- for (i = 0; i < LU->n; i++)
- for (j = i; j < LU->m; j++)
- U->mx[i][j] = LU->mx[i][j];
-
- for (i = 0; i < LU->n; i++)
- {
- L->mx[i][i] = 1.;
- for (j = 0; j < i; j++)
- L->mx[i][j] = LU->mx[i][j];
- }
- return eslOK;
-}
-
-/* Function: esl_dmx_Invert()
-*
-* Purpose: Calculates the inverse of square matrix <A>, and stores the
-* result in matrix <Ai>. Caller provides an allocated
-* matrix <Ai> of same dimensions as <A>. Both must be
-* of type <eslGENERAL>.
-*
-* Performs the inversion by LUP decomposition followed by
-* forward/back-substitution \citep[p.~753]{Cormen99}.
-*
-* Throws: <eslEINVAL> if <A>, <Ai> do not have same dimensions,
-* if <A> isn't square, or if either isn't of
-* type <eslGENERAL>.
-* <eslEMEM> if internal allocations (for LU, and some other
-* bookkeeping) fail.
-*/
-int
-esl_dmx_Invert(const ESL_DMATRIX *A, ESL_DMATRIX *Ai)
-{
- ESL_DMATRIX *LU = NULL;
- ESL_PERMUTATION *P = NULL;
- double *y = NULL; /* column vector, intermediate calculation */
- double *b = NULL; /* column vector of permuted identity matrix */
- int i,j,k;
- int status;
-
- if (A->n != A->m) ESL_EXCEPTION(eslEINVAL, "matrix isn't square");
- if (A->n != Ai->n || A->m != Ai->m) ESL_EXCEPTION(eslEINVAL, "matrices are different size");
- if (A->type != ESL_DMATRIX::eslGENERAL) ESL_EXCEPTION(eslEINVAL, "matrix A not of general type");
- if (Ai->type != ESL_DMATRIX::eslGENERAL) ESL_EXCEPTION(eslEINVAL, "matrix B not of general type");
-
- /* Copy A to LU, and do an LU decomposition.
- */
- if ((LU = esl_dmatrix_Create(A->n, A->m)) == NULL) goto ERROR;
- if ((P = esl_permutation_Create(A->n)) == NULL) goto ERROR;
- if ( esl_dmatrix_Copy(A, LU) != eslOK) goto ERROR;
- if ( esl_dmx_LUP_decompose(LU, P) != eslOK) goto ERROR;
-
- /* Now we have:
- * PA = LU
- *
- * to invert a matrix A, we want A A^-1 = I;
- * that's PAx = Pb, for columns x of A^-1 and b of the identity matrix;
- * and that's n equations LUx = Pb;
- *
- * so, solve Ly = Pb for y by forward substitution;
- * then Ux = y by back substitution;
- * x is then a column of A^-1.
- *
- * Do that for all columns.
- */
- ESL_ALLOC_WITH_TYPE(b, double*, sizeof(double) * A->n);
- ESL_ALLOC_WITH_TYPE(y, double*, sizeof(double) * A->n);
-
- for (k = 0; k < A->m; k++) /* for each column... */
- {
- /* build Pb for column k of the identity matrix */
- for (i = 0; i < A->n; i++)
- if (P->pi[i] == k) b[i] = 1.; else b[i] = 0.;
-
- /* forward substitution
- */
- for (i = 0; i < A->n; i++)
- {
- y[i] = b[i];
- for (j = 0; j < i; j++) y[i] -= LU->mx[i][j] * y[j];
- }
-
- /* back substitution
- */
- for (i = A->n-1; i >= 0; i--)
- {
- Ai->mx[i][k] = y[i];
- for (j = i+1; j < A->n; j++) Ai->mx[i][k] -= LU->mx[i][j] * Ai->mx[j][k];
- Ai->mx[i][k] /= LU->mx[i][i];
- }
- }
-
- free(b);
- free(y);
- esl_dmatrix_Destroy(LU);
- esl_permutation_Destroy(P);
- return eslOK;
-
-ERROR:
- if (y != NULL) free(y);
- if (b != NULL) free(b);
- if (LU != NULL) esl_dmatrix_Destroy(LU);
- if (P != NULL) esl_permutation_Destroy(P);
- return status;
-}
-
-
-/*****************************************************************
-* 6. Optional: interoperability with GSL
-*****************************************************************/
-#ifdef HAVE_LIBGSL
-
-#include <gsl/gsl_matrix.h>
-
-int
-esl_dmx_MorphGSL(const ESL_DMATRIX *E, gsl_matrix **ret_G)
-{
- gsl_matrix *G = NULL;
- int i,j;
-
- if (E->type != eslGENERAL) ESL_EXCEPTION(eslEINVAL, "can only morph general matrices to GSL right now");
-
- G = gsl_matrix_alloc(E->m, E->n);
- for (i = 0; i < E->m; i++)
- for (j = 0; j < E->n; j++)
- gsl_matrix_set(G, i, j, E->mx[i][j]);
- *ret_G = G;
- return eslOK;
-}
-
-int
-esl_dmx_UnmorphGSL(const gsl_matrix *G, ESL_DMATRIX **ret_E)
-{
- ESL_DMATRIX *E = NULL;
- int i,j;
-
- if ((E = esl_dmatrix_Create(G->size1, G->size2)) == NULL) return eslEMEM;
- for (i = 0; i < G->size1; i++)
- for (j = 0; j < G->size2; j++)
- E->mx[i][j] = gsl_matrix_get(G, i, j);
- *ret_E = E;
- return eslOK;
-}
-
-#endif /*HAVE_LIBGSL*/
-
-/*****************************************************************
-* 7. Optional: Interfaces to LAPACK
-*****************************************************************/
-#ifdef HAVE_LIBLAPACK
-
-/* To include LAPACK code, you need to:
-* 1. declare the C interface to the Fortran routine,
-* appending _ to the Fortran routine's name (dgeev becomes dgeev_)
-*
-* 2. Remember to transpose matrices into column-major
-* Fortran form
-*
-* 3. everything must be passed by reference, not by value
-*
-* 4. you don't need any include files, just lapack.a
-*
-* 5. Add -llapack to the compile line.
-* (It doesn't appear that blas or g2c are needed?)
-*/
-
-/* Declare the C interface to the Fortran77 dgeev routine
-* provided by the LAPACK library:
-*/
-extern void dgeev_(char *jobvl, char *jobvr, int *n, double *a,
- int *lda, double *wr, double *wi, double *vl,
- int *ldvl, double *vr, int *ldvr,
- double *work, int *lwork, int *info);
-
-
-/* Function: esl_dmx_Diagonalize()
-* Incept: SRE, Thu Mar 15 09:28:03 2007 [Janelia]
-*
-* Purpose: Given a square real matrix <A>, diagonalize it:
-* solve for $U^{-1} A U = diag(\lambda_1... \lambda_n)$.
-*
-* Upon return, <ret_Er> and <ret_Ei> are vectors
-* containing the real and imaginary parts of the eigenvalues
-* $\lambda_i$; <ret_UL> is the $U^{-1}$ matrix containing
-* the left eigenvectors; and <ret_UR> is the $U$ matrix
-* containing the right eigenvectors.
-*
-* <ret_UL> and <ret_UR> are optional; pass <NULL> for
-* either if you don't want that set of eigenvectors.
-*
-* This is a C interface to the <dgeev()> routine in the
-* LAPACK linear algebra library.
-*
-* Args: A - square nxn matrix to diagonalize
-* ret_Er - RETURN: real part of eigenvalues (0..n-1)
-* ret_Ei - RETURN: imaginary part of eigenvalues (0..n-1)
-* ret_UL - optRETURN: nxn matrix of left eigenvectors
-* ret_UR - optRETURN: nxn matrix of right eigenvectors
-*
-* Returns: <eslOK> on success.
-* <ret_Er> and <ret_Ei> (and <ret_UL>,<ret_UR> when they are
-* requested) are allocated here, and must be free'd by the caller.
-*
-* Throws: <eslEMEM> on allocation failure.
-* In this case, the four return pointers are returned <NULL>.
-*
-* Xref: J1/19.
-*/
-int
-esl_dmx_Diagonalize(const ESL_DMATRIX *A, double **ret_Er, double **ret_Ei,
- ESL_DMATRIX **ret_UL, ESL_DMATRIX **ret_UR)
-{
- int status;
- double *Er = NULL;
- double *Ei = NULL;
- ESL_DMATRIX *At = NULL;
- ESL_DMATRIX *UL = NULL;
- ESL_DMATRIX *UR = NULL;
- double *work = NULL;
- char jobul, jobur;
- int lda;
- int ldul, ldur;
- int lwork;
- int info;
-
- if (A->n != A->m) ESL_EXCEPTION(eslEINVAL, "matrix isn't square");
-
- if ((At = esl_dmatrix_Clone(A)) == NULL) { status = eslEMEM; goto ERROR; }
- if ((UL = esl_dmatrix_Create(A->n,A->n)) == NULL) { status = eslEMEM; goto ERROR; }
- if ((UR = esl_dmatrix_Create(A->n,A->n)) == NULL) { status = eslEMEM; goto ERROR; }
- ESL_ALLOC_WITH_TYPE(Er, double*, sizeof(double) * A->n);
- ESL_ALLOC_WITH_TYPE(Ei, double*, sizeof(double) * A->n);
- ESL_ALLOC_WITH_TYPE(work, double*, sizeof(double) * 8 * A->n);
-
- jobul = (ret_UL == NULL) ? 'N' : 'V'; /* do we want left eigenvectors? */
- jobur = (ret_UR == NULL) ? 'N' : 'V'; /* do we want right eigenvectors? */
- lda = A->n;
- ldul = A->n;
- ldur = A->n;
- lwork = 8*A->n;
-
- /* Fortran convention is colxrow, not rowxcol; so transpose
- * a copy of A before passing it to a Fortran routine.
- */
- esl_dmx_Transpose(At);
-
- /* The Fortran77 interface call to LAPACK's dgeev().
- * All args must be passed by reference.
- * Fortran 2D arrays are 1D: so pass the A[0] part of a DSMX.
- */
- dgeev_(&jobul, &jobur, &(At->n), At->mx[0], &lda, Er, Ei,
- UL->mx[0], &ldul, UR->mx[0], &ldur, work, &lwork, &info);
- if (info < 0) ESL_XEXCEPTION(eslEINVAL, "argument %d to LAPACK dgeev is invalid", -info);
- if (info > 0) ESL_XEXCEPTION(eslEINVAL,
- "diagonalization failed; only eigenvalues %d..%d were computed",
- info+1, At->n);
-
- /* Now, UL, UR are transposed (col x row), so transpose them back to
- * C language convention.
- */
- esl_dmx_Transpose(UL);
- esl_dmx_Transpose(UR);
-
- esl_dmatrix_Destroy(At);
- if (ret_UL != NULL) *ret_UL = UL; else esl_dmatrix_Destroy(UL);
- if (ret_UR != NULL) *ret_UR = UR; else esl_dmatrix_Destroy(UR);
- if (ret_Er != NULL) *ret_Er = Er; else free(Er);
- if (ret_Ei != NULL) *ret_Ei = Ei; else free(Ei);
- free(work);
- return eslOK;
-
-ERROR:
- if (ret_UL != NULL) *ret_UL = NULL;
- if (ret_UR != NULL) *ret_UR = NULL;
- if (ret_Er != NULL) *ret_Er = NULL;
- if (ret_Ei != NULL) *ret_Ei = NULL;
- if (At != NULL) esl_dmatrix_Destroy(At);
- if (UL != NULL) esl_dmatrix_Destroy(UL);
- if (UR != NULL) esl_dmatrix_Destroy(UR);
- if (Er != NULL) free(Er);
- if (Ei != NULL) free(Ei);
- if (work != NULL) free(work);
- return status;
-}
-
-
-#endif /*HAVE_LIBLAPACK*/
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_dmatrix.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_dmatrix.h
deleted file mode 100644
index 66d2ef5..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_dmatrix.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/* dmatrix.h
-*
-* SRE, Tue Jul 13 14:41:07 2004 [St. Louis]
-* SVN $Id: esl_dmatrix.h 159 2007-03-22 18:07:24Z eddys $
-*/
-#ifndef ESL_DMATRIX_INCLUDED
-#define ESL_DMATRIX_INCLUDED
-
-#include <stdio.h>
-
-typedef struct {
- /*mx, mx[0] are allocated. */
- /*::cexcerpt::dmatrix_obj::begin::*/
- double **mx; /* mx[i][j] is i'th row, j'th col */
- int n; /* rows */
- int m; /* columns */
- enum { eslGENERAL, eslUPPER } type;
- /*::cexcerpt::dmatrix_obj::end::*/
- int ncells; /* number of valid cells (nxm in standard matrix) */
-} ESL_DMATRIX;
-
-typedef struct {
- int *pi;
- int n;
-} ESL_PERMUTATION;
-
-/* 1. The ESL_DMATRIX object. */
-extern ESL_DMATRIX *esl_dmatrix_Create(int n, int m);
-extern ESL_DMATRIX *esl_dmatrix_CreateUpper(int n);
-extern int esl_dmatrix_Destroy(ESL_DMATRIX *A);
-extern int esl_dmatrix_Copy (const ESL_DMATRIX *src, ESL_DMATRIX *dest);
-extern ESL_DMATRIX *esl_dmatrix_Clone (const ESL_DMATRIX *old);
-extern int esl_dmatrix_Compare(const ESL_DMATRIX *A, const ESL_DMATRIX *B, double tol);
-extern int esl_dmatrix_Set (ESL_DMATRIX *A, double x);
-extern int esl_dmatrix_SetZero(ESL_DMATRIX *A);
-extern int esl_dmatrix_SetIdentity(ESL_DMATRIX *A);
-
-/* 2. Debugging/validation for ESL_DMATRIX. */
-extern int esl_dmatrix_Dump(FILE *ofp, const ESL_DMATRIX *A,
- const char *rowlabel, const char *collabel);
-
-/* 3. The ESL_PERMUTATION object. */
-extern ESL_PERMUTATION *esl_permutation_Create(int n);
-extern int esl_permutation_Destroy(ESL_PERMUTATION *P);
-extern int esl_permutation_Reuse(ESL_PERMUTATION *P);
-
-/* 4. Debugging/validation for ESL_PERMUTATION. */
-extern int esl_permutation_Dump(FILE *ofp, const ESL_PERMUTATION *P,
- const char *rowlabel, const char *collabel);
-
-/* 5. The rest of the dmatrix API. */
-extern double esl_dmx_Max (const ESL_DMATRIX *A);
-extern double esl_dmx_Min (const ESL_DMATRIX *A);
-extern double esl_dmx_Sum (const ESL_DMATRIX *A);
-extern int esl_dmx_MinMax(const ESL_DMATRIX *A, double *ret_min, double *ret_max);
-extern int esl_dmx_FrobeniusNorm(const ESL_DMATRIX *A, double *ret_fnorm);
-extern int esl_dmx_Multiply(const ESL_DMATRIX *A, const ESL_DMATRIX *B, ESL_DMATRIX *C);
-extern int esl_dmx_Exp(const ESL_DMATRIX *Q, double t, ESL_DMATRIX *P);
-extern int esl_dmx_Transpose(ESL_DMATRIX *A);
-extern int esl_dmx_Add(ESL_DMATRIX *A, const ESL_DMATRIX *B);
-extern int esl_dmx_Scale(ESL_DMATRIX *A, double k);
-extern int esl_dmx_AddScale(ESL_DMATRIX *A, double k, const ESL_DMATRIX *B);
-extern int esl_dmx_Permute_PA(const ESL_PERMUTATION *P, const ESL_DMATRIX *A, ESL_DMATRIX *B);
-extern int esl_dmx_LUP_decompose(ESL_DMATRIX *A, ESL_PERMUTATION *P);
-extern int esl_dmx_LU_separate(const ESL_DMATRIX *LU, ESL_DMATRIX *L, ESL_DMATRIX *U);
-extern int esl_dmx_Invert(const ESL_DMATRIX *A, ESL_DMATRIX *Ai);
-
-/* 6. Optional: interoperability with GSL */
-#ifdef HAVE_LIBGSL
-#include <gsl/gsl_matrix.h>
-extern int esl_dmx_MorphGSL(const ESL_DMATRIX *E, gsl_matrix **ret_G);
-extern int esl_dmx_UnmorphGSL(const gsl_matrix *G, ESL_DMATRIX **ret_E);
-#endif
-
-/* 7. Optional: interfaces to LAPACK */
-#ifdef HAVE_LIBLAPACK
-extern int esl_dmx_Diagonalize(const ESL_DMATRIX *A, double **ret_Er, double **ret_Ei, ESL_DMATRIX **ret_UL, ESL_DMATRIX **ret_UR);
-#endif
-
-#endif /*ESL_DMATRIX_INCLUDED*/
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_exponential.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_exponential.cpp
deleted file mode 100644
index 9df249d..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_exponential.cpp
+++ /dev/null
@@ -1,442 +0,0 @@
-/* Statistical routines for exponential distributions.
-*
-* Contents:
-* 1. Routines for evaluating densities and distributions
-* 2. Generic API routines: for general interface w/ histogram module
-* 3. Routines for dumping plots for files
-* 4. Routines for sampling (requires random module)
-* 5. Maximum likelihood fitting
-* 10. Copyright and license information
-*
-* SRE, Wed Aug 10 08:15:57 2005 [St. Louis]
-* xref STL9/138
-* SVN $Id: esl_exponential.c 326 2009-02-28 15:49:07Z eddys $
-*/
-#include "esl_config.h"
-
-#include <stdio.h>
-#include <math.h>
-
-#include "easel.h"
-#include "esl_stats.h"
-#include "esl_exponential.h"
-
-#ifdef eslAUGMENT_RANDOM
-#include "esl_random.h"
-#endif
-#ifdef eslAUGMENT_HISTOGRAM
-#include "esl_histogram.h"
-#endif
-
-/****************************************************************************
-* 1. Routines for evaluating densities and distributions
-****************************************************************************/
-/* lambda > 0
-* mu <= x < infinity
-*
-* watch out:
-* - any lambda > 0 is valid... including infinity. Fitting code
-* may try to test such lambdas, and it must get back valid numbers,
-* never an NaN, or it will fail. IEEE754 allows us
-* to calculate log(inf) = inf, exp(-inf) = 0, and exp(inf) = inf.
-* But inf-inf = NaN, so Don't Do That.
-*/
-
-/* Function: esl_exp_pdf()
-* Incept: SRE, Wed Aug 10 08:30:46 2005 [St. Louis]
-*
-* Purpose: Calculates the probability density function for the
-* exponential, $P(X=x)$, given value <x>, offset <mu>,
-* and decay parameter <lambda>.
-*/
-double
-esl_exp_pdf(double x, double mu, double lambda)
-{
- if (x < mu) return 0.;
- return (lambda * exp(-lambda*(x-mu)));
-}
-
-/* Function: esl_exp_logpdf()
-* Incept: SRE, Wed Aug 10 08:35:06 2005 [St. Louis]
-*
-* Purpose: Calculates the log probability density function for the
-* exponential, $P(X=x)$, given value <x>, offset <mu>,
-* and decay parameter <lambda>.
-*/
-double
-esl_exp_logpdf(double x, double mu, double lambda)
-{
- if (x < mu) return -eslINFINITY;
-
- if (lambda == eslINFINITY)
- { /* limit as lambda->inf: avoid inf-inf! */
- if (x == mu) return eslINFINITY;
- else return -eslINFINITY;
- }
- return (log(lambda) - lambda*(x-mu));
-}
-
-/* Function: esl_exp_cdf()
-* Incept: SRE, Wed Aug 10 08:36:04 2005 [St. Louis]
-*
-* Purpose: Calculates the cumulative distribution function for the
-* exponential, $P(X \leq x)$, given value <x>, offset <mu>,
-* and decay parameter <lambda>.
-*/
-double
-esl_exp_cdf(double x, double mu, double lambda)
-{
- double y = lambda*(x-mu); /* y>=0 because lambda>0 and x>=mu */
-
- if (x < mu) return 0.;
-
- /* 1-e^-y ~ y for small |y| */
- if (y < eslSMALLX1) return y;
- else return 1 - exp(-y);
-}
-
-/* Function: esl_exp_logcdf()
-* Incept: SRE, Wed Aug 10 10:03:28 2005 [St. Louis]
-*
-* Purpose: Calculates the log of the cumulative distribution function
-* for the exponential, $log P(X \leq x)$, given value <x>,
-* offset <mu>, and decay parameter <lambda>.
-*/
-double
-esl_exp_logcdf(double x, double mu, double lambda)
-{
- double y = lambda * (x-mu);
- double ey = exp(-y);
-
- if (x < mu) return -eslINFINITY;
-
- /* When y is small, 1-e^-y = y, so answer is log(y);
- * when y is large, exp(-y) is small, log(1-exp(-y)) = -exp(-y).
- */
- if (y == 0) return -eslINFINITY; /* don't allow NaN */
- else if (y < eslSMALLX1) return log(y);
- else if (ey < eslSMALLX1) return -ey;
- else return log(1-ey);
-}
-
-/* Function: esl_exp_surv()
-* Incept: SRE, Wed Aug 10 10:14:49 2005 [St. Louis]
-*
-* Purpose: Calculates the survivor function, $P(X>x)$ (that is, 1-CDF,
-* the right tail probability mass) for an exponential distribution,
-* given value <x>, offset <mu>, and decay parameter <lambda>.
-*/
-double
-esl_exp_surv(double x, double mu, double lambda)
-{
- if (x < mu) return 1.0;
- return exp(-lambda * (x-mu));
-}
-
-/* Function: esl_exp_logsurv()
-* Incept: SRE, Wed Aug 10 10:14:49 2005 [St. Louis]
-*
-* Purpose: Calculates the log survivor function, $\log P(X>x)$ (that is,
-* log(1-CDF), the log of the right tail probability mass) for an
-* exponential distribution, given value <x>, offset <mu>, and
-* decay parameter <lambda>.
-*/
-double
-esl_exp_logsurv(double x, double mu, double lambda)
-{
- if (x < mu) return 0.0;
- return -lambda * (x-mu);
-}
-
-
-/* Function: esl_exp_invcdf()
-* Incept: SRE, Sun Aug 21 12:22:24 2005 [St. Louis]
-*
-* Purpose: Calculates the inverse of the CDF; given a <cdf> value
-* $0 <= p < 1$, returns the value $x$ at which the CDF
-* has that value.
-*/
-double
-esl_exp_invcdf(double p, double mu, double lambda)
-{
- return mu - 1/lambda * log(1. - p);
-}
-/*------------------ end of densities and distributions --------------------*/
-
-
-
-
-/*****************************************************************
-* 2. Generic API routines: for general interface w/ histogram module
-*****************************************************************/
-
-/* Function: esl_exp_generic_pdf()
-* Incept: SRE, Thu Aug 25 07:58:34 2005 [St. Louis]
-*
-* Purpose: Generic-API version of PDF.
-*/
-double
-esl_exp_generic_pdf(double x, void *params)
-{
- double *p = (double *) params;
- return esl_exp_pdf(x, p[0], p[1]);
-}
-
-/* Function: esl_exp_generic_cdf()
-* Incept: SRE, Sun Aug 21 12:25:25 2005 [St. Louis]
-*
-* Purpose: Generic-API version of CDF.
-*/
-double
-esl_exp_generic_cdf(double x, void *params)
-{
- double *p = (double *) params;
- return esl_exp_cdf(x, p[0], p[1]);
-}
-
-/* Function: esl_exp_generic_surv()
-* Incept: SRE, Thu Aug 25 07:59:05 2005[St. Louis]
-*
-* Purpose: Generic-API version of survival function.
-*/
-double
-esl_exp_generic_surv(double x, void *params)
-{
- double *p = (double *) params;
- return esl_exp_surv(x, p[0], p[1]);
-}
-
-/* Function: esl_exp_generic_invcdf()
-* Incept: SRE, Sun Aug 21 12:25:59 2005 [St. Louis]
-*
-* Purpose: Generic-API version of inverse CDF.
-*/
-double
-esl_exp_generic_invcdf(double p, void *params)
-{
- double *v = (double *) params;
- return esl_exp_invcdf(p, v[0], v[1]);
-}
-/*------------------------- end of generic API --------------------------*/
-
-
-
-/****************************************************************************
-* 3. Routines for dumping plots for files
-****************************************************************************/
-
-/* Function: esl_exp_Plot()
-* Incept: SRE, Sun Aug 21 13:16:26 2005 [St. Louis]
-*
-* Purpose: Plot some exponential function <func> (for instance,
-* <esl_exp_pdf()>) for parameters <mu> and <lambda>, for
-* a range of values x from <xmin> to <xmax> in steps of <xstep>;
-* output to an open stream <fp> in xmgrace XY input format.
-*
-* Returns: <eslOK>.
-*/
-//int
-//esl_exp_Plot(FILE *fp, double mu, double lambda,
-// double (*func)(double x, double mu, double lambda),
-// double xmin, double xmax, double xstep)
-//{
-// double x;
-// for (x = xmin; x <= xmax; x += xstep)
-// fprintf(fp, "%f\t%g\n", x, (*func)(x, mu, lambda));
-// fprintf(fp, "&\n");
-// return eslOK;
-//}
-/*-------------------- end plot dumping routines ---------------------------*/
-
-
-
-/****************************************************************************
-* 4. Routines for sampling (requires augmentation w/ random module)
-****************************************************************************/
-#ifdef eslAUGMENT_RANDOM
-
-/* Function: esl_exp_Sample()
-* Incept: SRE, Wed Aug 10 10:46:51 2005 [St. Louis]
-*
-* Purpose: Sample an exponential random variate
-* by the transformation method, given offset <mu>
-* and decay parameter <lambda>.
-*/
-double
-esl_exp_Sample(ESL_RANDOMNESS *r, double mu, double lambda)
-{
- double p, x;
- p = esl_rnd_UniformPositive(r);
-
- x = mu - 1./lambda * log(p); /* really log(1-p), but if p uniform on 0..1
- * then so is 1-p.
- */
- return x;
-}
-#endif /*eslAUGMENT_RANDOM*/
-/*--------------------------- end sampling ---------------------------------*/
-
-
-
-
-/****************************************************************************
-* 5. Maximum likelihood fitting
-****************************************************************************/
-
-/* Function: esl_exp_FitComplete()
-* Incept: SRE, Wed Aug 10 10:53:47 2005 [St. Louis]
-*
-* Purpose: Given an array of <n> samples <x[0]..x[n-1]>, fit
-* them to an exponential distribution.
-* Return maximum likelihood parameters <ret_mu> and <ret_lambda>.
-*
-* Args: x - complete exponentially-distributed data [0..n-1]
-* n - number of samples in <x>
-* ret_mu - lower bound of the distribution (all x_i >= mu)
-* ret_lambda - RETURN: maximum likelihood estimate of lambda
-*
-* Returns: <eslOK> on success.
-*
-* Xref: STL9/138.
-*/
-int
-esl_exp_FitComplete(double *x, int n, double *ret_mu, double *ret_lambda)
-{
- double mu, mean;
- int i;
-
- /* ML mu is the lowest score. mu=x is ok in the exponential.
- */
- mu = x[0];
- for (i = 1; i < n; i++) if (x[i] < mu) mu = x[i];
-
- mean = 0.;
- for (i = 0; i < n; i++) mean += x[i] - mu;
- mean /= (double) n;
-
- *ret_mu = mu;
- *ret_lambda = 1./mean; /* ML estimate trivial & analytic */
- return eslOK;
-}
-
-/* Function: esl_exp_FitCompleteScale()
-* Incept: SRE, Wed Apr 25 11:18:22 2007 [Janelia]
-*
-* Purpose: Given an array of <n> samples <x[0]..x[n-1]>, fit
-* them to an exponential distribution of known location
-* parameter <mu>. Return maximum likelihood scale
-* parameter <ret_lambda>.
-*
-* All $x_i \geq \mu$.
-*
-* Args: x - complete exponentially-distributed data [0..n-1]
-* n - number of samples in <x>
-* mu - lower bound of the distribution (all x_i >= mu)
-* ret_lambda - RETURN: maximum likelihood estimate of lambda
-*
-* Returns: <eslOK> on success.
-*
-* Xref: J1/49.
-*/
-int
-esl_exp_FitCompleteScale(double *x, int n, double mu, double *ret_lambda)
-{
- double mean;
- int i;
-
- mean = 0.;
- for (i = 0; i < n; i++) mean += x[i] - mu;
- mean /= (double) n;
-
- *ret_lambda = 1./mean; /* ML estimate trivial & analytic */
- return eslOK;
-}
-
-
-#ifdef eslAUGMENT_HISTOGRAM
-/* Function: esl_exp_FitCompleteBinned()
-* Incept: SRE, Sun Aug 21 13:07:22 2005 [St. Louis]
-*
-* Purpose: Fit a complete exponential distribution to the observed
-* binned data in a histogram <g>, where each
-* bin i holds some number of observed samples x with values from
-* lower bound l to upper bound u (that is, $l < x \leq u$);
-* find maximum likelihood parameters $\mu,\lambda$ and
-* return them in <*ret_mu>, <*ret_lambda>.
-*
-* If the binned data in <g> were set to focus on
-* a tail by virtual censoring, the "complete" exponential is
-* fitted to this tail. The caller then also needs to
-* remember what fraction of the probability mass was in this
-* tail.
-*
-* The ML estimate for $mu$ is the smallest observed
-* sample. For complete data, <ret_mu> is generally set to
-* the smallest observed sample value, except in the
-* special case of a "rounded" complete dataset, where
-* <ret_mu> is set to the lower bound of the smallest
-* occupied bin. For tails, <ret_mu> is set to the cutoff
-* threshold <phi>, where we are guaranteed that <phi> is
-* at the lower bound of a bin (by how the histogram
-* object sets tails).
-*
-* The ML estimate for <ret_lambda> has an analytical
-* solution, so this routine is fast.
-*
-* If all the data are in one bin, the ML estimate of
-* $\lambda$ will be $\infty$. This is mathematically correct,
-* but is probably a situation the caller wants to avoid, perhaps
-* by choosing smaller bins.
-*
-* This function currently cannot fit an exponential tail
-* to truly censored, binned data, because it assumes that
-* all bins have equal width, but in true censored data, the
-* lower cutoff <phi> may fall anywhere in the first bin.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEINVAL> if dataset is true-censored.
-*/
-int
-esl_exp_FitCompleteBinned(ESL_HISTOGRAM *g, double *ret_mu, double *ret_lambda)
-{
- int i;
- double ai, bi, delta;
- double sa, sb;
- double mu = 0.;
-
- if (g->dataset_is == ESL_HISTOGRAM::COMPLETE)
- {
- if (g->is_rounded) mu = esl_histogram_Bin2LBound(g, g->imin);
- else mu = g->xmin;
- }
- else if (g->dataset_is == ESL_HISTOGRAM::VIRTUAL_CENSORED) /* i.e., we'll fit to tail */
- mu = g->phi;
- else if (g->dataset_is == ESL_HISTOGRAM::TRUE_CENSORED)
- ESL_EXCEPTION(eslEINVAL, "can't fit true censored dataset");
-
- delta = g->w;
- sa = sb = 0.;
- for (i = g->cmin; i <= g->imax; i++) /* for each occupied bin */
- {
- if (g->obs[i] == 0) continue;
- ai = esl_histogram_Bin2LBound(g,i);
- bi = esl_histogram_Bin2UBound(g,i);
- sa += g->obs[i] * (ai-mu);
- sb += g->obs[i] * (bi-mu);
- }
- *ret_mu = mu;
- *ret_lambda = 1/delta * (log(sb) - log(sa));
- return eslOK;
-}
-#endif /*eslAUGMENT_HISTOGRAM*/
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_exponential.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_exponential.h
deleted file mode 100644
index b6989a7..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_exponential.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/* esl_exponential.h
- * Exponential distributions.
- *
- * SRE, Wed Aug 10 08:32:45 2005 [St. Louis]
- * SVN $Id: esl_exponential.h 172 2007-04-26 00:54:53Z eddys $
- */
-#ifndef ESL_EXP_INCLUDED
-#define ESL_EXP_INCLUDED
-
-#ifdef eslAUGMENT_RANDOM
-#include "esl_random.h"
-#endif
-#ifdef eslAUGMENT_HISTOGRAM
-#include <hmmer3/easel/esl_histogram.h>
-#endif
-
-extern double esl_exp_pdf (double x, double mu, double lambda);
-extern double esl_exp_logpdf (double x, double mu, double lambda);
-extern double esl_exp_cdf (double x, double mu, double lambda);
-extern double esl_exp_logcdf (double x, double mu, double lambda);
-extern double esl_exp_surv (double x, double mu, double lambda);
-extern double esl_exp_logsurv(double x, double mu, double lambda);
-extern double esl_exp_invcdf (double p, double mu, double lambda);
-
-extern double esl_exp_generic_pdf (double x, void *params);
-extern double esl_exp_generic_cdf (double x, void *params);
-extern double esl_exp_generic_surv (double x, void *params);
-extern double esl_exp_generic_invcdf(double p, void *params);
-
-//extern int esl_exp_Plot(FILE *fp, double mu, double lambda,
-// double (*func)(double x, double mu, double lambda),
-// double xmin, double xmax, double xstep);
-
-#ifdef eslAUGMENT_RANDOM
-extern double esl_exp_Sample(ESL_RANDOMNESS *r, double mu, double lambda);
-#endif
-
-extern int esl_exp_FitComplete (double *x, int n, double *ret_mu, double *ret_lambda);
-extern int esl_exp_FitCompleteScale(double *x, int n, double mu, double *ret_lambda);
-
-#ifdef eslAUGMENT_HISTOGRAM
-extern int esl_exp_FitCompleteBinned(ESL_HISTOGRAM *h,
- double *ret_mu, double *ret_lambda);
-#endif
-
-
-#endif /*ESL_EXP_INCLUDED*/
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_gumbel.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_gumbel.cpp
deleted file mode 100644
index 49f7b7a..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_gumbel.cpp
+++ /dev/null
@@ -1,920 +0,0 @@
-/* Statistical routines for Gumbel (type I extreme value) distributions.
-*
-* Contents:
-* 1. Routine for evaluating densities and distributions
-* 2. Generic API routines: for general interface w/ histogram module
-* 3. Routines for dumping plots to files
-* 4. Routines for sampling (requires random module)
-* 5. Maximum likelihood fitting to data (requires minimizer module)
-* 10. Copyright and license information
-*
-* SRE, Thu Jun 23 11:48:39 2005
-* SVN $Id: esl_gumbel.c 326 2009-02-28 15:49:07Z eddys $
-*
-* Note: SRE, Mon Aug 6 13:42:09 2007
-* ML fitting routines will be prone to over/underfitting
-* problems for scores outside a "normal" range, because
-* of exp(-lambda * x) calls. The Lawless ML estimation
-* may eventually need to be recast in log space.
-*/
-#include "esl_config.h"
-
-#include <stdio.h>
-#include <math.h>
-#include <float.h>
-
-#include "easel.h"
-#include "esl_stats.h"
-#include "esl_vectorops.h"
-#include "esl_gumbel.h"
-#ifdef eslAUGMENT_RANDOM
-#include "esl_random.h"
-#endif
-#ifdef eslAUGMENT_MINIMIZER
-#include "esl_minimizer.h"
-#endif
-
-/*****************************************************************
-* 1. Routines for evaluating densities and distributions
-*****************************************************************/
-
-/* Function: esl_gumbel_pdf()
-* Synopsis: Returns the probability density at $x$, $P(S=x)$.
-* Incept: SRE, Sun Jun 26 14:08:19 2005 [St. Louis]
-*
-* Purpose: Calculates the probability density function for the Gumbel,
-* $P(X=x)$, given quantile <x> and Gumbel location and
-* scale parameters <mu> and <lambda>.
-*
-* Let $y = \lambda(x-\mu)$; for 64-bit doubles,
-* useful dynamic range is about $-6.5 <= y <= 710$.
-* Returns 0.0 for smaller $y$, 0.0 for larger $y$.
-*/
-double
-esl_gumbel_pdf(double x, double mu, double lambda)
-{
- double y;
- y = lambda * (x - mu);
- return (lambda * exp(-y - exp(-y)));
-}
-
-
-/* Function: esl_gumbel_logpdf()
-* Synopsis: Returns the log of the pdf at $x$, $\log P(S=x)$.
-* Incept: SRE, Sun Jun 26 14:08:19 2005 [St. Louis]
-*
-* Purpose: Calculates the log probability density function for the Gumbel,
-* $\log P(X=x)$.
-*
-* Let $y = \lambda(x-\mu)$; for 64-bit doubles,
-* useful dynamic range is about $-708 <= y <= \infty$.
-* Returns $-\infty$ for smaller or larger $y$.
-*/
-double
-esl_gumbel_logpdf(double x, double mu, double lambda)
-{
- double y;
- y = lambda * (x - mu);
- return (log(lambda) -y - exp(-y));
-}
-
-
-/* Function: esl_gumbel_cdf()
-* Synopsis: Returns the cumulative distribution at $x$, $P(S \leq x)$.
-* Incept: SRE, Sun Jun 26 10:18:51 2005 [St. Louis]
-*
-* Purpose: Calculates the cumulative distribution function
-* for the Gumbel, $P(X \leq x)$.
-*
-* Let $y = \lambda(x-\mu)$; for 64-bit doubles,
-* useful dynamic range for $y$ is about $-6.5 <= y <=36$.
-* Returns 0.0 for smaller $y$, 1.0 for larger $y$.
-*/
-double
-esl_gumbel_cdf(double x, double mu, double lambda)
-{
- double y;
- y = lambda*(x-mu);
- return exp(-exp(-y));
-}
-
-/* Function: esl_gumbel_logcdf()
-* Synopsis: Returns the log of the cdf at $x$, $\log P(S \leq x)$.
-* Incept: SRE, Sun Jun 26 10:18:51 2005 [St. Louis]
-*
-* Purpose: Calculates the log of the cumulative distribution function
-* for the Gumbel, $\log P(X \leq x)$.
-*
-* Let $y = \lambda(x-\mu)$; for 64-bit doubles,
-* useful dynamic range for $y$ is about $-708 <= y <= 708$.
-* Returns $-\infty$ for smaller $y$, 0.0 for larger $y$.
-*/
-double
-esl_gumbel_logcdf(double x, double mu, double lambda)
-{
- double y;
- y = lambda*(x-mu);
- return (-exp(-y));
-}
-
-/* Function: esl_gumbel_surv()
-* Synopsis: Returns right tail mass above $x$, $P(S > x)$.
-* Incept: SRE, Sun Jun 26 09:54:31 2005 [St. Louis]
-*
-* Purpose: Calculates the survivor function, $P(X>x)$ for a Gumbel
-* (that is, 1-cdf), the right tail's probability mass.
-*
-* Let $y=\lambda(x-\mu)$; for 64-bit doubles,
-* useful dynamic range for $y$ is $-3.6 <= y <= 708$.
-* Returns 1.0 for $y$ below lower limit, and 0.0
-* for $y$ above upper limit.
-*/
-double
-esl_gumbel_surv(double x, double mu, double lambda)
-{
- double y = lambda*(x-mu);
- double ey = -exp(-y);
-
- /* Use 1-e^x ~ -x approximation here when e^-y is small. */
- if (fabs(ey) < eslSMALLX1) return -ey;
- else return 1 - exp(ey);
-}
-
-/* Function: esl_gumbel_logsurv()
-* Synopsis: Returns log survival at $x$, $\log P(S > x)$.
-* Incept: SRE, Sun Jun 26 13:45:52 2005 [St. Louis]
-*
-* Purpose: Calculates $\log P(X>x)$ for a Gumbel (that is, $\log$(1-cdf)):
-* the log of the right tail's probability mass.
-*
-* Let $y=\lambda(x-\mu)$; for 64-bit doubles,
-* useful dynamic range for $y$ is $-6.5 <= y <= \infty$.
-* Returns 0.0 for smaller $y$.
-*/
-double
-esl_gumbel_logsurv(double x, double mu, double lambda)
-{
- double y = lambda*(x-mu);
- double ey = -exp(-y);
-
- /* The real calculation is log(1-exp(-exp(-y))).
- * For "large" y, -exp(-y) is small, so 1-exp(-exp(-y) ~ exp(-y),
- * and log of that gives us -y.
- * For "small y", exp(-exp(-y) is small, and we can use log(1-x) ~ -x.
- */
- if (fabs(ey) < eslSMALLX1) return -y;
- else if (fabs(exp(ey)) < eslSMALLX1) return -exp(ey);
- else return log(1-exp(ey));
-}
-
-/* Function: esl_gumbel_invcdf()
-* Incept: SRE, Sun Aug 21 12:14:06 2005 [St. Louis]
-*
-* Purpose: Calculates the inverse CDF for a Gumbel distribution
-* with parameters <mu> and <lambda>. That is, returns
-* the quantile <x> at which the CDF is <p>.
-*/
-double
-esl_gumbel_invcdf(double p, double mu, double lambda)
-{
- return mu - log(-1. * log(p)) / lambda;
-}
-/*------------------ end of densities and distributions --------------------*/
-
-
-/*****************************************************************
-* 2. Generic API routines: for general interface w/ histogram module
-*****************************************************************/
-
-/* Function: esl_gumbel_generic_pdf()
-* Incept: SRE, Thu Aug 25 07:56:04 2005 [St. Louis]
-*
-* Purpose: Generic-API version of PDF function.
-*/
-double
-esl_gumbel_generic_pdf(double p, void *params)
-{
- double *v = (double *) params;
- return esl_gumbel_pdf(p, v[0], v[1]);
-}
-
-/* Function: esl_gumbel_generic_cdf()
-* Incept: SRE, Sun Aug 21 12:10:49 2005 [St. Louis]
-*
-* Purpose: Generic-API version of CDF function.
-*/
-double
-esl_gumbel_generic_cdf(double x, void *params)
-{
- double *p = (double *) params;
- return esl_gumbel_cdf(x, p[0], p[1]);
-}
-
-/* Function: esl_gumbel_generic_surv()
-* Incept: SRE, Thu Aug 25 07:56:04 2005 [St. Louis]
-*
-* Purpose: Generic-API version of survival function.
-*/
-double
-esl_gumbel_generic_surv(double p, void *params)
-{
- double *v = (double *) params;
- return esl_gumbel_surv(p, v[0], v[1]);
-}
-
-/* Function: esl_gumbel_generic_invcdf()
-* Incept: SRE, Sun Aug 21 12:12:27 2005 [St. Louis]
-*
-* Purpose: Generic-API version of inverse CDF.
-*/
-double
-esl_gumbel_generic_invcdf(double p, void *params)
-{
- double *v = (double *) params;
- return esl_gumbel_invcdf(p, v[0], v[1]);
-}
-
-
-/*------------------------- end of generic API --------------------------*/
-
-
-
-/****************************************************************************
-* 3. Routines for dumping plots for files
-****************************************************************************/
-
-/* Function: esl_gumbel_Plot()
-* Synopsis: Plot a Gumbel function in XMGRACE XY format.
-* Incept: SRE, Sun Aug 21 13:21:37 2005 [St. Louis]
-*
-* Purpose: Plot a Gumbel function <func> (for instance,
-* <esl_gumbel_pdf()>) for parameters <mu> and <lambda>, for
-* a range of quantiles x from <xmin> to <xmax> in steps of <xstep>;
-* output to an open stream <fp> in xmgrace XY input format.
-*
-* Returns: <eslOK>.
-*/
-int
-esl_gumbel_Plot(FILE *fp, double mu, double lambda,
- double (*func)(double x, double mu, double lambda),
- double xmin, double xmax, double xstep)
-{
- double x;
- for (x = xmin; x <= xmax; x += xstep)
- fprintf(fp, "%f\t%g\n", x, (*func)(x, mu, lambda));
- fprintf(fp, "&\n");
- return eslOK;
-}
-/*-------------------- end plot dumping routines ---------------------------*/
-
-
-
-/*****************************************************************
-* 4. Routines for sampling (requires augmentation w/ random module)
-*****************************************************************/
-
-#ifdef eslAUGMENT_RANDOM
-/* Function: esl_gumbel_Sample()
-* Synopsis: Return a Gumbel-distributed random sample $x$.
-* Incept: SRE, Thu Jun 23 11:38:39 2005 [St. Louis]
-*
-* Purpose: Sample a Gumbel-distributed random variate
-* by the transformation method.
-*/
-double
-esl_gumbel_Sample(ESL_RANDOMNESS *r, double mu, double lambda)
-{
- double p;
- p = esl_rnd_UniformPositive(r);
- return esl_gumbel_invcdf(p, mu, lambda);
-}
-#endif /*eslAUGMENT_RANDOM*/
-
-/*------------------------ end of sampling --------------------------------*/
-
-
-
-/*****************************************************************
-* 5. Routines for maximum likelihood fitting Gumbels to data
-* (fitting truncated distributions requires augmentation w/ minimizer module)
-*****************************************************************/
-
-/*****************************************************************
-* Complete data, maximum a posteriori parameters
-*****************************************************************/
-
-/* lawless416()
-* SRE, Thu Nov 13 11:48:50 1997 [St. Louis]
-*
-* Purpose: Equation 4.1.6 from [Lawless82], pg. 143, and
-* its first derivative with respect to lambda,
-* for finding the ML fit to Gumbel lambda parameter.
-* This equation gives a result of zero for the maximum
-* likelihood lambda.
-*
-* Args: x - array of sample values
-* n - number of samples
-* lambda - a lambda to test
-* ret_f - RETURN: 4.1.6 evaluated at lambda
-* ret_df - RETURN: first derivative of 4.1.6 evaluated at lambda
-*
-* Return: (void)
-*/
-static void
-lawless416(double *x, int n, double lambda, double *ret_f, double *ret_df)
-{
- double esum; /* \sum e^(-lambda xi) */
- double xesum; /* \sum xi e^(-lambda xi) */
- double xxesum; /* \sum xi^2 e^(-lambda xi) */
- double xsum; /* \sum xi */
- int i;
-
- esum = xesum = xsum = xxesum = 0.;
- for (i = 0; i < n; i++)
- {
- xsum += x[i];
- xesum += x[i] * exp(-1. * lambda * x[i]);
- xxesum += x[i] * x[i] * exp(-1. * lambda * x[i]);
- esum += exp(-1. * lambda * x[i]);
- }
- *ret_f = (1./lambda) - (xsum / n) + (xesum / esum);
- *ret_df = ((xesum / esum) * (xesum / esum))
- - (xxesum / esum)
- - (1. / (lambda * lambda));
-}
-
-/* Function: esl_gumbel_FitComplete()
-* Synopsis: Estimates $\mu$, $\lambda$ from complete data.
-* Date: SRE, Fri Nov 14 07:56:29 1997 [St. Louis] - HMMER's EVDMaxLikelyFit()
-*
-* Purpose: Given an array of Gumbel-distributed samples <x[0]..x[n-1]>,
-* find maximum likelihood parameters <mu> and <lambda>.
-*
-* Algorithm: Uses approach described in [Lawless82]. Solves
-* for lambda using Newton/Raphson iterations,
-* then substitutes lambda into Lawless' equation 4.1.5
-* to get mu.
-*
-* Args: x - list of Gumbel distributed samples
-* n - number of samples
-* ret_mu : RETURN: ML estimate of mu
-* ret_lambda : RETURN: ML estimate of lambda
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslENOHALT> if the fit doesn't converge.
-*/
-int
-esl_gumbel_FitComplete(double *x, int n, double *ret_mu, double *ret_lambda)
-{
- double variance;
- double lambda, mu;
- double fx; /* f(x) */
- double dfx; /* f'(x) */
- double esum; /* \sum e^(-lambda xi) */
- double tol = 1e-5;
- int i;
-
- /* 1. Find an initial guess at lambda
- * (Evans/Hastings/Peacock, Statistical Distributions, 2000, p.86)
- */
- esl_stats_DMean(x, n, NULL, &variance);
- lambda = eslCONST_PI / sqrt(6.*variance);
-
- /* 2. Use Newton/Raphson to solve Lawless 4.1.6 and find ML lambda
- */
- for (i = 0; i < 100; i++)
- {
- lawless416(x, n, lambda, &fx, &dfx);
- if (fabs(fx) < tol) break; /* success */
- lambda = lambda - fx / dfx; /* Newton/Raphson is simple */
- if (lambda <= 0.) lambda = 0.001; /* but be a little careful */
- }
-
- /* 2.5: If we did 100 iterations but didn't converge, Newton/Raphson failed.
- * Resort to a bisection search. Worse convergence speed
- * but guaranteed to converge (unlike Newton/Raphson).
- * We assume that fx is a monotonically decreasing function of x;
- * i.e. fx > 0 if we are left of the root, fx < 0 if we
- * are right of the root.
- */
- if (i == 100)
- {
- double left, right, mid;
- ESL_DPRINTF1(("esl_gumbel_FitComplete(): Newton/Raphson failed; switchover to bisection"));
-
- /* First bracket the root */
- left = 0.; /* for sure */
- right = eslCONST_PI / sqrt(6.*variance); /* an initial guess */
- lawless416(x, n, lambda, &fx, &dfx);
- while (fx > 0.)
- {
- right *= 2.; /* arbitrary leap to the right */
- if (right > 100.) /* no reasonable lambda should be > 100, we assert */
- ESL_EXCEPTION(eslENOHALT, "Failed to bracket root in esl_gumbel_FitComplete().");
- lawless416(x, n, right, &fx, &dfx);
- }
-
- /* Now, bisection search in left/right interval */
- for (i = 0; i < 100; i++)
- {
- mid = (left + right) / 2.;
- lawless416(x, n, mid, &fx, &dfx);
- if (fabs(fx) < tol) break; /* success */
- if (fx > 0.) left = mid;
- else right = mid;
- }
- if (i == 100)
- ESL_EXCEPTION(eslENOHALT, "Even bisection search failed in esl_gumbel_FitComplete().");
-
- lambda = mid;
- }
-
- /* 3. Substitute into Lawless 4.1.5 to find mu
- */
- esum = 0.;
- for (i = 0; i < n; i++)
- esum += exp(-lambda * x[i]);
- mu = -log(esum / n) / lambda;
-
- *ret_lambda = lambda;
- *ret_mu = mu;
- return eslOK;
-}
-
-/* Function: esl_gumbel_FitCompleteLoc()
-* Synopsis: Estimates $\mu$ from complete data, given $\lambda$.
-* Incept: SRE, Thu Nov 24 09:09:17 2005 [St. Louis]
-*
-* Purpose: Given an array of Gumbel-distributed samples
-* <x[0]..x[n-1]> (complete data), and a known
-* (or otherwise fixed) <lambda>, find a maximum
-* likelihood estimate for location parameter <mu>.
-*
-* Algorithm: A straightforward simplification of FitComplete().
-*
-* Args: x - list of Gumbel distributed samples
-* n - number of samples
-* lambda - known lambda (scale) parameter
-* ret_mu : RETURN: ML estimate of mu
-*
-* Returns: <eslOK> on success.
-*
-* Throws: (no abnormal error conditions)
-*
-* Note: Here and in FitComplete(), we have a potential
-* under/overflow problem. We ought to be doing the
-* esum in log space.
-*/
-int
-esl_gumbel_FitCompleteLoc(double *x, int n, double lambda, double *ret_mu)
-{
- double esum;
- int i;
-
- /* Substitute into Lawless 4.1.5 to find mu */
- esum = 0.;
- for (i = 0; i < n; i++)
- esum += exp(-lambda * x[i]);
- *ret_mu = -log(esum / n) / lambda;
- return eslOK;
-
-#if 0
- /* Replace the code above w/ code below to test the direct method. */
- double mean, variance;
- esl_stats_DMean(x, n, &mean, &variance);
- *ret_mu = mean - 0.57722/lambda;
- return eslOK;
-#endif
-}
-
-
-#if eslDEBUGLEVEL >=3
-/* direct_mv_fit()
-* SRE, Wed Jun 29 08:23:47 2005
-*
-* Purely for curiousity: a complete data fit using the
-* simple direct method, calculating mu and lambda from mean
-* and variance.
-*/
-static int
-direct_mv_fit(double *x, int n, double *ret_mu, double *ret_lambda)
-{
- double mean, variance;
-
- esl_stats_DMean(x, n, &mean, &variance);
- *ret_lambda = eslCONST_PI / sqrt(6.*variance);
- *ret_mu = mean - 0.57722/(*ret_lambda);
- return eslOK;
-}
-#endif
-
-/*------------------- end of complete data fit ---------------------------------*/
-
-
-/*****************************************************************
-* Censored data, MAP/ML parameters
-*****************************************************************/
-/* lawless422()
-* SRE, Mon Nov 17 09:42:48 1997 [St. Louis]
-*
-* Purpose: Equation 4.2.2 from [Lawless82], pg. 169, and
-* its first derivative with respect to lambda,
-* for finding the ML fit to Gumbel lambda parameter
-* for Type I censored data.
-* This equation gives a result of zero for the maximum
-* likelihood lambda.
-*
-* Args: x - array of observed sample values
-* n - number of observed samples
-* z - number of censored samples = N-n
-* phi - censoring value; all observed x_i >= phi
-* lambda - a lambda to test
-* ret_f - RETURN: 4.2.2 evaluated at lambda
-* ret_df - RETURN: first derivative of 4.2.2 evaluated at lambda
-*
-* Return: (void)
-*/
-static void
-lawless422(double *x, int n, int z, double phi,
- double lambda, double *ret_f, double *ret_df)
-{
- double esum; /* \sum e^(-lambda xi) + z term */
- double xesum; /* \sum xi e^(-lambda xi) + z term */
- double xxesum; /* \sum xi^2 e^(-lambda xi) + z term */
- double xsum; /* \sum xi (no z term) */
- int i;
-
- esum = xesum = xsum = xxesum = 0.;
- for (i = 0; i < n; i++)
- {
- xsum += x[i];
- esum += exp(-1. * lambda * x[i]);
- xesum += x[i] * exp(-1. * lambda * x[i]);
- xxesum += x[i] * x[i] * exp(-1. * lambda * x[i]);
- }
-
- /* Add z terms for censored data
- */
- esum += (double) z * exp(-1. * lambda * phi);
- xesum += (double) z * phi * exp(-1. * lambda * phi);
- xxesum += (double) z * phi * phi * exp(-1. * lambda * phi);
-
- *ret_f = 1./lambda - xsum / n + xesum / esum;
- *ret_df = ((xesum / esum) * (xesum / esum))
- - (xxesum / esum)
- - (1. / (lambda * lambda));
-
- return;
-}
-
-/* Function: esl_gumbel_FitCensored()
-* Synopsis: Estimates $\mu$, $\lambda$ from censored data.
-* Date: SRE, Mon Nov 17 10:01:05 1997 [St. Louis]
-*
-* Purpose: Given a left-censored array of Gumbel-distributed samples
-* <x[0]..x[n-1]>, the number of censored samples <z>, and the
-* censoring value <phi> (all <x[i]> $>$ <phi>).
-* Find maximum likelihood parameters <mu> and <lambda>.
-*
-* Algorithm: Uses approach described in [Lawless82]. Solves
-* for lambda using Newton/Raphson iterations;
-* then substitutes lambda into Lawless' equation 4.2.3
-* to get mu.
-*
-* Args: x - array of Gumbel-distributed samples, 0..n-1
-* n - number of observed samples
-* z - number of censored samples
-* phi - censoring value (all x_i >= phi)
-* ret_mu : RETURN: ML estimate of mu
-* ret_lambda : RETURN: ML estimate of lambda
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslENOHALT> if the fit doesn't converge.
-*/
-int
-esl_gumbel_FitCensored(double *x, int n, int z, double phi,
- double *ret_mu, double *ret_lambda)
-{
- double variance;
- double lambda, mu;
- double fx; /* f(x) */
- double dfx; /* f'(x) */
- double esum; /* \sum e^(-lambda xi) */
- double tol = 1e-5;
- int i;
-
- /* 1. Find an initial guess at lambda
- * (Evans/Hastings/Peacock, Statistical Distributions, 2000, p.86)
- */
- esl_stats_DMean(x, n, NULL, &variance);
- lambda = eslCONST_PI / sqrt(6.*variance);
-
- /* 2. Use Newton/Raphson to solve Lawless 4.2.2 and find ML lambda
- */
- for (i = 0; i < 100; i++)
- {
- lawless422(x, n, z, phi, lambda, &fx, &dfx);
- if (fabs(fx) < tol) break; /* success */
- lambda = lambda - fx / dfx; /* Newton/Raphson is simple */
- if (lambda <= 0.) lambda = 0.001; /* but be a little careful */
- }
-
- /* 2.5: If we did 100 iterations but didn't converge, Newton/Raphson failed.
- * Resort to a bisection search. Worse convergence speed
- * but guaranteed to converge (unlike Newton/Raphson).
- * We assume (!?) that fx is a monotonically decreasing function of x;
- * i.e. fx > 0 if we are left of the root, fx < 0 if we
- * are right of the root.
- */
- if (i == 100)
- {
- double left, right, mid;
- ESL_DPRINTF1(("esl_gumbel_FitCensored(): Newton/Raphson failed; switched to bisection"));
-
- /* First bracket the root */
- left = 0.; /* we know that's the left bound */
- right = eslCONST_PI / sqrt(6.*variance); /* start from here, move "right"... */
- lawless422(x, n, z, phi, right, &fx, &dfx);
- while (fx > 0.)
- {
- right *= 2.;
- if (right > 100.) /* no reasonable lambda should be > 100, we assert */
- ESL_EXCEPTION(eslENOHALT, "Failed to bracket root in esl_gumbel_FitCensored().");
- lawless422(x, n, z, phi, right, &fx, &dfx);
- }
-
- /* Now we bisection search in left/right interval */
- for (i = 0; i < 100; i++)
- {
- mid = (left + right) / 2.;
- lawless422(x, n, z, phi, mid, &fx, &dfx);
- if (fabs(fx) < tol) break; /* success */
- if (fx > 0.) left = mid;
- else right = mid;
- }
- if (i == 100)
- ESL_EXCEPTION(eslENOHALT, "Even bisection search failed in esl_gumbel_FitCensored().");
- lambda = mid;
- }
-
- /* 3. Substitute into Lawless 4.2.3 to find mu
- */
- esum = 0.;
- for (i = 0; i < n; i++)
- esum += exp(-lambda * x[i]);
- esum += z * exp(-1. * lambda * phi); /* term from censored data */
- mu = -log(esum / n) / lambda;
-
- *ret_lambda = lambda;
- *ret_mu = mu;
- return eslOK;
-}
-
-
-/* Function: esl_gumbel_FitCensoredLoc()
-* Synopsis: Estimates $\mu$ from censored data, given $\lambda$.
-* Incept: SRE, Mon Feb 6 11:33:10 2006 [St. Louis]
-*
-* Purpose: Given a left-censored array of Gumbel distributed samples
-* <x[0]..x[n-1]>, the number of censored samples <z>, and the censoring
-* value <phi> (where all <x[i]> $>$ <phi>), and a known
-* (or at least fixed) <lambda>;
-* find the maximum likelihood estimate of the location
-* parameter $\mu$ and return it in <ret_mu>.
-*
-* Note: A straightforward simplification of FitCensored().
-*
-* Args: x - array of Gumbel-distributed samples, 0..n-1
-* n - number of observed samples
-* z - number of censored samples
-* phi - censoring value (all x_i >= phi)
-* lambda - known scale parameter $\lambda$
-* ret_mu : RETURN: ML estimate of $\mu$
-*
-* Returns: <eslOK> on success.
-*
-* Throws: (no abnormal error conditions)
-*/
-int
-esl_gumbel_FitCensoredLoc(double *x, int n, int z, double phi, double lambda,
- double *ret_mu)
-{
- double esum;
- int i;
-
- /* Immediately substitute into Lawless 4.2.3 to find mu, because
- * lambda is known.
- */
- esum = 0.;
- for (i = 0; i < n; i++) /* contribution from observed data */
- esum += exp(-lambda * x[i]);
- esum += z * exp(-1. * lambda * phi); /* term from censored data */
- *ret_mu = -log(esum / (double) n) / lambda;
- return eslOK;
-}
-
-
-/*****************************************************************
-* Truncated data, MAP parameters (requires minimizer augmentation)
-*****************************************************************/
-#ifdef eslAUGMENT_MINIMIZER
-/* Easel's conjugate gradient descent code allows a single void ptr to
-* point to any necessary fixed data, so we'll put everything into one
-* structure:
-*/
-struct tevd_data {
- double *x; /* data: n observed samples from a Gumbel */
- int n; /* number of observed samples */
- double phi; /* truncation threshold: all observed x_i >= phi */
-};
-
-/* tevd_func()
-*
-* Called by the optimizer: evaluate the objective function
-* for the negative posterior log probability of a particular choice
-* of parameters mu and lambda, given truncated Gumbel samples.
-*/
-static double
-tevd_func(double *p, int nparam, void *dptr)
-{
- double mu, w, lambda;
- struct tevd_data *data;
- double *x;
- int n;
- double phi;
- double logL;
- int i;
-
- /* unpack what the optimizer gave us; nparam==2 always
- */
- mu = p[0];
- w = p[1];
- lambda = exp(w);
- data = (struct tevd_data *) dptr;
- x = data->x;
- n = data->n;
- phi = data->phi;
-
- /* The log likelihood equation
- */
- logL = n * log(lambda);
- for (i = 0; i < n; i++)
- logL -= lambda * (x[i] - mu);
- for (i = 0; i < n; i++)
- logL -= exp(-1. * lambda * (x[i] - mu));
- logL -= n * esl_gumbel_logsurv(phi, mu, lambda);
-
- return -1.0 * logL; /* objective: minimize the NLP */
-}
-
-/* tevd_grad()
-*
-* Called by the optimizer: evaluate the gradient of the objective
-* function (the negative posterior log probability of the parameters
-* mu and w, where w = log(lambda)), at a particular choice of mu and
-* lambda.
-*/
-static void
-tevd_grad(double *p, int nparam, void *dptr, double *dp)
-{
- double mu, lambda, w;
- struct tevd_data *data;
- double *x;
- int n;
- double phi;
- double dmu, dw;
- double coeff;
- int i;
-
- /* unpack what the optimizer gave us; nparam==2 always
- */
- mu = p[0];
- w = p[1];
- lambda = exp(w);
- data = (struct tevd_data *) dptr;
- x = data->x;
- n = data->n;
- phi = data->phi;
-
- /* Both partials include a coefficient that
- * basically looks like P(S=phi) / P(S>=phi); pre-calculate it.
- * Watch out when phi >> mu, which'll give us 0/0; instead,
- * recognize that for phi >> mu, coeff converges to \lambda.
- */
- if (lambda*(phi-mu) > 50.) /* arbitrary crossover. */
- coeff = lambda;
- else
- coeff = esl_gumbel_pdf(phi, mu, lambda) / esl_gumbel_surv(phi, mu, lambda);
-
- /* Partial derivative w.r.t. mu.
- */
- dmu = n * lambda;
- for (i = 0; i < n; i++)
- dmu -= lambda * exp(-1. * lambda * (x[i] - mu));
- dmu -= n * coeff;
-
- /* Partial derivative w.r.t. w=log(lambda).
- */
- dw = n;
- for (i = 0; i < n; i++) dw -= (x[i] - mu) * lambda;
- for (i = 0; i < n; i++) dw += (x[i] - mu) * lambda * exp(-1. * lambda * (x[i] - mu));
- dw += n * (phi - mu) * coeff;
-
- /* Return the negative, because we're minimizing NLP, not maximizing.
- */
- dp[0] = -1. * dmu; /* negative because we're minimizing NLP, not maximizing */
- dp[1] = -1. * dw;
- return;
-}
-
-/* Function: esl_gumbel_FitTruncated()
-* Synopsis: Estimates $\mu$, $\lambda$ from truncated data.
-* Incept: SRE, Wed Jun 29 14:14:17 2005 [St. Louis]
-*
-* Purpose: Given a left-truncated array of Gumbel-distributed
-* samples <x[0]..x[n-1]> and the truncation threshold
-* <phi> (such that all <x[i]> $\geq$ <phi>).
-* Find maximum likelihood parameters <mu> and <lambda>.
-*
-* <phi> should not be much greater than <mu>, the
-* mode of the Gumbel, or the fit will become unstable
-* or may even fail to converge. The problem is
-* that for <phi> $>$ <mu>, the tail of the Gumbel
-* becomes a scale-free exponential, and <mu> becomes
-* undetermined.
-*
-* Algorithm: Uses conjugate gradient descent to optimize the
-* log likelihood of the data. Follows a general
-* approach to fitting missing data problems outlined
-* in [Gelman95].
-*
-* Args: x - observed data samples [0..n-1]
-* n - number of samples
-* phi - truncation threshold; all x[i] >= phi
-* ret_mu - RETURN: ML estimate of mu
-* ret_lambda - RETURN: ML estimate of lambda
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslENOHALT> if the fit doesn't converge.
-*/
-int
-esl_gumbel_FitTruncated(double *x, int n, double phi,
- double *ret_mu, double *ret_lambda)
-{
- struct tevd_data data;
- double wrk[8]; /* workspace for CG: 4 tmp vectors of size 2 */
- double p[2]; /* mu, w; lambda = e^w */
- double u[2]; /* max initial step size for mu, lambda */
- int status;
- double mean, variance;
- double mu, lambda;
- double fx;
-
- data.x = x;
- data.n = n;
- data.phi = phi;
-
- /* The source of the following magic is Evans/Hastings/Peacock,
- * Statistical Distributions, 3rd edition (2000), p.86, which gives
- * eq's for the mean and variance of a Gumbel in terms of mu and lambda;
- * we turn them around to get mu and lambda in terms of the mean and variance.
- * These would be reasonable estimators if we had a full set of Gumbel
- * distributed variates. They'll be off for a truncated sample, but
- * close enough to be a useful starting point.
- */
- esl_stats_DMean(x, n, &mean, &variance);
- lambda = eslCONST_PI / sqrt(6.*variance);
- mu = mean - 0.57722/lambda;
-
- p[0] = mu;
- p[1] = log(lambda); /* c.o.v. because lambda is constrained to >0 */
-
- u[0] = 2.0;
- u[1] = 0.1;
-
- /* Pass the problem to the optimizer. The work is done by the
- * equations in tevd_func() and tevd_grad().
- */
- status = esl_min_ConjugateGradientDescent(p, u, 2,
- &tevd_func, &tevd_grad,(void *)(&data),
- 1e-4, wrk, &fx);
-
- *ret_mu = p[0];
- *ret_lambda = exp(p[1]); /* reverse the c.o.v. */
- return status;
-}
-#endif /*eslAUGMENT_MINIMIZER*/
-/*------------------------ end of fitting --------------------------------*/
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_gumbel.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_gumbel.h
deleted file mode 100644
index 06beb5a..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_gumbel.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/* esl_gumbel.h
-* Gumbel (type I extreme value) distributions.
-*
-* SRE, Mon Jun 27 08:44:41 2005 [St. Louis]
-* SVN $Id: esl_gumbel.h 93 2006-02-26 18:15:26Z eddy $
-*/
-#ifndef ESL_GUMBEL_INCLUDED
-#define ESL_GUMBEL_INCLUDED
-
-#ifdef eslAUGMENT_RANDOM
-#include <hmmer3/easel/esl_random.h>
-#endif
-
-extern double esl_gumbel_pdf (double x, double mu, double lambda);
-extern double esl_gumbel_logpdf (double x, double mu, double lambda);
-extern double esl_gumbel_cdf (double x, double mu, double lambda);
-extern double esl_gumbel_logcdf (double x, double mu, double lambda);
-extern double esl_gumbel_surv (double x, double mu, double lambda);
-extern double esl_gumbel_logsurv(double x, double mu, double lambda);
-extern double esl_gumbel_invcdf (double p, double mu, double lambda);
-
-extern double esl_gumbel_generic_pdf (double x, void *params);
-extern double esl_gumbel_generic_cdf (double x, void *params);
-extern double esl_gumbel_generic_surv (double x, void *params);
-extern double esl_gumbel_generic_invcdf(double p, void *params);
-
-//extern int esl_gumbel_Plot(FILE *fp, double mu, double lambda,
-// double (*func)(double x, double mu, double lambda),
-// double xmin, double xmax, double xstep);
-
-#ifdef eslAUGMENT_RANDOM
-extern double esl_gumbel_Sample(ESL_RANDOMNESS *r, double mu, double lambda);
-#endif
-
-extern int esl_gumbel_FitComplete(double *x, int n,
- double *ret_mu, double *ret_lambda);
-extern int esl_gumbel_FitCompleteLoc(double *x, int n, double lambda,
- double *ret_mu);
-extern int esl_gumbel_FitCensored(double *x, int n, int z, double phi,
- double *ret_mu, double *ret_lambda);
-extern int esl_gumbel_FitCensoredLoc(double *x, int n, int z, double phi,
- double lambda, double *ret_mu);
-#ifdef eslAUGMENT_MINIMIZER
-extern int esl_gumbel_FitTruncated(double *x, int n, double phi,
- double *ret_mu, double *ret_lambda);
-#endif
-
-
-#endif /*ESL_GUMBEL_INCLUDED*/
-
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_histogram.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_histogram.cpp
deleted file mode 100644
index e8ba93f..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_histogram.cpp
+++ /dev/null
@@ -1,1684 +0,0 @@
-/* Collecting and displaying histograms.
- *
- * 1. Creating/destroying histograms and collecting data.
- * 2. Declarations about the binned data before parameter fitting.
- * 3. Routines for accessing data samples in a full histogram.
- * 4. Setting expected counts
- * 5. Output and display of binned data.
- * 6. Test driver.
- * 7. Examples.
- *
- * SRE, Fri Jul 1 13:21:45 2005 [St. Louis]
- * SVN $Id: esl_histogram.c 326 2009-02-28 15:49:07Z eddys $
- */
-#include "esl_config.h"
-
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-#include <float.h>
-#include <limits.h>
-
-#include "easel.h"
-#include "esl_stats.h"
-#include "esl_histogram.h"
-#include "esl_vectorops.h"
-
-static int esl_histogram_sort(ESL_HISTOGRAM *h);
-
-
-/*****************************************************************
- * 1. Creating/destroying histograms and collecting data.
- *****************************************************************/
-
-/* Function: esl_histogram_Create()
- * Synopsis: Create a new <ESL_HISTOGRAM>.
- * Incept: SRE, Fri Jul 1 13:40:26 2005 [St. Louis]
- *
- * Purpose: Creates and returns a new histogram object, initially
- * allocated to count scores $>$ <xmin> and $<=$ <xmax> into
- * bins of width <w>. Thus, a total of (<xmax>-<xmin>)/<w> bins
- * are initially created.
- *
- * The lower bound <xmin> and the width <w> permanently
- * determine the offset and width of the binning, but not
- * the range. For example, <esl_histogram_Create(-100,
- * 100, 0.5)> would init the object to collect scores into
- * 400 bins $[-100< x \leq -99.5],[-99.5 < x \leq
- * -99.0]...[99.5 <x \leq 100.0]$. Aside from this, the
- * range specified by the bounds <xmin> and <xmax> only
- * needs to be an initial guess. The histogram object will
- * reallocate itself dynamically as needed to accommodate
- * scores that exceed current bounds.
- *
- * You can be sloppy about <xmax>; it does not have to
- * exactly match a bin upper bound. The initial allocation
- * is for all full-width bins with upper bounds $\leq
- * xmax$.
- *
- * <esl_histogram_Create()> creates a simplified histogram
- * object that collates only the "display" histogram. For
- * a more complex object that also keeps the raw data samples,
- * better suited for fitting distributions and goodness-of-fit
- * testing, use <esl_histogram_CreateFull()>.
- *
- * Args: xmin - caller guesses that minimum score will be > xmin
- * xmax - caller guesses that max score will be <= xmax
- * w - size of bins (1.0, for example)
- *
- * Returns: ptr to new <ESL_HISTOGRAM> object, which caller is responsible
- * for free'ing with <esl_histogram_Destroy()>.
- *
- * Throws: <NULL> on allocation failure.
- */
-ESL_HISTOGRAM *
-esl_histogram_Create(double xmin, double xmax, double w)
-{
- ESL_HISTOGRAM *h = NULL;
- int status;
- int i;
-
- ESL_ALLOC_WITH_TYPE(h, ESL_HISTOGRAM*, sizeof(ESL_HISTOGRAM));
-
- h->xmin = DBL_MAX; /* xmin/xmax are the observed min/max */
- h->xmax = -DBL_MAX;
- h->n = 0;
- h->obs = NULL; /* will get allocated below... */
- h->bmin = xmin; /* bmin/bmax are the allocated bounds */
- h->bmax = xmax;
- h->nb = (int)((xmax-xmin)/w);
- h->imin = h->nb;
- h->imax = -1;
- h->w = w;
-
- h->x = NULL;
- h->nalloc = 0;
-
- h->phi = 0.;
- h->cmin = h->imin; /* sentinel: no observed data yet */
- h->z = 0;
- h->Nc = 0;
- h->No = 0;
-
- h->expect = NULL; /* 'til a Set*() call */
- h->emin = -1; /* sentinel: no expected counts yet */
- h->tailbase = 0.; /* unused unless is_tailfit TRUE */
- h->tailmass = 1.0; /* <= 1.0 if is_tailfit TRUE */
-
- h->is_full = FALSE;
- h->is_done = FALSE;
- h->is_sorted = FALSE;
- h->is_tailfit = FALSE;
- h->is_rounded = FALSE;
- h->dataset_is = ESL_HISTOGRAM::COMPLETE;
-
- ESL_ALLOC_WITH_TYPE(h->obs, uint64_t*, sizeof(uint64_t) * h->nb);
- for (i = 0; i < h->nb; i++) h->obs[i] = 0;
- return h;
-
- ERROR:
- esl_histogram_Destroy(h);
- return NULL;
-}
-
-/* Function: esl_histogram_CreateFull()
- * Synopsis: A <ESL_HISTOGRAM> to keep all data samples.
- * Incept: SRE, Tue Jul 26 13:19:27 2005 [St. Louis]
- *
- * Purpose: Alternative form of <esl_histogram_Create()> that
- * creates a more complex histogram that will contain not just the
- * display histogram, but also keeps track of all
- * the raw sample values. Having a complete vector of raw
- * samples improves distribution-fitting and goodness-of-fit
- * tests, but will consume more memory.
- */
-ESL_HISTOGRAM *
-esl_histogram_CreateFull(double xmin, double xmax, double w)
-{
- int status;
- ESL_HISTOGRAM *h = esl_histogram_Create(xmin, xmax, w);
- if (h == NULL) return NULL;
-
- h->n = 0; /* make sure */
- h->nalloc = 128; /* arbitrary initial allocation size */
- ESL_ALLOC_WITH_TYPE(h->x, double*, sizeof(double) * h->nalloc);
- h->is_full = TRUE;
- return h;
-
- ERROR:
- esl_histogram_Destroy(h);
- return NULL;
-}
-
-
-/* Function: esl_histogram_Destroy()
- * Synopsis: Frees a <ESL_HISTOGRAM>.
- * Incept: SRE, Sat Jul 2 19:41:17 2005 [St. Louis]
- *
- * Purpose: Frees an <ESL_HISTOGRAM> object <h>.
- */
-void
-esl_histogram_Destroy(ESL_HISTOGRAM *h)
-{
- if (h == NULL) return;
- if (h->x != NULL) free(h->x);
- if (h->obs != NULL) free(h->obs);
- if (h->expect != NULL) free(h->expect);
- free(h);
- return;
-}
-
-/* Function: esl_histogram_Score2Bin()
-* Synopsis: Given a real-valued <x>; calculate integer bin <b>
-* Incept: SRE, Sun Dec 13 20:24:42 2009 [Yokohama]
-*
-* Purpose: For a real-valued <x>, figure out what bin it would
-* go into in the histogram <h>; return this value in
-* <*ret_b>.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslERANGE> if bin <b> would exceed the range of
-* an integer; for instance, if <x> isn't finite.
-*
-* Xref: J5/122. Replaces earlier macro implementation;
-* we needed to range check <x> better.
-*/
-int
-esl_histogram_Score2Bin(ESL_HISTOGRAM *h, double x, int *ret_b)
-{
- int status;
-
- if (! isfin(x)) ESL_XEXCEPTION(eslERANGE, "value added to histogram is not finite");
-
- x = ceil( ((x - h->bmin) / h->w) - 1.);
-
- /* x is now the bin number as a double, which we will convert to
- * int. Because x is a double (64-bit), we know all ints are exactly
- * represented. Check for under/overflow before conversion.
- */
- if (x < (double) INT_MIN || x > (double) INT_MAX)
- ESL_XEXCEPTION(eslERANGE, "value %f isn't going to fit in histogram", x);
-
- *ret_b = (int) x;
- return eslOK;
-
-ERROR:
- *ret_b = 0;
- return status;
-}
-
-/* Function: esl_histogram_Add()
-* Synopsis: Add a sample to the histogram.
-* Incept: SRE, Sat Jul 2 19:41:45 2005 [St. Louis]
-*
-* Purpose: Adds score <x> to a histogram <h>.
-*
-* The histogram will be automatically reallocated as
-* needed if the score is smaller or larger than the
-* current allocated bounds.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on reallocation failure.
-*
-* <eslERANGE> if <x> is beyond the reasonable range for
-* the histogram to store -- either because it isn't finite,
-* or because the histogram would need to allocate a number
-* of bins that exceeds <INT_MAX>.
-*
-* Throws <eslEINVAL> for cases where something has been done
-* to the histogram that requires it to be 'finished', and
-* adding more data is prohibited; for example,
-* if tail or censoring information has already been set.
-* On either failure, initial state of <h> is preserved.
-*/
-int
-esl_histogram_Add(ESL_HISTOGRAM *h, double x)
-{
- int status;
- void *tmp;
- int b; /* what bin we're in */
- int nnew; /* # of new bins created by a reallocation */
- int bi;
-
- /* Censoring info must only be set on a finished histogram;
- * don't allow caller to add data after configuration has been declared
- */
- if (h->is_done)
- ESL_EXCEPTION(eslEINVAL, "can't add more data to this histogram");
-
- /* If we're a full histogram, check whether we need to reallocate
- * the full data vector.
- */
- if (h->is_full && h->nalloc == h->n)
- {
- ESL_RALLOC_WITH_TYPE(h->x,double*, tmp, sizeof(double) * h->nalloc * 2);
- h->nalloc *= 2;
- }
-
- /* Which bin will we want to put x into?
- */
- if ((status = esl_histogram_Score2Bin(h,x, &b)) != eslOK) return status;
-
- /* Make sure we have that bin. Realloc as needed.
- * If that reallocation succeeds, we can no longer fail;
- * so we can change the state of h.
- */
- if (b < 0) /* Reallocate below? */
- {
- nnew = -b*2; /* overallocate by 2x */
- if (nnew > INT_MAX - h->nb)
- ESL_EXCEPTION(eslERANGE, "value %f requires unreasonable histogram bin number", x);
- ESL_RALLOC_WITH_TYPE(h->obs,uint64_t*, tmp, sizeof(uint64_t) * (nnew+ h->nb));
-
- memmove(h->obs+nnew, h->obs, sizeof(uint64_t) * h->nb);
- h->nb += nnew;
- b += nnew;
- h->bmin -= nnew*h->w;
- h->imin += nnew;
- h->cmin += nnew;
- if (h->imax > -1) h->imax += nnew;
- for (bi = 0; bi < nnew; bi++) h->obs[bi] = 0;
- }
- else if (b >= h->nb) /* Reallocate above? */
- {
- nnew = (b-h->nb+1) * 2; /* 2x overalloc */
- if (nnew > INT_MAX - h->nb)
- ESL_EXCEPTION(eslERANGE, "value %f requires unreasonable histogram bin number", x);
- ESL_RALLOC_WITH_TYPE(h->obs,uint64_t*, tmp, sizeof(uint64_t) * (nnew+ h->nb));
- for (bi = h->nb; bi < h->nb+nnew; bi++) h->obs[bi] = 0;
- if (h->imin == h->nb) { /* boundary condition of no data yet*/
- h->imin+=nnew;
- h->cmin+=nnew;
- }
- h->bmax += nnew*h->w;
- h->nb += nnew;
- }
-
- /* If we're a full histogram, then we keep the raw x value,
- * reallocating as needed.
- */
- if (h->is_full) h->x[h->n] = x;
- h->is_sorted = FALSE; /* not any more! */
-
- /* Bump the bin counter, and all the data sample counters.
- */
- h->obs[b]++;
- h->n++;
- h->Nc++;
- h->No++;
-
- if (b > h->imax) h->imax = b;
- if (b < h->imin) { h->imin = b; h->cmin = b; }
- if (x > h->xmax) h->xmax = x;
- if (x < h->xmin) h->xmin = x;
- return eslOK;
-
-ERROR:
- return status;
-}
-
-
-/* esl_histogram_sort()
- * Incept: SRE, Thu Aug 18 10:45:46 2005 [St. Louis]
- *
- * Purpose: Sort the raw scores in a full histogram, from smallest to
- * largest. Has no effect on a normal histogram, or on a full
- * histogram that is already sorted.
- *
- * Returns: <eslOK> on success.
- * Upon return, <h->x[h->n-1]> is the high score, <h->x[0]> is the
- * low score.
- */
-int
-esl_histogram_sort(ESL_HISTOGRAM *h)
-{
- if (h->is_sorted) return eslOK; /* already sorted, don't do anything */
- if (! h->is_full) return eslOK; /* nothing to sort */
-
- esl_vec_DSortIncreasing(h->x, h->n);
- h->is_sorted = TRUE;
- return eslOK;
-}
-
-/*****************************************************************
- * 2. Declarations about the binned data before parameter fitting
- *****************************************************************/
-
-/* Function: esl_histogram_DeclareCensoring()
- * Synopsis: Collected data were left-censored.
- * Incept: SRE, Tue Aug 23 10:00:14 2005 [St. Louis]
- *
- * Purpose: Declare that the dataset collected in <h> is known to be a
- * censored distribution, where <z> samples were unobserved because
- * they had values $\leq$ some threshold <phi> ($\phi$).
- *
- * No more data can be added to the histogram with <_Add()>
- * after censoring information has been set.
- *
- * This function is for "true" censored datasets, where
- * the histogram truly contains no observed points
- * $x \leq \phi$, and the number that were censored is known
- * to be <z>.
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslEINVAL> if you try to set <phi> to a value that is
- * greater than the minimum <x> stored in the histogram.
- */
-int
-esl_histogram_DeclareCensoring(ESL_HISTOGRAM *h, int z, double phi)
-{
- if (phi > h->xmin) ESL_EXCEPTION(eslEINVAL, "no uncensored x can be <= phi");
-
- h->phi = phi;
- h->cmin = h->imin;
- h->z = z;
- h->Nc = h->n + z;
- h->No = h->n;
- h->dataset_is = ESL_HISTOGRAM::TRUE_CENSORED;
- h->is_done = TRUE;
- return eslOK;
-}
-
-/* Function: esl_histogram_DeclareRounding()
- * Synopsis: Declare collected data were no more accurate than bins.
- * Incept: SRE, Tue Jan 31 13:52:10 2006 [St. Louis]
- *
- * Purpose: Declare that the data sample values in the histogram <h>
- * are rounded off. Ideally, your bins in <h> should exactly
- * match the rounding procedure. This raises a flag that
- * binned parameter fitting routines will use when they set
- * an origin, using the lower bound of the bin instead of
- * the lowest raw value in the bin.
- */
-int
-esl_histogram_DeclareRounding(ESL_HISTOGRAM *h)
-{
- h->is_rounded = TRUE;
- return eslOK;
-}
-
-
-/* Function: esl_histogram_SetTail()
-* Synopsis: Declare only tail $>$ some threshold is considered "observed".
-* Incept: SRE, Tue Aug 23 09:01:10 2005 [St. Louis]
-*
-* Purpose: Suggest a threshold <phi> to split a histogram <h>
-* into "unobserved" data (values $\leq \phi$) and "observed"
-* data (values $> \phi$).
-*
-* The suggested <phi> is revised downwards to a $\phi$ at the next
-* bin lower bound, because operations on binned data in <h>
-* need to know unambiguously whether all the data in a given bin
-* will be counted as observed or unobserved.
-*
-* The probability mass that is in the resulting right tail
-* is optionally returned in <ret_newmass>. You need to know
-* this number if you're fitting a distribution solely to the
-* tail (an exponential tail, for example).
-*
-* Any data point $x_i \leq \phi$ is then considered to be
-* in a censored (unobserved) region for purposes of parameter
-* fitting, calculating expected binned counts,
-* and binned goodness-of-fit tests.
-*
-* No more data can be added to the histogram after
-* censoring information has been set.
-*
-* This function defines a "virtual" left-censoring: the
-* histogram actually contains complete data, but appropriate
-* flags are set to demarcate the "observed" data in the right
-* tail.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslERANGE> if <phi> is an unreasonable value that
-* can't be converted to an integer bin value.
-*/
-int
-esl_histogram_SetTail(ESL_HISTOGRAM *h, double phi, double *ret_newmass)
-{
- int b;
- int status;
-
- /* Usually, put true phi at the next bin lower bound, but
- * watch for a special case where phi is already exactly equal to a
- * bin upper bound.
- */
- if ((status = esl_histogram_Score2Bin(h,phi, &(h->cmin))) != eslOK) return status;
- if (phi == esl_histogram_Bin2UBound(h,h->cmin)) h->phi = phi;
- else h->phi = esl_histogram_Bin2LBound(h, h->cmin);
-
- h->z = 0;
- for (b = h->imin; b < h->cmin; b++)
- h->z += h->obs[b];
- h->Nc = h->n; /* (redundant) */
- h->No = h->n - h->z;
- h->dataset_is = ESL_HISTOGRAM::VIRTUAL_CENSORED;
- h->is_done = TRUE;
- if (ret_newmass != NULL) *ret_newmass = (double) h->No / (double) h->Nc;
- return eslOK;
-}
-
-/* Function: esl_histogram_SetTailByMass()
- * Synopsis: Declare only right tail mass is considered "observed".
- * Incept: SRE, Tue Aug 23 08:10:39 2005 [St. Louis]
- *
- * Purpose: Given a histogram <h> (with or without raw data samples),
- * find a cutoff score that at least fraction <pmass> of the samples
- * exceed. This threshold is stored internally in the histogram
- * as <h->phi>. The number of "virtually censored" samples (to the
- * left, with scores $\leq \phi$) is stored internally in <h->z>.
- *
- * The identified cutoff score must be a lower bound for some bin
- * (bins can't be partially censored). The right tail mass
- * will thus usually be a bit greater than <pmass>, as the
- * routine will find the highest satisfactory <h->phi>. The
- * narrower the bin widths, the more accurately the routine
- * will be able to satisfy the requested <pmass>. The actual
- * probability mass in the right tail is optionally returned
- * in <ret_newmass>. You need to know this number if you're
- * fitting a distribution solely to the tail (an exponential tail,
- * for example). It is safe for <ret_newmass> to point at
- * <pmass>, in which case the suggested <pmass> will be overwritten
- * with the actual mass upon return.
- *
- * This function defines that the binned data will be
- * fitted either as a tail, or as a (virtually) left-censored dataset.
- *
- * Returns: <eslOK> on success.
- */
-int
-esl_histogram_SetTailByMass(ESL_HISTOGRAM *h, double pmass, double *ret_newmass)
-{
- int b;
- uint64_t sum = 0;
-
- for (b = h->imax; b >= h->imin; b--)
- {
- sum += h->obs[b];
- if (sum >= (pmass * (double)h->n)) break;
- }
-
- h->phi = esl_histogram_Bin2LBound(h,b);
- h->z = h->n - sum;
- h->cmin = b;
- h->Nc = h->n; /* (redundant) */
- h->No = h->n - h->z;
- h->dataset_is = ESL_HISTOGRAM::VIRTUAL_CENSORED;
- h->is_done = TRUE;
- if (ret_newmass != NULL) *ret_newmass = (double) h->No / (double) h->Nc;
- return eslOK;
-}
-
-
-
-/*****************************************************************
- * 3. Routines for accessing data samples in a full histogram.
- *****************************************************************/
-
-/* Function: esl_histogram_GetRank()
- * Synopsis: Retrieve n'th high score.
- * Incept: SRE, Thu Jul 28 08:39:52 2005 [St. Louis]
- *
- * Purpose: Retrieve the <rank>'th highest score from a
- * full histogram <h>. <rank> is <1..n>, for
- * <n> total samples in the histogram; return it through
- * <ret_x>.
- *
- * If the raw scores aren't sorted, they are sorted
- * first (an $N \log N$ operation).
- *
- * This can be called at any time, even during data
- * collection, to see the current <rank>'th highest score.
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslEINVAL> if the histogram is display-only,
- * or if <rank> isn't in the range 1..n.
- */
-int
-esl_histogram_GetRank(ESL_HISTOGRAM *h, int rank, double *ret_x)
-{
- if (! h->is_full)
- ESL_EXCEPTION(eslEINVAL,
- "esl_histogram_GetRank() needs a full histogram");
- if (rank > h->n)
- ESL_EXCEPTION(eslEINVAL,
- "no such rank: not that many scores in the histogram");
- if (rank < 1)
- ESL_EXCEPTION(eslEINVAL, "histogram rank must be a value from 1..n");
-
- esl_histogram_sort(h); /* make sure */
- *ret_x = h->x[h->n - rank];
- return eslOK;
-}
-
-/* Function: esl_histogram_GetData()
- * Synopsis: Retrieve vector of all raw scores.
- * Incept: SRE, Fri Jan 27 07:57:21 2006 [St. Louis]
- *
- * Purpose: Retrieve the raw data values from the histogram <h>.
- * Return them in the vector <ret_x>, and the number
- * of values in <ret_n>. The values are indexed <[0..n-1]>,
- * from smallest to largest (<x[n-1]> is the high score).
- *
- * <ret_x> is a pointer to internal memory in the histogram <h>.
- * The histogram <h> is still responsible for that storage;
- * its memory will be free'd when you call
- * <esl_histogram_Destroy()>.
- *
- * You can only call this after you have finished collecting
- * all the data. Subsequent calls to <esl_histogram_Add()>
- * will fail.
- *
- * Internal note:
- * The prohibition against adding more data (by raising
- * the h->is_done flag) is because we're passing a pointer
- * to internal data storage back to the caller. Subsequent
- * calls to Add() will modify that memory -- in the worst case,
- * if Add() has to reallocate that storage, completely invalidating
- * the pointer that the caller has a copy of. We want to make
- * sure that the <ret_x> pointer stays valid.
- *
- * Args: h - histogram to retrieve data values from
- * ret_x - RETURN: pointer to the data samples, [0..n-1]
- * ret_n - RETURN: number of data samples
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslEINVAL> if the histogram <h> is not a full histogram.
- */
-int
-esl_histogram_GetData(ESL_HISTOGRAM *h, double **ret_x, int *ret_n)
-{
- if (! h->is_full) ESL_EXCEPTION(eslEINVAL, "not a full histogram");
- esl_histogram_sort(h);
-
- *ret_x = h->x;
- *ret_n = h->n;
-
- h->is_done = TRUE;
- return eslOK;
-}
-
-
-/* Function: esl_histogram_GetTail()
- * Synopsis: Retrieve all raw scores above some threshold.
- * Incept: SRE, Fri Jan 27 07:56:38 2006 [St. Louis]
- *
- * Purpose: Given a full histogram <h>, retrieve all data values
- * above the threshold <phi> in the right (high scoring)
- * tail, as a ptr <ret_x> to an array of <ret_n> values
- * indexed <[0..n-1]> from lowest to highest score.
- * Optionally, it also returns the number of values in
- * rest of the histogram in <ret_z>;
- * this number is useful if you are going to fit
- * the tail as a left-censored distribution.
- *
- * The test is strictly greater than <phi>, not greater
- * than or equal to.
- *
- * <ret_x> is a pointer to internal memory in the histogram <h>.
- * The histogram <h> is still responsible for that storage;
- * its memory will be free'd when you call
- * <esl_histogram_Destroy()>.
- *
- * You can only call this after you have finished collecting
- * all the data. Subsequent calls to <esl_histogram_Add()>
- * will fail.
- *
- * Args: h - histogram to retrieve the tail from
- * phi - threshold: tail is all scores > phi
- * ret_x - optRETURN: ptr to vector of data values [0..n-1]
- * ret_n - optRETURN: number of data values in tail
- * ret_z - optRETURN: number of data values not in tail.
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslEINVAL> if the histogram is not a full histogram.
- */
-int
-esl_histogram_GetTail(ESL_HISTOGRAM *h, double phi,
- double **ret_x, int *ret_n, int *ret_z)
-{
- int hi, lo, mid;
-
- if (! h->is_full) ESL_EXCEPTION(eslEINVAL, "not a full histogram");
- esl_histogram_sort(h);
-
- if (h->n == 0) mid = h->n; /* we'll return NULL, 0, n */
- else if (h->x[0] > phi) mid = 0; /* we'll return x, n, 0 */
- else if (h->x[h->n-1] <= phi) mid = h->n; /* we'll return NULL, 0, n */
- else /* binary search, faster than a brute force scan */
- {
- lo = 0;
- hi = h->n-1; /* know hi>0, because above took care of n=0 and n=1 cases */
- while (1) {
- mid = (lo + hi + 1) / 2; /* +1 makes mid round up, mid=0 impossible */
- if (h->x[mid] <= phi) lo = mid; /* we're too far left */
- else if (h->x[mid-1] > phi) hi = mid; /* we're too far right */
- else break; /* ta-da! */
- }
- }
-
- if (ret_x != NULL) *ret_x = h->x + mid;
- if (ret_n != NULL) *ret_n = h->n - mid;
- if (ret_z != NULL) *ret_z = mid;
- h->is_done = TRUE;
- return eslOK;
-}
-
-
-/* Function: esl_histogram_GetTailByMass()
- * Synopsis: Retrieve all raw scores in right tail mass.
- * Incept: SRE, Sun Jan 29 17:56:37 2006 [St. Louis]
- *
- * Purpose: Given a full histogram <h>, retrieve the data values in
- * the right (high scoring) tail, as a pointer <ret_x>
- * to an array of <ret_n> values indexed <[0..n-1]> from
- * lowest to highest score. The tail is defined by a
- * given mass fraction threshold <pmass>; the mass in the returned
- * tail is $\leq$ this threshold. <pmass> is a probability,
- * so it must be $\geq 0$ and $\leq 1$.
- *
- * Optionally, the number of values in the rest of the
- * histogram can be returned in <ret_z>. This is useful
- * if you are going to fit the tail as a left-censored
- * distribution.
- *
- * <ret_x> is a pointer to internal memory in <h>.
- * The histogram <h> remains responsible for its storage,
- * which will be free'd when you call <esl_histogram_Destroy()>.
- * As a consequence, you can only call
- * <esl_histogram_GetTailByMass()> after you have finished
- * collecting data. Subsequent calls to <esl_histogram_Add()>
- * will fail.
- *
- * Args: h - histogram to retrieve the tail from
- * pmass - fractional mass threshold; tail contains <= pmass
- * ret_x - optRETURN: ptr to vector of data values [0..n-1]
- * ret_n - optRETURN: number of data values in tail x
- * ret_z - optRETURN: number of data values not in tail
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslEINVAL> if the histogram is not a full histogram,
- * or <pmass> is not a probability.
- */
-int
-esl_histogram_GetTailByMass(ESL_HISTOGRAM *h, double pmass,
- double **ret_x, int *ret_n, int *ret_z)
-{
- uint64_t n;
-
- if (! h->is_full)
- ESL_EXCEPTION(eslEINVAL, "not a full histogram");
- if (pmass < 0. || pmass > 1.)
- ESL_EXCEPTION(eslEINVAL, "pmass not a probability");
-
- esl_histogram_sort(h);
-
- n = (uint64_t) ((double) h->n * pmass); /* rounds down, guaranteeing <= pmass */
-
- if (ret_x != NULL) *ret_x = h->x + (h->n - n);
- if (ret_n != NULL) *ret_n = n;
- if (ret_z != NULL) *ret_z = h->n - n;
- h->is_done = TRUE;
- return eslOK;
-}
-
-
-
-
-
-/*****************************************************************
- * 4. Setting expected counts
- *****************************************************************/
-
-/* Function: esl_histogram_SetExpect()
- * Synopsis: Set expected counts for complete distribution.
- * Incept: SRE, Wed Aug 17 17:36:58 2005 [St. Louis]
- *
- * Purpose: Given a histogram <h> containing some number of empirically
- * observed binned counts, and a pointer to a function <(*cdf)()>
- * that describes the expected cumulative distribution function
- * (CDF) for the complete data, conditional on some parameters
- * <params>; calculate the expected counts in each bin of the
- * histogram, and hold that information internally in the structure.
- *
- * The caller provides a function <(*cdf)()> that calculates
- * the CDF via a generic interface, taking only two
- * arguments: a quantile <x> and a void pointer to whatever
- * parameters it needs, which it will cast and interpret.
- * The <params> void pointer to the given parameters is
- * just passed along to the generic <(*cdf)()> function. The
- * caller will probably implement this <(*cdf)()> function as
- * a wrapper around its real CDF function that takes
- * explicit (non-void-pointer) arguments.
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslEMEM> on allocation failure; state of <h> is preserved.
- */
-int
-esl_histogram_SetExpect(ESL_HISTOGRAM *h,
- double (*cdf)(double x, void *params), void *params)
-{
- int status;
- int i;
- double ai,bi; /* ai < x <= bi : lower,upper bounds in bin */
-
- if (h->expect == NULL)
- ESL_ALLOC_WITH_TYPE(h->expect, double*, sizeof(double) * h->nb);
-
- for (i = 0; i < h->nb; i++)
- {
- ai = esl_histogram_Bin2LBound(h, i);
- bi = esl_histogram_Bin2UBound(h, i);
- h->expect[i] = h->Nc * ( (*cdf)(bi, params) - (*cdf)(ai, params) );
-
- if (h->emin == -1 && h->expect[i] > 0.) h->emin = i;
- }
-
- h->is_done = TRUE;
- return eslOK;
-
- ERROR:
- return status;
-}
-
-/* Function: esl_histogram_SetExpectedTail()
-* Synopsis: Set expected counts for right tail.
-* Incept: SRE, Mon Jan 30 08:57:57 2006 [St. Louis]
-*
-* Purpose: Given a histogram <h>, and a pointer to a generic function
-* <(*cdf)()> that describes the expected cumulative
-* distribution function for the right (high-scoring) tail
-* starting at <base_val> (all expected <x> $>$ <base_val>) and
-* containing a fraction <pmass> of the complete data
-* distribution (<pmass> $\geq 0$ and $\leq 1$);
-* set the expected binned counts for all complete bins
-* $\geq$ <base_val>.
-*
-* If <base_val> falls within a bin, that bin is considered
-* to be incomplete, and the next higher bin is the starting
-* point.
-*
-* Args: h - finished histogram
-* base_val - threshold for the tail: all expected x > base_val
-* pmass - fractional mass in the tail: 0 <= pmass <= 1
-* cdf - generic-interface CDF function describing the tail
-* params - void pointer to parameters for (*cdf)()
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on memory allocation failure.
-* <eslERANGE> if <base_val> isn't a reasonable value within
-* the histogram (it converts to a bin value outside
-* integer range).
-*/
-int
-esl_histogram_SetExpectedTail(ESL_HISTOGRAM *h, double base_val, double pmass,
- double (*cdf)(double x, void *params),
- void *params)
-{
- int status;
- int b;
- double ai, bi;
-
- if (h->expect == NULL) ESL_ALLOC_WITH_TYPE(h->expect,double*, sizeof(double) * h->nb);
-
- if ((status = esl_histogram_Score2Bin(h, base_val, &(h->emin))) != eslOK) return status;
- h->emin += 1;
- esl_vec_DSet(h->expect, h->emin, 0.);
-
- for (b = h->emin; b < h->nb; b++)
- {
- ai = esl_histogram_Bin2LBound(h, b);
- bi = esl_histogram_Bin2UBound(h, b);
- h->expect[b] = pmass * (double) h->Nc *
- ( (*cdf)(bi, params) - (*cdf)(ai, params) );
- }
-
- h->tailbase = base_val;
- h->tailmass = pmass;
- h->is_tailfit = TRUE;
- h->is_done = TRUE;
- return eslOK;
-
-ERROR:
- return status;
-}
-
-
-
-/*****************************************************************
- * 5. Output and display of binned data.
- *****************************************************************/
- // ! we don't need io functions !
-
-
-/* Function: esl_histogram_Goodness()
- * Synopsis: Evaluate fit between observed, expected.
- * Incept: SRE, Wed Aug 17 12:46:05 2005 [St. Louis]
- *
- * Purpose: Given a histogram <h> with observed and expected counts,
- * where, for the expected counts, <nfitted> ($\geq 0$)
- * parameters were fitted (and thus should be subtracted
- * from the degrees of freedom);
- * Perform a G-test and/or a $\chi^2$ test for goodness of
- * fit between observed and expected, and optionally return
- * the number of bins the data were sorted into
- * (<ret_bins>), the G statistic and its probability (<ret_G> and
- * <ret_Gp>), and the $\chi^2$ statistic and its probability
- * (<ret_X2> and <ret_X2p>).
- *
- * If a goodness-of-fit probability is less than some threshold
- * (usually taken to be 0.01 or 0.05), that is considered to
- * be evidence that the observed data are unlikely to be consistent
- * with the tested distribution.
- *
- * The two tests should give similar
- * probabilities. However, both tests are sensitive to
- * arbitrary choices in how the data are binned, and
- * neither seems to be on an entirely sound theoretical footing.
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslEINVAL> if expected counts have not been set in
- * the histogram; <eslERANGE> or <eslENOHALT> on different internal
- * errors that can arise in calculating the probabilities;
- * <eslEMEM> on internal allocation failure.
- */
-int
-esl_histogram_Goodness(ESL_HISTOGRAM *h,
- int nfitted, int *ret_nbins,
- double *ret_G, double *ret_Gp,
- double *ret_X2, double *ret_X2p)
-{
- uint64_t *obs = NULL; /* observed in bin i, [0..nb-1] */
- double *exp = NULL; /* expected in bin i, [0..nb-1] */
- double *topx = NULL; /* all values in bin i <= topx[i] */
- int nb; /* # of re-bins */
- uint64_t minc; /* minimum target # of counts/bin */
- int i,b;
- double G, Gp;
- double X2, X2p;
- double tmp;
- int status;
- int bbase;
- uint64_t hmax;
- uint64_t nobs;
- double nexp;
-
- if (h->expect == NULL) ESL_EXCEPTION(eslEINVAL, "no expected counts in that histogram");
-
- /* Determine the smallest histogram bin included in
- * the goodness of fit evaluation.
- */
- bbase = h->cmin;
- if (h->is_tailfit && h->emin > bbase) bbase = h->emin;
-
- /* How many observed total counts are in the evaluated range,
- * and what is the maximum in any given histogram bin?
- */
- nobs = 0;
- hmax = 0;
- for (i = bbase; i <= h->imax; i++)
- {
- nobs += h->obs[i];
- if (h->obs[i] > hmax) hmax = h->obs[i];
- }
-
- /* Figure out how many eval bins we'd like to have, then allocate
- * for re-binning.
- * Number of bins for goodness-of-fit tests like G and X^2
- * is crucial but arbitrary, unfortunately. Some literature suggests
- * using 2*n^{0.4}, which gives:
- * n nbins #/bin
- * ----- ------ ------
- * 1000 31 32
- * 10000 79 127
- * 100000 200 500
- * 1000000 502 1992
- *
- * The most important thing seems to be to get the # of counts
- * in each bin to be roughly equal.
- */
- nb = 2* (int) pow((double) nobs, 0.4); /* "desired" nb. */
- minc = 1 + nobs / (2*nb); /* arbitrarily set min = 1/2 of the target # */
- ESL_ALLOC_WITH_TYPE(obs, uint64_t*, sizeof(uint64_t) * (nb*2+1)); /* final nb must be <= 2*nb+1 */
- ESL_ALLOC_WITH_TYPE(exp, double*, sizeof(double) * (nb*2+1));
- ESL_ALLOC_WITH_TYPE(topx, double*, sizeof(double) * (nb*2+1));
-
- /* Determine the observed counts in each bin: that is, partition
- * the <sum> in the evaluated region.
- * Sweep left to right on the histogram bins,
- * collecting sum of counts, dropping the sum into the next re-bin
- * whenever we have more than <minc> counts.
- */
- nobs = 0;
- nexp = 0.;
- for (i = 0, b = bbase; b <= h->imax; b++)
- {
- nobs += h->obs[b];
- nexp += h->expect[b];
-
- /* if we have enough counts, drop into bin i: */
- if (nobs >= minc && nexp >= minc) {
- ESL_DASSERT1( (i < (nb*2+1)) );
- obs[i] = nobs;
- exp[i] = nexp;
- topx[i] = esl_histogram_Bin2UBound(h,b);
- nobs = 0;
- nexp = 0.;
- i++;
- }
- }
- obs[i-1] += nobs; /* add the right tail to final bin */
- exp[i-1] += nexp;
- topx[i-1] = esl_histogram_Bin2UBound(h, h->imax);
- nb = i; /* nb is now actual # of bins, not target */
-
- /* Calculate the X^2 statistic: \sum (obs_i - exp_i)^2 / exp_i */
- X2 = 0.;
- for (i = 0; i < nb; i++)
- {
- tmp = (double) obs[i] - exp[i];
- X2 += tmp*tmp / exp[i];
- }
- /* X^2 is distributed approximately chi^2. */
- if (nb-nfitted >= 0 && X2 != eslINFINITY)
- {
- status = esl_stats_ChiSquaredTest(nb-nfitted, X2, &X2p);
- if (status != eslOK) return status;
- }
- else X2p = 0.;
-
- /* The G test assumes that #exp=#obs (the X^2 test didn't).
- * If that's not true, renormalize to make it so.
- */
- nobs = 0;
- nexp = 0.;
- for (i = 0; i < nb; i++)
- {
- nobs += obs[i];
- nexp += exp[i];
- }
- for (i = 0; i < nb; i++)
- exp[i] = exp[i] * (double) nobs / nexp;
-
- /* Calculate the G statistic: 2 * LLR */
- G = 0.;
- for (i = 0; i < nb; i++)
- G += (double) obs[i] * log ((double) obs[i] / exp[i]);
- G *= 2;
-
- /* G is distributed approximately as \chi^2.
- * -1 is because total #obs=#exp (which is must be)
- */
- ESL_DASSERT1( (G >= 0.));
- if (nb-nfitted-1 >= 0 && G != eslINFINITY)
- {
- status = esl_stats_ChiSquaredTest(nb-nfitted-1, G, &Gp);
- if (status != eslOK) return status;
- }
- else Gp = 0.;
-
- if (ret_nbins != NULL) *ret_nbins = nb;
- if (ret_G != NULL) *ret_G = G;
- if (ret_Gp != NULL) *ret_Gp = Gp;
- if (ret_X2 != NULL) *ret_X2 = X2;
- if (ret_X2p != NULL) *ret_X2p = X2p;
- free(obs);
- free(exp);
- free(topx);
- return eslOK;
-
- ERROR:
- if (ret_nbins != NULL) *ret_nbins = 0;
- if (ret_G != NULL) *ret_G = 0.;
- if (ret_Gp != NULL) *ret_Gp = 0.;
- if (ret_X2 != NULL) *ret_X2 = 0.;
- if (ret_X2p != NULL) *ret_X2p = 0.;
- if (obs != NULL) free(obs);
- if (exp != NULL) free(exp);
- if (topx != NULL) free(topx);
- return status;
-}
-
-/*****************************************************************
- * 6. Test driver.
- *****************************************************************/
-#ifdef eslHISTOGRAM_TESTDRIVE
-/* compile:
- * gcc -g -Wall -I. -L. -o test -DeslHISTOGRAM_TESTDRIVE esl_histogram.c -leasel -lm
- * run:
- * ./test -t1; ./test -t2; ./test -t3; ./test -t4; ./test -t5
- *
- * -t1 - complete data, fit to complete Gumbel\n\
- * -t2 - complete data, high scores fit as censored Gumbel\n\
- * -t3 - complete data, high scores fit to exponential tail\n\
- * -t4 - censored data, fit as censored Gumbel\n\
- * -t5 - complete data, binned, high scores fit to exponential tail\n\
- *
- * Some suggestions for manual testing:
- * ./test -t1 -j1 -v --surv test.xy; xmgrace test.xy
- * examine survivor plot fit, for -t1
- * do -t2 thru -t5 too
- *
- * ./test -t1 --j1 -v -qq test.xy; xmgrace test.xy
- * examine QQ plot fit, for -t1
- * do -t2 thru -t5 too
- *
- * ./test -t1 -v > foo
- * grep "^Estimated" foo | awk '{print $9}' | sort -g > test.xy
- * Look for straight line fit to G-test p values.
- * sub $9->$13 for chi-squared
- * sub Estimated -> Parametric for the parametric fits
- */
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "easel.h"
-#include "esl_stats.h"
-#include "esl_gumbel.h"
-#include "esl_exponential.h"
-#include "esl_random.h"
-#include "esl_getopts.h"
-
-
-
-static ESL_OPTIONS options[] = {
- /* name type default env_var range toggles reqs incompat */
- { "-j", eslARG_INT, "100", NULL, "n>0", NULL, NULL, NULL, "number of trials", 0 },
- { "-m", eslARG_INT, "0", NULL, "n>=0", NULL, NULL, NULL, "number of test samples", 0 },
- { "-n", eslARG_INT, "10000", NULL, "n>0", NULL, NULL, NULL, "number of training samples", 0 },
- { "-t", eslARG_INT, "1", NULL, "1<=n<=5", NULL, NULL, NULL, "test type choice, 1-5", 0 },
- { "-v", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "be verbose?", 0 },
- { "--ascii", eslARG_STRING, NULL, NULL, NULL, NULL, NULL, NULL, "output ASCII histogram to <f>", 0 },
- { "--cmass", eslARG_REAL, "0.7", NULL, "0<=x<=1", NULL, NULL, NULL, "set virtual censoring mass to <x>", 0 },
- { "--lambda", eslARG_REAL, "0.8", NULL, "x>0", NULL, NULL, NULL, "set Gumbel lambda param to <x>", 0 },
- { "--mu", eslARG_REAL, "10.0", NULL, NULL, NULL, NULL, NULL, "set Gumbel mu param to <x>", 0 },
- { "--phi", eslARG_REAL, "10.0", NULL, NULL, NULL, NULL, NULL, "set censoring threshold to <x>", 0 },
- { "--plot", eslARG_STRING, NULL, NULL, NULL, NULL, NULL, NULL, "output histogram to xmgrace file <f>", 0 },
- { "--qq", eslARG_STRING, NULL, NULL, NULL, NULL, NULL, NULL, "output Q-Q goodness of fit to xmgrace file <f>", 0 },
- { "--surv", eslARG_STRING, NULL, NULL, NULL, NULL, NULL, NULL, "output survival plot to xmgrace file <f>", 0 },
- { "--tail", eslARG_REAL, "0.1", NULL, "0<=x<=1", NULL, NULL, NULL, "set tail mass for fitting to <x>", 0 },
- { 0,0,0,0,0,0,0,0,0,0 },
-};
-
-static int
-binmacro_test(void)
-{
- ESL_HISTOGRAM *h = esl_histogram_Create(-100, 100, 1.0);
- double trialx[3] = { -42.42, 0, 42.42 };
- double x, ai, bi;
- int i,b;
-
- /* test bin<->score conversion macros.
- */
- for (i = 0; i < 3; i++)
- {
- x = trialx[i];
- b = esl_histogram_Score2Bin(h, x);
- ai = esl_histogram_Bin2LBound(h, b);
- bi = esl_histogram_Bin2UBound(h, b);
- if (x <= ai || x > bi) {
- fprintf(stderr,
- "failed: (ai=%.1f) <= (x=%.2f) < (bi=%.1f) in bin %d, bin macro test\n",
- ai, x, bi, b);
- esl_histogram_Destroy(h);
- return 0;
- }
- }
- esl_histogram_Destroy(h);
- return 1;
-}
-int
-main(int argc, char **argv)
-{
- ESL_GETOPTS *go;
- ESL_RANDOMNESS *r;
- ESL_HISTOGRAM *h;
- ESL_HISTOGRAM *h1;
- double p[2]; /* parametric mu, lambda */
- double ep[2]; /* estimated mu, lambda */
- double avg_ep[2]; /* average estimated mu, lambda over many trials */
- int ntrials, trial;
- int ntrain, ntest;
- int test_type;
- enum { COLLECT_COMPLETE, COLLECT_CENSORED } cstrategy;
- enum { FIT_BINNED, FIT_SAMPLES } bstrategy;
- enum { FIT_COMPLETE, FIT_CENSORED, FIT_TAIL} fstrategy;
- double phi; /* censoring threshold */
- int z;
- double cmass;
- double tailmass, save_tailmass;
- int nfitted;
- int nbins;
- double G, Gp, X2, X2p, minGp, minX2p;
- int verbose;
- FILE *outfp;
- char *ascfile, *plotfile, *survfile, *qqfile;
- int i;
- double x;
- double *xv;
- int n;
-
- go = esl_getopts_Create(options);
- esl_opt_ProcessCmdline(go, argc, argv);
- test_type = esl_opt_GetInteger(go, "-t");
- ntrials = esl_opt_GetInteger(go, "-j");
- ntrain = esl_opt_GetInteger(go, "-n");
- ntest = esl_opt_GetInteger(go, "-m");
- verbose = esl_opt_GetBoolean(go, "-v");
- cmass = esl_opt_GetReal (go, "--cmass");
- p[1] = esl_opt_GetReal (go, "--lambda");
- p[0] = esl_opt_GetReal (go, "--mu");
- phi = esl_opt_GetReal (go, "--phi");
- save_tailmass = esl_opt_GetReal (go, "--tail");
- ascfile = esl_opt_GetString (go, "--ascii");
- plotfile = esl_opt_GetString (go, "--plot");
- qqfile = esl_opt_GetString (go, "--qq");
- survfile = esl_opt_GetString (go, "--surv");
- esl_getopts_Destroy(go);
-
- r = esl_randomness_Create(42);
- avg_ep[0] = 0.;
- avg_ep[1] = 0.;
- minGp = 1.;
- minX2p = 1.;
- tailmass = save_tailmass;
-
- if (test_type == 1)
- {
- cstrategy = COLLECT_COMPLETE;
- bstrategy = FIT_SAMPLES;
- fstrategy = FIT_COMPLETE;
- }
- else if (test_type == 2)
- {
- cstrategy = COLLECT_COMPLETE;
- bstrategy = FIT_SAMPLES;
- fstrategy = FIT_CENSORED;
- }
- else if (test_type == 3)
- {
- cstrategy = COLLECT_COMPLETE;
- bstrategy = FIT_SAMPLES;
- fstrategy = FIT_TAIL;
- }
- else if (test_type == 4)
- {
- cstrategy = COLLECT_CENSORED;
- bstrategy = FIT_SAMPLES;
- fstrategy = FIT_CENSORED;
- }
- else if (test_type == 5)
- {
- cstrategy = COLLECT_COMPLETE;
- bstrategy = FIT_BINNED;
- fstrategy = FIT_TAIL;
- }
-
-
- for (trial = 0; trial < ntrials; trial++)
- {
- /* Collection of the training data in <h>.
- * Data set can either be complete, true censored, or virtual censored.
- */
- h = esl_histogram_CreateFull(-100, 100, 0.1);
- z = 0;
- for (i = 0; i < ntrain; i++) {
- x = esl_gumbel_Sample(r, p[0], p[1]);
- if (cstrategy != COLLECT_CENSORED || x > phi)
- esl_histogram_Add(h, x);
- else
- z++;
- }
- if (cstrategy == COLLECT_CENSORED)
- esl_histogram_DeclareCensoring(h, z, phi);
-
- /* Parameter fitting.
- * We test for four of twelve possible combinations of
- * collection strategy, binned vs. raw data, and complete,
- * censored, vs. tail fitting.
- * 1. complete Gumbel data, raw, fit to a Gumbel.
- * 2. complete Gumbel data, raw, tail fit as a censored Gumbel
- * 3. complete Gumbel data, raw, tail fit to an exponential tail
- * 4. censored Gumbel data, raw, censored fit to a Gumbel
- * 5 complete Gumbel data, binned, fit to an exponential tail.
- */
- if (cstrategy == COLLECT_COMPLETE &&
- bstrategy == FIT_SAMPLES &&
- fstrategy == FIT_COMPLETE)
- {
- esl_histogram_GetData(h, &xv, &n);
- esl_gumbel_FitComplete(xv, n, &(ep[0]), &ep[1]);
- }
- else if (cstrategy == COLLECT_COMPLETE &&
- bstrategy == FIT_SAMPLES &&
- fstrategy == FIT_CENSORED)
- {
- esl_histogram_GetTailByMass(h, cmass, &xv, &n, &z);
- esl_gumbel_FitCensored(xv, n, z, xv[0], &(ep[0]), &ep[1]);
- }
- else if (cstrategy == COLLECT_COMPLETE &&
- bstrategy == FIT_SAMPLES &&
- fstrategy == FIT_TAIL)
- {
- esl_histogram_GetTailByMass(h, tailmass, &xv, &n, &z);
- esl_exp_FitComplete(xv, n, &(ep[0]), &ep[1]);
- }
- else if (cstrategy == COLLECT_CENSORED &&
- bstrategy == FIT_SAMPLES &&
- fstrategy == FIT_CENSORED)
- {
- esl_histogram_GetData(h, &xv, &n);
- esl_gumbel_FitCensored(xv, n, h->z, h->phi, &(ep[0]), &ep[1]);
- }
- else if (cstrategy == COLLECT_COMPLETE &&
- bstrategy == FIT_BINNED &&
- fstrategy == FIT_TAIL)
- {
- tailmass = save_tailmass; /* reset to original for each trial. */
- esl_histogram_SetTailByMass(h, tailmass, &tailmass);
- esl_exp_FitCompleteBinned(h, &(ep[0]), &ep[1]);
- }
- else
- ESL_EXCEPTION(eslEINVAL, "not a scenario we currently test");
-
- /* Keep track of average estimated mu, lambda
- * for automated testing purposes.
- */
- avg_ep[0] += ep[0] / (double) ntrials;
- avg_ep[1] += ep[1] / (double) ntrials;
-
- /* Test data can either be the same as the training data,
- * or a new test set.
- */
- if (ntest > 0)
- {
- h1 = esl_histogram_CreateFull(-100.05, 100.05, 0.2);
- z = 0;
- for (i = 0; i < ntest; i++) {
- x = esl_gumbel_Sample(r, p[0], p[1]);
- if (cstrategy != COLLECT_CENSORED || x > phi)
- esl_histogram_Add(h1, x);
- else
- z++;
- }
- if (cstrategy == COLLECT_CENSORED)
- esl_histogram_DeclareCensoring(h, z, phi);
- }
- else h1 = h;
-
-
- /* Set expected binned counts in the test data, h1:
- */
- if (fstrategy == FIT_TAIL)
- esl_histogram_SetExpectedTail(h1, ep[0], tailmass,
- &esl_exp_generic_cdf, ep);
- else
- esl_histogram_SetExpect(h1, &esl_gumbel_generic_cdf, ep);
-
-
- /* Evaluate goodness-of-fit
- */
- nfitted = (ntest == 0)? 2 : 0;
- esl_histogram_Goodness(h1, nfitted, &nbins, &G, &Gp, &X2, &X2p);
-
- /* Track minimum goodness of fit probs, for automated testing
- */
- if (Gp < minGp) minGp = Gp;
- if (X2p < minX2p) minX2p = X2p;
-
- if (verbose)
- printf("Estimated: %6.2f %6.4f nb %4d G %g\tGp %g\tX2 %g\tX2p %g\n",
- ep[0], ep[1], nbins, G, Gp, X2, X2p);
-
- /* Output files, if requested.
- * (Best if ntrials=1. Will overwrite previous trials.)
- */
- if (ascfile != NULL)
- {
- outfp = fopen(ascfile, "w");
- esl_histogram_Print(outfp, h1);
- fclose(outfp);
- }
- if (plotfile != NULL)
- {
- outfp = fopen(plotfile, "w");
- esl_histogram_Plot(outfp, h1);
- fclose(outfp);
- }
- if (survfile != NULL)
- {
- outfp = fopen(survfile, "w");
- esl_histogram_PlotSurvival(outfp, h1);
- fclose(outfp);
- }
- if (qqfile != NULL)
- {
- outfp = fopen(qqfile, "w");
- if (fstrategy == FIT_TAIL)
- esl_histogram_PlotQQ(outfp, h1, &esl_exp_generic_invcdf, ep);
- else
- esl_histogram_PlotQQ(outfp, h1, &esl_gumbel_generic_invcdf, ep);
- fclose(outfp);
- }
-
- esl_histogram_Destroy(h);
- if (ntest > 0) esl_histogram_Destroy(h1);
- }
-
- /* Trap badness in an automated test.
- */
- if (fstrategy != FIT_TAIL && fabs(avg_ep[0] - p[0]) > 0.1)
- ESL_EXCEPTION(eslFAIL, "Something awry with Gumbel mu fit");
- if (fabs(avg_ep[1] - p[1]) > 0.1)
- ESL_EXCEPTION(eslFAIL, "Something awry with lambda fit");
- if (minGp < 1. / (1000. * ntrials))
- ESL_EXCEPTION(eslFAIL, "Something awry with G-test");
- if (minX2p < 1. / (1000. * ntrials))
- ESL_EXCEPTION(eslFAIL, "Something awry with chi squared test");
-
- /* Smaller final tests
- */
- if (! binmacro_test()) exit(1);
-
- esl_randomness_Destroy(r);
- return 0;
-}
-#endif /*eslHISTOGRAM_TESTDRIVE*/
-
-
-
-/*****************************************************************
- * 7. Examples
- *****************************************************************/
-
-/*****************************************************************
- * Five example main()'s for five use cases:
- * - complete data, fit to complete Gumbel
- * - complete data, high scores fit as censored Gumbel
- * - complete data, high scores fit to exponential tail
- * - censored data, fit as censored Gumbel
- * - complete data, binned, high scores fit to exponential tail
- *
- * (These same five cases are tested by ./test -t1 through ./test -t5.)
- *****************************************************************/
-/* Case 1. Complete data fit to complete Gumbel.
- * compile: gcc -I. -L. -o example -DeslHISTOGRAM_EXAMPLE1 esl_histogram.c -leasel -lm
- * run: ./example
- */
-#ifdef eslHISTOGRAM_EXAMPLE1
-/*::cexcerpt::histogram_example1::begin::*/
-#include "easel.h"
-#include "esl_random.h"
-#include "esl_histogram.h"
-#include "esl_gumbel.h"
-
-int
-main(int argc, char **argv)
-{
- ESL_RANDOMNESS *r = esl_randomness_Create(0);
- ESL_HISTOGRAM *h = esl_histogram_CreateFull(-100, 100, 0.2);
- int nsamples = 10000;
- double mu = 10.0;
- double lambda = 0.8;
- double params[2];
- int i;
- double x;
- double *xv;
- int n;
- double G, Gp, X2, X2p;
-
- for (i = 0; i < nsamples; i++) {
- x = esl_gumbel_Sample(r, mu, lambda);
- esl_histogram_Add(h, x);
- }
-
- esl_histogram_GetData(h, &xv, &n);
- esl_gumbel_FitComplete(xv, n, &mu, &lambda);
-
- params[0] = mu;
- params[1] = lambda;
- esl_histogram_SetExpect(h, &esl_gumbel_generic_cdf, &params);
-
- esl_histogram_Print(stdout, h);
- esl_histogram_Goodness(h, 0, NULL, &G, &Gp, &X2, &X2p);
- printf("G = %f p = %f\n", G, Gp);
- printf("X^2 = %f p = %f\n", X2, X2p);
-
- esl_histogram_Destroy(h);
- esl_randomness_Destroy(r);
- return 0;
-}
-/*::cexcerpt::histogram_example1::end::*/
-#endif /*eslHISTOGRAM_EXAMPLE1*/
-
-
-
-/* Case 2. complete data, high scores fit as censored Gumbel
- * compile: gcc -I. -L. -o example -DeslHISTOGRAM_EXAMPLE2 esl_histogram.c -leasel -lm
- * run: ./example
- */
-#ifdef eslHISTOGRAM_EXAMPLE2
-/*::cexcerpt::histogram_example2::begin::*/
-#include "easel.h"
-#include "esl_random.h"
-#include "esl_histogram.h"
-#include "esl_gumbel.h"
-
-int
-main(int argc, char **argv)
-{
- ESL_RANDOMNESS *r = esl_randomness_Create(0);
- ESL_HISTOGRAM *h = esl_histogram_CreateFull(-100, 100, 0.2);
- int nsamples = 10000;
- double mu = 10.0;
- double lambda = 0.8;
- double params[2];
- int i;
- double x;
- double *xv;
- int n, z;
- double G, Gp, X2, X2p;
-
- for (i = 0; i < nsamples; i++) {
- x = esl_gumbel_Sample(r, mu, lambda);
- esl_histogram_Add(h, x);
- }
-
- esl_histogram_GetTailByMass(h, 0.5, &xv, &n, &z); /* fit to right 50% */
- esl_gumbel_FitCensored(xv, n, z, xv[0], &mu, &lambda);
-
- params[0] = mu;
- params[1] = lambda;
- esl_histogram_SetExpect(h, &esl_gumbel_generic_cdf, &params);
-
- esl_histogram_Print(stdout, h);
- esl_histogram_Goodness(h, 0, NULL, &G, &Gp, &X2, &X2p);
- printf("G = %f p = %f\n", G, Gp);
- printf("X^2 = %f p = %f\n", X2, X2p);
-
- esl_histogram_Destroy(h);
- esl_randomness_Destroy(r);
- return 0;
-}
-/*::cexcerpt::histogram_example2::end::*/
-#endif /*eslHISTOGRAM_EXAMPLE2*/
-
-
-/* Case 3. complete data, high scores fit to exponential tail
- * compile: gcc -I. -L. -o example -DeslHISTOGRAM_EXAMPLE3 esl_histogram.c -leasel -lm
- * run: ./example
- */
-#ifdef eslHISTOGRAM_EXAMPLE3
-/*::cexcerpt::histogram_example3::begin::*/
-#include "easel.h"
-#include "esl_random.h"
-#include "esl_histogram.h"
-#include "esl_gumbel.h"
-#include "esl_exponential.h"
-
-int
-main(int argc, char **argv)
-{
- ESL_RANDOMNESS *r = esl_randomness_Create(0);
- ESL_HISTOGRAM *h = esl_histogram_CreateFull(-100, 100, 0.2);
- int nsamples = 10000;
- double mu = 10.0;
- double lambda = 0.8;
- double params[2];
- int i;
- double x;
- double *xv;
- int n;
- double G, Gp, X2, X2p;
-
- for (i = 0; i < nsamples; i++) {
- x = esl_gumbel_Sample(r, mu, lambda);
- esl_histogram_Add(h, x);
- }
-
- esl_histogram_GetTailByMass(h, 0.1, &xv, &n, NULL); /* fit to 10% tail */
- esl_exp_FitComplete(xv, n, &mu, &lambda);
-
- params[0] = mu;
- params[1] = lambda;
- esl_histogram_SetExpectedTail(h, mu, 0.1, &esl_exp_generic_cdf, &params);
-
- esl_histogram_Print(stdout, h);
- esl_histogram_Goodness(h, 0, NULL, &G, &Gp, &X2, &X2p);
- printf("G = %f p = %f\n", G, Gp);
- printf("X^2 = %f p = %f\n", X2, X2p);
-
- esl_histogram_Destroy(h);
- esl_randomness_Destroy(r);
- return 0;
-}
-/*::cexcerpt::histogram_example3::end::*/
-#endif /*eslHISTOGRAM_EXAMPLE3*/
-
-/* Case 4. censored data, high scores fit as a censored Gumbel tail
- * compile:
- gcc -I. -L. -o example -DeslHISTOGRAM_EXAMPLE4 esl_histogram.c -leasel -lm
- * run: ./example
- */
-#ifdef eslHISTOGRAM_EXAMPLE4
-/*::cexcerpt::histogram_example4::begin::*/
-#include "easel.h"
-#include "esl_random.h"
-#include "esl_histogram.h"
-#include "esl_gumbel.h"
-
-int
-main(int argc, char **argv)
-{
- ESL_RANDOMNESS *r = esl_randomness_Create(0);
- ESL_HISTOGRAM *h = esl_histogram_CreateFull(-100, 100, 0.2);
- int nsamples = 10000;
- double mu = 10.0;
- double lambda = 0.8;
- double phi = 9.0;
- double params[2];
- int i;
- double x;
- double *xv;
- int n, z;
- double G, Gp, X2, X2p;
-
- z = 0;
- for (i = 0; i < nsamples; i++) {
- x = esl_gumbel_Sample(r, mu, lambda);
- if (x > phi) esl_histogram_Add(h, x);
- else z++;
- }
-
- esl_histogram_GetData(h, &xv, &n);
- esl_gumbel_FitCensored(xv, n, z, phi, &mu, &lambda);
-
- params[0] = mu;
- params[1] = lambda;
- esl_histogram_SetExpect(h, &esl_gumbel_generic_cdf, &params);
-
- esl_histogram_Print(stdout, h);
- esl_histogram_Goodness(h, 0, NULL, &G, &Gp, &X2, &X2p);
- printf("G = %f p = %f\n", G, Gp);
- printf("X^2 = %f p = %f\n", X2, X2p);
-
- esl_histogram_Destroy(h);
- esl_randomness_Destroy(r);
- return 0;
-}
-/*::cexcerpt::histogram_example4::end::*/
-#endif /*eslHISTOGRAM_EXAMPLE4*/
-
-/* Case 5. complete data, binned high scores fit to exponential tail
- * compile:
- gcc -I. -L. -o example -DeslHISTOGRAM_EXAMPLE5 esl_histogram.c -leasel -lm
- * run: ./example
- */
-#ifdef eslHISTOGRAM_EXAMPLE5
-/*::cexcerpt::histogram_example5::begin::*/
-#include "easel.h"
-#include "esl_random.h"
-#include "esl_histogram.h"
-#include "esl_gumbel.h"
-#include "esl_exponential.h"
-
-int
-main(int argc, char **argv)
-{
- ESL_RANDOMNESS *r = esl_randomness_Create(0);
- ESL_HISTOGRAM *h = esl_histogram_Create(-100, 100, 1.0);
- int nsamples = 10000;
- double mu = 10.0;
- double lambda = 0.8;
- double params[2];
- int i;
- double x;
- double actual_mass;
- double G, Gp, X2, X2p;
-
- for (i = 0; i < nsamples; i++) {
- x = esl_gumbel_Sample(r, mu, lambda);
- x = ceil(x); /* crudely simulate an x of limited precision */
- esl_histogram_Add(h, x);
- }
-
- esl_histogram_SetTailByMass(h, 0.1, &actual_mass);
- esl_histogram_DeclareRounding(h);
- esl_exp_FitCompleteBinned(h, &mu, &lambda);
-
- params[0] = mu;
- params[1] = lambda;
- esl_histogram_SetExpectedTail(h, mu, actual_mass, &esl_exp_generic_cdf, &params);
-
- esl_histogram_Print(stdout, h);
- esl_histogram_Goodness(h, 0, NULL, &G, &Gp, &X2, &X2p);
- printf("G = %f p = %f\n", G, Gp);
- printf("X^2 = %f p = %f\n", X2, X2p);
-
- esl_histogram_Destroy(h);
- esl_randomness_Destroy(r);
- return 0;
-}
-/*::cexcerpt::histogram_example5::end::*/
-#endif /*eslHISTOGRAM_EXAMPLE5*/
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_histogram.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_histogram.h
deleted file mode 100644
index 35e0cb5..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_histogram.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/* Collection and display of score histograms.
- *
- * SRE, Fri Jul 1 13:22:45 2005 [St. Louis]
- * SVN $Id: esl_histogram.h 181 2007-05-04 11:00:44Z eddys $
- */
-#ifndef ESL_HISTOGRAM_INCLUDED
-#define ESL_HISTOGRAM_INCLUDED
-
-#include <math.h> /* floor() is in one of the macros */
-
-
-/* Structure: ESL_HISTOGRAM
- *
- * Keeps a score histogram, in which scores are counted into bins of
- * size (width) w.
- * histogram starts at bmin < floor(xmin/w) * w
- * histogram ends at bmax >= ceil(xmax/w)*w
- * nb = (bmax-bmin)/w
- * each score x is counted into bin b = nb - (int) (bmax-x)/w
- * each bin b contains scores bw+bmin < x <= (b+1)w + bmin
- *
- * Anything having to do with the counts themselves (obs, n, etc)
- * is a uint64_t, with range 0..2^64-1 (up to 2e19).
- */
-typedef struct {
- /* The histogram is kept as counts in fixed-width bins.
- */
- uint64_t *obs; /* observed counts in bin b, 0..nb-1 (dynamic) */
- int nb; /* number of bins */
- double w; /* fixed width of each bin */
- double bmin, bmax; /* histogram bounds: all x satisfy bmin < x <= bmax */
- int imin, imax; /* smallest, largest bin that contain obs[i] > 0 */
-
- /* Optionally, in a "full" h, we can also keep all the raw samples in x.
- */
- double xmin, xmax; /* smallest, largest sample value x observed */
- uint64_t n; /* total number of raw data samples */
- double *x; /* optional: raw sample values x[0..n-1] */
- uint64_t nalloc; /* current allocated size of x */
-
- /* The binned data might be censored (either truly, or virtually).
- * This information has to be made available to a binned/censored
- * parameter fitting function, and to goodness-of-fit tests.
- */
- double phi; /* censoring value; all x_i > phi */
- int cmin; /* smallest bin index that contains uncensored data */
- uint64_t z; /* # of censored values <= phi */
- uint64_t Nc; /* # samples in complete data (including unobs) */
- uint64_t No; /* # of samples in observed data */
-
- /* Expected binned counts are set by SetExpect() or SetExpectedTail().
- */
- double *expect; /* expected counts in bin b, 0..nb-1 (not resized) */
- int emin; /* smallest bin index that contains expected counts */
- double tailbase; /* for tail fits: fitted x > tailbase */
- double tailmass; /* for tail fits: fractional prob in the tail */
-
- /* Some status flags
- */
- int is_full; /* TRUE when we're keeping raw data in x */
- int is_done; /* TRUE if we prevent more Add()'s */
- int is_sorted; /* TRUE if x is sorted smallest-to-largest */
- int is_tailfit; /* TRUE if expected dist only describes tail */
- int is_rounded; /* TRUE if values aren't more accurate than bins */
- enum { COMPLETE, VIRTUAL_CENSORED, TRUE_CENSORED } dataset_is;
-
-} ESL_HISTOGRAM;
-
-#define esl_histogram_Bin2LBound(h,b) ((h)->w*(b) + (h)->bmin)
-#define esl_histogram_Bin2UBound(h,b) ((h)->w*((b)+1) + (h)->bmin)
-
-/* Creating/destroying histograms and collecting data:
- */
-extern ESL_HISTOGRAM *esl_histogram_Create (double bmin, double bmax, double w);
-extern ESL_HISTOGRAM *esl_histogram_CreateFull(double bmin, double bmax, double w);
-extern void esl_histogram_Destroy(ESL_HISTOGRAM *h);
-extern int esl_histogram_Add(ESL_HISTOGRAM *h, double x);
-
-/* Declarations about the binned data before parameter fitting:
- */
-extern int esl_histogram_DeclareCensoring(ESL_HISTOGRAM *h, int z, double phi);
-extern int esl_histogram_DeclareRounding (ESL_HISTOGRAM *h);
-extern int esl_histogram_SetTail (ESL_HISTOGRAM *h, double phi,
- double *ret_newmass);
-extern int esl_histogram_SetTailByMass (ESL_HISTOGRAM *h, double pmass,
- double *ret_newmass);
-
-/* Accessing data samples in a full histogram:
- */
-extern int esl_histogram_GetRank(ESL_HISTOGRAM *h, int rank, double *ret_x);
-extern int esl_histogram_GetData(ESL_HISTOGRAM *h, double **ret_x, int *ret_n);
-extern int esl_histogram_GetTail(ESL_HISTOGRAM *h, double phi, double **ret_x,
- int *ret_n, int *ret_z);
-extern int esl_histogram_GetTailByMass(ESL_HISTOGRAM *h, double pmass,
- double **ret_x, int *ret_n, int *ret_z);
-
-
-/* Setting expected binned counts:
- */
-extern int esl_histogram_SetExpect(ESL_HISTOGRAM *h,
- double (*cdf)(double x, void *params),
- void *params);
-extern int esl_histogram_SetExpectedTail(ESL_HISTOGRAM *h, double base_val,
- double pmass,
- double (*cdf)(double x, void *params),
- void *params);
-
-/* Output/display of binned data:
- */
-// ! we don't need this functions !
-
-/* Goodness of fit testing
- */
-extern int esl_histogram_Goodness(ESL_HISTOGRAM *h, int nfitted,
- int *ret_nbins,
- double *ret_G, double *ret_Gp,
- double *ret_X2, double *ret_X2p);
-
-
-
-#endif /*!ESL_HISTOGRAM_INCLUDED*/
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_hmm.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_hmm.cpp
deleted file mode 100644
index 58ddbdd..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_hmm.cpp
+++ /dev/null
@@ -1,470 +0,0 @@
-/* General hidden Markov models (discrete, of alphabetic strings)
-*
-* SRE, Fri Jul 18 09:00:14 2008 [Janelia]
-* SVN $Id$
-*/
-
-#include <hmmer3/easel/esl_config.h>
-
-#include <stdlib.h>
-#include <math.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_alphabet.h>
-#include <hmmer3/easel/esl_hmm.h>
-#include <hmmer3/easel/esl_random.h>
-#include <hmmer3/easel/esl_vectorops.h>
-
-/* Function: esl_hmm_Create()
-* Synopsis: Allocates a new HMM.
-* Incept: SRE, Fri Jul 18 09:01:54 2008 [Janelia]
-*
-* Purpose: Allocates a new HMM of <M> states for
-* generating or modeling strings in the
-* alphabet <abc>.
-*
-* Returns: a pointer to the new HMM.
-*
-* Throws: <NULL> on allocation failure.
-*/
-ESL_HMM *
-esl_hmm_Create(const ESL_ALPHABET *abc, int M)
-{
- ESL_HMM *hmm = NULL;
- int k,x;
- int status;
-
- ESL_ALLOC_WITH_TYPE(hmm, ESL_HMM*, sizeof(ESL_HMM));
- hmm->t = NULL;
- hmm->e = NULL;
-
- ESL_ALLOC_WITH_TYPE(hmm->t, float**, sizeof(float *) * M);
- ESL_ALLOC_WITH_TYPE(hmm->e, float**, sizeof(float *) * M);
- ESL_ALLOC_WITH_TYPE(hmm->eo, float**, sizeof(float *) * abc->Kp);
- hmm->t[0] = NULL;
- hmm->e[0] = NULL;
- hmm->eo[0] = NULL;
-
- ESL_ALLOC_WITH_TYPE(hmm->t[0], float*, sizeof(float) * M * (M+1)); /* state M is the implicit end state */
- ESL_ALLOC_WITH_TYPE(hmm->e[0], float*, sizeof(float) * M * abc->K);
- ESL_ALLOC_WITH_TYPE(hmm->eo[0], float*, sizeof(float) * M * abc->Kp);
- ESL_ALLOC_WITH_TYPE(hmm->pi, float*, sizeof(float) * (M+1)); /* initial transition to state M means a L=0 sequence */
-
- for (k = 1; k < M; k++)
- {
- hmm->t[k] = hmm->t[0] + k*(M+1);
- hmm->e[k] = hmm->e[0] + k*abc->K;
- }
- for (x = 1; x < abc->Kp; x++)
- hmm->eo[x] = hmm->eo[0] + x*M;
-
- hmm->M = M;
- hmm->K = abc->K;
- hmm->abc = abc;
- return hmm;
-
-ERROR:
- esl_hmm_Destroy(hmm);
- return NULL;
-}
-
-/* Function: esl_hmm_Configure()
-* Synopsis: Set an HMM's emission odds ratios, including degenerate residues.
-* Incept: SRE, Thu Feb 26 11:49:54 2009 [Janelia]
-*
-* Purpose: Given a parameterized <hmm>, and some background
-* residue frequencies <fq>, set the emission odds ratios
-* (<hmm->eo[0..Kp-1][0..M-1]>) in the model.
-*
-* The frequencies <fq> do not necessarily have to
-* correspond to a null model. They are only used for
-* rescaling.
-*
-* If <fq> is <NULL>, uniform background frequencies are
-* used ($\frac{1}{K}$, for alphabet size $K$).
-*
-* Returns: <eslOK> on success.
-*
-* Throws: (no abnormal error conditions)
-*/
-int
-esl_hmm_Configure(ESL_HMM *hmm, float *fq)
-{
- int Kp = hmm->abc->Kp;
- int K = hmm->abc->K;
- int k,x,y;
- float uniform = 1.0f / (float) K;
- float use_fq;
- float denom;
-
- for (x = 0; x < K; x++) {
- use_fq = (fq == NULL) ? uniform : fq[x];
- for (k = 0; k < hmm->M; k++)
- hmm->eo[x][k] = hmm->e[k][x] / use_fq;
- }
-
- for (k = 0; k < hmm->M; k++)
- { /* -,*,~: treat as X */
- hmm->eo[K][k] = 1.0; /* gap char */
- hmm->eo[Kp-2][k] = 1.0; /* nonresidue */
- hmm->eo[Kp-1][k] = 1.0; /* missing data char */
- }
-
- for (x = K+1; x <= Kp-3; x++) {
- for (k = 0; k < hmm->M; k++)
- {
- hmm->eo[x][k] = 0.0f;
- denom = 0.0f;
- for (y = 0; y < K; y++)
- if (hmm->abc->degen[x][y])
- {
- hmm->eo[x][k] += hmm->e[k][y];
- denom += (fq == NULL) ? uniform : fq[y];
- }
- hmm->eo[x][k] = ((denom > 0.0f) ? hmm->eo[x][k] / denom : 0.0f);
- }
- }
- return eslOK;
-}
-
-
-/* Function: esl_hmm_Destroy()
-* Synopsis: Destroys an HMM.
-* Incept: SRE, Fri Jul 18 09:06:22 2008 [Janelia]
-*
-* Purpose: Frees an HMM.
-*/
-void
-esl_hmm_Destroy(ESL_HMM *hmm)
-{
- if (hmm == NULL) return;
-
- if (hmm->t != NULL) {
- if (hmm->t[0] != NULL) free(hmm->t[0]);
- free(hmm->t);
- }
- if (hmm->e != NULL) {
- if (hmm->e[0] != NULL) free(hmm->e[0]);
- free(hmm->e);
- }
- if (hmm->eo != NULL) {
- if (hmm->eo[0] != NULL) free(hmm->eo[0]);
- free(hmm->eo);
- }
- if (hmm->pi != NULL) free(hmm->pi);
- free(hmm);
- return;
-}
-
-
-ESL_HMX *
-esl_hmx_Create(int allocL, int allocM)
-{
- ESL_HMX *mx = NULL;
- int i;
- int status;
-
- ESL_ALLOC_WITH_TYPE(mx, ESL_HMX*, sizeof(ESL_HMX));
- mx->dp_mem = NULL;
- mx->dp = NULL;
- mx->sc = NULL;
-
- ESL_ALLOC_WITH_TYPE(mx->dp_mem, float*, sizeof(float) * (allocL+1) * allocM);
- mx->ncells = (allocL+1) * allocM;
-
- ESL_ALLOC_WITH_TYPE(mx->dp, float**, sizeof (float *) * (allocL+1));
- ESL_ALLOC_WITH_TYPE(mx->sc, float*, sizeof (float) * (allocL+2));
- mx->allocR = allocL+1;
-
- for (i = 0; i <= allocL; i++)
- mx->dp[i] = mx->dp_mem + i*allocM;
- mx->validR = allocL+1;
- mx->allocM = allocM;
-
- mx->L = 0;
- mx->M = 0;
- return mx;
-
-ERROR:
- esl_hmx_Destroy(mx);
- return NULL;
-}
-
-int
-esl_hmx_GrowTo(ESL_HMX *mx, int L, int M)
-{
- uint64_t ncells;
- void *p;
- int do_reset = FALSE;
- int i;
- int status;
-
- if (L < mx->allocR && M <= mx->allocM) return eslOK;
-
- /* Do we have to reallocate the 2D matrix, or can we get away with
- * rejiggering the row pointers into the existing memory?
- */
- ncells = (L+1) * M;
- if (ncells > mx->ncells)
- {
- ESL_RALLOC_WITH_TYPE(mx->dp_mem, float*, p, sizeof(float) * ncells);
- mx->ncells = ncells;
- do_reset = TRUE;
- }
-
- /* must we reallocate row pointers? */
- if (L >= mx->allocR)
- {
- ESL_RALLOC_WITH_TYPE(mx->dp, float**, p, sizeof(float *) * (L+1));
- ESL_RALLOC_WITH_TYPE(mx->sc, float*, p, sizeof(float) * (L+2));
- mx->allocR = L+1;
- mx->allocM = M;
- do_reset = TRUE;
- }
-
- /* must we widen the rows? */
- if (M > mx->allocM)
- {
- mx->allocM = M;
- do_reset = TRUE;
- }
-
- /* must we set some more valid row pointers? */
- if (L >= mx->validR)
- do_reset = TRUE;
-
- /* did we trigger a relayout of row pointers? */
- if (do_reset)
- {
- mx->validR = ESL_MIN(mx->ncells / mx->allocM, mx->allocR);
- for (i = 0; i < mx->validR; i++)
- mx->dp[i] = mx->dp_mem + i*mx->allocM;
- }
- mx->M = 0;
- mx->L = 0;
- return eslOK;
-
-ERROR:
- return status;
-}
-
-void
-esl_hmx_Destroy(ESL_HMX *mx)
-{
- if (mx == NULL) return;
-
- if (mx->dp_mem != NULL) free(mx->dp_mem);
- if (mx->dp != NULL) free(mx->dp);
- if (mx->sc != NULL) free(mx->sc);
- free(mx);
- return;
-}
-
-
-/* Function: esl_hmm_Emit()
-* Synopsis: Emit a sequence from an HMM.
-* Incept: SRE, Fri Jul 18 13:16:20 2008 [Janelia]
-*
-* Purpose: Sample one sequence from an <hmm>, using random
-* number generator <r>. Optionally return the sequence,
-* the state path, and/or the length via <opt_dsq>,
-* <opt_path>, and <opt_L>.
-*
-* If <opt_dsq> or <opt_path> are requested, caller
-* becomes responsible for free'ing their memory.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: (no abnormal error conditions)
-*/
-int
-esl_hmm_Emit(ESL_RANDOMNESS *r, const ESL_HMM *hmm, ESL_DSQ **opt_dsq, int **opt_path, int *opt_L)
-{
- int k, L, allocL;
- int *path = NULL;
- ESL_DSQ *dsq = NULL;
- void *tmp = NULL;
- int status;
-
- ESL_ALLOC_WITH_TYPE(dsq, ESL_DSQ*, sizeof(ESL_DSQ) * 256);
- ESL_ALLOC_WITH_TYPE(path, int*, sizeof(int) * 256);
- allocL = 256;
-
- dsq[0] = eslDSQ_SENTINEL;
- path[0] = -1;
-
- k = esl_rnd_FChoose(r, hmm->pi, hmm->M+1);
- L = 0;
- while (k != hmm->M) /* M is the implicit end state */
- {
- L++;
- if (L >= allocL-1) { /* Reallocate path and seq if needed */
- ESL_RALLOC_WITH_TYPE(dsq, ESL_DSQ*, tmp, sizeof(ESL_DSQ) * (allocL*2));
- ESL_RALLOC_WITH_TYPE(path, int*, tmp, sizeof(int) * (allocL*2));
- allocL *= 2;
- }
-
- path[L] = k;
- dsq[L] = esl_rnd_FChoose(r, hmm->e[k], hmm->abc->K);
- k = esl_rnd_FChoose(r, hmm->t[k], hmm->M+1);
- }
-
- path[L+1] = hmm->M; /* sentinel for "end state" */
- dsq[L+1] = eslDSQ_SENTINEL;
-
- if (opt_dsq != NULL) *opt_dsq = dsq; else free(dsq);
- if (opt_path != NULL) *opt_path = path; else free(path);
- if (opt_L != NULL) *opt_L = L;
- return eslOK;
-
-ERROR:
- if (path != NULL) free(path);
- if (dsq != NULL) free(dsq);
- return status;
-}
-
-
-int
-esl_hmm_Forward(const ESL_DSQ *dsq, int L, const ESL_HMM *hmm, ESL_HMX *fwd, float *opt_sc)
-{
- int i, k, m;
- int M = hmm->M;
- float logsc = 0;
- float max;
-
- fwd->sc[0] = 0.0;
-
- if (L == 0) {
- fwd->sc[L+1] = logsc = log((double)hmm->pi[M]);
- if (opt_sc != NULL) *opt_sc = logsc;
- return eslOK;
- }
-
- max = 0.0;
- for (k = 0; k < M; k++) {
- fwd->dp[1][k] = hmm->eo[dsq[1]][k] * hmm->pi[k];
- max = ESL_MAX(fwd->dp[1][k], max);
- }
- for (k = 0; k < M; k++) {
- fwd->dp[1][k] /= max;
- }
- fwd->sc[1] = log((double)max);
-
- for (i = 2; i <= L; i++)
- {
- max = 0.0;
- for (k = 0; k < M; k++)
- {
- fwd->dp[i][k] = 0.0;
- for (m = 0; m < M; m++)
- fwd->dp[i][k] += fwd->dp[i-1][m] * hmm->t[m][k];
-
- fwd->dp[i][k] *= hmm->eo[dsq[i]][k];
-
- max = ESL_MAX(fwd->dp[i][k], max);
- }
-
- for (k = 0; k < M; k++)
- fwd->dp[i][k] /= max;
- fwd->sc[i] = log((double)max);
- }
-
-
- fwd->sc[L+1] = 0.0;
- for (m = 0; m < M; m++)
- fwd->sc[L+1] += fwd->dp[L][m] * hmm->t[m][M];
- fwd->sc[L+1] = log((double)fwd->sc[L+1]);
-
- logsc = 0.0;
- for (i = 1; i <= L+1; i++)
- logsc += fwd->sc[i];
-
- fwd->M = hmm->M;
- fwd->L = L;
- if (opt_sc != NULL) *opt_sc = logsc;
- return eslOK;
-}
-
-
-int
-esl_hmm_Backward(const ESL_DSQ *dsq, int L, const ESL_HMM *hmm, ESL_HMX *bck, float *opt_sc)
-{
- int i,k,m;
- int M = hmm->M;
- float logsc = 0.0;
- float max;
-
- bck->sc[L+1] = 0.0;
-
- if (L == 0) {
- bck->sc[0] = logsc = log((double)hmm->pi[M]);
- if (opt_sc != NULL) *opt_sc = logsc;
- return eslOK;
- }
-
- max = 0.0;
- for (k = 0; k < M; k++)
- {
- bck->dp[L][k] = hmm->t[k][M];
- max = ESL_MAX(bck->dp[L][k], max);
- }
- for (k = 0; k < M; k++)
- bck->dp[L][k] /= max;
- bck->sc[L] = log((double)max);
-
- for (i = L-1; i >= 1; i--)
- {
- max = 0.0;
- for (k = 0; k < M; k++)
- {
- bck->dp[i][k] = 0.0;
- for (m = 0; m < M; m++)
- bck->dp[i][k] += bck->dp[i+1][m] * hmm->eo[dsq[i+1]][m] * hmm->t[k][m];
-
- max = ESL_MAX(bck->dp[i][k], max);
- }
-
- for (k = 0; k < M; k++)
- bck->dp[i][k] /= max;
- bck->sc[i] = log((double)max);
- }
-
- bck->sc[0] = 0.0;
- for (m = 0; m < M; m++)
- bck->sc[0] += bck->dp[1][m] * hmm->eo[dsq[1]][m] * hmm->pi[m];
- bck->sc[0] = log((double)bck->sc[0]);
-
- logsc = 0.0;
- for (i = 0; i <= L; i++)
- logsc += bck->sc[i];
-
- bck->M = hmm->M;
- bck->L = L;
- if (opt_sc != NULL) *opt_sc = logsc;
- return eslOK;
-}
-
-
-int
-esl_hmm_PosteriorDecoding(const ESL_DSQ *dsq, int L, const ESL_HMM *hmm, ESL_HMX *fwd, ESL_HMX *bck, ESL_HMX *pp)
-{
- int i,k;
-
- for (i = 1; i <= L; i++)
- {
- for (k = 0; k < hmm->M; k++)
- pp->dp[i][k] = fwd->dp[i][k] * bck->dp[i][k];
- esl_vec_FNorm(pp->dp[i], hmm->M);
- }
- return eslOK;
-}
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_hmm.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_hmm.h
deleted file mode 100644
index 11c7310..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_hmm.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/* General hidden Markov models (discrete; of alphabetic strings)
-*
-* SRE, Fri Jul 18 08:54:41 2008 [Janelia]
-* SVN $Id$
-*/
-#ifndef ESL_HMM_INCLUDED
-#define ESL_HMM_INCLUDED
-
-#include "esl_alphabet.h"
-#include "esl_random.h"
-
-
-typedef struct {
- int M; /* number of states in the model */
- int K; /* size of alphabet (redundant w/ abc->K) */
- float *pi; /* initial (begin) distribution (0..M) */
- float **t; /* Mx(M+1) state transition probabilities */
- float **e; /* MxK emission probabilities */
-
- float **eo; /* K'xM emission odds ratios */
- const ESL_ALPHABET *abc; /* ptr to alphabet */
-} ESL_HMM;
-
-typedef struct {
- float **dp; /* [0..L][0..M-1] DP matrix */
- float *sc; /* [0..L+1] scale factors (log probs) */
- int M; /* actual model dimension (0..M-1) */
- int L; /* actual sequence dimension (1..L) */
-
- float *dp_mem; /* memory allocated for the resizable DP matrix */
- int allocR; /* current allocated # of rows: L+1 <= validR <= allocR */
- int validR; /* # of dp rows actually pointing at DP memory */
- int allocM; /* current set row width; M <= allocM */
- uint64_t ncells; /* total allocation of dp_mem; ncells >= (validR)(allocM)*/
-} ESL_HMX;
-
-
-
-extern ESL_HMM *esl_hmm_Create(const ESL_ALPHABET *abc, int M);
-extern int esl_hmm_Configure(ESL_HMM *hmm, float *fq);
-extern int esl_hmm_SetDegeneracies(ESL_HMM *hmm);
-extern void esl_hmm_Destroy(ESL_HMM *hmm);
-
-extern ESL_HMX *esl_hmx_Create(int allocL, int allocM);
-extern int esl_hmx_GrowTo (ESL_HMX *mx, int L, int M);
-extern void esl_hmx_Destroy(ESL_HMX *mx);
-
-extern int esl_hmm_Emit(ESL_RANDOMNESS *r, const ESL_HMM *hmm, ESL_DSQ **opt_dsq, int **opt_path, int *opt_L);
-extern int esl_hmm_Forward(const ESL_DSQ *dsq, int L, const ESL_HMM *hmm, ESL_HMX *fwd, float *opt_sc);
-extern int esl_hmm_Backward(const ESL_DSQ *dsq, int L, const ESL_HMM *hmm, ESL_HMX *bck, float *opt_sc);
-
-
-#endif /*!ESL_HMM_INCLUDED*/
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_keyhash.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_keyhash.cpp
deleted file mode 100644
index 04fccc6..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_keyhash.cpp
+++ /dev/null
@@ -1,813 +0,0 @@
-/* Partial emulation of Perl hashes (associative arrays),
- * mapping keys (ASCII char strings) to array indices.
- *
- * Contents:
- * 1. The <ESL_KEYHASH> object.
- * 2. Storing and retrieving keys.
- * 3. Internal functions.
- * 4. Benchmark drivers.
- * 5. Unit tests.
- * 6. Test driver.
- * 7. Example.
- * 8. Copyright and license information.
- *
- * SRE, Sun Jan 30 09:14:21 2005; from squid's gki.c, 1999.
- * SVN $Id: esl_keyhash.c 322 2009-02-09 12:51:53Z eddys $
- *
- * Reimplemented April 2008 (J3/14) with improved hash function
- * with larger dynamic range, and improved (pointerless) internals
- * in <ESL_KEYHASH>.
- */
-#include "esl_config.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-
-#include "easel.h"
-#include "esl_keyhash.h"
-
-static ESL_KEYHASH *keyhash_create(uint32_t hashsize, int init_key_alloc, int init_string_alloc);
-static uint32_t jenkins_hash(const char *key, uint32_t hashsize);
-static int key_upsize(ESL_KEYHASH *kh);
-
-
-/*****************************************************************
- *# 1. The <ESL_KEYHASH> object
- *****************************************************************/
-
-/* Function: esl_keyhash_Create()
- * Synopsis: Allocates a new keyhash.
- * Incept: SRE, Sun Jan 30 09:17:20 2005 [St. Louis]
- *
- * Purpose: Create a new hash table for key indexing, and returns
- * a pointer to it.
- *
- * Throws: <NULL> on allocation failure.
- */
-ESL_KEYHASH *
-esl_keyhash_Create(void)
-{
- return keyhash_create(128, /* initial hash table size (power of 2) */
- 128, /* initial alloc for up to 128 keys */
- 2048); /* initial alloc for keys totalling up to 2048 chars */
-}
-
-/* Function: esl_keyhash_Clone()
- * Synopsis: Duplicates a keyhash.
- * Incept: SRE, Fri Feb 15 18:57:50 2008 [Janelia]
- *
- * Purpose: Allocates and duplicates a keyhash <kh>. Returns a
- * pointer to the duplicate.
- *
- * Throws: <NULL> on allocation failure.
- */
-ESL_KEYHASH *
-esl_keyhash_Clone(const ESL_KEYHASH *kh)
-{
- ESL_KEYHASH *nw;
- int h;
-
- if ((nw = keyhash_create(kh->hashsize, kh->kalloc, kh->salloc)) == NULL) goto ERROR;
-
- for (h = 0; h < (int)kh->hashsize; h++)
- nw->hashtable[h] = kh->hashtable[h];
-
- for (h = 0; h < kh->nkeys; h++)
- {
- nw->nxt[h] = kh->nxt[h];
- nw->key_offset[h] = kh->key_offset[h];
- }
- nw->nkeys = kh->nkeys;
-
- memcpy(nw->smem, kh->smem, sizeof(char) * kh->sn);
- nw->sn = kh->sn;
- return nw;
-
- ERROR:
- esl_keyhash_Destroy(nw);
- return NULL;
-}
-
-
-/* Function: esl_keyhash_Get()
- * Synopsis: Returns a key name, given its index.
- * Incept: SRE, Tue Jul 15 09:40:56 2008 [Janelia]
- *
- * Purpose: Returns a pointer to the key name associated
- * with index <idx>. The key name is a <NUL>-terminated
- * string whose memory is managed internally in
- * the keyhash <kh>.
- */
-char *
-esl_keyhash_Get(const ESL_KEYHASH *kh, int idx)
-{
- return kh->smem + kh->key_offset[idx];
-}
-
-/* Function: esl_keyhash_GetNumber()
- * Synopsis: Returns the total number of keys stored.
- * Incept: SRE, Tue Jul 15 09:42:46 2008 [Janelia]
- *
- * Purpose: Returns the total number of keys currently stored in the
- * keyhash <kh>.
- */
-int
-esl_keyhash_GetNumber(const ESL_KEYHASH *kh)
-{
- return kh->nkeys;
-}
-
-
-/* Function: esl_keyhash_Reuse()
- * Synopsis: Reuse a keyhash.
- * Incept: SRE, Sun Feb 8 17:24:53 2009 [Casa de Gatos]
- *
- * Purpose: Empties keyhash <kh> so it can be reused without
- * creating a new one.
- *
- * Returns: <eslOK> on success.
- */
-int
-esl_keyhash_Reuse(ESL_KEYHASH *kh)
-{
- int i;
-
- for (i = 0; i < (int)kh->hashsize; i++) kh->hashtable[i] = -1;
- kh->nkeys = 0;
- kh->sn = 0;
- return eslOK;
-}
-
-
-
-/* Function: esl_keyhash_Destroy()
- * Synopsis: Frees a keyhash.
- * Incept: SRE, Sun Jan 30 09:19:19 2005 [St. Louis]
- *
- * Purpose: Destroys <kh>.
- *
- * Returns: (void)
- */
-void
-esl_keyhash_Destroy(ESL_KEYHASH *kh)
-{
- if (kh == NULL) return;
- if (kh->hashtable != NULL) free(kh->hashtable);
- if (kh->key_offset != NULL) free(kh->key_offset);
- if (kh->nxt != NULL) free(kh->nxt);
- if (kh->smem != NULL) free(kh->smem);
- free(kh);
-}
-
-/* Function: esl_keyhash_Dump()
- * Synopsis: Dumps debugging information about a keyhash.
- * Incept: SRE, Sun Jan 30 09:42:22 2005 [St. Louis]
- *
- * Purpose: Mainly for debugging purposes. Dump
- * some information about the hash table <kh>
- * to the stream <fp>, which might be stderr
- * or stdout.
- */
-void
-esl_keyhash_Dump(FILE *fp, const ESL_KEYHASH *kh)
-{
- int idx;
- int h;
- int nkeys;
- int nempty = 0;
- int maxkeys = -1;
- int minkeys = INT_MAX;
-
- for (h = 0; h < (int)kh->hashsize; h++)
- {
- for (nkeys = 0, idx = kh->hashtable[h]; idx != -1; idx = kh->nxt[idx]) nkeys++;
-
- if (nkeys == 0) nempty++;
- if (nkeys > maxkeys) maxkeys = nkeys;
- if (nkeys < minkeys) minkeys = nkeys;
- }
-
- fprintf(fp, "Total keys: %d\n", kh->nkeys);
- fprintf(fp, "Hash table size: %d\n", kh->hashsize);
- fprintf(fp, "Average occupancy: %.2f\n", (float) kh->nkeys /(float) kh->hashsize);
- fprintf(fp, "Unoccupied slots: %d\n", nempty);
- fprintf(fp, "Most in one slot: %d\n", maxkeys);
- fprintf(fp, "Least in one slot: %d\n", minkeys);
- fprintf(fp, "Keys allocated for: %d\n", kh->kalloc);
- fprintf(fp, "Key string space alloc: %d\n", kh->salloc);
- fprintf(fp, "Key string space used: %d\n", kh->sn);
-}
-/*--------------- end, <ESL_KEYHASH> object ---------------------*/
-
-
-
-
-/*****************************************************************
- *# 2. Storing and retrieving keys
- *****************************************************************/
-
-/* Function: esl_key_Store()
-* Synopsis: Store a key and get a key index for it.
-* Incept: SRE, Sun Jan 30 09:21:13 2005 [St. Louis]
-*
-* Purpose: Store a string <key> in the key index hash table <kh>.
-* Associate it with a unique key index, counting from
-* 0. It's this index that lets us map the hashed keys to
-* integer-indexed C arrays, clumsily emulating Perl's
-* hashes. Optionally returns the index through <opt_index>.
-*
-* Returns: <eslOK> on success; stores <key> in <kh>; <opt_index> is
-* returned, set to the next higher index value.
-* Returns <eslEDUP> if <key> was already stored in the table;
-* <opt_index> is set to the existing index for <key>.
-*
-* Throws: <eslEMEM> on allocation failure, and sets <opt_index> to -1.
-*/
-int
-esl_key_Store(ESL_KEYHASH *kh, const char *key, int *opt_index)
-{
- uint32_t val = jenkins_hash(key, kh->hashsize);
- int n = strlen(key);
- int idx;
- int status;
-
- /* Was this key already stored? */
- for (idx = kh->hashtable[val]; idx != -1; idx = kh->nxt[idx])
- if (strcmp(key, kh->smem + kh->key_offset[idx]) == 0)
- {
- if (opt_index != NULL) *opt_index = idx;
- return eslEDUP;
- }
-
- /* Reallocate key ptr/index memory if needed */
- if (kh->nkeys == kh->kalloc)
- {
- void *p;
- ESL_RALLOC_WITH_TYPE(kh->nxt,int*, p, sizeof(int)*kh->kalloc*2);
- kh->kalloc *= 2;
- }
-
- /* Reallocate key string memory if needed */
- while (kh->sn + n + 1 > kh->salloc)
- {
- void *p;
- ESL_RALLOC_WITH_TYPE(kh->smem,char*, p, sizeof(char) * kh->salloc * 2);
- kh->salloc *= 2;
- }
-
- /* Copy the key, assign its index */
- idx = kh->nkeys;
- kh->key_offset[idx] = kh->sn;
- strcpy(kh->smem + kh->key_offset[idx], key);
- kh->sn += n+1;
- kh->nkeys++;
-
- /* Insert new element at head of the approp linked list in hashtable */
- kh->nxt[idx] = kh->hashtable[val];
- kh->hashtable[val] = idx;
-
- /* Time to upsize? If we're 3x saturated, expand the hash table */
- if (kh->nkeys > 3*kh->hashsize)
- if ((status = key_upsize(kh)) != eslOK) goto ERROR;
-
- if (opt_index != NULL) *opt_index = idx;
- return eslOK;
-
-ERROR:
- if (opt_index != NULL) *opt_index = -1;
- return status;
-}
-
-/* Function: esl_key_Lookup()
- * Synopsis: Look up a key's array index.
- * Incept: SRE, Sun Jan 30 09:38:53 2005 [St. Louis]
- *
- * Purpose: Look up a <key> in the hash table <kh>.
- * If <key> is found, return <eslOK>, and optionally set <*opt_index>
- * to its array index (0..nkeys-1).
- * If <key> is not found, return <eslENOTFOUND>, and
- * optionally set <*opt_index> to -1.
- */
-int
-esl_key_Lookup(const ESL_KEYHASH *kh, const char *key, int *opt_index)
-{
- uint32_t val = jenkins_hash(key, kh->hashsize);
- int idx;
-
- for (idx = kh->hashtable[val]; idx != -1; idx = kh->nxt[idx])
- if (strcmp(key, kh->smem + kh->key_offset[idx]) == 0)
- {
- if (opt_index != NULL) *opt_index = idx;
- return eslOK;
- }
-
- if (opt_index != NULL) *opt_index = -1;
- return eslENOTFOUND;
-}
-
-
-/*---------- end, API for storing/retrieving keys ---------------*/
-
-
-
-
-/*****************************************************************
- * 3. Internal functions
- *****************************************************************/
-
-/* keyhash_create()
- * SRE, Sun Jan 30 09:45:47 2005 [St. Louis]
- *
- * The real creation function, which takes arguments for memory sizes.
- * This is abstracted to a static function because it's used by both
- * Create() and Clone() but slightly differently.
- *
- * Args: hashsize - size of hash table; this must be a power of two.
- * init_key_alloc - initial allocation for # of keys.
- * init_string_alloc - initial allocation for total size of key strings.
- *
- * Returns: An allocated hash table structure; or NULL on failure.
- */
-ESL_KEYHASH *
-keyhash_create(uint32_t hashsize, int init_key_alloc, int init_string_alloc)
-{
- ESL_KEYHASH *kh = NULL;
- int i;
- int status;
-
- ESL_ALLOC_WITH_TYPE(kh, ESL_KEYHASH*, sizeof(ESL_KEYHASH));
- kh->hashtable = NULL;
- kh->key_offset = NULL;
- kh->nxt = NULL;
- kh->smem = NULL;
-
- kh->hashsize = hashsize;
- kh->kalloc = init_key_alloc;
- kh->salloc = init_string_alloc;
-
- ESL_ALLOC_WITH_TYPE(kh->hashtable, int*, sizeof(int) * kh->hashsize);
- for (i = 0; i < (int)kh->hashsize; i++) kh->hashtable[i] = -1;
-
- ESL_ALLOC_WITH_TYPE(kh->key_offset, int*, sizeof(int) * kh->kalloc);
- ESL_ALLOC_WITH_TYPE(kh->nxt, int*, sizeof(int) * kh->kalloc);
- for (i = 0; i < kh->kalloc; i++) kh->nxt[i] = -1;
-
- ESL_ALLOC_WITH_TYPE(kh->smem, char*, sizeof(char) * kh->salloc);
- kh->nkeys = 0;
- kh->sn = 0;
- return kh;
-
- ERROR:
- esl_keyhash_Destroy(kh);
- return NULL;
-}
-
-
-/* jenkins_hash()
- * SRE, Wed Apr 16 09:31:10 2008
- *
- * The hash function.
- * This is Bob Jenkins' "one at a time" hash.
- * <key> is a NUL-terminated string of any length.
- * <hashsize> must be a power of 2.
- *
- * References:
- * [1] http://en.wikipedia.org/wiki/Hash_table
- * [2] http://www.burtleburtle.net/bob/hash/doobs.html
- */
-static uint32_t
-jenkins_hash(const char *key, uint32_t hashsize)
-{
- uint32_t val = 0;
- for (; *key != '\0'; key++)
- {
- val += *key;
- val += (val << 10);
- val ^= (val >> 6);
- }
- val += (val << 3);
- val ^= (val >> 11);
- val += (val << 15);
-
- return (val & (hashsize - 1));
-}
-
-/* key_upsize()
- * SRE, Sun Jan 30 09:50:39 2005 [St. Louis]
- *
- * Grow the hash table to the next available size.
- *
- * Args: old - the KEY hash table to reallocate.
- *
- * Returns: <eslOK> on success. 'Success' includes the case
- * where the hash table is already at its maximum size,
- * and cannot be upsized any more.
- *
- * Throws: <eslEMEM> on allocation failure, and
- * the hash table is left in its initial state.
- */
-static int
-key_upsize(ESL_KEYHASH *kh)
-{
- void *p;
- int i;
- uint32_t val;
- int status;
-
- /* 28 below because we're going to upsize in steps of 8x (2^3); need to be < 2^{31-3} */
- if (kh->hashsize >= (1<<28)) return eslOK; /* quasi-success (can't grow any more) */
-
- /* The catch here is that when you upsize the table, all the hash functions
- * change; so you have to go through all the keys, recompute their hash functions,
- * and store them again in the new table.
- */
- /* Allocate a new, larger hash table. (Don't change <kh> until this succeeds) */
- ESL_RALLOC_WITH_TYPE(kh->hashtable, int*, p, sizeof(int) * (kh->hashsize << 3));
- kh->hashsize = kh->hashsize << 3; /* 8x */
- for (i = 0; i < (int)kh->hashsize; i++) kh->hashtable[i] = -1;
-
- /* Store all the keys again. */
- for (i = 0; i < kh->nkeys; i++)
- {
- val = jenkins_hash(kh->smem + kh->key_offset[i], kh->hashsize);
- kh->nxt[i] = kh->hashtable[val];
- kh->hashtable[val] = i;
- }
- return eslOK;
-
- ERROR:
- return eslEMEM;
-}
-/*--------------- end, internal functions -----------------*/
-
-
-/*****************************************************************
- * 4. Benchmark driver
- *****************************************************************/
-#ifdef eslKEYHASH_BENCHMARK
-/*
- gcc -g -O2 -o keyhash_benchmark -I. -L. -DeslKEYHASH_BENCHMARK esl_keyhash.c -leasel -lm
- time ./keyhash_benchmark /usr/share/dict/words /usr/share/dict/words
- */
-#include "esl_config.h"
-
-#include <stdio.h>
-
-#include "easel.h"
-#include "esl_getopts.h"
-#include "esl_keyhash.h"
-#include "esl_stopwatch.h"
-
-static ESL_OPTIONS options[] = {
- /* name type default env range toggles reqs incomp help docgroup*/
- { "-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show brief help on version and usage", 0 },
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
-};
-static char usage[] = "[-options] <keyfile1> <keyfile2>";
-static char banner[] = "benchmarking speed of keyhash module";
-
-int
-main(int argc, char **argv)
-{
- ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage);
- ESL_KEYHASH *kh = esl_keyhash_Create();
- ESL_STOPWATCH *w = esl_stopwatch_Create();
- char *file1 = esl_opt_GetArg(go, 1);
- char *file2 = esl_opt_GetArg(go, 2);
- FILE *fp;
- char buf[256];
- char *s, *tok;
- int idx;
- int nstored, nsearched, nshared;
-
- /* Read/store keys from file 1.
- */
- esl_stopwatch_Start(w);
- if ((fp = fopen(file1, "r")) == NULL)
- { fprintf(stderr, "couldn't open %s\n", argv[1]); exit(1); }
- nstored = 0;
- while (fgets(buf, 256, fp) != NULL)
- {
- s = buf;
- esl_strtok(&s, " \t\r\n", &tok);
- esl_key_Store(kh, tok, &idx);
- nstored++;
- }
- fclose(fp);
- printf("Stored %d keys.\n", nstored);
-
- /* Look up keys from file 2.
- */
- if ((fp = fopen(file2, "r")) == NULL)
- { fprintf(stderr, "couldn't open %s\n", argv[2]); exit(1); }
- nsearched = nshared = 0;
- while (fgets(buf, 256, fp) != NULL)
- {
- s = buf;
- esl_strtok(&s, " \t\r\n", &tok);
-
- if (esl_key_Lookup(kh, tok, &idx) == eslOK) nshared++;
- nsearched++;
- }
- fclose(fp);
- esl_stopwatch_Stop(w);
- printf("Looked up %d keys.\n", nsearched);
- printf("In common: %d keys.\n", nshared);
- esl_stopwatch_Display(stdout, w, "# CPU Time: ");
-
- esl_stopwatch_Destroy(w);
- esl_keyhash_Destroy(kh);
- esl_getopts_Destroy(go);
- return 0;
-}
-#endif /*eslKEYHASH_BENCHMARK*/
-
-
-
-#ifdef eslKEYHASH_BENCHMARK2
-
-/* Benchmark #2 is a benchmark just of the hash function.
- * First we read in a bunch of keys from any file, one key per line.
- * Then we start timing, and compute a hash for each key.
- */
-
-/* gcc -O2 -o keyhash_benchmark2 -I. -L. -DeslKEYHASH_BENCHMARK2 esl_keyhash.c -leasel -lm
- * ./keyhash_benchmark2 <keyfile>
- */
-#include "esl_config.h"
-
-#include <stdio.h>
-#include <math.h>
-
-#include "easel.h"
-#include "esl_fileparser.h"
-#include "esl_getopts.h"
-#include "esl_keyhash.h"
-#include "esl_stats.h"
-#include "esl_stopwatch.h"
-#include "esl_vectorops.h"
-
-static ESL_OPTIONS options[] = {
- /* name type default env range toggles reqs incomp help docgroup*/
- { "-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show brief help on version and usage", 0 },
- { "-s", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show statistical test for hash uniformity", 0 },
- { "-v", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "be verbose: print hash values for keys", 0 },
- { "-x", eslARG_INT, "32768", NULL, NULL, NULL, NULL, NULL, "set hash table size to <n>", 0 },
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
-};
-static char usage[] = "[-options] <keyfile>";
-static char banner[] = "benchmarking speed of hash function in keyhash module";
-
-int
-main(int argc, char **argv)
-{
- ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage);
- ESL_FILEPARSER *efp = NULL;
- ESL_STOPWATCH *w = esl_stopwatch_Create();
- ESL_KEYHASH *kh = esl_keyhash_Create();
- char *keyfile = esl_opt_GetArg(go, 1);
- uint32_t hashsize = esl_opt_GetInteger(go, "-x");
- char *key;
- int keylen;
- char **karr = NULL;
- int kalloc;
- int *ct = NULL;
- int nkeys;
- int i;
- int status;
- uint32_t (*hashfunc)(const char*,uint32_t) = jenkins_hash;
-
- /* 1. Store the keys from the file, before starting the benchmark timer. */
- kalloc = 256;
- ESL_ALLOC_WITH_TYPE(karr, char**, sizeof(char *) * kalloc);
-
- if (esl_fileparser_Open(keyfile, NULL, &efp) != eslOK) esl_fatal("Failed to open key file %s\n", keyfile);
-
- nkeys = 0;
- while (esl_fileparser_NextLine(efp) == eslOK)
- {
- if (esl_fileparser_GetTokenOnLine(efp, &key, &keylen) != eslOK) esl_fatal("Failure in parsing key file\n");
-
- if (nkeys == kalloc) {
- void *tmp;
- ESL_RALLOC(karr, tmp, sizeof(char *) * kalloc * 2);
- kalloc *= 2;
- }
-
- esl_strdup(key, keylen, &(karr[nkeys]));
- nkeys++;
- }
- esl_fileparser_Close(efp);
- /* and karr[0..nkeys-1] are now the keys. */
-
-
- /* 2. benchmark hashing the keys. */
- esl_stopwatch_Start(w);
- for (i = 0; i < nkeys; i++) (*hashfunc)(karr[i], hashsize);
- esl_stopwatch_Stop(w);
- esl_stopwatch_Display(stdout, w, "# CPU Time: ");
-
- /* If user wanted to see the hashes, do that
- * separately, outside the timing loop.
- */
- if (esl_opt_GetBoolean(go, "-v"))
- {
- for (i = 0; i < nkeys; i++)
- printf("%-20s %9d\n", karr[i], (*hashfunc)(karr[i], hashsize));
- }
-
- /* Likewise, if user wanted to see statistical uniformity test...
- */
- if (esl_opt_GetBoolean(go, "-s"))
- {
- double mean, var, X2, pval;
-
- ESL_ALLOC_WITH_TYPE(ct, int*, sizeof(int) * hashsize);
- esl_vec_ISet(ct, hashsize, 0);
- for (i = 0; i < nkeys; i++) ct[(*hashfunc)(karr[i], hashsize)]++;
-
- esl_stats_IMean(ct, hashsize, &mean, &var);
- for (X2 = 0.0, i = 0; i < hashsize; i++)
- X2 += (((double) ct[i] - mean) * ((double) ct[i] - mean)) / mean;
-
- esl_stats_ChiSquaredTest(hashsize-1, X2, &pval);
-
- printf("Number of keys: %d\n", nkeys);
- printf("Hash table size: %d\n", hashsize);
- printf("Mean hash occupancy: %.2f\n", mean);
- printf("Minimum: %d\n", esl_vec_IMin(ct, hashsize));
- printf("Maximum: %d\n", esl_vec_IMax(ct, hashsize));
- printf("Variance: %.2f\n", var);
- printf("Chi-squared: %.2f\n", X2);
- printf("Chi-squared p-value: %.4f\n", pval);
- }
-
-
- /* 3. cleanup, exit. */
- for (i = 0; i < nkeys; i++) free(karr[i]);
- free(karr);
- esl_stopwatch_Destroy(w);
- esl_getopts_Destroy(go);
- return 0;
-
- ERROR:
- return status;
-}
-#endif /*eslKEYHASH_BENCHMARK2*/
-
-
-/*------------------- end, benchmark drivers --------------------*/
-
-
-/*****************************************************************
- * 5. Unit tests
- *****************************************************************/
-
-
-/*---------------------- end, unit tests ------------------------*/
-
-/*****************************************************************
- * 6. Test driver
- *****************************************************************/
-#ifdef eslKEYHASH_TESTDRIVE
-/* gcc -g -Wall -o test -I. -DeslKEYHASH_TESTDRIVE keyhash.c easel.c
- * ./test
- */
-#include <stdlib.h>
-#include <stdio.h>
-#include <assert.h>
-#include "easel.h"
-#include "esl_keyhash.h"
-
-#define NSTORE 1200
-#define NLOOKUP 1200
-#define KEYLEN 2
-
-int
-main(int argc, char **argv)
-{
- ESL_KEYHASH *h;
- char keys[NSTORE+NLOOKUP][KEYLEN+1];
- int i,j,nk,k42;
- int nmissed;
- int status;
-
- /* Generate 2400 random k=2 keys. 26^2 = 676 possible.
- * We'll store the first 1200 and search on the remaining
- * 1200. We're ~1.775x saturated; expect Poisson P(0) = 17% miss
- * rate on the searches, so we ought to exercise hits and
- * misses on the lookups.
- */
- srand(31);
- for (i = 0; i < NSTORE+NLOOKUP; i++)
- {
- for (j = 0; j < KEYLEN; j++)
- keys[i][j] = 'a' + (rand() % 26); /* yeah, low-order bits; so sue me */
- keys[i][j] = '\0';
- }
- /* spike a known one in (XX.. at key 42).
- */
- for (j = 0; j < KEYLEN; j++)
- keys[42][j] = 'X';
-
- h = esl_keyhash_Create();
- nk = 0;
- for (i = 0; i < NSTORE; i++)
- {
- status = esl_key_Store(h, keys[i], &j);
- if (status == eslOK) { assert(j==nk); nk++; }
- else if (status == eslEDUP) { assert(j<nk); }
- else esl_fatal("store failed.");
-
- if (i == 42) { k42 = j;} /* remember where key 42 went */
- }
- nmissed = 0;
- for (i = NSTORE; i < NSTORE+NLOOKUP; i++)
- {
- if (esl_key_Lookup(h, keys[i], &j) != eslOK) nmissed++;
- }
- esl_key_Lookup(h, keys[42], &j);
- assert(j==k42);
-
- /*
- printf("missed %d/%d (%.1f%%)\n", nmissed, NLOOKUP,
- 100. * (float) nmissed / (float) NLOOKUP);
- esl_keyhash_Dump(stdout, h);
- */
-
- esl_keyhash_Destroy(h);
- exit (0);
-}
-#endif /*eslKEYHASH_TESTDRIVE*/
-
-/*--------------------- end, test driver ------------------------*/
-
-
-
-/*****************************************************************
- * 7. Example
- *****************************************************************/
-#ifdef eslKEYHASH_EXAMPLE
-/*::cexcerpt::keyhash_example::begin::*/
-/* gcc -g -Wall -o keyhash_example -I. -DeslKEYHASH_EXAMPLE esl_keyhash.c easel.c
- * ./example /usr/share/dict/words /usr/share/dict/words
- */
-#include <stdio.h>
-#include "easel.h"
-#include "esl_keyhash.h"
-
-int
-main(int argc, char **argv)
-{
- ESL_KEYHASH *h = esl_keyhash_Create();
- FILE *fp;
- char buf[256];
- char *s, *tok;
- int idx;
- int nstored, nsearched, nshared;
-
- /* Read/store keys from file 1. */
- if ((fp = fopen(argv[1], "r")) == NULL) esl_fatal("couldn't open %s\n", argv[1]);
- nstored = 0;
- while (fgets(buf, 256, fp) != NULL)
- {
- s = buf;
- esl_strtok(&s, " \t\r\n", &tok);
- esl_key_Store(h, tok, &idx);
- nstored++;
- }
- fclose(fp);
- printf("Stored %d keys.\n", nstored);
-
- /* Look up keys from file 2. */
- if ((fp = fopen(argv[2], "r")) == NULL) esl_fatal("couldn't open %s\n", argv[1]);
- nsearched = nshared = 0;
- while (fgets(buf, 256, fp) != NULL)
- {
- s = buf;
- esl_strtok(&s, " \t\r\n", &tok);
- if (esl_key_Lookup(h, tok, &idx) == eslOK) nshared++;
- nsearched++;
- }
- fclose(fp);
- printf("Looked up %d keys.\n", nsearched);
- printf("In common: %d keys.\n", nshared);
- esl_keyhash_Destroy(h);
- return 0;
-}
-/*::cexcerpt::keyhash_example::end::*/
-#endif /*eslKEYHASH_EXAMPLE*/
-/*----------------------- end, example --------------------------*/
-
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_keyhash.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_keyhash.h
deleted file mode 100644
index cfbf168..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_keyhash.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/* Storing keys in hash tables, similar to Perl's associative arrays.
- *
- * SRE, Sun Jan 30 08:55:17 2005; from squid's gki.h, 1999.
- * SVN $Id: esl_keyhash.h 322 2009-02-09 12:51:53Z eddys $
- */
-#ifndef eslKEYHASH_INCLUDED
-#define eslKEYHASH_INCLUDED
-
-#include <stdio.h> /* for FILE */
-
-/* ESL_KEYHASH:
- * a dynamically resized hash structure;
- * contains a hash table and associated data
- *
- * Each key string is associated with an index i = (0..nkeys-1).
- * Key strings are stored in one array, in smem.
- * Each key has an offset in this array, key_offset[i].
- * Thus key number <i> is at: smem + key_offset[i].
- *
- * The keys are hashed, and stored in linked lists in
- * a hashtable by their index i = (0..nkeys-1), with -1
- * as a sentinel for end-of-list.
- *
- * hashtable[0..hashsize-1] = head of linked list;
- * index of first elem in list (0..nkeys-1),
- * or -1 if empty.
- * nxt[0..nkeys-1] = next elem in list (0..nkeys-1), or -1 if none.
- *
- * Thus a typical loop, looking for a <key>:
- * uint32_t val = jenkins_hash(key, kh->hashsize);
- * for (i = kh->hashtable[val]; i != -1; i = kh->nxt[i])
- * if (strcmp(key, kh->smem + kh->key_offset[i]) == 0) found_it;
- *
- */
-typedef struct {
- int *hashtable; /* hashtable[0..hashsize-1] = index of first elem, or -1 */
- uint32_t hashsize; /* size of the hash table */
-
- int *key_offset; /* key [idx=0..nkeys-1] starts at smem + key_offset[idx] */
- int *nxt; /* nxt [idx=0..nkeys-1], next "pointers" in hash table */
- int nkeys; /* number of keys stored */
- int kalloc; /* number of keys allocated for */
-
- char *smem; /* Array of memory for storing key strings (w/ \0's) */
- int salloc; /* current allocated size of <key_mem> */
- int sn; /* current used size of key strings, inclusive \0's */
-} ESL_KEYHASH;
-
-extern ESL_KEYHASH *esl_keyhash_Create(void);
-extern ESL_KEYHASH *esl_keyhash_Clone(const ESL_KEYHASH *kh);
-extern char * esl_keyhash_Get(const ESL_KEYHASH *kh, int idx);
-extern int esl_keyhash_GetNumber(const ESL_KEYHASH *kh);
-extern int esl_keyhash_Reuse(ESL_KEYHASH *kh);
-extern void esl_keyhash_Destroy(ESL_KEYHASH *kh);
-extern void esl_keyhash_Dump(FILE *fp, const ESL_KEYHASH *kh);
-
-extern int esl_key_Store ( ESL_KEYHASH *kh, const char *key, int *ret_index);
-extern int esl_key_Lookup(const ESL_KEYHASH *kh, const char *key, int *ret_index);
-
-
-#endif /* eslKEYHASH_INCLUDED */
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_minimizer.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_minimizer.cpp
deleted file mode 100644
index c034ba0..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_minimizer.cpp
+++ /dev/null
@@ -1,661 +0,0 @@
-/* Multidimensional optimization using conjugate gradient descent.
- *
- * Can be used even without derivative information; falls back to
- * a numeric gradient if analytic gradient is unavailable.
- *
- * SRE, Wed Jun 22 11:37:29 2005
- * SVN $Id: esl_minimizer.c 438 2009-11-12 22:29:50Z wheelert $
- */
-#include "esl_config.h"
-
-#include <math.h>
-#include <float.h>
-
-#include "easel.h"
-#include "esl_vectorops.h"
-#include "esl_minimizer.h"
-
-/* Return the negative gradient at a point, determined
- * numerically.
- */
-static void
-numeric_derivative(double *x, double *u, int n,
- double (*func)(double *, int, void*),
- void *prm, double relstep,
- double *dx)
-{
- int i;
- double delta;
- double f1, f2;
- double tmp;
-
- for (i = 0; i < n; i++)
- {
- delta = fabs(u[i] * relstep);
-
- tmp = x[i];
- x[i] = tmp + delta;
- f1 = (*func)(x, n, prm);
- x[i] = tmp - delta;
- f2 = (*func)(x, n, prm);
- x[i] = tmp;
-
- dx[i] = (-0.5 * (f1-f2)) / delta;
-
- ESL_DASSERT1((! isnan(dx[i])));
- }
-}
-
-/* bracket():
- * SRE, Wed Jul 27 11:43:32 2005 [St. Louis]
- *
- * Purpose: Bracket a minimum.
- *
- * The minimization is quasi-one-dimensional,
- * starting from an initial <n>-dimension vector <ori>
- * in the <n>-dimensional direction <d>.
- *
- * Caller passes a ptr to the objective function <*func()>,
- * and a void pointer to any necessary conditional
- * parameters <prm>. The objective function will
- * be evaluated at a point <x> by calling
- * <(*func)(x, n, prm)>. The caller's function
- * is responsible to casting <prm> to whatever it's
- * supposed to be, which might be a ptr to a structure,
- * for example; typically, for a parameter optimization
- * problem, this holds the observed data.
- *
- * The routine works in scalar multipliers relative
- * to origin <ori> and direction <d>; that is, a new <n>-dimensional
- * point <b> is defined as <ori> + <bx><d>, for a scalar <bx>.
- *
- * The routine identifies a triplet <ax>, <bx>, <cx> such
- * that $a < b < c$ and such that a minimum is known to
- * exist in the $(a,b)$ interval because $f(b) < f(a),
- * f(c)$. Also, the <a..b> and <b...c> intervals are in
- * a golden ratio; the <b..c> interval is 1.618 times larger
- * than <a..b>.
- *
- * Since <d> is usually in the direction of the gradient,
- * the points <ax>,<bx>,<cx> might be expected to be $\geq 0$;
- * however, when <ori> is already close to the minimum,
- * it is often faster to bracket the minimum using
- * a negative <ax>. The caller might then try to be "clever"
- * and assume that the minimum is in the <bx..cx> interval
- * when <ax> is negative, rather than the full <ax..cx>
- * interval. That cleverness can fail, though, if <ori>
- * is already in fact the minimum, because the line minimizer
- * in brent() assumes a non-inclusive interval. Use
- * <ax..cx> as the bracket.
- *
- * Args: ori - n-dimensional starting vector
- * d - n-dimensional direction to minimize along
- * n - # of dimensions
- * firststep - bx is initialized to this scalar multiplier
- * *func() - objective function to minimize
- * prm - void * to any constant data that *func() needs
- * wrk - workspace: 1 allocated n-dimensional vector
- * ret_ax - RETURN: ax < bx < cx scalar bracketing triplet
- * ret_bx - RETURN: ...ax may be negative
- * ret_cx - RETURN:
- * ret_fa - RETURN: function evaluated at a,b,c
- * ret_fb - RETURN: ... f(b) < f(a),f(c)
- * ret_fc - RETURN:
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslENOHALT> if it fails to converge.
- *
- * Xref: STL9/130.
- */
-static int
-bracket(double *ori, double *d, int n, double firststep,
- double (*func)(double *, int, void *), void *prm,
- double *wrk,
- double *ret_ax, double *ret_bx, double *ret_cx,
- double *ret_fa, double *ret_fb, double *ret_fc)
-{
- double ax,bx,cx; /* scalar multipliers */
- double fa,fb,fc; /* f() evaluations at those points */
- double swapper;
- int niter;
-
- /* Set and evaluate our first two points f(a) and f(b), which
- * are initially at 0.0 and <firststep>.
- */
- ax = 0.; /* always start w/ ax at the origin, ax=0 */
- fa = (*func)(ori, n, prm);
-
- bx = firststep;
- esl_vec_DCopy(ori, n, wrk);
- esl_vec_DAddScaled(wrk, d, bx, n);
- fb = (*func)(wrk, n, prm);
-
- /* In principle, we usually know that the minimum m lies to the
- * right of a, m>=a, because d is likely to be a gradient. You
- * might think we want 0 = a < b < c. In practice, there's problems
- * with that. It's far easier to identify bad points (f(x) > f(a))
- * than to identify good points (f(x) < f(a)), because letting f(x)
- * blow up to infinity is fine as far as bracketing is concerned.
- * It can be almost as hard to identify a point b that f(b) < f(a)
- * as it is to find the minimum in the first place!
- * Counterintuitively, in cases where f(b)>f(a), it's better
- * to just swap the a,b labels and look for c on the wrong side
- * of a! This often works immediately, if f(a) was reasonably
- * close to the minimum and f(b) and f(c) are both terrible.
- */
- if (fb > fa)
- {
- swapper = ax; ax = bx; bx = swapper;
- swapper = fa; fa = fb; fb = swapper;
- }
-
- /* Make our first guess at c.
- * Remember, we don't know that b>a any more, and c might go negative.
- * We'll either have: a..b...c with a=0;
- * or: c...b..a with b=0.
- * In many cases, we'll immediately be done.
- */
- cx = bx + (bx-ax)*1.618;
- esl_vec_DCopy(ori, n, wrk);
- esl_vec_DAddScaled(wrk, d, cx, n);
- fc = (*func)(wrk, n, prm);
-
- /* We're not satisfied until fb < fa, fc;
- * throughout the routine, we guarantee that fb < fa;
- * so we just check fc.
- */
- niter = 0;
- while (fc <= fb)
- {
- /* Slide over, discarding the a point; choose
- * new c point even further away.
- */
- ax = bx; bx = cx;
- fa = fb; fb = fc;
- cx = bx+(bx-ax)*1.618;
- esl_vec_DCopy(ori, n, wrk);
- esl_vec_DAddScaled(wrk, d, cx, n);
- fc = (*func)(wrk, n, prm);
-
- /* This is a rare instance. We've reach the minimum
- * by trying to bracket it. Also check that not all
- * three points are the same.
- */
- if (ax != bx && bx != cx && fa == fb && fb == fc) break;
-
- niter++;
- if (niter > 100)
- ESL_EXCEPTION(eslENORESULT, "Failed to bracket a minimum.");
- }
-
- /* We're about to return. Assure the caller that the points
- * are in order a < b < c, not the other way.
- */
- if (ax > cx)
- {
- swapper = ax; ax = cx; cx = swapper;
- swapper = fa; fa = fc; fc = swapper;
- }
-
- /* Return.
- */
- ESL_DPRINTF2(("\nbracket(): %d iterations\n", niter));
- ESL_DPRINTF2(("bracket(): triplet is %g %g %g along current direction\n",
- ax, bx, cx));
- ESL_DPRINTF2(("bracket(): f()'s there are: %g %g %g\n\n",
- fa, fb, fc));
-
- *ret_ax = ax; *ret_bx = bx; *ret_cx = cx;
- *ret_fa = fa; *ret_fb = fb; *ret_fc = fc;
- return eslOK;
-}
-
-/* brent():
- * SRE, Sun Jul 10 19:07:05 2005 [St. Louis]
- *
- * Purpose: Quasi-one-dimensional minimization of a function <*func()>
- * in <n>-dimensions, along vector <dir> starting from a
- * point <ori>. Identifies a scalar $x$ that approximates
- * the position of the minimum along this direction, in a
- * given bracketing interval (<a,b>). The minimum must
- * have been bracketed by the caller in the <(a,b)>
- * interval. <a> is often 0, because we often start at the
- * <ori>.
- *
- * A quasi-1D scalar coordinate $x$ (such as <a> or <b>) is
- * transformed to a point $\mathbf{p}$ in n-space as:
- * $\mathbf{p} = \mathbf{\mbox{ori}} + x
- * \mathbf{\mbox{dir}}$.
- *
- * Any extra (fixed) data needed to calculate <func> can be
- * passed through the void <prm> pointer.
- *
- * <eps> and <t> define the relative convergence tolerance,
- * $\mbox{tol} = \mbox{eps} |x| + t$. <eps> should not be
- * less than the square root of the machine precision. The
- * <DBL_EPSILON> is 2.2e-16 on many machines with 64-bit
- * doubles, so <eps> is on the order of 1e-8 or more. <t>
- * is a yet smaller number, used to avoid nonconvergence in
- * the pathological case $x=0$.
- *
- * Upon convergence (which is guaranteed), returns <xvec>,
- * the n-dimensional minimum. Optionally, will also return
- * <ret_x>, the scalar <x> that resulted in that
- * n-dimensional minimum, and <ret_fx>, the objective
- * function <*func(x)> at the minimum.
- *
- * This is an implementation of the R.P. Brent (1973)
- * algorithm for one-dimensional minimization without
- * derivatives (modified from Brent's ALGOL60 code). Uses a
- * combination of bisection search and parabolic
- * interpolation; should exhibit superlinear convergence in
- * most functions.
- *
- *
- * Args: ori - n-vector at origin
- * dir - direction vector (gradient) we're following from ori
- * n - dimensionality of ori, dir, and xvec
- * (*func) - ptr to caller's objective function
- * prm - ptr to any additional data (*func)() needs
- * a,b - minimum is bracketed on interval [a,b]
- * eps - tol = eps |x| + t; eps >= 2 * relative machine precision
- * t - additional factor for tol to avoid x=0 case.
- * xvec - RETURN: minimum, as an n-vector (caller allocated)
- * ret_x - optRETURN: scalar multiplier that gave xvec
- * ret_fx - optRETURN: f(x)
- *
- * Returns: (void)
- *
- * Reference: See [Brent73], Chapter 5. My version is derived directly
- * from Brent's description and his ALGOL60 code. I've
- * preserved his variable names as much as possible, to
- * make the routine follow his published description
- * closely. The Brent algorithm is also discussed in
- * Numerical Recipes [Press88].
- */
-static void
-brent(double *ori, double *dir, int n,
- double (*func)(double *, int, void *), void *prm,
- double a, double b, double eps, double t,
- double *xvec, double *ret_x, double *ret_fx)
-{
- double w,x,v,u; /* with [a,b]: Brent's six points */
- double m; /* midpoint of current [a,b] interval */
- double tol; /* tolerance = eps|x| + t */
- double fu,fv,fw,fx; /* function evaluations */
- double p,q; /* numerator, denominator of parabolic interpolation */
- double r;
- double d,e; /* last, next-to-last values of p/q */
- double c = 1. - (1./eslCONST_GOLD); /* Brent's c; 0.381966; golden ratio */
- int niter; /* number of iterations */
-
- x=v=w= a + c*(b-a); /* initial guess of x by golden section */
- esl_vec_DCopy(ori, n, xvec); /* build xvec from ori, dir, x */
- esl_vec_DAddScaled(xvec, dir, x, n);
- fx=fv=fw = (*func)(xvec, n, prm); /* initial function evaluation */
-
- e = 0.;
- niter = 0;
- while (1) /* algorithm is guaranteed to converge. */
- {
- m = 0.5 * (a+b);
- tol = eps*fabs(x) + t;
- if (fabs(x-m) <= 2*tol - 0.5*(b-a)) break; /* convergence test. */
- niter++;
-
- p = q = r = 0.;
- if (fabs(e) > tol)
- { /* Compute parabolic interpolation, u = x + p/q */
- r = (x-w)*(fx-fv);
- q = (x-v)*(fx-fw);
- p = (x-v)*q - (x-w)*r;
- q = 2*(q-r);
- if (q > 0) { p = -p; } else {q = -q;}
- r = e;
- e=d; /* e is now the next-to-last p/q */
- }
-
- if (fabs(p) < fabs(0.5*q*r) || p < q*(a-x) || p < q*(b-x))
- { /* Seems well-behaved? Use parabolic interpolation to compute new point u */
- d = p/q; /* d remembers last p/q */
- u = x+d; /* trial point, for now... */
-
- if (2.0*(u-a) < tol || 2.0*(b-u) < tol) /* don't evaluate func too close to a,b */
- d = (x < m)? tol : -tol;
- }
- else /* Badly behaved? Use golden section search to compute u. */
- {
- e = (x<m)? b-x : a-x; /* e = largest interval */
- d = c*e;
- }
-
- /* Evaluate f(), but not too close to x. */
- if (fabs(d) >= tol) u = x+d;
- else if (d > 0) u = x+tol;
- else u = x-tol;
- esl_vec_DCopy(ori, n, xvec); /* build xvec from ori, dir, u */
- esl_vec_DAddScaled(xvec, dir, u, n);
- fu = (*func)(xvec, n, prm); /* f(u) */
-
- /* Bookkeeping. */
- if (fu <= fx)
- {
- if (u < x) b = x; else a = x;
- v = w; fv = fw; w = x; fw = fx; x = u; fx = fu;
- }
- else
- {
- if (u < x) a = u; else b = u;
- if (fu <= fw || w == x)
- { v = w; fv = fw; w = u; fw = fu; }
- else if (fu <= fv || v==x || v ==w)
- { v = u; fv = fu; }
- }
- }
-
- /* Return.
- */
- esl_vec_DCopy(ori, n, xvec); /* build final xvec from ori, dir, x */
- esl_vec_DAddScaled(xvec, dir, x, n);
- if (ret_x != NULL) *ret_x = x;
- if (ret_fx != NULL) *ret_fx = fx;
- ESL_DPRINTF2(("\nbrent(): %d iterations\n", niter));
- ESL_DPRINTF2(("xx=%10.8f fx=%10.1f\n", x, fx));
-}
-
-
-/* Function: esl_min_ConjugateGradientDescent()
- * Incept: SRE, Wed Jun 22 08:49:42 2005 [St. Louis]
- *
- * Purpose: n-dimensional minimization by conjugate gradient descent.
- *
- * An initial point is provided by <x>, a vector of <n>
- * components. The caller also provides a function <*func()> that
- * compute the objective function f(x) when called as
- * <(*func)(x, n, prm)>, and a function <*dfunc()> that can
- * compute the gradient <dx> at <x> when called as
- * <(*dfunc)(x, n, prm, dx)>, given an allocated vector <dx>
- * to put the derivative in. Any additional data or fixed
- * parameters that these functions require are passed by
- * the void pointer <prm>.
- *
- * The first step of each iteration is to try to bracket
- * the minimum along the current direction. The initial step
- * size is controlled by <u[]>; the first step will not exceed
- * <u[i]> for any dimension <i>. (You can think of <u> as
- * being the natural "units" to use along a graph axis, if
- * you were plotting the objective function.)
- *
- * The caller also provides an allocated workspace sufficient to
- * hold four allocated n-vectors. (4 * sizeof(double) * n).
- *
- * Iterations continue until the objective function has changed
- * by less than a fraction <tol>. This should not be set to less than
- * sqrt(<DBL_EPSILON>).
- *
- * Upon return, <x> is the minimum, and <ret_fx> is f(x),
- * the function value at <x>.
- *
- * Args: x - an initial guess n-vector; RETURN: x at the minimum
- * u - "units": maximum initial step size along gradient when bracketing.
- * n - dimensionality of all vectors
- * *func() - function for computing objective function f(x)
- * *dfunc() - function for computing a gradient at x
- * prm - void ptr to any data/params func,dfunc need
- * tol - convergence criterion applied to f(x)
- * wrk - allocated 4xn-vector for workspace
- * ret_fx - optRETURN: f(x) at the minimum
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslENOHALT> if it fails to converge in MAXITERATIONS.
- * <eslERANGE> if the minimum is not finite, which may
- * indicate a problem in the implementation or choice of <*func()>.
- *
- * Xref: STL9/101.
- */
-int
-esl_min_ConjugateGradientDescent(double *x, double *u, int n,
- double (*func)(double *, int, void *),
- void (*dfunc)(double *, int, void *, double *),
- void *prm, double tol, double *wrk, double *ret_fx)
-{
- double oldfx;
- double coeff;
- int i, i1;
- double *dx, *cg, *w1, *w2;
- double cvg;
- double fa,fb,fc;
- double ax,bx,cx;
- double fx;
-
- dx = wrk;
- cg = wrk + n;
- w1 = wrk + 2*n;
- w2 = wrk + 3*n;
-
- oldfx = (*func)(x, n, prm); /* init the objective function */
-
- /* Bail out if the function is +/-inf: this can happen if the caller
- * has screwed something up, or has chosen a bad start point.
- */
- if (oldfx == eslINFINITY || oldfx == -eslINFINITY)
- ESL_EXCEPTION(eslERANGE, "minimum not finite");
-
-
- if (dfunc != NULL)
- {
- (*dfunc)(x, n, prm, dx); /* find the current negative gradient, - df(x)/dxi */
- esl_vec_DScale(dx, n, -1.0);
- }
- else numeric_derivative(x, u, n, func, prm, 1e-4, dx); /* resort to brute force */
-
- esl_vec_DCopy(dx, n, cg); /* and make that the first conjugate direction, cg */
-
-
-
- /* (failsafe) convergence test: a zero direction can happen,
- * and it either means we're stuck or we're finished (most likely stuck)
- */
- for (i1 = 0; i1 < n; i1++)
- if (cg[i1] != 0.) break;
- if (i1 == n) {
- if (ret_fx != NULL) *ret_fx = oldfx;
- return eslOK;
- }
-
- for (i = 0; i < MAXITERATIONS; i++)
- {
-
- /* Figure out the initial step size.
- */
- bx = fabs(u[0] / cg[0]);
- for (i1 = 1; i1 < n; i1++)
- {
- cx = fabs(u[i1] / cg[i1]);
- if (cx < bx) bx = cx;
- }
-
- /* Bracket the minimum.
- */
- bracket(x, cg, n, bx, func, prm, w1,
- &ax, &bx, &cx,
- &fa, &fb, &fc);
-
- /* Minimize along the line given by the conjugate gradient <cg> */
- brent(x, cg, n, func, prm, ax, cx, 1e-3, 1e-8, w2, NULL, &fx);
- esl_vec_DCopy(w2, n, x);
-
- /* Bail out if the function is now +/-inf: this can happen if the caller
- * has screwed something up.
- */
- if (fx == eslINFINITY || fx == -eslINFINITY)
- ESL_EXCEPTION(eslERANGE, "minimum not finite");
-
-
- /* Find the negative gradient at that point (temporarily in w1) */
- if (dfunc != NULL)
- {
- (*dfunc)(x, n, prm, w1);
- esl_vec_DScale(w1, n, -1.0);
- }
- else numeric_derivative(x, u, n, func, prm, 1e-4, w1); /* resort to brute force */
-
- /* Calculate the Polak-Ribiere coefficient */
- for (coeff = 0., i1 = 0; i1 < n; i1++)
- coeff += (w1[i1] - dx[i1]) * w1[i1];
- coeff /= esl_vec_DDot(dx, dx, n);
-
- /* Calculate the next conjugate gradient direction in w2 */
- esl_vec_DCopy(w1, n, w2);
- esl_vec_DAddScaled(w2, cg, coeff, n);
-
- /* Finishing set up for next iteration: */
- esl_vec_DCopy(w1, n, dx);
- esl_vec_DCopy(w2, n, cg);
-
- /* Now: x is the current point;
- * fx is the function value at that point;
- * dx is the current gradient at x;
- * cg is the current conjugate gradient direction.
- */
-
- /* Main convergence test. 1e-9 factor is fudging the case where our
- * minimum is at exactly f()=0.
- */
- cvg = 2.0 * fabs((oldfx-fx)) / (1e-10 + fabs(oldfx) + fabs(fx));
-
-// fprintf(stderr, "(%d): Old f() = %.9f New f() = %.9f Convergence = %.9f\n", i, oldfx, fx, cvg);
-// fprintf(stdout, "(%d): Old f() = %.9f New f() = %.9f Convergence = %.9f\n", i, oldfx, fx, cvg);
-
-#if eslDEBUGLEVEL >= 2
- printf("\nesl_min_ConjugateGradientDescent():\n");
- printf("new point: ");
- for (i1 = 0; i1 < n; i1++)
- printf("%g ", x[i1]);
-
- printf("\nnew gradient: ");
- for (i1 = 0; i1 < n; i1++)
- printf("%g ", dx[i1]);
-
- numeric_derivative(x, u, n, func, prm, 1e-4, w1);
- printf("\n(numeric grad): ");
- for (i1 = 0; i1 < n; i1++)
- printf("%g ", w1[i1]);
-
- printf("\nnew direction: ");
- for (i1 = 0; i1 < n; i1++)
- printf("%g ", cg[i1]);
-
- printf("\nOld f() = %g New f() = %g Convergence = %g\n\n", oldfx, fx, cvg);
-#endif
-
- if (cvg <= tol) break;
-
- /* Second (failsafe) convergence test: a zero direction can happen,
- * and it either means we're stuck or we're finished (most likely stuck)
- */
- for (i1 = 0; i1 < n; i1++)
- if (cg[i1] != 0.) break;
- if (i1 == n) break;
-
- oldfx = fx;
- }
-
-
- if (ret_fx != NULL) *ret_fx = fx;
-
- if (i == MAXITERATIONS)
- ESL_FAIL(eslENOHALT, NULL, " ");
-// ESL_EXCEPTION(eslENOHALT, "Failed to converge in ConjugateGradientDescent()");
-
-
-
- return eslOK;
-}
-
-
-
-
-
-
-/*****************************************************************
- * Example main()
- *****************************************************************/
-#ifdef eslMINIMIZER_EXAMPLE
-/*::cexcerpt::minimizer_example::begin::*/
-/* compile: gcc -g -Wall -I. -o example -DeslMINIMIZER_EXAMPLE esl_minimizer.c esl_vectorops.c easel.c -lm
- * run: ./example
- */
-#include <stdio.h>
-
-#include "easel.h"
-#include "esl_vectorops.h"
-#include "esl_minimizer.h"
-
-/* a simple multidimensional quadratic w/ a minimum at 0:
- * $f(x) = a_1 x_1^2 + ... a_n x_n^2$
- */
-static double
-example_func(double *x, int n, void *prm)
-{
- double *a;
- double fx;
- int i;
-
- a = (double *) prm; /* cast the data vector */
- for (fx = 0., i = 0; i < n; i++)
- fx += a[i] * x[i] * x[i];
- return fx;
-}
-/* gradient of the f(x): d/dx_i = 2 a_i x_i
- */
-static void
-example_dfunc(double *x, int n, void *prm, double *dx)
-{
- double *a;
- int i;
-
- a = (double *) prm; /* cast the data vector */
- for (i = 0; i < n; i++)
- dx[i] = 2.0 * a[i] * x[i];
-}
-int
-main(int argc, char **argv)
-{
- int n = 6;
- double a[6] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0 };
- double x[6] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0 };
- double u[6] = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
- double wrk[24];
- double fx;
- int i;
-
- esl_min_ConjugateGradientDescent(x, u, n,
- &example_func, &example_dfunc, (void *) a,
- 0.0001, wrk, &fx);
-
- printf("At minimum: f(x) = %g\n", fx);
- printf("vector x = ");
- for (i = 0; i < 6; i++) printf("%g ", x[i]);
- printf("\n");
-
- return 0;
-}
-/*::cexcerpt::minimizer_example::end::*/
-#endif /*eslMINIMIZER_EXAMPLE*/
-
-
-
-
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_minimizer.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_minimizer.h
deleted file mode 100644
index c883500..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_minimizer.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* Multidimensional optimization by conjugate gradient descent.
- *
- * SRE, Wed Jun 22 09:53:05 2005
- * SVN $Id: esl_minimizer.h 162 2007-04-10 23:50:12Z eddys $
- */
-#ifndef ESL_MINIMIZER_INCLUDED
-#define ESL_MINIMIZER_INCLUDED
-
-#define MAXITERATIONS 100
-
-extern int esl_min_Bracket(double *a, double *d, double *u, int n,
- double (*func)(double *, int, void *), void *prm,
- double *ret_fa,
- double *b, double *ret_bx, double *ret_fb,
- double *c, double *ret_cx, double *ret_fc);
-extern int esl_min_LineSearch(double *ori, double *d, double *u, int n,
- double (*func)(double *, int, void *), void *prm,
- double tol, double *b,
- double *x, double *ret_xx, double *ret_fx);
-extern int esl_min_ConjugateGradientDescent(double *x, double *u, int n,
- double (*func)(double *, int, void *),
- void (*dfunc)(double *, int, void *, double *),
- void *prm, double tol, double *wrk, double *ret_fx);
-
-#endif /*ESL_MINIMIZER_INCLUDED*/
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_msa.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_msa.cpp
deleted file mode 100644
index 98ec0a6..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_msa.cpp
+++ /dev/null
@@ -1,2886 +0,0 @@
-/*::cexcerpt::header_example::begin::*/
-/* Multiple sequence alignment file i/o.
-*
-* Contents:
-* 1. The <ESL_MSA> object
-* 2. The <ESL_MSAFILE> object
-* 3. Digital mode MSA's (augmentation: alphabet)
-* 4. Random MSA database access (augmentation: ssi)
-* 5. General i/o API, for all alignment formats
-* 6. Miscellaneous functions for manipulating MSAs
-* 7. Stockholm (Pfam/Rfam) format
-* 8. A2M format
-* 9. PSIBLAST format
-* 10. SELEX format
-* 11. Debugging/development routines
-* 16. Copyright and license information
-*
-* Augmentations:
-* alphabet: adds support for digital MSAs
-* keyhash: speeds up Stockholm file input
-* ssi: enables indexed random access in a file of many MSAs
-*
-* to do: SRE, Sat Jan 3 09:43:42 2009 (after selex parser added)
-* - SELEX parser is better in some respects than older Stockholm
-* parser; stricter, better error detection, better modularity.
-* Generalize the SELEX parser routines and use them for Stockholm.
-* - Test files for SELEX parser are in esl_msa_testfiles/selex, with
-* tabular summary list in 00MANIFEST. This is an experiment with
-* writing tests that require lots of external files, such as
-* format parsers. Write test driver routine that reads 00MANIFEST
-* and runs esl_msa_Read() against these files, checking for proper
-* return status, including errors.
-* - The selex parser's read_block() reads lines into memory and
-* parses them later. afp->linenumber is thus no longer an
-* accurate record of where a parse error occurs. read_xxx()
-* format parsers now need to include line number in their
-* afp->errbuf[] message upon eslEFORMAT error. Stockholm parser
-* doesn't do this. Make it so, and document in examples.
-* - Format autodetection doesn't work yet. Coordinate w/ how sqio
-* does it, and implement. May require buffering input to make
-* it work with .gz, pipes without rewinding a stream. Might be
-* a good idea to generalize input buffering - perhaps making
-* it part of ESL_FILEPARSER.
-* - PSIBLAST, A2M format only supported on output, not input.
-* Implement input parsers.
-* - SELEX format only supported on input, not output.
-* Implement output writer.
-* - More formats need to be parsed. Check on formats for current
-* best MSA programs, such as MUSCLE, MAFFT; implement i/o.
-*
-* SRE, Thu Jan 20 08:50:43 2005 [St. Louis]
- * SVN $Id: esl_msa.c 440 2009-11-13 17:02:49Z eddys $
-*/
-/*::cexcerpt::header_example::end::*/
-
-/*::cexcerpt::include_example::begin::*/
-
-#include <hmmer3/easel/esl_config.h>
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <limits.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/hmmer.h>
-
-#ifdef eslAUGMENT_KEYHASH
-#include "esl_keyhash.h"
-#endif
-#ifdef eslAUGMENT_ALPHABET
-#include <hmmer3/easel/esl_alphabet.h>
-#endif
-
-#ifdef eslAUGMENT_SSI
-#include "esl_ssi.h"
-#endif
-
-#include <hmmer3/easel/esl_wuss.h>
-#include <hmmer3/easel/esl_vectorops.h>
-#include "esl_msa.h"
-
-/*::cexcerpt::include_example::end::*/
-
-
-
-/******************************************************************************
-*# 1. The <ESL_MSA> object
-*****************************************************************************/
-
-/* create_mostly()
-* SRE, Sun Aug 27 16:40:00 2006 [Leesburg]
-*
-* This is the routine called by esl_msa_Create() and esl_msa_CreateDigital()
-* that does all allocation except the aseq/ax alignment data.
-*
-* <nseq> may be the exact known # of seqs in an alignment; or <nseq>
-* may be an allocation block size (to be expanded by doubling, in
-* esl_msa_Expand(), as in:
-* <if (msa->nseq == msa->sqalloc) esl_msa_Expand(msa);>
-* <nseq> should not be 0.
-*
-* <alen> may be the exact length of an alignment, in columns; or it
-* may be -1, which states that your parser will take responsibility
-* for expanding as needed as new input is read into a growing new
-* alignment.
-*
-* A created <msa> can only be <_Expand()>'ed if <alen> is -1.
-*
-* Args: <nseq> - number of sequences, or nseq allocation blocksize
-* <alen> - length of alignment in columns, or -1
-*
-* Returns: pointer to new MSA object, w/ all values initialized.
-* Note that msa->nseq is initialized to 0 here, even though space
-* is allocated.
-*
-* Throws: <NULL> on allocation failure.
-*/
-static ESL_MSA *
-create_mostly(int nseq, int64_t alen)
-{
- int status;
- ESL_MSA *msa = NULL;
- int i;
-
- ESL_ALLOC_WITH_TYPE(msa, ESL_MSA*, sizeof(ESL_MSA));
- msa->aseq = NULL;
- msa->sqname = NULL;
- msa->wgt = NULL;
- msa->alen = alen; /* if -1, then we're growable. */
- msa->nseq = 0; /* our caller (text or digital allocation) sets this. */
- msa->flags = 0;
-
-#ifdef eslAUGMENT_ALPHABET
- msa->abc = NULL;
- msa->ax = NULL;
-#endif /*eslAUGMENT_ALPHABET*/
-
- msa->name = NULL;
- msa->desc = NULL;
- msa->acc = NULL;
- msa->au = NULL;
- msa->ss_cons = NULL;
- msa->sa_cons = NULL;
- msa->pp_cons = NULL;
- msa->rf = NULL;
- msa->sqacc = NULL;
- msa->sqdesc = NULL;
- msa->ss = NULL;
- msa->sa = NULL;
- msa->pp = NULL;
- for (i = 0; i < eslMSA_NCUTS; i++) {
- msa->cutoff[i] = 0.;
- msa->cutset[i] = FALSE;
- }
- msa->sqalloc = nseq;
- msa->sqlen = NULL;
- msa->sslen = NULL;
- msa->salen = NULL;
- msa->pplen = NULL;
- msa->lastidx = 0;
-
- /* Unparsed markup, including comments and Stockholm tags.
- * GS, GC, and GR Stockholm tags require keyhash augmentation
- */
- msa->comment = NULL;
- msa->ncomment = 0;
- msa->alloc_ncomment = 0;
-
- msa->gf_tag = NULL;
- msa->gf = NULL;
- msa->ngf = 0;
- msa->alloc_ngf = 0;
-
- msa->gs_tag = NULL;
- msa->gs = NULL;
- msa->ngs = 0;
-
- msa->gc_tag = NULL;
- msa->gc = NULL;
- msa->ngc = 0;
-
- msa->gr_tag = NULL;
- msa->gr = NULL;
- msa->ngr = 0;
-
-#ifdef eslAUGMENT_KEYHASH
- msa->index = esl_keyhash_Create();
- msa->gs_idx = NULL;
- msa->gc_idx = NULL;
- msa->gr_idx = NULL;
-#endif /*eslAUGMENT_KEYHASH*/
-
-#ifdef eslAUGMENT_SSI
- msa->offset = 0;
-#endif
-
- /* Allocation, round 2.
- */
- if(nseq > 0) {
- ESL_ALLOC_WITH_TYPE(msa->sqname,char**, sizeof(char *) * nseq);
- ESL_ALLOC_WITH_TYPE(msa->wgt, double*, sizeof(double) * nseq);
- ESL_ALLOC_WITH_TYPE(msa->sqlen, int64_t*, sizeof(int64_t)* nseq);
- }
- /* Initialize at the second level.
- */
- for (i = 0; i < nseq; i++)
- {
- msa->sqname[i] = NULL;
- msa->sqlen[i] = 0;
- msa->wgt[i] = -1.0; /* "unset so far" */
- }
-
- return msa;
-
-ERROR:
- esl_msa_Destroy(msa);
- return NULL;
-}
-
-/* get_seqidx()
-*
-* Find the index of a given sequence <name> in an <msa>.
-* If caller has a good guess (for instance, the sequences are
-* coming in a previously seen order in a block of seqs or annotation),
-* the caller can pass this information in <guess>, or -1 if
-* it has no guess.
-*
-* This function behaves differently depending on whether
-* keyhash augmentation is available or not. Without keyhashing,
-* the name is identified by bruteforce search of the names
-* in the <msa>. With keyhashing, we hash search, which should
-* improve performance for large alignments.
-*
-* If the name does not already exist in the MSA, then it
-* is assumed to be a new sequence name that we need to store.
-* seqidx is set to msa->nseq, the MSA is Expand()'ed if necessary
-* to make room, the name is stored in msa->sqname[msa->nseq],
-* (and in the hash table, if we're keyhash augmented)
-* and msa->nseq is incremented.
-*
-* Returns: <eslOK> on success, and the seqidx is
-* passed back via <ret_idx>. If <name> is new
-* in the <msa>, the <name> is stored and the <msa>
-* may be internally reallocated if needed.
-*
-* Throws: <eslEMEM> if we try to add a name and allocation fails.
-* <eslEINVAL> if we try to add a name to a non-growable MSA.
-*/
-static int
-get_seqidx(ESL_MSA *msa, char *name, int guess, int *ret_idx)
-{
- int seqidx;
- int status;
-
- *ret_idx = -1;
-
- /* can we guess? */
- if (guess >= 0 &&
- guess < msa->nseq &&
- strcmp(name, msa->sqname[guess]) == 0)
- { *ret_idx = guess; return eslOK; }
-
- /* Else look it up - either brute force
- * or, if we're keyhash-augmented, by hashing.
- */
-#ifdef eslAUGMENT_KEYHASH
- status = esl_key_Store(msa->index, name, &seqidx);
- if (status == eslEDUP) { *ret_idx = seqidx; return eslOK; }
- if (status != eslOK) return status; /* an error. */
-#else
- for (seqidx = 0; seqidx < msa->nseq; seqidx++)
- if (strcmp(msa->sqname[seqidx], name) == 0) break;
- if (seqidx < msa->nseq)
- { *ret_idx = seqidx; return eslOK; }
-#endif
-
- /* If we reach here, then this is a new name that we're
- * adding.
- */
- if (seqidx >= msa->sqalloc &&
- (status = esl_msa_Expand(msa)) != eslOK)
- return status;
-
- status = esl_strdup(name, -1, &(msa->sqname[seqidx]));
- msa->nseq++;
- if (ret_idx != NULL) *ret_idx = seqidx;
- return status;
-}
-
-
-/* msa_get_rlen()
-*
-* Returns the raw (unaligned) length of sequence number <seqidx>
-* in <msa>.
-*/
-static int64_t
-msa_get_rlen(const ESL_MSA *msa, int seqidx)
-{
- int64_t rlen = 0;
- int pos;
-
-#ifdef eslAUGMENT_ALPHABET
- if (msa->flags & eslMSA_DIGITAL) rlen = esl_abc_dsqrlen(msa->abc, msa->ax[seqidx]);
-#endif
- if (! (msa->flags & eslMSA_DIGITAL))
- {
- for (pos = 0; pos < msa->alen; pos++)
- if (isalnum(msa->aseq[seqidx][pos])) rlen++;
- }
- return rlen;
-}
-
-
-/* set_seq_ss()
-*
-* Set the secondary structure annotation for sequence number
-* <seqidx> in an alignment <msa> by copying the string <ss>.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure.
-*/
-static int
-set_seq_ss(ESL_MSA *msa, int seqidx, const char *ss)
-{
- int status;
- int i;
-
- if (msa->ss == NULL)
- {
- ESL_ALLOC_WITH_TYPE(msa->ss, char**, sizeof(char *) * msa->sqalloc);
- for (i = 0; i < msa->sqalloc; i++) msa->ss[i] = NULL;
- }
- if (msa->ss[seqidx] != NULL) free(msa->ss[seqidx]);
- return (esl_strdup(ss, -1, &(msa->ss[seqidx])));
-
-ERROR:
- return status;
-}
-
-/* set_seq_sa()
-*
-* Set the surface accessibility annotation for sequence number
-* <seqidx> in an alignment <msa> by copying the string <sa>.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure.
-*/
-static int
-set_seq_sa(ESL_MSA *msa, int seqidx, const char *sa)
-{
- int status;
- int i;
-
- if (msa->sa == NULL)
- {
- ESL_ALLOC_WITH_TYPE(msa->sa, char**, sizeof(char *) * msa->sqalloc);
- for (i = 0; i < msa->sqalloc; i++) msa->sa[i] = NULL;
- }
- if (msa->sa[seqidx] != NULL) free(msa->sa[seqidx]);
- return (esl_strdup(sa, -1, &(msa->sa[seqidx])));
-
-ERROR:
- return status;
-}
-
-/* set_seq_pp()
-*
-* Set the posterior probability annotation for sequence number
-* <seqidx> in an alignment <msa> by copying the string <pp>.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure.
-*/
-static int
-set_seq_pp(ESL_MSA *msa, int seqidx, const char *pp)
-{
- int status;
- int i;
-
- if (msa->pp == NULL)
- {
- ESL_ALLOC_WITH_TYPE(msa->pp, char**, sizeof(char *) * msa->sqalloc);
- for (i = 0; i < msa->sqalloc; i++) msa->pp[i] = NULL;
- }
- if (msa->pp[seqidx] != NULL) free(msa->pp[seqidx]);
- return (esl_strdup(pp, -1, &(msa->pp[seqidx])));
-
-ERROR:
- return status;
-}
-
-
-
-
-/* verify_parse()
-*
-* Last function called after a multiple alignment parser thinks it's
-* done. Checks that parse was successful; makes sure required
-* information is present; makes sure required information is
-* consistent. Some fields that are only use during parsing may be
-* freed (sqlen, for example), and some fields are finalized now
-* (<msa->alen> is set, for example).
-*
-* <errbuf> is a place to sprintf an informative message about the
-* reason for a parse error. The caller provides an <errbuf>
-* of at least 512 bytes.
-*
-* Returns: <eslOK>, and errbuf is set to an empty string.
-*
-* Throws: <eslEFORMAT> if a problem is detected, and an
-* informative message about the failure is in errbuf.
-*/
-//static int
-//verify_parse(ESL_MSA *msa, char *errbuf)
-//{
-// int idx;
-//
-// if (msa->nseq == 0) ESL_FAIL(eslEFORMAT, errbuf, "parse error: no alignment data found");
-//
-// /* set alen, until proven otherwise; we'll check that the other seqs
-// * have the same length later.
-// */
-// msa->alen = msa->sqlen[0];
-//
-// /* We can rely on msa->sqname[] being valid for any index,
-// * because of the way the line parsers always store any name
-// * they add to the index.
-// */
-// for (idx = 0; idx < msa->nseq; idx++)
-// {
-//#ifdef eslAUGMENT_ALPHABET
-// if ((msa->flags & eslMSA_DIGITAL) && (msa->ax == NULL || msa->ax[idx] == NULL))
-// ESL_FAIL(eslEFORMAT, errbuf, "MSA %s parse error: no sequence for %s",
-// msa->name != NULL ? msa->name : "", msa->sqname[idx]);
-//#endif
-// if (! (msa->flags & eslMSA_DIGITAL) && (msa->aseq == NULL || msa->aseq[idx] == NULL))
-// ESL_FAIL(eslEFORMAT, errbuf, "MSA %s parse error: no sequence for %s",
-// msa->name != NULL ? msa->name : "", msa->sqname[idx]);
-//
-// /* either all weights must be set, or none of them */
-// if ((msa->flags & eslMSA_HASWGTS) && msa->wgt[idx] == -1.0)
-// ESL_FAIL(eslEFORMAT, errbuf, "MSA %s parse error: expected a weight for seq %s",
-// msa->name != NULL ? msa->name : "", msa->sqname[idx]);
-//
-// /* all aseq must be same length. */
-// if (msa->sqlen[idx] != msa->alen)
-// ESL_FAIL(eslEFORMAT, errbuf, "MSA %s parse error: sequence %s: length %" PRId64 ", expected %" PRId64,
-// msa->name != NULL ? msa->name : "", msa->sqname[idx], msa->sqlen[idx], msa->alen);
-//
-// /* if individual SS is present, it must have length right too */
-// if (msa->ss != NULL && msa->ss[idx] != NULL && msa->sslen[idx] != msa->alen)
-// ESL_FAIL(eslEFORMAT, errbuf, "MSA %s parse error: GR SS for %s: length %" PRId64 ", expected %" PRId64,
-// msa->name != NULL ? msa->name : "", msa->sqname[idx], msa->sslen[idx], msa->alen);
-//
-// /* if SA is present, must have length right */
-// if (msa->sa != NULL && msa->sa[idx] != NULL && msa->salen[idx] != msa->alen)
-// ESL_FAIL(eslEFORMAT, errbuf, "MSA %s parse error: GR SA for %s: length %" PRId64 ", expected %" PRId64,
-// msa->name != NULL ? msa->name : "", msa->sqname[idx], msa->salen[idx], msa->alen);
-//
-// /* if PP is present, must have length right */
-// if (msa->pp != NULL && msa->pp[idx] != NULL && msa->pplen[idx] != msa->alen)
-// ESL_FAIL(eslEFORMAT, errbuf, "MSA %s parse error: GR PP for %s: length %" PRId64 ", expected %" PRId64,
-// msa->name != NULL ? msa->name : "", msa->sqname[idx], msa->pplen[idx], msa->alen);
-// }
-//
-// /* if cons SS is present, must have length right */
-// if (msa->ss_cons != NULL && strlen(msa->ss_cons) != msa->alen)
-// ESL_FAIL(eslEFORMAT, errbuf, "MSA %s parse error: GC SS_cons markup: len %zd, expected %" PRId64,
-// msa->name != NULL ? msa->name : "", strlen(msa->ss_cons), msa->alen);
-//
-// /* if cons SA is present, must have length right */
-// if (msa->sa_cons != NULL && strlen(msa->sa_cons) != msa->alen)
-// ESL_FAIL(eslEFORMAT, errbuf, "MSA %s parse error: GC SA_cons markup: len %zd, expected %" PRId64,
-// msa->name != NULL ? msa->name : "", strlen(msa->sa_cons), msa->alen);
-//
-// /* if cons PP is present, must have length right */
-// if (msa->pp_cons != NULL && strlen(msa->pp_cons) != msa->alen)
-// ESL_FAIL(eslEFORMAT, errbuf, "MSA %s parse error: GC PP_cons markup: len %zd, expected %" PRId64,
-// msa->name != NULL ? msa->name : "", strlen(msa->pp_cons), msa->alen);
-//
-// /* if RF is present, must have length right */
-// if (msa->rf != NULL && strlen(msa->rf) != msa->alen)
-// ESL_FAIL(eslEFORMAT, errbuf, "MSA %s parse error: GC RF markup: len %zd, expected %" PRId64,
-// msa->name != NULL ? msa->name : "", strlen(msa->rf), msa->alen);
-//
-// /* If no weights were set, set 'em all to 1.0 */
-// if (!(msa->flags & eslMSA_HASWGTS))
-// for (idx = 0; idx < msa->nseq; idx++)
-// msa->wgt[idx] = 1.0;
-//
-// /* Clean up a little from the parser */
-// if (msa->sqlen != NULL) { free(msa->sqlen); msa->sqlen = NULL; }
-// if (msa->sslen != NULL) { free(msa->sslen); msa->sslen = NULL; }
-// if (msa->salen != NULL) { free(msa->salen); msa->salen = NULL; }
-// if (msa->pplen != NULL) { free(msa->pplen); msa->pplen = NULL; }
-// return eslOK;
-//}
-
-
-/* Function: esl_msa_Create()
-* Synopsis: Creates an <ESL_MSA> object.
-* Incept: SRE, Sun Jan 23 08:25:26 2005 [St. Louis]
-*
-* Purpose: Creates and initializes an <ESL_MSA> object, and returns a
-* pointer to it.
-*
-* If caller already knows the dimensions of the alignment,
-* both <nseq> and <alen>, then <msa = esl_msa_Create(nseq,
-* alen)> allocates the whole thing at once. The MSA's
-* <nseq> and <alen> fields are set accordingly, and the
-* caller doesn't have to worry about setting them; it can
-* just fill in <aseq>.
-*
-* If caller doesn't know the dimensions of the alignment
-* (for example, when parsing an alignment file), then
-* <nseq> is taken to be an initial allocation size, and
-* <alen> must be -1. <alen=-1> is used as a flag for a
-* "growable" MSA. For example, the call <msa =
-* esl_msa_Create(16, -1)>. allocates internally for an
-* initial block of 16 sequences, but without allocating
-* any space for individual sequences. This allocation can
-* be expanded (by doubling) by calling <esl_msa_Expand()>.
-* A created <msa> can only be <_Expand()>'ed if <alen> is
-* -1.
-*
-* In a growable alignment, caller becomes responsible for
-* memory allocation of each individual <aseq[i]>. Caller
-* is also responsible for setting <nseq> and <alen> when
-* it is done parsing and creating the new MSA. In
-* particular, the <esl_msa_Destroy()> function relies on
-* <nseq> to know how many individual sequences are
-* allocated.
-*
-* Args: <nseq> - number of sequences, or nseq allocation blocksize
-* <alen> - length of alignment in columns, or -1
-*
-* Returns: pointer to new MSA object, w/ all values initialized.
-*
-* Throws: <NULL> on allocation failure.
-*
-* Xref: squid's MSAAlloc()
-*/
-ESL_MSA *
-esl_msa_Create(int nseq, int64_t alen)
-{
- int status;
- ESL_MSA *msa;
- int i;
-
- msa = create_mostly(nseq, alen); /* aseq is null upon successful return */
- if (msa == NULL) return NULL; /* already threw error in mostly_create, so percolate */
-
- ESL_ALLOC_WITH_TYPE(msa->aseq, char**, sizeof(char *) * msa->sqalloc);
- for (i = 0; i < msa->sqalloc; i++)
- msa->aseq[i] = NULL;
-
- if (alen != -1) {
- for (i = 0; i < nseq; i++)
- {
- ESL_ALLOC_WITH_TYPE(msa->aseq[i], char*, sizeof(char) * (alen+1));
- msa->aseq[i][alen] = '\0'; /* caller might forget to null terminate; help the poor */
- }
- msa->nseq = nseq;
- }
- return msa;
-
-ERROR:
- esl_msa_Destroy(msa);
- return NULL;
-}
-
-
-/* Function: esl_msa_Expand()
-* Synopsis: Reallocate for more sequences.
-* Incept: SRE, Sun Jan 23 08:26:30 2005 [St. Louis]
-*
-* Purpose: Double the current sequence allocation in <msa>.
-* Typically used when we're reading an alignment sequentially
-* from a file, so we don't know nseq 'til we're done.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on reallocation failure; <msa> is undamaged,
-* and the caller may attempt to recover from the error.
-*
-* Throws <eslEINVAL> if <msa> is not growable: its <alen>
- * field must be -1 to be growable.
-*
-* Xref: squid's MSAExpand(), 1999.
-*/
-int
-esl_msa_Expand(ESL_MSA *msa)
-{
- int status;
- int old, newSz; /* old & new allocation sizes (max # seqs) */
- void *p; /* tmp ptr to realloc'ed memory */
- int i,j;
-
- if (msa->alen != -1)
- ESL_EXCEPTION(eslEINVAL, "that MSA is not growable");
-
- old = msa->sqalloc;
- newSz = 2*old;
-
- /* Normally either aseq (ascii) or ax (digitized) would be active, not both.
- * We could make sure that that's true, but that's checked elsewhere.
- */
- if (msa->aseq != NULL) ESL_RALLOC_WITH_TYPE(msa->aseq, char**, p, sizeof(char *) * newSz);
-#ifdef eslAUGMENT_ALPHABET
- if (msa->ax != NULL) ESL_RALLOC_WITH_TYPE(msa->ax, ESL_DSQ**, p, sizeof(ESL_DSQ *) * newSz);
-#endif /*eslAUGMENT_ALPHABET*/
-
- ESL_RALLOC_WITH_TYPE(msa->sqname, char**, p, sizeof(char *) * newSz);
- ESL_RALLOC_WITH_TYPE(msa->wgt, double*, p, sizeof(double) * newSz);
- ESL_RALLOC_WITH_TYPE(msa->sqlen, int64_t*, p, sizeof(int64_t)* newSz);
-
- if (msa->ss != NULL)
- {
- ESL_RALLOC_WITH_TYPE(msa->ss, char**, p, sizeof(char *) * newSz);
- ESL_RALLOC_WITH_TYPE(msa->sslen, int64_t*, p, sizeof(int64_t) * newSz);
- }
-
- if (msa->sa != NULL)
- {
- ESL_RALLOC_WITH_TYPE(msa->sa, char**, p, sizeof(char *) * newSz);
- ESL_RALLOC_WITH_TYPE(msa->salen, int64_t*, p, sizeof(int64_t) * newSz);
- }
-
- if (msa->pp != NULL)
- {
- ESL_RALLOC_WITH_TYPE(msa->pp, char**, p, sizeof(char *) * newSz);
- ESL_RALLOC_WITH_TYPE(msa->pplen, int64_t*, p, sizeof(int64_t) * newSz);
- }
-
- if (msa->sqacc != NULL)
- ESL_RALLOC_WITH_TYPE(msa->sqacc, char**, p, sizeof(char *) * newSz);
-
- if (msa->sqdesc != NULL)
- ESL_RALLOC_WITH_TYPE(msa->sqdesc, char**, p, sizeof(char *) * newSz);
-
- for (i = old; i < newSz; i++)
- {
- if (msa->aseq != NULL) msa->aseq[i] = NULL;
-#ifdef eslAUGMENT_ALPHABET
- if (msa->ax != NULL) msa->ax[i] = NULL;
-#endif /*eslAUGMENT_ALPHABET*/
- msa->sqname[i] = NULL;
- msa->wgt[i] = -1.0; /* -1.0 means "unset so far" */
- msa->sqlen[i] = 0;
-
- if (msa->ss != NULL) { msa->ss[i] = NULL; msa->sslen[i] = 0; }
- if (msa->sa != NULL) { msa->sa[i] = NULL; msa->salen[i] = 0; }
- if (msa->pp != NULL) { msa->pp[i] = NULL; msa->pplen[i] = 0; }
-
- if (msa->sqacc != NULL) msa->sqacc[i] = NULL;
- if (msa->sqdesc != NULL) msa->sqdesc[i] = NULL;
- }
-
- /* Reallocate and re-init for unparsed #=GS tags, if we have some.
- * gs is [0..ngs-1][0..nseq-1][], so we're reallocing the middle
- * set of pointers.
- */
- if (msa->gs != NULL)
- for (i = 0; i < msa->ngs; i++)
- {
- if (msa->gs[i] != NULL)
- {
- ESL_RALLOC_WITH_TYPE(msa->gs[i], char**, p, sizeof(char *) * newSz);
- for (j = old; j < newSz; j++)
- msa->gs[i][j] = NULL;
- }
- }
- /* Reallocate and re-init for unparsed #=GR tags, if we have some.
- * gr is [0..ngs-1][0..nseq-1][], so we're reallocing the middle
- * set of pointers.
- */
- if (msa->gr != NULL)
- for (i = 0; i < msa->ngr; i++)
- {
- if (msa->gr[i] != NULL)
- {
- ESL_RALLOC_WITH_TYPE(msa->gr[i], char**, p, sizeof(char *) * newSz);
- for (j = old; j < newSz; j++)
- msa->gr[i][j] = NULL;
- }
- }
-
- msa->sqalloc = newSz;
- return eslOK;
-
-ERROR:
- return status;
-}
-
-/* Function: esl_msa_Copy()
-* Synopsis: Copies an MSA.
-* Incept: SRE, Tue Jan 22 15:30:32 2008 [Janelia]
-*
-* Purpose: Makes a copy of <msa> in <new>. Caller has
-* already allocated <new> to hold an MSA of
-* at least <msa->nseq> sequences and <msa->alen>
-* columns.
-*
-* Note: Because MSA's are not reusable, this function does a
-* lot of internal allocation for optional fields, without
-* checking <new> to see if space was already allocated. To
-* reuse an MSA <new> and copy new data into it, we'll
-* eventually need a <esl_msa_Reuse()> function, and/or
-* recode this to reuse or free any already-allocated
-* optional memory it encounters in <new>. Until then,
-* it's unlikely that <esl_msa_Copy()> is useful on its own;
-* the caller would be expected to call <esl_msa_Clone()>
-* instead.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure. In this case, <new>
-* was only partially constructed, and should be treated
-* as corrupt.
-*/
-int
-esl_msa_Copy(const ESL_MSA *msa, ESL_MSA *newMSA)
-{
- int i, x, j;
- int status;
-
- /* aseq[0..nseq-1][0..alen-1] strings,
- * or ax[0..nseq-1][(0) 1..alen (alen+1)] digital seqs
- * <new> must have one of them allocated already.
- */
- if (! (msa->flags & eslMSA_DIGITAL))
- for (i = 0; i < msa->nseq; i++)
- strcpy(newMSA->aseq[i], msa->aseq[i]);
-#ifdef eslAUGMENT_ALPHABET
- else
- {
- for (i = 0; i < msa->nseq; i++)
- memcpy(newMSA->ax[i], msa->ax[i], (msa->alen+2) * sizeof(ESL_DSQ));
- newMSA->abc = msa->abc;
- }
-#endif
-
- for (i = 0; i < msa->nseq; i++) {
- esl_strdup(msa->sqname[i], -1, &(newMSA->sqname[i]));
- newMSA->wgt[i] = msa->wgt[i];
- }
- /* alen, nseq were already set by Create() */
- newMSA->flags = msa->flags;
-
- esl_strdup(msa->name, -1, &(newMSA->name));
- esl_strdup(msa->desc, -1, &(newMSA->desc));
- esl_strdup(msa->acc, -1, &(newMSA->acc));
- esl_strdup(msa->au, -1, &(newMSA->au));
- esl_strdup(msa->ss_cons, -1, &(newMSA->ss_cons));
- esl_strdup(msa->sa_cons, -1, &(newMSA->sa_cons));
- esl_strdup(msa->pp_cons, -1, &(newMSA->pp_cons));
- esl_strdup(msa->rf, -1, &(newMSA->rf));
-
- if (msa->sqacc != NULL) {
- ESL_ALLOC_WITH_TYPE(newMSA->sqacc, char**, sizeof(char **) * msa->nseq);
- for (i = 0; i < msa->nseq; i++)
- esl_strdup(msa->sqacc[i], -1, &(newMSA->sqacc[i]));
- }
- if (msa->sqdesc != NULL) {
- ESL_ALLOC_WITH_TYPE(newMSA->sqdesc, char**, sizeof(char **) * msa->nseq);
- for (i = 0; i < msa->nseq; i++)
- esl_strdup(msa->sqdesc[i], -1, &(newMSA->sqdesc[i]));
- }
- if (msa->ss != NULL) {
- ESL_ALLOC_WITH_TYPE(newMSA->ss, char**, sizeof(char **) * msa->nseq);
- for (i = 0; i < msa->nseq; i++)
- esl_strdup(msa->ss[i], -1, &(newMSA->ss[i]));
- }
- if (msa->sa != NULL) {
- ESL_ALLOC_WITH_TYPE(newMSA->sa, char**, sizeof(char **) * msa->nseq);
- for (i = 0; i < msa->nseq; i++)
- esl_strdup(msa->sa[i], -1, &(newMSA->sa[i]));
- }
- if (msa->pp != NULL) {
- ESL_ALLOC_WITH_TYPE(newMSA->pp, char**, sizeof(char **) * msa->nseq);
- for (i = 0; i < msa->nseq; i++)
- esl_strdup(msa->pp[i], -1, &(newMSA->pp[i]));
- }
-
- for (x = 0; x < eslMSA_NCUTS; x++) {
- newMSA->cutoff[x] = msa->cutoff[x];
- newMSA->cutset[x] = msa->cutset[x];
- }
-
- if (msa->ncomment > 0) {
- ESL_ALLOC_WITH_TYPE(newMSA->comment, char**, sizeof(char **) * msa->ncomment);
- newMSA->ncomment = msa->ncomment;
- newMSA->alloc_ncomment = msa->ncomment;
- for (i = 0; i < msa->ncomment; i++)
- esl_strdup(msa->comment[i], -1, &(newMSA->comment[i]));
- }
-
- if (msa->ngf > 0) {
- ESL_ALLOC_WITH_TYPE(newMSA->gf_tag, char**, sizeof(char **) * msa->ngf);
- ESL_ALLOC_WITH_TYPE(newMSA->gf, char**, sizeof(char **) * msa->ngf);
- newMSA->ngf = msa->ngf;
- newMSA->alloc_ngf = msa->ngf;
- for (i = 0; i < msa->ngf; i++) {
- esl_strdup(msa->gf_tag[i], -1, &(newMSA->gf_tag[i]));
- esl_strdup(msa->gf[i], -1, &(newMSA->gf[i]));
- }
- }
-
- if (msa->ngs > 0) {
- ESL_ALLOC_WITH_TYPE(newMSA->gs_tag, char**, sizeof(char **) * msa->ngs);
- ESL_ALLOC_WITH_TYPE(newMSA->gs, char***, sizeof(char ***) * msa->ngs);
- newMSA->ngs = msa->ngs;
- for (i = 0; i < msa->ngs; i++) {
- ESL_ALLOC_WITH_TYPE(newMSA->gs[i], char**, sizeof(char **) * msa->nseq);
- esl_strdup(msa->gs_tag[i], -1, &(newMSA->gs_tag[i]));
- for (j = 0; j < msa->nseq; j++)
- esl_strdup(msa->gs[i][j], -1, &(newMSA->gs[i][j]));
- }
- }
-
- if (msa->ngc > 0) {
- ESL_ALLOC_WITH_TYPE(newMSA->gc_tag, char**, sizeof(char **) * msa->ngc);
- ESL_ALLOC_WITH_TYPE(newMSA->gc, char**, sizeof(char **) * msa->ngc);
- newMSA->ngc = msa->ngc;
- for (i = 0; i < msa->ngc; i++) {
- esl_strdup(msa->gc_tag[i], -1, &(newMSA->gc_tag[i]));
- esl_strdup(msa->gc[i], -1, &(newMSA->gc[i]));
- }
- }
-
- if (msa->ngr > 0) {
- ESL_ALLOC_WITH_TYPE(newMSA->gr_tag, char**, sizeof(char **) * msa->ngr);
- ESL_ALLOC_WITH_TYPE(newMSA->gr, char***, sizeof(char ***) * msa->ngr);
- newMSA->ngr = msa->ngr;
- for (i = 0; i < msa->ngr; i++) {
- ESL_ALLOC_WITH_TYPE(newMSA->gr[i], char**, sizeof(char **) * msa->nseq);
- esl_strdup(msa->gr_tag[i], -1, &(newMSA->gr_tag[i]));
- for (j = 0; j < msa->nseq; j++)
- esl_strdup(msa->gr[i][j], -1, &(newMSA->gr[i][j]));
- }
- }
-
-#ifdef eslAUGMENT_KEYHASH
- esl_keyhash_Destroy(newMSA->index); newMSA->index = NULL;
- esl_keyhash_Destroy(newMSA->gs_idx); newMSA->gs_idx = NULL;
- esl_keyhash_Destroy(newMSA->gc_idx); newMSA->gc_idx = NULL;
- esl_keyhash_Destroy(newMSA->gr_idx); newMSA->gr_idx = NULL;
-
- if (msa->index != NULL) newMSA->index = esl_keyhash_Clone(msa->index);
- if (msa->gs_idx != NULL) newMSA->gs_idx = esl_keyhash_Clone(msa->gs_idx);
- if (msa->gc_idx != NULL) newMSA->gc_idx = esl_keyhash_Clone(msa->gc_idx);
- if (msa->gr_idx != NULL) newMSA->gr_idx = esl_keyhash_Clone(msa->gr_idx);
-#endif
-
-#ifdef eslAUGMENT_SSI
- newMSA->offset = msa->offset;
-#endif
-
- return eslOK;
-
-ERROR:
- return status;
-}
-
-/* Function: esl_msa_Clone()
-* Synopsis: Duplicates an MSA.
-* Incept: SRE, Tue Jan 22 15:23:55 2008 [Janelia]
-*
-* Purpose: Make a duplicate of <msa>, in newly
-* allocated space.
-*
-* Returns: a pointer to the newly allocated clone.
-* Caller is responsible for free'ing it.
-*
-* Throws: <NULL> on allocation error.
-*/
-ESL_MSA *
-esl_msa_Clone(const ESL_MSA *msa)
-{
- ESL_MSA *nw = NULL;
- int status;
-
-#ifdef eslAUGMENT_ALPHABET
- if (msa->flags & eslMSA_DIGITAL) {
- if ((nw = esl_msa_CreateDigital(msa->abc, msa->nseq, msa->alen)) == NULL) return NULL;
- } else
-#endif
- if ((nw = esl_msa_Create(msa->nseq, msa->alen)) == NULL) return NULL;
-
- if ((status = esl_msa_Copy(msa, nw) ) != eslOK) goto ERROR;
- return nw;
-
-ERROR:
- esl_msa_Destroy(nw);
- return NULL;
-}
-
-
-/* Function: esl_msa_Destroy()
-* Synopsis: Frees an <ESL_MSA>.
-* Incept: SRE, Sun Jan 23 08:26:02 2005 [St. Louis]
-*
-* Purpose: Destroys <msa>.
-*
-* Xref: squid's MSADestroy().
-*/
-void
-esl_msa_Destroy(ESL_MSA *msa)
-{
- if (msa == NULL) return;
-
- if (msa->aseq != NULL)
- esl_Free2D((void **) msa->aseq, msa->nseq);
-#ifdef eslAUGMENT_ALPHABET
- if (msa->ax != NULL)
- esl_Free2D((void **) msa->ax, msa->nseq);
-#endif /*eslAUGMENT_ALPHABET*/
-
- esl_Free2D((void **) msa->sqname, msa->nseq);
- esl_Free2D((void **) msa->sqacc, msa->nseq);
- esl_Free2D((void **) msa->sqdesc, msa->nseq);
- esl_Free2D((void **) msa->ss, msa->nseq);
- esl_Free2D((void **) msa->sa, msa->nseq);
- esl_Free2D((void **) msa->pp, msa->nseq);
-
- if (msa->sqlen != NULL) free(msa->sqlen);
- if (msa->wgt != NULL) free(msa->wgt);
-
- if (msa->name != NULL) free(msa->name);
- if (msa->desc != NULL) free(msa->desc);
- if (msa->acc != NULL) free(msa->acc);
- if (msa->au != NULL) free(msa->au);
- if (msa->ss_cons != NULL) free(msa->ss_cons);
- if (msa->sa_cons != NULL) free(msa->sa_cons);
- if (msa->pp_cons != NULL) free(msa->pp_cons);
- if (msa->rf != NULL) free(msa->rf);
- if (msa->sslen != NULL) free(msa->sslen);
- if (msa->salen != NULL) free(msa->salen);
- if (msa->pplen != NULL) free(msa->pplen);
-
- esl_Free2D((void **) msa->comment, msa->ncomment);
- esl_Free2D((void **) msa->gf_tag, msa->ngf);
- esl_Free2D((void **) msa->gf, msa->ngf);
-
- esl_Free2D((void **) msa->gs_tag, msa->ngs);
- esl_Free3D((void ***)msa->gs, msa->ngs, msa->nseq);
- esl_Free2D((void **) msa->gc_tag, msa->ngc);
- esl_Free2D((void **) msa->gc, msa->ngc);
- esl_Free2D((void **) msa->gr_tag, msa->ngr);
- esl_Free3D((void ***)msa->gr, msa->ngr, msa->nseq);
-
-#ifdef eslAUGMENT_KEYHASH
- esl_keyhash_Destroy(msa->index);
- esl_keyhash_Destroy(msa->gs_idx);
- esl_keyhash_Destroy(msa->gc_idx);
- esl_keyhash_Destroy(msa->gr_idx);
-#endif /* keyhash augmentation */
-
- free(msa);
- return;
-}
-
-
-/* Function: esl_msa_SetName()
-* Synopsis: Set name of an MSA.
-* Incept: SRE, Sat Feb 23 18:42:47 2008 [Casa de Gatos]
-*
-* Purpose: Sets the name of the msa <msa> to <name>.
-*
-* <name> can be <NULL>, because the MSA name is an
- * optional field; in which case any existing name in
- * the <msa> is erased.
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslEMEM> on allocation error.
- */
-int
-esl_msa_SetName(ESL_MSA *msa, const char *name)
-{
- int status;
-
- if (msa->name != NULL) free(msa->name);
- status = esl_strdup(name, -1, &(msa->name));
- return status;
-}
-
-/* Function: esl_msa_SetDesc()
- * Synopsis: Set the description line of an MSA.
- * Incept: SRE, Sat Feb 23 18:47:06 2008 [Casa de Gatos]
- *
- * Purpose: Sets the description line of the msa <msa> to <desc>.
- *
- * As a special case, <desc> may be <NULL>, to facilitate
- * handling of optional annotation.
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslEMEM> on allocation error.
- */
-int
-esl_msa_SetDesc(ESL_MSA *msa, const char *desc)
-{
- int status;
-
- if (msa->desc != NULL) free(msa->desc);
- status = esl_strdup(desc, -1, &(msa->desc));
- return status;
-
-}
-
-/* Function: esl_msa_SetAccession()
- * Synopsis: Set the accession number of an MSA.
- * Incept: SRE, Sat Feb 23 18:49:04 2008 [Casa de Gatos]
- *
- * Purpose: Sets accession number of the msa <msa> to <acc>.
- *
- * As a special case, <acc> may be <NULL>, to facilitate
- * handling of optional annotation.
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslEMEM> on allocation error.
- */
-int
-esl_msa_SetAccession(ESL_MSA *msa, const char *acc)
-{
- int status;
-
- if (msa->acc != NULL) free(msa->acc);
- status = esl_strdup(acc, -1, &(msa->acc));
- return status;
-}
-
-
-/* Function: esl_msa_SetAuthor()
- * Synopsis: Set the author string in an MSA.
- * Incept: SRE, Wed Mar 4 10:41:21 2009 [Janelia]
- *
- * Purpose: Sets the author string in <msa> to <author>.
- *
- * As a special case, <author> may be <NULL>, to facilitate
- * handling of optional annotation.
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslEMEM> on allocation error.
- */
-int
-esl_msa_SetAuthor(ESL_MSA *msa, const char *author)
-{
- int status;
-
- if (msa->au != NULL) free(msa->au);
- status = esl_strdup(author, -1, &(msa->au));
- return status;
-}
-
-
-/* Function: esl_msa_SetSeqName()
- * Synopsis: Set an individual sequence name in an MSA.
- * Incept: SRE, Wed Mar 4 10:56:28 2009 [Janelia]
- *
- * Purpose: Set the name of sequence number <idx> in <msa>
- * to <name>.
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslEINVAL> if <name> is <NULL>;
- * <eslEMEM> on allocation error.
- *
- * Note: msa->sqname[] is not optional, so we may
- * rely on it already being allocated for
- * i=0..sqalloc-1.
- */
-int
-esl_msa_SetSeqName(ESL_MSA *msa, int idx, const char *name)
-{
- int status;
-
- if (idx >= msa->sqalloc) ESL_EXCEPTION(eslEINVAL, "no such sequence %d (only %d allocated)", idx, msa->sqalloc);
- if (name == NULL) ESL_EXCEPTION(eslEINVAL, "seq names are mandatory; NULL is not a valid name");
-
- if (msa->sqname[idx] != NULL) free(msa->sqname[idx]);
- status = esl_strdup(name, -1, &(msa->sqname[idx]));
- return status;
-}
-
-/* Function: esl_msa_SetSeqAccession()
- * Synopsis: Sets individual sequence accession in an MSA.
- * Incept: SRE, Wed Mar 4 11:03:26 2009 [Janelia]
- *
- * Purpose: Set the accession of sequence number <idx> in <msa> to
- * <acc>.
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslEMEM> on allocation error.
- */
-int
-esl_msa_SetSeqAccession(ESL_MSA *msa, int idx, const char *acc)
-{
- int i;
- int status;
-
- if (idx >= msa->sqalloc) ESL_EXCEPTION(eslEINVAL, "no such sequence %d (only %d allocated)", idx, msa->sqalloc);
- if (acc == NULL) {
- if (msa->sqacc != NULL) { free(msa->sqacc[idx]); msa->sqacc[idx] = NULL; }
- return eslOK;
- }
-
- /* Allocate/initialize the optional sqacc array, if it's not already done: */
- if (msa->sqacc == NULL) {
- ESL_ALLOC_WITH_TYPE(msa->sqacc, char**, sizeof(char *) * msa->sqalloc);
- for (i = 0; i < msa->sqalloc; i++) msa->sqacc[i] = NULL;
- }
- if (msa->sqacc[idx] != NULL) free(msa->sqacc[idx]);
-
- status = esl_strdup(acc, -1, &(msa->sqacc[idx]));
- return status;
-
- ERROR:
- return status;
-}
-
-/* Function: esl_msa_SetSeqDescription()
- * Synopsis: Sets individual sequence description in an MSA.
- * Incept: SRE, Wed Mar 4 11:09:37 2009 [Janelia]
- *
- * Purpose: Set the description of sequence number <idx> in <msa> to
- * <desc>.
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslEMEM> on allocation error.
- */
-int
-esl_msa_SetSeqDescription(ESL_MSA *msa, int idx, const char *desc)
-{
- int i;
- int status;
-
- if (idx >= msa->sqalloc) ESL_EXCEPTION(eslEINVAL, "no such sequence %d (only %d allocated)", idx, msa->sqalloc);
- if (desc == NULL) {
- if (msa->sqdesc != NULL) { free(msa->sqdesc[idx]); msa->sqdesc[idx] = NULL; }
- return eslOK;
- }
-
- /* Allocate/initialize the optional sqdesc array, if it's not already done: */
- if (msa->sqdesc == NULL) {
- ESL_ALLOC_WITH_TYPE(msa->sqdesc, char**, sizeof(char *) * msa->sqalloc);
- for (i = 0; i < msa->sqalloc; i++) msa->sqdesc[i] = NULL;
- }
- if (msa->sqdesc[idx] != NULL) free(msa->sqdesc[idx]);
-
- status = esl_strdup(desc, -1, &(msa->sqdesc[idx]));
- return status;
-
- ERROR:
- return status;
-}
-
-/* Function: esl_msa_FormatName()
- * Synopsis: Format name of an MSA, printf()-style.
- * Incept: SRE, Fri Sep 11 11:33:34 2009 [Janelia]
- *
- * Purpose: Sets the name of the msa <msa> using <name>, where
- * <name> is a <printf()>-style format with
- * arguments; for example, <esl_msa_FormatName(msa, "random%d", i)>.
- *
- * <name> can be <NULL>, because the MSA name is an
- * optional field; in which case any existing name in
- * the <msa> is erased.
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslEMEM> on allocation error;
- * <eslESYS> if a <*printf()> library call fails.
- */
-int
-esl_msa_FormatName(ESL_MSA *msa, const char *name, ...)
-{
- va_list ap;
- int status;
-
- if (msa->name != NULL) free(msa->name);
- if (name == NULL) { msa->name = NULL; return eslOK; }
-
- va_start(ap, name);
- status = esl_vsprintf(&(msa->name), name, &ap);
- va_end(ap);
- return status;
-}
-
-
-/* Function: esl_msa_FormatDesc()
- * Synopsis: Format the description line of an MSA, printf()-style.
- * Incept: SRE, Fri Sep 11 11:34:25 2009 [Janelia]
- *
- * Purpose: Format the description line of the msa <msa> using <desc>.
- * where <desc> is a <printf()>-style format with
- * arguments.
- * For example, <esl_msa_FormatDesc(msa, "sample %d", i)>.
- *
- * As a special case, <desc> may be <NULL>, to facilitate
- * handling of optional annotation.
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslEMEM> on allocation error;
- * <eslESYS> if a <*printf()> library call fails.
- */
-int
-esl_msa_FormatDesc(ESL_MSA *msa, const char *desc, ...)
-{
- va_list ap;
- int status;
-
- if (msa->desc != NULL) free(msa->desc);
- va_start(ap, desc);
- status = esl_vsprintf(&(msa->desc), desc, &ap);
- va_end(ap);
- return status;
-
-}
-
-/* Function: esl_msa_FormatAccession()
- * Synopsis: Format the accession number of an MSA, printf()-style.
- * Incept: SRE, Fri Sep 11 11:35:24 2009 [Janelia].
- *
- * Purpose: Sets accession number of the msa <msa> using <acc>,
- * where <acc> is a <printf()>-style format with arguments.
- * For example, <esl_msa_FormatAccession(msa, "PF%06d", i)>.
- *
- * As a special case, <acc> may be <NULL>, to facilitate
- * handling of optional annotation.
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslEMEM> on allocation error;
- * <eslESYS> if a <*printf()> library call fails.
- */
-int
-esl_msa_FormatAccession(ESL_MSA *msa, const char *acc, ...)
-{
- va_list ap;
- int status;
-
- if (msa->acc != NULL) free(msa->acc);
- va_start(ap, acc);
- status = esl_vsprintf(&(msa->acc), acc, &ap);
- va_end(ap);
- return status;
-}
-
-
-/* Function: esl_msa_FormatAuthor()
- * Synopsis: Format the author string in an MSA, printf()-style.
- * Incept: SRE, Fri Sep 11 11:36:05 2009 [Janelia]
- *
- * Purpose: Sets the author string in <msa>, using an <author> string
- * and arguments in same format as <printf()> would take.
- *
- * As a special case, <author> may be <NULL>, to facilitate
- * handling of optional annotation.
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslEMEM> on allocation error;
- * <eslESYS> if a <*printf()> library call fails.
- */
-int
-esl_msa_FormatAuthor(ESL_MSA *msa, const char *author, ...)
-{
- va_list ap;
- int status;
-
- if (msa->au != NULL) free(msa->au);
- va_start(ap, author);
- status = esl_vsprintf(&(msa->au), author, &ap);
- va_end(ap);
- return status;
-}
-
-
-/* Function: esl_msa_FormatSeqName()
- * Synopsis: Formats an individual sequence name in an MSA, printf()-style.
- * Incept: SRE, Fri Sep 11 11:36:35 2009 [Janelia]
- *
- * Purpose: Set the name of sequence number <idx> in <msa>
- * to <name>, where <name> is a <printf()>
- * style format and arguments.
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslEINVAL> if <name> is <NULL>;
- * <eslEMEM> on allocation error;
- * <eslESYS> if a <*printf()> library call fails.
- *
- * Note: msa->sqname[] is not optional, so we may
- * rely on it already being allocated for
- * i=0..sqalloc-1.
- */
-int
-esl_msa_FormatSeqName(ESL_MSA *msa, int idx, const char *name, ...)
-{
- va_list ap;
- int status;
-
- if (idx >= msa->sqalloc) ESL_EXCEPTION(eslEINVAL, "no such sequence %d (only %d allocated)", idx, msa->sqalloc);
- if (name == NULL) ESL_EXCEPTION(eslEINVAL, "seq names are mandatory; NULL is not a valid name");
-
- if (msa->sqname[idx] != NULL) free(msa->sqname[idx]);
-
- va_start(ap, name);
- status = esl_vsprintf(&(msa->sqname[idx]), name, &ap);
- va_end(ap);
- return status;
-}
-
-/* Function: esl_msa_FormatSeqAccession()
- * Synopsis: Format individual sequence accession in an MSA, printf()-style.
- * Incept: SRE, Fri Sep 11 11:37:08 2009 [Janelia]
- *
- * Purpose: Set the accession of sequence number <idx> in <msa> to
- * <acc>, where <acc> is a <printf()> style format and
- * arguments.
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslEMEM> on allocation error;
- * <eslESYS> if a <*printf()> library call fails.
- */
-int
-esl_msa_FormatSeqAccession(ESL_MSA *msa, int idx, const char *acc, ...)
-{
- va_list ap;
- int i;
- int status;
-
- if (idx >= msa->sqalloc) ESL_EXCEPTION(eslEINVAL, "no such sequence %d (only %d allocated)", idx, msa->sqalloc);
- if (acc == NULL) {
- if (msa->sqacc != NULL) { free(msa->sqacc[idx]); msa->sqacc[idx] = NULL; }
- return eslOK;
- }
-
- /* Allocate/initialize the optional sqacc array, if it's not already done: */
- if (msa->sqacc == NULL) {
- ESL_ALLOC_WITH_TYPE(msa->sqacc, char**, sizeof(char *) * msa->sqalloc);
- for (i = 0; i < msa->sqalloc; i++) msa->sqacc[i] = NULL;
- }
- if (msa->sqacc[idx] != NULL) free(msa->sqacc[idx]);
-
- va_start(ap, acc);
- status = esl_vsprintf(&(msa->sqacc[idx]), acc, &ap);
- va_end(ap);
- return status;
-
- ERROR:
- return status;
-}
-
-/* Function: esl_msa_FormatSeqDescription()
- * Synopsis: Formats individual sequence description in an MSA, printf()-style.
- * Incept: SRE, Fri Sep 11 11:37:35 2009 [Janelia]
- *
- * Purpose: Set the description of sequence number <idx> in <msa> to
- * <desc>, where <desc> may be a <printf()> style format and
- * arguments.
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslEMEM> on allocation error;
- * <eslESYS> if a <*printf()> library call fails.
- */
-int
-esl_msa_FormatSeqDescription(ESL_MSA *msa, int idx, const char *desc, ...)
-{
- va_list ap;
- int i;
- int status;
-
- if (idx >= msa->sqalloc) ESL_EXCEPTION(eslEINVAL, "no such sequence %d (only %d allocated)", idx, msa->sqalloc);
- if (desc == NULL) {
- if (msa->sqdesc != NULL) { free(msa->sqdesc[idx]); msa->sqdesc[idx] = NULL; }
- return eslOK;
- }
-
- /* Allocate/initialize the optional sqdesc array, if it's not already done: */
- if (msa->sqdesc == NULL) {
- ESL_ALLOC_WITH_TYPE(msa->sqdesc, char**, sizeof(char *) * msa->sqalloc);
- for (i = 0; i < msa->sqalloc; i++) msa->sqdesc[i] = NULL;
- }
- if (msa->sqdesc[idx] != NULL) free(msa->sqdesc[idx]);
-
- va_start(ap, desc);
- status = esl_vsprintf(&(msa->sqdesc[idx]), desc, &ap);
- va_end(ap);
- return status;
-
- ERROR:
- return status;
-}
-
-
-/*---------------------- end of ESL_MSA functions ---------------------------*/
-
-
-
-/******************************************************************************
-*# 3. Digital mode MSA's (augmentation: alphabet)
-*****************************************************************************/
-#ifdef eslAUGMENT_ALPHABET
-/* Function: esl_msa_GuessAlphabet()
-* Synopsis: Guess alphabet of MSA.
-* Incept: SRE, Fri May 18 09:55:08 2007 [Janelia]
-*
-* Purpose: Guess whether the sequences in the <msa> are
-* <eslDNA>, <eslRNA>, or <eslAMINO>, and return
-* that guess in <*ret_type>.
-*
-* The determination is made based on the classifications
-* of the individual sequences in the alignment. At least
-* one sequence must contain ten residues or more to be
-* classified. If one or more sequences is called
-* <eslAMINO> and one or more is called <eslDNA>/<eslRNA>,
-* the alignment's alphabet is considered to be
-* indeterminate (<eslUNKNOWN>). If some sequences are
-* <eslDNA> and some are <eslRNA>, the alignment is called
-* <eslDNA>; this should cause no problems, because Easel
-* reads U as a synonym for T in DNA sequence anyway.
-*
-* Tested on Pfam 21.0 and Rfam 7.0, this routine correctly
-* classified all 8957 Pfam alignments as protein, and 503
-* Rfam alignments as RNA (both seed and full alignments).
-*
-* Returns: <eslOK> on success, and <*ret_type> is set
-* to <eslDNA>, <eslRNA>, or <eslAMINO>.
-*
-* Returns <eslEAMBIGUOUS> and sets <*ret_type> to
-* <eslUNKNOWN> if the alphabet cannot be reliably guessed.
-*
-* Xref: J1/62
-*/
-int
-esl_msa_GuessAlphabet(const ESL_MSA *msa, int *ret_type)
-{
- int64_t namino = 0,
- ndna = 0,
- nrna = 0,
- nunknown = 0;
- int type;
- int i,x;
- int64_t j,n;
- int64_t ct[26];
-
- if (msa->flags & eslMSA_DIGITAL) { *ret_type = msa->abc->type; return eslOK; }
-
- *ret_type = eslUNKNOWN;
-
- /* On wide alignments, we're better off looking at individual sequence
- * classifications. We don't want to end up calling the whole alignment
- * indeterminate just because a few sequences have degenerate residue
- * codes.
- */
- for (i = 0; i < msa->nseq; i++)
- {
- for (x = 0; x < 26; x++) ct[x] = 0;
- for (n = 0, j = 0; j < msa->alen; j++) {
- x = toupper(msa->aseq[i][j]) - 'A';
- if (x < 0 || x > 26) continue;
- ct[x]++;
- n++;
- if (n > 10000) break; /* ought to know by now */
- }
- esl_abc_GuessAlphabet(ct, &type);
-
- switch (type) {
- case eslAMINO: namino++; break;
- case eslDNA: ndna++; break;
- case eslRNA: nrna++; break;
- default: nunknown++;
- }
- }
- if (namino > 0 && (ndna+nrna) == 0) *ret_type = eslAMINO;
- else if (ndna > 0 && (nrna+namino) == 0) *ret_type = eslDNA;
- else if (nrna > 0 && (ndna+namino) == 0) *ret_type = eslRNA;
- else if (ndna+nrna > 0 && namino == 0) *ret_type = eslDNA;
-
- /* On narrow alignments, no single sequence may be long enough to
- * be classified, but we can determine alphabet from composition
- * of the complete alignment. Of course, degenerate residue codes in
- * a DNA alignment will still screw us.
- */
- if (*ret_type == eslUNKNOWN)
- {
-
- n = 0;
- for (x = 0; x < 26; x++) ct[x] = 0;
- for (i = 0; i < msa->nseq; i++) {
- for (j = 0; j < msa->alen; j++) {
- x = toupper(msa->aseq[i][j]) - 'A';
- if (x < 0 || x > 26) continue;
- ct[x]++;
- n++;
- if (n > 10000) break; /* ought to know by now */
- }
- if (n > 10000) break;
- }
- esl_abc_GuessAlphabet(ct, ret_type);
- }
-
- if (*ret_type == eslUNKNOWN) return eslEAMBIGUOUS;
- else return eslOK;
-}
-
-
-/* Function: esl_msa_CreateDigital()
-* Synopsis: Create a digital <ESL_MSA>.
-* Incept: SRE, Sun Aug 27 16:49:58 2006 [Leesburg]
-*
-* Purpose: Same as <esl_msa_Create()>, except the returned MSA is configured
-* for a digital alignment using internal alphabet <abc>, instead of
-* a text alignment.
-*
-* Internally, this means the <ax> field is allocated instead of
-* the <aseq> field, and the <eslMSA_DIGITAL> flag is raised.
-*
-* Args: <nseq> - number of sequences, or nseq allocation blocksize
-* <alen> - length of alignment in columns, or -1
-*
-* Returns: pointer to new MSA object, w/ all values initialized.
-* Note that <msa->nseq> is initialized to 0, even though space
-* is allocated.
-*
-* Throws: NULL on allocation failure.
-*
-* Xref: squid's MSAAlloc()
-*/
-ESL_MSA *
-esl_msa_CreateDigital(const ESL_ALPHABET *abc, int nseq, int64_t alen)
-{
- int status;
- ESL_MSA *msa;
- int i;
-
- msa = create_mostly(nseq, alen); /* aseq is null upon successful return */
- if (msa == NULL) return NULL; /* already threw error in mostly_create, so percolate */
-
- ESL_ALLOC_WITH_TYPE(msa->ax, ESL_DSQ**, sizeof(ESL_DSQ *) * msa->sqalloc);
- for (i = 0; i < msa->sqalloc; i++)
- msa->ax[i] = NULL;
-
- if (alen != -1)
- {
- for (i = 0; i < nseq; i++) {
- ESL_ALLOC_WITH_TYPE(msa->ax[i], ESL_DSQ*, sizeof(ESL_DSQ) * (alen+2));
- msa->ax[i][0] = msa->ax[i][alen+1] = eslDSQ_SENTINEL; /* help the poor */
- }
- msa->nseq = nseq;
- }
-
- msa->abc = (ESL_ALPHABET *) abc; /* this cast away from const-ness is deliberate & safe. */
- msa->flags |= eslMSA_DIGITAL;
- return msa;
-
-ERROR:
- esl_msa_Destroy(msa);
- return NULL;
-}
-
-/* Function: esl_msa_Digitize()
-* Synopsis: Digitizes an msa, converting it from text mode.
-* Incept: SRE, Sat Aug 26 17:33:08 2006 [AA 5302 to Dulles]
-*
-* Purpose: Given an alignment <msa> in text mode, convert it to
-* digital mode, using alphabet <abc>.
-*
-* Internally, the <ax> digital alignment field is filled,
-* the <aseq> text alignment field is destroyed and free'd,
-* a copy of the alphabet pointer is kept in the msa's
-* <abc> reference, and the <eslMSA_DIGITAL> flag is raised
-* in <flags>.
-*
- * Because <esl_msa_Digitize()> may be called on
- * unvalidated user data, <errbuf> may be passed, for
- * capturing an informative error message. For example, in
- * reading alignments from files, invalid characters in the
- * alignment are caught at the digitization step.
- *
-* Args: abc - digital alphabet
-* msa - multiple alignment to digitize
- * errbuf - optional: error message buffer, or <NULL>
-*
-* Returns: <eslOK> on success;
-* <eslEINVAL> if one or more sequences contain invalid characters
-* that can't be digitized. If this happens, the <msa> is returned
-* unaltered - left in text mode, with <aseq> as it was. (This is
-* a normal error, because <msa->aseq> may be user input that we
-* haven't validated yet.)
-*
-* Throws: <eslEMEM> on allocation failure; in this case, state of <msa> may be
-* wedged, and it should only be destroyed, not used.
-*/
-int
-esl_msa_Digitize(const ESL_ALPHABET *abc, ESL_MSA *msa, char *errbuf)
-{
- char errbuf2[eslERRBUFSIZE];
- int i;
- int status;
-
- /* Contract checks */
- if (msa->aseq == NULL) ESL_EXCEPTION(eslEINVAL, "msa has no text alignment");
- if (msa->ax != NULL) ESL_EXCEPTION(eslEINVAL, "msa already has digital alignment");
- if (msa->flags & eslMSA_DIGITAL) ESL_EXCEPTION(eslEINVAL, "msa is flagged as digital");
-
- /* Validate before we convert. Then we can leave the <aseq> untouched if
- * any of the sequences contain invalid characters.
- */
- for (i = 0; i < msa->nseq; i++)
- if (esl_abc_ValidateSeq(abc, msa->aseq[i], msa->alen, errbuf2) != eslOK)
- ESL_FAIL(eslEINVAL, errbuf, "%s: %s", msa->sqname[i], errbuf2);
-
- /* Convert, sequence-by-sequence, free'ing aseq as we go. */
- ESL_ALLOC_WITH_TYPE(msa->ax, ESL_DSQ**, msa->sqalloc * sizeof(ESL_DSQ *));
- for (i = 0; i < msa->nseq; i++)
- {
- ESL_ALLOC_WITH_TYPE(msa->ax[i], ESL_DSQ*, (msa->alen+2) * sizeof(ESL_DSQ));
- status = esl_abc_Digitize(abc, msa->aseq[i], msa->ax[i]);
- if (status != eslOK) goto ERROR;
- free(msa->aseq[i]);
- }
- for (; i < msa->sqalloc; i++)
- msa->ax[i] = NULL;
- free(msa->aseq);
- msa->aseq = NULL;
-
- msa->abc = (ESL_ALPHABET *) abc; /* convince compiler that removing const-ness is safe */
- msa->flags |= eslMSA_DIGITAL;
- return eslOK;
-
-ERROR:
- return status;
-}
-
-/* Function: esl_msa_Textize()
-* Synopsis: Convert a digital msa to text mode.
-* Incept: SRE, Sat Aug 26 18:14:30 2006 [AA 5302 to Dulles]
-*
-* Purpose: Given an alignment <msa> in digital mode, convert it
-* to text mode.
-*
-* Internally, the <aseq> text alignment field is filled, the
-* <ax> digital alignment field is destroyed and free'd, the
-* msa's <abc> digital alphabet reference is nullified, and
-* the <eslMSA_DIGITAL> flag is dropped in <flags>.
-*
-* Args: msa - multiple alignment to convert to text
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure.
-* <eslECORRUPT> if one or more of the digitized alignment strings
-* contain invalid characters.
-*/
-int
-esl_msa_Textize(ESL_MSA *msa)
-{
- int status;
- int i;
-
- /* Contract checks
- */
- if (msa->ax == NULL) ESL_EXCEPTION(eslEINVAL, "msa has no digital alignment");
- if (msa->aseq != NULL) ESL_EXCEPTION(eslEINVAL, "msa already has text alignment");
- if (! (msa->flags & eslMSA_DIGITAL)) ESL_EXCEPTION(eslEINVAL, "msa is not flagged as digital");
- if (msa->abc == NULL) ESL_EXCEPTION(eslEINVAL, "msa has no digital alphabet");
-
- /* Convert, sequence-by-sequence, free'ing ax as we go.
- */
- ESL_ALLOC_WITH_TYPE(msa->aseq, char**, msa->sqalloc * sizeof(char *));
- for (i = 0; i < msa->nseq; i++)
- {
- ESL_ALLOC_WITH_TYPE(msa->aseq[i], char*, (msa->alen+1) * sizeof(char));
- status = esl_abc_Textize(msa->abc, msa->ax[i], msa->alen, msa->aseq[i]);
- if (status != eslOK) goto ERROR;
- free(msa->ax[i]);
- }
- for (; i < msa->sqalloc; i++)
- msa->aseq[i] = NULL;
- free(msa->ax);
- msa->ax = NULL;
-
- msa->abc = NULL; /* nullify reference (caller still owns real abc) */
- msa->flags &= ~eslMSA_DIGITAL; /* drop the flag */
- return eslOK;
-
-ERROR:
- return status;
-}
-
-#endif
-
-/*---------------------- end of digital MSA functions -----------------------*/
-
-
-
-
-
-/******************************************************************************
-*# 4. Random MSA database access (augmentation: ssi)
-*****************************************************************************/
-#ifdef eslAUGMENT_SSI
-/* Function: esl_msafile_PositionByKey()
-* Synopsis: Use SSI to reposition file to start of named MSA.
-* Incept: SRE, Mon May 28 11:04:59 2007 [Janelia]
-*
-* Purpose: Reposition <afp> so that the next MSA we read
-* will be the one named (or accessioned) <key>.
-*
-* Returns: <eslOK> on success, and the file <afp> is repositioned
-* such that the next <esl_msafile_Read()> call will read the
-* alignment named <key>.
-*
-* Returns <eslENOTFOUND> if <key> isn't found in the index
-* for <afp>.
-*
-* Returns <eslEFORMAT> if something goes wrong trying to
-* read the index, indicating some sort of file format
-* problem in the SSI file.
-*
-* Throws: <eslEMEM> on allocation failure;
-* <eslEINVAL> if there's no open SSI index;
-* <eslESYS> if an <fseek()> fails.
-* In all these cases, the state of the <afp> is uncertain
-* and may be corrupt; the application should not continue
-* to use it.
-*/
-int
-esl_msafile_PositionByKey(ESL_MSAFILE *afp, const char *key)
-{
- uint16_t fh;
- off_t offset;
- int status;
-
- if (afp->ssi == NULL) ESL_EXCEPTION(eslEINVAL, "Need an open SSI index to call esl_msafile_PositionByKey()");
- if ((status = esl_ssi_FindName(afp->ssi, key, &fh, &offset, NULL, NULL)) != eslOK) return status;
- if (fseeko(afp->f, offset, SEEK_SET) != 0) ESL_EXCEPTION(eslESYS, "fseek failed");
-
- /* If the <afp> had an MSA cached, we will probably have to discard
- * it, unless by chance it's exactly the MSA we're looking for.
- */
- if (afp->msa_cache != NULL)
- {
- if ( (afp->msa_cache->name == NULL || strcmp(afp->msa_cache->name, key) != 0) &&
- (afp->msa_cache->acc == NULL || strcmp(afp->msa_cache->acc, key) != 0))
- {
- esl_msa_Destroy(afp->msa_cache);
- afp->msa_cache = NULL;
- }
- }
-
- /* The linenumber gets messed up after a file positioning. Best we can do
- * is to reset it to zero.
- */
- afp->linenumber = 0;
- return eslOK;
-}
-#endif /*eslAUGMENT_SSI*/
-/*------------- end of functions added by SSI augmentation -------------------*/
-
-
-
-
-
-/*****************************************************************
-*# 6. Miscellaneous functions for manipulating MSAs
-*****************************************************************/
-
-/* Function: esl_msa_ReasonableRF()
-* Synopsis: Determine a reasonable #=RF line marking "consensus" columns.
-* Incept: SRE, Wed Sep 3 10:42:05 2008 [Janelia]
-*
-* Purpose: Define an <rfline> for the multiple alignment <msa> that
-* marks consensus columns with an 'x', and non-consensus
-* columns with a '.'.
-*
-* Consensus columns are defined as columns with fractional
-* occupancy of $\geq$ <symfrac> in residues. For example,
-* if <symfrac> is 0.7, columns containing $\geq$ 70\%
-* residues are assigned as 'x' in the <rfline>, roughly
-* speaking. "Roughly speaking", because the fractional
-* occupancy is in fact calculated as a weighted frequency
-* using sequence weights in <msa->wgt>, and because
-* missing data symbols are ignored in order to be able to
-* deal with sequence fragments.
-*
-* The greater <symfrac> is, the more stringent the
-* definition, and the fewer columns will be defined as
-* consensus. <symfrac=0> will define all columns as
-* consensus. <symfrac=1> will only define a column as
-* consensus if it contains no gap characters at all.
-*
-* If the caller wants to designate any sequences as
-* fragments, it must convert all leading and trailing gaps
-* to the missing data symbol '~'.
-*
-* For text mode alignments, any alphanumeric character is
-* considered to be a residue, and any non-alphanumeric
-* character is considered to be a gap.
-*
-* The <rfline> is a NUL-terminated string, indexed
-* <0..alen-1>.
-*
-* The <rfline> result can be <msa->rf>, if the caller
-* wants to set the <msa's> own RF line; or it can be any
-* alternative storage provided by the caller. In either
-* case, the caller must provide allocated space for at
-* least <msa->alen+1> chars.
-*
-* Args: msa - MSA to define a consensus RF line for
-* symfrac - threshold for defining consensus columns
-* rfline - RESULT: string containing x for consensus, . for not
-*
-* Returns: <eslOK> on success.
-*
-* Xref: HMMER p7_Fastmodelmaker() uses an essentially identical
-* calculation to define model architecture, and could be
-* rewritten now to use this function.
-*
-* A2M format alignment output uses this to define
-* consensus columns when #=RF annotation isn't available.
-*/
-int
-esl_msa_ReasonableRF(ESL_MSA *msa, double symfrac, char *rfline)
-{
- int apos;
- int idx;
- double r;
- double totwgt;
-
-#ifdef eslAUGMENT_ALPHABET
- if (msa->flags & eslMSA_DIGITAL)
- {
- for (apos = 1; apos <= msa->alen; apos++)
- {
- r = totwgt = 0.;
- for (idx = 0; idx < msa->nseq; idx++)
- {
- if (esl_abc_XIsResidue(msa->abc, msa->ax[idx][apos])) { r += msa->wgt[idx]; totwgt += msa->wgt[idx]; }
- else if (esl_abc_XIsGap(msa->abc, msa->ax[idx][apos])) { totwgt += msa->wgt[idx]; }
- else if (esl_abc_XIsMissing(msa->abc, msa->ax[idx][apos])) continue;
- }
- if (r > 0. && r / totwgt >= symfrac) msa->rf[apos-1] = 'x';
- else msa->rf[apos-1] = '.';
- }
- }
-#endif
- if (! (msa->flags & eslMSA_DIGITAL))
- {
- for (apos = 0; apos < msa->alen; apos++)
- {
- r = totwgt = 0.;
- for (idx = 0; idx < msa->nseq; idx++)
- {
- if (isalpha(msa->aseq[idx][apos])) { r += msa->wgt[idx]; totwgt += msa->wgt[idx]; }
- else totwgt += msa->wgt[idx];
- }
- if (r > 0. && r / totwgt >= symfrac) msa->rf[apos] = 'x';
- else msa->rf[apos] = '.';
- }
- }
-
- msa->rf[msa->alen] = '\0';
- return eslOK;
-}
-
-
-/* Function: esl_msa_MarkFragments()
-* Synopsis: Heuristically define seq fragments in an alignment.
-* Incept: SRE, Wed Sep 3 11:49:25 2008 [Janelia]
-*
-* Purpose: Use a heuristic to define sequence fragments (as opposed
-* to "full length" sequences in alignment <msa>.
-*
-* The rule is that if the sequence has a raw (unaligned)
-* length of less than <fragthresh> times the alignment
-* length in columns, the sequence is defined as a fragment.
-*
-* For each fragment, all leading and trailing gap symbols
-* (all gaps before the first residue and after the last
-* residue) are converted to missing data symbols
-* (typically '~', but nonstandard digital alphabets may
-* have defined another character).
-*
-* As a special case, if <fragthresh> is negative, then all
-* sequences are defined as fragments.
-*
-* Args: msa - alignment in which to define and mark seq fragments
-* fragthresh - define frags if rlen < fragthresh * alen;
-* or if fragthresh < 0, all seqs are marked as frags.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: (no abnormal error conditions)
-*/
-int
-esl_msa_MarkFragments(ESL_MSA *msa, double fragthresh)
-{
- int i;
- int pos;
-
- for (i = 0; i < msa->nseq; i++)
- if (fragthresh < 0.0 || msa_get_rlen(msa, i) < fragthresh * msa->alen)
- {
-#ifdef eslAUGMENT_ALPHABET
- if (msa->flags & eslMSA_DIGITAL) {
- for (pos = 1; pos <= msa->alen; pos++) {
- if (esl_abc_XIsResidue(msa->abc, msa->ax[i][pos])) break;
- msa->ax[i][pos] = esl_abc_XGetMissing(msa->abc);
- }
- for (pos = msa->alen; pos >= 1; pos--) {
- if (esl_abc_XIsResidue(msa->abc, msa->ax[i][pos])) break;
- msa->ax[i][pos] = esl_abc_XGetMissing(msa->abc);
- }
- }
-#endif
- if (! (msa->flags & eslMSA_DIGITAL))
- {
- for (pos = 0; pos < msa->alen; pos++) {
- if (isalnum(msa->aseq[i][pos])) break;
- msa->aseq[i][pos] = '~';
- }
- for (pos = msa->alen-1; pos >= 0; pos--) {
- if (isalnum(msa->aseq[i][pos])) break;
- msa->aseq[i][pos] = '~';
- }
- }
- }
- return eslOK;
-}
-
-
-/* Function: esl_msa_SequenceSubset()
-* Synopsis: Select subset of sequences into a smaller MSA.
-* Incept: SRE, Wed Apr 13 10:05:44 2005 [St. Louis]
-*
-* Purpose: Given an array <useme> (0..nseq-1) of TRUE/FALSE flags for each
-* sequence in an alignment <msa>; create a new alignment containing
-* only those seqs which are flagged <useme=TRUE>. Return a pointer
-* to this newly allocated alignment through <ret_new>. Caller is
-* responsible for freeing it.
-*
-* The smaller alignment might now contain columns
-* consisting entirely of gaps or missing data, depending
-* on what sequence subset was extracted. The caller may
-* want to immediately call <esl_msa_MinimGaps()> on the
-* new alignment to clean this up.
-*
- * Unparsed GS and GR Stockholm annotation that is presumably still
- * valid is transferred to the new alignment. Unparsed GC, GF, and
- * comments that are potentially invalidated by taking the subset
- * of sequences are not transferred to the new MSA.
-*
-* Weights are transferred exactly. If they need to be
-* renormalized to some new total weight (such as the new,
-* smaller total sequence number), the caller must do that.
-*
-* <msa> may be in text mode or digital mode. The new MSA
-* in <ret_new> will have the same mode.
-*
-* Returns: <eslOK> on success, and <ret_new> is set to point at a new
-* (smaller) alignment.
-*
-* Throws: <eslEINVAL> if the subset has no sequences in it;
-* <eslEMEM> on allocation error.
-*
-* Xref: squid's MSASmallerAlignment(), 1999.
-*/
-int
-esl_msa_SequenceSubset(const ESL_MSA *msa, const int *useme, ESL_MSA **ret_new)
-{
- ESL_MSA *newMSA = NULL;
- int nnew; /* number of seqs in the new MSA */
- int oidx, nidx; /* old, new indices */
- int i;
- int status;
-
- *ret_new = NULL;
-
- nnew = 0;
- for (oidx = 0; oidx < msa->nseq; oidx++)
- if (useme[oidx]) nnew++;
- if (nnew == 0) ESL_EXCEPTION(eslEINVAL, "No sequences selected");
-
- /* Note that the Create() calls allocate exact space for the sequences,
- * so we will strcpy()/memcpy() into them below.
-*/
-#ifdef eslAUGMENT_ALPHABET
- if ((msa->flags & eslMSA_DIGITAL) &&
- (newMSA = esl_msa_CreateDigital(msa->abc, nnew, msa->alen)) == NULL)
- {status = eslEMEM; goto ERROR; }
-#endif
- if (! (msa->flags & eslMSA_DIGITAL) &&
- (newMSA = esl_msa_Create(nnew, msa->alen)) == NULL)
- {status = eslEMEM; goto ERROR; }
- if (newMSA == NULL)
- {status = eslEMEM; goto ERROR; }
-
-
- /* Copy the old to the new */
- for (nidx = 0, oidx = 0; oidx < msa->nseq; oidx++)
- if (useme[oidx])
-{
-#ifdef eslAUGMENT_ALPHABET
- if (msa->flags & eslMSA_DIGITAL)
- memcpy(newMSA->ax[nidx], msa->ax[oidx], sizeof(ESL_DSQ) * (msa->alen+2));
-#endif
- if (! (msa->flags & eslMSA_DIGITAL))
- strcpy(newMSA->aseq[nidx], msa->aseq[oidx]);
- if ((status = esl_strdup(msa->sqname[oidx], -1, &(newMSA->sqname[nidx]))) != eslOK) goto ERROR;
-
- newMSA->wgt[nidx] = msa->wgt[oidx];
-
- if (msa->sqacc != NULL && msa->sqacc[oidx] != NULL) {
- if ((status = esl_msa_SetSeqAccession(newMSA, nidx, msa->sqacc[oidx])) != eslOK) goto ERROR;
- }
- if (msa->sqdesc != NULL && msa->sqdesc[oidx] != NULL) {
- if ((status = esl_msa_SetSeqDescription(newMSA, nidx, msa->sqdesc[oidx])) != eslOK) goto ERROR;
- }
- if (msa->ss != NULL && msa->ss[oidx] != NULL) {
- if ((status = set_seq_ss(newMSA, nidx, msa->ss[oidx])) != eslOK) goto ERROR;
- }
- if (msa->sa != NULL && msa->sa[oidx] != NULL) {
- if ((status = set_seq_sa(newMSA, nidx, msa->sa[oidx])) != eslOK) goto ERROR;
- }
- if (msa->pp != NULL && msa->pp[oidx] != NULL) {
- if ((status = set_seq_pp(newMSA, nidx, msa->pp[oidx])) != eslOK) goto ERROR;
- }
- /* unparsed annotation */
- for(i = 0; i < msa->ngs; i++) {
- if(msa->gs[i] != NULL)
- if ((status = esl_msa_AddGS(newMSA, msa->gs_tag[i], nidx, msa->gs[i][oidx])) != eslOK) goto ERROR;
- }
- for(i = 0; i < msa->ngr; i++) {
- if(msa->gr[i] != NULL)
- if ((status = esl_msa_AppendGR(newMSA, msa->gr_tag[i], nidx, msa->gr[i][oidx])) != eslOK) goto ERROR;
- }
-
- nidx++;
- }
-
- newMSA->flags = msa->flags;
-
- if ((status = esl_strdup(msa->name, -1, &(newMSA->name))) != eslOK) goto ERROR;
- if ((status = esl_strdup(msa->desc, -1, &(newMSA->desc))) != eslOK) goto ERROR;
- if ((status = esl_strdup(msa->acc, -1, &(newMSA->acc))) != eslOK) goto ERROR;
- if ((status = esl_strdup(msa->au, -1, &(newMSA->au))) != eslOK) goto ERROR;
- if ((status = esl_strdup(msa->ss_cons, msa->alen, &(newMSA->ss_cons))) != eslOK) goto ERROR;
- if ((status = esl_strdup(msa->sa_cons, msa->alen, &(newMSA->sa_cons))) != eslOK) goto ERROR;
- if ((status = esl_strdup(msa->pp_cons, msa->alen, &(newMSA->pp_cons))) != eslOK) goto ERROR;
- if ((status = esl_strdup(msa->rf, msa->alen, &(newMSA->rf))) != eslOK) goto ERROR;
-
- for (i = 0; i < eslMSA_NCUTS; i++) {
- newMSA->cutoff[i] = msa->cutoff[i];
- newMSA->cutset[i] = msa->cutset[i];
- }
-
- newMSA->nseq = nnew;
- newMSA->sqalloc = nnew;
-
- /* Since we have a fully constructed MSA, we don't need the
- * aux info used by parsers.
- */
- if (newMSA->sqlen != NULL) { free(newMSA->sqlen); newMSA->sqlen = NULL; }
- if (newMSA->sslen != NULL) { free(newMSA->sslen); newMSA->sslen = NULL; }
- if (newMSA->salen != NULL) { free(newMSA->salen); newMSA->salen = NULL; }
- if (newMSA->pplen != NULL) { free(newMSA->pplen); newMSA->pplen = NULL; }
- newMSA->lastidx = -1;
-
- *ret_new = newMSA;
- return eslOK;
-
-ERROR:
- if (newMSA != NULL) esl_msa_Destroy(newMSA);
- *ret_new = NULL;
- return status;
-}
-
-
-/* remove_broken_basepairs_from_ss_string()
-*
-* Given an array <useme> (0..alen-1) of TRUE/FALSE flags, remove
-* any basepair from an SS string that is between alignment
-* columns (i,j) for which either <useme[i-1]> or <useme[j-1]> is FALSE.
-* Helper function for remove_broken_basepairs_from_msa().
-*
-* The input SS string will be overwritten. If it was not in
-* full WUSS format when pass in, it will be upon exit.
-* Note that that means if there's residues in the input ss
-* that correspond to gaps in an aligned sequence or RF
-* annotation, they will not be treated as gaps in the
-* returned SS. For example, a gap may become a '-' character,
-* a '_' character, or a ':' character. I'm not sure how
-* to deal with this in a better way. We could demand an
-* aligned sequence to use to de-gap the SS string, but
-* that would require disallowing any gap to be involved
-* in a basepair, which I'm not sure is something we want
-* to forbid.
-*
-* If the original SS is inconsistent it's left untouched and
-* non-eslOK is returned as listed below.
-*
-* Returns: <eslOK> on success.
-* <eslESYNTAX> if SS string
-* following esl_wuss_nopseudo() is inconsistent.
-* <eslEINVAL> if a derived ct array implies a pknotted
-* SS, this should be impossible.
-*/
-static int
-remove_broken_basepairs_from_ss_string(char *ss, char *errbuf, int len, const int *useme)
-{
- int64_t apos; /* alignment position */
- int *ct = NULL; /* 0..alen-1 base pair partners array for current sequence */
- char *ss_nopseudo = NULL; /* no-pseudoknot version of structure */
- int status;
-
- ESL_ALLOC_WITH_TYPE(ct, int*, sizeof(int) * (len+1));
- ESL_ALLOC_WITH_TYPE(ss_nopseudo, char*, sizeof(char) * (len+1));
-
- esl_wuss_nopseudo(ss, ss_nopseudo);
- if ((status = esl_wuss2ct(ss_nopseudo, len, ct)) != eslOK)
- ESL_FAIL(status, errbuf, "Consensus structure string is inconsistent.");
- for (apos = 1; apos <= len; apos++) {
- if (!(useme[apos-1])) {
- if (ct[apos] != 0) ct[ct[apos]] = 0;
- ct[apos] = 0;
- }
- }
- /* All broken bps removed from ct, convert to WUSS SS string and overwrite SS */
- if ((status = esl_ct2wuss(ct, len, ss)) != eslOK)
- ESL_FAIL(status, errbuf, "Error converting de-knotted bp ct array to WUSS notation.");
-
- free(ss_nopseudo);
- free(ct);
- return eslOK;
-
-ERROR:
- if (ct != NULL) free(ct);
- if (ss_nopseudo != NULL) free(ss_nopseudo);
- return status;
-}
-
-/* remove_broken_basepairs_from_msa()
-*
-* Given an array <useme> (0..alen-1) of TRUE/FALSE flags, remove
-* any basepair from SS_cons and individual SS annotation in alignment
-* columns (i,j) for which either <useme[i-1]> or <useme[j-1]> is FALSE.
-* Called automatically from esl_msa_ColumnSubset() with same <useme>.
-*
-* If the original structure data is inconsistent it's left untouched.
-*
-* Returns: <eslOK> on success.
-* <eslESYNTAX> if WUSS string for SS_cons or msa->ss
-* following esl_wuss_nopseudo() is inconsistent.
-* <eslEINVAL> if a derived ct array implies a pknotted
-* SS, this should be impossible
-*/
-static int
-remove_broken_basepairs_from_msa(ESL_MSA *msa, char *errbuf, const int *useme)
-{
- int status;
- int i;
-
- if (msa->ss_cons != NULL) {
- if((status = remove_broken_basepairs_from_ss_string(msa->ss_cons, errbuf, msa->alen, useme)) != eslOK) return status;
- }
- /* per-seq SS annotation */
- if (msa->ss != NULL) {
- for(i = 0; i < msa->nseq; i++) {
- if (msa->ss[i] != NULL) {
- if((status = remove_broken_basepairs_from_ss_string(msa->ss[i], errbuf, msa->alen, useme)) != eslOK) return status;
- }
- }
- }
- return eslOK;
-}
-
-
-/* Function: esl_msa_ColumnSubset()
-* Synopsis: Remove a selected subset of columns from the MSA
-*
-* Incept: SRE, Sun Feb 27 10:05:07 2005
-* From squid's MSAShorterAlignment(), 1999
-*
-* Purpose: Given an array <useme> (0..alen-1) of TRUE/FALSE flags,
-* where TRUE means "keep this column in the new alignment";
-* remove all columns annotated as FALSE in the <useme>
-* array. This is done in-place on the MSA, so the MSA is
-* modified: <msa->alen> is reduced, <msa->aseq> is shrunk
-* (or <msa->ax>, in the case of a digital mode alignment),
-* and all associated per-residue or per-column annotation
-* is shrunk.
-*
-* Returns: <eslOK> on success.
-* Possibilities from <remove_broken_basepairs_from_msa()> call:
-* <eslESYNTAX> if WUSS string for <SS_cons> or <msa->ss>
-* following <esl_wuss_nopseudo()> is inconsistent.
-* <eslEINVAL> if a derived ct array implies a pknotted SS.
-*/
-int
-esl_msa_ColumnSubset(ESL_MSA *msa, char *errbuf, const int *useme)
-{
- int status;
- int64_t opos; /* position in original alignment */
- int64_t npos; /* position in new alignment */
- int idx; /* sequence index */
- int i; /* markup index */
-
- /* Remove any basepairs from SS_cons and individual sequence SS
- * for aln columns i,j for which useme[i-1] or useme[j-1] are FALSE
- */
- if((status = remove_broken_basepairs_from_msa(msa, errbuf, useme)) != eslOK) return status;
-
- /* Since we're minimizing, we can overwrite in place, within the msa
- * we've already got.
- * opos runs all the way to msa->alen to include (and move) the \0
- * string terminators (or sentinel bytes, in the case of digital mode)
- */
- for (opos = 0, npos = 0; opos <= msa->alen; opos++)
- {
- if (opos < msa->alen && useme[opos] == FALSE) continue;
-
- if (npos != opos) /* small optimization */
- {
- /* The alignment, and per-residue annotations */
- for (idx = 0; idx < msa->nseq; idx++)
- {
-#ifdef eslAUGMENT_ALPHABET
- if (msa->flags & eslMSA_DIGITAL) /* watch off-by-one in dsq indexing */
- msa->ax[idx][npos+1] = msa->ax[idx][opos+1];
- else
- msa->aseq[idx][npos] = msa->aseq[idx][opos];
-#else
- msa->aseq[idx][npos] = msa->aseq[idx][opos];
-#endif /*eslAUGMENT_ALPHABET*/
- if (msa->ss != NULL && msa->ss[idx] != NULL) msa->ss[idx][npos] = msa->ss[idx][opos];
- if (msa->sa != NULL && msa->sa[idx] != NULL) msa->sa[idx][npos] = msa->sa[idx][opos];
- if (msa->pp != NULL && msa->pp[idx] != NULL) msa->pp[idx][npos] = msa->pp[idx][opos];
- for (i = 0; i < msa->ngr; i++)
- if (msa->gr[i][idx] != NULL)
- msa->gr[i][idx][npos] = msa->gr[i][idx][opos];
- }
- /* The per-column annotations */
- if (msa->ss_cons != NULL) msa->ss_cons[npos] = msa->ss_cons[opos];
- if (msa->sa_cons != NULL) msa->sa_cons[npos] = msa->sa_cons[opos];
- if (msa->pp_cons != NULL) msa->pp_cons[npos] = msa->pp_cons[opos];
- if (msa->rf != NULL) msa->rf[npos] = msa->rf[opos];
- for (i = 0; i < msa->ngc; i++)
- msa->gc[i][npos] = msa->gc[i][opos];
- }
- npos++;
- }
- msa->alen = npos-1; /* -1 because npos includes NUL terminators */
- return eslOK;
-}
-
-/* Function: esl_msa_MinimGaps()
-* Synopsis: Remove columns containing all gap symbols.
-* Incept: SRE, Sun Feb 27 11:03:42 2005 [St. Louis]
-*
-* Purpose: Remove all columns in the multiple alignment <msa>
-* that consist entirely of gaps or missing data.
-*
-* For a text mode alignment, <gaps> is a string defining
-* the gap characters, such as <"-_.">. For a digital mode
-* alignment, <gaps> may be passed as <NULL>, because the
-* internal alphabet already knows what the gap and missing
-* data characters are.
-*
-* <msa> is changed in-place to a narrower alignment
-* containing fewer columns. All per-residue and per-column
-* annotation is altered appropriately for the columns that
-* remain in the new alignment.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure.
-* Possibilities from <esl_msa_ColumnSubset()> call:
-* <eslESYNTAX> if WUSS string for <SS_cons> or <msa->ss>
-* following <esl_wuss_nopseudo()> is inconsistent.
-* <eslEINVAL> if a derived ct array implies a pknotted SS.
-*
-* Xref: squid's MSAMingap().
-*/
-int
-esl_msa_MinimGaps(ESL_MSA *msa, char *errbuf, const char *gaps)
-{
- int *useme = NULL; /* array of TRUE/FALSE flags for which cols to keep */
- int64_t apos; /* column index */
- int idx; /* sequence index */
- int status;
-
- ESL_ALLOC_WITH_TYPE(useme, int*, sizeof(int) * (msa->alen+1)); /* +1 is just to deal w/ alen=0 special case */
-
-#ifdef eslAUGMENT_ALPHABET /* digital mode case */
- if (msa->flags & eslMSA_DIGITAL) /* be careful of off-by-one: useme is 0..L-1 indexed */
- {
- for (apos = 1; apos <= msa->alen; apos++)
- {
- for (idx = 0; idx < msa->nseq; idx++)
- if (! esl_abc_XIsGap (msa->abc, msa->ax[idx][apos]) &&
- ! esl_abc_XIsMissing(msa->abc, msa->ax[idx][apos]))
- break;
- if (idx == msa->nseq) useme[apos-1] = FALSE; else useme[apos-1] = TRUE;
- }
- }
-#endif
- if (! (msa->flags & eslMSA_DIGITAL)) /* text mode case */
- {
- for (apos = 0; apos < msa->alen; apos++)
- {
- for (idx = 0; idx < msa->nseq; idx++)
- if (strchr(gaps, msa->aseq[idx][apos]) == NULL)
- break;
- if (idx == msa->nseq) useme[apos] = FALSE; else useme[apos] = TRUE;
- }
- }
-
- if((status = esl_msa_ColumnSubset(msa, errbuf, useme)) != eslOK) return status;
- free(useme);
- return eslOK;
-
-ERROR:
- if (useme != NULL) free(useme);
- return status;
-}
-
-/* Function: esl_msa_NoGaps()
-* Synopsis: Remove columns containing any gap symbol.
-* Incept: SRE, Sun Feb 27 10:17:58 2005 [St. Louis]
-*
-* Purpose: Remove all columns in the multiple alignment <msa> that
-* contain any gaps or missing data, such that the modified
-* MSA consists only of ungapped columns (a solid block of
-* residues).
-*
-* This is useful for filtering alignments prior to
-* phylogenetic analysis using programs that can't deal
-* with gaps.
-*
-* For a text mode alignment, <gaps> is a string defining
-* the gap characters, such as <"-_.">. For a digital mode
-* alignment, <gaps> may be passed as <NULL>, because the
-* internal alphabet already knows what the gap and
-* missing data characters are.
-*
-* <msa> is changed in-place to a narrower alignment
-* containing fewer columns. All per-residue and per-column
-* annotation is altered appropriately for the columns that
-* remain in the new alignment.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure.
-* Possibilities from <esl_msa_ColumnSubset()> call:
-* <eslESYNTAX> if WUSS string for <SS_cons> or <msa->ss>
-* following <esl_wuss_nopseudo()> is inconsistent.
-* <eslEINVAL> if a derived ct array implies a pknotted SS.
-*
-* Xref: squid's MSANogap().
-*/
-int
-esl_msa_NoGaps(ESL_MSA *msa, char *errbuf, const char *gaps)
-{
- int *useme = NULL; /* array of TRUE/FALSE flags for which cols to keep */
- int64_t apos; /* column index */
- int idx; /* sequence index */
- int status;
-
- ESL_ALLOC_WITH_TYPE(useme, int*, sizeof(int) * (msa->alen+1)); /* +1 is only to deal with alen=0 special case */
-
-#ifdef eslAUGMENT_ALPHABET /* digital mode case */
- if (msa->flags & eslMSA_DIGITAL) /* be careful of off-by-one: useme is 0..L-1 indexed */
- {
- for (apos = 1; apos <= msa->alen; apos++)
- {
- for (idx = 0; idx < msa->nseq; idx++)
- if (esl_abc_XIsGap (msa->abc, msa->ax[idx][apos]) ||
- esl_abc_XIsMissing(msa->abc, msa->ax[idx][apos]))
- break;
- if (idx == msa->nseq) useme[apos-1] = TRUE; else useme[apos-1] = FALSE;
- }
- }
-#endif
- if (! (msa->flags & eslMSA_DIGITAL)) /* text mode case */
- {
- for (apos = 0; apos < msa->alen; apos++)
- {
- for (idx = 0; idx < msa->nseq; idx++)
- if (strchr(gaps, msa->aseq[idx][apos]) != NULL)
- break;
- if (idx == msa->nseq) useme[apos] = TRUE; else useme[apos] = FALSE;
- }
- }
-
- esl_msa_ColumnSubset(msa, errbuf, useme);
- free(useme);
- return eslOK;
-
-ERROR:
- if (useme != NULL) free(useme);
- return status;
-}
-
-
-/* Function: esl_msa_SymConvert()
-* Synopsis: Global search/replace of symbols in an MSA.
-* Incept: SRE, Sun Feb 27 11:20:41 2005 [St. Louis]
-*
-* Purpose: In the aligned sequences in a text-mode <msa>, convert any
-* residue in the string <oldsyms> to its counterpart (at the same
-* position) in string <newsyms>.
-*
-* To convert DNA to RNA, <oldsyms> could be "Tt" and
-* <newsyms> could be "Uu". To convert IUPAC symbols to
-* N's, <oldsyms> could be "RYMKSWHBVDrymkswhbvd" and
-* <newsyms> could be "NNNNNNNNNNnnnnnnnnnn".
-*
-* As a special case, if <newsyms> consists of a single
-* character, then any character in the <oldsyms> is
-* converted to this character.
-*
-* Thus, <newsyms> must either be of the same length as
-* <oldsyms>, or of length 1. Anything else will cause
-* undefined behavior (and probably segfault).
-*
-* The conversion is done in-place, so the <msa> is
-* modified.
-*
-* This is a poor man's hack for processing text mode MSAs
-* into a more consistent text alphabet. It is unnecessary
-* for digital mode MSAs, which are already in a standard
-* internal alphabet. Calling <esl_msa_SymConvert()> on a
-* digital mode alignment throws an <eslEINVAL> error.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEINVAL> if <msa> is in digital mode, or if the <oldsyms>
-* and <newsyms> strings aren't valid together.
-*/
-int
-esl_msa_SymConvert(ESL_MSA *msa, const char *oldsyms, const char *newsyms)
-{
- int64_t apos; /* column index */
- int idx; /* sequence index */
- char *sptr;
- int special;
-
- if (msa->flags & eslMSA_DIGITAL)
- ESL_EXCEPTION(eslEINVAL, "can't SymConvert on digital mode alignment");
- if ((strlen(oldsyms) != strlen(newsyms)) && strlen(newsyms) != 1)
- ESL_EXCEPTION(eslEINVAL, "invalid newsyms/oldsyms pair");
-
- special = (strlen(newsyms) == 1 ? TRUE : FALSE);
-
- for (apos = 0; apos < msa->alen; apos++)
- for (idx = 0; idx < msa->nseq; idx++)
- if ((sptr = strchr((char*)oldsyms, msa->aseq[idx][apos])) != NULL)
- msa->aseq[idx][apos] = (special ? *newsyms : newsyms[sptr-oldsyms]);
- return eslOK;
-}
-
-/* Function: esl_msa_AddComment()
-* Incept: SRE, Tue Jun 1 17:37:21 1999 [St. Louis]
-*
-* Purpose: Add an (unparsed) comment line to the MSA structure,
-* allocating as necessary.
-*
-* Args: msa - a multiple alignment
-* s - comment line to add
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure.
-*/
-int
-esl_msa_AddComment(ESL_MSA *msa, char *s)
-{
- void *p;
- int status;
-
- /* If this is our first recorded comment, we need to allocate;
- * and if we've filled available space, we need to reallocate.
- */
- if (msa->comment == NULL) {
- ESL_ALLOC_WITH_TYPE(msa->comment, char**, sizeof(char *) * 16);
- msa->alloc_ncomment = 16;
- }
- if (msa->ncomment == msa->alloc_ncomment) {
- ESL_RALLOC_WITH_TYPE(msa->comment, char**, p, sizeof(char *) * msa->alloc_ncomment * 2);
- msa->alloc_ncomment *= 2;
- }
- if ((status = esl_strdup(s, -1, &(msa->comment[msa->ncomment]))) != eslOK) goto ERROR;
- msa->ncomment++;
- return eslOK;
-
-ERROR:
- return status;
-}
-
-
-/* Function: esl_msa_AddGF()
-* Incept: SRE, Tue Jun 1 17:37:21 1999 [St. Louis]
-*
-* Purpose: Add an unparsed \verb+#=GF+ markup line to the MSA,
-* allocating as necessary. <tag> is the GF markup
-* tag; <value> is the text associated w/ that tag.
-*
-* Args: msa - a multiple alignment
-* tag - markup tag
-* value - markup text
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure.
-*/
-int
-esl_msa_AddGF(ESL_MSA *msa, char *tag, char *value)
-{
- void *p;
- int n;
- int status;
-
- /* If this is our first recorded unparsed #=GF line, we need to allocate().
- */
- if (msa->gf_tag == NULL) {
- ESL_ALLOC_WITH_TYPE(msa->gf_tag, char**, sizeof(char *) * 16);
- ESL_ALLOC_WITH_TYPE(msa->gf, char**, sizeof(char *) * 16);
- msa->alloc_ngf = 16;
- }
- /* or if we're out of room for new GF's, reallocate by doubling
- */
- if (msa->ngf == msa->alloc_ngf) {
- n = msa->alloc_ngf * 2;
- ESL_RALLOC_WITH_TYPE(msa->gf_tag, char**, p, sizeof(char *) * n);
- ESL_RALLOC_WITH_TYPE(msa->gf, char**, p, sizeof(char *) * n);
- msa->alloc_ngf = n;
- }
-
- if ((status = esl_strdup(tag, -1, &(msa->gf_tag[msa->ngf]))) != eslOK) goto ERROR;
- if ((status = esl_strdup(value, -1, &(msa->gf[msa->ngf]))) != eslOK) goto ERROR;
- msa->ngf++;
- return eslOK;
-
-ERROR:
- return status;
-}
-
-
-/* Function: esl_msa_AddGS()
-* Incept: SRE, Tue Jun 1 17:37:21 1999 [St. Louis]
-*
-* Purpose: Add an unparsed \verb+#=GS+ markup line to the MSA,
-* allocating as necessary. It's possible that we
-* could get more than one of the same type of GS
-* tag per sequence; for example, "DR PDB;" structure
-* links in Pfam. Hack: handle these by appending to
-* the string, in a \verb+\n+ separated fashion.
-*
-* Args: msa - multiple alignment structure
-* tag - markup tag (e.g. "AC")
-* sqidx - index of sequence to assoc markup with (0..nseq-1)
-* value - markup (e.g. "P00666")
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure.
-*/
-int
-esl_msa_AddGS(ESL_MSA *msa, char *tag, int sqidx, char *value)
-{
- void *p;
- int tagidx;
- int i;
- int status;
-
- /* first GS tag? init&allocate */
- if (msa->gs_tag == NULL)
- {
-#ifdef eslAUGMENT_KEYHASH
- msa->gs_idx = esl_keyhash_Create();
- status = esl_key_Store(msa->gs_idx, tag, &tagidx);
- if (status != eslOK && status != eslEDUP) return status;
- ESL_DASSERT1((tagidx == 0));
-#else
- tagidx = 0;
-#endif
- ESL_ALLOC_WITH_TYPE(msa->gs_tag, char**, sizeof(char *)); /* one at a time. */
- ESL_ALLOC_WITH_TYPE(msa->gs, char***, sizeof(char **));
- ESL_ALLOC_WITH_TYPE(msa->gs[0], char**, sizeof(char *) * msa->sqalloc);
- for (i = 0; i < msa->sqalloc; i++)
- msa->gs[0][i] = NULL;
- }
- else
- {
- /* Get a tagidx for this GS tag.
- * tagidx < ngs; we already saw this tag;
- * tagidx == ngs; this is a new one.
- */
-#ifdef eslAUGMENT_KEYHASH
- status = esl_key_Store(msa->gs_idx, tag, &tagidx);
- if (status != eslOK && status != eslEDUP) return status;
-#else
- for (tagidx = 0; tagidx < msa->ngs; tagidx++)
- if (strcmp(msa->gs_tag[tagidx], tag) == 0) break;
-#endif
- /* Reallocation (in blocks of 1) */
- if (tagidx == msa->ngs )
- {
- ESL_RALLOC_WITH_TYPE(msa->gs_tag, char**, p, (msa->ngs+1) * sizeof(char *));
- ESL_RALLOC_WITH_TYPE(msa->gs, char***, p, (msa->ngs+1) * sizeof(char **));
- ESL_ALLOC_WITH_TYPE(msa->gs[msa->ngs], char**, sizeof(char *) * msa->sqalloc);
- for (i = 0; i < msa->sqalloc; i++)
- msa->gs[msa->ngs][i] = NULL;
- }
- }
-
- /* Store the tag, if it's new.
- */
- if (tagidx == msa->ngs)
- {
- if ((status = esl_strdup(tag, -1, &(msa->gs_tag[tagidx]))) != eslOK) goto ERROR;
- msa->ngs++;
- }
-
- /* Store the annotation on the sequence.
- * If seq is unannotated, dup the value; if
- * seq already has a GS annotation, cat a \n, then cat the value.
- */
- if (msa->gs[tagidx][sqidx] == NULL)
- {
- if ((status = esl_strdup(value, -1, &(msa->gs[tagidx][sqidx]))) != eslOK) goto ERROR;
- }
- else
- {
- int n1,n2;
- n1 = strlen(msa->gs[tagidx][sqidx]);
- n2 = strlen(value);
- ESL_RALLOC_WITH_TYPE(msa->gs[tagidx][sqidx], char*, p, sizeof(char) * (n1+n2+2));
- msa->gs[tagidx][sqidx][n1] = '\n';
- strcpy(msa->gs[tagidx][sqidx]+n1+1, value);
- }
- return eslOK;
-
-ERROR:
- return status;
-}
-
-/* Function: esl_msa_AppendGC()
-* Incept: SRE, Tue Jun 1 17:37:21 1999 [St. Louis]
-*
-* Purpose: Add an unparsed \verb+#=GC+ markup line to the MSA
-* structure, allocating as necessary. When called
-* multiple times for the same tag, appends value
-* strings together -- used when parsing multiblock
-* alignment files, for example.
-*
-* Args: msa - multiple alignment structure
-* tag - markup tag (e.g. "CS")
-* value - markup, one char per aligned column
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure.
-*/
-int
-esl_msa_AppendGC(ESL_MSA *msa, char *tag, char *value)
-{
- int tagidx;
- int status;
- void *p;
-
- /* Is this an unparsed tag name that we recognize?
- * If not, handle adding it to index, and reallocating
- * as needed.
- */
- if (msa->gc_tag == NULL) /* first tag? init&allocate */
- {
-#ifdef eslAUGMENT_KEYHASH
- msa->gc_idx = esl_keyhash_Create();
- status = esl_key_Store(msa->gc_idx, tag, &tagidx);
- if (status != eslOK && status != eslEDUP) return status;
- ESL_DASSERT1((tagidx == 0));
-#else
- tagidx = 0;
-#endif
- ESL_ALLOC_WITH_TYPE(msa->gc_tag, char**, sizeof(char **));
- ESL_ALLOC_WITH_TYPE(msa->gc, char**, sizeof(char **));
- msa->gc[0] = NULL;
- }
- else
- { /* new tag? */
- /* get tagidx for this GC tag. existing tag: <ngc; new: == ngc. */
-#ifdef eslAUGMENT_KEYHASH
- status = esl_key_Store(msa->gc_idx, tag, &tagidx);
- if (status != eslOK && status != eslEDUP) goto ERROR;
-#else
- for (tagidx = 0; tagidx < msa->ngc; tagidx++)
- if (strcmp(msa->gc_tag[tagidx], tag) == 0) break;
-#endif
- /* Reallocate, in block of one tag at a time
- */
- if (tagidx == msa->ngc)
- {
- ESL_RALLOC_WITH_TYPE(msa->gc_tag, char**, p, (msa->ngc+1) * sizeof(char **));
- ESL_RALLOC_WITH_TYPE(msa->gc, char**, p, (msa->ngc+1) * sizeof(char **));
- msa->gc[tagidx] = NULL;
- }
- }
- /* new tag? store it.
- */
- if (tagidx == msa->ngc)
- {
- if ((status = esl_strdup(tag, -1, &(msa->gc_tag[tagidx]))) != eslOK) goto ERROR;
- msa->ngc++;
- }
- return (esl_strcat(&(msa->gc[tagidx]), -1, value, -1));
-
-ERROR:
- return status;
-}
-
-/* Function: esl_msa_AppendGR()
-* Incept: SRE, Thu Jun 3 06:34:38 1999 [Madison]
-*
-* Purpose: Add an unparsed \verb+#=GR+ markup line to the MSA structure,
-* allocating as necessary.
-*
-* When called multiple times for the same tag, appends
-* value strings together -- used when parsing multiblock
-* alignment files, for example.
-*
-* Args: msa - multiple alignment structure
-* tag - markup tag (e.g. "SS")
-* sqidx - index of seq to assoc markup with (0..nseq-1)
-* value - markup, one char per aligned column
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure.
-*/
-int
-esl_msa_AppendGR(ESL_MSA *msa, char *tag, int sqidx, char *value)
-{
- void *p;
- int tagidx;
- int i;
- int status;
-
- if (msa->gr_tag == NULL) /* first tag? init&allocate */
- {
-#ifdef eslAUGMENT_KEYHASH
- msa->gr_idx = esl_keyhash_Create();
- status = esl_key_Store(msa->gr_idx, tag, &tagidx);
- if (status != eslOK && status != eslEDUP) return status;
- ESL_DASSERT1((tagidx == 0));
-#else
- tagidx = 0;
-#endif
- ESL_ALLOC_WITH_TYPE(msa->gr_tag, char**, sizeof(char *));
- ESL_ALLOC_WITH_TYPE(msa->gr, char***, sizeof(char **));
- ESL_ALLOC_WITH_TYPE(msa->gr[0], char**, sizeof(char *) * msa->sqalloc);
- for (i = 0; i < msa->sqalloc; i++)
- msa->gr[0][i] = NULL;
- }
- else
- {
- /* get tagidx for this GR tag. existing<ngr; new=ngr.
- */
-#ifdef eslAUGMENT_KEYHASH
- status = esl_key_Store(msa->gr_idx, tag, &tagidx);
- if (status != eslOK && status != eslEDUP) return status;
-#else
- for (tagidx = 0; tagidx < msa->ngr; tagidx++)
- if (strcmp(msa->gr_tag[tagidx], tag) == 0) break;
-#endif
- /* if a new tag, realloc for it */
- if (tagidx == msa->ngr)
- {
- ESL_RALLOC_WITH_TYPE(msa->gr_tag, char**, p, (msa->ngr+1) * sizeof(char *));
- ESL_RALLOC_WITH_TYPE(msa->gr, char***, p, (msa->ngr+1) * sizeof(char **));
- ESL_ALLOC_WITH_TYPE(msa->gr[msa->ngr], char**, sizeof(char *) * msa->sqalloc);
- for (i = 0; i < msa->sqalloc; i++)
- msa->gr[msa->ngr][i] = NULL;
- }
- }
-
- if (tagidx == msa->ngr)
- {
- if ((status = esl_strdup(tag, -1, &(msa->gr_tag[tagidx]))) != eslOK) goto ERROR;
- msa->ngr++;
- }
- return (esl_strcat(&(msa->gr[tagidx][sqidx]), -1, value, -1));
-
-ERROR:
- return status;
-}
-
-
-/* Function: esl_msa_Checksum()
-* Synopsis: Calculate a checksum for an MSA.
-* Incept: SRE, Tue Sep 16 13:23:34 2008 [Janelia]
-*
-* Purpose: Calculates a 32-bit checksum for <msa>.
-*
-* Only the alignment data are considered, not the sequence
-* names or other annotation. For text mode alignments, the
-* checksum is case sensitive.
-*
-* This is used as a quick way to try to verify that a
-* given alignment is identical to an expected one; for
-* example, when HMMER is mapping new sequence alignments
-* onto exactly the same seed alignment an HMM was built
-* from.
-*
-* Returns: <eslOK> on success.
-*
-* Xref: The checksum is a modified version of Jenkin's hash;
-* see <esl_keyhash> for the original and citations.
-*/
-int
-esl_msa_Checksum(const ESL_MSA *msa, uint32_t *ret_checksum)
-{
- uint32_t val = 0;
- int i,pos;
-
-#ifdef eslAUGMENT_ALPHABET
- if (msa->flags & eslMSA_DIGITAL)
- {
- for (i = 0; i < msa->nseq; i++)
- for (pos = 1; pos <= msa->alen; pos++)
- {
- val += msa->ax[i][pos];
- val += (val << 10);
- val ^= (val >> 6);
- }
- }
-#endif
- if (! (msa->flags & eslMSA_DIGITAL))
- {
- for (i = 0; i < msa->nseq; i++)
- for (pos = 0; pos < msa->alen; pos++)
- {
- val += msa->aseq[i][pos];
- val += (val << 10);
- val ^= (val >> 6);
- }
- }
- val += (val << 3);
- val ^= (val >> 11);
- val += (val << 15);
-
- *ret_checksum = val;
- return eslOK;
-}
-/*-------------------- end of misc MSA functions ----------------------*/
-
-
-
-
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_msa.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_msa.h
deleted file mode 100644
index 2e0f258..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_msa.h
+++ /dev/null
@@ -1,213 +0,0 @@
-/* Multiple sequence alignment file i/o.
-*
- * SVN $Id: esl_msa.h 430 2009-11-04 15:15:15Z nawrockie $
-* SRE, Wed Jan 19 19:16:28 2005
-*/
-#ifndef eslMSA_INCLUDED
-#define eslMSA_INCLUDED
-
-#include <hmmer3/easel/easel.h>
-
-#include <stdio.h>
-
-#ifdef eslAUGMENT_KEYHASH
-#include <hmmer3/easel/esl_keyhash.h>
-#endif
-#ifdef eslAUGMENT_ALPHABET
-#include <hmmer3/easel/esl_alphabet.h>
-#endif
-
-#ifdef eslAUGMENT_SSI
-#include <esl_ssi.h>
-#endif
-
-/* The following constants define the Pfam/Rfam cutoff set we propagate
-* from Stockholm format msa's into HMMER and Infernal models.
-*/
-/*::cexcerpt::msa_cutoffs::begin::*/
-#define eslMSA_TC1 0
-#define eslMSA_TC2 1
-#define eslMSA_GA1 2
-#define eslMSA_GA2 3
-#define eslMSA_NC1 4
-#define eslMSA_NC2 5
-#define eslMSA_NCUTS 6
-/*::cexcerpt::msa_cutoffs::end::*/
-
-/* Object: ESL_MSA
-*
-* A multiple sequence alignment.
-*/
-typedef struct {
- /* Mandatory information associated with the alignment.
- * (The important stuff.)
- */
- /*::cexcerpt::msa_mandatory::begin::*/
- char **aseq; /* alignment itself, [0..nseq-1][0..alen-1] */
- char **sqname; /* sequence names, [0..nseq-1][] */
- double *wgt; /* sequence weights [0..nseq-1] */
- int64_t alen; /* length of alignment (columns); or (if growable) -1 */
- int nseq; /* number of seqs in alignment; or (if growable) blocksize */
- int flags; /* flags for what info has been set */
- /*::cexcerpt::msa_mandatory::end::*/
-
-#ifdef eslAUGMENT_ALPHABET
- /* When augmented w/ digital alphabets, we can store pre-digitized data in
- * ax[][], instead of the text info in aseq[][].
- */
- ESL_ALPHABET *abc; /* reference ptr to alphabet */
- ESL_DSQ **ax; /* digitized aseqs [0..nseq-1][1..alen] */
-#endif
-
- /* Optional information that we understand, and that we might have.
- * (The occasionally useful stuff.)
- */
- /*::cexcerpt::msa_optional::begin::*/
- char *name; /* name of alignment, or NULL */
- char *desc; /* description of alignment, or NULL */
- char *acc; /* accession of alignment, or NULL */
- char *au; /* "author" information, or NULL */
- char *ss_cons; /* consensus sec structure, or NULL; [0..alen-1], even in digital mode */
- char *sa_cons; /* consensus surface access, or NULL; [0..alen-1], even in digital mode */
- char *pp_cons; /* consensus posterior prob, or NULL; [0..alen-1], even in digital mode */
- char *rf; /* reference coord system, or NULL; [0..alen-1], even in digital mode */
- char **sqacc; /* accession numbers for sequences i */
- char **sqdesc; /* description lines for sequences i */
- char **ss; /* per-seq secondary structures, or NULL */
- char **sa; /* per-seq surface accessibilities, or NULL */
- char **pp; /* posterior prob per residue, or NULL */
- float cutoff[eslMSA_NCUTS]; /* NC/TC/GA cutoffs propagated to Pfam/Rfam */
- int cutset[eslMSA_NCUTS]; /* TRUE if a cutoff is set; else FALSE */
- /*::cexcerpt::msa_optional::end::*/
-
- /* Info needed for maintenance of the data structure
- * (internal stuff.)
- */
- int sqalloc; /* # seqs currently allocated for */
- int64_t *sqlen; /* individual seq lengths during parsing */
- int64_t *sslen; /* individual ss lengths during parsing */
- int64_t *salen; /* individual sa lengths during parsing */
- int64_t *pplen; /* individual pp lengths during parsing */
- int lastidx; /* last index we saw; use for guessing next */
-
- /* Optional information, especially Stockholm markup.
- * (The stuff we don't understand, but we can regurgitate.)
- *
- * That is, we know what type of information it is, but it's
- * either (interpreted as) free-text comment, or it's Stockholm
- * markup with unfamiliar tags.
- */
- char **comment; /* free text comments, or NULL */
- int ncomment; /* number of comment lines */
- int alloc_ncomment; /* number of comment lines alloc'ed */
-
- char **gf_tag; /* markup tags for unparsed #=GF lines */
- char **gf; /* annotations for unparsed #=GF lines */
- int ngf; /* number of unparsed #=GF lines */
- int alloc_ngf; /* number of gf lines alloc'ed */
-
- char **gs_tag; /* markup tags for unparsed #=GS lines */
- char ***gs; /* [0..ngs-1][0..nseq-1][free text] markup */
- int ngs; /* number of #=GS tag types */
-
- char **gc_tag; /* markup tags for unparsed #=GC lines */
- char **gc; /* [0..ngc-1][0..alen-1] markup */
- int ngc; /* number of #=GC tag types */
-
- char **gr_tag; /* markup tags for unparsed #=GR lines */
- char ***gr; /* [0..ngr][0..nseq-1][0..alen-1] markup */
- int ngr; /* number of #=GR tag types */
-
-#ifdef eslAUGMENT_KEYHASH
- ESL_KEYHASH *index; /* name ->seqidx hash table */
- ESL_KEYHASH *gs_idx; /* hash of #=GS tag types */
- ESL_KEYHASH *gc_idx; /* hash of #=GC tag types */
- ESL_KEYHASH *gr_idx; /* hash of #=GR tag types */
-#endif /*eslAUGMENT_KEYHASH*/
-
-} ESL_MSA;
-
-
-
-/* Flags for msa->flags
-*/
-#define eslMSA_HASWGTS (1 << 0) /* 1 if wgts were set, 0 if default 1.0's */
-#define eslMSA_DIGITAL (1 << 1) /* if ax[][] is used instead of aseq[][] */
-
-
-/* Alignment file format codes.
-* Must coexist with sqio unaligned file format codes.
-* Rules:
-* - 0 is an unknown/unassigned format
-* - <=100 reserved for unaligned formats
-* - >100 reserved for aligned formats
-*/
-#define eslMSAFILE_UNKNOWN 0 /* unknown format */
-#define eslMSAFILE_STOCKHOLM 101 /* Stockholm format, interleaved */
-#define eslMSAFILE_PFAM 102 /* Pfam/Rfam one-line-per-seq Stockholm format */
-#define eslMSAFILE_A2M 103 /* UCSC SAM's fasta-like a2m format */
-#define eslMSAFILE_PSIBLAST 104 /* NCBI PSI-BLAST alignment format */
-#define eslMSAFILE_SELEX 105 /* old SELEX format (largely obsolete) */
-#define eslMSAFILE_AFA 106 /* aligned FASTA format */
-
-/* Declarations of the API
-*/
-/* 1. The ESL_MSA object */
-extern ESL_MSA *esl_msa_Create(int nseq, int64_t alen);
-extern void esl_msa_Destroy(ESL_MSA *msa);
-extern int esl_msa_Expand(ESL_MSA *msa);
-extern int esl_msa_Copy(const ESL_MSA *msa, ESL_MSA *newMSA);
-extern ESL_MSA *esl_msa_Clone(const ESL_MSA *msa);
-
-extern int esl_msa_SetName (ESL_MSA *msa, const char *name);
-extern int esl_msa_SetDesc (ESL_MSA *msa, const char *desc);
-extern int esl_msa_SetAccession (ESL_MSA *msa, const char *acc);
-extern int esl_msa_SetAuthor (ESL_MSA *msa, const char *author);
-extern int esl_msa_SetSeqName (ESL_MSA *msa, int idx, const char *name);
-extern int esl_msa_SetSeqAccession (ESL_MSA *msa, int idx, const char *acc);
-extern int esl_msa_SetSeqDescription(ESL_MSA *msa, int idx, const char *desc);
-
-extern int esl_msa_FormatName (ESL_MSA *msa, const char *name, ...);
-extern int esl_msa_FormatDesc (ESL_MSA *msa, const char *desc, ...);
-extern int esl_msa_FormatAccession (ESL_MSA *msa, const char *acc, ...);
-extern int esl_msa_FormatAuthor (ESL_MSA *msa, const char *author, ...);
-extern int esl_msa_FormatSeqName (ESL_MSA *msa, int idx, const char *name, ...);
-extern int esl_msa_FormatSeqAccession (ESL_MSA *msa, int idx, const char *acc, ...);
-extern int esl_msa_FormatSeqDescription(ESL_MSA *msa, int idx, const char *desc, ...);
-
-
-/* 3. Digital mode MSA's (augmentation: alphabet) */
-#ifdef eslAUGMENT_ALPHABET
-extern int esl_msa_GuessAlphabet(const ESL_MSA *msa, int *ret_type);
-extern ESL_MSA *esl_msa_CreateDigital(const ESL_ALPHABET *abc, int nseq, int64_t alen);
-extern int esl_msa_Digitize(const ESL_ALPHABET *abc, ESL_MSA *msa, char *errmsg);
-extern int esl_msa_Textize(ESL_MSA *msa);
-#endif
-
-/* 6. Miscellaneous functions for manipulating MSAs */
-extern int esl_msa_ReasonableRF (ESL_MSA *msa, double symfrac, char *rfline);
-extern int esl_msa_MarkFragments(ESL_MSA *msa, double fragthresh);
-extern int esl_msa_SequenceSubset(const ESL_MSA *msa, const int *useme, ESL_MSA **ret_new);
-extern int esl_msa_ColumnSubset(ESL_MSA *msa, char *errbuf, const int *useme);
-extern int esl_msa_MinimGaps(ESL_MSA *msa, char *errbuf, const char *gaps);
-extern int esl_msa_NoGaps(ESL_MSA *msa, char *errbuf, const char *gaps);
-extern int esl_msa_SymConvert(ESL_MSA *msa, const char *oldsyms, const char *newsyms);
-extern int esl_msa_AddComment(ESL_MSA *msa, char *s);
-extern int esl_msa_AddGF(ESL_MSA *msa, char *tag, char *value);
-extern int esl_msa_AddGS(ESL_MSA *msa, char *tag, int sqidx, char *value);
-extern int esl_msa_AppendGC(ESL_MSA *msa, char *tag, char *value);
-extern int esl_msa_AppendGR(ESL_MSA *msa, char *tag, int sqidx, char *value);
-extern int esl_msa_Checksum(const ESL_MSA *msa, uint32_t *ret_checksum);
-
-
-#endif /*eslMSA_INCLUDED*/
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_msacluster.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_msacluster.cpp
deleted file mode 100644
index 966f9fb..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_msacluster.cpp
+++ /dev/null
@@ -1,237 +0,0 @@
-/* Clustering sequences in an MSA by % identity.
-*
-* Table of contents:
-* 1. Single linkage clustering an MSA by %id
-* 2. Internal functions, interface to the clustering API
-* 3. Some internal functions needed for regression tests
-* 7. Copyright and license.
-*
-* Augmentations:
-* eslAUGMENT_ALPHABET: adds support for digital MSAs
-*
-*
-* (Why isn't this just part of the cluster or MSA modules? cluster
-* itself is a core module, dependent only on easel. MSA clustering
-* involves at least the distance, cluster, and msa modules. So we're
-* better off separating its functionality away into a more highly
-* derived module.)
-*
-* SRE, Sun Nov 5 10:06:53 2006 [Janelia]
- * SVN $Id: esl_msacluster.c 393 2009-09-27 12:04:55Z eddys $
-*/
-
-#include <hmmer3/easel/esl_config.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_cluster.h>
-#include <hmmer3/easel/esl_distance.h>
-#include <hmmer3/easel/esl_msa.h>
-#ifdef eslAUGMENT_ALPHABET
-#include <hmmer3/easel/esl_alphabet.h>
-#endif
-
-#include "esl_msacluster.h"
-
-/* These functions are going to get defined in an internal regression
-* testing section further below:
-*/
-#if defined(eslMSACLUSTER_REGRESSION) || defined(eslMSAWEIGHT_REGRESSION)
-#include <ctype.h>
-static double squid_distance(char *s1, char *s2);
-#ifdef eslAUGMENT_ALPHABET
-static double squid_xdistance(ESL_ALPHABET *a, ESL_DSQ *x1, ESL_DSQ *x2);
-#endif
-#endif
-
-/* These functions will define linkage between a pair of text or
-* digital aseq's:
-*/
-static int msacluster_clinkage(const void *v1, const void *v2, const void *p, int *ret_link);
-#ifdef eslAUGMENT_ALPHABET
-static int msacluster_xlinkage(const void *v1, const void *v2, const void *p, int *ret_link);
-#endif
-
-/* In digital mode, we'll need to pass the clustering routine two parameters -
-* %id threshold and alphabet ptr - so make a structure that bundles them.
-*/
-#ifdef eslAUGMENT_ALPHABET
-struct msa_param_s {
- double maxid;
- ESL_ALPHABET *abc;
-};
-#endif
-
-
-/*****************************************************************
-* 1. Single linkage clustering an MSA by %id
-*****************************************************************/
-
-/* Function: esl_msacluster_SingleLinkage()
-* Synopsis: Single linkage clustering by percent identity.
-* Incept: SRE, Sun Nov 5 10:11:45 2006 [Janelia]
-*
-* Purpose: Perform single link clustering of the sequences in
-* multiple alignment <msa>. Any pair of sequences with
-* percent identity $\geq$ <maxid> are linked (using
-* the definition from the \eslmod{distance} module).
-*
-* The resulting clustering is optionally returned in one
-* or more of <opt_c>, <opt_nin>, and <opt_nc>. The
-* <opt_c[0..nseq-1]> array assigns a cluster index
-* <(0..nc-1)> to each sequence. For example, <c[4] = 1>
-* means that sequence 4 is assigned to cluster 1. The
-* <opt_nin[0..nc-1]> array is the number of sequences
-* in each cluster. <opt_nc> is the number of clusters.
-*
-* Importantly, this algorithm runs in $O(N)$ memory, and
-* produces one discrete clustering. Compare to
-* <esl_tree_SingleLinkage()>, which requires an $O(N^2)$
-* adjacency matrix, and produces a hierarchical clustering
-* tree.
-*
-* The algorithm is worst case $O(LN^2)$ time, for $N$
-* sequences of length $L$. However, the worst case is no
-* links at all, and this is unusual. More typically, time
-* scales as about $LN \log N$. The best case scales as
-* $LN$, when there is just one cluster in a completely
-* connected graph.
-*
-* Args: msa - multiple alignment to cluster
-* maxid - pairwise identity threshold: cluster if $\geq$ <maxid>
-* opt_c - optRETURN: cluster assignments for each sequence, [0..nseq-1]
-* opt_nin - optRETURN: number of seqs in each cluster, [0..nc-1]
-* opt_nc - optRETURN: number of clusters
-*
-* Returns: <eslOK> on success; the <opt_c[0..nseq-1]> array contains
-* cluster indices <0..nc-1> assigned to each sequence; the
-* <opt_nin[0..nc-1]> array contains the number of seqs in
-* each cluster; and <opt_nc> contains the number of
-* clusters. The <opt_c> array and <opt_nin> arrays will be
-* allocated here, if non-<NULL>, and must be free'd by the
-* caller. The input <msa> is unmodified.
-*
-* The caller may pass <NULL> for either <opt_c> or
-* <opt_nc> if it is only interested in one of the two
-* results.
-*
-* Throws: <eslEMEM> on allocation failure, and <eslEINVAL> if a pairwise
-* comparison is invalid (which means the MSA is corrupted, so it
-* shouldn't happen). In either case, <opt_c> and <opt_nin> are set to <NULL>
-* and <opt_nc> is set to 0, and the <msa> is unmodified.
-*/
-int
-esl_msacluster_SingleLinkage(const ESL_MSA *msa, double maxid,
- int **opt_c, int **opt_nin, int *opt_nc)
-
-{
- int status;
- int *workspace = NULL;
- int *assignment = NULL;
- int *nin = NULL;
- int nc;
- int i;
-#ifdef eslAUGMENT_ALPHABET
- struct msa_param_s param;
-#endif
-
- /* Allocations */
- ESL_ALLOC_WITH_TYPE(workspace, int*, sizeof(int) * msa->nseq * 2);
- ESL_ALLOC_WITH_TYPE(assignment, int*, sizeof(int) * msa->nseq);
-
- /* call to SLC API: */
- if (! (msa->flags & eslMSA_DIGITAL))
- status = esl_cluster_SingleLinkage((void *) msa->aseq, (size_t) msa->nseq, sizeof(char *),
- msacluster_clinkage, (void *) &maxid,
- workspace, assignment, &nc);
-#ifdef eslAUGMENT_ALPHABET
- else {
- param.maxid = maxid;
- param.abc = msa->abc;
- status = esl_cluster_SingleLinkage((void *) msa->ax, (size_t) msa->nseq, sizeof(ESL_DSQ *),
- msacluster_xlinkage, (void *) &param,
- workspace, assignment, &nc);
- }
-#endif
-
- if (opt_nin != NULL)
- {
- ESL_ALLOC_WITH_TYPE(nin, int*, sizeof(int) * nc);
- for (i = 0; i < nc; i++) nin[i] = 0;
- for (i = 0; i < msa->nseq; i++)
- nin[assignment[i]]++;
- *opt_nin = nin;
- }
-
- /* cleanup and return */
- free(workspace);
- if (opt_c != NULL) *opt_c = assignment; else free(assignment);
- if (opt_nc != NULL) *opt_nc = nc;
- return eslOK;
-
-ERROR:
- if (workspace != NULL) free(workspace);
- if (assignment != NULL) free(assignment);
- if (nin != NULL) free(nin);
- if (opt_c != NULL) *opt_c = NULL;
- if (opt_nc != NULL) *opt_nc = 0;
- return status;
-}
-
-
-
-
-
-/*****************************************************************
-* 2. Internal functions, interface to the clustering API
-*****************************************************************/
-
-/* Definition of %id linkage in text-mode aligned seqs (>= maxid): */
-static int
-msacluster_clinkage(const void *v1, const void *v2, const void *p, int *ret_link)
-{
- char *as1 = *(char **) v1;
- char *as2 = *(char **) v2;
- double maxid = *(double *) p;
- double pid;
- int status = eslOK;
-
-#if defined(eslMSACLUSTER_REGRESSION) || defined(eslMSAWEIGHT_REGRESSION)
- pid = 1. - squid_distance(as1, as2);
-#else
- if ((status = esl_dst_CPairId(as1, as2, &pid, NULL, NULL)) != eslOK) return status;
-#endif
-
- *ret_link = (pid >= maxid ? TRUE : FALSE);
- return status;
-}
-
-/* Definition of % id linkage in digital aligned seqs (>= maxid) */
-#ifdef eslAUGMENT_ALPHABET
-static int
-msacluster_xlinkage(const void *v1, const void *v2, const void *p, int *ret_link)
-{
- ESL_DSQ *ax1 = *(ESL_DSQ **) v1;
- ESL_DSQ *ax2 = *(ESL_DSQ **) v2;
- struct msa_param_s *param = (struct msa_param_s *) p;
- double pid;
- int status = eslOK;
-
-#if defined(eslMSACLUSTER_REGRESSION) || defined(eslMSAWEIGHT_REGRESSION)
- pid = 1. - squid_xdistance(param->abc, ax1, ax2);
-#else
- if ( (status = esl_dst_XPairId(param->abc, ax1, ax2, &pid, NULL, NULL)) != eslOK) return status;
-#endif
-
- *ret_link = (pid >= param->maxid ? TRUE : FALSE);
- return status;
-}
-#endif
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_msacluster.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_msacluster.h
deleted file mode 100644
index 4431d7e..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_msacluster.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* Clustering sequences in an MSA by % identity.
-*
-* SRE, Sun Nov 5 10:08:14 2006 [Janelia]
-* SVN $Id: esl_msacluster.h 238 2008-03-28 11:53:19Z eddys $
-*/
-#ifndef ESL_MSACLUSTER_INCLUDED
-#define ESL_MSACLUSTER_INCLUDED
-
-extern int esl_msacluster_SingleLinkage(const ESL_MSA *msa, double maxid,
- int **opt_c, int **opt_nin, int *opt_nc);
-
-#endif /*ESL_MSACLUSTER_INCLUDED*/
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_msaweight.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_msaweight.cpp
deleted file mode 100644
index 63207bd..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_msaweight.cpp
+++ /dev/null
@@ -1,525 +0,0 @@
-/* Sequence weighting algorithms.
-*
-* Implementations of ad hoc sequence weighting algorithms for multiple
-* sequence alignments:
-* GSC weights: Gerstein et al., JMB 236:1067-1078, 1994.
-* PB weights: Henikoff and Henikoff, JMB 243:574-578, 1994.
-* BLOSUM weights: Henikoff and Henikoff, PNAS 89:10915-10919, 1992.
-*
-* Contents:
-* 1. Implementations of weighting algorithms.
-* 8. Copyright notice and license.
-*
- * SVN $Id: esl_msaweight.c 393 2009-09-27 12:04:55Z eddys $
-* SRE, Fri Nov 3 12:55:45 2006 [Janelia]
-*/
-
-#include <hmmer3/easel/esl_config.h>
-
-#include <math.h>
-#include <string.h>
-#include <ctype.h>
-
-/* Dependencies on Easel core: */
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_alphabet.h>
-#include <hmmer3/easel/esl_dmatrix.h>
-#include <hmmer3/easel/esl_msa.h>
-#include <hmmer3/easel/esl_vectorops.h>
-
-/* Dependencies on phylogeny modules: */
-#include <hmmer3/easel/esl_distance.h>
-#include <hmmer3/easel/esl_tree.h>
-#include <hmmer3/easel/esl_msacluster.h>
-#include <hmmer3/easel/esl_msaweight.h>
-
-
-/*****************************************************************
-* 1. Implementations of weighting algorithms
-*****************************************************************/
-
-/* Function: esl_msaweight_GSC()
-* Synopsis: GSC weights.
-* Incept: SRE, Fri Nov 3 13:31:14 2006 [Janelia]
-*
-* Purpose: Given a multiple sequence alignment <msa>, calculate
-* sequence weights according to the
-* Gerstein/Sonnhammer/Chothia algorithm. These weights
-* are stored internally in the <msa> object, replacing
-* any weights that may have already been there. Weights
-* are $\geq 0$ and they sum to <msa->nseq>.
-*
-* The <msa> may be in either digitized or text mode.
-* Digital mode is preferred, so that distance calculations
-* used by the GSC algorithm are robust against degenerate
-* residue symbols.
-*
-* This is an implementation of Gerstein et al., "A method to
-* weight protein sequences to correct for unequal
-* representation", JMB 236:1067-1078, 1994.
-*
-* The algorithm is $O(N^2)$ memory (it requires a pairwise
-* distance matrix) and $O(N^3 + LN^2)$ time ($N^3$ for a UPGMA
-* tree building step, $LN^2$ for distance matrix construction)
-* for an alignment of N sequences and L columns.
-*
-* In the current implementation, the actual memory
-* requirement is dominated by two full NxN distance
-* matrices (one tmp copy in UPGMA, and one here): for
-* 8-byte doubles, that's $16N^2$ bytes. To keep the
-* calculation under memory limits, don't process large
-* alignments: max 1400 sequences for 32 MB, max 4000
-* sequences for 256 MB, max 8000 seqs for 1 GB. Watch
-* out, because Pfam alignments can easily blow this up.
-*
-* Note: Memory usage could be improved. UPGMA consumes a distance
-* matrix, but that can be D itself, not a copy, if the
-* caller doesn't mind the destruction of D. Also, D is
-* symmetrical, so we could use upper or lower triangular
-* matrices if we rewrote dmatrix to allow them.
-*
-* I also think UPGMA can be reduced to O(N^2) time, by
-* being more tricky about rapidly identifying the minimum
-* element: could keep min of each row, and update that,
-* I think.
-*
-* Returns: <eslOK> on success, and the weights inside <msa> have been
-* modified.
-*
-* Throws: <eslEINVAL> if the alignment data are somehow invalid and
-* distance matrices can't be calculated. <eslEMEM> on an
-* allocation error. In either case, the original <msa> is
-* left unmodified.
-*
-* Xref: [Gerstein94]; squid::weight.c::GSCWeights(); STL11/81.
-*/
-int
-esl_msaweight_GSC(ESL_MSA *msa)
-{
- ESL_DMATRIX *D = NULL; /* distance matrix */
- ESL_TREE *T = NULL; /* UPGMA tree */
- double *x = NULL; /* storage per node, 0..N-2 */
- double lw, rw; /* total branchlen on left, right subtrees */
- double lx, rx; /* distribution of weight to left, right side */
- int i; /* counter over nodes */
- int status;
-
- /* Contract checks
- */
- ESL_DASSERT1( (msa != NULL) );
- ESL_DASSERT1( (msa->nseq >= 1) );
- ESL_DASSERT1( (msa->alen >= 1) );
- ESL_DASSERT1( (msa->wgt != NULL) );
- if (msa->nseq == 1) { msa->wgt[0] = 1.0; return eslOK; }
-
- /* GSC weights use a rooted tree with "branch lengths" calculated by
- * UPGMA on a fractional difference matrix - pretty crude.
- */
- if (! (msa->flags & eslMSA_DIGITAL)) {
- if ((status = esl_dst_CDiffMx(msa->aseq, msa->nseq, &D)) != eslOK) goto ERROR;
- }
-#ifdef eslAUGMENT_ALPHABET
- else {
- if ((status = esl_dst_XDiffMx(msa->abc, msa->ax, msa->nseq, &D)) != eslOK) goto ERROR;
- }
-#endif
-
- /* oi, look out here. UPGMA is correct, but old squid library uses
- * single linkage, so for regression tests ONLY, we use single link.
- */
-#ifdef eslMSAWEIGHT_REGRESSION
- if ((status = esl_tree_SingleLinkage(D, &T)) != eslOK) goto ERROR;
-#else
- if ((status = esl_tree_UPGMA(D, &T)) != eslOK) goto ERROR;
-#endif
- esl_tree_SetCladesizes(T);
-
- ESL_ALLOC_WITH_TYPE(x, double*, sizeof(double) * (T->N-1));
-
- /* Postorder traverse (leaves to root) to calculate the total branch
- * length under each internal node; store this in x[]. Remember the
- * total branch length (x[0]) for a future sanity check.
- */
- for (i = T->N-2; i >= 0; i--)
- {
- x[i] = T->ld[i] + T->rd[i];
- if (T->left[i] > 0) x[i] += x[T->left[i]];
- if (T->right[i] > 0) x[i] += x[T->right[i]];
- }
-
- /* Preorder traverse (root to leaves) to calculate the weights. Now
- * we use x[] to mean, the total weight *above* this node that we will
- * apportion to the node's left and right children. The two
- * meanings of x[] never cross: every x[] beneath x[i] is still a
- * total branch length.
- *
- * Because the API guarantees that msa is returned unmodified in case
- * of an exception, and we're touching msa->wgt here, no exceptions
- * may be thrown from now on in this function.
- */
- x[0] = 0; /* initialize: no branch to the root. */
- for (i = 0; i <= T->N-2; i++)
- {
- lw = T->ld[i]; if (T->left[i] > 0) lw += x[T->left[i]];
- rw = T->rd[i]; if (T->right[i] > 0) rw += x[T->right[i]];
-
- if (lw+rw == 0.)
- {
- /* A special case arises in GSC weights when all branch lengths in a subtree are 0.
- * In this case, all seqs in this clade should get equal weights, sharing x[i] equally.
- * So, split x[i] in proportion to cladesize, not to branch weight.
- */
- if (T->left[i] > 0) lx = x[i] * ((double) T->cladesize[T->left[i]] / (double) T->cladesize[i]);
- else lx = x[i] / (double) T->cladesize[i];
-
- if (T->right[i] > 0) rx = x[i] * ((double) T->cladesize[T->right[i]] / (double) T->cladesize[i]);
- else rx = x[i] / (double) T->cladesize[i];
- }
- else /* normal case: x[i] split in proportion to branch weight. */
- {
- lx = x[i] * lw/(lw+rw);
- rx = x[i] * rw/(lw+rw);
- }
-
- if (T->left[i] <= 0) msa->wgt[-(T->left[i])] = lx + T->ld[i];
- else x[T->left[i]] = lx + T->ld[i];
-
- if (T->right[i] <= 0) msa->wgt[-(T->right[i])] = rx + T->rd[i];
- else x[T->right[i]] = rx + T->rd[i];
- }
-
- /* Renormalize weights to sum to N.
- */
- esl_vec_DNorm(msa->wgt, msa->nseq);
- esl_vec_DScale(msa->wgt, msa->nseq, (double) msa->nseq);
- msa->flags |= eslMSA_HASWGTS;
-
- free(x);
- esl_tree_Destroy(T);
- esl_dmatrix_Destroy(D);
- return eslOK;
-
-ERROR:
- if (x != NULL) free(x);
- if (T != NULL) esl_tree_Destroy(T);
- if (D != NULL) esl_dmatrix_Destroy(D);
- return status;
-}
-
-
-/* Function: esl_msaweight_PB()
-* Synopsis: PB (position-based) weights.
-* Incept: SRE, Sun Nov 5 08:59:28 2006 [Janelia]
-*
-* Purpose: Given a multiple alignment <msa>, calculate sequence
-* weights according to the position-based weighting
-* algorithm (Henikoff and Henikoff, JMB 243:574-578,
-* 1994). These weights are stored internally in the <msa>
-* object, replacing any weights that may have already been
-* there. Weights are $\geq 0$ and they sum to <msa->nseq>.
-*
-* The <msa> may be in either digitized or text mode.
-* Digital mode is preferred, so that the algorithm
-* deals with degenerate residue symbols properly.
-*
-* The Henikoffs' algorithm does not give rules for dealing
-* with gaps or degenerate residue symbols. The rule here
-* is to ignore them. This means that longer sequences
-* initially get more weight; hence a "double
-* normalization" in which the weights are first divided by
-* sequence length in canonical residues (to compensate for
-* that effect), then normalized to sum to nseq.
-*
-* An advantage of the PB method is efficiency.
-* It is $O(1)$ in memory and $O(NL)$ time, for an alignment of
-* N sequences and L columns. This makes it a good method
-* for ad hoc weighting of very deep alignments.
-*
-* When the alignment is in simple text mode, IUPAC
-* degenerate symbols are not dealt with correctly; instead,
-* the algorithm simply uses the 26 letters as "residues"
-* (case-insensitively), and treats all other residues as
-* gaps.
-*
-* Returns: <eslOK> on success, and the weights inside <msa> have been
-* modified.
-*
-* Throws: <eslEMEM> on allocation error, in which case <msa> is
-* returned unmodified.
-*
-* Xref: [Henikoff94b]; squid::weight.c::PositionBasedWeights().
-*/
-int
-esl_msaweight_PB(ESL_MSA *msa)
-{
- int *nres = NULL; /* counts of each residue observed in a column */
- int ntotal; /* number of different symbols observed in a column */
- int rlen; /* number of residues in a sequence */
- int idx, pos, i;
- int K; /* alphabet size */
- int status;
-
- /* Contract checks
- */
- ESL_DASSERT1( (msa->nseq >= 1) );
- ESL_DASSERT1( (msa->alen >= 1) );
- if (msa->nseq == 1) { msa->wgt[0] = 1.0; return eslOK; }
-
- /* Initialize
- */
- if (! (msa->flags & eslMSA_DIGITAL))
- { ESL_ALLOC_WITH_TYPE(nres, int*, sizeof(int) * 26); K = 26; }
-#ifdef eslAUGMENT_ALPHABET
- else
- { ESL_ALLOC_WITH_TYPE(nres, int*, sizeof(int) * msa->abc->K); K = msa->abc->K; }
-#endif
-
- esl_vec_DSet(msa->wgt, msa->nseq, 0.);
-
- /* This section handles text alignments */
- if (! (msa->flags & eslMSA_DIGITAL))
- {
- for (pos = 0; pos < msa->alen; pos++)
- {
- /* Collect # of letters A..Z in this column, and total */
- esl_vec_ISet(nres, K, 0.);
- for (idx = 0; idx < msa->nseq; idx++)
- if (isalpha((int) msa->aseq[idx][pos]))
- nres[toupper((int) msa->aseq[idx][pos]) - 'A'] ++;
- for (ntotal = 0, i = 0; i < K; i++) if (nres[i] > 0) ntotal++;
-
- /* Bump weight on each seq by PB rule */
- if (ntotal > 0) {
- for (idx = 0; idx < msa->nseq; idx++) {
- if (isalpha((int) msa->aseq[idx][pos]))
- msa->wgt[idx] += 1. /
- (double) (ntotal * nres[toupper((int) msa->aseq[idx][pos]) - 'A'] );
- }
- }
- }
-
- /* first normalization by # of residues counted in each seq */
- for (idx = 0; idx < msa->nseq; idx++) {
- for (rlen = 0, pos = 0; pos < msa->alen; pos++)
- if (isalpha((int) msa->aseq[idx][pos])) rlen++;
- if (ntotal > 0) msa->wgt[idx] /= (double) rlen;
- /* if rlen == 0 for this seq, its weight is still 0.0, as initialized. */
- }
- }
-
- /* This section handles digital alignments. */
-#ifdef eslAUGMENT_ALPHABET
- else
- {
- for (pos = 1; pos <= msa->alen; pos++)
- {
- /* Collect # of residues 0..K-1 in this column, and total # */
- esl_vec_ISet(nres, K, 0.);
- for (idx = 0; idx < msa->nseq; idx++)
- if (esl_abc_XIsCanonical(msa->abc, msa->ax[idx][pos]))
- nres[(int) msa->ax[idx][pos]] ++;
- for (ntotal = 0, i = 0; i < K; i++) if (nres[i] > 0) ntotal++;
-
- /* Bump weight on each sequence by PB rule */
- if (ntotal > 0) {
- for (idx = 0; idx < msa->nseq; idx++) {
- if (esl_abc_XIsCanonical(msa->abc, msa->ax[idx][pos]))
- msa->wgt[idx] += 1. / (double) (ntotal * nres[msa->ax[idx][pos]]);
- }
- }
- }
-
- /* first normalization by # of residues counted in each seq */
- for (idx = 0; idx < msa->nseq; idx++)
- {
- for (rlen = 0, pos = 1; pos <= msa->alen; pos++)
- if (esl_abc_XIsCanonical(msa->abc, msa->ax[idx][pos])) rlen++;
- if (rlen > 0) msa->wgt[idx] /= (double) rlen;
- /* if rlen == 0 for this seq, its weight is still 0.0, as initialized. */
- }
- }
-#endif
-
- /* Make weights normalize up to nseq, and return. In pathological
- * case where all wgts were 0 (no seqs contain any unambiguous
- * residues), weights become 1.0.
- */
- esl_vec_DNorm(msa->wgt, msa->nseq);
- esl_vec_DScale(msa->wgt, msa->nseq, (double) msa->nseq);
- msa->flags |= eslMSA_HASWGTS;
-
- free(nres);
- return eslOK;
-
-ERROR:
- if (nres != NULL) free(nres);
- return status;
-}
-
-
-/* Function: esl_msaweight_BLOSUM()
-* Synopsis: BLOSUM weights.
-* Incept: SRE, Sun Nov 5 09:52:41 2006 [Janelia]
-*
-* Purpose: Given a multiple sequence alignment <msa> and an identity
-* threshold <maxid>, calculate sequence weights using the
-* BLOSUM algorithm (Henikoff and Henikoff, PNAS
-* 89:10915-10919, 1992). These weights are stored
-* internally in the <msa> object, replacing any weights
-* that may have already been there. Weights are $\geq 0$
-* and they sum to <msa->nseq>.
-*
-* The algorithm does a single linkage clustering by
-* fractional id, defines clusters such that no two clusters
-* have a pairwise link $\geq$ <maxid>), and assigns
-* weights of $\frac{1}{M_i}$ to each of the $M_i$
-* sequences in each cluster $i$. The <maxid> threshold
-* is a fractional pairwise identity, in the range
-* $0..1$.
-*
-* The <msa> may be in either digitized or text mode.
-* Digital mode is preferred, so that the pairwise identity
-* calculations deal with degenerate residue symbols
-* properly.
-*
-* Returns: <eslOK> on success, and the weights inside <msa> have been
-* modified.
-*
-* Throws: <eslEMEM> on allocation error. <eslEINVAL> if a pairwise
-* identity calculation fails because of corrupted sequence
-* data. In either case, the <msa> is unmodified.
-*
-* Xref: [Henikoff92]; squid::weight.c::BlosumWeights().
-*/
-int
-esl_msaweight_BLOSUM(ESL_MSA *msa, double maxid)
-{
- int *c = NULL; /* cluster assignments for each sequence */
- int *nmem = NULL; /* number of seqs in each cluster */
- int nc; /* number of clusters */
- int i; /* loop counter */
- int status;
-
- /* Contract checks
- */
- ESL_DASSERT1( (maxid >= 0. && maxid <= 1.) );
- ESL_DASSERT1( (msa->nseq >= 1) );
- ESL_DASSERT1( (msa->alen >= 1) );
- if (msa->nseq == 1) { msa->wgt[0] = 1.0; return eslOK; }
-
- if ((status = esl_msacluster_SingleLinkage(msa, maxid, &c, NULL, &nc)) != eslOK) goto ERROR;
- ESL_ALLOC_WITH_TYPE(nmem, int*, sizeof(int) * nc);
- esl_vec_ISet(nmem, nc, 0);
- for (i = 0; i < msa->nseq; i++) nmem[c[i]]++;
- for (i = 0; i < msa->nseq; i++) msa->wgt[i] = 1. / (double) nmem[c[i]];
-
- /* Make weights normalize up to nseq, and return.
- */
- esl_vec_DNorm(msa->wgt, msa->nseq);
- esl_vec_DScale(msa->wgt, msa->nseq, (double) msa->nseq);
- msa->flags |= eslMSA_HASWGTS;
-
- free(nmem);
- free(c);
- return eslOK;
-
-ERROR:
- if (c != NULL) free(c);
- if (nmem != NULL) free(nmem);
- return status;
-}
-
-/* Function: esl_msaweight_IDFilter()
-* Synopsis: Filter by %ID.
-* Incept: ER, Wed Oct 29 10:06:43 2008 [Janelia]
-*
-* Purpose: Constructs a new alignment by removing near-identical
-* sequences from a given alignment (where identity is
-* calculated *based on the alignment*).
-* Does not affect the given alignment.
-* Keeps earlier sequence, discards later one.
-*
-* Usually called as an ad hoc sequence "weighting" mechanism.
-*
-* Limitations:
-* Unparsed Stockholm markup is not propagated into the
-* new alignment.
-*
-* Return: <eslOK> on success, and the <newmsa>.
-*
-* Throws: <eslEMEM> on allocation error. <eslEINVAL> if a pairwise
-* identity calculation fails because of corrupted sequence
-* data. In either case, the <msa> is unmodified.
-*
-* Xref: squid::weight.c::FilterAlignment().
-*/
-//int
-//esl_msaweight_IDFilter(const ESL_MSA *msa, double maxid, ESL_MSA **ret_newmsa)
-//{
-// int *list = NULL; /* array of seqs in new msa */
-// int *useme = NULL; /* TRUE if seq is kept in new msa */
-// int nnew; /* number of seqs in new alignment */
-// double ident; /* pairwise percentage id */
-// int i,j; /* seqs counters*/
-// int remove; /* TRUE if sq is to be removed */
-// int status;
-//
-// /* Contract checks
-// */
-// ESL_DASSERT1( (msa != NULL) );
-// ESL_DASSERT1( (msa->nseq >= 1) );
-// ESL_DASSERT1( (msa->alen >= 1) );
-//
-// /* allocate */
-// ESL_ALLOC(list, sizeof(int) * msa->nseq);
-// ESL_ALLOC(useme, sizeof(int) * msa->nseq);
-// esl_vec_ISet(useme, msa->nseq, 0); /* initialize array */
-//
-// /* find which seqs to keep (list) */
-// nnew = 0;
-// for (i = 0; i < msa->nseq; i++)
-// {
-// remove = FALSE;
-// for (j = 0; j < nnew; j++)
-// {
-// if (! (msa->flags & eslMSA_DIGITAL)) {
-// if ((status = esl_dst_CPairId(msa->aseq[i], msa->aseq[list[j]], &ident, NULL, NULL)) != eslOK) goto ERROR;
-// }
-//#ifdef eslAUGMENT_ALPHABET
-// else {
-// if ((status = esl_dst_XPairId(msa->abc, msa->ax[i], msa->ax[list[j]], &ident, NULL, NULL)) != eslOK) goto ERROR;
-// }
-//#endif
-//
-// if (ident > maxid)
-// {
-// remove = TRUE;
-// break;
-// }
-// }
-// if (remove == FALSE) {
-// list[nnew++] = i;
-// useme[i] = TRUE;
-// }
-// }
-// if ((status = esl_msa_SequenceSubset(msa, useme, ret_newmsa)) != eslOK) goto ERROR;
-//
-// free(list);
-// free(useme);
-// return eslOK;
-//
-//ERROR:
-// if (list != NULL) free(list);
-// if (useme != NULL) free(useme);
-// return status;
-//}
-/*---------------- end, weighting implementations ----------------*/
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_msaweight.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_msaweight.h
deleted file mode 100644
index 18b5c17..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_msaweight.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* esl_msaweight.h
-* Sequence weighting algorithms.
-*
-* SVN $Id: esl_msaweight.h 302 2008-10-30 14:26:46Z eddys $
-* SRE, Sun Nov 5 09:11:13 2006 [Janelia]
-*/
-#ifndef ESL_MSAWEIGHT_INCLUDED
-#define ESL_MSAWEIGHT_INCLUDED
-
-#include <hmmer3/easel/esl_msa.h>
-
-extern int esl_msaweight_GSC(ESL_MSA *msa);
-extern int esl_msaweight_PB(ESL_MSA *msa);
-extern int esl_msaweight_BLOSUM(ESL_MSA *msa, double maxid);
-//extern int esl_msaweight_IDFilter(const ESL_MSA *msa, double maxid, ESL_MSA **ret_newmsa);
-
-
-#endif /*ESL_MSAWEIGHT_INCLUDED*/
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_random.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_random.cpp
deleted file mode 100644
index d4256d0..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_random.cpp
+++ /dev/null
@@ -1,736 +0,0 @@
-/* Portable, threadsafe random number generators.
-* Provides both a fast generator and a strong generator.
-*
-* 1. The ESL_RANDOMNESS object.
-* 2. The generators and esl_random().
-* 3. Other fundamental sampling (including Gaussian, gamma).
-* 4. Multinomial sampling from discrete probability n-vectors.
-*
-* See http://csrc.nist.gov/rng/ for the NIST random number
-* generation test suite.
-*
-* SRE, Wed Jul 14 10:54:46 2004 [St. Louis]
-* SVN $Id: esl_random.c 350 2009-06-21 14:13:57Z eddys $
-*
-* SRE, 30 May 2009: replaced with the Mersenne Twister and Knuth LCG.
-*/
-#include <hmmer3/easel/esl_config.h>
-
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <math.h>
-#include <time.h>
-
-#include <hmmer3/easel/easel.h>
-#include "esl_random.h"
-
-static uint32_t choose_arbitrary_seed(void);
-static uint32_t jenkins_mix3(uint32_t a, uint32_t b, uint32_t c);
-static uint32_t knuth (ESL_RANDOMNESS *r);
-static uint32_t mersenne_twister (ESL_RANDOMNESS *r);
-static void mersenne_seed_table(ESL_RANDOMNESS *r, uint32_t seed);
-static void mersenne_fill_table(ESL_RANDOMNESS *r);
-
-/*****************************************************************
-*# 1. The <ESL_RANDOMNESS> object.
-*****************************************************************/
-
-/* Function: esl_randomness_Create()
-* Synopsis: Create the default strong random number generator.
-* Incept: SRE, Wed Jul 14 13:02:18 2004 [St. Louis]
-*
-* Purpose: Create a random number generator using
-* a given random seed. The <seed> must be $\geq 0$.
-*
-* The default random number generator uses the Mersenne
-* Twister MT19937 algorithm \citep{Matsumoto98}. It has a
-* period of $2^{19937}-1$, and equidistribution over
-* $2^{32}$ values.
-*
-* If <seed> is $>0$, the random number generator is
-* reproducibly initialized with that seed. Two RNGs
-* created with the same nonzero seed will give exactly the
-* same stream of pseudorandom numbers. This allows you to
-* make reproducible stochastic simulations, for example.
-*
-* If <seed> is 0, an arbitrary seed is chosen.
-* Internally, the arbitrary seed is produced by a
-* combination of the current <time()> and the process id
-* (if available; POSIX only). Two RNGs created with
-* <seed>=0 will very probably (but not assuredly) give
-* different streams of pseudorandom numbers. The true seed
-* can be retrieved from the <ESL_RANDOMNESS> object using
-* <esl_randomness_GetSeed()>. The strategy used for
-* choosing the arbitrary seed is predictable, so it is
-* not secure in any sense, especially in the cryptographic
-* sense.
-*
-* Args: seed $>= 0$.
-*
-* Returns: an initialized <ESL_RANDOMNESS *> on success.
-* Caller free's with <esl_randomness_Destroy()>.
-*
-* Throws: <NULL> on failure.
-*
-* Xref: STL8/p57.
-* J5/21: Mersenne Twister.
-*/
-ESL_RANDOMNESS *
-esl_randomness_Create(uint32_t seed)
-{
- ESL_RANDOMNESS *r = NULL;
- int status;
-
- ESL_ALLOC_WITH_TYPE(r, ESL_RANDOMNESS*, sizeof(ESL_RANDOMNESS));
- r->type = eslRND_MERSENNE;
- r->mti = 0;
- r->x = 0;
- r->seed = 0;
- esl_randomness_Init(r, seed);
- return r;
-
-ERROR:
- return NULL;
-}
-
-/* Function: esl_randomness_CreateFast()
-* Synopsis: Create the alternative fast generator.
-* Incept: SRE, Sat May 30 06:35:23 2009 [Stockholm]
-*
-* Purpose: Same as <esl_randomness_Create()>, except that a simple
-* linear congruential generator will be used.
-*
-* This is a $(a=69069, c=1)$ LCG, with a period of
-* $2^{32}$. Because of the relatively short period, this
-* generator should not be used for serious simulations
-* involving large samples.
-*
-* The properties of this generator are not as good as the
-* default Mersenne Twister, but it is faster, especially
-* if you only need a small number of samples from the
-* generator; it is about 20x faster to initialize the
-* generator, and about 25\% faster to sample a number,
-* compared to the default.
-*
-* Args: seed $>= 0$.
-*
-* Returns: an initialized <ESL_RANDOMNESS *> on success.
-* Caller free's with <esl_randomness_Destroy()>.
-*
-* Throws: <NULL> on failure.
-*
-* Xref: J5/44: for accidental proof that the period is
-* indeed 2^32.
-*/
-ESL_RANDOMNESS *
-esl_randomness_CreateFast(uint32_t seed)
-{
- ESL_RANDOMNESS *r = NULL;
- int status;
-
- ESL_ALLOC_WITH_TYPE(r, ESL_RANDOMNESS*, sizeof(ESL_RANDOMNESS));
- r->type = eslRND_FAST;
- r->mti = 0;
- r->x = 0;
- r->seed = 0;
- esl_randomness_Init(r, seed);
- return r;
-
-ERROR:
- return NULL;
-}
-
-
-/* Function: esl_randomness_CreateTimeseeded()
-* Synopsis: Create an RNG with a quasirandom seed.
-* Incept: SRE, Wed Jul 14 11:22:54 2004 [St. Louis]
-*
-* Purpose: Like <esl_randomness_Create()>, but it initializes the
-* the random number generator using a POSIX <time()> call
-* (number of seconds since the POSIX epoch).
-*
-* This function is deprecated. Use
-* <esl_randomness_Create(0)> instead.
-*
-* Returns: an initialized <ESL_RANDOMNESS *> on success.
-* Caller free's with <esl_randomness_Destroy()>.
-*
-* Throws: <NULL> on failure.
-*
-* Xref: STL8/p57.
-*/
-ESL_RANDOMNESS *
-esl_randomness_CreateTimeseeded(void)
-{
- return esl_randomness_Create(0);
-}
-
-
-/* Function: esl_randomness_Init()
-* Synopsis: Reinitialize a RNG.
-* Incept: SRE, Wed Jul 14 13:13:05 2004 [St. Louis]
-*
-* Purpose: Reset and reinitialize an existing <ESL_RANDOMNESS>
-* object with a new seed.
-*
-* Not generally recommended. This does not make a
-* sequence of numbers more random, and may make it less
-* so. Sometimes, though, it's useful to reseed an RNG
-* to guarantee a particular reproducible series of
-* pseudorandom numbers at an arbitrary point in a program;
-* HMMER does this, for example, to guarantee the same
-* results from the same HMM/sequence comparison regardless
-* of where in a search the HMM or sequence occurs.
-*
-* Args: r - randomness object
-* seed - new seed to use; >0.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEINVAL> if seed is $<= 0$.
-*
-* Xref: STL8/p57.
-*/
-int
-esl_randomness_Init(ESL_RANDOMNESS *r, uint32_t seed)
-{
- if (seed == 0) seed = choose_arbitrary_seed();
- if (r->type == eslRND_MERSENNE)
- {
- mersenne_seed_table(r, seed);
- mersenne_fill_table(r);
- }
- else
- {
- r->seed = seed;
- r->x = jenkins_mix3(seed, 87654321, 12345678); /* arbitrary dispersion of the seed */
- if (r->x == 0) r->x = 42; /* make sure we don't have a zero */
- }
- return eslOK;
-}
-
-/* Function: esl_randomness_GetSeed()
-* Synopsis: Returns the value of RNG's seed.
-* Incept: SRE, Wed May 23 17:02:59 2007 [Janelia]
-*
-* Purpose: Return the value of the seed.
-*/
-uint32_t
-esl_randomness_GetSeed(const ESL_RANDOMNESS *r)
-{
- return r->seed;
-}
-
-
-/* Function: esl_randomness_Destroy()
-* Synopsis: Free an RNG.
-* Incept: SRE, Wed Jul 14 13:19:08 2004 [St. Louis]
-*
-* Purpose: Frees an <ESL_RANDOMNESS> object.
-*/
-void
-esl_randomness_Destroy(ESL_RANDOMNESS *r)
-{
- free(r);
- return;
-}
-
-/*----------- end of ESL_RANDOMNESS object functions --------------*/
-
-
-
-/*****************************************************************
-*# 2. The generators and <esl_random()>
-*****************************************************************/
-
-/* Function: esl_random()
-* Synopsis: Generate a uniform random deviate on [0,1)
-* Incept: SRE, Sat May 30 05:01:45 2009 [Stockholm]
-*
-* Purpose: Returns a uniform deviate x, $0.0 <= x < 1.0$, given
-* RNG <r>.
-*
-* Uses the original Mersenne Twister algorithm, MT19937
-* [Matsumoto98]. This generator has a period of $2^19937 -
-* 1$. It generates uniformly distributed variates on the
-* interval $0..2^32-1$.
-*
-* Notes: Easel previously used a reimplementation of ran2() from
-* Numerical Recipes in C, which uses L'Ecuyer's algorithm
-* for combining output of two linear congruential
-* generators, plus a Bays-Durham shuffle \citep{Press93}.
-* MT is about 10x faster.
-*
-* Returns: uniformly distribute random deviate on interval
-* $0.0 \leq x < 1.0$
-*
-* Throws: (no abnormal error conditions)
-*/
-double
-esl_random(ESL_RANDOMNESS *r)
-{
- uint32_t x = (r->type == eslRND_MERSENNE) ? mersenne_twister(r) : knuth(r);
- return ((double) x / 4294967296.0); /* 2^32: normalizes to [0,1) */
-}
-
-
-static uint32_t
-knuth(ESL_RANDOMNESS *r)
-{
- r->x *= 69069;
- r->x += 1;
- return r->x;
-}
-
-/* mersenne_twister() and other mersenne_*() functions below:
-* A simple serial implementation of the original Mersenne Twister
-* algorithm [Matsumoto98].
-*
-* There are more sophisticated and faster implementations of MT, using
-* vector instructions and/or directly generating IEEE754 doubles
-* bitwise rather than doing an expensive normalization. We can
-* improve the implementation later if necessary, but even the basic
-* MT offers ~10x speed improvement over Easel's previous RNG.
-* [SRE, 30 May 09, Stockholm]
-*/
-static uint32_t
-mersenne_twister(ESL_RANDOMNESS *r)
-{
- uint32_t x;
- if (r->mti >= 624) mersenne_fill_table(r);
-
- x = r->mt[r->mti++];
- x ^= (x>>11);
- x ^= (x<< 7) & 0x9d2c5680;
- x ^= (x<<15) & 0xefc60000;
- x ^= (x>>18);
- return x;
-}
-
-/* mersenne_seed_table()
-* Initialize the state of the RNG from a seed.
-* Uses the knuth linear congruential generator.
-*/
-static void
-mersenne_seed_table(ESL_RANDOMNESS *r, uint32_t seed)
-{
- int z;
-
- r->seed = seed;
- r->mt[0] = seed;
- for (z = 1; z < 624; z++)
- r->mt[z] = 69069 * r->mt[z-1];
- return;
-}
-
-/* mersenne_fill_table()
-* Refill the table with 624 new random numbers.
-* We do this whenever we've reseeded, or when we
-* run out of numbers.
-*/
-static void
-mersenne_fill_table(ESL_RANDOMNESS *r)
-{
- uint32_t y;
- int z;
- static uint32_t mag01[2] = { 0x0, 0x9908b0df };
-
- for (z = 0; z < 227; z++) /* 227 = N-M = 624-397 */
- {
- y = (r->mt[z] & 0x80000000) | (r->mt[z+1] & 0x7fffffff);
- r->mt[z] = r->mt[z+397] ^ (y>>1) ^ mag01[y & 0x1];
- }
- for (; z < 623; z++)
- {
- y = (r->mt[z] & 0x80000000) | (r->mt[z+1] & 0x7fffffff);
- r->mt[z] = r->mt[z-227] ^ (y>>1) ^ mag01[y & 0x1];
- }
- y = (r->mt[623] & 0x80000000) | (r->mt[0] & 0x7fffffff);
- r->mt[623] = r->mt[396] ^ (y>>1) ^ mag01[y & 0x1];
- r->mti = 0;
- return;
-}
-
-
-/* choose_arbitrary_seed()
-* Return a 'quasirandom' seed > 0.
-* This could be a *lot* better than it is now; see RFC1750
-* for a discussion of securely seeding RNGs.
-*/
-static uint32_t
-choose_arbitrary_seed(void)
-{
- uint32_t a = (uint32_t) time ((time_t *) NULL);
- uint32_t b = 87654321; /* anything nonzero */
- uint32_t c = 12345678; /* anything nonzero. jenkins' mix3 needs 3 numbers; add an arbitrary one */
- uint32_t seed;
-#ifdef HAVE_GETPID
- b = (uint32_t) getpid(); /* preferable b choice, if we have POSIX getpid(); else both b,c are arbitrary */
-#endif
- seed = jenkins_mix3(a,b,c); /* try to decorrelate closely spaced choices of pid/time */
-
- return (seed == 0) ? 42 : seed; /* 42 is entirely arbitrary, just to avoid seed==0. */
-}
-
-/* jenkins_mix3()
-*
-* from Bob Jenkins: given a,b,c, generate a number that's distributed
-* reasonably uniformly on the interval 0..2^32-1 even for closely
-* spaced choices of a,b,c.
-*/
-static uint32_t
-jenkins_mix3(uint32_t a, uint32_t b, uint32_t c)
-{
- a -= b; a -= c; a ^= (c>>13);
- b -= c; b -= a; b ^= (a<<8);
- c -= a; c -= b; c ^= (b>>13);
- a -= b; a -= c; a ^= (c>>12);
- b -= c; b -= a; b ^= (a<<16);
- c -= a; c -= b; c ^= (b>>5);
- a -= b; a -= c; a ^= (c>>3);
- b -= c; b -= a; b ^= (a<<10);
- c -= a; c -= b; c ^= (b>>15);
- return c;
-}
-/*----------- end of esl_random() --------------*/
-
-
-
-/*****************************************************************
-*# 3. Other fundamental sampling (including Gaussian, gamma)
-*****************************************************************/
-
-/* Function: esl_rnd_UniformPositive()
-* Synopsis: Generate a uniform positive random deviate on interval (0,1).
-* Incept: SRE, Wed Jul 14 13:31:23 2004 [St. Louis]
-*
-* Purpose: Same as <esl_random()>, but assure $0 < x < 1$;
-* (positive uniform deviate).
-*/
-double
-esl_rnd_UniformPositive(ESL_RANDOMNESS *r)
-{
- double x;
- do { x = esl_random(r); } while (x == 0.0);
- return x;
-}
-
-
-/* Function: esl_rnd_Gaussian()
-* Synopsis: Generate a Gaussian-distributed sample.
-* Incept: SRE, Wed Jul 14 13:50:36 2004 [St. Louis]
-*
-* Purpose: Pick a Gaussian-distributed random variable
-* with a given <mean> and standard deviation <stddev>, and
-* return it.
-*
-* Implementation is derived from the public domain
-* RANLIB.c <gennor()> function, written by Barry W. Brown
-* and James Lovato (M.D. Anderson Cancer Center, Texas
-* USA) using the method described in
-* \citep{AhrensDieter73}.
-*
-* Method: Impenetrability of the code is to be blamed on
-* FORTRAN/f2c lineage.
-*
-* Args: r - ESL_RANDOMNESS object
-* mean - mean of the Gaussian we're sampling from
-* stddev - standard deviation of the Gaussian
-*/
-double
-esl_rnd_Gaussian(ESL_RANDOMNESS *r, double mean, double stddev)
-{
- long i;
- double snorm,u,s,ustar,aa,w,y,tt;
-
- /* These static's are threadsafe: they are magic constants
- * we will not touch.
- */
- static const double a[32] = {
- 0.0,3.917609E-2,7.841241E-2,0.11777,0.1573107,0.1970991,0.2372021,0.2776904,
- 0.3186394,0.36013,0.4022501,0.4450965,0.4887764,0.5334097,0.5791322,
- 0.626099,0.6744898,0.7245144,0.7764218,0.8305109,0.8871466,0.9467818,
- 1.00999,1.077516,1.150349,1.229859,1.318011,1.417797,1.534121,1.67594,
- 1.862732,2.153875
- };
- static const double d[31] = {
- 0.0,0.0,0.0,0.0,0.0,0.2636843,0.2425085,0.2255674,0.2116342,0.1999243,
- 0.1899108,0.1812252,0.1736014,0.1668419,0.1607967,0.1553497,0.1504094,
- 0.1459026,0.14177,0.1379632,0.1344418,0.1311722,0.128126,0.1252791,
- 0.1226109,0.1201036,0.1177417,0.1155119,0.1134023,0.1114027,0.1095039
- };
- static const double t[31] = {
- 7.673828E-4,2.30687E-3,3.860618E-3,5.438454E-3,7.0507E-3,8.708396E-3,
- 1.042357E-2,1.220953E-2,1.408125E-2,1.605579E-2,1.81529E-2,2.039573E-2,
- 2.281177E-2,2.543407E-2,2.830296E-2,3.146822E-2,3.499233E-2,3.895483E-2,
- 4.345878E-2,4.864035E-2,5.468334E-2,6.184222E-2,7.047983E-2,8.113195E-2,
- 9.462444E-2,0.1123001,0.136498,0.1716886,0.2276241,0.330498,0.5847031
- };
- static const double h[31] = {
- 3.920617E-2,3.932705E-2,3.951E-2,3.975703E-2,4.007093E-2,4.045533E-2,
- 4.091481E-2,4.145507E-2,4.208311E-2,4.280748E-2,4.363863E-2,4.458932E-2,
- 4.567523E-2,4.691571E-2,4.833487E-2,4.996298E-2,5.183859E-2,5.401138E-2,
- 5.654656E-2,5.95313E-2,6.308489E-2,6.737503E-2,7.264544E-2,7.926471E-2,
- 8.781922E-2,9.930398E-2,0.11556,0.1404344,0.1836142,0.2790016,0.7010474
- };
-
- u = esl_random(r);
- s = 0.0;
- if(u > 0.5) s = 1.0;
- u += (u-s);
- u = 32.0*u;
- i = (long) (u);
- if(i == 32) i = 31;
- if(i == 0) goto S100;
- /*
- * START CENTER
- */
- ustar = u-(double)i;
- aa = a[i-1];
-S40:
- if (ustar <= t[i-1]) goto S60;
- w = (ustar - t[i-1]) * h[i-1];
-S50:
- /*
- * EXIT (BOTH CASES)
- */
- y = aa+w;
- snorm = y;
- if(s == 1.0) snorm = -y;
- return (stddev*snorm + mean);
-S60:
- /*
- * CENTER CONTINUED
- */
- u = esl_random(r);
- w = u*(a[i]-aa);
- tt = (0.5*w+aa)*w;
- goto S80;
-S70:
- tt = u;
- ustar = esl_random(r);
-S80:
- if(ustar > tt) goto S50;
- u = esl_random(r);
- if(ustar >= u) goto S70;
- ustar = esl_random(r);
- goto S40;
-S100:
- /*
- * START TAIL
- */
- i = 6;
- aa = a[31];
- goto S120;
-S110:
- aa += d[i-1];
- i += 1;
-S120:
- u += u;
- if(u < 1.0) goto S110;
- u -= 1.0;
-S140:
- w = u*d[i-1];
- tt = (0.5*w+aa)*w;
- goto S160;
-S150:
- tt = u;
-S160:
- ustar = esl_random(r);
- if(ustar > tt) goto S50;
- u = esl_random(r);
- if(ustar >= u) goto S150;
- u = esl_random(r);
- goto S140;
-}
-
-
-
-/* subfunctions that esl_rnd_Gamma() is going to call:
-*/
-static double
-gamma_ahrens(ESL_RANDOMNESS *r, double a) /* for a >= 3 */
-{
- double V; /* uniform deviates */
- double X,Y;
- double test;
-
- do {
- do { /* generate candidate X */
- Y = tan(eslCONST_PI * esl_random(r));
- X = Y * sqrt(2.*a -1.) + a - 1.;
- } while (X <= 0.);
- /* accept/reject X */
- V = esl_random(r);
- test = (1+Y*Y) * exp( (a-1.)* log(X/(a-1.)) - Y*sqrt(2.*a-1.));
- } while (V > test);
- return X;
-}
-static double
-gamma_integer(ESL_RANDOMNESS *r, unsigned int a) /* for small integer a, a < 12 */
-{
- int i;
- double U,X;
-
- U = 1.;
- for (i = 0; i < (int)a; i++)
- U *= esl_rnd_UniformPositive(r);
- X = -log(U);
-
- return X;
-}
-static double
-gamma_fraction(ESL_RANDOMNESS *r, double a) /* for fractional a, 0 < a < 1 */
-{ /* Knuth 3.4.1, exercise 16, pp. 586-587 */
- double p, U, V, X, q;
-
- p = eslCONST_E / (a + eslCONST_E);
- do {
- U = esl_random(r);
- V = esl_rnd_UniformPositive(r);
- if (U < p) {
- X = pow(V, 1./a);
- q = exp(-X);
- } else {
- X = 1. - log(V);
- q = pow(X, a-1.);
- }
- U = esl_random(r);
- } while (U >= q);
- return X;
-}
-
-
-/* Function: esl_rnd_Gamma()
-* Synopsis: Returns a random deviate from a Gamma(a, 1) distribution.
-* Incept: SRE, Wed Apr 17 13:10:03 2002 [St. Louis]
-*
-* Purpose: Return a random deviate distributed as Gamma(a, 1.)
-* \citep[pp. 133--134]{Knu-81a}.
-*
-* The implementation follows not only Knuth \citep{Knu-81a},
-* but also relied on examination of the implementation in
-* the GNU Scientific Library (libgsl) \citep{Galassi06}.
-*
-* Args: r - random number generation seed
-* a - order of the gamma function; a > 0
-*
-* Throws: <eslEINVAL> for $a <= 0$.
-*/
-double
-esl_rnd_Gamma(ESL_RANDOMNESS *r, double a)
-{
- double aint;
-
- aint = floor(a);
- if (a == aint && a < 12.)
- return gamma_integer(r, (unsigned int) a);
- else if (a > 3.)
- return gamma_ahrens(r, a);
- else if (a < 1.)
- return gamma_fraction(r, a);
- else
- return gamma_integer(r, aint) + gamma_fraction(r, a-aint);
- return eslOK;
-}
-
-
-/*****************************************************************
-*# 4. Multinomial sampling from discrete probability n-vectors
-*****************************************************************/
-
-/* Function: esl_rnd_DChoose()
-* Synopsis: Return random choice from discrete multinomial distribution.
-*
-* Purpose: Make a random choice from a normalized discrete
-* distribution <p> of <N> elements, where <p>
-* is double-precision. Returns the index of the
-* selected element, $0..N-1$.
-*
-* <p> must be a normalized probability distribution
-* (i.e. must sum to one). Sampling distribution is
-* undefined otherwise: that is, a choice will always
-* be returned, but it might be an arbitrary one.
-*
-* All $p_i$ must be $>>$ <DBL_EPSILON> in order to
-* have a non-zero probability of being sampled.
-*
-* <esl_rnd_FChoose()> is the same, but for floats in <p>.
-*
-* Note: Why the while (1) loop? Very rarely, because of machine
-* floating point representation, our roll is "impossibly"
-* >= total sum, even though any roll of esl_random() is
-* < 1.0 and the total sum is supposed to be 1.0 by
-* definition. This can happen when the total_sum is not
-* really 1.0, but something just less than that in the
-* machine representation, and the roll happens to also be
-* very very close to 1. I have not examined this analytically,
-* but empirically, it occurs at a frequency of about 1/10^8
-* as measured for bug #sq5... which suggests it is on the
-* order of machine epsilon (not surprisingly). The while
-* loop makes you go around and try again; it must eventually
-* succeed.
-*
-* The while() loop then makes the function vulnerable to
-* an infinite loop if <p> sums to <=0 -- which shouldn't
-* happen, but we shouldn't infinite loop if it does,
-* either. That's why there's a check on the sum of
-* <p>. We return -1 in this case, a non-standard error code
-* for Easel.
-*
-* Throws: -1 on failure. (This is a non-standard error code for Easel,
-* but the only way an error can happen is if <p> isn't a
-* normalized probability distribution.)
-*/
-int
-esl_rnd_DChoose(ESL_RANDOMNESS *r, const double *p, int N)
-{
- double roll; /* random fraction */
- double sum; /* integrated prob */
- int i; /* counter over the probs */
-
- roll = esl_random(r);
- sum = 0.0;
-
- while (1) { /* see note in header about this while() */
- for (i = 0; i < N; i++)
- {
- sum += p[i];
- if (roll < sum) return i; /* success! */
- }
- if (sum < 0.99) ESL_EXCEPTION(-1, "unnormalized distribution"); /* avoid inf loop */
- }
- /*UNREACHED*/
- ESL_EXCEPTION(-1, "unreached code was reached. universe collapses.");
-}
-int
-esl_rnd_FChoose(ESL_RANDOMNESS *r, const float *p, int N)
-{
- float roll; /* random fraction */
- float sum; /* integrated prob */
- int i; /* counter over the probs */
-
- roll = esl_random(r);
- sum = 0.0;
-
- while (1) { /* see note in header about this while() */
- for (i = 0; i < N; i++)
- {
- sum += p[i];
- if (roll < sum) return i; /* success */
- }
- if (sum < 0.99) ESL_EXCEPTION(-1, "unnormalized distribution"); /* avoid inf loop */
- }
- /*UNREACHED*/
- ESL_EXCEPTION(-1, "unreached code was reached. universe collapses.");
-}
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_random.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_random.h
deleted file mode 100644
index 1cd7018..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_random.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/* Easel's portable, threadsafe random number generator.
-*
-* SRE, Wed Jul 14 11:23:57 2004 [St. Louis]
- * SVN $Id: esl_random.h 408 2009-10-19 17:47:46Z eddys $
-*/
-#ifndef ESL_RANDOM_INCLUDED
-#define ESL_RANDOM_INCLUDED
-
-#define eslRND_FAST 0
-#define eslRND_MERSENNE 1
-
-typedef struct {
- int type; /* eslRND_FAST | eslRND_MERSENNE */
- int mti; /* current position in mt[] table */
- uint32_t mt[624]; /* state of the Mersenne Twister */
- uint32_t x; /* state of the Knuth generator */
- uint32_t seed; /* seed used to init the RNG */
-} ESL_RANDOMNESS;
-
-/* esl_rnd_Roll(a) chooses a uniformly distributed integer
-* in the range 0..a-1, given an initialized ESL_RANDOMNESS r.
-*/
-#define esl_rnd_Roll(r, a) ((int) (esl_random(r) * (a)))
-
-/* 1. The ESL_RANDOMNESS object.
-*/
-extern ESL_RANDOMNESS *esl_randomness_Create(uint32_t seed);
-extern ESL_RANDOMNESS *esl_randomness_CreateFast(uint32_t seed);
-extern ESL_RANDOMNESS *esl_randomness_CreateTimeseeded(void); /* DEPRECATED */
-extern void esl_randomness_Destroy(ESL_RANDOMNESS *r);
-extern int esl_randomness_Init(ESL_RANDOMNESS *r, uint32_t seed);
-extern uint32_t esl_randomness_GetSeed(const ESL_RANDOMNESS *r);
-
-/* 2. The generator, esl_random().
-*/
-extern double esl_random(ESL_RANDOMNESS *r);
-
-/* 3. Other fundamental sampling (including Gaussian, gamma).
-*/
-extern double esl_rnd_UniformPositive(ESL_RANDOMNESS *r);
-extern double esl_rnd_Gaussian(ESL_RANDOMNESS *r, double mean, double stddev);
-extern double esl_rnd_Gamma(ESL_RANDOMNESS *r, double a);
-
-/* 4. Multinomial sampling from discrete probability n-vectors.
-*/
-extern int esl_rnd_DChoose(ESL_RANDOMNESS *r, const double *p, int N);
-extern int esl_rnd_FChoose(ESL_RANDOMNESS *r, const float *p, int N);
-
-
-#endif /*ESL_RANDOM_INCLUDED*/
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_randomseq.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_randomseq.cpp
deleted file mode 100644
index 7767344..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_randomseq.cpp
+++ /dev/null
@@ -1,1154 +0,0 @@
-/* Generating, shuffling, and randomizing sequences.
-*
-* Contents:
-* 1. Generating iid sequences.
-* 2. Shuffling sequences.
-* 3. Randomizing sequences.
-* 4. Generating iid sequences (digital mode).
-* 5. Shuffling sequences (digital mode).
-* 6. Randomizing sequences (digital mode).
-* 11. Copyright and license information
-*
-* SRE, Thu Apr 24 08:59:26 2008 [Janelia]
-* SVN $Id$
-*/
-#include "esl_config.h"
-
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <math.h>
-#include <time.h>
-
-#include "easel.h"
-#include "esl_random.h"
-#include "esl_randomseq.h"
-#ifdef eslAUGMENT_ALPHABET
-#include "esl_alphabet.h"
-#endif
-
-/*****************************************************************
-*# 1. Generating iid sequences.
-*****************************************************************/
-
-/* Function: esl_rsq_IID()
-* Synopsis: Generate an iid random text sequence.
-* Incept: SRE, Thu Aug 5 09:03:03 2004 [St. Louis]
-*
-* Purpose: Generate a <NUL>-terminated i.i.d. symbol string of length <L>,
-* $0..L-1$, and leave it in <s>. The symbol alphabet is given
-* as a string <alphabet> of <K> total symbols, and the iid
-* probability of each residue is given in <p>. The caller
-* must provide an <s> that is allocated for at least
-* <(L+1)*sizeof(char)>, room for <L> residues and the <NUL> terminator.
-*
-* <esl_rsq_fIID()> does the same, but for a floating point
-* probability vector <p>, rather than a double precision
-* vector.
-*
-* Args: r - ESL_RANDOMNESS object
-* alphabet - e.g. "ACGT"
-* p - probability distribution [0..n-1]
-* K - number of symbols in alphabet
-* L - length of generated sequence
-* s - the generated sequence.
-* Caller allocated, >= (L+1) * sizeof(char).
-*
-* Return: <eslOK> on success.
-*/
-int
-esl_rsq_IID(ESL_RANDOMNESS *r, const char *alphabet, const double *p, int K, int L, char *s)
-{
- int x;
-
- for (x = 0; x < L; x++)
- s[x] = alphabet[esl_rnd_DChoose(r,p,K)];
- s[x] = '\0';
- return eslOK;
-}
-int
-esl_rsq_fIID(ESL_RANDOMNESS *r, const char *alphabet, const float *p, int K, int L, char *s)
-{
- int x;
-
- for (x = 0; x < L; x++)
- s[x] = alphabet[esl_rnd_FChoose(r,p,K)];
- s[x] = '\0';
- return eslOK;
-}
-/*------------ end, generating iid sequences --------------------*/
-
-
-/*****************************************************************
-*# 2. Shuffling sequences.
-*****************************************************************/
-
-/* Function: esl_rsq_CShuffle()
-* Synopsis: Shuffle a text sequence.
-* Incept: SRE, Fri Feb 23 08:17:50 2007 [Casa de Gatos]
-*
-* Purpose: Returns a shuffled version of <s> in <shuffled>, given
-* a source of randomness <r>.
-*
-* Caller provides allocated storage for <shuffled>, for at
-* least the same length as <s>.
-*
-* <shuffled> may also point to the same storage as <s>,
-* in which case <s> is shuffled in place.
-*
-* Returns: <eslOK> on success.
-*/
-int
-esl_rsq_CShuffle(ESL_RANDOMNESS *r, const char *s, char *shuffled)
-{
- int L, i;
- char c;
-
- L = strlen(s);
- if (shuffled != s) strcpy(shuffled, s);
- while (L > 1) {
- i = esl_rnd_Roll(r, L);
- c = shuffled[i];
- shuffled[i] = shuffled[L-1];
- shuffled[L-1] = c;
- L--;
- }
- return eslOK;
-}
-
-/* Function: esl_rsq_CShuffleDP()
-* Synopsis: Shuffle a text sequence, preserving diresidue composition.
-* Incept: SRE, Fri Feb 23 08:56:03 2007 [Casa de Gatos]
-*
-* Purpose: Given string <s>, and a source of randomness <r>,
-* returns shuffled version in <shuffled>. The shuffle
-* is a "doublet-preserving" (DP) shuffle which
-* shuffles a sequence while exactly preserving both mono-
-* and di-symbol composition.
-*
-* <s> may only consist of alphabetic characters [a-zA-Z].
-* The shuffle is done case-insensitively. The shuffled
-* string result is all upper case.
-*
-* Caller provides storage in <shuffled> of at least the
-* same length as <s>.
-*
-* <shuffled> may also point to the same storage as <s>,
-* in which case <s> is shuffled in place.
-*
-* The algorithm does an internal allocation of a
-* substantial amount of temporary storage, on the order of
-* <26 * strlen(s)>, so an allocation failure is possible
-* if <s> is long enough.
-*
-* The algorithm is a search for a random Eulerian walk on
-* a directed multigraph \citep{AltschulErickson85}.
-*
-* If <s> is of length 2 or less, this is a no-op, and
-* <shuffled> is a copy of <s>.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEINVAL> if <s> contains nonalphabetic characters.
-* <eslEMEM> on allocation failure.
-*/
-int
-esl_rsq_CShuffleDP(ESL_RANDOMNESS *r, const char *s, char *shuffled)
-{
- int status; /* Easel return status code */
- int len; /* length of s */
- int pos; /* a position in s or shuffled */
- int x,y; /* indices of two characters */
- char **E = NULL; /* edge lists: E[0] is the edge list from vertex A */
- int *nE = NULL; /* lengths of edge lists */
- int *iE = NULL; /* positions in edge lists */
- int n; /* tmp: remaining length of an edge list to be shuffled */
- char sf; /* last character in shuffled */
- char Z[26]; /* connectivity in last edge graph Z */
- int keep_connecting; /* flag used in Z connectivity algorithm */
- int is_eulerian; /* flag used for when we've got a good Z */
-
- /* First, verify that the string is entirely alphabetic. */
- len = strlen(s);
- for (pos = 0; pos < len; pos++)
- if (! isalpha((int) s[pos]))
- ESL_EXCEPTION(eslEINVAL, "String contains nonalphabetic characters");
-
- /* The edge case of len <= 2 */
- if (len <= 2)
- {
- if (s != shuffled) strcpy(shuffled, s);
- return eslOK;
- }
-
- /* Allocations. */
- ESL_ALLOC_WITH_TYPE(E, char**, sizeof(char *) * 26); for (x = 0; x < 26; x++) E[x] = NULL;
- ESL_ALLOC_WITH_TYPE(nE, int*, sizeof(int) * 26); for (x = 0; x < 26; x++) nE[x] = 0;
- ESL_ALLOC_WITH_TYPE(iE, int*, sizeof(int) * 26); for (x = 0; x < 26; x++) iE[x] = 0;
- for (x = 0; x < 26; x++)
- ESL_ALLOC_WITH_TYPE(E[x], char*, sizeof(char) * (len-1));
-
- /* "(1) Construct the doublet graph G and edge ordering E
- * corresponding to S."
- *
- * Note that these also imply the graph G; and note,
- * for any list x with nE[x] = 0, vertex x is not part
- * of G.
- */
- x = toupper((int) s[0]) - 'A';
- for (pos = 1; pos < len; pos++)
- {
- y = toupper((int) s[pos]) - 'A';
- E[x][nE[x]] = y;
- nE[x]++;
- x = y;
- }
-
- /* Now we have to find a random Eulerian edge ordering. */
- sf = toupper((int) s[len-1]) - 'A';
- is_eulerian = 0;
- while (! is_eulerian)
- {
- /* "(2) For each vertex s in G except s_f, randomly select
- * one edge from the s edge list of E(S) to be the
- * last edge of the s list in a new edge ordering."
- *
- * select random edges and move them to the end of each
- * edge list.
- */
- for (x = 0; x < 26; x++)
- {
- if (nE[x] == 0 || x == sf) continue;
- pos = esl_rnd_Roll(r, nE[x]);
- ESL_SWAP(E[x][pos], E[x][nE[x]-1], char);
- }
-
- /* "(3) From this last set of edges, construct the last-edge
- * graph Z and determine whether or not all of its
- * vertices are connected to s_f."
- *
- * a probably stupid algorithm for looking at the
- * connectivity in Z: iteratively sweep through the
- * edges in Z, and build up an array (confusing called Z[x])
- * whose elements are 1 if x is connected to sf, else 0.
- */
- for (x = 0; x < 26; x++) Z[x] = 0;
- Z[(int) sf] = keep_connecting = 1;
-
- while (keep_connecting) {
- keep_connecting = 0;
- for (x = 0; x < 26; x++) {
- if (nE[x] == 0) continue;
- y = E[x][nE[x]-1]; /* xy is an edge in Z */
- if (Z[x] == 0 && Z[y] == 1) { /* x is connected to sf in Z */
- Z[x] = 1;
- keep_connecting = 1;
- }
- }
- }
-
- /* if any vertex in Z is tagged with a 0, it's
- * not connected to sf, and we won't have a Eulerian
- * walk.
- */
- is_eulerian = 1;
- for (x = 0; x < 26; x++) {
- if (nE[x] == 0 || x == sf) continue;
- if (Z[x] == 0) {
- is_eulerian = 0;
- break;
- }
- }
-
- /* "(4) If any vertex is not connected in Z to s_f, the
- * new edge ordering will not be Eulerian, so return to
- * (2). If all vertices are connected in Z to s_f,
- * the new edge ordering will be Eulerian, so
- * continue to (5)."
- *
- * e.g. note infinite loop while is_eulerian is FALSE.
- */
- }
-
- /* "(5) For each vertex s in G, randomly permute the remaining
- * edges of the s edge list of E(S) to generate the s
- * edge list of the new edge ordering E(S')."
- *
- * Essentially a StrShuffle() on the remaining nE[x]-1 elements
- * of each edge list; unfortunately our edge lists are arrays,
- * not strings, so we can't just call out to StrShuffle().
- */
- for (x = 0; x < 26; x++)
- for (n = nE[x] - 1; n > 1; n--)
- {
- pos = esl_rnd_Roll(r, n);
- ESL_SWAP(E[x][pos], E[x][n-1], char);
- }
-
- /* "(6) Construct sequence S', a random DP permutation of
- * S, from E(S') as follows. Start at the s_1 edge list.
- * At each s_i edge list, add s_i to S', delete the
- * first edge s_i,s_j of the edge list, and move to
- * the s_j edge list. Continue this process until
- * all edge lists are exhausted."
- */
- pos = 0;
- x = toupper((int) s[0]) - 'A';
- while (1)
- {
- shuffled[pos++] = 'A'+ x; /* add s_i to S' */
-
- y = E[x][iE[x]];
- iE[x]++; /* "delete" s_i,s_j from edge list */
-
- x = y; /* move to s_j edge list. */
-
- if (iE[x] == nE[x])
- break; /* the edge list is exhausted. */
- }
- shuffled[pos++] = 'A' + sf;
- shuffled[pos] = '\0';
-
- /* Reality checks.
- */
- if (x != sf) ESL_XEXCEPTION(eslEINCONCEIVABLE, "hey, you didn't end on s_f.");
- if (pos != len) ESL_XEXCEPTION(eslEINCONCEIVABLE, "hey, pos (%d) != len (%d).", pos, len);
-
- /* Free and return.
- */
- esl_Free2D((void **) E, 26);
- free(nE);
- free(iE);
- return eslOK;
-
-ERROR:
- esl_Free2D((void **) E, 26);
- if (nE != NULL) free(nE);
- if (iE != NULL) free(iE);
- return status;
-}
-
-/* Function: esl_rsq_CShuffleKmers()
-* Synopsis: Shuffle k-mers in a text sequence.
-* Incept: SRE, Tue Nov 17 16:55:57 2009 [NHGRI retreat, Gettysburg]
-*
-* Purpose: Consider a text sequence <s> as a string of nonoverlapping
-* k-mers of length <K>. Shuffle the k-mers, given a random
-* number generator <r>. Put the shuffled sequence in
-* <shuffled>.
-*
-* If the length of <s> is not evenly divisible by <K>, the
-* remaining residues are left (unshuffled) as a prefix to
-* the shuffled k-mers.
-*
-* For example, shuffling ABCDEFGHIJK as k=3-mers might
-* result in ABFIJKFGHCDE.
-*
-* Caller provides allocated storage for <shuffled>,
-* for at least the same length as <s>.
-*
-* <shuffled> may also point to the same storage as <s>,
-* in which case <s> is shuffled in place.
-*
-* There is almost no formally justifiable reason why you'd
-* use this shuffle -- it's not like it preserves any
-* particularly well-defined statistical properties of the
-* sequence -- but it's a quick and dirty way to sort of
-* maybe possibly preserve some higher-than-monomer
-* statistics.
-*
-* Args: r - an <ESL_RANDOMNESS> random generator
-* s - sequence to shuffle
-* K - size of k-mers to break <s> into
-* shuffled - RESULT: the shuffled sequence
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation error.
-*/
-int
-esl_rsq_CShuffleKmers(ESL_RANDOMNESS *r, const char *s, int K, char *shuffled)
-{
- int L = strlen(s);
- int W = L / K; /* number of kmers "words" excluding leftover prefix */
- int P = L % K; /* leftover residues in prefix */
- int i;
- char *swap = NULL;
- int status;
-
- if (shuffled != s) strcpy(shuffled, s);
- ESL_ALLOC_WITH_TYPE(swap,char*, sizeof(char) * K);
- while (W > 1)
- {
- i = esl_rnd_Roll(r, W); /* pick a word */
- strncpy(swap, shuffled + P + i*K, K); /* copy it to tmp space */
- strncpy(shuffled + P + i*K, shuffled + P + (W-1)*K, K); /* move word W-1 to i */
- strncpy(shuffled + P + (W-1)*K, swap, K); /* move word i to W-1 */
- W--;
- }
- free(swap);
- return eslOK;
-
-ERROR:
- free(swap);
- return status;
-}
-
-/* Function: esl_rsq_CReverse()
-* Synopsis: Reverse a string.
-* Incept: SRE, Sat Feb 24 10:06:34 2007 [Casa de Gatos]
-*
-* Purpose: Returns a reversed version of <s> in <rev>.
-*
-* There are no restrictions on the symbols that <s>
-* might contain.
-*
-* Caller provides storage in <rev> for at least
-* <(strlen(s)+1)*sizeof(char)>.
-*
-* <s> and <rev> can point to the same storage, in which
-* case <s> is reversed in place.
-*
-* Returns: <eslOK> on success.
-*/
-int
-esl_rsq_CReverse(const char *s, char *rev)
-{
- int L, i;
- char c;
-
- L = strlen(s);
- for (i = 0; i < L/2; i++)
- { /* swap ends */
- c = s[L-i-1];
- rev[L-i-1] = s[i];
- rev[i] = c;
- }
- if (L%2) { rev[i] = s[i]; } /* don't forget middle residue in odd-length s */
- rev[L] = '\0';
- return eslOK;
-}
-
-/* Function: esl_rsq_CShuffleWindows()
-* Synopsis: Shuffle local windows of a text string.
-* Incept: SRE, Sat Feb 24 10:17:59 2007 [Casa de Gatos]
-*
-* Purpose: Given string <s>, shuffle residues in nonoverlapping
-* windows of width <w>, and put the result in <shuffled>.
-* See [Pearson88].
-*
-* <s> and <shuffled> can be identical to shuffle in place.
-*
-* Caller provides storage in <shuffled> for at least
-* <(strlen(s)+1)*sizeof(char)>.
-*
-* Args: s - string to shuffle in windows
-* w - window size (typically 10 or 20)
-* shuffled - allocated space for window-shuffled result.
-*
-* Return: <eslOK> on success.
-*/
-int
-esl_rsq_CShuffleWindows(ESL_RANDOMNESS *r, const char *s, int w, char *shuffled)
-{
- int L;
- char c;
- int i, j, k;
-
- L = strlen(s);
- if (shuffled != s) strcpy(shuffled, s);
- for (i = 0; i < L; i += w)
- for (j = ESL_MIN(L-1, i+w-1); j > i; j--)
- {
- k = i + esl_rnd_Roll(r, j-i);
- c = shuffled[k]; /* semantics of a j,k swap, because we might be shuffling in-place */
- shuffled[k] = shuffled[j];
- shuffled[j] = c;
- }
- return eslOK;
-}
-/*------------------ end, shuffling sequences -------------------*/
-
-
-
-/*****************************************************************
-*# 3. Randomizing sequences
-*****************************************************************/
-
-/* Function: esl_rsq_CMarkov0()
-* Synopsis: Generate new text string of same 0th order Markov properties.
-* Incept: SRE, Sat Feb 24 08:47:43 2007 [Casa de Gatos]
-*
-* Purpose: Makes a random string <markoved> with the same length and
-* 0-th order Markov properties as <s>, given randomness
-* source <r>.
-*
-* <s> and <markoved> can be point to the same storage, in which
-* case <s> is randomized in place, destroying the original
-* string.
-*
-* <s> must consist only of alphabetic characters [a-zA-Z].
-* Statistics are collected case-insensitively over 26 possible
-* residues. The random string is generated all upper case.
-*
-* Args: s - input string
-* markoved - randomly generated string
-* (storage allocated by caller, at least strlen(s)+1)
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEINVAL> if <s> contains nonalphabetic characters.
-*/
-int
-esl_rsq_CMarkov0(ESL_RANDOMNESS *r, const char *s, char *markoved)
-{
- int L;
- int i;
- double p[26]; /* initially counts, then probabilities */
- int x;
-
- /* First, verify that the string is entirely alphabetic. */
- L = strlen(s);
- for (i = 0; i < L; i++)
- if (! isalpha((int) s[i]))
- ESL_EXCEPTION(eslEINVAL, "String contains nonalphabetic characters");
-
- /* Collect zeroth order counts and convert to frequencies.
- */
- for (x = 0; x < 26; x++) p[x] = 0.;
- for (i = 0; i < L; i++)
- p[(int)(toupper((int) s[i]) - 'A')] += 1.0;
- if (L > 0)
- for (x = 0; x < 26; x++) p[x] /= (double) L;
-
- /* Generate a random string using those p's. */
- for (i = 0; i < L; i++)
- markoved[i] = esl_rnd_DChoose(r, p, 26) + 'A';
- markoved[i] = '\0';
-
- return eslOK;
-}
-
-/* Function: esl_rsq_CMarkov1()
-* Synopsis: Generate new text string of same 1st order Markov properties.
-* Incept: SRE, Sat Feb 24 09:21:46 2007 [Casa de Gatos]
-*
-* Purpose: Makes a random string <markoved> with the same length and
-* 1st order (di-residue) Markov properties as <s>, given
-* randomness source <r>.
-*
-* <s> and <markoved> can be point to the same storage, in which
-* case <s> is randomized in place, destroying the original
-* string.
-*
-* <s> must consist only of alphabetic characters [a-zA-Z].
-* Statistics are collected case-insensitively over 26 possible
-* residues. The random string is generated all upper case.
-*
-* If <s> is of length 2 or less, this is a no-op, and
-* <markoved> is a copy of <s>.
-*
-* Args: s - input string
-* markoved - new randomly generated string
-* (storage allocated by caller, at least strlen(s)+1)
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEINVAL> if <s> contains nonalphabetic characters.
-*/
-int
-esl_rsq_CMarkov1(ESL_RANDOMNESS *r, const char *s, char *markoved)
-{
- int L;
- int i;
- int x,y;
- int i0; /* initial symbol */
- double p[26][26]; /* conditional probabilities p[x][y] = P(y | x) */
- double p0[26]; /* marginal probabilities P(x), just for initial residue. */
-
- /* First, verify that the string is entirely alphabetic. */
- L = strlen(s);
- for (i = 0; i < L; i++)
- if (! isalpha((int) s[i]))
- ESL_EXCEPTION(eslEINVAL, "String contains nonalphabetic characters");
-
- /* The edge case of len <= 2 */
- if (L <= 2)
- {
- if (s != markoved) strcpy(markoved, s);
- return eslOK;
- }
-
- /* Collect first order counts and convert to frequencies. */
- for (x = 0; x < 26; x++)
- for (y = 0; y < 26; y++)
- p[x][y] = 0.;
-
- i0 = x = toupper((int) s[0]) - 'A';
- for (i = 1; i < L; i++)
- {
- y = toupper((int) s[i]) - 'A';
- p[x][y] += 1.0;
- x = y;
- }
- p[x][i0] += 1.0; /* "circularized": avoids a bug; see markov1_bug utest */
-
- for (x = 0; x < 26; x++)
- {
- p0[x] = 0.;
- for (y = 0; y < 26; y++)
- p0[x] += p[x][y]; /* now p0[x] = marginal counts of x, inclusive of 1st residue */
-
- for (y = 0; y < 26; y++)
- p[x][y] = (p0[x] > 0. ? p[x][y] / p0[x] : 0.); /* now p[x][y] = P(y | x) */
-
- p0[x] /= (double) L; /* now p0[x] = marginal P(x) */
- }
-
- /* Generate a random string using those p's. */
- x = esl_rnd_DChoose(r, p0, 26);
- markoved[0] = x + 'A';
- for (i = 1; i < L; i++)
- {
- y = esl_rnd_DChoose(r, p[x], 26);
- markoved[i] = y + 'A';
- x = y;
- }
- markoved[L] = '\0';
-
- return eslOK;
-}
-/*----------------- end, randomizing sequences ------------------*/
-
-
-
-/*****************************************************************
-*# 4. Generating iid sequences (digital mode).
-*****************************************************************/
-#ifdef eslAUGMENT_ALPHABET
-
-/* Function: esl_rsq_xIID()
-* Synopsis: Generate an iid random digital sequence.
-* Incept: SRE, Sat Feb 17 16:39:01 2007 [Casa de Gatos]
-*
-* Purpose: Generate an i.i.d. digital sequence of length <L> (1..L) and
-* leave it in <dsq>. The i.i.d. probability of each residue is
-* given in the probability vector <p>, and the number of
-* possible residues (the alphabet size) is given by <K>.
-* (Only the alphabet size <K> is needed here, as opposed to
-* a digital <ESL_ALPHABET>, but the caller presumably
-* has a digital alphabet.) The caller must provide a <dsq>
-* allocated for at least <L+2> residues of type <ESL_DSQ>,
-* room for <L> residues and leading/trailing digital sentinel bytes.
-*
-* <esl_rsq_xfIID()> does the same, but for a
-* single-precision float vector <p> rather than a
-* double-precision vector <p>.
-*
-* Args: r - ESL_RANDOMNESS object
-* p - probability distribution [0..n-1]
-* K - number of symbols in alphabet
-* L - length of generated sequence
-* ret_s - RETURN: the generated sequence.
-* (Caller-allocated, >= (L+2)*ESL_DSQ)
-*
-* Return: <eslOK> on success.
-*/
-int
-esl_rsq_xIID(ESL_RANDOMNESS *r, const double *p, int K, int L, ESL_DSQ *dsq)
-{
- int x;
-
- dsq[0] = dsq[L+1] = eslDSQ_SENTINEL;
- for (x = 1; x <= L; x++)
- dsq[x] = esl_rnd_DChoose(r,p,K);
- return eslOK;
-}
-int
-esl_rsq_xfIID(ESL_RANDOMNESS *r, const float *p, int K, int L, ESL_DSQ *dsq)
-{
- int x;
-
- dsq[0] = dsq[L+1] = eslDSQ_SENTINEL;
- for (x = 1; x <= L; x++)
- dsq[x] = esl_rnd_FChoose(r,p,K);
- return eslOK;
-}
-
-#endif /*eslAUGMENT_ALPHABET*/
-/*--------------------- end, digital generation ---------------- */
-
-
-
-/*****************************************************************
-*# 5. Shuffling sequences (digital mode)
-*****************************************************************/
-#ifdef eslAUGMENT_ALPHABET
-
-/* Function: esl_rsq_XShuffle()
-* Synopsis: Shuffle a digital sequence.
-* Incept: SRE, Fri Feb 23 08:24:20 2007 [Casa de Gatos]
-*
-* Purpose: Given a digital sequence <dsq> of length <L> residues,
-* shuffle it, and leave the shuffled version in <shuffled>.
-*
-* Caller provides allocated storage for <shuffled> for at
-* least the same length as <dsq>.
-*
-* <shuffled> may also point to the same storage as <dsq>,
-* in which case <dsq> is shuffled in place.
-*
-* Returns: <eslOK> on success.
-*/
-int
-esl_rsq_XShuffle(ESL_RANDOMNESS *r, const ESL_DSQ *dsq, int L, ESL_DSQ *shuffled)
-{
- int i;
- ESL_DSQ x;
-
- if (dsq != shuffled) esl_abc_dsqcpy(dsq, L, shuffled);
- while (L > 1) {
- i = 1 + esl_rnd_Roll(r, L);
- x = shuffled[i];
- shuffled[i] = shuffled[L];
- shuffled[L] = x;
- L--;
- }
- return eslOK;
-}
-
-/* Function: esl_rsq_XShuffleDP()
-* Synopsis: Shuffle a digital sequence, preserving diresidue composition.
-* Incept: SRE, Fri Feb 23 09:23:47 2007 [Casa de Gatos]
-*
-* Purpose: Same as <esl_rsq_CShuffleDP()>, except for a digital
-* sequence <dsq> of length <L>, encoded in a digital alphabet
-* of <K> residues.
-*
-* <dsq> may only consist of residue codes <0..K-1>; if it
-* contains gaps, degeneracies, or missing data, pass the alphabet's
-* <Kp> size, not its canonical <K>.
-*
-* If <L> $\leq 2$, this is a no-op; <shuffled> is a copy of <dsq>.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEINVAL> if <s> contains digital residue codes
-* outside the range <0..K-1>.
-* <eslEMEM> on allocation failure.
-*/
-int
-esl_rsq_XShuffleDP(ESL_RANDOMNESS *r, const ESL_DSQ *dsq, int L, int K, ESL_DSQ *shuffled)
-{
- int status; /* Easel return status code */
- int i; /* a position in dsq or shuffled */
- ESL_DSQ x,y; /* indices of two characters */
- ESL_DSQ **E = NULL; /* edge lists: E[0] is the edge list from vertex A */
- int *nE = NULL; /* lengths of edge lists */
- int *iE = NULL; /* positions in edge lists */
- int n; /* tmp: remaining length of an edge list to be shuffled */
- ESL_DSQ sf; /* last character in shuffled */
- ESL_DSQ *Z; /* connectivity in last edge graph Z */
- int keep_connecting; /* flag used in Z connectivity algorithm */
- int is_eulerian; /* flag used for when we've got a good Z */
-
- /* First, verify that we can deal with all the residues in dsq. */
- for (i = 1; i <= L; i++)
- if (dsq[i] >= K)
- ESL_EXCEPTION(eslEINVAL, "dsq contains unexpected residue codes");
-
- /* The edge case of L <= 2 */
- if (L <= 2)
- {
- if (dsq != shuffled) memcpy(shuffled, dsq, sizeof(ESL_DSQ) * (L+2));
- return eslOK;
- }
-
- /* Allocations. */
- ESL_ALLOC_WITH_TYPE(nE, int*, sizeof(int) * K); for (x = 0; x < K; x++) nE[x] = 0;
- ESL_ALLOC_WITH_TYPE(E, ESL_DSQ**, sizeof(ESL_DSQ *) * K); for (x = 0; x < K; x++) E[x] = NULL;
- ESL_ALLOC_WITH_TYPE(iE, int*, sizeof(int) * K); for (x = 0; x < K; x++) iE[x] = 0;
- ESL_ALLOC_WITH_TYPE(Z, ESL_DSQ*, sizeof(ESL_DSQ) * K);
- for (x = 0; x < K; x++)
- ESL_ALLOC_WITH_TYPE(E[x], ESL_DSQ*, sizeof(ESL_DSQ) * (L-1));
-
- /* "(1) Construct the doublet graph G and edge ordering E... */
- x = dsq[1];
- for (i = 2; i <= L; i++) {
- E[x][nE[x]] = dsq[i];
- nE[x]++;
- x = dsq[i];
- }
-
- /* Now we have to find a random Eulerian edge ordering. */
- sf = dsq[L];
- is_eulerian = 0;
- while (! is_eulerian)
- {
- for (x = 0; x < K; x++) {
- if (nE[x] == 0 || x == sf) continue;
- i = esl_rnd_Roll(r, nE[x]);
- ESL_SWAP(E[x][i], E[x][nE[x]-1], ESL_DSQ);
- }
-
- for (x = 0; x < K; x++) Z[x] = 0;
- Z[(int) sf] = keep_connecting = 1;
- while (keep_connecting) {
- keep_connecting = 0;
- for (x = 0; x < K; x++) {
- if (nE[x] == 0) continue;
- y = E[x][nE[x]-1]; /* xy is an edge in Z */
- if (Z[x] == 0 && Z[y] == 1) { /* x is connected to sf in Z */
- Z[x] = 1;
- keep_connecting = 1;
- }
- }
- }
-
- is_eulerian = 1;
- for (x = 0; x < K; x++) {
- if (nE[x] == 0 || x == sf) continue;
- if (Z[x] == 0) {
- is_eulerian = 0;
- break;
- }
- }
- }
-
- /* "(5) For each vertex s in G, randomly permute... */
- for (x = 0; x < K; x++)
- for (n = nE[x] - 1; n > 1; n--)
- {
- i = esl_rnd_Roll(r, n);
- ESL_SWAP(E[x][i], E[x][n-1], ESL_DSQ);
- }
-
- /* "(6) Construct sequence S'... */
- i = 1;
- x = dsq[1];
- while (1) {
- shuffled[i++] = x;
- y = E[x][iE[x]++];
- x = y;
- if (iE[x] == nE[x]) break;
- }
- shuffled[i++] = sf;
- shuffled[i] = eslDSQ_SENTINEL;
- shuffled[0] = eslDSQ_SENTINEL;
-
- /* Reality checks. */
- if (x != sf) ESL_XEXCEPTION(eslEINCONCEIVABLE, "hey, you didn't end on s_f.");
- if (i != L+1) ESL_XEXCEPTION(eslEINCONCEIVABLE, "hey, i (%d) overran L+1 (%d).", i, L+1);
-
- esl_Free2D((void **) E, K);
- free(nE);
- free(iE);
- free(Z);
- return eslOK;
-
-ERROR:
- esl_Free2D((void **) E, K);
- if (nE != NULL) free(nE);
- if (iE != NULL) free(iE);
- if (Z != NULL) free(Z);
- return status;
-}
-
-/* Function: esl_rsq_XShuffleKmers()
-* Synopsis: Shuffle k-mers in a digital sequence.
-* Incept: SRE, Tue Nov 17 18:58:00 2009 [NHGRI retreat, Gettysburg]
-*
-* Purpose: Same as <esl_rsq_CShuffleKmers()>, but shuffle digital
-* sequence <dsq> of length <L> into digital result <shuffled>.
-*
-* Args: r - an <ESL_RANDOMNESS> random generator
-* dsq - sequence to shuffle
-* K - size of k-mers to break <s> into
-* shuffled - RESULT: the shuffled sequence
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation error.
-*/
-int
-esl_rsq_XShuffleKmers(ESL_RANDOMNESS *r, const ESL_DSQ *dsq, int L, int K, ESL_DSQ *shuffled)
-{
- int W = L / K; /* number of kmers "words" excluding leftover prefix */
- int P = L % K; /* leftover residues in prefix */
- int i;
- char *swap = NULL;
- int status;
-
- if (shuffled != dsq) esl_abc_dsqcpy(dsq, L, shuffled);
- ESL_ALLOC_WITH_TYPE(swap,char*, sizeof(char) * K);
- while (W > 1)
- {
- i = esl_rnd_Roll(r, W); /* pick a word */
- memcpy(swap, shuffled + P + i*K, K); /* copy it to tmp space */
- memcpy(shuffled + P + i*K, shuffled + P + (W-1)*K, K); /* move word W-1 to i */
- memcpy(shuffled + P + (W-1)*K, swap, K); /* move word i to W-1 */
- W--;
- }
- free(swap);
- return eslOK;
-
-ERROR:
- free(swap);
- return status;
-}
-
-/* Function: esl_rsq_XReverse()
-* Synopsis: Reverse a digital sequence.
-* Incept: SRE, Sat Feb 24 10:13:30 2007 [Casa de Gatos]
-*
-* Purpose: Given a digital sequence <dsq> of length <L>, return
-* reversed version of it in <rev>.
-*
-* Caller provides storage in <rev> for at least
-* <(L+2)*sizeof(ESL_DSQ)>.
-*
-* <s> and <rev> can point to the same storage, in which
-* case <s> is reversed in place.
-*
-* Returns: <eslOK> on success.
-*/
-int
-esl_rsq_XReverse(const ESL_DSQ *dsq, int L, ESL_DSQ *rev)
-{
- int i;
- ESL_DSQ x;
-
- for (i = 1; i <= L/2; i++)
- { /* swap ends */
- x = dsq[L-i+1];
- rev[L-i+1] = dsq[i];
- rev[i] = x;
- }
- if (L%2) { rev[i] = dsq[i]; } /* don't forget middle residue in odd-length dsq */
- rev[0] = eslDSQ_SENTINEL;
- rev[L+1] = eslDSQ_SENTINEL;
- return eslOK;
-}
-
-
-/* Function: esl_rsq_XShuffleWindows()
-* Synopsis: Shuffle local windows of a digital sequence.
-* Incept: SRE, Sat Feb 24 10:51:31 2007 [Casa de Gatos]
-*
-* Purpose: Given a digital sequence <dsq> of length <L>, shuffle
-* residues in nonoverlapping windows of width <w>, and put
-* the result in <shuffled>. See [Pearson88].
-*
-* Caller provides storage in <shuffled> for at least
-* <(L+2)*sizeof(ESL_DSQ)>.
-*
-* <dsq> and <shuffled> can be identical to shuffle in place.
-*
-* Args: dsq - digital sequence to shuffle in windows
-* L - length of <dsq>
-* w - window size (typically 10 or 20)
-* shuffled - allocated space for window-shuffled result.
-*
-* Return: <eslOK> on success.
-*/
-int
-esl_rsq_XShuffleWindows(ESL_RANDOMNESS *r, const ESL_DSQ *dsq, int L, int w, ESL_DSQ *shuffled)
-{
- ESL_DSQ x;
- int i, j, k;
-
- if (dsq != shuffled) esl_abc_dsqcpy(dsq, L, shuffled);
- for (i = 1; i <= L; i += w)
- for (j = ESL_MIN(L, i+w-1); j > i; j--)
- {
- k = i + esl_rnd_Roll(r, j-i+1);
- x = shuffled[k]; /* semantics of a j,k swap, because we might be shuffling in-place */
- shuffled[k] = shuffled[j];
- shuffled[j] = x;
- }
- return eslOK;
-}
-
-#endif /*eslAUGMENT_ALPHABET*/
-/*------------------- end, digital shuffling -------------------*/
-
-
-
-/*****************************************************************
-*# 6. Randomizing sequences (digital mode)
-*****************************************************************/
-#ifdef eslAUGMENT_ALPHABET
-
-/* Function: esl_rsq_XMarkov0()
-* Synopsis: Generate new digital sequence of same 0th order Markov properties.
-* Incept: SRE, Sat Feb 24 09:12:32 2007 [Casa de Gatos]
-*
-* Purpose: Same as <esl_rsq_CMarkov0()>, except for a digital
-* sequence <dsq> of length <L>, encoded in a digital
-* alphabet of <K> residues; caller provides storage
-* for the randomized sequence <markoved> for at least
-* <L+2> <ESL_DSQ> residues, including the two flanking
-* sentinel bytes.
-*
-* <dsq> therefore may only consist of residue codes
-* in the range <0..K-1>. If it contains gaps,
-* degeneracies, or missing data, pass the alphabet's
-* <Kp> size, not its canonical <K>.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEINVAL> if <s> contains digital residue codes outside
-* the range <0..K-1>.
-* <eslEMEM> on allocation failure.
-*/
-int
-esl_rsq_XMarkov0(ESL_RANDOMNESS *r, const ESL_DSQ *dsq, int L, int K, ESL_DSQ *markoved)
-{
- int status;
- int i;
- double *p = NULL; /* initially counts, then probabilities */
- int x;
-
- /* First, verify that the string is entirely alphabetic. */
- for (i = 1; i <= L; i++)
- if (dsq[i] >= K)
- ESL_XEXCEPTION(eslEINVAL, "String contains unexpected residue codes");
-
- ESL_ALLOC_WITH_TYPE(p, double*, sizeof(double) * K);
- for (x = 0; x < K; x++) p[x] = 0.;
-
- for (i = 1; i <= L; i++)
- p[(int) dsq[i]] += 1.0;
- if (L > 0)
- for (x = 0; x < K; x++) p[x] /= (double) L;
-
- for (i = 1; i <= L; i++)
- markoved[i] = esl_rnd_DChoose(r, p, K);
- markoved[0] = eslDSQ_SENTINEL;
- markoved[L+1] = eslDSQ_SENTINEL;
-
- free(p);
- return eslOK;
-
-ERROR:
- if (p != NULL) free(p);
- return status;
-}
-
-
-
-/* Function: esl_rsq_XMarkov1()
-* Synopsis: Generate new digital sequence of same 1st order Markov properties.
-* Incept: SRE, Sat Feb 24 09:46:09 2007 [Casa de Gatos]
-*
-* Purpose: Same as <esl_rsq_CMarkov1()>, except for a digital
-* sequence <dsq> of length <L>, encoded in a digital
-* alphabet of <K> residues. Caller provides storage
-* for the randomized sequence <markoved> for at least
-* <L+2> <ESL_DSQ> residues, including the two flanking
-* sentinel bytes.
-*
-* <dsq> and <markoved> can be point to the same storage, in which
-* case <dsq> is randomized in place, destroying the original
-* string.
-*
-* <dsq> therefore may only consist of residue codes
-* in the range <0..K-1>. If it contains gaps,
-* degeneracies, or missing data, pass the alphabet's
-* <Kp> size, not its canonical <K>.
-*
-* If <L> $\leq 2$, this is a no-op; <markoved> is a copy of <dsq>.
-*
-* Args: dsq - input digital sequence 1..L
-* L - length of dsq
-* K - residue codes in dsq are in range 0..K-1
-* markoved - new randomly generated digital sequence;
-* storage allocated by caller, at least (L+2)*ESL_DSQ;
-* may be same as dsq to randomize in place.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEINVAL> if <s> contains digital residue codes outside
-* the range <0..K-1>.
-* <eslEMEM> on allocation failure.
-*/
-int
-esl_rsq_XMarkov1(ESL_RANDOMNESS *r, const ESL_DSQ *dsq, int L, int K, ESL_DSQ *markoved)
-{
- int status;
- int i;
- ESL_DSQ x,y;
- ESL_DSQ i0; /* initial symbol */
- double **p; /* conditional probabilities p[x][y] = P(y | x) */
- double *p0; /* marginal probabilities P(x), just for initial residue. */
-
- /* validate the input string */
- for (i = 1; i <= L; i++)
- if (dsq[i] >= K)
- ESL_XEXCEPTION(eslEINVAL, "String contains unexpected residue codes");
-
- /* The edge case of L <= 2 */
- if (L <= 2)
- {
- if (dsq != markoved) memcpy(markoved, dsq, sizeof(ESL_DSQ) * (L+2));
- return eslOK;
- }
-
- /* allocations */
- ESL_ALLOC_WITH_TYPE(p0, double*, sizeof(double) * K); for (x = 0; x < K; x++) p0[x] = 0.;
- ESL_ALLOC_WITH_TYPE(p, double**, sizeof(double *) * K); for (x = 0; x < K; x++) p[x] = NULL;
- for (x = 0; x < K; x++)
- { ESL_ALLOC_WITH_TYPE(p[x], double*, sizeof(double) * K); for (y = 0; y < K; y++) p[x][y] = 0.; }
-
- /* Collect first order counts and convert to frequencies. */
- i0 = x = dsq[1];
- for (i = 2; i <= L; i++)
- {
- y = dsq[i];
- p[x][y] += 1.0;
- x = y;
- }
- p[x][i0] += 1.0; /* "circularized": avoids a bug; see markov1_bug utest */
-
- for (x = 0; x < K; x++)
- {
- p0[x] = 0.;
- for (y = 0; y < K; y++)
- p0[x] += p[x][y]; /* now p0[x] = marginal counts of x, inclusive of 1st residue */
-
- for (y = 0; y < K; y++)
- p[x][y] = (p0[x] > 0. ? p[x][y] / p0[x] : 0.); /* now p[x][y] = P(y | x) */
-
- p0[x] /= (double) L; /* now p0[x] = marginal P(x) inclusive of 1st residue */
- }
-
- /* Generate a random string using those p's. */
- markoved[1] = esl_rnd_DChoose(r, p0, K);
- for (i = 2; i <= L; i++)
- markoved[i] = esl_rnd_DChoose(r, p[markoved[i-1]], K);
-
- markoved[0] = eslDSQ_SENTINEL;
- markoved[L+1] = eslDSQ_SENTINEL;
-
- esl_Free2D((void**)p, K);
- free(p0);
- return eslOK;
-
-ERROR:
- esl_Free2D((void**)p, K);
- if (p0 != NULL) free(p0);
- return status;
-}
-
-#endif /*eslAUGMENT_ALPHABET*/
-/*------------------ end, digital randomizing -------------------*/
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_randomseq.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_randomseq.h
deleted file mode 100644
index 8befedc..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_randomseq.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/* Generating, shuffling, and randomizing sequences.
-*
-* SRE, Thu Apr 24 09:38:13 2008 [Janelia]
-* SVN $Id$
-*/
-#ifndef ESL_RANDOMSEQ_INCLUDED
-#define ESL_RANDOMSEQ_INCLUDED
-
-#include "esl_random.h"
-
-/* 1. Generating iid sequences. */
-extern int esl_rsq_IID (ESL_RANDOMNESS *r, const char *alphabet, const double *p, int K, int L, char *s);
-extern int esl_rsq_fIID (ESL_RANDOMNESS *r, const char *alphabet, const float *p, int K, int L, char *s);
-
-/* 2. Shuffling sequences. */
-extern int esl_rsq_CShuffle (ESL_RANDOMNESS *r, const char *s, char *shuffled);
-extern int esl_rsq_CShuffleDP (ESL_RANDOMNESS *r, const char *s, char *shuffled);
-extern int esl_rsq_CShuffleKmers (ESL_RANDOMNESS *r, const char *s, int K, char *shuffled);
-extern int esl_rsq_CReverse (const char *s, char *rev);
-extern int esl_rsq_CShuffleWindows(ESL_RANDOMNESS *r, const char *s, int w, char *shuffled);
-
-/* 3. Randomizing sequences */
-extern int esl_rsq_CMarkov0 (ESL_RANDOMNESS *r, const char *s, char *markoved);
-extern int esl_rsq_CMarkov1 (ESL_RANDOMNESS *r, const char *s, char *markoved);
-
-/* 4. Generating iid sequences (digital mode). */
-extern int esl_rsq_xIID (ESL_RANDOMNESS *r, const double *p, int K, int L, ESL_DSQ *dsq);
-extern int esl_rsq_xfIID(ESL_RANDOMNESS *r, const float *p, int K, int L, ESL_DSQ *dsq);
-
-/* 5. Shuffling sequences (digital mode). */
-extern int esl_rsq_XShuffle (ESL_RANDOMNESS *r, const ESL_DSQ *dsq, int L, ESL_DSQ *shuffled);
-extern int esl_rsq_XShuffleDP (ESL_RANDOMNESS *r, const ESL_DSQ *dsq, int L, int K, ESL_DSQ *shuffled);
-extern int esl_rsq_XShuffleKmers (ESL_RANDOMNESS *r, const ESL_DSQ *dsq, int L, int K, ESL_DSQ *shuffled);
-extern int esl_rsq_XReverse(const ESL_DSQ *dsq, int L, ESL_DSQ *rev);
-extern int esl_rsq_XShuffleWindows(ESL_RANDOMNESS *r, const ESL_DSQ *dsq, int L, int w, ESL_DSQ *shuffled);
-
-/* 6. Randomizing sequences (digital mode) */
-extern int esl_rsq_XMarkov0 (ESL_RANDOMNESS *r, const ESL_DSQ *dsq, int L, int K, ESL_DSQ *markoved);
-extern int esl_rsq_XMarkov1 (ESL_RANDOMNESS *r, const ESL_DSQ *dsq, int L, int K, ESL_DSQ *markoved);
-
-#endif /*ESL_RANDOMSEQ_INCLUDED*/
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_ratematrix.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_ratematrix.cpp
deleted file mode 100644
index e83d613..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_ratematrix.cpp
+++ /dev/null
@@ -1,552 +0,0 @@
-/* Routines for manipulating evolutionary rate matrices.
-*
-* There is no specific object for this module. Rate matrix
-* operations use square nxn ESL_DMATRIX data objects. (The rmx
-* module essentially subclasses the dmx module.)
-*
-* An instantaneous rate matrix is usually denoted by Q. A
-* conditional probability matrix (for a specific t) is usually
-* denoted by P. An exchangeability matrix is denoted by E.
-* A stationary residue probability vector is denoted by pi.
-*
-* Two important relations among these:
-*
-* Q in terms of E and pi:
-* $Q_{ij} = E_{ij} \pi_j$ for $i \neq j$;
-* $Q_{ii} = -\sum_{j \neq i} Q_{ij}$
-*
-* P in terms of Q and t:
-* $P = e^{tQ}$
-*
-* Contents:
-* 1. Setting standard rate matrix models.
-* 2. Debugging routines for validating or dumping rate matrices.
-* 3. Other routines in the exposed ratematrix API.
-* 9. Copyright/license information.
-*
-*
-*
-* See also:
-* paml - i/o of rate matrices from/to data files in PAML format
-*
-* original: SRE, Tue Jul 13 15:51:23 2004 [St. Louis]
-* upgrade to Easel: SRE, Thu Mar 8 17:59:35 2007 [Janelia]
-* SVN $Id: esl_ratematrix.c 326 2009-02-28 15:49:07Z eddys $
-*/
-
-#include <hmmer3/easel/esl_config.h>
-
-#include <math.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/hmmer.h>
-
-#include <hmmer3/easel/esl_dmatrix.h>
-#include <hmmer3/easel/esl_vectorops.h>
-#include <hmmer3/easel/esl_ratematrix.h>
-
-
-/*****************************************************************
-* 1. Setting standard rate matrix models.
-*****************************************************************/
-
-/* Function: esl_rmx_SetWAG()
-* Incept: SRE, Thu Mar 8 18:00:00 2007 [Janelia]
-*
-* Purpose: Sets a $20 \times 20$ rate matrix <Q> to WAG parameters.
-* The caller allocated <Q>.
-*
-* If <pi> is non-<NULL>, it provides a vector of 20 amino
-* acid stationary probabilities in Easel alphabetic order,
-* A..Y, and the WAG stationary probabilities are set to
-* these desired $\pi_i$. If <pi> is <NULL>, the default
-* WAG stationary probabilities are used.
-*
-* The WAG parameters are a maximum likelihood
-* parameterization obtained by Whelan and Goldman
-* \citep{WhelanGoldman01}.
-*
-* Note: The data table was reformatted from wag.dat by the UTILITY1
-* executable in the paml module. The wag.dat file was obtained from
-* \url{http://www.ebi.ac.uk/goldman/WAG/wag.dat}. A copy
-* is in formats/wag.dat.
-*
-* Args: Q - a 20x20 rate matrix to set, allocated by caller.
-* pi - desired stationary probabilities A..Y, or
-* NULL to use WAG defaults.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEINVAL> if <Q> isn't a 20x20 general matrix; and
-* the state of <Q> is undefined.
-*/
-int
-esl_rmx_SetWAG(ESL_DMATRIX *Q, const double *pi)
-{
- static const double wagE[190] = {
- 1.027040, 0.738998, 0.030295, 1.582850, 0.021352, 6.174160, 0.210494, 0.398020, 0.046730, 0.081134,
- 1.416720, 0.306674, 0.865584, 0.567717, 0.049931, 0.316954, 0.248972, 0.930676, 0.570025, 0.679371,
- 0.249410, 0.193335, 0.170135, 0.039437, 0.127395, 1.059470, 0.030450, 0.138190, 0.906265, 0.074034,
- 0.479855, 2.584430, 0.088836, 0.373558, 0.890432, 0.323832, 0.397915, 0.384287, 0.084805, 0.154263,
- 2.115170, 0.061304, 0.499462, 3.170970, 0.257555, 0.893496, 0.390482, 0.103754, 0.315124, 1.190630,
- 0.174100, 0.404141, 4.257460, 0.934276, 4.854020, 0.509848, 0.265256, 5.429420, 0.947198, 0.096162,
- 1.125560, 3.956290, 0.554236, 3.012010, 0.131528, 0.198221, 1.438550, 0.109404, 0.423984, 0.682355,
- 0.161444, 0.243570, 0.696198, 0.099929, 0.556896, 0.415844, 0.171329, 0.195081, 0.908598, 0.098818,
- 0.616783, 5.469470, 0.099921, 0.330052, 4.294110, 0.113917, 3.894900, 0.869489, 1.545260, 1.543640,
- 0.933372, 0.551571, 0.528191, 0.147304, 0.439157, 0.102711, 0.584665, 2.137150, 0.186979, 5.351420,
- 0.497671, 0.683162, 0.635346, 0.679489, 3.035500, 3.370790, 1.407660, 1.071760, 0.704939, 0.545931,
- 1.341820, 0.740169, 0.319440, 0.967130, 0.344739, 0.493905, 3.974230, 1.613280, 1.028870, 1.224190,
- 2.121110, 0.512984, 0.374866, 0.822765, 0.171903, 0.225833, 0.473307, 1.458160, 1.386980, 0.326622,
- 1.516120, 2.030060, 0.795384, 0.857928, 0.554413, 4.378020, 2.006010, 1.002140, 0.152335, 0.588731,
- 0.649892, 0.187247, 0.118358, 7.821300, 0.305434, 1.800340, 2.058450, 0.196246, 0.314887, 0.301281,
- 0.251849, 0.232739, 1.388230, 0.113133, 0.717070, 0.129767, 0.156557, 1.529640, 0.336983, 0.262569,
- 0.212483, 0.137505, 0.665309, 0.515706, 0.071917, 0.139405, 0.215737, 1.163920, 0.523742, 0.110864,
- 0.365369, 0.240735, 0.543833, 0.325711, 0.196303, 6.454280, 0.103604, 3.873440, 0.420170, 0.133264,
- 0.398618, 0.428437, 1.086000, 0.216046, 0.227710, 0.381533, 0.786993, 0.291148, 0.314730, 2.485390};
- static const double wagpi[20] = {
- 0.086628, 0.019308, 0.057045, 0.058059,
- 0.038432, 0.083252, 0.024431, 0.048466,
- 0.062029, 0.086209, 0.019503, 0.039089,
- 0.045763, 0.036728, 0.043972, 0.069518,
- 0.061013, 0.070896, 0.014386, 0.035274
- }; // taken from esl_composition_WAG from easel.c
- int i,j,z;
-
- if (Q->m != 20 || Q->n != 20 || Q->type != ESL_DMATRIX::eslGENERAL)
- ESL_EXCEPTION(eslEINVAL, "Q must be a 20x20 general matrix");
- // we already filled wagpi in definition
- //esl_composition_WAG(wagpi);
-
- /* 1. Transfer the wag E lower triagonal matrix directly into Q. */
- z = 0;
- for (i = 0; i < 20; i++)
- {
- Q->mx[i][i] = 0.; /* code below depends on this zero initialization */
- for (j = 0; j < i; j++) {
- Q->mx[i][j] = wagE[z++];
- Q->mx[j][i] = Q->mx[i][j];
- }
- }
-
- /* 2. Set offdiagonals Q_ij = E_ij * pi_j */
- for (i = 0; i < 20; i++)
- for (j = 0; j < 20; j++)
- if (pi != NULL) Q->mx[i][j] *= pi[j];
- else Q->mx[i][j] *= wagpi[j];
-
- /* 3. Set diagonal Q_ii to -\sum_{i \neq j} Q_ij */
- for (i = 0; i < 20; i++)
- Q->mx[i][i] = -1. * esl_vec_DSum(Q->mx[i], 20);
-
- /* 4. Renormalize matrix to units of 1 substitution/site. */
- if (pi != NULL) esl_rmx_ScaleTo(Q, pi, 1.0);
- else esl_rmx_ScaleTo(Q, wagpi, 1.0);
-
- return eslOK;
-}
-
-
-/* Function: esl_rmx_SetJukesCantor()
-* Incept: SRE, Thu Mar 15 13:04:56 2007 [Janelia]
-*
-* Purpose: Sets a 4x4 rate matrix to a Jukes-Cantor model,
-* scaled to units of 1t = 1.0 substitutions/site.
-*
-* Note: eigenvalues of Q are 0, -4\alpha, -4\alpha, -4\alpha
-*/
-int
-esl_rmx_SetJukesCantor(ESL_DMATRIX *Q)
-{
- int i,j;
- double pi[4] = { 0.25, 0.25, 0.25, 0.25 };
-
- if (Q->m != 4 || Q->n != 4 || Q->type != ESL_DMATRIX::eslGENERAL)
- ESL_EXCEPTION(eslEINVAL, "Q must be a 4x4 general matrix");
-
- for (i = 0; i < 4; i++) {
- for (j = 0; j < 4; j++)
- {
- if (i != j) Q->mx[i][j] = 1.0;
- else Q->mx[i][j] = 0.0;
- }
- Q->mx[i][i] = -1. * esl_vec_DSum(Q->mx[i], 4);
- }
- esl_rmx_ScaleTo(Q, pi, 1.0);
- return eslOK;
-}
-
-
-/* Function: esl_rmx_SetKimura()
-* Incept: SRE, Thu Mar 15 13:08:08 2007 [Janelia]
-*
-* Purpose: Sets a 4x4 rate matrix to a Kimura 2-parameter
-* model, given transition and transversion
-* relative rates <alpha> and <beta>, respectively,
-* scaled to units of 1t = 1.0 substitutions/site.
-*
-* Note: eigenvalues of Q are 0, -4\alpha, -2(\alpha+\beta), -2(\alpha+\beta)
-*/
-int
-esl_rmx_SetKimura(ESL_DMATRIX *Q, double alpha, double beta)
-{
- int i,j;
- double pi[4] = { 0.25, 0.25, 0.25, 0.25 };
-
- if (Q->m != 4 || Q->n != 4 || Q->type != ESL_DMATRIX::eslGENERAL)
- ESL_EXCEPTION(eslEINVAL, "Q must be a 4x4 general matrix");
-
- for (i = 0; i < 4; i++) {
- for (j = 0; j < 4; j++)
- {
- if (i != j) Q->mx[i][j] = ((i+j)%2)? beta : alpha; /* even=0=transition;odd=1=transversion */
- else Q->mx[i][j] = 0.0;
- }
- Q->mx[i][i] = -1. * esl_vec_DSum(Q->mx[i], 4);
- }
- esl_rmx_ScaleTo(Q, pi, 1.0);
- return eslOK;
-}
-
-
-
-/* Function: esl_rmx_SetF81()
-* Incept: SRE, Thu Mar 15 13:33:30 2007 [Janelia]
-*
-* Purpose: Sets a 4x4 rate matrix to the F81 model (aka
-* equal-input model) given stationary base
-* compositions <pi>,
-* scaled to units of 1t = 1.0 substitutions/site.
-*/
-int
-esl_rmx_SetF81(ESL_DMATRIX *Q, double *pi)
-{
- int i,j;
-
- if (Q->m != 4 || Q->n != 4 || Q->type != ESL_DMATRIX::eslGENERAL)
- ESL_EXCEPTION(eslEINVAL, "Q must be a 4x4 general matrix");
-
- for (i = 0; i < 4; i++) {
- for (j = 0; j < 4; j++)
- {
- if (i != j) Q->mx[i][j] = pi[j];
- else Q->mx[i][j] = 0.0;
- }
- Q->mx[i][i] = -1. * esl_vec_DSum(Q->mx[i], 4);
- }
- esl_rmx_ScaleTo(Q, pi, 1.0);
- return eslOK;
-}
-
-
-/* Function: esl_rmx_SetHKY()
-* Incept: SRE, Thu Aug 12 08:26:39 2004 [St. Louis]
-*
-* Purpose: Given stationary base composition <pi> for ACGT, and
-* transition and transversion relative rates <alpha> and
-* <beta> respectively, sets the matrix <Q> to be the
-* corresponding HKY (Hasegawa/Kishino/Yano) DNA rate
-* matrix, scaled in units of 1t= 1.0 substitutions/site
-* \citep{Hasegawa85}.
-*
-* Args: pi - stationary base composition A..T
-* alpha - relative transition rate
-* beta - relative transversion rate
-*
-*
-* Returns: <eslOK>
-*
-* Xref:
-*/
-int
-esl_rmx_SetHKY( ESL_DMATRIX *Q, double *pi, double alpha, double beta)
-{
- int i,j;
-
- if (Q->m != 4 || Q->n != 4 || Q->type != ESL_DMATRIX::eslGENERAL)
- ESL_EXCEPTION(eslEINVAL, "Q must be a 4x4 general matrix");
-
- for (i = 0; i < 4; i++) {
- for (j = 0; j < 4; j++)
- {
- if (i != j) Q->mx[i][j] = ((i+j)%2)? pi[j]*beta : pi[j]*alpha; /* even=0=transition;odd=1=transversion */
- else Q->mx[i][j] = 0.;
- }
- Q->mx[i][i] = -1. * esl_vec_DSum(Q->mx[i], 4);
- }
- esl_rmx_ScaleTo(Q, pi, 1.0);
- return eslOK;
-}
-
-/*****************************************************************
-* 2. Debugging routines for validating or dumping rate matrices.
-*****************************************************************/
-
-/* Function: esl_rmx_ValidateP()
-* Incept: SRE, Sun Mar 11 10:30:50 2007 [Janelia]
-*
-* Purpose: Validates a conditional probability matrix <P>, whose
-* elements $P_{ij}$ represent conditional probabilities
-* $P(j \mid i)$; for example in a first-order Markov
-* chain, or a continuous-time Markov transition process
-* where <P> is for a particular $t$.
-*
-* Rows must sum to one, and each element $P_{ij}$ is a
-* probability $0 \leq P_{ij} \leq 1$.
-*
-* <tol> specifies the floating-point tolerance to which
-* the row sums must equal one: <fabs(sum-1.0) <= tol>.
-*
-* <errbuf> is an optional error message buffer. The caller
-* may pass <NULL> or a pointer to a buffer of at least
-* <eslERRBUFSIZE> characters.
-*
-* Args: P - matrix to validate
-* tol - floating-point tolerance (0.00001, for example)
-* errbuf - OPTIONAL: ptr to an error buffer of at least
-* <eslERRBUFSIZE> characters.
-*
-* Returns: <eslOK> on successful validation.
-* <eslFAIL> on failure, and if a non-<NULL> <errbuf> was
-* provided by the caller, a message describing
-* the reason for the failure is put there.
-*
-* Throws: (no abnormal error conditions)
-*/
-int
-esl_rmx_ValidateP(ESL_DMATRIX *P, double tol, char *errbuf)
-{
- int i,j;
- double sum;
-
- if (P->type != ESL_DMATRIX::eslGENERAL) ESL_EXCEPTION(eslEINVAL, "P must be type eslGENERAL to be validated");
-
- for (i = 0; i < P->n; i++)
- {
- sum = esl_vec_DSum(P->mx[i], P->m);
- if (fabs(sum-1.0) > tol) ESL_FAIL(eslFAIL, errbuf, "row %d does not sum to 1.0", i);
-
- for (j = 0; j < P->m; j++)
- if (P->mx[i][j] < 0.0 || P->mx[i][j] > 1.0)
- ESL_FAIL(eslFAIL, errbuf, "element %d,%d is not a probability (%f)", i,j,P->mx[i][j]);
- }
- return eslOK;
-}
-
-/* Function: esl_rmx_ValidateQ()
-* Incept: SRE, Sun Mar 11 10:30:50 2007 [Janelia]
-*
-* Purpose: Validates an instantaneous rate matrix <Q> for a
-* continuous-time Markov process, whose elements $q_{ij}$
-* represent instantaneous transition rates $i \rightarrow
-* j$.
-*
-* Rows satisfy the condition that
-* $q_{ii} = -\sum_{i \neq j} q_{ij}$, and also
-* that $q_{ij} \geq 0$ for all $j \neq i$.
-*
-* <tol> specifies the floating-point tolerance to which
-* that condition must hold: <fabs(sum-q_ii) <= tol>.
-*
-* <errbuf> is an optional error message buffer. The caller
-* may pass <NULL> or a pointer to a buffer of at least
-* <eslERRBUFSIZE> characters.
-*
-* Args: Q - rate matrix to validate
-* tol - floating-point tolerance (0.00001, for example)
-* errbuf - OPTIONAL: ptr to an error buffer of at least
-* <eslERRBUFSIZE> characters.
-*
-* Returns: <eslOK> on successful validation.
-* <eslFAIL> on failure, and if a non-<NULL> <errbuf> was
-* provided by the caller, a message describing
-* the reason for the failure is put there.
-*
-* Throws: (no abnormal error conditions)
-*/
-int
-esl_rmx_ValidateQ(ESL_DMATRIX *Q, double tol, char *errbuf)
-{
- int i,j;
- double qi;
-
- if (Q->type != ESL_DMATRIX::eslGENERAL) ESL_EXCEPTION(eslEINVAL, "Q must be type eslGENERAL to be validated");
- if (Q->n != Q->m) ESL_EXCEPTION(eslEINVAL, "a rate matrix Q must be square");
-
- for (i = 0; i < Q->n; i++)
- {
- qi = 0.;
- for (j = 0; j < Q->m; j++)
- {
- if (i != j) {
- if (Q->mx[i][j] < 0.) ESL_FAIL(eslFAIL, errbuf, "offdiag elem %d,%d < 0",i,j);
- qi += Q->mx[i][j];
- } else {
- if (Q->mx[i][j] > 0.) ESL_FAIL(eslFAIL, errbuf, "diag elem %d,%d < 0", i,j);
- }
- }
- if (fabs(qi + Q->mx[i][i]) > tol) ESL_FAIL(eslFAIL, errbuf, "row %d does not sum to 0.0", i);
- }
- return eslOK;
-}
-
-
-
-/*****************************************************************
-* 3. Other routines in the exposed ratematrix API.
-*****************************************************************/
-
-/* Function: esl_rmx_ScaleTo()
-* Incept: SRE, Tue Jul 13 16:05:16 2004 [St. Louis]
-*
-* Purpose: Rescales rate matrix <Q> so that expected substitution
-* rate per dt is <unit>.
-*
-* Expected substitution rate is:
-* $\sum_i \sum_j pi_i Q_ij \forall i \neq j$
-*
-* <unit> typically taken to be 1.0, so time units are substitutions/site.
-* An exception is PAM, where <unit> = 0.01 for 1 PAM unit.
-*
-* Args: Q - rate matrix to normalize
-* pi - stationary residue frequencies
-* unit - expected subsitution rate per dt
-* (1.0 = substitutions/site; 0.01 = PAMs)
-*
-* Returns: <eslOK> on success, and matrix Q is rescaled.
-*
-* Xref: STL8/p56.
-*/
-int
-esl_rmx_ScaleTo(ESL_DMATRIX *Q, const double *pi, double unit)
-{
- int i,j;
- double sum = 0.;
-
- if (Q->m != Q->n || Q->type != ESL_DMATRIX::eslGENERAL)
- ESL_EXCEPTION(eslEINVAL, "Q must be a square general matrix");
-
- for (i = 0; i < Q->m; i++)
- for (j = 0; j < Q->n; j++)
- if (i != j) sum += pi[i] * Q->mx[i][j];
-
- for (i = 0; i < Q->m; i++)
- for (j = 0; j < Q->n; j++)
- Q->mx[i][j] *= (unit / sum);
-
- return eslOK;
-}
-
-
-
-/* Function: esl_rmx_E2Q()
-* Incept: SRE, Tue Jul 13 15:52:41 2004 [St. Louis]
-*
-* Purpose: Given a lower triangular matrix ($j<i$) of
-* residue exchangeabilities <E>, and a stationary residue
-* frequency vector <pi>; assuming $E_{ij} = E_{ji}$;
-* calculates a rate matrix <Q> as
-*
-* $Q_{ij} = E_{ij} * \pi_j$
-*
-* The resulting <Q> is not normalized to any particular
-* number of substitutions/site/time unit. See
-* <esl_rmx_ScaleTo()> for that.
-*
-* Args: E - symmetric residue "exchangeabilities";
-* only lower triangular entries are used.
-* pi - residue frequencies at stationarity.
-* Q - RETURN: rate matrix, square (NxN).
-* Caller allocates the memory for this.
-*
-* Returns: <eslOK> on success; Q is calculated and filled in.
-*
-* Xref: STL8/p56.
-*/
-int
-esl_rmx_E2Q(ESL_DMATRIX *E, double *pi, ESL_DMATRIX *Q)
-{
- int i,j;
-
- if (E->n != Q->n) ESL_EXCEPTION(eslEINVAL, "E and Q sizes differ");
-
- /* Scale all off-diagonals to pi[j] * E[i][j].
- */
- for (i = 0; i < E->n; i++)
- for (j = 0; j < i; j++) /* only look at lower triangle of E. */
- {
- Q->mx[i][j] = pi[j] * E->mx[i][j];
- Q->mx[j][i] = pi[i] * E->mx[i][j];
- }
-
- /* Set diagonal to -\sum of all j != i.
- */
- for (i = 0; i < Q->n; i++)
- {
- Q->mx[i][i] = 0.; /* makes the vector sum work for j != i */
- Q->mx[i][i] = -1. * esl_vec_DSum(Q->mx[i], Q->n);
- }
- return eslOK;
-}
-
-
-/* Function: esl_rmx_RelativeEntropy()
-* Incept: SRE, Fri Mar 23 09:18:26 2007 [Janelia]
-*
-* Purpose: Given a conditional substitution probability matrix <P>,
-* with stationary probabilities <pi>, calculate its
-* relative entropy $H$:
-*
-* $H_t = \sum_{ij} P(j \mid i,t) \pi_i \log_2 \frac{P(j \mid i,t)} {\pi_j}$
-*
-* This assumes that the stationary probabilities are the
-* same as the background (null model) probabilities.
-*
-* Returns: the relative entropy, $H$, in bits
-*/
-double
-esl_rmx_RelativeEntropy(ESL_DMATRIX *P, double *pi)
-{
- double H = 0.;
- int i,j;
-
- for (i = 0; i < P->m; i++)
- for (j = 0; j < P->n; j++)
- H += P->mx[i][j] * pi[i] * log(P->mx[i][j] / pi[j]);
- return H / eslCONST_LOG2;
-}
-
-/* Function: esl_rmx_ExpectedScore()
-* Incept: SRE, Fri Mar 23 09:32:05 2007 [Janelia]
-*
-* Purpose: Given a conditional substitution probability matrix <P>
-* with stationary probabilities <pi>, calculate its
-* expected score:
-*
-* $ = \sum_{ij} \pi_j \pi_i \log_2 \frac{P(j \mid i,t)} {\pi_j}$
-*
-* This assumes that the stationary probabilities are the
-* same as the background (null model) probabilities.
-*
-* Returns: the expected score, in bits
-*/
-double
-esl_rmx_ExpectedScore(ESL_DMATRIX *P, double *pi)
-{
- double S = 0.;
- int i,j;
-
- for (i = 0; i < P->m; i++)
- for (j = 0; j < P->n; j++)
- S += pi[j] * pi[i] * log(P->mx[i][j] / pi[j]);
- return S / eslCONST_LOG2;
-}
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_ratematrix.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_ratematrix.h
deleted file mode 100644
index d3ff39d..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_ratematrix.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Routines for manipulating evolutionary rate matrices.
- *
- * SRE, Tue Jul 13 16:09:05 2004 [St. Louis]
- * SVN $Id: esl_ratematrix.h 162 2007-04-10 23:50:12Z eddys $
- */
-#ifndef ESL_RATEMATRIX_INCLUDED
-#define ESL_RATEMATRIX_INCLUDED
-
-/* 1. Setting standard rate matrix models. */
-extern int esl_rmx_SetWAG(ESL_DMATRIX *Q, const double *pi);
-extern int esl_rmx_SetJukesCantor(ESL_DMATRIX *Q);
-extern int esl_rmx_SetKimura(ESL_DMATRIX *Q, double alpha, double beta);
-extern int esl_rmx_SetF81(ESL_DMATRIX *Q, double *pi);
-extern int esl_rmx_SetHKY(ESL_DMATRIX *Q, double *pi, double alpha, double beta);
-
-/* 2. Debugging routines for validating or dumping rate matrices. */
-extern int esl_rmx_ValidateP(ESL_DMATRIX *P, double tol, char *errbuf);
-extern int esl_rmx_ValidateQ(ESL_DMATRIX *Q, double tol, char *errbuf);
-
-/* 3. Other routines in the exposed ratematrix API. */
-extern int esl_rmx_ScaleTo(ESL_DMATRIX *Q, const double *pi, double unit);
-extern int esl_rmx_E2Q(ESL_DMATRIX *E, double *pi, ESL_DMATRIX *Q);
-extern double esl_rmx_RelativeEntropy(ESL_DMATRIX *P, double *pi);
-extern double esl_rmx_ExpectedScore (ESL_DMATRIX *P, double *pi);
-
-
-#endif /*ESL_RATEMATRIX_INCLUDED*/
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_rootfinder.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_rootfinder.cpp
deleted file mode 100644
index 3053763..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_rootfinder.cpp
+++ /dev/null
@@ -1,348 +0,0 @@
-/* Finding roots.
-*
-* Contents:
-* 1. The ESL_ROOTFINDER object.
-* 2. One-dimensional root finding.
-* 6. Copyright and license information.
-*
-* SRE, Fri Apr 6 09:14:13 2007 [Janelia]
-* SVN $Id: esl_rootfinder.c 241 2008-04-01 19:01:52Z eddys $
-*/
-#include <hmmer3/easel/esl_config.h>
-
-#include <math.h>
-
-#include <hmmer3/easel/easel.h>
-#include "esl_rootfinder.h"
-
-/*****************************************************************
-* 1. The ESL_ROOTFINDER object.
-*****************************************************************/
-
-/* Function: esl_rootfinder_Create()
-* Synopsis: Creates ESL_ROOTFINDER for an $f(x)$
-* Incept: SRE, Tue Apr 10 19:54:09 2007 [Janelia]
-*
-* Purpose: Create a rootfinder to find a root of a function $f(x) = 0$.
-* <(*func)()> is a pointer to an implementation of the
-* function $f(x)$. <params> is a generic pointer to any
-* parameters or storage needed in <(*func)()> other than
-* the value of $x$.
-*
-* Caller implements a <func()> that takes three arguments.
-* The first two are the value <x>, and a void pointer to
-* any additional parameters that $f(x)$ depends on. The
-* result, $f(x)$, is returned via the third argument. This
-* function must return <eslOK> to indicate success. Upon
-* error, it may throw any error code it wishes.
-*
-*
-* Args: (*func)() - ptr to function that evaluates f(x)
-* params - ptr to parameters to be passed to (*func)()
-*
-* Returns: pointer to a new <ESL_ROOTFINDER> structure.
-*
-* Throws: <NULL> on allocation failure.
-*/
-ESL_ROOTFINDER *
-esl_rootfinder_Create(int (*func)(double, void*, double*), void *params)
-{
- int status;
- ESL_ROOTFINDER *R = NULL;
-
- ESL_ALLOC_WITH_TYPE(R, ESL_ROOTFINDER*, sizeof(ESL_ROOTFINDER));
- R->func = func;
- R->fdf = NULL; /* unused */
- R->params = params;
- R->xl = -eslINFINITY; /* not set yet */
- R->fl = 0.; /* not set yet */
- R->xr = eslINFINITY; /* not set yet */
- R->fr = 0.; /* not set yet */
- R->x0 = 0.; /* not set yet */
- R->f0 = 0.; /* not set yet */
- R->x = 0.; /* not set yet */
- R->fx = 0.; /* not set yet */
- R->dfx = 0.; /* unused */
- R->iter = 0;
- R->abs_tolerance = 1e-12;
- R->rel_tolerance = 1e-12;
- R->residual_tol = 0.;
- R->max_iter = 100;
- return R;
-
-ERROR:
- esl_rootfinder_Destroy(R);
- return NULL;
-}
-
-
-/* Function: esl_rootfinder_CreateFDF()
-* Synopsis: Creates ESL_ROOTFINDER that uses both $f(x)$, $f'(x)$
-* Incept: SRE, Tue Apr 10 20:47:42 2007 [Janelia]
-*
-* Purpose: Create a rootfinder that will find
-* a root of a function $f(x) = 0$ using first derivative
-* information $f'(x)$.
-*
-* Caller provides a pointer <*fdf()> to a function that
-* takes four arguments. The first two are the current <x>
-* value, and a void pointer to any additional parameters
-* that $f(x)$ depends on. <*fdf()> calculates the function
-* $f(x)$ and the derivative $f'(x)$ and returns them
-* through the remaining two arguments.
-*
-* Args: (*fdf)() - ptr to function that returns f(x) and f'(x)
-* params - ptr to parameters to be passed to (*fdf)()
-*
-* Returns: pointer to a new <ESL_ROOTFINDER> structure.
-*
-* Throws: <NULL> on allocation failure.
-*/
-ESL_ROOTFINDER *
-esl_rootfinder_CreateFDF(int (*fdf)(double, void*, double*, double*), void *params)
-{
- int status;
- ESL_ROOTFINDER *R = NULL;
-
- ESL_ALLOC_WITH_TYPE(R, ESL_ROOTFINDER*, sizeof(ESL_ROOTFINDER));
- R->func = NULL;
- R->fdf = fdf;
- R->params = params;
- R->xl = -eslINFINITY;
- R->fl = 0.; /* unused */
- R->xr = eslINFINITY;
- R->fr = 0.; /* unused */
- R->x0 = 0.;
- R->f0 = 0.;
- R->x = 0.; /* not set yet */
- R->fx = 0.; /* not set yet */
- R->dfx = 0.; /* not set yet */
- R->iter = 0;
- R->abs_tolerance = 1e-15;
- R->rel_tolerance = 1e-15;
- R->residual_tol = 0.;
- R->max_iter = 100;
- return R;
-
-ERROR:
- esl_rootfinder_Destroy(R);
- return NULL;
-}
-
-/* Function: esl_rootfinder_SetBrackets()
-* Incept: SRE, Wed Apr 11 08:35:10 2007 [Janelia]
-*
-* Purpose: Declare that a root is in the open interval
-* <(xl..xr)>.
-*
-* The function will be evaluated at both points.
-*
-* Args: R - rootfinder structure
-* xl,xr - root lies in open interval (xl..xr)
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEINVAL> if <xl,xr> cannot bracket a root,
-* because $f(x_l)$ and $f(x_r)$ do not have opposite
-* signs.
-*
-* Additionally, if either evaluation fails in the
-* caller-provided function, the error code from that
-* failure will be thrown.
-*/
-int
-esl_rootfinder_SetBrackets(ESL_ROOTFINDER *R, double xl, double xr)
-{
- int status;
- double dfx;
-
- R->xl = xl;
- R->xr = xr;
- if (R->func != NULL) {
- if ((status = (*R->func)(R->xl, R->params, &(R->fl))) != eslOK) return status;
- if ((status = (*R->func)(R->xr, R->params, &(R->fr))) != eslOK) return status;
- } else {
- if ((status = (*R->fdf) (R->xl, R->params, &(R->fl), &dfx)) != eslOK) return status;
- if ((status = (*R->fdf) (R->xr, R->params, &(R->fr), &dfx)) != eslOK) return status;
- }
- if (R->fl * R->fr >= 0) ESL_EXCEPTION(eslEINVAL, "xl,xr do not bracket a root");
- return eslOK;
-}
-
-int
-esl_rootfinder_SetAbsoluteTolerance(ESL_ROOTFINDER *R, double tol)
-{
- R->abs_tolerance = tol;
- return eslOK;
-}
-
-int
-esl_rootfinder_SetRelativeTolerance(ESL_ROOTFINDER *R, double tol)
-{
- R->rel_tolerance = tol;
- return eslOK;
-}
-
-int
-esl_rootfinder_SetResidualTolerance(ESL_ROOTFINDER *R, double tol)
-{
- R->residual_tol = tol;
- return eslOK;
-}
-
-int
-esl_rootfinder_SetMaxIterations(ESL_ROOTFINDER *R, int maxiter)
-{
- R->max_iter = maxiter;
- return eslOK;
-}
-
-
-void
-esl_rootfinder_Destroy(ESL_ROOTFINDER *R)
-{
- if (R == NULL) return;
- free(R);
-}
-
-
-/*****************************************************************
-* 2. One-dimensional root finding.
-*****************************************************************/
-
-/* Function: esl_root_Bisection()
-* Synopsis: Find a root of $f(x)$ by bisection method.
-* Incept: SRE, Wed Apr 11 08:40:11 2007 [Janelia]
-*
-* Purpose: Find a root in the open interval <xl..xr> by the bisection method,
-* and return it in <ret_x>.
-*
-* The bisection method is guaranteed to succeed, provided
-* that <xl>,<xr> do indeed bracket a root, though it may
-* be slow.
-*
-* The rootfinder <R> can be created either by
-* <esl_rootfinder_Create()> or
-* <esl_rootfinder_CreateFDF()>; if the latter (if the
-* function in the rootfinder <R> includes derivative
-* information), the bisection method will just ignore
-* the derivative.
-*
-* Args: R - a rootfinder object for the function
-* xl,xr - bounds of an open interval in which a root lies
-* ret_x - RETURN: a root that satisfies $f(x) = 0$.
-*
-* Returns: <eslOK> on success, and <ret_x> points to a root.
-*
-* Throws: <eslEINVAL> if <xl,xr> do not bracket a root.
-* <eslENOHALT> if the method exceeds the maximum number of
-* iterations set in <R>.
-*
-* Additionally, any failure code that the caller-provided
-* function $f(x)$ throws.
-*/
-int
-esl_root_Bisection(ESL_ROOTFINDER *R, double xl, double xr, double *ret_x)
-{
- int status;
- double xmag;
-
- if ((status = esl_rootfinder_SetBrackets(R, xl, xr)) != eslOK) return status;
-
- while (1) {
- R->iter++;
- if (R->iter > R->max_iter) ESL_EXCEPTION(eslENOHALT, "failed to converge in Bisection");
-
- /* Bisect and evaluate the function */
- R->x = (R->xl+R->xr)/2.;
- if (R->func != NULL) {
- if ((status = (*R->func)(R->x, R->params, &(R->fx))) != eslOK) return status;
- } else {
- if ((status = (*R->fdf) (R->x, R->params, &(R->fx), &(R->dfx))) != eslOK) return status;
- }
-
- /* Test for convergence */
- xmag = (R->xl < 0. && R->xr > 0.) ? 0. : R->x;
- if (R->fx == 0.) break; /* an exact root, lucky */
- if (((R->xr-R->xl) < R->abs_tolerance + R->rel_tolerance*xmag) || fabs(R->fx) < R->residual_tol) break;
-
- /* Narrow the bracket; pay attention to directionality */
- if (R->fl > 0.) {
- if (R->fx > 0.) { R->xl = R->x; R->fl = R->fx; }
- else { R->xr = R->x; R->fr = R->fx; }
- } else {
- if (R->fx < 0.) { R->xl = R->x; R->fl = R->fx; }
- else { R->xr = R->x; R->fr = R->fx; }
- }
- }
-
- *ret_x = R->x;
- return eslOK;
-}
-
-
-/* Function: esl_root_NewtonRaphson()
-* Synopsis: Find a root of $f(x)$ by Newton/Raphson method.
-* Incept: SRE, Wed Apr 11 08:56:28 2007 [Janelia]
-*
-* Purpose: Find a root by the Newton/Raphson method, starting from
-* an initial guess <guess>. Return the root in <ret_x>.
-*
-* The Newton/Raphson method is not guaranteed to succeed,
-* but when it does, it is much faster than bisection.
-*
-* Newton/Raphson uses first derivative information, so the
-* rootfinder <R> must be created with
-* <esl_rootfinder_CreateFDF()> for a function that evaluates
-* both $f(x)$ and $f'(x)$.
-*
-* Args: R - a rootfinder object for $f(x)$ and $f'(x)$
-* guess - an initial guess for the root
-* ret_x - RETURN: a root that satisfies $f(x) = 0$.
-*
-* Returns: <eslOK> on success, and <ret_x> points to a root.
-*
-* Throws: <eslENOHALT> if the method exceeds the maximum number of
-* iterations set in <R>.
-*
-* Additionally, any failure code that the caller-provided
-* function $f(x)$ throws.
-*/
-int
-esl_root_NewtonRaphson(ESL_ROOTFINDER *R, double guess, double *ret_x)
-{
- int status;
-
- R->x = guess;
- if ((status = (*R->fdf)(R->x, R->params, &(R->fx), &(R->dfx))) != eslOK) return status;
-
- while (1) {
- R->iter++;
- if (R->iter > R->max_iter) ESL_EXCEPTION(eslENOHALT, "failed to converge in Newton");
-
- /* printf("current: x=%20g f(x) = %20g f'(x) = %20g\n", R->x, R->fx, R->dfx); */
-
- /* Take a Newton/Raphson step. */
- R->x0 = R->x;
- R->f0 = R->fx;
- R->x = R->x - R->fx / R->dfx;
- (*R->fdf)(R->x, R->params, &(R->fx), &(R->dfx));
-
- /* Test for convergence. */
- if (R->fx == 0) break; /* an exact root, lucky */
- if ( (fabs((double)(R->x - R->x0)) < R->abs_tolerance + R->rel_tolerance*R->x) || fabs((double)(R->fx < R->residual_tol)) ) break;
- }
-
- *ret_x = R->x;
- return eslOK;
-}
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_rootfinder.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_rootfinder.h
deleted file mode 100644
index e46c618..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_rootfinder.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/* Finding roots of functions.
-*
-* SRE, Fri Apr 6 10:01:43 2007 [Janelia]
-* SVN $Id: esl_rootfinder.h 231 2008-03-25 14:43:57Z eddys $
-*/
-#ifndef ESL_ROOTFINDER_INCLUDED
-#define ESL_ROOTFINDER_INCLUDED
-
-#include <hmmer3/easel/esl_config.h>
-
-typedef struct {
- int (*func)(double, void*, double*);
- int (*fdf) (double, void*, double*, double*);
- void *params;
-
- double xl;
- double fl;
- double xr;
- double fr;
-
- double x0;
- double f0;
-
- double x;
- double fx;
- double dfx;
- int iter;
-
- double abs_tolerance;
- double rel_tolerance;
- double residual_tol;
- int max_iter;
-} ESL_ROOTFINDER;
-
-
-extern ESL_ROOTFINDER *esl_rootfinder_Create (int (*func)(double, void*, double*), void *params);
-extern ESL_ROOTFINDER *esl_rootfinder_CreateFDF(int (*fdf) (double, void*, double*, double*), void *params);
-
-extern int esl_rootfinder_SetBrackets(ESL_ROOTFINDER *R, double xl, double xr);
-extern int esl_rootfinder_SetAbsoluteTolerance(ESL_ROOTFINDER *R, double tol);
-extern int esl_rootfinder_SetRelativeTolerance(ESL_ROOTFINDER *R, double tol);
-extern int esl_rootfinder_SetResidualTolerance(ESL_ROOTFINDER *R, double tol);
-extern int esl_rootfinder_SetMaxIterations(ESL_ROOTFINDER *R, int maxiter);
-extern void esl_rootfinder_Destroy(ESL_ROOTFINDER *R);
-
-extern int esl_root_Bisection(ESL_ROOTFINDER *R, double xl, double xr, double *ret_x);
-extern int esl_root_NewtonRaphson(ESL_ROOTFINDER *R, double guess, double *ret_x);
-
-
-#endif /*ESL_ROOTFINDER_INCLUDED*/
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_scorematrix.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_scorematrix.cpp
deleted file mode 100644
index 050aeee..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_scorematrix.cpp
+++ /dev/null
@@ -1,974 +0,0 @@
-/* Routines for manipulating sequence alignment score matrices,
-* such as the BLOSUM and PAM matrices.
-*
-* Contents:
-* 1. The ESL_SCOREMATRIX object.
-* 2. Reading/writing score matrices.
-* 3. Interpreting score matrices probabilistically.
-* 4. Utility programs.
-* 8. License and copyright.
-*
-* SRE, Mon Apr 2 08:25:05 2007 [Janelia]
-* SVN $Id: esl_scorematrix.c 337 2009-05-12 02:13:02Z eddys $
-*/
-
-#include <hmmer3/easel/esl_config.h>
-
-#include <string.h>
-#include <math.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_dirichlet.h>
-#include <hmmer3/easel/esl_dmatrix.h>
-#include <hmmer3/easel/esl_rootfinder.h>
-#include <hmmer3/easel/esl_ratematrix.h>
-#include <hmmer3/easel/esl_scorematrix.h>
-#include <hmmer3/easel/esl_vectorops.h>
-
-/*****************************************************************
-* 1. The ESL_SCOREMATRIX object
-*****************************************************************/
-
-/* Function: esl_scorematrix_Create()
-* Synopsis: Create an <ESL_SCOREMATRIX>.
-* Incept: SRE, Mon Apr 2 08:38:10 2007 [Janelia]
-*
-* Purpose: Allocates a score matrix for alphabet <abc>, initializes
-* all scores to zero.
-*
-* Args: abc - pointer to digital alphabet
-*
-* Returns: a pointer to the new object.
-*
-* Throws: <NULL> on allocation failure.
-*/
-ESL_SCOREMATRIX *
-esl_scorematrix_Create(const ESL_ALPHABET *abc)
-{
- int status;
- int i;
- ESL_SCOREMATRIX *S = NULL;
-
- ESL_ALLOC_WITH_TYPE(S, ESL_SCOREMATRIX*, sizeof(ESL_SCOREMATRIX));
- S->s = NULL;
- S->K = abc->K;
- S->Kp = abc->Kp;
- S->isval = NULL;
- S->abc_r = abc;
- S->nc = 0;
- S->outorder = NULL;
- S->name = NULL;
- S->path = NULL;
-
- ESL_ALLOC_WITH_TYPE(S->s, int**, sizeof(int *) * abc->Kp);
- for (i = 0; i < abc->Kp; i++) S->s[i] = NULL;
- ESL_ALLOC_WITH_TYPE(S->isval, char*, sizeof(char) * abc->Kp);
- for (i = 0; i < abc->Kp; i++) S->isval[i] = FALSE;
- ESL_ALLOC_WITH_TYPE(S->outorder, char*, sizeof(char) * abc->Kp);
- S->outorder[0] = '\0'; /* init to empty string. */
-
- ESL_ALLOC_WITH_TYPE(S->s[0], int*, sizeof(int) * abc->Kp * abc->Kp);
- for (i = 1; i < abc->Kp; i++) S->s[i] = S->s[0] + abc->Kp * i;
-
- for (i = 0; i < abc->Kp*abc->Kp; i++) S->s[0][i] = 0;
- return S;
-
-ERROR:
- esl_scorematrix_Destroy(S);
- return NULL;
-}
-
-/* Function: esl_scorematrix_SetIdentity()
-* Synopsis: Set matrix to +1 match, 0 mismatch.
-* Incept: SRE, Mon Apr 16 20:17:00 2007 [Janelia]
-*
-* Purpose: Sets score matrix <S> to be +1 for a match,
-* 0 for a mismatch. <S> may be for any alphabet.
-*
-* Rarely useful in real use, but may be useful to create
-* simple examples (including debugging).
-*
-* Returns: <eslOK> on success, and the scores in <S> are set.
-*/
-int
-esl_scorematrix_SetIdentity(ESL_SCOREMATRIX *S)
-{
- int a;
- int x;
-
- for (a = 0; a < S->abc_r->Kp*S->abc_r->Kp; a++) S->s[0][a] = 0;
- for (a = 0; a < S->K; a++) S->s[a][a] = 1;
-
- for (x = 0; x < S->K; x++) S->isval[x] = TRUE;
- for (x = S->abc_r->K; x < S->Kp; x++) S->isval[x] = FALSE;
-
- strncpy(S->outorder, S->abc_r->sym, S->K);
- S->outorder[S->K] = '\0';
- S->nc = S->K;
- return eslOK;
-}
-
-/* Function: esl_scorematrix_SetBLOSUM62
-* Synopsis: Set matrix to BLOSUM62 scores.
-* Incept: SRE, Tue Apr 3 13:22:03 2007 [Janelia]
-*
-* Purpose: Set the 20x20 canonical residue scores in an
-* allocated amino acid score matrix <S> to BLOSUM62
-* scores \citep{Henikoff92}.
-*
-* Returns: <eslOK> on success, and the scores in <S> are set.
-*
-* Throws: <eslEMEM> on allocation error.
-*/
-int
-esl_scorematrix_SetBLOSUM62(ESL_SCOREMATRIX *S)
-{
- int x,y;
- static int blosum62[29][29] = {
- /* A C D E F G H I K L M N P Q R S T V W Y - B J Z O U X * ~ */
- { 4, 0, -2, -1, -2, 0, -2, -1, -1, -1, -1, -2, -1, -1, -1, 1, 0, 0, -3, -2, 0, -2, 0, -1, 0, 0, 0, -4, 0, }, /* A */
- { 0, 9, -3, -4, -2, -3, -3, -1, -3, -1, -1, -3, -3, -3, -3, -1, -1, -1, -2, -2, 0, -3, 0, -3, 0, 0, -2, -4, 0, }, /* C */
- { -2, -3, 6, 2, -3, -1, -1, -3, -1, -4, -3, 1, -1, 0, -2, 0, -1, -3, -4, -3, 0, 4, 0, 1, 0, 0, -1, -4, 0, }, /* D */
- { -1, -4, 2, 5, -3, -2, 0, -3, 1, -3, -2, 0, -1, 2, 0, 0, -1, -2, -3, -2, 0, 1, 0, 4, 0, 0, -1, -4, 0, }, /* E */
- { -2, -2, -3, -3, 6, -3, -1, 0, -3, 0, 0, -3, -4, -3, -3, -2, -2, -1, 1, 3, 0, -3, 0, -3, 0, 0, -1, -4, 0, }, /* F */
- { 0, -3, -1, -2, -3, 6, -2, -4, -2, -4, -3, 0, -2, -2, -2, 0, -2, -3, -2, -3, 0, -1, 0, -2, 0, 0, -1, -4, 0, }, /* G */
- { -2, -3, -1, 0, -1, -2, 8, -3, -1, -3, -2, 1, -2, 0, 0, -1, -2, -3, -2, 2, 0, 0, 0, 0, 0, 0, -1, -4, 0, }, /* H */
- { -1, -1, -3, -3, 0, -4, -3, 4, -3, 2, 1, -3, -3, -3, -3, -2, -1, 3, -3, -1, 0, -3, 0, -3, 0, 0, -1, -4, 0, }, /* I */
- { -1, -3, -1, 1, -3, -2, -1, -3, 5, -2, -1, 0, -1, 1, 2, 0, -1, -2, -3, -2, 0, 0, 0, 1, 0, 0, -1, -4, 0, }, /* K */
- { -1, -1, -4, -3, 0, -4, -3, 2, -2, 4, 2, -3, -3, -2, -2, -2, -1, 1, -2, -1, 0, -4, 0, -3, 0, 0, -1, -4, 0, }, /* L */
- { -1, -1, -3, -2, 0, -3, -2, 1, -1, 2, 5, -2, -2, 0, -1, -1, -1, 1, -1, -1, 0, -3, 0, -1, 0, 0, -1, -4, 0, }, /* M */
- { -2, -3, 1, 0, -3, 0, 1, -3, 0, -3, -2, 6, -2, 0, 0, 1, 0, -3, -4, -2, 0, 3, 0, 0, 0, 0, -1, -4, 0, }, /* N */
- { -1, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, 7, -1, -2, -1, -1, -2, -4, -3, 0, -2, 0, -1, 0, 0, -2, -4, 0, }, /* P */
- { -1, -3, 0, 2, -3, -2, 0, -3, 1, -2, 0, 0, -1, 5, 1, 0, -1, -2, -2, -1, 0, 0, 0, 3, 0, 0, -1, -4, 0, }, /* Q */
- { -1, -3, -2, 0, -3, -2, 0, -3, 2, -2, -1, 0, -2, 1, 5, -1, -1, -3, -3, -2, 0, -1, 0, 0, 0, 0, -1, -4, 0, }, /* R */
- { 1, -1, 0, 0, -2, 0, -1, -2, 0, -2, -1, 1, -1, 0, -1, 4, 1, -2, -3, -2, 0, 0, 0, 0, 0, 0, 0, -4, 0, }, /* S */
- { 0, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 1, 5, 0, -2, -2, 0, -1, 0, -1, 0, 0, 0, -4, 0, }, /* T */
- { 0, -1, -3, -2, -1, -3, -3, 3, -2, 1, 1, -3, -2, -2, -3, -2, 0, 4, -3, -1, 0, -3, 0, -2, 0, 0, -1, -4, 0, }, /* V */
- { -3, -2, -4, -3, 1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3, 11, 2, 0, -4, 0, -3, 0, 0, -2, -4, 0, }, /* W */
- { -2, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, 2, 7, 0, -3, 0, -2, 0, 0, -1, -4, 0, }, /* Y */
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* - */
- { -2, -3, 4, 1, -3, -1, 0, -3, 0, -4, -3, 3, -2, 0, -1, 0, -1, -3, -4, -3, 0, 4, 0, 1, 0, 0, -1, -4, 0, }, /* B */
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* J */
- { -1, -3, 1, 4, -3, -2, 0, -3, 1, -3, -1, 0, -1, 3, 0, 0, -1, -2, -3, -2, 0, 1, 0, 4, 0, 0, -1, -4, 0, }, /* Z */
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* O */
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* U */
- { 0, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, -1, 0, 0, -1, -2, -1, 0, -1, 0, -1, 0, 0, -1, -4, 0, }, /* X */
- { -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, 0, -4, 0, -4, 0, 0, -4, 1, 0, }, /* * */
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* ~ */
- };
- /* The BLOSUM62 background frequencies are the actual frequencies used to create
- * the matrix in 1992. */
- /* A C D E F G H I K L M N P Q R S T V W Y */
- /* double blosum62f[20] = { 0.074, 0.025, 0.054, 0.054, 0.047, 0.074, 0.026, 0.068, 0.058, 0.099, 0.025, 0.045, 0.039, 0.034, 0.052, 0.057, 0.051, 0.073, 0.013, 0.032 };
- */
-
- for (x = 0; x < S->K; x++) S->isval[x] = TRUE;
- for (x = S->abc_r->K; x < S->Kp; x++) S->isval[x] = FALSE;
- x = esl_abc_DigitizeSymbol(S->abc_r, 'B'); S->isval[x] = TRUE;
- x = esl_abc_DigitizeSymbol(S->abc_r, 'Z'); S->isval[x] = TRUE;
- x = esl_abc_DigitizeSymbol(S->abc_r, 'X'); S->isval[x] = TRUE;
-
- for (x = 0; x < S->Kp; x++)
- for (y = 0; y < S->Kp; y++)
- S->s[x][y] = blosum62[x][y];
-
- /* Bookkeeping necessary to be able to reproduce BLOSUM62 output format exactly, if we need to Write() */
- strcpy(S->outorder, "ARNDCQEGHILKMFPSTWYVBZX*");
- S->nc = strlen(S->outorder);
-
- if (esl_strdup("BLOSUM62", -1, &(S->name)) != eslOK) return eslEMEM;
- return eslOK;
-}
-
-
-/* Function: esl_scorematrix_SetWAG()
-* Synopsis: Set matrix using the WAG evolutionary model.
-* Incept: SRE, Thu Apr 12 13:23:28 2007 [Janelia]
-*
-* Purpose: Parameterize an amino acid score matrix <S> using the WAG
-* rate matrix \citep{WhelanGoldman01} as the underlying
-* evolutionary model, at a distance of <t>
-* substitutions/site, with scale factor <lambda>.
-*
-* Args: S - score matrix to set parameters in. Must be created for
-* an amino acid alphabet.
-* lambda - scale factor for scores
-* t - distance to exponentiate WAG to, in substitutions/site
-*
-* Returns: <eslOK> on success, and the 20x20 residue scores in <S> are set.
-*
-* Throws: <eslEINVAL> if <S> isn't an allocated amino acid score matrix.
-* <eslEMEM> on allocation failure.
-*/
-int
-esl_scorematrix_SetWAG(ESL_SCOREMATRIX *S, double lambda, double t)
-{
- int status;
- int i,j;
- ESL_DMATRIX *Q = NULL;
- ESL_DMATRIX *P = NULL;
- static const double wagpi[20] = {
- 0.086628, 0.019308, 0.057045, 0.058059,
- 0.038432, 0.083252, 0.024431, 0.048466,
- 0.062029, 0.086209, 0.019503, 0.039089,
- 0.045763, 0.036728, 0.043972, 0.069518,
- 0.061013, 0.070896, 0.014386, 0.035274
- }; // taken from esl_composition_WAG from easel.c
-
- if (S->K != 20) ESL_EXCEPTION(eslEINVAL, "Must be using an amino acid alphabet (K=20) to make WAG-based matrices");
-
- if (( Q = esl_dmatrix_Create(20, 20)) == NULL) goto ERROR;
- if (( P = esl_dmatrix_Create(20, 20)) == NULL) goto ERROR;
- // already filled wagpi in definition
- //if ( esl_composition_WAG(wagpi) != eslOK) goto ERROR;
- if ( esl_rmx_SetWAG(Q, wagpi) != eslOK) goto ERROR;
- if ( esl_dmx_Exp(Q, t, P) != eslOK) goto ERROR;
-
- for (i = 0; i < 20; i++)
- for (j = 0; j < 20; j++)
- P->mx[i][j] *= wagpi[i]; /* P_ij = P(j|i) pi_i */
-
- esl_scorematrix_SetFromProbs(S, lambda, P, wagpi, wagpi);
-
- if ((status = esl_strdup("WAG", -1, &(S->name))) != eslOK) goto ERROR;
-
- esl_dmatrix_Destroy(Q);
- esl_dmatrix_Destroy(P);
- return eslOK;
-
-ERROR:
- if (Q != NULL) esl_dmatrix_Destroy(Q);
- if (Q != NULL) esl_dmatrix_Destroy(P);
- return status;
-}
-
-
-/* Function: esl_scorematrix_SetFromProbs()
-* Synopsis: Set matrix from target and background probabilities.
-* Incept: SRE, Wed Apr 11 17:37:45 2007 [Janelia]
-*
-* Purpose: Sets the scores in a new score matrix <S> from target joint
-* probabilities in <P>, query background probabilities <fi>, and
-* target background probabilities <fj>, with scale factor <lambda>:
-* $s_{ij} = \frac{1}{\lambda} \frac{p_{ij}}{f_i f_j}$.
-*
-* Size of everything must match the canonical alphabet
-* size in <S>. That is, <S->abc->K> is the canonical
-* alphabet size of <S>; <P> must contain $K times K$
-* probabilities $P_{ij}$, and <fi>,<fj> must be vectors of
-* K probabilities. All probabilities must be nonzero.
-*
-* Args: S - score matrix to set scores in
-* lambda - scale factor
-* P - matrix of joint probabilities P_ij (KxK)
-* fi - query background probabilities (0..K-1)
-* fj - target background probabilities
-*
-* Returns: <eslOK> on success, and <S> contains the calculated score matrix.
-*/
-int
-esl_scorematrix_SetFromProbs(ESL_SCOREMATRIX *S, double lambda, const ESL_DMATRIX *P, const double *fi, const double *fj)
-{
- int i,j;
- double sc;
-
- for (i = 0; i < S->abc_r->K; i++)
- for (j = 0; j < S->abc_r->K; j++)
- {
- sc = log(P->mx[i][j] / (fi[i] * fj[j])) / lambda;
- S->s[i][j] = (int) (sc + (sc>0 ? 0.5 : -0.5)); /* that's rounding to the nearest integer */
- }
-
- for (i = 0; i < S->abc_r->K; i++)
- S->isval[i] = TRUE;
- S->nc = S->abc_r->K;
-
- strncpy(S->outorder, S->abc_r->sym, S->abc_r->K);
- S->outorder[S->nc] = '\0';
- return eslOK;
-}
-
-
-/* Function: esl_scorematrix_Copy()
-* Synopsis: Copy <src> matrix to <dest>.
-* Incept: SRE, Tue May 15 10:24:20 2007 [Janelia]
-*
-* Purpose: Copy <src> score matrix into <dest>. Caller
-* has allocated <dest> for the same alphabet as
-* <src>.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEINCOMPAT> if <dest> isn't allocated for
-* the same alphabet as <src>.
-* <eslEMEM> on allocation error.
-*/
-int
-esl_scorematrix_Copy(const ESL_SCOREMATRIX *src, ESL_SCOREMATRIX *dest)
-{
- int i,j;
- int status;
-
- if (src->abc_r->type != dest->abc_r->type || src->K != dest->K || src->Kp != dest->Kp)
- ESL_EXCEPTION(eslEINCOMPAT, "source and dest score matrix types don't match");
-
- for (i = 0; i < src->K; i++)
- for (j = 0; j < src->K; j++)
- dest->s[i][j] = src->s[i][j];
- for (i = 0; i < src->Kp; i++)
- dest->isval[i] = src->isval[i];
- dest->nc = src->nc;
- for (i = 0; i < src->nc; i++)
- dest->outorder[i] = src->outorder[i];
- dest->outorder[dest->nc] = '\0';
-
- if ((status = esl_strdup(src->name, -1, &(dest->name))) != eslOK) return status;
- if ((status = esl_strdup(src->path, -1, &(dest->path))) != eslOK) return status;
- return eslOK;
-}
-
-/* Function: esl_scorematrix_Clone()
-* Synopsis: Allocate a duplicate of a matrix.
-* Incept: SRE, Tue May 15 10:24:20 2007 [Janelia]
-*
-* Purpose: Allocates a new matrix and makes it a duplicate
-* of <S>. Return a pointer to the new matrix.
-*
-* Throws: <NULL> on allocation failure.
-*/
-ESL_SCOREMATRIX *
-esl_scorematrix_Clone(const ESL_SCOREMATRIX *S)
-{
- ESL_SCOREMATRIX *dup = NULL;
-
- if ((dup = esl_scorematrix_Create(S->abc_r)) == NULL) return NULL;
- if (esl_scorematrix_Copy(S, dup) != eslOK) { esl_scorematrix_Destroy(dup); return NULL; }
- return dup;
-}
-
-
-/* Function: esl_scorematrix_Compare()
-* Synopsis: Compare two matrices for equality.
-* Incept: SRE, Tue Apr 3 14:17:12 2007 [Janelia]
-*
-* Purpose: Compares two score matrices. Returns <eslOK> if they
-* are identical, <eslFAIL> if they differ. Every aspect
-* of the two matrices is compared.
-*
-* The annotation (name, filename path) are not
-* compared; we may want to compare an internally
-* generated scorematrix to one read from a file.
-*/
-int
-esl_scorematrix_Compare(const ESL_SCOREMATRIX *S1, const ESL_SCOREMATRIX *S2)
-{
- int a,b;
-
- if (strcmp(S1->outorder, S2->outorder) != 0) return eslFAIL;
- if (S1->nc != S2->nc) return eslFAIL;
-
- for (a = 0; a < S1->nc; a++)
- if (S1->isval[a] != S2->isval[a]) return eslFAIL;
-
- for (a = 0; a < S1->Kp; a++)
- for (b = 0; b < S1->Kp; b++)
- if (S1->s[a][b] != S2->s[a][b]) return eslFAIL;
-
- return eslOK;
-}
-
-/* Function: esl_scorematrix_CompareCanon()
-* Synopsis: Compares scores of canonical residues for equality.
-* Incept: SRE, Tue May 15 11:00:38 2007 [Janelia]
-*
-* Purpose: Compares the scores of canonical residues in
-* two score matrices <S1> and <S2> for equality.
-* Returns <eslOK> if they are identical, <eslFAIL>
-* if they differ. Peripheral aspects of the scoring matrices
-* having to do with noncanonical residues, output
-* order, and suchlike are ignored.
-*/
-int
-esl_scorematrix_CompareCanon(const ESL_SCOREMATRIX *S1, const ESL_SCOREMATRIX *S2)
-{
- int a,b;
-
- for (a = 0; a < S1->K; a++)
- for (b = 0; b < S1->K; b++)
- if (S1->s[a][b] != S2->s[a][b]) return eslFAIL;
- return eslOK;
-}
-
-
-
-/* Function: esl_scorematrix_Max()
-* Synopsis: Returns maximum value in score matrix.
-* Incept: SRE, Thu Apr 12 18:04:35 2007 [Janelia]
-*
-* Purpose: Returns the maximum value in score matrix <S>.
-*/
-int
-esl_scorematrix_Max(const ESL_SCOREMATRIX *S)
-{
- int i,j;
- int max = S->s[0][0];
-
- for (i = 0; i < S->K; i++)
- for (j = 0; j < S->K; j++)
- if (S->s[i][j] > max) max = S->s[i][j];
- return max;
-}
-
-/* Function: esl_scorematrix_Min()
-* Synopsis: Returns minimum value in score matrix.
-* Incept: SRE, Thu Apr 12 18:06:50 2007 [Janelia]
-*
-* Purpose: Returns the minimum value in score matrix <S>.
-*/
-int
-esl_scorematrix_Min(const ESL_SCOREMATRIX *S)
-{
- int i,j;
- int min = S->s[0][0];
-
- for (i = 0; i < S->K; i++)
- for (j = 0; j < S->K; j++)
- if (S->s[i][j] < min) min = S->s[i][j];
- return min;
-}
-
-
-/* Function: esl_scorematrix_IsSymmetric()
-* Synopsis: Returns <TRUE> for symmetric matrix.
-* Incept: SRE, Sat May 12 18:17:17 2007 [Janelia]
-*
-* Purpose: Returns <TRUE> if matrix <S> is symmetric,
-* or <FALSE> if it's not.
-*/
-int
-esl_scorematrix_IsSymmetric(const ESL_SCOREMATRIX *S)
-{
- int i,j;
-
- for (i = 0; i < S->K; i++)
- for (j = i; j < S->K; j++)
- if (S->s[i][j] != S->s[j][i]) return FALSE;
- return TRUE;
-}
-
-
-
-/* Function: esl_scorematrix_Destroy()
-* Synopsis: Frees a matrix.
-* Incept: SRE, Mon Apr 2 08:46:44 2007 [Janelia]
-*
-* Purpose: Frees a score matrix.
-*/
-void
-esl_scorematrix_Destroy(ESL_SCOREMATRIX *S)
-{
- if (S == NULL) return;
- if (S->s != NULL) {
- if (S->s[0] != NULL) free(S->s[0]);
- free(S->s);
- }
- if (S->isval != NULL) free(S->isval);
- if (S->outorder != NULL) free(S->outorder);
- if (S->name != NULL) free(S->name);
- if (S->path != NULL) free(S->path);
- free(S);
- return;
-}
-
-
-/*****************************************************************
-* 3. Interpreting score matrices probabilistically.
-*****************************************************************/
-
-static int set_degenerate_probs(const ESL_ALPHABET *abc, ESL_DMATRIX *P, double *fi, double *fj);
-
-
-struct lambda_params {
- const double *fi;
- const double *fj;
- const ESL_SCOREMATRIX *S;
-};
-
-static int
-lambda_fdf(double lambda, void *params, double *ret_fx, double *ret_dfx)
-{
- struct lambda_params *p = (struct lambda_params *) params;
- int i,j;
- double tmp;
-
- *ret_fx = 0.;
- *ret_dfx = 0.;
- for (i = 0; i < p->S->K; i++)
- for (j = 0; j < p->S->K; j++)
- {
- tmp = p->fi[i] * p->fj[j] * exp(lambda * (double) p->S->s[i][j]);
- *ret_fx += tmp;
- *ret_dfx += tmp * (double) p->S->s[i][j];
- }
- *ret_fx -= 1.0;
- return eslOK;
-}
-
-/* Function: esl_sco_ProbifyGivenBG()
-* Synopsis: Obtain $P_{ij}$ for matrix with known $\lambda$ and background.
-* Incept: SRE, Thu Apr 12 17:46:20 2007 [Janelia]
-*
-* Purpose: Given a score matrix <S> and known query and target
-* background frequencies <fi> and <fj>, calculate scale
-* <lambda> and implicit target probabilities \citep{Altschul01}.
-* Optionally returns either (or both) in <opt_lambda> and <opt_P>.
-*
-* The implicit target probabilities are returned in a
-* newly allocated $K \times K$ <ESL_DMATRIX>, over only
-* the canonical (typically 4 or 20) residues in the
-* residue alphabet.
-*
-* Args: S - score matrix
-* fi - background frequencies for sequence i
-* fj - background frequencies for sequence j
-* opt_lambda - optRETURN: calculated $\lambda$ parameter
-* opt_P - optRETURN: implicit target probabilities $p_{ij}$; a KxK DMATRIX.
-*
-* Returns: <eslOK> on success, <*ret_lambda> contains the
-* calculated $\lambda$ parameter, and <*ret_P> points to
-* the target probability matrix (which is allocated here,
-* and must be free'd by caller with <esl_dmatrix_Destroy(*ret_P)>.
-*
-* Throws: <eslEMEM> on allocation error;
-* <eslEINVAL> if matrix is invalid and has no solution for $\lambda$;
-* <eslENOHALT> if the solver fails to find $\lambda$.
-* In these cases, <*ret_lambda> is 0.0, and <*ret_P> is <NULL>.
-*/
-int
-esl_sco_ProbifyGivenBG(const ESL_SCOREMATRIX *S, const double *fi, const double *fj,
- double *opt_lambda, ESL_DMATRIX **opt_P)
-{
- int status;
- ESL_ROOTFINDER *R = NULL;
- ESL_DMATRIX *P = NULL;
- struct lambda_params p;
- double lambda_guess;
- double lambda;
- int i,j;
- double fx, dfx;
-
- /* First, solve for lambda by rootfinding.
- */
- /* Set up the data passed to the lambda_fdf function. */
- p.fi = fi;
- p.fj = fj;
- p.S = S;
-
- /* Bracket the root.
- * It's important that we come at the root from the far side, where
- * f(lambda) is positive; else we may identify the root we don't want
- * at lambda=0.
- */
- lambda_guess = 1. / (double) esl_scorematrix_Max(S);
- for (; lambda_guess < 50.; lambda_guess *= 2.0) {
- lambda_fdf(lambda_guess, &p, &fx, &dfx);
- if (fx > 0) break;
- }
- if (fx <= 0) ESL_EXCEPTION(eslEINVAL, "Failed to bracket root for solving lambda");
-
- /* Create a solver and find lambda by Newton/Raphson */
- if (( R = esl_rootfinder_CreateFDF(lambda_fdf, &p) ) == NULL) { status = eslEMEM; goto ERROR; }
- if (( status = esl_root_NewtonRaphson(R, lambda_guess, &lambda)) != eslOK) goto ERROR;
-
- /* Now, given solution for lambda, calculate P
- */
- if (opt_P != NULL)
- {
- if ((P = esl_dmatrix_Create(S->Kp, S->Kp)) == NULL) { status = eslEMEM; goto ERROR; }
- for (i = 0; i < S->K; i++)
- for (j = 0; j < S->K; j++)
- P->mx[i][j] = fi[i] * fj[j] * exp(lambda * (double) S->s[i][j]);
- set_degenerate_probs(S->abc_r, P, NULL, NULL);
- }
-
- esl_rootfinder_Destroy(R);
- if (opt_lambda != NULL) *opt_lambda = lambda;
- if (opt_P != NULL) *opt_P = P;
- return eslOK;
-
-ERROR:
- if (R != NULL) esl_rootfinder_Destroy(R);
- if (opt_lambda != NULL) *opt_lambda = 0.;
- if (opt_P != NULL) *opt_P = NULL;
- return status;
-
-
-}
-
-
-
-/* This section is an implementation of one of the ideas in
-* Yu and Altschul, PNAS 100:15688, 2003 [YuAltschul03]:
-* Given a valid score matrix, calculate its probabilistic
-* basis (P_ij, f_i, f_j, and lambda), on the assumption that
-* the background probabilities are the marginals of P_ij.
-*/
-struct yualtschul_params {
- ESL_DMATRIX *S; /* pointer to the KxK score matrix w/ values cast to doubles */
- ESL_DMATRIX *M; /* not a param per se: alloc'ed storage for M matrix provided to the objective function */
- ESL_DMATRIX *Y; /* likewise, alloc'ed storage for Y (M^-1) matrix provided to obj function */
-};
-
-/* yualtschul_func()
-*
-* This is the objective function we try to find a root of.
-* Its prototype is dictated by the esl_rootfinder API.
-*/
-static int
-yualtschul_func(double lambda, void *params, double *ret_fx)
-{
- int status;
- struct yualtschul_params *p = (struct yualtschul_params *) params;
- ESL_DMATRIX *S = p->S;
- ESL_DMATRIX *M = p->M;
- ESL_DMATRIX *Y = p->Y;
- int i,j;
-
- /* the M matrix has entries M_ij = e^{lambda * s_ij} */
- for (i = 0; i < S->n; i++)
- for (j = 0; j < S->n; j++)
- M->mx[i][j] = exp(lambda * S->mx[i][j]);
-
- /* the Y matrix is the inverse of M */
- if ((status = esl_dmx_Invert(M, Y)) != eslOK) return status;
-
- /* We're trying to find the root of \sum_ij Y_ij - 1 = 0 */
- *ret_fx = esl_dmx_Sum(Y) - 1.;
- return eslOK;
-}
-
-/* yualtschul_engine()
-*
-* This function backcalculates the probabilistic basis for a score
-* matrix S, when S is a double-precision matrix. Providing this
-* as a separate "engine" and writing esl_sco_Probify()
-* as a wrapper around it allows us to separately test inaccuracy
-* due to numerical performance of our linear algebra, versus
-* inaccuracy due to integer roundoff in integer scoring matrices.
-*
-* It is not uncommon for this to fail when S is derived from
-* integer scores. Because the scores may have been provided by the
-* user, and this may be our first chance to detect the "user error"
-* of an invalid matrix, this engine returns <eslENORESULT> as a normal error
-* if it can't reach a valid solution.
-*/
-static int
-yualtschul_engine(ESL_DMATRIX *S, ESL_DMATRIX *P, double *fi, double *fj, double *ret_lambda)
-{
- int status;
- ESL_ROOTFINDER *R = NULL;
- struct yualtschul_params p;
- double lambda;
- double xl, xr;
- double fx;
- int i,j;
-
- /* Set up a bisection method to find lambda */
- p.S = S;
- p.M = p.Y = NULL;
- if ((p.M = esl_dmatrix_Create(S->n, S->n)) == NULL) { status = eslEMEM; goto ERROR; }
- if ((p.Y = esl_dmatrix_Create(S->n, S->n)) == NULL) { status = eslEMEM; goto ERROR; }
- if ((R = esl_rootfinder_Create(yualtschul_func, &p)) == NULL) { status = eslEMEM; goto ERROR; }
-
- /* Need a reasonable initial guess for lambda; if we use extreme
- * lambda guesses, we'll introduce numeric instability in the
- * objective function, and may even blow up the values of e^{\lambda
- * s_ij} in the M matrix. Appears to be safe to start with lambda on
- * the order of 2/max(s_ij).
- */
- xr = 1. / esl_dmx_Max(S);
-
- /* Identify suitable brackets on lambda. */
- for (xl = xr; xl > 1e-10; xl /= 1.6) {
- if ((status = yualtschul_func(xl, &p, &fx)) != eslOK) goto ERROR;
- if (fx > 0.) break;
- }
- if (fx <= 0.) { status = eslENORESULT; goto ERROR; }
-
- for (; xr < 100.; xr *= 1.6) {
- if ((status = yualtschul_func(xr, &p, &fx)) != eslOK) goto ERROR;
- if (fx < 0.) break;
- }
- if (fx >= 0.) { status = eslENORESULT; goto ERROR; }
-
- /* Find lambda by bisection */
- if (esl_root_Bisection(R, xl, xr, &lambda) != eslOK) goto ERROR;
-
- /* Find fi, fj from Y: fi are column sums, fj are row sums */
- for (i = 0; i < S->n; i++) {
- fi[i] = 0.;
- for (j = 0; j < S->n; j++) fi[i] += p.Y->mx[j][i];
- }
- for (j = 0; j < S->n; j++) {
- fj[j] = 0.;
- for (i = 0; i < S->n; i++) fj[j] += p.Y->mx[j][i];
- }
-
- /* Find p_ij */
- for (i = 0; i < S->n; i++)
- for (j = 0; j < S->n; j++)
- P->mx[i][j] = fi[i] * fj[j] * p.M->mx[i][j];
-
- *ret_lambda = lambda;
- esl_dmatrix_Destroy(p.M);
- esl_dmatrix_Destroy(p.Y);
- esl_rootfinder_Destroy(R);
- return eslOK;
-
-ERROR:
- if (p.M != NULL) esl_dmatrix_Destroy(p.M);
- if (p.Y != NULL) esl_dmatrix_Destroy(p.Y);
- if (R != NULL) esl_rootfinder_Destroy(R);
- return status;
-}
-
-/* Function: esl_sco_Probify()
-* Synopsis: Calculate the probabilistic basis of a score matrix.
-* Incept: SRE, Wed Apr 11 07:56:44 2007 [Janelia]
-*
-* Purpose: Reverse engineering of a score matrix: given a "valid"
-* substitution matrix <S>, obtain implied joint
-* probabilities $p_{ij}$, query composition $f_i$, target
-* composition $f_j$, and scale $\lambda$, by assuming that
-* $f_i$ and $f_j$ are the appropriate marginals of $p_{ij}$.
-* Optionally return any or all of these solutions in
-* <*opt_P>, <*opt_fi>, <*opt_fj>, and <*opt_lambda>.
-*
-* The calculation is run only on canonical residue scores
-* $0..K-1$ in S, to calculate joint probabilities for all
-* canonical residues. Joint and background probabilities
-* involving degenerate residues are then calculated by
-* appropriate marginalizations.
-*
-* This implements an algorithm described in
-* \citep{YuAltschul03}.
-*
-* This algorithm works fine in principle, but when it is
-* applied to rounded integer scores with small dynamic
-* range (the typical situation for score matrices) it may
-* fail due to integer roundoff error. It works best for
-* score matrices built using small values of $\lambda$. Yu
-* and Altschul use $\lambda = 0.00635$ for BLOSUM62, which
-* amounts to scaling default BLOSUM62 up 50-fold. It
-* happens that default BLOSUM62 (which was created with
-* lambda = 0.3466, half-bits) can be successfully reverse
-* engineered (albeit with some loss of accuracy;
-* calculated lambda is 0.3240) but other common matrices
-* may fail. This failure results in a normal returned
-* error of <eslENORESULT>.
-*
-* Args: S - score matrix
-* opt_P - optRETURN: Kp X Kp matrix of implied target probs $p_{ij}$
-* opt_fi - optRETURN: vector of Kp $f_i$ background probs, 0..Kp-1
-* opt_fj - optRETURN: vector of Kp $f_j$ background probs, 0..Kp-1
-* opt_lambda - optRETURN: calculated $\lambda$ parameter
-*
-* Returns: <eslOK> on success, and <opt_P>, <opt_fi>, <opt_fj>, and <opt_lambda>
-* point to the results (for any of these that were passed non-<NULL>).
-*
-* <opt_P>, <opt_fi>, and <opt_fj>, if requested, are new
-* allocations, and must be freed by the caller.
-*
-* Returns <eslENORESULT> if the algorithm fails to determine a valid solution.
-*
-* Throws: <eslEMEM> on allocation failure.
-*
-* Xref: J1/35.
-*/
-int
-esl_sco_Probify(const ESL_SCOREMATRIX *S, ESL_DMATRIX **opt_P, double **opt_fi, double **opt_fj, double *opt_lambda)
-{
- int status;
- ESL_DMATRIX *Sd = NULL;
- ESL_DMATRIX *P = NULL;
- double *fi = NULL;
- double *fj = NULL;
- double lambda;
- int i,j;
-
- if (( Sd = esl_dmatrix_Create(S->K, S->K)) == NULL) {status = eslEMEM; goto ERROR; }
- if (( P = esl_dmatrix_Create(S->Kp, S->Kp)) == NULL) {status = eslEMEM; goto ERROR; }
- ESL_ALLOC_WITH_TYPE(fi, double*, sizeof(double) * S->Kp);
- ESL_ALLOC_WITH_TYPE(fj, double*, sizeof(double) * S->Kp);
-
- /* Construct a double-precision dmatrix from S.
- * I've tried integrating over the rounding uncertainty by
- * averaging over trials with values jittered by +/- 0.5,
- * but it doesn't appear to help much, if at all.
- */
- for (i = 0; i < S->K; i++)
- for (j = 0; j < S->K; j++)
- Sd->mx[i][j] = (double) S->s[i][j];
-
- /* Reverse engineer the doubles */
- if ((status = yualtschul_engine(Sd, P, fi, fj, &lambda)) != eslOK) goto ERROR;
-
- /* Set the degenerate probabilities by appropriate sums */
- set_degenerate_probs(S->abc_r, P, fi, fj);
-
- /* Done. */
- esl_dmatrix_Destroy(Sd);
- if (opt_P != NULL) *opt_P = P; else esl_dmatrix_Destroy(P);
- if (opt_fi != NULL) *opt_fi = fi; else free(fi);
- if (opt_fj != NULL) *opt_fj = fj; else free(fj);
- if (opt_lambda != NULL) *opt_lambda = lambda;
- return eslOK;
-
-ERROR:
- if (Sd != NULL) esl_dmatrix_Destroy(Sd);
- if (P != NULL) esl_dmatrix_Destroy(P);
- if (fi != NULL) free(fi);
- if (fj != NULL) free(fj);
- if (opt_P != NULL) *opt_P = NULL;
- if (opt_fi != NULL) *opt_fi = NULL;
- if (opt_fj != NULL) *opt_fj = NULL;
- if (opt_lambda != NULL) *opt_lambda = 0.;
- return status;
-}
-
-
-
-/* Function: esl_sco_RelEntropy()
-* Synopsis: Calculates relative entropy of a matrix.
-* Incept: SRE, Sat May 12 18:14:02 2007 [Janelia]
-*
-* Purpose: Calculates the relative entropy of score matrix <S> in
-* bits, given its background distributions <fi> and <fj> and
-* its scale <lambda>.
-*
-* Args: S - score matrix
-* fi - background freqs for sequence i
-* fj - background freqs for sequence j
-* lambda - scale factor $\lambda$ for <S>
-* ret_D - RETURN: relative entropy.
-*
-* Returns: <eslOK> on success, and <ret_D> contains the relative
-* entropy.
-*
-* Throws: <eslEMEM> on allocation error.
-* <eslEINVAL> if the implied $p_{ij}$'s don't sum to one,
-* probably indicating that <lambda> was not the correct
-* <lambda> for <S>, <fi>, and <fj>.
-* In either exception, <ret_D> is returned as 0.0.
-*/
-int
-esl_sco_RelEntropy(const ESL_SCOREMATRIX *S, const double *fi, const double *fj, double lambda, double *ret_D)
-{
- int status;
- double pij;
- double sum = 0.;
- int i,j;
- double D = 0;
-
- for (i = 0; i < S->K; i++)
- for (j = 0; j < S->K; j++)
- {
- pij = fi[i] * fj[j] * exp(lambda * (double) S->s[i][j]);
- sum += pij;
- if (pij > 0.) D += pij * log(pij / (fi[i] * fj[j]));
-
- }
- if (esl_DCompare(sum, 1.0, 1e-3) != eslOK)
- ESL_XEXCEPTION(eslEINVAL, "pij's don't sum to one: bad lambda?");
-
- D /= eslCONST_LOG2;
- *ret_D = D;
- return eslOK;
-
-ERROR:
- *ret_D = 0.;
- return status;
-}
-
-
-/* Input: P->mx[i][j] are joint probabilities p_ij for the canonical alphabet 0..abc->K-1,
-* but P matrix is allocated for Kp X Kp
-*
-* Fill in [i][j'=K..Kp-1], [i'=K..Kp-1][j], and [i'=K..Kp-1][j'=K..Kp-1] for degeneracies i',j'
-* Any p_ij involving a gap (K), nonresidue (Kp-2), or missing data (Kp-1) character is set to 0.0 by convention.
-*
-* Don't assume symmetry.
-*
-* If <fi> or <fj> background probability vectors are non-<NULL>, set them too.
-* (Corresponding to the assumption of background = marginal probs, rather than
-* background being given.)
-*/
-static int
-set_degenerate_probs(const ESL_ALPHABET *abc, ESL_DMATRIX *P, double *fi, double *fj)
-{
- int i,j,ip,jp;
-
- for (i = 0; i < abc->K; i++)
- {
- P->mx[i][abc->K] = 0.0;
- for (jp = abc->K+1; jp < abc->Kp; jp++)
- {
- P->mx[i][jp] = 0.0;
- for (j = 0; j < abc->K; j++)
- if (abc->degen[jp][j]) P->mx[i][jp] += P->mx[i][j];
- }
- P->mx[i][abc->Kp-2] = 0.0;
- P->mx[i][abc->Kp-1] = 0.0;
- }
-
- esl_vec_DSet(P->mx[abc->K], abc->Kp, 0.0); /* gap row */
-
- for (ip = abc->K+1; ip < abc->Kp-2; ip++)
- {
- for (j = 0; j < abc->K; j++)
- {
- P->mx[ip][j] = 0.0;
- for (i = 0; i < abc->K; i++)
- if (abc->degen[ip][i]) P->mx[ip][j] += P->mx[i][j];
- }
- P->mx[ip][abc->K] = 0.0;
-
- for (jp = abc->K+1; jp < abc->Kp; jp++)
- {
- P->mx[ip][jp] = 0.0;
- for (j = 0; j < abc->K; j++)
- if (abc->degen[jp][j]) P->mx[ip][jp] += P->mx[ip][j];
- }
- P->mx[ip][abc->Kp-2] = 0.0;
- P->mx[ip][abc->Kp-1] = 0.0;
- }
-
- esl_vec_DSet(P->mx[abc->Kp-2], abc->Kp, 0.0); /* nonresidue data ~ row */
- esl_vec_DSet(P->mx[abc->Kp-1], abc->Kp, 0.0); /* missing data ~ row */
-
- if (fi != NULL) { /* fi[i'] = p(i',X) */
- fi[abc->K] = 0.0;
- for (ip = abc->K+1; ip < abc->Kp-2; ip++) fi[ip] = P->mx[ip][abc->Kp-3];
- fi[abc->Kp-2] = 0.0;
- fi[abc->Kp-1] = 0.0;
- }
-
- if (fj != NULL) { /* fj[j'] = p(X,j')*/
- fj[abc->K] = 0.0;
- for (jp = abc->K+1; jp < abc->Kp-2; jp++) fj[jp] = P->mx[abc->Kp-3][jp];
- fj[abc->Kp-2] = 0.0;
- fj[abc->Kp-1] = 0.0;
- }
-
- return eslOK;
-}
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_scorematrix.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_scorematrix.h
deleted file mode 100644
index 8b1a0be..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_scorematrix.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/* Routines for manipulating sequence alignment score matrices.
-*
-* SRE, Mon Apr 2 08:33:23 2007 [Janelia]
- * SVN $Id: esl_scorematrix.h 337 2009-05-12 02:13:02Z eddys $
-*/
-#ifndef ESL_SCOREMATRIX_INCLUDED
-#define ESL_SCOREMATRIX_INCLUDED
-
-#include <hmmer3/easel/esl_alphabet.h>
-//#include <hmmer3/esl_fileparser.h>
-#include <hmmer3/easel/esl_dmatrix.h>
-#include <hmmer3/easel/esl_random.h>
-
-/* ESL_SCOREMATRIX:
-* allocation is in one array in s[0].
-*
-* i,j can range from 0..Kp-1, including all characters valid in the alphabet.
-* Only values for 0..K-1 (canonical alphabet) are mandatory.
-*/
-typedef struct {
- int **s; /* s[i][j] is the score of aligning residue i,j; i,j range 0..Kp-1 */
- int K; /* size of base alphabet (duplicate of S->abc_r->K) */
- int Kp; /* full size of s[][], including degeneracies (duplicate of S->abc_r->Kp) */
-
- /* bookkeeping for degenerate residues */
- char *isval; /* array 0..Kp-1: which residues of alphabet have valid scores in S. */
- const ESL_ALPHABET *abc_r; /* reference to the alphabet: includes K, Kp, and sym order */
-
- /* bookkeeping that lets us output exactly the residue order we read in a matrix file */
- int nc; /* number of residues with scores (inclusive of *, if present) */
- char *outorder; /* NUL-terminated string 0..nc-1 giving order of residues in col/row labels */
-
- char *name; /* optional: name of score matrix; or NULL */
- char *path; /* optional: full path to file that score matrix was read from; or NULL */
-} ESL_SCOREMATRIX;
-
-
-
-
-/* 1. The ESL_SCOREMATRIX object. */
-extern ESL_SCOREMATRIX *esl_scorematrix_Create(const ESL_ALPHABET *abc);
-extern int esl_scorematrix_SetIdentity(ESL_SCOREMATRIX *S);
-extern int esl_scorematrix_SetBLOSUM62(ESL_SCOREMATRIX *S);
-extern int esl_scorematrix_SetWAG(ESL_SCOREMATRIX *S, double lambda, double t);
-extern int esl_scorematrix_SetFromProbs(ESL_SCOREMATRIX *S, double lambda, const ESL_DMATRIX *P,
- const double *fi, const double *fj);
-extern int esl_scorematrix_Copy(const ESL_SCOREMATRIX *src, ESL_SCOREMATRIX *dest);
-extern ESL_SCOREMATRIX *esl_scorematrix_Clone(const ESL_SCOREMATRIX *S);
-extern int esl_scorematrix_Compare(const ESL_SCOREMATRIX *S1, const ESL_SCOREMATRIX *S2);
-extern int esl_scorematrix_CompareCanon(const ESL_SCOREMATRIX *S1, const ESL_SCOREMATRIX *S2);
-extern int esl_scorematrix_Max(const ESL_SCOREMATRIX *S);
-extern int esl_scorematrix_Min(const ESL_SCOREMATRIX *S);
-extern int esl_scorematrix_IsSymmetric(const ESL_SCOREMATRIX *S);
-extern void esl_scorematrix_Destroy(ESL_SCOREMATRIX *S);
-
-/* 3. Interpreting score matrices probabilistically. */
-extern int esl_sco_ProbifyGivenBG(const ESL_SCOREMATRIX *S, const double *fi, const double *fj,
- double *opt_lambda, ESL_DMATRIX **opt_P);
-extern int esl_sco_Probify(const ESL_SCOREMATRIX *S, ESL_DMATRIX **opt_P,
- double **opt_fi, double **opt_fj, double *opt_lambda);
-extern int esl_sco_RelEntropy(const ESL_SCOREMATRIX *S, const double *fi, const double *fj,
- double lambda, double *ret_D);
-
-
-
-
-
-#endif /*ESL_SCOREMATRIX_INCLUDED*/
-
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_sq.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_sq.cpp
deleted file mode 100644
index 405a59a..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_sq.cpp
+++ /dev/null
@@ -1,1848 +0,0 @@
-/* A sequence.
-*
-* Contents:
-* 1. Text version of the ESL_SQ object.
-* 2. Digitized version of the ESL_SQ object. [with <alphabet>]
-* 3. Other functions that operate on sequences.
-* 4. Getting single sequences from MSAs. [with <msa>]
-* 5. Internal functions.
-* 9. License and copyright.
-*
-* SRE, Mon Mar 31 17:18:59 2008 [Janelia]
- * SVN $Id: esl_sq.c 409 2009-10-19 23:47:18Z eddys $
-*/
-#include <hmmer3/easel/esl_config.h>
-
-#include <hmmer3/easel/easel.h>
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <ctype.h>
-
-#ifdef eslAUGMENT_ALPHABET
-#include <hmmer3/easel/esl_alphabet.h> /* alphabet aug adds digital sequences */
-#endif
-#ifdef eslAUGMENT_MSA
-#include <hmmer3/easel/esl_msa.h> /* msa aug adds ability to extract sq from an MSA */
-#endif
-#include "esl_sq.h"
-
-/* Shared parts of text/digital creation functions (defined in "internal functions" section) */
-static ESL_SQ *sq_create(int do_digital);
-static ESL_SQ *sq_create_from(const char *name, const char *desc, const char *acc);
-
-static ESL_SQ_BLOCK *sq_createblock(int count, int do_digital);
-
-static int sq_init(ESL_SQ *sq, int do_digital);
-static void sq_free(ESL_SQ *sq);
-
-/*****************************************************************
-*# 1. Text version of the <ESL_SQ> object.
-*****************************************************************/
-
-/* Function: esl_sq_Create()
-* Synopsis: Create a new, empty <ESL_SQ>.
-* Incept: SRE, Thu Dec 23 11:57:00 2004 [Zaragoza]
-*
-* Purpose: Creates an empty <ESL_SQ> sequence object, in text mode, with
-* internal fields allocated to reasonable initial sizes.
-*
-* Args: (void)
-*
-* Returns: a pointer to the new <ESL_SQ>. Caller frees this with
-* <esl_sq_Destroy()>.
-*
-* Throws: <NULL> if allocation fails.
-*/
-ESL_SQ *
-esl_sq_Create(void)
-{
- return sq_create(FALSE);
-}
-
-/* Function: esl_sq_CreateFrom()
-* Synopsis: Create a new <ESL_SQ> from text information.
-* Incept: SRE, Wed Mar 22 09:17:04 2006 [St. Louis]
-*
-* Purpose: Create a new <ESL_SQ> object in text mode from elemental data.
-* This provides an interface between non-Easel code
-* and Easel's object.
-*
-* Makes copies of all data. Caller is still
-* responsible for memory of name, seq, etc.
-*
-* <desc>, <acc>, and <ss> are optional. They can be passed
-* as <NULL> to leave them blank.
-*
-* <ss> is an optional alphabetic secondary structure
-* annotation string. If it is provided, its length must
-* match the length of <seq>.
-*
-* Args: name - name of the sequence (NUL-terminated)
-* seq - the sequence (alphabetic; NUL-terminated)
-* desc - optional: description line (or NULL)
-* acc - optional: accession (or NULL)
-* ss - optional: secondary structure annotation (or NULL)
-*
-* Returns: a pointer to the new object. Free with
-* <esl_sq_Destroy()>.
-*
-* Throws: <NULL> on allocation failure.
-*/
-// ! WARNING!!! Changed: seqLen added to parameters !
-ESL_SQ *
-esl_sq_CreateFrom(const char *name, const char *seq, int seqLen, const char *desc, const char *acc, const char *ss)
-{
- ESL_SQ *sq = NULL;
- int status;
-
- seqLen = ( 0 >= seqLen )? strlen( seq ) : seqLen;
- if ((sq = sq_create_from(name, desc, acc)) == NULL) goto ERROR;
- if ((status = esl_strdup(seq, seqLen, &(sq->seq))) != eslOK) goto ERROR;
-
- if (ss != NULL)
- {
- if (strlen(ss) != seqLen) ESL_XEXCEPTION(eslEINVAL, "ss, seq lengths mismatch");
- if ((status = esl_strdup(ss, seqLen, &(sq->ss))) != eslOK) goto ERROR;
- }
- else sq->ss = NULL;
-
- sq->n = seqLen;
- sq->salloc = seqLen+1;
-
- /* We assume we've created a complete sequence; set the coord bookkeeping accordingly. */
- sq->start = 1;
- sq->end = seqLen;
- sq->C = 0;
- sq->W = seqLen;
- sq->L = seqLen;
- return sq;
-
-ERROR:
- esl_sq_Destroy(sq);
- return NULL;
-}
-
-/* Function: esl_sq_Grow()
-* Synopsis: Assure that a <ESL_SQ> has space to add more residues.
-* Incept: SRE, Wed Jan 10 08:26:23 2007 [Janelia]
-*
-* Purpose: Assure that the sequence <sq> can hold at least
-* one more residue, whether in digital or text mode.
-* Reallocate if necessary. Optionally returns the number
-* of residues that can be added before the next call
-* to <esl_sq_Grow()> in <opt_nsafe>.
-*
-* The terminal <NUL> or sentinel count as a residue for
-* allocation purposes: that is, you may need to call
-* <esl_sq_Grow()> before terminating a new sequence.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on reallocation failure. In this case, the
-* original <sq> is untouched, and <*opt_nsafe> is returned
-* as 0.
-*
-* Xref: STL11/125.
-*/
-int
-esl_sq_Grow(ESL_SQ *sq, int64_t *opt_nsafe)
-{
- void *tmp;
- int64_t newSz;
- int64_t nsafe;
- int status;
-
- if (sq->seq != NULL) nsafe = sq->salloc - sq->n; /* text */
- else nsafe = (sq->salloc-1) - sq->n; /* digital: -1 because 0 is a sentinel */
-
- if (nsafe < 1)
- { /* reallocate by doubling (shouldn't need more, but if we do, keep doubling) */
- newSz = sq->salloc;
- do { nsafe += newSz; newSz*=2; } while (nsafe < 1);
-
- if (sq->seq != NULL) ESL_RALLOC_WITH_TYPE(sq->seq, char*, tmp, newSz * sizeof(char)); /* text */
- else ESL_RALLOC_WITH_TYPE(sq->dsq, ESL_DSQ*, tmp, newSz * sizeof(ESL_DSQ)); /* digital */
- if (sq->ss != NULL) ESL_RALLOC_WITH_TYPE(sq->ss, char*, tmp, newSz * sizeof(char));
- sq->salloc = newSz;
- }
- if (opt_nsafe != NULL) *opt_nsafe = nsafe;
- return eslOK;
-
-ERROR:
- if (opt_nsafe != NULL) *opt_nsafe = 0;
- return status;
-}
-
-/* Function: esl_sq_GrowTo()
-* Synopsis: Grows an <ESL_SQ> to hold a seq of at least <n> residues.
-* Incept: SRE, Fri Jan 18 11:06:50 2008 [UA5233 Westchester-Dulles]
-*
-* Purpose: Assure that the appropriate (text or digital) sequence
-* field in <sq> can hold up to a total of <n> residues,
-* reallocating as needed.
-*
-* If reallocated, the allocation will be $\geq (n+1)$ for
-* text mode (the +1 is for the terminal NUL byte), $\geq
-* (n+2)$ for digital mode (+2 for sentinel bytes at each
-* end). That is, you don't need to take these extra bytes into
-* account in your <n>; <n> is the number of residues, not
-* bytes.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure.
-*
-* Note that n=0 is fine here, because we'll allocate either n+1 or n+2.
-*/
-int
-esl_sq_GrowTo(ESL_SQ *sq, int64_t n)
-{
- void *tmp;
- int status;
-
- if (sq->seq != NULL) /* text mode */
- {
- if (n+1 > sq->salloc) {
- ESL_RALLOC_WITH_TYPE(sq->seq, char*, tmp, (n+1) * sizeof(char));
- if (sq->ss != NULL) ESL_RALLOC_WITH_TYPE(sq->ss, char*, tmp, (n+1) * sizeof(char));
- sq->salloc = n+1;
- }
- }
- else /* digital mode */
- {
- if (n+2 > sq->salloc) {
- ESL_RALLOC_WITH_TYPE(sq->dsq, ESL_DSQ*, tmp, (n+2) * sizeof(ESL_DSQ));
- if (sq->ss != NULL) ESL_RALLOC_WITH_TYPE(sq->ss, char*, tmp, (n+2) * sizeof(char));
- sq->salloc = n+2;
- }
- }
- return eslOK;
-
-ERROR:
- return status;
-}
-
-
-/* Function: esl_sq_Copy()
-* Synopsis: Make a copy of an <ESL_SQ>.
-* Incept: SRE, Sun Feb 24 17:59:24 2008 [UA5315 to St. Louis]
-*
-* Purpose: Copies a source sequence object <src> into
-* destination sequence object <dst>.
-*
-* The two objects don't have to be matched as far as
-* text/digital mode go; if mismatched, appropriate
-* text/digital conversion will be done.
-*
-* The destination sequence <dst> is reallocated internally
-* as necessary to hold a copy of <src>.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure.
-*
-* Note: Note the shenanigans involved in copying ss; you have
-* to pay attention to the ss being a 0..n-1 string in text
-* mode versus a 1..n string in digital mode.
-*/
-int
-esl_sq_Copy(const ESL_SQ *src, ESL_SQ *dst)
-{
- int status;
-
- /* If <src> has structure annotation and <dst> does not, initialize an allocation in <dst> */
- if (src->ss != NULL && dst->ss == NULL) ESL_ALLOC_WITH_TYPE(dst->ss, char*, sizeof(char) * dst->salloc);
-
- if ((status = esl_sq_SetName (dst, src->name)) != eslOK) goto ERROR;
- if ((status = esl_sq_SetSource (dst, src->source)) != eslOK) goto ERROR;
- if ((status = esl_sq_SetAccession(dst, src->acc)) != eslOK) goto ERROR;
- if ((status = esl_sq_SetDesc (dst, src->desc)) != eslOK) goto ERROR;
- if ((status = esl_sq_GrowTo (dst, src->n)) != eslOK) goto ERROR;
-
- if (src->seq != NULL && dst->seq != NULL) /* text to text */
- {
- strcpy(dst->seq, src->seq);
- if (src->ss != NULL) strcpy(dst->ss, src->ss);
- }
-#ifdef eslAUGMENT_ALPHABET
- else if (src->seq != NULL && dst->dsq != NULL) /* text to digital */
- {
- if ((status = esl_abc_Digitize(dst->abc, src->seq, dst->dsq)) != eslOK) goto ERROR;
- if (src->ss != NULL) {
- strcpy(dst->ss+1, src->ss);
- dst->ss[0] = '\0';
- }
- }
- else if (src->dsq != NULL && dst->seq != NULL) /* digital to text */
- {
- if ((status = esl_abc_Textize(src->abc, src->dsq, src->n, dst->seq)) != eslOK) goto ERROR;
- if (src->ss != NULL) strcpy(dst->ss, src->ss+1);
- }
- else /* digital to digital */
- {
- if (src->abc->type != dst->abc->type)
- ESL_XEXCEPTION(eslEINCOMPAT, "seq objects involved in Copy differ in digital alphabet");
- if ((status = esl_abc_dsqcpy(src->dsq, src->n, dst->dsq)) != eslOK) goto ERROR;
- if (src->ss != NULL) {
- strcpy(dst->ss+1, src->ss+1);
- dst->ss[0] = '\0';
- }
- }
-#endif
-
- dst->n = src->n;
- dst->start = src->start;
- dst->end = src->end;
- dst->C = src->C;
- dst->W = src->W;
- dst->L = src->L;
- /* don't copy allocations (nalloc, etc); dst knows its own memory */
- dst->roff = src->roff;
- dst->doff = src->doff;
- dst->hoff = src->hoff;
- dst->eoff = src->eoff;
- return eslOK;
-
-ERROR:
- esl_sq_Reuse(dst);
- return status;
-}
-
-/* Function: esl_sq_Compare()
-* Synopsis: Compare two sequence objects for equality.
-* Incept: SRE, Tue May 13 09:00:41 2008 [Janelia]
-*
-* Purpose: Compare the contents of two sequence objects <sq1>
-* and <sq2> for equality.
-*
-* Disk offsets are only compared if they are set in both
-* <sq1> and <sq2>. Allocation sizes are not compared at
-* all.
-*
-* Returns: <eslOK> if identical, <eslFAIL> if not.
-*/
-int
-esl_sq_Compare(ESL_SQ *sq1, ESL_SQ *sq2)
-{
- /* Annotation comparison */
- if (strcmp(sq1->name, sq2->name) != 0) return eslFAIL;
- if (strcmp(sq1->acc, sq2->acc) != 0) return eslFAIL;
- if (strcmp(sq1->desc, sq2->desc) != 0) return eslFAIL;
- if (strcmp(sq1->source, sq2->source) != 0) return eslFAIL;
- if (sq1->ss != NULL && sq2->ss != NULL) {
- if (strcmp(sq1->ss, sq2->ss) != 0) return eslFAIL;
- } else
- if (sq1->ss != NULL || sq2->ss != NULL) return eslFAIL;
- if (sq1->n != sq2->n) return eslFAIL;
-
- /* Sequence comparison */
- if (sq1->seq != NULL && sq2->seq != NULL) {
- if (strcmp(sq1->seq, sq2->seq) != 0) return eslFAIL;
- }
-#ifdef eslAUGMENT_ALPHABET
- else if (sq1->dsq != NULL && sq2->dsq != NULL) {
- if (memcmp(sq1->dsq, sq2->dsq, sizeof(ESL_DSQ) * (sq1->n+2)) != 0) return eslFAIL;
- }
-#endif
- else return eslFAIL;
-
- /* Coordinate comparison */
- if (sq1->start != sq2->start) return eslFAIL;
- if (sq1->end != sq2->end) return eslFAIL;
- if (sq1->C != sq2->C) return eslFAIL;
- if (sq1->W != sq2->W) return eslFAIL;
- if (sq1->L != sq2->L) return eslFAIL;
-
- /* Disk offset comparison */
- if (sq1->roff != -1 && sq2->roff != -1 && sq1->roff != sq2->roff) return eslFAIL;
- if (sq1->doff != -1 && sq2->doff != -1 && sq1->doff != sq2->doff) return eslFAIL;
- if (sq1->hoff != -1 && sq2->hoff != -1 && sq1->hoff != sq2->hoff) return eslFAIL;
- if (sq1->eoff != -1 && sq2->eoff != -1 && sq1->eoff != sq2->eoff) return eslFAIL;
-
- /* alphabet comparison */
-#ifdef eslAUGMENT_ALPHABET
- if (sq1->abc != NULL && (sq1->abc->type != sq2->abc->type)) return eslFAIL;
-#endif
- return eslOK;
-}
-
-
-
-/* Function: esl_sq_Reuse()
-* Synopsis: Reinitialize an <ESL_SQ> for re-use.
-* Incept: SRE, Thu Dec 23 12:23:51 2004 [Zaragoza]
-*
-* Purpose: Given a sequence object <sq> already in use;
-* reinitialize all its data, so a new seq
-* may be read into it. This allows sequential sequence
-* input without a lot of wasted allocation/free cycling.
-*
-* Returns: <eslOK> on success.
-*/
-int
-esl_sq_Reuse(ESL_SQ *sq)
-{
- sq->name[0] = '\0';
- sq->acc[0] = '\0';
- sq->desc[0] = '\0';
- sq->tax_id = -1;
- sq->source[0] = '\0';
- if (sq->seq != NULL) sq->seq[0] = '\0';
- if (sq->dsq != NULL) sq->dsq[0] = sq->dsq[1] = eslDSQ_SENTINEL;
- if (sq->ss != NULL) {
- if (sq->seq != NULL) sq->ss[0] = '\0';
- else sq->ss[0] = sq->ss[1] = '\0'; /* in digital mode, ss string is 1..n; 0 is a dummy \0 byte*/
- }
- sq->n = 0;
- sq->start = 0;
- sq->end = 0;
- sq->C = 0;
- sq->W = 0;
- sq->L = -1;
- sq->idx = -1;
- sq->doff = -1;
- sq->hoff = -1;
- sq->roff = -1;
- sq->eoff = -1;
- return eslOK;
-}
-
-/* Function: esl_sq_IsDigital()
-* Synopsis: Return <TRUE> if <sq> is digital.
-* Incept: SRE, Mon Mar 2 18:05:34 2009 [Casa de Gatos]
-*
-* Purpose: Return <TRUE> if <sq> is in digital mode,
-* and <FALSE> if not.
-*/
-int
-esl_sq_IsDigital(const ESL_SQ *sq)
-{
- return ((sq->dsq != NULL) ? TRUE : FALSE);
-}
-
-
-/* Function: esl_sq_IsText()
-* Synopsis: Return <TRUE> if <sq> is text mode.
-* Incept: SRE, Mon Mar 2 18:06:22 2009 [Casa de Gatos]
-*
-* Purpose: Return <TRUE> if <sq> is in text mode,
-* and <FALSE> if not.
-*/
-int
-esl_sq_IsText(const ESL_SQ *sq)
-{
- return ((sq->seq != NULL) ? TRUE : FALSE);
-}
-
-
-/* Function: esl_sq_Destroy()
-* Synopsis: Frees an <ESL_SQ>.
-* Incept: SRE, Thu Dec 23 12:28:07 2004 [Zaragoza]
-*
-* Purpose: Free a Create()'d <sq>.
-*/
-void
-esl_sq_Destroy(ESL_SQ *sq)
-{
- if (sq == NULL) return;
-
- if (sq->name != NULL) free(sq->name);
- if (sq->acc != NULL) free(sq->acc);
- if (sq->desc != NULL) free(sq->desc);
- if (sq->seq != NULL) free(sq->seq);
- if (sq->dsq != NULL) free(sq->dsq);
- if (sq->ss != NULL) free(sq->ss);
- if (sq->source != NULL) free(sq->source);
- free(sq);
- return;
-}
-
-/* Function: esl_sq_CreateBlock()
- * Synopsis: Create a new block of empty <ESL_SQ>.
- * Incept:
- *
- * Purpose: Creates a block of empty <ESL_SQ> sequence objects.
- *
- * Returns: a pointer to the new <ESL_SQ_BLOCK>. Caller frees this
- * with <esl_sq_DestroyBlock()>.
- *
- * Throws: <NULL> if allocation fails.
- */
-ESL_SQ_BLOCK *
-esl_sq_CreateBlock(int count)
-{
- return sq_createblock(count, FALSE);
-}
-
-/* Function: esl_sq_DestroyBlock()
- * Synopsis: Frees an <ESL_SQ_BLOCK>.
- * Incept:
- *
- * Purpose: Free a Create()'d block of <sq>.
- */
-void
-esl_sq_DestroyBlock(ESL_SQ_BLOCK *block)
-{
- int i;
-
- if (block == NULL) return;
-
- for (i = 0; i < block->listSize; ++i)
- {
- sq_free(block->list + i);
- }
-
- free(block->list);
- free(block);
- return;
-}
-
-#ifdef eslAUGMENT_ALPHABET
-
-/* Function: esl_sq_CreateDigitalBlock()
- * Synopsis: Create a new block of empty <ESL_SQ> in digital mode.
- * Incept:
- *
- * Purpose: Same as <esl_sq_CreateBlock()>, except the returned <sq>
- * is configured for a digital sequence using internal
- * alphabet <abc>, rather than a text sequence. Creates an
- * empty digital <ESL_SQ> sequence object, with internal
- * fields allocated to reasonable initial sizes.
- *
- * Returns: a pointer to the new <ESL_SQ_BLOCK>. Caller frees this with
- * <esl_sq_DestroyBlock()>.
- *
- * Throws: <NULL> if an allocation fails.
- *
- * Xref:
- */
-ESL_SQ_BLOCK *
-esl_sq_CreateDigitalBlock(int count, const ESL_ALPHABET *abc)
-{
- int i;
- ESL_SQ_BLOCK *block;
-
- if ((block = sq_createblock(count, TRUE)) == NULL) return NULL;
-
- for (i = 0; i < count; ++i)
- {
- block->list[i].abc = abc;
- }
-
- return block;
-}
-
-#endif /* eslAUGMENT_ALPHABET */
-
-/*--------------- end of ESL_SQ object functions ----------------*/
-
-
-
-
-/*****************************************************************
-*# 2. Digitized version of the <ESL_SQ> object. (Requires <alphabet>)
-*****************************************************************/
-#ifdef eslAUGMENT_ALPHABET
-
-/* Function: esl_sq_CreateDigital()
-* Synopsis: Create a new, empty <ESL_SQ> in digital mode.
-* Incept: SRE, Tue Jan 9 16:42:35 2007 [Janelia]
-*
-* Purpose: Same as <esl_sq_Create()>, except the returned <sq> is
-* configured for a digital sequence using internal
-* alphabet <abc>, rather than a text sequence. Creates an
-* empty digital <ESL_SQ> sequence object, with internal
-* fields allocated to reasonable initial sizes.
-*
-* Args: abc - pointer to internal alphabet
-*
-* Returns: a pointer to the new <ESL_SQ>. Caller frees this with
-* <esl_sq_Destroy()>.
-*
-* Throws: <NULL> if an allocation fails.
-*
-* Xref: STL11/124
-*/
-ESL_SQ *
-esl_sq_CreateDigital(const ESL_ALPHABET *abc)
-{
- ESL_SQ *s;
- if ((s = sq_create(TRUE)) == NULL) return NULL;
- s->abc = abc;
- return s;
-}
-
-/* Function: esl_sq_CreateDigitalFrom()
-* Synopsis: Create a new digital <ESL_SQ> from text info.
-* Incept: EPN, Fri Aug 24 13:38:56 2007
-*
-* Purpose: Create a new <ESL_SQ> object from elemental data.
-* Same as <esl_sq_CreateFrom> except takes a digital <ESL_DSQ *dsq>
-* instead of a text <char *seq> as the sequence to copy.
-*
-* Makes copies of all data. Caller is still
-* responsible for memory of name, seq, etc.
-*
-* <ss> is an optional alphabetic secondary structure
-* annotation string, <0..n-1>. If provided, its length
-* must match the length of <seq>. (Note that although the
-* argument <ss> is provided as a standard <0..n-1> C
-* string, <ss> is stored internally as a <1..n> string in
-* a digital sequence object, so that both the digital
-* sequence and its alphabetic secondary structure
-* annotation are indexed the same.)
-*
-* The object is growable; you can use <esl_sq_Reuse()>
-* on it.
-*
-* Args: abc - the digital alphabet
-* name - name of the sequence
-* dsq - digital sequence <1..L>
-* n - length of digitized sequence in residues (or -1 if unknown)
-* desc - optional: description line (or NULL)
-* acc - optional: accession (or NULL)
-* ss - optional: secondary structure annotation (or NULL)
-*
-* Returns: a pointer to the new object. Free with
-* <esl_sq_Destroy()>.
-*
-* Throws: <NULL> on allocation failure.
-*/
-ESL_SQ *
-esl_sq_CreateDigitalFrom(const ESL_ALPHABET *abc, const char *name, const ESL_DSQ *dsq, int64_t n,
- const char *desc, const char *acc, const char *ss)
-{
- ESL_SQ *sq = NULL;
- int status;
-
- if((sq = sq_create_from(name, desc, acc)) == NULL) goto ERROR;
- sq->n = (n == -1) ? esl_abc_dsqlen(dsq) : n;
- if ((status = esl_abc_dsqdup(dsq, sq->n, &(sq->dsq))) != eslOK) goto ERROR;
-
- if (ss != NULL)
- {
- if (strlen(ss) != sq->n) ESL_XEXCEPTION(eslEINVAL, "ss, seq lengths mismatch");
- ESL_ALLOC_WITH_TYPE(sq->ss, char*, sizeof(char) * (sq->n+2));
- sq->ss[0] = '\0';
- strcpy(sq->ss+1, ss);
- }
-
- /* We assume we've created a complete sequence; set the coord bookkeeping accordingly. */
- sq->start = 1;
- sq->end = n;
- sq->C = 0;
- sq->W = n;
- sq->L = n;
-
- sq->salloc = sq->n+2;
- sq->abc = abc;
- return sq;
-
-ERROR:
- esl_sq_Destroy(sq);
- return NULL;
-}
-
-
-/* Function: esl_sq_Digitize()
-* Synopsis: Convert an <ESL_SQ> to digital mode.
-* Incept: EPN, Mon Feb 12 11:09:06 2007
-*
-* Purpose: Given a sequence <sq> in text mode, convert it to
-* digital mode, using alphabet <abc>.
-*
-* Internally, the <dsq> digital sequence field is filled,
-* the <seq> text alignment field is destroyed and free'd,
-* a copy of the alphabet pointer is kept in the sq's
-* <abc> reference.
-*
-* Args: abc - digital alphabet
-* sq - sequence to digitize
-*
-* Returns: <eslOK> on success.
-* Returns <eslEINVAL> if the sequence contains invalid characters
-* that can't be digitized. If this happens, the <sq> is returned
-* unaltered - left in text mode, with <seq> as it was. (This is
-* a normal error, because <sq->seq> may be user input that we
-* haven't validated yet.)
-*
-* Throws: <eslEMEM> on allocation failure; in this case, state of <sq> may be
-* wedged, and it should only be destroyed, not used.
-*/
-int
-esl_sq_Digitize(const ESL_ALPHABET *abc, ESL_SQ *sq)
-{
- int status;
-
- /* Contract checks */
- if (sq->dsq != NULL) return eslOK;
- if (sq->seq == NULL) ESL_EXCEPTION(eslEINVAL, "sq has no text sequence");
-
- /* Validate before we convert, so we leave <seq> untouched if it's bad. */
- if (esl_abc_ValidateSeq(abc, sq->seq, sq->n, NULL) != eslOK) return eslEINVAL;
-
- /* Allocate dsq, ss properly; these are our last failure points. */
- /* You can't just call Grow() here, because it would grow for old text mode, not new digital */
- if (sq->salloc < sq->n+2) { /* it's possible (though unlikely) for salloc to be 1 residue too small */
- sq->salloc = sq->n+2;
- if (sq->ss != NULL) {
- void *tmp;
- ESL_RALLOC_WITH_TYPE(sq->ss, char*, tmp, sizeof(char) * sq->salloc);
- }
- }
- ESL_ALLOC_WITH_TYPE(sq->dsq, ESL_DSQ*, (sq->salloc) * sizeof(ESL_DSQ));
-
- /* Now convert. */
- if ((status = esl_abc_Digitize(abc, sq->seq, sq->dsq)) != eslOK) goto ERROR;
- if (sq->ss != NULL) {
- memmove(sq->ss+1, sq->ss, sq->n+1);
- sq->ss[0] = '\0';
- }
- free(sq->seq);
- sq->seq = NULL;
- sq->abc = abc;
- return eslOK;
-
-ERROR:
- if (sq->dsq != NULL) free(sq->dsq);
- return status;
-}
-
-/* Function: esl_sq_Textize()
-* Synopsis: Convert an <ESL_SQ> to text mode.
-* Incept: EPN, Mon Feb 12 11:15:06 2007
-*
-* Purpose: Given a sequence <sq> in digital mode, convert it
-* to text mode.
-*
-* Internally, the <seq> text alignment field is filled, the
-* <dsq> digital alignment field is destroyed and free'd, the
-* sq's <abc> digital alphabet reference is nullified.
-*
-* Args: sq - sequence object to convert to text
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure.
-* Throws <eslECORRUPT> if the digital sequence contains
-* invalid codes.
-*/
-int
-esl_sq_Textize(ESL_SQ *sq)
-{
- int status;
-
- /* Contract checks */
- if (sq->seq != NULL) return eslOK;
- if (sq->dsq == NULL) ESL_EXCEPTION(eslEINVAL, "sq has no digital sequence");
- if (sq->abc == NULL) ESL_EXCEPTION(eslEINVAL, "sq has no digital alphabet");
-
- /* Allocate. salloc is guaranteed big enough, if it was big enough for digital. */
- ESL_ALLOC_WITH_TYPE(sq->seq, char*, sq->salloc * sizeof(char));
-
- /* Convert. */
- if ((status = esl_abc_Textize(sq->abc, sq->dsq, sq->n, sq->seq)) != eslOK) goto ERROR;
- if (sq->ss != NULL)
- memmove(sq->ss, sq->ss+1, sq->n+1); /* slide back to 0..n-1; +1 includes terminal \0 */
-
- free(sq->dsq);
- sq->dsq = NULL;
- sq->abc = NULL; /* nullify reference (caller still owns real abc) */
- return eslOK;
-
-ERROR:
- if (sq->seq != NULL) free(sq->seq);
- return status;
-}
-
-/* Function: esl_sq_GuessAlphabet()
-* Synopsis: Guess alphabet type of a single sequence.
-* Incept: SRE, Wed May 16 11:03:44 2007 [Janelia]
-*
-* Purpose: Guess the alphabet type of biosequence <sq>, and store the
-* guess in <*ret_type>.
-*
-* All 26 letters are valid in the amino alphabet (even <O>
-* and <J> now), so the DNA alphabet is necessarily a subset.
-* Therefore most protein sequences can be identified
-* unambiguously (because they use letters that only occur
-* in amino acid sequence), but DNA sequences cannot be.
-*
-* The sequence must contain more than 10 residues, or it
-* is called <eslUNKNOWN>.
-*
-* Specifically, this routine calls the sequence <eslDNA>
-* if it consists only of the residues <ACGTN> and all four
-* of <ACGT> occur. (And analogously for <eslRNA>,
-* <ACGUN>.) It calls the sequence <eslAMINO> either if it
-* contains an amino-specific letter (<EFIJLOPQZ>), or if
-* it contains at least 15 of the 20 canonical amino acids
-* and consists only of canonical amino acids or <X>.
-
-* Thus DNA sequences containing IUPAC degeneracies other
-* than N are called <eslUNKNOWN>, rather than hazarding a
-* guess. It may be possible to improve on this in the
-* future by using residue occurrence frequencies.
-*
-* Note that a sequence of <ATATATA...> will be called
-* <eslUNKNOWN>, whereas a sequence <ACGTACGTACGT...>
-* (which could conceivably be "ala-cys-gly-thr...") will
-* be called <eslDNA>. Peptides of simple mono and di-amino
-* acid compositions do occur, but I have not (yet) seen a
-* peptide consisting only of all four residues <ACGT>.
-*
-* The routine is designed to be conservative, calling
-* <eslUNKNOWN> rather than making errors. In a test on the
-* Oct 2006 version of the NCBI nonredundant databases,
-* this routine called 0 <eslDNA> and 5694 <eslUNKNOWN> on
-* 4.0M protein sequences (99.9\% classification with no
-* false positives) and 0 <eslAMINO> and 155756
-* <eslUNKNOWN> in 4.4M DNA sequences (96\% classification
-* with no false positives). (Well, actually, one DNA call
-* was made in the protein database, but this was an
-* exception that proves the rule; that entry was indeed a
-* DNA contaminant. It has since been removed by NCBI.)
-*
-* Returns: <eslOK> on success, and <*ret_type> is set to
-* <eslAMINO>, <eslRNA>, or <eslDNA>.
-*
-* Returns <eslEAMBIGUOUS> if unable to determine the
-* alphabet type; in this case, <*ret_type> is set to
-* <eslUNKNOWN>.
-*
-* Xref: J1/62; 2007/0517-easel-guess-alphabet
-*/
-int
-esl_sq_GuessAlphabet(ESL_SQ *sq, int *ret_type)
-{
- int64_t ct[26];
- int x;
- int64_t i;
- int64_t n = 0;
-
- for (x = 0; x < 26; x++) ct[x] = 0;
- for (i = 0; i < sq->n; i++) {
- x = toupper(sq->seq[i]) - 'A';
- if (x < 0 || x > 26) continue;
- ct[x]++;
- n++;
- if (n > 10000) break; /* we oughta know by now! */
- }
- return esl_abc_GuessAlphabet(ct, ret_type);
-}
-#endif /*eslAUGMENT_ALPHABET*/
-
-/*---------- end of digitized ESL_SQ object functions -----------*/
-
-
-
-/*****************************************************************
-*# 3. Other functions that operate on sequences.
-*****************************************************************/
-
-/* Function: esl_sq_SetName()
- * Synopsis: Set the name of a sequence.
-* Incept: SRE, Thu Jan 11 08:42:53 2007 [Janelia]
-*
-* Purpose: Set the name of the sequence <sq> to <name>, reallocating
- * as needed. For example, <esl_sq_SetName(sq, "random")>.
-*
-* A copy of <name> is made, so if caller had <name> allocated,
-* it is still responsible for freeing it.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation error.
-*
-* Xref: STL11/125
-*/
-int
-esl_sq_SetName(ESL_SQ *sq, const char *name)
-{
- int n;
- void *tmp;
- int status;
-
- if (name == NULL) { sq->name[0] = '\0'; return eslOK; }
-
- n = strlen(name);
- if (n >= sq->nalloc)
- {
- ESL_RALLOC_WITH_TYPE(sq->name, char*, tmp, sizeof(char) * (n+1));
- sq->nalloc = n+1;
- }
- strcpy(sq->name, name);
- return eslOK;
-
-ERROR:
- return status;
-}
-
-/* Function: esl_sq_SetAccession()
- * Synopsis: Set the accession field in a sequence.
-* Incept: SRE, Fri Jan 18 09:48:54 2008 [Westchester airport]
-*
-* Purpose: Set the accession of the sequence <sq> to <acc>, reallocating
- * as needed. For example, <esl_sq_SetAccession(sq, "ACC12356")>.
-*
-* A copy of <acc> is made, so if caller had <acc> allocated,
-* it is still responsible for freeing it.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation error.
-*
-* Xref: STL11/125
-*/
-int
-esl_sq_SetAccession(ESL_SQ *sq, const char *acc)
-{
- int n;
- void *tmp;
- int status;
-
- if (acc == NULL) { sq->acc[0] = '\0'; return eslOK; }
-
- n = strlen(acc);
- if (n >= sq->aalloc)
- {
- ESL_RALLOC_WITH_TYPE(sq->acc, char*, tmp, sizeof(char) * (n+1));
- sq->aalloc = n+1;
- }
- strcpy(sq->acc, acc);
- return eslOK;
-
-ERROR:
- return status;
-}
-
-
-/* Function: esl_sq_SetDesc()
- * Synopsis: Set the description field in a sequence.
-* Incept: SRE, Fri Jan 18 09:46:14 2008 [Westchester airport]
-*
-* Purpose: Set the description of the sequence <sq> to <desc>, reallocating
- * as needed.
- * For example, <esl_sq_SetDesc(sq, "this is a random sequence")>.
-*
-* A copy of <desc> is made, so if caller had <desc> allocated,
-* it is still responsible for freeing it.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation error.
-*
-* Xref: STL11/125
-*/
-int
-esl_sq_SetDesc(ESL_SQ *sq, const char *desc)
-{
- int n;
- void *tmp;
- int status;
-
- if (desc == NULL) { sq->desc[0] = '\0'; return eslOK; }
-
- n = strlen(desc);
- if (n >= sq->dalloc)
- {
- ESL_RALLOC_WITH_TYPE(sq->desc, char*, tmp, sizeof(char) * (n+1));
- sq->dalloc = n+1;
- }
- strcpy(sq->desc, desc);
- return eslOK;
-
- ERROR:
- return status;
-}
-
-/* Function: esl_sq_SetSource()
- * Synopsis: Set the source name field in a sequence.
- * Incept: SRE, Wed May 7 16:17:56 2008 [Janelia]
- *
- * Purpose: Set the source of the sequence <sq> to <source>, reallocating
- * as needed. For example, <esl_sq_SetSource(sq, "X123456")>.
- *
- * A copy of <source> is made, so if caller had <source> allocated,
- * it is still responsible for freeing it.
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslEMEM> on allocation error.
- *
- * Xref: STL11/125
- */
-int
-esl_sq_SetSource(ESL_SQ *sq, const char *source)
-{
- int n;
- void *tmp;
- int status;
-
- if (source == NULL) { sq->source[0] = '\0'; return eslOK; }
-
- n = strlen(source);
- if (n >= sq->srcalloc)
- {
- ESL_RALLOC_WITH_TYPE(sq->source, char*, tmp, sizeof(char) * (n+1));
- sq->srcalloc = n+1;
- }
- strcpy(sq->source, source);
- return eslOK;
-
- ERROR:
- return status;
-}
-
-
-/* Function: esl_sq_FormatName()
- * Synopsis: Format a name of a sequence, printf()-style.
- * Incept: SRE, Fri Sep 11 10:59:01 2009 [Janelia]
- *
- * Purpose: Format the name of the sequence <sq> using
- * <printf()>-style format string <name> and corresponding
- * <printf()>-style arguments, reallocating as
- * needed.
- * For example, <esl_sq_FormatName(sq, "random%d", i)>.
- *
- * A copy of <name> is made, so if caller had <name> allocated,
- * it is still responsible for freeing it.
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslEMEM> on allocation error.
- */
-int
-esl_sq_FormatName(ESL_SQ *sq, const char *name, ...)
-{
- va_list argp;
- va_list argp2;
- int n;
- void *tmp;
- int status;
-
- if (name == NULL) { sq->name[0] = '\0'; return eslOK; }
-
- va_start(argp, name);
- va_copy(argp2, argp);
- if ((n = vsnprintf(sq->name, sq->nalloc, name, argp)) >= sq->nalloc)
- {
- ESL_RALLOC_WITH_TYPE(sq->name, char*, tmp, sizeof(char) * (n+1));
- sq->nalloc = n+1;
- vsnprintf(sq->name, sq->nalloc, name, argp2);
- }
- va_end(argp);
- va_end(argp2);
- return eslOK;
-
- ERROR:
- return status;
-}
-
-/* Function: esl_sq_FormatAccession()
- * Synopsis: Format the accession field in a sequence, printf()-style.
- * Incept: SRE, Fri Sep 11 11:00:37 2009 [Janelia]
-*
- * Purpose: Format the accession of the sequence <sq> using <printf()>-style
- * format string <acc> and corresponding <printf()>-style arguments,
- * reallocating as needed.
- * For example, <esl_sq_FormatAccession(sq, "ACC%06d", i)>.
-*
- * A copy of <acc> is made, so if caller had <acc> allocated,
- * it is still responsible for freeing it.
-*
-* Returns: <eslOK> on success.
-*
- * Throws: <eslEMEM> on allocation error.
-*/
-int
-esl_sq_FormatAccession(ESL_SQ *sq, const char *acc, ...)
-{
- va_list argp, argp2;
- int n;
- void *tmp;
- int status;
-
- if (acc == NULL) { sq->acc[0] = '\0'; return eslOK; }
-
- va_start(argp, acc);
- va_copy(argp2, argp);
- if ((n = vsnprintf(sq->acc, sq->aalloc, acc, argp)) >= sq->aalloc)
- {
- ESL_RALLOC_WITH_TYPE(sq->acc, char*, tmp, sizeof(char) * (n+1));
- sq->aalloc = n+1;
- vsnprintf(sq->acc, sq->aalloc, acc, argp2);
- }
- va_end(argp);
- va_end(argp2);
- return eslOK;
-
- ERROR:
- return status;
- }
-
-
-/* Function: esl_sq_FormatDesc()
- * Synopsis: Format the description field in a sequence, printf()-style.
- * Incept: SRE, Fri Sep 11 11:02:11 2009 [Janelia]
- *
- * Purpose: Format the description of the sequence <sq> using <printf()>-style
- * format string <desc> and corresponding <printf()>-style arguments,
- * reallocating as needed.
- * For example, <esl_sq_FormatDesc(sq, "random sequence %d", i)>.
- *
- * A copy of <desc> is made, so if caller had <desc> allocated,
- * it is still responsible for freeing it.
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslEMEM> on allocation error.
- */
-int
-esl_sq_FormatDesc(ESL_SQ *sq, const char *desc, ...)
-{
- va_list argp, argp2;
- int n;
- void *tmp;
- int status;
-
- if (desc == NULL) { sq->desc[0] = '\0'; return eslOK; }
-
- va_start(argp, desc);
- va_copy(argp2, argp);
- if ((n = vsnprintf(sq->desc, sq->dalloc, desc, argp)) >= sq->dalloc)
- {
- ESL_RALLOC_WITH_TYPE(sq->desc, char*, tmp, sizeof(char) * (n+1));
- sq->dalloc = n+1;
- vsnprintf(sq->desc, sq->dalloc, desc, argp2);
- }
- va_end(argp);
- va_end(argp2);
- return eslOK;
-
-ERROR:
- return status;
-}
-
-
-/* Function: esl_sq_FormatSource()
- * Synopsis: Format the source name field in a sequence, printf()-style.
- * Incept: SRE, Fri Sep 11 10:55:10 2009 [Janelia]
-*
- * Purpose: Format the source of the sequence <sq> using <printf()>-style
- * format string <source> and corresponding <printf()>-style arguments,
- * reallocating as needed.
- * For example, <esl_sq_FormatSource(sq, "source %d", i)>.
-*
-* A copy of <source> is made, so if caller had <source> allocated,
-* it is still responsible for freeing it.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation error.
-*/
-int
-esl_sq_FormatSource(ESL_SQ *sq, const char *source, ...)
-{
- va_list argp, argp2;
- int n;
- void *tmp;
- int status;
-
- if (source == NULL) { sq->source[0] = '\0'; return eslOK; }
-
- va_start(argp, source);
- va_copy(argp2, argp);
- if ((n = vsnprintf(sq->source, sq->srcalloc, source, argp)) >= sq->srcalloc)
- {
- ESL_RALLOC_WITH_TYPE(sq->source, char*, tmp, sizeof(char) * (n+1));
- sq->srcalloc = n+1;
- vsnprintf(sq->source, sq->srcalloc, source, argp2);
- }
- va_end(argp);
- va_end(argp2);
- return eslOK;
-
-ERROR:
- return status;
-}
-
-
-/* Function: esl_sq_AppendDesc()
- * Synopsis: Append a new line to a growing multiline description.
- * Incept: SRE, Thu May 22 15:33:43 2008 [Janelia]
- *
- * Purpose: Append line <desc> to the description annotation line
- * in <sq>.
- *
- * The annotation line <sq->desc> is a single line; it may
- * not contain \verb+\n+ newlines. Caller is responsible
- * for making sure <desc> does not terminate in \verb+\n+.
- * If <sq->desc> already contains a description
- * line (presumably because we're reading from a file format
- * that's split the description across multiple lines),
- * append a space before adding this next line <desc>.
- *
- * Returns: <eslOK> on success.
- *
- * Throws: <eslEMEM> on allocation failure.
- */
-int
-esl_sq_AppendDesc(ESL_SQ *sq, const char *desc)
-{
- void *tmp;
- int dlen = (sq->desc == NULL ? 0 : strlen(sq->desc));
- int newlen = (desc == NULL ? 0 : strlen(desc));
- int status;
-
- if (dlen + newlen + 1 >= sq->dalloc) { /* +1 for appended space */
- ESL_RALLOC_WITH_TYPE(sq->desc, char*, tmp, sizeof(char) * (newlen+dlen+eslSQ_DESCCHUNK));
- sq->dalloc = newlen+dlen+eslSQ_DESCCHUNK;
- }
-
- if (dlen > 0) { sq->desc[dlen] = ' '; dlen++; }
- strcpy(sq->desc + dlen, desc);
- return eslOK;
-
- ERROR:
- return status;
-}
-
-
-
-/* Function: esl_sq_SetCoordComplete()
-* Synopsis: Sets coords in a complete sequence.
-* Incept: SRE, Tue May 13 09:25:33 2008 [Janelia]
-*
-* Purpose: Declare that <sq> contains a complete sequence of length
-* <L>; set the coordinate and length information in <sq>
-* accordingly. This is used in building new sequence
-* objects.
-*
-* <sq->seq> or <sq->dsq> must contain a sequence of length
-* <L>.
-*
-* Returns: <eslOK> on success.
-*/
-int
-esl_sq_SetCoordComplete(ESL_SQ *sq, int64_t L)
-{
- sq->start = 1;
- sq->end = L;
- sq->C = 0;
- sq->W = L;
- sq->L = L;
- sq->n = L;
- return eslOK;
-}
-
-
-
-/* Function: esl_sq_CAddResidue()
-* Synopsis: Add one residue (or terminal NUL) to a text seq.
-* Incept: SRE, Wed Jan 10 07:58:20 2007 [Janelia]
-*
-* Purpose: Add one residue <c> onto a growing text mode sequence <sq>,
-* and deal with any necessary reallocation.
-*
-* The sequence in <sq> is not <NUL>-terminated. To
-* finish and NUL-terminate <sq>, call
-* <esl_sq_CAddResidue(sq, 0)>.
-*
-* Note: Not the most efficient routine, but convenient in some
-* routines. Parsers (where speed is at a premium) typically
-* use an addseq() kind of function instead.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on re-allocation failure.
-*
-* Xref: STL11/125.
-*/
-int
-esl_sq_CAddResidue(ESL_SQ *sq, char c)
-{
- if (esl_sq_Grow(sq, NULL) != eslOK) return eslEMEM;
- sq->seq[sq->n] = c;
- if (c != '\0') sq->n++;
- return eslOK;
-}
-
-#ifdef eslAUGMENT_ALPHABET
-/* Function: esl_sq_XAddResidue()
-* Synopsis: Add one residue (or terminal sentinel) to digital seq.
-* Incept: SRE, Wed Jan 10 08:23:23 2007 [Janelia]
-*
-* Purpose: Like <esl_sq_CAddResidue()>, but for digital mode
-* sequence: add a digital residue <x> onto a growing
-* digital sequence <sq>.
-*
-* The digital sequence in <sq> must be explicitly
-* terminated when you're done adding to it; call
-* <esl_sq_XAddResidue(sq, eslDSQ_SENTINEL)>.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on re-allocation failure.
-*
-* Xref: STL11/125.
-*/
-int
-esl_sq_XAddResidue(ESL_SQ *sq, ESL_DSQ x)
-{
- if (esl_sq_Grow(sq, NULL) != eslOK) return eslEMEM;
- sq->dsq[sq->n+1] = x;
- if (x != eslDSQ_SENTINEL) sq->n++;
- return eslOK;
-}
-#endif /* eslAUGMENT_ALPHABET */
-
-
-/* Function: esl_sq_ReverseComplement()
-* Synopsis: Reverse complement a sequence.
-* Incept: SRE, Thu May 15 20:52:13 2008 [Casa de Gatos]
-*
-* Purpose: Reverse complement the sequence <sq>, in place.
-*
-* If <sq> is in text mode, upper/lower case is preserved,
-* and the DNA alphabet is used (<Aa> is complemented to
-* <Tt>, not <Uu>). If a non-nucleic character is seen, it
-* is reverse complemented to an N, and the return status
-* is <eslEINVAL> (but the whole sequence is still reverse
-* complemented).
-*
-* If <sq> is in digital mode, the appropriate alphabet
-* (DNA vs. RNA) is used. If the alphabet has no defined
-* complement (such as amino acids), an <eslEINCOMPAT>
-* error is thrown, and the sequence isn't changed at all.
-*
-* Gap and missing data characters, if any, are preserved
-* (in text mode, <._-> are treated as gaps, and <~> as
-* missing data). Degenerate nucleic acid IUPAC characters
-* are complemented appropriately.
-*
-* The <start/end> coords in <sq> are swapped. (Note that
-* in the unusual case of sequences of length 1,
-* <start=end> and we can't unambiguously tell if a seq is
-* in the reverse complement direction or not; this is a
-* minor flaw in Easel's current coordinate handling.)
-*
-* Returns: <eslOK> on success.
-*
-* Returns <eslEINVAL> if the <sq> is in text mode, and we
-* see a character that doesn't belong to the IUPAC DNA/RNA
-* alphabet; in this case, the <sq> is still reverse
-* complemented using the DNA alphabet, with <N> for any
-* non-nucleic residues.
-*
-* Throws: <eslEINCOMPAT> if the <sq> is in digital mode, but the
-* digital alphabet has no defined complement.
-*/
-int
-esl_sq_ReverseComplement(ESL_SQ *sq)
-{
- int64_t i;
- int status = eslOK;
-
- if (sq->seq != NULL)
- {
- /* first, complement the sequence */
- for (i = 0; i < sq->n; i++)
- switch (sq->seq[i]) {
- case 'A': sq->seq[i] = 'T'; break;
- case 'C': sq->seq[i] = 'G'; break;
- case 'G': sq->seq[i] = 'C'; break;
- case 'T': sq->seq[i] = 'A'; break;
- case 'U': sq->seq[i] = 'A'; break;
- case 'R': sq->seq[i] = 'Y'; break;
- case 'Y': sq->seq[i] = 'R'; break;
- case 'M': sq->seq[i] = 'K'; break;
- case 'K': sq->seq[i] = 'M'; break;
- case 'S': sq->seq[i] = 'S'; break;
- case 'W': sq->seq[i] = 'W'; break;
- case 'H': sq->seq[i] = 'D'; break;
- case 'B': sq->seq[i] = 'V'; break;
- case 'V': sq->seq[i] = 'B'; break;
- case 'D': sq->seq[i] = 'H'; break;
- case 'N': sq->seq[i] = 'N'; break;
- case 'X': sq->seq[i] = 'X'; break;
- case 'a': sq->seq[i] = 't'; break;
- case 'c': sq->seq[i] = 'g'; break;
- case 'g': sq->seq[i] = 'c'; break;
- case 't': sq->seq[i] = 'a'; break;
- case 'u': sq->seq[i] = 'a'; break;
- case 'r': sq->seq[i] = 'y'; break;
- case 'y': sq->seq[i] = 'r'; break;
- case 'm': sq->seq[i] = 'k'; break;
- case 'k': sq->seq[i] = 'm'; break;
- case 's': sq->seq[i] = 's'; break;
- case 'w': sq->seq[i] = 'w'; break;
- case 'h': sq->seq[i] = 'd'; break;
- case 'b': sq->seq[i] = 'v'; break;
- case 'v': sq->seq[i] = 'b'; break;
- case 'd': sq->seq[i] = 'h'; break;
- case 'n': sq->seq[i] = 'n'; break;
- case 'x': sq->seq[i] = 'x'; break;
- case '.': sq->seq[i] = '.'; break;
- case '_': sq->seq[i] = '_'; break;
- case '-': sq->seq[i] = '-'; break;
- case '~': sq->seq[i] = '~'; break;
- default: sq->seq[i] = 'N'; status = eslEINVAL; break;
- }
-
- /* then, reverse it in place */
- for (i = 0; i < sq->n / 2; i++)
- ESL_SWAP(sq->seq[i], sq->seq[sq->n-i-1], char);
- }
-#ifdef eslAUGMENT_ALPHABET
- else
- {
- if (sq->abc->complement == NULL)
- ESL_EXCEPTION(eslEINCOMPAT, "tried to take reverse complement of a non-nucleic sequence");
-
- ESL_DSQ x;
- for (i = 1; i <= sq->n/2; i++)
- {
- x = sq->abc->complement[sq->dsq[sq->n-i+1]];
- sq->dsq[sq->n-i+1] = sq->abc->complement[sq->dsq[i]];
- sq->dsq[i] = x;
- }
- if (sq->n%2) sq->dsq[i] = sq->abc->complement[sq->dsq[i]];
- }
-#endif /*eslAUGMENT_ALPHABET*/
-
- ESL_SWAP(sq->start, sq->end, int);
- /* revcomp invalidates any secondary structure annotation */
- if (sq->ss != NULL) { free(sq->ss); sq->ss = NULL; }
- return status;
-}
-
-/* Function: esl_sq_Checksum()
- * Synopsis: Calculate a 32-bit checksum for a sequence.
- * Incept: SRE, Tue Aug 25 14:32:17 2009 [Janelia]
- *
- * Purpose: Calculate a 32-bit checksum for <sq>.
- *
- * Only the sequence data are considered, not name or other
- * annotation. For text mode sequences, the checksum is
- * case sensitive. The checksum is also sensitive to
- * whether the sequence is text or digital mode; the same
- * sequence in will yield different checksums in digital
- * vs. text mode.
- *
- * Returns: <eslOK> on success; the checksum is in <*ret_checksum>.
- */
-int
-esl_sq_Checksum(const ESL_SQ *sq, uint32_t *ret_checksum)
-{
- uint32_t val = 0;
- uint64_t pos;
-
- if (sq->seq != NULL)
- {
- for (pos = 0; pos < (uint64_t)sq->n; pos++)
- {
- val += sq->seq[pos];
- val += (val << 10);
- val ^= (val >> 6);
- }
- }
-#ifdef eslAUGMENT_ALPHABET
- else
- {
- for (pos = 1; pos <= (uint64_t)sq->n; pos++)
- {
- val += sq->dsq[pos];
- val += (val << 10);
- val ^= (val >> 6);
- }
- }
-#endif
-
- val += (val << 3);
- val ^= (val >> 11);
- val += (val << 15);
-
- *ret_checksum = val;
- return eslOK;
-}
-
-/*---------------------- end, other functions -------------------*/
-
-
-
-/*****************************************************************
-*# 4. Getting single sequences from MSAs (requires <msa>)
-*****************************************************************/
-#ifdef eslAUGMENT_MSA
-
-/* Function: esl_sq_GetFromMSA()
-* Synopsis: Get a single sequence from an MSA.
-* Incept: SRE, Tue Apr 1 11:13:28 2008 [Janelia]
-*
-* Purpose: Retrieve sequence number <which> (<0..msa->nseq-1>) from
-* <msa> and store it in the <sq> that the caller allocated
-* and provided. This version (as opposed to
-* <esl_sq_FetchFromMSA()>, below) allows caller to reuse
-* the same <sq> container for retrieving sequences one at
-* a time from an MSA.
-*
-* The retrieved sequence <sq> must be in the same mode as
-* the source <msa>, text versus digital.
-*
-* The retrieved sequence is dealigned. For a text mode
-* sequence, gap characters to be removed are assumed to be
-* <-_.>. For a digital mode sequence, gap characters are
-* defined by the digital alphabet.
-*
-* The <sq->source> field is set to the name of the MSA, if
-* an MSA name is present.
-*
-* Returns: <eslOK> on success, and the retrieved sequence is in <sq>.
-* Some of the internals of <sq> may have been reallocated if
-* necessary.
-*
-* Returns <eslEOD> if there is no sequence number <which>.
-*
-* Throws: <eslEMEM> on allocation error;
-* <eslEINVAL> if <sq> is in a different text/digital mode than
-* <msa>.
-*/
-int
-esl_sq_GetFromMSA(const ESL_MSA *msa, int which, ESL_SQ *sq)
-{
- char *gapchars = "-_.~"; /* hardcoded for now */
- char *acc = NULL;
- char *desc = NULL;
- char *ss = NULL;
- int status;
-
- if (which >= msa->nseq || which < 0) return eslEOD;
- if ( (msa->flags & eslMSA_DIGITAL) && sq->dsq == NULL) ESL_XEXCEPTION(eslEINVAL, "msa is digital, sq is not");
- if (!(msa->flags & eslMSA_DIGITAL) && sq->seq == NULL) ESL_XEXCEPTION(eslEINVAL, "msa is text, sq is not");
-
- /* watch out for completely missing optional msa annotations;
- * msa->sqacc[which] could segfault if msa->sqacc itself is NULL
- */
- if (msa->sqacc != NULL) acc = msa->sqacc[which];
- if (msa->sqdesc != NULL) desc = msa->sqdesc[which];
- if (msa->ss != NULL) ss = msa->ss[which];
-
- if ((status = esl_sq_SetName (sq, msa->sqname[which])) != eslOK) goto ERROR;
- if ((status = esl_sq_SetAccession(sq, acc)) != eslOK) goto ERROR;
- if ((status = esl_sq_SetDesc (sq, desc)) != eslOK) goto ERROR;
- if ((status = esl_sq_SetSource (sq, msa->name)) != eslOK) goto ERROR;
- if ((status = esl_sq_GrowTo (sq, msa->alen)) != eslOK) goto ERROR; /* can't be more than alen residues */
-
- if (! msa->flags & eslMSA_DIGITAL) /* text mode to text mode */
- {
- strcpy(sq->seq, msa->aseq[which]);
- if (ss != NULL) {
- strcpy(sq->ss, msa->ss[which]);
- esl_strdealign(sq->ss, sq->seq, gapchars, NULL);
- }
- esl_strdealign(sq->seq, sq->seq, gapchars, &(sq->n)); /* sq->n gets set as side effect */
- }
-#ifdef eslAUGMENT_ALPHABET
- else
- {
- esl_abc_dsqcpy(msa->ax[which], msa->alen, sq->dsq);
- if (ss != NULL) {
- strcpy(sq->ss+1, ss); sq->ss[0] = '\0';
- esl_abc_CDealign(sq->abc, sq->ss+1, sq->dsq, NULL);
- }
- esl_abc_XDealign(sq->abc, sq->dsq, sq->dsq, &(sq->n)); /* sq->n gets set as side effect */
- }
-#endif /*eslAUGMENT_ALPHABET*/
-
- /* This is a complete sequence; set bookkeeping accordingly */
- sq->start = 1;
- sq->end = sq->n;
- sq->C = 0;
- sq->W = sq->n;
- sq->L = sq->n;
-
- sq->roff = -1;
- sq->doff = -1;
- sq->hoff = -1;
- sq->eoff = -1;
- return eslOK;
-
-ERROR:
- return status;
-}
-
-
-/* Function: esl_sq_FetchFromMSA()
-* Synopsis: Fetch a single sequence from an MSA.
-* Incept: SRE, Sun Mar 30 13:39:06 2008 [Janelia]
-*
-* Purpose: Retrieve sequence number <which> (<0..msa->nseq-1>) from <msa>, in a newly
-* allocated sequence object; return a pointer to this object
-* in <ret_sq>.
-*
-* The retrieved sequence is in the same mode as the source
-* <msa>, text versus digital.
-*
-* The retrieved sequence is dealigned. For a text mode
-* sequence, gap characters to be removed are assumed to be
-* <-_.>. For a digital mode sequence, gap characters are
-* defined by the digital alphabet.
-*
-* Returns: <eslOK> on success, and a pointer to the newly fetched
-* sequence is in <*ret_sq>, which caller is responsible for freeing.
-*
-* Returns <eslEOD> if there is no sequence number <which>.
-*
-* Throws: <eslEMEM> on allocation error.
-*/
-int
-esl_sq_FetchFromMSA(const ESL_MSA *msa, int which, ESL_SQ **ret_sq)
-{
- ESL_SQ *sq = NULL;
- char *acc = NULL;
- char *desc = NULL;
- char *ss = NULL;
- char *gapchars = "-_.~"; /* hardcoded for now; only affects text mode, not digital */
- int status;
-
- if (which >= msa->nseq || which < 0) return eslEOD;
-
- /* watch out for optional msa annotations being totally NULL */
- if (msa->sqacc != NULL) acc = msa->sqacc[which];
- if (msa->sqdesc != NULL) desc = msa->sqdesc[which];
- if (msa->ss != NULL) ss = msa->ss[which];
-
- if (! (msa->flags & eslMSA_DIGITAL)) /* text mode MSA to text mode sequence */
- {
- // ! Changed: esl_sq_CreateFrom now taske length of the sequence
- if ((sq = esl_sq_CreateFrom(msa->sqname[which], msa->aseq[which], strlen( msa->aseq[which] ), desc, acc, ss)) == NULL) goto ERROR;
- if (sq->ss != NULL) esl_strdealign(sq->ss, sq->seq, gapchars, NULL);
- esl_strdealign(sq->seq, sq->seq, gapchars, &(sq->n));
- }
-#ifdef eslAUGMENT_ALPHABET
- else /* digital mode MSA to digital mode sequence */
- {
- if ((sq = esl_sq_CreateDigitalFrom(msa->abc, msa->sqname[which], msa->ax[which], msa->alen, desc, acc, ss)) == NULL) goto ERROR;
- if (sq->ss != NULL) esl_abc_CDealign(sq->abc, sq->ss+1, sq->dsq, NULL);
- esl_abc_XDealign(sq->abc, sq->dsq, sq->dsq, &(sq->n));
- }
-#endif
-
- if ((status = esl_sq_SetSource(sq, msa->name)) != eslOK) goto ERROR;
-
- sq->start = 1;
- sq->end = sq->n;
- sq->L = sq->n;
- sq->C = 0;
- sq->W = sq->n;
- *ret_sq = sq;
- return eslOK;
-
-ERROR:
- esl_sq_Destroy(sq);
- *ret_sq = NULL;
- return eslEMEM;
-}
-#endif /*eslAUGMENT_MSA*/
-/*---------------- end, sequences from MSAs --------------------*/
-
-
-
-
-
-/*****************************************************************
-* 5. Internal functions
-*****************************************************************/
-
-/* Create and CreateDigital() are almost identical, so
-* their shared guts are here:
-*/
-static ESL_SQ *
-sq_create(int do_digital)
-{
- ESL_SQ *sq = NULL;
- int status;
-
- ESL_ALLOC_WITH_TYPE(sq, ESL_SQ*, sizeof(ESL_SQ));
-
- if (sq_init(sq, do_digital) != eslOK) goto ERROR;
-
- return sq;
-
- ERROR:
- esl_sq_Destroy(sq);
- return NULL;
-}
-
-/* Create an <ESL_SQ_BLOCK> object and its list of <ESL_SQ> objects */
-static ESL_SQ_BLOCK *
-sq_createblock(int count, int do_digital)
-{
- int i = 0;
-
- ESL_SQ_BLOCK *block = NULL;
- int status = eslOK;
-
- ESL_ALLOC_WITH_TYPE(block, ESL_SQ_BLOCK*, sizeof(ESL_SQ_BLOCK));
-
- block->count = 0;
- block->listSize = 0;
- block->list = NULL;
-
- ESL_ALLOC_WITH_TYPE(block->list, ESL_SQ*, sizeof(ESL_SQ) * count);
- block->listSize = count;
-
- for (i = 0; i < count; ++i)
- {
- if (sq_init(block->list + i, do_digital) != eslOK) goto ERROR;
- }
-
- return block;
-
- ERROR:
- esl_sq_DestroyBlock(block);
- return NULL;
-}
-
-/* Initialize <ESL_SQ> object */
-static int
-sq_init(ESL_SQ *sq, int do_digital)
-{
- int status;
-
- sq->name = NULL;
- sq->acc = NULL;
- sq->desc = NULL;
- sq->tax_id = -1;
- sq->seq = NULL;
- sq->dsq = NULL;
- sq->ss = NULL; /* Note that ss is optional - it will only be allocated if needed */
- /* n, coord bookkeeping, and strings are all set below by a call to Reuse() */
-
- sq->nalloc = eslSQ_NAMECHUNK;
- sq->aalloc = eslSQ_ACCCHUNK;
- sq->dalloc = eslSQ_DESCCHUNK;
- sq->salloc = eslSQ_SEQCHUNK;
- sq->srcalloc = eslSQ_NAMECHUNK;
-
- ESL_ALLOC_WITH_TYPE(sq->name, char*, sizeof(char) * sq->nalloc);
- ESL_ALLOC_WITH_TYPE(sq->acc, char*, sizeof(char) * sq->aalloc);
- ESL_ALLOC_WITH_TYPE(sq->desc, char*, sizeof(char) * sq->dalloc);
- ESL_ALLOC_WITH_TYPE(sq->source, char*, sizeof(char) * sq->srcalloc);
- if (do_digital) ESL_ALLOC_WITH_TYPE(sq->dsq, ESL_DSQ*, sizeof(ESL_DSQ) * sq->salloc);
- else ESL_ALLOC_WITH_TYPE(sq->seq, char*, sizeof(char) * sq->salloc);
-
- esl_sq_Reuse(sq); /* initialization of sq->n, offsets, and strings */
- return eslOK;
-
-ERROR:
- return eslEMEM;
-}
-
-/* CreateFrom and CreateDigitalFrom() are almost identical, so
-* their shared guts are here:
-*/
-static ESL_SQ *
-sq_create_from(const char *name, const char *desc, const char *acc)
-{
- ESL_SQ *sq = NULL;
- int64_t n;
- int status;
-
- ESL_ALLOC_WITH_TYPE(sq, ESL_SQ*, sizeof(ESL_SQ));
- sq->name = NULL;
- sq->acc = NULL;
- sq->desc = NULL;
- sq->seq = NULL;
- sq->dsq = NULL;
- sq->ss = NULL;
-
- if (name != NULL)
- {
- n = strlen(name)+1;
- ESL_ALLOC_WITH_TYPE(sq->name, char*, sizeof(char) * n);
- strcpy(sq->name, name);
- sq->nalloc = n;
- }
- else
- {
- sq->nalloc = eslSQ_NAMECHUNK;
- ESL_ALLOC_WITH_TYPE(sq->name, char*, sizeof(char) * sq->nalloc);
- sq->name[0] = '\0';
- }
-
- if (desc != NULL)
- {
- n = strlen(desc)+1;
- ESL_ALLOC_WITH_TYPE(sq->desc, char*, sizeof(char) * n);
- strcpy(sq->desc, desc);
- sq->dalloc = n;
- }
- else
- {
- sq->dalloc = eslSQ_DESCCHUNK;
- ESL_ALLOC_WITH_TYPE(sq->desc, char*, sizeof(char) * sq->dalloc);
- sq->desc[0] = '\0';
- }
-
- if (acc != NULL)
- {
- n = strlen(acc)+1;
- ESL_ALLOC_WITH_TYPE(sq->acc, char*, sizeof(char) * n);
- strcpy(sq->acc, acc);
- sq->aalloc = n;
- }
- else
- {
- sq->aalloc = eslSQ_ACCCHUNK;
- ESL_ALLOC_WITH_TYPE(sq->acc, char*, sizeof(char) * sq->aalloc);
- sq->acc[0] = '\0';
- }
-
- /* no source name */
- sq->srcalloc = eslSQ_NAMECHUNK;
- ESL_ALLOC_WITH_TYPE(sq->source, char*, sizeof(char) * sq->srcalloc);
- sq->source[0] = '\0';
-
- /* coord bookkeeping has to be set by the parent caller,
- * because that's where we know the seq length <n>. We don't
- * know it here.
- */
- sq->doff = -1;
- sq->hoff = -1;
- sq->roff = -1;
- sq->eoff = -1;
- return sq;
-
-ERROR:
- esl_sq_Destroy(sq);
- return NULL;
-}
-
-/* Free <ESL_SQ> object */
-static void
-sq_free(ESL_SQ *sq)
-{
- if (sq->name != NULL) free(sq->name);
- if (sq->acc != NULL) free(sq->acc);
- if (sq->desc != NULL) free(sq->desc);
- if (sq->source != NULL) free(sq->source);
- if (sq->seq != NULL) free(sq->seq);
- if (sq->dsq != NULL) free(sq->dsq);
- if (sq->ss != NULL) free(sq->ss);
-}
-
-/*----------------- end, internal functions ---------------------*/
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_sq.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_sq.h
deleted file mode 100644
index eabd056..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_sq.h
+++ /dev/null
@@ -1,198 +0,0 @@
-/* A sequence.
-*
-* SRE, Mon Mar 31 17:03:51 2008 [Janelia]
- * SVN $Id: esl_sq.h 409 2009-10-19 23:47:18Z eddys $
-*/
-#ifndef ESL_SQ_INCLUDED
-#define ESL_SQ_INCLUDED
-
-#include <sys/types.h>
-
-#ifdef eslAUGMENT_ALPHABET
-#include <hmmer3/easel/esl_alphabet.h>
-#endif
-#ifdef eslAUGMENT_MSA
-#include <hmmer3/easel/esl_msa.h>
-#endif
-
-/* ESL_SQ - a biosequence
-*
-* Can be either in text mode <seq>, or digital mode <dsq>.
-* One of them has to be NULL, and the other contains the data.
-*
-* When in text mode, <ss> and <seq> can hold up to <n=salloc-1>
-* residues and a terminal '\0', and both are indexed <0..n-1>.
-*
-* When in digital mode, <ss> and <dsq> can hold up to <n=salloc-2>
-* residues; both are indexed <1..n>, and positions 0 and n+1 are
-* sentinel bytes. The digital sequence <dsq> uses <eslDSQ_SENTINEL>
-* as its sentinels; as a hack, <ss> uses '\0' as sentinels. This
-* means that <sq->ss+1> is a valid NUL-terminated C string, but
-* <sq->ss> itself would be a string of length 0 because of the
-* leading NUL sentinel. Watch out for this.
-*
-* To save on allocation calls, the structure is designed to be reused
-* for subsequent sequences, rather than free'd and reallocated -
-* thus, we keep track of the allocated sizes of all the strings.
-*
-* Notes on when we need to reallocate:
-* - In a text mode sequence (seq 0..n-1), byte salloc-1 is
-* reserved for the NUL, so the sequence is full when
-* n == salloc-1.
-*
-* - In a digital mode sequence (dsq 1..n), bytes 0 and salloc-1
-* are reserved for sentinel bytes, so the reallocation condition
-* is when n == salloc-2.
-*
-* At least for now, the only way to set the <ss> structure annotation
-* field is by a CreateFrom(), by extraction from an MSA, or manually
-* (by directly allocating a sequence's <ss> field).
-*
-* A sequence object will usually be holding a complete (full length)
-* sequence. Three other cases arise less frequently:
-*
-* 1. We're a subsequence extracted from a source sequence.
-* <sourcename> is the name of the source.
-* <L> is the length of the source (and coords are 1..L).
-* The subsequence is from <start>..<end> on the source.
-* The length of the subsequence <n> is abs(<end>-<start>)+1.
-* <start> can be greater than <end> for a nucleic acid sequence;
-* in this case, the subsequence is reverse complemented.
-*
-* 2. We're a window on a source sequence.
-* This is similar to being a subsequence, with the added
-* wrinkle that we're scanning over a long source sequence
-* in overlapping segments, defined by a "previous context"
-* <C> and a "new window" <W> (the whole sequence is n=C+W
-* residues long):
-* s C W e
-* current window: |------||------------|
-* next window read: |------||------------|
-* s C W e
-* Here, dsq[1..n] is source[s..e]; each newly read
-* window starts at dsq[C+1], and is preceded by C
-* residues of context.
-*
-* 3. We're just after information about the sequence, not the
-* sequence itself; everything except the per-residue information
-* (such as <dsq/seq> and <ss>). We do this when SSI indexing,
-* for example, so we don't have to read entire huge seqs into
-* memory just to calculate their lengths for the index.
-*
-*/
-typedef struct {
- /*::cexcerpt::sq_sq::begin::*/
- char *name; /* name; one word, no whitespace ("\0" if no name) */
- char *acc; /* optional accession (1 word) ("\0" if none) */
- char *desc; /* description line ("\0" if no description) */
- int32_t tax_id; /* NCBI taxonomy id (-1 if none) */
- char *seq; /* sequence [0..n-1], or NULL if digital */
- ESL_DSQ *dsq; /* digitized sequence [1..n], or NULL if text */
- char *ss; /* optional sec structure [0..n-1], [1..n], or NULL */
- int64_t n; /* length of seq (or dsq) and ss */
- /*::cexcerpt::sq_sq::end::*/
-
- /* Coordinate info for: seq subseq window info */
- /* ---- ------ ------ ----- */
- int64_t start; /* coord of seq[0],dsq[1] on source [1..L] 1 1<=i<=L 1<=i<=L 0 */
- int64_t end; /* coord of seq[n-1],dsq[n] on source[1..L] L 1<=j<=L 1<=j<=L 0 */
- int64_t C; /* # of context residues for a window 0 0 n-W 0 */
- int64_t W; /* window width L n n-C 0 */
- int64_t L; /* source sequence length in residues L L (or -1) L (or -1) L */
- /* and n: length of seq (or dsq) and ss actually stored: L abs(j-i)+1 C+W 0 */
- /* In all the above bookkeeping, a -1 means "unknown" */
- char *source; /* name of the source of a subseq/window; or MSA name; or ""*/
-
- /* Memory allocation bookkeeping: (all inclusive of \0; >= strlen()+1) */
- int nalloc; /* allocated length of name */
- int aalloc; /* allocated length of accession */
- int dalloc; /* allocated length of description */
- int64_t salloc; /* alloc for seq or dsq, and ss if present */
- int srcalloc; /* allocated length for source name */
-
- /* Disk offset bookkeeping: */
- int64_t idx; /* ctr for which # seq this is; -1 if not counting */
- off_t roff; /* record offset (start of record); -1 if none */
- off_t hoff; /* offset to last byte of header; -1 if unknown */
- off_t doff; /* data offset (start of sequence data); -1 if none */
- off_t eoff; /* offset to last byte of record; -1 if unknown */
-
- /* Copy of a pointer to the alphabet, if digital mode */
-#if defined(eslAUGMENT_ALPHABET)
- const ESL_ALPHABET *abc; /* reference to the alphabet for <dsq> */
-#else
- const void *abc; /* void reference, if we're not even augmented */
-#endif
-} ESL_SQ;
-
-typedef struct {
- int count; /* number of <ESL_SQ> objects in the block */
- int listSize; /* maximum number elements in the list */
- ESL_SQ *list; /* array of <ESL_SQ> objects */
-} ESL_SQ_BLOCK;
-
-/* These control default initial allocation sizes in an ESL_SQ. */
-#define eslSQ_NAMECHUNK 32 /* allocation unit for name, source */
-#define eslSQ_ACCCHUNK 32 /* allocation unit for accession */
-#define eslSQ_DESCCHUNK 128 /* allocation unit for description */
-#define eslSQ_SEQCHUNK 256 /* allocation unit for seqs */
-
-
-extern ESL_SQ *esl_sq_Create(void);
-// ! Changed: seqLen added to parameters
-extern ESL_SQ *esl_sq_CreateFrom(const char *name, const char *seq, int seqLen,
- const char *desc, const char *acc, const char *ss);
-extern int esl_sq_Grow (ESL_SQ *sq, int64_t *ret_nsafe);
-extern int esl_sq_GrowTo(ESL_SQ *sq, int64_t n);
-extern int esl_sq_Copy(const ESL_SQ *src, ESL_SQ *dst);
-extern int esl_sq_Compare (ESL_SQ *sq1, ESL_SQ *sq2);
-extern int esl_sq_Reuse (ESL_SQ *sq);
-extern int esl_sq_IsDigital(const ESL_SQ *sq);
-extern int esl_sq_IsText (const ESL_SQ *sq);
-extern void esl_sq_Destroy (ESL_SQ *sq);
-extern int esl_sq_SetName (ESL_SQ *sq, const char *name);
-extern int esl_sq_SetAccession (ESL_SQ *sq, const char *acc);
-extern int esl_sq_SetDesc (ESL_SQ *sq, const char *desc);
-extern int esl_sq_SetSource (ESL_SQ *sq, const char *source);
-extern int esl_sq_FormatName (ESL_SQ *sq, const char *name, ...);
-extern int esl_sq_FormatAccession(ESL_SQ *sq, const char *acc, ...);
-extern int esl_sq_FormatDesc (ESL_SQ *sq, const char *desc, ...);
-extern int esl_sq_FormatSource (ESL_SQ *sq, const char *source, ...);
-
-extern int esl_sq_AppendDesc (ESL_SQ *sq, const char *desc);
-extern int esl_sq_SetCoordComplete(ESL_SQ *sq, int64_t L);
-extern int esl_sq_CAddResidue (ESL_SQ *sq, char c);
-extern int esl_sq_ReverseComplement(ESL_SQ *sq);
-extern int esl_sq_Checksum(const ESL_SQ *sq, uint32_t *ret_checksum);
-
-#ifdef eslAUGMENT_ALPHABET
-extern ESL_SQ *esl_sq_CreateDigital(const ESL_ALPHABET *abc);
-extern ESL_SQ *esl_sq_CreateDigitalFrom(const ESL_ALPHABET *abc, const char *name, const ESL_DSQ *dsq,
- int64_t L, const char *desc, const char *acc, const char *ss);
-extern int esl_sq_Digitize(const ESL_ALPHABET *abc, ESL_SQ *sq);
-extern int esl_sq_Textize(ESL_SQ *sq);
-extern int esl_sq_GuessAlphabet(ESL_SQ *sq, int *ret_type);
-extern int esl_sq_XAddResidue(ESL_SQ *sq, ESL_DSQ x);
-#endif
-
-#ifdef eslAUGMENT_MSA
-extern int esl_sq_GetFromMSA (const ESL_MSA *msa, int which, ESL_SQ *sq);
-extern int esl_sq_FetchFromMSA(const ESL_MSA *msa, int which, ESL_SQ **ret_sq);
-#endif
-
-extern ESL_SQ_BLOCK *esl_sq_CreateBlock(int count);
-#ifdef eslAUGMENT_ALPHABET
-extern ESL_SQ_BLOCK *esl_sq_CreateDigitalBlock(int count, const ESL_ALPHABET *abc);
-#endif
-extern void esl_sq_DestroyBlock(ESL_SQ_BLOCK *sqBlock);
-
-#endif /*!ESL_SQ_INCLUDED*/
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_sse.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_sse.cpp
deleted file mode 100644
index a62fe01..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_sse.cpp
+++ /dev/null
@@ -1,276 +0,0 @@
-/* Vectorized routines for Intel/AMD, using Streaming SIMD Extensions (SSE).
-*
-* Table of contents
-* 1. SIMD logf(), expf()
-* 2. Utilities for ps vectors (4 floats in a __m128)
-* 3. Utilities for epu8 vectors (16 uchars in a __m128i)
-* 3. Benchmark
-* 4. Unit tests
-* 5. Test driver
-* 6. Example
-* 7. Copyright and license
-*
-* SRE, Sun Dec 16 09:14:51 2007 [Janelia]
-* SVN $Id: esl_sse.c 341 2009-06-01 12:21:15Z eddys $
-*****************************************************************
-* Credits:
-*
-* The logf() and expf() routines are derivatives of routines by
-* Julien Pommier [http://gruntthepeon.free.fr/ssemath/]. Those
-* routines were in turn based on serial implementations in the Cephes
-* math library by Stephen Moshier [Moshier89;
-* http://www.moshier.net/#Cephes]. Thanks and credit to both Moshier
-* and Pommier for their clear code. Additional copyright and license
-* information is appended at the end of the file.
-*/
-
-#include <hmmer3/easel/esl_config.h>
-#ifdef HAVE_SSE2
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-#include <float.h>
-
-#include <xmmintrin.h> /* SSE */
-#include <emmintrin.h> /* SSE2 */
-
-#include <hmmer3/easel/easel.h>
-#include "esl_sse.h"
-
-
-/*****************************************************************
-* 1. SSE SIMD logf(), expf()
-*****************************************************************/
-
-/* As of Dec 2007, I am unaware of any plans for Intel/AMD to release
-* SSE intrinsics for logf(), expf(), or other special functions.
-*
-* I need them, and the code below should suffice. If you know of
-* better ways to compute these functions, please let me know.
-*/
-
-/* Function: esl_sse_logf()
-* Synopsis: <r[z] = log x[z]>
-* Incept: SRE, Fri Dec 14 11:32:54 2007 [Janelia]
-*
-* Purpose: Given a vector <x> containing four floats, returns a
-* vector <r> in which each element <r[z] = logf(x[z])>.
-*
-* Valid in the domain $x_z > 0$ for normalized IEEE754
-* $x_z$.
-*
-* For <x> $< 0$, including -0, returns <NaN>. For <x> $==
-* 0$ or subnormal <x>, returns <-inf>. For <x = inf>,
-* returns <inf>. For <x = NaN>, returns <NaN>. For
-* subnormal <x>, returns <-inf>.
-*
-* Xref: J2/71.
-*
-* Note: Derived from an SSE1 implementation by Julian
-* Pommier. Converted to SSE2 and added handling
-* of IEEE754 specials.
-*/
-__m128
-esl_sse_logf(__m128 x)
-{
- static const float cephes_p[9] = { 7.0376836292E-2f, -1.1514610310E-1f, 1.1676998740E-1f,
- -1.2420140846E-1f, 1.4249322787E-1f, -1.6668057665E-1f,
- 2.0000714765E-1f, -2.4999993993E-1f, 3.3333331174E-1f };
- __m128 onev = _mm_set1_ps(1.0f); /* all elem = 1.0 */
- __m128 v0p5 = _mm_set1_ps(0.5f); /* all elem = 0.5 */
- __m128i vneg = _mm_set1_epi32(0x80000000); /* all elem have IEEE sign bit up */
- __m128i vexp = _mm_set1_epi32(0x7f800000); /* all elem have IEEE exponent bits up */
- __m128i ei;
- __m128 e;
- __m128 invalid_mask, zero_mask, inf_mask; /* masks used to handle special IEEE754 inputs */
- __m128 mask;
- __m128 origx;
- __m128 tmp;
- __m128 y;
- __m128 z;
-
- /* first, split x apart: x = frexpf(x, &e); */
- ei = _mm_srli_epi32(_mm_castps_si128( x ), 23); /* shift right 23: IEEE754 floats: ei = biased exponents */
- invalid_mask = _mm_castsi128_ps( _mm_cmpeq_epi32( _mm_and_si128(_mm_castps_si128( x ), vneg), vneg) ); /* mask any elem that's negative; these become NaN */
- zero_mask = _mm_castsi128_ps( _mm_cmpeq_epi32(ei, _mm_setzero_si128()) ); /* mask any elem zero or subnormal; these become -inf */
- inf_mask = _mm_castsi128_ps( _mm_cmpeq_epi32( _mm_and_si128(_mm_castps_si128( x ), vexp), vexp) ); /* mask any elem inf or NaN; log(inf)=inf, log(NaN)=NaN */
- origx = x; /* store original x, used for log(inf) = inf, log(NaN) = NaN */
-
- x = _mm_and_ps(x, _mm_castsi128_ps( _mm_set1_epi32(~0x7f800000)) ); /* x now the stored 23 bits of the 24-bit significand */
- x = _mm_or_ps (x, v0p5); /* sets hidden bit b[0] */
-
- ei = _mm_sub_epi32(ei, _mm_set1_epi32(126)); /* -127 (ei now signed base-2 exponent); then +1 */
- e = _mm_cvtepi32_ps(ei);
-
- /* now, calculate the log */
- mask = _mm_cmplt_ps(x, _mm_set1_ps(0.707106781186547524f)); /* avoid conditional branches. */
- tmp = _mm_and_ps(x, mask); /* tmp contains x values < 0.707, else 0 */
- x = _mm_sub_ps(x, onev);
- e = _mm_sub_ps(e, _mm_and_ps(onev, mask));
- x = _mm_add_ps(x, tmp);
- z = _mm_mul_ps(x,x);
-
- y = _mm_set1_ps(cephes_p[0]); y = _mm_mul_ps(y, x);
- y = _mm_add_ps(y, _mm_set1_ps(cephes_p[1])); y = _mm_mul_ps(y, x);
- y = _mm_add_ps(y, _mm_set1_ps(cephes_p[2])); y = _mm_mul_ps(y, x);
- y = _mm_add_ps(y, _mm_set1_ps(cephes_p[3])); y = _mm_mul_ps(y, x);
- y = _mm_add_ps(y, _mm_set1_ps(cephes_p[4])); y = _mm_mul_ps(y, x);
- y = _mm_add_ps(y, _mm_set1_ps(cephes_p[5])); y = _mm_mul_ps(y, x);
- y = _mm_add_ps(y, _mm_set1_ps(cephes_p[6])); y = _mm_mul_ps(y, x);
- y = _mm_add_ps(y, _mm_set1_ps(cephes_p[7])); y = _mm_mul_ps(y, x);
- y = _mm_add_ps(y, _mm_set1_ps(cephes_p[8])); y = _mm_mul_ps(y, x);
- y = _mm_mul_ps(y, z);
-
- tmp = _mm_mul_ps(e, _mm_set1_ps(-2.12194440e-4f));
- y = _mm_add_ps(y, tmp);
-
- tmp = _mm_mul_ps(z, v0p5);
- y = _mm_sub_ps(y, tmp);
-
- tmp = _mm_mul_ps(e, _mm_set1_ps(0.693359375f));
- x = _mm_add_ps(x, y);
- x = _mm_add_ps(x, tmp);
-
- /* IEEE754 cleanup: */
- x = esl_sse_select_ps(x, origx, inf_mask); /* log(inf)=inf; log(NaN) = NaN */
- x = _mm_or_ps(x, invalid_mask); /* log(x<0, including -0,-inf) = NaN */
- x = esl_sse_select_ps(x, _mm_set1_ps(-eslINFINITY), zero_mask); /* x zero or subnormal = -inf */
- return x;
-}
-
-/* Function: esl_sse_expf()
-* Synopsis: <r[z] = exp x[z]>
-* Incept: SRE, Fri Dec 14 14:46:27 2007 [Janelia]
-*
-* Purpose: Given a vector <x> containing four floats, returns a
-* vector <r> in which each element <r[z] = logf(x[z])>.
-*
-* Valid for all IEEE754 floats $x_z$.
-*
-* Xref: J2/71
-*
-* Note: Derived from an SSE1 implementation by Julian
-* Pommier. Converted to SSE2.
-*/
-__m128
-esl_sse_expf(__m128 x)
-{
- static const float cephes_p[6] = { 1.9875691500E-4f, 1.3981999507E-3f, 8.3334519073E-3f,
- 4.1665795894E-2f, 1.6666665459E-1f, 5.0000001201E-1f };
- static const float cephes_c[2] = { 0.693359375f, -2.12194440e-4f };
- static const float maxlogf = 88.72283905206835; /* log(2^128) */
- static const float minlogf = -103.27892990343185; /* log(2^-149) */
- __m128i k;
- __m128 mask, tmp, fx, z, y, minmask, maxmask;
-
- /* handle out-of-range and special conditions */
- maxmask = _mm_cmpgt_ps(x, _mm_set1_ps(maxlogf));
- minmask = _mm_cmple_ps(x, _mm_set1_ps(minlogf));
-
- /* range reduction: exp(x) = 2^k e^f = exp(f + k log 2); k = floorf(0.5 + x / log2): */
- fx = _mm_mul_ps(x, _mm_set1_ps(eslCONST_LOG2R));
- fx = _mm_add_ps(fx, _mm_set1_ps(0.5f));
-
- /* floorf() with SSE: */
- k = _mm_cvttps_epi32(fx); /* cast to int with truncation */
- tmp = _mm_cvtepi32_ps(k); /* cast back to float */
- mask = _mm_cmpgt_ps(tmp, fx); /* if it increased (i.e. if it was negative...) */
- mask = _mm_and_ps(mask, _mm_set1_ps(1.0f)); /* ...without a conditional branch... */
- fx = _mm_sub_ps(tmp, mask); /* then subtract one. */
- k = _mm_cvttps_epi32(fx); /* k is now ready for the 2^k part. */
-
- /* polynomial approx for e^f for f in range [-0.5, 0.5] */
- tmp = _mm_mul_ps(fx, _mm_set1_ps(cephes_c[0]));
- z = _mm_mul_ps(fx, _mm_set1_ps(cephes_c[1]));
- x = _mm_sub_ps(x, tmp);
- x = _mm_sub_ps(x, z);
- z = _mm_mul_ps(x, x);
-
- y = _mm_set1_ps(cephes_p[0]); y = _mm_mul_ps(y, x);
- y = _mm_add_ps(y, _mm_set1_ps(cephes_p[1])); y = _mm_mul_ps(y, x);
- y = _mm_add_ps(y, _mm_set1_ps(cephes_p[2])); y = _mm_mul_ps(y, x);
- y = _mm_add_ps(y, _mm_set1_ps(cephes_p[3])); y = _mm_mul_ps(y, x);
- y = _mm_add_ps(y, _mm_set1_ps(cephes_p[4])); y = _mm_mul_ps(y, x);
- y = _mm_add_ps(y, _mm_set1_ps(cephes_p[5])); y = _mm_mul_ps(y, z);
- y = _mm_add_ps(y, x);
- y = _mm_add_ps(y, _mm_set1_ps(1.0f));
-
- /* build 2^k by hand, by creating a IEEE754 float */
- k = _mm_add_epi32(k, _mm_set1_epi32(127));
- k = _mm_slli_epi32(k, 23);
- fx = _mm_castsi128_ps( k );
-
- /* put 2^k e^f together (fx = 2^k, y = e^f) and we're done */
- y = _mm_mul_ps(y, fx);
-
- /* special/range cleanup */
- y = esl_sse_select_ps(y, _mm_set1_ps(eslINFINITY), maxmask); /* exp(x) = inf for x > log(2^128) */
- y = esl_sse_select_ps(y, _mm_set1_ps(0.0f), minmask); /* exp(x) = 0 for x < log(2^-149) */
- return y;
-}
-
-
-/*****************************************************************
-* 2. Utilities for ps vectors (4 floats in a __m128)
-*****************************************************************/
-
-void
-esl_sse_dump_ps(FILE *fp, __m128 v)
-{
- float *p = (float *)&v;
- fprintf(fp, "[%13.8g, %13.8g, %13.8g, %13.8g]", p[0], p[1], p[2], p[3]);
-}
-
-#endif /* HAVE_SSE2 or not*/
-
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
-
-/* Additionally, esl_sse_logf() and esl_sse_expf() are
-* Copyright (C) 2007 Julien Pommier
-* Copyright (C) 1992 Stephen Moshier
-*
-* These functions derived from zlib-licensed routines by
-* Julien Pommier, http://gruntthepeon.free.fr/ssemath/. The
-* zlib license:
-*/
-
-/* Copyright (C) 2007 Julien Pommier
-
-This software is provided 'as-is', without any express or implied
-warranty. In no event will the authors be held liable for any damages
-arising from the use of this software.
-
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it
-freely, subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not
-claim that you wrote the original software. If you use this software
-in a product, an acknowledgment in the product documentation would be
-appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be
-misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-/* In turn, Pommier had derived the logf() and expf() functions from
-* serial versions in the Cephes math library. According to its
-* readme, Cephes is "copyrighted by the author" and "may be used
-* freely but it comes with no support or guarantee." Cephes is
-* available in NETLIB [http://www.netlib.org/cephes/]. NETLIB is
-* widely considered to be a free scientific code repository, though
-* the copyright and license status of many parts, including Cephes,
-* is ill-defined. We have attached Moshier's copyright,
-* to credit his original contribution. Thanks to both Pommier and
-* Moshier for their clear code.
-*/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_sse.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_sse.h
deleted file mode 100644
index eb7dc28..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_sse.h
+++ /dev/null
@@ -1,224 +0,0 @@
-/* Vectorized routines for Intel/AMD, using Streaming SIMD Extensions (SSE).
-*
-* SRE, Sun Dec 16 10:01:41 2007 [Janelia]
- * SVN $Id: esl_sse.h 353 2009-06-24 15:11:23Z farrarm $
-*/
-#ifdef HAVE_SSE2
-#ifndef ESL_SSE_INCLUDED
-#define ESL_SSE_INCLUDED
-
-#include "easel.h"
-
-#include <stdio.h>
-#include <xmmintrin.h> /* SSE */
-#include <emmintrin.h> /* SSE2 */
-
-
-extern __m128 esl_sse_logf(__m128 x);
-extern __m128 esl_sse_expf(__m128 x);
-extern void esl_sse_dump_ps(FILE *fp, __m128 v);
-
-/*****************************************************************
- * if the compiler does not support SSE2 cast functions replace
- * the calls with C casts. this is needed for older versions of
- * gcc, 3.4.
- *****************************************************************/
-#ifndef HAVE_SSE2_CAST
-#define _mm_castps_si128(x) (__m128i)(x)
-#define _mm_castsi128_ps(x) (__m128)(x)
-#endif
-
-/*****************************************************************
-* Inline utility functions for ps vectors (4 floats in a __m128)
-*****************************************************************/
-
-/* Function: esl_sse_select_ps()
-* Synopsis: SSE equivalent of <vec_sel()>
-* Incept: SRE, Sun Dec 16 12:39:03 2007 [Janelia]
-*
-* Purpose: Vector select. Returns a vector <r[z] = a[z]> where <mask[z]>
-* is all 0's; <r[z] = b[z]> where <mask[z]> is all 1's.
-*
-* Useful for avoiding conditional branches. For example,
-* to implement \ccode{if (a > 0) a += a;}:
-*
-* \begin{cchunk}
-* mask = _mm_cmpgt_ps(a, _mm_setzero_ps());
-* twoa = _mm_add_ps(a, a);
-* a = esl_sse_select_ps(a, twoa, mask);
-* \end{cchunk}
-*
-* Notes: As recommended by the Altivec/SSE Migration Guide,
-* Apple Computer, Inc.
-*/
-static inline __m128
-esl_sse_select_ps(__m128 a, __m128 b, __m128 mask)
-{
- b = _mm_and_ps(b, mask);
- a = _mm_andnot_ps(mask, a);
- return _mm_or_ps(a,b);
-}
-
-/* Function: esl_sse_any_gt_ps()
-* Synopsis: Returns TRUE if any a[z] > b[z]
-* Incept: SRE, Wed Jul 30 11:44:59 2008 [Janelia]
-*
-* Purpose: Returns TRUE if any a[z] > b[z] in two
-* <ps> vectors of floats.
-*
-* Xref: From Apple Altivec/SSE migration guide.
-*/
-static inline int
-esl_sse_any_gt_ps(__m128 a, __m128 b)
-{
- __m128 mask = _mm_cmpgt_ps(a,b);
- int maskbits = _mm_movemask_ps( mask );
- return maskbits != 0;
-}
-
-
-/* Function: esl_sse_hmax_ps()
-* Synopsis: Find the maximum of elements in a vector.
-* Incept: SRE, Wed Aug 6 10:00:11 2008 [Janelia]
-*
-* Purpose: Find the maximum valued element in the four float elements
-* in <a>, and return that maximum value in <*ret_max>.
-*
-* Xref: J3/90 for benchmarking of some alternative implementations.
-*/
-static inline void
-esl_sse_hmax_ps(__m128 a, float *ret_max)
-{
- a = _mm_max_ps(a, _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 3, 2, 1)));
- a = _mm_max_ps(a, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 0, 3, 2)));
- _mm_store_ss(ret_max, a);
-}
-
-/* Function: esl_sse_hsum_ps()
-* Synopsis: Takes the horizontal sum of elements in a vector.
-* Incept: SRE, Sat Aug 16 15:50:33 2008 [Janelia]
-*
-* Purpose: Add the four float elements in vector <a>; return
-* that sum in <*ret_sum>.
-*/
-static inline void
-esl_sse_hsum_ps(__m128 a, float *ret_sum)
-{
- a = _mm_add_ps(a, _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 3, 2, 1)));
- a = _mm_add_ps(a, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 0, 3, 2)));
- _mm_store_ss(ret_sum, a);
-}
-
-
-/* Function: esl_sse_rightshift_ps()
-* Synopsis: Shift vector elements to the right.
-* Incept: SRE, Thu Jul 31 17:13:59 2008 [Janelia]
-*
-* Purpose: Returns a vector containing
-* <{ b[0] a[0] a[1] a[2] }>:
-* i.e. shift the values in <a> to the
-* right, and load the first value of
-* <b> into the first slot.
-*/
-static inline __m128
-esl_sse_rightshift_ps(__m128 a, __m128 b)
-{
- return _mm_move_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 1, 0, 0)), b);
-}
-
-/* Function: esl_sse_leftshift_ps()
-* Synopsis: Shift vector elements to the left.
-* Incept: SRE, Thu Jul 31 17:22:02 2008 [Janelia]
-*
-* Purpose: Returns a vector containing
-* <{ a[1] a[2] a[3] b[0]}>:
-* i.e. shift the values in <a> to the
-* left and load the first value of
-* <b> into the first slot.
-*/
-static inline __m128
-esl_sse_leftshift_ps(__m128 a, __m128 b)
-{
- register __m128 v = _mm_move_ss(a, b); /* now b[0] a[1] a[2] a[3] */
- return _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 3, 2, 1)); /* now a[1] a[2] a[3] b[0] */
-}
-
-/*****************************************************************
-* inlined utilities for epu8 vectors (16 uchars in a __m128i)
-*****************************************************************/
-
-/* Function: esl_sse_any_gt_epu8()
-* Synopsis: Returns TRUE if any a[z] > b[z].
-* Incept: SRE, Wed Jul 30 11:27:09 2008 [Janelia]
-*
-* Purpose: Return TRUE if any <a[z] > b[z]> for <z=0..15>
-* in two <epu8> vectors of unsigned chars.
-*
-* We need this incantation because SSE provides
-* no <cmpgt_epu8> instruction.
-*
-* For equality tests, note that <cmpeq_epi8> works fine
-* for unsigned ints though there is no <cmpeq_epu8>
-* instruction either).
-*
-* See vec_any_gt
-*/
-static inline int
-esl_sse_any_gt_epu8(__m128i a, __m128i b)
-{
- __m128i mask = _mm_cmpeq_epi8(_mm_max_epu8(a,b), b); /* anywhere a>b, mask[z] = 0x0; elsewhere 0xff */
- int maskbits = _mm_movemask_epi8(_mm_xor_si128(mask, _mm_cmpeq_epi8(mask, mask))); /* the xor incantation is a bitwise inversion */
- return maskbits != 0;
-}
-static inline int
-esl_sse_any_gt_epi16(__m128i a, __m128i b)
-{
- return (_mm_movemask_epi8(_mm_cmpgt_epi16(a,b)) != 0);
-}
-
-
-/* Function: esl_sse_hmax_epu8()
-* Synopsis: Return the max of the 16 elements in epu8 vector.
-* Incept: SRE, Wed Jul 30 11:31:33 2008 [Janelia]
-*
-* Purpose: Returns the maximum value of the 16 elements in
-* an <epu8> vector.
-*/
-static inline uint8_t
-esl_sse_hmax_epu8(__m128i a)
-{
- a = _mm_max_epu8(a, _mm_srli_si128(a, 8));
- a = _mm_max_epu8(a, _mm_srli_si128(a, 4));
- a = _mm_max_epu8(a, _mm_srli_si128(a, 2));
- a = _mm_max_epu8(a, _mm_srli_si128(a, 1));
- return (uint8_t) _mm_extract_epi16(a, 0); /* only low-order 8 bits set; so _epi16 or _epi8 equiv; _epi8 is SSE4.1 */
-}
-
-/* Function: esl_sse_hmax_epi16()
-* Synopsis: Return the max of the 8 elements in epi16 vector.
-* Incept: SRE, Wed Jul 30 11:31:33 2008 [Janelia]
-*
-* Purpose: Returns the maximum value of the 16 elements in
-* an <epu8> vector.
-*/
-static inline int16_t
-esl_sse_hmax_epi16(__m128i a)
-{
- a = _mm_max_epi16(a, _mm_srli_si128(a, 8));
- a = _mm_max_epi16(a, _mm_srli_si128(a, 4));
- a = _mm_max_epi16(a, _mm_srli_si128(a, 2));
- return (int16_t) _mm_extract_epi16(a, 0); /* only low-order 8 bits set; so _epi16 or _epi8 equiv; _epi8 is SSE4.1 */
-}
-
-
-#endif /*ESL_SSE_INCLUDED*/
-#endif /*HAVE_SSE2*/
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_stack.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_stack.cpp
deleted file mode 100644
index 981c728..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_stack.cpp
+++ /dev/null
@@ -1,396 +0,0 @@
-/* Pushdown stacks for integers, pointers, and characters.
-*
-* Contents:
-* 1. The <ESL_STACK> object.
-* 2. Other functions in the API.
-* 3. Shuffling stacks. [eslAUGMENT_RANDOM]
-* 7. Copyright and license.
-*
-* Augmentations:
-* eslAUGMENT_RANDOM : adds function for shuffling a stack.
-*
-* SRE 1 March 2000 [Seattle]
-* Incorp into Easel SRE, Sun Dec 26 07:42:12 2004 [Zaragoza]
-* SVN $Id: esl_stack.c 249 2008-04-24 19:19:50Z eddys $
-*/
-#include <hmmer3/easel/esl_config.h>
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <hmmer3/easel/easel.h>
-#include "esl_stack.h"
-#ifdef eslAUGMENT_RANDOM
-#include <hmmer3/easel/esl_random.h>
-#endif
-
-/*****************************************************************
-*# 1. The <ESL_STACK> object.
-*****************************************************************/
-
-/* Function: esl_stack_ICreate()
-* Synopsis: Create an integer stack.
-* Incept: SRE, Sun Dec 26 09:11:50 2004 [Zaragoza]
-*
-* Purpose: Creates an integer stack.
-*
-* Returns: a pointer to the new stack.
-*
-* Throws: <NULL> on an allocation failure.
-*/
-ESL_STACK *
-esl_stack_ICreate(void)
-{
- int status;
- ESL_STACK *ns = NULL;
-
- ESL_ALLOC_WITH_TYPE(ns, ESL_STACK*, sizeof(ESL_STACK));
- ns->nalloc = ESL_STACK_INITALLOC;
- ns->pdata = NULL;
- ns->cdata = NULL;
- ESL_ALLOC_WITH_TYPE(ns->idata, int*, sizeof(int) * ns->nalloc);
- ns->n = 0;
- return ns;
-
-ERROR:
- esl_stack_Destroy(ns);
- return NULL;
-}
-
-/* Function: esl_stack_CCreate()
-* Synopsis: Create a character stack.
-* Incept: SRE, Sun Dec 26 09:15:35 2004 [Zaragoza]
-*
-* Purpose: Creates a character stack.
-*
-* Returns: a pointer to the new stack.
-*
-* Throws: <NULL> on an allocation failure.
-*/
-ESL_STACK *
-esl_stack_CCreate(void)
-{
- int status;
- ESL_STACK *cs = NULL;
-
- ESL_ALLOC_WITH_TYPE(cs, ESL_STACK*, sizeof(ESL_STACK));
- cs->nalloc = ESL_STACK_INITALLOC;
- cs->idata = NULL;
- cs->pdata = NULL;
- ESL_ALLOC_WITH_TYPE(cs->cdata, char*, sizeof(char) * cs->nalloc);
- cs->n = 0;
- return cs;
-
-ERROR:
- esl_stack_Destroy(cs);
- return NULL;
-}
-
-/* Function: esl_stack_PCreate()
-* Synopsis: Create a pointer stack.
-* Incept: SRE, Sun Dec 26 09:16:07 2004 [Zaragoza]
-*
-* Purpose: Creates a pointer stack.
-*
-* Returns: a pointer to the new stack.
-*
-* Throws: <NULL> on an allocation failure.
-*/
-ESL_STACK *
-esl_stack_PCreate(void)
-{
- int status;
- ESL_STACK *ps = NULL;
-
- ESL_ALLOC_WITH_TYPE(ps, ESL_STACK*, sizeof(ESL_STACK));
- ps->nalloc = ESL_STACK_INITALLOC;
- ps->idata = NULL;
- ps->cdata = NULL;
- ESL_ALLOC_WITH_TYPE(ps->pdata, void**, sizeof(void *) * ps->nalloc);
- ps->n = 0;
- return ps;
-
-ERROR:
- esl_stack_Destroy(ps);
- return NULL;
-}
-
-/* Function: esl_stack_Reuse()
-* Synopsis: Reuse a stack.
-* Incept: SRE, Tue Dec 28 04:21:36 2004 [Zaragoza]
-*
-* Purpose: Empties stack <s> so it can be reused without
-* creating a new one. The stack <s>
-* can be of any data type; it retains its original
-* type.
-*
-* Returns: <eslOK>
-*/
-int
-esl_stack_Reuse(ESL_STACK *s)
-{
- s->n = 0; /* it's that simple in this implementation */
- return eslOK;
-}
-
-/* Function: esl_stack_Destroy()
-* Synopsis: Free a stack.
-* Incept: SRE, Sun Dec 26 09:16:24 2004 [Zaragoza]
-*
-* Purpose: Destroys a created stack <s>, of any data type.
-*/
-void
-esl_stack_Destroy(ESL_STACK *s)
-{
- if (s->idata != NULL) free(s->idata);
- if (s->cdata != NULL) free(s->cdata);
- if (s->pdata != NULL) free(s->pdata);
- free(s);
-}
-
-
-/*****************************************************************
-*# 2. Other functions in the API.
-*****************************************************************/
-
-/* Function: esl_stack_IPush()
-* Synopsis: Push an integer onto a stack.
-* Incept: SRE, Sun Dec 26 09:17:17 2004 [Zaragoza]
-*
-* Purpose: Push an integer <x> onto an integer stack <ns>.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on reallocation failure.
-*/
-int
-esl_stack_IPush(ESL_STACK *ns, int x)
-{
- int status;
- int *ptr;
-
- if (ns->n == ns->nalloc) {
- ESL_RALLOC_WITH_TYPE(ns->idata, int*, ptr, sizeof(int) * ns->nalloc * 2);
- ns->nalloc += ns->nalloc; /* reallocate by doubling */
- }
- ns->idata[ns->n] = x;
- ns->n++;
- return eslOK;
-
-ERROR:
- return status;
-}
-
-/* Function: esl_stack_CPush()
-* Synopsis: Push a char onto a stack.
-* Incept: SRE, Sun Dec 26 09:18:24 2004 [Zaragoza]
-*
-* Purpose: Push a character <c> onto a character stack <cs>.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on reallocation failure.
-*/
-int
-esl_stack_CPush(ESL_STACK *cs, char c)
-{
- int status;
- char *ptr;
-
- if (cs->n == cs->nalloc) {
- ESL_RALLOC_WITH_TYPE(cs->cdata, char*, ptr, sizeof(char) * cs->nalloc * 2);
- cs->nalloc += cs->nalloc; /* reallocate by doubling */
- }
- cs->cdata[cs->n] = c;
- cs->n++;
- return eslOK;
-
-ERROR:
- return status;
-}
-
-/* Function: esl_stack_PPush()
-* Synopsis: Push a pointer onto a stack.
-* Incept: SRE, Sun Dec 26 09:18:49 2004 [Zaragoza]
-*
-* Purpose: Push a pointer <p> onto a pointer stack <ps>.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on reallocation failure.
-*/
-int
-esl_stack_PPush(ESL_STACK *ps, void *p)
-{
- int status;
- void *ptr;
-
- if (ps->n == ps->nalloc) {
- ESL_RALLOC_WITH_TYPE(ps->pdata, void**, ptr, sizeof(void *) * ps->nalloc * 2);
- ps->nalloc += ps->nalloc; /* reallocate by doubling */
- }
- ps->pdata[ps->n] = p;
- ps->n++;
- return eslOK;
-
-ERROR:
- return status;
-}
-
-/* Function: esl_stack_IPop()
-* Synopsis: Pop an integer off a stack.
-* Incept: SRE, Sun Dec 26 09:19:12 2004 [Zaragoza]
-*
-* Purpose: Pops an integer off the integer stack <ns>, and returns
-* it through <ret_x>.
-*
-* Returns: <eslOK> on success. <eslEOD> if stack is empty.
-*/
-int
-esl_stack_IPop(ESL_STACK *ns, int *ret_x)
-{
- if (ns->n == 0) {*ret_x = 0; return eslEOD;}
- ns->n--;
- *ret_x = ns->idata[ns->n];
- return eslOK;
-}
-
-/* Function: esl_stack_CPop()
-* Synopsis: Pop a char off a stack.
-* Incept: SRE, Sun Dec 26 09:21:27 2004 [Zaragoza]
-*
-* Purpose: Pops a character off the character stack <cs>, and returns
-* it through <ret_c>.
-*
-* Returns: <eslOK> on success. <eslEOD> if stack is empty.
-*/
-int
-esl_stack_CPop(ESL_STACK *cs, char *ret_c)
-{
- if (cs->n == 0) {*ret_c = 0; return eslEOD;}
- cs->n--;
- *ret_c = cs->cdata[cs->n];
- return eslOK;
-}
-
-/* Function: esl_stack_PPop()
-* Synopsis: Pop a pointer off a stack.
-* Incept: SRE, Sun Dec 26 09:21:56 2004 [Zaragoza]
-*
-* Purpose: Pops a pointer off the pointer stack <ps>, and returns
-* it through <ret_p>.
-*
-* Returns: <eslOK> on success. <eslEOD> if stack is empty.
-*/
-int
-esl_stack_PPop(ESL_STACK *ps, void **ret_p)
-{
- if (ps->n == 0) {*ret_p = 0; return eslEOD;}
- ps->n--;
- *ret_p = ps->pdata[ps->n];
- return eslOK;
-}
-
-/* Function: esl_stack_ObjectCount()
-* Synopsis: Return the number of objects in a stack.
-* Incept: SRE, Sun Dec 26 09:22:41 2004 [Zaragoza]
-*
-* Purpose: Returns the number of data objects stored in the
-* stack <s>. The stack may be of any datatype.
-*/
-int
-esl_stack_ObjectCount(ESL_STACK *s)
-{
- return s->n;
-}
-
-/* Function: esl_stack_Convert2String()
-* Synopsis: Convert a char stack to a string.
-* Incept: SRE, Sun Dec 26 09:23:36 2004 [Zaragoza]
-*
-* Purpose: Converts a character stack <cs> to a NUL-terminated
-* string, and returns a pointer to the string. The
-* characters in the string are in the same order they
-* were pushed onto the stack. The stack is destroyed by
-* this operation, as if <esl_stack_Destroy()> had been
-* called on it. The caller becomes responsible for
-* free'ing the returned string.
-*
-* Returns: Pointer to the string; caller must <free()> this.
-*
-* Throws: NULL if a reallocation fails.
-*/
-char *
-esl_stack_Convert2String(ESL_STACK *cs)
-{
- char *s;
-
- if (esl_stack_CPush(cs, '\0') != eslOK)
- { free(cs->cdata); free(cs); return NULL; } /* nul-terminate the data or self-destruct */
- s = cs->cdata; /* data is already just a string - just return ptr to it */
- free(cs); /* free the stack around it. */
- return s;
-}
-
-/* Function: esl_stack_DiscardTopN()
-* Synopsis: Discard the top elements on a stack.
-* Incept: SRE, Tue Dec 28 04:33:06 2004 [St. Louis]
-*
-* Purpose: Throw away the top <n> elements on stack <s>.
-* Equivalent to <n> calls to a <Pop()> function.
-* If <n> equals or exceeds the number of elements
-* currently in the stack, the stack is emptied
-* as if <esl_stack_Reuse()> had been called.
-*
-* Returns: <eslOK> on success.
-*/
-int
-esl_stack_DiscardTopN(ESL_STACK *s, int n)
-{
- if (n <= s->n) s->n -= n;
- else s->n = 0;
- return eslOK;
-}
-
-/*****************************************************************
-*# 3. Shuffling stacks [with <eslAUGMENT_RANDOM>]
-*****************************************************************/
-#ifdef eslAUGMENT_RANDOM
-
-/* Function: esl_stack_Shuffle()
-* Synopsis: Randomly shuffle the elements in a stack.
-* Incept: SRE, Mon Mar 31 11:01:06 2008 [Janelia]
-*
-* Purpose: Randomly shuffle the elements in stack <s>, using
-* random numbers from generator <r>.
-*
-* Returns: <eslOK> on success, and the stack is randomly
-* shuffled.
-*/
-int
-esl_stack_Shuffle(ESL_RANDOMNESS *r, ESL_STACK *s)
-{
- int n = s->n;
- int w;
-
- while (n > 1) {
- w = esl_rnd_Roll(r, n); /* shuffling algorithm: swap last elem with w, decrement n. */
- if (s->idata != NULL) ESL_SWAP(s->idata[w], s->idata[n-1], int);
- else if (s->cdata != NULL) ESL_SWAP(s->cdata[w], s->cdata[n-1], char);
- else if (s->pdata != NULL) ESL_SWAP(s->pdata[w], s->pdata[n-1], void *);
- n--;
- }
- return eslOK;
-}
-#endif /*eslAUGMENT_RANDOM*/
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_stack.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_stack.h
deleted file mode 100644
index ee9a10a..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_stack.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/* Pushdown stacks for integers, pointers, and characters.
-*
-* nstack - SRE 1 March 2000. [Seattle]
-* mstack - SRE, Fri Oct 10 10:18:16 2003 [St. Louis]
-* cstack - SRE, Mon Oct 13 12:57:56 2003 [St. Louis]
-* Incorp into easel - SRE, Sun Dec 26 07:39:02 2004 [Zaragoza]
-* SVN $Id: esl_stack.h 249 2008-04-24 19:19:50Z eddys $
-*/
-#ifndef ESL_STACK_INCLUDED
-#define ESL_STACK_INCLUDED
-
-#define ESL_STACK_INITALLOC 128 /* initial allocation; realloc by doubling */
-
-#ifdef eslAUGMENT_RANDOM
-#include "esl_random.h"
-#endif /*eslAUGMENT_RANDOM*/
-
-typedef struct esl_stack_s {
- int *idata; /* integer data stack */
- void **pdata; /* pointer data stack */
- char *cdata; /* character data stack */
-
- int n; /* current (topmost) elem in data */
- int nalloc; /* # of elems allocated right now */
-} ESL_STACK;
-
-extern ESL_STACK *esl_stack_ICreate(void);
-extern ESL_STACK *esl_stack_CCreate(void);
-extern ESL_STACK *esl_stack_PCreate(void);
-
-extern int esl_stack_Reuse(ESL_STACK *s);
-extern void esl_stack_Destroy(ESL_STACK *s);
-
-extern int esl_stack_IPush(ESL_STACK *ns, int x);
-extern int esl_stack_CPush(ESL_STACK *cs, char c);
-extern int esl_stack_PPush(ESL_STACK *ps, void *p);
-
-extern int esl_stack_IPop(ESL_STACK *ns, int *ret_x);
-extern int esl_stack_CPop(ESL_STACK *cs, char *ret_c);
-extern int esl_stack_PPop(ESL_STACK *ps, void **ret_p);
-
-extern int esl_stack_ObjectCount(ESL_STACK *s);
-
-extern char *esl_stack_Convert2String(ESL_STACK *cs);
-extern int esl_stack_DiscardTopN(ESL_STACK *s, int n);
-
-#ifdef eslAUGMENT_RANDOM
-extern int esl_stack_Shuffle(ESL_RANDOMNESS *r, ESL_STACK *s);
-#endif /*eslAUGMENT_RANDOM*/
-
-
-#endif /*ESL_STACK_INCLUDED*/
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_stats.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_stats.cpp
deleted file mode 100644
index 752ef48..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_stats.cpp
+++ /dev/null
@@ -1,556 +0,0 @@
-/* Foundation for the statistics modules.
-*
-* Contents:
-* 1. The stats API.
-* 5. License and copyright information.
-*
-* SRE, Tue Jul 19 10:57:44 2005
-* SVN $Id: esl_stats.c 341 2009-06-01 12:21:15Z eddys $
-*/
-#include <hmmer3/easel/esl_config.h>
-
-#include <math.h>
-
-#include <hmmer3/easel/easel.h>
-#include "esl_stats.h"
-
-
-/* Function: esl_stats_DMean()
-* Synopsis: Calculates mean and $\sigma^2$ for samples $x_i$.
-* Incept: SRE, Tue Jul 19 11:04:00 2005 [St. Louis]
-*
-* Purpose: Calculates the sample mean and $s^2$, the unbiased
-* estimator of the population variance, for a
-* sample of <n> numbers <x[0]..x[n-1]>, and optionally
-* returns either or both through <ret_mean> and
-* <ret_var>.
-*
-* <esl_stats_FMean()> and <esl_stats_IMean()> do the same,
-* for float and integer vectors.
-*
-* Args: x - samples x[0]..x[n-1]
-* n - number of samples
-* opt_mean - optRETURN: mean
-* opt_var - optRETURN: estimate of population variance
-*
-* Returns: <eslOK> on success.
-*/
-int
-esl_stats_DMean(const double *x, int n, double *opt_mean, double *opt_var)
-{
- double sum = 0.;
- double sqsum = 0.;
- int i;
-
- for (i = 0; i < n; i++)
- {
- sum += x[i];
- sqsum += x[i]*x[i];
- }
- if (opt_mean != NULL) *opt_mean = sum / (double) n;
- if (opt_var != NULL) *opt_var = (sqsum - sum*sum/(double)n) / ((double)n-1);
- return eslOK;
-}
-int
-esl_stats_FMean(const float *x, int n, double *opt_mean, double *opt_var)
-{
- double sum = 0.;
- double sqsum = 0.;
- int i;
-
- for (i = 0; i < n; i++)
- {
- sum += x[i];
- sqsum += x[i]*x[i];
- }
- if (opt_mean != NULL) *opt_mean = sum / (double) n;
- if (opt_var != NULL) *opt_var = (sqsum - sum*sum/(double)n) / ((double)n-1);
- return eslOK;
-}
-int
-esl_stats_IMean(const int *x, int n, double *opt_mean, double *opt_var)
-{
- double sum = 0.;
- double sqsum = 0.;
- int i;
-
- for (i = 0; i < n; i++)
- {
- sum += x[i];
- sqsum += x[i]*x[i];
- }
- if (opt_mean != NULL) *opt_mean = sum / (double) n;
- if (opt_var != NULL) *opt_var = (sqsum - sum*sum/(double)n) / ((double)n-1);
- return eslOK;
-}
-
-
-/* Function: esl_stats_LogGamma()
-* Synopsis: Calculates $\log \Gamma(x)$.
-* Incept: SRE, Tue Nov 2 13:47:01 2004 [St. Louis]
-*
-* Purpose: Returns natural log of $\Gamma(x)$, for $x > 0$.
-*
-* Credit: Adapted from a public domain implementation in the
-* NCBI core math library. Thanks to John Spouge and
-* the NCBI. (According to NCBI, that's Dr. John
-* "Gammas Galore" Spouge to you, pal.)
-*
-* Args: x : argument, x > 0.0
-* ret_answer : RETURN: the answer
-*
-* Returns: Put the answer in <ret_answer>; returns <eslOK>.
-*
-* Throws: <eslERANGE> if $x <= 0$.
-*/
-int
-esl_stats_LogGamma(double x, double *ret_answer)
-{
- int i;
- double xx, tx;
- double tmp, value;
- static const double cof[11] = {
- 4.694580336184385e+04,
- -1.560605207784446e+05,
- 2.065049568014106e+05,
- -1.388934775095388e+05,
- 5.031796415085709e+04,
- -9.601592329182778e+03,
- 8.785855930895250e+02,
- -3.155153906098611e+01,
- 2.908143421162229e-01,
- -2.319827630494973e-04,
- 1.251639670050933e-10
- };
-
- /* Protect against invalid x<=0 */
- if (x <= 0.0) ESL_EXCEPTION(eslERANGE, "invalid x <= 0 in esl_stats_LogGamma()");
-
- xx = x - 1.0;
- tx = tmp = xx + 11.0;
- value = 1.0;
- for (i = 10; i >= 0; i--) /* sum least significant terms first */
- {
- value += cof[i] / tmp;
- tmp -= 1.0;
- }
- value = log(value);
- tx += 0.5;
- value += 0.918938533 + (xx+0.5)*log(tx) - tx;
- *ret_answer = value;
- return eslOK;
-}
-
-
-/* Function: esl_stats_Psi()
-* Synopsis: Calculates $\Psi(x)$ (the digamma function).
-* Incept: SRE, Tue Nov 15 13:57:59 2005 [St. Louis]
-*
-* Purpose: Computes $\Psi(x)$ (the "digamma" function), which is
-* the derivative of log of the Gamma function:
-* $d/dx \log \Gamma(x) = \frac{\Gamma'(x)}{\Gamma(x)} = \Psi(x)$.
-* Argument $x$ is $> 0$.
-*
-* This is J.M. Bernardo's "Algorithm AS103",
-* Appl. Stat. 25:315-317 (1976).
-*/
-int
-esl_stats_Psi(double x, double *ret_answer)
-{
- double answer = 0.;
- double x2;
-
- if (x <= 0.0) ESL_EXCEPTION(eslERANGE, "invalid x <= 0 in esl_stats_Psi()");
-
- /* For small x, Psi(x) ~= -0.5772 - 1/x + O(x), we're done.
- */
- if (x <= 1e-5) {
- *ret_answer = -eslCONST_EULER - 1./x;
- return eslOK;
- }
-
- /* For medium x, use Psi(1+x) = \Psi(x) + 1/x to c.o.v. x,
- * big enough for Stirling approximation to work...
- */
- while (x < 8.5) {
- answer = answer - 1./x;
- x += 1.;
- }
-
- /* For large X, use Stirling approximation
- */
- x2 = 1./x;
- answer += log(x) - 0.5 * x2;
- x2 = x2*x2;
- answer -= (1./12.)*x2;
- answer += (1./120.)*x2*x2;
- answer -= (1./252.)*x2*x2*x2;
-
- *ret_answer = answer;
- return eslOK;
-}
-
-
-
-/* Function: esl_stats_IncompleteGamma()
-* Synopsis: Calculates the incomplete Gamma function.
-*
-* Purpose: Returns $P(a,x)$ and $Q(a,x)$ where:
-*
-* \begin{eqnarray*}
-* P(a,x) & = & \frac{1}{\Gamma(a)} \int_{0}^{x} t^{a-1} e^{-t} dt \\
-* & = & \frac{\gamma(a,x)}{\Gamma(a)} \\
-* Q(a,x) & = & \frac{1}{\Gamma(a)} \int_{x}^{\infty} t^{a-1} e^{-t} dt\\
-* & = & 1 - P(a,x) \\
-* \end{eqnarray*}
-*
-* $P(a,x)$ is the CDF of a gamma density with $\lambda = 1$,
-* and $Q(a,x)$ is the survival function.
-*
-* For $x \simeq 0$, $P(a,x) \simeq 0$ and $Q(a,x) \simeq 1$; and
-* $P(a,x)$ is less prone to roundoff error.
-*
-* The opposite is the case for large $x >> a$, where
-* $P(a,x) \simeq 1$ and $Q(a,x) \simeq 0$; there, $Q(a,x)$ is
-* less prone to roundoff error.
-*
-* Method: Based on ideas from Numerical Recipes in C, Press et al.,
-* Cambridge University Press, 1988.
-*
-* Args: a - for instance, degrees of freedom / 2 [a > 0]
-* x - for instance, chi-squared statistic / 2 [x >= 0]
-* ret_pax - RETURN: P(a,x)
-* ret_qax - RETURN: Q(a,x)
-*
-* Return: <eslOK> on success.
-*
-* Throws: <eslERANGE> if <a> or <x> is out of accepted range.
-* <eslENOHALT> if approximation fails to converge.
-*/
-int
-esl_stats_IncompleteGamma(double a, double x, double *ret_pax, double *ret_qax)
-{
- int iter; /* iteration counter */
- double pax; /* P(a,x) */
- double qax; /* Q(a,x) */
-
- if (a <= 0.) ESL_EXCEPTION(eslERANGE, "esl_stats_IncompleteGamma(): a must be > 0");
- if (x < 0.) ESL_EXCEPTION(eslERANGE, "esl_stats_IncompleteGamma(): x must be >= 0");
-
- /* For x > a + 1 the following gives rapid convergence;
- * calculate Q(a,x) = \frac{\Gamma(a,x)}{\Gamma(a)},
- * using a continued fraction development for \Gamma(a,x).
- */
- if (x > a+1)
- {
- double oldp; /* previous value of p */
- double nu0, nu1; /* numerators for continued fraction calc */
- double de0, de1; /* denominators for continued fraction calc */
-
- nu0 = 0.; /* A_0 = 0 */
- de0 = 1.; /* B_0 = 1 */
- nu1 = 1.; /* A_1 = 1 */
- de1 = x; /* B_1 = x */
-
- oldp = nu1;
- for (iter = 1; iter < 100; iter++)
- {
- /* Continued fraction development:
- * set A_j = b_j A_j-1 + a_j A_j-2
- * B_j = b_j B_j-1 + a_j B_j-2
- * We start with A_2, B_2.
- */
- /* j = even: a_j = iter-a, b_j = 1 */
- /* A,B_j-2 are in nu0, de0; A,B_j-1 are in nu1,de1 */
- nu0 = nu1 + ((double)iter - a) * nu0;
- de0 = de1 + ((double)iter - a) * de0;
- /* j = odd: a_j = iter, b_j = x */
- /* A,B_j-2 are in nu1, de1; A,B_j-1 in nu0,de0 */
- nu1 = x * nu0 + (double) iter * nu1;
- de1 = x * de0 + (double) iter * de1;
- /* rescale */
- if (de1 != 0.)
- {
- nu0 /= de1;
- de0 /= de1;
- nu1 /= de1;
- de1 = 1.;
- }
- /* check for convergence */
- if (fabs((nu1-oldp)/nu1) < 1.e-7)
- {
- esl_stats_LogGamma(a, &qax);
- qax = nu1 * exp(a * log(x) - x - qax);
-
- if (ret_pax != NULL) *ret_pax = 1 - qax;
- if (ret_qax != NULL) *ret_qax = qax;
- return eslOK;
- }
-
- oldp = nu1;
- }
- ESL_EXCEPTION(eslENOHALT,
- "esl_stats_IncompleteGamma(): fraction failed to converge");
- }
- else /* x <= a+1 */
- {
- double p; /* current sum */
- double val; /* current value used in sum */
-
- /* For x <= a+1 we use a convergent series instead:
- * P(a,x) = \frac{\gamma(a,x)}{\Gamma(a)},
- * where
- * \gamma(a,x) = e^{-x}x^a \sum_{n=0}{\infty} \frac{\Gamma{a}}{\Gamma{a+1+n}} x^n
- * which looks appalling but the sum is in fact rearrangeable to
- * a simple series without the \Gamma functions:
- * = \frac{1}{a} + \frac{x}{a(a+1)} + \frac{x^2}{a(a+1)(a+2)} ...
- * and it's obvious that this should converge nicely for x <= a+1.
- */
- p = val = 1. / a;
- for (iter = 1; iter < 10000; iter++)
- {
- val *= x / (a+(double)iter);
- p += val;
-
- if (fabs(val/p) < 1.e-7)
- {
- esl_stats_LogGamma(a, &pax);
- pax = p * exp(a * log(x) - x - pax);
-
- if (ret_pax != NULL) *ret_pax = pax;
- if (ret_qax != NULL) *ret_qax = 1. - pax;
- return eslOK;
- }
- }
- ESL_EXCEPTION(eslENOHALT,
- "esl_stats_IncompleteGamma(): series failed to converge");
- }
- /*NOTREACHED*/
- return eslOK;
-}
-
-
-/* Function: esl_stats_ChiSquaredTest()
-* Synopsis: Calculates a $\chi^2$ P-value.
-* Incept: SRE, Tue Jul 19 11:39:32 2005 [St. Louis]
-*
-* Purpose: Calculate the probability that a chi-squared statistic
-* with <v> degrees of freedom would exceed the observed
-* chi-squared value <x>; return it in <ret_answer>. If
-* this probability is less than some small threshold (say,
-* 0.05 or 0.01), then we may reject the hypothesis we're
-* testing.
-*
-* Args: v - degrees of freedom
-* x - observed chi-squared value
-* ret_answer - RETURN: P(\chi^2 > x)
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslERANGE> if <v> or <x> are out of valid range.
-* <eslENOHALT> if iterative calculation fails.
-*/
-int
-esl_stats_ChiSquaredTest(int v, double x, double *ret_answer)
-{
- return esl_stats_IncompleteGamma((double)v/2, x/2, NULL, ret_answer);
-}
-
-
-/* Function: esl_stats_LinearRegression()
-* Synopsis: Fit data to a straight line.
-* Incept: SRE, Sat May 26 11:33:46 2007 [Janelia]
-*
-* Purpose: Fit <n> points <x[i]>, <y[i]> to a straight line
-* $y = a + bx$ by linear regression.
-*
-* The $x_i$ are taken to be known, and the $y_i$ are taken
-* to be observed quantities associated with a sampling
-* error $\sigma_i$. If known, the standard deviations
-* $\sigma_i$ for $y_i$ are provided in the <sigma> array.
-* If they are unknown, pass <sigma = NULL>, and the
-* routine will proceed with the assumption that $\sigma_i
-* = 1$ for all $i$.
-*
-* The maximum likelihood estimates for $a$ and $b$ are
-* optionally returned in <opt_a> and <opt_b>.
-*
-* The estimated standard deviations of $a$ and $b$ and
-* their estimated covariance are optionally returned in
-* <opt_sigma_a>, <opt_sigma_b>, and <opt_cov_ab>.
-*
-* The Pearson correlation coefficient is optionally
-* returned in <opt_cc>.
-*
-* The $\chi^2$ P-value for the regression fit is
-* optionally returned in <opt_Q>. This P-value may only be
-* obtained when the $\sigma_i$ are known. If <sigma> is
-* passed as <NULL> and <opt_Q> is requested, <*opt_Q> is
-* set to 1.0.
-*
-* This routine follows the description and algorithm in
-* \citep[pp.661-666]{Press93}.
-*
-* <n> must be greater than 2; at least two x[i] must
-* differ; and if <sigma> is provided, all <sigma[i]> must
-* be $>0$. If any of these conditions isn't met, the
-* routine throws <eslEINVAL>.
-*
-* Args: x - x[0..n-1]
-* y - y[0..n-1]
-* sigma - sample error in observed y_i
-* n - number of data points
-* opt_a - optRETURN: intercept estimate
-* opt_b - optRETURN: slope estimate
-* opt_sigma_a - optRETURN: error in estimate of a
-* opt_sigma_b - optRETURN: error in estimate of b
-* opt_cov_ab - optRETURN: covariance of a,b estimates
-* opt_cc - optRETURN: Pearson correlation coefficient for x,y
-* opt_Q - optRETURN: X^2 P-value for linear fit
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation error;
-* <eslEINVAL> if a contract condition isn't met;
-* <eslENORESULT> if the chi-squared test fails.
-* In these cases, all optional return values are set to 0.
-*/
-int
-esl_stats_LinearRegression(const double *x, const double *y, const double *sigma, int n,
- double *opt_a, double *opt_b,
- double *opt_sigma_a, double *opt_sigma_b, double *opt_cov_ab,
- double *opt_cc, double *opt_Q)
-{
- int status;
- double *t = NULL;
- double S, Sx, Sy, Stt;
- double Sxy, Sxx, Syy;
- double a, b, sigma_a, sigma_b, cov_ab, cc, X2, Q;
- double xdev, ydev;
- double tmp;
- int i;
-
- /* Contract checks. */
- if (n <= 2) ESL_XEXCEPTION(eslEINVAL, "n must be > 2 for linear regression fitting");
- if (sigma != NULL)
- for (i = 0; i < n; i++) if (sigma[i] <= 0.) ESL_XEXCEPTION(eslEINVAL, "sigma[%d] <= 0", i);
- status = eslEINVAL;
- for (i = 0; i < n; i++) if (x[i] != 0.) { status = eslOK; break; }
- if (status != eslOK) ESL_XEXCEPTION(eslEINVAL, "all x[i] are 0.");
-
- /* Allocations */
- ESL_ALLOC_WITH_TYPE(t, double*, sizeof(double) * n);
-
- /* S = \sum_{i=1}{n} \frac{1}{\sigma_i^2}. (S > 0.) */
- if (sigma != NULL) { for (S = 0., i = 0; i < n; i++) S += 1./ (sigma[i] * sigma[i]); }
- else S = (double) n;
-
- /* S_x = \sum_{i=1}{n} \frac{x[i]}{ \sigma_i^2} (Sx real.) */
- for (Sx = 0., i = 0; i < n; i++) {
- if (sigma == NULL) Sx += x[i];
- else Sx += x[i] / (sigma[i] * sigma[i]);
- }
-
- /* S_y = \sum_{i=1}{n} \frac{y[i]}{\sigma_i^2} (Sy real.) */
- for (Sy = 0., i = 0; i < n; i++) {
- if (sigma == NULL) Sy += y[i];
- else Sy += y[i] / (sigma[i] * sigma[i]);
- }
-
- /* t_i = \frac{1}{\sigma_i} \left( x_i - \frac{S_x}{S} \right) (t_i real) */
- for (i = 0; i < n; i++) {
- t[i] = x[i] - Sx/S;
- if (sigma != NULL) t[i] /= sigma[i];
- }
-
- /* S_{tt} = \sum_{i=1}^n t_i^2 (if at least one x is != 0, Stt > 0) */
- for (Stt = 0., i = 0; i < n; i++) { Stt += t[i] * t[i]; }
-
- /* b = \frac{1}{S_{tt}} \sum_{i=1}^{N} \frac{t_i y_i}{\sigma_i} */
- for (b = 0., i = 0; i < n; i++) {
- if (sigma != NULL) { b += t[i]*y[i] / sigma[i]; }
- else { b += t[i]*y[i]; }
- }
- b /= Stt;
-
- /* a = \frac{ S_y - S_x b } {S} */
- a = (Sy - Sx * b) / S;
-
- /* \sigma_a^2 = \frac{1}{S} \left( 1 + \frac{ S_x^2 }{S S_{tt}} \right) */
- sigma_a = sqrt ((1. + (Sx*Sx) / (S*Stt)) / S);
-
- /* \sigma_b = \frac{1}{S_{tt}} */
- sigma_b = sqrt (1. / Stt);
-
- /* Cov(a,b) = - \frac{S_x}{S S_{tt}} */
- cov_ab = -Sx / (S * Stt);
-
- /* Pearson correlation coefficient */
- Sxy = Sxx = Syy = 0.;
- for (i = 0; i < n; i++) {
- if (sigma != NULL) {
- xdev = (x[i] / (sigma[i] * sigma[i])) - (Sx / n);
- ydev = (y[i] / (sigma[i] * sigma[i])) - (Sy / n);
- } else {
- xdev = x[i] - (Sx / n);
- ydev = y[i] - (Sy / n);
- }
- Sxy += xdev * ydev;
- Sxx += xdev * xdev;
- Syy += ydev * ydev;
- }
- cc = Sxy / (sqrt(Sxx) * sqrt(Syy));
-
- /* \chi^2 */
- for (X2 = 0., i = 0; i < n; i++) {
- tmp = y[i] - a - b*x[i];
- if (sigma != NULL) tmp /= sigma[i];
- X2 += tmp*tmp;
- }
-
- /* We can calculate a goodness of fit if we know the \sigma_i */
- if (sigma != NULL) {
- if (esl_stats_ChiSquaredTest(n-2, X2, &Q) != eslOK) { status = eslENORESULT; goto ERROR; }
- } else Q = 1.0;
-
- /* If we didn't use \sigma_i, adjust the sigmas for a,b */
- if (sigma == NULL) {
- tmp = sqrt(X2 / (double)(n-2));
- sigma_a *= tmp;
- sigma_b *= tmp;
- }
-
- /* Done. Set up for normal return.
- */
- free(t);
- if (opt_a != NULL) *opt_a = a;
- if (opt_b != NULL) *opt_b = b;
- if (opt_sigma_a != NULL) *opt_sigma_a = sigma_a;
- if (opt_sigma_b != NULL) *opt_sigma_b = sigma_b;
- if (opt_cov_ab != NULL) *opt_cov_ab = cov_ab;
- if (opt_cc != NULL) *opt_cc = cc;
- if (opt_Q != NULL) *opt_Q = Q;
- return eslOK;
-
-ERROR:
- if (t != NULL) free(t);
- if (opt_a != NULL) *opt_a = 0.;
- if (opt_b != NULL) *opt_b = 0.;
- if (opt_sigma_a != NULL) *opt_sigma_a = 0.;
- if (opt_sigma_b != NULL) *opt_sigma_b = 0.;
- if (opt_cov_ab != NULL) *opt_cov_ab = 0.;
- if (opt_cc != NULL) *opt_cc = 0.;
- if (opt_Q != NULL) *opt_Q = 0.;
- return status;
-}
-/*---------------- end of API implementation --------------------*/
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_stats.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_stats.h
deleted file mode 100644
index 0d8389a..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_stats.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/* esl_stats.h
-* Foundation for the statistics modules.
-*
-* SRE, Tue Jul 19 11:35:28 2005
-* SVN $Id: esl_stats.h 195 2007-08-09 19:02:55Z eddys $
-*/
-#ifndef ESL_STATS_INCLUDED
-#define ESL_STATS_INCLUDED
-
-extern int esl_stats_DMean(const double *x, int n, double *opt_mean, double *opt_var);
-extern int esl_stats_FMean(const float *x, int n, double *opt_mean, double *opt_var);
-extern int esl_stats_IMean(const int *x, int n, double *opt_mean, double *opt_var);
-extern int esl_stats_LogGamma(double x, double *ret_answer);
-extern int esl_stats_Psi(double x, double *ret_answer);
-extern int esl_stats_IncompleteGamma(double a, double x, double *ret_pax, double *ret_qax);
-extern int esl_stats_ChiSquaredTest(int v, double x, double *ret_answer);
-extern int esl_stats_LinearRegression(const double *x, const double *y, const double *sigma, int n,
- double *opt_a, double *opt_b,
- double *opt_sigma_a, double *opt_sigma_b, double *opt_cov_ab,
- double *opt_cc, double *opt_Q);
-#endif /*ESL_STATS_INCLUDED*/
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_tree.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_tree.cpp
deleted file mode 100644
index 4f543db..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_tree.cpp
+++ /dev/null
@@ -1,1218 +0,0 @@
-/* Phylogenetic trees.
-*
-* Contents:
-* 1. The ESL_TREE object.
-* 2. Newick format i/o
-* 3. Tree comparison algorithms.
-* 4. Clustering algorithms for distance-based tree construction.
-* 5. Generating simulated trees.
-* 9. Copyright notice and license.
-*
-* SVN $Id: esl_tree.c 326 2009-02-28 15:49:07Z eddys $
-* SRE, Tue May 2 14:08:42 2006 [St. Louis]
-*/
-
-#include <hmmer3/easel/esl_config.h>
-
-#include <math.h>
-#include <string.h>
-#include <ctype.h>
-#include <assert.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/hmmer.h>
-
-#include <hmmer3/easel/esl_dmatrix.h>
-#include <hmmer3/easel/esl_stack.h>
-#include <hmmer3/easel/esl_vectorops.h>
-#include <hmmer3/easel/esl_random.h>
-#include "esl_tree.h"
-
-/*****************************************************************
-* 1. The ESL_TREE object.
-*****************************************************************/
-
-/* Function: esl_tree_Create()
-* Incept: SRE, Tue May 2 14:10:17 2006 [St. Louis]
-*
-* Purpose: Allocate an empty tree structure for <ntaxa> taxa
-* and return a pointer to it. <ntaxa> must be $\geq 2$.
-*
-* Args: <ntaxa> - number of taxa
-*
-* Returns: pointer to the new <ESL_TREE> object; caller frees
-* this with <esl_tree_Destroy()>.
-*
-* Throws: <NULL> if allocation fails.
-*/
-ESL_TREE *
-esl_tree_Create(int ntaxa)
-{
- ESL_TREE *T = NULL;
- int i;
- int status;
-
- /* Contract verification */
- ESL_DASSERT1((ntaxa >= 2));
-
- /* 1st allocation round */
- ESL_ALLOC_WITH_TYPE(T, ESL_TREE*, sizeof(ESL_TREE));
- T->parent = NULL;
- T->left = NULL;
- T->right = NULL;
- T->ld = NULL;
- T->rd = NULL;
-
- /* 2nd allocation round */
- T->N = ntaxa;
- ESL_ALLOC_WITH_TYPE(T->parent, int*, sizeof(int) * (ntaxa-1));
- ESL_ALLOC_WITH_TYPE(T->left, int*, sizeof(int) * (ntaxa-1));
- ESL_ALLOC_WITH_TYPE(T->right, int*, sizeof(int) * (ntaxa-1));
- ESL_ALLOC_WITH_TYPE(T->ld, double*, sizeof(double) * (ntaxa-1));
- ESL_ALLOC_WITH_TYPE(T->rd, double*, sizeof(double) * (ntaxa-1));
-
- for (i = 0; i < ntaxa-1; i++)
- {
- T->parent[i] = 0;
- T->left[i ] = 0;
- T->right[i] = 0;
- T->ld[i] = 0.;
- T->rd[i] = 0.;
- }
-
- /* Optional info starts NULL
- */
- T->taxaparent = NULL;
- T->cladesize = NULL;
- T->taxonlabel = NULL;
- T->nodelabel = NULL;
-
- /* Additive trees are assumed by default, as opposed to linkage trees */
- T->is_linkage_tree = FALSE;
-
- /* Tree output options default to PHYLIP style
- */
- T->show_unrooted = FALSE;
- T->show_node_labels = TRUE;
- T->show_root_branchlength = FALSE;
- T->show_branchlengths = TRUE;
- T->show_quoted_labels = FALSE;
- T->show_numeric_taxonlabels = TRUE;
-
- T->nalloc = ntaxa;
- return T;
-
-ERROR:
- esl_tree_Destroy(T);
- return NULL;
-}
-
-/* Function: esl_tree_CreateGrowable()
-* Incept: SRE, Mon Nov 13 14:22:22 2006 [Janelia]
-*
-* Purpose: Allocate a growable tree structure for an initial
-* allocation of <nalloc> taxa, and return a pointer to it.
-* <nalloc> must be $\geq 2$.
-*
-* Args: <nalloc> - initial allocation size for number of taxa
-*
-* Returns: pointer to a new growable <ESL_TREE> object; caller frees
-* this with <esl_tree_Destroy()>.
-*
-* Throws: <NULL> if allocation fails.
-*/
-ESL_TREE *
-esl_tree_CreateGrowable(int nalloc)
-{
- ESL_TREE *T = esl_tree_Create(nalloc);
- if (T == NULL) return NULL;
-
- T->N = 0;
- return T;
-}
-
-
-// ! Here was esl_treeCreateFromString function
-// ! but we don't need it and it uses ReadNewick format function
-
-/* Function: esl_tree_Grow()
-* Incept: SRE, Fri Oct 27 08:49:47 2006 [Janelia]
-*
-* Purpose: Given a tree <T>, make sure it can hold one more taxon;
-* reallocate internally if necessary by doubling the
-* number of taxa it is currently allocated to hold.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure. In this case,
-* the data in the tree are unaffected.
-*/
-int
-esl_tree_Grow(ESL_TREE *T)
-{
- void *tmp;
- int nnew;
- int status;
- int i;
-
- if (T->N < T->nalloc) return eslOK; /* do we have room for next taxon? */
-
- nnew = T->nalloc * 2;
-
- /* There are N-1 interior nodes, so arrays of info for
- * interior nodes are allocated for (nnew-1), whereas
- * arrays of info for the N taxa are allocated (nnew).
- */
- ESL_RALLOC_WITH_TYPE(T->parent, int*, tmp, sizeof(int) * (nnew-1));
- ESL_RALLOC_WITH_TYPE(T->left, int*, tmp, sizeof(int) * (nnew-1));
- ESL_RALLOC_WITH_TYPE(T->right, int*, tmp, sizeof(int) * (nnew-1));
- ESL_RALLOC_WITH_TYPE(T->ld, double*, tmp, sizeof(double) * (nnew-1));
- ESL_RALLOC_WITH_TYPE(T->rd, double*, tmp, sizeof(double) * (nnew-1));
-
- /* 0..N-2 were already initialized or used.
- * Initialize newly alloced space N-1..nnew-2.
- */
- for (i = T->nalloc-1; i < nnew-1; i++)
- {
- T->parent[i] = 0;
- T->left[i ] = 0;
- T->right[i] = 0;
- T->ld[i] = 0.;
- T->rd[i] = 0.;
- }
-
- if (T->taxaparent != NULL)
- {
- ESL_RALLOC_WITH_TYPE(T->taxaparent, int*, tmp, sizeof(int) * nnew);
- for (i = T->nalloc; i < nnew; i++) T->taxaparent[i] = 0;
- }
-
- if (T->cladesize != NULL)
- {
- ESL_RALLOC_WITH_TYPE(T->cladesize, int*, tmp, sizeof(int) * nnew);
- for (i = T->nalloc; i < nnew; i++) T->cladesize[i] = 0;
- }
-
- if (T->taxonlabel != NULL)
- {
- ESL_RALLOC_WITH_TYPE(T->taxonlabel, char**, tmp, sizeof(char *) * nnew);
- for (i = T->nalloc; i < nnew; i++) T->taxonlabel[i] = NULL;
- }
-
- if (T->nodelabel != NULL)
- {
- ESL_RALLOC_WITH_TYPE(T->nodelabel, char**, tmp, sizeof(char *) * (nnew-1));
- for (i = T->nalloc-1; i < nnew-1; i++) T->nodelabel[i] = NULL;
- }
-
- T->nalloc = nnew;
- return eslOK;
-
-ERROR:
- return eslEMEM;
-}
-
-
-/* Function: esl_tree_SetTaxaParents()
-* Incept: SRE, Fri Sep 22 13:39:49 2006 [Janelia]
-*
-* Purpose: Constructs the <T->taxaparent[]> array in the tree
-* structure <T>, by an O(N) traversal of the tree.
-* Upon return, <T->taxaparent[i]> is the index
-* of the internal node that taxon <i> is a child of.
-*
-* Args: T - the tree structure to map
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on internal allocation error. In this case, the tree is
-* returned unchanged.
-*
-* Xref: STL11/63
-*/
-int
-esl_tree_SetTaxaParents(ESL_TREE *T)
-{
- int i;
- int status;
-
- if (T->taxaparent != NULL) return eslOK; /* map already exists. */
- ESL_ALLOC_WITH_TYPE(T->taxaparent, int*, sizeof(int) * T->N);
-
- for (i = 0; i < T->N-1; i++) /* traversal order doesn't matter */
- {
- if (T->left[i] <= 0) T->taxaparent[-(T->left[i])] = i;
- if (T->right[i] <= 0) T->taxaparent[-(T->right[i])] = i;
- }
- return eslOK;
-
-ERROR:
- if (T->taxaparent != NULL) { free(T->taxaparent); T->taxaparent = NULL; }
- return status;
-}
-
-
-/* Function: esl_tree_SetCladesizes()
-* Incept: SRE, Thu Nov 9 10:03:17 2006 [Janelia]
-*
-* Purpose: Constructs the <T->cladesize[]> array in tree structure
-* <T>. Upon successful return, <T->cladesize[i]> is the
-* number of taxa contained by the clade rooted at node <i>
-* in the tree. For example, <T->cladesize[0]> is $N$ by
-* definition, because 0 is the root of the tree.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation error; in this case, the
-* original <T> is unmodified.
-*/
-int
-esl_tree_SetCladesizes(ESL_TREE *T)
-{
- int i;
- int status;
-
- if (T->cladesize != NULL) return eslOK; /* already set. */
- ESL_ALLOC_WITH_TYPE(T->cladesize, int*, sizeof(int) * (T->N-1));
- esl_vec_ISet(T->cladesize, T->N-1, 0);
-
- for (i = T->N-2; i >= 0; i--)
- { /* taxon: ...else... internal node: */
- if (T->left[i] <= 0) T->cladesize[i]++; else T->cladesize[i] += T->cladesize[T->left[i]];
- if (T->right[i] <= 0) T->cladesize[i]++; else T->cladesize[i] += T->cladesize[T->right[i]];
- }
- return eslOK;
-
-ERROR:
- if (T->cladesize != NULL) { free(T->cladesize); T->cladesize = NULL; }
- return status;
-}
-
-
-/* Function: esl_tree_SetTaxonlabels()
-* Incept: SRE, Tue Nov 14 19:29:00 2006 [UA 921, IAD-SFO]
-*
-* Purpose: Given an array of taxon names <names[0..N-1]> with the
-* same order and number as the taxa in tree <T>, make a
-* copy of those names in <T>. For example, <names> might
-* be the sequence names in a multiple alignment,
-* <msa->sqname>.
-*
-* If the tree already had taxon names assigned to it, they
-* are replaced.
-*
-* As a special case, if the <names> argument is passed as
-* <NULL>, then the taxon labels are set to a string
-* corresponding to their internal index; that is, taxon 0
-* is labeled "0".
-*
-* Returns: <eslOK> on success, and internal state of <T>
-* (specifically, <T->taxonlabel[]>) now contains a copy
-* of the taxa names.
-*
-* Throws: <eslEMEM> on allocation failure. <T->taxonlabel[]> will be
-* <NULL> (even if it was already set).
-*/
-int
-esl_tree_SetTaxonlabels(ESL_TREE *T, char **names)
-{
- int i;
- int status;
-
- if (T->taxonlabel != NULL) esl_Free2D((void **) T->taxonlabel, T->N);
- ESL_ALLOC_WITH_TYPE(T->taxonlabel, char**, sizeof(char **) * T->nalloc);
- for (i = 0; i < T->nalloc; i++) T->taxonlabel[i] = NULL;
-
- if (names != NULL)
- {
- for (i = 0; i < T->N; i++)
- if ((status = esl_strdup(names[i], -1, &(T->taxonlabel[i]))) != eslOK) goto ERROR;
- }
- else
- {
- for (i = 0; i < T->N; i++)
- {
- ESL_ALLOC_WITH_TYPE(T->taxonlabel[i], char*, sizeof(char) * 32); /* enough width for any conceivable int */
- snprintf(T->taxonlabel[i], 32, "%d", i);
- }
- }
- return eslOK;
-
-ERROR:
- if (T->taxonlabel != NULL) esl_Free2D((void **) T->taxonlabel, T->nalloc);
- return status;
-}
-
-/* Function: esl_tree_RenumberNodes()
-* Synopsis: Assure nodes are numbered in preorder.
-* Incept: SRE, Fri Oct 27 09:33:26 2006 [Janelia]
-*
-* Purpose: Given a tree <T> whose internal nodes might be numbered in
-* any order, with the sole requirement that node 0 is the
-* root; renumber the internal nodes (if necessary) to be in Easel's
-* convention of preorder traversal. No other aspect of <T> is
-* altered (including its allocation size).
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure.
-*
-* Xref: STL11/77
-*/
-int
-esl_tree_RenumberNodes(ESL_TREE *T)
-{
- ESL_TREE *T2 = NULL;
- ESL_STACK *vs = NULL;
- int *map = NULL;
- int v,newI;
- int status;
- int needs_rearranging = FALSE;
-
-
- /* Pass 1. Preorder traverse of T by its children links;
- * construct map[old] -> new.
- */
- ESL_ALLOC_WITH_TYPE(map, int*, sizeof(int) * (T->N-1));
- if (( vs = esl_stack_ICreate()) == NULL) { status = eslEMEM; goto ERROR; };
- if (esl_stack_IPush(vs, 0) != eslOK) { status = eslEMEM; goto ERROR; };
- newI = 0;
- while (esl_stack_IPop(vs, &v) == eslOK)
- {
- if (v != newI) needs_rearranging = TRUE;
- map[v] = newI++;
- if (T->right[v] > 0 && esl_stack_IPush(vs, T->right[v]) != eslOK) { status = eslEMEM; goto ERROR; };
- if (T->left[v] > 0 && esl_stack_IPush(vs, T->left[v]) != eslOK) { status = eslEMEM; goto ERROR; };
- }
- if (! needs_rearranging) { status = eslOK; goto ERROR; } /* not an error; just cleaning up & returning eslOK. */
-
- /* Pass 2. Construct the guts of correctly numbered new T2.
- * (traversal order doesn't matter here)
- */
- if (( T2 = esl_tree_Create(T->nalloc)) == NULL) { status = eslEMEM; goto ERROR; };
- T2->N = T->N;
- if (T->nodelabel != NULL) {
- ESL_ALLOC_WITH_TYPE(T2->nodelabel, char**, sizeof(char *) * (T2->nalloc-1));
- for (v = 0; v < T2->nalloc-1; v++) T2->nodelabel[v] = NULL;
- }
- if (T->taxaparent != NULL) {
- ESL_ALLOC_WITH_TYPE(T2->taxaparent, int*, sizeof(int) * (T2->nalloc));
- for (v = 0; v < T2->nalloc; v++) T2->taxaparent[v] = 0;
- }
-
- for (v = 0; v < T->N-1; v++)
- {
- T2->parent[map[v]] = map[T->parent[v]];
- if (T->left[v] > 0) T2->left[map[v]] = map[T->left[v]]; /* internal nodes renumbered... */
- else T2->left[map[v]] = T->left[v]; /* ...taxon indices unchanged */
- if (T->right[v] > 0) T2->right[map[v]] = map[T->right[v]];
- else T2->right[map[v]] = T->right[v];
- T2->ld[map[v]] = T->ld[v];
- T2->rd[map[v]] = T->rd[v];
-
- if (T->taxaparent != NULL) {
- if (T->left[v] <= 0) T2->taxaparent[T->left[v]] = map[v];
- if (T->right[v] <= 0) T2->taxaparent[T->right[v]] = map[v];
- }
-
- if (T->nodelabel != NULL)
- T2->nodelabel[map[v]] = T2->nodelabel[v];
- }
-
- /* Finally, swap the new guts of T2 with the old guts of T;
- * destroy T2 and return. T is now renumbered.
- */
- ESL_SWAP(T->parent, T2->parent, int *);
- ESL_SWAP(T->left, T2->left, int *);
- ESL_SWAP(T->right, T2->right, int *);
- ESL_SWAP(T->ld, T2->ld, double *);
- ESL_SWAP(T->rd, T2->rd, double *);
- ESL_SWAP(T->taxaparent, T2->taxaparent, int *);
- ESL_SWAP(T->nodelabel, T2->nodelabel, char **);
-
- free(map);
- esl_stack_Destroy(vs);
- esl_tree_Destroy(T2);
- return eslOK;
-
-ERROR:
- if (map != NULL) free(map);
- if (vs != NULL) esl_stack_Destroy(vs);
- if (T2 != NULL) esl_tree_Destroy(T2);
- return status;
-
-}
-
-/* Function: esl_tree_VerifyUltrametric()
-* Incept: SRE, Tue Nov 7 15:25:40 2006 [Janelia]
-*
-* Purpose: Verify that tree <T> is ultrametric.
-*
-* Returns: <eslOK> if so; <eslFAIL> if not.
-*
-* Throws: <eslEMEM> on an allocation failure.
-*/
-int
-esl_tree_VerifyUltrametric(ESL_TREE *T)
-{
- double *d = NULL; /* Distance from root for each OTU */
- int status;
- int i, child, parent;
-
- /* First, calculate distance from root to each taxon.
- * (This chunk of code might be useful to put on its own someday.)
- */
- ESL_ALLOC_WITH_TYPE(d, double*, sizeof(double) * T->N);
- if ((status = esl_tree_SetTaxaParents(T)) != eslOK) goto ERROR;
- for (i = 0; i < T->N; i++)
- {
- d[i] = 0.0;
- child = i;
- parent = T->taxaparent[i];
- if (T->left[parent] == -i) d[i] += T->ld[parent];
- else if (T->right[parent] == -i) d[i] += T->rd[parent];
- else ESL_XEXCEPTION(eslEINCONCEIVABLE, "oops");
-
- while (parent != 0) /* upwards to the root */
- {
- child = parent;
- parent = T->parent[child];
- if (T->left[parent] == child) d[i] += T->ld[parent];
- else if (T->right[parent] == child) d[i] += T->rd[parent];
- else ESL_XEXCEPTION(eslEINCONCEIVABLE, "oops");
- }
- }
-
- /* In an ultrametric tree, all those distances must be equal.
- */
- for (i = 1; i < T->N; i++)
- if ((status = esl_DCompare(d[0], d[i], 0.0001)) != eslOK) break;
-
- free(d);
- return status;
-
-ERROR:
- if (d != NULL) free(d);
- return status;
-}
-
-
-/* Function: esl_tree_Validate()
-* Incept: SRE, Thu Nov 9 11:03:04 2006 [Janelia]
-*
-* Purpose: Validates the integrity of the data structure in <T>.
-* Returns <eslOK> if the internal data in <T> are
-* consistent and valid. Returns <eslFAIL> if not,
-* and if a non-<NULL> message buffer <errbuf> has been
-* provided by the caller, an informative message is
-* left in <errbuf> describing the reason for the
-* failure.
-*
-* Args: T - tree structure to validate
-* errbuf - NULL, or a buffer of at least p7_ERRBUFSIZE
-* chars to contain an error message upon
-* any validation failure.
-*/
-int
-esl_tree_Validate(ESL_TREE *T, char *errbuf)
-{
- int N;
- int i, c;
- int shouldbe;
- int status;
-
- if (errbuf != NULL) *errbuf = 0;
-
- N = T->N; /* just to save writing T->N so many times below */
- if (N < 2) ESL_XFAIL(eslFAIL, errbuf, "number of taxa is less than 2");
- if (T->parent[0] != 0) ESL_XFAIL(eslFAIL, errbuf, "parent of root 0 should be set to 0");
- if (T->nalloc < N) ESL_XFAIL(eslFAIL, errbuf, "number of taxa N is less than allocation");
-
- /* Verify preorder tree numbering.
- */
- for (i = 0; i < N-1; i++)
- {
- if (T->left[i] > 0 && T->left[i] < i)
- ESL_XFAIL(eslFAIL, errbuf, "l child of node %d not in preorder", i);
- if (T->right[i] > 0 && T->right[i] < i)
- ESL_XFAIL(eslFAIL, errbuf, "r child of node %d not in preorder", i);
- }
-
- /* Range checks on values. */
- for (i = 0; i < N-1; i++)
- {
- if (T->parent[i] < 0 || T->parent[i] > N-2)
- ESL_XFAIL(eslFAIL, errbuf, "parent idx of node %d invalid", i);
- if (T->left[i] < -(N-1) || T->left[i] > N-2)
- ESL_XFAIL(eslFAIL, errbuf, "left child idx of node %d invalid", i);
- if (T->right[i] < -(N-1) || T->right[i] > N-2)
- ESL_XFAIL(eslFAIL, errbuf, "right child idx of node %d invalid", i);
- if (T->ld[i] < 0.)
- ESL_XFAIL(eslFAIL, errbuf, "negative l branch length at node %d", i);
- if (T->rd[i] < 0.)
- ESL_XFAIL(eslFAIL, errbuf, "negative r branch length at node %d", i);
- if (T->cladesize != NULL && (T->cladesize[i] < 0 || T->cladesize[i] > N))
- ESL_XFAIL(eslFAIL, errbuf, "invalid cladesize at node %d", i);
- }
- for (c = 0; c < N; c++)
- if (T->taxaparent != NULL && (T->taxaparent[c] < 0 || T->taxaparent[c] > N-2))
- ESL_XFAIL(eslFAIL, errbuf, "invalid taxaparent at node %d", c);
-
- /* more sophisticated integrity checks on parent-child relations in
- nodes ...*/
- for (i = 1; i < T->N-1; i++)
- if (T->left[T->parent[i]] != i && T->right[T->parent[i]] != i)
- ESL_XFAIL(eslFAIL, errbuf, "parent/child link discrepancy at internal node %d\n", i);
-
- /* ...and between terminal nodes and taxa.
- */
- if (T->taxaparent != NULL)
- for (c = 0; c < T->N; c++)
- if (T->left[T->taxaparent[c]] != -c && T->right[T->taxaparent[c]] != -c)
- ESL_XFAIL(eslFAIL, errbuf, "parent/child link discrepancy at taxon %d\n", c);
-
- /* check on cladesizes */
- if (T->cladesize != NULL)
- for (i = 0; i < T->N-1; i++)
- {
- shouldbe = 0;
- if (T->left[i] > 0) shouldbe += T->cladesize[T->left[i]]; else shouldbe++;
- if (T->right[i] > 0) shouldbe += T->cladesize[T->right[i]]; else shouldbe++;
- if (shouldbe != T->cladesize[i])
- ESL_XFAIL(eslFAIL, errbuf, "incorrect cladesize at node %d", i);
- }
-
- return eslOK;
-
-ERROR:
- return status;
-}
-
-
-
-/* Function: esl_tree_Destroy()
-* Incept: SRE, Tue May 2 14:18:31 2006 [St. Louis]
-*
-* Purpose: Frees an <ESL_TREE> object.
-*/
-void
-esl_tree_Destroy(ESL_TREE *T)
-{
- if (T == NULL) return;
-
- if (T->parent != NULL) free(T->parent);
- if (T->left != NULL) free(T->left);
- if (T->right != NULL) free(T->right);
- if (T->ld != NULL) free(T->ld);
- if (T->rd != NULL) free(T->rd);
- if (T->taxaparent != NULL) free(T->taxaparent);
- if (T->cladesize != NULL) free(T->cladesize);
- if (T->taxonlabel != NULL) esl_Free2D((void **) T->taxonlabel, T->nalloc);
- if (T->nodelabel != NULL) esl_Free2D((void **) T->nodelabel, T->nalloc-1);
- free(T);
- return;
-}
-
-
-
-/*----------------- end, ESL_TREE object -----------------------*/
-
-// ! here were Newick format io functions. We don't need them
-
-
-/*****************************************************************
-* 3. Tree comparison algorithms
-*****************************************************************/
-
-/* Function: esl_tree_Compare()
-* Incept: SRE, Fri Sep 22 14:05:09 2006 [Janelia]
-*
-* Purpose: Given two trees <T1> and <T2> for the same
-* set of <N> taxa, compare the topologies of the
-* two trees.
-*
-* The routine must be able to determine which taxa are
-* equivalent in <T1> and <T2>. If <T1> and <T2> both have
-* taxon labels set, then the routine compares labels.
-* This is the usual case. (Therefore, the <N> labels must
-* all be different, or the routine will be unable to do
-* this mapping uniquely.) As a special case, if neither
-* <T1> nor <T2> has taxon labels, then the indexing of
-* taxa <0..N-1> is assumed to be exactly the same in the
-* two trees. (And if one tree has labels and the other
-* does not, an <eslEINVAL> exception is thrown.)
-*
-* For comparing unrooted topologies, be sure that <T1> and
-* <T2> both obey the unrooted tree convention that the
-* "root" is placed on the branch to taxon 0. (That is,
-* <T->left[0] = 0>.)
-*
-* Returns: <eslOK> if tree topologies are identical. <eslFAIL>
-* if they aren't.
-*
-* Throws: <eslEMEM> on allocation error. <eslEINVAL> if the taxa in
-* the trees can't be mapped uniquely and completely to
-* each other (because one tree doesn't have labels and
-* one does, or because the labels aren't unique, or the
-* two trees have different taxa).
-*/
-int
-esl_tree_Compare(ESL_TREE *T1, ESL_TREE *T2)
-{
- int *Mg = NULL; /* the M(g) tree-mapping function for internal nodes [0..N-2] */
- int *Mgt = NULL; /* the M(g) tree-mapping function for leaves (taxa), [0..N-1] */
- int g, child; /* node indices for parent, children */
- int a,b;
- int status;
-
- if (T1->N != T2->N) ESL_EXCEPTION(eslEINVAL, "trees don't have the same # of taxa");
-
- /* We need taxon parent map in tree 2, but not tree 1.
- */
- if ((status = esl_tree_SetTaxaParents(T2)) != eslOK) goto ERROR;
-
- /* We're going to use the tree mapping function M(g) [Goodman79].
- * In the implementation here, we split it into two, Mg[] for internal
- * nodes 0..N-2 and Mgt[] for taxa 0..N-1.
- *
- * Mg[g] for node g in T1 is the index of the lowest node in T2
- * that contains the same children taxa as the subtree
- * under g in T1.
- *
- * For the taxa, Mgt[g] for taxon g in T1 is the index of the
- * corresponding taxon in T2. If neither tree has taxon labels
- * Mgt[g] = g for all g. Otherwise we have to compare labels. Right
- * now, we do this by brute force, which is O(N^2); if this ever
- * becomes rate limiting, replace it with a keyhash to make it O(N)
- * (and in fact, the keyhash of taxon names could even become part
- * of the ESL_TREE).
- */
- ESL_ALLOC_WITH_TYPE(Mg, int*, sizeof(int) * (T1->N-1));
- ESL_ALLOC_WITH_TYPE(Mgt, int*, sizeof(int) * (T1->N));
- if (T1->taxonlabel != NULL && T2->taxonlabel != NULL)
- {
- esl_vec_ISet(Mgt, T1->N, -1); /* flags for "unset" */
- for (a = 0; a < T1->N; a++)
- {
- for (b = 0; b < T1->N; b++)
- if (strcmp(T1->taxonlabel[a], T2->taxonlabel[b]) == 0)
- { Mgt[a] = b; break; }
- }
- for (a = 0; a < T1->N; a++)
- if (Mgt[a] == -1) ESL_XEXCEPTION(eslEINVAL, "couldn't map taxa");
- }
- else if (T1->taxonlabel == NULL && T2->taxonlabel == NULL)
- {
- for (a = 0; a < T1->N; a++)
- Mgt[a] = a;
- }
- else
- ESL_XEXCEPTION(eslEINVAL, "either both trees must have taxon labels, or neither");
-
- /* Finally, we use the SDI algorithm [ZmasekEddy01] to construct
- * M(g) for internal nodes, by postorder traversal of T1.
- */
- for (g = T1->N-2; g >= 0; g--)
- {
- child = T1->left[g];
- if (child <= 0) a = T2->taxaparent[Mgt[-child]];
- else a = T2->parent[Mg[child]];
-
- child = T1->right[g];
- if (child <= 0) b = T2->taxaparent[Mgt[-child]];
- else b = T2->parent[Mg[child]];
-
- /* a shortcut in SDI: special case for exact tree comparison: */
- if (a != b) { free(Mg); free(Mgt); return eslFAIL; }
- Mg[g] = a;
- }
-
- free(Mg);
- free(Mgt);
- return eslOK;
-
-ERROR:
- if (Mg != NULL) free(Mg);
- if (Mgt != NULL) free(Mgt);
- return status;
-}
-
-/*----------------- end, tree comparison -----------------------*/
-
-
-
-
-
-
-/*****************************************************************
-* 4. Clustering algorithms for tree construction.
-*****************************************************************/
-
-/* cluster_engine()
-*
-* Implements four clustering algorithms for tree construction:
-* UPGMA, WPGMA, single-linkage, and maximum-linkage. These differ
-* only by the rule used to construct new distances after joining
-* two clusters i,j.
-*
-* Input <D_original> is a symmetric distance matrix, for <D->n> taxa.
-* The diagonal is all 0's, and off-diagonals are $\geq 0$. <D->n>
-* must be at least two.
-*
-* <mode> is one of <eslUPGMA>, <eslWPGMA>, <eslSINGLE_LINKAGE>, or
-* <eslCOMPLETE_LINKAGE>: a flag specifying which algorithm to use.
-*
-* The output is a tree structure, returned in <ret_T>.
-*
-* Returns <eslOK> on success.
-*
-* Throws <eslEMEM> on allocation failure.
-*
-* Complexity: O(N^2) in memory, O(N^3) in time.
-*
-* This function can be optimized. Memory usage is at least
-* 4x more than necessary. First, we don't need to make a copy of D
-* if the caller doesn't mind it being consumed. Second, D only
-* needs to be lower- or upper-triangular, because it's symmetric,
-* but that requires changing dmatrix module. In time,
-* O(N^2 log N) if not O(N^2) should be possible, by being more
-* sophisticated about identifying the minimum element;
-* see Gronau and Moran (2006).
-*
-*/
-static int
-cluster_engine(ESL_DMATRIX *D_original, int mode, ESL_TREE **ret_T)
-{
- ESL_DMATRIX *D = NULL;
- ESL_TREE *T = NULL;
- double *height = NULL; /* height of internal nodes [0..N-2] */
- int *idx = NULL; /* taxa or node index of row/col in D [0..N-1] */
- int *nin = NULL; /* # of taxa in clade in row/col in D [0..N-1] */
- int N;
- int i = 0, j = 0;
- int row,col;
- double minD;
- int status;
-
- /* Contract checks.
- */
- ESL_DASSERT1((D_original != NULL)); /* matrix exists */
- ESL_DASSERT1((D_original->n == D_original->m)); /* D is NxN square */
- ESL_DASSERT1((D_original->n >= 2)); /* >= 2 taxa */
-#if (eslDEBUGLEVEL >=1)
- for (i = 0; i < D_original->n; i++) {
- assert(D_original->mx[i][i] == 0.); /* self-self d = 0 */
- for (j = i+1; j < D_original->n; j++) /* D symmetric */
- assert(D_original->mx[i][j] == D_original->mx[j][i]);
- }
-#endif
-
- /* Allocations.
- * NxN copy of the distance matrix, which we'll iteratively whittle down to 2x2;
- * tree for N taxa;
- */
- if ((D = esl_dmatrix_Clone(D_original)) == NULL) return eslEMEM;
- if ((T = esl_tree_Create(D->n)) == NULL) return eslEMEM;
- ESL_ALLOC_WITH_TYPE(idx, int*, sizeof(int) * D->n);
- ESL_ALLOC_WITH_TYPE(nin, int*, sizeof(int) * D->n);
- ESL_ALLOC_WITH_TYPE(height, double*, sizeof(double) * (D->n-1));
- for (i = 0; i < D->n; i++) idx[i] = -i; /* assign taxa indices to row/col coords */
- for (i = 0; i < D->n; i++) nin[i ] = 1; /* each cluster starts as 1 */
- for (i = 0; i < D->n-1; i++) height[i] = 0.;
-
- /* If we're doing either single linkage or complete linkage clustering,
- * we will construct a "linkage tree", where ld[v], rd[v] "branch lengths"
- * below node v are the linkage value for clustering node v; thus
- * ld[v] == rd[v] in a linkage tree.
- * For UPGMA or WPGMA, we're building an additive tree, where ld[v] and
- * rd[v] are branch lengths.
- */
- if (mode == eslSINGLE_LINKAGE || mode == eslCOMPLETE_LINKAGE)
- T->is_linkage_tree = TRUE;
-
- for (N = D->n; N >= 2; N--)
- {
- /* Find minimum in our current N x N matrix.
- * (Don't init minD to -infinity; linkage trees use sparse distance matrices
- * with -infinity representing unlinked.)
- */
- minD = D->mx[0][1]; i = 0; j = 1; /* init with: if nothing else, try to link 0-1 */
- for (row = 0; row < N; row++)
- for (col = row+1; col < N; col++)
- if (D->mx[row][col] < minD)
- {
- minD = D->mx[row][col];
- i = row;
- j = col;
- }
-
- /* We're joining node at row/col i with node at row/col j.
- * Add node (index = N-2) to the tree at height minD/2.
- */
- T->left[N-2] = idx[i];
- T->right[N-2] = idx[j];
- if (T->is_linkage_tree) height[N-2] = minD;
- else height[N-2] = minD / 2.;
-
- /* Set the branch lengths (additive trees) or heights (linkage trees)
- */
- T->ld[N-2] = T->rd[N-2] = height[N-2];
- if (! T->is_linkage_tree) {
- if (idx[i] > 0) T->ld[N-2] -= height[idx[i]];
- if (idx[j] > 0) T->rd[N-2] -= height[idx[j]];
- }
-
- /* If either node was an internal node, record parent in it.
- */
- if (idx[i] > 0) T->parent[idx[i]] = N-2;
- if (idx[j] > 0) T->parent[idx[j]] = N-2;
-
- /* Now, build a new matrix by merging row i+j and col i+j.
- * 1. move j to N-1 (unless it's already there)
- * 2. move i to N-2 (unless it's already there)
- */
- if (j != N-1)
- {
- for (row = 0; row < N; row++)
- ESL_SWAP(D->mx[row][N-1], D->mx[row][j], double);
- for (col = 0; col < N; col++)
- ESL_SWAP(D->mx[N-1][col], D->mx[j][col], double);
- ESL_SWAP(idx[j], idx[N-1], int);
- ESL_SWAP(nin[j], nin[N-1], int);
- }
- if (i != N-2)
- {
- for (row = 0; row < N; row++)
- ESL_SWAP(D->mx[row][N-2], D->mx[row][i], double);
- for (col = 0; col < N; col++)
- ESL_SWAP(D->mx[N-2][col], D->mx[i][col], double);
- ESL_SWAP(idx[i], idx[N-2], int);
- ESL_SWAP(nin[i], nin[N-2], int);
- }
- i = N-2;
- j = N-1;
-
- /* 3. merge i (now at N-2) with j (now at N-1)
- * according to the desired clustering rule.
- */
- for (col = 0; col < N; col++)
- {
- switch (mode) {
- case eslUPGMA:
- D->mx[i][col] = (nin[i] * D->mx[i][col] + nin[j] * D->mx[j][col]) / (double) (nin[i] + nin[j]);
- break;
- case eslWPGMA: D->mx[i][col] = (D->mx[i][col] + D->mx[j][col]) / 2.; break;
- case eslSINGLE_LINKAGE: D->mx[i][col] = ESL_MIN(D->mx[i][col], D->mx[j][col]); break;
- case eslCOMPLETE_LINKAGE: D->mx[i][col] = ESL_MAX(D->mx[i][col], D->mx[j][col]); break;
- default: ESL_XEXCEPTION(eslEINCONCEIVABLE, "no such strategy");
- }
- D->mx[col][i] = D->mx[i][col];
- }
-
- /* row/col i is now the new cluster, and it corresponds to node N-2
- * in the tree (remember, N is decrementing at each iteration).
- * row/col j (N-1) falls away when we go back to the start of the loop
- * and decrement N.
- */
- nin[i] += nin[j];
- idx[i] = N-2;
- }
-
- esl_dmatrix_Destroy(D);
- free(height);
- free(idx);
- free(nin);
- if (ret_T != NULL) *ret_T = T;
- return eslOK;
-
-ERROR:
- if (D != NULL) esl_dmatrix_Destroy(D);
- if (T != NULL) esl_tree_Destroy(T);
- if (height != NULL) free(height);
- if (idx != NULL) free(idx);
- if (nin != NULL) free(nin);
- if (ret_T != NULL) *ret_T = NULL;
- return status;
-}
-
-
-/* Function: esl_tree_UPGMA()
-* Incept: SRE, Wed May 3 15:14:17 2006 [St. Louis]
-*
-* Purpose: Given distance matrix <D>, use the UPGMA algorithm
-* to construct a tree <T>.
-*
-* Returns: <eslOK> on success; the tree is returned in <ret_T>,
-* and must be freed by the caller with <esl_tree_Destroy()>.
-*
-* Throws: <eslEMEM> on allocation problem, and <ret_T> is set <NULL>.
-*/
-int
-esl_tree_UPGMA(ESL_DMATRIX *D, ESL_TREE **ret_T)
-{
- return cluster_engine(D, eslUPGMA, ret_T);
-}
-
-/* Function: esl_tree_WPGMA()
-* Incept: SRE, Wed May 3 15:47:13 2006 [St. Louis]
-*
-* Purpose: Given distance matrix <D>, use the WPGMA algorithm
-* to construct a tree <T>.
-*
-* Returns: <eslOK> on success; the tree is returned in <ret_T>,
-* and must be freed by the caller with <esl_tree_Destroy()>.
-*
-* Throws: <eslEMEM> on allocation problem, and <ret_T> is set <NULL>.
-*/
-int
-esl_tree_WPGMA(ESL_DMATRIX *D, ESL_TREE **ret_T)
-{
- return cluster_engine(D, eslWPGMA, ret_T);
-}
-
-/* Function: esl_tree_SingleLinkage()
-* Incept: SRE, Wed May 3 15:49:06 2006 [St. Louis]
-*
-* Purpose: Given distance matrix <D>, construct a single-linkage
-* (minimum distances) clustering tree <T>.
-*
-* Returns: <eslOK> on success; the tree is returned in <ret_T>,
-* and must be freed by the caller with <esl_tree_Destroy()>.
-*
-* Throws: <eslEMEM> on allocation problem, and <ret_T> is set <NULL>.
-*/
-int
-esl_tree_SingleLinkage(ESL_DMATRIX *D, ESL_TREE **ret_T)
-{
- return cluster_engine(D, eslSINGLE_LINKAGE, ret_T);
-}
-
-/* Function: esl_tree_CompleteLinkage()
-* Incept: SRE, Wed May 3 15:49:14 2006 [St. Louis]
-*
-* Purpose: Given distance matrix <D>, construct a complete-linkage
-* (maximum distances) clustering tree <T>.
-*
-* Returns: <eslOK> on success; the tree is returned in <ret_T>,
-* and must be freed by the caller with <esl_tree_Destroy()>.
-*
-* Throws: <eslEMEM> on allocation problem, and <ret_T> is set <NULL>.
-*/
-int
-esl_tree_CompleteLinkage(ESL_DMATRIX *D, ESL_TREE **ret_T)
-{
- return cluster_engine(D, eslCOMPLETE_LINKAGE, ret_T);
-}
-/*----------------- end, clustering algorithms ----------------*/
-
-
-
-/*****************************************************************
-* 5. Generating simulated trees
-*****************************************************************/
-
-/* Function: esl_tree_Simulate()
-* Synopsis: Generate a random rooted ultrametric tree.
-* Incept: SRE, Mon Oct 2 11:36:22 2006 [Janelia]
-*
-* Purpose: Generate a random rooted ultrametric tree of <N> taxa,
-* using the algorithm of Kuhner and Felsenstein (1994).
-*
-* The branch lengths are generated by choosing <N-1>
-* exponentially distributed split times, with decreasing
-* expectations of $\frac{1}{2},\frac{1}{3}..\frac{1}{N}$
-* as the simulation proceeds from the root. Thus the
-* total expected branch length on the tree is
-* $\sum_{k=2}^{N} \frac{1}{k}$.
-*
-* Args: r - random number source
-* N - number of taxa (>= 2)
-* ret_T - RETURN: sampled tree
-*
-* Returns: <eslOK> on success, and the new tree is allocated
-* here and returned via <ret_tree>; caller is
-* responsible for free'ing it.
-*
-* Throws: <eslEMEM> on allocation failure, in which case
-* the <ret_T> is returned <NULL>.
-*
-* Xref: STL11/65.
-*/
-int
-esl_tree_Simulate(ESL_RANDOMNESS *r, int N, ESL_TREE **ret_T)
-{
- ESL_TREE *T = NULL;
- int *branchpapa = NULL;
- int *branchside = NULL;
- int nactive;
- double d;
- int node;
- int bidx; /* index of an active branch */
- int status;
-
- ESL_DASSERT1( (r != NULL) );
- ESL_DASSERT1( (N >= 2) );
-
- /* Kuhner/Felsenstein uses a list of active branches,
- * which we implement by tracking the index of the parent
- * node (in <branchpapa>) and a 0/1 flag (in <branchside>)
- * for the branch to the left vs. right child.
- */
- if ((T = esl_tree_Create(N)) == NULL) goto ERROR;
- ESL_ALLOC_WITH_TYPE(branchpapa, int*, sizeof(int) * N);
- ESL_ALLOC_WITH_TYPE(branchside, int*, sizeof(int) * N);
-
- /* Initialize: add two branches from the root
- * onto the active list, and set internal node
- * counter to start at 1.
- */
- branchpapa[0] = 0; branchside[0] = 0;
- branchpapa[1] = 0; branchside[1] = 1;
- nactive = 2;
- node = 1;
-
- /* Algorithm proceeds by iterating:
- * 1. choose random time <d> from exponential(1/nactive)
- * 2. choose random active branch, <bidx>
- * 3. add new <node> to active branch at length d
- * 4. add d to all other active branches
- * 5. delete the old parent branch from the active list,
- * add the two new child branches to the active list
- */
- while (nactive < N)
- {
- d = (double) nactive * -log(esl_rnd_UniformPositive(r));
- bidx = esl_rnd_Roll(r, nactive);
- T->parent[node] = branchpapa[bidx];
-
- if (branchside[bidx] == 0) {
- T->left[branchpapa[bidx]] = node;
- T->ld [branchpapa[bidx]] += d;
- } else {
- T->right[branchpapa[bidx]] = node;
- T->rd [branchpapa[bidx]] += d;
- }
-
- ESL_SWAP(branchpapa[bidx], branchpapa[nactive-1], int);
- ESL_SWAP(branchside[bidx], branchside[nactive-1], int);
- for (bidx = 0; bidx < nactive-1; bidx++) {
- if (branchside[bidx] == 0) T->ld[branchpapa[bidx]] += d;
- else T->rd[branchpapa[bidx]] += d;
- }
-
- /* delete the branch at nactive-1 that we just added to;
- * replace it with two new branches
- */
- branchpapa[nactive-1] = node; branchside[nactive-1] = 0;
- branchpapa[nactive] = node; branchside[nactive] = 1;
- node++;
- nactive++;
- }
-
- /* Terminate by adding the N taxa to the N active branches.
- */
- d = (double) N * -log(esl_rnd_UniformPositive(r));
- for (bidx = 0; bidx < N; bidx++)
- {
- if (branchside[bidx] == 0) {
- T->left[branchpapa[bidx]] = -bidx; /* taxa indices stored as neg #'s */
- T->ld [branchpapa[bidx]] += d;
- } else {
- T->right[branchpapa[bidx]] = -bidx;
- T->rd [branchpapa[bidx]] += d;
- }
- }
-
- *ret_T = T;
- free(branchpapa);
- free(branchside);
- return eslOK;
-
-ERROR:
- if (T != NULL) esl_tree_Destroy(T);
- if (branchpapa != NULL) free(branchpapa);
- if (branchside != NULL) free(branchside);
- *ret_T = NULL;
- return status;
-}
-
-
-/* Function: esl_tree_ToDistanceMatrix()
-* Synopsis: Obtain a pairwise distance matrix from a tree.
-* Incept: SRE, Fri Oct 6 13:50:37 2006 [Janelia]
-*
-* Purpose: Given tree <T>, calculate a pairwise distance matrix
-* and return it in <ret_D>.
-*
-* Note: Algorithm here is O(N^3). It can probably be improved.
-* There ought to be a more efficient recursion that
-* saves recalculating node-node distances inside the tree.
-* All we do here is a brute force, upwards O(N) LCA
-* search for each of the N^2 taxon pairs.
-*
-* Args: T - input tree
-* ret_D - RETURN: the new distance matrix
-*
-* Returns: <eslOK> on success, and <ret_D> points to the distance
-* matrix, which caller is responsible for free'ing with
-* <esl_dmatrix_Destroy()>.
-*
-* Throws: <eslEMEM> on allocation failure, in which case
-* <ret_D> is returned <NULL>.
-*
-* Xref: STL11/66.
-*/
-int
-esl_tree_ToDistanceMatrix(ESL_TREE *T, ESL_DMATRIX **ret_D)
-{
- ESL_DMATRIX *D = NULL;
- int i,j; /* a pair of taxa {0..N-1} */
- int a,b; /* a pair of internal nodes {0..N-2} */
- int p; /* a tmp parent index */
- double d; /* ij distance */
- int status;
-
- D = esl_dmatrix_Create(T->N, T->N); /* creates a NxN square symmetric matrix; really only need triangular */
- if (D == NULL) { status = eslEMEM; goto ERROR; }
-
- if ((status = esl_tree_SetTaxaParents(T)) != eslOK) goto ERROR;
-
- for (i = 0; i < T->N; i++)
- {
- D->mx[i][i] = 0.; /* by definition */
- for (j = i+1; j < T->N; j++)
- {
- a = T->taxaparent[i];
- b = T->taxaparent[j];
- d = (T->left[a] == -i) ? T->ld[a] : T->rd[a];
- d += (T->left[b] == -j) ? T->ld[b] : T->rd[b];
- while (a != b) /* a brute force LCA algorithm */
- {
- if (a < b) ESL_SWAP(a, b, int);
- p = T->parent[a];
- d += (T->left[p] == a) ? T->ld[p] : T->rd[p];
- a = p;
- }
-
- D->mx[i][j] = D->mx[j][i] = d;
- }
- }
-
- *ret_D = D;
- return eslOK;
-
-ERROR:
- if (D != NULL) esl_dmatrix_Destroy(D);
- *ret_D = NULL;
- return status;
-}
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_tree.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_tree.h
deleted file mode 100644
index 620799e..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_tree.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/* esl_tree.c
-* Phylogenetic trees.
-*
-* SVN $Id: esl_tree.h 303 2008-11-24 19:09:31Z eddys $
-* SRE, Tue May 2 13:54:30 2006 [St. Louis]
-*/
-#ifndef ESL_TREE_INCLUDED
-#define ESL_TREE_INCLUDED
-
-#include <hmmer3/easel/esl_dmatrix.h>
-#include <hmmer3/easel/esl_random.h>
-
-/* Object: ESL_TREE
-*
-* All trees are represented as rooted trees, starting from
-* node 0. For N taxa, there are N-1 internal nodes, numbered
-* 0..N-2. Taxa on leaves are numbered 0..N-1, and represented
-* in <parent>, <left>, <right> as negative numbers.
-*
-*/
-typedef struct {
- int N; /* number of taxa */
-
- /* (Mandatory) information for the internal nodes of a rooted tree.
- * There are N-1 nodes, numbered 0..N-2, with the root at 0,
- * so each array below is indexed [0..N-2].
- * When an internal node has a left or right branch to a taxon,
- * <left>/<right> are <= 0, negative <taxon #>; if they're to
- * be used as array indices, flip their sign.
- * There is no ambiguity between taxon 0/root node 0, because
- * a taxon can't be a parent, and the root node can't be a child.
- * For an unrooted tree, by convention, taxon 0 is the outgroup: T->left[0] = 0,
- * and T->rd[0] = 0.0.
- */
- int *parent; /* index of parent of node: values are 0..N-2; parent of root 0 = 0 */
- int *left; /* index of left child: values are -(N-1)..0=taxa; 1..N-2=nodes */
- int *right; /* index of right child: values are -(N-1)..0=taxa; 1..N-2=nodes */
- double *ld; /* left branch length under node: values are >= 0 */
- double *rd; /* right branch length under node: values are >= 0 */
- /* in linkage trees, ld[x]=rd[x]= "height" (linkage value) of node, not branch lengths */
-
- /* Derived (optional) information, that we can reconstruct if
- * we need to from the mandatory info above.
- */
- int *taxaparent; /* for taxa [0..N-1]: index of its parent node, 0..N-2. [esl_tree_SetTaxaParents()] */
- int *cladesize; /* for nodes [0..N-2]: # taxa in this clade, 1..N [esl_tree_SetCladesizes()] */
-
- /* Optional information */
- char **taxonlabel; /* labels for taxa: [0..N-1] array of char strings */
- char **nodelabel; /* labels for nodes: [0..N-2] array of char strings */
-
- /* Tree mode options. */
- int is_linkage_tree; /* TRUE if this is a linkage tree; if FALSE, it's an additive tree */
-
-
- /* Tree output options. */
- int show_unrooted; /* TRUE to output 'root' as a trifurcation (a la PHYLIP) */
- int show_node_labels; /* TRUE to output labels for interior nodes */
- int show_root_branchlength; /* TRUE to show 0.0 branch length to root node (a la TreeAlign) */
- int show_branchlengths; /* TRUE to output branch lengths */
- int show_quoted_labels; /* TRUE to output ALL labels as quoted labels */
- int show_numeric_taxonlabels;/* TRUE to output taxa labels as their 0..N-1 indices if no other taxonlabel is present */
-
- /* Memory allocation information, when growing a tree (on input, for example)
- */
- int nalloc; /* current allocated # of taxa */
-
-} ESL_TREE;
-
-/* UPGMA, average-link, minimum-link, and maximum-link clustering are
-* all implemented by one algorithm, cluster_engine(), in esl_tree.c.
-* We define some flags (within the scope of the tree module) to
-* control the behavior, as we call the algorithm engine from four
-* different API functions.
-*/
-#define eslUPGMA 0
-#define eslWPGMA 1
-#define eslSINGLE_LINKAGE 2
-#define eslCOMPLETE_LINKAGE 3
-
-
-
-/* 1. The ESL_TREE object.
-*/
-extern ESL_TREE *esl_tree_Create(int ntaxa);
-extern ESL_TREE *esl_tree_CreateGrowable(int nalloc);
-extern ESL_TREE *esl_tree_CreateFromString(char *s);
-extern int esl_tree_Grow(ESL_TREE *T);
-extern int esl_tree_SetTaxaParents(ESL_TREE *T);
-extern int esl_tree_SetCladesizes(ESL_TREE *T);
-extern int esl_tree_SetTaxonlabels(ESL_TREE *T, char **names);
-extern int esl_tree_RenumberNodes(ESL_TREE *T);
-extern int esl_tree_VerifyUltrametric(ESL_TREE *T);
-extern int esl_tree_Validate(ESL_TREE *T, char *errbuf);
-extern void esl_tree_Destroy(ESL_TREE *T);
-
-/* 2. Newick format i/o
-*/
-// ! we don't need newick io functions
-
-/* 3. Tree comparison algorithms.
-*/
-extern int esl_tree_Compare(ESL_TREE *T1, ESL_TREE *T2);
-
-/* 4. Clustering algorithms for distance-based tree construction.
-*/
-extern int esl_tree_UPGMA(ESL_DMATRIX *D, ESL_TREE **ret_T);
-extern int esl_tree_WPGMA(ESL_DMATRIX *D, ESL_TREE **ret_T);
-extern int esl_tree_SingleLinkage(ESL_DMATRIX *D, ESL_TREE **ret_T);
-extern int esl_tree_CompleteLinkage(ESL_DMATRIX *D, ESL_TREE **ret_T);
-
-/* 5. Generating simulated trees.
-*/
-extern int esl_tree_Simulate(ESL_RANDOMNESS *r, int N, ESL_TREE **ret_T);
-extern int esl_tree_ToDistanceMatrix(ESL_TREE *T, ESL_DMATRIX **ret_D);
-
-
-#endif /*!ESL_TREE_INCLUDED*/
-
-
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_vectorops.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_vectorops.cpp
deleted file mode 100644
index 2398756..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_vectorops.cpp
+++ /dev/null
@@ -1,1144 +0,0 @@
-/* Operations on vectors of floats or doubles.
-*
-* Can operate on vectors of doubles, floats, or integers - appropriate
-* routine is prefixed with D, F, or I. For example, esl_vec_DSet() is
-* the Set routine for a vector of doubles; esl_vec_ISet() is for integers.
-*
-* Contents:
-* 1. The vectorops API.
-* 5. Copyright and license information.
-*
-* SRE, Tue Oct 1 15:23:25 2002 [St. Louis]
-* SVN $Id: esl_vectorops.c 319 2009-01-27 16:51:36Z eddys $
-*/
-
-#include <hmmer3/easel/esl_config.h>
-
-#include <math.h>
-#include <float.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/hmmer.h>
-#include <hmmer3/easel/esl_vectorops.h>
-
-/* Function: esl_vec_DSet()
-* Synopsis: Set all items in vector to scalar value.
-*
-* Purpose: Sets all <n> items in <vec> to <value>.
-*
-* <esl_vec_FSet()> and <esl_vec_ISet()> do the same,
-* for float and integer vectors.
-*/
-void
-esl_vec_DSet(double *vec, int n, double value)
-{
- int x;
- for (x = 0; x < n; x++) vec[x] = value;
-}
-void
-esl_vec_FSet(float *vec, int n, float value)
-{
- int x;
- for (x = 0; x < n; x++) vec[x] = value;
-}
-void
-esl_vec_ISet(int *vec, int n, int value)
-{
- int x;
- for (x = 0; x < n; x++) vec[x] = value;
-}
-
-
-/* Function: esl_vec_DScale()
-* Synopsis: Multiply all items in vector by scalar value.
-*
-* Purpose: Multiplies all <n> items in <vec> by <scale>.
-*
-* <esl_vec_FScale()> and <esl_vec_IScale()> do the same,
-* for float and integer vectors.
-*
-* Essentially the same as BLAS1's xSCAL().
-*/
-void
-esl_vec_DScale(double *vec, int n, double scale)
-{
- int x;
- for (x = 0; x < n; x++) vec[x] *= scale;
-}
-void
-esl_vec_FScale(float *vec, int n, float scale)
-{
- int x;
- for (x = 0; x < n; x++) vec[x] *= scale;
-}
-void
-esl_vec_IScale(int *vec, int n, int scale)
-{
- int x;
- for (x = 0; x < n; x++) vec[x] *= scale;
-}
-
-
-/* Function: esl_vec_DIncrement()
-* Synopsis: Add a scalar to all items in a vector.
-* Incept: SRE, Mon Mar 21 11:56:57 2005 [St. Louis]
-*
-* Purpose: Adds scalar <x> to all items in the <n>-vector <v>.
-*
-* <esl_vec_FIncrement()> and <esl_vec_IIncrement()> do the
-* same, for float and integer vectors.
-*/
-void
-esl_vec_DIncrement(double *v, int n, double x)
-{
- int i;
- for (i = 0; i < n; i++) v[i] += x;
-}
-void
-esl_vec_FIncrement(float *v, int n, float x)
-{
- int i;
- for (i = 0; i < n; i++) v[i] += x;
-}
-void
-esl_vec_IIncrement(int *v, int n, int x)
-{
- int i;
- for (i = 0; i < n; i++) v[i] += x;
-}
-
-
-
-/* Function: esl_vec_DSum()
-* Synopsis: Returns $\sum_i x_i$.
-*
-* Purpose: Returns the scalar sum of the <n> items in <vec>.
-*
-* <esl_vec_FSum()> and <esl_vec_ISum()> do the same,
-* but for float and integer vectors.
-*/
-double
-esl_vec_DSum(double *vec, int n)
-{
- double sum = 0.;
- int x;
- for (x = 0; x < n; x++) sum += vec[x];
- return sum;
-}
-float
-esl_vec_FSum(float *vec, int n)
-{
- float sum = 0.;
- int x;
- for (x = 0; x < n; x++) sum += vec[x];
- return sum;
-}
-int
-esl_vec_ISum(int *vec, int n)
-{
- int sum = 0;
- int x;
- for (x = 0; x < n; x++) sum += vec[x];
- return sum;
-}
-
-
-/* Function: esl_vec_DAdd()
-* Synopsis: Vector addition of two vectors.
-*
-* Purpose: Vector addition. Adds <vec2> to <vec1>, leaving
-* result in <vec1>. (<vec2> is unchanged.).
-* Both vectors are of size <n>.
-*
-* <esl_vec_FAdd()> and <esl_vec_IAdd()> do the same,
-* for float and integer vectors.
-*/
-void
-esl_vec_DAdd(double *vec1, const double *vec2, int n)
-{
- int x;
- for (x = 0; x < n; x++) vec1[x] += vec2[x];
-}
-void
-esl_vec_FAdd(float *vec1, const float *vec2, int n)
-{
- int x;
- for (x = 0; x < n; x++) vec1[x] += vec2[x];
-}
-void
-esl_vec_IAdd(int *vec1, const int *vec2, int n)
-{
- int x;
- for (x = 0; x < n; x++) vec1[x] += vec2[x];
-}
-
-
-/* Function: esl_vec_DAddScaled()
-* Synopsis: Scale <vec2> and add it to <vec1>.
-*
-* Purpose: Scales <vec2> by scalar <a>, and adds that
-* to <vec1>. Both vectors are of size <n>.
-*
-* <esl_vec_FAddScaled()> and <esl_vec_IAddScaled()> do the same,
-* for float and integer vectors.
-*
-* Essentially the same as BLAS1 xAXPY().
-*/
-void
-esl_vec_DAddScaled(double *vec1, double *vec2, double a, int n)
-{
- int x;
- for (x = 0; x < n; x++) vec1[x] += vec2[x] * a;
-}
-void
-esl_vec_FAddScaled(float *vec1, float *vec2, float a, int n)
-{
- int x;
- for (x = 0; x < n; x++) vec1[x] += vec2[x] * a;
-}
-void
-esl_vec_IAddScaled(int *vec1, int *vec2, int a, int n)
-{
- int x;
- for (x = 0; x < n; x++) vec1[x] += vec2[x] * a;
-}
-
-
-
-/* Function: esl_vec_DCopy()
-* Synopsis: Set <dest> vector to same values as <src>.
-*
-* Purpose: Copies <src> to <dest>. <src> is
-* unchanged. Both vectors are of size <n>.
-*
-* <esl_vec_FCopy()> and <esl_vec_ICopy()> do the same,
-* for float and integer vectors.
-*
-* Essentially the same as BLAS1 xCOPY().
-*/
-void
-esl_vec_DCopy(const double *src, const int n, double *dest)
-{
- int x;
- for (x = 0; x < n; x++) dest[x] = src[x];
-}
-void
-esl_vec_FCopy(const float *src, const int n, float *dest)
-{
- int x;
- for (x = 0; x < n; x++) dest[x] = src[x];
-}
-void
-esl_vec_ICopy(const int *src, const int n, int *dest)
-{
- int x;
- for (x = 0; x < n; x++) dest[x] = src[x];
-}
-
-
-/* Function: esl_vec_DCompare()
-* Synopsis: Return <eslOK> if two vectors are equal.
-* Incept: SRE, Mon Nov 6 10:20:28 2006 [Janelia]
-*
-* Purpose: Compare <vec1> to <vec2> for equality, by
-* comparing each cognate element pair. Both vectors
-* are of size <n>. Equality of elements is
-* defined by being $\leq$ fractional tolerance <tol>
-* for floating point comparisons, and strict equality
-* for integer comparisons. Return <eslOK>
-* if the vectors are equal, and <eslFAIL> if not.
-*
-* <esl_vec_FCompare()> and <esl_vec_ICompare()> do the same,
-* for float and integer vectors.
-*/
-int
-esl_vec_DCompare(const double *vec1, const double *vec2, int n, double tol)
-{
- int i;
- for (i = 0; i < n; i++) if (esl_DCompare(vec1[i], vec2[i], tol) == eslFAIL) return eslFAIL;
- return eslOK;
-}
-int
-esl_vec_FCompare( const float *vec1, const float *vec2, int n, float tol)
-{
- int i;
- for (i = 0; i < n; i++) if (esl_DCompare(vec1[i], vec2[i], tol) == eslFAIL) return eslFAIL;
- return eslOK;
-}
-int
-esl_vec_ICompare( const int *vec1, const int *vec2, int n)
-{
- int i;
- for (i = 0; i < n; i++) if (vec1[i] != vec2[i]) return eslFAIL;
- return eslOK;
-}
-
-
-
-/* Function: esl_vec_DSwap()
-* Synopsis: Swap two vectors.
-*
-* Purpose: Swaps <vec2> and <vec1>.
-* Both vectors are of size <n>.
-*
-* <esl_vec_FSwap()> and <esl_vec_ISwap()> do the same,
-* for float and integer vectors.
-*
-* Essentially the same as BLAS1 xSWAP().
-*
-* You will be better off swapping the pointers to
-* the vectors, if that's feasible.
-*/
-void
-esl_vec_DSwap(double *vec1, double *vec2, int n)
-{
- int x;
- double tmp;
-
- for (x = 0; x < n; x++)
- { tmp = vec1[x]; vec1[x] = vec2[x]; vec2[x] = tmp; }
-}
-void
-esl_vec_FSwap(float *vec1, float *vec2, int n)
-{
- int x;
- float tmp;
-
- for (x = 0; x < n; x++)
- { tmp = vec1[x]; vec1[x] = vec2[x]; vec2[x] = tmp; }
-}
-void
-esl_vec_ISwap(int *vec1, int *vec2, int n)
-{
- int x;
- int tmp;
-
- for (x = 0; x < n; x++)
- { tmp = vec1[x]; vec1[x] = vec2[x]; vec2[x] = tmp; }
-}
-
-
-
-
-/* Function: esl_vec_DDot()
-* Synopsis: Return the dot product of two vectors.
-*
-* Purpose: Returns the scalar dot product <vec1> $\cdot$ <vec2>.
-* Both vectors are of size <n>.
-*
-* <esl_vec_FDot()> and <esl_vec_IDot()> do the same,
-* for float and integer vectors.
-*/
-double
-esl_vec_DDot(double *vec1, double *vec2, int n)
-{
- double result = 0.;
- int x;
- for (x = 0; x < n; x++) result += vec1[x] * vec2[x];
- return result;
-}
-float
-esl_vec_FDot(float *vec1, float *vec2, int n)
-{
- float result = 0.;
- int x;
- for (x = 0; x < n; x++) result += vec1[x] * vec2[x];
- return result;
-}
-int
-esl_vec_IDot(int *vec1, int *vec2, int n)
-{
- int result = 0;
- int x;
- for (x = 0; x < n; x++) result += vec1[x] * vec2[x];
- return result;
-}
-
-
-
-/* Function: esl_vec_DMax()
-* Synopsis: Return value of the maximum element in a vector.
-*
-* Purpose: Returns the maximum value of the <n> values
-* in <vec>.
-*
-* <esl_vec_FMax()> and <esl_vec_IMax()> do the same,
-* for float and integer vectors.
-*/
-double
-esl_vec_DMax(double *vec, int n)
-{
- int i;
- double best;
-
- best = vec[0];
- for (i = 1; i < n; i++)
- if (vec[i] > best) best = vec[i];
- return best;
-}
-float
-esl_vec_FMax(float *vec, int n)
-{
- int i;
- float best;
-
- best = vec[0];
- for (i = 1; i < n; i++)
- if (vec[i] > best) best = vec[i];
- return best;
-}
-int
-esl_vec_IMax(int *vec, int n)
-{
- int i;
- int best;
-
- best = vec[0];
- for (i = 1; i < n; i++)
- if (vec[i] > best) best = vec[i];
- return best;
-}
-
-
-/* Function: esl_vec_DMin()
-* Synopsis: Return value of the minimum element in a vector.
-*
-* Purpose: Returns the minimum value of the <n> values
-* in <vec>.
-*
-* <esl_vec_FMin()> and <esl_vec_IMin()> do the same,
-* for float and integer vectors.
-*/
-double
-esl_vec_DMin(double *vec, int n)
-{
- int i;
- double best;
-
- best = vec[0];
- for (i = 1; i < n; i++)
- if (vec[i] < best) best = vec[i];
- return best;
-}
-float
-esl_vec_FMin(float *vec, int n)
-{
- int i;
- float best;
-
- best = vec[0];
- for (i = 1; i < n; i++)
- if (vec[i] < best) best = vec[i];
- return best;
-}
-int
-esl_vec_IMin(int *vec, int n)
-{
- int i;
- int best;
-
- best = vec[0];
- for (i = 1; i < n; i++)
- if (vec[i] < best) best = vec[i];
- return best;
-}
-
-
-/* Function: esl_vec_DArgMax()
-* Synopsis: Return index of maximum element in a vector.
-*
-* Purpose: Returns the index of the maximum value in the <n> values
-* in <vec>. In case of ties, the element with the smallest index
-* is returned.
-*
-* <esl_vec_FArgMax()> and <esl_vec_IArgMax()> do the same,
-* for float and integer vectors.
-*
-* Note: Do not change the behavior that the smallest index is
-* returned in case of ties. Some functions rely on this
-* behavior: optimal accuracy tracebacks in HMMER for example.
-*/
-int
-esl_vec_DArgMax(double *vec, int n)
-{
- int i;
- int best = 0;
-
- for (i = 1; i < n; i++)
- if (vec[i] > vec[best]) best = i;
- return best;
-}
-int
-esl_vec_FArgMax(float *vec, int n)
-{
- int i;
- int best = 0;
-
- for (i = 1; i < n; i++)
- if (vec[i] > vec[best]) best = i;
- return best;
-}
-int
-esl_vec_IArgMax(int *vec, int n)
-{
- int i;
- int best = 0;
-
- for (i = 1; i < n; i++)
- if (vec[i] > vec[best]) best = i;
- return best;
-}
-
-
-/* Function: esl_vec_DArgMin()
-* Synopsis: Return index of minimum element in a vector.
-*
-* Purpose: Returns the index of the minimum value in the <n> values
-* in <vec>.
-*
-* <esl_vec_FArgMin()> and <esl_vec_IArgMin()> do the same,
-* for float and integer vectors.
-*/
-int
-esl_vec_DArgMin(double *vec, int n)
-{
- int i;
- int best = 0;
- for (i = 1; i < n; i++)
- if (vec[i] < vec[best]) best = i;
- return best;
-}
-int
-esl_vec_FArgMin(float *vec, int n)
-{
- int i;
- int best = 0;
-
- for (i = 1; i < n; i++)
- if (vec[i] < vec[best]) best = i;
- return best;
-}
-int
-esl_vec_IArgMin(int *vec, int n)
-{
- int i;
- int best = 0;
-
- for (i = 1; i < n; i++)
- if (vec[i] < vec[best]) best = i;
- return best;
-}
-
-
-/* some static functions to pass to qsort() that the
-* upcoming Sort() functions will call
-*/
-static int
-qsort_DIncreasing(const void *xp1, const void *xp2)
-{
- double x1 = * (double *) xp1;
- double x2 = * (double *) xp2;
- if (x1 < x2) return -1;
- if (x1 > x2) return 1;
- return 0;
-}
-static int
-qsort_FIncreasing(const void *xp1, const void *xp2)
-{
- float x1 = * (float *) xp1;
- float x2 = * (float *) xp2;
- if (x1 < x2) return -1;
- if (x1 > x2) return 1;
- return 0;
-}
-static int
-qsort_IIncreasing(const void *xp1, const void *xp2)
-{
- int x1 = * (int *) xp1;
- int x2 = * (int *) xp2;
- if (x1 < x2) return -1;
- if (x1 > x2) return 1;
- return 0;
-}
-static int
-qsort_DDecreasing(const void *xp1, const void *xp2)
-{
- double x1 = * (double *) xp1;
- double x2 = * (double *) xp2;
- if (x1 > x2) return -1;
- if (x1 < x2) return 1;
- return 0;
-}
-static int
-qsort_FDecreasing(const void *xp1, const void *xp2)
-{
- float x1 = * (float *) xp1;
- float x2 = * (float *) xp2;
- if (x1 > x2) return -1;
- if (x1 < x2) return 1;
- return 0;
-}
-static int
-qsort_IDecreasing(const void *xp1, const void *xp2)
-{
- int x1 = * (int *) xp1;
- int x2 = * (int *) xp2;
- if (x1 > x2) return -1;
- if (x1 < x2) return 1;
- return 0;
-}
-
-/* Function: esl_vec_DSortIncreasing()
-* Synopsis: Sort vector from smallest to largest.
-* Incept: SRE, Wed Aug 17 10:44:31 2005 [St. Louis]
-*
-* Purpose: Sorts <vec> in place, from smallest to largest value.
-* (That is, <vec[0]> is the minimum and <vec[n-1]> is
-* the maximum.)
-*
-* <esl_vec_FSortIncreasing()> and <esl_vec_ISortIncreasing()>
-* do the same, for float and integer vectors.
-*/
-void
-esl_vec_DSortIncreasing(double *vec, int n)
-{
- qsort((void *) vec, n, sizeof(double), qsort_DIncreasing);
-}
-void
-esl_vec_FSortIncreasing(float *vec, int n)
-{
- qsort((void *) vec, n, sizeof(float), qsort_FIncreasing);
-}
-void
-esl_vec_ISortIncreasing(int *vec, int n)
-{
- qsort((void *) vec, n, sizeof(int), qsort_IIncreasing);
-}
-
-/* Function: esl_vec_DSortDecreasing()
-* Synopsis: Sort vector from largest to smallest.
-* Incept: SRE, Wed Aug 17 10:44:31 2005 [St. Louis]
-*
-* Purpose: Sorts <vec> in place, from largest to smallest value.
-* (That is, <vec[0]> is the maximum and <vec[n-1]> is
-* the minimum.)
-*
-* <esl_vec_FSortDecreasing()> and <esl_vec_ISortDecreasing()>
-* do the same, for float and integer vectors.
-*/
-void
-esl_vec_DSortDecreasing(double *vec, int n)
-{
- qsort((void *) vec, n, sizeof(double), qsort_DDecreasing);
-}
-void
-esl_vec_FSortDecreasing(float *vec, int n)
-{
- qsort((void *) vec, n, sizeof(float), qsort_FDecreasing);
-}
-void
-esl_vec_ISortDecreasing(int *vec, int n)
-{
- qsort((void *) vec, n, sizeof(int), qsort_IDecreasing);
-}
-
-
-/* Function: esl_vec_DDump()
-* Synopsis: Output vector to a stream as text.
-* Incept: ER, Thu Jul 21 12:54:56 CDT 2005 [St. Louis]
-*
-* Purpose: Given a vector, dump it to stream <ofp>.
-*
-* If <label> is non-NULL, they represent
-* single-character labels to put on the vector.
-* (For example, these might be a sequence alphabet).
-* Numbers 1..n is used if <label> is NULL.
-*
-* Args: ofp - output file pointer; stdout, for example.
-* v - vector to dump.
-* label - optional: NULL, or character labels
-*
-* Returns: <eslOK> on success.
-*/
-int
-esl_vec_DDump(FILE *ofp, double *v, int n, char *label)
-{
- int a;
-
- fprintf(ofp, " ");
- if (label != NULL)
- for (a = 0; a < n; a++) fprintf(ofp, " %c ", label[a]);
- else
- for (a = 0; a < n; a++) fprintf(ofp, "%10d ", a+1);
- fprintf(ofp, "\n");
-
- fprintf(ofp, " ");
- for (a = 0; a < n; a++) fprintf(ofp, "%10.6f ", v[a]);
- fprintf(ofp, "\n");
-
- return eslOK;
-}
-int
-esl_vec_FDump(FILE *ofp, float *v, int n, char *label)
-{
- int a;
-
- fprintf(ofp, " ");
- if (label != NULL)
- for (a = 0; a < n; a++) fprintf(ofp, " %c ", label[a]);
- else
- for (a = 0; a < n; a++) fprintf(ofp, "%10d ", a+1);
- fprintf(ofp, "\n");
-
- fprintf(ofp, " ");
- for (a = 0; a < n; a++) fprintf(ofp, "%10.6f ", v[a]);
- fprintf(ofp, "\n");
-
- return eslOK;
-}
-int
-esl_vec_IDump(FILE *ofp, int *v, int n, char *label)
-{
- int a;
-
- fprintf(ofp, " ");
- if (label != NULL)
- for (a = 0; a < n; a++) fprintf(ofp, " %c ", label[a]);
- else
- for (a = 0; a < n; a++) fprintf(ofp, "%8d ", a+1);
- fprintf(ofp, "\n");
-
- fprintf(ofp, " ");
- for (a = 0; a < n; a++) fprintf(ofp, "%8d ", v[a]);
- fprintf(ofp, "\n");
-
- return eslOK;
-}
-
-/* Function: esl_vec_D2F()
-* Synopsis: Convert between single-precision and double-precision vectors.
-* Incept: SRE, Thu Mar 30 09:04:17 2006 [St. Louis]
-*
-* Purpose: Copy a double vector <src> to a float vector <dst>. Caller
-* provides space in the float vector that is at
-* least <n>.
-*
-* Similarly, <esl_vec_F2D()> converts float to double;
-* <esl_vec_I2D()> converts integer to double;
-* <esl_vec_I2F()> converts integer to float.
-*/
-void
-esl_vec_D2F(double *src, int n, float *dst)
-{
- int i;
- for (i = 0; i < n; i++) dst[i] = src[i];
-}
-void
-esl_vec_F2D(float *src, int n, double *dst)
-{
- int i;
- for (i = 0; i < n; i++) dst[i] = src[i];
-}
-void
-esl_vec_I2F(int *src, int n, float *dst)
-{
- int i;
- for (i = 0; i < n; i++) dst[i] = src[i];
-}
-void
-esl_vec_I2D(int *src, int n, double *dst)
-{
- int i;
- for (i = 0; i < n; i++) dst[i] = src[i];
-}
-
-
-
-
-/* Function: esl_vec_DNorm()
-* Synopsis: Normalize probability vector.
-*
-* Purpose: Normalizes a probability vector <vec>,
-* such that $\sum_{i=1}{n} \mathrm{vec}_i = 1.0$.
-*
-* <esl_vec_FNorm()> does the same, for a probability vector
-* of floats.
-*/
-void
-esl_vec_DNorm(double *vec, int n)
-{
- int x;
- double sum;
-
- sum = esl_vec_DSum(vec, n);
- if (sum != 0.0) for (x = 0; x < n; x++) vec[x] /= sum;
- else for (x = 0; x < n; x++) vec[x] = 1. / (double) n;
-}
-void
-esl_vec_FNorm(float *vec, int n)
-{
- int x;
- float sum;
-
- sum = esl_vec_FSum(vec, n);
- if (sum != 0.0) for (x = 0; x < n; x++) vec[x] /= sum;
- else for (x = 0; x < n; x++) vec[x] = 1. / (float) n;
-}
-
-
-/* Function: esl_vec_DLog()
-* Synopsis: Convert probability vector elements to log probabilities.
-*
-* Purpose: Converts a probability vector <vec> to a log
-* probability vector: takes the log of each of the <n>
-* values in the vector.
-*
-* <esl_vec_FLog()> does the same, for a probability vector
-* of floats.
-*/
-void
-esl_vec_DLog(double *vec, int n)
-{
- int x;
- for (x = 0; x < n; x++)
- if (vec[x] > 0.) vec[x] = log(vec[x]);
- else vec[x] = -DBL_MAX;
-}
-void
-esl_vec_FLog(float *vec, int n)
-{
- int x;
- for (x = 0; x < n; x++)
- if (vec[x] > 0.) vec[x] = logf(vec[x]);
- else vec[x] = -FLT_MAX;
-}
-
-
-/* Function: esl_vec_DEntropy()
-* Synopsis: Return Shannon entropy of p-vector, in bits.
-*
-* Purpose: Returns the Shannon entropy of a probability vector <p>,
-* in bits ($\log_2$), defined as \citep{CoverThomas}:
-*
-* \[
-* H = \sum_x p_x \log_2 p_x.
-* \]
-*
-* <esl_vec_FEntropy()> does the same, for a probability vector
-* of floats.
-*/
-double
-esl_vec_DEntropy(const double *p, int n)
-{
- int i;
- double entropy;
-
- entropy = 0.;
- for(i = 0; i < n; i++)
- if (p[i] > 0.) entropy += p[i] * log(p[i]);
- return(-1.44269504 * entropy); /* converts to bits */
-}
-float
-esl_vec_FEntropy(const float *p, int n)
-{
- int i;
- float entropy;
-
- entropy = 0.;
- for(i = 0; i < n; i++)
- if (p[i] > 0.) entropy += p[i] * logf(p[i]);
- return(-1.44269504 * entropy); /* converts to bits */
-}
-
-/* Function: esl_vec_DRelEntropy()
-* Synopsis: Return relative entropy $D(p \parallel q)$ in bits.
-* Incept: SRE, Fri May 11 09:03:07 2007 [Janelia]
-*
-* Purpose: Returns Shannon relative entropy of probability
-* vectors <p> and <q> in bits, also known as the
-* Kullback Leibler "distance" \citep[p.18]{CoverThomas}:
-*
-* \[
-* D(p \parallel f) = \sum_x p_x \log_2 \frac{p_x}{q_x}.
-* \]
-*
-* If for any $x$ $q_x = 0$ and $p_x > 0$, the relative
-* entropy is $\infty$.
-*
-* <esl_vec_FRelEntropy()> does the same, for probability
-* vectors of floats.
-*/
-double
-esl_vec_DRelEntropy(const double *p, const double *q, int n)
-{
- int i;
- double kl;
-
- kl = 0.;
- for(i = 0; i < n; i++)
- if (p[i] > 0.) {
- if (q[i] == 0.) return eslINFINITY;
- else kl += p[i] * log(p[i]/q[i]);
- }
- return(1.44269504 * kl); /* converts to bits */
-}
-float
-esl_vec_FRelEntropy(const float *p, const float *q, int n)
-{
- int i;
- float kl;
-
- kl = 0.;
- for(i = 0; i < n; i++)
- if (p[i] > 0.) {
- if (q[i] == 0.) return eslINFINITY;
- else kl += p[i] * log((double)(p[i]/q[i]));
- }
- return(1.44269504 * kl); /* converts to bits */
-}
-
-
-/* Function: esl_vec_DExp()
-* Synopsis: Converts log probability vector elements to probabilities.
-*
-* Purpose: Converts a log probability vector <vec> back to a
-* probability vector: exponentiates each of the <n>
-* values in the vector.
-*
-* This routine only calls <exp()> on the elements of
-* vector, which are presumed to be log probabilities;
-* whether the resulting vector is a properly normalized
-* probability vector is the caller's problem.
-*
-* <esl_vec_FExp()> does the same, for a log probability vector
-* of floats.
-*/
-void
-esl_vec_DExp(double *vec, int n)
-{
- int x;
- for (x = 0; x < n; x++) vec[x] = exp(vec[x]);
-}
-void
-esl_vec_FExp(float *vec, int n)
-{
- int x;
- for (x = 0; x < n; x++) vec[x] = expf(vec[x]);
-}
-
-/* Function: esl_vec_DLogSum()
-* Synopsis: Given log-p-vector, return log of sum of probabilities.
-*
-* Purpose: <vec> is a log probability vector; return the log of the scalar sum
-* of the probabilities in <vec>. That is, the <n> elements in <vec>
-* are log probabilities, but the summation is done in probability
-* space, by exponentiating each of the <n> values in the vector,
-* summing, and returning the log of the sum.
-*
-* That is: return $\log \sum_i e^{v_i}$.
-*
-* The trick is to do this without numerical underflow or overflow.
-*
-* <esl_vec_FLogSum()> does the same, for a log probability vector
-* of floats.
-*/
-double
-esl_vec_DLogSum(double *vec, int n)
-{
- int x;
- double max, sum;
-
- max = esl_vec_DMax(vec, n);
- if (max == eslINFINITY) return eslINFINITY; /* avoid inf-inf below! */
- sum = 0.0;
- for (x = 0; x < n; x++)
- if (vec[x] > max - 50.)
- sum += exp(vec[x] - max);
- sum = log(sum) + max;
- return sum;
-}
-float
-esl_vec_FLogSum(float *vec, int n)
-{
- int x;
- float max, sum;
-
- max = esl_vec_FMax(vec, n);
- sum = 0.0;
- for (x = 0; x < n; x++)
- if (vec[x] > max - 50.)
- sum += expf(vec[x] - max);
- sum = logf(sum) + max;
- return sum;
-}
-
-
-/* Function: esl_vec_DLogNorm()
-* Synopsis: Normalize a log p-vector, make it a p-vector.
-* Incept: SRE, Thu Apr 7 17:45:39 2005 [St. Louis]
-*
-* Purpose: Given an unnormalized log probability vector <vec>
-* of length <n>, normalize it and make it a
-* probability vector.
-*
-* <esl_vec_FLogNorm()> does the same, but for a vector
-* of floats instead of doubles.
-*
-* Returns: (void); <vec> is changed in place.
-*/
-void
-esl_vec_DLogNorm(double *vec, int n)
-{
- double denom;
-
- denom = esl_vec_DLogSum(vec, n);
- esl_vec_DIncrement(vec, n, -1.*denom);
- esl_vec_DExp (vec, n);
- esl_vec_DNorm(vec, n);
-}
-void
-esl_vec_FLogNorm(float *vec, int n)
-{
- float denom;
-
- denom = esl_vec_FLogSum(vec, n);
- esl_vec_FIncrement(vec, n, -1.*denom);
- esl_vec_FExp (vec, n);
- esl_vec_FNorm(vec, n);
-}
-
-/* Function: esl_vec_DValidate()
-* Synopsis: Verifies that vector is p-vector.
-* Incept: ER, Tue Dec 5 09:38:54 EST 2006 [janelia]
-*
-* Purpose: Validate a probability vector <vec> of length <n>.
-* Each element has to be between 0 and 1, and
-* the sum of all elements has to be 1.
-*
-* Args: v - p vector to validate.
-* n - dimensionality of v
-* tol - convergence criterion applied to sum of v
-* errbuf - NULL, or a failure message buffer allocated
-* for at least <eslERRBUFSIZE> chars.
-*
-* Returns: <eslOK> on success, or <eslFAIL> on validation failure.
-* Upon failure, if caller provided a non-<NULL> <errbuf>,
-* an informative message is left there.
-*/
-int
-esl_vec_DValidate(double *vec, int n, double tol, char *errbuf)
-{
- int status;
- int x;
- double sum = 0.;
-
- if (errbuf) *errbuf = 0;
- if (n == 0) return eslOK;
-
- for (x = 0; x < n; x++) {
- if (vec[x] < 0.0 || vec[x] > 1.0)
- ESL_XFAIL(eslFAIL, errbuf, "value %d is not a probability between 0..1", x);
- sum += vec[x];
- }
-
- if (fabs(sum - 1.0) > tol)
- ESL_XFAIL(eslFAIL, errbuf, "vector does not sum to 1.0");
- return eslOK;
-
-ERROR:
- return status;
-}
-int
-esl_vec_FValidate(float *vec, int n, float tol, char *errbuf)
-{
- int status;
- int x;
- float sum = 0.;
-
- if (errbuf) *errbuf = 0;
- if (n == 0) return eslOK;
-
- for (x = 0; x < n; x++) {
- if (vec[x] < 0.0 || vec[x] > 1.0)
- ESL_XFAIL(eslFAIL, errbuf, "value %d is not a probability between 0..1", x);
- sum += vec[x];
- }
-
- if (fabs(sum - 1.0) > tol)
- ESL_XFAIL(eslFAIL, errbuf, "vector does not sum to 1.0");
- return eslOK;
-
-ERROR:
- return status;
-}
-
-/* Function: esl_vec_DLogValidate()
-* Synopsis: Verify that vector is a log-p-vector.
-* Incept: ER, Tue Dec 5 09:46:51 EST 2006 [janelia]
-*
-* Purpose: Validate a log probability vector <vec> of length <n>.
-* The exp of each element has to be between 0 and 1, and
-* the sum of all elements has to be 1.
-*
-* Args: v - log p vector to validate.
-* n - dimensionality of v
-* tol - convergence criterion applied to sum of exp v
-* errbuf - NULL, or a failure message buffer allocated
-* for at least p7_ERRBUFSIZE chars.
-*
-* Returns: <eslOK> on success, or <eslFAIL> on failure; upon failure,
-* if caller provided a non-<NULL> <errbuf>, an informative
-* message is left there.
-*
-* Throws: <eslEMEM> on allocation failure.
-*/
-int
-esl_vec_DLogValidate(double *vec, int n, double tol, char *errbuf)
-{
- int status;
- double *expvec = NULL;
-
- if (errbuf) *errbuf = 0;
- if (n == 0) return eslOK;
-
- ESL_ALLOC_WITH_TYPE(expvec, double*, sizeof(double)*n);
- esl_vec_DCopy(vec, n, expvec);
- esl_vec_DExp(expvec, n);
- if ((status = esl_vec_DValidate(expvec, n, tol, errbuf)) != eslOK) goto ERROR;
- free(expvec);
- return eslOK;
-
-ERROR:
- if (expvec != NULL) free(expvec);
- return status;
-}
-int
-esl_vec_FLogValidate(float *vec, int n, float tol, char *errbuf)
-{
- int status;
- float *expvec = NULL;
-
- if (errbuf) *errbuf = 0;
- if (n == 0) return eslOK;
-
- ESL_ALLOC_WITH_TYPE(expvec, float*, sizeof(float)*n);
- esl_vec_FCopy(vec, n, expvec);
- esl_vec_FExp(expvec, n);
- if ((status = esl_vec_FValidate(expvec, n, tol, errbuf)) != eslOK) goto ERROR;
- free(expvec);
- return eslOK;
-
-ERROR:
- if (expvec != NULL) free(expvec);
- return eslOK;
-}
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_vectorops.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_vectorops.h
deleted file mode 100644
index a9c061a..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_vectorops.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/* esl_vectorops.h
-* Header file for vectorops.c
-*
-* SRE, Tue Oct 1 15:23:37 2002 [St. Louis]
-* SVN $Id: esl_vectorops.h 319 2009-01-27 16:51:36Z eddys $
-*/
-#ifndef ESL_VECTOROPS_INCLUDED
-#define ESL_VECTOROPS_INCLUDED
-
-#include <stdio.h>
-
-extern void esl_vec_DSet(double *vec, int n, double value);
-extern void esl_vec_FSet(float *vec, int n, float value);
-extern void esl_vec_ISet(int *vec, int n, int value);
-
-extern void esl_vec_DScale(double *vec, int n, double scale);
-extern void esl_vec_FScale(float *vec, int n, float scale);
-extern void esl_vec_IScale(int *vec, int n, int scale);
-
-extern void esl_vec_DIncrement(double *v, int n, double x);
-extern void esl_vec_FIncrement(float *v, int n, float x);
-extern void esl_vec_IIncrement(int *v, int n, int x);
-
-extern double esl_vec_DSum(double *vec, int n);
-extern float esl_vec_FSum(float *vec, int n);
-extern int esl_vec_ISum(int *vec, int n);
-
-extern void esl_vec_DAdd(double *vec1, const double *vec2, int n);
-extern void esl_vec_FAdd(float *vec1, const float *vec2, int n);
-extern void esl_vec_IAdd(int *vec1, const int *vec2, int n);
-
-extern void esl_vec_DAddScaled(double *vec1, double *vec2, double a, int n);
-extern void esl_vec_FAddScaled(float *vec1, float *vec2, float a, int n);
-extern void esl_vec_IAddScaled(int *vec1, int *vec2, int a, int n);
-
-extern void esl_vec_DCopy(const double *src, const int n, double *dest);
-extern void esl_vec_FCopy(const float *src, const int n, float *dest);
-extern void esl_vec_ICopy(const int *src, const int n, int *dest);
-
-extern int esl_vec_DCompare(const double *vec1, const double *vec2, int n, double tol);
-extern int esl_vec_FCompare(const float *vec1, const float *vec2, int n, float tol);
-extern int esl_vec_ICompare(const int *vec1, const int *vec2, int n);
-
-extern void esl_vec_DSwap(double *vec1, double *vec2, int n);
-extern void esl_vec_FSwap(float *vec1, float *vec2, int n);
-extern void esl_vec_ISwap(int *vec1, int *vec2, int n);
-
-extern double esl_vec_DDot(double *vec1, double *vec2, int n);
-extern float esl_vec_FDot(float *vec1, float *vec2, int n);
-extern int esl_vec_IDot(int *vec1, int *vec2, int n);
-
-extern double esl_vec_DMax(double *vec, int n);
-extern float esl_vec_FMax(float *vec, int n);
-extern int esl_vec_IMax(int *vec, int n);
-
-extern double esl_vec_DMin(double *vec, int n);
-extern float esl_vec_FMin(float *vec, int n);
-extern int esl_vec_IMin(int *vec, int n);
-
-extern int esl_vec_DArgMax(double *vec, int n);
-extern int esl_vec_FArgMax(float *vec, int n);
-extern int esl_vec_IArgMax(int *vec, int n);
-
-extern int esl_vec_DArgMin(double *vec, int n);
-extern int esl_vec_FArgMin(float *vec, int n);
-extern int esl_vec_IArgMin(int *vec, int n);
-
-extern void esl_vec_DSortIncreasing(double *vec, int n);
-extern void esl_vec_FSortIncreasing(float *vec, int n);
-extern void esl_vec_ISortIncreasing(int *vec, int n);
-
-extern void esl_vec_DSortDecreasing(double *vec, int n);
-extern void esl_vec_FSortDecreasing(float *vec, int n);
-extern void esl_vec_ISortDecreasing(int *vec, int n);
-
-extern int esl_vec_DDump(FILE *ofp, double *v, int n, char *label);
-extern int esl_vec_FDump(FILE *ofp, float *v, int n, char *label);
-extern int esl_vec_IDump(FILE *ofp, int *v, int n, char *label);
-
-extern void esl_vec_D2F(double *src, int n, float *dst);
-extern void esl_vec_F2D(float *src, int n, double *dst);
-extern void esl_vec_I2F(int *src, int n, float *dst);
-extern void esl_vec_I2D(int *src, int n, double *dst);
-
-extern void esl_vec_DNorm(double *vec, int n);
-extern void esl_vec_FNorm(float *vec, int n);
-
-extern void esl_vec_DLog(double *vec, int n);
-extern void esl_vec_FLog(float *vec, int n);
-
-extern double esl_vec_DEntropy(const double *p, int n);
-extern float esl_vec_FEntropy(const float *p, int n);
-
-extern double esl_vec_DRelEntropy(const double *p, const double *f, int n);
-extern float esl_vec_FRelEntropy(const float *p, const float *f, int n);
-
-extern void esl_vec_DExp(double *vec, int n);
-extern void esl_vec_FExp(float *vec, int n);
-
-extern double esl_vec_DLogSum(double *vec, int n);
-extern float esl_vec_FLogSum(float *vec, int n);
-
-extern void esl_vec_DLogNorm(double *vec, int n);
-extern void esl_vec_FLogNorm(float *vec, int n);
-
-extern int esl_vec_DValidate(double *vec, int n, double tol, char *errbuf);
-extern int esl_vec_FValidate(float *vec, int n, float tol, char *errbuf);
-
-extern int esl_vec_DLogValidate(double *vec, int n, double tol, char *errbuf);
-extern int esl_vec_FLogValidate(float *vec, int n, float tol, char *errbuf);
-
-#endif /* ESL_VECTOROPS_INCLUDED */
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_wuss.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_wuss.cpp
deleted file mode 100644
index 068504d..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_wuss.cpp
+++ /dev/null
@@ -1,429 +0,0 @@
-/* wuss.c
-* RNA secondary structure markup in WUSS notation.
-*
-* SRE, Tue Feb 15 08:43:23 2005
-* SVN $Id: esl_wuss.c 241 2008-04-01 19:01:52Z eddys $
-* xref squid wuss.c.
-*/
-
-#include <hmmer3/easel/esl_config.h>
-
-#include <string.h>
-#include <ctype.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_stack.h>
-
-#include "esl_wuss.h"
-
-/* Function: esl_wuss2ct()
-* Incept: SRE, Tue Feb 15 08:44:54 2005 [St. Louis]
-*
-* Purpose: Given a secondary structure string <ss>, <0..len-1>,
-* in WUSS notation, convert it to a CT array, <1..len>,
-* in <ct>. Caller provides a <ct> allocated for at least
-* <len+1> ints. <ct[i]> is the position that residue i
-* base pairs to, or 0 if i is unpaired. <ct[0]> is undefined
-* (but if you care: it is set to 0).
-*
-* WUSS notation is interpreted loosely here, as input
-* WUSS. Any matching bracket pair or upper/lower case
-* alphabetic pair is interpreted as a base pair; any other
-* WUSS annotation is interpreted as unpaired.
-*
-* Returns: <eslOK> on success. Returns <eslESYNTAX> if the WUSS
-* string isn't valid.
-*
-* Throws: <eslEMEM> on allocation failure.
-*/
-int
-esl_wuss2ct(char *ss, int len, int *ct)
-{
- ESL_STACK *pda[27]; /* 1 secondary structure + up to 26 levels of pk's */
- int i;
- int pos, pair;
- int status; /* success or failure return status */
-
- /* Initialization: always initialize the main pda (0);
- * we'll init the pk pda's on demand.
- */
- if ((pda[0] = esl_stack_ICreate()) == NULL) goto FINISH;
- for (i = 1; i <= 26; i++) pda[i] = NULL;
-
- for (pos = 0; pos <= len; pos++) ct[pos] = 0;
-
- for (pos = 1; pos <= len; pos++)
- {
- if (!isprint((int) ss[pos-1])) /* armor against garbage */
- { status = eslESYNTAX; goto FINISH; }
-
- /* left side of a pair: push position onto stack 0 (pos = 1..L) */
- else if (ss[pos-1] == '<' ||
- ss[pos-1] == '(' ||
- ss[pos-1] == '[' ||
- ss[pos-1] == '{')
- {
- if ((status = esl_stack_IPush(pda[0], pos)) != eslOK) goto FINISH;
- }
-
- /* right side of a pair; resolve pair; check for agreement */
- else if (ss[pos-1] == '>' ||
- ss[pos-1] == ')' ||
- ss[pos-1] == ']' ||
- ss[pos-1] == '}')
- {
- if (esl_stack_IPop(pda[0], &pair) == eslEOD)
- { status = eslESYNTAX; goto FINISH;} /* no closing bracket */
- else if ((ss[pair-1] == '<' && ss[pos-1] != '>') ||
- (ss[pair-1] == '(' && ss[pos-1] != ')') ||
- (ss[pair-1] == '[' && ss[pos-1] != ']') ||
- (ss[pair-1] == '{' && ss[pos-1] != '}'))
- { status = eslESYNTAX; goto FINISH; } /* brackets don't match */
- else
- {
- ct[pos] = pair;
- ct[pair] = pos;
- }
- }
- /* same stuff for pseudoknots */
- else if (isupper((int) ss[pos-1]))
- {
- /* Create the PK stacks on demand.
- */
- i = ss[pos-1] - 'A' + 1;
- if (pda[i] == NULL)
- if ((pda[i] = esl_stack_ICreate()) == NULL)
- { status = eslEMEM; goto FINISH; }
-
- if ((status = esl_stack_IPush(pda[i], pos)) != eslOK) goto FINISH;
- }
- else if (islower((int) ss[pos-1]))
- {
- i = ss[pos-1] - 'a' + 1;
- if (pda[i] == NULL ||
- esl_stack_IPop(pda[i], &pair) == eslEOD)
- { status = eslESYNTAX; goto FINISH;}
- else
- {
- ct[pos] = pair;
- ct[pair] = pos;
- }
- }
- else if (strchr(":,_-.~", ss[pos-1]) == NULL)
- { status = eslESYNTAX; goto FINISH; } /* bogus character */
- }
- status = eslOK;
-
-FINISH:
- for (i = 0; i <= 26; i++)
- if (pda[i] != NULL)
- { /* nothing should be left on stacks */
- if (esl_stack_ObjectCount(pda[i]) != 0)
- status = eslESYNTAX;
- esl_stack_Destroy(pda[i]);
- }
- return status;
-}
-
-
-/* Function: esl_ct2wuss()
-* Incept: SRE, Wed Feb 16 11:22:53 2005 [St. Louis]
-*
-* Purpose: Convert a CT array <ct> for <n> residues (1..n) to a WUSS
-* format string <ss>. <ss> must be allocated for at least
-* n+1 chars (+1 for the terminal NUL).
-*
-* Currently limited to nonpseudoknotted structures. Attempting
-* to convert a pseudoknot-containing <ct> will return an
-* <eslEINVAL> error.
-*
-* Returns: <eslOK> on success.
-* <eslEINVAL> if <ct> contains a pseudoknot.
-*
-* Throws: <eslEMEM> on allocation failure.
-* <eslEINCONCEIVABLE> on internal failure.
-*/
-int
-esl_ct2wuss(int *ct, int n, char *ss)
-{
- ESL_STACK *pda = NULL; /* main stack */
- ESL_STACK *aux = NULL; /* aux storage */
- int status = eslEMEM; /* exit status 'til proven otherwise */
- int i,j;
- int nfaces;
- int minface;
-
- ss[0] = '\0'; /* in case we abort, and caller does something dumb w/ ss */
-
- if ((pda = esl_stack_ICreate()) == NULL) goto FINISH;
- if ((aux = esl_stack_ICreate()) == NULL) goto FINISH;
-
- for (j = 1; j <= n; j++)
- {
- if (ct[j] == 0) /* unpaired: push j. */
- {
- if (esl_stack_IPush(pda, j) != eslOK) goto FINISH;
- }
- else if (ct[j] > j) /* left side of a bp: push j. */
- {
- if (esl_stack_IPush(pda, j) != eslOK) goto FINISH;
- }
- else /* right side of a bp; main routine: resolve a subseq */
- {
- /* Pop back until we find the left partner of i;
- * store SS residues in aux;
- * keep track of #faces and the maximum face depth.
- */
- nfaces = 0;
- minface = -1;
- while (1)
- {
- if (esl_stack_IPop(pda, &i) != eslOK) goto FINISH;
-
- if (i < 0) /* a face counter */
- {
- nfaces++;
- if (i < minface) minface = i;
- }
- else if (ct[i] == j)
- break; /* we found the i,j pair. */
- else if (ct[i] == 0)
- {
- if (esl_stack_IPush(aux, i) != eslOK) goto FINISH;
- }
- else /* ct[i]>0, != j: i is paired, but not to j: pseudoknot! */
- {
- esl_stack_Destroy(pda); esl_stack_Destroy(aux);
- ESL_EXCEPTION(eslEINVAL, "pseudoknots not permitted yet");
- }
- }
-
- /* Now we know i,j pair; and we know how many faces are
- * above them; and we know the max depth of those faces.
- * That's enough to label the pair in WUSS notation.
- * if nfaces == 0, minface is -1; <> a closing bp of a hairpin.
- * if nfaces == 1, inherit minface, we're continuing a stem.
- * if nfaces > 1, bump minface in depth; we're closing a bifurc.
- */
- if (nfaces > 1 && minface > -4) minface--;
- switch (minface) {
- case -1: ss[i-1] = '<'; ss[j-1] = '>'; break;
- case -2: ss[i-1] = '('; ss[j-1] = ')'; break;
- case -3: ss[i-1] = '['; ss[j-1] = ']'; break;
- case -4: ss[i-1] = '{'; ss[j-1] = '}'; break;
- default:
- esl_stack_Destroy(pda); esl_stack_Destroy(aux);
- ESL_EXCEPTION(eslEINCONCEIVABLE, "no such face code");
- }
- if (esl_stack_IPush(pda, minface) != eslOK) goto FINISH;
-
- /* Now, aux contains all the unpaired residues we need to label,
- * according to the # of faces "above" them:
- * nfaces = 0: hairpin loop
- * nfaces = 1: bulge or interior loop
- * nfaces > 1: multifurc
- */
- while (esl_stack_IPop(aux, &i) == eslOK)
- {
- switch (nfaces) {
- case 0: ss[i-1] = '_'; break;
- case 1: ss[i-1] = '-'; break;
- default: ss[i-1] = ','; break; /* nfaces > 1 */
- }
- }
-
- } /* finished processing a subseq enclosed by a bp */
- } /* finished loop over j: end position on seq, 1..n*/
-
- /* Anything that's left in the pda is either a face counter
- * or external single-strand. Face counters are negative;
- * position indices are positive.
- */
- while (esl_stack_IPop(pda, &i) == eslOK)
- if (i > 0) ss[i-1] = ':';
- ss[n] = '\0';
- status = eslOK;
-
-FINISH:
- if (pda != NULL) esl_stack_Destroy(pda);
- if (aux != NULL) esl_stack_Destroy(aux);
- return status;
-}
-
-
-
-/* Function: esl_wuss2kh()
-* Incept: SRE, Tue Feb 15 10:05:35 2005 [St. Louis]
-*
-* Purpose: Converts a secondary structure string <ss> in
-* WUSS notation back to old KHS format in <kh>.
-* <kh> must be allocated for at least as much
-* space as <ss>. <kh> may be the same as <ss>,
-* in which case the conversion is done in-place.
-*
-* Note: Left bp chars are converted to > (left base of base pairs)
-* Right bp chars are converted to < (right base of base pairs)
-* Characters _-,:~ are converted to . (unpaired bases)
-* Character . is untouched (unpaired)
-* Everything else is untouched, including any pseudoknot notation.
-*
-* Returns: <eslOK> on success.
-*/
-int
-esl_wuss2kh(char *ss, char *kh)
-{
- while (*ss != '\0')
- {
- if (*ss == '<') *kh = '>';
- else if (*ss == '(') *kh = '>';
- else if (*ss == '[') *kh = '>';
- else if (*ss == '{') *kh = '>';
- else if (*ss == '>') *kh = '<';
- else if (*ss == ')') *kh = '<';
- else if (*ss == ']') *kh = '<';
- else if (*ss == '}') *kh = '<';
- else if (*ss == '_') *kh = '.';
- else if (*ss == '-') *kh = '.';
- else if (*ss == ',') *kh = '.';
- else if (*ss == ':') *kh = '.';
- else if (*ss == '~') *kh = '.';
- else *kh = *ss;
- ss++;
- kh++;
- }
- *kh = '\0';
- return eslOK;
-}
-
-
-/* Function: esl_kh2wuss()
-* Incept: SRE, Tue Feb 15 10:10:40 2005 [St. Louis]
-*
-* Purpose: Converts an old format secondary structure string <kh>
-* to shorthand WUSS format <ss>. <ss> must be allocated at least
-* as large as <kh>. <ss> can be identical to <kh>, in which
-* case the conversion is done in-place.
-*
-* Note: Character > is converted to < (left base of base pairs)
-* Character < is converted to > (right base of base pairs)
-* A space is converted to . (just in case)
-*
-* Returns: <eslOK> on success.
-*/
-int
-esl_kh2wuss(char *kh, char *ss)
-{
- while (*kh != '\0')
- {
- if (*kh == '>') *ss = '<';
- else if (*kh == '<') *ss = '>';
- else if (*kh == ' ') *ss = '.';
- else *ss = *kh;
- kh++;
- ss++;
- }
- *ss = '\0';
- return eslOK;
-}
-
-
-/* Function: esl_wuss_full()
-* Incept: SRE, Mon Feb 28 09:44:40 2005 [St. Louis]
-*
-* Purpose: Given a simple ("input") WUSS format annotation string <oldss>,
-* convert it to full ("output") WUSS format in <newss>.
-* <newss> must be allocated by the caller to be at least as
-* long as <oldss>. <oldss> and <newss> can be the same,
-* to convert a secondary structure string in place.
-*
-* Pseudoknot annotation is preserved, if <oldss> had it.
-*
-* Returns: <eslSYNTAX> if <oldss> isn't in valid WUSS format.
-*
-* Throws: <eslEMEM> on allocation failure.
-* <eslEINCONCEIVABLE> on internal error that can't happen.
-*/
-int
-esl_wuss_full(char *oldss, char *newss)
-{
- char *tmp = NULL;
- int *ct = NULL;
- int n;
- int i;
- int status;
-
- /* We can use the ct2wuss algorithm to generate a full WUSS string -
- * convert to ct, then back to WUSS. ct2wuss doesn't deal with pk's
- * though, and we want to propagate pk annotation if it's there. So
- * we need two workspaces: ct array, and a temporary ss string that
- * we use to hold non-pk annotation. As a final step, we overlay
- * the pk annotation from the original oldss annotation.
- */
- n = strlen(oldss);
- ESL_ALLOC_WITH_TYPE(ct, int*, sizeof(int) * (n+1));
- ESL_ALLOC_WITH_TYPE(tmp, char*, sizeof(char) * (n+1));
-
- esl_wuss_nopseudo(oldss, tmp);/* tmp = nonpseudoknotted oldss */
-
- status = esl_wuss2ct(tmp, n, ct); /* ct = oldss in ct format, no pks */
- if (status != eslOK) goto ERROR;
-
- status = esl_ct2wuss(ct, n, tmp); /* now tmp is a full WUSS string */
- if (status == eslEINVAL) { status = eslEINCONCEIVABLE; goto ERROR; }/* we're sure, no pk's */
- else if (status != eslOK) goto ERROR; /* EMEM, EINCONCEIVABLE */
-
- for (i = 0; i < n; i++)
- if (isalpha(oldss[i])) newss[i] = oldss[i]; /* transfer pk annotation */
- else newss[i] = tmp[i]; /* transfer new WUSS */
-
- free(ct);
- free(tmp);
- return eslOK;
-
-ERROR:
- free(ct);
- free(tmp);
- return status;
-}
-
-
-
-/* Function: esl_wuss_nopseudo()
-* Incept: SRE, Tue Feb 15 11:02:43 2005 [St. Louis]
-*
-* Purpose: Given a WUSS format annotation string <ss1>,
-* removes all pseudoknot annotation to create a new
-* WUSS string <ss2> that contains only a "canonical"
-* (nonpseudoknotted) structure. <ss2> must be allocated to
-* be at least as large as <ss1>. <ss1> and <ss2>
-* may be the same, in which case the conversion is
-* done in place. Pseudoknot annotation in <ss1> is
-* simply replaced by <.> in <ss2>; the resulting
-* <ss2> WUSS string is therefore in valid simplified format,
-* but may not be valid full format WUSS.
-*
-* Returns: <eslOK>.
-*/
-int
-esl_wuss_nopseudo(char *ss1, char *ss2)
-{
- while (*ss1 != '\0')
- {
- if (isalpha(*ss1)) *ss2 = '.';
- else *ss2 = *ss1;
- ss1++;
- ss2++;
- }
- *ss2 = '\0';
- return eslOK;
-}
-
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_wuss.h b/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_wuss.h
deleted file mode 100644
index 45e9f69..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/easel/esl_wuss.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* wuss.h
-* RNA secondary structure markup in WUSS notation.
-*
-* SVN $Id: esl_wuss.h 83 2005-12-13 20:54:07Z eddy $
-* SRE, Tue Feb 15 10:15:28 2005
-*/
-#ifndef eslWUSS_INCLUDED
-#define eslWUSS_INCLUDED
-
-
-extern int esl_wuss2ct(char *ss, int len, int *ct);
-extern int esl_ct2wuss(int *ct, int n, char *ss);
-extern int esl_wuss2kh(char *ss, char *kh);
-extern int esl_kh2wuss(char *kh, char *ss);
-extern int esl_wuss_full(char *oldss, char *newss);
-extern int esl_wuss_nopseudo(char *ss1, char *ss2);
-
-
-#endif /*eslWUSS_INCLUDED*/
-/*****************************************************************
-* Easel - a library of C functions for biological sequence analysis
-* Version h3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* Easel is distributed under the Janelia Farm Software License, a BSD
-* license. See the LICENSE file for more details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/emit.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/emit.cpp
deleted file mode 100644
index 494c76b..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/emit.cpp
+++ /dev/null
@@ -1,383 +0,0 @@
-/* Emitting (sampling) sequences from an HMM, in either core or
-* profile form.
-*
-* 1. Exported API: sequence emission routines.
-* 2. Private functions.
-* 4. Copyright and license.
-*
-* SRE, Tue Jan 9 08:55:53 2007 [Janelia] [The Crystal Method, Vegas]
- * SVN $Id: emit.c 2895 2009-09-11 20:16:34Z eddys $
-*/
-
-#include <hmmer3/p7_config.h>
-
-#include <math.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_random.h>
-#include <hmmer3/easel/esl_sq.h>
-#include <hmmer3/easel/esl_vectorops.h>
-
-#include <hmmer3/hmmer.h>
-
-static int sample_endpoints(ESL_RANDOMNESS *r, const P7_PROFILE *gm, int *ret_kstart, int *ret_kend);
-
-
-/*****************************************************************
-* 1. Exported API: sequence emission routines.
-*****************************************************************/
-
-/* Function: p7_CoreEmit()
-* Incept: SRE, Tue Jan 9 10:20:51 2007 [Janelia]
-*
-* Purpose: Generate (sample) a sequence from a core HMM <hmm>.
-*
-* Optionally return the sequence and/or its trace in <sq>
-* and <tr>, respectively, which the caller has
-* allocated. Having the caller provide these reusable
-* objects allows re-use of both <sq> and <tr> in repeated
-* calls, saving malloc/free wastage. Either can be passed
-* as <NULL> if it isn't needed.
-*
-* This does not set any fields in the <sq> except for the
-* sequence itself. Caller must set the name, and any other
-* annotation it wants to add.
-*
-* Trace is relative to the core model: it may include
-* I_0 and I_M states, B->DD->M entry is explicit, and a
-* 0 length generated sequence is possible.
-*
-* Args: r - source of randomness
-* hmm - core HMM to generate from
-* sq - opt: digital sequence sampled (or NULL)
-* tr - opt: trace sampled (or NULL)
-*
-* Returns: <eslOK> on success;
-* optionally return the digital sequence through <ret_sq>,
-* and optionally return its trace in <ret_tr>.
-*
-* Throws: <eslECORRUPT> if emission gets us into an illegal state,
-* probably indicating that a probability that should have
-* been zero wasn't.
-*
-* Throws <eslEMEM> on a reallocation error.
-*
-* In these cases, the contents of <sq> and <tr> may be
-* corrupted. Caller should not trust their data, but may
-* safely reuse them.
-*
-* Xref: STL11/124.
-*/
-int
-p7_CoreEmit(ESL_RANDOMNESS *r, const P7_HMM *hmm, ESL_SQ *sq, P7_TRACE *tr)
-{
- int k = 0; /* position in model nodes 1..M */
- int i = 0; /* position in sequence 1..L */
- char st = p7T_B; /* state type */
- int x; /* sampled residue */
- int status;
-
- if (sq != NULL) esl_sq_Reuse(sq);
- if (tr != NULL) {
- if ((status = p7_trace_Reuse(tr)) != eslOK) goto ERROR;
- if ((status = p7_trace_Append(tr, st, k, i)) != eslOK) goto ERROR;
- }
- while (st != p7T_E)
- {
- /* Sample next state type, given current state type (and current k) */
- switch (st) {
- case p7T_B:
- case p7T_M:
- switch (esl_rnd_FChoose(r, hmm->t[k], 3)) {
- case 0: st = p7T_M; break;
- case 1: st = p7T_I; break;
- case 2: st = p7T_D; break;
- default: ESL_XEXCEPTION(eslEINCONCEIVABLE, "impossible.");
- }
- break;
-
- case p7T_I:
- switch (esl_rnd_FChoose(r, hmm->t[k]+3, 2)) {
- case 0: st = p7T_M; break;
- case 1: st = p7T_I; break;
- default: ESL_XEXCEPTION(eslEINCONCEIVABLE, "impossible.");
- }
- break;
-
- case p7T_D:
- switch (esl_rnd_FChoose(r, hmm->t[k]+5, 2)) {
- case 0: st = p7T_M; break;
- case 1: st = p7T_D; break;
- default: ESL_XEXCEPTION(eslEINCONCEIVABLE, "impossible.");
- }
- break;
-
- default: ESL_XEXCEPTION(eslECORRUPT, "impossible state reached during emission");
- }
-
- /* Bump k,i if needed, depending on new state type */
- if (st == p7T_M || st == p7T_D) k++;
- if (st == p7T_M || st == p7T_I) i++;
-
- /* a transit to M_M+1 is a transit to the E state */
- if (k == hmm->M+1) {
- if (st == p7T_M) { st = p7T_E; k = 0; }
- else ESL_XEXCEPTION(eslECORRUPT, "failed to reach E state properly");
- }
-
- /* Sample new residue x if in match or insert */
- if (st == p7T_M) x = esl_rnd_FChoose(r, hmm->mat[k], hmm->abc->K);
- else if (st == p7T_I) x = esl_rnd_FChoose(r, hmm->ins[k], hmm->abc->K);
- else x = eslDSQ_SENTINEL;
-
- /* Add state to trace */
- if (tr != NULL) {
- if ((status = p7_trace_Append(tr, st, k, i)) != eslOK) goto ERROR;
- }
- /* Add x to sequence */
- if (sq != NULL && x != eslDSQ_SENTINEL)
- if ((status = esl_sq_XAddResidue(sq, x)) != eslOK) goto ERROR;
- }
-
- /* Terminate the trace and sequence (both are optional, remember) */
- if (tr != NULL) { tr->M = hmm->M; tr->L = i; }
- if (sq != NULL && (status = esl_sq_XAddResidue(sq, eslDSQ_SENTINEL)) != eslOK) goto ERROR;
- return eslOK;
-
-ERROR:
- return status;
-}
-
-
-/* Function: p7_ProfileEmit()
-* Synopsis: Sample a sequence from the search form of the model.
-* Incept: SRE, Mon Jan 22 10:23:28 2007 [Janelia]
-*
-* Purpose: Sample a sequence from the implicit
-* probabilistic model of a Plan7 profile <gm>. This
-* requires also having the core probabilities of
-* the accompanying <hmm>, and the background
-* frequencies of null1 model <bg>.
-*
-* Optionally return the sequence and/or its trace in <sq>
-* and <tr>, respectively. Caller has allocated space for
-* both of these, though they may get reallocated/grown
-* here. Either can be passed as <NULL> if unneeded.
-*
-* Only the sequence field is set in the <sq>. Caller must
-* set the name, plus any other fields it wants to set. If
-* the <sq> was created in digital mode, this is the <sq->dsq>;
-* if the <sq> was created in text mode, this is <sq->seq>.
-*
-* <p7_ProfileEmit()> deliberately uses an <ESL_SQ> object
-* instead of a plain <ESL_DSQ *> or <char *> string, to
-* take advantage of the object's support for dynamic
-* reallocation of seq length, and to allow both digital and
-* text mode generation.
-*
-* Args: r - source of randomness
-* hmm - core probabilities of the profile
-* gm - configured search profile
-* sq - optRETURN: sampled sequence
-* tr - optRETURN: sampled trace
-*
-* Throws: (no abnormal error conditions)
-*/
-int
-p7_ProfileEmit(ESL_RANDOMNESS *r, const P7_HMM *hmm, const P7_PROFILE *gm, const P7_BG *bg, ESL_SQ *sq, P7_TRACE *tr)
-{
- char prv, st; /* prev, current state type */
- int k = 0; /* position in model nodes 1..M */
- int i = 0; /* position in sequence 1..L */
- int x; /* sampled residue */
- int kend = hmm->M; /* predestined end node */
- int status;
- float xt[p7P_NXSTATES][p7P_NXTRANS];
-
- /* Backcalculate the probabilities in the special states (loop and length model) */
- for (i = 0; i < p7P_NXSTATES; i++)
- for (x = 0; x < p7P_NXTRANS; x++)
- xt[i][x] = exp(gm->xsc[i][x]);
-
- if (sq != NULL) esl_sq_Reuse(sq);
- if (tr != NULL) {
- if ((status = p7_trace_Reuse(tr)) != eslOK) goto ERROR;
- if ((status = p7_trace_Append(tr, p7T_S, k, i)) != eslOK) goto ERROR;
- if ((status = p7_trace_Append(tr, p7T_N, k, i)) != eslOK) goto ERROR;
- }
- st = p7T_N;
- i = 0;
- while (st != p7T_T)
- {
- /* Sample a state transition. After this section, prv and st (prev->current state) are set;
- * k also gets set if we make a B->Mk entry transition.
- */
- prv = st;
- switch (st) {
- case p7T_B:
- if (p7_profile_IsLocal(gm))
- { /* local mode: enter the implicit profile: choose our entry and our predestined exit */
- if ((status = sample_endpoints(r, gm, &k, &kend)) != eslOK) goto ERROR;
- st = p7T_M; /* must be, because left wing is retracted */
- }
- else
- { /* glocal mode: treat B as M_0, use its transitions to MID. */
- /* FIXME: this is wrong. It should sample from B->Mk distribution! */
- switch (esl_rnd_FChoose(r, P7H_TMAT(hmm, 0), p7H_NTMAT)) {
- case 0: st = p7T_M; k = 1; break;
- case 1: st = p7T_I; k = 0; break;
- case 2: st = p7T_D; k = 1; break;
- default: ESL_XEXCEPTION(eslEINCONCEIVABLE, "impossible.");
- }
- }
- break;
-
- case p7T_M:
- if (k == kend) st = p7T_E; /* check our preordained fate */
- else {
- switch (esl_rnd_FChoose(r, P7H_TMAT(hmm, k), p7H_NTMAT)) {
- case 0: st = p7T_M; break;
- case 1: st = p7T_I; break;
- case 2: st = p7T_D; break;
- default: ESL_XEXCEPTION(eslEINCONCEIVABLE, "impossible.");
- }
- }
- break;
-
- case p7T_D:
- if (k == kend) st = p7T_E;
- else st = (esl_rnd_FChoose(r, P7H_TDEL(hmm, k), p7H_NTDEL) == 0) ? p7T_M : p7T_D;
- break;
-
- case p7T_I: st = (esl_rnd_FChoose(r, P7H_TINS(hmm, k), p7H_NTINS) == 0) ? p7T_M : p7T_I; break;
- case p7T_N: st = (esl_rnd_FChoose(r, xt[p7P_N], p7P_NXTRANS) == p7P_MOVE) ? p7T_B : p7T_N; break;
- case p7T_E: st = (esl_rnd_FChoose(r, xt[p7P_E], p7P_NXTRANS) == p7P_MOVE) ? p7T_C : p7T_J; break;
- case p7T_C: st = (esl_rnd_FChoose(r, xt[p7P_C], p7P_NXTRANS) == p7P_MOVE) ? p7T_T : p7T_C; break;
- case p7T_J: st = (esl_rnd_FChoose(r, xt[p7P_J], p7P_NXTRANS) == p7P_MOVE) ? p7T_B : p7T_J; break;
- default: ESL_XEXCEPTION(eslECORRUPT, "impossible state reached during emission");
- }
-
- /* Based on the transition we just sampled, update k. */
- if (st == p7T_E) k = 0;
- else if (st == p7T_M && prv != p7T_B) k++; /* be careful about B->Mk, where we already set k */
- else if (st == p7T_D) k++;
-
- /* Based on the transition we just sampled, generate a residue. */
- if (st == p7T_M) x = esl_rnd_FChoose(r, hmm->mat[k], hmm->abc->K);
- else if (st == p7T_I) x = esl_rnd_FChoose(r, hmm->ins[k], hmm->abc->K);
- else if ((st == p7T_N || st == p7T_C || st == p7T_J) && prv==st) x = esl_rnd_FChoose(r, bg->f, hmm->abc->K);
- else x = eslDSQ_SENTINEL;
-
- if (x != eslDSQ_SENTINEL) i++;
-
- /* Add residue (if any) to sequence */
- if (sq != NULL && x != eslDSQ_SENTINEL && (status = esl_sq_XAddResidue(sq, x)) != eslOK) goto ERROR;
-
- /* Add state to trace. */
- if (tr != NULL) {
- if ((status = p7_trace_Append(tr, st, k, i)) != eslOK) goto ERROR;
- }
- }
- /* Terminate the trace and sequence (both are optional, remember) */
- if (tr != NULL) { tr->M = hmm->M; tr->L = i; }
- if (sq != NULL && (status = esl_sq_XAddResidue(sq, eslDSQ_SENTINEL)) != eslOK) goto ERROR;
- return eslOK;
-
-ERROR:
- return status;
-}
-
-/* Function: p7_emit_SimpleConsensus()
-* Synopsis: Generate simple consensus: ML residue in each match state
-* Incept: SRE, Mon Sep 1 09:10:47 2008 [Janelia]
-*
-* Purpose: Generate a simple consensus sequence for model <hmm>
-* consisting of the maximum probability residue in each
-* match state; store this consensus in <sq>.
-*/
-int
-p7_emit_SimpleConsensus(const P7_HMM *hmm, ESL_SQ *sq)
-{
- int k;
- int x;
- int status;
-
- if ((status = esl_sq_GrowTo(sq, hmm->M)) != eslOK) return status;
-
- for (k = 1; k <= hmm->M; k++)
- {
- x = esl_vec_FArgMax(hmm->mat[k], hmm->abc->K);
- if ((status = esl_sq_XAddResidue(sq, x)) != eslOK) return status;
- }
- if ((status = esl_sq_XAddResidue(sq, eslDSQ_SENTINEL)) != eslOK) return status;
- return eslOK;
-}
-
-
-
-/*****************************************************************
-* 2. Private functions.
-*****************************************************************/
-
-/* sample_endpoints()
-* Incept: SRE, Mon Jan 22 10:43:20 2007 [Janelia]
-*
-* Purpose: Given a profile <gm> and random number source <r>, sample
-* a begin transition from the implicit probabilistic profile
-* model, yielding a sampled start and end node; return these
-* via <ret_kstart> and <ret_kend>.
-*
-* By construction, the entry at node <kstart> is into a
-* match state, but the exit from node <kend> might turn
-* out to be from either a match or delete state.
-*
-* We assume that exits j are uniformly distributed for a
-* particular entry point i: $a_{ij} =$ constant $\forall
-* j$.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation error.
-*
-* Xref: STL11/138
-*/
-static int
-sample_endpoints(ESL_RANDOMNESS *r, const P7_PROFILE *gm, int *ret_kstart, int *ret_kend)
-{
- float *pstart = NULL;
- int k;
- int kstart, kend;
- int status;
-
- /* We have to backcalculate a probability distribution from the
- * lod B->Mk scores in a local model; this is a little time consuming,
- * but we don't have to do it often.
- */
- ESL_ALLOC_WITH_TYPE(pstart, float*, sizeof(float) * (gm->M+1));
- pstart[0] = 0.0f;
- for (k = 1; k <= gm->M; k++)
- pstart[k] = exp(p7P_TSC(gm, k-1, p7P_BM)) * (gm->M - k + 1); /* multiply p_ij by the number of exits j */
- kstart = esl_rnd_FChoose(r, pstart, gm->M+1); /* sample the starting position from that distribution */
- kend = kstart + esl_rnd_Roll(r, gm->M-kstart+1); /* and the exit uniformly from possible exits for it */
-
- free(pstart);
- *ret_kstart = kstart;
- *ret_kend = kend;
- return eslOK;
-
-ERROR:
- if (pstart != NULL) free(pstart);
- *ret_kstart = 0;
- *ret_kend = 0;
- return status;
-}
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/errors.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/errors.cpp
deleted file mode 100644
index 7afb7ae..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/errors.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-/* error handling.
-*
-* HMMER's fatal error messages distinguish between user errors
-* ("failure", with p7_Fail()) and internal faults ("death", with
-* p7_Die()). For now, though, there is no difference between the two
-* functions. Someday we might have p7_Die() print a comforting
-* apology, or provide some help on how to report bugs to us;
-* p7_Fail() might provide some pointers on where to read more
-* documentation.
-*
-* SRE, Fri Jan 12 08:46:02 2007
-* SVN $Id: errors.c 2374 2008-03-31 12:08:51Z eddys $
-*/
-
-#include "p7_config.h"
-
-#include <stdlib.h>
-#include <stdio.h>
-
-#include "hmmer.h"
-
-/* Function: p7_Die()
-* Synopsis: Handle a fatal exception (something that's our fault)
-* Incept: SRE, Fri Jan 12 08:54:45 2007 [Janelia]
-*/
-void
-p7_Die(char *format, ...)
-{
- va_list argp;
- /* format the error mesg */
- fprintf(stderr, "\nFATAL: ");
- va_start(argp, format);
- vfprintf(stderr, format, argp);
- va_end(argp);
- fprintf(stderr, "\n");
- fflush(stderr);
- exit(1);
-}
-
-/* Function: p7_Fail()
-* Synopsis: Handle a user error (something that's the user's fault).
-* Incept: SRE, Fri Jan 12 08:54:45 2007 [Janelia]
-*/
-void
-p7_Fail(char *format, ...)
-{
- va_list argp;
- /* format the error mesg */
- fprintf(stderr, "\nError: ");
- va_start(argp, format);
- vfprintf(stderr, format, argp);
- va_end(argp);
- fprintf(stderr, "\n");
- fflush(stderr);
- exit(1);
-}
-
-
-
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/evalues.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/evalues.cpp
deleted file mode 100644
index 47258b3..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/evalues.cpp
+++ /dev/null
@@ -1,500 +0,0 @@
-/* Calculations and simulations relevant to E-value calculations.
-*
-* Contents:
-* 1. p7_Calibrate(): model calibration wrapper
-* 2. Determination of individual E-value parameters
-* 5. Copyright and license information
-*
-* SRE, Mon Aug 6 13:00:06 2007
- * SVN $Id: evalues.c 3041 2009-11-12 12:58:09Z eddys $
-*/
-#include <hmmer3/p7_config.h>
-
-#include <math.h>
-
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_gumbel.h>
-#include <hmmer3/easel/esl_random.h>
-#include <hmmer3/easel/esl_randomseq.h>
-#include <hmmer3/easel/esl_vectorops.h>
-
-#include <hmmer3/hmmer.h>
-
-/*****************************************************************
-* 1. p7_Calibrate(): model calibration wrapper
-*****************************************************************/
-
-/* Function: p7_Calibrate()
-* Synopsis: Calibrate the E-value parameters of a model.
-* Incept: SRE, Thu Dec 25 09:29:31 2008 [Magallon]
-*
-* Purpose: Calibrate the E-value parameters of a model with
-* one calculation ($\lambda$) and two brief simulations
-* (Viterbi $\mu$, Forward $\tau$).
-*
-* Args: hmm - HMM to be calibrated
-* cfg_b - OPTCFG: ptr to optional build configuration;
-* if <NULL>, use default parameters.
-* byp_rng - BYPASS optimization: pass ptr to <ESL_RANDOMNESS> generator
-* if already known;
-* <*byp_rng> == NULL> if <rng> return is desired;
-* pass <NULL> to use and discard internal default.
-* byp_bg - BYPASS optimization: pass ptr to <P7_BG> if already known;
-* <*byp_bg == NULL> if <bg> return is desired;
-* pass <NULL> to use and discard internal default.
-* byp_gm - BYPASS optimization: pass ptr to <gm> profile if already known;
-* pass <*byp_gm == NULL> if <gm> return desired;
-* pass <NULL> to use and discard internal default.
-* byp_om - BYPASS optimization: pass ptr to <om> profile if already known;
-* pass <*byp_om == NULL> if <om> return desired;
-* pass <NULL> to use and discard internal default.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure.
-* <eslEINVAL> if <hmm>, <gm>, <om> aren't compatible somehow.
-*
-* Xref: J4/41
-*/
-// ! CODE CHANGED: added taskstateinfo to function !
-int
-p7_Calibrate(P7_HMM *hmm, P7_BUILDER *cfg_b, ESL_RANDOMNESS **byp_rng, P7_BG **byp_bg, P7_PROFILE **byp_gm, P7_OPROFILE **byp_om,
- int percents, U2::TaskStateInfo & ti)
-{
- P7_BG *bg = (esl_byp_IsProvided(byp_bg) ? *byp_bg : NULL);
- P7_PROFILE *gm = (esl_byp_IsProvided(byp_gm) ? *byp_gm : NULL);
- P7_OPROFILE *om = (esl_byp_IsProvided(byp_om) ? *byp_om : NULL);
- ESL_RANDOMNESS *r = (esl_byp_IsProvided(byp_rng) ? *byp_rng : NULL);
- char *errbuf = ((cfg_b != NULL) ? cfg_b->errbuf : NULL);
- int EmL = ((cfg_b != NULL) ? cfg_b->EmL : 200);
- int EmN = ((cfg_b != NULL) ? cfg_b->EmN : 200);
- int EvL = ((cfg_b != NULL) ? cfg_b->EvL : 200);
- int EvN = ((cfg_b != NULL) ? cfg_b->EvN : 200);
- int EfL = ((cfg_b != NULL) ? cfg_b->EfL : 100);
- int EfN = ((cfg_b != NULL) ? cfg_b->EfN : 200);
- double Eft = ((cfg_b != NULL) ? cfg_b->Eft : 0.04);
- double lambda, mmu, vmu, tau;
- int status;
-
- /* Configure any objects we need
- * that weren't already passed to us as a bypass optimization
- */
- if (r == NULL) {
- if ((r = esl_randomness_CreateFast(42)) == NULL) ESL_XFAIL(eslEMEM, errbuf, "failed to create RNG");
- } else if (cfg_b != NULL && cfg_b->do_reseeding) {
- esl_randomness_Init(r, esl_randomness_GetSeed(r));
- }
-
- if (bg == NULL) {
- if ((bg = p7_bg_Create(hmm->abc)) == NULL) ESL_XFAIL(eslEMEM, errbuf, "failed to allocate background");
- }
-
- /* there's an odd case where the <om> is provided and a <gm> isn't going to be returned
- * where we don't need a <gm> at all, and <gm> stays <NULL> after the next block.
- * Note that the <EvL> length in the ProfileConfig doesn't matter; the individual
- * calibration routines MSVMu(), etc. contain their own length reconfig calls.
- */
- if ((esl_byp_IsInternal(byp_gm) && ! esl_byp_IsProvided(byp_om)) || esl_byp_IsReturned(byp_gm)) {
- if ( (gm = p7_profile_Create(hmm->M, hmm->abc)) == NULL) ESL_XFAIL(eslEMEM, errbuf, "failed to allocate profile");
- if ( (status = p7_ProfileConfig(hmm, bg, gm, EvL, p7_LOCAL)) != eslOK) ESL_XFAIL(status, errbuf, "failed to configure profile");
- }
-
- if (om == NULL) {
- if ((om = p7_oprofile_Create(hmm->M, hmm->abc)) == NULL) ESL_XFAIL(eslEMEM, errbuf, "failed to create optimized profile");
- if ((status = p7_oprofile_Convert(gm, om)) != eslOK) ESL_XFAIL(status, errbuf, "failed to convert to optimized profile");
- }
-
- /* The calibration steps themselves */
- if ((status = p7_Lambda(hmm, bg, &lambda)) != eslOK) ESL_XFAIL(status, errbuf, "failed to determine lambda");
- // ! CODE ADDED: check if task is canceled !
- if ((status = p7_MSVMu (r, om, bg, EmL, EmN, lambda, &mmu, ti )) == eslCANCELED) {
- goto ERROR;
- } else if( eslOK != status ) {
- ESL_XFAIL(status, errbuf, "failed to determine msv mu");
- }
-
- if ((status = p7_ViterbiMu(r, om, bg, EvL, EvN, lambda, &vmu, ti )) == eslCANCELED) {
- goto ERROR;
- } else if( eslOK != status ) {
- ESL_XFAIL(status, errbuf, "failed to determine vit mu");
- }
- if ((status = p7_Tau(r, om, bg, EfL, EfN, lambda, Eft, &tau, percents, ti )) == eslCANCELED) {
- goto ERROR;
- } else if( eslOK != status ) {
- ESL_XFAIL(status, errbuf, "failed to determine fwd tau");
- }
-
- /* Store results */
- hmm->evparam[p7_MLAMBDA] = om->evparam[p7_MLAMBDA] = lambda;
- hmm->evparam[p7_VLAMBDA] = om->evparam[p7_VLAMBDA] = lambda;
- hmm->evparam[p7_FLAMBDA] = om->evparam[p7_FLAMBDA] = lambda;
- hmm->evparam[p7_MMU] = om->evparam[p7_MMU] = mmu;
- hmm->evparam[p7_VMU] = om->evparam[p7_VMU] = vmu;
- hmm->evparam[p7_FTAU] = om->evparam[p7_FTAU] = tau;
- hmm->flags |= p7H_STATS;
-
- if (gm != NULL) {
- gm->evparam[p7_MLAMBDA] = lambda;
- gm->evparam[p7_VLAMBDA] = lambda;
- gm->evparam[p7_FLAMBDA] = lambda;
- gm->evparam[p7_MMU] = mmu;
- gm->evparam[p7_VMU] = vmu;
- gm->evparam[p7_FTAU] = tau;
- }
-
- if (byp_rng != NULL) *byp_rng = r; else esl_randomness_Destroy(r); /* bypass convention: no-op if rng was provided.*/
- if (byp_bg != NULL) *byp_bg = bg; else p7_bg_Destroy(bg); /* bypass convention: no-op if bg was provided. */
- if (byp_gm != NULL) *byp_gm = gm; else p7_profile_Destroy(gm); /* bypass convention: no-op if gm was provided. */
- if (byp_om != NULL) *byp_om = om; else p7_oprofile_Destroy(om); /* bypass convention: no-op if om was provided. */
- return eslOK;
-
-ERROR:
- if (! esl_byp_IsProvided(byp_rng)) esl_randomness_Destroy(r);
- if (! esl_byp_IsProvided(byp_bg)) p7_bg_Destroy(bg);
- if (! esl_byp_IsProvided(byp_gm)) p7_profile_Destroy(gm);
- if (! esl_byp_IsProvided(byp_om)) p7_oprofile_Destroy(om);
- return status;
-}
-/*---------------------- end, wrapper API -----------------------*/
-
-
-
-
-
-/*****************************************************************
-* 2. Determination of individual E-value parameters
-*****************************************************************/
-
-/* Function: p7_Lambda()
-* Synopsis: Determines length-corrected local lambda parameter.
-* Incept: SRE, Wed Aug 8 17:54:55 2007 [Janelia]
-*
-* Purpose: Determine the effective scale parameter $\hat{\lambda}$ to
-* use for model <hmm>. This will be applied both to
-* Viterbi Gumbel distributions and Forward exponential
-* tails.
-*
-* The 'true' $\lambda$ is always $\log 2 = 0.693$. The effective
-* lambda is corrected for edge effect, using the equation
-*
-* \[
-* \hat{\lambda} = \lambda + \frac{1.44}{MH}
-* \]
-*
-* where $M$ is the model length and $H$ is the model
-* relative entropy. The model relative entropy is
-* approximated by the average relative entropy of match
-* emission distributions. The 1.44 is an empirically
-* determined fudge factor [J1/125]. This edge-effect
-* correction is based largely on \citep{Altschul01},
-* except for the fudge factor, which we don't understand
-* and can't theoretically justify.
-*
-* Args: hmm : model to calculate corrected lambda for
-* bg : null model (source of background frequencies)
-* ret_lambda : RETURN: edge-corrected lambda
-*
-* Returns: <eslOK> on success, and <*ret_lambda> is the result.
-*
-* Throws: (no abnormal error conditions)
-*/
-int
-p7_Lambda(P7_HMM *hmm, P7_BG *bg, double *ret_lambda)
-{
- double H = p7_MeanMatchRelativeEntropy(hmm, bg);
-
- *ret_lambda = eslCONST_LOG2 + 1.44 / ((double) hmm->M * H);
- return eslOK;
-}
-
-
-/* Function: p7_MSVMu()
-* Synopsis: Determines the local MSV Gumbel mu parameter for a model.
-* Incept: SRE, Mon Aug 6 13:00:57 2007 [Janelia]
-*
-* Purpose: Given model <om> configured for local alignment (typically
-* multihit, but may be unihit), determine the Gumbel
-* location parameter $\mu$ for MSV scores by brief simulation. The
-* simulation generates <N> random sequences of length <L>
-* using background frequencies in the null model <bg> and
-* the random number generator <r>; scores them with <gm>
-* and <bg> with the MSV algorithm; and fits the
-* resulting distribution to a Gumbel of assumed <lambda>.
-*
-* Typical default choices are L=100, N=200, which gives
-* $\hat{\mu}$ estimates with precision (standard
-* deviation) of $\pm$ 0.1 bits, corresponding to an error
-* of $\pm$ 8\% in E-value estimates. [J1/135]. (Default L
-* was later increased to 200 to improve length dependence
-* slightly.)
-*
-* This function changes the length configuration of both
-* <om> and <bg>. The caller must remember to reconfigure
-* both of their length models appropriately for any
-* subsequent alignments.
-*
-* Args: r : source of random numbers
-* om : score profile (length config is changed upon return!)
-* bg : null model (length config is changed upon return!)
-* L : length of sequences to simulate
-* N : number of sequences to simulate
-* lambda : known Gumbel lambda parameter
-* ret_mmu : RETURN: ML estimate of location param mu
-*
-* Returns: <eslOK> on success, and <ret_mu> contains the ML estimate
-* of $\mu$.
-*
-* Throws: (no abnormal error conditions)
-*
-* Note: The FitCompleteLoc() function is simple, and it's tempting
-* to inline it here and save the <xv> working memory. However,
-* the FitCompleteLoc() function is vulnerable
-* to under/overflow error, and we'll probably fix it
-* eventually - need to be sure that fix applies here too.
-*/
-int
-p7_MSVMu(ESL_RANDOMNESS *r, P7_OPROFILE *om, P7_BG *bg, int L, int N, double lambda, double *ret_mmu, U2::TaskStateInfo & ti )
-{
- P7_OMX *ox = p7_omx_Create(om->M, 0, 0); /* DP matrix: 1 row version */
- ESL_DSQ *dsq = NULL;
- double *xv = NULL;
- int i;
- float sc, nullsc;
-#ifndef p7_IMPL_DUMMY
- float maxsc = (255 - om->base_b) / om->scale_b; /* if score overflows, use this */
-#endif
- int status;
-
- if (ox == NULL) { status = eslEMEM; goto ERROR; }
- ESL_ALLOC_WITH_TYPE(xv, double*, sizeof(double) * N);
- ESL_ALLOC_WITH_TYPE(dsq, ESL_DSQ*, sizeof(ESL_DSQ) * (L+2));
-
- p7_oprofile_ReconfigLength(om, L, L);
- p7_bg_SetLength(bg, L);
-
- for (i = 0; i < N; i++)
- {
- if ((status = esl_rsq_xfIID(r, bg->f, om->abc->K, L, dsq)) != eslOK) goto ERROR;
- if ((status = p7_bg_NullOne(bg, dsq, L, &nullsc)) != eslOK) goto ERROR;
-
- status = p7_MSVFilter(dsq, L, om, ox, &sc, 0, ti );
- if( eslCANCELED == status ) { goto ERROR; }
-#ifndef p7_IMPL_DUMMY
- if (status == eslERANGE) { sc = maxsc; status = eslOK;}
-#endif
- if (status != eslOK) goto ERROR;
-
- xv[i] = (sc - nullsc) / eslCONST_LOG2;
- }
-
- if ((status = esl_gumbel_FitCompleteLoc(xv, N, lambda, ret_mmu)) != eslOK) goto ERROR;
- p7_omx_Destroy(ox);
- free(xv);
- free(dsq);
- return eslOK;
-
-ERROR:
- *ret_mmu = 0.0;
- if (ox != NULL) p7_omx_Destroy(ox);
- if (xv != NULL) free(xv);
- if (dsq != NULL) free(dsq);
- return status;
-
-}
-
-/* Function: p7_ViterbiMu()
-* Synopsis: Determines the local Viterbi Gumbel mu parameter for a model.
-* Incept: SRE, Tue May 19 10:26:19 2009 [Janelia]
-*
-* Purpose: Identical to p7_MSVMu(), above, except that it fits
-* Viterbi scores instead of MSV scores.
-*
-* The difference between the two mus is small, but can be
-* up to ~1 bit or so for large, low-info models [J4/126] so
-* decided to calibrate the two mus separately [J5/8].
-*
-* Args: r : source of random numbers
-* om : score profile (length config is changed upon return!)
-* bg : null model (length config is changed upon return!)
-* L : length of sequences to simulate
-* N : number of sequences to simulate
-* lambda : known Gumbel lambda parameter
-* ret_vmu : RETURN: ML estimate of location param mu
-*
-* Returns: <eslOK> on success, and <ret_mu> contains the ML estimate
-* of $\mu$.
-*
-* Throws: (no abnormal error conditions)
-*/
-int
-p7_ViterbiMu(ESL_RANDOMNESS *r, P7_OPROFILE *om, P7_BG *bg, int L, int N, double lambda, double *ret_vmu, U2::TaskStateInfo & ti )
-{
- P7_OMX *ox = p7_omx_Create(om->M, 0, 0); /* DP matrix: 1 row version */
- ESL_DSQ *dsq = NULL;
- double *xv = NULL;
- int i;
- float sc, nullsc;
-#ifndef p7_IMPL_DUMMY
- float maxsc = (32767.0 - om->base_w) / om->scale_w; /* if score overflows, use this [J4/139] */
-#endif
- int status;
-
- if (ox == NULL) { status = eslEMEM; goto ERROR; }
- ESL_ALLOC_WITH_TYPE(xv, double*, sizeof(double) * N);
- ESL_ALLOC_WITH_TYPE(dsq, ESL_DSQ*, sizeof(ESL_DSQ) * (L+2));
-
- p7_oprofile_ReconfigLength(om, L, L);
- p7_bg_SetLength(bg, L);
-
- for (i = 0; i < N; i++)
- {
- if ((status = esl_rsq_xfIID(r, bg->f, om->abc->K, L, dsq)) != eslOK) goto ERROR;
- if ((status = p7_bg_NullOne(bg, dsq, L, &nullsc)) != eslOK) goto ERROR;
-
- status = p7_ViterbiFilter(dsq, L, om, ox, &sc, 0, ti );
- if( eslCANCELED == status ) { goto ERROR; }
-#ifndef p7_IMPL_DUMMY
- if (status == eslERANGE) { sc = maxsc; status = eslOK;}
-#endif
- if (status != eslOK) goto ERROR;
-
- xv[i] = (sc - nullsc) / eslCONST_LOG2;
- }
-
- if ((status = esl_gumbel_FitCompleteLoc(xv, N, lambda, ret_vmu)) != eslOK) goto ERROR;
- p7_omx_Destroy(ox);
- free(xv);
- free(dsq);
- return eslOK;
-
-ERROR:
- *ret_vmu = 0.0;
- if (ox != NULL) p7_omx_Destroy(ox);
- if (xv != NULL) free(xv);
- if (dsq != NULL) free(dsq);
- return status;
-
-}
-
-
-/* Function: p7_Tau()
-* Synopsis: Determine Forward tau by brief simulation.
-* Incept: SRE, Thu Aug 9 15:08:39 2007 [Janelia]
-*
-* Purpose: Determine the <tau> parameter for an exponential tail fit
-* to the Forward score distribution for model <om>, on
-* random sequences with the composition of the background
-* model <bg>. This <tau> parameter is for an exponential
-* distribution anchored from $P=1.0$, so it's not really a
-* tail per se; but it's only an accurate fit in the tail
-* of the Forward score distribution, from about $P=0.001$
-* or so.
-*
-* The determination of <tau> is done by a brief simulation
-* in which we fit a Gumbel distribution to a small number
-* of Forward scores of random sequences, and use that to
-* predict the location of the tail at probability <tailp>.
-*
-* The Gumbel is of course inaccurate, but we can use it
-* here solely as an empirical distribution to determine
-* the location of a reasonable <tau> more accurately on a
-* smaller number of samples than we could do with raw
-* order statistics.
-*
-* Typical choices are L=100, N=200, tailp=0.04, which
-* typically yield estimates $\hat{\mu}$ with a precision
-* (standard deviation) of $\pm$ 0.2 bits, corresponding to
-* a $\pm$ 15\% error in E-values. See [J1/135].
-*
-* The use of Gumbel fitting to a small number of $N$
-* samples and the extrapolation of $\hat{\mu}$ from the
-* estimated location of the 0.04 tail mass are both
-* empirical and carefully optimized against several
-* tradeoffs. Most importantly, around this choice of tail
-* probability, a systematic error introduced by the use of
-* the Gumbel fit is being cancelled by systematic error
-* introduced by the use of a higher tail probability than
-* the regime in which the exponential tail is a valid
-* approximation. See [J1/135] for discussion.
-*
-* This function changes the length configuration of both
-* <om> and <bg>. The caller must remember to reconfigure
-* both of their length models appropriately for any
-* subsequent alignments.
-*
-* Args: r : source of randomness
-* om : configured profile to sample sequences from
-* bg : null model (for background residue frequencies)
-* L : mean length model for seq emission from profile
-* N : number of sequences to generate
-* lambda : expected slope of the exponential tail (from p7_Lambda())
-* tailp : tail mass from which we will extrapolate mu
-* ret_mu : RETURN: estimate for the Forward mu (base of exponential tail)
-*
-* Returns: <eslOK> on success, and <*ret_fv> is the score difference
-* in bits.
-*
-* Throws: <eslEMEM> on allocation error, and <*ret_fv> is 0.
-*/
-int
-p7_Tau(ESL_RANDOMNESS *r, P7_OPROFILE *om, P7_BG *bg, int L, int N, double lambda, double tailp, double *ret_tau,
- int percents, U2::TaskStateInfo & ti)
-{
- P7_OMX *ox = p7_omx_Create(om->M, 0, L); /* DP matrix: for ForwardParser, L rows */
- ESL_DSQ *dsq = NULL;
- double *xv = NULL;
- float fsc, nullsc;
- double gmu, glam;
- int status;
- int i;
- int progressStart = 0;
-
- ESL_ALLOC_WITH_TYPE(xv, double*, sizeof(double) * N);
- ESL_ALLOC_WITH_TYPE(dsq, ESL_DSQ*, sizeof(ESL_DSQ) * (L+2));
- if (ox == NULL) { status = eslEMEM; goto ERROR; }
-
- p7_oprofile_ReconfigLength(om, L, L);
- p7_bg_SetLength(bg, L);
-
- progressStart = ti.progress;
- for (i = 0; i < N; i++)
- {
- ti.progress = progressStart + (int)(((double)percents / N) * i);
- if ((status = esl_rsq_xfIID(r, bg->f, om->abc->K, L, dsq)) != eslOK) goto ERROR;
- if ((status = p7_ForwardParser(dsq, L, om, ox, &fsc, 0, ti )) != eslOK) goto ERROR;
- if ((status = p7_bg_NullOne(bg, dsq, L, &nullsc)) != eslOK) goto ERROR;
- xv[i] = (fsc - nullsc) / eslCONST_LOG2;
- }
- if ((status = esl_gumbel_FitComplete(xv, N, &gmu, &glam)) != eslOK) goto ERROR;
-
- /* Explanation of the eqn below: first find the x at which the Gumbel tail
- * mass is predicted to be equal to tailp. Then back up from that x
- * by log(tailp)/lambda to set the origin of the exponential tail to 1.0
- * instead of tailp.
- */
- *ret_tau = esl_gumbel_invcdf(1.0-tailp, gmu, glam) + (log(tailp) / lambda);
-
- free(xv);
- free(dsq);
- p7_omx_Destroy(ox);
- return eslOK;
-
-ERROR:
- *ret_tau = 0.;
- if (xv != NULL) free(xv);
- if (dsq != NULL) free(dsq);
- if (ox != NULL) p7_omx_Destroy(ox);
- return status;
-}
-/*-------------- end, determining individual parameters ---------*/
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/eweight.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/eweight.cpp
deleted file mode 100644
index 53e0c42..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/eweight.cpp
+++ /dev/null
@@ -1,98 +0,0 @@
-/* "Entropy weighting" to determine absolute sequence number to use in hmmbuild.
-*
-* Reference:
-* L. Steven Johnson, "Remote Protein Homology Detection Using Hidden Markov Models",
-* Ph.D. thesis, Washington University School of Medicine, 2006.
-*
-* SRE, Fri May 4 14:01:54 2007 [Janelia] [Tom Waits, Orphans]
-* SVN $Id: eweight.c 2824 2009-06-10 16:06:06Z eddys $
-*/
-
-#include <hmmer3/p7_config.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_rootfinder.h>
-
-#include <hmmer3/hmmer.h>
-
-struct ew_param_s {
- const P7_HMM *hmm; /* ptr to the original count-based HMM, which remains unchanged */
- const P7_BG *bg; /* ptr to the null model */
- const P7_PRIOR *pri; /* Dirichlet prior used to parameterize from counts */
- P7_HMM *h2; /* our working space: a copy of <hmm> that we can muck with */
- double etarget; /* information content target, in bits */
-};
-
-/* Evaluate fx = rel entropy - etarget, which we want to be = 0,
-* for effective sequence number <x>.
-*/
-static int
-eweight_target_f(double Neff, void *params, double *ret_fx)
-{
- struct ew_param_s *p = (struct ew_param_s *) params;
-
- p7_hmm_CopyParameters(p->hmm, p->h2);
- p7_hmm_Scale(p->h2, Neff / (double) p->h2->nseq);
- p7_ParameterEstimation(p->h2, p->pri);
- *ret_fx = p7_MeanMatchRelativeEntropy(p->h2, p->bg) - p->etarget;
- return eslOK;
-}
-
-/* Function: p7_EntropyWeight()
-* Incept: SRE, Fri May 4 15:32:59 2007 [Janelia]
-*
-* Purpose: Use the "entropy weighting" algorithm to determine
-* what effective sequence number we should use, and
-* return it in <ret_Neff>.
-*
-* Caller provides a count-based <hmm>, and the
-* Dirichlet prior <pri> that's to be used to parameterize
-* models; neither of these will be modified.
-* Caller also provides the relative entropy
-* target in bits in <etarget>.
-*
-* <ret_Neff> will range from 0 to the true number of
-* sequences counted into the model, <hmm->nseq>.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure.
-*/
-int
-p7_EntropyWeight(const P7_HMM *hmm, const P7_BG *bg, const P7_PRIOR *pri, double etarget, double *ret_Neff)
-{
- int status;
- ESL_ROOTFINDER *R = NULL;
- struct ew_param_s p;
- double Neff;
- double fx;
-
- /* Store parameters in the structure we'll pass to the rootfinder
- */
- p.hmm = hmm;
- p.bg = bg;
- p.pri = pri;
- if ((p.h2 = p7_hmm_Clone(hmm)) == NULL) return eslEMEM;
- p.etarget = etarget;
-
- Neff = (double) hmm->nseq;
- if ((status = eweight_target_f(Neff, &p, &fx)) != eslOK) goto ERROR;
- if (fx > 0.)
- {
- if ((R = esl_rootfinder_Create(eweight_target_f, &p)) == NULL) {status = eslEMEM; goto ERROR;}
- esl_rootfinder_SetAbsoluteTolerance(R, 0.01); /* getting Neff to ~2 sig digits is fine */
- if ((status = esl_root_Bisection(R, 0., (double) hmm->nseq, &Neff)) != eslOK) goto ERROR;
-
- esl_rootfinder_Destroy(R);
- }
-
- p7_hmm_Destroy(p.h2);
- *ret_Neff = Neff;
- return eslOK;
-
-ERROR:
- if (p.h2 != NULL) p7_hmm_Destroy(p.h2);
- if (R != NULL) esl_rootfinder_Destroy(R);
- *ret_Neff = (double) hmm->nseq;
- return status;
-}
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/hmmer.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/hmmer.cpp
deleted file mode 100644
index 85d2455..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/hmmer.cpp
+++ /dev/null
@@ -1,177 +0,0 @@
-/* General routines used throughout HMMER.
-*
-* SRE, Fri Jan 12 13:19:38 2007 [Janelia] [Franz Ferdinand, eponymous]
-* SVN $Id: hmmer.c 2821 2009-06-05 11:13:23Z eddys $
-*/
-
-#include <hmmer3/p7_config.h>
-
-#include <math.h>
-#include <float.h>
-#include <assert.h>
-
-#include <hmmer3/easel/easel.h>
-#include "hmmer.h"
-
-
-/*****************************************************************
-* 1. Miscellaneous functions for H3
-*****************************************************************/
-
-// ! here were a banner function. we don't need it !
-
-
-/* Function: p7_AminoFrequencies()
-* Incept: SRE, Fri Jan 12 13:46:41 2007 [Janelia]
-*
-* Purpose: Fills a vector <f> with amino acid background frequencies,
-* in [A..Y] alphabetic order, same order that Easel digital
-* alphabet uses. Caller must provide <f> allocated for at
-* least 20 floats.
-*
-* These were updated 4 Sept 2007, from SwissProt 50.8,
-* (Oct 2006), counting over 85956127 (86.0M) residues.
-*
-* Returns: <eslOK> on success.
-*/
-int
-p7_AminoFrequencies(float *f)
-{
- f[0] = 0.0787945; /* A */
- f[1] = 0.0151600; /* C */
- f[2] = 0.0535222; /* D */
- f[3] = 0.0668298; /* E */
- f[4] = 0.0397062; /* F */
- f[5] = 0.0695071; /* G */
- f[6] = 0.0229198; /* H */
- f[7] = 0.0590092; /* I */
- f[8] = 0.0594422; /* K */
- f[9] = 0.0963728; /* L */
- f[10]= 0.0237718; /* M */
- f[11]= 0.0414386; /* N */
- f[12]= 0.0482904; /* P */
- f[13]= 0.0395639; /* Q */
- f[14]= 0.0540978; /* R */
- f[15]= 0.0683364; /* S */
- f[16]= 0.0540687; /* T */
- f[17]= 0.0673417; /* V */
- f[18]= 0.0114135; /* W */
- f[19]= 0.0304133; /* Y */
- return eslOK;
-}
-
-// ugene's funvtions for working with task settings
-// TODO: remove to another source file?
-
-void setDefaultUHMM3SearchSettings( UHMM3SearchSettings* it ) {
- assert( NULL != it );
- it->e = 10.0;
- it->t = OPTION_NOT_SET;
- it->z = OPTION_NOT_SET;
- it->domE = 10.0;
- it->domT = OPTION_NOT_SET;
- it->domZ = OPTION_NOT_SET;
- it->useBitCutoffs = -1;
- it->incE = 0.01;
- it->incT = OPTION_NOT_SET;
- it->incDomE = 0.01;
- it->incDomT = OPTION_NOT_SET;
- it->f1 = 0.02;
- it->f2 = 1e-3;
- it->f3 = 1e-5;
- it->doMax = FALSE;
- it->noBiasFilter = FALSE;
- it->noNull2 = FALSE;
- it->seed = 42;
-}
-
-bool checkUHMM3SearchSettings( UHMM3SearchSettings * it ) {
- assert( NULL != it );
-
- if( !( 0 < it->e ) ) { return false; }
- if( !( 0 < it->t || OPTION_NOT_SET == it->t ) ) { return false; }
- if( !( 0 < it->z || OPTION_NOT_SET == it->z ) ) { return false; }
- if( !( 0 < it->domE ) ) { return false; }
- if( !( 0 < it->domT || OPTION_NOT_SET == it->domT ) ) { return false; }
- if( !( 0 < it->domZ || OPTION_NOT_SET == it->domZ ) ) { return false; }
- if( !( 0 < it->incE ) ) { return false; }
- if( !( 0 < it->incT || OPTION_NOT_SET == it->incT ) ) { return false; }
- if( !( p7H_GA == it->useBitCutoffs || p7H_TC == it->useBitCutoffs || p7H_NC == it->useBitCutoffs
- || -1 == it->useBitCutoffs ) ) { return false; }
- if( !( 0 < it->incDomE ) ) { return false; }
- if( !( 0 < it->incDomT || OPTION_NOT_SET == it->incDomT ) ) { return false; }
- if( !( it->doMax == TRUE || it->doMax == FALSE ) ) { return false; }
- if( !( it->noBiasFilter == TRUE || it->noBiasFilter == FALSE ) ) { return false; }
- if( !( it->noNull2 == TRUE || it->noNull2 == FALSE ) ) { return false; }
- if( !( 0 <= it->seed ) ) { return false; }
-
- return true;
-}
-
-void setDefaultUHMM3BuildSettings( UHMM3BuildSettings * it ) {
- assert( NULL != it );
- it->archStrategy = p7_ARCH_FAST;
- it->wgtStrategy = p7_WGT_PB;
- it->effnStrategy = p7_EFFN_ENTROPY;
-
- it->eset = -1.0;
- it->seed = 42;
-
- it->symfrac = 0.5;
- it->fragtresh = 0.5;
- it->wid = 0.62;
- it->ere = -1.0;
- it->esigma = 45.0;
- it->eid = 0.62;
- it->eml = 200;
- it->emn = 200;
- it->evl = 200;
- it->evn = 200;
- it->efl = 100;
- it->efn = 200;
- it->eft = 0.04;
-}
-
-bool checkUHMM3BuildSettings( UHMM3BuildSettings * it ) {
- assert( NULL != it );
-
- if( it->archStrategy != p7_ARCH_FAST && it->archStrategy != p7_ARCH_HAND ) {
- return false;
- }
- if( it->wgtStrategy != p7_WGT_GSC && it->wgtStrategy != p7_WGT_BLOSUM && it->wgtStrategy != p7_WGT_PB &&
- it->wgtStrategy != p7_WGT_NONE && it->wgtStrategy != p7_WGT_GIVEN ) {
- return false;
- }
- if( it->effnStrategy != p7_EFFN_ENTROPY && it->effnStrategy != p7_EFFN_CLUST &&
- it->effnStrategy != p7_EFFN_NONE && it->effnStrategy != p7_EFFN_SET ) {
- return false;
- }
-
- if( !(0 <= it->symfrac && it->symfrac <= 1 ) ) { return false; }
- if( !(0 <= it->wid && it->wid <= 1 ) ) { return false; }
- if( !( 0 < it->eset ) ) { return false; }
- if( !( -1 == it->ere || 0 < it->ere ) ) { return false; }
- if( !( 0 <= it->fragtresh && it->fragtresh <= 1 ) ) { return false; }
- if( !( 0 < it->esigma ) ) { return false; }
- if( !(0 <= it->eid && it->eid <= 1 ) ) { return false; }
- if( !( 0 < it->eml ) ) { return false; }
- if( !( 0 < it->emn ) ) { return false; }
- if( !( 0 < it->evl ) ) { return false; }
- if( !( 0 < it->evn ) ) { return false; }
- if( !( 0 < it->efl ) ) { return false; }
- if( !( 0 < it->efn ) ) { return false; }
- if( !(0 < it->wid && it->wid < 1 ) ) { return false; }
- if( !( 0 < it->seed ) ) { return false; }
-
- return true;
-}
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/hmmer.h b/src/plugins_3rdparty/hmm3/src/hmmer3/hmmer.h
deleted file mode 100644
index 00a0b98..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/hmmer.h
+++ /dev/null
@@ -1,1149 +0,0 @@
-/* The all-encompassing include file for HMMER.
-* All-encompassing because there's a lot of crossdependency.
-* There's some opportunity for modularity, but not a lot.
-*
-* 1. P7_HMM: a core model.
-* 2. P7_PROFILE: a scoring profile, and its implicit model.
-* 3. P7_BG: a null (background) model.
-* 4. P7_TRACE: a traceback path (alignment of seq to profile).
-* 5. P7_HMMFILE: an HMM save file or database, open for reading.
-* 6. P7_GMX: a "generic" dynamic programming matrix
-* 7. P7_PRIOR: mixture Dirichlet prior for profile HMMs
-* 8. P7_SPENSEMBLE: segment pair ensembles for domain locations
-* 9. P7_ALIDISPLAY: an alignment formatted for printing
-* 10. P7_DOMAINDEF: reusably managing workflow in annotating domains
-* 11. P7_TOPHITS: ranking lists of top-scoring hits
-* 12. Inclusion of the architecture-specific optimized implementation.
-* 13. P7_PIPELINE: H3's accelerated seq/profile comparison pipeline
-* 14. P7_BUILDER: configuration options for new HMM construction.
-* 15. Declaration of functions in HMMER's exposed API.
-* 16. Copyright and license information.
-*
-* Also, see impl_{sse,vmx}/impl_{sse,vmx}.h for additional API
-* specific to the acceleration layer; in particular, the P7_OPROFILE
-* structure for an optimized profile.
-*
-* SRE, Wed Jan 3 13:46:42 2007 [Janelia] [Philip Glass, The Fog of War]
- * SVN $Id: hmmer.h 3048 2009-11-13 14:11:46Z eddys $
-*/
-#ifndef P7_HMMERH_INCLUDED
-#define P7_HMMERH_INCLUDED
-
-#include <hmmer3/p7_config.h>
-
-#include <stdio.h> /* FILE */
-
-#ifdef HAVE_MPI
-#include "mpi.h"
-#endif
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_alphabet.h> /* ESL_DSQ, ESL_ALPHABET */
-#include <hmmer3/easel/esl_dirichlet.h> /* ESL_MIXDCHLET */
-#include <hmmer3/easel/esl_dmatrix.h> /* ESL_DMATRIX */
-//#include "esl_histogram.h" /* ESL_HISTOGRAM */
-#include <hmmer3/easel/esl_hmm.h> /* ESL_HMM */
-#include <hmmer3/easel/esl_msa.h> /* ESL_MSA */
-#include <hmmer3/easel/esl_random.h> /* ESL_RANDOMNESS */
-#include <hmmer3/easel/esl_sq.h> /* ESL_SQ */
-#include <hmmer3/easel/esl_scorematrix.h> /* ESL_SCOREMATRIX */
-
-#include <U2Core/Task.h>
-
-#define HMMER3_CANCELED_ERROR "Task was canceled by user"
-#define HMMER3_UNKNOWN_ERROR "Sorry! Unknown error occurred!"
-
-/* Search modes. */
-#define p7_NO_MODE 0
-#define p7_LOCAL 1 /* multihit local: "fs" mode */
-#define p7_GLOCAL 2 /* multihit glocal: "ls" mode */
-#define p7_UNILOCAL 3 /* unihit local: "sw" mode */
-#define p7_UNIGLOCAL 4 /* unihit glocal: "s" mode */
-
-#define p7_IsLocal(mode) (mode == p7_LOCAL || mode == p7_UNILOCAL)
-#define p7_IsMulti(mode) (mode == p7_LOCAL || mode == p7_GLOCAL)
-
-#define p7_NEVPARAM 6 /* number of statistical parameters stored in models */
-#define p7_NCUTOFFS 6 /* number of Pfam score cutoffs stored in models */
-#define p7_NOFFSETS 3 /* number of disk offsets stored in models for hmmscan's fast model input */
-enum p7_evparams_e { p7_MMU = 0, p7_MLAMBDA = 1, p7_VMU = 2, p7_VLAMBDA = 3, p7_FTAU = 4, p7_FLAMBDA = 5 };
-enum p7_cutoffs_e { p7_GA1 = 0, p7_GA2 = 1, p7_TC1 = 2, p7_TC2 = 3, p7_NC1 = 4, p7_NC2 = 5 };
-enum p7_offsets_e { p7_MOFFSET = 0, p7_FOFFSET = 1, p7_POFFSET = 2 };
-
-#define p7_EVPARAM_UNSET -99999.0f /* if evparam[0] is unset, then all unset */
-#define p7_CUTOFF_UNSET -99999.0f /* if cutoff[XX1] is unset, then cutoff[XX2] unset, XX={GA,TC,NC} */
-#define p7_COMPO_UNSET -1.0f /* if compo[0] is unset, then all unset */
-
-/* Option flags when creating multiple alignments with p7_tracealign_*() */
-#define p7_DEFAULT 0
-#define p7_DIGITIZE (1<<0)
-#define p7_ALL_CONSENSUS_COLS (1<<1)
-#define p7_TRIM (1<<2)
-
-/* Option flags when creating faux traces with p7_trace_FauxFromMSA() */
-#define p7_MSA_COORDS (1<<0) /* default: i = unaligned seq residue coords */
-
-/*****************************************************************
-* 1. P7_HMM: a core model.
-*****************************************************************/
-
-/* Bit flags used in <hmm->flags>: optional annotation in an HMM
-*
-* Flags marked with ! may not be changed nor used for other meanings,
-* because they're codes used by HMMER2 (and earlier) that must be
-* preserved for reverse compatibility with old HMMER files.
-*
-* Why use flags? (So I don't ask this question of myself again:)
-* 1. The way we allocate an HMM, we need to know if we're allocating
-* M-width annotation fields (RF, CS, CA, MAP) before we read the
-* annotation from the file.
-* 2. Historically, H2 used flags, so we still need to read H2 flags
-* for backwards compatibility; so we may as well keep using them.
-*/
-#define p7H_HASBITS (1<<0) /* obsolete (was: model has log-odds scores) !*/
-#define p7H_DESC (1<<1) /* description exists !*/
-#define p7H_RF (1<<2) /* #RF annotation available !*/
-#define p7H_CS (1<<3) /* #CS annotation available !*/
-#define p7H_XRAY (1<<4) /* obsolete (was: structural data available) !*/
-#define p7H_HASPROB (1<<5) /* obsolete (was: model in probability form) !*/
-#define p7H_HASDNA (1<<6) /* obsolete (was: protein HMM->DNA seq params set) !*/
-#define p7H_STATS (1<<7) /* model has E-value statistics calibrated !*/
-#define p7H_MAP (1<<8) /* alignment map is available !*/
-#define p7H_ACC (1<<9) /* accession number is available !*/
-#define p7H_GA (1<<10) /* gathering thresholds available !*/
-#define p7H_TC (1<<11) /* trusted cutoffs available !*/
-#define p7H_NC (1<<12) /* noise cutoffs available !*/
-#define p7H_CA (1<<13) /* surface accessibilities available !*/
-#define p7H_COMPO (1<<14) /* model-specific residue composition available */
-#define p7H_CHKSUM (1<<15) /* model has an alignment checksum */
-
-
-/* Indices of Plan7 main model state transitions, hmm->t[k][] */
-enum p7h_transitions_e {
- p7H_MM = 0,
- p7H_MI = 1,
- p7H_MD = 2,
- p7H_IM = 3,
- p7H_II = 4,
- p7H_DM = 5,
- p7H_DD = 6
-};
-#define p7H_NTRANSITIONS 7
-
-/* How the hmm->t[k] vector is interpreted as separate probability vectors. */
-#define P7H_TMAT(hmm, k) ((hmm)->t[k])
-#define P7H_TINS(hmm, k) ((hmm)->t[k]+3)
-#define P7H_TDEL(hmm, k) ((hmm)->t[k]+5)
-#define p7H_NTMAT 3
-#define p7H_NTDEL 2
-#define p7H_NTINS 2
-
-/* Some notes:
-* 0. The model might be either in counts or probability form.
-* 1. t[0] is special: t[0][TMM,TMI,TMD] are the begin->M_1,I_0,D_1 entry probabilities,
-* t[0][TIM,TII] are the I_0 transitions, and delete state 0 doesn't
-* exist. Therefore D[0] transitions and mat[0] emissions are unused.
-* To simplify some normalization code, we adopt a convention that these are set
-* to valid probability distributions: 1.0 for t[0][TDM] and mat[0][0],
-* and 0 for the rest.
-* 2. t[M] is also special: TMD and TDD are 0 because there is no next delete state;
-* TDM is therefore 1.0 by definition. TMM and TDM are interpreted as the
-* M->E and D->E end transitions. t[M][TDM] must be 1.0, therefore.
-*/
-typedef struct p7_hmm_s {
- /*::cexcerpt::plan7_core::begin::*/
- int M; /* length of the model (# nodes) */
- float **t; /* transition prob's. t[(0),1..M][0..p7H_NTRANSITIONS-1] */
- float **mat; /* match emissions. mat[1..M][0..K-1] */
- float **ins; /* insert emissions. ins[1..M][0..K-1] */
- /*::cexcerpt::plan7_core::end::*/
-
- /* Annotation. Everything but <name> is optional. Flags are set when
- * optional values are set. All the char *'s are proper nul-terminated
- * strings, not just arrays. (hmm->map is an int array).
- */
- char *name; /* name of the model (mandatory) */ /* String, \0-terminated */
- char *acc; /* accession number of model (Pfam) (p7H_ACC) */ /* String, \0-terminated */
- char *desc; /* brief (1-line) description of model (p7H_DESC) */ /* String, \0-terminated */
- char *rf; /* reference line from alignment 1..M (p7H_RF) */ /* String; 0=' ', M+1='\0' */
- char *cs; /* consensus structure line 1..M (p7H_CS) */ /* String; 0=' ', M+1='\0' */
- char *ca; /* consensus accessibility line 1..M (p7H_CA) */ /* String; 0=' ', M+1='\0' */
- char *comlog; /* command line(s) that built model (optional: NULL) */ /* String, \0-terminated */
- int nseq; /* number of training sequences (optional: -1) */
- float eff_nseq; /* effective number of seqs (<= nseq) (optional: -1) */
- char *ctime; /* creation date (optional: NULL) */
- int *map; /* map of alignment cols onto model 1..M (p7H_MAP) */ /* Array; map[0]=0 */
- uint32_t checksum; /* checksum of training sequences (p7H_CHKSUM) */
- float evparam[p7_NEVPARAM]; /* E-value params (p7H_STATS) */
- float cutoff[p7_NCUTOFFS]; /* Pfam score cutoffs (p7H_{GA,TC,NC}) */
- float compo[p7_MAXABET]; /* model bg residue comp (p7H_COMPO) */
-
- off_t offset; /* HMM record offset on disk */
- ESL_ALPHABET *abc; /* COPY of alphabet info (hmm->abc->K is alphabet size) */
- int flags; /* status flags */
-} P7_HMM;
-
-
-/*****************************************************************
-* 2. P7_PROFILE: a scoring profile, and its implicit model.
-*****************************************************************/
-
-/* Indices for special state types in the length model, gm->xsc[x][]
-*/
-enum p7p_xstates_e {
- p7P_E = 0,
- p7P_N = 1,
- p7P_J = 2,
- p7P_C = 3
-};
-#define p7P_NXSTATES 4
-
-/* Indices for transitions from the length modeling scores gm->xsc[][x]
-*/
-enum p7p_xtransitions_e {
- p7P_LOOP = 0,
- p7P_MOVE = 1
-};
-#define p7P_NXTRANS 2
-
-/* Indices for transition scores gm->tsc[k][] */
-/* order is optimized for dynamic programming */
-enum p7p_tsc_e {
- p7P_MM = 0,
- p7P_IM = 1,
- p7P_DM = 2,
- p7P_BM = 3,
- p7P_MD = 4,
- p7P_DD = 5,
- p7P_MI = 6,
- p7P_II = 7,
-};
-#define p7P_NTRANS 8
-
-/* Indices for residue emission score vectors
-*/
-enum p7p_rsc_e {
- p7P_MSC = 0,
- p7P_ISC = 1
-};
-#define p7P_NR 2
-
-/* Accessing transition, emission scores */
-/* _BM is specially stored off-by-one: [k-1][p7P_BM] is score for entering at Mk */
-#define p7P_TSC(gm, k, s) ((gm)->tsc[(k) * p7P_NTRANS + (s)])
-#define p7P_MSC(gm, k, x) ((gm)->rsc[x][(k) * p7P_NR + p7P_MSC])
-#define p7P_ISC(gm, k, x) ((gm)->rsc[x][(k) * p7P_NR + p7P_ISC])
-
-
-typedef struct p7_profile_s {
- float *tsc; /* transitions [0.1..M-1][0..p7P_NTRANS-1], hand-indexed */
- float **rsc; /* emissions [0..Kp-1][0.1..M][p7P_NR], hand-indexed */
- float xsc[p7P_NXSTATES][p7P_NXTRANS]; /* special transitions [NECJ][LOOP,MOVE] */
-
- int mode; /* configured algorithm mode (e.g. p7_LOCAL) */
- int L; /* current configured target seq length */
- int allocM; /* max # of nodes allocated in this structure */
- int M; /* number of nodes in the model */
- float nj; /* expected # of uses of J; precalculated from loop config */
-
- /* Info, most of which is a copy from parent HMM: */
- char *name; /* unique name of model */
- char *acc; /* unique accession of model, or NULL */
- char *desc; /* brief (1-line) description of model, or NULL */
- char *rf; /* reference line from alignment 1..M; *rf=0 means unused */
- char *cs; /* consensus structure line 1..M, *cs=0 means unused */
- char *consensus; /* consensus residues to display in alignments, 1..M */
- float evparam[p7_NEVPARAM]; /* parameters for determining E-values, or UNSET */
- float cutoff[p7_NCUTOFFS]; /* per-seq/per-domain bit score cutoffs, or UNSET */
- float compo[p7_MAXABET]; /* per-model HMM filter composition, or UNSET */
-
- /* Disk offset information for hmmpfam's fast model retrieval */
- off_t offs[p7_NOFFSETS]; /* p7_{MFP}OFFSET, or -1 */
-
- off_t roff; /* record offset (start of record); -1 if none */
- off_t eoff; /* offset to last byte of record; -1 if unknown */
-
- const ESL_ALPHABET *abc; /* copy of pointer to appropriate alphabet */
-} P7_PROFILE;
-
-
-/*****************************************************************
-* 3. P7_BG: a null (background) model.
-*****************************************************************/
-
-typedef struct p7_bg_s {
- float p1; /* null model's self-loop probability */
- float *f; /* residue frequencies [0..K-1] */
-
- ESL_HMM *fhmm; /* 2-state HMM filter null model in prefilters */
-
- float omega; /* "prior" on the null2 score correction */
-
- const ESL_ALPHABET *abc; /* reference to alphabet in use */
-} P7_BG;
-
-/*****************************************************************
-* 4. P7_TRACE: a traceback (alignment of seq to profile).
-*****************************************************************/
-
-/* Traceback structure for alignment of a model to a sequence.
-*
-* A traceback only makes sense in a triplet (tr, gm, dsq), for a
-* given profile or HMM (with nodes 1..M) and a given digital sequence
-* (with positions 1..L).
-*
-* A traceback may be relative to a profile (usually) or to a core
-* model (as a special case in model construction; see build.c). You
-* can tell the difference by looking at the first statetype,
-* tr->st[0]; if it's a p7T_S, it's for a profile, and if it's p7T_B,
-* it's for a core model.
-*
-* A "profile" trace uniquely has S,N,C,T,J states and their
-* transitions; it also can have B->Mk and Mk->E internal entry/exit
- * transitions for local alignments.
- *
- * A "core" trace may contain I0, IM, and D1 states and their
- * transitions. A "core" trace can also have B->X->{MDI}k and
- * {MDI}k->X->E transitions as a special hack in a build procedure, to
- * deal with the case of a local alignment fragment implied by an
- * input alignment, which is "impossible" for a core model.
- * X "states" only appear in core traces, and only at these
- * entry/exit places; some code depends on this.
-*
-* A profile's N,C,J states emit on transition, not on state, so a
-* path of N emits 0 residues, NN emits 1 residue, NNN emits 2
-* residues, and so on. By convention, the trace always associates an
-* emission-on-transition with the trailing (destination) state, so
-* the first N, C, or J is stored in a trace as a nonemitter (i=0).
-*
-* A i coords in a traceback are usually 1..L with respect to an
-* unaligned digital target sequence, but in the special case of
-* traces faked from existing MSAs (as in hmmbuild), the coords may
-* be 1..alen relative to an MSA's columns.
-*/
-
-/* State types */
-enum p7t_statetype_e {
- p7T_BOGUS = 0,
- p7T_M = 1,
- p7T_D = 2,
- p7T_I = 3,
- p7T_S = 4,
- p7T_N = 5,
- p7T_B = 6,
- p7T_E = 7,
- p7T_C = 8,
- p7T_T = 9,
- p7T_J = 10,
- p7T_X = 11, /* missing data: used esp. for local entry/exits */
-};
-#define p7T_NSTATETYPES 12
-
-typedef struct p7_trace_s {
- int N; /* length of traceback */
- int nalloc; /* allocated length of traceback */
- char *st; /* state type code [0..N-1]*/
- int *k; /* node index; 1..M if M,D,I; else 0 [0..N-1]*/
- int *i; /* pos emitted in dsq, 1..L; else 0 [0..N-1]*/
- float *pp; /* posterior prob of x_i; else 0 [0..N-1]*/
- int M; /* model length M (maximum k) */
- int L; /* sequence length L (maximum i) */
-
- /* The following section is data generated by "indexing" a trace's domains */
- int ndom; /* number of domains in trace (= # of B or E states) */
- int *tfrom, *tto; /* locations of B/E states in trace (0..tr->N-1) */
- int *sqfrom, *sqto; /* first/last M-emitted residue on sequence (1..L) */
- int *hmmfrom, *hmmto;/* first/last M state on model (1..M) */
- int ndomalloc; /* current allocated size of these stacks */
-
-} P7_TRACE;
-
-
-// ! here were P7_HMMFILE definition. we don't need it !
-
-/*****************************************************************
-* 6. P7_GMX: a "generic" dynamic programming matrix
-*****************************************************************/
-
-enum p7g_scells_e {
- p7G_M = 0,
- p7G_I = 1,
- p7G_D = 2,
-};
-#define p7G_NSCELLS 3
-
-enum p7g_xcells_e {
- p7G_E = 0,
- p7G_N = 1,
- p7G_J = 2,
- p7G_B = 3,
- p7G_C = 4
-};
-#define p7G_NXCELLS 5
-
-
-typedef struct p7_gmx_s {
- int M; /* actual model dimension (model 1..M) */
- int L; /* actual sequence dimension (seq 1..L) */
-
- int allocR; /* current allocated # of rows : L+1 <= validR <= allocR */
- int validR; /* # of rows actually pointing at DP memory */
- int allocW; /* current set row width : M+1 <= allocW */
- uint64_t ncells; /* total # of allocated cells in 2D matrix : ncells >= (validR)(allocW) */
-
- float **dp; /* logically [0.1..L][0.1..M][0..p7G_NSCELLS-1]; indexed [i][k*p7G_NSCELLS+s] */
- float *xmx; /* logically [0.1..L][0..p7G_NXCELLS-1]; indexed [i*p7G_NXCELLS+s] */
-
- float *dp_mem;
-} P7_GMX;
-
-#define MMX(i,k) (dp[(i)][(k) * p7G_NSCELLS + p7G_M])
-#define IMX(i,k) (dp[(i)][(k) * p7G_NSCELLS + p7G_I])
-#define DMX(i,k) (dp[(i)][(k) * p7G_NSCELLS + p7G_D])
-#define XMX(i,s) (xmx[(i) * p7G_NXCELLS + (s)])
-
-#define TSC(s,k) (tsc[(k) * p7P_NTRANS + (s)])
-#define MSC(k) (rsc[(k) * p7P_NR + p7P_MSC])
-#define ISC(k) (rsc[(k) * p7P_NR + p7P_ISC])
-
-
-/*****************************************************************
-* 7. P7_PRIOR: mixture Dirichlet prior for profile HMMs
-*****************************************************************/
-
-typedef struct p7_prior_s {
- ESL_MIXDCHLET *tm; /* match transitions */
- ESL_MIXDCHLET *ti; /* insert transitions */
- ESL_MIXDCHLET *td; /* delete transitions */
- ESL_MIXDCHLET *em; /* match emissions */
- ESL_MIXDCHLET *ei; /* insert emissions */
-} P7_PRIOR;
-
-
-
-
-/*****************************************************************
-* 8. P7_SPENSEMBLE: segment pair ensembles for domain locations
-*****************************************************************/
-
-/* struct p7_spcoord_s:
-* a coord quad defining a segment pair.
-*/
-struct p7_spcoord_s {
- int idx; /* backreference index: which trace a seg came from, or which cluster a domain came from */
- int i, j; /* start,end in a target sequence (1..L) */
- int k, m; /* start,end in a query model (1..M) */
- float prob; /* posterior probability of segment */
-};
-
-/* Structure: P7_SPENSEMBLE
-*
-* Collection and clustering of an ensemble of sampled segment pairs,
-* in order to define domain locations using their posterior
-* probability distribution (as opposed to Viterbi MAP tracebacks).
-*/
-typedef struct p7_spensemble_s {
- /* Section 1: a collected ensemble of segment pairs */
- int nsamples; /* number of sampled traces */
- struct p7_spcoord_s *sp; /* array of sampled seg pairs; [0..n-1] */
- int nalloc; /* allocated size of <sp> */
- int n; /* number of seg pairs in <sp> */
-
- /* Section 2: then the ensemble is clustered by single-linkage clustering */
- int *workspace; /* temp space for Easel SLC algorithm: 2*n */
- int *assignment; /* each seg pair's cluster index: [0..n-1] = (0..nc-1) */
- int nc; /* number of different clusters */
-
- /* Section 3: then endpoint distribution is examined within each large cluster */
- int *epc; /* array counting frequency of each endpoint */
- int epc_alloc; /* allocated width of <epc> */
-
- /* Section 4: finally each large cluster is resolved into domain coords */
- struct p7_spcoord_s *sigc; /* array of coords for each domain, [0..nsigc-1] */
- int nsigc; /* number of "significant" clusters, domains */
- int nsigc_alloc; /* current allocated max for nsigc */
-} P7_SPENSEMBLE;
-
-
-/*****************************************************************
-* 9. P7_ALIDISPLAY: an alignment formatted for printing
-*****************************************************************/
-
-/* Structure: P7_ALIDISPLAY
-*
-* Alignment of a sequence domain to an HMM, formatted for printing.
-*
-* For an alignment of L residues and names C chars long, requires
-* 6L + 2C + 30 bytes; for typical case of L=100,C=10, that's
-* <0.7 Kb.
-*/
-typedef struct p7_alidisplay_s {
- char *rfline; /* reference coord info; or NULL */
- char *csline; /* consensus structure info; or NULL */
- char *model; /* aligned query consensus sequence */
- char *mline; /* "identities", conservation +'s, etc. */
- char *aseq; /* aligned target sequence */
- char *ppline; /* posterior prob annotation; or NULL */
- int N; /* length of strings */
-
- char *hmmname; /* name of HMM */
- char *hmmacc; /* accession of HMM; or [0]='\0' */
- char *hmmdesc; /* description of HMM; or [0]='\0' */
- int hmmfrom; /* start position on HMM (1..M, or -1) */
- int hmmto; /* end position on HMM (1..M, or -1) */
- int M; /* length of model */
-
- char *sqname; /* name of target sequence */
- char *sqacc; /* accession of target seq; or [0]='\0' */
- char *sqdesc; /* description of targ seq; or [0]='\0' */
- long sqfrom; /* start position on sequence (1..L) */
- long sqto; /* end position on sequence (1..L) */
- long L; /* length of sequence */
-
- int memsize; /* size of allocated block of memory */
- char *mem; /* memory used for the char data above */
-} P7_ALIDISPLAY;
-
-
-/*****************************************************************
-* 10. P7_DOMAINDEF: reusably managing workflow in defining domains
-*****************************************************************/
-
-typedef struct p7_dom_s {
- int ienv, jenv;
- int iali, jali;
- float envsc; /* Forward score in envelope ienv..jenv; nats; without null2 correction */
- float domcorrection; /* null2 correction to add null score when calculating a per-domain score */
- float dombias; /* FLogsum(0, log(bg->omega) + domcorrection): null2 contribution to bitscore */
- float oasc; /* optimal accuracy score (expected # residues correctly aligned) */
- float bitscore; /* overall score in bits, null corrected, if this were the only domain in seq */
- double pvalue; /* P-value of the bitscore */
- int is_reported; /* TRUE if domain meets reporting thresholds */
- int is_included; /* TRUE if domain meets inclusion thresholds */
- P7_ALIDISPLAY *ad;
-} P7_DOMAIN;
-
-/* Structure: P7_DOMAINDEF
-*
-* This is a container for all the necessary information for domain
-* definition procedures in <p7_domaindef.c>, including a bunch of
-* heuristic thresholds. The structure is reusable to minimize the
-* number of allocation/free cycles that need to be done when
-* processing a large number of sequences. You create the structure
-* with <p7_domaindef_Create()>; after you're done with defining
-* domains on a sequence, you call <p7_domaindef_Reuse()> before using
-* it on the next sequence; and when you're completely done, you free
-* it with <p7_domaindef_Destroy()>. All memory management is handled
-* internally; you don't need to reallocate anything yourself.
-*/
-typedef struct p7_domaindef_s {
- /* for posteriors of being in a domain, B, E */
- float *mocc; /* mocc[i=1..L] = prob that i is emitted by core model (is in a domain) */
- float *btot; /* btot[i=1..L] = cumulative expected times that domain starts at or before i */
- float *etot; /* etot[i=1..L] = cumulative expected times that domain ends at or before i */
- int L;
- int Lalloc;
-
- /* the ad hoc null2 model: 1..L nat scores for each residue, log f'(x_i) / f(x_i) */
- float *n2sc;
-
- /* rng and reusable memory for stochastic tracebacks */
- ESL_RANDOMNESS *r; /* random number generator */
- int do_reseeding; /* TRUE to reset the RNG, make results reproducible */
- P7_SPENSEMBLE *sp; /* an ensemble of sampled segment pairs (domain endpoints) */
- P7_TRACE *trr; /* reusable space for a trace of a domain */
- P7_TRACE *gtr; /* reusable space for a traceback of the entire target seq */
-
- /* Heuristic thresholds that control the region definition process */
- /* "rt" = "region threshold", for lack of better term */
- float rt1; /* controls when regions are called. mocc[i] post prob >= dt1 : triggers a region around i */
- float rt2; /* controls extent of regions. regions extended until mocc[i]-{b,e}occ[i] < dt2 */
- float rt3; /* controls when regions are flagged for split: if expected # of E preceding B is >= dt3 */
-
- /* Heuristic thresholds that control the stochastic traceback/clustering process */
- int nsamples; /* collect ensemble of this many stochastic traces */
- float min_overlap; /* 0.8 means >= 80% overlap of (smaller/larger) segment to link, both in seq and hmm */
- int of_smaller; /* see above; TRUE means overlap denom is calc'ed wrt smaller segment; FALSE means larger */
- int max_diagdiff; /* 4 means either start or endpoints of two segments must be within <=4 diagonals of each other */
- float min_posterior; /* 0.25 means a cluster must have >= 25% posterior prob in the sample to be reported */
- float min_endpointp; /* 0.02 means choose widest endpoint with post prob of at least 2% */
-
- /* storage of the results; domain locations, scores, alignments */
- P7_DOMAIN *dcl;
- int ndom; /* number of domains defined, in the end. */
- int nalloc; /* number of domain structures allocated in <dcl> */
-
- /* Additional results storage */
- float nexpected; /* posterior expected number of domains in the sequence (from posterior arrays) */
- int nregions; /* number of regions evaluated */
- int nclustered; /* number of regions evaluated by clustering ensemble of tracebacks */
- int noverlaps; /* number of envelopes defined in ensemble clustering that overlap w/ prev envelope */
- int nenvelopes; /* number of envelopes handed over for domain definition, null2, alignment, and scoring. */
-
-} P7_DOMAINDEF;
-
-
-/*****************************************************************
-* 11. P7_TOPHITS: ranking lists of top-scoring hits
-*****************************************************************/
-
-#define p7_HITFLAGS_DEFAULT 0
-#define p7_IS_INCLUDED (1<<0)
-#define p7_IS_REPORTED (1<<1)
-#define p7_IS_NEW (1<<2)
-#define p7_IS_DROPPED (1<<3)
-
-/* Structure: P7_HIT
-*
-* Info about a high-scoring database hit, kept so we can output a
-* sorted list of high hits at the end.
-*
-* sqfrom and sqto are the coordinates that will be shown
-* in the results, not coords in arrays... therefore, reverse
-* complements have sqfrom > sqto
-*/
-typedef struct p7_hit_s {
- char *name; /* name of the target */
- char *acc; /* accession of the target */
- char *desc; /* description of the target */
- double sortkey; /* number to sort by; big is better */
-
- float score; /* bit score of the sequence (all domains, w/ correction) */
- float pre_score; /* bit score of sequence before null2 correction */
- float sum_score; /* bit score reconstructed from sum of domain envelopes */
-
- double pvalue; /* P-value of the score */
- double pre_pvalue; /* P-value of the pre_score */
- double sum_pvalue; /* P-value of the sum_score */
-
- float nexpected; /* posterior expected number of domains in the sequence (from posterior arrays) */
- int nregions; /* number of regions evaluated */
- int nclustered; /* number of regions evaluated by clustering ensemble of tracebacks */
- int noverlaps; /* number of envelopes defined in ensemble clustering that overlap w/ prev envelope */
- int nenvelopes; /* number of envelopes handed over for domain definition, null2, alignment, and scoring. */
- int ndom; /* total # of domains identified in this seq */
-
- uint32_t flags; /* p7_IS_REPORTED | p7_IS_INCLUDED | p7_IS_NEW | p7_IS_DROPPED */
- int nreported; /* # of domains satisfying reporting thresholding */
- int nincluded; /* # of domains satisfying inclusion thresholding */
- int best_domain; /* index of best-scoring domain in dcl */
-
- P7_DOMAIN *dcl; /* domain coordinate list and alignment display */
-} P7_HIT;
-
-
-/* Structure: P7_TOPHITS
-* merging when we prepare to output results. "hit" list is NULL and
-* unavailable until after we do a sort.
-*/
-typedef struct p7_tophits_s {
- P7_HIT **hit; /* sorted pointer array */
- P7_HIT *unsrt; /* unsorted data storage */
- uint64_t Nalloc; /* current allocation size */
- uint64_t N; /* number of hits in list now */
- uint64_t nreported; /* number of hits that are reportable */
- uint64_t nincluded; /* number of hits that are includable */
- int is_sorted; /* TRUE when h->hit valid for all N hits */
-} P7_TOPHITS;
-
-
-
-/*****************************************************************
-* 12. The optimized implementation.
-*****************************************************************/
-#if defined (p7_IMPL_SSE)
-#include "impl_sse/impl_sse.h"
-#elif defined (p7_IMPL_VMX)
-#include "impl_vmx/impl_vmx.h"
-#else
-#include "impl_dummy/impl_dummy.h"
-#endif
-
-
-/*****************************************************************
-* 13. P7_PIPELINE: H3's accelerated seq/profile comparison pipeline
-*****************************************************************/
-
-enum p7_pipemodes_e { p7_SEARCH_SEQS = 0, p7_SCAN_MODELS = 1 };
-enum p7_zsetby_e { p7_ZSETBY_NTARGETS = 0, p7_ZSETBY_OPTION = 1, p7_ZSETBY_FILEINFO = 2 };
-
-typedef struct p7_pipeline_s {
- /* Dynamic programming matrices */
- P7_OMX *oxf; /* one-row Forward matrix, accel pipe */
- P7_OMX *oxb; /* one-row Backward matrix, accel pipe */
- P7_OMX *fwd; /* full Fwd matrix for domain envelopes */
- P7_OMX *bck; /* full Bck matrix for domain envelopes */
-
- /* Domain postprocessing */
- ESL_RANDOMNESS *r; /* random number generator */
- int do_reseeding; /* TRUE: reseed for reproducible results */
- P7_DOMAINDEF *ddef; /* domain definition workflow */
-
- /* Reporting threshold settings */
- int by_E; /* TRUE to cut per-target report off by E */
- double E; /* per-target E-value threshold */
- double T; /* per-target bit score threshold */
- int dom_by_E; /* TRUE to cut domain reporting off by E */
- double domE; /* domain E-value threshold */
- double domT; /* domain bit score threshold */
- int use_bit_cutoffs; /* (FALSE | p7H_GA | p7H_TC | p7H_NC) */
-
- /* Inclusion threshold settings */
- int inc_by_E; /* TRUE to threshold inclusion by E-values */
- double incE; /* per-target inclusion E-value threshold */
- double incT; /* per-target inclusion score threshold */
- int incdom_by_E; /* TRUE to threshold domain inclusion by E */
- double incdomE; /* per-domain inclusion E-value threshold */
- double incdomT; /* per-domain inclusion E-value threshold */
-
- /* Tracking search space sizes for E value calculations */
- double Z; /* eff # targs searched (per-target E-val) */
- double domZ; /* eff # signific targs (per-domain E-val) */
- enum p7_zsetby_e Z_setby; /* how Z was set */
- enum p7_zsetby_e domZ_setby; /* how domZ was set */
-
- /* Threshold settings for pipeline */
- int do_max; /* TRUE to run in slow/max mode */
- double F1; /* MSV filter threshold */
- double F2; /* Viterbi filter threshold */
- double F3; /* uncorrected Forward filter threshold */
- int do_biasfilter; /* TRUE to use biased comp HMM filter */
- int do_null2; /* TRUE to use null2 score corrections */
-
- /* Accounting. (reduceable in threaded/MPI parallel version) */
- uint64_t nmodels; /* # of HMMs searched */
- uint64_t nseqs; /* # of sequences searched */
- uint64_t nres; /* # of residues searched */
- uint64_t nnodes; /* # of model nodes searched */
- uint64_t n_past_msv; /* # comparisons that pass MSVFilter() */
- uint64_t n_past_bias; /* # comparisons that pass bias filter */
- uint64_t n_past_vit; /* # comparisons that pass ViterbiFilter() */
- uint64_t n_past_fwd; /* # comparisons that pass ForwardFilter() */
-
- enum p7_pipemodes_e mode; /* p7_SCAN_MODELS | p7_SEARCH_SEQS */
- int show_accessions;/* TRUE to output accessions not names */
- int show_alignments;/* TRUE to output alignments (default) */
-
- char errbuf[eslERRBUFSIZE];
-} P7_PIPELINE;
-
-// ! CODE ADDED: ugene's hmmer search settings !
-
-typedef struct _UHMM3SearchSettings {
- double e; // -E: report sequences <= this e-value treshold in output
- double t; // -T: report sequences >= this score treshold in output
- double z; // -Z: set # of camparisons done, for e-value calculation
- double domE; // --domE: report domains <= this e-value treshold in output
- double domT; // --domT: report domains >= this score cutoff in output
- double domZ; // --domZ: set number of significant seqs, for domain e-value calibration
- int useBitCutoffs; // --cut_ga: use profile's GA gathering cutoffs to set -T, --domT
- // --cut_nc: use profile's NC noising cutoffs to set -T, --domT
- // --cut_tc: use profile's TC trusted cutoffs to set -T, --domT
-
- double incE; // --incE: include sequences <= this e-value threshold in output ali
- double incT; // --incT: include sequences >= this score threshold in output ali
- double incDomE; // --incdomE: include domains <= this e-value threshold in output ali
- double incDomT; // --incdomT: include domains >= this score threshold in output ali
-
- double f1; // --F1: Stage 1 (MSV) threshold: promote hits w/ P <= F1
- double f2; // --F2: Stage 2 (Vit) threshold: promote hits w/ P <= F2
- double f3; // --F3: Stage 3 (Fwd) threshold: promote hits w/ P <= F3
-
- int doMax; // --max: Turn all heuristic filters off ( less speed more power )
- int noBiasFilter; // --nobias: turn off composition bias filter
- int noNull2; // --nonull2: turn off biased composition score corrections
-
- int seed; // --seed : set RNG seed ( if 0: one-time arbitrary seed )
-} UHMM3SearchSettings;
-
-extern void setDefaultUHMM3SearchSettings( UHMM3SearchSettings* it );
-extern bool checkUHMM3SearchSettings( UHMM3SearchSettings * it );
-
-#define OPTION_NOT_SET (-1.0)
-
-/*****************************************************************
-* 14. P7_BUILDER: pipeline for new HMM construction
-*****************************************************************/
-
-enum p7_archchoice_e { p7_ARCH_FAST = 0, p7_ARCH_HAND = 1 };
-enum p7_wgtchoice_e { p7_WGT_NONE = 0, p7_WGT_GIVEN = 1, p7_WGT_GSC = 2, p7_WGT_PB = 3, p7_WGT_BLOSUM = 4 };
-enum p7_effnchoice_e { p7_EFFN_NONE = 0, p7_EFFN_SET = 1, p7_EFFN_CLUST = 2, p7_EFFN_ENTROPY = 3 };
-
-typedef struct p7_builder_s {
- /* Model architecture */
- enum p7_archchoice_e arch_strategy; /* choice of model architecture determination algorithm */
- float symfrac; /* residue occ thresh for fast architecture determination */
- float fragthresh; /* < this fraction of average rlen, seq is a fragment */
-
- /* Relative sequence weights */
- enum p7_wgtchoice_e wgt_strategy; /* choice of relative sequence weighting algorithm */
- double wid; /* %id threshold for BLOSUM relative weighting */
-
- /* Effective sequence number */
- enum p7_effnchoice_e effn_strategy; /* choice of effective seq # determination algorithm */
- double re_target; /* rel entropy target for effn eweighting, if set; or -1.0*/
- double esigma; /* min total rel ent parameter for effn entropy weights */
- double eid; /* %id threshold for effn clustering */
- double eset; /* effective sequence number, if --eset; or -1.0 */
-
- /* Run-to-run variation due to random number generation */
- ESL_RANDOMNESS *r; /* RNG for E-value calibration simulations */
- int do_reseeding; /* TRUE to reseed, making results reproducible */
-
- /* E-value parameter calibration */
- int EmL; /* length of sequences generated for MSV fitting */
- int EmN; /* # of sequences generated for MSV fitting */
- int EvL; /* length of sequences generated for Viterbi fitting */
- int EvN; /* # of sequences generated for Viterbi fitting */
- int EfL; /* length of sequences generated for Forward fitting */
- int EfN; /* # of sequences generated for Forward fitting */
- double Eft; /* tail mass used for Forward fitting */
-
- /* Choice of prior */
- P7_PRIOR *prior; /* choice of prior when parameterizing from counts */
-
- /* Optional: information used for parameterizing single sequence queries */
- ESL_SCOREMATRIX *S; /* residue score matrix */
- ESL_DMATRIX *Q; /* Q->mx[a][b] = P(b|a) residue probabilities */
- double popen; /* gap open probability */
- double pextend; /* gap extend probability */
-
- const ESL_ALPHABET *abc; /* COPY of alphabet */
- char errbuf[eslERRBUFSIZE]; /* informative message on model construction failure */
-} P7_BUILDER;
-
-// CODE ADDED: ugene's hmmer build settings
-
-typedef struct _UHMM3BuildSettings {
- enum p7_archchoice_e archStrategy; // model construction strategies
- // --fast: assign cols >= symfrac residues as consensus
- // --hand: manual construction ( requires reference annotation )
-
- enum p7_wgtchoice_e wgtStrategy; // relative sequence weighting strategies
- // --wgsc: Gerstein/Sonnhammer/Chotia tree weights
- // --wblosum: Henikoff simple filter weights
- // --wpb: Henikoff position-based weigths
- // --wnone: don't do any relative weighting ( set all to 1 )
- // --wgiven: use weights as given in msa file
-
- enum p7_effnchoice_e effnStrategy; // effective sequence weighting strategies
- // --eent: adjust eff seq # to achieve relative entropy target
- // --eclust: eff seq # is # of single linkage clusters
- // --enone: no effective seq # weighting: just use nseq
- // --eset: seq eff seq # for all models
- double eset; // --eset argument
-
- int seed; // --seed argument
-
- float symfrac; // --symfrac. sets sym fraction controlling --fast construction
- float fragtresh; // --fragtresh. if L < x<L>, tag sequence as a fragment
- double wid; // --wid. for --wblosum: set identity cutoff
- double ere; // --ere. for --eent:set target relative entropy
- double esigma; // --esigma. for --eent: set sigma param to <x>
- double eid; // --eid. for --eclust. set fractional identity cutoff
- int eml; // --EmL. length of sequences for MSV Gumbel mu fit
- int emn; // --EmN. number of sequences for MSV Gumbel mu fit
- int evl; // --EvL. length of sequences for Viterbi Gumbel mu fit
- int evn; // --EvN. number of sequences for Viterbi Gumbel mu fit
- int efl; // --EfL. length of sequences for forward exp tail mu fit
- int efn; // --Efn. number of sequences for forward exp tail mu fit
- double eft; // --Eft. tail mass for forward exponential tail mu fit
-}UHMM3BuildSettings; // UHMM3BuildSettings
-
-extern void setDefaultUHMM3BuildSettings( UHMM3BuildSettings * it );
-extern bool checkUHMM3BuildSettings( UHMM3BuildSettings * it );
-
-/*****************************************************************
-* 15. Routines in HMMER's exposed API.
-*****************************************************************/
-
-/* build.c */
-extern int p7_Handmodelmaker(ESL_MSA *msa, P7_HMM **ret_hmm, P7_TRACE ***ret_tr);
-extern int p7_Fastmodelmaker(ESL_MSA *msa, float symfrac, P7_HMM **ret_hmm, P7_TRACE ***ret_tr);
-
-/* emit.c */
-extern int p7_CoreEmit (ESL_RANDOMNESS *r, const P7_HMM *hmm, ESL_SQ *sq, P7_TRACE *trr);
-extern int p7_ProfileEmit(ESL_RANDOMNESS *r, const P7_HMM *hmm, const P7_PROFILE *gm, const P7_BG *bg, ESL_SQ *sq, P7_TRACE *trr);
-extern int p7_emit_SimpleConsensus(const P7_HMM *hmm, ESL_SQ *sq);
-
-/* errors.c */
-extern void p7_Die (char *format, ...);
-extern void p7_Fail(char *format, ...);
-
-/* evalues.c */
-extern int p7_Calibrate(P7_HMM *hmm, P7_BUILDER *cfg_b, ESL_RANDOMNESS **byp_rng, P7_BG **byp_bg, P7_PROFILE **byp_gm, P7_OPROFILE **byp_om,
- int percents, U2::TaskStateInfo & ti);
-extern int p7_Lambda(P7_HMM *hmm, P7_BG *bg, double *ret_lambda);
-extern int p7_MSVMu (ESL_RANDOMNESS *r, P7_OPROFILE *om, P7_BG *bg, int L, int N, double lambda, double *ret_mmu, U2::TaskStateInfo & ti);
-extern int p7_ViterbiMu (ESL_RANDOMNESS *r, P7_OPROFILE *om, P7_BG *bg, int L, int N, double lambda, double *ret_vmu, U2::TaskStateInfo & ti);
-extern int p7_Tau (ESL_RANDOMNESS *r, P7_OPROFILE *om, P7_BG *bg, int L, int N, double lambda, double tailp, double *ret_tauint, int percents, U2::TaskStateInfo & ti);
-
-/* eweight.c */
-extern int p7_EntropyWeight(const P7_HMM *hmm, const P7_BG *bg, const P7_PRIOR *pri, double infotarget, double *ret_Neff);
-
-// here were definitions of generic versions of algorithms. we use only sse2 optimized
-
-/* heatmap.c (evolving now, intend to move this to Easel in the future) */
-// ! removed unused definitions !
-
-/* island.c */
-// ! removed unused definitions !
-
-/* h2_io.c */
-// ! removed unused definitions !
-
-/* hmmer.c */
-// ! removed unused definitions !
-
-extern int p7_AminoFrequencies(float *f);
-
-/* logsum.c */
-extern float p7_FLogsum(float a, float b );
-extern float p7_FLogsumError(float a, float b);
-// ! removed unused definitions !
-
-/* modelconfig.c */
-extern int p7_ProfileConfig(const P7_HMM *hmm, const P7_BG *bg, P7_PROFILE *gm, int L, int mode);
-extern int p7_ReconfigLength (P7_PROFILE *gm, int L);
-extern int p7_ReconfigMultihit(P7_PROFILE *gm, int L);
-extern int p7_ReconfigUnihit (P7_PROFILE *gm, int L);
-
-/* modelstats.c */
-extern double p7_MeanMatchRelativeEntropy(const P7_HMM *hmm, const P7_BG *bg);
-// ! removed unused definitions !
-// here were mpi support definitions. we don't need them
-
-/* tracealign.c */
-extern int p7_tracealign_Seqs(ESL_SQ **sq, P7_TRACE **trr, int nseq, int M, int optflags, ESL_MSA **ret_msa);
-extern int p7_tracealign_MSA (const ESL_MSA *premsa, P7_TRACE **trr, int M, int optflags, ESL_MSA **ret_postmsa);
-
-/* p7_alidisplay.c */
-extern P7_ALIDISPLAY *p7_alidisplay_Create(const P7_TRACE *trr, int which, const P7_OPROFILE *om, const ESL_SQ *sq);
-extern void p7_alidisplay_Destroy(P7_ALIDISPLAY *ad);
-extern char p7_alidisplay_EncodePostProb(float p);
-extern float p7_alidisplay_DecodePostProb(char pc);
-extern int p7_alidisplay_Print(FILE *fp, P7_ALIDISPLAY *ad, int min_aliwidth, int linewidth, int show_accessions);
-extern int p7_alidisplay_Backconvert(const P7_ALIDISPLAY *ad, const ESL_ALPHABET *abc, ESL_SQ **ret_sq, P7_TRACE **ret_tr);
-
-/* p7_bg.c */
-
-extern P7_BG *p7_bg_Create(const ESL_ALPHABET *abc);
-
-extern P7_BG *p7_bg_CreateUniform(const ESL_ALPHABET *abc);
-extern int p7_bg_Dump(FILE *ofp, const P7_BG *bg);
-extern void p7_bg_Destroy(P7_BG *bg);
-extern int p7_bg_SetLength(P7_BG *bg, int L);
-extern int p7_bg_NullOne(const P7_BG *bg, const ESL_DSQ *dsq, int L, float *ret_sc);
-
-extern int p7_bg_SetFilter (P7_BG *bg, int M, const float *compo);
-extern int p7_bg_FilterScore(P7_BG *bg, ESL_DSQ *dsq, int L, float *ret_sc);
-
-/* p7_builder.c */
-extern P7_BUILDER* p7_builder_Create( const UHMM3BuildSettings* settings, const ESL_ALPHABET *abc);
-extern int p7_builder_SetScoreSystem( P7_BUILDER * bld, ESL_SCOREMATRIX * s_matr, double popen, double pextend );
-extern void p7_builder_Destroy(P7_BUILDER *bld);
-
-extern int p7_Builder (P7_BUILDER *bld, ESL_MSA *msa, P7_BG *bg, P7_HMM **opt_hmm, P7_TRACE ***opt_trarr, P7_PROFILE **opt_gm, P7_OPROFILE **opt_om, ESL_MSA **opt_postmsa, U2::TaskStateInfo& ti );
-extern int p7_SingleBuilder(P7_BUILDER *bld, ESL_SQ *sq, P7_BG *bg, P7_HMM **opt_hmm, P7_TRACE **opt_tr, P7_PROFILE **opt_gm, P7_OPROFILE **opt_om,
- int percent, U2::TaskStateInfo & ti );
-
-/* p7_domaindef.c */
-extern P7_DOMAINDEF *p7_domaindef_Create (ESL_RANDOMNESS *r);
-extern int p7_domaindef_Fetch (P7_DOMAINDEF *ddef, int which, int *opt_i, int *opt_j, float *opt_sc, P7_ALIDISPLAY **opt_ad);
-extern int p7_domaindef_Reuse (P7_DOMAINDEF *ddef);
-extern int p7_domaindef_DumpPosteriors(FILE *ofp, P7_DOMAINDEF *ddef);
-extern void p7_domaindef_Destroy(P7_DOMAINDEF *ddef);
-
-extern int p7_domaindef_ByViterbi (P7_PROFILE *gm, const ESL_SQ *sq, P7_GMX *gx1, P7_GMX *gx2, P7_DOMAINDEF *ddef);
-extern int p7_domaindef_ByPosteriorHeuristics(const ESL_SQ *sq, P7_OPROFILE *om, P7_OMX *oxf, P7_OMX *oxb,
- P7_OMX *fwd, P7_OMX *bck, P7_DOMAINDEF *ddef, int percentBorder, U2::TaskStateInfo & ti, int wholeSeqSz );
-
-
-/* p7_gmx.c */
-extern P7_GMX *p7_gmx_Create(int allocM, int allocL);
-extern int p7_gmx_GrowTo(P7_GMX *gx, int allocM, int allocL);
-extern int p7_gmx_Reuse(P7_GMX *gx);
-extern void p7_gmx_Destroy(P7_GMX *gx);
-extern int p7_gmx_Compare(P7_GMX *gx1, P7_GMX *gx2, float tolerance);
-extern int p7_gmx_Dump(FILE *fp, P7_GMX *gx);
-extern int p7_gmx_DumpWindow(FILE *fp, P7_GMX *gx, int istart, int iend, int kstart, int kend, int show_specials);
-
-
-/* p7_hmm.c */
-/* 1. The P7_HMM object: allocation, initialization, destruction. */
-// CODE CHANGED: hmm creation changed
-extern P7_HMM *p7_hmm_Create(int M, const ESL_ALPHABET *abc);
-extern P7_HMM *p7_hmm_Create(int M, int alType, int flags );
-extern P7_HMM *p7_hmm_CreateShell(void);
-//extern int p7_hmm_CreateBody(P7_HMM *hmm, int M, const ESL_ALPHABET *abc);
-extern int p7_hmm_CreateBody(P7_HMM *hmm, int M, int alType );
-
-extern void p7_hmm_Destroy(P7_HMM *hmm);
-extern int p7_hmm_CopyParameters(const P7_HMM *src, P7_HMM *dest);
-extern P7_HMM *p7_hmm_Clone(const P7_HMM *hmm);
-extern int p7_hmm_Scale(P7_HMM *hmm, double scale);
-extern int p7_hmm_Zero(P7_HMM *hmm);
-extern char p7_hmm_EncodeStatetype(char *typestring);
-extern char *p7_hmm_DecodeStatetype(char st);
-/* 2. Convenience routines for setting fields in an HMM. */
-extern int p7_hmm_SetName (P7_HMM *hmm, char *name);
-extern int p7_hmm_SetAccession (P7_HMM *hmm, char *acc);
-extern int p7_hmm_SetDescription(P7_HMM *hmm, char *desc);
-extern int p7_hmm_AppendComlog (P7_HMM *hmm, int argc, char **argv);
-extern int p7_hmm_SetCtime (P7_HMM *hmm);
-extern int p7_hmm_SetComposition(P7_HMM *hmm);
-/* 3. Renormalization and rescaling counts in core HMMs. */
-extern int p7_hmm_Rescale(P7_HMM *hmm, float scale);
-extern int p7_hmm_Renormalize(P7_HMM *hmm);
-/* 4. Debugging and development code. */
-extern int p7_hmm_Dump(FILE *fp, P7_HMM *hmm);
-extern int p7_hmm_Sample (ESL_RANDOMNESS *r, int M, const ESL_ALPHABET *abc, P7_HMM **ret_hmm);
-extern int p7_hmm_SampleUngapped (ESL_RANDOMNESS *r, int M, const ESL_ALPHABET *abc, P7_HMM **ret_hmm);
-extern int p7_hmm_SampleEnumerable(ESL_RANDOMNESS *r, int M, const ESL_ALPHABET *abc, P7_HMM **ret_hmm);
-extern int p7_hmm_SampleUniform (ESL_RANDOMNESS *r, int M, const ESL_ALPHABET *abc,
- float tmi, float tii, float tmd, float tdd, P7_HMM **ret_hmm);
-extern int p7_hmm_Compare(P7_HMM *h1, P7_HMM *h2, float tol);
-extern int p7_hmm_Validate(P7_HMM *hmm, char *errbuf, float tol);
-/* 5. Other routines in the API */
-extern int p7_hmm_CalculateOccupancy(const P7_HMM *hmm, float *mocc, float *iocc);
-
-
-/* p7_hmmfile.c */
-// we don't need hmmfile functions
-
-/* p7_pipeline.c */
-extern P7_PIPELINE* p7_pipeline_Create( const UHMM3SearchSettings* settings, int M_hint, int L_hint, enum p7_pipemodes_e mode );
-extern int p7_pipeline_Reuse (P7_PIPELINE *pli);
-extern void p7_pipeline_Destroy(P7_PIPELINE *pli);
-extern int p7_pipeline_Merge (P7_PIPELINE *p1, P7_PIPELINE *p2);
-
-// ! CODE CHANGED: reporting by double values !
-extern int p7_pli_TargetReportable(P7_PIPELINE *pli, float score, double Pval);
-extern int p7_pli_DomainReportable(P7_PIPELINE *pli, float dom_score, double Pval);
-extern int p7_pli_TargetIncludable(P7_PIPELINE *pli, float score, double Pval);
-extern int p7_pli_DomainIncludable(P7_PIPELINE *pli, float dom_score, double Pval);
-extern int p7_pli_NewModel (P7_PIPELINE *pli, const P7_OPROFILE *om, P7_BG *bg);
-extern int p7_pli_NewModelThresholds(P7_PIPELINE *pli, const P7_OPROFILE *om);
-extern int p7_pli_NewSeq (P7_PIPELINE *pli, const ESL_SQ *sq);
-extern int p7_Pipeline (P7_PIPELINE *pli, P7_OPROFILE *om, P7_BG *bg, const ESL_SQ *sq, P7_TOPHITS *th, int percentPerFilters,
- U2::TaskStateInfo & ti, int wholeSeqSz );
-
-
-/* p7_prior.c */
-extern P7_PRIOR *p7_prior_CreateAmino(void);
-extern P7_PRIOR *p7_prior_CreateNucleic(void);
-extern P7_PRIOR *p7_prior_CreateLaplace(const ESL_ALPHABET *abc);
-extern void p7_prior_Destroy(P7_PRIOR *pri);
-
-extern int p7_ParameterEstimation(P7_HMM *hmm, const P7_PRIOR *pri);
-
-/* p7_profile.c */
-extern P7_PROFILE *p7_profile_Create(int M, const ESL_ALPHABET *abc);
-extern P7_PROFILE *p7_profile_Clone(const P7_PROFILE *gm);
-extern int p7_profile_Copy(const P7_PROFILE *src, P7_PROFILE *dst);
-extern int p7_profile_SetNullEmissions(P7_PROFILE *gm);
-extern int p7_profile_Reuse(P7_PROFILE *gm);
-extern void p7_profile_Destroy(P7_PROFILE *gm);
-extern int p7_profile_IsLocal(const P7_PROFILE *gm);
-extern int p7_profile_IsMultihit(const P7_PROFILE *gm);
-extern int p7_profile_GetT(const P7_PROFILE *gm, char st1, int k1,
- char st2, int k2, float *ret_tsc);
-extern int p7_profile_Validate(const P7_PROFILE *gm, char *errbuf, float tol);
-extern int p7_profile_Compare(P7_PROFILE *gm1, P7_PROFILE *gm2, float tol);
-
-/* p7_spensemble.c */
-P7_SPENSEMBLE *p7_spensemble_Create(int init_n, int init_epc, int init_sigc);
-extern int p7_spensemble_Reuse(P7_SPENSEMBLE *sp);
-extern int p7_spensemble_Add(P7_SPENSEMBLE *sp, int sampleidx, int i, int j, int k, int m);
-extern int p7_spensemble_Cluster(P7_SPENSEMBLE *sp,
- float min_overlap, int of_smaller, int max_diagdiff,
- float min_posterior, float min_endpointp,
- int *ret_nclusters);
-extern int p7_spensemble_GetClusterCoords(P7_SPENSEMBLE *sp, int which,
- int *ret_i, int *ret_j, int *ret_k, int *ret_m, float *ret_p);
-extern void p7_spensemble_Destroy(P7_SPENSEMBLE *sp);
-
-/* p7_tophits.c */
-extern P7_TOPHITS *p7_tophits_Create(void);
-extern int p7_tophits_Grow(P7_TOPHITS *h);
-extern int p7_tophits_CreateNextHit(P7_TOPHITS *h, P7_HIT **ret_hit);
-extern int p7_tophits_Add(P7_TOPHITS *h,
- char *name, char *acc, char *desc,
- double sortkey,
- float score, double pvalue,
- float mothersc, double motherp,
- int sqfrom, int sqto, int sqlen,
- int hmmfrom, int hmmto, int hmmlen,
- int domidx, int ndom,
- P7_ALIDISPLAY *ali);
-extern int p7_tophits_Sort(P7_TOPHITS *h);
-extern int p7_tophits_Merge(P7_TOPHITS *h1, P7_TOPHITS *h2);
-extern int p7_tophits_GetMaxNameLength(P7_TOPHITS *h);
-extern int p7_tophits_GetMaxAccessionLength(P7_TOPHITS *h);
-extern int p7_tophits_GetMaxShownLength(P7_TOPHITS *h);
-extern void p7_tophits_Destroy(P7_TOPHITS *h);
-
-extern int p7_tophits_Threshold(P7_TOPHITS *th, P7_PIPELINE *pli);
-extern int p7_tophits_CompareRanking(P7_TOPHITS *th, ESL_KEYHASH *kh, int *opt_nnew);
-extern int p7_tophits_Targets(FILE *ofp, P7_TOPHITS *th, P7_PIPELINE *pli, int textw);
-extern int p7_tophits_Domains(FILE *ofp, P7_TOPHITS *th, P7_PIPELINE *pli, int textw);
-extern int p7_tophits_Alignment(const P7_TOPHITS *th, const ESL_ALPHABET *abc,
- ESL_SQ **inc_sqarr, P7_TRACE **inc_trarr, int inc_n, int optflags,
- ESL_MSA **ret_msa);
-// removed unused definitions
-
-
-/* p7_trace.c */
-extern P7_TRACE *p7_trace_Create(void);
-extern P7_TRACE *p7_trace_CreateWithPP(void);
-extern int p7_trace_Reuse(P7_TRACE *trr);
-extern int p7_trace_Grow(P7_TRACE *trr);
-extern int p7_trace_GrowIndex(P7_TRACE *trr);
-extern int p7_trace_GrowTo(P7_TRACE *trr, int N);
-extern int p7_trace_GrowIndexTo(P7_TRACE *trr, int ndom);
-extern void p7_trace_Destroy(P7_TRACE *trr);
-extern void p7_trace_DestroyArray(P7_TRACE **trr, int N);
-
-extern int p7_trace_GetDomainCount (const P7_TRACE *trr, int *ret_ndom);
-extern int p7_trace_GetStateUseCounts(const P7_TRACE *trr, int *counts);
-extern int p7_trace_GetDomainCoords (const P7_TRACE *trr, int which, int *ret_i1, int *ret_i2,
- int *ret_k1, int *ret_k2);
-
-extern int p7_trace_Validate(const P7_TRACE *trr, const ESL_ALPHABET *abc, const ESL_DSQ *dsq, char *errbuf);
-extern int p7_trace_Dump(FILE *fp, const P7_TRACE *trr, const P7_PROFILE *gm, const ESL_DSQ *dsq);
-extern int p7_trace_Compare(P7_TRACE *tr1, P7_TRACE *tr2, float pptol);
-extern int p7_trace_Score(P7_TRACE *trr, ESL_DSQ *dsq, P7_PROFILE *gm, float *ret_sc);
-extern int p7_trace_SetPP(P7_TRACE *trr, const P7_GMX *pp);
-extern float p7_trace_GetExpectedAccuracy(const P7_TRACE *trr);
-
-extern int p7_trace_Append(P7_TRACE *trr, char st, int k, int i);
-extern int p7_trace_AppendWithPP(P7_TRACE *trr, char st, int k, int i, float pp);
-extern int p7_trace_Reverse(P7_TRACE *trr);
-extern int p7_trace_Index(P7_TRACE *trr);
-
-extern int p7_trace_FauxFromMSA(ESL_MSA *msa, int *matassign, int optflags, P7_TRACE **trr);
-extern int p7_trace_Doctor(P7_TRACE *trr, int *opt_ndi, int *opt_nid);
-
-extern int p7_trace_Count(P7_HMM *hmm, ESL_DSQ *dsq, float wt, P7_TRACE *trr);
-
-
-
-
-
-
-/* seqmodel.c */
-extern int p7_Seqmodel(const ESL_ALPHABET *abc, ESL_DSQ *dsq, int M, char *name,
- ESL_DMATRIX *P, float *f, double popen, double pextend,
- P7_HMM **ret_hmm);
-
-#endif /*P7_HMMERH_INCLUDED*/
-
-/*****************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/hmmer3_funcs.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/hmmer3_funcs.cpp
deleted file mode 100644
index 23fa49b..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/hmmer3_funcs.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-#include <limits>
-
-#include <QtCore/QtGlobal>
-
-#include "hmmer3_funcs.h"
-
-#ifdef _WINDOWS
-
-int isnan( float x ) {
- return x != x;
-}
-
-int isinf( float x ) {
- float inf = infinity();
- return inf == x || inf == -x;
-}
-
-#if (!defined(_MSC_VER) || _MSC_VER < 1800)
-float roundf( float x ){
- if( isnan( x ) || isinf( x ) ) {
- return x;
- }
- return (float)(x >= 0.0 ? (int)(x + 0.5) : (int)(x - (int)(x-1) + 0.5) + (int)(x-1));
-}
-#endif
-
-#endif // _WINDOWS
-
-float infinity() {
- return std::numeric_limits< float >::infinity();
-}
-
-bool isfin( float x ) {
- return !std::isnan( x ) && !std::isinf( x );
-}
-
-const char TERM_SYM = '\0';
-
-void throwUHMMER3Exception( const char* str ) {
- UHMMER3Exception ex;
- int strSz = qMin( (int)strlen( str ), EXCEPTION_MSG_SZ - 1 );
- strncpy( ex.msg, str, strSz );
- ex.msg[strSz] = TERM_SYM;
- throw ex;
-}
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/hmmer3_funcs.h b/src/plugins_3rdparty/hmm3/src/hmmer3/hmmer3_funcs.h
deleted file mode 100644
index bfff9bc..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/hmmer3_funcs.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#ifndef _UHMM3_FUNCS_H_
-#define _UHMM3_FUNCS_H_
-
-#ifdef _WINDOWS
-
-#define snprintf sprintf_s
-
-extern int isnan( float x );
-extern int isinf( float x );
-
-#if (!defined(_MSC_VER) || _MSC_VER < 1800)
-extern float roundf( float x );
-#endif
-
-#else // if not Windows -> we have all functions and defines in math.h
-#include <math.h>
-#endif // _WINDOWS
-#include <cmath>
-
-extern float infinity();
-extern bool isfin( float x ); // sure that number is finite
-
-#define EXCEPTION_MSG_SZ 512
-struct UHMMER3Exception {
- char msg[EXCEPTION_MSG_SZ];
-
-}; // UHMMER3Exception
-
-extern void throwUHMMER3Exception( const char* str );
-
-#endif // _UHMM3_FUNCS_H_
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/decoding.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/decoding.cpp
deleted file mode 100644
index 6032970..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/decoding.cpp
+++ /dev/null
@@ -1,204 +0,0 @@
-/* Posterior decoding algorithms; SSE versions.
-*
-* Contents:
-* 1. Posterior decoding algorithms.
-* 6. Copyright and license information.
-*
-* SRE, Mon Aug 18 08:15:50 2008 [Janelia]
- * SVN $Id: decoding.c 3048 2009-11-13 14:11:46Z eddys $
-*/
-
-#include <hmmer3/p7_config.h>
-
-#include <stdio.h>
-#include <math.h>
-
-#include <xmmintrin.h> /* SSE */
-#include <emmintrin.h> /* SSE2 */
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_sse.h>
-
-#include <hmmer3/hmmer.h>
-#include "impl_sse.h"
-
-/*****************************************************************
-* 1. Posterior decoding algorithms.
-*****************************************************************/
-
-/* Function: p7_Decoding()
-* Synopsis: Posterior decoding of residue assignment.
-* Incept: SRE, Fri Aug 8 14:29:42 2008 [UA217 to SFO]
-*
-* Purpose: Identical to <p7_GDecoding()> except that <om>, <oxf>,
-* <oxb> are SSE optimized versions. See <p7_GDecoding()>
-* documentation for more info.
-*
-* Args: om - profile (must be the same that was used to fill <oxf>, <oxb>).
-* oxf - filled Forward matrix
-* oxb - filled Backward matrix
-* pp - RESULT: posterior decoding matrix.
-*
-* Returns: <eslOK> on success.
-*
-* Returns <eslERANGE> if numeric range of floating-point
-* values is exceeded during posterior probability
-* calculations. In this case, the <pp> matrix must not be
-* used by the caller; it will contain <NaN> values. To be
-* safe, the caller should recalculate a generic posterior
-* decoding matrix instead -- generic calculations are done
-* in log probability space and are robust.
-*
-* However, I currently believe that this overflow only
-* occurs on an unusual and ignorable situation: when a
-* <p7_UNILOCAL> model is used on a region that contains
-* two or more high scoring distinct alignments to the
-* model. And that only happens if domain definition fails,
-* after stochastic clustering, and an envelope that we
-* pass to p7_domaindef.c::rescore_isolated_domain()
-* erroneously contains 2+ distinct domains. (Note that
-* this is different from having 2+ expected B states: that
-* can happen normally, if a single consistent domain is
-* better described by 2+ passes through the model). And I
-* strongly believe all this only can happen on repetitive
-* or biased-composition junk that we want to ignore anyway.
-* Therefore the caller should be safe in ignoring any domain
-* for which <p7_Decoding()> returns <eslERANGE>.
-*
- * Exception (bug #h68): see hmmalign.c, where the model is
- * in unilocal mode, and it is entirely possible for the
- * user to give us a multidomain protein.
- *
-* Throws: (no abnormal error conditions)
-*
-* Xref: [J3/119-121]: for analysis of numeric range issues when
-* <scaleproduct> overflows.
-*/
-int
-p7_Decoding(const P7_OPROFILE *om, const P7_OMX *oxf, P7_OMX *oxb, P7_OMX *pp)
-{
- __m128 *ppv;
- __m128 *fv;
- __m128 *bv;
- __m128 totrv;
- int L = oxf->L;
- int M = om->M;
- int Q = p7O_NQF(M);
- int i,q;
- float scaleproduct = 1.0 / oxb->xmx[p7X_N];
-
- pp->M = M;
- pp->L = L;
-
- ppv = pp->dpf[0];
- for (q = 0; q < Q; q++) {
- *ppv = _mm_setzero_ps(); ppv++;
- *ppv = _mm_setzero_ps(); ppv++;
- *ppv = _mm_setzero_ps(); ppv++;
- }
- pp->xmx[p7X_E] = 0.0;
- pp->xmx[p7X_N] = 0.0;
- pp->xmx[p7X_J] = 0.0;
- pp->xmx[p7X_C] = 0.0;
- pp->xmx[p7X_B] = 0.0;
-
- for (i = 1; i <= L; i++)
- {
- ppv = pp->dpf[i];
- fv = oxf->dpf[i];
- bv = oxb->dpf[i];
- totrv = _mm_set1_ps(scaleproduct * oxf->xmx[i*p7X_NXCELLS+p7X_SCALE]);
-
- for (q = 0; q < Q; q++)
- {
- /* M */
- *ppv = _mm_mul_ps(*fv, *bv);
- *ppv = _mm_mul_ps(*ppv, totrv);
- ppv++; fv++; bv++;
-
- /* D */
- *ppv = _mm_setzero_ps();
- ppv++; fv++; bv++;
-
- /* I */
- *ppv = _mm_mul_ps(*fv, *bv);
- *ppv = _mm_mul_ps(*ppv, totrv);
- ppv++; fv++; bv++;
- }
- pp->xmx[i*p7X_NXCELLS+p7X_E] = 0.0;
- pp->xmx[i*p7X_NXCELLS+p7X_N] = oxf->xmx[(i-1)*p7X_NXCELLS+p7X_N] * oxb->xmx[i*p7X_NXCELLS+p7X_N] * om->xf[p7O_N][p7O_LOOP] * scaleproduct;
- pp->xmx[i*p7X_NXCELLS+p7X_J] = oxf->xmx[(i-1)*p7X_NXCELLS+p7X_J] * oxb->xmx[i*p7X_NXCELLS+p7X_J] * om->xf[p7O_J][p7O_LOOP] * scaleproduct;
- pp->xmx[i*p7X_NXCELLS+p7X_C] = oxf->xmx[(i-1)*p7X_NXCELLS+p7X_C] * oxb->xmx[i*p7X_NXCELLS+p7X_C] * om->xf[p7O_C][p7O_LOOP] * scaleproduct;
- pp->xmx[i*p7X_NXCELLS+p7X_B] = 0.0;
-
- if (oxb->has_own_scales) scaleproduct *= oxf->xmx[i*p7X_NXCELLS+p7X_SCALE] / oxb->xmx[i*p7X_NXCELLS+p7X_SCALE];
- }
-
- if (std::isinf(scaleproduct)) return eslERANGE;
- else return eslOK;
-}
-
-/* Function: p7_DomainDecoding()
-* Synopsis: Posterior decoding of domain location.
-* Incept: SRE, Tue Aug 5 08:39:07 2008 [Janelia]
-*
-* Purpose: Identical to <p7_GDomainDecoding()> except that <om>, <oxf>,
-* <oxb> are SSE optimized versions. See <p7_GDomainDecoding()>
-* documentation for more info.
-*
-* Args: gm - profile
-* oxf - filled Forward matrix
-* oxb - filled Backward matrix
-* ddef - container for the results.
-*
-* Returns: <eslOK> on success.
-*
-* <eslERANGE> on numeric overflow. See commentary in
-* <p7_Decoding()>.
-*
-* Throws: (no abnormal error conditions)
-*/
-int
-p7_DomainDecoding(const P7_OPROFILE *om, const P7_OMX *oxf, const P7_OMX *oxb, P7_DOMAINDEF *ddef)
-{
- int L = oxf->L;
- float scaleproduct = 1.0 / oxb->xmx[p7X_N];
- float njcp;
- int i;
-
- ddef->btot[0] = 0.0;
- ddef->etot[0] = 0.0;
- ddef->mocc[0] = 0.0;
- for (i = 1; i <= L; i++)
- {
- /* scaleproduct is prod_j=0^i-2 now */
- ddef->btot[i] = ddef->btot[i-1] +
- (oxf->xmx[(i-1)*p7X_NXCELLS+p7X_B] * oxb->xmx[(i-1)*p7X_NXCELLS+p7X_B] * oxf->xmx[(i-1)*p7X_NXCELLS+p7X_SCALE] * scaleproduct);
-
- if (oxb->has_own_scales) scaleproduct *= oxf->xmx[(i-1)*p7X_NXCELLS+p7X_SCALE] / oxb->xmx[(i-1)*p7X_NXCELLS+p7X_SCALE];
- /* scaleproduct is prod_j=0^i-1 now */
-
- ddef->etot[i] = ddef->etot[i-1] +
- (oxf->xmx[i*p7X_NXCELLS+p7X_E] * oxb->xmx[i*p7X_NXCELLS+p7X_E] * oxf->xmx[i*p7X_NXCELLS+p7X_SCALE] * scaleproduct);
-
- njcp = oxf->xmx[(i-1)*p7X_NXCELLS+p7X_N] * oxb->xmx[i*p7X_NXCELLS+p7X_N] * om->xf[p7O_N][p7O_LOOP] * scaleproduct;
- njcp += oxf->xmx[(i-1)*p7X_NXCELLS+p7X_J] * oxb->xmx[i*p7X_NXCELLS+p7X_J] * om->xf[p7O_J][p7O_LOOP] * scaleproduct;
- njcp += oxf->xmx[(i-1)*p7X_NXCELLS+p7X_C] * oxb->xmx[i*p7X_NXCELLS+p7X_C] * om->xf[p7O_C][p7O_LOOP] * scaleproduct;
- ddef->mocc[i] = 1. - njcp;
- }
- ddef->L = oxf->L;
-
- if (std::isinf(scaleproduct)) return eslERANGE;
- else return eslOK;
-}
-/*------------------ end, posterior decoding --------------------*/
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/fwdback.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/fwdback.cpp
deleted file mode 100644
index 413a6fe..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/fwdback.cpp
+++ /dev/null
@@ -1,762 +0,0 @@
-/* SSE implementation of Forward and Backward algorithms.
-*
-* Both profile and DP matrix are striped and interleaved, for fast
-* SIMD vector operations. Calculations are in probability space
-* (scaled odds ratios, actually) rather than log probabilities,
-* allowing fast multiply/add operations rather than slow Logsum()
-* calculations. Sparse rescaling is used to achieve full dynamic
-* range of scores.
-*
-* The Forward and Backward implementations may be used either in a
-* full O(ML) mode that keeps an entire DP matrix, or in a O(M+L)
-* linear memory "parsing" mode that only keeps one row of memory for
-* the main MDI states and one column 0..L for the special states
-* B,E,N,C,J. Keeping a full matrix allows subsequent stochastic
-* traceback or posterior decoding of any state. In parsing mode, we
-* can do posterior decoding on the special states and determine
-* regions of the target sequence that are generated by the
-* nonhomology states (NCJ) versus not -- thus, identifying where
-* high-probability "regions" are, the first step of identifying the
-* domain structure of a target sequence.
-*
-* Contents:
-* 1. Forward/Backward wrapper API
-* 2. Forward and Backward engine implementations
-* 8. Copyright and license information.
-*
-* SRE, Thu Jul 31 08:43:20 2008 [Janelia]
- * SVN $Id: fwdback.c 3018 2009-10-29 17:33:06Z farrarm $
-*/
-
-#include <hmmer3/p7_config.h>
-
-#include <stdio.h>
-#include <math.h>
-
-#include <xmmintrin.h> /* SSE */
-#include <emmintrin.h> /* SSE2 */
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_sse.h>
-
-#include <hmmer3/hmmer.h>
-
-#include <hmmer3/hmmer3_funcs.h>
-
-#include "impl_sse.h"
-
-// ! added percentBorder for function and taskStateInfo - to control cancel and progress
-static int forward_engine (int do_full, const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *fwd, float *opt_sc,
- int percentBorder, U2::TaskStateInfo & ti );
-static int backward_engine(int do_full, const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, const P7_OMX *fwd, P7_OMX *bck, float *opt_sc,
- int percentBorder, U2::TaskStateInfo & ti );
-
-
-/*****************************************************************
-* 1. Forward/Backward API.
-*****************************************************************/
-
-/* Function: p7_Forward()
-* Synopsis: The Forward algorithm, full matrix fill version.
-* Incept: SRE, Fri Aug 15 18:59:43 2008 [Casa de Gatos]
-*
-* Purpose: Calculates the Forward algorithm for sequence <dsq> of
-* length <L> residues, using optimized profile <om>, and a
-* preallocated DP matrix <ox>. Upon successful return, <ox>
-* contains the filled Forward matrix, and <*opt_sc>
-* optionally contains the raw Forward score in nats.
-*
-* This calculation requires $O(ML)$ memory and time.
-* The caller must provide a suitably allocated full <ox>
-* by calling <ox = p7_omx_Create(M, L, L)> or
-* <p7_omx_GrowTo(ox, M, L, L)>.
-*
-* The model <om> must be configured in local alignment
-* mode. The sparse rescaling method used to keep
-* probability values within single-precision floating
-* point dynamic range cannot be safely applied to models in
-* glocal or global mode.
-*
-* Args: dsq - digital target sequence, 1..L
-* L - length of dsq in residues
-* om - optimized profile
-* ox - RETURN: Forward DP matrix
-* opt_sc - RETURN: Forward score (in nats)
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEINVAL> if <ox> allocation is too small, or if the profile
-* isn't in local alignment mode.
-* <eslERANGE> if the score exceeds the limited range of
-* a probability-space odds ratio.
-* In either case, <*opt_sc> is undefined.
-*/
-int
-p7_Forward(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *opt_sc, int percentBorder, U2::TaskStateInfo & ti)
-{
-#ifdef p7_DEBUGGING
- if (om->M > ox->allocQ4*4) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few columns)");
- if (L >= ox->validR) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few MDI rows)");
- if (L >= ox->allocXR) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few X rows)");
- if (! p7_oprofile_IsLocal(om)) ESL_EXCEPTION(eslEINVAL, "Forward implementation makes assumptions that only work for local alignment");
-#endif
-
- return forward_engine(TRUE, dsq, L, om, ox, opt_sc, percentBorder, ti );
-}
-
-/* Function: p7_ForwardParser()
-* Synopsis: The Forward algorithm, linear memory parsing version.
-* Incept: SRE, Fri Aug 15 19:05:26 2008 [Casa de Gatos]
-*
-* Purpose: Same as <p7_Forward() except that the full matrix isn't
-* kept. Instead, a linear $O(M+L)$ memory algorithm is
-* used, keeping only the DP matrix values for the special
-* (BENCJ) states. These are sufficient to do posterior
-* decoding to identify high-probability regions where
-* domains are.
-*
-* The caller must provide a suitably allocated "parsing"
-* <ox> by calling <ox = p7_omx_Create(M, 0, L)> or
-* <p7_omx_GrowTo(ox, M, 0, L)>.
-*
-* Args: dsq - digital target sequence, 1..L
-* L - length of dsq in residues
-* om - optimized profile
-* ox - RETURN: Forward DP matrix
-* ret_sc - RETURN: Forward score (in nats)
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEINVAL> if <ox> allocation is too small, or if the profile
-* isn't in local alignment mode.
-* <eslERANGE> if the score exceeds the limited range of
-* a probability-space odds ratio.
-* In either case, <*opt_sc> is undefined.
-*/
-int
-p7_ForwardParser(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *opt_sc, int percentBorder, U2::TaskStateInfo & ti )
-{
-#ifdef p7_DEBUGGING
- if (om->M > ox->allocQ4*4) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few columns)");
- if (ox->validR < 1) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few MDI rows)");
- if (L >= ox->allocXR) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few X rows)");
- if (! p7_oprofile_IsLocal(om)) ESL_EXCEPTION(eslEINVAL, "Forward implementation makes assumptions that only work for local alignment");
-#endif
-
- return forward_engine(FALSE, dsq, L, om, ox, opt_sc, percentBorder, ti );
-}
-
-
-
-/* Function: p7_Backward()
-* Synopsis: The Backward algorithm; full matrix fill version.
-* Incept: SRE, Sat Aug 16 08:34:22 2008 [Janelia]
-*
-* Purpose: Calculates the Backward algorithm for sequence <dsq> of
-* length <L> residues, using optimized profile <om>, and a
-* preallocated DP matrix <bck>. A filled Forward matrix
-* must also be provided in <fwd>, because we need to use
-* the same sparse scaling factors that Forward used. The
-* <bck> matrix is filled in, and the Backward score (in
-* nats) is optionally returned in <opt_sc>.
-*
-* This calculation requires $O(ML)$ memory and time. The
-* caller must provide a suitably allocated full <bck> by
-* calling <bck = p7_omx_Create(M, L, L)> or
-* <p7_omx_GrowTo(bck, M, L, L)>.
-*
-* Because only the sparse scaling factors are needed from
-* the <fwd> matrix, the caller may have this matrix
-* calculated either in full or parsing mode.
-*
-* The model <om> must be configured in local alignment
-* mode. The sparse rescaling method used to keep
-* probability values within single-precision floating
-* point dynamic range cannot be safely applied to models in
-* glocal or global mode.
-*
-* Args: dsq - digital target sequence, 1..L
-* L - length of dsq in residues
-* om - optimized profile
-* fwd - filled Forward DP matrix, for scale factors
-* do_full - TRUE=full matrix; FALSE=linear memory parse mode
-* bck - RETURN: filled Backward matrix
-* opt_sc - optRETURN: Backward score (in nats)
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEINVAL> if <ox> allocation is too small, or if the profile
-* isn't in local alignment mode.
-* <eslERANGE> if the score exceeds the limited range of
-* a probability-space odds ratio.
-* In either case, <*opt_sc> is undefined.
-*/
-int
-p7_Backward(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, const P7_OMX *fwd, P7_OMX *bck, float *opt_sc, int percentBorder, U2::TaskStateInfo & ti)
-{
-#ifdef p7_DEBUGGING
- if (om->M > bck->allocQ4*4) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few columns)");
- if (L >= bck->validR) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few MDI rows)");
- if (L >= bck->allocXR) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few X rows)");
- if (L != fwd->L) ESL_EXCEPTION(eslEINVAL, "fwd matrix size doesn't agree with length L");
- if (! p7_oprofile_IsLocal(om)) ESL_EXCEPTION(eslEINVAL, "Forward implementation makes assumptions that only work for local alignment");
-#endif
-
- return backward_engine(TRUE, dsq, L, om, fwd, bck, opt_sc, percentBorder, ti );
-}
-
-
-
-/* Function: p7_BackwardParser()
-* Synopsis: The Backward algorithm, linear memory parsing version.
-* Incept: SRE, Sat Aug 16 08:34:13 2008 [Janelia]
-*
-* Purpose: Same as <p7_Backward()> except that the full matrix isn't
-* kept. Instead, a linear $O(M+L)$ memory algorithm is
-* used, keeping only the DP matrix values for the special
-* (BENCJ) states. These are sufficient to do posterior
-* decoding to identify high-probability regions where
-* domains are.
-*
-* The caller must provide a suitably allocated "parsing"
-* <bck> by calling <bck = p7_omx_Create(M, 0, L)> or
-* <p7_omx_GrowTo(bck, M, 0, L)>.
-*
-* Args: dsq - digital target sequence, 1..L
-* L - length of dsq in residues
-* om - optimized profile
-* fwd - filled Forward DP matrix, for scale factors
-* bck - RETURN: filled Backward matrix
-* opt_sc - optRETURN: Backward score (in nats)
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEINVAL> if <ox> allocation is too small, or if the profile
-* isn't in local alignment mode.
-* <eslERANGE> if the score exceeds the limited range of
-* a probability-space odds ratio.
-* In either case, <*opt_sc> is undefined.
-*/
-int
-p7_BackwardParser(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, const P7_OMX *fwd, P7_OMX *bck, float *opt_sc, int percentBorder, U2::TaskStateInfo & ti)
-{
-#ifdef p7_DEBUGGING
- if (om->M > bck->allocQ4*4) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few columns)");
- if (bck->validR < 1) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few MDI rows)");
- if (L >= bck->allocXR) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small (too few X rows)");
- if (L != fwd->L) ESL_EXCEPTION(eslEINVAL, "fwd matrix size doesn't agree with length L");
- if (! p7_oprofile_IsLocal(om)) ESL_EXCEPTION(eslEINVAL, "Forward implementation makes assumptions that only work for local alignment");
-#endif
-
- return backward_engine(FALSE, dsq, L, om, fwd, bck, opt_sc, percentBorder, ti );
-}
-
-
-
-/*****************************************************************
-* 2. Forward/Backward engine implementations (called thru API)
-*****************************************************************/
-
-static int
-forward_engine(int do_full, const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *opt_sc, int percentBorder, U2::TaskStateInfo & ti )
-{
- register __m128 mpv, dpv, ipv; /* previous row values */
- register __m128 sv; /* temp storage of 1 curr row value in progress */
- register __m128 dcv; /* delayed storage of D(i,q+1) */
- register __m128 xEv; /* E state: keeps max for Mk->E as we go */
- register __m128 xBv; /* B state: splatted vector of B[i-1] for B->Mk calculations */
- __m128 zerov; /* splatted 0.0's in a vector */
- float xN, xE, xB, xC, xJ; /* special states' scores */
- int i; /* counter over sequence positions 1..L */
- int q; /* counter over quads 0..nq-1 */
- int j; /* counter over DD iterations (4 is full serialization) */
- int Q = p7O_NQF(om->M); /* segment length: # of vectors */
- __m128 *dpc = ox->dpf[0]; /* current row, for use in {MDI}MO(dpp,q) access macro */
- __m128 *dpp; /* previous row, for use in {MDI}MO(dpp,q) access macro */
- __m128 *rp; /* will point at om->rfv[x] for residue x[i] */
- __m128 *tp; /* will point into (and step thru) om->tfv */
-
- /* Initialization. */
- ox->M = om->M;
- ox->L = L;
- ox->has_own_scales = TRUE; /* all forward matrices control their own scalefactors */
- zerov = _mm_setzero_ps();
- for (q = 0; q < Q; q++)
- MMO(dpc,q) = IMO(dpc,q) = DMO(dpc,q) = zerov;
- xE = ox->xmx[p7X_E] = 0.;
- xN = ox->xmx[p7X_N] = 1.;
- xJ = ox->xmx[p7X_J] = 0.;
- xB = ox->xmx[p7X_B] = om->xf[p7O_N][p7O_MOVE];
- xC = ox->xmx[p7X_C] = 0.;
-
- ox->xmx[p7X_SCALE] = 1.0;
- ox->totscale = 0.0;
-
-#if p7_DEBUGGING
- if (ox->debugging) p7_omx_DumpFBRow(ox, TRUE, 0, 9, 5, xE, xN, xJ, xB, xC); /* logify=TRUE, <rowi>=0, width=8, precision=5*/
-#endif
-
- // ! ADDED CODE !
- int progressStart = ti.progress;
- for (i = 1; i <= L; i++)
- {
- // ! ADDED CODE !
- ti.progress = progressStart + (int)(((double)percentBorder / L) * i);
- if( ti.cancelFlag ){ return eslCANCELED; }
-
- dpp = dpc;
- dpc = ox->dpf[do_full * i]; /* avoid conditional, use do_full as kronecker delta */
- rp = om->rfv[dsq[i]];
- tp = om->tfv;
- dcv = _mm_setzero_ps();
- xEv = _mm_setzero_ps();
- xBv = _mm_set1_ps(xB);
-
- /* Right shifts by 4 bytes. 4,8,12,x becomes x,4,8,12. Shift zeros on. */
- mpv = esl_sse_rightshift_ps(MMO(dpp,Q-1), zerov);
- dpv = esl_sse_rightshift_ps(DMO(dpp,Q-1), zerov);
- ipv = esl_sse_rightshift_ps(IMO(dpp,Q-1), zerov);
-
- for (q = 0; q < Q; q++)
- {
- /* Calculate new MMO(i,q); don't store it yet, hold it in sv. */
- sv = _mm_mul_ps(xBv, *tp); tp++;
- sv = _mm_add_ps(sv, _mm_mul_ps(mpv, *tp)); tp++;
- sv = _mm_add_ps(sv, _mm_mul_ps(ipv, *tp)); tp++;
- sv = _mm_add_ps(sv, _mm_mul_ps(dpv, *tp)); tp++;
- sv = _mm_mul_ps(sv, *rp); rp++;
- xEv = _mm_add_ps(xEv, sv);
-
- /* Load {MDI}(i-1,q) into mpv, dpv, ipv;
- * {MDI}MX(q) is then the current, not the prev row
- */
- mpv = MMO(dpp,q);
- dpv = DMO(dpp,q);
- ipv = IMO(dpp,q);
-
- /* Do the delayed stores of {MD}(i,q) now that memory is usable */
- MMO(dpc,q) = sv;
- DMO(dpc,q) = dcv;
-
- /* Calculate the next D(i,q+1) partially: M->D only;
- * delay storage, holding it in dcv
- */
- dcv = _mm_mul_ps(sv, *tp); tp++;
-
- /* Calculate and store I(i,q); assumes odds ratio for emission is 1.0 */
- sv = _mm_mul_ps(mpv, *tp); tp++;
- IMO(dpc,q) = _mm_add_ps(sv, _mm_mul_ps(ipv, *tp)); tp++;
- }
-
- /* Now the DD paths. We would rather not serialize them but
- * in an accurate Forward calculation, we have few options.
- */
- /* dcv has carried through from end of q loop above; store it
- * in first pass, we add M->D and D->D path into DMX
- */
- /* We're almost certainly're obligated to do at least one complete
- * DD path to be sure:
- */
- dcv = esl_sse_rightshift_ps(dcv, zerov);
- DMO(dpc,0) = zerov;
- tp = om->tfv + 7*Q; /* set tp to start of the DD's */
- for (q = 0; q < Q; q++)
- {
- DMO(dpc,q) = _mm_add_ps(dcv, DMO(dpc,q));
- dcv = _mm_mul_ps(DMO(dpc,q), *tp); tp++; /* extend DMO(q), so we include M->D and D->D paths */
- }
-
- /* now. on small models, it seems best (empirically) to just go
- * ahead and serialize. on large models, we can do a bit better,
- * by testing for when dcv (DD path) accrued to DMO(q) is below
- * machine epsilon for all q, in which case we know DMO(q) are all
- * at their final values. The tradeoff point is (empirically) somewhere around M=100,
- * at least on my desktop. We don't worry about the conditional here;
- * it's outside any inner loops.
- */
- if (om->M < 100)
- { /* Fully serialized version */
- for (j = 1; j < 4; j++)
- {
- dcv = esl_sse_rightshift_ps(dcv, zerov);
- tp = om->tfv + 7*Q; /* set tp to start of the DD's */
- for (q = 0; q < Q; q++)
- { /* note, extend dcv, not DMO(q); only adding DD paths now */
- DMO(dpc,q) = _mm_add_ps(dcv, DMO(dpc,q));
- dcv = _mm_mul_ps(dcv, *tp); tp++;
- }
- }
- }
- else
- { /* Slightly parallelized version, but which incurs some overhead */
- for (j = 1; j < 4; j++)
- {
- register __m128 cv; /* keeps track of whether any DD's change DMO(q) */
-
- dcv = esl_sse_rightshift_ps(dcv, zerov);
- tp = om->tfv + 7*Q; /* set tp to start of the DD's */
- cv = zerov;
- for (q = 0; q < Q; q++)
- { /* using cmpgt below tests if DD changed any DMO(q) *without* conditional branch */
- sv = _mm_add_ps(dcv, DMO(dpc,q));
- cv = _mm_or_ps(cv, _mm_cmpgt_ps(sv, DMO(dpc,q)));
- DMO(dpc,q) = sv; /* store new DMO(q) */
- dcv = _mm_mul_ps(dcv, *tp); tp++; /* note, extend dcv, not DMO(q) */
- }
- if (! _mm_movemask_ps(cv)) break; /* DD's didn't change any DMO(q)? Then done, break out. */
- }
- }
-
- /* Add D's to xEv */
- for (q = 0; q < Q; q++) xEv = _mm_add_ps(DMO(dpc,q), xEv);
-
- /* Finally the "special" states, which start from Mk->E (->C, ->J->B) */
- /* The following incantation is a horizontal sum of xEv's elements */
- /* These must follow DD calculations, because D's contribute to E in Forward
- * (as opposed to Viterbi)
- */
- xEv = _mm_add_ps(xEv, _mm_shuffle_ps(xEv, xEv, _MM_SHUFFLE(0, 3, 2, 1)));
- xEv = _mm_add_ps(xEv, _mm_shuffle_ps(xEv, xEv, _MM_SHUFFLE(1, 0, 3, 2)));
- _mm_store_ss(&xE, xEv);
-
- xN = xN * om->xf[p7O_N][p7O_LOOP];
- xC = (xC * om->xf[p7O_C][p7O_LOOP]) + (xE * om->xf[p7O_E][p7O_MOVE]);
- xJ = (xJ * om->xf[p7O_J][p7O_LOOP]) + (xE * om->xf[p7O_E][p7O_LOOP]);
- xB = (xJ * om->xf[p7O_J][p7O_MOVE]) + (xN * om->xf[p7O_N][p7O_MOVE]);
- /* and now xB will carry over into next i, and xC carries over after i=L */
-
- /* Sparse rescaling. xE above threshold? trigger a rescaling event. */
- if (xE > 1.0e4) /* that's a little less than e^10, ~10% of our dynamic range */
- {
- xN = xN / xE;
- xC = xC / xE;
- xJ = xJ / xE;
- xB = xB / xE;
- xEv = _mm_set1_ps(1.0 / xE);
- for (q = 0; q < Q; q++)
- {
- MMO(dpc,q) = _mm_mul_ps(MMO(dpc,q), xEv);
- DMO(dpc,q) = _mm_mul_ps(DMO(dpc,q), xEv);
- IMO(dpc,q) = _mm_mul_ps(IMO(dpc,q), xEv);
- }
- ox->xmx[i*p7X_NXCELLS+p7X_SCALE] = xE;
- ox->totscale += log((double)xE);
- xE = 1.0;
- }
- else ox->xmx[i*p7X_NXCELLS+p7X_SCALE] = 1.0;
-
- /* Storage of the specials. We could've stored these already
- * but using xE, etc. variables makes it easy to convert this
- * code to O(M) memory versions just by deleting storage steps.
- */
- ox->xmx[i*p7X_NXCELLS+p7X_E] = xE;
- ox->xmx[i*p7X_NXCELLS+p7X_N] = xN;
- ox->xmx[i*p7X_NXCELLS+p7X_J] = xJ;
- ox->xmx[i*p7X_NXCELLS+p7X_B] = xB;
- ox->xmx[i*p7X_NXCELLS+p7X_C] = xC;
-
-#if p7_DEBUGGING
- if (ox->debugging) p7_omx_DumpFBRow(ox, TRUE, i, 9, 5, xE, xN, xJ, xB, xC); /* logify=TRUE, <rowi>=i, width=8, precision=5*/
-#endif
- } /* end loop over sequence residues 1..L */
-
- /* finally C->T, and flip total score back to log space (nats) */
- /* On overflow, xC is inf or nan (nan arises because inf*0 = nan). */
- /* On an underflow (which shouldn't happen), we counterintuitively return infinity:
- * the effect of this is to force the caller to rescore us with full range.
- */
- if (std::isnan(xC)) ESL_EXCEPTION(eslERANGE, "forward score is NaN");
- else if (L>0 && xC == 0.0) ESL_EXCEPTION(eslERANGE, "forward score underflow (is 0.0)");
- else if (std::isinf(xC) == 1) ESL_EXCEPTION(eslERANGE, "forward score overflow (is infinity)");
-
- if (opt_sc != NULL) *opt_sc = ox->totscale + log((double)(xC * om->xf[p7O_C][p7O_MOVE]));
- return eslOK;
-}
-
-
-
-static int
-backward_engine(int do_full, const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, const P7_OMX *fwd, P7_OMX *bck, float *opt_sc,
- int percentBorder, U2::TaskStateInfo & ti)
-{
- register __m128 mpv, ipv, dpv; /* previous row values */
- register __m128 mcv, dcv; /* current row values */
- register __m128 tmmv, timv, tdmv; /* tmp vars for accessing rotated transition scores */
- register __m128 xBv; /* collects B->Mk components of B(i) */
- register __m128 xEv; /* splatted E(i) */
- __m128 zerov; /* splatted 0.0's in a vector */
- float xN, xE, xB, xC, xJ; /* special states' scores */
- int i; /* counter over sequence positions 0,1..L */
- int q; /* counter over quads 0..Q-1 */
- int Q = p7O_NQF(om->M); /* segment length: # of vectors */
- int j; /* DD segment iteration counter (4 = full serialization) */
- __m128 *dpc; /* current DP row */
- __m128 *dpp; /* next ("previous") DP row */
- __m128 *rp; /* will point into om->rfv[x] for residue x[i+1] */
- __m128 *tp; /* will point into (and step thru) om->tfv transition scores */
-
- /* initialize the L row. */
- bck->M = om->M;
- bck->L = L;
- bck->has_own_scales = FALSE; /* backwards scale factors are *usually* given by <fwd> */
- dpc = bck->dpf[L * do_full];
- xJ = 0.0;
- xB = 0.0;
- xN = 0.0;
- xC = om->xf[p7O_C][p7O_MOVE]; /* C<-T */
- xE = xC * om->xf[p7O_E][p7O_MOVE]; /* E<-C, no tail */
- xEv = _mm_set1_ps(xE);
- zerov = _mm_setzero_ps();
- dcv = zerov; /* solely to silence a compiler warning */
- for (q = 0; q < Q; q++) MMO(dpc,q) = DMO(dpc,q) = xEv;
- for (q = 0; q < Q; q++) IMO(dpc,q) = zerov;
-
- /* init row L's DD paths, 1) first segment includes xE, from DMO(q) */
- tp = om->tfv + 8*Q - 1; /* <*tp> now the [4 8 12 x] TDD quad */
- dpv = _mm_move_ss(DMO(dpc,Q-1), zerov); /* start leftshift: [1 5 9 13] -> [x 5 9 13] */
- dpv = _mm_shuffle_ps(dpv, dpv, _MM_SHUFFLE(0,3,2,1)); /* finish leftshift:[x 5 9 13] -> [5 9 13 x] */
- for (q = Q-1; q >= 0; q--)
- {
- dcv = _mm_mul_ps(dpv, *tp); tp--;
- DMO(dpc,q) = _mm_add_ps(DMO(dpc,q), dcv);
- dpv = DMO(dpc,q);
- }
- /* 2) three more passes, only extending DD component (dcv only; no xE contrib from DMO(q)) */
- for (j = 1; j < 4; j++)
- {
- tp = om->tfv + 8*Q - 1; /* <*tp> now the [4 8 12 x] TDD quad */
- dcv = _mm_move_ss(dcv, zerov); /* start leftshift: [1 5 9 13] -> [x 5 9 13] */
- dcv = _mm_shuffle_ps(dcv, dcv, _MM_SHUFFLE(0,3,2,1)); /* finish leftshift:[x 5 9 13] -> [5 9 13 x] */
- for (q = Q-1; q >= 0; q--)
- {
- dcv = _mm_mul_ps(dcv, *tp); tp--;
- DMO(dpc,q) = _mm_add_ps(DMO(dpc,q), dcv);
- }
- }
- /* now MD init */
- tp = om->tfv + 7*Q - 3; /* <*tp> now the [4 8 12 x] Mk->Dk+1 quad */
- dcv = _mm_move_ss(DMO(dpc,0), zerov); /* start leftshift: [1 5 9 13] -> [x 5 9 13] */
- dcv = _mm_shuffle_ps(dcv, dcv, _MM_SHUFFLE(0,3,2,1)); /* finish leftshift:[x 5 9 13] -> [5 9 13 x] */
- for (q = Q-1; q >= 0; q--)
- {
- MMO(dpc,q) = _mm_add_ps(MMO(dpc,q), _mm_mul_ps(dcv, *tp)); tp -= 7;
- dcv = DMO(dpc,q);
- }
-
- /* Sparse rescaling: same scale factors as fwd matrix */
- if (fwd->xmx[L*p7X_NXCELLS+p7X_SCALE] > 1.0)
- {
- xE = xE / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
- xN = xN / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
- xC = xC / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
- xJ = xJ / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
- xB = xB / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
- xEv = _mm_set1_ps(1.0 / fwd->xmx[L*p7X_NXCELLS+p7X_SCALE]);
- for (q = 0; q < Q; q++) {
- MMO(dpc,q) = _mm_mul_ps(MMO(dpc,q), xEv);
- DMO(dpc,q) = _mm_mul_ps(DMO(dpc,q), xEv);
- IMO(dpc,q) = _mm_mul_ps(IMO(dpc,q), xEv);
- }
- }
- bck->xmx[L*p7X_NXCELLS+p7X_SCALE] = fwd->xmx[L*p7X_NXCELLS+p7X_SCALE];
- bck->totscale = log((double)bck->xmx[L*p7X_NXCELLS+p7X_SCALE]);
-
- /* Stores */
- bck->xmx[L*p7X_NXCELLS+p7X_E] = xE;
- bck->xmx[L*p7X_NXCELLS+p7X_N] = xN;
- bck->xmx[L*p7X_NXCELLS+p7X_J] = xJ;
- bck->xmx[L*p7X_NXCELLS+p7X_B] = xB;
- bck->xmx[L*p7X_NXCELLS+p7X_C] = xC;
-
-#if p7_DEBUGGING
- if (bck->debugging) p7_omx_DumpFBRow(bck, TRUE, L, 9, 4, xE, xN, xJ, xB, xC); /* logify=TRUE, <rowi>=L, width=9, precision=4*/
-#endif
-
- /* main recursion */
- // ! ADDED CODE !
- int progressStart = ti.progress;
- for (i = L-1; i >= 1; i--) /* backwards stride */
- {
- // ! ADDED CODE !
- ti.progress = progressStart + (int)(((double)percentBorder / L) * i);
- if( ti.cancelFlag ){ return eslCANCELED; }
-
- /* phase 1. B(i) collected. Old row destroyed, new row contains
- * complete I(i,k), partial {MD}(i,k) w/ no {MD}->{DE} paths yet.
- */
- dpc = bck->dpf[i * do_full];
- dpp = bck->dpf[(i+1) * do_full];
- rp = om->rfv[dsq[i+1]] + Q-1; /* <*rp> is now the [4 8 12 x] match emission quad */
- tp = om->tfv + 7*Q - 1; /* <*tp> is now the [4 8 12 x] TII transition quad */
-
- /* leftshift the first transition quads */
- tmmv = _mm_move_ss(om->tfv[1], zerov); tmmv = _mm_shuffle_ps(tmmv, tmmv, _MM_SHUFFLE(0,3,2,1));
- timv = _mm_move_ss(om->tfv[2], zerov); timv = _mm_shuffle_ps(timv, timv, _MM_SHUFFLE(0,3,2,1));
- tdmv = _mm_move_ss(om->tfv[3], zerov); tdmv = _mm_shuffle_ps(tdmv, tdmv, _MM_SHUFFLE(0,3,2,1));
-
- mpv = _mm_mul_ps(MMO(dpp,0), om->rfv[dsq[i+1]][0]); /* precalc M(i+1,k+1) * e(M_k+1, x_{i+1}) */
- mpv = _mm_move_ss(mpv, zerov);
- mpv = _mm_shuffle_ps(mpv, mpv, _MM_SHUFFLE(0,3,2,1));
-
- xBv = zerov;
- for (q = Q-1; q >= 0; q--) /* backwards stride */
- {
- ipv = IMO(dpp,q); /* assumes emission odds ratio of 1.0; i+1's IMO(q) now free */
- IMO(dpc,q) = _mm_add_ps(_mm_mul_ps(ipv, *tp), _mm_mul_ps(mpv, timv)); tp--;
- DMO(dpc,q) = _mm_mul_ps(mpv, tdmv);
- mcv = _mm_add_ps(_mm_mul_ps(ipv, *tp), _mm_mul_ps(mpv, tmmv)); tp-= 2;
-
- mpv = _mm_mul_ps(MMO(dpp,q), *rp); rp--; /* obtain mpv for next q. i+1's MMO(q) is freed */
- MMO(dpc,q) = mcv;
-
- tdmv = *tp; tp--;
- timv = *tp; tp--;
- tmmv = *tp; tp--;
-
- xBv = _mm_add_ps(xBv, _mm_mul_ps(mpv, *tp)); tp--;
- }
-
- /* phase 2: now that we have accumulated the B->Mk transitions in xBv, we can do the specials */
- /* this incantation is a horiz sum of xBv elements: (_mm_hadd_ps() would require SSE3) */
- xBv = _mm_add_ps(xBv, _mm_shuffle_ps(xBv, xBv, _MM_SHUFFLE(0, 3, 2, 1)));
- xBv = _mm_add_ps(xBv, _mm_shuffle_ps(xBv, xBv, _MM_SHUFFLE(1, 0, 3, 2)));
- _mm_store_ss(&xB, xBv);
-
- xC = xC * om->xf[p7O_C][p7O_LOOP];
- xJ = (xB * om->xf[p7O_J][p7O_MOVE]) + (xJ * om->xf[p7O_J][p7O_LOOP]); /* must come after xB */
- xN = (xB * om->xf[p7O_N][p7O_MOVE]) + (xN * om->xf[p7O_N][p7O_LOOP]); /* must come after xB */
- xE = (xC * om->xf[p7O_E][p7O_MOVE]) + (xJ * om->xf[p7O_E][p7O_LOOP]); /* must come after xJ, xC */
- xEv = _mm_set1_ps(xE); /* splat */
-
-
- /* phase 3: {MD}->E paths and one step of the D->D paths */
- tp = om->tfv + 8*Q - 1; /* <*tp> now the [4 8 12 x] TDD quad */
- dpv = _mm_add_ps(DMO(dpc,0), xEv);
- dpv = _mm_move_ss(dpv, zerov);
- dpv = _mm_shuffle_ps(dpv, dpv, _MM_SHUFFLE(0,3,2,1));
- for (q = Q-1; q >= 0; q--)
- {
- dcv = _mm_mul_ps(dpv, *tp); tp--;
- DMO(dpc,q) = _mm_add_ps(DMO(dpc,q), _mm_add_ps(dcv, xEv));
- dpv = DMO(dpc,q);
- MMO(dpc,q) = _mm_add_ps(MMO(dpc,q), xEv);
- }
-
- /* phase 4: finish extending the DD paths */
- /* fully serialized for now */
- for (j = 1; j < 4; j++) /* three passes: we've already done 1 segment, we need 4 total */
- {
- dcv = _mm_move_ss(dcv, zerov);
- dcv = _mm_shuffle_ps(dcv, dcv, _MM_SHUFFLE(0,3,2,1));
- tp = om->tfv + 8*Q - 1; /* <*tp> now the [4 8 12 x] TDD quad */
- for (q = Q-1; q >= 0; q--)
- {
- dcv = _mm_mul_ps(dcv, *tp); tp--;
- DMO(dpc,q) = _mm_add_ps(DMO(dpc,q), dcv);
- }
- }
-
- /* phase 5: add M->D paths */
- dcv = _mm_move_ss(DMO(dpc,0), zerov);
- dcv = _mm_shuffle_ps(dcv, dcv, _MM_SHUFFLE(0,3,2,1));
- tp = om->tfv + 7*Q - 3; /* <*tp> is now the [4 8 12 x] Mk->Dk+1 quad */
- for (q = Q-1; q >= 0; q--)
- {
- MMO(dpc,q) = _mm_add_ps(MMO(dpc,q), _mm_mul_ps(dcv, *tp)); tp -= 7;
- dcv = DMO(dpc,q);
- }
-
- /* Sparse rescaling */
-
- /* In rare cases [J3/119] scale factors from <fwd> are
- * insufficient and backwards will overflow. In this case, we
- * switch on the fly to using our own scale factors, different
- * from those in <fwd>. This will complicate subsequent
- * posterior decoding routines.
- */
- if (xB > 1.0e16) bck->has_own_scales = TRUE;
-
- if (bck->has_own_scales) bck->xmx[i*p7X_NXCELLS+p7X_SCALE] = (xB > 1.0e4) ? xB : 1.0;
- else bck->xmx[i*p7X_NXCELLS+p7X_SCALE] = fwd->xmx[i*p7X_NXCELLS+p7X_SCALE];
-
- if (bck->xmx[i*p7X_NXCELLS+p7X_SCALE] > 1.0)
- {
- xE /= bck->xmx[i*p7X_NXCELLS+p7X_SCALE];
- xN /= bck->xmx[i*p7X_NXCELLS+p7X_SCALE];
- xJ /= bck->xmx[i*p7X_NXCELLS+p7X_SCALE];
- xB /= bck->xmx[i*p7X_NXCELLS+p7X_SCALE];
- xC /= bck->xmx[i*p7X_NXCELLS+p7X_SCALE];
- xBv = _mm_set1_ps(1.0 / bck->xmx[i*p7X_NXCELLS+p7X_SCALE]);
- for (q = 0; q < Q; q++) {
- MMO(dpc,q) = _mm_mul_ps(MMO(dpc,q), xBv);
- DMO(dpc,q) = _mm_mul_ps(DMO(dpc,q), xBv);
- IMO(dpc,q) = _mm_mul_ps(IMO(dpc,q), xBv);
- }
- bck->totscale += log((double)bck->xmx[i*p7X_NXCELLS+p7X_SCALE]);
- }
-
- /* Stores are separate only for pedagogical reasons: easy to
- * turn this into a more memory efficient version just by
- * deleting the stores.
- */
- bck->xmx[i*p7X_NXCELLS+p7X_E] = xE;
- bck->xmx[i*p7X_NXCELLS+p7X_N] = xN;
- bck->xmx[i*p7X_NXCELLS+p7X_J] = xJ;
- bck->xmx[i*p7X_NXCELLS+p7X_B] = xB;
- bck->xmx[i*p7X_NXCELLS+p7X_C] = xC;
-
-#if p7_DEBUGGING
- if (bck->debugging) p7_omx_DumpFBRow(bck, TRUE, i, 9, 4, xE, xN, xJ, xB, xC); /* logify=TRUE, <rowi>=i, width=9, precision=4*/
-#endif
- } /* thus ends the loop over sequence positions i */
-
- /* Termination at i=0, where we can only reach N,B states. */
- dpp = bck->dpf[1 * do_full];
- tp = om->tfv; /* <*tp> is now the [1 5 9 13] TBMk transition quad */
- rp = om->rfv[dsq[1]]; /* <*rp> is now the [1 5 9 13] match emission quad */
- xBv = zerov;
- for (q = 0; q < Q; q++)
- {
- mpv = _mm_mul_ps(MMO(dpp,q), *rp); rp++;
- mpv = _mm_mul_ps(mpv, *tp); tp += 7;
- xBv = _mm_add_ps(xBv, mpv);
- }
- /* horizontal sum of xBv */
- xBv = _mm_add_ps(xBv, _mm_shuffle_ps(xBv, xBv, _MM_SHUFFLE(0, 3, 2, 1)));
- xBv = _mm_add_ps(xBv, _mm_shuffle_ps(xBv, xBv, _MM_SHUFFLE(1, 0, 3, 2)));
- _mm_store_ss(&xB, xBv);
-
- xN = (xB * om->xf[p7O_N][p7O_MOVE]) + (xN * om->xf[p7O_N][p7O_LOOP]);
-
- bck->xmx[p7X_B] = xB;
- bck->xmx[p7X_C] = 0.0;
- bck->xmx[p7X_J] = 0.0;
- bck->xmx[p7X_N] = xN;
- bck->xmx[p7X_E] = 0.0;
- bck->xmx[p7X_SCALE] = 1.0;
-
-#if p7_DEBUGGING
- dpc = bck->dpf[0];
- for (q = 0; q < Q; q++) /* Not strictly necessary, but if someone's looking at DP matrices, this is nice to do: */
- MMO(dpc,q) = DMO(dpc,q) = IMO(dpc,q) = zerov;
- if (bck->debugging) p7_omx_DumpFBRow(bck, TRUE, 0, 9, 4, bck->xmx[p7X_E], bck->xmx[p7X_N], bck->xmx[p7X_J], bck->xmx[p7X_B], bck->xmx[p7X_C]); /* logify=TRUE, <rowi>=0, width=9, precision=4*/
-#endif
-
- if (std::isnan(xN)) ESL_EXCEPTION(eslERANGE, "backward score is NaN");
- else if (L>0 && xN == 0.0) ESL_EXCEPTION(eslERANGE, "backward score underflow (is 0.0)");
- else if (std::isinf(xN) == 1) ESL_EXCEPTION(eslERANGE, "backward score overflow (is infinity)");
-
- if (opt_sc != NULL) *opt_sc = bck->totscale + log((double)xN);
- return eslOK;
-}
-/*-------------- end, forward/backward engines -----------------*/
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/impl_sse.h b/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/impl_sse.h
deleted file mode 100644
index 2f8dc5d..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/impl_sse.h
+++ /dev/null
@@ -1,410 +0,0 @@
-/* SSE optimized implementation of various MSV, Viterbi, and Forward
-* routines: structures, declarations, and macros.
-*
-* SRE, Sun Nov 25 11:23:02 2007
- * SVN $Id: impl_sse.h 3018 2009-10-29 17:33:06Z farrarm $
-*/
-#ifndef P7_IMPL_SSE_INCLUDED
-#define P7_IMPL_SSE_INCLUDED
-
-#include <hmmer3/p7_config.h>
-
-#include <hmmer3/easel/esl_alphabet.h>
-#include <hmmer3/easel/esl_random.h>
-
-#include <xmmintrin.h> /* SSE */
-#include <emmintrin.h> /* SSE2 */
-
-#include <hmmer3/hmmer.h>
-
-#include <U2Core/Task.h>
-
-
-/* In calculating Q, the number of vectors we need in a row, we have
-* to make sure there's at least 2, or a striped implementation fails.
-*/
-#define p7O_NQB(M) ( ESL_MAX(2, ((((M)-1) / 16) + 1))) /* 16 uchars */
-#define p7O_NQW(M) ( ESL_MAX(2, ((((M)-1) / 8) + 1))) /* 8 words */
-#define p7O_NQF(M) ( ESL_MAX(2, ((((M)-1) / 4) + 1))) /* 4 floats */
-
-
-/*****************************************************************
-* 1. P7_OPROFILE: an optimized score profile
-*****************************************************************/
-/* The OPROFILE is striped [Farrar07] and interleaved, as is the DP matrix.
-* For example, the layout of a profile for an M=14 model (xref J2/46):
-*
-* rsc[x] : striped blocks of M emissions, starting with q=0
-* 1 11 1 1
-* 1593 2604 371x 482x
-*
-* tsc: grouped in order of accession in DP for 7 transition scores;
-* starting at q=0 for all but the three transitions to M, which
-* are rotated by -1 and rightshifted. DD's follow separately,
-* starting at q=0.
-*
-* { 1 1 1 1 1 1 1 }
-* { 1593 x482 x482 x482 1593 1593 1593 }
-* { [tBMk] [tMM] [tIM] [tDM] [tMD] [tMI] [tII] }
-*
-* { 11 1 1 1 11 11 11 }
-* { 2604 1593 1593 1593 2604 2604 2604 }
-* { [tBMk] [tMM] [tIM] [tDM] [tMD] [tMI] [tII] }
-*
-* { 1 11 11 11 1 1 1 }
-* { 371x 2604 2604 2604 371x 371x 371x }
-* { [tBMk] [tMM] [tIM] [tDM] [tMD] [tMI] [tII] }
-*
-* { 1 1 1 1 1 1 1 }
-* { 482x 371x 371x 371x 482x 482x 482x }
-* { [tBMk] [tMM] [tIM] [tDM] [tMD] [tMI] [tII] }
-*
-* { 1 11 1 1 }
-* { 1593 2604 371x 482x }
-* { [TDD] [TDD] [TDD] [TDD] }
-*
-*/
-
-#define p7O_NXSTATES 4 /* special states stored: ENJC */
-#define p7O_NXTRANS 2 /* special states all have 2 transitions: move, loop */
-#define p7O_NTRANS 8 /* 7 core transitions + BMk entry */
-enum p7o_xstates_e { p7O_E = 0, p7O_N = 1, p7O_J = 2, p7O_C = 3 };
-enum p7o_xtransitions_e { p7O_MOVE = 0, p7O_LOOP = 1 };
-enum p7o_tsc_e { p7O_BM = 0, p7O_MM = 1, p7O_IM = 2, p7O_DM = 3, p7O_MD = 4, p7O_MI = 5, p7O_II = 6, p7O_DD = 7 };
-
-typedef struct p7_oprofile_s {
- /* MSVFilter uses scaled, biased uchars: 16x unsigned byte vectors */
- __m128i **rbv; /* match scores [x][q]: rm, rm[0] are allocated */
- uint8_t tbm_b; /* constant B->Mk cost: scaled log 2/M(M+1) */
- uint8_t tec_b; /* constant E->C cost: scaled log 0.5 */
- uint8_t tjb_b; /* constant NCJ move cost: scaled log 3/(L+3) */
- float scale_b; /* typically 3 / log2: scores scale to 1/3 bits */
- uint8_t base_b; /* typically +190: offset of uchar scores */
- uint8_t bias_b; /* positive bias to emission scores, make them >=0 */
-
- /* ViterbiFilter uses scaled swords: 8x signed 16-bit integer vectors */
- __m128i **rwv; /* [x][q]: rw, rw[0] are allocated [Kp][Q8] */
- __m128i *twv; /* transition score blocks [8*Q8] */
- int16_t xw[p7O_NXSTATES][p7O_NXTRANS]; /* NECJ state transition costs */
- float scale_w; /* score units: typically 500 / log(2), 1/500 bits */
- int16_t base_w; /* offset of sword scores: typically +12000 */
- int16_t ddbound_w; /* threshold precalculated for lazy DD evaluation */
- float ncj_roundoff; /* missing precision on NN,CC,JJ after rounding */
-
- /* Forward, Backward use IEEE754 single-precision floats: 4x vectors */
- __m128 **rfv; /* [x][q]: rf, rf[0] are allocated [Kp][Q4] */
- __m128 *tfv; /* transition probability blocks [8*Q4] */
- float xf[p7O_NXSTATES][p7O_NXTRANS]; /* NECJ transition costs */
-
- /* Our actual vector mallocs, before we align the memory */
- __m128i *rbv_mem;
- __m128i *rwv_mem;
- __m128i *twv_mem;
- __m128 *tfv_mem;
- __m128 *rfv_mem;
-
- /* Disk offset information for hmmpfam's fast model retrieval */
- off_t offs[p7_NOFFSETS]; /* p7_{MFP}OFFSET, or -1 */
-
- /* Disk offset bookkeeping for h3f: */
- off_t roff; /* record offset (start of record); -1 if none */
- off_t eoff; /* offset to last byte of record; -1 if unknown */
-
- /* Information, annotation copied from parent profile: */
- char *name; /* unique name of model */
- char *acc; /* unique accession of model, or NULL */
- char *desc; /* brief (1-line) description of model, or NULL */
- char *rf; /* reference line 1..M; *ref=0: unused */
- char *cs; /* consensus structure line 1..M, *cs=0: unused */
- char *consensus; /* consensus residues for ali display, 1..M */
- float evparam[p7_NEVPARAM]; /* parameters for determining E-values, or UNSET */
- float cutoff[p7_NCUTOFFS]; /* per-seq/per-dom bit cutoffs, or UNSET */
- float compo[p7_MAXABET]; /* per-model HMM filter composition, or UNSET */
- const ESL_ALPHABET *abc; /* copy of ptr to alphabet information */
-
- /* Information about current configuration, size, allocation */
- int L; /* current configured target seq length */
- int M; /* model length */
- int allocM; /* maximum model length currently allocated for */
- int allocQ4; /* p7_NQF(allocM): alloc size for tf, rf */
- int allocQ8; /* p7_NQW(allocM): alloc size for tw, rw */
- int allocQ16; /* p7_NQB(allocM): alloc size for rb */
- int mode; /* currently must be p7_LOCAL */
- float nj; /* expected # of J's: 0 or 1, uni vs. multihit */
-
- int clone; /* this optimized profile structure is just a copy */
- /* of another profile structre. all pointers of */
- /* this structure should not be freed. */
-} P7_OPROFILE;
-
-typedef struct {
- int count; /* number of <P7_OPROFILE> objects in the block */
- int listSize; /* maximum number elements in the list */
- P7_OPROFILE **list; /* array of <P7_OPROFILE> objects */
-} P7_OM_BLOCK;
-
-/* retrieve match odds ratio [k][x]
-* this gets used in p7_alidisplay.c, when we're deciding if a residue is conserved or not */
-static inline float
-p7_oprofile_FGetEmission(const P7_OPROFILE *om, int k, int x)
-{
- union { __m128 v; float p[4]; } u;
- int Q = p7O_NQF(om->M);
- int q = ((k-1) % Q);
- int r = (k-1)/Q;
- u.v = om->rfv[x][q];
- return u.p[r];
-}
-
-/*****************************************************************
-* 2. P7_OMX: a one-row dynamic programming matrix
-*****************************************************************/
-
-enum p7x_scells_e { p7X_M = 0, p7X_D = 1, p7X_I = 2 };
-#define p7X_NSCELLS 3
-
-/* Besides ENJBC states, we may also store a rescaling factor on each row */
-enum p7x_xcells_e { p7X_E = 0, p7X_N = 1, p7X_J = 2, p7X_B = 3, p7X_C = 4, p7X_SCALE = 5 };
-#define p7X_NXCELLS 6
-
-/*
-*
-* dpf[][]
-* to access M(i,k) for i=0,1..L; k=1..M: dpf[i][(k-1)/4 + p7X_M].element[(k-1)%4]
-*
-* xmx[] arrays for individual special states:
-* xmx[ENJBC] = [0 1 2 3][4 5 6 7]..[L-2 L-1 L x] XRQ >= (L/4)+1
-* to access B[i] for example, for i=0..L: xmx[B][i/4].x[i%4] (quad i/4; element i%4).
-*/
-typedef struct p7_omx_s {
- int M; /* current actual model dimension */
- int L; /* current actual sequence dimension */
-
- /* The main dynamic programming matrix for M,D,I states */
- __m128 **dpf; /* striped DP matrix for [0,1..L][0..Q-1][MDI], float vectors */
- __m128i **dpw; /* striped DP matrix for [0,1..L][0..Q-1][MDI], sword vectors */
- __m128i **dpb; /* striped DP matrix for [0,1..L][0..Q-1] uchar vectors */
- void *dp_mem; /* DP memory shared by <dpb>, <dpw>, <dpf> */
- int allocR; /* current allocated # rows in dp{uf}. allocR >= validR >= L+1 */
- int validR; /* current # of rows actually pointing at DP memory */
- int allocQ4; /* current set row width in <dpf> quads: allocQ4*4 >= M */
- int allocQ8; /* current set row width in <dpw> octets: allocQ8*8 >= M */
- int allocQ16; /* current set row width in <dpb> 16-mers: allocQ16*16 >= M */
- size_t ncells; /* current allocation size of <dp_mem>, in accessible cells */
-
- /* The X states (for full,parser; or NULL, for scorer) */
- float *xmx; /* logically [0.1..L][ENJBCS]; indexed [i*p7X_NXCELLS+s] */
- void *x_mem; /* X memory before 16-byte alignment */
- int allocXR; /* # of rows allocated in each xmx[] array; allocXR >= L+1 */
- float totscale; /* log of the product of all scale factors (0.0 if unscaled) */
- int has_own_scales; /* TRUE to use own scale factors; FALSE if scales provided */
-
- /* Parsers,scorers only hold a row at a time, so to get them to dump full matrix, it
- * must be done during a DP calculation, after each row is calculated
- */
- int debugging; /* TRUE if we're in debugging mode */
- FILE *dfp; /* output stream for diagnostics */
-} P7_OMX;
-
-/* ?MXo(q) access macros work for either uchar or float, so long as you
-* init your "dp" to point to the appropriate array.
-*/
-#define MMXo(q) (dp[(q) * p7X_NSCELLS + p7X_M])
-#define DMXo(q) (dp[(q) * p7X_NSCELLS + p7X_D])
-#define IMXo(q) (dp[(q) * p7X_NSCELLS + p7X_I])
-#define XMXo(i,s) (xmx[(i) * p7X_NXCELLS + s])
-
-/* and this version works with a ptr to the approp DP row. */
-#define MMO(dp,q) ((dp)[(q) * p7X_NSCELLS + p7X_M])
-#define DMO(dp,q) ((dp)[(q) * p7X_NSCELLS + p7X_D])
-#define IMO(dp,q) ((dp)[(q) * p7X_NSCELLS + p7X_I])
-
-static inline float
-p7_omx_FGetMDI(const P7_OMX *ox, int s, int i, int k)
-{
- union { __m128 v; float p[4]; } u;
- int Q = p7O_NQF(ox->M);
- int q = p7X_NSCELLS * ((k-1) % Q) + s;
- int r = (k-1)/Q;
- u.v = ox->dpf[i][q];
- return u.p[r];
-}
-
-static inline void
-p7_omx_FSetMDI(const P7_OMX *ox, int s, int i, int k, float val)
-{
- union { __m128 v; float p[4]; } u;
- int Q = p7O_NQF(ox->M);
- int q = p7X_NSCELLS * ((k-1) % Q) + s;
- int r = (k-1)/Q;
-
- u.v = ox->dpf[i][q];
- u.p[r] = val;
- ox->dpf[i][q] = u.v;
-}
-
-
-
-/*****************************************************************
-* 3. Declarations of the external API.
-*****************************************************************/
-
-/* p7_omx.c */
-extern P7_OMX *p7_omx_Create(int allocM, int allocL, int allocXL);
-extern int p7_omx_GrowTo(P7_OMX *ox, int allocM, int allocL, int allocXL);
-extern int p7_omx_FDeconvert(P7_OMX *ox, P7_GMX *gx);
-extern int p7_omx_Reuse (P7_OMX *ox);
-extern void p7_omx_Destroy(P7_OMX *ox);
-
-extern int p7_omx_SetDumpMode(FILE *fp, P7_OMX *ox, int truefalse);
-extern int p7_omx_DumpMFRow(P7_OMX *ox, int rowi, uint8_t xE, uint8_t xN, uint8_t xJ, uint8_t xB, uint8_t xC);
-extern int p7_omx_DumpVFRow(P7_OMX *ox, int rowi, int16_t xE, int16_t xN, int16_t xJ, int16_t xB, int16_t xC);
-extern int p7_omx_DumpFBRow(P7_OMX *ox, int logify, int rowi, int width, int precision, float xE, float xN, float xJ, float xB, float xC);
-
-
-
-/* p7_oprofile.c */
-extern P7_OPROFILE *p7_oprofile_Create(int M, const ESL_ALPHABET *abc);
-extern int p7_oprofile_IsLocal(const P7_OPROFILE *om);
-extern void p7_oprofile_Destroy(P7_OPROFILE *om);
-extern P7_OPROFILE *p7_oprofile_Copy(P7_OPROFILE *om);
-extern P7_OPROFILE *p7_oprofile_Clone(const P7_OPROFILE *om);
-
-extern int p7_oprofile_Convert(const P7_PROFILE *gm, P7_OPROFILE *om);
-extern int p7_oprofile_ReconfigLength (P7_OPROFILE *om, int L, int wholeSeqSz);
-extern int p7_oprofile_ReconfigMSVLength (P7_OPROFILE *om, int L);
-extern int p7_oprofile_ReconfigRestLength(P7_OPROFILE *om, int L, int wholeSz);
-extern int p7_oprofile_ReconfigMultihit (P7_OPROFILE *om, int L, int wholeSz);
-extern int p7_oprofile_ReconfigUnihit (P7_OPROFILE *om, int L, int wholeSz);
-
-extern int p7_oprofile_Dump(FILE *fp, const P7_OPROFILE *om);
-extern int p7_oprofile_Sample(ESL_RANDOMNESS *r, const ESL_ALPHABET *abc, const P7_BG *bg, int M, int L,
- P7_HMM **opt_hmm, P7_PROFILE **opt_gm, P7_OPROFILE **ret_om);
-extern int p7_oprofile_Compare(const P7_OPROFILE *om1, const P7_OPROFILE *om2, float tol, char *errmsg);
-extern int p7_profile_SameAsMF(const P7_OPROFILE *om, P7_PROFILE *gm);
-extern int p7_profile_SameAsVF(const P7_OPROFILE *om, P7_PROFILE *gm);
-
-
-
-/* decoding.c */
-extern int p7_Decoding (const P7_OPROFILE *om, const P7_OMX *oxf, P7_OMX *oxb, P7_OMX *pp);
-extern int p7_DomainDecoding(const P7_OPROFILE *om, const P7_OMX *oxf, const P7_OMX *oxb, P7_DOMAINDEF *ddef);
-
-/* fwdback.c */
-// ! ADDED PERCENTS and TASKSTATEINFO !
-extern int p7_Forward (const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *fwd, float *opt_sc,
- int percentBorder, U2::TaskStateInfo & ti);
-extern int p7_ForwardParser (const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *fwd, float *opt_sc,
- int percentBorder, U2::TaskStateInfo & ti );
-extern int p7_Backward (const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, const P7_OMX *fwd, P7_OMX *bck, float *opt_sc,
- int percentBorder, U2::TaskStateInfo & ti);
-extern int p7_BackwardParser(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, const P7_OMX *fwd, P7_OMX *bck, float *opt_sc,
- int percentBorder, U2::TaskStateInfo & ti );
-
-// here were definitions of io functions. we don't need them
-
-
-/* msvfilter.c */
-extern int p7_MSVFilter (const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *ret_sc, int percentBorder, U2::TaskStateInfo & ti );
-
-/* null2.c */
-extern int p7_Null2_ByExpectation(const P7_OPROFILE *om, const P7_OMX *pp, float *null2);
-extern int p7_Null2_ByTrace (const P7_OPROFILE *om, const P7_TRACE *tr, int zstart, int zend, P7_OMX *wrk, float *null2);
-
-/* optacc.c */
-extern int p7_OptimalAccuracy(const P7_OPROFILE *om, const P7_OMX *pp, P7_OMX *ox, float *ret_e);
-extern int p7_OATrace (const P7_OPROFILE *om, const P7_OMX *pp, const P7_OMX *ox, P7_TRACE *tr);
-
-/* stotrace.c */
-extern int p7_StochasticTrace(ESL_RANDOMNESS *rng, const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, const P7_OMX *ox,
- P7_TRACE *tr);
-
-/* vitfilter.c */
-extern int p7_ViterbiFilter(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *ret_sc, int percentBorder, U2::TaskStateInfo & ti);
-
-/* vitscore.c */
-extern int p7_ViterbiScore (const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *ret_sc);
-
-/*****************************************************************
-* 4. Implementation specific initialization
-*****************************************************************/
-static inline void
-impl_Init(void)
-{
-#ifdef HAVE_FLUSH_ZERO_MODE
- /* In order to avoid the performance penalty dealing with sub-normal
- * values in the floating point calculations, set the processor flag
- * so sub-normals are "flushed" immediately to zero.
- */
- _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
-#endif
-}
-
-#endif /* P7_IMPL_SSE_INCLUDED */
-
-/*****************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-*****************************************************************/
-
-/*
-* Currently (and this remains in flux as of 14 Dec 07) an optimized
-* implementation is required to provide an MSVFilter(),
-* ViterbiFilter() and a ForwardFilter() implementation. A call to
-* p7_oprofile_Convert() makes an optimized profile that works for
-* all filters.
-*
-* Any "Filter" returns a score may be an approximation (with
-* characterized or at least characterizable error), and which may
-* have limited upper range, such that high scores are returned as
-* eslINFINITY. Additionally, Filters might only work on local
-* alignment modes, because they are allowed to make assumptions about
-* the range of scores.
-*
-* Here, MSVFilter() and ViterbiFilter() are 8-bit lspace
-* implementations with limited precision and limited range (max 20
-* bits); ForwardFilter() is a pspace float implementation with
-* correct precision and limited range (max ~127 bits). Both require
-* local mode models.
-*
-* An optimized implementation may also provide other optimized
-* routines. It provides specialized Convert*() functions for these,
-* which may no-op (if the OPROFILE already suffices), or may
-* overwrite parts of the OPROFILE that Filters or other routines
-* might need. Therefore, after using a "bonus" function, a fresh
-* Convert() will be needed before a Filter() is called again. This
-* API is tentative.
-*
-* For example, here, ViterbiScore() is a 32-bit lspace float SSE
-* implementation of the Viterbi algorithm.
-*
-* A "Score" function might be an additional target for optimization,
-* for example. A "Score" function returns a correct score with full
-* floating-point precision and range, and works for any mode model.
-*
-* In the generic implementation, profile scores are 32-bit floating
-* point log-odds scores. In an optimized implementation, internally,
-* profile scores can be of any type, and may be in log space (lspace)
-* or probability space (pspace). (Calculations in probability space
-* are useful in the Forward algorithm, but always limit range.) A
-* shorthand of "lspace uchar" means log-odds scores stored as
-* unsigned chars, for example; "pspace float" means odds ratios
-* stored as floats.
-*
-* A note on memory alignment: malloc() is required to return a
-* pointer "suitably aligned so that it may be aligned to a pointer of
-* any type of object" (C99 7.20.3). __m128 vectors are 128-bits wide,
-* so malloc() ought to return a pointer aligned on a 16-byte
-* boundary. However, this is not the case for glibc, and apparently
-* other system libraries. Google turns up threads of arguments
-* between glibc and gcc developers over whose problem this is; this
-* argument has apparently not been resolved, and is of no help.
-* Here, we manually align the relevant pointers by overallocating in
-* *_mem with malloc, then arithmetically manipulating the address to
-* mask off (~0xf).
-*/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/msvfilter.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/msvfilter.cpp
deleted file mode 100644
index 1eb369f..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/msvfilter.cpp
+++ /dev/null
@@ -1,221 +0,0 @@
-/* The MSV filter implementation; SSE version.
-*
-* A "filter" is a one-row, O(M), DP implementation that calculates an
-* approximated nat score (i.e. in limited precision - here, uchar)
-* and may have limited numeric range. It will return <eslERANGE> if
-* its numeric range is exceeded, in which case the caller will have
-* to obtain the score by another (probably slower) method.
-*
-* Contents:
-* 1. p7_MSVFilter() implementation.
-* 6. Copyright and license information
-*
-* SRE, Sun Nov 25 11:26:48 2007 [Casa de Gatos]
- * SVN $Id: msvfilter.c 3038 2009-11-05 12:55:34Z eddys $
-*/
-#include <hmmer3/p7_config.h>
-
-#include <stdio.h>
-#include <math.h>
-
-#include <xmmintrin.h> /* SSE */
-#include <emmintrin.h> /* SSE2 */
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_sse.h>
-
-#include <hmmer3/hmmer.h>
-#include "impl_sse.h"
-
-/*****************************************************************
-* 1. The p7_MSVFilter() DP implementation.
-*****************************************************************/
-
-/* Function: p7_MSVFilter()
-* Synopsis: Calculates MSV score, vewy vewy fast, in limited precision.
-* Incept: SRE, Wed Dec 26 15:12:25 2007 [Janelia]
-*
-* Purpose: Calculates an approximation of the MSV score for sequence
-* <dsq> of length <L> residues, using optimized profile <om>,
-* and a preallocated one-row DP matrix <ox>. Return the
-* estimated MSV score (in nats) in <ret_sc>.
-*
-* Score may overflow (and will, on high-scoring
-* sequences), but will not underflow.
-*
-* The model may be in any mode, because only its match
-* emission scores will be used. The MSV filter inherently
-* assumes a multihit local mode, and uses its own special
-* state transition scores, not the scores in the profile.
-*
-* Args: dsq - digital target sequence, 1..L
-* L - length of dsq in residues
-* om - optimized profile
-* ox - DP matrix
-* ret_sc - RETURN: MSV score (in nats)
-*
-* Note: We misuse the matrix <ox> here, using only a third of the
-* first dp row, accessing it as <dp[0..Q-1]> rather than
-* in triplets via <{MDI}MX(q)> macros, since we only need
-* to store M state values. We know that if <ox> was big
-* enough for normal DP calculations, it must be big enough
-* to hold the MSVFilter calculation.
-*
-* Returns: <eslOK> on success.
-* <eslERANGE> if the score overflows the limited range; in
-* this case, this is a high-scoring hit.
-*
-* Throws: <eslEINVAL> if <ox> allocation is too small.
-*/
-int
-p7_MSVFilter(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *ret_sc, int percentBorder, U2::TaskStateInfo & ti )
-{
- register __m128i mpv; /* previous row values */
- register __m128i xEv; /* E state: keeps max for Mk->E as we go */
- register __m128i xBv; /* B state: splatted vector of B[i-1] for B->Mk calculations */
- register __m128i sv; /* temp storage of 1 curr row value in progress */
- register __m128i biasv; /* emission bias in a vector */
- uint8_t xJ; /* special states' scores */
- int i; /* counter over sequence positions 1..L */
- int q; /* counter over vectors 0..nq-1 */
- int Q = p7O_NQB(om->M); /* segment length: # of vectors */
- __m128i *dp = ox->dpb[0]; /* we're going to use dp[0][0..q..Q-1], not {MDI}MX(q) macros*/
- __m128i *rsc; /* will point at om->rbv[x] for residue x[i] */
-
- __m128i xJv; /* vector for states score */
- __m128i tbmv; /* vector for B->Mk cost */
- __m128i tecv; /* vector for E->C cost */
- __m128i tjbv; /* vector for NCJ move cost */
- __m128i basev; /* offset for scores */
- __m128i ceilingv; /* saturateed simd value used to test for overflow */
- __m128i tempv; /* work vector */
-
- int cmp;
-
- /* Check that the DP matrix is ok for us. */
- if (Q > ox->allocQ16) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small");
- ox->M = om->M;
-
- /* Initialization. In offset unsigned arithmetic, -infinity is 0, and 0 is om->base.
- */
- biasv = _mm_set1_epi8((int8_t) om->bias_b); /* yes, you can set1() an unsigned char vector this way */
- for (q = 0; q < Q; q++) dp[q] = _mm_setzero_si128();
- xJ = 0;
-
- /* saturate simd register for overflow test */
- ceilingv = _mm_cmpeq_epi8(biasv, biasv);
-
- basev = _mm_set1_epi8((int8_t) om->base_b);
-
- tbmv = _mm_set1_epi8((int8_t) om->tbm_b);
- tecv = _mm_set1_epi8((int8_t) om->tec_b);
- tjbv = _mm_set1_epi8((int8_t) om->tjb_b);
-
- xJv = _mm_subs_epu8(biasv, biasv);
- xBv = _mm_subs_epu8(basev, tjbv);
-
-#if p7_DEBUGGING
- if (ox->debugging)
- {
- uint8_t xB;
- xB = _mm_extract_epi16(xBv, 0);
- xJ = _mm_extract_epi16(xJv, 0);
- p7_omx_DumpMFRow(ox, 0, 0, 0, xJ, xB, xJ);
- }
-#endif
-
- // ! ADDED CODE !
- int progressStart = ti.progress;
- for (i = 1; i <= L; i++)
- {
- // ! ADDED CODE !
- ti.progress = progressStart + (int)(((double)percentBorder / L) * i);
- if( ti.cancelFlag ){ return eslCANCELED; }
-
- rsc = om->rbv[dsq[i]];
- xEv = _mm_setzero_si128();
- xBv = _mm_subs_epu8(xBv, tbmv);
-
- /* Right shifts by 1 byte. 4,8,12,x becomes x,4,8,12.
- * Because ia32 is littlendian, this means a left bit shift.
- * Zeros shift on automatically, which is our -infinity.
- */
- mpv = _mm_slli_si128(dp[Q-1], 1);
- for (q = 0; q < Q; q++)
- {
- /* Calculate new MMXo(i,q); don't store it yet, hold it in sv. */
- sv = _mm_max_epu8(mpv, xBv);
- sv = _mm_adds_epu8(sv, biasv);
- sv = _mm_subs_epu8(sv, *rsc); rsc++;
- xEv = _mm_max_epu8(xEv, sv);
-
- mpv = dp[q]; /* Load {MDI}(i-1,q) into mpv */
- dp[q] = sv; /* Do delayed store of M(i,q) now that memory is usable */
- }
-
- /* test for the overflow condition */
- tempv = _mm_adds_epu8(xEv, biasv);
- tempv = _mm_cmpeq_epi8(tempv, ceilingv);
- cmp = _mm_movemask_epi8(tempv);
-
- /* Now the "special" states, which start from Mk->E (->C, ->J->B)
- * Use shuffles instead of shifts so when the last max has completed,
- * the last four elements of the simd register will contain the
- * max value. Then the last shuffle will broadcast the max value
- * to all simd elements.
- */
- tempv = _mm_shuffle_epi32(xEv, _MM_SHUFFLE(2, 3, 0, 1));
- xEv = _mm_max_epu8(xEv, tempv);
- tempv = _mm_shuffle_epi32(xEv, _MM_SHUFFLE(0, 1, 2, 3));
- xEv = _mm_max_epu8(xEv, tempv);
- tempv = _mm_shufflelo_epi16(xEv, _MM_SHUFFLE(2, 3, 0, 1));
- xEv = _mm_max_epu8(xEv, tempv);
- tempv = _mm_srli_si128(xEv, 1);
- xEv = _mm_max_epu8(xEv, tempv);
- xEv = _mm_shuffle_epi32(xEv, _MM_SHUFFLE(0, 0, 0, 0));
-
- /* immediately detect overflow */
- if (cmp != 0x0000)
- {
- *ret_sc = eslINFINITY;
- return eslERANGE;
- }
-
- xEv = _mm_subs_epu8(xEv, tecv);
- xJv = _mm_max_epu8(xJv,xEv);
-
- xBv = _mm_max_epu8(basev, xJv);
- xBv = _mm_subs_epu8(xBv, tjbv);
-
-#if p7_DEBUGGING
- if (ox->debugging)
- {
- uint8_t xB, xE;
- xB = _mm_extract_epi16(xBv, 0);
- xE = _mm_extract_epi16(xEv, 0);
- xJ = _mm_extract_epi16(xJv, 0);
- p7_omx_DumpMFRow(ox, i, xE, 0, xJ, xB, xJ);
- }
-#endif
- } /* end loop over sequence residues 1..L */
-
- xJ = (uint8_t) _mm_extract_epi16(xJv, 0);
-
- /* finally C->T, and add our missing precision on the NN,CC,JJ back */
- *ret_sc = ((float) (xJ - om->tjb_b) - (float) om->base_b);
- *ret_sc /= om->scale_b;
- *ret_sc -= 3.0; /* that's ~ L \log \frac{L}{L+3}, for our NN,CC,JJ */
-
- return eslOK;
-}
-/*------------------ end, p7_MSVFilter() ------------------------*/
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/null2.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/null2.cpp
deleted file mode 100644
index a114240..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/null2.cpp
+++ /dev/null
@@ -1,230 +0,0 @@
-/* "null2" model, biased composition correction; SSE implementations.
-*
-* Contents:
-* 1. Null2 estimation algorithms.
-* 6. Copyright and license information.
-*
-* SRE, Mon Aug 18 08:31:11 2008 [Janelia]
- * SVN $Id: null2.c 3018 2009-10-29 17:33:06Z farrarm $
-*/
-
-#include <hmmer3/p7_config.h>
-
-#include <stdlib.h>
-#include <string.h>
-
-#include <xmmintrin.h> /* SSE */
-#include <emmintrin.h> /* SSE2 */
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_sse.h>
-#include <hmmer3/easel/esl_vectorops.h>
-
-#include <hmmer3/hmmer.h>
-#include "impl_sse.h"
-
-/*****************************************************************
-* 1. Null2 estimation algorithms.
-*****************************************************************/
-
-/* Function: p7_Null2_ByExpectation()
-* Synopsis: Calculate null2 model from posterior probabilities.
-* Incept: SRE, Mon Aug 18 08:32:55 2008 [Janelia]
-*
-* Purpose: Identical to <p7_GNull2_ByExpectation()> except that
-* <om>, <pp> are SSE optimized versions of the profile
-* and the residue posterior probability matrix. See
-* <p7_GNull2_ByExpectation()> documentation.
-*
-* Args: om - profile, in any mode, target length model set to <L>
-* pp - posterior prob matrix, for <om> against domain envelope <dsq+i-1> (offset)
-* null2 - RETURN: null2 log odds scores per residue; <0..Kp-1>; caller allocated space
-*/
-int
-p7_Null2_ByExpectation(const P7_OPROFILE *om, const P7_OMX *pp, float *null2)
-{
- int M = om->M;
- int Ld = pp->L;
- int Q = p7O_NQF(M);
- float *xmx = pp->xmx; /* enables use of XMXo(i,s) macro */
- float norm;
- __m128 *rp;
- __m128 sv;
- float xfactor;
- int i,q,x;
-
- /* Calculate expected # of times that each emitting state was used
- * in generating the Ld residues in this domain.
- * The 0 row in <wrk> is used to hold these numbers.
- */
- memcpy(pp->dpf[0], pp->dpf[1], sizeof(__m128) * 3 * Q);
- XMXo(0,p7X_N) = XMXo(1,p7X_N);
- XMXo(0,p7X_C) = XMXo(1,p7X_C); /* 0.0 */
- XMXo(0,p7X_J) = XMXo(1,p7X_J); /* 0.0 */
-
- for (i = 2; i <= Ld; i++)
- {
- for (q = 0; q < Q; q++)
- {
- pp->dpf[0][q*3 + p7X_M] = _mm_add_ps(pp->dpf[i][q*3 + p7X_M], pp->dpf[0][q*3 + p7X_M]);
- pp->dpf[0][q*3 + p7X_I] = _mm_add_ps(pp->dpf[i][q*3 + p7X_I], pp->dpf[0][q*3 + p7X_I]);
- }
- XMXo(0,p7X_N) += XMXo(i,p7X_N);
- XMXo(0,p7X_C) += XMXo(i,p7X_C);
- XMXo(0,p7X_J) += XMXo(i,p7X_J);
- }
-
- /* Convert those expected #'s to frequencies, to use as posterior weights. */
- norm = 1.0 / (float) Ld;
- sv = _mm_set1_ps(norm);
- for (q = 0; q < Q; q++)
- {
- pp->dpf[0][q*3 + p7X_M] = _mm_mul_ps(pp->dpf[0][q*3 + p7X_M], sv);
- pp->dpf[0][q*3 + p7X_I] = _mm_mul_ps(pp->dpf[0][q*3 + p7X_I], sv);
- }
- XMXo(0,p7X_N) *= norm;
- XMXo(0,p7X_C) *= norm;
- XMXo(0,p7X_J) *= norm;
-
- /* Calculate null2's emission odds, by taking posterior weighted sum
- * over all emission vectors used in paths explaining the domain.
- */
- xfactor = XMXo(0, p7X_N) + XMXo(0, p7X_C) + XMXo(0, p7X_J);
- for (x = 0; x < om->abc->K; x++)
- {
- sv = _mm_setzero_ps();
- rp = om->rfv[x];
- for (q = 0; q < Q; q++)
- {
- sv = _mm_add_ps(sv, _mm_mul_ps(pp->dpf[0][q*3 + p7X_M], *rp)); rp++;
- sv = _mm_add_ps(sv, pp->dpf[0][q*3 + p7X_I]); /* insert odds implicitly 1.0 */
- // sv = _mm_add_ps(sv, _mm_mul_ps(pp->dpf[0][q*3 + p7X_I], *rp)); rp++;
- }
- esl_sse_hsum_ps(sv, &(null2[x]));
- null2[x] += xfactor;
- }
- /* now null2[x] = \frac{f_d(x)}{f_0(x)} for all x in alphabet,
- * 0..K-1, where f_d(x) are the ad hoc "null2" residue frequencies
- * for this envelope.
- */
-
- /* make valid scores for all degeneracies, by averaging the odds ratios. */
- esl_abc_FAvgScVec(om->abc, null2);
- null2[om->abc->K] = 1.0; /* gap character */
- null2[om->abc->Kp-2] = 1.0; /* nonresidue "*" */
- null2[om->abc->Kp-1] = 1.0; /* missing data "~" */
-
- return eslOK;
-}
-
-
-/* Function: p7_Null2_ByTrace()
-* Synopsis: Assign null2 scores to an envelope by the sampling method.
-* Incept: SRE, Mon Aug 18 10:22:49 2008 [Janelia]
-*
-* Purpose: Identical to <p7_GNull2_ByTrace()> except that
-* <om>, <wrk> are SSE optimized versions of the profile
-* and the residue posterior probability matrix. See
-* <p7_GNull2_ByTrace()> documentation.
-*/
-int
-p7_Null2_ByTrace(const P7_OPROFILE *om, const P7_TRACE *tr, int zstart, int zend, P7_OMX *wrk, float *null2)
-{
- union { __m128 v; float p[4]; } u;
- int Q = p7O_NQF(om->M);
- int Ld = 0;
- float *xmx = wrk->xmx; /* enables use of XMXo macro */
- float norm;
- float xfactor;
- __m128 sv;
- __m128 *rp;
- int q, r, s;
- int x;
- int z;
-
- /* We'll use the i=0 row in wrk for working space: dp[0][] and xmx[][0]. */
- for (q = 0; q < Q; q++)
- {
- wrk->dpf[0][q*3 + p7X_M] = _mm_setzero_ps();
- wrk->dpf[0][q*3 + p7X_I] = _mm_setzero_ps();
- }
- XMXo(0,p7X_N) = 0.0;
- XMXo(0,p7X_C) = 0.0;
- XMXo(0,p7X_J) = 0.0;
-
- /* Calculate emitting state usage in this particular trace segment */
- for (z = zstart; z <= zend; z++)
- {
- if (tr->i[z] == 0) continue; /* quick test for whether this trace elem emitted or not */
- Ld++;
- if (tr->k[z] > 0) /* must be an M or I */
- { /* surely there's an easier way? but our workspace is striped, interleaved quads... */
- s = ( (tr->st[z] == p7T_M) ? p7X_M : p7X_I);
- q = p7X_NSCELLS * ( (tr->k[z] - 1) % Q) + p7X_M;
- r = (tr->k[z] - 1) / Q;
- u.v = wrk->dpf[0][q];
- u.p[r] += 1.0; /* all this to increment a count by one! */
- wrk->dpf[0][q] = u.v;
-
- }
- else /* emitted an x_i with no k; must be an N,C,J */
- {
- switch (tr->st[z]) {
- case p7T_N: XMXo(0,p7X_N) += 1.0; break;
- case p7T_C: XMXo(0,p7X_C) += 1.0; break;
- case p7T_J: XMXo(0,p7X_J) += 1.0; break;
- }
- }
- }
- norm = 1.0 / (float) Ld;
- sv = _mm_set1_ps(norm);
- for (q = 0; q < Q; q++)
- {
- wrk->dpf[0][q*3 + p7X_M] = _mm_mul_ps(wrk->dpf[0][q*3 + p7X_M], sv);
- wrk->dpf[0][q*3 + p7X_I] = _mm_mul_ps(wrk->dpf[0][q*3 + p7X_I], sv);
- }
- XMXo(0,p7X_N) *= norm;
- XMXo(0,p7X_C) *= norm;
- XMXo(0,p7X_J) *= norm;
-
- /* Calculate null2's emission odds, by taking posterior weighted sum
- * over all emission vectors used in paths explaining the domain.
- */
- xfactor = XMXo(0,p7X_N) + XMXo(0,p7X_C) + XMXo(0,p7X_J);
- for (x = 0; x < om->abc->K; x++)
- {
- sv = _mm_setzero_ps();
- rp = om->rfv[x];
- for (q = 0; q < Q; q++)
- {
- sv = _mm_add_ps(sv, _mm_mul_ps(wrk->dpf[0][q*3 + p7X_M], *rp)); rp++;
- sv = _mm_add_ps(sv, wrk->dpf[0][q*3 + p7X_I]); /* insert emission odds implicitly 1.0 */
- // sv = _mm_add_ps(sv, _mm_mul_ps(wrk->dpf[0][q*3 + p7X_I], *rp)); rp++;
- }
- esl_sse_hsum_ps(sv, &(null2[x]));
- null2[x] += xfactor;
- }
-
- /* now null2[x] = \frac{f_d(x)}{f_0(x)} for all x in alphabet,
- * 0..K-1, where f_d(x) are the ad hoc "null2" residue frequencies
- * for this envelope.
- */
-
- /* make valid scores for all degeneracies, by averaging the odds ratios. */
- esl_abc_FAvgScVec(om->abc, null2);
- null2[om->abc->K] = 1.0; /* gap character */
- null2[om->abc->Kp-2] = 1.0; /* nonresidue "*" */
- null2[om->abc->Kp-1] = 1.0; /* missing data "~" */
-
- return eslOK;
-}
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/optacc.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/optacc.cpp
deleted file mode 100644
index cb9b480..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/optacc.cpp
+++ /dev/null
@@ -1,431 +0,0 @@
-/* Optimal accuracy alignment; SSE version.
-*
-* Contents:
-* 1. Optimal accuracy alignment, DP fill
-* 2. OA traceback
-* 7. Copyright and license information.
-*
-* SRE, Mon Aug 18 20:01:01 2008 [Casa de Gatos]
- * SVN $Id: optacc.c 3018 2009-10-29 17:33:06Z farrarm $
-*/
-
-#include <hmmer3/p7_config.h>
-
-#include <float.h>
-
-#include <xmmintrin.h>
-#include <emmintrin.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_sse.h>
-#include <hmmer3/easel/esl_vectorops.h>
-
-#include <hmmer3/hmmer.h>
-
-
-/*****************************************************************
-* 1. Optimal accuracy alignment, DP fill
-*****************************************************************/
-
-/* Function: p7_OptimalAccuracy()
-* Synopsis: DP fill of an optimal accuracy alignment calculation.
-* Incept: SRE, Mon Aug 18 11:04:48 2008 [Janelia]
-*
-* Purpose: Calculates the fill step of the optimal accuracy decoding
-* algorithm \citep{Kall05}.
-*
-* Caller provides the posterior decoding matrix <pp>,
-* which was calculated by Forward/Backward on a target sequence
-* of length <pp->L> using the query model <om>.
-*
-* Caller also provides a DP matrix <ox>, allocated for a full
-* <om->M> by <L> comparison. The routine fills this in
-* with OA scores.
-*
-* Args: gm - query profile
-* pp - posterior decoding matrix created by <p7_GPosteriorDecoding()>
-* gx - RESULT: caller provided DP matrix for <gm->M> by <L>
-* ret_e - RETURN: expected number of correctly decoded positions
-*
-* Returns: <eslOK> on success, and <*ret_e> contains the final OA
-* score, which is the expected number of correctly decoded
-* positions in the target sequence (up to <L>).
-*
-* Throws: (no abnormal error conditions)
-*/
-int
-p7_OptimalAccuracy(const P7_OPROFILE *om, const P7_OMX *pp, P7_OMX *ox, float *ret_e)
-{
- register __m128 mpv, dpv, ipv; /* previous row values */
- register __m128 sv; /* temp storage of 1 curr row value in progress */
- register __m128 xEv; /* E state: keeps max for Mk->E as we go */
- register __m128 xBv; /* B state: splatted vector of B[i-1] for B->Mk calculations */
- register __m128 dcv;
- float *xmx = ox->xmx;
- __m128 *dpc = ox->dpf[0]; /* current row, for use in {MDI}MO(dpp,q) access macro */
- __m128 *dpp; /* previous row, for use in {MDI}MO(dpp,q) access macro */
- __m128 *ppp; /* quads in the <pp> posterior probability matrix */
- __m128 *tp; /* quads in the <om->tfv> transition scores */
- __m128 zerov = _mm_setzero_ps();
- __m128 infv = _mm_set1_ps(-eslINFINITY);
- int M = om->M;
- int Q = p7O_NQF(M);
- int q;
- int j;
- int i;
- float t1, t2;
-
- ox->M = om->M;
- ox->L = pp->L;
- for (q = 0; q < Q; q++) MMO(dpc, q) = IMO(dpc,q) = DMO(dpc,q) = infv;
- XMXo(0, p7X_E) = -eslINFINITY;
- XMXo(0, p7X_N) = 0.;
- XMXo(0, p7X_J) = -eslINFINITY;
- XMXo(0, p7X_B) = 0.;
- XMXo(0, p7X_C) = -eslINFINITY;
-
- for (i = 1; i <= pp->L; i++)
- {
- dpp = dpc; /* previous DP row in OA matrix */
- dpc = ox->dpf[i]; /* current DP row in OA matrix */
- ppp = pp->dpf[i]; /* current row in the posterior probabilities per position */
- tp = om->tfv; /* transition probabilities */
- dcv = infv;
- xEv = infv;
- xBv = _mm_set1_ps(XMXo(i-1, p7X_B));
-
- mpv = esl_sse_rightshift_ps(MMO(dpp,Q-1), infv); /* Right shifts by 4 bytes. 4,8,12,x becomes x,4,8,12. */
- dpv = esl_sse_rightshift_ps(DMO(dpp,Q-1), infv);
- ipv = esl_sse_rightshift_ps(IMO(dpp,Q-1), infv);
- for (q = 0; q < Q; q++)
- {
- sv = _mm_and_ps(_mm_cmpgt_ps(*tp, zerov), xBv); tp++;
- sv = _mm_max_ps(sv, _mm_and_ps(_mm_cmpgt_ps(*tp, zerov), mpv)); tp++;
- sv = _mm_max_ps(sv, _mm_and_ps(_mm_cmpgt_ps(*tp, zerov), dpv)); tp++;
- sv = _mm_max_ps(sv, _mm_and_ps(_mm_cmpgt_ps(*tp, zerov), ipv)); tp++;
- sv = _mm_add_ps(sv, *ppp); ppp += 2;
- xEv = _mm_max_ps(xEv, sv);
-
- mpv = MMO(dpp,q);
- dpv = DMO(dpp,q);
- ipv = IMO(dpp,q);
-
- MMO(dpc,q) = sv;
- DMO(dpc,q) = dcv;
-
- dcv = _mm_and_ps(_mm_cmpgt_ps(*tp, zerov), sv); tp++;
-
- sv = _mm_and_ps(_mm_cmpgt_ps(*tp, zerov), mpv); tp++;
- sv = _mm_max_ps(sv, _mm_and_ps(_mm_cmpgt_ps(*tp, zerov), ipv)); tp++;
- IMO(dpc,q) = _mm_add_ps(sv, *ppp); ppp++;
- }
-
- /* dcv has carried through from end of q loop above; store it
- * in first pass, we add M->D and D->D path into DMX
- */
- dcv = esl_sse_rightshift_ps(dcv, infv);
- tp = om->tfv + 7*Q; /* set tp to start of the DD's */
- for (q = 0; q < Q; q++)
- {
- DMO(dpc, q) = _mm_max_ps(dcv, DMO(dpc, q));
- dcv = _mm_and_ps(_mm_cmpgt_ps(*tp, zerov), DMO(dpc,q)); tp++;
- }
-
- /* fully serialized D->D; can optimize later */
- for (j = 1; j < 4; j++)
- {
- dcv = esl_sse_rightshift_ps(dcv, infv);
- tp = om->tfv + 7*Q;
- for (q = 0; q < Q; q++)
- {
- DMO(dpc, q) = _mm_max_ps(dcv, DMO(dpc, q));
- dcv = _mm_and_ps(_mm_cmpgt_ps(*tp, zerov), dcv); tp++;
- }
- }
-
- /* D->E paths */
- for (q = 0; q < Q; q++) xEv = _mm_max_ps(xEv, DMO(dpc,q));
-
- /* Specials */
- esl_sse_hmax_ps(xEv, &(XMXo(i,p7X_E)));
-
- t1 = ( (om->xf[p7O_J][p7O_LOOP] == 0.0) ? 0.0 : ox->xmx[(i-1)*p7X_NXCELLS+p7X_J] + pp->xmx[i*p7X_NXCELLS+p7X_J]);
- t2 = ( (om->xf[p7O_E][p7O_LOOP] == 0.0) ? 0.0 : ox->xmx[ i *p7X_NXCELLS+p7X_E]);
- ox->xmx[i*p7X_NXCELLS+p7X_J] = ESL_MAX(t1, t2);
-
- t1 = ( (om->xf[p7O_C][p7O_LOOP] == 0.0) ? 0.0 : ox->xmx[(i-1)*p7X_NXCELLS+p7X_C] + pp->xmx[i*p7X_NXCELLS+p7X_C]);
- t2 = ( (om->xf[p7O_E][p7O_MOVE] == 0.0) ? 0.0 : ox->xmx[ i *p7X_NXCELLS+p7X_E]);
- ox->xmx[i*p7X_NXCELLS+p7X_C] = ESL_MAX(t1, t2);
-
- ox->xmx[i*p7X_NXCELLS+p7X_N] = ((om->xf[p7O_N][p7O_LOOP] == 0.0) ? 0.0 : ox->xmx[(i-1)*p7X_NXCELLS+p7X_N] + pp->xmx[i*p7X_NXCELLS+p7X_N]);
-
- t1 = ( (om->xf[p7O_N][p7O_MOVE] == 0.0) ? 0.0 : ox->xmx[i*p7X_NXCELLS+p7X_N]);
- t2 = ( (om->xf[p7O_J][p7O_MOVE] == 0.0) ? 0.0 : ox->xmx[i*p7X_NXCELLS+p7X_J]);
- ox->xmx[i*p7X_NXCELLS+p7X_B] = ESL_MAX(t1, t2);
- }
-
- *ret_e = ox->xmx[pp->L*p7X_NXCELLS+p7X_C];
- return eslOK;
-}
-/*------------------- end, OA DP fill ---------------------------*/
-
-
-
-
-
-/*****************************************************************
-* 2. OA traceback
-*****************************************************************/
-
-static inline float get_postprob(const P7_OMX *pp, int scur, int sprv, int k, int i);
-
-static inline int select_m(const P7_OPROFILE *om, const P7_OMX *ox, int i, int k);
-static inline int select_d(const P7_OPROFILE *om, const P7_OMX *ox, int i, int k);
-static inline int select_i(const P7_OPROFILE *om, const P7_OMX *ox, int i, int k);
-static inline int select_n(int i);
-static inline int select_c(const P7_OPROFILE *om, const P7_OMX *pp, const P7_OMX *ox, int i);
-static inline int select_j(const P7_OPROFILE *om, const P7_OMX *pp, const P7_OMX *ox, int i);
-static inline int select_e(const P7_OPROFILE *om, const P7_OMX *ox, int i, int *ret_k);
-static inline int select_b(const P7_OPROFILE *om, const P7_OMX *ox, int i);
-
-
-/* Function: p7_OATrace()
-* Synopsis: Optimal accuracy decoding: traceback.
-* Incept: SRE, Mon Aug 18 13:53:33 2008 [Janelia]
-*
-* Purpose: The traceback stage of the optimal accuracy decoding algorithm
-* \citep{Kall05}.
-*
-* Caller provides the OA DP matrix <ox> that was just
-* calculated by <p7_OptimalAccuracyDP()>, as well as the
-* posterior decoding matrix <pp>, which was calculated by
-* Forward/Backward on a target sequence using the query
-* model <gm>. Because the calculation depends only on
-* <pp>, the target sequence itself need not be provided.
-*
-* The resulting optimal accuracy decoding traceback is put
-* in a caller-provided traceback structure <tr>, which the
-* caller has allocated for optional posterior probability
-* annotation on residues (with <p7_trace_CreateWithPP()>,
-* generally). This structure will be reallocated
-* internally if necessary.
-*
-* Args: om - profile
-* pp - posterior probability matrix
-* ox - OA matrix to trace, LxM
-* tr - storage for the recovered traceback
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation error.
-* <eslEINVAL> if the trace <tr> isn't empty (needs to be Reuse()'d).
-*/
-int
-p7_OATrace(const P7_OPROFILE *om, const P7_OMX *pp, const P7_OMX *ox, P7_TRACE *tr)
-{
- int i = ox->L; /* position in sequence 1..L */
- int k = 0; /* position in model 1..M */
- int s0, s1; /* choice of a state */
- float postprob;
- int status;
-
- if (tr->N != 0) ESL_EXCEPTION(eslEINVAL, "trace not empty; needs to be Reuse()'d?");
-
- if ((status = p7_trace_AppendWithPP(tr, p7T_T, k, i, 0.0)) != eslOK) return status;
- if ((status = p7_trace_AppendWithPP(tr, p7T_C, k, i, 0.0)) != eslOK) return status;
-
- s0 = tr->st[tr->N-1];
- while (s0 != p7T_S)
- {
- switch (s0) {
- case p7T_M: s1 = select_m(om, ox, i, k); k--; i--; break;
- case p7T_D: s1 = select_d(om, ox, i, k); k--; break;
- case p7T_I: s1 = select_i(om, ox, i, k); i--; break;
- case p7T_N: s1 = select_n(i); break;
- case p7T_C: s1 = select_c(om, pp, ox, i); break;
- case p7T_J: s1 = select_j(om, pp, ox, i); break;
- case p7T_E: s1 = select_e(om, ox, i, &k); break;
- case p7T_B: s1 = select_b(om, ox, i); break;
- default: ESL_EXCEPTION(eslEINVAL, "bogus state in traceback");
- }
- if (s1 == -1) ESL_EXCEPTION(eslEINVAL, "OA traceback choice failed");
-
- postprob = get_postprob(pp, s1, s0, k, i);
- if ((status = p7_trace_AppendWithPP(tr, s1, k, i, postprob)) != eslOK) return status;
-
- if ( (s1 == p7T_N || s1 == p7T_J || s1 == p7T_C) && s1 == s0) i--;
- s0 = s1;
- } /* end traceback, at S state */
- tr->M = om->M;
- tr->L = ox->L;
- return p7_trace_Reverse(tr);
-}
-
-static inline float
-get_postprob(const P7_OMX *pp, int scur, int sprv, int k, int i)
-{
- int Q = p7O_NQF(pp->M);
- int q = (k-1) % Q; /* (q,r) is position of the current DP cell M(i,k) */
- int r = (k-1) / Q;
- union { __m128 v; float p[4]; } u;
-
- switch (scur) {
- case p7T_M: u.v = MMO(pp->dpf[i], q); return u.p[r];
- case p7T_I: u.v = IMO(pp->dpf[i], q); return u.p[r];
- case p7T_N: if (sprv == scur) return pp->xmx[i*p7X_NXCELLS+p7X_N];
- case p7T_C: if (sprv == scur) return pp->xmx[i*p7X_NXCELLS+p7X_C];
- case p7T_J: if (sprv == scur) return pp->xmx[i*p7X_NXCELLS+p7X_J];
- default: return 0.0;
- }
-}
-
-/* M(i,k) is reached from B(i-1), M(i-1,k-1), D(i-1,k-1), or I(i-1,k-1). */
-static inline int
-select_m(const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
-{
- int Q = p7O_NQF(ox->M);
- int q = (k-1) % Q; /* (q,r) is position of the current DP cell M(i,k) */
- int r = (k-1) / Q;
- __m128 *tp = om->tfv + 7*q; /* *tp now at start of transitions to cur cell M(i,k) */
- __m128 xBv = _mm_set1_ps(ox->xmx[(i-1)*p7X_NXCELLS+p7X_B]);
- __m128 zerov = _mm_setzero_ps();
- __m128 mpv, dpv, ipv;
- union { __m128 v; float p[4]; } u, tv;
- float path[4];
- int state[4] = { p7T_M, p7T_I, p7T_D, p7T_B };
-
- if (q > 0) {
- mpv = ox->dpf[i-1][(q-1)*3 + p7X_M];
- dpv = ox->dpf[i-1][(q-1)*3 + p7X_D];
- ipv = ox->dpf[i-1][(q-1)*3 + p7X_I];
- } else {
- mpv = esl_sse_rightshift_ps(ox->dpf[i-1][(Q-1)*3 + p7X_M], zerov);
- dpv = esl_sse_rightshift_ps(ox->dpf[i-1][(Q-1)*3 + p7X_D], zerov);
- ipv = esl_sse_rightshift_ps(ox->dpf[i-1][(Q-1)*3 + p7X_I], zerov);
- }
-
- /* paths are numbered so that most desirable choice in case of tie is first. */
- u.v = xBv; tv.v = *tp; path[3] = ((tv.p[r] == 0.0) ? -eslINFINITY : u.p[r]); tp++;
- u.v = mpv; tv.v = *tp; path[0] = ((tv.p[r] == 0.0) ? -eslINFINITY : u.p[r]); tp++;
- u.v = ipv; tv.v = *tp; path[1] = ((tv.p[r] == 0.0) ? -eslINFINITY : u.p[r]); tp++;
- u.v = dpv; tv.v = *tp; path[2] = ((tv.p[r] == 0.0) ? -eslINFINITY : u.p[r]);
- return state[esl_vec_FArgMax(path, 4)];
-}
-
-
-/* D(i,k) is reached from M(i, k-1) or D(i,k-1). */
-static inline int
-select_d(const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
-{
- int Q = p7O_NQF(ox->M);
- int q = (k-1) % Q; /* (q,r) is position of the current DP cell D(i,k) */
- int r = (k-1) / Q;
- __m128 zerov = _mm_setzero_ps();
- union { __m128 v; float p[4]; } mpv, dpv, tmdv, tddv;
- float path[2];
-
- if (q > 0) {
- mpv.v = ox->dpf[i][(q-1)*3 + p7X_M];
- dpv.v = ox->dpf[i][(q-1)*3 + p7X_D];
- tmdv.v = om->tfv[7*(q-1) + p7O_MD];
- tddv.v = om->tfv[7*Q + (q-1)];
- } else {
- mpv.v = esl_sse_rightshift_ps(ox->dpf[i][(Q-1)*3 + p7X_M], zerov);
- dpv.v = esl_sse_rightshift_ps(ox->dpf[i][(Q-1)*3 + p7X_D], zerov);
- tmdv.v = esl_sse_rightshift_ps(om->tfv[7*(Q-1) + p7O_MD], zerov);
- tddv.v = esl_sse_rightshift_ps(om->tfv[8*Q-1], zerov);
- }
-
- path[0] = ((tmdv.p[r] == 0.0) ? -eslINFINITY : mpv.p[r]);
- path[1] = ((tddv.p[r] == 0.0) ? -eslINFINITY : dpv.p[r]);
- return ((path[0] >= path[1]) ? p7T_M : p7T_D);
-}
-
-/* I(i,k) is reached from M(i-1, k) or I(i-1,k). */
-static inline int
-select_i(const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
-{
- int Q = p7O_NQF(ox->M);
- int q = (k-1) % Q; /* (q,r) is position of the current DP cell D(i,k) */
- int r = (k-1) / Q;
- __m128 *tp = om->tfv + 7*q + p7O_MI;
- union { __m128 v; float p[4]; } tv, mpv, ipv;
- float path[2];
-
- mpv.v = ox->dpf[i-1][q*3 + p7X_M]; tv.v = *tp; path[0] = ((tv.p[r] == 0.0) ? -eslINFINITY : mpv.p[r]); tp++;
- ipv.v = ox->dpf[i-1][q*3 + p7X_I]; tv.v = *tp; path[1] = ((tv.p[r] == 0.0) ? -eslINFINITY : ipv.p[r]);
- return ((path[0] >= path[1]) ? p7T_M : p7T_I);
-}
-
-/* N(i) must come from N(i-1) for i>0; else it comes from S */
-static inline int
-select_n(int i)
-{
- return ((i==0) ? p7T_S : p7T_N);
-}
-
-/* C(i) is reached from E(i) or C(i-1). */
-static inline int
-select_c(const P7_OPROFILE *om, const P7_OMX *pp, const P7_OMX *ox, int i)
-{
- float path[2];
- path[0] = ( (om->xf[p7O_C][p7O_LOOP] == 0.0) ? -eslINFINITY : ox->xmx[(i-1)*p7X_NXCELLS+p7X_C] + pp->xmx[i*p7X_NXCELLS+p7X_C]);
- path[1] = ( (om->xf[p7O_E][p7O_MOVE] == 0.0) ? -eslINFINITY : ox->xmx[ i *p7X_NXCELLS+p7X_E]);
- return ((path[0] > path[1]) ? p7T_C : p7T_E);
-}
-
-/* J(i) is reached from E(i) or J(i-1). */
-static inline int
-select_j(const P7_OPROFILE *om, const P7_OMX *pp, const P7_OMX *ox, int i)
-{
- float path[2];
-
- path[0] = ( (om->xf[p7O_J][p7O_LOOP] == 0.0) ? -eslINFINITY : ox->xmx[(i-1)*p7X_NXCELLS+p7X_J] + pp->xmx[i*p7X_NXCELLS+p7X_J]);
- path[1] = ( (om->xf[p7O_E][p7O_LOOP] == 0.0) ? -eslINFINITY : ox->xmx[ i *p7X_NXCELLS+p7X_E]);
- return ((path[0] > path[1]) ? p7T_J : p7T_E);
-}
-
-/* E(i) is reached from any M(i, k=1..M) or D(i, k=2..M). */
-/* This assumes all M_k->E, D_k->E are 1.0 */
-static inline int
-select_e(const P7_OPROFILE *om, const P7_OMX *ox, int i, int *ret_k)
-{
- int Q = p7O_NQF(ox->M);
- __m128 *dp = ox->dpf[i];
- union { __m128 v; float p[4]; } u;
- float max = -eslINFINITY;
- int smax, kmax;
- int q,r;
-
- /* precedence rules in case of ties here are a little tricky: M beats D: note the >= max! */
- for (q = 0; q < Q; q++)
- {
- u.v = *dp; dp++; for (r = 0; r < 4; r++) if (u.p[r] >= max) { max = u.p[r]; smax = p7T_M; kmax = r*Q + q + 1; }
- u.v = *dp; dp+=2; for (r = 0; r < 4; r++) if (u.p[r] > max) { max = u.p[r]; smax = p7T_D; kmax = r*Q + q + 1; }
- }
- *ret_k = kmax;
- return smax;
-}
-
-
-/* B(i) is reached from N(i) or J(i). */
-static inline int
-select_b(const P7_OPROFILE *om, const P7_OMX *ox, int i)
-{
- float path[2];
-
- path[0] = ( (om->xf[p7O_N][p7O_MOVE] == 0.0) ? -eslINFINITY : ox->xmx[i*p7X_NXCELLS+p7X_N]);
- path[1] = ( (om->xf[p7O_J][p7O_MOVE] == 0.0) ? -eslINFINITY : ox->xmx[i*p7X_NXCELLS+p7X_J]);
- return ((path[0] > path[1]) ? p7T_N : p7T_J);
-}
-/*---------------------- end, OA traceback ----------------------*/
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/p7_omx.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/p7_omx.cpp
deleted file mode 100644
index 575c167..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/p7_omx.cpp
+++ /dev/null
@@ -1,632 +0,0 @@
-/* SSE implementation of an optimized profile structure.
-*
-* Contents:
-* 1. The P7_OMX structure: a dynamic programming matrix
-* 2. Debugging dumps of P7_OMX structures
-* 3. Copyright and license information
-*
-* See also:
-* p7_omx.ai - figure illustrating the layout of a P7_OMX.
-*
-* SRE, Sun Nov 25 11:26:48 2007 [Casa de Gatos]
-* SVN $Id: p7_omx.c 2790 2009-04-23 12:43:39Z eddys $
-*/
-#include <hmmer3/p7_config.h>
-
-#include <stdio.h>
-#include <math.h>
-#include <float.h>
-
-#include <xmmintrin.h> /* SSE */
-#include <emmintrin.h> /* SSE2 */
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_alphabet.h>
-#include <hmmer3/easel/esl_vectorops.h>
-#include <hmmer3/easel/esl_sse.h>
-
-#include <hmmer3/hmmer.h>
-#include "impl_sse.h"
-
-/*****************************************************************
-* 1. The P7_OMX structure: a dynamic programming matrix
-*****************************************************************/
-
-/* Function: p7_omx_Create()
-* Synopsis: Create an optimized dynamic programming matrix.
-* Incept: SRE, Tue Nov 27 08:48:20 2007 [Janelia]
-*
-* Purpose: Allocates a reusable, resizeable <P7_OMX> for models up to
-* size <allocM> and target sequences up to length
-* <allocL/allocXL>, for use by any of the various optimized
-* DP routines.
-*
-* To allocate the very memory-efficient one-row matrix
-* used by *Filter() and *Score() functions that only
-* calculate scores, <allocM=M>, <allocL=0>, and
-* <allocXL=0>.
-*
-* To allocate the reasonably memory-efficient linear
-* arrays used by *Parser() functions that only keep
-* special (X) state scores, <allocM=M>, <allocL=0>,
-* and <allocXL=L>.
-*
-* To allocate a complete matrix suitable for functions
-* that need the whole DP matrix for traceback, sampling,
-* posterior decoding, or reestimation, <allocM=M> and
-* <allocL=allocXL=L>.
-*
-* Returns: a pointer to the new <P7_OMX>.
-*
-* Throws: <NULL> on allocation failure.
-*/
-P7_OMX *
-p7_omx_Create(int allocM, int allocL, int allocXL)
-{
- P7_OMX *ox = NULL;
- int i;
- int status;
-
- ESL_ALLOC_WITH_TYPE(ox, P7_OMX*, sizeof(P7_OMX));
- ox->dp_mem = NULL;
- ox->dpb = NULL;
- ox->dpw = NULL;
- ox->dpf = NULL;
- ox->xmx = NULL;
- ox->x_mem = NULL;
-
- /* DP matrix will be allocated for allocL+1 rows 0,1..L; allocQ4*p7X_NSCELLS columns */
- ox->allocR = allocL+1;
- ox->validR = ox->allocR;
- ox->allocQ4 = p7O_NQF(allocM);
- ox->allocQ8 = p7O_NQW(allocM);
- ox->allocQ16 = p7O_NQB(allocM);
- ox->ncells = ox->allocR * ox->allocQ4 * 4; /* # of DP cells allocated, where 1 cell contains MDI */
-
- ESL_ALLOC_WITH_TYPE(ox->dp_mem, __m128*, sizeof(__m128) * ox->allocR * ox->allocQ4 * p7X_NSCELLS + 15); /* floats always dominate; +15 for alignment */
- ESL_ALLOC_WITH_TYPE(ox->dpb, __m128i**, sizeof(__m128i *) * ox->allocR);
- ESL_ALLOC_WITH_TYPE(ox->dpw, __m128i**, sizeof(__m128i *) * ox->allocR);
- ESL_ALLOC_WITH_TYPE(ox->dpf, __m128**, sizeof(__m128 *) * ox->allocR);
-
- ox->dpb[0] = (__m128i *) ( ( (unsigned long int) ((char *) ox->dp_mem + 15) & (~0xf)));
- ox->dpw[0] = (__m128i *) ( ( (unsigned long int) ((char *) ox->dp_mem + 15) & (~0xf)));
- ox->dpf[0] = (__m128 *) ( ( (unsigned long int) ((char *) ox->dp_mem + 15) & (~0xf)));
-
- for (i = 1; i <= allocL; i++) {
- ox->dpf[i] = ox->dpf[0] + i * ox->allocQ4 * p7X_NSCELLS;
- ox->dpw[i] = ox->dpw[0] + i * ox->allocQ8 * p7X_NSCELLS;
- ox->dpb[i] = ox->dpb[0] + i * ox->allocQ16;
- }
-
- ox->allocXR = allocXL+1;
- ESL_ALLOC_WITH_TYPE(ox->x_mem, void*, sizeof(float) * ox->allocXR * p7X_NXCELLS + 15);
- ox->xmx = (float *) ( ( (unsigned long int) ((char *) ox->x_mem + 15) & (~0xf)));
-
- ox->M = 0;
- ox->L = 0;
- ox->totscale = 0.0;
- ox->has_own_scales = TRUE; /* most matrices are Forward, control their own scale factors */
-#ifdef p7_DEBUGGING
- ox->debugging = FALSE;
- ox->dfp = NULL;
-#endif
- return ox;
-
-ERROR:
- p7_omx_Destroy(ox);
- return NULL;
-}
-
-/* Function: p7_omx_GrowTo()
-* Synopsis: Assure that a DP matrix is big enough.
-* Incept: SRE, Thu Dec 20 09:27:07 2007 [Janelia]
-*
-* Purpose: Assures that an optimized DP matrix <ox> is allocated for
-* a model up to <allocM> in length; if not, reallocate to
-* make it so.
-*
-* Because the optimized matrix is one-row, only the model
-* length matters; the target sequence length isn't
-* relevant.
-*
-* Returns: <eslOK> on success, and <gx> may be reallocated upon
-* return; any data that may have been in <gx> must be
-* assumed to be invalidated.
-*
-* Throws: <eslEMEM> on allocation failure, and any data that may
-* have been in <gx> must be assumed to be invalidated.
-*/
-int
-p7_omx_GrowTo(P7_OMX *ox, int allocM, int allocL, int allocXL)
-{
- void *p;
- int nqf = p7O_NQF(allocM); /* segment length; total # of striped vectors for uchar */
- int nqw = p7O_NQW(allocM); /* segment length; total # of striped vectors for float */
- int nqb = p7O_NQB(allocM); /* segment length; total # of striped vectors for float */
- size_t ncells = (allocL+1) * nqf * 4;
- int reset_row_pointers = FALSE;
- int i;
- int status;
-
- /* If all possible dimensions are already satisfied, the matrix is fine */
- if (ox->allocQ4*4 >= allocM && ox->validR > allocL && ox->allocXR >= allocXL+1) return eslOK;
-
- /* If the main matrix is too small in cells, reallocate it;
- * and we'll need to realign/reset the row pointers later.
- */
- if (ncells > ox->ncells)
- {
- ESL_RALLOC_WITH_TYPE(ox->dp_mem, void*, p, sizeof(__m128) * (allocL+1) * nqf * p7X_NSCELLS + 15);
- ox->ncells = ncells;
- reset_row_pointers = TRUE;
- }
-
- /* If the X beams are too small, reallocate them. */
- if (allocXL+1 >= ox->allocXR)
- {
- ESL_RALLOC_WITH_TYPE(ox->x_mem, void*, p, sizeof(float) * (allocXL+1) * p7X_NXCELLS + 15);
- ox->allocXR = allocXL+1;
- ox->xmx = (float *) ( ( (unsigned long int) ((char *) ox->x_mem + 15) & (~0xf)));
- }
-
- /* If there aren't enough rows, reallocate the row pointers; we'll
- * realign and reset them later.
- */
- if (allocL >= ox->allocR)
- {
- ESL_RALLOC_WITH_TYPE(ox->dpb, __m128i**, p, sizeof(__m128i *) * (allocL+1));
- ESL_RALLOC_WITH_TYPE(ox->dpw, __m128i**, p, sizeof(__m128i *) * (allocL+1));
- ESL_RALLOC_WITH_TYPE(ox->dpf, __m128**, p, sizeof(__m128 *) * (allocL+1));
- ox->allocR = allocL+1;
- reset_row_pointers = TRUE;
- }
-
- /* must we widen the rows? */
- if (allocM > ox->allocQ4*4)
- reset_row_pointers = TRUE;
-
- /* must we set some more valid row pointers? */
- if (allocL >= ox->validR)
- reset_row_pointers = TRUE;
-
- /* now reset the row pointers, if needed */
- if (reset_row_pointers)
- {
- ox->dpb[0] = (__m128i *) ( ( (unsigned long int) ((char *) ox->dp_mem + 15) & (~0xf)));
- ox->dpw[0] = (__m128i *) ( ( (unsigned long int) ((char *) ox->dp_mem + 15) & (~0xf)));
- ox->dpf[0] = (__m128 *) ( ( (unsigned long int) ((char *) ox->dp_mem + 15) & (~0xf)));
-
- ox->validR = ESL_MIN( (int)ox->ncells / (nqf * 4), ox->allocR);
- for (i = 1; i < ox->validR; i++)
- {
- ox->dpb[i] = ox->dpb[0] + i * nqb;
- ox->dpw[i] = ox->dpw[0] + i * nqw * p7X_NSCELLS;
- ox->dpf[i] = ox->dpf[0] + i * nqf * p7X_NSCELLS;
- }
-
- ox->allocQ4 = nqf;
- ox->allocQ8 = nqw;
- ox->allocQ16 = nqb;
- }
-
- ox->M = 0;
- ox->L = 0;
- return eslOK;
-
-ERROR:
- return status;
-}
-
-/* Function: p7_omx_FDeconvert()
-* Synopsis: Convert an optimized DP matrix to generic one.
-* Incept: SRE, Tue Aug 19 17:58:13 2008 [Janelia]
-*
-* Purpose: Convert the 32-bit float values in optimized DP matrix
-* <ox> to a generic one <gx>. Caller provides <gx> with sufficient
-* space to hold the <ox->M> by <ox->L> matrix.
-*
-* This function is used to gain access to the
-* somewhat more powerful debugging and display
-* tools available for generic DP matrices.
-*/
-int
-p7_omx_FDeconvert(P7_OMX *ox, P7_GMX *gx)
-{
- int Q = p7O_NQF(ox->M);
- int i, q, r, k;
- union { __m128 v; float p[4]; } u;
- float **dp = gx->dp;
- float *xmx = gx->xmx;
-
- for (i = 0; i <= ox->L; i++)
- {
- MMX(i,0) = DMX(i,0) = IMX(i,0) = -eslINFINITY;
- for (q = 0; q < Q; q++)
- {
- u.v = MMO(ox->dpf[i],q); for (r = 0; r < 4; r++) { k = (Q*r)+q+1; if (k <= ox->M) MMX(i, (Q*r)+q+1) = u.p[r]; }
- u.v = DMO(ox->dpf[i],q); for (r = 0; r < 4; r++) { k = (Q*r)+q+1; if (k <= ox->M) DMX(i, (Q*r)+q+1) = u.p[r]; }
- u.v = IMO(ox->dpf[i],q); for (r = 0; r < 4; r++) { k = (Q*r)+q+1; if (k <= ox->M) IMX(i, (Q*r)+q+1) = u.p[r]; }
- }
- XMX(i,p7G_E) = ox->xmx[i*p7X_NXCELLS+p7X_E];
- XMX(i,p7G_N) = ox->xmx[i*p7X_NXCELLS+p7X_N];
- XMX(i,p7G_J) = ox->xmx[i*p7X_NXCELLS+p7X_J];
- XMX(i,p7G_B) = ox->xmx[i*p7X_NXCELLS+p7X_B];
- XMX(i,p7G_C) = ox->xmx[i*p7X_NXCELLS+p7X_C];
- }
- gx->L = ox->L;
- gx->M = ox->M;
- return eslOK;
-}
-
-
-/* Function: p7_omx_Reuse()
-* Synopsis: Recycle an optimized DP matrix.
-* Incept: SRE, Wed Oct 22 11:31:00 2008 [Janelia]
-*
-* Purpose: Recycles <ox> for re-use.
-*
-* Returns: <eslOK> on success.
-*/
-int
-p7_omx_Reuse(P7_OMX *ox)
-{
- ox->M = 0;
- ox->L = 0;
- ox->totscale = 0.0;
- ox->has_own_scales = TRUE; /* default assumes a Forward matrix, with its own scale factors */
-#ifdef p7_DEBUGGING
- ox->debugging = FALSE;
- ox->dfp = NULL;
-#endif
- return eslOK;
-}
-
-
-
-
-/* Function: p7_omx_Destroy()
-* Synopsis: Frees an optimized DP matrix.
-* Incept: SRE, Tue Nov 27 09:11:42 2007 [Janelia]
-*
-* Purpose: Frees optimized DP matrix <ox>.
-*
-* Returns: (void)
-*/
-void
-p7_omx_Destroy(P7_OMX *ox)
-{
- if (ox == NULL) return;
- if (ox->x_mem != NULL) free(ox->x_mem);
- if (ox->dp_mem != NULL) free(ox->dp_mem);
- if (ox->dpf != NULL) free(ox->dpf);
- if (ox->dpw != NULL) free(ox->dpw);
- if (ox->dpb != NULL) free(ox->dpb);
- free(ox);
- return;
-}
-/*------------------- end, P7_OMX structure ---------------------*/
-
-
-
-/*****************************************************************
-* 2. Debugging dumps of P7_OMX structures
-*****************************************************************/
-/* Because the P7_OMX may be a one-row DP matrix, we can't just run a
-* DP calculation and then dump a whole matrix; we have to dump each
-* row one at a time, as the DP calculation is progressing. Thus we
-* need to call the dump from *within* some DP routines. We'd rather not
-* have anything like this in production code - not even a flag check.
-* So, we use a compile-time debugging idiom, with conditionally
-* compiled debugging code that's added to the DP routines to check a
-* debugging flag in the P7_OMX structure; if it's up, we dump a row.
-*
-* Therefore, the externally exposed API call is p7_omx_SetDumpMode(),
-* rather than the dumping routine itself; and all p7_omx_SetDumpMode()
-* does is sets the debugging flag in <ox>.
-*/
-
-/* Function: p7_omx_SetDumpMode()
-* Synopsis: Set an optimized DP matrix to be dumped for debugging.
-* Incept: SRE, Thu Dec 13 10:24:38 2007 [Janelia]
-*
-* Purpose: Sets debugging mode for DP matrix <ox>. If <truefalse>
-* flag is <TRUE>, then whenever a dynamic programming
-* calculation is run, dump DP matrix <ox> to stream <fp>
-* for diagnostics.
-*
-* When the dump mode is on, the DP routine itself actually
-* does the dumping, because it has to dump after every row
-* is calculated. (We're doing an optimized one-row
-* calculation.)
-*
-* If the code has not been compiled with the
-* <p7_DEBUGGING> flag up, this function is a no-op.
-*
-* Args: fp - output stream for diagnostics (stdout, perhaps)
-* ox - DP matrix to set debugging mode
-* truefalse - TRUE to set dumping, FALSE to unset
-*
-* Returns: <eslOK> on success.
-*
-* Throws: (no abnormal error conditions)
-*
-* Xref: J2/62.
-*/
-int
-p7_omx_SetDumpMode(FILE *fp, P7_OMX *ox, int truefalse)
-{
-#if p7_DEBUGGING
- ox->debugging = truefalse;
- ox->dfp = fp;
-#endif
- return eslOK;
-}
-
-
-/* Function: p7_omx_DumpMFRow()
-* Synopsis: Dump one row from MSV uchar version of a DP matrix.
-* Incept: SRE, Wed Jul 30 16:47:26 2008 [Janelia]
-*
-* Purpose: Dump current row of uchar part of DP matrix <ox> for diagnostics,
-* and include the values of specials <xE>, etc. The index <rowi> for
-* the current row is used as a row label. This routine has to be
-* specialized for the layout of the MSVFilter() row, because it's
-* all match scores dp[0..q..Q-1], rather than triplets of M,D,I.
-*
-* If <rowi> is 0, print a header first too.
-*
-* The output format is coordinated with <p7_gmx_Dump()> to
-* facilitate comparison to a known answer.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure.
-*/
-int
-p7_omx_DumpMFRow(P7_OMX *ox, int rowi, uint8_t xE, uint8_t xN, uint8_t xJ, uint8_t xB, uint8_t xC)
-{
- __m128i *dp = ox->dpb[0];
- int M = ox->M;
- int Q = p7O_NQB(M);
- uint8_t *v = NULL; /* array of unstriped scores */
- int q,z,k;
- union { __m128i v; uint8_t i[16]; } tmp;
- int status;
-
- ESL_ALLOC_WITH_TYPE(v, uint8_t*, sizeof(unsigned char) * ((Q*16)+1));
- v[0] = 0;
-
- /* Header (if we're on the 0th row) */
- if (rowi == 0)
- {
- fprintf(ox->dfp, " ");
- for (k = 0; k <= M; k++) fprintf(ox->dfp, "%3d ", k);
- fprintf(ox->dfp, "%3s %3s %3s %3s %3s\n", "E", "N", "J", "B", "C");
- fprintf(ox->dfp, " ");
- for (k = 0; k <= M+5; k++) fprintf(ox->dfp, "%3s ", "---");
- fprintf(ox->dfp, "\n");
- }
-
- /* Unpack and unstripe, then print M's. */
- for (q = 0; q < Q; q++) {
- tmp.v = dp[q];
- for (z = 0; z < 16; z++) v[q+Q*z+1] = tmp.i[z];
- }
- fprintf(ox->dfp, "%4d M ", rowi);
- for (k = 0; k <= M; k++) fprintf(ox->dfp, "%3d ", v[k]);
-
- /* The specials */
- fprintf(ox->dfp, "%3d %3d %3d %3d %3d\n", xE, xN, xJ, xB, xC);
-
- /* I's are all 0's; print just to facilitate comparison. */
- fprintf(ox->dfp, "%4d I ", rowi);
- for (k = 0; k <= M; k++) fprintf(ox->dfp, "%3d ", 0);
- fprintf(ox->dfp, "\n");
-
- /* D's are all 0's too */
- fprintf(ox->dfp, "%4d D ", rowi);
- for (k = 0; k <= M; k++) fprintf(ox->dfp, "%3d ", 0);
- fprintf(ox->dfp, "\n\n");
-
- free(v);
- return eslOK;
-
-ERROR:
- free(v);
- return status;
-}
-
-
-/* Function: p7_omx_DumpVFRow()
-* Synopsis: Dump current row of ViterbiFilter (int16) part of <ox> matrix.
-* Incept: SRE, Wed Jul 30 16:43:21 2008 [Janelia]
-*
-* Purpose: Dump current row of ViterbiFilter (int16) part of DP
-* matrix <ox> for diagnostics, and include the values of
-* specials <xE>, etc. The index <rowi> for the current row
-* is used as a row label.
-*
-* If <rowi> is 0, print a header first too.
-*
-* The output format is coordinated with <p7_gmx_Dump()> to
-* facilitate comparison to a known answer.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure.
-*/
-int
-p7_omx_DumpVFRow(P7_OMX *ox, int rowi, int16_t xE, int16_t xN, int16_t xJ, int16_t xB, int16_t xC)
-{
- __m128i *dp = ox->dpw[0]; /* must set <dp> before using {MDI}MX macros */
- int M = ox->M;
- int Q = p7O_NQW(M);
- int16_t *v = NULL; /* array of unstriped, uninterleaved scores */
- int q,z,k;
- union { __m128i v; int16_t i[8]; } tmp;
- int status;
-
- ESL_ALLOC_WITH_TYPE(v, int16_t*, sizeof(int16_t) * ((Q*8)+1));
- v[0] = 0;
-
- /* Header (if we're on the 0th row)
- */
- if (rowi == 0)
- {
- fprintf(ox->dfp, " ");
- for (k = 0; k <= M; k++) fprintf(ox->dfp, "%6d ", k);
- fprintf(ox->dfp, "%6s %6s %6s %6s %6s\n", "E", "N", "J", "B", "C");
- fprintf(ox->dfp, " ");
- for (k = 0; k <= M+5; k++) fprintf(ox->dfp, "%6s ", "------");
- fprintf(ox->dfp, "\n");
- }
-
- /* Unpack and unstripe, then print M's. */
- for (q = 0; q < Q; q++) {
- tmp.v = MMXo(q);
- for (z = 0; z < 8; z++) v[q+Q*z+1] = tmp.i[z];
- }
- fprintf(ox->dfp, "%4d M ", rowi);
- for (k = 0; k <= M; k++) fprintf(ox->dfp, "%6d ", v[k]);
-
- /* The specials */
- fprintf(ox->dfp, "%6d %6d %6d %6d %6d\n", xE, xN, xJ, xB, xC);
-
- /* Unpack and unstripe, then print I's. */
- for (q = 0; q < Q; q++) {
- tmp.v = IMXo(q);
- for (z = 0; z < 8; z++) v[q+Q*z+1] = tmp.i[z];
- }
- fprintf(ox->dfp, "%4d I ", rowi);
- for (k = 0; k <= M; k++) fprintf(ox->dfp, "%6d ", v[k]);
- fprintf(ox->dfp, "\n");
-
- /* Unpack, unstripe, then print D's. */
- for (q = 0; q < Q; q++) {
- tmp.v = DMXo(q);
- for (z = 0; z < 8; z++) v[q+Q*z+1] = tmp.i[z];
- }
- fprintf(ox->dfp, "%4d D ", rowi);
- for (k = 0; k <= M; k++) fprintf(ox->dfp, "%6d ", v[k]);
- fprintf(ox->dfp, "\n\n");
-
- free(v);
- return eslOK;
-
-ERROR:
- free(v);
- return status;
-
-}
-
-/* Function: p7_omx_DumpFBRow()
-* Synopsis: Dump one row from float part of a DP matrix.
-* Incept: SRE, Wed Jul 30 16:45:16 2008 [Janelia]
-*
-* Purpose: Dump current row of Forward/Backward (float) part of DP
-* matrix <ox> for diagnostics, and include the values of
-* specials <xE>, etc. The index <rowi> for the current row
-* is used as a row label.
-*
-* The output format of the floats is controlled by
-* <width>, <precision>; 8,5 is good for pspace, 5,2 is
-* fine for lspace.
-*
-* If <rowi> is 0, print a header first too.
-*
-* If <logify> is TRUE, then scores are printed as log(score); this is
-* useful for comparing DP with pspace scores with other DP matrices
-* (like generic P7_GMX ones) that use log-odds scores.
-*
-* The output format is coordinated with <p7_gmx_Dump()> to
-* facilitate comparison to a known answer.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure.
-*/
-int
-p7_omx_DumpFBRow(P7_OMX *ox, int logify, int rowi, int width, int precision, float xE, float xN, float xJ, float xB, float xC)
-{
- __m128 *dp;
- int M = ox->M;
- int Q = p7O_NQF(M);
- float *v = NULL; /* array of uninterleaved, unstriped scores */
- int q,z,k;
- union { __m128 v; float x[4]; } tmp;
- int status;
-
- dp = (ox->allocR == 1) ? ox->dpf[0] : ox->dpf[rowi]; /* must set <dp> before using {MDI}MX macros */
-
- ESL_ALLOC_WITH_TYPE(v, float*, sizeof(float) * ((Q*4)+1));
- v[0] = 0.;
-
- if (rowi == 0)
- {
- fprintf(ox->dfp, " ");
- for (k = 0; k <= M; k++) fprintf(ox->dfp, "%*d ", width, k);
- fprintf(ox->dfp, "%*s %*s %*s %*s %*s\n", width, "E", width, "N", width, "J", width, "B", width, "C");
- fprintf(ox->dfp, " ");
- for (k = 0; k <= M+5; k++) fprintf(ox->dfp, "%*s ", width, "--------");
- fprintf(ox->dfp, "\n");
- }
-
- /* Unpack, unstripe, then print M's. */
- for (q = 0; q < Q; q++) {
- tmp.v = MMXo(q);
- for (z = 0; z < 4; z++) v[q+Q*z+1] = tmp.x[z];
- }
- fprintf(ox->dfp, "%3d M ", rowi);
- if (logify) for (k = 0; k <= M; k++) fprintf(ox->dfp, "%*.*f ", width, precision, v[k] == 0. ? -eslINFINITY : log(v[k]));
- else for (k = 0; k <= M; k++) fprintf(ox->dfp, "%*.*f ", width, precision, v[k]);
-
- /* The specials */
- if (logify) fprintf(ox->dfp, "%*.*f %*.*f %*.*f %*.*f %*.*f\n",
- width, precision, xE == 0. ? -eslINFINITY : log(xE),
- width, precision, xN == 0. ? -eslINFINITY : log(xN),
- width, precision, xJ == 0. ? -eslINFINITY : log(xJ),
- width, precision, xB == 0. ? -eslINFINITY : log(xB),
- width, precision, xC == 0. ? -eslINFINITY : log(xC));
- else fprintf(ox->dfp, "%*.*f %*.*f %*.*f %*.*f %*.*f\n",
- width, precision, xE, width, precision, xN, width, precision, xJ,
- width, precision, xB, width, precision, xC);
-
- /* Unpack, unstripe, then print I's. */
- for (q = 0; q < Q; q++) {
- tmp.v = IMXo(q);
- for (z = 0; z < 4; z++) v[q+Q*z+1] = tmp.x[z];
- }
- fprintf(ox->dfp, "%3d I ", rowi);
- if (logify) for (k = 0; k <= M; k++) fprintf(ox->dfp, "%*.*f ", width, precision, v[k] == 0. ? -eslINFINITY : log(v[k]));
- else for (k = 0; k <= M; k++) fprintf(ox->dfp, "%*.*f ", width, precision, v[k]);
- fprintf(ox->dfp, "\n");
-
- /* Unpack, unstripe, then print D's. */
- for (q = 0; q < Q; q++) {
- tmp.v = DMXo(q);
- for (z = 0; z < 4; z++) v[q+Q*z+1] = tmp.x[z];
- }
- fprintf(ox->dfp, "%3d D ", rowi);
- if (logify) for (k = 0; k <= M; k++) fprintf(ox->dfp, "%*.*f ", width, precision, v[k] == 0. ? -eslINFINITY : log(v[k]));
- else for (k = 0; k <= M; k++) fprintf(ox->dfp, "%*.*f ", width, precision, v[k]);
- fprintf(ox->dfp, "\n\n");
-
- free(v);
- return eslOK;
-
-ERROR:
- free(v);
- return status;
-}
-/*------------- end, debugging dumps of P7_OMX ------------------*/
-
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/p7_oprofile.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/p7_oprofile.cpp
deleted file mode 100644
index b631013..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/p7_oprofile.cpp
+++ /dev/null
@@ -1,1453 +0,0 @@
-/* Routines for the P7_OPROFILE structure:
-* a search profile in an optimized implementation.
-*
-* Contents:
-* 1. The P7_OPROFILE object: allocation, initialization, destruction.
-* 2. Conversion from generic P7_PROFILE to optimized P7_OPROFILE
-* 3. Debugging and development utilities.
-* 4. Benchmark driver.
-* 5. Unit tests.
-* 6. Test driver.
-* 7. Example.
-* 8. Copyright and license information.
-*
-* SRE, Wed Jul 30 11:00:04 2008 [Janelia]
- * SVN $Id: p7_oprofile.c 3018 2009-10-29 17:33:06Z farrarm $
-*/
-#include <hmmer3/p7_config.h>
-
-#include <stdio.h>
-#include <string.h>
-#include <math.h> /* roundf() */
-
-#include <xmmintrin.h> /* SSE */
-#include <emmintrin.h> /* SSE2 */
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_random.h>
-#include <hmmer3/easel/esl_sse.h>
-#include <hmmer3/easel/esl_vectorops.h>
-
-#include <hmmer3/hmmer.h>
-#include <hmmer3/impl_sse/impl_sse.h>
-
-
-/*****************************************************************
-* 1. The P7_OPROFILE structure: a score profile.
-*****************************************************************/
-
-/* Function: p7_oprofile_Create()
-* Synopsis: Allocate an optimized profile structure.
-* Incept: SRE, Sun Nov 25 12:03:19 2007 [Casa de Gatos]
-*
-* Purpose: Allocate for profiles of up to <allocM> nodes for digital alphabet <abc>.
-*
-* Throws: <NULL> on allocation error.
-*/
-P7_OPROFILE *
-p7_oprofile_Create(int allocM, const ESL_ALPHABET *abc)
-{
- int status;
- P7_OPROFILE *om = NULL;
- int nqb = p7O_NQB(allocM); /* # of uchar vectors needed for query */
- int nqw = p7O_NQW(allocM); /* # of sword vectors needed for query */
- int nqf = p7O_NQF(allocM); /* # of float vectors needed for query */
- int x;
-
- /* level 0 */
- ESL_ALLOC_WITH_TYPE(om, P7_OPROFILE*, sizeof(P7_OPROFILE));
- om->rbv_mem = NULL;
- om->rwv_mem = NULL;
- om->twv_mem = NULL;
- om->rfv_mem = NULL;
- om->tfv_mem = NULL;
- om->rbv = NULL;
- om->rwv = NULL;
- om->twv = NULL;
- om->rfv = NULL;
- om->tfv = NULL;
- om->clone = 0;
-
- /* level 1 */
- ESL_ALLOC_WITH_TYPE(om->rbv_mem, __m128i*, sizeof(__m128i) * nqb * abc->Kp +15); /* +15 is for manual 16-byte alignment */
- ESL_ALLOC_WITH_TYPE(om->rwv_mem, __m128i*, sizeof(__m128i) * nqw * abc->Kp +15);
- ESL_ALLOC_WITH_TYPE(om->twv_mem,__m128i*, sizeof(__m128i) * nqw * p7O_NTRANS +15);
- ESL_ALLOC_WITH_TYPE(om->rfv_mem,__m128*, sizeof(__m128) * nqf * abc->Kp +15);
- ESL_ALLOC_WITH_TYPE(om->tfv_mem,__m128*, sizeof(__m128) * nqf * p7O_NTRANS +15);
-
- ESL_ALLOC_WITH_TYPE(om->rbv, __m128i**, sizeof(__m128i *) * abc->Kp);
- ESL_ALLOC_WITH_TYPE(om->rwv, __m128i**, sizeof(__m128i *) * abc->Kp);
- ESL_ALLOC_WITH_TYPE(om->rfv, __m128**, sizeof(__m128 *) * abc->Kp);
-
- /* align vector memory on 16-byte boundaries */
- om->rbv[0] = (__m128i *) (((unsigned long int) om->rbv_mem + 15) & (~0xf));
- om->rwv[0] = (__m128i *) (((unsigned long int) om->rwv_mem + 15) & (~0xf));
- om->twv = (__m128i *) (((unsigned long int) om->twv_mem + 15) & (~0xf));
- om->rfv[0] = (__m128 *) (((unsigned long int) om->rfv_mem + 15) & (~0xf));
- om->tfv = (__m128 *) (((unsigned long int) om->tfv_mem + 15) & (~0xf));
-
- /* set the rest of the row pointers for match emissions */
- for (x = 1; x < abc->Kp; x++) {
- om->rbv[x] = om->rbv[0] + (x * nqb);
- om->rwv[x] = om->rwv[0] + (x * nqw);
- om->rfv[x] = om->rfv[0] + (x * nqf);
- }
- om->allocQ16 = nqb;
- om->allocQ8 = nqw;
- om->allocQ4 = nqf;
-
- /* Remaining initializations */
- om->tbm_b = 0;
- om->tec_b = 0;
- om->tjb_b = 0;
- om->scale_b = 0.0f;
- om->base_b = 0;
- om->bias_b = 0;
-
- om->scale_w = 0.0f;
- om->base_w = 0;
- om->ddbound_w = 0;
- om->ncj_roundoff = 0.0f;
-
- for (x = 0; x < p7_NOFFSETS; x++) om->offs[x] = -1;
- for (x = 0; x < p7_NEVPARAM; x++) om->evparam[x] = p7_EVPARAM_UNSET;
- for (x = 0; x < p7_NCUTOFFS; x++) om->cutoff[x] = p7_CUTOFF_UNSET;
- for (x = 0; x < p7_MAXABET; x++) om->compo[x] = p7_COMPO_UNSET;
-
- om->name = NULL;
- om->acc = NULL;
- om->desc = NULL;
-
- /* in a P7_OPROFILE, we always allocate for the optional RF, CS annotation.
- * we only rely on the leading \0 to signal that it's unused, but
- * we initialize all this memory to zeros to shut valgrind up about
- * fwrite'ing uninitialized memory in the io functions.
- */
- ESL_ALLOC_WITH_TYPE(om->rf, char*, sizeof(char) * (allocM+2));
- ESL_ALLOC_WITH_TYPE(om->cs, char*, sizeof(char) * (allocM+2));
- ESL_ALLOC_WITH_TYPE(om->consensus, char*, sizeof(char) * (allocM+2));
- memset(om->rf, '\0', sizeof(char) * (allocM+2));
- memset(om->cs, '\0', sizeof(char) * (allocM+2));
- memset(om->consensus,'\0', sizeof(char) * (allocM+2));
-
- om->abc = abc;
- om->L = 0;
- om->M = 0;
- om->allocM = allocM;
- om->mode = p7_NO_MODE;
- om->nj = 0.0f;
- return om;
-
-ERROR:
- p7_oprofile_Destroy(om);
- return NULL;
-}
-
-/* Function: p7_oprofile_IsLocal()
-* Synopsis: Returns TRUE if profile is in local alignment mode.
-* Incept: SRE, Sat Aug 16 08:46:00 2008 [Janelia]
-*/
-int
-p7_oprofile_IsLocal(const P7_OPROFILE *om)
-{
- if (om->mode == p7_LOCAL || om->mode == p7_UNILOCAL) return TRUE;
- return FALSE;
-}
-
-
-
-/* Function: p7_oprofile_Destroy()
-* Synopsis: Frees an optimized profile structure.
-* Incept: SRE, Sun Nov 25 12:22:21 2007 [Casa de Gatos]
-*/
-void
-p7_oprofile_Destroy(P7_OPROFILE *om)
-{
- if (om == NULL) return;
-
- if (om->clone == 0)
- {
- if (om->rbv_mem != NULL) free(om->rbv_mem);
- if (om->rwv_mem != NULL) free(om->rwv_mem);
- if (om->twv_mem != NULL) free(om->twv_mem);
- if (om->rfv_mem != NULL) free(om->rfv_mem);
- if (om->tfv_mem != NULL) free(om->tfv_mem);
- if (om->rbv != NULL) free(om->rbv);
- if (om->rwv != NULL) free(om->rwv);
- if (om->rfv != NULL) free(om->rfv);
- if (om->name != NULL) free(om->name);
- if (om->acc != NULL) free(om->acc);
- if (om->desc != NULL) free(om->desc);
- if (om->rf != NULL) free(om->rf);
- if (om->cs != NULL) free(om->cs);
- if (om->consensus != NULL) free(om->consensus);
- }
-
- free(om);
-}
-
-/* Function: p7_oprofile_Copy()
- * Synopsis: Allocate an optimized profile structure.
- * Incept: SRE, Sun Nov 25 12:03:19 2007 [Casa de Gatos]
- *
- * Purpose: Allocate for profiles of up to <allocM> nodes for digital alphabet <abc>.
- *
- * Throws: <NULL> on allocation error.
- */
-P7_OPROFILE *
-p7_oprofile_Copy(P7_OPROFILE *om1)
-{
- int x, y;
- int status;
-
- int nqb = p7O_NQB(om1->allocM); /* # of uchar vectors needed for query */
- int nqw = p7O_NQW(om1->allocM); /* # of sword vectors needed for query */
- int nqf = p7O_NQF(om1->allocM); /* # of float vectors needed for query */
-
- size_t size = sizeof(char) * (om1->allocM+2);
-
- P7_OPROFILE *om2 = NULL;
-
- const ESL_ALPHABET *abc = om1->abc;
-
- /* level 0 */
- ESL_ALLOC_WITH_TYPE(om2, P7_OPROFILE*, sizeof(P7_OPROFILE));
- om2->rbv_mem = NULL;
- om2->rwv_mem = NULL;
- om2->twv_mem = NULL;
- om2->rfv_mem = NULL;
- om2->tfv_mem = NULL;
- om2->rbv = NULL;
- om2->rwv = NULL;
- om2->twv = NULL;
- om2->rfv = NULL;
- om2->tfv = NULL;
-
- /* level 1 */
- ESL_ALLOC_WITH_TYPE(om2->rbv_mem, __m128i*, sizeof(__m128i) * nqb * abc->Kp +15); /* +15 is for manual 16-byte alignment */
- ESL_ALLOC_WITH_TYPE(om2->rwv_mem, __m128i*, sizeof(__m128i) * nqw * abc->Kp +15);
- ESL_ALLOC_WITH_TYPE(om2->twv_mem, __m128i*, sizeof(__m128i) * nqw * p7O_NTRANS +15);
- ESL_ALLOC_WITH_TYPE(om2->rfv_mem, __m128*, sizeof(__m128) * nqf * abc->Kp +15);
- ESL_ALLOC_WITH_TYPE(om2->tfv_mem, __m128*, sizeof(__m128) * nqf * p7O_NTRANS +15);
-
- ESL_ALLOC_WITH_TYPE(om2->rbv, __m128i**, sizeof(__m128i *) * abc->Kp);
- ESL_ALLOC_WITH_TYPE(om2->rwv, __m128i**, sizeof(__m128i *) * abc->Kp);
- ESL_ALLOC_WITH_TYPE(om2->rfv, __m128**, sizeof(__m128 *) * abc->Kp);
-
- /* align vector memory on 16-byte boundaries */
- om2->rbv[0] = (__m128i *) (((unsigned long int) om2->rbv_mem + 15) & (~0xf));
- om2->rwv[0] = (__m128i *) (((unsigned long int) om2->rwv_mem + 15) & (~0xf));
- om2->twv = (__m128i *) (((unsigned long int) om2->twv_mem + 15) & (~0xf));
- om2->rfv[0] = (__m128 *) (((unsigned long int) om2->rfv_mem + 15) & (~0xf));
- om2->tfv = (__m128 *) (((unsigned long int) om2->tfv_mem + 15) & (~0xf));
-
- /* copy the vector data */
- memcpy(om2->rbv[0], om1->rbv[0], sizeof(__m128i) * nqb * abc->Kp);
- memcpy(om2->rwv[0], om1->rwv[0], sizeof(__m128i) * nqw * abc->Kp);
- memcpy(om2->rfv[0], om1->rfv[0], sizeof(__m128i) * nqf * abc->Kp);
-
- /* set the rest of the row pointers for match emissions */
- for (x = 1; x < abc->Kp; x++) {
- om2->rbv[x] = om2->rbv[0] + (x * nqb);
- om2->rwv[x] = om2->rwv[0] + (x * nqw);
- om2->rfv[x] = om2->rfv[0] + (x * nqf);
- }
- om2->allocQ16 = nqb;
- om2->allocQ8 = nqw;
- om2->allocQ4 = nqf;
-
- /* Remaining initializations */
- om2->tbm_b = om1->tbm_b;
- om2->tec_b = om1->tec_b;
- om2->tjb_b = om1->tjb_b;
- om2->scale_b = om1->scale_b;
- om2->base_b = om1->base_b;
- om2->bias_b = om1->bias_b;
-
- om2->scale_w = om1->scale_w;
- om2->base_w = om1->base_w;
- om2->ddbound_w = om1->ddbound_w;
- om2->ncj_roundoff = om1->ncj_roundoff;
-
- for (x = 0; x < p7_NOFFSETS; x++) om2->offs[x] = om1->offs[x];
- for (x = 0; x < p7_NEVPARAM; x++) om2->evparam[x] = om1->evparam[x];
- for (x = 0; x < p7_NCUTOFFS; x++) om2->cutoff[x] = om1->cutoff[x];
- for (x = 0; x < p7_MAXABET; x++) om2->compo[x] = om1->compo[x];
-
- for (x = 0; x < nqw * p7O_NTRANS; ++x) om2->twv[x] = om1->twv[x];
- for (x = 0; x < nqf * p7O_NTRANS; ++x) om2->tfv[x] = om1->tfv[x];
-
- for (x = 0; x < p7O_NXSTATES; x++)
- for (y = 0; y < p7O_NXTRANS; y++)
- {
- om2->xw[x][y] = om1->xw[x][y];
- om2->xf[x][y] = om1->xf[x][y];
- }
-
- if ((status = esl_strdup(om1->name, -1, &om2->name)) != eslOK) goto ERROR;
- if ((status = esl_strdup(om1->acc, -1, &om2->acc)) != eslOK) goto ERROR;
- if ((status = esl_strdup(om1->desc, -1, &om2->desc)) != eslOK) goto ERROR;
-
- /* in a P7_OPROFILE, we always allocate for the optional RF, CS annotation.
- * we only rely on the leading \0 to signal that it's unused, but
- * we initialize all this memory to zeros to shut valgrind up about
- * fwrite'ing uninitialized memory in the io functions.
- */
- ESL_ALLOC_WITH_TYPE(om2->rf, char*, size);
- ESL_ALLOC_WITH_TYPE(om2->cs, char*, size);
- ESL_ALLOC_WITH_TYPE(om2->consensus, char*, size);
-
- memcpy(om2->rf, om1->rf, size);
- memcpy(om2->cs, om1->cs, size);
- memcpy(om2->consensus, om1->consensus, size);
-
- om2->abc = om1->abc;
- om2->L = om1->L;
- om2->M = om1->M;
- om2->allocM = om1->allocM;
- om2->mode = om1->mode;
- om2->nj = om1->nj;
-
- om2->clone = om1->clone;
-
- return om2;
-
- ERROR:
- p7_oprofile_Destroy(om2);
- return NULL;
-}
-
-/* Function: p7_oprofile_Clone()
- * Synopsis: Allocate a cloned copy of the optimized profile structure. All
- * allocated memory from the original profile is not reallocated.
- * The cloned copy will point to the same memory as the original.
- * Incept: SRE, Sun Nov 25 12:03:19 2007 [Casa de Gatos]
- *
- * Purpose: Quick copy of an optimized profile used in mutiple threads.
- *
- * Throws: <NULL> on allocation error.
- */
-P7_OPROFILE *
-p7_oprofile_Clone(const P7_OPROFILE *om1)
-{
- int status;
-
- P7_OPROFILE *om2 = NULL;
-
- ESL_ALLOC_WITH_TYPE(om2, P7_OPROFILE*, sizeof(P7_OPROFILE));
- memcpy(om2, om1, sizeof(P7_OPROFILE));
-
- om2->clone = 1;
-
- return om2;
-
- ERROR:
- p7_oprofile_Destroy(om2);
- return NULL;
-}
-
-/*----------------- end, P7_OPROFILE structure ------------------*/
-
-
-
-/*****************************************************************
-* 2. Conversion from generic P7_PROFILE to optimized P7_OPROFILE
-*****************************************************************/
-
-/* biased_byteify()
-* Converts original log-odds residue score to a rounded biased uchar cost.
-* Match emission scores for MSVFilter get this treatment.
-* e.g. a score of +3.2, with scale 3.0 and bias 12, becomes 2.
-* 3.2*3 = 9.6; rounded = 10; bias-10 = 2.
-* When used, we add the bias, then subtract this cost.
-* (A cost of +255 is our -infinity "prohibited event")
-*/
-static uint8_t
-biased_byteify(P7_OPROFILE *om, float sc)
-{
- uint8_t b;
-
- sc = -1.0f * roundf(om->scale_b * sc); /* ugh. sc is now an integer cost represented in a float... */
- b = (sc > 255 - om->bias_b) ? 255 : (uint8_t) sc + om->bias_b; /* and now we cast, saturate, and bias it to an unsigned char cost... */
- return b;
-}
-
-/* unbiased_byteify()
-* Convert original transition score to a rounded uchar cost
-* Transition scores for MSVFilter get this treatment.
-* e.g. a score of -2.1, with scale 3.0, becomes a cost of 6.
-* (A cost of +255 is our -infinity "prohibited event")
-*/
-static uint8_t
-unbiased_byteify(P7_OPROFILE *om, float sc)
-{
- uint8_t b;
-
- sc = -1.0f * roundf(om->scale_b * sc); /* ugh. sc is now an integer cost represented in a float... */
- b = (sc > 255.) ? 255 : (uint8_t) sc; /* and now we cast and saturate it to an unsigned char cost... */
- return b;
-}
-
-/* wordify()
-* Converts log probability score to a rounded signed 16-bit integer cost.
-* Both emissions and transitions for ViterbiFilter get this treatment.
-* No bias term needed, because we use signed words.
-* e.g. a score of +3.2, with scale 500.0, becomes +1600.
-*/
-static int16_t
-wordify(P7_OPROFILE *om, float sc)
-{
- sc = roundf(om->scale_w * sc);
- if (sc >= 32767.0) return 32767;
- else if (sc <= -32768.0) return -32768;
- else return (int16_t) sc;
-}
-
-/* mf_conversion():
-*
-* This builds the MSVFilter() parts of the profile <om>, scores
-* in lspace uchars (16-way parallel), by rescaling, rounding, and
-* casting the scores in <gm>.
-*
-* Returns <eslOK> on success;
-* throws <eslEINVAL> if <om> hasn't been allocated properly.
-*/
-static int
-mf_conversion(const P7_PROFILE *gm, P7_OPROFILE *om)
-{
- int M = gm->M; /* length of the query */
- int nq = p7O_NQB(M); /* segment length; total # of striped vectors needed */
- float max = 0.0; /* maximum residue score: used for unsigned emission score bias */
- int x; /* counter over residues */
- int q; /* q counts over total # of striped vectors, 0..nq-1 */
- int k; /* the usual counter over model nodes 1..M */
- int z; /* counter within elements of one SIMD minivector */
- union { __m128i v; uint8_t i[16]; } tmp; /* used to align and load simd minivectors */
-
- if (nq > om->allocQ16) ESL_EXCEPTION(eslEINVAL, "optimized profile is too small to hold conversion");
-
- /* First we determine the basis for the limited-precision MSVFilter scoring system.
- * Default: 1/3 bit units, base offset 190: range 0..255 => -190..65 => -63.3..21.7 bits
- * See J2/66, J4/138 for analysis.
- */
- for (x = 0; x < gm->abc->K; x++) max = ESL_MAX(max, esl_vec_FMax(gm->rsc[x], (M+1)*2));
- om->scale_b = 3.0 / eslCONST_LOG2; /* scores in units of third-bits */
- om->base_b = 190;
- om->bias_b = unbiased_byteify(om, -1.0 * max);
-
- /* striped match costs: start at k=1. */
- for (x = 0; x < gm->abc->Kp; x++)
- for (q = 0, k = 1; q < nq; q++, k++)
- {
- for (z = 0; z < 16; z++) tmp.i[z] = ((k+ z*nq <= M) ? biased_byteify(om, p7P_MSC(gm, k+z*nq, x)) : 255);
- om->rbv[x][q] = tmp.v;
- }
-
- /* transition costs */
- om->tbm_b = unbiased_byteify(om, logf(2.0f / ((float) gm->M * (float) (gm->M+1)))); /* constant B->Mk penalty */
- om->tec_b = unbiased_byteify(om, logf(0.5f)); /* constant multihit E->C = E->J */
- om->tjb_b = unbiased_byteify(om, logf(3.0f / (float) (gm->L+3))); /* this adopts the L setting of the parent profile */
-
- return eslOK;
-}
-
-
-/* vf_conversion():
-*
-* This builds the ViterbiFilter() parts of the profile <om>, scores
-* in lspace swords (8-way parallel), by rescaling, rounding, and
-* casting the scores in <gm>.
-*
-* Returns <eslOK> on success;
-* throws <eslEINVAL> if <om> hasn't been allocated properly.
-*/
-static int
-vf_conversion(const P7_PROFILE *gm, P7_OPROFILE *om)
-{
- int M = gm->M; /* length of the query */
- int nq = p7O_NQW(M); /* segment length; total # of striped vectors needed */
- int x; /* counter over residues */
- int q; /* q counts over total # of striped vectors, 0..nq-1 */
- int k; /* the usual counter over model nodes 1..M */
- int kb; /* possibly offset base k for loading om's TSC vectors */
- int z; /* counter within elements of one SIMD minivector */
- int t; /* counter over transitions 0..7 = p7O_{BM,MM,IM,DM,MD,MI,II,DD}*/
- int tg; /* transition index in gm */
- int j; /* counter in interleaved vector arrays in the profile */
- int ddtmp; /* used in finding worst DD transition bound */
- int16_t maxval; /* used to prevent zero cost II */
- int16_t val;
- union { __m128i v; int16_t i[8]; } tmp; /* used to align and load simd minivectors */
-
- if (nq > om->allocQ8) ESL_EXCEPTION(eslEINVAL, "optimized profile is too small to hold conversion");
-
- /* First set the basis for the limited-precision scoring system.
- * Default: 1/500 bit units, base offset 12000: range -32768..32767 => -44768..20767 => -89.54..41.53 bits
- * See J4/138 for analysis.
- */
- om->scale_w = 500.0 / eslCONST_LOG2;
- om->base_w = 12000;
-
- /* striped match scores */
- for (x = 0; x < gm->abc->Kp; x++)
- for (k = 1, q = 0; q < nq; q++, k++)
- {
- for (z = 0; z < 8; z++) tmp.i[z] = ((k+ z*nq <= M) ? wordify(om, p7P_MSC(gm, k+z*nq, x)) : -32768);
- om->rwv[x][q] = tmp.v;
- }
-
- /* Transition costs, all but the DD's. */
- for (j = 0, k = 1, q = 0; q < nq; q++, k++)
- {
- for (t = p7O_BM; t <= p7O_II; t++) /* this loop of 7 transitions depends on the order in p7o_tsc_e */
- {
- switch (t) {
- case p7O_BM: tg = p7P_BM; kb = k-1; maxval = 0; break; /* gm has tBMk stored off by one! start from k=0 not 1 */
- case p7O_MM: tg = p7P_MM; kb = k-1; maxval = 0; break; /* MM, DM, IM vectors are rotated by -1, start from k=0 */
- case p7O_IM: tg = p7P_IM; kb = k-1; maxval = 0; break;
- case p7O_DM: tg = p7P_DM; kb = k-1; maxval = 0; break;
- case p7O_MD: tg = p7P_MD; kb = k; maxval = 0; break; /* the remaining ones are straight up */
- case p7O_MI: tg = p7P_MI; kb = k; maxval = 0; break;
- case p7O_II: tg = p7P_II; kb = k; maxval = -1; break;
- }
-
- for (z = 0; z < 8; z++) {
- val = ((kb+ z*nq < M) ? wordify(om, p7P_TSC(gm, kb+ z*nq, tg)) : -32768);
- tmp.i[z] = (val <= maxval) ? val : maxval; /* do not allow an II transition cost of 0, or hell may occur. */
- }
- om->twv[j++] = tmp.v;
- }
- }
-
- /* Finally the DD's, which are at the end of the optimized tsc vector; (j is already sitting there) */
- for (k = 1, q = 0; q < nq; q++, k++)
- {
- for (z = 0; z < 8; z++) tmp.i[z] = ((k+ z*nq < M) ? wordify(om, p7P_TSC(gm, k+ z*nq, p7P_DD)) : -32768);
- om->twv[j++] = tmp.v;
- }
-
- /* Specials. (Actually in same order in om and gm, but we copy in general form anyway.) */
- /* VF CC,NN,JJ transitions hardcoded zero; -3.0 nat approximation used instead; this papers
- * over a length independence problem, where the approximation weirdly outperforms the
- * exact solution, probably indicating that the model's Pascal distribution is problematic,
- * and the "approximation" is in fact closer to the One True Model, the mythic H4 supermodel.
- * [xref J5/36]
- */
- om->xw[p7O_E][p7O_LOOP] = wordify(om, gm->xsc[p7P_E][p7P_LOOP]);
- om->xw[p7O_E][p7O_MOVE] = wordify(om, gm->xsc[p7P_E][p7P_MOVE]);
- om->xw[p7O_N][p7O_MOVE] = wordify(om, gm->xsc[p7P_N][p7P_MOVE]);
- om->xw[p7O_N][p7O_LOOP] = 0; /* was wordify(om, gm->xsc[p7P_N][p7P_LOOP]); */
- om->xw[p7O_C][p7O_MOVE] = wordify(om, gm->xsc[p7P_C][p7P_MOVE]);
- om->xw[p7O_C][p7O_LOOP] = 0; /* was wordify(om, gm->xsc[p7P_C][p7P_LOOP]); */
- om->xw[p7O_J][p7O_MOVE] = wordify(om, gm->xsc[p7P_J][p7P_MOVE]);
- om->xw[p7O_J][p7O_LOOP] = 0; /* was wordify(om, gm->xsc[p7P_J][p7P_LOOP]); */
-
- om->ncj_roundoff = 0.0; /* goes along with NN=CC=JJ=0, -3.0 nat approximation */
- /* otherwise, would be = om->scale_w * gm->xsc[p7P_N][p7P_LOOP] - om->xw[p7O_N][p7O_LOOP]; */
- /* see J4/150 for discussion of VF error suppression, superceded by the -3.0 nat approximation */
-
- /* Transition score bound for "lazy F" DD path evaluation (xref J2/52) */
- om->ddbound_w = -32768;
- for (k = 2; k < M-1; k++)
- {
- ddtmp = (int) wordify(om, p7P_TSC(gm, k, p7P_DD));
- ddtmp += (int) wordify(om, p7P_TSC(gm, k+1, p7P_DM));
- ddtmp -= (int) wordify(om, p7P_TSC(gm, k+1, p7P_BM));
- om->ddbound_w = ESL_MAX(om->ddbound_w, ddtmp);
- }
-
- return eslOK;
-}
-
-
-/* fb_conversion()
-* This builds the Forward/Backward part of the optimized profile <om>,
-* where we use odds ratios (not log-odds scores).
-*/
-static int
-fb_conversion(const P7_PROFILE *gm, P7_OPROFILE *om)
-{
- int M = gm->M; /* length of the query */
- int nq = p7O_NQF(M); /* segment length; total # of striped vectors needed */
- int x; /* counter over residues */
- int q; /* q counts over total # of striped vectors, 0..nq-1 */
- int k; /* the usual counter over model nodes 1..M */
- int kb; /* possibly offset base k for loading om's TSC vectors */
- int z; /* counter within elements of one SIMD minivector */
- int t; /* counter over transitions 0..7 = p7O_{BM,MM,IM,DM,MD,MI,II,DD}*/
- int tg; /* transition index in gm */
- int j; /* counter in interleaved vector arrays in the profile */
- union { __m128 v; float x[4]; } tmp; /* used to align and load simd minivectors */
-
- if (nq > om->allocQ4) ESL_EXCEPTION(eslEINVAL, "optimized profile is too small to hold conversion");
-
- /* striped match scores: start at k=1 */
- for (x = 0; x < gm->abc->Kp; x++)
- for (k = 1, q = 0; q < nq; q++, k++)
- {
- for (z = 0; z < 4; z++) tmp.x[z] = (k+ z*nq <= M) ? p7P_MSC(gm, k+z*nq, x) : -eslINFINITY;
- om->rfv[x][q] = esl_sse_expf(tmp.v);
- }
-
- /* Transition scores, all but the DD's. */
- for (j = 0, k = 1, q = 0; q < nq; q++, k++)
- {
- for (t = p7O_BM; t <= p7O_II; t++) /* this loop of 7 transitions depends on the order in the definition of p7o_tsc_e */
- {
- switch (t) {
- case p7O_BM: tg = p7P_BM; kb = k-1; break; /* gm has tBMk stored off by one! start from k=0 not 1 */
- case p7O_MM: tg = p7P_MM; kb = k-1; break; /* MM, DM, IM quads are rotated by -1, start from k=0 */
- case p7O_IM: tg = p7P_IM; kb = k-1; break;
- case p7O_DM: tg = p7P_DM; kb = k-1; break;
- case p7O_MD: tg = p7P_MD; kb = k; break; /* the remaining ones are straight up */
- case p7O_MI: tg = p7P_MI; kb = k; break;
- case p7O_II: tg = p7P_II; kb = k; break;
- }
-
- for (z = 0; z < 4; z++) tmp.x[z] = (kb+z*nq < M) ? p7P_TSC(gm, kb+z*nq, tg) : -eslINFINITY;
- om->tfv[j++] = esl_sse_expf(tmp.v);
- }
- }
-
- /* And finally the DD's, which are at the end of the optimized tsc vector; (j is already there) */
- for (k = 1, q = 0; q < nq; q++, k++)
- {
- for (z = 0; z < 4; z++) tmp.x[z] = (k+z*nq < M) ? p7P_TSC(gm, k+z*nq, p7P_DD) : -eslINFINITY;
- om->tfv[j++] = esl_sse_expf(tmp.v);
- }
-
- /* Specials. (These are actually in exactly the same order in om and
- * gm, but we copy in general form anyway.)
- */
- om->xf[p7O_E][p7O_LOOP] = expf(gm->xsc[p7P_E][p7P_LOOP]);
- om->xf[p7O_E][p7O_MOVE] = expf(gm->xsc[p7P_E][p7P_MOVE]);
- om->xf[p7O_N][p7O_LOOP] = expf(gm->xsc[p7P_N][p7P_LOOP]);
- om->xf[p7O_N][p7O_MOVE] = expf(gm->xsc[p7P_N][p7P_MOVE]);
- om->xf[p7O_C][p7O_LOOP] = expf(gm->xsc[p7P_C][p7P_LOOP]);
- om->xf[p7O_C][p7O_MOVE] = expf(gm->xsc[p7P_C][p7P_MOVE]);
- om->xf[p7O_J][p7O_LOOP] = expf(gm->xsc[p7P_J][p7P_LOOP]);
- om->xf[p7O_J][p7O_MOVE] = expf(gm->xsc[p7P_J][p7P_MOVE]);
-
- return eslOK;
-}
-
-
-/* Function: p7_oprofile_Convert()
-* Synopsis: Converts standard profile to an optimized one.
-* Incept: SRE, Mon Nov 26 07:38:57 2007 [Janelia]
-*
-* Purpose: Convert a standard profile <gm> to an optimized profile <om>,
-* where <om> has already been allocated for a profile of at
-* least <gm->M> nodes and the same emission alphabet <gm->abc>.
-*
-* Args: gm - profile to optimize
-* om - allocated optimized profile for holding the result.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEINVAL> if <gm>, <om> aren't compatible.
-* <eslEMEM> on allocation failure.
-*/
-int
-p7_oprofile_Convert(const P7_PROFILE *gm, P7_OPROFILE *om)
-{
- int status, z;
-
- if (gm->abc->type != om->abc->type) ESL_EXCEPTION(eslEINVAL, "alphabets of the two profiles don't match");
- if (gm->M > om->allocM) ESL_EXCEPTION(eslEINVAL, "oprofile is too small");
-
- if ((status = mf_conversion(gm, om)) != eslOK) return status; /* MSVFilter()'s information */
- if ((status = vf_conversion(gm, om)) != eslOK) return status; /* ViterbiFilter()'s information */
- if ((status = fb_conversion(gm, om)) != eslOK) return status; /* ForwardFilter()'s information */
-
- if (om->name != NULL) free(om->name);
- if (om->acc != NULL) free(om->acc);
- if (om->desc != NULL) free(om->desc);
- if ((status = esl_strdup(gm->name, -1, &(om->name))) != eslOK) goto ERROR;
- if ((status = esl_strdup(gm->acc, -1, &(om->acc))) != eslOK) goto ERROR;
- if ((status = esl_strdup(gm->desc, -1, &(om->desc))) != eslOK) goto ERROR;
- strcpy(om->rf, gm->rf);
- strcpy(om->cs, gm->cs);
- strcpy(om->consensus, gm->consensus);
- for (z = 0; z < p7_NEVPARAM; z++) om->evparam[z] = gm->evparam[z];
- for (z = 0; z < p7_NCUTOFFS; z++) om->cutoff[z] = gm->cutoff[z];
- for (z = 0; z < p7_MAXABET; z++) om->compo[z] = gm->compo[z];
-
- om->mode = gm->mode;
- om->L = gm->L;
- om->M = gm->M;
- om->nj = gm->nj;
- return eslOK;
-
-ERROR:
- return status;
-}
-
-/* Function: p7_oprofile_ReconfigLength()
-* Synopsis: Set the target sequence length of a model.
-* Incept: SRE, Thu Dec 20 09:56:40 2007 [Janelia]
-*
-* Purpose: Given an already configured model <om>, quickly reset its
-* expected length distribution for a new mean target sequence
-* length of <L>.
-*
-* This doesn't affect the length distribution of the null
-* model. That must also be reset, using <p7_bg_SetLength()>.
-*
-* We want this routine to run as fast as possible, because
-* this call is in the critical path: it must be called at
-* each new target sequence in a database search.
-*
-* Returns: <eslOK> on success. Costs/scores for N,C,J transitions are set
-* here.
-*/
-int
-p7_oprofile_ReconfigLength(P7_OPROFILE *om, int L, int wholeSeqSz)
-{
- int status;
- if ((status = p7_oprofile_ReconfigMSVLength (om, wholeSeqSz)) != eslOK) return status;
- if ((status = p7_oprofile_ReconfigRestLength(om, L, wholeSeqSz)) != eslOK) return status;
- return eslOK;
-}
-
-/* Function: p7_oprofile_ReconfigMSVLength()
-* Synopsis: Set the target sequence length of the MSVFilter part of the model.
-* Incept: SRE, Tue Dec 16 13:39:17 2008 [Janelia]
-*
-* Purpose: Given an already configured model <om>, quickly reset its
-* expected length distribution for a new mean target sequence
-* length of <L>, only for the part of the model that's used
-* for the accelerated MSV filter.
-*
-* The acceleration pipeline uses this to defer reconfiguring the
-* length distribution of the main model, mostly because hmmscan
-* reads the model in two pieces, MSV part first, then the rest.
-*
-* Returns: <eslOK> on success.
-*/
-int
-p7_oprofile_ReconfigMSVLength(P7_OPROFILE *om, int L)
-{
- om->tjb_b = unbiased_byteify(om, logf(3.0f / (float) (L+3)));
- return eslOK;
-}
-
-/* Function: p7_oprofile_ReconfigRestLength()
-* Synopsis: Set the target sequence length of the main profile.
-* Incept: SRE, Tue Dec 16 13:41:30 2008 [Janelia]
-*
-* Purpose: Given an already configured model <om>, quickly reset its
-* expected length distribution for a new mean target sequence
-* length of <L>, for everything except the MSV filter part
-* of the model.
-*
-* Calling <p7_oprofile_ReconfigMSVLength()> then
-* <p7_oprofile_ReconfigRestLength()> is equivalent to
-* just calling <p7_oprofile_ReconfigLength()>. The two
-* part version is used in the acceleration pipeline.
-*
-* Returns: <eslOK> on success.
-*/
-int
-p7_oprofile_ReconfigRestLength(P7_OPROFILE *om, int L, int wholeSz)
-{
- float pmove, ploop;
-
- pmove = (2.0f + om->nj) / ((float) wholeSz + 2.0f + om->nj); /* 2/(L+2) for sw; 3/(L+3) for fs */
- ploop = 1.0f - pmove;
-
- /* ForwardFilter() parameters: pspace floats */
- om->xf[p7O_N][p7O_LOOP] = om->xf[p7O_C][p7O_LOOP] = om->xf[p7O_J][p7O_LOOP] = ploop;
- om->xf[p7O_N][p7O_MOVE] = om->xf[p7O_C][p7O_MOVE] = om->xf[p7O_J][p7O_MOVE] = pmove;
-
- /* ViterbiFilter() parameters: lspace signed 16-bit ints */
- om->xw[p7O_N][p7O_MOVE] = om->xw[p7O_C][p7O_MOVE] = om->xw[p7O_J][p7O_MOVE] = wordify(om, logf(pmove));
- /* om->xw[p7O_N][p7O_LOOP] = om->xw[p7O_C][p7O_LOOP] = om->xw[p7O_J][p7O_LOOP] = wordify(om, logf(ploop)); */ /* 3nat approx in force: these stay 0 */
- /* om->ncj_roundoff = (om->scale_w * logf(ploop)) - om->xw[p7O_N][p7O_LOOP]; */ /* and this does too */
-
- om->L = L;
- return eslOK;
-}
-
-
-/* Function: p7_oprofile_ReconfigMultihit()
-* Synopsis: Quickly reconfig model into multihit mode for target length <L>.
-* Incept: SRE, Thu Aug 21 10:04:07 2008 [Janelia]
-*
-* Purpose: Given a profile <om> that's already been configured once,
-* quickly reconfigure it into a multihit mode for target
-* length <L>.
-*
-* This gets called in domain definition, when we need to
-* flip the model in and out of unihit mode to
-* process individual domains.
-*
-* Note: You can't just flip uni/multi mode alone, because that
-* parameterization also affects target length
-* modeling. You need to make sure uni vs. multi choice is
-* made before the length model is set, and you need to
-* make sure the length model is recalculated if you change
-* the uni/multi mode. Hence, these functions call
-* <p7_oprofile_ReconfigLength()>.
-*/
-int
-p7_oprofile_ReconfigMultihit(P7_OPROFILE *om, int L, int wholeSz)
-{
- om->xf[p7O_E][p7O_MOVE] = 0.5;
- om->xf[p7O_E][p7O_LOOP] = 0.5;
- om->nj = 1.0f;
-
- om->xw[p7O_E][p7O_MOVE] = wordify(om, -eslCONST_LOG2);
- om->xw[p7O_E][p7O_LOOP] = wordify(om, -eslCONST_LOG2);
-
- return p7_oprofile_ReconfigLength(om, L, wholeSz);
-}
-
-/* Function: p7_oprofile_ReconfigUnihit()
-* Synopsis: Quickly reconfig model into unihit mode for target length <L>.
-* Incept: SRE, Thu Aug 21 10:10:32 2008 [Janelia]
-*
-* Purpose: Given a profile <om> that's already been configured once,
-* quickly reconfigure it into a unihit mode for target
-* length <L>.
-*
-* This gets called in domain definition, when we need to
-* flip the model in and out of unihit <L=0> mode to
-* process individual domains.
-*/
-int
-p7_oprofile_ReconfigUnihit(P7_OPROFILE *om, int L, int wholeSz)
-{
- om->xf[p7O_E][p7O_MOVE] = 1.0f;
- om->xf[p7O_E][p7O_LOOP] = 0.0f;
- om->nj = 0.0f;
-
- om->xw[p7O_E][p7O_MOVE] = 0;
- om->xw[p7O_E][p7O_LOOP] = -32768;
-
- return p7_oprofile_ReconfigLength(om, L, wholeSz);
-}
-/*------------ end, conversions to P7_OPROFILE ------------------*/
-
-
-
-/*****************************************************************
-* 3. Debugging and development utilities.
-*****************************************************************/
-
-
-/* oprofile_dump_mf()
-*
-* Dump the MSVFilter part of a profile <om> to <stdout>.
-*/
-static int
-oprofile_dump_mf(FILE *fp, const P7_OPROFILE *om)
-{
- int M = om->M; /* length of the query */
- int nq = p7O_NQB(M); /* segment length; total # of striped vectors needed */
- int x; /* counter over residues */
- int q; /* q counts over total # of striped vectors, 0..nq-1 */
- int k; /* counter over nodes 1..M */
- int z; /* counter within elements of one SIMD minivector */
- union { __m128i v; uint8_t i[16]; } tmp; /* used to align and read simd minivectors */
-
- /* Header (rearranged column numbers, in the vectors) */
- fprintf(fp, " ");
- for (k =1, q = 0; q < nq; q++, k++)
- {
- fprintf(fp, "[ ");
- for (z = 0; z < 16; z++)
- if (k+z*nq <= M) fprintf(fp, "%4d ", k+z*nq);
- else fprintf(fp, "%4s ", "xx");
- fprintf(fp, "]");
- }
- fprintf(fp, "\n");
-
- /* Table of residue emissions */
- for (x = 0; x < om->abc->Kp; x++)
- {
- fprintf(fp, "(%c): ", om->abc->sym[x]);
-
- for (q = 0; q < nq; q++)
- {
- fprintf(fp, "[ ");
- _mm_store_si128(&tmp.v, om->rbv[x][q]);
- for (z = 0; z < 16; z++) fprintf(fp, "%4d ", tmp.i[z]);
- fprintf(fp, "]");
- }
- fprintf(fp, "\n");
- }
- fprintf(fp, "\n");
-
- fprintf(fp, "t_EC,EJ: %4d\n", om->tec_b);
- fprintf(fp, "t_NB,JB,CT: %4d\n", om->tjb_b);
- fprintf(fp, "t_BMk: %4d\n", om->tbm_b);
- fprintf(fp, "scale: %.2f\n", om->scale_b);
- fprintf(fp, "base: %4d\n", om->base_b);
- fprintf(fp, "bias: %4d\n", om->bias_b);
- fprintf(fp, "Q: %4d\n", nq);
- fprintf(fp, "M: %4d\n", M);
- return eslOK;
-}
-
-
-
-/* oprofile_dump_vf()
-*
-* Dump the ViterbiFilter part of a profile <om> to <stdout>.
-*/
-static int
-oprofile_dump_vf(FILE *fp, const P7_OPROFILE *om)
-{
- int M = om->M; /* length of the query */
- int nq = p7O_NQW(M); /* segment length; total # of striped vectors needed */
- int x; /* counter over residues */
- int q; /* q counts over total # of striped vectors, 0..nq-1 */
- int k; /* the usual counter over model nodes 1..M */
- int kb; /* possibly offset base k for loading om's TSC vectors */
- int z; /* counter within elements of one SIMD minivector */
- int t; /* counter over transitions 0..7 = p7O_{BM,MM,IM,DM,MD,MI,II,DD}*/
- int j; /* counter in interleaved vector arrays in the profile */
- union { __m128i v; int16_t i[8]; } tmp; /* used to align and read simd minivectors */
-
- /* Emission score header (rearranged column numbers, in the vectors) */
- fprintf(fp, " ");
- for (k =1, q = 0; q < nq; q++, k++)
- {
- fprintf(fp, "[ ");
- for (z = 0; z < 8; z++)
- if (k+z*nq <= M) fprintf(fp, "%6d ", k+z*nq);
- else fprintf(fp, "%6s ", "xx");
- fprintf(fp, "]");
- }
- fprintf(fp, "\n");
-
- /* Table of residue emissions */
- for (x = 0; x < om->abc->Kp; x++)
- {
- fprintf(fp, "(%c): ", om->abc->sym[x]);
-
- /* Match emission scores (insert emissions are assumed zero by design) */
- for (q = 0; q < nq; q++)
- {
- fprintf(fp, "[ ");
- _mm_store_si128(&tmp.v, om->rwv[x][q]);
- for (z = 0; z < 8; z++) fprintf(fp, "%6d ", tmp.i[z]);
- fprintf(fp, "]");
- }
- fprintf(fp, "\n");
- }
- fprintf(fp, "\n");
-
- /* Transitions */
- for (t = p7O_BM; t <= p7O_II; t++)
- {
- switch (t) {
- case p7O_BM: fprintf(fp, "\ntBM: "); break;
- case p7O_MM: fprintf(fp, "\ntMM: "); break;
- case p7O_IM: fprintf(fp, "\ntIM: "); break;
- case p7O_DM: fprintf(fp, "\ntDM: "); break;
- case p7O_MD: fprintf(fp, "\ntMD: "); break;
- case p7O_MI: fprintf(fp, "\ntMI: "); break;
- case p7O_II: fprintf(fp, "\ntII: "); break;
- }
-
- for (k = 1, q = 0; q < nq; q++, k++)
- {
- switch (t) {
- case p7O_BM: kb = k; break;
- case p7O_MM: kb = (1 + (nq+k-2)) % nq; break; /* MM, DM, IM quads rotated by +1 */
- case p7O_IM: kb = (1 + (nq+k-2)) % nq; break;
- case p7O_DM: kb = (1 + (nq+k-2)) % nq; break;
- case p7O_MD: kb = k; break; /* the remaining ones are straight up */
- case p7O_MI: kb = k; break;
- case p7O_II: kb = k; break;
- }
- fprintf(fp, "[ ");
- for (z = 0; z < 8; z++)
- if (kb+z*nq <= M) fprintf(fp, "%6d ", kb+z*nq);
- else fprintf(fp, "%6s ", "xx");
- fprintf(fp, "]");
- }
- fprintf(fp, "\n ");
- for (q = 0; q < nq; q++)
- {
- fprintf(fp, "[ ");
- _mm_store_si128(&tmp.v, om->twv[q*7 + t]);
- for (z = 0; z < 8; z++) fprintf(fp, "%6d ", tmp.i[z]);
- fprintf(fp, "]");
- }
- fprintf(fp, "\n");
- }
-
- /* DD transitions */
- fprintf(fp, "\ntDD: ");
- for (k =1, q = 0; q < nq; q++, k++)
- {
- fprintf(fp, "[ ");
- for (z = 0; z < 8; z++)
- if (k+z*nq <= M) fprintf(fp, "%6d ", k+z*nq);
- else fprintf(fp, "%6s ", "xx");
- fprintf(fp, "]");
- }
- fprintf(fp, "\n ");
- for (j = nq*7, q = 0; q < nq; q++, j++)
- {
- fprintf(fp, "[ ");
- _mm_store_si128(&tmp.v, om->twv[j]);
- for (z = 0; z < 8; z++) fprintf(fp, "%6d ", tmp.i[z]);
- fprintf(fp, "]");
- }
- fprintf(fp, "\n");
-
- fprintf(fp, "E->C: %6d E->J: %6d\n", om->xw[p7O_E][p7O_MOVE], om->xw[p7O_E][p7O_LOOP]);
- fprintf(fp, "N->B: %6d N->N: %6d\n", om->xw[p7O_N][p7O_MOVE], om->xw[p7O_N][p7O_LOOP]);
- fprintf(fp, "J->B: %6d J->J: %6d\n", om->xw[p7O_J][p7O_MOVE], om->xw[p7O_J][p7O_LOOP]);
- fprintf(fp, "C->T: %6d C->C: %6d\n", om->xw[p7O_C][p7O_MOVE], om->xw[p7O_C][p7O_LOOP]);
-
- fprintf(fp, "scale: %6.2f\n", om->scale_w);
- fprintf(fp, "base: %6d\n", om->base_w);
- fprintf(fp, "bound: %6d\n", om->ddbound_w);
- fprintf(fp, "Q: %6d\n", nq);
- fprintf(fp, "M: %6d\n", M);
- return eslOK;
-}
-
-
-/* oprofile_dump_fb()
-*
-* Dump the Forward/Backward part of a profile <om> to <stdout>.
-* <width>, <precision> control the floating point output:
-* 8,5 is a reasonable choice for prob space,
-* 5,2 is reasonable for log space.
-*/
-static int
-oprofile_dump_fb(FILE *fp, const P7_OPROFILE *om, int width, int precision)
-{
- int M = om->M; /* length of the query */
- int nq = p7O_NQF(M); /* segment length; total # of striped vectors needed */
- int x; /* counter over residues */
- int q; /* q counts over total # of striped vectors, 0..nq-1 */
- int k; /* the usual counter over model nodes 1..M */
- int kb; /* possibly offset base k for loading om's TSC vectors */
- int z; /* counter within elements of one SIMD minivector */
- int t; /* counter over transitions 0..7 = p7O_{BM,MM,IM,DM,MD,MI,II,DD}*/
- int j; /* counter in interleaved vector arrays in the profile */
- union { __m128 v; float x[4]; } tmp; /* used to align and read simd minivectors */
-
- /* Residue emissions */
- for (x = 0; x < om->abc->Kp; x++)
- {
- fprintf(fp, "(%c): ", om->abc->sym[x]);
- for (k =1, q = 0; q < nq; q++, k++)
- {
- fprintf(fp, "[ ");
- for (z = 0; z < 4; z++)
- if (k+z*nq <= M) fprintf(fp, "%*d ", width, k+z*nq);
- else fprintf(fp, "%*s ", width, "xx");
- fprintf(fp, "]");
- }
- fprintf(fp, "\nmat: ");
- for (q = 0; q < nq; q++)
- {
- fprintf(fp, "[ ");
- tmp.v = om->rfv[x][q];
- for (z = 0; z < 4; z++) fprintf(fp, "%*.*f ", width, precision, tmp.x[z]);
- fprintf(fp, "]");
- }
- fprintf(fp, "\n\n");
- }
-
- /* Transitions */
- for (t = p7O_BM; t <= p7O_II; t++)
- {
- switch (t) {
- case p7O_BM: fprintf(fp, "\ntBM: "); break;
- case p7O_MM: fprintf(fp, "\ntMM: "); break;
- case p7O_IM: fprintf(fp, "\ntIM: "); break;
- case p7O_DM: fprintf(fp, "\ntDM: "); break;
- case p7O_MD: fprintf(fp, "\ntMD: "); break;
- case p7O_MI: fprintf(fp, "\ntMI: "); break;
- case p7O_II: fprintf(fp, "\ntII: "); break;
- }
- for (k = 1, q = 0; q < nq; q++, k++)
- {
- switch (t) {
- case p7O_MM:/* MM, DM, IM quads rotated by +1 */
- case p7O_IM:
- case p7O_DM:
- kb = (1 + (nq+k-2)) % nq;
- break;
- case p7O_BM:/* the remaining ones are straight up */
- case p7O_MD:
- case p7O_MI:
- case p7O_II:
- kb = k;
- break;
- }
- fprintf(fp, "[ ");
- for (z = 0; z < 4; z++)
- if (kb+z*nq <= M) fprintf(fp, "%*d ", width, kb+z*nq);
- else fprintf(fp, "%*s ", width, "xx");
- fprintf(fp, "]");
- }
- fprintf(fp, "\n ");
- for (q = 0; q < nq; q++)
- {
- fprintf(fp, "[ ");
- tmp.v = om->tfv[q*7 + t];
- for (z = 0; z < 4; z++) fprintf(fp, "%*.*f ", width, precision, tmp.x[z]);
- fprintf(fp, "]");
- }
- fprintf(fp, "\n");
- }
-
- /* DD transitions */
- fprintf(fp, "\ntDD: ");
- for (k =1, q = 0; q < nq; q++, k++)
- {
- fprintf(fp, "[ ");
- for (z = 0; z < 4; z++)
- if (k+z*nq <= M) fprintf(fp, "%*d ", width, k+z*nq);
- else fprintf(fp, "%*s ", width, "xx");
- fprintf(fp, "]");
- }
- fprintf(fp, "\n ");
- for (j = nq*7, q = 0; q < nq; q++, j++)
- {
- fprintf(fp, "[ ");
- tmp.v = om->tfv[j];
- for (z = 0; z < 4; z++) fprintf(fp, "%*.*f ", width, precision, tmp.x[z]);
- fprintf(fp, "]");
- }
- fprintf(fp, "\n");
-
- /* Specials */
- fprintf(fp, "E->C: %*.*f E->J: %*.*f\n", width, precision, om->xf[p7O_E][p7O_MOVE], width, precision, om->xf[p7O_E][p7O_LOOP]);
- fprintf(fp, "N->B: %*.*f N->N: %*.*f\n", width, precision, om->xf[p7O_N][p7O_MOVE], width, precision, om->xf[p7O_N][p7O_LOOP]);
- fprintf(fp, "J->B: %*.*f J->J: %*.*f\n", width, precision, om->xf[p7O_J][p7O_MOVE], width, precision, om->xf[p7O_J][p7O_LOOP]);
- fprintf(fp, "C->T: %*.*f C->C: %*.*f\n", width, precision, om->xf[p7O_C][p7O_MOVE], width, precision, om->xf[p7O_C][p7O_LOOP]);
- fprintf(fp, "Q: %d\n", nq);
- fprintf(fp, "M: %d\n", M);
- return eslOK;
-}
-
-
-/* Function: p7_oprofile_Dump()
-* Synopsis: Dump internals of a <P7_OPROFILE>
-* Incept: SRE, Thu Dec 13 08:49:30 2007 [Janelia]
-*
-* Purpose: Dump the internals of <P7_OPROFILE> structure <om>
-* to stream <fp>; generally for testing or debugging
-* purposes.
-*
-* Args: fp - output stream (often stdout)
-* om - optimized profile to dump
-*
-* Returns: <eslOK> on success.
-*
-* Throws: (no abnormal error conditions)
-*/
-int
-p7_oprofile_Dump(FILE *fp, const P7_OPROFILE *om)
-{
- int status;
-
- fprintf(fp, "Dump of a <P7_OPROFILE> ::\n");
-
- fprintf(fp, "\n -- float part, odds ratios for Forward/Backward:\n");
- if ((status = oprofile_dump_fb(fp, om, 8, 5)) != eslOK) return status;
-
- fprintf(fp, "\n -- sword part, log odds for ViterbiFilter(): \n");
- if ((status = oprofile_dump_vf(fp, om)) != eslOK) return status;
-
- fprintf(fp, "\n -- uchar part, log odds for MSVFilter(): \n");
- if ((status = oprofile_dump_mf(fp, om)) != eslOK) return status;
-
- return eslOK;
-}
-
-
-/* Function: p7_oprofile_Sample()
-* Synopsis: Sample a random profile.
-* Incept: SRE, Wed Jul 30 13:11:52 2008 [Janelia]
-*
-* Purpose: Sample a random profile of <M> nodes for alphabet <abc>,
-* using <r> as the source of random numbers. Parameterize
-* it for generation of target sequences of mean length
-* <L>. Calculate its log-odds scores using background
-* model <bg>.
-*
-* Args: r - random number generator
-* abc - emission alphabet
-* bg - background frequency model
-* M - size of sampled profile, in nodes
-* L - configured target seq mean length
-* opt_hmm - optRETURN: sampled HMM
-* opt_gm - optRETURN: sampled normal profile
-* opt_om - RETURN: optimized profile
-*
-* Returns: <eslOK> on success.
-*
-* Throws: (no abnormal error conditions)
-*/
-int
-p7_oprofile_Sample(ESL_RANDOMNESS *r, const ESL_ALPHABET *abc, const P7_BG *bg, int M, int L,
- P7_HMM **opt_hmm, P7_PROFILE **opt_gm, P7_OPROFILE **ret_om)
-{
- P7_HMM *hmm = NULL;
- P7_PROFILE *gm = NULL;
- P7_OPROFILE *om = NULL;
- int status;
-
- if ((gm = p7_profile_Create (M, abc)) == NULL) { status = eslEMEM; goto ERROR; }
- if ((om = p7_oprofile_Create(M, abc)) == NULL) { status = eslEMEM; goto ERROR; }
-
- if ((status = p7_hmm_Sample(r, M, abc, &hmm)) != eslOK) goto ERROR;
- if ((status = p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL)) != eslOK) goto ERROR;
- if ((status = p7_oprofile_Convert(gm, om)) != eslOK) goto ERROR;
- if ((status = p7_oprofile_ReconfigLength(om, L, L)) != eslOK) goto ERROR;
-
- if (opt_hmm != NULL) *opt_hmm = hmm; else p7_hmm_Destroy(hmm);
- if (opt_gm != NULL) *opt_gm = gm; else p7_profile_Destroy(gm);
- *ret_om = om;
- return eslOK;
-
-ERROR:
- if (opt_hmm != NULL) *opt_hmm = NULL;
- if (opt_gm != NULL) *opt_gm = NULL;
- *ret_om = NULL;
- return status;
-}
-
-
-/* Function: p7_oprofile_Compare()
-* Synopsis: Compare two optimized profiles for equality.
-* Incept: SRE, Wed Jan 21 13:29:10 2009 [Janelia]
-*
-* Purpose: Compare the contents of <om1> and <om2>; return
-* <eslOK> if they are effectively identical profiles,
-* or <eslFAIL> if not.
-*
-* Floating point comparisons are done to a tolerance
-* of <tol> using <esl_FCompare()>.
-*
-* If a comparison fails, an informative error message is
-* left in <errmsg> to indicate why.
-*
-* Internal allocation sizes are not compared, only the
-* data.
-*
-* Args: om1 - one optimized profile to compare
-* om2 - the other
-* tol - floating point comparison tolerance; see <esl_FCompare()>
-* errmsg - ptr to array of at least <eslERRBUFSIZE> characters.
-*
-* Returns: <eslOK> on effective equality; <eslFAIL> on difference.
-*/
-int
-p7_oprofile_Compare(const P7_OPROFILE *om1, const P7_OPROFILE *om2, float tol, char *errmsg)
-{
- int Q4 = p7O_NQF(om1->M);
- int Q8 = p7O_NQW(om1->M);
- int Q16 = p7O_NQB(om1->M);
- int q, r, x, y;
- union { __m128i v; uint8_t c[16]; } a16, b16;
- union { __m128i v; int16_t w[8]; } a8, b8;
- union { __m128 v; float x[4]; } a4, b4;
-
- if (om1->mode != om2->mode) ESL_FAIL(eslFAIL, errmsg, "comparison failed: mode");
- if (om1->L != om2->L) ESL_FAIL(eslFAIL, errmsg, "comparison failed: L");
- if (om1->M != om2->M) ESL_FAIL(eslFAIL, errmsg, "comparison failed: M");
- if (om1->nj != om2->nj) ESL_FAIL(eslFAIL, errmsg, "comparison failed: nj");
- if (om1->abc->type != om2->abc->type) ESL_FAIL(eslFAIL, errmsg, "comparison failed: alphabet type");
-
- /* MSVFilter part */
- for (x = 0; x < om1->abc->Kp; x++)
- for (q = 0; q < Q16; q++)
- {
- a16.v = om1->rbv[x][q]; b16.v = om2->rbv[x][q];
- for (r = 0; r < 16; r++) if (a16.c[r] != b16.c[r]) ESL_FAIL(eslFAIL, errmsg, "comparison failed: rb[%d] elem %d", q, r);
- }
- if (om1->tbm_b != om2->tbm_b) ESL_FAIL(eslFAIL, errmsg, "comparison failed: tbm_b");
- if (om1->tec_b != om2->tec_b) ESL_FAIL(eslFAIL, errmsg, "comparison failed: tec_b");
- if (om1->tjb_b != om2->tjb_b) ESL_FAIL(eslFAIL, errmsg, "comparison failed: tjb_b");
- if (om1->scale_b != om2->scale_b) ESL_FAIL(eslFAIL, errmsg, "comparison failed: scale_b");
- if (om1->base_b != om2->base_b) ESL_FAIL(eslFAIL, errmsg, "comparison failed: base_b");
- if (om1->bias_b != om2->bias_b) ESL_FAIL(eslFAIL, errmsg, "comparison failed: bias_b");
-
- /* ViterbiFilter() part */
- for (x = 0; x < om1->abc->Kp; x++)
- for (q = 0; q < Q8; q++)
- {
- a8.v = om1->rwv[x][q]; b8.v = om2->rwv[x][q];
- for (r = 0; r < 8; r++) if (a8.w[r] != b8.w[r]) ESL_FAIL(eslFAIL, errmsg, "comparison failed: rw[%d] elem %d", q, r);
- }
- for (q = 0; q < 8*Q16; q++)
- {
- a8.v = om1->twv[q]; b8.v = om2->twv[q];
- for (r = 0; r < 8; r++) if (a8.w[r] != b8.w[r]) ESL_FAIL(eslFAIL, errmsg, "comparison failed: tw[%d] elem %d", q, r);
- }
- for (x = 0; x < p7O_NXSTATES; x++)
- for (y = 0; y < p7O_NXTRANS; y++)
- if (om1->xw[x][y] != om2->xw[x][y]) ESL_FAIL(eslFAIL, errmsg, "comparison failed: xw[%d][%d]", x, y);
-
- if (om1->scale_w != om2->scale_w) ESL_FAIL(eslFAIL, errmsg, "comparison failed: scale");
- if (om1->base_w != om2->base_w) ESL_FAIL(eslFAIL, errmsg, "comparison failed: base");
- if (om1->ddbound_w != om2->ddbound_w) ESL_FAIL(eslFAIL, errmsg, "comparison failed: ddbound_w");
-
- /* Forward/Backward part */
- for (x = 0; x < om1->abc->Kp; x++)
- for (q = 0; q < Q4; q++)
- {
- a4.v = om1->rfv[x][q]; b4.v = om2->rfv[x][q];
- for (r = 0; r < 4; r++) if (esl_FCompare(a4.x[r], b4.x[r], tol) != eslOK) ESL_FAIL(eslFAIL, errmsg, "comparison failed: rf[%d] elem %d", q, r);
- }
- for (q = 0; q < 8*Q4; q++)
- {
- a4.v = om1->tfv[q]; b4.v = om2->tfv[q];
- for (r = 0; r < 4; r++) if (a4.x[r] != b4.x[r]) ESL_FAIL(eslFAIL, errmsg, "comparison failed: tf[%d] elem %d", q, r);
- }
- for (x = 0; x < p7O_NXSTATES; x++)
- if (esl_vec_FCompare(om1->xf[x], om2->xf[x], p7O_NXTRANS, tol) != eslOK) ESL_FAIL(eslFAIL, errmsg, "comparison failed: xf[%d] vector", x);
-
- for (x = 0; x < p7_NOFFSETS; x++)
- if (om1->offs[x] != om2->offs[x]) ESL_FAIL(eslFAIL, errmsg, "comparison failed: offs[%d]", x);
-
- if (esl_strcmp(om1->name, om2->name) != 0) ESL_FAIL(eslFAIL, errmsg, "comparison failed: name");
- if (esl_strcmp(om1->acc, om2->acc) != 0) ESL_FAIL(eslFAIL, errmsg, "comparison failed: acc");
- if (esl_strcmp(om1->desc, om2->desc) != 0) ESL_FAIL(eslFAIL, errmsg, "comparison failed: desc");
- if (esl_strcmp(om1->rf, om2->rf) != 0) ESL_FAIL(eslFAIL, errmsg, "comparison failed: ref");
- if (esl_strcmp(om1->cs, om2->cs) != 0) ESL_FAIL(eslFAIL, errmsg, "comparison failed: cs");
- if (esl_strcmp(om1->consensus, om2->consensus) != 0) ESL_FAIL(eslFAIL, errmsg, "comparison failed: consensus");
-
- if (esl_vec_FCompare(om1->evparam, om2->evparam, p7_NEVPARAM, tol) != eslOK) ESL_FAIL(eslFAIL, errmsg, "comparison failed: evparam vector");
- if (esl_vec_FCompare(om1->cutoff, om2->cutoff, p7_NCUTOFFS, tol) != eslOK) ESL_FAIL(eslFAIL, errmsg, "comparison failed: cutoff vector");
- if (esl_vec_FCompare(om1->compo, om2->compo, p7_MAXABET, tol) != eslOK) ESL_FAIL(eslFAIL, errmsg, "comparison failed: compo vector");
-
- return eslOK;
-}
-
-
-/* Function: p7_profile_SameAsMF()
-* Synopsis: Set a generic profile's scores to give MSV scores.
-* Incept: SRE, Wed Jul 30 13:42:49 2008 [Janelia]
-*
-* Purpose: Set a generic profile's scores so that the normal <dp_generic> DP
-* algorithms will give the same score as <p7_MSVFilter()>:
-* all t_MM scores = 0; all other core transitions = -inf;
-* multihit local mode; all <t_BMk> entries uniformly <log 2/(M(M+1))>;
-* <tCC, tNN, tJJ> scores 0; total approximated later as -3;
-* rounded in the same way as the 8-bit limited precision.
-*
-* Returns: <eslOK> on success.
-*/
-int
-p7_profile_SameAsMF(const P7_OPROFILE *om, P7_PROFILE *gm)
-{
- int k,x;
- float tbm = roundf(om->scale_b * (log((double)(2.0f / ((float) gm->M * (float) (gm->M+1))))));
-
- /* Transitions */
- esl_vec_FSet(gm->tsc, p7P_NTRANS * gm->M, -eslINFINITY);
- for (k = 1; k < gm->M; k++) p7P_TSC(gm, k, p7P_MM) = 0.0f;
- for (k = 0; k < gm->M; k++) p7P_TSC(gm, k, p7P_BM) = tbm;
-
- /* Emissions */
- for (x = 0; x < gm->abc->Kp; x++)
- for (k = 0; k <= gm->M; k++)
- {
- gm->rsc[x][k*2] = (gm->rsc[x][k*2] <= -eslINFINITY) ? -eslINFINITY : roundf(om->scale_b * gm->rsc[x][k*2]);
- gm->rsc[x][k*2+1] = 0; /* insert score: VF makes it zero no matter what. */
- }
-
- /* Specials */
- for (k = 0; k < p7P_NXSTATES; k++)
- for (x = 0; x < p7P_NXTRANS; x++)
- gm->xsc[k][x] = (gm->xsc[k][x] <= -eslINFINITY) ? -eslINFINITY : roundf(om->scale_b * gm->xsc[k][x]);
-
- /* NN, CC, JJ hardcoded 0 in limited precision */
- gm->xsc[p7P_N][p7P_LOOP] = gm->xsc[p7P_J][p7P_LOOP] = gm->xsc[p7P_C][p7P_LOOP] = 0;
-
- return eslOK;
-}
-
-
-/* Function: p7_profile_SameAsVF()
-* Synopsis: Round a generic profile to match ViterbiFilter scores.
-* Incept: SRE, Wed Jul 30 13:37:48 2008 [Janelia]
-*
-* Purpose: Round all the scores in a generic (lspace) <P7_PROFILE> <gm> in
-* exactly the same way that the scores in the
-* <P7_OPROFILE> <om> were rounded. Then we can test that two profiles
-* give identical internal scores in testing, say,
-* <p7_ViterbiFilter()> against <p7_GViterbi()>.
-*
-* The 3nat approximation is used; NN=CC=JJ=0, and 3 nats are
-* subtracted at the end to account for their contribution.
-*
-* To convert a generic Viterbi score <gsc> calculated with this profile
-* to a nat score that should match ViterbiFilter() exactly,
-* do <(gsc / om->scale_w) - 3.0>.
-*
-* <gm> must be the same profile that <om> was constructed from.
-*
-* <gm> is irrevocably altered by this call.
-*
-* Do not call this more than once on any given <gm>!
-*
-* Args: <om> - optimized profile, containing scale information.
-* <gm> - generic profile that <om> was built from.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: (no abnormal error conditions)
-*/
-int
-p7_profile_SameAsVF(const P7_OPROFILE *om, P7_PROFILE *gm)
-{
- int k;
- int x;
-
- /* Transitions */
- /* <= -eslINFINITY test is used solely to silence compiler. really testing == -eslINFINITY */
- for (x = 0; x < gm->M*p7P_NTRANS; x++)
- gm->tsc[x] = (gm->tsc[x] <= -eslINFINITY) ? -eslINFINITY : roundf(om->scale_w * gm->tsc[x]);
-
- /* Enforce the rule that no II can be 0; max of -1 */
- for (x = p7P_II; x < gm->M*p7P_NTRANS; x += p7P_NTRANS)
- if (gm->tsc[x] == 0.0) gm->tsc[x] = -1.0;
-
- /* Emissions */
- for (x = 0; x < gm->abc->Kp; x++)
- for (k = 0; k <= gm->M; k++)
- {
- gm->rsc[x][k*2] = (gm->rsc[x][k*2] <= -eslINFINITY) ? -eslINFINITY : roundf(om->scale_w * gm->rsc[x][k*2]);
- gm->rsc[x][k*2+1] = 0.0; /* insert score: VF makes it zero no matter what. */
- }
-
- /* Specials */
- for (k = 0; k < p7P_NXSTATES; k++)
- for (x = 0; x < p7P_NXTRANS; x++)
- gm->xsc[k][x] = (gm->xsc[k][x] <= -eslINFINITY) ? -eslINFINITY : roundf(om->scale_w * gm->xsc[k][x]);
-
- /* 3nat approximation: NN, CC, JJ hardcoded 0 in limited precision */
- gm->xsc[p7P_N][p7P_LOOP] = gm->xsc[p7P_J][p7P_LOOP] = gm->xsc[p7P_C][p7P_LOOP] = 0.0;
-
- return eslOK;
-}
-/*------------ end, P7_OPROFILE debugging tools ----------------*/
-
-
-
-
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/stotrace.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/stotrace.cpp
deleted file mode 100644
index 0fecb8d..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/stotrace.cpp
+++ /dev/null
@@ -1,301 +0,0 @@
-/* SSE implementation of stochastic backtrace of a Forward matrix.
-* (Compare generic version, p7_GStochasticTrace().)
-*
-* Contents:
-* 1. Stochastic trace implementation.
-* 2. Selection of steps in the traceback.
-* 7. Copyright and license information.
-*
-* SRE, Fri Aug 15 08:02:43 2008 [Janelia]
- * SVN $Id: stotrace.c 3019 2009-10-30 14:46:16Z eddys $
-*/
-
-#include <hmmer3/p7_config.h>
-
-#include <stdio.h>
-#include <math.h>
-
-#include <xmmintrin.h> /* SSE */
-#include <emmintrin.h> /* SSE2 */
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_random.h>
-#include <hmmer3/easel/esl_sse.h>
-#include <hmmer3/easel/esl_vectorops.h>
-
-#include <hmmer3/hmmer.h>
-#include "impl_sse.h"
-
-static inline int select_m(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int k);
-static inline int select_d(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int k);
-static inline int select_i(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int k);
-static inline int select_n(int i);
-static inline int select_c(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i);
-static inline int select_j(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i);
-static inline int select_e(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int *ret_k);
-static inline int select_b(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i);
-
-
-/*****************************************************************
-* 1. Stochastic trace implementation.
-*****************************************************************/
-
-/* Function: p7_StochasticTrace()
-* Synopsis: Sample a traceback from a Forward matrix
-* Incept: SRE, Fri Aug 8 17:40:18 2008 [UA217, IAD-SFO]
-*
-* Purpose: Perform a stochastic traceback from Forward matrix <ox>,
-* using random number generator <r>, in order to sample an
-* alignment of model <om> to digital sequence <dsq> of
-* length <L>.
-*
-* The sampled traceback is returned in <tr>, which the
-* caller provides with at least an initial allocation;
-* the <tr> allocation will be grown as needed here.
-*
-* Args: r - source of random numbers
-* dsq - digital sequence being aligned, 1..L
-* L - length of dsq
-* om - profile
-* ox - Forward matrix to trace, LxM
-* tr - storage for the recovered traceback
-*
-* Returns: <eslOK> on success
-*
-* Throws: <eslEMEM> on allocation error.
-* <eslEINVAL> on several types of problems, including:
-* the trace isn't empty (wasn't Reuse()'d);
-*/
-int
-p7_StochasticTrace(ESL_RANDOMNESS *rng, const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, const P7_OMX *ox,
- P7_TRACE *tr)
-{
- int i; /* position in sequence 1..L */
- int k; /* position in model 1..M */
- int s0, s1; /* choice of a state */
- int status;
-
- if (tr->N != 0) ESL_EXCEPTION(eslEINVAL, "trace not empty; needs to be Reuse()'d?");
-
- i = L;
- k = 0;
- if ((status = p7_trace_Append(tr, p7T_T, k, i)) != eslOK) return status;
- if ((status = p7_trace_Append(tr, p7T_C, k, i)) != eslOK) return status;
- s0 = tr->st[tr->N-1];
- while (s0 != p7T_S)
- {
- switch (s0) {
- case p7T_M: s1 = select_m(rng, om, ox, i, k); k--; i--; break;
- case p7T_D: s1 = select_d(rng, om, ox, i, k); k--; break;
- case p7T_I: s1 = select_i(rng, om, ox, i, k); i--; break;
- case p7T_N: s1 = select_n(i); break;
- case p7T_C: s1 = select_c(rng, om, ox, i); break;
- case p7T_J: s1 = select_j(rng, om, ox, i); break;
- case p7T_E: s1 = select_e(rng, om, ox, i, &k); break;
- case p7T_B: s1 = select_b(rng, om, ox, i); break;
- default: ESL_EXCEPTION(eslEINVAL, "bogus state in traceback");
- }
- if (s1 == -1) ESL_EXCEPTION(eslEINVAL, "Stochastic traceback choice failed");
-
- if ((status = p7_trace_Append(tr, s1, k, i)) != eslOK) return status;
-
- if ( (s1 == p7T_N || s1 == p7T_J || s1 == p7T_C) && s1 == s0) i--;
- s0 = s1;
- } /* end traceback, at S state */
-
- tr->M = om->M;
- tr->L = L;
- return p7_trace_Reverse(tr);
-}
-/*------------------ end, stochastic traceback ------------------*/
-
-
-/*****************************************************************
-* 2. Selection of steps in the traceback
-*****************************************************************/
-/* The guts of the stochastic backtrace function is broken out in
-* pieces: each select_?() function randomly selects one of the
-* possible paths, according to their probability, and returns the
-* index of the state we move to next.
-*/
-
-/* M(i,k) is reached from B(i-1), M(i-1,k-1), D(i-1,k-1), or I(i-1,k-1). */
-static inline int
-select_m(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
-{
- int Q = p7O_NQF(ox->M);
- int q = (k-1) % Q; /* (q,r) is position of the current DP cell M(i,k) */
- int r = (k-1) / Q;
- __m128 *tp = om->tfv + 7*q; /* *tp now at start of transitions to cur cell M(i,k) */
- __m128 xBv = _mm_set1_ps(ox->xmx[(i-1)*p7X_NXCELLS+p7X_B]);
- __m128 zerov = _mm_setzero_ps();
- __m128 mpv, dpv, ipv;
- union { __m128 v; float p[4]; } u;
- float path[4];
- int state[4] = { p7T_B, p7T_M, p7T_I, p7T_D };
-
- if (q > 0) {
- mpv = ox->dpf[i-1][(q-1)*3 + p7X_M];
- dpv = ox->dpf[i-1][(q-1)*3 + p7X_D];
- ipv = ox->dpf[i-1][(q-1)*3 + p7X_I];
- } else {
- mpv = esl_sse_rightshift_ps(ox->dpf[i-1][(Q-1)*3 + p7X_M], zerov);
- dpv = esl_sse_rightshift_ps(ox->dpf[i-1][(Q-1)*3 + p7X_D], zerov);
- ipv = esl_sse_rightshift_ps(ox->dpf[i-1][(Q-1)*3 + p7X_I], zerov);
- }
-
- u.v = _mm_mul_ps(xBv, *tp); tp++; path[0] = u.p[r];
- u.v = _mm_mul_ps(mpv, *tp); tp++; path[1] = u.p[r];
- u.v = _mm_mul_ps(ipv, *tp); tp++; path[2] = u.p[r];
- u.v = _mm_mul_ps(dpv, *tp); path[3] = u.p[r];
- esl_vec_FNorm(path, 4);
- return state[esl_rnd_FChoose(rng, path, 4)];
-}
-
-/* D(i,k) is reached from M(i, k-1) or D(i,k-1). */
-static inline int
-select_d(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
-{
- int Q = p7O_NQF(ox->M);
- int q = (k-1) % Q; /* (q,r) is position of the current DP cell D(i,k) */
- int r = (k-1) / Q;
- __m128 zerov = _mm_setzero_ps();
- __m128 mpv, dpv;
- __m128 tmdv, tddv;
- union { __m128 v; float p[4]; } u;
- float path[2];
- int state[2] = { p7T_M, p7T_D };
-
- if (q > 0) {
- mpv = ox->dpf[i][(q-1)*3 + p7X_M];
- dpv = ox->dpf[i][(q-1)*3 + p7X_D];
- tmdv = om->tfv[7*(q-1) + p7O_MD];
- tddv = om->tfv[7*Q + (q-1)];
- } else {
- mpv = esl_sse_rightshift_ps(ox->dpf[i][(Q-1)*3 + p7X_M], zerov);
- dpv = esl_sse_rightshift_ps(ox->dpf[i][(Q-1)*3 + p7X_D], zerov);
- tmdv = esl_sse_rightshift_ps(om->tfv[7*(Q-1) + p7O_MD], zerov);
- tddv = esl_sse_rightshift_ps(om->tfv[8*Q-1], zerov);
- }
-
- u.v = _mm_mul_ps(mpv, tmdv); path[0] = u.p[r];
- u.v = _mm_mul_ps(dpv, tddv); path[1] = u.p[r];
- esl_vec_FNorm(path, 2);
- return state[esl_rnd_FChoose(rng, path, 2)];
-}
-
-/* I(i,k) is reached from M(i-1, k) or I(i-1,k).
-*
-* Stochastic traceback step for an insert state. Both candidates are
-* read from row <i-1> of <ox->dpf> at the same (q,r) position as the
-* current cell, so no vector shift is needed here. The pair is
-* normalized with esl_vec_FNorm() and sampled with esl_rnd_FChoose().
-*
-* Returns p7T_M or p7T_I.
-*/
-static inline int
-select_i(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
-{
- int Q = p7O_NQF(ox->M);
- int q = (k-1) % Q; /* (q,r) is position of the current DP cell I(i,k) */
- int r = (k-1) / Q;
- __m128 mpv = ox->dpf[i-1][q*3 + p7X_M];
- __m128 ipv = ox->dpf[i-1][q*3 + p7X_I];
- __m128 *tp = om->tfv + 7*q + p7O_MI; /* advanced once below, so two consecutive transition vectors are used */
- union { __m128 v; float p[4]; } u;
- float path[2];
- int state[2] = { p7T_M, p7T_I }; /* same order as path[] */
-
- u.v = _mm_mul_ps(mpv, *tp); tp++; path[0] = u.p[r];
- u.v = _mm_mul_ps(ipv, *tp); path[1] = u.p[r];
- esl_vec_FNorm(path, 2);
- return state[esl_rnd_FChoose(rng, path, 2)];
-}
-
-/* N(i) must come from N(i-1) for i>0; else it comes from S
-*
-* Deterministic step: no random number is drawn.
-* Returns p7T_S when <i> is 0, p7T_N otherwise.
-*/
-static inline int
-select_n(int i)
-{
- if (i == 0) return p7T_S;
- else return p7T_N;
-}
-
-/* C(i) is reached from E(i) or C(i-1).
-*
-* path[0] weighs the C cell of row <i-1> by the C loop transition;
-* path[1] weighs the E cell of row <i> by the E move transition and by
-* the SCALE cell of row <i>. The pair is normalized and one entry is
-* sampled.
-*
-* Returns p7T_C or p7T_E.
-*/
-static inline int
-select_c(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i)
-{
- float path[2];
- int state[2] = { p7T_C, p7T_E }; /* same order as path[] */
-
- path[0] = ox->xmx[(i-1)*p7X_NXCELLS+p7X_C] * om->xf[p7O_C][p7O_LOOP];
- path[1] = ox->xmx[ i*p7X_NXCELLS+p7X_E] * om->xf[p7O_E][p7O_MOVE] * ox->xmx[i*p7X_NXCELLS+p7X_SCALE];
- esl_vec_FNorm(path, 2);
- return state[esl_rnd_FChoose(rng, path, 2)];
-}
-
-/* J(i) is reached from E(i) or J(i-1).
-*
-* Same shape as select_c(), but using the J cell of row <i-1> with the
-* J loop transition, and the E cell of row <i> with the E loop
-* transition (times the SCALE cell of row <i>).
-*
-* Returns p7T_J or p7T_E.
-*/
-static inline int
-select_j(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i)
-{
- float path[2];
- int state[2] = { p7T_J, p7T_E }; /* same order as path[] */
-
- path[0] = ox->xmx[(i-1)*p7X_NXCELLS+p7X_J] * om->xf[p7O_J][p7O_LOOP];
- path[1] = ox->xmx[ i*p7X_NXCELLS+p7X_E] * om->xf[p7O_E][p7O_LOOP] * ox->xmx[i*p7X_NXCELLS+p7X_SCALE];
- esl_vec_FNorm(path, 2);
- return state[esl_rnd_FChoose(rng, path, 2)];
-}
-
-/* E(i) is reached from any M(i, k=1..M) or D(i, k=2..M). */
-/* Using FChoose() here would mean allocating tmp space for 2M-1 paths;
-* instead we use the fact that E(i) is itself the necessary normalization
-* factor, and implement FChoose's algorithm here for an on-the-fly
-* calculation.
-*
-* Returns p7T_M or p7T_D and stores the chosen model position in
-* <*ret_k>. <om> is not referenced in the body.
-*
-* The running <sum> is not reset between passes of the outer loop: if
-* a full pass over all cells ends with sum >= 0.99 but still <= roll,
-* the scan starts over with the accumulated sum. A pass that ends with
-* sum < 0.99 raises an exception through ESL_EXCEPTION.
-*/
-static inline int
-select_e(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int *ret_k)
-{
- int Q = p7O_NQF(ox->M);
- float sum = 0.0;
- float roll = esl_random(rng);
- float norm = 1.0 / ox->xmx[i*p7X_NXCELLS+p7X_E]; /* all M, D already scaled exactly the same */
- __m128 xEv = _mm_set1_ps(norm);
- union { __m128 v; float p[4]; } u;
- int q,r;
-
- while (1) {
- for (q = 0; q < Q; q++)
- {
- u.v = _mm_mul_ps(ox->dpf[i][q*3 + p7X_M], xEv);
- for (r = 0; r < 4; r++) {
- sum += u.p[r];
- if (roll < sum) { *ret_k = r*Q + q + 1; return p7T_M;} /* inverse of q=(k-1)%Q, r=(k-1)/Q */
- }
-
- u.v = _mm_mul_ps(ox->dpf[i][q*3 + p7X_D], xEv);
- for (r = 0; r < 4; r++) {
- sum += u.p[r];
- if (roll < sum) { *ret_k = r*Q + q + 1; return p7T_D;}
- }
- }
- if (sum < 0.99)
- ESL_EXCEPTION(-1, "Probabilities weren't normalized - failed to trace back from an E");
- }
- /*UNREACHED*/
- ESL_EXCEPTION(-1, "unreached code was reached. universe collapses.");
-}
-
-/* B(i) is reached from N(i) or J(i).
-*
-* Both candidates come from row <i> of <ox->xmx>: the N cell times the
-* N move transition, and the J cell times the J move transition. The
-* pair is normalized and one entry is sampled.
-*
-* Returns p7T_N or p7T_J.
-*/
-static inline int
-select_b(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i)
-{
- float path[2];
- int state[2] = { p7T_N, p7T_J }; /* same order as path[] */
-
- path[0] = ox->xmx[i*p7X_NXCELLS+p7X_N] * om->xf[p7O_N][p7O_MOVE];
- path[1] = ox->xmx[i*p7X_NXCELLS+p7X_J] * om->xf[p7O_J][p7O_MOVE];
- esl_vec_FNorm(path, 2);
- return state[esl_rnd_FChoose(rng, path, 2)];
-}
-/*---------------------- end, step selection --------------------*/
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/vitfilter.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/vitfilter.cpp
deleted file mode 100644
index 480c26a..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/impl_sse/vitfilter.cpp
+++ /dev/null
@@ -1,265 +0,0 @@
-/* Viterbi filter implementation; SSE version.
-*
-* This is a SIMD vectorized, striped, interleaved, one-row, reduced
- * precision (epi16) implementation of the Viterbi algorithm.
-*
-* It calculates a close approximation of the Viterbi score, in
- * limited precision (signed words: 16 bits) and range. It may overflow on
-* high scoring sequences, but this indicates that the sequence is a
-* high-scoring hit worth examining more closely anyway. It will not
-* underflow, in local alignment mode.
-*
-* Contents:
-* 1. Viterbi filter implementation.
-* 6. Copyright and license information
-*
-* SRE, Thu Jul 31 20:32:25 2008 [Casa de Gatos]
- * SVN $Id: vitfilter.c 3018 2009-10-29 17:33:06Z farrarm $
-*/
-
-#include <hmmer3/p7_config.h>
-
-#include <stdio.h>
-#include <math.h>
-
-#include <xmmintrin.h> /* SSE */
-#include <emmintrin.h> /* SSE2 */
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_sse.h>
-
-#include <hmmer3/hmmer.h>
-#include <hmmer3/impl_sse/impl_sse.h>
-
-
-/*****************************************************************
-* 1. Viterbi filter implementation.
-*****************************************************************/
-
-/* Function: p7_ViterbiFilter()
-* Synopsis: Calculates Viterbi score, vewy vewy fast, in limited precision.
-* Incept: SRE, Tue Nov 27 09:15:24 2007 [Janelia]
-*
-* Purpose: Calculates an approximation of the Viterbi score for sequence
-* <dsq> of length <L> residues, using optimized profile <om>,
-* and a preallocated one-row DP matrix <ox>. Return the
-* estimated Viterbi score (in nats) in <ret_sc>.
-*
-* Score may overflow (and will, on high-scoring
-* sequences), but will not underflow.
-*
-* The model must be in a local alignment mode; other modes
-* cannot provide the necessary guarantee of no underflow.
-*
-* This is a striped SIMD Viterbi implementation using Intel
-* SSE/SSE2 integer intrinsics \citep{Farrar07}, in reduced
-* precision (signed words, 16 bits).
-*
-* Args: dsq - digital target sequence, 1..L
-* L - length of dsq in residues
-* om - optimized profile
-* ox - DP matrix
-* ret_sc - RETURN: Viterbi score (in nats)
-* percentBorder - total amount added to <ti.progress> over the
-* L rows; progress grows linearly with the row index
-* ti - task state; <ti.progress> is updated on every row and
-* <ti.cancelFlag> is polled on every row
-*
-* Returns: <eslOK> on success;
-* <eslERANGE> if the score overflows; in this case
-* <*ret_sc> is <eslINFINITY>, and the sequence can
-* be treated as a high-scoring hit.
-* <eslCANCELED> if <ti.cancelFlag> is set; <*ret_sc> is
-* not written in that case.
-*
-* Throws: <eslEINVAL> if <ox> allocation is too small, or if
-* profile isn't in a local alignment mode. (Must be in local
-* alignment mode because that's what helps us guarantee
-* limited dynamic range.)
-*
-* Xref: [Farrar07] for ideas behind striped SIMD DP.
-* J2/46-47 for layout of HMMER's striped SIMD DP.
-* J2/50 for single row DP.
-* J2/60 for reduced precision (epu8)
-* J2/65 for initial benchmarking
-* J2/66 for precision maximization
-* J4/138-140 for reimplementation in 16-bit precision
-*
-* NOTE(review): the <register> storage class used on the locals below
-* was removed from the language in C++17; confirm the
-* language standard this file was built with.
-*/
-int
-p7_ViterbiFilter(const ESL_DSQ *dsq, int L, const P7_OPROFILE *om, P7_OMX *ox, float *ret_sc, int percentBorder, U2::TaskStateInfo & ti )
-{
- register __m128i mpv, dpv, ipv; /* previous row values */
- register __m128i sv; /* temp storage of 1 curr row value in progress */
- register __m128i dcv; /* delayed storage of D(i,q+1) */
- register __m128i xEv; /* E state: keeps max for Mk->E as we go */
- register __m128i xBv; /* B state: splatted vector of B[i-1] for B->Mk calculations */
- register __m128i Dmaxv; /* keeps track of maximum D cell on row */
- int16_t xE, xB, xC, xJ, xN; /* special states' scores */
- int16_t Dmax; /* maximum D cell score on row */
- int i; /* counter over sequence positions 1..L */
- int q; /* counter over vectors 0..nq-1 */
- int Q = p7O_NQW(om->M); /* segment length: # of vectors */
- __m128i *dp = ox->dpw[0]; /* using {MDI}MX(q) macro requires initialization of <dp> */
- __m128i *rsc; /* will point at om->ru[x] for residue x[i] */
- __m128i *tsc; /* will point into (and step thru) om->tu */
-
- __m128i negInfv;
-
- /* Check that the DP matrix is ok for us. */
- if (Q > ox->allocQ8) ESL_EXCEPTION(eslEINVAL, "DP matrix allocated too small");
- if (om->mode != p7_LOCAL && om->mode != p7_UNILOCAL) ESL_EXCEPTION(eslEINVAL, "Fast filter only works for local alignment");
- ox->M = om->M;
-
- /* -infinity is -32768. The 14-byte right shift leaves that value in
- * the lowest 16-bit word only (all other words zero), so it can be
- * OR-ed into the word vacated by the 2-byte left shifts below.
- */
- negInfv = _mm_set1_epi16(-32768);
- negInfv = _mm_srli_si128(negInfv, 14);
-
- /* Initialization. In signed 16-bit arithmetic, -infinity is -32768
- */
- for (q = 0; q < Q; q++)
- MMXo(q) = IMXo(q) = DMXo(q) = _mm_set1_epi16(-32768);
- xN = om->base_w;
- xB = xN + om->xw[p7O_N][p7O_MOVE];
- xJ = -32768;
- xC = -32768;
- xE = -32768;
-
-#if p7_DEBUGGING
- if (ox->debugging) p7_omx_DumpVFRow(ox, 0, xE, 0, xJ, xB, xC); /* first 0 is <rowi>: do header. second 0 is xN: always 0 here. */
-#endif
-
- // ! ADDED CODE ! remember the progress value on entry; rows add to it
- int progressStart = ti.progress;
- for (i = 1; i <= L; i++)
- {
- // ! ADDED CODE ! report progress (reaches progressStart + percentBorder at i == L) and honor cancellation
- ti.progress = progressStart + (int)(((double)percentBorder / L) * i);
- if( ti.cancelFlag ){ return eslCANCELED; }
-
- rsc = om->rwv[dsq[i]];
- tsc = om->twv;
- dcv = _mm_set1_epi16(-32768); /* "-infinity" */
- xEv = _mm_set1_epi16(-32768);
- Dmaxv = _mm_set1_epi16(-32768);
- xBv = _mm_set1_epi16(xB);
-
- /* Right shifts by 1 value (2 bytes). 4,8,12,x becomes x,4,8,12.
- * Because ia32 is littlendian, this means a left bit shift.
- * Zeros shift on automatically; replace it with -32768.
- */
- mpv = MMXo(Q-1); mpv = _mm_slli_si128(mpv, 2); mpv = _mm_or_si128(mpv, negInfv);
- dpv = DMXo(Q-1); dpv = _mm_slli_si128(dpv, 2); dpv = _mm_or_si128(dpv, negInfv);
- ipv = IMXo(Q-1); ipv = _mm_slli_si128(ipv, 2); ipv = _mm_or_si128(ipv, negInfv);
-
- for (q = 0; q < Q; q++)
- {
- /* Calculate new MMXo(i,q); don't store it yet, hold it in sv. */
- sv = _mm_adds_epi16(xBv, *tsc); tsc++;
- sv = _mm_max_epi16 (sv, _mm_adds_epi16(mpv, *tsc)); tsc++;
- sv = _mm_max_epi16 (sv, _mm_adds_epi16(ipv, *tsc)); tsc++;
- sv = _mm_max_epi16 (sv, _mm_adds_epi16(dpv, *tsc)); tsc++;
- sv = _mm_adds_epi16(sv, *rsc); rsc++;
- xEv = _mm_max_epi16(xEv, sv);
-
- /* Load {MDI}(i-1,q) into mpv, dpv, ipv;
- * {MDI}MX(q) is then the current, not the prev row
- */
- mpv = MMXo(q);
- dpv = DMXo(q);
- ipv = IMXo(q);
-
- /* Do the delayed stores of {MD}(i,q) now that memory is usable */
- MMXo(q) = sv;
- DMXo(q) = dcv;
-
- /* Calculate the next D(i,q+1) partially: M->D only;
- * delay storage, holding it in dcv
- */
- dcv = _mm_adds_epi16(sv, *tsc); tsc++;
- Dmaxv = _mm_max_epi16(dcv, Dmaxv);
-
- /* Calculate and store I(i,q) */
- sv = _mm_adds_epi16(mpv, *tsc); tsc++;
- IMXo(q)= _mm_max_epi16 (sv, _mm_adds_epi16(ipv, *tsc)); tsc++;
- }
-
- /* Now the "special" states, which start from Mk->E (->C, ->J->B) */
- xE = esl_sse_hmax_epi16(xEv);
- if (xE >= 32767) { *ret_sc = eslINFINITY; return eslERANGE; } /* immediately detect overflow */
- xN = xN + om->xw[p7O_N][p7O_LOOP];
- xC = ESL_MAX(xC + om->xw[p7O_C][p7O_LOOP], xE + om->xw[p7O_E][p7O_MOVE]);
- xJ = ESL_MAX(xJ + om->xw[p7O_J][p7O_LOOP], xE + om->xw[p7O_E][p7O_LOOP]);
- xB = ESL_MAX(xJ + om->xw[p7O_J][p7O_MOVE], xN + om->xw[p7O_N][p7O_MOVE]);
- /* and now xB will carry over into next i, and xC carries over after i=L */
-
- /* Finally the "lazy F" loop (sensu [Farrar07]). We can often
- * prove that we don't need to evaluate any D->D paths at all.
- *
- * The observation is that if we can show that on the next row,
- * B->M(i+1,k) paths always dominate M->D->...->D->M(i+1,k) paths
- * for all k, then we don't need any D->D calculations.
- *
- * The test condition is:
- * max_k D(i,k) + max_k ( TDD(k-2) + TDM(k-1) - TBM(k) ) < xB(i)
- * So:
- * max_k (TDD(k-2) + TDM(k-1) - TBM(k)) is precalc'ed in om->dd_bound;
- * max_k D(i,k) is why we tracked Dmaxv;
- * xB(i) was just calculated above.
- */
- Dmax = esl_sse_hmax_epi16(Dmaxv);
- if (Dmax + om->ddbound_w > xB)
- {
- /* Now we're obligated to do at least one complete DD path to be sure. */
- /* dcv has carried through from end of q loop above */
- dcv = _mm_slli_si128(dcv, 2);
- dcv = _mm_or_si128(dcv, negInfv);
- tsc = om->twv + 7*Q; /* set tsc to start of the DD's */
- for (q = 0; q < Q; q++)
- {
- DMXo(q) = _mm_max_epi16(dcv, DMXo(q));
- dcv = _mm_adds_epi16(DMXo(q), *tsc); tsc++;
- }
-
- /* We may have to do up to three more passes; the check
- * is for whether crossing a segment boundary can improve
- * our score.
- */
- do {
- dcv = _mm_slli_si128(dcv, 2);
- dcv = _mm_or_si128(dcv, negInfv);
- tsc = om->twv + 7*Q; /* set tsc to start of the DD's */
- for (q = 0; q < Q; q++)
- {
- if (! esl_sse_any_gt_epi16(dcv, DMXo(q))) break;
- DMXo(q) = _mm_max_epi16(dcv, DMXo(q));
- dcv = _mm_adds_epi16(DMXo(q), *tsc); tsc++;
- }
- } while (q == Q); /* repeat only if the pass ran through every vector without breaking */
- }
- else /* not calculating DD? then just store the last M->D vector calc'ed.*/
- {
- dcv = _mm_slli_si128(dcv, 2);
- DMXo(0) = _mm_or_si128(dcv, negInfv);
- }
-
-#if p7_DEBUGGING
- if (ox->debugging) p7_omx_DumpVFRow(ox, i, xE, 0, xJ, xB, xC);
-#endif
- } /* end loop over sequence residues 1..L */
-
- /* finally C->T */
- if (xC > -32768)
- {
- *ret_sc = (float) xC + (float) om->xw[p7O_C][p7O_MOVE] - (float) om->base_w;
- /* *ret_sc += L * om->ncj_roundoff; see J4/150 for rationale: superceded by -3.0nat approximation*/
- *ret_sc /= om->scale_w;
- *ret_sc -= 3.0; /* the NN/CC/JJ=0,-3nat approximation: see J5/36. That's ~ L \log \frac{L}{L+3}, for our NN,CC,JJ contrib */
- }
- else *ret_sc = -eslINFINITY;
- return eslOK;
-}
-/*---------------- end, p7_ViterbiFilter() ----------------------*/
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/logsum.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/logsum.cpp
deleted file mode 100644
index a2ab11b..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/logsum.cpp
+++ /dev/null
@@ -1,116 +0,0 @@
-/* p7_FLogsum() function used in the Forward() algorithm.
-*
-* Contents:
-* 1. Floating point log sum.
-* 6. Copyright and license information.
-*
-* Exegesis:
-*
-* Internally, HMMER3 profile scores are in nats: floating point
-* log-odds probabilities, with the log odds taken relative to
-* background residue frequencies, and the log to the base e.
-*
-* The Forward algorithm needs to calculate sums of probabilities.
-* Given two log probabilities s1 and s2, where s1 = \log
-* \frac{p_1}{f_1}, and s2 = \log \frac{p_2}{f_2}, we need to
-* calculate s3 = \log \frac{p_1 + p_2}{f_3}.
-*
-* The Forward algorithm guarantees that f_1 = f_2 = f_3, because it
-* is always concerned with summing terms that describe different
-* parses of the same target sequence prefix, and the product of the
-* background frequencies for the same sequence prefix is a constant.
-*
-* The naive solution is s3 = log(e^{s1} + e^{s2}). This requires
-* expensive calls to log() and exp().
-*
-* A better solution is s3 = s1 + log(1 + e^{s2-s1}). s1 should be the
-* greater, so s2-s1 is negative. For sufficiently small s2 << s1,
-* e^{s2-s1} becomes less than the machine's FLT_EPSILON, and s3 ~=
-* s1. (This is at about s2-s1 < -15.9, for the typical FLT_EPSILON of
-* 1.2e-7.)
-*
-* With some loss of accuracy, we can precalculate log(1 + e^{s2-s1})
-* for a discretized range of differences (s2-s1), and compute s3 = s1
-* + table_lookup(s2-s1). This is what HMMER's p7_FLogsum() function
-* does.
-*
-* SRE, Wed Jul 11 11:00:57 2007 [Janelia]
-* SVN $Id: logsum.c 2818 2009-06-03 12:31:02Z eddys $
-*/
-
-#include <cmath>
-#include <cassert>
-
-#include "p7_config.h"
-#include "hmmer.h"
-
-#include <task_local_storage/uHMMSearchTaskLocalStorage.h>
-
-using namespace U2;
-
-
-/*****************************************************************
-*= 1. floating point log sum
-*****************************************************************/
-
-// removed p7_FLogsumInit() - we init array in TaskLocalData class for hmmer
-/* Function: p7_FLogsum()
-* Synopsis: Approximate $\log(e^a + e^b)$.
-* Incept: SRE, Fri Jul 13 15:30:39 2007 [Janelia]
-*
-* Purpose: Returns a fast table-driven approximation to
-* $\log(e^a + e^b)$.
-*
-* Either <a> or <b> (or both) may be $-\infty$,
-* but neither may be $+\infty$ or <NaN>.
-*
-* The larger argument is returned unchanged when the smaller
-* one is <-eslINFINITY> or when the two differ by 15.7 or
-* more; otherwise a correction term is read from the
-* <flogsum_lookup> table, indexed by the difference scaled
-* by <p7_INTSCALE> and truncated to int.
-*
-* Note: This function is a critical optimization target, because
-* it's in the inner loop of generic Forward() algorithms.
-*
-* The lookup table is taken from the object returned by
-* UHMM3SearchTaskLocalStorage::current(). That pointer is
-* only checked with assert(), so builds that define NDEBUG
-* dereference it unchecked.
-*/
-float
-p7_FLogsum( float a, float b )
-{
- const float max = ESL_MAX(a, b);
- const float min = ESL_MIN(a, b);
-
- const UHMM3SearchTaskLocalData* tld = UHMM3SearchTaskLocalStorage::current();
- assert( NULL != tld );
- return (min == -eslINFINITY || (max-min) >= 15.7f) ? max : max +
- tld->flogsum_lookup[(int)((max-min)*p7_INTSCALE)];
-}
-
-/* Function: p7_FLogsumError()
-* Synopsis: Compute absolute error in probability from Logsum.
-* Incept: SRE, Sun Aug 3 10:22:18 2008 [Janelia]
-*
-* Purpose: Compute the absolute error in probability space
-* resulting from <p7_FLogsum()>'s table lookup
-* approximation: approximation result - exact result.
-*
-* This is of course computable analytically for
-* any <a,b> given <p7_LOGSUM_TBL>; but the function
-* is useful for some routines that want to determine
-* if <p7_FLogsum()> has been compiled in its
-* exact slow mode for debugging purposes. Testing
-* <p7_FLogsumError(-0.4, -0.5) > 0.0001>
-* for example, suffices to detect that the function
-* is compiled in its fast approximation mode given
-* the defaults.
-*
-* Args: a, b - the two log-space values passed on to <p7_FLogsum()>
-*
-* Returns: exp(approximation) - exp(exact), where exact is
-* log(exp(a) + exp(b)). The result is signed: no absolute
-* value is taken.
-*/
-float
-p7_FLogsumError(float a, float b)
-{
- float approx = p7_FLogsum(a,b);
- float exact = log(exp(a) + exp(b));
- return (exp(approx) - exp(exact));
-}
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/modelconfig.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/modelconfig.cpp
deleted file mode 100644
index d31520a..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/modelconfig.cpp
+++ /dev/null
@@ -1,567 +0,0 @@
-/* Model configuration:
-* Converting a core model to a fully configured Plan7 search profile.
-*
-* Contents:
-* 1. Routines in the exposed API.
-* 5. Copyright and license
-*
-* Revised May 2005: xref STL9/77-81. (Uniform fragment distribution)
-* Again, Sept 2005: xref STL10/24-26. (Inherent target length dependency)
-* Again, Jan 2007: xref STL11/125,136-137 (HMMER3)
-* Again, Jul 2007: xref J1/103 (floating point ops)
-*
-* SRE, Mon May 2 10:55:16 2005 [St. Louis]
-* SRE, Fri Jan 12 08:06:33 2007 [Janelia] [Kate Bush, Aerial]
-* SRE, Tue Jul 10 13:19:46 2007 [Janelia] [HHGTTG]
- * SVN $Id: modelconfig.c 3041 2009-11-12 12:58:09Z eddys $
-*/
-#include <hmmer3/p7_config.h>
-
-#include <math.h>
-#include <float.h>
-#include <string.h>
-#include <ctype.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_vectorops.h>
-
-#include <hmmer3/hmmer.h>
-
-
-/*****************************************************************
-* 1. Routines in the exposed API.
-*****************************************************************/
-
-/* Function: p7_ProfileConfig()
-* Synopsis: Configure a search profile.
-* Incept: SRE, Sun Sep 25 12:21:25 2005 [St. Louis]
-*
-* Purpose: Given a model <hmm> with core probabilities, the null1
-* model <bg>, a desired search <mode> (one of <p7_LOCAL>,
-* <p7_GLOCAL>, <p7_UNILOCAL>, or <p7_UNIGLOCAL>), and an
-* expected target sequence length <L>; configure the
-* search model in <gm> with lod scores relative to the
-* background frequencies in <bg>.
-*
-* The steps, in order: copy annotation and statistics from
-* <hmm>; derive the consensus line; set entry scores (local
-* or glocal); set the E state scores; set transition, match
-* emission and insert emission scores; finally call
-* <p7_ReconfigLength()> for the N/C/J scores.
-*
-* Returns: <eslOK> on success; the profile <gm> now contains
-* scores and is ready for searching target sequences.
-*
-* Throws: <eslEMEM> on allocation error.
-* <eslEINVAL> if the alphabets of <gm> and <hmm> differ, or
-* if <hmm->M> exceeds <gm->allocM>.
-*
-* NOTE(review): <gm->name>, <gm->acc> and <gm->desc> are freed but not
-* reset before the esl_strdup() calls; if an earlier
-* esl_strdup() fails, the later fields still hold freed
-* pointers when control reaches ERROR. Confirm how callers
-* destroy <gm> after a failure.
-* NOTE(review): <occ> is freed in the local-mode branch without being
-* reset to NULL, and the ERROR label frees it again when
-* non-NULL. The only later jump to ERROR is guarded by
-* <p7_ReconfigLength()>, which as written always returns
-* <eslOK>, so this looks latent rather than live.
-*/
-int
-p7_ProfileConfig(const P7_HMM *hmm, const P7_BG *bg, P7_PROFILE *gm, int L, int mode)
-{
- int k, x, z; /* counters over states, residues, annotation */
- int status;
- float *occ = NULL;
- float *tp, *rp;
- float sc[p7_MAXCODE];
- float mthresh;
- float Z;
-
- /* Contract checks */
- if (gm->abc->type != hmm->abc->type) ESL_XEXCEPTION(eslEINVAL, "HMM and profile alphabet don't match");
- if (hmm->M > gm->allocM) ESL_XEXCEPTION(eslEINVAL, "profile too small to hold HMM");
-
- /* Copy some pointer references and other info across from HMM */
- gm->M = hmm->M;
- gm->mode = mode;
- gm->roff = -1;
- gm->eoff = -1;
- gm->offs[p7_MOFFSET] = -1;
- gm->offs[p7_FOFFSET] = -1;
- gm->offs[p7_POFFSET] = -1;
- if (gm->name != NULL) free(gm->name);
- if (gm->acc != NULL) free(gm->acc);
- if (gm->desc != NULL) free(gm->desc);
- if ((status = esl_strdup(hmm->name, -1, &(gm->name))) != eslOK) goto ERROR;
- if ((status = esl_strdup(hmm->acc, -1, &(gm->acc))) != eslOK) goto ERROR;
- if ((status = esl_strdup(hmm->desc, -1, &(gm->desc))) != eslOK) goto ERROR;
- if (hmm->flags & p7H_RF) strcpy(gm->rf, hmm->rf); /* assumes gm->rf is large enough for hmm->rf -- TODO confirm against the allocator */
- if (hmm->flags & p7H_CS) strcpy(gm->cs, hmm->cs); /* same assumption for gm->cs */
- for (z = 0; z < p7_NEVPARAM; z++) gm->evparam[z] = hmm->evparam[z];
- for (z = 0; z < p7_NCUTOFFS; z++) gm->cutoff[z] = hmm->cutoff[z];
- for (z = 0; z < p7_MAXABET; z++) gm->compo[z] = hmm->compo[z];
-
- /* Determine the "consensus" residue for each match position.
- * This is only used for alignment displays, not in any calculations.
- * The most probable residue is written in upper case when its
- * probability exceeds <mthresh>, in lower case otherwise.
- */
- if (hmm->abc->type == eslAMINO) mthresh = 0.5;
- else if (hmm->abc->type == eslDNA) mthresh = 0.9;
- else if (hmm->abc->type == eslRNA) mthresh = 0.9;
- else mthresh = 0.5;
- gm->consensus[0] = ' ';
- for (k = 1; k <= hmm->M; k++) {
- x = esl_vec_FArgMax(hmm->mat[k], hmm->abc->K);
- gm->consensus[k] = ((hmm->mat[k][x] > mthresh) ? toupper(hmm->abc->sym[x]) : tolower(hmm->abc->sym[x]));
- }
- gm->consensus[hmm->M+1] = '\0';
-
- /* Entry scores. */
- if (p7_profile_IsLocal(gm))
- {
- /* Local mode entry: occ[k] /( \sum_i occ[i] * (M-i+1))
- * (Reduces to uniform 2/(M(M+1)) for occupancies of 1.0) */
- Z = 0.;
- ESL_ALLOC_WITH_TYPE(occ, float*, sizeof(float) * (hmm->M+1));
-
- if ((status = p7_hmm_CalculateOccupancy(hmm, occ, NULL)) != eslOK) goto ERROR;
- for (k = 1; k <= hmm->M; k++)
- Z += occ[k] * (float) (hmm->M-k+1);
- for (k = 1; k <= hmm->M; k++)
- p7P_TSC(gm, k-1, p7P_BM) = log((double)(occ[k] / Z)); /* note off-by-one: entry at Mk stored as [k-1][BM] */
-
- free(occ);
- }
- else /* glocal modes: left wing retraction; must be in log space for precision */
- {
- Z = log((double)(hmm->t[0][p7H_MD]));
- p7P_TSC(gm, 0, p7P_BM) = log((double)(1.0 - hmm->t[0][p7H_MD]));
- for (k = 1; k < hmm->M; k++)
- {
- p7P_TSC(gm, k, p7P_BM) = Z + log((double)(hmm->t[k][p7H_DM]));
- Z += log((double)(hmm->t[k][p7H_DD]));
- }
- }
-
- /* E state loop/move probabilities: nonzero for MOVE allows loops/multihits
- * N,C,J transitions are set later by length config
- */
- if (p7_profile_IsMultihit(gm)) {
- gm->xsc[p7P_E][p7P_MOVE] = -eslCONST_LOG2;
- gm->xsc[p7P_E][p7P_LOOP] = -eslCONST_LOG2;
- gm->nj = 1.0f;
- } else {
- gm->xsc[p7P_E][p7P_MOVE] = 0.0f;
- gm->xsc[p7P_E][p7P_LOOP] = -eslINFINITY;
- gm->nj = 0.0f;
- }
-
- /* Transition scores. */
- for (k = 1; k < gm->M; k++) {
- tp = gm->tsc + k * p7P_NTRANS;
- tp[p7P_MM] = log((double)(hmm->t[k][p7H_MM]));
- tp[p7P_MI] = log((double)(hmm->t[k][p7H_MI]));
- tp[p7P_MD] = log((double)(hmm->t[k][p7H_MD]));
- tp[p7P_IM] = log((double)(hmm->t[k][p7H_IM]));
- tp[p7P_II] = log((double)(hmm->t[k][p7H_II]));
- tp[p7P_DM] = log((double)(hmm->t[k][p7H_DM]));
- tp[p7P_DD] = log((double)(hmm->t[k][p7H_DD]));
- }
-
- /* Match emission scores. */
- sc[hmm->abc->K] = -eslINFINITY; /* gap character */
- sc[hmm->abc->Kp-2] = -eslINFINITY; /* nonresidue character */
- sc[hmm->abc->Kp-1] = -eslINFINITY; /* missing data character */
- for (k = 1; k <= hmm->M; k++) {
- for (x = 0; x < hmm->abc->K; x++)
- sc[x] = log((double)(hmm->mat[k][x] / bg->f[x]));
- esl_abc_FExpectScVec(hmm->abc, sc, bg->f);
- for (x = 0; x < hmm->abc->Kp; x++) {
- rp = gm->rsc[x] + k * p7P_NR;
- rp[p7P_MSC] = sc[x];
- }
- }
-
- /* Insert emission scores */
- /* SRE, Fri Dec 5 08:41:08 2008: We currently hardwire insert scores
- * to 0, i.e. corresponding to the insertion emission probabilities
- * being equal to the background probabilities. Benchmarking shows
- * that setting inserts to informative emission distributions causes
- * more problems than it's worth: polar biased composition hits
- * driven by stretches of "insertion" occur, and are difficult to
- * correct for.
- */
- for (x = 0; x < gm->abc->Kp; x++)
- {
- for (k = 1; k < hmm->M; k++) p7P_ISC(gm, k, x) = 0.0f;
- p7P_ISC(gm, hmm->M, x) = -eslINFINITY; /* init I_M to impossible. */
- }
- for (k = 1; k <= hmm->M; k++) p7P_ISC(gm, k, gm->abc->K) = -eslINFINITY; /* gap symbol */
- for (k = 1; k <= hmm->M; k++) p7P_ISC(gm, k, gm->abc->Kp-2) = -eslINFINITY; /* nonresidue symbol */
- for (k = 1; k <= hmm->M; k++) p7P_ISC(gm, k, gm->abc->Kp-1) = -eslINFINITY; /* missing data symbol */
-
-
-#if 0
- /* original (informative) insert setting: relies on sc[K, Kp-1] initialization to -inf above */
- for (k = 1; k < hmm->M; k++) {
- for (x = 0; x < hmm->abc->K; x++)
- sc[x] = log(hmm->ins[k][x] / bg->f[x]);
- esl_abc_FExpectScVec(hmm->abc, sc, bg->f);
- for (x = 0; x < hmm->abc->Kp; x++) {
- rp = gm->rsc[x] + k*p7P_NR;
- rp[p7P_ISC] = sc[x];
- }
- }
- for (x = 0; x < hmm->abc->Kp; x++)
- p7P_ISC(gm, hmm->M, x) = -eslINFINITY; /* init I_M to impossible. */
-#endif
-
- /* Remaining specials, [NCJ][MOVE | LOOP] are set by ReconfigLength()
- */
- gm->L = 0; /* force ReconfigLength to reconfig */
- if ((status = p7_ReconfigLength(gm, L)) != eslOK) goto ERROR;
- return eslOK;
-
-ERROR:
- if (occ != NULL) free(occ);
- return status;
-}
-
-/* Function: p7_ReconfigLength()
-* Synopsis: Set the target sequence length of a model.
-* Incept: SRE, Sun Sep 25 12:38:55 2005 [St. Louis]
-*
-* Purpose: Given a model already configured for scoring, in some
-* particular algorithm mode; reset the expected length
-* distribution of the profile for a new mean of <L>.
-*
-* This doesn't affect the length distribution of the null
-* model. That must also be reset, using <p7_bg_SetLength()>.
-*
-* We want this routine to run as fast as possible, because
-* the caller needs to dynamically reconfigure the model
-* for the length of each target sequence in a database
-* search. The profile has precalculated <gm->nj>,
-* the number of times the J state is expected to be used,
-* based on the E state loop transition in the current
-* configuration.
-*
-* Args: gm - profile to update; <gm->nj> is read, and the N/C/J
-* LOOP and MOVE entries of <gm->xsc> plus <gm->L> are written
-* L - new expected target sequence length
-*
-* Returns: <eslOK> on success; xsc[NCJ] scores are set here. These
-* control the target length dependence of the model.
-* As written there is no other return value.
-*/
-int
-p7_ReconfigLength(P7_PROFILE *gm, int L)
-{
- float ploop, pmove;
-
- /* Configure N,J,C transitions so they bear L/(2+nj) of the total
- * unannotated sequence length L.
- */
- pmove = (2.0f + gm->nj) / ((float) L + 2.0f + gm->nj); /* 2/(L+2) for sw; 3/(L+3) for fs */
- ploop = 1.0f - pmove;
- gm->xsc[p7P_N][p7P_LOOP] = gm->xsc[p7P_C][p7P_LOOP] = gm->xsc[p7P_J][p7P_LOOP] = log((double)ploop); /* all three states share one loop score */
- gm->xsc[p7P_N][p7P_MOVE] = gm->xsc[p7P_C][p7P_MOVE] = gm->xsc[p7P_J][p7P_MOVE] = log((double)pmove); /* and one move score */
- gm->L = L;
- return eslOK;
-}
-
-/* Function: p7_ReconfigMultihit()
-* Synopsis: Quickly reconfig model into multihit mode for target length <L>.
-* Incept: SRE, Sat Feb 23 09:16:01 2008 [Janelia]
-*
-* Purpose: Given a profile <gm> that's already been configured once,
-* quickly reconfigure it into a multihit mode for target
-* length <L>.
-*
-* This gets called in domain definition, when we need to
-* flip the model in and out of unihit <L=0> mode to
-* process individual domains.
-*
-* Sets both E state scores (MOVE and LOOP) to
-* <-eslCONST_LOG2> and <gm->nj> to 1, the same values
-* <p7_ProfileConfig()> uses for multihit profiles.
-*
-* Note: You can't just flip uni/multi mode alone, because that
-* parameterization also affects target length
-* modeling. You need to make sure uni vs. multi choice is
-* made before the length model is set, and you need to
-* make sure the length model is recalculated if you change
-* the uni/multi mode. Hence, these functions call
-* <p7_ReconfigLength()>.
-*
-* Returns: the status returned by <p7_ReconfigLength()>.
-*/
-int
-p7_ReconfigMultihit(P7_PROFILE *gm, int L)
-{
- gm->xsc[p7P_E][p7P_MOVE] = -eslCONST_LOG2;
- gm->xsc[p7P_E][p7P_LOOP] = -eslCONST_LOG2;
- gm->nj = 1.0f;
- return p7_ReconfigLength(gm, L);
-}
-
-/* Function: p7_ReconfigUnihit()
-* Synopsis: Quickly reconfig model into unihit mode for target length <L>.
-* Incept: SRE, Sat Feb 23 09:19:42 2008 [Janelia]
-*
-* Purpose: Given a profile <gm> that's already been configured once,
-* quickly reconfigure it into a unihit mode for target
-* length <L>.
-*
-* This gets called in domain definition, when we need to
-* flip the model in and out of unihit <L=0> mode to
-* process individual domains.
-*
-* Sets the E state MOVE score to 0, the E state LOOP score
-* to <-eslINFINITY>, and <gm->nj> to 0, the same values
-* <p7_ProfileConfig()> uses for non-multihit profiles.
-*
-* Returns: the status returned by <p7_ReconfigLength()>.
-*/
-int
-p7_ReconfigUnihit(P7_PROFILE *gm, int L)
-{
- gm->xsc[p7P_E][p7P_MOVE] = 0.0f;
- gm->xsc[p7P_E][p7P_LOOP] = -eslINFINITY;
- gm->nj = 0.0f;
- return p7_ReconfigLength(gm, L);
-}
-
-/*----------------------------------------------------------------------
-* Preamble.
-*
-* There are four search modes:
-* single-hit multi-hit
-* -------------------- ------------------------
-* local | sw (p7_UNILOCAL) fs (p7_LOCAL)
-* glocal | s (p7_UNIGLOCAL) ls (p7_GLOCAL)
-*
-* Additionally, each search mode is configured for a particular
-* target length. Thus "LS/400" means a model configured for glocal,
-* multihit alignment of a target sequence of length 400.
-*
-*-----------------------------------------------------------------------
-* Exegesis.
-*
-* When you enter this module, you've got an HMM (P7_HMM) in "core"
-* probability form: t[], mat[], ins[] are all valid, normalized
-* probabilities. The routines here are used to create the "profile"
-* form (P7_PROFILE) of the model: tsc[], msc[], isc[], bsc[], esc[],
-* and xsc[] fields as integer log-odds scores.
-*
-* Also in the process, xt[] are set to their algorithm-dependent
-* probabilities, though these probabilities are only for reference.
-*
-* The configuration process breaks down into distinct conceptual steps:
-*
-* 1. Algorithm configuration.
-* An "algorithm mode" is chosen. This determines whether
-* alignments will allow local entry/exit in the model, and sets
-* the probabilities in xt[XTE], which determine
-* multi-hit/single-hit behavior. The "nj" value of the HMM is
-* also set here (the expected # of times the J state will be used;
-* 0 for single-hit mode and 1 for the default parameterization of
-* multihit modes).
-*
-* 2. Wing retraction.
-* In a profile, the D_1 and D_M states of the core model are
-* removed. The probability of the paths B->D1...->Mk ("BMk") that
-* enter D1 and use all D's before reaching M_k is treated instead
-* as an additional dollop of B->Mk entry probability, and the
-* probability of paths Mk->Dk+1...D_M->E ("MkE") is treated
-* instead as an additional dollop of Mk->E exit probability. The
-* MkE path probability is subtracted from the Mk->Dk+1 transition.
-*
-* In local algorithm modes, these extra dollops are ignored, and
-* the model is renormalized appropriately. That is, the algorithm
-* overrides all B->DDDD->M and/or M->DDDD->E path probabilities
-* with its own internal entry/exit probabilities.
-*
-* If the algorithm mode is "global" at either entry or exit, then
-* the internal entries are set to BMk and internal exits are set
-* to MkE, and the model is renormalized appropriately. That is,
-* the algorithm treats B->DDDD->M and/or M->DDDD->E path
-* probabilities as internal entries/exits, instead of allowing
-* dynamic programming algorithms to use the D_1 or D_M states.
-*
-* These two alternatives are represented differently in traces,
-* where an X state is used to signal 'missing data' in a local
-* alignment. Thus B->X->Mk indicates local entry, whereas B->Mk in
-* a trace indicates a wing-retracted B->DDD->Mk entry with respect
-* to the core HMM; similarly Mk->X->E indicates local exit, and
-* Mk->E indicates a Mk->DDDD->E path in the core HMM.
-*
-* Wing retraction is a compulsive detail with two purposes. First,
-* it removes a mute cycle from the model, B->D1 ...D_M->E, which
-* cannot be correctly and efficiently dealt with by DP
-* recursions. (A DP algorithm could just *ignore* that path
-* though, and ignore the negligible amount of probability in it.)
-* Second, wing retraction reconciles the algorithm-dependent
-* entry/exit probabilities with the core model. For algorithms
-* that impose local internal entry/exit, we don't want there to be
-* any additional probability coming from "internal" B->DDD->M and
-* M->DDD->E paths, so wing retraction takes it away.
-*
-* 3. Local alignment D-path leveling.
-* For fully local alignments, we want every fragment ij (starting
-* at match i, ending from match j) to be equiprobable. There are
-* M(M+1)/2 possible such fragments, so the probability of each
-* one is 2/M(M+1).
-*
-* Notionally, we imagine a "model" consisting of the M(M+1)/2
-* possible fragments, with entry probability of 2/M(M+1) for each.
-*
-* Operationally, we achieve this by a trick inspired by a
-* suggestion from Bill Bruno. Bill suggested that for a model with
-* no delete states, if we set begin[k] = 1/(M-k+1) and end[k] =
-* (M-k+1) / [M(M+1)/2], all fragments are equiprobable: the prob
-* of any given fragment is
-* b_i * e_j * \prod_{k=i}^{j-1} (1-e_k);
-* that is, the fragment also includes (j-i) penalizing terms for
-* *not* ending at i..j-1. Remarkably, this gives the result we
-* want: this product is always 2/M(M+1), for any ij.
-*
-* However, D->D transitions throw a wrench into this trick,
-* though. A local alignment that goes M_i->D...D->M_j, for
-* example, only gets hit with one not-end penalty (for the
-* M_i->D). This means that paths including deletions will be
-* artifactually favored.
-*
-* A solution is to subtract log(1-e_k) from the deletion
-* transition scores as well as the match transition scores. Thus
-* one log(1-e_k) penalty is always exacted upon transitioning from
-* any node k->k+1. This is *not* part of the probabilistic model:
-* it is a score accounting trick that forces the DP algorithms to
-* associate a log(1-e_k) penalty for each node k->k+1 transition,
-* which makes the DP calculations give the result desired for our
-* *notional* probabilistic model with a single 2/M(M+1) transition
-* for each possible fragment. (A similar accounting trick is the
-* use of log-odds scoring, where we associate null model
-* transitions and emissions with appropriate terms in the HMM, to
-* assure that the final score of any path accounts for all the
-* desired probability terms in an overall log-odds score). The
-* overall score of any fragment can be rearranged such that there
-* is one term consisting of a product of all these penalties * b_i
-* * e_j = 2/M(M+1), and another term consisting of the actual
-* model transition path score between i,j.
-*
-* 4. Target length dependence.
-* Given a particular target sequence of length L, we want our HMM score
-* to be as independent as possible of L. Otherwise, long sequences will
-* give higher scores, even if they are nonhomologous.
-*
-* The traditional solution to this is Karlin/Altschul statistics,
-* which tells us that E(s=x) = KMNe^-{\lambda x}, so we expect to
-* have to make a -1 bit score correction for every 2x increase in
-* target sequence length (ignoring edge correction effects). K/A
-* statistics have been proven for local Viterbi single-hit
-* ungapped alignments. There is abundant literature showing they
-* hold empirically for local Viterbi single-hit gapped
-* alignments. In my hands the length dependence (though not the
-* form of the distribution) holds for any single-hit alignment
-* (local or glocal, Viterbi or forward) but it does not
-* hold for multihit alignment modes.
-*
-* HMMER's solution is to build the length dependence right into
-* the probabilistic model, so that we have a full probabilistic
-* model of the target sequence. We match the expected lengths of
-* the model M and the null model R by setting the p1, N, C, and J
-* transitions appropriately. R has to emit the whole sequence, so
-* it has a self-transition of L/(L+1). N, C, and J have to emit
-* (L-(k+1)x) residues of the sequence, where x is the expected
-* length of an alignment to the core model, and k is the expected
-* number of times that we cycle through the J state. k=0 in sw
-* mode, and k=1 in fs/ls mode w/ the standard [XTE][LOOP]
-* probability of 0.5.
-*
-* 5. Conversion of probabilities to integer log-odds scores.
-* This step incorporates the contribution of the null model,
-* and converts floating-point probs to the scaled integer log-odds
-* score values that are used by the DP alignment routines.
-*
-* Step 1 is done by the main p7_ProfileConfig() function, which takes
-* a choice of algorithm mode as an argument.
-*
-* Step 2 is done by the *wing_retraction*() functions, which also
-* go ahead and convert the affected transitions to log-odds scores;
-* left wing retraction sets bsc[], right wing retraction sets
-* esc[] and tsc[TM*].
-*
-* Step 3 is carried out by one of two delete path accounting routines,
-* which go ahead and set tsc[TD*].
-*
-* Step 4 is carried out by the p7_ReconfigLength() routine.
-*
-* Step 5 is carried out for all remaining scores by logoddsify_the_rest().
-*
-* Note that the profile never exists in a configured probability
-* form. The probability model for the search profile is implicit, not
-* explicit, because of the handling of local entry/exit transitions.
-* You can see this in more detail in emit.c:p7_ProfileEmit()
-* function, which samples sequences from the profile's probabilistic
-* model.
-*
-* So, overall, to find where the various scores and probs are set:
-* bsc : wing retraction (section 2)
-* esc : wing retraction (section 2)
-* tsc[TM*] : wing retraction (section 2)
-* tsc[TI*] : logoddsify_the_rest() (section 4)
-* tsc[TD*] : dpath leveling (section 3)
-* p1 : target_ldependence() (section 4)
-* xt[NCJ] : target_ldependence() (section 4)
-* xsc (all): logoddsify_the_rest() (section 4)
-* msc : logoddsify_the_rest() (section 5)
-* isc : logoddsify_the_rest() (section 5)
-*/
-
-
-/*****************************************************************
-* 2. The four config_*() functions for specific algorithm modes.
-*****************************************************************/
-
-/*****************************************************************
-* Exegesis.
-*
-* The following functions are the Plan7 equivalent of choosing
-* different alignment styles (fully local, fully global,
-* global/local, multihit, etc.)
-*
-* When you come into a configuration routine, the following
-* probabilities are valid in the model:
-* 1. t[1..M-1][0..6]: all the state transitions.
-* (Node M is special: it has only a match and a delete state,
-* no insert state, and M_M->E = 1.0 and D_M->E = 1.0 by def'n.)
-* 2. mat[1..M][]: all the match emissions.
-* 3. ins[1..M-1][]: all the insert emissions. Note that there is
-* no insert state in node M.
-* 4. tbd1: the B->D1 probability. The B->M1 probability is 1-tbd1.
-* These are the "data-dependent" probabilities in the model.
-*
-* The configuration routine gets to set the "algorithm-dependent"
-* probabilities:
-* 1. xt[XTN][MOVE,LOOP] dist controls unaligned N-terminal seq.
-* The higher xt[XTN][LOOP] is, the more unaligned seq we allow.
-* Similarly, xt[XTC][MOVE,LOOP] dist controls unaligned C-terminal
-* seq, and xt[XTJ][MOVE,LOOP] dist controls length of unaligned sequence
-* between multiple copies of a domain. Normally, if these are nonzero,
-* they are all set to be equal to hmm->p1, the loop probability
-* for the null hypothesis (see below).
-* 2. xt[XTE][MOVE,LOOP] distribution controls multihits.
-* Setting xt[XTE][LOOP] to 0.0 forces one hit per model.
-* 3. begin[1..M] controls entry probabilities. An algorithm
-* mode either imposes internal begin probabilities, or leaves begin[1]
-* as 1.0 and begin[k] = 0.0 for k>1.
-* 4. end[1..M] controls exit probabilities. An algorithm mode either
-* imposes internal exit probabilities, or leaves end[M] = 1.0
-* and end[k] = 0.0 for k<M.
-*
-* The configuration routine then calls routines as appropriate to set
-* up all the model's scores, given these configured probabilities. When
-* the config routine returns, all scores are ready for alignment:
-* bsc, esc, tsc, msc, isc, and xsc.
-*
-*****************************************************************
-*
-* SRE: REVISIT THE ISSUE BELOW. THE CONDITIONS ARE NO LONGER MET!
-*
-* There is (at least) one more issue worth noting.
-* If you want per-domain scores to sum up to per-sequence scores, which is
-* generally desirable if you don't want "bug" reports from vigilant users,
-* then one of the following two sets of conditions must be met:
-*
-* 1) t(E->J) = 0
-* e.g. no multidomain hits
-*
-* 2) t(N->N) = t(C->C) = t(J->J) = hmm->p1
-* e.g. unmatching sequence scores zero, and
-* N->B first-model score is equal to J->B another-model score.
-*
-* These constraints are obeyed in the default Config() functions below,
-* but in the future (say, when HMM editing may be allowed) we'll have
-* to remember this. Non-equality of the summed domain scores and
-* the total sequence score is a really easy "red flag" for people to
-* notice and report as a bug, even if it may make probabilistic
-* sense not to meet either constraint for certain modeling problems.
-*****************************************************************
-*/
-
-
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/modelstats.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/modelstats.cpp
deleted file mode 100644
index 74eca32..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/modelstats.cpp
+++ /dev/null
@@ -1,233 +0,0 @@
-/* Miscellaneous summary statistics calculated for HMMs and profiles.
-*
-* SRE, Fri May 4 11:43:20 2007 [Janelia]
-* SVN $Id: modelstats.c 2818 2009-06-03 12:31:02Z eddys $
-*/
-
-#include <hmmer3/p7_config.h>
-
-#include <math.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_vectorops.h>
-
-#include <hmmer3/hmmer.h>
-
-double p7_MeanMatchEntropy(const P7_HMM *hmm);
-
-/* Function: p7_MeanMatchInfo()
-* Incept: SRE, Fri May 4 11:43:56 2007 [Janelia]
-*
-* Purpose: Calculate the mean information content per match state
-* emission distribution, in bits:
-*
-* \[
-* \frac{1}{M} \sum_{k=1}^{M}
-* \left[
-* - \sum_x p_k(x) \log_2 p_k(x)
-* + \sum_x f(x) \log_2 f(x)
-* \right]
-* \]
-*
-* where $p_k(x)$ is emission probability for symbol $x$
-* from match state $k$, and $f(x)$ is the null model's
-* background emission probability for $x$.
-*/
-double
-p7_MeanMatchInfo(const P7_HMM *hmm, const P7_BG *bg)
-{
- return esl_vec_FEntropy(bg->f, hmm->abc->K) - p7_MeanMatchEntropy(hmm);
-}
-
-/* Function: p7_MeanMatchEntropy()
-* Incept: SRE, Fri May 4 13:37:15 2007 [Janelia]
-*
-* Purpose: Calculate the mean entropy per match state emission
-* distribution, in bits:
-*
-* \[
-* \frac{1}{M} \sum_{k=1}^{M} -\sum_x p_k(x) \log_2 p_k(x)
-* \]
-*
-* where $p_k(x)$ is emission probability for symbol $x$
-* from match state $k$.
-*/
-double
-p7_MeanMatchEntropy(const P7_HMM *hmm)
-{
- int k;
- double H = 0.;
-
- for (k = 1; k <= hmm->M; k++)
- H += esl_vec_FEntropy(hmm->mat[k], hmm->abc->K);
- H /= (double) hmm->M;
- return H;
-}
-
-
-/* Function: p7_MeanMatchRelativeEntropy()
-* Incept: SRE, Fri May 11 09:25:01 2007 [Janelia]
-*
-* Purpose: Calculate the mean relative entropy per match state emission
-* distribution, in bits:
-*
-* \[
-* \frac{1}{M} \sum_{k=1}^{M} \sum_x p_k(x) \log_2 \frac{p_k(x)}{f(x)}
-* \]
-*
-* where $p_k(x)$ is emission probability for symbol $x$
-* from match state $k$, and $f(x)$ is the null model's
-* background emission probability for $x$.
-*/
-double
-p7_MeanMatchRelativeEntropy(const P7_HMM *hmm, const P7_BG *bg)
-{
- int k;
- double KL = 0.;
-
-#if 0
- p7_bg_Dump(stdout, hmm->bg);
- for (k = 1; k <= hmm->M; k++)
- printf("Match %d : %.2f %.2f\n", k,
- esl_vec_FRelEntropy(hmm->mat[k], hmm->bg->f, hmm->abc->K),
- esl_vec_FEntropy(bg->f, hmm->abc->K) - esl_vec_FEntropy(hmm->mat[k], hmm->abc->K));
-#endif
-
- for (k = 1; k <= hmm->M; k++)
- KL += esl_vec_FRelEntropy(hmm->mat[k], bg->f, hmm->abc->K);
- KL /= (double) hmm->M;
- return KL;
-}
-
-
-// removed unused function p7_MeanForwardScore
-
-
-/* Function: p7_MeanPositionRelativeEntropy()
-* Synopsis: Calculate the mean score per match position, including gap cost.
-* Incept: SRE, Thu Sep 6 10:26:14 2007 [Janelia]
-*
-* Purpose: Calculate the mean score (relative entropy) in bits per
-* match (consensus) position in model <hmm>, given background
-* model <bg>.
-*
-* More specifically: the mean bitscore is weighted by
-* match state occupancy (match states that aren't used
-* much are downweighted), and the log transitions into
-* that match state from the previous M, D, or I are
-* counted against it, weighted by their probability.
-*
-* This isn't a complete accounting of the average score
-* per model position nor per aligned residue; most
-* notably, it doesn't include the contribution of
-* entry/exit probabilities. So don't expect to approximate
-* average scores by multiplying <*ret_entropy> by <M>.
-*
-* Returns: <eslOK> on success, and <*ret_entropy> is the result.
-*
-* Throws: <eslEMEM> on allocation failure, and <*ret_entropy> is 0.
-*/
-int
-p7_MeanPositionRelativeEntropy(const P7_HMM *hmm, const P7_BG *bg, double *ret_entropy)
-{
- int status;
- float *mocc = NULL;
- int k;
- double mre, tre;
- double xm, xi, xd;
-
- ESL_ALLOC_WITH_TYPE(mocc, float*, sizeof(float) * (hmm->M+1));
- if ((status = p7_hmm_CalculateOccupancy(hmm, mocc, NULL)) != eslOK) goto ERROR;
-
- /* mre = the weighted relative entropy per match emission */
- for (mre = 0., k = 1; k <= hmm->M; k++)
- mre += mocc[k] * esl_vec_FRelEntropy(hmm->mat[k], bg->f, hmm->abc->K);
- mre /= esl_vec_FSum(mocc+1, hmm->M);
-
- /* The weighted relative entropy per match entry transition, 2..M
- */
- for (tre = 0., k = 2; k <= hmm->M; k++)
- {
- xm = mocc[k-1]*hmm->t[k-1][p7H_MM] * log((double)(hmm->t[k-1][p7H_MM] / bg->p1));
- xi = mocc[k-1]*hmm->t[k-1][p7H_MI] * (log((double)(hmm->t[k-1][p7H_MM] / bg->p1)) + log((double)(hmm->t[k-1][p7H_IM] / bg->p1)));
- xd = (1.-mocc[k-1])*hmm->t[k-1][p7H_DM] * log((double)(hmm->t[k-1][p7H_DM] / bg->p1));
- tre += (xm+xi+xd) / eslCONST_LOG2;
- }
- tre /= esl_vec_FSum(mocc+2, hmm->M-1);
-
- free(mocc);
- *ret_entropy = mre+tre;
- return eslOK;
-
-ERROR:
- if (mocc != NULL) free(mocc);
- *ret_entropy = 0.;
- return status;
-}
-
-
-/* Function: p7_hmm_CompositionKLDist()
-* Synopsis: A statistic of model's composition bias.
-* Incept: SRE, Mon Jul 2 08:40:12 2007 [Janelia]
-*
-* Purpose: Calculates the K-L distance between the average match
-* state residue composition in model <hmm> and a
-* background frequency distribution in <bg>, and
-* return it in <ret_KL>.
-*
-* Optionally return the average match state residue
-* composition in <opt_avp>. This vector, of length
-* <hmm->abc->K> is allocated here and becomes the caller's
-* responsibility if <opt_avp> is non-<NULL>.
-*
-* The average match composition is an occupancy-weighted
-* average (see <p7_hmm_CalculateOccupancy()>.
-*
-* The `K-L distance' <*ret_KL> is the symmetricized
-* Kullback-Leibler distance in bits (log base 2).
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation error.
-*/
-int
-p7_hmm_CompositionKLDist(P7_HMM *hmm, P7_BG *bg, float *ret_KL, float **opt_avp)
-{
- int K = hmm->abc->K;
- float *occ = NULL;
- float *p = NULL;
- int status;
- int k;
-
- ESL_ALLOC_WITH_TYPE(occ, float*, sizeof(float) * (hmm->M+1));
- ESL_ALLOC_WITH_TYPE(p, float*, sizeof(float) * K);
- p7_hmm_CalculateOccupancy(hmm, occ, NULL);
-
- esl_vec_FSet(p, K, 0.);
- for (k = 1; k <= hmm->M; k++)
- esl_vec_FAddScaled(p, hmm->mat[k], occ[k], K);
- esl_vec_FNorm(p, K);
-
- *ret_KL = (esl_vec_FRelEntropy(p, bg->f, K) + esl_vec_FRelEntropy(bg->f, p, K)) / (2.0 * eslCONST_LOG2);
- if (opt_avp != NULL) *opt_avp = p; else free(p);
- free(occ);
- return eslOK;
-
-ERROR:
- if (occ != NULL) free(occ);
- if (p != NULL) free(p);
- *ret_KL = 0.0;
- if (opt_avp != NULL) *opt_avp = NULL;
- return status;
-}
-
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/p7_alidisplay.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/p7_alidisplay.cpp
deleted file mode 100644
index 4c8ac89..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/p7_alidisplay.cpp
+++ /dev/null
@@ -1,385 +0,0 @@
-/* P7_ALIDISPLAY: formatting and printing alignments
-*
-* Contents:
-* 1. The P7_ALIDISPLAY object
-*
-* SRE, Sun Dec 30 09:12:47 2007
- * SVN $Id: p7_alidisplay.c 3018 2009-10-29 17:33:06Z farrarm $
-*/
-
-#include <hmmer3/p7_config.h>
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <ctype.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/hmmer.h>
-
-
-/* Function: p7_alidisplay_Create()
-* Synopsis: Create an alignment display, from trace and oprofile.
-* Incept: SRE, Sun Dec 30 09:13:31 2007 [Janelia]
-*
-* Purpose: Creates and returns an alignment display for domain number
-* <which> in traceback <tr>, where the traceback
-* corresponds to an alignment of optimized profile <om> to digital sequence
-* <dsq>, and the unique name of that target
-* sequence <dsq> is <sqname>. The <which> index starts at 0.
-*
-* It will be a little faster if the trace is indexed with
-* <p7_trace_Index()> first. The number of domains is then
-* in <tr->ndom>. If the caller wants to create alidisplays
-* for all of these, it would loop <which> from
-* <0..tr->ndom-1>.
-*
-* However, even without an index, the routine will work fine.
-*
-* Args: tr - traceback
-* which - domain number, 0..tr->ndom-1
-* om - optimized profile (query)
-* sq - digital sequence (target)
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <NULL> on allocation failure, or if something's internally corrupt
-* in the data.
-*/
-P7_ALIDISPLAY *
-p7_alidisplay_Create(const P7_TRACE *tr, int which, const P7_OPROFILE *om, const ESL_SQ *sq)
-{
- P7_ALIDISPLAY *ad = NULL;
- char *Alphabet = om->abc->sym;
- int n, pos, z;
- int z1,z2;
- int k,x,i,s;
- int hmm_namelen, hmm_acclen, hmm_desclen;
- int sq_namelen, sq_acclen, sq_desclen;
- int status;
-
- /* First figure out which piece of the trace (from first match to last match)
- * we're going to represent, and how big it is.
- */
- if (tr->ndom > 0) { /* if we have an index, this is a little faster: */
- for (z1 = tr->tfrom[which]; z1 < tr->N; z1++) if (tr->st[z1] == p7T_M) break; /* find next M state */
- if (z1 == tr->N) return NULL; /* no M? corrupt trace */
- for (z2 = tr->tto[which]; z2 >= 0 ; z2--) if (tr->st[z2] == p7T_M) break; /* find prev M state */
- if (z2 == -1) return NULL; /* no M? corrupt trace */
- } else { /* without an index, we can still do it fine: */
- for (z1 = 0; which >= 0 && z1 < tr->N; z1++) if (tr->st[z1] == p7T_B) which--; /* find the right B state */
- if (z1 == tr->N) return NULL; /* no such domain <which> */
- for (; z1 < tr->N; z1++) if (tr->st[z1] == p7T_M) break; /* find next M state */
- if (z1 == tr->N) return NULL; /* no M? corrupt trace */
- for (z2 = z1; z2 < tr->N; z2++) if (tr->st[z2] == p7T_E) break; /* find the next E state */
- for (; z2 >= 0; z2--) if (tr->st[z2] == p7T_M) break; /* find prev M state */
- if (z2 == -1) return NULL; /* no M? corrupt trace */
- }
-
- /* Now we know that z1..z2 in the trace will be represented in the
- * alidisplay; that's z2-z1+1 positions. We need a \0 trailer on all
- * our display lines, so allocate z2-z1+2. We know each position is
- * M, D, or I, so there's a 1:1 correspondence of trace positions
- * with alignment display positions. We also know the display
- * starts and ends with M states.
- *
- * So now let's allocate. The alidisplay is packed into a single
- * memory space, so this appears to be intricate, but it's just
- * bookkeeping.
- */
- n = (z2-z1+2) * 3; /* model, mline, aseq mandatory */
- if (om->rf[0] != 0) n += z2-z1+2; /* optional reference line */
- if (om->cs[0] != 0) n += z2-z1+2; /* optional structure line */
- if (tr->pp != NULL) n += z2-z1+2; /* optional posterior prob line */
- hmm_namelen = strlen(om->name); n += hmm_namelen + 1;
- hmm_acclen = (om->acc != NULL ? strlen(om->acc) : 0); n += hmm_acclen + 1;
- hmm_desclen = (om->desc != NULL ? strlen(om->desc) : 0); n += hmm_desclen + 1;
- sq_namelen = strlen(sq->name); n += sq_namelen + 1;
- sq_acclen = strlen(sq->acc); n += sq_acclen + 1; /* sq->acc is "\0" when unset */
- sq_desclen = strlen(sq->desc); n += sq_desclen + 1; /* same for desc */
-
- ESL_ALLOC_WITH_TYPE(ad, P7_ALIDISPLAY*, sizeof(P7_ALIDISPLAY));
- ad->mem = NULL;
-
- pos = 0;
- ad->memsize = sizeof(char) * n;
- ESL_ALLOC_WITH_TYPE(ad->mem, char*, sizeof(char) * ad->memsize);
- if (om->rf[0] != 0) { ad->rfline = ad->mem + pos; pos += z2-z1+2; } else { ad->rfline = NULL; }
- if (om->cs[0] != 0) { ad->csline = ad->mem + pos; pos += z2-z1+2; } else { ad->csline = NULL; }
- ad->model = ad->mem + pos; pos += z2-z1+2;
- ad->mline = ad->mem + pos; pos += z2-z1+2;
- ad->aseq = ad->mem + pos; pos += z2-z1+2;
- if (tr->pp != NULL) { ad->ppline = ad->mem + pos; pos += z2-z1+2;} else { ad->ppline = NULL; }
- ad->hmmname = ad->mem + pos; pos += hmm_namelen +1;
- ad->hmmacc = ad->mem + pos; pos += hmm_acclen +1;
- ad->hmmdesc = ad->mem + pos; pos += hmm_desclen +1;
- ad->sqname = ad->mem + pos; pos += sq_namelen +1;
- ad->sqacc = ad->mem + pos; pos += sq_acclen +1;
- ad->sqdesc = ad->mem + pos; pos += sq_desclen +1;
-
- strcpy(ad->hmmname, om->name);
- if (om->acc != NULL) strcpy(ad->hmmacc, om->acc); else ad->hmmacc[0] = 0;
- if (om->desc != NULL) strcpy(ad->hmmdesc, om->desc); else ad->hmmdesc[0] = 0;
- strcpy(ad->sqname, sq->name);
- strcpy(ad->sqacc, sq->acc);
- strcpy(ad->sqdesc, sq->desc);
-
- /* Determine hit coords */
- ad->hmmfrom = tr->k[z1];
- ad->hmmto = tr->k[z2];
- ad->M = om->M;
- ad->sqfrom = tr->i[z1];
- ad->sqto = tr->i[z2];
- ad->L = sq->n;
-
- /* optional rf line */
- if (ad->rfline != NULL) {
- for (z = z1; z <= z2; z++) ad->rfline[z-z1] = ((tr->st[z] == p7T_I) ? '.' : om->rf[tr->k[z]]);
- ad->rfline[z-z1] = '\0';
- }
-
- /* optional cs line */
- if (ad->csline != NULL) {
- for (z = z1; z <= z2; z++) ad->csline[z-z1] = ((tr->st[z] == p7T_I) ? '.' : om->cs[tr->k[z]]);
- ad->csline[z-z1] = '\0';
- }
-
- /* optional pp line */
- if (ad->ppline != NULL) {
- for (z = z1; z <= z2; z++) ad->ppline[z-z1] = ( (tr->st[z] == p7T_D) ? '.' : p7_alidisplay_EncodePostProb(tr->pp[z]));
- ad->ppline[z-z1] = '\0';
- }
-
- /* mandatory three alignment display lines: model, mline, aseq */
- for (z = z1; z <= z2; z++)
- {
- k = tr->k[z];
- i = tr->i[z];
- x = sq->dsq[i];
- s = tr->st[z];
-
- switch (s) {
- case p7T_M:
- ad->model[z-z1] = om->consensus[k];
- if (x == esl_abc_DigitizeSymbol(om->abc, om->consensus[k])) ad->mline[z-z1] = ad->model[z-z1];
- else if (p7_oprofile_FGetEmission(om, k, x) > 1.0) ad->mline[z-z1] = '+'; /* >1 not >0; om has odds ratios, not scores */
- else ad->mline[z-z1] = ' ';
- ad->aseq [z-z1] = toupper(Alphabet[x]);
- break;
-
- case p7T_I:
- ad->model [z-z1] = '.';
- ad->mline [z-z1] = ' ';
- ad->aseq [z-z1] = tolower(Alphabet[x]);
- break;
-
- case p7T_D:
- ad->model [z-z1] = om->consensus[k];
- ad->mline [z-z1] = ' ';
- ad->aseq [z-z1] = '-';
- break;
-
- default: ESL_XEXCEPTION(eslEINVAL, "invalid state in trace: not M,D,I");
- }
- }
- ad->model [z2-z1+1] = '\0';
- ad->mline [z2-z1+1] = '\0';
- ad->aseq [z2-z1+1] = '\0';
- ad->N = z2-z1+1;
- return ad;
-
-ERROR:
- p7_alidisplay_Destroy(ad);
- return NULL;
-}
-
-/* Function: p7_alidisplay_Destroy()
-* Synopsis: Frees a <P7_ALIDISPLAY>
-* Incept: SRE, Thu Jan 3 10:00:36 2008 [Janelia]
-*/
-void
-p7_alidisplay_Destroy(P7_ALIDISPLAY *ad)
-{
- if (ad == NULL) return;
- if (ad->mem != NULL) free(ad->mem);
- free(ad);
-}
-
-
-static int
-integer_textwidth(long n)
-{
- int w = (n < 0)? 1 : 0;
- while (n != 0) { n /= 10; w++; }
- return w;
-}
-
-/* Function: p7_alidisplay_EncodePostProb()
-* Synopsis: Convert a posterior probability to a char code.
-* Incept: SRE, Thu Oct 23 08:20:20 2008 [Janelia]
-*
-* Purpose: Convert the posterior probability <p> to
-* a character code suitable for Stockholm format
-* <#=GC PP_cons> and <#=GR seqname PP> annotation
-* lines. HMMER uses the same codes in alignment
-* output.
-*
-* Characters <0-9*> are used; $0.0 \leq p < 0.05$
-* is coded as 0, $0.05 \leq p < 0.15$ is coded as
-* 1, ... and so on ..., $0.85 \leq p < 0.95$ is
-* coded as 9, and $0.95 \leq p \leq 1.0$ is coded
-* as '*'.
-*
-* Returns: the encoded character.
-*/
-char
-p7_alidisplay_EncodePostProb(float p)
-{
- return (p + 0.05 >= 1.0) ? '*' : (char) ((p + 0.05) * 10.0) + '0';
-}
-
-
-/* Function: p7_alidisplay_DecodePostProb()
-* Synopsis: Convert a char code post prob to an approx float.
-* Incept: SRE, Wed Dec 10 08:59:16 2008 [Janelia]
-*
-* Purpose: Convert posterior probability code <pc>, which
-* is [0-9*], to an approximate floating point probability.
-*
-* The result is crude, because <pc> has already discretized
-* with loss of precision. We require that
-* <p7_alidisplay_EncodePostProb(p7_alidisplay_DecodePostProb(pc)) == pc>,
-* and that <pc=='0'> decodes to a nonzero probability just to
-* avoid any possible absorbing-zero artifacts.
-*
-* Returns: the decoded real-valued approximate probability.
-*/
-float
-p7_alidisplay_DecodePostProb(char pc)
-{
- if (pc == '0') return 0.01;
- else if (pc == '*') return 1.0;
- else if (pc == '.') return 0.0;
- else return ((float) (pc - '0') / 10.);
-}
-
-
-
-// ! removed unused function: p7_alidisplay_Print !
-
-
-/* Function: p7_alidisplay_Backconvert()
-* Synopsis: Convert an alidisplay to a faux trace and subsequence.
-* Incept: SRE, Wed Dec 10 09:49:28 2008 [Janelia]
-*
-* Purpose: Convert alignment display object <ad> to a faux subsequence
-* and faux subsequence trace, returning them in <ret_sq> and
-* <ret_tr>.
-*
-* The subsequence <*ret_sq> is digital; ascii residues in
-* <ad> are digitized using digital alphabet <abc>.
-*
-* The subsequence and trace are suitable for passing as
-* array elements to <p7_MultipleAlignment>. This is the
-* main purpose of backconversion. Results of a profile
-* search are stored in a hit list as a processed
-* <P7_ALIDISPLAY>, not as a <P7_TRACE> and <ESL_SQ>, to
-* reduce space and to reduce communication overhead in
-* parallelized search implementations. After reduction
-* to a final hit list, a master may want to construct a
-* multiple alignment of all the significant hits.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failures. <eslECORRUPT> on unexpected internal
-* data corruption. On any exception, <*ret_sq> and <*ret_tr> are
-* <NULL>.
-*
-* Xref: J4/29.
-*/
-int
-p7_alidisplay_Backconvert(const P7_ALIDISPLAY *ad, const ESL_ALPHABET *abc, ESL_SQ **ret_sq, P7_TRACE **ret_tr)
-{
- ESL_SQ *sq = NULL; /* RETURN: faux subsequence */
- P7_TRACE *tr = NULL; /* RETURN: faux trace */
- int subL = 0; /* subsequence length in the <ad> */
- int a, i, k; /* coords for <ad>, <sq->dsq>, model */
- char st; /* state type: MDI */
- int status;
-
- /* Make a first pass over <ad> just to calculate subseq length */
- for (a = 0; a < ad->N; a++)
- if (! esl_abc_CIsGap(abc, ad->aseq[a])) subL++;
-
- /* Allocations */
- if ((sq = esl_sq_CreateDigital(abc)) == NULL) { status = eslEMEM; goto ERROR; }
- if ((status = esl_sq_GrowTo(sq, subL)) != eslOK) goto ERROR;
-
- if ((tr = (ad->ppline == NULL) ? p7_trace_Create() : p7_trace_CreateWithPP()) == NULL) { status = eslEMEM; goto ERROR; }
- if ((status = p7_trace_GrowTo(tr, subL+6)) != eslOK) goto ERROR; /* +6 is for SNB/ECT */
-
- /* Construction of dsq, trace */
- sq->dsq[0] = eslDSQ_SENTINEL;
- if ((status = ((ad->ppline == NULL) ? p7_trace_Append(tr, p7T_S, 0, 0) : p7_trace_AppendWithPP(tr, p7T_S, 0, 0, 0.0))) != eslOK) goto ERROR;
- if ((status = ((ad->ppline == NULL) ? p7_trace_Append(tr, p7T_N, 0, 0) : p7_trace_AppendWithPP(tr, p7T_N, 0, 0, 0.0))) != eslOK) goto ERROR;
- if ((status = ((ad->ppline == NULL) ? p7_trace_Append(tr, p7T_B, 0, 0) : p7_trace_AppendWithPP(tr, p7T_B, 0, 0, 0.0))) != eslOK) goto ERROR;
- k = ad->hmmfrom;
- i = 1;
- for (a = 0; a < ad->N; a++)
- {
- if (esl_abc_CIsResidue(abc, ad->model[a])) { st = (esl_abc_CIsResidue(abc, ad->aseq[a]) ? p7T_M : p7T_D); } else st = p7T_I;
-
- if ((status = ((ad->ppline == NULL) ? p7_trace_Append(tr, st, k, i) : p7_trace_AppendWithPP(tr, st, k, i, p7_alidisplay_DecodePostProb(ad->ppline[a])))) != eslOK) goto ERROR;
-
- switch (st) {
- case p7T_M: sq->dsq[i] = esl_abc_DigitizeSymbol(abc, ad->aseq[a]); k++; i++; break;
- case p7T_I: sq->dsq[i] = esl_abc_DigitizeSymbol(abc, ad->aseq[a]); i++; break;
- case p7T_D: k++; break;
- }
- }
- if ((status = ((ad->ppline == NULL) ? p7_trace_Append(tr, p7T_E, 0, 0) : p7_trace_AppendWithPP(tr, p7T_E, 0, 0, 0.0))) != eslOK) goto ERROR;
- if ((status = ((ad->ppline == NULL) ? p7_trace_Append(tr, p7T_C, 0, 0) : p7_trace_AppendWithPP(tr, p7T_C, 0, 0, 0.0))) != eslOK) goto ERROR;
- if ((status = ((ad->ppline == NULL) ? p7_trace_Append(tr, p7T_T, 0, 0) : p7_trace_AppendWithPP(tr, p7T_T, 0, 0, 0.0))) != eslOK) goto ERROR;
- sq->dsq[i] = eslDSQ_SENTINEL;
-
- /* some sanity checks */
- if (tr->N != ad->N + 6) ESL_XEXCEPTION(eslECORRUPT, "backconverted trace ended up with unexpected size (%s/%s)", ad->sqname, ad->hmmname);
- if (k != ad->hmmto + 1) ESL_XEXCEPTION(eslECORRUPT, "backconverted trace didn't end at expected place on model (%s/%s)", ad->sqname, ad->hmmname);
- if (i != subL + 1) ESL_XEXCEPTION(eslECORRUPT, "backconverted subseq didn't end at expected length (%s/%s)", ad->sqname, ad->hmmname);
-
- /* Set up <sq> annotation as a subseq of a source sequence */
- if ((status = esl_sq_FormatName(sq, "%s/%ld-%ld", ad->sqname, ad->sqfrom, ad->sqto)) != eslOK) goto ERROR;
- if ((status = esl_sq_FormatDesc(sq, "[subseq from] %s", ad->sqdesc[0] != '\0' ? ad->sqdesc : ad->sqname)) != eslOK) goto ERROR;
- if ((status = esl_sq_SetSource(sq, ad->sqname)) != eslOK) goto ERROR;
- if (ad->sqacc[0] != '\0') { if ((status = esl_sq_SetAccession (sq, ad->sqacc)) != eslOK) goto ERROR; }
- sq->n = subL;
- sq->start = ad->sqfrom;
- sq->end = ad->sqto;
- sq->C = 0;
- sq->W = subL;
- sq->L = ad->L;
-
- tr->M = ad->M;
- tr->L = ad->L;
-
- *ret_sq = sq;
- *ret_tr = tr;
- return eslOK;
-
-ERROR:
- if (sq != NULL) esl_sq_Destroy(sq);
- if (tr != NULL) p7_trace_Destroy(tr);
- *ret_sq = NULL;
- *ret_tr = NULL;
- return status;
-}
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/p7_bg.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/p7_bg.cpp
deleted file mode 100644
index ac6a48b..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/p7_bg.cpp
+++ /dev/null
@@ -1,302 +0,0 @@
-/* P7_BG: the null (background) model
-*
-* Contents:
-* 1. P7_BG object: allocation, initialization, destruction.
-* 2. Standard iid null model ("null1")
-* 3. Filter null model.
-*
-* SRE, Fri Jan 12 13:31:26 2007 [Janelia] [Ravel, Bolero]
-* SVN $Id: p7_bg.c 2778 2009-03-31 17:45:24Z eddys $
-*/
-
-#include <hmmer3/p7_config.h>
-
-#include <math.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_alphabet.h>
-#include <hmmer3/easel/esl_hmm.h>
-#include <hmmer3/easel/esl_vectorops.h>
-
-#include <hmmer3/hmmer.h>
-
-
-/*****************************************************************
-* 1. The P7_BG object: allocation, initialization, destruction.
-*****************************************************************/
-
-/* Function: p7_bg_Create()
-* Incept: SRE, Fri Jan 12 13:32:51 2007 [Janelia]
-*
-* Purpose: Allocate a <P7_BG> object for digital alphabet <abc>,
-* initializes it to appropriate default values, and
-* returns a pointer to it.
-*
-* For protein models, default iid background frequencies
-* are set (by <p7_AminoFrequencies()>) to average
-* SwissProt residue composition. For DNA, RNA and other
-* alphabets, default frequencies are set to a uniform
-* distribution.
-*
-* The model composition <bg->mcomp[]> is not initialized
-* here; neither is the filter null model <bg->fhmm>. To
-* use the filter null model, caller will want to
-* initialize these fields by calling
-* <p7_bg_SetFilterByHMM()>.
-*
-* Throws: <NULL> on allocation failure.
-*
-* Xref: STL11/125.
-*/
-P7_BG *
-p7_bg_Create(const ESL_ALPHABET *abc)
-{
- P7_BG *bg = NULL;
- int status;
-
- ESL_ALLOC_WITH_TYPE(bg, P7_BG*, sizeof(P7_BG));
- bg->f = NULL;
- bg->fhmm = NULL;
-
- ESL_ALLOC_WITH_TYPE(bg->f, float*, sizeof(float) * abc->K);
- if ((bg->fhmm = esl_hmm_Create(abc, 2)) == NULL) goto ERROR;
-
- if (abc->type == eslAMINO)
- {
- if (p7_AminoFrequencies(bg->f) != eslOK) goto ERROR;
- }
- else
- esl_vec_FSet(bg->f, abc->K, 1. / (float) abc->K);
-
- bg->p1 = 350./351.;
- bg->omega = 1./256.;
- bg->abc = abc;
- return bg;
-
-ERROR:
- p7_bg_Destroy(bg);
- return NULL;
-}
-
-
-/* Function: p7_bg_CreateUniform()
-* Synopsis: Creates background model with uniform freqs.
-* Incept: SRE, Sat Jun 30 10:25:27 2007 [Janelia]
-*
-* Purpose: Creates a background model for alphabet <abc>
-* with uniform residue frequencies.
-*/
-P7_BG *
-p7_bg_CreateUniform(const ESL_ALPHABET *abc)
-{
- P7_BG *bg = NULL;
- int status;
-
- ESL_ALLOC_WITH_TYPE(bg, P7_BG*, sizeof(P7_BG));
- bg->f = NULL;
- bg->fhmm = NULL;
-
- ESL_ALLOC_WITH_TYPE(bg->f, float*, sizeof(float) * abc->K);
- if ((bg->fhmm = esl_hmm_Create(abc, 2)) == NULL) goto ERROR;
-
- esl_vec_FSet(bg->f, abc->K, 1. / (float) abc->K);
- bg->p1 = 350./351.;
- bg->omega = 1./256.;
- bg->abc = (ESL_ALPHABET *) abc; /* safe: we're just keeping a reference */
- return bg;
-
-ERROR:
- p7_bg_Destroy(bg);
- return NULL;
-}
-
-
-/* Function: p7_bg_Dump()
-* Synopsis: Outputs <P7_BG> object as text, for diagnostics.
-* Incept: SRE, Fri May 25 08:07:11 2007 [Janelia]
-*
-* Purpose: Given a null model <bg>, dump it as text to stream <fp>.
-*/
-int
-p7_bg_Dump(FILE *ofp, const P7_BG *bg)
-{
- esl_vec_FDump(ofp, bg->f, bg->abc->K, bg->abc->sym);
- return eslOK;
-}
-
-
-
-/* Function: p7_bg_Destroy()
-* Incept: SRE, Fri Jan 12 14:04:30 2007 [Janelia]
-*
-* Purpose: Frees a <P7_BG> object.
-*
-* Returns: (void)
-*
-* Xref: STL11/125.
-*/
-void
-p7_bg_Destroy(P7_BG *bg)
-{
- if (bg != NULL) {
- if (bg->f != NULL) free(bg->f);
- if (bg->fhmm != NULL) esl_hmm_Destroy(bg->fhmm);
- free(bg);
- }
- return;
-}
-
-
-/* Function: p7_bg_SetLength()
-* Synopsis: Set the null model length distribution.
-* Incept: SRE, Thu Aug 28 08:44:22 2008 [Janelia]
-*
-* Purpose: Sets the geometric null model length
-* distribution in <bg> to a mean of <L> residues.
-*/
-int
-p7_bg_SetLength(P7_BG *bg, int L)
-{
- bg->p1 = (float) L / (float) (L+1);
-
- bg->fhmm->t[0][0] = bg->p1;
- bg->fhmm->t[0][1] = 1.0f - bg->p1;
-
- return eslOK;
-}
-
-
-
-
-/*****************************************************************
-* 2. Standard iid null model ("null1")
-*****************************************************************/
-
-/* Function: p7_bg_NullOne()
-* Incept: SRE, Mon Apr 23 08:13:26 2007 [Janelia]
-*
-* Purpose: Calculate the null1 lod score, for sequence <dsq>
-* of length <L> "aligned" to the base null model <bg>.
-*
-* Note: Because the residue composition in null1 <bg> is the
-* same as the background used to calculate residue
-* scores in profiles and null models, all we have to
-* do here is score null model transitions.
-*/
-int
-p7_bg_NullOne(const P7_BG *bg, const ESL_DSQ *dsq, int L, float *ret_sc)
-{
- *ret_sc = (float) L * log((double)bg->p1) + log((double)(1.-bg->p1));
- return eslOK;
-}
-
-
-
-
-
-/*****************************************************************
-* 3. Filter null model
-*****************************************************************/
-
-/* Function: p7_bg_SetFilter()
-* Synopsis: Configure filter HMM with new model composition.
-* Incept: SRE, Fri Dec 5 09:08:15 2008 [Janelia]
-*
-* Purpose: The "filter HMM" is an experimental filter in the
-* acceleration pipeline for avoiding biased composition
-* sequences. It has no effect on final scoring, if a
-* sequence passes all steps of the pipeline; it is only
-* used to eliminate biased sequences from further
-* consideration early in the pipeline, before the big guns
-* of domain postprocessing are applied.
-*
-* At least at present, it doesn't actually work as well as
-* one would hope. This will be an area of future work.
-* What we really want to do is make a better null model of
-* real protein sequences (and their biases), and incorporate
-* that model into the flanks (NCJ states) of the profile.
-*
-* <compo> is the average model residue composition, from
-* either the HMM or the copy in a profile or optimized
-* profile. <M> is the length of the model in nodes.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: (no abnormal error conditions)
-*
-* Xref: J4/25: generalized to use composition vector, not
-* specifically an HMM.
-*
-* Note: This looks like a two-state HMM, but if you start thinking
-* about its length distribution ("oh my god, L0 assumes a
-* fixed L=400 expectation, it's all wrong, it's not conditional
-* on the target sequence length and length modeling's messed
-* up!"), don't panic. It's set up as a conditional-on-L model;
-* the P(L) term is added in p7_bg_FilterScore() below.
-*/
-int
-p7_bg_SetFilter(P7_BG *bg, int M, const float *compo)
-{
- float L0 = 400.0; /* mean length in state 0 of filter HMM (normal background) */
- float L1 = (float) M / 8.0; /* mean length in state 1 of filter HMM (biased segment) */
-
- /* State 0 is the normal iid model. */
- bg->fhmm->t[0][0] = L0 / (L0+1.0f);
- bg->fhmm->t[0][1] = 1.0f / (L0+1.0f);
- bg->fhmm->t[0][2] = 1.0f; /* 1.0 transition to E means we'll set length distribution externally. */
- esl_vec_FCopy(bg->f, bg->abc->K, bg->fhmm->e[0]);
-
- /* State 1 is the potentially biased model composition. */
- bg->fhmm->t[1][0] = 1.0f / (L1+1.0f);
- bg->fhmm->t[1][1] = L1 / (L1+1.0f);
- bg->fhmm->t[1][2] = 1.0f; /* 1.0 transition to E means we'll set length distribution externally. */
- esl_vec_FCopy(compo, bg->abc->K, bg->fhmm->e[1]);
-
- bg->fhmm->pi[0] = 0.999;
- bg->fhmm->pi[1] = 0.001;
-
- esl_hmm_Configure(bg->fhmm, bg->f);
- return eslOK;
-}
-
-
-/* Function: p7_bg_FilterScore()
-* Synopsis: Calculates the filter null model score.
-* Incept: SRE, Thu Aug 28 08:52:53 2008 [Janelia]
-*
-* Purpose: Calculates the filter null model <bg> score for sequence
-* <dsq> of length <L>, and return it in
-* <*ret_sc>.
-*
-* The score is calculated as an HMM Forward score using
-* the two-state filter null model. It is a log-odds ratio,
-* relative to the iid background frequencies, in nats:
-* same as main model Forward scores.
-*
-* The filter null model has no length distribution of its
-* own; the same geometric length distribution (controlled
-* by <bg->p1>) that the null1 model uses is imposed.
-*/
-int
-p7_bg_FilterScore(P7_BG *bg, ESL_DSQ *dsq, int L, float *ret_sc)
-{
- ESL_HMX *hmx = esl_hmx_Create(L, bg->fhmm->M); /* optimization target: this can be a 2-row matrix, and it can be stored in <bg>. */
- float nullsc; /* (or it could be passed in as an arg, but for sure it shouldn't be alloc'ed here */
-
- esl_hmm_Forward(dsq, L, bg->fhmm, hmx, &nullsc);
-
- /* impose the length distribution */
- *ret_sc = nullsc + (float) L * logf(bg->p1) + logf(1.-bg->p1);
- esl_hmx_Destroy(hmx);
- return eslOK;
-}
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/p7_builder.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/p7_builder.cpp
deleted file mode 100644
index 59f809d..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/p7_builder.cpp
+++ /dev/null
@@ -1,668 +0,0 @@
-/* Standardized pipeline for construction of new HMMs.
-*
-* Contents:
-* 1. P7_BUILDER: allocation, initialization, destruction
-* 2. Standardized model construction API.
-* 3. Internal functions.
-* 4. Copyright and license information
-*
-* SRE, Thu Dec 11 08:44:58 2008 [Janelia] [Requiem for a Dream]
-* SVN $Id: p7_builder.c 2830 2009-06-16 18:24:14Z eddys $
-*/
-
-#include <hmmer3/p7_config.h>
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-#include <assert.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_alphabet.h>
-#include <hmmer3/easel/esl_dmatrix.h>
-#include <hmmer3/easel/esl_msa.h>
-#include <hmmer3/easel/esl_msacluster.h>
-#include <hmmer3/easel/esl_msaweight.h>
-#include <hmmer3/easel/esl_random.h>
-#include <hmmer3/easel/esl_vectorops.h>
-
-#include <hmmer3/hmmer.h>
-
-/*****************************************************************
-* 1. P7_BUILDER: allocation, initialization, destruction
-*****************************************************************/
-
-/* Function: p7_builder_Create()
-* Synopsis: Create a default HMM construction configuration.
-* Incept: SRE, Thu Dec 11 13:14:21 2008 [Janelia]
-*
-* Purpose: Create a construction configuration for building
-* HMMs in alphabet <abc>, and return a pointer to it.
-*
-* An application configuration <go> may optionally be
-* provided. If <go> is <NULL>, default parameters are
-* used. If <go> is non-<NULL>, it must include appropriate
-* settings for all 24 ``standard build options'':
-*
-* Model construction: --fast --hand --symfrac --fragthresh
-* Relative weighting: --wgsc --wblosum --wpb --wgiven --wid
-* Effective seq #: --eent --eclust --enone --eset --ere --esigma --eid
-* E-val calibration: --EmL --EmN --EvL --EvN --EfL --EfN --Eft
-* run-to-run variation: --seed
-*
-* See <hmmbuild.c> or other big users of the build
-* pipeline for an example of appropriate <ESL_GETOPTS>
-* initializations of these 24 options.
-*/
-P7_BUILDER *
-p7_builder_Create( const UHMM3BuildSettings* settings, const ESL_ALPHABET *abc)
-{
- P7_BUILDER *bld = NULL;
- int status = eslOK;
- int seed = 0;
-
- assert( NULL != settings && NULL != abc );
- ESL_ALLOC_WITH_TYPE(bld, P7_BUILDER*, sizeof(P7_BUILDER));
- bld->prior = NULL;
- bld->r = NULL;
- bld->S = NULL;
- bld->Q = NULL;
-
- if( -1.0 != settings->ere ) {
- assert( 0 < settings->ere );
- bld->re_target = settings->ere;
- } else {
- switch( abc->type ) {
- case eslAMINO: bld->re_target = p7_ETARGET_AMINO; break;
- case eslDNA: bld->re_target = p7_ETARGET_DNA; break;
- case eslRNA: bld->re_target = p7_ETARGET_DNA; break;
- default: bld->re_target = p7_ETARGET_OTHER; break;
- }
- }
-
- bld->arch_strategy = settings->archStrategy;
- bld->wgt_strategy = settings->wgtStrategy;
- bld->effn_strategy = settings->effnStrategy;
- bld->eset = settings->eset;
- bld->symfrac = settings->symfrac;
- bld->wid = settings->wid;
- bld->eid = settings->eid;
- bld->EmL = settings->eml;
- bld->EmN = settings->emn;
- bld->EvL = settings->evl;
- bld->EvN = settings->evn;
- bld->EfL = settings->efl;
- bld->EfN = settings->efn;
- bld->Eft = settings->eft;
- bld->esigma = settings->esigma;
- bld->fragthresh = settings->fragtresh;
- seed = settings->seed;
-
- /* Normally we reinitialize the RNG to original seed before calibrating each model.
- * This eliminates run-to-run variation.
- * As a special case, seed==0 means choose an arbitrary seed and shut off the
- * reinitialization; this allows run-to-run variation.
- */
- bld->r = esl_randomness_CreateFast(seed);
- bld->do_reseeding = (seed == 0) ? FALSE : TRUE;
-
- switch (abc->type) {
- case eslAMINO: bld->prior = p7_prior_CreateAmino(); break;
- case eslDNA: bld->prior = p7_prior_CreateNucleic(); break;
- case eslRNA: bld->prior = p7_prior_CreateNucleic(); break;
- default: bld->prior = p7_prior_CreateLaplace(abc); break;
- }
- if (bld->prior == NULL) goto ERROR;
-
- bld->abc = abc;
- bld->errbuf[0] = '\0';
- return bld;
-
-ERROR:
- p7_builder_Destroy(bld);
- return NULL;
-}
-
-
-/* Function: p7_builder_SetScoreSystem()
- * Synopsis: Initialize score system for single sequence queries.
- * Incept: SRE, Fri Dec 12 10:04:36 2008 [Janelia]
- *
- * Purpose: Initialize the builder <bld> to be able to parameterize
- * single sequence queries.
- *
- * Read a standard substitution score matrix from file
- * <mxfile>. If <mxfile> is <NULL>, default to BLOSUM62
- * scores. If <mxfile> is "-", read score matrix from
- * <stdin> stream. If <env> is non-<NULL> and <mxfile> is
- * not found in the current working directory, look for
- * <mxfile> in colon-delimited directory list contained in
- * environment variable <env>.
- *
- * Set the gap-open and gap-extend probabilities to
- * <popen>, <pextend>, respectively.
- *
- *
- * Args: bld - <P7_BUILDER> to initialize
- * mxfile - score matrix file to use, or NULL for BLOSUM62 default
- * env - env variable containing directory list where <mxfile> may reside
- * popen - gap open probability
- * pextend - gap extend probability
- *
- * Returns: <eslOK> on success.
- *
- * <eslENOTFOUND> if <mxfile> can't be found or opened, even
- * in any of the directories specified by the <env> variable.
- *
- * <eslEINVAL> if the score matrix can't be converted into
- * conditional probabilities by the Yu and Altschul method,
- * either because it isn't a symmetric matrix or because
- * the Yu/Altschul numerical method fails to converge.
- *
- * On either error, <bld->errbuf> contains a useful error message
- * for the user.
- *
- * Throws: <eslEMEM> on allocation failure.
- */
-int p7_builder_SetScoreSystem( P7_BUILDER * bld, ESL_SCOREMATRIX * s_matr, double popen, double pextend ) {
- double *fa = NULL;
- double *fb = NULL;
- double slambda;
- int a,b;
- int status;
-
- assert( NULL == bld->S );
- assert( NULL == bld->Q );
- bld->errbuf[0] = '\0';
-
- if( NULL == s_matr ) {
- if( ( bld->S = esl_scorematrix_Create( bld->abc ) ) == NULL ) { status = eslEMEM; goto ERROR; }
- if( ( status = esl_scorematrix_SetBLOSUM62( bld->S ) ) != eslOK ) goto ERROR;
- } else {
- bld->S = s_matr;
- }
-
- if (! esl_scorematrix_IsSymmetric(bld->S)) {
- ESL_XFAIL(eslEINVAL, bld->errbuf, "Matrix isn't symmetric");
- }
- if ((status = esl_sco_Probify(bld->S, &(bld->Q), &fa, &fb, &slambda)) != eslOK) {
- ESL_XFAIL(eslEINVAL, bld->errbuf, "Yu/Altschul method failed to backcalculate probabilistic basis of score matrix");
- }
-
- for (a = 0; a < bld->abc->K; a++) {
- for (b = 0; b < bld->abc->K; b++) {
- bld->Q->mx[a][b] /= fa[a]; /* Q->mx[a][b] is now P(b | a) */
- }
- }
-
- bld->popen = popen;
- bld->pextend = pextend;
-
- free( fa );
- free( fb );
- return eslOK;
-
-ERROR:
- if (fa != NULL) free(fa);
- if (fb != NULL) free(fb);
- return status;
-}
-
-/* Function: p7_builder_Destroy()
-* Synopsis: Free a <P7_BUILDER>
-* Incept: SRE, Thu Dec 11 13:15:45 2008 [Janelia]
-*
-* Purpose: Frees a <P7_BUILDER> object.
-*/
-void
-p7_builder_Destroy(P7_BUILDER *bld)
-{
- if (bld == NULL) return;
-
- if (bld->prior != NULL) p7_prior_Destroy(bld->prior);
- if (bld->r != NULL) esl_randomness_Destroy(bld->r);
- if (bld->Q != NULL) esl_dmatrix_Destroy(bld->Q);
- if (bld->S != NULL) esl_scorematrix_Destroy(bld->S);
-
- free(bld);
- return;
-}
-/*------------------- end, P7_BUILDER ---------------------------*/
-
-
-
-
-/*****************************************************************
-* 2. Standardized model construction API.
-*****************************************************************/
-
-static int validate_msa (P7_BUILDER *bld, ESL_MSA *msa);
-static int relative_weights (P7_BUILDER *bld, ESL_MSA *msa);
-static int build_model (P7_BUILDER *bld, ESL_MSA *msa, P7_HMM **ret_hmm, P7_TRACE ***opt_tr);
-static int effective_seqnumber (P7_BUILDER *bld, const ESL_MSA *msa, P7_HMM *hmm, const P7_BG *bg);
-static int parameterize (P7_BUILDER *bld, P7_HMM *hmm);
-static int annotate (P7_BUILDER *bld, const ESL_MSA *msa, P7_HMM *hmm);
-static int calibrate (P7_BUILDER *bld, P7_HMM *hmm, P7_BG *bg, P7_PROFILE **opt_gm, P7_OPROFILE **opt_om,
- int percents, U2::TaskStateInfo & ti);
-static int make_post_msa (P7_BUILDER *bld, const ESL_MSA *premsa, const P7_HMM *hmm, P7_TRACE **tr, ESL_MSA **opt_postmsa);
-
-/* Function: p7_Builder()
-* Synopsis: Build a new HMM from an MSA.
-* Incept: SRE, Thu Dec 11 13:24:38 2008 [Janelia]
-*
-* Purpose: Take the multiple sequence alignment <msa> and a build configuration <bld>,
-* and build a new HMM.
-*
-* Effective sequence number determination and calibration steps require
-* additionally providing a null model <bg>.
-*
-* Args: bld - build configuration
-* msa - multiple sequence alignment
-* bg - null model
-* opt_hmm - optRETURN: new HMM
-* opt_trarr - optRETURN: array of faux tracebacks, <0..nseq-1>
-* opt_postmsa - optRETURN: RF-annotated, possibly modified MSA
-* opt_gm - optRETURN: profile corresponding to <hmm>
-* opt_om - optRETURN: optimized profile corresponding to <gm>
-*
-* Returns: <eslOK> on success. The new HMM is optionally returned in
-* <*opt_hmm>, along with optional returns of an array of faux tracebacks
-* for each sequence in <*opt_trarr>, the annotated MSA used to construct
-* the model in <*opt_postmsa>, a configured search profile in
-* <*opt_gm>, and an optimized search profile in <*opt_om>. These are
-* all optional returns because the caller may, for example, be interested
-* only in an optimized profile, or may only be interested in the HMM.
-*
-* Returns <eslENORESULT> if no consensus columns were annotated.
-* Returns <eslEFORMAT> on MSA format problems, such as a missing RF annotation
-* line in hand architecture construction. On any returned error,
-* <bld->errbuf> contains an informative error message.
-*
-* Throws: <eslEMEM> on allocation error.
-* <eslEINVAL> if relative weights couldn't be calculated from <msa>.
-*
-* Xref: J4/30.
-*/
-int
-p7_Builder(P7_BUILDER *bld, ESL_MSA *msa, P7_BG *bg,
- P7_HMM **opt_hmm, P7_TRACE ***opt_trarr, P7_PROFILE **opt_gm, P7_OPROFILE **opt_om, ESL_MSA **opt_postmsa, U2::TaskStateInfo& ti )
-{
- uint32_t checksum = 0; /* checksum calculated for the input MSA. hmmalign --mapali verifies against this. */
- P7_HMM *hmm = NULL;
- P7_TRACE **tr = NULL;
- P7_TRACE ***tr_ptr = (opt_trarr != NULL || opt_postmsa != NULL) ? &tr : NULL;
- int status;
-
- if( ti.cancelFlag ) { status = eslCANCELED; goto ERROR; }
- ti.progress = 0;
-
- if ((status = validate_msa (bld, msa)) != eslOK) goto ERROR;
- if ((status = esl_msa_Checksum (msa, &checksum)) != eslOK) ESL_XFAIL(status, bld->errbuf, "Failed to calculate checksum");
- if ((status = relative_weights (bld, msa)) != eslOK) goto ERROR;
- if ((status = esl_msa_MarkFragments(msa, bld->fragthresh)) != eslOK) goto ERROR;
- ti.progress += 10;
- if( ti.cancelFlag ) { status = eslCANCELED; goto ERROR; }
-
- if ((status = build_model (bld, msa, &hmm, tr_ptr)) != eslOK) goto ERROR;
- ti.progress += 30;
- if( ti.cancelFlag ) { status = eslCANCELED; goto ERROR; }
-
- if ((status = effective_seqnumber(bld, msa, hmm, bg)) != eslOK) goto ERROR;
- ti.progress += 10;
- if( ti.cancelFlag ) { status = eslCANCELED; goto ERROR; }
-
- if ((status = parameterize (bld, hmm)) != eslOK) goto ERROR;
- ti.progress += 10;
- if( ti.cancelFlag ) { status = eslCANCELED; goto ERROR; }
-
- if ((status = annotate (bld, msa, hmm)) != eslOK) goto ERROR;
- ti.progress += 5;
- if( ti.cancelFlag ) { status = eslCANCELED; goto ERROR; }
-
- if ((status = calibrate (bld, hmm, bg, opt_gm, opt_om, 30, ti )) != eslOK) goto ERROR;
- if( ti.cancelFlag ) { status = eslCANCELED; goto ERROR; }
-
- if ((status = make_post_msa (bld, msa, hmm, tr, opt_postmsa)) != eslOK) goto ERROR;
- ti.progress += 5;
- if( ti.cancelFlag ) { status = eslCANCELED; goto ERROR; }
-
- hmm->checksum = checksum;
- hmm->flags |= p7H_CHKSUM;
-
- if (opt_hmm != NULL) *opt_hmm = hmm; else p7_hmm_Destroy(hmm);
- if (opt_trarr != NULL) *opt_trarr = tr; else p7_trace_DestroyArray(tr, msa->nseq);
- return eslOK;
-
-ERROR:
- p7_hmm_Destroy(hmm);
- p7_trace_DestroyArray(tr, msa->nseq);
- if (opt_gm != NULL) p7_profile_Destroy(*opt_gm);
- if (opt_om != NULL) p7_oprofile_Destroy(*opt_om);
- return status;
-}
-
-
-/* Function: p7_SingleBuilder()
-* Synopsis: Build a new HMM from a single sequence.
-* Incept: SRE, Fri Dec 12 10:52:45 2008 [Janelia]
-*
-* Purpose: Take the sequence <sq> and a build configuration <bld>, and
-* build a new HMM.
-*
-* The single sequence scoring system in the <bld>
-* configuration must have been previously initialized by
-* <p7_builder_SetScoreSystem()>.
-*
-* Args: bld - build configuration
-* sq - query sequence
-* bg - null model (needed to paramaterize insert emission probs)
-* opt_hmm - optRETURN: new HMM
-* opt_gm - optRETURN: profile corresponding to <hmm>
-* opt_om - optRETURN: optimized profile corresponding to <gm>
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation error.
-* <eslEINVAL> if <bld> isn't properly configured somehow.
-*/
-int
-p7_SingleBuilder(P7_BUILDER *bld, ESL_SQ *sq, P7_BG *bg, P7_HMM **opt_hmm,
- P7_TRACE **opt_tr, P7_PROFILE **opt_gm, P7_OPROFILE **opt_om, int percent, U2::TaskStateInfo & ti )
-{
- P7_HMM *hmm = NULL;
- P7_TRACE *tr = NULL;
- int k;
- int status;
-
- bld->errbuf[0] = '\0';
- if (! bld->Q) ESL_XEXCEPTION(eslEINVAL, "score system not initialized");
-
- if ((status = p7_Seqmodel(bld->abc, sq->dsq, sq->n, sq->name, bld->Q, bg->f, bld->popen, bld->pextend, &hmm)) != eslOK) goto ERROR;
- if ((status = calibrate(bld, hmm, bg, opt_gm, opt_om, percent, ti )) != eslOK) goto ERROR;
-
- /* build a faux trace: relative to core model (B->M_1..M_L->E) */
- if (opt_tr != NULL)
- {
- if ((tr = p7_trace_Create()) == NULL) goto ERROR;
- if ((status = p7_trace_Append(tr, p7T_B, 0, 0)) != eslOK) goto ERROR;
- for (k = 1; k <= sq->n; k++)
- if ((status = p7_trace_Append(tr, p7T_M, k, k)) != eslOK) goto ERROR;
- if ((status = p7_trace_Append(tr, p7T_E, 0, 0)) != eslOK) goto ERROR;
- tr->M = sq->n;
- tr->L = sq->n;
- }
-
- // ! DANGER: CODE CHANGED !
- if (opt_hmm != NULL){
- *opt_hmm = hmm;
- } else {
- hmm->abc = NULL; /* because om->abc has ptr to it */
- p7_hmm_Destroy(hmm);
- }
- if (opt_tr != NULL) *opt_tr = tr;
- return eslOK;
-
-ERROR:
- p7_hmm_Destroy(hmm);
- if (tr != NULL) p7_trace_Destroy(tr);
- if (opt_gm != NULL) p7_profile_Destroy(*opt_gm);
- if (opt_om != NULL) p7_oprofile_Destroy(*opt_om);
- return status;
-}
-/*------------- end, model construction API ---------------------*/
-
-
-
-
-/*****************************************************************
-* 3. Internal functions
-*****************************************************************/
-
-/* validate_msa:
-* SRE, Thu Dec 3 16:10:31 2009 [J5/119; bug #h70 fix]
-*
-* HMMER uses a convention for missing data characters: they
-* indicate that a sequence is a fragment. (See
-* esl_msa_MarkFragments()).
-*
-* Because of the way these fragments will be handled in tracebacks,
-* we reject any alignment that uses missing data characters in any
-* other way.
-*
-* This validation step costs negligible time.
-*/
-static int
-validate_msa(P7_BUILDER *bld, ESL_MSA *msa)
-{
- int idx;
- int64_t apos;
-
- for (idx = 0; idx < msa->nseq; idx++)
- {
- apos = 1;
- while ( esl_abc_XIsMissing(msa->abc, msa->ax[idx][apos]) && apos <= msa->alen) apos++;
- while (! esl_abc_XIsMissing(msa->abc, msa->ax[idx][apos]) && apos <= msa->alen) apos++;
- while ( esl_abc_XIsMissing(msa->abc, msa->ax[idx][apos]) && apos <= msa->alen) apos++;
- if (apos != msa->alen+1) ESL_FAIL(eslEINVAL, bld->errbuf, "msa %s; sequence %s\nhas missing data chars (~) other than at fragment edges", msa->name, msa->sqname[idx]);
- }
-
- return eslOK;
-}
-
-/* set_relative_weights():
-* Set msa->wgt vector, using user's choice of relative weighting algorithm.
-*/
-static int
-relative_weights(P7_BUILDER *bld, ESL_MSA *msa)
-{
- int status = eslOK;
-
- if (bld->wgt_strategy == p7_WGT_NONE) { esl_vec_DSet(msa->wgt, msa->nseq, 1.); }
- else if (bld->wgt_strategy == p7_WGT_GIVEN) ;
- else if (bld->wgt_strategy == p7_WGT_PB) status = esl_msaweight_PB(msa);
- else if (bld->wgt_strategy == p7_WGT_GSC) status = esl_msaweight_GSC(msa);
- else if (bld->wgt_strategy == p7_WGT_BLOSUM) status = esl_msaweight_BLOSUM(msa, bld->wid);
- else ESL_EXCEPTION(eslEINCONCEIVABLE, "no such weighting strategy");
-
- if (status != eslOK) ESL_FAIL(status, bld->errbuf, "failed to set relative weights in alignment");
- return eslOK;
-}
-
-
-/* build_model():
-* Given <msa>, choose HMM architecture, collect counts;
-* upon return, <*ret_hmm> is newly allocated and contains
-* relative-weighted observed counts.
-* Optionally, caller can request an array of inferred traces for
-* the <msa> too.
-*/
-static int
-build_model(P7_BUILDER *bld, ESL_MSA *msa, P7_HMM **ret_hmm, P7_TRACE ***opt_tr)
-{
- int status;
-
- if (bld->arch_strategy == p7_ARCH_FAST)
- {
- status = p7_Fastmodelmaker(msa, bld->symfrac, ret_hmm, opt_tr);
- if (status == eslENORESULT) ESL_XFAIL(status, bld->errbuf, "Alignment %s has no consensus columns w/ > %d%% residues - can't build a model.\n", msa->name != NULL ? msa->name : "", (int) (100 * bld->symfrac));
- else if (status == eslEMEM) ESL_XFAIL(status, bld->errbuf, "Memory allocation failure in model construction.\n");
- else if (status != eslOK) ESL_XFAIL(status, bld->errbuf, "internal error in model construction.\n");
- }
- else if (bld->arch_strategy == p7_ARCH_HAND)
- {
- status = p7_Handmodelmaker(msa, ret_hmm, opt_tr);
- if (status == eslENORESULT) ESL_XFAIL(status, bld->errbuf, "Alignment %s has no annotated consensus columns - can't build a model.\n", msa->name != NULL ? msa->name : "");
- else if (status == eslEFORMAT) ESL_XFAIL(status, bld->errbuf, "Alignment %s has no reference annotation line\n", msa->name != NULL ? msa->name : "");
- else if (status == eslEMEM) ESL_XFAIL(status, bld->errbuf, "Memory allocation failure in model construction.\n");
- else if (status != eslOK) ESL_XFAIL(status, bld->errbuf, "internal error in model construction.\n");
- }
- return eslOK;
-
-ERROR:
- return status;
-}
-
-
-/* set_effective_seqnumber()
-* Incept: SRE, Fri May 11 08:14:57 2007 [Janelia]
-*
-* <hmm> comes in with weighted observed counts. It goes out with
-* those observed counts rescaled to sum to the "effective sequence
-* number".
-*
-* <msa> is needed because we may need to see the sequences in order
-* to determine effective seq #. (for --eclust)
-*
-* <prior> is needed because we may need to parameterize test models
-* looking for the right relative entropy. (for --eent, the default)
-*/
-static int
-effective_seqnumber(P7_BUILDER *bld, const ESL_MSA *msa, P7_HMM *hmm, const P7_BG *bg)
-{
- int status;
-
- if (bld->effn_strategy == p7_EFFN_NONE) hmm->eff_nseq = msa->nseq;
- else if (bld->effn_strategy == p7_EFFN_SET) hmm->eff_nseq = bld->eset;
- else if (bld->effn_strategy == p7_EFFN_CLUST)
- {
- int nclust;
-
- status = esl_msacluster_SingleLinkage(msa, bld->eid, NULL, NULL, &nclust);
- if (status == eslEMEM) ESL_XFAIL(status, bld->errbuf, "memory allocation failed");
- else if (status != eslOK) ESL_XFAIL(status, bld->errbuf, "single linkage clustering algorithm (at %d%% id) failed", (int)(100 * bld->eid));
-
- hmm->eff_nseq = (double) nclust;
- }
-
- else if (bld->effn_strategy == p7_EFFN_ENTROPY)
- {
- double etarget;
- double eff_nseq;
-
- etarget = (bld->esigma - eslCONST_LOG2R * log( 2.0 / ((double) hmm->M * (double) (hmm->M+1)))) / (double) hmm->M; /* xref J5/36. */
- etarget = ESL_MAX(bld->re_target, etarget);
-
- status = p7_EntropyWeight(hmm, bg, bld->prior, etarget, &eff_nseq);
- if (status == eslEMEM) ESL_XFAIL(status, bld->errbuf, "memory allocation failed");
- else if (status != eslOK) ESL_XFAIL(status, bld->errbuf, "internal failure in entropy weighting algorithm");
- hmm->eff_nseq = eff_nseq;
- }
-
- p7_hmm_Scale(hmm, hmm->eff_nseq / (double) hmm->nseq);
- return eslOK;
-
-ERROR:
- return status;
-}
-
-
-/* parameterize()
-* Converts counts to probability parameters.
-*/
-static int
-parameterize(P7_BUILDER *bld, P7_HMM *hmm)
-{
- int status;
-
- if ((status = p7_ParameterEstimation(hmm, bld->prior)) != eslOK) ESL_XFAIL(status, bld->errbuf, "parameter estimation failed");
-
- return eslOK;
-
-ERROR:
- return status;
-}
-
-
-
-/* annotate()
-* Transfer annotation information from MSA to new HMM.
-* Also sets model-specific residue composition (hmm->compo).
-*/
-static int
-annotate(P7_BUILDER *bld, const ESL_MSA *msa, P7_HMM *hmm)
-{
- int status;
-
- /* Name. */
- if (msa->name) p7_hmm_SetName(hmm, msa->name);
- else ESL_XFAIL(eslEINVAL, bld->errbuf, "Unable to name the HMM.");
-
- if ((status = p7_hmm_SetAccession (hmm, msa->acc)) != eslOK) ESL_XFAIL(status, bld->errbuf, "Failed to record MSA accession");
- if ((status = p7_hmm_SetDescription(hmm, msa->desc)) != eslOK) ESL_XFAIL(status, bld->errbuf, "Failed to record MSA description");
- // if ((status = p7_hmm_AppendComlog(hmm, go->argc, go->argv)) != eslOK) ESL_XFAIL(status, errbuf, "Failed to record command log");
- if ((status = p7_hmm_SetCtime(hmm)) != eslOK) ESL_XFAIL(status, bld->errbuf, "Failed to record timestamp");
- if ((status = p7_hmm_SetComposition(hmm)) != eslOK) ESL_XFAIL(status, bld->errbuf, "Failed to determine model composition");
- hmm->flags |= p7H_COMPO;
-
- if (msa->cutset[eslMSA_GA1] && msa->cutset[eslMSA_GA2]) { hmm->cutoff[p7_GA1] = msa->cutoff[eslMSA_GA1]; hmm->cutoff[p7_GA2] = msa->cutoff[eslMSA_GA2]; hmm->flags |= p7H_GA; }
- if (msa->cutset[eslMSA_TC1] && msa->cutset[eslMSA_TC2]) { hmm->cutoff[p7_TC1] = msa->cutoff[eslMSA_TC1]; hmm->cutoff[p7_TC2] = msa->cutoff[eslMSA_TC2]; hmm->flags |= p7H_TC; }
- if (msa->cutset[eslMSA_NC1] && msa->cutset[eslMSA_NC2]) { hmm->cutoff[p7_NC1] = msa->cutoff[eslMSA_NC1]; hmm->cutoff[p7_NC2] = msa->cutoff[eslMSA_NC2]; hmm->flags |= p7H_NC; }
-
- return eslOK;
-
-ERROR:
- return status;
-}
-
-/* calibrate()
-*
-* Sets the E value parameters of the model with two short simulations.
-* A profile and an oprofile are created here. If caller wants to keep either
-* of them, it can pass non-<NULL> <opt_gm>, <opt_om> pointers.
-*/
-static int
-calibrate(P7_BUILDER *bld, P7_HMM *hmm, P7_BG *bg, P7_PROFILE **opt_gm, P7_OPROFILE **opt_om, int percents, U2::TaskStateInfo & ti)
-{
- int status;
-
- if (opt_gm != NULL) *opt_gm = NULL;
- if (opt_om != NULL) *opt_om = NULL;
-
- if ((status = p7_Calibrate(hmm, bld, &(bld->r), &bg, opt_gm, opt_om, percents, ti )) != eslOK) goto ERROR;
- return eslOK;
-
-ERROR:
- return status;
-}
-
-
-/* make_post_msa()
-*
-* Optionally, we can return the alignment we actually built the model
-* from (including RF annotation on assigned consensus columns, and any
-* trace doctoring to enforce Plan7 consistency).
-*/
-static int
-make_post_msa(P7_BUILDER *bld, const ESL_MSA *premsa, const P7_HMM *hmm, P7_TRACE **tr, ESL_MSA **opt_postmsa)
-{
- ESL_MSA *postmsa = NULL;
- int optflags = p7_DEFAULT;
- int status;
-
- if (opt_postmsa == NULL) return eslOK;
-
- /* someday we might want to transfer more info from HMM to postmsa */
- if ((status = p7_tracealign_MSA(premsa, tr, hmm->M, optflags, &postmsa)) != eslOK) goto ERROR;
-
- *opt_postmsa = postmsa;
- return eslOK;
-
-ERROR:
- if (postmsa != NULL) esl_msa_Destroy(postmsa);
- return status;
-}
-/*---------------- end, internal functions ----------------------*/
-
-
-
-
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/p7_config.h b/src/plugins_3rdparty/hmm3/src/hmmer3/p7_config.h
deleted file mode 100644
index 7e0f432..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/p7_config.h
+++ /dev/null
@@ -1,150 +0,0 @@
-/* src/p7_config.h. Generated from p7_config.h.in by configure. */
-/* @configure_input@
-* p7config.h.in -> p7config.h
-*
-* p7config.h is generated from p7config.h.in by the ./configure script.
-* DO NOT EDIT p7config.h; only edit p7config.h.in.
-*
-* Configuration of HMMER, including both system-dependent configuration
-* (done by ./configure) and hardcoded configuration that someone might
-* want to alter someday.
-*
-* Because this header may configure the behavior of system headers
-* (for example, LFS support), it must be included before any other
-* header file.
-*
-* SRE, Mon Jan 1 16:07:28 2007 [Casa de Gatos] [Nirvana, Nevermind]
- * SVN $Id: p7_config.h.in 2884 2009-08-23 21:28:54Z eddys $
-*/
-#ifndef P7_CONFIGH_INCLUDED
-#define P7_CONFIGH_INCLUDED
-
-
-/*****************************************************************
-* 1. Compile-time constants that control HMMER's computational
-* behavior (memory and processor use), and output formatting.
-* It can be edited and configured manually before compilation.
-*****************************************************************/
-
-/* p7_RAMLIMIT controls the switch from fast full DP to slow
-* linear-memory divide and conquer. Default devotes 32 MB/thread.
-*/
-#ifndef p7_RAMLIMIT
-#define p7_RAMLIMIT 32
-#endif
-
-/* p7_ALILENGTH controls length of displayed alignment lines.
-*/
-#ifndef p7_ALILENGTH
-#define p7_ALILENGTH 50
-#endif
-
-/*****************************************************************
-* 2. Compile-time constants that control empirically tuned HMMER
-* default parameters. You can edit it, but you ought not to,
-* unless you're trying to improve on our empirical data.
-*****************************************************************/
-
-/* Relative entropy target defaults:
-* For proteins, hmmbuild's effective sequence number calculation
-* aims to achieve a certain relative entropy per match emission.
-* (= average score per match emission).
-* These are empirically tuned constants, from the work of Steve Johnson.
-*/
-#define p7_ETARGET_AMINO 0.59 /* bits */
-#define p7_ETARGET_DNA 0.59 /* bits */
-#define p7_ETARGET_OTHER 1.0 /* bits */ /* if you define your own alphabet, set this */
-
-#define p7_SEQDBENV "BLASTDB"
-#define p7_HMMDBENV "PFAMDB"
-
-/*****************************************************************
-* 3. The next section probably shouldn't be edited at all, unless
-* you really know what you're doing. It controls some fundamental
-* parameters in HMMER that occasionally get reconfigured in
-* experimental versions, or for variants of HMMER that work on
-* non-biological alphabets.
-*****************************************************************/
-
-/* The symbol alphabet is handled by ESL_ALPHABET objects, which
-* dynamically allocate; but sometimes HMMER uses statically-allocated
-* space, and it's useful to know a reasonable maximum for
-* symbol alphabet size.
-*/
-#define p7_MAXABET 20 /* maximum size of alphabet (4 or 20) */
-#define p7_MAXCODE 29 /* maximum degenerate alphabet size (18 or 29) */
-
-/* p7_MAX_SC_TXTLEN has to be large enough to represent a score as a
-* string, including \0 and a sign.
-*/
-#define p7_MAX_SC_TXTLEN 11
-
-/* In Forward algorithm implementations, we use a table lookup in
-* p7_FLogsum() to calculate summed probabilities in log
-* space. p7_INTSCALE defines the precision of the calculation; the
-* default of 1000.0 means rounding differences to the nearest 0.001
-* nat. p7_LOGSUM_TBL defines the size of the lookup table; the
-* default of 16000 means entries are calculated for differences of 0
-* to 16.000 nats (when p7_INTSCALE is 1000.0). e^{-p7_LOGSUM_TBL /
-* p7_INTSCALE} should be on the order of the machine FLT_EPSILON,
-* typically 1.2e-7.
-*/
-#define p7_INTSCALE 1000.0f
-#define p7_LOGSUM_TBL 16000
-
-/* Other stuff.
-*/
-#define p7_MAXDCHLET 20 /* maximum # Dirichlet components in mixture prior */
-
-
-/*****************************************************************
-* 4. The final section isn't meant to be human editable at all.
-* It is configured automatically by the ./configure script.
-*****************************************************************/
-
-/* Version info - set once for whole package in configure.ac
-*/
-#define HMMER_VERSION "3.0"
-#define HMMER_DATE "March 2010"
-#define HMMER_COPYRIGHT "Copyright (C) 2010 Howard Hughes Medical Institute."
-#define HMMER_LICENSE "Freely distributed under the GNU General Public License (GPLv3)."
-#define HMMER_URL "http://hmmer.org/"
-
-/* Large file support (must precede any header file inclusion.)
-*/
-/* #undef _FILE_OFFSET_BITS */
-/* #undef _LARGE_FILES */
-/* #undef _LARGEFILE_SOURCE */
-
-/* Choice of optimized implementation (one and only one must be set)
-*/
-#define p7_IMPL_SSE 1
-/* #undef p7_IMPL_VMX */
-/* #undef p7_IMPL_DUMMY */
-
-/* Optional parallel implementations
-*/
-#define HAVE_SSE2 1
-/* #undef HAVE_MPI */
-/* #undef HMMER_PVM */
-// ! we use our own threads !
-/* #undef HMMER_THREADS */
-/* #undef HAVE_PTHREAD_ATTR_SETSCOPE */
-/* #undef HAVE_PTHREAD_SETCONCURRENCY */
-
-#define HAVE_FLUSH_ZERO_MODE 1
-
-/* Debugging hooks
-*/
-/* #undef p7_DEBUGGING */
-
-#endif /*P7_CONFIGH_INCLUDED*/
-/*****************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-*****************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/p7_domaindef.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/p7_domaindef.cpp
deleted file mode 100644
index f41596c..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/p7_domaindef.cpp
+++ /dev/null
@@ -1,783 +0,0 @@
-/* Definition of multidomain structure of a target sequence, and
-* rescoring as a sum of individual domains, with null2 correction.
-*
-* Contents:
-* 1. The P7_DOMAINDEF object: allocation, reuse, destruction
-* 2. Routines inferring domain structure of a target sequence
-* 3. Internal routines
-*
-*
-* Exegesis:
-*
-* The key function here is <p7_domaindef_ByPosteriorHeuristics()>.
-* Everything else is support structure.
-*
-* When you call <p7_domaindef_ByPosteriorHeuristics()>, you have a
-* per-sequence hit that's judged significant, and you've calculated
-* Forward/Backward score matrices for the complete sequence. Thus,
-* the input data are the model <gm>, the sequence <sq>, and filled-in
-* forward and backward matrices <fwd>, <bck>.
-*
-* The function then chews over this data, using posterior
-* probabilities and heuristics to define, score, and obtain
-* display-ready alignments for individual domains. When it's done,
-* your <fwd> and <bck> matrices have been effectively destroyed (they
-* get reused for individual domain alignment calculations), and
-* <ddef> contains all the per-domain results you need. It returns to
-* you the number of domains it's defined (in <ret_ndom>), and the
-* total per-sequence score derived by a sum of individual domain
-* scores (in <ret_sc>).
-*
-* The <P7_DOMAINDEF> structure is a reusable container that manages
-* all the necessary working memory and heuristic thresholds.
-*
-* SRE, Thu Jan 24 09:28:01 2008 [Janelia]
- * SVN $Id: p7_domaindef.c 2895 2009-09-11 20:16:34Z eddys $
-*/
-
-#include <hmmer3/p7_config.h>
-
-#include <math.h>
-#include <string.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_random.h>
-#include <hmmer3/easel/esl_sq.h>
-#include <hmmer3/easel/esl_vectorops.h>
-
-#include <hmmer3/hmmer.h>
-
-static int is_multidomain_region (P7_DOMAINDEF *ddef, int i, int j);
-static int region_trace_ensemble (P7_DOMAINDEF *ddef, const P7_OPROFILE *om, const ESL_DSQ *dsq, int ireg, int jreg, const P7_OMX *fwd, P7_OMX *wrk, int *ret_nc);
-static int rescore_isolated_domain(P7_DOMAINDEF *ddef, const P7_OPROFILE *om, const ESL_SQ *sq, P7_OMX *ox1, P7_OMX *ox2,
- int i, int j, int null2_is_done);
-
-
-/*****************************************************************
-* 1. The P7_DOMAINDEF object: allocation, reuse, destruction
-*****************************************************************/
-
-/* Function: p7_domaindef_Create()
-* Synopsis: Creates a new <P7_DOMAINDEF> object.
-* Incept: SRE, Fri Jan 25 13:21:31 2008 [Janelia]
-*
-* Purpose: Creates a new <P7_DOMAINDEF> object, with <r> registered
-* as its random number generator, using default settings
-* for all thresholds.
-*
-* Returns: a pointer to the new <P7_DOMAINDEF> object.
-*
-* Throws: <NULL> on allocation failure.
-*/
-P7_DOMAINDEF *
-p7_domaindef_Create(ESL_RANDOMNESS *r)
-{
- P7_DOMAINDEF *ddef = NULL;
- int Lalloc = 512; /* this initial alloc doesn't matter much; space is realloced as needed */
- int nalloc = 32;
- int status;
-
- /* level 1 alloc */
- ESL_ALLOC_WITH_TYPE(ddef, P7_DOMAINDEF*, sizeof(P7_DOMAINDEF));
- ddef->mocc = ddef->btot = ddef->etot = NULL;
- ddef->n2sc = NULL;
- ddef->sp = NULL;
- ddef->trr = NULL;
- ddef->dcl = NULL;
-
- /* level 2 alloc: posterior prob arrays */
- ESL_ALLOC_WITH_TYPE(ddef->mocc, float*, sizeof(float) * (Lalloc+1));
- ESL_ALLOC_WITH_TYPE(ddef->btot, float*, sizeof(float) * (Lalloc+1));
- ESL_ALLOC_WITH_TYPE(ddef->etot, float*, sizeof(float) * (Lalloc+1));
- ESL_ALLOC_WITH_TYPE(ddef->n2sc, float*, sizeof(float) * (Lalloc+1));
- ddef->mocc[0] = ddef->etot[0] = ddef->btot[0] = 0.;
- ddef->n2sc[0] = 0.;
- ddef->Lalloc = Lalloc;
- ddef->L = 0;
-
- /* level 2 alloc: results storage */
- ESL_ALLOC_WITH_TYPE(ddef->dcl, P7_DOMAIN*, sizeof(P7_DOMAIN) * nalloc);
- ddef->nalloc = nalloc;
- ddef->ndom = 0;
-
- ddef->nexpected = 0.0;
- ddef->nregions = 0;
- ddef->nclustered = 0;
- ddef->noverlaps = 0;
- ddef->nenvelopes = 0;
-
- /* default thresholds */
- ddef->rt1 = 0.25;
- ddef->rt2 = 0.10;
- ddef->rt3 = 0.20;
- ddef->nsamples = 200;
- ddef->min_overlap = 0.8;
- ddef->of_smaller = TRUE;
- ddef->max_diagdiff = 4;
- ddef->min_posterior = 0.25;
- ddef->min_endpointp = 0.02;
-
- /* allocate reusable, growable objects that domain def reuses for each seq */
- ddef->sp = p7_spensemble_Create(1024, 64, 32); /* init allocs = # sampled pairs; max endpoint range; # of domains */
- ddef->trr = p7_trace_CreateWithPP();
- ddef->gtr = p7_trace_Create();
-
- /* keep a copy of ptr to the RNG */
- ddef->r = r;
- ddef->do_reseeding = TRUE;
- return ddef;
-
-ERROR:
- p7_domaindef_Destroy(ddef);
- return NULL;
-}
-
-
-/* p7_domaindef_GrowTo()
-* Synopsis: Reallocates a <P7_DOMAINDEF> for new seq length <L>
-* Incept: SRE, Fri Jan 25 13:27:24 2008 [Janelia]
-*
-* Purpose: Reallocates a <P7_DOMAINDEF> object <ddef> so that
-* it can hold a sequence of up to <L> residues.
-*
-* (This might be a no-op, if <ddef> is already large
-* enough.)
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure. In this case, the
-* data in <ddef> are unaffected.
-*/
-static int
-p7_domaindef_GrowTo(P7_DOMAINDEF *ddef, int L)
-{
- void *p;
- int status;
-
- if (L <= ddef->Lalloc) return eslOK;
-
- ESL_RALLOC_WITH_TYPE(ddef->mocc, float*, p, sizeof(float) * (L+1));
- ESL_RALLOC_WITH_TYPE(ddef->btot, float*, p, sizeof(float) * (L+1));
- ESL_RALLOC_WITH_TYPE(ddef->etot, float*, p, sizeof(float) * (L+1));
- ESL_RALLOC_WITH_TYPE(ddef->n2sc, float*, p, sizeof(float) * (L+1));
- ddef->Lalloc = L;
- return eslOK;
-
-ERROR:
- return status;
-}
-
-
-/* Function: p7_domaindef_Reuse()
-* Synopsis: Prepare to reuse a <P7_DOMAINDEF> on a new sequence.
-* Incept: SRE, Fri Jan 25 13:48:36 2008 [Janelia]
-*
-* Purpose: Prepare a <P7_DOMAINDEF> object <ddef> to be reused on
-* a new sequence, reusing as much memory as possible.
-*
-* Note: Because of the way we handle alidisplays, handing them off to
-* the caller, we don't reuse their memory; any unused
-* alidisplays are destroyed. It's not really possible to
-* reuse alidisplay memory. We need alidisplays to persist
-* until all sequences have been processed and we're
-* writing our final output to the user.
-*
-* Returns: <eslOK> on success.
-*/
-int
-p7_domaindef_Reuse(P7_DOMAINDEF *ddef)
-{
- int status;
- int d;
-
- /* If ddef->dcl is NULL, we turned the domain list over to a P7_HIT
- * for permanent storage, and we need to allocate a new one;
- * else, reuse the one we've got.
- */
- if (ddef->dcl == NULL)
- ESL_ALLOC_WITH_TYPE(ddef->dcl, P7_DOMAIN*, sizeof(P7_DOMAIN) * ddef->nalloc);
- else
- {
- for (d = 0; d < ddef->ndom; d++) {
- p7_alidisplay_Destroy(ddef->dcl[d].ad);
- ddef->dcl[d].ad = NULL;
- }
-
- }
- ddef->ndom = 0;
- ddef->L = 0;
-
- ddef->nexpected = 0.0;
- ddef->nregions = 0;
- ddef->nclustered = 0;
- ddef->noverlaps = 0;
- ddef->nenvelopes = 0;
-
- p7_spensemble_Reuse(ddef->sp);
- p7_trace_Reuse(ddef->trr); /* probable overkill; should already have been called */
- p7_trace_Reuse(ddef->gtr); /* likewise */
- return eslOK;
-
-ERROR:
- return status;
-}
-
-
-/* Function: p7_domaindef_DumpPosteriors()
-* Synopsis: Output posteriors that define domain structure to a stream.
-* Incept: SRE, Fri Feb 29 08:32:14 2008 [Janelia]
-*
-* Purpose: Output the vectors from <ddef> that are used to
-* define domain structure to a stream <ofp>, in xmgrace format.
-*
-* There are four vectors. The first set is
-* <mocc[1..i..L]>, the probability that residue <i> is
-* emitted by the core model (is in a domain). The second
-* set is <btot[1..i..L]>, the cumulative expected number
-* of times that a domain uses a B state (starts) at or
-* before position <i>. The third set is <etot[1..i..L]>,
-* the cumulative expected number of times that a domain
-* uses an E state (ends) at or before position <i>. The
-* fourth set is <n2sc[1..i..L]>, the score of residue i
-* under the ad hoc null2 model; this is a measure of local
-* biased composition.
-*
-* These three fields will only be available after a call
-* to domain definition by
-* <p7_domaindef_ByPosteriorHeuristics()>.
-*
-* Returns: <eslOK> on success
-*
-* Xref: J2/126
-*/
-int
-p7_domaindef_DumpPosteriors(FILE *ofp, P7_DOMAINDEF *ddef)
-{
- int i;
-
- for (i = 1; i <= ddef->L; i++)
- fprintf(ofp, "%d %f\n", i, ddef->mocc[i]);
- fprintf(ofp, "&\n");
-
- for (i = 1; i <= ddef->L; i++)
- fprintf(ofp, "%d %f\n", i, ddef->btot[i]);
- fprintf(ofp, "&\n");
-
- for (i = 1; i <= ddef->L; i++)
- fprintf(ofp, "%d %f\n", i, ddef->etot[i]);
- fprintf(ofp, "&\n");
-
- for (i = 1; i <= ddef->L; i++)
- fprintf(ofp, "%d %f\n", i, ddef->n2sc[i]);
- fprintf(ofp, "&\n");
-
- return eslOK;
-}
-
-
-
-/* Function: p7_domaindef_Destroy()
-* Synopsis: Destroys a <P7_DOMAINDEF>.
-* Incept: SRE, Fri Jan 25 13:52:46 2008 [Janelia]
-*
-* Purpose: Destroys a <P7_DOMAINDEF>.
-*/
-void
-p7_domaindef_Destroy(P7_DOMAINDEF *ddef)
-{
- int d;
-
- if (ddef == NULL) return;
-
- if (ddef->mocc != NULL) free(ddef->mocc);
- if (ddef->btot != NULL) free(ddef->btot);
- if (ddef->etot != NULL) free(ddef->etot);
- if (ddef->n2sc != NULL) free(ddef->n2sc);
-
- if (ddef->dcl != NULL) {
- for (d = 0; d < ddef->ndom; d++)
- p7_alidisplay_Destroy(ddef->dcl[d].ad);
- free(ddef->dcl);
- }
-
- p7_spensemble_Destroy(ddef->sp);
- p7_trace_Destroy(ddef->trr);
- p7_trace_Destroy(ddef->gtr);
- free(ddef);
- return;
-}
-
-/*****************************************************************
-* 2. Routines inferring domain structure of a target sequence
-*****************************************************************/
-
-#if 0
-/* Function: p7_domaindef_ByViterbi()
-* Synopsis: Define domains in a sequence by maximum likelihood.
-* Incept: SRE, Fri Jan 25 15:10:21 2008 [Janelia]
-*
-* Purpose: Use a Viterbi (maximum likelihood) parse to determine
-* the domain structure of sequence <sq> aligned to
-* model <gm>. Caller provides a filled Viterbi matrix
-* in <gx1>, and a second matrix of at least the same
-* size for scratch space in <gx2>.
-*
-* Upon return, <ddef> contains definitions of all the
-* domains, bounds defined by Viterbi parse, individually
-* scored by null2-corrected Forward, and aligned by
-* optimal posterior accuracy.
-*
-* Returns: <eslOK> on success.
-*/
-int
-p7_domaindef_ByViterbi(P7_PROFILE *gm, const ESL_SQ *sq, P7_GMX *gx1, P7_GMX *gx2, P7_DOMAINDEF *ddef)
-{
- int d;
- int saveL = gm->L;
-
- p7_domaindef_GrowTo(ddef, sq->n);
- p7_GTrace (sq->dsq, sq->n, gm, gx1, ddef->gtr);
- p7_trace_Index(ddef->gtr);
-
- p7_ReconfigUnihit(gm, 0); /* process each domain in unihit L=0 mode */
-
- for (d = 0; d < ddef->gtr->ndom; d++)
- rescore_isolated_domain(ddef, gm, sq, gx1, gx2, ddef->gtr->sqfrom[d], ddef->gtr->sqto[d], FALSE);
- p7_ReconfigMultihit(gm, saveL); /* restore original profile configuration */
- return eslOK;
-}
-#endif
-
-
-/* Function: p7_domaindef_ByPosteriorHeuristics()
-* Synopsis: Define domains in a sequence using posterior probs.
-* Incept: SRE, Sat Feb 23 08:17:44 2008 [Janelia]
-*
-* Purpose: Given a sequence <sq> and model <om> for which we have
-* already calculated a Forward and Backward parsing
-* matrices <oxf> and <oxb>; use posterior probability
-* heuristics to determine an annotated domain structure;
-* and for each domain found, score it (with null2
-* calculations) and obtain an optimal accuracy alignment,
-* using <fwd> and <bck> matrices as workspace for the
-* necessary full-matrix DP calculations. Caller provides a
-* new or reused <ddef> object to hold these results.
-*
-* Upon return, <ddef> contains the definitions of all the
-* domains: their bounds, their null-corrected Forward
-* scores, and their optimal posterior accuracy alignments.
-*
-* Returns: <eslOK> on success.
-*
-* <eslERANGE> on numeric overflow in posterior
-* decoding. This should not be possible for multihit
-* models.
-*/
-int
-p7_domaindef_ByPosteriorHeuristics(const ESL_SQ *sq, P7_OPROFILE *om,
- P7_OMX *oxf, P7_OMX *oxb, P7_OMX *fwd, P7_OMX *bck,
- P7_DOMAINDEF *ddef, int percentBorder, U2::TaskStateInfo & ti, int wholeSeqSz )
-{
- int i, j;
- int triggered;
- int d;
- int i2,j2;
- int last_j2;
- int saveL = om->L;
- int nc;
- int status;
-
- if ((status = p7_domaindef_GrowTo(ddef, sq->n)) != eslOK) return status; /* ddef's btot,etot,mocc now ready for seq of length n */
- if ((status = p7_DomainDecoding(om, oxf, oxb, ddef)) != eslOK) return status; /* ddef->{btot,etot,mocc} now made. */
-
- esl_vec_FSet(ddef->n2sc, sq->n+1, 0.0); /* ddef->n2sc null2 scores are initialized */
- ddef->nexpected = ddef->btot[sq->n]; /* posterior expectation for # of domains (same as etot[sq->n]) */
-
- p7_oprofile_ReconfigUnihit(om, saveL, wholeSeqSz); /* process each domain in unilocal mode */
- i = -1;
- triggered = FALSE;
-
- // ! ADDED CODE !
- int progressStart = ti.progress;
- for (j = 1; j <= sq->n; j++)
- {
- // ! ADDED CODE !
- ti.progress = progressStart + (int)(((double)percentBorder / sq->n) * i);
- if( ti.cancelFlag ){ return eslCANCELED; }
-
- if (! triggered)
- { /* xref J2/101 for what the logic below is: */
- if (ddef->mocc[j] - (ddef->btot[j] - ddef->btot[j-1]) < ddef->rt2) i = j;
- else if (i == -1) i = j;
- if (ddef->mocc[j] >= ddef->rt1) triggered = TRUE;
- }
- else if (ddef->mocc[j] - (ddef->etot[j] - ddef->etot[j-1]) < ddef->rt2)
- {
- /* We have a region i..j to evaluate. */
- p7_omx_GrowTo(fwd, om->M, j-i+1, j-i+1);
- p7_omx_GrowTo(bck, om->M, j-i+1, j-i+1);
- ddef->nregions++;
- if (is_multidomain_region(ddef, i, j))
- {
- /* This region appears to contain more than one domain, so we have to
- * resolve it by cluster analysis of posterior trace samples, to define
- * one or more domain envelopes.
- */
- ddef->nclustered++;
-
- /* Resolve the region into domains by stochastic trace
- * clustering; assign position-specific null2 model by
- * stochastic trace clustering; there is redundancy
- * here; we will consolidate later if null2 strategy
- * works
- */
- p7_oprofile_ReconfigMultihit(om, saveL, wholeSeqSz);
- p7_Forward(sq->dsq+i-1, j-i+1, om, fwd, NULL, 0, ti );
- // ! ADDED CODE !
- if( ti.cancelFlag ){ return eslCANCELED; }
-
- region_trace_ensemble(ddef, om, sq->dsq, i, j, fwd, bck, &nc);
- p7_oprofile_ReconfigUnihit(om, saveL, wholeSeqSz);
- /* ddef->n2sc is now set on i..j by the traceback-dependent method */
-
- last_j2 = 0;
- for (d = 0; d < nc; d++) {
- p7_spensemble_GetClusterCoords(ddef->sp, d, &i2, &j2, NULL, NULL, NULL);
- if (i2 <= last_j2) ddef->noverlaps++;
-
- /* Note that k..m coords on model are available, but we're currently ignoring them.
- This leads to a rare clustering bug that we eventually need to fix [xref J3/32]:
- two different regions in one profile HMM might have hit same seq domain,
- and when we now go to calculate an OA trace, nothing constrains us to find the
- two different alignments to the HMM; in fact, because OA is optimal, we'll
- find one and the *same* alignment, leading to an apparent duplicate alignment
- in the output.
- */
- ddef->nenvelopes++;
- if (rescore_isolated_domain(ddef, om, sq, fwd, bck, i2, j2, TRUE) == eslOK)
- last_j2 = j2;
- }
- p7_spensemble_Reuse(ddef->sp);
- p7_trace_Reuse(ddef->trr);
- }
- else
- {
- /* The region looks simple, single domain; convert the region to an envelope. */
- ddef->nenvelopes++;
- rescore_isolated_domain(ddef, om, sq, fwd, bck, i, j, FALSE);
- }
- i = -1;
- triggered = FALSE;
- }
- }
-
- p7_oprofile_ReconfigMultihit(om, saveL, wholeSeqSz); /* restore original profile configuration */
- return eslOK;
-}
-
-
-
-/*****************************************************************
-* 3. Internal routines
-*****************************************************************/
-
-
-/* is_multidomain_region()
-* SRE, Fri Feb 8 11:35:04 2008 [Janelia]
-*
-* This defines the trigger for when we need to hand a "region" off to
-* a deeper analysis (using stochastic tracebacks and clustering)
-* because there's reason to suspect it may encompass two or more
-* domains.
-*
-* The criterion is to find the split point z at which the expected
-* number of E occurrences preceding B occurrences is maximized, and
-* if that number is greater than the heuristic threshold <ddef->rt3>,
-* then return TRUE. In other words, we're checking to see if there's
-* any point in the region at which it looks like an E was followed by
-* a B, as expected for a multidomain interpretation of the region.
-*
-* More precisely: return TRUE if \max_z [ \min (B(z), E(z)) ] >= rt3
-* where
-* E(z) = expected number of E states occurring in region before z is emitted
-* = \sum_{y=i}^{z} eocc[i] = etot[z] - etot[i-1]
-* B(z) = expected number of B states occurring in region after z is emitted
-* = \sum_{y=z}^{j} bocc[i] = btot[j] - btot[z-1]
-*
-*
-* Because this relies on the <ddef->etot> and <ddef->btot> arrays,
-* <calculate_domain_posteriors()> needs to have been called first.
-*
-* Xref: J2/101.
-*/
-static int
-is_multidomain_region(P7_DOMAINDEF *ddef, int i, int j)
-{
- int z;
- float max;
- float expected_n;
-
- max = -1.0;
- for (z = i; z <= j; z++)
- {
- expected_n = ESL_MIN( (ddef->etot[z] - ddef->etot[i-1]), (ddef->btot[j] - ddef->btot[z-1]));
- max = ESL_MAX(max, expected_n);
- }
-
- return ( (max >= ddef->rt3) ? TRUE : FALSE);
-}
-
-
-/* region_trace_ensemble()
-* SRE, Fri Feb 8 11:49:44 2008 [Janelia]
-*
-* Here, we've decided that region <ireg>..<jreg> in sequence <dsq> might be
-* composed of more than one domain, and we're going to use clustering
-* of a posterior ensemble of stochastic tracebacks to sort it out.
-*
-* Caller provides a filled Forward matrix in <fwd> for the sequence
-* region <dsq+ireg-1>, length <jreg-ireg+1>, for the model <om>
-* configured in multihit mode with its target length distribution
-* set to the total length of <dsq>: i.e., the same model
-* configuration used to score the complete sequence (if it weren't
-* multihit, we wouldn't be worried about multiple domains).
-*
-* Caller also provides a DP matrix in <wrk> containing at least one
-* row, for use as temporary workspace. (This will typically be the
-* caller's Backwards matrix, which we haven't yet used at this point
-* in the processing pipeline.)
-*
-* Caller provides <ddef>, which defines heuristic parameters that
-* control the clustering, and provides working space for the
-* calculation and the answers. The <ddef->sp> object must have been
-* reused (i.e., it needs to be fresh; we're going to use it here);
-* the caller needs to Reuse() it specifically, because it can't just
-* Reuse() the whole <ddef>, when it's in the process of analyzing
-* regions.
-*
-* Upon return, <*ret_nc> contains the number of clusters that were
-* defined.
-*
-* The caller can retrieve info on each cluster by calling
-* <p7_spensemble_GetClusterCoords(ddef->sp...)> on the
-* <P7_SPENSEMBLE> object in <ddef>.
-*
-* Other information on what's happened in working memory:
-*
-* <ddef->n2sc[ireg..jreg]> now contains log f'(x_i) / f(x_i) null2 scores
-* for each residue.
-*
-* <ddef->sp> gets filled in, and upon return, it's holding the answers
-* (the cluster definitions). When the caller is done retrieving those
-* answers, it needs to <esl_spensemble_Reuse()> it before calling
-* <region_trace_ensemble()> again.
-*
-* <ddef->tr> is used as working memory for sampled traces.
-*
-* <wrk> has had its zero row clobbered as working space for a null2 calculation.
-*/
-static int
-region_trace_ensemble(P7_DOMAINDEF *ddef, const P7_OPROFILE *om, const ESL_DSQ *dsq, int ireg, int jreg,
- const P7_OMX *fwd, P7_OMX *wrk, int *ret_nc)
-{
- int Lr = jreg-ireg+1;
- int t, d, d2;
- int nov, n;
- int nc;
- int pos;
- float null2[p7_MAXCODE];
-
- esl_vec_FSet(ddef->n2sc+ireg, Lr, 0.0); /* zero the null2 scores in region */
-
- /* By default, we make results reproducible by forcing a reset of
- * the RNG to its originally seeded state.
- */
- if (ddef->do_reseeding)
- esl_randomness_Init(ddef->r, esl_randomness_GetSeed(ddef->r));
-
- /* Collect an ensemble of sampled traces; calculate null2 odds ratios from these */
- for (t = 0; t < ddef->nsamples; t++)
- {
- p7_StochasticTrace(ddef->r, dsq+ireg-1, Lr, om, fwd, ddef->trr);
- p7_trace_Index(ddef->trr);
-
- pos = 1;
- for (d = 0; d < ddef->trr->ndom; d++)
- {
- p7_spensemble_Add(ddef->sp, t, ddef->trr->sqfrom[d]+ireg-1, ddef->trr->sqto[d]+ireg-1, ddef->trr->hmmfrom[d], ddef->trr->hmmto[d]);
-
- p7_Null2_ByTrace(om, ddef->trr, ddef->trr->tfrom[d], ddef->trr->tto[d], wrk, null2);
-
- /* residues outside domains get bumped +1: because f'(x) = f(x), so f'(x)/f(x) = 1 in these segments */
- for (; pos <= ddef->trr->sqfrom[d]; pos++) ddef->n2sc[ireg+pos-1] += 1.0;
-
- /* Residues inside domains get bumped by their null2 ratio */
- for (; pos <= ddef->trr->sqto[d]; pos++) ddef->n2sc[ireg+pos-1] += null2[dsq[ireg+pos-1]];
- }
- /* the remaining residues in the region outside any domains get +1 */
- for (; pos <= Lr; pos++) ddef->n2sc[ireg+pos-1] += 1.0;
-
- p7_trace_Reuse(ddef->trr);
- }
-
- /* Convert the accumulated n2sc[] ratios in this region to log odds null2 scores on each residue. */
- for (pos = ireg; pos <= jreg; pos++)
- ddef->n2sc[pos] = logf(ddef->n2sc[pos] / (float) ddef->nsamples);
-
- /* Cluster the ensemble of traces to break region into envelopes. */
- p7_spensemble_Cluster(ddef->sp, ddef->min_overlap, ddef->of_smaller, ddef->max_diagdiff, ddef->min_posterior, ddef->min_endpointp, &nc);
-
- /* A little hacky now. Remove "dominated" domains relative to seq coords. */
- for (d = 0; d < nc; d++)
- ddef->sp->assignment[d] = 0; /* overload <assignment> to flag that a domain is dominated */
-
- /* who dominates who? (by post prob) */
- for (d = 0; d < nc; d++)
- {
- for (d2 = d+1; d2 < nc; d2++)
- {
- nov = ESL_MIN(ddef->sp->sigc[d].j, ddef->sp->sigc[d2].j) - ESL_MAX(ddef->sp->sigc[d].i, ddef->sp->sigc[d2].i) + 1;
- if (nov == 0) break;
- n = ESL_MIN(ddef->sp->sigc[d].j - ddef->sp->sigc[d].i + 1, ddef->sp->sigc[d2].j - ddef->sp->sigc[d2].i + 1);
- if ((float) nov / (float) n >= 0.8) /* overlap */
- {
- if (ddef->sp->sigc[d].prob > ddef->sp->sigc[d2].prob) ddef->sp->assignment[d2] = 1;
- else ddef->sp->assignment[d] = 1;
- }
- }
- }
-
- /* shrink the sigc list, removing dominated domains */
- d = 0;
- for (d2 = 0; d2 < nc; d2++)
- {
- if (ddef->sp->assignment[d2]) continue; /* skip domain d2, it's dominated. */
- if (d != d2) memcpy(ddef->sp->sigc + d, ddef->sp->sigc + d2, sizeof(struct p7_spcoord_s));
- d++;
- }
- ddef->sp->nc = d;
- *ret_nc = d;
- return eslOK;
-}
-
-/* rescore_isolated_domain()
-* SRE, Fri Feb 8 09:18:33 2008 [Janelia]
-*
-* We have isolated a single domain's envelope from <i>..<j> in
-* sequence <sq>, and now we want to score it in isolation and obtain
-* an alignment display for it.
-*
-* (Later, we can add up all the individual domain scores from this
-* seq into a new per-seq score, to compare to the original per-seq
-* score).
-*
-* The caller provides model <om> configured in unilocal mode; by
-* using unilocal (as opposed to multilocal), we're going to force the
-* identification of a single domain in this envelope now.
-*
-* The alignment is an optimal accuracy alignment (sensu IH Holmes),
-* also obtained in unilocal mode.
-*
-* The caller provides DP matrices <ox1> and <ox2> with sufficient
-* space to hold Forward and Backward calculations for this domain
-* against the model. (The caller will typically already have matrices
-* sufficient for the complete sequence lying around, and can just use
-* those.) The caller also provides a <P7_DOMAINDEF> object which is
-* (efficiently, we trust) managing any necessary temporary working
-* space and heuristic thresholds.
-*
-* Returns <eslOK> if a domain was successfully identified, scored,
-* and aligned in the envelope; if so, the per-domain information is
-* registered in <ddef>, in <ddef->dcl>.
-*
-* And here's what's happened to our working memory:
-*
-* <ddef>: <ddef->tr> has been used, and possibly reallocated, for
-* the OA trace of the domain. Before exit, we called
-* <Reuse()> on it.
-*
-* <ox1> : happens to be holding OA score matrix for the domain
-* upon return, but that's not part of the spec; officially
-* its contents are "undefined".
-*
-* <ox2> : happens to be holding a posterior probability matrix
-* for the domain upon return, but we're not making that
-* part of the spec, so caller shouldn't rely on this;
-* spec just makes its contents "undefined".
-*/
-static int
-rescore_isolated_domain(P7_DOMAINDEF *ddef, const P7_OPROFILE *om, const ESL_SQ *sq,
- P7_OMX *ox1, P7_OMX *ox2, int i, int j, int null2_is_done)
-{
- P7_DOMAIN *dom = NULL;
- int Ld = j-i+1;
- float domcorrection = 0.0;
- float envsc, oasc;
- int z;
- int pos;
- float null2[p7_MAXCODE];
- int status;
-
- U2::TaskStateInfo tmpTi;
- p7_Forward (sq->dsq + i-1, Ld, om, ox1, &envsc, 0, tmpTi );
- p7_Backward(sq->dsq + i-1, Ld, om, ox1, ox2, NULL, 0, tmpTi );
-
- status = p7_Decoding(om, ox1, ox2, ox2); /* <ox2> is now overwritten with post probabilities */
- if (status == eslERANGE) return eslFAIL; /* rare: numeric overflow; domain is assumed to be repetitive garbage [J3/119-212] */
-
- /* Is null2 set already for this i..j? (It is, if we're in a domain that
- * was defined by stochastic traceback clustering in a multidomain region;
- * it isn't yet, if we're in a simple one-domain region). If it isn't,
- * do it now, by the expectation (posterior decoding) method.
- */
- if (! null2_is_done) {
- p7_Null2_ByExpectation(om, ox2, null2);
- for (pos = i; pos <= j; pos++) {
- //ddef->n2sc[pos] = logf(null2[sq->dsq[pos]]);
- ddef->n2sc[pos] = log((double)null2[sq->dsq[pos]]);
- }
- }
-
- for (pos = i; pos <= j; pos++)
- domcorrection += ddef->n2sc[pos];
-
- /* Find an optimal accuracy alignment */
- p7_OptimalAccuracy(om, ox2, ox1, &oasc); /* <ox1> is now overwritten with OA scores */
- p7_OATrace (om, ox2, ox1, ddef->trr); /* <tr>'s seq coords are offset by i-1, rel to orig dsq */
-
- /* hack the trace's sq coords to be correct w.r.t. original dsq */
- for (z = 0; z < ddef->trr->N; z++)
- if (ddef->trr->i[z] > 0) ddef->trr->i[z] += i-1;
-
- /* get ptr to next empty domain structure in domaindef's results */
- if (ddef->ndom == ddef->nalloc) {
- void *p;
- ESL_RALLOC_WITH_TYPE(ddef->dcl, P7_DOMAIN*, p, sizeof(P7_DOMAIN) * (ddef->nalloc*2));
- ddef->nalloc *= 2;
- }
- dom = &(ddef->dcl[ddef->ndom]);
-
- /* store the results in it */
- dom->ienv = i;
- dom->jenv = j;
- dom->envsc = envsc;
- dom->domcorrection = domcorrection;
- dom->oasc = oasc;
- dom->dombias = 0.0; /* gets set later, using bg->omega and dombias */
- dom->bitscore = 0.0; /* gets set later by caller, using envsc, null score, and dombias */
- dom->pvalue = 1.0; /* gets set later by caller, using bitscore */
- dom->is_reported = FALSE; /* gets set later by caller */
- dom->is_included = FALSE; /* gets set later by caller */
- dom->ad = p7_alidisplay_Create(ddef->trr, 0, om, sq);
- dom->iali = dom->ad->sqfrom;
- dom->jali = dom->ad->sqto;
-
- ddef->ndom++;
-
- p7_trace_Reuse(ddef->trr);
- return eslOK;
-
-ERROR:
- p7_trace_Reuse(ddef->trr);
- return status;
-}
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/p7_gmx.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/p7_gmx.cpp
deleted file mode 100644
index 2e6bce6..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/p7_gmx.cpp
+++ /dev/null
@@ -1,305 +0,0 @@
-/* P7_GMX implementation: a generic dynamic programming matrix
-*
-* Contents:
-* 1. The <P7_GMX> object
-* 2. Debugging aids
-* 5. Copyright and license information
-*
-* SRE, Tue Jan 30 11:14:11 2007 [Einstein's, in St. Louis]
- * SVN $Id: p7_gmx.c 3048 2009-11-13 14:11:46Z eddys $
-*/
-
-#include "p7_config.h"
-#include "hmmer.h"
-
-/*****************************************************************
-*= 1. The <P7_GMX> object.
-*****************************************************************/
-
-/* Function: p7_gmx_Create()
-* Incept: SRE, Tue Jan 30 11:20:33 2007 [Einstein's, in St. Louis]
-*
-* Purpose: Allocate a reusable, resizeable <P7_GMX> for models up to
-* size <allocM> and sequences up to length <allocL>.
-*
-* We've set this up so it should be easy to allocate
-* aligned memory, though we're not doing this yet.
-*
-* Returns: a pointer to the new <P7_GMX>.
-*
-* Throws: <NULL> on allocation error.
-*/
-P7_GMX *
-p7_gmx_Create(int allocM, int allocL)
-{
- int status;
- P7_GMX *gx = NULL;
- int i;
-
- /* level 1: the structure itself */
- ESL_ALLOC_WITH_TYPE(gx, P7_GMX*, sizeof(P7_GMX));
- gx->dp = NULL;
- gx->xmx = NULL;
- gx->dp_mem = NULL;
-
- /* level 2: row pointers, 0.1..L; and dp cell memory */
- ESL_ALLOC_WITH_TYPE(gx->dp, float**, sizeof(float *) * (allocL+1));
- ESL_ALLOC_WITH_TYPE(gx->xmx, float*, sizeof(float) * (allocL+1) * p7G_NXCELLS);
- ESL_ALLOC_WITH_TYPE(gx->dp_mem, float*, sizeof(float) * (allocL+1) * (allocM+1) * p7G_NSCELLS);
-
- /* Set the row pointers. */
- for (i = 0; i <= allocL; i++)
- gx->dp[i] = gx->dp_mem + i * (allocM+1) * p7G_NSCELLS;
-
- /* Initialize memory that's allocated but unused, only to keep
- * valgrind and friends happy.
- */
- for (i = 0; i <= allocL; i++)
- {
- gx->dp[i][0 * p7G_NSCELLS + p7G_M] = -eslINFINITY; /* M_0 */
- gx->dp[i][0 * p7G_NSCELLS + p7G_I] = -eslINFINITY; /* I_0 */
- gx->dp[i][0 * p7G_NSCELLS + p7G_D] = -eslINFINITY; /* D_0 */
- gx->dp[i][1 * p7G_NSCELLS + p7G_D] = -eslINFINITY; /* D_1 */
- gx->dp[i][allocM * p7G_NSCELLS + p7G_I] = -eslINFINITY; /* I_M */
- }
-
- gx->M = 0;
- gx->L = 0;
- gx->allocW = allocM+1;
- gx->allocR = allocL+1;
- gx->validR = allocL+1;
- gx->ncells = (uint64_t) (allocM+1)* (uint64_t) (allocL+1);
- return gx;
-
-ERROR:
- if (gx != NULL) p7_gmx_Destroy(gx);
- return NULL;
-}
-
-/* Function: p7_gmx_GrowTo()
-* Synopsis: Assure that DP matrix is big enough.
-* Incept: SRE, Tue Jan 30 11:31:23 2007 [Olin Library, St. Louis]
-*
-* Purpose: Assures that a DP matrix <gx> is allocated
-* for a model of size up to <M> and a sequence of
-* length up to <L>; reallocates if necessary.
-*
-* This function does not respect the configured
-* <RAMLIMIT>; it will allocate what it's told to
-* allocate.
-*
-* Returns: <eslOK> on success, and <gx> may be reallocated upon
-* return; any data that may have been in <gx> must be
-* assumed to be invalidated.
-*
-* Throws: <eslEMEM> on allocation failure, and any data that may
-* have been in <gx> must be assumed to be invalidated.
-*/
-int
-p7_gmx_GrowTo(P7_GMX *gx, int M, int L)
-{
- int status;
- void *p;
- int i;
- uint64_t ncells;
- int do_reset = FALSE;
-
- if (M < gx->allocW && L < gx->validR) return eslOK;
-
- /* must we realloc the 2D matrices? (or can we get away with just
- * jiggering the row pointers, if we are growing in one dimension
- * while shrinking in another?)
- */
- ncells = (uint64_t) (M+1) * (uint64_t) (L+1);
- if (ncells > gx->ncells)
- {
- ESL_RALLOC_WITH_TYPE(gx->dp_mem, float*, p, sizeof(float) * ncells * p7G_NSCELLS);
- gx->ncells = ncells;
- do_reset = TRUE;
- }
-
- /* must we reallocate the row pointers? */
- if (L >= gx->allocR)
- {
- ESL_RALLOC_WITH_TYPE(gx->xmx, float*, p, sizeof(float) * (L+1) * p7G_NXCELLS);
- ESL_RALLOC_WITH_TYPE(gx->dp, float**, p, sizeof(float *) * (L+1));
- gx->allocR = L+1;
- gx->allocW = M+1;
- do_reset = TRUE;
- }
-
- /* must we widen the rows? */
- if (M >= gx->allocW)
- {
- gx->allocW = M+1;
- do_reset = TRUE;
- }
-
- /* must we set some more valid row pointers? */
- if (L >= gx->validR)
- do_reset = TRUE;
-
- /* reset all the row pointers.*/
- if (do_reset)
- {
- gx->validR = ESL_MIN(gx->ncells / gx->allocW, gx->allocR);
- for (i = 0; i < gx->validR; i++)
- gx->dp[i] = gx->dp_mem + i * (gx->allocW) * p7G_NSCELLS;
- }
-
- gx->M = 0;
- gx->L = 0;
- return eslOK;
-
-ERROR:
- return status;
-}
-
-/* Function: p7_gmx_Reuse()
- * Synopsis: Recycle a generic DP matrix.
- * Incept: SRE, Fri Nov 13 08:48:52 2009 [Janelia]
- *
- * Purpose: Recycles <gx> for reuse.
- *
- * Returns: <eslOK> on success.
- */
-int
-p7_gmx_Reuse(P7_GMX *gx)
-{
- /* not much to do here. The memory rearrangement for a new seq is all in GrowTo(). */
- gx->M = 0;
- gx->L = 0;
- return eslOK;
-}
-
-
-/* Function: p7_gmx_Destroy()
-* Synopsis: Frees a DP matrix.
-* Incept: SRE, Tue Jan 30 11:17:36 2007 [Einstein's, in St. Louis]
-*
-* Purpose: Frees a <P7_GMX>.
-*
-* Returns: (void)
-*/
-void
-p7_gmx_Destroy(P7_GMX *gx)
-{
- if (gx == NULL) return;
-
- if (gx->dp != NULL) free(gx->dp);
- if (gx->xmx != NULL) free(gx->xmx);
- if (gx->dp_mem != NULL) free(gx->dp_mem);
- free(gx);
- return;
-}
-
-/*****************************************************************
-* 2. Debugging aids
-*****************************************************************/
-
-/* Function: p7_gmx_Compare()
-* Synopsis: Compare two DP matrices for equality within given tolerance.
-* Incept: SRE, Sat May 16 09:56:41 2009 [Janelia]
-*
-* Purpose: Compare all the values in DP matrices <gx1> and <gx2> using
-* <esl_FCompare()> and relative epsilon <tolerance>. If any
-* value pairs differ by more than the acceptable <tolerance>
-* return <eslFAIL>. If all value pairs are identical within
-* tolerance, return <eslOK>.
-*/
-int
-p7_gmx_Compare(P7_GMX *gx1, P7_GMX *gx2, float tolerance)
-{
- int i,k,x;
- if (gx1->M != gx2->M) return eslFAIL;
- if (gx1->L != gx2->L) return eslFAIL;
-
- for (i = 0; i <= gx1->L; i++)
- {
- for (k = 1; k <= gx1->M; k++) /* k=0 is a boundary; doesn't need to be checked */
- {
- if (esl_FCompare(gx1->dp[i][k * p7G_NSCELLS + p7G_M], gx2->dp[i][k * p7G_NSCELLS + p7G_M], tolerance) != eslOK) return eslFAIL;
- if (esl_FCompare(gx1->dp[i][k * p7G_NSCELLS + p7G_I], gx2->dp[i][k * p7G_NSCELLS + p7G_I], tolerance) != eslOK) return eslFAIL;
- if (esl_FCompare(gx1->dp[i][k * p7G_NSCELLS + p7G_D], gx2->dp[i][k * p7G_NSCELLS + p7G_D], tolerance) != eslOK) return eslFAIL;
- }
- for (x = 0; x < p7G_NXCELLS; x++)
- if (esl_FCompare(gx1->xmx[i * p7G_NXCELLS + x], gx2->xmx[i * p7G_NXCELLS + x], tolerance) != eslOK) return eslFAIL;
- }
- return eslOK;
-}
-
-
-
-/* Function: p7_gmx_Dump()
-* Synopsis: Dump a DP matrix to a stream, for diagnostics.
-* Incept: SRE, Fri Jul 13 09:56:04 2007 [Janelia]
-*
-* Purpose: Dump matrix <gx> to stream <fp> for diagnostics.
-*/
-int
-p7_gmx_Dump(FILE *ofp, P7_GMX *gx)
-{
- return p7_gmx_DumpWindow(ofp, gx, 0, gx->L, 0, gx->M, TRUE);
-}
-
-
-/* Function: p7_gmx_DumpWindow()
-* Synopsis: Dump a window of a DP matrix to a stream for diagnostics.
-* Incept: SRE, Mon Apr 14 08:45:28 2008 [Janelia]
-*
-* Purpose: Dump a window of matrix <gx> to stream <fp> for diagnostics,
-* from row <istart> to <iend>, from column <kstart> to <kend>.
-*
-* If <show_specials> is <TRUE>, scores for the special
-* <ENJBC> states are also displayed.
-*
-* Asking for <0..L,0..M> with <show_specials=TRUE> is the
-* same as <p7_gmx_Dump()>.
-*
-* Returns: <eslOK> on success.
-*/
-int
-p7_gmx_DumpWindow(FILE *ofp, P7_GMX *gx, int istart, int iend, int kstart, int kend, int show_specials)
-{
- int i, k, x;
- int width = 9;
- int precision = 4;
-
- /* Header */
- fprintf(ofp, " ");
- for (k = kstart; k <= kend; k++) fprintf(ofp, "%*d ", width, k);
- if (show_specials) fprintf(ofp, "%*s %*s %*s %*s %*s\n", width, "E", width, "N", width, "J", width, "B", width, "C");
- fprintf(ofp, " ");
- for (k = kstart; k <= kend; k++) fprintf(ofp, "%*.*s ", width, width, "----------");
- if (show_specials) fprintf(ofp, "%*.*s ", width, width, "----------");
- fprintf(ofp, "\n");
-
- /* DP matrix data */
- for (i = istart; i <= iend; i++)
- {
- fprintf(ofp, "%3d M ", i);
- for (k = kstart; k <= kend; k++) fprintf(ofp, "%*.*f ", width, precision, gx->dp[i][k * p7G_NSCELLS + p7G_M]);
- if (show_specials)
- for (x = 0; x < p7G_NXCELLS; x++) fprintf(ofp, "%*.*f ", width, precision, gx->xmx[ i * p7G_NXCELLS + x]);
- fprintf(ofp, "\n");
-
- fprintf(ofp, "%3d I ", i);
- for (k = kstart; k <= kend; k++) fprintf(ofp, "%*.*f ", width, precision, gx->dp[i][k * p7G_NSCELLS + p7G_I]);
- fprintf(ofp, "\n");
-
- fprintf(ofp, "%3d D ", i);
- for (k = kstart; k <= kend; k++) fprintf(ofp, "%*.*f ", width, precision, gx->dp[i][k * p7G_NSCELLS + p7G_D]);
- fprintf(ofp, "\n\n");
- }
- return eslOK;
-}
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/p7_hmm.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/p7_hmm.cpp
deleted file mode 100644
index 1b9d802..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/p7_hmm.cpp
+++ /dev/null
@@ -1,1222 +0,0 @@
-/* The Plan7 core HMM data structure.
-*
-* Contents:
-* 1. The P7_HMM object: allocation, initialization, destruction.
-* 2. Convenience routines for setting fields in an HMM.
-* 3. Renormalization and rescaling counts in core HMMs.
-* 4. Debugging and development code.
-* 5. Other routines in the API.
-* 8. Copyright and license.
-*
-* SRE, Mon Jan 1 16:20:29 2007 [Casa de Gatos] [Verdi, La Traviata]
- * SVN $Id: p7_hmm.c 3051 2009-11-15 13:50:20Z eddys $
-*/
-#include <hmmer3/p7_config.h>
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <time.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_alphabet.h>
-#include <hmmer3/easel/esl_vectorops.h>
-#include <hmmer3/easel/esl_random.h>
-#include <hmmer3/easel/esl_alphabet.h>
-
-#include <hmmer3/hmmer.h>
-
-
-
-
-/*****************************************************************
-*# 1. The P7_HMM object: allocation, initialization, destruction.
-*****************************************************************/
-
-/* Function: p7_hmm_Create()
-* Incept: SRE, Fri Mar 31 14:07:43 2006 [St. Louis]
-*
-* Purpose: Allocate a <P7_HMM> of <M> nodes, for symbol
-* alphabet <abc>, and return a pointer to it.
-*
-* The HMM only keeps a copy of the <abc> alphabet
-* pointer. The caller is responsible for providing the
-* alphabet, keeping it around while the HMM is in use,
-* and (eventually) free'ing the alphabet when it's
-* not needed any more. (Basically, just a step removed
-* from keeping the alphabet as a global.)
-*
-* Throws: <NULL> on allocation failure.
-*/
-// ! HMM CREATION CODE CHANGED !
-P7_HMM *
-p7_hmm_Create(int M, const ESL_ALPHABET *abc)
-{
- P7_HMM *hmm = NULL;
-
- if ((hmm = p7_hmm_CreateShell()) == NULL) return NULL;
- p7_hmm_CreateBody(hmm, M, abc->type);
- return hmm;
-}
-
-P7_HMM *
-p7_hmm_Create(int M, int alType, int flags )
-{
- P7_HMM *hmm = NULL;
-
- if ((hmm = p7_hmm_CreateShell()) == NULL) return NULL;
- hmm->flags = flags;
- p7_hmm_CreateBody(hmm, M, alType );
- return hmm;
-}
-
-
-/* Function: p7_hmm_CreateShell()
-* Incept: SRE, Fri Mar 31 14:09:45 2006 [St. Louis]
-*
-* Purpose: Allocate the shell of a <P7_HMM>: everything that
-* doesn't depend on knowing the number of nodes M.
-*
-* HMM input (<hmmio.c>) uses two-step shell/body
-* allocation because it has to read for a ways from the
-* HMM file before it reads the model size M or the
-* alphabet type.
-*
-* Returns: a pointer to the new <P7_HMM> on success.
-*
-* Throws: <NULL> on allocation failure.
-*/
-P7_HMM *
-p7_hmm_CreateShell(void)
-{
- P7_HMM *hmm = NULL;
- int z;
- int status;
-
- ESL_ALLOC_WITH_TYPE(hmm, P7_HMM*, sizeof(P7_HMM));
- hmm->M = 0;
- hmm->t = NULL;
- hmm->mat = NULL;
- hmm->ins = NULL;
-
- hmm->name = NULL;
- hmm->acc = NULL;
- hmm->desc = NULL;
- hmm->rf = NULL;
- hmm->cs = NULL;
- hmm->ca = NULL;
- hmm->comlog = NULL;
- hmm->nseq = -1;
- hmm->eff_nseq = -1.0;
- hmm->ctime = NULL;
- hmm->map = NULL;
- hmm->checksum = 0;
-
- for (z = 0; z < p7_NCUTOFFS; z++) hmm->cutoff[z] = p7_CUTOFF_UNSET;
- for (z = 0; z < p7_NEVPARAM; z++) hmm->evparam[z] = p7_EVPARAM_UNSET;
- for (z = 0; z < p7_MAXABET; z++) hmm->compo[z] = p7_COMPO_UNSET;
-
- hmm->offset = 0;
- hmm->flags = 0;
- hmm->abc = NULL;
- return hmm;
-
-ERROR:
- return NULL;
-}
-
-/* Function: p7_hmm_CreateBody()
-* Incept: SRE, Fri Mar 31 14:24:44 2006 [St. Louis]
-*
-* Purpose: Given an allocated shell <hmm>, and a now-known number
-* of nodes <M> and alphabet <abc>, allocate
-* the remainder of it for that many nodes.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure; in this case, the HMM
-* is likely corrupted, and the caller should destroy it.
-*/
-int
-p7_hmm_CreateBody(P7_HMM *hmm, int M, int alType )
-{
- int k;
- int status;
-
- hmm->abc = esl_alphabet_Create( alType );
-
- hmm->M = M;
-
- /* level 1 */
- ESL_ALLOC_WITH_TYPE(hmm->t, float**, (M+1) * sizeof(float *));
- ESL_ALLOC_WITH_TYPE(hmm->mat, float**, (M+1) * sizeof(float *));
- ESL_ALLOC_WITH_TYPE(hmm->ins, float**, (M+1) * sizeof(float *));
- hmm->t[0] = NULL;
- hmm->mat[0] = NULL;
- hmm->ins[0] = NULL;
-
- /* level 2 */
- ESL_ALLOC_WITH_TYPE(hmm->t[0], float*, (p7H_NTRANSITIONS*(M+1)) * sizeof(float));
- ESL_ALLOC_WITH_TYPE(hmm->mat[0], float*, (hmm->abc->K*(M+1)) * sizeof(float));
- ESL_ALLOC_WITH_TYPE(hmm->ins[0], float*, (hmm->abc->K*(M+1)) * sizeof(float));
- for (k = 1; k <= M; k++) {
- hmm->mat[k] = hmm->mat[0] + k * hmm->abc->K;
- hmm->ins[k] = hmm->ins[0] + k * hmm->abc->K;
- hmm->t[k] = hmm->t[0] + k * p7H_NTRANSITIONS;
- }
-
- /* Enforce conventions on unused but allocated distributions, so
- * Compare() tests succeed unless memory was corrupted.
- */
- if ((status = p7_hmm_Zero(hmm)) != eslOK) goto ERROR;
- hmm->mat[0][0] = 1.0;
- hmm->t[0][p7H_DM] = 1.0;
-
- /* Optional allocation, status flag dependent */
- if (hmm->flags & p7H_RF) ESL_ALLOC_WITH_TYPE(hmm->rf, char*, (M+2) * sizeof(char));
- if (hmm->flags & p7H_CS) ESL_ALLOC_WITH_TYPE(hmm->cs, char*, (M+2) * sizeof(char));
- if (hmm->flags & p7H_CA) ESL_ALLOC_WITH_TYPE(hmm->ca, char*, (M+2) * sizeof(char));
- if (hmm->flags & p7H_MAP) ESL_ALLOC_WITH_TYPE(hmm->map, int*, (M+1) * sizeof(int));
-
- return eslOK;
-
-ERROR:
- return status;
-}
-
-
-/* Function: p7_hmm_Destroy()
-* Incept: SRE, Fri Mar 31 15:13:25 2006 [St. Louis]
-*
-* Purpose: Frees both the shell and body of an <hmm>.
-* Works even if the <hmm> is damaged (incompletely allocated)
-* or even <NULL>.
-*
-* Note: Remember, leave reference pointers like abc, gm, and
-* bg alone. These are under the application's control not ours.
-*
-* Returns: (void).
-*/
-void
-p7_hmm_Destroy(P7_HMM *hmm)
-{
- if (hmm == NULL) return;
-
- if (hmm->mat != NULL) {
- if (hmm->mat[0] != NULL) free(hmm->mat[0]);
- free(hmm->mat);
- }
- if (hmm->ins != NULL) {
- if (hmm->ins[0] != NULL) free(hmm->ins[0]);
- free(hmm->ins);
- }
- if (hmm->t != NULL) {
- if (hmm->t[0] != NULL) free(hmm->t[0]);
- free(hmm->t);
- }
-
- if (hmm->name != NULL) free(hmm->name);
- if (hmm->acc != NULL) free(hmm->acc);
- if (hmm->desc != NULL) free(hmm->desc);
- if (hmm->rf != NULL) free(hmm->rf);
- if (hmm->cs != NULL) free(hmm->cs);
- if (hmm->ca != NULL) free(hmm->ca);
- if (hmm->comlog != NULL) free(hmm->comlog);
- if (hmm->ctime != NULL) free(hmm->ctime);
- if (hmm->map != NULL) free(hmm->map);
- // ! CODE ADDED: !
- if (hmm->abc != NULL) esl_alphabet_Destroy(hmm->abc);
-
- free(hmm);
- return;
-}
-
-/* Function: p7_hmm_CopyParameters()
-* Incept: SRE, Fri May 4 14:10:17 2007 [Janelia]
-*
-* Purpose: Copy parameters of <src> to <dest>. The HMM <dest> must
-* be allocated by the caller for the same
-* alphabet and M as <src>.
-*
-* Both core and search model parameters are copied.
-*
-* No annotation is copied. This is because several
-* annotation fields are variable-length strings that
-* require individual allocations. The
-* <p7_hmm_CopyParameters()> function is for cases where we
-* have to repeatedly reset the parameters of a model - for
-* example, in entropy weighting.
-*
-* Returns: <eslOK> on success.
-*/
-int
-p7_hmm_CopyParameters(const P7_HMM *src, P7_HMM *dest)
-{
- int k;
- for (k = 0; k <= src->M; k++) {
- esl_vec_FCopy(src->t[k], p7H_NTRANSITIONS, dest->t[k]);
- esl_vec_FCopy(src->mat[k], src->abc->K, dest->mat[k]);
- esl_vec_FCopy(src->ins[k], src->abc->K, dest->ins[k]);
- }
- return eslOK;
-}
-
-/* Function: p7_hmm_Clone()
-* Incept: SRE, Fri Jan 26 15:34:42 2007 [Janelia]
-*
-* Purpose: Duplicates an hmm.
-*
-* Note: does not duplicate the objects the HMM refers to,
-* if any (profile, null model, or alphabet); only copies
-* the reference pointers.
-*
-* Returns: a pointer to the duplicate.
-*
-* Throws: <NULL> on allocation failure.
-*/
-P7_HMM *
-p7_hmm_Clone(const P7_HMM *hmm)
-{
- int status;
- P7_HMM *newHMM = NULL;
- int z;
-
- if ((newHMM = p7_hmm_Create(hmm->M, hmm->abc)) == NULL) goto ERROR;
- p7_hmm_CopyParameters(hmm, newHMM);
-
- if ((status = esl_strdup(hmm->name, -1, &(newHMM->name))) != eslOK) goto ERROR;
- if ((status = esl_strdup(hmm->acc, -1, &(newHMM->acc))) != eslOK) goto ERROR;
- if ((status = esl_strdup(hmm->desc, -1, &(newHMM->desc))) != eslOK) goto ERROR;
-
- if ((hmm->flags & p7H_RF) && (status = esl_strdup(hmm->rf, -1, &(newHMM->rf))) != eslOK) goto ERROR;
- if ((hmm->flags & p7H_CS) && (status = esl_strdup(hmm->cs, -1, &(newHMM->cs))) != eslOK) goto ERROR;
- if ((hmm->flags & p7H_CA) && (status = esl_strdup(hmm->ca, -1, &(newHMM->ca))) != eslOK) goto ERROR;
- if ((hmm->comlog != NULL) && (status = esl_strdup(hmm->comlog, -1, &(newHMM->comlog))) != eslOK) goto ERROR;
- if ((hmm->ctime != NULL) && (status = esl_strdup(hmm->ctime, -1, &(newHMM->ctime))) != eslOK) goto ERROR;
- if (hmm->flags & p7H_MAP) {
- ESL_ALLOC_WITH_TYPE(newHMM->map, int*, sizeof(int) * (hmm->M+1));
- esl_vec_ICopy(hmm->map, hmm->M+1, newHMM->map);
- }
- newHMM->nseq = hmm->nseq;
- newHMM->eff_nseq = hmm->eff_nseq;
- newHMM->checksum = hmm->checksum;
-
- for (z = 0; z < p7_NEVPARAM; z++) newHMM->evparam[z] = hmm->evparam[z];
- for (z = 0; z < p7_NCUTOFFS; z++) newHMM->cutoff[z] = hmm->cutoff[z];
- for (z = 0; z < p7_MAXABET; z++) newHMM->compo[z] = hmm->compo[z];
-
- newHMM->offset = hmm->offset;
- newHMM->flags = hmm->flags;
- // ! CODE CHANGED: creating new alphabet, because in ugene hmm need to store its own hmm !
- newHMM->abc = esl_alphabet_Create( hmm->abc->type );
- return newHMM;
-
-ERROR:
- if (newHMM != NULL) p7_hmm_Destroy(newHMM);
- return NULL;
-}
-
-/* Function: p7_hmm_Scale()
-* Incept: SRE, Fri May 4 14:19:33 2007 [Janelia]
-*
-* Purpose: Given a counts-based model <hmm>, scale core
-* by a multiplicative factor of <scale>. Used in
-* absolute sequence weighting.
-*
-* Returns: <eslOK> on success.
-*/
-int
-p7_hmm_Scale(P7_HMM *hmm, double scale)
-{
- int k;
-
- for (k = 0; k <= hmm->M; k++) {
- esl_vec_FScale(hmm->t[k], p7H_NTRANSITIONS, scale);
- esl_vec_FScale(hmm->mat[k], hmm->abc->K, scale);
- esl_vec_FScale(hmm->ins[k], hmm->abc->K, scale);
- }
- return eslOK;
-}
-
-
-/* Function: p7_hmm_Zero()
-* Incept: SRE, Mon Jan 1 16:32:59 2007 [Casa de Gatos]
-*
-* Purpose: Zeroes all counts/probabilities fields in core model,
-* including emissions, transitions, and model
-* composition.
-*
-* Returns: <eslOK> on success.
-*/
-int
-p7_hmm_Zero(P7_HMM *hmm)
-{
- int k;
-
- for (k = 0; k <= hmm->M; k++) {
- esl_vec_FSet(hmm->t[k], p7H_NTRANSITIONS, 0.);
- esl_vec_FSet(hmm->mat[k], hmm->abc->K, 0.);
- esl_vec_FSet(hmm->ins[k], hmm->abc->K, 0.);
- }
- esl_vec_FSet(hmm->compo, p7_MAXABET, 0.);
- return eslOK;
-}
-
-
-
-/* Function: p7_hmm_EncodeStatetype()
-* Synopsis: Convert a state type string to internal code.
-* Incept: SRE, Sat Oct 25 10:48:43 2008 [Janelia]
-*
-* Purpose: Converts state type string <typestring> case insensitively to
-* an internal code, and returns the code. For example,
-* <p7_hmm_DecodeStatetype("M")> returns <p7T_M>.
-*
-* If the string isn't recognized, returns <p7T_BOGUS>.
-*/
-char
-p7_hmm_EncodeStatetype(char *typestring)
-{
- if (esl_strcasecmp(typestring, "M") == 0) return p7T_M;
- else if (esl_strcasecmp(typestring, "D") == 0) return p7T_D;
- else if (esl_strcasecmp(typestring, "I") == 0) return p7T_I;
- else if (esl_strcasecmp(typestring, "S") == 0) return p7T_S;
- else if (esl_strcasecmp(typestring, "N") == 0) return p7T_N;
- else if (esl_strcasecmp(typestring, "B") == 0) return p7T_B;
- else if (esl_strcasecmp(typestring, "E") == 0) return p7T_E;
- else if (esl_strcasecmp(typestring, "C") == 0) return p7T_C;
- else if (esl_strcasecmp(typestring, "T") == 0) return p7T_T;
- else if (esl_strcasecmp(typestring, "J") == 0) return p7T_J;
- else if (esl_strcasecmp(typestring, "X") == 0) return p7T_X;
- else return p7T_BOGUS;
-}
-
-/* Function: p7_hmm_DecodeStatetype()
-* Incept: SRE, Mon Jan 1 18:47:34 2007 [Casa de Gatos]
-*
-* Purpose: Returns the state type in text, as a string of length 1
-* (2 if you count <NUL>). For example, <p7_DecodeStatetype(p7T_S)>
-* returns "S".
-*
-* Throws: an internal <eslEINVAL> exception if the code doesn't
-* exist, and returns <NULL>.
-*/
-char *
-p7_hmm_DecodeStatetype(char st)
-{
- switch (st) {
- case p7T_M: return "M";
- case p7T_D: return "D";
- case p7T_I: return "I";
- case p7T_S: return "S";
- case p7T_N: return "N";
- case p7T_B: return "B";
- case p7T_E: return "E";
- case p7T_C: return "C";
- case p7T_T: return "T";
- case p7T_J: return "J";
- case p7T_X: return "X";
- default: break;
- }
- esl_exception(eslEINVAL, __FILE__, __LINE__, "no such statetype code %d", st);
- return NULL;
-}
-
-
-
-
-/*****************************************************************
-* 2. Convenience routines for setting fields in an HMM.
-*****************************************************************/
-
-/* Function: p7_hmm_SetName()
-* Incept: SRE, Mon Jan 1 16:53:23 2007 [Casa de Gatos]
-*
-* Purpose: Set or change the name of a Plan7 HMM to <name>.
-* Any trailing whitespace (including newline) is chopped off.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation error, and original name (if any)
-* remains.
-*/
-int
-p7_hmm_SetName(P7_HMM *hmm, char *name)
-{
- int status;
- void *tmp;
- int n;
-
- if (name == NULL) {
- if (hmm->name != NULL) free(hmm->name);
- hmm->name = NULL;
- } else {
- n = strlen(name);
- ESL_RALLOC_WITH_TYPE(hmm->name, char*, tmp, sizeof(char)*(n+1));
- strcpy(hmm->name, name);
- if ((status = esl_strchop(hmm->name, n)) != eslOK) goto ERROR;
- }
- return eslOK;
-
-ERROR:
- return status;
-}
-
-/* Function: p7_hmm_SetAccession()
-* Incept: SRE, Mon Jan 1 16:53:53 2007 [Casa de Gatos]
-*
-* Purpose: Set or change the accession number of a Plan7 HMM to <acc>,
-* and raise the <P7_ACC> flag. Trailing whitespace (including newline)
-* is chopped.
-*
-* If <acc> is <NULL>, unset the HMM's accession (if any) and drop
-* the <P7_ACC> flag.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation error, and original name (if any)
-* remains.
-*/
-int
-p7_hmm_SetAccession(P7_HMM *hmm, char *acc)
-{
- int status;
- void *tmp;
- int n;
-
- if (acc == NULL) {
- if (hmm->acc != NULL) free(hmm->acc);
- hmm->acc = NULL;
- hmm->flags &= ~p7H_ACC;
- } else {
- n = strlen(acc);
- ESL_RALLOC_WITH_TYPE(hmm->acc, char*, tmp, sizeof(char)*(n+1));
- strcpy(hmm->acc, acc);
- if ((status = esl_strchop(hmm->acc, n)) != eslOK) goto ERROR;
- hmm->flags |= p7H_ACC;
- }
- return eslOK;
-
-ERROR:
- return status;
-}
-
-/* Function: p7_hmm_SetDescription()
-* Incept: SRE, Mon Jan 1 16:59:28 2007 [Casa de Gatos]
-*
-* Purpose: Set or change the description line of a Plan7 HMM.
-* Trailing whitespace (including newline) is chopped.
-*/
-int
-p7_hmm_SetDescription(P7_HMM *hmm, char *desc)
-{
- int status;
- void *tmp;
- int n;
-
- if (desc == NULL)
- {
- if (hmm->desc != NULL) free(hmm->desc);
- hmm->desc = NULL;
- hmm->flags &= ~p7H_DESC;
- }
- else
- {
- n = strlen(desc);
- ESL_RALLOC_WITH_TYPE(hmm->desc, char*, tmp, sizeof(char)*(n+1));
- strcpy(hmm->desc, desc);
- if ((status = esl_strchop(hmm->desc, n)) != eslOK) goto ERROR;
- hmm->flags |= p7H_DESC;
- }
- return eslOK;
-
-ERROR:
- return status;
-}
-
-/* Function: p7_hmm_AppendComlog()
-* Incept: SRE, Mon Jan 1 18:23:42 2007 [Casa de Gatos]
-*
-* Purpose: Concatenate command line options and append as a new line in the
-* command line log. Command line log is multiline, with each line
-* ending in newline char, except for last line.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure.
-*/
-int
-p7_hmm_AppendComlog(P7_HMM *hmm, int argc, char **argv)
-{
- int status;
- void *tmp;
- int n;
- int i;
-
- /* figure out length of added command line, and (re)allocate comlog */
- n = argc-1; /* account for 1 space per arg, except last one */
- for (i = 0; i < argc; i++)
- n += strlen(argv[i]);
-
- if (hmm->comlog != NULL) {
- n += strlen(hmm->comlog) + 1; /* +1 for the \n we're going to add to the old comlog */
- ESL_RALLOC_WITH_TYPE(hmm->comlog, char*, tmp, sizeof(char)* (n+1));
- strcat(hmm->comlog, "\n");
- } else {
- ESL_ALLOC_WITH_TYPE(hmm->comlog, char*, sizeof(char)* (n+1));
- *(hmm->comlog) = '\0'; /* need this to make strcat work */
- }
-
- for (i = 0; i < argc-1; i++)
- {
- strcat(hmm->comlog, argv[i]);
- strcat(hmm->comlog, " ");
- }
- strcat(hmm->comlog, argv[argc-1]);
- return eslOK;
-
-ERROR:
- return status;
-}
-
-/* Function: p7_hmm_SetCtime()
-* Date: SRE, Wed Oct 29 11:53:19 1997 [TWA 721 over the Atlantic]
-*
-* Purpose: Set the <ctime> field in a new HMM to the current time.
-*
-* This function is not reentrant and not threadsafe, because
-* it calls the nonreentrant ANSI C ctime() function.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure. <eslESYS> if the <time()>
-* system call fails to obtain the calendar time.
-*/
-int
-p7_hmm_SetCtime(P7_HMM *hmm)
-{
- int status;
- char *s = NULL;
- time_t date;
-
- if ((date = time(NULL)) == -1) { status = eslESYS; goto ERROR; }
- if ((status = esl_strdup(ctime(&date), -1, &s)) != eslOK) goto ERROR;
- if ((status = esl_strchop(s, -1)) != eslOK) goto ERROR;
-
- if (hmm->ctime != NULL) free(hmm->ctime);
- hmm->ctime = s;
- return eslOK;
-
-ERROR:
- if (s != NULL) free(s);
- return status;
-}
-
-
-
-/* Function: p7_hmm_SetComposition()
-* Synopsis: Calculate and set model composition, <hmm->compo[]>
-* Incept: SRE, Tue Sep 16 13:54:29 2008 [Janelia]
-*
-* Purpose: Calculates the mean residue composition emitted by
-* model <hmm>, and set <hmm->compo[]> to it.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure, in which case
-* values in <hmm->compo[]> are unchanged.
-*/
-int
-p7_hmm_SetComposition(P7_HMM *hmm)
-{
- float *mocc = NULL;
- float *iocc = NULL;
- float norm = 0.0;
- int k;
- int status;
-
- ESL_ALLOC_WITH_TYPE(mocc, float*, sizeof(float) * (hmm->M+1));
- ESL_ALLOC_WITH_TYPE(iocc, float*, sizeof(float) * (hmm->M+1));
-
- p7_hmm_CalculateOccupancy(hmm, mocc, iocc);
- esl_vec_FSet(hmm->compo, hmm->abc->K, 0.0);
- esl_vec_FAddScaled(hmm->compo, hmm->ins[0], iocc[0], hmm->abc->K);
- for (k = 1; k <= hmm->M; k++)
- {
- esl_vec_FAddScaled(hmm->compo, hmm->mat[k], mocc[k], hmm->abc->K);
- esl_vec_FAddScaled(hmm->compo, hmm->ins[k], iocc[k], hmm->abc->K);
- }
- norm = esl_vec_FSum(mocc, hmm->M+1);
- norm += esl_vec_FSum(iocc, hmm->M+1);
-
- esl_vec_FScale(hmm->compo, hmm->abc->K, 1.0 / norm);
- hmm->flags |= p7H_COMPO;
-
- free(mocc);
- free(iocc);
- return eslOK;
-
-ERROR:
- if (mocc != NULL) free(mocc);
- if (iocc != NULL) free(iocc);
- return status;
-}
-
-
-/*---------------- end, internal-setting routines ---------------*/
-
-
-
-
-/*****************************************************************
-* 3. Renormalization and rescaling counts in core HMMs.
-*****************************************************************/
-
-/* Function: p7_hmm_Rescale()
-* Incept: Steve Johnson, 3 May 2004
-* eweights code incorp: SRE, Thu May 20 10:34:03 2004 [St. Louis]
-*
-* Purpose: Scale a counts-based core HMM by some factor, for
-* adjusting counts to a new effective sequence number.
-* Only affects the core probability model (<t>, <ins>, and <mat>).
-*
-* Args: hmm - counts based HMM.
-* scale - scaling factor (e.g. eff_nseq/nseq); 1.0=no scaling.
-*
-* Returns: <eslOK> on success.
-*/
-int
-p7_hmm_Rescale(P7_HMM *hmm, float scale)
-{
- int k;
-
- for (k = 0; k <= hmm->M; k++) {
- esl_vec_FScale(hmm->mat[k], hmm->abc->K, scale);
- esl_vec_FScale(hmm->ins[k], hmm->abc->K, scale);
- esl_vec_FScale(hmm->t[k], 7, scale);
- }
- return eslOK;
-}
-
-/* Function: p7_hmm_Renormalize()
-* Incept: SRE, Mon Jan 1 18:39:42 2007 [Casa de Gatos]
-*
-* Purpose: Take a core HMM in counts form, and renormalize
-* all probability vectors in the core probability model. Enforces
-* Plan7 restrictions on nonexistent transitions.
-*
-* Leaves other flags (stats and profile) alone, so caller
-* needs to be wary. Renormalizing a probability model that
-* has stats and profile scores wouldn't usually invalidate
-* those data; and if we're renormalizing a counts model, we
-* shouldn't have stats or profile scores yet anyway.
-*
-* Args: hmm - the model to renormalize.
-*
-* Return: <eslOK> on success.
-*/
-int
-p7_hmm_Renormalize(P7_HMM *hmm)
-{
- int k; /* counter for model position */
-
- for (k = 0; k <= hmm->M; k++) {
- esl_vec_FNorm(hmm->mat[k], hmm->abc->K);
- esl_vec_FNorm(hmm->ins[k], hmm->abc->K);
- esl_vec_FNorm(P7H_TMAT(hmm, k), p7H_NTMAT); /* TMX */
- esl_vec_FNorm(P7H_TDEL(hmm, k), p7H_NTDEL); /* TIX */
- esl_vec_FNorm(P7H_TINS(hmm, k), p7H_NTINS); /* TDX */
- }
- /* If t[M][TD*] distribution was all zeros, we just made TDD nonzero. Oops.
- * Re-enforce t's on that final delete state. */
- hmm->t[hmm->M][p7H_DM] = 1.0;
- hmm->t[hmm->M][p7H_DD] = 0.0;
-
- /* Rare: if t[M][TM*] distribution was all zeros (all final transitions
- * were D_M -> E) then we just made nonexistent M_M->D_M+1 transition nonzero.
- * Fix that too.
- */
- if (hmm->t[hmm->M][p7H_MD] > 0.) {
- hmm->t[hmm->M][p7H_MD] = 0.;
- hmm->t[hmm->M][p7H_MM] = 0.5;
- hmm->t[hmm->M][p7H_MI] = 0.5;
- }
-
- return eslOK;
-}
-
-/*****************************************************************
-* 4. Debugging and development code
-*****************************************************************/
-
-/* Function: p7_hmm_Dump()
-* Incept: SRE, Mon Jan 1 18:44:15 2007 [Casa de Gatos]
-*
-* Purpose: Debugging: dump the probabilities (or counts) from a core HMM.
-*
-* Returns: <eslOK> on success.
-*/
-int
-p7_hmm_Dump(FILE *fp, P7_HMM *hmm)
-{
- int k; /* counter for nodes */
- int x; /* counter for symbols */
- int ts; /* counter for state transitions */
-
- for (k = 0; k <= hmm->M; k++)
- { /* Line 1: k, match emissions */
- fprintf(fp, " %5d ", k);
- for (x = 0; x < hmm->abc->K; x++)
- fprintf(fp, "%9.4f ", hmm->mat[k][x]);
- fputs("\n", fp);
- /* Line 2: insert emissions */
- fprintf(fp, " ");
- for (x = 0; x < hmm->abc->K; x++)
- fprintf(fp, "%9.4f ", hmm->ins[k][x]);
- fputs("\n", fp);
- /* Line 3: transition probs */
- fprintf(fp, " ");
- for (ts = 0; ts < 7; ts++)
- fprintf(fp, "%9.4f ", hmm->t[k][ts]);
- fputs("\n", fp);
- }
- fputs("//\n", fp);
- return eslOK;
-}
-
-/* Function: p7_hmm_Sample()
-* Incept: SRE, Sat Jan 6 13:43:03 2007 [Casa de Gatos]
-*
-* Purpose: Creates a random HMM of length <M> nodes,
-* for alphabet <abc>, obtaining randomness from
-* <r>.
-*
-* Probably only useful for debugging.
-*
-* Note: Compare p7_hmm_Renormalize(), which has a similar
-* structure, except it normalizes instead of
-* sampling each probability vector.
-*
-* Returns: <eslOK> on success, and the new hmm is returned
-* through <ret_hmm); caller is responsible for
-* freeing this object with <p7_hmm_Destroy()>.
-*
-* Throws: <eslEMEM> on allocation error.
-*/
-int
-p7_hmm_Sample(ESL_RANDOMNESS *r, int M, const ESL_ALPHABET *abc, P7_HMM **ret_hmm)
-{
- P7_HMM *hmm = NULL;
- char *logmsg = "[random HMM created by sampling]";
- int k;
- int status;
-
- hmm = p7_hmm_Create(M, abc);
- if (hmm == NULL) { status = eslEMEM; goto ERROR; }
-
- for (k = 0; k <= M; k++)
- {
- if (k > 0) esl_dirichlet_FSampleUniform(r, abc->K, hmm->mat[k]);
- esl_dirichlet_FSampleUniform(r, abc->K, hmm->ins[k]);
- esl_dirichlet_FSampleUniform(r, 3, hmm->t[k]);
- esl_dirichlet_FSampleUniform(r, 2, hmm->t[k]+3);
- if (k > 0) esl_dirichlet_FSampleUniform(r, 2, hmm->t[k]+5);
- }
- /* Node M is special: no transitions to D, transitions to M
- * are interpreted as transitions to E. Overwrite a little of
- * what we did in node M.
- */
- esl_dirichlet_FSampleUniform(r, 2, hmm->t[M]); /* TMM,TMI only */
- hmm->t[M][p7H_MD] = 0.;
- hmm->t[M][p7H_DM] = 1.0;
- hmm->t[M][p7H_DD] = 0.0;
-
- /* Add mandatory annotation, and some relevant optional annotation */
- p7_hmm_SetName(hmm, "sampled-hmm");
- p7_hmm_AppendComlog(hmm, 1, &logmsg);
- p7_hmm_SetCtime(hmm);
-
- *ret_hmm = hmm;
- return eslOK;
-
-ERROR:
- if (hmm != NULL) p7_hmm_Destroy(hmm);
- *ret_hmm = NULL;
- return status;
-
-}
-
-/* Function: p7_hmm_SampleUngapped()
-* Incept: SRE, Thu Jan 25 09:38:30 2007 [Janelia]
-*
-* Purpose: Same as <p7_hmm_Sample()>, except all
-* M $\rightarrow$ M transitions are 1.0:
-* an ungapped model. Useful for testing
-* as a limit case.
-*
-* Returns: <eslOK> on success, and the new hmm is returned
-* through <ret_hmm); caller is responsible for
-* freeing this object with <p7_hmm_Destroy()>.
-*
-* Throws: <eslEMEM> on allocation error.
-*
-* Xref: STL11/140
-*/
-int
-p7_hmm_SampleUngapped(ESL_RANDOMNESS *r, int M, const ESL_ALPHABET *abc, P7_HMM **ret_hmm)
-{
- P7_HMM *hmm = NULL;
- int k;
- int status;
-
- if ((status = p7_hmm_Sample(r, M, abc, &hmm)) != eslOK) goto ERROR;
- for (k = 0; k <= M; k++) {
- hmm->t[k][p7H_MM] = 1.0;
- hmm->t[k][p7H_MD] = 0.0;
- hmm->t[k][p7H_MI] = 0.0;
- }
- *ret_hmm = hmm;
- return eslOK;
-
-ERROR:
- if (hmm != NULL) p7_hmm_Destroy(hmm);
- *ret_hmm = NULL;
- return status;
-}
-
-/* Function: esl_hmm_SampleEnumerable()
-* Incept: SRE, Wed Apr 18 09:38:09 2007 [Janelia]
-*
-* Purpose: Sample a random HMM with random emission and
-* transition probabilities with the exception that
-* all transitions to insert are zero. This makes
-* it possible to create a model with a finite,
-* easily enumerable sequence space (all seqs of
-* length $\leq M).
-*
-* To achieve this in the profile as well as the core HMM,
-* the caller must configure a unihit mode
-* (<p7_ProfileConfig(hmm, bg, gm, p7_UNILOCAL)> or
-* <p7_UNIGLOCAL>), and a target length of zero
-* (<p7_ReconfigLength(gm, 0)>).
-*
-* Useful for debugging and validating Forward/Viterbi
-* algorithms.
-*
-* Returns: <eslOK> on success. The newly allocated hmm is returned through
-* <ret_hmm>. The caller is responsible for freeing this object
-* with <p7_hmm_Destroy()>.
-*
-* Throws: <eslEMEM> on allocation error.
-*/
-int
-p7_hmm_SampleEnumerable(ESL_RANDOMNESS *r, int M, const ESL_ALPHABET *abc, P7_HMM **ret_hmm)
-{
- P7_HMM *hmm = NULL;
- char *logmsg = "[random enumerable HMM created by sampling]";
- int k;
- float tmp[2];
- int status;
-
- hmm = p7_hmm_Create(M, abc);
- if (hmm == NULL) { status = eslEMEM; goto ERROR; }
-
- for (k = 0; k <= M; k++)
- {
- if (k > 0) esl_dirichlet_FSampleUniform(r, abc->K, hmm->mat[k]); /* match emission probs */
- esl_dirichlet_FSampleUniform(r, abc->K, hmm->ins[k]); /* insert emission probs */
- esl_dirichlet_FSampleUniform(r, 2, tmp);
- hmm->t[k][p7H_MM] = tmp[0];
- hmm->t[k][p7H_MI] = 0.;
- hmm->t[k][p7H_MD] = tmp[1];
- hmm->t[k][p7H_IM] = 1.; /* I transitions irrelevant since I's are unreached. */
- hmm->t[k][p7H_II] = 0.;
- if (k > 0) esl_dirichlet_FSampleUniform(r, 2, hmm->t[k]+5); /* delete transitions to M,D */
- }
-
- /* Node M is special: no transitions to D, transitions to M
- * are interpreted as transitions to E. Overwrite a little of
- * what we did in node M.
- */
- hmm->t[M][p7H_MM] = 1.;
- hmm->t[M][p7H_MD] = 0.;
- hmm->t[M][p7H_DM] = 1.;
- hmm->t[M][p7H_DD] = 0.;
-
- /* Add mandatory annotation
- */
- p7_hmm_SetName(hmm, "sampled-hmm");
- p7_hmm_AppendComlog(hmm, 1, &logmsg);
- p7_hmm_SetCtime(hmm);
-
- /* SRE DEBUGGING */
- p7_hmm_Validate(hmm, NULL, 0.0001);
-
- *ret_hmm = hmm;
- return eslOK;
-
-ERROR:
- if (hmm != NULL) p7_hmm_Destroy(hmm);
- *ret_hmm = NULL;
- return status;
-}
-
-/* Function: p7_hmm_SampleUniform()
-* Incept: SRE, Thu Feb 22 10:04:19 2007 [Janelia]
-*
-* Purpose: Sample a model that uses uniform transition probabilities,
-* determined by <tmi>, <tii>, <tmd>, and <tdd>,
-* the probabilistic equivalent of gap-open/gap-extend for
-* inserts, deletes.
-*
-* Useful for testing expected behavior on single-sequence
-* models, where transitions are position-independent.
-*
-* Returns: <eslOK> on success, and the new hmm is returned
-* through <ret_hmm); caller is responsible for
-* freeing this object with <p7_hmm_Destroy()>.
-*
-* Throws: <eslEMEM> on allocation error.
-*
-* Xref: J1/5.
-*/
-int
-p7_hmm_SampleUniform(ESL_RANDOMNESS *r, int M, const ESL_ALPHABET *abc,
- float tmi, float tii, float tmd, float tdd,
- P7_HMM **ret_hmm)
-{
- int status;
- P7_HMM *hmm = NULL;
- char *logmsg = "[HMM with uniform transitions, random emissions]";
- int k;
-
- hmm = p7_hmm_Create(M, abc);
- if (hmm == NULL) { status = eslEMEM; goto ERROR; }
-
- for (k = 0; k <= M; k++)
- {
- if (k > 0) esl_dirichlet_FSampleUniform(r, abc->K, hmm->mat[k]);
- esl_dirichlet_FSampleUniform(r, abc->K, hmm->ins[k]);
- hmm->t[k][p7H_MM] = 1.0 - tmi - tmd;
- hmm->t[k][p7H_MI] = tmi;
- hmm->t[k][p7H_MD] = tmd;
- hmm->t[k][p7H_IM] = 1.0 - tii;
- hmm->t[k][p7H_II] = tii;
- hmm->t[k][p7H_DM] = 1.0 - tdd;
- hmm->t[k][p7H_DD] = tdd;
- }
-
- /* Deal w/ special stuff at node 0, M, overwriting some of what we
- * just did.
- */
- hmm->t[M][p7H_MM] = 1.0 - tmi;
- hmm->t[M][p7H_MD] = 0.;
- hmm->t[M][p7H_DM] = 1.0;
- hmm->t[M][p7H_DD] = 0.;
-
- /* Add mandatory annotation
- */
- p7_hmm_SetName(hmm, "sampled-hmm");
- p7_hmm_AppendComlog(hmm, 1, &logmsg);
- p7_hmm_SetCtime(hmm);
-
- *ret_hmm = hmm;
- return eslOK;
-
-ERROR:
- if (hmm != NULL) p7_hmm_Destroy(hmm);
- *ret_hmm = NULL;
- return status;
-}
-
-
-
-/* Function: p7_hmm_Compare()
-* Incept: SRE, Sat Jan 6 14:14:58 2007 [Casa de Gatos]
-*
-* Purpose: Compare two HMMs <h1> and <h2> to each other;
-* return <eslOK> if they're identical, and <eslFAIL>
-* if they differ. Floating-point probabilities are
-* compared for equality within a fractional tolerance
-* <tol>.
-*/
-int
-p7_hmm_Compare(P7_HMM *h1, P7_HMM *h2, float tol)
-{
- int k, z;
-
- if (h1->abc->type != h2->abc->type) return eslFAIL;
- if (h1->M != h2->M) return eslFAIL;
- if (h1->flags != h2->flags) return eslFAIL;
-
- for (k = 0; k <= h1->M; k++) /* (it's safe to include 0 here.) */
- {
- if (esl_vec_FCompare(h1->mat[k], h2->mat[k], h1->abc->K, tol) != eslOK) return eslFAIL;
- if (esl_vec_FCompare(h1->ins[k], h2->ins[k], h1->abc->K, tol) != eslOK) return eslFAIL;
- if (esl_vec_FCompare(h1->t[k], h2->t[k], 7, tol) != eslOK) return eslFAIL;
- }
-
- if (strcmp(h1->name, h2->name) != 0) return eslFAIL;
- if (strcmp(h1->comlog, h2->comlog) != 0) return eslFAIL;
- if (strcmp(h1->ctime, h2->ctime) != 0) return eslFAIL;
-
- if (h1->nseq != h2->nseq) return eslFAIL;
- if (h1->eff_nseq != h2->eff_nseq) return eslFAIL;
- if (h1->checksum != h2->checksum) return eslFAIL;
-
- if (esl_strcmp(h1->acc, h2->acc) != 0) return eslFAIL;
- if (esl_strcmp(h1->desc, h2->desc) != 0) return eslFAIL;
-
- if ((h1->flags & p7H_RF) && esl_strcmp(h1->rf, h2->rf) != 0) return eslFAIL;
- if ((h1->flags & p7H_CS) && esl_strcmp(h1->cs, h2->cs) != 0) return eslFAIL;
- if ((h1->flags & p7H_CA) && esl_strcmp(h1->ca, h2->ca) != 0) return eslFAIL;
- if ((h1->flags & p7H_MAP) && esl_vec_ICompare(h1->map, h2->map, h1->M+1) != 0) return eslFAIL;
-
- if (h1->flags & p7H_GA) {
- if (esl_FCompare(h1->cutoff[p7_GA1], h2->cutoff[p7_GA1], tol) != eslOK) return eslFAIL;
- if (esl_FCompare(h1->cutoff[p7_GA2], h2->cutoff[p7_GA2], tol) != eslOK) return eslFAIL;
- }
- if (h1->flags & p7H_TC) {
- if (esl_FCompare(h1->cutoff[p7_TC1], h2->cutoff[p7_TC1], tol) != eslOK) return eslFAIL;
- if (esl_FCompare(h1->cutoff[p7_TC2], h2->cutoff[p7_TC2], tol) != eslOK) return eslFAIL;
- }
- if (h1->flags & p7H_NC) {
- if (esl_FCompare(h1->cutoff[p7_NC1], h2->cutoff[p7_NC1], tol) != eslOK) return eslFAIL;
- if (esl_FCompare(h1->cutoff[p7_NC2], h2->cutoff[p7_NC2], tol) != eslOK) return eslFAIL;
- }
-
- if (h1->flags & p7H_STATS) {
- for (z = 0; z < p7_NEVPARAM; z++)
- if (esl_FCompare(h1->evparam[z], h2->evparam[z], tol) != eslOK) return eslFAIL;
- }
-
- return eslOK;
-}
-
-/* Function: p7_hmm_Validate()
-* Incept: SRE, Sat Jan 6 14:43:00 2007 [Casa de Gatos]
-*
-* Purpose: Validates the internals of the HMM structure <hmm>.
-*
-* Probability vectors are validated to sum up to
-* within a fractional tolerance <tol> of 1.0.
-*
-* Probably only useful for debugging and development,
-* not production code.
-*
-* Returns: <eslOK> if <hmm> internals look fine.
-* Returns <eslFAIL> if something is wrong.
-*/
-int
-p7_hmm_Validate(P7_HMM *hmm, char *errbuf, float tol)
-{
- int status;
- int k;
-
- if (hmm == NULL) ESL_XFAIL(eslFAIL, errbuf, "HMM is a null pointer");
- if (hmm->M < 1) ESL_XFAIL(eslFAIL, errbuf, "HMM has M < 1");
- if (hmm->abc == NULL) ESL_XFAIL(eslFAIL, errbuf, "HMM has no alphabet reference");
- if (hmm->abc->type == eslUNKNOWN) ESL_XFAIL(eslFAIL, errbuf, "HMM's alphabet is set to unknown");
-
- for (k = 0; k <= hmm->M; k++)
- {
- if (esl_vec_FValidate(hmm->mat[k], hmm->abc->K, tol, NULL) != eslOK) ESL_XFAIL(eslFAIL, errbuf, "mat[%d] fails pvector validation", k);
- if (esl_vec_FValidate(hmm->ins[k], hmm->abc->K, tol, NULL) != eslOK) ESL_XFAIL(eslFAIL, errbuf, "ins[%d] fails pvector validation", k);
- if (esl_vec_FValidate(hmm->t[k], 3, tol, NULL) != eslOK) ESL_XFAIL(eslFAIL, errbuf, "t_M[%d] fails pvector validation", k);
- if (esl_vec_FValidate(hmm->t[k]+3, 2, tol, NULL) != eslOK) ESL_XFAIL(eslFAIL, errbuf, "t_I[%d] fails pvector validation", k);
- if (esl_vec_FValidate(hmm->t[k]+5, 2, tol, NULL) != eslOK) ESL_XFAIL(eslFAIL, errbuf, "t_D[%d] fails pvector validation", k);
- }
- if (hmm->t[hmm->M][p7H_MD] != 0.0) ESL_XFAIL(eslFAIL, errbuf, "TMD should be 0 for last node");
- if (hmm->t[hmm->M][p7H_DM] != 1.0) ESL_XFAIL(eslFAIL, errbuf, "TDM should be 1 for last node");
- if (hmm->t[hmm->M][p7H_DD] != 0.0) ESL_XFAIL(eslFAIL, errbuf, "TDD should be 0 for last node");
-
- if (hmm->name == NULL) ESL_XFAIL(eslFAIL, errbuf, "name is NULL: this field is mandatory");
- /* comlog is either NULL or a free text string: hard to validate */
- /* ctime, ditto */
- if ( (hmm->nseq != -1) && hmm->nseq <= 0) ESL_XFAIL(eslFAIL, errbuf, "invalid nseq");
- if ( (hmm->eff_nseq != -1.0f) && hmm->eff_nseq <= 0.0f) ESL_XFAIL(eslFAIL, errbuf, "invalid eff_nseq");
- if (!(hmm->flags & p7H_CHKSUM) && hmm->checksum != 0 ) ESL_XFAIL(eslFAIL, errbuf, "p7H_CHKSUM flag down, but nonzero checksum present");
-
- if (hmm->flags & p7H_RF) {
- if (hmm->rf == NULL || strlen(hmm->rf) != hmm->M+1) ESL_XFAIL(eslFAIL, errbuf, "p7H_RF flag up, but rf string is invalid");
- } else
- if (hmm->rf != NULL) ESL_XFAIL(eslFAIL, errbuf, "p7H_RF flag down, but rf string is present");
- if (hmm->flags & p7H_CS) {
- if (hmm->cs == NULL || strlen(hmm->cs) != hmm->M+1) ESL_XFAIL(eslFAIL, errbuf, "p7H_CS flag up, but cs string is invalid");
- } else
- if (hmm->cs != NULL) ESL_XFAIL(eslFAIL, errbuf, "p7H_CS flag down, but cs string is present");
- if (hmm->flags & p7H_CA) {
- if (hmm->ca == NULL || strlen(hmm->ca) != hmm->M+1) ESL_XFAIL(eslFAIL, errbuf, "p7H_CA flag up, but ca string is invalid");
- } else
- if (hmm->ca != NULL) ESL_XFAIL(eslFAIL, errbuf, "p7H_CA flag down, but ca string is present");
- if ( (hmm->flags & p7H_MAP) && hmm->map == NULL) ESL_XFAIL(eslFAIL, errbuf, "p7H_MAP flag up, but map string is null");
- if (! (hmm->flags & p7H_MAP) && hmm->map != NULL) ESL_XFAIL(eslFAIL, errbuf, "p7H_MAP flag down, but map string is present");
-
- if (hmm->flags & p7H_STATS) {
- if (hmm->evparam[p7_MLAMBDA] <= 0.) ESL_XFAIL(eslFAIL, errbuf, "lambda parameter can't be negative");
- if (hmm->evparam[p7_VLAMBDA] <= 0.) ESL_XFAIL(eslFAIL, errbuf, "lambda parameter can't be negative");
- if (hmm->evparam[p7_FLAMBDA] <= 0.) ESL_XFAIL(eslFAIL, errbuf, "lambda parameter can't be negative");
- }
- if (hmm->flags & p7H_COMPO && esl_vec_FValidate(hmm->compo, hmm->abc->K, tol, NULL) != eslOK)
- ESL_XFAIL(eslFAIL, errbuf, "composition fails pvector validation");
-
- return eslOK;
-
-ERROR:
- return status;
-}
-/*------------- end of debugging/development code ----------------*/
-
-
-
-
-/*****************************************************************
-* 5. Other routines in the API.
-*****************************************************************/
-
-/* Function: p7_hmm_CalculateOccupancy()
-* Incept: SRE, Mon Jan 22 08:10:05 2007 [Janelia]
-*
-* Purpose: Calculate a vector <mocc[1..M]> containing probability
-* that each match state is used in a sampled path through
-* the model. Caller provides allocated space (<M+1> floats)
-* for <mocc>.
-*
-* Caller may optionally provide an array <iocc[0..M]> as
-* well, which (if provided) will be set to contain the
-* expected number of times that a sampled path would contain
-* each insert state.
-*
-* Returns: <eslOK> on success.
-*/
-int
-p7_hmm_CalculateOccupancy(const P7_HMM *hmm, float *mocc, float *iocc)
-{
- int k;
-
- mocc[0] = 0.; /* no M_0 state */
- mocc[1] = hmm->t[0][p7H_MI] + hmm->t[0][p7H_MM]; /* initialize w/ 1 - B->D_1 */
- for (k = 2; k <= hmm->M; k++)
- mocc[k] = mocc[k-1] * (hmm->t[k-1][p7H_MM] + hmm->t[k-1][p7H_MI]) +
- (1.0-mocc[k-1]) * hmm->t[k-1][p7H_DM];
-
- if (iocc != NULL) {
- iocc[0] = hmm->t[0][p7H_MI] / hmm->t[0][p7H_IM];
- for (k = 1; k <= hmm->M; k++)
- iocc[k] = mocc[k] * hmm->t[k][p7H_MI] / hmm->t[k][p7H_IM];
- }
-
- return eslOK;
-}
-
-/*---------------- end of the rest of the API -------------------*/
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/p7_pipeline.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/p7_pipeline.cpp
deleted file mode 100644
index 3640aff..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/p7_pipeline.cpp
+++ /dev/null
@@ -1,794 +0,0 @@
-/* H3's accelerated seq/profile comparison pipeline
-*
-* Contents:
-* 1. P7_PIPELINE: allocation, initialization, destruction
-* 2. Pipeline API
-* 5. Copyright and license information
-*
-* SRE, Fri Dec 5 10:09:39 2008 [Janelia] [BSG3, Bear McCreary]
- * SVN $Id: p7_pipeline.c 2962 2009-10-19 22:28:48Z eddys $
-*/
-
-#include <hmmer3/p7_config.h>
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-#include <assert.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_exponential.h>
-#include <hmmer3/easel/esl_gumbel.h>
-#include <hmmer3/easel/esl_vectorops.h>
-
-#include <hmmer3/hmmer.h>
-
-
-/*****************************************************************
-* 1. The P7_PIPELINE object: allocation, initialization, destruction.
-*****************************************************************/
-
-/* Function: p7_pipeline_Create()
-* Synopsis: Create a new accelerated comparison pipeline.
-* Incept: SRE, Fri Dec 5 10:11:31 2008 [Janelia]
-*
-* Purpose: Given an application configuration structure <go>
-* containing certain standardized options (described
-* below), some initial guesses at the model size <M_hint>
-* and sequence length <L_hint> that will be processed,
-* and a <mode> that can be either <p7_SCAN_MODELS> or
-* <p7_SEARCH_SEQS> depending on whether we're searching one sequence
-* against a model database (hmmscan mode) or one model
-* against a sequence database (hmmsearch mode); create new
-* pipeline object.
-*
-* In search mode, we would generally know the length of
-* our query profile exactly, and would pass <om->M> as <M_hint>;
-* in scan mode, we generally know the length of our query
-* sequence exactly, and would pass <sq->n> as <L_hint>.
-* Targets will come in various sizes as we read them,
-* and the pipeline will resize any necessary objects as
-* needed, so the other (unknown) length is only an
-* initial allocation.
-*
-* The configuration <go> must include settings for the
-* following options:
-*
-* || option || description || usually ||
- * | --noali | don't output alignments (smaller output) | FALSE |
-* | -E | report hits <= this E-value threshold | 10.0 |
-* | -T | report hits >= this bit score threshold | NULL |
-* | -Z | set initial hit search space size | NULL |
-* | --domZ | set domain search space size | NULL |
-* | --domE | report domains <= this E-value threshold | 10.0 |
-* | --domT | report domains <= this bit score threshold | NULL |
-* | --incE | include hits <= this E-value threshold | 0.01 |
-* | --incT | include hits >= this bit score threshold | NULL |
-* | --incdomE | include domains <= this E-value threshold | 0.01 |
-* | --incdomT | include domains <= this score threshold | NULL |
- * | --cut_ga | model-specific thresholding using GA | FALSE |
- * | --cut_nc | model-specific thresholding using NC | FALSE |
- * | --cut_tc | model-specific thresholding using TC | FALSE |
-* | --max | turn all heuristic filters off | FALSE |
-* | --F1 | Stage 1 (MSV) thresh: promote hits P <= F1 | 0.02 |
-* | --F2 | Stage 2 (Vit) thresh: promote hits P <= F2 | 1e-3 |
-* | --F3 | Stage 2 (Fwd) thresh: promote hits P <= F3 | 1e-5 |
-* | --nobias | turn OFF composition bias filter HMM | FALSE |
-* | --nonull2 | turn OFF biased comp score correction | FALSE |
-* | --seed | RNG seed (0=use arbitrary seed) | 42 |
- * | --acc | prefer accessions over names in output | FALSE |
-
-*
-* Returns: ptr to new <P7_PIPELINE> object on success. Caller frees this
-* with <p7_pipeline_Destroy()>.
-*
-* Throws: <NULL> on allocation failure.
-*/
-P7_PIPELINE * p7_pipeline_Create(const UHMM3SearchSettings* settings, int M_hint, int L_hint, enum p7_pipemodes_e mode) {
- P7_PIPELINE *pli = NULL;
- int seed = settings->seed;
- int status;
-
- assert( NULL != settings );
- ESL_ALLOC_WITH_TYPE(pli, P7_PIPELINE*, sizeof(P7_PIPELINE));
-
- if ((pli->fwd = p7_omx_Create(M_hint, L_hint, L_hint)) == NULL) goto ERROR;
- if ((pli->bck = p7_omx_Create(M_hint, L_hint, L_hint)) == NULL) goto ERROR;
- if ((pli->oxf = p7_omx_Create(M_hint, 0, L_hint)) == NULL) goto ERROR;
- if ((pli->oxb = p7_omx_Create(M_hint, 0, L_hint)) == NULL) goto ERROR;
-
- /* Normally, we reinitialize the RNG to the original seed every time we're
- * about to collect a stochastic trace ensemble. This eliminates run-to-run
- * variability. As a special case, if seed==0, we choose an arbitrary one-time
- * seed: time() sets the seed, and we turn off the reinitialization.
- */
- pli->r = esl_randomness_CreateFast(seed);
- pli->do_reseeding = (seed == 0) ? FALSE : TRUE;
- pli->ddef = p7_domaindef_Create(pli->r);
- pli->ddef->do_reseeding = pli->do_reseeding;
-
- /* Configure reporting thresholds */
- pli->by_E = TRUE;
- pli->E = settings->e;
- assert( 0 < pli->E );
- pli->T = 0.0;
- pli->dom_by_E = TRUE;
- pli->domE = settings->domE;
- assert( 0 < pli->domE );
- pli->domT = 0.0;
- pli->use_bit_cutoffs = FALSE;
-
- if( OPTION_NOT_SET != settings->t ) {
- pli->T = settings->t;
- assert( 0 < pli->T );
- pli->by_E = FALSE;
- }
- if( OPTION_NOT_SET != settings->domT ) {
- pli->domT = settings->domT;
- assert( 0 < pli->domT );
- pli->dom_by_E = FALSE;
- }
-
- /* Configure inclusion thresholds */
- pli->inc_by_E = TRUE;
- pli->incE = settings->incE;
- assert( 0 < pli->incE );
- pli->incT = 0.0;
- pli->incdom_by_E = TRUE;
- pli->incdomE = settings->incDomE;
- assert( 0 < pli->incdomE );
- pli->incdomT = 0.0;
-
- if( OPTION_NOT_SET != settings->incT ) {
- pli->incT = settings->incT;
- assert( 0 < pli->incT );
- pli->inc_by_E = FALSE;
- }
- if( OPTION_NOT_SET != settings->incDomT ) {
- pli->incdomT = settings->incDomT;
- assert( 0 < pli->incdomT );
- pli->incdom_by_E = FALSE;
- }
-
- /* Configure for one of the model-specific thresholding options */
- if( p7H_GA == settings->useBitCutoffs ) {
- pli->T = pli->domT = 0.0;
- pli->by_E = pli->dom_by_E = FALSE;
- pli->use_bit_cutoffs = p7H_GA;
- }
- if( p7H_NC == settings->useBitCutoffs ) {
- pli->T = pli->domT = 0.0;
- pli->by_E = pli->dom_by_E = FALSE;
- pli->use_bit_cutoffs = p7H_NC;
- }
- if( p7H_TC == settings->useBitCutoffs ) {
- pli->T = pli->domT = 0.0;
- pli->by_E = pli->dom_by_E = FALSE;
- pli->use_bit_cutoffs = p7H_TC;
- }
-
- /* Configure search space sizes for E value calculations
- */
- pli->Z = pli->domZ = 0.0;
- pli->Z_setby = pli->domZ_setby = p7_ZSETBY_NTARGETS;
- if( OPTION_NOT_SET != settings->z ) {
- pli->Z_setby = p7_ZSETBY_OPTION;
- pli->Z = settings->z;
- assert( 0 < pli->Z );
- }
- if( OPTION_NOT_SET != settings->domZ ) {
- pli->domZ_setby = p7_ZSETBY_OPTION;
- pli->domZ = settings->domZ;
- assert( 0 < pli->domZ );
- }
-
- /* Configure accelaration pipeline thresholds */
- pli->do_max = FALSE;
- pli->do_biasfilter = TRUE;
- pli->do_null2 = TRUE;
- pli->F1 = ESL_MIN(1.0, settings->f1 );
- pli->F2 = ESL_MIN(1.0, settings->f2 );
- pli->F3 = ESL_MIN(1.0, settings->f3 );
-
- if( TRUE == settings->doMax ){
- pli->do_max = TRUE;
- pli->do_biasfilter = FALSE;
- pli->F1 = pli->F2 = pli->F3 = 1.0;
- }
- if( TRUE == settings->noNull2 ) {
- pli->do_null2 = FALSE;
- }
- if( TRUE == settings->noBiasFilter ) {
- pli->do_biasfilter = FALSE;
- }
-
- /* Accounting as we collect results */
- pli->nmodels = 0;
- pli->nseqs = 0;
- pli->nres = 0;
- pli->nnodes = 0;
- pli->n_past_msv = 0;
- pli->n_past_bias = 0;
- pli->n_past_vit = 0;
- pli->n_past_fwd = 0;
-
- pli->mode = mode;
- pli->errbuf[0] = '\0';
-
- return pli;
-ERROR:
- p7_pipeline_Destroy(pli);
- return NULL;
-}
-
-
-/* Function: p7_pipeline_Reuse()
-* Synopsis: Reuse a pipeline for next target.
-* Incept: SRE, Fri Dec 5 10:31:36 2008 [Janelia]
-*
-* Purpose: Reuse <pli> for next target sequence (search mode)
-* or model (scan mode).
-*
-* May eventually need to distinguish from reusing pipeline
-* for next query, but we're not really focused on multiquery
-* use of hmmscan/hmmsearch/phmmer for the moment.
-*/
-int
-p7_pipeline_Reuse(P7_PIPELINE *pli)
-{
- p7_omx_Reuse(pli->oxf);
- p7_omx_Reuse(pli->oxb);
- p7_omx_Reuse(pli->fwd);
- p7_omx_Reuse(pli->bck);
- p7_domaindef_Reuse(pli->ddef);
- return eslOK;
-}
-
-
-
-/* Function: p7_pipeline_Destroy()
-* Synopsis: Free a <P7_PIPELINE> object.
-* Incept: SRE, Fri Dec 5 10:30:23 2008 [Janelia]
-*
-* Purpose: Free a <P7_PIPELINE> object.
-*/
-void
-p7_pipeline_Destroy(P7_PIPELINE *pli)
-{
- if (pli == NULL) return;
-
- p7_omx_Destroy(pli->oxf);
- p7_omx_Destroy(pli->oxb);
- p7_omx_Destroy(pli->fwd);
- p7_omx_Destroy(pli->bck);
- esl_randomness_Destroy(pli->r);
- p7_domaindef_Destroy(pli->ddef);
- free(pli);
-}
-/*---------------- end, P7_PIPELINE object ----------------------*/
-
-
-
-
-
-/*****************************************************************
-* 2. The pipeline API.
-*****************************************************************/
-
-// ! CODE CHANGED: reporting tresholds need to work with double's !
-
-/* Function: p7_pli_TargetReportable
-* Synopsis: Returns TRUE if target score meets reporting threshold.
-* Incept: SRE, Tue Dec 9 08:57:26 2008 [Janelia]
-*
-* Purpose: Returns <TRUE> if the bit score <score> and/or
-* P-value <Pval> meeds per-target reporting thresholds
-* for the processing pipeline.
-*/
-int
-p7_pli_TargetReportable(P7_PIPELINE *pli, float score, double Pval)
-{
- if ( pli->by_E && Pval * pli->Z <= pli->E) return TRUE;
- else if (! pli->by_E && score >= pli->T) return TRUE;
- else return FALSE;
-}
-
-/* Function: p7_pli_DomainReportable
-* Synopsis: Returns TRUE if domain score meets reporting threshold.
-* Incept: SRE, Tue Dec 9 09:01:01 2008 [Janelia]
-*
-* Purpose: Returns <TRUE> if the bit score <score> and/or
-* P-value <Pval> meets per-domain reporting thresholds
-* for the processing pipeline.
-*/
-int
-p7_pli_DomainReportable(P7_PIPELINE *pli, float dom_score, double Pval)
-{
- if ( pli->dom_by_E && Pval * pli->domZ <= pli->domE) return TRUE;
- else if (! pli->dom_by_E && dom_score >= pli->domT) return TRUE;
- else return FALSE;
-}
-
-/* Function: p7_pli_TargetIncludable()
-* Synopsis: Returns TRUE if target score meets inclusion threshold.
-* Incept: SRE, Fri Jan 16 11:18:08 2009 [Janelia]
-*/
-int
-p7_pli_TargetIncludable(P7_PIPELINE *pli, float score, double Pval)
-{
- if ( pli->inc_by_E && Pval * pli->Z <= pli->incE) return TRUE;
- else if (! pli->inc_by_E && score >= pli->incT) return TRUE;
- else return FALSE;
-}
-
-/* Function: p7_pli_DomainIncludable()
-* Synopsis: Returns TRUE if domain score meets inclusion threshold.
-* Incept: SRE, Fri Jan 16 11:20:38 2009 [Janelia]
-*/
-int
-p7_pli_DomainIncludable(P7_PIPELINE *pli, float dom_score, double Pval)
-{
- if ( pli->incdom_by_E && Pval * pli->domZ <= pli->incdomE) return TRUE;
- else if (! pli->incdom_by_E && dom_score >= pli->incdomT) return TRUE;
- else return FALSE;
-}
-
-
-
-
-/* Function: p7_pli_NewModel()
-* Synopsis: Prepare pipeline for a new model (target or query)
-* Incept: SRE, Fri Dec 5 10:35:37 2008 [Janelia]
-*
-* Purpose: Caller has a new model <om>. Prepare the pipeline <pli>
-* to receive this model as either a query or a target.
-*
-* The pipeline may alter the null model <bg> in a model-specific
-* way (if we're using a composition bias filter HMM in the
-* pipeline).
-*
-* Returns: <eslOK> on success.
-*
-* <eslEINVAL> if pipeline expects to be able to use a
-* model's bit score thresholds, but this model does not
-* have the appropriate ones set.
-*/
-int
-p7_pli_NewModel(P7_PIPELINE *pli, const P7_OPROFILE *om, P7_BG *bg)
-{
- int status = eslOK;
-
- pli->nmodels++;
- pli->nnodes += om->M;
- if (pli->Z_setby == p7_ZSETBY_NTARGETS && pli->mode == p7_SCAN_MODELS) pli->Z = pli->nmodels;
-
- if (pli->do_biasfilter) p7_bg_SetFilter(bg, om->M, om->compo);
-
- if (pli->mode == p7_SEARCH_SEQS)
- status = p7_pli_NewModelThresholds(pli, om);
-
- return status;
-}
-
-/* Function: p7_pli_NewModelThresholds()
- * Synopsis: Set reporting and inclusion bit score thresholds on a new model.
- * Incept: SRE, Sat Oct 17 12:07:43 2009 [Janelia]
- *
- * Purpose: Set the bit score thresholds on a new model, if we're
- * using Pfam GA, TC, or NC cutoffs for reporting or
- * inclusion.
- *
- * In a "search" pipeline, this only needs to be done once
- * per query model, so <p7_pli_NewModelThresholds()> gets
- * called by <p7_pli_NewModel()>.
- *
- * In a "scan" pipeline, this needs to be called for each
- * model, and it needs to be called after
- * <p7_oprofile_ReadRest()>, because that's when the bit
- * score thresholds get read.
- *
- * Returns: <eslOK> on success.
- *
- * <eslEINVAL> if pipeline expects to be able to use a
- * model's bit score thresholds, but this model does not
- * have the appropriate ones set.
- *
- * Xref: Written to fix bug #h60.
- */
-int
-p7_pli_NewModelThresholds(P7_PIPELINE *pli, const P7_OPROFILE *om)
-{
-
- if (pli->use_bit_cutoffs)
- {
- if (pli->use_bit_cutoffs == p7H_GA)
- {
- if (om->cutoff[p7_GA1] == p7_CUTOFF_UNSET) ESL_FAIL(eslEINVAL, pli->errbuf, "GA bit thresholds unavailable on model %s\n", om->name);
- pli->T = pli->incT = om->cutoff[p7_GA1];
- pli->domT = pli->incdomT = om->cutoff[p7_GA2];
- }
- else if (pli->use_bit_cutoffs == p7H_TC)
- {
- if (om->cutoff[p7_TC1] == p7_CUTOFF_UNSET) ESL_FAIL(eslEINVAL, pli->errbuf, "TC bit thresholds unavailable on model %s\n", om->name);
- pli->T = pli->incT = om->cutoff[p7_TC1];
- pli->domT = pli->incdomT = om->cutoff[p7_TC2];
- }
- else if (pli->use_bit_cutoffs == p7H_NC)
- {
- if (om->cutoff[p7_NC1] == p7_CUTOFF_UNSET) ESL_FAIL(eslEINVAL, pli->errbuf, "NC bit thresholds unavailable on model %s\n", om->name);
- pli->T = pli->incT = om->cutoff[p7_NC1];
- pli->domT = pli->incdomT = om->cutoff[p7_NC2];
- }
- }
-
- return eslOK;
- }
-
-
-/* Function: p7_pli_NewSeq()
-* Synopsis: Prepare pipeline for a new sequence (target or query)
-* Incept: SRE, Fri Dec 5 10:57:15 2008 [Janelia]
-*
-* Purpose: Caller has a new sequence <sq>. Prepare the pipeline <pli>
-* to receive this model as either a query or a target.
-*
-* Returns: <eslOK> on success.
-*/
-int
-p7_pli_NewSeq(P7_PIPELINE *pli, const ESL_SQ *sq)
-{
- pli->nseqs++;
- pli->nres += sq->n;
- if (pli->Z_setby == p7_ZSETBY_NTARGETS && pli->mode == p7_SEARCH_SEQS) pli->Z = pli->nseqs;
- return eslOK;
-}
-
-/* Function: p7_pipeline_Merge()
- * Synopsis: Merge the pipeline statistics
- * Incept:
- *
- * Purpose: Caller has a new model <om>. Prepare the pipeline <pli>
- * to receive this model as either a query or a target.
- *
- * The pipeline may alter the null model <bg> in a model-specific
- * way (if we're using a composition bias filter HMM in the
- * pipeline).
- *
- * Returns: <eslOK> on success.
- *
- * <eslEINVAL> if pipeline expects to be able to use a
- * model's bit score thresholds, but this model does not
- * have the appropriate ones set.
- */
-int
-p7_pipeline_Merge(P7_PIPELINE *p1, P7_PIPELINE *p2)
-{
- /* if we are searching a sequence database, we need to keep track of the
- * number of sequences and residues processed.
- */
- if (p1->mode == p7_SEARCH_SEQS)
- {
- p1->nseqs += p2->nseqs;
- p1->nres += p2->nres;
- }
- else
- {
- p1->nmodels += p2->nmodels;
- p1->nnodes += p2->nnodes;
- }
-
- p1->n_past_msv += p2->n_past_msv;
- p1->n_past_bias += p2->n_past_bias;
- p1->n_past_vit += p2->n_past_vit;
- p1->n_past_fwd += p2->n_past_fwd;
-
- if (p1->Z_setby == p7_ZSETBY_NTARGETS)
- {
- p1->Z += (p1->mode == p7_SCAN_MODELS) ? p2->nmodels : p2->nseqs;
- }
- else
- {
- p1->Z = p2->Z;
- }
-
- return eslOK;
-}
-
-/* Function: p7_Pipeline()
-* Synopsis: HMMER3's accelerated seq/profile comparison pipeline.
-* Incept: SRE, Thu Dec 4 17:17:01 2008 [Janelia]
-*
-* Purpose: Run H3's accelerated pipeline to compare profile <om>
-* against sequence <sq>. If a significant hit is found,
-* information about it is added to the <hitlist>. The pipeline
-* accumulates beancounting information about how many comparisons
-* flow through the pipeline while it's active.
-*
-* Returns: <eslOK> on success. If a significant hit is obtained,
-* its information is added to the growing <hitlist>.
-*
- * <eslEINVAL> if (in a scan pipeline) we're supposed to
- * set GA/TC/NC bit score thresholds but the model doesn't
- * have any.
- *
-* <eslERANGE> on numerical overflow errors in the
-* optimized vector implementations; particularly in
-* posterior decoding. I don't believe this is possible for
-* multihit local models, but I'm set up to catch it
-* anyway. We may emit a warning to the user, but cleanly
-* skip the problematic sequence and continue.
-*
-* Throws: (no abnormal error conditions)
-*
-* Xref: J4/25.
-*/
-
-int
-p7_Pipeline(P7_PIPELINE *pli, P7_OPROFILE *om, P7_BG *bg, const ESL_SQ *sq, P7_TOPHITS *hitlist, int percentPerFilters,
- U2::TaskStateInfo & ti, int wholeSeqSz )
-{
- P7_HIT *hit = NULL; /* ptr to the current hit output data */
- float usc, vfsc, fwdsc; /* filter scores */
- float filtersc; /* HMM null filter score */
- float nullsc; /* null model score */
- float seqbias;
- float seq_score; /* the corrected per-seq bit score */
- float sum_score; /* the corrected reconstruction score for the seq */
- float pre_score, pre2_score; /* uncorrected bit scores for seq */
- double P; /* P-value of a hit */
- int Ld; /* # of residues in envelopes */
- int d;
- int status;
-
- // ! CODE ADDED !
- assert( 0 < percentPerFilters );
- if( ti.cancelFlag ) { return eslCANCELED; }
-
- if (sq->n == 0) return eslOK; /* silently skip length 0 seqs; they'd cause us all sorts of weird problems */
-
- p7_omx_GrowTo(pli->oxf, om->M, 0, sq->n); /* expand the one-row omx if needed */
-
- /* Base null model score (we could calculate this in NewSeq(), for a scan pipeline) */
- // !!! CODE CHANGED !!!
- //p7_bg_NullOne (bg, sq->dsq, sq->n, &nullsc);
- p7_bg_NullOne (bg, sq->dsq, wholeSeqSz, &nullsc);
-
- /* First level filter: the MSV filter, multihit with <om> */
- // ! CODE ADDED !
- status = p7_MSVFilter(sq->dsq, sq->n, om, pli->oxf, &usc, percentPerFilters, ti );
- if( eslCANCELED == status ) { return eslCANCELED; }
-
- seq_score = (usc - nullsc) / eslCONST_LOG2;
- P = esl_gumbel_surv(seq_score, om->evparam[p7_MMU], om->evparam[p7_MLAMBDA]);
- if (P > pli->F1) return eslOK;
- pli->n_past_msv++;
-
- /* biased composition HMM filtering */
- if (pli->do_biasfilter)
- {
- p7_bg_FilterScore(bg, sq->dsq, sq->n, &filtersc);
- seq_score = (usc - filtersc) / eslCONST_LOG2;
- P = esl_gumbel_surv(seq_score, om->evparam[p7_MMU], om->evparam[p7_MLAMBDA]);
- if (P > pli->F1) return eslOK;
- }
- else filtersc = nullsc;
- pli->n_past_bias++;
-
- // !!! CODE DELETED !!!
-
- /* Second level filter: ViterbiFilter(), multihit with <om> */
- if (P > pli->F2)
- {
- // ! CODE ADDED !
- p7_ViterbiFilter(sq->dsq, sq->n, om, pli->oxf, &vfsc, percentPerFilters, ti );
- if( eslCANCELED == status ) { return eslCANCELED; }
-
- seq_score = (vfsc-filtersc) / eslCONST_LOG2;
- P = esl_gumbel_surv(seq_score, om->evparam[p7_VMU], om->evparam[p7_VLAMBDA]);
- if (P > pli->F2) return eslOK;
- }
- pli->n_past_vit++;
-
- /* Parse it with Forward and obtain its real Forward score. */
- // ! CODE ADDED !
- p7_ForwardParser(sq->dsq, sq->n, om, pli->oxf, &fwdsc, percentPerFilters, ti );
- if( eslCANCELED == status ) { return eslCANCELED; }
-
- seq_score = (fwdsc-filtersc) / eslCONST_LOG2;
- P = esl_exp_surv(seq_score, om->evparam[p7_FTAU], om->evparam[p7_FLAMBDA]);
- if (P > pli->F3) return eslOK;
- pli->n_past_fwd++;
-
- /* ok, it's for real. Now a Backwards parser pass, and hand it to domain definition workflow */
- p7_omx_GrowTo(pli->oxb, om->M, 0, sq->n);
- // ! CODE ADDED !
- p7_BackwardParser(sq->dsq, sq->n, om, pli->oxf, pli->oxb, NULL, percentPerFilters, ti );
- if( eslCANCELED == status ) { return eslCANCELED; }
-
- status = p7_domaindef_ByPosteriorHeuristics(sq, om, pli->oxf, pli->oxb, pli->fwd, pli->bck, pli->ddef, percentPerFilters, ti, wholeSeqSz );
- if( eslCANCELED == status ) { return eslCANCELED; }
- // ! CODE CHANGED: ESL_FAIL was here !
- else if (status != eslOK) ESL_EXCEPTION(status, pli->errbuf, "domain definition workflow failure"); /* eslERANGE can happen */
-
- if (pli->ddef->nregions == 0) return eslOK; /* score passed threshold but there's no discrete domains here */
- if (pli->ddef->nenvelopes == 0) return eslOK; /* rarer: region was found, stochastic clustered, no envelopes found */
-
-
- /* Calculate the null2-corrected per-seq score */
- if (pli->do_null2)
- {
- seqbias = esl_vec_FSum(pli->ddef->n2sc, sq->n+1);
- seqbias = p7_FLogsum(0.0, log((double)(bg->omega)) + seqbias);
- }
- else seqbias = 0.0;
- pre_score = (fwdsc - nullsc) / eslCONST_LOG2;
- seq_score = (fwdsc - (nullsc + seqbias)) / eslCONST_LOG2;
-
-
- /* Calculate the "reconstruction score": estimated
- * per-sequence score as sum of individual domains,
- * discounting domains that aren't significant after they're
- * null-corrected.
- */
- sum_score = 0.0f;
- seqbias = 0.0f;
- Ld = 0;
- if (pli->do_null2)
- {
- for (d = 0; d < pli->ddef->ndom; d++)
- {
- if (pli->ddef->dcl[d].envsc - pli->ddef->dcl[d].domcorrection > 0.0)
- {
- sum_score += pli->ddef->dcl[d].envsc;
- Ld += pli->ddef->dcl[d].jenv - pli->ddef->dcl[d].ienv + 1;
- seqbias += pli->ddef->dcl[d].domcorrection;
- }
- }
- seqbias = p7_FLogsum(0.0, log((double)(bg->omega)) + seqbias);
- }
- else
- {
- for (d = 0; d < pli->ddef->ndom; d++)
- {
- if (pli->ddef->dcl[d].envsc > 0.0)
- {
- sum_score += pli->ddef->dcl[d].envsc;
- Ld += pli->ddef->dcl[d].jenv - pli->ddef->dcl[d].ienv + 1;
- }
- }
- seqbias = 0.0;
- }
- //sum_score += (sq->n-Ld) * log((double)((float) sq->n / (float) (sq->n+3)));
- sum_score += (sq->n-Ld) * log((double)((float) wholeSeqSz / (float) (wholeSeqSz+3)));
- pre2_score = (sum_score - nullsc) / eslCONST_LOG2;
- sum_score = (sum_score - (nullsc + seqbias)) / eslCONST_LOG2;
-
- /* A special case: let sum_score override the seq_score when it's better, and it includes at least 1 domain */
- if (Ld > 0 && sum_score > seq_score)
- {
- seq_score = sum_score;
- pre_score = pre2_score;
- }
-
- /* Apply thresholding and determine whether to put this
- * target into the hit list. E-value thresholding may
- * only be a lower bound for now, so this list may be longer
- * than eventually reported.
- */
- P = esl_exp_surv (seq_score, om->evparam[p7_FTAU], om->evparam[p7_FLAMBDA]);
- if (p7_pli_TargetReportable(pli, seq_score, P))
- {
- p7_tophits_CreateNextHit(hitlist, &hit);
- if (pli->mode == p7_SEARCH_SEQS) {
- if ( (status = esl_strdup(sq->name, -1, &(hit->name))) != eslOK) ESL_EXCEPTION(eslEMEM, "allocation failure");
- if (sq->acc[0] != '\0' && (status = esl_strdup(sq->acc, -1, &(hit->acc))) != eslOK) ESL_EXCEPTION(eslEMEM, "allocation failure");
- if (sq->desc[0] != '\0' && (status = esl_strdup(sq->desc, -1, &(hit->desc))) != eslOK) ESL_EXCEPTION(eslEMEM, "allocation failure");
- } else {
- if ((status = esl_strdup(om->name, -1, &(hit->name))) != eslOK) ESL_EXCEPTION(eslEMEM, "allocation failure");
- if ((status = esl_strdup(om->acc, -1, &(hit->acc))) != eslOK) ESL_EXCEPTION(eslEMEM, "allocation failure");
- if ((status = esl_strdup(om->desc, -1, &(hit->desc))) != eslOK) ESL_EXCEPTION(eslEMEM, "allocation failure");
- }
- hit->ndom = pli->ddef->ndom;
- hit->nexpected = pli->ddef->nexpected;
- hit->nregions = pli->ddef->nregions;
- hit->nclustered = pli->ddef->nclustered;
- hit->noverlaps = pli->ddef->noverlaps;
- hit->nenvelopes = pli->ddef->nenvelopes;
-
- hit->pre_score = pre_score;
- hit->pre_pvalue = esl_exp_surv (hit->pre_score, om->evparam[p7_FTAU], om->evparam[p7_FLAMBDA]);
-
- hit->score = seq_score;
- hit->pvalue = P;
- hit->sortkey = pli->inc_by_E ? -log(P) : seq_score; /* per-seq output sorts on bit score if inclusion is by score */
-
- hit->sum_score = sum_score;
- hit->sum_pvalue = esl_exp_surv (hit->sum_score, om->evparam[p7_FTAU], om->evparam[p7_FLAMBDA]);
-
- /* Transfer all domain coordinates (unthresholded for
- * now) with their alignment displays to the hit list,
- * associated with the sequence. Domain reporting will
- * be thresholded after complete hit list is collected,
- * because we probably need to know # of significant
- * hits found to set domZ, and thence threshold and
- * count reported domains.
- */
- hit->dcl = pli->ddef->dcl;
- pli->ddef->dcl = NULL;
- hit->best_domain = 0;
- for (d = 0; d < hit->ndom; d++)
- {
- Ld = hit->dcl[d].jenv - hit->dcl[d].ienv + 1;
- // !!! CODE CHANGED !!!
- //hit->dcl[d].bitscore = hit->dcl[d].envsc + (sq->n-Ld) * log((float) sq->n / (float) (sq->n+3));
- hit->dcl[d].bitscore = hit->dcl[d].envsc + (wholeSeqSz-Ld) * log((double) wholeSeqSz / (double) (wholeSeqSz+3));
- hit->dcl[d].dombias = (pli->do_null2 ? p7_FLogsum(0.0, log((double)bg->omega) + hit->dcl[d].domcorrection) : 0.0);
- hit->dcl[d].bitscore = (hit->dcl[d].bitscore - (nullsc + hit->dcl[d].dombias)) / eslCONST_LOG2;
- hit->dcl[d].pvalue = esl_exp_surv (hit->dcl[d].bitscore, om->evparam[p7_FTAU], om->evparam[p7_FLAMBDA]);
-
- if (hit->dcl[d].bitscore > hit->dcl[hit->best_domain].bitscore) hit->best_domain = d;
- }
-
- /* If we're using model-specific bit score thresholds (GA | TC |
- * NC) and we're in an hmmscan pipeline (mode = p7_SCAN_MODELS),
- * then we *must* apply those reporting or inclusion thresholds
- * now, because this model is about to go away; we won't have
- * its thresholds after all targets have been processed.
- *
- * If we're using E-value thresholds and we don't know the
- * search space size (Z_setby or domZ_setby =
- * p7_ZSETBY_NTARGETS), we *cannot* apply those thresholds now,
- * and we *must* wait until all targets have been processed
- * (see p7_tophits_Threshold()).
- *
- * For any other thresholding, it doesn't matter whether we do
- * it here (model-specifically) or at the end (in
- * p7_tophits_Threshold()).
- *
- * What we actually do, then, is to set the flags if we're using
- * model-specific score thresholds (regardless of whether we're
- * in a scan or a search pipeline); otherwise we leave it to
- * p7_tophits_Threshold(). p7_tophits_Threshold() is always
- * responsible for *counting* the reported, included sequences.
- *
- * [xref J5/92]
- */
- if (pli->use_bit_cutoffs)
- {
- if (p7_pli_TargetReportable(pli, hit->score, hit->pvalue))
- {
- hit->flags |= p7_IS_REPORTED;
- if (p7_pli_TargetIncludable(pli, hit->score, hit->pvalue))
- hit->flags |= p7_IS_INCLUDED;
- }
-
- for (d = 0; d < hit->ndom; d++)
- {
- if (p7_pli_DomainReportable(pli, hit->dcl[d].bitscore, hit->dcl[d].pvalue))
- {
- hit->dcl[d].is_reported = TRUE;
- if (p7_pli_DomainIncludable(pli, hit->dcl[d].bitscore, hit->dcl[d].pvalue))
- hit->dcl[d].is_included = TRUE;
- }
- }
- }
- }
-
- return eslOK;
-}
-
-
-
-// ! here were function p7_pli_Statistics. we don't need it !
-
-/*------------------- end, pipeline API -------------------------*/
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/p7_prior.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/p7_prior.cpp
deleted file mode 100644
index 7de3218..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/p7_prior.cpp
+++ /dev/null
@@ -1,443 +0,0 @@
-/* Mixture Dirichlet priors for profile HMMs.
-*
-*
-* SRE, Sat Mar 24 09:12:44 2007 [Janelia]
-* SVN $Id: p7_prior.c 2670 2008-12-17 14:05:43Z eddys $
-*/
-
-#include <hmmer3/p7_config.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_alphabet.h>
-#include <hmmer3/easel/esl_vectorops.h>
-
-#include <hmmer3/hmmer.h>
-
-
-/* Function: p7_prior_CreateAmino()
-* Incept: SRE, Sat Mar 24 09:35:36 2007 [Janelia]
-*
-* Purpose: Creates the default mixture Dirichlet prior for protein
-* sequences.
-*
-* The transition priors (match, insert, delete) are all
-* single Dirichlets, originally trained by Graeme
-* Mitchison in the mid-1990's. Notes have been lost, but
-* we believe they were trained on an early version of
-* Pfam.
-*
-* The match emission prior is a nine-component mixture
-* from Kimmen Sjolander, who trained it on the Blocks9
-* database \citep{Sjolander96}.
-*
-* The insert emission prior is a single Dirichlet with
-* high $|\alpha|$, such that insert emission probabilities
-* are essentially fixed by the prior, regardless of
-* observed count data. The slightly polar parameterization
-* was obtained by training on Pfam 1.0.
-*
-* Returns: a pointer to the new <P7_PRIOR> structure.
-*/
-P7_PRIOR *
-p7_prior_CreateAmino(void)
-{
- int status;
- P7_PRIOR *pri = NULL;
- int q;
- /* default match mixture coefficients: [Sjolander96] */
- static const double defmq[9] = {
- 0.178091, 0.056591, 0.0960191, 0.0781233, 0.0834977,
- 0.0904123, 0.114468, 0.0682132, 0.234585 };
-
- /* default match mixture Dirichlet components [Sjolander96] */
- static const double defm[9][20] = {
- { 0.270671, 0.039848, 0.017576, 0.016415, 0.014268,
- 0.131916, 0.012391, 0.022599, 0.020358, 0.030727,
- 0.015315, 0.048298, 0.053803, 0.020662, 0.023612,
- 0.216147, 0.147226, 0.065438, 0.003758, 0.009621 },
- { 0.021465, 0.010300, 0.011741, 0.010883, 0.385651,
- 0.016416, 0.076196, 0.035329, 0.013921, 0.093517,
- 0.022034, 0.028593, 0.013086, 0.023011, 0.018866,
- 0.029156, 0.018153, 0.036100, 0.071770, 0.419641 },
- { 0.561459, 0.045448, 0.438366, 0.764167, 0.087364,
- 0.259114, 0.214940, 0.145928, 0.762204, 0.247320,
- 0.118662, 0.441564, 0.174822, 0.530840, 0.465529,
- 0.583402, 0.445586, 0.227050, 0.029510, 0.121090 },
- { 0.070143, 0.011140, 0.019479, 0.094657, 0.013162,
- 0.048038, 0.077000, 0.032939, 0.576639, 0.072293,
- 0.028240, 0.080372, 0.037661, 0.185037, 0.506783,
- 0.073732, 0.071587, 0.042532, 0.011254, 0.028723 },
- { 0.041103, 0.014794, 0.005610, 0.010216, 0.153602,
- 0.007797, 0.007175, 0.299635, 0.010849, 0.999446,
- 0.210189, 0.006127, 0.013021, 0.019798, 0.014509,
- 0.012049, 0.035799, 0.180085, 0.012744, 0.026466 },
- { 0.115607, 0.037381, 0.012414, 0.018179, 0.051778,
- 0.017255, 0.004911, 0.796882, 0.017074, 0.285858,
- 0.075811, 0.014548, 0.015092, 0.011382, 0.012696,
- 0.027535, 0.088333, 0.944340, 0.004373, 0.016741 },
- { 0.093461, 0.004737, 0.387252, 0.347841, 0.010822,
- 0.105877, 0.049776, 0.014963, 0.094276, 0.027761,
- 0.010040, 0.187869, 0.050018, 0.110039, 0.038668,
- 0.119471, 0.065802, 0.025430, 0.003215, 0.018742 },
- { 0.452171, 0.114613, 0.062460, 0.115702, 0.284246,
- 0.140204, 0.100358, 0.550230, 0.143995, 0.700649,
- 0.276580, 0.118569, 0.097470, 0.126673, 0.143634,
- 0.278983, 0.358482, 0.661750, 0.061533, 0.199373 },
- { 0.005193, 0.004039, 0.006722, 0.006121, 0.003468,
- 0.016931, 0.003647, 0.002184, 0.005019, 0.005990,
- 0.001473, 0.004158, 0.009055, 0.003630, 0.006583,
- 0.003172, 0.003690, 0.002967, 0.002772, 0.002686 },
- };
-
- ESL_ALLOC_WITH_TYPE(pri, P7_PRIOR*, sizeof(P7_PRIOR));
- pri->tm = pri->ti = pri->td = pri->em = pri->ei = NULL;
-
- pri->tm = esl_mixdchlet_Create(1, 3); /* single component; 3 params */
- pri->ti = esl_mixdchlet_Create(1, 2); /* single component; 2 params */
- pri->td = esl_mixdchlet_Create(1, 2); /* single component; 2 params */
- pri->em = esl_mixdchlet_Create(9, 20); /* 9 component; 20 params */
- pri->ei = esl_mixdchlet_Create(1, 20); /* single component; 20 params */
-
-
- if (pri->tm == NULL || pri->ti == NULL || pri->td == NULL || pri->em == NULL || pri->ei == NULL) goto ERROR;
-
- /* Transition priors: originally from Graeme Mitchison. Notes are lost, but we believe
- * they were trained on an early version of Pfam.
- */
- pri->tm->pq[0] = 1.0;
- pri->tm->alpha[0][0] = 0.7939; /* TMM */
- pri->tm->alpha[0][1] = 0.0278; /* TMI */ /* Markus suggests ~10x MD, ~0.036; test! */
- pri->tm->alpha[0][2] = 0.0135; /* TMD */ /* Markus suggests 0.1x MI, ~0.004; test! */
-
- pri->ti->pq[0] = 1.0;
- pri->ti->alpha[0][0] = 0.1551; /* TIM */
- pri->ti->alpha[0][1] = 0.1331; /* TII */
-
- pri->td->pq[0] = 1.0;
- pri->td->alpha[0][0] = 0.9002; /* TDM */
- pri->td->alpha[0][1] = 0.5630; /* TDD */
-
- /* Match emission priors are from Kimmen Sjolander, trained
- * on the Blocks9 database. [Sjolander96]
- */
- for (q = 0; q < 9; q++)
- {
- pri->em->pq[q] = defmq[q];
- esl_vec_DCopy(defm[q], 20, pri->em->alpha[q]);
- }
-
- /* Insert emission priors were trained on Pfam 1.0, 10 Nov 1996;
- * see ~/projects/plan7/InsertStatistics.
- * Inserts are slightly biased towards polar residues and away from
- * hydrophobic residues.
- */
- pri->ei->pq[0] = 1.0;
- pri->ei->alpha[0][0] = 681.; /* A */
- pri->ei->alpha[0][1] = 120.; /* C */
- pri->ei->alpha[0][2] = 623.; /* D */
- pri->ei->alpha[0][3] = 651.; /* E */
- pri->ei->alpha[0][4] = 313.; /* F */
- pri->ei->alpha[0][5] = 902.; /* G */
- pri->ei->alpha[0][6] = 241.; /* H */
- pri->ei->alpha[0][7] = 371.; /* I */
- pri->ei->alpha[0][8] = 687.; /* K */
- pri->ei->alpha[0][9] = 676.; /* L */
- pri->ei->alpha[0][10] = 143.; /* M */
- pri->ei->alpha[0][11] = 548.; /* N */
- pri->ei->alpha[0][12] = 647.; /* P */
- pri->ei->alpha[0][13] = 415.; /* Q */
- pri->ei->alpha[0][14] = 551.; /* R */
- pri->ei->alpha[0][15] = 926.; /* S */
- pri->ei->alpha[0][16] = 623.; /* T */
- pri->ei->alpha[0][17] = 505.; /* V */
- pri->ei->alpha[0][18] = 102.; /* W */
- pri->ei->alpha[0][19] = 269.; /* Y */
-
- return pri;
-
-ERROR:
- if (pri != NULL) p7_prior_Destroy(pri);
- return NULL;
-}
-
-
-/* Function: p7_prior_CreateNucleicNew()
-* Incept: TJW, Thu Nov 12 21:15:11 EST 2009 [Couch at home]
-*
-* Purpose: Creates the default mixture Dirichlet prior for nucleotiden
-* sequences.
-*
-* The transition priors (match, insert, delete) are all
-* single Dirichlets, originally trained by Graeme
-* Mitchison in the mid-1990's. Notes have been lost, but
-* we believe they were trained on an early version of
-* Pfam.
-*
-* The match emission prior is an eight-component mixture
-* trained against a portion of the rmark dataset
-*
-* The insert emission prior is a single Dirichlet with
-* high $|\alpha|$, such that insert emission probabilities
-* are essentially fixed by the prior, regardless of
-* observed count data.
-*
-* Returns: a pointer to the new <P7_PRIOR> structure.
-*/
-P7_PRIOR *
-p7_prior_CreateNucleic(void)
-{
- int status;
- P7_PRIOR *pri = NULL;
- int q;
-
-
- /* Match emission priors are trained on rmark database [Nawrocki 08]
- */
-
- /* Plus-1 Laplace prior
- int num_comp = 1;
- static double defmq[2] = { 1.0 };
- static double defm[1][4] = {
- { 1.0, 1.0, 1.0, 1.0} //
- };
- */
- /*
- int num_comp = 2;
- static double defmq[2] = { 0.42, 0.58 };
- static double defm[2][4] = {
- { 0.94, 0.90, 0.89, 1.13}, //
- { 0.096, 0.078, 0.093, 0.089} //
- };
- */
- /*
- //weird - but this performs marginally better than the best 2- 5- or 8-component mixtures tested
- // (on rmark - MER: 2 better than 5/8-comp , 3 better than 2-comp )
- int num_comp = 4;
- static double defmq[4] = { 0.16, 0.29, 0.12, 0.43 };
- static double defm[4][4] = {
- { 0.36, 0.10, 5.3, 0.13}, // G
- { 0.05, 0.18, 0.03, 0.19}, // CT
- { 7.1, 0.13, 0.35, 0.17}, // A
- { 0.96, 0.92, 0.91, 1.19} // uniform
- };
- */
-
- /*On rmark, this model does only slightly better than the 2-component model
- It's chosen as the default on grounds of reasonableness, given that it shows
- a non-uniform transition:transversion ratio. It's based on the results
- of training against a portion of rmark, but the overspecified numbers
- resulting from that training have been rounded/simplified.
- */
- int num_comp = 5;
- static double defmq[5] = { 0.16, 0.13, 0.17, 0.15, 0.39 };
- static double defm[5][4] = {
- { 6.0, 0.2, 0.5, 0.2}, // A
- { 0.2, 8.0, 0.2, 0.5}, // C
- { 0.5, 0.2, 8.0, 0.2}, // G
- { 0.2, 0.5, 0.2, 4.0}, // T
- { 1.3, 1.2, 1.2, 1.4} // uniform
- };
-
-
- /* gives no improved performance in my hands over the 5-component model
- int num_comp = 8;
- static double defmq[8] = { 0.13, 0.08, 0.08, 0.13, 0.08, 0.08, 0.17, 0.25 } ;
- static double defm[8][4] = {
- { 4.0, 0.3, 0.5, 0.4}, // A
- { 0.3, 22.0, 0.3, 0.8}, // C
- { 1.0, 0.4, 28.0, 0.4}, // G
- { 0.5, 0.8, 0.3, 6.0}, // T
- { 1.8, 0.8, 6.0, 1.0}, // AG
- { 0.6, 6.0, 0.6, 2.4}, // CT
- { 0.03, 0.01, 0.02, 0.02}, // anything, but highly conserved
- { 2.0, 2.0, 2.0, 2.0} // anything, not much conservation
- };
- */
-
- ESL_ALLOC_WITH_TYPE(pri, P7_PRIOR*, sizeof(P7_PRIOR));
- pri->tm = pri->ti = pri->td = pri->em = pri->ei = NULL;
-
-
- pri->tm = esl_mixdchlet_Create(1, 3); // match transitions; single component; 3 params
- pri->ti = esl_mixdchlet_Create(1, 2); // insert transitions; single component; 2 params
- pri->td = esl_mixdchlet_Create(1, 2); // delete transitions; single component; 2 params
- pri->em = esl_mixdchlet_Create(num_comp, 4); // match emissions; X component; 4 params
- pri->ei = esl_mixdchlet_Create(1, 4); // insert emissions; single component; 4 params
-
-
- if (pri->tm == NULL || pri->ti == NULL || pri->td == NULL || pri->em == NULL || pri->ei == NULL) goto ERROR;
-
- /* Transition priors: roughly, learned from rmark benchmark - hand-beautified (trimming overspecified significant digits)
- */
- pri->tm->pq[0] = 1.0;
- pri->tm->alpha[0][0] = 2.0; // TMM
- pri->tm->alpha[0][1] = 0.1; // TMI
- pri->tm->alpha[0][2] = 0.1; // TMD
-
-
- pri->ti->pq[0] = 1.0;
- pri->ti->alpha[0][0] = 0.06; // TIM
- pri->ti->alpha[0][1] = 0.2; // TII
-
- pri->td->pq[0] = 1.0;
- pri->td->alpha[0][0] = 0.1; // TDM
- pri->td->alpha[0][1] = 0.2; // TDD
-
-
-
- /* Match emission priors */
- for (q = 0; q < num_comp; q++)
- {
- pri->em->pq[q] = defmq[q];
- esl_vec_DCopy(defm[q], 4, pri->em->alpha[q]);
- }
-
-
- /* Insert emission priors. Should that alphas be lower? higher?
- */
- pri->ei->pq[0] = 1.0;
- esl_vec_DSet(pri->ei->alpha[0], 4, 1.0);
-
- return pri;
-
-ERROR:
- if (pri != NULL) p7_prior_Destroy(pri);
- return NULL;
-}
-
-/* Function: p7_prior_CreateLaplace()
-* Synopsis: Creates Laplace plus-one prior.
-* Incept: SRE, Sat Jun 30 09:48:13 2007 [Janelia]
-*
-* Purpose: Create a Laplace plus-one prior for alphabet <abc>.
-*/
-P7_PRIOR *
-p7_prior_CreateLaplace(const ESL_ALPHABET *abc)
-{
- P7_PRIOR *pri = NULL;
- int status;
-
- ESL_ALLOC_WITH_TYPE(pri, P7_PRIOR*, sizeof(P7_PRIOR));
- pri->tm = pri->ti = pri->td = pri->em = pri->ei = NULL;
-
- pri->tm = esl_mixdchlet_Create(1, 3); /* single component; 3 params */
- pri->ti = esl_mixdchlet_Create(1, 2); /* single component; 2 params */
- pri->td = esl_mixdchlet_Create(1, 2); /* single component; 2 params */
- pri->em = esl_mixdchlet_Create(1, abc->K); /* single component; K params */
- pri->ei = esl_mixdchlet_Create(1, abc->K); /* single component; K params */
-
- if (pri->tm == NULL || pri->ti == NULL || pri->td == NULL || pri->em == NULL || pri->ei == NULL) goto ERROR;
-
- pri->tm->pq[0] = 1.0; esl_vec_DSet(pri->tm->alpha[0], 3, 1.0); /* match transitions */
- pri->ti->pq[0] = 1.0; esl_vec_DSet(pri->ti->alpha[0], 2, 1.0); /* insert transitions */
- pri->td->pq[0] = 1.0; esl_vec_DSet(pri->td->alpha[0], 2, 1.0); /* delete transitions */
- pri->em->pq[0] = 1.0; esl_vec_DSet(pri->em->alpha[0], abc->K, 1.0); /* match emissions */
- pri->ei->pq[0] = 1.0; esl_vec_DSet(pri->ei->alpha[0], abc->K, 1.0); /* insert emissions */
- return pri;
-
-ERROR:
- p7_prior_Destroy(pri);
- return NULL;
-}
-
-
-/* Function: p7_prior_Destroy()
-* Incept: SRE, Sat Mar 24 09:55:09 2007 [Janelia]
-*
-* Purpose: Frees a mixture Dirichlet prior.
-*/
-void
-p7_prior_Destroy(P7_PRIOR *pri)
-{
- if (pri == NULL) return;
- if (pri->tm != NULL) esl_mixdchlet_Destroy(pri->tm);
- if (pri->ti != NULL) esl_mixdchlet_Destroy(pri->ti);
- if (pri->td != NULL) esl_mixdchlet_Destroy(pri->td);
- if (pri->em != NULL) esl_mixdchlet_Destroy(pri->em);
- if (pri->ei != NULL) esl_mixdchlet_Destroy(pri->ei);
- free(pri);
-}
-
-
-
-/* Function: p7_ParameterEstimation()
-* Incept: SRE, Sat Mar 24 10:15:37 2007 [Janelia]
-*
-* Purpose: Given an <hmm> containing collected, weighted counts;
-* and given a mixture Dirichlet prior <pri>;
-* calculate mean posterior parameter estimates for
-* all model parameters, converting the
-* HMM to a parameterized probabilistic model.
-*
-* Returns: <eslOK> on success.
-*/
-int
-p7_ParameterEstimation(P7_HMM *hmm, const P7_PRIOR *pri)
-{
- int k;
- double c[p7_MAXABET];
- double p[p7_MAXABET];
- double mix[p7_MAXDCHLET];
-
- /* Match transitions 0,1..M: 0 is the B state
- * TMD at node M is 0.
- */
- for (k = 0; k <= hmm->M; k++) {
- esl_vec_F2D(hmm->t[k], 3, c);
- esl_mixdchlet_MPParameters(c, 3, pri->tm, mix, p);
- esl_vec_D2F(p, 3, hmm->t[k]);
- }
- hmm->t[hmm->M][p7H_MD] = 0.0;
- esl_vec_FNorm(hmm->t[hmm->M], 3);
-
- /* Insert transitions, 0..M
- */
- for (k = 0; k <= hmm->M; k++) {
- esl_vec_F2D(hmm->t[k]+3, 2, c);
- esl_mixdchlet_MPParameters(c, 2, pri->ti, mix, p);
- esl_vec_D2F(p, 2, hmm->t[k]+3);
- }
-
- /* Delete transitions, 1..M-1
- * For k=0, which is unused; convention sets TMM=1.0, TMD=0.0
- * For k=M, TMM = 1.0 (to the E state) and TMD=0.0 (no next D; must go to E).
- */
- for (k = 1; k < hmm->M; k++) {
- esl_vec_F2D(hmm->t[k]+5, 2, c);
- esl_mixdchlet_MPParameters(c, 2, pri->td, mix, p);
- esl_vec_D2F(p, 2, hmm->t[k]+5);
- }
- hmm->t[0][p7H_DM] = hmm->t[hmm->M][p7H_DM] = 1.0;
- hmm->t[0][p7H_DD] = hmm->t[hmm->M][p7H_DD] = 0.0;
-
- /* Match emissions, 1..M
- * Convention sets mat[0] to a valid pvector: first elem 1, the rest 0.
- */
- for (k = 1; k <= hmm->M; k++) {
- esl_vec_F2D(hmm->mat[k], hmm->abc->K, c);
- esl_mixdchlet_MPParameters(c, hmm->abc->K, pri->em, mix, p);
- esl_vec_D2F(p, hmm->abc->K, hmm->mat[k]);
- }
- esl_vec_FSet(hmm->mat[0], hmm->abc->K, 0.);
- hmm->mat[0][0] = 1.0;
-
- /* Insert emissions 0..M
- */
- for (k = 0; k <= hmm->M; k++) {
- esl_vec_F2D(hmm->ins[k], hmm->abc->K, c);
- esl_mixdchlet_MPParameters(c, hmm->abc->K, pri->ei, mix, p);
- esl_vec_D2F(p, hmm->abc->K, hmm->ins[k]);
- }
- return eslOK;
-}
-
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/p7_profile.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/p7_profile.cpp
deleted file mode 100644
index e7a2bcb..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/p7_profile.cpp
+++ /dev/null
@@ -1,608 +0,0 @@
-/* Routines for the P7_PROFILE structure - Plan 7's search profile
-*
-* 1. The P7_PROFILE object: allocation, initialization, destruction.
-* 2. Access methods.
-* 3. Debugging and development code.
-* 4. Unit tests.
-* 5. Test driver.
-*
-* See also:
-* modelconfig.c : routines that configure a profile given an HMM
-*
-* SRE, Thu Jan 11 15:16:47 2007 [Janelia] [Sufjan Stevens, Illinois]
- * SVN $Id: p7_profile.c 3041 2009-11-12 12:58:09Z eddys $
-*/
-
-#include <hmmer3/p7_config.h>
-
-#include <math.h>
-
-#include <string.h>
-#ifdef HAVE_MPI
-#include <mpi.h>
-#endif
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_vectorops.h>
-
-#include <hmmer3/hmmer.h>
-
-
-/*****************************************************************
-* 1. The P7_PROFILE object: allocation, initialization, destruction.
-*****************************************************************/
-
-/* Function: p7_profile_Create()
-* Synopsis: Allocates a profile.
-* Incept: SRE, Thu Jan 11 15:53:28 2007 [Janelia]
-*
-* Purpose: Allocates for a profile of up to <M> nodes, for digital
-* alphabet <abc>.
-*
-* Because this function might be in the critical path (in
-* hmmscan, for example), we leave much of the model
-* unintialized, including scores and length model
-* probabilities. The <p7_ProfileConfig()> call is what
-* sets these.
-*
-* The alignment mode is set to <p7_NO_MODE>. The
-* reference pointer <gm->abc> is set to <abc>.
-*
-* Returns: a pointer to the new profile.
-*
-* Throws: <NULL> on allocation error.
-*
-* Xref: STL11/125.
-*/
-P7_PROFILE *
-p7_profile_Create(int allocM, const ESL_ALPHABET *abc)
-{
- P7_PROFILE *gm = NULL;
- int x;
- int status;
-
- /* level 0 */
- ESL_ALLOC_WITH_TYPE(gm, P7_PROFILE*, sizeof(P7_PROFILE));
- gm->tsc = NULL;
- gm->rsc = NULL;
- gm->rf = NULL;
- gm->cs = NULL;
- gm->consensus = NULL;
-
- /* level 1 */
- ESL_ALLOC_WITH_TYPE(gm->tsc, float*, sizeof(float) * allocM * p7P_NTRANS);
- ESL_ALLOC_WITH_TYPE(gm->rsc, float**, sizeof(float *) * abc->Kp);
- ESL_ALLOC_WITH_TYPE(gm->rf, char*, sizeof(char) * (allocM+2)); /* yes, +2: each is (0)1..M, +trailing \0 */
- ESL_ALLOC_WITH_TYPE(gm->cs, char*, sizeof(char) * (allocM+2));
- ESL_ALLOC_WITH_TYPE(gm->consensus, char*, sizeof(char) * (allocM+2));
- gm->rsc[0] = NULL;
-
- /* level 2 */
- ESL_ALLOC_WITH_TYPE(gm->rsc[0], float*, sizeof(float) * abc->Kp * (allocM+1) * p7P_NR);
- for (x = 1; x < abc->Kp; x++)
- gm->rsc[x] = gm->rsc[0] + x * (allocM+1) * p7P_NR;
-
- /* Initialize some edge pieces of memory that are never used,
- * and are only present for indexing convenience.
- */
- esl_vec_FSet(gm->tsc, p7P_NTRANS, -eslINFINITY); /* node 0 nonexistent, has no transitions */
- if (allocM > 1) {
- p7P_TSC(gm, 1, p7P_DM) = -eslINFINITY; /* delete state D_1 is wing-retracted */
- p7P_TSC(gm, 1, p7P_DD) = -eslINFINITY;
- }
- for (x = 0; x < abc->Kp; x++) {
- p7P_MSC(gm, 0, x) = -eslINFINITY; /* no emissions from nonexistent M_0... */
- p7P_ISC(gm, 0, x) = -eslINFINITY; /* or I_0... */
- /* I_M is initialized in profile config, when we know actual M, not just allocated max M */
- }
- x = esl_abc_XGetGap(abc); /* no emission can emit/score gap characters */
- esl_vec_FSet(gm->rsc[x], (allocM+1)*p7P_NR, -eslINFINITY);
- x = esl_abc_XGetMissing(abc); /* no emission can emit/score missing data characters */
- esl_vec_FSet(gm->rsc[x], (allocM+1)*p7P_NR, -eslINFINITY);
-
- /* Set remaining info */
- gm->mode = p7_NO_MODE;
- gm->L = 0;
- gm->allocM = allocM;
- gm->M = 0;
- gm->nj = 0.0f;
-
- gm->roff = -1;
- gm->eoff = -1;
- gm->offs[p7_MOFFSET] = -1;
- gm->offs[p7_FOFFSET] = -1;
- gm->offs[p7_POFFSET] = -1;
-
- gm->name = NULL;
- gm->acc = NULL;
- gm->desc = NULL;
- gm->rf[0] = 0; /* RF line is optional annotation; this flags that it's not set yet */
- gm->cs[0] = 0; /* likewise for CS annotation line */
- gm->consensus[0] = 0;
-
- for (x = 0; x < p7_NEVPARAM; x++) gm->evparam[x] = p7_EVPARAM_UNSET;
- for (x = 0; x < p7_NCUTOFFS; x++) gm->cutoff[x] = p7_CUTOFF_UNSET;
- for (x = 0; x < p7_MAXABET; x++) gm->compo[x] = p7_COMPO_UNSET;
-
- gm->abc = abc;
- return gm;
-
-ERROR:
- p7_profile_Destroy(gm);
- return NULL;
-}
-
-
-/* Function: p7_profile_Copy()
-* Synopsis: Copy a profile.
-* Incept: SRE, Sun Feb 17 10:27:37 2008 [Janelia]
-*
-* Purpose: Copies profile <src> to profile <dst>, where <dst>
-* has already been allocated to be of sufficient size.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation error; <eslEINVAL> if <dst> is too small
-* to fit <src>.
-*/
-int
-p7_profile_Copy(const P7_PROFILE *src, P7_PROFILE *dst)
-{
- int x,z;
- int status;
-
- if (src->M > dst->allocM) ESL_EXCEPTION(eslEINVAL, "destination profile is too small to hold a copy of source profile");
-
- esl_vec_FCopy(src->tsc, src->M*p7P_NTRANS, dst->tsc);
- for (x = 0; x < src->abc->Kp; x++) esl_vec_FCopy(src->rsc[x], (src->M+1)*p7P_NR, dst->rsc[x]);
- for (x = 0; x < p7P_NXSTATES; x++) esl_vec_FCopy(src->xsc[x], p7P_NXTRANS, dst->xsc[x]);
-
- dst->mode = src->mode;
- dst->L = src->L;
- dst->allocM = src->allocM;
- dst->M = src->M;
- dst->nj = src->nj;
-
- dst->roff = src->roff;
- dst->eoff = src->eoff;
- for (x = 0; x < p7_NOFFSETS; ++x) dst->offs[x] = src->offs[x];
-
- if (dst->name != NULL) free(dst->name);
- if (dst->acc != NULL) free(dst->acc);
- if (dst->desc != NULL) free(dst->desc);
-
- if ((status = esl_strdup(src->name, -1, &(dst->name))) != eslOK) return status;
- if ((status = esl_strdup(src->acc, -1, &(dst->acc))) != eslOK) return status;
- if ((status = esl_strdup(src->desc, -1, &(dst->desc))) != eslOK) return status;
-
- strcpy(dst->rf, src->rf); /* RF is optional: if it's not set, *rf=0, and strcpy still works fine */
- strcpy(dst->cs, src->cs); /* CS is also optional annotation */
- strcpy(dst->consensus, src->consensus); /* consensus though is always present on a valid profile */
-
- for (z = 0; z < p7_NEVPARAM; z++) dst->evparam[z] = src->evparam[z];
- for (z = 0; z < p7_NCUTOFFS; z++) dst->cutoff[z] = src->cutoff[z];
- for (z = 0; z < p7_MAXABET; z++) dst->compo[z] = src->compo[z];
- return eslOK;
-}
-
-
-/* Function: p7_profile_Clone()
-* Synopsis: Duplicates a profile.
-* Incept: SRE, Mon Jun 25 08:29:23 2007 [Janelia]
-*
-* Purpose: Duplicate profile <gm>; return a pointer
-* to the newly allocated copy.
-*/
-P7_PROFILE *
-p7_profile_Clone(const P7_PROFILE *gm)
-{
- P7_PROFILE *g2 = NULL;
- int status;
-
- if ((g2 = p7_profile_Create(gm->allocM, gm->abc)) == NULL) return NULL;
- if ((status = p7_profile_Copy(gm, g2)) != eslOK) goto ERROR;
- return g2;
-
-ERROR:
- p7_profile_Destroy(g2);
- return NULL;
-}
-
-
-
-/* Function: p7_profile_SetNullEmissions()
-* Synopsis: Set all emission scores to zero (experimental).
-* Incept: SRE, Mon Jun 25 08:12:06 2007 [Janelia]
-*
-* Purpose: Set all emission scores in profile <gm> to zero.
-* This makes the profile a null model, with all the same
-* length distributions as the original model, but
-* the emission probabilities of the background.
-*
-* Written to test the idea that score statistics will be
-* even better behaved when using a null model with the
-* same length distribution as the search model.
-*
-* Returns: <eslOK> on success.
-*/
-int
-p7_profile_SetNullEmissions(P7_PROFILE *gm)
-{
- int x;
- for (x = 0; x <= gm->abc->K; x++) esl_vec_FSet(gm->rsc[x], (gm->M+1)*p7P_NR, 0.0); /* canonicals */
- for (x = gm->abc->K+1; x <= gm->abc->Kp-3; x++) esl_vec_FSet(gm->rsc[x], (gm->M+1)*p7P_NR, 0.0); /* noncanonicals */
- return eslOK;
-}
-
-
-/* Function: p7_profile_Reuse()
-* Synopsis: Prepare profile to be re-used for a new HMM.
-* Incept: SRE, Wed Jan 2 17:32:36 2008 [Janelia]
-*
-* Purpose: Prepare profile <gm>'s memory to be re-used
-* for a new HMM.
-*/
-int
-p7_profile_Reuse(P7_PROFILE *gm)
-{
- /* name, acc, desc annotation is dynamically allocated for each HMM */
- if (gm->name != NULL) { free(gm->name); gm->name = NULL; }
- if (gm->acc != NULL) { free(gm->acc); gm->acc = NULL; }
- if (gm->desc != NULL) { free(gm->desc); gm->desc = NULL; }
-
- /* set annotations to empty strings */
- gm->rf[0] = 0;
- gm->cs[0] = 0;
- gm->consensus[0] = 0;
-
- /* reset some other things, but leave the rest alone. */
- gm->mode = p7_NO_MODE;
- gm->L = 0;
- gm->M = 0;
- gm->nj = 0.0f;
-
- gm->roff = -1;
- gm->eoff = -1;
- gm->offs[p7_MOFFSET] = -1;
- gm->offs[p7_FOFFSET] = -1;
- gm->offs[p7_POFFSET] = -1;
-
- return eslOK;
-}
-
-
-/* Function: p7_profile_Destroy()
-* Synopsis: Frees a profile.
-* Incept: SRE, Thu Jan 11 15:54:17 2007 [Janelia]
-*
-* Purpose: Frees a profile <gm>.
-*
-* Returns: (void).
-*
-* Xref: STL11/125.
-*/
-void
-p7_profile_Destroy(P7_PROFILE *gm)
-{
- if (gm != NULL) {
- if (gm->rsc != NULL && gm->rsc[0] != NULL) free(gm->rsc[0]);
- if (gm->tsc != NULL) free(gm->tsc);
- if (gm->rsc != NULL) free(gm->rsc);
- if (gm->name != NULL) free(gm->name);
- if (gm->acc != NULL) free(gm->acc);
- if (gm->desc != NULL) free(gm->desc);
- if (gm->rf != NULL) free(gm->rf);
- if (gm->cs != NULL) free(gm->cs);
- if (gm->consensus != NULL) free(gm->consensus);
- free(gm);
- }
- return;
-}
-
-
-/*****************************************************************
-* 2. Access methods.
-*****************************************************************/
-
-/* Function: p7_profile_IsLocal()
-* Synopsis: Return TRUE if profile is in a local alignment mode.
-* Incept: SRE, Thu Jul 12 11:57:49 2007 [Janelia]
-*
-* Purpose: Return <TRUE> if profile is in a local alignment mode.
-*/
-int
-p7_profile_IsLocal(const P7_PROFILE *gm)
-{
- if (gm->mode == p7_UNILOCAL || gm->mode == p7_LOCAL) return TRUE;
- return FALSE;
-}
-
-/* Function: p7_profile_IsMultihit()
-* Synopsis: Return TRUE if profile is in a multihit alignment mode.
-* Incept: SRE, Thu Jul 12 11:58:58 2007 [Janelia]
-*
-* Purpose: Return <TRUE> if profile is in a multihit alignment mode.
-*/
-int
-p7_profile_IsMultihit(const P7_PROFILE *gm)
-{
- if (gm->mode == p7_LOCAL || gm->mode == p7_GLOCAL) return TRUE;
- return FALSE;
-}
-
-
-
-
-/* Function: p7_profile_GetTScore()
-* Incept: SRE, Wed Apr 12 14:20:18 2006 [St. Louis]
-*
-* Purpose: Convenience function that looks up a transition score in
-* profile <gm> for a transition from state type <st1> in
-* node <k1> to state type <st2> in node <k2>. For unique
-* state types that aren't in nodes (<p7T_S>, for example), the
-* <k> value is ignored, though it would be customarily passed as 0.
-* Return the transition score in <ret_tsc>.
-*
- * This function would almost always be called on profile
- * traces, of course, but it's possible to call it
- * on core traces (for example, if you were to try to
- * trace_Dump() during HMM construction, and you wanted
- * to see detailed profile scores for that trace). Core traces
- * can contain <p7T_X> "states" used solely to signal
- * a sequence fragment, treated as missing data. Transitions
- * involving <p7T_X> states are assigned zero score here.
- * This is safe, because we would only ever use this number
- * for display, not as a log probability somewhere.
- *
-* Returns: <eslOK> on success, and <*ret_tsc> contains the requested
-* transition score.
-*
-* Throws: <eslEINVAL> if a nonexistent transition is requested. Now
-* <*ret_tsc> is set to $-\infty$.
- *
-*/
-int
-p7_profile_GetT(const P7_PROFILE *gm, char st1, int k1, char st2, int k2, float *ret_tsc)
-{
- float tsc = 0.0f;
- int status;
-
- /* Detect transitions that can only come from core traces;
- * return 0.0 as a special case (this is only done for displaying
- * "scores" in trace dumps, during debugging.)
- */
- if (st1 == p7T_X || st2 == p7T_X) return eslOK;
- if (st1 == p7T_B && st2 == p7T_I) return eslOK;
- if (st1 == p7T_B && st2 == p7T_D) return eslOK;
- if (st1 == p7T_I && st1 == p7T_E) return eslOK;
-
- /* Now we're sure this is a profile trace, as it should usually be. */
- switch (st1) {
- case p7T_S: break;
- case p7T_T: break;
- case p7T_N:
- switch (st2) {
- case p7T_B: tsc = gm->xsc[p7P_N][p7P_MOVE]; break;
- case p7T_N: tsc = gm->xsc[p7P_N][p7P_LOOP]; break;
- default: ESL_XEXCEPTION(eslEINVAL, "bad transition %s->%s", p7_hmm_DecodeStatetype(st1), p7_hmm_DecodeStatetype(st2));
- }
- break;
-
- case p7T_B:
- switch (st2) {
- case p7T_M: tsc = p7P_TSC(gm, k2-1, p7P_BM); break; /* remember, B->Mk is stored in [k-1][p7P_BM] */
- default: ESL_XEXCEPTION(eslEINVAL, "bad transition %s->%s", p7_hmm_DecodeStatetype(st1), p7_hmm_DecodeStatetype(st2));
- }
- break;
-
- case p7T_M:
- switch (st2) {
- case p7T_M: tsc = p7P_TSC(gm, k1, p7P_MM); break;
- case p7T_I: tsc = p7P_TSC(gm, k1, p7P_MI); break;
- case p7T_D: tsc = p7P_TSC(gm, k1, p7P_MD); break;
- case p7T_E:
- if (k1 != gm->M && ! p7_profile_IsLocal(gm)) ESL_EXCEPTION(eslEINVAL, "local end transition (M%d of %d) in non-local model", k1, gm->M);
- tsc = 0.0f; /* by def'n in H3 local alignment */
- break;
- default: ESL_XEXCEPTION(eslEINVAL, "bad transition %s_%d->%s", p7_hmm_DecodeStatetype(st1), k1, p7_hmm_DecodeStatetype(st2));
- }
- break;
-
- case p7T_D:
- switch (st2) {
- case p7T_M: tsc = p7P_TSC(gm, k1, p7P_DM); break;
- case p7T_D: tsc = p7P_TSC(gm, k1, p7P_DD); break;
- case p7T_E:
- if (k1 != gm->M && ! p7_profile_IsLocal(gm)) ESL_EXCEPTION(eslEINVAL, "local end transition (D%d of %d) in non-local model", k1, gm->M);
- tsc = 0.0f; /* by def'n in H3 local alignment */
- break;
- default: ESL_XEXCEPTION(eslEINVAL, "bad transition %s_%d->%s", p7_hmm_DecodeStatetype(st1), k1, p7_hmm_DecodeStatetype(st2));
- }
- break;
-
- case p7T_I:
- switch (st2) {
- case p7T_M: tsc = p7P_TSC(gm, k1, p7P_IM); break;
- case p7T_I: tsc = p7P_TSC(gm, k1, p7P_II); break;
- default: ESL_XEXCEPTION(eslEINVAL, "bad transition %s_%d->%s", p7_hmm_DecodeStatetype(st1), k1, p7_hmm_DecodeStatetype(st2));
- }
- break;
-
- case p7T_E:
- switch (st2) {
- case p7T_C: tsc = gm->xsc[p7P_E][p7P_MOVE]; break;
- case p7T_J: tsc = gm->xsc[p7P_E][p7P_LOOP]; break;
- default: ESL_XEXCEPTION(eslEINVAL, "bad transition %s->%s", p7_hmm_DecodeStatetype(st1), p7_hmm_DecodeStatetype(st2));
- }
- break;
-
- case p7T_J:
- switch (st2) {
- case p7T_B: tsc = gm->xsc[p7P_J][p7P_MOVE]; break;
- case p7T_J: tsc = gm->xsc[p7P_J][p7P_LOOP]; break;
- default: ESL_XEXCEPTION(eslEINVAL, "bad transition %s->%s", p7_hmm_DecodeStatetype(st1), p7_hmm_DecodeStatetype(st2));
- }
- break;
-
- case p7T_C:
- switch (st2) {
- case p7T_T: tsc = gm->xsc[p7P_C][p7P_MOVE]; break;
- case p7T_C: tsc = gm->xsc[p7P_C][p7P_LOOP]; break;
- default: ESL_XEXCEPTION(eslEINVAL, "bad transition %s->%s", p7_hmm_DecodeStatetype(st1), p7_hmm_DecodeStatetype(st2));
- }
- break;
-
- default: ESL_XEXCEPTION(eslEINVAL, "bad state type %d in traceback", st1);
- }
-
- *ret_tsc = tsc;
- return eslOK;
-
-ERROR:
- *ret_tsc = -eslINFINITY;
- return status;
-}
-
-
-/*****************************************************************
-* 3. Debugging and development code.
-*****************************************************************/
-
-/* Function: p7_profile_Validate()
-* Incept: SRE, Tue Jan 23 13:58:04 2007 [Janelia]
-*
-* Purpose: Validates the internals of the generic profile structure
-* <gm>.
-*
-* TODO: currently this function is incomplete, and only
-* validates the entry distribution.
-*
-* Returns: <eslOK> if <gm> internals look fine. Returns <eslFAIL>
-* if something is wrong, and leaves an error message in
-* <errbuf> if caller passed it non-<NULL>.
-*/
-int
-p7_profile_Validate(const P7_PROFILE *gm, char *errbuf, float tol)
-{
- int status;
- int k;
- double *pstart = NULL;
-
- ESL_ALLOC_WITH_TYPE(pstart, double*, sizeof(double) * (gm->M+1));
- pstart[0] = 0.0;
-
- /* Validate the entry distribution.
- * In a glocal model, this is an explicit probability distribution,
- * corresponding to left wing retraction.
- * In a local model, this is an implicit probability distribution,
- * corresponding to the implicit local alignment model, and we have
- * to calculate the M(M+1)/2 fragment probabilities accordingly.
- */
- if (p7_profile_IsLocal(gm))
- { /* the code block below is also in emit.c:sample_endpoints */
- for (k = 1; k <= gm->M; k++)
- pstart[k] = exp(p7P_TSC(gm, k-1, p7P_BM)) * (gm->M - k + 1); /* multiply p_ij by the number of exits j */
- }
- else
- {
- for (k = 1; k <= gm->M; k++)
- pstart[k] = exp(p7P_TSC(gm, k-1, p7P_BM));
- }
-
- if (esl_vec_DValidate(pstart, gm->M+1, tol, NULL) != eslOK) ESL_XFAIL(eslFAIL, errbuf, "profile entry distribution is not normalized properly");
- free(pstart);
- return eslOK;
-
-ERROR:
- if (pstart != NULL) free(pstart);
- return eslFAIL;
-}
-
-/* Function: p7_profile_Compare()
-* Synopsis: Compare two profiles for equality.
-* Incept: SRE, Thu Jun 21 17:57:56 2007 [Janelia]
-*
-* Purpose: Compare two profiles <gm1> and <gm2> to each other.
-* Return <eslOK> if they're identical, and <eslFAIL> if
-* they differ. Floating-point probabilities are
-* compared for equality within a fractional tolerance
-* <tol>. Only compares the scores, not any annotation
-* on the profiles.
-*/
-int
-p7_profile_Compare(P7_PROFILE *gm1, P7_PROFILE *gm2, float tol)
-{
- int x;
-
- if (gm1->mode != gm2->mode) return eslFAIL;
- if (gm1->M != gm2->M) return eslFAIL;
-
- if (esl_vec_FCompare(gm1->tsc, gm2->tsc, gm1->M*p7P_NTRANS, tol) != eslOK) return eslFAIL;
- for (x = 0; x < gm1->abc->Kp; x++)
- if (esl_vec_FCompare(gm1->rsc[x], gm2->rsc[x], (gm1->M+1)*p7P_NR, tol) != eslOK) return eslFAIL;
-
- for (x = 0; x < p7P_NXSTATES; x++)
- if (esl_vec_FCompare(gm1->xsc[x], gm2->xsc[x], p7P_NXTRANS, tol) != eslOK) return eslFAIL;
-
- return eslOK;
-}
-
-//int p7_profile_Dump( P7_PROFILE* p, FILE* out ) {
-// int i = 0;
-// int j = 0;
-// fprintf( out, "P7_PROFILE_DUMP\n" );
-// fprintf( out, "mode %d\n", p->mode );
-// fprintf( out, "L %d\n", p->L );
-// fprintf( out, "allocM %d\n", p->allocM );
-// fprintf( out, "M %d\n", p->M );
-// fprintf( out, "nj %f\n", p->nj );
-// fprintf( out, "name %s\n", p->name );
-// fprintf( out, "acc %s\n", p->acc );
-// fprintf( out, "desc %s\n", p->desc );
-// fprintf( out, "rf %s\n", p->rf );
-// fprintf( out, "cs %s\n", p->cs );
-// fprintf( out, "consensus %s\n", p->consensus );
-// fprintf( out, "evparam: " );
-//
-// for( i = 0; i < p7_NEVPARAM; ++i ) {
-// fprintf( out, " %f ", p->evparam[i] );
-// }
-// fprintf( out, "\ncutoff: " );
-// for( i = 0; i < p7_NCUTOFFS; ++i ) {
-// fprintf( out, " %f ", p->cutoff[i] );
-// }
-// fprintf( out, "\ncompo: " );
-// for( i = 0; i < p7_MAXABET; ++i ) {
-// fprintf( out, " %f ", p->compo[i] );
-// }
-// fprintf( out, "\n" );
-// fprintf( out, "abc %d\n", p->abc->type );
-//
-// fprintf( out, "tsc: " );
-// for( i = 0; i < p->M; ++i ) {
-// for( j = 0; j < p7P_NTRANS; ++j ) {
-// fprintf( out, " %f ", p7P_TSC( p, i, j ) );
-// }
-// }
-//
-// fprintf( out, "\nxsc: " );
-// for( i = 0; i < p7P_NXSTATES; ++i ) {
-// for( j = 0; j < p7P_NXTRANS; ++j ) {
-// fprintf( out, " %f ", p->xsc[i][j] );
-// }
-// }
-//
-// fprintf( out, "\n" );
-// // without rsc
-// return 0;
-//}
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/p7_spensemble.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/p7_spensemble.cpp
deleted file mode 100644
index b5be057..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/p7_spensemble.cpp
+++ /dev/null
@@ -1,459 +0,0 @@
-/* Defining domain number and coordinates in a significant hit by
-* posterior sampling and clustering.
-*
-* SRE, Wed Jan 9 07:26:34 2008 [Janelia]
-* SVN $Id: p7_spensemble.c 2818 2009-06-03 12:31:02Z eddys $
-*/
-
-#include <math.h>
-
-#include <hmmer3/p7_config.h>
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_cluster.h>
-#include <hmmer3/easel/esl_vectorops.h>
-#include <hmmer3/hmmer.h>
-
-/* Function: p7_spensemble_Create()
-* Synopsis: Allocates a <P7_SPENSEMBLE>
-* Incept: SRE, Wed Jan 9 10:00:14 2008 [Janelia]
-*
-* Purpose: Create a new <P7_SPENSEMBL> with specified initial
-* allocation sizes: <init_n> for the number of sampled
-* segment pairs, <init_epc> for the range over
-* which one of a domain's (i,j,k,m) sampled endpoints
-* falls, and <init_sigc> for the number of significant
-* clusters (domains) that will be defined.
-*
-* The values of these initial allocations are only
-* relevant to tuning memory performance, because the
-* object is reallocated/grown as needed. You can make
-* guesses, and the better your guess, the fewer
-* reallocations will be needed; but everything will work
-* fine regardless of what these initial allocations are.
-*
-* A <P7_SPENSEMBLE> is designed to be reused for many
-* target sequences and/or models, to minimize alloc/free
-* calls.
-*
-* Args: init_n - initial allocation for # of sampled segment pairs
-* init_epc - initial allocation for maximum endpoint range
-* init_sigc - initial allocation for # of significant clusters, domains
-*
-* Returns: a pointer to the new <P7_SPENSEMBLE>.
-*
-* Throws: <NULL> on allocation failure.
-*/
-P7_SPENSEMBLE *
-p7_spensemble_Create(int init_n, int init_epc, int init_sigc)
-{
- P7_SPENSEMBLE *sp = NULL;
- int status;
-
- ESL_ALLOC_WITH_TYPE(sp, P7_SPENSEMBLE*, sizeof(P7_SPENSEMBLE));
- sp->sp = NULL;
- sp->workspace = NULL;
- sp->assignment = NULL;
- sp->epc = NULL;
- sp->sigc = NULL;
-
- sp->nalloc = init_n;
- sp->epc_alloc = init_epc;
- sp->nsigc_alloc = init_sigc;
-
- ESL_ALLOC_WITH_TYPE(sp->sp, struct p7_spcoord_s*, sizeof(struct p7_spcoord_s) * sp->nalloc);
- ESL_ALLOC_WITH_TYPE(sp->workspace, int*, sizeof(int) * sp->nalloc * 2); /* workspace is 2n */
- ESL_ALLOC_WITH_TYPE(sp->assignment, int*, sizeof(int) * sp->nalloc);
- ESL_ALLOC_WITH_TYPE(sp->epc, int*, sizeof(int) * sp->epc_alloc);
- ESL_ALLOC_WITH_TYPE(sp->sigc, struct p7_spcoord_s*, sizeof(struct p7_spcoord_s) * sp->nsigc_alloc);
- sp->nsamples = 0;
- sp->n = 0;
- sp->nc = 0;
- sp->nsigc = 0;
- return sp;
-
-ERROR:
- p7_spensemble_Destroy(sp);
- return NULL;
-}
-
-/* Function: p7_spensemble_Reuse()
-* Synopsis: Reinitializes a <P7_SPENSEMBLE>.
-* Incept: SRE, Wed Jan 9 10:26:36 2008 [Janelia]
-*
-* Purpose: Reinitialize <sp> so it can be used again to collect
-* and process a new segment pair ensemble, without
-* having to free and reallocate.
-*
-* Returns: <eslOK> on success.
-*/
-int
-p7_spensemble_Reuse(P7_SPENSEMBLE *sp)
-{
- sp->nsamples = 0;
- sp->n = 0;
- sp->nc = 0;
- sp->nsigc = 0;
- return eslOK;
-}
-
-/* Function: p7_spsensemble_Add()
-* Synopsis: Add a new segment pair to a growing ensemble.
-* Incept: SRE, Wed Jan 9 10:28:04 2008 [Janelia]
-*
-* Purpose: Adds a new segment pair to a growing ensemble <sp>.
-* The segment pair is defined by start/end positions
-* <i>,<j> on a target sequence (1..L), and start/end
-* positions <k>,<m> on a query model (1..M).
-*
-* You also provide the index <sampleidx> of which sampled
-* traceback this segment pair came from; each traceback
-* contains one or more segment pairs. These <sampleidx>
-* indices start at 0 and they must arrive sequentially:
-* that is, the caller must <Add()> all the segment pairs
-* from traceback sample 0, then <Add()> all the segment
-* pairs from traceback sample 1, and so on.
-*
-* The reason to enforce sequential addition has to do with
-* the internals of the ensemble clustering algorithm;
-* specifically with how it calculates the posterior
-* probability of a cluster in the ensemble. You can't
-* calculate the posterior probability of a cluster simply
-* by dividing the number of segment pairs in a cluster by
-* the total number of traces, because you can get
-* "probabilities" of greater than one: sometimes more than
-* one pair from the same trace get clustered together
-* (because one domain got split into two or more segment
-* pairs). Rather, what it does is to count the total
-* number of traces that have one or more segments in the
-* cluster, divided by the total number of traces. An
-* efficient way to implement this is, when counting
-* segments that belong to a cluster, only increment the
-* numerator if the segment has a different traceback index
-* than the last segment we counted in this cluster. (We'd
-* rather not have to keep track of a table of all the
-* traceback indices we've seen so far.)
-*
-* Args: sp - ensemble to add this segment pair to
-* sampleidx - index of traceback that this seg pair came from (0..nsamples-1)
-* i,j - start,end position on target sequence (1..L)
-* k,m - start,end position on query model (1..M)
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEINVAL> if the <sampleidx> is out of order.
-* <eslEMEM> if a reallocation fails.
-*/
-int
-p7_spensemble_Add(P7_SPENSEMBLE *sp, int sampleidx, int i, int j, int k, int m)
-{
- int status;
-
- if (sampleidx > sp->nsamples) ESL_EXCEPTION(eslEINVAL, "seg pair's <sampleidx> is out of order");
- else if (sampleidx == sp->nsamples) sp->nsamples++;
-
- if (sp->n >= sp->nalloc) {
- void *p;
- ESL_RALLOC_WITH_TYPE(sp->sp, struct p7_spcoord_s*, p, sizeof(struct p7_spcoord_s) * sp->nalloc * 2);
- ESL_RALLOC_WITH_TYPE(sp->workspace, int*, p, sizeof(int) * sp->nalloc * 4); /* remember, workspace is 2n */
- ESL_RALLOC_WITH_TYPE(sp->assignment, int*, p, sizeof(int) * sp->nalloc * 2);
- sp->nalloc *= 2;
- }
-
- sp->sp[sp->n].idx = sampleidx;
- sp->sp[sp->n].i = i;
- sp->sp[sp->n].j = j;
- sp->sp[sp->n].k = k;
- sp->sp[sp->n].m = m;
- sp->n++;
- return eslOK;
-
-ERROR:
- return status;
-}
-
-
-/* struct p7_linkparam_s:
-* used just within this .c, as part of setting up the clustering problem in
-* the form that Easel's general SLC algorithm will take it.
-*/
-struct p7_linkparam_s {
- float min_overlap; /* 0.8 means >= 80% overlap of (smaller/larger) segment is required, both in seq and hmm */
- int of_smaller; /* TRUE means overlap fraction is w.r.t. smaller segment; FALSE means w.r.t. larger segment */
- int max_diagdiff; /* 4 means either start or endpoints of two segments must be within <= 4 diagonals of each other */
- float min_posterior; /* 0.25 means a cluster must occur w/ >= 25% posterior probability in the sample to be "significant" */
- float min_endpointp; /* 0.02 means choose widest endpoint with post. prob. of at least 2% */
-};
-
-
-/* link_spsamples():
-*
-* Defines the rule used for single linkage clustering of sampled
-* domain coordinates. (API is dictated by Easel's general single
-* linkage clustering routine.)
-*/
-static int
-link_spsamples(const void *v1, const void *v2, const void *prm, int *ret_link)
-{
- struct p7_spcoord_s *h1 = (struct p7_spcoord_s *) v1;
- struct p7_spcoord_s *h2 = (struct p7_spcoord_s *) v2;
- struct p7_linkparam_s *param = (struct p7_linkparam_s *) prm;
- int nov, n;
- int d1, d2;
-
- /* seq overlap test */
- nov = ESL_MIN(h1->j, h2->j) - ESL_MAX(h1->i, h2->i) + 1; /* overlap */
- n = (param->of_smaller ? ESL_MIN(h1->j - h1->i + 1, h2->j - h2->i + 1) : /* min length of the two hits */
- ESL_MAX(h1->j - h1->i + 1, h2->j - h2->i + 1)); /* max length of the two hits */
- if ((float) nov / (float) n < param->min_overlap) { *ret_link = FALSE; return eslOK; }
-
- /* hmm overlap test */
- nov = ESL_MIN(h1->m, h2->m) - ESL_MAX(h1->k, h2->k);
- n = (param->of_smaller ? ESL_MIN(h1->m - h1->k + 1, h2->m - h2->k + 1) :
- ESL_MAX(h1->m - h1->k + 1, h2->m - h2->k + 1));
- if ((float) nov / (float) n < param->min_overlap) { *ret_link = FALSE; return eslOK; }
-
- /* nearby diagonal test */
- d1 = (h1->i - h1->k); d2 = (h2->i - h2->k); if (abs(d1-d2) <= param->max_diagdiff) { *ret_link = TRUE; return eslOK; }
- d1 = (h1->j - h1->m); d2 = (h2->j - h2->m); if (abs(d1-d2) <= param->max_diagdiff) { *ret_link = TRUE; return eslOK; }
-
- *ret_link = FALSE;
- return eslOK;
-}
-
-/* cluster_orderer()
-* is the routine that gets passed to qsort() to sort
-* the significant clusters by order of occurrence on
-* the target sequence
-*/
-static int
-cluster_orderer(const void *v1, const void *v2)
-{
- struct p7_spcoord_s *h1 = (struct p7_spcoord_s *) v1;
- struct p7_spcoord_s *h2 = (struct p7_spcoord_s *) v2;
-
- if (h1->i < h2->i) return -1;
- else if (h1->i > h2->i) return 1;
- else return 0;
-}
-
-/* Function: p7_spensemble_Cluster()
-* Synopsis: Cluster a seg pair ensemble and define domains.
-* Incept: SRE, Wed Jan 9 11:04:07 2008 [Janelia]
-*
-* Purpose: Given a collected segment pair ensemble <sp>, cluster it;
-* identify significant clusters with high posterior probability;
-* and define consensus endpoints for each significant cluster.
-*
-* Clustering is single-linkage. The linkage rule is
-* controlled by the <min_overlap>, <of_smaller>, and
-* <max_diagdiff> parameters. To be linked, two segments
-* must overlap by a fraction $\geq$ <min_overlap>,
-* relative to either the smaller or larger of the two
-* segments (<of_smaller = TRUE> or <FALSE>), in both their
-* sequence and model coords, and either the start or end of both
-* segments must lie within $\leq$ <max_diagdiff> diagonals
-* of each other.
-*
-* The threshold for cluster "significance" is controlled
-* by the <min_posterior> parameter. Clusters with
-* posterior probability $\geq$ this threshold are called
-* significant.
-*
-* Consensus endpoint definition within a cluster is
-* controlled by the <min_endpointp> parameter. The widest
-* endpoint that has a posterior probability of $\geq
-* min_endpointp> is chosen; this is done independently for
-* each coordinate (i,j,k,m).
-*
-* A reasonable (and tested) parameterization is
-* <min_overlap = 0.8>, <of_smaller = TRUE>, <max_diagdiff
-* = 4>, <min_posterior = 0.25>, <min_endpointp = 0.02>.
-*
-* Args: sp - segment pair ensemble to cluster
-* min_overlap - linkage requires fractional overlap >= this, in both seq and hmm segments
-* of_smaller - overlap fraction denominators uses either the smaller (if TRUE) or larger (if FALSE) segment
-* max_diagdiff - linkage requires that start, end points of both seg pairs are <= this
-* min_posterior - clusters with posterior prob >= this are defined as significant
-* min_endpointp - widest endpoint with post prob >= this is defined as consensus endpoint coord
-*
-* Returns: the number of significant clusters in <*ret_nclusters>.
-* The caller can then obtain consensus endpoints for each cluster
-* by making a series of <p7_spensemble_GetClusterCoords()> calls.
-*
-*/
-int
-p7_spensemble_Cluster(P7_SPENSEMBLE *sp,
- float min_overlap, int of_smaller, int max_diagdiff, float min_posterior, float min_endpointp,
- int *ret_nclusters)
-{
- struct p7_linkparam_s param;
- int status;
- int c;
- int h;
- int idx_of_last;
- int *ninc = NULL;
- int cwindow_width;
- int epc_threshold;
- int imin, jmin, kmin, mmin;
- int imax, jmax, kmax, mmax;
- int best_i, best_j, best_k, best_m;
-
- /* set up a single linkage clustering problem for Easel's general routine */
- param.min_overlap = min_overlap;
- param.of_smaller = of_smaller;
- param.max_diagdiff = max_diagdiff;
- param.min_posterior = min_posterior;
- param.min_endpointp = min_endpointp;
- if ((status = esl_cluster_SingleLinkage(sp->sp, sp->n, sizeof(struct p7_spcoord_s), link_spsamples, (void *) &param,
- sp->workspace, sp->assignment, &(sp->nc))) != eslOK) goto ERROR;
-
- ESL_ALLOC_WITH_TYPE(ninc, int*, sizeof(int) * sp->nc);
-
- /* Look at each cluster in turn; most will be too small to worry about. */
- for (c = 0; c < sp->nc; c++)
- {
- /* Calculate posterior probability of each cluster.
- * The extra wrinkle here is that this probability is w.r.t the number of sampled traces;
- * but the clusters might contain more than one seg pair from a given trace.
- * That's what the idx_of_last logic is doing, avoiding double-counting.
- */
- idx_of_last = -1;
- for (ninc[c] = 0, h = 0; h < sp->n; h++) {
- if (sp->assignment[h] == c) {
- if (sp->sp[h].idx != idx_of_last) ninc[c]++;
- idx_of_last = sp->sp[h].idx;
- }
- }
- /* Reject low probability clusters: */
- if ((float) ninc[c] / (float) sp->nsamples < min_posterior) continue;
-
- /* Find the maximum extent of all seg pairs in this cluster in i,j k,m */
- for (imin = 0, h = 0; h < sp->n; h++)
- if (sp->assignment[h] == c)
- {
- if (imin == 0) {
- imin = imax = sp->sp[h].i;
- jmin = jmax = sp->sp[h].j;
- kmin = kmax = sp->sp[h].k;
- mmin = mmax = sp->sp[h].m;
- } else {
- imin = ESL_MIN(imin, sp->sp[h].i); imax = ESL_MAX(imax, sp->sp[h].i);
- jmin = ESL_MIN(jmin, sp->sp[h].j); jmax = ESL_MAX(jmax, sp->sp[h].j);
- kmin = ESL_MIN(kmin, sp->sp[h].k); kmax = ESL_MAX(kmax, sp->sp[h].k);
- mmin = ESL_MIN(mmin, sp->sp[h].m); mmax = ESL_MAX(mmax, sp->sp[h].m);
- }
- }
-
- /* Set up a window in which we can examine the end point distributions for i,j,k,m in turn, independently */
- cwindow_width = ESL_MAX(ESL_MAX(imax-imin+1, jmax-jmin+1),
- ESL_MAX(kmax-kmin+1, mmax-mmin+1));
- if (cwindow_width > sp->epc_alloc) {
- void *p;
- ESL_RALLOC_WITH_TYPE(sp->epc, int*, p, sizeof(int) * cwindow_width);
- sp->epc_alloc = cwindow_width;
- }
-
- epc_threshold = (int) ceilf((float) ninc[c] * min_endpointp); /* round up. freq of >= epc_threshold means we're >= min_p */
-
- /* Identify the leftmost i that has enough endpoints. */
- esl_vec_ISet(sp->epc, imax-imin+1, 0);
- for (h = 0; h < sp->n; h++) if (sp->assignment[h] == c) sp->epc[sp->sp[h].i-imin]++;
- for (best_i = imin; best_i <= imax; best_i++) if (sp->epc[best_i-imin] >= epc_threshold) break;
- if (best_i > imax) best_i = imin + esl_vec_IArgMax(sp->epc, imax-imin+1);
-
- /* Identify the leftmost k that has enough endpoints */
- esl_vec_ISet(sp->epc, kmax-kmin+1, 0);
- for (h = 0; h < sp->n; h++) if (sp->assignment[h] == c) sp->epc[sp->sp[h].k-kmin]++;
- for (best_k = kmin; best_k <= kmax; best_k++) if (sp->epc[best_k-kmin] >= epc_threshold) break;
- if (best_k > kmax) best_k = kmin + esl_vec_IArgMax(sp->epc, kmax-kmin+1);
-
- /* Identify the rightmost j that has enough endpoints. */
- esl_vec_ISet(sp->epc, jmax-jmin+1, 0);
- for (h = 0; h < sp->n; h++) if (sp->assignment[h] == c) sp->epc[sp->sp[h].j-jmin]++;
- for (best_j = jmax; best_j >= jmin; best_j--) if (sp->epc[best_j-jmin] >= epc_threshold) break;
- if (best_j < jmin) best_j = jmin + esl_vec_IArgMax(sp->epc, jmax-jmin+1);
-
- /* Identify the rightmost m that has enough endpoints. */
- esl_vec_ISet(sp->epc, mmax-mmin+1, 0);
- for (h = 0; h < sp->n; h++) if (sp->assignment[h] == c) sp->epc[sp->sp[h].m-mmin]++;
- for (best_m = mmax; best_m >= mmin; best_m--) if (sp->epc[best_m-mmin] >= epc_threshold) break;
- if (best_m < mmin) best_m = mmin + esl_vec_IArgMax(sp->epc, mmax-mmin+1);
-
- /* If there's no well-defined domain (say, a long stretch of biased composition),
- the coords above might come out inconsistent; in this case, just reject the domain.
- */
- if (best_i > best_j || best_k > best_m) continue;
-
- if (sp->nsigc >= sp->nsigc_alloc) {
- void *p;
- ESL_RALLOC_WITH_TYPE(sp->sigc, struct p7_spcoord_s*, p, sizeof(struct p7_spcoord_s) * sp->nsigc_alloc * 2);
- sp->nsigc_alloc *= 2;
- }
-
- sp->sigc[sp->nsigc].i = best_i;
- sp->sigc[sp->nsigc].j = best_j;
- sp->sigc[sp->nsigc].k = best_k;
- sp->sigc[sp->nsigc].m = best_m;
- sp->sigc[sp->nsigc].idx = c;
- sp->sigc[sp->nsigc].prob = (float) ninc[c] / (float) sp->nsamples;
- sp->nsigc++;
- }
-
- /* Now we need to make sure those domains are ordered by start point,
- * because later we're going to calculate overlaps by i_cur - j_prv
- */
- qsort((void *) sp->sigc, sp->nsigc, sizeof(struct p7_spcoord_s), cluster_orderer);
-
- free(ninc);
- *ret_nclusters = sp->nsigc;
- return eslOK;
-
-ERROR:
- if (ninc != NULL) free(ninc);
- *ret_nclusters = 0;
- return status;
-}
-
-/* Function: p7_spensemble_GetClusterCoords()
-* Synopsis: Retrieve consensus coords of one significant segment pair cluster.
-* Incept: SRE, Wed Jan 9 11:39:27 2008 [Janelia]
-*
-* Purpose: Retrieve the consensus coords of significant segment pair cluster <which>
-* from the ensemble <sp>, which has already been clustered with
-* <p7_spensemble_Cluster()>.
-*
-* Returns: <eslOK> on success, and the consensus coords are in <*opt_i>, <*opt_j>,
-* <*opt_k>, and <*opt_m>; the (sampled) posterior probability of the
-* cluster is in <*opt_p>. All of these returned values are optional;
-* the caller can pass a <NULL> for any value it's not interested in
-* retrieving.
-*/
-int
-p7_spensemble_GetClusterCoords(P7_SPENSEMBLE *sp, int which, int *opt_i, int *opt_j, int *opt_k, int *opt_m, float *opt_p)
-{
- if (opt_i != NULL) *opt_i = sp->sigc[which].i;
- if (opt_j != NULL) *opt_j = sp->sigc[which].j;
- if (opt_k != NULL) *opt_k = sp->sigc[which].k;
- if (opt_m != NULL) *opt_m = sp->sigc[which].m;
- if (opt_p != NULL) *opt_p = sp->sigc[which].prob;
- return eslOK;
-}
-
-
-/* Function: p7_spensemble_Destroy()
-* Synopsis: Deallocate a <P7_SPENSEMBLE>
-* Incept: SRE, Wed Jan 9 11:42:01 2008 [Janelia]
-*
-* Purpose: Destroys a <P7_SPENSEMBLE>.
-*/
-void
-p7_spensemble_Destroy(P7_SPENSEMBLE *sp)
-{
- if (sp == NULL) return;
- if (sp->sp != NULL) free(sp->sp);
- if (sp->workspace != NULL) free(sp->workspace);
- if (sp->assignment != NULL) free(sp->assignment);
- if (sp->epc != NULL) free(sp->epc);
- if (sp->sigc != NULL) free(sp->sigc);
- free(sp);
-}
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/p7_tophits.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/p7_tophits.cpp
deleted file mode 100644
index 1f4c0ff..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/p7_tophits.cpp
+++ /dev/null
@@ -1,787 +0,0 @@
-/* P7_TOPHITS: implementation of ranked list of top-scoring hits
-*
-* Contents:
-* 1. The P7_TOPHITS object.
-* 4. Copyright and license information.
-*
-* SRE, Fri Dec 28 07:14:54 2007 [Janelia] [Enigma, MCMXC a.D.]
- * SVN $Id: p7_tophits.c 3047 2009-11-13 12:31:16Z eddys $
-*/
-
-#include <hmmer3/p7_config.h>
-
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-#include <math.h>
-
-#include <hmmer3/easel/easel.h>
-
-#include <hmmer3/hmmer.h>
-
-/* Function: p7_tophits_Create()
-* Synopsis: Allocate a hit list.
-* Incept: SRE, Fri Dec 28 07:17:51 2007 [Janelia]
-*
-* Purpose: Allocates a new <P7_TOPHITS> hit list and return a pointer
-* to it.
-*
-* Throws: <NULL> on allocation failure.
-*/
-P7_TOPHITS *
-p7_tophits_Create(void)
-{
- P7_TOPHITS *h = NULL;
- int default_nalloc = 256;
- int status;
-
- ESL_ALLOC_WITH_TYPE(h, P7_TOPHITS*, sizeof(P7_TOPHITS));
- h->hit = NULL;
- h->unsrt = NULL;
-
- ESL_ALLOC_WITH_TYPE(h->hit, P7_HIT **, sizeof(P7_HIT *) * default_nalloc);
- ESL_ALLOC_WITH_TYPE(h->unsrt, P7_HIT*, sizeof(P7_HIT) * default_nalloc);
- h->Nalloc = default_nalloc;
- h->N = 0;
- h->nreported = 0;
- h->nincluded = 0;
- h->is_sorted = TRUE; /* but only because there's 0 hits */
- h->hit[0] = h->unsrt; /* if you're going to call it "sorted" when it contains just one hit, you need this */
- return h;
-
-ERROR:
- p7_tophits_Destroy(h);
- return NULL;
-}
-
-
-/* Function: p7_tophits_Grow()
-* Synopsis: Reallocates a larger hit list, if needed.
-* Incept: SRE, Fri Dec 28 07:37:27 2007 [Janelia]
-*
-* Purpose: If list <h> cannot hold another hit, doubles
-* the internal allocation.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure; in this case,
-* the data in <h> are unchanged.
-*/
-int
-p7_tophits_Grow(P7_TOPHITS *h)
-{
- void *p;
- P7_HIT *ori = h->unsrt;
- int Nalloc = h->Nalloc * 2; /* grow by doubling */
- int i;
- int status;
-
- if (h->N < h->Nalloc) return eslOK; /* we have enough room for another hit */
-
- ESL_RALLOC_WITH_TYPE(h->hit, P7_HIT **, p, sizeof(P7_HIT *) * Nalloc);
- ESL_RALLOC_WITH_TYPE(h->unsrt, P7_HIT*, p, sizeof(P7_HIT) * Nalloc);
-
- /* If we grow a sorted list, we have to translate the pointers
- * in h->hit, because h->unsrt might have just moved in memory.
- */
- if (h->is_sorted)
- {
- for (i = 0; i < h->N; i++)
- h->hit[i] = h->unsrt + (h->hit[i] - ori);
- }
-
- h->Nalloc = Nalloc;
- return eslOK;
-
-ERROR:
- return eslEMEM;
-}
-
-
-/* Function: p7_tophits_CreateNextHit()
-* Synopsis: Get pointer to new structure for recording a hit.
-* Incept: SRE, Tue Mar 11 08:44:53 2008 [Janelia]
-*
-* Purpose: Ask the top hits object <h> to do any necessary
-* internal allocation and bookkeeping to add a new,
-* empty hit to its list; return a pointer to
-* this new <P7_HIT> structure for data to be filled
-* in by the caller.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation error.
-*/
-int
-p7_tophits_CreateNextHit(P7_TOPHITS *h, P7_HIT **ret_hit)
-{
- P7_HIT *hit = NULL;
- int status;
-
- if ((status = p7_tophits_Grow(h)) != eslOK) goto ERROR;
-
- hit = &(h->unsrt[h->N]);
- h->N++;
- if (h->N >= 2) h->is_sorted = FALSE;
-
- hit->name = NULL;
- hit->acc = NULL;
- hit->desc = NULL;
- hit->sortkey = 0.0;
-
- hit->score = 0.0;
- hit->pre_score = 0.0;
- hit->sum_score = 0.0;
-
- hit->pvalue = 0.0;
- hit->pre_pvalue = 0.0;
- hit->sum_pvalue = 0.0;
-
- hit->ndom = 0;
- hit->nexpected = 0.0;
- hit->nregions = 0;
- hit->nclustered = 0;
- hit->noverlaps = 0;
- hit->nenvelopes = 0;
-
- hit->flags = p7_HITFLAGS_DEFAULT;
- hit->nreported = 0;
- hit->nincluded = 0;
- hit->best_domain = -1;
- hit->dcl = NULL;
-
- *ret_hit = hit;
- return eslOK;
-
-ERROR:
- *ret_hit = NULL;
- return status;
-}
-
-
-
-/* Function: p7_tophits_Add()
-* Synopsis: Add a hit to the top hits list.
-* Incept: SRE, Fri Dec 28 08:26:11 2007 [Janelia]
-*
-* Purpose: Adds a hit to the top hits list <h>.
-*
-* <name>, <acc>, and <desc> are copied, so caller may free
-* them if it likes.
-*
-* Only the pointer <ali> is kept. Caller turns over memory
-* management of <ali> to the top hits object; <ali> will
-* be free'd when the top hits structure is free'd.
-*
-* Args: h - active top hit list
-* name - name of target
-* acc - accession of target (may be NULL)
-* desc - description of target (may be NULL)
-* sortkey - value to sort by: bigger is better
-* score - score of this hit
-* pvalue - P-value of this hit
-* mothersc - score of parent whole sequence
-* motherp - P-value of parent whole sequence
-* sqfrom - 1..L pos in target seq of start
-* sqto - 1..L pos; sqfrom > sqto if rev comp
-* sqlen - length of sequence, L
-* hmmfrom - 0..M+1 pos in HMM of start
-* hmmto - 0..M+1 pos in HMM of end
-* hmmlen - length of HMM, M
-* domidx - number of this domain
-* ndom - total # of domains in sequence
-* ali - optional printable alignment info
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> if reallocation failed.
-*
-* Note: Is this actually used anywhere? (SRE, 10 Dec 08)
-* I think it's not up to date.
- *
- * That's right. This function is completely obsolete.
- * It is used in benchmark and test code, so you can't
- * delete it yet; benchmarks and test code should be
- * revised (SRE, 26 Oct 09)
-*/
-int
-p7_tophits_Add(P7_TOPHITS *h,
- char *name, char *acc, char *desc,
- double sortkey,
- float score, double pvalue,
- float mothersc, double motherp,
- int sqfrom, int sqto, int sqlen,
- int hmmfrom, int hmmto, int hmmlen,
- int domidx, int ndom,
- P7_ALIDISPLAY *ali)
-{
- int status;
-
- if ((status = p7_tophits_Grow(h)) != eslOK) return status;
- if ((status = esl_strdup(name, -1, &(h->unsrt[h->N].name))) != eslOK) return status;
- if ((status = esl_strdup(acc, -1, &(h->unsrt[h->N].acc))) != eslOK) return status;
- if ((status = esl_strdup(desc, -1, &(h->unsrt[h->N].desc))) != eslOK) return status;
- h->unsrt[h->N].sortkey = sortkey;
- h->unsrt[h->N].score = score;
- h->unsrt[h->N].pre_score = 0.0;
- h->unsrt[h->N].sum_score = 0.0;
- h->unsrt[h->N].pvalue = pvalue;
- h->unsrt[h->N].pre_pvalue = 0.0;
- h->unsrt[h->N].sum_pvalue = 0.0;
- h->unsrt[h->N].nexpected = 0;
- h->unsrt[h->N].nregions = 0;
- h->unsrt[h->N].nclustered = 0;
- h->unsrt[h->N].noverlaps = 0;
- h->unsrt[h->N].nenvelopes = 0;
- h->unsrt[h->N].ndom = ndom;
- h->unsrt[h->N].flags = 0;
- h->unsrt[h->N].nreported = 0;
- h->unsrt[h->N].nincluded = 0;
- h->unsrt[h->N].best_domain= 0;
- h->unsrt[h->N].dcl = NULL;
- h->N++;
-
- if (h->N >= 2) h->is_sorted = FALSE;
- return eslOK;
-}
-
-/* hit_sorter(): qsort's pawn, below */
-static int
-hit_sorter(const void *vh1, const void *vh2)
-{
- P7_HIT *h1 = *((P7_HIT **) vh1); /* don't ask. don't change. Don't Panic. */
- P7_HIT *h2 = *((P7_HIT **) vh2);
-
- if (h1->sortkey < h2->sortkey) return 1;
- else if (h1->sortkey > h2->sortkey) return -1;
- else return strcmp(h1->name, h2->name);
-}
-
-/* Function: p7_tophits_Sort()
-* Synopsis: Sorts a hit list.
-* Incept: SRE, Fri Dec 28 07:51:56 2007 [Janelia]
-*
-* Purpose: Sorts a top hit list. After this call,
-* <h->hit[i]> points to the i'th ranked
-* <P7_HIT> for all <h->N> hits.
-*
-* Returns: <eslOK> on success.
-*/
-int
-p7_tophits_Sort(P7_TOPHITS *h)
-{
- int i;
-
- if (h->is_sorted) return eslOK;
- for (i = 0; i < h->N; i++) h->hit[i] = h->unsrt + i;
- if (h->N > 1) qsort(h->hit, h->N, sizeof(P7_HIT *), hit_sorter);
- h->is_sorted = TRUE;
- return eslOK;
-}
-
-/* Function: p7_tophits_Merge()
-* Synopsis: Merge two top hits lists.
-* Incept: SRE, Fri Dec 28 09:32:12 2007 [Janelia]
-*
-* Purpose: Merge <h2> into <h1>. Upon return, <h1>
-* contains the sorted, merged list. <h2>
-* is effectively destroyed; caller should
-* not access it further, and may as well free
-* it immediately.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure, and
-* both <h1> and <h2> remain valid.
-*/
-int
-p7_tophits_Merge(P7_TOPHITS *h1, P7_TOPHITS *h2)
-{
- void *p;
- P7_HIT **new_hit = NULL;
- P7_HIT *ori1 = h1->unsrt; /* original base of h1's data */
- P7_HIT *new2;
- int i,j,k;
- int Nalloc = h1->Nalloc + h2->Nalloc;
- int status;
-
- /* Make sure the two lists are sorted */
- if ((status = p7_tophits_Sort(h1)) != eslOK) goto ERROR;
- if ((status = p7_tophits_Sort(h2)) != eslOK) goto ERROR;
-
- /* Attempt our allocations, so we fail early if we fail.
- * Reallocating h1->unsrt screws up h1->hit, so fix it.
- */
- ESL_RALLOC_WITH_TYPE(h1->unsrt, P7_HIT*, p, sizeof(P7_HIT) * Nalloc);
- ESL_ALLOC_WITH_TYPE(new_hit, P7_HIT **, sizeof(P7_HIT *) * Nalloc);
- for (i = 0; i < h1->N; i++)
- h1->hit[i] = h1->unsrt + (h1->hit[i] - ori1);
-
- /* Append h2's unsorted data array to h1. h2's data begin at <new2> */
- new2 = h1->unsrt + h1->N;
- memcpy(new2, h2->unsrt, sizeof(P7_HIT) * h2->N);
-
- /* Merge the sorted hit lists */
- for (i=0,j=0,k=0; i < h1->N && j < h2->N ; k++)
- new_hit[k] = (hit_sorter(&h1->hit[i], &h2->hit[j]) > 0) ? new2 + (h2->hit[j++] - h2->unsrt) : h1->hit[i++];
- while (i < h1->N) new_hit[k++] = h1->hit[i++];
- while (j < h2->N) new_hit[k++] = new2 + (h2->hit[j++] - h2->unsrt);
-
- /* h2 now turns over management of name, acc, desc memory to h1;
- * nullify its pointers, to prevent double free. */
- for (i = 0; i < h2->N; i++)
- {
- h2->unsrt[i].name = NULL;
- h2->unsrt[i].acc = NULL;
- h2->unsrt[i].desc = NULL;
- h2->unsrt[i].dcl = NULL;
- }
-
- /* Construct the new grown h1 */
- free(h1->hit);
- h1->hit = new_hit;
- h1->Nalloc = Nalloc;
- h1->N += h2->N;
- /* and is_sorted is TRUE, as a side effect of p7_tophits_Sort() above. */
- return eslOK;
-
-ERROR:
- if (new_hit != NULL) free(new_hit);
- return status;
-}
-
-
-/* Function: p7_tophits_GetMaxNameLength()
- * Synopsis: Returns maximum name length in hit list (targets).
-* Incept: SRE, Fri Dec 28 09:00:13 2007 [Janelia]
-*
-* Purpose: Returns the maximum name length of all the registered
-* hits, in chars. This is useful when deciding how to
-* format output.
-*
-* The maximum is taken over all registered hits. This
-* opens a possible side effect: caller might print only
-* the top hits, and the max name length in these top hits
-* may be different than the max length over all the hits.
-*
-* If there are no hits in <h>, or none of the
-* hits have names, returns 0.
-*/
-int
-p7_tophits_GetMaxNameLength(P7_TOPHITS *h)
-{
- int i, max, n;
- for (max = 0, i = 0; i < h->N; i++)
- if (h->unsrt[i].name != NULL) {
- n = strlen(h->unsrt[i].name);
- max = ESL_MAX(n, max);
- }
- return max;
-}
-
-/* Function: p7_tophits_GetMaxAccessionLength()
- * Synopsis: Returns maximum accession length in hit list (targets).
- * Incept: SRE, Tue Aug 25 09:18:33 2009 [Janelia]
- *
- * Purpose: Same as <p7_tophits_GetMaxNameLength()>, but for
- * accessions. If there are no hits in <h>, or none
- * of the hits have accessions, returns 0.
- */
-int
-p7_tophits_GetMaxAccessionLength(P7_TOPHITS *h)
-{
- int i, max, n;
- for (max = 0, i = 0; i < h->N; i++)
- if (h->unsrt[i].acc != NULL) {
- n = strlen(h->unsrt[i].acc);
- max = ESL_MAX(n, max);
- }
- return max;
-}
-
-/* Function: p7_tophits_GetMaxShownLength()
- * Synopsis: Returns max shown name/accession length in hit list.
- * Incept: SRE, Tue Aug 25 09:30:43 2009 [Janelia]
- *
- * Purpose: Same as <p7_tophits_GetMaxNameLength()>, but
- * for the case when --acc is on, where
- * we show accession if one is available, and
- * fall back to showing the name if it is not.
- * Returns the max length of whatever is being
- * shown as the reported "name".
- */
-int
-p7_tophits_GetMaxShownLength(P7_TOPHITS *h)
-{
- int i, max, n;
- for (max = 0, i = 0; i < h->N; i++)
- {
- if (h->unsrt[i].acc != NULL && h->unsrt[i].acc[0] != '\0')
- {
- n = strlen(h->unsrt[i].acc);
- max = ESL_MAX(n, max);
- }
- else if (h->unsrt[i].name != NULL)
- {
- n = strlen(h->unsrt[i].name);
- max = ESL_MAX(n, max);
- }
- }
- return max;
-}
-
-
-/* Function: p7_tophits_Reuse()
-* Synopsis: Reuse a hit list, freeing internals.
-* Incept: SRE, Fri Jun 6 15:39:05 2008 [Janelia]
-*
-* Purpose: Reuse the tophits list <h>; save as
-* many malloc/free cycles as possible,
-* as opposed to <Destroy()>'ing it and
-* <Create>'ing a new one.
-*/
-int
-p7_tophits_Reuse(P7_TOPHITS *h)
-{
- int i, j;
-
- if (h == NULL) return eslOK;
- if (h->unsrt != NULL)
- {
- for (i = 0; i < h->N; i++)
- {
- if (h->unsrt[i].name != NULL) free(h->unsrt[i].name);
- if (h->unsrt[i].acc != NULL) free(h->unsrt[i].acc);
- if (h->unsrt[i].desc != NULL) free(h->unsrt[i].desc);
- if (h->unsrt[i].dcl != NULL) {
- for (j = 0; j < h->unsrt[i].ndom; j++)
- if (h->unsrt[i].dcl[j].ad != NULL) p7_alidisplay_Destroy(h->unsrt[i].dcl[j].ad);
- free(h->unsrt[i].dcl);
- }
- }
- }
- h->N = 0;
- h->is_sorted = TRUE;
- h->hit[0] = h->unsrt;
- return eslOK;
-}
-
-/* Function: p7_tophits_Destroy()
-* Synopsis: Frees a hit list.
-* Incept: SRE, Fri Dec 28 07:33:21 2007 [Janelia]
-*/
-void
-p7_tophits_Destroy(P7_TOPHITS *h)
-{
- int i,j;
- if (h == NULL) return;
- if (h->hit != NULL) free(h->hit);
- if (h->unsrt != NULL)
- {
- for (i = 0; i < h->N; i++)
- {
- if (h->unsrt[i].name != NULL) free(h->unsrt[i].name);
- if (h->unsrt[i].acc != NULL) free(h->unsrt[i].acc);
- if (h->unsrt[i].desc != NULL) free(h->unsrt[i].desc);
- if (h->unsrt[i].dcl != NULL) {
- for (j = 0; j < h->unsrt[i].ndom; j++)
- if (h->unsrt[i].dcl[j].ad != NULL) p7_alidisplay_Destroy(h->unsrt[i].dcl[j].ad);
- free(h->unsrt[i].dcl);
- }
- }
- free(h->unsrt);
- }
- free(h);
- return;
-}
-/*---------------- end, P7_TOPHITS object -----------------------*/
-
-
-
-
-
-
-/*****************************************************************
-* 2. Standard (human-readable) output of pipeline results
-*****************************************************************/
-
-/* workaround_bug_h74():
-* Different envelopes, identical alignment
-*
-* Bug #h74, though extremely rare, arises from a limitation in H3's
-* implementation of Forward/Backward, as follows:
-*
-* 1. A multidomain region is analyzed by stochastic clustering
-* 2. Overlapping envelopes are found (w.r.t sequence coords), though
-* trace clusters are distinct if HMM endpoints are also considered.
-* 3. We have no facility for limiting Forward/Backward to a specified
-* range of profile coordinates, so each envelope is passed to
-* rescore_isolated_domain() and analyzed independently.
-* 4. Optimal accuracy alignment may identify exactly the same alignment
-* in the overlap region shared by the two envelopes.
-*
-* The disturbing result is two different envelopes that have
-* identical alignments and alignment endpoints.
-*
-* The correct fix is to define envelopes not only by sequence
-* endpoints but also by profile endpoints, passing them to
-* rescore_isolated_domain(), and limiting F/B calculations to this
-* pieces of the DP lattice. This requires a fair amount of work,
-* adding to the optimized API.
-*
-* The workaround is to detect when there are duplicate alignments,
-* and only display one. We show the one with the best bit score.
-*
-* If we ever implement envelope-limited versions of F/B, revisit this
-* fix.
-*
-* SRE, Tue Dec 22 16:27:04 2009
-* xref J5/130; notebook/2009/1222-hmmer-bug-h74
-*/
-static int
-workaround_bug_h74(P7_TOPHITS *th)
-{
- int h;
- int d1, d2;
- int dremoved;
-
- for (h = 0; h < th->N; h++)
- if (th->hit[h]->noverlaps)
- {
- for (d1 = 0; d1 < th->hit[h]->ndom; d1++)
- for (d2 = d1+1; d2 < th->hit[h]->ndom; d2++)
- if (th->hit[h]->dcl[d1].iali == th->hit[h]->dcl[d2].iali &&
- th->hit[h]->dcl[d1].jali == th->hit[h]->dcl[d2].jali)
- {
- dremoved = (th->hit[h]->dcl[d1].bitscore >= th->hit[h]->dcl[d2].bitscore) ? d2 : d1;
- if (th->hit[h]->dcl[dremoved].is_reported) { th->hit[h]->dcl[dremoved].is_reported = FALSE; th->hit[h]->nreported--; }
- if (th->hit[h]->dcl[dremoved].is_included) { th->hit[h]->dcl[dremoved].is_included = FALSE; th->hit[h]->nincluded--; }
- }
- }
- return eslOK;
-}
-
-
-/* Function: p7_tophits_Threshold()
-* Synopsis: Apply score and E-value thresholds to a hitlist before output.
-* Incept: SRE, Tue Dec 9 09:04:55 2008 [Janelia]
-*
-* Purpose: After a pipeline has completed, go through it and mark all
-* the targets and domains that are "significant" (satisfying
-* the reporting thresholds set for the pipeline).
-*
- * Also sets the final total number of reported and
- * included targets, the number of reported and included
- * targets in each target, and the size of the search space
- * for per-domain conditional E-value calculations,
-* <pli->domZ>. By default, <pli->domZ> is the number of
-* significant targets reported.
-*
- * If model-specific thresholds were used in the pipeline,
- * we cannot apply those thresholds now. They were already
- * applied in the pipeline. In this case all we're
- * responsible for here is counting them (setting
- * nreported, nincluded counters).
- *
-* Returns: <eslOK> on success.
-*/
-int
-p7_tophits_Threshold(P7_TOPHITS *th, P7_PIPELINE *pli)
-{
- int h, d; /* counters over sequence hits, domains in sequences */
-
- /* Flag reported, included targets (if we're using general thresholds) */
- if (! pli->use_bit_cutoffs)
- {
- for (h = 0; h < th->N; h++)
- {
- if (p7_pli_TargetReportable(pli, th->hit[h]->score, th->hit[h]->pvalue))
- {
- th->hit[h]->flags |= p7_IS_REPORTED;
- if (p7_pli_TargetIncludable(pli, th->hit[h]->score, th->hit[h]->pvalue))
- th->hit[h]->flags |= p7_IS_INCLUDED;
- }
- }
- }
-
- /* Count reported, included targets */
- th->nreported = 0;
- th->nincluded = 0;
- for (h = 0; h < th->N; h++)
- {
- if (th->hit[h]->flags & p7_IS_REPORTED) th->nreported++;
- if (th->hit[h]->flags & p7_IS_INCLUDED) th->nincluded++;
- }
-
- /* Now we can determined domZ, the effective search space in which additional domains are found */
- if (pli->domZ_setby == p7_ZSETBY_NTARGETS) pli->domZ = (double) th->nreported;
-
-
- /* Second pass is over domains, flagging reportable/includable ones.
- * Depends on knowing the domZ we just set.
- * Note how this enforces a hierarchical logic of
- * (sequence|domain) must be reported to be included, and
- * domain can only be (reported|included) if whole sequence is too.
- */
- if (! pli->use_bit_cutoffs)
- {
- for (h = 0; h < th->N; h++)
- {
- if (th->hit[h]->flags & p7_IS_REPORTED)
- {
- for (d = 0; d < th->hit[h]->ndom; d++)
- {
- if (p7_pli_DomainReportable(pli, th->hit[h]->dcl[d].bitscore, th->hit[h]->dcl[d].pvalue))
- th->hit[h]->dcl[d].is_reported = TRUE;
- if ((th->hit[h]->flags & p7_IS_INCLUDED) &&
- p7_pli_DomainIncludable(pli, th->hit[h]->dcl[d].bitscore, th->hit[h]->dcl[d].pvalue))
- th->hit[h]->dcl[d].is_included = TRUE;
- }
- }
- }
- }
-
- /* Count the reported, included domains */
- for (h = 0; h < th->N; h++)
- for (d = 0; d < th->hit[h]->ndom; d++)
- {
- if (th->hit[h]->dcl[d].is_reported) th->hit[h]->nreported++;
- if (th->hit[h]->dcl[d].is_included) th->hit[h]->nincluded++;
- }
-
- workaround_bug_h74(th); /* blech. This function is defined above; see commentary and crossreferences there. */
-
- return eslOK;
-}
-
-// ! here were p7_tophits_CompareRanking. we don't need it !
-
-// ! here were p7_tophits_Targets. we don't need it !
-// ! here were p7_tophits_Domains. we don't need it !
-
-
-
-
-
-/* Function: p7_tophits_Alignment()
-* Synopsis: Create a multiple alignment of all the included domains.
-* Incept: SRE, Wed Dec 10 11:04:40 2008 [Janelia]
-*
-* Purpose: Create a multiple alignment of all domains marked
-* "includable" in the top hits list <th>, and return it in
-* <*ret_msa>.
-*
-* Use of <optflags> is identical to <optflags> in <p7_MultipleAlignment()>.
-* Possible flags include <p7_DIGITIZE>, <p7_ALL_CONSENSUS_COLS>,
-* and <p7_TRIM>; they may be OR'ed together. Otherwise, pass
-* <p7_DEFAULT> to set no flags.
-*
-* Caller may optionally provide <inc_sqarr>, <inc_trarr>, and
-* <inc_n> to include additional sequences in the alignment
-* (the jackhmmer query, for example). Otherwise, pass <NULL, NULL, 0>.
-*
-* Returns: <eslOK> on success, and <*ret_msa> points to a new MSA that
-* the caller is responsible for freeing.
-*
-* Returns <eslFAIL> if there are no reported domains that
-* satisfy reporting thresholds, in which case <*ret_msa>
-* is <NULL>.
-*
-* Throws: <eslEMEM> on allocation failure; <eslECORRUPT> on
-* unexpected internal data corruption.
-*
-* Xref: J4/29: incept.
-* J4/76: added inc_sqarr, inc_trarr, inc_n, optflags
-*/
-int
-p7_tophits_Alignment(const P7_TOPHITS *th, const ESL_ALPHABET *abc,
- ESL_SQ **inc_sqarr, P7_TRACE **inc_trarr, int inc_n,
- int optflags, ESL_MSA **ret_msa)
-{
- ESL_SQ **sqarr = NULL;
- P7_TRACE **trarr = NULL;
- ESL_MSA *msa = NULL;
- int ndom = 0;
- int h, d, y;
- int M;
- int status;
-
- /* How many domains will be included in the new alignment?
- * We also set model size M here; every alignment has a copy.
- */
- for (h = 0; h < th->N; h++)
- if (th->hit[h]->flags & p7_IS_INCLUDED)
- {
- for (d = 0; d < th->hit[h]->ndom; d++)
- if (th->hit[h]->dcl[d].is_included)
- ndom++;
- }
- if (inc_n+ndom == 0) { status = eslFAIL; goto ERROR; }
-
- if (inc_n) M = inc_trarr[0]->M;
- else M = th->hit[0]->dcl[0].ad->M;
-
- /* Allocation */
- ESL_ALLOC_WITH_TYPE(sqarr, ESL_SQ **, sizeof(ESL_SQ *) * (ndom + inc_n));
- ESL_ALLOC_WITH_TYPE(trarr, P7_TRACE **, sizeof(P7_TRACE *) * (ndom + inc_n));
- /* Inclusion of preexisting seqs, traces: make copy of pointers */
- for (y = 0; y < inc_n; y++) { sqarr[y] = inc_sqarr[y]; trarr[y] = inc_trarr[y]; }
- for (; y < (ndom+inc_n); y++) { sqarr[y] = NULL; trarr[y] = NULL; }
-
- /* Make faux sequences, traces from hit list */
- y = inc_n;
- for (h = 0; h < th->N; h++)
- if (th->hit[h]->flags & p7_IS_INCLUDED)
- {
- for (d = 0; d < th->hit[h]->ndom; d++)
- if (th->hit[h]->dcl[d].is_included)
- {
- if ((status = p7_alidisplay_Backconvert(th->hit[h]->dcl[d].ad, abc, &(sqarr[y]), &(trarr[y]))) != eslOK) goto ERROR;
- y++;
- }
- }
-
- /* Make the multiple alignment */
- if ((status = p7_tracealign_Seqs(sqarr, trarr, inc_n+ndom, M, optflags, &msa)) != eslOK) goto ERROR;
-
- /* Clean up */
- for (y = inc_n; y < ndom+inc_n; y++) esl_sq_Destroy(sqarr[y]);
- for (y = inc_n; y < ndom+inc_n; y++) p7_trace_Destroy(trarr[y]);
- free(sqarr);
- free(trarr);
- *ret_msa = msa;
- return eslOK;
-
-ERROR:
- if (sqarr != NULL) { for (y = inc_n; y < ndom+inc_n; y++) if (sqarr[y] != NULL) esl_sq_Destroy(sqarr[y]); free(sqarr); }
- if (trarr != NULL) { for (y = inc_n; y < ndom+inc_n; y++) if (trarr[y] != NULL) p7_trace_Destroy(trarr[y]); free(trarr); }
- if (msa != NULL) esl_msa_Destroy(msa);
- *ret_msa = NULL;
- return status;
-}
-/*---------------- end, standard output format ------------------*/
-
-
-
-
-
-/*****************************************************************
-* 3. Tabular (parsable) output of pipeline results.
-*****************************************************************/
-
-// we don't need it
-
-/*------------------- end, tabular output -----------------------*/
-
-
-
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/p7_trace.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/p7_trace.cpp
deleted file mode 100644
index 110fd69..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/p7_trace.cpp
+++ /dev/null
@@ -1,1538 +0,0 @@
-/* P7_TRACE, the traceback structure.
-*
-* Contents:
-* 1. The P7_TRACE structure
-* 2. Access routines
-* 3. Debugging tools
-* 4. Creating traces by DP traceback
-* 5. Counting traces into new HMMs
-* 6. Copyright and license information
-*
-* Stylistic note: elements in a trace path are usually indexed by z.
-*
-* SRE, Tue Jan 2 2007 [Casa de Gatos]
- * SVN $Id: p7_trace.c 3041 2009-11-12 12:58:09Z eddys $
-*/
-
-#include <hmmer3/p7_config.h>
-
-#include <stdlib.h>
-#include <stdio.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/hmmer.h>
-
-
-/*****************************************************************
-* 1. The P7_TRACE structure.
-*****************************************************************/
-
-static P7_TRACE *trace_create_engine(int initial_nalloc, int initial_ndomalloc, int with_posteriors);
-
-/* Function: p7_trace_Create()
-* Synopsis: Allocates a (growable, reusable) traceback.
-*
-* Purpose: Allocates a traceback.
-*
-* Tracebacks are growable. A reasonable initial internal
-* allocation is made here, and routines that generate
-* tracebacks will dynamically grow the trace as needed.
-*
-* Tracebacks are reusable. Usually a routine only
-* allocates one, and reuses its memory over and over as
-* new target sequences are aligned.
-*
-* Returns: a pointer to the new <P7_TRACE> structure on success.
-*
-* Throws: <NULL> on allocation error.
-*/
-P7_TRACE *
-p7_trace_Create(void)
-{
- int initial_nalloc = 256;
- int initial_ndomalloc = 16;
- int with_posteriors = FALSE;
- return trace_create_engine(initial_nalloc, initial_ndomalloc, with_posteriors);
-}
-
-/* Function: p7_trace_CreateWithPP()
-* Synopsis: Allocates a traceback that includes posterior probs.
-* Incept: SRE, Tue Aug 19 13:08:12 2008 [Janelia]
-*
-* Purpose: Allocates a traceback that includes <tr->pp[z]> fields
-* for posterior probabilities of emitted residues;
-* otherwise identical to <p7_trace_Create()>.
-*/
-P7_TRACE *
-p7_trace_CreateWithPP(void)
-{
- int initial_nalloc = 256;
- int initial_ndomalloc = 16;
- int with_posteriors = TRUE;
- return trace_create_engine(initial_nalloc, initial_ndomalloc, with_posteriors);
-}
-
-static P7_TRACE *
-trace_create_engine(int initial_nalloc, int initial_ndomalloc, int with_posteriors)
-{
- P7_TRACE *tr = NULL;
- int status;
-
- ESL_ALLOC_WITH_TYPE(tr, P7_TRACE*, sizeof(P7_TRACE));
- tr->st = NULL;
- tr->k = NULL;
- tr->i = NULL;
- tr->pp = NULL;
- tr->M = 0;
- tr->L = 0;
- tr->tfrom = tr->tto = NULL;
- tr->sqfrom = tr->sqto = NULL;
- tr->hmmfrom = tr->hmmto = NULL;
-
- /* The trace data itself */
- ESL_ALLOC_WITH_TYPE(tr->st, char*, sizeof(char) * initial_nalloc);
- ESL_ALLOC_WITH_TYPE(tr->k, int*, sizeof(int) * initial_nalloc);
- ESL_ALLOC_WITH_TYPE(tr->i, int*, sizeof(int) * initial_nalloc);
- if (with_posteriors)
- ESL_ALLOC_WITH_TYPE(tr->pp, float*, sizeof(float) * initial_nalloc);
- tr->N = 0;
- tr->nalloc = initial_nalloc;
-
- /* The trace's index: table of domain start/stop coords */
- ESL_ALLOC_WITH_TYPE(tr->tfrom, int*, sizeof(int) * initial_ndomalloc);
- ESL_ALLOC_WITH_TYPE(tr->tto, int*, sizeof(int) * initial_ndomalloc);
- ESL_ALLOC_WITH_TYPE(tr->sqfrom, int*, sizeof(int) * initial_ndomalloc);
- ESL_ALLOC_WITH_TYPE(tr->sqto, int*, sizeof(int) * initial_ndomalloc);
- ESL_ALLOC_WITH_TYPE(tr->hmmfrom, int*, sizeof(int) * initial_ndomalloc);
- ESL_ALLOC_WITH_TYPE(tr->hmmto, int*, sizeof(int) * initial_ndomalloc);
- tr->ndom = 0;
- tr->ndomalloc = initial_ndomalloc;
- return tr;
-
-ERROR:
- if (tr != NULL) p7_trace_Destroy(tr);
- return NULL;
-}
-
-
-/* Function: p7_trace_Reuse()
-* Synopsis: Prepare a trace for reuse.
-* Incept: SRE, Tue Jan 9 13:02:34 2007 [Janelia]
-*
-* Purpose: Reinitializes an existing trace object, reusing its
-* memory.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: (no abnormal error conditions)
-*
-* Xref: STL11/124
-*/
-int
-p7_trace_Reuse(P7_TRACE *tr)
-{
- tr->N = 0;
- tr->M = 0;
- tr->L = 0;
- tr->ndom = 0;
- return eslOK;
-}
-
-/* Function: p7_trace_Grow()
-* Synopsis: Grow the allocation for trace data.
-*
-* Purpose: If <tr> can't fit another state, double its allocation for
-* traceback data.
-*
-* This doesn't reallocate the domain index; see
-* <p7_trace_GrowIndex()> or <p7_trace_GrowIndexTo()> for
-* that.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure; in this case, the data in
-* <tr> are unaffected.
-*/
-int
-p7_trace_Grow(P7_TRACE *tr)
-{
- void* tmp;
- int status;
-
- if (tr->N < tr->nalloc) return eslOK;
-
- ESL_RALLOC_WITH_TYPE(tr->st, char*, tmp, sizeof(char) *2*tr->nalloc);
- ESL_RALLOC_WITH_TYPE(tr->k, int*, tmp, sizeof(int) *2*tr->nalloc);
- ESL_RALLOC_WITH_TYPE(tr->i, int*, tmp, sizeof(int) *2*tr->nalloc);
- if (tr->pp != NULL) ESL_RALLOC_WITH_TYPE(tr->pp, float*, tmp, sizeof(float) *2*tr->nalloc);
- tr->nalloc *= 2;
- return eslOK;
-
-ERROR:
- return status;
-}
-
-/* Function: p7_trace_GrowIndex()
-* Synopsis: Grows the allocation of the trace's domain index.
-* Incept: SRE, Fri Jan 4 10:40:02 2008 [Janelia]
-*
-* Purpose: If <tr> can't fit another domain in its index,
-* double the allocation of the index in <tr>.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure; in this case, the
-* data in <tr> are unaffected.
-*/
-int
-p7_trace_GrowIndex(P7_TRACE *tr)
-{
- void *p;
- int status;
-
- if (tr->ndom < tr->ndomalloc) return eslOK;
-
- ESL_RALLOC_WITH_TYPE(tr->tfrom, int*, p, sizeof(int)*2*tr->ndomalloc);
- ESL_RALLOC_WITH_TYPE(tr->tto, int*, p, sizeof(int)*2*tr->ndomalloc);
- ESL_RALLOC_WITH_TYPE(tr->sqfrom, int*, p, sizeof(int)*2*tr->ndomalloc);
- ESL_RALLOC_WITH_TYPE(tr->sqto, int*, p, sizeof(int)*2*tr->ndomalloc);
- ESL_RALLOC_WITH_TYPE(tr->hmmfrom, int*, p, sizeof(int)*2*tr->ndomalloc);
- ESL_RALLOC_WITH_TYPE(tr->hmmto, int*, p, sizeof(int)*2*tr->ndomalloc);
- tr->ndomalloc *= 2;
- return eslOK;
-
-ERROR:
- return status;
-}
-
-
-/* Function: p7_trace_GrowTo()
-* Synopsis: Reallocates trace to a given minimum size.
-*
-* Purpose: Reallocates a trace structure <tr> to hold a trace
-* of at least length <N> states.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure; the data in <tr>
-* are unaffected by failure.
-*/
-int
-p7_trace_GrowTo(P7_TRACE *tr, int N)
-{
- int status;
- void *tmp;
-
- if (N < tr->nalloc) return eslOK; /* no-op */
-
- ESL_RALLOC_WITH_TYPE(tr->st, char*, tmp, sizeof(char) *N);
- ESL_RALLOC_WITH_TYPE(tr->k, int*, tmp, sizeof(int) *N);
- ESL_RALLOC_WITH_TYPE(tr->i, int*, tmp, sizeof(int) *N);
- if (tr->pp != NULL) ESL_RALLOC_WITH_TYPE(tr->pp, float*, tmp, sizeof(float) *N);
- tr->nalloc = N;
- return eslOK;
-
-ERROR:
- return status;
-}
-
-
-/* Function: p7_trace_GrowIndexTo()
-* Synopsis: Reallocates domain index for a given minimum number.
-* Incept: SRE, Fri Jan 4 10:47:43 2008 [Janelia]
-*
-* Purpose: Reallocates the domain index in <tr> to index
-* at least <ndom> domains.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure, in which case
-* the data in <tr> are unaffected.
-*/
-int
-p7_trace_GrowIndexTo(P7_TRACE *tr, int ndom)
-{
- void *p;
- int status;
-
- if (ndom < tr->ndomalloc) return eslOK;
-
- ESL_RALLOC_WITH_TYPE(tr->tfrom, int*, p, sizeof(int)*ndom);
- ESL_RALLOC_WITH_TYPE(tr->tto, int*, p, sizeof(int)*ndom);
- ESL_RALLOC_WITH_TYPE(tr->sqfrom, int*, p, sizeof(int)*ndom);
- ESL_RALLOC_WITH_TYPE(tr->sqto, int*, p, sizeof(int)*ndom);
- ESL_RALLOC_WITH_TYPE(tr->hmmfrom, int*, p, sizeof(int)*ndom);
- ESL_RALLOC_WITH_TYPE(tr->hmmto, int*, p, sizeof(int)*ndom);
- tr->ndomalloc = ndom;
- return eslOK;
-
-ERROR:
- return status;
-}
-
-
-/* Function: p7_trace_Destroy()
-* Synopsis: Frees a trace.
-*
-* Purpose: Frees a trace structure <tr>.
-*
-* Returns: (void)
-*/
-void
-p7_trace_Destroy(P7_TRACE *tr)
-{
- if (tr == NULL) return;
- if (tr->st != NULL) free(tr->st);
- if (tr->k != NULL) free(tr->k);
- if (tr->i != NULL) free(tr->i);
- if (tr->pp != NULL) free(tr->pp);
- if (tr->tfrom != NULL) free(tr->tfrom);
- if (tr->tto != NULL) free(tr->tto);
- if (tr->sqfrom != NULL) free(tr->sqfrom);
- if (tr->sqto != NULL) free(tr->sqto);
- if (tr->hmmfrom != NULL) free(tr->hmmfrom);
- if (tr->hmmto != NULL) free(tr->hmmto);
- free(tr);
- return;
-}
-
-/* Function: p7_trace_DestroyArray()
-*
-* Purpose: Frees an array of <N> trace structures, <tr[0..N-1]>.
-*
-* Returns: (void)
-*/
-void
-p7_trace_DestroyArray(P7_TRACE **tr, int N)
-{
- int idx;
-
- if (tr == NULL) return;
- for (idx = 0; idx < N; idx++)
- {
- if (tr[idx] == NULL) continue;
- p7_trace_Destroy(tr[idx]);
- }
- free(tr);
- return;
-}
-
-/*---------------------- end, P7_TRACE --------------------------*/
-
-
-
-
-/*****************************************************************
-* 2. Access routines
-*****************************************************************/
-
-/* Function: p7_trace_GetDomainCount()
-* Incept: SRE, Tue Feb 27 13:11:43 2007 [Janelia]
-*
-* Purpose: Determine the number of hits in the trace <tr> -- that is,
-* the number of times the trace traverses the model from
-* B...E. Return that number in <ret_ndom>.
-*
-* Done simply by counting the number of B states used in
-* the trace.
-*
-* Only sensible on profile traces. Core traces have 1
-* domain by definition.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: (no abnormal error conditions)
-*/
-int
-p7_trace_GetDomainCount(const P7_TRACE *tr, int *ret_ndom)
-{
- int z;
- int ndom = 0;
-
- if (tr->ndom > 0)
- ndom = tr->ndom; /* if we already indexed the domains, we know the answer */
- else {
- for (z = 0; z < tr->N; z++)
- if (tr->st[z] == p7T_B) ndom++;
- }
- *ret_ndom = ndom;
- return eslOK;
-}
-
-/* Function: p7_trace_GetStateUseCounts()
-* Incept: SRE, Sun May 27 10:30:13 2007 [Janelia]
-*
-* Purpose: Accumulate counts of each different state type in trace <tr>.
-*
-* <counts[]> is allocated for at least <p7T_NSTATETYPES>
-* integers, indexed by statetype. Upon return,
-* <counts[p7T_M]> contains the number of match states
-* in the trace, for example.
-*/
-int
-p7_trace_GetStateUseCounts(const P7_TRACE *tr, int *counts)
-{
- int x,z;
-
- for (x = 0; x < p7T_NSTATETYPES; x++) counts[x] = 0;
-
- for (z = 0; z < tr->N; z++) {
- x = tr->st[z];
- if (x < 0 || x >= p7T_NSTATETYPES) ESL_EXCEPTION(eslEINVAL, "bad state type");
- counts[x]++;
- }
- return eslOK;
-}
-
-/* Function: p7_trace_GetDomainCoords()
-* Incept: SRE, Tue Feb 27 13:08:32 2007 [Janelia]
-*
-* Purpose: Retrieve the bounds of domain alignment number <which> in
-* traceback <tr>. <which> starts from 0. The total number
-* of domains in a trace can be obtained from
-* <p7_trace_GetDomainCount()>, or caller can just loop
-* an increasing <which> count until <eslEOD> is returned.
-*
-* Start/end in the sequence are returned in <ret_i1>,
-* <ret_i2>. Start/end in the model are returned in <ret_k1>,
-* <ret_k2>.
-*
-* It only makes sense to call this function on profile
-* traces.
-*
-* By local alignment bounds convention, the domain
-* alignment is defined as bounded by match states, so <k1>
-* and <k2> are the coords of the first and last match
-* state (in range 1..M), and <i1> and <i2> are the coords
-* of the residues aligned to those match states. Profiles
-* do allow a Mk->DDD->E trailer; nonetheless, if such a
-* trailer occurs, the k2 coord still refers to the last
-* match state's coordinate. Note that such trailers would
-* only occur in generated or sampled paths, not Viterbi
-* paths; in Viterbi alignments with exit probabilities of
-* 1.0, the direct Mk->E path will always have higher
-* probability than a Mk->DDD->E path.
-*
-* Returns: <eslOK> on success, and the coords are returned.
-* <eslEOD> if the trace doesn't contain a <which>'th
-* domain, and the coords are all returned as 0.
-*
-* Throws: <eslEINVAL> if you stupidly pass a <which> less than 0;
-* <eslECORRUPT> if something is grievously wrong with <tr>.
-*/
-int
-p7_trace_GetDomainCoords(const P7_TRACE *tr, int which,
- int *ret_i1, int *ret_i2, int *ret_k1, int *ret_k2)
-{
- int status;
- int z;
-
- if (which < 0) ESL_XEXCEPTION(eslEINVAL, "bad which < 0");
-
- if (tr->ndom) /* do we have an index? then this'll be easy */
- {
- if (which >= tr->ndom) { status = eslEOD; goto ERROR; }
- *ret_i1 = tr->sqfrom[which];
- *ret_i2 = tr->sqto[which];
- *ret_k1 = tr->hmmfrom[which];
- *ret_k2 = tr->hmmto[which];
- return eslOK;
- }
-
- /* else, the hard way.
- * skip z to one state past the which'th B state.
- */
- for (z = 0; which >= 0 && z < tr->N; z++)
- if (tr->st[z] == p7T_B) which--;
- if (z == tr->N) { status = eslEOD; goto ERROR; }
-
- /* skip to the first M state and record i1,k1:
- * in a profile trace, this must be the next state.
- */
- if (tr->st[z] != p7T_M) ESL_XEXCEPTION(eslECORRUPT, "not a profile trace?");
- *ret_i1 = tr->i[z];
- *ret_k1 = tr->k[z];
-
- /* skip to the end E, then look back at the last M, record i2,k2.
- */
- for (; z < tr->N; z++)
- if (tr->st[z] == p7T_E) break;
- if (z == tr->N) ESL_EXCEPTION(eslECORRUPT, "invalid trace: no E for a B");
- do { z--; } while (tr->st[z] == p7T_D); /* roll back over any D trailer */
- if (tr->st[z] != p7T_M) ESL_EXCEPTION(eslECORRUPT, "invalid trace: no M");
- *ret_i2 = tr->i[z];
- *ret_k2 = tr->k[z];
- return eslOK;
-
-ERROR:
- *ret_i1 = 0;
- *ret_i2 = 0;
- *ret_k1 = 0;
- *ret_k2 = 0;
- return status;
-}
-/*---------------- end, access routines -------------------------*/
-
-
-
-
-/*****************************************************************
-* 3. Debugging tools.
-*****************************************************************/
-
-/* Function: p7_trace_Validate()
-* Incept: SRE, Fri Jan 5 09:17:24 2007 [Janelia]
-*
-* Purpose: Validate the internal data in a trace structure <tr>
-* representing an alignment of an HMM to a
-* digital sequence <sq>. The digital sequence may be either
-* unaligned (usually) or aligned (in the case of "fake"
-* tracebacks generated from an MSA during a
-* model construction process).
-*
-* We don't pass the HMM that the trace is associated with,
-* because we might have constructed the trace during
-* HMM construction when we don't have an HMM yet; but
-* we always have a digital sequence.
-*
-* Intended for debugging, development, and testing
-* purposes.
-*
-* Args: tr - trace to validate
-* abc - alphabet corresponding to sequence <sq>
-* sq - digital sequence that <tr> is explaining
-* errbuf - NULL, or an error message buffer allocated
-* for at least eslERRBUFSIZE chars.
-*
-* Returns: <eslOK> if trace appears fine.
-* Returns <eslFAIL> if a problem is detected; if <errbuf> is
-* provided (non-<NULL>), an informative message is formatted
-* there to indicate the reason for the failure.
-*/
-int
-p7_trace_Validate(const P7_TRACE *tr, const ESL_ALPHABET *abc, const ESL_DSQ *dsq, char *errbuf)
-{
- int z; /* position in trace */
- int i; /* position in sequence */
- int k; /* position in model */
- char prv; /* type of the previous state */
- int is_core; /* TRUE if trace is a core trace, not profile */
-
- /* minimum trace length is a core's B->Mk->E. If we don't have at least that,
- * we're definitely in trouble
- */
- if (tr->N < 3) ESL_FAIL(eslFAIL, errbuf, "trace is too short");
- if (tr->N > tr->nalloc) ESL_FAIL(eslFAIL, errbuf, "N of %d isn't sensible", tr->N);
-
- /* Determine if this is a core trace or a profile trace, so we can
- * construct validation tests appropriately.
- */
- if (tr->st[0] == p7T_B) is_core = TRUE;
- else if (tr->st[0] == p7T_S) is_core = FALSE;
- else ESL_FAIL(eslFAIL, errbuf, "first state neither S nor B");
-
- /* Verify "sentinels", the final states of the trace
- * (before we start looking backwards and forwards from each state in
- * our main validation loop)
- */
- if (is_core && tr->st[tr->N-1] != p7T_E) ESL_FAIL(eslFAIL, errbuf, "last state not E");
- if (!is_core && tr->st[tr->N-1] != p7T_T) ESL_FAIL(eslFAIL, errbuf, "last state not T");
- if (tr->k[0] != 0) ESL_FAIL(eslFAIL, errbuf, "first state shouldn't have k set");
- if (tr->i[0] != 0) ESL_FAIL(eslFAIL, errbuf, "first state shouldn't have i set");
- if (tr->k[tr->N-1] != 0) ESL_FAIL(eslFAIL, errbuf, "last state shouldn't have k set");
- if (tr->i[tr->N-1] != 0) ESL_FAIL(eslFAIL, errbuf, "last state shouldn't have i set");
-
- if (tr->pp != NULL && tr->pp[0] != 0.0) ESL_FAIL(eslFAIL, errbuf, "first state doesn't emit; but post prob isn't 0");
- if (tr->pp != NULL && tr->pp[tr->N-1] != 0.0) ESL_FAIL(eslFAIL, errbuf, "last state doesn't emit; but post prob isn't 0");
-
- /* Main validation loop. */
- k = 0;
- i = 1;
- for (z = 1; z < tr->N-1; z++)
- {
- for (; dsq[i] != eslDSQ_SENTINEL; i++) /* find next non-gap residue in dsq */
- if (esl_abc_XIsResidue(abc, dsq[i]) || esl_abc_XIsNonresidue(abc, dsq[i])) break; /* '*' included as emitted "residue" */
-
- /* watch out for missing data states X: can only be one.
- * prv state might have to skip over one (but not more) missing data states
- */
- prv = (tr->st[z-1] == p7T_X)? tr->st[z-2] : tr->st[z-1];
-
- switch (tr->st[z]) {
- case p7T_S:
- ESL_FAIL(eslFAIL, errbuf, "S must be first state");
- break;
-
- case p7T_X:
- if (! is_core) ESL_FAIL(eslFAIL, errbuf, "X state (missing data) only appears in core traces");
- if (prv != p7T_B && tr->st[z+1] != p7T_E) /* only B->X and X->E are possible */
- ESL_FAIL(eslFAIL, errbuf, "bad transition involving missing data (X state) not at start/end");
- break;
-
- case p7T_N:
- if (is_core) ESL_FAIL(eslFAIL, errbuf, "core trace can't contain N");
- if (tr->k[z] != 0) ESL_FAIL(eslFAIL, errbuf, "no N should have k set");
- if (prv == p7T_S) { /* 1st N doesn't emit */
- if (tr->i[z] != 0) ESL_FAIL(eslFAIL, errbuf, "first N shouldn't have i set");
- if (tr->pp != NULL && tr->pp[z] != 0.0) ESL_FAIL(eslFAIL, errbuf, "first N can't have nonzero post prob");
- } else if (prv == p7T_N) { /* subsequent N's do */
- if (tr->i[z] != i) ESL_FAIL(eslFAIL, errbuf, "expected i doesn't match trace's i");
- i++;
- } else ESL_FAIL(eslFAIL, errbuf, "bad transition to N; expected {S,N}->N");
- break;
-
- case p7T_B:
- if (tr->k[z] != 0) ESL_FAIL(eslFAIL, errbuf, "B shouldn't have k set");
- if (tr->i[z] != 0) ESL_FAIL(eslFAIL, errbuf, "B shouldn't have i set");
- if (tr->pp != NULL && tr->pp[z] != 0.0) ESL_FAIL(eslFAIL, errbuf, "B can't have nonzero post prob");
- if (prv != p7T_N && prv != p7T_J)
- ESL_FAIL(eslFAIL, errbuf, "bad transition to B; expected {N,J}->B");
- break;
-
- case p7T_M:
- if (prv == p7T_B) k = tr->k[z]; else k++; /* on a B->Mk entry, trust k; else verify */
-
- if (tr->k[z] != k) ESL_FAIL(eslFAIL, errbuf, "expected k doesn't match trace's k");
- if (tr->i[z] != i) ESL_FAIL(eslFAIL, errbuf, "expected i doesn't match trace's i");
- if (prv != p7T_B && prv != p7T_M && prv != p7T_D && prv != p7T_I)
- ESL_FAIL(eslFAIL, errbuf, "bad transition to M; expected {B,M,D,I}->M");
- i++;
- break;
-
- case p7T_D:
- k++;
- if (tr->st[z-1] == p7T_X) k = tr->k[z]; /* with fragments, a X->Ik case is possible */
- if (tr->k[z] != k) ESL_FAIL(eslFAIL, errbuf, "expected k doesn't match trace's k");
- if (tr->i[z] != 0) ESL_FAIL(eslFAIL, errbuf, "D shouldn't have i set");
- if (tr->pp != NULL && tr->pp[z] != 0.0) ESL_FAIL(eslFAIL, errbuf, "D can't have nonzero post prob");
- if (is_core) {
- if (prv != p7T_M && prv != p7T_D && prv != p7T_B)
- ESL_FAIL(eslFAIL, errbuf, "bad transition to D; expected {B,M,D}->D");
- } else {
- if (prv != p7T_M && prv != p7T_D)
- ESL_FAIL(eslFAIL, errbuf, "bad transition to D; expected {M,D}->D");
- }
- break;
-
- case p7T_I:
- if (tr->st[z-1] == p7T_X) k = tr->k[z]; /* with fragments, a X->Ik case is possible */
- if (tr->k[z] != k) ESL_FAIL(eslFAIL, errbuf, "expected k doesn't match trace's k");
- if (tr->i[z] != i) ESL_FAIL(eslFAIL, errbuf, "expected i doesn't match trace's i");
- if (is_core) {
- if (prv != p7T_B && prv != p7T_M && prv != p7T_I)
- ESL_FAIL(eslFAIL, errbuf, "bad transition to I; expected {B,M,I}->I");
- } else {
- if (prv != p7T_M && prv != p7T_I)
- ESL_FAIL(eslFAIL, errbuf, "bad transition to I; expected {M,I}->I");
- }
- i++;
- break;
-
- case p7T_E:
- if (tr->k[z] != 0) ESL_FAIL(eslFAIL, errbuf, "E shouldn't have k set");
- if (tr->i[z] != 0) ESL_FAIL(eslFAIL, errbuf, "E shouldn't have i set");
- if (tr->pp != NULL && tr->pp[z] != 0.0) ESL_FAIL(eslFAIL, errbuf, "E can't have nonzero post prob");
- if (is_core) {
- if (prv != p7T_M && prv != p7T_D && prv != p7T_I)
- ESL_FAIL(eslFAIL, errbuf, "bad transition to E; expected {M,D,I}->E");
- } else {
- if (prv != p7T_M && prv != p7T_D)
- ESL_FAIL(eslFAIL, errbuf, "bad transition to E; expected {M,D}->E");
- }
- break;
-
- case p7T_J:
- if (tr->k[z] != 0) ESL_FAIL(eslFAIL, errbuf, "no J should have k set");
- if (prv == p7T_E) { /* 1st J doesn't emit */
- if (tr->i[z] != 0) ESL_FAIL(eslFAIL, errbuf, "first J shouldn't have i set");
- if (tr->pp != NULL && tr->pp[z] != 0.0) ESL_FAIL(eslFAIL, errbuf, "first J can't have nonzero post prob");
- } else if (prv == p7T_J) { /* subsequent J's do */
- if (tr->i[z] != i) ESL_FAIL(eslFAIL, errbuf, "expected i doesn't match trace's i");
- i++;
- } else ESL_FAIL(eslFAIL, errbuf, "bad transition to J; expected {E,J}->J");
- break;
-
- case p7T_C:
- if (is_core) ESL_FAIL(eslFAIL, errbuf, "core trace can't contain C");
- if (tr->k[z] != 0) ESL_FAIL(eslFAIL, errbuf, "no C should have k set");
- if (prv == p7T_E) { /* 1st C doesn't emit */
- if (tr->i[z] != 0) ESL_FAIL(eslFAIL, errbuf, "first C shouldn't have i set");
- if (tr->pp != NULL && tr->pp[z] != 0.0) ESL_FAIL(eslFAIL, errbuf, "first C can't have nonzero post prob");
- } else if (prv == p7T_C) { /* subsequent C's do */
- if (tr->i[z] != i) ESL_FAIL(eslFAIL, errbuf, "expected i doesn't match trace's i");
- i++;
- } else ESL_FAIL(eslFAIL, errbuf, "bad transition to C; expected {E,C}->C");
- break;
-
- case p7T_T:
- ESL_FAIL(eslFAIL, errbuf, "T must be last state");
- break;
- }
- }
-
- /* Trace should have accounted for all residues in the dsq */
- for (; dsq[i] != eslDSQ_SENTINEL; i++)
- if (esl_abc_XIsResidue(abc, dsq[i]))
- ESL_FAIL(eslFAIL, errbuf, "trace didn't account for all residues in the sq");
-
- /* No k larger than M; no i-1 larger than L (i is sitting on dsq[n+1] sentinel right now) */
- if (k > tr->M) ESL_FAIL(eslFAIL, errbuf, "M=%d, but k went to %d\n", tr->M, k);
- if (i-1 > tr->L) ESL_FAIL(eslFAIL, errbuf, "L=%d, but i went to %d\n", tr->L, i);
-
- return eslOK;
-}
-
-
-/* Function: p7_trace_Dump()
-* Incept: SRE, Fri Jan 5 09:26:04 2007 [Janelia]
-*
-* Purpose: Dumps internals of a traceback structure <tr> to <fp>.
-* If <gm> is non-NULL, also prints transition/emission scores.
-* If <dsq> is non-NULL, also prints residues (using alphabet
-* in the <gm>).
-*
-* Args: fp - stream to dump to (often stdout)
-* tr - trace to dump
-* gm - NULL, or score profile corresponding to trace
-* dsq - NULL, or digitized seq corresponding to trace
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEINVAL> if trace contains something corrupt or invalid;
-* in this case, dump will be aborted, possibly after partial
-* output.
-*/
-int
-p7_trace_Dump(FILE *fp, const P7_TRACE *tr, const P7_PROFILE *gm, const ESL_DSQ *dsq) /* replace void w/ P7_PROFILE */
-{
- int z; /* counter for trace position */
- if (tr == NULL) { fprintf(fp, " [ trace is NULL ]\n"); return eslOK; }
-
- if (gm == NULL)
- { /* Yes, this does get used: during model construction. */
- fprintf(fp, "st k i - traceback len %d\n", tr->N);
- fprintf(fp, "-- ---- ----\n");
- for (z = 0; z < tr->N; z++) {
- fprintf(fp, "%1s %4d %6d\n",
- p7_hmm_DecodeStatetype(tr->st[z]),
- tr->k[z],
- tr->i[z]);
- }
- }
- else
- {
- int status;
- float accuracy = 0.0f;
- float sc = 0.0f;
- float tsc;
- int xi;
-
-
- fprintf(fp, "st k i transit emission postprob - traceback len %d\n", tr->N);
- fprintf(fp, "-- ---- ------ -------- -------- --------\n");
- for (z = 0; z < tr->N; z++)
- {
- if (z < tr->N-1)
- {
- status = p7_profile_GetT(gm, tr->st[z], tr->k[z], tr->st[z+1], tr->k[z+1], &tsc);
- if (status != eslOK) return status;
- }
- else tsc = 0.0f;
-
- fprintf(fp, "%1s %4d %6d %8.4f", p7_hmm_DecodeStatetype(tr->st[z]), tr->k[z], tr->i[z], tsc);
- sc += tsc;
-
- if (dsq != NULL) {
- xi = dsq[tr->i[z]];
-
- if (tr->st[z] == p7T_M) {
- fprintf(fp, " %8.4f", p7P_MSC(gm, tr->k[z], xi));
- sc += p7P_MSC(gm, tr->k[z], xi);
- if (tr->pp != NULL) {
- fprintf(fp, " %8.4f", tr->pp[z]);
- accuracy += tr->pp[z];
- }
- fprintf(fp, " %c", gm->abc->sym[xi]);
- }
- else if (tr->st[z] == p7T_I) {
- fprintf(fp, " %8.4f", p7P_ISC(gm, tr->k[z], xi));
- sc += p7P_ISC(gm, tr->k[z], xi);
- if (tr->pp != NULL) {
- fprintf(fp, " %8.4f", tr->pp[z]);
- accuracy += tr->pp[z];
- }
- fprintf(fp, " %c", (char) tolower((int) gm->abc->sym[xi]));
- }
- else if ((tr->st[z] == p7T_N && tr->st[z-1] == p7T_N) ||
- (tr->st[z] == p7T_C && tr->st[z-1] == p7T_C) ||
- (tr->st[z] == p7T_J && tr->st[z-1] == p7T_J)) {
- fprintf(fp, " %8d", 0);
- if (tr->pp != NULL) {
- fprintf(fp, " %8.4f", tr->pp[z]);
- accuracy += tr->pp[z];
- }
- fprintf(fp, " %c", (char) tolower((int) gm->abc->sym[xi]));
- }
- }
- else fprintf(fp, " %8s %8s %c", "-", "-", '-');
- fputs("\n", fp);
- }
- fprintf(fp, " -------- -------- --------\n");
- fprintf(fp, " total: %8.4f %8.4f\n\n", sc, accuracy);
- }
-
-
- return eslOK;
-}
-
-
-/* Function: p7_trace_Compare()
-* Synopsis: Compare two traces for identity
-* Incept: SRE, Wed Aug 20 09:05:24 2008 [Janelia]
-*
-* Purpose: Compare two tracebacks; return <eslOK> if they
-* are identical, <eslFAIL> if not.
-*
-* If posterior probability annotation is present in
-* both traces, they are compared using <esl_FCompare()>
-* and a relative tolerance of <pptol>.
-*
-* If domain indices are present in both traces,
-* the two indexes are compared.
-*/
-int
-p7_trace_Compare(P7_TRACE *tr1, P7_TRACE *tr2, float pptol)
-{
- int z,d;
-
- if (tr1->N != tr2->N) esl_fatal("FAIL");
- if (tr1->M != tr2->M) esl_fatal("FAIL");
- if (tr1->L != tr2->L) esl_fatal("FAIL");
-
- /* Main data in the trace */
- for (z = 0; z < tr1->N; z++)
- {
- if (tr1->st[z] != tr2->st[z]) esl_fatal("FAIL");
- if (tr1->k[z] != tr2->k[z]) esl_fatal("FAIL");
- if (tr1->i[z] != tr2->i[z]) esl_fatal("FAIL");
- }
-
- /* Optional posterior probability annotation */
- if (tr1->pp != NULL && tr2->pp != NULL)
- {
- for (z = 0; z < tr1->N; z++)
- if (tr1->i[z] != 0) /* an emission: has a nonzero posterior prob*/
- {
- if (esl_FCompare(tr1->pp[z], tr2->pp[z], pptol) != eslOK) esl_fatal("FAIL");
- }
- else
- {
- if (tr1->pp[z] != tr2->pp[z]) esl_fatal("FAIL"); /* both 0.0 */
- }
- }
-
- /* Optional domain index */
- if (tr1->ndom > 0 && tr2->ndom > 0)
- {
- if (tr1->ndom != tr2->ndom) esl_fatal("FAIL");
-
- for (d = 0; d < tr1->ndom; d++)
- {
- if (tr1->tfrom[d] != tr2->tfrom[d]) esl_fatal("FAIL");
- if (tr1->tto[d] != tr2->tto[d]) esl_fatal("FAIL");
- if (tr1->sqfrom[d] != tr2->sqfrom[d]) esl_fatal("FAIL");
- if (tr1->sqto[d] != tr2->sqto[d]) esl_fatal("FAIL");
- if (tr1->hmmfrom[d] != tr2->hmmfrom[d]) esl_fatal("FAIL");
- if (tr1->hmmto[d] != tr2->hmmto[d]) esl_fatal("FAIL");
- }
- }
- return eslOK;
-}
-
-
-
-
-/* Function: p7_trace_Score()
-* Incept: SRE, Tue Mar 6 14:40:34 2007 [Janelia]
-*
-* Purpose: Score path <tr> for digital target sequence <dsq>
-* using profile <gm>. Return the lod score in
-* <ret_sc>.
-*
-* Args: tr - traceback path to score
-* dsq - digitized sequence
-* gm - score profile
-* ret_sc - RETURN: lod score of trace <tr>
-*
-* Returns: <eslOK> on success, and <*ret_sc> contains the
-* lod score for the trace.
-*
-* Throws: <eslEINVAL> if something's wrong with the trace.
-* Now <*ret_sc> is returned as $-\infty$.
-*/
-int
-p7_trace_Score(P7_TRACE *tr, ESL_DSQ *dsq, P7_PROFILE *gm, float *ret_sc)
-{
- float sc; /* total lod score */
- float tsc; /* a transition score */
- int z; /* position in tr */
- int xi; /* digitized symbol in dsq */
- int status;
-
- sc = 0.0f;
- for (z = 0; z < tr->N-1; z++) {
- xi = dsq[tr->i[z]];
-
- if (tr->st[z] == p7T_M) sc += p7P_MSC(gm, tr->k[z], xi);
- else if (tr->st[z] == p7T_I) sc += p7P_ISC(gm, tr->k[z], xi);
-
- if ((status = p7_profile_GetT(gm, tr->st[z], tr->k[z],
- tr->st[z+1], tr->k[z+1], &tsc)) != eslOK) goto ERROR;
- sc += tsc;
- }
-
- *ret_sc = sc;
- return eslOK;
-
-ERROR:
- *ret_sc = -eslINFINITY;
- return status;
-}
-
-/* Function: p7_trace_SetPP()
-* Synopsis: Set posterior probs of an arbitrary trace.
-* Incept: SRE, Tue Aug 19 14:16:10 2008 [Janelia]
-*
-* Purpose: Set the posterior probability fields of an arbitrary
-* trace <tr>, by accessing posterior residue probabilities
-* in decoding matrix <pp>.
-*
-* In general, <pp> was created by <p7_GDecoding()>
-* or converted from the optimized matrix created by
-* <p7_Decoding()>.
-*
-* This is classed as a debugging function for the moment,
-* because in general traces with posterior probabilities are
-* created directly using optimal accuracy DP routines.
-* This function allows us to add PP annotation to any
-* trace.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation error.
-* <eslEINVAL> on internal corruptions.
-*/
-int
-p7_trace_SetPP(P7_TRACE *tr, const P7_GMX *pp)
-{
- float **dp = pp->dp; /* so {MDI}MX() macros work */
- float *xmx = pp->xmx; /* so XMX() macro works */
- int z;
- int status;
-
- if (tr->pp == NULL) ESL_ALLOC_WITH_TYPE(tr->pp, float*, sizeof(float) * tr->nalloc);
-
- for (z = 0; z < tr->N; z++)
- {
- if (tr->i[z] > 0) /* an emitting state? */
- {
- switch (tr->st[z]) {
- case p7T_M: tr->pp[z] = MMX(tr->i[z], tr->k[z]); break;
- case p7T_I: tr->pp[z] = IMX(tr->i[z], tr->k[z]); break;
- case p7T_N: tr->pp[z] = XMX(tr->i[z], p7G_N); break;
- case p7T_C: tr->pp[z] = XMX(tr->i[z], p7G_C); break;
- case p7T_J: tr->pp[z] = XMX(tr->i[z], p7G_J); break;
- default: ESL_EXCEPTION(eslEINVAL, "no such emitting state");
- }
- }
- else
- tr->pp[z] = 0.0;
- }
- return eslOK;
-
-ERROR:
- return status;
-}
-
-/* Function: p7_trace_GetExpectedAccuracy()
-* Synopsis: Returns the sum of the posterior residue decoding probs.
-* Incept: SRE, Tue Aug 19 15:29:18 2008 [Janelia]
-*/
-float
-p7_trace_GetExpectedAccuracy(const P7_TRACE *tr)
-{
- float accuracy = 0.0;
- int z;
-
- for (z = 0; z < tr->N; z++)
- accuracy += tr->pp[z];
- return accuracy;
-}
-
-/*------------------ end, debugging tools -----------------------*/
-
-
-
-
-/*****************************************************************
-* 4. Creating traces by DP traceback
-*****************************************************************/
-
-/* Function: p7_trace_Append()
-* Synopsis: Add an element (state/residue) to a growing trace.
-*
-* Purpose: Adds an element to a trace <tr> that is growing
-* left-to-right. The element is defined by a state type
-* <st> (such as <p7T_M>); a node index <k> (1..M for
-* M,D,I main states; else 0); and a dsq position <i> (1..L
-* for emitters, else 0).
-*
-* For CNJ states, which emit on transition, by convention
-* we associate the emission with the downstream state; therefore
-* the first state in any run of CNJ states has i=0.
-*
-* Reallocates the trace (by doubling) if necessary.
-*
-* Caller can grow a trace right-to-left too, if it
-* plans to call <p7_trace_Reverse()>.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on reallocation failure. The element is successfully
-* added, but no more elements can be added before this trace is
-* destroyed.
-*
-* <eslEINVAL> if you try to add an element to a trace whose
-* reallocation has already failed.
-*/
-int
-p7_trace_Append(P7_TRACE *tr, char st, int k, int i)
-{
- int status;
-
- if ((status = p7_trace_Grow(tr)) != eslOK) return status;
-
- switch (st) {
- /* Emit-on-transition states: */
- case p7T_N:
- case p7T_C:
- case p7T_J:
- tr->i[tr->N] = ( (tr->st[tr->N-1] == st) ? i : 0);
- tr->k[tr->N] = 0;
- break;
- /* Nonemitting states, outside main model: */
- case p7T_X:
- case p7T_S:
- case p7T_B:
- case p7T_E:
- case p7T_T: tr->i[tr->N] = 0; tr->k[tr->N] = 0; break;
- /* Nonemitting, but in main model (k valid) */
- case p7T_D: tr->i[tr->N] = 0; tr->k[tr->N] = k; break;
- /* Emitting states, with valid k position in model: */
- case p7T_M:
- case p7T_I: tr->i[tr->N] = i; tr->k[tr->N] = k; break;
- default: ESL_EXCEPTION(eslEINVAL, "no such state; can't append");
- }
-
- tr->st[tr->N] = st;
- tr->N++;
- return eslOK;
-}
-
-/* Function: p7_trace_AppendWithPP()
-* Synopsis: Add element to growing trace, with posterior probability.
-*
-* Purpose: Same as <p7_trace_Append()>, but also records a posterior
-* probability estimate for emitted residues. <pp> is assumed to be
-* zero for nonemitting states even if a nonzero argument is
-* mistakenly passed.
-*/
-int
-p7_trace_AppendWithPP(P7_TRACE *tr, char st, int k, int i, float pp)
-{
- int status;
-
- if ((status = p7_trace_Grow(tr)) != eslOK) return status;
-
- switch (st) {
- /* Emit-on-transition states: */
- case p7T_N:
- case p7T_C:
- case p7T_J:
- if (tr->st[tr->N-1] == st)
- {
- tr->i[tr->N] = i;
- tr->pp[tr->N] = pp;
- }
- else
- {
- tr->i[tr->N] = 0;
- tr->pp[tr->N] = 0.0;
- }
- tr->k[tr->N] = 0;
- break;
- /* Nonemitting states, outside main model: */
- case p7T_X:
- case p7T_S:
- case p7T_B:
- case p7T_E:
- case p7T_T: tr->i[tr->N] = 0; tr->pp[tr->N] = 0.0; tr->k[tr->N] = 0; break;
- /* Nonemitting, but in main model (k valid) */
- case p7T_D: tr->i[tr->N] = 0; tr->pp[tr->N] = 0.0; tr->k[tr->N] = k; break;
- /* Emitting states, with valid k position in model: */
- case p7T_M:
- case p7T_I: tr->i[tr->N] = i; tr->pp[tr->N] = pp; tr->k[tr->N] = k; break;
- default: ESL_EXCEPTION(eslEINVAL, "no such state; can't append");
- }
-
- tr->st[tr->N] = st;
- tr->N++;
- return eslOK;
-}
-
-/* Function: p7_trace_Reverse()
-* Synopsis: Reverse the arrays in a traceback structure.
-*
-* Purpose: Reverse the arrays in a traceback structure. Tracebacks
-* from DP algorithms are collected backwards, and they call this
-* function when they're done.
-*
-* At least for now, this invalidates any domain index
-* table, if it exists. The expectd order of invocation is
-* to create the traceback backwards, <Reverse()> it, then
-* <IndexDomains()> it.
-*
-* Args: tr - the traceback to reverse. tr->N must be set.
-*
-* Return: <eslOK> on success; <tr> is modified.
-*/
-int
-p7_trace_Reverse(P7_TRACE *tr)
-{
- int z;
- int tmp;
- float tmpf;
-
- /* For emit-on-transition states N,C,J, traces always obey the
- * C-,Cx,Cx,Cx convention even when they were constructed backwards;
- * so we make them Cx,Cx,Cx,C- by pulling residues backwards by one,
- * just before reversing them. (Other ways of doing this would be
- * fine too.
- */
- for (z = 0; z < tr->N; z++)
- {
- if ( (tr->st[z] == p7T_N && tr->st[z+1] == p7T_N) ||
- (tr->st[z] == p7T_C && tr->st[z+1] == p7T_C) ||
- (tr->st[z] == p7T_J && tr->st[z+1] == p7T_J))
- {
- if (tr->i[z] == 0 && tr->i[z+1] > 0)
- {
- tr->i[z] = tr->i[z+1];
- tr->i[z+1] = 0;
- if (tr->pp != NULL) {
- tr->pp[z] = tr->pp[z+1];
- tr->pp[z+1] = 0.0;
- }
- }
- }
- }
-
- /* Reverse the trace in place. */
- for (z = 0; z < tr->N/2; z++)
- {
- tmp = tr->st[tr->N-z-1]; tr->st[tr->N-z-1] = tr->st[z]; tr->st[z] = tmp;
- tmp = tr->k[tr->N-z-1]; tr->k[tr->N-z-1] = tr->k[z]; tr->k[z] = tmp;
- tmp = tr->i[tr->N-z-1]; tr->i[tr->N-z-1] = tr->i[z]; tr->i[z] = tmp;
- if (tr->pp != NULL) {
- tmpf = tr->pp[tr->N-z-1]; tr->pp[tr->N-z-1] = tr->pp[z]; tr->pp[z] = tmpf;
- }
- }
- /* don't worry about the middle residue in odd-length N, since we're in-place */
- return eslOK;
-}
-
-
-/* Function: p7_trace_Index()
-* Synopsis: Internally index the domains in a trace.
-* Incept: SRE, Fri Jan 4 11:12:24 2008 [Janelia]
-*
-* Purpose: Create an internal index of the domains in <tr>.
-* This makes calls to <GetDomainCount()> and
-* <GetDomainCoords()> more efficient, and it is
-* a necessary prerequisite for creating alignments
-* of any individual domains in a multidomain trace with
-* <p7_alidisplay_Create()>.
-*
-* Returns: <eslOK> on success.
-*
-* Throws: <eslEMEM> on allocation failure, in which case the
-* data in the trace is still fine, but the domain index
-* table isn't constructed.
-*/
-int
-p7_trace_Index(P7_TRACE *tr)
-{
- int z;
- int status;
-
- tr->ndom = 0;
- for (z = 0; z < tr->N; z++)
- {
- switch (tr->st[z]) {
- case p7T_B:
- if ((status = p7_trace_GrowIndex(tr)) != eslOK) goto ERROR;
- tr->tfrom[tr->ndom] = z;
- tr->sqfrom[tr->ndom] = 0;
- tr->hmmfrom[tr->ndom] = 0;
- break;
-
- case p7T_M:
- if (tr->sqfrom[tr->ndom] == 0) tr->sqfrom[tr->ndom] = tr->i[z];
- if (tr->hmmfrom[tr->ndom] == 0) tr->hmmfrom[tr->ndom] = tr->k[z];
- tr->sqto[tr->ndom] = tr->i[z];
- tr->hmmto[tr->ndom] = tr->k[z];
- break;
-
- case p7T_E:
- tr->tto[tr->ndom] = z;
- tr->ndom++;
- break;
- }
- }
- return eslOK;
-
-ERROR:
- return status;
-}
-/*----------- end, creating traces by DP traceback ---------------*/
-
-
-/*****************************************************************
-* 5. Creating faux traces from MSAs
-*****************************************************************/
-
-/* Function: p7_trace_FauxFromMSA()
-* Synopsis: Create array of faux tracebacks from an existing MSA.
-* Incept: SRE, Thu May 21 08:07:25 2009 [Janelia]
-*
-* Purpose: Given an existing <msa> and an array <matassign> that
-* flags the alignment columns that are assigned to consensus
-* match states (matassign[1..alen] = 1|0); create an array
-* of faux traces <tr[0..msa->nseq-1]>. <optflags> controls
-* optional behavior; it can be <p7_DEFAULT> or <p7_MSA_COORDS>,
-* as explained below.
-*
-* The traces are core traces: they start/end with B/E,
-* they may use I_0,I_M, and D_1 states. Any flanking
-* insertions (outside the first/last consensus column) are
-* assigned to I_0 and I_M.
-*
-* If the input alignment contains sequence fragments,
-* caller should first convert leading/trailing gaps to
-* missing data symbols. This hack causes entry/exit
-* transitions to be encoded in the trace as B->X->{MDI}k
-* and {MDI}k->X->E, rather than B->DDDD->Mk, Mk->DDDDD->E
-* paths involving terminal deletions, and all functions
- * that use traces, such as <p7_trace_Count()>, (should)
-* ignore transitions involving <p7T_X> states.
-*
-* By default (<optflags = p7_DEFAULT>), the <i> coordinate
-* in the faux tracebacks is <1..L>, relative to the
-* unaligned raw sequences in <msa>, the way most H3 traces
-* are supposed to be. In some cases (such as model
-* construction from an MSA) it is convenient to reference
-* residues in the MSA cooordinate system directly; setting
-* <optflags = p7_MSA_COORDS> makes the traces come out
-* with <i=1..alen> coords for residues.
-*
-* Important: an MSA may imply DI and ID transitions that
-* are illegal in a core model. If the only purpose of the
-* traces is to go straight back into alignment
-* construction through a <p7_tracealign_*> function, this
-* is ok, because the <p7_tracealign_*> routines can handle
-* DI and ID transitions (enabling reconstruction of almost
-* exactly the same input alignment, modulo unaligned
-* insertions). This is what happens for <hmmalign
-* --mapali>, for example. However, if the caller wants to
-* use the traces for anything else, these illegal DI and
-* ID transitions have to be removed first, and the caller
-* should use <p7_trace_Doctor()> to do it.
-*
-* Args: msa - digital alignment
-* matassign - flag for each alignment column, whether
-* it is consensus or not. matassign[1..alen] = 1|0;
-* matassign[0] = 0
-* optflags - p7_DEFAULT | p7_MSA_COORDS
-* tr - RETURN: caller provides 0..nseq-1 pointer
-* array for holding returned traces.
-*
-* Returns: <eslOK> on success, and tr[0..nseq-1] now point to newly
-* created traces; caller is responsible for freeing these.
-*
-* Throws: <eslEMEM> on allocation error.
-*
-* Xref: J5/17: build.c::fake_tracebacks() becomes p7_trace_FauxFromMSA();
-* ability to handle MSA or raw coords added.
-*/
-int
-p7_trace_FauxFromMSA(ESL_MSA *msa, int *matassign, int optflags, P7_TRACE **tr)
-{
- int idx; /* counter over seqs in MSA */
- int k; /* position in HMM */
- int apos; /* position in alignment columns 1..alen */
- int rpos; /* position in unaligned sequence residues 1..L */
- int showpos; /* coord to actually record: apos or rpos */
- int status = eslOK;
-
- for (idx = 0; idx < msa->nseq; idx++) tr[idx] = NULL;
-
- for (idx = 0; idx < msa->nseq; idx++)
- {
- if ((tr[idx] = p7_trace_Create()) == NULL) goto ERROR;
- if ((status = p7_trace_Append(tr[idx], p7T_B, 0, 0)) != eslOK) goto ERROR;
-
- for (k = 0, rpos = 1, apos = 1; apos <= msa->alen; apos++)
- {
- showpos = (optflags & p7_MSA_COORDS) ? apos : rpos;
-
- if (matassign[apos])
- { /* match or delete */
- k++;
- if (esl_abc_XIsResidue(msa->abc, msa->ax[idx][apos]))
- status = p7_trace_Append(tr[idx], p7T_M, k, showpos);
- else if (esl_abc_XIsGap (msa->abc, msa->ax[idx][apos]))
- status = p7_trace_Append(tr[idx], p7T_D, k, 0);
- else if (esl_abc_XIsNonresidue(msa->abc, msa->ax[idx][apos]))
- status = p7_trace_Append(tr[idx], p7T_M, k, showpos); /* treat * as a residue! */
- else if (esl_abc_XIsMissing(msa->abc, msa->ax[idx][apos]))
- {
- if (tr[idx]->st[tr[idx]->N-1] != p7T_X)
- status = p7_trace_Append(tr[idx], p7T_X, k, 0); /* allow only one X in a row */
- }
- else ESL_XEXCEPTION(eslEINCONCEIVABLE, "can't happen");
- }
- else
- { /* insert or nothing */
- if (esl_abc_XIsResidue(msa->abc, msa->ax[idx][apos]))
- status = p7_trace_Append(tr[idx], p7T_I, k, showpos);
- else if (esl_abc_XIsNonresidue(msa->abc, msa->ax[idx][apos]))
- status = p7_trace_Append(tr[idx], p7T_I, k, showpos); /* treat * as a residue! */
- else if (esl_abc_XIsMissing(msa->abc, msa->ax[idx][apos]))
- {
- if (tr[idx]->st[tr[idx]->N-1] != p7T_X)
- status = p7_trace_Append(tr[idx], p7T_X, k, 0);
- }
- else if (! esl_abc_XIsGap(msa->abc, msa->ax[idx][apos]))
- ESL_XEXCEPTION(eslEINCONCEIVABLE, "can't happen");
- }
-
- if (esl_abc_XIsResidue(msa->abc, msa->ax[idx][apos])) rpos++;
- if (status != eslOK) goto ERROR;
- }
- if ((status = p7_trace_Append(tr[idx], p7T_E, 0, 0)) != eslOK) goto ERROR;
- /* k == M by construction; set tr->L = msa->alen since coords are w.r.t. ax */
- tr[idx]->M = k;
- tr[idx]->L = msa->alen;
- }
- return eslOK;
-
-
-ERROR:
- for (idx = 0; idx < msa->nseq; idx++) { p7_trace_Destroy(tr[idx]); tr[idx] = NULL; }
- return status;
-}
-
-
-
-/* Function: p7_trace_Doctor()
-* Incept: SRE, Thu May 21 08:45:46 2009 [Janelia]
-*
-* Purpose: Plan 7 disallows D->I and I->D "chatter" transitions.
-* However, these transitions will be implied by many
-* alignments. trace_doctor() arbitrarily collapses I->D or
-* D->I into a single M position in the trace.
-*
-* trace_doctor does not examine any scores when it does
-* this. In ambiguous situations (D->I->D) the symbol
-* will be pulled arbitrarily to the left, regardless
-* of whether that's the best column to put it in or not.
-*
-* Args: tr - trace to doctor
-* opt_ndi - optRETURN: number of DI transitions doctored
-* opt_nid - optRETURN: number of ID transitions doctored
-*
-* Return: <eslOK> on success, and the trace <tr> is modified.
-*/
-int
-p7_trace_Doctor(P7_TRACE *tr, int *opt_ndi, int *opt_nid)
-{
- int opos; /* position in old trace */
- int npos; /* position in new trace (<= opos) */
- int ndi, nid; /* number of DI, ID transitions doctored */
-
- /* overwrite the trace from left to right */
- ndi = nid = 0;
- opos = npos = 0;
- while (opos < tr->N) {
- /* fix implied D->I transitions; D transforms to M, I pulled in */
- if (tr->st[opos] == p7T_D && tr->st[opos+1] == p7T_I) {
- tr->st[npos] = p7T_M;
- tr->k[npos] = tr->k[opos]; /* D transforms to M */
- tr->i[npos] = tr->i[opos+1]; /* insert char moves back */
- opos += 2;
- npos += 1;
- ndi++;
- } /* fix implied I->D transitions; D transforms to M, I is pushed in */
- else if (tr->st[opos]== p7T_I && tr->st[opos+1]== p7T_D) {
- tr->st[npos] = p7T_M;
- tr->k[npos] = tr->k[opos+1]; /* D transforms to M */
- tr->i[npos] = tr->i[opos]; /* insert char moves up */
- opos += 2;
- npos += 1;
- nid++;
- } /* everything else is just copied */
- else {
- tr->st[npos] = tr->st[opos];
- tr->k[npos] = tr->k[opos];
- tr->i[npos] = tr->i[opos];
- opos++;
- npos++;
- }
- }
- tr->N = npos;
-
- if (opt_ndi != NULL) *opt_ndi = ndi;
- if (opt_nid != NULL) *opt_nid = nid;
- return eslOK;
-}
-/*-------------- end, faux traces from MSAs ---------------------*/
-
-
-/*****************************************************************
-* 6. Counting traces into new HMMs.
-*****************************************************************/
-
-/* Function: p7_trace_Count()
-*
-* Purpose: Count a traceback into a count-based core HMM structure.
-* (Usually as part of a model parameter re-estimation.)
-*
-* The traceback may either be a core traceback (as in model
-* construction) or a profile traceback (as in model
-* reestimation).
-*
-* If it is a profile traceback, we have to be careful how
-* we translate an internal entry path from a score profile
-* back to the core model. Sometimes a B->M_k transition is
-* an internal entry from local alignment, and sometimes it
-* is a wing-folded B->D_1..DDM_k alignment to the core
-* model.
-*
-* This is one of the purposes of the special p7T_X
-* 'missing data' state in tracebacks. Local alignment entry
- * is indicated by a B->X->{MDI}_k 'missing data' path, and
-* direct B->M_k or M_k->E transitions in a traceback are
-* interpreted as wing retraction in a glocal model.
-*
- * The <p7T_X> state is also used in core traces in model
-* construction literally to mean missing data, in the
-* treatment of sequence fragments.
-*
-* Args: hmm - counts-based HMM to count <tr> into
-* tr - alignment of seq to HMM
-* dsq - digitized sequence that traceback aligns to the HMM (1..L)
-* (or can be an ax, aligned digital seq)
-* wt - weight on this sequence
-*
-* Return: <eslOK> on success.
-* Weighted count events are accumulated in hmm's mat[][], ins[][],
-* t[][] fields: the core probability model.
-*
-* Throws: <eslEINVAL> if something's corrupt in the trace; effect on hmm
-* counts is undefined, because it may abort at any point in the trace.
-*/
-int
-p7_trace_Count(P7_HMM *hmm, ESL_DSQ *dsq, float wt, P7_TRACE *tr)
-{
- int z; /* position in tr */
- int i; /* symbol position in seq */
- int st,st2; /* state type (cur, nxt) */
- int k,k2,ktmp; /* node index (cur, nxt) */
-
- for (z = 0; z < tr->N-1; z++)
- {
- if (tr->st[z] == p7T_X) continue; /* skip missing data */
-
- /* pull some info into tmp vars for notational clarity later. */
- st = tr->st[z];
- st2 = tr->st[z+1];
- k = tr->k[z];
- k2 = tr->k[z+1];
- i = tr->i[z];
-
- /* Emission counts. */
- if (st == p7T_M) esl_abc_FCount(hmm->abc, hmm->mat[k], dsq[i], wt);
- else if (st == p7T_I) esl_abc_FCount(hmm->abc, hmm->ins[k], dsq[i], wt);
-
- /* Transition counts */
- if (st2 == p7T_X) continue; /* ignore transition to missing data */
-
- if (st == p7T_B) {
- if (st2 == p7T_M && k2 > 1) /* wing-retracted B->DD->Mk path */
- {
- hmm->t[0][p7H_MD] += wt;
- for (ktmp = 1; ktmp < k2-1; ktmp++)
- hmm->t[ktmp][p7H_DD] += wt;
- hmm->t[ktmp][p7H_DM] += wt;
- }
- else {
- switch (st2) {
- case p7T_M: hmm->t[0][p7H_MM] += wt; break;
- case p7T_I: hmm->t[0][p7H_MI] += wt; break;
- case p7T_D: hmm->t[0][p7H_MD] += wt; break;
- default: ESL_EXCEPTION(eslEINVAL, "bad transition in trace");
- }
- }
- }
- else if (st == p7T_M) {
- switch (st2) {
- case p7T_M: hmm->t[k][p7H_MM] += wt; break;
- case p7T_I: hmm->t[k][p7H_MI] += wt; break;
- case p7T_D: hmm->t[k][p7H_MD] += wt; break;
- case p7T_E: hmm->t[k][p7H_MM] += wt; break; /* k==M. A local alignment would've been Mk->X->E. */
- default: ESL_EXCEPTION(eslEINVAL, "bad transition in trace");
- }
- }
- else if (st == p7T_I) {
- switch (st2) {
- case p7T_M: hmm->t[k][p7H_IM] += wt; break;
- case p7T_I: hmm->t[k][p7H_II] += wt; break;
- case p7T_E: hmm->t[k][p7H_IM] += wt; break; /* k==M. */
- default: ESL_EXCEPTION(eslEINVAL, "bad transition in trace");
- }
- }
- else if (st == p7T_D) {
- switch (st2) {
- case p7T_M: hmm->t[k][p7H_DM] += wt; break;
- case p7T_D: hmm->t[k][p7H_DD] += wt; break;
- case p7T_E: hmm->t[k][p7H_DM] += wt; break; /* k==M. A local alignment would've been Dk->X->E. */
- default: ESL_EXCEPTION(eslEINVAL, "bad transition in trace");
- }
- }
- } /* end loop over trace position */
- return eslOK;
-}
-/*--------------------- end, trace counting ---------------------*/
-
-
-
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/seqmodel.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/seqmodel.cpp
deleted file mode 100644
index ebc9937..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/seqmodel.cpp
+++ /dev/null
@@ -1,111 +0,0 @@
-/* Creating profile HMMs from single sequences.
-*
-* Contents:
-* 1. Routines in the exposed API.
-*
-* SRE, Fri Mar 23 07:54:02 2007 [Janelia] [Decembrists, Picaresque]
- * SVN $Id: seqmodel.c 2895 2009-09-11 20:16:34Z eddys $
-*/
-
-#include <hmmer3/p7_config.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_vectorops.h>
-
-#include <hmmer3/hmmer.h>
-
-/*****************************************************************
-* 1. Routines in the exposed API.
-*****************************************************************/
-
-
-/* Function: p7_Seqmodel()
-* Synopsis: Make a profile HMM from a single sequence.
-* Incept: SRE, Tue Sep 4 10:29:14 2007 [Janelia]
-*
-* Purpose: Make a profile HMM from a single sequence, for
-* probabilistic Smith/Waterman alignment, HMMER3-style.
-*
-* The query is digital sequence <dsq> of length <M>
-* residues in alphabet <abc>, named <name>.
-*
-* The scoring system is given by <Q>, <f>, <popen>, and
-* <pextend>. <Q> is a $K \times K$ matrix giving
-* conditional residue probabilities $P(a \mid b)}$; these
-* are typically obtained by reverse engineering a score
-* matrix like BLOSUM62. <f> is a vector of $K$ background
-* frequencies $p_a$. <popen> and <pextend> are the
-* probabilities assigned to gap-open ($t_{MI}$ and
-* $t_{MD}$) and gap-extend ($t_{II}$ and $t_{DD}$)
-* transitions.
-*
-* Args:
-*
-* Returns: <eslOK> on success, and a newly allocated HMM is returned
-* in <ret_hmm>.
-*
-* Throws: <eslEMEM> on allocation error, and <*ret_hmm> is <NULL>.
-*/
-int
-p7_Seqmodel(const ESL_ALPHABET *abc, ESL_DSQ *dsq, int M, char *name,
- ESL_DMATRIX *Q, float *f, double popen, double pextend,
- P7_HMM **ret_hmm)
-{
- int status;
- P7_HMM *hmm = NULL;
- char *logmsg = "[HMM created from a query sequence]";
- int k;
-
- if ((hmm = p7_hmm_Create(M, abc)) == NULL) { status = eslEMEM; goto ERROR; }
-
- for (k = 0; k <= M; k++)
- {
- /* Use rows of P matrix as source of match emission vectors */
- if (k > 0) esl_vec_D2F(Q->mx[(int) dsq[k]], abc->K, hmm->mat[k]);
-
- /* Set inserts to background for now. This will be improved. */
- esl_vec_FCopy(f, abc->K, hmm->ins[k]);
-
- hmm->t[k][p7H_MM] = 1.0 - 2 * popen;
- hmm->t[k][p7H_MI] = popen;
- hmm->t[k][p7H_MD] = popen;
- hmm->t[k][p7H_IM] = 1.0 - pextend;
- hmm->t[k][p7H_II] = pextend;
- hmm->t[k][p7H_DM] = 1.0 - pextend;
- hmm->t[k][p7H_DD] = pextend;
- }
-
- /* Deal w/ special stuff at node M, overwriting a little of what we
- * just did.
- */
- hmm->t[M][p7H_MM] = 1.0 - popen;
- hmm->t[M][p7H_MD] = 0.;
- hmm->t[M][p7H_DM] = 1.0;
- hmm->t[M][p7H_DD] = 0.;
-
- /* Add mandatory annotation
- */
- p7_hmm_SetName(hmm, name);
- p7_hmm_AppendComlog(hmm, 1, &logmsg);
- hmm->nseq = 1;
- p7_hmm_SetCtime(hmm);
- hmm->checksum = 0;
-
- *ret_hmm = hmm;
- return eslOK;
-
-ERROR:
- if (hmm != NULL) p7_hmm_Destroy(hmm);
- *ret_hmm = NULL;
- return status;
-}
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/hmmer3/tracealign.cpp b/src/plugins_3rdparty/hmm3/src/hmmer3/tracealign.cpp
deleted file mode 100644
index d798954..0000000
--- a/src/plugins_3rdparty/hmm3/src/hmmer3/tracealign.cpp
+++ /dev/null
@@ -1,816 +0,0 @@
-/* Construction of multiple alignments from traces.
-*
-* Contents:
-* 1. API for aligning sequence or MSA traces
-* 2. Internal functions used by the API
-* 3. Copyright and license.
-*
-* SRE, Tue Oct 21 19:38:19 2008 [Casa de Gatos]
- * SVN $Id: tracealign.c 3041 2009-11-12 12:58:09Z eddys $
-*/
-#include <hmmer3/p7_config.h>
-
-#include <hmmer3/easel/easel.h>
-#include <hmmer3/easel/esl_vectorops.h>
-
-#include <hmmer3/hmmer.h>
-
-static int map_new_msa(P7_TRACE **tr, int nseq, int M, int optflags, int **ret_inscount, int **ret_matuse, int **ret_matmap, int *ret_alen);
-static ESL_DSQ get_dsq_z(ESL_SQ **sq, const ESL_MSA *premsa, P7_TRACE **tr, int idx, int z);
-static int make_digital_msa(ESL_SQ **sq, const ESL_MSA *premsa, P7_TRACE **tr, int nseq, const int *matuse, const int *matmap, int M, int alen, int optflags, ESL_MSA **ret_msa);
-static int make_text_msa (ESL_SQ **sq, const ESL_MSA *premsa, P7_TRACE **tr, int nseq, const int *matuse, const int *matmap, int M, int alen, int optflags, ESL_MSA **ret_msa);
-static int annotate_rf(ESL_MSA *msa, int M, const int *matuse, const int *matmap);
-static int annotate_posterior_probability(ESL_MSA *msa, P7_TRACE **tr, const int *matmap, int M, int optflags);
-static int rejustify_insertions_digital ( ESL_MSA *msa, const int *inserts, const int *matmap, const int *matuse, int M);
-static int rejustify_insertions_text (const ESL_ALPHABET *abc, ESL_MSA *msa, const int *inserts, const int *matmap, const int *matuse, int M);
-
-
-/*****************************************************************
-* 1. API for aligning sequence or MSA traces
-*****************************************************************/
-
-/* Function: p7_tracealign_Seqs()
-* Synopsis: Convert array of traces (for a sequence array) to a new MSA.
-* Incept: SRE, Tue Oct 21 19:40:33 2008 [Janelia]
-*
-* Purpose: Convert an array of <nseq> traces <tr[0..nseq-1]>,
-* corresponding to an array of digital sequences
-* <sq[0..nseq-1]> aligned to a model of
-* length <M>, to a new multiple sequence alignment.
-* The new alignment structure is allocated here, and returned
-* in <*ret_msa>.
-*
-* <optflags> controls some optional behaviors in producing
-* the alignment, as follows:
-*
-* <p7_DIGITIZE>: creates the MSA in digital mode, as
-* opposed to a default text mode.
-*
-* <p7_ALL_CONSENSUS_COLS>: create a column for every
-* consensus column in the model, even if it means having
-* all gap characters (deletions) in a column; this
-* guarantees that the alignment will have at least <M>
-* columns. The default is to only show columns that have
-* at least one residue in them.
-*
-* <p7_TRIM>: trim off any residues that get assigned to
-* flanking N,C states.
-*
-* The <optflags> can be combined by logical OR; for
-* example, <p7_DIGITIZE | p7_ALL_CONSENSUS_COLS>.
-*
-* Args: sq - array of digital sequences, 0..nseq-1
-* tr - array of tracebacks, 0..nseq-1
-* nseq - number of sequences
-* M - length of model sequences were aligned to
-* optflags - flags controlling optional behaviours.
-* ret_msa - RETURN: new multiple sequence alignment
-*
-* Returns: <eslOK> on success, and <*ret_msa> points to a new
-* <ESL_MSA> object. Caller is responsible for free'ing
-* this new MSA with <esl_msa_Destroy()>.
-*
-* Throws: <eslEMEM> on allocation failure; <*ret_msa> is <NULL>.
-*
-* Notes: * why a text mode, when most of HMMER works in digital
-* sequences and alignments? Text mode MSAs are created
-* for output, whereas digital mode MSAs are created for
-* internal use. Text mode allows HMMER's output
-* conventions to be used for match vs. insert columns:
-* lowercase/. for residues/gaps in inserts, uppercase/-
-* for residues/gaps in match columns.
-*
-* * why not pass HMM as an argument, so we can transfer
-* column annotation? In <p7_tophits_Alignment()>, the
-* HMM is unavailable -- because of constraints of what's
-* made available to the master process in an MPI
-* implementation. (We could make the HMM an optional
-* argument.)
-*/
-int
-p7_tracealign_Seqs(ESL_SQ **sq, P7_TRACE **tr, int nseq, int M, int optflags, ESL_MSA **ret_msa)
-{
- ESL_MSA *msa = NULL; /* RETURN: new MSA */
- const ESL_ALPHABET *abc = sq[0]->abc;
- int *inscount = NULL; /* array of max gaps between aligned columns */
- int *matmap = NULL; /* matmap[k] = apos of match k matmap[1..M] = [1..alen] */
- int *matuse = NULL; /* TRUE if an alignment column is associated with match state k [1..M] */
- int idx; /* counter over sequences */
- int alen; /* width of alignment */
- int status;
-
- if ((status = map_new_msa(tr, nseq, M, optflags, &inscount, &matuse, &matmap, &alen)) != eslOK) return status;
-
- if (optflags & p7_DIGITIZE) { if ((status = make_digital_msa(sq, NULL, tr, nseq, matuse, matmap, M, alen, optflags, &msa)) != eslOK) goto ERROR; }
- else { if ((status = make_text_msa (sq, NULL, tr, nseq, matuse, matmap, M, alen, optflags, &msa)) != eslOK) goto ERROR; }
-
- if ((status = annotate_rf(msa, M, matuse, matmap)) != eslOK) goto ERROR;
- if ((status = annotate_posterior_probability(msa, tr, matmap, M, optflags)) != eslOK) goto ERROR;
-
- if (optflags & p7_DIGITIZE) rejustify_insertions_digital( msa, inscount, matmap, matuse, M);
- else rejustify_insertions_text (abc, msa, inscount, matmap, matuse, M);
-
- for (idx = 0; idx < nseq; idx++)
- {
- esl_msa_SetSeqName(msa, idx, sq[idx]->name);
- if (sq[idx]->acc[0] != '\0') esl_msa_SetSeqAccession (msa, idx, sq[idx]->acc);
- if (sq[idx]->desc[0] != '\0') esl_msa_SetSeqDescription(msa, idx, sq[idx]->desc);
- msa->wgt[idx] = 1.0;
- if (msa->sqlen != NULL) msa->sqlen[idx] = sq[idx]->n;
- }
-
- free(inscount);
- free(matmap);
- free(matuse);
- *ret_msa = msa;
- return eslOK;
-
-ERROR:
- if (msa != NULL) esl_msa_Destroy(msa);
- if (inscount != NULL) free(inscount);
- if (matmap != NULL) free(matmap);
- if (matuse != NULL) free(matuse);
- *ret_msa = NULL;
- return status;
-}
-
-
-/* Function: p7_tracealign_MSA()
-* Synopsis: Convert array of traces (for a previous MSA) to a new MSA.
-* Incept: SRE, Mon Mar 2 18:18:22 2009 [Casa de Gatos]
-*
-* Purpose: Identical to <p7_tracealign_Seqs()> except that the trace
-* array <tr> accompanies a digital multiple alignment <premsa>,
-* rather than an array of digital sequences.
-*
-* This gets used in <p7_Builder()>, where we've
-* constructed an array of faux traces directly from an
-* input alignment, and we want to reconstruct the
-* MSA that corresponds to what HMMER actually used
-* to build its model (after trace doctoring to be
-* compatible with Plan 7, and with <#=RF> annotation
-* on assigned consensus columns).
-*
-* Xref: J4/102.
-*/
-int
-p7_tracealign_MSA(const ESL_MSA *premsa, P7_TRACE **tr, int M, int optflags, ESL_MSA **ret_postmsa)
-{
- const ESL_ALPHABET *abc = premsa->abc;
- ESL_MSA *msa = NULL; /* RETURN: new MSA */
- int *inscount = NULL; /* array of max gaps between aligned columns */
- int *matmap = NULL; /* matmap[k] = apos of match k matmap[1..M] = [1..alen] */
- int *matuse = NULL; /* TRUE if an alignment column is associated with match state k [1..M] */
- int idx; /* counter over sequences */
- int alen; /* width of alignment */
- int status;
-
- if ((status = map_new_msa(tr, premsa->nseq, M, optflags, &inscount, &matuse, &matmap, &alen)) != eslOK) return status;
-
- if (optflags & p7_DIGITIZE) { if ((status = make_digital_msa(NULL, premsa, tr, premsa->nseq, matuse, matmap, M, alen, optflags, &msa)) != eslOK) goto ERROR; }
- else { if ((status = make_text_msa (NULL, premsa, tr, premsa->nseq, matuse, matmap, M, alen, optflags, &msa)) != eslOK) goto ERROR; }
-
- if ((status = annotate_rf(msa, M, matuse, matmap)) != eslOK) goto ERROR;
- if ((status = annotate_posterior_probability(msa, tr, matmap, M, optflags)) != eslOK) goto ERROR;
-
- if (optflags & p7_DIGITIZE) rejustify_insertions_digital( msa, inscount, matmap, matuse, M);
- else rejustify_insertions_text (abc, msa, inscount, matmap, matuse, M);
-
-
- /* Transfer information from old MSA to new */
- esl_msa_SetName (msa, premsa->name);
- esl_msa_SetDesc (msa, premsa->desc);
- esl_msa_SetAccession(msa, premsa->acc);
-
- for (idx = 0; idx < premsa->nseq; idx++)
- {
- esl_msa_SetSeqName (msa, idx, premsa->sqname[idx]);
- if (msa->sqacc) esl_msa_SetSeqAccession (msa, idx, premsa->sqacc[idx]);
- if (msa->sqdesc) esl_msa_SetSeqDescription(msa, idx, premsa->sqdesc[idx]);
- msa->wgt[idx] = premsa->wgt[idx];
- }
-
- if (premsa->flags & eslMSA_HASWGTS)
- msa->flags |= eslMSA_HASWGTS;
-
- free(inscount);
- free(matmap);
- free(matuse);
- *ret_postmsa = msa;
- return eslOK;
-
-ERROR:
- if (msa != NULL) esl_msa_Destroy(msa);
- if (inscount != NULL) free(inscount);
- if (matmap != NULL) free(matmap);
- if (matuse != NULL) free(matuse);
- *ret_postmsa = NULL;
- return status;
-}
-/*--------------- end, exposed API ------------------------------*/
-
-
-
-
-/*****************************************************************
-* 2. Internal functions used by the API
-*****************************************************************/
-
-/* map_new_msa()
-*
-* Construct <inscount[0..M]>, <matuse[1..M]>, and <matmap[1..M]>
-* arrays for mapping model consensus nodes <1..M> onto columns
-* <1..alen> of a new MSA.
-*
-* Here's the problem. We want to align the match states in columns,
-* but some sequences have inserted symbols in them; we need some
-* sort of overall knowledge of where the inserts are and how long
-* they are in order to create the alignment.
-*
-* Here's our trick. inscount[] is a 0..M array; inserts[k] stores
-* the maximum number of times insert substate k was used. This
-* is the maximum number of gaps to insert between canonical
-* column k and k+1. inserts[0] is the N-term tail; inserts[M] is
-* the C-term tail.
-*
-* Additionally, matuse[k=1..M] says whether we're going to make an
-* alignment column for consensus position k. By default this is
-* <TRUE> only if there is at least one residue in the column. If
-* the <p7_ALL_CONSENSUS_COLS> option flag is set, though, all
-* matuse[1..M] are set <TRUE>. (matuse[0] is unused, always <FALSE>.)
-*
-* Then, using these arrays, we construct matmap[] and determine alen.
-* If match state k is represented as an alignment column,
-* matmap[1..M] = that position, <1..alen>.
-* If match state k is not in the alignment (<matuse[k] == FALSE>),
-* matmap[k] = matmap[k-1] = the last alignment column that a match
-* state did map to; this is a trick to make some apos coordinate setting
-* work cleanly.
-* Because of this trick, you can't just assume because matmap[k] is
-* nonzero that match state k maps somewhere in the alignment;
-* you have to check matuse[k] == TRUE, then look at what matmap[k] says.
-* Remember that N and C emit on transition, hence the check for an
-* N->N or C->C transition before bumping nins.
-* <matmap[0]> is unused; by convention, <matmap[0] = 0>.
-*/
-static int
-map_new_msa(P7_TRACE **tr, int nseq, int M, int optflags, int **ret_inscount,
- int **ret_matuse, int **ret_matmap, int *ret_alen)
-{
- int *inscount = NULL; /* inscount[k=0..M] == max # of inserts in node k */
- int *matuse = NULL; /* matuse[k=1..M] == TRUE|FALSE: does node k map to an alignment column */
- int *matmap = NULL; /* matmap[k=1..M]: if matuse[k] TRUE, what column 1..alen does node k map to */
- int idx; /* counter over sequences */
- int nins; /* counter for inserted residues observed */
- int z; /* index into trace positions */
- int alen; /* length of alignment */
- int k; /* counter over nodes 1..M */
- int status;
-
- ESL_ALLOC_WITH_TYPE(inscount, int*, sizeof(int) * (M+1));
- ESL_ALLOC_WITH_TYPE(matuse, int*, sizeof(int) * (M+1)); matuse[0] = 0;
- ESL_ALLOC_WITH_TYPE(matmap, int*, sizeof(int) * (M+1)); matmap[0] = 0;
- esl_vec_ISet(inscount, M+1, 0);
- if (optflags & p7_ALL_CONSENSUS_COLS) esl_vec_ISet(matuse+1, M, TRUE);
- else esl_vec_ISet(matuse+1, M, FALSE);
-
- for (idx = 0; idx < nseq; idx++)
- {
- nins = 0;
- k = 0;
- for (z = 1; z < tr[idx]->N; z++)
- {
- switch (tr[idx]->st[z]) {
- case p7T_I: nins++; break;
- case p7T_N: if (tr[idx]->st[z-1] == p7T_N) nins++; break;
- case p7T_C: if (tr[idx]->st[z-1] == p7T_C) nins++; break;
-
- case p7T_M: /* M,D: record max. reset ctr; M only: set matuse[] */
- k = tr[idx]->k[z]; /* k++ doesn't work. May be a B->X->Mk fragment entry */
- inscount[k-1] = ESL_MAX(nins, inscount[k-1]);
- matuse[k] = TRUE;
- nins = 0;
- break;
-
- case p7T_D: /* Can handle I->D transitions even though currently not in H3 models */
- k = tr[idx]->k[z]; /* k++ doesn't work; see above */
- inscount[k-1] = ESL_MAX(nins, inscount[k-1]);
- nins = 0;
- break;
-
- case p7T_T: /* T: record C-tail max, for a profile trace */
- inscount[M] = ESL_MAX(nins, inscount[M]);
- break;
-
- case p7T_E: /* this handles case of core traces, which do have I_M state */
- inscount[k] = ESL_MAX(nins, inscount[k]); /* [M] doesn't work, because of {DMI}k->X->E frag exit */
- break;
-
- case p7T_B: /* B: record N-tail max for a profile trace; I0 for a core trace */
- inscount[0] = ESL_MAX(nins, inscount[0]);
- nins = 0;
- break;
-
- case p7T_S: break; /* don't need to do anything on S, X states */
- case p7T_X: break;
-
- case p7T_J: p7_Die("J state unsupported");
- default: p7_Die("Unrecognized statetype %d", tr[idx]->st[z]);
- }
- }
- }
-
- /* if we're trimming N and C off, reset inscount[0], inscount[M] to 0. */
- if (optflags & p7_TRIM) { inscount[0] = inscount[M] = 0; }
-
- /* Use inscount, matuse to set the matmap[] */
- alen = inscount[0];
- for (k = 1; k <= M; k++) {
- if (matuse[k]) { matmap[k] = alen+1; alen += 1+inscount[k]; }
- else { matmap[k] = alen; alen += inscount[k]; }
- }
-
- *ret_inscount = inscount;
- *ret_matuse = matuse;
- *ret_matmap = matmap;
- *ret_alen = alen;
- return eslOK;
-
-ERROR:
- if (inscount != NULL) free(inscount);
- if (matuse != NULL) free(matuse);
- if (matmap != NULL) free(matmap);
- *ret_inscount = NULL;
- *ret_matuse = NULL;
- *ret_matmap = NULL;
- *ret_alen = 0;
- return status;
-}
-
-
-/* get_dsq_z()
-* this abstracts residue-fetching from either a sq array or a previous MSA;
-* one and only one of <sq>, <msa> is non-<NULL>;
-* get the digital residue corresponding to tr[idx]->i[z].
-*/
-static ESL_DSQ
-get_dsq_z(ESL_SQ **sq, const ESL_MSA *premsa, P7_TRACE **tr, int idx, int z)
-{
- return ( (premsa == NULL) ? sq[idx]->dsq[tr[idx]->i[z]] : premsa->ax[idx][tr[idx]->i[z]]);
-}
-
-/* make_digital_msa()
- * Create a new digital MSA, given traces <tr> for digital <sq> or for
- * a digital <premsa>. (One and only one of <sq>,<premsa> are
- * non-<NULL>.
- * The traces may either be profile traces or core traces;
- * core traces may contain X "states" for fragments.
-*/
-static int
-make_digital_msa(ESL_SQ **sq, const ESL_MSA *premsa, P7_TRACE **tr, int nseq, const int *matuse, const int *matmap, int M, int alen, int optflags, ESL_MSA **ret_msa)
-{
- const ESL_ALPHABET *abc = (sq == NULL) ? premsa->abc : sq[0]->abc;
- ESL_MSA *msa = NULL;
- int idx;
- int apos;
- int z;
- int status;
-
- if ((msa = esl_msa_CreateDigital(abc, nseq, alen)) == NULL) { status = eslEMEM; goto ERROR; }
-
- for (idx = 0; idx < nseq; idx++)
- {
- msa->ax[idx][0] = eslDSQ_SENTINEL;
- for (apos = 1; apos <= alen; apos++) msa->ax[idx][apos] = esl_abc_XGetGap(abc);
- msa->ax[idx][alen+1] = eslDSQ_SENTINEL;
-
- apos = 1;
- for (z = 0; z < tr[idx]->N; z++)
- {
- switch (tr[idx]->st[z]) {
- case p7T_M:
- msa->ax[idx][matmap[tr[idx]->k[z]]] = get_dsq_z(sq, premsa, tr, idx, z);
- apos = matmap[tr[idx]->k[z]] + 1;
- break;
-
- case p7T_D:
- if (matuse[tr[idx]->k[z]]) /* bug h77: if all col is deletes, do nothing; do NOT overwrite a column */
- msa->ax[idx][matmap[tr[idx]->k[z]]] = esl_abc_XGetGap(abc); /* overwrites ~ in Dk column on X->Dk */
- apos = matmap[tr[idx]->k[z]] + 1;
- break;
-
- case p7T_I:
- if ( !(optflags & p7_TRIM) || (tr[idx]->k[z] != 0 && tr[idx]->k[z] != M)) {
- msa->ax[idx][apos] = get_dsq_z(sq, premsa, tr, idx, z);
- apos++;
- }
- break;
-
- case p7T_N:
- case p7T_C:
- if (! (optflags & p7_TRIM) && tr[idx]->i[z] > 0) {
- msa->ax[idx][apos] = get_dsq_z(sq, premsa, tr, idx, z);
- apos++;
- }
- break;
-
- case p7T_E:
- apos = matmap[M]+1; /* set position for C-terminal tail */
- break;
-
- case p7T_X:
- /* Mark fragments (B->X and X->E containing core traces):
- * convert flanks from gaps to ~
- */
- if (tr[idx]->st[z-1] == p7T_B)
- { /* B->X leader. This is a core trace and a fragment. Convert leading gaps to ~ */
- /* to set apos for an initial Ik: peek at next state for B->X->Ik; superfluous for ->{DM}k: */
- for (apos = 1; apos <= matmap[tr[idx]->k[z+1]]; apos++)
- msa->ax[idx][apos] = esl_abc_XGetMissing(abc);
- /* tricky! apos is now exactly where it needs to be for X->Ik. all other cases except B->X->Ik set their own apos */
- }
- else if (tr[idx]->st[z+1] == p7T_E)
- { /* X->E trailer. This is a core trace and a fragment. Convert trailing gaps to ~ */
- /* don't need to set apos for trailer. There can't be any more residues in a core trace once we hit X->E */
- for (; apos <= alen; apos++)
- msa->ax[idx][apos] = esl_abc_XGetMissing(abc);
- }
- else ESL_XEXCEPTION(eslECORRUPT, "make_digital_msa(): X state in unexpected position in trace");
-
- break;
-
- default:
- break;
- }
- }
- }
-
- msa->nseq = nseq;
- msa->alen = alen;
- *ret_msa = msa;
- return eslOK;
-
-ERROR:
- if (msa) esl_msa_Destroy(msa);
- *ret_msa = NULL;
- return status;
-}
-
-
-/* make_text_msa()
- * Create a new text MSA, given traces <tr> for digital <sq> or for a digital <premsa>.
-* (One and only one of <sq>,<premsa> are non-<NULL>.
-*
-* The reason to make a text-mode MSA rather than let Easel handle printing a digital
-* MSA is to impose HMMER's standard representation on gap characters and insertions:
-* at inserts, gaps are '.' and residues are lower-case, whereas at matches, gaps are '-'
-* and residues are upper case.
- *
- * Also see comments in make_digital_msa(), above.
-*/
-static int
-make_text_msa(ESL_SQ **sq, const ESL_MSA *premsa, P7_TRACE **tr, int nseq, const int *matuse, const int *matmap, int M, int alen, int optflags, ESL_MSA **ret_msa)
-{
- const ESL_ALPHABET *abc = (sq == NULL) ? premsa->abc : sq[0]->abc;
- ESL_MSA *msa = NULL;
- int idx;
- int apos;
- int z;
- int k;
- int status;
-
- if ((msa = esl_msa_Create(nseq, alen)) == NULL) { status = eslEMEM; goto ERROR; }
-
- for (idx = 0; idx < nseq; idx++)
- {
- for (apos = 0; apos < alen; apos++) msa->aseq[idx][apos] = '.';
- for (k = 1; k <= M; k++) if (matuse[k]) msa->aseq[idx][-1+matmap[k]] = '-';
- msa->aseq[idx][apos] = '\0';
-
- apos = 0;
- for (z = 0; z < tr[idx]->N; z++)
- {
- switch (tr[idx]->st[z]) {
- case p7T_M:
- msa->aseq[idx][-1+matmap[tr[idx]->k[z]]] = toupper(abc->sym[get_dsq_z(sq, premsa, tr, idx, z)]);
- apos = matmap[tr[idx]->k[z]]; /* i.e. one past the match column. remember, text mode is 0..alen-1 */
- break;
-
- case p7T_D:
- if (matuse[tr[idx]->k[z]]) /* bug #h77: if all column is deletes, do nothing; do NOT overwrite a column */
- msa->aseq[idx][-1+matmap[tr[idx]->k[z]]] = '-'; /* overwrites ~ in Dk column on X->Dk */
- apos = matmap[tr[idx]->k[z]];
- break;
-
- case p7T_I:
- if ( !(optflags & p7_TRIM) || (tr[idx]->k[z] != 0 && tr[idx]->k[z] != M)) {
- msa->aseq[idx][apos] = tolower(abc->sym[get_dsq_z(sq, premsa, tr, idx, z)]);
- apos++;
- }
- break;
-
- case p7T_N:
- case p7T_C:
- if (! (optflags & p7_TRIM) && tr[idx]->i[z] > 0) {
- msa->aseq[idx][apos] = tolower(abc->sym[get_dsq_z(sq, premsa, tr, idx, z)]);
- apos++;
- }
- break;
-
- case p7T_E:
- apos = matmap[M]; /* set position for C-terminal tail */
- break;
-
- case p7T_X:
- /* Mark fragments (B->X and X->E containing core traces):
- * convert flanks from gaps to ~
- */
- if (tr[idx]->st[z-1] == p7T_B)
- { /* B->X leader. This is a core trace and a fragment. Convert leading gaps to ~ */
- for (apos = 0; apos < matmap[tr[idx]->k[z+1]]; apos++)
- msa->aseq[idx][apos] = '~';
- /* tricky; apos exactly where it must be for X->Ik; see comments in make_digital_msa() */
- }
- else if (tr[idx]->st[z+1] == p7T_E)
- { /* X->E trailer. This is a core trace and a fragment. Convert trailing gaps to ~ */
- for (; apos < alen; apos++)
- msa->aseq[idx][apos] = '~';
- }
- else ESL_XEXCEPTION(eslECORRUPT, "make_text_msa(): X state in unexpected position in trace");
-
- break;
-
- default:
- break;
- }
- }
- }
- msa->nseq = nseq;
- msa->alen = alen;
- *ret_msa = msa;
- return eslOK;
-
-ERROR:
- if (msa != NULL) esl_msa_Destroy(msa);
- *ret_msa = NULL;
- return status;
-}
-
-
-
-/* annotate_rf()
-* Synopsis: Add RF reference coordinate annotation line to new MSA.
-* Incept: SRE, Fri Jan 16 09:30:08 2009 [Janelia]
-*
-* Purpose: Create an RF reference coordinate annotation line that annotates the
-* consensus columns: the columns associated with profile match states.
-*
-* Recall that msa->rf is <NULL> when unset/by default in an MSA;
-* msa->rf[0..alen-1] = 'x' | '.' is the simplest convention;
-* msa->rf is a NUL-terminated string (msa->rf[alen] = '\0')
-*
-* Args: M - profile length
-* matuse - matuse[1..M] == TRUE | FALSE : is this match state represented
-* by a column in the alignment.
-* matmap - matmap[1..M] == (1..alen): if matuse[k], then what alignment column
-* does state k map to.
-*
-* Returns: <eslOK> on success; msa->rf is set to an appropriate reference
-* coordinate string.
-*
-* Throws: <eslEMEM> on allocation failure.
-*/
-static int
-annotate_rf(ESL_MSA *msa, int M, const int *matuse, const int *matmap)
-{
- int apos, k;
- int status;
-
- ESL_ALLOC_WITH_TYPE(msa->rf, char*, sizeof(char) * (msa->alen+1));
- for (apos = 0; apos < msa->alen; apos++)
- msa->rf[apos] = '.';
- msa->rf[msa->alen] = '\0';
-
- for (k = 1; k <= M; k++)
- if (matuse[k]) msa->rf[matmap[k]-1] = 'x'; /* watch off by one: rf[0..alen-1]; matmap[] = 1..alen */
- return eslOK;
-
-ERROR:
- return status;
-}
-
-
-
-/* annotate_posterior_probability()
-* Synopsis: Add posterior probability annotation lines to new MSA.
-*/
-static int
-annotate_posterior_probability(ESL_MSA *msa, P7_TRACE **tr, const int *matmap, int M, int optflags)
-{
- double *totp = NULL; /* total posterior probability in column <apos>: [0..alen-1] */
- int *matuse = NULL; /* #seqs with pp annotation in column <apos>: [0..alen-1] */
- int idx; /* counter over sequences [0..nseq-1] */
- int apos; /* counter for alignment columns: pp's are [0..alen-1] (unlike ax) */
- int z; /* counter over trace positions [0..tr->N-1] */
- int status;
-
- /* Determine if any of the traces have posterior probability annotation. */
- for (idx = 0; idx < msa->nseq; idx++)
- if (tr[idx]->pp != NULL) break;
- if (idx == msa->nseq) return eslOK;
-
- ESL_ALLOC_WITH_TYPE(matuse, int*, sizeof(double) * (msa->alen)); esl_vec_ISet(matuse, msa->alen, 0);
- ESL_ALLOC_WITH_TYPE(totp, double*, sizeof(double) * (msa->alen)); esl_vec_DSet(totp, msa->alen, 0.0);
-
- ESL_ALLOC_WITH_TYPE(msa->pp, char**, sizeof(char *) * msa->sqalloc);
- for (idx = 0; idx < msa->nseq; idx++)
- {
- if (tr[idx]->pp == NULL) { msa->pp[idx] = NULL; continue; }
-
- ESL_ALLOC_WITH_TYPE(msa->pp[idx], char*, sizeof(char) * (msa->alen+1));
- for (apos = 0; apos < msa->alen; apos++) msa->pp[idx][apos] = '.';
- msa->pp[idx][msa->alen] = '\0';
-
- apos = 0;
- for (z = 0; z < tr[idx]->N; z++)
- {
- switch (tr[idx]->st[z]) {
- case p7T_M:
- msa->pp[idx][matmap[tr[idx]->k[z]]-1] = p7_alidisplay_EncodePostProb(tr[idx]->pp[z]);
- totp [matmap[tr[idx]->k[z]]-1]+= tr[idx]->pp[z];
- matuse[matmap[tr[idx]->k[z]]-1]++;
- case p7T_D:
- apos = matmap[tr[idx]->k[z]];
- break;
-
- case p7T_I:
- if ( !(optflags & p7_TRIM) || (tr[idx]->k[z] != 0 && tr[idx]->k[z] != M)) {
- msa->pp[idx][apos] = p7_alidisplay_EncodePostProb(tr[idx]->pp[z]);
- apos++;
- }
- break;
-
- case p7T_N:
- case p7T_C:
- if (! (optflags & p7_TRIM) && tr[idx]->i[z] > 0) {
- msa->pp[idx][apos] = p7_alidisplay_EncodePostProb(tr[idx]->pp[z]);
- apos++;
- }
- break;
-
- case p7T_E:
- apos = matmap[M]; /* set position for C-terminal tail */
- break;
-
- default:
- break;
- }
- }
- }
- for (; idx < msa->sqalloc; idx++) msa->pp[idx] = NULL; /* for completeness, following easel MSA conventions, but should be a no-op: nseq==sqalloc */
-
- /* Consensus posterior probability annotation: only on match columns */
- ESL_ALLOC_WITH_TYPE(msa->pp_cons, char*, sizeof(char) * (msa->alen+1));
- for (apos = 0; apos < msa->alen; apos++) msa->pp_cons[apos] = '.';
- msa->pp_cons[msa->alen] = '\0';
- for (apos = 0; apos < msa->alen; apos++)
- if (matuse[apos]) msa->pp_cons[apos] = p7_alidisplay_EncodePostProb( totp[apos] / (double) matuse[apos]);
-
- free(matuse);
- free(totp);
- return eslOK;
-
-ERROR:
- if (matuse != NULL) free(matuse);
- if (totp != NULL) free(totp);
- if (msa->pp != NULL) esl_Free2D((void **) msa->pp, msa->sqalloc);
- return status;
-}
-
-
-/* Function: rejustify_insertions_digital()
-* Synopsis:
-* Incept: SRE, Thu Oct 23 13:06:12 2008 [Janelia]
-*
-* Purpose:
-*
-* Args: msa - alignment to rejustify
-* digital mode: ax[0..nseq-1][1..alen] and abc is valid
-* text mode: aseq[0..nseq-1][0..alen-1]
-* inserts - # of inserted columns following node k, for k=0.1..M
-* inserts[0] is for N state; inserts[M] is for C state
-* matmap - index of column associated with node k [k=0.1..M; matmap[0] = 0]
-* this is an alignment column index 1..alen, same offset as <ax>
-* if applied to text mode aseq or annotation, remember to -1
-* if no residues use match state k, matmap[k] is the
-* index of the last column used before node k's columns
-* start: thus matmap[k]+1 is always the start of
-* node k's insertion (if any).
-* matuse - TRUE if an alignment column is associated with node k: [k=0.1..M; matuse[0] = 0].
-* if matuse[k] == 0, every sequence deleted at node k,
-* and we're collapsing the column rather than showing all
-* gaps.
-*
-* Note: The insertion for node k is of length <inserts[k]> columns,
-* and in 1..alen coords it runs from
-* matmap[k]+1 .. matmap[k+1]-matuse[k+1].
-*
-*
-* Returns:
-*
-* Throws: (no abnormal error conditions)
-*
-* Xref:
-*/
-static int
-rejustify_insertions_digital(ESL_MSA *msa, const int *inserts, const int *matmap, const int *matuse, int M)
-{
- int idx;
- int k;
- int apos;
- int nins;
- int npos, opos;
-
- for (idx = 0; idx < msa->nseq; idx++)
- {
- for (k = 0; k < M; k++)
- if (inserts[k] > 1)
- {
- for (nins = 0, apos = matmap[k]+1; apos <= matmap[k+1]-matuse[k+1]; apos++)
- if (esl_abc_XIsResidue(msa->abc, msa->ax[idx][apos])) nins++;
-
- if (k == 0) nins = 0; /* N-terminus is right justified */
- else nins /= 2; /* split in half; nins now = # of residues left left-justified */
-
- opos = npos = matmap[k+1]-matuse[k+1];
- while (opos >= matmap[k]+1+nins) {
- if (esl_abc_XIsGap(msa->abc, msa->ax[idx][opos])) opos--;
- else {
- msa->ax[idx][npos] = msa->ax[idx][opos];
- if (msa->pp != NULL && msa->pp[idx] != NULL) msa->pp[idx][npos-1] = msa->pp[idx][opos-1];
- npos--;
- opos--;
- }
- }
- while (npos >= matmap[k]+1+nins) {
- msa->ax[idx][npos] = esl_abc_XGetGap(msa->abc);
- if (msa->pp != NULL && msa->pp[idx] != NULL) msa->pp[idx][npos-1] = '.';
- npos--;
- }
- }
- }
- return eslOK;
-}
-
-static int
-rejustify_insertions_text(const ESL_ALPHABET *abc, ESL_MSA *msa, const int *inserts, const int *matmap, const int *matuse, int M)
-{
- int idx;
- int k;
- int apos;
- int nins;
- int npos, opos;
-
- for (idx = 0; idx < msa->nseq; idx++)
- {
- for (k = 0; k < M; k++)
- if (inserts[k] > 1)
- {
- for (nins = 0, apos = matmap[k]; apos < matmap[k+1]-matuse[k+1]; apos++)
- if (esl_abc_CIsResidue(abc, msa->aseq[idx][apos])) nins++;
-
- if (k == 0) nins = 0; /* N-terminus is right justified */
- else nins /= 2; /* split in half; nins now = # of residues left left-justified */
-
- opos = npos = -1+matmap[k+1]-matuse[k+1];
- while (opos >= matmap[k]+nins) {
- if (esl_abc_CIsGap(abc, msa->aseq[idx][opos])) opos--;
- else {
- msa->aseq[idx][npos] = msa->aseq[idx][opos];
- if (msa->pp != NULL && msa->pp[idx] != NULL) msa->pp[idx][npos] = msa->pp[idx][opos];
- npos--;
- opos--;
- }
- }
- while (npos >= matmap[k]+nins) {
- msa->aseq[idx][npos] = '.';
- if (msa->pp != NULL && msa->pp[idx] != NULL) msa->pp[idx][npos] = '.';
- npos--;
- }
- }
- }
- return eslOK;
-}
-/*---------------- end, internal functions ----------------------*/
-
-
-
-/************************************************************
-* HMMER - Biological sequence analysis with profile HMMs
-* Version 3.0; March 2010
-* Copyright (C) 2010 Howard Hughes Medical Institute.
-* Other copyrights also apply. See the COPYRIGHT file for a full list.
-*
-* HMMER is distributed under the terms of the GNU General Public License
-* (GPLv3). See the LICENSE file for details.
-************************************************************/
diff --git a/src/plugins_3rdparty/hmm3/src/phmmer/uHMM3PhmmerDialogImpl.cpp b/src/plugins_3rdparty/hmm3/src/phmmer/uHMM3PhmmerDialogImpl.cpp
deleted file mode 100644
index ae1995a..0000000
--- a/src/plugins_3rdparty/hmm3/src/phmmer/uHMM3PhmmerDialogImpl.cpp
+++ /dev/null
@@ -1,227 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#include <math.h>
-
-#include <QtCore/qglobal.h>
-#if (QT_VERSION < 0x050000) //Qt 5
-#include <QtGui/QMessageBox>
-#include <QtGui/QPushButton>
-#else
-#include <QtWidgets/QMessageBox>
-#include <QtWidgets/QPushButton>
-#endif
-
-#include <U2Core/AppContext.h>
-#include <U2Core/GObjectTypes.h>
-#include <U2Core/L10n.h>
-#include <U2Core/U2OpStatusUtils.h>
-#include <U2Core/U2SafePoints.h>
-
-#include <U2Gui/DialogUtils.h>
-#include <U2Gui/HelpButton.h>
-#include <U2Gui/LastUsedDirHelper.h>
-#include <U2Gui/U2FileDialog.h>
-
-#include "phmmer/uhmm3PhmmerTask.h"
-#include "uHMM3PhmmerDialogImpl.h"
-
-namespace U2 {
-
-const QString UHMM3PhmmerDialogImpl::QUERY_FILES_DIR = "uhmm3_phmmer_query_files_dir";
-const QString UHMM3PhmmerDialogImpl::DOM_E_PLUS_PREFIX = "1E+";
-const QString UHMM3PhmmerDialogImpl::DOM_E_MINUS_PREFIX = "1E";
-const QString UHMM3PhmmerDialogImpl::ANNOTATIONS_DEFAULT_NAME = "signal";
-
-UHMM3PhmmerDialogImpl::UHMM3PhmmerDialogImpl(const U2SequenceObject * seqObj, QWidget * p) : QDialog(p) {
- assert(NULL != seqObj);
- setupUi(this);
- new HelpButton(this, buttonBox, "17470701");
- buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Search"));
- buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
-
- U2OpStatusImpl os;
- model.dbSequence = seqObj->getWholeSequence(os);
- SAFE_POINT_EXT(!os.hasError(), QMessageBox::critical(QApplication::activeWindow(), L10N::errorTitle(), os.getError()), );
- setModelValues(); // default model here
-
- // Annotations widget
- CreateAnnotationModel annModel;
- annModel.hideLocation = true;
- annModel.sequenceObjectRef = seqObj;
- annModel.useAminoAnnotationTypes = seqObj->getAlphabet()->isAmino();
- annModel.data->type = U2FeatureTypes::MiscSignal;
- annModel.data->name = ANNOTATIONS_DEFAULT_NAME;
- annModel.sequenceLen = seqObj->getSequenceLength();
- annotationsWidgetController = new CreateAnnotationWidgetController(annModel, this);
- QWidget * firstTab = mainTabWidget->widget(0);
- assert(NULL != firstTab);
- QVBoxLayout * curLayout = qobject_cast< QVBoxLayout* >(firstTab->layout());
- assert(NULL != curLayout);
- curLayout->insertWidget(ANNOTATIONS_WIDGET_LOCATION, annotationsWidgetController->getWidget());
-
- QPushButton* okPushButton = buttonBox->button(QDialogButtonBox::Ok);
- QPushButton* cancelPushButton = buttonBox->button(QDialogButtonBox::Cancel);
-
- connect(queryToolButton, SIGNAL(clicked()), SLOT(sl_queryToolButtonClicked()));
- connect(okPushButton, SIGNAL(clicked()), SLOT(sl_okButtonClicked()));
- connect(cancelPushButton, SIGNAL(clicked()), SLOT(sl_cancelButtonClicked()));
- connect(useEvalTresholdsButton, SIGNAL(toggled(bool)), SLOT(sl_useEvalTresholdsButtonChanged(bool)));
- connect(useScoreTresholdsButton, SIGNAL(toggled(bool)), SLOT(sl_useScoreTresholdsButtonChanged(bool)));
- connect(domZCheckBox, SIGNAL(stateChanged(int)), SLOT(sl_domZCheckBoxChanged(int)));
- connect(maxCheckBox, SIGNAL(stateChanged(int)), SLOT(sl_maxCheckBoxChanged(int)));
- connect(domESpinBox, SIGNAL(valueChanged(int)), SLOT(sl_domESpinBoxChanged(int)));
-
- adjustSize();
-}
-
-void UHMM3PhmmerDialogImpl::setModelValues() {
- const UHMM3PhmmerSettings & settings = model.phmmerSettings;
- domESpinBox->setValue(1); assert(10.0 == settings.domE);
- scoreTresholdDoubleSpin->setValue(settings.domT);
- f1DoubleSpinBox->setValue(settings.f1);
- f2DoubleSpinBox->setValue(settings.f2);
- f3DoubleSpinBox->setValue(settings.f3);
- seedSpinBox->setValue(settings.seed);
- emlSpinBox->setValue(settings.eml);
- emnSpinBox->setValue(settings.emn);
- evlSpinBox->setValue(settings.evl);
- evnSpinBox->setValue(settings.evn);
- eflSpinBox->setValue(settings.efl);
- efnSpinBox->setValue(settings.efn);
- eftDoubleSpinBox->setValue(settings.eft);
- popenDoubleSpinBox->setValue(settings.popen);
- pextendDoubleSpinBox->setValue(settings.pextend);
-}
-
-void UHMM3PhmmerDialogImpl::sl_queryToolButtonClicked() {
- LastUsedDirHelper helper(QUERY_FILES_DIR);
- helper.url = U2FileDialog::getOpenFileName(this, tr("Select query sequence file"),
- helper, DialogUtils::prepareDocumentsFileFilterByObjType(GObjectTypes::SEQUENCE, true));
- if(!helper.url.isEmpty()) {
- queryLineEdit->setText(helper.url);
- }
-}
-
-void UHMM3PhmmerDialogImpl::sl_cancelButtonClicked() {
- reject();
-}
-
-void UHMM3PhmmerDialogImpl::getModelValues() {
- UHMM3PhmmerSettings & settings = model.phmmerSettings;
-
- model.queryfile = queryLineEdit->text();
- if(useEvalTresholdsButton->isChecked()) {
- settings.domE = pow(10.0, domESpinBox->value());
- settings.domT = OPTION_NOT_SET;
- } else if(useScoreTresholdsButton->isChecked()) {
- settings.domT = scoreTresholdDoubleSpin->value();
- } else {
- assert(false);
- }
-
- settings.popen = popenDoubleSpinBox->value();
- settings.pextend = pextendDoubleSpinBox->value();
-
- settings.noBiasFilter = nobiasCheckBox->isChecked();
- settings.noNull2 = nonull2CheckBox->isChecked();
- settings.doMax = maxCheckBox->isChecked();
- settings.f1 = f1DoubleSpinBox->value();
- settings.f2 = f2DoubleSpinBox->value();
- settings.f3 = f3DoubleSpinBox->value();
-
- settings.eml = emlSpinBox->value();
- settings.emn = emnSpinBox->value();
- settings.evl = evlSpinBox->value();
- settings.evn = evnSpinBox->value();
- settings.efl = eflSpinBox->value();
- settings.efn = efnSpinBox->value();
- settings.eft = eftDoubleSpinBox->value();
- settings.seed = seedSpinBox->value();
-}
-
-QString UHMM3PhmmerDialogImpl::checkModel() {
- assert(model.phmmerSettings.isValid());
- QString ret;
- if(model.queryfile.isEmpty()) {
- ret = tr("Query sequence file path is empty");
- return ret;
- }
- ret = annotationsWidgetController->validate();
- if(!ret.isEmpty()) {
- QMessageBox::critical(this, tr("Error: bad arguments!"), ret);
- return ret;
- }
-
- return ret;
-}
-
-void UHMM3PhmmerDialogImpl::sl_okButtonClicked() {
- getModelValues();
- QString err = checkModel();
- if(!err.isEmpty()) {
- QMessageBox::critical(this, tr("Error: bad arguments!"), err);
- return;
- }
- bool objectPrepared = annotationsWidgetController->prepareAnnotationObject();
- if (!objectPrepared){
- QMessageBox::warning(this, tr("Error"), tr("Cannot create an annotation object. Please check settings"));
- return;
- }
- const CreateAnnotationModel & annModel = annotationsWidgetController->getModel();
- UHMM3PhmmerToAnnotationsTask * phmmerTask = new UHMM3PhmmerToAnnotationsTask(model.queryfile, model.dbSequence,
- annModel.getAnnotationObject(), annModel.groupName, annModel.description, annModel.data->type, annModel.data->name, model.phmmerSettings);
- AppContext::getTaskScheduler()->registerTopLevelTask(phmmerTask);
-
- QDialog::accept();
-}
-
-void UHMM3PhmmerDialogImpl::sl_useEvalTresholdsButtonChanged(bool checked) {
- domESpinBox->setEnabled(checked);
-}
-
-void UHMM3PhmmerDialogImpl::sl_useScoreTresholdsButtonChanged(bool checked) {
- scoreTresholdDoubleSpin->setEnabled(checked);
-}
-
-void UHMM3PhmmerDialogImpl::sl_domZCheckBoxChanged(int state) {
- assert(Qt::PartiallyChecked != state);
- bool checked = Qt::Checked == state;
- domZDoubleSpinBox->setEnabled(checked);
-}
-
-void UHMM3PhmmerDialogImpl::sl_maxCheckBoxChanged(int state) {
- assert(Qt::PartiallyChecked != state);
- bool unchecked = Qt::Unchecked == state;
- f1Label->setEnabled(unchecked);
- f2Label->setEnabled(unchecked);
- f3Label->setEnabled(unchecked);
- f1DoubleSpinBox->setEnabled(unchecked);
- f2DoubleSpinBox->setEnabled(unchecked);
- f3DoubleSpinBox->setEnabled(unchecked);
-}
-
-void UHMM3PhmmerDialogImpl::sl_domESpinBoxChanged(int newVal) {
- const QString & prefix = 0 <= newVal ? DOM_E_PLUS_PREFIX : DOM_E_MINUS_PREFIX;
- domESpinBox->setPrefix(prefix);
-}
-
-} // U2
diff --git a/src/plugins_3rdparty/hmm3/src/phmmer/uhmm3PhmmerTask.cpp b/src/plugins_3rdparty/hmm3/src/phmmer/uhmm3PhmmerTask.cpp
deleted file mode 100644
index 7fc893e..0000000
--- a/src/plugins_3rdparty/hmm3/src/phmmer/uhmm3PhmmerTask.cpp
+++ /dev/null
@@ -1,474 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#include <QtCore/QFileInfo>
-#include <QtCore/QMutexLocker>
-
-#include <U2Core/AnnotationTableObject.h>
-#include <U2Core/AppContext.h>
-#include <U2Core/AppResources.h>
-#include <U2Core/Counter.h>
-#include <U2Core/DNASequenceObject.h>
-#include <U2Core/DNATranslation.h>
-#include <U2Core/DocumentModel.h>
-#include <U2Core/GObjectTypes.h>
-#include <U2Core/L10n.h>
-#include <U2Core/LoadDocumentTask.h>
-#include <U2Core/Log.h>
-#include <U2Core/U1AnnotationUtils.h>
-#include <U2Core/U2SafePoints.h>
-
-#include "uhmm3phmmer.h"
-#include "uhmm3PhmmerTask.h"
-#include "task_local_storage/uHMMSearchTaskLocalStorage.h"
-
-#define UHMM3_PHMMER_LOG_CAT "uhmm3_phmmer_log_category"
-
-using namespace U2;
-
-namespace U2 {
-
-static int countPhmmerMemInMB(qint64 dbLen, int queryLen) {
- SAFE_POINT(0 < dbLen && 0 < queryLen, "Invalid sequence length", 0x7fffffff); //INT32_MAX = INT32_MAX
- return qMax(((double)dbLen * queryLen / (1024 * 1024)) * 10, 2.0);
-}
-
-/**************************************
-* General hmmer3 phmmer task.
-**************************************/
-
-UHMM3PhmmerTask::UHMM3PhmmerTask(const DNASequence &q,
- const DNASequence &d,
- const UHMM3PhmmerSettings &set) :
- Task(tr("HMM Phmmer task"), TaskFlags_FOSE_COSC),
- query(q),
- db(d),
- settings(set),
- loadQueryTask(NULL),
- loadDbTask(NULL)
-{
- GCOUNTER(cvar, tvar, "UHMM3PhmmerTask");
- CHECK_EXT(0 != query.length(), stateInfo.setError(L10N::badArgument(tr("Input query sequence"))), );
- CHECK_EXT(0 < db.length(), stateInfo.setError(L10N::badArgument(tr("Database sequence to search in"))), );
-
- setTaskName(tr("HMM Phmmer search %1 sequence in %2 database").arg(query.getName()).arg(db.getName()));
- addMemResource();
-}
-
-UHMM3PhmmerTask::UHMM3PhmmerTask(const QString &queryFilename,
- const QString &dbFilename,
- const UHMM3PhmmerSettings &set) :
- Task(tr("HMM Phmmer task"), TaskFlags_FOSE_COSC),
- settings(set),
- loadQueryTask(NULL),
- loadDbTask(NULL)
-{
- CHECK_EXT(!queryFilename.isEmpty(), stateInfo.setError(L10N::badArgument(tr("Query sequence file path"))), );
- CHECK_EXT(!dbFilename.isEmpty(), stateInfo.setError(L10N::badArgument(tr("Database sequence file path"))), );
-
- setTaskName(tr("HMM Phmmer search %1 sequence with %2 database").arg(queryFilename).arg(dbFilename));
-
- loadQueryTask = LoadDocumentTask::getDefaultLoadDocTask(queryFilename);
- CHECK_EXT(NULL != loadQueryTask, stateInfo.setError(tr("Error opening query sequence file")), );
- addSubTask(loadQueryTask);
-
- loadDbTask = LoadDocumentTask::getDefaultLoadDocTask(dbFilename);
- CHECK_EXT(NULL != loadDbTask, stateInfo.setError(tr("Error opening database sequence file")), );
- addSubTask(loadDbTask);
-}
-
-UHMM3PhmmerTask::UHMM3PhmmerTask(const QString &queryFilename,
- const DNASequence &d,
- const UHMM3PhmmerSettings &s) :
- Task(tr("HMM Phmmer task"), TaskFlags_FOSE_COSC),
- db(d),
- settings(s),
- loadQueryTask(NULL),
- loadDbTask(NULL)
-{
- CHECK_EXT(!queryFilename.isEmpty(), stateInfo.setError(L10N::badArgument(tr("Query sequence file path"))), );
- CHECK_EXT(0 < db.length(), stateInfo.setError(L10N::badArgument(tr("Database sequence to search in"))), );
-
- setTaskName(tr("HMM Phmmer search %1 sequence in %2 database").arg(queryFilename).arg(db.getName()));
-
- loadQueryTask = LoadDocumentTask::getDefaultLoadDocTask(queryFilename);
- CHECK_EXT(NULL != loadQueryTask, stateInfo.setError(tr("Error opening query sequence file")), );
- addSubTask(loadQueryTask);
-}
-
-void UHMM3PhmmerTask::addMemResource() {
- SAFE_POINT_EXT(!db.isNull(), setError("An internal error: db is NULL"), );
- SAFE_POINT_EXT(!query.isNull(), setError("An internal error: query is NULL"), );
-
- int howManyMem = countPhmmerMemInMB(db.length(), query.length());
- addTaskResource(TaskResourceUsage(RESOURCE_MEMORY, howManyMem));
- algoLog.trace(QString("%1 requires %2 of memory").arg(getTaskName()).arg(howManyMem));
-}
-
-DNASequence UHMM3PhmmerTask::getSequenceFromDocument(Document *doc, TaskStateInfo &ti) {
- DNASequence ret;
- CHECK_EXT(NULL != doc, ti.setError(tr("Error loading sequence document:")), ret);
-
- QList<GObject *> objsList = doc->findGObjectByType(GObjectTypes::SEQUENCE);
- CHECK_EXT(!objsList.isEmpty(), ti.setError(tr("No dna sequence objects found in document")), ret);
-
- U2SequenceObject *seqObj = qobject_cast<U2SequenceObject *>(objsList.first());
- CHECK_EXT(NULL != seqObj, ti.setError(tr("No dna sequence objects found in document")), ret);
-
- ret = seqObj->getWholeSequence(ti);
- CHECK_OP(ti, ret);
- CHECK_EXT(0 < ret.length(), ti.setError(tr("Empty sequence loaded from document")), ret);
-
- return ret;
-}
-
-QList<Task *> UHMM3PhmmerTask::onSubTaskFinished(Task *subTask) {
- QMutexLocker locker(&loadTasksMtx);
- QList<Task *> res;
- SAFE_POINT_EXT(NULL != subTask, setError("An internal error: the subtask is NULL"), res);
-
- if (loadQueryTask == subTask) {
- query = getSequenceFromDocument(loadQueryTask->getDocument(), stateInfo);
- CHECK_OP_EXT(stateInfo, stateInfo.setError(getError() + tr(" query sequence")), res);
- loadQueryTask = NULL;
- } else if (loadDbTask == subTask) {
- db = getSequenceFromDocument(loadDbTask->getDocument(), stateInfo);
- CHECK_OP_EXT(stateInfo, stateInfo.setError(getError() + tr(" db sequence")), res);
- loadDbTask = NULL;
- } else {
- setError("Unexpected bahavior: an undefined task has finished");
- FAIL("", res);
- }
-
- if (NULL == loadQueryTask && NULL == loadDbTask) {
- addMemResource();
- }
- return res;
-}
-
-UHMM3SearchResult UHMM3PhmmerTask::getResult() const {
- return result;
-}
-
-QList<SharedAnnotationData> UHMM3PhmmerTask::getResultsAsAnnotations(const QString &name) const {
- QList<SharedAnnotationData> annotations;
- SAFE_POINT(!name.isEmpty(), "An empty annotation name", annotations);
-
- foreach(const UHMM3SearchSeqDomainResult &domain, result.domainResList) {
- AnnotationData *annData = new AnnotationData();
-
- annData->name = name;
- annData->setStrand(U2Strand::Direct);
- annData->location->regions << domain.seqRegion;
- annData->qualifiers << U2Qualifier("Query_sequence", query.getName());
- domain.writeQualifiersToAnnotation(annData);
-
- annotations << SharedAnnotationData(annData);
- }
-
- return annotations;
-}
-
-void UHMM3PhmmerTask::run() {
- UHMM3SearchTaskLocalStorage::createTaskContext(getTaskId());
- result = UHMM3Phmmer::phmmer(query.seq.data(), query.length(), db.seq.data(), db.length(), settings, stateInfo, db.length());
- UHMM3SearchTaskLocalStorage::freeTaskContext(getTaskId());
-}
-
-/*******************************************
- *HMMER3 phmmer sequence walker task
- ********************************************/
-UHMM3SWPhmmerTask::UHMM3SWPhmmerTask(const QString &qF,
- const DNASequence &db,
- const UHMM3PhmmerSettings &s,
- int ch) :
- Task("", TaskFlags_NR_FOSE_COSC),
- queryFilename(qF),
- dbSeq(db),
- settings(s),
- searchChunkSize(ch),
- loadQueryTask(NULL),
- swTask(NULL),
- complTranslation(NULL),
- aminoTranslation(NULL)
-{
- GCOUNTER(cvar, tvar, "UHMM3SWPhmmerTask");
-
- SAFE_POINT_EXT(searchChunkSize > 0, setError("Invalid search chunk size"), );
- setTaskName(tr("HMM Phmmer search %1 sequence in %2 database").arg(queryFilename).arg(db.getName()));
-
- CHECK_EXT(!queryFilename.isEmpty(), setError(L10N::badArgument("querySeq filename")), );
- CHECK_EXT(0 < dbSeq.seq.length(), setError(L10N::badArgument("sequence")), );
-
- loadQueryTask = LoadDocumentTask::getDefaultLoadDocTask(queryFilename);
- CHECK_EXT(NULL != loadQueryTask, setError(tr("Can not create load query doc task")), );
- addSubTask(loadQueryTask);
-}
-
-QList<Task *> UHMM3SWPhmmerTask::onSubTaskFinished(Task *subTask) {
- QList<Task *> res;
- SAFE_POINT(subTask != NULL ,"An internal error: the subtask is NULL", res);
-
- if (loadQueryTask == subTask) {
- querySeq = UHMM3PhmmerTask::getSequenceFromDocument(loadQueryTask->getDocument(), stateInfo);
- CHECK_OP_EXT(stateInfo, setError(getError() + tr(" querySeq sequence")), res);
- swTask = getSWSubtask();
- res << swTask;
- } else if (swTask != subTask) {
- setError("Undefined behavior: an unexpected task has finished");
- FAIL("", res);
- }
- return res;
-}
-
-void UHMM3SWPhmmerTask::checkAlphabets() {
- SAFE_POINT_EXT(dbSeq.alphabet != NULL, setError("DB SEQ ALPHABET is NULL"), );
- if (dbSeq.alphabet->isRaw()) {
- setError(tr("Invalid db sequence alphabet: %1").arg(dbSeq.alphabet->getName()));
- return;
- }
-
- SAFE_POINT_EXT(querySeq.alphabet != NULL, setError("Query SEQ ALPHABET is NULL"), );
- if (querySeq.alphabet->isRaw()) {
- setError(tr("Invalid query sequence alphabet: %1").arg(querySeq.alphabet->getName()));
- return;
- }
-}
-
-void UHMM3SWPhmmerTask::setTranslations() {
- if (dbSeq.alphabet->isNucleic()) {
- DNATranslationRegistry *transReg = AppContext::getDNATranslationRegistry();
- SAFE_POINT_EXT(NULL != transReg, setError("An internal error: the translation registry is NULL"), );
- DNATranslation *complTT = transReg->lookupComplementTranslation(dbSeq.alphabet);
- if (complTT != NULL) {
- complTranslation = complTT;
- }
-
- if (querySeq.alphabet->isAmino()) {
- QList<DNATranslation *> aminoTs = transReg->lookupTranslation(dbSeq.alphabet, DNATranslationType_NUCL_2_AMINO);
- if (!aminoTs.empty()) {
- aminoTranslation = transReg->getStandardGeneticCodeTranslation(dbSeq.alphabet);
- }
- }
- } else {
- Q_ASSERT(dbSeq.alphabet->isAmino());
- if (querySeq.alphabet->isNucleic()) {
- setError(tr("Cannot search for nucleic query in amino sequence"));
- return;
- }
- }
-}
-
-SequenceWalkerTask *UHMM3SWPhmmerTask::getSWSubtask() {
- CHECK(!hasError() && !isCanceled(), NULL);
- SAFE_POINT_EXT(0 != querySeq.length(), setError("The sequence is empty"), NULL);
-
- checkAlphabets();
- CHECK_OP(stateInfo, NULL);
-
- setTranslations();
- CHECK_OP(stateInfo, NULL);
-
- SequenceWalkerConfig config;
- config.seq = dbSeq.seq.data();
- config.seqSize = dbSeq.seq.size();
- config.complTrans = complTranslation;
- config.strandToWalk = complTranslation == NULL ? StrandOption_DirectOnly : StrandOption_Both;
- config.aminoTrans = aminoTranslation;
- config.overlapSize = 0;
- config.chunkSize = config.seqSize;
- config.lastChunkExtraLen = config.chunkSize / 2;
- config.nThreads = MAX_PARALLEL_SUBTASKS_AUTO;
- config.walkCircular = false;
-
- return new SequenceWalkerTask(config, this, tr("HMMER3 phmmer sequence walker search task"));
-}
-
-void UHMM3SWPhmmerTask::onRegion(SequenceWalkerSubtask *t, TaskStateInfo &ti) {
- SAFE_POINT_EXT(t != NULL, setError("An internal error: the SequenceWalkerSubtask is NULL"), );
- if (hasError() || ti.hasError() || isCanceled() || ti.cancelFlag) {
- return;
- }
-
- const char *seq = t->getRegionSequence();
- int seqLen = t->getRegionSequenceLen();
- bool isAmino = t->isAminoTranslated();
-
- UHMM3SearchTaskLocalStorage::createTaskContext(t->getTaskId());
- int wholeSeqSz = t->getGlobalConfig().seqSize;
- wholeSeqSz = isAmino ? (wholeSeqSz / 3) : wholeSeqSz;
- UHMM3SearchResult generalRes = UHMM3Phmmer::phmmer(querySeq.seq.constData(), querySeq.length(),
- seq, seqLen, settings, stateInfo, wholeSeqSz);
- if (ti.hasError()) {
- UHMM3SearchTaskLocalStorage::freeTaskContext(t->getTaskId());
- return;
- }
-
- QMutexLocker locker(&writeResultsMtx);
- UHMM3SWSearchTask::writeResults(generalRes.domainResList, t, results, overlaps, querySeq.length());
- UHMM3SearchTaskLocalStorage::freeTaskContext(t->getTaskId());
-}
-
-Task::ReportResult UHMM3SWPhmmerTask::report() {
- CHECK_OP(stateInfo, ReportResult_Finished);
- UHMM3SWSearchTask::processOverlaps(overlaps, results, querySeq.length() / 2);
- qSort(results.begin(), results.end(), UHMM3SWSearchTask::uhmm3SearchDomainResultLessThan);
- return ReportResult_Finished;
-}
-
-QList<TaskResourceUsage> UHMM3SWPhmmerTask::getResources(SequenceWalkerSubtask *t) {
- QList<TaskResourceUsage> res;
- SAFE_POINT_EXT(t != NULL, setError("An internal error: the SequenceWalkerSubtask is NULL"), res);
- int howManyMem = countPhmmerMemInMB(dbSeq.length(), querySeq.length());
- res << TaskResourceUsage(RESOURCE_MEMORY, howManyMem);
- algoLog.trace(QString("%1 requires %2 of memory").arg(getTaskName()).arg(howManyMem));
- return res;
-}
-
-QList<SharedAnnotationData>
-UHMM3SWPhmmerTask::getResultsAsAnnotations(U2FeatureType type, const QString &name, const QString &annDescription) const {
- QList<SharedAnnotationData> annotations;
- SAFE_POINT(!name.isEmpty(), "An annotation name is empty", annotations);
-
- foreach(const UHMM3SWSearchTaskDomainResult &res, results) {
- SharedAnnotationData annData(new AnnotationData());
- annData->type = type;
- annData->name = name;
- annData->setStrand(res.onCompl ? U2Strand::Complementary : U2Strand::Direct);
- annData->location->regions << res.generalResult.seqRegion;
- annData->qualifiers << U2Qualifier("Query_sequence", querySeq.getName());
- U1AnnotationUtils::addDescriptionQualifier(annData, annDescription);
- res.generalResult.writeQualifiersToAnnotation(annData);
- annotations << annData;
- }
-
- return annotations;
-}
-
-QList<UHMM3SWSearchTaskDomainResult> UHMM3SWPhmmerTask::getResult() const {
- return results;
-}
-
-/*******************************************
- *HMMER3 phmmer search to annotations task.
- ********************************************/
-void UHMM3PhmmerToAnnotationsTask::checkArgs() {
- if (queryfile.isEmpty()) {
- stateInfo.setError(L10N::badArgument(tr("querySeq sequence file path")));
- return;
- }
-
- if (dbSeq.isNull()) {
- stateInfo.setError(L10N::badArgument(tr("db sequence")));
- return;
- }
-
- if (NULL == annotationObj.data()) {
- stateInfo.setError(L10N::badArgument(tr("annotation object")));
- return;
- }
-
- if (annName.isEmpty()) {
- stateInfo.setError(L10N::badArgument(tr("annotation name")));
- return;
- }
-
- if (annGroup.isEmpty()) {
- stateInfo.setError(L10N::badArgument(tr("annotation group")));
- return;
- }
-}
-
-UHMM3PhmmerToAnnotationsTask::UHMM3PhmmerToAnnotationsTask(const QString &qfile,
- const DNASequence &db,
- AnnotationTableObject *o,
- const QString &gr,
- const QString &annDescription,
- U2FeatureType type,
- const QString &name,
- const UHMM3PhmmerSettings &set) :
- Task(tr("HMM Phmmer task"), TaskFlags_NR_FOSE_COSC | TaskFlag_ReportingIsSupported | TaskFlag_ReportingIsEnabled),
- queryfile(qfile),
- dbSeq(db),
- annotationObj(o),
- annGroup(gr),
- annDescription(annDescription),
- annType(type),
- annName(name),
- settings(set),
- phmmerTask(NULL),
- createAnnotationsTask(NULL)
-{
- checkArgs();
- CHECK_OP(stateInfo, );
- setTaskName(tr("HMM Phmmer search %1 sequence with %2 database").arg(queryfile).arg(dbSeq.getName()));
- phmmerTask = new UHMM3SWPhmmerTask(queryfile, dbSeq, settings);
- addSubTask(phmmerTask);
-}
-
-QList<Task *> UHMM3PhmmerToAnnotationsTask::onSubTaskFinished(Task *subTask) {
- QList<Task *> res;
- SAFE_POINT_EXT(NULL != subTask, setError("An internal error: the subtask is NULL"), res);
-
- if (annotationObj.isNull()) {
- stateInfo.setError(tr("Annotation object was removed"));
- return res;
- }
-
- if (phmmerTask == subTask) {
- QList<SharedAnnotationData> annotations = phmmerTask->getResultsAsAnnotations(annType, annName, annDescription);
-
- if (annotations.isEmpty()) {
- return res;
- }
-
- createAnnotationsTask = new CreateAnnotationsTask(annotationObj, annotations, annGroup);
- res << createAnnotationsTask;
- } else if (createAnnotationsTask != subTask) {
- setError("Unexpected behavior: an undefined task finished");
- FAIL("", res);
- }
-
- return res;
-}
-
-QString UHMM3PhmmerToAnnotationsTask::generateReport() const {
- QString res;
- res += "<table>";
- res += "<tr><td width=200><b>" + tr("Query sequence") + "</b></td><td>" + QFileInfo(queryfile).absoluteFilePath() + "</td></tr>";
-
- if (hasError() || isCanceled()) {
- res += "<tr><td width=200><b>" + tr("Task was not finished") + "</b></td><td></td></tr>";
- res += "</table>";
- return res;
- }
-
- res += "<tr><td><b>" + tr("Result annotation table") + "</b></td><td>" + annotationObj->getDocument()->getName() + "</td></tr>";
- res += "<tr><td><b>" + tr("Result annotation group") + "</b></td><td>" + annGroup + "</td></tr>";
- res += "<tr><td><b>" + tr("Result annotation name") + "</b></td><td>" + annName + "</td></tr>";
-
- int nResults = createAnnotationsTask == NULL ? 0 : createAnnotationsTask->getAnnotationCount();
- res += "<tr><td><b>" + tr("Results count") + "</b></td><td>" + QString::number(nResults) + "</td></tr>";
- res += "</table>";
- return res;
-}
-
-} // U2
diff --git a/src/plugins_3rdparty/hmm3/src/phmmer/uhmm3PhmmerTask.h b/src/plugins_3rdparty/hmm3/src/phmmer/uhmm3PhmmerTask.h
deleted file mode 100644
index 643bf4f..0000000
--- a/src/plugins_3rdparty/hmm3/src/phmmer/uhmm3PhmmerTask.h
+++ /dev/null
@@ -1,153 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#ifndef _GB2_UHMMER_PHMMER_TASK_H_
-#define _GB2_UHMMER_PHMMER_TASK_H_
-
-#include <QtCore/QList>
-#include <QtCore/QMutex>
-
-#include <U2Core/CreateAnnotationTask.h>
-#include <U2Core/DNASequence.h>
-#include <U2Core/SequenceWalkerTask.h>
-#include <U2Core/Task.h>
-
-#include "uhmm3phmmer.h"
-#include "search/uHMM3SearchTask.h"
-
-namespace U2 {
-
-class AnnotationTableObject;
-class LoadDocumentTask;
-
-/**************************************
- *General hmmer3 phmmer task.
- **************************************/
-class UHMM3PhmmerTask : public Task {
- Q_OBJECT
-public:
- static DNASequence getSequenceFromDocument(Document *doc, TaskStateInfo &ti);
-
-public:
- UHMM3PhmmerTask(const DNASequence &query, const DNASequence &db, const UHMM3PhmmerSettings &settings);
- UHMM3PhmmerTask(const QString &queryFilename, const QString &dbFilename, const UHMM3PhmmerSettings &settings);
- UHMM3PhmmerTask(const QString &queryFilename, const DNASequence &db, const UHMM3PhmmerSettings &settings);
-
- void run();
-
- QList<Task *> onSubTaskFinished(Task* subTask);
-
- UHMM3SearchResult getResult() const;
- QList<SharedAnnotationData> getResultsAsAnnotations(const QString &name) const;
-
-private:
- void addMemResource();
-
-private:
- DNASequence query;
- DNASequence db;
- UHMM3SearchResult result;
- UHMM3PhmmerSettings settings;
-
- LoadDocumentTask * loadQueryTask;
- LoadDocumentTask * loadDbTask;
- QMutex loadTasksMtx;
-
-}; // UHMM3PhmmerTask
-
-/******************************
- *Sequence walker phmmer
- ******************************/
-class UHMM3SWPhmmerTask : public Task, public SequenceWalkerCallback {
- Q_OBJECT
-public:
- static const int DEFAULT_CHUNK_SIZE = 1000000; // 1mb
-
-public:
- UHMM3SWPhmmerTask(const QString &queryFilename, const DNASequence &db,
- const UHMM3PhmmerSettings &settings, int chunk = DEFAULT_CHUNK_SIZE);
-
- QList<SharedAnnotationData> getResultsAsAnnotations(U2FeatureType type, const QString &name, const QString &annDescription) const;
- QList<UHMM3SWSearchTaskDomainResult> getResult()const;
-
- QList<Task *> onSubTaskFinished(Task *subTask);
-
- virtual void onRegion(SequenceWalkerSubtask *t, TaskStateInfo &ti);
-
- virtual QList<TaskResourceUsage> getResources(SequenceWalkerSubtask *t);
-
- ReportResult report();
-
-private:
- SequenceWalkerTask *getSWSubtask();
- void checkAlphabets();
- void setTranslations();
-
-private:
- QString queryFilename;
- DNASequence dbSeq;
- UHMM3PhmmerSettings settings;
- int searchChunkSize;
- LoadDocumentTask * loadQueryTask;
- DNASequence querySeq;
- SequenceWalkerTask *swTask;
- DNATranslation * complTranslation;
- DNATranslation * aminoTranslation;
- QMutex writeResultsMtx;
- QList<UHMM3SWSearchTaskDomainResult> results;
- QList<UHMM3SWSearchTaskDomainResult> overlaps;
-
-}; // UHMM3SWPhmmerTask
-
-/*******************************************
-* HMMER3 phmmer search to annotations task.
-********************************************/
-
-class UHMM3PhmmerToAnnotationsTask : public Task {
- Q_OBJECT
-public:
- UHMM3PhmmerToAnnotationsTask(const QString &querySeqfile, const DNASequence &dbSeq, AnnotationTableObject *obj,
- const QString &group, const QString &annDescription, U2FeatureType type, const QString &name, const UHMM3PhmmerSettings &setings);
-
- QList<Task *> onSubTaskFinished(Task *subTask);
-
- QString generateReport() const;
-
-private:
- void checkArgs();
-
-private:
- QString queryfile;
- DNASequence dbSeq;
- QString annGroup;
- const QString annDescription;
- U2FeatureType annType;
- QString annName;
- UHMM3PhmmerSettings settings;
- QPointer<AnnotationTableObject> annotationObj;
- UHMM3SWPhmmerTask * phmmerTask;
- CreateAnnotationsTask * createAnnotationsTask;
-
-}; // UHMM3PhmmerToAnnotationsTask
-
-} // U2
-
-#endif // _GB2_UHMMER_PHMMER_TASK_H_
diff --git a/src/plugins_3rdparty/hmm3/src/phmmer/uhmm3phmmer.cpp b/src/plugins_3rdparty/hmm3/src/phmmer/uhmm3phmmer.cpp
deleted file mode 100644
index cca1af4..0000000
--- a/src/plugins_3rdparty/hmm3/src/phmmer/uhmm3phmmer.cpp
+++ /dev/null
@@ -1,262 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#include <util/uhmm3Utilities.h>
-#include "uhmm3phmmer.h"
-
-namespace U2 {
-
-UHMM3PhmmerSettings::UHMM3PhmmerSettings() {
- UHMM3SearchSettings defaultSearch;
- setDefaultUHMM3SearchSettings( &defaultSearch );
- setSearchSettings( defaultSearch );
-
- UHMM3BuildSettings defaultBuild;
- setDefaultUHMM3BuildSettings( &defaultBuild );
- setBuildSettings( defaultBuild );
-
- popen = 0.02;
- pextend = 0.4;
- substMatr = SMatrix();
-}
-
-UHMM3BuildSettings UHMM3PhmmerSettings::getBuildSettings() const {
- UHMM3BuildSettings ret;
- setDefaultUHMM3BuildSettings( &ret );
- ret.seed = seed;
- ret.eml = eml;
- ret.emn = emn;
- ret.evl = evl;
- ret.evn = evn;
- ret.efl = efl;
- ret.efn = efn;
- ret.eft = eft;
- return ret;
-}
-
-void UHMM3PhmmerSettings::setBuildSettings( const UHMM3BuildSettings & from ) {
- eml = from.eml;
- emn = from.emn;
- evl = from.evl;
- evn = from.evn;
- efl = from.efl;
- efn = from.efn;
- eft = from.eft;
- seed = from.seed;
-}
-
-UHMM3SearchSettings UHMM3PhmmerSettings::getSearchSettings() const {
- UHMM3SearchSettings ret;
- setDefaultUHMM3SearchSettings( &ret );
- ret.e = e;
- ret.t = t;
- ret.z = z;
- ret.domE = domE;
- ret.domT = domT;
- ret.domZ = domZ;
- ret.incE = incE;
- ret.incT = incT;
- ret.incDomE = incDomE;
- ret.incDomT = incDomT;
- ret.f1 = f1;
- ret.f2 = f2;
- ret.f3 = f3;
- ret.doMax = doMax;
- ret.noNull2 = noNull2;
- ret.noBiasFilter = noBiasFilter;
- return ret;
-}
-
-void UHMM3PhmmerSettings::setSearchSettings( const UHMM3SearchSettings & from ) {
- e = from.e;
- t = from.t;
- z = from.z;
- domE = from.domE;
- domT = from.domT;
- domZ = from.domZ;
- incE = from.incE;
- incT = from.incT;
- incDomE = from.incDomE;
- incDomT = from.incDomT;
- f1 = from.f1;
- f2 = from.f2;
- f3 = from.f3;
- doMax = from.doMax;
- noNull2 = from.noNull2;
- noBiasFilter = from.noBiasFilter;
-}
-
-bool UHMM3PhmmerSettings::isValid() const {
- if( !( 0 <= popen && popen <= 0.5 ) ) { return false; }
- if( !( 0 <= pextend && pextend < 1 ) ) { return false; }
- if( !( 0 < e ) ) { return false; }
- if( !( 0 < t || OPTION_NOT_SET == t ) ) { return false; }
- if( !( 0 < z || OPTION_NOT_SET == z ) ) { return false; }
- if( !( 0 < domE ) ) { return false; }
- if( !( 0 < domT || OPTION_NOT_SET == domT ) ) { return false; }
- if( !( 0 < domZ || OPTION_NOT_SET == domZ ) ) { return false; }
- if( !( 0 < incE ) ) { return false; }
- if( !( 0 < incT || OPTION_NOT_SET == incT ) ) { return false; }
- if( !( 0 < incDomE ) ) { return false; }
- if( !( 0 < incDomT || OPTION_NOT_SET == incDomT ) ) { return false; }
- if( !( (bool)doMax == TRUE || (bool)doMax == FALSE ) ) { return false; }
- if( !( (bool)noBiasFilter == TRUE || (bool)noBiasFilter == FALSE ) ) { return false; }
- if( !( (bool)noNull2 == TRUE || (bool)noNull2 == FALSE ) ) { return false; }
- if( !( 0 < eml ) ) { return false; }
- if( !( 0 < emn ) ) { return false; }
- if( !( 0 < evl ) ) { return false; }
- if( !( 0 < evn ) ) { return false; }
- if( !( 0 < efl ) ) { return false; }
- if( !( 0 < efn ) ) { return false; }
- if( !( 0 < eft && eft < 1 ) ) { return false; }
- if( !( 0 <= seed ) ) { return false; }
- return true;
-}
-
-static void destroyAllIfYouCan( ESL_SQ * query, ESL_SQ * db, ESL_ALPHABET * abc, P7_BG * bg, P7_BUILDER * bld, P7_PIPELINE * pli,
- P7_TOPHITS * th, P7_OPROFILE * om ) {
- if( NULL != query ) { esl_sq_Destroy( query ); }
- if( NULL != db ) { esl_sq_Destroy( db ); }
- if( NULL != abc ) { esl_alphabet_Destroy( abc ); }
- if( NULL != bg ) { p7_bg_Destroy( bg ); }
- if( NULL != bld ) { p7_builder_Destroy( bld ); }
- if( NULL != pli ) { p7_pipeline_Destroy( pli ); }
- if( NULL != th ) { p7_tophits_Destroy( th ); }
- if( NULL != om ) { p7_oprofile_Destroy( om ); }
-}
-
-const int PHMMER_PERCENT_PER_FILTER = 17;
-const int PHMMER_SINGLE_BUILDER_PROGRESS = 15;
-
-/* we catch all exceptions here */
-UHMM3SearchResult UHMM3Phmmer::phmmer( const char * p_querySq, int querySqLen, const char * p_dbSq, int dbSqLen,
- const UHMM3PhmmerSettings& settings, TaskStateInfo& ti, int wholeSeqSz ) {
- UHMM3SearchResult res;
- ESL_SQ *querySq = NULL;
- ESL_SQ *dbSq = NULL;
- ESL_ALPHABET *abc = NULL;
- P7_BG *bg = NULL;
- P7_BUILDER *bld = NULL;
- P7_PIPELINE *pli = NULL;
- P7_TOPHITS *th = NULL;
- P7_OPROFILE *om = NULL;
- int status = eslOK;
- QByteArray errStr;
-
- if( ti.hasError() ) {
- return res;
- }
- if( NULL == p_querySq || 0 >= querySqLen ) {
- ti.setError( tr( "No input query sequence given" ) );
- return res;
- }
- if( NULL == p_dbSq || 0 >= dbSqLen ) {
- ti.setError( tr( "Database sequence to search in is not given" ) );
- return res;
- }
-
- try {
- abc = esl_alphabet_Create( eslAMINO );
- if( NULL == abc ) {
- errStr = tr( "Run out of memory (creating alphabet failed)" ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- }
- bg = p7_bg_Create( abc );
- if( NULL == bg ) {
- errStr = tr( "Run out of memory (creating null model failed)" ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- }
-
- UHMM3BuildSettings bldSettings = settings.getBuildSettings();
- bld = p7_builder_Create( &bldSettings, abc );
- if( NULL == bld ) {
- errStr = tr( "Run out of memory (creating builder failed)" ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- }
-
- ESL_SCOREMATRIX * hmmer3SubstMatr = UHMM3Utilities::convertScoreMatrix( settings.substMatr );
- status = p7_builder_SetScoreSystem( bld, hmmer3SubstMatr, settings.popen, settings.pextend );
- if( status != eslOK ) {
- errStr = tr( "Setting scoring system failed with error: '%1'" ).arg( bld->errbuf ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- }
-
- dbSq = esl_sq_CreateFrom( NULL, p_dbSq, dbSqLen, NULL, NULL, NULL );
- esl_sq_Digitize( abc, dbSq );
- if( NULL == dbSq || NULL == dbSq->dsq ) {
- errStr = tr( "Error with digitizing sequence to search in" ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- }
-
- querySq = esl_sq_CreateFrom( NULL, p_querySq, querySqLen, NULL, NULL, NULL );
- esl_sq_Digitize( abc, querySq );
- if( NULL == querySq || NULL == querySq->dsq ) {
- errStr = tr( "Error digitizing query sequence" ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- }
-
- /* bypass HMM - only need model */
- status = p7_SingleBuilder( bld, querySq, bg, NULL, NULL, NULL, &om, PHMMER_SINGLE_BUILDER_PROGRESS, ti );
- if( eslCANCELED == status ) {
- errStr = tr( HMMER3_CANCELED_ERROR ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- } else if( eslOK != status ) {
- errStr = tr( "Error with creating HMM profile for query sequence" ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- }
-
- th = p7_tophits_Create();
- if( NULL == th ) {
- errStr = tr( "Run out of memory (creating top hits list failed)" ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- }
- UHMM3SearchSettings searchSettings = settings.getSearchSettings();
- pli = p7_pipeline_Create( &searchSettings, om->M, 400, p7_SEARCH_SEQS ); /* 400 is a dummy length for now */
- if( NULL == pli ) {
- errStr = tr( "Run out of memory (creating pipeline failed)" ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- }
-
- p7_pli_NewModel( pli, om, bg );
- p7_pli_NewSeq( pli, dbSq );
- p7_bg_SetLength( bg, wholeSeqSz );
- p7_oprofile_ReconfigLength( om, dbSq->n, wholeSeqSz );
- int ret = p7_Pipeline( pli, om, bg, dbSq, th, PHMMER_PERCENT_PER_FILTER, ti, wholeSeqSz );
- if( eslCANCELED == ret ) {
- errStr = tr( HMMER3_CANCELED_ERROR ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- }
- assert( eslOK == ret );
-
- p7_tophits_Sort(th);
- p7_tophits_Threshold( th, pli );
- res.fillResults( th, pli );
- } catch( const UHMMER3Exception& ex ) {
- ti.setError( ex.msg );
- } catch(...) {
- ti.setError( tr( HMMER3_UNKNOWN_ERROR ) );
- }
-
- destroyAllIfYouCan( querySq, dbSq, abc, bg, bld, pli, th, om );
- return res;
-}
-
-} // U2
diff --git a/src/plugins_3rdparty/hmm3/src/phmmer/uhmm3phmmer.h b/src/plugins_3rdparty/hmm3/src/phmmer/uhmm3phmmer.h
deleted file mode 100644
index 07ac906..0000000
--- a/src/plugins_3rdparty/hmm3/src/phmmer/uhmm3phmmer.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#ifndef _GB2_UHMM3_PHMMER_H_
-#define _GB2_UHMM3_PHMMER_H_
-
-#include <QtCore/QObject>
-
-#include <U2Core/Task.h>
-#include <U2Core/SMatrix.h>
-
-#include <hmmer3/hmmer.h>
-
-#include <search/uhmm3SearchResult.h>
-
-namespace U2 {
-
-class UHMM3PhmmerSettings {
-public:
- // same as in UHMM3SearchSettings
- double e;
- double t;
- double z;
- double domE;
- double domT;
- double domZ;
-
- double incE;
- double incT;
- double incDomE;
- double incDomT;
-
- double f1;
- double f2;
- double f3;
- int doMax;
- int noBiasFilter;
- int noNull2;
-
- // same as in UHMM3BuildSettings
- int eml; // --EmL. length of sequences for MSV Gumbel mu fit
- int emn; // --EmN. number of sequences for MSV Gumbel mu fit
- int evl; /* length of sequences for Viterbi Gumbel mu fit */
- int evn; /* number of sequences for Viterbi Gumbel mu fit */
- int efl; /* length of sequences for Forward exp tail mu fit */
- int efn; /* number of sequences for Forward exp tail mu fit */
- float eft; /* tail mass for Forward exponential tail mu fit */
- int seed;
-
- // scoring system
- double popen; /* gap open probability */
- double pextend; /* gap extend probability */
- SMatrix substMatr; /* default is null here -> BLOSUM62 there */
-
- UHMM3PhmmerSettings();
-
- UHMM3BuildSettings getBuildSettings() const;
- void setBuildSettings( const UHMM3BuildSettings & from );
-
- UHMM3SearchSettings getSearchSettings() const;
- void setSearchSettings( const UHMM3SearchSettings & from );
-
- bool isValid() const;
-
-}; // UHMM3PhmmerSettings
-
-class UHMM3Phmmer : public QObject {
- Q_OBJECT
-public:
- static UHMM3SearchResult phmmer( const char * querySq, int querySqLen, const char * dbSq, int dbSqLen,
- const UHMM3PhmmerSettings & settings, TaskStateInfo & ti, int wholeSeqSz );
-
-}; // UHMM3Phmmer
-
-} // U2
-
-#endif // _GB2_UHMM3_PHMMER_H_
diff --git a/src/plugins_3rdparty/hmm3/src/search/Hmmer3SearchWorkflowTask.cpp b/src/plugins_3rdparty/hmm3/src/search/Hmmer3SearchWorkflowTask.cpp
deleted file mode 100644
index 3e6afea..0000000
--- a/src/plugins_3rdparty/hmm3/src/search/Hmmer3SearchWorkflowTask.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-/**
-* UGENE - Integrated Bioinformatics Tools.
-* Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
-* http://ugene.unipro.ru
-*
-* This program is free software; you can redistribute it and/or
-* modify it under the terms of the GNU General Public License
-* as published by the Free Software Foundation; either version 2
-* of the License, or (at your option) any later version.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU General Public License for more details.
-*
-* You should have received a copy of the GNU General Public License
-* along with this program; if not, write to the Free Software
-* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
-* MA 02110-1301, USA.
-*/
-
-#include <U2Core/AppContext.h>
-#include <U2Core/BaseDocumentFormats.h>
-#include <U2Core/U1AnnotationUtils.h>
-#include <workers/HMM3SearchWorker.h>
-
-#include "Hmmer3SearchWorkflowTask.h"
-
-namespace U2 {
-
-using namespace LocalWorkflow;
-
-Hmmer3SearchWorfklowTask::Hmmer3SearchWorfklowTask(const QString &profileUrl, U2SequenceObject *sequenceObject, AnnotationTableObject *annotationsObject,
- const QString &group, const QString &description, U2FeatureType type, const QString &name, const UHMM3SearchTaskSettings &settings)
-: Task("HMMER 3 search workflow task", TaskFlags_NR_FOSE_COSC | TaskFlag_ReportingIsSupported | TaskFlag_ReportingIsEnabled),
-profileUrl(profileUrl), sequenceObject(sequenceObject), annotationsObject(annotationsObject),
-group(group), description(description), type(type), name(name), settings(settings), workflowTask(NULL), resultCount(0)
-{
-
-}
-
-void Hmmer3SearchWorfklowTask::prepare() {
- SimpleInOutWorkflowTaskConfig config = getConfig();
- CHECK_OP(stateInfo, );
-
- workflowTask = new SimpleInOutWorkflowTask(config);
- addSubTask(workflowTask);
-}
-
-Task::ReportResult Hmmer3SearchWorfklowTask::report() {
- CHECK_OP(stateInfo, ReportResult_Finished);
- Document *doc = workflowTask->getDocument();
- CHECK(NULL != doc, ReportResult_Finished);
- QList<GObject*> objects = doc->findGObjectByType(GObjectTypes::ANNOTATION_TABLE);
- if (objects.isEmpty()) {
- setError(tr("No annotations objects found"));
- return ReportResult_Finished;
- }
- AnnotationTableObject *hmmerObject = qobject_cast<AnnotationTableObject*>(objects.first());
- QList<SharedAnnotationData> data;
- foreach (Annotation *annotation, hmmerObject->getAnnotations()) {
- SharedAnnotationData annData = annotation->getData();
- annData->name = name;
- annData->type = type;
- data << annData;
- }
- U1AnnotationUtils::addDescriptionQualifier(data, description);
- resultCount += data.size();
- annotationsObject->addAnnotations(data, group);
- return ReportResult_Finished;
-}
-
-QString Hmmer3SearchWorfklowTask::generateReport() const {
- QString res;
- res += "<table>";
- res += "<tr><td width=200><b>" + tr("HMM profile used") + "</b></td><td>" + QFileInfo(profileUrl).absoluteFilePath() + "</td></tr>";
-
- if (hasError() || isCanceled()) {
- res += "<tr><td width=200><b>" + tr("Task was not finished") + "</b></td><td></td></tr>";
- res += "</table>";
- return res;
- }
-
- res += "<tr><td><b>" + tr("Result annotation table") + "</b></td><td>" + annotationsObject->getDocument()->getName() + "</td></tr>";
- res += "<tr><td><b>" + tr("Result annotation group") + "</b></td><td>" + group + "</td></tr>";
- res += "<tr><td><b>" + tr("Result annotation name") + "</b></td><td>" + name + "</td></tr>";
-
- res += "<tr><td><b>" + tr("Results count") + "</b></td><td>" + QString::number(resultCount) + "</td></tr>";
- res += "</table>";
- return res;
-}
-
-SimpleInOutWorkflowTaskConfig Hmmer3SearchWorfklowTask::getConfig() {
- SimpleInOutWorkflowTaskConfig config;
- U2DbiRef dbiRef = AppContext::getDbiRegistry()->getSessionTmpDbiRef(stateInfo);
- CHECK_OP(stateInfo, config);
- config.objects << sequenceObject->clone(dbiRef, stateInfo);
- CHECK_OP(stateInfo, config);
- config.inFormat = BaseDocumentFormats::PLAIN_GENBANK;
- config.outFormat = BaseDocumentFormats::PLAIN_GENBANK;
- config.schemaName = "hmm3-search";
- config.emptyResultPossible = true;
-
- config.extraArgs << "--hmm=" + profileUrl;
- config.extraArgs << "--seed=" + QString::number(settings.inner.seed);
-
- if (p7H_GA == settings.inner.useBitCutoffs) {
- config.extraArgs << "--threshold-type=" + HMM3SearchWorker::CUT_GA_THRESHOLD;
- } else if (p7H_NC == settings.inner.useBitCutoffs) {
- config.extraArgs << "--threshold-type=" + HMM3SearchWorker::CUT_NC_THRESHOLD;
- } else if (p7H_TC == settings.inner.useBitCutoffs) {
- config.extraArgs << "--threshold-type=" + HMM3SearchWorker::CUT_TC_THRESHOLD;
- } else if (OPTION_NOT_SET == settings.inner.domT) {
- config.extraArgs << "--domE=" + QString::number(log10(settings.inner.domE));
- } else {
- config.extraArgs << "--domT=" + QString::number(settings.inner.domT);
- }
- if (settings.inner.domZ > 0) {
- config.extraArgs << "--domZ=" + QString::number(settings.inner.domZ);
- }
-
- config.extraArgs << "--nobias=" + QString::number(settings.inner.noBiasFilter);
- config.extraArgs << "--nonull2=" + QString::number(settings.inner.noNull2);
- config.extraArgs << "--max=" + QString::number(settings.inner.doMax);
-
- if (!settings.inner.doMax) {
- config.extraArgs << "--F1=" + QString::number(settings.inner.f1);
- config.extraArgs << "--F2=" + QString::number(settings.inner.f2);
- config.extraArgs << "--F3=" + QString::number(settings.inner.f3);
- }
- return config;
-}
-
-} // U2
diff --git a/src/plugins_3rdparty/hmm3/src/search/Hmmer3SearchWorkflowTask.h b/src/plugins_3rdparty/hmm3/src/search/Hmmer3SearchWorkflowTask.h
deleted file mode 100644
index 9c47cc1..0000000
--- a/src/plugins_3rdparty/hmm3/src/search/Hmmer3SearchWorkflowTask.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/**
-* UGENE - Integrated Bioinformatics Tools.
-* Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
-* http://ugene.unipro.ru
-*
-* This program is free software; you can redistribute it and/or
-* modify it under the terms of the GNU General Public License
-* as published by the Free Software Foundation; either version 2
-* of the License, or (at your option) any later version.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU General Public License for more details.
-*
-* You should have received a copy of the GNU General Public License
-* along with this program; if not, write to the Free Software
-* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
-* MA 02110-1301, USA.
-*/
-
-#ifndef _U2_HMMER3_SEARCH_WORKFLOW_TASK_H_
-#define _U2_HMMER3_SEARCH_WORKFLOW_TASK_H_
-
-#include <U2Core/U2FeatureType.h>
-#include <U2Lang/SimpleWorkflowTask.h>
-#include <search/uHMM3SearchTask.h>
-
-namespace U2 {
-
-class AnnotationTableObject;
-class SimpleInOutWorkflowTask;
-class U2SequenceObject;
-
-class Hmmer3SearchWorfklowTask : public Task {
- Q_OBJECT
-public:
- Hmmer3SearchWorfklowTask(const QString &profileUrl, U2SequenceObject *sequenceObject, AnnotationTableObject *annotationsObject,
- const QString &group, const QString &description, U2FeatureType type, const QString &name, const UHMM3SearchTaskSettings &settings);
-
- void prepare();
- ReportResult report();
- QString generateReport() const;
-
-private:
- SimpleInOutWorkflowTaskConfig getConfig();
-
-private:
- QString profileUrl;
- U2SequenceObject *sequenceObject;
- AnnotationTableObject *annotationsObject;
- QString group;
- QString description;
- U2FeatureType type;
- QString name;
- UHMM3SearchTaskSettings settings;
- SimpleInOutWorkflowTask *workflowTask;
- int resultCount;
-};
-
-} // U2
-
-#endif // _U2_HMMER3_SEARCH_WORKFLOW_TASK_H_
diff --git a/src/plugins_3rdparty/hmm3/src/search/uHMM3SearchDialogImpl.cpp b/src/plugins_3rdparty/hmm3/src/search/uHMM3SearchDialogImpl.cpp
deleted file mode 100644
index c8b7a30..0000000
--- a/src/plugins_3rdparty/hmm3/src/search/uHMM3SearchDialogImpl.cpp
+++ /dev/null
@@ -1,246 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#include <math.h>
-
-#include <QMessageBox>
-#include <QPushButton>
-
-#include <U2Core/AppContext.h>
-#include <U2Core/DNAAlphabet.h>
-#include <U2Core/GObjectTypes.h>
-#include <U2Core/L10n.h>
-#include <U2Core/U2OpStatusUtils.h>
-#include <U2Core/U2SafePoints.h>
-
-#include <U2Gui/DialogUtils.h>
-#include <U2Gui/HelpButton.h>
-#include <U2Gui/LastUsedDirHelper.h>
-#include <U2Gui/U2FileDialog.h>
-
-#include <search/Hmmer3SearchWorkflowTask.h>
-
-#include "uHMM3SearchDialogImpl.h"
-#include "gobject/uHMMObject.h"
-
-namespace U2 {
-
-const QString UHMM3SearchDialogImpl::DOM_E_PLUS_PREFIX = "1E+";
-const QString UHMM3SearchDialogImpl::DOM_E_MINUS_PREFIX = "1E";
-const QString UHMM3SearchDialogImpl::HMM_FILES_DIR_ID = "uhmmer3_search_dlg_impl_hmm_dir";
-const QString UHMM3SearchDialogImpl::ANNOTATIONS_DEFAULT_NAME = "hmm_signal";
-
-UHMM3SearchDialogImpl::UHMM3SearchDialogImpl(U2SequenceObject *seqObj, QWidget *p)
- : QDialog(p)
-{
- assert(NULL != seqObj);
-
- setupUi(this);
- new HelpButton(this, buttonBox, "17470700");
- buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Run"));
- buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
-
- useScoreTresholdGroup.addButton(useExplicitScoreTresholdButton);
- useScoreTresholdGroup.addButton(useGATresholdsButton);
- useScoreTresholdGroup.addButton(useNCTresholdsButton);
- useScoreTresholdGroup.addButton(useTCTresholdsButton);
- useExplicitScoreTresholdButton->setChecked(true);
-
- model.sequence = QPointer<U2SequenceObject>(seqObj);
- setModelValues(); // default settings here
-
- // Annotations widget
- CreateAnnotationModel annModel;
- annModel.hideLocation = true;
- annModel.sequenceObjectRef = seqObj;
- annModel.useAminoAnnotationTypes = seqObj->getAlphabet()->isAmino();
- annModel.data->type = U2FeatureTypes::MiscSignal;
- annModel.data->name = ANNOTATIONS_DEFAULT_NAME;
- annModel.sequenceLen = seqObj->getSequenceLength();
- annotationsWidgetController = new CreateAnnotationWidgetController(annModel, this);
- QWidget * firstTab = tabWidget->widget(0);
- assert(NULL != firstTab);
- QVBoxLayout * curLayout = qobject_cast< QVBoxLayout* >(firstTab->layout());
- assert(NULL != curLayout);
- QWidget * aw = annotationsWidgetController->getWidget();
- curLayout->insertWidget(1, aw);
-
- QPushButton* searchButton = buttonBox->button(QDialogButtonBox::Ok);
- QPushButton* cancelButton = buttonBox->button(QDialogButtonBox::Cancel);
-
- connect(cancelButton, SIGNAL(clicked()), SLOT(reject()));
- connect(searchButton, SIGNAL(clicked()), SLOT(sl_okButtonClicked()));
- connect(useEvalTresholdsButton, SIGNAL(toggled(bool)), SLOT(sl_useEvalTresholdsButtonChanged(bool)));
- connect(useScoreTresholdsButton, SIGNAL(toggled(bool)), SLOT(sl_useScoreTresholdsButtonChanged(bool)));
- connect(useExplicitScoreTresholdButton, SIGNAL(toggled(bool)), SLOT(sl_useExplicitScoreTresholdButton(bool)));
- connect(maxCheckBox, SIGNAL(stateChanged(int)), SLOT(sl_maxCheckBoxChanged(int)));
- connect(domESpinBox, SIGNAL(valueChanged(int)), SLOT(sl_domESpinBoxChanged(int)));
- connect(queryHmmFileToolButton, SIGNAL(clicked()), SLOT(sl_queryHmmFileToolButtonClicked()));
- connect(domZCheckBox, SIGNAL(stateChanged(int)), SLOT(sl_domZCheckBoxChanged(int)));
-}
-
-void UHMM3SearchDialogImpl::setModelValues() {
- const UHMM3SearchSettings & settings = model.searchSettings.inner;
- domESpinBox->setValue(1); assert(10.0 == settings.domE);
- scoreTresholdDoubleSpin->setValue(0); // because default is OPTION_NOT_SET
- domZDoubleSpinBox->setValue(0); // because default is OPTION_NOT_SET
- nobiasCheckBox->setChecked((bool)settings.noBiasFilter);
- nonull2CheckBox->setChecked((bool)settings.noNull2);
- maxCheckBox->setChecked((bool)settings.doMax);
- f1DoubleSpinBox->setValue(settings.f1);
- f2DoubleSpinBox->setValue(settings.f2);
- f3DoubleSpinBox->setValue(settings.f3);
- seedSpinBox->setValue(settings.seed);
-}
-
-void UHMM3SearchDialogImpl::getModelValues() {
- UHMM3SearchSettings & settings = model.searchSettings.inner;
-
- if(useEvalTresholdsButton->isChecked()) {
- settings.domE = pow(10.0, domESpinBox->value());
- settings.domT = OPTION_NOT_SET;
- } else if(useScoreTresholdsButton->isChecked()) {
- if(useExplicitScoreTresholdButton->isChecked()) {
- settings.domT = scoreTresholdDoubleSpin->value();
- } else if(useGATresholdsButton->isChecked()) {
- settings.useBitCutoffs = p7H_GA;
- } else if(useNCTresholdsButton->isChecked()) {
- settings.useBitCutoffs = p7H_NC;
- } else if(useTCTresholdsButton->isChecked()) {
- settings.useBitCutoffs = p7H_TC;
- } else {
- assert(false);
- }
- } else {
- assert(false);
- }
-
- if(domZCheckBox->isChecked()) {
- settings.domZ = domZDoubleSpinBox->value();
- } else {
- settings.domZ = OPTION_NOT_SET;
- }
-
- settings.noBiasFilter = nobiasCheckBox->isChecked();
- settings.noNull2 = nonull2CheckBox->isChecked();
- settings.doMax = maxCheckBox->isChecked();
-
- settings.f1 = f1DoubleSpinBox->value();
- settings.f2 = f2DoubleSpinBox->value();
- settings.f3 = f3DoubleSpinBox->value();
-
- settings.seed = seedSpinBox->value();
-
- model.hmmfile = queryHmmFileEdit->text();
-}
-
-QString UHMM3SearchDialogImpl::checkModel() {
- assert(checkUHMM3SearchSettings(&model.searchSettings.inner));
- QString ret;
-
- if(model.hmmfile.isEmpty()) {
- ret = tr("HMM profile file path is empty");
- return ret;
- }
- ret = annotationsWidgetController->validate();
- if(!ret.isEmpty()) {
- return ret;
- }
-
- return ret;
-}
-
-void UHMM3SearchDialogImpl::sl_okButtonClicked() {
- getModelValues();
- QString err = checkModel();
- if (!err.isEmpty()) {
- QMessageBox::critical(this, tr("Error: bad arguments!"), err);
- return;
- }
-
- SAFE_POINT(!model.sequence.isNull(), L10N::nullPointerError("sequence object"), );
-
- bool objectPrepared = annotationsWidgetController->prepareAnnotationObject();
- if (!objectPrepared) {
- QMessageBox::warning(this, tr("Error"), tr("Cannot create an annotation object. Please check settings"));
- return;
- }
-
- const CreateAnnotationModel &annModel = annotationsWidgetController->getModel();
- Hmmer3SearchWorfklowTask *searchTask = new Hmmer3SearchWorfklowTask(model.hmmfile, model.sequence, annModel.getAnnotationObject(),
- annModel.groupName, annModel.description, annModel.data->type, annModel.data->name, model.searchSettings);
- AppContext::getTaskScheduler()->registerTopLevelTask(searchTask);
-
- QDialog::accept();
-}
-
-void UHMM3SearchDialogImpl::sl_useEvalTresholdsButtonChanged(bool checked) {
- domESpinBox->setEnabled(checked);
-}
-
-void UHMM3SearchDialogImpl::sl_useScoreTresholdsButtonChanged(bool checked) {
- useExplicitScoreTresholdButton->setEnabled(checked);
- useGATresholdsButton->setEnabled(checked);
- useNCTresholdsButton->setEnabled(checked);
- useTCTresholdsButton->setEnabled(checked);
- if(!checked) {
- scoreTresholdDoubleSpin->setEnabled(false);
- } else {
- scoreTresholdDoubleSpin->setEnabled(useExplicitScoreTresholdButton->isChecked());
- }
-}
-
-void UHMM3SearchDialogImpl::sl_useExplicitScoreTresholdButton(bool checked) {
- scoreTresholdDoubleSpin->setEnabled(checked);
-}
-
-void UHMM3SearchDialogImpl::sl_maxCheckBoxChanged(int state) {
- assert(Qt::PartiallyChecked != state);
- bool unchecked = Qt::Unchecked == state;
- f1Label->setEnabled(unchecked);
- f2Label->setEnabled(unchecked);
- f3Label->setEnabled(unchecked);
- f1DoubleSpinBox->setEnabled(unchecked);
- f2DoubleSpinBox->setEnabled(unchecked);
- f3DoubleSpinBox->setEnabled(unchecked);
-}
-
-void UHMM3SearchDialogImpl::sl_domESpinBoxChanged(int newVal) {
- const QString & prefix = 0 <= newVal ? DOM_E_PLUS_PREFIX : DOM_E_MINUS_PREFIX;
- domESpinBox->setPrefix(prefix);
-}
-
-void UHMM3SearchDialogImpl::sl_queryHmmFileToolButtonClicked() {
- LastUsedDirHelper helper(HMM_FILES_DIR_ID);
- helper.url = U2FileDialog::getOpenFileName(this, tr("Select query HMM profile"),
- helper, DialogUtils::prepareDocumentsFileFilterByObjType(UHMMObject::UHMM_OT, true));
- if(!helper.url.isEmpty()) {
- queryHmmFileEdit->setText(helper.url);
- }
-}
-
-void UHMM3SearchDialogImpl::sl_domZCheckBoxChanged(int state) {
- assert(Qt::PartiallyChecked != state);
- bool checked = Qt::Checked == state;
- domZDoubleSpinBox->setEnabled(checked);
-}
-
-} // U2
diff --git a/src/plugins_3rdparty/hmm3/src/search/uHMM3SearchTask.cpp b/src/plugins_3rdparty/hmm3/src/search/uHMM3SearchTask.cpp
deleted file mode 100644
index a9b6b4a..0000000
--- a/src/plugins_3rdparty/hmm3/src/search/uHMM3SearchTask.cpp
+++ /dev/null
@@ -1,550 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#include <QtCore/QByteArray>
-#include <QtCore/QFileInfo>
-
-#include <U2Core/AnnotationTableObject.h>
-#include <U2Core/AppContext.h>
-#include <U2Core/AppResources.h>
-#include <U2Core/Counter.h>
-#include <U2Core/DNAAlphabet.h>
-#include <U2Core/DNASequenceObject.h>
-#include <U2Core/DNATranslation.h>
-#include <U2Core/IOAdapter.h>
-#include <U2Core/IOAdapterUtils.h>
-#include <U2Core/L10n.h>
-#include <U2Core/LoadDocumentTask.h>
-#include <U2Core/Log.h>
-#include <U2Core/U1AnnotationUtils.h>
-#include <U2Core/U2SafePoints.h>
-
-#include <format/uHMMFormat.h>
-#include <gobject/uHMMObject.h>
-#include <task_local_storage/uHMMSearchTaskLocalStorage.h>
-#include <util/uhmm3Utilities.h>
-
-#include "uHMM3SearchTask.h"
-
-#define UHMM3_SEARCH_LOG_CAT "hmm3_search_log_category"
-
-using namespace U2;
-
-namespace U2 {
-
-static int countSearchMemInMB(qint64 seqLen, int hmmLen) {
- return ((77 * seqLen + 10500 * hmmLen) / (1024 * 1024)) + 2;
-}
-
-/*****************************************************
-* UHMM3SWSearchTask
-*****************************************************/
-
-static void recountRegion(U2Region& region, bool isAmino, bool isCompl, U2Region globalR) {
- int len = isAmino? region.length * 3 : region.length;
- int start = isAmino? region.startPos * 3 : region.startPos;
-
- if(isCompl) {
- start = globalR.length - start - len;
- }
- region.startPos = globalR.startPos + start;
- region.length = len;
-}
-
-static void recountUHMM3SWSearchRegions(UHMM3SearchSeqDomainResult& res, bool isAmino, bool isCompl, U2Region globalR) {
- recountRegion(res.seqRegion, isAmino, isCompl, globalR);
- recountRegion(res.envRegion, isAmino, isCompl, globalR);
-}
-
-UHMM3SWSearchTask::UHMM3SWSearchTask(const P7_HMM* h, const DNASequence& s, const UHMM3SearchTaskSettings& set, int ch)
-: Task("", TaskFlag_NoRun), sequence(s), settings(set),
- complTranslation(NULL), aminoTranslation(NULL), swTask(NULL), loadHmmTask(NULL), searchChunkSize(ch) {
- GCOUNTER(cvar, tvar, "UHMM3SWSearchTask");
-
-
-
- assert(searchChunkSize > 0);
- if(NULL == h) {
- setTaskName(tr("HMM search task with amino and complement translations"));
- stateInfo.setError(L10N::badArgument("hmm"));
- return;
- }
- hmms.append(h);
- assert(NULL != h->name);
- setTaskName(tr("HMM search task with amino and complement translations using '%1' profile HMM").arg(h->name));
-
- if(!sequence.seq.length()) {
- stateInfo.setError(L10N::badArgument("sequence"));
- return;
- }
-}
-
-UHMM3SWSearchTask::UHMM3SWSearchTask(const QString& hF, const DNASequence& seq, const UHMM3SearchTaskSettings& s, int ch)
-: Task("", TaskFlag_NoRun), sequence(seq), settings(s),
- complTranslation(NULL), aminoTranslation(NULL), swTask(NULL), loadHmmTask(NULL), hmmFilename(hF), searchChunkSize(ch) {
-
- assert(searchChunkSize > 0);
- if(hmmFilename.isEmpty()) {
- setTaskName(tr("HMM search task with amino and complement translations"));
- stateInfo.setError(L10N::badArgument("HMM file name"));
- return;
- }
- setTaskName(tr("HMM search task with amino and complement translations using '%1' profile HMM").arg(hmmFilename));
-
- if(!sequence.seq.length()) {
- stateInfo.setError(L10N::badArgument("sequence"));
- return;
- }
-}
-
-SequenceWalkerTask* UHMM3SWSearchTask::getSWSubtask() {
- assert(!hasError());
- SAFE_POINT(!hmms.isEmpty(), "UHMM3SWSearchTask::getSWSubtask:: No HMM profiles", NULL);
-
- bool ok = checkAlphabets(hmms.first()->abc->type, sequence.alphabet);
- if(!ok) {
- assert(hasError());
- return NULL;
- }
- ok = setTranslations(hmms.first()->abc->type, sequence.alphabet);
- if(!ok) {
- assert(hasError());
- return NULL;
- }
-
- SequenceWalkerConfig config;
- config.seq = sequence.seq.data();
- config.seqSize = sequence.seq.size();
- config.complTrans = complTranslation;
- config.strandToWalk = complTranslation == NULL ? StrandOption_DirectOnly : StrandOption_Both;
- config.aminoTrans = aminoTranslation;
- /*config.overlapSize = 2 * hmm->M;
- config.chunkSize = qMax(searchChunkSize, 6 * hmm->M);*/
- config.overlapSize = 0;
- config.chunkSize = config.seqSize;
- config.lastChunkExtraLen = config.chunkSize / 2;
- config.nThreads = MAX_PARALLEL_SUBTASKS_AUTO;
- config.walkCircular = false;
-
- return new SequenceWalkerTask(config, this, tr("HMM search task with amino and complement translations"));
-}
-
-void UHMM3SWSearchTask::prepare() {
- if(hasError()) {
- return;
- }
-
- if(!hmms.isEmpty()) {
- swTask = getSWSubtask();
- if(NULL == swTask) {
- assert(hasError());
- return;
- }
- addSubTask(swTask);
- } else {
- IOAdapterFactory* iof = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(IOAdapterUtils::url2io(hmmFilename));
- assert(NULL != iof);
- loadHmmTask = new LoadDocumentTask(UHMMFormat::UHHMER_FORMAT_ID, hmmFilename, iof, QVariantMap());
- addSubTask(loadHmmTask);
- }
-}
-
-QList< Task* > UHMM3SWSearchTask::onSubTaskFinished(Task* subTask) {
- assert(NULL != subTask);
- QList< Task* > res;
- if(subTask->hasError()) {
- stateInfo.setError(subTask->getError());
- return res;
- }
-
- if(loadHmmTask == subTask) {
- hmms = UHMM3Utilities::getHmmsFromDocument(loadHmmTask->getDocument(), stateInfo);
- //init results and overlaps
- for(int i = 0; i<hmms.size(); i++){
- results[i] = QList<UHMM3SWSearchTaskDomainResult>();
- overlaps[i] = QList<UHMM3SWSearchTaskDomainResult>();
- }
- swTask = getSWSubtask();
- if(NULL == swTask) {
- assert(hasError());
- return res;
- }
- res << swTask;
- } else {
- if(swTask != subTask) {
- assert(0 && "undefined_subtask_finished");
- }
- }
-
- return res;
-}
-
-void UHMM3SWSearchTask::onRegion(SequenceWalkerSubtask* t, TaskStateInfo& ti) {
- assert(NULL != t);
- if(stateInfo.hasError() || ti.hasError()) {
- return;
- }
-
- const char* seq = t->getRegionSequence();
- int seqLen = t->getRegionSequenceLen();
-
- UHMM3SearchTaskLocalStorage::createTaskContext(t->getTaskId());
- int wholeSeqSz = t->getGlobalConfig().seqSize;
- wholeSeqSz = t->isAminoTranslated() ? (wholeSeqSz / 3) : wholeSeqSz;
- QList<UHMM3SearchResult > generalResults;
- foreach(const P7_HMM* hmm, hmms){
- UHMM3SearchResult generalRes = UHMM3Search::search(hmm, seq, seqLen, settings.inner, ti, wholeSeqSz);
- generalResults.append(generalRes);
- if(ti.hasError()) {
- UHMM3SearchTaskLocalStorage::freeTaskContext(t->getTaskId());
- return;
- }
- }
-
- QMutexLocker locker(&writeResultsMtx);
- for(int i = 0; i<hmms.size(); i++){
- if (i < generalResults.size()){
- UHMM3SearchResult generalRes = generalResults[i];
- writeResults(generalRes.domainResList, t, results[i], overlaps[i], hmms.at(i)->M);
- }
- }
- UHMM3SearchTaskLocalStorage::freeTaskContext(t->getTaskId());
-}
-
-void UHMM3SWSearchTask::writeResults(const QList<UHMM3SearchSeqDomainResult> & domains, SequenceWalkerSubtask * t,
- QList<UHMM3SWSearchTaskDomainResult> & results, QList<UHMM3SWSearchTaskDomainResult> & overlaps, int halfOverlap) {
- bool isCompl = t->isDNAComplemented();
- bool isAmino = t->isAminoTranslated();
- U2Region globalRegion = t->getGlobalRegion();
- foreach(const UHMM3SearchSeqDomainResult& domainRes, domains) {
- UHMM3SWSearchTaskDomainResult res;
- res.generalResult = domainRes;
- res.onCompl = isCompl;
- res.onAmino = isAmino;
- recountUHMM3SWSearchRegions(res.generalResult, isAmino, isCompl, globalRegion);
- if(t->intersectsWithOverlaps(res.generalResult.seqRegion)) {
- bool add = true;
- if(!res.onCompl && t->hasRightOverlap()) { // if it will be found in a next chunk
- U2Region nextChunkRegion(globalRegion.endPos() - halfOverlap, halfOverlap);
- add = !nextChunkRegion.contains(res.generalResult.seqRegion);
- } else if(res.onCompl && t->hasLeftOverlap()) { // if it will be found on prev chunk
- U2Region prevChunkRegion(globalRegion.startPos, halfOverlap);
- add = !prevChunkRegion.contains(res.generalResult.seqRegion);
- }
- if(add) {
- res.borderResult = (t->hasLeftOverlap() && res.generalResult.seqRegion.startPos == globalRegion.startPos) ||
- (t->hasRightOverlap() && res.generalResult.seqRegion.endPos() == globalRegion.endPos());
- overlaps.append(res);
- }
- } else { // no intersections
- results.append(res);
- }
- }
-}
-
-QList< TaskResourceUsage > UHMM3SWSearchTask::getResources(SequenceWalkerSubtask * t) {
- assert(NULL != t);
- assert(!sequence.isNull() && !hmms.isEmpty());
-
- QList< TaskResourceUsage > res;
- int howManyMem = 0;
- for(int i = 0; i<hmms.size(); i++){
- int tmp = countSearchMemInMB(t->getRegionSequenceLen(), hmms.at(i)->M);
- if (howManyMem < tmp){
- howManyMem = tmp;
- }
- }
- res << TaskResourceUsage(RESOURCE_MEMORY, howManyMem);
- algoLog.trace(QString("%1 requires %2 of memory").arg(getTaskName()).arg(howManyMem));
- return res;
-}
-
-/* Same as in HMMSearchTask */
-void
-UHMM3SWSearchTask::processOverlaps(QList<UHMM3SWSearchTaskDomainResult> & overlaps,
- QList<UHMM3SWSearchTaskDomainResult> & results, int maxCommonLen) {
- for(int i = 0; i < overlaps.count(); ++i){
- UHMM3SWSearchTaskDomainResult & r1 = overlaps[i];
- if (r1.filtered) {
- continue;
- }
- for(int j = i + 1; j < overlaps.count(); ++j){
- UHMM3SWSearchTaskDomainResult & r2 = overlaps[j];
- if (r2.filtered) {
- continue;
- }
- if (r1.onCompl != r2.onCompl) { //check both regions are on the same strand
- continue;
- }
- if (r1.onAmino) { //check both regions have the same amino frame
- int s1 = r1.onCompl ? r1.generalResult.seqRegion.endPos() % 3 : r1.generalResult.seqRegion.startPos % 3;
- int s2 = r2.onCompl ? r2.generalResult.seqRegion.endPos() % 3 : r2.generalResult.seqRegion.startPos % 3;
- if (s1 != s2) {
- continue;
- }
- }
- if (r1.generalResult.seqRegion.contains(r2.generalResult.seqRegion) &&
- r1.generalResult.seqRegion != r2.generalResult.seqRegion) {
- r2.filtered = true;
- } else if (r2.generalResult.seqRegion.contains(r1.generalResult.seqRegion) &&
- r2.generalResult.seqRegion != r1.generalResult.seqRegion) {
- r1.filtered = true;
- break;
- } else if (r1.generalResult.seqRegion.intersect(r2.generalResult.seqRegion).length >= maxCommonLen) {
- bool useR1 = r2.generalResult.score <= r1.generalResult.score;
- if (r1.generalResult.score == r2.generalResult.score && r1.generalResult.ival == r2.generalResult.ival
- && r1.borderResult && !r2.borderResult) {
- useR1 = false;
- }
- if (useR1) {
- r2.filtered = true;
- } else {
- r1.filtered = true;
- break;
- }
- }
- }
- }
-
- foreach(const UHMM3SWSearchTaskDomainResult& r, overlaps) {
- if (!r.filtered) {
- results.append(r);
- }
- }
-}
-
-bool
-UHMM3SWSearchTask::uhmm3SearchDomainResultLessThan(const UHMM3SWSearchTaskDomainResult & r1, const UHMM3SWSearchTaskDomainResult & r2) {
- if(r1.generalResult.score == r2.generalResult.score) {
- if(r1.generalResult.seqRegion == r2.generalResult.seqRegion) {
- if(r1.onCompl == r2.onCompl) {
- return &r1 < &r2;
- }
- return r2.onCompl;
- }
- return r1.generalResult.seqRegion < r2.generalResult.seqRegion;
- }
- return r1.generalResult.score > r2.generalResult.score;
-}
-
-Task::ReportResult UHMM3SWSearchTask::report() {
- if(hasError()) {
- return ReportResult_Finished;
- }
- for(int i = 0; i<hmms.size(); i++){
- processOverlaps(overlaps[i], results[i], hmms.at(i)->M / 2);
- qSort(results[i].begin(), results[i].end(), uhmm3SearchDomainResultLessThan);
- }
- return ReportResult_Finished;
-}
-
-bool UHMM3SWSearchTask::checkAlphabets(int hmmAl, const DNAAlphabet* seqAl) {
- assert(!hasError());
- assert(NULL != seqAl);
- assert(0 <= hmmAl);
-
- if(eslUNKNOWN == hmmAl || eslNONSTANDARD == hmmAl) {
- stateInfo.setError(tr("Unrecognized alphabet of sequence"));
- return false;
- }
- if( seqAl->isRaw() ) {
- stateInfo.setError(tr("Invalid alphabet of sequence"));
- return false;
- }
-
- if(eslDNA == hmmAl || eslRNA == hmmAl) {
- if (seqAl->isAmino()) {
- stateInfo.setError(tr("Cannot search for nucleic HMM profile in amino sequence"));
- return false;
- }
- }
- return true;
-}
-
-bool UHMM3SWSearchTask::setTranslations(int hmmAl, const DNAAlphabet* seqAl) {
- assert(!hasError());
- assert(NULL != seqAl);
- assert(0 <= hmmAl);
-
- if(seqAl->isNucleic()) {
- DNATranslationRegistry* transReg = AppContext::getDNATranslationRegistry();
- assert(NULL != transReg);
- DNATranslation* complTT = transReg->lookupComplementTranslation(seqAl);
- if (complTT != NULL) {
- complTranslation = complTT;
- }
- if(hmmAl == eslAMINO) {
- QList< DNATranslation* > aminoTs = transReg->lookupTranslation(seqAl, DNATranslationType_NUCL_2_AMINO);
- if(!aminoTs.empty()) {
- aminoTranslation = transReg->getStandardGeneticCodeTranslation(seqAl);
- }
- }
- } else {
- if(!seqAl->isAmino()) {
- stateInfo.setError("unrecognized_sequence_alphabet_found");
- return false;
- }
- }
-
- return true;
-}
-
-QList<UHMM3SWSearchTaskDomainResult> UHMM3SWSearchTask::getResults() const {
- QList<UHMM3SWSearchTaskDomainResult> res;
- for(int i = 0; i<hmms.size(); i++){
- res.append(results[i]);
- }
- return res;
-}
-
-QList< SharedAnnotationData >
-UHMM3SWSearchTask::getResultsAsAnnotations(const QList<UHMM3SWSearchTaskDomainResult> & results, const P7_HMM * hmm, U2FeatureType type, const QString & name) {
- assert(!name.isEmpty());
- QList< SharedAnnotationData > annotations;
-
- foreach(const UHMM3SWSearchTaskDomainResult & res, results) {
- SharedAnnotationData annData(new AnnotationData());
- annData->type = type;
- annData->name = name;
- annData->setStrand(res.onCompl ? U2Strand::Complementary : U2Strand::Direct);
- annData->location->regions << res.generalResult.seqRegion;
-
- assert(NULL != hmm);
- QString hmmInfo = hmm->name;
- if(NULL != hmm->acc) {
- hmmInfo += QString().sprintf("\n Accession number in PFAM database: %s", hmm->acc);
- }
- if(NULL != hmm->desc) {
- hmmInfo += QString().sprintf("\n Description: %s", hmm->desc);
- }
- assert(!hmmInfo.isEmpty());
- annData->qualifiers << U2Qualifier("HMM_model", hmmInfo);
- res.generalResult.writeQualifiersToAnnotation(annData);
-
- annotations << annData;
- }
-
- return annotations;
-}
-
-QList<SharedAnnotationData> UHMM3SWSearchTask::getResultsAsAnnotations(U2FeatureType aType, const QString & aname) {
- QList< SharedAnnotationData > res;
- for(int i = 0; i<hmms.size(); i++){
- res.append(getResultsAsAnnotations(results[i], hmms.at(i), aType, aname));
- }
- return res;
-}
-
-/*****************************************************
- * UHMM3SearchTaskSettings
- *****************************************************/
-
-UHMM3SearchTaskSettings::UHMM3SearchTaskSettings() {
- setDefaultUHMM3SearchSettings(&inner);
-}
-
-/*****************************************************
-* UHMM3SearchTask
-*****************************************************/
-
-UHMM3SearchTask::UHMM3SearchTask(const UHMM3SearchTaskSettings &_settings, const QList<const P7_HMM *>& _hmmProfiles, const QByteArray &_sequence)
- : Task(tr("HMM search task"), TaskFlag_None),
- settings(_settings), hmmProfiles(_hmmProfiles), sequence(_sequence)
-{
- assert(!hmmProfiles.isEmpty() && "Bad HMM profile given");
- assert(hmmProfiles.first()->name);
-
- if (hmmProfiles.size() == 1){
- setTaskName(tr("HMM search with '%1'").arg(hmmProfiles.first()->name));
- }else{
- setTaskName(tr("HMM search with %1 profiles").arg(hmmProfiles.size()));
- }
-
-}
-
-void UHMM3SearchTask::prepare() {
- assert(!hmmProfiles.isEmpty() && "Bad HMM profile given");
- assert(hmmProfiles.first()->M > 0);
-
- int howManyMem = 0;
- for(int i = 0; i<hmmProfiles.size(); i++){
- int tmp = countSearchMemInMB(sequence.length(), hmmProfiles.at(i)->M);
- if (howManyMem < tmp){
- howManyMem = tmp;
- }
- }
-
- addTaskResource(TaskResourceUsage(RESOURCE_MEMORY, howManyMem));
-
- algoLog.trace(QString("%1 needs %2 of memory").arg(getTaskName()).arg(howManyMem));
-}
-
-void UHMM3SearchTask::run() {
- UHMM3SearchTaskLocalStorage::createTaskContext(getTaskId());
- foreach(const P7_HMM * hmm, hmmProfiles){
- UHMM3SearchResult res = UHMM3Search::search(hmm, sequence.data(), sequence.length(), settings.inner, stateInfo, sequence.length());
- result.append(res);
- }
- UHMM3SearchTaskLocalStorage::freeTaskContext(getTaskId());
-}
-
-/*****************************************************
-* UHMM3LoadProfileAndSearchTask
-*****************************************************/
-
-UHMM3LoadProfileAndSearchTask::UHMM3LoadProfileAndSearchTask(const UHMM3SearchTaskSettings &_settings, const QString &_hmmProfileFile, const QByteArray &_sequence)
- : Task(tr("HMM search with '%1' HMM profile file").arg(_hmmProfileFile), TaskFlags_NR_FOSCOE),
- loadHmmProfileTask(0), hmmSearchTask(0),
- settings(_settings), sequence(_sequence)
-{
- IOAdapterFactory *iof = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(IOAdapterUtils::url2io(_hmmProfileFile));
- assert(iof);
-
- loadHmmProfileTask = new LoadDocumentTask(UHMMFormat::UHHMER_FORMAT_ID, _hmmProfileFile, iof);
- addSubTask(loadHmmProfileTask);
-}
-
-QList<Task*> UHMM3LoadProfileAndSearchTask::onSubTaskFinished(Task* subTask) {
- QList<Task*> subTasks;
-
- propagateSubtaskError();
- if(subTask->hasError() || subTask->isCanceled()) {
- return subTasks;
- }
-
- if (loadHmmProfileTask == subTask) {
- QList<const P7_HMM *> hmmProfiles = UHMM3Utilities::getHmmsFromDocument(loadHmmProfileTask->getDocument(), stateInfo);
- assert(!hmmProfiles.isEmpty() && "Bad HMM profile");
-
- hmmSearchTask = new UHMM3SearchTask(settings, hmmProfiles, sequence);
- subTasks << hmmSearchTask;
- }
- else if (hmmSearchTask == subTask) {
- // pass
- }
- else {
- assert(!"Undefined task");
- }
-
- return subTasks;
-}
-
-} // U2
diff --git a/src/plugins_3rdparty/hmm3/src/search/uHMM3SearchTask.h b/src/plugins_3rdparty/hmm3/src/search/uHMM3SearchTask.h
deleted file mode 100644
index 17b7672..0000000
--- a/src/plugins_3rdparty/hmm3/src/search/uHMM3SearchTask.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#ifndef _GB2_UHMM3_SEARCH_TASK_H_
-#define _GB2_UHMM3_SEARCH_TASK_H_
-
-#include <QtCore/QMutex>
-
-#include <U2Core/Task.h>
-#include <U2Core/SequenceWalkerTask.h>
-#include <U2Core/CreateAnnotationTask.h>
-#include <U2Core/DNASequence.h>
-#include <U2Core/DNATranslation.h>
-
-#include "uhmm3search.h"
-
-namespace U2 {
-
-class AnnotationTableObject;
-class LoadDocumentTask;
-
-/**************************************
-* General hmmer3 search task.
-**************************************/
-
-/* envelope over hmmer3 search settings */
-class UHMM3SearchTaskSettings {
-public:
- UHMM3SearchSettings inner;
- UHMM3SearchTaskSettings();
-}; // UHMMER3SearchTaskSettings
-
-class UHMM3SearchTask : public Task {
- Q_OBJECT
-
-public:
- UHMM3SearchTask(const UHMM3SearchTaskSettings &settings, const QList<const P7_HMM *>& hmmProfiles, const QByteArray &sequence);
-
- virtual void prepare();
- virtual void run();
-
- QList<UHMM3SearchResult> getResult() const {
- assert(isFinished());
- return result;
- }
-
-private:
- UHMM3SearchTaskSettings settings;
- QList<const P7_HMM *> hmmProfiles;
- QByteArray sequence;
- QList<UHMM3SearchResult> result;
-
-}; // UHMM3SearchTask
-
-class UHMM3LoadProfileAndSearchTask : public Task {
- Q_OBJECT
-
-public:
- UHMM3LoadProfileAndSearchTask(const UHMM3SearchTaskSettings &settings, const QString &hmmProfileFile, const QByteArray &sequence);
-
- QList<UHMM3SearchResult> getResult() const {
- assert(isFinished());
- return hmmSearchTask->getResult();
- }
-
-protected:
- virtual QList<Task*> onSubTaskFinished(Task *subTask);
-
-private:
- LoadDocumentTask *loadHmmProfileTask;
- UHMM3SearchTask *hmmSearchTask;
-
- UHMM3SearchTaskSettings settings;
- QByteArray sequence;
-
-}; // UHMM3LoadProfileAndSearchTask
-
-
-/**************************************
-* Sequence walker version of hmmer3 search task.
-**************************************/
-/* we cover only domains results here */
-class UHMM3SWSearchTaskDomainResult {
-public:
- UHMM3SWSearchTaskDomainResult() : onCompl(false), onAmino(false), borderResult(false), filtered(false) {}
- UHMM3SearchSeqDomainResult generalResult;
- bool onCompl;
- bool onAmino;
- bool borderResult;
- bool filtered;
-
-}; // UHMM3SWSearchTaskDomainResult
-
-class UHMM3SWSearchTask : public Task, SequenceWalkerCallback {
- Q_OBJECT
-public:
- static const int DEFAULT_CHUNK_SIZE = 1000000; // 1 MB
- static void writeResults(const QList<UHMM3SearchSeqDomainResult> & domains, SequenceWalkerSubtask * t,
- QList<UHMM3SWSearchTaskDomainResult> & result, QList<UHMM3SWSearchTaskDomainResult> & overlaps,
- int halfOverlap);
- static void processOverlaps(QList<UHMM3SWSearchTaskDomainResult> & overlaps, QList<UHMM3SWSearchTaskDomainResult> & results,
- int maxCommonLen);
-
- static bool uhmm3SearchDomainResultLessThan(const UHMM3SWSearchTaskDomainResult & r1, const UHMM3SWSearchTaskDomainResult & r2);
-
-public:
- UHMM3SWSearchTask(const P7_HMM* hmm, const DNASequence& sequence,
- const UHMM3SearchTaskSettings& set, int chunk = DEFAULT_CHUNK_SIZE);
-
- UHMM3SWSearchTask(const QString& hmmFilename, const DNASequence& sequence,
- const UHMM3SearchTaskSettings&, int chunk = DEFAULT_CHUNK_SIZE);
-
- virtual void prepare();
-
- QList<UHMM3SWSearchTaskDomainResult> getResults() const;
-
- static QList< SharedAnnotationData > getResultsAsAnnotations(const QList<UHMM3SWSearchTaskDomainResult> & results,
- const P7_HMM * hmm, U2FeatureType type, const QString & name);
-
- QList<SharedAnnotationData> getResultsAsAnnotations(U2FeatureType aType, const QString & aname);
-
- QList<Task *> onSubTaskFinished(Task* subTask);
-
- virtual void onRegion(SequenceWalkerSubtask* t, TaskStateInfo& ti);
-
- virtual QList< TaskResourceUsage > getResources(SequenceWalkerSubtask * t);
-
- ReportResult report();
-
-private:
- bool setTranslations(int hmmAl, const DNAAlphabet* seqAl);
- bool checkAlphabets(int hmmAl, const DNAAlphabet* seqAl);
- SequenceWalkerTask* getSWSubtask();
-
-private:
- QList<const P7_HMM*> hmms;
- DNASequence sequence;
- UHMM3SearchTaskSettings settings;
- DNATranslation* complTranslation;
- DNATranslation* aminoTranslation;
- QMap<int, QList<UHMM3SWSearchTaskDomainResult> > results;
- QMap<int, QList<UHMM3SWSearchTaskDomainResult> > overlaps;
- QMutex writeResultsMtx;
- SequenceWalkerTask* swTask;
- LoadDocumentTask* loadHmmTask;
- QString hmmFilename;
- int searchChunkSize;
-
-}; // UHMM3SWSearchTask
-
-} // U2
-
-#endif // _GB2_UHMM3_SEARCH_TASK_H_
diff --git a/src/plugins_3rdparty/hmm3/src/search/uhmm3QDActor.cpp b/src/plugins_3rdparty/hmm3/src/search/uhmm3QDActor.cpp
deleted file mode 100644
index ad63c3e..0000000
--- a/src/plugins_3rdparty/hmm3/src/search/uhmm3QDActor.cpp
+++ /dev/null
@@ -1,294 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#include "uhmm3QDActor.h"
-#include <gobject/uHMMObject.h>
-#include <search/uHMM3SearchTask.h>
-#include <search/uHMM3SearchDialogImpl.h>
-
-#include <U2Core/FailTask.h>
-#include <U2Core/AppContext.h>
-#include <U2Core/TaskSignalMapper.h>
-#include <U2Core/DNASequenceObject.h>
-
-#include <U2Gui/DialogUtils.h>
-
-#include <U2Designer/DelegateEditors.h>
-
-#include <U2Lang/BaseTypes.h>
-
-
-namespace U2 {
-
-static const QString PROFILE_ATTR("hmm-profile");
-static const QString NSEQ_ATTR("seqs-num");
-static const QString DOM_E_ATTR("e-val");
-static const QString DOM_T_ATTR("score");
-static const QString NO_BIAS_ATTR("no-bias-filter");
-static const QString NO_NULL_ATTR("no-score-corrections");
-static const QString DO_MAX_ATTR("do-max");
-static const QString F1_ATTR("msv-filter-threshold");
-static const QString F2_ATTR("viterbi-filter-threshold");
-static const QString F3_ATTR("forward-filter-threshold");
-static const QString SEED_ATTR("random-generator-seed");
-static const QString USE_EVAL("use-e-val");
-static const QString MIN_LEN("min-len");
-static const QString MAX_LEN("max-len");
-
-UHMM3QDActor::UHMM3QDActor( QDActorPrototype const* proto ) : QDActor(proto) {
- units["hmm"] = new QDSchemeUnit(this);
- cfg->setAnnotationKey("hmm_signal");
- CHECK(NULL != proto->getEditor(), );
- PropertyDelegate* evpd = proto->getEditor()->getDelegate(DOM_E_ATTR);
- connect(evpd, SIGNAL(si_valueChanged(int)), SLOT(sl_evChanged(int)));
-}
-
-int UHMM3QDActor::getMinResultLen() const {
- return cfg->getParameter(MIN_LEN)->getAttributeValueWithoutScript<int>();
-}
-
-int UHMM3QDActor::getMaxResultLen() const {
- return cfg->getParameter(MAX_LEN)->getAttributeValueWithoutScript<int>();
-}
-
-QString UHMM3QDActor::getText() const {
- return tr("HMM3");
-}
-
-Task* UHMM3QDActor::getAlgorithmTask( const QVector<U2Region>& location ) {
- const DNASequence& dnaSeq = scheme->getSequence();
- const char* seq = dnaSeq.seq.constData();
-
- QMap<QString, Attribute*> params = cfg->getParameters();
- QString hmmFileStr = params.value(PROFILE_ATTR)->getAttributeValueWithoutScript<QString>();
- QStringList hmmFiles = hmmFileStr.split(QRegExp("\\s*;\\s*"));
-
- Task* t = new Task(tr("QD HMM3 search"), TaskFlag_NoRun);
-
- UHMM3SearchTaskSettings stngs;
-
- float domENum = cfg->getParameter(DOM_E_ATTR)->getAttributeValueWithoutScript<int>();
- bool filterByEvalue = cfg->getParameter(USE_EVAL)->getAttributeValueWithoutScript<bool>();
- if (filterByEvalue) {
- stngs.inner.domE = pow(10, domENum);
- stngs.inner.domT = OPTION_NOT_SET;
- } else {
- stngs.inner.domT = cfg->getParameter(DOM_T_ATTR)->getAttributeValueWithoutScript<double>();
- }
-
- stngs.inner.domZ = cfg->getParameter(NSEQ_ATTR)->getAttributeValueWithoutScript<double>();
- stngs.inner.noBiasFilter = cfg->getParameter(NO_BIAS_ATTR)->getAttributeValueWithoutScript<bool>();
- stngs.inner.noNull2 = cfg->getParameter(NO_NULL_ATTR)->getAttributeValueWithoutScript<bool>();
- stngs.inner.doMax = cfg->getParameter(DO_MAX_ATTR)->getAttributeValueWithoutScript<bool>();
- stngs.inner.f1 = (float)cfg->getParameter(F1_ATTR)->getAttributeValueWithoutScript<double>();
- stngs.inner.f2 = (float)cfg->getParameter(F2_ATTR)->getAttributeValueWithoutScript<double>();
- stngs.inner.f3 = (float)cfg->getParameter(F3_ATTR)->getAttributeValueWithoutScript<double>();
- stngs.inner.seed = cfg->getParameter(SEED_ATTR)->getAttributeValueWithoutScript<int>();
-
- foreach(QString hmmFile, hmmFiles) {
- foreach(U2Region r, location) {
- DNASequence sequence;
- sequence.seq = QByteArray(seq + r.startPos, r.length);
- sequence.alphabet = dnaSeq.alphabet;
-
- UHMM3SWSearchTask* st = new UHMM3SWSearchTask(hmmFile, sequence, stngs);
- t->addSubTask(st);
- offsets[st] = r.startPos;
- }
- }
-
- connect(new TaskSignalMapper(t), SIGNAL(si_taskFinished(Task*)), SLOT(sl_onTaskFinished(Task*)));
-
- return t;
-}
-
-void UHMM3QDActor::sl_onTaskFinished(Task*) {
- QString aname = cfg->getAnnotationKey();
- foreach(UHMM3SWSearchTask* t, offsets.keys()) {
- QList<SharedAnnotationData> annotations = t->getResultsAsAnnotations(U2FeatureTypes::MiscSignal, aname);
- int offset = offsets.value(t);
- foreach(SharedAnnotationData d, annotations) {
- U2Region r = d->location->regions.first();
- if (r.length < getMinResultLen() || r.length > getMaxResultLen()) {
- continue;
- }
- r.startPos+=offset;
- QDResultUnit ru(new QDResultUnitData);
- ru->strand = d->getStrand();
- ru->quals = d->qualifiers;
- ru->region = r;
- ru->owner = units.value("hmm");
- QDResultGroup::buildGroupFromSingleResult(ru, results);
- }
- }
- offsets.clear();
-}
-
-void UHMM3QDActor::sl_evChanged(int i) {
- CHECK(NULL != proto->getEditor(), );
- PropertyDelegate* pd = proto->getEditor()->getDelegate(DOM_E_ATTR);
- SpinBoxDelegate* evpd = qobject_cast<SpinBoxDelegate*>(pd);
- assert(evpd);
- if (i>0) {
- evpd->setEditorProperty("prefix", "1e+");
- } else {
- evpd->setEditorProperty("prefix", "1e");
- }
-}
-
-void UHMM3QDActor::updateEditor() {
- int val = cfg->getParameter(DOM_E_ATTR)->getAttributeValueWithoutScript<int>();
- sl_evChanged(val);
-}
-
-//Prototype
-//////////////////////////////////////////////////////////////////////////
-
-UHMM3QDActorPrototype::UHMM3QDActorPrototype() {
- descriptor.setId("hmm3");
- descriptor.setDisplayName(UHMM3QDActor::tr("HMM3"));
- descriptor.setDocumentation(UHMM3QDActor::tr(
- "Searches HMM signals in a sequence with one or more profile HMM"
- " and saves the results as annotations."));
-
- {
- Descriptor pd(PROFILE_ATTR,
- UHMM3QDActor::tr("Profile HMM"),
- UHMM3QDActor::tr("Semicolon-separated list of input HMM files."));
-
- Descriptor mind(MIN_LEN,
- UHMM3QDActor::tr("Min Length"),
- UHMM3QDActor::tr("Minimum length of a result region."));
-
- Descriptor maxd(MAX_LEN,
- UHMM3QDActor::tr("Max Length"),
- UHMM3QDActor::tr("Maximum length of a result region."));
-
- // Parameters controlling reporting thresholds
- //
- Descriptor useEvd(USE_EVAL,
- UHMM3QDActor::tr("Use E-value"),
- UHMM3QDActor::tr("Filters by E-value if true. Otherwise filters by score."));
-
- Descriptor ded(DOM_E_ATTR,
- UHMM3QDActor::tr("Filter by High E-value"),
- UHMM3QDActor::tr("Reports domains <= this E-value threshold"
- " in output."));
-
- Descriptor dtd(DOM_T_ATTR,
- UHMM3QDActor::tr("Filter by Low Score"),
- UHMM3QDActor::tr("Reports domains >= this score cutoff in output."));
-
- // Parameters controlling the acceleration pipeline
- //
- Descriptor md(DO_MAX_ATTR,
- UHMM3QDActor::tr("Max"),
- UHMM3QDActor::tr(
- "Turns off all acceleration heuristic filters. This increases"
- " sensitivity somewhat, at a large cost in speed."));
-
- Descriptor f1d(F1_ATTR,
- UHMM3QDActor::tr("MSV Filter Threshold"),
- UHMM3QDActor::tr("P-value threshold for the MSV filter step"
- " of the acceleration pipeline."));
-
- Descriptor f2d(F2_ATTR,
- UHMM3QDActor::tr("Viterbi Filter Threshold"),
- UHMM3QDActor::tr("P-value threshold for the Viterbi filter step"
- " of the acceleration pipeline."));
-
- Descriptor f3d(F3_ATTR,
- UHMM3QDActor::tr("Forward Filter Threshold"),
- UHMM3QDActor::tr("P-value threshold for the Forward filter step"
- " of the acceleration pipeline."));
-
- Descriptor bd(NO_BIAS_ATTR,
- UHMM3QDActor::tr("No Bias Filter"),
- UHMM3QDActor::tr("Turns off composition bias filter. This increases"
- " sensitivity somewhat, but can come at a high cost in"
- " speed."));
-
- // Other parameters
- //
- Descriptor nd(NO_NULL_ATTR,
- UHMM3QDActor::tr("No Null2"),
- UHMM3QDActor::tr("Turns off the null2 score corrections for"
- " biased composition."));
-
- Descriptor nsd(NSEQ_ATTR,
- UHMM3QDActor::tr("Number of Sequences"),
- UHMM3QDActor::tr("Specifies number of significant sequences."
- " It is used for domain E-value calculations."));
-
- Descriptor sd(SEED_ATTR,
- UHMM3QDActor::tr("Seed"),
- UHMM3QDActor::tr("Random number seed. The default is to use"
- " a fixed seed(42), so that results are exactly reproducible."
- " Any other positive integer will give different (but also"
- " reproducible) results. A choice of 0 uses a randomly"
- " chosen seed."));
-
- attributes << new Attribute(pd, BaseTypes::STRING_TYPE(), true);
- attributes << new Attribute(mind, BaseTypes::NUM_TYPE(), false, QVariant(10));
- attributes << new Attribute(maxd, BaseTypes::NUM_TYPE(), false, QVariant(1000));
- attributes << new Attribute(useEvd, BaseTypes::BOOL_TYPE(), false, true);
- attributes << new Attribute(ded, BaseTypes::NUM_TYPE(), false, QVariant(1));
- attributes << new Attribute(dtd, BaseTypes::NUM_TYPE(), false, QVariant(0.01));
-
- attributes << new Attribute(md, BaseTypes::BOOL_TYPE(), false, QVariant(false));
- attributes << new Attribute(f1d, BaseTypes::NUM_TYPE(), false, QVariant(0.02));
- attributes << new Attribute(f2d, BaseTypes::NUM_TYPE(), false, QVariant(0.001));
- attributes << new Attribute(f3d, BaseTypes::NUM_TYPE(), false, QVariant(0.00001));
- attributes << new Attribute(bd, BaseTypes::BOOL_TYPE(), false, QVariant(false));
- attributes << new Attribute(nd, BaseTypes::BOOL_TYPE(), false, QVariant(false));
- attributes << new Attribute(nsd, BaseTypes::NUM_TYPE(), false, QVariant(1));
- attributes << new Attribute(sd, BaseTypes::NUM_TYPE(), false, QVariant(42));
- }
-
- QMap<QString, PropertyDelegate*> delegates;
-
- {
- delegates[PROFILE_ATTR] = new URLDelegate(
- DialogUtils::prepareDocumentsFileFilterByObjType(UHMMObject::UHMM_OT, true),
- UHMM3SearchDialogImpl::HMM_FILES_DIR_ID, true/*multi*/);
-
- QVariantMap eMap; eMap["prefix"]= ("1e+"); eMap["minimum"] = (-99); eMap["maximum"] = (1);
- delegates[DOM_E_ATTR] = new SpinBoxDelegate(eMap);
-
- QVariantMap nMap; nMap["maximum"] = 999999999.00; nMap["minimum"] = 0.00;
- delegates[NSEQ_ATTR] = new DoubleSpinBoxDelegate(nMap);
-
- QVariantMap tMap; tMap["decimals"]= (2); tMap["minimum"] = 0.01; tMap["maximum"] = 99.99;
- tMap["singleStep"] = (1.0);
- delegates[DOM_T_ATTR] = new DoubleSpinBoxDelegate(tMap);
-
- QVariantMap sMap; sMap["minimum"] = 0; sMap["maximum"] = INT_MAX;
- delegates[SEED_ATTR] = new SpinBoxDelegate(sMap);
-
- QVariantMap lenMap; lenMap["minimum"] = 2; lenMap["maximum"] = INT_MAX;
- delegates[MIN_LEN] = new SpinBoxDelegate(lenMap);
- delegates[MAX_LEN] = new SpinBoxDelegate(lenMap);
- }
-
- editor = new DelegateEditor(delegates);
-}
-
-} //namespace
diff --git a/src/plugins_3rdparty/hmm3/src/search/uhmm3QDActor.h b/src/plugins_3rdparty/hmm3/src/search/uhmm3QDActor.h
deleted file mode 100644
index 79a0fa3..0000000
--- a/src/plugins_3rdparty/hmm3/src/search/uhmm3QDActor.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#ifndef _U2_UHMM3_QDACTOR_H_
-#define _U2_UHMM3_QDACTOR_H_
-
-#include "U2Lang/QDScheme.h"
-#include <U2Lang/QueryDesignerRegistry.h>
-
-
-namespace U2 {
-
-class UHMM3SWSearchTask;
-
-class UHMM3QDActor : public QDActor {
- Q_OBJECT
-public:
- UHMM3QDActor(QDActorPrototype const* proto);
- int getMinResultLen() const;
- int getMaxResultLen() const;
- QString getText() const;
- Task* getAlgorithmTask(const QVector<U2Region>& location);
- QColor defaultColor() const { return QColor(0x66,0xa3,0xd2); }
- virtual bool hasStrand() const { return false; }
- virtual void updateEditor();
-private slots:
- void sl_onTaskFinished(Task*);
- void sl_evChanged(int);
-private:
- QMap<UHMM3SWSearchTask*, qint64> offsets;
-};
-
-class UHMM3QDActorPrototype : public QDActorPrototype {
-public:
- UHMM3QDActorPrototype();
- QIcon getIcon() const { return QIcon(":hmm3/images/hmmer_16.png"); }
- QDActor* createInstance() const { return new UHMM3QDActor(this); }
-};
-
-} //namespace
-
-#endif
diff --git a/src/plugins_3rdparty/hmm3/src/search/uhmm3SearchResult.cpp b/src/plugins_3rdparty/hmm3/src/search/uhmm3SearchResult.cpp
deleted file mode 100644
index 98eec03..0000000
--- a/src/plugins_3rdparty/hmm3/src/search/uhmm3SearchResult.cpp
+++ /dev/null
@@ -1,106 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#include <math.h>
-#include <cassert>
-
-#include "uhmm3SearchResult.h"
-
-using namespace U2;
-
-static UHMM3SearchSeqDomainResult getDomainRes( const P7_HIT* hit, int dom, const P7_PIPELINE* pli ) {
- assert( NULL != hit );
- assert( 0 <= dom && dom < hit->ndom );
- UHMM3SearchSeqDomainResult res;
- const P7_DOMAIN& domain = hit->dcl[dom];
-
- res.isSignificant = domain.is_included;
- res.score = domain.bitscore;
- res.bias = domain.dombias;
- res.ival = domain.pvalue * pli->Z;
- res.cval = domain.pvalue * pli->domZ;
- res.queryRegion = U2Region( domain.ad->hmmfrom, domain.ad->hmmto - domain.ad->hmmfrom );
- res.seqRegion = U2Region( domain.ad->sqfrom - 1, domain.ad->sqto - domain.ad->sqfrom + 1 );
- res.envRegion = U2Region( domain.ienv, domain.jenv - domain.ienv );
- res.acc = domain.oasc / ( 1.0 + fabs( (float)( domain.jenv - domain.ienv ) ) );
- return res;
-}
-
-namespace U2 {
-
-void UHMM3SearchSeqDomainResult::writeQualifiersToAnnotation( AnnotationData * annData ) const {
- assert( NULL != annData );
- annData->qualifiers << U2Qualifier( "Independent_e-value", QString().sprintf( "%.5e", ival ) );
- annData->qualifiers << U2Qualifier( "Conditional_e-value", QString().sprintf( "%.5e", cval ) );
- annData->qualifiers << U2Qualifier( "Score", QString().sprintf( "%1.f", score ) );
- annData->qualifiers << U2Qualifier( "Bias", QString().sprintf( "%.5e", bias ) );
- annData->qualifiers << U2Qualifier( "Accuracy_per_residue", QString().sprintf( "%.5e", acc ) );
- annData->qualifiers << U2Qualifier( "HMM_region", QString().sprintf( "%d...%d", queryRegion.startPos, queryRegion.endPos()) );
- annData->qualifiers << U2Qualifier( "Envelope_of_domain_location", QString().sprintf( "%d...%d",
- envRegion.startPos, envRegion.endPos() ) );
-}
-
-void UHMM3SearchResult::fillDomainsResult( const P7_TOPHITS* th, const P7_PIPELINE* pli ) {
- assert( NULL != th && ( 0 == th->N || 1 == th->N ) );
- assert( NULL != pli );
-
- if( 0 == th->N ) {
- return;
- }
-
- P7_HIT* hit = th->hit[0];
-
- int d = 0;
- for( d = 0; d < hit->ndom; d++ ) {
- if( hit->dcl[d].is_reported ) {
- domainResList << getDomainRes( hit, d, pli );
- }
- }
-}
-
-void UHMM3SearchResult::fillFullSeqResults( const P7_TOPHITS* th, const P7_PIPELINE* pli ) {
- assert( NULL != th && ( 1 == th->N || 0 == th->N ) );
- assert( NULL != pli );
- fullSeqResult.isReported = th->nreported != 0;
- if( !th->N ) {
- assert( !fullSeqResult.isReported );
- return;
- }
- P7_HIT* hit = th->hit[0];
- if( !(hit->flags & p7_IS_REPORTED) ) {
- return;
- }
-
- fullSeqResult.eval = hit->pvalue * pli->Z;
- fullSeqResult.score = hit->score;
- fullSeqResult.bias = hit->pre_score - hit->score;
- fullSeqResult.expectedDomainsNum = hit->nexpected;
- fullSeqResult.reportedDomainsNum = hit->nreported;
-}
-
-void UHMM3SearchResult::fillResults( const P7_TOPHITS* th, const P7_PIPELINE* pli ) {
- assert( NULL != th );
- assert( NULL != pli );
- fillFullSeqResults( th, pli );
- fillDomainsResult( th, pli );
-}
-
-} // U2
diff --git a/src/plugins_3rdparty/hmm3/src/search/uhmm3SearchResult.h b/src/plugins_3rdparty/hmm3/src/search/uhmm3SearchResult.h
deleted file mode 100644
index 601272a..0000000
--- a/src/plugins_3rdparty/hmm3/src/search/uhmm3SearchResult.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#ifndef _GB2_UHMM3_SEARCH_RESULT_H_
-#define _GB2_UHMM3_SEARCH_RESULT_H_
-
-#include <U2Core/U2Region.h>
-#include <U2Core/AnnotationData.h>
-
-#include <hmmer3/hmmer.h>
-
-namespace U2 {
-
-class UHMM3SearchSeqDomainResult {
-public:
- float score;
- float bias;
- double ival; /* independent e-value */
- double cval; /* conditional e-value */
-
- U2Region queryRegion; /* hmm region for hmmsearch and seq region for phmmer */
- U2Region seqRegion;
- U2Region envRegion; /* envelope of domains location */
-
- double acc; /* expected accuracy per residue of the alignment */
-
- bool isSignificant; /* domain meets inclusion tresholds */
-
-public:
- void writeQualifiersToAnnotation( AnnotationData * annData ) const;
-
-}; // UHMM3SearchSeqDomainResult
-
-class UHMM3SearchCompleteSeqResult {
-public:
- double eval;
- float score;
- float bias;
- float expectedDomainsNum;
- int reportedDomainsNum;
- bool isReported;
-
- UHMM3SearchCompleteSeqResult() : isReported( false ) {}
-}; // UHMM3SearchCompleteSeqResult
-
-class UHMM3SearchResult {
-public:
- UHMM3SearchCompleteSeqResult fullSeqResult;
- QList< UHMM3SearchSeqDomainResult > domainResList;
-
- void fillResults( const P7_TOPHITS* th, const P7_PIPELINE* pli );
-
-private:
- void fillFullSeqResults( const P7_TOPHITS* th, const P7_PIPELINE* pli );
- void fillDomainsResult( const P7_TOPHITS* th, const P7_PIPELINE* pli );
-
-}; // UHMM3SearchResult
-
-} // U2
-
-#endif // _GB2_UHMM3_SEARCH_RESULT_H_
diff --git a/src/plugins_3rdparty/hmm3/src/search/uhmm3search.cpp b/src/plugins_3rdparty/hmm3/src/search/uhmm3search.cpp
deleted file mode 100644
index 574b616..0000000
--- a/src/plugins_3rdparty/hmm3/src/search/uhmm3search.cpp
+++ /dev/null
@@ -1,157 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#include <U2Core/DNAInfo.h>
-
-#include <util/uhmm3Utilities.h>
-#include "uhmm3search.h"
-
-using namespace U2;
-
-static void delAllIfYouCan( ESL_SQ* dbsq, ESL_ALPHABET* abc, P7_BG* bg, P7_PIPELINE* pli, P7_TOPHITS* th, P7_PROFILE* gm, P7_OPROFILE* om ) {
- if( NULL != bg ) { p7_bg_Destroy( bg ); }
- if( NULL != pli ) { p7_pipeline_Destroy( pli ); }
- if( NULL != th ) { p7_tophits_Destroy( th ); }
- if( NULL != gm ) { p7_profile_Destroy( gm ); }
- if( NULL != om ) { p7_oprofile_Destroy( om ); }
- if( NULL != dbsq ) { esl_sq_Destroy( dbsq ); }
- if( NULL != abc ) { esl_alphabet_Destroy( abc ); }
-}
-
-namespace U2 {
-
-const int SEARCH_PERCENT_PER_FILTERS = 20;
-
-/* we catch all exceptions here. don't worry
-*/
-UHMM3SearchResult UHMM3Search::search( const P7_HMM* hmm, const char* sq, int sqLen,
- const UHMM3SearchSettings& settings, TaskStateInfo& tsi, int wholeSeqSz ) {
- ESL_SQ *dbsq = NULL;
- ESL_ALPHABET *abc = NULL; /* copy of abc in hmm */
- P7_BG *bg = NULL;
- P7_PIPELINE *pli = NULL;
- P7_TOPHITS *th = NULL;
- P7_PROFILE *gm = NULL;
- P7_OPROFILE *om = NULL;
- UHMM3SearchResult res;
- QByteArray errStr;
-
- if( tsi.hasError() || tsi.cancelFlag ) {
- return res;
- }
- if( NULL == hmm ) {
- tsi.setError( tr( "Bad HMM profile given" ) );
- return res;
- }
- if( NULL == sq || 0 >= sqLen ) {
- tsi.setError( tr( "Empty sequence given" ) );
- return res;
- }
-
- try {
- // preparing...
- dbsq = esl_sq_CreateFrom( NULL, sq, sqLen, NULL, NULL, NULL );
- if( NULL == dbsq ) {
- errStr = tr( "Run out of memory (creation of sequence failed)" ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- }
- int dbsqAbcType = eslUNKNOWN;
- esl_sq_GuessAlphabet( dbsq, &dbsqAbcType );
- if( eslUNKNOWN != dbsqAbcType && dbsqAbcType != hmm->abc->type ) { /* if we can recognize */
- errStr = tr( "Profile HMM and sequence alphabets no matched" ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- }
-
- abc = esl_alphabet_Create( hmm->abc->type );
- if( NULL == abc ) {
- errStr = tr( "Run out of memory (creation of alphabet failed)" ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- }
- esl_sq_Digitize( abc, dbsq );
- if( NULL == dbsq || NULL == dbsq->dsq ) {
- errStr = tr( "Run out of memory (digitizing of sequence failed)" ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- }
-
- bg = p7_bg_Create( abc );
- if( NULL == bg ) {
- errStr = tr( "Run out of memory (creation of null model failed)" ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- }
-
- gm = p7_profile_Create (hmm->M, abc);
- if( NULL == gm ) {
- errStr = tr( "Run out of memory" ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- }
- om = p7_oprofile_Create(hmm->M, abc);
- if( NULL == om ) {
- errStr = tr( "Run out of memory (creation of optimized profile failed)" ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- }
- p7_ProfileConfig(hmm, bg, gm, 100, p7_LOCAL); /* 100 is a dummy length for now; and MSVFilter requires local mode */
- p7_oprofile_Convert(gm, om); /* <om> is now p7_LOCAL, multihit */
-
- /* Create processing pipeline and hit list */
- pli = p7_pipeline_Create( &settings, om->M, 100, p7_SEARCH_SEQS); /* L_hint = 100 is just a dummy for now */
- if( NULL == pli ) {
- errStr = tr( "Run out of memory" ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- }
- th = p7_tophits_Create();
- if( NULL == th ) {
- errStr = tr( "Run out of memory (top hits list creation failed)" ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- }
-
- // main part
- p7_pli_NewModel( pli, om, bg );
- p7_pli_NewSeq( pli, dbsq );
- // !!! CODE CHANGED !!!
- //p7_bg_SetLength( bg, dbsq->n );
- p7_bg_SetLength( bg, wholeSeqSz );
- // !!! CODE CHANGED !!!
- //p7_oprofile_ReconfigLength( om, dbsq->n);
- p7_oprofile_ReconfigLength( om, dbsq->n, wholeSeqSz );
-
- tsi.progress = 0;
- int ret = p7_Pipeline( pli, om, bg, dbsq, th, SEARCH_PERCENT_PER_FILTERS, tsi, wholeSeqSz );
- if( eslCANCELED == ret ) {
- errStr = tr( HMMER3_CANCELED_ERROR ).toLatin1();
- throwUHMMER3Exception( errStr.data() );
- }
- assert( eslOK == ret );
-
- // work with results
- p7_tophits_Sort(th);
- p7_tophits_Threshold(th, pli);
- res.fillResults( th, pli );
- } catch( const UHMMER3Exception& ex ) {
- tsi.setError( ex.msg );
- } catch(...) {
- tsi.setError( tr( HMMER3_UNKNOWN_ERROR ) );
- }
-
- delAllIfYouCan( dbsq, abc, bg, pli, th, gm, om );
- return res;
-}
-
-} // U2
diff --git a/src/plugins_3rdparty/hmm3/src/task_local_storage/uHMMSearchTaskLocalData.h b/src/plugins_3rdparty/hmm3/src/task_local_storage/uHMMSearchTaskLocalData.h
deleted file mode 100644
index 312a269..0000000
--- a/src/plugins_3rdparty/hmm3/src/task_local_storage/uHMMSearchTaskLocalData.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#ifndef _GB2_HMMER3_SEARCH_TL_DATA_H_
-#define _GB2_HMMER3_SEARCH_TL_DATA_H_
-
-#include <hmmer3/p7_config.h>
-
-namespace U2 {
-
-struct UHMM3SearchTaskLocalData {
- float flogsum_lookup[p7_LOGSUM_TBL];
-
- UHMM3SearchTaskLocalData();
-}; // UHMM3SearchTaskLocalData
-
-} // U2
-
-#endif // _GB2_HMMER3_SEARCH_TL_DATA_H_
diff --git a/src/plugins_3rdparty/hmm3/src/task_local_storage/uHMMSearchTaskLocalStorage.cpp b/src/plugins_3rdparty/hmm3/src/task_local_storage/uHMMSearchTaskLocalStorage.cpp
deleted file mode 100644
index 422fdd9..0000000
--- a/src/plugins_3rdparty/hmm3/src/task_local_storage/uHMMSearchTaskLocalStorage.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#include <cassert>
-
-#include <QtCore/QMutexLocker>
-
-#include "uHMMSearchTaskLocalStorage.h"
-
-namespace U2 {
-
-QHash< qint64, UHMM3SearchTaskLocalData* > UHMM3SearchTaskLocalStorage::data;
-QThreadStorage< ContextId* > UHMM3SearchTaskLocalStorage::tls;
-QMutex UHMM3SearchTaskLocalStorage::mutex;
-const UHMM3SearchTaskLocalData UHMM3SearchTaskLocalStorage::defaultData;
-
-const UHMM3SearchTaskLocalData* UHMM3SearchTaskLocalStorage::current() {
- ContextId* idc = tls.localData();
- if( NULL != idc ) {
- QMutexLocker locker( &mutex );
- UHMM3SearchTaskLocalData* res = data.value( idc->id );
- assert( NULL != res );
- return res;
- } else {
- return &defaultData;
- }
-}
-
-UHMM3SearchTaskLocalData* UHMM3SearchTaskLocalStorage::createTaskContext( qint64 ctxId ) {
- QMutexLocker locker( &mutex );
- assert( !data.contains( ctxId ) );
- UHMM3SearchTaskLocalData* ctx = new UHMM3SearchTaskLocalData();
- data[ctxId] = ctx;
-
- assert(!tls.hasLocalData());
- ContextId* idc = new ContextId( ctxId );
- tls.setLocalData( idc );
-
- return ctx;
-}
-
-void UHMM3SearchTaskLocalStorage::freeTaskContext( qint64 ctxId ) {
- QMutexLocker locker( &mutex );
-
- UHMM3SearchTaskLocalData* lData = data.value( ctxId );
- assert( NULL != lData );
-
- int howMany = data.remove( ctxId );
- Q_UNUSED( howMany );
- assert( 1 == howMany );
-
- delete lData;
-}
-
-} // U2
diff --git a/src/plugins_3rdparty/hmm3/src/tests/uhmmer3BuildTests.cpp b/src/plugins_3rdparty/hmm3/src/tests/uhmmer3BuildTests.cpp
deleted file mode 100644
index 11514d8..0000000
--- a/src/plugins_3rdparty/hmm3/src/tests/uhmmer3BuildTests.cpp
+++ /dev/null
@@ -1,429 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#include <U2Core/AppContext.h>
-#include <U2Core/IOAdapter.h>
-#include <U2Core/IOAdapterUtils.h>
-#include <U2Core/TextUtils.h>
-
-#include "uhmmer3BuildTests.h"
-
-namespace U2 {
-
-/**************************
-* GTest_UHMMER3Build
-**************************/
-
-const QString GTest_UHMMER3Build::INPUT_FILE_TAG = "inputFile";
-const QString GTest_UHMMER3Build::OUTPUT_FILE_TAG = "outputFile";
-const QString GTest_UHMMER3Build::DEL_OUTPUT_TAG = "delOutput";
-
-const QString GTest_UHMMER3Build::SEED_OPTION_TAG = "seed";
-
-const QString GTest_UHMMER3Build::MODEL_CONSTRUCTION_OPTION_TAG = "mc";
-const QString GTest_UHMMER3Build::RELATIVE_WEIGHTING_OPTION_TAG = "rw";
-const QString GTest_UHMMER3Build::EFFECTIVE_WEIGHTING_OPTION_TAG = "ew";
-const QString GTest_UHMMER3Build::E_VALUE_CALIBATION_OPTION_TAG = "eval";
-
-static void setSeedOption( UHMM3BuildSettings & settings, TaskStateInfo & stateInfo, const QString& str ) {
- if( str.isEmpty() ) {
- return;
- }
-
- bool ok = false;
- int num = str.toInt( &ok );
- if( !ok || !( 0 <= num ) ) {
- stateInfo.setError( "cannot_parse_option_seed" );
- return;
- }
- settings.seed = num;
-}
-
-static void setModelConstructionOption( UHMM3BuildSettings & settings, TaskStateInfo & stateInfo, const QString& s ) {
- QString str = s.toLower();
- if( str.startsWith( "fast" ) ) {
- settings.archStrategy = p7_ARCH_FAST;
- QString numStr = str.mid( 4 ).trimmed();
-
- if( !numStr.isEmpty() ) {
- bool ok = false;
- float num = numStr.toFloat( &ok );
- if( !ok || !( 0 <= num && 1 >= num ) ) {
- stateInfo.setError( "cannot_parse_mc_option: symfrac" );
- return;
- }
- settings.symfrac = num;
- }
- return;
- }
- if( str.startsWith( "hand" ) ) {
- settings.archStrategy = p7_ARCH_HAND;
- return;
- }
- if( !str.isEmpty() ) {
- stateInfo.setError( "unrecognized_mc_option" );
- }
-}
-
-static void setRelativeWeightingOption( UHMM3BuildSettings & settings, TaskStateInfo & stateInfo, const QString& s ) {
- QString str = s.toLower();
- if( str.startsWith( "wgsc" ) ) {
- settings.wgtStrategy = p7_WGT_GSC;
- return;
- }
- if( str.startsWith( "wblosum" ) ) {
- settings.wgtStrategy = p7_WGT_BLOSUM;
- QString numStr = str.mid( 7 ).trimmed();
- if( !numStr.isEmpty() ) {
- bool ok = false;
- float num = numStr.toFloat( &ok );
- if( !ok || !( 0 <= num && 1 >= num ) ) {
- stateInfo.setError( "cannot_parse_rw_option:wid" );
- return;
- }
- settings.wid = num;
- }
- return;
- }
- if( str.startsWith( "wpb" ) ) {
- settings.wgtStrategy = p7_WGT_PB;
- return;
- }
- if( str.startsWith( "wnone" ) ) {
- settings.wgtStrategy = p7_WGT_NONE;
- return;
- }
- if( str.startsWith( "wgiven" ) ) {
- settings.wgtStrategy = p7_WGT_GIVEN;
- return;
- }
- if( !str.isEmpty() ) {
- stateInfo.setError( "unrecognized_rw_option" );
- }
-}
-
-static void setEffectiveWeightingOption( UHMM3BuildSettings & settings, TaskStateInfo & stateInfo, const QString& s ) {
- QString str = s.toLower();
- if( str.startsWith( "eent" ) ) {
- settings.effnStrategy = p7_EFFN_ENTROPY;
- QString numStr = str.mid( 4 ).trimmed();
-
- if( !numStr.isEmpty() ) {
- QStringList nums = numStr.split( " ", QString::SkipEmptyParts );
-
- if( nums.isEmpty() ) {
- stateInfo.setError( "cannot_parse_ew_option: eent" );
- return;
- }
- bool ok = false;
- double num = nums[0].toDouble( &ok );
- if( !ok ) {
- stateInfo.setError( "cannot_parse_ew_option: ere" );
- return;
- }
- if( num > 0 ) {
- settings.ere = num;
- return;
- }
- }
- return;
- }
- if( str.startsWith( "eclust" ) ) {
- settings.effnStrategy = p7_EFFN_CLUST;
- QString numStr = str.mid( 6 ).trimmed();
-
- if( !numStr.isEmpty() ) {
- bool ok = false;
- double num = numStr.toDouble( &ok );
- if( !ok || !( 0 <= num && 1 >= num ) ) {
- stateInfo.setError( "cannot_parse_ew_option: eid" );
- return;
- }
- settings.eid = num;
- }
- return;
- }
- if( str.startsWith( "enone" ) ) {
- settings.effnStrategy = p7_EFFN_NONE;
- return;
- }
- if( str.startsWith( "eset" ) ) {
- settings.effnStrategy = p7_EFFN_SET;
- QString numStr = str.mid( 4 ).trimmed();
- bool ok = false;
- double num = numStr.toDouble( &ok );
-
- if( !ok || 0 > num ) {
- stateInfo.setError( "cannot_parse_ew_option: eset" );
- return;
- }
- settings.eset = num;
- return;
- }
- if( !str.isEmpty() ) {
- stateInfo.setError( "unrecognized_ew_option" );
- }
-}
-
-static void setEvalueCalibrationOption( UHMM3BuildSettings & settings, TaskStateInfo & stateInfo, const QString& s ) {
- QString str = s.toLower();
- if( str.isEmpty() ) {
- return;
- }
- QStringList l = str.split( " ", QString::SkipEmptyParts );
- if( 5 != l.size() ) {
- stateInfo.setError( "error_parsing_eval_option" );
- return;
- }
- settings.evl = l[0].toInt();
- settings.evn = l[1].toInt();
- settings.efl = l[2].toInt();
- settings.efn = l[3].toInt();
- settings.eft = l[4].toDouble();
-}
-
-void GTest_UHMMER3Build::init( XMLTestFormat *tf, const QDomElement& el ) {
- Q_UNUSED( tf );
-
- inFile = el.attribute( INPUT_FILE_TAG );
- outFile = el.attribute( OUTPUT_FILE_TAG );
-
- QString delOutStr = el.attribute( DEL_OUTPUT_TAG );
- delOutFile = !delOutStr.isEmpty() && delOutStr.toLower() != "no" && delOutStr.toLower() != "n";
- setBuildSettings( bldSettings.inner, el, stateInfo );
-}
-
-void GTest_UHMMER3Build::setBuildSettings( UHMM3BuildSettings & settings, const QDomElement& el, TaskStateInfo & ti ) {
- setModelConstructionOption( settings, ti, el.attribute( MODEL_CONSTRUCTION_OPTION_TAG ) );
- setRelativeWeightingOption( settings, ti, el.attribute( RELATIVE_WEIGHTING_OPTION_TAG ) );
- setEffectiveWeightingOption( settings, ti, el.attribute( EFFECTIVE_WEIGHTING_OPTION_TAG ) );
- setEvalueCalibrationOption( settings, ti, el.attribute( E_VALUE_CALIBATION_OPTION_TAG ) );
- setSeedOption( settings, ti, el.attribute( SEED_OPTION_TAG ) );
-}
-
-void GTest_UHMMER3Build::setAndCheckArgs() {
- if( hasError() ) {
- return;
- }
-
- if( inFile.isEmpty() ) {
- stateInfo.setError( tr( "No input file given" ) );
- return;
- }
- inFile = env->getVar( "COMMON_DATA_DIR" ) + "/" + inFile;
-
- if( outFile.isEmpty() ) {
- stateInfo.setError( tr( "No output file given" ) );
- return;
- }
- outFile = env->getVar( "TEMP_DATA_DIR" ) + "/" + outFile;
-
- /* Build settings */
- bldSettings.outFile = outFile;
-}
-
-void GTest_UHMMER3Build::prepare() {
- setAndCheckArgs();
- if( hasError() ) {
- return;
- }
-
- buildTask = new UHMM3BuildToFileTask( bldSettings, inFile );
- addSubTask( buildTask );
-}
-
-Task::ReportResult GTest_UHMMER3Build::report() {
- if( buildTask->hasError() ) {
- if( delOutFile ) {
- QFile::remove( outFile );
- }
- stateInfo.setError( buildTask->getError() );
- }
- return ReportResult_Finished;
-}
-
-void GTest_UHMMER3Build::cleanup() {
- if( delOutFile ) {
- QFile::remove( outFile );
- }
-}
-
-GTest_UHMMER3Build::~GTest_UHMMER3Build() {
- cleanup();
-}
-
-/**************************
-* GTest_CompareHmmFiles
-**************************/
-
-const QString GTest_CompareHmmFiles::FILE1_NAME_TAG = "file1";
-const QString GTest_CompareHmmFiles::FILE2_NAME_TAG = "file2";
-const QString GTest_CompareHmmFiles::FILE1_TMP_TAG = "tmp1";
-const QString GTest_CompareHmmFiles::FILE2_TMP_TAG = "tmp2";
-
-const int BUF_SZ = 2048;
-const char TERM_SYM = '\0';
-
-const QByteArray DATE_STR = "DATE";
-const QByteArray NAME_STR = "NAME";
-const QByteArray HEADER_STR = "HMMER3/";
-
-void GTest_CompareHmmFiles::init( XMLTestFormat *tf, const QDomElement &el ) {
- Q_UNUSED( tf );
-
- filename1 = el.attribute( FILE1_NAME_TAG );
- filename2 = el.attribute( FILE2_NAME_TAG );
-
- QString file1TmpStr = el.attribute( FILE1_TMP_TAG );
- file1Tmp = !file1TmpStr.isEmpty() && file1TmpStr.toLower() != "no" && file1TmpStr.toLower() != "n";
-
- QString file2TmpStr = el.attribute( FILE2_TMP_TAG );
- file2Tmp = !file2TmpStr.isEmpty() && file2TmpStr.toLower() != "no" && file2TmpStr.toLower() != "n";
-}
-
-void GTest_CompareHmmFiles::setAndCheckArgs() {
- if( filename1.isEmpty() ) {
- stateInfo.setError( tr( "File #1 not set" ) );
- return;
- }
- filename1 = env->getVar( file1Tmp? "TEMP_DATA_DIR" : "COMMON_DATA_DIR" ) + "/" + filename1;
-
- if( filename2.isEmpty() ) {
- stateInfo.setError( tr( "File #2 not set" ) );
- return;
- }
- filename2 = env->getVar( file2Tmp? "TEMP_DATA_DIR" : "COMMON_DATA_DIR" ) + "/" + filename2;
-}
-
-static const float BUILD_COMPARE_FLOAT_EPS = 0.00002;
-
-static bool compareStr( const QString& s1, const QString& s2 ) {
- assert( s1.size() == s2.size() );
-
- QStringList words1 = s1.split( QRegExp( "\\s+" ), QString::SkipEmptyParts );
- QStringList words2 = s2.split( QRegExp( "\\s+" ), QString::SkipEmptyParts );
-
- if( words1.size() != words2.size() ) {
- return false;
- }
-
- int sz = words1.size();
- for( int i = 0; i < sz; ++i ) {
- bool ok1 = false;
- bool ok2 = false;
- float num1 = 0;
- float num2 = 0;
-
- num1 = words1.at( i ).toFloat( &ok1 );
- num2 = words2.at( i ).toFloat( &ok2 );
-
- if( ok1 != ok2 ) {
- return false;
- } else {
- if( ok1 ) {
- if( (isfin( num1 ) && !isfin( num2 )) || ( isfin( num2 ) && !isfin( num1 ) ) ) {
- return false;
- }
- if( qAbs( num1 - num2 ) > BUILD_COMPARE_FLOAT_EPS ) {
- return false;
- }
- continue;
- } else {
- if( words1.at( i ) != words2.at( i ) ) {
- return false;
- }
- continue;
- }
- }
- }
- return true;
-}
-
-Task::ReportResult GTest_CompareHmmFiles::report() {
- assert( !hasError() );
- setAndCheckArgs();
- if( hasError() ) {
- return ReportResult_Finished;
- }
-
- IOAdapterFactory* iof1 = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(IOAdapterUtils::url2io(filename1));
- QScopedPointer<IOAdapter> io1(iof1->createIOAdapter());
- if (io1.isNull()) {
- stateInfo.setError( tr( "Error creating ioadapter for first file" ) );
- return ReportResult_Finished;
- }
- if( !io1->open( filename1, IOAdapterMode_Read ) ) {
- stateInfo.setError( tr( "Error opening 1 file" ) );
- return ReportResult_Finished;
- }
-
- IOAdapterFactory* iof2 = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(IOAdapterUtils::url2io(filename2));
- QScopedPointer<IOAdapter> io2(iof2->createIOAdapter());
- if (io2.isNull()) {
- stateInfo.setError( tr( "Error creating ioadapter for second file" ) );
- return ReportResult_Finished;
- }
- if( !io2->open( filename2, IOAdapterMode_Read ) ) {
- stateInfo.setError( tr( "Error opening second file" ) );
- return ReportResult_Finished;
- }
-
- QByteArray buf1( BUF_SZ, TERM_SYM );
- QByteArray buf2( BUF_SZ, TERM_SYM );
- int bytes1 = 0;
- int bytes2 = 0;
-
- do {
- bytes1 = io1->readUntil( buf1.data(), BUF_SZ, TextUtils::LINE_BREAKS, IOAdapter::Term_Include );
- bytes2 = io2->readUntil( buf2.data(), BUF_SZ, TextUtils::LINE_BREAKS, IOAdapter::Term_Include );
- if( buf1.startsWith( DATE_STR ) && buf2.startsWith( DATE_STR ) ) {
- continue;
- }
- if( buf1.startsWith( HEADER_STR ) && buf2.startsWith( HEADER_STR ) ) {
- continue;
- }
- if( buf1.startsWith( NAME_STR ) && buf2.startsWith( NAME_STR ) ) {
- QString name1 = QByteArray( buf1.data(), bytes1 ).mid( NAME_STR.size() ).trimmed();
- QString name2 = QByteArray( buf2.data(), bytes2 ).mid( NAME_STR.size() ).trimmed();
-
- if( name1.startsWith( name2 ) || name2.startsWith( name1 ) ) {
- continue;
- }
- stateInfo.setError( tr( "Names of aligments not matched" ) );
- return ReportResult_Finished;
- }
- if( bytes1 != bytes2 ) {
- stateInfo.setError( tr( "Comparing files length not matched" ) );
- return ReportResult_Finished;
- }
-
- QString s1 = QString::fromLatin1( buf1.data(), bytes1 );
- QString s2 = QString::fromLatin1( buf2.data(), bytes2 );
- if( !compareStr( s1, s2 ) ) {
- stateInfo.setError( tr( "Files parts not equal:'%1' and '%2'" ).arg( s1 ).arg( s2 ) );
- return ReportResult_Finished;
- }
- } while( 0 < bytes1 && 0 < bytes2 );
-
- return ReportResult_Finished;
-}
-
-} // U2
diff --git a/src/plugins_3rdparty/hmm3/src/tests/uhmmer3BuildTests.h b/src/plugins_3rdparty/hmm3/src/tests/uhmmer3BuildTests.h
deleted file mode 100644
index 73bb9c0..0000000
--- a/src/plugins_3rdparty/hmm3/src/tests/uhmmer3BuildTests.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#ifndef _GB2_UHMMER_BUILD_TESTS_H_
-#define _GB2_UHMMER_BUILD_TESTS_H_
-
-#include <QtXml/QDomElement>
-
-#include <U2Test/XMLTestUtils.h>
-#include <U2Core/Task.h>
-
-#include <build/uHMM3BuildTask.h>
-
-namespace U2 {
-
-/*****************************************
-* Builds hmm profile from input file
-*****************************************/
-class GTest_UHMMER3Build : public GTest {
- Q_OBJECT
-public:
- static const QString INPUT_FILE_TAG;
- static const QString OUTPUT_FILE_TAG;
- static const QString DEL_OUTPUT_TAG;
-
- static const QString RELATIVE_WEIGHTING_OPTION_TAG; /* --wgsc, --wblosum, --wpb, --wnone, --wgiven */
-
- static const QString EFFECTIVE_WEIGHTING_OPTION_TAG;/* --eent, --eclust, --enone, --eset */
-
- static const QString E_VALUE_CALIBATION_OPTION_TAG; /* */
-
- static const QString MODEL_CONSTRUCTION_OPTION_TAG; /* --fast + symfrac and
- --hand ( requires RF annotation )*/
- static const QString SEED_OPTION_TAG;
-
- static void setBuildSettings( UHMM3BuildSettings & settings, const QDomElement& el, TaskStateInfo & ti );
-
-public:
- SIMPLE_XML_TEST_BODY_WITH_FACTORY( GTest_UHMMER3Build, "uhmmer3-build" );
- ~GTest_UHMMER3Build();
-
- virtual void prepare();
- ReportResult report();
- void cleanup();
-
-private:
- void setAndCheckArgs();
-
-private:
- QString inFile;
- QString outFile;
- bool delOutFile;
- UHMM3BuildToFileTask* buildTask;
- UHMM3BuildTaskSettings bldSettings;
-
-}; // GTest_UHMMER3Build
-
-/*****************************************
-* Totally compares two hmm files
-*****************************************/
-class GTest_CompareHmmFiles : public GTest {
- Q_OBJECT
-public:
- static const QString FILE1_NAME_TAG;
- static const QString FILE2_NAME_TAG;
- static const QString FILE1_TMP_TAG;
- static const QString FILE2_TMP_TAG;
-
-public:
- SIMPLE_XML_TEST_BODY_WITH_FACTORY( GTest_CompareHmmFiles, "hmm3-compare-files" );
- ReportResult report();
-
-private:
- void setAndCheckArgs();
-
-private:
- QString filename1;
- QString filename2;
- bool file1Tmp;
- bool file2Tmp;
-
-}; // GTest_CompareHmmFiles
-
-} // U2
-
-#endif // _GB2_UHMMER_BUILD_TESTS_H_
diff --git a/src/plugins_3rdparty/hmm3/src/tests/uhmmer3PhmmerTests.cpp b/src/plugins_3rdparty/hmm3/src/tests/uhmmer3PhmmerTests.cpp
deleted file mode 100644
index 346d356..0000000
--- a/src/plugins_3rdparty/hmm3/src/tests/uhmmer3PhmmerTests.cpp
+++ /dev/null
@@ -1,260 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#include <U2Core/AppContext.h>
-#include <U2Algorithm/SubstMatrixRegistry.h>
-#include <U2Core/L10n.h>
-#include <U2Core/LoadDocumentTask.h>
-
-#include "uhmmer3BuildTests.h"
-#include "uhmmer3SearchTests.h"
-#include "uhmmer3PhmmerTests.h"
-
-namespace U2 {
-
-/****************************************
- * GTest_UHMM3Phmmer
- ****************************************/
-
-const QString GTest_UHMM3Phmmer::QUERY_FILENAME_TAG = "query";
-const QString GTest_UHMM3Phmmer::DB_FILENAME_TAG = "db";
-const QString GTest_UHMM3Phmmer::PHMMER_TASK_CTX_NAME_TAG = "ctxName";
-
-const QString GTest_UHMM3Phmmer::GAP_OPEN_PROBAB_OPTION_TAG = "popen";
-const QString GTest_UHMM3Phmmer::GAP_EXTEND_PROBAB_OPTION_TAG = "pextend";
-const QString GTest_UHMM3Phmmer::SUBST_MATR_NAME_OPTION_TAG = "substMatr";
-const QString GTest_UHMM3Phmmer::SEARCH_CHUNK_OPTION_TAG = "chunk";
-
-const double BAD_DOUBLE_OPTION = -1.0;
-
-static void setDoubleOption( double & to, const QString & str, TaskStateInfo & ti ) {
- if( str.isEmpty() ) {
- return;
- }
- bool ok = false;
- to = str.toDouble( &ok );
- if( !ok ) {
- to = BAD_DOUBLE_OPTION;
- ti.setError( QString( "cannot_parse_double_from: %1" ).arg( str ) );
- }
-}
-
-static void setSubstMatrixOption( SMatrix& to, const QString & str, TaskStateInfo & ti ) {
- if( str.isEmpty() ) {
- to = SMatrix();
- return;
- }
- SubstMatrixRegistry * smr = AppContext::getSubstMatrixRegistry();
- assert( NULL != smr );
-
- if( smr->getMatrix( str ).isEmpty() ) {
- ti.setError( QString( "matrix %1 not registered" ).arg( str ) );
- return;
- }
- to = smr->getMatrix(str);
- assert(!to.isEmpty());
-}
-
-void GTest_UHMM3Phmmer::init( XMLTestFormat *tf, const QDomElement& el ) {
- Q_UNUSED( tf );
-
- phmmerTask = NULL;
- queryFilename = el.attribute( QUERY_FILENAME_TAG );
- dbFilename = el.attribute( DB_FILENAME_TAG );
- taskCtxName = el.attribute( PHMMER_TASK_CTX_NAME_TAG );
- ctxAdded = false;
- loadDbTask = NULL;
-
- UHMM3SearchSettings searchSettings;
- setDefaultUHMM3SearchSettings( &searchSettings );
- GTest_UHMM3Search::setSearchTaskSettings( searchSettings, el, settigsStateInfo );
- if( settigsStateInfo.hasError() ) { return; }
- settings.setSearchSettings( searchSettings );
-
- UHMM3BuildSettings buildSettings;
- setDefaultUHMM3BuildSettings( &buildSettings );
- GTest_UHMMER3Build::setBuildSettings( buildSettings, el, settigsStateInfo );
- settings.setBuildSettings( buildSettings );
- if( settigsStateInfo.hasError() ) { return; }
-
- setDoubleOption( settings.popen, el.attribute( GAP_OPEN_PROBAB_OPTION_TAG ), scoringSystemStateInfo );
- if( scoringSystemStateInfo.hasError() ) { return; }
- setDoubleOption( settings.pextend, el.attribute( GAP_EXTEND_PROBAB_OPTION_TAG ), scoringSystemStateInfo );
- if( scoringSystemStateInfo.hasError() ) { return; }
- setSubstMatrixOption( settings.substMatr, el.attribute( SUBST_MATR_NAME_OPTION_TAG ), scoringSystemStateInfo );
- assert( settings.substMatr.isEmpty() );
- if( scoringSystemStateInfo.hasError() ) { return; }
-
- chunk = UHMM3SWPhmmerTask::DEFAULT_CHUNK_SIZE;
- QString chunkStr = el.attribute(SEARCH_CHUNK_OPTION_TAG);
- if(!chunkStr.isEmpty()) {
- bool ok = false;
- int chunkCandidate = chunkStr.toInt(&ok);
- if(ok && chunkCandidate > 0) {
- chunk = chunkCandidate;
- }
- }
-}
-
-void GTest_UHMM3Phmmer::setAndCheckArgs() {
- assert( !stateInfo.hasError() );
-
- if( queryFilename.isEmpty() ) {
- stateInfo.setError( L10N::badArgument( "query sequence filename" ) );
- return;
- }
- queryFilename = env->getVar( "COMMON_DATA_DIR" ) + "/" +queryFilename;
-
- if( dbFilename.isEmpty() ) {
- stateInfo.setError( L10N::badArgument( "db sequence filename" ) );
- return;
- }
- dbFilename = env->getVar( "COMMON_DATA_DIR" ) + "/" + dbFilename;
-
- if( taskCtxName.isEmpty() ) {
- stateInfo.setError( L10N::badArgument( "phmmer task context name" ) );
- return;
- }
-
- if( settigsStateInfo.hasError() ) {
- stateInfo.setError( settigsStateInfo.getError() );
- return;
- }
- if( scoringSystemStateInfo.hasError() ) {
- stateInfo.setError( scoringSystemStateInfo.getError() );
- return;
- }
-}
-
-void GTest_UHMM3Phmmer::prepare() {
- assert( !hasError() && NULL == phmmerTask );
- setAndCheckArgs();
- if( hasError() ) {
- return;
- }
- loadDbTask = LoadDocumentTask::getDefaultLoadDocTask(dbFilename);
- if(loadDbTask == NULL) {
- setError(L10N::errorOpeningFileRead(dbFilename));
- return;
- }
- addSubTask(loadDbTask);
-}
-
-QList<Task*> GTest_UHMM3Phmmer::onSubTaskFinished(Task* subTask) {
- QList<Task*> res;
- if(subTask->hasError()) {
- setError(subTask->getError());
- return res;
- }
-
- if( loadDbTask == subTask ) {
- DNASequence dbSeq = UHMM3PhmmerTask::getSequenceFromDocument( loadDbTask->getDocument(), stateInfo );
- if( hasError() ) {
- setError(getError());
- }
- phmmerTask = new UHMM3SWPhmmerTask( queryFilename, dbSeq, settings, chunk );
- res << phmmerTask;
- } else if(phmmerTask != subTask) {
- assert(false);
- }
- return res;
-}
-
-Task::ReportResult GTest_UHMM3Phmmer::report() {
- if( hasError() ) {
- return ReportResult_Finished;
- }
- assert( NULL != phmmerTask );
-
- if( !phmmerTask->isCanceled() && !phmmerTask->hasError() ) {
- addContext( taskCtxName, phmmerTask );
- ctxAdded = true;
- }
- return ReportResult_Finished;
-}
-
-void GTest_UHMM3Phmmer::cleanup() {
- if( NULL != phmmerTask && ctxAdded ) {
- removeContext( taskCtxName );
- }
-}
-
-GTest_UHMM3Phmmer::~GTest_UHMM3Phmmer() {
-}
-
-/****************************************
-* GTest_UHMM3PhmmerCompare
-****************************************/
-const QString GTest_UHMM3PhmmerCompare::PHMMER_TASK_CTX_NAME_TAG = "phmmerTask";
-const QString GTest_UHMM3PhmmerCompare::TRUE_OUT_FILENAME_TAG = "trueOut";
-
-void GTest_UHMM3PhmmerCompare::init( XMLTestFormat *tf, const QDomElement& el ) {
- Q_UNUSED( tf );
-
- phmmerTaskCtxName = el.attribute( PHMMER_TASK_CTX_NAME_TAG );
- trueOutFilename = el.attribute( TRUE_OUT_FILENAME_TAG );
-}
-
-void GTest_UHMM3PhmmerCompare::setAndCheckArgs() {
- if( phmmerTaskCtxName.isEmpty() ) {
- stateInfo.setError( L10N::badArgument( "phmmer task context name" ) );
- return;
- }
- if( trueOutFilename.isEmpty() ) {
- stateInfo.setError( L10N::badArgument( "true out filename" ) );
- return;
- }
- trueOutFilename = env->getVar( "COMMON_DATA_DIR" ) + "/" + trueOutFilename;
-
- phmmerTask = getContext< UHMM3SWPhmmerTask >( this, phmmerTaskCtxName );
- if( NULL == phmmerTask ) {
- stateInfo.setError( QString( "cannot find phmmer task %1 in context" ).arg( phmmerTaskCtxName ) );
- return;
- }
-}
-
-Task::ReportResult GTest_UHMM3PhmmerCompare::report() {
- assert( !hasError() );
- setAndCheckArgs();
- if( hasError() ) {
- return ReportResult_Finished;
- }
-
- UHMM3SearchResult trueRes;
- try {
- trueRes = GTest_UHMM3SearchCompare::getOriginalSearchResult( trueOutFilename );
- } catch( const QString& ex ) {
- stateInfo.setError( ex );
- } catch(...) {
- stateInfo.setError( "undefined_error_occurred" );
- }
-
- if( hasError() ) {
- return ReportResult_Finished;
- }
-
- assert( NULL != phmmerTask );
- qSort(trueRes.domainResList.begin(), trueRes.domainResList.end(), GTest_UHMM3SearchCompare::searchResultLessThan);
- GTest_UHMM3SearchCompare::swCompareResults(phmmerTask->getResult(), trueRes, stateInfo, true);
- return ReportResult_Finished;
-}
-
-} // U2
diff --git a/src/plugins_3rdparty/hmm3/src/tests/uhmmer3PhmmerTests.h b/src/plugins_3rdparty/hmm3/src/tests/uhmmer3PhmmerTests.h
deleted file mode 100644
index fe3d795..0000000
--- a/src/plugins_3rdparty/hmm3/src/tests/uhmmer3PhmmerTests.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#ifndef _GB2_UHMMER_PHMMER_TESTS_H_
-#define _GB2_UHMMER_PHMMER_TESTS_H_
-
-#include <QtCore/QString>
-#include <QtXml/QDomElement>
-
-#include <U2Test/XMLTestUtils.h>
-#include <phmmer/uhmm3PhmmerTask.h>
-
-namespace U2 {
-
-/*****************************************
-* Test for hmmer3 phmmer.
-* settings set by same tags from hmm3-search and hmm3-build tests + gaps probab. options and subst. matr
-* we test here 1<->1 queries
-*****************************************/
-class GTest_UHMM3Phmmer : public GTest {
- Q_OBJECT
-public:
- static const QString QUERY_FILENAME_TAG;
- static const QString DB_FILENAME_TAG;
- static const QString PHMMER_TASK_CTX_NAME_TAG;
-
- static const QString GAP_OPEN_PROBAB_OPTION_TAG;
- static const QString GAP_EXTEND_PROBAB_OPTION_TAG;
- static const QString SUBST_MATR_NAME_OPTION_TAG; /* name of registered substitution matrix. if empty - BLOSUM62 is used */
- static const QString SEARCH_CHUNK_OPTION_TAG;
-
-public:
- SIMPLE_XML_TEST_BODY_WITH_FACTORY( GTest_UHMM3Phmmer, "hmm3-phmmer" );
- ~GTest_UHMM3Phmmer();
-
- void prepare();
- ReportResult report();
- void cleanup();
- QList<Task*> onSubTaskFinished(Task* subTask);
-
-private:
- void setAndCheckArgs();
-
-private:
- UHMM3PhmmerSettings settings;
- QString queryFilename;
- QString dbFilename;
- QString taskCtxName;
- UHMM3SWPhmmerTask * phmmerTask;
- TaskStateInfo settigsStateInfo;
- TaskStateInfo scoringSystemStateInfo;
- int chunk;
- bool ctxAdded;
- LoadDocumentTask* loadDbTask;
-
-}; // GTest_UHMM3Phmmer
-
-/*****************************************
-* Test compares original hmmer3 phmmer results with UHMM3SearchResults
-*
-* Note, that you should make original hmmer3 to show results in academic version (e.g. 1.01e-23)
-*****************************************/
-
-class GTest_UHMM3PhmmerCompare : public GTest {
- Q_OBJECT
-public:
- static const QString PHMMER_TASK_CTX_NAME_TAG;
- static const QString TRUE_OUT_FILENAME_TAG;
-
-public:
- SIMPLE_XML_TEST_BODY_WITH_FACTORY( GTest_UHMM3PhmmerCompare, "hmm3-phmmer-compare" );
- ReportResult report();
-
-private:
- void setAndCheckArgs();
-
-private:
- QString phmmerTaskCtxName;
- QString trueOutFilename;
- UHMM3SWPhmmerTask * phmmerTask;
-
-}; // GTest_UHMM3PhmmerCompare
-
-} // U2
-
-#endif // _GB2_UHMMER_PHMMER_TESTS_H_
diff --git a/src/plugins_3rdparty/hmm3/src/tests/uhmmer3SearchTests.cpp b/src/plugins_3rdparty/hmm3/src/tests/uhmmer3SearchTests.cpp
deleted file mode 100644
index 1845cd1..0000000
--- a/src/plugins_3rdparty/hmm3/src/tests/uhmmer3SearchTests.cpp
+++ /dev/null
@@ -1,752 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#include "uhmmer3SearchTests.h"
-#include <gobject/uHMMObject.h>
-
-#include <U2Core/DocumentModel.h>
-#include <U2Core/IOAdapter.h>
-#include <U2Core/IOAdapterUtils.h>
-#include <U2Core/LoadDocumentTask.h>
-#include <U2Core/AppContext.h>
-#include <U2Core/DNASequenceObject.h>
-#include <U2Core/TextUtils.h>
-#include <U2Core/U2SafePoints.h>
-
-#include <QtCore/QList>
-
-namespace U2 {
-
-/*******************************
-* GTest_GeneralUHMM3Search
-********************************/
-
-const QString GTest_UHMM3Search::SEQ_DOC_CTX_NAME_TAG = "seqDoc";
-const QString GTest_UHMM3Search::HMM_FILENAME_TAG = "hmm";
-const QString GTest_UHMM3Search::HMMSEARCH_TASK_CTX_NAME_TAG = "taskCtxName";
-const QString GTest_UHMM3Search::ALGORITHM_TYPE_OPTION_TAG = "algo";
-const QString GTest_UHMM3Search::SW_CHUNK_SIZE_OPTION_TAG = "chunk";
-
-const QString GTest_UHMM3Search::SEQ_E_OPTION_TAG = "seqE";
-const QString GTest_UHMM3Search::SEQ_T_OPTION_TAG = "seqT";
-const QString GTest_UHMM3Search::Z_OPTION_TAG = "z";
-const QString GTest_UHMM3Search::DOM_E_OPTION_TAG = "domE";
-const QString GTest_UHMM3Search::DOM_T_OPTION_TAG = "domT";
-const QString GTest_UHMM3Search::DOM_Z_OPTION_TAG = "domZ";
-const QString GTest_UHMM3Search::USE_BIT_CUTOFFS_OPTION_TAG = "ubc";
-const QString GTest_UHMM3Search::INC_SEQ_E_OPTION_TAG = "incE";
-const QString GTest_UHMM3Search::INC_SEQ_T_OPTION_TAG = "incT";
-const QString GTest_UHMM3Search::INC_DOM_E_OPTION_TAG = "incdomE";
-const QString GTest_UHMM3Search::INC_DOM_T_OPTION_TAG = "incdomT";
-const QString GTest_UHMM3Search::MAX_OPTION_TAG = "max";
-const QString GTest_UHMM3Search::F1_OPTION_TAG = "f1";
-const QString GTest_UHMM3Search::F2_OPTION_TAG = "f2";
-const QString GTest_UHMM3Search::F3_OPTION_TAG = "f3";
-const QString GTest_UHMM3Search::NOBIAS_OPTION_TAG = "nobias";
-const QString GTest_UHMM3Search::NONULL2_OPTION_TAG = "nonull2";
-const QString GTest_UHMM3Search::SEED_OPTION_TAG = "seed";
-const QString GTest_UHMM3Search::REMOTE_MACHINE_VAR = "MACHINE";
-
-static void setDoubleOption( double& num, const QDomElement& el, const QString& optionName, TaskStateInfo& si ) {
- if( si.hasError() ) {
- return;
- }
- QString numStr = el.attribute( optionName );
- if( numStr.isEmpty() ) {
- return;
- }
-
- bool ok = false;
- double ret = numStr.toDouble( &ok );
- if( !ok ) {
- si.setError( QString( "cannot_parse_double_number_from %1. Option: %2" ).arg( numStr ).arg( optionName ) );
- return;
- }
- num = ret;
-}
-
-static void setUseBitCutoffsOption( int& ret, const QDomElement& el, const QString& opName, TaskStateInfo& si ) {
- if( si.hasError() ) {
- return;
- }
-
- QString str = el.attribute( opName ).toLower();
-
- if( "ga" == str ) {
- ret = p7H_GA;
- } else if( "nc" == str ) {
- ret = p7H_NC;
- } else if( "tc" == str ) {
- ret = p7H_TC;
- } else if( !str.isEmpty() ) {
- si.setError( QString( "unrecognized_value_in %1 option" ).arg( opName ) );
- }
-}
-
-static void setBooleanOption( int& ret, const QDomElement& el, const QString& opName, TaskStateInfo& si ) {
- if( si.hasError() ) {
- return;
- }
- QString str = el.attribute( opName ).toLower();
-
- if( !str.isEmpty() && "n" != str && "no" != str ) {
- ret = TRUE;
- } else {
- ret = FALSE;
- }
-}
-
-static void setIntegerOption( int& num, const QDomElement& el, const QString& optionName, TaskStateInfo& si ) {
- if( si.hasError() ) {
- return;
- }
- QString numStr = el.attribute( optionName );
- if( numStr.isEmpty() ) {
- return;
- }
-
- bool ok = false;
- int ret = numStr.toInt( &ok );
- if( !ok ) {
- si.setError( QString( "cannot_parse_integer_number_from %1. Option: %2" ).arg( numStr ).arg( optionName ) );
- return;
- }
- num = ret;
-}
-
-void GTest_UHMM3Search::setSearchTaskSettings( UHMM3SearchSettings& settings, const QDomElement& el, TaskStateInfo& si ) {
- setDoubleOption( settings.e, el, GTest_UHMM3Search::SEQ_E_OPTION_TAG, si );
- setDoubleOption( settings.t, el, GTest_UHMM3Search::SEQ_T_OPTION_TAG, si );
- setDoubleOption( settings.z, el, GTest_UHMM3Search::Z_OPTION_TAG, si );
- setDoubleOption( settings.f1, el, GTest_UHMM3Search::F1_OPTION_TAG, si );
- setDoubleOption( settings.f2, el, GTest_UHMM3Search::F2_OPTION_TAG, si );
- setDoubleOption( settings.f3, el, GTest_UHMM3Search::F3_OPTION_TAG, si );
- setDoubleOption( settings.domE, el, GTest_UHMM3Search::DOM_E_OPTION_TAG, si );
- setDoubleOption( settings.domT, el, GTest_UHMM3Search::DOM_T_OPTION_TAG, si );
- setDoubleOption( settings.domZ, el, GTest_UHMM3Search::DOM_Z_OPTION_TAG, si );
- setDoubleOption( settings.incE, el, GTest_UHMM3Search::INC_SEQ_E_OPTION_TAG, si );
- setDoubleOption( settings.incT, el, GTest_UHMM3Search::INC_SEQ_T_OPTION_TAG, si );
- setDoubleOption( settings.incDomE, el, GTest_UHMM3Search::INC_DOM_E_OPTION_TAG, si );
- setDoubleOption( settings.incDomT, el, GTest_UHMM3Search::INC_DOM_T_OPTION_TAG, si );
-
- setBooleanOption( settings.doMax, el, GTest_UHMM3Search::MAX_OPTION_TAG, si );
- setBooleanOption( settings.noBiasFilter, el, GTest_UHMM3Search::NOBIAS_OPTION_TAG, si );
- setBooleanOption( settings.noNull2, el, GTest_UHMM3Search::NONULL2_OPTION_TAG, si );
-
- setIntegerOption( settings.seed, el, GTest_UHMM3Search::SEED_OPTION_TAG, si );
-
- setUseBitCutoffsOption( settings.useBitCutoffs, el, GTest_UHMM3Search::USE_BIT_CUTOFFS_OPTION_TAG, si );
-}
-
-static void setSearchAlgoType( GTest_UHMM3SearchAlgoType& alType, const QString& s ) {
- QString str = s.toLower();
-
- if( "general" == str ) {
- alType = GENERAL_SEARCH;
- } else if( "sw" == str ) {
- alType = SEQUENCE_WALKER_SEARCH;
- } else {
- alType = UNKNOWN_SEARCH;
- }
-}
-
-static P7_HMM * takeHmmFromDoc( Document * doc ) {
- assert( NULL != doc );
- QList< GObject* > objs = doc->getObjects();
- assert( 1 == objs.size() );
- UHMMObject * hmmObj = qobject_cast< UHMMObject* >( objs.at( 0 ) );
- if( NULL == hmmObj ) {
- return NULL;
- }
- return hmmObj->takeHMM();
-}
-
-void GTest_UHMM3Search::init( XMLTestFormat *tf, const QDomElement& el ) {
- Q_UNUSED( tf );
-
- hmmFilename = el.attribute( HMM_FILENAME_TAG );
- searchTaskCtxName = el.attribute( HMMSEARCH_TASK_CTX_NAME_TAG );
-
- searchTaskToCtx = NULL;
- hmm = NULL;
-
- seqDocCtxName = el.attribute( SEQ_DOC_CTX_NAME_TAG );
- setSearchAlgoType( algo, el.attribute( ALGORITHM_TYPE_OPTION_TAG ) );
- setSearchTaskSettings( settings.inner, el, stateInfo );
-
- swChunk = UHMM3SWSearchTask::DEFAULT_CHUNK_SIZE;
- QString chunkStr = el.attribute(SW_CHUNK_SIZE_OPTION_TAG);
- if( !chunkStr.isEmpty() ) {
- bool ok = false;
- int candidate = chunkStr.toInt(&ok);
- if(ok && candidate > 0) {
- swChunk = candidate;
- }
- }
-
- cleanuped = false;
- ctxAdded = false;
-
- machinePath = env->getVar( REMOTE_MACHINE_VAR );
- if( !machinePath.isEmpty() ) {
- algo = SEQUENCE_WALKER_SEARCH;
- }
-}
-
-void GTest_UHMM3Search::setAndCheckArgs() {
- assert( !stateInfo.hasError() );
- if( hmmFilename.isEmpty() ) {
- stateInfo.setError( "hmm_filename_is_empty" );
- return;
- }
- hmmFilename = env->getVar( "COMMON_DATA_DIR" ) + "/" + hmmFilename;
-
- if( searchTaskCtxName.isEmpty() ) {
- stateInfo.setError( "task_ctx_name_is_empty" );
- return;
- }
-
- if( seqDocCtxName.isEmpty() ) {
- stateInfo.setError( "sequence_document_ctx_name_is_empty" );
- return;
- }
-
- if( UNKNOWN_SEARCH == algo ) {
- stateInfo.setError( "unknown_algorithm_type" );
- return;
- }
-
- Document* seqDoc = getContext<Document>( this, seqDocCtxName );
- if( NULL == seqDoc ) {
- stateInfo.setError( QString( "context %1 not found" ).arg( seqDocCtxName ) );
- return;
- }
- QList< GObject* > objsList = seqDoc->findGObjectByType( GObjectTypes::SEQUENCE );
- CHECK_EXT(!objsList.isEmpty(),setError("No sequence objects found!"), );
- U2SequenceObject* seqObj = qobject_cast< U2SequenceObject* >( objsList.first() );
- sequence = seqObj->getWholeSequence(stateInfo);
- CHECK_OP(stateInfo, );
- CHECK_EXT(sequence.length() > 0, setError(tr("Sequence is empty")), );
-
- if( !machinePath.isEmpty() ) {
- machinePath = env->getVar( "COMMON_DATA_DIR" ) + "/" + machinePath;
- }
-}
-
-void GTest_UHMM3Search::prepare() {
- assert( !hasError() );
- setAndCheckArgs();
- if( hasError() ) {
- return;
- }
-
- switch( algo ) {
- case GENERAL_SEARCH:
- searchTaskToCtx = new UHMM3LoadProfileAndSearchTask(settings, hmmFilename, sequence.seq);
- addSubTask( searchTaskToCtx );
- break;
- case SEQUENCE_WALKER_SEARCH:
- if( machinePath.isEmpty() ) { /* search task on local machine */
- searchTaskToCtx = new UHMM3SWSearchTask( hmmFilename, sequence, settings, swChunk );
- addSubTask( searchTaskToCtx );
- } else { /* search on remote machine */
- addSubTask( LoadDocumentTask::getDefaultLoadDocTask( hmmFilename ) );
- }
- break;
- default:
- assert( 0 && "undefined_algorithm_type" );
- }
-}
-
-QList< Task* > GTest_UHMM3Search::onSubTaskFinished( Task * sub ) {
- assert( NULL != sub );
- QList< Task* > res;
- LoadDocumentTask * loadHmmTask = qobject_cast<LoadDocumentTask*>( sub );
- if( NULL == loadHmmTask ) {
- return res;
- }
- if( loadHmmTask->hasError() ) {
- setError( loadHmmTask->getError() );
- return res;
- }
-
- hmm = takeHmmFromDoc( loadHmmTask->getDocument() );
- assert( NULL != hmm );
- return res;
-}
-
-Task::ReportResult GTest_UHMM3Search::report() {
- if( stateInfo.hasError() ) {
- return ReportResult_Finished;
- }
-
- if(searchTaskToCtx != NULL && !searchTaskToCtx->hasError() && !searchTaskToCtx->isCanceled() ) {
- addContext( searchTaskCtxName, searchTaskToCtx );
- ctxAdded = true;
- }
- return ReportResult_Finished;
-}
-
-void GTest_UHMM3Search::cleanup() {
- if( cleanuped ) {
- return;
- }
- if( ctxAdded ) {
- removeContext( searchTaskCtxName );
- }
- if( NULL != hmm ) {
- p7_hmm_Destroy( hmm );
- }
- cleanuped = true;
-}
-
-GTest_UHMM3Search::~GTest_UHMM3Search() {
- if( !cleanuped ) {
- cleanup();
- }
-}
-
-/**************************
-* GTest_GeneralUHMM3SearchCompare
-**************************/
-
-const int BUF_SZ = 2048;
-const char TERM_SYM = '\0';
-
-static void readLine( IOAdapter* io, QByteArray& to, QStringList* tokens = NULL ) {
- assert( NULL != io );
- to.clear();
- QByteArray buf( BUF_SZ, TERM_SYM );
- bool there = false;
- int bytes = 0;
- while( !there ) {
- int ret = io->readUntil( buf.data(), BUF_SZ, TextUtils::LINE_BREAKS, IOAdapter::Term_Include, &there );
- if( 0 > ret ) {
- throw QString( "read_error_occurred" );
- }
- if( 0 == ret ) {
- break;
- }
- to.append( QByteArray( buf.data(), ret ) );
- bytes += ret;
- }
- to = to.trimmed();
- if( 0 == bytes ) {
- throw QString( "unexpected_end_of_file_found" );
- }
-
- if( NULL != tokens ) {
- *tokens = QString( to ).split( QRegExp( "\\s+" ), QString::SkipEmptyParts );
- }
-}
-
-static QByteArray getNextToken( QStringList& tokens ) {
- if( tokens.isEmpty() ) {
- throw QString( "unexpected_end_of_line:token_is_missing" );
- }
- return tokens.takeFirst().toLatin1();
-}
-
-static double getDouble( const QByteArray& numStr ) {
- bool ok = false;
- double ret = numStr.toDouble( &ok );
- if( ok ) {
- return ret;
- }
- throw QString( GTest_UHMM3SearchCompare::tr( "Internal error (cannot parse float number from string '%1')" ).arg( QString( numStr ) ) );
-}
-
-static float getFloat( const QByteArray& numStr ) {
- return (float)getDouble( numStr );
-}
-
-static bool getSignificance( const QByteArray& str ) {
- if( "!" == str ) {
- return true;
- } else if( "?" == str ) {
- return false;
- }
- throw QString( GTest_UHMM3SearchCompare::tr( "Can't parse significance:%1" ).arg( QString( str ) ) );
-}
-
-static UHMM3SearchSeqDomainResult getDomainRes( QStringList& tokens ) {
- UHMM3SearchSeqDomainResult res;
-
- getNextToken( tokens );
- res.isSignificant = getSignificance( getNextToken( tokens ) );
- res.score = getFloat( getNextToken( tokens ) );
- res.bias = getFloat( getNextToken( tokens ) );
- res.cval = getDouble( getNextToken( tokens ) );
- res.ival = getDouble( getNextToken( tokens ) );
-
- int hmmFrom = (int)getFloat( getNextToken( tokens ) );
- int hmmTo = (int)getFloat( getNextToken( tokens ) );
- res.queryRegion = U2Region( hmmFrom, hmmTo - hmmFrom );
- getNextToken( tokens );
-
- int aliFrom = (int)getFloat( getNextToken( tokens ) );
- int aliTo = (int)getFloat( getNextToken( tokens ) );
- res.seqRegion = U2Region( aliFrom - 1, aliTo - aliFrom + 1 );
- getNextToken( tokens );
-
- int envFrom = (int)getFloat( getNextToken( tokens ) );
- int envTo = (int)getFloat( getNextToken( tokens ) );
- res.envRegion = U2Region( envFrom, envTo - envFrom );
- getNextToken( tokens );
-
- res.acc = getDouble( getNextToken( tokens ) );
- return res;
-}
-
-const double COMPARE_PERCENT_BORDER = 0.1; // 10 percent
-
-template<class T>
-static bool compareNumbers( T f1, T f2 ) {
- bool ret = false;
- if( 0 == f1 ) {
- ret = 0 == f2 ? true : f2 < COMPARE_PERCENT_BORDER;
- } else if( 0 == f2 ) {
- ret = f1 < COMPARE_PERCENT_BORDER;
- } else {
- ret = ( qAbs( f1 - f2 ) ) < COMPARE_PERCENT_BORDER;
- }
-
- if( !ret ) {
- qDebug() << "!!! compare numbers mismatch: " << f1 << " and " << f2 << " !!!\n";
- }
-
- return ret;
-}
-
-void GTest_UHMM3SearchCompare::generalCompareResults( const UHMM3SearchResult& myRes, const UHMM3SearchResult& trueRes, TaskStateInfo& ti ) {
- const UHMM3SearchCompleteSeqResult& myFull = myRes.fullSeqResult;
- const UHMM3SearchCompleteSeqResult& trueFull = trueRes.fullSeqResult;
-
- if( myFull.isReported != trueFull.isReported ) {
- ti.setError( QString( "reported_flag_not_matched: %1 and %2" ).arg( myFull.isReported ).arg( trueFull.isReported ) );
- return;
- }
-
- if( myFull.isReported ) {
- if( !compareNumbers<float>( myFull.bias, trueFull.bias ) ) {
- ti.setError( QString( "full_seq_bias_not_matched: %1 and %2" ).arg( myFull.bias ).arg( trueFull.bias ) ); return;
- }
- if( !compareNumbers<double>( myFull.eval, trueFull.eval ) ) {
- ti.setError( QString( "full_seq_eval_not_matched: %1 and %2" ).arg( myFull.eval ).arg( trueFull.eval ) ); return;
- }
- if( !compareNumbers<float>( myFull.score, trueFull.score ) ) {
- ti.setError( QString( "full_seq_score_not_matched: %1 and %2" ).arg( myFull.score ).arg( trueFull.score ) ); return;
- }
- if( !compareNumbers<float>( myFull.expectedDomainsNum, trueFull.expectedDomainsNum ) ) {
- ti.setError( QString( "full_seq_exp_not_matched: %1 and %2" ).arg( myFull.expectedDomainsNum ).arg( trueFull.expectedDomainsNum ) );
- return;
- }
- if( myFull.reportedDomainsNum != trueFull.reportedDomainsNum ) {
- ti.setError( QString( "full_seq_n_not_matched: %1 and %2" ).arg( myFull.reportedDomainsNum ).arg( trueFull.reportedDomainsNum ) );
- return;
- }
- }
-
- const QList< UHMM3SearchSeqDomainResult >& myDoms = myRes.domainResList;
- const QList< UHMM3SearchSeqDomainResult >& trueDoms = trueRes.domainResList;
- if( myDoms.size() != trueDoms.size() ) {
- ti.setError( QString( "domain_res_number_not_matched: %1 and %2" ).arg( myDoms.size() ).arg( trueDoms.size() ) );
- return;
- }
- for( int i = 0; i < myDoms.size(); ++i ) {
- UHMM3SearchSeqDomainResult myCurDom = myDoms.at( i );
- UHMM3SearchSeqDomainResult trueCurDom = trueDoms.at( i );
- if( !compareNumbers<double>( myCurDom.acc, trueCurDom.acc ) ) {
- ti.setError( QString( "dom_acc_not_matched: %1 and %2" ).arg( myCurDom.acc ).arg( trueCurDom.acc ) ); return;
- }
- if( !compareNumbers<float>( myCurDom.bias, trueCurDom.bias ) ) {
- ti.setError( QString( "dom_bias_not_matched: %1 and %2" ).arg( myCurDom.bias ).arg( trueCurDom.bias ) ); return;
- }
- if( !compareNumbers<double>( myCurDom.cval, trueCurDom.cval ) ) {
- ti.setError( QString( "dom_cval_not_matched: %1 and %2" ).arg( myCurDom.cval ).arg( trueCurDom.cval ) ); return;
- }
- if( !compareNumbers<double>( myCurDom.ival, trueCurDom.ival ) ) {
- ti.setError( QString( "dom_ival_not_matched: %1 and %2" ).arg( myCurDom.ival ).arg( trueCurDom.ival ) ); return;
- }
- if( !compareNumbers<float>( myCurDom.score, trueCurDom.score ) ) {
- ti.setError( QString( "dom_score_not_matched: %1 and %2" ).arg( myCurDom.score ).arg( trueCurDom.score ) ); return;
- }
- if( myCurDom.envRegion != trueCurDom.envRegion ) {
- ti.setError( QString( "dom_env_region_not_matched: %1---%2 and %3---%4" ).
- arg( myCurDom.envRegion.startPos ).arg( myCurDom.envRegion.length ).arg( trueCurDom.envRegion.startPos ).
- arg( trueCurDom.envRegion.length ) ); return;
- }
- if( myCurDom.queryRegion != trueCurDom.queryRegion ) {
- ti.setError( QString( "dom_hmm_region_not_matched: %1---%2 and %3---%4" ).
- arg( myCurDom.queryRegion.startPos ).arg( myCurDom.queryRegion.length ).arg( trueCurDom.queryRegion.startPos ).
- arg( trueCurDom.queryRegion.length ) ); return;
- }
- if( myCurDom.seqRegion != trueCurDom.seqRegion ) {
- ti.setError( QString( "dom_seq_region_not_matched: %1---%2 and %3---%4" ).
- arg( myCurDom.seqRegion.startPos ).arg( myCurDom.seqRegion.length ).arg( trueCurDom.seqRegion.startPos ).
- arg( trueCurDom.seqRegion.length ) ); return;
- }
- if( myCurDom.isSignificant != trueCurDom.isSignificant ) {
- ti.setError( QString( "dom_sign_not_matched: %1 and %2" ).arg( myCurDom.isSignificant ).arg( trueCurDom.isSignificant ) );
- return;
- }
- }
-}
-
-static QList<int>
-findEqualDomain(const QList<UHMM3SWSearchTaskDomainResult>& res, const UHMM3SearchSeqDomainResult & dres, bool compareSeqRegion) {
- QList<int> diff;
- for(int i = 0; i < res.size(); ++i) {
- UHMM3SearchSeqDomainResult dom = res.at(i).generalResult;
- int count = 0;
- if( !compareNumbers<double>( dom.acc, dres.acc ) ) { count++; }
- if( !compareNumbers<float>( dom.bias, dres.bias ) ) { count++; }
- if( !compareNumbers<double>( dom.cval, dres.cval ) ) { count++; }
- if( !compareNumbers<double>( dom.ival, dres.ival ) ) { count++; }
- if( !compareNumbers<float>( dom.score, dres.score ) ) { count++; }
- if( dom.queryRegion != dres.queryRegion ) { count++; }
- if( compareSeqRegion && dom.seqRegion != dres.seqRegion ) { count++; }
- if( compareSeqRegion && dom.envRegion != dres.envRegion ) { count++; }
- if( dom.isSignificant != dres.isSignificant ) { count++; }
- diff << count;
- }
- return diff;
-}
-
-static QString seqDomainResult2String(const UHMM3SearchSeqDomainResult & r) {
- return QString("score=%1, eval=%2, bias=%3, acc=%4, query=%5 seq=%6").arg(r.score).arg(r.ival).arg(r.bias).arg(r.acc).
- arg(QString("%1..%2").arg(r.queryRegion.startPos).arg(r.queryRegion.endPos())).
- arg(QString("%1..%2").arg(r.seqRegion.startPos).arg(r.seqRegion.endPos()));
-}
-
-/* we compare here that every domain of trueResult is included in myResult */
-void
-GTest_UHMM3SearchCompare::swCompareResults( const QList<UHMM3SWSearchTaskDomainResult>& myR, const UHMM3SearchResult& trueR,
- TaskStateInfo& ti, bool compareSeqRegion ) {
- int sz = trueR.domainResList.size();
- int i = 0;
- for( i = 0; i < sz; ++i ) {
- const UHMM3SearchSeqDomainResult & trueDom = trueR.domainResList.at(i);
- if(trueDom.score < 2) {
- continue;
- }
- QList<int> diff = findEqualDomain(myR, trueDom, compareSeqRegion);
- if(!diff.contains(0)) {
- int minPos = 0;
- int min = 1000000;
- for(int j = 0; j < myR.size(); ++j) {
- float d = qAbs(myR.at(j).generalResult.score - trueR.domainResList.at(i).score);
- if( d < min ) {
- min = d;
- minPos = j;
- }
- }
- if(!myR.isEmpty()) {
- ti.setError( QString( "Cannot find result #%1: %2, most close result: %3" ).
- arg(i).
- arg(seqDomainResult2String(trueR.domainResList.at(i))).
- arg(seqDomainResult2String(myR.at(minPos).generalResult)));
- } else {
- ti.setError( QString( "Cannot find result #%1: %2" ).
- arg(i).arg(seqDomainResult2String(trueR.domainResList.at(i))));
- }
- return;
- }
- }
-}
-
-const QString GTest_UHMM3SearchCompare::SEARCH_TASK_CTX_NAME_TAG = "searchTask";
-const QString GTest_UHMM3SearchCompare::TRUE_OUT_FILE_TAG = "trueOut";
-
-void GTest_UHMM3SearchCompare::init( XMLTestFormat *tf, const QDomElement& el ) {
- Q_UNUSED( tf );
-
- searchTaskCtxName = el.attribute( SEARCH_TASK_CTX_NAME_TAG );
- trueOutFilename = el.attribute( TRUE_OUT_FILE_TAG );
-}
-
-void GTest_UHMM3SearchCompare::setAndCheckArgs() {
- assert( !hasError() );
-
- if( searchTaskCtxName.isEmpty() ) {
- stateInfo.setError( "search_task_ctx_name_is_empty" );
- return;
- }
-
- if( trueOutFilename.isEmpty() ) {
- stateInfo.setError( "true_out_filename_is_empty" );
- return;
- }
- trueOutFilename = env->getVar( "COMMON_DATA_DIR" ) + "/" + trueOutFilename;
-
- Task* searchTask = getContext<Task>( this, searchTaskCtxName );
- if( NULL == searchTask ) {
- stateInfo.setError( tr( "No search task in test context" ) );
- return;
- }
-
- generalTask = qobject_cast< UHMM3LoadProfileAndSearchTask* >( searchTask );
- swTask = qobject_cast< UHMM3SWSearchTask* >( searchTask );
-
- if( NULL != generalTask ) {
- algo = GENERAL_SEARCH;
- } else if (NULL != swTask) {
- algo = SEQUENCE_WALKER_SEARCH;
- } else {
- assert( 0 && "cannot_cast_task_to_search_task" );
- }
-}
-
-bool GTest_UHMM3SearchCompare::searchResultLessThan(const UHMM3SearchSeqDomainResult & r1, const UHMM3SearchSeqDomainResult & r2) {
- if( r1.score == r2.score ) {
- if(r1.seqRegion == r2.seqRegion) {
- return &r1 < &r2;
- }
- return r1.seqRegion < r2.seqRegion;
- }
- return r1.score > r2.score;
-}
-
-UHMM3SearchResult GTest_UHMM3SearchCompare::getOriginalSearchResult( const QString & filename ) {
- assert( !filename.isEmpty() );
-
- IOAdapterFactory* iof = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById( IOAdapterUtils::url2io( filename ) );
- QScopedPointer< IOAdapter > io( iof->createIOAdapter() );
- if (io.isNull()) {
- throw QString( "cannot_create_io_adapter_for_'%1'_file" ).arg( filename );
- }
- if( !io->open( filename, IOAdapterMode_Read ) ) {
- throw QString( "cannot_open_'%1'_file" ).arg( filename );
- }
-
- UHMM3SearchResult res;
- QByteArray buf;
- QStringList tokens;
- bool wasHeader = false;
- bool wasFullSeqResult = false;
- readLine( io.data(), buf ); /* the first line. starts with # search or # phmmer */
- do {
- readLine( io.data(), buf );
- if( buf.isEmpty() ) { /* but no error - empty line here */
- continue;
- }
- if( buf.startsWith( "# HMMER 3" ) ) {
- wasHeader = true;
- continue;
- }
- if( buf.startsWith( "Scores for complete sequences" ) ) {
- if( !wasHeader ) {
- throw QString( "hmmer_output_header_is_missing" );
- }
- UHMM3SearchCompleteSeqResult& fullSeqRes = res.fullSeqResult;
- readLine( io.data(), buf );
- readLine( io.data(), buf );
- readLine( io.data(), buf );
- readLine( io.data(), buf, &tokens );
- if( buf.startsWith( "[No hits detected" ) ) {
- fullSeqRes.isReported = false;
- break;
- } else {
- fullSeqRes.eval = getDouble( getNextToken( tokens ) );
- fullSeqRes.score = getFloat( getNextToken( tokens ) );
- fullSeqRes.bias = getFloat( getNextToken( tokens ) );
- /* skip best domain res. we will check it later */
- getNextToken( tokens );getNextToken( tokens );getNextToken( tokens );
- fullSeqRes.expectedDomainsNum = getFloat( getNextToken( tokens ) );
- fullSeqRes.reportedDomainsNum = (int)getFloat( getNextToken( tokens ) );
- fullSeqRes.isReported = true;
- wasFullSeqResult = true;
- }
- continue;
- }
- if( buf.startsWith( "Domain annotation for each sequence" ) ) {
- if( !wasFullSeqResult ) {
- throw QString( "full_seq_result_is_missing" );
- }
- readLine( io.data(), buf );
- readLine( io.data(), buf );
- readLine( io.data(), buf );
- QList< UHMM3SearchSeqDomainResult >& domainResList = res.domainResList;
- assert( domainResList.isEmpty() );
-
- int nDomains = res.fullSeqResult.reportedDomainsNum;
- int i = 0;
- for( i = 0; i < nDomains; ++i ) {
- readLine( io.data(), buf, &tokens );
- domainResList << getDomainRes( tokens );
- }
- break;
- }
- } while ( 1 );
- return res;
-}
-
-Task::ReportResult GTest_UHMM3SearchCompare::report() {
- assert( !hasError() );
- setAndCheckArgs();
- if( hasError() ) {
- return ReportResult_Finished;
- }
-
- UHMM3SearchResult trueRes;
- try {
- trueRes = getOriginalSearchResult( trueOutFilename );
- } catch( const QString& ex ) {
- stateInfo.setError( ex );
- } catch(...) {
- stateInfo.setError( "undefined_error_occurred" );
- }
-
- if( hasError() ) {
- return ReportResult_Finished;
- }
-
- switch( algo ) {
- case GENERAL_SEARCH:{
- assert( NULL != generalTask );
- QList<UHMM3SearchResult> res = generalTask->getResult();
- if(res.size() < 1){
- stateInfo.setError("no result");
- return ReportResult_Finished;
- }
- generalCompareResults(res.first() , trueRes, stateInfo );
- break;
- }
- case SEQUENCE_WALKER_SEARCH:
- {
- QList<UHMM3SWSearchTaskDomainResult> result;
- if( NULL != swTask ) {
- result = swTask->getResults();
- } else {
- assert( false );
- }
- qSort(trueRes.domainResList.begin(), trueRes.domainResList.end(), searchResultLessThan);
- swCompareResults( result, trueRes, stateInfo );
- }
- break;
- default:
- assert( 0 && "unknown_algo_type" );
- }
-
- return ReportResult_Finished;
-}
-
-} // U2
diff --git a/src/plugins_3rdparty/hmm3/src/tests/uhmmer3SearchTests.h b/src/plugins_3rdparty/hmm3/src/tests/uhmmer3SearchTests.h
deleted file mode 100644
index 96c08f3..0000000
--- a/src/plugins_3rdparty/hmm3/src/tests/uhmmer3SearchTests.h
+++ /dev/null
@@ -1,146 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#ifndef _GB2_UHMMER3_SEARCH_TESTS_H_
-#define _GB2_UHMMER3_SEARCH_TESTS_H_
-
-#include <QtXml/QDomElement>
-
-#include <U2Test/XMLTestUtils.h>
-#include <search/uHMM3SearchTask.h>
-
-namespace U2 {
-
-
-/*****************************************
-* Test for hmmer3 search.
-*
-* we test here only files with one sequences. no multi-queries
-*
-* NOTE: you should make hmmer search output files print with 5 signs after point e.g. 0.12345
-* NOTE: you should make p7_pli_TargetReportable, p7_pli_DomainReportable report by double, not float (in original hmmer3 code!!!)
-*****************************************/
-enum GTest_UHMM3SearchAlgoType {
- UNKNOWN_SEARCH = -1,
- GENERAL_SEARCH,
- SEQUENCE_WALKER_SEARCH
-};// GTest_UHMM3SearchAlType
-
-class GTest_UHMM3Search : public GTest {
- Q_OBJECT
-public:
- static const QString SEQ_DOC_CTX_NAME_TAG; /* loaded sequence document */
- static const QString HMM_FILENAME_TAG;
- static const QString HMMSEARCH_TASK_CTX_NAME_TAG; /* finished UHMM3SearchTask */
- static const QString ALGORITHM_TYPE_OPTION_TAG;
- static const QString SW_CHUNK_SIZE_OPTION_TAG;
- /* reporting thresholds options */
- static const QString SEQ_E_OPTION_TAG; /* -E */
- static const QString SEQ_T_OPTION_TAG; /* -T */
- static const QString Z_OPTION_TAG; /* -Z */
- static const QString DOM_E_OPTION_TAG; /* --domE */
- static const QString DOM_T_OPTION_TAG; /* --domT */
- static const QString DOM_Z_OPTION_TAG; /* --domZ */
- static const QString USE_BIT_CUTOFFS_OPTION_TAG; /* --cut_ga, --cut_nc, --cut_tc or none */
- /* significance thresholds options */
- static const QString INC_SEQ_E_OPTION_TAG; /* --incE */
- static const QString INC_SEQ_T_OPTION_TAG; /* --incT */
- static const QString INC_DOM_E_OPTION_TAG; /* --incdomE */
- static const QString INC_DOM_T_OPTION_TAG; /* --incdomT */
- /* acceleration heuristics options */
- static const QString MAX_OPTION_TAG; /* --max */
- static const QString F1_OPTION_TAG; /* --F1 */
- static const QString F2_OPTION_TAG; /* --F2 */
- static const QString F3_OPTION_TAG; /* --F3 */
- static const QString NOBIAS_OPTION_TAG; /* --nobias */
- static const QString NONULL2_OPTION_TAG; /* --nonull2 */
- static const QString SEED_OPTION_TAG; /* --seed */
-
- static const QString REMOTE_MACHINE_VAR;
-
- static void setSearchTaskSettings( UHMM3SearchSettings& set, const QDomElement& el, TaskStateInfo& si );
-
-public:
- SIMPLE_XML_TEST_BODY_WITH_FACTORY( GTest_UHMM3Search, "hmm3-search" );
- ~GTest_UHMM3Search();
-
- void prepare();
- ReportResult report();
- void cleanup();
-
- virtual QList< Task* > onSubTaskFinished( Task * sub );
-
-private:
- void setAndCheckArgs();
-
-private:
- UHMM3SearchTaskSettings settings;
- QString hmmFilename;
- Task* searchTaskToCtx; /* general or sw or RemoteTask. we will add it to context */
- QString searchTaskCtxName;
- DNASequence sequence;
- QString seqDocCtxName;
- GTest_UHMM3SearchAlgoType algo;
- int swChunk;
- bool cleanuped;
-
- QString machinePath;
- P7_HMM * hmm;
- bool ctxAdded;
-
-}; // GTest_GeneralUHMM3Search
-
-/*****************************************
-* Test compares original hmmer3 search results with UHMM3SearchResults
-*
-* Note, that you should make original hmmer3 to show results in academic version (e.g. 1.01e-23)
-*****************************************/
-class GTest_UHMM3SearchCompare : public GTest {
- Q_OBJECT
-public:
- static const QString SEARCH_TASK_CTX_NAME_TAG;
- static const QString TRUE_OUT_FILE_TAG; /* file with original hmmer3 output */
-
- static UHMM3SearchResult getOriginalSearchResult( const QString & filename );
- static void generalCompareResults( const UHMM3SearchResult& myRes, const UHMM3SearchResult& trueRes, TaskStateInfo& ti );
- static void swCompareResults( const QList<UHMM3SWSearchTaskDomainResult>& myR, const UHMM3SearchResult& trueR,
- TaskStateInfo& ti, bool compareSeqRegion = false );
- static bool searchResultLessThan(const UHMM3SearchSeqDomainResult & r1, const UHMM3SearchSeqDomainResult & r2);
-
-public:
- SIMPLE_XML_TEST_BODY_WITH_FACTORY( GTest_UHMM3SearchCompare, "hmm3-search-compare" );
- ReportResult report();
-
-private:
- void setAndCheckArgs();
-
-private:
- QString searchTaskCtxName;
- QString trueOutFilename;
- UHMM3LoadProfileAndSearchTask *generalTask;
- UHMM3SWSearchTask* swTask;
- GTest_UHMM3SearchAlgoType algo;
-
-}; // GTest_GeneralUHMM3SearchCompare
-
-}
-
-#endif // _GB2_UHMMER3_SEARCH_TESTS_H_
diff --git a/src/plugins_3rdparty/hmm3/src/tests/uhmmer3Tests.cpp b/src/plugins_3rdparty/hmm3/src/tests/uhmmer3Tests.cpp
deleted file mode 100644
index de784ab..0000000
--- a/src/plugins_3rdparty/hmm3/src/tests/uhmmer3Tests.cpp
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#include "uhmmer3BuildTests.h"
-#include "uhmmer3SearchTests.h"
-#include "uhmmer3PhmmerTests.h"
-
-#include "uhmmer3Tests.h"
-
-namespace U2 {
-
-QList< XMLTestFactory* > UHMMER3Tests::createTestFactories() {
- QList< XMLTestFactory* > res;
- res << GTest_CompareHmmFiles::createFactory();
- res << GTest_UHMMER3Build::createFactory();
- res << GTest_UHMM3Search::createFactory();
- res << GTest_UHMM3SearchCompare::createFactory();
- res << GTest_UHMM3Phmmer::createFactory();
- res << GTest_UHMM3PhmmerCompare::createFactory();
- return res;
-}
-
-} // GB2
diff --git a/src/plugins_3rdparty/hmm3/src/uHMM3Plugin.cpp b/src/plugins_3rdparty/hmm3/src/uHMM3Plugin.cpp
deleted file mode 100644
index dd08d19..0000000
--- a/src/plugins_3rdparty/hmm3/src/uHMM3Plugin.cpp
+++ /dev/null
@@ -1,293 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#include <QAction>
-#include <QMenu>
-#include <QMessageBox>
-
-#include <U2Core/AppContext.h>
-#include <U2Core/GAutoDeleteList.h>
-#include <U2Core/GObjectSelection.h>
-#include <U2Core/MAlignmentObject.h>
-
-#include <U2Gui/GUIUtils.h>
-#include <U2Gui/MainWindow.h>
-#include <U2Gui/ObjectViewModel.h>
-#include <U2Gui/ProjectView.h>
-#include <U2Gui/ToolsMenu.h>
-#include <U2Core/QObjectScopedPointer.h>
-
-#include <U2View/ADVConstants.h>
-#include <U2View/ADVSequenceObjectContext.h>
-#include <U2View/ADVUtils.h>
-#include <U2View/AnnotatedDNAView.h>
-#include <U2View/MSAEditor.h>
-#include <U2View/MSAEditorFactory.h>
-
-#include <U2Test/GTestFrameworkComponents.h>
-#include <U2Test/XMLTestFormat.h>
-
-#include "uHMM3Plugin.h"
-#include "build/uHMM3BuildDialogImpl.h"
-#include "format/uHMMFormat.h"
-#include "phmmer/uHMM3PhmmerDialogImpl.h"
-#include "search/uHMM3SearchDialogImpl.h"
-#include "search/uhmm3QDActor.h"
-#include "tests/uhmmer3Tests.h"
-#include "workers/HMM3IOWorker.h"
-
-Q_DECLARE_METATYPE(QMenu*);
-
-namespace U2 {
-
-/****************************************
- * UHMM3Plugin
- ****************************************/
-
-extern "C" Q_DECL_EXPORT Plugin* U2_PLUGIN_INIT_FUNC() {
- UHMM3Plugin * plug = new UHMM3Plugin();
- return plug;
-}
-
-UHMM3Plugin::UHMM3Plugin() : Plugin( tr( "HMM3" ), tr( "HMM profile tools. Plugin is based on HMMER 3.0b3 package: freely distributable implementation of profile HMM software for protein sequence analysis. Home page of project: http://hmmer.janelia.org/" ) ) {
- // UHMMFormat
- DocumentFormatRegistry* dfRegistry = AppContext::getDocumentFormatRegistry();
- assert( NULL != dfRegistry );
- bool ok = dfRegistry->registerFormat( new UHMMFormat( dfRegistry ) );
- assert( ok ); Q_UNUSED( ok );
-
- LocalWorkflow::HMM3Lib::init();
-
- QDActorPrototypeRegistry* qdpr = AppContext::getQDActorProtoRegistry();
- qdpr->registerProto(new UHMM3QDActorPrototype());
-
- // Tests
- GTestFormatRegistry* tfr = AppContext::getTestFramework()->getTestFormatRegistry();
- assert( NULL != tfr );
- XMLTestFormat *xmlTestFormat = qobject_cast<XMLTestFormat*>(tfr->findFormat("XML"));
- assert( NULL != xmlTestFormat );
-
- GAutoDeleteList<XMLTestFactory>* l = new GAutoDeleteList<XMLTestFactory>(this);
- l->qlist = UHMMER3Tests::createTestFactories();
-
- foreach( XMLTestFactory* f, l->qlist ) {
- bool res = xmlTestFormat->registerTestFactory( f );
- assert(res);
- Q_UNUSED(res);
- }
-
- // HMMER3 menu
- MainWindow * mainWnd = AppContext::getMainWindow();
- if( mainWnd ) {
- QAction * buildAction = new QAction( tr( "Build HMM3 profile..." ), this );
- buildAction->setObjectName(ToolsMenu::HMMER_BUILD3);
- connect( buildAction, SIGNAL( triggered() ), SLOT( sl_buildProfile() ) );
- ToolsMenu::addAction(ToolsMenu::HMMER_MENU, buildAction);
-
- QAction * searchAction = new QAction( tr( "Search with HMMER3..." ), this );
- searchAction->setObjectName(ToolsMenu::HMMER_SEARCH3);
- connect( searchAction, SIGNAL( triggered() ), SLOT( sl_searchHMMSignals() ) );
- ToolsMenu::addAction(ToolsMenu::HMMER_MENU, searchAction);
-
- QAction * phmmerAction = new QAction( tr( "Search with HMMER3 phmmer..." ), this );
- phmmerAction->setObjectName(ToolsMenu::HMMER_SEARCH3P);
- connect( phmmerAction, SIGNAL( triggered() ), SLOT( sl_phmmerSearch() ) );
- ToolsMenu::addAction(ToolsMenu::HMMER_MENU, phmmerAction);
-
- // contexts
- msaEditorCtx = new UHMM3MSAEditorContext( this );
- msaEditorCtx->init();
-
- advCtx = new UHMM3ADVContext( this );
- advCtx->init();
- }
-}
-
-void UHMM3Plugin::sl_buildProfile() {
- MAlignment ma;
- MWMDIWindow* w = AppContext::getMainWindow()->getMDIManager()->getActiveWindow();
- if( NULL != w ) {
- GObjectViewWindow* ow = qobject_cast< GObjectViewWindow* >( w );
- if( NULL != ow ) {
- GObjectView* ov = ow->getObjectView();
- MSAEditor * av = qobject_cast< MSAEditor* >( ov );
- if( NULL != av ) {
- MAlignmentObject* maObj = av->getMSAObject();
- if (maObj != NULL)
- ma = maObj->getMAlignment();
- }
- }
- }
- QWidget *p = (QWidget*)AppContext::getMainWindow()->getQMainWindow();
-
- QObjectScopedPointer<UHMM3BuildDialogImpl> buildDlg = new UHMM3BuildDialogImpl( ma, p );
- buildDlg->exec();
-}
-
-/* TODO: same as in function sl_search in uHMMPlugin.cpp */
-U2SequenceObject * UHMM3Plugin::getDnaSequenceObject() const {
- U2SequenceObject * seqObj = NULL;
- GObjectViewWindow * activeWnd = qobject_cast< GObjectViewWindow* >( AppContext::getMainWindow()->getMDIManager()->getActiveWindow() );
- if( NULL != activeWnd ) {
- AnnotatedDNAView * dnaView = qobject_cast< AnnotatedDNAView* >( activeWnd->getObjectView() );
- seqObj = NULL != dnaView ? dnaView->getSequenceInFocus()->getSequenceObject() : NULL;
- }
- if( NULL == seqObj ) {
- ProjectView * projView = AppContext::getProjectView();
- if( NULL != projView ) {
- const GObjectSelection * objSelection = projView->getGObjectSelection();
- GObject* obj = objSelection->getSelectedObjects().size() == 1 ? objSelection->getSelectedObjects().first() : NULL;
- seqObj = qobject_cast< U2SequenceObject* >( obj );
- }
- }
- return seqObj;
-}
-
-void UHMM3Plugin::sl_searchHMMSignals() {
- U2SequenceObject * seqObj = getDnaSequenceObject();
- if( NULL == seqObj ) {
- QMessageBox::critical( NULL, tr( "Error!" ), tr( "Target sequence not selected: no opened annotated dna view" ) );
- return;
- }
- QWidget *p = (QWidget*)AppContext::getMainWindow()->getQMainWindow();
- QObjectScopedPointer<UHMM3SearchDialogImpl> searchDlg = new UHMM3SearchDialogImpl( seqObj, p );
- searchDlg->exec();
-}
-
-void UHMM3Plugin::sl_phmmerSearch() {
- U2SequenceObject * seqObj = getDnaSequenceObject();
- if( NULL == seqObj ) {
- QMessageBox::critical( NULL, tr( "Error!" ), tr( "Target sequence not selected: no opened annotated dna view" ) );
- return;
- }
- QWidget *p = (QWidget*)AppContext::getMainWindow()->getQMainWindow();
- QObjectScopedPointer<UHMM3PhmmerDialogImpl> phmmerDlg = new UHMM3PhmmerDialogImpl( seqObj, p );
- phmmerDlg->exec();
-}
-
-UHMM3Plugin::~UHMM3Plugin() {
-}
-
-/****************************************
-* UHMM3MSAEditorContext
-****************************************/
-
-UHMM3MSAEditorContext::UHMM3MSAEditorContext( QObject * p ) : GObjectViewWindowContext( p, MSAEditorFactory::ID ) {
-}
-
-void UHMM3MSAEditorContext::initViewContext( GObjectView * view ) {
- MSAEditor* msaed = qobject_cast<MSAEditor*>(view);
- assert(msaed!=NULL);
- if (msaed->getMSAObject() == NULL)
- return;
-
- GObjectViewAction * action = new GObjectViewAction( this, view, tr("Build HMMER3 profile") );
- action->setObjectName("Build HMMER3 profile");
- action->setIcon( QIcon( ":/hmm3/images/hmmer_16.png" ) );
- connect( action, SIGNAL( triggered() ), SLOT( sl_build() ) );
- addViewAction( action );
-}
-
-void UHMM3MSAEditorContext::buildMenu( GObjectView * v, QMenu * m ) {
- MSAEditor* msaed = qobject_cast<MSAEditor*>(v);
- assert( NULL != msaed && NULL != m );
- if (msaed->getMSAObject() == NULL)
- return;
-
- QList< GObjectViewAction* > list = getViewActions( v );
- assert( 1 == list.size() );
- QMenu* aMenu = GUIUtils::findSubMenu( m, MSAE_MENU_ADVANCED );
- SAFE_POINT(aMenu != NULL, "aMenu", );
- aMenu->addAction( list.first() );
-}
-
-void UHMM3MSAEditorContext::sl_build() {
- GObjectViewAction * action = qobject_cast< GObjectViewAction* >( sender() );
- assert( NULL != action );
- MSAEditor * ed = qobject_cast< MSAEditor* >( action->getObjectView() );
- assert( NULL != ed );
- MAlignmentObject * obj = ed->getMSAObject();
- if (obj != NULL) {
- QObjectScopedPointer<UHMM3BuildDialogImpl> buildDlg = new UHMM3BuildDialogImpl( obj->getMAlignment() );
- buildDlg->exec();
- CHECK(!buildDlg.isNull(), );
- }
-}
-
-/****************************************
-* UHMM3ADVContext
-****************************************/
-
-UHMM3ADVContext::UHMM3ADVContext( QObject * p ) : GObjectViewWindowContext( p, ANNOTATED_DNA_VIEW_FACTORY_ID ) {
-}
-
-void UHMM3ADVContext::initViewContext( GObjectView * view ) {
- assert( NULL != view );
- AnnotatedDNAView * av = qobject_cast< AnnotatedDNAView* >( view );
-
- ADVGlobalAction * searchAction = new ADVGlobalAction( av, QIcon( ":/hmm3/images/hmmer_16.png" ),
- tr( "Find HMM signals with HMMER3..." ), 70 );
- searchAction->setObjectName("Find HMM signals with HMMER3");
- connect( searchAction, SIGNAL( triggered() ), SLOT( sl_search() ) );
-}
-
-void UHMM3ADVContext::sl_search() {
- QWidget * parent = getParentWidget( sender() );
- assert( NULL != parent );
- U2SequenceObject * seqObj = getSequenceInFocus( sender() );
- if( NULL == seqObj ) {
- QMessageBox::critical( parent, tr( "error" ), tr( "No sequence in focus found" ) );
- return;
- }
-
- QObjectScopedPointer<UHMM3SearchDialogImpl> searchDlg = new UHMM3SearchDialogImpl( seqObj, parent );
- searchDlg->exec();
-}
-
-QWidget * UHMM3ADVContext::getParentWidget( QObject * sender ) {
- assert( NULL != sender );
- GObjectViewAction * action = qobject_cast< GObjectViewAction* >( sender );
- assert( NULL != action );
- AnnotatedDNAView * av = qobject_cast< AnnotatedDNAView* >( action->getObjectView() );
- assert( NULL != av );
-
- if( av->getWidget() ){
- return av->getWidget();
- } else {
- return ( QWidget* )AppContext::getMainWindow()->getQMainWindow();
- }
- return NULL; // unreachable code
-}
-
-U2SequenceObject * UHMM3ADVContext::getSequenceInFocus( QObject * sender ) {
- assert( NULL != sender );
- GObjectViewAction * action = qobject_cast< GObjectViewAction* >( sender );
- assert( NULL != action );
- AnnotatedDNAView * av = qobject_cast< AnnotatedDNAView* >( action->getObjectView() );
- assert( NULL != av );
- ADVSequenceObjectContext* seqCtx = av->getSequenceInFocus();
- if( NULL == seqCtx ) {
- return NULL;
- }
- return seqCtx->getSequenceObject();
-}
-
-} // GB2
diff --git a/src/plugins_3rdparty/hmm3/src/uHMM3Plugin.h b/src/plugins_3rdparty/hmm3/src/uHMM3Plugin.h
deleted file mode 100644
index 08a8221..0000000
--- a/src/plugins_3rdparty/hmm3/src/uHMM3Plugin.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#ifndef _GB2_UHMMER3_PLUGIN_H_
-#define _GB2_UHMMER3_PLUGIN_H_
-
-#include <U2Core/PluginModel.h>
-#include <U2Core/AppContext.h>
-#include <U2Core/DNASequenceObject.h>
-#include <U2Gui/ObjectViewModel.h>
-
-namespace U2 {
-
-class UHMM3MSAEditorContext;
-class UHMM3ADVContext;
-
-class UHMM3Plugin : public Plugin {
- Q_OBJECT
-public:
- UHMM3Plugin();
- ~UHMM3Plugin();
-
-private:
- U2SequenceObject * getDnaSequenceObject() const;
-
-private slots:
- void sl_buildProfile();
- void sl_searchHMMSignals();
- void sl_phmmerSearch();
-
-private:
- UHMM3MSAEditorContext * msaEditorCtx;
- UHMM3ADVContext * advCtx;
-
-}; // uHMM3Plugin
-
-class UHMM3MSAEditorContext : public GObjectViewWindowContext {
- Q_OBJECT
-public:
- UHMM3MSAEditorContext( QObject * p );
-
-protected slots:
- void sl_build();
-
-protected:
- virtual void initViewContext( GObjectView * view );
- virtual void buildMenu( GObjectView * v, QMenu * m );
-
-}; // UHMM3MSAEditorContext
-
-class UHMM3ADVContext : public GObjectViewWindowContext {
- Q_OBJECT
-public:
- UHMM3ADVContext( QObject * p );
-
-protected slots:
- void sl_search();
-
-private:
- QWidget * getParentWidget( QObject * sender );
- U2SequenceObject * getSequenceInFocus( QObject * sender );
-
-protected:
- virtual void initViewContext( GObjectView * view );
-
-}; // UHMM3ADVContext
-
-} // U2
-
-#endif // _GB2_UHMMER3_PLUGIN_H_
diff --git a/src/plugins_3rdparty/hmm3/src/util/uhmm3Utilities.cpp b/src/plugins_3rdparty/hmm3/src/util/uhmm3Utilities.cpp
deleted file mode 100644
index 229f3a8..0000000
--- a/src/plugins_3rdparty/hmm3/src/util/uhmm3Utilities.cpp
+++ /dev/null
@@ -1,246 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#include <cassert>
-
-#include <U2Core/MAlignmentInfo.h>
-#include <U2Core/SMatrix.h>
-#include <U2Core/U2OpStatusUtils.h>
-#include <U2Core/AppContext.h>
-#include <U2Core/IOAdapterUtils.h>
-
-#include <gobject/uHMMObject.h>
-#include <format/uHMMFormat.h>
-#include "uhmm3Utilities.h"
-
-using namespace U2;
-
-const char TERM_SYM = '\0';
-
-static void copyData( const QByteArray& from, char* to ) {
- assert( NULL != to );
- int sz = from.size();
- const char* fromData = from.data();
- qCopy( fromData, fromData + sz, to );
- to[sz] = TERM_SYM;
-}
-
-static bool allocAndCopyData( const QByteArray& from, char** to ) {
- assert( NULL != to );
- *to = (char*)calloc( sizeof( char ), from.size() + 1 );
- if( NULL == *to ) {
- return false;
- }
- copyData( from, *to );
- return true;
-}
-
-static void setMsaCutoffs( const QVariantMap& info, ESL_MSA* msa, MAlignmentInfo::Cutoffs cof ) {
- assert( NULL != msa );
- if( MAlignmentInfo::hasCutoff( info, cof ) ) {
- int ind = static_cast< int >( cof );
- msa->cutoff[ind] = MAlignmentInfo::getCutoff( info, cof );
- msa->cutset[ind] = TRUE;
- }
-}
-
-static bool convertMalignmentInfo( const QVariantMap& info, ESL_MSA* msa ) {
- assert( NULL != msa );
- bool ok = false;
-
- if( !MAlignmentInfo::hasName( info ) ) {
- return false;
- }
- QString name = MAlignmentInfo::getName( info );
- assert( !name.isEmpty() );
- ok = allocAndCopyData( name.toLatin1(), &msa->name );
- if( !ok ) {
- return false;
- }
-
- if( MAlignmentInfo::hasAccession( info ) ) {
- QString acc = MAlignmentInfo::getAccession( info );
- assert( !acc.isEmpty() );
- ok = allocAndCopyData( acc.toLatin1(), &msa->acc );
- if( !ok ) {
- return false;
- }
- }
- if( MAlignmentInfo::hasDescription( info ) ) {
- QString desc = MAlignmentInfo::getDescription( info );
- assert( !desc.isEmpty() );
- ok = allocAndCopyData( desc.toLatin1(), &msa->desc );
- if( !ok ) {
- return false;
- }
- }
- if( MAlignmentInfo::hasSSConsensus( info ) ) {
- QString cs = MAlignmentInfo::getSSConsensus( info );
- assert( !cs.isEmpty() );
- ok = allocAndCopyData( cs.toLatin1(), &msa->ss_cons );
- if( !ok ) {
- return false;
- }
- }
- if( MAlignmentInfo::hasReferenceLine( info ) ) {
- QString rf = MAlignmentInfo::getReferenceLine( info );
- assert( !rf.isEmpty() );
- ok = allocAndCopyData( rf.toLatin1(), &msa->rf );
- if( !ok ) {
- return false;
- }
- }
- setMsaCutoffs( info, msa, MAlignmentInfo::CUTOFF_GA1 );
- setMsaCutoffs( info, msa, MAlignmentInfo::CUTOFF_GA2 );
- setMsaCutoffs( info, msa, MAlignmentInfo::CUTOFF_NC1 );
- setMsaCutoffs( info, msa, MAlignmentInfo::CUTOFF_NC2 );
- setMsaCutoffs( info, msa, MAlignmentInfo::CUTOFF_TC1 );
- setMsaCutoffs( info, msa, MAlignmentInfo::CUTOFF_TC2 );
-
- return true;
-}
-
-
-namespace U2 {
-
-ESL_SCOREMATRIX * UHMM3Utilities::convertScoreMatrix( const SMatrix& it ) {
- if (it.isEmpty()) {
- return NULL;
- }
- assert( false );
- return NULL;
-}
-
-int UHMM3Utilities::convertAlphabetType( const DNAAlphabet * al ) {
- assert( NULL != al );
- DNAAlphabetType alType = al->getType();
- int ret = 0;
-
- switch( alType ) {
- case DNAAlphabet_RAW:
- ret = eslNONSTANDARD;
- break;
- case DNAAlphabet_AMINO:
- assert( BaseDNAAlphabetIds::AMINO_DEFAULT() == al->getId() );
- ret = eslAMINO;
- break;
- case DNAAlphabet_NUCL:
- {
- QString alId = al->getId();
- if( BaseDNAAlphabetIds::NUCL_DNA_DEFAULT() == alId || BaseDNAAlphabetIds::NUCL_DNA_EXTENDED() == alId ) {
- ret = eslDNA;
- } else if( BaseDNAAlphabetIds::NUCL_RNA_DEFAULT() == alId || BaseDNAAlphabetIds::NUCL_RNA_EXTENDED() == alId ) {
- ret = eslRNA;
- } else {
- ret = BAD_ALPHABET;
- }
- }
- break;
- default:
- ret = BAD_ALPHABET;
- }
-
- return ret;
-}
-
-ESL_MSA * UHMM3Utilities::convertMSA( const MAlignment & ma ) {
- ESL_MSA * msa = NULL;
- int i = 0;
- bool ok = false;
- int nseq = ma.getNumRows();
- int alen = ma.getLength();
-
- assert( 0 < nseq && 0 < alen );
-
- msa = esl_msa_Create( nseq, alen );
- if( NULL == msa ) {
- return NULL;
- }
- msa->nseq = nseq;
- for (i = 0; i < nseq; i++) {
- const MAlignmentRow& row = ma.getRow(i);
- ok = allocAndCopyData( row.getName().toLatin1(), &msa->sqname[i] );
- if( !ok ) {
- esl_msa_Destroy( msa );
- return NULL;
- }
- U2OpStatus2Log os;
- QByteArray sequence = row.toByteArray(ma.getLength(), os);
- copyData(sequence, msa->aseq[i] );
- }
-
- ok = convertMalignmentInfo( ma.getInfo(), msa );
- if( !ok ) {
- esl_msa_Destroy( msa );
- return NULL;
- }
-
- return msa;
-}
-
-QList<const P7_HMM *> UHMM3Utilities::getHmmsFromDocument( Document* doc, TaskStateInfo& ti ){
- QList<const P7_HMM *> res;
- SAFE_POINT( NULL != doc, "UHMM3Utilities::getHmmsFromDocument:: doc is NULL", res);
-
- const QList<GObject*>& gobjects = doc->getObjects();
- foreach(GObject* gobj, gobjects){
- UHMMObject* obj = qobject_cast< UHMMObject* >( gobj );
- if( NULL != obj ) {
- res.append((P7_HMM*)obj->getHMM());
- }
- }
-
- if (res.isEmpty()){
- ti.setError( "no_hmm_found_in_file" );
- }
- return res;
-}
-
-QList< GObject* > UHMM3Utilities::getDocObjects( const QList< const P7_HMM* >& hmms ){
- QList< GObject* > res;
- foreach(const P7_HMM* hmm, hmms ) {
- res.append( new UHMMObject( const_cast<P7_HMM*>(hmm), QString( hmm->name ) ) );
- }
- return res;
-}
-
-Document * UHMM3Utilities::getSavingDocument( const QList<const P7_HMM* >& hmms, const QString & outfile ){
- assert( !hmms.isEmpty() );
- QList< GObject* > docObjects = getDocObjects( hmms );
- UHMMFormat* hmmFrmt = qobject_cast< UHMMFormat* >
- ( AppContext::getDocumentFormatRegistry()->getFormatById( UHMMFormat::UHHMER_FORMAT_ID ) );
- assert( NULL != hmmFrmt );
-
- IOAdapterFactory* iof = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById( IOAdapterUtils::url2io( outfile ) );
- assert( NULL != iof );
-
- U2OpStatus2Log os;
- QVariantMap hints;
- hints.insert(DocumentFormat::DBI_REF_HINT, qVariantFromValue(U2DbiRef()));
- Document* doc = hmmFrmt->createNewLoadedDocument(iof, outfile, os, hints);
- CHECK_OP(os, NULL);
- foreach(GObject* obj, docObjects) {
- doc->addObject(obj);
- }
- return doc;
-}
-
-} // U2
diff --git a/src/plugins_3rdparty/hmm3/src/util/uhmm3Utilities.h b/src/plugins_3rdparty/hmm3/src/util/uhmm3Utilities.h
deleted file mode 100644
index 3412fc9..0000000
--- a/src/plugins_3rdparty/hmm3/src/util/uhmm3Utilities.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#ifndef _GB2_UHMM3_UTILITIES_H_
-#define _GB2_UHMM3_UTILITIES_H_
-
-#include <U2Core/DocumentModel.h>
-#include <U2Core/DNAAlphabet.h>
-#include <U2Core/MAlignment.h>
-
-#include <hmmer3/hmmer.h>
-#include <hmmer3/easel/esl_scorematrix.h>
-#include <hmmer3/easel/esl_msa.h>
-
-namespace U2 {
-
-class SMatrix;
-
-class UHMM3Utilities {
-public:
- static const int BAD_ALPHABET = -1;
-
-public:
- static ESL_SCOREMATRIX * convertScoreMatrix( const SMatrix& it );
-
- static int convertAlphabetType( const DNAAlphabet * al );
-
- static ESL_MSA * convertMSA( const MAlignment & ma );
-
- static QList<const P7_HMM *> getHmmsFromDocument( Document* doc, TaskStateInfo& ti );
-
- static QList< GObject* > getDocObjects( const QList<const P7_HMM* >& hmms );
-
- static Document * getSavingDocument( const QList<const P7_HMM* >& hmms, const QString & outfile );
-
-
-}; // UHMM3Utilities
-
-} // U2
-
-#endif // _GB2_UHMM3_UTILITIES_H_
diff --git a/src/plugins_3rdparty/hmm3/src/workers/HMM3IOWorker.cpp b/src/plugins_3rdparty/hmm3/src/workers/HMM3IOWorker.cpp
deleted file mode 100644
index 8b881b8..0000000
--- a/src/plugins_3rdparty/hmm3/src/workers/HMM3IOWorker.cpp
+++ /dev/null
@@ -1,322 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#include "HMM3IOWorker.h"
-#include "HMM3BuildWorker.h"
-#include "HMM3SearchWorker.h"
-
-#include <U2Lang/Datatype.h>
-#include <U2Lang/IntegralBusModel.h>
-#include <U2Lang/WorkflowEnv.h>
-#include <U2Lang/ActorPrototypeRegistry.h>
-#include <U2Designer/DelegateEditors.h>
-#include <U2Lang/CoreLibConstants.h>
-#include <U2Lang/BaseTypes.h>
-#include <U2Lang/BaseSlots.h>
-#include <U2Lang/BaseAttributes.h>
-
-#include <U2Core/AppContext.h>
-#include <U2Core/IOAdapter.h>
-#include <U2Core/Log.h>
-#include <U2Core/GUrlUtils.h>
-#include <U2Core/FailTask.h>
-#include <U2Core/TaskSignalMapper.h>
-#include <U2Core/IOAdapterUtils.h>
-#include <U2Core/LoadDocumentTask.h>
-
-#include <util/uhmm3Utilities.h>
-#include <format/uHMMFormat.h>
-#include <gobject/uHMMObject.h>
-#include <build/uHMM3BuildTask.h>
-
-#include <U2Gui/DialogUtils.h>
-
-
-/* TRANSLATOR U2::LocalWorkflow::HMM3Lib */
-
-namespace U2 {
-namespace LocalWorkflow {
-
-const QString HMM3_ID("hmm3");
-const QString HMM3_EXT("hmm");
-const QString HMM3Reader::ACTOR("hmm3-read-profile");
-const QString HMM3Writer::ACTOR("hmm3-write-profile");
-static const QString HMM3_OUT_PORT_ID("out-hmm3");
-static const QString HMM3_IN_PORT_ID("in-hmm3");
-const QString HMM3Lib::HMM3_PROFILE_TYPE_ID("hmm3.profile");
-
-DataTypePtr HMM3Lib::HMM3_PROFILE_TYPE() {
- DataTypeRegistry* dtr = WorkflowEnv::getDataTypeRegistry();
- assert(dtr);
- static bool startup = true;
- if (startup)
- {
- dtr->registerEntry(DataTypePtr(new DataType(HMM3_PROFILE_TYPE_ID, tr("HMM3 Profile"), "")));
- startup = false;
- }
- return dtr->getById(HMM3_PROFILE_TYPE_ID);
-}
-
-const Descriptor HMM3Lib::HMM3_SLOT("hmm3-profile", QObject::tr("HMM3 Profile"), "");
-
-const Descriptor HMM3Lib::HMM3_CATEGORY() {return Descriptor("hmmer3", tr("HMMER3 Tools"), "");}
-
-HMM3IOProto::HMM3IOProto(const Descriptor& _desc, const QList<PortDescriptor*>& _ports, const QList<Attribute*>& _attrs )
-: IntegralBusActorPrototype(_desc, _ports, _attrs) {
-}
-
-bool HMM3IOProto::isAcceptableDrop(const QMimeData * md, QVariantMap * params, const QString & urlAttrId ) const {
- if (md->hasUrls()) {
- QList<QUrl> urls = md->urls();
- if (urls.size() == 1)
- {
- QString url = urls.at(0).toLocalFile();
- QString ext = GUrlUtils::getUncompressedExtension(url);
- if (ext == HMM3_EXT) {
- if (params) {
- params->insert(urlAttrId, url);
- }
- return true;
- }
- }
- }
- return false;
-}
-
-ReadHMM3Proto::ReadHMM3Proto(const Descriptor& _desc, const QList<PortDescriptor*>& _ports, const QList<Attribute*>& _attrs )
-: HMM3IOProto( _desc, _ports, _attrs ) {
-
- attrs << new Attribute(BaseAttributes::URL_IN_ATTRIBUTE(), BaseTypes::STRING_TYPE(), true);
- QMap<QString, PropertyDelegate*> delegateMap;
- delegateMap[BaseAttributes::URL_IN_ATTRIBUTE().getId()] = new URLDelegate( DialogUtils::prepareDocumentsFileFilterByObjType( UHMMObject::UHMM_OT, true), HMM3_ID, true, false, false );
- setEditor(new DelegateEditor(delegateMap));
- setIconPath( ":/hmm3/images/hmmer_16.png" );
-}
-
-bool ReadHMM3Proto::isAcceptableDrop(const QMimeData * md, QVariantMap * params ) const {
- return HMM3IOProto::isAcceptableDrop( md, params, BaseAttributes::URL_IN_ATTRIBUTE().getId() );
-}
-
-WriteHMM3Proto::WriteHMM3Proto(const Descriptor& _desc, const QList<PortDescriptor*>& _ports, const QList<Attribute*>& _attrs )
-: HMM3IOProto( _desc, _ports, _attrs ) {
- attrs << new Attribute(BaseAttributes::URL_OUT_ATTRIBUTE(), BaseTypes::STRING_TYPE(), true );
- attrs << new Attribute(BaseAttributes::FILE_MODE_ATTRIBUTE(), BaseTypes::NUM_TYPE(), false, SaveDoc_Roll);
-
- QMap<QString, PropertyDelegate*> delegateMap;
- delegateMap[BaseAttributes::URL_OUT_ATTRIBUTE().getId()] = new URLDelegate(DialogUtils::prepareDocumentsFileFilterByObjType( UHMMObject::UHMM_OT, true), HMM3_ID, false );
- delegateMap[BaseAttributes::FILE_MODE_ATTRIBUTE().getId()] = new FileModeDelegate(false);
-
- setEditor(new DelegateEditor(delegateMap));
- setIconPath( ":/hmm3/images/hmmer_16.png" );
- setValidator(new ScreenedParamValidator(BaseAttributes::URL_OUT_ATTRIBUTE().getId(), ports.first()->getId(), BaseSlots::URL_SLOT().getId()));
- setPortValidator(HMM3_IN_PORT_ID, new ScreenedSlotValidator(BaseSlots::URL_SLOT().getId()));
-}
-
-bool WriteHMM3Proto::isAcceptableDrop(const QMimeData * md, QVariantMap * params ) const {
- return HMM3IOProto::isAcceptableDrop( md, params, BaseAttributes::URL_OUT_ATTRIBUTE().getId() );
-}
-
-void HMM3IOWorkerFactory::init() {
-
- ActorPrototypeRegistry* r = WorkflowEnv::getProtoRegistry();
- assert(r);
- {
- Descriptor id(HMM3_IN_PORT_ID, HMM3Lib::tr("HMM3 profile"), HMM3Lib::tr("Input HMM3 profile"));
- Descriptor ud(BaseSlots::URL_SLOT().getId(), HMM3Lib::tr("Location"), HMM3Lib::tr("Location hint for the target file."));
-
- QMap<Descriptor, DataTypePtr> m;
- m[ud] = BaseTypes::STRING_TYPE();
- m[HMM3Lib::HMM3_SLOT] = HMM3Lib::HMM3_PROFILE_TYPE();
- DataTypePtr t(new MapDataType(Descriptor("write.hmm.content"), m));
-
- QList<PortDescriptor*> p; QList<Attribute*> a;
- p << new PortDescriptor(id, t, true /*input*/);
-
- Descriptor desc(HMM3Writer::ACTOR, HMM3Lib::tr("Write HMM3 Profile"), HMM3Lib::tr("Saves all input HMM3 profiles to specified location."));
- IntegralBusActorPrototype* proto = new WriteHMM3Proto(desc, p, a);
- proto->setPrompter(new HMM3WritePrompter());
- r->registerProto(HMM3Lib::HMM3_CATEGORY(), proto);
- }
- {
- Descriptor od(HMM3_OUT_PORT_ID, HMM3Lib::tr("HMM3 profile"), HMM3Lib::tr("Loaded HMM3 profile"));
-
- QList<PortDescriptor*> p; QList<Attribute*> a;
- QMap<Descriptor, DataTypePtr> outM;
- outM[HMM3Lib::HMM3_SLOT] = HMM3Lib::HMM3_PROFILE_TYPE();
- p << new PortDescriptor(od, DataTypePtr(new MapDataType("hmm.read.out", outM)), false /*output*/, true);
-
- Descriptor desc(HMM3Reader::ACTOR, HMM3Lib::tr("Read HMM3 Profile"), HMM3Lib::tr("Reads HMM3 profiles from file(s). The files can be local or Internet URLs."));
- IntegralBusActorPrototype* proto = new ReadHMM3Proto(desc, p, a);
- proto->setPrompter(new HMM3ReadPrompter());
- r->registerProto(HMM3Lib::HMM3_CATEGORY(), proto);
- }
-
- DomainFactory* localDomain = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
- localDomain->registerEntry(new HMM3IOWorkerFactory(HMM3Reader::ACTOR));
- localDomain->registerEntry(new HMM3IOWorkerFactory(HMM3Writer::ACTOR));
-}
-
-QString HMM3ReadPrompter::composeRichDoc() {
- return tr("Read HMM3 profile(s) from %1.").arg(getHyperlink(BaseAttributes::URL_IN_ATTRIBUTE().getId(), getURL(BaseAttributes::URL_IN_ATTRIBUTE().getId())));
-}
-
-QString HMM3WritePrompter::composeRichDoc() {
- IntegralBusPort* input = qobject_cast<IntegralBusPort*>(target->getPort(HMM3_IN_PORT_ID));
- Actor* producer = input->getProducer(HMM3Lib::HMM3_SLOT.getId());
- QString unsetStr = "<font color='red'>"+tr("unset")+"</font>";
- QString producerStr = producer ? producer->getLabel() : unsetStr;
- QString url = getScreenedURL(input, BaseAttributes::URL_OUT_ATTRIBUTE().getId(), BaseSlots::URL_SLOT().getId());
- url = getHyperlink(BaseAttributes::URL_OUT_ATTRIBUTE().getId(), url);
- QString doc = tr("Save HMM3 profile(s) from <u>%1</u> to <u>%2</u>.")
- .arg(producerStr)
- .arg(url);
- return doc;
-}
-
-void HMM3IOWorkerFactory::cleanup() {
- DomainFactory* ld = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
- DomainFactory* f = ld->unregisterEntry(HMM3Reader::ACTOR);
- delete f;
- f = ld->unregisterEntry(HMM3Writer::ACTOR);
- delete f;
-
- ActorPrototypeRegistry* r = WorkflowEnv::getProtoRegistry();
- ActorPrototype* p = r->unregisterProto(HMM3Reader::ACTOR);
- assert(p); delete p;
- p = r->unregisterProto(HMM3Writer::ACTOR);
- assert(p); delete p;
-}
-
-Worker* HMM3IOWorkerFactory::createWorker(Actor* a) {
- BaseWorker* w = NULL;
- if (HMM3Reader::ACTOR == a->getProto()->getId()) {
- w = new HMM3Reader(a);
- }
- else if (HMM3Writer::ACTOR == a->getProto()->getId()) {
- w = new HMM3Writer(a);
- }
-
- return w;
-}
-
-void HMM3Reader::init() {
- output = ports.value(HMM3_OUT_PORT_ID);
- urls = WorkflowUtils::expandToUrls(actor->getParameter(BaseAttributes::URL_IN_ATTRIBUTE().getId())->getAttributeValue<QString>(context));
-}
-
-Task* HMM3Reader::tick() {
- if (urls.isEmpty()) {
- setDone();
- output->setEnded();
- } else {
- const QString& filename = urls.takeFirst();
- IOAdapterFactory* iof = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById( IOAdapterUtils::url2io( filename ));
- assert( NULL != iof );
- Task* t = new LoadDocumentTask( UHMMFormat::UHHMER_FORMAT_ID, filename, iof, QVariantMap() );
- connect(t, SIGNAL(si_stateChanged()), SLOT(sl_taskFinished()));
- return t;
-
- }
- return NULL;
-}
-
-void HMM3Reader::sl_taskFinished() {
- LoadDocumentTask* t = qobject_cast<LoadDocumentTask*>(sender());
- if (t->getState() != Task::State_Finished) return;
- if (output) {
- if (!t->hasError()) {
- TaskStateInfo os;
- QList<const P7_HMM*> hmms = UHMM3Utilities::getHmmsFromDocument( t->getDocument(), os );
- for(int i = 0; i<hmms.size(); i++){
- QVariant v = qVariantFromValue<const P7_HMM*>(hmms[i]);
- output->put(Message(HMM3Lib::HMM3_PROFILE_TYPE(), v));
- }
- }
- ioLog.info(tr("Loaded HMM3 profile(s) from %1").arg(t->getURLString()));
- }
-}
-
-void HMM3Writer::init() {
- input = ports.value(HMM3_IN_PORT_ID);
-}
-
-Task* HMM3Writer::tick() {
- if (input->hasMessage()) {
- Message inputMessage = getMessageAndSetupScriptValues(input);
- if (inputMessage.isEmpty()) {
- return NULL;
- }
- url = getValue<QString>(BaseAttributes::URL_OUT_ATTRIBUTE().getId());
- fileMode = actor->getParameter(BaseAttributes::FILE_MODE_ATTRIBUTE().getId())->getAttributeValue<uint>(context);
- QVariantMap data = inputMessage.getData().toMap();
-
- const P7_HMM* hmm = data.value(HMM3Lib::HMM3_SLOT.getId()).value<const P7_HMM*>();
- QString anUrl = url;
- if (anUrl.isEmpty()) {
- anUrl = data.value(BaseSlots::URL_SLOT().getId()).toString();
- }
- if (anUrl.isEmpty() || hmm == NULL) {
- QString err = (hmm == NULL) ? tr("Empty HMM3 passed for writing to %1").arg(anUrl) : tr("Unspecified URL for writing HMM3");
- //if (failFast) {
- return new FailTask(err);
- /*} else {
- ioLog.error(err);
- return NULL;
- }*/
- }
- assert(!anUrl.isEmpty());
- anUrl = context->absolutePath(anUrl);
- int count = ++counter[anUrl];
- if (count != 1) {
- anUrl = GUrlUtils::prepareFileName(anUrl, count, QStringList(HMM3_EXT));
- } else {
- anUrl = GUrlUtils::ensureFileExt( anUrl, QStringList(HMM3_EXT)).getURLString();
- }
- ioLog.info(tr("Writing HMM3 profile to %1").arg(anUrl));
- Document* savingDocument = UHMM3Utilities::getSavingDocument( QList<const P7_HMM*>()<<hmm, anUrl );
- context->getMonitor()->addOutputFile(anUrl, getActor()->getId());
- //return new HMM3WriteTask(anUrl, hmm, fileMode);
- return new SaveDocumentTask( savingDocument );
- } else if (input->isEnded()) {
- setDone();
- }
- return NULL;
-}
-
-
-void HMM3Lib::init() {
- HMM3IOWorkerFactory::init();
- HMM3BuildWorkerFactory::init();
- HMM3SearchWorkerFactory::init();
-}
-
-void HMM3Lib::cleanup() {
- //FIXME need locking
- //HMM3IOWorkerFactory::cleanup();
- //HMM3BuildWorkerFactory::cleanup();
- //HMM3SearchWorkerFactory::cleanup();
-
- //DataTypeRegistry* dr = WorkflowEnv::getDataTypeRegistry();
- //dr->unregisterEntry(HMM3_PROFILE_TYPE->getId());
-}
-
-} //namespace LocalWorkflow
-} //namespace U2
diff --git a/src/plugins_3rdparty/hmm3/src/workers/HMM3IOWorker.h b/src/plugins_3rdparty/hmm3/src/workers/HMM3IOWorker.h
deleted file mode 100644
index a698a27..0000000
--- a/src/plugins_3rdparty/hmm3/src/workers/HMM3IOWorker.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#ifndef _U2_HMM3_IO_WORKER_H_
-#define _U2_HMM3_IO_WORKER_H_
-
-#include <U2Lang/LocalDomain.h>
-#include <U2Lang/WorkflowUtils.h>
-#include <U2Lang/WorkflowEnv.h>
-#include <U2Lang/WorkflowMonitor.h>
-
-
-#include <U2Core/SaveDocumentTask.h>
-
-#include <hmmer3/hmmer.h>
-
-struct msa_struct;
-
-Q_DECLARE_METATYPE(const P7_HMM*);
-
-namespace U2 {
-namespace LocalWorkflow {
-
-class HMM3Lib : public QObject {
- Q_OBJECT
-public:
- static const Descriptor HMM3_CATEGORY();
- static const Descriptor HMM3_SLOT;
- static DataTypePtr HMM3_PROFILE_TYPE();
- static const QString HMM3_PROFILE_TYPE_ID;
- static void init();
- static void cleanup();
-};
-
-class HMM3IOProto : public IntegralBusActorPrototype {
-public:
- HMM3IOProto(const Descriptor& desc, const QList<PortDescriptor*>& ports,
- const QList<Attribute*>& attrs = QList<Attribute*>());
- virtual bool isAcceptableDrop(const QMimeData*, QVariantMap*) const = 0;
- bool isAcceptableDrop(const QMimeData*, QVariantMap*, const QString&) const;
-};
-
-class ReadHMM3Proto : public HMM3IOProto {
-public:
- ReadHMM3Proto(const Descriptor& _desc, const QList<PortDescriptor*>& _ports,
- const QList<Attribute*>& _attrs = QList<Attribute*>());
- virtual bool isAcceptableDrop(const QMimeData*, QVariantMap*) const;
-};
-
-class WriteHMM3Proto : public HMM3IOProto {
-public:
- WriteHMM3Proto(const Descriptor& _desc, const QList<PortDescriptor*>& _ports,
- const QList<Attribute*>& _attrs = QList<Attribute*>());
- virtual bool isAcceptableDrop(const QMimeData*, QVariantMap*) const;
-};
-
-class HMM3ReadPrompter : public PrompterBase<HMM3ReadPrompter> {
- Q_OBJECT
-public:
- HMM3ReadPrompter(Actor* p = 0) : PrompterBase<HMM3ReadPrompter>(p) {}
-protected:
- QString composeRichDoc();
-};
-
-class HMM3WritePrompter : public PrompterBase<HMM3WritePrompter> {
- Q_OBJECT
-public:
- HMM3WritePrompter(Actor* p = 0) : PrompterBase<HMM3WritePrompter>(p) {}
-protected:
- QString composeRichDoc();
-};
-
-
-class HMM3Reader : public BaseWorker {
- Q_OBJECT
-public:
- static const QString ACTOR;
- HMM3Reader(Actor* a) : BaseWorker(a), output(NULL) {}
- virtual void init();
- virtual Task* tick();
- virtual void cleanup() {}
-private slots:
- void sl_taskFinished();
-
-protected:
- CommunicationChannel *output;
- QStringList urls;
-};
-
-class HMM3Writer : public BaseWorker {
- Q_OBJECT
-public:
- static const QString ACTOR;
- HMM3Writer(Actor* a) : BaseWorker(a), input(NULL), done(false), fileMode(SaveDoc_Overwrite) {}
- virtual void init();
- virtual Task* tick();
- virtual void cleanup() {}
-
-protected:
- CommunicationChannel *input;
- QString url;
- QMap<QString,int> counter;
- bool done;
- uint fileMode;
-};
-
-class HMM3IOWorkerFactory : public DomainFactory {
-public:
- static void init();
- static void cleanup();
- HMM3IOWorkerFactory(const Descriptor& d) : DomainFactory(d) {}
- virtual Worker* createWorker(Actor* a);
-};
-
-} // Workflow namespace
-} // U2 namespace
-
-#endif
-
diff --git a/src/plugins_3rdparty/hmm3/src/workers/HMM3SearchWorker.cpp b/src/plugins_3rdparty/hmm3/src/workers/HMM3SearchWorker.cpp
deleted file mode 100644
index 300a5f9..0000000
--- a/src/plugins_3rdparty/hmm3/src/workers/HMM3SearchWorker.cpp
+++ /dev/null
@@ -1,355 +0,0 @@
-/**
- * UGENE - Integrated Bioinformatics Tools.
- * Copyright (C) 2008-2016 UniPro <ugene at unipro.ru>
- * http://ugene.unipro.ru
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
-
-#include "HMM3SearchWorker.h"
-#include "HMM3IOWorker.h"
-#include "search/uHMM3SearchTask.h"
-
-#include <U2Lang/Datatype.h>
-#include <U2Lang/IntegralBusModel.h>
-#include <U2Lang/WorkflowEnv.h>
-#include <U2Lang/ActorPrototypeRegistry.h>
-#include <U2Lang/BaseTypes.h>
-#include <U2Lang/BaseSlots.h>
-#include <U2Lang/BasePorts.h>
-#include <U2Lang/BaseActorCategories.h>
-#include <U2Designer/DelegateEditors.h>
-#include <U2Lang/CoreLibConstants.h>
-
-#include <U2Core/AnnotationData.h>
-#include <U2Core/DNASequence.h>
-#include <U2Core/DNATranslation.h>
-#include <U2Core/DNAAlphabet.h>
-#include <U2Core/AppContext.h>
-#include <U2Core/DNASequenceObject.h>
-#include <U2Core/Log.h>
-#include <U2Core/FailTask.h>
-#include <U2Core/MultiTask.h>
-#include <U2Core/TaskSignalMapper.h>
-#include <U2Core/U2OpStatusUtils.h>
-
-//#include <QtGui/QApplication>
-/* TRANSLATOR U2::LocalWorkflow::HMM3SearchWorker */
-
-namespace U2 {
-namespace LocalWorkflow {
-
-/*******************************
- * HMM3SearchWorkerFactory
- *******************************/
-static const QString HMM3_PORT("in-hmm3");
-
-static const QString NAME_ATTR("result-name");
-static const QString THRESHOLD_ATTR("threshold-type");
-static const QString DOM_E_ATTR("e-val");
-static const QString DOM_T_ATTR("score");
-static const QString DOM_Z_ATTR("domZ");
-static const QString SEED_ATTR("seed");
-static const QString BIAS_FILTER_ATTR("nobias");
-static const QString SCORE_CORRECTION_ATTR("nonull2");
-static const QString HEURISTIC_FILTER_ATTR("max");
-static const QString F1_ATTR("F1");
-static const QString F2_ATTR("F2");
-static const QString F3_ATTR("F3");
-
-const QString HMM3SearchWorker::E_THRESHOLD = "domE";
-const QString HMM3SearchWorker::T_THRESHOLD = "domT";
-const QString HMM3SearchWorker::CUT_GA_THRESHOLD = "cut_ga";
-const QString HMM3SearchWorker::CUT_NC_THRESHOLD = "cut_nc";
-const QString HMM3SearchWorker::CUT_TC_THRESHOLD = "cut_tc";
-
-const QString HMM3SearchWorkerFactory::ACTOR("hmm3-search");
-
-void HMM3SearchWorkerFactory::init() {
-
- QList<PortDescriptor*> p; QList<Attribute*> a;
- {
- Descriptor hd(HMM3_PORT, HMM3SearchWorker::tr("HMM3 profile"), HMM3SearchWorker::tr("HMM3 profile(s) to search with."));
- Descriptor sd(BasePorts::IN_SEQ_PORT_ID(), HMM3SearchWorker::tr("Input sequence"),
- HMM3SearchWorker::tr("An input sequence (nucleotide or protein) to search in."));
- Descriptor od(BasePorts::OUT_ANNOTATIONS_PORT_ID(), HMM3SearchWorker::tr("HMM3 annotations"),
- HMM3SearchWorker::tr("Annotations marking found similar sequence regions."));
-
- QMap<Descriptor, DataTypePtr> hmmM;
- hmmM[HMM3Lib::HMM3_SLOT] = HMM3Lib::HMM3_PROFILE_TYPE();
- p << new PortDescriptor(hd, DataTypePtr(new MapDataType("hmm.search.hmm", hmmM)), true /*input*/, false, IntegralBusPort::BLIND_INPUT);
- QMap<Descriptor, DataTypePtr> seqM;
- seqM[BaseSlots::DNA_SEQUENCE_SLOT()] = BaseTypes::DNA_SEQUENCE_TYPE();
- p << new PortDescriptor(sd, DataTypePtr(new MapDataType("hmm.search.sequence", seqM)), true /*input*/);
- QMap<Descriptor, DataTypePtr> outM;
- outM[BaseSlots::ANNOTATION_TABLE_SLOT()] = BaseTypes::ANNOTATION_TABLE_TYPE();
- p << new PortDescriptor(od, DataTypePtr(new MapDataType("hmm.search.out", outM)), false /*input*/, true);
- }
-
- {
- Descriptor nd(NAME_ATTR, HMM3SearchWorker::tr("Result annotation"), HMM3SearchWorker::tr("A name of the result annotations."));
- Descriptor nsd(SEED_ATTR, HMM3SearchWorker::tr("Seed"), HMM3SearchWorker::tr("Random generator seed. 0 - means that one-time arbitrary seed will be used."));
- Descriptor td(THRESHOLD_ATTR, HMM3SearchWorker::tr("Threshold type"), HMM3SearchWorker::tr("Controlling reporting and model-specific thresholds."));
- Descriptor ded(DOM_E_ATTR, HMM3SearchWorker::tr("Filter by high E-value"), HMM3SearchWorker::tr("Report domains with e-value less than."));
- Descriptor dtd(DOM_T_ATTR, HMM3SearchWorker::tr("Filter by low score"), HMM3SearchWorker::tr("Report domains with score greater than."));
- Descriptor dzd(DOM_Z_ATTR, HMM3SearchWorker::tr("Significant sequences"), HMM3SearchWorker::tr("Number of significant sequences, for domain E-value calculation."));
- Descriptor bfd(BIAS_FILTER_ATTR, HMM3SearchWorker::tr("No bias"), HMM3SearchWorker::tr("Turn off composition bias filter."));
- Descriptor scd(SCORE_CORRECTION_ATTR, HMM3SearchWorker::tr("No score corrections"), HMM3SearchWorker::tr("Turn off biased composition score corrections."));
- Descriptor hfd(HEURISTIC_FILTER_ATTR, HMM3SearchWorker::tr("No heuristic filters"), HMM3SearchWorker::tr("Turn all heuristic filters off (less speed, more power)."));
- Descriptor f1d(F1_ATTR, HMM3SearchWorker::tr("MSV threshold"), HMM3SearchWorker::tr("Stage 1 (MSV) threshold: promote hits w/ P <= F1."));
- Descriptor f2d(F2_ATTR, HMM3SearchWorker::tr("Vit threshold"), HMM3SearchWorker::tr("Stage 2 (Vit) threshold: promote hits w/ P <= F2."));
- Descriptor f3d(F3_ATTR, HMM3SearchWorker::tr("Fwd threshold"), HMM3SearchWorker::tr("Stage 3 (Fwd) threshold: promote hits w/ P <= F3."));
-
- a << new Attribute(nd, BaseTypes::STRING_TYPE(), true, "hmm_signal");
- a << new Attribute(nsd, BaseTypes::NUM_TYPE(), false, 0.0);
- a << new Attribute(td, BaseTypes::STRING_TYPE(), true, HMM3SearchWorker::E_THRESHOLD);
- Attribute *domEAttr = new Attribute(ded, BaseTypes::NUM_TYPE(), false, 1);
- Attribute *domTAttr = new Attribute(dtd, BaseTypes::NUM_TYPE(), false, 0.0);
- a << domEAttr;
- a << domTAttr;
- a << new Attribute(dzd, BaseTypes::NUM_TYPE(), false, 0);
- a << new Attribute(bfd, BaseTypes::BOOL_TYPE(), false, false);
- a << new Attribute(scd, BaseTypes::BOOL_TYPE(), false, false);
- a << new Attribute(hfd, BaseTypes::BOOL_TYPE(), false, false);
- Attribute *f1Attr = new Attribute(f1d, BaseTypes::NUM_TYPE(), false, 0.02);
- Attribute *f2Attr = new Attribute(f2d, BaseTypes::NUM_TYPE(), false, 0.001);
- Attribute *f3Attr = new Attribute(f3d, BaseTypes::NUM_TYPE(), false, 0.00001);
- a << f1Attr;
- a << f2Attr;
- a << f3Attr;
-
- domEAttr->addRelation(new VisibilityRelation(THRESHOLD_ATTR, HMM3SearchWorker::E_THRESHOLD));
- domTAttr->addRelation(new VisibilityRelation(THRESHOLD_ATTR, HMM3SearchWorker::T_THRESHOLD));
- f1Attr->addRelation(new VisibilityRelation(HEURISTIC_FILTER_ATTR, false));
- f2Attr->addRelation(new VisibilityRelation(HEURISTIC_FILTER_ATTR, false));
- f3Attr->addRelation(new VisibilityRelation(HEURISTIC_FILTER_ATTR, false));
- }
-
- Descriptor desc(HMM3SearchWorkerFactory::ACTOR, HMM3SearchWorker::tr("HMM3 Search"),
- HMM3SearchWorker::tr("Searches each input sequence for significantly similar sequence matches to all specified HMM profiles."
- " In case several profiles were supplied, searches with all profiles one by one and outputs united set of annotations for each sequence."));
- ActorPrototype* proto = new IntegralBusActorPrototype(desc, p, a);
- QMap<QString, PropertyDelegate*> delegates;
- {
- QVariantMap map;
- map[HMM3SearchWorker::tr("<= E-value")] = HMM3SearchWorker::E_THRESHOLD;
- map[HMM3SearchWorker::tr(">= score")] = HMM3SearchWorker::T_THRESHOLD;
- map[HMM3SearchWorker::tr("Use profile's GA gathering cutoffs")] = HMM3SearchWorker::CUT_GA_THRESHOLD;
- map[HMM3SearchWorker::tr("Use profile's NC noise cutoffs")] = HMM3SearchWorker::CUT_NC_THRESHOLD;
- map[HMM3SearchWorker::tr("Use profile's TC trusted cutoffs")] = HMM3SearchWorker::CUT_TC_THRESHOLD;
- delegates[THRESHOLD_ATTR] = new ComboBoxDelegate(map);
- }
- {
- QVariantMap eMap; eMap["prefix"]= ("1e"); eMap["minimum"] = (-99); eMap["maximum"] = (0);
- delegates[DOM_E_ATTR] = new SpinBoxDelegate(eMap);
- }
- {
- QVariantMap nMap; nMap["maximum"] = (INT_MAX); nMap["minimum"] = (0);
- delegates[SEED_ATTR] = new SpinBoxDelegate(nMap);
- }
- {
- QVariantMap tMap; tMap["decimals"]= (2); tMap["minimum"] = (-1e+09); tMap["maximum"] = (1e+09);
- tMap["singleStep"] = (0.1);
- delegates[DOM_T_ATTR] = new DoubleSpinBoxDelegate(tMap);
- }
- {
- QVariantMap map; map["decimals"] = 2; map["minimum"] = 0.0; map["maximum"] = 1000000000.0;
- delegates[DOM_Z_ATTR] = new DoubleSpinBoxDelegate(map);
- }
- {
- QVariantMap map; map["decimals"] = 5; map["minimum"] = -1000000000.0; map["maximum"] = 1000000000.0;
- delegates[F1_ATTR] = new DoubleSpinBoxDelegate(map);
- delegates[F2_ATTR] = new DoubleSpinBoxDelegate(map);
- delegates[F3_ATTR] = new DoubleSpinBoxDelegate(map);
- }
-
- proto->setEditor(new DelegateEditor(delegates));
- proto->setIconPath(":/hmm3/images/hmmer_16.png");
- proto->setPrompter(new HMM3SearchPrompter());
- WorkflowEnv::getProtoRegistry()->registerProto(HMM3Lib::HMM3_CATEGORY(), proto);
-
- DomainFactory* localDomain = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
- localDomain->registerEntry(new HMM3SearchWorkerFactory());
-}
-
-/*******************************
- * HMM3SearchPrompter
- *******************************/
-QString HMM3SearchPrompter::composeRichDoc() {
- Actor* hmmProducer = qobject_cast<IntegralBusPort*>(target->getPort(HMM3_PORT))->getProducer(HMM3_PORT);
- Actor* seqProducer = qobject_cast<IntegralBusPort*>(target->getPort(BasePorts::IN_SEQ_PORT_ID()))->getProducer(BasePorts::IN_SEQ_PORT_ID());
-
- QString seqName = seqProducer ? tr("For each sequence from <u>%1</u>,").arg(seqProducer->getLabel()) : "";
- QString hmmName = hmmProducer ? tr("using all profiles provided by <u>%1</u>,").arg(hmmProducer->getLabel()) : "";
-
- QString resultName = getHyperlink(NAME_ATTR, getRequiredParam(NAME_ATTR));
-
- QString doc = tr("%1 search HMM3 signals %2. "
- "<br>Output the list of found regions annotated as <u>%4</u>.")
- .arg(seqName)
- .arg(hmmName)
- .arg(resultName);
-
- return doc;
-}
-
-/*******************************
- * HMM3SearchWorker
- *******************************/
-HMM3SearchWorker::HMM3SearchWorker(Actor* a) : BaseWorker(a, false), hmmPort(NULL), seqPort(NULL), output(NULL) {
-}
-
-void HMM3SearchWorker::init() {
- setDefaultUHMM3SearchSettings(&cfg);
-
- hmmPort = ports.value(HMM3_PORT);
- seqPort = ports.value(BasePorts::IN_SEQ_PORT_ID());
- output = ports.value(BasePorts::OUT_ANNOTATIONS_PORT_ID());
- seqPort->addComplement(output);
- output->addComplement(seqPort);
-
- initConfig();
- resultName = actor->getParameter(NAME_ATTR)->getAttributeValue<QString>(context);
- if(resultName.isEmpty()){
- algoLog.details(tr("Value for attribute name is empty, default name used"));
- resultName = "hmm_signal";
- }
-}
-
-bool HMM3SearchWorker::isReady() const {
- if (isDone()) {
- return false;
- }
- bool seqEnded = seqPort->isEnded();
- bool hmmEnded = hmmPort->isEnded();
- int seqHasMes = seqPort->hasMessage();
- int hmmHasMes = hmmPort->hasMessage();
- return hmmHasMes || (hmmEnded && (seqHasMes || seqEnded));
-}
-
-Task* HMM3SearchWorker::tick() {
- while (hmmPort->hasMessage()) {
- hmms << hmmPort->get().getData().toMap().value(HMM3Lib::HMM3_SLOT.getId()).value<const P7_HMM*>();
- }
- if (!hmmPort->isEnded()) { // || hmms.isEmpty() || !seqPort->hasMessage()
- return NULL;
- }
-
- if (seqPort->hasMessage()) {
- Message inputMessage = getMessageAndSetupScriptValues(seqPort);
- if (inputMessage.isEmpty() || hmms.isEmpty()) {
- output->transit();
- return NULL;
- }
- SharedDbiDataHandler seqId = inputMessage.getData().toMap().value(BaseSlots::DNA_SEQUENCE_SLOT().getId()).value<SharedDbiDataHandler>();
- QScopedPointer<U2SequenceObject> seqObj(StorageUtils::getSequenceObject(context->getDataStorage(), seqId));
- if (seqObj.isNull()) {
- return NULL;
- }
- U2OpStatusImpl os;
- DNASequence dnaSequence = seqObj->getWholeSequence(os);
- CHECK_OP(os, new FailTask(os.getError()));
-
- if (dnaSequence.alphabet->getType() != DNAAlphabet_RAW) {
- QList<Task*> subtasks;
- UHMM3SearchTaskSettings settings;
- settings.inner = cfg;
- foreach(const P7_HMM* hmm, hmms) {
- subtasks << new UHMM3SWSearchTask (hmm, dnaSequence, settings);
- }
- Task* searchTask = new MultiTask(tr("Find HMM3 signals in %1").arg(dnaSequence.getName()), subtasks);
- connect(new TaskSignalMapper(searchTask), SIGNAL(si_taskFinished(Task*)), SLOT(sl_taskFinished(Task*)));
- return searchTask;
- }
- QString err = tr("Bad sequence supplied to input: %1").arg(dnaSequence.getName());
- return new FailTask(err);
- } if (seqPort->isEnded()) {
- setDone();
- output->setEnded();
- }
- return NULL;
-}
-
-void HMM3SearchWorker::sl_taskFinished(Task *t) {
- SAFE_POINT(NULL != t, "Invalid task is encountered",);
- if (t->isCanceled()) {
- return;
- }
- if (NULL != output) {
- QList<SharedAnnotationData> list;
-
- foreach(Task *sub, t->getSubtasks()) {
- UHMM3SWSearchTask *hst = qobject_cast<UHMM3SWSearchTask *>(sub);
- if (hst == NULL){
- continue;
- }
- list.append(hst->getResultsAsAnnotations(U2FeatureTypes::MiscSignal, resultName));
- }
-
- CHECK(!list.isEmpty(), );
-
- const SharedDbiDataHandler tableId = context->getDataStorage()->putAnnotationTable(list);
- output->put(Message(BaseTypes::ANNOTATION_TABLE_TYPE(), qVariantFromValue<SharedDbiDataHandler>(tableId)));
- algoLog.info(tr("Found %1 HMM3 signals").arg(list.size()));
- }
-}
-
-void HMM3SearchWorker::cleanup() {
-}
-
-void HMM3SearchWorker::initConfig() {
- const QString thresholdType = getValue<QString>(THRESHOLD_ATTR);
- if (E_THRESHOLD == thresholdType) {
- int domENum = actor->getParameter(DOM_E_ATTR)->getAttributeValue<int>(context);
- if (domENum > 1) {
- algoLog.details(tr("Power of e-value must be less or equal to zero. Using default value: 1e+1"));
- domENum = 1;
- }
- cfg.domE = pow(10, domENum);
- } else if (T_THRESHOLD == thresholdType) {
- cfg.domT = (float)actor->getParameter(DOM_T_ATTR)->getAttributeValue<double>(context);
- if (cfg.domT <= 0) {
- algoLog.details(tr("Score must be greater than zero. Using default value: 0.01"));
- cfg.domT = 0.01;
- }
- } else if (CUT_GA_THRESHOLD == thresholdType) {
- cfg.useBitCutoffs = p7H_GA;
- } else if (CUT_NC_THRESHOLD == thresholdType) {
- cfg.useBitCutoffs = p7H_NC;
- } else if (CUT_TC_THRESHOLD == thresholdType) {
- cfg.useBitCutoffs = p7H_TC;
- }
-
- double domZValue = getValue<double>(DOM_Z_ATTR);
- if (domZValue > 0) {
- cfg.domZ = domZValue;
- }
-
- cfg.noBiasFilter = getValue<bool>(BIAS_FILTER_ATTR);
- cfg.noNull2 = getValue<bool>(SCORE_CORRECTION_ATTR);
- cfg.doMax = getValue<bool>(HEURISTIC_FILTER_ATTR);
-
- if (!cfg.doMax) {
- cfg.f1 = getValue<double>(F1_ATTR);
- cfg.f2 = getValue<double>(F1_ATTR);
- cfg.f3 = getValue<double>(F1_ATTR);
- }
-
- cfg.seed = actor->getParameter(SEED_ATTR)->getAttributeValue<int>(context);
-}
-
-} //namespace LocalWorkflow
-} //namespace U2
diff --git a/src/plugins_3rdparty/hmm3/transl/english.ts b/src/plugins_3rdparty/hmm3/transl/english.ts
deleted file mode 100644
index 49dd9c1..0000000
--- a/src/plugins_3rdparty/hmm3/transl/english.ts
+++ /dev/null
@@ -1,2209 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<!DOCTYPE TS>
-<TS version="2.1" language="en" sourcelanguage="en">
-<context>
- <name>QObject</name>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="350"/>
- <source>HMM reader error occurred: </source>
- <translation>HMM reader error:</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="351"/>
- <source>Reading file failed</source>
- <translation>Reading file failed</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="78"/>
- <source>HMM3 Profile</source>
- <translation>HMM3 Profile</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormat.cpp" line="41"/>
- <source>Writing HMM profile file failed</source>
- <translation>Writing HMM profile file failed</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormat.cpp" line="302"/>
- <source>HMM files are read only</source>
- <translation>HMM files are read only</translation>
- </message>
-</context>
-<context>
- <name>U2::GTest_CompareHmmFiles</name>
- <message>
- <location filename="../src/tests/uhmmer3BuildTests.cpp" line="304"/>
- <source>File #1 not set</source>
- <translation>File #1 not set</translation>
- </message>
- <message>
- <location filename="../src/tests/uhmmer3BuildTests.cpp" line="310"/>
- <source>File #2 not set</source>
- <translation>File #2 not set</translation>
- </message>
- <message>
- <location filename="../src/tests/uhmmer3BuildTests.cpp" line="370"/>
- <source>Error creating ioadapter for first file</source>
- <translation>Error creating ioadapter for first file</translation>
- </message>
- <message>
- <location filename="../src/tests/uhmmer3BuildTests.cpp" line="374"/>
- <source>Error opening 1 file</source>
- <translation>Error opening 1 file</translation>
- </message>
- <message>
- <location filename="../src/tests/uhmmer3BuildTests.cpp" line="381"/>
- <source>Error creating ioadapter for second file</source>
- <translation>Error creating ioadapter for second file</translation>
- </message>
- <message>
- <location filename="../src/tests/uhmmer3BuildTests.cpp" line="385"/>
- <source>Error opening second file</source>
- <translation>Error opening second file</translation>
- </message>
- <message>
- <location filename="../src/tests/uhmmer3BuildTests.cpp" line="410"/>
- <source>Names of aligments not matched</source>
- <translation>Names of aligments not matched</translation>
- </message>
- <message>
- <location filename="../src/tests/uhmmer3BuildTests.cpp" line="414"/>
- <source>Comparing files length not matched</source>
- <translation>Comparing files length not matched</translation>
- </message>
- <message>
- <location filename="../src/tests/uhmmer3BuildTests.cpp" line="421"/>
- <source>Files parts not equal:'%1' and '%2'</source>
- <translation>Files parts not equal:'%1' and '%2'</translation>
- </message>
-</context>
-<context>
- <name>U2::GTest_UHMM3Search</name>
- <message>
- <location filename="../src/tests/uhmmer3SearchTests.cpp" line="247"/>
- <source>Sequence is empty</source>
- <translation>Sequence is empty</translation>
- </message>
-</context>
-<context>
- <name>U2::GTest_UHMM3SearchCompare</name>
- <message>
- <location filename="../src/tests/uhmmer3SearchTests.cpp" line="374"/>
- <source>Internal error (cannot parse float number from string '%1')</source>
- <translation>Internal error (cannot parse float number from string '%1')</translation>
- </message>
- <message>
- <location filename="../src/tests/uhmmer3SearchTests.cpp" line="387"/>
- <source>Can't parse significance:%1</source>
- <translation>Can't parse significance:%1</translation>
- </message>
- <message>
- <location filename="../src/tests/uhmmer3SearchTests.cpp" line="602"/>
- <source>No search task in test context</source>
- <translation>No search task in test context</translation>
- </message>
-</context>
-<context>
- <name>U2::GTest_UHMMER3Build</name>
- <message>
- <location filename="../src/tests/uhmmer3BuildTests.cpp" line="228"/>
- <source>No input file given</source>
- <translation>No input file given</translation>
- </message>
- <message>
- <location filename="../src/tests/uhmmer3BuildTests.cpp" line="234"/>
- <source>No output file given</source>
- <translation>No output file given</translation>
- </message>
-</context>
-<context>
- <name>U2::Hmmer3SearchWorfklowTask</name>
- <message>
- <location filename="../src/search/Hmmer3SearchWorkflowTask.cpp" line="56"/>
- <source>No annotations objects found</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/search/Hmmer3SearchWorkflowTask.cpp" line="76"/>
- <source>HMM profile used</source>
- <translation type="unfinished">HMM profile used</translation>
- </message>
- <message>
- <location filename="../src/search/Hmmer3SearchWorkflowTask.cpp" line="79"/>
- <source>Task was not finished</source>
- <translation type="unfinished">Task was not finished</translation>
- </message>
- <message>
- <location filename="../src/search/Hmmer3SearchWorkflowTask.cpp" line="84"/>
- <source>Result annotation table</source>
- <translation type="unfinished">Result annotation table</translation>
- </message>
- <message>
- <location filename="../src/search/Hmmer3SearchWorkflowTask.cpp" line="85"/>
- <source>Result annotation group</source>
- <translation type="unfinished">Result annotation group</translation>
- </message>
- <message>
- <location filename="../src/search/Hmmer3SearchWorkflowTask.cpp" line="86"/>
- <source>Result annotation name</source>
- <translation type="unfinished">Result annotation name</translation>
- </message>
- <message>
- <location filename="../src/search/Hmmer3SearchWorkflowTask.cpp" line="88"/>
- <source>Results count</source>
- <translation type="unfinished">Results count</translation>
- </message>
-</context>
-<context>
- <name>U2::LocalWorkflow::HMM3BuildPrompter</name>
- <message>
- <location filename="../src/workers/HMM3BuildWorker.cpp" line="109"/>
- <source>For each MSA from <u>%1</u>,</source>
- <translation>For each MSA from <u>%1</u>,</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3BuildWorker.cpp" line="111"/>
- <source>%1 builds a HMM3 profile.</source>
- <translation>%1 builds a HMM3 profile.</translation>
- </message>
-</context>
-<context>
- <name>U2::LocalWorkflow::HMM3BuildWorker</name>
- <message>
- <location filename="../src/workers/HMM3BuildWorker.cpp" line="61"/>
- <source>Input MSA</source>
- <translation>Input MSA</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3BuildWorker.cpp" line="62"/>
- <source>Input multiple sequence alignment for building statistical model.</source>
- <translation>Input multiple sequence alignment for building statistical model.</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3BuildWorker.cpp" line="63"/>
- <source>HMM3 profile</source>
- <translation>HMM3 profile</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3BuildWorker.cpp" line="63"/>
- <source>Produced HMM3 profile</source>
- <translation>Produced HMM3 profile</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3BuildWorker.cpp" line="73"/>
- <source>Random seed</source>
- <translation>Random seed</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3BuildWorker.cpp" line="73"/>
- <source>Random generator seed. 0 - means that one-time arbitrary seed will be used.</source>
- <translation>Random generator seed. 0 - means that one-time arbitrary seed will be used.</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3BuildWorker.cpp" line="77"/>
- <source>HMM3 Build</source>
- <translation>HMM3 Build</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3BuildWorker.cpp" line="77"/>
- <source>Builds a HMM3 profile from a multiple sequence alignment.<p>The HMM3 profile is a statistical model which captures position-specific information about how conserved each column of the alignment is, and which residues are likely.</source>
- <translation>Builds a HMM3 profile from a multiple sequence alignment.<p>The HMM3 profile is a statistical model which captures position-specific information about how conserved each column of the alignment is, and which residues are likely.</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3BuildWorker.cpp" line="182"/>
- <source>Built HMM3 profile</source>
- <translation>Built HMM3 profile</translation>
- </message>
-</context>
-<context>
- <name>U2::LocalWorkflow::HMM3Lib</name>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="72"/>
- <source>HMM3 Profile</source>
- <translation>HMM3 Profile</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="80"/>
- <source>HMMER3 Tools</source>
- <translation>HMMER3 Tools</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="142"/>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="159"/>
- <source>HMM3 profile</source>
- <translation>HMM3 profile</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="142"/>
- <source>Input HMM3 profile</source>
- <translation>Input HMM3 profile</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="143"/>
- <source>Location</source>
- <translation>Location</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="143"/>
- <source>Location hint for the target file.</source>
- <translation>Location hint for the target file.</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="153"/>
- <source>Write HMM3 Profile</source>
- <translation>Write HMM3 Profile</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="153"/>
- <source>Saves all input HMM3 profiles to specified location.</source>
- <translation>Saves all input HMM3 profiles to specified location.</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="159"/>
- <source>Loaded HMM3 profile</source>
- <translation>Loaded HMM3 profile</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="166"/>
- <source>Read HMM3 Profile</source>
- <translation>Read HMM3 Profile</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="166"/>
- <source>Reads HMM3 profiles from file(s). The files can be local or Internet URLs.</source>
- <translation>Reads HMM3 profiles from file(s). The files can be local or Internet URLs.</translation>
- </message>
-</context>
-<context>
- <name>U2::LocalWorkflow::HMM3ReadPrompter</name>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="178"/>
- <source>Read HMM3 profile(s) from %1.</source>
- <translation>Read HMM3 profile(s) from %1.</translation>
- </message>
-</context>
-<context>
- <name>U2::LocalWorkflow::HMM3Reader</name>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="253"/>
- <source>Loaded HMM3 profile(s) from %1</source>
- <translation>Loaded HMM3 profile(s) from %1</translation>
- </message>
-</context>
-<context>
- <name>U2::LocalWorkflow::HMM3SearchPrompter</name>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="195"/>
- <source>For each sequence from <u>%1</u>,</source>
- <translation>For each sequence from <u>%1</u>,</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="196"/>
- <source>using all profiles provided by <u>%1</u>,</source>
- <translation>using all profiles provided by <u>%1</u>,</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="200"/>
- <source>%1 search HMM3 signals %2. <br>Output the list of found regions annotated as <u>%4</u>.</source>
- <translation>%1 search HMM3 signals %2. <br>Output the list of found regions annotated as <u>%4</u>.</translation>
- </message>
-</context>
-<context>
- <name>U2::LocalWorkflow::HMM3SearchWorker</name>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="85"/>
- <source>HMM3 profile</source>
- <translation>HMM3 profile</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="85"/>
- <source>HMM3 profile(s) to search with.</source>
- <translation>HMM3 profile(s) to search with.</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="86"/>
- <source>Input sequence</source>
- <translation>Input sequence</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="87"/>
- <source>An input sequence (nucleotide or protein) to search in.</source>
- <translation>An input sequence (nucleotide or protein) to search in.</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="88"/>
- <source>HMM3 annotations</source>
- <translation>HMM3 annotations</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="89"/>
- <source>Annotations marking found similar sequence regions.</source>
- <translation>Annotations marking found similar sequence regions.</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="103"/>
- <source>Result annotation</source>
- <translation>Result annotation</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="103"/>
- <source>A name of the result annotations.</source>
- <translation>A name of the result annotations.</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="104"/>
- <source>Seed</source>
- <translation>Seed</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="104"/>
- <source>Random generator seed. 0 - means that one-time arbitrary seed will be used.</source>
- <translation>Random generator seed. 0 - means that one-time arbitrary seed will be used.</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="105"/>
- <source>Threshold type</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="105"/>
- <source>Controlling reporting and model-specific thresholds.</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="106"/>
- <source>Filter by high E-value</source>
- <translation>Filter by high E-value</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="106"/>
- <source>Report domains with e-value less than.</source>
- <translation>Report domains with e-value less than.</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="107"/>
- <source>Filter by low score</source>
- <translation>Filter by low score</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="107"/>
- <source>Report domains with score greater than.</source>
- <translation>Report domains with score greater than.</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="108"/>
- <source>Significant sequences</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="108"/>
- <source>Number of significant sequences, for domain E-value calculation.</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="109"/>
- <source>No bias</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="109"/>
- <source>Turn off composition bias filter.</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="110"/>
- <source>No score corrections</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="110"/>
- <source>Turn off biased composition score corrections.</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="111"/>
- <source>No heuristic filters</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="111"/>
- <source>Turn all heuristic filters off (less speed, more power).</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="112"/>
- <source>MSV threshold</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="112"/>
- <source>Stage 1 (MSV) threshold: promote hits w/ P <= F1.</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="113"/>
- <source>Vit threshold</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="113"/>
- <source>Stage 2 (Vit) threshold: promote hits w/ P <= F2.</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="114"/>
- <source>Fwd threshold</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="114"/>
- <source>Stage 3 (Fwd) threshold: promote hits w/ P <= F3.</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="141"/>
- <source>HMM3 Search</source>
- <translation>HMM3 Search</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="142"/>
- <source>Searches each input sequence for significantly similar sequence matches to all specified HMM profiles. In case several profiles were supplied, searches with all profiles one by one and outputs united set of annotations for each sequence.</source>
- <translation>Searches each input sequence for significantly similar sequence matches to all specified HMM profiles. In case several profiles were supplied, searches with all profiles one by one and outputs united set of annotations for each sequence.</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="318"/>
- <source>Power of e-value must be less or equal to zero. Using default value: 1e+1</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <source>Power of e-value must be less or equal to zero. Using default value: 1e-1</source>
- <translation type="vanished">Power of e-value must be less or equal to zero. Using default value: 1e-1</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="325"/>
- <source>Score must be greater than zero. Using default value: 0.01</source>
- <translation>Score must be greater than zero. Using default value: 0.01</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="227"/>
- <source>Value for attribute name is empty, default name used</source>
- <translation>Value for attribute name is empty, default name used</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="148"/>
- <source><= E-value</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="149"/>
- <source>>= score</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="150"/>
- <source>Use profile's GA gathering cutoffs</source>
- <translation type="unfinished">Use profile's GA gathering cutoffs</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="151"/>
- <source>Use profile's NC noise cutoffs</source>
- <translation type="unfinished">Use profile's NC noise cutoffs</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="152"/>
- <source>Use profile's TC trusted cutoffs</source>
- <translation type="unfinished">Use profile's TC trusted cutoffs</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="273"/>
- <source>Find HMM3 signals in %1</source>
- <translation>Find HMM3 signals in %1</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="277"/>
- <source>Bad sequence supplied to input: %1</source>
- <translation>Bad sequence supplied to input: %1</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="306"/>
- <source>Found %1 HMM3 signals</source>
- <translation>Found %1 HMM3 signals</translation>
- </message>
-</context>
-<context>
- <name>U2::LocalWorkflow::HMM3WritePrompter</name>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="184"/>
- <source>unset</source>
- <translation>unset</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="188"/>
- <source>Save HMM3 profile(s) from <u>%1</u> to <u>%2</u>.</source>
- <translation>Save HMM3 profile(s) from <u>%1</u> to <u>%2</u>.</translation>
- </message>
-</context>
-<context>
- <name>U2::LocalWorkflow::HMM3Writer</name>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="277"/>
- <source>Empty HMM3 passed for writing to %1</source>
- <translation>Empty HMM3 passed for writing to %1</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="277"/>
- <source>Unspecified URL for writing HMM3</source>
- <translation>Unspecified URL for writing HMM3</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="293"/>
- <source>Writing HMM3 profile to %1</source>
- <translation>Writing HMM3 profile to %1</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3ADVContext</name>
- <message>
- <location filename="../src/uHMM3Plugin.cpp" line="247"/>
- <source>Find HMM signals with HMMER3...</source>
- <translation>Find HMM signals with HMMER3...</translation>
- </message>
- <message>
- <location filename="../src/uHMM3Plugin.cpp" line="257"/>
- <source>error</source>
- <translation>Error</translation>
- </message>
- <message>
- <location filename="../src/uHMM3Plugin.cpp" line="257"/>
- <source>No sequence in focus found</source>
- <translation>No sequence in focus found</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3Build</name>
- <message>
- <location filename="../src/build/uhmm3build.cpp" line="65"/>
- <source>UGENE cannot determine alphabet of alignment</source>
- <translation>UGENE cannot determine alphabet of alignment</translation>
- </message>
- <message>
- <location filename="../src/build/uhmm3build.cpp" line="70"/>
- <source>Run out of memory (creating alphabet failed)</source>
- <translation>Run out of memory (creating alphabet failed)</translation>
- </message>
- <message>
- <location filename="../src/build/uhmm3build.cpp" line="76"/>
- <source>Run out of memory (creating null model failed)</source>
- <translation>Run out of memory (creating null model failed)</translation>
- </message>
- <message>
- <location filename="../src/build/uhmm3build.cpp" line="81"/>
- <source>Run out of memory (creating builder failed)</source>
- <translation>Run out of memory (creating builder failed)</translation>
- </message>
- <message>
- <location filename="../src/build/uhmm3build.cpp" line="87"/>
- <source>Run out of memory (creating multiple alignment failed)</source>
- <translation>Run out of memory (creating multiple alignment failed)</translation>
- </message>
- <message>
- <location filename="../src/build/uhmm3build.cpp" line="92"/>
- <source>Run out of memory (digitizing of alignment failed)</source>
- <translation>Run out of memory (digitizing of alignment failed)</translation>
- </message>
- <message>
- <location filename="../src/build/uhmm3build.cpp" line="100"/>
- <source>Model building failed</source>
- <translation>Model building failed</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3BuildDialogImpl</name>
- <message>
- <location filename="../src/build/uHMM3BuildDialogImpl.cpp" line="63"/>
- <source>Build</source>
- <translation>Build</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildDialogImpl.cpp" line="64"/>
- <source>Cancel</source>
- <translation>Cancel</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildDialogImpl.cpp" line="119"/>
- <source>Select multiple alignment file</source>
- <translation>Select multiple alignment file</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildDialogImpl.cpp" line="78"/>
- <source>Select hmm file to create</source>
- <translation>Select HMM file to create</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildDialogImpl.cpp" line="185"/>
- <source>input file is empty</source>
- <translation>Input file is empty</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildDialogImpl.cpp" line="188"/>
- <source>output hmm file is empty</source>
- <translation>Output HMM profile file is empty</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildDialogImpl.cpp" line="197"/>
- <source>Error: bad arguments!</source>
- <translation>Error: bad arguments!</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3BuildTask</name>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="57"/>
- <source>Build HMM profile from %1 alignment</source>
- <translation>Build HMM profile from %1 alignment</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="71"/>
- <source>Given multiple alignment has no sequences</source>
- <translation>Given multiple alignment has no sequences</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="74"/>
- <source>Given multiple alignment is empty</source>
- <translation>Given multiple alignment is empty</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3BuildToFileTask</name>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="126"/>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="146"/>
- <source>Build HMM profile to '%1'</source>
- <translation>Build HMM profile to '%1'</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="163"/>
- <source>Build HMM profile '%1' -> '%2'</source>
- <translation>Build HMM profile '%1' -> '%2'</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="129"/>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="149"/>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="170"/>
- <source>Output file is not given</source>
- <translation>Output file is not given</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="134"/>
- <source>No multiple alignments given</source>
- <translation>No multiple alignments given</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="166"/>
- <source>Input file is not given</source>
- <translation>Input file is not given</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="181"/>
- <source>Unrecognized input alignment file format</source>
- <translation>Unrecognized input alignment file format</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="188"/>
- <source>Error opening '%1' file</source>
- <translation>Error opening '%1' file</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="229"/>
- <source>No multiple alignments found in input file</source>
- <translation>No multiple alignments found in input file</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="276"/>
- <source>Source alignment</source>
- <translation>Source alignment</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="278"/>
- <source>Profile name</source>
- <translation>Profile name</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="282"/>
- <source>Options:</source>
- <translation>Options:</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="283"/>
- <source>Model construction strategies</source>
- <translation>Model construction strategies</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="291"/>
- <source>Relative model construction strategies</source>
- <translation>Relative model construction strategies</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="293"/>
- <source>Gerstein/Sonnhammer/Chothia tree weights</source>
- <translation>Gerstein/Sonnhammer/Chothia tree weights</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="294"/>
- <source>Henikoff simple filter weights</source>
- <translation>Henikoff simple filter weights</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="295"/>
- <source>Henikoff position-based weights</source>
- <translation>Henikoff position-based weights</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="296"/>
- <source>No relative weighting; set all to 1</source>
- <translation>No relative weighting; set all to 1</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="297"/>
- <source>Weights given in MSA file</source>
- <translation>Weights given in MSA file</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="302"/>
- <source>Effective sequence weighting strategies</source>
- <translation>Effective sequence weighting strategies</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="304"/>
- <source>adjust effective sequence number to achieve relative entropy target</source>
- <translation>Adjust effective sequence number to achieve relative entropy target</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="305"/>
- <source>effective sequence number is number of single linkage clusters</source>
- <translation>Effective sequence number is number of single linkage clusters</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="306"/>
- <source>no effective sequence number weighting: just use number of sequences</source>
- <translation>No effective sequence number weighting: just use number of sequences</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="307"/>
- <source>set effective sequence number for all models to: %1</source>
- <translation>Set effective sequence number for all models to: %1</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="313"/>
- <source>Task finished with error: '%1'</source>
- <translation>Task finished with error: '%1'</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3LoadProfileAndSearchTask</name>
- <message>
- <location filename="../src/search/uHMM3SearchTask.cpp" line="514"/>
- <source>HMM search with '%1' HMM profile file</source>
- <translation>HMM search with '%1' HMM profile file</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3MSAEditorContext</name>
- <message>
- <location filename="../src/uHMM3Plugin.cpp" line="202"/>
- <source>Build HMMER3 profile</source>
- <translation>Build HMMER3 profile</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3Phmmer</name>
- <message>
- <location filename="../src/phmmer/uhmm3phmmer.cpp" line="168"/>
- <source>No input query sequence given</source>
- <translation>No input query sequence given</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3phmmer.cpp" line="172"/>
- <source>Database sequence to search in is not given</source>
- <translation>Database sequence to search in is not given</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3phmmer.cpp" line="179"/>
- <source>Run out of memory (creating alphabet failed)</source>
- <translation>Run out of memory (creating alphabet failed)</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3phmmer.cpp" line="184"/>
- <source>Run out of memory (creating null model failed)</source>
- <translation>Run out of memory (creating null model failed)</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3phmmer.cpp" line="191"/>
- <source>Run out of memory (creating builder failed)</source>
- <translation>Run out of memory (creating builder failed)</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3phmmer.cpp" line="198"/>
- <source>Setting scoring system failed with error: '%1'</source>
- <translation>Setting scoring system failed with error: '%1'</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3phmmer.cpp" line="205"/>
- <source>Error with digitizing sequence to search in</source>
- <translation>Error with digitizing sequence to search in</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3phmmer.cpp" line="212"/>
- <source>Error digitizing query sequence</source>
- <translation>Error digitizing query sequence</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3phmmer.cpp" line="222"/>
- <source>Error with creating HMM profile for query sequence</source>
- <translation>Error with creating HMM profile for query sequence</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3phmmer.cpp" line="228"/>
- <source>Run out of memory (creating top hits list failed)</source>
- <translation>Run out of memory (creating top hits list failed)</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3phmmer.cpp" line="234"/>
- <source>Run out of memory (creating pipeline failed)</source>
- <translation>Run out of memory (creating pipeline failed)</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3PhmmerDialogImpl</name>
- <message>
- <location filename="../src/phmmer/uHMM3PhmmerDialogImpl.cpp" line="58"/>
- <source>Search</source>
- <translation>Search</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uHMM3PhmmerDialogImpl.cpp" line="59"/>
- <source>Cancel</source>
- <translation>Cancel</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uHMM3PhmmerDialogImpl.cpp" line="117"/>
- <source>Select query sequence file</source>
- <translation>Select query sequence file</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uHMM3PhmmerDialogImpl.cpp" line="165"/>
- <source>Query sequence file path is empty</source>
- <translation>Query sequence file path is empty</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uHMM3PhmmerDialogImpl.cpp" line="170"/>
- <location filename="../src/phmmer/uHMM3PhmmerDialogImpl.cpp" line="181"/>
- <source>Error: bad arguments!</source>
- <translation>Error: bad arguments!</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uHMM3PhmmerDialogImpl.cpp" line="186"/>
- <source>Error</source>
- <translation>Error</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uHMM3PhmmerDialogImpl.cpp" line="186"/>
- <source>Cannot create an annotation object. Please check settings</source>
- <translation>Cannot create an annotation object. Please check settings</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3PhmmerTask</name>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="61"/>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="79"/>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="101"/>
- <source>HMM Phmmer task</source>
- <translation>HMM Phmmer task</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="72"/>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="110"/>
- <source>HMM Phmmer search %1 sequence in %2 database</source>
- <translation>HMM Phmmer search %1 sequence in %2 databas</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="87"/>
- <source>HMM Phmmer search %1 sequence with %2 database</source>
- <translation>HMM Phmmer search %1 sequence with %2 database</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="69"/>
- <source>Input query sequence</source>
- <translation>Input query sequence</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="70"/>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="108"/>
- <source>Database sequence to search in</source>
- <translation>Database sequence to search in</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="84"/>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="107"/>
- <source>Query sequence file path</source>
- <translation>Query sequence file path</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="85"/>
- <source>Database sequence file path</source>
- <translation>Database sequence file path</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="90"/>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="113"/>
- <source>Error opening query sequence file</source>
- <translation>Error opening query sequence file</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="94"/>
- <source>Error opening database sequence file</source>
- <translation>Error opening database sequence file</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="128"/>
- <source>Error loading sequence document:</source>
- <translation>Error loading sequence document:</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="131"/>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="134"/>
- <source>No dna sequence objects found in document</source>
- <translation>No dna sequence objects found in document</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="138"/>
- <source>Empty sequence loaded from document</source>
- <translation>Empty sequence loaded from document</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="150"/>
- <source> query sequence</source>
- <translation> query sequence</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="154"/>
- <source> db sequence</source>
- <translation> db sequence</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3PhmmerToAnnotationsTask</name>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="375"/>
- <source>querySeq sequence file path</source>
- <translation>querySeq sequence file path</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="380"/>
- <source>db sequence</source>
- <translation>Database sequence</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="385"/>
- <source>annotation object</source>
- <translation>Annotation object</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="390"/>
- <source>annotation name</source>
- <translation>Annotation name</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="395"/>
- <source>annotation group</source>
- <translation>Annotation group</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="408"/>
- <source>HMM Phmmer task</source>
- <translation>HMM Phmmer task</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="422"/>
- <source>HMM Phmmer search %1 sequence with %2 database</source>
- <translation>Phmmer search %1 sequence with %2 database</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="432"/>
- <source>Annotation object was removed</source>
- <translation>Annotation object was removed</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="456"/>
- <source>Query sequence</source>
- <translation>Query sequence</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="459"/>
- <source>Task was not finished</source>
- <translation>Task was not finished</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="464"/>
- <source>Result annotation table</source>
- <translation>Result annotation table</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="465"/>
- <source>Result annotation group</source>
- <translation>Result annotation group</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="466"/>
- <source>Result annotation name</source>
- <translation>Result annotation name</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="469"/>
- <source>Results count</source>
- <translation>Results count</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3Plugin</name>
- <message>
- <location filename="../src/uHMM3Plugin.cpp" line="70"/>
- <source>HMM3</source>
- <translation>HMM3</translation>
- </message>
- <message>
- <location filename="../src/uHMM3Plugin.cpp" line="70"/>
- <source>HMM profile tools. Plugin is based on HMMER 3.0b3 package: freely distributable implementation of profile HMM software for protein sequence analysis. Home page of project: http://hmmer.janelia.org/</source>
- <translation>HMM profile tools. Plugin is based on HMMER 3.0b3 package: freely distributable implementation of profile HMM software for protein sequence analysis. Home page of project: http://hmmer.janelia.org/</translation>
- </message>
- <message>
- <location filename="../src/uHMM3Plugin.cpp" line="100"/>
- <source>Build HMM3 profile...</source>
- <translation>Build HMM3 profile...</translation>
- </message>
- <message>
- <location filename="../src/uHMM3Plugin.cpp" line="105"/>
- <source>Search with HMMER3...</source>
- <translation>Search with HMMER3...</translation>
- </message>
- <message>
- <location filename="../src/uHMM3Plugin.cpp" line="110"/>
- <source>Search with HMMER3 phmmer...</source>
- <translation>Search with HMMER3 phmmer...</translation>
- </message>
- <message>
- <location filename="../src/uHMM3Plugin.cpp" line="167"/>
- <location filename="../src/uHMM3Plugin.cpp" line="178"/>
- <source>Error!</source>
- <translation>Error!</translation>
- </message>
- <message>
- <location filename="../src/uHMM3Plugin.cpp" line="167"/>
- <location filename="../src/uHMM3Plugin.cpp" line="178"/>
- <source>Target sequence not selected: no opened annotated dna view</source>
- <translation>No target sequence</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3QDActor</name>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="73"/>
- <location filename="../src/search/uhmm3QDActor.cpp" line="167"/>
- <source>HMM3</source>
- <translation>HMM3</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="84"/>
- <source>QD HMM3 search</source>
- <translation>QD HMM3 search</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="168"/>
- <source>Searches HMM signals in a sequence with one or more profile HMM and saves the results as annotations.</source>
- <translation>Searches HMM signals in a sequence with one or more profile HMM and saves the results as annotations.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="174"/>
- <source>Profile HMM</source>
- <translation>Profile HMM</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="175"/>
- <source>Semicolon-separated list of input HMM files.</source>
- <translation>Semicolon-separated list of input HMM files.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="178"/>
- <source>Min Length</source>
- <translation>Min Length</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="179"/>
- <source>Minimum length of a result region.</source>
- <translation>Minimum length of a result region.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="182"/>
- <source>Max Length</source>
- <translation>Max Length</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="183"/>
- <source>Maximum length of a result region.</source>
- <translation>Maximum length of a result region.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="188"/>
- <source>Use E-value</source>
- <translation>Use E-value</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="189"/>
- <source>Filters by E-value if true. Otherwise filters by score.</source>
- <translation>Filters by E-value if true. Otherwise filters by score.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="192"/>
- <source>Filter by High E-value</source>
- <translation>Filter by High E-value</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="193"/>
- <source>Reports domains <= this E-value threshold in output.</source>
- <translation>Reports domains <= this E-value threshold in output.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="197"/>
- <source>Filter by Low Score</source>
- <translation>Filter by Low Score</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="198"/>
- <source>Reports domains >= this score cutoff in output.</source>
- <translation>Reports domains >= this score cutoff in output.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="203"/>
- <source>Max</source>
- <translation>Max</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="204"/>
- <source>Turns off all acceleration heuristic filters. This increases sensitivity somewhat, at a large cost in speed.</source>
- <translation>Turns off all acceleration heuristic filters. This increases sensitivity somewhat, at a large cost in speed.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="209"/>
- <source>MSV Filter Threshold</source>
- <translation>MSV Filter Threshold</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="210"/>
- <source>P-value threshold for the MSV filter step of the acceleration pipeline.</source>
- <translation>P-value threshold for the MSV filter step of the acceleration pipeline.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="214"/>
- <source>Viterbi Filter Threshold</source>
- <translation>Viterbi Filter Threshold</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="215"/>
- <source>P-value threshold for the Viterbi filter step of the acceleration pipeline.</source>
- <translation>P-value threshold for the Viterbi filter step of the acceleration pipeline.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="219"/>
- <source>Forward Filter Threshold</source>
- <translation>Forward Filter Threshold</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="220"/>
- <source>P-value threshold for the Forward filter step of the acceleration pipeline.</source>
- <translation>P-value threshold for the Forward filter step of the acceleration pipeline.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="224"/>
- <source>No Bias Filter</source>
- <translation>No Bias Filter</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="225"/>
- <source>Turns off composition bias filter. This increases sensitivity somewhat, but can come at a high cost in speed.</source>
- <translation>Turns off composition bias filter. This increases sensitivity somewhat, but can come at a high cost in speed.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="232"/>
- <source>No Null2</source>
- <translation>No Null2</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="233"/>
- <source>Turns off the null2 score corrections for biased composition.</source>
- <translation>Turns off the null2 score corrections for biased composition.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="237"/>
- <source>Number of Sequences</source>
- <translation>Number of Sequences</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="238"/>
- <source>Specifies number of significant sequences. It is used for domain E-value calculations.</source>
- <translation>Specifies number of significant sequences. It is used for domain E-value calculations.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="242"/>
- <source>Seed</source>
- <translation>Seed</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="243"/>
- <source>Random number seed. The default is to use a fixed seed(42), so that results are exactly reproducible. Any other positive integer will give different (but also reproducible) results. A choice of 0 uses a randomly chosen seed.</source>
- <translation>Random number seed. The default is to use a fixed seed(42), so that results are exactly reproducible. Any other positive integer will give different (but also reproducible) results. A choice of 0 uses a randomly chosen seed.</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3SWPhmmerTask</name>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="216"/>
- <source>HMM Phmmer search %1 sequence in %2 database</source>
- <translation>HMM Phmmer search %1 sequence in %2 databas</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="222"/>
- <source>Can not create load query doc task</source>
- <translation>Can not create load query doc task</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="232"/>
- <source> querySeq sequence</source>
- <translation> querySeq sequence</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="245"/>
- <source>Invalid db sequence alphabet: %1</source>
- <translation>Invalid db sequence alphabet: %1</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="251"/>
- <source>Invalid query sequence alphabet: %1</source>
- <translation>Invalid query sequence alphabet: %1</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="274"/>
- <source>Cannot search for nucleic query in amino sequence</source>
- <translation>Cannot search for nucleic query in amino sequence</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="302"/>
- <source>HMMER3 phmmer sequence walker search task</source>
- <translation>HMMER3 phmmer sequence walker search task</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3SWSearchTask</name>
- <message>
- <location filename="../src/search/uHMM3SearchTask.cpp" line="86"/>
- <location filename="../src/search/uHMM3SearchTask.cpp" line="106"/>
- <location filename="../src/search/uHMM3SearchTask.cpp" line="147"/>
- <source>HMM search task with amino and complement translations</source>
- <translation>HMM search task with amino and complement translations</translation>
- </message>
- <message>
- <location filename="../src/search/uHMM3SearchTask.cpp" line="92"/>
- <location filename="../src/search/uHMM3SearchTask.cpp" line="110"/>
- <source>HMM search task with amino and complement translations using '%1' profile HMM</source>
- <translation>HMM search task with amino and complement translations using '%1' profile HMM</translation>
- </message>
- <message>
- <location filename="../src/search/uHMM3SearchTask.cpp" line="365"/>
- <source>Unrecognized alphabet of sequence</source>
- <translation>Unrecognized alphabet of sequence</translation>
- </message>
- <message>
- <location filename="../src/search/uHMM3SearchTask.cpp" line="369"/>
- <source>Invalid alphabet of sequence</source>
- <translation>Invalid alphabet of sequence</translation>
- </message>
- <message>
- <location filename="../src/search/uHMM3SearchTask.cpp" line="375"/>
- <source>Cannot search for nucleic HMM profile in amino sequence</source>
- <translation>Cannot search for nucleic HMM profile in amino sequence</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3SWSearchToAnnotationsTask</name>
- <message>
- <source>hmm profile filename</source>
- <translation type="vanished">Profile HMM file path</translation>
- </message>
- <message>
- <source>annotation object</source>
- <translation type="vanished">Annotation object</translation>
- </message>
- <message>
- <source>annotations group name</source>
- <translation type="vanished">Annotations group name</translation>
- </message>
- <message>
- <source>annotations name</source>
- <translation type="vanished">Annotations name</translation>
- </message>
- <message>
- <source>HMMER3 search task</source>
- <translation type="vanished">HMM3 search task</translation>
- </message>
- <message>
- <source>dna sequence</source>
- <translation type="vanished">Sequence to search in</translation>
- </message>
- <message>
- <source>HMMER3 search task with '%1' profile</source>
- <translation type="vanished">HMMER3 search task with '%1' profile</translation>
- </message>
- <message>
- <source>Sequence file</source>
- <translation type="vanished">Sequence file path</translation>
- </message>
- <message>
- <source>HMM profile used</source>
- <translation type="vanished">HMM profile used</translation>
- </message>
- <message>
- <source>Task was not finished</source>
- <translation type="vanished">Task was not finished</translation>
- </message>
- <message>
- <source>Result annotation table</source>
- <translation type="vanished">Result annotation table</translation>
- </message>
- <message>
- <source>Result annotation group</source>
- <translation type="vanished">Result annotation group</translation>
- </message>
- <message>
- <source>Result annotation name</source>
- <translation type="vanished">Result annotation name</translation>
- </message>
- <message>
- <source>Results count</source>
- <translation type="vanished">Results count</translation>
- </message>
- <message>
- <source>Cannot load sequence document</source>
- <translation type="vanished">Cannot load sequence document</translation>
- </message>
- <message>
- <source>No sequence objects loaded</source>
- <translation type="vanished">No sequence objects found in document</translation>
- </message>
- <message>
- <source>Unknown sequence type loaded</source>
- <translation type="vanished">No sequence objects found in document</translation>
- </message>
- <message>
- <source>Empty sequence loaded</source>
- <translation type="vanished">Empty sequence loaded</translation>
- </message>
- <message>
- <source>Annotation object removed</source>
- <translation type="vanished">Choosed annotation object removed</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3Search</name>
- <message>
- <location filename="../src/search/uhmm3search.cpp" line="61"/>
- <source>Bad HMM profile given</source>
- <translation>Bad HMM profile given</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3search.cpp" line="65"/>
- <source>Empty sequence given</source>
- <translation>Empty sequence given</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3search.cpp" line="73"/>
- <source>Run out of memory (creation of sequence failed)</source>
- <translation>Run out of memory (creation of sequence failed)</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3search.cpp" line="79"/>
- <source>Profile HMM and sequence alphabets no matched</source>
- <translation>Profile HMM and sequence alphabets no matched</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3search.cpp" line="85"/>
- <source>Run out of memory (creation of alphabet failed)</source>
- <translation>Run out of memory (creation of alphabet failed)</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3search.cpp" line="90"/>
- <source>Run out of memory (digitizing of sequence failed)</source>
- <translation>Run out of memory (digitizing of sequence failed)</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3search.cpp" line="96"/>
- <source>Run out of memory (creation of null model failed)</source>
- <translation>Run out of memory (creation of null model failed)</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3search.cpp" line="102"/>
- <location filename="../src/search/uhmm3search.cpp" line="116"/>
- <source>Run out of memory</source>
- <translation>Run out of memory</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3search.cpp" line="107"/>
- <source>Run out of memory (creation of optimized profile failed)</source>
- <translation>Run out of memory (creation of optimized profile failed)</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3search.cpp" line="121"/>
- <source>Run out of memory (top hits list creation failed)</source>
- <translation>Run out of memory (top hits list creation failed)</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3SearchDialogImpl</name>
- <message>
- <location filename="../src/search/uHMM3SearchDialogImpl.cpp" line="58"/>
- <source>Run</source>
- <translation>Run</translation>
- </message>
- <message>
- <location filename="../src/search/uHMM3SearchDialogImpl.cpp" line="59"/>
- <source>Cancel</source>
- <translation>Cancel</translation>
- </message>
- <message>
- <location filename="../src/search/uHMM3SearchDialogImpl.cpp" line="160"/>
- <source>HMM profile file path is empty</source>
- <translation>HMM profile file path is empty</translation>
- </message>
- <message>
- <location filename="../src/search/uHMM3SearchDialogImpl.cpp" line="175"/>
- <source>Error: bad arguments!</source>
- <translation>Error: bad arguments!</translation>
- </message>
- <message>
- <location filename="../src/search/uHMM3SearchDialogImpl.cpp" line="183"/>
- <source>Error</source>
- <translation>Error</translation>
- </message>
- <message>
- <location filename="../src/search/uHMM3SearchDialogImpl.cpp" line="183"/>
- <source>Cannot create an annotation object. Please check settings</source>
- <translation>Cannot create an annotation object. Please check settings</translation>
- </message>
- <message>
- <location filename="../src/search/uHMM3SearchDialogImpl.cpp" line="233"/>
- <source>Select query HMM profile</source>
- <translation>Select query HMM profile</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3SearchTask</name>
- <message>
- <location filename="../src/search/uHMM3SearchTask.cpp" line="469"/>
- <source>HMM search task</source>
- <translation>HMM search task</translation>
- </message>
- <message>
- <location filename="../src/search/uHMM3SearchTask.cpp" line="478"/>
- <source>HMM search with %1 profiles</source>
- <translation>HMM search with %1 profiles</translation>
- </message>
- <message>
- <location filename="../src/search/uHMM3SearchTask.cpp" line="476"/>
- <source>HMM search with '%1'</source>
- <translation>HMM search with '%1'</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMMFormat</name>
- <message>
- <location filename="../src/format/uHMMFormat.cpp" line="279"/>
- <source>Unknown error occurred</source>
- <translation>Unknown error occurred</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormat.cpp" line="305"/>
- <source>Profile HMM format</source>
- <translation>Profile HMM format</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormat.cpp" line="306"/>
- <source>hmm is a format for storing hmm profiles</source>
- <translation>hmm is a format for storing hmm profiles</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMMFormatReader</name>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="117"/>
- <source>Number expected in NULE line. %1 found</source>
- <translation>Number expected in NULE line. %1 found</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="140"/>
- <source>Bad line in header section: '%1'</source>
- <translation>Bad line in header section: '%1'</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="147"/>
- <source>Empty value in header line:%1</source>
- <translation>Empty value in header line:%1</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="387"/>
- <source>Input file made by unknown version of HMMER or is not HMM profile file</source>
- <translation>Input file made by unknown version of HMMER or is not HMM profile file</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="750"/>
- <source>ALPH must precede NULE in HMMER2 save files</source>
- <translation>ALPH must precede NULE in HMMER2 save files</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="769"/>
- <source>ALPH section must precede HMM</source>
- <translation>ALPH section must precede HMM</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="774"/>
- <source>Unknown alphabet</source>
- <translation>Unknown alphabet</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="797"/>
- <source>Failed to allocate body of the new HMM</source>
- <translation>Failed to allocate body of the new HMM</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="157"/>
- <source>Cannot parse integer from string: '%1'</source>
- <translation>Cannot parse integer from string: '%1'</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="168"/>
- <source>Cannot parse float number from string: '%1'</source>
- <translation>Cannot parse float number from string: '%1'</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="184"/>
- <source>Cannot parse 2 float numbers from string: '%1'</source>
- <translation>Cannot parse 2 float numbers from string: '%1'</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="195"/>
- <source>Cannot parse unsigned integer from string: '%1'</source>
- <translation>Cannot parse unsigned integer from string: '%1'</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="207"/>
- <source>Cannot parse y/n value from string: '%1'</source>
- <translation>Cannot parse y/n value from string: '%1'</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="216"/>
- <source>Run out of memory (date allocation failed)</source>
- <translation>Run out of memory (date allocation failed)</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="231"/>
- <source>Bad STATS line: '%1'</source>
- <translation>Bad STATS line: '%1'</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="235"/>
- <source>Bad STATS line: '%1'/ LOCAL word was not found</source>
- <translation>Bad STATS line: '%1'/ LOCAL word was not found</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="263"/>
- <location filename="../src/format/uHMMFormatReader.cpp" line="286"/>
- <source>Bad STATS line: %1. %2 not recognized</source>
- <translation>Bad STATS line: %1. %2 not recognized</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="299"/>
- <source>Unexpected end of file</source>
- <translation>Unexpected end of file</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="409"/>
- <source>Run out of memory (allocation of HMM shell failed)</source>
- <translation>Run out of memory (allocation of HMM shell failed)</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="439"/>
- <location filename="../src/format/uHMMFormatReader.cpp" line="697"/>
- <source>Length of HMM model should be positive. Found: %1</source>
- <translation>Length of HMM model should be positive. Found: %1</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="447"/>
- <location filename="../src/format/uHMMFormatReader.cpp" line="707"/>
- <source>Unrecognized alphabet type: %1</source>
- <translation>Unrecognized alphabet type: %1</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="470"/>
- <location filename="../src/format/uHMMFormatReader.cpp" line="731"/>
- <source>Number of sequences should be positive. Found: %1</source>
- <translation>Number of sequences should be positive. Found: %1</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="477"/>
- <source>EFFN shoold be positive. Found: %1</source>
- <translation>EFFN shoold be positive. Found: %1</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="512"/>
- <source>Unrecognized tag in header section: '%1'</source>
- <translation>Unrecognized tag in header section: '%1'</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="523"/>
- <source>Missing one or more STATS parameters</source>
- <translation>Missing one or more STATS parameters</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="528"/>
- <source>Run out of memory (failed to create alphabet)</source>
- <translation>Run out of memory (failed to create alphabet)</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="533"/>
- <source>Run out of memory (allocation of HMM body failed)</source>
- <translation>Run out of memory (allocation of HMM body failed)</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="574"/>
- <source>Line was excpected to start with %1. Found: %2</source>
- <translation>Line was excpected to start with %1. Found: %2</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="616"/>
- <source>Excpected to find closing '//'. Found %1 instead</source>
- <translation>Excpected to find closing '//'. Found %1 instead</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="633"/>
- <source>HMM name not found</source>
- <translation>HMM name not found</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="659"/>
- <source>Allocation failure, HMM shell</source>
- <translation>Allocation failure, HMM shell</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="800"/>
- <source>Failed to create background model</source>
- <translation>Failed to create background model</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="830"/>
- <source>Expected match line to start with %1. saw %2</source>
- <translation>Expected match line to start with %1. saw %2</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="884"/>
- <source>Expected closing //. found %1 instead</source>
- <translation>Expected closing //. found %1 instead</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="897"/>
- <source>No NAME found for HMM</source>
- <translation>No NAME found for HMM</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="900"/>
- <source>No LENG found for HMM</source>
- <translation>No LENG found for HMM</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="903"/>
- <source>No ALPH found for HMM</source>
- <translation>No ALPH found for HMM</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="909"/>
- <source>Failed to calibrate HMMER2 model after input conversion</source>
- <translation>Failed to calibrate HMMER2 model after input conversion</translation>
- </message>
-</context>
-<context>
- <name>UHMM3BuildDialog</name>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="62"/>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="79"/>
- <source>...</source>
- <translation>...</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="126"/>
- <source>assign cols w/ >= symfrac residues as consensus</source>
- <translation>assign cols w/ >= symfrac residues as consensus</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="129"/>
- <source>fast</source>
- <translation>Fast</translation>
- </message>
- <message>
- <source>Hmm3 Build</source>
- <translation type="vanished">Hmm3 Build</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="20"/>
- <source>HMM3 Build</source>
- <translation type="unfinished">HMM3 Build</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="42"/>
- <source>Input and output</source>
- <translation>Input and output</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="52"/>
- <source>Input alignment file</source>
- <translation>Input alignment file</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="69"/>
- <source>Build to profile</source>
- <translation>Build to profile</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="104"/>
- <source>Construction strategies</source>
- <translation>Construction strategies</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="142"/>
- <source>Sym fraction</source>
- <translation>Sym fraction</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="164"/>
- <source>Manual construction</source>
- <translation>Manual construction</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="167"/>
- <source>Hand (requires reference annotation)</source>
- <translation>Hand (requires reference annotation)</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="185"/>
- <source>Tag sequence as a fragment, if L < x*<L>, where x is:</source>
- <translation>Tag sequence as a fragment, if L < x*<L>, where x is:</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="226"/>
- <source>Relative weighting</source>
- <translation>Relative weighting</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="234"/>
- <source>Henikoff position-based weights</source>
- <translation>Henikoff position-based weights</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="250"/>
- <source>Gerstein/Sonnhammer/Chothia tree weights</source>
- <translation>Gerstein/Sonnhammer/Chothia tree weights</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="274"/>
- <source>Henikoff simple filter weights</source>
- <translation>Henikoff simple filter weights</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="284"/>
- <source>Identity cutoff</source>
- <translation>Identity cutoff</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="309"/>
- <source>Skip relative weighting; set all to 1</source>
- <translation>Skip relative weighting; set all to 1</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="316"/>
- <source>Use weights given in alignment file</source>
- <translation>Use weights given in alignment file</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="339"/>
- <source>Effective weighting</source>
- <translation>Effective weighting</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="347"/>
- <source>Adjust effective sequence number to achieve relative entropy target</source>
- <translation>Adjust effective sequence number to achieve relative entropy target</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="381"/>
- <source>Minimum relative entropy/position</source>
- <translation>Minimum relative entropy/position</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="433"/>
- <source>Sigma parameter</source>
- <translation>Sigma parameter</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="466"/>
- <source>Use number of single linkage clusters as effective</source>
- <translation>Use number of single linkage clusters as effective</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="476"/>
- <source>Fractional identity cutoff</source>
- <translation>Fractional identity cutoff</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="501"/>
- <source>Use number of sequences as effective</source>
- <translation>Use number of sequences as effective</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="516"/>
- <source>Effective sequence number for all models to</source>
- <translation>Effective sequence number for all models to</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="554"/>
- <source>E-value calibration</source>
- <translation>E-value calibration</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="573"/>
- <source>Length of sequences for MSV Gumbel mu fit</source>
- <translation>Length of sequences for MSV Gumbel mu fit</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="609"/>
- <source>Number of sequences for MSV Gumbel mu fit</source>
- <translation>Number of sequences for MSV Gumbel mu fit</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="642"/>
- <source>Length of sequences for Viterbi Gumbel mu fit</source>
- <translation>Length of sequences for Viterbi Gumbel mu fit</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="672"/>
- <source>Number of sequences for Viterbi Gumbel mu fit</source>
- <translation>Number of sequences for Viterbi Gumbel mu fit</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="702"/>
- <source>Length of sequences for Forward exp tail mu fit</source>
- <translation>Length of sequences for Forward exp tail mu fit</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="732"/>
- <source>Number of sequences for Forward exp tail mu fit</source>
- <translation>Number of sequences for Forward exp tail mu fit</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="762"/>
- <source>Tail mass for Forward exponential tail mu fit</source>
- <translation>Tail mass for Forward exponential tail mu fit</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="803"/>
- <source>Other</source>
- <translation>Other</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="819"/>
- <source>Random generator seed</source>
- <translation>Random generator seed</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="862"/>
- <source>* 0 means that one-time arbitrary seed will be used</source>
- <translation>* 0 means that one-time arbitrary seed will be used</translation>
- </message>
-</context>
-<context>
- <name>UHMM3PhmmerDialog</name>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="20"/>
- <source>Phmmer Search</source>
- <translation>Phmmer Search</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="36"/>
- <source>Input and output</source>
- <translation>Input and output</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="50"/>
- <source>Query sequence file:</source>
- <translation>Query sequence file:</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="60"/>
- <source>...</source>
- <translation>...</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="87"/>
- <source>Reporting tresholds</source>
- <translation>Reporting tresholds</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="103"/>
- <source>Report domains with E-value less than</source>
- <translation>Report domains with E-value less than</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="142"/>
- <source>Report domains with score greater than</source>
- <translation>Report domains with score greater than</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="166"/>
- <source>Number of significant sequences for domain E-value calculation</source>
- <translation>Number of significant sequences for domain E-value calculation</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="201"/>
- <source>Scoring system</source>
- <translation>Scoring system</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="217"/>
- <source>Gap open probability</source>
- <translation>Gap open probability</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="247"/>
- <source>Gap extend probability</source>
- <translation>Gap extend probability</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="285"/>
- <source>Acceleration</source>
- <translation>Acceleration</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="297"/>
- <source>Turn off composition bias filter</source>
- <translation>Turn off composition bias filter</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="310"/>
- <source>Turn off biased composition score corrections</source>
- <translation>Turn off biased composition score corrections</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="338"/>
- <source>Viterbi filter treshold</source>
- <translation>Viterbi filter treshold</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="368"/>
- <source>MSV filter treshold</source>
- <translation>MSV filter treshold</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="401"/>
- <source>Forward filter treshold</source>
- <translation>Forward filter treshold</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="437"/>
- <source>E-value calibration</source>
- <translation>E-value calibration</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="456"/>
- <source>Length of sequences for MSV Gumbel mu fit</source>
- <translation>Length of sequences for MSV Gumbel mu fit</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="492"/>
- <source>Number of sequences for MSV Gumbel mu fit</source>
- <translation>Number of sequences for MSV Gumbel mu fit</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="525"/>
- <source>Length of sequences for Viterbi Gumbel mu fit</source>
- <translation>Length of sequences for Viterbi Gumbel mu fit</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="555"/>
- <source>Number of sequences for Viterbi Gumbel mu fit</source>
- <translation>Number of sequences for Viterbi Gumbel mu fit</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="585"/>
- <source>Length of sequences for Forward exp tail mu fit</source>
- <translation>Length of sequences for Forward exp tail mu fit</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="615"/>
- <source>Number of sequences for Forward exp tail mu fit</source>
- <translation>Number of sequences for Forward exp tail mu fit</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="645"/>
- <source>Tail mass for Forward exponential tail mu fit</source>
- <translation>Tail mass for Forward exponential tail mu fit</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="686"/>
- <source>Other</source>
- <translation>Other</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="702"/>
- <source>Random generator seed</source>
- <translation>Random generator seed</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="745"/>
- <source>* 0 means that one-time arbitrary seed will be used</source>
- <translation>* 0 means that one-time arbitrary seed will be used</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="113"/>
- <source>1E+</source>
- <translation>1E+</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="323"/>
- <source>Turn all heuristic filters off (less speed, more power)</source>
- <translation>Turn all heuristic filters off (less speed, more power)</translation>
- </message>
-</context>
-<context>
- <name>UHMM3SearchDialog</name>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="65"/>
- <source>Query HMM file:</source>
- <translation>Query profile HMM file:</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="75"/>
- <source>...</source>
- <translation>...</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="119"/>
- <source>1E+</source>
- <translation>1E+</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="54"/>
- <source>Input and output</source>
- <translation>Input and output</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="92"/>
- <source>Reporting thresholds</source>
- <translation>Reporting thresholds</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="109"/>
- <source>Report domains with E-value less than</source>
- <translation>Report domains with E-value less than</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="146"/>
- <source>Report domains with score greater than</source>
- <translation>Report domains with score greater than</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="20"/>
- <source>HMM3 Search</source>
- <translation>HMM3 Search</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="180"/>
- <source>Score threshold:</source>
- <translation>Score threshold:</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="229"/>
- <source>Use profile's GA gathering cutoffs</source>
- <translation>Use profile's GA gathering cutoffs</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="265"/>
- <source>Use profile's NC noise cutoffs</source>
- <translation>Use profile's NC noise cutoffs</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="295"/>
- <source>Use profile's TC trusted cutoffs</source>
- <translation>Use profile's TC trusted cutoffs</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="312"/>
- <source>Number of significant sequences for domain E-value calculation</source>
- <translation>Number of significant sequences for domain E-value calculation</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="345"/>
- <source>Acceleration heuristics</source>
- <translation>Acceleration heuristics</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="360"/>
- <source>Turn off composition bias filter</source>
- <translation>Turn off composition bias filter</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="373"/>
- <source>Turn off biased composition score corrections</source>
- <translation>Turn off biased composition score corrections</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="401"/>
- <source>MSV filter threshold:</source>
- <translation>MSV filter threshold:</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="434"/>
- <source>Viterbi filter threshold:</source>
- <translation>Viterbi filter threshold:</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="464"/>
- <source>Forward filter threshold:</source>
- <translation>Forward filter threshold:</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="500"/>
- <source>Other</source>
- <translation>Other</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="522"/>
- <source>Random generator seed</source>
- <translation>Random generator seed</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="565"/>
- <source>* 0 means that one-time arbitrary seed will be used</source>
- <translation>* 0 means that one-time arbitrary seed will be used</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="386"/>
- <source>Turn all heuristic filters off (less speed, more power)</source>
- <translation>Turn all heuristic filters off (less speed, more power)</translation>
- </message>
-</context>
-</TS>
diff --git a/src/plugins_3rdparty/hmm3/transl/russian.ts b/src/plugins_3rdparty/hmm3/transl/russian.ts
deleted file mode 100644
index 98dacdd..0000000
--- a/src/plugins_3rdparty/hmm3/transl/russian.ts
+++ /dev/null
@@ -1,2206 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<!DOCTYPE TS>
-<TS version="2.1" language="ru_RU" sourcelanguage="en">
-<context>
- <name>QObject</name>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="350"/>
- <source>HMM reader error occurred: </source>
- <translation>Ошибка чтения:</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="351"/>
- <source>Reading file failed</source>
- <translation>Ошибка чтения файла</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="78"/>
- <source>HMM3 Profile</source>
- <translation>Профиль HММ</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormat.cpp" line="41"/>
- <source>Writing HMM profile file failed</source>
- <translation>Ошибка записи файла</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormat.cpp" line="302"/>
- <source>HMM files are read only</source>
- <translation>HMM профайлы доступны только для чтения</translation>
- </message>
-</context>
-<context>
- <name>U2::GTest_CompareHmmFiles</name>
- <message>
- <location filename="../src/tests/uhmmer3BuildTests.cpp" line="304"/>
- <source>File #1 not set</source>
- <translation>File #1 not set</translation>
- </message>
- <message>
- <location filename="../src/tests/uhmmer3BuildTests.cpp" line="310"/>
- <source>File #2 not set</source>
- <translation>File #2 not set</translation>
- </message>
- <message>
- <location filename="../src/tests/uhmmer3BuildTests.cpp" line="370"/>
- <source>Error creating ioadapter for first file</source>
- <translation>Error creating ioadapter for first file</translation>
- </message>
- <message>
- <location filename="../src/tests/uhmmer3BuildTests.cpp" line="374"/>
- <source>Error opening 1 file</source>
- <translation>Error opening 1 file</translation>
- </message>
- <message>
- <location filename="../src/tests/uhmmer3BuildTests.cpp" line="381"/>
- <source>Error creating ioadapter for second file</source>
- <translation>Error creating ioadapter for second file</translation>
- </message>
- <message>
- <location filename="../src/tests/uhmmer3BuildTests.cpp" line="385"/>
- <source>Error opening second file</source>
- <translation>Error opening second file</translation>
- </message>
- <message>
- <location filename="../src/tests/uhmmer3BuildTests.cpp" line="410"/>
- <source>Names of aligments not matched</source>
- <translation>Names of aligments not matched</translation>
- </message>
- <message>
- <location filename="../src/tests/uhmmer3BuildTests.cpp" line="414"/>
- <source>Comparing files length not matched</source>
- <translation>Comparing files length not matched</translation>
- </message>
- <message>
- <location filename="../src/tests/uhmmer3BuildTests.cpp" line="421"/>
- <source>Files parts not equal:'%1' and '%2'</source>
- <translation>Files parts not equal:'%1' and '%2'</translation>
- </message>
-</context>
-<context>
- <name>U2::GTest_UHMM3Search</name>
- <message>
- <location filename="../src/tests/uhmmer3SearchTests.cpp" line="247"/>
- <source>Sequence is empty</source>
- <translation>Последовательность для поиска не задана</translation>
- </message>
-</context>
-<context>
- <name>U2::GTest_UHMM3SearchCompare</name>
- <message>
- <location filename="../src/tests/uhmmer3SearchTests.cpp" line="374"/>
- <source>Internal error (cannot parse float number from string '%1')</source>
- <translation>Невозможно конвертировать строку в вещественное число: %1</translation>
- </message>
- <message>
- <location filename="../src/tests/uhmmer3SearchTests.cpp" line="387"/>
- <source>Can't parse significance:%1</source>
- <translation>Невозможно разобрать значение: %1</translation>
- </message>
- <message>
- <location filename="../src/tests/uhmmer3SearchTests.cpp" line="602"/>
- <source>No search task in test context</source>
- <translation>No search task in test context</translation>
- </message>
-</context>
-<context>
- <name>U2::GTest_UHMMER3Build</name>
- <message>
- <location filename="../src/tests/uhmmer3BuildTests.cpp" line="228"/>
- <source>No input file given</source>
- <translation>Входной файл множественного выравнивания не задан</translation>
- </message>
- <message>
- <location filename="../src/tests/uhmmer3BuildTests.cpp" line="234"/>
- <source>No output file given</source>
- <translation>Файл HMM профайла не установлен</translation>
- </message>
-</context>
-<context>
- <name>U2::Hmmer3SearchWorfklowTask</name>
- <message>
- <location filename="../src/search/Hmmer3SearchWorkflowTask.cpp" line="56"/>
- <source>No annotations objects found</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/search/Hmmer3SearchWorkflowTask.cpp" line="76"/>
- <source>HMM profile used</source>
- <translation type="unfinished">HMM профайл</translation>
- </message>
- <message>
- <location filename="../src/search/Hmmer3SearchWorkflowTask.cpp" line="79"/>
- <source>Task was not finished</source>
- <translation type="unfinished">Задание завершено с ошибкой</translation>
- </message>
- <message>
- <location filename="../src/search/Hmmer3SearchWorkflowTask.cpp" line="84"/>
- <source>Result annotation table</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/search/Hmmer3SearchWorkflowTask.cpp" line="85"/>
- <source>Result annotation group</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/search/Hmmer3SearchWorkflowTask.cpp" line="86"/>
- <source>Result annotation name</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/search/Hmmer3SearchWorkflowTask.cpp" line="88"/>
- <source>Results count</source>
- <translation type="unfinished">Количество результатов</translation>
- </message>
-</context>
-<context>
- <name>U2::LocalWorkflow::HMM3BuildPrompter</name>
- <message>
- <location filename="../src/workers/HMM3BuildWorker.cpp" line="109"/>
- <source>For each MSA from <u>%1</u>,</source>
- <translation>Для каждого MSA из <u>%1</u>,</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3BuildWorker.cpp" line="111"/>
- <source>%1 builds a HMM3 profile.</source>
- <translation>%1 построить HMM3 профиль.</translation>
- </message>
-</context>
-<context>
- <name>U2::LocalWorkflow::HMM3BuildWorker</name>
- <message>
- <location filename="../src/workers/HMM3BuildWorker.cpp" line="61"/>
- <source>Input MSA</source>
- <translation>Входное MSA</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3BuildWorker.cpp" line="62"/>
- <source>Input multiple sequence alignment for building statistical model.</source>
- <translation>Входное множественное выравнивание для построения статистической модели.</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3BuildWorker.cpp" line="63"/>
- <source>HMM3 profile</source>
- <translation>Профиль HММ3</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3BuildWorker.cpp" line="63"/>
- <source>Produced HMM3 profile</source>
- <translation>Профиль построенный с помощью HMM3</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3BuildWorker.cpp" line="73"/>
- <source>Random seed</source>
- <translation>Случайная затравка</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3BuildWorker.cpp" line="73"/>
- <source>Random generator seed. 0 - means that one-time arbitrary seed will be used.</source>
- <translation>Генератор случайной затравки. 0 - означает, что один раз будет использована случайная затравка.</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3BuildWorker.cpp" line="77"/>
- <source>HMM3 Build</source>
- <translation>Построение профиля с помощью HMMER3</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3BuildWorker.cpp" line="77"/>
- <source>Builds a HMM3 profile from a multiple sequence alignment.<p>The HMM3 profile is a statistical model which captures position-specific information about how conserved each column of the alignment is, and which residues are likely.</source>
- <translation>Строит HMM3 профиль из множественного выравнивания.</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3BuildWorker.cpp" line="182"/>
- <source>Built HMM3 profile</source>
- <translation>Построить профиль с помощью HMMER3</translation>
- </message>
-</context>
-<context>
- <name>U2::LocalWorkflow::HMM3Lib</name>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="72"/>
- <source>HMM3 Profile</source>
- <translation>Профиль HММ</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="80"/>
- <source>HMMER3 Tools</source>
- <translation>Инструменты HMMER3 (скрытые марковские модели)</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="142"/>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="159"/>
- <source>HMM3 profile</source>
- <translation>Профиль HММ3</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="142"/>
- <source>Input HMM3 profile</source>
- <translation>Входной профиль HMM3</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="143"/>
- <source>Location</source>
- <translation>Расположение</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="143"/>
- <source>Location hint for the target file.</source>
- <translation>Расположение подсказки для целефого файла.</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="153"/>
- <source>Write HMM3 Profile</source>
- <translation>Запись профиля HMM3</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="153"/>
- <source>Saves all input HMM3 profiles to specified location.</source>
- <translation>Сохраняет все входные HMM3 профили в указанное место.</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="159"/>
- <source>Loaded HMM3 profile</source>
- <translation>Загруженный профиль HMM3</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="166"/>
- <source>Read HMM3 Profile</source>
- <translation>Чтение профиля HMM3</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="166"/>
- <source>Reads HMM3 profiles from file(s). The files can be local or Internet URLs.</source>
- <translation>Читает HMM3 профили из файла(ов). Файлы могут быть локальными или из интернета.</translation>
- </message>
-</context>
-<context>
- <name>U2::LocalWorkflow::HMM3ReadPrompter</name>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="178"/>
- <source>Read HMM3 profile(s) from %1.</source>
- <translation>Прочитать профиль HMM3 из %1.</translation>
- </message>
-</context>
-<context>
- <name>U2::LocalWorkflow::HMM3Reader</name>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="253"/>
- <source>Loaded HMM3 profile(s) from %1</source>
- <translation>Загруженный профиль(и) HMM3 из %1</translation>
- </message>
-</context>
-<context>
- <name>U2::LocalWorkflow::HMM3SearchPrompter</name>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="195"/>
- <source>For each sequence from <u>%1</u>,</source>
- <translation>Для каждой последовательности из <u>%1</u>,</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="196"/>
- <source>using all profiles provided by <u>%1</u>,</source>
- <translation>используя все профили произведенные <u>%1</u>,</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="200"/>
- <source>%1 search HMM3 signals %2. <br>Output the list of found regions annotated as <u>%4</u>.</source>
- <translation>%1 искать сигналы HMM3 %2. <br>Аннотировать найденные регионы как <u>%4</u>.</translation>
- </message>
-</context>
-<context>
- <name>U2::LocalWorkflow::HMM3SearchWorker</name>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="85"/>
- <source>HMM3 profile</source>
- <translation>Профиль HММ3</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="85"/>
- <source>HMM3 profile(s) to search with.</source>
- <translation>HMM3 профили для поиска.</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="86"/>
- <source>Input sequence</source>
- <translation>Входная последовательность</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="87"/>
- <source>An input sequence (nucleotide or protein) to search in.</source>
- <translation>Входная последовательность (нуклеотидная или белковая) для поиска.</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="88"/>
- <source>HMM3 annotations</source>
- <translation>Аннотации HMM3</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="89"/>
- <source>Annotations marking found similar sequence regions.</source>
- <translation>Маркировка аннотаций обнаружила схожие зоны последовательности.</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="103"/>
- <source>Result annotation</source>
- <translation>Результирующая аннотация</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="103"/>
- <source>A name of the result annotations.</source>
- <translation>Имя результирующих аннотаций.</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="104"/>
- <source>Seed</source>
- <translation>Затравка</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="104"/>
- <source>Random generator seed. 0 - means that one-time arbitrary seed will be used.</source>
- <translation>Генератор случайной затравки. 0 - означает, что один раз будет использована случайная затравка.</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="105"/>
- <source>Threshold type</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="105"/>
- <source>Controlling reporting and model-specific thresholds.</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="106"/>
- <source>Filter by high E-value</source>
- <translation>Фильтрация по E-value</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="106"/>
- <source>Report domains with e-value less than.</source>
- <translation>Сохранять результаты c величиной E-value менее чем.</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="107"/>
- <source>Filter by low score</source>
- <translation>Фильтрация по низким показателям</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="107"/>
- <source>Report domains with score greater than.</source>
- <translation>Сохранять результаты с итоговой суммой более чем.</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="108"/>
- <source>Significant sequences</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="108"/>
- <source>Number of significant sequences, for domain E-value calculation.</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="109"/>
- <source>No bias</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="109"/>
- <source>Turn off composition bias filter.</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="110"/>
- <source>No score corrections</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="110"/>
- <source>Turn off biased composition score corrections.</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="111"/>
- <source>No heuristic filters</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="111"/>
- <source>Turn all heuristic filters off (less speed, more power).</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="112"/>
- <source>MSV threshold</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="112"/>
- <source>Stage 1 (MSV) threshold: promote hits w/ P <= F1.</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="113"/>
- <source>Vit threshold</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="113"/>
- <source>Stage 2 (Vit) threshold: promote hits w/ P <= F2.</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="114"/>
- <source>Fwd threshold</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="114"/>
- <source>Stage 3 (Fwd) threshold: promote hits w/ P <= F3.</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="141"/>
- <source>HMM3 Search</source>
- <translation>Поиск с помощью HMMER3</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="142"/>
- <source>Searches each input sequence for significantly similar sequence matches to all specified HMM profiles. In case several profiles were supplied, searches with all profiles one by one and outputs united set of annotations for each sequence.</source>
- <translation>Ищет все входные последовательности для аналогичной последовательности соответствующей всем указанным профилям HММ. В случае, если профилей несколько, ищутся все профили по порядку и на выходе получается набор аннотаций для каждой последовательности.</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="318"/>
- <source>Power of e-value must be less or equal to zero. Using default value: 1e+1</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <source>Power of e-value must be less or equal to zero. Using default value: 1e-1</source>
- <translation type="vanished">Значение e-value должно быть меньше или равно нулю. Используется значение по умолчанию: 1e-1</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="325"/>
- <source>Score must be greater than zero. Using default value: 0.01</source>
- <translation>Оценка должна быть больше нуля. Значение по умолчанию: 0,01</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="227"/>
- <source>Value for attribute name is empty, default name used</source>
- <translation>Имя пусто, использовано значение по умолчанию</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="148"/>
- <source><= E-value</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="149"/>
- <source>>= score</source>
- <translation type="unfinished"></translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="150"/>
- <source>Use profile's GA gathering cutoffs</source>
- <translation type="unfinished">Использовать отсечения GA</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="151"/>
- <source>Use profile's NC noise cutoffs</source>
- <translation type="unfinished">Использовать отсечения NC</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="152"/>
- <source>Use profile's TC trusted cutoffs</source>
- <translation type="unfinished">Использовать отсечения TC</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="273"/>
- <source>Find HMM3 signals in %1</source>
- <translation>Искать сигналы HMM3 в %1</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="277"/>
- <source>Bad sequence supplied to input: %1</source>
- <translation>Неправильная последовательность подана на вход: %1</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3SearchWorker.cpp" line="306"/>
- <source>Found %1 HMM3 signals</source>
- <translation>Найденные %1 сигналы HMM3</translation>
- </message>
-</context>
-<context>
- <name>U2::LocalWorkflow::HMM3WritePrompter</name>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="184"/>
- <source>unset</source>
- <translation>не указан</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="188"/>
- <source>Save HMM3 profile(s) from <u>%1</u> to <u>%2</u>.</source>
- <translation>Сохранить профиль(и) HMM3 из <u>%1</u> в <u>%2</u>.</translation>
- </message>
-</context>
-<context>
- <name>U2::LocalWorkflow::HMM3Writer</name>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="277"/>
- <source>Empty HMM3 passed for writing to %1</source>
- <translation>Пустой HMM3 подан для записи в %1</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="277"/>
- <source>Unspecified URL for writing HMM3</source>
- <translation>Неизвестный URL для записи HMM3</translation>
- </message>
- <message>
- <location filename="../src/workers/HMM3IOWorker.cpp" line="293"/>
- <source>Writing HMM3 profile to %1</source>
- <translation>Запись HMM3 профиля в %1</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3ADVContext</name>
- <message>
- <location filename="../src/uHMM3Plugin.cpp" line="247"/>
- <source>Find HMM signals with HMMER3...</source>
- <translation>Поиск с помощью скрытой марковской модели (HMMER3)...</translation>
- </message>
- <message>
- <location filename="../src/uHMM3Plugin.cpp" line="257"/>
- <source>error</source>
- <translation>Ошибка</translation>
- </message>
- <message>
- <location filename="../src/uHMM3Plugin.cpp" line="257"/>
- <source>No sequence in focus found</source>
- <translation>Не найдено открытой последовательности</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3Build</name>
- <message>
- <location filename="../src/build/uhmm3build.cpp" line="65"/>
- <source>UGENE cannot determine alphabet of alignment</source>
- <translation>Некорректный алфавит</translation>
- </message>
- <message>
- <location filename="../src/build/uhmm3build.cpp" line="70"/>
- <source>Run out of memory (creating alphabet failed)</source>
- <translation>Нехватка памяти: невозможно создать алфавит</translation>
- </message>
- <message>
- <location filename="../src/build/uhmm3build.cpp" line="76"/>
- <source>Run out of memory (creating null model failed)</source>
- <translation>Нехватка памяти: невозможно создать null-модель</translation>
- </message>
- <message>
- <location filename="../src/build/uhmm3build.cpp" line="81"/>
- <source>Run out of memory (creating builder failed)</source>
- <translation>Нехватка памяти:невозможно создать builder</translation>
- </message>
- <message>
- <location filename="../src/build/uhmm3build.cpp" line="87"/>
- <source>Run out of memory (creating multiple alignment failed)</source>
- <translation>Нехватка памяти: невозможно конвертировать множественное выравнивание</translation>
- </message>
- <message>
- <location filename="../src/build/uhmm3build.cpp" line="92"/>
- <source>Run out of memory (digitizing of alignment failed)</source>
- <translation>Нехватка памяти: невозможно оцифровать множественное выравнивание</translation>
- </message>
- <message>
- <location filename="../src/build/uhmm3build.cpp" line="100"/>
- <source>Model building failed</source>
- <translation>Создание модели завершилось с ошибкой</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3BuildDialogImpl</name>
- <message>
- <location filename="../src/build/uHMM3BuildDialogImpl.cpp" line="63"/>
- <source>Build</source>
- <translation>Построить</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildDialogImpl.cpp" line="64"/>
- <source>Cancel</source>
- <translation>Отмена</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildDialogImpl.cpp" line="119"/>
- <source>Select multiple alignment file</source>
- <translation>Выберите файл множественного выравнивания</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildDialogImpl.cpp" line="78"/>
- <source>Select hmm file to create</source>
- <translation>Выберите файл HMM профайла</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildDialogImpl.cpp" line="185"/>
- <source>input file is empty</source>
- <translation>Входной файл не выбран</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildDialogImpl.cpp" line="188"/>
- <source>output hmm file is empty</source>
- <translation>Выходной HMM файл не выбран</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildDialogImpl.cpp" line="197"/>
- <source>Error: bad arguments!</source>
- <translation>Ошибка: неверные входные параметры!</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3BuildTask</name>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="57"/>
- <source>Build HMM profile from %1 alignment</source>
- <translation>Построение HMM профайла из выравнивания %1</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="71"/>
- <source>Given multiple alignment has no sequences</source>
- <translation>Данное множественное выравнивание не содержит последовательностей</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="74"/>
- <source>Given multiple alignment is empty</source>
- <translation>Данное множественное выравнивание пусто</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3BuildToFileTask</name>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="126"/>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="146"/>
- <source>Build HMM profile to '%1'</source>
- <translation>Построить профайл HMM в файл '%1'</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="163"/>
- <source>Build HMM profile '%1' -> '%2'</source>
- <translation>Построить HMM профайл: '%1' -> '%2'</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="129"/>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="149"/>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="170"/>
- <source>Output file is not given</source>
- <translation>Файл HMM профайла не установлен</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="134"/>
- <source>No multiple alignments given</source>
- <translation>Список множественных выравниваний пуст</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="166"/>
- <source>Input file is not given</source>
- <translation>Входной файл множественного выравнивания не задан</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="181"/>
- <source>Unrecognized input alignment file format</source>
- <translation>Неизвестный формат входного файла множественного выравнивания</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="188"/>
- <source>Error opening '%1' file</source>
- <translation>Ошибка открытия файла '%1'</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="229"/>
- <source>No multiple alignments found in input file</source>
- <translation>Не найдено множественных выравниваний во входном файле</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="276"/>
- <source>Source alignment</source>
- <translation>Входное множественное выравнивание</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="278"/>
- <source>Profile name</source>
- <translation>HMM профайл</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="282"/>
- <source>Options:</source>
- <translation>Опции:</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="283"/>
- <source>Model construction strategies</source>
- <translation>Стратегии построения</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="291"/>
- <source>Relative model construction strategies</source>
- <translation>Относительные веса</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="293"/>
- <source>Gerstein/Sonnhammer/Chothia tree weights</source>
- <translation>Gerstein/Sonnhammer/Chothia веса</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="294"/>
- <source>Henikoff simple filter weights</source>
- <translation>Простой фильтр весов Henikoff</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="295"/>
- <source>Henikoff position-based weights</source>
- <translation>Веса Henikoff основанные на позициях</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="296"/>
- <source>No relative weighting; set all to 1</source>
- <translation>Не использовать относительные веса</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="297"/>
- <source>Weights given in MSA file</source>
- <translation>Использовать веса из файла выравнивания</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="302"/>
- <source>Effective sequence weighting strategies</source>
- <translation>Эффективные веса</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="304"/>
- <source>adjust effective sequence number to achieve relative entropy target</source>
- <translation>Отрегулируйте эффективный порядковый номер, чтобы достичь цель энтропии</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="305"/>
- <source>effective sequence number is number of single linkage clusters</source>
- <translation>Использовать число единичных кластеров как эффективное</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="306"/>
- <source>no effective sequence number weighting: just use number of sequences</source>
- <translation>Использовать число последовательностей в качестве эффективного</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="307"/>
- <source>set effective sequence number for all models to: %1</source>
- <translation>Установить эффективное число последовательностей для всех моделей: %1</translation>
- </message>
- <message>
- <location filename="../src/build/uHMM3BuildTask.cpp" line="313"/>
- <source>Task finished with error: '%1'</source>
- <translation>Задание закончилось с ошибкой: '%1'</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3LoadProfileAndSearchTask</name>
- <message>
- <location filename="../src/search/uHMM3SearchTask.cpp" line="514"/>
- <source>HMM search with '%1' HMM profile file</source>
- <translation>Поиск сигналов HMM '%1'</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3MSAEditorContext</name>
- <message>
- <location filename="../src/uHMM3Plugin.cpp" line="202"/>
- <source>Build HMMER3 profile</source>
- <translation>Построить профиль с помощью HMMER3</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3Phmmer</name>
- <message>
- <location filename="../src/phmmer/uhmm3phmmer.cpp" line="168"/>
- <source>No input query sequence given</source>
- <translation>Входная последовательность-запрос не задана</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3phmmer.cpp" line="172"/>
- <source>Database sequence to search in is not given</source>
- <translation>Последовательность для поиска не задана</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3phmmer.cpp" line="179"/>
- <source>Run out of memory (creating alphabet failed)</source>
- <translation>Ошибка: нехватка памяти</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3phmmer.cpp" line="184"/>
- <source>Run out of memory (creating null model failed)</source>
- <translation>Ошибка: нехватка памяти</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3phmmer.cpp" line="191"/>
- <source>Run out of memory (creating builder failed)</source>
- <translation>Ошибка: нехватка памяти</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3phmmer.cpp" line="198"/>
- <source>Setting scoring system failed with error: '%1'</source>
- <translation>Ошибка установки системы оценки: %1</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3phmmer.cpp" line="205"/>
- <source>Error with digitizing sequence to search in</source>
- <translation>Невозможно оцифровать последовательность-запрос</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3phmmer.cpp" line="212"/>
- <source>Error digitizing query sequence</source>
- <translation>Невозможно оцифровать последовательность-запрос</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3phmmer.cpp" line="222"/>
- <source>Error with creating HMM profile for query sequence</source>
- <translation>Ошибка в создании HMM профайла для входной последовательности-запроса</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3phmmer.cpp" line="228"/>
- <source>Run out of memory (creating top hits list failed)</source>
- <translation>Ошибка: нехватка памяти</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3phmmer.cpp" line="234"/>
- <source>Run out of memory (creating pipeline failed)</source>
- <translation>Ошибка: нехватка памяти</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3PhmmerDialogImpl</name>
- <message>
- <location filename="../src/phmmer/uHMM3PhmmerDialogImpl.cpp" line="58"/>
- <source>Search</source>
- <translation>Искать</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uHMM3PhmmerDialogImpl.cpp" line="59"/>
- <source>Cancel</source>
- <translation>Отмена</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uHMM3PhmmerDialogImpl.cpp" line="117"/>
- <source>Select query sequence file</source>
- <translation>Выберите последовательность</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uHMM3PhmmerDialogImpl.cpp" line="165"/>
- <source>Query sequence file path is empty</source>
- <translation>Путь до файла последовательности пуст</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uHMM3PhmmerDialogImpl.cpp" line="170"/>
- <location filename="../src/phmmer/uHMM3PhmmerDialogImpl.cpp" line="181"/>
- <source>Error: bad arguments!</source>
- <translation>Ошибка: неверные входные параметры!</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uHMM3PhmmerDialogImpl.cpp" line="186"/>
- <source>Error</source>
- <translation>Ошибка</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uHMM3PhmmerDialogImpl.cpp" line="186"/>
- <source>Cannot create an annotation object. Please check settings</source>
- <translation>Невозможно создать аннотацию. Проверьте настройки</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3PhmmerTask</name>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="61"/>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="79"/>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="101"/>
- <source>HMM Phmmer task</source>
- <translation>Поиск phmmer</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="72"/>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="110"/>
- <source>HMM Phmmer search %1 sequence in %2 database</source>
- <translation>Phmmer-поиск последовательности %1 в последовательности %2</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="87"/>
- <source>HMM Phmmer search %1 sequence with %2 database</source>
- <translation>Phmmer-поиск последовательности %1 в последовательности %2</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="69"/>
- <source>Input query sequence</source>
- <translation>Входная последовательность-запрос</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="70"/>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="108"/>
- <source>Database sequence to search in</source>
- <translation>Путь до файла последовательности в которой производить поиск</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="84"/>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="107"/>
- <source>Query sequence file path</source>
- <translation>Путь до файла последовательности-запроса</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="85"/>
- <source>Database sequence file path</source>
- <translation>Путь до файла последовательности в которой производить поиск</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="90"/>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="113"/>
- <source>Error opening query sequence file</source>
- <translation>Ошибка открытия файла последовательности-запроса</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="94"/>
- <source>Error opening database sequence file</source>
- <translation>Ошибка открытия файла последовательности, в которой производится поиск</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="128"/>
- <source>Error loading sequence document:</source>
- <translation>Ошибка загрузки файла последовательности:</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="131"/>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="134"/>
- <source>No dna sequence objects found in document</source>
- <translation>В документе не найдено последовательностей</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="138"/>
- <source>Empty sequence loaded from document</source>
- <translation>Из файла загружена пустая последовательность</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="150"/>
- <source> query sequence</source>
- <translation>Входная последовательность-запрос</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="154"/>
- <source> db sequence</source>
- <translation>Последовательность, по которой производится поиск</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3PhmmerToAnnotationsTask</name>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="375"/>
- <source>querySeq sequence file path</source>
- <translation>Путь до файла последовательности-запроса</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="380"/>
- <source>db sequence</source>
- <translation>последовательность, в которой производится поиск</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="385"/>
- <source>annotation object</source>
- <translation>Объект аннотаций</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="390"/>
- <source>annotation name</source>
- <translation>Имя аннотаций</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="395"/>
- <source>annotation group</source>
- <translation>Группа аннотаций</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="408"/>
- <source>HMM Phmmer task</source>
- <translation>Поиск phmmer</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="422"/>
- <source>HMM Phmmer search %1 sequence with %2 database</source>
- <translation>Phmmer-поиск последовательности %1 в последовательности %2</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="432"/>
- <source>Annotation object was removed</source>
- <translation>Выбранный объект аннотаций удалён</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="456"/>
- <source>Query sequence</source>
- <translation>Входная последовательность-запрос</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="459"/>
- <source>Task was not finished</source>
- <translation>Задание завершено с ошибкой</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="464"/>
- <source>Result annotation table</source>
- <translation>Таблица аннотаций результата</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="465"/>
- <source>Result annotation group</source>
- <translation>Группа аннотаций результата</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="466"/>
- <source>Result annotation name</source>
- <translation>Имя аннотаций результата</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="469"/>
- <source>Results count</source>
- <translation>Количество результатов</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3Plugin</name>
- <message>
- <location filename="../src/uHMM3Plugin.cpp" line="70"/>
- <source>HMM3</source>
- <translation>HMM3</translation>
- </message>
- <message>
- <location filename="../src/uHMM3Plugin.cpp" line="70"/>
- <source>HMM profile tools. Plugin is based on HMMER 3.0b3 package: freely distributable implementation of profile HMM software for protein sequence analysis. Home page of project: http://hmmer.janelia.org/</source>
- <translation>Инструменты работы с HMM профайлами. Модуль основан на пакете HMMER3.0b3: анализ биологических последовательностей при помощи скрытых марковских моделей (HMM). Домашняя страница проекта: http://hmmer.janelia.org/</translation>
- </message>
- <message>
- <location filename="../src/uHMM3Plugin.cpp" line="100"/>
- <source>Build HMM3 profile...</source>
- <translation>Построение профиля с помощью HMMER3...</translation>
- </message>
- <message>
- <location filename="../src/uHMM3Plugin.cpp" line="105"/>
- <source>Search with HMMER3...</source>
- <translation>Поиск с помощью HMMER3...</translation>
- </message>
- <message>
- <location filename="../src/uHMM3Plugin.cpp" line="110"/>
- <source>Search with HMMER3 phmmer...</source>
- <translation>Поиск с помощью HMMER3 phmmer...</translation>
- </message>
- <message>
- <location filename="../src/uHMM3Plugin.cpp" line="167"/>
- <location filename="../src/uHMM3Plugin.cpp" line="178"/>
- <source>Error!</source>
- <translation>Ошибка!</translation>
- </message>
- <message>
- <location filename="../src/uHMM3Plugin.cpp" line="167"/>
- <location filename="../src/uHMM3Plugin.cpp" line="178"/>
- <source>Target sequence not selected: no opened annotated dna view</source>
- <translation>Не открыта последовательность для поиска</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3QDActor</name>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="73"/>
- <location filename="../src/search/uhmm3QDActor.cpp" line="167"/>
- <source>HMM3</source>
- <translation>HMM3</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="84"/>
- <source>QD HMM3 search</source>
- <translation>Поиск HMM3</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="168"/>
- <source>Searches HMM signals in a sequence with one or more profile HMM and saves the results as annotations.</source>
- <translation>Поиск сигналов в последовательности, используя один или несколько профилей скрытых марковских моделей (СММ), полученных с помощью HMMER3.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="174"/>
- <source>Profile HMM</source>
- <translation>Профиль СММ</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="175"/>
- <source>Semicolon-separated list of input HMM files.</source>
- <translation>Один или несколько HMM файлов.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="178"/>
- <source>Min Length</source>
- <translation>Минимальная длина</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="179"/>
- <source>Minimum length of a result region.</source>
- <translation>Минимальная длина найденного региона.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="182"/>
- <source>Max Length</source>
- <translation>Максимальная длина</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="183"/>
- <source>Maximum length of a result region.</source>
- <translation>Максимальная длина найденного региона.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="188"/>
- <source>Use E-value</source>
- <translation>Использовать E-value</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="189"/>
- <source>Filters by E-value if true. Otherwise filters by score.</source>
- <translation>Фильтровать по E-value если верно. В противном случае по счету.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="192"/>
- <source>Filter by High E-value</source>
- <translation>Фильтрация по E-value</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="193"/>
- <source>Reports domains <= this E-value threshold in output.</source>
- <translation>Возвращаются результаты с E-value (математическим ожиданием) <= указанного значения.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="197"/>
- <source>Filter by Low Score</source>
- <translation>Фильтрация по низким показателям</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="198"/>
- <source>Reports domains >= this score cutoff in output.</source>
- <translation>Возвращаются результаты с показателями >= указанного значения.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="203"/>
- <source>Max</source>
- <translation>Максимальная точность</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="204"/>
- <source>Turns off all acceleration heuristic filters. This increases sensitivity somewhat, at a large cost in speed.</source>
- <translation>Выключает использование всех эвристических фильтров для ускорения вычислений (MSV и других). Это повышает чувствительность алгоритма, но сказывается на скорости.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="209"/>
- <source>MSV Filter Threshold</source>
- <translation>Порог для MSV фильтрации</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="210"/>
- <source>P-value threshold for the MSV filter step of the acceleration pipeline.</source>
- <translation>Пороговая величина, используемая на шаге MSV фильтрации. MSV является одним из эвристических фильтров для ускорения вычислений.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="214"/>
- <source>Viterbi Filter Threshold</source>
- <translation>Порог для Viterbi фильтрации</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="215"/>
- <source>P-value threshold for the Viterbi filter step of the acceleration pipeline.</source>
- <translation>Пороговая величина, используемая на шаге Viterbi фильтрации. Viterbi является одним из эвристических фильтров для ускорения вычислений.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="219"/>
- <source>Forward Filter Threshold</source>
- <translation>Порог для Forward фильтрации</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="220"/>
- <source>P-value threshold for the Forward filter step of the acceleration pipeline.</source>
- <translation>Пороговая величина, используемая на шаге Forward фильтрации. Forward является одним из эвристических фильтров для ускорения вычислений.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="224"/>
- <source>No Bias Filter</source>
- <translation>Отключить Bias фильтрацию</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="225"/>
- <source>Turns off composition bias filter. This increases sensitivity somewhat, but can come at a high cost in speed.</source>
- <translation>Отключает Bias фильтрацию. Это повышает чувствительность алгоритма, но сказывается на скорости.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="232"/>
- <source>No Null2</source>
- <translation>No Null2</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="233"/>
- <source>Turns off the null2 score corrections for biased composition.</source>
- <translation>Отключает внесение null2 поправок показателей.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="237"/>
- <source>Number of Sequences</source>
- <translation>Количество последовательностей</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="238"/>
- <source>Specifies number of significant sequences. It is used for domain E-value calculations.</source>
- <translation>Количество значимых последовательностей.</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="242"/>
- <source>Seed</source>
- <translation>Затравка</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3QDActor.cpp" line="243"/>
- <source>Random number seed. The default is to use a fixed seed(42), so that results are exactly reproducible. Any other positive integer will give different (but also reproducible) results. A choice of 0 uses a randomly chosen seed.</source>
- <translation>По умолчанию, используется фиксированное значение(42), таким образом результаты будут одинаковы для различных запусков. При использовании любого другого положительного числа результаты будут отличаться, но также будут воспроизводимы. Для генерации случайной затравки введите 0.</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3SWPhmmerTask</name>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="216"/>
- <source>HMM Phmmer search %1 sequence in %2 database</source>
- <translation>Phmmer-поиск последовательности %1 в последовательности %2</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="222"/>
- <source>Can not create load query doc task</source>
- <translation>Can not create load query doc task</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="232"/>
- <source> querySeq sequence</source>
- <translation> querySeq sequence</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="245"/>
- <source>Invalid db sequence alphabet: %1</source>
- <translation>Неверный алфавит последовательности:%1</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="251"/>
- <source>Invalid query sequence alphabet: %1</source>
- <translation>Неверный алфавит последовательности: %1</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="274"/>
- <source>Cannot search for nucleic query in amino sequence</source>
- <translation>Поиск сигналов нуклеинового HMM профайла в аминоксилотной последовательности невозможен</translation>
- </message>
- <message>
- <location filename="../src/phmmer/uhmm3PhmmerTask.cpp" line="302"/>
- <source>HMMER3 phmmer sequence walker search task</source>
- <translation>Поиск HMM сигналов, учитывая возможные трансляции</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3SWSearchTask</name>
- <message>
- <location filename="../src/search/uHMM3SearchTask.cpp" line="86"/>
- <location filename="../src/search/uHMM3SearchTask.cpp" line="106"/>
- <location filename="../src/search/uHMM3SearchTask.cpp" line="147"/>
- <source>HMM search task with amino and complement translations</source>
- <translation>Поиск HMM сигналов, учитывая возможные трансляции</translation>
- </message>
- <message>
- <location filename="../src/search/uHMM3SearchTask.cpp" line="92"/>
- <location filename="../src/search/uHMM3SearchTask.cpp" line="110"/>
- <source>HMM search task with amino and complement translations using '%1' profile HMM</source>
- <translation>Поиск HMM сигналов, учитывая возможные трансляции. HMM профайл: '%1'</translation>
- </message>
- <message>
- <location filename="../src/search/uHMM3SearchTask.cpp" line="365"/>
- <source>Unrecognized alphabet of sequence</source>
- <translation>Неизвестный алфавит последовательности</translation>
- </message>
- <message>
- <location filename="../src/search/uHMM3SearchTask.cpp" line="369"/>
- <source>Invalid alphabet of sequence</source>
- <translation>Неверный алфавит последовательности</translation>
- </message>
- <message>
- <location filename="../src/search/uHMM3SearchTask.cpp" line="375"/>
- <source>Cannot search for nucleic HMM profile in amino sequence</source>
- <translation>Поиск сигналов нуклеинового HMM профайла в аминоксилотной последовательности невозможен</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3SWSearchToAnnotationsTask</name>
- <message>
- <source>hmm profile filename</source>
- <translation type="vanished">Путь до HMM профайла</translation>
- </message>
- <message>
- <source>annotation object</source>
- <translation type="vanished">Объект аннотации</translation>
- </message>
- <message>
- <source>annotations group name</source>
- <translation type="vanished">Имя группы аннотаций</translation>
- </message>
- <message>
- <source>annotations name</source>
- <translation type="vanished">Имя аннотаций</translation>
- </message>
- <message>
- <source>HMMER3 search task</source>
- <translation type="vanished">Поиск HMM сигналов</translation>
- </message>
- <message>
- <source>dna sequence</source>
- <translation type="vanished">Последовательность для поиска</translation>
- </message>
- <message>
- <source>HMMER3 search task with '%1' profile</source>
- <translation type="vanished">Поиск сигналов HMM профайла '%1'</translation>
- </message>
- <message>
- <source>Sequence file</source>
- <translation type="vanished">Путь до файла последовательности для поиска</translation>
- </message>
- <message>
- <source>HMM profile used</source>
- <translation type="vanished">HMM профайл</translation>
- </message>
- <message>
- <source>Task was not finished</source>
- <translation type="vanished">Задание завершено с ошибкой</translation>
- </message>
- <message>
- <source>Result annotation table</source>
- <translation type="vanished">Таблица результатов</translation>
- </message>
- <message>
- <source>Result annotation group</source>
- <translation type="vanished">Группа аннотаций</translation>
- </message>
- <message>
- <source>Result annotation name</source>
- <translation type="vanished">Имя аннотаций</translation>
- </message>
- <message>
- <source>Results count</source>
- <translation type="vanished">Количество результатов</translation>
- </message>
- <message>
- <source>Cannot load sequence document</source>
- <translation type="vanished">Не удалось загрузить документ последовательности</translation>
- </message>
- <message>
- <source>No sequence objects loaded</source>
- <translation type="vanished">Не загружен объект последовательности</translation>
- </message>
- <message>
- <source>Unknown sequence type loaded</source>
- <translation type="vanished">Загружена последовательность неизвестного типа</translation>
- </message>
- <message>
- <source>Empty sequence loaded</source>
- <translation type="vanished">Загружена пустая последовательность</translation>
- </message>
- <message>
- <source>Annotation object removed</source>
- <translation type="vanished">Выбранный объект аннотаций удалён</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3Search</name>
- <message>
- <location filename="../src/search/uhmm3search.cpp" line="61"/>
- <source>Bad HMM profile given</source>
- <translation>HMM профайл не задан</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3search.cpp" line="65"/>
- <source>Empty sequence given</source>
- <translation>Последовательность для поиска не задана</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3search.cpp" line="73"/>
- <source>Run out of memory (creation of sequence failed)</source>
- <translation>Нехватка памяти: невозможно конвертировать последовательность</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3search.cpp" line="79"/>
- <source>Profile HMM and sequence alphabets no matched</source>
- <translation>Алфавиты профайла HMM и последовательности не совпадают</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3search.cpp" line="85"/>
- <source>Run out of memory (creation of alphabet failed)</source>
- <translation>Нехватка памяти: невозможно создать алфавит</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3search.cpp" line="90"/>
- <source>Run out of memory (digitizing of sequence failed)</source>
- <translation>Невозможно оцифровать последовательность</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3search.cpp" line="96"/>
- <source>Run out of memory (creation of null model failed)</source>
- <translation>Нехватка памяти: невозможно создать null-модель</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3search.cpp" line="102"/>
- <location filename="../src/search/uhmm3search.cpp" line="116"/>
- <source>Run out of memory</source>
- <translation>Нехватка памяти: невозможно создать профайл</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3search.cpp" line="107"/>
- <source>Run out of memory (creation of optimized profile failed)</source>
- <translation>Нехватка памяти: невозможно создать оптимизированный профайл</translation>
- </message>
- <message>
- <location filename="../src/search/uhmm3search.cpp" line="121"/>
- <source>Run out of memory (top hits list creation failed)</source>
- <translation>Нехватка памяти: невозможно создать tophits</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3SearchDialogImpl</name>
- <message>
- <location filename="../src/search/uHMM3SearchDialogImpl.cpp" line="58"/>
- <source>Run</source>
- <translation>Запуск</translation>
- </message>
- <message>
- <location filename="../src/search/uHMM3SearchDialogImpl.cpp" line="59"/>
- <source>Cancel</source>
- <translation>Отмена</translation>
- </message>
- <message>
- <location filename="../src/search/uHMM3SearchDialogImpl.cpp" line="160"/>
- <source>HMM profile file path is empty</source>
- <translation>Путь до HMM профайла не задан</translation>
- </message>
- <message>
- <location filename="../src/search/uHMM3SearchDialogImpl.cpp" line="175"/>
- <source>Error: bad arguments!</source>
- <translation>Ошибка: неверные входные параметры!</translation>
- </message>
- <message>
- <location filename="../src/search/uHMM3SearchDialogImpl.cpp" line="183"/>
- <source>Error</source>
- <translation>Ошибка</translation>
- </message>
- <message>
- <location filename="../src/search/uHMM3SearchDialogImpl.cpp" line="183"/>
- <source>Cannot create an annotation object. Please check settings</source>
- <translation>Невозможно создать аннотацию. Проверьте настройки</translation>
- </message>
- <message>
- <location filename="../src/search/uHMM3SearchDialogImpl.cpp" line="233"/>
- <source>Select query HMM profile</source>
- <translation>Выберите HMM профайл</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMM3SearchTask</name>
- <message>
- <location filename="../src/search/uHMM3SearchTask.cpp" line="469"/>
- <source>HMM search task</source>
- <translation>Поиск сигналов HMM</translation>
- </message>
- <message>
- <location filename="../src/search/uHMM3SearchTask.cpp" line="478"/>
- <source>HMM search with %1 profiles</source>
- <translation>Поиск сигналов HMM '%1'</translation>
- </message>
- <message>
- <location filename="../src/search/uHMM3SearchTask.cpp" line="476"/>
- <source>HMM search with '%1'</source>
- <translation>Поиск сигналов HMM '%1'</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMMFormat</name>
- <message>
- <location filename="../src/format/uHMMFormat.cpp" line="279"/>
- <source>Unknown error occurred</source>
- <translation>Неизвестная ошибка</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormat.cpp" line="305"/>
- <source>Profile HMM format</source>
- <translation>HMM формат</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormat.cpp" line="306"/>
- <source>hmm is a format for storing hmm profiles</source>
- <translation>hmm это формат для хранения профилей hmm</translation>
- </message>
-</context>
-<context>
- <name>U2::UHMMFormatReader</name>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="117"/>
- <source>Number expected in NULE line. %1 found</source>
- <translation>Number expected in NULE line. %1 found</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="140"/>
- <source>Bad line in header section: '%1'</source>
- <translation>Неверная строка в заголовке: %1</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="147"/>
- <source>Empty value in header line:%1</source>
- <translation>Пустое значение в строке: %1</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="387"/>
- <source>Input file made by unknown version of HMMER or is not HMM profile file</source>
- <translation>Входной файл произведен неизвестной версией HMMER или это не файл профиля HMM</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="750"/>
- <source>ALPH must precede NULE in HMMER2 save files</source>
- <translation>ALPH must precede NULE in HMMER2 save files</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="769"/>
- <source>ALPH section must precede HMM</source>
- <translation>ALPH section must precede HMM</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="774"/>
- <source>Unknown alphabet</source>
- <translation>Неизвестный алфавит последовательности</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="797"/>
- <source>Failed to allocate body of the new HMM</source>
- <translation>Невозможно создать тело HMM</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="157"/>
- <source>Cannot parse integer from string: '%1'</source>
- <translation>Невозможно конвертировать строку в целое число: %1</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="168"/>
- <source>Cannot parse float number from string: '%1'</source>
- <translation>Невозможно конвертировать строку в вещественное число: %1</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="184"/>
- <source>Cannot parse 2 float numbers from string: '%1'</source>
- <translation>Невозможно конвертировать 2 вещественных числа из строки: %1</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="195"/>
- <source>Cannot parse unsigned integer from string: '%1'</source>
- <translation>Невозможно конвертировать строку в положительное целое число: %1</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="207"/>
- <source>Cannot parse y/n value from string: '%1'</source>
- <translation>Невозможно определить булево значение значение из строки: %1</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="216"/>
- <source>Run out of memory (date allocation failed)</source>
- <translation>Нехватка памяти</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="231"/>
- <source>Bad STATS line: '%1'</source>
- <translation>Неверная строка параметров: %1</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="235"/>
- <source>Bad STATS line: '%1'/ LOCAL word was not found</source>
- <translation>Неверная строка параметров (не обнаружен токен 'LOCAL'): %1</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="263"/>
- <location filename="../src/format/uHMMFormatReader.cpp" line="286"/>
- <source>Bad STATS line: %1. %2 not recognized</source>
- <translation>Неверная строка параметров (не обнаружен тэг %2): %1</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="299"/>
- <source>Unexpected end of file</source>
- <translation>Неожиданный конец строки</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="409"/>
- <source>Run out of memory (allocation of HMM shell failed)</source>
- <translation>Нехватка памяти</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="439"/>
- <location filename="../src/format/uHMMFormatReader.cpp" line="697"/>
- <source>Length of HMM model should be positive. Found: %1</source>
- <translation>Длина модели должна быть положительной. Обнаруженная длина: %1</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="447"/>
- <location filename="../src/format/uHMMFormatReader.cpp" line="707"/>
- <source>Unrecognized alphabet type: %1</source>
- <translation>Неопознан тип алфавита: %1
-</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="470"/>
- <location filename="../src/format/uHMMFormatReader.cpp" line="731"/>
- <source>Number of sequences should be positive. Found: %1</source>
- <translation>Число последовательностей должно быть положительным. Обнаруженная длина: %1</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="477"/>
- <source>EFFN shoold be positive. Found: %1</source>
- <translation>Эффективное число должно быть положительным. Обнаруженное число: %1</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="512"/>
- <source>Unrecognized tag in header section: '%1'</source>
- <translation>Неопознанный тэг в заголовке: %1</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="523"/>
- <source>Missing one or more STATS parameters</source>
- <translation>Не обнаружена одна или несколько строк STATS-параметров</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="528"/>
- <source>Run out of memory (failed to create alphabet)</source>
- <translation>Нехватка памяти: невозможно создать алфавит</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="533"/>
- <source>Run out of memory (allocation of HMM body failed)</source>
- <translation>Нехватка памяти</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="574"/>
- <source>Line was excpected to start with %1. Found: %2</source>
- <translation>Ожидаемое начало строки: %1. Обнаруженное начало строки: %2</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="616"/>
- <source>Excpected to find closing '//'. Found %1 instead</source>
- <translation>Ожидался токен '//'. Обнаруженный токен: %1</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="633"/>
- <source>HMM name not found</source>
- <translation>В HMM профайле не обнаружено имя</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="659"/>
- <source>Allocation failure, HMM shell</source>
- <translation>Allocation failure, HMM shell</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="800"/>
- <source>Failed to create background model</source>
- <translation>Failed to create background model</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="830"/>
- <source>Expected match line to start with %1. saw %2</source>
- <translation>Expected match line to start with %1. saw %2</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="884"/>
- <source>Expected closing //. found %1 instead</source>
- <translation>Ожидался токен '//'. Обнаруженный токен: %1</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="897"/>
- <source>No NAME found for HMM</source>
- <translation>В HMM профайле не обнаружено имя</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="900"/>
- <source>No LENG found for HMM</source>
- <translation>No LENG found for HMM</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="903"/>
- <source>No ALPH found for HMM</source>
- <translation>No ALPH found for HMM</translation>
- </message>
- <message>
- <location filename="../src/format/uHMMFormatReader.cpp" line="909"/>
- <source>Failed to calibrate HMMER2 model after input conversion</source>
- <translation>Failed to calibrate HMMER2 model after input conversion</translation>
- </message>
-</context>
-<context>
- <name>UHMM3BuildDialog</name>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="62"/>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="79"/>
- <source>...</source>
- <translation>...</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="126"/>
- <source>assign cols w/ >= symfrac residues as consensus</source>
- <translation>assign cols w/ >= symfrac residues as consensus</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="129"/>
- <source>fast</source>
- <translation>Быстрая</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="42"/>
- <source>Input and output</source>
- <translation>Ввод и вывод</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="20"/>
- <source>HMM3 Build</source>
- <translation>Построить профиль с помощью HMMER3</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="52"/>
- <source>Input alignment file</source>
- <translation>Файл выравнивания</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="69"/>
- <source>Build to profile</source>
- <translation>HMM профайл</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="104"/>
- <source>Construction strategies</source>
- <translation>Стратегии построения</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="142"/>
- <source>Sym fraction</source>
- <translation>Доля</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="164"/>
- <source>Manual construction</source>
- <translation>Ручное построение</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="167"/>
- <source>Hand (requires reference annotation)</source>
- <translation>Ручная</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="185"/>
- <source>Tag sequence as a fragment, if L < x*<L>, where x is:</source>
- <translation>Tag sequence as a fragment, if L < x*<L>, where x is:</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="226"/>
- <source>Relative weighting</source>
- <translation>Относительные веса</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="234"/>
- <source>Henikoff position-based weights</source>
- <translation>Веса Henikoff основанные на позициях</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="250"/>
- <source>Gerstein/Sonnhammer/Chothia tree weights</source>
- <translation>Gerstein/Sonnhammer/Chothia веса</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="274"/>
- <source>Henikoff simple filter weights</source>
- <translation>Простой фильтр весов Henikoff</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="284"/>
- <source>Identity cutoff</source>
- <translation>Отсечки идентичности</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="309"/>
- <source>Skip relative weighting; set all to 1</source>
- <translation>Не использовать относительные веса</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="316"/>
- <source>Use weights given in alignment file</source>
- <translation>Использовать веса из файла выравнивания</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="339"/>
- <source>Effective weighting</source>
- <translation>Эффективные веса</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="347"/>
- <source>Adjust effective sequence number to achieve relative entropy target</source>
- <translation>Отрегулируйте эффективный порядковый номер, чтобы достичь цель энтропии</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="381"/>
- <source>Minimum relative entropy/position</source>
- <translation>Минимальнеая относительная энтрпия</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="433"/>
- <source>Sigma parameter</source>
- <translation>Сигма-параметр</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="466"/>
- <source>Use number of single linkage clusters as effective</source>
- <translation>Использовать число единичных кластеров как эффективное</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="476"/>
- <source>Fractional identity cutoff</source>
- <translation>Долевая отсечка идентичности</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="501"/>
- <source>Use number of sequences as effective</source>
- <translation>Использовать число последовательностей как эффективное</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="516"/>
- <source>Effective sequence number for all models to</source>
- <translation>Установить эффективное число последовательностей для всех моделей</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="554"/>
- <source>E-value calibration</source>
- <translation>Калибрация E-value</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="573"/>
- <source>Length of sequences for MSV Gumbel mu fit</source>
- <translation>Длина последовательностей для MSV Gumbel mu fit</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="609"/>
- <source>Number of sequences for MSV Gumbel mu fit</source>
- <translation>Число последовательностей для MSV Gumbel mu fit</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="642"/>
- <source>Length of sequences for Viterbi Gumbel mu fit</source>
- <translation>Длина последовательностей для Viterbi Gumbel mu fit</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="672"/>
- <source>Number of sequences for Viterbi Gumbel mu fit</source>
- <translation>Число последовательностей для MSV Gumbel mu fit</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="702"/>
- <source>Length of sequences for Forward exp tail mu fit</source>
- <translation>Длина последовательностей для Forward exp tail mu fit</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="732"/>
- <source>Number of sequences for Forward exp tail mu fit</source>
- <translation>Число последовательностей для Forward exp tail mu fit</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="762"/>
- <source>Tail mass for Forward exponential tail mu fit</source>
- <translation>Масса для прямого экспоненциального хвоста mu fit</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="803"/>
- <source>Other</source>
- <translation>Разное</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="819"/>
- <source>Random generator seed</source>
- <translation>Затравка генератора случайных чисел</translation>
- </message>
- <message>
- <location filename="../src/build/UHMM3BuildDialog.ui" line="862"/>
- <source>* 0 means that one-time arbitrary seed will be used</source>
- <translation>* 0 - означает, что один раз будет использована случайная затравка</translation>
- </message>
-</context>
-<context>
- <name>UHMM3PhmmerDialog</name>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="20"/>
- <source>Phmmer Search</source>
- <translation>Поиск phmmer</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="36"/>
- <source>Input and output</source>
- <translation>Ввод и вывод</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="50"/>
- <source>Query sequence file:</source>
- <translation>Файл последовательности:</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="60"/>
- <source>...</source>
- <translation>...</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="87"/>
- <source>Reporting tresholds</source>
- <translation>Допустимые результаты</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="103"/>
- <source>Report domains with E-value less than</source>
- <translation>Сохранять результаты c величиной E-value менее</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="142"/>
- <source>Report domains with score greater than</source>
- <translation>Отсечение по итоговой сумме</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="166"/>
- <source>Number of significant sequences for domain E-value calculation</source>
- <translation>Число значимых последовательностей для подсчета E-value домена</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="201"/>
- <source>Scoring system</source>
- <translation>Оценка пропусков</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="217"/>
- <source>Gap open probability</source>
- <translation>Вероятность открытия пробела</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="247"/>
- <source>Gap extend probability</source>
- <translation>Вероятность продления пробела</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="285"/>
- <source>Acceleration</source>
- <translation>Производительность</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="297"/>
- <source>Turn off composition bias filter</source>
- <translation>Выключить composition bias фильтр</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="310"/>
- <source>Turn off biased composition score corrections</source>
- <translation>Выключить diased composition коррекию итоговой суммы</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="338"/>
- <source>Viterbi filter treshold</source>
- <translation>Порог фильтрации Viterbi</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="368"/>
- <source>MSV filter treshold</source>
- <translation>Порог фильтрации MSV</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="401"/>
- <source>Forward filter treshold</source>
- <translation>Порог фильтрации Forward</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="437"/>
- <source>E-value calibration</source>
- <translation>Калибрация E-value</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="456"/>
- <source>Length of sequences for MSV Gumbel mu fit</source>
- <translation>Длина последовательностей для MSV Gumbel mu fit</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="492"/>
- <source>Number of sequences for MSV Gumbel mu fit</source>
- <translation>Число последовательностей для MSV Gumbel mu fit</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="525"/>
- <source>Length of sequences for Viterbi Gumbel mu fit</source>
- <translation>Длина последовательностей для Viterbi Gumbel mu fit</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="555"/>
- <source>Number of sequences for Viterbi Gumbel mu fit</source>
- <translation>Число последовательностей для Viterbi Gumbel mu fit</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="585"/>
- <source>Length of sequences for Forward exp tail mu fit</source>
- <translation>Длина последовательностей для Forward exp tail mu fit</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="615"/>
- <source>Number of sequences for Forward exp tail mu fit</source>
- <translation>Число последовательностей для Forward exp tail mu fit</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="645"/>
- <source>Tail mass for Forward exponential tail mu fit</source>
- <translation>Масса для прямого экспоненциального хвоста mu fit</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="686"/>
- <source>Other</source>
- <translation>Разное</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="702"/>
- <source>Random generator seed</source>
- <translation>Затравка генератора случайных чисел</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="745"/>
- <source>* 0 means that one-time arbitrary seed will be used</source>
- <translation>* 0 - означает, что один раз будет использована случайная затравка</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="113"/>
- <source>1E+</source>
- <translation>1E+</translation>
- </message>
- <message>
- <location filename="../src/phmmer/UHMM3PhmmerDialog.ui" line="323"/>
- <source>Turn all heuristic filters off (less speed, more power)</source>
- <translation>Отключить все эвристические фильтры (более точно, менее производительно)</translation>
- </message>
-</context>
-<context>
- <name>UHMM3SearchDialog</name>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="65"/>
- <source>Query HMM file:</source>
- <translation>HMM профайл:</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="54"/>
- <source>Input and output</source>
- <translation>Ввод и вывод</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="75"/>
- <source>...</source>
- <translation>...</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="92"/>
- <source>Reporting thresholds</source>
- <translation>Допустимые результаты</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="109"/>
- <source>Report domains with E-value less than</source>
- <translation>Сохранять результаты c величиной E-value менее</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="119"/>
- <source>1E+</source>
- <translation>1E+</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="146"/>
- <source>Report domains with score greater than</source>
- <translation>Отсечение по итоговой сумме</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="20"/>
- <source>HMM3 Search</source>
- <translation>Поиск с помощью HMMER3</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="180"/>
- <source>Score threshold:</source>
- <translation>Порог итоговой суммы:</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="229"/>
- <source>Use profile's GA gathering cutoffs</source>
- <translation>Использовать отсечения GA</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="265"/>
- <source>Use profile's NC noise cutoffs</source>
- <translation>Использовать отсечения NC</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="295"/>
- <source>Use profile's TC trusted cutoffs</source>
- <translation>Использовать отсечения TC</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="312"/>
- <source>Number of significant sequences for domain E-value calculation</source>
- <translation>Число значимых последовательностей для подсчета E-value домена</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="345"/>
- <source>Acceleration heuristics</source>
- <translation>Производительность</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="360"/>
- <source>Turn off composition bias filter</source>
- <translation>Выключить composition bias фильтр</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="373"/>
- <source>Turn off biased composition score corrections</source>
- <translation>Выключить diased composition коррекию итоговой суммы</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="401"/>
- <source>MSV filter threshold:</source>
- <translation>Порог фильтрации MSV:</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="434"/>
- <source>Viterbi filter threshold:</source>
- <translation>Порог фильтрации Viterbi:</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="464"/>
- <source>Forward filter threshold:</source>
- <translation>Порог фильтрации Forward:</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="500"/>
- <source>Other</source>
- <translation>Разное</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="522"/>
- <source>Random generator seed</source>
- <translation>Затравка генератора случайных чисел</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="565"/>
- <source>* 0 means that one-time arbitrary seed will be used</source>
- <translation>* 0 - означает, что один раз будет использована случайная затравка</translation>
- </message>
- <message>
- <location filename="../src/search/UHMM3SearchDialog.ui" line="386"/>
- <source>Turn all heuristic filters off (less speed, more power)</source>
- <translation>Отключить все эвристические фильтры (более точно, менее производительно)</translation>
- </message>
-</context>
-</TS>
diff --git a/src/plugins_3rdparty/kalign/src/KalignDialogController.cpp b/src/plugins_3rdparty/kalign/src/KalignDialogController.cpp
index dda0bb3..2c2d643 100644
--- a/src/plugins_3rdparty/kalign/src/KalignDialogController.cpp
+++ b/src/plugins_3rdparty/kalign/src/KalignDialogController.cpp
@@ -59,7 +59,7 @@ KalignDialogController::KalignDialogController(QWidget* w, const MAlignment& _ma
: QDialog(w), ma(_ma), settings(_settings)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470741");
+ new HelpButton(this, buttonBox, "18220601");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Align"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
@@ -123,7 +123,7 @@ KalignAlignWithExtFileSpecifyDialogController::KalignAlignWithExtFileSpecifyDial
saveController(NULL)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470767");
+ new HelpButton(this, buttonBox, "18220627");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Align"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins_3rdparty/primer3/src/Primer3Dialog.cpp b/src/plugins_3rdparty/primer3/src/Primer3Dialog.cpp
index 1f621a5..5de6890 100644
--- a/src/plugins_3rdparty/primer3/src/Primer3Dialog.cpp
+++ b/src/plugins_3rdparty/primer3/src/Primer3Dialog.cpp
@@ -46,7 +46,7 @@ Primer3Dialog::Primer3Dialog(const Primer3TaskSettings &defaultSettings, ADVSequ
defaultSettings(defaultSettings)
{
ui.setupUi(this);
- new HelpButton(this, ui.helpButton, "17470720");
+ new HelpButton(this, ui.helpButton, "18220580");
QPushButton* pbPick = ui.pickPrimersButton;
QPushButton* pbReset = ui.resetButton;
diff --git a/src/plugins_3rdparty/sitecon/src/SiteconBuildDialogController.cpp b/src/plugins_3rdparty/sitecon/src/SiteconBuildDialogController.cpp
index 4b4f6c5..6f526d3 100644
--- a/src/plugins_3rdparty/sitecon/src/SiteconBuildDialogController.cpp
+++ b/src/plugins_3rdparty/sitecon/src/SiteconBuildDialogController.cpp
@@ -55,7 +55,7 @@ SiteconBuildDialogController::SiteconBuildDialogController(SiteconPlugin* pl, QW
saveController(NULL) {
task = NULL;
setupUi(this);
- new HelpButton(this, buttonBox, "17470692");
+ new HelpButton(this, buttonBox, "18220552");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Build"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins_3rdparty/sitecon/src/SiteconSearchDialogController.cpp b/src/plugins_3rdparty/sitecon/src/SiteconSearchDialogController.cpp
index 84be062..61b3691 100644
--- a/src/plugins_3rdparty/sitecon/src/SiteconSearchDialogController.cpp
+++ b/src/plugins_3rdparty/sitecon/src/SiteconSearchDialogController.cpp
@@ -86,7 +86,7 @@ public:
SiteconSearchDialogController::SiteconSearchDialogController(ADVSequenceObjectContext* _ctx, QWidget *p):QDialog(p) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470688");
+ new HelpButton(this, buttonBox, "18220548");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Search"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/plugins_3rdparty/umuscle/src/MuscleAlignDialogController.cpp b/src/plugins_3rdparty/umuscle/src/MuscleAlignDialogController.cpp
index 9dfe8a8..fe3c280 100644
--- a/src/plugins_3rdparty/umuscle/src/MuscleAlignDialogController.cpp
+++ b/src/plugins_3rdparty/umuscle/src/MuscleAlignDialogController.cpp
@@ -43,7 +43,7 @@ MuscleAlignDialogController::MuscleAlignDialogController(QWidget* w, const MAlig
: QDialog(w), ma(_ma), settings(_settings)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470703");
+ new HelpButton(this, buttonBox, "18220563");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Align"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
@@ -133,7 +133,7 @@ MuscleAlignWithExtFileSpecifyDialogController::MuscleAlignWithExtFileSpecifyDial
saveController(NULL)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470702");
+ new HelpButton(this, buttonBox, "18220562");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Align"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/ugene_version.pri b/src/ugene_version.pri
index 9e6e310..a94bd3e 100644
--- a/src/ugene_version.pri
+++ b/src/ugene_version.pri
@@ -1,13 +1,13 @@
# This file contains UGENE version info
# product version
-UGENE_VERSION=1.23.1
+UGENE_VERSION=1.24.2
# minimum UGENE version whose SQLite databases are compatible with this version
UGENE_MIN_VERSION_SQLITE=1.13.0
# minimum UGENE version whose MySQL databases are compatible with this version
-UGENE_MIN_VERSION_MYSQL=1.16.0
+UGENE_MIN_VERSION_MYSQL=1.24.0
# distribution info
isEmpty( U2_DISTRIBUTION_INFO ) {
@@ -16,5 +16,5 @@ U2_DISTRIBUTION_INFO=sources
# int version levels for executables
UGENE_VER_MAJOR=1
-UGENE_VER_MINOR=23
-UGENE_VER_PATCH=1
+UGENE_VER_MINOR=24
+UGENE_VER_PATCH=2
diff --git a/src/ugenem/src/SendReportDialog.cpp b/src/ugenem/src/SendReportDialog.cpp
index 60a29da..e137005 100644
--- a/src/ugenem/src/SendReportDialog.cpp
+++ b/src/ugenem/src/SendReportDialog.cpp
@@ -208,7 +208,6 @@ bool ReportSender::send(const QString &additionalInfo, const QString &dumpUrl) {
reply = netManager->post(request, multiPart);
multiPart->setParent(reply);
- connect(reply, SIGNAL(finished()), SLOT(sl_rreplyFinished()));
loop.exec();
if (reply->error() != QNetworkReply::NoError) {
diff --git a/src/ugeneui/html/version_news.html b/src/ugeneui/html/version_news.html
index 342e977..6ff7cab 100644
--- a/src/ugeneui/html/version_news.html
+++ b/src/ugeneui/html/version_news.html
@@ -52,30 +52,27 @@
</head>
<body>
<div>
- <h3 lang="en">Major changes in UGENE 1.23</h3>
+ <h3 lang="en">Major changes in UGENE 1.24</h3>
<ul class="features_list" lang="en">
- <li><span class="research_field">Auto-update:</span> there is no need to download a new version manually anymore. As soon as the new version released, UGENE will ask your permission to update and with your consent the program will be updated automatically.</li>
- <li>Performance:
+ <li><span class="research_field">Workflow Designer:</span>
<ul class="sub_features_list">
- <li>Opening of large Genbank files was improved.</li>
- <li>The visualisation of restriction sites was improved.</li>
+ <li>New element: "Convert SnpEff Variations to Annotations". Element converts variations with SnpEff information into standard annotations, so you can observe them in the Sequence View.</li>
+ <li>NGS workflows improvements.</li>
</ul>
</li>
- <li>Minor interface improvements and bug fixes.</li>
+ <li>Bug fixes.</li>
</ul>
- <p lang="en">For details see <a href="https://youtu.be/viDH3EgXvO0">the video</a> about UGENE 1.22 and 1.23 versions.</p>
- <h3 lang="ru">Изменения в UGENE 1.23</h3>
+ <h3 lang="ru">Изменения в UGENE 1.24</h3>
<ul class="features_list" lang="ru">
- <li><span class="research_field">Авто-обновление:</span> больше не нужно скачивать новую версию вручную. Как только новая версия будет доступна, UGENE предложит Вам обновить программу. С Вашего согласия UGENE будет автоматически обновлен.</li>
- <li>Производительность:
+ <li><span class="research_field">Дизайнер вычислительных схем:</span>
<ul class="sub_features_list">
- <li>Большие файлы Genbank открываются быстрее.</li>
- <li>Ускорен процесс отрисовки сайтов рестрикции.</li>
+ <li>Новый элемент: "Преобразование вариаций SnpEff в аннотации". Элемент преобразует вариации с информацией от SnpEff в стандартные аннотации, которые вы можете открыть в редакторе последовательностей.</li>
+ <li>Улучшения схем NGS.</li>
</ul>
</li>
- <li>Небольшие улучшения графического интерфейса и исправление ошибок.</li>
+ <li>Исправление ошибок.</li>
</ul>
- <p lang="ru">Обзор изменений в версиях программы 1.22 и 1.23 можно посмотреть в следующем <a href="https://youtu.be/viDH3EgXvO0">видео</a>.</p>
+
</div>
</body>
diff --git a/src/ugeneui/src/app_settings/directories_settings/DirectoriesSettingsGUIController.cpp b/src/ugeneui/src/app_settings/directories_settings/DirectoriesSettingsGUIController.cpp
index 2b5c155..707294c 100644
--- a/src/ugeneui/src/app_settings/directories_settings/DirectoriesSettingsGUIController.cpp
+++ b/src/ugeneui/src/app_settings/directories_settings/DirectoriesSettingsGUIController.cpp
@@ -82,7 +82,7 @@ AppSettingsGUIPageWidget* DirectoriesSettingsPageController::createWidget(AppSet
return r;
}
-const QString DirectoriesSettingsPageController::helpPageId = QString("17470455");
+const QString DirectoriesSettingsPageController::helpPageId = QString("18220315");
DirectoriesSettingsPageWidget::DirectoriesSettingsPageWidget(DirectoriesSettingsPageController* /*ctrl*/) {
setupUi(this);
diff --git a/src/ugeneui/src/app_settings/format_settings/FormatSettingsGUIController.cpp b/src/ugeneui/src/app_settings/format_settings/FormatSettingsGUIController.cpp
index d6350f3..f771558 100644
--- a/src/ugeneui/src/app_settings/format_settings/FormatSettingsGUIController.cpp
+++ b/src/ugeneui/src/app_settings/format_settings/FormatSettingsGUIController.cpp
@@ -71,7 +71,7 @@ AppSettingsGUIPageWidget* FormatSettingsGUIPageController::createWidget(AppSetti
return r;
}
-const QString FormatSettingsGUIPageController::helpPageId = QString("17470448");
+const QString FormatSettingsGUIPageController::helpPageId = QString("18220308");
FormatSettingsGUIPageWidget::FormatSettingsGUIPageWidget(FormatSettingsGUIPageController*) {
setupUi(this);
diff --git a/src/ugeneui/src/app_settings/logview_settings/LogSettingsGUIController.cpp b/src/ugeneui/src/app_settings/logview_settings/LogSettingsGUIController.cpp
index 088a57a..471cefc 100644
--- a/src/ugeneui/src/app_settings/logview_settings/LogSettingsGUIController.cpp
+++ b/src/ugeneui/src/app_settings/logview_settings/LogSettingsGUIController.cpp
@@ -62,7 +62,7 @@ AppSettingsGUIPageWidget* LogSettingsPageController::createWidget(AppSettingsGUI
return w;
}
-const QString LogSettingsPageController::helpPageId = QString("17470449");
+const QString LogSettingsPageController::helpPageId = QString("18220309");
//////////////////////////////////////////////////////////////////////////
// widget
diff --git a/src/ugeneui/src/app_settings/network_settings/NetworkSettingsGUIController.cpp b/src/ugeneui/src/app_settings/network_settings/NetworkSettingsGUIController.cpp
index 7a0f3ef..0c98a7b 100644
--- a/src/ugeneui/src/app_settings/network_settings/NetworkSettingsGUIController.cpp
+++ b/src/ugeneui/src/app_settings/network_settings/NetworkSettingsGUIController.cpp
@@ -65,7 +65,7 @@ AppSettingsGUIPageWidget* NetworkSettingsPageController::createWidget(AppSetting
return r;
}
-const QString NetworkSettingsPageController::helpPageId = QString("17470447");
+const QString NetworkSettingsPageController::helpPageId = QString("18220307");
NetworkSettingsPageWidget::NetworkSettingsPageWidget() {
setupUi( this );
diff --git a/src/ugeneui/src/app_settings/resource_settings/ResourceSettingsGUIController.cpp b/src/ugeneui/src/app_settings/resource_settings/ResourceSettingsGUIController.cpp
index 460912e..8e61327 100644
--- a/src/ugeneui/src/app_settings/resource_settings/ResourceSettingsGUIController.cpp
+++ b/src/ugeneui/src/app_settings/resource_settings/ResourceSettingsGUIController.cpp
@@ -59,7 +59,7 @@ AppSettingsGUIPageWidget* ResourceSettingsGUIPageController::createWidget(AppSet
return r;
}
-const QString ResourceSettingsGUIPageController::helpPageId = QString("17470446");
+const QString ResourceSettingsGUIPageController::helpPageId = QString("18220306");
ResourceSettingsGUIPageWidget::ResourceSettingsGUIPageWidget(ResourceSettingsGUIPageController*) {
setupUi(this);
diff --git a/src/ugeneui/src/app_settings/user_apps_settings/UserApplicationsSettingsGUIController.cpp b/src/ugeneui/src/app_settings/user_apps_settings/UserApplicationsSettingsGUIController.cpp
index 75abb2d..75c4530 100644
--- a/src/ugeneui/src/app_settings/user_apps_settings/UserApplicationsSettingsGUIController.cpp
+++ b/src/ugeneui/src/app_settings/user_apps_settings/UserApplicationsSettingsGUIController.cpp
@@ -110,7 +110,7 @@ AppSettingsGUIPageWidget* UserApplicationsSettingsPageController::createWidget(A
return r;
}
-const QString UserApplicationsSettingsPageController::helpPageId = QString("17470445");
+const QString UserApplicationsSettingsPageController::helpPageId = QString("18220305");
UserApplicationsSettingsPageWidget::UserApplicationsSettingsPageWidget(UserApplicationsSettingsPageController* ctrl) {
setupUi(this);
diff --git a/src/ugeneui/src/main_window/ShutdownTask.cpp b/src/ugeneui/src/main_window/ShutdownTask.cpp
index 9fdacbe..eacaa5c 100644
--- a/src/ugeneui/src/main_window/ShutdownTask.cpp
+++ b/src/ugeneui/src/main_window/ShutdownTask.cpp
@@ -87,65 +87,6 @@ static bool closeViews() {
return true;
}
-class CloseWindowsTask : public Task {
-public:
- CloseWindowsTask() : Task(QObject::tr("Close windows"), TaskFlags(TaskFlag_NoRun)) {}
- void prepare() {
- Project* proj = AppContext::getProject();
- if (proj == NULL) {
- return;
- }
- if ( proj->isTreeItemModified() || proj->getProjectURL().isEmpty() ) {
- addSubTask(AppContext::getProjectService()->saveProjectTask(SaveProjectTaskKind_SaveProjectAndDocumentsAskEach));
- }
- }
-
- QList<Task*> onSubTaskFinished(Task* subTask) {
- if (subTask->isCanceled()) {
- stateInfo.cancelFlag = true;
- return QList<Task*>();
- }
- coreLog.trace("Closing views");
- if (!closeViews()) {
- getTopLevelParentTask()->cancel();
- }
- return QList<Task*>();
- }
-
- ReportResult report() {
- // wait for saving/closing tasks if any
- foreach(Task* t, AppContext::getTaskScheduler()->getTopLevelTasks()) {
- if (t != getTopLevelParentTask() && !t->isFinished()) {
- return ReportResult_CallMeAgain;
- }
- }
- return ReportResult_Finished;
- }
-};
-
-class CancelAllTask : public Task {
-public:
- CancelAllTask() : Task(ShutdownTask::tr("Cancel active tasks"), TaskFlag_NoRun) {}
- void prepare() {
- // cancel all tasks but ShutdownTask
- QList<Task*> activeTopTasks = AppContext::getTaskScheduler()->getTopLevelTasks();
- activeTopTasks.removeOne(getTopLevelParentTask());
- foreach(Task* t, activeTopTasks) {
- coreLog.trace(QString("Canceling: %1").arg(t->getTaskName()));
- t->cancel();
- }
- }
-
- ReportResult report() {
- foreach(Task* t, AppContext::getTaskScheduler()->getTopLevelTasks()) {
- if (t->isCanceled() && !t->isFinished()) {
- return ReportResult_CallMeAgain;
- }
- }
- return ReportResult_Finished;
- }
-};
-
// This function prepends empty string to RecentProjects in UGENE SETTINGS in order
// to prevent project auto loading on next UGENE launch
static void cancelProjectAutoLoad() {
@@ -194,8 +135,8 @@ QList<Task*> ShutdownTask::onSubTaskFinished(Task* subTask) {
stateInfo.cancelFlag = subTask->isCanceled();
if (isCanceled() || subTask->hasError() ) {
- mw->setShutDownInProcess(false);
- return res; //stop shutdown process
+ cancelShutdown();
+ return res;
}
ServiceRegistry* sr = AppContext::getServiceRegistry();
@@ -220,6 +161,11 @@ QList<Task*> ShutdownTask::onSubTaskFinished(Task* subTask) {
return res;
}
+void ShutdownTask::cancelShutdown() {
+ mw->setShutDownInProcess(false);
+ cancel();
+}
+
Task::ReportResult ShutdownTask::report() {
if (propagateSubtaskError() || hasError() || isCanceled()) {
setErrorNotificationSuppression(true);
@@ -242,4 +188,67 @@ Task::ReportResult ShutdownTask::report() {
return Task::ReportResult_Finished;
}
+CloseWindowsTask::CloseWindowsTask()
+ : Task(QObject::tr("Close windows"), TaskFlags(TaskFlag_NoRun))
+{
+
+}
+
+void CloseWindowsTask::prepare() {
+ Project* proj = AppContext::getProject();
+ if (proj == NULL) {
+ return;
+ }
+ if ( proj->isTreeItemModified() || proj->getProjectURL().isEmpty() ) {
+ addSubTask(AppContext::getProjectService()->saveProjectTask(SaveProjectTaskKind_SaveProjectAndDocumentsAskEach));
+ }
+}
+
+QList<Task *> CloseWindowsTask::onSubTaskFinished(Task *subTask) {
+ if (subTask->isCanceled()) {
+ stateInfo.cancelFlag = true;
+ return QList<Task*>();
+ }
+ coreLog.trace(tr("Closing views"));
+ if (!closeViews()) {
+ getTopLevelParentTask()->cancel();
+ }
+ return QList<Task*>();
+}
+
+Task::ReportResult CloseWindowsTask::report() {
+ // wait for saving/closing tasks if any
+ foreach(Task* t, AppContext::getTaskScheduler()->getTopLevelTasks()) {
+ if (t != getTopLevelParentTask() && !t->isFinished()) {
+ return ReportResult_CallMeAgain;
+ }
+ }
+ return ReportResult_Finished;
+}
+
+CancelAllTask::CancelAllTask()
+ : Task(ShutdownTask::tr("Cancel active tasks"), TaskFlag_NoRun)
+{
+
+}
+
+void CancelAllTask::prepare() {
+ // cancel all tasks but ShutdownTask
+ QList<Task*> activeTopTasks = AppContext::getTaskScheduler()->getTopLevelTasks();
+ activeTopTasks.removeOne(getTopLevelParentTask());
+ foreach(Task* t, activeTopTasks) {
+ coreLog.trace(tr("Canceling: %1").arg(t->getTaskName()));
+ t->cancel();
+ }
+}
+
+Task::ReportResult CancelAllTask::report() {
+ foreach(Task* t, AppContext::getTaskScheduler()->getTopLevelTasks()) {
+ if (t->isCanceled() && !t->isFinished()) {
+ return ReportResult_CallMeAgain;
+ }
+ }
+ return ReportResult_Finished;
+}
+
} // namespace U2
diff --git a/src/ugeneui/src/main_window/ShutdownTask.h b/src/ugeneui/src/main_window/ShutdownTask.h
index 4c8596e..6a91eb1 100644
--- a/src/ugeneui/src/main_window/ShutdownTask.h
+++ b/src/ugeneui/src/main_window/ShutdownTask.h
@@ -42,12 +42,32 @@ protected:
virtual QList<Task*> onSubTaskFinished(Task* subTask);
private:
+ void cancelShutdown();
+
MainWindowImpl* mw;
bool docsToRemoveAreFetched;
QList<Document *> docsToRemove;
};
+class CloseWindowsTask : public Task {
+public:
+ CloseWindowsTask();
+
+private:
+ void prepare();
+ QList<Task *> onSubTaskFinished(Task *subTask);
+ ReportResult report();
+};
+
+class CancelAllTask : public Task {
+public:
+ CancelAllTask();
+
+private:
+ void prepare();
+ ReportResult report();
+};
-}//namespace
+} // namespace U2
-#endif
+#endif // _U2_SHUTDOWN_TASK_H_
diff --git a/src/ugeneui/src/project_support/DocumentFormatSelectorController.cpp b/src/ugeneui/src/project_support/DocumentFormatSelectorController.cpp
index a7fcc70..7a0cd77 100644
--- a/src/ugeneui/src/project_support/DocumentFormatSelectorController.cpp
+++ b/src/ugeneui/src/project_support/DocumentFormatSelectorController.cpp
@@ -58,7 +58,7 @@ DocumentFormatSelectorController::DocumentFormatSelectorController(QList<FormatD
: QDialog(p), formatDetectionResults(results)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470424");
+ new HelpButton(this, buttonBox, "18220284");
setObjectName("DocumentFormatSelectorDialog");
}
diff --git a/src/ugeneui/src/project_support/DocumentProviderSelectorController.cpp b/src/ugeneui/src/project_support/DocumentProviderSelectorController.cpp
index e581510..92ccef7 100644
--- a/src/ugeneui/src/project_support/DocumentProviderSelectorController.cpp
+++ b/src/ugeneui/src/project_support/DocumentProviderSelectorController.cpp
@@ -99,7 +99,7 @@ DocumentProviderSelectorController::DocumentProviderSelectorController(const QLi
formatDetectionResults(formatDetectionResults)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470632");
+ new HelpButton(this, buttonBox, "18220492");
}
int DocumentProviderSelectorController::getSelectedFormatIdx() const {
diff --git a/src/ugeneui/src/project_support/DocumentReadingModeSelectorController.cpp b/src/ugeneui/src/project_support/DocumentReadingModeSelectorController.cpp
index a16035e..2c9ad2c 100644
--- a/src/ugeneui/src/project_support/DocumentReadingModeSelectorController.cpp
+++ b/src/ugeneui/src/project_support/DocumentReadingModeSelectorController.cpp
@@ -80,7 +80,7 @@ bool DocumentReadingModeSelectorController::adjustReadingMode(FormatDetectionRes
d->setModal(true);
ui.setupUi(d.data());
- new HelpButton(d.data(), ui.buttonBox, "17470397");
+ new HelpButton(d.data(), ui.buttonBox, "18220257");
bool canBeShortReads = minSequenceSize > 0 && maxSequenceSize < 2000;
bool haveReadAligners = !AppContext::getDnaAssemblyAlgRegistry()->getRegisteredAlgorithmIds().isEmpty();
diff --git a/src/ugeneui/src/project_support/ExportProjectDialogController.cpp b/src/ugeneui/src/project_support/ExportProjectDialogController.cpp
index 53d94c9..f655c7f 100644
--- a/src/ugeneui/src/project_support/ExportProjectDialogController.cpp
+++ b/src/ugeneui/src/project_support/ExportProjectDialogController.cpp
@@ -55,7 +55,7 @@ ExportProjectDialogController::ExportProjectDialogController(QWidget *p, const Q
: QDialog(p)
{
setupUi(this);
- new HelpButton(this, buttonBox, "17470397");
+ new HelpButton(this, buttonBox, "18220257");
setModal(true);
projectFile = fixProjectFile(defaultProjectFileName);
diff --git a/src/ugeneui/src/project_support/MultipleDocumentsReadingModeSelectorController.cpp b/src/ugeneui/src/project_support/MultipleDocumentsReadingModeSelectorController.cpp
index 705bb8f..c4d9a3e 100644
--- a/src/ugeneui/src/project_support/MultipleDocumentsReadingModeSelectorController.cpp
+++ b/src/ugeneui/src/project_support/MultipleDocumentsReadingModeSelectorController.cpp
@@ -119,7 +119,7 @@ QString MultipleDocumentsReadingModeDialog::setupNewUrl() {
bool MultipleDocumentsReadingModeDialog::setupGUI(QList<GUrl>& _urls, QVariantMap& props, const QMap<QString, qint64>& headerSequenceLengths){
setModal(true);
setupUi(this);
- new HelpButton(this, buttonBox, "17470488");
+ new HelpButton(this, buttonBox, "18220348");
// doesn't matter from what position, because excluded fileName all path of documents are the same
CHECK(!urls.isEmpty(), false);
diff --git a/src/ugeneui/src/project_support/ProjectLoaderImpl.cpp b/src/ugeneui/src/project_support/ProjectLoaderImpl.cpp
index 42b92c5..bba6b0c 100644
--- a/src/ugeneui/src/project_support/ProjectLoaderImpl.cpp
+++ b/src/ugeneui/src/project_support/ProjectLoaderImpl.cpp
@@ -821,7 +821,7 @@ void SaveProjectDialogController::sl_clicked(QAbstractButton *button) {
//////////////////////////////////////////////////////////////////////////
ProjectDialogController::ProjectDialogController(ProjectDialogController::Mode m, QWidget *p):QDialog(p) {
setupUi(this);
- new HelpButton(this, buttonBox, "17470421");
+ new HelpButton(this, buttonBox, "18220281");
buttonBox->button(QDialogButtonBox::Ok)->setText(tr("Create"));
buttonBox->button(QDialogButtonBox::Cancel)->setText(tr("Cancel"));
diff --git a/src/ugeneui/src/project_support/ProjectTasksGui.cpp b/src/ugeneui/src/project_support/ProjectTasksGui.cpp
index ce6e6f6..5a29293 100644
--- a/src/ugeneui/src/project_support/ProjectTasksGui.cpp
+++ b/src/ugeneui/src/project_support/ProjectTasksGui.cpp
@@ -127,7 +127,7 @@ QList<Task*> OpenProjectTask::onSubTaskFinished(Task* subTask) {
//////////////////////////////////////////////////////////////////////////
/// Save project
SaveProjectTask::SaveProjectTask(SaveProjectTaskKind _k, Project* p, const QString& _url, bool silentSave_)
- : Task(tr("Save project"), TaskFlag_NoRun), k(_k), proj(p), url(_url), silentSave(silentSave_)
+ : Task(tr("Save project"), TaskFlags(TaskFlag_NoRun) | TaskFlag_CancelOnSubtaskCancel), k(_k), proj(p), url(_url), silentSave(silentSave_)
{
}
@@ -203,16 +203,10 @@ void SaveProjectTask::prepare() {
ssTasks.append(new SaveOnlyProjectTask(proj, url));
}
if (!ssTasks.isEmpty()) {
- addSubTask(new MultiTask(tr("Save documents, remove phantom docs, save project"), ssTasks));
+ addSubTask(new MultiTask(tr("Save documents, remove phantom docs, save project"), ssTasks, false, TaskFlags_NR_FOSE_COSC));
}
}
-Task::ReportResult SaveProjectTask::report() {
- return Task::ReportResult_Finished;
-}
-
-
-
//////////////////////////////////////////////////////////////////////////
/// SaveOnlyProjectTask
diff --git a/src/ugeneui/src/project_support/ProjectTasksGui.h b/src/ugeneui/src/project_support/ProjectTasksGui.h
index bf43f41..2f69f17 100644
--- a/src/ugeneui/src/project_support/ProjectTasksGui.h
+++ b/src/ugeneui/src/project_support/ProjectTasksGui.h
@@ -88,7 +88,6 @@ public:
~SaveProjectTask();
virtual void prepare();
- ReportResult report();
private:
SaveProjectTaskKind k;
diff --git a/ugene.pro b/ugene.pro
index 06b9ec9..59c32ac 100644
--- a/ugene.pro
+++ b/ugene.pro
@@ -88,10 +88,6 @@ use_opencl() {
SUBDIRS += src/plugins/opencl_support
}
-use_sse2() {
- SUBDIRS += src/plugins_3rdparty/hmm3
-}
-
exclude_list_enabled() {
SUBDIRS -= src/plugins/CoreTests
SUBDIRS -= src/plugins/test_runner
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/ugene.git
More information about the debian-med-commit
mailing list