[med-svn] [mothur] 01/01: Imported Upstream version 1.38.1
Tomasz Buchert
tomasz at moszumanska.debian.org
Sat Jul 30 00:22:50 UTC 2016
This is an automated email from the git hooks/post-receive script.
tomasz pushed a commit to annotated tag upstream/1.38.1
in repository mothur.
commit f75191ae1059f61947c84a4fe64f76a523a050ff
Author: Tomasz Buchert <tomasz at debian.org>
Date: Thu Jul 21 22:00:03 2016 +0200
Imported Upstream version 1.38.1
---
Makefile | 4 +-
Mothur.xcodeproj/project.pbxproj | 104 +-
TestMothur/dataset.h | 2 +-
TestMothur/testcommands/testrenamefilecommand.cpp | 97 ++
TestMothur/testcommands/testrenamefilecommand.h | 40 +
TestMothur/testtrimoligos.cpp | 43 +
TestMothur/testtrimoligos.hpp | 38 +
makefile-internal | 4 +-
source/averagelinkage.cpp | 3 -
source/chimera/mothurchimera.cpp | 70 +-
source/classifier/bayesian.cpp | 77 +-
source/classifier/classify.cpp | 210 +--
source/classifier/phylosummary.cpp | 3 -
source/classifier/phylotree.cpp | 2 +-
source/cluster.hpp | 6 +-
source/commandfactory.cpp | 25 +-
source/commands/aligncommand.cpp | 26 +-
source/commands/chimeraccodecommand.cpp | 26 +-
source/commands/chimeracheckcommand.cpp | 27 +-
source/commands/chimerapintailcommand.cpp | 23 +-
source/commands/chimerapintailcommand.h | 2 -
source/commands/chimeraslayercommand.cpp | 31 +-
source/commands/chimerauchimecommand.cpp | 41 +-
source/commands/chimerauchimecommand.h | 24 +-
source/commands/chimeravsearchcommand.cpp | 1661 +++++++++++++++++++++
source/commands/chimeravsearchcommand.h | 66 +
source/commands/classifyotucommand.cpp | 147 +-
source/commands/classifyotucommand.h | 4 +-
source/commands/classifyrfsharedcommand.cpp | 51 +-
source/commands/classifyrfsharedcommand.h | 4 +-
source/commands/classifyseqscommand.cpp | 42 +-
source/commands/classifyseqscommand.h | 2 -
source/commands/clearmemorycommand.cpp | 70 -
source/commands/clearmemorycommand.h | 41 -
source/commands/clustercommand.cpp | 17 +-
source/commands/clustersplitcommand.cpp | 24 +-
source/commands/countseqscommand.cpp | 131 +-
source/commands/countseqscommand.h | 5 +-
source/commands/getmimarkspackagecommand.cpp | 1 +
source/commands/hclustercommand.cpp | 499 -------
source/commands/hclustercommand.h | 73 -
source/commands/lefsecommand.cpp | 23 +-
source/commands/lefsecommand.h | 4 +-
source/commands/makebiomcommand.cpp | 20 +-
source/commands/makecontigscommand.cpp | 81 +-
source/commands/mgclustercommand.cpp | 354 ++---
source/commands/mgclustercommand.h | 2 -
source/commands/parsefastaqcommand.cpp | 1 +
source/commands/pcrseqscommand.cpp | 22 +-
source/commands/pcrseqscommand.h | 15 +-
source/commands/pipelinepdscommand.cpp | 790 ----------
source/commands/pipelinepdscommand.h | 57 -
source/commands/renamefilecommand.cpp | 706 +++++++++
source/commands/renamefilecommand.h | 50 +
source/commands/renameseqscommand.cpp | 1 +
source/commands/sensspeccommand.cpp | 60 +-
source/commands/sensspeccommand.h | 2 +-
source/commands/seqerrorcommand.cpp | 1 +
source/commands/splitgroupscommand.cpp | 2 +-
source/commands/sracommand.cpp | 1 +
source/commands/summarytaxcommand.cpp | 32 +-
source/commands/summarytaxcommand.h | 2 +-
source/datastructures/alignmentdb.cpp | 98 +-
source/datastructures/designmap.cpp | 7 +-
source/datastructures/groupmap.cpp | 26 +-
source/datastructures/oligos.cpp | 70 +-
source/datastructures/referencedb.cpp | 34 -
source/datastructures/referencedb.h | 49 -
source/datastructures/sequence.cpp | 6 +
source/datastructures/sequence.hpp | 1 +
source/datastructures/sequencecountparser.cpp | 6 +-
source/datastructures/sequencecountparser.h | 2 +-
source/datastructures/sequenceparser.cpp | 6 +-
source/datastructures/sequenceparser.h | 2 +-
source/hcluster.cpp | 807 ----------
source/hcluster.h | 86 --
source/heatmapsim.cpp | 38 +-
source/heatmapsim.h | 2 +-
source/mothur.cpp | 9 +-
source/mothurout.cpp | 205 ++-
source/mothurout.h | 5 +-
source/randomforest/abstractrandomforest.cpp | 59 -
source/randomforest/abstractrandomforest.hpp | 67 -
source/randomforest/regularizedrandomforest.cpp | 63 -
source/randomforest/regularizedrandomforest.h | 30 -
source/read/readblast.cpp | 117 +-
source/read/readblast.h | 3 +-
source/trimoligos.cpp | 1061 +++++++------
source/trimoligos.h | 4 +-
source/validparameter.cpp | 38 +-
source/vsearchfileparser.cpp | 12 +-
source/vsearchfileparser.h | 5 +-
source/weightedlinkage.cpp | 3 -
93 files changed, 4331 insertions(+), 4582 deletions(-)
diff --git a/Makefile b/Makefile
old mode 100755
new mode 100644
index bc86111..a49ad17
--- a/Makefile
+++ b/Makefile
@@ -15,8 +15,8 @@ USEBOOST ?= yes
BOOST_LIBRARY_DIR="\"Enter_your_boost_library_path_here\""
BOOST_INCLUDE_DIR="\"Enter_your_boost_include_path_here\""
MOTHUR_FILES="\"Enter_your_default_path_here\""
-RELEASE_DATE = "\"6/20/2016\""
-VERSION = "\"1.37.6\""
+RELEASE_DATE = "\"7/20/2016\""
+VERSION = "\"1.38.0\""
ifeq ($(strip $(64BIT_VERSION)),yes)
CXXFLAGS += -DBIT_VERSION
diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj
old mode 100755
new mode 100644
index 7bf0eb8..a4d9cc1
--- a/Mothur.xcodeproj/project.pbxproj
+++ b/Mothur.xcodeproj/project.pbxproj
@@ -136,7 +136,6 @@
481FB5A01AC1B71B0076CFF3 /* classifysvmsharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7B2181FE17AD777B00286E6A /* classifysvmsharedcommand.cpp */; };
481FB5A11AC1B71B0076CFF3 /* classifytreecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7EEB0F414F29BFD00344B83 /* classifytreecommand.cpp */; };
481FB5A21AC1B71B0076CFF3 /* clearcutcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B69612D37EC400DA6239 /* clearcutcommand.cpp */; };
- 481FB5A31AC1B7300076CFF3 /* clearmemorycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73DDBB913C4A0D1006AAE38 /* clearmemorycommand.cpp */; };
481FB5A41AC1B7300076CFF3 /* clustercommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B69C12D37EC400DA6239 /* clustercommand.cpp */; };
481FB5A51AC1B7300076CFF3 /* clusterdoturcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B69E12D37EC400DA6239 /* clusterdoturcommand.cpp */; };
481FB5A61AC1B7300076CFF3 /* clusterfragmentscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6A012D37EC400DA6239 /* clusterfragmentscommand.cpp */; };
@@ -174,7 +173,6 @@
481FB5C71AC1B74F0076CFF3 /* getsabundcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B70612D37EC400DA6239 /* getsabundcommand.cpp */; };
481FB5C81AC1B74F0076CFF3 /* getseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B70812D37EC400DA6239 /* getseqscommand.cpp */; };
481FB5C91AC1B74F0076CFF3 /* getsharedotucommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B70A12D37EC400DA6239 /* getsharedotucommand.cpp */; };
- 481FB5CA1AC1B74F0076CFF3 /* hclustercommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B71A12D37EC400DA6239 /* hclustercommand.cpp */; };
481FB5CB1AC1B74F0076CFF3 /* heatmapcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B71E12D37EC400DA6239 /* heatmapcommand.cpp */; };
481FB5CC1AC1B74F0076CFF3 /* heatmapsimcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B72212D37EC400DA6239 /* heatmapsimcommand.cpp */; };
481FB5CD1AC1B74F0076CFF3 /* helpcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B72812D37EC400DA6239 /* helpcommand.cpp */; };
@@ -213,7 +211,6 @@
481FB5EF1AC1B77E0076CFF3 /* pcoacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78712D37EC400DA6239 /* pcoacommand.cpp */; };
481FB5F11AC1B77E0076CFF3 /* phylodiversitycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78B12D37EC400DA6239 /* phylodiversitycommand.cpp */; };
481FB5F21AC1B77E0076CFF3 /* phylotypecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B79112D37EC400DA6239 /* phylotypecommand.cpp */; };
- 481FB5F31AC1B77E0076CFF3 /* pipelinepdscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B79512D37EC400DA6239 /* pipelinepdscommand.cpp */; };
481FB5F41AC1B77E0076CFF3 /* preclustercommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B79712D37EC400DA6239 /* preclustercommand.cpp */; };
481FB5F51AC1B77E0076CFF3 /* primerdesigncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74C06E816A9C0A8008390A3 /* primerdesigncommand.cpp */; };
481FB5F61AC1B77E0076CFF3 /* quitcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7A112D37EC400DA6239 /* quitcommand.cpp */; };
@@ -285,7 +282,6 @@
481FB6391AC1B7EA0076CFF3 /* ordervector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B77712D37EC400DA6239 /* ordervector.cpp */; };
481FB63A1AC1B7EA0076CFF3 /* qualityscores.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B79F12D37EC400DA6239 /* qualityscores.cpp */; };
481FB63B1AC1B7EA0076CFF3 /* rabundvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7A312D37EC400DA6239 /* rabundvector.cpp */; };
- 481FB63C1AC1B7EA0076CFF3 /* referencedb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721765613BB9F7D0014DAAE /* referencedb.cpp */; };
481FB63D1AC1B7EA0076CFF3 /* reportfile.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7CB12D37EC400DA6239 /* reportfile.cpp */; };
481FB63E1AC1B7EA0076CFF3 /* sabundvector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7CF12D37EC400DA6239 /* sabundvector.cpp */; };
481FB63F1AC1B7EA0076CFF3 /* sequencecountparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A741FAD115D1688E0067BCC5 /* sequencecountparser.cpp */; };
@@ -309,7 +305,6 @@
481FB6511AC1B8100076CFF3 /* engine.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6DA12D37EC400DA6239 /* engine.cpp */; };
481FB6521AC1B8100076CFF3 /* fileoutput.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B6E012D37EC400DA6239 /* fileoutput.cpp */; };
481FB6531AC1B8100076CFF3 /* gotohoverlap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B71012D37EC400DA6239 /* gotohoverlap.cpp */; };
- 481FB6541AC1B8100076CFF3 /* hcluster.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B71812D37EC400DA6239 /* hcluster.cpp */; };
481FB6551AC1B8100076CFF3 /* heatmap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B71C12D37EC400DA6239 /* heatmap.cpp */; };
481FB6561AC1B8100076CFF3 /* heatmapsim.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B72012D37EC400DA6239 /* heatmapsim.cpp */; };
481FB6571AC1B8100076CFF3 /* inputdata.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B72D12D37EC400DA6239 /* inputdata.cpp */; };
@@ -329,12 +324,10 @@
481FB6671AC1B8450076CFF3 /* randomnumber.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77B7186173D4041002163C2 /* randomnumber.cpp */; };
481FB6681AC1B8450076CFF3 /* rarecalc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7A512D37EC400DA6239 /* rarecalc.cpp */; };
481FB6691AC1B8520076CFF3 /* abstractdecisiontree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7386C241619E52200651424 /* abstractdecisiontree.cpp */; };
- 481FB66A1AC1B8520076CFF3 /* abstractrandomforest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48705AC319BE32C50075E977 /* abstractrandomforest.cpp */; };
481FB66B1AC1B8520076CFF3 /* decisiontree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7386C28161A110700651424 /* decisiontree.cpp */; };
481FB66C1AC1B8520076CFF3 /* randomforest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77E1937161B201E00DB1A2A /* randomforest.cpp */; };
481FB66D1AC1B8520076CFF3 /* rftreenode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77E193A161B289600DB1A2A /* rftreenode.cpp */; };
481FB66E1AC1B8520076CFF3 /* forest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 83F25B0A163B031200ABE73D /* forest.cpp */; };
- 481FB66F1AC1B8520076CFF3 /* regularizedrandomforest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 834D9D561656D7C400E7FAB9 /* regularizedrandomforest.cpp */; };
481FB6701AC1B8820076CFF3 /* raredisplay.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7A712D37EC400DA6239 /* raredisplay.cpp */; };
481FB6711AC1B8820076CFF3 /* rarefact.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B7A912D37EC400DA6239 /* rarefact.cpp */; };
481FB6721AC1B8820076CFF3 /* refchimeratest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7E6BE10912F710D8007ADDBE /* refchimeratest.cpp */; };
@@ -374,16 +367,21 @@
4827A4DC1CB3ED2200345170 /* fastqdataset.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4827A4DA1CB3ED2100345170 /* fastqdataset.cpp */; };
4829D9671B8387D0002EEED4 /* testbiominfocommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4829D9651B8387D0002EEED4 /* testbiominfocommand.cpp */; };
483C952E188F0CAD0035E7B7 /* (null) in Sources */ = {isa = PBXBuildFile; };
+ 4846AD8A1D3810DD00DE9913 /* testtrimoligos.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4846AD881D3810DD00DE9913 /* testtrimoligos.cpp */; };
48705AC419BE32C50075E977 /* getmimarkspackagecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48705ABB19BE32C50075E977 /* getmimarkspackagecommand.cpp */; };
48705AC519BE32C50075E977 /* oligos.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48705ABD19BE32C50075E977 /* oligos.cpp */; };
48705AC619BE32C50075E977 /* mergesfffilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48705ABF19BE32C50075E977 /* mergesfffilecommand.cpp */; };
48705AC719BE32C50075E977 /* sharedrjsd.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48705AC119BE32C50075E977 /* sharedrjsd.cpp */; };
- 48705AC819BE32C50075E977 /* abstractrandomforest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48705AC319BE32C50075E977 /* abstractrandomforest.cpp */; };
487C5A871AB88B93002AF48A /* mimarksattributescommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 487C5A851AB88B93002AF48A /* mimarksattributescommand.cpp */; };
487D09EB1CB2CEF3007039BF /* vsearchfileparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 489B55701BCD7F0100FB7DC8 /* vsearchfileparser.cpp */; };
487D09EC1CB2CEFE007039BF /* averagelinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2114A7671C654D7400D3D8D9 /* averagelinkage.cpp */; };
+ 488841611CC515A000C5E972 /* (null) in Sources */ = {isa = PBXBuildFile; };
+ 488841621CC515A000C5E972 /* (null) in Sources */ = {isa = PBXBuildFile; };
+ 488841651CC6C34900C5E972 /* renamefilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 488841631CC6C34900C5E972 /* renamefilecommand.cpp */; };
+ 488841661CC6C35500C5E972 /* renamefilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 488841631CC6C34900C5E972 /* renamefilecommand.cpp */; };
4893DE2918EEF28100C615DF /* (null) in Sources */ = {isa = PBXBuildFile; };
489B55721BCD7F0100FB7DC8 /* vsearchfileparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 489B55701BCD7F0100FB7DC8 /* vsearchfileparser.cpp */; };
+ 48A11C6E1CDA40F0003481D8 /* testrenamefilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48A11C6C1CDA40F0003481D8 /* testrenamefilecommand.cpp */; };
48A85BAD18E1AF2000199B6F /* (null) in Sources */ = {isa = PBXBuildFile; };
48B662031BBB1B6600997EE4 /* testrenameseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48B662011BBB1B6600997EE4 /* testrenameseqscommand.cpp */; };
48C51DF01A76B888004ECDF1 /* fastqread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48C51DEF1A76B888004ECDF1 /* fastqread.cpp */; };
@@ -400,9 +398,9 @@
48DB37B31B3B27E000C372A4 /* makefilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48DB37B11B3B27E000C372A4 /* makefilecommand.cpp */; };
48DB37B41B3B27E000C372A4 /* makefilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48DB37B11B3B27E000C372A4 /* makefilecommand.cpp */; };
48E981CF189C38FB0042BE9D /* (null) in Sources */ = {isa = PBXBuildFile; };
+ 48EDB76C1D1320DD00F76E93 /* chimeravsearchcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48EDB76A1D1320DD00F76E93 /* chimeravsearchcommand.cpp */; };
48F98E4D1A9CFD670005E81B /* completelinkage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48F98E4C1A9CFD670005E81B /* completelinkage.cpp */; };
7E6BE10A12F710D8007ADDBE /* refchimeratest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7E6BE10912F710D8007ADDBE /* refchimeratest.cpp */; };
- 834D9D581656D7C400E7FAB9 /* regularizedrandomforest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 834D9D561656D7C400E7FAB9 /* regularizedrandomforest.cpp */; };
835FE03D19F00640005AA754 /* classifysvmsharedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7B2181FE17AD777B00286E6A /* classifysvmsharedcommand.cpp */; };
835FE03E19F00A4D005AA754 /* svm.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7B21820117AD77BD00286E6A /* svm.cpp */; };
83F25B0C163B031200ABE73D /* forest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 83F25B0A163B031200ABE73D /* forest.cpp */; };
@@ -416,7 +414,6 @@
A7190B221768E0DF00A9AFA6 /* lefsecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7190B201768E0DF00A9AFA6 /* lefsecommand.cpp */; };
A71CB160130B04A2001E7287 /* anosimcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A71CB15E130B04A2001E7287 /* anosimcommand.cpp */; };
A71FE12C12EDF72400963CA7 /* mergegroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A71FE12B12EDF72400963CA7 /* mergegroupscommand.cpp */; };
- A721765713BB9F7D0014DAAE /* referencedb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721765613BB9F7D0014DAAE /* referencedb.cpp */; };
A721AB6A161C570F009860A1 /* alignnode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB66161C570F009860A1 /* alignnode.cpp */; };
A721AB6B161C570F009860A1 /* aligntree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB68161C570F009860A1 /* aligntree.cpp */; };
A721AB71161C572A009860A1 /* kmernode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721AB6D161C572A009860A1 /* kmernode.cpp */; };
@@ -427,7 +424,6 @@
A727864412E9E28C00F86ABA /* removerarecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A727864312E9E28C00F86ABA /* removerarecommand.cpp */; };
A7386C251619E52300651424 /* abstractdecisiontree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7386C241619E52200651424 /* abstractdecisiontree.cpp */; };
A7386C29161A110800651424 /* decisiontree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7386C28161A110700651424 /* decisiontree.cpp */; };
- A73DDBBA13C4A0D1006AAE38 /* clearmemorycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73DDBB913C4A0D1006AAE38 /* clearmemorycommand.cpp */; };
A73DDC3813C4BF64006AAE38 /* mothurmetastats.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73DDC3713C4BF64006AAE38 /* mothurmetastats.cpp */; };
A741744C175CD9B1007DF49B /* makelefsecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A741744A175CD9B1007DF49B /* makelefsecommand.cpp */; };
A741FAD215D1688E0067BCC5 /* sequencecountparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A741FAD115D1688E0067BCC5 /* sequencecountparser.cpp */; };
@@ -566,8 +562,6 @@
A7E9B8DD12D37EC400DA6239 /* gower.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B71212D37EC400DA6239 /* gower.cpp */; };
A7E9B8DE12D37EC400DA6239 /* groupmap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B71412D37EC400DA6239 /* groupmap.cpp */; };
A7E9B8DF12D37EC400DA6239 /* hamming.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B71612D37EC400DA6239 /* hamming.cpp */; };
- A7E9B8E012D37EC400DA6239 /* hcluster.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B71812D37EC400DA6239 /* hcluster.cpp */; };
- A7E9B8E112D37EC400DA6239 /* hclustercommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B71A12D37EC400DA6239 /* hclustercommand.cpp */; };
A7E9B8E212D37EC400DA6239 /* heatmap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B71C12D37EC400DA6239 /* heatmap.cpp */; };
A7E9B8E312D37EC400DA6239 /* heatmapcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B71E12D37EC400DA6239 /* heatmapcommand.cpp */; };
A7E9B8E412D37EC400DA6239 /* heatmapsim.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B72012D37EC400DA6239 /* heatmapsim.cpp */; };
@@ -624,7 +618,6 @@
A7E9B91912D37EC400DA6239 /* phylotree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B78F12D37EC400DA6239 /* phylotree.cpp */; };
A7E9B91A12D37EC400DA6239 /* phylotypecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B79112D37EC400DA6239 /* phylotypecommand.cpp */; };
A7E9B91B12D37EC400DA6239 /* pintail.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B79312D37EC400DA6239 /* pintail.cpp */; };
- A7E9B91C12D37EC400DA6239 /* pipelinepdscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B79512D37EC400DA6239 /* pipelinepdscommand.cpp */; };
A7E9B91D12D37EC400DA6239 /* preclustercommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B79712D37EC400DA6239 /* preclustercommand.cpp */; };
A7E9B91E12D37EC400DA6239 /* prng.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B79912D37EC400DA6239 /* prng.cpp */; };
A7E9B91F12D37EC400DA6239 /* progress.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B79B12D37EC400DA6239 /* progress.cpp */; };
@@ -812,8 +805,9 @@
4827A4DB1CB3ED2100345170 /* fastqdataset.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = fastqdataset.h; sourceTree = "<group>"; };
4829D9651B8387D0002EEED4 /* testbiominfocommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = testbiominfocommand.cpp; sourceTree = "<group>"; };
4829D9661B8387D0002EEED4 /* testbiominfocommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = testbiominfocommand.h; sourceTree = "<group>"; };
+ 4846AD881D3810DD00DE9913 /* testtrimoligos.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = testtrimoligos.cpp; sourceTree = "<group>"; };
+ 4846AD891D3810DD00DE9913 /* testtrimoligos.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = testtrimoligos.hpp; sourceTree = "<group>"; };
484F21691BA1C5F8001C1B5F /* makefile-internal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = "makefile-internal"; sourceTree = SOURCE_ROOT; };
- 48705ABA19BE32C50075E977 /* abstractrandomforest.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = abstractrandomforest.hpp; path = source/randomforest/abstractrandomforest.hpp; sourceTree = SOURCE_ROOT; };
48705ABB19BE32C50075E977 /* getmimarkspackagecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = getmimarkspackagecommand.cpp; path = source/commands/getmimarkspackagecommand.cpp; sourceTree = SOURCE_ROOT; };
48705ABC19BE32C50075E977 /* getmimarkspackagecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = getmimarkspackagecommand.h; path = source/commands/getmimarkspackagecommand.h; sourceTree = SOURCE_ROOT; };
48705ABD19BE32C50075E977 /* oligos.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = oligos.cpp; path = source/datastructures/oligos.cpp; sourceTree = SOURCE_ROOT; };
@@ -822,12 +816,15 @@
48705AC019BE32C50075E977 /* mergesfffilecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = mergesfffilecommand.h; path = source/commands/mergesfffilecommand.h; sourceTree = SOURCE_ROOT; };
48705AC119BE32C50075E977 /* sharedrjsd.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sharedrjsd.cpp; path = source/calculators/sharedrjsd.cpp; sourceTree = SOURCE_ROOT; };
48705AC219BE32C50075E977 /* sharedrjsd.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = sharedrjsd.h; path = source/calculators/sharedrjsd.h; sourceTree = SOURCE_ROOT; };
- 48705AC319BE32C50075E977 /* abstractrandomforest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = abstractrandomforest.cpp; path = source/randomforest/abstractrandomforest.cpp; sourceTree = SOURCE_ROOT; };
487C5A851AB88B93002AF48A /* mimarksattributescommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mimarksattributescommand.cpp; path = source/commands/mimarksattributescommand.cpp; sourceTree = SOURCE_ROOT; };
487C5A861AB88B93002AF48A /* mimarksattributescommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = mimarksattributescommand.h; path = source/commands/mimarksattributescommand.h; sourceTree = SOURCE_ROOT; };
48844B261AA74AF9006EF2B8 /* compare.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = compare.h; path = source/datastructures/compare.h; sourceTree = SOURCE_ROOT; };
+ 488841631CC6C34900C5E972 /* renamefilecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = renamefilecommand.cpp; path = source/commands/renamefilecommand.cpp; sourceTree = SOURCE_ROOT; };
+ 488841641CC6C34900C5E972 /* renamefilecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = renamefilecommand.h; path = source/commands/renamefilecommand.h; sourceTree = SOURCE_ROOT; };
489B55701BCD7F0100FB7DC8 /* vsearchfileparser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = vsearchfileparser.cpp; path = source/vsearchfileparser.cpp; sourceTree = "<group>"; };
489B55711BCD7F0100FB7DC8 /* vsearchfileparser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = vsearchfileparser.h; path = source/vsearchfileparser.h; sourceTree = "<group>"; };
+ 48A11C6C1CDA40F0003481D8 /* testrenamefilecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = testrenamefilecommand.cpp; path = testcommands/testrenamefilecommand.cpp; sourceTree = "<group>"; };
+ 48A11C6D1CDA40F0003481D8 /* testrenamefilecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = testrenamefilecommand.h; path = testcommands/testrenamefilecommand.h; sourceTree = "<group>"; };
48B662011BBB1B6600997EE4 /* testrenameseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = testrenameseqscommand.cpp; path = testcommands/testrenameseqscommand.cpp; sourceTree = "<group>"; };
48B662021BBB1B6600997EE4 /* testrenameseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = testrenameseqscommand.h; path = testcommands/testrenameseqscommand.h; sourceTree = "<group>"; };
48C51DEE1A76B870004ECDF1 /* fastqread.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = fastqread.h; path = source/datastructures/fastqread.h; sourceTree = SOURCE_ROOT; };
@@ -850,6 +847,8 @@
48D6E96A1CA4262A008DF76B /* dataset.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = dataset.h; sourceTree = "<group>"; };
48DB37B11B3B27E000C372A4 /* makefilecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = makefilecommand.cpp; path = source/commands/makefilecommand.cpp; sourceTree = SOURCE_ROOT; };
48DB37B21B3B27E000C372A4 /* makefilecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = makefilecommand.h; path = source/commands/makefilecommand.h; sourceTree = SOURCE_ROOT; };
+ 48EDB76A1D1320DD00F76E93 /* chimeravsearchcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = chimeravsearchcommand.cpp; path = source/commands/chimeravsearchcommand.cpp; sourceTree = "<group>"; };
+ 48EDB76B1D1320DD00F76E93 /* chimeravsearchcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = chimeravsearchcommand.h; path = source/commands/chimeravsearchcommand.h; sourceTree = "<group>"; };
48F98E4C1A9CFD670005E81B /* completelinkage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = completelinkage.cpp; path = source/completelinkage.cpp; sourceTree = SOURCE_ROOT; };
7B2181FE17AD777B00286E6A /* classifysvmsharedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = classifysvmsharedcommand.cpp; path = source/commands/classifysvmsharedcommand.cpp; sourceTree = SOURCE_ROOT; };
7B2181FF17AD777B00286E6A /* classifysvmsharedcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = classifysvmsharedcommand.h; path = source/commands/classifysvmsharedcommand.h; sourceTree = SOURCE_ROOT; };
@@ -858,8 +857,6 @@
7E6BE10812F710D8007ADDBE /* refchimeratest.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = refchimeratest.h; path = source/refchimeratest.h; sourceTree = "<group>"; };
7E6BE10912F710D8007ADDBE /* refchimeratest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = refchimeratest.cpp; path = source/refchimeratest.cpp; sourceTree = "<group>"; };
7E78911B135F3E8600E725D2 /* eachgapdistignorens.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = eachgapdistignorens.h; path = source/calculators/eachgapdistignorens.h; sourceTree = SOURCE_ROOT; };
- 834D9D561656D7C400E7FAB9 /* regularizedrandomforest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = regularizedrandomforest.cpp; path = source/randomforest/regularizedrandomforest.cpp; sourceTree = SOURCE_ROOT; };
- 834D9D571656D7C400E7FAB9 /* regularizedrandomforest.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = regularizedrandomforest.h; path = source/randomforest/regularizedrandomforest.h; sourceTree = SOURCE_ROOT; };
83F25B0A163B031200ABE73D /* forest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = forest.cpp; path = source/randomforest/forest.cpp; sourceTree = SOURCE_ROOT; };
83F25B0B163B031200ABE73D /* forest.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = forest.h; path = source/randomforest/forest.h; sourceTree = SOURCE_ROOT; };
8DD76FB20486AB0100D96B5E /* mothur */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = mothur; sourceTree = BUILT_PRODUCTS_DIR; };
@@ -882,8 +879,6 @@
A71CB15F130B04A2001E7287 /* anosimcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = anosimcommand.h; path = source/commands/anosimcommand.h; sourceTree = SOURCE_ROOT; };
A71FE12A12EDF72400963CA7 /* mergegroupscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = mergegroupscommand.h; path = source/commands/mergegroupscommand.h; sourceTree = SOURCE_ROOT; };
A71FE12B12EDF72400963CA7 /* mergegroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mergegroupscommand.cpp; path = source/commands/mergegroupscommand.cpp; sourceTree = SOURCE_ROOT; };
- A721765513BB9F7D0014DAAE /* referencedb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = referencedb.h; path = source/datastructures/referencedb.h; sourceTree = SOURCE_ROOT; };
- A721765613BB9F7D0014DAAE /* referencedb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = referencedb.cpp; path = source/datastructures/referencedb.cpp; sourceTree = SOURCE_ROOT; };
A721AB66161C570F009860A1 /* alignnode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = alignnode.cpp; path = source/classifier/alignnode.cpp; sourceTree = SOURCE_ROOT; };
A721AB67161C570F009860A1 /* alignnode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = alignnode.h; path = source/classifier/alignnode.h; sourceTree = SOURCE_ROOT; };
A721AB68161C570F009860A1 /* aligntree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = aligntree.cpp; path = source/classifier/aligntree.cpp; sourceTree = SOURCE_ROOT; };
@@ -907,8 +902,6 @@
A7386C201619CACB00651424 /* rftreenode.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = rftreenode.hpp; path = source/randomforest/rftreenode.hpp; sourceTree = SOURCE_ROOT; };
A7386C241619E52200651424 /* abstractdecisiontree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = abstractdecisiontree.cpp; path = source/randomforest/abstractdecisiontree.cpp; sourceTree = SOURCE_ROOT; };
A7386C28161A110700651424 /* decisiontree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = decisiontree.cpp; path = source/randomforest/decisiontree.cpp; sourceTree = SOURCE_ROOT; };
- A73DDBB813C4A0D1006AAE38 /* clearmemorycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = clearmemorycommand.h; path = source/commands/clearmemorycommand.h; sourceTree = SOURCE_ROOT; };
- A73DDBB913C4A0D1006AAE38 /* clearmemorycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = clearmemorycommand.cpp; path = source/commands/clearmemorycommand.cpp; sourceTree = SOURCE_ROOT; };
A73DDC3613C4BF64006AAE38 /* mothurmetastats.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = mothurmetastats.h; path = source/metastats/mothurmetastats.h; sourceTree = SOURCE_ROOT; };
A73DDC3713C4BF64006AAE38 /* mothurmetastats.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mothurmetastats.cpp; path = source/metastats/mothurmetastats.cpp; sourceTree = SOURCE_ROOT; };
A741744A175CD9B1007DF49B /* makelefsecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = makelefsecommand.cpp; path = source/commands/makelefsecommand.cpp; sourceTree = SOURCE_ROOT; };
@@ -1199,10 +1192,6 @@
A7E9B71512D37EC400DA6239 /* groupmap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = groupmap.h; path = source/datastructures/groupmap.h; sourceTree = SOURCE_ROOT; };
A7E9B71612D37EC400DA6239 /* hamming.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = hamming.cpp; path = source/calculators/hamming.cpp; sourceTree = SOURCE_ROOT; };
A7E9B71712D37EC400DA6239 /* hamming.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = hamming.h; path = source/calculators/hamming.h; sourceTree = SOURCE_ROOT; };
- A7E9B71812D37EC400DA6239 /* hcluster.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = hcluster.cpp; path = source/hcluster.cpp; sourceTree = "<group>"; };
- A7E9B71912D37EC400DA6239 /* hcluster.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = hcluster.h; path = source/hcluster.h; sourceTree = "<group>"; };
- A7E9B71A12D37EC400DA6239 /* hclustercommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = hclustercommand.cpp; path = source/commands/hclustercommand.cpp; sourceTree = SOURCE_ROOT; };
- A7E9B71B12D37EC400DA6239 /* hclustercommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; lineEnding = 0; name = hclustercommand.h; path = source/commands/hclustercommand.h; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.objcpp; };
A7E9B71C12D37EC400DA6239 /* heatmap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = heatmap.cpp; path = source/heatmap.cpp; sourceTree = "<group>"; };
A7E9B71D12D37EC400DA6239 /* heatmap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = heatmap.h; path = source/heatmap.h; sourceTree = "<group>"; };
A7E9B71E12D37EC400DA6239 /* heatmapcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = heatmapcommand.cpp; path = source/commands/heatmapcommand.cpp; sourceTree = SOURCE_ROOT; };
@@ -1320,8 +1309,6 @@
A7E9B79212D37EC400DA6239 /* phylotypecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = phylotypecommand.h; path = source/commands/phylotypecommand.h; sourceTree = SOURCE_ROOT; };
A7E9B79312D37EC400DA6239 /* pintail.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = pintail.cpp; path = source/chimera/pintail.cpp; sourceTree = SOURCE_ROOT; };
A7E9B79412D37EC400DA6239 /* pintail.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = pintail.h; path = source/chimera/pintail.h; sourceTree = SOURCE_ROOT; };
- A7E9B79512D37EC400DA6239 /* pipelinepdscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = pipelinepdscommand.cpp; path = source/commands/pipelinepdscommand.cpp; sourceTree = SOURCE_ROOT; };
- A7E9B79612D37EC400DA6239 /* pipelinepdscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = pipelinepdscommand.h; path = source/commands/pipelinepdscommand.h; sourceTree = SOURCE_ROOT; };
A7E9B79712D37EC400DA6239 /* preclustercommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = preclustercommand.cpp; path = source/commands/preclustercommand.cpp; sourceTree = SOURCE_ROOT; };
A7E9B79812D37EC400DA6239 /* preclustercommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = preclustercommand.h; path = source/commands/preclustercommand.h; sourceTree = SOURCE_ROOT; };
A7E9B79912D37EC400DA6239 /* prng.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = prng.cpp; path = source/calculators/prng.cpp; sourceTree = SOURCE_ROOT; };
@@ -1639,8 +1626,6 @@
A7E9B6E112D37EC400DA6239 /* fileoutput.h */,
A7E9B71112D37EC400DA6239 /* gotohoverlap.hpp */,
A7E9B71012D37EC400DA6239 /* gotohoverlap.cpp */,
- A7E9B71812D37EC400DA6239 /* hcluster.cpp */,
- A7E9B71912D37EC400DA6239 /* hcluster.h */,
A7E9B71C12D37EC400DA6239 /* heatmap.cpp */,
A7E9B71D12D37EC400DA6239 /* heatmap.h */,
A7E9B72012D37EC400DA6239 /* heatmapsim.cpp */,
@@ -1733,6 +1718,8 @@
48D6E96A1CA4262A008DF76B /* dataset.h */,
4827A4DA1CB3ED2100345170 /* fastqdataset.cpp */,
4827A4DB1CB3ED2100345170 /* fastqdataset.h */,
+ 4846AD881D3810DD00DE9913 /* testtrimoligos.cpp */,
+ 4846AD891D3810DD00DE9913 /* testtrimoligos.hpp */,
48D6E9661CA42389008DF76B /* testvsearchfileparser.cpp */,
48D6E9671CA42389008DF76B /* testvsearchfileparser.h */,
481FB5221AC0AA010076CFF3 /* testcontainers */,
@@ -1750,6 +1737,8 @@
48C728731B6AB4CD00D40830 /* testgetgroupscommand.cpp */,
48C728691B69598400D40830 /* testmergegroupscommand.h */,
48C728681B69598400D40830 /* testmergegroupscommand.cpp */,
+ 48A11C6C1CDA40F0003481D8 /* testrenamefilecommand.cpp */,
+ 48A11C6D1CDA40F0003481D8 /* testrenamefilecommand.h */,
48B662011BBB1B6600997EE4 /* testrenameseqscommand.cpp */,
48B662021BBB1B6600997EE4 /* testrenameseqscommand.h */,
48C7286F1B6AB3B900D40830 /* testremovegroupscommand.cpp */,
@@ -1785,8 +1774,6 @@
children = (
A7386C1B1619CACB00651424 /* abstractdecisiontree.hpp */,
A7386C241619E52200651424 /* abstractdecisiontree.cpp */,
- 48705ABA19BE32C50075E977 /* abstractrandomforest.hpp */,
- 48705AC319BE32C50075E977 /* abstractrandomforest.cpp */,
A7386C1D1619CACB00651424 /* decisiontree.hpp */,
A7386C28161A110700651424 /* decisiontree.cpp */,
A7386C1E1619CACB00651424 /* macros.h */,
@@ -1796,8 +1783,6 @@
A77E193A161B289600DB1A2A /* rftreenode.cpp */,
83F25B0A163B031200ABE73D /* forest.cpp */,
83F25B0B163B031200ABE73D /* forest.h */,
- 834D9D561656D7C400E7FAB9 /* regularizedrandomforest.cpp */,
- 834D9D571656D7C400E7FAB9 /* regularizedrandomforest.h */,
);
name = randomforest;
path = /Users/sarahwestcott/Desktop/mothur;
@@ -1851,6 +1836,8 @@
A7E9B68A12D37EC400DA6239 /* chimeraslayercommand.cpp */,
A74D36B6137DAFAA00332B0C /* chimerauchimecommand.h */,
A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */,
+ 48EDB76A1D1320DD00F76E93 /* chimeravsearchcommand.cpp */,
+ 48EDB76B1D1320DD00F76E93 /* chimeravsearchcommand.h */,
A7E9B68D12D37EC400DA6239 /* chopseqscommand.h */,
A7E9B68C12D37EC400DA6239 /* chopseqscommand.cpp */,
A7E9B69112D37EC400DA6239 /* classifyotucommand.h */,
@@ -1865,8 +1852,6 @@
A7EEB0F414F29BFD00344B83 /* classifytreecommand.cpp */,
A7E9B69712D37EC400DA6239 /* clearcutcommand.h */,
A7E9B69612D37EC400DA6239 /* clearcutcommand.cpp */,
- A73DDBB813C4A0D1006AAE38 /* clearmemorycommand.h */,
- A73DDBB913C4A0D1006AAE38 /* clearmemorycommand.cpp */,
A7E9B69D12D37EC400DA6239 /* clustercommand.h */,
A7E9B69C12D37EC400DA6239 /* clustercommand.cpp */,
A7E9B69F12D37EC400DA6239 /* clusterdoturcommand.h */,
@@ -1941,8 +1926,6 @@
A7E9B70812D37EC400DA6239 /* getseqscommand.cpp */,
A7E9B70B12D37EC400DA6239 /* getsharedotucommand.h */,
A7E9B70A12D37EC400DA6239 /* getsharedotucommand.cpp */,
- A7E9B71B12D37EC400DA6239 /* hclustercommand.h */,
- A7E9B71A12D37EC400DA6239 /* hclustercommand.cpp */,
A7E9B71F12D37EC400DA6239 /* heatmapcommand.h */,
A7E9B71E12D37EC400DA6239 /* heatmapcommand.cpp */,
A7E9B72312D37EC400DA6239 /* heatmapsimcommand.h */,
@@ -2023,8 +2006,6 @@
A7E9B78B12D37EC400DA6239 /* phylodiversitycommand.cpp */,
A7E9B79212D37EC400DA6239 /* phylotypecommand.h */,
A7E9B79112D37EC400DA6239 /* phylotypecommand.cpp */,
- A7E9B79612D37EC400DA6239 /* pipelinepdscommand.h */,
- A7E9B79512D37EC400DA6239 /* pipelinepdscommand.cpp */,
A7E9B79812D37EC400DA6239 /* preclustercommand.h */,
A7E9B79712D37EC400DA6239 /* preclustercommand.cpp */,
A74C06E616A9C097008390A3 /* primerdesigncommand.h */,
@@ -2047,6 +2028,8 @@
A727864312E9E28C00F86ABA /* removerarecommand.cpp */,
A7E9B7CA12D37EC400DA6239 /* removeseqscommand.h */,
A7E9B7C912D37EC400DA6239 /* removeseqscommand.cpp */,
+ 488841631CC6C34900C5E972 /* renamefilecommand.cpp */,
+ 488841641CC6C34900C5E972 /* renamefilecommand.h */,
A7CFA42F1755400500D9ED4D /* renameseqscommand.h */,
A7CFA4301755401800D9ED4D /* renameseqscommand.cpp */,
A7E9B7CE12D37EC400DA6239 /* reversecommand.h */,
@@ -2336,8 +2319,6 @@
A7E9B7A012D37EC400DA6239 /* qualityscores.h */,
A7E9B7A312D37EC400DA6239 /* rabundvector.cpp */,
A7E9B7A412D37EC400DA6239 /* rabundvector.hpp */,
- A721765513BB9F7D0014DAAE /* referencedb.h */,
- A721765613BB9F7D0014DAAE /* referencedb.cpp */,
A7E9B7CB12D37EC400DA6239 /* reportfile.cpp */,
A7E9B7CC12D37EC400DA6239 /* reportfile.h */,
A7E9B7CF12D37EC400DA6239 /* sabundvector.cpp */,
@@ -2592,7 +2573,6 @@
481FB6271AC1B7EA0076CFF3 /* alignmentdb.cpp in Sources */,
481FB6351AC1B7EA0076CFF3 /* kmerdb.cpp in Sources */,
481FB5721AC1B6D40076CFF3 /* simpson.cpp in Sources */,
- 481FB5A31AC1B7300076CFF3 /* clearmemorycommand.cpp in Sources */,
481FB55D1AC1B6690076CFF3 /* sharedchao1.cpp in Sources */,
481FB5FE1AC1B7970076CFF3 /* removerarecommand.cpp in Sources */,
481FB53C1AC1B5F10076CFF3 /* bootstrap.cpp in Sources */,
@@ -2701,7 +2681,6 @@
481FB5D81AC1B75C0076CFF3 /* makecontigscommand.cpp in Sources */,
481FB6481AC1B7EA0076CFF3 /* sparsedistancematrix.cpp in Sources */,
481FB5531AC1B6490076CFF3 /* parsimony.cpp in Sources */,
- 481FB63C1AC1B7EA0076CFF3 /* referencedb.cpp in Sources */,
481FB6641AC1B8450076CFF3 /* optionparser.cpp in Sources */,
481FB68B1AC1BA9E0076CFF3 /* aligntree.cpp in Sources */,
481FB5FB1AC1B77E0076CFF3 /* removelineagecommand.cpp in Sources */,
@@ -2718,7 +2697,6 @@
481FB61D1AC1B7AC0076CFF3 /* unifracunweightedcommand.cpp in Sources */,
481FB6141AC1B7AC0076CFF3 /* subsamplecommand.cpp in Sources */,
481FB5481AC1B61F0076CFF3 /* hellinger.cpp in Sources */,
- 481FB5CA1AC1B74F0076CFF3 /* hclustercommand.cpp in Sources */,
481FB5D41AC1B75C0076CFF3 /* listseqscommand.cpp in Sources */,
481FB6521AC1B8100076CFF3 /* fileoutput.cpp in Sources */,
481FB6851AC1B8B80076CFF3 /* validcalculator.cpp in Sources */,
@@ -2750,6 +2728,7 @@
481FB5E61AC1B77E0076CFF3 /* normalizesharedcommand.cpp in Sources */,
481FB5E71AC1B77E0076CFF3 /* nmdscommand.cpp in Sources */,
481FB52B1AC1B09F0076CFF3 /* setseedcommand.cpp in Sources */,
+ 4846AD8A1D3810DD00DE9913 /* testtrimoligos.cpp in Sources */,
481FB5261AC0ADA00076CFF3 /* sequence.cpp in Sources */,
481FB5C61AC1B74F0076CFF3 /* getrelabundcommand.cpp in Sources */,
481FB6571AC1B8100076CFF3 /* inputdata.cpp in Sources */,
@@ -2764,21 +2743,21 @@
481FB6411AC1B7EA0076CFF3 /* sequenceparser.cpp in Sources */,
481FB6381AC1B7EA0076CFF3 /* oligos.cpp in Sources */,
481FB59E1AC1B71B0076CFF3 /* classifyseqscommand.cpp in Sources */,
- 481FB5F31AC1B77E0076CFF3 /* pipelinepdscommand.cpp in Sources */,
481FB5CF1AC1B75C0076CFF3 /* indicatorcommand.cpp in Sources */,
481FB64F1AC1B8100076CFF3 /* consensus.cpp in Sources */,
481FB5441AC1B6140076CFF3 /* goodscoverage.cpp in Sources */,
481FB5DD1AC1B77E0076CFF3 /* matrixoutputcommand.cpp in Sources */,
481FB5771AC1B6EA0076CFF3 /* spearman.cpp in Sources */,
481FB6031AC1B7970076CFF3 /* secondarystructurecommand.cpp in Sources */,
- 481FB66F1AC1B8520076CFF3 /* regularizedrandomforest.cpp in Sources */,
481FB5361AC1B5DC0076CFF3 /* getopt_long.cpp in Sources */,
481FB5A41AC1B7300076CFF3 /* clustercommand.cpp in Sources */,
481FB5671AC1B6AD0076CFF3 /* sharedmorisitahorn.cpp in Sources */,
481FB5581AC1B6590076CFF3 /* shannonrange.cpp in Sources */,
481FB5601AC1B6790076CFF3 /* sharedjest.cpp in Sources */,
481FB64A1AC1B7F40076CFF3 /* suffixnodes.cpp in Sources */,
+ 488841661CC6C35500C5E972 /* renamefilecommand.cpp in Sources */,
481FB53F1AC1B6000076CFF3 /* canberra.cpp in Sources */,
+ 48A11C6E1CDA40F0003481D8 /* testrenamefilecommand.cpp in Sources */,
481FB62B1AC1B7EA0076CFF3 /* database.cpp in Sources */,
481FB5BD1AC1B74F0076CFF3 /* getlabelcommand.cpp in Sources */,
481FB5B91AC1B74F0076CFF3 /* getcurrentcommand.cpp in Sources */,
@@ -2814,7 +2793,6 @@
481FB5831AC1B6FF0076CFF3 /* ccode.cpp in Sources */,
481FB5681AC1B6B20076CFF3 /* sharedochiai.cpp in Sources */,
481FB66E1AC1B8520076CFF3 /* forest.cpp in Sources */,
- 481FB66A1AC1B8520076CFF3 /* abstractrandomforest.cpp in Sources */,
481FB56A1AC1B6B80076CFF3 /* sharedsobs.cpp in Sources */,
481FB6671AC1B8450076CFF3 /* randomnumber.cpp in Sources */,
481FB5DB1AC1B75C0076CFF3 /* makelefsecommand.cpp in Sources */,
@@ -2848,7 +2826,6 @@
481FB5651AC1B6A70076CFF3 /* sharedlennon.cpp in Sources */,
481FB53E1AC1B5FC0076CFF3 /* calculator.cpp in Sources */,
481FB6241AC1B7BA0076CFF3 /* qFinderDMM.cpp in Sources */,
- 481FB6541AC1B8100076CFF3 /* hcluster.cpp in Sources */,
481FB6311AC1B7EA0076CFF3 /* fullmatrix.cpp in Sources */,
481FB51C1AC0A63E0076CFF3 /* main.cpp in Sources */,
481FB58F1AC1B71B0076CFF3 /* newcommandtemplate.cpp in Sources */,
@@ -2878,6 +2855,7 @@
481FB5391AC1B5E90076CFF3 /* ace.cpp in Sources */,
481FB5751AC1B6EA0076CFF3 /* soergel.cpp in Sources */,
481FB5DA1AC1B75C0076CFF3 /* makegroupcommand.cpp in Sources */,
+ 488841621CC515A000C5E972 /* (null) in Sources */,
481FB5691AC1B6B50076CFF3 /* sharedrjsd.cpp in Sources */,
481FB6801AC1B8960076CFF3 /* slibshuff.cpp in Sources */,
481FB67B1AC1B88F0076CFF3 /* readphylipvector.cpp in Sources */,
@@ -3003,6 +2981,7 @@
A7E9B8A812D37EC400DA6239 /* clustercommand.cpp in Sources */,
A7E9B8A912D37EC400DA6239 /* clusterdoturcommand.cpp in Sources */,
A7E9B8AA12D37EC400DA6239 /* clusterfragmentscommand.cpp in Sources */,
+ 48EDB76C1D1320DD00F76E93 /* chimeravsearchcommand.cpp in Sources */,
A7E9B8AB12D37EC400DA6239 /* clustersplitcommand.cpp in Sources */,
A7E9B8AC12D37EC400DA6239 /* cmdargs.cpp in Sources */,
A7E9B8AD12D37EC400DA6239 /* collect.cpp in Sources */,
@@ -3052,10 +3031,9 @@
A7E9B8DC12D37EC400DA6239 /* gotohoverlap.cpp in Sources */,
A7E9B8DD12D37EC400DA6239 /* gower.cpp in Sources */,
A7E9B8DE12D37EC400DA6239 /* groupmap.cpp in Sources */,
+ 488841651CC6C34900C5E972 /* renamefilecommand.cpp in Sources */,
4893DE2918EEF28100C615DF /* (null) in Sources */,
A7E9B8DF12D37EC400DA6239 /* hamming.cpp in Sources */,
- A7E9B8E012D37EC400DA6239 /* hcluster.cpp in Sources */,
- A7E9B8E112D37EC400DA6239 /* hclustercommand.cpp in Sources */,
A7E9B8E212D37EC400DA6239 /* heatmap.cpp in Sources */,
A7E9B8E312D37EC400DA6239 /* heatmapcommand.cpp in Sources */,
A7E9B8E412D37EC400DA6239 /* heatmapsim.cpp in Sources */,
@@ -3112,11 +3090,11 @@
A7E9B91412D37EC400DA6239 /* parsimonycommand.cpp in Sources */,
A7E9B91512D37EC400DA6239 /* pcoacommand.cpp in Sources */,
A7E9B91712D37EC400DA6239 /* phylodiversitycommand.cpp in Sources */,
+ 488841611CC515A000C5E972 /* (null) in Sources */,
A7E9B91812D37EC400DA6239 /* phylosummary.cpp in Sources */,
A7E9B91912D37EC400DA6239 /* phylotree.cpp in Sources */,
A7E9B91A12D37EC400DA6239 /* phylotypecommand.cpp in Sources */,
A7E9B91B12D37EC400DA6239 /* pintail.cpp in Sources */,
- A7E9B91C12D37EC400DA6239 /* pipelinepdscommand.cpp in Sources */,
48DB37B31B3B27E000C372A4 /* makefilecommand.cpp in Sources */,
A7E9B91D12D37EC400DA6239 /* preclustercommand.cpp in Sources */,
A7E9B91E12D37EC400DA6239 /* prng.cpp in Sources */,
@@ -3184,7 +3162,6 @@
A7E9B95D12D37EC400DA6239 /* sharedsorclass.cpp in Sources */,
A7E9B95E12D37EC400DA6239 /* sharedsorest.cpp in Sources */,
A7E9B95F12D37EC400DA6239 /* sharedthetan.cpp in Sources */,
- 48705AC819BE32C50075E977 /* abstractrandomforest.cpp in Sources */,
A7E9B96012D37EC400DA6239 /* sharedthetayc.cpp in Sources */,
A7E9B96112D37EC400DA6239 /* sharedutilities.cpp in Sources */,
A7E9B96212D37EC400DA6239 /* shen.cpp in Sources */,
@@ -3256,8 +3233,6 @@
A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */,
A77A221F139001B600B0BE70 /* deuniquetreecommand.cpp in Sources */,
A7730EFF13967241007433A3 /* countseqscommand.cpp in Sources */,
- A721765713BB9F7D0014DAAE /* referencedb.cpp in Sources */,
- A73DDBBA13C4A0D1006AAE38 /* clearmemorycommand.cpp in Sources */,
A73DDC3813C4BF64006AAE38 /* mothurmetastats.cpp in Sources */,
A79234D713C74BF6002B08E2 /* mothurfisher.cpp in Sources */,
A795840D13F13CD900F201D5 /* countgroupscommand.cpp in Sources */,
@@ -3303,7 +3278,6 @@
A721AB72161C572A009860A1 /* kmertree.cpp in Sources */,
A721AB77161C573B009860A1 /* taxonomynode.cpp in Sources */,
83F25B0C163B031200ABE73D /* forest.cpp in Sources */,
- 834D9D581656D7C400E7FAB9 /* regularizedrandomforest.cpp in Sources */,
A7496D2E167B531B00CC7D7C /* kruskalwalliscommand.cpp in Sources */,
A79EEF8616971D4A0006DEC1 /* filtersharedcommand.cpp in Sources */,
A74C06E916A9C0A9008390A3 /* primerdesigncommand.cpp in Sources */,
@@ -3350,7 +3324,7 @@
"DYLIB_CURRENT_VERSION[sdk=*]" = "";
GCC_DYNAMIC_NO_PIC = NO;
GCC_MODEL_TUNING = G5;
- GCC_OPTIMIZATION_LEVEL = 3;
+ GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"VERSION=\"\\\"1.37.1\\\"\"",
"RELEASE_DATE=\"\\\"04/11/2016\\\"\"",
@@ -3385,7 +3359,7 @@
DEPLOYMENT_LOCATION = YES;
DSTROOT = TARGET_BUILD_DIR;
GCC_MODEL_TUNING = G5;
- GCC_OPTIMIZATION_LEVEL = 3;
+ GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"VERSION=\"\\\"1.37.1\\\"\"",
"RELEASE_DATE=\"\\\"04/11/2016\\\"\"",
@@ -3413,17 +3387,17 @@
buildSettings = {
CLANG_WARN_UNREACHABLE_CODE = YES;
DEPLOYMENT_LOCATION = NO;
- GCC_C_LANGUAGE_STANDARD = "compiler-default";
+ GCC_C_LANGUAGE_STANDARD = c11;
GCC_ENABLE_SSE3_EXTENSIONS = NO;
GCC_ENABLE_SSE41_EXTENSIONS = NO;
GCC_ENABLE_SSE42_EXTENSIONS = NO;
- GCC_OPTIMIZATION_LEVEL = 3;
+ GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"MOTHUR_FILES=\"\\\"/Users/sarahwestcott/desktop/release\\\"\"",
"VERSION=\"\\\"1.36.0\\\"\"",
"RELEASE_DATE=\"\\\"07/23/2015\\\"\"",
);
- GCC_VERSION = "/usr/bin/c++";
+ GCC_VERSION = "";
"GCC_VERSION[arch=*]" = "";
GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
GCC_WARN_ABOUT_MISSING_PROTOTYPES = NO;
@@ -3460,10 +3434,10 @@
buildSettings = {
CLANG_WARN_UNREACHABLE_CODE = YES;
DEPLOYMENT_LOCATION = NO;
- GCC_C_LANGUAGE_STANDARD = "compiler-default";
+ GCC_C_LANGUAGE_STANDARD = c11;
GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
GCC_MODEL_TUNING = "";
- GCC_OPTIMIZATION_LEVEL = 3;
+ GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"VERSION=\"\\\"1.37.6\\\"\"",
"RELEASE_DATE=\"\\\"06/20/2016\\\"\"",
@@ -3539,7 +3513,7 @@
MTL_ENABLE_DEBUG_INFO = YES;
ONLY_ACTIVE_ARCH = YES;
PRODUCT_NAME = "$(TARGET_NAME)";
- SDKROOT = macosx;
+ SDKROOT = macosx10.9;
};
name = Debug;
};
@@ -3576,7 +3550,7 @@
MTL_ENABLE_DEBUG_INFO = NO;
ONLY_ACTIVE_ARCH = YES;
PRODUCT_NAME = "$(TARGET_NAME)";
- SDKROOT = macosx;
+ SDKROOT = macosx10.9;
};
name = Release;
};
diff --git a/TestMothur/dataset.h b/TestMothur/dataset.h
index e725922..3160b4c 100644
--- a/TestMothur/dataset.h
+++ b/TestMothur/dataset.h
@@ -29,7 +29,7 @@ public:
vector<FastqRead> getReverseFastq() { return fastqData.getReverseFastq(); }
vector<string> getSubsetFRFastq(int n) { return fastqData.getSubsetFRFastq(n); }
- vector<string> getSubsetFNGFiles(int); //number of uniques, Fasta, name, group returned
+ vector<string> getSubsetFNGFiles(int); //number of uniques passed in. 3 files returned -> Fasta, name, group.
private:
MothurOut* m;
diff --git a/TestMothur/testcommands/testrenamefilecommand.cpp b/TestMothur/testcommands/testrenamefilecommand.cpp
new file mode 100644
index 0000000..8d0b5aa
--- /dev/null
+++ b/TestMothur/testcommands/testrenamefilecommand.cpp
@@ -0,0 +1,97 @@
+//
+// testrenamefilecommand.cpp
+// Mothur
+//
+// Created by Sarah Westcott on 5/4/16.
+// Copyright (c) 2016 Schloss Lab. All rights reserved.
+//
+
+#include "testrenamefilecommand.h"
+#include "dataset.h"
+#include "catch.hpp"
+
+/**************************************************************************************************/
+TestRenameFileCommand::TestRenameFileCommand() { //setup: create the fixture files the rename tests operate on
+ m = MothurOut::getInstance();
+ TestDataSet data;
+ filenames = data.getSubsetFNGFiles(100); //100 = number of uniques requested; the fasta, name and group filenames come back
+}
+/**************************************************************************************************/
+TestRenameFileCommand::~TestRenameFileCommand() {
+ for (int i = 0; i < filenames.size(); i++) { m->mothurRemove(filenames[i]); } //teardown: remove every file currently listed in filenames
+}
+/**************************************************************************************************/
+TEST_CASE("Testing RenameFileCommand Class") { //Catch re-enters this body once per SECTION, so every section starts from a fresh testRename fixture
+ TestRenameFileCommand testRename;
+
+ SECTION("Testing GetNewName - with prefix") {
+ INFO("Using prefix=greatData") // Only appears on a FAIL
+
+ testRename.prefix = "greatData";
+ testRename.mothurGenerated = true;
+
+ CAPTURE(testRename.getNewName(testRename.filenames[0], "fasta")); // Displays this variable on a FAIL
+
+ CHECK(testRename.getNewName(testRename.filenames[0], "fasta") == "greatData.txt");
+
+ testRename.filenames[0] = testRename.getNewName(testRename.filenames[0], "fasta"); //for teardown. NOTE(review): if getNewName only computes a name, the original fixture file is never removed - confirm
+ }
+
+ SECTION("Testing GetNewName - with user name") {
+ INFO("Using outputfile=greatData.fasta") // Only appears on a FAIL
+
+ testRename.outputfile = "greatData.fasta";
+ testRename.mothurGenerated = false;
+
+ CAPTURE(testRename.getNewName(testRename.filenames[0], "fasta")); // Displays this variable on a FAIL
+
+ CHECK(testRename.getNewName(testRename.filenames[0], "fasta") == "greatData.fasta");
+
+ testRename.filenames[0] = testRename.getNewName(testRename.filenames[0], "fasta"); //for teardown. NOTE(review): same caveat as the prefix section - confirm
+ }
+
+
+ SECTION("Testing RenameOrCopy - deleteOld=false") {
+ INFO("Uses mothur rename function to move or system command to copy.") // Only appears on a FAIL
+
+ testRename.deleteOld = false;
+
+ testRename.renameOrCopy(testRename.filenames[0], "greatData.new.fasta");
+
+ ifstream in, in2;
+ int ableToOpen = testRename.m->openInputFile("greatData.new.fasta", in); //the new file must be openable
+ in.close();
+
+ CAPTURE(ableToOpen);
+
+ CHECK(ableToOpen == 0);
+
+ int ableToOpen2 = testRename.m->openInputFile(testRename.filenames[0], in2); //deleteOld=false, so the old file must be openable too
+ in2.close();
+
+ CAPTURE(ableToOpen2);
+
+ CHECK(ableToOpen2 == 0);
+
+ testRename.m->mothurRemove("greatData.new.fasta"); //remove the copy here; the original is still in filenames for teardown
+ }
+
+ SECTION("Testing RenameOrCopy - deleteOld=true") {
+ INFO("Uses mothur rename function to move or system command to copy.") // Only appears on a FAIL
+
+ testRename.deleteOld = true;
+
+ testRename.renameOrCopy(testRename.filenames[0], "greatData.new.fasta");
+
+ ifstream in; //only the new file is checked in this section
+ int ableToOpen = testRename.m->openInputFile("greatData.new.fasta", in);
+ in.close();
+
+ CAPTURE(ableToOpen);
+
+ CHECK(ableToOpen == 0);
+
+ testRename.filenames[0] = "greatData.new.fasta"; //for teardown: register the name renameOrCopy was actually given so that file gets removed
+ }
+}
+/**************************************************************************************************/
diff --git a/TestMothur/testcommands/testrenamefilecommand.h b/TestMothur/testcommands/testrenamefilecommand.h
new file mode 100644
index 0000000..6f41685
--- /dev/null
+++ b/TestMothur/testcommands/testrenamefilecommand.h
@@ -0,0 +1,40 @@
+//
+// testrenamefilecommand.h
+// Mothur
+//
+// Created by Sarah Westcott on 5/4/16.
+// Copyright (c) 2016 Schloss Lab. All rights reserved.
+//
+
+#ifndef __Mothur__testrenamefilecommand__
+#define __Mothur__testrenamefilecommand__
+
+#include "renamefilecommand.h"
+
+class TestRenameFileCommand : public RenameFileCommand { //test fixture: derives from the command so its internals can be re-declared for the tests
+
+public:
+
+ TestRenameFileCommand(); //setup
+ ~TestRenameFileCommand(); //teardown
+
+
+ MothurOut* m;
+ vector<string> filenames; //fixture file names; the tests index and overwrite entries of this vector
+
+ //functions the original note describes as private to RenameFileCommand, re-declared under public: so the tests can call them
+ using RenameFileCommand::getNewName;
+ using RenameFileCommand::renameOrCopy;
+
+ //variables the original note describes as private to RenameFileCommand, re-declared under public: so the tests can set them
+ using RenameFileCommand::prefix;
+ using RenameFileCommand::mothurGenerated;
+ using RenameFileCommand::outputfile;
+ using RenameFileCommand::deleteOld;
+
+
+
+};
+
+
+#endif /* defined(__Mothur__testrenamefilecommand__) */
diff --git a/TestMothur/testtrimoligos.cpp b/TestMothur/testtrimoligos.cpp
new file mode 100644
index 0000000..64b747b
--- /dev/null
+++ b/TestMothur/testtrimoligos.cpp
@@ -0,0 +1,43 @@
+//
+// testtrimoligos.cpp
+// Mothur
+//
+// Created by Sarah Westcott on 7/14/16.
+// Copyright © 2016 Schloss Lab. All rights reserved.
+//
+
+#include "catch.hpp"
+#include "testtrimoligos.hpp"
+
+/**************************************************************************************************/
+TestTrimOligos::TestTrimOligos() { //setup - so far this only stores the MothurOut instance; the steps below are not implemented yet
+ m = MothurOut::getInstance();
+
+ //TODO: set up barcodes, primers, pairedBarcodes and pairedPrimers
+
+ //TODO: set up vector of seqs with barcodes and primers
+
+ //TODO: set up vector of seqs with pairedbarcodes and pairedprimers
+
+}
+/**************************************************************************************************/
+TestTrimOligos::~TestTrimOligos() {
+ //teardown: intentionally empty
+}
+/**************************************************************************************************/
+
+TEST_CASE("Testing TrimOligos Class") {
+ TestTrimOligos testTrim;
+
+ //Placeholder test case: it only constructs the fixture; no checks are made yet.
+ //TODO: create trimoligos classes with various constructors
+
+
+ //TODO: run all public strip functions
+
+
+ //TODO: run private functions
+
+
+}
+/**************************************************************************************************/
diff --git a/TestMothur/testtrimoligos.hpp b/TestMothur/testtrimoligos.hpp
new file mode 100644
index 0000000..e150c3d
--- /dev/null
+++ b/TestMothur/testtrimoligos.hpp
@@ -0,0 +1,38 @@
+//
+// testtrimoligos.hpp
+// Mothur
+//
+// Created by Sarah Westcott on 7/14/16.
+// Copyright © 2016 Schloss Lab. All rights reserved.
+//
+
+#ifndef testtrimoligos_hpp
+#define testtrimoligos_hpp
+
+#include "trimoligos.h"
+#include "sequence.hpp"
+
+class TestTrimOligos : public TrimOligos {
+
+ //Test fixture: derives from TrimOligos so selected base-class members can be re-declared for the tests.
+public:
+
+ TestTrimOligos(); //setup
+ ~TestTrimOligos(); //teardown
+
+ MothurOut* m;
+ vector<Sequence> seqs;
+ vector<Sequence> pairedSeqs;
+
+ map<string, int> barcodes;
+ map<string, int> primers;
+ map<int, oligosPair> pairedPrimers;
+ map<int, oligosPair> pairedBarcodes;
+ //a using-declaration names the member only - a parameter list is not valid syntax here
+ using TrimOligos::compareDNASeq;
+ using TrimOligos::countDiffs;
+
+};
+
+
+#endif /* testtrimoligos_hpp */
diff --git a/makefile-internal b/makefile-internal
old mode 100755
new mode 100644
index e826802..f7bf6f8
--- a/makefile-internal
+++ b/makefile-internal
@@ -1,8 +1,8 @@
64BIT_VERSION ?= yes
USEREADLINE ?= yes
USEBOOST ?= yes
-RELEASE_DATE = "\"5/11/2016\""
-VERSION = "\"1.37.4\""
+RELEASE_DATE = "\"7/20/2016\""
+VERSION = "\"1.38.0\""
# Optimize to level 3:
CXXFLAGS += -O3
diff --git a/source/averagelinkage.cpp b/source/averagelinkage.cpp
index 8627253..7942407 100644
--- a/source/averagelinkage.cpp
+++ b/source/averagelinkage.cpp
@@ -2,10 +2,7 @@
#define AVERAGE_H
//test
-#include "mothur.h"
#include "cluster.hpp"
-#include "rabundvector.hpp"
-#include "sparsedistancematrix.h"
/* This class implements the average UPGMA, average neighbor clustering algorithm */
diff --git a/source/chimera/mothurchimera.cpp b/source/chimera/mothurchimera.cpp
index 58c920a..3ec2757 100644
--- a/source/chimera/mothurchimera.cpp
+++ b/source/chimera/mothurchimera.cpp
@@ -8,7 +8,6 @@
*/
#include "mothurchimera.h"
-#include "referencedb.h"
//***************************************************************************************************************
//this is a vertical soft filter
@@ -100,53 +99,32 @@ vector<Sequence*> MothurChimera::readSeqs(string file) {
int count = 0;
length = 0;
unaligned = false;
- ReferenceDB* rdb = ReferenceDB::getInstance();
- if (file == "saved") {
-
-
- m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory."); m->mothurOutEndLine();
-
- for (int i = 0; i < rdb->referenceSeqs.size(); i++) {
- Sequence* temp = new Sequence(rdb->referenceSeqs[i].getName(), rdb->referenceSeqs[i].getAligned());
-
- if (count == 0) { length = temp->getAligned().length(); count++; } //gets first seqs length
- else if (length != temp->getAligned().length()) { unaligned = true; }
-
- if (temp->getName() != "") { container.push_back(temp); }
- }
-
- templateFileName = rdb->getSavedReference();
-
- }else {
-
- m->mothurOut("Reading sequences from " + file + "..."); cout.flush();
-
-
- ifstream in;
- m->openInputFile(file, in);
-
- //read in seqs and store in vector
- while(!in.eof()){
-
- if (m->control_pressed) { return container; }
-
- Sequence* current = new Sequence(in); m->gobble(in);
-
- if (count == 0) { length = current->getAligned().length(); count++; } //gets first seqs length
- else if (length != current->getAligned().length()) { unaligned = true; }
-
- if (current->getName() != "") {
- container.push_back(current);
- if (rdb->save) { rdb->referenceSeqs.push_back(*current); }
- }
- }
- in.close();
+
+ m->mothurOut("Reading sequences from " + file + "..."); cout.flush();
+
+
+ ifstream in;
+ m->openInputFile(file, in);
+
+ //read in seqs and store in vector
+ while(!in.eof()){
+
+ if (m->control_pressed) { return container; }
+
+ Sequence* current = new Sequence(in); m->gobble(in);
+
+ if (count == 0) { length = current->getAligned().length(); count++; } //gets first seqs length
+ else if (length != current->getAligned().length()) { unaligned = true; }
+
+ if (current->getName() != "") { container.push_back(current); }
+ }
+ in.close();
+
+ m->mothurOut("Done."); m->mothurOutEndLine();
+
+ filterString = (string(container[0]->getAligned().length(), '1'));
- m->mothurOut("Done."); m->mothurOutEndLine();
-
- filterString = (string(container[0]->getAligned().length(), '1'));
- }
return container;
}
diff --git a/source/classifier/bayesian.cpp b/source/classifier/bayesian.cpp
index a0ece69..55589f4 100644
--- a/source/classifier/bayesian.cpp
+++ b/source/classifier/bayesian.cpp
@@ -10,23 +10,18 @@
#include "bayesian.h"
#include "kmer.hpp"
#include "phylosummary.h"
-#include "referencedb.h"
+
/**************************************************************************************************/
Bayesian::Bayesian(string tfile, string tempFile, string method, int ksize, int cutoff, int i, int tid, bool f, bool sh) :
Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) {
try {
- ReferenceDB* rdb = ReferenceDB::getInstance();
threadID = tid;
flip = f;
shortcuts = sh;
string baseName = tempFile;
-
- if (baseName == "saved") { baseName = rdb->getSavedReference(); }
-
string baseTName = tfile;
- if (baseTName == "saved") { baseTName = rdb->getSavedTaxonomy(); }
-
+
/************calculate the probablity that each word will be in a specific taxonomy*************/
string tfileroot = m->getFullPathName(baseTName.substr(0,baseTName.find_last_of(".")+1));
string tempfileroot = m->getRootName(m->getSimpleName(baseName));
@@ -50,23 +45,8 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) {
if(probFileTest && probFileTest2 && phyloTreeTest && probFileTest3){
FilesGood = checkReleaseDate(probFileTest, probFileTest2, phyloTreeTest, probFileTest3);
}
-
- //if you want to save, but you dont need to calculate then just read
- if (rdb->save && probFileTest && probFileTest2 && phyloTreeTest && probFileTest3 && FilesGood && (tempFile != "saved")) {
- ifstream saveIn;
- m->openInputFile(tempFile, saveIn);
-
- while (!saveIn.eof()) {
- Sequence temp(saveIn);
- m->gobble(saveIn);
-
- rdb->referenceSeqs.push_back(temp);
- }
- saveIn.close();
- }
- if(probFileTest && probFileTest2 && phyloTreeTest && probFileTest3 && FilesGood){
- if (tempFile == "saved") { m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory."); m->mothurOutEndLine(); }
+ if(probFileTest && probFileTest2 && phyloTreeTest && probFileTest3 && FilesGood){
m->mothurOut("Reading template taxonomy... "); cout.flush();
@@ -78,18 +58,10 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) {
genusNodes = phyloTree->getGenusNodes();
genusTotals = phyloTree->getGenusTotals();
- if (tfile == "saved") {
- m->mothurOutEndLine(); m->mothurOut("Using probabilties from " + rdb->getSavedTaxonomy() + " that are saved in memory... "); cout.flush();;
- wordGenusProb = rdb->wordGenusProb;
- WordPairDiffArr = rdb->WordPairDiffArr;
- }else {
- m->mothurOut("Reading template probabilities... "); cout.flush();
- readProbFile(probFileTest, probFileTest2, probFileName, probFileName2);
- }
+ m->mothurOut("Reading template probabilities... "); cout.flush();
+ readProbFile(probFileTest, probFileTest2, probFileName, probFileName2);
- //save probabilities
- if (rdb->save) { rdb->wordGenusProb = wordGenusProb; rdb->WordPairDiffArr = WordPairDiffArr; }
- }else{
+ }else{
//create search database and names vector
generateDatabaseAndNames(tfile, tempFile, method, ksize, 0.0, 0.0, 0.0, 0.0);
@@ -187,19 +159,13 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) {
phyloTree = new PhyloTree(phyloTreeTest, phyloTreeName);
maxLevel = phyloTree->getMaxLevel();
-
- //save probabilities
- if (rdb->save) { rdb->wordGenusProb = wordGenusProb; rdb->WordPairDiffArr = WordPairDiffArr; }
}
}
if (m->debug) { m->mothurOut("[DEBUG]: about to generateWordPairDiffArr\n"); }
generateWordPairDiffArr();
if (m->debug) { m->mothurOut("[DEBUG]: done generateWordPairDiffArr\n"); }
-
- //save probabilities
- if (rdb->save) { rdb->wordGenusProb = wordGenusProb; rdb->WordPairDiffArr = WordPairDiffArr; }
-
+
m->mothurOut("DONE."); m->mothurOutEndLine();
m->mothurOut("It took " + toString(time(NULL) - start) + " seconds get probabilities. "); m->mothurOutEndLine();
}
@@ -442,34 +408,7 @@ int Bayesian::generateWordPairDiffArr(){
exit(1);
}
}
-/*************************************************************************************************
-map<string, int> Bayesian::parseTaxMap(string newTax) {
- try{
-
- map<string, int> parsed;
-
- newTax = newTax.substr(0, newTax.length()-1); //get rid of last ';'
-
- //parse taxonomy
- string individual;
- while (newTax.find_first_of(';') != -1) {
- individual = newTax.substr(0,newTax.find_first_of(';'));
- newTax = newTax.substr(newTax.find_first_of(';')+1, newTax.length());
- parsed[individual] = 1;
- }
-
- //get last one
- parsed[newTax] = 1;
-
- return parsed;
-
- }
- catch(exception& e) {
- m->errorOut(e, "Bayesian", "parseTax");
- exit(1);
- }
-}
-**************************************************************************************************/
+/**************************************************************************************************/
void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string inNumName) {
try{
diff --git a/source/classifier/classify.cpp b/source/classifier/classify.cpp
index 26c13df..3bec5d1 100644
--- a/source/classifier/classify.cpp
+++ b/source/classifier/classify.cpp
@@ -13,148 +13,94 @@
#include "suffixdb.hpp"
#include "blastdb.hpp"
#include "distancedb.hpp"
-#include "referencedb.h"
/**************************************************************************************************/
void Classify::generateDatabaseAndNames(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch) {
try {
maxLevel = 0;
- ReferenceDB* rdb = ReferenceDB::getInstance();
-
- if (tfile == "saved") { tfile = rdb->getSavedTaxonomy(); }
-
taxFile = tfile;
int numSeqs = 0;
-
- if (tempFile == "saved") {
- int start = time(NULL);
- m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory."); m->mothurOutEndLine();
-
- numSeqs = rdb->referenceSeqs.size();
- templateFile = rdb->getSavedReference();
- tempFile = rdb->getSavedReference();
-
- bool needToGenerate = true;
- string kmerDBName;
- if(method == "kmer") {
- database = new KmerDB(tempFile, kmerSize);
-
- kmerDBName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
- ifstream kmerFileTest(kmerDBName.c_str());
- if(kmerFileTest){
- bool GoodFile = m->checkReleaseVersion(kmerFileTest, m->getVersion());
- if (GoodFile) { needToGenerate = false; }
- }
- }
- else if(method == "suffix") { database = new SuffixDB(numSeqs); }
- else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, "", threadID); }
- else if(method == "distance") { database = new DistanceDB(); }
- else {
- m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
- m->mothurOutEndLine();
- database = new KmerDB(tempFile, 8);
- }
-
- if (needToGenerate) {
- for (int k = 0; k < rdb->referenceSeqs.size(); k++) {
- Sequence temp(rdb->referenceSeqs[k].getName(), rdb->referenceSeqs[k].getAligned());
- names.push_back(temp.getName());
- database->addSequence(temp);
- }
- if ((method == "kmer") && (!shortcuts)) {;} //don't print
- else {database->generateDB(); }
- }else if ((method == "kmer") && (!needToGenerate)) {
- ifstream kmerFileTest(kmerDBName.c_str());
- database->readKmerDB(kmerFileTest);
-
- for (int k = 0; k < rdb->referenceSeqs.size(); k++) {
- names.push_back(rdb->referenceSeqs[k].getName());
- }
- }
-
- database->setNumSeqs(numSeqs);
-
- m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(rdb->referenceSeqs.size()) + " sequences and generate the search databases.");m->mothurOutEndLine();
-
- }else {
-
- templateFile = tempFile;
-
- int start = time(NULL);
-
- m->mothurOut("Generating search database... "); cout.flush();
-
- //need to know number of template seqs for suffixdb
- if (method == "suffix") {
- ifstream inFASTA;
- m->openInputFile(tempFile, inFASTA);
- m->getNumSeqs(inFASTA, numSeqs);
- inFASTA.close();
- }
- bool needToGenerate = true;
- string kmerDBName;
- if(method == "kmer") {
- database = new KmerDB(tempFile, kmerSize);
-
- kmerDBName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
- ifstream kmerFileTest(kmerDBName.c_str());
- if(kmerFileTest){
- bool GoodFile = m->checkReleaseVersion(kmerFileTest, m->getVersion());
- if (GoodFile) { needToGenerate = false; }
- }
- }
- else if(method == "suffix") { database = new SuffixDB(numSeqs); }
- else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, "", threadID); }
- else if(method == "distance") { database = new DistanceDB(); }
- else {
- m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
- m->mothurOutEndLine();
- database = new KmerDB(tempFile, 8);
- }
-
- if (needToGenerate) {
- ifstream fastaFile;
- m->openInputFile(tempFile, fastaFile);
-
- while (!fastaFile.eof()) {
- Sequence temp(fastaFile);
- m->gobble(fastaFile);
-
- if (rdb->save) { rdb->referenceSeqs.push_back(temp); }
-
- names.push_back(temp.getName());
-
- database->addSequence(temp);
- }
- fastaFile.close();
+ templateFile = tempFile;
+
+ int start = time(NULL);
+
+ m->mothurOut("Generating search database... "); cout.flush();
+
+ //need to know number of template seqs for suffixdb
+ if (method == "suffix") {
+ ifstream inFASTA;
+ m->openInputFile(tempFile, inFASTA);
+ m->getNumSeqs(inFASTA, numSeqs);
+ inFASTA.close();
+ }
+
+ bool needToGenerate = true;
+ string kmerDBName;
+ if(method == "kmer") {
+ database = new KmerDB(tempFile, kmerSize);
+
+ kmerDBName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
+ ifstream kmerFileTest(kmerDBName.c_str());
+ if(kmerFileTest){
+ bool GoodFile = m->checkReleaseVersion(kmerFileTest, m->getVersion());
+ int shortcutTimeStamp = m->getTimeStamp(kmerDBName);
+ int referenceTimeStamp = m->getTimeStamp(tempFile);
+
+ //if the shortcut file is older than the reference file, remake shortcut file
+ if (shortcutTimeStamp < referenceTimeStamp) { GoodFile = false; }
- if ((method == "kmer") && (!shortcuts)) {;} //don't print
- else {database->generateDB(); }
-
- }else if ((method == "kmer") && (!needToGenerate)) {
- ifstream kmerFileTest(kmerDBName.c_str());
- database->readKmerDB(kmerFileTest);
-
- ifstream fastaFile;
- m->openInputFile(tempFile, fastaFile);
-
- while (!fastaFile.eof()) {
- Sequence temp(fastaFile);
- m->gobble(fastaFile);
-
- if (rdb->save) { rdb->referenceSeqs.push_back(temp); }
- names.push_back(temp.getName());
- }
- fastaFile.close();
- }
-
- database->setNumSeqs(names.size());
-
- m->mothurOut("DONE."); m->mothurOutEndLine();
- m->mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); m->mothurOutEndLine();
- }
+ if (GoodFile) { needToGenerate = false; }
+ }
+ }
+ else if(method == "suffix") { database = new SuffixDB(numSeqs); }
+ else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, "", threadID); }
+ else if(method == "distance") { database = new DistanceDB(); }
+ else {
+ m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
+ m->mothurOutEndLine();
+ database = new KmerDB(tempFile, 8);
+ }
+
+ if (needToGenerate) {
+ ifstream fastaFile;
+ m->openInputFile(tempFile, fastaFile);
+
+ while (!fastaFile.eof()) {
+ Sequence temp(fastaFile);
+ m->gobble(fastaFile);
+
+ names.push_back(temp.getName());
+
+ database->addSequence(temp);
+ }
+ fastaFile.close();
+
+ if ((method == "kmer") && (!shortcuts)) {;} //don't print
+ else {database->generateDB(); }
+
+ }else if ((method == "kmer") && (!needToGenerate)) {
+ ifstream kmerFileTest(kmerDBName.c_str());
+ database->readKmerDB(kmerFileTest);
+
+ ifstream fastaFile;
+ m->openInputFile(tempFile, fastaFile);
+
+ while (!fastaFile.eof()) {
+ Sequence temp(fastaFile);
+ m->gobble(fastaFile);
+
+ names.push_back(temp.getName());
+ }
+ fastaFile.close();
+ }
+
+ database->setNumSeqs(names.size());
+
+ m->mothurOut("DONE."); m->mothurOutEndLine();
+ m->mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); m->mothurOutEndLine();
+
readTaxonomy(taxFile);
diff --git a/source/classifier/phylosummary.cpp b/source/classifier/phylosummary.cpp
index 695d8e2..3d12efc 100644
--- a/source/classifier/phylosummary.cpp
+++ b/source/classifier/phylosummary.cpp
@@ -8,7 +8,6 @@
*/
#include "phylosummary.h"
-#include "referencedb.h"
/**************************************************************************************************/
PhyloSummary::PhyloSummary(string refTfile, CountTable* c, bool r, int p){
@@ -24,7 +23,6 @@ PhyloSummary::PhyloSummary(string refTfile, CountTable* c, bool r, int p){
groupmap = NULL;
//check for necessary files
- if (refTfile == "saved") { ReferenceDB* rdb = ReferenceDB::getInstance(); refTfile = rdb->getSavedTaxonomy(); }
string taxFileNameTest = m->getFullPathName((refTfile.substr(0,refTfile.find_last_of(".")+1) + "tree.sum"));
ifstream FileTest(taxFileNameTest.c_str());
@@ -80,7 +78,6 @@ PhyloSummary::PhyloSummary(string refTfile, GroupMap* g, bool r, int p){
ct = NULL;
//check for necessary files
- if (refTfile == "saved") { ReferenceDB* rdb = ReferenceDB::getInstance(); refTfile = rdb->getSavedTaxonomy(); }
string taxFileNameTest = m->getFullPathName((refTfile.substr(0,refTfile.find_last_of(".")+1) + "tree.sum"));
ifstream FileTest(taxFileNameTest.c_str());
diff --git a/source/classifier/phylotree.cpp b/source/classifier/phylotree.cpp
index ac8980b..1985725 100644
--- a/source/classifier/phylotree.cpp
+++ b/source/classifier/phylotree.cpp
@@ -186,7 +186,7 @@ int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
map<string, int>::iterator childPointer;
int currentNode = 0;
- int level = 1;
+ int level = 0;
tree[0].accessions.push_back(seqName);
m->removeConfidences(seqTaxonomy);
diff --git a/source/cluster.hpp b/source/cluster.hpp
index 0c436b9..846c6f4 100644
--- a/source/cluster.hpp
+++ b/source/cluster.hpp
@@ -6,9 +6,11 @@
#include "mothur.h"
#include "sparsedistancematrix.h"
#include "mothurout.h"
+#include "rabundvector.hpp"
+#include "listvector.hpp"
-class RAbundVector;
-class ListVector;
+//class RAbundVector;
+//class ListVector;
class Cluster {
diff --git a/source/commandfactory.cpp b/source/commandfactory.cpp
index eab30c4..bec2db3 100644
--- a/source/commandfactory.cpp
+++ b/source/commandfactory.cpp
@@ -50,7 +50,6 @@
#include "secondarystructurecommand.h"
#include "getsharedotucommand.h"
#include "getlistcountcommand.h"
-#include "hclustercommand.h"
#include "classifyseqscommand.h"
#include "phylotypecommand.h"
#include "mgclustercommand.h"
@@ -86,7 +85,6 @@
#include "getlineagecommand.h"
#include "removelineagecommand.h"
#include "parsefastaqcommand.h"
-#include "pipelinepdscommand.h"
#include "deuniqueseqscommand.h"
#include "pairwiseseqscommand.h"
#include "clusterdoturcommand.h"
@@ -114,7 +112,6 @@
#include "deuniquetreecommand.h"
#include "countseqscommand.h"
#include "countgroupscommand.h"
-#include "clearmemorycommand.h"
#include "summarytaxcommand.h"
#include "chimeraperseuscommand.h"
#include "shhhseqscommand.h"
@@ -153,6 +150,8 @@
#include "setseedcommand.h"
#include "makefilecommand.h"
#include "biominfocommand.h"
+#include "renamefilecommand.h"
+#include "chimeravsearchcommand.h"
//needed for testing project
//CommandFactory* CommandFactory::_uniqueInstance;
@@ -217,7 +216,6 @@ CommandFactory::CommandFactory(){
commands["align.check"] = "align.check";
commands["get.sharedseqs"] = "get.sharedseqs";
commands["get.otulist"] = "get.otulist";
- commands["hcluster"] = "hcluster";
commands["phylotype"] = "phylotype";
commands["mgcluster"] = "mgcluster";
commands["pre.cluster"] = "pre.cluster";
@@ -269,9 +267,7 @@ CommandFactory::CommandFactory(){
commands["deunique.tree"] = "deunique.tree";
commands["count.seqs"] = "count.seqs";
commands["count.groups"] = "count.groups";
- commands["clear.memory"] = "clear.memory";
commands["pairwise.seqs"] = "pairwise.seqs";
- commands["pipeline.pds"] = "pipeline.pds";
commands["classify.seqs"] = "classify.seqs";
commands["dist.seqs"] = "dist.seqs";
commands["filter.seqs"] = "filter.seqs";
@@ -283,6 +279,7 @@ CommandFactory::CommandFactory(){
commands["chimera.perseus"] = "chimera.perseus";
commands["chimera.pintail"] = "chimera.pintail";
commands["chimera.bellerophon"] = "chimera.bellerophon";
+ commands["chimera.vsearch"] = "chimera.vsearch";
commands["screen.seqs"] = "screen.seqs";
commands["summary.seqs"] = "summary.seqs";
commands["cluster.split"] = "cluster.split";
@@ -308,7 +305,7 @@ CommandFactory::CommandFactory(){
commands["make.table"] = "make.table";
commands["sff.multiple"] = "sff.multiple";
commands["quit"] = "quit";
- commands["classify.rf"] = "classify.rf";
+ //commands["classify.rf"] = "classify.rf";
commands["classify.svm"] = "classify.svm";
commands["filter.shared"] = "filter.shared";
commands["primer.design"] = "primer.design";
@@ -329,6 +326,7 @@ CommandFactory::CommandFactory(){
commands["make.file"] = "make.file";
commands["biom.info"] = "biom.info";
commands["set.seed"] = "set.seed";
+ commands["rename.file"] = "rename.file";
}
@@ -476,7 +474,6 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
else if(commandName == "align.check") { command = new AlignCheckCommand(optionString); }
else if(commandName == "get.sharedseqs") { command = new GetSharedOTUCommand(optionString); }
else if(commandName == "get.otulist") { command = new GetListCountCommand(optionString); }
- else if(commandName == "hcluster") { command = new HClusterCommand(optionString); }
else if(commandName == "classify.seqs") { command = new ClassifySeqsCommand(optionString); }
else if(commandName == "chimera.ccode") { command = new ChimeraCcodeCommand(optionString); }
else if(commandName == "chimera.check") { command = new ChimeraCheckCommand(optionString); }
@@ -484,6 +481,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
else if(commandName == "chimera.uchime") { command = new ChimeraUchimeCommand(optionString); }
else if(commandName == "chimera.pintail") { command = new ChimeraPintailCommand(optionString); }
else if(commandName == "chimera.bellerophon") { command = new ChimeraBellerophonCommand(optionString); }
+ else if(commandName == "chimera.vsearch") { command = new ChimeraVsearchCommand(optionString); }
else if(commandName == "phylotype") { command = new PhylotypeCommand(optionString); }
else if(commandName == "mgcluster") { command = new MGClusterCommand(optionString); }
else if(commandName == "pre.cluster") { command = new PreClusterCommand(optionString); }
@@ -519,7 +517,6 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
else if((commandName == "remove.otus") || (commandName == "remove.otulabels")) { command = new RemoveOtuLabelsCommand(optionString); }
else if((commandName == "list.otus") ||(commandName == "list.otulabels")) { command = new ListOtuLabelsCommand(optionString); }
else if(commandName == "fastq.info") { command = new ParseFastaQCommand(optionString); }
- else if(commandName == "pipeline.pds") { command = new PipelineCommand(optionString); }
else if(commandName == "deunique.seqs") { command = new DeUniqueSeqsCommand(optionString); }
else if(commandName == "pairwise.seqs") { command = new PairwiseSeqsCommand(optionString); }
else if(commandName == "cluster.classic") { command = new ClusterDoturCommand(optionString); }
@@ -541,7 +538,6 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
else if(commandName == "deunique.tree") { command = new DeuniqueTreeCommand(optionString); }
else if((commandName == "count.seqs") || (commandName == "make.table")) { command = new CountSeqsCommand(optionString); }
else if(commandName == "count.groups") { command = new CountGroupsCommand(optionString); }
- else if(commandName == "clear.memory") { command = new ClearMemoryCommand(optionString); }
else if(commandName == "summary.tax") { command = new SummaryTaxCommand(optionString); }
else if(commandName == "summary.qual") { command = new SummaryQualCommand(optionString); }
else if(commandName == "chimera.perseus") { command = new ChimeraPerseusCommand(optionString); }
@@ -577,6 +573,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
else if(commandName == "set.seed") { command = new SetSeedCommand(optionString); }
else if(commandName == "make.file") { command = new MakeFileCommand(optionString); }
else if(commandName == "biom.info") { command = new BiomInfoCommand(optionString); }
+ else if(commandName == "rename.file") { command = new RenameFileCommand(optionString); }
else { command = new NoCommand(optionString); }
return command;
@@ -650,7 +647,6 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
else if(commandName == "align.check") { pipecommand = new AlignCheckCommand(optionString); }
else if(commandName == "get.sharedseqs") { pipecommand = new GetSharedOTUCommand(optionString); }
else if(commandName == "get.otulist") { pipecommand = new GetListCountCommand(optionString); }
- else if(commandName == "hcluster") { pipecommand = new HClusterCommand(optionString); }
else if(commandName == "classify.seqs") { pipecommand = new ClassifySeqsCommand(optionString); }
else if(commandName == "chimera.ccode") { pipecommand = new ChimeraCcodeCommand(optionString); }
else if(commandName == "chimera.check") { pipecommand = new ChimeraCheckCommand(optionString); }
@@ -658,6 +654,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
else if(commandName == "chimera.slayer") { pipecommand = new ChimeraSlayerCommand(optionString); }
else if(commandName == "chimera.pintail") { pipecommand = new ChimeraPintailCommand(optionString); }
else if(commandName == "chimera.bellerophon") { pipecommand = new ChimeraBellerophonCommand(optionString); }
+ else if(commandName == "chimera.vsearch") { pipecommand = new ChimeraVsearchCommand(optionString); }
else if(commandName == "phylotype") { pipecommand = new PhylotypeCommand(optionString); }
else if(commandName == "mgcluster") { pipecommand = new MGClusterCommand(optionString); }
else if(commandName == "pre.cluster") { pipecommand = new PreClusterCommand(optionString); }
@@ -714,7 +711,6 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
else if(commandName == "deunique.tree") { pipecommand = new DeuniqueTreeCommand(optionString); }
else if((commandName == "count.seqs") || (commandName == "make.table")) { pipecommand = new CountSeqsCommand(optionString); }
else if(commandName == "count.groups") { pipecommand = new CountGroupsCommand(optionString); }
- else if(commandName == "clear.memory") { pipecommand = new ClearMemoryCommand(optionString); }
else if(commandName == "summary.tax") { pipecommand = new SummaryTaxCommand(optionString); }
else if(commandName == "summary.qual") { pipecommand = new SummaryQualCommand(optionString); }
else if(commandName == "chimera.perseus") { pipecommand = new ChimeraPerseusCommand(optionString); }
@@ -750,6 +746,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
else if(commandName == "set.seed") { pipecommand = new SetSeedCommand(optionString); }
else if(commandName == "make.file") { pipecommand = new MakeFileCommand(optionString); }
else if(commandName == "biom.info") { pipecommand = new BiomInfoCommand(optionString); }
+ else if(commandName == "rename.file") { pipecommand = new RenameFileCommand(optionString); }
else { pipecommand = new NoCommand(optionString); }
return pipecommand;
@@ -809,7 +806,6 @@ Command* CommandFactory::getCommand(string commandName){
else if(commandName == "align.check") { shellcommand = new AlignCheckCommand(); }
else if(commandName == "get.sharedseqs") { shellcommand = new GetSharedOTUCommand(); }
else if(commandName == "get.otulist") { shellcommand = new GetListCountCommand(); }
- else if(commandName == "hcluster") { shellcommand = new HClusterCommand(); }
else if(commandName == "classify.seqs") { shellcommand = new ClassifySeqsCommand(); }
else if(commandName == "chimera.ccode") { shellcommand = new ChimeraCcodeCommand(); }
else if(commandName == "chimera.check") { shellcommand = new ChimeraCheckCommand(); }
@@ -817,6 +813,7 @@ Command* CommandFactory::getCommand(string commandName){
else if(commandName == "chimera.uchime") { shellcommand = new ChimeraUchimeCommand(); }
else if(commandName == "chimera.pintail") { shellcommand = new ChimeraPintailCommand(); }
else if(commandName == "chimera.bellerophon") { shellcommand = new ChimeraBellerophonCommand(); }
+ else if(commandName == "chimera.vsearch") { shellcommand = new ChimeraVsearchCommand(); }
else if(commandName == "phylotype") { shellcommand = new PhylotypeCommand(); }
else if(commandName == "mgcluster") { shellcommand = new MGClusterCommand(); }
else if(commandName == "pre.cluster") { shellcommand = new PreClusterCommand(); }
@@ -873,7 +870,6 @@ Command* CommandFactory::getCommand(string commandName){
else if(commandName == "deunique.tree") { shellcommand = new DeuniqueTreeCommand(); }
else if((commandName == "count.seqs") || (commandName == "make.table")) { shellcommand = new CountSeqsCommand(); }
else if(commandName == "count.groups") { shellcommand = new CountGroupsCommand(); }
- else if(commandName == "clear.memory") { shellcommand = new ClearMemoryCommand(); }
else if(commandName == "summary.tax") { shellcommand = new SummaryTaxCommand(); }
else if(commandName == "summary.qual") { shellcommand = new SummaryQualCommand(); }
else if(commandName == "chimera.perseus") { shellcommand = new ChimeraPerseusCommand(); }
@@ -909,6 +905,7 @@ Command* CommandFactory::getCommand(string commandName){
else if(commandName == "set.seed") { shellcommand = new SetSeedCommand(); }
else if(commandName == "make.file") { shellcommand = new MakeFileCommand(); }
else if(commandName == "biom.info") { shellcommand = new BiomInfoCommand(); }
+ else if(commandName == "rename.file") { shellcommand = new RenameFileCommand(); } //must set shellcommand, which is what this overload returns
else { shellcommand = new NoCommand(); }
return shellcommand;
diff --git a/source/commands/aligncommand.cpp b/source/commands/aligncommand.cpp
index e7fb7eb..33190ec 100644
--- a/source/commands/aligncommand.cpp
+++ b/source/commands/aligncommand.cpp
@@ -15,7 +15,6 @@
*/
#include "aligncommand.h"
-#include "referencedb.h"
//**********************************************************************************************************************
vector<string> AlignCommand::setParameters(){
@@ -31,7 +30,6 @@ vector<string> AlignCommand::setParameters(){
CommandParameter pgapextend("gapextend", "Number", "", "-2.0", "", "", "","",false,false); parameters.push_back(pgapextend);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
CommandParameter pflip("flip", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pflip);
- CommandParameter psave("save", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psave);
CommandParameter pthreshold("threshold", "Number", "", "0.50", "", "", "","",false,false); parameters.push_back(pthreshold);
CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
@@ -63,7 +61,6 @@ string AlignCommand::getHelpString(){
helpString += "The flip parameter is used to specify whether or not you want mothur to try the reverse complement if a sequence falls below the threshold. The default is false.";
helpString += "The threshold is used to specify a cutoff at which an alignment is deemed 'bad' and the reverse complement may be tried. The default threshold is 0.50, meaning 50% of the bases are removed in the alignment.";
helpString += "If the flip parameter is set to true the reverse complement of the sequence is aligned and the better alignment is reported.";
- helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
helpString += "The default for the threshold parameter is 0.50, meaning at least 50% of the bases must remain or the sequence is reported as potentially reversed.";
helpString += "The align.seqs command should be in the following format:";
helpString += "align.seqs(reference=yourTemplateFile, fasta=yourCandidateFile, align=yourAlignmentMethod, search=yourSearchmethod, ksize=yourKmerSize, match=yourMatchBonus, mismatch=yourMismatchpenalty, gapopen=yourGapopenPenalty, gapextend=yourGapExtendPenalty)";
@@ -112,7 +109,6 @@ AlignCommand::AlignCommand(){
AlignCommand::AlignCommand(string option) {
try {
abort = false; calledHelp = false;
- ReferenceDB* rdb = ReferenceDB::getInstance();
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true;}
@@ -261,28 +257,12 @@ AlignCommand::AlignCommand(string option) {
m->mothurConvert(temp, processors);
temp = validParameter.validFile(parameters, "flip", false); if (temp == "not found"){ temp = "f"; }
- flip = m->isTrue(temp);
-
- temp = validParameter.validFile(parameters, "save", false); if (temp == "not found"){ temp = "f"; }
- save = m->isTrue(temp);
- rdb->save = save;
- if (save) { //clear out old references
- rdb->clearMemory();
- }
+ flip = m->isTrue(temp);
//the reference parameter is required; abort if it is missing or cannot be opened
templateFileName = validParameter.validFile(parameters, "reference", true);
- if (templateFileName == "not found") {
- //check for saved reference sequences
- if (rdb->referenceSeqs.size() != 0) {
- templateFileName = "saved";
- }else {
- m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required for the align.seqs command.");
- m->mothurOutEndLine();
- abort = true;
- }
+ if (templateFileName == "not found") { m->mothurOut("[ERROR]: The reference parameter is a required for the align.seqs command, aborting.\n"); abort = true;
}else if (templateFileName == "not open") { abort = true; }
- else { if (save) { rdb->setSavedReference(templateFileName); } }
temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found"){ temp = "0.50"; }
m->mothurConvert(temp, threshold);
@@ -367,7 +347,7 @@ int AlignCommand::execute(){
//delete accnos file if its blank else report to user
if (m->isBlank(accnosFileName)) { m->mothurRemove(accnosFileName); hasAccnos = false; }
else {
- m->mothurOut("Some of your sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
+ m->mothurOut("[WARNING]: Some of your sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
if (!flip) {
m->mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well.");
}else{ m->mothurOut(" If the reverse compliment proved to be better it was reported."); }
diff --git a/source/commands/chimeraccodecommand.cpp b/source/commands/chimeraccodecommand.cpp
index b94ab26..b687f69 100644
--- a/source/commands/chimeraccodecommand.cpp
+++ b/source/commands/chimeraccodecommand.cpp
@@ -9,7 +9,7 @@
#include "chimeraccodecommand.h"
#include "ccode.h"
-#include "referencedb.h"
+
//**********************************************************************************************************************
vector<string> ChimeraCcodeCommand::setParameters(){
try {
@@ -23,7 +23,6 @@ vector<string> ChimeraCcodeCommand::setParameters(){
CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
- CommandParameter psave("save", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psave);
vector<string> myArray;
for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
@@ -49,7 +48,6 @@ string ChimeraCcodeCommand::getHelpString(){
helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences. \n";
helpString += "The window parameter allows you to specify the window size for searching for chimeras. \n";
helpString += "The numwanted parameter allows you to specify how many sequences you would each query sequence compared with.\n";
- helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
helpString += "The chimera.ccode command should be in the following format: \n";
helpString += "chimera.ccode(fasta=yourFastaFile, reference=yourTemplate) \n";
helpString += "Example: chimera.ccode(fasta=AD.align, reference=core_set_aligned.imputed.fasta) \n";
@@ -98,7 +96,6 @@ ChimeraCcodeCommand::ChimeraCcodeCommand(){
ChimeraCcodeCommand::ChimeraCcodeCommand(string option) {
try {
abort = false; calledHelp = false;
- ReferenceDB* rdb = ReferenceDB::getInstance();
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true; }
@@ -248,27 +245,10 @@ ChimeraCcodeCommand::ChimeraCcodeCommand(string option) {
temp = validParameter.validFile(parameters, "numwanted", false); if (temp == "not found") { temp = "20"; }
m->mothurConvert(temp, numwanted);
- temp = validParameter.validFile(parameters, "save", false); if (temp == "not found"){ temp = "f"; }
- save = m->isTrue(temp);
- rdb->save = save;
- if (save) { //clear out old references
- rdb->clearMemory();
- }
-
//this has to go after save so that if the user sets save=t and provides no reference we abort
templatefile = validParameter.validFile(parameters, "reference", true);
- if (templatefile == "not found") {
- //check for saved reference sequences
- if (rdb->referenceSeqs.size() != 0) {
- templatefile = "saved";
- }else {
- m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
- m->mothurOutEndLine();
- abort = true;
- }
- }else if (templatefile == "not open") { abort = true; }
- else { if (save) { rdb->setSavedReference(templatefile); } }
-
+ if (templatefile == "not found") { m->mothurOut("[ERROR]: The reference parameter is a required, aborting.\n"); abort = true;
+ }else if (templatefile == "not open") { abort = true; }
}
}
diff --git a/source/commands/chimeracheckcommand.cpp b/source/commands/chimeracheckcommand.cpp
index 2c98626..ff733a7 100644
--- a/source/commands/chimeracheckcommand.cpp
+++ b/source/commands/chimeracheckcommand.cpp
@@ -8,7 +8,6 @@
*/
#include "chimeracheckcommand.h"
-#include "referencedb.h"
//**********************************************************************************************************************
vector<string> ChimeraCheckCommand::setParameters(){
@@ -23,7 +22,6 @@ vector<string> ChimeraCheckCommand::setParameters(){
CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
- CommandParameter psave("save", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psave);
vector<string> myArray;
for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
@@ -50,8 +48,7 @@ string ChimeraCheckCommand::getHelpString(){
helpString += "The svg parameter allows you to specify whether or not you would like a svg file outputted for each query sequence, default is False.\n";
helpString += "The name parameter allows you to enter a file containing names of sequences you would like .svg files for.\n";
helpString += "You may enter multiple name files by separating their names with dashes. ie. fasta=abrecovery.svg.names-amzon.svg.names \n";
- helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
- helpString += "The chimera.check command should be in the following format: \n";
+ helpString += "The chimera.check command should be in the following format: \n";
helpString += "chimera.check(fasta=yourFastaFile, reference=yourTemplateFile, processors=yourProcessors, ksize=yourKmerSize) \n";
helpString += "Example: chimera.check(fasta=AD.fasta, reference=core_set_aligned,imputed.fasta, processors=4, ksize=8) \n";
helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
@@ -94,8 +91,7 @@ ChimeraCheckCommand::ChimeraCheckCommand(){
ChimeraCheckCommand::ChimeraCheckCommand(string option) {
try {
abort = false; calledHelp = false;
- ReferenceDB* rdb = ReferenceDB::getInstance();
-
+
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true; }
else if(option == "citation") { citation(); abort = true; calledHelp = true;}
@@ -296,27 +292,10 @@ ChimeraCheckCommand::ChimeraCheckCommand(string option) {
m->setProcessors(temp);
m->mothurConvert(temp, processors);
- temp = validParameter.validFile(parameters, "save", false); if (temp == "not found"){ temp = "f"; }
- save = m->isTrue(temp);
- rdb->save = save;
- if (save) { //clear out old references
- rdb->clearMemory();
- }
-
//this has to go after save so that if the user sets save=t and provides no reference we abort
templatefile = validParameter.validFile(parameters, "reference", true);
- if (templatefile == "not found") {
- //check for saved reference sequences
- if (rdb->referenceSeqs.size() != 0) {
- templatefile = "saved";
- }else {
- m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
- m->mothurOutEndLine();
- abort = true;
- }
+ if (templatefile == "not found") { m->mothurOut("[ERROR]: The reference parameter is a required, aborting.\n"); abort = true;
}else if (templatefile == "not open") { abort = true; }
- else { if (save) { rdb->setSavedReference(templatefile); } }
-
temp = validParameter.validFile(parameters, "ksize", false); if (temp == "not found") { temp = "7"; }
m->mothurConvert(temp, ksize);
diff --git a/source/commands/chimerapintailcommand.cpp b/source/commands/chimerapintailcommand.cpp
index e51f16e..3cc64d7 100644
--- a/source/commands/chimerapintailcommand.cpp
+++ b/source/commands/chimerapintailcommand.cpp
@@ -26,7 +26,6 @@ vector<string> ChimeraPintailCommand::setParameters(){
CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
- CommandParameter psave("save", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psave);
vector<string> myArray;
for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
@@ -50,7 +49,6 @@ string ChimeraPintailCommand::getHelpString(){
helpString += "The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n";
helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied. You can apply an ecoli mask by typing, mask=default. \n";
helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
- helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=300. \n";
helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n";
helpString += "The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n";
@@ -101,7 +99,6 @@ ChimeraPintailCommand::ChimeraPintailCommand(){
ChimeraPintailCommand::ChimeraPintailCommand(string option) {
try {
abort = false; calledHelp = false;
- rdb = ReferenceDB::getInstance();
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true; }
@@ -249,26 +246,11 @@ ChimeraPintailCommand::ChimeraPintailCommand(string option) {
temp = validParameter.validFile(parameters, "increment", false); if (temp == "not found") { temp = "25"; }
m->mothurConvert(temp, increment);
- temp = validParameter.validFile(parameters, "save", false); if (temp == "not found"){ temp = "f"; }
- save = m->isTrue(temp);
- rdb->save = save;
- if (save) { //clear out old references
- rdb->clearMemory();
- }
//this has to go after save so that if the user sets save=t and provides no reference we abort
templatefile = validParameter.validFile(parameters, "reference", true);
- if (templatefile == "not found") {
- //check for saved reference sequences
- if (rdb->referenceSeqs.size() != 0) {
- templatefile = "saved";
- }else {
- m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
- m->mothurOutEndLine();
- abort = true;
- }
- }else if (templatefile == "not open") { abort = true; }
- else { if (save) { rdb->setSavedReference(templatefile); } }
+ if (templatefile == "not found") { m->mothurOut("[ERROR]: The reference parameter is a required, aborting.\n"); abort = true;
+ }else if (templatefile == "not open") { abort = true; }
maskfile = validParameter.validFile(parameters, "mask", false);
@@ -368,7 +350,6 @@ int ChimeraPintailCommand::execute(){
//check for quantile to save the time
string baseName = templatefile;
- if (templatefile == "saved") { baseName = rdb->getSavedReference(); }
string tempQuan = "";
if ((!filter) && (maskfile == "")) {
diff --git a/source/commands/chimerapintailcommand.h b/source/commands/chimerapintailcommand.h
index 3f43c5f..6e51b0c 100644
--- a/source/commands/chimerapintailcommand.h
+++ b/source/commands/chimerapintailcommand.h
@@ -13,7 +13,6 @@
#include "mothur.h"
#include "command.hpp"
#include "mothurchimera.h"
-#include "referencedb.h"
/***********************************************************/
@@ -37,7 +36,6 @@ public:
int execute();
void help() { m->mothurOut(getHelpString()); }
private:
- ReferenceDB* rdb;
vector<int> processIDS; //processid
vector<linePair*> lines;
diff --git a/source/commands/chimeraslayercommand.cpp b/source/commands/chimeraslayercommand.cpp
index f36670f..66ec4f9 100644
--- a/source/commands/chimeraslayercommand.cpp
+++ b/source/commands/chimeraslayercommand.cpp
@@ -9,7 +9,6 @@
#include "chimeraslayercommand.h"
#include "deconvolutecommand.h"
-#include "referencedb.h"
#include "sequenceparser.h"
#include "counttable.h"
@@ -45,7 +44,6 @@ vector<string> ChimeraSlayerCommand::setParameters(){
CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
- CommandParameter psave("save", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psave);
vector<string> myArray;
for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
@@ -89,7 +87,6 @@ string ChimeraSlayerCommand::getHelpString(){
helpString += "The search parameter allows you to specify search method for finding the closest parent. Choices are blast and kmer. Default=blast. \n";
helpString += "The realign parameter allows you to realign the query to the potential parents. Choices are true or false, default true. \n";
helpString += "The blastlocation parameter allows you to specify the location of your blast executable. By default mothur will look in ./blast/bin relative to mothur's executable. \n";
- helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
helpString += "The chimera.slayer command should be in the following format: \n";
helpString += "chimera.slayer(fasta=yourFastaFile, reference=yourTemplate, search=yourSearch) \n";
helpString += "Example: chimera.slayer(fasta=AD.align, reference=core_set_aligned.imputed.fasta, search=kmer) \n";
@@ -138,8 +135,7 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(){
//***************************************************************************************************************
ChimeraSlayerCommand::ChimeraSlayerCommand(string option) {
try {
- abort = false; calledHelp = false;
- ReferenceDB* rdb = ReferenceDB::getInstance();
+ abort = false; calledHelp = false;
hasCount = false;
hasName = false;
@@ -489,12 +485,6 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option) {
m->setProcessors(temp);
m->mothurConvert(temp, processors);
- temp = validParameter.validFile(parameters, "save", false); if (temp == "not found"){ temp = "f"; }
- save = m->isTrue(temp);
- rdb->save = save;
- if (save) { //clear out old references
- rdb->clearMemory();
- }
string path;
it = parameters.find("reference");
@@ -516,14 +506,8 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option) {
templatefile = validParameter.validFile(parameters, "reference", true);
if (templatefile == "not open") { abort = true; }
else if (templatefile == "not found") { //check for saved reference sequences
- if (rdb->referenceSeqs.size() != 0) {
- templatefile = "saved";
- }else {
- m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
- m->mothurOutEndLine();
- abort = true;
- }
- }else { if (save) { rdb->setSavedReference(templatefile); } }
+ m->mothurOut("[ERROR]: The reference parameter is a required, aborting.\n"); abort = true;
+ }
}
}else if (hasName) { templatefile = "self";
if (save) {
@@ -539,13 +523,8 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option) {
}
}
else {
- if (rdb->referenceSeqs.size() != 0) {
- templatefile = "saved";
- }else {
- m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
- m->mothurOutEndLine();
- templatefile = ""; abort = true;
- }
+ m->mothurOut("[ERROR]: The reference parameter is a required, aborting.\n");
+ templatefile = ""; abort = true;
}
diff --git a/source/commands/chimerauchimecommand.cpp b/source/commands/chimerauchimecommand.cpp
index e43baaa..4bb0278 100644
--- a/source/commands/chimerauchimecommand.cpp
+++ b/source/commands/chimerauchimecommand.cpp
@@ -9,9 +9,7 @@
#include "chimerauchimecommand.h"
#include "deconvolutecommand.h"
-//#include "uc.h"
#include "sequence.hpp"
-#include "referencedb.h"
#include "systemcommand.h"
//**********************************************************************************************************************
@@ -139,7 +137,6 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(){
ChimeraUchimeCommand::ChimeraUchimeCommand(string option) {
try {
abort = false; calledHelp = false; hasName=false; hasCount=false;
- ReferenceDB* rdb = ReferenceDB::getInstance();
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true; }
@@ -500,27 +497,15 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option) {
templatefile = validParameter.validFile(parameters, "reference", true);
if (templatefile == "not open") { abort = true; }
else if (templatefile == "not found") { //check for saved reference sequences
- if (rdb->getSavedReference() != "") {
- templatefile = rdb->getSavedReference();
- m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
- }else {
- m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
- m->mothurOutEndLine();
- abort = true;
- }
+ m->mothurOut("[ERROR]: The reference parameter is a required.\n"); abort = true;
}
}
}else if (hasName) { templatefile = "self"; }
else if (hasCount) { templatefile = "self"; }
- else {
- if (rdb->getSavedReference() != "") {
- templatefile = rdb->getSavedReference();
- m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
- }else {
- m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
- m->mothurOutEndLine();
- templatefile = ""; abort = true;
- }
+ else {
+ m->mothurOut("[ERROR]: The reference parameter is a required.");
+
+ templatefile = ""; abort = true;
}
string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
@@ -690,7 +675,7 @@ int ChimeraUchimeCommand::execute(){
if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
- m->printVsearchFile(nameMapCount, newFasta, "ab");
+ m->printVsearchFile(nameMapCount, newFasta, "/ab=", "/");
fastaFileNames[s] = newFasta;
}
@@ -1172,8 +1157,8 @@ int ChimeraUchimeCommand::driverGroups(string outputFName, string filename, stri
int start = time(NULL); if (m->control_pressed) { outCountList.close(); m->mothurRemove(countlist); return 0; }
int error;
- if (hasCount) { error = cparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } }
- else { error = sparser->getSeqs(groups[i], filename, true); if ((error == 1) || m->control_pressed) { return 0; } }
+ if (hasCount) { error = cparser->getSeqs(groups[i], filename, "/ab=", "/", true); if ((error == 1) || m->control_pressed) { return 0; } }
+ else { error = sparser->getSeqs(groups[i], filename, "/ab=", "/", true); if ((error == 1) || m->control_pressed) { return 0; } }
int numSeqs = driver((outputFName + groups[i]), filename, (accnos+groups[i]), (alns+ groups[i]), numChimeras);
totalSeqs += numSeqs;
@@ -1250,6 +1235,14 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc
outputFName = "\"" + outputFName + "\"";
filename = "\"" + filename + "\"";
alns = "\"" + alns + "\"";
+
+ if (filename.length() > 257) {
+ m->mothurOut("[ERROR]: " + filename + " filename is " + toString(filename.length()) + " long. The uchime program can't handle files with a full path longer than 257 characters, please correct.\n"); m->control_pressed = true; return 0;
+ }else if ((alns.length() > 257) && (chimealns)) {
+ m->mothurOut("[ERROR]: " + alns + " filename is " + toString(alns.length()) + " long. The uchime program can't handle files with a full path longer than 257 characters, please correct.\n"); m->control_pressed = true; return 0;
+ }else if (outputFName.length() > 257) {
+ m->mothurOut("[ERROR]: " + outputFName + " filename is " + toString(outputFName.length()) + " long. The uchime program can't handle files with a full path longer than 257 characters, please correct input file name.\n"); m->control_pressed = true; return 0;
+ }
vector<char*> cPara;
@@ -1499,7 +1492,7 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc
//int numArgs = cPara.size();
//uchime_main(numArgs, uchimeParameters);
- //cout << "commandString = " << commandString << endl;
+
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
#else
commandString = "\"" + commandString + "\"";
diff --git a/source/commands/chimerauchimecommand.h b/source/commands/chimerauchimecommand.h
index 5ba479c..032c6c9 100644
--- a/source/commands/chimerauchimecommand.h
+++ b/source/commands/chimerauchimecommand.h
@@ -11,7 +11,6 @@
*
*/
-#include "mothur.h"
#include "command.hpp"
#include "sequenceparser.h"
#include "counttable.h"
@@ -189,9 +188,9 @@ static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){
int error;
if (pDataArray->hasCount) {
- error = cparser->getSeqs(pDataArray->groups[i], pDataArray->filename, true); if ((error == 1) || pDataArray->m->control_pressed) { delete cparser; return 0; }
+ error = cparser->getSeqs(pDataArray->groups[i], pDataArray->filename, "/ab=", "/", true); if ((error == 1) || pDataArray->m->control_pressed) { delete cparser; return 0; }
}else {
- error = parser->getSeqs(pDataArray->groups[i], pDataArray->filename, true); if ((error == 1) || pDataArray->m->control_pressed) { delete parser; return 0; }
+ error = parser->getSeqs(pDataArray->groups[i], pDataArray->filename, "/ab=", "/", true); if ((error == 1) || pDataArray->m->control_pressed) { delete parser; return 0; }
}
//int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras);
@@ -202,6 +201,14 @@ static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){
string filename = "\"" + pDataArray->filename + "\"";
string alns = "\"" + pDataArray->alns+pDataArray->groups[i] + "\"";
string accnos = pDataArray->accnos+pDataArray->groups[i];
+
+ if (pDataArray->filename.length() > 257) {
+ pDataArray->m->mothurOut("[ERROR]: " + pDataArray->filename + " filename is " + toString(pDataArray->filename.length()) + " long. The uchime program can't handle files with a full path longer than 257 characters, please correct.\n"); pDataArray->m->control_pressed = true; return 0;
+ }else if ((pDataArray->alns.length() > 257) && (pDataArray->chimealns)) {
+ pDataArray->m->mothurOut("[ERROR]: " + pDataArray->alns + " filename is " + toString(pDataArray->alns.length()) + " long. The uchime program can't handle files with a full path longer than 257 characters, please correct.\n"); pDataArray->m->control_pressed = true; return 0;
+ }else if (pDataArray->outputFName.length() > 257) {
+ pDataArray->m->mothurOut("[ERROR]: " + pDataArray->outputFName + " filename is " + toString(pDataArray->outputFName.length()) + " long. The uchime program can't handle files with a full path longer than 257 characters, please correct input file name.\n"); pDataArray->m->control_pressed = true; return 0;
+ }
vector<char*> cPara;
@@ -544,6 +551,17 @@ static DWORD WINAPI MyUchimeSeqsThreadFunction(LPVOID lpParam){
string alns = "\"" + pDataArray->alns+ "\"";
string templatefile = "\"" + pDataArray->templatefile + "\"";
string accnos = pDataArray->accnos;
+
+ if (pDataArray->filename.length() > 257) {
+ pDataArray->m->mothurOut("[ERROR]: " + pDataArray->filename + " filename is " + toString(pDataArray->filename.length()) + " long. The uchime program can't handle files with a full path longer than 257 characters, please correct.\n"); pDataArray->m->control_pressed = true; return 0;
+ }else if ((pDataArray->alns.length() > 257) && (pDataArray->chimealns)) {
+ pDataArray->m->mothurOut("[ERROR]: " + pDataArray->alns + " filename is " + toString(pDataArray->alns.length()) + " long. The uchime program can't handle files with a full path longer than 257 characters, please correct.\n"); pDataArray->m->control_pressed = true; return 0;
+ }else if (pDataArray->outputFName.length() > 257) {
+ pDataArray->m->mothurOut("[ERROR]: " + pDataArray->outputFName + " filename is " + toString(pDataArray->outputFName.length()) + " long. The uchime program can't handle files with a full path longer than 257 characters, please correct input file name.\n"); pDataArray->m->control_pressed = true; return 0;
+ }else if (pDataArray->templatefile.length() > 257) {
+ pDataArray->m->mothurOut("[ERROR]: " + pDataArray->templatefile + " filename is " + toString(pDataArray->templatefile.length()) + " long. The uchime program can't handle files with a full path longer than 257 characters, please correct input file name.\n"); pDataArray->m->control_pressed = true; return 0;
+ }
+
vector<char*> cPara;
diff --git a/source/commands/chimeravsearchcommand.cpp b/source/commands/chimeravsearchcommand.cpp
new file mode 100644
index 0000000..9d93523
--- /dev/null
+++ b/source/commands/chimeravsearchcommand.cpp
@@ -0,0 +1,1661 @@
+//
+// chimeravsearchcommand.cpp
+// Mothur
+//
+// Created by Sarah Westcott on 6/16/16.
+// Copyright (c) 2016 Schloss Lab. All rights reserved.
+//
+
+#include "chimeravsearchcommand.h"
+#include "deconvolutecommand.h"
+#include "sequence.hpp"
+#include "systemcommand.h"
+#include "degapseqscommand.h"
+
+//**********************************************************************************************************************
+// Registers every parameter accepted by chimera.vsearch in the `parameters` member
+// and returns the list of parameter names in registration order.
+vector<string> ChimeraVsearchCommand::setParameters(){
+    try {
+        // file inputs
+        parameters.push_back(CommandParameter("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true));
+        parameters.push_back(CommandParameter("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true));
+        parameters.push_back(CommandParameter("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true));
+        parameters.push_back(CommandParameter("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true));
+        parameters.push_back(CommandParameter("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true));
+
+        // general settings
+        parameters.push_back(CommandParameter("processors", "Number", "", "1", "", "", "","",false,false,true));
+        parameters.push_back(CommandParameter("seed", "Number", "", "0", "", "", "","",false,false));
+        parameters.push_back(CommandParameter("inputdir", "String", "", "", "", "", "","",false,false));
+        parameters.push_back(CommandParameter("outputdir", "String", "", "", "", "", "","",false,false));
+
+        // chimera-detection options (described in getHelpString)
+        parameters.push_back(CommandParameter("abskew", "Number", "", "1.9", "", "", "","",false,false));
+        parameters.push_back(CommandParameter("uchimealns", "Boolean", "", "F", "", "", "","alns",false,false));
+        parameters.push_back(CommandParameter("minh", "Number", "", "0.28", "", "", "","",false,false));
+        parameters.push_back(CommandParameter("mindiv", "Number", "", "0.8", "", "", "","",false,false));
+        parameters.push_back(CommandParameter("xn", "Number", "", "8.0", "", "", "","",false,false));
+        parameters.push_back(CommandParameter("dn", "Number", "", "1.4", "", "", "","",false,false));
+        parameters.push_back(CommandParameter("mindiffs", "Number", "", "3", "", "", "","",false,false));
+        parameters.push_back(CommandParameter("dereplicate", "Boolean", "", "F", "", "", "","",false,false));
+
+        // collect the names of everything registered so far
+        vector<string> paramNames;
+        for (size_t idx = 0; idx < parameters.size(); ++idx) {
+            paramNames.push_back(parameters[idx].name);
+        }
+        return paramNames;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraVsearchCommand", "setParameters");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+// Returns the user-facing help text for chimera.vsearch.
+// The text names the parameters exactly as they are registered in setParameters()
+// ("reference", not "template") and quotes the defaults declared there.
+string ChimeraVsearchCommand::getHelpString(){
+    try {
+        string helpString = "";
+        helpString += "The chimera.vsearch command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
+        helpString += "This command is a wrapper for vsearch https://github.com/torognes/vsearch.\n";
+        helpString += "The chimera.vsearch command parameters are fasta, name, count, group, reference, processors, dereplicate, abskew, uchimealns, minh, mindiv, xn, dn, mindiffs.\n";
+        helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
+        helpString += "The name parameter allows you to provide a name file, if you are using reference=self. \n";
+        helpString += "The count parameter allows you to provide a count file, if you are using reference=self. When you use a count file with group info and dereplicate=T, mothur will create a *.pick.count_table file containing sequences after chimeras are removed. \n";
+        helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
+        helpString += "The group parameter allows you to provide a group file. The group file can be used with a namesfile and reference=self. When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
+        helpString += "If the dereplicate parameter is false, then if one group finds the sequence to be chimeric, then all groups find it to be chimeric, default=f.\n";
+        helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set reference=self, in this case the abundant sequences will be used as potential parents. \n";
+        helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
+        helpString += "The abskew parameter can only be used with reference=self. Minimum abundance skew. Default 1.9. Abundance skew is: min [ abund(parent1), abund(parent2) ] / abund(query).\n";
+        helpString += "The uchimealns parameter allows you to indicate you would like a file containing multiple alignments of query sequences to parents in human readable format. Alignments show columns with differences that support or contradict a chimeric model.\n";
+        helpString += "The minh parameter - minimum score to report chimera. Default 0.28. Values from 0.1 to 5 might be reasonable. Lower values increase sensitivity but may report more false positives. If you decrease xn you may need to increase minh, and vice versa.\n";
+        // NOTE(review): "%%" below is printed literally unless the help text goes through a printf-style formatter - confirm, and reduce to a single '%' if it does not.
+        helpString += "The mindiv parameter - minimum divergence ratio, default 0.8. Div ratio is 100%% - %%identity between query sequence and the closest candidate for being a parent. If you don't care about very close chimeras, then you could increase mindiv to, say, 1.0 or 2.0, and also decrease minh, say to 0.1, to increase sensitivity. How well this works will depend on your data. Best is to tune parameters on a good benchmark.\n";
+        helpString += "The xn parameter - weight of a no vote. Default 8.0. Decreasing this weight to around 3 or 4 may give better performance on denoised data.\n";
+        helpString += "The dn parameter - pseudo-count prior on number of no votes. Default 1.4. Probably no good reason to change this unless you can retune to a good benchmark for your data. Reasonable values are probably in the range from 0.2 to 2.\n";
+        helpString += "The mindiffs parameter - minimum number of differences in segment Default = (3).\n";
+        helpString += "The chimera.vsearch command should be in the following format: \n";
+        helpString += "chimera.vsearch(fasta=yourFastaFile, reference=yourTemplate) \n";
+        helpString += "Example: chimera.vsearch(fasta=AD.align, reference=silva.gold.align) \n";
+        helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
+        return helpString;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraVsearchCommand", "getHelpString");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+// Maps an output type ("chimera", "accnos", "alns", "count") to its file-name pattern.
+// An unknown type is reported, sets m->control_pressed, and yields an empty pattern.
+string ChimeraVsearchCommand::getOutputPattern(string type) {
+    try {
+        if (type == "chimera") { return "[filename],[tag],vsearch.chimeras"; }
+        if (type == "accnos")  { return "[filename],[tag],vsearch.accnos"; }
+        if (type == "alns")    { return "[filename],[tag],vsearch.alns"; }
+        if (type == "count")   { return "[filename],[tag],vsearch.pick.count_table"; }
+
+        // nothing matched: flag the problem and hand back an empty pattern
+        m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraVsearchCommand", "getOutputPattern");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+// Default constructor: marks the command as aborted / help-only, registers the
+// parameters and creates an empty entry for each output type.
+ChimeraVsearchCommand::ChimeraVsearchCommand(){
+    try {
+        calledHelp = true;
+        abort = true;
+        setParameters();
+
+        // every output type starts with an empty list of file names
+        const char* const kinds[] = { "chimera", "accnos", "alns", "count" };
+        vector<string> noFiles;
+        for (size_t k = 0; k < sizeof(kinds) / sizeof(kinds[0]); ++k) {
+            outputTypes[kinds[k]] = noFiles;
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraVsearchCommand", "ChimeraVsearchCommand");
+        exit(1);
+    }
+}
+//***************************************************************************************************************
+// Builds a chimera.vsearch command from the user's option string.
+//
+// option is "help", "citation", or a parameter list. For a parameter list this constructor
+//   1. checks every parameter name against setParameters(),
+//   2. resolves the dash-separated fasta / name / count / group file lists (each entry may be
+//      "current"; entries that cannot be opened are dropped after trying the input, default and
+//      output directories),
+//   3. resolves the reference (a file, or "self"),
+//   4. reads the numeric / boolean options, and
+//   5. locates the vsearch executable.
+// Any problem sets abort = true.
+ChimeraVsearchCommand::ChimeraVsearchCommand(string option) {
+    try {
+        abort = false; calledHelp = false; hasName=false; hasCount=false;
+
+        //allow user to run help
+        if(option == "help") { help(); abort = true; calledHelp = true; }
+        else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+
+        else {
+            vector<string> myArray = setParameters();
+
+            OptionParser parser(option);
+            map<string,string> parameters = parser.getParameters();
+
+            ValidParameters validParameter("chimera.vsearch");
+            map<string,string>::iterator it;
+
+            //check to make sure all parameters are valid for command
+            for (it = parameters.begin(); it != parameters.end(); it++) {
+                if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
+            }
+
+            vector<string> tempOutNames;
+            outputTypes["chimera"] = tempOutNames;
+            outputTypes["accnos"] = tempOutNames;
+            outputTypes["alns"] = tempOutNames;
+            outputTypes["count"] = tempOutNames;
+
+            //if the user changes the input directory command factory will send this info to us in the output parameter
+            string inputDir = validParameter.validFile(parameters, "inputdir", false);
+            if (inputDir == "not found"){ inputDir = ""; }
+
+            //------------------------------------------------------------------ fasta files (required)
+            fastafile = validParameter.validFile(parameters, "fasta", false);
+            if (fastafile == "not found") {
+                //if there is a current fasta file, use it
+                string filename = m->getFastaFile();
+                if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
+                else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
+            }else {
+                m->splitAtDash(fastafile, fastaFileNames);
+
+                //go through files and make sure they are good, if not, then disregard them
+                for (int i = 0; i < fastaFileNames.size(); i++) {
+
+                    bool ignore = false;
+                    if (fastaFileNames[i] == "current") {
+                        fastaFileNames[i] = m->getFastaFile();
+                        if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
+                        else {
+                            m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
+                            //erase from file list; step back so the next entry is not skipped
+                            fastaFileNames.erase(fastaFileNames.begin()+i);
+                            i--;
+                        }
+                    }
+
+                    if (!ignore) {
+
+                        if (inputDir != "") {
+                            string path = m->hasPath(fastaFileNames[i]);
+                            //if the user has not given a path then, add inputdir. else leave path alone.
+                            if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
+                        }
+
+                        int ableToOpen;
+                        ifstream in;
+
+                        ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
+
+                        //if you can't open it, try default location
+                        if (ableToOpen == 1) {
+                            if (m->getDefaultPath() != "") { //default path is set
+                                string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
+                                m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
+                                ifstream in2;
+                                ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                in2.close();
+                                fastaFileNames[i] = tryPath;
+                            }
+                        }
+
+                        //still not found, try the output directory
+                        if (ableToOpen == 1) {
+                            if (m->getOutputDir() != "") { //output directory is set
+                                string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
+                                m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
+                                ifstream in2;
+                                ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                in2.close();
+                                fastaFileNames[i] = tryPath;
+                            }
+                        }
+
+                        in.close();
+
+                        if (ableToOpen == 1) {
+                            m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
+                            //erase from file list
+                            fastaFileNames.erase(fastaFileNames.begin()+i);
+                            i--;
+                        }else {
+                            m->setFastaFile(fastaFileNames[i]);
+                        }
+                    }
+                }
+
+                //make sure there is at least one valid file left
+                if (fastaFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
+            }
+
+
+            //------------------------------------------------------------------ name files (optional)
+            namefile = validParameter.validFile(parameters, "name", false);
+            if (namefile == "not found") { namefile = ""; }
+            else {
+                m->splitAtDash(namefile, nameFileNames);
+
+                //go through files and make sure they are good, if not, then disregard them
+                for (int i = 0; i < nameFileNames.size(); i++) {
+
+                    bool ignore = false;
+                    if (nameFileNames[i] == "current") {
+                        nameFileNames[i] = m->getNameFile();
+                        if (nameFileNames[i] != "") { m->mothurOut("Using " + nameFileNames[i] + " as input file for the name parameter where you had given current."); m->mothurOutEndLine(); }
+                        else {
+                            m->mothurOut("You have no current namefile, ignoring current."); m->mothurOutEndLine(); ignore=true;
+                            //erase from file list
+                            nameFileNames.erase(nameFileNames.begin()+i);
+                            i--;
+                        }
+                    }
+
+                    if (!ignore) {
+
+                        if (inputDir != "") {
+                            string path = m->hasPath(nameFileNames[i]);
+                            //if the user has not given a path then, add inputdir. else leave path alone.
+                            if (path == "") { nameFileNames[i] = inputDir + nameFileNames[i]; }
+                        }
+
+                        int ableToOpen;
+                        ifstream in;
+
+                        ableToOpen = m->openInputFile(nameFileNames[i], in, "noerror");
+
+                        //if you can't open it, try default location
+                        if (ableToOpen == 1) {
+                            if (m->getDefaultPath() != "") { //default path is set
+                                string tryPath = m->getDefaultPath() + m->getSimpleName(nameFileNames[i]);
+                                m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
+                                ifstream in2;
+                                ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                in2.close();
+                                nameFileNames[i] = tryPath;
+                            }
+                        }
+
+                        //still not found, try the output directory
+                        if (ableToOpen == 1) {
+                            if (m->getOutputDir() != "") { //output directory is set
+                                string tryPath = m->getOutputDir() + m->getSimpleName(nameFileNames[i]);
+                                m->mothurOut("Unable to open " + nameFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
+                                ifstream in2;
+                                ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                in2.close();
+                                nameFileNames[i] = tryPath;
+                            }
+                        }
+
+                        in.close();
+
+                        if (ableToOpen == 1) {
+                            m->mothurOut("Unable to open " + nameFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
+                            //erase from file list
+                            nameFileNames.erase(nameFileNames.begin()+i);
+                            i--;
+                        }else {
+                            m->setNameFile(nameFileNames[i]);
+                        }
+                    }
+                }
+            }
+
+            if (nameFileNames.size() != 0) { hasName = true; }
+
+            //------------------------------------------------------------------ count files (optional)
+            vector<string> countfileNames;
+            countfile = validParameter.validFile(parameters, "count", false);
+            if (countfile == "not found") {
+                countfile = "";
+            }else {
+                m->splitAtDash(countfile, countfileNames);
+
+                //go through files and make sure they are good, if not, then disregard them
+                for (int i = 0; i < countfileNames.size(); i++) {
+
+                    bool ignore = false;
+                    if (countfileNames[i] == "current") {
+                        countfileNames[i] = m->getCountTableFile();
+                        //test the count entry itself: nameFileNames is normally empty here, so indexing it would read out of bounds
+                        if (countfileNames[i] != "") { m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
+                        else {
+                            m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true;
+                            //erase from file list
+                            countfileNames.erase(countfileNames.begin()+i);
+                            i--;
+                        }
+                    }
+
+                    if (!ignore) {
+
+                        if (inputDir != "") {
+                            string path = m->hasPath(countfileNames[i]);
+                            //if the user has not given a path then, add inputdir. else leave path alone.
+                            if (path == "") { countfileNames[i] = inputDir + countfileNames[i]; }
+                        }
+
+                        int ableToOpen;
+                        ifstream in;
+
+                        ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
+
+                        //if you can't open it, try default location
+                        if (ableToOpen == 1) {
+                            if (m->getDefaultPath() != "") { //default path is set
+                                string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
+                                m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
+                                ifstream in2;
+                                ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                in2.close();
+                                countfileNames[i] = tryPath;
+                            }
+                        }
+
+                        //still not found, try the output directory
+                        if (ableToOpen == 1) {
+                            if (m->getOutputDir() != "") { //output directory is set
+                                string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
+                                m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
+                                ifstream in2;
+                                ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                in2.close();
+                                countfileNames[i] = tryPath;
+                            }
+                        }
+
+                        in.close();
+
+                        if (ableToOpen == 1) {
+                            m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
+                            //erase from file list
+                            countfileNames.erase(countfileNames.begin()+i);
+                            i--;
+                        }else {
+                            m->setCountTableFile(countfileNames[i]);
+                        }
+                    }
+                }
+            }
+
+            if (countfileNames.size() != 0) { hasCount = true; }
+
+            //name and count are mutually exclusive
+            if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
+
+            //from here on the count files travel in nameFileNames
+            if (!hasName && hasCount) { nameFileNames = countfileNames; }
+
+            if ((hasCount || hasName) && (nameFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of name or count files does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
+
+            //------------------------------------------------------------------ group files (optional)
+            bool hasGroup = true;
+            groupfile = validParameter.validFile(parameters, "group", false);
+            if (groupfile == "not found") { groupfile = ""; hasGroup = false; }
+            else {
+                m->splitAtDash(groupfile, groupFileNames);
+
+                //go through files and make sure they are good, if not, then disregard them
+                for (int i = 0; i < groupFileNames.size(); i++) {
+
+                    bool ignore = false;
+                    if (groupFileNames[i] == "current") {
+                        groupFileNames[i] = m->getGroupFile();
+                        if (groupFileNames[i] != "") { m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }
+                        else {
+                            m->mothurOut("You have no current groupfile, ignoring current."); m->mothurOutEndLine(); ignore=true;
+                            //erase from file list
+                            groupFileNames.erase(groupFileNames.begin()+i);
+                            i--;
+                        }
+                    }
+
+                    if (!ignore) {
+
+                        if (inputDir != "") {
+                            string path = m->hasPath(groupFileNames[i]);
+                            //if the user has not given a path then, add inputdir. else leave path alone.
+                            if (path == "") { groupFileNames[i] = inputDir + groupFileNames[i]; }
+                        }
+
+                        int ableToOpen;
+                        ifstream in;
+
+                        ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");
+
+                        //if you can't open it, try default location
+                        if (ableToOpen == 1) {
+                            if (m->getDefaultPath() != "") { //default path is set
+                                string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);
+                                m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
+                                ifstream in2;
+                                ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                in2.close();
+                                groupFileNames[i] = tryPath;
+                            }
+                        }
+
+                        //still not found, try the output directory
+                        if (ableToOpen == 1) {
+                            if (m->getOutputDir() != "") { //output directory is set
+                                string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);
+                                m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
+                                ifstream in2;
+                                ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+                                in2.close();
+                                groupFileNames[i] = tryPath;
+                            }
+                        }
+
+                        in.close();
+
+                        if (ableToOpen == 1) {
+                            m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
+                            //erase from file list
+                            groupFileNames.erase(groupFileNames.begin()+i);
+                            i--;
+                        }else {
+                            m->setGroupFile(groupFileNames[i]);
+                        }
+                    }
+                }
+
+                //make sure there is at least one valid file left
+                if (groupFileNames.size() == 0) { m->mothurOut("[ERROR]: no valid group files."); m->mothurOutEndLine(); abort = true; }
+            }
+
+            if (hasGroup && (groupFileNames.size() != fastaFileNames.size())) { m->mothurOut("[ERROR]: The number of groupfiles does not match the number of fastafiles, please correct."); m->mothurOutEndLine(); abort=true; }
+
+            if (hasGroup && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }
+
+            //if the user changes the output directory command factory will send this info to us in the output parameter
+            outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
+
+            //------------------------------------------------------------------ reference
+            string path;
+            it = parameters.find("reference");
+            //user has given a template file
+            if(it != parameters.end()){
+                if (it->second == "self") { templatefile = "self"; }
+                else {
+                    path = m->hasPath(it->second);
+                    //if the user has not given a path then, add inputdir. else leave path alone.
+                    if (path == "") { parameters["reference"] = inputDir + it->second; }
+
+                    templatefile = validParameter.validFile(parameters, "reference", true);
+                    if (templatefile == "not open") { abort = true; }
+                    else if (templatefile == "not found") { //check for saved reference sequences
+                        m->mothurOut("[ERROR]: The reference parameter is a required.\n"); abort = true;
+                    }
+                }
+            }else if (hasName) { templatefile = "self"; } //a name or count file without a reference implies reference=self
+            else if (hasCount) { templatefile = "self"; }
+            else {
+                m->mothurOut("[ERROR]: The reference parameter is a required.");
+
+                templatefile = ""; abort = true;
+            }
+
+            //------------------------------------------------------------------ numeric / boolean options
+            string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
+            m->setProcessors(temp);
+            m->mothurConvert(temp, processors);
+
+            //the use* flags record whether the user supplied the option explicitly
+            abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; }
+            if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with reference=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
+
+            //the parameter is registered as "uchimealns" in setParameters(), so that is the key to look up
+            temp = validParameter.validFile(parameters, "uchimealns", false); if (temp == "not found") { temp = "f"; }
+            chimealns = m->isTrue(temp);
+
+            minh = validParameter.validFile(parameters, "minh", false); if (minh == "not found") { useMinH = false; minh = "0.28"; } else{ useMinH = true; }
+            mindiv = validParameter.validFile(parameters, "mindiv", false); if (mindiv == "not found") { useMindiv = false; mindiv = "0.8"; } else{ useMindiv = true; }
+            xn = validParameter.validFile(parameters, "xn", false); if (xn == "not found") { useXn = false; xn = "8.0"; } else{ useXn = true; }
+            dn = validParameter.validFile(parameters, "dn", false); if (dn == "not found") { useDn = false; dn = "1.4"; } else{ useDn = true; }
+            mindiffs = validParameter.validFile(parameters, "mindiffs", false); if (mindiffs == "not found") { useMindiffs = false; mindiffs = "3"; } else{ useMindiffs = true; }
+
+            temp = validParameter.validFile(parameters, "dereplicate", false);
+            if (temp == "not found") { temp = "false"; }
+            dups = m->isTrue(temp);
+
+
+            //name / count / group files only make sense when the sequences are their own reference
+            if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
+            if (hasCount && (templatefile != "self")) { m->mothurOut("You have provided a countfile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
+            if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
+
+            //------------------------------------------------------------------ locate the vsearch executable
+            //start from m->argv: keep everything before the last 'm' (case-insensitive) as the candidate directory
+            path = m->argv;
+            string tempPath = path;
+            for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
+            path = path.substr(0, (tempPath.find_last_of('m')));
+
+            string vsearchCommand;
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+            vsearchCommand = path + "vsearch"; //first candidate: vsearch in the directory derived above
+            if (m->debug) {
+                m->mothurOut("[DEBUG]: vsearch location using \"which vsearch\" = ");
+                Command* newCommand = new SystemCommand("which vsearch"); m->mothurOutEndLine();
+                newCommand->execute();
+                delete newCommand;
+                m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = ");
+                newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine();
+                newCommand->execute();
+                delete newCommand;
+            }
+#else
+            m->mothurOut("[ERROR]: The chimera.vsearch command is not available for windows. The vsearch program is not supported on the Windows platform, aborting."); m->mothurOutEndLine(); abort=true;
+#endif
+
+            #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+            //test to make sure vsearch exists
+            ifstream in;
+            vsearchCommand = m->getFullPathName(vsearchCommand);
+            int ableToOpen = m->openInputFile(vsearchCommand, in, "no error"); in.close();
+            if(ableToOpen == 1) {
+                m->mothurOut(vsearchCommand + " file does not exist. Checking path... \n");
+                //check to see if vsearch is in the path
+
+                string vLocation = m->findProgramPath("vsearch");
+
+                ifstream in2;
+                ableToOpen = m->openInputFile(vLocation, in2, "no error"); in2.close();
+
+                if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + vLocation + " file does not exist. mothur requires the vsearch executable."); m->mothurOutEndLine(); abort = true; }
+                else { m->mothurOut("Found vsearch in your path, using " + vLocation + "\n");vsearchLocation = vLocation; }
+            }else { vsearchLocation = vsearchCommand; }
+
+            vsearchLocation = m->getFullPathName(vsearchLocation);
+            #endif
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraVsearchCommand", "ChimeraVsearchCommand");
+        exit(1);
+    }
+}
+//***************************************************************************************************************
+
+int ChimeraVsearchCommand::execute(){
+ try{
+
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
+ for (int s = 0; s < fastaFileNames.size(); s++) {
+
+ m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
+
+ int start = time(NULL);
+ string nameFile = "";
+ if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
+ variables["[tag]"] = "denovo";
+ if (templatefile != "self") { variables["[tag]"] = "ref"; }
+ string outputFileName = getOutputFileName("chimera", variables);
+ string accnosFileName = getOutputFileName("accnos", variables);
+ string alnsFileName = getOutputFileName("alns", variables);
+ string newFasta = m->getRootName(fastaFileNames[s]) + "temp";
+ string newCountFile = "";
+
+ //you provided a groupfile
+ string groupFile = "";
+ bool hasGroup = false;
+ int numSeqs = 0;
+ if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; hasGroup = true; }
+ else if (hasCount) {
+ CountTable ct;
+ if (ct.testGroups(nameFileNames[s])) { hasGroup = true; }
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFileNames[s]));
+ newCountFile = getOutputFileName("count", variables);
+ }
+
+ if ((templatefile == "self") && (!hasGroup)) { //you want to run uchime with a template=self and no groups
+
+ if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
+ if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
+ nameFile = nameFileNames[s];
+ }else { nameFile = getNamesFile(fastaFileNames[s]); }
+
+ map<string, string> seqs;
+ numSeqs = readFasta(fastaFileNames[s], seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
+
+ //read namefile
+ vector<seqPriorityNode> nameMapCount;
+ int error;
+ if (hasCount) {
+ CountTable ct;
+ ct.readTable(nameFile, true, false);
+ for(map<string, string>::iterator it = seqs.begin(); it != seqs.end(); it++) {
+ int num = ct.getNumSeqs(it->first);
+ if (num == 0) { error = 1; }
+ else {
+ seqPriorityNode temp(num, it->second, it->first);
+ nameMapCount.push_back(temp);
+ }
+ }
+ }else {
+ error = m->readNames(nameFile, nameMapCount, seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
+ }
+ if (error == 1) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
+ if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
+
+ m->printVsearchFile(nameMapCount, newFasta, ";size=", ";");
+ fastaFileNames[s] = newFasta;
+ }
+
+ if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
+
+ if (hasGroup) {
+ if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
+ nameFile = nameFileNames[s];
+ }else { nameFile = getNamesFile(fastaFileNames[s]); }
+
+ //Parse sequences by group
+ vector<string> groups;
+ map<string, string> uniqueNames;
+ if (hasCount) {
+ cparser = new SequenceCountParser(nameFile, fastaFileNames[s]);
+ groups = cparser->getNamesOfGroups();
+ uniqueNames = cparser->getAllSeqsMap();
+ }else{
+ sparser = new SequenceParser(groupFile, fastaFileNames[s], nameFile);
+ groups = sparser->getNamesOfGroups();
+ uniqueNames = sparser->getAllSeqsMap();
+ }
+
+ if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
+
+ //clears files
+ ofstream out, out1, out2;
+ m->openOutputFile(outputFileName, out); out.close();
+ m->openOutputFile(accnosFileName, out1); out1.close();
+ if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
+
+ if(processors == 1) { driverGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, 0, groups.size(), groups);
+
+ if (hasCount && dups) {
+ CountTable c; c.readTable(nameFile, true, false);
+ if (!m->isBlank(newCountFile)) {
+ ifstream in2;
+ m->openInputFile(newCountFile, in2);
+
+ string name, group;
+ while (!in2.eof()) {
+ in2 >> name >> group; m->gobble(in2);
+ c.setAbund(name, group, 0);
+ }
+ in2.close();
+ }
+ m->mothurRemove(newCountFile);
+ c.printTable(newCountFile);
+ }
+
+ }else{ createProcessesGroups(outputFileName, newFasta, accnosFileName, alnsFileName, newCountFile, groups, nameFile, groupFile, fastaFileNames[s]); }
+
+ if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
+
+
+ if (!dups) {
+ int totalChimeras = deconvoluteResults(uniqueNames, outputFileName, accnosFileName, alnsFileName);
+
+ m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(uniqueNames.size()) + " sequences. " + toString(totalChimeras) + " chimeras were found."); m->mothurOutEndLine();
+ m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine();
+ }else {
+
+ if (hasCount) {
+ set<string> doNotRemove;
+ CountTable c; c.readTable(newCountFile, true, true);
+ vector<string> namesInTable = c.getNamesOfSeqs();
+ for (int i = 0; i < namesInTable.size(); i++) {
+ int temp = c.getNumSeqs(namesInTable[i]);
+ if (temp == 0) { c.remove(namesInTable[i]); }
+ else { doNotRemove.insert((namesInTable[i])); }
+ }
+ //remove names we want to keep from accnos file.
+ set<string> accnosNames = m->readAccnos(accnosFileName);
+ ofstream out2;
+ m->openOutputFile(accnosFileName, out2);
+ for (set<string>::iterator it = accnosNames.begin(); it != accnosNames.end(); it++) {
+ if (doNotRemove.count(*it) == 0) { out2 << (*it) << endl; }
+ }
+ out2.close();
+ c.printTable(newCountFile);
+ outputNames.push_back(newCountFile); outputTypes["count"].push_back(newCountFile);
+ }
+ }
+
+ if (hasCount) { delete cparser; }
+ else { delete sparser; }
+
+ if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
+
+ }else{
+ if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
+
+ int numChimeras = 0;
+
+ if(processors == 1){ driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
+ else{ createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
+
+ //add headings
+ ofstream out;
+ m->openOutputFile(outputFileName+".temp", out);
+ out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
+ out.close();
+
+ m->appendFiles(outputFileName, outputFileName+".temp");
+ m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
+
+ if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
+
+ //remove file made for uchime
+ if (templatefile == "self") { m->mothurRemove(fastaFileNames[s]); }
+
+ m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found."); m->mothurOutEndLine();
+ }
+
+ outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
+ outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
+ if (chimealns) { outputNames.push_back(alnsFileName); outputTypes["alns"].push_back(alnsFileName); }
+ }
+
+ //set accnos file as new current accnosfile
+ string current = "";
+ itTypes = outputTypes.find("accnos");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
+ }
+
+ itTypes = outputTypes.find("count");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+ }
+
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ChimeraVsearchCommand", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+// Rewrites the per-group vsearch results so that every sequence name is replaced by the name
+// it maps to in uniqueNames, and so that repeated reports of the same sequence are dropped.
+//   - accnos file : each mapped name is written once; the count of names written is returned.
+//   - chimera file: one row per mapped query name; a row flagged "N" is dropped when the mapped
+//                   name is listed in the accnos file (it was chimeric in some other group).
+//   - alns file   : (only when chimealns is set) Query/ParentA/ParentB name lines are renamed.
+// Each file is rewritten through "<file>.temp" which is then renamed over the original.
+// Returns the number of distinct chimeric names, or 0 when m->control_pressed interrupts the run.
+int ChimeraVsearchCommand::deconvoluteResults(map<string, string>& uniqueNames, string outputFileName, string accnosFileName, string alnsFileName){
+    try {
+        map<string, string>::iterator itUnique;
+        int total = 0;
+
+        ofstream out2;
+        m->openOutputFile(accnosFileName+".temp", out2);
+
+        string name;
+        set<string> namesInFile; //this is so if a sequence is found to be chimera in several samples we dont write it to the results file more than once
+        set<string>::iterator itNames;
+        set<string> chimerasInFile;
+        set<string>::iterator itChimeras;
+
+        if (!m->isBlank(accnosFileName)) {
+            //edit accnos file
+            ifstream in2;
+            m->openInputFile(accnosFileName, in2);
+
+            while (!in2.eof()) {
+                if (m->control_pressed) { in2.close(); out2.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName+".temp")); return 0; }
+
+                in2 >> name; m->gobble(in2);
+
+                //find unique name
+                itUnique = uniqueNames.find(name);
+
+                if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing accnos results. Cannot find " + name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                else {
+                    itChimeras = chimerasInFile.find((itUnique->second));
+
+                    //first time we see this mapped name - record it
+                    if (itChimeras == chimerasInFile.end()) {
+                        out2 << itUnique->second << endl;
+                        chimerasInFile.insert((itUnique->second));
+                        total++;
+                    }
+                }
+            }
+            in2.close();
+        }
+        out2.close();
+
+        m->mothurRemove(accnosFileName);
+        rename((accnosFileName+".temp").c_str(), accnosFileName.c_str());
+
+
+        //edit chimera file
+        ifstream in;
+        m->openInputFile(outputFileName, in);
+
+        ofstream out;
+        m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
+        //out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
+
+        float temp1;
+        string parent1, parent2, parent3, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
+        name = "";
+        namesInFile.clear();
+        //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
+        /* 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+         0.000000 F11Fcsw_33372/ab=18/ * * * * * * * * * * * * * * N
+         0.0000 GQY1XT001C296C;size=356; * * * * * * * * 0 0 0 0 0 0 * N
+         0.0469 GQY1XT001CPCVN;size=154; GQY1XT001C296C;size=356; GQY1XT001C44N8;size=323; GQY1XT001C44N8;size=323; 93.8 91.5 92.3 92.6 92.3 4 0 7 9 3 7 1.5 N
+         0.018300 F11Fcsw_14980/ab=16/ F11Fcsw_1915/ab=35/ F11Fcsw_6032/ab=42/ 79.9 78.7 78.2 78.7 79.2 3 0 5 11 10 20 1.46 N
+         */
+
+        while (!in.eof()) {
+
+            if (m->control_pressed) { in.close(); out.close(); m->mothurRemove((outputFileName+".temp")); return 0; }
+
+            bool print = false;
+            in >> temp1; m->gobble(in);
+            in >> name; m->gobble(in);
+            in >> parent1; m->gobble(in);
+            in >> parent2; m->gobble(in);
+            in >> parent3; m->gobble(in);
+            in >> temp2 >> temp3 >> temp4 >> temp5 >> temp6 >> temp7 >> temp8 >> temp9 >> temp10 >> temp11 >> temp12 >> temp13 >> flag;
+            m->gobble(in);
+
+            //parse name - everything from the first ';' on (e.g. ";size=356;") is kept aside and re-attached after renaming
+            string restOfName = "";
+            string::size_type pos = name.find_first_of(';');
+            if (pos != string::npos) {
+                restOfName = name.substr(pos);
+                name = name.substr(0, pos);
+            }
+
+            //find unique name
+            itUnique = uniqueNames.find(name);
+
+            if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find "+ name + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+            else {
+                name = itUnique->second;
+                //is this name already in the file
+                itNames = namesInFile.find((name));
+
+                if (itNames == namesInFile.end()) { //no not in file
+                    if (flag == "N") { //are you really a no??
+                        //is this sequence really not chimeric??
+                        itChimeras = chimerasInFile.find(name);
+
+                        //then you really are a no so print, otherwise skip
+                        if (itChimeras == chimerasInFile.end()) { print = true; }
+                    }else{ print = true; }
+                }
+            }
+
+            if (print) {
+                out << temp1 << '\t' << name << restOfName << '\t';
+                namesInFile.insert(name);
+
+                //parse parent1 names
+                if (parent1 != "*") {
+                    restOfName = "";
+                    pos = parent1.find_first_of(';');
+                    if (pos != string::npos) {
+                        restOfName = parent1.substr(pos);
+                        parent1 = parent1.substr(0, pos);
+                    }
+
+                    itUnique = uniqueNames.find(parent1);
+                    if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentA "+ parent1 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                    else { out << itUnique->second << restOfName << '\t'; }
+                }else { out << parent1 << '\t'; }
+
+                //parse parent2 names
+                if (parent2 != "*") {
+                    restOfName = "";
+                    pos = parent2.find_first_of(';');
+                    if (pos != string::npos) {
+                        restOfName = parent2.substr(pos);
+                        parent2 = parent2.substr(0, pos);
+                    }
+
+                    itUnique = uniqueNames.find(parent2);
+                    if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentB "+ parent2 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                    else { out << itUnique->second << restOfName << '\t'; }
+                }else { out << parent2 << '\t'; }
+
+                //parse parent3 names
+                if (parent3 != "*") {
+                    restOfName = "";
+                    pos = parent3.find_first_of(';');
+                    if (pos != string::npos) {
+                        restOfName = parent3.substr(pos);
+                        parent3 = parent3.substr(0, pos);
+                    }
+
+                    itUnique = uniqueNames.find(parent3);
+                    if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing chimera results. Cannot find parentC "+ parent3 + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                    else { out << itUnique->second << restOfName << '\t'; }
+                }else { out << parent3 << '\t'; }
+
+                //every remaining column is tab separated, including temp12 and temp13
+                out << temp2 << '\t' << temp3 << '\t' << temp4 << '\t' << temp5 << '\t' << temp6 << '\t' << temp7 << '\t' << temp8 << '\t' << temp9 << '\t' << temp10 << '\t' << temp11 << '\t' << temp12 << '\t' << temp13 << '\t' << flag << endl;
+            }
+        }
+        in.close();
+        out.close();
+
+        m->mothurRemove(outputFileName);
+        rename((outputFileName+".temp").c_str(), outputFileName.c_str());
+
+
+        //edit anls file
+        //assumptions - in file each read will always look like - if uchime source is updated, revisit this code.
+        /*
+         ------------------------------------------------------------------------
+         Query ( 179 nt) F21Fcsw_11639/ab=591/
+         ParentA ( 179 nt) F11Fcsw_6529/ab=1625/
+         ParentB ( 181 nt) F21Fcsw_12128/ab=1827/
+
+         A 1 AAGgAAGAtTAATACaagATGgCaTCatgAGtccgCATgTtcAcatGATTAAAG--gTaTtcCGGTagacGATGGGGATG 78
+         Q 1 AAGTAAGACTAATACCCAATGACGTCTCTAGAAGACATCTGAAAGAGATTAAAG--ATTTATCGGTGATGGATGGGGATG 78
+         B 1 AAGgAAGAtTAATcCaggATGggaTCatgAGttcACATgTccgcatGATTAAAGgtATTTtcCGGTagacGATGGGGATG 80
+         Diffs N N A N?N N N NNN N?NB N ?NaNNN B B NN NNNN
+         Votes 0 0 + 000 0 0 000 000+ 0 00!000 + 00 0000
+         Model AAAAAAAAAAAAAAAAAAAAAAxBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
+
+         A 79 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCttCGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
+         Q 79 CGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 158
+         B 81 CGTtccATTAGaTaGTaGGCGGGGTAACGGCCCACCtAGtCAACGATggaTAGGGGTTCTGAGAGGAAGGTCCCCCACAT 160
+         Diffs NNN N N N N N BB NNN
+         Votes 000 0 0 0 0 0 ++ 000
+         Model BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
+
+         A 159 TGGAACTGAGACACGGTCCAA 179
+         Q 159 TGGAACTGAGACACGGTCCAA 179
+         B 161 TGGAACTGAGACACGGTCCAA 181
+         Diffs
+         Votes
+         Model BBBBBBBBBBBBBBBBBBBBB
+
+         Ids. QA 76.6%, QB 77.7%, AB 93.7%, QModel 78.9%, Div. +1.5%
+         Diffs Left 7: N 0, A 6, Y 1 (14.3%); Right 35: N 1, A 30, Y 4 (11.4%), Score 0.0047
+         */
+        if (chimealns) {
+            ifstream in3;
+            m->openInputFile(alnsFileName, in3);
+
+            ofstream out3;
+            m->openOutputFile(alnsFileName+".temp", out3); out3.setf(ios::fixed, ios::floatfield); out3.setf(ios::showpoint);
+
+            name = "";
+            namesInFile.clear();
+            string line = "";
+
+            while (!in3.eof()) {
+                if (m->control_pressed) { in3.close(); out3.close(); m->mothurRemove(outputFileName); m->mothurRemove((accnosFileName)); m->mothurRemove((alnsFileName+".temp")); return 0; }
+
+                line = "";
+                line = m->getline(in3);
+                string temp = "";
+
+                if (line != "") {
+                    istringstream iss(line);
+                    iss >> temp;
+
+                    //are you a name line
+                    if ((temp == "Query") || (temp == "ParentA") || (temp == "ParentB")) {
+                        //copy everything before the ')' through unchanged; spot ends on the ')' (or on the last char if there is none)
+                        int spot = 0;
+                        for (int i = 0; i < line.length(); i++) {
+                            spot = i;
+                            if (line[i] == ')') { break; }
+                            else { out3 << line[i]; }
+                        }
+
+                        if (spot == (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                        else if ((spot+2) > (line.length() - 1)) { m->mothurOut("[ERROR]: could not line sequence name in line " + line + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+                        else {
+                            //the ')' and the character after it belong to the alns file, not to the (already closed) chimera stream
+                            out3 << line[spot] << line[spot+1];
+
+                            name = line.substr(spot+2);
+
+                            //parse name - name will either look like U68590/ab=1/ or U68590
+                            string restOfName = "";
+                            string::size_type pos = name.find_first_of(';');
+                            if (pos != string::npos) {
+                                restOfName = name.substr(pos);
+                                name = name.substr(0, pos);
+                            }
+
+                            //find unique name
+                            itUnique = uniqueNames.find(name);
+
+                            if (itUnique == uniqueNames.end()) { m->mothurOut("[ERROR]: trouble parsing alns results. Cannot find "+ name + "."); m->mothurOutEndLine();m->control_pressed = true; }
+                            else {
+                                //only limit repeats on query names
+                                if (temp == "Query") {
+                                    itNames = namesInFile.find((itUnique->second));
+
+                                    //NOTE(review): a repeated Query name gets neither a name nor a line ending here, so the
+                                    //next line is appended to this one - confirm whether the whole record should be skipped instead.
+                                    if (itNames == namesInFile.end()) {
+                                        out3 << itUnique->second << restOfName << endl;
+                                        namesInFile.insert((itUnique->second));
+                                    }
+                                }else { out3 << itUnique->second << restOfName << endl; }
+                            }
+
+                        }
+
+                    }else { //not need to alter line
+                        out3 << line << endl;
+                    }
+                }else { out3 << endl; }
+            }
+            in3.close();
+            out3.close();
+
+            m->mothurRemove(alnsFileName);
+            rename((alnsFileName+".temp").c_str(), alnsFileName.c_str());
+        }
+
+        return total;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraVsearchCommand", "deconvoluteResults");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+// Loads every record of the fasta file `filename` into `seqs`, keyed by sequence name with the
+// unaligned sequence as value. Returns the number of records read, or 0 if interrupted.
+int ChimeraVsearchCommand::readFasta(string filename, map<string, string>& seqs){
+    try {
+        ifstream fastaIn;
+        m->openInputFile(filename, fastaIn);
+
+        int recordCount = 0;
+        for (; !fastaIn.eof(); recordCount++) {
+
+            //user asked us to stop - give up without reporting a count
+            if (m->control_pressed) { fastaIn.close(); return 0; }
+
+            Sequence current(fastaIn); m->gobble(fastaIn);
+            seqs[current.getName()] = current.getUnaligned();
+        }
+        fastaIn.close();
+
+        return recordCount;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraVsearchCommand", "readFasta");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+
+// Runs unique.seqs (DeconvoluteCommand) on `inputFile` because no names file was supplied.
+// On return `inputFile` has been replaced by the first "fasta" output of that command and the
+// first "name" output is returned.
+string ChimeraVsearchCommand::getNamesFile(string& inputFile){
+    try {
+        m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
+
+        //use unique.seqs to create new name and fastafile
+        string uniqueArgs = "fasta=" + inputFile;
+        m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+        m->mothurOut("Running command: unique.seqs(" + uniqueArgs + ")"); m->mothurOutEndLine();
+
+        //mothurCalling is raised only for the duration of the nested command
+        m->mothurCalling = true;
+        Command* uniqueCommand = new DeconvoluteCommand(uniqueArgs);
+        uniqueCommand->execute();
+        map<string, vector<string> > produced = uniqueCommand->getOutputFiles();
+        delete uniqueCommand;
+        m->mothurCalling = false;
+
+        m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+
+        string generatedNames = produced["name"][0];
+        inputFile = produced["fasta"][0];
+
+        return generatedNames;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraVsearchCommand", "getNamesFile");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+// Checks groups[start] .. groups[end-1] one at a time: the group's sequences are written to
+// `filename`, driver() is run on them, and the per-group result files are appended to
+// outputFName / accnos / alns. When a count file is used with dups set, each chimeric
+// "name <tab> group" pair is also written to `countlist`.
+// Returns the sum of the values returned by driver(), or 0 on error / interruption.
+int ChimeraVsearchCommand::driverGroups(string outputFName, string filename, string accnos, string alns, string countlist, int start, int end, vector<string> groups){
+    try {
+
+        int totalSeqs = 0;
+        int numChimeras = 0;
+
+        ofstream outCountList;
+        if (hasCount && dups) { m->openOutputFile(countlist, outCountList); }
+
+        for (int i = start; i < end; i++) {
+            int groupStart = time(NULL);
+            if (m->control_pressed) { outCountList.close(); m->mothurRemove(countlist); return 0; }
+
+            //result files that belong to this group only
+            const string groupOutput = outputFName + groups[i];
+            const string groupAccnos = accnos + groups[i];
+            const string groupAlns = alns + groups[i];
+
+            //write this group's sequences to filename, abundance tagged as ;size=N;
+            int error;
+            if (hasCount) { error = cparser->getSeqs(groups[i], filename, ";size=", ";", true); }
+            else { error = sparser->getSeqs(groups[i], filename, ";size=", ";", true); }
+            if ((error == 1) || m->control_pressed) { return 0; }
+
+            int numSeqs = driver(groupOutput, filename, groupAccnos, groupAlns, numChimeras);
+            totalSeqs += numSeqs;
+
+            if (m->control_pressed) { return 0; }
+
+            //remove file made for uchime
+            if (m->debug) { m->mothurOut("[DEBUG]: saving file: " + filename + ".\n"); }
+            else { m->mothurRemove(filename); }
+
+            //if we provided a count file with group info and set dereplicate=t, then we want to create a *.pick.count_table
+            //This table will zero out group counts for seqs determined to be chimeric by that group.
+            if (dups && !m->isBlank(groupAccnos)) {
+                ifstream accnosIn;
+                m->openInputFile(groupAccnos, accnosIn);
+                string name;
+
+                if (hasCount) {
+                    while (!accnosIn.eof()) {
+                        accnosIn >> name; m->gobble(accnosIn);
+                        outCountList << name << '\t' << groups[i] << endl;
+                    }
+                    accnosIn.close();
+                }else {
+                    //expand every chimeric name into all the names it stands for in this group
+                    map<string, string> groupNameMap = sparser->getNameMap(groups[i]);
+                    ofstream expanded;
+                    m->openOutputFile(groupAccnos+".temp", expanded);
+                    while (!accnosIn.eof()) {
+                        accnosIn >> name; m->gobble(accnosIn);
+                        map<string, string>::iterator hit = groupNameMap.find(name);
+                        if (hit == groupNameMap.end()) { m->mothurOut("[ERROR]: parsing cannot find " + name + ".\n"); m->control_pressed = true; }
+                        else {
+                            vector<string> dupNames; m->splitAtComma(hit->second, dupNames);
+                            for (int j = 0; j < dupNames.size(); j++) { expanded << dupNames[j] << endl; }
+                        }
+                    }
+                    expanded.close();
+                    accnosIn.close();
+                    m->renameFile(groupAccnos+".temp", groupAccnos);
+                }
+            }
+
+            //append files
+            m->appendFiles(groupOutput, outputFName); m->mothurRemove(groupOutput);
+            m->appendFiles(groupAccnos, accnos); m->mothurRemove(groupAccnos);
+            if (chimealns) { m->appendFiles(groupAlns, alns); m->mothurRemove(groupAlns); }
+
+            m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - groupStart) + " secs to check " + toString(numSeqs) + " sequences from group " + groups[i] + "."); m->mothurOutEndLine();
+        }
+
+        if (hasCount && dups) { outCountList.close(); }
+
+        return totalSeqs;
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraVsearchCommand", "driverGroups");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+
+// Builds the vsearch command line for one fasta file, runs it through system(), and turns the
+// fasta file of chimeras that vsearch writes ("<outputFName>vsearch_out") into the accnos file.
+//   outputFName - receives vsearch's --uchimeout report
+//   filename    - fasta file to check; when templatefile is not "self" a cleaned copy
+//                 ("<filename>.vsearch_formatted", see prepFile) is checked against the template
+//   accnos      - receives one chimeric sequence name per line
+//   alns        - receives the --uchimealns output when chimealns is set
+//   numChimeras - set to the number of names written to accnos
+// Always returns 0; the chimera count is reported through numChimeras only.
+int ChimeraVsearchCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
+    try {
+
+        outputFName = m->getFullPathName(outputFName);
+        string outputFNamec = m->getFullPathName(outputFName+"vsearch_out");
+        filename = m->getFullPathName(filename);
+        alns = m->getFullPathName(alns);
+
+        //to allow for spaces in the path
+        outputFName = "\"" + outputFName + "\"";
+        outputFNamec = "\"" + outputFNamec + "\"";
+        filename = "\"" + filename + "\"";
+        alns = "\"" + alns + "\"";
+
+        //the command line is assembled from std::strings so no option can outgrow a hand-sized buffer
+        vector<string> cPara;
+        cPara.push_back("\"" + vsearchLocation + "\" ");
+
+        //are you using a reference file
+        if (templatefile != "self") {
+            //strip the surrounding quotes before building the cleaned copy's name
+            string formattedFileName = filename.substr(1, filename.length()-2) + ".vsearch_formatted";
+            prepFile(filename.substr(1, filename.length()-2), formattedFileName);
+            filename = "\"" + formattedFileName + "\"";
+
+            //add reference file
+            cPara.push_back("--db");
+            cPara.push_back(templatefile);
+
+            cPara.push_back("--uchime_ref");
+            cPara.push_back(filename);
+        }else { //denovo
+            cPara.push_back("--uchime_denovo");
+            cPara.push_back(filename);
+        }
+
+        cPara.push_back("--chimeras");
+        cPara.push_back(outputFNamec);
+
+        cPara.push_back("--uchimeout");
+        cPara.push_back(outputFName);
+
+        cPara.push_back("--xsize");
+
+        if (chimealns) {
+            cPara.push_back("--uchimealns");
+            cPara.push_back(alns);
+        }
+
+        if (useAbskew) {
+            cPara.push_back("--abskew");
+            cPara.push_back(abskew);
+        }
+
+        if (useMinH) {
+            cPara.push_back("--minh");
+            cPara.push_back(minh);
+        }
+
+        if (useMindiv) {
+            cPara.push_back("--mindiv");
+            cPara.push_back(mindiv);
+        }
+
+        if (useMindiffs) {
+            //pass the mindiffs setting itself, not the mindiv value
+            cPara.push_back("--mindiffs");
+            cPara.push_back(mindiffs);
+        }
+
+        if (useXn) {
+            cPara.push_back("--xn");
+            cPara.push_back(xn);
+        }
+
+        if (useDn) {
+            cPara.push_back("--dn");
+            cPara.push_back(dn);
+        }
+
+        //--threads=1
+        cPara.push_back("--threads=1");
+
+        //every argument is followed by a single space
+        string commandString = "";
+        for (int i = 0; i < cPara.size(); i++) { commandString += cPara[i] + " "; }
+
+        if (m->debug) { m->mothurOut("[DEBUG]: vsearch command = " + commandString + ".\n"); }
+        //cout << "commandString = " << commandString << endl;
+
+        system(commandString.c_str());
+
+        //remove "" from filenames
+        outputFName = outputFName.substr(1, outputFName.length()-2);
+        outputFNamec = outputFNamec.substr(1, outputFNamec.length()-2);
+        filename = filename.substr(1, filename.length()-2);
+        alns = alns.substr(1, alns.length()-2);
+
+        if (m->control_pressed) { return 0; }
+
+        //create accnos file from uchime results
+        ifstream in;
+        m->openInputFile(outputFNamec, in, "no error");
+
+        ofstream out;
+        m->openOutputFile(accnos, out);
+
+        numChimeras = 0;
+        while(!in.eof()) {
+
+            if (m->control_pressed) { break; }
+
+            Sequence seq(in); m->gobble(in);
+
+            string name = seq.getName();
+
+            name = name.substr(0, name.length()-1); //rip off last ;
+            name = name.substr(0, name.find_last_of(';'));
+
+            out << name << endl; numChimeras++;
+        }
+        in.close();
+        out.close();
+
+        m->mothurRemove(outputFNamec);
+
+        //if (templatefile != "self") { m->mothurRemove(filename); }
+
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraVsearchCommand", "driver");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+//uchime can't handle some of the things allowed in mothurs fasta files. This functions "cleans up" the file.
+// Copies the fasta file `filename` to `output`, writing each named record through
+// Sequence::printUnAlignedSequence; records with an empty name are left out.
+// Stops early when m->control_pressed is set. Always returns 0.
+int ChimeraVsearchCommand::prepFile(string filename, string output) {
+    try {
+
+        ifstream fastaIn;
+        m->openInputFile(filename, fastaIn);
+
+        ofstream cleaned;
+        m->openOutputFile(output, cleaned);
+
+        while (!fastaIn.eof() && !m->control_pressed) {
+            Sequence record(fastaIn); m->gobble(fastaIn);
+
+            if (record.getName() == "") { continue; }
+            record.printUnAlignedSequence(cleaned);
+        }
+        fastaIn.close();
+        cleaned.close();
+
+        return 0;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraVsearchCommand", "prepFile");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+
+// Runs driver() on pieces of `filename` in parallel using fork() (only compiled on the
+// unix-like platforms listed in the #if below).
+// The parent handles files[0] and writes straight to outputFileName / accnos / alns; each child
+// handles files[process] and writes to "<name><pid>.temp" files that the parent appends afterwards.
+// Children report driver()'s return value and their chimera count through "<outputFileName><pid>.num.temp".
+// numChimeras ends up as the parent's count plus every child's count.
+// Returns the sum of the driver() return values (0 if interrupted right after dividing the file).
+int ChimeraVsearchCommand::createProcesses(string outputFileName, string filename, string accnos, string alns, int& numChimeras) {
+ try {
+
+ processIDS.clear();
+ int process = 1;
+ int num = 0;
+ vector<string> files;
+
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+ //break up file into multiple files
+ m->divideFile(filename, processors, files);
+
+ if (m->control_pressed) { return 0; }
+
+ //loop through and create all the processes you want
+ //process starts at 1 because the parent itself takes piece 0
+ while (process != processors) {
+ pid_t pid = fork();
+
+ if (pid > 0) {
+ processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
+ process++;
+ }else if (pid == 0){
+ //child: check piece number `process`, writing to files tagged with m->mothurGetpid(process)
+ num = driver(outputFileName + toString(m->mothurGetpid(process)) + ".temp", files[process], accnos + toString(m->mothurGetpid(process)) + ".temp", alns + toString(m->mothurGetpid(process)) + ".temp", numChimeras);
+
+ //pass numSeqs to parent
+ ofstream out;
+ string tempFile = outputFileName + toString(m->mothurGetpid(process)) + ".num.temp";
+ m->openOutputFile(tempFile, out);
+ out << num << endl;
+ out << numChimeras << endl;
+ out.close();
+
+ exit(0);
+ }else {
+ //fork failed: interrupt the children already started and quit
+ m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
+ for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+ exit(0);
+ }
+ }
+
+ //do my part
+ num = driver(outputFileName, files[0], accnos, alns, numChimeras);
+
+ //force parent to wait until all the processes are done
+ //one wait() call per child; temp only serves as the status slot wait() writes into
+ for (int i=0;i<processIDS.size();i++) {
+ int temp = processIDS[i];
+ wait(&temp);
+ }
+
+ //collect the two numbers each child left in its .num.temp file
+ for (int i = 0; i < processIDS.size(); i++) {
+ ifstream in;
+ string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
+ m->openInputFile(tempFile, in);
+ if (!in.eof()) {
+ int tempNum = 0;
+ in >> tempNum; m->gobble(in);
+ num += tempNum;
+ in >> tempNum;
+ numChimeras += tempNum;
+ }
+ in.close(); m->mothurRemove(tempFile);
+ }
+#endif
+ //append output files
+ //processIDS is empty when the block above was not compiled, so this loop then does nothing
+ for(int i=0;i<processIDS.size();i++){
+ m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
+ m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
+
+ m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
+ m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
+
+ if (chimealns) {
+ m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
+ m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
+ }
+ }
+
+ //get rid of the file pieces.
+ for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
+
+ return num;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ChimeraVsearchCommand", "createProcesses");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+
+// Splits `groups` into one contiguous range per processor and runs driverGroups() on each range,
+// using fork() on the unix-like platforms listed in the #if below. The parent handles the first
+// range and writes straight to outputFName / accnos / alns; each child writes to
+// "<name><pid>.temp" files that the parent appends afterwards.
+// When a count file is used with dups set, every "name group" pair reported in the ".byCount"
+// files has its abundance set to 0 in a copy of the count table read from nameFile, and that
+// table is printed to newCountFile.
+// Returns the sum of the driverGroups() return values.
+int ChimeraVsearchCommand::createProcessesGroups(string outputFName, string filename, string accnos, string alns, string newCountFile, vector<string> groups, string nameFile, string groupFile, string fastaFile) {
+    try {
+
+        processIDS.clear();
+        int process = 1;
+        int num = 0;
+
+        CountTable newCount;
+        if (hasCount && dups) { newCount.readTable(nameFile, true, false); }
+
+        //nothing to hand out: with no groups the sanity check below would set processors to 0, the fork
+        //loop (process starts at 1) would never reach its exit condition and lines[0] would not exist
+        if (groups.size() == 0) {
+            if (hasCount && dups) { newCount.printTable(newCountFile); }
+            return num;
+        }
+
+        //sanity check
+        if (groups.size() < processors) { processors = groups.size(); }
+
+        //divide the groups between the processors
+        vector<linePair> lines;
+        int remainingPairs = groups.size();
+        int startIndex = 0;
+        for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) {
+            int numPairs = remainingPairs; //case for last processor
+            //both operands are ints, so the division has already truncated before ceil() sees it
+            if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); }
+            lines.push_back(linePair(startIndex, (startIndex+numPairs))); //startIndex, endIndex
+            startIndex = startIndex + numPairs;
+            remainingPairs = remainingPairs - numPairs;
+        }
+
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+
+        //loop through and create all the processes you want
+        //process starts at 1 because the parent itself takes lines[0]
+        while (process != processors) {
+            pid_t pid = fork();
+
+            if (pid > 0) {
+                processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
+                process++;
+            }else if (pid == 0){
+                //child: check the groups in lines[process], writing to files tagged with m->mothurGetpid(process)
+                num = driverGroups(outputFName + toString(m->mothurGetpid(process)) + ".temp", filename + toString(m->mothurGetpid(process)) + ".temp", accnos + toString(m->mothurGetpid(process)) + ".temp", alns + toString(m->mothurGetpid(process)) + ".temp", accnos + ".byCount." + toString(m->mothurGetpid(process)) + ".temp", lines[process].start, lines[process].end, groups);
+
+                //pass numSeqs to parent
+                ofstream out;
+                string tempFile = outputFName + toString(m->mothurGetpid(process)) + ".num.temp";
+                m->openOutputFile(tempFile, out);
+                out << num << endl;
+                out.close();
+
+                exit(0);
+            }else {
+                //fork failed: interrupt the children already started and quit
+                m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
+                for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                exit(0);
+            }
+
+        }
+
+        //parent pid marker, only useful while debugging
+        if (m->debug) { m->mothurOut("[DEBUG]: " + toString( getpid() ) + " here\n"); }
+
+        //do my part
+        num = driverGroups(outputFName, filename, accnos, alns, accnos + ".byCount", lines[0].start, lines[0].end, groups);
+
+        //force parent to wait until all the processes are done
+        //one wait() call per child; temp only serves as the status slot wait() writes into
+        for (int i=0;i<processIDS.size();i++) {
+            int temp = processIDS[i];
+            wait(&temp);
+        }
+
+        //collect the count each child left in its .num.temp file
+        for (int i = 0; i < processIDS.size(); i++) {
+            ifstream in;
+            string tempFile = outputFName + toString(processIDS[i]) + ".num.temp";
+            m->openInputFile(tempFile, in);
+            if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+            in.close(); m->mothurRemove(tempFile);
+        }
+
+#endif
+
+        //read my own
+        if (hasCount && dups) {
+            if (!m->isBlank(accnos + ".byCount")) {
+                ifstream in2;
+                m->openInputFile(accnos + ".byCount", in2);
+
+                string name, group;
+                while (!in2.eof()) {
+                    in2 >> name >> group; m->gobble(in2);
+                    newCount.setAbund(name, group, 0);
+                }
+                in2.close();
+            }
+            m->mothurRemove(accnos + ".byCount");
+        }
+
+        //append output files
+        for(int i=0;i<processIDS.size();i++){
+            m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
+            m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));
+
+            m->appendFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
+            m->mothurRemove((accnos + toString(processIDS[i]) + ".temp"));
+
+            if (chimealns) {
+                m->appendFiles((alns + toString(processIDS[i]) + ".temp"), alns);
+                m->mothurRemove((alns + toString(processIDS[i]) + ".temp"));
+            }
+
+            //zero out the pairs this child reported as chimeric
+            if (hasCount && dups) {
+                if (!m->isBlank(accnos + ".byCount." + toString(processIDS[i]) + ".temp")) {
+                    ifstream in2;
+                    m->openInputFile(accnos + ".byCount." + toString(processIDS[i]) + ".temp", in2);
+
+                    string name, group;
+                    while (!in2.eof()) {
+                        in2 >> name >> group; m->gobble(in2);
+                        newCount.setAbund(name, group, 0);
+                    }
+                    in2.close();
+                }
+                m->mothurRemove(accnos + ".byCount." + toString(processIDS[i]) + ".temp");
+            }
+
+        }
+
+        //print new *.pick.count_table
+        if (hasCount && dups) { newCount.printTable(newCountFile); }
+
+        return num;
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraVsearchCommand", "createProcessesGroups");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+
diff --git a/source/commands/chimeravsearchcommand.h b/source/commands/chimeravsearchcommand.h
new file mode 100644
index 0000000..ae1713f
--- /dev/null
+++ b/source/commands/chimeravsearchcommand.h
@@ -0,0 +1,66 @@
+//
+// chimeravsearchcommand.h
+// Mothur
+//
+// Created by Sarah Westcott on 6/16/16.
+// Copyright (c) 2016 Schloss Lab. All rights reserved.
+//
+
+#ifndef __Mothur__chimeravsearchcommand__
+#define __Mothur__chimeravsearchcommand__
+
+#include "command.hpp"
+#include "sequenceparser.h"
+#include "counttable.h"
+#include "sequencecountparser.h"
+
+/***********************************************************/
+
+class ChimeraVsearchCommand : public Command {
+public:
+ ChimeraVsearchCommand(string);
+ ChimeraVsearchCommand();
+ ~ChimeraVsearchCommand() {}
+
+ vector<string> setParameters();
+ string getCommandName() { return "chimera.vsearch"; }
+ string getCommandCategory() { return "Sequence Processing"; }
+
+ string getHelpString();
+ string getOutputPattern(string);
+ string getCitation() { return "vsearch by https://github.com/torognes/vsearch.\nhttp://www.mothur.org/wiki/Chimera.vsearch\n"; }
+ string getDescription() { return "detect chimeric sequences"; }
+
+ int execute();
+ void help() { m->mothurOut(getHelpString()); }
+
+private:
+ vector<int> processIDS; //processid
+ int driver(string, string, string, string, int&);
+ int createProcesses(string, string, string, string, int&);
+
+ bool abort, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, ucl, useMindiffs, hasCount, hasName, dups;
+ string fastafile, groupfile, templatefile, outputDir, namefile, countfile, abskew, minh, mindiv, xn, dn, mindiffs, vsearchLocation;
+ int processors;
+
+ SequenceParser* sparser;
+ SequenceCountParser* cparser;
+ vector<string> outputNames;
+ vector<string> fastaFileNames;
+ vector<string> nameFileNames;
+ vector<string> groupFileNames;
+
+ string getNamesFile(string&);
+ int readFasta(string, map<string, string>&);
+ int deconvoluteResults(map<string, string>&, string, string, string);
+ int driverGroups(string, string, string, string, string, int, int, vector<string>);
+ int createProcessesGroups(string, string, string, string, string, vector<string>, string, string, string);
+ int prepFile(string filename, string);
+
+
+};
+
+/***********************************************************/
+
+#endif
+
diff --git a/source/commands/classifyotucommand.cpp b/source/commands/classifyotucommand.cpp
index cd08e3b..0c8e231 100644
--- a/source/commands/classifyotucommand.cpp
+++ b/source/commands/classifyotucommand.cpp
@@ -17,7 +17,6 @@ vector<string> ClassifyOtuCommand::setParameters(){
try {
CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(plist);
CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none","constaxonomy",false,true,true); parameters.push_back(ptaxonomy);
- CommandParameter preftaxonomy("reftaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(preftaxonomy);
CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
CommandParameter poutput("output", "Multiple", "plain-detail", "detail", "", "", "","",false,false, true); parameters.push_back(poutput);
@@ -47,8 +46,7 @@ vector<string> ClassifyOtuCommand::setParameters(){
string ClassifyOtuCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The classify.otu command parameters are list, taxonomy, reftaxonomy, name, group, count, persample, cutoff, label, basis, relabund and probs. The taxonomy and list parameters are required unless you have a valid current file.\n";
- helpString += "The reftaxonomy parameter allows you give the name of the reference taxonomy file used when you classified your sequences. Providing it will keep the rankIDs in the summary file static.\n";
+ helpString += "The classify.otu command parameters are list, taxonomy, name, group, count, persample, cutoff, label, basis, relabund and probs. The taxonomy and list parameters are required unless you have a valid current file.\n";
helpString += "The name parameter allows you add a names file with your taxonomy file.\n";
helpString += "The group parameter allows you provide a group file to use in creating the summary file breakdown.\n";
helpString += "The count parameter allows you add a count file associated with your list file. When using the count parameter mothur assumes your list file contains only uniques.\n";
@@ -166,14 +164,6 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option) {
if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
}
- it = parameters.find("reftaxonomy");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["reftaxonomy"] = inputDir + it->second; }
- }
-
it = parameters.find("group");
//user has given a template file
if(it != parameters.end()){
@@ -214,10 +204,6 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option) {
}
else if (taxfile == "not open") { abort = true; }
else { m->setTaxonomyFile(taxfile); }
-
- refTaxonomy = validParameter.validFile(parameters, "reftaxonomy", true);
- if (refTaxonomy == "not found") { refTaxonomy = ""; m->mothurOut("reftaxonomy is not required, but if given will keep the rankIDs in the summary file static."); m->mothurOutEndLine(); }
- else if (refTaxonomy == "not open") { abort = true; }
namefile = validParameter.validFile(parameters, "name", true);
if (namefile == "not open") { namefile = ""; abort = true; }
@@ -415,7 +401,7 @@ int ClassifyOtuCommand::execute(){
}
}
//**********************************************************************************************************************
-vector<string> ClassifyOtuCommand::findConsensusTaxonomy(vector<string> names, int& size, string& conTax) {
+vector<string> ClassifyOtuCommand::findConsensusTaxonomy(vector<string> names, int& size, string& conTax, string group) {
try{
conTax = "";
vector<string> allNames;
@@ -427,53 +413,73 @@ vector<string> ClassifyOtuCommand::findConsensusTaxonomy(vector<string> names, i
size = 0;
for (int i = 0; i < names.size(); i++) {
-
- //if namesfile include the names
- if (namefile != "") {
-
- //is this sequence in the name file - namemap maps seqName -> repSeqName
- it2 = nameMap.find(names[i]);
-
- if (it2 == nameMap.end()) { //this name is not in name file, skip it
- m->mothurOut(names[i] + " is not in your name file. I will not include it in the consensus."); m->mothurOutEndLine();
- }else{
-
- //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
- it = taxMap.find(it2->second);
-
- if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
-
- if (names[i] != it2->second) { m->mothurOut(names[i] + " is represented by " + it2->second + " and is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine(); }
- else { m->mothurOut(names[i] + " is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine(); }
- }else{
-
- //add seq to tree
- phylo->addSeqToTree(names[i], it->second);
- size++;
- allNames.push_back(names[i]);
- }
- }
-
- }else{
- //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
- it = taxMap.find(names[i]);
-
- if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
- m->mothurOut("[WARNING]: " + names[i] + " is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine();
- }else{
+
+ if (group != "") { //no need to check for name file, names already added in previous step
+ //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
+ it = taxMap.find(names[i]);
+
+ if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
+ m->mothurOut("[WARNING]: " + names[i] + " is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine();
+ }else{
if (countfile != "") {
- int numDups = ct->getNumSeqs(names[i]);
+ int numDups = ct->getGroupCount(names[i], group);
for (int j = 0; j < numDups; j++) { phylo->addSeqToTree(names[i], it->second); }
size += numDups;
}else{
- //add seq to tree
+ //add seq to tree
phylo->addSeqToTree(names[i], it->second);
- size++;
+ size++;
}
allNames.push_back(names[i]);
- }
- }
+ }
+ }else {
+ //if namesfile include the names
+ if (namefile != "") {
+
+ //is this sequence in the name file - namemap maps seqName -> repSeqName
+ it2 = nameMap.find(names[i]);
+
+ if (it2 == nameMap.end()) { //this name is not in name file, skip it
+ m->mothurOut(names[i] + " is not in your name file. I will not include it in the consensus."); m->mothurOutEndLine();
+ }else{
+
+ //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
+ it = taxMap.find(it2->second);
+
+ if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
+
+ if (names[i] != it2->second) { m->mothurOut(names[i] + " is represented by " + it2->second + " and is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine(); }
+ else { m->mothurOut(names[i] + " is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine(); }
+ }else{
+
+ //add seq to tree
+ phylo->addSeqToTree(names[i], it->second);
+ size++;
+ allNames.push_back(names[i]);
+ }
+ }
+
+ }else{
+ //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
+ it = taxMap.find(names[i]);
+
+ if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
+ m->mothurOut("[WARNING]: " + names[i] + " is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine();
+ }else{
+ if (countfile != "") {
+ int numDups = ct->getNumSeqs(names[i]);
+ for (int j = 0; j < numDups; j++) { phylo->addSeqToTree(names[i], it->second); }
+ size += numDups;
+ }else{
+ //add seq to tree
+ phylo->addSeqToTree(names[i], it->second);
+ size++;
+ }
+ allNames.push_back(names[i]);
+ }
+ }
+ }
if (m->control_pressed) { delete phylo; return allNames; }
@@ -508,7 +514,7 @@ vector<string> ClassifyOtuCommand::findConsensusTaxonomy(vector<string> names, i
//is this taxonomy above cutoff
int consensusConfidence = ceil((bestChildSize / (float) size) * 100);
-
+
if (consensusConfidence >= cutoff) { //if yes, add it
if (probs) {
conTax += bestChild.name + "(" + toString(consensusConfidence) + ");";
@@ -523,10 +529,10 @@ vector<string> ClassifyOtuCommand::findConsensusTaxonomy(vector<string> names, i
//move down a level
currentNode = bestChild;
}
-
- if (myLevel != phylo->getMaxLevel()) { conTax = m->addUnclassifieds(conTax, phylo->getMaxLevel(), probs); }
- if (conTax == "") { conTax = "no_consensus;"; }
+ if (conTax == "") { conTax = "unknown;"; }
+
+ if (myLevel != phylo->getMaxLevel()) { conTax = m->addUnclassifieds(conTax, phylo->getMaxLevel(), probs); }
delete phylo;
@@ -562,13 +568,8 @@ int ClassifyOtuCommand::process(ListVector* processList) {
out << "OTU\tSize\tTaxonomy" << endl;
PhyloSummary* taxaSum;
- if (countfile != "") {
- if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, ct,relabund, printlevel); }
- else { taxaSum = new PhyloSummary(ct,relabund, printlevel); }
- }else {
- if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, groupMap, relabund, printlevel); }
- else { taxaSum = new PhyloSummary(groupMap,relabund, printlevel); }
- }
+ if (countfile != "") { taxaSum = new PhyloSummary(ct,relabund, printlevel); }
+ else { taxaSum = new PhyloSummary(groupMap,relabund, printlevel); }
vector<string> outs;
vector<PhyloSummary*> taxaSums;
@@ -585,13 +586,8 @@ int ClassifyOtuCommand::process(ListVector* processList) {
outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile);
PhyloSummary* taxaSumt;
- if (countfile != "") {
- if (refTaxonomy != "") { taxaSumt = new PhyloSummary(refTaxonomy, ct, relabund, printlevel); }
- else { taxaSumt = new PhyloSummary(ct, relabund, printlevel); }
- }else {
- if (refTaxonomy != "") { taxaSumt = new PhyloSummary(refTaxonomy, groupMap,relabund, printlevel); }
- else { taxaSumt = new PhyloSummary(groupMap,relabund, printlevel); }
- }
+ if (countfile != "") { taxaSumt = new PhyloSummary(ct, relabund, printlevel);
+ }else { taxaSumt = new PhyloSummary(groupMap,relabund, printlevel); }
taxaSums.push_back(taxaSumt);
}
}
@@ -608,7 +604,7 @@ int ClassifyOtuCommand::process(ListVector* processList) {
vector<string> thisNames;
m->splitAtComma(binnames, thisNames);
- names = findConsensusTaxonomy(thisNames, size, conTax);
+ names = findConsensusTaxonomy(thisNames, size, conTax, "");
if (m->control_pressed) { break; }
@@ -683,7 +679,7 @@ int ClassifyOtuCommand::process(ListVector* processList) {
}
for (itParsed = parsedNames.begin(); itParsed != parsedNames.end(); itParsed++) {
- vector<string> theseNames = findConsensusTaxonomy(itParsed->second, size, conTax);
+ vector<string> theseNames = findConsensusTaxonomy(itParsed->second, size, conTax, itParsed->first);
if (m->control_pressed) { break; }
@@ -719,6 +715,7 @@ int ClassifyOtuCommand::process(ListVector* processList) {
if (persample) {
for (int i = 0; i < groups.size(); i++) {
ofstream outSums;
+ variables["[distance]"] = processList->getLabel() + "." + groups[i];
string outputSumFile = getOutputFileName("taxsummary", variables);
m->openOutputFile(outputSumFile, outSums);
outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile);
diff --git a/source/commands/classifyotucommand.h b/source/commands/classifyotucommand.h
index 5668a8a..ae108ae 100644
--- a/source/commands/classifyotucommand.h
+++ b/source/commands/classifyotucommand.h
@@ -40,7 +40,7 @@ private:
CountTable* ct;
ListVector* list;
InputData* input;
- string listfile, namefile, taxfile, label, outputDir, refTaxonomy, groupfile, basis, countfile, output;
+ string listfile, namefile, taxfile, label, outputDir, groupfile, basis, countfile, output;
bool abort, allLines, probs, persample, relabund;
int cutoff, threshold, printlevel;
set<string> labels; //holds labels to be used
@@ -50,7 +50,7 @@ private:
int process(ListVector*);
int processTaxMap();
- vector<string> findConsensusTaxonomy(vector<string>, int&, string&); // returns the name of the "representative" taxonomy of given bin
+ vector<string> findConsensusTaxonomy(vector<string>, int&, string&, string); // returns the name of the "representative" taxonomy of given bin
};
diff --git a/source/commands/classifyrfsharedcommand.cpp b/source/commands/classifyrfsharedcommand.cpp
index 49881f8..2258e3a 100644
--- a/source/commands/classifyrfsharedcommand.cpp
+++ b/source/commands/classifyrfsharedcommand.cpp
@@ -10,6 +10,7 @@
#include "randomforest.hpp"
#include "decisiontree.hpp"
#include "rftreenode.hpp"
+#include "sharedutilities.h"
//**********************************************************************************************************************
vector<string> ClassifyRFSharedCommand::setParameters(){
@@ -20,7 +21,7 @@ vector<string> ClassifyRFSharedCommand::setParameters(){
CommandParameter potupersplit("otupersplit", "Multiple", "log2-squareroot", "log2", "", "", "","",false,false); parameters.push_back(potupersplit);
CommandParameter psplitcriteria("splitcriteria", "Multiple", "gainratio-infogain", "gainratio", "", "", "","",false,false); parameters.push_back(psplitcriteria);
CommandParameter pnumtrees("numtrees", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pnumtrees);
-
+ //CommandParameter psets("sets", "String", "", "", "", "", "","",false,false); parameters.push_back(psets);
// parameters related to pruning
CommandParameter pdopruning("prune", "Boolean", "", "T", "", "", "", "", false, false); parameters.push_back(pdopruning);
CommandParameter ppruneaggrns("pruneaggressiveness", "Number", "", "0.9", "", "", "", "", false, false); parameters.push_back(ppruneaggrns);
@@ -51,6 +52,7 @@ string ClassifyRFSharedCommand::getHelpString(){
helpString += "The classify.rf command allows you to ....\n";
helpString += "The classify.rf command parameters are: shared, design, label, groups, otupersplit.\n";
helpString += "The label parameter is used to analyze specific labels in your input.\n";
+ //helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. THe default is all sets in the designfile.\n";
helpString += "The groups parameter allows you to specify which of the groups in your designfile you would like analyzed.\n";
helpString += "The classify.rf should be in the following format: \n";
helpString += "classify.rf(shared=yourSharedFile, design=yourDesignFile)\n";
@@ -216,6 +218,12 @@ ClassifyRFSharedCommand::ClassifyRFSharedCommand(string option) {
if (groups == "not found") { groups = ""; }
else { m->splitAtDash(groups, Groups); }
m->setGroups(Groups);
+
+ //sets = validParameter.validFile(parameters, "sets", false);
+ //if (sets == "not found") { sets = ""; }
+ //else {
+ // m->splitAtDash(sets, Sets);
+ //}
//Commonly used to process list, rabund, sabund, shared and relabund files. Look at "smart distancing" examples below in the execute function.
string label = validParameter.validFile(parameters, "label", false);
@@ -238,12 +246,32 @@ int ClassifyRFSharedCommand::execute() {
if (abort == true) { if (calledHelp) { return 0; } return 2; }
- InputData input(sharedfile, "sharedfile");
- vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
-
- //read design file
- designMap.read(designfile);
-
+
+ //read design file
+ designMap.read(designfile);
+
+ /*if (Sets.size() != 0) { //user has picked sets find groups to include from lookup
+ //make sure sets are all in designMap
+ SharedUtil* util = new SharedUtil();
+ vector<string> dGroups = designMap.getCategory();
+ util->setGroups(Sets, dGroups);
+
+ vector<string> groupsToSelect = designMap.getNamesGroups(Sets);
+
+ if (Groups.size() != 0) {
+ //make sure all user selected groups are in the sets asked for
+ util->setGroups(Groups, groupsToSelect);
+ m->setGroups(Groups);
+ }else {
+ m->setGroups(groupsToSelect);
+ }
+ delete util;
+ }*/
+
+ InputData input(sharedfile, "sharedfile");
+ vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
+
+
string lastLabel = lookup[0]->getLabel();
set<string> processedLabels;
set<string> userLabels = labels;
@@ -346,9 +374,9 @@ void ClassifyRFSharedCommand::processSharedAndDesignData(vector<SharedRAbundVect
map<string, int> treatmentToIntMap;
map<int, string> intToTreatmentMap;
- vector<string> groups = designMap.getCategory();
- for (int i = 0; i < groups.size(); i++) {
- string treatmentName = groups[i];
+ //vector<string> groups = designMap.getCategory();
+ for (int i = 0; i < lookup.size(); i++) {
+ string treatmentName = designMap.get(lookup[i]->getGroup());
treatmentToIntMap[treatmentName] = i;
intToTreatmentMap[i] = treatmentName;
}
@@ -379,9 +407,12 @@ void ClassifyRFSharedCommand::processSharedAndDesignData(vector<SharedRAbundVect
RandomForest randomForest(dataSet, numDecisionTrees, treeSplitCriterion, doPruning, pruneAggressiveness, discardHighErrorTrees, highErrorTreeDiscardThreshold, optimumFeatureSubsetSelectionCriteria, featureStandardDeviationThreshold);
randomForest.populateDecisionTrees();
+
randomForest.calcForrestErrorRate();
+
randomForest.printConfusionMatrix(intToTreatmentMap);
+
map<string, string> variables;
variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "RF.";
variables["[distance]"] = lookup[0]->getLabel();
diff --git a/source/commands/classifyrfsharedcommand.h b/source/commands/classifyrfsharedcommand.h
index 29611cd..2fbc8f0 100755
--- a/source/commands/classifyrfsharedcommand.h
+++ b/source/commands/classifyrfsharedcommand.h
@@ -33,9 +33,9 @@ public:
private:
bool abort;
string outputDir;
- vector<string> outputNames, Groups;
+ vector<string> outputNames, Groups, Sets;
- string sharedfile, designfile;
+ string sharedfile, designfile, sets;
set<string> labels;
bool allLines;
diff --git a/source/commands/classifyseqscommand.cpp b/source/commands/classifyseqscommand.cpp
index 7ec7582..d5b3cb6 100644
--- a/source/commands/classifyseqscommand.cpp
+++ b/source/commands/classifyseqscommand.cpp
@@ -33,7 +33,6 @@ vector<string> ClassifySeqsCommand::setParameters(){
CommandParameter pcutoff("cutoff", "Number", "", "80", "", "", "","",false,true); parameters.push_back(pcutoff);
CommandParameter pprobs("probs", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pprobs);
CommandParameter piters("iters", "Number", "", "100", "", "", "","",false,true); parameters.push_back(piters);
- CommandParameter psave("save", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psave);
CommandParameter pshortcuts("shortcuts", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pshortcuts);
CommandParameter prelabund("relabund", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(prelabund);
CommandParameter pnumwanted("numwanted", "Number", "", "10", "", "", "","",false,true); parameters.push_back(pnumwanted);
@@ -64,7 +63,6 @@ string ClassifySeqsCommand::getHelpString(){
helpString += "The method parameter allows you to specify classification method to use. Your options are: wang, knn and zap. The default is wang.\n";
helpString += "The ksize parameter allows you to specify the kmer size for finding most similar template to candidate. The default is 8.\n";
helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
- helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
helpString += "The mistmatch parameter allows you to specify the penalty for having different bases. The default is -1.0.\n";
helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
@@ -127,7 +125,7 @@ ClassifySeqsCommand::ClassifySeqsCommand(){
ClassifySeqsCommand::ClassifySeqsCommand(string option) {
try {
abort = false; calledHelp = false;
- rdb = ReferenceDB::getInstance(); hasName = false; hasCount=false;
+ hasName = false; hasCount=false;
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true; }
@@ -493,41 +491,18 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option) {
temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
m->setProcessors(temp);
m->mothurConvert(temp, processors);
-
- temp = validParameter.validFile(parameters, "save", false); if (temp == "not found"){ temp = "f"; }
- save = m->isTrue(temp);
- rdb->save = save;
- if (save) { //clear out old references
- rdb->clearMemory();
- }
-
+
//this has to go after save so that if the user sets save=t and provides no reference we abort
templateFileName = validParameter.validFile(parameters, "reference", true);
- if (templateFileName == "not found") {
- //check for saved reference sequences
- if (rdb->referenceSeqs.size() != 0) {
- templateFileName = "saved";
- }else {
- m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required for the classify.seqs command.");
- m->mothurOutEndLine();
- abort = true;
- }
- }else if (templateFileName == "not open") { abort = true; }
- else { if (save) { rdb->setSavedReference(templateFileName); } }
+ if (templateFileName == "not found") {
+ m->mothurOut("[ERROR]: The reference parameter is a required for the classify.seqs command.\n"); abort = true;
+ }else if (templateFileName == "not open") { abort = true; }
+
//this has to go after save so that if the user sets save=t and provides no reference we abort
taxonomyFileName = validParameter.validFile(parameters, "taxonomy", true);
- if (taxonomyFileName == "not found") {
- //check for saved reference sequences
- if (rdb->wordGenusProb.size() != 0) {
- taxonomyFileName = "saved";
- }else {
- m->mothurOut("[ERROR]: You don't have any saved taxonomy information and the taxonomy parameter is a required for the classify.seqs command.");
- m->mothurOutEndLine();
- abort = true;
- }
- }else if (taxonomyFileName == "not open") { abort = true; }
- else { if (save) { rdb->setSavedTaxonomy(taxonomyFileName); } }
+ if (taxonomyFileName == "not found") { m->mothurOut("[ERROR]: The taxonomy parameter is a required for the classify.seqs command.\n"); abort = true;
+ }else if (taxonomyFileName == "not open") { abort = true; }
search = validParameter.validFile(parameters, "search", false); if (search == "not found"){ search = "kmer"; }
@@ -640,7 +615,6 @@ int ClassifySeqsCommand::execute(){
m->mothurOut("Classifying sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
string baseTName = m->getSimpleName(taxonomyFileName);
- if (taxonomyFileName == "saved") { baseTName = rdb->getSavedTaxonomy(); }
//set rippedTaxName to
string RippedTaxName = "";
diff --git a/source/commands/classifyseqscommand.h b/source/commands/classifyseqscommand.h
index c390941..57e9937 100644
--- a/source/commands/classifyseqscommand.h
+++ b/source/commands/classifyseqscommand.h
@@ -13,7 +13,6 @@
#include "command.hpp"
#include "classify.h"
-#include "referencedb.h"
#include "sequence.hpp"
#include "bayesian.h"
#include "phylotree.h"
@@ -67,7 +66,6 @@ private:
map<string, vector<string> >::iterator itNames;
Classify* classify;
- ReferenceDB* rdb;
string fastaFileName, templateFileName, countfile, distanceFileName, namefile, search, method, taxonomyFileName, outputDir, groupfile, output;
int processors, kmerSize, numWanted, cutoff, iters, printlevel;
diff --git a/source/commands/clearmemorycommand.cpp b/source/commands/clearmemorycommand.cpp
deleted file mode 100644
index abdc555..0000000
--- a/source/commands/clearmemorycommand.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * clearmemorycommand.cpp
- * Mothur
- *
- * Created by westcott on 7/6/11.
- * Copyright 2011 Schloss Lab. All rights reserved.
- *
- */
-
-#include "clearmemorycommand.h"
-#include "referencedb.h"
-
-//**********************************************************************************************************************
-vector<string> ClearMemoryCommand::setParameters(){
- try {
- vector<string> myArray;
- return myArray;
- }
- catch(exception& e) {
- m->errorOut(e, "ClearMemoryCommand", "setParameters");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-string ClearMemoryCommand::getHelpString(){
- try {
- string helpString = "";
- helpString += "The clear.memory command removes saved reference data from memory.\n";
- helpString += "The clear.memory command should be in the following format: clear.memory().\n";
- return helpString;
- }
- catch(exception& e) {
- m->errorOut(e, "ClearMemoryCommand", "getHelpString");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-
-ClearMemoryCommand::ClearMemoryCommand(string option) {
- try {
- abort = false; calledHelp = false;
-
- //allow user to run help
- if(option == "help") { help(); abort = true; calledHelp = true; }
- else if(option == "citation") { citation(); abort = true; calledHelp = true;}
- }
- catch(exception& e) {
- m->errorOut(e, "ClearMemoryCommand", "ClearMemoryCommand");
- exit(1);
- }
-}
-
-//**********************************************************************************************************************
-
-int ClearMemoryCommand::execute(){
- try {
-
- if (abort == true) { if (calledHelp) { return 0; } return 2; }
-
- ReferenceDB* rdb = ReferenceDB::getInstance();
- rdb->clearMemory();
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "ClearMemoryCommand", "execute");
- exit(1);
- }
-}
-//**********************************************************************************************************************/
diff --git a/source/commands/clearmemorycommand.h b/source/commands/clearmemorycommand.h
deleted file mode 100644
index 765f41d..0000000
--- a/source/commands/clearmemorycommand.h
+++ /dev/null
@@ -1,41 +0,0 @@
-#ifndef CLEARMEMORYCOMMAND_H
-#define CLEARMEMORYCOMMAND_H
-
-/*
- * clearmemorycommand.h
- * Mothur
- *
- * Created by westcott on 7/6/11.
- * Copyright 2011 Schloss Lab. All rights reserved.
- *
- */
-
-#include "command.hpp"
-
-class ClearMemoryCommand : public Command {
-public:
- ClearMemoryCommand(string);
- ClearMemoryCommand(){ abort = true; calledHelp = true; }
- ~ClearMemoryCommand(){}
-
- vector<string> setParameters();
- string getCommandName() { return "clear.memory"; }
- string getCommandCategory() { return "General"; }
-
- string getHelpString();
- string getOutputPattern(string) { return ""; }
- string getCitation() { return "http://www.mothur.org/wiki/Clear.memory"; }
- string getDescription() { return "remove saved references from memory"; }
-
-
- int execute();
- void help() { m->mothurOut(getHelpString()); }
-
-
-private:
- bool abort;
- vector<string> outputNames;
-};
-
-#endif
-
diff --git a/source/commands/clustercommand.cpp b/source/commands/clustercommand.cpp
index d562e81..1c0de93 100644
--- a/source/commands/clustercommand.cpp
+++ b/source/commands/clustercommand.cpp
@@ -257,13 +257,6 @@ ClusterCommand::ClusterCommand(string option) {
temp = validParameter.validFile(parameters, "sim", false); if (temp == "not found") { temp = "F"; }
sim = m->isTrue(temp);
- //bool cutoffSet = false;
- temp = validParameter.validFile(parameters, "cutoff", false);
- if (temp == "not found") { temp = "10"; }
- //else { cutoffSet = true; }
- m->mothurConvert(temp, cutoff);
- cutoff += (5 / (precision * 10.0));
-
//temp = validParameter.validFile(parameters, "adjust", false); if (temp == "not found") { temp = "F"; }
//if (m->isNumeric1(temp)) { m->mothurConvert(temp, adjust); }
//else if (m->isTrue(temp)) { adjust = 1.0; }
@@ -283,6 +276,14 @@ ClusterCommand::ClusterCommand(string option) {
#else
if ((method == "agc") || (method == "dgc")) { m->mothurOut("[ERROR]: The agc and dgc clustering methods are not available for Windows, aborting\n."); abort = true; }
#endif
+
+ //bool cutoffSet = false;
+ temp = validParameter.validFile(parameters, "cutoff", false);
+ if (temp == "not found") { temp = "10"; }
+ //else { cutoffSet = true; }
+ m->mothurConvert(temp, cutoff);
+ if ((method != "agc") && (method != "dgc")) { cutoff += (5 / (precision * 10.0)); }
+
showabund = validParameter.validFile(parameters, "showabund", false);
if (showabund == "not found") { showabund = "T"; }
@@ -563,6 +564,8 @@ int ClusterCommand::vsearchDriver(string inputFile, string ucClusteredFile, stri
for (int i = 0; i < vsearchParameters.size(); i++) { commandString += toString(vsearchParameters[i]) + " "; }
//cout << "commandString = " << commandString << endl;
+ //exit(1);
+
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
#else
commandString = "\"" + commandString + "\"";
diff --git a/source/commands/clustersplitcommand.cpp b/source/commands/clustersplitcommand.cpp
index 0deb25d..41f61b7 100644
--- a/source/commands/clustersplitcommand.cpp
+++ b/source/commands/clustersplitcommand.cpp
@@ -346,11 +346,6 @@ ClusterSplitCommand::ClusterSplitCommand(string option) {
//not using file option and don't have fasta method with classic
if (((splitmethod != "fasta") && classic) && (file == "")) { m->mothurOut("[ERROR]: splitmethod must be fasta to use cluster.classic, or you must use the file option.\n"); abort=true; }
-
- cutoffNotSet = false;
- temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { cutoffNotSet = true; temp = "0.25"; }
- m->mothurConvert(temp, cutoff);
- cutoff += (5 / (precision * 10.0));
temp = validParameter.validFile(parameters, "taxlevel", false); if (temp == "not found") { temp = "3"; }
m->mothurConvert(temp, taxLevelCutoff);
@@ -369,6 +364,11 @@ ClusterSplitCommand::ClusterSplitCommand(string option) {
if ((method == "agc") || (method == "dgc")) { m->mothurOut("[ERROR]: The agc and dgc clustering methods are not available for Windows, aborting\n."); abort = true; }
#endif
+ cutoffNotSet = false;
+ temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { cutoffNotSet = true; temp = "0.25"; }
+ m->mothurConvert(temp, cutoff);
+ if ((method != "agc") && (method != "dgc")) { cutoff += (5 / (precision * 10.0)); }
+
if ((splitmethod == "distance") || (splitmethod == "classify") || (splitmethod == "fasta")) { }
else { m->mothurOut("[ERROR]: " + splitmethod + " is not a valid splitting method. Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); abort = true; }
@@ -592,7 +592,7 @@ map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames,
map<float, int> labelBin;
vector<float> orderFloat;
int numSingleBins;
-
+
//read in singletons
if (singleton != "none") {
@@ -612,7 +612,7 @@ map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames,
in.close();
m->mothurRemove(singleton);
-
+
numSingleBins = listSingle->getNumBins();
}else{ listSingle = NULL; numSingleBins = 0; }
@@ -652,14 +652,14 @@ map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames,
string filledInList = listNames[k] + "filledInTemp";
ofstream outFilled;
m->openOutputFile(filledInList, outFilled);
-
+
//for each label needed
for(int l = 0; l < orderFloat.size(); l++){
string thisLabel;
if (orderFloat[l] == -1) { thisLabel = "unique"; }
else { thisLabel = toString(orderFloat[l], length-1); }
-
+
//this file has reached the end
if (list == NULL) {
list = input->getListVector(lastLabel, true);
@@ -668,7 +668,7 @@ map<float, int> ClusterSplitCommand::completeListFile(vector<string> listNames,
float labelFloat;
if (list->getLabel() == "unique") { labelFloat = -1.0; }
else { convert(list->getLabel(), labelFloat); }
-
+
//check for missing labels
if (labelFloat > orderFloat[l]) { //you are missing the label, get the next smallest one
//if its bigger get last label, otherwise keep it
@@ -1454,6 +1454,10 @@ int ClusterSplitCommand::vsearchDriver(string inputFile, string ucClusteredFile,
char* wordlength = new char[15]; wordlength[0] = '\0'; strncat(wordlength, "--wordlength=8", 14);
vsearchParameters.push_back(wordlength);
+ //--threads=1
+ char* threads = new char[12]; threads[0] = '\0'; strncat(threads, "--threads=1", 11);
+ vsearchParameters.push_back(threads);
+
//--uc=$ROOT.clustered.uc
string tempIn = "--uc=" + ucClusteredFile;
char* uc = new char[tempIn.length()+1]; uc[0] = '\0'; strncat(uc, tempIn.c_str(), tempIn.length());
diff --git a/source/commands/countseqscommand.cpp b/source/commands/countseqscommand.cpp
index 79278e3..12f9374 100644
--- a/source/commands/countseqscommand.cpp
+++ b/source/commands/countseqscommand.cpp
@@ -19,7 +19,6 @@ vector<string> CountSeqsCommand::setParameters(){
CommandParameter pname("name", "InputTypes", "", "", "NameSHared", "NameSHared", "none","count",false,false,true); parameters.push_back(pname);
CommandParameter pgroup("group", "InputTypes", "", "", "sharedGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
- CommandParameter plarge("large", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(plarge);
CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
@@ -40,7 +39,6 @@ string CountSeqsCommand::getHelpString(){
string helpString = "";
helpString += "The count.seqs aka. make.table command reads a name or shared file and outputs a .count_table file. You may also provide a group with the names file to get the counts broken down by group.\n";
helpString += "The groups parameter allows you to indicate which groups you want to include in the counts, by default all groups in your groupfile are used.\n";
- helpString += "The large parameter indicates the name and group files are too large to fit in RAM.\n";
helpString += "When you use the groups parameter and a sequence does not represent any sequences from the groups you specify it is not included in the .count.summary file.\n";
helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
helpString += "The count.seqs command should be in the following format: count.seqs(name=yourNameFile).\n";
@@ -172,10 +170,7 @@ CountSeqsCommand::CountSeqsCommand(string option) {
m->splitAtDash(groups, Groups);
m->setGroups(Groups);
- string temp = validParameter.validFile(parameters, "large", false); if (temp == "not found") { temp = "F"; }
- large = m->isTrue(temp);
-
- temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
+ string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
m->setProcessors(temp);
m->mothurConvert(temp, processors);
@@ -211,8 +206,7 @@ int CountSeqsCommand::execute(){
variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(namefile));
string outputFileName = getOutputFileName("count", variables);
- if (!large) { total = processSmall(outputFileName); }
- else { total = processLarge(outputFileName); }
+ total = process(outputFileName);
if (m->control_pressed) { m->mothurRemove(outputFileName); return 0; }
@@ -359,7 +353,7 @@ unsigned long long CountSeqsCommand::processShared(vector<SharedRAbundVector*>&
}
//**********************************************************************************************************************
-unsigned long long CountSeqsCommand::processSmall(string outputFileName){
+unsigned long long CountSeqsCommand::process(string outputFileName){
try {
ofstream out;
m->openOutputFile(outputFileName, out); outputTypes["count"].push_back(outputFileName);
@@ -604,7 +598,6 @@ unsigned long long CountSeqsCommand::driver(unsigned long long start, unsigned l
//adjust start if null strings
if (start == 0) { m->zapGremlins(in); m->gobble(in); }
-
bool done = false;
unsigned long long total = 0;
while (!done) {
@@ -674,124 +667,6 @@ unsigned long long CountSeqsCommand::driver(unsigned long long start, unsigned l
exit(1);
}
}
-//**********************************************************************************************************************
-
-unsigned long long CountSeqsCommand::processLarge(string outputFileName){
- try {
- set<string> namesOfGroups;
- map<string, int> initial;
- for (set<string>::iterator it = namesOfGroups.begin(); it != namesOfGroups.end(); it++) { initial[(*it)] = 0; }
- ofstream out;
- m->openOutputFile(outputFileName, out);
- outputNames.push_back(outputFileName); outputTypes["count"].push_back(outputFileName);
- out << "Representative_Sequence\ttotal";
- if (groupfile == "") { out << endl; }
-
- map<string, unsigned long long> namesToIndex;
- string outfile = m->getRootName(groupfile) + "sorted.groups.temp";
- string outName = m->getRootName(namefile) + "sorted.name.temp";
- map<int, string> indexToName;
- map<int, string> indexToGroup;
- if (groupfile != "") {
- time_t estart = time(NULL);
- //convert name file to redundant -> unique. set unique name equal to index so we can use vectors, save name for later.
- string newNameFile = m->getRootName(namefile) + ".name.temp";
- string newGroupFile = m->getRootName(groupfile) + ".group.temp";
- indexToName = processNameFile(newNameFile);
- indexToGroup = getGroupNames(newGroupFile, namesOfGroups);
-
- //sort file by first column so the names of sequences will be easier to find
- //use the unix sort
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
- string command = "sort -n " + newGroupFile + " -o " + outfile;
- system(command.c_str());
- command = "sort -n " + newNameFile + " -o " + outName;
- system(command.c_str());
- #else //sort using windows sort
- string command = "sort " + newGroupFile + " /O " + outfile;
- system(command.c_str());
- command = "sort " + newNameFile + " /O " + outName;
- system(command.c_str());
- #endif
- m->mothurRemove(newNameFile);
- m->mothurRemove(newGroupFile);
-
- m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to sort and index the group and name files. "); m->mothurOutEndLine();
- }else { outName = namefile; }
-
- time_t estart = time(NULL);
- //open input file
- ifstream in;
- m->openInputFile(outName, in);
-
- //open input file
- ifstream in2;
-
- unsigned long long total = 0;
- vector< vector<int> > nameMapCount;
- if (groupfile != "") {
- m->openInputFile(outfile, in2);
- nameMapCount.resize(indexToName.size());
- for (int i = 0; i < nameMapCount.size(); i++) {
- nameMapCount[i].resize(indexToGroup.size(), 0);
- }
- }
-
- while (!in.eof()) {
- if (m->control_pressed) { break; }
-
- string firstCol;
- in >> firstCol; m->gobble(in);
-
- if (groupfile != "") {
- int uniqueIndex;
- in >> uniqueIndex; m->gobble(in);
-
- string name; int groupIndex;
- in2 >> name >> groupIndex; m->gobble(in2);
-
- if (name != firstCol) { m->mothurOut("[ERROR]: found " + name + " in your groupfile, but " + firstCol + " was in your namefile, please correct.\n"); m->control_pressed = true; }
-
- nameMapCount[uniqueIndex][groupIndex]++;
- total++;
- }else {
- string secondCol;
- in >> secondCol; m->gobble(in);
- int num = m->getNumNames(secondCol);
- out << firstCol << '\t' << num << endl;
- total += num;
- }
- }
- in.close();
-
- if (groupfile != "") {
- m->mothurRemove(outfile);
- m->mothurRemove(outName);
- in2.close();
- for (map<int, string>::iterator it = indexToGroup.begin(); it != indexToGroup.end(); it++) { out << '\t' << it->second; }
- out << endl;
- for (int i = 0; i < nameMapCount.size(); i++) {
- string totalsLine = "";
- int seqTotal = 0;
- for (int j = 0; j < nameMapCount[i].size(); j++) {
- seqTotal += nameMapCount[i][j];
- totalsLine += '\t' + toString(nameMapCount[i][j]);
- }
- out << indexToName[i] << '\t' << seqTotal << totalsLine << endl;
- }
- }
-
- out.close();
-
- m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to create the count table file. "); m->mothurOutEndLine();
-
- return total;
- }
- catch(exception& e) {
- m->errorOut(e, "CountSeqsCommand", "processLarge");
- exit(1);
- }
-}
/**************************************************************************************************/
map<int, string> CountSeqsCommand::processNameFile(string name) {
try {
diff --git a/source/commands/countseqscommand.h b/source/commands/countseqscommand.h
index 8125009..d73d4f0 100644
--- a/source/commands/countseqscommand.h
+++ b/source/commands/countseqscommand.h
@@ -38,13 +38,12 @@ public:
private:
string namefile, groupfile, outputDir, groups, sharedfile;
- bool abort, large, allLines;
+ bool abort, allLines;
vector<string> Groups, outputNames;
int processors;
set<string> labels;
- unsigned long long processSmall(string);
- unsigned long long processLarge(string);
+ unsigned long long process(string);
map<int, string> processNameFile(string);
map<int, string> getGroupNames(string, set<string>&);
diff --git a/source/commands/getmimarkspackagecommand.cpp b/source/commands/getmimarkspackagecommand.cpp
index 769364f..e05d42c 100644
--- a/source/commands/getmimarkspackagecommand.cpp
+++ b/source/commands/getmimarkspackagecommand.cpp
@@ -467,6 +467,7 @@ int GetMIMarksPackageCommand::readFile(){
thisFileName1 = pieces[1];
thisFileName2 = pieces[2];
group = pieces[0];
+ m->checkGroupName(group);
}else if (pieces.size() == 4) {
if (!setOligosParameter) { m->mothurOut("[ERROR]: You must have an oligosfile with the index file option. Aborting. \n"); m->control_pressed = true; }
thisFileName1 = pieces[0];
diff --git a/source/commands/hclustercommand.cpp b/source/commands/hclustercommand.cpp
deleted file mode 100644
index 6c5de5c..0000000
--- a/source/commands/hclustercommand.cpp
+++ /dev/null
@@ -1,499 +0,0 @@
-/*
- * hclustercommand.cpp
- * Mothur
- *
- * Created by westcott on 10/13/09.
- * Copyright 2009 Schloss Lab. All rights reserved.
- *
- */
-
-#include "hclustercommand.h"
-
-//**********************************************************************************************************************
-vector<string> HClusterCommand::setParameters(){
- try {
- CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none","list-rabund-sabund",false,false,true); parameters.push_back(pphylip);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName","",false,false,true); parameters.push_back(pname);
- CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "ColumnName","list-rabund-sabund",false,false,true); parameters.push_back(pcolumn);
- CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "","",false,false,true); parameters.push_back(pcutoff);
- CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision);
- CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "","",false,false); parameters.push_back(pmethod);
- CommandParameter phard("hard", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(phard);
- CommandParameter psorted("sorted", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psorted);
- CommandParameter pshowabund("showabund", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pshowabund);
- CommandParameter ptiming("timing", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ptiming);
- CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed);
- CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
- CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
-
- vector<string> myArray;
- for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
- return myArray;
- }
- catch(exception& e) {
- m->errorOut(e, "HClusterCommand", "setParameters");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-string HClusterCommand::getHelpString(){
- try {
- string helpString = "";
- helpString += "The hcluster command parameter options are cutoff, precision, method, phylip, column, name, showabund, timing and sorted. Phylip or column and name are required, unless you have valid current files.\n";
- helpString += "The phylip and column parameter allow you to enter your distance file, and sorted indicates whether your column distance file is already sorted. \n";
- helpString += "The name parameter allows you to enter your name file and is required if your distance file is in column format. \n";
- helpString += "The hcluster command should be in the following format: \n";
- helpString += "hcluster(column=youDistanceFile, name=yourNameFile, method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n";
- helpString += "The acceptable hcluster methods are furthest, nearest, weighted and average.\n";
- return helpString;
- }
- catch(exception& e) {
- m->errorOut(e, "HClusterCommand", "getHelpString");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-string HClusterCommand::getOutputPattern(string type) {
- try {
- string pattern = "";
-
- if (type == "list") { pattern = "[filename],[clustertag],list"; }
- else if (type == "rabund") { pattern = "[filename],[clustertag],rabund"; }
- else if (type == "sabund") { pattern = "[filename],[clustertag],sabund"; }
- else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
-
- return pattern;
- }
- catch(exception& e) {
- m->errorOut(e, "HClusterCommand", "getOutputPattern");
- exit(1);
- }
-}
-
-//**********************************************************************************************************************
-HClusterCommand::HClusterCommand(){
- try {
- abort = true; calledHelp = true;
- setParameters();
- vector<string> tempOutNames;
- outputTypes["list"] = tempOutNames;
- outputTypes["rabund"] = tempOutNames;
- outputTypes["sabund"] = tempOutNames;
- }
- catch(exception& e) {
- m->errorOut(e, "HClusterCommand", "HClusterCommand");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-//This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
-HClusterCommand::HClusterCommand(string option) {
- try{
- abort = false; calledHelp = false;
-
- //allow user to run help
- if(option == "help") { help(); abort = true; calledHelp = true; }
- else if(option == "citation") { citation(); abort = true; calledHelp = true;}
-
- else {
- vector<string> myArray = setParameters();
-
- OptionParser parser(option);
- map<string,string> parameters = parser.getParameters();
-
- ValidParameters validParameter;
- map<string,string>::iterator it;
-
- //check to make sure all parameters are valid for command
- for (it = parameters.begin(); it != parameters.end(); it++) {
- if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {
- abort = true;
- }
- }
-
- //initialize outputTypes
- vector<string> tempOutNames;
- outputTypes["list"] = tempOutNames;
- outputTypes["rabund"] = tempOutNames;
- outputTypes["sabund"] = tempOutNames;
-
- //if the user changes the input directory command factory will send this info to us in the output parameter
- string inputDir = validParameter.validFile(parameters, "inputdir", false);
- if (inputDir == "not found"){ inputDir = ""; }
- else {
- string path;
- it = parameters.find("phylip");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["phylip"] = inputDir + it->second; }
- }
-
- it = parameters.find("column");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["column"] = inputDir + it->second; }
- }
-
- it = parameters.find("name");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["name"] = inputDir + it->second; }
- }
- }
-
- //if the user changes the output directory command factory will send this info to us in the output parameter
- outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
-
- //check for required parameters
- phylipfile = validParameter.validFile(parameters, "phylip", true);
- if (phylipfile == "not open") { abort = true; }
- else if (phylipfile == "not found") { phylipfile = ""; }
- else { distfile = phylipfile; format = "phylip"; m->setPhylipFile(phylipfile); }
-
- columnfile = validParameter.validFile(parameters, "column", true);
- if (columnfile == "not open") { abort = true; }
- else if (columnfile == "not found") { columnfile = ""; }
- else { distfile = columnfile; format = "column"; m->setColumnFile(columnfile); }
-
- namefile = validParameter.validFile(parameters, "name", true);
- if (namefile == "not open") { abort = true; }
- else if (namefile == "not found") { namefile = ""; }
- else { m->setNameFile(namefile); }
-
- if ((phylipfile == "") && (columnfile == "")) {
- //is there are current file available for either of these?
- //give priority to column, then phylip
- columnfile = m->getColumnFile();
- if (columnfile != "") { m->mothurOut("Using " + columnfile + " as input file for the column parameter."); m->mothurOutEndLine(); }
- else {
- phylipfile = m->getPhylipFile();
- if (phylipfile != "") { m->mothurOut("Using " + phylipfile + " as input file for the phylip parameter."); m->mothurOutEndLine(); }
- else {
- m->mothurOut("No valid current files. You must provide a phylip or column file before you can use the hcluster command."); m->mothurOutEndLine();
- abort = true;
- }
- }
- }
- else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a hcluster command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
-
- if (columnfile != "") {
- if (namefile == "") {
- namefile = m->getNameFile();
- if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
- else {
- m->mothurOut("You need to provide a namefile if you are going to use the column format."); m->mothurOutEndLine();
- abort = true;
- }
- }
- }
-
- //check for optional parameter and set defaults
- // ...at some point should added some additional type checking...
- //get user cutoff and precision or use defaults
- string temp;
- temp = validParameter.validFile(parameters, "precision", false);
- if (temp == "not found") { temp = "100"; }
- //saves precision legnth for formatting below
- length = temp.length();
- m->mothurConvert(temp, precision);
-
- temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "T"; }
- hard = m->isTrue(temp);
-
- temp = validParameter.validFile(parameters, "cutoff", false);
- if (temp == "not found") { temp = "10"; }
- m->mothurConvert(temp, cutoff);
- cutoff += (5 / (precision * 10.0));
-
- method = validParameter.validFile(parameters, "method", false);
- if (method == "not found") { method = "average"; }
-
- if ((method == "furthest") || (method == "nearest") || (method == "average") || (method == "weighted")) { }
- else { m->mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest, average or weighted."); m->mothurOutEndLine(); abort = true; }
-
- showabund = validParameter.validFile(parameters, "showabund", false);
- if (showabund == "not found") { showabund = "T"; }
-
- sort = validParameter.validFile(parameters, "sorted", false);
- if (sort == "not found") { sort = "F"; }
- sorted = m->isTrue(sort);
-
- timing = validParameter.validFile(parameters, "timing", false);
- if (timing == "not found") { timing = "F"; }
-
-
- if (abort == false) {
-
- if (outputDir == "") { outputDir += m->hasPath(distfile); }
- fileroot = outputDir + m->getRootName(m->getSimpleName(distfile));
-
- if (method == "furthest") { tag = "fn"; }
- else if (method == "nearest") { tag = "nn"; }
- else if (method == "weighted") { tag = "wn"; }
- else { tag = "an"; }
-
- map<string, string> variables;
- variables["[filename]"] = fileroot;
- variables["[clustertag]"] = tag;
-
- string sabundFileName = getOutputFileName("sabund",variables);
- string rabundFileName = getOutputFileName("rabund",variables);
- string listFileName = getOutputFileName("list", variables);
-
- m->openOutputFile(sabundFileName, sabundFile);
- m->openOutputFile(rabundFileName, rabundFile);
- m->openOutputFile(listFileName, listFile);
-
- outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
- outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
- outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
- }
- }
- }
- catch(exception& e) {
- m->errorOut(e, "HClusterCommand", "HClusterCommand");
- exit(1);
- }
-}
-
-//**********************************************************************************************************************
-
-int HClusterCommand::execute(){
- try {
-
- if (abort == true) { if (calledHelp) { return 0; } return 2; }
-
- NameAssignment* nameMap = NULL;
- if(namefile != ""){
- nameMap = new NameAssignment(namefile);
- nameMap->readMap();
- }
-
- time_t estart = time(NULL);
-
- if (!sorted) {
- read = new ReadCluster(distfile, cutoff, outputDir, true);
- read->setFormat(format);
- read->read(nameMap);
-
- if (m->control_pressed) {
- delete read;
- sabundFile.close();
- rabundFile.close();
- listFile.close();
- for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
- return 0;
- }
-
- distfile = read->getOutputFile();
-
- list = read->getListVector();
- delete read;
- }else {
- list = new ListVector(nameMap->getListVector());
- }
-
- if (m->control_pressed) {
- sabundFile.close();
- rabundFile.close();
- listFile.close();
- for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
- return 0;
- }
-
- m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to sort. "); m->mothurOutEndLine();
- estart = time(NULL);
-
- //list vector made by read contains all sequence names
- if(list != NULL){
- rabund = new RAbundVector(list->getRAbundVector());
- }else{
- m->mothurOut("Error: no list vector!"); m->mothurOutEndLine(); return 0;
- }
-
- list->printHeaders(listFile);
-
- float previousDist = 0.00000;
- float rndPreviousDist = 0.00000;
- oldRAbund = *rabund;
- oldList = *list;
-
- print_start = true;
- start = time(NULL);
-
- cluster = new HCluster(rabund, list, method, distfile, nameMap, cutoff);
- vector<seqDist> seqs; seqs.resize(1); // to start loop
-
- if (m->control_pressed) {
- delete cluster;
- sabundFile.close();
- rabundFile.close();
- listFile.close();
- for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
- return 0;
- }
-
- float saveCutoff = cutoff;
-
- while (seqs.size() != 0){
-
- seqs = cluster->getSeqs();
-
- //to account for cutoff change in average neighbor
- if (seqs.size() != 0) {
- if (seqs[0].dist > cutoff) { break; }
- }
-
- if (m->control_pressed) {
- delete cluster;
- sabundFile.close();
- rabundFile.close();
- listFile.close();
- for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
- return 0;
- }
-
- for (int i = 0; i < seqs.size(); i++) { //-1 means skip me
-
- if (seqs[i].seq1 != seqs[i].seq2) {
- cutoff = cluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
-
- if (m->control_pressed) {
- delete cluster;
- sabundFile.close();
- rabundFile.close();
- listFile.close();
- for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
- return 0;
- }
-
-
- float rndDist;
- if (hard) {
- rndDist = m->ceilDist(seqs[i].dist, precision);
- }else{
- rndDist = m->roundDist(seqs[i].dist, precision);
- }
-
-
- if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
- printData("unique");
- }
- else if((rndDist != rndPreviousDist)){
- printData(toString(rndPreviousDist, length-1));
- }
-
- previousDist = seqs[i].dist;
- rndPreviousDist = rndDist;
- oldRAbund = *rabund;
- oldList = *list;
- }
- }
- }
-
- if (m->control_pressed) {
- delete cluster;
- sabundFile.close();
- rabundFile.close();
- listFile.close();
- for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
- return 0;
- }
-
- if(previousDist <= 0.0000){
- printData("unique");
- }
- else if(rndPreviousDist<cutoff){
- printData(toString(rndPreviousDist, length-1));
- }
-
- sabundFile.close();
- rabundFile.close();
- listFile.close();
- delete cluster;
-
- if (m->control_pressed) {
- for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
- return 0;
- }
-
-
- if (saveCutoff != cutoff) {
- if (hard) { saveCutoff = m->ceilDist(saveCutoff, precision); }
- else { saveCutoff = m->roundDist(saveCutoff, precision); }
-
- m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();
- }
-
- //set list file as new current listfile
- string current = "";
- itTypes = outputTypes.find("list");
- if (itTypes != outputTypes.end()) {
- if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
- }
-
- //set rabund file as new current rabundfile
- itTypes = outputTypes.find("rabund");
- if (itTypes != outputTypes.end()) {
- if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
- }
-
- //set sabund file as new current sabundfile
- itTypes = outputTypes.find("sabund");
- if (itTypes != outputTypes.end()) {
- if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
- }
-
-
- m->mothurOutEndLine();
- m->mothurOut("Output File Names: "); m->mothurOutEndLine();
- for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
- m->mothurOutEndLine();
-
- m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster. "); m->mothurOutEndLine();
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "HClusterCommand", "execute");
- exit(1);
- }
-}
-
-//**********************************************************************************************************************
-
-void HClusterCommand::printData(string label){
- try {
- if (m->isTrue(timing)) {
- m->mothurOut("\tTime: " + toString(time(NULL) - start) + "\tsecs for " + toString(oldRAbund.getNumBins())
- + "\tclusters. Updates: " + toString(loops)); m->mothurOutEndLine();
- }
- print_start = true;
- loops = 0;
- start = time(NULL);
-
- oldRAbund.setLabel(label);
- if (m->isTrue(showabund)) {
- oldRAbund.getSAbundVector().print(cout);
- }
- oldRAbund.print(rabundFile);
- oldRAbund.getSAbundVector().print(sabundFile);
-
- oldList.setLabel(label);
- oldList.print(listFile);
- }
- catch(exception& e) {
- m->errorOut(e, "HClusterCommand", "printData");
- exit(1);
- }
-
-
-}
-//**********************************************************************************************************************
-
diff --git a/source/commands/hclustercommand.h b/source/commands/hclustercommand.h
deleted file mode 100644
index 1b8e9b7..0000000
--- a/source/commands/hclustercommand.h
+++ /dev/null
@@ -1,73 +0,0 @@
-#ifndef HCLUSTERCOMMAND_H
-#define HCLUSTERCOMMAND_H
-
-/*
- * hclustercommand.h
- * Mothur
- *
- * Created by westcott on 10/13/09.
- * Copyright 2009 Schloss Lab. All rights reserved.
- *
- */
-
-#include "command.hpp"
-#include "hcluster.h"
-#include "rabundvector.hpp"
-#include "sabundvector.hpp"
-#include "listvector.hpp"
-#include "readcluster.h"
-
-/******************************************************************/
-//This command is an implementation of the HCluster algorithmn described in
-//ESPRIT: estimating species richness using large collections of 16S rRNA pyrosequences by
-//Yijun Sun1,2,*, Yunpeng Cai2, Li Liu1, Fahong Yu1, Michael L. Farrell3, William McKendree3
-//and William Farmerie1 1
-
-//Interdisciplinary Center for Biotechnology Research, 2Department of Electrical and Computer Engineering,
-//University of Florida, Gainesville, FL 32610-3622 and 3Materials Technology Directorate, Air Force Technical
-//Applications Center, 1030 S. Highway A1A, Patrick AFB, FL 32925-3002, USA
-//Received January 28, 2009; Revised April 14, 2009; Accepted April 15, 2009
-/************************************************************/
-class HClusterCommand : public Command {
-
-public:
- HClusterCommand(string);
- HClusterCommand();
- ~HClusterCommand(){}
-
- vector<string> setParameters();
- string getCommandName() { return "hcluster"; }
- string getCommandCategory() { return "Clustering"; }
-
- string getHelpString();
- string getOutputPattern(string);
- string getCitation() { return "Sun Y, Cai Y, Liu L, Yu F, Farrell ML, Mckendree W, Farmerie W (2009). ESPRIT: estimating species richness using large collections of 16S rRNA pyrosequences. Nucleic Acids Res 37: e76. \nhttp://www.mothur.org/wiki/Hcluster"; }
- string getDescription() { return "cluster your sequences into OTUs using a distance matrix"; }
-
- int execute();
- void help() { m->mothurOut(getHelpString()); }
-
-
-private:
- HCluster* cluster;
- ListVector* list;
- RAbundVector* rabund;
- RAbundVector oldRAbund;
- ListVector oldList;
- ReadCluster* read;
-
- bool abort, sorted, print_start, hard;
- string method, fileroot, tag, distfile, format, phylipfile, columnfile, namefile, sort, showabund, timing, outputDir;
- double cutoff;
- int precision, length;
- ofstream sabundFile, rabundFile, listFile;
- time_t start;
- unsigned long loops;
- vector<string> outputNames;
-
- void printData(string label);
-};
-
-/************************************************************/
-
-#endif
diff --git a/source/commands/lefsecommand.cpp b/source/commands/lefsecommand.cpp
index efacc3e..a49db1e 100644
--- a/source/commands/lefsecommand.cpp
+++ b/source/commands/lefsecommand.cpp
@@ -8,6 +8,7 @@
#include "lefsecommand.h"
#include "linearalgebra.h"
+#include "sharedutilities.h"
//**********************************************************************************************************************
vector<string> LefseCommand::setParameters(){
@@ -20,7 +21,7 @@ vector<string> LefseCommand::setParameters(){
//CommandParameter pclasses("classes", "String", "", "", "", "", "","",false,false); parameters.push_back(pclasses);
CommandParameter palpha("aalpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(palpha);
CommandParameter pwalpha("walpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(pwalpha);
-
+ CommandParameter psets("sets", "String", "", "", "", "", "","",false,false); parameters.push_back(psets);
CommandParameter plda("lda", "Number", "", "2.0", "", "", "","",false,false); parameters.push_back(plda);
CommandParameter pwilc("wilc", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pwilc);
CommandParameter pnormmillion("norm", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pnormmillion);
@@ -68,6 +69,7 @@ string LefseCommand::getHelpString(){
helpString += "The wilc parameter is used to indicate whether to perform the Wilcoxon test. Default=T. \n";
helpString += "The iters parameter is used to set the number of bootstrap iteration for LDA. Default=30. \n";
//helpString += "The wilcsamename parameter is used to indicate whether perform the wilcoxon test only among the subclasses with the same name. Default=F. \n";
+ helpString += "The sets parameter allows you to specify which of the sets in your designfile you would like to analyze. The set names are separated by dashes. The default is all sets in the designfile.\n";
helpString += "The curv parameter is used to set whether perform the wilcoxon testing the Curtis's approach [BETA VERSION] Default=F. \n";
helpString += "The norm parameter is used to multiply relative abundances by 1000000. Recommended when very low values are present. Default=T. \n";
helpString += "The fboots parameter is used to set the subsampling fraction value for each bootstrap iteration. Default=0.67. \n";
@@ -249,6 +251,12 @@ LefseCommand::LefseCommand(string option) {
if (temp == "not found") { temp = "10"; }
m->mothurConvert(temp, minC);
+ sets = validParameter.validFile(parameters, "sets", false);
+ if (sets == "not found") { sets = ""; }
+ else {
+ m->splitAtDash(sets, Sets);
+ }
+
multiClassStrat = validParameter.validFile(parameters, "multiclass", false);
if (multiClassStrat == "not found"){ multiClassStrat = "onevall"; }
if ((multiClassStrat != "onevall") && (multiClassStrat != "onevone")) { m->mothurOut("Invalid multiclass option: choices are onevone or onevall."); m->mothurOutEndLine(); abort=true; }
@@ -275,6 +283,18 @@ int LefseCommand::execute(){
//if user did not select class use first column
if (mclass == "") { mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); if (subclass == "") { subclass = mclass; } }
+ if (Sets.size() != 0) { //user has picked sets find groups to include from lookup
+ //make sure sets are all in designMap
+ SharedUtil* util = new SharedUtil();
+ vector<string> dGroups = designMap.getCategory(mclass);
+ util->setGroups(Sets, dGroups);
+ delete util;
+
+ designMap.setDefaultClass(mclass);
+ vector<string> groupsToSelect = designMap.getNamesGroups(Sets);
+ m->setGroups(groupsToSelect);
+ }
+
InputData input(sharedfile, "sharedfile");
vector<SharedRAbundFloatVector*> lookup = input.getSharedRAbundFloatVectors();
string lastLabel = lookup[0]->getLabel();
@@ -283,7 +303,6 @@ int LefseCommand::execute(){
set<string> processedLabels;
set<string> userLabels = labels;
-
//as long as you are not at the end of the file or done wih the lines you want
while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
diff --git a/source/commands/lefsecommand.h b/source/commands/lefsecommand.h
index cb1bbb6..4f9ca72 100644
--- a/source/commands/lefsecommand.h
+++ b/source/commands/lefsecommand.h
@@ -49,8 +49,8 @@ public:
private:
bool abort, allLines, wilc, wilcsamename, curv, subject, normMillion;
- string outputDir, sharedfile, designfile, mclass, subclass, rankTec, multiClassStrat;
- vector<string> outputNames;
+ string outputDir, sharedfile, designfile, mclass, subclass, rankTec, multiClassStrat, sets;
+ vector<string> outputNames, Sets;
set<string> labels;
double anovaAlpha, wilcoxonAlpha, fBoots, ldaThreshold;
int nlogs, iters, strict, minC;
diff --git a/source/commands/makebiomcommand.cpp b/source/commands/makebiomcommand.cpp
index eb99184..bb5d252 100644
--- a/source/commands/makebiomcommand.cpp
+++ b/source/commands/makebiomcommand.cpp
@@ -1026,8 +1026,14 @@ int MakeBiomCommand::getGreenGenesOTUIDs(vector<SharedRAbundVector*>& lookup, ma
m->removeConfidences(OTUTaxonomy);
//remove unclassifieds to match template
- int thisPos = OTUTaxonomy.find("unclassified;");
- if (thisPos != string::npos) { OTUTaxonomy = OTUTaxonomy.substr(0, thisPos); }
+ int thisPos = OTUTaxonomy.find("unclassified;"); //"Porphyromonadaceae"_unclassified;
+ if (thisPos != string::npos) {
+ OTUTaxonomy = OTUTaxonomy.substr(0, thisPos);
+ thisPos = OTUTaxonomy.find_last_of(";"); //remove rest of parent taxon
+ if (thisPos != string::npos) {
+ OTUTaxonomy = OTUTaxonomy.substr(0, thisPos);
+ }
+ }
//get list of reference ids that map to this taxonomy
vector<string> referenceIds = phylo.getSeqs(OTUTaxonomy);
@@ -1192,8 +1198,14 @@ int MakeBiomCommand::getGreenGenesOTUIDs(vector<SharedRAbundFloatVector*>& looku
m->removeConfidences(OTUTaxonomy);
//remove unclassifieds to match template
- int thisPos = OTUTaxonomy.find("unclassified;");
- if (thisPos != string::npos) { OTUTaxonomy = OTUTaxonomy.substr(0, thisPos); }
+ int thisPos = OTUTaxonomy.find("unclassified;"); //"Porphyromonadaceae"_unclassified;
+ if (thisPos != string::npos) {
+ OTUTaxonomy = OTUTaxonomy.substr(0, thisPos);
+ thisPos = OTUTaxonomy.find_last_of(";"); //remove rest of parent taxon
+ if (thisPos != string::npos) {
+ OTUTaxonomy = OTUTaxonomy.substr(0, thisPos);
+ }
+ }
//get list of reference ids that map to this taxonomy
vector<string> referenceIds = phylo.getSeqs(OTUTaxonomy);
diff --git a/source/commands/makecontigscommand.cpp b/source/commands/makecontigscommand.cpp
index 3a8b62d..ff8ba86 100644
--- a/source/commands/makecontigscommand.cpp
+++ b/source/commands/makecontigscommand.cpp
@@ -2927,6 +2927,7 @@ vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
if (m->control_pressed) { return files; }
+ bool skip = false;
string line = m->getline(in); m->gobble(in);
vector<string> pieces = m->splitWhiteSpace(line);
@@ -2939,6 +2940,7 @@ vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
rindex = "";
}else if (pieces.size() == 3) {
group = pieces[0];
+ m->checkGroupName(group);
forward = pieces[1];
reverse = pieces[2];
findex = "";
@@ -2975,16 +2977,21 @@ vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
}
}
-
+ //look for mothur exe
+ string mpath = m->argv;
+ string tempPath = mpath;
+ for (int i = 0; i < mpath.length(); i++) { tempPath[i] = tolower(mpath[i]); }
+ mpath = mpath.substr(0, (tempPath.find_last_of('m')));
//check to make sure both are able to be opened
ifstream in2;
int openForward = m->openInputFile(forward, in2, "noerror");
+ string tryPath = forward;
//if you can't open it, try default location
if (openForward == 1) {
if (m->getDefaultPath() != "") { //default path is set
- string tryPath = m->getDefaultPath() + m->getSimpleName(forward);
+ tryPath = m->getDefaultPath() + m->getSimpleName(forward);
m->mothurOut("Unable to open " + forward + ". Trying default " + tryPath); m->mothurOutEndLine();
ifstream in3;
openForward = m->openInputFile(tryPath, in3, "noerror");
@@ -2996,7 +3003,7 @@ vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
//if you can't open it, try output location
if (openForward == 1) {
if (m->getOutputDir() != "") { //default path is set
- string tryPath = m->getOutputDir() + m->getSimpleName(forward);
+ tryPath = m->getOutputDir() + m->getSimpleName(forward);
m->mothurOut("Unable to open " + forward + ". Trying output directory " + tryPath); m->mothurOutEndLine();
ifstream in4;
openForward = m->openInputFile(tryPath, in4, "noerror");
@@ -3005,17 +3012,31 @@ vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
}
}
+ //if you can't open it, try mothur's location
+ if (openForward == 1) {
+ tryPath = mpath + m->getSimpleName(forward);
+ m->mothurOut("Unable to open " + forward + ". Trying mothur's executable directory " + tryPath); m->mothurOutEndLine();
+ ifstream in4;
+ openForward = m->openInputFile(tryPath, in4, "noerror");
+ forward = tryPath;
+ in4.close();
+ }
+
if (openForward == 1) { //can't find it
m->mothurOut("[WARNING]: can't find " + forward + ", ignoring pair.\n");
- }else{ in2.close(); }
+ }else{
+ if (m->isBlank(tryPath)) { m->mothurOut("[WARNING]: " + forward + " is blank, skipping.\n"); skip=true; }
+ in2.close();
+ }
ifstream in3;
int openReverse = m->openInputFile(reverse, in3, "noerror");
+ tryPath = reverse;
//if you can't open it, try default location
if (openReverse == 1) {
if (m->getDefaultPath() != "") { //default path is set
- string tryPath = m->getDefaultPath() + m->getSimpleName(reverse);
+ tryPath = m->getDefaultPath() + m->getSimpleName(reverse);
m->mothurOut("Unable to open " + reverse + ". Trying default " + tryPath); m->mothurOutEndLine();
ifstream in3;
openReverse = m->openInputFile(tryPath, in3, "noerror");
@@ -3024,10 +3045,20 @@ vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
}
}
+ //if the reverse file still can't be opened, look for it under mpath (the prefix of m->argv computed earlier in this function)
+ if (openReverse == 1) {
+ tryPath = mpath + m->getSimpleName(reverse);
+ m->mothurOut("Unable to open " + reverse + ". Trying mothur's executable directory " + tryPath); m->mothurOutEndLine();
+ ifstream in4;
+ openReverse = m->openInputFile(tryPath, in4, "noerror"); //store the status for the reverse file; assigning it to openForward overwrote the forward file's status and left openReverse stuck at 1
+ reverse = tryPath;
+ in4.close();
+ }
+
//if you can't open it, try output location
if (openReverse == 1) {
if (m->getOutputDir() != "") { //default path is set
- string tryPath = m->getOutputDir() + m->getSimpleName(reverse);
+ tryPath = m->getOutputDir() + m->getSimpleName(reverse);
m->mothurOut("Unable to open " + reverse + ". Trying output directory " + tryPath); m->mothurOutEndLine();
ifstream in4;
openReverse = m->openInputFile(tryPath, in4, "noerror");
@@ -3038,17 +3069,18 @@ vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
if (openReverse == 1) { //can't find it
m->mothurOut("[WARNING]: can't find " + reverse + ", ignoring pair.\n");
- }else{ in3.close(); }
+ }else{ if (m->isBlank(tryPath)) { m->mothurOut("[WARNING]: " + reverse + " is blank, skipping.\n"); skip=true; } in3.close(); }
int openFindex = 0;
if ((findex != "") && (findex != "NONE")){
ifstream in4;
openFindex = m->openInputFile(findex, in4, "noerror"); in4.close();
+ tryPath = findex;
//if you can't open it, try default location
if (openFindex == 1) {
if (m->getDefaultPath() != "") { //default path is set
- string tryPath = m->getDefaultPath() + m->getSimpleName(findex);
+ tryPath = m->getDefaultPath() + m->getSimpleName(findex);
m->mothurOut("Unable to open " + findex + ". Trying default " + tryPath); m->mothurOutEndLine();
ifstream in5;
openFindex = m->openInputFile(tryPath, in5, "noerror");
@@ -3057,10 +3089,20 @@ vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
}
}
+ //if the forward index file still can't be opened, look for it under mpath (the prefix of m->argv computed earlier in this function)
+ if (openFindex == 1) {
+ tryPath = mpath + m->getSimpleName(findex);
+ m->mothurOut("Unable to open " + findex + ". Trying mothur's executable directory " + tryPath); m->mothurOutEndLine();
+ ifstream in14;
+ openFindex = m->openInputFile(tryPath, in14, "noerror"); //store the status for the findex file; assigning it to openForward overwrote the forward file's status and left openFindex stuck at 1
+ findex = tryPath;
+ in14.close();
+ }
+
//if you can't open it, try output location
if (openFindex == 1) {
if (m->getOutputDir() != "") { //default path is set
- string tryPath = m->getOutputDir() + m->getSimpleName(findex);
+ tryPath = m->getOutputDir() + m->getSimpleName(findex);
m->mothurOut("Unable to open " + findex + ". Trying output directory " + tryPath); m->mothurOutEndLine();
ifstream in6;
openFindex = m->openInputFile(tryPath, in6, "noerror");
@@ -3071,6 +3113,8 @@ vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
if (openFindex == 1) { //can't find it
m->mothurOut("[WARNING]: can't find " + findex + ", ignoring pair.\n");
+ }else{
+ if (m->isBlank(tryPath)) { m->mothurOut("[WARNING]: " + findex + " is blank, skipping.\n"); skip=true; }
}
}
@@ -3078,11 +3122,12 @@ vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
if ((rindex != "") && (rindex != "NONE")) {
ifstream in7;
openRindex = m->openInputFile(rindex, in7, "noerror"); in7.close();
+ tryPath = rindex;
//if you can't open it, try default location
if (openRindex == 1) {
if (m->getDefaultPath() != "") { //default path is set
- string tryPath = m->getDefaultPath() + m->getSimpleName(rindex);
+ tryPath = m->getDefaultPath() + m->getSimpleName(rindex);
m->mothurOut("Unable to open " + rindex + ". Trying default " + tryPath); m->mothurOutEndLine();
ifstream in8;
openRindex = m->openInputFile(tryPath, in8, "noerror");
@@ -3091,10 +3136,20 @@ vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
}
}
+ //if the reverse index file still can't be opened, look for it under mpath (the prefix of m->argv computed earlier in this function)
+ if (openRindex == 1) { //guard on the rindex status; testing openFindex made this fallback depend on the unrelated forward index file
+ tryPath = mpath + m->getSimpleName(rindex);
+ m->mothurOut("Unable to open " + rindex + ". Trying mothur's executable directory " + tryPath); m->mothurOutEndLine();
+ ifstream in14;
+ openRindex = m->openInputFile(tryPath, in14, "noerror"); //store the status for the rindex file; assigning it to openForward overwrote the forward file's status and left openRindex unchanged
+ rindex = tryPath;
+ in14.close();
+ }
+
//if you can't open it, try output location
if (openRindex == 1) {
if (m->getOutputDir() != "") { //default path is set
- string tryPath = m->getOutputDir() + m->getSimpleName(rindex);
+ tryPath = m->getOutputDir() + m->getSimpleName(rindex);
m->mothurOut("Unable to open " + rindex + ". Trying output directory " + tryPath); m->mothurOutEndLine();
ifstream in9;
openRindex = m->openInputFile(tryPath, in9, "noerror");
@@ -3105,12 +3160,14 @@ vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
if (openRindex == 1) { //can't find it
m->mothurOut("[WARNING]: can't find " + rindex + ", ignoring pair.\n");
+ }else{
+ if (m->isBlank(tryPath)) { m->mothurOut("[WARNING]: " + rindex + " is blank, skipping.\n"); skip=true; }
}
}
- if ((openForward != 1) && (openReverse != 1) && (openFindex != 1) && (openRindex != 1)) { //good pair
+ if ((openForward != 1) && (openReverse != 1) && (openFindex != 1) && (openRindex != 1) && (!skip)) { //good pair
file2Group[files.size()] = group;
vector<string> pair;
#ifdef USE_BOOST
diff --git a/source/commands/mgclustercommand.cpp b/source/commands/mgclustercommand.cpp
index e62208b..bd2a472 100644
--- a/source/commands/mgclustercommand.cpp
+++ b/source/commands/mgclustercommand.cpp
@@ -42,7 +42,7 @@ vector<string> MGClusterCommand::setParameters(){
string MGClusterCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard, method, merge, min, length, penalty, adjust and hcluster. The blast parameter is required.\n";
+ helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard, method, merge, min, length, penalty and adjust. The blast parameter is required.\n";
helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similar to the OTUs.\n";
helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
@@ -53,7 +53,6 @@ string MGClusterCommand::getHelpString(){
helpString += "The adjust parameter is used to handle missing distances. If you set a cutoff, adjust=f by default. If not, adjust=t by default. Adjust=f, means ignore missing distances and adjust cutoff as needed with the average neighbor method. Adjust=t, will treat missing distances as 1.0. You can also set the value the missing distances should be set to, adjust=0.5 would give missing distances a value of 0.5.\n";
helpString += "The penalty parameter is used to adjust the error rate. The default is 0.10.\n";
helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster. By default merge is true, meaning you want to merge.\n";
- helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering. This may be necessary if your file is too large to fit into RAM. The default is false.\n";
helpString += "The mgcluster command should be in the following format: \n";
helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
helpString += "Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n";
@@ -210,10 +209,7 @@ MGClusterCommand::MGClusterCommand(string option) {
minWanted = m->isTrue(temp);
temp = validParameter.validFile(parameters, "merge", false); if (temp == "not found") { temp = "true"; }
- merge = m->isTrue(temp);
-
- temp = validParameter.validFile(parameters, "hcluster", false); if (temp == "not found") { temp = "false"; }
- hclusterWanted = m->isTrue(temp);
+ merge = m->isTrue(temp);
temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "T"; }
hard = m->isTrue(temp);
@@ -257,7 +253,7 @@ int MGClusterCommand::execute(){
//read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
//must remember to delete those objects here since readBlast does not
- read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
+ read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted);
read->read(nameMap);
list = new ListVector(nameMap->getListVector());
@@ -309,253 +305,113 @@ int MGClusterCommand::execute(){
double saveCutoff = cutoff;
- if (!hclusterWanted) {
- //get distmatrix and overlap
- SparseDistanceMatrix* distMatrix = read->getDistMatrix();
- overlapMatrix = read->getOverlapMatrix(); //already sorted by read
- delete read;
-
- //create cluster
- if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method, adjust); }
- else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method, adjust); }
- else if(method == "average"){ cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method, adjust); }
- cluster->setMapWanted(true);
- Seq2Bin = cluster->getSeqtoBin();
- oldSeq2Bin = Seq2Bin;
-
- if (m->control_pressed) {
- delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
- listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
- outputTypes.clear();
- return 0;
- }
+
+ //get distmatrix and overlap
+ SparseDistanceMatrix* distMatrix = read->getDistMatrix();
+ overlapMatrix = read->getOverlapMatrix(); //already sorted by read
+ delete read;
+
+ //create cluster
+ if (method == "furthest") { cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method, adjust); }
+ else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method, adjust); }
+ else if(method == "average"){ cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method, adjust); }
+ cluster->setMapWanted(true);
+ Seq2Bin = cluster->getSeqtoBin();
+ oldSeq2Bin = Seq2Bin;
+
+ if (m->control_pressed) {
+ delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
+ outputTypes.clear();
+ return 0;
+ }
+
+
+ //cluster using cluster classes
+ while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
+ if (m->debug) { cout << "numNodes=" << distMatrix->getNNodes() << " smallDist = " << distMatrix->getSmallDist() << endl; }
- //cluster using cluster classes
- while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
-
- if (m->debug) { cout << "numNodes=" << distMatrix->getNNodes() << " smallDist = " << distMatrix->getSmallDist() << endl; }
+ cluster->update(cutoff);
+
+ if (m->control_pressed) {
+ delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
+ outputTypes.clear();
+ return 0;
+ }
+
+ float dist = distMatrix->getSmallDist();
+ float rndDist;
+ if (hard) {
+ rndDist = m->ceilDist(dist, precision);
+ }else{
+ rndDist = m->roundDist(dist, precision);
+ }
+
+ if(previousDist <= 0.0000 && dist != previousDist){
+ oldList.setLabel("unique");
+ printData(&oldList, counts);
+ }
+ else if(rndDist != rndPreviousDist){
+ if (merge) {
+ ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
+
+ if (m->control_pressed) {
+ delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
+ outputTypes.clear();
+ return 0;
+ }
+
+ temp->setLabel(toString(rndPreviousDist, precisionLength-1));
+ printData(temp, counts);
+ delete temp;
+ }else{
+ oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
+ printData(&oldList, counts);
+ }
+ }
+
+ previousDist = dist;
+ rndPreviousDist = rndDist;
+ oldList = *list;
+ Seq2Bin = cluster->getSeqtoBin();
+ oldSeq2Bin = Seq2Bin;
+ }
+
+ if(previousDist <= 0.0000){
+ oldList.setLabel("unique");
+ printData(&oldList, counts);
+ }
+ else if(rndPreviousDist<cutoff){
+ if (merge) {
+ ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
- cluster->update(cutoff);
-
- if (m->control_pressed) {
- delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
- listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
- outputTypes.clear();
- return 0;
- }
-
- float dist = distMatrix->getSmallDist();
- float rndDist;
- if (hard) {
- rndDist = m->ceilDist(dist, precision);
- }else{
- rndDist = m->roundDist(dist, precision);
- }
-
- if(previousDist <= 0.0000 && dist != previousDist){
- oldList.setLabel("unique");
- printData(&oldList, counts);
- }
- else if(rndDist != rndPreviousDist){
- if (merge) {
- ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
-
- if (m->control_pressed) {
- delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
- listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
- outputTypes.clear();
- return 0;
- }
-
- temp->setLabel(toString(rndPreviousDist, precisionLength-1));
- printData(temp, counts);
- delete temp;
- }else{
- oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
- printData(&oldList, counts);
- }
- }
-
- previousDist = dist;
- rndPreviousDist = rndDist;
- oldList = *list;
- Seq2Bin = cluster->getSeqtoBin();
- oldSeq2Bin = Seq2Bin;
- }
-
- if(previousDist <= 0.0000){
- oldList.setLabel("unique");
- printData(&oldList, counts);
- }
- else if(rndPreviousDist<cutoff){
- if (merge) {
- ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
-
- if (m->control_pressed) {
- delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
- listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
- outputTypes.clear();
- return 0;
- }
-
- temp->setLabel(toString(rndPreviousDist, precisionLength-1));
- printData(temp, counts);
- delete temp;
- }else{
- oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
- printData(&oldList, counts);
- }
- }
-
- //free memory
- overlapMatrix.clear();
- delete distMatrix;
- delete cluster;
-
- }else { //use hcluster to cluster
- //get distmatrix and overlap
- overlapFile = read->getOverlapFile();
- distFile = read->getDistFile();
- delete read;
-
- //sort the distance and overlap files
- sortHclusterFiles(distFile, overlapFile);
-
- if (m->control_pressed) {
- delete nameMap; delete list; delete rabund;
- listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
- outputTypes.clear();
- return 0;
- }
-
- //create cluster
- hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
- hcluster->setMapWanted(true);
- Seq2Bin = cluster->getSeqtoBin();
- oldSeq2Bin = Seq2Bin;
-
- vector<seqDist> seqs; seqs.resize(1); // to start loop
- //ifstream inHcluster;
- //m->openInputFile(distFile, inHcluster);
-
- if (m->control_pressed) {
- delete nameMap; delete list; delete rabund; delete hcluster;
- listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
- outputTypes.clear();
- return 0;
- }
-
- while (seqs.size() != 0){
-
- seqs = hcluster->getSeqs();
-
- //to account for cutoff change in average neighbor
- if (seqs.size() != 0) {
- if (seqs[0].dist > cutoff) { break; }
- }
-
- if (m->control_pressed) {
- delete nameMap; delete list; delete rabund; delete hcluster;
- listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
- m->mothurRemove(distFile);
- m->mothurRemove(overlapFile);
- outputTypes.clear();
- return 0;
- }
-
- for (int i = 0; i < seqs.size(); i++) { //-1 means skip me
-
- if (seqs[i].seq1 != seqs[i].seq2) {
-
- cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
-
- if (m->control_pressed) {
- delete nameMap; delete list; delete rabund; delete hcluster;
- listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
- m->mothurRemove(distFile);
- m->mothurRemove(overlapFile);
- outputTypes.clear();
- return 0;
- }
-
- float rndDist;
- if (hard) {
- rndDist = m->ceilDist(seqs[i].dist, precision);
- }else{
- rndDist = m->roundDist(seqs[i].dist, precision);
- }
-
- if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
- oldList.setLabel("unique");
- printData(&oldList, counts);
- }
- else if((rndDist != rndPreviousDist)){
- if (merge) {
- ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
-
- if (m->control_pressed) {
- delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
- listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
- m->mothurRemove(distFile);
- m->mothurRemove(overlapFile);
- outputTypes.clear();
- return 0;
- }
-
- temp->setLabel(toString(rndPreviousDist, precisionLength-1));
- printData(temp, counts);
- delete temp;
- }else{
- oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
- printData(&oldList, counts);
- }
- }
-
- previousDist = seqs[i].dist;
- rndPreviousDist = rndDist;
- oldList = *list;
- Seq2Bin = cluster->getSeqtoBin();
- oldSeq2Bin = Seq2Bin;
- }
- }
- }
- //inHcluster.close();
-
- if(previousDist <= 0.0000){
- oldList.setLabel("unique");
- printData(&oldList, counts);
- }
- else if(rndPreviousDist<cutoff){
- if (merge) {
- ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
-
- if (m->control_pressed) {
- delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
- listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
- m->mothurRemove(distFile);
- m->mothurRemove(overlapFile);
- outputTypes.clear();
- return 0;
- }
-
- temp->setLabel(toString(rndPreviousDist, precisionLength-1));
- printData(temp, counts);
- delete temp;
- }else{
- oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
- printData(&oldList, counts);
- }
- }
-
- delete hcluster;
- m->mothurRemove(distFile);
- m->mothurRemove(overlapFile);
- }
-
- delete list;
+ if (m->control_pressed) {
+ delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
+ outputTypes.clear();
+ return 0;
+ }
+
+ temp->setLabel(toString(rndPreviousDist, precisionLength-1));
+ printData(temp, counts);
+ delete temp;
+ }else{
+ oldList.setLabel(toString(rndPreviousDist, precisionLength-1));
+ printData(&oldList, counts);
+ }
+ }
+
+ //free memory
+ overlapMatrix.clear();
+ delete distMatrix;
+ delete cluster;
+ delete list;
delete rabund;
listFile.close();
+
if (countfile == "") {
sabundFile.close();
rabundFile.close();
diff --git a/source/commands/mgclustercommand.h b/source/commands/mgclustercommand.h
index 878892c..d477143 100644
--- a/source/commands/mgclustercommand.h
+++ b/source/commands/mgclustercommand.h
@@ -14,7 +14,6 @@
#include "readblast.h"
#include "nameassignment.hpp"
#include "cluster.hpp"
-#include "hcluster.h"
#include "rabundvector.hpp"
#include "sabundvector.hpp"
#include "counttable.h"
@@ -45,7 +44,6 @@ private:
ReadBlast* read;
NameAssignment* nameMap;
Cluster* cluster;
- HCluster* hcluster;
ListVector* list;
CountTable* ct;
ListVector oldList;
diff --git a/source/commands/parsefastaqcommand.cpp b/source/commands/parsefastaqcommand.cpp
index cb6df56..d6c6492 100644
--- a/source/commands/parsefastaqcommand.cpp
+++ b/source/commands/parsefastaqcommand.cpp
@@ -1018,6 +1018,7 @@ vector< vector<string> > ParseFastaQCommand::readFile(){
if (oligosfile != "") { m->mothurOut("[ERROR]: You cannot have an oligosfile and 3 column file option at the same time. Aborting. \n"); m->control_pressed = true; }
if (groupfile != "") { m->mothurOut("[ERROR]: You cannot have an groupfile and 3 column file option at the same time. Aborting. \n"); m->control_pressed = true; }
group = pieces[0];
+ m->checkGroupName(group);
forward = pieces[1];
reverse = pieces[2];
findex = "";
diff --git a/source/commands/pcrseqscommand.cpp b/source/commands/pcrseqscommand.cpp
index 83bf2b0..0bb15bd 100644
--- a/source/commands/pcrseqscommand.cpp
+++ b/source/commands/pcrseqscommand.cpp
@@ -22,7 +22,7 @@ vector<string> PcrSeqsCommand::setParameters(){
CommandParameter pend("end", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pend);
CommandParameter pnomatch("nomatch", "Multiple", "reject-keep", "reject", "", "", "","",false,false); parameters.push_back(pnomatch);
CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
-
+ CommandParameter prdiffs("rdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(prdiffs);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
CommandParameter pkeepprimer("keepprimer", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepprimer);
CommandParameter pkeepdots("keepdots", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pkeepdots);
@@ -44,7 +44,7 @@ string PcrSeqsCommand::getHelpString(){
try {
string helpString = "";
helpString += "The pcr.seqs command reads a fasta file.\n";
- helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, count, taxonomy, ecoli, start, end, nomatch, pdiffs, processors, keepprimer and keepdots.\n";
+ helpString += "The pcr.seqs command parameters are fasta, oligos, name, group, count, taxonomy, ecoli, start, end, nomatch, pdiffs, rdiffs, processors, keepprimer and keepdots.\n";
helpString += "The ecoli parameter is used to provide a fasta file containing a single reference sequence (e.g. for e. coli) this must be aligned. Mothur will trim to the start and end positions of the reference sequence.\n";
helpString += "The start parameter allows you to provide a starting position to trim to.\n";
helpString += "The end parameter allows you to provide a ending position to trim from.\n";
@@ -52,7 +52,8 @@ string PcrSeqsCommand::getHelpString(){
helpString += "The processors parameter allows you to use multiple processors.\n";
helpString += "The keepprimer parameter allows you to keep the primer, default=false.\n";
helpString += "The keepdots parameter allows you to keep the leading and trailing .'s, default=true.\n";
- helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
+ helpString += "The pdiffs parameter is used to specify the number of differences allowed in the forward primer. The default is 0.\n";
+ helpString += "The rdiffs parameter is used to specify the number of differences allowed in the reverse primer. The default is 0.\n";
helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Pcr.seqs .\n";
return helpString;
@@ -269,6 +270,9 @@ PcrSeqsCommand::PcrSeqsCommand(string option) {
temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found") { temp = "0"; }
m->mothurConvert(temp, pdiffs);
+
+ temp = validParameter.validFile(parameters, "rdiffs", false); if (temp == "not found") { temp = "0"; }
+ m->mothurConvert(temp, rdiffs);
nomatch = validParameter.validFile(parameters, "nomatch", false); if (nomatch == "not found") { nomatch = "reject"; }
@@ -587,7 +591,7 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string
if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
// Allocate memory for thread data.
- pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, locationsFile+extension, m, oligosfile, ecolifile, nomatch, keepprimer, keepdots, start, end, length, pdiffs, lines[i].start, lines[i].end);
+ pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, locationsFile+extension, m, oligosfile, ecolifile, nomatch, keepprimer, keepdots, start, end, length, pdiffs, rdiffs, lines[i].start, lines[i].end);
pDataArray.push_back(tempPcr);
//default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
@@ -714,7 +718,7 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta
revPrimer = oligos.getReversePrimers();
}
- TrimOligos trim(pdiffs, 0, primers, barcodes, revPrimer);
+ TrimOligos trim(pdiffs, rdiffs, 0, primers, barcodes, revPrimer);
while (!done) {
@@ -792,9 +796,9 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta
int primerStart = 0; int primerEnd = 0;
vector<int> results = trim.findReverse(currSeq, primerStart, primerEnd);
bool good = true;
- if (results[0] > pdiffs) { good = false; }
+ if (results[0] > rdiffs) { good = false; }
totalDiffs += results[0];
- commentString += "rpdiffs=" + toString(results[0]) + "(" + trim.getCodeValue(results[1], pdiffs) + ") ";
+ commentString += "rpdiffs=" + toString(results[0]) + "(" + trim.getCodeValue(results[1], rdiffs) + ") ";
if(!good){ if (nomatch == "reject") { goodSeq = false; } trashCode += "r"; }
else{
@@ -875,10 +879,10 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta
currSeq.setComment("\t" + commentString + "\t" + seqComment);
}
- if (totalDiffs > pdiffs) { trashCode += "t"; goodSeq = false; }
+ if (totalDiffs > (pdiffs + rdiffs)) { trashCode += "t"; goodSeq = false; }
//trimming removed all bases
- // if (currSeq.getUnaligned() == "") { goodSeq = false; }
+ if (currSeq.getUnaligned() == "") { goodSeq = false; }
if(goodSeq == 1) {
currSeq.printSequence(goodFile);
diff --git a/source/commands/pcrseqscommand.h b/source/commands/pcrseqscommand.h
index 44fe076..4258b36 100644
--- a/source/commands/pcrseqscommand.h
+++ b/source/commands/pcrseqscommand.h
@@ -40,7 +40,7 @@ private:
vector<linePair> lines;
bool abort, keepprimer, keepdots, fileAligned, pairedOligos;
string fastafile, oligosfile, taxfile, groupfile, namefile, countfile, ecolifile, outputDir, nomatch;
- int start, end, processors, length, pdiffs, numFPrimers, numRPrimers;
+ int start, end, processors, length, pdiffs, rdiffs, numFPrimers, numRPrimers;
Oligos oligos;
vector<string> outputNames;
@@ -68,13 +68,13 @@ struct pcrData {
string goodFasta, badFasta, oligosfile, ecolifile, nomatch, locationsName;
unsigned long long fstart;
unsigned long long fend;
- int count, start, end, length, pdiffs, pstart, pend;
+ int count, start, end, length, pdiffs, pstart, pend, rdiffs;
MothurOut* m;
set<string> badSeqNames;
bool keepprimer, keepdots, fileAligned, adjustNeeded;
pcrData(){}
- pcrData(string f, string gf, string bfn, string loc, MothurOut* mout, string ol, string ec, string nm, bool kp, bool kd, int st, int en, int l, int pd, unsigned long long fst, unsigned long long fen) {
+ pcrData(string f, string gf, string bfn, string loc, MothurOut* mout, string ol, string ec, string nm, bool kp, bool kd, int st, int en, int l, int pd, int rd, unsigned long long fst, unsigned long long fen) {
filename = f;
goodFasta = gf;
badFasta = bfn;
@@ -90,6 +90,7 @@ struct pcrData {
fstart = fst;
fend = fen;
pdiffs = pd;
+ rdiffs = rd;
locationsName = loc;
count = 0;
fileAligned = true;
@@ -152,7 +153,7 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
numFPrimers = primers.size();
}
- TrimOligos trim(pDataArray->pdiffs, 0, primers, barcodes, revPrimer);
+ TrimOligos trim(pDataArray->pdiffs, pDataArray->rdiffs, 0, primers, barcodes, revPrimer);
for(int i = 0; i < pDataArray->fend; i++){ //end is the number of sequences to process
pDataArray->count++;
@@ -243,9 +244,9 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
int primerStart = 0; int primerEnd = 0;
vector<int> results = trim.findReverse(currSeq, primerStart, primerEnd);
bool good = true;
- if (results[0] > pDataArray->pdiffs) { good = false; }
+ if (results[0] > pDataArray->rdiffs) { good = false; }
totalDiffs += results[0];
- commentString += "rpdiffs=" + toString(results[0]) + "(" + trim.getCodeValue(results[1], pDataArray->pdiffs) + ") ";
+ commentString += "rpdiffs=" + toString(results[0]) + "(" + trim.getCodeValue(results[1], pDataArray->rdiffs) + ") ";
if(!good){ if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "r"; }
else{
@@ -324,7 +325,7 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
currSeq.setComment("\t" + commentString + "\t" + seqComment);
}
- if (totalDiffs > pDataArray->pdiffs) { trashCode += "t"; goodSeq = false; }
+ if (totalDiffs > (pDataArray->pdiffs + pDataArray->rdiffs)) { trashCode += "t"; goodSeq = false; }
//trimming removed all bases
if (currSeq.getUnaligned() == "") { goodSeq = false; }
diff --git a/source/commands/pipelinepdscommand.cpp b/source/commands/pipelinepdscommand.cpp
deleted file mode 100644
index ec70400..0000000
--- a/source/commands/pipelinepdscommand.cpp
+++ /dev/null
@@ -1,790 +0,0 @@
-/*
- * pipelinepdscommand.cpp
- * Mothur
- *
- * Created by westcott on 10/5/10.
- * Copyright 2010 Schloss Lab. All rights reserved.
- *
- */
-
-#include "pipelinepdscommand.h"
-#include "sffinfocommand.h"
-#include "commandoptionparser.hpp"
-
-//**********************************************************************************************************************
-vector<string> PipelineCommand::setParameters(){
- try {
- CommandParameter psff("sff", "InputTypes", "", "", "none", "oneRequired", "pipe","",false,false,true); parameters.push_back(psff);
- CommandParameter poligos("oligos", "InputTypes", "", "", "none", "oneRequired", "pipe","",false,false,true); parameters.push_back(poligos);
- CommandParameter palign("align", "InputTypes", "", "", "none", "oneRequired", "pipe","",false,false,true); parameters.push_back(palign);
- CommandParameter pchimera("chimera", "InputTypes", "", "", "none", "oneRequired", "pipe","",false,false,true); parameters.push_back(pchimera);
- CommandParameter pclassify("classify", "InputTypes", "", "", "none", "oneRequired", "pipe","",false,false,true); parameters.push_back(pclassify);
- CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "oneRequired", "pipe","",false,false,true); parameters.push_back(ptaxonomy);
- CommandParameter ppipeline("pipeline", "InputTypes", "", "", "none", "oneRequired", "none","",false,false,true); parameters.push_back(ppipeline);
- CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
- CommandParameter pseed("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pseed);
- CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
- CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
-
- vector<string> myArray;
- for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
- return myArray;
- }
- catch(exception& e) {
- m->errorOut(e, "PipelineCommand", "setParameters");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-string PipelineCommand::getHelpString(){
- try {
- string helpString = "";
- helpString += "The pipeline.pds command is designed to guide you through your analysis using mothur.\n";
- helpString += "The pipeline.pds command parameters are pipeline, sff, oligos, align, chimera, classify, taxonomy and processors.\n";
- helpString += "The sff parameter allows you to enter your sff file. It is required, if not using pipeline parameter.\n";
- helpString += "The oligos parameter allows you to enter your oligos file. It is required, if not using pipeline parameter.\n";
- helpString += "The align parameter allows you to enter a template to use with the aligner. It is required, if not using pipeline parameter.\n";
- helpString += "The chimera parameter allows you to enter a template to use for chimera detection. It is required, if not using pipeline parameter.\n";
- helpString += "The classify parameter allows you to enter a template to use for classification. It is required, if not using pipeline parameter.\n";
- helpString += "The taxonomy parameter allows you to enter a taxonomy file for the classify template to use for classification. It is required, if not using pipeline parameter.\n";
- helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
- helpString += "The pipeline parameter allows you to enter your own pipeline file. This file should look like a mothur batchfile, but where you would be using a mothur generated file, you can use current instead.\n";
- helpString += "Example: trim.seqs(processors=8, allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50, fasta=may1.v13.fasta, oligos=may1.v13.oligos, qfile=may1.v13.qual)\n";
- helpString += "then, you could enter unique.seqs(fasta=current), and mothur would use the .trim.fasta file from the trim.seqs command. \n";
- helpString += "then you could enter align.seqs(candidate=current, template=silva.v13.align, processors=8). , and mothur would use the .trim.unique.fasta file from the unique.seqs command. \n";
- helpString += "If no pipeline file is given then mothur will use Pat's pipeline. \n";
- helpString += "Here is a list of the commands used in Pat's pipeline.\n";
- helpString += "All paralellized commands will use the processors you entered.\n";
- helpString += "The sffinfo command takes your sff file and extracts the fasta and quality files.\n";
- helpString += "The trim.seqs command uses your oligos file and the quality and fasta files generated by sffinfo.\n";
- helpString += "The trim.seqs command sets the following parameters: allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50.\n";
- helpString += "The unique.seqs command uses the trimmed fasta file and removes redundant sequences, don't worry the names file generated by unique.seqs will be used in the pipeline to make sure they are included.\n";
- helpString += "The align.seqs command aligns the unique sequences using the aligners default options. \n";
- helpString += "The screen.seqs command screens the sequences using optimize=end-minlength. \n";
- helpString += "The pipeline uses chimera.slayer to detect chimeras using the default options. \n";
- helpString += "The pipeline removes all sequences determined to be chimeric by chimera.slayer. \n";
- helpString += "The filter.seqs command filters the sequences using vertical=T, trump=. \n";
- helpString += "The unique.seqs command uses the filtered fasta file and name file to remove sequences that have become redundant after filtering.\n";
- helpString += "The pre.cluster command clusters sequences that have no more than 2 differences.\n";
- helpString += "The dist.seqs command is used to generate a column and phylip formatted distance matrix using cutoff=0.20 for column.\n";
- helpString += "The pipeline uses cluster with method=average, hard=T. \n";
- helpString += "The classify.seqs command is used to classify the sequences using the bayesian method with a cutoff of 80.\n";
- helpString += "The phylotype command is used to cluster the sequences based on their classification.\n";
- helpString += "The clearcut command is used to generate a tree using neighbor=T. \n";
- helpString += "The summary.single and summary.shared commands are run on the otu files from cluster and phylotype commands. \n";
- helpString += "The summary.shared command uses calc=sharednseqs-sharedsobs-sharedchao-sharedace-anderberg-jclass-jest-kulczynski-kulczynskicody-lennon-ochiai-sorclass-sorest-whittaker-braycurtis-jabund-morisitahorn-sorabund-thetan-thetayc. \n";
- helpString += "The summary.single command uses calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson. \n";
- helpString += "The classify.otu command is used to get the concensus taxonomy for otu files from cluster and phylotype commands. \n";
- helpString += "The phylo.diversity command run on the tree generated by clearcut with rarefy=T, iters=100. \n";
- helpString += "The unifrac commands are also run on the tree generated by clearcut with random=F, distance=T. \n";
- helpString += "\n";
- return helpString;
- }
- catch(exception& e) {
- m->errorOut(e, "PipelineCommand", "getHelpString");
- exit(1);
- }
-}
-
-
-//**********************************************************************************************************************
-PipelineCommand::PipelineCommand(string option) {
- try {
- cFactory = CommandFactory::getInstance();
- abort = false; calledHelp = false;
-
- //allow user to run help
- if(option == "help") { help(); abort = true; calledHelp = true; }
- else if(option == "citation") { citation(); abort = true; calledHelp = true;}
-
- else {
- vector<string> myArray = setParameters();
-
- OptionParser parser(option);
- map<string, string> parameters = parser.getParameters();
-
- ValidParameters validParameter;
- map<string, string>::iterator it;
-
- //check to make sure all parameters are valid for command
- for (it = parameters.begin(); it != parameters.end(); it++) {
- if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
- }
-
- //if the user changes the input directory command factory will send this info to us in the output parameter
- string inputDir = validParameter.validFile(parameters, "inputdir", false);
- if (inputDir == "not found"){ inputDir = ""; }
- else {
- string path;
- it = parameters.find("sff");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["sff"] = inputDir + it->second; }
- }
-
- it = parameters.find("oligos");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["oligos"] = inputDir + it->second; }
- }
-
- it = parameters.find("align");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["align"] = inputDir + it->second; }
- }
-
- it = parameters.find("chimera");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["chimera"] = inputDir + it->second; }
- }
-
- it = parameters.find("classify");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["classify"] = inputDir + it->second; }
- }
-
- it = parameters.find("taxonomy");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
- }
-
- it = parameters.find("pipeline");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["pipeline"] = inputDir + it->second; }
- }
- }
-
- outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
-
- pipeFilename = validParameter.validFile(parameters, "pipeline", true);
- if (pipeFilename == "not found") { pipeFilename = ""; }
- else if (pipeFilename == "not open") { pipeFilename = ""; abort = true; }
-
- string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
- m->setProcessors(temp);
- m->mothurConvert(temp, processors);
-
- if (pipeFilename != "") {
- abort = readUsersPipeline();
- }else{
- sffFile = validParameter.validFile(parameters, "sff", true);
- if (sffFile == "not found") { m->mothurOut("sff is a required parameter for the pipeline command."); m->mothurOutEndLine(); abort = true; }
- else if (sffFile == "not open") { sffFile = ""; abort = true; }
- else { m->setSFFFile(sffFile); }
-
- oligosFile = validParameter.validFile(parameters, "oligos", true);
- if (oligosFile == "not found") { m->mothurOut("oligos is a required parameter for the pipeline command."); m->mothurOutEndLine(); abort = true; }
- else if (oligosFile == "not open") { oligosFile = ""; abort = true; }
-
- alignFile = validParameter.validFile(parameters, "align", true);
- if (alignFile == "not found") { m->mothurOut("align is a required parameter for the pipeline command. Please provide the template to align with."); m->mothurOutEndLine(); abort = true; }
- else if (alignFile == "not open") { alignFile = ""; abort = true; }
-
- chimeraFile = validParameter.validFile(parameters, "chimera", true);
- if (chimeraFile == "not found") { m->mothurOut("chimera is a required parameter for the pipeline command. Please provide the template to check for chimeras with."); m->mothurOutEndLine(); abort = true; }
- else if (chimeraFile == "not open") { chimeraFile = ""; abort = true; }
-
- classifyFile = validParameter.validFile(parameters, "classify", true);
- if (classifyFile == "not found") { m->mothurOut("classify is a required parameter for the pipeline command. Please provide the template to use with the classifier."); m->mothurOutEndLine(); abort = true; }
- else if (classifyFile == "not open") { classifyFile = ""; abort = true; }
-
- taxonomyFile = validParameter.validFile(parameters, "taxonomy", true);
- if (taxonomyFile == "not found") { m->mothurOut("taxonomy is a required parameter for the pipeline command."); m->mothurOutEndLine(); abort = true; }
- else if (taxonomyFile == "not open") { taxonomyFile = ""; abort = true; }
- }
- }
-
- }
- catch(exception& e) {
- m->errorOut(e, "PipelineCommand", "PipelineCommand");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-
-int PipelineCommand::execute(){
- try {
- if (abort == true) { if (calledHelp) { return 0; } return 2; }
-
- int start = time(NULL);
-
- if (pipeFilename == "") {
- createPatsPipeline();
-
- //run Pats pipeline
- for (int i = 0; i < commands.size(); i++) {
- m->mothurOutEndLine(); m->mothurOut("mothur > " + commands[i]); m->mothurOutEndLine();
-
- if (m->control_pressed) { return 0; }
-
- CommandOptionParser parser(commands[i]);
- string commandName = parser.getCommandString();
- string options = parser.getOptionString();
-
- //executes valid command
- Command* command = cFactory->getCommand(commandName, options, "pipe");
- command->execute();
-
- //add output files to list
- map<string, vector<string> > thisCommandsFile = command->getOutputFiles();
- map<string, vector<string> >::iterator itMade;
- for (itMade = thisCommandsFile.begin(); itMade != thisCommandsFile.end(); itMade++) {
- vector<string> temp = itMade->second;
- for (int j = 0; j < temp.size(); j++) { outputNames.push_back(temp[j]); }
- }
-
- }
-
- }else { runUsersPipeline(); }
-
- if (m->control_pressed) { return 0; }
-
- m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run the pipeline analysis."); m->mothurOutEndLine(); m->mothurOutEndLine();
-
- m->mothurOutEndLine();
- m->mothurOut("Output File Names: "); m->mothurOutEndLine();
- for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
- m->mothurOutEndLine();
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "PipelineCommand", "execute");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-
-bool PipelineCommand::readUsersPipeline(){
- try {
-
- ifstream in;
- m->openInputFile(pipeFilename, in);
-
- string nextCommand = "";
-
- map<string, vector<string> > mothurMadeFiles;
-
- while(!in.eof()) {
- nextCommand = m->getline(in); m->gobble(in);
-
- if (nextCommand[0] != '#') {
- bool error = false;
-
- string commandName, options;
- error = parseCommand(nextCommand, commandName, options);
-
- if (error) { in.close(); return error; }
- if (commandName == "pipeline.pds") { m->mothurOut("Cannot run the pipeline.pds command from inside the pipeline.pds command."); m->mothurOutEndLine(); in.close(); return true; }
-
- error = checkForValidAndRequiredParameters(commandName, options, mothurMadeFiles);
-
- if (error) { in.close(); return error; }
- }
- }
-
- in.close();
-
- return false;
- }
- catch(exception& e) {
- m->errorOut(e, "PipelineCommand", "readUsersPipeline");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-
-bool PipelineCommand::parseCommand(string nextCommand, string& name, string& options){
- try {
- CommandOptionParser parser(nextCommand);
- name = parser.getCommandString();
- options = parser.getOptionString();
-
- if (name == "") { return true; } //name == "" if () are not right
-
- return false;
- }
- catch(exception& e) {
- m->errorOut(e, "PipelineCommand", "parseCommand");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-
-bool PipelineCommand::checkForValidAndRequiredParameters(string name, string options, map<string, vector<string> >& mothurMadeFiles){
- try {
-
- if (name == "system") { return false; }
-
- //get shell of the command so we can check to make sure its valid without running it
- Command* command = cFactory->getCommand(name);
-
- //check to make sure all parameters are valid for command
- vector<string> validParameters = command->setParameters();
-
- OptionParser parser(options);
- map<string, string> parameters = parser.getParameters();
-
- ValidParameters validParameter;
- map<string, string>::iterator it;
- map<string, vector<string> >::iterator itMade;
-
- for (it = parameters.begin(); it != parameters.end(); it++) {
-
- if (validParameter.isValidParameter(it->first, validParameters, it->second) != true) { return true; } // not valid
- if (it->second == "current") {
- itMade = mothurMadeFiles.find(it->first);
-
- if (itMade == mothurMadeFiles.end()) {
- m->mothurOut("You have the " + it->first + " listed as a current file for the " + name + " command, but it seems mothur will not make that file in your current pipeline, please correct."); m->mothurOutEndLine();
- return true;
- }
- }
- }
-
- //is the command missing any required
- vector<CommandParameter> commandParameters = command->getParameters();
- vector<string> requiredParameters;
- for (int i = 0; i < commandParameters.size(); i++) {
- if (commandParameters[i].required) {
- requiredParameters.push_back(commandParameters[i].name);
- }
- }
-
- for (int i = 0; i < requiredParameters.size(); i++) {
- it = parameters.find(requiredParameters[i]);
-
- if (it == parameters.end()) {
-
- string paraToLookFor = requiredParameters[i];
-
- //does mothur have a current file for this?
- itMade = mothurMadeFiles.find(requiredParameters[i]);
-
- if (itMade == mothurMadeFiles.end()) {
- m->mothurOut(name + " requires the " + requiredParameters[i] + " parameter, please correct."); m->mothurOutEndLine();
-
- }
- }
- }
-
-
- //update MothurMade
- map<string, vector<string> > thisCommandsFile = command->getOutputFiles();
- for (itMade = thisCommandsFile.begin(); itMade != thisCommandsFile.end(); itMade++) {
- mothurMadeFiles[itMade->first] = itMade->second; //adds any new types
- }
-
- return false;
- }
- catch(exception& e) {
- m->errorOut(e, "PipelineCommand", "checkForValidAndRequiredParameters");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-int PipelineCommand::runUsersPipeline(){
- try {
- ifstream in;
- m->openInputFile(pipeFilename, in);
-
- string nextCommand = "";
-
- map<string, vector<string> > mothurMadeFiles;
-
- while(!in.eof()) {
- nextCommand = m->getline(in); m->gobble(in);
-
- if (nextCommand[0] != '#') {
- CommandOptionParser parser(nextCommand);
- string commandName = parser.getCommandString();
- string options = parser.getOptionString();
-
- if ((options != "") && (commandName != "system")) {
- bool error = fillInMothurMade(options, mothurMadeFiles);
- if (error) { in.close(); return 0; }
- }
-
- m->mothurOutEndLine(); m->mothurOut("mothur > " + commandName + "(" + options + ")"); m->mothurOutEndLine();
-
- if (m->control_pressed) { return 0; }
-
- //executes valid command
- Command* command = cFactory->getCommand(commandName, options, "pipe");
- command->execute();
-
- //add output files to list
- map<string, vector<string> > thisCommandsFile = command->getOutputFiles();
- map<string, vector<string> >::iterator itMade;
- map<string, vector<string> >::iterator it;
- for (itMade = thisCommandsFile.begin(); itMade != thisCommandsFile.end(); itMade++) {
-
- vector<string> temp = itMade->second;
- for (int k = 0; k < temp.size(); k++) { outputNames.push_back(temp[k]); } //
-
- //update Mothur Made for each file
- it = mothurMadeFiles.find(itMade->first);
-
- if (it == mothurMadeFiles.end()) { //new type
-
- mothurMadeFiles[itMade->first] = temp;
-
- }else{ //update existing type
- vector<string> oldFileNames = it->second;
- //look at new files, see if an old version of the file exists, if so update, else just add.
- //for example you may have abrecovery.fasta and amazon.fasta as old files and you created a new amazon.trim.fasta.
-
- for (int k = 0; k < temp.size(); k++) {
-
- //get base name
- string root = m->getSimpleName(temp[k]);
- string individual = "";
- for(int i=0;i<root.length();i++){
- if(root[i] == '.'){
- root = individual;
- break;
- }else{
- individual += root[i];
- }
- }
-
- //look for that base name in oldfiles
- int spot = -1;
- for (int l = 0; l < oldFileNames.size(); l++) {
- int pos = oldFileNames[l].find(root);
- if (pos != string::npos) {
- spot = l;
- break;
- }
- }
-
- //if you found it update it, else add it
- if (spot != -1) {
- mothurMadeFiles[it->first][spot] = temp[k];
- }else{
- mothurMadeFiles[it->first].push_back(temp[k]);
- }
- }
- }
- }
- }
- }
-
- in.close();
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "PipelineCommand", "runUsersPipeline");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-bool PipelineCommand::fillInMothurMade(string& options, map<string, vector<string> >& mothurMadeFiles){
- try {
-
- OptionParser parser(options);
- map<string, string> parameters = parser.getParameters();
- map<string, string>::iterator it;
- map<string, vector<string> >::iterator itMade;
-
- options = "";
-
- //fill in mothurmade filenames
- for (it = parameters.begin(); it != parameters.end(); it++) {
- string paraType = it->first;
- string tempOption = it->second;
-
- if (tempOption == "current") {
-
- itMade = mothurMadeFiles.find(paraType);
-
- if (itMade == mothurMadeFiles.end()) {
- m->mothurOut("Looking for a current " + paraType + " file, but it seems mothur has not made that file type in your current pipeline, please correct."); m->mothurOutEndLine();
- return true;
- }else{
- vector<string> temp = itMade->second;
-
- if (temp.size() > 1) {
- //ask user which file to use
- m->mothurOut("More than one file has been created for the " + paraType + " parameter. "); m->mothurOutEndLine();
- for (int i = 0; i < temp.size(); i++) {
- m->mothurOut(toString(i) + " - " + temp[i]); m->mothurOutEndLine();
- }
-
- m->mothurOut("Please select the number of the file you would like to use: ");
- int num = 0;
- cin >> num;
- m->mothurOutJustToLog(toString(num)); m->mothurOutEndLine();
-
- if ((num < 0) || (num > (temp.size()-1))) { m->mothurOut("Not a valid response, quitting."); m->mothurOutEndLine(); return true; }
- else {
- tempOption = temp[num];
- }
-
- //clears buffer so next command doesn't have error
- string s;
- getline(cin, s);
-
- vector<string> newTemp;
- for (int i = 0; i < temp.size(); i++) {
- if (i == num) { newTemp.push_back(temp[i]); }
- else {
- m->mothurOut("Would you like to remove " + temp[i] + " as an option for " + paraType + ", (y/n): "); m->mothurOutEndLine();
- string response;
- cin >> response;
- m->mothurOutJustToLog(response); m->mothurOutEndLine();
-
- if (response == "n") { newTemp.push_back(temp[i]); }
-
- //clears buffer so next command doesn't have error
- string s;
- getline(cin, s);
- }
- }
-
- mothurMadeFiles[paraType] = newTemp;
-
-
- }else if (temp.size() == 0){
- m->mothurOut("Sorry, we seem to think you created a " + paraType + " file, but it seems mothur doesn't have a filename."); m->mothurOutEndLine();
- return true;
- }else{
- tempOption = temp[0];
- }
- }
- }
-
- options += it->first + "=" + tempOption + ", ";
- }
-
- //rip off extra comma
- options = options.substr(0, (options.length()-2));
-
- return false;
- }
- catch(exception& e) {
- m->errorOut(e, "PipelineCommand", "fillInMothurMade");
- exit(1);
- }
-}
-
-//**********************************************************************************************************************
-void PipelineCommand::createPatsPipeline(){
- try {
-
- //sff.info command
- string thisCommand = "sffinfo(sff=" + sffFile + ")";
- commands.push_back(thisCommand);
-
- //trim.seqs command
- string fastaFile = m->getRootName(m->getSimpleName(sffFile)) + "fasta";
- string qualFile = m->getRootName(m->getSimpleName(sffFile)) + "qual";
- thisCommand = "trim.seqs(processors=" + toString(processors) + ", fasta=current, allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50, oligos=" + oligosFile + ", qfile=current)";
- commands.push_back(thisCommand);
-
- //unique.seqs
- string groupFile = m->getRootName(m->getSimpleName(fastaFile)) + "groups";
- qualFile = m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
- fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
- thisCommand = "unique.seqs(fasta=current)";
- commands.push_back(thisCommand);
-
- //align.seqs
- string nameFile = m->getRootName(m->getSimpleName(fastaFile)) + "names";
- fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "unique" + m->getExtension(fastaFile);
- thisCommand = "align.seqs(processors=" + toString(processors) + ", candidate=current, template=" + alignFile + ")";
- commands.push_back(thisCommand);
-
- //screen.seqs
- fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "align";
- thisCommand = "screen.seqs(processors=" + toString(processors) + ", fasta=current, name=current, group=current, optimize=end-minlength)";
- commands.push_back(thisCommand);
-
- //chimera.slayer
- fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "good" + m->getExtension(fastaFile);
- nameFile = m->getRootName(m->getSimpleName(nameFile)) + "good" + m->getExtension(nameFile);
- groupFile = m->getRootName(m->getSimpleName(groupFile)) + "good" + m->getExtension(groupFile);
- thisCommand = "chimera.slayer(processors=" + toString(processors) + ", fasta=current, template=" + chimeraFile + ")";
- commands.push_back(thisCommand);
-
- //remove.seqs
- string accnosFile = m->getRootName(m->getSimpleName(fastaFile)) + "slayer.accnos";
- thisCommand = "remove.seqs(fasta=current, name=current, group=current, accnos=current, dups=T)";
- commands.push_back(thisCommand);
-
- //filter.seqs
- nameFile = m->getRootName(m->getSimpleName(nameFile)) + "pick" + m->getExtension(nameFile);
- groupFile = m->getRootName(m->getSimpleName(groupFile)) + "pick" + m->getExtension(groupFile);
- fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "pick" + m->getExtension(fastaFile);
- thisCommand = "filter.seqs(processors=" + toString(processors) + ", fasta=current, vertical=T, trump=.)";
- commands.push_back(thisCommand);
-
- //unique.seqs
- fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "filter.fasta";
- thisCommand = "unique.seqs(fasta=current, name=current)";
- commands.push_back(thisCommand);
-
- //pre.cluster
- nameFile = m->getRootName(m->getSimpleName(fastaFile)) + "names";
- fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "unique" + m->getExtension(fastaFile);
- thisCommand = "pre.cluster(fasta=current, name=current, diffs=2)";
- commands.push_back(thisCommand);
-
- //dist.seqs
- nameFile = m->getRootName(m->getSimpleName(fastaFile)) + "precluster.names";
- fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "precluster" + m->getExtension(fastaFile);
- thisCommand = "dist.seqs(processors=" + toString(processors) + ", fasta=current, cutoff=0.20)";
- commands.push_back(thisCommand);
-
- //dist.seqs
- string columnFile = m->getRootName(m->getSimpleName(fastaFile)) + "dist";
- thisCommand = "dist.seqs(processors=" + toString(processors) + ", fasta=current, output=lt)";
- commands.push_back(thisCommand);
-
- //read.dist
- string phylipFile = m->getRootName(m->getSimpleName(fastaFile)) + "phylip.dist";
- thisCommand = "read.dist(column=current, name=current)";
- commands.push_back(thisCommand);
-
- //cluster
- thisCommand = "cluster(method=average, hard=T)";
- commands.push_back(thisCommand);
-
- string listFile = m->getRootName(m->getSimpleName(columnFile)) + "an.list";
- string rabundFile = m->getRootName(m->getSimpleName(columnFile)) + "an.rabund";
-
- //degap.seqs
- thisCommand = "degap.seqs(fasta=current)";
- commands.push_back(thisCommand);
-
- //classify.seqs
- fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "ng.fasta";
- thisCommand = "classify.seqs(processors=" + toString(processors) + ", fasta=current, name=current, template=" + classifyFile + ", taxonomy=" + taxonomyFile + ", cutoff=80)";
- commands.push_back(thisCommand);
-
- string RippedTaxName = m->getRootName(m->getSimpleName(taxonomyFile));
- RippedTaxName = m->getExtension(RippedTaxName.substr(0, RippedTaxName.length()-1));
- if (RippedTaxName[0] == '.') { RippedTaxName = RippedTaxName.substr(1, RippedTaxName.length()); }
- RippedTaxName += ".";
-
- string fastaTaxFile = m->getRootName(m->getSimpleName(fastaFile)) + RippedTaxName + "taxonomy";
- string taxSummaryFile = m->getRootName(m->getSimpleName(fastaFile)) + RippedTaxName + "tax.summary";
-
- //phylotype
- thisCommand = "phylotype(taxonomy=current, name=current)";
- commands.push_back(thisCommand);
-
- string phyloListFile = m->getRootName(m->getSimpleName(fastaTaxFile)) + "tx.list";
- string phyloRabundFile = m->getRootName(m->getSimpleName(fastaTaxFile)) + "tx.rabund";
-
- //clearcut
- thisCommand = "clearcut(phylip=current, neighbor=T)";
- commands.push_back(thisCommand);
-
- string treeFile = m->getRootName(m->getSimpleName(phylipFile)) + "tre";
-
- //read.otu
- thisCommand = "make.shared(list=" + listFile + ", group=" + groupFile + ", label=0.03)";
- commands.push_back(thisCommand);
-
- string sharedFile = m->getRootName(m->getSimpleName(listFile)) + "shared";
-
- //read.otu
- thisCommand = "make.shared(list=" + phyloListFile + ", group=" + groupFile + ", label=1)";
- commands.push_back(thisCommand);
-
- string phyloSharedFile = m->getRootName(m->getSimpleName(phyloListFile)) + "shared";
-
- //read.otu
- thisCommand = "set.current(shared=" + sharedFile + ")";
- commands.push_back(thisCommand);
-
- //summary.single
- thisCommand = "summary.single(shared=current, calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
- commands.push_back(thisCommand);
-
- //summary.shared
- thisCommand = "summary.shared(shared=current, calc=sharednseqs-sharedsobs-sharedchao-sharedace-anderberg-jclass-jest-kulczynski-kulczynskicody-lennon-ochiai-sorclass-sorest-whittaker-braycurtis-jabund-morisitahorn-sorabund-thetan-thetayc)";
- commands.push_back(thisCommand);
-
- //read.otu
- //thisCommand = "read.otu(rabund=" + rabundFile + ", label=0.03)";
- //commands.push_back(thisCommand);
-
- //summary.single
- thisCommand = "summary.single(rabund=" + rabundFile + ", label=0.03, calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
- commands.push_back(thisCommand);
-
- //read.otu
- thisCommand = "set.current(shared=" + phyloSharedFile + ")";
- commands.push_back(thisCommand);
-
- //summary.single
- thisCommand = "summary.single(shared=current, calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
- commands.push_back(thisCommand);
-
- //summary.shared
- thisCommand = "summary.shared(shared=current, calc=sharednseqs-sharedsobs-sharedchao-sharedace-anderberg-jclass-jest-kulczynski-kulczynskicody-lennon-ochiai-sorclass-sorest-whittaker-braycurtis-jabund-morisitahorn-sorabund-thetan-thetayc)";
- commands.push_back(thisCommand);
-
- //read.otu
- //thisCommand = "read.otu(rabund=" + phyloRabundFile + ", label=1)";
- //commands.push_back(thisCommand);
-
- //summary.single
- thisCommand = "summary.single(rabund=" + phyloRabundFile + ", label=1, calc=nseqs-sobs-coverage-bergerparker-chao-ace-jack-bootstrap-boneh-efron-shen-solow-shannon-npshannon-invsimpson-qstat-simpsoneven-shannoneven-heip-smithwilson, size=5000)";
- commands.push_back(thisCommand);
-
- //classify.otu
- thisCommand = "classify.otu(taxonomy=" + fastaTaxFile + ", name=" + nameFile + ", list=" + listFile + ", cutoff=51, label=0.03)";
- commands.push_back(thisCommand);
-
- //classify.otu
- thisCommand = "classify.otu(taxonomy=" + fastaTaxFile + ", name=" + nameFile + ", list=" + phyloListFile + ", cutoff=51, label=1)";
- commands.push_back(thisCommand);
-
- //read.tree
- thisCommand = "set.current(tree=" + treeFile + ", name=" + nameFile + ", group=" + groupFile + ")";
- commands.push_back(thisCommand);
-
- //phylo.diversity
- thisCommand = "phylo.diversity(tree=current, group=current, name=current, iters=100,rarefy=T)";
- commands.push_back(thisCommand);
-
- //unifrac.weighted
- thisCommand = "unifrac.weighted(tree=current, group=current, name=current, random=false, distance=true, groups=all, processors=" + toString(processors) + ")";
- commands.push_back(thisCommand);
-
- //unifrac.unweighted
- thisCommand = "unifrac.unweighted(tree=current, group=current, name=current, random=false, distance=true, processors=" + toString(processors) + ")";
- commands.push_back(thisCommand);
-
-
- }
- catch(exception& e) {
- m->errorOut(e, "PipelineCommand", "createPatsPipeline");
- exit(1);
- }
-}
-
-//**********************************************************************************************************************
diff --git a/source/commands/pipelinepdscommand.h b/source/commands/pipelinepdscommand.h
deleted file mode 100644
index 22386bc..0000000
--- a/source/commands/pipelinepdscommand.h
+++ /dev/null
@@ -1,57 +0,0 @@
-#ifndef PIPELINEPDSCOMMAND_H
-#define PIPELINEPDSCOMMAND_H
-
-/*
- * pipelinepdscommand.h
- * Mothur
- *
- * Created by westcott on 10/5/10.
- * Copyright 2010 Schloss Lab. All rights reserved.
- *
- */
-
-
-#include "command.hpp"
-#include "commandfactory.hpp"
-
-/****************************************************/
-
-class PipelineCommand : public Command {
-
-public:
- PipelineCommand(string);
- PipelineCommand() { abort = true; calledHelp = true; setParameters(); }
- ~PipelineCommand(){}
-
- vector<string> setParameters();
- string getCommandName() { return "pipeline.pds"; }
- string getCommandCategory() { return "Hidden"; }
- string getHelpString();
- string getOutputPattern(string) { return ""; }
- string getCitation() { return "Schloss PD, Gevers D, Westcott SL (2011). Reducing the effects of PCR amplification and sequencing artifacts on 16S rRNA-based studies. PLoS ONE. 6:e27310.\nhttp://www.mothur.org/wiki/Pipeline.pds"; }
- string getDescription() { return "pat's pipeline"; }
-
-
- int execute();
- void help() { m->mothurOut(getHelpString()); }
-
-private:
- bool abort;
- CommandFactory* cFactory;
- vector<string> outputNames;
- vector<string> commands;
- string outputDir, sffFile, alignFile, oligosFile, taxonomyFile, pipeFilename, classifyFile, chimeraFile;
- int processors;
-
- bool readUsersPipeline();
- int runUsersPipeline();
- void createPatsPipeline();
- bool parseCommand(string, string&, string&);
- bool checkForValidAndRequiredParameters(string, string, map<string, vector<string> >&);
- bool fillInMothurMade(string&, map<string, vector<string> >&);
-};
-
-/****************************************************/
-
-#endif
-
diff --git a/source/commands/renamefilecommand.cpp b/source/commands/renamefilecommand.cpp
new file mode 100644
index 0000000..2190908
--- /dev/null
+++ b/source/commands/renamefilecommand.cpp
@@ -0,0 +1,706 @@
+//
+// renamefilecommand.cpp
+// Mothur
+//
+// Created by Sarah Westcott on 4/18/16.
+// Copyright (c) 2016 Schloss Lab. All rights reserved.
+//
+
+#include "renamefilecommand.h"
+#include "systemcommand.h"
+
+//**********************************************************************************************************************
+vector<string> RenameFileCommand::setParameters(){
+    // Registers every parameter rename.file accepts, in a fixed order,
+    // and returns the registered names in that same order.
+    try {
+        // File-type parameters that all share one identical definition.
+        const char* leadingFileTypes[] = {
+            "flow", "file", "biom", "phylip", "column", "summary", "fasta", "name",
+            "group", "list", "taxonomy", "qfile", "accnos", "rabund", "sabund", "design",
+            "order", "tree", "shared", "count", "new", "input", "relabund", "sff"
+        };
+        const int numLeading = sizeof(leadingFileTypes) / sizeof(leadingFileTypes[0]);
+        for (int f = 0; f < numLeading; f++) {
+            CommandParameter fileParam(leadingFileTypes[f], "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(fileParam);
+        }
+
+        // constaxonomy is the only entry built with the longer argument list (false,true,true).
+        CommandParameter consTaxParam("constaxonomy", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(consTaxParam);
+        CommandParameter oligosParam("oligos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(oligosParam);
+
+        // Boolean switches, both carrying "T" in the fourth argument slot.
+        const char* switches[] = { "shorten", "deleteold" };
+        for (int s = 0; s < 2; s++) {
+            CommandParameter switchParam(switches[s], "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(switchParam);
+        }
+
+        // Single numeric parameter.
+        CommandParameter seedParam("seed", "Number", "", "0", "", "", "","",false,false); parameters.push_back(seedParam);
+
+        // String-typed parameters.
+        const char* textSettings[] = { "inputdir", "prefix", "outputdir" };
+        for (int t = 0; t < 3; t++) {
+            CommandParameter textParam(textSettings[t], "String", "", "", "", "", "","",false,false); parameters.push_back(textParam);
+        }
+
+        // Collect the name of everything now held in parameters.
+        vector<string> names;
+        for (int p = 0; p < parameters.size(); p++) { names.push_back(parameters[p].name); }
+        return names;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "RenameFileCommand", "setParameters");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+string RenameFileCommand::getHelpString(){
+    // Assembles the rename.file help text; parameter names here must match those registered in setParameters().
+    try {
+        string helpString = "The rename.file command allows you to rename files and updates the current files saved by mothur.\n";
+        helpString += "The rename.file command parameters are: phylip, column, list, rabund, sabund, name, group, design, tree, shared, relabund, fasta, qfile, sff, oligos, accnos, biom, count, summary, file, taxonomy, constaxonomy, input, new, prefix, deleteold and shorten.\n";
+        helpString += "The new parameter allows you to provide an output file name for the input file you provide.\n";
+        helpString += "The shorten parameter is used to indicate you want mothur to generate output file names for you. For example: stability.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.pick.an.unique_list.shared would become stability.an.shared. Default=true.\n";
+        helpString += "The prefix parameter allows you to enter your own prefix for shortened names.\n";
+        helpString += "The deleteold parameter indicates whether you want to delete the old file. Default=true.\n";
+        helpString += "The rename.file command should be in the following format: \n";
+        helpString += "rename.file(fasta=current, name=current, group=current, taxonomy=current, shorten=t)\n";
+        return helpString;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "RenameFileCommand", "getHelpString");
+        exit(1);
+    }
+}
+
+
+//**********************************************************************************************************************
+RenameFileCommand::RenameFileCommand(){
+    // No-option construction: flag the instance as aborted with help called, then register the parameter list.
+    try {
+        abort = true; calledHelp = true;
+        setParameters();
+    }
+    catch(exception& e) {
+        m->errorOut(e, "RenameFileCommand", "RenameFileCommand");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+string RenameFileCommand::getOutputPattern(string type) {
+    // No pattern is defined for any type: log the error, raise control_pressed, and hand back an empty string.
+    try {
+        m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n");
+        m->control_pressed = true; return string();
+    }
+    catch(exception& ex) {
+        m->errorOut(ex, "RenameFileCommand", "getOutputPattern");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+RenameFileCommand::RenameFileCommand(string option) {
+ try {
+ abort = false; calledHelp = false;
+
+ //allow user to run help
+ if(option == "help") { help(); abort = true; calledHelp = true; }
+ else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+
+ else {
+ //valid paramters for this command
+ vector<string> myArray = setParameters();
+
+ OptionParser parser(option);
+ map<string,string> parameters = parser.getParameters();
+
+ ValidParameters validParameter;
+ map<string,string>::iterator it;
+ //check to make sure all parameters are valid for command
+ for (it = parameters.begin(); it != parameters.end(); it++) {
+ if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
+ }
+
+ vector<string> tempOutNames;
+ outputTypes["summary"] = tempOutNames;
+
+ //if the user changes the input directory command factory will send this info to us in the output parameter
+ string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ if (inputDir == "not found"){ inputDir = ""; }
+ else {
+ string path;
+ it = parameters.find("phylip");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["phylip"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("column");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["column"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("fasta");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["fasta"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("list");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["list"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("rabund");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["rabund"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("sabund");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["sabund"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("name");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["name"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("group");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["group"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("design");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["design"] = inputDir + it->second; }
+ }
+
+
+ it = parameters.find("tree");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["tree"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("shared");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["shared"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("input");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["input"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("relabund");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["relabund"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("fasta");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["fasta"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("qfile");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["qfile"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("sff");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["sff"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("oligos");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["oligos"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("accnos");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["accnos"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("taxonomy");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("flow");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["flow"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("biom");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["biom"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("summary");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["summary"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("file");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["file"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("constaxonomy");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["constaxonomy"] = inputDir + it->second; }
+ }
+ }
+
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
+
+ int numFiles = 0;
+ //check for parameters
+ phylipfile = validParameter.validFile(parameters, "phylip", true);
+ if (phylipfile == "not open") { m->mothurOut("Ignoring: " + parameters["phylip"]); m->mothurOutEndLine(); phylipfile = ""; }
+ else if (phylipfile == "not found") { phylipfile = ""; }
+ if (phylipfile != "") { numFiles++; }
+
+ columnfile = validParameter.validFile(parameters, "column", true);
+ if (columnfile == "not open") { m->mothurOut("Ignoring: " + parameters["column"]); m->mothurOutEndLine(); columnfile = ""; }
+ else if (columnfile == "not found") { columnfile = ""; }
+ if (columnfile != "") { numFiles++; }
+
+ listfile = validParameter.validFile(parameters, "list", true);
+ if (listfile == "not open") { m->mothurOut("Ignoring: " + parameters["list"]); m->mothurOutEndLine(); listfile = ""; }
+ else if (listfile == "not found") { listfile = ""; }
+ if (listfile != "") { numFiles++; }
+
+ rabundfile = validParameter.validFile(parameters, "rabund", true);
+ if (rabundfile == "not open") { m->mothurOut("Ignoring: " + parameters["rabund"]); m->mothurOutEndLine(); rabundfile = ""; }
+ else if (rabundfile == "not found") { rabundfile = ""; }
+ if (rabundfile != "") { numFiles++; }
+
+ sabundfile = validParameter.validFile(parameters, "sabund", true);
+ if (sabundfile == "not open") { m->mothurOut("Ignoring: " + parameters["sabund"]); m->mothurOutEndLine(); sabundfile = ""; }
+ else if (sabundfile == "not found") { sabundfile = ""; }
+ if (sabundfile != "") { numFiles++; }
+
+ namefile = validParameter.validFile(parameters, "name", true);
+ if (namefile == "not open") { m->mothurOut("Ignoring: " + parameters["name"]); m->mothurOutEndLine(); namefile = ""; }
+ else if (namefile == "not found") { namefile = ""; }
+ if (namefile != "") { numFiles++; }
+
+ groupfile = validParameter.validFile(parameters, "group", true);
+ if (groupfile == "not open") { m->mothurOut("Ignoring: " + parameters["group"]); m->mothurOutEndLine(); groupfile = ""; }
+ else if (groupfile == "not found") { groupfile = ""; }
+ if (groupfile != "") { numFiles++; }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { m->mothurOut("Ignoring: " + parameters["count"]); m->mothurOutEndLine(); countfile = ""; }
+ else if (countfile == "not found") { countfile = ""; }
+ if (countfile != "") { numFiles++; }
+
+ designfile = validParameter.validFile(parameters, "design", true);
+ if (designfile == "not open") { m->mothurOut("Ignoring: " + parameters["design"]); m->mothurOutEndLine(); designfile = ""; }
+ else if (designfile == "not found") { designfile = ""; }
+ if (designfile != "") { numFiles++; }
+
+ inputfile = validParameter.validFile(parameters, "input", true);
+ if (inputfile == "not open") { m->mothurOut("Ignoring: " + parameters["input"]); m->mothurOutEndLine(); inputfile = ""; }
+ else if (inputfile == "not found") { inputfile = ""; }
+ if (inputfile != "") { numFiles++; }
+
+ treefile = validParameter.validFile(parameters, "tree", true);
+ if (treefile == "not open") { m->mothurOut("Ignoring: " + parameters["tree"]); m->mothurOutEndLine(); treefile = ""; }
+ else if (treefile == "not found") { treefile = ""; }
+ if (treefile != "") { numFiles++; }
+
+ sharedfile = validParameter.validFile(parameters, "shared", true);
+ if (sharedfile == "not open") { m->mothurOut("Ignoring: " + parameters["shared"]); m->mothurOutEndLine(); sharedfile = ""; }
+ else if (sharedfile == "not found") { sharedfile = ""; }
+ if (sharedfile != "") { numFiles++; }
+
+ relabundfile = validParameter.validFile(parameters, "relabund", true);
+ if (relabundfile == "not open") { m->mothurOut("Ignoring: " + parameters["relabund"]); m->mothurOutEndLine(); relabundfile = ""; }
+ else if (relabundfile == "not found") { relabundfile = ""; }
+ if (relabundfile != "") { numFiles++; }
+
+ fastafile = validParameter.validFile(parameters, "fasta", true);
+ if (fastafile == "not open") { m->mothurOut("Ignoring: " + parameters["fasta"]); m->mothurOutEndLine(); fastafile = ""; }
+ else if (fastafile == "not found") { fastafile = ""; }
+ if (fastafile != "") { numFiles++; }
+
+ qualfile = validParameter.validFile(parameters, "qfile", true);
+ if (qualfile == "not open") { m->mothurOut("Ignoring: " + parameters["qfile"]); m->mothurOutEndLine(); qualfile = ""; }
+ else if (qualfile == "not found") { qualfile = ""; }
+ if (qualfile != "") { numFiles++; }
+
+ sfffile = validParameter.validFile(parameters, "sff", true);
+ if (sfffile == "not open") { m->mothurOut("Ignoring: " + parameters["sff"]); m->mothurOutEndLine(); sfffile = ""; }
+ else if (sfffile == "not found") { sfffile = ""; }
+ if (sfffile != "") { numFiles++; }
+
+ oligosfile = validParameter.validFile(parameters, "oligos", true);
+ if (oligosfile == "not open") { m->mothurOut("Ignoring: " + parameters["oligos"]); m->mothurOutEndLine(); oligosfile = ""; }
+ else if (oligosfile == "not found") { oligosfile = ""; }
+ if (oligosfile != "") { numFiles++; }
+
+ accnosfile = validParameter.validFile(parameters, "accnos", true);
+ if (accnosfile == "not open") { m->mothurOut("Ignoring: " + parameters["accnos"]); m->mothurOutEndLine(); accnosfile = ""; }
+ else if (accnosfile == "not found") { accnosfile = ""; }
+ if (accnosfile != "") { numFiles++; }
+
+ taxonomyfile = validParameter.validFile(parameters, "taxonomy", true);
+ if (taxonomyfile == "not open") { m->mothurOut("Ignoring: " + parameters["taxonomy"]); m->mothurOutEndLine(); taxonomyfile = ""; }
+ else if (taxonomyfile == "not found") { taxonomyfile = ""; }
+ if (taxonomyfile != "") { numFiles++; }
+
+ constaxonomyfile = validParameter.validFile(parameters, "constaxonomy", true);
+ if (constaxonomyfile == "not open") { m->mothurOut("Ignoring: " + parameters["constaxonomy"]); m->mothurOutEndLine(); constaxonomyfile = ""; }
+ else if (constaxonomyfile == "not found") { constaxonomyfile = ""; }
+ if (constaxonomyfile != "") { numFiles++; }
+
+ flowfile = validParameter.validFile(parameters, "flow", true);
+ if (flowfile == "not open") { m->mothurOut("Ignoring: " + parameters["flow"]); m->mothurOutEndLine(); flowfile = ""; }
+ else if (flowfile == "not found") { flowfile = ""; }
+ if (flowfile != "") { numFiles++; }
+
+ biomfile = validParameter.validFile(parameters, "biom", true);
+ if (biomfile == "not open") { m->mothurOut("Ignoring: " + parameters["biom"]); m->mothurOutEndLine(); biomfile = ""; }
+ else if (biomfile == "not found") { biomfile = ""; }
+ if (biomfile != "") { numFiles++; }
+
+ summaryfile = validParameter.validFile(parameters, "summary", true);
+ if (summaryfile == "not open") { m->mothurOut("Ignoring: " + parameters["summary"]); m->mothurOutEndLine(); summaryfile = ""; }
+ else if (summaryfile == "not found") { summaryfile = ""; }
+ if (summaryfile != "") { numFiles++; }
+
+ filefile = validParameter.validFile(parameters, "file", true);
+ if (filefile == "not open") { m->mothurOut("Ignoring: " + parameters["file"]); m->mothurOutEndLine(); filefile = ""; }
+ else if (filefile == "not found") { filefile = ""; }
+ if (filefile != "") { numFiles++; }
+
+ string temp = validParameter.validFile(parameters, "shorten", false); if (temp == "not found") { temp = "T"; }
+ mothurGenerated = m->isTrue(temp);
+
+ temp = validParameter.validFile(parameters, "deleteold", false); if (temp == "not found") { temp = "T"; }
+ deleteOld = m->isTrue(temp);
+
+ prefix = validParameter.validFile(parameters, "prefix", false); if (prefix == "not found") { temp = ""; }
+
+ outputfile = validParameter.validFile(parameters, "new", false);
+ if (outputfile == "not found") {
+ if (!mothurGenerated) { m->mothurOut("[ERROR]: you must enter an output file name"); m->mothurOutEndLine(); abort=true; }
+ outputfile = "";
+ }else if (outputDir != "") { outputfile = outputDir + m->getSimpleName(outputfile); }
+
+ if ((!mothurGenerated) && (numFiles > 1)) {
+ m->mothurOut("[ERROR]: You cannot use more than one file parameter unless mothur is generating the output filenames for you.\n"); abort= true;
+ }
+
+ if ((mothurGenerated) && (outputfile != "") && (numFiles != 1)) {
+ m->mothurOut("[ERROR]: You must allow mothur to generate the filenames or input one file at a time with a new name, not both.\n"); abort= true;
+ }
+ }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "RenameFileCommand", "RenameFileCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+// Renames or copies each file the user supplied, records the new name through the matching m->set...File() call, then prints mothur's current files.
+int RenameFileCommand::execute(){
+ try {
+ // an abort raised before execute returns 0 when help was requested and 2 otherwise
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
+ string newName = outputfile;
+
+ //look for file types
+ if (fastafile != "") {
+ newName = getNewName(fastafile, "fasta");
+ renameOrCopy(fastafile, newName);
+ m->setFastaFile(newName);
+ }
+ if (qualfile != "") {
+ newName = getNewName(qualfile, "qfile");
+ renameOrCopy(qualfile, newName);
+ m->setQualFile(newName);
+ }
+ if (phylipfile != "") {
+ newName = getNewName(phylipfile, "phylip");
+ renameOrCopy(phylipfile, newName);
+ m->setPhylipFile(newName);
+ }
+ if (columnfile != "") {
+ newName = getNewName(columnfile, "column");
+ renameOrCopy(columnfile, newName);
+ m->setColumnFile(newName);
+ }
+ if (listfile != "") {
+ newName = getNewName(listfile, "list");
+ renameOrCopy(listfile, newName);
+ m->setListFile(newName);
+ }
+ if (rabundfile != "") {
+ newName = getNewName(rabundfile, "rabund");
+ renameOrCopy(rabundfile, newName);
+ m->setRabundFile(newName);
+ }
+ if (sabundfile != "") {
+ newName = getNewName(sabundfile, "sabund");
+ renameOrCopy(sabundfile, newName);
+ m->setSabundFile(newName);
+ }
+ if (namefile != "") {
+ newName = getNewName(namefile, "name");
+ renameOrCopy(namefile, newName);
+ m->setNameFile(newName);
+ }
+ if (groupfile != "") {
+ newName = getNewName(groupfile, "group");
+ renameOrCopy(groupfile, newName);
+ m->setGroupFile(newName);
+ }
+ if (treefile != "") {
+ newName = getNewName(treefile, "tree");
+ renameOrCopy(treefile, newName);
+ m->setTreeFile(newName);
+ }
+ if (sharedfile != "") {
+ newName = getNewName(sharedfile, "shared");
+ renameOrCopy(sharedfile, newName);
+ m->setSharedFile(newName);
+ }
+ if (relabundfile != "") {
+ newName = getNewName(relabundfile, "relabund");
+ renameOrCopy(relabundfile, newName);
+ m->setRelAbundFile(newName);
+ }
+ if (designfile != "") {
+ newName = getNewName(designfile, "design");
+ renameOrCopy(designfile, newName);
+ m->setDesignFile(newName);
+ }
+ if (sfffile != "") {
+ newName = getNewName(sfffile, "sff");
+ renameOrCopy(sfffile, newName);
+ m->setSFFFile(newName);
+ }
+ if (oligosfile != "") {
+ newName = getNewName(oligosfile, "oligos");
+ renameOrCopy(oligosfile, newName);
+ m->setOligosFile(newName);
+ }
+ if (accnosfile != "") {
+ newName = getNewName(accnosfile, "accnos");
+ renameOrCopy(accnosfile, newName);
+ m->setAccnosFile(newName);
+ }
+ if (taxonomyfile != "") {
+ newName = getNewName(taxonomyfile, "taxonomy");
+ renameOrCopy(taxonomyfile, newName);
+ m->setTaxonomyFile(newName);
+ }
+ if (constaxonomyfile != "") {
+ newName = getNewName(constaxonomyfile, "constaxonomy");
+ renameOrCopy(constaxonomyfile, newName);
+ m->setTaxonomyFile(newName); //NOTE(review): overwrites the current taxonomy file set just above - confirm a constaxonomy-specific setter was not intended
+ }
+ if (flowfile != "") {
+ newName = getNewName(flowfile, "flow");
+ renameOrCopy(flowfile, newName);
+ m->setFlowFile(newName);
+ }
+ if (biomfile != "") {
+ newName = getNewName(biomfile, "biom");
+ renameOrCopy(biomfile, newName);
+ m->setBiomFile(newName);
+ }
+ if (countfile != "") {
+ newName = getNewName(countfile, "count");
+ renameOrCopy(countfile, newName);
+ m->setCountTableFile(newName);
+ }
+ if (summaryfile != "") {
+ newName = getNewName(summaryfile, "summary");
+ renameOrCopy(summaryfile, newName);
+ m->setSummaryFile(newName);
+ }
+ if (filefile != "") {
+ newName = getNewName(filefile, "file");
+ renameOrCopy(filefile, newName);
+ m->setFileFile(newName);
+ }
+ if (inputfile != "") { //generic input: renamed or copied only, no m->set...File() call is made for it
+ newName = getNewName(inputfile, "input");
+ renameOrCopy(inputfile, newName);
+ }
+
+ m->mothurOutEndLine(); m->mothurOut("Current files saved by mothur:"); m->mothurOutEndLine();
+ if (m->hasCurrentFiles()) { m->printCurrentFiles(""); }
+
+ return 0;
+ }
+
+ catch(exception& e) {
+ m->errorOut(e, "RenameFileCommand", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+// Builds the destination name for 'name', a file given through the 'type' parameter. Returns the user's 'new' name
+// (outputfile) unchanged unless mothur generates the name, in which case the result is <base>[.<tag>]<extension>.
+string RenameFileCommand::getNewName(string name, string type){
+    try {
+        if (!mothurGenerated) { return outputfile; }
+
+        string extension = m->getExtension(name);
+        string basicName = "final"; //used when there is no prefix and the file name has no '.'
+        string tag = "";
+
+        //the constructor leaves prefix as "not found" when the parameter is absent, so treat that like an empty prefix
+        if ((prefix == "") || (prefix == "not found")) {
+            //start after the last path separator so a dotted directory ("./", "/home/a.b/") cannot truncate the path
+            string::size_type slash = name.find_last_of("/\\");
+            string::size_type pos = name.find('.', (slash == string::npos) ? 0 : slash + 1);
+            if (pos != string::npos) { basicName = name.substr(0, pos); }
+        }else { basicName = prefix; }
+
+        if ((type == "shared") || (type == "list") || (type == "relabund") || (type == "rabund") || (type == "sabund")) {
+            vector<string> newTags; newTags.push_back("an"); newTags.push_back("tx"); newTags.push_back("agc"); newTags.push_back("dgc"); newTags.push_back("nn"); newTags.push_back("fn"); newTags.push_back("wn");
+
+            for (size_t i = 0; i < newTags.size(); i++) {
+                //find() needs the whole ".tag." token; find_first_of() accepted any one of its characters, so the first tag almost always won
+                if (name.find("." + newTags[i] + ".") != string::npos) { tag = newTags[i]; break; }
+            }
+        }else if (type == "constaxonomy") {
+            extension = ".cons.taxonomy";
+        }
+
+        string newName = basicName;
+        if (tag != "") { newName += "." + tag; }
+        newName += extension;
+        return newName;
+    }
+
+    catch(exception& e) {
+        m->errorOut(e, "RenameFileCommand", "getNewName");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+// Moves oldName to newName when deleteOld is set, otherwise copies it through a SystemCommand (cp/copy). Returns newName.
+string RenameFileCommand::renameOrCopy(string oldName, string newName){
+    try {
+        if (deleteOld) { m->renameFile(oldName, newName); }
+        else {
+            string command = "copy ";
+
+            #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+            command = "cp ";
+            #endif
+            //quote both paths so a name containing spaces reaches cp/copy as one argument (double quotes do not neutralise $, ` or embedded quotes)
+            string inputString = command + "\"" + oldName + "\" \"" + newName + "\"";
+            m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+            m->mothurOut("Running command: system(" + inputString + ")"); m->mothurOutEndLine();
+            m->mothurCalling = true; //flag is raised only while the nested system command runs
+
+            Command* systemCommand = new SystemCommand(inputString);
+            systemCommand->execute();
+            delete systemCommand;
+            m->mothurCalling = false;
+            m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+        }
+
+        return newName;
+    }
+
+    catch(exception& e) {
+        m->errorOut(e, "RenameFileCommand", "renameOrCopy");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+
+
+
diff --git a/source/commands/renamefilecommand.h b/source/commands/renamefilecommand.h
new file mode 100644
index 0000000..96b095d
--- /dev/null
+++ b/source/commands/renamefilecommand.h
@@ -0,0 +1,50 @@
+//
+// renamefilecommand.h
+// Mothur
+//
+// Created by Sarah Westcott on 4/18/16.
+// Copyright (c) 2016 Schloss Lab. All rights reserved.
+//
+
+#ifndef __Mothur__renamefilecommand__
+#define __Mothur__renamefilecommand__
+
+#include "command.hpp"
+
+class RenameFileCommand : public Command {
+// Command class behind "rename.file": renames (or copies) the supplied files and updates mothur's current files.
+#ifdef UNIT_TEST
+ friend class TestRenameFileCommand; // gives the test class access to the private members below
+#endif
+
+public:
+
+ RenameFileCommand(string);
+ RenameFileCommand();
+ ~RenameFileCommand(){}
+
+ vector<string> setParameters();
+ string getCommandName() { return "rename.file"; }
+ string getCommandCategory() { return "General"; }
+
+ string getHelpString();
+ string getOutputPattern(string);
+ string getCitation() { return "http://www.mothur.org/wiki/rename.file"; }
+ string getDescription() { return "renames file and updates current"; }
+ // execute() returns 0 on success or when help was requested, and 2 when the command was aborted
+ int execute();
+ void help() { m->mothurOut(getHelpString()); }
+
+ // one path per supported file parameter (left empty when not supplied), plus outputDir, the 'new' name (outputfile) and 'prefix'
+private:
+ string accnosfile, phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, designfile, taxonomyfile, biomfile, countfile, summaryfile, inputfile, outputDir;
+ string treefile, sharedfile, ordergroupfile, relabundfile, fastafile, qualfile, sfffile, oligosfile, flowfile, filefile, outputfile, constaxonomyfile, prefix;
+ bool mothurGenerated, abort, deleteOld; // mothurGenerated comes from 'shorten', deleteOld from 'deleteold'; both default to T
+ // NOTE(review): ordergroupfile and outputNames are not referenced in the visible part of the .cpp - confirm they are still needed
+ vector<string> outputNames;
+ // getNewName builds the destination name; renameOrCopy moves or copies the file and returns the new name
+ string getNewName(string name, string type);
+ string renameOrCopy(string oldName, string newName);
+};
+
+#endif /* defined(__Mothur__renamefilecommand__) */
diff --git a/source/commands/renameseqscommand.cpp b/source/commands/renameseqscommand.cpp
index 3087867..baff6f2 100644
--- a/source/commands/renameseqscommand.cpp
+++ b/source/commands/renameseqscommand.cpp
@@ -771,6 +771,7 @@ vector<map<string, string> > RenameSeqsCommand::readFiles(){
}else if (pieces.size() == 3) {
thisFileName = pieces[0]+"-"+pieces[1];
group = pieces[2];
+ m->checkGroupName(group);
}else {
m->mothurOut("[ERROR]: Your file contains " + toString(pieces.size()) + " columns. TThe file option allows you to provide a 2 or 3 column file. The first column contains the file type: fasta or qfile. The second column is the filename, and the optional third column can be a group name. If there is a third column, all sequences in the file will be assigned to that group. This can be helpful when renaming data separated into samples.\n"); m->control_pressed = true;
}
diff --git a/source/commands/sensspeccommand.cpp b/source/commands/sensspeccommand.cpp
index aad48d4..d00f076 100644
--- a/source/commands/sensspeccommand.cpp
+++ b/source/commands/sensspeccommand.cpp
@@ -15,6 +15,7 @@ vector<string> SensSpecCommand::setParameters(){
CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none","sensspec",false,true,true); parameters.push_back(plist);
CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none","",false,false); parameters.push_back(pphylip);
CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none","",false,false); parameters.push_back(pcolumn);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pcount);
CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname);
CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
CommandParameter pcutoff("cutoff", "Number", "", "-1.00", "", "", "","",false,false); parameters.push_back(pcutoff);
@@ -37,7 +38,7 @@ vector<string> SensSpecCommand::setParameters(){
string SensSpecCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The sens.spec command....\n";
+ helpString += "The sens.spec command determines the quality of the clusters.\n";
return helpString;
}
catch(exception& e) {
@@ -141,6 +142,14 @@ SensSpecCommand::SensSpecCommand(string option) {
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["name"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
//check for required parameters
@@ -167,6 +176,11 @@ SensSpecCommand::SensSpecCommand(string option) {
if (namefile == "not found") { namefile = ""; }
else if (namefile == "not open") { namefile = ""; abort = true; }
else { m->setNameFile(namefile); }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not found") { countfile = ""; }
+ else if (countfile == "not open") { countfile = ""; abort = true; }
+ else { m->setCountTableFile(countfile); }
if ((phylipfile == "") && (columnfile == "")) { //is there are current file available for either of these?
@@ -183,8 +197,19 @@ SensSpecCommand::SensSpecCommand(string option) {
}
}else if ((phylipfile != "") && (columnfile != "")) { m->mothurOut("When executing a sens.spec command you must enter ONLY ONE of the following: phylip or column."); m->mothurOutEndLine(); abort = true; }
- if ((namefile == "") && (columnfile != "")) {
- m->mothurOut("[ERROR]: you must provide a name file with a column file."); m->mothurOutEndLine(); abort = true;
+ if (columnfile != "") {
+ if ((namefile == "") && (countfile == "")){
+ namefile = m->getNameFile();
+ if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
+ else {
+ countfile = m->getCountTableFile();
+ if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+ else {
+ m->mothurOut("You need to provide a namefile or countfile if you are going to use the column format."); m->mothurOutEndLine();
+ abort = true;
+ }
+ }
+ }
}
if ((namefile == "") && (phylipfile != "")) {
@@ -609,17 +634,24 @@ string SensSpecCommand::preProcessList(){
}
phylipFile.close();
}else {
- ifstream nameFileHandle;
- m->openInputFile(namefile, nameFileHandle);
- string uniqueSeqName, redundantSeqNames;
-
- while(nameFileHandle){
- if (m->control_pressed) { return ""; }
- nameFileHandle >> uniqueSeqName >> redundantSeqNames;
- uniqueNames.insert(uniqueSeqName);
- m->gobble(nameFileHandle);
+ if (namefile != "") {
+ ifstream nameFileHandle;
+ m->openInputFile(namefile, nameFileHandle);
+ string uniqueSeqName, redundantSeqNames;
+
+ while(nameFileHandle){
+ if (m->control_pressed) { return ""; }
+ nameFileHandle >> uniqueSeqName >> redundantSeqNames;
+ uniqueNames.insert(uniqueSeqName);
+ m->gobble(nameFileHandle);
+ }
+ nameFileHandle.close();
+ }else if (countfile != "") {
+ CountTable ct;
+ ct.readTable(countfile, false, true);
+ vector<string> countNames = ct.getNamesOfSeqs();
+ for (int i = 0; i < countNames.size(); i++) { uniqueNames.insert(countNames[i]); }
}
- nameFileHandle.close();
}
//read list file, if numSeqs > unique names then remove redundant names
@@ -637,7 +669,7 @@ string SensSpecCommand::preProcessList(){
//read in list vector
ListVector list(in);
-
+
//listfile is already unique
if (list.getNumSeqs() == uniqueNames.size()) { in.close(); out.close(); m->mothurRemove(newListFile); return ""; }
diff --git a/source/commands/sensspeccommand.h b/source/commands/sensspeccommand.h
index 6dcf74e..b6460a7 100644
--- a/source/commands/sensspeccommand.h
+++ b/source/commands/sensspeccommand.h
@@ -41,7 +41,7 @@ private:
void setUpOutput();
void outputStatistics(string, string);
- string listFile, distFile, sensSpecFileName, phylipfile, columnfile, namefile;
+ string listFile, distFile, sensSpecFileName, phylipfile, columnfile, namefile, countfile;
string outputDir;
string format;
vector<string> outputNames;
diff --git a/source/commands/seqerrorcommand.cpp b/source/commands/seqerrorcommand.cpp
index d773064..f061fd3 100644
--- a/source/commands/seqerrorcommand.cpp
+++ b/source/commands/seqerrorcommand.cpp
@@ -362,6 +362,7 @@ int SeqErrorCommand::execute(){
ofstream errorCountFile;
m->openOutputFile(errorCountFileName, errorCountFile);
outputNames.push_back(errorCountFileName); outputTypes["errorcount"].push_back(errorCountFileName);
+ m->mothurOut("\nMultiply error rate by 100 to obtain the percent sequencing errors.\n");
m->mothurOut("Overall error rate:\t" + toString((double)(totalBases - totalMatches) / (double)totalBases) + "\n");
m->mothurOut("Errors\tSequences\n");
errorCountFile << "Errors\tSequences\n";
diff --git a/source/commands/splitgroupscommand.cpp b/source/commands/splitgroupscommand.cpp
index a1ab81d..50b80a4 100644
--- a/source/commands/splitgroupscommand.cpp
+++ b/source/commands/splitgroupscommand.cpp
@@ -294,7 +294,7 @@ int SplitGroupCommand::runNameGroup(){
variables["[filename]"] = namefileRoot;
string newName = getOutputFileName("name",variables);
- parser->getSeqs(Groups[i], newFasta, false);
+ parser->getSeqs(Groups[i], newFasta, "/ab=", "/", false);
outputNames.push_back(newFasta); outputTypes["fasta"].push_back(newFasta);
if (m->control_pressed) { delete parser; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
diff --git a/source/commands/sracommand.cpp b/source/commands/sracommand.cpp
index 1cff894..14f8475 100644
--- a/source/commands/sracommand.cpp
+++ b/source/commands/sracommand.cpp
@@ -963,6 +963,7 @@ int SRACommand::readFile(map<string, vector<string> >& files){
thisFileName1 = pieces[1];
thisFileName2 = pieces[2];
group = pieces[0];
+ m->checkGroupName(group);
if (setOligosParameter) { m->mothurOut("[ERROR]: You cannot have an oligosfile and 3 column file option at the same time. Aborting. \n"); m->control_pressed = true; }
if ((thisFileName2 != "none") && (thisFileName2 != "NONE" )) { if (!using3NONE) { libLayout = "paired"; } else { m->mothurOut("[ERROR]: You cannot have a 3 column file with paired and unpaired files at the same time. Aborting. \n"); m->control_pressed = true; } }
else { thisFileName2 = ""; libLayout = "single"; using3NONE = true; }
diff --git a/source/commands/summarytaxcommand.cpp b/source/commands/summarytaxcommand.cpp
index 26cc60b..5c4b00a 100644
--- a/source/commands/summarytaxcommand.cpp
+++ b/source/commands/summarytaxcommand.cpp
@@ -17,7 +17,6 @@ vector<string> SummaryTaxCommand::setParameters(){
CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pname);
CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
- CommandParameter preftaxonomy("reftaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(preftaxonomy);
CommandParameter prelabund("relabund", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(prelabund);
CommandParameter poutput("output", "Multiple", "simple-detail", "detail", "", "", "","",false,false, true); parameters.push_back(poutput);
CommandParameter pthreshold("threshold", "Number", "", "0", "", "", "","",false,true); parameters.push_back(pthreshold);
@@ -44,7 +43,6 @@ string SummaryTaxCommand::getHelpString(){
helpString += "The name parameter allows you to enter a name file associated with your taxonomy file. \n";
helpString += "The group parameter allows you add a group file so you can have the summary totals broken up by group.\n";
helpString += "The count parameter allows you add a count file so you can have the summary totals broken up by group.\n";
- helpString += "The reftaxonomy parameter allows you give the name of the reference taxonomy file used when you classified your sequences. It is not required, but providing it will keep the rankIDs in the summary file static.\n";
helpString += "The threshold parameter allows you to specify a cutoff for the taxonomy file that is being inputted. Once the classification falls below the threshold the mothur will refer to it as unclassified when calculating the concensus. This feature is similar to adjusting the cutoff in classify.seqs. Default=0.\n";
helpString += "The output parameter allows you to specify format of your summary file. Options are simple and detail. The default is detail.\n";
helpString += "The printlevel parameter allows you to specify taxlevel of your summary file to print to. Options are 1 to the maz level in the file. The default is -1, meaning max level. If you select a level greater than the level your sequences classify to, mothur will print to the level your max level. \n";
@@ -140,14 +138,6 @@ SummaryTaxCommand::SummaryTaxCommand(string option) {
if (path == "") { parameters["group"] = inputDir + it->second; }
}
- it = parameters.find("reftaxonomy");
- //user has given a template file
- if(it != parameters.end()){
- path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- if (path == "") { parameters["reftaxonomy"] = inputDir + it->second; }
- }
-
it = parameters.find("count");
//user has given a template file
if(it != parameters.end()){
@@ -194,11 +184,7 @@ SummaryTaxCommand::SummaryTaxCommand(string option) {
m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
}
- refTaxonomy = validParameter.validFile(parameters, "reftaxonomy", true);
- if (refTaxonomy == "not found") { refTaxonomy = ""; m->mothurOut("reftaxonomy is not required, but if given will keep the rankIDs in the summary file static."); m->mothurOutEndLine(); }
- else if (refTaxonomy == "not open") { refTaxonomy = ""; abort = true; }
-
- //if the user changes the output directory command factory will send this info to us in the output parameter
+ //if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
outputDir = "";
outputDir += m->hasPath(taxfile); //if user entered a file with a path then preserve it
@@ -249,13 +235,8 @@ int SummaryTaxCommand::execute(){
}
PhyloSummary* taxaSum;
- if (countfile != "") {
- if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, ct, relabund, printlevel); }
- else { taxaSum = new PhyloSummary(ct, relabund, printlevel); }
- }else {
- if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, groupMap, relabund, printlevel); }
- else { taxaSum = new PhyloSummary(groupMap, relabund, printlevel); }
- }
+ if (countfile != "") { taxaSum = new PhyloSummary(ct, relabund, printlevel);
+ }else { taxaSum = new PhyloSummary(groupMap, relabund, printlevel); }
if (m->control_pressed) { if (groupMap != NULL) { delete groupMap; } if (ct != NULL) { delete ct; } delete taxaSum; return 0; }
@@ -274,12 +255,14 @@ int SummaryTaxCommand::execute(){
if (threshold != 0) { taxon = processTaxMap(taxon); }
+ //cout << taxon << endl;
+
//add sequence to summary, countfile info included from Phylosummary constructor
taxaSum->addSeqToTree(name, taxon);
}
in.close();
}
- else if ((threshold != 0) && (namefile != "")) {
+ else if (namefile != "") {
map<string, vector<string> > nameMap;
map<string, vector<string> >::iterator itNames;
m->readNames(namefile, nameMap);
@@ -380,7 +363,8 @@ string SummaryTaxCommand::processTaxMap(string tax) {
confidence = "-1";
}
float con = 0;
- convert(confidence, con);
+
+ m->mothurConvert(confidence, con);
if (con == -1) { i += taxLength; } //not a confidence score, no confidence scores on this taxonomy
else if ( con < threshold) { spot = i; break; } //below threshold, set all to unclassified
diff --git a/source/commands/summarytaxcommand.h b/source/commands/summarytaxcommand.h
index b7b59a6..28b4f73 100644
--- a/source/commands/summarytaxcommand.h
+++ b/source/commands/summarytaxcommand.h
@@ -35,7 +35,7 @@ class SummaryTaxCommand : public Command {
private:
bool abort, relabund;
- string taxfile, outputDir, namefile, groupfile, refTaxonomy, countfile, output;
+ string taxfile, outputDir, namefile, groupfile, countfile, output;
int printlevel, threshold;
vector<string> outputNames;
map<string, int> nameMap;
diff --git a/source/datastructures/alignmentdb.cpp b/source/datastructures/alignmentdb.cpp
index 33de4d3..ea65c8e 100644
--- a/source/datastructures/alignmentdb.cpp
+++ b/source/datastructures/alignmentdb.cpp
@@ -11,7 +11,6 @@
#include "kmerdb.hpp"
#include "suffixdb.hpp"
#include "blastdb.hpp"
-#include "referencedb.h"
/**************************************************************************************************/
AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int tid){ // This assumes that the template database is in fasta format, may
@@ -20,68 +19,43 @@ AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gap
longest = 0;
method = s;
bool needToGenerate = true;
- ReferenceDB* rdb = ReferenceDB::getInstance();
bool silent = false;
threadID = tid;
- if (fastaFileName == "saved-silent") {
- fastaFileName = "saved"; silent = true;
- }
-
- if (fastaFileName == "saved") {
- int start = time(NULL);
-
- if (!silent) { m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory."); m->mothurOutEndLine(); }
-
- for (int i = 0; i < rdb->referenceSeqs.size(); i++) {
- templateSequences.push_back(rdb->referenceSeqs[i]);
- //save longest base
- if (rdb->referenceSeqs[i].getUnaligned().length() >= longest) { longest = (rdb->referenceSeqs[i].getUnaligned().length()+1); }
- }
- fastaFileName = rdb->getSavedReference();
-
- numSeqs = templateSequences.size();
- if (!silent) { m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(rdb->referenceSeqs.size()) + " sequences.");m->mothurOutEndLine(); }
+ int start = time(NULL);
+ m->mothurOutEndLine();
+ m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t"); cout.flush();
+ //bool aligned = false;
+ int tempLength = 0;
+
+ ifstream fastaFile;
+ m->openInputFile(fastaFileName, fastaFile);
+
+ while (!fastaFile.eof()) {
+ Sequence temp(fastaFile); m->gobble(fastaFile);
- }else {
- int start = time(NULL);
- m->mothurOutEndLine();
- m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t"); cout.flush();
- //bool aligned = false;
- int tempLength = 0;
+ if (m->control_pressed) { templateSequences.clear(); break; }
- ifstream fastaFile;
- m->openInputFile(fastaFileName, fastaFile);
-
- while (!fastaFile.eof()) {
- Sequence temp(fastaFile); m->gobble(fastaFile);
-
- if (m->control_pressed) { templateSequences.clear(); break; }
-
- if (temp.getName() != "") {
- templateSequences.push_back(temp);
-
- if (rdb->save) { rdb->referenceSeqs.push_back(temp); }
-
- //save longest base
- if (temp.getUnaligned().length() >= longest) { longest = (temp.getUnaligned().length()+1); }
-
- if (tempLength != 0) {
- if (tempLength != temp.getAligned().length()) { m->mothurOut("[ERROR]: template is not aligned, aborting.\n"); m->control_pressed=true; }
- }else { tempLength = temp.getAligned().length(); }
- }
- }
- fastaFile.close();
-
- numSeqs = templateSequences.size();
- //all of this is elsewhere already!
-
- m->mothurOut("DONE.");
- m->mothurOutEndLine(); cout.flush();
- m->mothurOut("It took " + toString(time(NULL) - start) + " to read " + toString(templateSequences.size()) + " sequences."); m->mothurOutEndLine();
+ if (temp.getName() != "") {
+ templateSequences.push_back(temp);
+
+ //save longest base
+ if (temp.getUnaligned().length() >= longest) { longest = (temp.getUnaligned().length()+1); }
+
+ if (tempLength != 0) {
+ if (tempLength != temp.getAligned().length()) { m->mothurOut("[ERROR]: template is not aligned, aborting.\n"); m->control_pressed=true; }
+ }else { tempLength = temp.getAligned().length(); }
+ }
+ }
+ fastaFile.close();
+
+ numSeqs = templateSequences.size();
+ //all of this is elsewhere already!
+
+ m->mothurOut("DONE.");
+ m->mothurOutEndLine(); cout.flush();
+ m->mothurOut("It took " + toString(time(NULL) - start) + " to read " + toString(templateSequences.size()) + " sequences."); m->mothurOutEndLine();
- }
-
//in case you delete the seqs and then ask for them
emptySequence = Sequence();
@@ -93,13 +67,19 @@ AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gap
string kmerDBName;
if(method == "kmer") {
search = new KmerDB(fastaFileName, kmerSize);
-
-
+
kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
+
ifstream kmerFileTest(kmerDBName.c_str());
if(kmerFileTest){
bool GoodFile = m->checkReleaseVersion(kmerFileTest, m->getVersion());
+ int shortcutTimeStamp = m->getTimeStamp(kmerDBName);
+ int referenceTimeStamp = m->getTimeStamp(fastaFileName);
+
+ //if the shortcut file is older then the reference file, remake shortcut file
+ if (shortcutTimeStamp < referenceTimeStamp) { GoodFile = false; }
+
if (GoodFile) { needToGenerate = false; }
}
diff --git a/source/datastructures/designmap.cpp b/source/datastructures/designmap.cpp
index 507cc40..079287b 100644
--- a/source/datastructures/designmap.cpp
+++ b/source/datastructures/designmap.cpp
@@ -67,6 +67,7 @@ int DesignMap::read(string file) {
//file without headers, fix it
if (temp != "group") {
group = temp;
+ m->checkGroupName(group);
if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + "\n"); }
//if group info, then read it
@@ -74,6 +75,7 @@ int DesignMap::read(string file) {
for (int i = 0; i < numCategories; i++) {
int thisIndex = indexCategoryMap[originalGroupIndexes[i]]; //find index of this category because we sort the values.
string temp = tempColumnHeaders[i];
+ m->checkGroupName(temp);
categoryValues[thisIndex] = temp;
if (m->debug) { m->mothurOut("[DEBUG]: value = " + temp + "\n"); }
@@ -101,7 +103,8 @@ int DesignMap::read(string file) {
if (m->control_pressed) { break; }
- in >> group; m->gobble(in);
+ in >> group; m->gobble(in);
+ m->checkGroupName(group);
if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + "\n"); }
//if group info, then read it
@@ -110,6 +113,7 @@ int DesignMap::read(string file) {
int thisIndex = indexCategoryMap[originalGroupIndexes[i]]; //find index of this category because we sort the values.
string temp = "not found";
in >> temp; categoryValues[thisIndex] = temp; m->gobble(in);
+ m->checkGroupName(temp);
if (m->debug) { m->mothurOut("[DEBUG]: value = " + temp + "\n"); }
@@ -231,6 +235,7 @@ string DesignMap::get(string groupName, string categoryName) {
//add group, assumes order is correct
int DesignMap::push_back(string group, vector<string> values) {
try {
+ m->checkGroupName(group);
map<string, int>::iterator it = indexGroupNameMap.find(group);
if (it == indexGroupNameMap.end()) {
if (values.size() != getNumCategories()) { m->mothurOut("[ERROR]: Your design file has a " + toString(getNumCategories()) + " categories and " + group + " has " + toString(values.size()) + ", please correct."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
diff --git a/source/datastructures/groupmap.cpp b/source/datastructures/groupmap.cpp
index ef39321..9f049e1 100644
--- a/source/datastructures/groupmap.cpp
+++ b/source/datastructures/groupmap.cpp
@@ -25,6 +25,7 @@ int GroupMap::addSeq(string name, string group) {
try {
int error = 0;
+ m->checkGroupName(group);
setNamesOfGroups(group);
if (m->debug) { m->mothurOut("[DEBUG]: name = '" + name + "', group = '" + group + "'\n"); }
@@ -65,7 +66,8 @@ int GroupMap::readMap() {
if (columnOne) { seqName = pieces[i]; columnOne=false; }
else { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
- if (pairDone) {
+ if (pairDone) {
+ m->checkGroupName(seqGroup);
setNamesOfGroups(seqGroup);
if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
@@ -90,7 +92,8 @@ int GroupMap::readMap() {
if (columnOne) { seqName = pieces[i]; columnOne=false; }
else { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
- if (pairDone) {
+ if (pairDone) {
+ m->checkGroupName(seqGroup);
setNamesOfGroups(seqGroup);
if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
@@ -135,7 +138,8 @@ int GroupMap::readDesignMap() {
if (columnOne) { seqName = pieces[i]; columnOne=false; }
else { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
- if (pairDone) {
+ if (pairDone) {
+ m->checkGroupName(seqGroup);
setNamesOfGroups(seqGroup);
if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
@@ -160,7 +164,8 @@ int GroupMap::readDesignMap() {
if (columnOne) { seqName = pieces[i]; columnOne=false; }
else { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
- if (pairDone) {
+ if (pairDone) {
+ m->checkGroupName(seqGroup);
setNamesOfGroups(seqGroup);
if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
@@ -209,7 +214,8 @@ int GroupMap::readMap(string filename) {
if (columnOne) { seqName = pieces[i]; columnOne=false; }
else { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
- if (pairDone) {
+ if (pairDone) {
+ m->checkGroupName(seqGroup);
setNamesOfGroups(seqGroup);
if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
@@ -234,7 +240,8 @@ int GroupMap::readMap(string filename) {
if (columnOne) { seqName = pieces[i]; columnOne=false; }
else { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
- if (pairDone) {
+ if (pairDone) {
+ m->checkGroupName(seqGroup);
setNamesOfGroups(seqGroup);
if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
@@ -282,7 +289,8 @@ int GroupMap::readDesignMap(string filename) {
if (columnOne) { seqName = pieces[i]; columnOne=false; }
else { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
- if (pairDone) {
+ if (pairDone) {
+ m->checkGroupName(seqGroup);
setNamesOfGroups(seqGroup);
if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
@@ -307,7 +315,8 @@ int GroupMap::readDesignMap(string filename) {
if (columnOne) { seqName = pieces[i]; columnOne=false; }
else { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
- if (pairDone) {
+ if (pairDone) {
+ m->checkGroupName(seqGroup);
setNamesOfGroups(seqGroup);
if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
@@ -353,6 +362,7 @@ string GroupMap::getGroup(string sequenceName) {
/************************************************************/
void GroupMap::setGroup(string sequenceName, string groupN) {
+ m->checkGroupName(groupN);
setNamesOfGroups(groupN);
m->checkName(sequenceName);
it = groupmap.find(sequenceName);
diff --git a/source/datastructures/oligos.cpp b/source/datastructures/oligos.cpp
index 245613e..ef57452 100644
--- a/source/datastructures/oligos.cpp
+++ b/source/datastructures/oligos.cpp
@@ -91,6 +91,7 @@ int Oligos::readOligos(){
m->openInputFile(oligosfile, inOligos);
string type, oligo, roligo, group;
+ bool pfUsesNone = false; bool prUsesNone = false; bool bfUsesNone = false; bool brUsesNone = false;
while(!inOligos.eof()){
@@ -146,7 +147,13 @@ int Oligos::readOligos(){
roligo[i] = toupper(roligo[i]);
if(roligo[i] == 'U') { roligo[i] = 'T'; }
}
- if (reversePairs) { roligo = reverseOligo(roligo); }
+
+ if (oligo == "NONE") { pfUsesNone = true; }
+ else if (roligo == "NONE") { prUsesNone = true; }
+
+ if (roligo != "NONE") {
+ if (reversePairs) { roligo = reverseOligo(roligo); }
+ }
group = "";
// get rest of line in case there is a primer name
@@ -202,7 +209,12 @@ int Oligos::readOligos(){
if(reverseBarcode[i] == 'U') { reverseBarcode[i] = 'T'; }
}
- if (reversePairs) { reverseBarcode = reverseOligo(reverseBarcode); }
+ if (oligo == "NONE") { bfUsesNone = true; }
+ else if (reverseBarcode == "NONE") { brUsesNone = true; }
+
+ if (reverseBarcode != "NONE") {
+ if (reversePairs) { reverseBarcode = reverseOligo(reverseBarcode); }
+ }
oligosPair newPair(oligo, reverseBarcode);
if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
@@ -240,6 +252,60 @@ int Oligos::readOligos(){
if (hasPBarcodes || hasPPrimers) {
pairedOligos = true;
if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine(); return 0; }
+
+ //check for "NONE" to make sure if none is used then all primers in that position are NONE
+ //ex. Can't have: PRIMER NONE reversePrimer and PRIMER fowardPrimer reversePrimer in same file
+ if (bfUsesNone) {
+ bool allNONE = true;
+ for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
+ if ((itBar->second).forward != "NONE") {
+ allNONE = false;
+ break;
+ }
+ }
+ if (!allNONE) {
+ m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix forwardBarcode=NONE and forwardBarcode=barcodeString in same file. Mothur assumes all sequences have forward barcodes or all do not, quitting."); m->mothurOutEndLine(); return 0;
+ }
+ }
+
+ if (brUsesNone) {
+ bool allNONE = true;
+ for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
+ if ((itBar->second).reverse != "NONE") {
+ allNONE = false;
+ break;
+ }
+ }
+ if (!allNONE) {
+ m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix reverseBarcode=NONE and reverseBarcode=barcodeString in same file. Mothur assumes all sequences have reverse barcodes or all do not, quitting."); m->mothurOutEndLine(); return 0;
+ }
+ }
+
+ if (pfUsesNone) {
+ bool allNONE = true;
+ for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
+ if ((itPrimer->second).forward != "NONE") {
+ allNONE = false;
+ break;
+ }
+ }
+ if (!allNONE) {
+ m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix forwardPrimer=NONE and forwardPrimer=primerString in same file. Mothur assumes all sequences have forward primers or all do not, quitting."); m->mothurOutEndLine(); return 0;
+ }
+ }
+
+ if (prUsesNone) {
+ bool allNONE = true;
+ for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
+ if ((itPrimer->second).reverse != "NONE") {
+ allNONE = false;
+ break;
+ }
+ }
+ if (!allNONE) {
+ m->control_pressed = true; m->mothurOut("[ERROR]: cannot mix reversePrimer=NONE and reversePrimer=primerString in same file. Mothur assumes all sequences have reverse primers or all do not, quitting."); m->mothurOutEndLine(); return 0;
+ }
+ }
}
diff --git a/source/datastructures/referencedb.cpp b/source/datastructures/referencedb.cpp
deleted file mode 100644
index 249d2db..0000000
--- a/source/datastructures/referencedb.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * referencedb.cpp
- * Mothur
- *
- * Created by westcott on 6/29/11.
- * Copyright 2011 Schloss Lab. All rights reserved.
- *
- */
-
-#include "referencedb.h"
-
-//needed for testing project
-//ReferenceDB* ReferenceDB::myInstance;
-
-/******************************************************/
-ReferenceDB* ReferenceDB::getInstance() {
- if(myInstance == NULL) {
- myInstance = new ReferenceDB();
- }
- return myInstance;
- }
-/******************************************************/
-void ReferenceDB::clearMemory() {
- referenceSeqs.clear();
- setSavedReference("");
- for(int i = 0; i < wordGenusProb.size(); i++) { wordGenusProb[i].clear(); }
- wordGenusProb.clear();
- WordPairDiffArr.clear();
- setSavedTaxonomy("");
-}
-/*******************************************************
-ReferenceDB::~ReferenceDB() { myInstance = NULL; }
-*******************************************************/
-
diff --git a/source/datastructures/referencedb.h b/source/datastructures/referencedb.h
deleted file mode 100644
index 5262e80..0000000
--- a/source/datastructures/referencedb.h
+++ /dev/null
@@ -1,49 +0,0 @@
-#ifndef MYREFERENCEDB_H
-#define MYREFERENCEDB_H
-
-/*
- * referencedb.h
- * Mothur
- *
- * Created by westcott on 6/29/11.
- * Copyright 2011 Schloss Lab. All rights reserved.
- *
- */
-
-
-#include "mothur.h"
-#include "sequence.hpp"
-
-/***********************************************/
-
-class ReferenceDB {
-
- public:
-
- static ReferenceDB* getInstance();
- void clearMemory();
-
- bool save;
- vector<Sequence> referenceSeqs;
- vector< vector<float> > wordGenusProb;
- vector<diffPair> WordPairDiffArr;
-
- string getSavedReference() { return referencefile; }
- void setSavedReference(string p) { referencefile = p; }
- string getSavedTaxonomy() { return taxonomyfile; }
- void setSavedTaxonomy(string p) { taxonomyfile = p; }
-
- private:
-
- static ReferenceDB* myInstance;
- ReferenceDB() { referencefile = ""; taxonomyfile = ""; save = false; }
- ReferenceDB(const ReferenceDB&){}// Disable copy constructor
- void operator=(const ReferenceDB&){} // Disable assignment operator
- ~ReferenceDB(){ myInstance = 0; }
-
- string referencefile, taxonomyfile;
-};
-/***********************************************/
-
-#endif
-
diff --git a/source/datastructures/sequence.cpp b/source/datastructures/sequence.cpp
index 0f53c3b..fef6f1e 100644
--- a/source/datastructures/sequence.cpp
+++ b/source/datastructures/sequence.cpp
@@ -733,7 +733,13 @@ void Sequence::printSequence(ostream& out){
out << unaligned << endl;
}
}
+//********************************************************************************************************************
+void Sequence::printUnAlignedSequence(ostream& out){
+
+ out << ">" << name << comment << endl;
+ out << unaligned << endl;
+}
//********************************************************************************************************************
int Sequence::getAlignLength(){
diff --git a/source/datastructures/sequence.hpp b/source/datastructures/sequence.hpp
index 84da20e..6248b83 100644
--- a/source/datastructures/sequence.hpp
+++ b/source/datastructures/sequence.hpp
@@ -74,6 +74,7 @@ public:
int getLongHomoPolymer();
bool getIsAligned();
void printSequence(ostream&);
+ void printUnAlignedSequence(ostream&);
private:
MothurOut* m;
diff --git a/source/datastructures/sequencecountparser.cpp b/source/datastructures/sequencecountparser.cpp
index 5349da0..e49ba98 100644
--- a/source/datastructures/sequencecountparser.cpp
+++ b/source/datastructures/sequencecountparser.cpp
@@ -159,7 +159,7 @@ vector<Sequence> SequenceCountParser::getSeqs(string g){
}
}
/************************************************************/
-int SequenceCountParser::getSeqs(string g, string filename, bool uchimeFormat=false){
+int SequenceCountParser::getSeqs(string g, string filename, string tag, string tag2, bool uchimeFormat=false){
try {
map<string, vector<Sequence> >::iterator it;
vector<Sequence> seqForThisGroup;
@@ -191,7 +191,7 @@ int SequenceCountParser::getSeqs(string g, string filename, bool uchimeFormat=fa
error = 1;
m->mothurOut("[ERROR]: " + seqForThisGroup[i].getName() + " is in your fastafile, but is not in your count file, please correct."); m->mothurOutEndLine();
}else {
- seqPriorityNode temp(itCount->second, seqForThisGroup[i].getAligned(), seqForThisGroup[i].getName());
+ seqPriorityNode temp(itCount->second, seqForThisGroup[i].getUnaligned(), seqForThisGroup[i].getName());
nameVector.push_back(temp);
}
}
@@ -206,7 +206,7 @@ int SequenceCountParser::getSeqs(string g, string filename, bool uchimeFormat=fa
if(m->control_pressed) { out.close(); m->mothurRemove(filename); return 1; }
- out << ">" << nameVector[i].name << "/ab=" << nameVector[i].numIdentical << "/" << endl << nameVector[i].seq << endl; //
+ out << ">" << nameVector[i].name << tag << nameVector[i].numIdentical << tag2 << endl << nameVector[i].seq << endl; //
}
}else {
diff --git a/source/datastructures/sequencecountparser.h b/source/datastructures/sequencecountparser.h
index 4889ea6..cce8e32 100644
--- a/source/datastructures/sequencecountparser.h
+++ b/source/datastructures/sequencecountparser.h
@@ -38,7 +38,7 @@ public:
vector<Sequence> getSeqs(string); //returns unique sequences in a specific group
map<string, int> getCountTable(string); //returns seqName -> numberOfRedundantSeqs for a specific group - the count file format, but each line is parsed by group.
- int getSeqs(string, string, bool); //prints unique sequences in a specific group to a file - group, filename, uchimeFormat=false
+ int getSeqs(string, string, string, string, bool); //prints unique sequences in a specific group to a file - group, filename, uchimeFormat=false, tag (/ab= or ;size=), tag2(/ or ;)
int getCountTable(string, string); //print seqName -> numberRedundantSeqs for a specific group - group, filename
map<string, string> getAllSeqsMap(){ return allSeqsMap; } //returns map where the key=sequenceName and the value=representativeSequence - helps us remove duplicates after group by group processing
diff --git a/source/datastructures/sequenceparser.cpp b/source/datastructures/sequenceparser.cpp
index 37891eb..0c8ce7d 100644
--- a/source/datastructures/sequenceparser.cpp
+++ b/source/datastructures/sequenceparser.cpp
@@ -351,7 +351,7 @@ vector<Sequence> SequenceParser::getSeqs(string g){
}
}
/************************************************************/
-int SequenceParser::getSeqs(string g, string filename, bool uchimeFormat=false){
+int SequenceParser::getSeqs(string g, string filename, string tag, string tag2, bool uchimeFormat=false){
try {
map<string, vector<Sequence> >::iterator it;
vector<Sequence> seqForThisGroup;
@@ -385,7 +385,7 @@ int SequenceParser::getSeqs(string g, string filename, bool uchimeFormat=false){
}else {
int num = m->getNumNames(itNameMap->second);
- seqPriorityNode temp(num, seqForThisGroup[i].getAligned(), seqForThisGroup[i].getName());
+ seqPriorityNode temp(num, seqForThisGroup[i].getUnaligned(), seqForThisGroup[i].getName());
nameVector.push_back(temp);
}
}
@@ -400,7 +400,7 @@ int SequenceParser::getSeqs(string g, string filename, bool uchimeFormat=false){
if(m->control_pressed) { out.close(); m->mothurRemove(filename); return 1; }
- out << ">" << nameVector[i].name << "/ab=" << nameVector[i].numIdentical << "/" << endl << nameVector[i].seq << endl; //
+ out << ">" << nameVector[i].name << tag << nameVector[i].numIdentical << tag2 << endl << nameVector[i].seq << endl; //
}
}else {
diff --git a/source/datastructures/sequenceparser.h b/source/datastructures/sequenceparser.h
index 98438f6..5e79b8f 100644
--- a/source/datastructures/sequenceparser.h
+++ b/source/datastructures/sequenceparser.h
@@ -41,7 +41,7 @@ class SequenceParser {
vector<Sequence> getSeqs(string); //returns unique sequences in a specific group
map<string, string> getNameMap(string); //returns seqName -> namesOfRedundantSeqs separated by commas for a specific group - the name file format, but each line is parsed by group.
- int getSeqs(string, string, bool); //prints unique sequences in a specific group to a file - group, filename, uchimeFormat=false
+ int getSeqs(string, string, string, string, bool); //prints unique sequences in a specific group to a file - group, filename, uchimeFormat=false, tag(/ab= or ;size=), tag2(/ or ;)
int getNameMap(string, string); //print seqName -> namesOfRedundantSeqs separated by commas for a specific group - group, filename
map<string, string> getAllSeqsMap(){ return allSeqsMap; } //returns map where the key=sequenceName and the value=representativeSequence - helps us remove duplicates after group by group processing
diff --git a/source/hcluster.cpp b/source/hcluster.cpp
deleted file mode 100644
index f8f4809..0000000
--- a/source/hcluster.cpp
+++ /dev/null
@@ -1,807 +0,0 @@
-/*
- * hcluster.cpp
- * Mothur
- *
- * Created by westcott on 10/13/09.
- * Copyright 2009 Schloss Lab. All rights reserved.
- *
- */
-
-#include "hcluster.h"
-#include "rabundvector.hpp"
-#include "listvector.hpp"
-
-/***********************************************************************/
-HCluster::HCluster(RAbundVector* rav, ListVector* lv, string ms, string d, NameAssignment* n, float c) : rabund(rav), list(lv), method(ms), distfile(d), nameMap(n), cutoff(c) {
- try {
- m = MothurOut::getInstance();
- mapWanted = false;
- exitedBreak = false;
- numSeqs = list->getNumSeqs();
-
- //initialize cluster array
- for (int i = 0; i < numSeqs; i++) {
- clusterNode temp(1, -1, i);
- clusterArray.push_back(temp);
- }
-
- if ((method == "furthest") || (method == "nearest")) {
- m->openInputFile(distfile, filehandle);
- }else{
- processFile();
- }
- }
- catch(exception& e) {
- m->errorOut(e, "HCluster", "HCluster");
- exit(1);
- }
-}
-/***********************************************************************/
-
-void HCluster::clusterBins(){
- try {
- //cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << rabund->get(clusterArray[smallRow].smallChild) << '\t' << rabund->get(clusterArray[smallCol].smallChild);
-
- rabund->set(clusterArray[smallCol].smallChild, rabund->get(clusterArray[smallRow].smallChild)+rabund->get(clusterArray[smallCol].smallChild));
- rabund->set(clusterArray[smallRow].smallChild, 0);
- rabund->setLabel(toString(smallDist));
-
- //cout << '\t' << rabund->get(clusterArray[smallRow].smallChild) << '\t' << rabund->get(clusterArray[smallCol].smallChild) << endl;
- }
- catch(exception& e) {
- m->errorOut(e, "HCluster", "clusterBins");
- exit(1);
- }
-
-
-}
-
-/***********************************************************************/
-
-void HCluster::clusterNames(){
- try {
- ///cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << list->get(clusterArray[smallRow].smallChild) << '\t' << list->get(clusterArray[smallCol].smallChild);
- if (mapWanted) { updateMap(); }
-
- list->set(clusterArray[smallCol].smallChild, list->get(clusterArray[smallRow].smallChild)+','+list->get(clusterArray[smallCol].smallChild));
- list->set(clusterArray[smallRow].smallChild, "");
- list->setLabel(toString(smallDist));
-
- //cout << '\t' << list->get(clusterArray[smallRow].smallChild) << '\t' << list->get(clusterArray[smallCol].smallChild) << endl;
-
- }
- catch(exception& e) {
- m->errorOut(e, "HCluster", "clusterNames");
- exit(1);
- }
-
-}
-/***********************************************************************/
-int HCluster::getUpmostParent(int node){
- try {
- while (clusterArray[node].parent != -1) {
- node = clusterArray[node].parent;
- }
-
- return node;
- }
- catch(exception& e) {
- m->errorOut(e, "HCluster", "getUpmostParent");
- exit(1);
- }
-}
-/***********************************************************************/
-void HCluster::printInfo(){
- try {
-
- cout << "link table" << endl;
- for (itActive = activeLinks.begin(); itActive!= activeLinks.end(); itActive++) {
- cout << itActive->first << " = " << itActive->second << endl;
- }
- cout << endl;
- for (int i = 0; i < linkTable.size(); i++) {
- cout << i << '\t';
- for (it = linkTable[i].begin(); it != linkTable[i].end(); it++) {
- cout << it->first << '-' << it->second << '\t' ;
- }
- cout << endl;
- }
- cout << endl << "clusterArray" << endl;
-
- for (int i = 0; i < clusterArray.size(); i++) {
- cout << i << '\t' << clusterArray[i].numSeq << '\t' << clusterArray[i].parent << '\t' << clusterArray[i].smallChild << endl;
- }
- cout << endl;
-
-
- }
- catch(exception& e) {
- m->errorOut(e, "HCluster", "getUpmostParent");
- exit(1);
- }
-}
-/***********************************************************************/
-int HCluster::makeActive() {
- try {
- int linkValue = 1;
-
- itActive = activeLinks.find(smallRow);
- it2Active = activeLinks.find(smallCol);
-
- if ((itActive == activeLinks.end()) && (it2Active == activeLinks.end())) { //both are not active so add them
- int size = linkTable.size();
- map<int, int> temp; map<int, int> temp2;
-
- //add link to eachother
- temp[smallRow] = 1; // 1 2
- temp2[smallCol] = 1; // 1 0 1
- // 2 1 0
- linkTable.push_back(temp);
- linkTable.push_back(temp2);
-
- //add to activeLinks
- activeLinks[smallRow] = size;
- activeLinks[smallCol] = size+1;
-
- }else if ((itActive != activeLinks.end()) && (it2Active == activeLinks.end())) { //smallRow is active, smallCol is not
- int size = linkTable.size();
- int alreadyActiveRow = itActive->second;
- map<int, int> temp;
-
- //add link to eachother
- temp[smallRow] = 1; // 6 2 3 5
- linkTable.push_back(temp); // 6 0 1 2 0
- linkTable[alreadyActiveRow][smallCol] = 1; // 2 1 0 1 1
- // 3 2 1 0 0
- // 5 0 1 0 0
- //add to activeLinks
- activeLinks[smallCol] = size;
-
- }else if ((itActive == activeLinks.end()) && (it2Active != activeLinks.end())) { //smallCol is active, smallRow is not
- int size = linkTable.size();
- int alreadyActiveCol = it2Active->second;
- map<int, int> temp;
-
- //add link to eachother
- temp[smallCol] = 1; // 6 2 3 5
- linkTable.push_back(temp); // 6 0 1 2 0
- linkTable[alreadyActiveCol][smallRow] = 1; // 2 1 0 1 1
- // 3 2 1 0 0
- // 5 0 1 0 0
- //add to activeLinks
- activeLinks[smallRow] = size;
-
- }else { //both are active so add one
- int row = itActive->second;
- int col = it2Active->second;
-
-
- linkTable[row][smallCol]++;
- linkTable[col][smallRow]++;
- linkValue = linkTable[row][smallCol];
- }
-
- return linkValue;
- }
- catch(exception& e) {
- m->errorOut(e, "HCluster", "makeActive");
- exit(1);
- }
-}
-/***********************************************************************/
-void HCluster::updateArrayandLinkTable() {
- try {
- //if cluster was made update clusterArray and linkTable
- int size = clusterArray.size();
-
- //add new node
- clusterNode temp(clusterArray[smallRow].numSeq + clusterArray[smallCol].numSeq, -1, clusterArray[smallCol].smallChild);
- clusterArray.push_back(temp);
-
- //update child nodes
- clusterArray[smallRow].parent = size;
- clusterArray[smallCol].parent = size;
-
- if (method == "furthest") {
-
- //update linkTable by merging clustered rows and columns
- int rowSpot = activeLinks[smallRow];
- int colSpot = activeLinks[smallCol];
-
- //fix old rows
- for (int i = 0; i < linkTable.size(); i++) {
- //check if they are in map
- it = linkTable[i].find(smallRow);
- it2 = linkTable[i].find(smallCol);
-
- if ((it!=linkTable[i].end()) && (it2!=linkTable[i].end())) { //they are both there
- linkTable[i][size] = linkTable[i][smallRow]+linkTable[i][smallCol];
- linkTable[i].erase(smallCol); //delete col row
- linkTable[i].erase(smallRow); //delete col row
- }else if ((it==linkTable[i].end()) && (it2!=linkTable[i].end())) { //only col
- linkTable[i][size] = linkTable[i][smallCol];
- linkTable[i].erase(smallCol); //delete col
- }else if ((it!=linkTable[i].end()) && (it2==linkTable[i].end())) { //only row
- linkTable[i][size] = linkTable[i][smallRow];
- linkTable[i].erase(smallRow); //delete col
- }
- }
-
- //merge their values
- for (it = linkTable[rowSpot].begin(); it != linkTable[rowSpot].end(); it++) {
- it2 = linkTable[colSpot].find(it->first); //does the col also have this
-
- if (it2 == linkTable[colSpot].end()) { //not there so add it
- linkTable[colSpot][it->first] = it->second;
- }else { //merge them
- linkTable[colSpot][it->first] = it->second + it2->second;
- }
- }
-
- linkTable[colSpot].erase(size);
- linkTable.erase(linkTable.begin()+rowSpot); //delete row
-
- //update activerows
- activeLinks.erase(smallRow);
- activeLinks.erase(smallCol);
- activeLinks[size] = colSpot;
-
- //adjust everybody elses spot since you deleted - time vs. space
- for (itActive = activeLinks.begin(); itActive != activeLinks.end(); itActive++) {
- if (itActive->second > rowSpot) { activeLinks[itActive->first]--; }
- }
- }
- }
- catch(exception& e) {
- m->errorOut(e, "HCluster", "updateArrayandLinkTable");
- exit(1);
- }
-}
-/***********************************************************************/
-double HCluster::update(int row, int col, float distance){
- try {
- bool cluster = false;
- smallRow = row;
- smallCol = col;
- smallDist = distance;
-
- //find upmost parent of row and col
- smallRow = getUpmostParent(smallRow);
- smallCol = getUpmostParent(smallCol);
-
- //you don't want to cluster with yourself
- if (smallRow != smallCol) {
-
- if ((method == "furthest") || (method == "nearest")) {
- //can we cluster???
- if (method == "nearest") { cluster = true; }
- else{ //assume furthest
- //are they active in the link table
- int linkValue = makeActive(); //after this point this nodes info is active in linkTable
- if (linkValue == (clusterArray[smallRow].numSeq * clusterArray[smallCol].numSeq)) { cluster = true; }
- }
-
- if (cluster) {
- updateArrayandLinkTable();
- clusterBins();
- clusterNames();
- }
- }else {
- cluster = true;
- updateArrayandLinkTable();
- clusterBins();
- clusterNames();
- combineFile();
- }
- }
-
- return cutoff;
- //printInfo();
- }
- catch(exception& e) {
- m->errorOut(e, "HCluster", "update");
- exit(1);
- }
-}
-/***********************************************************************/
-void HCluster::setMapWanted(bool ms) {
- try {
- mapWanted = ms;
-
- //initialize map
- for (int i = 0; i < list->getNumBins(); i++) {
-
- //parse bin
- string names = list->get(i);
- while (names.find_first_of(',') != -1) {
- //get name from bin
- string name = names.substr(0,names.find_first_of(','));
- //save name and bin number
- seq2Bin[name] = i;
- names = names.substr(names.find_first_of(',')+1, names.length());
- }
-
- //get last name
- seq2Bin[names] = i;
- }
-
- }
- catch(exception& e) {
- m->errorOut(e, "HCluster", "setMapWanted");
- exit(1);
- }
-}
-/***********************************************************************/
-void HCluster::updateMap() {
-try {
- //update location of seqs in smallRow since they move to smallCol now
- string names = list->get(clusterArray[smallRow].smallChild);
- while (names.find_first_of(',') != -1) {
- //get name from bin
- string name = names.substr(0,names.find_first_of(','));
- //save name and bin number
- seq2Bin[name] = clusterArray[smallCol].smallChild;
- names = names.substr(names.find_first_of(',')+1, names.length());
- }
-
- //get last name
- seq2Bin[names] = clusterArray[smallCol].smallChild;
- }
- catch(exception& e) {
- m->errorOut(e, "HCluster", "updateMap");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-vector<seqDist> HCluster::getSeqs(){
- try {
- vector<seqDist> sameSeqs;
-
- if ((method == "furthest") || (method == "nearest")) {
- sameSeqs = getSeqsFNNN();
- }else{
- sameSeqs = getSeqsAN();
- }
-
- return sameSeqs;
- }
- catch(exception& e) {
- m->errorOut(e, "HCluster", "getSeqs");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-vector<seqDist> HCluster::getSeqsFNNN(){
- try {
- string firstName, secondName;
- float distance, prevDistance;
- vector<seqDist> sameSeqs;
- prevDistance = -1;
-
- //if you are not at the beginning of the file
- if (exitedBreak) {
- sameSeqs.push_back(next);
- prevDistance = next.dist;
- exitedBreak = false;
- }
-
- //get entry
- while (!filehandle.eof()) {
-
- filehandle >> firstName >> secondName >> distance; m->gobble(filehandle);
-
- //save first one
- if (prevDistance == -1) { prevDistance = distance; }
-
- map<string,int>::iterator itA = nameMap->find(firstName);
- map<string,int>::iterator itB = nameMap->find(secondName);
- if(itA == nameMap->end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1); }
- if(itB == nameMap->end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); }
-
- //using cutoff
- if (distance > cutoff) { break; }
-
- if (distance != -1) { //-1 means skip me
-
- //are the distances the same
- if (distance == prevDistance) { //save in vector
- seqDist temp(itA->second, itB->second, distance);
- sameSeqs.push_back(temp);
- exitedBreak = false;
- }else{
- next.seq1 = itA->second;
- next.seq2 = itB->second;
- next.dist = distance;
- exitedBreak = true;
- break;
- }
- }
- }
-
- //rndomize matching dists
- random_shuffle(sameSeqs.begin(), sameSeqs.end());
-
- return sameSeqs;
- }
- catch(exception& e) {
- m->errorOut(e, "HCluster", "getSeqsFNNN");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-vector<seqDist> HCluster::getSeqsAN(){
- try {
- int firstName, secondName;
- float prevDistance;
- vector<seqDist> sameSeqs;
- prevDistance = -1;
-
- m->openInputFile(distfile, filehandle, "no error");
-
- //is the smallest value in mergedMin or the distfile?
- float mergedMinDist = 10000;
- float distance = 10000;
- if (mergedMin.size() > 0) { mergedMinDist = mergedMin[0].dist; }
-
- if (!filehandle.eof()) {
- filehandle >> firstName >> secondName >> distance; m->gobble(filehandle);
- //save first one
- if (prevDistance == -1) { prevDistance = distance; }
- if (distance != -1) { //-1 means skip me
- seqDist temp(firstName, secondName, distance);
- sameSeqs.push_back(temp);
- }else{ distance = 10000; }
- }
-
- if (mergedMinDist < distance) { //get minimum distance from mergedMin
- //remove distance we saved from file
- sameSeqs.clear();
- prevDistance = mergedMinDist;
-
- for (int i = 0; i < mergedMin.size(); i++) {
- if (mergedMin[i].dist == prevDistance) {
- sameSeqs.push_back(mergedMin[i]);
- }else { break; }
- }
- }else{ //get minimum from file
- //get entry
- while (!filehandle.eof()) {
-
- filehandle >> firstName >> secondName >> distance; m->gobble(filehandle);
-
- if (prevDistance == -1) { prevDistance = distance; }
-
- if (distance != -1) { //-1 means skip me
- //are the distances the same
- if (distance == prevDistance) { //save in vector
- seqDist temp(firstName, secondName, distance);
- sameSeqs.push_back(temp);
- }else{
- break;
- }
- }
- }
- }
- filehandle.close();
-
- //randomize matching dists
- random_shuffle(sameSeqs.begin(), sameSeqs.end());
-
- //can only return one value since once these are merged the other distances in sameSeqs may have changed
- vector<seqDist> temp;
- if (sameSeqs.size() > 0) { temp.push_back(sameSeqs[0]); }
-
- return temp;
- }
- catch(exception& e) {
- m->errorOut(e, "HCluster", "getSeqsAN");
- exit(1);
- }
-}
-
-/***********************************************************************/
-int HCluster::combineFile() {
- try {
- //int bufferSize = 64000; //512k - this should be a variable that the user can set to optimize code to their hardware
- //char* inputBuffer;
- //inputBuffer = new char[bufferSize];
- //size_t numRead;
-
- string tempDistFile = distfile + ".temp";
- ofstream out;
- m->openOutputFile(tempDistFile, out);
-
- //FILE* in;
- //in = fopen(distfile.c_str(), "rb");
-
- ifstream in;
- m->openInputFile(distfile, in, "no error");
-
- int first, second;
- float dist;
-
- vector< map<int, float> > smallRowColValues;
- smallRowColValues.resize(2); //0 = row, 1 = col
- int count = 0;
-
- //go through file pulling out distances related to rows merging
- //if mergedMin contains distances add those back into file
- //bool done = false;
- //partialDist = "";
- //while ((numRead = fread(inputBuffer, 1, bufferSize, in)) != 0) {
-//cout << "number of char read = " << numRead << endl;
-//cout << inputBuffer << endl;
- //if (numRead < bufferSize) { done = true; }
-
- //parse input into individual distances
- //int spot = 0;
- //string outputString = "";
- //while(spot < numRead) {
- //cout << "spot = " << spot << endl;
- // seqDist nextDist = getNextDist(inputBuffer, spot, bufferSize);
-
- //you read a partial distance
- // if (nextDist.seq1 == -1) { break; }
- while (!in.eof()) {
- //first = nextDist.seq1; second = nextDist.seq2; dist = nextDist.dist;
- //cout << "next distance = " << first << '\t' << second << '\t' << dist << endl;
- //since file is sorted and mergedMin is sorted
- //you can put the smallest distance from each through the code below and keep the file sorted
-
- in >> first >> second >> dist; m->gobble(in);
-
- if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempDistFile); return 0; }
-
- //while there are still values in mergedMin that are smaller than the distance read from file
- while (count < mergedMin.size()) {
-
- //is the distance in mergedMin smaller than from the file
- if (mergedMin[count].dist < dist) {
- //is this a distance related to the columns merging?
- //if yes, save in memory
- if ((mergedMin[count].seq1 == smallRow) && (mergedMin[count].seq2 == smallCol)) { //do nothing this is the smallest distance from last time
- }else if (mergedMin[count].seq1 == smallCol) {
- smallRowColValues[1][mergedMin[count].seq2] = mergedMin[count].dist;
- }else if (mergedMin[count].seq2 == smallCol) {
- smallRowColValues[1][mergedMin[count].seq1] = mergedMin[count].dist;
- }else if (mergedMin[count].seq1 == smallRow) {
- smallRowColValues[0][mergedMin[count].seq2] = mergedMin[count].dist;
- }else if (mergedMin[count].seq2 == smallRow) {
- smallRowColValues[0][mergedMin[count].seq1] = mergedMin[count].dist;
- }else { //if no, write to temp file
- //outputString += toString(mergedMin[count].seq1) + '\t' + toString(mergedMin[count].seq2) + '\t' + toString(mergedMin[count].dist) + '\n';
- //if (mergedMin[count].dist < cutoff) {
- out << mergedMin[count].seq1 << '\t' << mergedMin[count].seq2 << '\t' << mergedMin[count].dist << endl;
- //}
- }
- count++;
- }else{ break; }
- }
-
- //is this a distance related to the columns merging?
- //if yes, save in memory
- if ((first == smallRow) && (second == smallCol)) { //do nothing this is the smallest distance from last time
- }else if (first == smallCol) {
- smallRowColValues[1][second] = dist;
- }else if (second == smallCol) {
- smallRowColValues[1][first] = dist;
- }else if (first == smallRow) {
- smallRowColValues[0][second] = dist;
- }else if (second == smallRow) {
- smallRowColValues[0][first] = dist;
-
- }else { //if no, write to temp file
- //outputString += toString(first) + '\t' + toString(second) + '\t' + toString(dist) + '\n';
- //if (dist < cutoff) {
- out << first << '\t' << second << '\t' << dist << endl;
- //}
- }
- }
-
- //out << outputString;
- //if(done) { break; }
- //}
- //fclose(in);
- in.close();
-
- //if values in mergedMin are larger than the the largest in file then
- while (count < mergedMin.size()) {
- //is this a distance related to the columns merging?
- //if yes, save in memory
- if ((mergedMin[count].seq1 == smallRow) && (mergedMin[count].seq2 == smallCol)) { //do nothing this is the smallest distance from last time
- }else if (mergedMin[count].seq1 == smallCol) {
- smallRowColValues[1][mergedMin[count].seq2] = mergedMin[count].dist;
- }else if (mergedMin[count].seq2 == smallCol) {
- smallRowColValues[1][mergedMin[count].seq1] = mergedMin[count].dist;
- }else if (mergedMin[count].seq1 == smallRow) {
- smallRowColValues[0][mergedMin[count].seq2] = mergedMin[count].dist;
- }else if (mergedMin[count].seq2 == smallRow) {
- smallRowColValues[0][mergedMin[count].seq1] = mergedMin[count].dist;
-
- }else { //if no, write to temp file
- //if (mergedMin[count].dist < cutoff) {
- out << mergedMin[count].seq1 << '\t' << mergedMin[count].seq2 << '\t' << mergedMin[count].dist << endl;
- //}
- }
- count++;
- }
- out.close();
- mergedMin.clear();
-
- //rename tempfile to distfile
- m->mothurRemove(distfile);
- rename(tempDistFile.c_str(), distfile.c_str());
-//cout << "remove = "<< renameOK << " rename = " << ok << endl;
-
- //merge clustered rows averaging the distances
- map<int, float>::iterator itMerge;
- map<int, float>::iterator it2Merge;
- for(itMerge = smallRowColValues[0].begin(); itMerge != smallRowColValues[0].end(); itMerge++) {
- //does smallRowColValues[1] have a distance to this seq too?
- it2Merge = smallRowColValues[1].find(itMerge->first);
-
- float average;
- if (it2Merge != smallRowColValues[1].end()) { //if yes, then average
- //average
- if (method == "average") {
- int total = clusterArray[smallRow].numSeq + clusterArray[smallCol].numSeq;
- average = ((clusterArray[smallRow].numSeq * itMerge->second) + (clusterArray[smallCol].numSeq * it2Merge->second)) / (float) total;
- }else { //weighted
- average = ((itMerge->second * 1.0) + (it2Merge->second * 1.0)) / (float) 2.0;
- }
-
- smallRowColValues[1].erase(it2Merge);
-
- seqDist temp(clusterArray[smallRow].parent, itMerge->first, average);
- mergedMin.push_back(temp);
- }else {
- //can't find value so update cutoff
- if (cutoff > itMerge->second) { cutoff = itMerge->second; }
- }
- }
-
- //update cutoff
- for(itMerge = smallRowColValues[1].begin(); itMerge != smallRowColValues[1].end(); itMerge++) {
- if (cutoff > itMerge->second) { cutoff = itMerge->second; }
- }
-
- //sort merged values
- sort(mergedMin.begin(), mergedMin.end(), compareSequenceDistance);
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "HCluster", "combineFile");
- exit(1);
- }
-}
-/***********************************************************************
-seqDist HCluster::getNextDist(char* buffer, int& index, int size){
- try {
- seqDist next;
- int indexBefore = index;
- string first, second, distance;
- first = ""; second = ""; distance = "";
- int tabCount = 0;
-//cout << "partial = " << partialDist << endl;
- if (partialDist != "") { //read what you can, you know it is less than a whole distance.
- for (int i = 0; i < partialDist.size(); i++) {
- if (tabCount == 0) {
- if (partialDist[i] == '\t') { tabCount++; }
- else { first += partialDist[i]; }
- }else if (tabCount == 1) {
- if (partialDist[i] == '\t') { tabCount++; }
- else { second += partialDist[i]; }
- }else if (tabCount == 2) {
- distance += partialDist[i];
- }
- }
- partialDist = "";
- }
-
- //try to get another distance
- bool gotDist = false;
- while (index < size) {
- if ((buffer[index] == 10) || (buffer[index] == 13)) { //newline in unix or windows
- gotDist = true;
-
- //m->gobble space
- while (index < size) {
- if (isspace(buffer[index])) { index++; }
- else { break; }
- }
- break;
- }else{
- if (tabCount == 0) {
- if (buffer[index] == '\t') { tabCount++; }
- else { first += buffer[index]; }
- }else if (tabCount == 1) {
- if (buffer[index] == '\t') { tabCount++; }
- else { second += buffer[index]; }
- }else if (tabCount == 2) {
- distance += buffer[index];
- }
- index++;
- }
- }
-
- //there was not a whole distance in the buffer, ie. buffer = "1 2 0.01 2 3 0."
- //then you want to save the partial distance.
- if (!gotDist) {
- for (int i = indexBefore; i < size; i++) {
- partialDist += buffer[i];
- }
- index = size + 1;
- next.seq1 = -1; next.seq2 = -1; next.dist = 0.0;
- }else{
- int firstname, secondname;
- float dist;
-
- convert(first, firstname);
- convert(second, secondname);
- convert(distance, dist);
-
- next.seq1 = firstname; next.seq2 = secondname; next.dist = dist;
- }
-
- return next;
- }
- catch(exception& e) {
- m->errorOut(e, "HCluster", "getNextDist");
- exit(1);
- }
-}
-***********************************************************************/
-int HCluster::processFile() {
- try {
- string firstName, secondName;
- float distance;
-
- ifstream in;
- m->openInputFile(distfile, in, "no error");
-
- ofstream out;
- string outTemp = distfile + ".temp";
- m->openOutputFile(outTemp, out);
-
- //get entry
- while (!in.eof()) {
- if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outTemp); return 0; }
-
- in >> firstName >> secondName >> distance; m->gobble(in);
-
- map<string,int>::iterator itA = nameMap->find(firstName);
- map<string,int>::iterator itB = nameMap->find(secondName);
- if(itA == nameMap->end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1); }
- if(itB == nameMap->end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); }
-
- //using cutoff
- if (distance > cutoff) { break; }
-
- if (distance != -1) { //-1 means skip me
- out << itA->second << '\t' << itB->second << '\t' << distance << endl;
- }
- }
-
- in.close();
- out.close();
-
- m->mothurRemove(distfile);
- rename(outTemp.c_str(), distfile.c_str());
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "HCluster", "processFile");
- exit(1);
- }
-}
-/***********************************************************************/
-
-
-
-
-
-
-
-
diff --git a/source/hcluster.h b/source/hcluster.h
deleted file mode 100644
index d859886..0000000
--- a/source/hcluster.h
+++ /dev/null
@@ -1,86 +0,0 @@
-#ifndef HCLUSTER_H
-#define HCLUSTER_H
-
-/*
- * hcluster.h
- * Mothur
- *
- * Created by westcott on 10/13/09.
- * Copyright 2009 Schloss Lab. All rights reserved.
- *
- */
-
-
-#include "mothur.h"
-#include "nameassignment.hpp"
-
-class RAbundVector;
-class ListVector;
-
-/***********************************************************************/
-class HCluster {
-
-public:
- HCluster(RAbundVector*, ListVector*, string, string, NameAssignment*, float);
- ~HCluster(){};
- double update(int, int, float);
- void setMapWanted(bool m);
- map<string, int> getSeqtoBin() { return seq2Bin; }
- vector<seqDist> getSeqs();
-
-protected:
- void clusterBins();
- void clusterNames();
- int getUpmostParent(int);
- int makeActive();
- void printInfo();
- void updateArrayandLinkTable();
- void updateMap();
- vector<seqDist> getSeqsFNNN();
- vector<seqDist> getSeqsAN();
- int combineFile();
- int processFile();
- //seqDist getNextDist(char*, int&, int);
-
- RAbundVector* rabund;
- ListVector* list;
- NameAssignment* nameMap;
-
- vector<clusterNode> clusterArray;
-
- //note: the nearest and average neighbor method do not use the link table or active links
- vector< map<int, int> > linkTable; // vector of maps - linkTable[1][6] = 2 would mean sequence in spot 1 has 2 links with sequence in 6
- map<int, int> activeLinks; //maps sequence to index in linkTable
- map<int, int>::iterator it;
- map<int, int>::iterator itActive;
- map<int, int>::iterator it2Active;
- map<int, int>::iterator it2;
-
- int numSeqs;
- int smallRow;
- int smallCol;
- float smallDist, cutoff;
- map<string, int> seq2Bin;
- bool mapWanted, exitedBreak;
- seqDist next;
- string method, distfile;
- ifstream filehandle;
-
- vector<seqDist> mergedMin;
- string partialDist;
- MothurOut* m;
-
-
-};
-
-/***********************************************************************/
-
-
-
-
-
-
-
-#endif
-
-
diff --git a/source/heatmapsim.cpp b/source/heatmapsim.cpp
index 62965f9..8db4aeb 100644
--- a/source/heatmapsim.cpp
+++ b/source/heatmapsim.cpp
@@ -54,9 +54,9 @@ vector<string> HeatMapSim::getPic(vector<SharedRAbundVector*> lookup, vector<Cal
}
sims.clear();
-// double biggest = -1;
- double biggest = 1;
- float scaler;
+ double biggest = 0;
+ double smallest = 10000000;
+ //float scaler;
//get sim for each comparison and save them so you can find the relative similairity
for(int i = 0; i < (lookup.size()-1); i++){
@@ -70,22 +70,26 @@ vector<string> HeatMapSim::getPic(vector<SharedRAbundVector*> lookup, vector<Cal
//get similairity between groups
data = calcs[k]->getValues(subset);
sims.push_back(1.0 - data[0]);
-
+
//save biggest similairity to set relative sim
-// if (data[0] > biggest) { biggest = data[0]; }
+ if ((1.0 - data[0]) > biggest) { biggest = (1.0 - data[0]); }
+
+ //save smalllest similairity to set relative sim
+ if ((1.0 - data[0]) < smallest) { smallest = (1.0 - data[0]); }
+
}
}
-
+
//map biggest similairity found to red
- scaler = 255.0 / biggest;
-
+ float scalerBig = 255.0 / biggest;
+
int count = 0;
//output similairites to file
for(int i = 0; i < (lookup.size()-1); i++){
for(int j = (i+1); j < lookup.size(); j++){
//find relative color
- int color = scaler * sims[count];
+ int color = scalerBig * sims[count];
//draw box
outsvg << "<rect fill=\"rgb(" + toString(color) + ",0,0)\" stroke=\"rgb(" + toString(color) + ",0,0)\" x=\"" + toString((i*150)+80) + "\" y=\"" + toString((j*150)+75) + "\" width=\"150\" height=\"150\"/>\n";
count++;
@@ -93,7 +97,7 @@ vector<string> HeatMapSim::getPic(vector<SharedRAbundVector*> lookup, vector<Cal
}
int y = ((lookup.size() * 150) + 120);
- printLegend(y, biggest);
+ printLegend(y, biggest, smallest);
outsvg << "</g>\n</svg>\n";
outsvg.close();
@@ -131,6 +135,7 @@ string HeatMapSim::getPic(vector< vector<double> > dists, vector<string> groups)
}
double biggest = -1;
+ double smallest = 10000000;
float scaler;
//get sim for each comparison and save them so you can find the relative similairity
@@ -144,6 +149,9 @@ string HeatMapSim::getPic(vector< vector<double> > dists, vector<string> groups)
//save biggest similairity to set relative sim
if (sim > biggest) { biggest = sim; }
+
+ //save smalllest similairity to set relative sim
+ if (sim < smallest) { smallest = sim; }
}
}
@@ -164,7 +172,7 @@ string HeatMapSim::getPic(vector< vector<double> > dists, vector<string> groups)
}
int y = ((dists.size() * 150) + 120);
- printLegend(y, biggest);
+ printLegend(y, biggest, smallest);
outsvg << "</g>\n</svg>\n";
outsvg.close();
@@ -180,10 +188,8 @@ string HeatMapSim::getPic(vector< vector<double> > dists, vector<string> groups)
//**********************************************************************************************************************
-void HeatMapSim::printLegend(int y, float maxSim) {
+void HeatMapSim::printLegend(int y, float maxSim, float minSim) {
try {
- maxSim = 1;
-
//output legend and color labels
//go through map and give each score a color value
string color;
@@ -196,7 +202,7 @@ void HeatMapSim::printLegend(int y, float maxSim) {
x += 3;
}
- float scaler = maxSim / 5.0;
+ float scaler = (maxSim-minSim) / 5.0;
//prints legend labels
x = 0;
@@ -204,7 +210,7 @@ void HeatMapSim::printLegend(int y, float maxSim) {
float label = scaler*i;
label = int(label * 1000 + 0.5);
label /= 1000.0;
- string text = toString(label, 1);
+ string text = toString(label, 3);
outsvg << "<text fill=\"black\" class=\"seri\" font-size=\"" + toString(fontSize) + "\" x=\"" + toString(x) + "\" y=\"" + toString(y-3) + "\">" + text + "</text>\n";
x += 153;
diff --git a/source/heatmapsim.h b/source/heatmapsim.h
index aa23c18..2c0c931 100644
--- a/source/heatmapsim.h
+++ b/source/heatmapsim.h
@@ -26,7 +26,7 @@ class HeatMapSim {
string getPic(vector< vector<double> >, vector<string>);
private:
- void printLegend(int, float);
+ void printLegend(int, float, float);
string format, groupComb, outputDir, inputfile;
int fontSize;
diff --git a/source/mothur.cpp b/source/mothur.cpp
index e203fbb..e6e3a0e 100644
--- a/source/mothur.cpp
+++ b/source/mothur.cpp
@@ -10,13 +10,11 @@
#include "mothur.h"
#include "engine.hpp"
#include "mothurout.h"
-#include "referencedb.h"
/**************************************************************************************************/
CommandFactory* CommandFactory::_uniqueInstance = 0;
MothurOut* MothurOut::_uniqueInstance = 0;
-ReferenceDB* ReferenceDB::myInstance = 0;
/***********************************************************************/
volatile int ctrlc_pressed = 0;
void ctrlc_handler ( int sig ) {
@@ -275,8 +273,11 @@ int main(int argc, char *argv[]){
if (!createLogFile) { m->mothurRemove(newlogFileName); }
if (mothur != NULL) { delete mothur; }
-
- return 0;
+
+ int returnCode = 0;
+ if (m->getNumErrors() != 0) { returnCode = 1; }
+
+ return returnCode;
}
catch(exception& e) {
m->errorOut(e, "mothur", "main");
diff --git a/source/mothurout.cpp b/source/mothurout.cpp
index 078e4db..86d9e52 100644
--- a/source/mothurout.cpp
+++ b/source/mothurout.cpp
@@ -570,11 +570,16 @@ unsigned long long MothurOut::getTotalRAM() {
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
- long pages = get_phys_pages();
- long page_size = sysconf(_SC_PAGE_SIZE);
- if ((page_size == -1) || (pages == -1))
+ #if defined _SC_PHYS_PAGES && defined _SC_PAGESIZE
+ /* This works on linux-gnu, solaris2 and cygwin. */
+ double pages = sysconf (_SC_PHYS_PAGES);
+ double pagesize = sysconf (_SC_PAGESIZE);
+ if (0 <= pages && 0 <= pagesize)
+ return pages * pagesize;
+ #else
mothurOut("[WARNING]: Cannot determine amount of RAM");
- return pages * page_size;
+ #endif
+
#elif defined (_WIN32)
MEMORYSTATUSEX status;
status.dwLength = sizeof(status);
@@ -587,6 +592,7 @@ unsigned long long MothurOut::getTotalRAM() {
return si.totalram * si.mem_unit;
#endif
+ return 0;
}
catch(exception& e) {
errorOut(e, "MothurOut", "getTotalRAM");
@@ -594,21 +600,6 @@ unsigned long long MothurOut::getTotalRAM() {
}
}
/***********************************************************************/
-unsigned long MothurOut::get_phys_pages () {
- unsigned long phys_pages = 0;
-#if (_SC_PAGE_SIZE)
- uint64_t mem;
- size_t len = sizeof(mem);
- sysctlbyname("hw.memsize", &mem, &len, NULL, 0);
- phys_pages = mem/sysconf(_SC_PAGE_SIZE);
-#elif (_SC_PHYS_PAGES)
- phys_pages = sysconf(_SC_PHYS_PAGES);
-#else
- mothurOut("[WARNING]: Cannot determine number of physical pages\n");
-#endif
- return phys_pages;
-}
-/***********************************************************************/
int MothurOut::openOutputFileAppend(string fileName, ofstream& fileHandle){
try {
fileName = getFullPathName(fileName);
@@ -2988,6 +2979,28 @@ int MothurOut::checkName(string& name) {
exit(1);
}
}
+/************************************************************/
+bool MothurOut::checkGroupName(string name) {
+ try {
+
+ bool goodName = true;
+ for (int i = 0; i < name.length(); i++) {
+ if (name[i] == ':') { goodName = false; break; }
+ else if (name[i] == '-') { goodName = false; break; }
+ else if (name[i] == '/') { goodName = false; break; }
+ }
+
+ if (!goodName) {
+ mothurOut("\n[ERROR]: group " + name + " contains illegal characters in the name. Group names cannot include :, -, or / characters. The ':' character is a special character used in trees. Using ':' will result in your tree being unreadable by tree reading software. The '-' character is a special character used by mothur to parse group names. Using the '-' character will prevent you from selecting groups. The '/' character will created unreadable filenames when mothur inclu [...]
+ }
+
+ return goodName;
+ }
+ catch(exception& e) {
+ errorOut(e, "MothurOut", "checkGroupName");
+ exit(1);
+ }
+}
/**********************************************************************************************************************/
int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, map<string, string>& fastamap) {
try {
@@ -3352,7 +3365,7 @@ int MothurOut::mothurRemove(string filename){
bool MothurOut::mothurConvert(string item, int& num){
try {
bool error = false;
-
+
if (isNumeric1(item)) {
convert(item, num);
}else {
@@ -3927,27 +3940,6 @@ void MothurOut::splitAtDash(string& estim, vector<string>& container) {
string individual = "";
int estimLength = estim.size();
bool prevEscape = false;
- /*for(int i=0;i<estimLength;i++){
- if(prevEscape){
- individual += estim[i];
- prevEscape = false;
- }
- else{
- if(estim[i] == '\\'){
- prevEscape = true;
- }
- else if(estim[i] == '-'){
- container.push_back(individual);
- individual = "";
- prevEscape = false;
- }
- else{
- individual += estim[i];
- prevEscape = false;
- }
- }
- }*/
-
for(int i=0;i<estimLength;i++){
if(estim[i] == '-'){
@@ -3983,28 +3975,6 @@ void MothurOut::splitAtDash(string& estim, set<string>& container) {
string individual = "";
int estimLength = estim.size();
bool prevEscape = false;
- /*
- for(int i=0;i<estimLength;i++){
- if(prevEscape){
- individual += estim[i];
- prevEscape = false;
- }
- else{
- if(estim[i] == '\\'){
- prevEscape = true;
- }
- else if(estim[i] == '-'){
- container.insert(individual);
- individual = "";
- prevEscape = false;
- }
- else{
- individual += estim[i];
- prevEscape = false;
- }
- }
- }
- */
for(int i=0;i<estimLength;i++){
if(estim[i] == '-'){
@@ -4038,28 +4008,6 @@ void MothurOut::splitAtDash(string& estim, set<int>& container) {
int lineNum;
int estimLength = estim.size();
bool prevEscape = false;
- /*
- for(int i=0;i<estimLength;i++){
- if(prevEscape){
- individual += estim[i];
- prevEscape = false;
- }
- else{
- if(estim[i] == '\\'){
- prevEscape = true;
- }
- else if(estim[i] == '-'){
- convert(individual, lineNum); //convert the string to int
- container.insert(lineNum);
- individual = "";
- prevEscape = false;
- }
- else{
- individual += estim[i];
- prevEscape = false;
- }
- }
- }*/
for(int i=0;i<estimLength;i++){
if(estim[i] == '-'){
@@ -4125,20 +4073,6 @@ void MothurOut::splitAtComma(string& estim, vector<string>& container) {
}
container.push_back(individual);
-
-
-
-// string individual;
-//
-// while (estim.find_first_of(',') != -1) {
-// individual = estim.substr(0,estim.find_first_of(','));
-// if ((estim.find_first_of(',')+1) <= estim.length()) { //checks to make sure you don't have comma at end of string
-// estim = estim.substr(estim.find_first_of(',')+1, estim.length());
-// container.push_back(individual);
-// }
-// }
-// //get last one
-// container.push_back(estim);
}
catch(exception& e) {
errorOut(e, "MothurOut", "splitAtComma");
@@ -4163,18 +4097,6 @@ void MothurOut::splitAtChar(string& prefix, string& suffix, char c){
}
}
-
- /*
-
- prefix = suffix.substr(0,suffix.find_first_of(c));
- if ((suffix.find_first_of(c)+2) <= suffix.length()) { //checks to make sure you don't have comma at end of string
- suffix = suffix.substr(suffix.find_first_of(c)+1, suffix.length());
- string space = " ";
- while(suffix.at(0) == ' ')
- suffix = suffix.substr(1, suffix.length());
- }else { suffix = ""; }
- */
-
}
catch(exception& e) {
errorOut(e, "MothurOut", "splitAtChar");
@@ -4430,6 +4352,63 @@ bool MothurOut::checkReleaseVersion(ifstream& file, string version) {
}
}
/**************************************************************************************************/
+int MothurOut::getTimeStamp(string filename) {
+ try {
+ int timeStamp = 0;
+
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+ struct stat st;
+ int errorCode = stat (filename.c_str(), &st);
+ if (errorCode != 0) {
+ mothurOut("[ERROR]: Can't find timestamp for " + filename + "\n"); control_pressed = true;
+ }else {
+ timeStamp = st.st_mtime;
+ }
+#else
+ HANDLE hFile;
+
+ hFile = CreateFile(filename.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL,
+ OPEN_EXISTING, 0, NULL);
+
+ if(hFile == INVALID_HANDLE_VALUE) {
+ mothurOut("[ERROR]: Can't find timestamp for " + filename + "\n"); control_pressed = true;
+ return timeStamp;
+ }
+
+ FILETIME ftCreate, ftAccess, ftWrite;
+ SYSTEMTIME stUTC;
+ DWORD dwRet;
+
+ // Retrieve the file times for the file.
+ bool success = GetFileTime(hFile, &ftCreate, &ftAccess, &ftWrite);
+
+ if (success) {
+ FileTimeToSystemTime(&ftWrite, &stUTC);
+
+ tm time;
+ time.tm_sec = stUTC.wSecond;
+ time.tm_min = stUTC.wMinute;
+ time.tm_hour = stUTC.wHour;
+ time.tm_mday = stUTC.wDay;
+ time.tm_mon = stUTC.wMonth - 1;
+ time.tm_year = stUTC.wYear - 1900;
+ time.tm_isdst = -1;
+ time_t t = mktime(&time);
+
+ timeStamp = t;
+ }
+ else { mothurOut("[ERROR]: Can't find timestamp for " + filename + "\n"); control_pressed = true; }
+
+#endif
+
+ return timeStamp;
+ }
+ catch(exception& e) {
+ errorOut(e, "MothurOut", "getTimeStamp");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
vector<double> MothurOut::getAverages(vector< vector<double> >& dists) {
try{
vector<double> averages; //averages.resize(numComp, 0.0);
@@ -4873,7 +4852,7 @@ int MothurOut::min(int A, int B){
}
}
//**********************************************************************************************************************
-int MothurOut::printVsearchFile(vector<seqPriorityNode>& nameMapCount, string filename, string tag){
+int MothurOut::printVsearchFile(vector<seqPriorityNode>& nameMapCount, string filename, string tag, string tag2){
try {
sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes);
@@ -4884,7 +4863,7 @@ int MothurOut::printVsearchFile(vector<seqPriorityNode>& nameMapCount, string fi
//print new file in order of
for (int i = 0; i < nameMapCount.size(); i++) {
if (control_pressed) {break;}
- out << ">" << nameMapCount[i].name << "/" + tag + "=" << nameMapCount[i].numIdentical << "/" << endl << nameMapCount[i].seq << endl;
+ out << ">" << nameMapCount[i].name << tag << nameMapCount[i].numIdentical << tag2 << endl << nameMapCount[i].seq << endl;
}
out.close();
diff --git a/source/mothurout.h b/source/mothurout.h
index 532082f..ca1ea12 100644
--- a/source/mothurout.h
+++ b/source/mothurout.h
@@ -140,7 +140,7 @@ class MothurOut {
int readNames(string, map<string, vector<string> >&);
int readNames(string, vector<seqPriorityNode>&, map<string, string>&);
int mothurRemove(string);
- int printVsearchFile(vector<seqPriorityNode>&, string, string); //sorts and prints by abundance adding /ab=xxx/
+ int printVsearchFile(vector<seqPriorityNode>&, string, string, string); //sorts and prints by abundance adding /ab=xxx/
bool mothurConvert(char, int&); //use for converting user inputs. Sets commandInputsConvertError to true if error occurs. Engines check this.
bool mothurConvert(string, int&); //use for converting user inputs. Sets commandInputsConvertError to true if error occurs. Engines check this.
bool mothurConvert(string, intDist&); //use for converting user inputs. Sets commandInputsConvertError to true if error occurs. Engines check this.
@@ -150,6 +150,7 @@ class MothurOut {
//searchs and checks
bool checkReleaseVersion(ifstream&, string);
+ int getTimeStamp(string filename);
bool anyLabelsToProcess(string, set<string>&, string);
bool inUsersGroups(vector<string>, vector<string>); //returns true if any of the strings in first vector are in second vector
bool inUsersGroups(vector<int>, vector< vector<int> >);
@@ -171,7 +172,6 @@ class MothurOut {
string mothurGetpid(int);
unsigned long long getRAMUsed();
unsigned long long getTotalRAM();
- unsigned long get_phys_pages();
string getStringFromVector(vector<string>&, string); //creates string like "v[0], v[1], ... v[n]" where ', ' is string.
string getStringFromVector(vector<int>&, string); //creates string like "v[0], v[1], ... v[n]" where ', ' is string.
string getStringFromVector(vector<double>&, string); //creates string like "v[0], v[1], ... v[n]" where ', ' is string.
@@ -193,6 +193,7 @@ class MothurOut {
string makeList(vector<string>&);
bool isSubset(vector<string>, vector<string>); //bigSet, subset
int checkName(string&);
+ bool checkGroupName(string name);
map<string, vector<string> > parseClasses(string);
string addUnclassifieds(string tax, int maxlevel, bool probs);
diff --git a/source/randomforest/abstractrandomforest.cpp b/source/randomforest/abstractrandomforest.cpp
deleted file mode 100644
index 59e3baf..0000000
--- a/source/randomforest/abstractrandomforest.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-//
-// abstractrandomforest.cpp
-// Mothur
-//
-// Created by Sarah Westcott on 10/1/12.
-// Copyright (c) 2012 Schloss Lab. All rights reserved.
-//
-
-#include "abstractrandomforest.hpp"
-
-/***********************************************************************/
-AbstractRandomForest::AbstractRandomForest(const std::vector < std::vector<int> > dataSet,
- const int numDecisionTrees,
- const string treeSplitCriterion = "informationGain")
-: dataSet(dataSet),
-numDecisionTrees(numDecisionTrees),
-numSamples((int)dataSet.size()),
-numFeatures((int)(dataSet[0].size() - 1)),
-globalDiscardedFeatureIndices(getGlobalDiscardedFeatureIndices()),
-globalVariableImportanceList(numFeatures, 0),
-treeSplitCriterion(treeSplitCriterion) {
- m = MothurOut::getInstance();
- // TODO: double check if the implemenatation of 'globalOutOfBagEstimates' is correct
-}
-
-/***********************************************************************/
-
-vector<int> AbstractRandomForest::getGlobalDiscardedFeatureIndices() {
- try {
- vector<int> globalDiscardedFeatureIndices;
-
- // calculate feature vectors
- vector< vector<int> > featureVectors(numFeatures, vector<int>(numSamples, 0));
- for (int i = 0; i < numSamples; i++) {
- if (m->control_pressed) { return globalDiscardedFeatureIndices; }
- for (int j = 0; j < numFeatures; j++) { featureVectors[j][i] = dataSet[i][j]; }
- }
-
- for (int i = 0; i < featureVectors.size(); i++) {
- if (m->control_pressed) { return globalDiscardedFeatureIndices; }
- double standardDeviation = m->getStandardDeviation(featureVectors[i]);
- if (standardDeviation <= 0){ globalDiscardedFeatureIndices.push_back(i); }
- }
-
- if (m->debug) {
- m->mothurOut("number of global discarded features: " + toString(globalDiscardedFeatureIndices.size())+ "\n");
- m->mothurOut("total features: " + toString(featureVectors.size())+ "\n");
- }
-
- return globalDiscardedFeatureIndices;
- }
- catch(exception& e) {
- m->errorOut(e, "AbstractRandomForest", "getGlobalDiscardedFeatureIndices");
- exit(1);
- }
-}
-
-/***********************************************************************/
-
diff --git a/source/randomforest/abstractrandomforest.hpp b/source/randomforest/abstractrandomforest.hpp
deleted file mode 100644
index 3be91b9..0000000
--- a/source/randomforest/abstractrandomforest.hpp
+++ /dev/null
@@ -1,67 +0,0 @@
-//
-// abstractrandomforest.hpp
-// rrf-fs-prototype
-//
-// Created by Abu Zaher Faridee on 7/20/12.
-// Copyright (c) 2012 Schloss Lab. All rights reserved.
-//
-
-#ifndef rrf_fs_prototype_abstractrandomforest_hpp
-#define rrf_fs_prototype_abstractrandomforest_hpp
-
-#include "mothurout.h"
-#include "macros.h"
-#include "abstractdecisiontree.hpp"
-
-#define DEBUG_MODE
-
-/***********************************************************************/
-
-class AbstractRandomForest{
-public:
- // intialization with vectors
- AbstractRandomForest(const std::vector < std::vector<int> > dataSet,
- const int numDecisionTrees,
- const string);
- virtual ~AbstractRandomForest(){ }
- virtual int populateDecisionTrees() = 0;
- virtual int calcForrestErrorRate() = 0;
- virtual int calcForrestVariableImportance(string) = 0;
-
-/***********************************************************************/
-
-protected:
-
- // TODO: create a better way of discarding feature
- // currently we just set FEATURE_DISCARD_SD_THRESHOLD to 0 to solved this
- // it can be tuned for better selection
- // also, there might be other factors like Mean or other stuffs
- // same would apply for createLocalDiscardedFeatureList in the TreeNode class
-
- // TODO: Another idea is getting an aggregated discarded feature indices after the run, from combining
- // the local discarded feature indices
- // this would penalize a feature, even if in global space the feature looks quite good
- // the penalization would be averaged, so this woould unlikely to create a local optmina
-
- vector<int> getGlobalDiscardedFeatureIndices();
-
- int numDecisionTrees;
- int numSamples;
- int numFeatures;
- vector< vector<int> > dataSet;
- vector<int> globalDiscardedFeatureIndices;
- vector<double> globalVariableImportanceList;
- string treeSplitCriterion;
- // This is a map of each feature to outcome count of each classes
- // e.g. 1 => [2 7] means feature 1 has 2 outcome of 0 and 7 outcome of 1
- map<int, vector<int> > globalOutOfBagEstimates;
-
- // TODO: fix this, do we use pointers?
- vector<AbstractDecisionTree*> decisionTrees;
-
- MothurOut* m;
-
-private:
-
-};
-#endif
diff --git a/source/randomforest/regularizedrandomforest.cpp b/source/randomforest/regularizedrandomforest.cpp
deleted file mode 100644
index 0a33f4e..0000000
--- a/source/randomforest/regularizedrandomforest.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-//
-// regularizedrandomforest.cpp
-// Mothur
-//
-// Created by Kathryn Iverson on 11/16/12.
-// Copyright (c) 2012 Schloss Lab. All rights reserved.
-//
-
-#include "regularizedrandomforest.h"
-
-RegularizedRandomForest::RegularizedRandomForest(const vector <vector<int> > dataSet,
- const int numDecisionTrees,
- const string treeSplitCriterion = "gainratio")
- // TODO: update ctor according to basic RandomForest Class
- : Forest(dataSet,
- numDecisionTrees,
- treeSplitCriterion,
- false, 0.9, true, 0.4, "log2", 0.0) {
- m = MothurOut::getInstance();
-}
-
-int RegularizedRandomForest::calcForrestErrorRate() {
- //
- try {
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "RegularizedRandomForest", "calcForrestErrorRate");
- exit(1);
- }
-}
-
-int RegularizedRandomForest::calcForrestVariableImportance(string filename) {
- //
- try {
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "RegularizedRandomForest", "calcForrestVariableImportance");
- exit(1);
- }
-}
-
-int RegularizedRandomForest::populateDecisionTrees() {
- //
- try {
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "RegularizedRandomForest", "populateDecisionTrees");
- exit(1);
- }
-}
-
-int RegularizedRandomForest::updateGlobalOutOfBagEstimates(DecisionTree *decisionTree) {
- try {
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "RegularizedRandomForest", "updateGlobalOutOfBagEstimates");
- exit(1);
- }
-}
diff --git a/source/randomforest/regularizedrandomforest.h b/source/randomforest/regularizedrandomforest.h
deleted file mode 100644
index 94bd624..0000000
--- a/source/randomforest/regularizedrandomforest.h
+++ /dev/null
@@ -1,30 +0,0 @@
-//
-// regularizedrandomforest.h
-// Mothur
-//
-// Created by Kathryn Iverson on 11/16/12.
-// Copyright (c) 2012 Schloss Lab. All rights reserved.
-//
-
-#ifndef __Mothur__regularizedrandomforest__
-#define __Mothur__regularizedrandomforest__
-
-#include "forest.h"
-#include "decisiontree.hpp"
-
-class RegularizedRandomForest: public Forest {
-public:
- //
- RegularizedRandomForest(const vector <vector<int> > dataSet,const int numDecisionTrees, const string);
-
- int calcForrestErrorRate();
- int calcForrestVariableImportance(string);
- int populateDecisionTrees();
- int updateGlobalOutOfBagEstimates(DecisionTree* decisionTree);
-
-private:
- //
- MothurOut* m;
-};
-
-#endif /* defined(__Mothur__regularizedrandomforest__) */
diff --git a/source/read/readblast.cpp b/source/read/readblast.cpp
index 84fddcf..9e7b8db 100644
--- a/source/read/readblast.cpp
+++ b/source/read/readblast.cpp
@@ -16,7 +16,7 @@ inline bool compareOverlap(seqDist left, seqDist right){
return (left.dist < right.dist);
}
/*********************************************************************************************/
-ReadBlast::ReadBlast(string file, float c, float p, int l, bool ms, bool h) : blastfile(file), cutoff(c), penalty(p), length(l), minWanted(ms), hclusterWanted(h) {
+ReadBlast::ReadBlast(string file, float c, float p, int l, bool ms) : blastfile(file), cutoff(c), penalty(p), length(l), minWanted(ms) {
try {
m = MothurOut::getInstance();
matrix = NULL;
@@ -53,23 +53,11 @@ int ReadBlast::read(NameAssignment* nameMap) {
ofstream outOverlap;
//create objects needed for read
- if (!hclusterWanted) {
- matrix = new SparseDistanceMatrix();
- matrix->resize(nseqs);
- }else{
- overlapFile = m->getRootName(blastfile) + "overlap.dist";
- distFile = m->getRootName(blastfile) + "hclusterDists.dist";
-
- m->openOutputFile(overlapFile, outOverlap);
- m->openOutputFile(distFile, outDist);
- }
+ matrix = new SparseDistanceMatrix();
+ matrix->resize(nseqs);
- if (m->control_pressed) {
- fileHandle.close();
- if (!hclusterWanted) { delete matrix; }
- else { outOverlap.close(); m->mothurRemove(overlapFile); outDist.close(); m->mothurRemove(distFile); }
- return 0;
- }
+
+ if (m->control_pressed) { fileHandle.close(); delete matrix; return 0; }
Progress* reading = new Progress("Reading blast: ", nseqs * nseqs);
@@ -101,12 +89,8 @@ int ReadBlast::read(NameAssignment* nameMap) {
//if there is a valid overlap, add it
if ((startRef <= length) && ((endQuery+length) >= lengthThisSeq) && (thisoverlap < cutoff)) {
- if (!hclusterWanted) {
- seqDist overlapValue(itA->second, itB->second, thisoverlap);
- overlap.push_back(overlapValue);
- }else {
- outOverlap << itA->first << '\t' << itB->first << '\t' << thisoverlap << endl;
- }
+ seqDist overlapValue(itA->second, itB->second, thisoverlap);
+ overlap.push_back(overlapValue);
}
}
}else { m->mothurOut("Error in your blast file, cannot read."); m->mothurOutEndLine(); exit(1); }
@@ -115,13 +99,7 @@ int ReadBlast::read(NameAssignment* nameMap) {
//read file
while(!fileHandle.eof()){
- if (m->control_pressed) {
- fileHandle.close();
- if (!hclusterWanted) { delete matrix; }
- else { outOverlap.close(); m->mothurRemove(overlapFile); outDist.close(); m->mothurRemove(distFile); }
- delete reading;
- return 0;
- }
+ if (m->control_pressed) { fileHandle.close(); delete matrix; delete reading; return 0; }
//read in line from file
fileHandle >> firstName >> secondName >> percentId >> numBases >> mismatch >> gap >> startQuery >> endQuery >> startRef >> endRef >> eScore >> score;
@@ -155,13 +133,9 @@ int ReadBlast::read(NameAssignment* nameMap) {
//if there is a valid overlap, add it
if ((startRef <= length) && ((endQuery+length) >= lengthThisSeq) && (thisoverlap < cutoff)) {
- if (!hclusterWanted) {
- seqDist overlapValue(itA->second, itB->second, thisoverlap);
- //cout << "overlap = " << itA->second << '\t' << itB->second << '\t' << thisoverlap << endl;
- overlap.push_back(overlapValue);
- }else {
- outOverlap << itA->first << '\t' << itB->first << '\t' << thisoverlap << endl;
- }
+ seqDist overlapValue(itA->second, itB->second, thisoverlap);
+ //cout << "overlap = " << itA->second << '\t' << itB->second << '\t' << thisoverlap << endl;
+ overlap.push_back(overlapValue);
}
} //end else
}else { //end row
@@ -185,17 +159,13 @@ int ReadBlast::read(NameAssignment* nameMap) {
//is this distance below cutoff
if (distance < cutoff) {
- if (!hclusterWanted) {
- if (itA->second < it->first) {
- PDistCell value(it->first, distance);
- matrix->addCell(itA->second, value);
- }else {
- PDistCell value(itA->second, distance);
- matrix->addCell(it->first, value);
- }
- }else{
- outDist << itA->first << '\t' << nameMap->get(it->first) << '\t' << distance << endl;
- }
+ if (itA->second < it->first) {
+ PDistCell value(it->first, distance);
+ matrix->addCell(itA->second, value);
+ }else {
+ PDistCell value(itA->second, distance);
+ matrix->addCell(it->first, value);
+ }
}
//not going to need this again
dists[it->first].erase(itDist);
@@ -226,12 +196,8 @@ int ReadBlast::read(NameAssignment* nameMap) {
//if there is a valid overlap, add it
if ((startRef <= length) && ((endQuery+length) >= lengthThisSeq) && (thisoverlap < cutoff)) {
- if (!hclusterWanted) {
- seqDist overlapValue(itA->second, itB->second, thisoverlap);
- overlap.push_back(overlapValue);
- }else {
- outOverlap << itA->first << '\t' << itB->first << '\t' << thisoverlap << endl;
- }
+ seqDist overlapValue(itA->second, itB->second, thisoverlap);
+ overlap.push_back(overlapValue);
}
}
}//end if current row
@@ -257,17 +223,13 @@ int ReadBlast::read(NameAssignment* nameMap) {
//is this distance below cutoff
if (distance < cutoff) {
- if (!hclusterWanted) {
- if (itA->second < it->first) {
- PDistCell value(it->first, distance);
- matrix->addCell(itA->second, value);
- }else {
- PDistCell value(itA->second, distance);
- matrix->addCell(it->first, value);
- }
- }else{
- outDist << itA->first << '\t' << nameMap->get(it->first) << '\t' << distance << endl;
- }
+ if (itA->second < it->first) {
+ PDistCell value(it->first, distance);
+ matrix->addCell(itA->second, value);
+ }else {
+ PDistCell value(itA->second, distance);
+ matrix->addCell(it->first, value);
+ }
}
//not going to need this again
dists[it->first].erase(itDist);
@@ -279,28 +241,11 @@ int ReadBlast::read(NameAssignment* nameMap) {
thisRowsBlastScores.clear();
dists.clear();
- if (m->control_pressed) {
- fileHandle.close();
- if (!hclusterWanted) { delete matrix; }
- else { outOverlap.close(); m->mothurRemove(overlapFile); outDist.close(); m->mothurRemove(distFile); }
- delete reading;
- return 0;
- }
+ if (m->control_pressed) { fileHandle.close(); delete matrix; delete reading; return 0; }
- if (!hclusterWanted) {
- sort(overlap.begin(), overlap.end(), compareOverlap);
- }else {
- outDist.close();
- outOverlap.close();
- }
-
- if (m->control_pressed) {
- fileHandle.close();
- if (!hclusterWanted) { delete matrix; }
- else { m->mothurRemove(overlapFile); m->mothurRemove(distFile); }
- delete reading;
- return 0;
- }
+ sort(overlap.begin(), overlap.end(), compareOverlap);
+
+ if (m->control_pressed) { fileHandle.close(); delete matrix; delete reading; return 0; }
reading->finish();
delete reading;
diff --git a/source/read/readblast.h b/source/read/readblast.h
index 97ce6c7..70877d8 100644
--- a/source/read/readblast.h
+++ b/source/read/readblast.h
@@ -22,7 +22,7 @@
class ReadBlast {
public:
- ReadBlast(string, float, float, int, bool, bool); //blastfile, cutoff, penalty, length of overlap, min or max bsr, hclusterWanted
+ ReadBlast(string, float, float, int, bool); //blastfile, cutoff, penalty, length of overlap, min or max bsr
~ReadBlast() {}
int read(NameAssignment*);
@@ -36,7 +36,6 @@ private:
int length; //number of amino acids overlapped
float penalty, cutoff; //penalty is used to adjust error rate
bool minWanted; //if true choose min bsr, if false choose max bsr
- bool hclusterWanted;
SparseDistanceMatrix* matrix;
vector<seqDist> overlap;
diff --git a/source/trimoligos.cpp b/source/trimoligos.cpp
index 7e0321b..64096a3 100644
--- a/source/trimoligos.cpp
+++ b/source/trimoligos.cpp
@@ -24,6 +24,7 @@ TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<stri
bdiffs = b;
ldiffs = l;
sdiffs = s;
+ rdiffs = 0;
barcodes = br;
primers = pr;
@@ -73,6 +74,7 @@ TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<
bdiffs = b;
ldiffs = l;
sdiffs = s;
+ rdiffs = 0;
paired = true;
hasIndex = hi;
@@ -138,12 +140,13 @@ TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<
}
/********************************************************************/
//strip, pdiffs, bdiffs, primers, barcodes, revPrimers
-TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, vector<string> r){
+TrimOligos::TrimOligos(int p, int rd, int b, map<string, int> pr, map<string, int> br, vector<string> r){
try {
m = MothurOut::getInstance();
pdiffs = p;
bdiffs = b;
+ rdiffs = rd;
barcodes = br;
primers = pr;
@@ -282,7 +285,7 @@ vector<int> TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primer
string rawSequence = seq.getUnaligned();
int maxRevPrimerLength = revPrimer[0].length();
vector<int> success;
- success.push_back(pdiffs + 1000); //guilty until proven innocent
+ success.push_back(rdiffs + 1000); //guilty until proven innocent
success.push_back(1e6); //no matches found
for(int i=0;i<revPrimer.size();i++){
@@ -310,10 +313,10 @@ vector<int> TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primer
}
//cout << maxRevPrimerLength << endl;
//if you found the barcode or if you don't want to allow for diffs
- if ((pdiffs == 0) || (success[0] == 0)) { return success; }
+ if ((rdiffs == 0) || (success[0] == 0)) { return success; }
else { //try aligning and see if you can find it
Alignment* alignment;
- if (revPrimer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRevPrimerLength+pdiffs+1)); }
+ if (revPrimer.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRevPrimerLength+rdiffs+1)); }
else{ alignment = NULL; }
//can you find the revPrimer
@@ -324,15 +327,15 @@ vector<int> TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primer
for(int i=0;i<revPrimer.size();i++){
- if (rawSequence.length() < revPrimer[i].length()+pdiffs) {} //ignore primers too long for this seq
+ if (rawSequence.length() < revPrimer[i].length()+rdiffs) {} //ignore primers too long for this seq
else{
//undefined if not forced into an int.
- int stopSpot = rawRSequence.length()-(revPrimer[i].length()+pdiffs);
+ int stopSpot = rawRSequence.length()-(revPrimer[i].length()+rdiffs);
for (int j = 0; j < stopSpot; j++){
string oligo = reverseOligo(revPrimer[i]);
- string rawChunk = rawRSequence.substr(j,oligo.length()+pdiffs);
+ string rawChunk = rawRSequence.substr(j,oligo.length()+rdiffs);
//cout << "r before = " << oligo << '\t' << rawChunk << endl;
// cout << oligo << '\t' << olength << endl;
//use needleman to align first barcode.length()+numdiffs of sequence to each barcode
@@ -350,7 +353,7 @@ vector<int> TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primer
oligo = oligo.substr(0,alnLength);
temp = temp.substr(0,alnLength);
int numDiff = countDiffs(oligo, temp);
- if (alnLength == 0) { numDiff = pdiffs + 1000; }
+ if (alnLength == 0) { numDiff = rdiffs + 1000; }
//cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
if(numDiff < minDiff){
@@ -367,8 +370,8 @@ vector<int> TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primer
if (alignment != NULL) { delete alignment; }
- if(minDiff > pdiffs) { primerStart = 0; primerEnd = 0; success[0] = minDiff; success[1] = 1e6; return success; } //no good matches
- else if(minCount > 1) { primerStart = 0; primerEnd = 0; success[0] = minDiff; success[1] = pdiffs + 10000; return success; } //can't tell the difference between multiple primers
+ if(minDiff > rdiffs) { primerStart = 0; primerEnd = 0; success[0] = minDiff; success[1] = 1e6; return success; } //no good matches
+ else if(minCount > 1) { primerStart = 0; primerEnd = 0; success[0] = minDiff; success[1] = rdiffs + 10000; return success; } //can't tell the difference between multiple primers
else{ success[0] = minDiff; success[1] = 0; return success; }
}
@@ -541,7 +544,21 @@ vector<int> TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq,
break;
}
- if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
+ if (foligo == "NONE") {
+ if (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length()))) {
+ group = it->first;
+ reverseSeq.setUnaligned(rawRSequence.substr(roligo.length())); //trim reverse
+ success[0] = 0; success[1] = 0; success[2] = 0; success[3] = 0;
+ break;
+ }
+ }else if (roligo == "NONE") {
+ if (compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) {
+ group = it->first;
+ forwardSeq.setUnaligned(rawFSequence.substr(foligo.length())); //trim forward
+ success[0] = 0; success[1] = 0; success[2] = 0; success[3] = 0;
+ break;
+ }
+ }else if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
group = it->first;
forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
@@ -585,46 +602,53 @@ vector<int> TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq,
success[1] = bdiffs + 1000; //if the sequence is shorter than the barcode then bail out
break;
}
- //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
- //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
- oligo = alignment->getSeqAAln();
- string temp = alignment->getSeqBAln();
-
- int alnLength = oligo.length();
-
- for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
- oligo = oligo.substr(0,alnLength);
- temp = temp.substr(0,alnLength);
- int numDiff = countDiffs(oligo, temp);
-
- if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
- if (alnLength == 0) { numDiff = bdiffs + 1000; }
- //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
-
- if(numDiff < minDiff){
- minDiff = numDiff;
- minCount = 1;
- minFGroup.clear();
- minFGroup.push_back(it->second);
- int tempminFPos = 0;
- minFPos.clear();
- for(int i=0;i<alnLength;i++){
- if(temp[i] != '-'){
- tempminFPos++;
+ if (oligo != "NONE") {
+ //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
+ //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+ alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
+ oligo = alignment->getSeqAAln();
+ string temp = alignment->getSeqBAln();
+
+ int alnLength = oligo.length();
+
+ for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
+ oligo = oligo.substr(0,alnLength);
+ temp = temp.substr(0,alnLength);
+ int numDiff = countDiffs(oligo, temp);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
+
+ if (alnLength == 0) { numDiff = bdiffs + 1000; }
+ //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
+
+ if(numDiff < minDiff){
+ minDiff = numDiff;
+ minCount = 1;
+ minFGroup.clear();
+ minFGroup.push_back(it->second);
+ int tempminFPos = 0;
+ minFPos.clear();
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminFPos++;
+ }
}
- }
- minFPos.push_back(tempminFPos);
- }else if(numDiff == minDiff){
- minFGroup.push_back(it->second);
- int tempminFPos = 0;
- for(int i=0;i<alnLength;i++){
- if(temp[i] != '-'){
- tempminFPos++;
+ minFPos.push_back(tempminFPos);
+ }else if(numDiff == minDiff){
+ minFGroup.push_back(it->second);
+ int tempminFPos = 0;
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminFPos++;
+ }
}
+ minFPos.push_back(tempminFPos);
}
- minFPos.push_back(tempminFPos);
+ }else { //is a match
+ minFGroup.push_back(it->second);
+ minFPos.push_back(0);
+ minDiff = 0;
}
}
@@ -654,44 +678,50 @@ vector<int> TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq,
break;
}
- //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
- oligo = alignment->getSeqAAln();
- string temp = alignment->getSeqBAln();
-
- int alnLength = oligo.length();
- for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
- oligo = oligo.substr(0,alnLength);
- temp = temp.substr(0,alnLength);
- int numDiff = countDiffs(oligo, temp);
-
- if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
-
- if (alnLength == 0) { numDiff = bdiffs + 1000; }
-
- //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
- if(numDiff < minDiff){
- minDiff = numDiff;
- minCount = 1;
- minRGroup.clear();
- minRGroup.push_back(it->second);
- int tempminRPos = 0;
- minRPos.clear();
- for(int i=0;i<alnLength;i++){
- if(temp[i] != '-'){
- tempminRPos++;
+ if (oligo != "NONE") {
+ //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+ alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
+ oligo = alignment->getSeqAAln();
+ string temp = alignment->getSeqBAln();
+
+ int alnLength = oligo.length();
+ for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
+ oligo = oligo.substr(0,alnLength);
+ temp = temp.substr(0,alnLength);
+ int numDiff = countDiffs(oligo, temp);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
+
+ if (alnLength == 0) { numDiff = bdiffs + 1000; }
+
+ //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
+ if(numDiff < minDiff){
+ minDiff = numDiff;
+ minCount = 1;
+ minRGroup.clear();
+ minRGroup.push_back(it->second);
+ int tempminRPos = 0;
+ minRPos.clear();
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminRPos++;
+ }
}
- }
- minRPos.push_back(tempminRPos);
- }else if(numDiff == minDiff){
- int tempminRPos = 0;
- for(int i=0;i<alnLength;i++){
- if(temp[i] != '-'){
- tempminRPos++;
+ minRPos.push_back(tempminRPos);
+ }else if(numDiff == minDiff){
+ int tempminRPos = 0;
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminRPos++;
+ }
}
+ minRPos.push_back(tempminRPos);
+ minRGroup.push_back(it->second);
}
- minRPos.push_back(tempminRPos);
+ }else { //is a match
+ minRPos.push_back(0);
minRGroup.push_back(it->second);
+ minDiff = 0;
}
}
@@ -768,7 +798,27 @@ vector<int> TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq,
break;
}
- if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
+ if (foligo == "NONE") {
+ if (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length()))) {
+ group = it->first;
+ if (!hasIndex) { //if you are using index file then just matching
+ reverseSeq.setUnaligned(rawRSequence.substr(roligo.length())); //trim reverse
+ reverseQual.trimQScores(roligo.length(), -1);
+ }
+ success[0] = 0; success[1] = 0; success[2] = 0; success[3] = 0;
+ break;
+ }
+ }else if (roligo == "NONE") {
+ if (compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) {
+ group = it->first;
+ if (!hasIndex) { //if you are using index file then just matching
+ forwardSeq.setUnaligned(rawFSequence.substr(foligo.length())); //trim forward
+ forwardQual.trimQScores(foligo.length(), -1);
+ }
+ success[0] = 0; success[1] = 0; success[2] = 0; success[3] = 0;
+ break;
+ }
+ }else if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
group = it->first;
if (!hasIndex) { //if you are using index file then just matching
forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
@@ -816,44 +866,50 @@ vector<int> TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq,
success[1] = bdiffs + 1000; //if the sequence is shorter than the barcode then bail out
break;
}
- //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
- //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
- oligo = alignment->getSeqAAln();
- string temp = alignment->getSeqBAln();
-
- int alnLength = oligo.length();
-
- for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
- oligo = oligo.substr(0,alnLength);
- temp = temp.substr(0,alnLength);
- int numDiff = countDiffs(oligo, temp);
-
- if (alnLength == 0) { numDiff = bdiffs + 1000; }
- if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
-
- if(numDiff < minDiff){
- minDiff = numDiff;
- minCount = 1;
- minFGroup.clear();
- minFGroup.push_back(it->second);
- int tempminFPos = 0;
- minFPos.clear();
- for(int i=0;i<alnLength;i++){
- if(temp[i] != '-'){
- tempminFPos++;
+ if (oligo != "NONE") {
+ //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
+ //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+ alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
+ oligo = alignment->getSeqAAln();
+ string temp = alignment->getSeqBAln();
+
+ int alnLength = oligo.length();
+
+ for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
+ oligo = oligo.substr(0,alnLength);
+ temp = temp.substr(0,alnLength);
+ int numDiff = countDiffs(oligo, temp);
+
+ if (alnLength == 0) { numDiff = bdiffs + 1000; }
+ if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
+
+ if(numDiff < minDiff){
+ minDiff = numDiff;
+ minCount = 1;
+ minFGroup.clear();
+ minFGroup.push_back(it->second);
+ int tempminFPos = 0;
+ minFPos.clear();
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminFPos++;
+ }
}
- }
- minFPos.push_back(tempminFPos);
- }else if(numDiff == minDiff){
- minFGroup.push_back(it->second);
- int tempminFPos = 0;
- for(int i=0;i<alnLength;i++){
- if(temp[i] != '-'){
- tempminFPos++;
+ minFPos.push_back(tempminFPos);
+ }else if(numDiff == minDiff){
+ minFGroup.push_back(it->second);
+ int tempminFPos = 0;
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminFPos++;
+ }
}
+ minFPos.push_back(tempminFPos);
}
- minFPos.push_back(tempminFPos);
+ }else { //is a match
+ minFGroup.push_back(it->second);
+ minFPos.push_back(0);
+ minDiff = 0;
}
}
@@ -883,43 +939,49 @@ vector<int> TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq,
break;
}
- //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
- oligo = alignment->getSeqAAln();
- string temp = alignment->getSeqBAln();
-
- int alnLength = oligo.length();
- for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
- oligo = oligo.substr(0,alnLength);
- temp = temp.substr(0,alnLength);
- int numDiff = countDiffs(oligo, temp);
- if (alnLength == 0) { numDiff = bdiffs + 1000; }
-
- if (m->debug) { m->mothurOut("[DEBUG]: reverse " + reverseSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
-
- //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
- if(numDiff < minDiff){
- minDiff = numDiff;
- minCount = 1;
- minRGroup.clear();
- minRGroup.push_back(it->second);
- int tempminRPos = 0;
- minRPos.clear();
- for(int i=0;i<alnLength;i++){
- if(temp[i] != '-'){
- tempminRPos++;
+ if (oligo != "NONE") {
+ //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+ alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
+ oligo = alignment->getSeqAAln();
+ string temp = alignment->getSeqBAln();
+
+ int alnLength = oligo.length();
+ for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
+ oligo = oligo.substr(0,alnLength);
+ temp = temp.substr(0,alnLength);
+ int numDiff = countDiffs(oligo, temp);
+ if (alnLength == 0) { numDiff = bdiffs + 1000; }
+
+ if (m->debug) { m->mothurOut("[DEBUG]: reverse " + reverseSeq.getName() + " aligned fragment=" + temp + ", barcode=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
+
+ //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
+ if(numDiff < minDiff){
+ minDiff = numDiff;
+ minCount = 1;
+ minRGroup.clear();
+ minRGroup.push_back(it->second);
+ int tempminRPos = 0;
+ minRPos.clear();
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminRPos++;
+ }
}
- }
- minRPos.push_back(tempminRPos);
- }else if(numDiff == minDiff){
- int tempminRPos = 0;
- for(int i=0;i<alnLength;i++){
- if(temp[i] != '-'){
- tempminRPos++;
+ minRPos.push_back(tempminRPos);
+ }else if(numDiff == minDiff){
+ int tempminRPos = 0;
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminRPos++;
+ }
}
+ minRPos.push_back(tempminRPos);
+ minRGroup.push_back(it->second);
}
- minRPos.push_back(tempminRPos);
+ }else { //is a match
+ minRPos.push_back(0);
minRGroup.push_back(it->second);
+ minDiff = 0;
}
}
@@ -1010,7 +1072,23 @@ vector<int> TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, i
break;
}
- if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
+ if (foligo == "NONE") {
+ if (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()))) {
+ group = it->first;
+ seq.setUnaligned(rawSeq.substr(0, (rawSeq.length()-roligo.length()))); //trim reverse
+ if(qual.getName() != ""){ qual.trimQScores(-1, rawSeq.length()-roligo.length()); }
+ success[0] = 0; success[1] = 0; success[2] = 0; success[3] = 0;
+ break;
+ }
+ }else if (roligo == "NONE") {
+ if (compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) {
+ group = it->first;
+ seq.setUnaligned(rawSeq.substr(foligo.length())); //trim forward
+ if(qual.getName() != ""){ qual.trimQScores(foligo.length(), -1); }
+ success[0] = 0; success[1] = 0; success[2] = 0; success[3] = 0;
+ break;
+ }
+ }else if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
group = it->first;
string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
@@ -1059,43 +1137,49 @@ vector<int> TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, i
break;
}
//cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
- //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+bdiffs));
- oligo = alignment->getSeqAAln();
- string temp = alignment->getSeqBAln();
-
- int alnLength = oligo.length();
-
- for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
- oligo = oligo.substr(0,alnLength);
- temp = temp.substr(0,alnLength);
- int numDiff = countDiffs(oligo, temp);
-
- if (alnLength == 0) { numDiff = bdiffs + 1000; }
- //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
-
- if(numDiff < minDiff){
- minDiff = numDiff;
- minCount = 1;
- minFGroup.clear();
- minFGroup.push_back(it->second);
- int tempminFPos = 0;
- minFPos.clear();
- for(int i=0;i<alnLength;i++){
- if(temp[i] != '-'){
- tempminFPos++;
+ if (oligo != "NONE") {
+ //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+ alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+bdiffs));
+ oligo = alignment->getSeqAAln();
+ string temp = alignment->getSeqBAln();
+
+ int alnLength = oligo.length();
+
+ for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
+ oligo = oligo.substr(0,alnLength);
+ temp = temp.substr(0,alnLength);
+ int numDiff = countDiffs(oligo, temp);
+
+ if (alnLength == 0) { numDiff = bdiffs + 1000; }
+ //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
+
+ if(numDiff < minDiff){
+ minDiff = numDiff;
+ minCount = 1;
+ minFGroup.clear();
+ minFGroup.push_back(it->second);
+ int tempminFPos = 0;
+ minFPos.clear();
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminFPos++;
+ }
}
- }
- minFPos.push_back(tempminFPos);
- }else if(numDiff == minDiff){
- minFGroup.push_back(it->second);
- int tempminFPos = 0;
- for(int i=0;i<alnLength;i++){
- if(temp[i] != '-'){
- tempminFPos++;
+ minFPos.push_back(tempminFPos);
+ }else if(numDiff == minDiff){
+ minFGroup.push_back(it->second);
+ int tempminFPos = 0;
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminFPos++;
+ }
}
+ minFPos.push_back(tempminFPos);
}
- minFPos.push_back(tempminFPos);
+ }else { //is a match
+ minFGroup.push_back(it->second);
+ minFPos.push_back(0);
+ minDiff = 0;
}
}
@@ -1122,7 +1206,8 @@ vector<int> TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, i
string rawRSequence = reverseOligo(seq.getUnaligned());
//cout << irbarcodes.size() << '\t' << maxRBarcodeLength << endl;
for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
- string oligo = reverseOligo(it->first);
+ string oligo = it->first;
+ if (oligo != "NONE") { oligo = reverseOligo(oligo); }
//cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+bdiffs)) << endl;
if(rawRSequence.length() < maxRBarcodeLength){ //let's just assume that the barcodes are the same length
success[2] = rawRSequence.length();
@@ -1130,47 +1215,49 @@ vector<int> TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, i
break;
}
- //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
- oligo = alignment->getSeqAAln();
- string temp = alignment->getSeqBAln();
-
- int alnLength = oligo.length();
- for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
- oligo = oligo.substr(0,alnLength);
- temp = temp.substr(0,alnLength);
- int numDiff = countDiffs(oligo, temp);
- if (alnLength == 0) { numDiff = bdiffs + 1000; }
-
- //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
- if(numDiff < minDiff){
- minDiff = numDiff;
- minCount = 1;
- minRGroup.clear();
- minRGroup.push_back(it->second);
- int tempminRPos = 0;
- minRPos.clear();
- for(int i=0;i<alnLength;i++){
- if(temp[i] != '-'){
- tempminRPos++;
- }
- }
- minRPos.push_back(tempminRPos);
- }else if(numDiff == minDiff){
- int tempminRPos = 0;
- for(int i=0;i<alnLength;i++){
- if(temp[i] != '-'){
- tempminRPos++;
+ if (oligo != "NONE") {
+ //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+ alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
+ oligo = alignment->getSeqAAln();
+ string temp = alignment->getSeqBAln();
+
+ int alnLength = oligo.length();
+ for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
+ oligo = oligo.substr(0,alnLength);
+ temp = temp.substr(0,alnLength);
+ int numDiff = countDiffs(oligo, temp);
+ if (alnLength == 0) { numDiff = bdiffs + 1000; }
+
+ //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
+ if(numDiff < minDiff){
+ minDiff = numDiff;
+ minCount = 1;
+ minRGroup.clear();
+ minRGroup.push_back(it->second);
+ int tempminRPos = 0;
+ minRPos.clear();
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminRPos++;
+ }
}
+ minRPos.push_back(tempminRPos);
+ }else if(numDiff == minDiff){
+ int tempminRPos = 0;
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminRPos++;
+ }
+ }
+ minRPos.push_back(tempminRPos);
+ minRGroup.push_back(it->second);
}
- minRPos.push_back(tempminRPos);
+ }else { //is a match
+ minRPos.push_back(0);
minRGroup.push_back(it->second);
+ minDiff = 0;
}
-
}
-
-
-
if(minDiff > bdiffs) { success[2] = minDiff; success[3] = 1e6; } //no good matches
else {
@@ -1264,7 +1351,27 @@ vector<int> TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, i
break;
}
- if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
+ if (foligo == "NONE") {
+ if (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()))) {
+ group = it->first;
+ if (!keepForward) {
+ seq.setUnaligned(rawSeq.substr(0, (rawSeq.length()-roligo.length()))); //trim reverse
+ if(qual.getName() != ""){ qual.trimQScores(-1, rawSeq.length()-roligo.length()); }
+ }
+ success[0] = 0; success[1] = 0; success[2] = 0; success[3] = 0;
+ break;
+ }
+ }else if (roligo == "NONE") {
+ if (compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) {
+ group = it->first;
+ if (!keepForward) {
+ seq.setUnaligned(rawSeq.substr(foligo.length())); //trim forward
+ if(qual.getName() != ""){ qual.trimQScores(foligo.length(), -1); }
+ }
+ success[0] = 0; success[1] = 0; success[2] = 0; success[3] = 0;
+ break;
+ }
+ }else if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
group = it->first;
if (!keepForward) {
string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
@@ -1315,49 +1422,56 @@ vector<int> TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, i
break;
}
//cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+pdiffs) << endl;
- //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs));
- oligo = alignment->getSeqAAln();
- string temp = alignment->getSeqBAln();
-
-// cout << endl;
-// cout << oligo << endl;
-// cout << temp << endl;
-// cout << endl;
-
- int alnLength = oligo.length();
-
- for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
- oligo = oligo.substr(0,alnLength);
- temp = temp.substr(0,alnLength);
- int numDiff = countDiffs(oligo, temp);
-
- if (alnLength == 0) { numDiff = pdiffs + 1000; }
- //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
-
- if(numDiff < minDiff){
- minDiff = numDiff;
- minCount = 1;
- minFGroup.clear();
- minFGroup.push_back(it->second);
- int tempminFPos = 0;
- minFPos.clear();
- for(int i=0;i<alnLength;i++){
- if(temp[i] != '-'){
- tempminFPos++;
+ if (oligo != "NONE") {
+ //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+ alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs));
+ oligo = alignment->getSeqAAln();
+ string temp = alignment->getSeqBAln();
+
+ // cout << endl;
+ // cout << oligo << endl;
+ // cout << temp << endl;
+ // cout << endl;
+
+ int alnLength = oligo.length();
+
+ for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
+ oligo = oligo.substr(0,alnLength);
+ temp = temp.substr(0,alnLength);
+ int numDiff = countDiffs(oligo, temp);
+
+ if (alnLength == 0) { numDiff = pdiffs + 1000; }
+ //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
+
+ if(numDiff < minDiff){
+ minDiff = numDiff;
+ minCount = 1;
+ minFGroup.clear();
+ minFGroup.push_back(it->second);
+ int tempminFPos = 0;
+ minFPos.clear();
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminFPos++;
+ }
}
- }
- minFPos.push_back(tempminFPos);
- }else if(numDiff == minDiff){
- minFGroup.push_back(it->second);
- int tempminFPos = 0;
- for(int i=0;i<alnLength;i++){
- if(temp[i] != '-'){
- tempminFPos++;
+ minFPos.push_back(tempminFPos);
+ }else if(numDiff == minDiff){
+ minFGroup.push_back(it->second);
+ int tempminFPos = 0;
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminFPos++;
+ }
}
+ minFPos.push_back(tempminFPos);
}
- minFPos.push_back(tempminFPos);
+ }else { //is a match
+ minFGroup.push_back(it->second);
+ minFPos.push_back(0);
+ minDiff = 0;
}
+
}
fMinDiff = minDiff;
@@ -1382,7 +1496,8 @@ vector<int> TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, i
string rawRSequence = reverseOligo(seq.getUnaligned());
for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
- string oligo = reverseOligo(it->first);
+ string oligo = it->first;
+ if (oligo != "NONE") { oligo = reverseOligo(oligo); }
//cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+pdiffs)) << endl;
if(rawRSequence.length() < maxRPrimerLength){ //let's just assume that the barcodes are the same length
success[2] = rawRSequence.length();
@@ -1390,48 +1505,53 @@ vector<int> TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, i
break;
}
- //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
- oligo = alignment->getSeqAAln();
- string temp = alignment->getSeqBAln();
-
-// cout << endl;
-// cout << oligo << endl;
-// cout << temp << endl;
-// cout << endl;
-
- int alnLength = oligo.length();
- for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
- oligo = oligo.substr(0,alnLength);
- temp = temp.substr(0,alnLength);
- int numDiff = countDiffs(oligo, temp);
- if (alnLength == 0) { numDiff = pdiffs + 1000; }
-
- //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
- if(numDiff < minDiff){
- minDiff = numDiff;
- minCount = 1;
- minRGroup.clear();
- minRGroup.push_back(it->second);
- int tempminRPos = 0;
- minRPos.clear();
- for(int i=0;i<alnLength;i++){
- if(temp[i] != '-'){
- tempminRPos++;
+ if (oligo != "NONE") {
+ //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+ alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
+ oligo = alignment->getSeqAAln();
+ string temp = alignment->getSeqBAln();
+
+ // cout << endl;
+ // cout << oligo << endl;
+ // cout << temp << endl;
+ // cout << endl;
+
+ int alnLength = oligo.length();
+ for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
+ oligo = oligo.substr(0,alnLength);
+ temp = temp.substr(0,alnLength);
+ int numDiff = countDiffs(oligo, temp);
+ if (alnLength == 0) { numDiff = pdiffs + 1000; }
+
+ //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
+ if(numDiff < minDiff){
+ minDiff = numDiff;
+ minCount = 1;
+ minRGroup.clear();
+ minRGroup.push_back(it->second);
+ int tempminRPos = 0;
+ minRPos.clear();
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminRPos++;
+ }
}
- }
- minRPos.push_back(tempminRPos);
- }else if(numDiff == minDiff){
- int tempminRPos = 0;
- for(int i=0;i<alnLength;i++){
- if(temp[i] != '-'){
- tempminRPos++;
+ minRPos.push_back(tempminRPos);
+ }else if(numDiff == minDiff){
+ int tempminRPos = 0;
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminRPos++;
+ }
}
+ minRPos.push_back(tempminRPos);
+ minRGroup.push_back(it->second);
}
- minRPos.push_back(tempminRPos);
+ }else { //is a match
+ minRPos.push_back(0);
minRGroup.push_back(it->second);
+ minDiff = 0;
}
-
}
if(minDiff > pdiffs) { success[2] = minDiff; success[3] = 1e6; } //no good matches
@@ -1518,7 +1638,23 @@ vector<int> TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq,
break;
}
- if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
+ if (foligo == "NONE") {
+ if (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length()))) {
+ group = it->first;
+ reverseSeq.setUnaligned(rawRSequence.substr(roligo.length())); //trim reverse
+ reverseQual.trimQScores(roligo.length(), -1);
+ success[0] = 0; success[1] = 0; success[2] = 0; success[3] = 0;
+ break;
+ }
+ }else if (roligo == "NONE") {
+ if (compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) {
+ group = it->first;
+ forwardSeq.setUnaligned(rawFSequence.substr(foligo.length())); //trim forward
+ forwardQual.trimQScores(foligo.length(), -1);
+ success[0] = 0; success[1] = 0; success[2] = 0; success[3] = 0;
+ break;
+ }
+ }else if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length())))) {
group = it->first;
forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
@@ -1565,45 +1701,51 @@ vector<int> TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq,
break;
}
//cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
- //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
- oligo = alignment->getSeqAAln();
- string temp = alignment->getSeqBAln();
-
- int alnLength = oligo.length();
-
- for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
- oligo = oligo.substr(0,alnLength);
- temp = temp.substr(0,alnLength);
- int numDiff = countDiffs(oligo, temp);
-
- if (alnLength == 0) { numDiff = pdiffs + 1000; }
- //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
-
- if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
-
- if(numDiff < minDiff){
- minDiff = numDiff;
- minCount = 1;
- minFGroup.clear();
- minFGroup.push_back(it->second);
- int tempminFPos = 0;
- minFPos.clear();
- for(int i=0;i<alnLength;i++){
- if(temp[i] != '-'){
- tempminFPos++;
+ if (oligo != "NONE") {
+ //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+ alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
+ oligo = alignment->getSeqAAln();
+ string temp = alignment->getSeqBAln();
+
+ int alnLength = oligo.length();
+
+ for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
+ oligo = oligo.substr(0,alnLength);
+ temp = temp.substr(0,alnLength);
+ int numDiff = countDiffs(oligo, temp);
+
+ if (alnLength == 0) { numDiff = pdiffs + 1000; }
+ //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
+
+ if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
+
+ if(numDiff < minDiff){
+ minDiff = numDiff;
+ minCount = 1;
+ minFGroup.clear();
+ minFGroup.push_back(it->second);
+ int tempminFPos = 0;
+ minFPos.clear();
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminFPos++;
+ }
}
- }
- minFPos.push_back(tempminFPos);
- }else if(numDiff == minDiff){
- minFGroup.push_back(it->second);
- int tempminFPos = 0;
- for(int i=0;i<alnLength;i++){
- if(temp[i] != '-'){
- tempminFPos++;
+ minFPos.push_back(tempminFPos);
+ }else if(numDiff == minDiff){
+ minFGroup.push_back(it->second);
+ int tempminFPos = 0;
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminFPos++;
+ }
}
+ minFPos.push_back(tempminFPos);
}
- minFPos.push_back(tempminFPos);
+ }else { //is a match
+ minFGroup.push_back(it->second);
+ minFPos.push_back(0);
+ minDiff = 0;
}
}
@@ -1615,7 +1757,7 @@ vector<int> TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq,
//check for reverse match
if (alignment != NULL) { delete alignment; }
- if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
+ if (irprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
else{ alignment = NULL; }
//can you find the barcode
@@ -1633,45 +1775,50 @@ vector<int> TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq,
break;
}
- //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
- oligo = alignment->getSeqAAln();
- string temp = alignment->getSeqBAln();
-
- int alnLength = oligo.length();
- for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
- oligo = oligo.substr(0,alnLength);
- temp = temp.substr(0,alnLength);
- int numDiff = countDiffs(oligo, temp);
- if (alnLength == 0) { numDiff = pdiffs + 1000; }
-
- if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
-
- //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
- if(numDiff < minDiff){
- minDiff = numDiff;
- minCount = 1;
- minRGroup.clear();
- minRGroup.push_back(it->second);
- int tempminRPos = 0;
- minRPos.clear();
- for(int i=0;i<alnLength;i++){
- if(temp[i] != '-'){
- tempminRPos++;
+ if (oligo != "NONE") {
+ //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+ alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
+ oligo = alignment->getSeqAAln();
+ string temp = alignment->getSeqBAln();
+
+ int alnLength = oligo.length();
+ for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
+ oligo = oligo.substr(0,alnLength);
+ temp = temp.substr(0,alnLength);
+ int numDiff = countDiffs(oligo, temp);
+ if (alnLength == 0) { numDiff = pdiffs + 1000; }
+
+ if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
+
+ //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
+ if(numDiff < minDiff){
+ minDiff = numDiff;
+ minCount = 1;
+ minRGroup.clear();
+ minRGroup.push_back(it->second);
+ int tempminRPos = 0;
+ minRPos.clear();
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminRPos++;
+ }
}
- }
- minRPos.push_back(tempminRPos);
- }else if(numDiff == minDiff){
- int tempminRPos = 0;
- for(int i=0;i<alnLength;i++){
- if(temp[i] != '-'){
- tempminRPos++;
+ minRPos.push_back(tempminRPos);
+ }else if(numDiff == minDiff){
+ int tempminRPos = 0;
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminRPos++;
+ }
}
+ minRPos.push_back(tempminRPos);
+ minRGroup.push_back(it->second);
}
- minRPos.push_back(tempminRPos);
+ }else { //is a match
+ minRPos.push_back(0);
minRGroup.push_back(it->second);
+ minDiff = 0;
}
-
}
if(minDiff > pdiffs) { success[2] = minDiff; success[3] = 1e6; } //no good matches
@@ -1748,7 +1895,21 @@ vector<int> TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq,
break;
}
- if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
+ if (foligo == "NONE") {
+ if (compareDNASeq(roligo, rawRSequence.substr(0,roligo.length()))) {
+ group = it->first;
+ reverseSeq.setUnaligned(rawRSequence.substr(roligo.length())); //trim reverse
+ success[0] = 0; success[1] = 0; success[2] = 0; success[3] = 0;
+ break;
+ }
+ }else if (roligo == "NONE") {
+ if (compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) {
+ group = it->first;
+ forwardSeq.setUnaligned(rawFSequence.substr(foligo.length())); //trim forward
+ success[0] = 0; success[1] = 0; success[2] = 0; success[3] = 0;
+ break;
+ }
+ }else if((compareDNASeq(foligo, rawFSequence.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawRSequence.substr((rawRSequence.length()-roligo.length()),roligo.length())))) {
group = it->first;
forwardSeq.setUnaligned(rawFSequence.substr(foligo.length()));
reverseSeq.setUnaligned(rawRSequence.substr(roligo.length()));
@@ -1793,45 +1954,51 @@ vector<int> TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq,
break;
}
//cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
- //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
- oligo = alignment->getSeqAAln();
- string temp = alignment->getSeqBAln();
-
- int alnLength = oligo.length();
-
- for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
- oligo = oligo.substr(0,alnLength);
- temp = temp.substr(0,alnLength);
- int numDiff = countDiffs(oligo, temp);
-
- if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
-
- if (alnLength == 0) { numDiff = pdiffs + 1000; }
- //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
-
- if(numDiff < minDiff){
- minDiff = numDiff;
- minCount = 1;
- minFGroup.clear();
- minFGroup.push_back(it->second);
- int tempminFPos = 0;
- minFPos.clear();
- for(int i=0;i<alnLength;i++){
- if(temp[i] != '-'){
- tempminFPos++;
+ if (oligo != "NONE") {
+ //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+ alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
+ oligo = alignment->getSeqAAln();
+ string temp = alignment->getSeqBAln();
+
+ int alnLength = oligo.length();
+
+ for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
+ oligo = oligo.substr(0,alnLength);
+ temp = temp.substr(0,alnLength);
+ int numDiff = countDiffs(oligo, temp);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: forward " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
+
+ if (alnLength == 0) { numDiff = pdiffs + 1000; }
+ //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
+
+ if(numDiff < minDiff){
+ minDiff = numDiff;
+ minCount = 1;
+ minFGroup.clear();
+ minFGroup.push_back(it->second);
+ int tempminFPos = 0;
+ minFPos.clear();
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminFPos++;
+ }
}
- }
- minFPos.push_back(tempminFPos);
- }else if(numDiff == minDiff){
- minFGroup.push_back(it->second);
- int tempminFPos = 0;
- for(int i=0;i<alnLength;i++){
- if(temp[i] != '-'){
- tempminFPos++;
+ minFPos.push_back(tempminFPos);
+ }else if(numDiff == minDiff){
+ minFGroup.push_back(it->second);
+ int tempminFPos = 0;
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminFPos++;
+ }
}
+ minFPos.push_back(tempminFPos);
}
- minFPos.push_back(tempminFPos);
+ }else { //is a match
+ minFGroup.push_back(it->second);
+ minFPos.push_back(0);
+ minDiff = 0;
}
}
@@ -1843,7 +2010,7 @@ vector<int> TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq,
//check for reverse match
if (alignment != NULL) { delete alignment; }
- if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
+ if (irprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
else{ alignment = NULL; }
//can you find the barcode
@@ -1861,44 +2028,50 @@ vector<int> TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq,
break;
}
- //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
- alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
- oligo = alignment->getSeqAAln();
- string temp = alignment->getSeqBAln();
-
- int alnLength = oligo.length();
- for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
- oligo = oligo.substr(0,alnLength);
- temp = temp.substr(0,alnLength);
- int numDiff = countDiffs(oligo, temp);
-
- if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
-
- if (alnLength == 0) { numDiff = pdiffs + 1000; }
-
- //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
- if(numDiff < minDiff){
- minDiff = numDiff;
- minCount = 1;
- minRGroup.clear();
- minRGroup.push_back(it->second);
- int tempminRPos = 0;
- minRPos.clear();
- for(int i=0;i<alnLength;i++){
- if(temp[i] != '-'){
- tempminRPos++;
+ if (oligo != "NONE") {
+ //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+ alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
+ oligo = alignment->getSeqAAln();
+ string temp = alignment->getSeqBAln();
+
+ int alnLength = oligo.length();
+ for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1; break; } }
+ oligo = oligo.substr(0,alnLength);
+ temp = temp.substr(0,alnLength);
+ int numDiff = countDiffs(oligo, temp);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: reverse " + forwardSeq.getName() + " aligned fragment=" + temp + ", primer=" + oligo + ", numDiffs=" + toString(numDiff) + ".\n"); }
+
+ if (alnLength == 0) { numDiff = pdiffs + 1000; }
+
+ //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
+ if(numDiff < minDiff){
+ minDiff = numDiff;
+ minCount = 1;
+ minRGroup.clear();
+ minRGroup.push_back(it->second);
+ int tempminRPos = 0;
+ minRPos.clear();
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminRPos++;
+ }
}
- }
- minRPos.push_back(tempminRPos);
- }else if(numDiff == minDiff){
- int tempminRPos = 0;
- for(int i=0;i<alnLength;i++){
- if(temp[i] != '-'){
- tempminRPos++;
+ minRPos.push_back(tempminRPos);
+ }else if(numDiff == minDiff){
+ int tempminRPos = 0;
+ for(int i=0;i<alnLength;i++){
+ if(temp[i] != '-'){
+ tempminRPos++;
+ }
}
+ minRPos.push_back(tempminRPos);
+ minRGroup.push_back(it->second);
}
- minRPos.push_back(tempminRPos);
+ }else { //is a match
+ minRPos.push_back(0);
minRGroup.push_back(it->second);
+ minDiff = 0;
}
}
diff --git a/source/trimoligos.h b/source/trimoligos.h
index 2fb4aef..f49c3d6 100644
--- a/source/trimoligos.h
+++ b/source/trimoligos.h
@@ -19,7 +19,7 @@
class TrimOligos {
public:
- TrimOligos(int,int, map<string, int>, map<string, int>, vector<string>); //pdiffs, bdiffs, primers, barcodes, revPrimers
+ TrimOligos(int,int,int, map<string, int>, map<string, int>, vector<string>); //pdiffs, bdiffs, primers, barcodes, revPrimers
TrimOligos(int,int, int, int, map<string, int>, map<string, int>, vector<string>, vector<string>, vector<string>); //pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimers, linker, spacer
TrimOligos(int,int, int, int, map<int, oligosPair>, map<int, oligosPair>, bool); //pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, hasIndex
~TrimOligos();
@@ -55,7 +55,7 @@ class TrimOligos {
string getCodeValue(int, int);
private:
- int pdiffs, bdiffs, ldiffs, sdiffs;
+ int pdiffs, bdiffs, ldiffs, sdiffs, rdiffs;
bool paired, hasIndex;
map<string, int> barcodes;
diff --git a/source/validparameter.cpp b/source/validparameter.cpp
index 252831f..18638e5 100644
--- a/source/validparameter.cpp
+++ b/source/validparameter.cpp
@@ -233,18 +233,34 @@ string ValidParameters::validFile(map<string, string>& container, string paramet
ableToOpen = m->openInputFile(it->second, in, "noerror");
in.close();
- //if you can't open it, try default location
- if (ableToOpen == 1) {
- if (m->getDefaultPath() != "") { //default path is set
- string tryPath = m->getDefaultPath() + m->getSimpleName(it->second);
- m->mothurOut("Unable to open " + it->second + ". Trying default " + tryPath); m->mothurOutEndLine();
- ifstream in2;
- ableToOpen = m->openInputFile(tryPath, in2, "noerror");
- in2.close();
- container[parameter] = tryPath;
- }
- }
+ //if you can't open it, try default location
+ if (ableToOpen == 1) {
+ if (m->getDefaultPath() != "") { //default path is set
+ string tryPath = m->getDefaultPath() + m->getSimpleName(it->second);
+ m->mothurOut("Unable to open " + it->second + ". Trying default " + tryPath); m->mothurOutEndLine();
+ ifstream in2;
+ ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+ in2.close();
+ container[parameter] = tryPath;
+ }
+ }
+ //if you can't open it, try mothur's location
+ if (ableToOpen == 1) {
+ //look for uchime exe
+ string mpath = m->argv;
+ string tempPath = mpath;
+ for (int i = 0; i < mpath.length(); i++) { tempPath[i] = tolower(mpath[i]); }
+ mpath = mpath.substr(0, (tempPath.find_last_of('m')));
+
+ string tryPath = mpath + m->getSimpleName(it->second);
+ m->mothurOut("Unable to open " + it->second + ". Trying mothur's location " + tryPath); m->mothurOutEndLine();
+ ifstream in2;
+ ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+ in2.close();
+ container[parameter] = tryPath;
+ }
+
//if you can't open it, try default location
if (ableToOpen == 1) {
if (m->getOutputDir() != "") { //default path is set
diff --git a/source/vsearchfileparser.cpp b/source/vsearchfileparser.cpp
index bd92467..98492c2 100644
--- a/source/vsearchfileparser.cpp
+++ b/source/vsearchfileparser.cpp
@@ -7,10 +7,7 @@
//
#include "vsearchfileparser.h"
-#include "deconvolutecommand.h"
-#include "sequence.hpp"
-#include "rabundvector.hpp"
-#include "sabundvector.hpp"
+
/***********************************************************************/
VsearchFileParser::VsearchFileParser(){
@@ -114,7 +111,7 @@ string VsearchFileParser::createVsearchFasta(string inputFile){
}
in.close();
- m->printVsearchFile(seqs, vsearchFasta, "size");
+ m->printVsearchFile(seqs, vsearchFasta, ";size=", ";");
return vsearchFasta;
}
@@ -190,6 +187,7 @@ int VsearchFileParser::createListFile(string inputFile, string listFile, string
if (bin == "") { bin = seqName; }
else { bin += ',' + seqName; }
list.set(clusterNumber, bin);
+
}
}
@@ -215,7 +213,7 @@ int VsearchFileParser::createListFile(string inputFile, string listFile, string
}
}
out.close();
-
+
return 0;
}
catch(exception& e) {
@@ -228,7 +226,7 @@ int VsearchFileParser::createListFile(string inputFile, string listFile, string
string VsearchFileParser::removeAbundances(string seqName){
try {
- int pos = seqName.find_last_of("/", seqName.length()-2); //don't look at the last /
+ int pos = seqName.find_last_of(";", seqName.length()-2); //don't look at the last /
if (pos != string::npos) { seqName = seqName.substr(0, pos); }
return seqName;
diff --git a/source/vsearchfileparser.h b/source/vsearchfileparser.h
index 3327da7..b08b7e6 100644
--- a/source/vsearchfileparser.h
+++ b/source/vsearchfileparser.h
@@ -10,7 +10,10 @@
#define __Mothur__vsearchfileparser__
#include "mothurout.h"
-
+#include "deconvolutecommand.h"
+#include "sequence.hpp"
+#include "rabundvector.hpp"
+#include "sabundvector.hpp"
/**************************************************************************************************/
diff --git a/source/weightedlinkage.cpp b/source/weightedlinkage.cpp
index b79986b..a6333db 100644
--- a/source/weightedlinkage.cpp
+++ b/source/weightedlinkage.cpp
@@ -1,10 +1,7 @@
#ifndef WEIGHTEDLINKAGE_H
#define WEIGHTEDLINKAGE_H
-
-#include "mothur.h"
#include "cluster.hpp"
-#include "rabundvector.hpp"
/* This class implements the WPGMA, weighted average neighbor clustering algorithm */
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/mothur.git
More information about the debian-med-commit
mailing list