[med-svn] [Git][med-team/prank][upstream] New upstream version 170703
Charles Plessy (@plessy)
gitlab at salsa.debian.org
Fri Sep 5 03:06:36 BST 2025
Charles Plessy pushed to branch upstream at Debian Med / prank
Commits:
4c1744d0 by Charles Plessy at 2025-09-05T10:10:04+09:00
New upstream version 170703
- - - - -
29 changed files:
- − VERSION_HISTORY
- src/Makefile
- − src/Makefile.qt
- src/ancestralnode.cpp
- src/ancestralnode.h
- src/config.h
- + src/fasttree_tree.cpp
- + src/fasttree_tree.h
- src/guidetree.cpp
- src/node.cpp
- src/node.h
- − src/prank.1
- src/prank.cpp
- src/prank.h
- − src/prank.pro
- src/progressivealignment.cpp
- src/progressivealignment.h
- + src/raxmlancestors.cpp
- + src/raxmlancestors.h
- + src/raxmlrebl.cpp
- + src/raxmlrebl.h
- src/readalignment.cpp
- src/readfile.cpp
- src/readnewick.cpp
- src/readnewick.h
- src/terminalnode.h
- src/terminalsequence.cpp
- src/translatesequences.cpp
- src/treenode.h
Changes:
=====================================
VERSION_HISTORY deleted
=====================================
@@ -1,128 +0,0 @@
-v.170427
-Changes by Nikolai Hecker
-- make the temp files thread safe
-- fix a compile issue with GCC 6.2.0
-- reduce the information written to stdout; disable with "-verbose"
-
-
-v.150803
-- force linear order of alignment anchors
-- convert multifurcating trees to bifurcating ones
-- finish despite BppAncestor failing
-
-
-v.140603
-- now avoids crashes due to weird sequence names
-- now (codon) ancestral reconstructio also for translated alignments
-- new option '-njgaps' to consider gaps as mismatches for NJ distances
-
-
-v.140110
-- a fix for the external tool paths
-
-
-v.131211
-- search order for external tools changed: use own one first
-- '-nobbpa' to run without BppAncestors (even when available)
-- a fix for rare crashes due to overly long branch lengths
-
-
-v.131119
-- ancestor inference for translated alignments of DNA sequences
-- new option '-treeonly'
-
-
-v.130820
-- disabled copmutation of alignment score for the guide tree alignment
-- more information about iteration with a user-provided tree
-- workaround for a BppAncestor bug causing incomplete last codon
-- path to other binaries no more affected by renaming of the program file
-
-
-v.130708
-- Significant bug fixes -- update *strongly* recommended:
- * option -F was mistakenly turned off by the new iterative approach
- * without option -F, ancestor reconstruction (and scoring) were incorrect
-
-
-v.130410
-- More information about optimization score and fix for last alignment
-- Minor fixes on alignment conversion and use of external models
-
-
-v.130129
-- Introduced alignment score and automatic iteration to maximise the score
-- Changed interface for the analysis input and output
-- New output: inferred evolutionary events per branch
-
-
-v.121218
-- More detailed information about unmatching names. New option "-prunedata".
-
-
-v.121212
-- Support for some NHX tags.
-
-
-v.121210
-- Fixed underflow errors affecting ancestral reconstruction of large
- alignments.
-
-
-v.121018
-- Ancestral sequences can differently indicate insertions and deletions.
-- Can update an alignment, recomputing nodes with tag "[&&NHX:XN=realign]"
-
-
-v.121002
-- Alignment merge now accepts trees such as "(t1:#.#,t2:#.#);".
- Provided with "-t=filename" or "-tree=tree-string".
-
-
-v.120827
-- All files now under the GPL licence.
-
-
-v.120814
-- Can now also merge "alignments" of one sequence
-- New option '-mergedist=#' to define the distance for two alignments
-
-
-v.120717
-- All input data now converted to upper-case.
-
-
-v.120716
-- Fixed the translated alignment (been broken in recent clean up)
-- Fixed the output order of ancestral sequences
-
-
-v.120712
-- For codon alignment, MAFFT guide tree now with protein sequences
- (fixes several issues with codon alignment)
-
-
-v.120626
-- Guide tree estimation from a MAFFT alignment
-- Merge of two pre-defined alignments
-- Support for Exonerate and MAFFT on Windows
-- Clean up of some code
-
-
-v.111130
-- Exonerate anchoring now also for guidetree computation. Experimental!
-
-
-v.111129
-- Allow guide trees with no branch lengths. Default branch length is 0.1;
- use -fixedbranches=# to change.
-- Removed the dependency to boost libraries.
-
-
-v.111013
-- First update in Google Code
-- Alignment speed ups with Exonerate anchoring.
-
-
-v.101018
-- Last version before migration to Google Code
=====================================
src/Makefile
=====================================
@@ -71,7 +71,10 @@ SOURCES = writefile.cpp \
check_version.cpp \
exonerate_reads.cpp \
mafft_alignment.cpp \
- bppancestors.cpp
+ bppancestors.cpp \
+ raxmlancestors.cpp \
+ fasttree_tree.cpp \
+ raxmlrebl.cpp
OBJECTS = writefile.o \
treenode.o \
translatesequences.o \
@@ -104,7 +107,10 @@ OBJECTS = writefile.o \
check_version.o \
exonerate_reads.o \
mafft_alignment.o \
- bppancestors.o
+ bppancestors.o \
+ raxmlancestors.o \
+ fasttree_tree.o \
+ raxmlrebl.o
DESTDIR =
TARGET = prank
@@ -158,15 +164,7 @@ compiler_clean:
####### Compile
writefile.o: writefile.cpp writefile.h \
- ancestralnode.h \
- treenode.h \
- sequence.h \
- site.h \
- intmatrix.h \
- flmatrix.h \
- dbmatrix.h \
- boolmatrix.h \
- ancestralsequence.h
+ ancestralnode.h
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o writefile.o writefile.cpp
treenode.o: treenode.cpp config.h \
@@ -175,14 +173,14 @@ treenode.o: treenode.cpp config.h \
flmatrix.h \
intmatrix.h \
ancestralnode.h \
+ writefile.h \
treenode.h \
sequence.h \
site.h \
boolmatrix.h \
- ancestralsequence.h \
- writefile.h \
hirschberg.h \
phylomatchscore.h \
+ ancestralsequence.h \
terminalsequence.h \
fullprobability.h \
postprobability.h \
@@ -195,12 +193,7 @@ translatesequences.o: translatesequences.cpp translatesequences.h \
dbmatrix.h \
flmatrix.h \
intmatrix.h \
- ancestralnode.h \
- treenode.h \
- sequence.h \
- site.h \
- boolmatrix.h \
- ancestralsequence.h
+ ancestralnode.h
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o translatesequences.o translatesequences.cpp
terminalsequence.o: terminalsequence.cpp terminalsequence.h \
@@ -212,9 +205,7 @@ terminalsequence.o: terminalsequence.cpp terminalsequence.h \
boolmatrix.h \
config.h \
hmmodel.h \
- ancestralnode.h \
- treenode.h \
- ancestralsequence.h
+ ancestralnode.h
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o terminalsequence.o terminalsequence.cpp
terminalnode.o: terminalnode.cpp terminalnode.h \
@@ -228,8 +219,7 @@ terminalnode.o: terminalnode.cpp terminalnode.h \
terminalsequence.h \
config.h \
hmmodel.h \
- ancestralnode.h \
- ancestralsequence.h
+ ancestralnode.h
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o terminalnode.o terminalnode.cpp
site.o: site.cpp site.h \
@@ -238,10 +228,7 @@ site.o: site.cpp site.h \
dbmatrix.h \
boolmatrix.h \
hmmodel.h \
- ancestralnode.h \
- treenode.h \
- sequence.h \
- ancestralsequence.h
+ ancestralnode.h
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o site.o site.cpp
sequence.o: sequence.cpp sequence.h \
@@ -261,7 +248,6 @@ readnewick.o: readnewick.cpp readnewick.h \
dbmatrix.h \
boolmatrix.h \
ancestralnode.h \
- ancestralsequence.h \
node.h
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o readnewick.o readnewick.cpp
@@ -295,11 +281,6 @@ pwhirschberg.o: pwhirschberg.cpp config.h \
flmatrix.h \
intmatrix.h \
ancestralnode.h \
- treenode.h \
- sequence.h \
- site.h \
- boolmatrix.h \
- ancestralsequence.h \
pwhirschberg.h \
pwsite.h \
exonerate_reads.h
@@ -316,7 +297,6 @@ progressivealignment.o: progressivealignment.cpp readnewick.h \
readfile.h \
writefile.h \
ancestralnode.h \
- ancestralsequence.h \
guidetree.h \
progressivealignment.h \
config.h \
@@ -324,12 +304,16 @@ progressivealignment.o: progressivealignment.cpp readnewick.h \
translatesequences.h \
node.h \
mafft_alignment.h \
- hirschberg.h \
+ exonerate_reads.h \
+ bppancestors.h \
+ raxmlancestors.h \
+ fasttree_tree.h \
+ readalignment.h \
phylomatchscore.h \
+ ancestralsequence.h \
terminalsequence.h \
- readalignment.h \
- exonerate_reads.h \
- bppancestors.h
+ hirschberg.h \
+ raxmlrebl.h
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o progressivealignment.o progressivealignment.cpp
prank.o: prank.cpp progressivealignment.h \
@@ -343,7 +327,6 @@ prank.o: prank.cpp progressivealignment.h \
sequence.h \
site.h \
boolmatrix.h \
- ancestralsequence.h \
readfile.h \
writefile.h \
readnewick.h \
@@ -351,6 +334,15 @@ prank.o: prank.cpp progressivealignment.h \
translatesequences.h \
node.h \
mafft_alignment.h \
+ exonerate_reads.h \
+ bppancestors.h \
+ raxmlancestors.h \
+ fasttree_tree.h \
+ readalignment.h \
+ phylomatchscore.h \
+ ancestralsequence.h \
+ terminalsequence.h \
+ hirschberg.h \
check_version.h \
prank.h
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o prank.o prank.cpp
@@ -361,13 +353,12 @@ postprobability.o: postprobability.cpp config.h \
flmatrix.h \
intmatrix.h \
ancestralnode.h \
- treenode.h \
+ postprobability.h \
sequence.h \
site.h \
boolmatrix.h \
- ancestralsequence.h \
- postprobability.h \
phylomatchscore.h \
+ ancestralsequence.h \
terminalsequence.h
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o postprobability.o postprobability.cpp
@@ -377,12 +368,11 @@ phylomatchscore.o: phylomatchscore.cpp config.h \
flmatrix.h \
intmatrix.h \
ancestralnode.h \
- treenode.h \
+ phylomatchscore.h \
sequence.h \
site.h \
boolmatrix.h \
ancestralsequence.h \
- phylomatchscore.h \
terminalsequence.h
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o phylomatchscore.o phylomatchscore.cpp
@@ -397,11 +387,6 @@ hmmodel.o: hmmodel.cpp hmmodel.h \
flmatrix.h \
intmatrix.h \
ancestralnode.h \
- treenode.h \
- sequence.h \
- site.h \
- boolmatrix.h \
- ancestralsequence.h \
eigen.h
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o hmmodel.o hmmodel.cpp
@@ -411,31 +396,26 @@ hirschberg.o: hirschberg.cpp config.h \
flmatrix.h \
intmatrix.h \
ancestralnode.h \
- treenode.h \
+ exonerate_reads.h \
+ hirschberg.h \
sequence.h \
site.h \
boolmatrix.h \
- ancestralsequence.h \
- exonerate_reads.h \
- hirschberg.h \
phylomatchscore.h \
+ ancestralsequence.h \
terminalsequence.h
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o hirschberg.o hirschberg.cpp
guidetree.o: guidetree.cpp guidetree.h \
flmatrix.h \
intmatrix.h \
- pwhirschberg.h \
- pwsite.h \
+ fasttree_tree.h \
config.h \
hmmodel.h \
dbmatrix.h \
ancestralnode.h \
- treenode.h \
- sequence.h \
- site.h \
- boolmatrix.h \
- ancestralsequence.h \
+ pwhirschberg.h \
+ pwsite.h \
translatesequences.h
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o guidetree.o guidetree.cpp
@@ -445,13 +425,12 @@ fullprobability.o: fullprobability.cpp config.h \
flmatrix.h \
intmatrix.h \
ancestralnode.h \
- treenode.h \
+ fullprobability.h \
sequence.h \
site.h \
boolmatrix.h \
- ancestralsequence.h \
- fullprobability.h \
phylomatchscore.h \
+ ancestralsequence.h \
terminalsequence.h
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o fullprobability.o fullprobability.cpp
@@ -470,12 +449,11 @@ characterprobability.o: characterprobability.cpp config.h \
flmatrix.h \
intmatrix.h \
ancestralnode.h \
- treenode.h \
+ characterprobability.h \
sequence.h \
site.h \
boolmatrix.h \
ancestralsequence.h \
- characterprobability.h \
terminalsequence.h
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o characterprobability.o characterprobability.cpp
@@ -491,8 +469,7 @@ ancestralsequence.o: ancestralsequence.cpp ancestralsequence.h \
boolmatrix.h \
config.h \
hmmodel.h \
- ancestralnode.h \
- treenode.h
+ ancestralnode.h
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o ancestralsequence.o ancestralsequence.cpp
ancestralnode.o: ancestralnode.cpp config.h \
@@ -501,14 +478,14 @@ ancestralnode.o: ancestralnode.cpp config.h \
flmatrix.h \
intmatrix.h \
ancestralnode.h \
+ writefile.h \
treenode.h \
sequence.h \
site.h \
boolmatrix.h \
- ancestralsequence.h \
- writefile.h \
hirschberg.h \
phylomatchscore.h \
+ ancestralsequence.h \
terminalsequence.h \
fullprobability.h \
postprobability.h \
@@ -527,11 +504,6 @@ exonerate_reads.o: exonerate_reads.cpp exonerate_reads.h \
flmatrix.h \
intmatrix.h \
ancestralnode.h \
- treenode.h \
- sequence.h \
- site.h \
- boolmatrix.h \
- ancestralsequence.h \
translatesequences.h
$(CXX) -c $(CXXFLAGS) $(INCPATH) -o exonerate_reads.o exonerate_reads.cpp
@@ -541,29 +513,63 @@ mafft_alignment.o: mafft_alignment.cpp mafft_alignment.h \
dbmatrix.h \
flmatrix.h \
intmatrix.h \
+ ancestralnode.h
+ $(CXX) -c $(CXXFLAGS) $(INCPATH) -o mafft_alignment.o mafft_alignment.cpp
+
+bppancestors.o: bppancestors.cpp bppancestors.h \
ancestralnode.h \
+ config.h \
+ hmmodel.h \
+ dbmatrix.h \
+ flmatrix.h \
+ intmatrix.h \
+ readfile.h \
+ readnewick.h \
treenode.h \
sequence.h \
site.h \
- boolmatrix.h \
- ancestralsequence.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o mafft_alignment.o mafft_alignment.cpp
+ boolmatrix.h
+ $(CXX) -c $(CXXFLAGS) $(INCPATH) -o bppancestors.o bppancestors.cpp
-bppancestors.o: bppancestors.cpp bppancestors.h \
+raxmlancestors.o: raxmlancestors.cpp raxmlancestors.h \
ancestralnode.h \
+ config.h \
+ hmmodel.h \
+ dbmatrix.h \
+ flmatrix.h \
+ intmatrix.h \
+ readfile.h \
+ readnewick.h \
treenode.h \
sequence.h \
site.h \
- intmatrix.h \
- flmatrix.h \
+ boolmatrix.h
+ $(CXX) -c $(CXXFLAGS) $(INCPATH) -o raxmlancestors.o raxmlancestors.cpp
+
+fasttree_tree.o: fasttree_tree.cpp fasttree_tree.h \
+ config.h \
+ hmmodel.h \
dbmatrix.h \
- boolmatrix.h \
- ancestralsequence.h \
+ flmatrix.h \
+ intmatrix.h \
+ ancestralnode.h
+ $(CXX) -c $(CXXFLAGS) $(INCPATH) -o fasttree_tree.o fasttree_tree.cpp
+
+raxmlrebl.o: raxmlrebl.cpp raxmlrebl.h \
+ ancestralnode.h \
config.h \
hmmodel.h \
+ dbmatrix.h \
+ flmatrix.h \
+ intmatrix.h \
+ node.h \
readfile.h \
- readnewick.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o bppancestors.o bppancestors.cpp
+ readnewick.h \
+ treenode.h \
+ sequence.h \
+ site.h \
+ boolmatrix.h
+ $(CXX) -c $(CXXFLAGS) $(INCPATH) -o raxmlrebl.o raxmlrebl.cpp
####### Manpages
=====================================
src/Makefile.qt deleted
=====================================
@@ -1,612 +0,0 @@
-#############################################################################
-# Makefile for building: prank
-# Generated by qmake (2.01a) (Qt 4.8.1) on: Wed Jun 27 11:28:15 2012
-# Project: prank.pro
-# Template: app
-# Command: /usr/bin/qmake -o Makefile prank.pro
-#############################################################################
-
-####### Compiler, tools and options
-
-CC = gcc
-CXX = g++
-DEFINES = -DQT_WEBKIT
-CFLAGS = -m64 -pipe -g $(DEFINES)
-CXXFLAGS = -m64 -pipe -g $(DEFINES)
-INCPATH = -I/usr/share/qt4/mkspecs/linux-g++-64 -I. -I/usr/include
-LINK = g++
-LFLAGS = -m64
-LIBS = $(SUBLIBS)
-AR = ar cqs
-RANLIB =
-QMAKE = /usr/bin/qmake
-TAR = tar -cf
-COMPRESS = gzip -9f
-COPY = cp -f
-SED = sed
-COPY_FILE = $(COPY)
-COPY_DIR = $(COPY) -r
-STRIP = strip
-INSTALL_FILE = install -m 644 -p
-INSTALL_DIR = $(COPY_DIR)
-INSTALL_PROGRAM = install -m 755 -p
-DEL_FILE = rm -f
-SYMLINK = ln -f -s
-DEL_DIR = rmdir
-MOVE = mv -f
-CHK_DIR_EXISTS= test -d
-MKDIR = mkdir -p
-
-####### Output directory
-
-OBJECTS_DIR = ./
-
-####### Files
-
-SOURCES = writefile.cpp \
- treenode.cpp \
- translatesequences.cpp \
- terminalsequence.cpp \
- terminalnode.cpp \
- site.cpp \
- sequence.cpp \
- readnewick.cpp \
- readfile.cpp \
- readalignment.cpp \
- pwsite.cpp \
- pwhirschberg.cpp \
- progressivealignment.cpp \
- prank.cpp \
- postprobability.cpp \
- phylomatchscore.cpp \
- node.cpp \
- intmatrix.cpp \
- hmmodel.cpp \
- hirschberg.cpp \
- guidetree.cpp \
- fullprobability.cpp \
- flmatrix.cpp \
- eigen.cpp \
- dbmatrix.cpp \
- characterprobability.cpp \
- boolmatrix.cpp \
- ancestralsequence.cpp \
- ancestralnode.cpp \
- check_version.cpp \
- exonerate_reads.cpp \
- mafft_alignment.cpp
-OBJECTS = writefile.o \
- treenode.o \
- translatesequences.o \
- terminalsequence.o \
- terminalnode.o \
- site.o \
- sequence.o \
- readnewick.o \
- readfile.o \
- readalignment.o \
- pwsite.o \
- pwhirschberg.o \
- progressivealignment.o \
- prank.o \
- postprobability.o \
- phylomatchscore.o \
- node.o \
- intmatrix.o \
- hmmodel.o \
- hirschberg.o \
- guidetree.o \
- fullprobability.o \
- flmatrix.o \
- eigen.o \
- dbmatrix.o \
- characterprobability.o \
- boolmatrix.o \
- ancestralsequence.o \
- ancestralnode.o \
- check_version.o \
- exonerate_reads.o \
- mafft_alignment.o
-DIST = /usr/share/qt4/mkspecs/common/unix.conf \
- /usr/share/qt4/mkspecs/common/linux.conf \
- /usr/share/qt4/mkspecs/common/gcc-base.conf \
- /usr/share/qt4/mkspecs/common/gcc-base-unix.conf \
- /usr/share/qt4/mkspecs/common/g++-base.conf \
- /usr/share/qt4/mkspecs/common/g++-unix.conf \
- /usr/share/qt4/mkspecs/qconfig.pri \
- /usr/share/qt4/mkspecs/modules/qt_phonon.pri \
- /usr/share/qt4/mkspecs/modules/qt_webkit_version.pri \
- /usr/share/qt4/mkspecs/features/qt_functions.prf \
- /usr/share/qt4/mkspecs/features/qt_config.prf \
- /usr/share/qt4/mkspecs/features/exclusive_builds.prf \
- /usr/share/qt4/mkspecs/features/default_pre.prf \
- /usr/share/qt4/mkspecs/features/debug.prf \
- /usr/share/qt4/mkspecs/features/default_post.prf \
- prank.pro
-QMAKE_TARGET = prank
-DESTDIR =
-TARGET = prank
-
-first: all
-####### Implicit rules
-
-.SUFFIXES: .o .c .cpp .cc .cxx .C
-
-.cpp.o:
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o "$@" "$<"
-
-.cc.o:
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o "$@" "$<"
-
-.cxx.o:
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o "$@" "$<"
-
-.C.o:
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o "$@" "$<"
-
-.c.o:
- $(CC) -c $(CFLAGS) $(INCPATH) -o "$@" "$<"
-
-####### Build rules
-
-all: Makefile $(TARGET)
-
-$(TARGET): $(OBJECTS)
- $(LINK) $(LFLAGS) -o $(TARGET) $(OBJECTS) $(OBJCOMP) $(LIBS)
-
-Makefile: prank.pro /usr/share/qt4/mkspecs/linux-g++-64/qmake.conf /usr/share/qt4/mkspecs/common/unix.conf \
- /usr/share/qt4/mkspecs/common/linux.conf \
- /usr/share/qt4/mkspecs/common/gcc-base.conf \
- /usr/share/qt4/mkspecs/common/gcc-base-unix.conf \
- /usr/share/qt4/mkspecs/common/g++-base.conf \
- /usr/share/qt4/mkspecs/common/g++-unix.conf \
- /usr/share/qt4/mkspecs/qconfig.pri \
- /usr/share/qt4/mkspecs/modules/qt_phonon.pri \
- /usr/share/qt4/mkspecs/modules/qt_webkit_version.pri \
- /usr/share/qt4/mkspecs/features/qt_functions.prf \
- /usr/share/qt4/mkspecs/features/qt_config.prf \
- /usr/share/qt4/mkspecs/features/exclusive_builds.prf \
- /usr/share/qt4/mkspecs/features/default_pre.prf \
- /usr/share/qt4/mkspecs/features/debug.prf \
- /usr/share/qt4/mkspecs/features/default_post.prf
- $(QMAKE) -o Makefile prank.pro
-/usr/share/qt4/mkspecs/common/unix.conf:
-/usr/share/qt4/mkspecs/common/linux.conf:
-/usr/share/qt4/mkspecs/common/gcc-base.conf:
-/usr/share/qt4/mkspecs/common/gcc-base-unix.conf:
-/usr/share/qt4/mkspecs/common/g++-base.conf:
-/usr/share/qt4/mkspecs/common/g++-unix.conf:
-/usr/share/qt4/mkspecs/qconfig.pri:
-/usr/share/qt4/mkspecs/modules/qt_phonon.pri:
-/usr/share/qt4/mkspecs/modules/qt_webkit_version.pri:
-/usr/share/qt4/mkspecs/features/qt_functions.prf:
-/usr/share/qt4/mkspecs/features/qt_config.prf:
-/usr/share/qt4/mkspecs/features/exclusive_builds.prf:
-/usr/share/qt4/mkspecs/features/default_pre.prf:
-/usr/share/qt4/mkspecs/features/debug.prf:
-/usr/share/qt4/mkspecs/features/default_post.prf:
-qmake: FORCE
- @$(QMAKE) -o Makefile prank.pro
-
-dist:
- @$(CHK_DIR_EXISTS) .tmp/prank1.0.0 || $(MKDIR) .tmp/prank1.0.0
- $(COPY_FILE) --parents $(SOURCES) $(DIST) .tmp/prank1.0.0/ && (cd `dirname .tmp/prank1.0.0` && $(TAR) prank1.0.0.tar prank1.0.0 && $(COMPRESS) prank1.0.0.tar) && $(MOVE) `dirname .tmp/prank1.0.0`/prank1.0.0.tar.gz . && $(DEL_FILE) -r .tmp/prank1.0.0
-
-
-clean:compiler_clean
- -$(DEL_FILE) $(OBJECTS)
- -$(DEL_FILE) *~ core *.core
-
-
-####### Sub-libraries
-
-distclean: clean
- -$(DEL_FILE) $(TARGET)
- -$(DEL_FILE) Makefile
-
-
-check: first
-
-compiler_clean:
-
-####### Compile
-
-writefile.o: writefile.cpp writefile.h \
- ancestralnode.h \
- treenode.h \
- sequence.h \
- site.h \
- intmatrix.h \
- flmatrix.h \
- dbmatrix.h \
- boolmatrix.h \
- ancestralsequence.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o writefile.o writefile.cpp
-
-treenode.o: treenode.cpp config.h \
- hmmodel.h \
- dbmatrix.h \
- flmatrix.h \
- intmatrix.h \
- ancestralnode.h \
- treenode.h \
- sequence.h \
- site.h \
- boolmatrix.h \
- ancestralsequence.h \
- writefile.h \
- hirschberg.h \
- phylomatchscore.h \
- terminalsequence.h \
- fullprobability.h \
- postprobability.h \
- characterprobability.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o treenode.o treenode.cpp
-
-translatesequences.o: translatesequences.cpp translatesequences.h \
- config.h \
- hmmodel.h \
- dbmatrix.h \
- flmatrix.h \
- intmatrix.h \
- ancestralnode.h \
- treenode.h \
- sequence.h \
- site.h \
- boolmatrix.h \
- ancestralsequence.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o translatesequences.o translatesequences.cpp
-
-terminalsequence.o: terminalsequence.cpp terminalsequence.h \
- sequence.h \
- site.h \
- intmatrix.h \
- flmatrix.h \
- dbmatrix.h \
- boolmatrix.h \
- config.h \
- hmmodel.h \
- ancestralnode.h \
- treenode.h \
- ancestralsequence.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o terminalsequence.o terminalsequence.cpp
-
-terminalnode.o: terminalnode.cpp terminalnode.h \
- treenode.h \
- sequence.h \
- site.h \
- intmatrix.h \
- flmatrix.h \
- dbmatrix.h \
- boolmatrix.h \
- terminalsequence.h \
- config.h \
- hmmodel.h \
- ancestralnode.h \
- ancestralsequence.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o terminalnode.o terminalnode.cpp
-
-site.o: site.cpp site.h \
- intmatrix.h \
- flmatrix.h \
- dbmatrix.h \
- boolmatrix.h \
- hmmodel.h \
- ancestralnode.h \
- treenode.h \
- sequence.h \
- ancestralsequence.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o site.o site.cpp
-
-sequence.o: sequence.cpp sequence.h \
- site.h \
- intmatrix.h \
- flmatrix.h \
- dbmatrix.h \
- boolmatrix.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o sequence.o sequence.cpp
-
-readnewick.o: readnewick.cpp readnewick.h \
- treenode.h \
- sequence.h \
- site.h \
- intmatrix.h \
- flmatrix.h \
- dbmatrix.h \
- boolmatrix.h \
- ancestralnode.h \
- ancestralsequence.h \
- node.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o readnewick.o readnewick.cpp
-
-readfile.o: readfile.cpp readfile.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o readfile.o readfile.cpp
-
-readalignment.o: readalignment.cpp readalignment.h \
- sequence.h \
- site.h \
- intmatrix.h \
- flmatrix.h \
- dbmatrix.h \
- boolmatrix.h \
- treenode.h \
- phylomatchscore.h \
- ancestralsequence.h \
- terminalsequence.h \
- config.h \
- hmmodel.h \
- ancestralnode.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o readalignment.o readalignment.cpp
-
-pwsite.o: pwsite.cpp pwsite.h \
- flmatrix.h \
- intmatrix.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o pwsite.o pwsite.cpp
-
-pwhirschberg.o: pwhirschberg.cpp config.h \
- hmmodel.h \
- dbmatrix.h \
- flmatrix.h \
- intmatrix.h \
- ancestralnode.h \
- treenode.h \
- sequence.h \
- site.h \
- boolmatrix.h \
- ancestralsequence.h \
- pwhirschberg.h \
- pwsite.h \
- exonerate_reads.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o pwhirschberg.o pwhirschberg.cpp
-
-progressivealignment.o: progressivealignment.cpp readnewick.h \
- treenode.h \
- sequence.h \
- site.h \
- intmatrix.h \
- flmatrix.h \
- dbmatrix.h \
- boolmatrix.h \
- readfile.h \
- writefile.h \
- ancestralnode.h \
- ancestralsequence.h \
- guidetree.h \
- progressivealignment.h \
- config.h \
- hmmodel.h \
- translatesequences.h \
- node.h \
- mafft_alignment.h \
- hirschberg.h \
- phylomatchscore.h \
- terminalsequence.h \
- readalignment.h \
- exonerate_reads.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o progressivealignment.o progressivealignment.cpp
-
-prank.o: prank.cpp progressivealignment.h \
- config.h \
- hmmodel.h \
- dbmatrix.h \
- flmatrix.h \
- intmatrix.h \
- ancestralnode.h \
- treenode.h \
- sequence.h \
- site.h \
- boolmatrix.h \
- ancestralsequence.h \
- readfile.h \
- writefile.h \
- readnewick.h \
- guidetree.h \
- translatesequences.h \
- node.h \
- mafft_alignment.h \
- check_version.h \
- prank.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o prank.o prank.cpp
-
-postprobability.o: postprobability.cpp config.h \
- hmmodel.h \
- dbmatrix.h \
- flmatrix.h \
- intmatrix.h \
- ancestralnode.h \
- treenode.h \
- sequence.h \
- site.h \
- boolmatrix.h \
- ancestralsequence.h \
- postprobability.h \
- phylomatchscore.h \
- terminalsequence.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o postprobability.o postprobability.cpp
-
-phylomatchscore.o: phylomatchscore.cpp config.h \
- hmmodel.h \
- dbmatrix.h \
- flmatrix.h \
- intmatrix.h \
- ancestralnode.h \
- treenode.h \
- sequence.h \
- site.h \
- boolmatrix.h \
- ancestralsequence.h \
- phylomatchscore.h \
- terminalsequence.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o phylomatchscore.o phylomatchscore.cpp
-
-node.o: node.cpp node.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o node.o node.cpp
-
-intmatrix.o: intmatrix.cpp intmatrix.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o intmatrix.o intmatrix.cpp
-
-hmmodel.o: hmmodel.cpp hmmodel.h \
- dbmatrix.h \
- flmatrix.h \
- intmatrix.h \
- ancestralnode.h \
- treenode.h \
- sequence.h \
- site.h \
- boolmatrix.h \
- ancestralsequence.h \
- eigen.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o hmmodel.o hmmodel.cpp
-
-hirschberg.o: hirschberg.cpp config.h \
- hmmodel.h \
- dbmatrix.h \
- flmatrix.h \
- intmatrix.h \
- ancestralnode.h \
- treenode.h \
- sequence.h \
- site.h \
- boolmatrix.h \
- ancestralsequence.h \
- exonerate_reads.h \
- hirschberg.h \
- phylomatchscore.h \
- terminalsequence.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o hirschberg.o hirschberg.cpp
-
-guidetree.o: guidetree.cpp guidetree.h \
- flmatrix.h \
- intmatrix.h \
- pwhirschberg.h \
- pwsite.h \
- config.h \
- hmmodel.h \
- dbmatrix.h \
- ancestralnode.h \
- treenode.h \
- sequence.h \
- site.h \
- boolmatrix.h \
- ancestralsequence.h \
- translatesequences.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o guidetree.o guidetree.cpp
-
-fullprobability.o: fullprobability.cpp config.h \
- hmmodel.h \
- dbmatrix.h \
- flmatrix.h \
- intmatrix.h \
- ancestralnode.h \
- treenode.h \
- sequence.h \
- site.h \
- boolmatrix.h \
- ancestralsequence.h \
- fullprobability.h \
- phylomatchscore.h \
- terminalsequence.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o fullprobability.o fullprobability.cpp
-
-flmatrix.o: flmatrix.cpp flmatrix.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o flmatrix.o flmatrix.cpp
-
-eigen.o: eigen.cpp eigen.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o eigen.o eigen.cpp
-
-dbmatrix.o: dbmatrix.cpp dbmatrix.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o dbmatrix.o dbmatrix.cpp
-
-characterprobability.o: characterprobability.cpp config.h \
- hmmodel.h \
- dbmatrix.h \
- flmatrix.h \
- intmatrix.h \
- ancestralnode.h \
- treenode.h \
- sequence.h \
- site.h \
- boolmatrix.h \
- ancestralsequence.h \
- characterprobability.h \
- terminalsequence.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o characterprobability.o characterprobability.cpp
-
-boolmatrix.o: boolmatrix.cpp boolmatrix.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o boolmatrix.o boolmatrix.cpp
-
-ancestralsequence.o: ancestralsequence.cpp ancestralsequence.h \
- sequence.h \
- site.h \
- intmatrix.h \
- flmatrix.h \
- dbmatrix.h \
- boolmatrix.h \
- config.h \
- hmmodel.h \
- ancestralnode.h \
- treenode.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o ancestralsequence.o ancestralsequence.cpp
-
-ancestralnode.o: ancestralnode.cpp config.h \
- hmmodel.h \
- dbmatrix.h \
- flmatrix.h \
- intmatrix.h \
- ancestralnode.h \
- treenode.h \
- sequence.h \
- site.h \
- boolmatrix.h \
- ancestralsequence.h \
- writefile.h \
- hirschberg.h \
- phylomatchscore.h \
- terminalsequence.h \
- fullprobability.h \
- postprobability.h \
- characterprobability.h \
- terminalnode.h \
- readalignment.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o ancestralnode.o ancestralnode.cpp
-
-check_version.o: check_version.cpp check_version.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o check_version.o check_version.cpp
-
-exonerate_reads.o: exonerate_reads.cpp exonerate_reads.h \
- config.h \
- hmmodel.h \
- dbmatrix.h \
- flmatrix.h \
- intmatrix.h \
- ancestralnode.h \
- treenode.h \
- sequence.h \
- site.h \
- boolmatrix.h \
- ancestralsequence.h \
- translatesequences.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o exonerate_reads.o exonerate_reads.cpp
-
-mafft_alignment.o: mafft_alignment.cpp mafft_alignment.h \
- config.h \
- hmmodel.h \
- dbmatrix.h \
- flmatrix.h \
- intmatrix.h \
- ancestralnode.h \
- treenode.h \
- sequence.h \
- site.h \
- boolmatrix.h \
- ancestralsequence.h
- $(CXX) -c $(CXXFLAGS) $(INCPATH) -o mafft_alignment.o mafft_alignment.cpp
-
-####### Install
-
-install: FORCE
-
-uninstall: FORCE
-
-FORCE:
-
=====================================
src/ancestralnode.cpp
=====================================
@@ -696,8 +696,23 @@ bool AncestralNode::readThisNode()
}
if(not success)
{
- cout<<"Reading the alignment file failed. Exiting.\n\n";
- exit(-1);
+ cout<<"\nReading the alignment failed. Trying in log-space.\n";
+
+ LOGVALUES = true;
+ delete pms;
+ pms = new PhyloMatchScore(lChild->getSequence(),rChild->getSequence());
+
+ success = ra->readSeqs(lChild->getSequence(),rChild->getSequence(),pms,this,&path);
+
+ if(not success)
+ {
+ cout<<"Reading the alignment file failed. Exiting.\n\n";
+ exit(-1);
+ }
+
+ LOGVALUES = false;
+ delete pms;
+ pms = new PhyloMatchScore(lChild->getSequence(),rChild->getSequence());
}
@@ -924,6 +939,20 @@ void AncestralNode::getSubtreeBelow(std::string *subtree)
*subtree = rightSubtree+","+leftSubtree;
}
+void AncestralNode::getAllFullSubtreesWithNodename(map<string,string> *subtrees,bool treefirst)
+{
+ getLChild()->getAllFullSubtreesWithNodename(subtrees,treefirst);
+ getRChild()->getAllFullSubtreesWithNodename(subtrees,treefirst);
+
+ string subtree = "";
+ this->getSubtreeBelow(&subtree);
+ if(treefirst)
+ subtrees->insert(make_pair(subtree,this->getNodeName()));
+ else
+ subtrees->insert(make_pair(this->getNodeName(),subtree));
+}
+
+
void AncestralNode::markRealignSubtrees(map<string,float> *subtrees)
{
if(lInternal)
=====================================
src/ancestralnode.h
=====================================
@@ -60,6 +60,7 @@ public:
void getAllSubtrees(std::map<std::string,float> *subtrees);
void getAllSubtreesWithNodename(std::map<std::string,std::string> *subtrees);
+ void getAllFullSubtreesWithNodename(std::map<std::string,std::string> *subtrees, bool treefirst);
void getSubtreeBelow(std::string *subtree);
void markRealignSubtrees(std::map<std::string,float> *subtrees);
=====================================
src/config.h
=====================================
@@ -55,7 +55,7 @@ extern bool MERGE;
extern bool TREESTRING;
extern bool PARTLYALIGNED;
extern bool PREALIGNED;
-extern bool PRINTSCOREONLY;
+extern bool PRINTSCORE;
extern bool UPDATE;
extern bool UPDATESECOND;
extern float updateTolerance;
@@ -67,6 +67,7 @@ extern bool TREEFROMALIGNMENT;
extern bool TREEONLY;
extern bool SCOREMAFFT;
extern bool BPPANCESTORS;
+extern bool MLANCESTORS;
extern int format;
@@ -162,6 +163,8 @@ extern int FBW;
extern bool SKIPINS;
extern bool EXONERATE;
+extern bool FASTTREE;
+extern bool RAXMLREBL;
extern int initialAnchDist;
extern int minAnchDist;
=====================================
src/fasttree_tree.cpp
=====================================
@@ -0,0 +1,170 @@
+/***************************************************************************
+ * Copyright (C) 2010-2014 by Ari Loytynoja *
+ * ari.loytynoja at gmail.com *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
+ ***************************************************************************/
+
+#include "fasttree_tree.h"
+#include <netdb.h>
+#include <cstring>
+#include <cstdio>
+#include <cstdlib>
+#include <unistd.h>
+#include <iostream>
+#include <sstream>
+#include <vector>
+
+using namespace std;
+
+#if defined (__APPLE__)
+#include <mach-o/dyld.h>
+#endif
+
+
+FastTree_tree::FastTree_tree()
+{
+}
+
+bool FastTree_tree::test_executable()
+{
+ #if defined (__CYGWIN__)
+ char path[200] = "";
+ int length = readlink("/proc/self/exe",path,200-1);
+ path[length] = '\0';
+
+ string epath = string(path).substr(0,length);
+ if (epath.find("/")!=std::string::npos)
+ epath = epath.substr(0,epath.rfind("/")+1);
+ progpath = epath;
+ epath = epath+"fasttree >/dev/null 2>/dev/null";
+ int status = system(epath.c_str());
+
+ return WEXITSTATUS(status) == 0;
+
+ # else
+
+ char path[200] = "";
+ string epath;
+
+ #if defined (__APPLE__)
+ uint32_t size = sizeof(path);
+ _NSGetExecutablePath(path, &size);
+ epath = string(path);
+ if (epath.find("/")!=std::string::npos)
+ epath = epath.substr(0,epath.rfind("/")+1);
+
+ #else
+ int length = readlink("/proc/self/exe",path,200-1);
+ path[length] = '\0';
+ epath = string(path).substr(0,length);
+ if (epath.find("/")!=std::string::npos)
+ epath = epath.substr(0,epath.rfind("/")+1);
+
+ #endif
+
+ char hostname[1024];
+ hostname[1023] = '\0';
+ gethostname(hostname, 1023);
+
+ progpath = epath;
+ epath = epath+"fasttree >/dev/null 2>/dev/null";
+ int status = system(epath.c_str());
+ if(WEXITSTATUS(status) == 0)
+ return true;
+
+ if(WEXITSTATUS(status) == 1 && strcmp(hostname, "wasabi2")==0)
+ return true;
+
+ progpath = "";
+ status = system("fasttree >/dev/null 2>/dev/null");
+
+ if(WEXITSTATUS(status) == 1 && strcmp(hostname, "wasabi2")==0)
+ return true;
+
+ return WEXITSTATUS(status) == 0;
+
+ #endif
+}
+
+string FastTree_tree::infer_phylogeny(std::vector<string> *names,std::vector<string> *sequences,bool is_protein)
+{
+
+ stringstream f_name;
+
+ int r = rand();
+ while(true)
+ {
+
+ f_name <<tmp_dir<<"/d"<<r<<".fas";
+ ifstream f_file(f_name.str().c_str());
+
+ if(!f_file)
+ {
+ break;
+ }
+ r = rand();
+ }
+
+ ofstream f_output;
+ f_output.open( f_name.str().c_str(), (ios::out) );
+
+ vector<string>::iterator si = sequences->begin();
+ vector<string>::iterator ni = names->begin();
+ for(;si!=sequences->end();si++,ni++)
+ {
+ f_output<<">"<<*ni<<"\n"<<*si<<"\n";
+ }
+ f_output.close();
+
+ stringstream command;
+ command << progpath<<"fasttree -quiet -nopr -nosupport ";
+ if(is_protein)
+ command << f_name.str() << " 2>/dev/null";
+ else
+ command << "-nt "<<f_name.str() << " 2>/dev/null";
+
+ if(NOISE>0)
+ cout<<"cmd: "<<command.str()<<endl;
+
+ FILE *fpipe;
+ if ( !(fpipe = (FILE*)popen(command.str().c_str(),"r")) )
+ {
+ cout<<"Problems with fasttree pipe.\nExiting.\n";
+ exit(1);
+ }
+
+ char line[256];
+ string tree = "";
+ while ( fgets( line, sizeof line, fpipe))
+ {
+ if(NOISE>1)
+ cout<<"FastTree: "<<line;
+ tree += line;
+ }
+ pclose(fpipe);
+
+ if(NOISE>1)
+ cout<<"FastTree_tree: "<<tree<<"\n";
+
+
+ //delete files
+ if( remove( f_name.str().c_str() ) != 0 )
+ perror("Error deleting temporary file in FastTree_tree::infer_phylogeny");
+
+ return tree;
+}
+
=====================================
src/fasttree_tree.h
=====================================
@@ -0,0 +1,23 @@
+#ifndef FASTTREE_TREE_H
+#define FASTTREE_TREE_H
+
+#include <fstream>
+#include <string>
+#include <vector>
+#include <sys/stat.h>
+#include "config.h"
+
+using namespace std;
+
+
+class FastTree_tree
+{
+ string progpath;
+
+public:
+ FastTree_tree();
+ bool test_executable();
+ string infer_phylogeny(std::vector<string> *names, std::vector<string> *sequences, bool is_protein);
+};
+
+#endif // FASTTREE_TREE_H
=====================================
src/guidetree.cpp
=====================================
@@ -20,6 +20,7 @@
#include <cstdio>
#include "guidetree.h"
+#include "fasttree_tree.h"
#include "pwhirschberg.h"
#include "pwsite.h"
#include "config.h"
@@ -31,6 +32,7 @@ using namespace std;
void GuideTree::computeTree(vector<string>* sequences,vector<string>* names,IntMatrix* substScores)
{
bool isDna = (substScores->X()<=5);
+
int ns = sequences->size();
string full_alphabet = "ARNDCQEGHILKMFPSTWYVX";
@@ -301,6 +303,17 @@ void GuideTree::computeTree(vector<string>* seqs,vector<string>* names,bool isDn
return;
}
+ if(FASTTREE)
+ {
+ FastTree_tree ft;
+ if(ft.test_executable())
+ {
+ this->tree = ft.infer_phylogeny(names,seqs,!isDna);
+ return;
+ }
+ }
+
+
int ns = seqs->size();
FlMatrix* distance = new FlMatrix(ns,ns,"pw distances");
distance->initialise(0);
=====================================
src/node.cpp
=====================================
@@ -28,9 +28,6 @@
using namespace std;
-std::string mpTree;
-float halfLength;
-float maxSpan;
extern float defaultBranchLength;
Node::~Node()
@@ -61,8 +58,17 @@ Node::~Node()
int Node::count = 1;
-Node::Node(string t)
+int Node::warned = 0;
+
+bool Node::warnings = true;
+
+std::string Node::mpTree = "";
+
+Node::Node(string t, bool root)
{
+
+ warnings = root;
+
mpTree = "";
maxLength = 0.0;
maxSpan = 0.0;
@@ -83,6 +89,8 @@ Node::Node(string t)
tree = t;
+ warned = false;
+
for (unsigned int i = 0; i < tree.size(); i++)
{
if(tree[i] == ' ')
@@ -99,6 +107,9 @@ Node::Node(string t)
tree.erase(start,stop-start+1);
}
+ checkBifurcation(&tree);
+
+// cout<<"tree "<<tree<<endl;
divideTree(tree,subTrees,subDistances);
subDistances[0] = abs(subDistances[0]);
@@ -111,10 +122,12 @@ Node::Node(string t)
revTrees[0] = subTrees[1]+":"+num;
revTrees[1] = subTrees[0]+":"+num;
+ reverseTree = "";
child0 = new Node(subTrees[0],this,0);
+ child0->set_distance_to_parent(subDistances[0]);
child1 = new Node(subTrees[1],this,1);
-
+ child1->set_distance_to_parent(subDistances[1]);
float currPair = subDistances[0]+child0->maxLength+subDistances[1]+child1->maxLength;
if (currPair > maxSpan)
@@ -122,9 +135,15 @@ Node::Node(string t)
maxSpan = currPair;
}
+ dist_to_parent = 0;
- findMiddlePoint();
+ if(root)
+ findMiddlePoint();
+ else
+ mpTree = "("+child0->tree+":"+num+","+child1->tree+":"+num+");";
+// cout<<child0->tree<<"\n"<<child1->tree<<"\n"<<num<<"\n\n";
+// cout<<"mp "<<mpTree<<endl;
}
string Node::rootedTree()
@@ -159,12 +178,50 @@ void Node::checkBifurcation(string *t)
if (comma!=open || comma!=end)
{
+ if(warnings && warned==0)
+ {
cout<<"Correcting (arbitrarily) for multifurcating nodes."<<endl;
+ warned++;
+ }
+
+ string tmp = *t;
+ if(tmp.at(0)=='(')
+ tmp.erase(0,1);
+ while(tmp.at(tmp.length()-1)!=')')
+ tmp.erase(tmp.length()-1);
+ if(tmp.at(tmp.length()-1)==')')
+ tmp.erase(tmp.length()-1);
- if(open==end+1 && open == comma)
+// cout<<open<<" "<<end<<" "<<comma<<" "<<tmp<<endl;
+ vector<string> strees;
+
+ int po=0;
+ string fr = "";
+ for(int i=0;i<tmp.length();i++)
+ {
+ if(tmp.at(i)=='(')
+ po++;
+ if(tmp.at(i)==')')
+ po--;
+ if(tmp.at(i)==',' && po==0)
{
- t->append(":0.0)");
+ strees.push_back(fr);
+
+ fr = "";
+ continue;
}
+ fr += tmp.at(i);
+ }
+ strees.push_back(fr);
+
+ string str = strees.at(0);
+ for(int i=1;i<strees.size()-1;i++)
+ {
+ str = "("+str+","+strees.at(i)+"):0.0";
+ }
+ str = "("+str+","+strees.at(strees.size()-1)+")";
+
+ *t = str;
}
}
@@ -205,11 +262,14 @@ Node::Node(string t,Node* p,int branch)
char num1[10];
sprintf(num1,"%.5f",subDistances[1]);
- revTrees[0] = "("+parent->revTrees[branch]+","+subTrees[1]+":"+num1+"):"+num0;
- revTrees[1] = "("+parent->revTrees[branch]+","+subTrees[0]+":"+num0+"):"+num1;
+ revTrees[0] = "("+subTrees[1]+":"+num1+","+parent->revTrees[branch]+"):"+num0;
+ revTrees[1] = "("+subTrees[0]+":"+num0+","+parent->revTrees[branch]+"):"+num1;
+ reverseTree = parent->revTrees[branch];
child0 = new Node(subTrees[0],this,0);
+ child0->set_distance_to_parent(subDistances[0]);
child1 = new Node(subTrees[1],this,1);
+ child1->set_distance_to_parent(subDistances[1]);
float currPair = subDistances[0]+child0->maxLength+subDistances[1]+child1->maxLength;
@@ -651,3 +711,25 @@ void Node::prune_up()
}
// cout<<"prune up out "<<this->get_name()<<endl;
}
+
+string Node::markNodes()
+{
+ if (this->is_leaf())
+ {
+ namehash = hash(name.c_str());
+ return this->name;
+ }
+ string name0 = child0->markNodes();
+ string name1 = child1->markNodes();
+
+ string namef = name0+name1;
+ if(name0>name1)
+ namef = name1+name0;
+
+ namehash = hash(namef.c_str());
+
+// cout<<name<<" "<<namef<<" "<<namehash<<endl;
+
+ return namef;
+}
+
=====================================
src/node.h
=====================================
@@ -26,8 +26,10 @@
#include <string>
#include <vector>
+#include <map>
#include <iostream>
#include <sstream>
+#include <stdlib.h>
using namespace std;
class Node
@@ -36,6 +38,8 @@ class Node
std::string subTrees[2];
std::string revTrees[2];
+ std::string reverseTree;
+
Node* parent;
Node* child0;
Node* child1;
@@ -110,6 +114,8 @@ class Node
return name;
}
+ int namehash;
+
double dist_to_parent;
void set_distance_to_parent(double d)
{
@@ -147,27 +153,222 @@ class Node
if (!is_leaf())
{
stringstream ss;
- ss<<"("<<child0->print_subtree()<<","<<child1->print_subtree()<<"):"<<dist_to_parent;
+ char num[10];
+ sprintf(num,"%.5f",dist_to_parent);
+ ss<<"("<<child0->print_subtree()<<","<<child1->print_subtree()<<"):"<<num; //dist_to_parent;
return ss.str();
}
else
{
stringstream ss;
- ss<<tree<<":"<<dist_to_parent;
+ char num[10];
+ sprintf(num,"%.5f",dist_to_parent);
+ ss<<tree<<":"<<num; //dist_to_parent;
return ss.str();
}
}
+// int hash(string s)
+// {
+
+// int hash = 0;
+// int offset = 'a' - 1;
+// for(string::const_iterator it=s.begin(); it!=s.end(); ++it) {
+// hash = hash << 1 | (*it - offset);
+// }
+// return hash;
+// }
+
+ unsigned int hash(const char* s,unsigned int seed = 0)
+ {
+ unsigned int hash = seed;
+ while (*s)
+ {
+ hash = hash * 101 + *s++;
+ }
+ return hash;
+ }
+
+ float halfLength;
+ float maxSpan;
+
+ static bool warnings;
+
+ static std::string mpTree;
static int count;
+ static int warned;
public:
Node();
~Node();
- Node(std::string t);
+ Node(std::string t, bool root = true);
std::string rootedTree();
+
void mark_sequences(vector<string> *names);
+ string markNodes();
+
+ int findMarkedNode(int h0, int h1) {
+ if (is_leaf())
+ return 0;
+
+// cout<<name<<" "<<namehash<<endl;
+ if(this->namehash == h0 || this->namehash == h1)
+ {
+// cout<<name<<" "<<namehash<<endl;
+ return this->namehash;
+ }
+ else
+ {
+ int f0 = child0->findMarkedNode(h0,h1);
+ if(f0 == h0 || f0 == h1)
+ return f0;
+ else
+ return child1->findMarkedNode(h0,h1);
+ }
+ }
+
+ bool rootAtMarkedNode(int h0, int h1) {
+ if (is_leaf())
+ return false;
+
+// cout<<"root "<<name<<" "<<namehash<<endl;
+
+ if(this->namehash == h1 || this->namehash == h0)
+ {
+ string rt = this->reverseTree;
+ int pos = rt.find_last_of(':');
+ if(pos != string::npos)
+ {
+// cout<<"\nb1 "<<dist_to_parent<<endl;
+// cout<<"\nf "<<this->tree<<endl;
+// cout<<"r "<<this->reverseTree<<endl<<endl;
+
+ string num = rt.substr(pos+1);
+ rt = rt.substr(0,pos);
+ stringstream ss(num);
+ float f;
+ ss >> f;
+ f/=2;
+ stringstream fs;
+ fs << f;
+ mpTree = "("+this->tree+":"+fs.str()+","+rt+":"+fs.str()+");";
+ }
+ else
+ {
+ cout<<"\nb "<<dist_to_parent<<endl;
+ cout<<"\nf "<<this->tree<<endl;
+ cout<<"r "<<this->reverseTree<<endl<<endl;
+
+ mpTree = "("+this->tree+":0,"+this->reverseTree+");";
+ }
+ return true;
+ }
+// else if(this->namehash == h0)
+// {
+// string rt = this->reverseTree;
+// int pos = rt.find_last_of(':');
+// if(pos != string::npos)
+// {
+// cout<<"\nb2 "<<dist_to_parent<<endl;
+// cout<<"\nf "<<this->tree<<endl;
+// cout<<"r "<<this->reverseTree<<endl<<endl;
+
+// string num = rt.substr(pos+1);
+// rt = rt.substr(0,pos);
+// stringstream ss(num);
+// float f;
+// ss >> f;
+// f/=2;
+// stringstream fs;
+// fs << f;
+// mpTree = "("+rt+":"+fs.str()+","+this->tree+":"+fs.str()+");";
+// }
+// else
+// {
+// cout<<"\nb "<<dist_to_parent<<endl;
+// cout<<"\nf "<<this->tree<<endl;
+// cout<<"r "<<this->reverseTree<<endl<<endl;
+
+// mpTree = "("+this->reverseTree+","+this->tree+":0);";
+// }
+// return true;
+// }
+
+
+/*
+ if(child0->namehash == h0 || child0->namehash == h1)
+ {
+ halfLength = maxSpan/2;
+
+// cout<<"root "<<child0->name<<" "<<child0->namehash<<endl;
+
+ float b0 = halfLength-child0->maxLength;
+ if(b0<0)
+ b0=0.001;
+ float b1 = subDistances[0]-b0;
+
+ char num0[10];
+ sprintf(num0,"%.5f",b0);
+ char num1[10];
+ sprintf(num1,"%.5f",b1);
+
+ char num[10];
+ sprintf(num,"%.5f",subDistances[1]);
+
+ mpTree = "("+child0->tree+":"+num0+",("+subTrees[1]+":"+num+","+parent->revTrees[0]+"):"+num1+");";
+
+ cout<<"\n\n1a: "<<child0->tree<<":"<<num0<<endl
+ <<"2a: "
+ <<subTrees[1]<<":"<<num<<endl
+ <<"3a: "
+ <<parent->revTrees[0]<<endl
+ <<endl;
+
+ cout<<mpTree<<endl<<endl;
+
+ return true;
+ }
+ else if(child1->namehash == h0 || child1->namehash == h1)
+ {
+ halfLength = maxSpan/2;
+
+// cout<<"root "<<child1->name<<" "<<child1->namehash<<endl;
+
+ float b0 = halfLength-child1->maxLength;
+ if(b0<0)
+ b0=0.001;
+ float b1 = subDistances[1]-b0;
+
+ char num0[10];
+ sprintf(num0,"%.5f",b0);
+ char num1[10];
+ sprintf(num1,"%.5f",b1);
+
+ char num[10];
+ sprintf(num,"%.5f",subDistances[0]);
+
+ int b = 0;
+ if(parent->child1->name == this->name)
+ b = 1;
+ mpTree = "(("+parent->revTrees[b]+","+subTrees[0]+":"+num+"):"+num1+","+child1->tree+":"+num0+");";
+// cout<<"\n\n1b: "<<child1->tree<<":"<<num0<<endl<<"2b: "<<parent->revTrees[b]<<endl<<"3b: "<<subTrees[0]<<":"<<num<<endl<<endl;
+
+// cout<<mpTree<<endl;
+ return true;
+
+ }
+*/
+ else
+ {
+ if(child0->rootAtMarkedNode(h0,h1))
+ return true;
+ else
+ return child1->rootAtMarkedNode(h0,h1);
+ }
+ }
+
void prune_tree()
{
this->prune_down();
@@ -244,6 +445,68 @@ public:
return "";
}
}
+
+ void getRootNamehashes(int *hash0,int *hash1)
+ {
+ *hash0 = child0->namehash;
+ *hash1 = child1->namehash;
+ }
+
+ void getNamehashes(vector<int> *h)
+ {
+ if(!is_leaf())
+ {
+ child0->getNamehashes(h);
+ child1->getNamehashes(h);
+ }
+ h->push_back(namehash);
+ return;
+ }
+
+ void getNameNamehashes(map<string,int> *h)
+ {
+ if(!is_leaf())
+ {
+ child0->getNameNamehashes(h);
+ child1->getNameNamehashes(h);
+ }
+ h->insert(make_pair(name,namehash));
+ return;
+ }
+
+ void getNamehashNames(map<int,string> *h)
+ {
+ if(!is_leaf())
+ {
+ child0->getNamehashNames(h);
+ child1->getNamehashNames(h);
+ }
+ h->insert(make_pair(namehash,name));
+ return;
+ }
+
+ void getNamehashesDistance(map<int,float> *h)
+ {
+ if(!is_leaf())
+ {
+ child0->getNamehashesDistance(h);
+ child1->getNamehashesDistance(h);
+ }
+
+ float dist;
+ char num[10];
+ sprintf(num,"%.5f",dist_to_parent);
+ dist = strtof(num, NULL);
+ h->insert(make_pair(namehash,dist));
+ return;
+ }
+
+ int getNodeNumber()
+ {
+ if(!is_leaf())
+ return child0->getNodeNumber()+child1->getNodeNumber()+1;
+ return 1;
+ }
};
=====================================
src/prank.1 deleted
=====================================
@@ -1,230 +0,0 @@
-.\" Automatically generated by Pod::Man 2.28 (Pod::Simple 3.29)
-.\"
-.\" Standard preamble:
-.\" ========================================================================
-.de Sp \" Vertical space (when we can't use .PP)
-.if t .sp .5v
-.if n .sp
-..
-.de Vb \" Begin verbatim text
-.ft CW
-.nf
-.ne \\$1
-..
-.de Ve \" End verbatim text
-.ft R
-.fi
-..
-.\" Set up some character translations and predefined strings. \*(-- will
-.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
-.\" double quote, and \*(R" will give a right double quote. \*(C+ will
-.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
-.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
-.\" nothing in troff, for use with C<>.
-.tr \(*W-
-.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
-.ie n \{\
-. ds -- \(*W-
-. ds PI pi
-. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
-. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
-. ds L" ""
-. ds R" ""
-. ds C` ""
-. ds C' ""
-'br\}
-.el\{\
-. ds -- \|\(em\|
-. ds PI \(*p
-. ds L" ``
-. ds R" ''
-. ds C`
-. ds C'
-'br\}
-.\"
-.\" Escape single quotes in literal strings from groff's Unicode transform.
-.ie \n(.g .ds Aq \(aq
-.el .ds Aq '
-.\"
-.\" If the F register is turned on, we'll generate index entries on stderr for
-.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
-.\" entries marked with X<> in POD. Of course, you'll have to process the
-.\" output yourself in some meaningful fashion.
-.\"
-.\" Avoid warning from groff about undefined register 'F'.
-.de IX
-..
-.nr rF 0
-.if \n(.g .if rF .nr rF 1
-.if (\n(rF:(\n(.g==0)) \{
-. if \nF \{
-. de IX
-. tm Index:\\$1\t\\n%\t"\\$2"
-..
-. if !\nF==2 \{
-. nr % 0
-. nr F 2
-. \}
-. \}
-.\}
-.rr rF
-.\" ========================================================================
-.\"
-.IX Title "PRANK 1"
-.TH PRANK 1 "2017-04-27" "v.121211" "The Probabilistic Alignment Kit"
-.\" For nroff, turn off justification. Always turn off hyphenation; it makes
-.\" way too many mistakes in technical documents.
-.if n .ad l
-.nh
-.SH "NAME"
-prank \- Computes probabilistic multiple sequence alignments
-.SH "SYNOPSIS"
-.IX Header "SYNOPSIS"
-\&\fBprank\fR \fIsequence_file\fR
-.PP
-\&\fBprank\fR [optional parameters] \-d=\fIsequence_file\fR [optional parameters]
-.SH "DESCRIPTION"
-.IX Header "DESCRIPTION"
-The Probabilistic Alignment Kit (\s-1PRANK\s0) is a probabilistic multiple alignment
-program for \s-1DNA,\s0 codon and amino-acid sequences. It's based on a novel algorithm
-that treats insertions correctly and avoids over-estimation of the number of
-deletion events.
-.PP
-In addition, \s-1PRANK\s0 borrows ideas from maximum likelihood methods used in
-phylogenetics and correctly takes into account the evolutionary distances
-between sequences. Lastly, \s-1PRANK\s0 allows for defining a potential structure for
-sequences to be aligned and then, simultaneously with the alignment, predicts
-the locations of structural units in the sequences.
-.SH "OPTIONS"
-.IX Header "OPTIONS"
-.SS "\s-1INPUT/OUTPUT PARAMETERS\s0"
-.IX Subsection "INPUT/OUTPUT PARAMETERS"
-.IP "\fB\-d=\f(BIsequence_file\fB\fR" 8
-.IX Item "-d=sequence_file"
-The input sequence file in \s-1FASTA\s0 format.
-.IP "\fB\-t=\f(BItree_file\fB\fR" 8
-.IX Item "-t=tree_file"
-The tree file to use. If unset, an appriximated \s-1NJ\s0 tree is generated.
-.IP "\fB\-o=\f(BIoutput_file\fB\fR" 8
-.IX Item "-o=output_file"
-Set the name of the output file. If unset, \fIoutput_file\fR is set to \fBoutput\fR.
-.IP "\fB\-f=\f(BIoutput_format\fB\fR" 8
-.IX Item "-f=output_format"
-Set the output format. \fIoutput_format\fR can be one of \fBfasta\fR (default),
-\&\fBphylipi\fR, \fBphylips\fR, \fBpaml\fR, or \fBnexus\fR.
-.IP "\fB\-m=\f(BImodel_file\fB\fR" 8
-.IX Item "-m=model_file"
-The model file to use. If unset, \fImodel_file\fR is set to \fB\s-1HKY2/WAG\s0\fR.
-.IP "\fB\-support\fR" 8
-.IX Item "-support"
-Compute posterior support.
-.IP "\fB\-showxml\fR" 8
-.IX Item "-showxml"
-Output alignment xml-file.
-.IP "\fB\-showtree\fR" 8
-.IX Item "-showtree"
-Output alignment guidetree.
-.IP "\fB\-showanc\fR" 8
-.IX Item "-showanc"
-Output ancestral sequences.
-.IP "\fB\-showall\fR" 8
-.IX Item "-showall"
-Output all of these.
-.IP "\fB\-noanchors\fR" 8
-.IX Item "-noanchors"
-Do not use Exonerate anchoring. (Exonerate to be installed separately.)
-.IP "\fB\-nomafft\fR" 8
-.IX Item "-nomafft"
-Do not use \s-1MAFFT\s0 for guide tree. (\s-1MAFFT\s0 to be installed separately.)
-.IP "\fB\-njtree\fR" 8
-.IX Item "-njtree"
-Estimate tree from an input alignment (and realign).
-.IP "\fB\-shortnames\fR" 8
-.IX Item "-shortnames"
-Truncate names at first space character.
-.IP "\fB\-quiet\fR" 8
-.IX Item "-quiet"
-Reduce output.
-.SS "\s-1ALIGNMENT MERGE\s0"
-.IX Subsection "ALIGNMENT MERGE"
-.IP "\fB\-d1=\f(BIalignment_file\fB\fR" 8
-.IX Item "-d1=alignment_file"
-The first input alignment file in \s-1FASTA\s0 format.
-.IP "\fB\-d2=\f(BIalignment_file\fB\fR" 8
-.IX Item "-d2=alignment_file"
-The second input alignment file in \s-1FASTA\s0 format.
-.IP "\fB\-t1=\f(BItree_file\fB\fR" 8
-.IX Item "-t1=tree_file"
-The tree file for the first alignment. If unset, an appriximated \s-1NJ\s0 tree is generated.
-.IP "\fB\-t2=\f(BItree_file\fB\fR" 8
-.IX Item "-t2=tree_file"
-The tree file for the second alignment. If unset, an appriximated \s-1NJ\s0 tree is generated.
-.SS "\s-1MODEL PARAMETERS\s0"
-.IX Subsection "MODEL PARAMETERS"
-.IP "\fB\-F\fR, \fB+F\fR" 8
-.IX Item "-F, +F"
-Force insertions to be always skipped.
-.IP "\fB\-gaprate=\f(BI#\fB\fR" 8
-.IX Item "-gaprate=#"
-Set the gap opening rate. The default is \fB0.025\fR for \s-1DNA\s0 and \fB0.005\fR for
-proteins.
-.IP "\fB\-gapext=\f(BI#\fB\fR" 8
-.IX Item "-gapext=#"
-Set the gap extension probability. The default is \fB0.75\fR for \s-1DNA\s0 and \fB0.5\fR for
-proteins.
-.IP "\fB\-codon\fR" 8
-.IX Item "-codon"
-Use empirical codon model for coding \s-1DNA.\s0
-.IP "\fB\-DNA\fR, \fB\-protein\fR" 8
-.IX Item "-DNA, -protein"
-Use \s-1DNA\s0 or protein model, respectively. Disables auto-detection of model.
-.IP "\fB\-termgap\fR" 8
-.IX Item "-termgap"
-Penalise terminal gaps normally.
-.IP "\fB\-nomissing\fR" 8
-.IX Item "-nomissing"
-No missing data. Use \fB\-F\fR for terminal gaps.
-.IP "\fB\-keep\fR" 8
-.IX Item "-keep"
-Do not remove gaps from pre-aligned sequences.
-.SS "\s-1OTHER PARAMETERS\s0"
-.IX Subsection "OTHER PARAMETERS"
-.IP "\fB\-iterate=#\fR" 8
-.IX Item "-iterate=#"
-Rounds of re-alignment iteration; by default, iterate five times and keep the best result.
-.IP "\fB\-once\fR" 8
-.IX Item "-once"
-Run only once. Same as \-iterate=1.
-.IP "\fB\-prunetree\fR" 8
-.IX Item "-prunetree"
-Prune guide tree branches with no sequence data.
-.IP "\fB\-prunedata\fR" 8
-.IX Item "-prunedata"
-Prune sequence data with no guide tree leaves.
-.IP "\fB\-uselogs\fR" 8
-.IX Item "-uselogs"
-Slower but should work for a greater number of sequences.
-.IP "\fB\-translate\fR" 8
-.IX Item "-translate"
-Translate input data to protein sequences.
-.IP "\fB\-mttranslate\fR" 8
-.IX Item "-mttranslate"
-Translate input data to protein sequencess using mt table.
-.IP "\fB\-convert\fR" 8
-.IX Item "-convert"
-Do not align, just convert to a different format.
-.IP "\fB\-dna=\f(BIdna_sequence_file\fB\fR" 8
-.IX Item "-dna=dna_sequence_file"
-\&\s-1DNA\s0 sequence file for backtranslation of protein alignment.
-.IP "\fB\-help\fR" 8
-.IX Item "-help"
-Show an extended help page with more options.
-.IP "\fB\-version\fR" 8
-.IX Item "-version"
-Show version and check for updates.
-.SH "AUTHORS"
-.IX Header "AUTHORS"
-\&\fBprank\fR was written by Ari Loytynoja.
-.PP
-This manual page was originally written by Manuel Prinz <manuel at debian.org> for
-the Debian project (and may be used by others).
=====================================
src/prank.cpp
=====================================
@@ -18,6 +18,7 @@
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
***************************************************************************/
+#include <limits.h>
#include <cstdlib>
#include <cmath>
#include <fstream>
@@ -35,48 +36,52 @@ using namespace std;
char tmp_dir[1000] = "";
bool verbose = false;
-//main
int main(int argc, char *argv[])
{
- version = 170427;
+ version = 170703;
readArguments(argc, argv);
int time1 = time(0);
// character array for assigning system tmp path
- char* tmpPath = NULL;
+ char* tmpPath = NULL;
//mkdtemp template
const char* mktemplate = "tmpdirprankmsaXXXXXX";
- //create temporary directory
- //get tmpPath from environment or assign tmpPath to NULL (if last getenv fails)
- if( (tmpPath = getenv("TMPDIR")) == NULL)
- {
- if( (tmpPath = getenv("TMP")) == NULL)
- {
- if( (tmpPath = getenv("TEMPDIR")) == NULL)
- {
- tmpPath = getenv("TEMP");
- }
- }
- }
-
- // define temp dir based on whether tmpPath was found
- if(tmpPath == NULL)
- {
- sprintf(tmp_dir, "%s", mktemplate);
- }
- else
- {
- sprintf(tmp_dir, "%s/%s", tmpPath, mktemplate);
- }
-
- // call mkdtemp
- if( (mkdtemp(tmp_dir) == NULL) )
- {
- perror("'mkdtemp' failed to generate temporary directory while creating ProgressiveAlignbment object.\n");
- exit(EXIT_FAILURE);
- }
+ //create temporary directory
+ //get tmpPath from environment or assign tmpPath to NULL (if last getenv fails)
+ if( (tmpPath = getenv("TMPDIR")) == NULL)
+ {
+ if( (tmpPath = getenv("TMP")) == NULL)
+ {
+ if( (tmpPath = getenv("TEMPDIR")) == NULL)
+ {
+ tmpPath = getenv("TEMP");
+ }
+ }
+ }
+
+ // define temp dir based on whether tmpPath was found
+ if(tmpPath == NULL)
+ {
+ sprintf(tmp_dir, "%s", mktemplate);
+ }
+ else
+ {
+ sprintf(tmp_dir, "%s/%s", tmpPath, mktemplate);
+ }
+
+ // call mkdtemp
+ if( (mkdtemp(tmp_dir) == NULL) )
+ {
+ perror("'mkdtemp' failed to generate temporary directory while creating ProgressiveAlignbment object.\n");
+ exit(EXIT_FAILURE);
+ }
+
+ char *abspath;
+ abspath = realpath(tmp_dir, NULL);
+ strncpy(tmp_dir,abspath,1000);
+ free(abspath);
ProgressiveAlignment* pa = new ProgressiveAlignment(treefile,seqfile,dnafile);
@@ -160,13 +165,11 @@ void readArguments(int argc, char *argv[])
exit(0);
}
-
- else if (s=="-verbose")
+ else if (s=="-verbose")
{
- verbose = true;
+ verbose = true;
}
-
/********* input/output: **********/
// sequence data file
@@ -247,13 +250,11 @@ void readArguments(int argc, char *argv[])
else if (s=="-keep")
{
PREALIGNED = true;
- PRINTSCOREONLY = false;
}
else if (s=="-score")
{
- PREALIGNED = true;
- PRINTSCOREONLY = true;
+ PRINTSCORE = true;
}
else if (s=="-update")
@@ -317,6 +318,12 @@ void readArguments(int argc, char *argv[])
BPPANCESTORS = false;
}
+ // do not estimate ancestors with raxml
+ else if (s=="-nomlanc")
+ {
+ MLANCESTORS = false;
+ }
+
// output alignment format
else if (s.substr(0,3)=="-f=")
{
@@ -755,6 +762,20 @@ void readArguments(int argc, char *argv[])
cout<<endl<<"unaccepted combination: -F -gapanch; disabling -gapanch"<<endl;
}
+ /********* technical: guide tree **********/
+
+ // use FastTree for guidetree computation
+ else if (s=="-nofasttree")
+ {
+ FASTTREE = false;
+ }
+
+ // use FastTree for guidetree computation
+ else if (s=="-raxmlrebl")
+ {
+ RAXMLREBL = true;
+ }
+
/********* technical: memory & speed efficiency **********/
// matrix resize factor
@@ -832,8 +853,11 @@ void printHelp(bool complete)
cout<<" -showall [output all of these]"<<endl;
if (complete)
{
+ cout<<" -raxmlrebl [recompute branch lengths for prealigned data]"<<endl;
+ cout<<" -nomafft [no MAFFT initial alignment]"<<endl;
+ cout<<" -nofasttree [no FastTree guidetree]"<<endl;
cout<<" -noanchors [no Exonerate anchoring]"<<endl;
- cout<<" -nomafft [no MAFFT guide tree]"<<endl;
+ cout<<" -nomlanc [no RAxML ancestors]"<<endl;
cout<<" -scoremafft [score also MAFFT alignment]"<<endl;
}
cout<<" -support [compute posterior support]"<<endl;
@@ -900,7 +924,6 @@ void printHelp(bool complete)
if (complete)
cout<<" -dna=dna_sequence_file [DNA sequence file for backtranslation of protein alignment]"<<endl;
cout<<" -version [check for updates]"<<endl;
- cout<<" -verbose [print progress etc. during runtime]"<<endl;
cout<<"\n -help [show more options]"<<endl;
cout<<""<<endl;
=====================================
src/prank.h
=====================================
@@ -69,7 +69,7 @@ bool TREESTRING = false;
bool PARTLYALIGNED = false;
bool PREALIGNED = false;
-bool PRINTSCOREONLY = false;
+bool PRINTSCORE = false;
bool UPDATE = false;
bool UPDATESECOND = true;
@@ -92,7 +92,8 @@ bool TREEONLY = false;
// compute score for mafft alignment
bool SCOREMAFFT = false;
-// estimate ancestors with bppancestors
+// estimate ancestors with raxml / bppancestors
+bool MLANCESTORS = true;
bool BPPANCESTORS = true;
// merge two alignments
@@ -306,6 +307,12 @@ int missingLimit = 1000;
// skip gaps in anchoring ancestral seqs (?)
bool SKIPGAPANCH = true;
+/********* technical: guide tree & branch lengths **********/
+
+bool FASTTREE = true;
+
+bool RAXMLREBL = false;
+
/********* technical: memory & speed efficiency **********/
// matrix resize factor
=====================================
src/prank.pro deleted
=====================================
@@ -1,99 +0,0 @@
-# -------------------------------------------------
-# Project created by QtCreator 2010-06-28T10:03:12
-# -------------------------------------------------
-QT -= core \
- gui \
- webkit
-TARGET = prank
-CONFIG = debug
-
-#CONFIG += console
-#CONFIG -= app_bundle
-TEMPLATE = app
-SOURCES += writefile.cpp \
- treenode.cpp \
- translatesequences.cpp \
- terminalsequence.cpp \
- terminalnode.cpp \
- site.cpp \
- sequence.cpp \
- readnewick.cpp \
- readfile.cpp \
- readalignment.cpp \
- pwsite.cpp \
- pwhirschberg.cpp \
- progressivealignment.cpp \
- prank.cpp \
- postprobability.cpp \
- phylomatchscore.cpp \
- node.cpp \
- intmatrix.cpp \
- hmmodel.cpp \
- hirschberg.cpp \
- guidetree.cpp \
- fullprobability.cpp \
- flmatrix.cpp \
- eigen.cpp \
- dbmatrix.cpp \
- characterprobability.cpp \
- boolmatrix.cpp \
- ancestralsequence.cpp \
- ancestralnode.cpp \
- check_version.cpp \
- exonerate_reads.cpp \
- mafft_alignment.cpp \
- bppancestors.cpp
-OTHER_FILES += \
- ../VERSION_HISTORY \
- prank.1.pod \
- Makefile.no_Qt \
- Makefile
-HEADERS += writefile.h \
- treenode.h \
- translatesequences.h \
- terminalsequence.h \
- terminalnode.h \
- site.h \
- sequence.h \
- readnewick.h \
- readfile.h \
- readalignment.h \
- pwsite.h \
- pwhirschberg.h \
- progressivealignment.h \
- prank.h \
- postprobability.h \
- phylomatchscore.h \
- node.h \
- intmatrix.h \
- hmmodel.h \
- hirschberg.h \
- guidetree.h \
- fullprobability.h \
- flmatrix.h \
- eigen.h \
- dbmatrix.h \
- config.h \
- characterprobability.h \
- boolmatrix.h \
- ancestralsequence.h \
- ancestralnode.h \
- check_version.h \
- exonerate_reads.h \
- mafft_alignment.h \
- bppancestors.h
-
-
-INCLUDEPATH += /usr/include
-
-
-
-
-
-
-
-
-
-
-
-
=====================================
src/progressivealignment.cpp
=====================================
@@ -36,6 +36,8 @@
#include "exonerate_reads.h"
#include "mafft_alignment.h"
#include "bppancestors.h"
+#include "raxmlancestors.h"
+#include "raxmlrebl.h"
using namespace std;
@@ -49,13 +51,6 @@ ProgressiveAlignment::ProgressiveAlignment(string treefile,string seqfile,string
if (NOISE>=0)
this->showInfo();
- Exonerate_reads er;
- if (!CONVERT && EXONERATE && !er.test_executable())
- {
-// cout<<"The executable for Exonerate not found. Fast alignment anchoring is not used.\n";
- EXONERATE = false;
- }
-
// Backtranslate predefined protein alignment to DNA and exit.
//
if (BACKTRANSLATE)
@@ -72,6 +67,15 @@ ProgressiveAlignment::ProgressiveAlignment(string treefile,string seqfile,string
exit(0);
}
+ Exonerate_reads er;
+ if (EXONERATE && !er.test_executable())
+ {
+// cout<<"The executable for Exonerate not found. Fast alignment anchoring is not used.\n";
+ EXONERATE = false;
+ }
+
+
+ ancestorsInferred = false;
// Get the sequence data
//
@@ -91,16 +95,6 @@ ProgressiveAlignment::ProgressiveAlignment(string treefile,string seqfile,string
}
-// map<string,string> org_stuff;
-// for(int i=0;i<names.size();i++)
-// org_stuff.insert(org_stuff.begin(),pair<string,string>(names.at(i),sequences.at(i)));
-// cout<<"org "<<org_stuff.size()<<endl;
-
-
-// cout<<"check 1\n";
-// this->checkStuff(&org_stuff,&names,&sequences);
-// cout<<"check 1\n";
-
// Make setting and set the alignment model
//
@@ -154,9 +148,6 @@ ProgressiveAlignment::ProgressiveAlignment(string treefile,string seqfile,string
AncestralNode* root = static_cast<AncestralNode*>(nodes[rn.getRoot()]);
-// string tmpstr;
-// root->getNewickBrl(&tmpstr);
-// cout<<tmpstr<<endl;
// If an old tree is provided, mark the shared sub-trees
//
@@ -178,6 +169,8 @@ ProgressiveAlignment::ProgressiveAlignment(string treefile,string seqfile,string
BPPANCESTORS = false;
}
+ root->updateTerminalNames();
+
/////////////////////////////////
// Different alignment options //
/////////////////////////////////
@@ -187,14 +180,27 @@ ProgressiveAlignment::ProgressiveAlignment(string treefile,string seqfile,string
//
if (PREALIGNED)
{
+ if(RAXMLREBL)
+ {
+ RaxmlRebl rr;
+ if(rr.testExecutable())
+ {
+ rr.inferBranchLengths(root,&names,&sequences,isDna);
+
+ }
+
+ }
+
this->readAlignment(root,&names,&sequences,isDna,longest);
- int nSubst; int nIns; int nDel; int nInsDel; bool noSuffix=true;
- int bestScore = this->computeParsimonyScore(root,isDna,-1,&nSubst,&nIns,&nDel,&nInsDel,noSuffix);
- cout<<"\nAlignment score: "<<bestScore;
- if(PRINTSCOREONLY)
+ if(PRINTSCORE)
{
+ int nSubst; int nIns; int nDel; int nInsDel; bool noSuffix=true;
+ int bestScore = this->computeParsimonyScore(root,isDna,-1,&nSubst,&nIns,&nDel,&nInsDel,noSuffix);
+
+ cout<<"\nAlignment score: "<<bestScore;
+
if(nInsDel>0)
cout<<" [ "<<nSubst<<" subst., "<<nIns<<" ins., "<<nDel<<" del., "<<nInsDel<<" indel. ]"<<endl<<endl;
else
@@ -211,9 +217,12 @@ ProgressiveAlignment::ProgressiveAlignment(string treefile,string seqfile,string
{
this->partlyAlign(root,&names,&sequences,isDna,longest);
- int nSubst; int nIns; int nDel; int nInsDel; bool noSuffix=true;
- int bestScore = this->computeParsimonyScore(root,isDna,-1,&nSubst,&nIns,&nDel,&nInsDel,noSuffix);
- cout<<"\nAlignment score: "<<bestScore<<endl;
+ if(PRINTSCORE)
+ {
+ int nSubst; int nIns; int nDel; int nInsDel; bool noSuffix=true;
+ int bestScore = this->computeParsimonyScore(root,isDna,-1,&nSubst,&nIns,&nDel,&nInsDel,noSuffix);
+ cout<<"\nAlignment score: "<<bestScore<<endl;
+ }
}
// Alignment based on an old tree: re-align nodes that have changed
@@ -222,9 +231,12 @@ ProgressiveAlignment::ProgressiveAlignment(string treefile,string seqfile,string
{
this->updateAlignment(root,&names,&sequences,isDna,longest);
- int nSubst; int nIns; int nDel; int nInsDel; bool noSuffix=true;
- int bestScore = this->computeParsimonyScore(root,isDna,-1,&nSubst,&nIns,&nDel,&nInsDel,noSuffix);
- cout<<"\nAlignment score: "<<bestScore<<endl;
+ if(PRINTSCORE)
+ {
+ int nSubst; int nIns; int nDel; int nInsDel; bool noSuffix=true;
+ int bestScore = this->computeParsimonyScore(root,isDna,-1,&nSubst,&nIns,&nDel,&nInsDel,noSuffix);
+ cout<<"\nAlignment score: "<<bestScore<<endl;
+ }
}
// Regular alignment
@@ -272,6 +284,7 @@ ProgressiveAlignment::ProgressiveAlignment(string treefile,string seqfile,string
while (thisIteration<=iterations)
{
+ ancestorsInferred = false;
map<string,float> subtreesOld;
if (UPDATESECOND)
@@ -283,6 +296,11 @@ ProgressiveAlignment::ProgressiveAlignment(string treefile,string seqfile,string
gt.computeTree(&sequences,&names,isDna);
tree = gt.getTree();
+
+ Node* n = new Node(tree);
+ tree = n->print_tree();
+ delete n;
+
if (NOISE>0)
cout<<tree<<endl;
@@ -492,6 +510,7 @@ void ProgressiveAlignment::printAlignment(AncestralNode *root,vector<string> *nm
if (WRITEANCSEQ)
this->printAncestral(root,filename,isDna,verbose);
+
}
}
else
@@ -506,11 +525,13 @@ void ProgressiveAlignment::printAlignment(AncestralNode *root,vector<string> *nm
this->setAlignedSequences(root);
this->reconstructAncestors(root,isDna);
+
if (WRITEXML)
this->printXml(root,filename,false);
if (WRITEANCSEQ)
this->printAncestral(root,filename+".pep",isDna,verbose);
+
}
vector<string> dSeqs;
@@ -520,6 +541,11 @@ void ProgressiveAlignment::printAlignment(AncestralNode *root,vector<string> *nm
exit(-1);
}
+ if(format!=17 && !(WRITEXML || WRITEANCSEQ)) {
+ file = filename+".nuc"+formatExtension(format);
+ wfa.writeSeqs(file.c_str(),nms,&dSeqs,format);
+ return;
+ }
bool tmpCODON = CODON;
bool tmpisDna = isDna;
@@ -572,7 +598,6 @@ void ProgressiveAlignment::printAlignment(AncestralNode *root,vector<string> *nm
}
ReadAlignment ra;
- ra.cleanUp();
ra.initialiseMatrices(longest+2);
codonRoot->setTotalNodes();
@@ -774,6 +799,29 @@ void ProgressiveAlignment::printXml(AncestralNode *root,string filename,bool tra
void ProgressiveAlignment::reconstructAncestors(AncestralNode *root,bool isDna)
{
+ RaxmlAncestors rxmla;
+ if(MLANCESTORS && rxmla.testExecutable() && root->getTerminalNodeNumber()>3)
+ {
+ if(NOISE>0)
+ cout<<"Using RAxML to infer ancestral sequences\n";
+
+ map<string,string> aseqs;
+ string atree;
+
+ bool worked = rxmla.inferAncestors(root,&aseqs,&atree,isDna);
+
+ if(worked) {
+ root->setAncSequenceStrings(&aseqs);
+
+ vector<string> aseqs2;
+ this->getAncestralAlignmentMatrix(root,&aseqs2);
+ root->setAncSequenceGaps(&aseqs2);
+
+ ancestorsInferred = true;
+
+ return;
+ }
+ }
BppAncestors bppa;
if(BPPANCESTORS && bppa.testExecutable())
@@ -854,7 +902,7 @@ void ProgressiveAlignment::reconstructAncestors(AncestralNode *root,bool isDna)
}
if(NOISE>0)
- cout<<"BppAncestor not used. Inferring approximate ancestral sequences\n";
+ cout<<"RAxML not used. Inferring approximate ancestral sequences\n";
vector<string> aseqs;
this->getAncestralAlignmentMatrix(root,&aseqs);
@@ -874,11 +922,11 @@ void ProgressiveAlignment::setAlignedSequences(AncestralNode *root)
int ProgressiveAlignment::computeParsimonyScore(AncestralNode *root,bool isDna,int bestScore,int *nSubst,int *nIns,int *nDel,int *nInsDel,bool noSuffix)
{
-// if (! (WRITEXML || WRITEANCSEQ))
-// {
+ if (! ancestorsInferred )
+ {
this->setAlignedSequences(root);
this->reconstructAncestors(root,isDna);
-// }
+ }
string alpha = hmm->getFullAlphabet();
int sAlpha = alpha.length();
=====================================
src/progressivealignment.h
=====================================
@@ -39,6 +39,8 @@
#include "mafft_alignment.h"
#include "exonerate_reads.h"
#include "bppancestors.h"
+#include "raxmlancestors.h"
+#include "fasttree_tree.h"
#include "readalignment.h"
#include "hirschberg.h"
@@ -50,6 +52,8 @@ public:
private:
+ bool ancestorsInferred;
+
Site *sites;
void getAlignmentMatrix(AncestralNode *root,char* alignment,bool translate);
@@ -63,7 +67,7 @@ private:
void readAlignment(AncestralNode *root,vector<string> *names,vector<string> *sequences,bool isDna,int longest,bool verbose=true,bool writeOutput=true,string outputSuffix="")
{
- if(PRINTSCOREONLY)
+ if(PRINTSCORE)
verbose = false;
if(!this->sequencesAligned(sequences))
@@ -220,7 +224,7 @@ private:
{
cout<<" - converting '"<<seqfile<<"' to '"<<outfile<<this->formatExtension(format)<<"'"<<endl<<endl;
}
- else if (PRINTSCOREONLY)
+ else if (PRINTSCORE)
{
cout<<" - computing score for '"<<seqfile<<"' and '"<<treefile<<"'"<<endl<<endl;
if(NOISE==0) { NOISE=-1; SCREEN = false;}
@@ -277,22 +281,27 @@ private:
Exonerate_reads er;
bool exonerateOK = er.test_executable();
- BppAncestors ba;
- bool bppaOK = ba.testExecutable();
+ RaxmlAncestors ra;
+ bool rxmlaOK = ra.testExecutable();
+
+ FastTree_tree ft;
+ bool fsttOK = ft.test_executable();
- if(mafftOK || exonerateOK || bppaOK)
+ if(mafftOK || exonerateOK || rxmlaOK)
{
cout<<" - external tools available:\n";
if(mafftOK)
cout<<" MAFFT for initial alignment\n";
+ if(FASTTREE && fsttOK)
+ cout<<" FastTree for guide tree estimation\n";
if(exonerateOK)
cout<<" Exonerate for alignment anchoring\n";
- if(bppaOK)
- cout<<" BppAncestor for ancestral state reconstruction\n";
+ if(rxmlaOK)
+ cout<<" RAxML for ancestral state reconstruction\n";
}
cout<<endl;
- if(treefile!="" && iterations>1)
+ if(treefile!="" && iterations>1 && !PREALIGNED)
{
cout<<"Warning: iterative search may change the guide tree. If you want to keep\n the phylogeny provided in '"<<treefile<<
"', please add option '-once'.\n\n";
@@ -349,6 +358,22 @@ private:
exit(-1);
}
+ vector<string> tnames;
+ root->getTerminalNames(&tnames);
+
+ for (int i=0; i<(int)tnames.size()-1; i++)
+ {
+ for (int j=i+1; j<(int)tnames.size(); j++)
+ {
+ if(tnames.at(i)==tnames.at(j))
+ {
+ cout<<"Name '"<<tnames.at(i)<<"' is defined in the tree more than once.\n";
+ exit(-1);
+ }
+ }
+ }
+
+
// .. and make sure that the sequence data and the tree match!
//
if(nsqs != names->size() && !PRUNEDATA)
@@ -860,9 +885,9 @@ private:
int hit;
if(isDna)
- hit = sequences->at(i).find_first_not_of("ACGTURYMKSWHBVDNacgturymkswhbvdn-",pos);
+ hit = sequences->at(i).find_first_not_of("ACGTURYMKSWHBVDNacgturymkswhbvdn-.",pos);
else
- hit = sequences->at(i).find_first_not_of("ARNDCQEGHILKMFPSTWYVXarndcqeghilkmfpstwyvx-",pos);
+ hit = sequences->at(i).find_first_not_of("ARNDCQEGHILKMFPSTWYVXarndcqeghilkmfpstwyvx-.",pos);
if(hit != string::npos)
{
@@ -1237,8 +1262,9 @@ private:
// this->setHMModel(sequences,isDna);
-
Node* n = new Node(*tree);
+ *tree = n->print_tree();
+
n->mark_sequences(names);
int treeleaves = 0;
int treematches = 0;
=====================================
src/raxmlancestors.cpp
=====================================
@@ -0,0 +1,409 @@
+/***************************************************************************
+ * Copyright (C) 2013 by Ari Loytynoja *
+ * ari at ebi.ac.uk *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
+ ***************************************************************************/
+
+#include <fstream>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <sstream>
+#include <vector>
+#include <exception>
+#include <algorithm>
+#include <unistd.h>
+#include "raxmlancestors.h"
+#include "readfile.h"
+#include "readnewick.h"
+
+#if defined (__APPLE__)
+#include <mach-o/dyld.h>
+#endif
+
+using namespace std;
+
+RaxmlAncestors::RaxmlAncestors()
+{
+}
+
+bool RaxmlAncestors::testExecutable()
+{
+
+ #if defined (__CYGWIN__)
+ char path[200] = "";
+ int length = readlink("/proc/self/exe",path,200-1);
+
+ string epath = string(path).substr(0,length);
+ if (epath.find("/")!=std::string::npos)
+ epath = epath.substr(0,epath.rfind("/")+1);
+ bppdistpath = epath;
+ epath = epath+"raxml -h >/dev/null 2>/dev/null";
+ int status = system(epath.c_str());
+
+ return WEXITSTATUS(status) == 0;
+
+ # else
+
+ char path[200] = "";
+ string epath;
+
+ #if defined (__APPLE__)
+ uint32_t size = sizeof(path);
+ _NSGetExecutablePath(path, &size);
+ epath = string(path);
+ if (epath.find("/")!=std::string::npos)
+ epath = epath.substr(0,epath.rfind("/")+1);
+ //epath = "DYLD_LIBRARY_PATH="+epath+" "+epath;
+
+ #else
+ int length = readlink("/proc/self/exe",path,200-1);
+ epath = string(path).substr(0,length);
+ if (epath.find("/")!=std::string::npos)
+ epath = epath.substr(0,epath.rfind("/")+1);
+
+ #endif
+
+ raxmlpath = epath;
+ epath = epath+"raxml -h >/dev/null 2>/dev/null";
+ int status = system(epath.c_str());
+
+
+ if(WEXITSTATUS(status) == 0)
+ return true;
+
+ raxmlpath = "";
+ status = system("raxml -h >/dev/null 2>/dev/null");
+
+
+ return WEXITSTATUS(status) == 0;
+
+ #endif
+}
+
+bool RaxmlAncestors::inferAncestors(AncestralNode *root,map<string,string> *aseqs,string *atree,bool isDna)
+{
+
+ stringstream f_name;
+ stringstream t_name;
+ stringstream i_name;
+ stringstream o_name;
+ stringstream m_name;
+ stringstream p_name;
+
+ int r = rand();
+ while(true)
+ {
+
+ f_name.str("");
+ t_name.str("");
+ i_name.str("");
+ o_name.str("");
+ m_name.str("");
+ p_name.str("");
+
+ f_name <<tmp_dir<<"/f"<<r<<".phy";
+ ifstream f_file(f_name.str().c_str());
+
+ t_name <<tmp_dir<<"/t"<<r<<".tre";
+ ifstream t_file(t_name.str().c_str());
+
+ i_name <<tmp_dir<<"/RAxML_info."<<r;
+ ifstream i_file(i_name.str().c_str());
+
+ o_name <<tmp_dir<<"/RAxML_marginalAncestralStates."<<r;
+ ifstream o_file(o_name.str().c_str());
+
+ m_name <<tmp_dir<<"/RAxML_nodeLabelledRootedTree."<<r;
+ ifstream m_file(m_name.str().c_str());
+
+ p_name <<tmp_dir<<"/RAxML_marginalAncestralProbabilities."<<r;
+ ifstream p_file(p_name.str().c_str());
+
+ if(!f_file && !t_file && !i_file && !o_file && !m_file && !p_file)
+ {
+ ofstream f_tmp;
+ f_tmp.open(f_name.str().c_str(), (ios::out) );
+ ofstream t_tmp;
+ t_tmp.open(t_name.str().c_str(), (ios::out) );
+ ofstream o_tmp;
+ o_tmp.open(o_name.str().c_str(), (ios::out) );
+ ofstream m_tmp;
+ m_tmp.open(m_name.str().c_str(), (ios::out) );
+ ofstream p_tmp;
+ p_tmp.open(p_name.str().c_str(), (ios::out) );
+
+ break;
+ }
+ r = rand();
+ }
+
+
+
+ ////////////
+
+ int l = root->getSequence()->length();
+
+ vector<string> names;
+ vector<string> sequences;
+
+ root->getTerminalNames(&names);
+ for (int i=0; i<names.size(); i++)
+ {
+ sequences.push_back(string(""));
+ }
+
+ vector<string> col;
+
+ bool tmpDOTS = DOTS;
+ DOTS = false;
+ for (int i=0; i<l; i++)
+ {
+ col.clear();
+ root->getCharactersAt(&col,i);
+ vector<string>::iterator cb = col.begin();
+ vector<string>::iterator ce = col.end();
+ vector<string>::iterator si = sequences.begin();
+
+ for (; cb!=ce; cb++,si++)
+ {
+ *si+=*cb;
+ }
+ }
+ DOTS = tmpDOTS;
+
+ vector<string>::iterator si = sequences.begin();
+ vector<string>::iterator ni = names.begin();
+
+ vector<int> masked_columns;
+
+ for(int i=0;i<sequences.at(0).length();i++) {
+ int ic=0;
+ for(si = sequences.begin();si!=sequences.end();si++)
+ {
+ char c = si->at(i);
+ if(c != 'N' && c != '-' && c != 'X' )
+ {
+ ic++;
+ continue;
+ }
+ }
+ if(ic==0) {
+ for(si = sequences.begin();si!=sequences.end();si++)
+ {
+ char c = si->at(i);
+ if(c == 'N')
+ si->at(i)='A';
+ if(c == 'X')
+ si->at(i)='G';
+
+ masked_columns.push_back(i);
+ }
+ }
+ }
+
+ ofstream f_output;
+ f_output.open( f_name.str().c_str(), (ios::out) );
+ f_output<<sequences.size()<<" "<<sequences.at(0).length()<<"\n";
+ int count = 0;
+ map<string,string> tmp_names;
+ for(si = sequences.begin();si!=sequences.end();si++,ni++)
+ {
+ string name1 = *ni+":";
+ stringstream name2;
+ name2<<"seq"<<count++;
+ tmp_names.insert(make_pair(name1,name2.str()+":"));
+ f_output<<name2.str()<<"\n"<<*si<<"\n";
+ }
+ f_output.close();
+
+
+ string tree = "";
+
+ root->getLabelledNewickBrl(&tree);
+ tree.erase(std::remove(tree.begin(), tree.end(), '#'), tree.end());
+ for(map<string,string>::iterator it = tmp_names.begin();it != tmp_names.end(); it++)
+ {
+ size_t pos = 0;
+ if((pos = tree.find("("+it->first)) != std::string::npos)
+ {
+ tree.replace(pos+1, it->first.length(), it->second);
+ }
+ if((pos = tree.find(","+it->first)) != std::string::npos)
+ {
+ tree.replace(pos+1, it->first.length(), it->second);
+ }
+ }
+
+ stringstream tag;
+ tag << root->getNodeName();
+ char b,e; int num;
+ tag >> b >> num >> e;
+ tag.clear();
+ tag.str("");
+ tag << num;
+ tree += tag.str()+";";
+
+ ofstream t_output;
+ t_output.open( t_name.str().c_str(), (ios::out) );
+ t_output<<tree<<endl;
+ t_output.close();
+
+
+ stringstream command;
+ command << raxmlpath<<"raxml -s "<<f_name.str()<<" -t "<<t_name.str()<<" -w "+string(tmp_dir)+" -f A -T 2 -n "<<r;
+ if(!isDna)
+ command << " -m PROTCATJTT";
+ else
+ command << " -m GTRCAT";
+
+ if(NOISE>0)
+ cout<<"cmd: "<<command.str()<<endl;
+
+
+ FILE *fpipe;
+ char line[256];
+
+ if ( !(fpipe = (FILE*)popen(command.str().c_str(),"r")) )
+ {
+ cout<<"Problems with raxml pipe.\nExiting.\n";
+ exit(1);
+ }
+ while ( fgets( line, sizeof line, fpipe))
+ {
+ if(NOISE>1)
+ cout<<"RAxML: "+string(line);
+ }
+ pclose(fpipe);
+
+ o_name.str("");
+ o_name <<tmp_dir<<"/RAxML_marginalAncestralStates."<<r;
+ ifstream o_file(o_name.str().c_str());
+
+
+ ReadNewick rn;
+ string rtree = rn.readFile(m_name.str().c_str());
+
+ if (rtree.length()==0)
+ {
+
+ //delete files
+ if( remove( f_name.str().c_str() ) != 0 )
+ perror("Error deleting temporary file in RaxmlAncestors::inferAncestors");
+ if( remove( t_name.str().c_str() ) != 0 )
+ perror("Error deleting temporary file in RaxmlAncestors::inferAncestors");
+ if( remove( i_name.str().c_str() ) != 0 )
+ perror("Error deleting temporary file in RaxmlAncestors::inferAncestors");
+ if( remove( o_name.str().c_str() ) != 0 )
+ perror("Error deleting temporary file in RaxmlAncestors::inferAncestors");
+ if( remove( m_name.str().c_str() ) != 0 )
+ perror("Error deleting temporary file in RaxmlAncestors::inferAncestors");
+ if( remove( p_name.str().c_str() ) != 0 )
+ perror("Error deleting temporary file in RaxmlAncestors::inferAncestors");
+
+ stringstream r_name;
+ r_name <<tmp_dir<<"/f"<<r<<".phy.reduced";
+ ifstream r_file(r_name.str().c_str());
+ if(r_file)
+ if( remove( r_name.str().c_str() ) != 0 )
+ perror("Error deleting temporary file in RaxmlAncestors::inferAncestors");
+
+
+ return false;
+ }
+
+ map<string,TreeNode*> rtnodes;
+ rn.buildTree(rtree,&rtnodes);
+
+ AncestralNode* rtroot = static_cast<AncestralNode*>(rtnodes[rn.getRoot()]);
+
+ map<string,string> rtsubtrees;
+ rtroot->getAllFullSubtreesWithNodename(&rtsubtrees,false);
+ delete rtroot;
+
+
+ map<string,TreeNode*> innodes;
+ rn.buildTree(tree,&innodes);
+
+ AncestralNode* inroot = static_cast<AncestralNode*>(innodes[rn.getRoot()]);
+
+ map<string,string> subtrees;
+ inroot->getAllFullSubtreesWithNodename(&subtrees,true);
+ delete inroot;
+
+
+ string temp, name, sequence = "";
+
+ while (!o_file.eof())
+ {
+ getline(o_file, temp, '\n'); // Copy current line in temporary string
+ stringstream tmpstr(temp);
+ name="";sequence="";
+
+ tmpstr >> name >> sequence;
+
+ if(sequence.length()>0) {
+ vector<int>::iterator mi;
+ for(mi = masked_columns.begin();mi!=masked_columns.end();mi++) {
+ for(si = sequences.begin();si!=sequences.end();si++)
+ {
+ char c = sequence.at(*mi);
+ if(c == 'A')
+ sequence.at(*mi)='N';
+ if(c == 'G')
+ sequence.at(*mi)='X';
+ }
+ }
+ }
+
+ map<string,string>::iterator it = rtsubtrees.find("#"+name+"#");
+ if(it!=rtsubtrees.end())
+ {
+ string subtree = it->second;
+ it = subtrees.find(subtree);
+ if(it!=subtrees.end())
+ {
+ std::replace(sequence.begin(),sequence.end(),'?','-');
+ aseqs->insert(aseqs->begin(),pair<string,string>(it->second,sequence));
+ }
+ }
+ }
+
+ //delete files
+ if( remove( f_name.str().c_str() ) != 0 )
+ perror("Error deleting temporary file in RaxmlAncestors::inferAncestors");
+ if( remove( t_name.str().c_str() ) != 0 )
+ perror("Error deleting temporary file in RaxmlAncestors::inferAncestors");
+ if( remove( i_name.str().c_str() ) != 0 )
+ perror("Error deleting temporary file in RaxmlAncestors::inferAncestors");
+ if( remove( o_name.str().c_str() ) != 0 )
+ perror("Error deleting temporary file in RaxmlAncestors::inferAncestors");
+ if( remove( m_name.str().c_str() ) != 0 )
+ perror("Error deleting temporary file in RaxmlAncestors::inferAncestors");
+ if( remove( p_name.str().c_str() ) != 0 )
+ perror("Error deleting temporary file in RaxmlAncestors::inferAncestors");
+
+ stringstream r_name;
+ r_name <<tmp_dir<<"/f"<<r<<".phy.reduced";
+ ifstream r_file(r_name.str().c_str());
+ if(r_file)
+ if( remove( r_name.str().c_str() ) != 0 )
+ perror("Error deleting temporary file in RaxmlAncestors::inferAncestors");
+
+
+ return aseqs->size()>0;
+}
=====================================
src/raxmlancestors.h
=====================================
@@ -0,0 +1,38 @@
+/***************************************************************************
+ * Copyright (C) 2013 by Ari Loytynoja *
+ * ari at ebi.ac.uk *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
+ ***************************************************************************/
+
+#ifndef RAXMLANCESTORS_H
+#define RAXMLANCESTORS_H
+
+#include "ancestralnode.h"
+#include "config.h"
+#include <sys/stat.h>
+
+class RaxmlAncestors
+{
+ std::string raxmlpath;
+
+public:
+ RaxmlAncestors();
+ bool testExecutable();
+ bool inferAncestors(AncestralNode *root,map<string,string> *aseqs,string *atree,bool isDna);
+};
+
+#endif // RAXMLANCESTORS_H
=====================================
src/raxmlrebl.cpp
=====================================
@@ -0,0 +1,306 @@
+/***************************************************************************
+ * Copyright (C) 2013 by Ari Loytynoja *
+ * ari at ebi.ac.uk *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
+ ***************************************************************************/
+
+#include <fstream>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <sstream>
+#include <vector>
+#include <exception>
+#include <algorithm>
+#include <unistd.h>
+#include "raxmlrebl.h"
+#include "readfile.h"
+#include "readnewick.h"
+#include "node.h"
+
+#if defined (__APPLE__)
+#include <mach-o/dyld.h>
+#endif
+
+using namespace std;
+
+RaxmlRebl::RaxmlRebl()
+{
+}
+
+bool RaxmlRebl::testExecutable()
+{
+
+ #if defined (__CYGWIN__)
+ char path[200];
+ int length = readlink("/proc/self/exe",path,200-1);
+
+ string epath = string(path).substr(0,length);
+ if (epath.find("/")!=std::string::npos)
+ epath = epath.substr(0,epath.rfind("/")+1);
+ bppdistpath = epath;
+ epath = epath+"raxml -h >/dev/null 2>/dev/null";
+ int status = system(epath.c_str());
+
+ return WEXITSTATUS(status) == 0;
+
+ # else
+
+ char path[200];
+ string epath;
+
+ #if defined (__APPLE__)
+ uint32_t size = sizeof(path);
+ _NSGetExecutablePath(path, &size);
+ epath = string(path);
+ if (epath.find("/")!=std::string::npos)
+ epath = epath.substr(0,epath.rfind("/")+1);
+ //epath = "DYLD_LIBRARY_PATH="+epath+" "+epath;
+
+ #else
+ int length = readlink("/proc/self/exe",path,200-1);
+ epath = string(path).substr(0,length);
+ if (epath.find("/")!=std::string::npos)
+ epath = epath.substr(0,epath.rfind("/")+1);
+
+ #endif
+
+ raxmlpath = epath;
+ epath = epath+"raxml -h >/dev/null 2>/dev/null";
+ int status = system(epath.c_str());
+
+
+ if(WEXITSTATUS(status) == 0)
+ return true;
+
+ raxmlpath = "";
+ status = system("raxml -h >/dev/null 2>/dev/null");
+
+
+ return WEXITSTATUS(status) == 0;
+
+ #endif
+}
+
+bool RaxmlRebl::inferBranchLengths(AncestralNode *root,vector<string> *names,vector<string> *sequences,bool isDna)
+{
+
+ stringstream f_name;
+ stringstream t_name;
+ stringstream i_name;
+ stringstream o_name;
+ stringstream m_name;
+
+ int r = rand();
+ while(true)
+ {
+
+ f_name.str("");
+ t_name.str("");
+ i_name.str("");
+ o_name.str("");
+ m_name.str("");
+
+ f_name <<tmp_dir<<"/f"<<r<<".phy";
+ ifstream f_file(f_name.str().c_str());
+
+ t_name <<tmp_dir<<"/t"<<r<<".tre";
+ ifstream t_file(t_name.str().c_str());
+
+ i_name <<tmp_dir<<"/RAxML_info."<<r;
+ ifstream i_file(i_name.str().c_str());
+
+ o_name <<tmp_dir<<"/RAxML_result."<<r;
+ ifstream o_file(o_name.str().c_str());
+
+ if(!f_file && !t_file && !i_file && !o_file)
+ {
+ ofstream f_tmp;
+ f_tmp.open(f_name.str().c_str(), (ios::out) );
+ ofstream t_tmp;
+ t_tmp.open(t_name.str().c_str(), (ios::out) );
+ ofstream o_tmp;
+ o_tmp.open(o_name.str().c_str(), (ios::out) );
+
+ break;
+ }
+ r = rand();
+ }
+
+
+ vector<string>::iterator si = sequences->begin();
+ vector<string>::iterator ni = names->begin();
+
+ ofstream f_output;
+ f_output.open( f_name.str().c_str(), (ios::out) );
+ f_output<<sequences->size()<<" "<<sequences->at(0).length()<<"\n";
+ int count = 0;
+ map<string,string> tmp_names;
+ map<string,string> rev_names;
+ for(;si!=sequences->end();si++,ni++)
+ {
+ string name1 = *ni+":";
+ stringstream name2;
+ name2<<"seq"<<count++;
+ tmp_names.insert(make_pair(name1,name2.str()+":"));
+ rev_names.insert(make_pair(name2.str(),*ni));
+ f_output<<name2.str()<<"\n"<<*si<<"\n";
+// cout<<name2.str()<<"\n"<<*si<<"\n";
+ }
+ f_output.close();
+
+
+ string tree = "";
+ root->getLabelledNewickBrl(&tree);
+ tree.erase(std::remove(tree.begin(), tree.end(), '#'), tree.end());
+ tree += ";";
+ for(map<string,string>::iterator it = tmp_names.begin();it != tmp_names.end(); it++)
+ {
+ size_t pos = 0;
+ if((pos = tree.find("("+it->first)) != std::string::npos)
+ tree.replace(pos+1, it->first.length(), it->second);
+ if((pos = tree.find(","+it->first)) != std::string::npos)
+ tree.replace(pos+1, it->first.length(), it->second);
+ }
+
+
+ ofstream t_output;
+ t_output.open( t_name.str().c_str(), (ios::out) );
+ t_output<<tree<<endl;
+ t_output.close();
+
+
+ stringstream command;
+ command << raxmlpath<<"raxml -s "<<f_name.str()<<" -t "<<t_name.str()<<" -w "+string(tmp_dir)+" -f e -T 2 -n "<<r;
+ if(!isDna)
+ command << " -m PROTCATJTT";
+ else
+ command << " -m GTRCAT";
+
+ if(NOISE>0)
+ cout<<"cmd: "<<command.str()<<endl;
+
+
+ FILE *fpipe;
+ char line[256];
+
+ if ( !(fpipe = (FILE*)popen(command.str().c_str(),"r")) )
+ {
+ cout<<"Problems with raxml pipe.\nExiting.\n";
+ exit(1);
+ }
+ while ( fgets( line, sizeof line, fpipe))
+ {
+ if(NOISE>1)
+ cout<<"RAxML: "+string(line);
+ }
+ pclose(fpipe);
+
+ ReadNewick rn;
+ string rtree = rn.readFile(o_name.str().c_str());
+
+
+ Node* n1 = new Node(tree,false);
+ Node* n2 = new Node(rtree,false);
+
+ int h0,h1;
+ string s = n2->markNodes();
+ s = n1->markNodes();
+ n1->getRootNamehashes(&h0,&h1);
+
+ bool found = n2->rootAtMarkedNode(h0,h1);
+ if(found)
+ {
+ string ntree = n2->rootedTree();
+
+ Node* n3 = new Node(ntree,false);
+ s = n3->markNodes();
+
+ ReadNewick rn;
+ map<string,TreeNode*> tmpnodes;
+ rn.buildTree(tree,&tmpnodes);
+ AncestralNode* tmproot = static_cast<AncestralNode*>(tmpnodes[rn.getRoot()]);
+
+ map<int,string> ids;
+ tmproot->markNodes();
+ tmproot->getNamehashNames(&ids);
+
+ map<int,float> map1;
+ map<int,float> map3;
+
+ n1->getNamehashesDistance(&map1);
+ n3->getNamehashesDistance(&map3);
+
+ map<string,float> brls;
+ int nbrls = 0;
+
+ for(map<int,float>::iterator it1=map1.begin();it1!=map1.end();it1++)
+ {
+ map<int,string>::iterator it=ids.find(it1->first);
+ if(it != ids.end())
+ {
+ map<int,float>::iterator it3=map3.find(it1->first);
+ if(it3 != map3.end())
+ {
+ map<string,string>::iterator nit = rev_names.find(it->second);
+ if(nit != rev_names.end() )
+ {
+ brls.insert(make_pair(nit->second,it3->second));
+ nbrls++;
+// cout<<nit->second<<" "<<it1->first<<" "<<it1->second<<" "<<it3->second<<endl;
+ }
+ else if(it->second.at(0)=='#')
+ {
+ brls.insert(make_pair(it->second,it3->second));
+ nbrls++;
+// cout<<it->second<<" "<<it1->first<<" "<<it1->second<<" "<<it3->second<<endl;
+ }
+ }
+ }
+
+ }
+
+ delete tmproot;
+ delete n3;
+
+ if(nbrls == n1->getNodeNumber())
+ {
+ int nubrl = 0;
+ root->updateBranchLengths(&brls,&nubrl);
+ if(nubrl != nbrls)
+ cout<<nubrl<<" Updating branch lengths failed!\n\n";
+ }
+ }
+
+ delete n1;
+ delete n2;
+
+ //delete files
+ if( remove( f_name.str().c_str() ) != 0 )
+ perror("Error deleting temporary file in RaxmlRebl::inferAncestors");
+ if( remove( t_name.str().c_str() ) != 0 )
+ perror("Error deleting temporary file in RaxmlRebl::inferAncestors");
+ if( remove( i_name.str().c_str() ) != 0 )
+ perror("Error deleting temporary file in RaxmlRebl::inferAncestors");
+ if( remove( o_name.str().c_str() ) != 0 )
+ perror("Error deleting temporary file in RaxmlRebl::inferAncestors");
+ if( remove( m_name.str().c_str() ) != 0 )
+ perror("Error deleting temporary file in RaxmlRebl::inferAncestors");
+
+
+ return true;
+}
=====================================
src/raxmlrebl.h
=====================================
@@ -0,0 +1,39 @@
+/***************************************************************************
+ * Copyright (C) 2013 by Ari Loytynoja *
+ * ari at ebi.ac.uk *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
+ ***************************************************************************/
+
+#ifndef RAXMLREBL_H
+#define RAXMLREBL_H
+
+#include "ancestralnode.h"
+#include "config.h"
+#include <sys/stat.h>
+#include "node.h"
+
+class RaxmlRebl
+{
+ std::string raxmlpath;
+
+public:
+ RaxmlRebl();
+ bool testExecutable();
+ bool inferBranchLengths(AncestralNode *root,vector<string> *names,vector<string> *sequences,bool isDna);
+};
+
+#endif // RAXMLREBL_H
=====================================
src/readalignment.cpp
=====================================
@@ -1104,7 +1104,7 @@ bool ReadAlignment::readSeqs(Sequence* s1,Sequence* s2,PhyloMatchScore *pms,Tree
return false;
// cout<<"something wrong"<<endl;
-// cout<<"ns: "<<s<<"; "<<proc<<" "<<state<<"; ("<<i<<" "<<j<<"): "<<newsite->index()<<endl;
+// cout<<"ns: "<<s<<"; "<<proc<<" "<<state<<"; ("<<i<<"/"<<sl2<<" "<<j<<"/"<<sl1<<"): "<<newsite->index()<<endl;
// exit(-1);
}
countSites++;
=====================================
src/readfile.cpp
=====================================
@@ -198,7 +198,7 @@ int ReadFile::readFile(const char* inputfile)
else
{
- cout<<"Input file format unrecognized. Only FASTA format supported. Exiting.\n\n";
+ cout<<"Input file format not recognized as Fasta, Nexus or Phylip. Format not supported. Exiting.\n\n";
exit(-1);
}
@@ -208,8 +208,8 @@ int ReadFile::readFile(const char* inputfile)
string temp = seqs.at(i);
transform( temp.begin(), temp.end(), temp.begin(), (int(*)(int))toupper );
- seqs.at(i) = temp;
+ seqs.at(i) = temp;
if ((int)seqs.at(i).length()<1)
{
cout<<"Failed to read sequence "<<names.at(i)<<". Exiting.\n\n";
@@ -217,6 +217,7 @@ int ReadFile::readFile(const char* inputfile)
}
string name = names.at(i);
+
while (nameset.find(name) != nameset.end())
{
cout<<"Sequence name "<<name<<" is defined more than once! Adding suffix '.1'.\n";
=====================================
src/readnewick.cpp
=====================================
@@ -19,6 +19,7 @@
***************************************************************************/
#include <cstdlib>
+#include <cctype>
#include <string>
#include <map>
#include <iostream>
@@ -123,12 +124,13 @@ void ReadNewick::buildTree(string s,map<string,TreeNode*>* nodes)
}
else
- {
+ {
cout<<"Problem with the guidetee: brackets ("<<open<<","<<end<<") and commas ("<<comma<<") don't match)"<<endl<<endl;
exit(-1);
}
int count = 1;
+ int biggest = 0;
string r;
@@ -181,17 +183,19 @@ void ReadNewick::buildTree(string s,map<string,TreeNode*>* nodes)
b--;
// cout<<"tag "<<tag<<endl;
- // An attempt to get around a BppAncestor bug: not needed anymore
+ // For RAxML ancestor tree
stringstream tc;
-// char * pEnd;
-// if(tag!="" && strtol(tag.c_str(),&pEnd,10)>0)
-// {
-// tc<<"#"<<tag<<"#";
-// }
-// else
-// {
+ char * pEnd;
+ if(tag == "ROOT")
+ tc<<"#ROOT#";
+ else if(tag!="" && is_number(tag) && strtol(tag.c_str(),&pEnd,10)>0)
+ {
+ tc<<"#"<<tag<<"#";
+ }
+ else
+ {
tc<<"#"<<count++<<"#";
-// }
+ }
AncestralNode *tn = new AncestralNode(n);
tn->setNodeName(tc.str());
=====================================
src/readnewick.h
=====================================
@@ -33,6 +33,13 @@ class ReadNewick
std::string s;
std::string root;
std::map<std::string,TreeNode*> nodes;
+
+ bool is_number(const std::string& s)
+ {
+ std::string::const_iterator it = s.begin();
+ while (it != s.end() && std::isdigit(*it)) ++it;
+ return !s.empty() && it == s.end();
+ }
public:
ReadNewick();
~ReadNewick();
=====================================
src/terminalnode.h
=====================================
@@ -53,6 +53,7 @@ public:
void getAllSubtrees(std::map<std::string,float> *subtrees) {}
void getAllSubtreesWithNodename(std::map<std::string,std::string> *subtrees) {}
+ void getAllFullSubtreesWithNodename(std::map<std::string,std::string> *subtrees,bool treefirst) {}
void getSubtreeBelow(std::string *subtree) { *subtree = nodeName; }
void markRealignSubtrees(std::map<std::string,float> *subtrees) {}
=====================================
src/terminalsequence.cpp
=====================================
@@ -70,7 +70,7 @@ TerminalSequence::TerminalSequence(string* s)
if (s->size()%3!=0)
{
- cout<<s->size()<<" "<<*s<<endl;
+// cout<<s->size()<<" "<<*s<<endl;
cout<<"codon sequence length is not multiple of three!"<<endl;
exit(0);
}
@@ -80,7 +80,6 @@ TerminalSequence::TerminalSequence(string* s)
{
codons.insert(make_pair(alpha.substr(i,3),i/3));
}
-
sAlpha /= 3;
codons.insert(make_pair("---",sAlpha));
@@ -96,14 +95,18 @@ TerminalSequence::TerminalSequence(string* s)
for (int i=0; i<(int)S.length(); i+=3)
{
- ci = codons.find(S.substr(i,3))->second;
+ ci=-1;
+ if(codons.find(S.substr(i,3)) != codons.end())
+ ci = codons.find(S.substr(i,3))->second;
if (ci>=0 && ci<sAlpha)
charseq += S.substr(i,3);
else if (S.substr(i,3)=="---" || S.substr(i,3)=="...")
;
else if(i+3<S.length() || PREALIGNED)
+ {
charseq += "NNN";
+ }
else
{
// cout<<"removing: "<<S.substr(i,3)<<endl;
@@ -129,7 +132,10 @@ TerminalSequence::TerminalSequence(string* s)
{
for (int i=0; i<(int)s->length(); i++)
{
- ci = fullAlpha.find(toupper(s->at(i)));
+ ci=-1;
+ if(fullAlpha.find(toupper(s->at(i))) != string::npos)
+ ci = fullAlpha.find(toupper(s->at(i)));
+
if (ci>=0 && ci<sFullAlpha)
charseq += s->at(i);
else
@@ -144,7 +150,9 @@ TerminalSequence::TerminalSequence(string* s)
{
for (int i=0; i<(int)s->length(); i++)
{
- ci = fullAlpha.find(toupper(s->at(i)));
+ ci=-1;
+ if(fullAlpha.find(toupper(s->at(i))) != string::npos)
+ ci = fullAlpha.find(toupper(s->at(i)));
if (ci>=0 && ci<sFullAlpha)
charseq += s->at(i);
else
=====================================
src/translatesequences.cpp
=====================================
@@ -193,12 +193,23 @@ bool TranslateSequences::translateDNA(std::vector<std::string> *names,std::vecto
vector<string>::iterator nit = names->begin();
vector<string>::iterator pit = protein->begin();
+// map<string,string>::iterator it = dnaSequences->begin();
+// for(;it!=dnaSequences->end();it++)
+// cout<<it->first<<endl;
+
for (; pit!=protein->end(); pit++,nit++)
{
+ string dnaname = *nit;
// string dnaSeq = dnaSeqs.find(*nit)->second;
- string dnaSeq = dnaSequences->find(*nit)->second;
+ if(dnaSequences->find(dnaname) == dnaSequences->end()
+ && dnaname.substr(0,3)=="Seq")
+ {
+ dnaname.at(0)='s';
+ }
+
+ string dnaSeq = dnaSequences->find(dnaname)->second;
string nuc = "";
// cout<<endl<<*nit<<endl<<*pit<<endl<<dnaSeq<<endl;
=====================================
src/treenode.h
=====================================
@@ -65,6 +65,7 @@ protected:
float branchLength; // length of the branch below
bool terminal; // is/not terminal node
bool root; // is/not root node
+ int namehash;
std::string ln,rn,n3; // names of left, right and third (unrooted) branch
float ld,rd,d3; // lengths of those branches
@@ -202,6 +203,7 @@ public:
virtual void getAllSubtrees(std::map<std::string,float> *subtrees) = 0;
virtual void getAllSubtreesWithNodename(std::map<std::string,std::string> *subtrees) = 0;
+ virtual void getAllFullSubtreesWithNodename(std::map<std::string,std::string> *subtrees,bool treefirst) = 0;
virtual void getSubtreeBelow(std::string *subtree) = 0;
virtual void markRealignSubtrees(std::map<std::string,float> *subtrees) = 0;
@@ -247,6 +249,16 @@ public:
return hash;
}
+ unsigned int hash2(const char* s,unsigned int seed = 0)
+ {
+ unsigned int hash = seed;
+ while (*s)
+ {
+ hash = hash * 101 + *s++;
+ }
+ return hash;
+ }
+
virtual void getCharactersAt(std::vector<std::string>* ,int,bool t=false ) {}
virtual void getAncCharactersAt(std::vector<std::string>* ,int ,bool,bool ) {}
virtual void getAllCharactersAt(std::vector<std::string>* ,int ,bool, bool ) {}
@@ -351,6 +363,66 @@ public:
virtual bool updateInsertionSite(int i, bool has_parent) {}
+
+ std::string markNodes()
+ {
+ if (this->terminal)
+ {
+ namehash = hash2(nodeName.c_str());
+ return this->nodeName;
+ }
+ std::string name0 = lChild->markNodes();
+ std::string name1 = rChild->markNodes();
+
+ std::string namef = name0+name1;
+ if(name0>name1)
+ namef = name1+name0;
+
+ namehash = hash2(namef.c_str());
+
+ return namef;
+ }
+
+ void getNamehashNames(std::map<int,std::string> *h)
+ {
+ if(!terminal)
+ {
+ lChild->getNamehashNames(h);
+ rChild->getNamehashNames(h);
+ }
+ h->insert(make_pair(namehash,nodeName));
+ return;
+ }
+
+
+ void updateBranchLengths(std::map<std::string,float> *brls,int *found)
+ {
+ if(!terminal)
+ {
+ lChild->updateBranchLengths(brls,found);
+ rChild->updateBranchLengths(brls,found);
+ }
+
+ std::map<std::string,float>::iterator it = brls->find(nodeName);
+ if(it != brls->end())
+ {
+ branchLength = it->second;
+ (*found)++;
+ }
+ }
+
+ void updateTerminalNames()
+ {
+ if(!terminal)
+ {
+ lChild->updateTerminalNames();
+ rChild->updateTerminalNames();
+ }
+ else if(nodeName.substr(0,3)=="seq")
+ {
+ nodeName.replace(0,3,"Seq");
+ }
+ }
};
#endif
View it on GitLab: https://salsa.debian.org/med-team/prank/-/commit/4c1744d0d4f46f9bdbaa08840c492ffeabb1e794
--
View it on GitLab: https://salsa.debian.org/med-team/prank/-/commit/4c1744d0d4f46f9bdbaa08840c492ffeabb1e794
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20250905/94afacf1/attachment-0001.htm>
More information about the debian-med-commit
mailing list