[med-svn] r28 - in trunk/packages: . kalign kalign/branches
kalign/branches/upstream kalign/branches/upstream/current
Charles Plessy
charles-guest at costa.debian.org
Sun Apr 30 11:45:35 UTC 2006
Author: charles-guest
Date: 2006-04-30 11:45:31 +0000 (Sun, 30 Apr 2006)
New Revision: 28
Added:
trunk/packages/kalign/
trunk/packages/kalign/branches/
trunk/packages/kalign/branches/upstream/
trunk/packages/kalign/branches/upstream/current/
trunk/packages/kalign/branches/upstream/current/COPYING
trunk/packages/kalign/branches/upstream/current/Makefile
trunk/packages/kalign/branches/upstream/current/README
trunk/packages/kalign/branches/upstream/current/kalign.c
trunk/packages/kalign/tags/
Log:
[svn-inject] Installing original source of kalign
Added: trunk/packages/kalign/branches/upstream/current/COPYING
===================================================================
--- trunk/packages/kalign/branches/upstream/current/COPYING 2006-04-23 00:57:51 UTC (rev 27)
+++ trunk/packages/kalign/branches/upstream/current/COPYING 2006-04-30 11:45:31 UTC (rev 28)
@@ -0,0 +1,340 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
Added: trunk/packages/kalign/branches/upstream/current/Makefile
===================================================================
--- trunk/packages/kalign/branches/upstream/current/Makefile 2006-04-23 00:57:51 UTC (rev 27)
+++ trunk/packages/kalign/branches/upstream/current/Makefile 2006-04-30 11:45:31 UTC (rev 28)
@@ -0,0 +1,30 @@
+PREFIX = /usr/local/bin
+CC = gcc
+CFLAGS = -O9 -funroll-loops -fomit-frame-pointer -pipe -Wall
+DEBUGFLAGS = -ggdb -Wall -m32
+
+SOURCES = kalign.c
+PROGS = kalign
+OBJECTS = $(SOURCES:.c=.o)
+DEBUGPROGS = kalign_debug
+DEBUGOBJECTS = $(SOURCES:.c=_debug.o)
+
+kalign: $(OBJECTS)
+ $(CC) $(CFLAGS) $(OBJECTS) -o $(PROGS)
+kalign.o: kalign.c
+ $(CC) $(CFLAGS) -c kalign.c
+
+debug: $(DEBUGOBJECTS)
+ $(CC) $(DEBUGFLAGS) $(DEBUGOBJECTS) -o $(DEBUGPROGS)
+
+%_debug.o: %.c
+ $(CC) $(DEBUGFLAGS) -c $< -o $@
+
+install :
+ mkdir -p $(PREFIX)
+ cp $(PROGS) $(PREFIX)
+ chmod 755 $(PREFIX)/$(PROGS)
+
+clean:
+ rm -f $(PROGS) $(OBJECTS)
+ rm -f *~
Property changes on: trunk/packages/kalign/branches/upstream/current/Makefile
___________________________________________________________________
Name: svn:executable
+
Added: trunk/packages/kalign/branches/upstream/current/README
===================================================================
--- trunk/packages/kalign/branches/upstream/current/README 2006-04-23 00:57:51 UTC (rev 27)
+++ trunk/packages/kalign/branches/upstream/current/README 2006-04-30 11:45:31 UTC (rev 28)
@@ -0,0 +1,72 @@
+-----------------------------------------------------------------------
+ Kalign version 1.03, Copyright (C) 2004, 2005 Timo Lassmann
+
+ http://msa.cgb.ki.se/
+ timo.lassmann at cgb.ki.se
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ A copy of this license is in the COPYING file.
+-----------------------------------------------------------------------
+Installation:
+
+%make
+
+and as root:
+
+%make install
+
+
+Usage:
+ kalign [Options] infile.fasta outfile.fasta
+ or:
+ kalign [Options] -i infile.fasta -o outfile.fasta
+ or:
+ kalign [Options] < infile.fasta > outfile.fasta
+
+ Options:
+ -i/-input Name of input file.
+ -o/-output Name of output file.
+ -gpo/-gap_open Gap open penalty (default 6.0).
+ -gpe/gap_ext Gap extension penalty (default 0.9).
+ -p/-points Wu-Manber algorithm used in both distance calculation and dynamic programming
+ -w Wu-Manber algorithm not used at all
+ -f/-fast fast heuristic alignment
+ ( - recommended for >500 sequences)
+
+ -q 'quiet' - no messages are sent to standard error
+
+ Without an option specified Kalign will use the Wu-Manber algorithm for distance calculation only.
+ ( - recommended for < 500 sequences)
+
+Revision History:
+
+Kalign 1.02:
+
+ - introduced -q (for 'quiet') option -> nothing is written to stderr
+
+Kalign 1.01:
+
+ - input alignment can be read from stdin and output printed to stdout
+
+ i.e. to run kalign:
+
+ kalign < input_alignment.fasta > output_alignment.fasta [options]
+
+Kalign 1.0:
+
+ initial release of Kalign
+
+
\ No newline at end of file
Added: trunk/packages/kalign/branches/upstream/current/kalign.c
===================================================================
--- trunk/packages/kalign/branches/upstream/current/kalign.c 2006-04-23 00:57:51 UTC (rev 27)
+++ trunk/packages/kalign/branches/upstream/current/kalign.c 2006-04-30 11:45:31 UTC (rev 28)
@@ -0,0 +1,2413 @@
+/*
+ Kalign.c
+
+ Released under GPL
+ Copyright (C) 2004, 2005 Timo Lassmann <timo.lassmann at cgb.ki.se>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ Kalign - version 1.0 (Aug. 2004)
+
+ Please send bug reports, comments etc. to:
+ timolassmann at gmail.com
+
+*/
+
+
+#include <unistd.h>
+#include <stdio.h>
+#include<ctype.h>
+#include <string.h>
+#include <stdlib.h>
+#include <getopt.h>
+#define SEEK_START 0
+#define SEEK_END 2
+#define INFTY 0x8000000
+
+void* tmalloc(int size);
+
+short gon250mt[]={
+ 24,
+ 0, 0,
+ 5, 0, 115,
+ -3, 0, -32, 47,
+ 0, 0, -30, 27, 36,
+ -23, 0, -8, -45, -39, 70,
+ 5, 0, -20, 1, -8, -52, 66,
+ -8, 0, -13, 4, 4, -1, -14, 60,
+ -8, 0, -11, -38, -27, 10, -45, -22, 40,
+ -4, 0, -28, 5, 12, -33, -11, 6, -21, 32,
+ -12, 0, -15, -40, -28, 20, -44, -19, 28, -21, 40,
+ -7, 0, -9, -30, -20, 16, -35, -13, 25, -14, 28, 43,
+ -3, 0, -18, 22, 9, -31, 4, 12, -28, 8, -30, -22, 38,
+ 3, 0, -31, -7, -5, -38, -16, -11, -26, -6, -23, -24, -9, 76,
+ -2, 0, -24, 9, 17, -26, -10, 12, -19, 15, -16, -10, 7, -2, 27,
+ -6, 0, -22, -3, 4, -32, -10, 6, -24, 27, -22, -17, 3, -9, 15, 47,
+ 11, 0, 1, 5, 2, -28, 4, -2, -18, 1, -21, -14, 9, 4, 2, -2, 22,
+ 6, 0, -5, 0, -1, -22, -11, -3, -6, 1, -13, -6, 5, 1, 0, -2, 15, 25,
+ 1, 0, 0, -29, -19, 1, -33, -20, 31, -17, 18, 16, -22, -18, -15, -20, -10, 0, 34,
+ -36, 0, -10, -52, -43, 36, -40, -8, -18, -35, -7, -10, -36, -50, -27, -16, -33, -35, -26, 142,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ -22, 0, -5, -28, -27, 51, -40, 22, -7, -21, 0, -2, -14, -31, -17, -18, -19, -19, -11, 41, 0, 78,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+struct sequence_info{
+ int** s;
+ int* sl;
+ int** sn;
+ int* lsn;
+ int** gis;
+ int** relpos;
+ int** sip;
+ int* nsip;
+};
+
+struct dp_matrix{
+ int** tb;
+ int** m;
+ int* a;
+ int* ga;
+ int* gb;
+ int* true_x;
+ int* true_y;
+ void* tb_mem;
+ void* m_mem;
+ int x;
+ int y;
+};
+
+struct dp_matrix* dp_matrix_alloc(struct dp_matrix *dp,int x,int y,int p);
+struct dp_matrix* dp_matrix_realloc(struct dp_matrix *dp,int x,int y,int p);
+struct dp_matrix* dp_matrix_init(struct dp_matrix *dp,int x,int y);
+void dp_matrix_free(struct dp_matrix *dp,int p);
+
+
+int numseq = 0;
+int numprofiles = 0;
+
+int** ten_wu_manber(int* seq,int len,int p[]);
+
+struct sequence_info* read_sequences(struct sequence_info* si,char* infile);
+double distance_calculation(int** matches[],int len_a,int len_b,int a,int b);
+double distance_calculation2(int** matches[],int len_a,int len_b,int a,int b);
+
+void fill_hash(struct sequence_info* si,int** matches[],int nowu);
+int* upgma(double **dm,int* tree);
+void add_ptm(int** matches[],int** matrix,int a,int b);
+struct dp_matrix* consistency_check(struct dp_matrix *dp,int len_a,int len_b,int dia);
+int* make_profile(int* prof,int* seq,int len,int** subm);
+int** read_matrix(short *matrix_pointer,int** subm,int gpo);
+int* main_fast_dyn(int* path,struct dp_matrix *dp,int* prof1,int* prof2,int len_a,int len_b);
+
+int** make_new_profile(int**newp,int** profa,int** profb,int* path);
+
+void print_alignment(struct sequence_info* si,char* outfile);
+struct sequence_info* update(struct sequence_info* si,int** profile,int a,int b,int newnode,int* path);
+
+void set_gap_penalties(int* prof,int len,int nsip);
+void update_gaps(int old_len,int*gis,int new_len,int *newgaps);
+void update_hash(struct sequence_info* si,int** matches[],int a,int b,int new);
+int gpo = 0;
+void add_ptm2(int** matches[],struct dp_matrix *dp,int a,int b);
+struct dp_matrix* consistency_check2(struct dp_matrix *dp,int len_a,int len_b,int dia);
+char* get_input_into_string(char* string,char* infile);
+
+
+int main(int argc,char **argv)
+{
+ char *infile = 0;
+ char *outfile = 0;
+ int i,j,c;
+ int dia = 24;
+ static int help_flag = 0;
+ static int gpe = 0;
+ static int df = 0;
+ static int quiet = 0;
+ static int nowu = 0;
+ int* tree = 0;
+ static int points = 0;
+ int a,b;
+ int len_a;
+ int len_b;
+ int newnode = 0;
+ int* path = 0;
+ int** matches[8000];
+ double** dm = 0;
+
+ static short *matrix_pointer;
+ int** submatrix = 0;
+
+ int** profile = 0;
+ int* profa = 0;
+ int* profb = 0;
+
+ struct sequence_info *si = 0;
+
+ struct dp_matrix *dp = 0;
+
+ static char license[] = "\n\
+Kalign version 1.04, Copyright (C) 2004, 2005, 2006 Timo Lassmann\n\n\
+ Kalign is free software. You can redistribute it and/or modify\n\
+ it under the terms of the GNU General Public License as\n\
+ published by the Free Software Foundation.\n\n";
+
+
+ static char usage[] = "\n\
+Usage:\n\
+ kalign [Options] infile.fasta outfile.fasta\n\
+ or:\n\
+ kalign [Options] -i infile.fasta -o outfile.fasta \n\
+ or:\n\
+ kalign [Options] < infile.fasta > outfile.fasta \n\
+ \n\
+ Options:\n\
+ -i/-input Name of input file.\n\
+ -o/-output Name of output file.\n\
+ -g/-gpo/-gap_open Gap open penalty (default 6.0).\n\
+ -e/-gpe/gap_ext Gap extension penalty (default 0.9).\n\
+ -p/-points Wu-Manber algorithm used in both distance calculation and dynamic programming\n\
+ -w Wu-Manber algorithm not used at all\n\
+ -f/-fast fast heuristic alignment\n\
+ ( - recommended for >500 sequences)\n\
+ \n\
+ -q 'quiet' - no messages are sent to standard error\n\
+ \n\
+ In default mode Kalign will use the Wu-Manber algorithm for distance calculation only.\n\
+ ( - recommended for < 500 sequences)\n\n\
+Please cite:\n\n\
+ Timo Lassmann and Erik L.L. Sonnhammer (2005)\n\
+ Kalign - an accurate and fast multiple sequence alignment algorithm.\n\
+ BMC Bioinformatics 6:298\n\
+ \n\
+ ";
+
+ while (1){
+ static struct option long_options[] ={
+ {"help", no_argument,0,'h'},//-h
+ {"points", no_argument, 0, 'p'},//-n
+ {"fast", no_argument, 0, 'f'},//-n
+ {"input", required_argument, 0, 'i'},
+ {"output", required_argument, 0, 'o'},
+ {"gap_open", required_argument, 0, 0},
+ {"gpo", required_argument, 0, 0},
+ {"gap_ext", required_argument, 0, 0},
+
+ {"gpe", required_argument, 0, 0},
+ {0, 0, 0, 0}
+ };
+ int option_index = 0;
+ c = getopt_long_only (argc, argv, "hi:o:fnqpw",long_options, &option_index);
+ //c = getopt (argc, argv, "hi:o:fnqg:e:pw");
+ /* Detect the end of the options. */
+ if (c == -1){
+ break;
+ }
+ switch (c){
+ case 0:
+ if (long_options[option_index].flag != 0){
+// fprintf(stderr,"option %s\n",long_options[option_index].name);
+ break;
+ }
+// fprintf (stderr,"option %s", long_options[option_index].name);
+ if (optarg){
+ if (option_index == 5){
+ gpo = atof(optarg)*10;
+ }
+ if (option_index == 6){
+ gpo = atof(optarg)*10;
+ }
+ if(option_index == 7){
+ gpe = atof(optarg)*10;
+ }
+
+ if(option_index == 8){
+ gpe = atof(optarg)*10;
+ }
+ }
+// fprintf (stderr,"\n");
+ break;
+ case 'q':
+ quiet = 1;
+ break;
+ case 'h':
+ help_flag = 1;
+ quiet = 0;
+ break;
+ case 'g':
+ gpo = atof(optarg)*10;
+ break;
+ case 'e':
+ gpe = atof(optarg)*10;
+ break;
+ case 'i':
+ infile = optarg;
+ break;
+ case 'o':
+ outfile = optarg;
+ break;
+ case 'p':
+ points = 1;
+ break;
+ case 'w':
+ nowu = 1;
+ break;
+ case 'f':
+ df = 1;
+ points = 0;
+ //nowu = 1;
+ break;
+ case '?':
+ exit(1);
+ break;
+ default:
+ abort ();
+ }
+ }
+ if (optind < argc){
+ c = 0;
+ while (optind < argc){
+ switch(c){
+ case 0:
+ infile = argv[optind++];
+ break;
+ case 1:
+ outfile = argv[optind++];
+ break;
+ default:
+ fprintf(stderr,"Unrecognised option:%s\n",argv[optind++]);
+ break;
+ }
+ c++;
+ }
+ }
+
+
+ if(!quiet)fprintf(stderr,"%s", license);
+
+ if (help_flag){
+ fprintf(stderr,"%s\n", usage);
+ exit(1);
+ }
+
+ char *string = 0;
+ string = get_input_into_string(string,infile);
+ if(string){
+ //printf("%s\n",string);
+ si = read_sequences(si,string);
+ /*for (i = 0;i < numseq;i++){
+ for(j = 0;j < si->sl[i];j++){
+ printf("%c",(char)si->s[i][j]+65);
+ }
+ }*/
+ //exit(0);
+ }else{
+ if(!quiet) fprintf(stderr,"%s\n", usage);
+ exit(1);
+ }
+
+
+
+ //if (infile){
+ // si = read_sequences(si,infile);
+ //}else{
+ // fprintf(stderr,"No input file!\n");
+ // exit(1);
+ //}
+ //Got sequences;
+
+
+ tree = tmalloc (sizeof(int)*(numseq-1)*2);
+ if(!df){
+
+ for (i = 8000;i--;){
+ matches[i] = 0;
+ }
+ fill_hash(si,matches,nowu);
+ //fast distance calculation;
+ dm = tmalloc (sizeof(double*)*numprofiles);
+ for (i = numprofiles;i--;){
+ dm[i] = tmalloc (sizeof (double)*numprofiles);
+ for (j = numprofiles;j--;){
+ dm[i][j] = 0;
+ }
+ }
+ if(!quiet)fprintf(stderr,"Distance Calculation:\n");
+ b = (numseq*(numseq-1))/2;
+ a = 1;
+ if (!nowu){
+ for (i = 0; i < numseq;i++){
+ for (j = i+1; j < numseq;j++){
+ dm[i][j] = distance_calculation(matches,si->sl[i],si->sl[j],i,j);
+ if(!quiet)fprintf(stderr,"\r%8.0f percent done",(double)a /(double)b * 100);
+ a++;
+ }
+ }
+ }else{
+ for (i = 0; i < numseq;i++){
+ for (j = i+1; j < numseq;j++){
+ dm[i][j] = distance_calculation2(matches,si->sl[i],si->sl[j],i,j);
+ if(!quiet)fprintf(stderr,"\r%8.0f percent done",(double)a /(double)b * 100);
+ a++;
+ }
+ }
+ }
+ for (i = 8000;i--;){
+ if (matches[i]){
+ for (j = numseq;j--;){
+ if (matches[i][j]){
+ matches[i][j][0] &= 0x0000ffff;
+ }
+ }
+ }
+ }
+ tree = upgma(dm,tree);
+ }
+ if(df){
+ newnode = numseq;
+ c = 2;
+ tree[0] = 0;
+ tree[1] = 1;
+ for (i = 2;i < numseq;i++){
+ tree[c] = i;
+ tree[c+1] = newnode;
+ newnode++;
+ c += 2;
+ }
+
+ }
+ //Alignment
+ //read in substitution matrix;
+
+
+ matrix_pointer = gon250mt;
+ if(!gpo)gpo = 61;
+ if(gpe)gpe = gpe << 1;
+ if(!gpe)gpe = 18;
+
+ submatrix = tmalloc(sizeof (int*) * 32);
+ for (i = 32;i--;){
+ submatrix[i] = tmalloc(sizeof(int)*32);
+ for (j = 32;j--;){
+ submatrix[i][j] = gpe;
+ }
+ }
+ submatrix = read_matrix(matrix_pointer,submatrix,gpo);
+
+ //fprintf(stderr,"%d\n",numprofiles);
+ profile = tmalloc (sizeof(int*)*numprofiles);
+ for ( i = numprofiles;i--;){
+ profile[i] = 0;
+ }
+ //dynamic programming matrix
+
+ dp = dp_matrix_alloc(dp,511,511,points);
+
+ newnode = numseq;
+ if(!quiet)fprintf(stderr,"\nAlignment:\n");
+ for (i = 0; i < (numseq-1)*2;i +=2){
+ //fprintf(stderr,"Aligning:%d and %d ->%d\n",tree[i],tree[i+1],newnode);
+ if(!quiet)fprintf(stderr,"\r%8.0f percent done",(double)(newnode-numseq) /(double)numseq * 100);
+ a = tree[i];
+ b = tree[i+1];
+ len_a = si->sl[a];
+ len_b = si->sl[b];
+ if (len_a < len_b){
+ j = a;
+ a = b;
+ b = j;
+ j = len_a;
+ len_a = len_b;
+ len_b = j;
+ }
+ dp = dp_matrix_realloc(dp,len_a,len_b,points);
+ if (points){
+ //add_poits_to_matrix
+ dp = dp_matrix_init(dp,len_a,len_b);
+ add_ptm(matches,dp->m,a,b);
+ dp = consistency_check(dp,len_a,len_b,dia);
+ //add_ptm2(matches,dp,a,b);
+ //dp = consistency_check2(dp,len_a,len_b,dia);
+ }
+
+ // no points
+ if (!points){
+ for (j = 1; j <= si->sl[a];j++){
+ dp->true_x[j] = 0;
+ }
+ for (j = 1; j <= si->sl[b];j++){
+ dp->true_y[j] = 0;
+ }
+ dp->true_x[0] = 2;
+ dp->true_y[0] = 2;
+ }
+ /*for (j = 1; j < si->sl[a];j++){
+ fprintf(stderr,"%d",dp->true_x[j]);
+ }
+ fprintf(stderr,"\n");
+ for (j = 1; j < si->sl[b];j++){
+ fprintf(stderr,"%d",dp->true_y[j]);
+ }
+ fprintf(stderr,"\n");*/
+
+ path = tmalloc(sizeof(int)*(len_a+len_b+2));
+ for (j = len_a+len_b+2;j--;){
+ path[j] = 0;
+ }
+ if (a < numseq){
+ profile[a] = make_profile(profile[a],si->s[a],len_a,submatrix);
+ }
+ if (b < numseq){
+ profile[b] = make_profile(profile[b],si->s[b],len_b,submatrix);
+ }
+ profa = profile[a];
+ profb = profile[b];
+
+ set_gap_penalties(profa,len_a,si->nsip[b]);
+ set_gap_penalties(profb,len_b,si->nsip[a]);
+ path = main_fast_dyn(path,dp,profa,profb,len_a,len_b);
+
+ profile[newnode] = tmalloc(sizeof(int)*64*(path[0]+1));
+ si = update(si,profile,a,b,newnode,path);
+ if (points){
+ update_hash(si,matches,a,b,newnode);
+ }
+ free(profa);
+ free(profb);
+ newnode++;
+ }
+ if(!quiet)fprintf(stderr,"\r%8.0f percent done",100.0);
+ if(!quiet)fprintf(stderr,"\n");
+ print_alignment(si,outfile);
+ if (!df){
+ for (i = numprofiles;i--;){
+ free(dm[i]);
+ }
+ free(dm);
+ }
+
+
+
+ dp_matrix_free(dp,points);
+ /*for (i = 8000;i--;){
+ if (matches[i]){
+ for (j = numprofiles;j--;){
+ if (matches[i][j]){
+ free(matches[i][j]);
+ }
+ }
+ free(matches[i]);
+ }
+ }*/
+ for (i = 0; i < numprofiles;i++){
+ free(si->sip[i]);
+ // si->relpos[i] = 0;
+ }
+ free(si->sip);
+ free(si->nsip);
+ //
+ for (i = 32;i--;){
+ free(submatrix[i]);
+ }
+ free(submatrix);
+ free(profile[numprofiles-1]);
+ free(profile);
+ free(tree);
+
+ for (i = numseq;i--;){
+ free(si->s[i]);
+ free(si->sn[i]);
+ free(si->gis[i]);
+ //free(si->relpos[i]);
+ }
+ for (i = numprofiles;i--;){
+ free(si->relpos[i]);
+ }
+ free(si->s);
+ free(si->sn);
+ free(si->gis);
+ free(si->relpos);
+ free(si->sl);
+ free(si->lsn);
+ free(si);
+ return 1;
+}
+
+void set_gap_penalties(int* prof,int len,int nsip)
+{
+ int i;
+ prof += (64 *(len));
+ i = len;
+ while(i--){
+ prof -= 64;
+ prof[26] = prof[41]*nsip;
+ prof[27] = prof[46]*nsip;
+ }
+}
+
+struct sequence_info* update(struct sequence_info* si,int** profile,int a,int b,int newnode,int* path)
+{
+ int i,c;
+ int posa = 0;
+ int posb = 0;
+ int adda = 0;
+ int addb = 0;
+ int* gap_a = 0;
+ int* gap_b = 0;
+ int len_a;
+ int len_b;
+ int* newp = 0;
+ int* profa = 0;
+ int* profb = 0;
+ len_a = si->sl[a];
+ len_b = si->sl[b];
+
+ newp = profile[newnode];
+ profa = profile[a];
+ profb = profile[b];
+ si->sl[newnode] = path[0];
+ si->relpos[newnode] = tmalloc(sizeof(int) * (si->sl[newnode]+1));
+ for (i = si->sl[newnode]+1;i--;){
+ si->relpos[newnode][i] = i;
+ }
+ gap_a = tmalloc ((len_a+1)*sizeof(int));
+ gap_b = tmalloc ((len_b+1)*sizeof(int));
+
+ for (i = len_a+1;i--;){
+ gap_a[i] = 0;
+ }
+ for (i = len_b+1;i--;){
+ gap_b[i] = 0;
+ }
+
+ c = 1;
+ while(path[c] != 3){
+ if (!path[c]){
+ // fprintf(stderr,"Align %d\n",c);
+ for (i = 64; i--;){
+ newp[i] = profa[i] + profb[i];
+ }
+ si->relpos[a][posa] += adda;
+ si->relpos[b][posb] += addb;
+ profa += 64;
+ profb += 64;
+ posa++;
+ posb++;
+ }
+ if (path[c] & 1){
+ // fprintf(stderr,"Gap_A:%d\n",c);
+ for (i = 64; i--;){
+ newp[i] = profb[i];
+ }
+ si->relpos[b][posb] += addb;
+ adda += 1;
+ gap_a[posa] += 1;
+ profb += 64;
+ posb++;
+ if (path[c] & 4){
+ // fprintf(stderr,"Gap_open");
+ newp[9] += si->nsip[a];//1;
+ i = si->nsip[a] *gpo;
+ newp[32] -= i;//a
+ newp[33] -= i;//b
+ newp[34] -= i;//c
+ newp[35] -= i;//d
+ newp[36] -= i;//e
+ newp[37] -= i;//f
+ newp[38] -= i;//g
+ newp[39] -= i;//h
+ newp[40] -= i;//i
+ //newp[41] -= i;//j
+ newp[42] -= i;//k
+ newp[43] -= i;//l
+ newp[44] -= i;//m
+ newp[45] -= i;//n
+ //newp[46] -= i;//o
+ newp[47] -= i;//p
+ newp[48] -= i;//q
+ newp[49] -= i;//r
+ newp[50] -= i;//s
+ newp[51] -= i;//t
+ //newp[52] -= i;//u
+ newp[53] -= i;//v
+ newp[54] -= i;//w
+ newp[52] -= i;//x
+ newp[53] -= i;//y
+ newp[54] -= i;//z
+ }
+ if (path[c] & 16){
+ newp[9] += si->nsip[a];//1;
+ i = si->nsip[a] *gpo;
+ newp[32] -= i;//a
+ newp[33] -= i;//b
+ newp[34] -= i;//c
+ newp[35] -= i;//d
+ newp[36] -= i;//e
+ newp[37] -= i;//f
+ newp[38] -= i;//g
+ newp[39] -= i;//h
+ newp[40] -= i;//i
+ //newp[41] -= i;//j
+ newp[42] -= i;//k
+ newp[43] -= i;//l
+ newp[44] -= i;//m
+ newp[45] -= i;//n
+ //newp[46] -= i;//o
+ newp[47] -= i;//p
+ newp[48] -= i;//q
+ newp[49] -= i;//r
+ newp[50] -= i;//s
+ newp[51] -= i;//t
+ //newp[52] -= i;//u
+ newp[53] -= i;//v
+ newp[54] -= i;//w
+ newp[52] -= i;//x
+ newp[53] -= i;//y
+ newp[54] -= i;//z
+ }
+ }
+ if (path[c] & 2){
+ // fprintf(stderr,"Gap_B:%d\n",c);
+ for (i = 64; i--;){
+ newp[i] = profa[i];
+ }
+ si->relpos[a][posa] += adda;
+ addb += 1;
+ gap_b[posb] += 1;
+ posa++;
+ profa+=64;
+ if (path[c] & 4){
+ // fprintf(stderr,"Gap_open");
+ newp[9] += si->nsip[b];//1;
+ i = si->nsip[b] *gpo;
+ newp[32] -= i;//a
+ newp[33] -= i;//b
+ newp[34] -= i;//c
+ newp[35] -= i;//d
+ newp[36] -= i;//e
+ newp[37] -= i;//f
+ newp[38] -= i;//g
+ newp[39] -= i;//h
+ newp[40] -= i;//i
+ //newp[41] -= i;//j
+ newp[42] -= i;//k
+ newp[43] -= i;//l
+ newp[44] -= i;//m
+ newp[45] -= i;//n
+ //newp[46] -= i;//o
+ newp[47] -= i;//p
+ newp[48] -= i;//q
+ newp[49] -= i;//r
+ newp[50] -= i;//s
+ newp[51] -= i;//t
+ //newp[52] -= i;//u
+ newp[53] -= i;//v
+ newp[54] -= i;//w
+ newp[52] -= i;//x
+ newp[53] -= i;//y
+ newp[54] -= i;//z
+ }
+ if (path[c] & 16){
+ // fprintf(stderr,"Gap_close");
+ newp[9] += si->nsip[b];//1;
+ i = si->nsip[b] *gpo;
+ newp[32] -= i;//a
+ newp[33] -= i;//b
+ newp[34] -= i;//c
+ newp[35] -= i;//d
+ newp[36] -= i;//e
+ newp[37] -= i;//f
+ newp[38] -= i;//g
+ newp[39] -= i;//h
+ newp[40] -= i;//i
+ //newp[41] -= i;//j
+ newp[42] -= i;//k
+ newp[43] -= i;//l
+ newp[44] -= i;//m
+ newp[45] -= i;//n
+ //newp[46] -= i;//o
+ newp[47] -= i;//p
+ newp[48] -= i;//q
+ newp[49] -= i;//r
+ newp[50] -= i;//s
+ newp[51] -= i;//t
+ //newp[52] -= i;//u
+ newp[53] -= i;//v
+ newp[54] -= i;//w
+ newp[52] -= i;//x
+ newp[53] -= i;//y
+ newp[54] -= i;//z
+ }
+ }
+ newp += 64;
+ c++;
+ }
+ for (i = 64; i--;){
+ newp[i] = 0;
+ }
+
+ //fprintf(stderr,"%d-%d %d %d\n",c,path[0],len_a,len_b);
+ si->nsip[newnode] = si->nsip[a] + si->nsip[b];
+ si->sip[newnode] = tmalloc(sizeof(int)*si->nsip[newnode]);
+ c =0;
+ for (i = si->nsip[a];i--;){
+ si->sip[newnode][c] = si->sip[a][i];
+ update_gaps(si->sl[si->sip[a][i]],si->gis[si->sip[a][i]],si->sl[newnode],gap_a);
+ c++;
+ }
+ for (i = si->nsip[b];i--;){
+ si->sip[newnode][c] = si->sip[b][i];
+ update_gaps(si->sl[si->sip[b][i]],si->gis[si->sip[b][i]],si->sl[newnode],gap_b);
+ c++;
+ }
+ free(gap_a);
+ free(gap_b);
+ free(path);
+ return si;
+}
+
+
+void update_gaps(int old_len,int*gis,int new_len,int *newgaps)
+{
+ unsigned int i,j;
+ int add = 0;
+ int rel_pos = 0;
+ for (i = 0; i <= old_len;i++){
+ add = 0;
+ for (j = rel_pos;j <= rel_pos + gis[i];j++){
+ if (newgaps[j] != 0){
+ add += newgaps[j];
+ }
+ }
+ rel_pos += gis[i]+1;
+ gis[i] += add;
+ }
+}
+
+void print_alignment(struct sequence_info* si,char* outfile)
+{
+ int i,j,c;
+ int tmp;
+ FILE *output_file = NULL;
+ if(outfile){
+ if ((output_file = fopen(outfile, "w")) == NULL){
+ fprintf(stderr,"can't open output\n");
+ }else{
+ for (i = 0; i < numseq;i++){
+ for (j =0; j < si->lsn[i];j++){
+ putc(si->sn[i][j],output_file);
+ }
+ putc('\n',output_file);
+ c = 0;
+ for (j = 0; j < si->sl[i];j++){
+ tmp =si->gis[i][j];
+ while (tmp){
+ putc('-',output_file);
+ c++;
+ if(c == 60){
+ putc('\n',output_file);
+ c = 0;
+ }
+ tmp--;
+ }
+ putc((char)si->s[i][j]+65,output_file);
+ c++;
+ if(c == 60){
+ putc('\n',output_file);
+ c = 0;
+ }
+ }
+ tmp =si->gis[i][si->sl[i]];
+ while (tmp){
+ putc('-',output_file);
+ c++;
+ if(c == 60){
+ putc('\n',output_file);
+ c = 0;
+ }
+ tmp--;
+ }
+ putc('\n',output_file);
+ }
+ }
+ }else{
+ for (i = 0; i < numseq;i++){
+ for (j =0; j < si->lsn[i];j++){
+ printf("%c",si->sn[i][j]);
+ }
+ printf("\n");
+ c = 0;
+ for (j = 0; j < si->sl[i];j++){
+ tmp =si->gis[i][j];
+ while (tmp){
+ printf("-");
+ c++;
+ if(c == 60){
+ printf("\n");
+ c = 0;
+ }
+ tmp--;
+ }
+ printf("%c",(char)si->s[i][j]+65);
+ c++;
+ if(c == 60){
+ printf("\n");
+ c = 0;
+ }
+ }
+ tmp =si->gis[i][si->sl[i]];
+ while (tmp){
+ printf("-");
+ c++;
+ if(c == 60){
+ printf("\n");
+ c = 0;
+ }
+ tmp--;
+ }
+ printf("\n");
+ }
+ }
+}
+
+int* main_fast_dyn(int* path,struct dp_matrix *dp,int* prof1,int* prof2,int len_a,int len_b)
+{
+ register int i,j,c;
+ int i_limit = 0;
+ int j_limit = 0;
+ int startx = 0;
+ int starty = 0;
+ int endx = 0;
+ int endy = 0;
+ int* tx = 0;
+ int* ty = 0;
+ int** trace = 0;
+ int* gap_a = 0;
+ int* gap_b = 0;
+ int* align = 0;
+ int* tracep = 0;
+ int pa = 0;
+ int pga = 0;
+ int pgb = 0;
+ int ca = 0;
+ int cga = 0;
+ int cgb = 0;
+ int ipos;
+ int jpos;
+ unsigned int* freq = 0;
+
+ tx = dp->true_x;
+ ty = dp->true_y;
+
+
+ freq = tmalloc((len_a+1) * 26 * sizeof(unsigned int));
+ prof1 += len_a << 6;
+ freq += len_a *26;
+ for (i = len_a;i--;){
+ prof1 -= 64;
+ freq -= 26;
+ c = 1;
+ for (j = 26; j--;){
+ if(prof1[j]){
+ freq[c] = j;
+ c++;
+ }
+ }
+ freq[0] = c;
+ }
+
+ align = dp->a;
+ gap_a = dp->ga;
+ gap_b = dp->gb;
+ align[0] = 0;
+ gap_a[0] = -INFTY;
+ gap_b[0] = -INFTY;
+ trace = dp->tb;
+ endx = len_a;
+ startx = len_a;
+ endy = len_b;
+ starty = len_b;
+
+ trace[len_a][len_b] = 32;
+
+ prof1 += len_a << 6;
+
+ freq += len_a *26;
+
+ do{
+ while(tx[startx] != 2){
+ startx--;
+ }
+ while(ty[starty] != 2){
+ starty--;
+ }
+ i_limit = endx-startx;
+ j_limit = endy-starty;
+ //copy last cell;
+ align[j_limit] = align[0];
+ gap_a[j_limit] = gap_a[0];
+ gap_b[j_limit] = gap_b[0];
+ //init of first row;
+ tracep = trace[endx];
+ j = j_limit;
+ if (endx == len_a){
+ while(--j){
+ align[j] = -INFTY;
+ gap_a[j] = 0;
+ gap_b[j] = -INFTY;
+ tracep[starty+j] = 8;
+ }
+ }else{
+ prof2 += endy << 6;
+ while(--j){
+ prof2 -= 64;
+ tracep[starty+j] = 0;
+ align[j] = -INFTY;
+ gap_a[j] = align[j+1] + prof2[26];
+ if (gap_a[j+1] > gap_a[j]){
+ gap_a[j] = gap_a[j+1];
+ tracep[starty+j] |= 8;
+ }
+ gap_b[j] = -INFTY;
+ }
+ prof2 -= (starty+1) << 6;//+1 cos while(--j) stops at 1;(1-1 = 0 stop!!!)
+ }
+ align[0] = -INFTY;
+ gap_a[0] = -INFTY;
+ gap_b[0] = -INFTY;
+ prof2 += starty << 6;
+ i = i_limit;
+ while(--i){
+ prof1 -= 64;
+
+ freq -= 26;
+
+ ipos = startx+i;
+ tracep = trace[ipos];
+
+ pa = align[j_limit];
+ pga = gap_a[j_limit];
+ pgb = gap_b[j_limit];
+
+ align[j_limit] = -INFTY;
+ gap_a[j_limit] = -INFTY;
+
+ tracep[endy] = 0;
+
+ if (endy == len_b){
+ gap_b[j_limit] = 0;
+ tracep[endy] |= 16;
+ }else{
+ gap_b[j_limit] = pa+prof1[26];
+ if(pgb > gap_b[j_limit]){
+ gap_b[j_limit] = pgb;//pgb+prof2[endy][27];
+ tracep[endy] |= 16;
+ }
+ }
+ j = j_limit;
+ prof2 += j_limit << 6;
+ while(--j){
+ prof2 -= 64;
+ jpos = starty+j;
+ ca = align[j];
+ cga = gap_a[j];
+ cgb = gap_b[j];
+ align[j] = pa;
+ tracep[jpos] = 1;
+ if((c = pga+(prof2+ 64)[26]) > align[j]){
+ align[j] = c;//pga+prof1[ipos+1][26];
+ tracep[jpos] = 2;
+ }
+ if((c = pgb+(prof1+ 64)[26]) > align[j]){
+ align[j] = c;//pgb+prof2[jpos+1][26];
+ tracep[jpos] = 4;
+ }
+ for (c = freq[0];--c;){
+ align[j] += prof1[freq[c]]*prof2[freq[c] | 32];
+ }
+ gap_a[j] = align[j+1]+prof2[26];
+ if (gap_a[j+1] > gap_a[j]){
+ gap_a[j] = gap_a[j+1];//gap_a[j+1]+prof1[ipos][27];
+ tracep[jpos] |= 8;
+ }
+ gap_b[j] = ca+prof1[26];// prof2[jpos][26];
+ if(cgb > gap_b[j]){
+ gap_b[j] = cgb;//cgb+prof2[jpos][27];
+ tracep[jpos] |= 16;
+ }
+ pa = ca;
+ pga = cga;
+ pgb = cgb;
+ }
+ prof2 -= 64;
+ //LAST CELL (0)
+ ca = align[0];
+ cga = gap_a[0];
+ cgb = gap_b[0];
+
+ align[0] = pa;
+ tracep[starty] = 1;
+ if((c = pga+(prof2+ 64)[26]) > align[0]){
+ align[0] = c;//pga+prof1[ipos+1][26];
+ tracep[starty] = 2;
+ }
+ if((c = pgb+(prof1+ 64)[26]) > align[0]){
+ align[0] = c;//pgb+prof2[jpos+1][26];
+ tracep[starty] = 4;
+ }
+ for (c = freq[0];--c;){
+ align[j] += prof1[freq[c]]*prof2[freq[c] | 32];
+ }
+
+ gap_a[j] = -INFTY;
+
+ gap_b[0] = ca+prof1[26];
+ if(cgb > gap_b[0]){
+ gap_b[0] = cgb;
+ tracep[starty] |= 16;
+ }
+ }
+ prof1 -= 64;
+
+ freq -= 26;
+ tracep = trace[startx];
+ j = j_limit;
+
+ prof2 += j_limit << 6;
+ pa = align[j];
+ pga = gap_a[j];
+ pgb = gap_b[j];
+
+ align[j] = -INFTY;
+ gap_a[j] = -INFTY;
+ gap_b[j_limit] = -INFTY;
+ while(--j){
+ prof2 -= 64;
+
+ jpos = starty+j;
+
+ ca = align[j];
+ cga = gap_a[j];
+ cgb = gap_b[j];
+
+ align[j] = pa;
+ tracep[jpos] = 1;
+ if((c = pga+(prof2+ 64)[26]) > align[j]){
+ align[j] = c;//pga+prof1[ipos+1][26];
+ tracep[jpos] = 2;
+ }
+ //Gap_b->Align
+ if((c = pgb+(prof1+ 64)[26]) > align[j]){
+ align[j] = c;//pgb+prof2[jpos+1][26];
+ tracep[jpos] = 4;
+ }
+
+ for (c = freq[0];--c;){
+ align[j] += prof1[freq[c]]*prof2[freq[c] | 32];
+ }
+ gap_a[j] = align[j+1]+prof2[26];
+ if (gap_a[j+1] > gap_a[j]){
+ gap_a[j] = gap_a[j+1];//gap_a[j+1]+prof1[ipos][27];
+ tracep[jpos] |= 8;
+ }
+ gap_b[j] = -INFTY;
+ pa = ca;
+ pga = cga;
+ pgb = cgb;
+ }
+
+ prof2 -= 64;
+
+ ca = align[0];
+ cga = gap_a[0];
+ cgb = gap_b[0];
+ align[0] = pa;
+ tracep[starty] = 1;
+ if((c = pga+(prof2+ 64)[26]) > align[0]){
+ align[0] = c;//pga+prof1[ipos+1][26];
+ tracep[starty] = 2;
+ }
+ if((c = pgb+(prof1+ 64)[26]) > align[0]){
+ align[0] = c;//pgb+prof2[jpos+1][26];
+ tracep[starty] = 4;
+ }
+
+ for (c = freq[0];--c;){
+ align[j] += prof1[freq[c]]*prof2[freq[c] | 32];
+ }
+ gap_a[j] = align[j+1]+prof2[26];
+ //fprintf(stderr,"Gap-a:%d\n",prof2[26]);
+ //Gap_a->Gap_a
+ if (gap_a[j+1] > gap_a[j]){
+ gap_a[j] = gap_a[j+1];//gap_a[j+1]+prof1[ipos][27];
+ tracep[starty] |= 8;
+ }
+ gap_b[0] = ca+prof1[26];// prof2[jpos][26];
+ //fprintf(stderr,"Gap-b:%d\n",prof1[26]);
+ //Gap_b->Gap_b
+ if(cgb > gap_b[0]){
+ gap_b[0] = cgb;
+ tracep[starty] |= 16;
+ }
+ prof2 -= starty<<6;
+ //fprintf(stderr,"\nMOVED-::%d\n",(starty) << 6);
+ endx = startx;
+ endy = starty;
+ startx--;
+ starty--;
+ }while (startx >= 0 || starty >= 0);
+
+ free(freq);
+
+ ca = gap_b[0];
+ c = 2;
+ if(gap_a[0] > ca){
+ ca = gap_a[0];
+ c = 1;
+ }
+ if(align[0] >= ca){
+ //ca = align[0];
+ c = 0;
+ }
+ //fprintf(stderr,"STATE:%d %d\n",c,ca);
+ ca = c;
+
+ i = 0;
+ j = 0;
+ c = 1;
+ while(trace[i][j] < 32){
+ // fprintf(stderr,"%d->%d %d:%d %d:%d\n",c,trace[i][j],i,j,len_a,len_b);
+ switch(ca){
+ case 0:
+ if (trace[i][j] & 2){
+ ca = 1;
+ if(i+1!= len_a){
+ path[c+1] |= 16;
+ // fprintf(stderr,"GAP_CLOSE\n");
+ }
+ }else if (trace[i][j] & 4){
+ ca = 2;
+ if(j+1!= len_b){
+ path[c+1] |= 16;
+ // fprintf(stderr,"GAP_CLOSE\n");
+ }
+ }
+
+ //path[c] = 0;
+ i++;
+ j++;
+ break;
+ case 1:
+ if(trace[i][j] & 8){
+ ca = 1;
+ if(i!=0 && i!= len_a){
+ // fprintf(stderr,"GAP_EXT\n");
+ if(!(path[c]&16)){
+ path[c] |= 8;
+ }
+ }
+ }else{
+ ca = 0;
+ if(i!=0 && i!= len_a){
+ // fprintf(stderr,"GAP_OPEN\n");
+ path[c] |= 4;
+ }
+
+ }
+ path[c] |= 1;
+ j++;
+ break;
+ case 2:
+ if(trace[i][j] & 16){
+ ca = 2;
+ if(j !=0 && j != len_b){
+ // fprintf(stderr,"GAP_EXT\n");
+ if(!(path[c]&16)){
+ path[c] |= 8;
+ }
+ }
+ }else{
+ ca = 0;
+ if(j!=0 && j != len_b){
+ // fprintf(stderr,"GAP_OPEN\n");
+ path[c] |= 4;
+ }
+ }
+ path[c] |= 2;
+ i++;
+ break;
+ }
+ c++;
+ }
+ path[0] = c-1;
+ path[c] = 3;
+ return path;
+}
+
+int* make_profile(int* prof,int* seq,int len,int** subm)
+{
+ int i,j,c;
+ prof = tmalloc(sizeof(int)*(len+1)*64);
+ prof += (64 *len);
+ //fprintf(stderr,"Len:%d %d\n",len,64*len);
+ for (i = 64;i--;){
+ prof[i] = 0;
+ }
+ prof[9+32] = subm[0][9];
+ prof[14+32] = subm[0][14];
+ i = len;
+ while(i--){
+ prof -= 64;
+ //fprintf(stderr,"-64\n");
+ for (j = 26; j--;){
+ prof[j] = 0;
+ }
+ c = seq[i];
+ prof[c] +=1;
+ prof += 32;
+ for(j = 32;j--;){
+ prof[j] = subm[c][j];
+ }
+ prof -= 32;
+
+ }
+ return prof;
+}
+
+struct dp_matrix* consistency_check(struct dp_matrix *dp,int len_a,int len_b,int dia)
+{
+ int** mx = 0;
+ int* mxp = 0;
+ int* mxop = 0;
+ int* tx = 0;
+ int* ty = 0;
+ int** tb = 0;
+ int* tbp = 0;
+ int* tbop = 0;
+ register int i,j;
+ register int c = 0;
+ mx = dp->m;
+ tx = dp->true_x;
+ ty = dp->true_y;
+ tb = dp->tb;
+ mxop = mx[len_a];
+ /*for (i = 0;i <= len_a;i++){
+ for (j = 0;j <= len_b;j++){
+ printf("%3d",dp->m[i][j]);
+ }
+ printf("\n");
+ }
+ printf("\n");*/
+
+
+ tbop = tb[len_a];
+ for (j = len_b;j--;){
+ tbop[j] = 0;
+ }
+ tbop[len_b] = 0;
+ for (i = len_a;i--;){
+ mxp = mx[i];
+ tbp = tb[i];
+ tbp[len_b] = 0;
+ for (j = len_b;j--;){
+ tbp[j] = 0;
+ if (mxp[j]){
+ tbp[j] = tbop[j+1] + 1;
+ }
+ mxp[j] += mxop[j+1];
+ if (mxp[j+1] > mxp[j]){
+ mxp[j] = mxp[j+1];
+ tbp[j] = -1;
+ }
+ if (mxop[j] > mxp[j]){
+ mxp[j] = mxop[j];
+ tbp[j] = -2;
+ }
+ }
+ mxop = mxp;
+ tbop = tbp;
+ }
+ /*for (i = 0;i <= len_a;i++){
+ for (j = 0;j <= len_b;j++){
+ printf("%3d",dp->m[i][j]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+ printf("\n");
+ for (i = 0;i <= len_a;i++){
+ for (j = 0;j <= len_b;j++){
+ printf("%2d",tb[i][j]);
+ }
+ printf("\n");
+ }
+ printf("\n");*/
+
+
+ c = 0;
+ i = 0;
+ j = 0;
+ while (i < len_a && j < len_b){
+ //fprintf(stderr,"%d %d\n",tb[i][j] & 0x0000ffff,c);
+ switch (tb[i][j]){
+ case -1:
+ //printf("%d:%d %d\n",i,j,c);
+ c = 0;
+ j++;
+ break;
+ case -2:
+ //printf("%d:%d %d\n",i,j,c);
+ c = 0;
+ i++;
+ break;
+ default:
+ if (!c){
+ c = tb[i][j];
+ if (c < dia){
+ c = 0;
+ }else{
+ c -= 1;
+ while (--c){
+ tx[i+c] = 2;
+ ty[j+c] = 2;
+ }
+ }
+ }
+ // tx[i] = 2;
+ // ty[j] = 2;
+ i++;
+ j++;
+ break;
+
+ }
+ }
+ //exit(0);
+ tx[0] = 2;
+ ty[0] = 2;
+
+ return dp;
+}
+
+struct dp_matrix* consistency_check2(struct dp_matrix *dp,int len_a,int len_b,int dia)
+{
+ int** mx = 0;
+ int* mxp = 0;
+ int* mxop = 0;
+ int* tx = 0;
+ int* ty = 0;
+ int** tb = 0;
+ int* tbp = 0;
+ int* tbop = 0;
+ register int i,j;
+ int old_j = 0;
+ register int c = 0;
+ mx = dp->m;
+ tx = dp->true_x;
+ ty = dp->true_y;
+ tb = dp->tb;
+ mxop = mx[len_a];
+ /*printf("%1d ",0);
+ for (j = 0;j <= len_b;j++){
+ printf("%1d",dp->true_y[j]);
+ }
+ printf("\n\n");
+
+ for (i = 0;i <= len_a;i++){
+ printf("%1d ",dp->true_x[i]);
+ for (j = 0;j <= len_b;j++){
+ printf("%1d",dp->m[i][j]);
+ }
+ printf("\n");
+ }
+ printf("\n\n");*/
+
+ tbop = tb[len_a];
+ for (j = len_b;j--;){
+ tbop[j] = 0;
+ }
+ tbop[len_b] = 0;
+ for (i = len_a;i--;){
+ if (tx[i]){
+ old_j = len_b;
+ mxp = mx[i];
+ tbp = tb[i];
+ tbp[len_b] = 0;
+ for (j = len_b;j--;){
+ if (ty[j]){
+ tbp[j] = 0;
+ //if (mxp[j]){
+ // tbp[j] = tbop[j+1] + 1;
+ //}
+ // mxp[j] += mxop[j+1];
+
+ mxp[j] += mxop[old_j];
+ if (mxp[old_j] > mxp[j]){
+ mxp[j] = mxp[old_j];
+ tbp[j] = -1;
+ }
+ if (mxop[old_j] > mxp[j]){
+ mxp[j] = mxop[old_j];
+ tbp[j] = -2;
+ }
+ old_j = j;
+ }
+ }
+ mxop = mxp;
+ tbop = tbp;
+ }
+ }
+ /*printf("\n\n");
+ printf("%3d ",0);
+ for (j = 0;j <= len_b;j++){
+ printf("%3d",dp->true_y[j]);
+ }
+ printf("\n\n");
+
+ for (i = 0;i <= len_a;i++){
+ printf("%3d ",dp->true_x[i]);
+ for (j = 0;j <= len_b;j++){
+ printf("%3d",dp->m[i][j]);
+ }
+ printf("\n");
+ }
+ printf("\n\n");*/
+ c = 0;
+ i = 1;
+ j = 1;
+ while (i < len_a && j < len_b){
+ //fprintf(stderr,"%d %d %d %d\n",i,j,tb[i][j],c);
+ switch (tb[i][j]){
+ case 0:
+ /*if (c > dia){
+ c -= 1;
+ while (--c){
+ tx[i-c] = 2;
+ ty[j-c] = 2;
+ }
+
+ }
+ c = 0;*/
+ tx[i] = 2;
+ ty[j] = 2;
+ i++;
+ j++;
+ break;
+ case -1:
+ /*if (c > dia){
+ c -= 1;
+ while (--c){
+ tx[i-c] = 2;
+ ty[j-c] = 2;
+ }
+ }
+ c = 0;*/
+ j++;
+ break;
+ case -2:
+ /*if (c > dia){
+ c -= 1;
+ while (--c){
+ tx[i-c] = 2;
+ ty[j-c] = 2;
+ }
+ }
+ c = 0;*/
+ i++;
+ break;
+ /*default:
+ // fprintf(stderr,"ALIGN\n");
+ if (!c){
+ c = tb[i][j];
+ if (c < dia){
+ c = 0;
+ }else{
+
+ c -= 1;
+ while (--c){
+ tx[i+c] = 2;
+ ty[j+c] = 2;
+ }
+ }
+ }
+ i++;
+ j++;
+ break;*/
+
+ }
+ while (!tx[i]){
+ i++;
+ }
+ while (!ty[j]){
+ j++;
+ }
+ }
+ /*printf("\n\n");
+ printf("%2d ",0);
+ for (j = 0;j <= len_b;j++){
+ printf("%2d ",dp->true_y[j]);
+ }
+ printf("\n\n");
+
+ for (i = 0;i <= len_a;i++){
+ printf("%2d ",dp->true_x[i]);
+ for (j = 0;j <= len_b;j++){
+ printf("%2d ",dp->tb[i][j]);
+ }
+ printf("\n");
+ }
+ printf("\n\n");*/
+
+ tx[0] = 2;
+ ty[0] = 2;
+ /*i = 0;
+ for (j = 0;j <= len_a;j++){
+ fprintf(stderr,"%d",dp->true_x[j]);
+ if (dp->true_x[j] == 2){
+ i++;
+ }
+ }
+ fprintf(stderr,"\n%d\n",i);
+
+ i = 0;
+ for (j = 0;j <= len_b;j++){
+ fprintf(stderr,"%d",dp->true_y[j]);
+ if (dp->true_y[j] == 2){
+ i++;
+ }
+ }
+ fprintf(stderr,"\n%d\n",i);*/
+ //exit(0);
+
+ return dp;
+}
+
+void add_ptm2(int** matches[],struct dp_matrix *dp,int a,int b)
+{
+ register int i,j,c;
+ int* posa = 0;
+ int* posb = 0;
+ int* mc = 0;
+ int *tx = 0;
+ int *ty = 0;
+ int** matrix = 0;
+ matrix = dp->m;
+ tx = dp->true_x;
+ ty = dp->true_y;
+ for (c =8000;c--;){
+ if (matches[c]){
+ if (matches[c][a][0]!= 1){
+ if (matches[c][b][0]!= 1){
+ posa = matches[c][a];
+ posb = matches[c][b];
+ for (i = posa[0];--i;){
+ mc = matrix[posa[i]];
+ tx[posa[i]] = 1;
+ for (j = posb[0];--j;){
+ mc[posb[j]] += 1;
+
+ ty[posb[j]] = 1;
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+void add_ptm(int** matches[],int** matrix,int a,int b)
+{
+ register int i,j,c;
+ int* posa = 0;
+ int* posb = 0;
+ int* mc = 0;
+ for (c =8000;c--;){
+ if (matches[c]){
+ if (matches[c][a][0]!= 1){
+ if (matches[c][b][0]!= 1){
+ posa = matches[c][a];
+ posb = matches[c][b];
+ for (i = posa[0];--i;){
+ mc = matrix[posa[i]];
+ for (j = posb[0];--j;){
+ mc[posb[j]] += 1;
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+int** read_matrix(short *matrix_pointer,int** subm,int gpo)
+{
+ char *amino_acid_order = "ABCDEFGHIKLMNPQRSTVWXYZ";
+ int i,j;
+ int m_pos = 0;
+ m_pos = 0;
+ for (i = 0;i < 23;i++){
+ for (j = 0;j <= i;j++){
+ if (i == j){
+ subm[amino_acid_order[i]-65][amino_acid_order[j]-65] += matrix_pointer[m_pos];
+ }else{
+ subm[amino_acid_order[i]-65][amino_acid_order[j]-65] += matrix_pointer[m_pos];
+ subm[amino_acid_order[j]-65][amino_acid_order[i]-65] += matrix_pointer[m_pos];
+ }
+ m_pos++;
+ }
+ }
+ for (i = 26;i--;){
+ subm[i][9] = -gpo;
+ }
+ return subm;
+}
+
+int* upgma(double **dm,int* tree)
+{
+ int i,j,t;
+ int *as = 0;
+ double max;
+ int node_a = 0;
+ int node_b = 0;
+ int cnode = 0;
+ cnode = numseq;
+ as = tmalloc (sizeof(int)*numprofiles);
+ for (i = numseq; i--;){
+ as[i] = 1;
+ }
+ for (i = numseq; i < numprofiles;i++){
+ as[i] = 0;
+ }
+ t = 0;
+ while (cnode != numprofiles){
+ max = -INFTY;
+ for (i = 0;i < numprofiles; i++){
+ if (as[i]){
+ for ( j = i +1;j < numprofiles;j++){
+ if (as[j]){
+ if (dm[i][j] > max){
+ max = dm[i][j];
+ node_a = i;
+ node_b = j;
+ }
+ }
+ }
+ }
+ }
+ /*deactivate sequences to be joined*/
+ as[node_a] = 0;
+ as[node_b] = 0;
+ tree[t] = node_a;
+ tree[t+1] = node_b;
+ /*calculate new distances*/
+ for (i = numprofiles;i--;){
+ if (as[i]){
+ dm[i][cnode] = (node_a < i)?dm[node_a][i]:dm[i][node_a];
+ dm[i][cnode] += (node_b < i)?dm[node_b][i]:dm[i][node_b];
+ dm[i][cnode] /= 2;
+ }
+ }
+ as[cnode] = 1;
+ cnode++;
+ t += 2;
+ }
+ free(as);
+ return tree;
+}
+
+double distance_calculation2(int** matches[],int len_a,int len_b,int a,int b)//,int size)
+{
+ double d = 0;
+ register int c;
+ for (c = 8000;c--;){
+ if (matches[c]){
+ if ((matches[c][a][0]&0x0000ffff)-1){
+ if ((matches[c][b][0]&0x0000ffff)-1){
+ d++;
+ }
+ }
+ }
+ }
+ if (len_a > len_b){
+ d /= (double)len_b;
+ }else{
+ d /= (double)len_a;
+ }
+ return d;
+}
+
+
+double distance_calculation(int** matches[],int len_a,int len_b,int a,int b)//,int size)
+{
+ double d = 0;
+ register int i,j,c,tmp1,tmp2;
+ int* diagonal = 0;
+ int* p1 = 0;
+ int* p2 = 0;
+ int *p = 0;
+ diagonal = tmalloc(sizeof(int)*(len_a+len_b));
+ for(i = len_a + len_b;i--;){
+ diagonal[i] = 0;
+ }
+ diagonal += len_a;
+ for (c = 8000;c--;){
+ if (matches[c]){
+ if ((matches[c][a][0]&0x0000ffff)-1){
+ p1 = matches[c][a];
+ tmp1 = (p1[0] >> 16);
+ if ((matches[c][b][0]&0x0000ffff)-1){
+ p2 = matches[c][b];
+ tmp2 = tmp1 * (p2[0]>>16);
+ for (i = p1[0] & 0x0000ffff;--i;){
+ p = diagonal - p1[i];
+ for (j = p2[0] & 0x0000ffff;--j;){
+ p[p2[j]] += tmp2;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ diagonal -= len_a;
+ c = 0; //min
+ for (i = 3;i--;){
+ for(j = len_a+len_b;j--;){
+ //fprintf(stdout," %d c:%d\n",diagonal[j],c);
+ if (diagonal[j]){
+ if(diagonal[j] > (c & 0x0000ffff)){
+ c = diagonal[j]| (j<<16);
+ //fprintf(stderr,"c:%d %d\n",c &0x0000ffff, c >>16);
+ }
+ }
+ }
+ if (c){
+ d += c & 0x0000ffff;
+ diagonal[c>>16] = 0;
+ c = 0;
+ }
+ }
+ free(diagonal);
+ return d;
+}
+
+void fill_hash(struct sequence_info* si,int** matches[],int nowu)
+{
+ int i,j,c,f;
+ int key = 0;
+ int *sp = 0;
+ int aacode[26] = {0,0,1,2,3,4,5,6,7,-1,8,9,10,11,-1,12,13,14,15,16,-1,17,18,0,19,0};
+ int aadecode[20] = {0,2,3,4,5,6,7,8,10,11,12,13,15,16,17,18,19,21,22,24};
+ int patterns[30] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+ int **out = 0;
+ int map[10] = {0,0,0,0,0,0,0,0,0,0};
+ for (i = numseq;i--;){
+ sp = si->s[i];
+ for(j = si->sl[i]-2;j--;){
+ key = ((aacode[sp[j]]*20)+aacode[sp[j+1]])*20+aacode[sp[j+2]];
+ //fprintf(stderr,"Pattern:%c%c%c matches in %d at %d\n",sp[j]+65,sp[j+1]+65,sp[j+2]+65,i,j);
+ if (!matches[key]){
+ matches[key] = tmalloc(sizeof(int*)*numprofiles);
+ for (c = numprofiles;c--;){
+ matches[key][c] = 0;
+ }
+ }
+ if (!matches[key][i]){
+ matches[key][i] = tmalloc(sizeof(int)*10);
+ matches[key][i][0] = 1;
+ }
+ if (!(matches[key][i][0] %10)){
+ matches[key][i] = realloc(matches[key][i],(matches[key][i][0]+10) * sizeof(int));
+ }
+ matches[key][i][matches[key][i][0]] = j;
+ matches[key][i][0] += 1;
+ }
+ }
+ for (i = 8000;i--;){
+ if (matches[i]){
+ for (j = numseq;j--;){
+ if (matches[i][j]){
+ matches[i][j][0] |= 0x00040000;
+ }
+ }
+ }
+ }
+ if(nowu){
+ for (i = 8000;i--;){
+ if (matches[i]){
+ for (j = numseq;j--;){
+ if (!matches[i][j]){
+ matches[i][j] = tmalloc(sizeof(int)*1);
+ matches[i][j][0] = 1;
+ }
+ }
+ }
+ }
+ }else{
+
+ c = 0;
+ for (i = numseq;i--;){
+ for (j = 8000;j--;){
+ if (matches[j]){
+ if(!matches[j][i]){
+ map[c] = j;
+ key = j;
+ patterns[c*3 + 2] = aadecode[key / 400];
+ key %= 400;
+ patterns[c*3 + 1] = aadecode[key /20];
+ key %= 20;
+ patterns[c*3 + 0] = aadecode[key];
+ c++;
+ }
+ if(c == 10){
+ //start of 10x Wu-Manber;
+ out = ten_wu_manber(si->s[i],si->sl[i],patterns);
+ for (f = 0;f < 10;f++){
+ matches[map[f]][i] = out[f];
+ matches[map[f]][i][0] |= 0x00010000;
+ }
+ //free(out);
+ c = 0;
+ }
+ }
+ }
+ if (c){
+ for (f = c*3;f < 30;f++){
+ patterns[f] = 9;
+ }
+ out = ten_wu_manber(si->s[i],si->sl[i],patterns);
+ for (f = 0;f < c;f++){
+ matches[map[f]][i] = out[f];
+ matches[map[f]][i][0] |= 0x00010000;
+ }
+ //free(out);
+ c = 0;
+ }
+ }
+ }
+}
+
+void* tmalloc(int size){
+ void* p;
+ p = (void*)malloc(size);
+ if (!p){
+ fprintf(stderr,"Out of memory!\n- try running Kalign with the -f option enabled\n");
+ exit(0);
+ }
+ return p;
+}
+
+int** ten_wu_manber(int* seq,int len,int p[])
+{
+ unsigned int Tc = 0;
+ unsigned int Tc2 = 0;
+ register unsigned int R0 = 0;
+ register unsigned int R1= 153391689;//1;
+ register unsigned int S0 = 0;
+ register unsigned int S1 = 0;
+ unsigned int T[26] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+ unsigned int T2[26] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+ int **out = 0;
+ out = tmalloc(sizeof(int*)*10);
+ for (Tc = 10;Tc--;){
+ out[Tc] = tmalloc(sizeof(int)*10);
+ out[Tc][0] = 1;
+ }
+
+ for (Tc = 0; Tc < 30;Tc++){
+ T[p[Tc]] |= 1 << Tc;
+ T2[p[Tc]] = T[p[Tc]]&153391689;//MASK;
+ }
+ while(--len){
+ //fprintf(stderr,"%d\n",len);
+ Tc = T[seq[len]];
+ Tc2 = T2[seq[len]];
+ S0 <<= 1;
+ S0 |= 153391689;//MASK;
+ S0 &= Tc;
+ S0 |= Tc2;
+ //S1 = ((R1 << 1) & Tc) | ((R0 | S0) << 1) | R0;//| 1;
+ S1 = (R1 << 1) & Tc;//| 1;
+ S1 |= (R0 | S0) << 1;
+ S1 |= R0;
+ S1 |= 153391689;
+
+ //S1 = (R1 << 1) & (Tc) | ((R0 | S0) << 1) | (R0) | (153391689);
+
+ if (S1 & 0x24924924){
+ if (S1 & 0x00004924){
+ if (S1 & 0x00000024){
+ if (S1 & 4){
+ if (!(out[0][0] %10)){
+ out[0] = realloc(out[0],(out[0][0]+10) * sizeof(int));
+ }
+ out[0][out[0][0]] = len;
+ out[0][0]++;
+ }
+ if (S1 & 32){
+ if (!(out[1][0] %10)){
+ out[1] = realloc(out[1],(out[1][0]+10) * sizeof(int));
+ }
+ out[1][out[1][0]] = len;
+ out[1][0]++;
+ }
+ }
+ if (S1 & 0x00004900){
+ if (S1 & 256){
+ if (!(out[2][0] %10)){
+ out[2] = realloc(out[2],(out[2][0]+10) * sizeof(int));
+ }
+ out[2][out[2][0]] = len;
+ out[2][0]++;
+ }
+ if (S1 & 2048){
+ if (!(out[3][0] %10)){
+ out[3] = realloc(out[3],(out[3][0]+10) * sizeof(int));
+ }
+ out[3][out[3][0]] = len;
+ out[3][0]++;
+ }
+ if (S1 & 16384){
+ if (!(out[4][0] %10)){
+ out[4] = realloc(out[4],(out[4][0]+10) * sizeof(int));
+ }
+ out[4][out[4][0]] = len;
+ out[4][0]++;
+ }
+ }
+ }
+
+
+
+ if (S1 & 0x24920000){
+ if (S1 & 0x00920000){
+ if (S1 & 131072){
+ if (!(out[5][0] %10)){
+ out[5] = realloc(out[5],(out[5][0]+10) * sizeof(int));
+ }
+ out[5][out[5][0]] = len;
+ out[5][0]++;
+ }
+ if (S1 & 1048576){
+ if (!(out[6][0] %10)){
+ out[6] = realloc(out[6],(out[6][0]+10) * sizeof(int));
+ }
+ out[6][out[6][0]] = len;
+ out[6][0]++;
+ }
+ if (S1 & 8388608){
+ if (!(out[7][0] %10)){
+ out[7] = realloc(out[7],(out[7][0]+10) * sizeof(int));
+ }
+ out[7][out[7][0]] = len;
+ out[7][0]++;
+ }
+ }
+
+ if (S1 & 0x24000000){
+ if (S1 & 67108864){
+ if (!(out[8][0] %10)){
+ out[8] = realloc(out[8],(out[8][0]+10) * sizeof(int));
+ }
+ out[8][out[8][0]] = len;
+ out[8][0]++;
+ }
+ if (S1 & 536870912){
+ if (!(out[9][0] %10)){
+ out[9] = realloc(out[9],(out[9][0]+10) * sizeof(int));
+ }
+ out[9][out[9][0]] = len;
+ out[9][0]++;
+ }
+ }
+ }
+ }
+ R0 = S0;
+ R1 = S1;
+ }
+ return out;
+}
+
+void update_hash(struct sequence_info* si,int** matches[],int a,int b,int new)
+{
+ int n,i,j,c;
+ int nma;
+ int nmb;
+ int ma = 0;
+ int mb = 0;
+ int comp;
+ int* rela = 0;
+ int* relb = 0;
+ rela = si->relpos[a];
+ relb = si->relpos[b];
+ /*fprintf(stderr,"LEN%d:%d\n",a,si->sl[a]);
+ for (i = 0; i < si->sl[a]-1;i++){
+ fprintf(stderr,"%d ",si->relpos[a][i]);
+ //if (si->relpos[a][i] > si->relpos[a][i+1]){
+ // exit(0);
+ //}
+ }
+ fprintf(stderr,"\n");*/
+ //exit(0);
+ for (n = 8000;n--;){
+ if(matches[n]){
+ if ((nma = matches[n][a][0]-1)){
+ if((nmb = matches[n][b][0]-1)){
+ //fprintf(stderr,"UPDATING:\n");
+ //fprintf(stderr,"nma:%d nmb:%d\n",nma,nmb);
+ matches[n][new] = tmalloc(sizeof(int) * (nma+nmb+1));
+ //matches[n][new][0] = nma+nmb+1;
+ /*fprintf(stderr,"A:");
+ for (i = 1;i <= nma;i++){
+ fprintf(stderr,"%d:%d ",rela[matches[n][a][i] & 0x0000ffff],matches[n][a][i]>>16);
+ }
+ fprintf(stderr,"\n");
+ fprintf(stderr,"B:");
+ for (i = 1;i <= nmb;i++){
+ fprintf(stderr,"%d:%d ",relb[matches[n][b][i]& 0x0000ffff],matches[n][b][i]>>16);
+ }
+ fprintf(stderr,"\n");*/
+
+ i = 1;
+ j = 1;
+ c = 1;
+ while (i <= nma && j <= nmb ){
+ ma = matches[n][a][i];
+ mb = matches[n][b][j];
+ comp = (rela[ma] > relb[mb]) - (rela[ma] < relb[mb]);
+ switch(comp){
+ case 0:
+ matches[n][new][c] = rela[ma];
+ i++;
+ j++;
+ // c++;
+ break;
+ case 1:
+ matches[n][new][c] = rela[ma];
+ i++;
+ break;
+ case -1:
+ matches[n][new][c] = relb[mb];
+ j++;
+ break;
+ }
+ c++;
+ }
+ ma = matches[n][a][i];
+ mb = matches[n][b][j];
+ //fprintf(stderr,"c:%d i:%d j:%d\n",c,i,j);
+ while (i <= nma){
+ ma = matches[n][a][i];
+ //matches[n][new][c] =rela[ma & 0x0000ffff] | (ma & 0xffff0000);
+ matches[n][new][c] =rela[ma];// | (ma & 0xffff0000);
+ i++;
+ c++;
+ }
+ while (j <= nmb){
+ mb = matches[n][b][j];
+ //matches[n][new][c] =relb[mb & 0x0000ffff] | (mb & 0xffff0000);
+ matches[n][new][c] =relb[mb];// | (mb & 0xffff0000);
+ j++;
+ c++;
+ }
+ matches[n][new][0] = c;
+ /*fprintf(stderr,"N:");
+ for (i = 0;i < c;i++){
+ fprintf(stderr,"%d:%d ",matches[n][new][i]& 0x0000ffff,matches[n][new][i]>>16);
+ }
+ fprintf(stderr,"\n");*/
+ //exit(0);
+ free(matches[n][a]);
+ free(matches[n][b]);
+
+
+ }
+ }
+ if(!matches[n][new]){
+ //fprintf(stderr,"SHIT\n");
+ matches[n][new] = tmalloc (sizeof(int)*1);
+ matches[n][new][0] = 1;
+ }
+
+ }
+ }
+}
+
+char* get_input_into_string(char* string,char* infile)
+{
+ int i = 0;
+ int string_length = 2;
+ char c = 0;
+ FILE *file = 0;
+ if(infile){
+ if (!(file = fopen( infile, "r" ))){
+ fprintf(stderr,"Cannot open file '%s'\n", infile);
+ exit(1);
+ }
+ if (fseek(file,0,SEEK_END) != 0){
+ (void)fprintf(stderr, "ERROR: fseek failed\n");
+ (void)exit(EXIT_FAILURE);
+ }
+ i= ftell (file);
+ if (fseek(file,0,SEEK_START) != 0){
+ (void)fprintf(stderr, "ERROR: fseek failed\n");
+ (void)exit(EXIT_FAILURE);
+ }
+ string = tmalloc ((i+1)* sizeof(char));
+ fread(string,sizeof(char), i, file);
+ string[i] = 0;
+ }else{
+ if (!isatty(0)){
+ string = malloc(sizeof(char*)*string_length);
+ while (!feof (stdin)){
+ c = getc(stdin);
+ if (i == string_length){
+ string_length <<=1;
+ string = realloc(string,sizeof(char)*string_length);
+ }
+ string[i] = c;
+ i++;
+ }
+ string[i] = 0;
+ }else{
+ return 0;
+ }
+ }
+ return string;
+}
+
+struct sequence_info* read_sequences(struct sequence_info* si,char* string)
+{
+ int c = 0;
+ int n = 0;
+ int i = 0;
+ int j = 0;
+ int stop = 0;
+ int nbytes;
+ nbytes = strlen(string);
+
+ si = (struct sequence_info *) tmalloc(sizeof(struct sequence_info));
+ for (i =0;i < nbytes;i++){
+ if (string[i] == '>'&& stop == 0){
+ stop = 1;
+ numseq++;
+ }
+ if (string[i] == '\n'){
+ stop = 0;
+ }
+ }
+
+ numprofiles = (numseq << 1) - 1;
+ si->s = tmalloc(sizeof(int*) * numseq);
+ si->sl = tmalloc(sizeof(int) * numprofiles);
+ si->sip = tmalloc(sizeof(int*)* numprofiles);
+ si->nsip = tmalloc(sizeof(int)* numprofiles);
+ si->sn = tmalloc(sizeof(int*) * numseq);
+ si->lsn = tmalloc(sizeof(int) * numseq);
+ si->gis = tmalloc(sizeof(int*) * numprofiles);
+ si->relpos = tmalloc(sizeof(int*) * numprofiles);
+ j = 0;
+ for (i =0;i < nbytes;i++){
+ if (string[i] == '>' && stop == 0){
+ stop = 1;
+ si->sl[j] =c;
+ j++;
+ c = 0;
+ }
+ if (string[i] == '\n'){
+ if(stop == 1){
+ si->lsn[j-1] = n;
+ n = 0;
+ }
+ stop = 0;
+
+ }
+ if (stop == 1 && string[i] != '\n' && string[i] != 0 ){
+ n++;
+ }
+ if (stop == 0 && string[i] != '\n' && string[i] != 0 ){
+ //if (string[i] >64){
+ if (isalpha((int)string[i])){
+ c++;
+ }
+ }
+ }
+ si->sl[j] = c;
+ for (i =1;i < numseq+1;i++){si->sl[i-1] = si->sl[i];}
+ for (i = numseq;i--;){
+ si->s[i] = tmalloc(sizeof(int)*(si->sl[i]+1));
+ si->sn[i] = tmalloc(sizeof(int)*(si->lsn[i]+1));
+ si->gis[i] = tmalloc(sizeof(int)*(si->sl[i]+1));
+ si->relpos[i] = tmalloc(sizeof(int)*(si->sl[i]+1));
+ si->sip[i] = tmalloc(1* sizeof(int));
+ si->nsip[i] = 1;
+ si->sip[i][0] = i;
+ for(j = si->sl[i]+1;j--;){
+ si->gis[i][j] = 0;
+ si->relpos[i][j] = j;
+ }
+ }
+ j =0;
+ for (i =0;i < nbytes;i++){
+ if (string[i] == '>'&& stop == 0 ){
+ stop = 1;
+ j++;
+ c = 0;
+ }
+ if (string[i] == '\n'){
+ if(stop == 1){
+ n = 0;
+ }
+ stop = 0;
+ }
+ if (stop == 1 &&string[i] != '\n' && string[i] != 0 ){
+ si->sn[j-1][n] = string[i];
+ n++;
+ }
+ if (stop == 0 && string[i] != '\n' && string[i] != 0 ){
+ //if(string[i] > 64){
+ if(isalpha((int)string[i])){
+ si->s[j-1][c] = toupper(string[i])-65;
+ c++;
+ }
+ }
+ }
+ for (i =0;i< numseq;i++){
+ si->s[i][si->sl[i]] = 0;
+ si->sn[i][si->lsn[i]] = 0;
+ }
+ free(string);
+ return si;
+}
+
+struct dp_matrix* dp_matrix_init(struct dp_matrix *dp,int x,int y)
+{
+ int** mx = 0;
+ //int** tb = 0;
+ int* mxp = 0;
+ //int* tbp = 0;
+ int* tx = 0;
+ int* ty = 0;
+ register int i,j;
+
+ tx = dp->true_x;
+ ty = dp->true_y;
+ //tb = dp->tb;
+ mx = dp->m;
+ for (i = x+1;i--;){
+ //tbp = tb[i];
+ mxp = mx[i];
+ tx[i] = 0;
+ for (j = y+1;j--;){
+ // tbp[j] = 0;
+ mxp[j] = 0;
+ }
+ }
+ for (j = y+1;j--;){
+ ty[j] = 0;
+ }
+ return dp;
+}
+
+struct dp_matrix* dp_matrix_realloc(struct dp_matrix *dp,int x,int y,int p)
+{
+ int i;
+ if ( x > dp->x || y > dp->y){
+ //printf("REALLOCING:%d-%d %d-%d\n",x,y,dp->x,dp->y);
+ i = 1;
+ while (i <= y){
+ i <<= 1;
+ // printf("i:%d y:%d\n",i,y);
+ }
+ y = i-1;
+ i = 1;
+ while (i <= x){
+ i <<= 1;
+ //printf("i:%d y:%d\n",i,y);
+ }
+ x = i-1;
+ //printf("NEWX:%d NEWY:%d\n",x,y);
+ dp->a = (int*) realloc(dp->a,sizeof(int) * (y+1));
+ dp->ga = (int*) realloc(dp->ga,sizeof(int) * (y+1));
+ dp->gb = (int*) realloc(dp->gb,sizeof(int) * (y+1));
+ dp->tb = (int**) realloc (dp->tb,sizeof (int*)*(x+1));
+ dp->tb_mem = (void*) realloc(dp->tb_mem,sizeof(int) * (x+1) * (y+1));
+ if(p){
+ dp->m = (int**) realloc (dp->m,sizeof (int*) * (x+1));
+ dp->m_mem = (void*) realloc(dp->m_mem,sizeof(int) * (x+1) * (y+1));
+ dp->tb[0] = (int*) dp->tb_mem;
+ dp->m[0] = (int *) dp->m_mem;
+ for (i = 1; i <= x; i++){
+ dp->tb[i] = dp->tb[0] +(i*(y+1));
+ dp->m[i] = dp->m[0] +(i*(y+1));
+ }
+ }else{
+ dp->tb[0] = (int*) dp->tb_mem;
+ for (i = 1; i <= x; i++){
+ dp->tb[i] = dp->tb[0] +(i*(y+1));
+ }
+ }
+ dp->true_x = realloc(dp->true_x,sizeof(int) * (x+1));
+ dp->true_y = realloc(dp->true_y,sizeof(int) * (y+1));
+ dp->x = x;
+ dp->y = y;
+ }
+ return dp;
+}
+
+struct dp_matrix* dp_matrix_alloc(struct dp_matrix *dp,int x,int y,int p)
+{
+ int i;
+ dp = (struct dp_matrix *) tmalloc(sizeof(struct dp_matrix));
+ dp->x = x;
+ dp->y = y;
+ dp->a = (int*) tmalloc(sizeof(int)* (y+1));
+ dp->ga = (int*) tmalloc(sizeof(int)* (y+1));
+ dp->gb = (int*) tmalloc(sizeof(int)* (y+1));
+ dp->true_x = (int*) tmalloc(sizeof(int) * (x+1));
+ dp->true_y = (int*) tmalloc(sizeof(int) * (y+1));
+ dp->tb = (int**) tmalloc(sizeof(int*) * (x+1));
+ dp->tb_mem = (void *) tmalloc(sizeof(int) * (x+1) * (y+1));
+ if (p){
+ dp->m = (int**) tmalloc(sizeof(int*) * (x+1));
+ dp->m_mem = (void *) tmalloc(sizeof(int) * (x+1) * (y+1));
+ dp->tb[0] = (int*) dp->tb_mem;
+ dp->m[0] = (int*) dp->m_mem;
+ for ( i = 1; i <= x;i++){
+ dp->tb[i] = dp->tb[0] +(i*(y+1));
+ dp->m[i] = dp->m[0] +(i*(y+1));
+ }
+ }else{
+ dp->tb[0] = (int*) dp->tb_mem;
+ for ( i = 1; i <= x;i++){
+ dp->tb[i] = dp->tb[0] +(i*(y+1));
+ }
+ }
+ return dp;
+}
+
+void dp_matrix_free(struct dp_matrix *dp,int p )
+{
+ free(dp->a);
+ free(dp->ga);
+ free(dp->gb);
+ free(dp->true_x);
+ free(dp->true_y);
+ free(dp->tb);
+ free(dp->tb_mem);
+ if (p){
+ free(dp->m);
+ free(dp->m_mem);
+ }
+ free(dp);
+}
More information about the debian-med-commit
mailing list