[med-svn] [sumaclust] 04/06: New upstream version 1.0.20
Andreas Tille
tille at debian.org
Wed Oct 18 17:47:08 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository sumaclust.
commit e6bd80faf1d942e92521d040b4aeb2d341bf3a94
Author: Andreas Tille <tille at debian.org>
Date: Wed Oct 18 19:45:25 2017 +0200
New upstream version 1.0.20
---
._global.mk | Bin 0 -> 171 bytes
Licence_CeCILL_V2-en.txt | 506 +++++++
Licence_CeCILL_V2-fr.txt | 512 ++++++++
Makefile | 52 +
debian/changelog | 12 -
debian/compat | 1 -
debian/control | 27 -
debian/copyright | 519 --------
debian/doc-base | 18 -
debian/docs | 1 -
debian/install | 1 -
debian/manpages | 1 -
debian/patches/hardening.patch | 15 -
debian/patches/series | 1 -
debian/rules | 26 -
debian/source/format | 1 -
debian/sumaclust.1 | 88 --
debian/watch | 9 -
global.mk | 45 +
mtcompare_sumaclust.c | 334 +++++
mtcompare_sumaclust.h | 15 +
sumaclust.c | 1083 +++++++++++++++
sumaclust.h | 34 +
sumaclust_user_manual.pdf | Bin 0 -> 79732 bytes
sumalibs/._global.mk | Bin 0 -> 171 bytes
sumalibs/Licence_CeCILL_V2-en.txt | 506 +++++++
sumalibs/Licence_CeCILL_V2-fr.txt | 512 ++++++++
sumalibs/global.mk | 10 +
sumalibs/libfasta/._Makefile | Bin 0 -> 171 bytes
sumalibs/libfasta/Makefile | 33 +
sumalibs/libfasta/fasta_header_handler.c | 126 ++
sumalibs/libfasta/fasta_header_handler.h | 23 +
sumalibs/libfasta/fasta_header_parser.c | 1960 ++++++++++++++++++++++++++++
sumalibs/libfasta/fasta_header_parser.h | 13 +
sumalibs/libfasta/fasta_header_parser.l | 178 +++
sumalibs/libfasta/fasta_seq_writer.c | 76 ++
sumalibs/libfasta/fasta_seq_writer.h | 19 +
sumalibs/libfasta/header_mem_handler.c | 93 ++
sumalibs/libfasta/header_mem_handler.h | 22 +
sumalibs/libfasta/sequence.c | 454 +++++++
sumalibs/libfasta/sequence.h | 64 +
sumalibs/libfile/._Makefile | Bin 0 -> 171 bytes
sumalibs/libfile/Makefile | 25 +
sumalibs/libfile/fileHandling.c | 92 ++
sumalibs/libfile/fileHandling.h | 20 +
sumalibs/liblcs/._Makefile | Bin 0 -> 171 bytes
sumalibs/liblcs/Makefile | 25 +
sumalibs/liblcs/_lcs.ext.1.c | 168 +++
sumalibs/liblcs/_lcs.ext.2.c | 34 +
sumalibs/liblcs/_lcs.ext.3.c | 34 +
sumalibs/liblcs/_lcs.h | 29 +
sumalibs/liblcs/_lcs_fast.h | 597 +++++++++
sumalibs/liblcs/banded_LCS_alignment.c | 211 +++
sumalibs/liblcs/banded_LCS_alignment.h | 9 +
sumalibs/liblcs/sse_banded_LCS_alignment.c | 724 ++++++++++
sumalibs/liblcs/sse_banded_LCS_alignment.h | 24 +
sumalibs/liblcs/upperband.c | 382 ++++++
sumalibs/liblcs/upperband.h | 18 +
sumalibs/libsse/_sse.h | 961 ++++++++++++++
sumalibs/libutils/._Makefile | Bin 0 -> 171 bytes
sumalibs/libutils/Makefile | 25 +
sumalibs/libutils/debug.c | 32 +
sumalibs/libutils/debug.h | 25 +
sumalibs/libutils/utilities.c | 230 ++++
sumalibs/libutils/utilities.h | 56 +
65 files changed, 10391 insertions(+), 720 deletions(-)
diff --git a/._global.mk b/._global.mk
new file mode 100644
index 0000000..6adb72e
Binary files /dev/null and b/._global.mk differ
diff --git a/Licence_CeCILL_V2-en.txt b/Licence_CeCILL_V2-en.txt
new file mode 100644
index 0000000..fcc8df2
--- /dev/null
+++ b/Licence_CeCILL_V2-en.txt
@@ -0,0 +1,506 @@
+
+CeCILL FREE SOFTWARE LICENSE AGREEMENT
+
+
+ Notice
+
+This Agreement is a Free Software license agreement that is the result
+of discussions between its authors in order to ensure compliance with
+the two main principles guiding its drafting:
+
+ * firstly, compliance with the principles governing the distribution
+ of Free Software: access to source code, broad rights granted to
+ users,
+ * secondly, the election of a governing law, French law, with which
+ it is conformant, both as regards the law of torts and
+ intellectual property law, and the protection that it offers to
+ both authors and holders of the economic rights over software.
+
+The authors of the CeCILL (for Ce[a] C[nrs] I[nria] L[ogiciel] L[ibre])
+license are:
+
+Commissariat à l'Energie Atomique - CEA, a public scientific, technical
+and industrial research establishment, having its principal place of
+business at 25 rue Leblanc, immeuble Le Ponant D, 75015 Paris, France.
+
+Centre National de la Recherche Scientifique - CNRS, a public scientific
+and technological establishment, having its principal place of business
+at 3 rue Michel-Ange, 75794 Paris cedex 16, France.
+
+Institut National de Recherche en Informatique et en Automatique -
+INRIA, a public scientific and technological establishment, having its
+principal place of business at Domaine de Voluceau, Rocquencourt, BP
+105, 78153 Le Chesnay cedex, France.
+
+
+ Preamble
+
+The purpose of this Free Software license agreement is to grant users
+the right to modify and redistribute the software governed by this
+license within the framework of an open source distribution model.
+
+The exercising of these rights is conditional upon certain obligations
+for users so as to preserve this status for all subsequent redistributions.
+
+In consideration of access to the source code and the rights to copy,
+modify and redistribute granted by the license, users are provided only
+with a limited warranty and the software's author, the holder of the
+economic rights, and the successive licensors only have limited liability.
+
+In this respect, the risks associated with loading, using, modifying
+and/or developing or reproducing the software by the user are brought to
+the user's attention, given its Free Software status, which may make it
+complicated to use, with the result that its use is reserved for
+developers and experienced professionals having in-depth computer
+knowledge. Users are therefore encouraged to load and test the
+suitability of the software as regards their requirements in conditions
+enabling the security of their systems and/or data to be ensured and,
+more generally, to use and operate it in the same conditions of
+security. This Agreement may be freely reproduced and published,
+provided it is not altered, and that no provisions are either added or
+removed herefrom.
+
+This Agreement may apply to any or all software for which the holder of
+the economic rights decides to submit the use thereof to its provisions.
+
+
+ Article 1 - DEFINITIONS
+
+For the purpose of this Agreement, when the following expressions
+commence with a capital letter, they shall have the following meaning:
+
+Agreement: means this license agreement, and its possible subsequent
+versions and annexes.
+
+Software: means the software in its Object Code and/or Source Code form
+and, where applicable, its documentation, "as is" when the Licensee
+accepts the Agreement.
+
+Initial Software: means the Software in its Source Code and possibly its
+Object Code form and, where applicable, its documentation, "as is" when
+it is first distributed under the terms and conditions of the Agreement.
+
+Modified Software: means the Software modified by at least one
+Contribution.
+
+Source Code: means all the Software's instructions and program lines to
+which access is required so as to modify the Software.
+
+Object Code: means the binary files originating from the compilation of
+the Source Code.
+
+Holder: means the holder(s) of the economic rights over the Initial
+Software.
+
+Licensee: means the Software user(s) having accepted the Agreement.
+
+Contributor: means a Licensee having made at least one Contribution.
+
+Licensor: means the Holder, or any other individual or legal entity, who
+distributes the Software under the Agreement.
+
+Contribution: means any or all modifications, corrections, translations,
+adaptations and/or new functions integrated into the Software by any or
+all Contributors, as well as any or all Internal Modules.
+
+Module: means a set of sources files including their documentation that
+enables supplementary functions or services in addition to those offered
+by the Software.
+
+External Module: means any or all Modules, not derived from the
+Software, so that this Module and the Software run in separate address
+spaces, with one calling the other when they are run.
+
+Internal Module: means any or all Module, connected to the Software so
+that they both execute in the same address space.
+
+GNU GPL: means the GNU General Public License version 2 or any
+subsequent version, as published by the Free Software Foundation Inc.
+
+Parties: mean both the Licensee and the Licensor.
+
+These expressions may be used both in singular and plural form.
+
+
+ Article 2 - PURPOSE
+
+The purpose of the Agreement is the grant by the Licensor to the
+Licensee of a non-exclusive, transferable and worldwide license for the
+Software as set forth in Article 5 hereinafter for the whole term of the
+protection granted by the rights over said Software.
+
+
+ Article 3 - ACCEPTANCE
+
+3.1 The Licensee shall be deemed as having accepted the terms and
+conditions of this Agreement upon the occurrence of the first of the
+following events:
+
+ * (i) loading the Software by any or all means, notably, by
+ downloading from a remote server, or by loading from a physical
+ medium;
+ * (ii) the first time the Licensee exercises any of the rights
+ granted hereunder.
+
+3.2 One copy of the Agreement, containing a notice relating to the
+characteristics of the Software, to the limited warranty, and to the
+fact that its use is restricted to experienced users has been provided
+to the Licensee prior to its acceptance as set forth in Article 3.1
+hereinabove, and the Licensee hereby acknowledges that it has read and
+understood it.
+
+
+ Article 4 - EFFECTIVE DATE AND TERM
+
+
+ 4.1 EFFECTIVE DATE
+
+The Agreement shall become effective on the date when it is accepted by
+the Licensee as set forth in Article 3.1.
+
+
+ 4.2 TERM
+
+The Agreement shall remain in force for the entire legal term of
+protection of the economic rights over the Software.
+
+
+ Article 5 - SCOPE OF RIGHTS GRANTED
+
+The Licensor hereby grants to the Licensee, who accepts, the following
+rights over the Software for any or all use, and for the term of the
+Agreement, on the basis of the terms and conditions set forth hereinafter.
+
+Besides, if the Licensor owns or comes to own one or more patents
+protecting all or part of the functions of the Software or of its
+components, the Licensor undertakes not to enforce the rights granted by
+these patents against successive Licensees using, exploiting or
+modifying the Software. If these patents are transferred, the Licensor
+undertakes to have the transferees subscribe to the obligations set
+forth in this paragraph.
+
+
+ 5.1 RIGHT OF USE
+
+The Licensee is authorized to use the Software, without any limitation
+as to its fields of application, with it being hereinafter specified
+that this comprises:
+
+ 1. permanent or temporary reproduction of all or part of the Software
+ by any or all means and in any or all form.
+
+ 2. loading, displaying, running, or storing the Software on any or
+ all medium.
+
+ 3. entitlement to observe, study or test its operation so as to
+ determine the ideas and principles behind any or all constituent
+ elements of said Software. This shall apply when the Licensee
+ carries out any or all loading, displaying, running, transmission
+ or storage operation as regards the Software, that it is entitled
+ to carry out hereunder.
+
+
+ 5.2 ENTITLEMENT TO MAKE CONTRIBUTIONS
+
+The right to make Contributions includes the right to translate, adapt,
+arrange, or make any or all modifications to the Software, and the right
+to reproduce the resulting software.
+
+The Licensee is authorized to make any or all Contributions to the
+Software provided that it includes an explicit notice that it is the
+author of said Contribution and indicates the date of the creation thereof.
+
+
+ 5.3 RIGHT OF DISTRIBUTION
+
+In particular, the right of distribution includes the right to publish,
+transmit and communicate the Software to the general public on any or
+all medium, and by any or all means, and the right to market, either in
+consideration of a fee, or free of charge, one or more copies of the
+Software by any means.
+
+The Licensee is further authorized to distribute copies of the modified
+or unmodified Software to third parties according to the terms and
+conditions set forth hereinafter.
+
+
+ 5.3.1 DISTRIBUTION OF SOFTWARE WITHOUT MODIFICATION
+
+The Licensee is authorized to distribute true copies of the Software in
+Source Code or Object Code form, provided that said distribution
+complies with all the provisions of the Agreement and is accompanied by:
+
+ 1. a copy of the Agreement,
+
+ 2. a notice relating to the limitation of both the Licensor's
+ warranty and liability as set forth in Articles 8 and 9,
+
+and that, in the event that only the Object Code of the Software is
+redistributed, the Licensee allows future Licensees unhindered access to
+the full Source Code of the Software by indicating how to access it, it
+being understood that the additional cost of acquiring the Source Code
+shall not exceed the cost of transferring the data.
+
+
+ 5.3.2 DISTRIBUTION OF MODIFIED SOFTWARE
+
+When the Licensee makes a Contribution to the Software, the terms and
+conditions for the distribution of the resulting Modified Software
+become subject to all the provisions of this Agreement.
+
+The Licensee is authorized to distribute the Modified Software, in
+source code or object code form, provided that said distribution
+complies with all the provisions of the Agreement and is accompanied by:
+
+ 1. a copy of the Agreement,
+
+ 2. a notice relating to the limitation of both the Licensor's
+ warranty and liability as set forth in Articles 8 and 9,
+
+and that, in the event that only the object code of the Modified
+Software is redistributed, the Licensee allows future Licensees
+unhindered access to the full source code of the Modified Software by
+indicating how to access it, it being understood that the additional
+cost of acquiring the source code shall not exceed the cost of
+transferring the data.
+
+
+ 5.3.3 DISTRIBUTION OF EXTERNAL MODULES
+
+When the Licensee has developed an External Module, the terms and
+conditions of this Agreement do not apply to said External Module, that
+may be distributed under a separate license agreement.
+
+
+ 5.3.4 COMPATIBILITY WITH THE GNU GPL
+
+The Licensee can include a code that is subject to the provisions of one
+of the versions of the GNU GPL in the Modified or unmodified Software,
+and distribute that entire code under the terms of the same version of
+the GNU GPL.
+
+The Licensee can include the Modified or unmodified Software in a code
+that is subject to the provisions of one of the versions of the GNU GPL,
+and distribute that entire code under the terms of the same version of
+the GNU GPL.
+
+
+ Article 6 - INTELLECTUAL PROPERTY
+
+
+ 6.1 OVER THE INITIAL SOFTWARE
+
+The Holder owns the economic rights over the Initial Software. Any or
+all use of the Initial Software is subject to compliance with the terms
+and conditions under which the Holder has elected to distribute its work
+and no one shall be entitled to modify the terms and conditions for the
+distribution of said Initial Software.
+
+The Holder undertakes that the Initial Software will remain ruled at
+least by this Agreement, for the duration set forth in Article 4.2.
+
+
+ 6.2 OVER THE CONTRIBUTIONS
+
+The Licensee who develops a Contribution is the owner of the
+intellectual property rights over this Contribution as defined by
+applicable law.
+
+
+ 6.3 OVER THE EXTERNAL MODULES
+
+The Licensee who develops an External Module is the owner of the
+intellectual property rights over this External Module as defined by
+applicable law and is free to choose the type of agreement that shall
+govern its distribution.
+
+
+ 6.4 JOINT PROVISIONS
+
+The Licensee expressly undertakes:
+
+ 1. not to remove, or modify, in any manner, the intellectual property
+ notices attached to the Software;
+
+ 2. to reproduce said notices, in an identical manner, in the copies
+ of the Software modified or not.
+
+The Licensee undertakes not to directly or indirectly infringe the
+intellectual property rights of the Holder and/or Contributors on the
+Software and to take, where applicable, vis-à-vis its staff, any and all
+measures required to ensure respect of said intellectual property rights
+of the Holder and/or Contributors.
+
+
+ Article 7 - RELATED SERVICES
+
+7.1 Under no circumstances shall the Agreement oblige the Licensor to
+provide technical assistance or maintenance services for the Software.
+
+However, the Licensor is entitled to offer this type of services. The
+terms and conditions of such technical assistance, and/or such
+maintenance, shall be set forth in a separate instrument. Only the
+Licensor offering said maintenance and/or technical assistance services
+shall incur liability therefor.
+
+7.2 Similarly, any Licensor is entitled to offer to its licensees, under
+its sole responsibility, a warranty, that shall only be binding upon
+itself, for the redistribution of the Software and/or the Modified
+Software, under terms and conditions that it is free to decide. Said
+warranty, and the financial terms and conditions of its application,
+shall be subject of a separate instrument executed between the Licensor
+and the Licensee.
+
+
+ Article 8 - LIABILITY
+
+8.1 Subject to the provisions of Article 8.2, the Licensee shall be
+entitled to claim compensation for any direct loss it may have suffered
+from the Software as a result of a fault on the part of the relevant
+Licensor, subject to providing evidence thereof.
+
+8.2 The Licensor's liability is limited to the commitments made under
+this Agreement and shall not be incurred as a result of in particular:
+(i) loss due the Licensee's total or partial failure to fulfill its
+obligations, (ii) direct or consequential loss that is suffered by the
+Licensee due to the use or performance of the Software, and (iii) more
+generally, any consequential loss. In particular the Parties expressly
+agree that any or all pecuniary or business loss (i.e. loss of data,
+loss of profits, operating loss, loss of customers or orders,
+opportunity cost, any disturbance to business activities) or any or all
+legal proceedings instituted against the Licensee by a third party,
+shall constitute consequential loss and shall not provide entitlement to
+any or all compensation from the Licensor.
+
+
+ Article 9 - WARRANTY
+
+9.1 The Licensee acknowledges that the scientific and technical
+state-of-the-art when the Software was distributed did not enable all
+possible uses to be tested and verified, nor for the presence of
+possible defects to be detected. In this respect, the Licensee's
+attention has been drawn to the risks associated with loading, using,
+modifying and/or developing and reproducing the Software which are
+reserved for experienced users.
+
+The Licensee shall be responsible for verifying, by any or all means,
+the suitability of the product for its requirements, its good working
+order, and for ensuring that it shall not cause damage to either persons
+or properties.
+
+9.2 The Licensor hereby represents, in good faith, that it is entitled
+to grant all the rights over the Software (including in particular the
+rights set forth in Article 5).
+
+9.3 The Licensee acknowledges that the Software is supplied "as is" by
+the Licensor without any other express or tacit warranty, other than
+that provided for in Article 9.2 and, in particular, without any warranty
+as to its commercial value, its secured, safe, innovative or relevant
+nature.
+
+Specifically, the Licensor does not warrant that the Software is free
+from any error, that it will operate without interruption, that it will
+be compatible with the Licensee's own equipment and software
+configuration, nor that it will meet the Licensee's requirements.
+
+9.4 The Licensor does not either expressly or tacitly warrant that the
+Software does not infringe any third party intellectual property right
+relating to a patent, software or any other property right. Therefore,
+the Licensor disclaims any and all liability towards the Licensee
+arising out of any or all proceedings for infringement that may be
+instituted in respect of the use, modification and redistribution of the
+Software. Nevertheless, should such proceedings be instituted against
+the Licensee, the Licensor shall provide it with technical and legal
+assistance for its defense. Such technical and legal assistance shall be
+decided on a case-by-case basis between the relevant Licensor and the
+Licensee pursuant to a memorandum of understanding. The Licensor
+disclaims any and all liability as regards the Licensee's use of the
+name of the Software. No warranty is given as regards the existence of
+prior rights over the name of the Software or as regards the existence
+of a trademark.
+
+
+ Article 10 - TERMINATION
+
+10.1 In the event of a breach by the Licensee of its obligations
+hereunder, the Licensor may automatically terminate this Agreement
+thirty (30) days after notice has been sent to the Licensee and has
+remained ineffective.
+
+10.2 A Licensee whose Agreement is terminated shall no longer be
+authorized to use, modify or distribute the Software. However, any
+licenses that it may have granted prior to termination of the Agreement
+shall remain valid subject to their having been granted in compliance
+with the terms and conditions hereof.
+
+
+ Article 11 - MISCELLANEOUS
+
+
+ 11.1 EXCUSABLE EVENTS
+
+Neither Party shall be liable for any or all delay, or failure to
+perform the Agreement, that may be attributable to an event of force
+majeure, an act of God or an outside cause, such as defective
+functioning or interruptions of the electricity or telecommunications
+networks, network paralysis following a virus attack, intervention by
+government authorities, natural disasters, water damage, earthquakes,
+fire, explosions, strikes and labor unrest, war, etc.
+
+11.2 Any failure by either Party, on one or more occasions, to invoke
+one or more of the provisions hereof, shall under no circumstances be
+interpreted as being a waiver by the interested Party of its right to
+invoke said provision(s) subsequently.
+
+11.3 The Agreement cancels and replaces any or all previous agreements,
+whether written or oral, between the Parties and having the same
+purpose, and constitutes the entirety of the agreement between said
+Parties concerning said purpose. No supplement or modification to the
+terms and conditions hereof shall be effective as between the Parties
+unless it is made in writing and signed by their duly authorized
+representatives.
+
+11.4 In the event that one or more of the provisions hereof were to
+conflict with a current or future applicable act or legislative text,
+said act or legislative text shall prevail, and the Parties shall make
+the necessary amendments so as to comply with said act or legislative
+text. All other provisions shall remain effective. Similarly, invalidity
+of a provision of the Agreement, for any reason whatsoever, shall not
+cause the Agreement as a whole to be invalid.
+
+
+ 11.5 LANGUAGE
+
+The Agreement is drafted in both French and English and both versions
+are deemed authentic.
+
+
+ Article 12 - NEW VERSIONS OF THE AGREEMENT
+
+12.1 Any person is authorized to duplicate and distribute copies of this
+Agreement.
+
+12.2 So as to ensure coherence, the wording of this Agreement is
+protected and may only be modified by the authors of the License, who
+reserve the right to periodically publish updates or new versions of the
+Agreement, each with a separate number. These subsequent versions may
+address new issues encountered by Free Software.
+
+12.3 Any Software distributed under a given version of the Agreement may
+only be subsequently distributed under the same version of the Agreement
+or a subsequent version, subject to the provisions of Article 5.3.4.
+
+
+ Article 13 - GOVERNING LAW AND JURISDICTION
+
+13.1 The Agreement is governed by French law. The Parties agree to
+endeavor to seek an amicable solution to any disagreements or disputes
+that may arise during the performance of the Agreement.
+
+13.2 Failing an amicable solution within two (2) months as from their
+occurrence, and unless emergency proceedings are necessary, the
+disagreements or disputes shall be referred to the Paris Courts having
+jurisdiction, by the more diligent Party.
+
+
+Version 2.0 dated 2006-09-05.
diff --git a/Licence_CeCILL_V2-fr.txt b/Licence_CeCILL_V2-fr.txt
new file mode 100644
index 0000000..1613fca
--- /dev/null
+++ b/Licence_CeCILL_V2-fr.txt
@@ -0,0 +1,512 @@
+
+CONTRAT DE LICENCE DE LOGICIEL LIBRE CeCILL
+
+
+ Avertissement
+
+Ce contrat est une licence de logiciel libre issue d'une concertation
+entre ses auteurs afin que le respect de deux grands principes préside à
+sa rédaction:
+
+ * d'une part, le respect des principes de diffusion des logiciels
+ libres: accès au code source, droits étendus conférés aux
+ utilisateurs,
+ * d'autre part, la désignation d'un droit applicable, le droit
+ français, auquel elle est conforme, tant au regard du droit de la
+ responsabilité civile que du droit de la propriété intellectuelle
+ et de la protection qu'il offre aux auteurs et titulaires des
+ droits patrimoniaux sur un logiciel.
+
+Les auteurs de la licence CeCILL (pour Ce[a] C[nrs] I[nria] L[ogiciel]
+L[ibre]) sont:
+
+Commissariat à l'Energie Atomique - CEA, établissement public de
+recherche à caractère scientifique, technique et industriel, dont le
+siège est situé 25 rue Leblanc, immeuble Le Ponant D, 75015 Paris.
+
+Centre National de la Recherche Scientifique - CNRS, établissement
+public à caractère scientifique et technologique, dont le siège est
+situé 3 rue Michel-Ange, 75794 Paris cedex 16.
+
+Institut National de Recherche en Informatique et en Automatique -
+INRIA, établissement public à caractère scientifique et technologique,
+dont le siège est situé Domaine de Voluceau, Rocquencourt, BP 105, 78153
+Le Chesnay cedex.
+
+
+ Préambule
+
+Ce contrat est une licence de logiciel libre dont l'objectif est de
+conférer aux utilisateurs la liberté de modification et de
+redistribution du logiciel régi par cette licence dans le cadre d'un
+modèle de diffusion en logiciel libre.
+
+L'exercice de ces libertés est assorti de certains devoirs à la charge
+des utilisateurs afin de préserver ce statut au cours des
+redistributions ultérieures.
+
+L'accessibilité au code source et les droits de copie, de modification
+et de redistribution qui en découlent ont pour contrepartie de n'offrir
+aux utilisateurs qu'une garantie limitée et de ne faire peser sur
+l'auteur du logiciel, le titulaire des droits patrimoniaux et les
+concédants successifs qu'une responsabilité restreinte.
+
+A cet égard l'attention de l'utilisateur est attirée sur les risques
+associés au chargement, à l'utilisation, à la modification et/ou au
+développement et à la reproduction du logiciel par l'utilisateur étant
+donné sa spécificité de logiciel libre, qui peut le rendre complexe à
+manipuler et qui le réserve donc à des développeurs ou des
+professionnels avertis possédant des connaissances informatiques
+approfondies. Les utilisateurs sont donc invités à charger et tester
+l'adéquation du logiciel à leurs besoins dans des conditions permettant
+d'assurer la sécurité de leurs systèmes et/ou de leurs données et, plus
+généralement, à l'utiliser et l'exploiter dans les mêmes conditions de
+sécurité. Ce contrat peut être reproduit et diffusé librement, sous
+réserve de le conserver en l'état, sans ajout ni suppression de clauses.
+
+Ce contrat est susceptible de s'appliquer à tout logiciel dont le
+titulaire des droits patrimoniaux décide de soumettre l'exploitation aux
+dispositions qu'il contient.
+
+
+ Article 1 - DEFINITIONS
+
+Dans ce contrat, les termes suivants, lorsqu'ils seront écrits avec une
+lettre capitale, auront la signification suivante:
+
+Contrat: désigne le présent contrat de licence, ses éventuelles versions
+postérieures et annexes.
+
+Logiciel: désigne le logiciel sous sa forme de Code Objet et/ou de Code
+Source et le cas échéant sa documentation, dans leur état au moment de
+l'acceptation du Contrat par le Licencié.
+
+Logiciel Initial: désigne le Logiciel sous sa forme de Code Source et
+éventuellement de Code Objet et le cas échéant sa documentation, dans
+leur état au moment de leur première diffusion sous les termes du Contrat.
+
+Logiciel Modifié: désigne le Logiciel modifié par au moins une
+Contribution.
+
+Code Source: désigne l'ensemble des instructions et des lignes de
+programme du Logiciel et auquel l'accès est nécessaire en vue de
+modifier le Logiciel.
+
+Code Objet: désigne les fichiers binaires issus de la compilation du
+Code Source.
+
+Titulaire: désigne le ou les détenteurs des droits patrimoniaux d'auteur
+sur le Logiciel Initial.
+
+Licencié: désigne le ou les utilisateurs du Logiciel ayant accepté le
+Contrat.
+
+Contributeur: désigne le Licencié auteur d'au moins une Contribution.
+
+Concédant: désigne le Titulaire ou toute personne physique ou morale
+distribuant le Logiciel sous le Contrat.
+
+Contribution: désigne l'ensemble des modifications, corrections,
+traductions, adaptations et/ou nouvelles fonctionnalités intégrées dans
+le Logiciel par tout Contributeur, ainsi que tout Module Interne.
+
+Module: désigne un ensemble de fichiers sources y compris leur
+documentation qui permet de réaliser des fonctionnalités ou services
+supplémentaires à ceux fournis par le Logiciel.
+
+Module Externe: désigne tout Module, non dérivé du Logiciel, tel que ce
+Module et le Logiciel s'exécutent dans des espaces d'adressage
+différents, l'un appelant l'autre au moment de leur exécution.
+
+Module Interne: désigne tout Module lié au Logiciel de telle sorte
+qu'ils s'exécutent dans le même espace d'adressage.
+
+GNU GPL: désigne la GNU General Public License dans sa version 2 ou
+toute version ultérieure, telle que publiée par Free Software Foundation
+Inc.
+
+Parties: désigne collectivement le Licencié et le Concédant.
+
+Ces termes s'entendent au singulier comme au pluriel.
+
+
+ Article 2 - OBJET
+
+Le Contrat a pour objet la concession par le Concédant au Licencié d'une
+licence non exclusive, cessible et mondiale du Logiciel telle que
+définie ci-après à l'article 5 pour toute la durée de protection des droits
+portant sur ce Logiciel.
+
+
+ Article 3 - ACCEPTATION
+
+3.1 L'acceptation par le Licencié des termes du Contrat est réputée
+acquise du fait du premier des faits suivants:
+
+ * (i) le chargement du Logiciel par tout moyen notamment par
+ téléchargement à partir d'un serveur distant ou par chargement à
+ partir d'un support physique;
+ * (ii) le premier exercice par le Licencié de l'un quelconque des
+ droits concédés par le Contrat.
+
+3.2 Un exemplaire du Contrat, contenant notamment un avertissement
+relatif aux spécificités du Logiciel, à la restriction de garantie et à
+la limitation à un usage par des utilisateurs expérimentés a été mis à
+disposition du Licencié préalablement à son acceptation telle que
+définie à l'article 3.1 ci dessus et le Licencié reconnaît en avoir pris
+connaissance.
+
+
+ Article 4 - ENTREE EN VIGUEUR ET DUREE
+
+
+ 4.1 ENTREE EN VIGUEUR
+
+Le Contrat entre en vigueur à la date de son acceptation par le Licencié
+telle que définie en 3.1.
+
+
+ 4.2 DUREE
+
+Le Contrat produira ses effets pendant toute la durée légale de
+protection des droits patrimoniaux portant sur le Logiciel.
+
+
+ Article 5 - ETENDUE DES DROITS CONCEDES
+
+Le Concédant concède au Licencié, qui accepte, les droits suivants sur
+le Logiciel pour toutes destinations et pour la durée du Contrat dans
+les conditions ci-après détaillées.
+
+Par ailleurs, si le Concédant détient ou venait à détenir un ou
+plusieurs brevets d'invention protégeant tout ou partie des
+fonctionnalités du Logiciel ou de ses composants, il s'engage à ne pas
+opposer les éventuels droits conférés par ces brevets aux Licenciés
+successifs qui utiliseraient, exploiteraient ou modifieraient le
+Logiciel. En cas de cession de ces brevets, le Concédant s'engage à
+faire reprendre les obligations du présent alinéa aux cessionnaires.
+
+
+ 5.1 DROIT D'UTILISATION
+
+Le Licencié est autorisé à utiliser le Logiciel, sans restriction quant
+aux domaines d'application, étant ci-après précisé que cela comporte:
+
+ 1. la reproduction permanente ou provisoire du Logiciel en tout ou
+ partie par tout moyen et sous toute forme.
+
+ 2. le chargement, l'affichage, l'exécution, ou le stockage du
+ Logiciel sur tout support.
+
+ 3. la possibilité d'en observer, d'en étudier, ou d'en tester le
+ fonctionnement afin de déterminer les idées et principes qui sont
+ à la base de n'importe quel élément de ce Logiciel; et ceci,
+ lorsque le Licencié effectue toute opération de chargement,
+ d'affichage, d'exécution, de transmission ou de stockage du
+ Logiciel qu'il est en droit d'effectuer en vertu du Contrat.
+
+
+ 5.2 DROIT D'APPORTER DES CONTRIBUTIONS
+
+Le droit d'apporter des Contributions comporte le droit de traduire,
+d'adapter, d'arranger ou d'apporter toute autre modification au Logiciel
+et le droit de reproduire le logiciel en résultant.
+
+Le Licencié est autorisé à apporter toute Contribution au Logiciel sous
+réserve de mentionner, de façon explicite, son nom en tant qu'auteur de
+cette Contribution et la date de création de celle-ci.
+
+
+ 5.3 DROIT DE DISTRIBUTION
+
+Le droit de distribution comporte notamment le droit de diffuser, de
+transmettre et de communiquer le Logiciel au public sur tout support et
+par tout moyen ainsi que le droit de mettre sur le marché à titre
+onéreux ou gratuit, un ou des exemplaires du Logiciel par tout procédé.
+
+Le Licencié est autorisé à distribuer des copies du Logiciel, modifié ou
+non, à des tiers dans les conditions ci-après détaillées.
+
+
+ 5.3.1 DISTRIBUTION DU LOGICIEL SANS MODIFICATION
+
+Le Licencié est autorisé à distribuer des copies conformes du Logiciel,
+sous forme de Code Source ou de Code Objet, à condition que cette
+distribution respecte les dispositions du Contrat dans leur totalité et
+soit accompagnée:
+
+ 1. d'un exemplaire du Contrat,
+
+ 2. d'un avertissement relatif à la restriction de garantie et de
+ responsabilité du Concédant telle que prévue aux articles 8
+ et 9,
+
+et que, dans le cas où seul le Code Objet du Logiciel est redistribué,
+le Licencié permette aux futurs Licenciés d'accéder facilement au Code
+Source complet du Logiciel en indiquant les modalités d'accès, étant
+entendu que le coût additionnel d'acquisition du Code Source ne devra
+pas excéder le simple coût de transfert des données.
+
+
+ 5.3.2 DISTRIBUTION DU LOGICIEL MODIFIE
+
+Lorsque le Licencié apporte une Contribution au Logiciel, les conditions
+de distribution du Logiciel Modifié en résultant sont alors soumises à
+l'intégralité des dispositions du Contrat.
+
+Le Licencié est autorisé à distribuer le Logiciel Modifié, sous forme de
+code source ou de code objet, à condition que cette distribution
+respecte les dispositions du Contrat dans leur totalité et soit
+accompagnée:
+
+ 1. d'un exemplaire du Contrat,
+
+ 2. d'un avertissement relatif à la restriction de garantie et de
+ responsabilité du Concédant telle que prévue aux articles 8
+ et 9,
+
+et que, dans le cas où seul le code objet du Logiciel Modifié est
+redistribué, le Licencié permette aux futurs Licenciés d'accéder
+facilement au code source complet du Logiciel Modifié en indiquant les
+modalités d'accès, étant entendu que le coût additionnel d'acquisition
+du code source ne devra pas excéder le simple coût de transfert des données.
+
+
+ 5.3.3 DISTRIBUTION DES MODULES EXTERNES
+
+Lorsque le Licencié a développé un Module Externe les conditions du
+Contrat ne s'appliquent pas à ce Module Externe, qui peut être distribué
+sous un contrat de licence différent.
+
+
+ 5.3.4 COMPATIBILITE AVEC LA LICENCE GNU GPL
+
+Le Licencié peut inclure un code soumis aux dispositions d'une des
+versions de la licence GNU GPL dans le Logiciel modifié ou non et
+distribuer l'ensemble sous les conditions de la même version de la
+licence GNU GPL.
+
+Le Licencié peut inclure le Logiciel modifié ou non dans un code soumis
+aux dispositions d'une des versions de la licence GNU GPL et distribuer
+l'ensemble sous les conditions de la même version de la licence GNU GPL.
+
+
+ Article 6 - PROPRIETE INTELLECTUELLE
+
+
+ 6.1 SUR LE LOGICIEL INITIAL
+
+Le Titulaire est détenteur des droits patrimoniaux sur le Logiciel
+Initial. Toute utilisation du Logiciel Initial est soumise au respect
+des conditions dans lesquelles le Titulaire a choisi de diffuser son
+oeuvre et nul autre n'a la faculté de modifier les conditions de
+diffusion de ce Logiciel Initial.
+
+Le Titulaire s'engage à ce que le Logiciel Initial reste au moins régi
+par le Contrat et ce, pour la durée visée à l'article 4.2.
+
+
+ 6.2 SUR LES CONTRIBUTIONS
+
+Le Licencié qui a développé une Contribution est titulaire sur celle-ci
+des droits de propriété intellectuelle dans les conditions définies par
+la législation applicable.
+
+
+ 6.3 SUR LES MODULES EXTERNES
+
+Le Licencié qui a développé un Module Externe est titulaire sur celui-ci
+des droits de propriété intellectuelle dans les conditions définies par
+la législation applicable et reste libre du choix du contrat régissant
+sa diffusion.
+
+
+ 6.4 DISPOSITIONS COMMUNES
+
+Le Licencié s'engage expressément:
+
+ 1. à ne pas supprimer ou modifier de quelque manière que ce soit les
+ mentions de propriété intellectuelle apposées sur le Logiciel;
+
+ 2. à reproduire à l'identique lesdites mentions de propriété
+ intellectuelle sur les copies du Logiciel modifié ou non.
+
+Le Licencié s'engage à ne pas porter atteinte, directement ou
+indirectement, aux droits de propriété intellectuelle du Titulaire et/ou
+des Contributeurs sur le Logiciel et à prendre, le cas échéant, à
+l'égard de son personnel toutes les mesures nécessaires pour assurer le
+respect des dits droits de propriété intellectuelle du Titulaire et/ou
+des Contributeurs.
+
+
+ Article 7 - SERVICES ASSOCIES
+
+7.1 Le Contrat n'oblige en aucun cas le Concédant à la réalisation de
+prestations d'assistance technique ou de maintenance du Logiciel.
+
+Cependant le Concédant reste libre de proposer ce type de services. Les
+termes et conditions d'une telle assistance technique et/ou d'une telle
+maintenance seront alors déterminés dans un acte séparé. Ces actes de
+maintenance et/ou assistance technique n'engageront que la seule
+responsabilité du Concédant qui les propose.
+
+7.2 De même, tout Concédant est libre de proposer, sous sa seule
+responsabilité, à ses licenciés une garantie, qui n'engagera que lui,
+lors de la redistribution du Logiciel et/ou du Logiciel Modifié et ce,
+dans les conditions qu'il souhaite. Cette garantie et les modalités
+financières de son application feront l'objet d'un acte séparé entre le
+Concédant et le Licencié.
+
+
+ Article 8 - RESPONSABILITE
+
+8.1 Sous réserve des dispositions de l'article 8.2, le Licencié a la
+faculté, sous réserve de prouver la faute du Concédant concerné, de
+solliciter la réparation du préjudice direct qu'il subirait du fait du
+Logiciel et dont il apportera la preuve.
+
+8.2 La responsabilité du Concédant est limitée aux engagements pris en
+application du Contrat et ne saurait être engagée en raison notamment:
+(i) des dommages dus à l'inexécution, totale ou partielle, de ses
+obligations par le Licencié, (ii) des dommages directs ou indirects
+découlant de l'utilisation ou des performances du Logiciel subis par le
+Licencié et (iii) plus généralement d'un quelconque dommage indirect. En
+particulier, les Parties conviennent expressément que tout préjudice
+financier ou commercial (par exemple perte de données, perte de
+bénéfices, perte d'exploitation, perte de clientèle ou de commandes,
+manque à gagner, trouble commercial quelconque) ou toute action dirigée
+contre le Licencié par un tiers, constitue un dommage indirect et
+n'ouvre pas droit à réparation par le Concédant.
+
+
+ Article 9 - GARANTIE
+
+9.1 Le Licencié reconnaît que l'état actuel des connaissances
+scientifiques et techniques au moment de la mise en circulation du
+Logiciel ne permet pas d'en tester et d'en vérifier toutes les
+utilisations ni de détecter l'existence d'éventuels défauts. L'attention
+du Licencié a été attirée sur ce point sur les risques associés au
+chargement, à l'utilisation, la modification et/ou au développement et à
+la reproduction du Logiciel qui sont réservés à des utilisateurs avertis.
+
+Il relève de la responsabilité du Licencié de contrôler, par tous
+moyens, l'adéquation du produit à ses besoins, son bon fonctionnement et
+de s'assurer qu'il ne causera pas de dommages aux personnes et aux biens.
+
+9.2 Le Concédant déclare de bonne foi être en droit de concéder
+l'ensemble des droits attachés au Logiciel (comprenant notamment les
+droits visés à l'article 5).
+
+9.3 Le Licencié reconnaît que le Logiciel est fourni "en l'état" par le
+Concédant sans autre garantie, expresse ou tacite, que celle prévue à
+l'article 9.2 et notamment sans aucune garantie sur sa valeur commerciale,
+son caractère sécurisé, innovant ou pertinent.
+
+En particulier, le Concédant ne garantit pas que le Logiciel est exempt
+d'erreur, qu'il fonctionnera sans interruption, qu'il sera compatible
+avec l'équipement du Licencié et sa configuration logicielle ni qu'il
+remplira les besoins du Licencié.
+
+9.4 Le Concédant ne garantit pas, de manière expresse ou tacite, que le
+Logiciel ne porte pas atteinte à un quelconque droit de propriété
+intellectuelle d'un tiers portant sur un brevet, un logiciel ou sur tout
+autre droit de propriété. Ainsi, le Concédant exclut toute garantie au
+profit du Licencié contre les actions en contrefaçon qui pourraient être
+diligentées au titre de l'utilisation, de la modification, et de la
+redistribution du Logiciel. Néanmoins, si de telles actions sont
+exercées contre le Licencié, le Concédant lui apportera son aide
+technique et juridique pour sa défense. Cette aide technique et
+juridique est déterminée au cas par cas entre le Concédant concerné et
+le Licencié dans le cadre d'un protocole d'accord. Le Concédant dégage
+toute responsabilité quant à l'utilisation de la dénomination du
+Logiciel par le Licencié. Aucune garantie n'est apportée quant à
+l'existence de droits antérieurs sur le nom du Logiciel et sur
+l'existence d'une marque.
+
+
+ Article 10 - RESILIATION
+
+10.1 En cas de manquement par le Licencié aux obligations mises à sa
+charge par le Contrat, le Concédant pourra résilier de plein droit le
+Contrat trente (30) jours après notification adressée au Licencié et
+restée sans effet.
+
+10.2 Le Licencié dont le Contrat est résilié n'est plus autorisé à
+utiliser, modifier ou distribuer le Logiciel. Cependant, toutes les
+licences qu'il aura concédées antérieurement à la résiliation du Contrat
+resteront valides sous réserve qu'elles aient été effectuées en
+conformité avec le Contrat.
+
+
+ Article 11 - DISPOSITIONS DIVERSES
+
+
+ 11.1 CAUSE EXTERIEURE
+
+Aucune des Parties ne sera responsable d'un retard ou d'une défaillance
+d'exécution du Contrat qui serait dû à un cas de force majeure, un cas
+fortuit ou une cause extérieure, telle que, notamment, le mauvais
+fonctionnement ou les interruptions du réseau électrique ou de
+télécommunication, la paralysie du réseau liée à une attaque
+informatique, l'intervention des autorités gouvernementales, les
+catastrophes naturelles, les dégâts des eaux, les tremblements de terre,
+le feu, les explosions, les grèves et les conflits sociaux, l'état de
+guerre...
+
+11.2 Le fait, par l'une ou l'autre des Parties, d'omettre en une ou
+plusieurs occasions de se prévaloir d'une ou plusieurs dispositions du
+Contrat, ne pourra en aucun cas impliquer renonciation par la Partie
+intéressée à s'en prévaloir ultérieurement.
+
+11.3 Le Contrat annule et remplace toute convention antérieure, écrite
+ou orale, entre les Parties sur le même objet et constitue l'accord
+entier entre les Parties sur cet objet. Aucune addition ou modification
+aux termes du Contrat n'aura d'effet à l'égard des Parties à moins
+d'être faite par écrit et signée par leurs représentants dûment habilités.
+
+11.4 Dans l'hypothèse où une ou plusieurs des dispositions du Contrat
+s'avèrerait contraire à une loi ou à un texte applicable, existants ou
+futurs, cette loi ou ce texte prévaudrait, et les Parties feraient les
+amendements nécessaires pour se conformer à cette loi ou à ce texte.
+Toutes les autres dispositions resteront en vigueur. De même, la
+nullité, pour quelque raison que ce soit, d'une des dispositions du
+Contrat ne saurait entraîner la nullité de l'ensemble du Contrat.
+
+
+ 11.5 LANGUE
+
+Le Contrat est rédigé en langue française et en langue anglaise, ces
+deux versions faisant également foi.
+
+
+ Article 12 - NOUVELLES VERSIONS DU CONTRAT
+
+12.1 Toute personne est autorisée à copier et distribuer des copies de
+ce Contrat.
+
+12.2 Afin d'en préserver la cohérence, le texte du Contrat est protégé
+et ne peut être modifié que par les auteurs de la licence, lesquels se
+réservent le droit de publier périodiquement des mises à jour ou de
+nouvelles versions du Contrat, qui posséderont chacune un numéro
+distinct. Ces versions ultérieures seront susceptibles de prendre en
+compte de nouvelles problématiques rencontrées par les logiciels libres.
+
+12.3 Tout Logiciel diffusé sous une version donnée du Contrat ne pourra
+faire l'objet d'une diffusion ultérieure que sous la même version du
+Contrat ou une version postérieure, sous réserve des dispositions de
+l'article 5.3.4.
+
+
+ Article 13 - LOI APPLICABLE ET COMPETENCE TERRITORIALE
+
+13.1 Le Contrat est régi par la loi française. Les Parties conviennent
+de tenter de régler à l'amiable les différends ou litiges qui
+viendraient à se produire par suite ou à l'occasion du Contrat.
+
+13.2 A défaut d'accord amiable dans un délai de deux (2) mois à compter
+de leur survenance et sauf situation relevant d'une procédure d'urgence,
+les différends ou litiges seront portés par la Partie la plus diligente
+devant les Tribunaux compétents de Paris.
+
+
+Version 2.0 du 2006-09-05.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..9e83793
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,52 @@
+EXEC=sumaclust
+
+SUMACLUST_SRC= sumaclust.c \
+ mtcompare_sumaclust.c
+
+
+SUMACLUST_OBJ= $(patsubst %.c,%.o,$(SUMACLUST_SRC))
+
+
+SRCS= $(SUMACLUST_SRC)
+
+LIB= -lfasta -llcs -lfile -lutils -lm #-ll
+
+
+include ./global.mk
+
+all: $(EXEC)
+
+
+########
+#
+# sumaclust compilation
+#
+########
+
+# executable compilation and link
+
+ifeq ($(CC),gcc)
+ LFLAGS = -fopenmp
+else
+ LFLAGS =
+endif
+
+sumaclust: $(SUMACLUST_OBJ) $(LIBFASTA) $(LIBLCS) $(LIBFILE) $(LIBUTILS)
+ $(CC) $(LDFLAGS) -o $@ $(LFLAGS) $(SUMACLUST_OBJ) $(LIB) $(LIBFASTAPATH) $(LIBLCSPATH) $(LIBFILEPATH) $(LIBUTILSPATH)
+
+########
+#
+# project management
+#
+########
+
+clean:
+ rm -f *.o
+ rm -f *.P
+ rm -f $(EXEC)
+ $(MAKE) -C ./sumalibs/libfasta clean
+ $(MAKE) -C ./sumalibs/liblcs clean
+ $(MAKE) -C ./sumalibs/libfile clean
+ $(MAKE) -C ./sumalibs/libutils clean
+
+
diff --git a/debian/changelog b/debian/changelog
deleted file mode 100644
index ec4ab7e..0000000
--- a/debian/changelog
+++ /dev/null
@@ -1,12 +0,0 @@
-sumaclust (1.0.20-1) unstable; urgency=medium
-
- * New upstream version
- * cme fix dpkg-control
-
- -- Andreas Tille <tille at debian.org> Thu, 19 May 2016 13:02:05 +0200
-
-sumaclust (1.0.10-1) unstable; urgency=medium
-
- * Initial upload to Debian (Closes: #795176)
-
- -- Andreas Tille <tille at debian.org> Tue, 11 Aug 2015 14:59:19 +0200
diff --git a/debian/compat b/debian/compat
deleted file mode 100644
index ec63514..0000000
--- a/debian/compat
+++ /dev/null
@@ -1 +0,0 @@
-9
diff --git a/debian/control b/debian/control
deleted file mode 100644
index f255848..0000000
--- a/debian/control
+++ /dev/null
@@ -1,27 +0,0 @@
-Source: sumaclust
-Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
-Uploaders: Tim Booth <tbooth at ceh.ac.uk>,
- Andreas Tille <tille at debian.org>
-Section: science
-Priority: optional
-Build-Depends: debhelper (>= 9)
-Standards-Version: 3.9.8
-Vcs-Browser: https://anonscm.debian.org/viewvc/debian-med/trunk/packages/sumaclust/trunk/
-Vcs-Svn: svn://anonscm.debian.org/debian-med/trunk/packages/sumaclust/trunk/
-Homepage: http://metabarcoding.org/sumaclust
-
-Package: sumaclust
-Architecture: any
-Depends: ${shlibs:Depends},
- ${misc:Depends}
-Description: fast and exact clustering of genomic sequences
- With the development of next-generation sequencing, efficient tools are
- needed to handle millions of sequences in reasonable amounts of time.
- Sumaclust is a program developed by the LECA. Sumaclust aims to cluster
- sequences in a way that is fast and exact at the same time. This tool
- has been developed to be adapted to the type of data generated by DNA
- metabarcoding, i.e. entirely sequenced, short markers. Sumaclust
- clusters sequences using the same clustering algorithm as UCLUST and CD-
- HIT. This algorithm is mainly useful to detect the 'erroneous' sequences
- created during amplification and sequencing protocols, deriving from
- 'true' sequences.
diff --git a/debian/copyright b/debian/copyright
deleted file mode 100644
index 19beeea..0000000
--- a/debian/copyright
+++ /dev/null
@@ -1,519 +0,0 @@
-Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
-Upstream-Contact: Céline Mercier <celine.mercier at metabarcoding.org>
-Source: https://git.metabarcoding.org/obitools/sumaclust
-
-Files: *
-Copyright: 2013-2015 Tiayyba Riaz
- Celine Mercier
- Eric Coissac
-License: CeCILL-2
-
-Files: debian/*
-Copyright: 2015 Andreas Tille <tille at debian.org>
-License: CeCILL-2
-
-License: CeCILL-2
- CeCILL FREE SOFTWARE LICENSE AGREEMENT
- .
- Notice
- .
- This Agreement is a Free Software license agreement that is the result
- of discussions between its authors in order to ensure compliance with
- the two main principles guiding its drafting:
- .
- * firstly, compliance with the principles governing the distribution
- of Free Software: access to source code, broad rights granted to
- users,
- * secondly, the election of a governing law, French law, with which
- it is conformant, both as regards the law of torts and
- intellectual property law, and the protection that it offers to
- both authors and holders of the economic rights over software.
- .
- The authors of the CeCILL (for Ce[a] C[nrs] I[nria] L[ogiciel] L[ibre])
- license are:
- .
- Commissariat à l'Energie Atomique - CEA, a public scientific, technical
- and industrial research establishment, having its principal place of
- business at 25 rue Leblanc, immeuble Le Ponant D, 75015 Paris, France.
- .
- Centre National de la Recherche Scientifique - CNRS, a public scientific
- and technological establishment, having its principal place of business
- at 3 rue Michel-Ange, 75794 Paris cedex 16, France.
- .
- Institut National de Recherche en Informatique et en Automatique -
- INRIA, a public scientific and technological establishment, having its
- principal place of business at Domaine de Voluceau, Rocquencourt, BP
- 105, 78153 Le Chesnay cedex, France.
- .
- .
- Preamble
- .
- The purpose of this Free Software license agreement is to grant users
- the right to modify and redistribute the software governed by this
- license within the framework of an open source distribution model.
- .
- The exercising of these rights is conditional upon certain obligations
- for users so as to preserve this status for all subsequent redistributions.
- .
- In consideration of access to the source code and the rights to copy,
- modify and redistribute granted by the license, users are provided only
- with a limited warranty and the software's author, the holder of the
- economic rights, and the successive licensors only have limited liability.
- .
- In this respect, the risks associated with loading, using, modifying
- and/or developing or reproducing the software by the user are brought to
- the user's attention, given its Free Software status, which may make it
- complicated to use, with the result that its use is reserved for
- developers and experienced professionals having in-depth computer
- knowledge. Users are therefore encouraged to load and test the
- suitability of the software as regards their requirements in conditions
- enabling the security of their systems and/or data to be ensured and,
- more generally, to use and operate it in the same conditions of
- security. This Agreement may be freely reproduced and published,
- provided it is not altered, and that no provisions are either added or
- removed herefrom.
- .
- This Agreement may apply to any or all software for which the holder of
- the economic rights decides to submit the use thereof to its provisions.
- .
- .
- Article 1 - DEFINITIONS
- .
- For the purpose of this Agreement, when the following expressions
- commence with a capital letter, they shall have the following meaning:
- .
- Agreement: means this license agreement, and its possible subsequent
- versions and annexes.
- .
- Software: means the software in its Object Code and/or Source Code form
- and, where applicable, its documentation, "as is" when the Licensee
- accepts the Agreement.
- .
- Initial Software: means the Software in its Source Code and possibly its
- Object Code form and, where applicable, its documentation, "as is" when
- it is first distributed under the terms and conditions of the Agreement.
- .
- Modified Software: means the Software modified by at least one
- Contribution.
- .
- Source Code: means all the Software's instructions and program lines to
- which access is required so as to modify the Software.
- .
- Object Code: means the binary files originating from the compilation of
- the Source Code.
- .
- Holder: means the holder(s) of the economic rights over the Initial
- Software.
- .
- Licensee: means the Software user(s) having accepted the Agreement.
- .
- Contributor: means a Licensee having made at least one Contribution.
- .
- Licensor: means the Holder, or any other individual or legal entity, who
- distributes the Software under the Agreement.
- .
- Contribution: means any or all modifications, corrections, translations,
- adaptations and/or new functions integrated into the Software by any or
- all Contributors, as well as any or all Internal Modules.
- .
- Module: means a set of sources files including their documentation that
- enables supplementary functions or services in addition to those offered
- by the Software.
- .
- External Module: means any or all Modules, not derived from the
- Software, so that this Module and the Software run in separate address
- spaces, with one calling the other when they are run.
- .
- Internal Module: means any or all Module, connected to the Software so
- that they both execute in the same address space.
- .
- GNU GPL: means the GNU General Public License version 2 or any
- subsequent version, as published by the Free Software Foundation Inc.
- .
- Parties: mean both the Licensee and the Licensor.
- .
- These expressions may be used both in singular and plural form.
- .
- .
- Article 2 - PURPOSE
- .
- The purpose of the Agreement is the grant by the Licensor to the
- Licensee of a non-exclusive, transferable and worldwide license for the
- Software as set forth in Article 5 hereinafter for the whole term of the
- protection granted by the rights over said Software.
- .
- .
- Article 3 - ACCEPTANCE
- .
- 3.1 The Licensee shall be deemed as having accepted the terms and
- conditions of this Agreement upon the occurrence of the first of the
- following events:
- .
- * (i) loading the Software by any or all means, notably, by
- downloading from a remote server, or by loading from a physical
- medium;
- * (ii) the first time the Licensee exercises any of the rights
- granted hereunder.
- .
- 3.2 One copy of the Agreement, containing a notice relating to the
- characteristics of the Software, to the limited warranty, and to the
- fact that its use is restricted to experienced users has been provided
- to the Licensee prior to its acceptance as set forth in Article 3.1
- hereinabove, and the Licensee hereby acknowledges that it has read and
- understood it.
- .
- .
- Article 4 - EFFECTIVE DATE AND TERM
- .
- .
- 4.1 EFFECTIVE DATE
- .
- The Agreement shall become effective on the date when it is accepted by
- the Licensee as set forth in Article 3.1.
- .
- .
- 4.2 TERM
- .
- The Agreement shall remain in force for the entire legal term of
- protection of the economic rights over the Software.
- .
- .
- Article 5 - SCOPE OF RIGHTS GRANTED
- .
- The Licensor hereby grants to the Licensee, who accepts, the following
- rights over the Software for any or all use, and for the term of the
- Agreement, on the basis of the terms and conditions set forth hereinafter.
- .
- Besides, if the Licensor owns or comes to own one or more patents
- protecting all or part of the functions of the Software or of its
- components, the Licensor undertakes not to enforce the rights granted by
- these patents against successive Licensees using, exploiting or
- modifying the Software. If these patents are transferred, the Licensor
- undertakes to have the transferees subscribe to the obligations set
- forth in this paragraph.
- .
- .
- 5.1 RIGHT OF USE
- .
- The Licensee is authorized to use the Software, without any limitation
- as to its fields of application, with it being hereinafter specified
- that this comprises:
- .
- 1. permanent or temporary reproduction of all or part of the Software
- by any or all means and in any or all form.
- .
- 2. loading, displaying, running, or storing the Software on any or
- all medium.
- .
- 3. entitlement to observe, study or test its operation so as to
- determine the ideas and principles behind any or all constituent
- elements of said Software. This shall apply when the Licensee
- carries out any or all loading, displaying, running, transmission
- or storage operation as regards the Software, that it is entitled
- to carry out hereunder.
- .
- .
- 5.2 ENTITLEMENT TO MAKE CONTRIBUTIONS
- .
- The right to make Contributions includes the right to translate, adapt,
- arrange, or make any or all modifications to the Software, and the right
- to reproduce the resulting software.
- .
- The Licensee is authorized to make any or all Contributions to the
- Software provided that it includes an explicit notice that it is the
- author of said Contribution and indicates the date of the creation thereof.
- .
- .
- 5.3 RIGHT OF DISTRIBUTION
- .
- In particular, the right of distribution includes the right to publish,
- transmit and communicate the Software to the general public on any or
- all medium, and by any or all means, and the right to market, either in
- consideration of a fee, or free of charge, one or more copies of the
- Software by any means.
- .
- The Licensee is further authorized to distribute copies of the modified
- or unmodified Software to third parties according to the terms and
- conditions set forth hereinafter.
- .
- .
- 5.3.1 DISTRIBUTION OF SOFTWARE WITHOUT MODIFICATION
- .
- The Licensee is authorized to distribute true copies of the Software in
- Source Code or Object Code form, provided that said distribution
- complies with all the provisions of the Agreement and is accompanied by:
- .
- 1. a copy of the Agreement,
- .
- 2. a notice relating to the limitation of both the Licensor's
- warranty and liability as set forth in Articles 8 and 9,
- .
- and that, in the event that only the Object Code of the Software is
- redistributed, the Licensee allows future Licensees unhindered access to
- the full Source Code of the Software by indicating how to access it, it
- being understood that the additional cost of acquiring the Source Code
- shall not exceed the cost of transferring the data.
- .
- .
- 5.3.2 DISTRIBUTION OF MODIFIED SOFTWARE
- .
- When the Licensee makes a Contribution to the Software, the terms and
- conditions for the distribution of the resulting Modified Software
- become subject to all the provisions of this Agreement.
- .
- The Licensee is authorized to distribute the Modified Software, in
- source code or object code form, provided that said distribution
- complies with all the provisions of the Agreement and is accompanied by:
- .
- 1. a copy of the Agreement,
- .
- 2. a notice relating to the limitation of both the Licensor's
- warranty and liability as set forth in Articles 8 and 9,
- .
- and that, in the event that only the object code of the Modified
- Software is redistributed, the Licensee allows future Licensees
- unhindered access to the full source code of the Modified Software by
- indicating how to access it, it being understood that the additional
- cost of acquiring the source code shall not exceed the cost of
- transferring the data.
- .
- .
- 5.3.3 DISTRIBUTION OF EXTERNAL MODULES
- .
- When the Licensee has developed an External Module, the terms and
- conditions of this Agreement do not apply to said External Module, that
- may be distributed under a separate license agreement.
- .
- .
- 5.3.4 COMPATIBILITY WITH THE GNU GPL
- .
- The Licensee can include a code that is subject to the provisions of one
- of the versions of the GNU GPL in the Modified or unmodified Software,
- and distribute that entire code under the terms of the same version of
- the GNU GPL.
- .
- The Licensee can include the Modified or unmodified Software in a code
- that is subject to the provisions of one of the versions of the GNU GPL,
- and distribute that entire code under the terms of the same version of
- the GNU GPL.
- .
- .
- Article 6 - INTELLECTUAL PROPERTY
- .
- .
- 6.1 OVER THE INITIAL SOFTWARE
- .
- The Holder owns the economic rights over the Initial Software. Any or
- all use of the Initial Software is subject to compliance with the terms
- and conditions under which the Holder has elected to distribute its work
- and no one shall be entitled to modify the terms and conditions for the
- distribution of said Initial Software.
- .
- The Holder undertakes that the Initial Software will remain ruled at
- least by this Agreement, for the duration set forth in Article 4.2.
- .
- .
- 6.2 OVER THE CONTRIBUTIONS
- .
- The Licensee who develops a Contribution is the owner of the
- intellectual property rights over this Contribution as defined by
- applicable law.
- .
- .
- 6.3 OVER THE EXTERNAL MODULES
- .
- The Licensee who develops an External Module is the owner of the
- intellectual property rights over this External Module as defined by
- applicable law and is free to choose the type of agreement that shall
- govern its distribution.
- .
- .
- 6.4 JOINT PROVISIONS
- .
- The Licensee expressly undertakes:
- .
- 1. not to remove, or modify, in any manner, the intellectual property
- notices attached to the Software;
- .
- 2. to reproduce said notices, in an identical manner, in the copies
- of the Software modified or not.
- .
- The Licensee undertakes not to directly or indirectly infringe the
- intellectual property rights of the Holder and/or Contributors on the
- Software and to take, where applicable, vis-à-vis its staff, any and all
- measures required to ensure respect of said intellectual property rights
- of the Holder and/or Contributors.
- .
- .
- Article 7 - RELATED SERVICES
- .
- 7.1 Under no circumstances shall the Agreement oblige the Licensor to
- provide technical assistance or maintenance services for the Software.
- .
- However, the Licensor is entitled to offer this type of services. The
- terms and conditions of such technical assistance, and/or such
- maintenance, shall be set forth in a separate instrument. Only the
- Licensor offering said maintenance and/or technical assistance services
- shall incur liability therefor.
- .
- 7.2 Similarly, any Licensor is entitled to offer to its licensees, under
- its sole responsibility, a warranty, that shall only be binding upon
- itself, for the redistribution of the Software and/or the Modified
- Software, under terms and conditions that it is free to decide. Said
- warranty, and the financial terms and conditions of its application,
- shall be subject of a separate instrument executed between the Licensor
- and the Licensee.
- .
- .
- Article 8 - LIABILITY
- .
- 8.1 Subject to the provisions of Article 8.2, the Licensee shall be
- entitled to claim compensation for any direct loss it may have suffered
- from the Software as a result of a fault on the part of the relevant
- Licensor, subject to providing evidence thereof.
- .
- 8.2 The Licensor's liability is limited to the commitments made under
- this Agreement and shall not be incurred as a result of in particular:
- (i) loss due the Licensee's total or partial failure to fulfill its
- obligations, (ii) direct or consequential loss that is suffered by the
- Licensee due to the use or performance of the Software, and (iii) more
- generally, any consequential loss. In particular the Parties expressly
- agree that any or all pecuniary or business loss (i.e. loss of data,
- loss of profits, operating loss, loss of customers or orders,
- opportunity cost, any disturbance to business activities) or any or all
- legal proceedings instituted against the Licensee by a third party,
- shall constitute consequential loss and shall not provide entitlement to
- any or all compensation from the Licensor.
- .
- .
- Article 9 - WARRANTY
- .
- 9.1 The Licensee acknowledges that the scientific and technical
- state-of-the-art when the Software was distributed did not enable all
- possible uses to be tested and verified, nor for the presence of
- possible defects to be detected. In this respect, the Licensee's
- attention has been drawn to the risks associated with loading, using,
- modifying and/or developing and reproducing the Software which are
- reserved for experienced users.
- .
- The Licensee shall be responsible for verifying, by any or all means,
- the suitability of the product for its requirements, its good working
- order, and for ensuring that it shall not cause damage to either persons
- or properties.
- .
- 9.2 The Licensor hereby represents, in good faith, that it is entitled
- to grant all the rights over the Software (including in particular the
- rights set forth in Article 5).
- .
- 9.3 The Licensee acknowledges that the Software is supplied "as is" by
- the Licensor without any other express or tacit warranty, other than
- that provided for in Article 9.2 and, in particular, without any warranty
- as to its commercial value, its secured, safe, innovative or relevant
- nature.
- .
- Specifically, the Licensor does not warrant that the Software is free
- from any error, that it will operate without interruption, that it will
- be compatible with the Licensee's own equipment and software
- configuration, nor that it will meet the Licensee's requirements.
- .
- 9.4 The Licensor does not either expressly or tacitly warrant that the
- Software does not infringe any third party intellectual property right
- relating to a patent, software or any other property right. Therefore,
- the Licensor disclaims any and all liability towards the Licensee
- arising out of any or all proceedings for infringement that may be
- instituted in respect of the use, modification and redistribution of the
- Software. Nevertheless, should such proceedings be instituted against
- the Licensee, the Licensor shall provide it with technical and legal
- assistance for its defense. Such technical and legal assistance shall be
- decided on a case-by-case basis between the relevant Licensor and the
- Licensee pursuant to a memorandum of understanding. The Licensor
- disclaims any and all liability as regards the Licensee's use of the
- name of the Software. No warranty is given as regards the existence of
- prior rights over the name of the Software or as regards the existence
- of a trademark.
- .
- .
- Article 10 - TERMINATION
- .
- 10.1 In the event of a breach by the Licensee of its obligations
- hereunder, the Licensor may automatically terminate this Agreement
- thirty (30) days after notice has been sent to the Licensee and has
- remained ineffective.
- .
- 10.2 A Licensee whose Agreement is terminated shall no longer be
- authorized to use, modify or distribute the Software. However, any
- licenses that it may have granted prior to termination of the Agreement
- shall remain valid subject to their having been granted in compliance
- with the terms and conditions hereof.
- .
- .
- Article 11 - MISCELLANEOUS
- .
- .
- 11.1 EXCUSABLE EVENTS
- .
- Neither Party shall be liable for any or all delay, or failure to
- perform the Agreement, that may be attributable to an event of force
- majeure, an act of God or an outside cause, such as defective
- functioning or interruptions of the electricity or telecommunications
- networks, network paralysis following a virus attack, intervention by
- government authorities, natural disasters, water damage, earthquakes,
- fire, explosions, strikes and labor unrest, war, etc.
- .
- 11.2 Any failure by either Party, on one or more occasions, to invoke
- one or more of the provisions hereof, shall under no circumstances be
- interpreted as being a waiver by the interested Party of its right to
- invoke said provision(s) subsequently.
- .
- 11.3 The Agreement cancels and replaces any or all previous agreements,
- whether written or oral, between the Parties and having the same
- purpose, and constitutes the entirety of the agreement between said
- Parties concerning said purpose. No supplement or modification to the
- terms and conditions hereof shall be effective as between the Parties
- unless it is made in writing and signed by their duly authorized
- representatives.
- .
- 11.4 In the event that one or more of the provisions hereof were to
- conflict with a current or future applicable act or legislative text,
- said act or legislative text shall prevail, and the Parties shall make
- the necessary amendments so as to comply with said act or legislative
- text. All other provisions shall remain effective. Similarly, invalidity
- of a provision of the Agreement, for any reason whatsoever, shall not
- cause the Agreement as a whole to be invalid.
- .
- .
- 11.5 LANGUAGE
- .
- The Agreement is drafted in both French and English and both versions
- are deemed authentic.
- .
- .
- Article 12 - NEW VERSIONS OF THE AGREEMENT
- .
- 12.1 Any person is authorized to duplicate and distribute copies of this
- Agreement.
- .
- 12.2 So as to ensure coherence, the wording of this Agreement is
- protected and may only be modified by the authors of the License, who
- reserve the right to periodically publish updates or new versions of the
- Agreement, each with a separate number. These subsequent versions may
- address new issues encountered by Free Software.
- .
- 12.3 Any Software distributed under a given version of the Agreement may
- only be subsequently distributed under the same version of the Agreement
- or a subsequent version, subject to the provisions of Article 5.3.4.
- .
- .
- Article 13 - GOVERNING LAW AND JURISDICTION
- .
- 13.1 The Agreement is governed by French law. The Parties agree to
- endeavor to seek an amicable solution to any disagreements or disputes
- that may arise during the performance of the Agreement.
- .
- 13.2 Failing an amicable solution within two (2) months as from their
- occurrence, and unless emergency proceedings are necessary, the
- disagreements or disputes shall be referred to the Paris Courts having
- jurisdiction, by the more diligent Party.
- .
- .
- Version 2.0 dated 2006-09-05.
diff --git a/debian/doc-base b/debian/doc-base
deleted file mode 100644
index 5252767..0000000
--- a/debian/doc-base
+++ /dev/null
@@ -1,18 +0,0 @@
-Document: sumaclust
-Title: Sumaclust: fast and exact clustering of sequences
-Author: Céline Mercier <celine.mercier at metabarcoding.org>
-Abstract: fast and exact clustering of genomic sequences
- With the development of next-generation sequencing, efficient tools are
- needed to handle millions of sequences in reasonable amounts of time.
- Sumaclust is a program developed by the LECA. Sumaclust aims to cluster
- sequences in a way that is fast and exact at the same time. This tool
- has been developed to be adapted to the type of data generated by DNA
- metabarcoding, i.e. entirely sequenced, short markers. Sumaclust
- clusters sequences using the same clustering algorithm as UCLUST and CD-
- HIT. This algorithm is mainly useful to detect the 'erroneous' sequences
- created during amplification and sequencing protocols, deriving from
- 'true' sequences.
-Section: Science/Biology
-
-Format: pdf
-Files: /usr/share/doc/sumaclust/sumaclust_user_manual.pdf
diff --git a/debian/docs b/debian/docs
deleted file mode 100644
index a136337..0000000
--- a/debian/docs
+++ /dev/null
@@ -1 +0,0 @@
-*.pdf
diff --git a/debian/install b/debian/install
deleted file mode 100644
index 0aebd39..0000000
--- a/debian/install
+++ /dev/null
@@ -1 +0,0 @@
-sumaclust usr/bin
diff --git a/debian/manpages b/debian/manpages
deleted file mode 100644
index 0f65186..0000000
--- a/debian/manpages
+++ /dev/null
@@ -1 +0,0 @@
-debian/*.1
diff --git a/debian/patches/hardening.patch b/debian/patches/hardening.patch
deleted file mode 100644
index 0c70e87..0000000
--- a/debian/patches/hardening.patch
+++ /dev/null
@@ -1,15 +0,0 @@
-Author: Andreas Tille <tille at debian.org>
-Last-Update: Wed, 05 Aug 2015 17:54:58 +0200
-Description: Propagate hardening options
-
---- a/global.mk
-+++ b/global.mk
-@@ -10,7 +10,7 @@ LIBFILE = ./sumalibs/libfile/libfile.a
- LIBUTILS = ./sumalibs/libutils/libutils.a
-
- CC=gcc
--LDFLAGS=
-+LDFLAGS:=$(LDFLAGS)
-
-
- ifeq ($(CC),gcc)
diff --git a/debian/patches/series b/debian/patches/series
deleted file mode 100644
index 814900f..0000000
--- a/debian/patches/series
+++ /dev/null
@@ -1 +0,0 @@
-hardening.patch
diff --git a/debian/rules b/debian/rules
deleted file mode 100755
index a8d61d2..0000000
--- a/debian/rules
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/usr/bin/make -f
-# -*- makefile -*-
-
-# Uncomment this to turn on verbose mode.
-#export DH_VERBOSE=1
-
-# Build fails when trying to set this
-# export DEB_BUILD_MAINT_OPTIONS = hardening=+all
-
-%:
- dh $@
-
-override_dh_clean:
- dh_clean
- #Upstream is very messy. Scrub out all the .svn stuff
- #and .o files.
- find sumalibs -name '*.[oa]' -delete
-
-override_dh_compress:
- dh_compress --exclude=.pdf
-
-# Maybe it makes sense at some point in time to use *.md
-# source for the manpage - currently the result is not really
-# the usual manpage structure
-#override_dh_installman:
-# go-md2man -in=sumatra_user_manual.md -out=sumatra.1
\ No newline at end of file
diff --git a/debian/source/format b/debian/source/format
deleted file mode 100644
index 163aaf8..0000000
--- a/debian/source/format
+++ /dev/null
@@ -1 +0,0 @@
-3.0 (quilt)
diff --git a/debian/sumaclust.1 b/debian/sumaclust.1
deleted file mode 100644
index 85120d8..0000000
--- a/debian/sumaclust.1
+++ /dev/null
@@ -1,88 +0,0 @@
-.TH SUMACLUST "1" "August 2015" "sumaclust 1.0.10" "User Commands"
-.SH NAME
-sumaclust \- star clustering of genetic sequences
-.SH SYNOPSIS
-.B sumaclust
-\fI[options] <dataset>\fR
-.SH DESCRIPTION
-.P
-With the development of next-generation sequencing, efficient tools are
-needed to handle millions of sequences in reasonable amounts of time.
-Sumaclust is a program developed by the LECA. Sumaclust aims to cluster
-sequences in a way that is fast and exact at the same time. This tool
-has been developed to be adapted to the type of data generated by DNA
-metabarcoding, i.e. entirely sequenced, short markers. Sumaclust
-clusters sequences using the same clustering algorithm as UCLUST and CD-
-HIT. This algorithm is mainly useful to detect the 'erroneous' sequences
-created during amplification and sequencing protocols, deriving from 'true'
-sequences.
-.SH OPTIONS
-.TP
-\fB\-h\fR
-[H]elp \- print <this> help
-.TP
-\fB\-l\fR
-Reference sequence length is the shortest.
-.TP
-\fB\-L\fR
-Reference sequence length is the largest.
-.TP
-\fB\-a\fR
-Reference sequence length is the alignment length (default).
-.TP
-\fB\-n\fR
-Score is normalized by reference sequence length (default).
-.TP
-\fB\-r\fR
-Raw score, not normalized.
-.TP
-\fB\-d\fR
-Score is expressed in distance (default : score is expressed in similarity).
-.HP
-\fB\-t\fR ##.## : Score threshold for clustering. If the score is normalized and expressed in similarity (default),
-.IP
-it is an identity, e.g. 0.95 for an identity of 95%. If the score is normalized
-and expressed in distance, it is (1.0 \- identity), e.g. 0.05 for an identity of 95%.
-If the score is not normalized and expressed in similarity, it is the length of the
-Longest Common Subsequence. If the score is not normalized and expressed in distance,
-it is (reference length \- LCS length).
-Only sequences with a similarity above ##.## with the center sequence of a cluster
-are assigned to that cluster. Default: 0.97.
-.TP
-\fB\-e\fR
-Exact option: A sequence is assigned to the cluster with the center sequence presenting the
-highest similarity score > threshold, as opposed to the default 'fast' option where a sequence is
-assigned to the first cluster found with a center sequence presenting a score > threshold.
-.TP
-\fB\-R\fR ##
-Maximum ratio between the counts of two sequences so that the less abundant one can be considered
-as a variant of the more abundant one. Default: 1.0.
-.TP
-\fB\-p\fR ##
-Multithreading with ## threads using openMP.
-.TP
-\fB\-s\fR ####
-Sorting by ####. Must be 'None' for no sorting, or a key in the fasta header of each sequence,
-except for the count that can be computed (default : sorting by count).
-.TP
-\fB\-o\fR
-Sorting is in ascending order (default : descending).
-.TP
-\fB\-g\fR
-n's are replaced with a's (default: sequences with n's are discarded).
-.TP
-\fB\-B\fR ###
-Output of the OTU table in BIOM format is activated, and written to file ###.
-.TP
-\fB\-O\fR ###
-Output of the OTU map (observation map) is activated, and written to file ###.
-.TP
-\fB\-F\fR ###
-Output in FASTA format is written to file ### instead of standard output.
-.TP
-\fB\-f\fR
-Output in FASTA format is deactivated.
-.PP
-Argument : the nucleotide dataset to cluster
-.SH SEE ALSO
-http://metabarcoding.org/sumatra
diff --git a/debian/watch b/debian/watch
deleted file mode 100644
index af6f494..0000000
--- a/debian/watch
+++ /dev/null
@@ -1,9 +0,0 @@
-version=3
-
-https://git.metabarcoding.org/obitools/sumaclust/wikis/home .*/sumaclust/uploads/.*/sumaclust_v(\d[\d.-]+)\.(?:tar(?:\.gz|\.bz2)?|tgz)
-
-# sumaclust is maintained in a Gitlab repository
-# https://wiki.debian.org/debian/watch#Gitlab
-# recommends something like this but it does not work
-# opts=filenamemangle=s/.*\.tar\.gz\?ref=v?(\d\S*)/<project>-$1\.tar\.gz/g \
-# https://git.metabarcoding.org/obitools/sumaclust/tags .*archive\.tar\.gz\?ref=v?(\d\S*)
diff --git a/global.mk b/global.mk
new file mode 100644
index 0000000..9b7651f
--- /dev/null
+++ b/global.mk
@@ -0,0 +1,45 @@
+
+# Common build settings and library rules for the top-level build.
+
+# Linker search-path flags (-L) for each bundled sumalibs directory.
+LIBFASTAPATH = -L./sumalibs/libfasta
+LIBLCSPATH = -L./sumalibs/liblcs
+LIBFILEPATH = -L./sumalibs/libfile
+LIBUTILSPATH = -L./sumalibs/libutils
+
+# Static archives built by the sub-make rules at the end of this file.
+LIBFASTA = ./sumalibs/libfasta/libfasta.a
+LIBLCS = ./sumalibs/liblcs/liblcs.a
+LIBFILE = ./sumalibs/libfile/libfile.a
+LIBUTILS = ./sumalibs/libutils/libutils.a
+
+CC=gcc
+# Keep any linker flags inherited from the environment (e.g. distribution
+# hardening flags) instead of resetting them to empty; with no LDFLAGS in
+# the environment this still expands to an empty value.
+LDFLAGS:=$(LDFLAGS)
+
+
+# The OpenMP switches (-DOMP_SUPPORT -fopenmp) and -s are only passed when
+# the compiler is gcc; -w silences all compiler warnings in both cases.
+ifeq ($(CC),gcc)
+    CFLAGS = -O3 -s -DOMP_SUPPORT -fopenmp -w
+else
+    CFLAGS = -O3 -w
+endif
+
+
+# "all" is not defined in this file; presumably the including Makefile
+# provides it -- TODO confirm.
+default: all
+
+# Generic rule compiling one C source into its object file.
+# NOTE(review): LIB is not set anywhere in this file; it is presumably
+# supplied by the including Makefile or left empty -- confirm.
+%.o: %.c
+	$(CC) $(CFLAGS) -c -o $@ $< $(LIB)
+
+
+########
+#
+# libraries compilation
+#
+########
+
+# Each archive is built by running make in its own directory. The rules
+# list no prerequisites, so make only runs them when the archive file does
+# not exist yet.
+./sumalibs/libfasta/libfasta.a:
+	$(MAKE) -C ./sumalibs/libfasta
+
+./sumalibs/liblcs/liblcs.a:
+	$(MAKE) -C ./sumalibs/liblcs
+
+./sumalibs/libfile/libfile.a:
+	$(MAKE) -C ./sumalibs/libfile
+
+./sumalibs/libutils/libutils.a:
+	$(MAKE) -C ./sumalibs/libutils
\ No newline at end of file
diff --git a/mtcompare_sumaclust.c b/mtcompare_sumaclust.c
new file mode 100644
index 0000000..da9e5ab
--- /dev/null
+++ b/mtcompare_sumaclust.c
@@ -0,0 +1,334 @@
+/*
+ * mtcompare_sumaclust.c
+ *
+ * Author: Celine Mercier
+ *
+ */
+
+
+#ifdef OMP_SUPPORT
+#include <omp.h>
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <sys/time.h>
+
+#include "./sumalibs/libfasta/sequence.h"
+#include "./sumalibs/libutils/utilities.h"
+#include "./sumalibs/liblcs/upperband.h"
+#include "./sumalibs/liblcs/sse_banded_LCS_alignment.h"
+
+#include "sumaclust.h"
+
+
+
+/*
+ * Scores sequence i against the seed sequence seed_number.
+ *
+ * The result starts as control->worstscore. When the count of sequence i is
+ * at most maxcount, filters() is given the chance to overwrite it; a value of
+ * -1.0 afterwards triggers the full alignment with alignForSumathings(),
+ * using the work buffers reserved for thread_number.
+ *
+ * c             : pointer to the thread_control_t of the run
+ * seed_number   : index of the seed sequence in control->db
+ * i             : index of the sequence to score
+ * thread_number : index of the per-thread buffers to use
+ * maxcount      : highest count for which sequence i is still compared
+ *
+ * Returns the resulting score.
+ */
+static double computeScore(void* c, int32_t seed_number, int32_t i, int thread_number,int32_t maxcount)
+{
+    thread_control_t *ctl = (thread_control_t*) c;
+    fastaSeqPtr candidate = ctl->db[i];
+    fastaSeqPtr seed = ctl->db[seed_number];
+    double result = ctl->worstscore;
+    int LCSmin;    // only written by filters(), only read when result == -1.0
+
+    if (candidate->count <= maxcount)
+    {
+        filters(candidate, seed,
+                ctl->threshold, ctl->normalize, ctl->reference, ctl->lcsmode,
+                &result, &LCSmin);
+    }
+
+    if (result != -1.0)
+        return result;
+
+    return alignForSumathings(seed->sequence, ctl->iseqs1[thread_number],
+                              candidate->sequence, ctl->iseqs2[thread_number],
+                              seed->length, candidate->length,
+                              ctl->normalize, ctl->reference,
+                              ctl->lcsmode, ctl->addresses[thread_number],
+                              ctl->sizeForSeqs, LCSmin);
+}
+
+
+/*
+ * Records that sequence seq belongs to the cluster whose center is
+ * center_idx, together with its score against that center, and marks the
+ * sequence as not being a cluster center itself.
+ *
+ * c          : pointer to the thread_control_t of the run
+ * center_idx : index of the cluster center in control->db
+ * seq        : index of the sequence being assigned
+ * score      : score of the sequence with the center
+ */
+inline void putSeqInClusterMT(void *c, int32_t center_idx, int32_t seq, double score)
+{
+    fastaSeqPtr* sequences = ((thread_control_t*) c)->db;
+    fastaSeqPtr member = sequences[seq];
+
+    member->cluster_center = FALSE;
+    member->score = score;                      // score with the seed
+    member->center_index = center_idx;          // cluster membership
+    member->center = sequences + center_idx;
+}
+
+
+/*
+ * Returns the time elapsed between starttime and finishtime, in microseconds.
+ *
+ * The seconds difference is widened to 64 bits before being scaled, and the
+ * microseconds difference is added unscaled (it is already in microseconds).
+ */
+int64_t timevaldiff(struct timeval *starttime, struct timeval *finishtime)
+{
+    int64_t usec;
+
+    usec  = (int64_t)(finishtime->tv_sec - starttime->tv_sec) * 1000000;
+    usec += (int64_t)(finishtime->tv_usec - starttime->tv_usec);
+    return usec;
+}
+
+/*
+ * Runs one clustering pass for the current seed (control->next).
+ *
+ * Every sequence stored after the seed in control->db is scored against it;
+ * in fast mode only sequences that are still their own center are scored.
+ * A sequence whose score passes the threshold is attached to the seed when it
+ * has no cluster yet or, in exact mode, when the new score is greater than
+ * the score it currently stores. The smallest index of a still unclustered
+ * sequence that failed the threshold becomes the next seed.
+ *
+ * On return control->next, control->seeds_counter and control->stop are
+ * updated, and the time spent in the pass is added to control->elapsedtime.
+ *
+ * c : pointer to the thread_control_t of the run
+ */
+void computeOneSeed(void* c)
+{
+    thread_control_t *control=(thread_control_t*)c;
+    BOOL found;
+    int32_t seed_number;
+    int32_t nextone=control->n;      // control->n means "no next seed found"
+    int64_t elapsedtime;
+    struct timeval current;
+    struct timeval start;
+
+    seed_number = control->next;
+    found = FALSE;
+
+    #ifdef OMP_SUPPORT
+    omp_set_num_threads(control->threads_number);
+    #endif
+
+    gettimeofday(&start,NULL);
+
+    #ifdef OMP_SUPPORT
+    #pragma omp parallel default(none) \
+        firstprivate(found) \
+        firstprivate(seed_number) \
+        firstprivate(control) \
+        shared(nextone)
+    #endif
+
+    {
+        int32_t i;
+        double score;
+        int32_t current_seed;
+        #ifdef OMP_SUPPORT
+        int thread_id=omp_get_thread_num();
+        #else
+        int thread_id=0;
+        #endif
+        int nseq = control->n;
+        BOOL fast = control->fast;
+        BOOL lcsmode = control->lcsmode;
+        int normalize = control->normalize;
+        double threshold = control->threshold;
+        BOOL not_already_in_a_cluster;
+        BOOL threshold_bad;
+        int32_t priv_nextone=control->n;   // best next-seed candidate seen by this thread
+        int32_t maxcount = (double)(control->db[seed_number]->count) * control->max_ratio;
+
+        fastaSeqPtr* db = control->db;
+
+        #ifdef OMP_SUPPORT
+        #pragma omp for schedule(dynamic,10)
+        #endif
+
+        for (i=seed_number+1; i < nseq; i++)
+        {
+            current_seed = db[i]->center_index;
+            not_already_in_a_cluster = current_seed == i;   // at the beginning all the sequences are their own center
+
+            if ((! fast) || not_already_in_a_cluster)
+            {
+                score = computeScore((void*)control, seed_number, i, thread_id, maxcount);
+
+                // similarity scores must reach the threshold, distances must stay under it
+                if (lcsmode || normalize)
+                    threshold_bad = (score < threshold);
+                else
+                    threshold_bad = (score > threshold);
+
+                if (threshold_bad)
+                {
+                    if (!found && not_already_in_a_cluster && (i < priv_nextone))
+                    {
+                        priv_nextone = i;   // saves potential next seed
+                        found = TRUE;       // a next seed has been found for this thread
+                    }
+                }
+                else if (not_already_in_a_cluster ||
+                         ((! fast) && (db[i]->score < score)))
+                {
+                    // sequence matches the current seed: cluster it if it doesn't
+                    // belong to any cluster yet, OR in exact mode if the score is
+                    // better with this seed
+                    if (! lcsmode && normalize)
+                        score = 1.0 - score;
+                    putSeqInClusterMT((void*)control, seed_number, i, score);
+                }
+            }
+        }
+
+        // merge the per-thread candidates: keep the smallest index in nextone
+        #ifdef OMP_SUPPORT
+        #pragma omp flush(nextone)
+        #endif
+        if (priv_nextone < nextone)
+        {
+            #ifdef OMP_SUPPORT
+            #pragma omp critical
+            #endif
+            {
+                if (priv_nextone < nextone)
+                    nextone=priv_nextone;
+            }
+        }
+
+    }
+
+    gettimeofday(&current,NULL);
+    elapsedtime = timevaldiff(&start,&current);
+    control->elapsedtime+=elapsedtime;
+
+    control->next=nextone;
+
+    if (control->next < (control->n)-1)
+        (control->seeds_counter)++;
+    else if (control->next == (control->n)-1)
+    {
+        // the last sequence is the next seed: it has nothing after it to compare with
+        control->stop = TRUE;
+        (control->seeds_counter)++;
+    }
+    else if (control->next == control->n)
+        control->stop = TRUE;               // no new seed was found
+}
+
+
+/*
+ * Makes every sequence of control->db its own cluster center and gives it
+ * the initial score: 0 when not in LCS mode, 1 in normalized LCS mode, and
+ * otherwise the length of the first sequence of the database.
+ *
+ * c : pointer to the thread_control_t of the run
+ */
+void initializeCentersAndScores(void *c)
+{
+    thread_control_t *ctl = (thread_control_t*) c;
+    fastaSeqPtr *entry;
+    int32_t idx;
+    int initial_score;
+
+    if (!ctl->lcsmode)
+        initial_score = 0.0;
+    else if (ctl->normalize)
+        initial_score = 1.0;
+    else
+        initial_score = (*(ctl->db))->length;
+
+    entry = ctl->db;
+    idx = 0;
+    while (idx <= ctl->n-1)
+    {
+        (*entry)->cluster_center = TRUE;
+        (*entry)->score = initial_score;
+        (*entry)->center_index = idx;
+        (*entry)->center = (ctl->db)+idx;
+        idx++;
+        entry++;
+    }
+}
+
+
+/*
+ * Releases the per-thread tables referenced by the control structure.
+ *
+ * The per-thread address tables are only released when the reference is
+ * ALILEN and the score is normalized or expressed as a distance. The three
+ * pointer arrays themselves (addresses, iseqs1, iseqs2) are always released,
+ * since mt_compare_sumaclust() always allocates them.
+ *
+ * c : pointer to the thread_control_t of the run
+ */
+void freeEverything(void *c)
+{
+    thread_control_t *control=(thread_control_t*)c;
+    int i;
+
+    if ((control->reference == ALILEN) && (control->normalize || !control->lcsmode))
+    {
+        for (i=0; i < control->threads_number; i++)
+            free(control->addresses[i]);
+    }
+    free(control->addresses);
+    free(control->iseqs1);
+    free(control->iseqs2);
+}
+
+
+/*
+ * Multithreaded clustering driver: fills a thread_control_t, initializes
+ * every sequence as its own center, then calls computeOneSeed() until
+ * control.stop is set.
+ *
+ * db             : array of the n sequences to cluster
+ * n              : number of sequences in db
+ * fast           : stored in control.fast (fast versus exact assignment)
+ * threshold      : similarity or distance threshold, see the message printed below
+ * normalize      : stored in control.normalize
+ * reference      : stored in control.reference
+ * lcsmode        : stored in control.lcsmode
+ * threads_number : requested number of threads
+ * max_ratio      : stored in control.max_ratio
+ *
+ * Returns control.seeds_counter, reported on stderr as the number of
+ * clusters created.
+ */
+int mt_compare_sumaclust(fastaSeqPtr* db, int n, BOOL fast, double threshold, BOOL normalize,
+ int reference, BOOL lcsmode, int threads_number, double max_ratio)
+{
+ thread_control_t control;
+ int32_t i;
+ int lmax, lmin;
+
+ if (lcsmode || normalize)
+ fprintf(stderr,"Clustering sequences when similarity >= %lf\n", threshold);
+ else
+ fprintf(stderr,"Clustering sequences when distance <= %lf\n", threshold);
+
+ fprintf(stderr,"Aligning and clustering... \n");
+
+ // The thread count is the requested one, capped by omp_get_max_threads()
+ // (or by 1 when built without OpenMP support).
+ #ifdef OMP_SUPPORT
+ control.threads_number = omp_get_max_threads();
+ #else
+ control.threads_number = 1;
+ #endif
+ if (threads_number < control.threads_number)
+ control.threads_number = threads_number;
+
+ calculateMaxAndMinLen(db, n, &lmax, &lmin);
+
+ // One entry per thread in each of the three pointer arrays.
+ control.addresses = (int16_t**) malloc(control.threads_number*sizeof(int16_t*));
+ control.iseqs1 = (int16_t**) malloc(control.threads_number*sizeof(int16_t*));
+ control.iseqs2 = (int16_t**) malloc(control.threads_number*sizeof(int16_t*));
+
+ for (i=0; i < control.threads_number; i++)
+ control.sizeForSeqs = prepareTablesForSumathings(lmax, lmin, threshold, normalize, reference, lcsmode, (control.addresses)+i, (control.iseqs1)+i, (control.iseqs2)+i);
+
+ control.db = db;
+ control.next = 0;
+ control.normalize = normalize;
+ control.reference = reference;
+ control.threshold = threshold;
+ control.max_ratio = max_ratio;
+ control.lcsmode = lcsmode;
+ control.stop = FALSE;
+ control.fast = fast;
+ control.seeds_counter = 1;
+// control.potential_nexts_list = (int*) calloc(control.threads_number, sizeof(int));
+ control.n = n;
+
+ // Worst possible score: 0 for similarities, the longest length for raw distances.
+ if (lcsmode || normalize)
+ control.worstscore = 0.0;
+ else
+ control.worstscore = lmax;
+
+ control.elapsedtime=0;
+
+ fprintf(stderr, "%d threads running\n", control.threads_number);
+
+ // initialize scores table :
+ initializeCentersAndScores(&control);
+
+ // One computeOneSeed() call per seed; progress is printed when the seed index is a multiple of 100.
+ while (control.stop == FALSE)
+ {
+ if ((control.next)%100 == 0)
+ {
+ float p = ((float)(control.next)/(float)n)*100;
+ fprintf(stderr,"\rDone : %f %% ",p);
+ }
+ computeOneSeed(&control);
+
+ }
+
+ // NOTE(review): the pointer adjustment presumably undoes an offset applied by
+ // prepareTablesForSumathings() - confirm against that function.
+ for (i=0; i < control.threads_number; i++)
+ {
+ free((*((control.iseqs1)+i))-(control.sizeForSeqs)+lmax);
+ free((*((control.iseqs2)+i))-(control.sizeForSeqs)+lmax);
+ }
+
+ freeEverything(&control);
+ fprintf(stderr,"\rDone : 100 %% %d clusters created. \n\n", control.seeds_counter);
+ fprintf(stderr,"Pure computation time %f \n\n", (double)control.elapsedtime/1000000.);
+
+
+ return(control.seeds_counter);
+}
diff --git a/mtcompare_sumaclust.h b/mtcompare_sumaclust.h
new file mode 100644
index 0000000..b5476c3
--- /dev/null
+++ b/mtcompare_sumaclust.h
@@ -0,0 +1,15 @@
+/*
+ * mtcompare_sumaclust.h
+ *
+ * Declares the multithreaded clustering entry point.
+ *
+ * Created on: 12 March 2013
+ * Author: celinemercier
+ */
+
+#ifndef MTCOMPARE_H_
+#define MTCOMPARE_H_
+
+/*
+ * Clusters the n sequences of db using up to threads_number threads and
+ * returns the number of clusters created.
+ * fastaSeqPtr and BOOL must be declared before this header is included.
+ */
+int mt_compare_sumaclust(fastaSeqPtr* db, int n, BOOL fast, double threshold, BOOL normalize,
+ int reference, BOOL lcsmode, int threads_number, double max_ratio);
+
+
+#endif /* MTCOMPARE_H_ */
diff --git a/sumaclust.c b/sumaclust.c
new file mode 100644
index 0000000..e3e9e49
--- /dev/null
+++ b/sumaclust.c
@@ -0,0 +1,1083 @@
+/**
+ * FileName: sumaclust.c
+ * Author: Celine Mercier
+ * Description: star clustering of DNA sequences
+ * **/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <math.h>
+#include <sys/time.h>
+
+#include "./sumalibs/libutils/utilities.h"
+#include "./sumalibs/libfasta/sequence.h"
+#include "./sumalibs/libfasta/fasta_header_parser.h"
+#include "./sumalibs/libfasta/fasta_header_handler.h"
+#include "./sumalibs/libfasta/fasta_seq_writer.h"
+#include "./sumalibs/liblcs/upperband.h"
+#include "./sumalibs/liblcs/sse_banded_LCS_alignment.h"
+
+#include "mtcompare_sumaclust.h"
+#include "sumaclust.h"
+
+#define VERSION "1.0.20"
+
+
+/* ----------------------------------------------- */
+/* printout help */
+/* ----------------------------------------------- */
+
+#define PP fprintf(stdout,
+
+
+/*
+ * Prints the full help text (version, synopsis, options and argument) on
+ * standard output. PP expands to "fprintf(stdout," at this point of the file.
+ */
+static void PrintHelp()
+{
+ PP "------------------------------------------------------------\n");
+ PP " SUMACLUST Version %s\n", VERSION);
+ PP "------------------------------------------------------------\n");
+ PP " Synopsis : star clustering of sequences.\n");
+ PP " Usage: sumaclust [options] <dataset>\n");
+ PP "------------------------------------------------------------\n");
+ PP " Options:\n");
+ PP " -h : [H]elp - print <this> help\n\n");
+ PP " -l : Reference sequence length is the shortest. \n\n");
+ PP " -L : Reference sequence length is the largest. \n\n");
+ PP " -a : Reference sequence length is the alignment length (default). \n\n");
+ PP " -n : Score is normalized by reference sequence length (default).\n\n");
+ PP " -r : Raw score, not normalized. \n\n");
+ PP " -d : Score is expressed in distance (default : score is expressed in similarity). \n\n");
+ PP " -t ##.## : Score threshold for clustering. If the score is normalized and expressed in similarity (default),\n");
+ PP " it is an identity, e.g. 0.95 for an identity of 95%%. If the score is normalized\n");
+ PP " and expressed in distance, it is (1.0 - identity), e.g. 0.05 for an identity of 95%%.\n");
+ PP " If the score is not normalized and expressed in similarity, it is the length of the\n");
+ PP " Longest Common Subsequence. If the score is not normalized and expressed in distance,\n");
+ PP " it is (reference length - LCS length).\n");
+ PP " Only sequences with a similarity above ##.## with the center sequence of a cluster\n");
+ PP " are assigned to that cluster. Default: 0.97.\n\n");
+ PP " -e : Exact option : A sequence is assigned to the cluster with the center sequence presenting the\n");
+ PP " highest similarity score > threshold, as opposed to the default 'fast' option where a sequence is\n");
+ PP " assigned to the first cluster found with a center sequence presenting a score > threshold.\n\n");
+ PP " -R ## : Maximum ratio between the counts of two sequences so that the less abundant one can be considered\n");
+ PP " as a variant of the more abundant one. Default: 1.0.\n\n");
+ PP " -p ## : Multithreading with ## threads using openMP.\n\n");
+ PP " -s #### : Sorting by ####. Must be 'None' for no sorting, or a key in the fasta header of each sequence,\n");
+ PP " except for the count that can be computed (default : sorting by count).\n\n");
+ PP " -o : Sorting is in ascending order (default : descending).\n\n");
+ PP " -g : n's are replaced with a's (default: sequences with n's are discarded).\n\n");
+ PP " -B ### : Output of the OTU table in BIOM format is activated, and written to file ###.\n\n");
+ PP " -O ### : Output of the OTU map (observation map) is activated, and written to file ###.\n\n");
+ PP " -F ### : Output in FASTA format is written to file ### instead of standard output.\n\n");
+ PP " -f : Output in FASTA format is deactivated.\n");
+ PP "\n");
+ PP "------------------------------------------------------------\n");
+ PP " Argument : the nucleotide dataset to cluster (or nothing \n");
+ PP " if the standard input should be used). \n");
+ PP "------------------------------------------------------------\n");
+ PP " http://metabarcoding.org/sumaclust\n");
+ PP "------------------------------------------------------------\n\n");
+}
+
+#undef PP
+
+/* ----------------------------------------------- */
+/* printout usage and exit */
+/* ----------------------------------------------- */
+
+#define PP fprintf(stderr,
+
+
+/*
+ * Prints the short usage message on standard error (PP expands to
+ * "fprintf(stderr," at this point of the file).
+ *
+ * stat : exit status; when non-zero the program exits with it, when zero
+ *        the function returns to the caller.
+ */
+static void ExitUsage(int stat)
+{
+    PP "usage: sumaclust [-l|L|a|n|r|d|e|o|g|f] [-t threshold_value] [-s sorting_key] [-R maximum_ratio] [-p number_of_threads]\n");
+    PP "[-B file_name_for_BIOM-formatted_output] [-O file_name_for_OTU_table-formatted_output] [-F file_name_for_FASTA-formatted_output] dataset\n");
+    PP "type \"sumaclust -h\" for help\n");
+
+    if (stat)
+        exit(stat);
+}
+
+#undef PP
+
+
+/* Name of the header field read by the sort functions below; initialized to "count". */
+static char* sortingKey="count";
+
+/*
+ * Comparison callback ordering sequences by decreasing numeric value of the
+ * header field named by sortingKey.
+ *
+ * s1, s2 : pointers to the two fastaSeqPtr elements being compared
+ *
+ * Returns 1 when the value of *s2 is greater than the value of *s1,
+ * -1 when it is smaller, 0 when both are equal.
+ */
+static int sortSeqsP(const void **s1, const void **s2)
+{
+    double r1;
+    double r2;
+
+    r1 = atof(getItemFromHeader(sortingKey, ((fastaSeqPtr) *s1)->header));
+    r2 = atof(getItemFromHeader(sortingKey, ((fastaSeqPtr) *s2)->header));
+
+    if (r2 > r1)
+        return(1);
+    if (r2 < r1)
+        return(-1);
+    return(0);
+}
+
+
+/*
+ * Comparison callback ordering sequences by increasing numeric value of the
+ * header field named by sortingKey.
+ *
+ * s1, s2 : pointers to the two fastaSeqPtr elements being compared
+ *
+ * Returns 1 when the value of *s1 is greater than the value of *s2,
+ * -1 when it is smaller, 0 when both are equal.
+ */
+static int reverseSortSeqsP(const void **s1, const void **s2)
+{
+    double r1;
+    double r2;
+
+    r1 = atof(getItemFromHeader(sortingKey, ((fastaSeqPtr) *s1)->header));
+    r2 = atof(getItemFromHeader(sortingKey, ((fastaSeqPtr) *s2)->header));
+
+    if (r1 > r2)
+        return(1);
+    if (r1 < r2)
+        return(-1);
+    return(0);
+}
+
+
+/*
+ * Comparison callback sorting sequences first by their sequence string and,
+ * for identical sequences, by decreasing numeric value of the header field
+ * named by sortingKey.
+ *
+ * s1, s2 : the two sequences being compared (cast to fastaSeqPtr)
+ *
+ * Returns a negative, null or positive value following the qsort()
+ * convention. Exits with status 1 when the sorting key is missing from the
+ * header of either sequence.
+ */
+int uniqSeqsDoubleSortFunction(const void *s1, const void *s2)
+{
+    int c;
+    char* str_r1;
+    char* str_r2;
+    double r1;
+    double r2;
+
+    c = strcmp(((fastaSeqPtr) s1)->sequence, ((fastaSeqPtr) s2)->sequence);
+    if (c != 0)
+        return(c);
+
+    str_r1 = getItemFromHeader(sortingKey, ((fastaSeqPtr) s1)->header);
+    str_r2 = getItemFromHeader(sortingKey, ((fastaSeqPtr) s2)->header);
+    if ((str_r1 == NULL) || (str_r2 == NULL))
+    {
+        fprintf(stderr, "\nERROR: '%s' not in sequence header(s).\n\n", sortingKey);
+        exit(1);
+    }
+    r1 = atof(str_r1);
+    r2 = atof(str_r2);
+
+    if (r2 > r1)
+        return(1);
+    if (r2 < r1)
+        return(-1);
+    return(0);
+}
+
+
+/*
+ * Comparison callback sorting sequences first by their sequence string and,
+ * for identical sequences, by increasing numeric value of the header field
+ * named by sortingKey.
+ *
+ * s1, s2 : the two sequences being compared (cast to fastaSeqPtr)
+ *
+ * Returns a negative, null or positive value following the qsort()
+ * convention. Exits with status 1 when the sorting key is missing from the
+ * header of either sequence.
+ */
+int uniqSeqsDoubleReverseSortFunction(const void *s1, const void *s2)
+{
+    int c;
+    char* str_r1;
+    char* str_r2;
+    double r1;
+    double r2;
+
+    c = strcmp(((fastaSeqPtr) s1)->sequence, ((fastaSeqPtr) s2)->sequence);
+    if (c != 0)
+        return(c);
+
+    str_r1 = getItemFromHeader(sortingKey, ((fastaSeqPtr) s1)->header);
+    str_r2 = getItemFromHeader(sortingKey, ((fastaSeqPtr) s2)->header);
+    if ((str_r1 == NULL) || (str_r2 == NULL))
+    {
+        fprintf(stderr, "\nERROR: '%s' not in sequence header(s).\n\n", sortingKey);
+        exit(1);
+    }
+    r1 = atof(str_r1);
+    r2 = atof(str_r2);
+
+    if (r1 > r2)
+        return(1);
+    if (r1 < r2)
+        return(-1);
+    return(0);
+}
+
+
+/*
+ * Appends the BIOM column object of one sequence id to the columns string
+ * of a cluster center and counts it in center->cluster_weight_unique_ids.
+ *
+ * center     : cluster center owning the columns_BIOM string
+ * id         : accession id to append
+ * with_comma : TRUE when the object must be preceded by a ',' separator
+ *
+ * center->columns_BIOM_size always holds the string length plus the
+ * terminating NUL (0 while the string is not allocated yet).
+ * Exits with status 1 when memory cannot be allocated.
+ */
+static void appendBIOMColumn(fastaSeqPtr center, const char* id, BOOL with_comma)
+{
+    const char* prefix = with_comma ? ",{\"id\": \"" : "{\"id\": \"";
+    const char* suffix = "\", \"metadata\": null}";
+    size_t old_len;
+    size_t new_size;
+    char* grown;
+
+    old_len = (center->columns_BIOM_size > 0) ? (size_t) (center->columns_BIOM_size - 1) : 0;
+    new_size = old_len + strlen(prefix) + strlen(id) + strlen(suffix) + 1;
+
+    grown = realloc(center->columns_BIOM, new_size * sizeof(char));
+    if (grown == NULL)
+    {
+        fprintf(stderr, "\nERROR: out of memory while building the BIOM columns.\n\n");
+        exit(1);
+    }
+
+    sprintf(grown + old_len, "%s%s%s", prefix, id, suffix);
+
+    center->columns_BIOM = grown;
+    center->columns_BIOM_size = new_size;
+    center->cluster_weight_unique_ids++;
+}
+
+
+/*
+ * Writes the clustering result as a sparse OTU table in BIOM 1.0.0 (JSON)
+ * format: one row per cluster center, one column per sequence id.
+ *
+ * uniqSeqs        : array of the unique sequences (heads of identical groups)
+ * count           : number of entries in uniqSeqs
+ * numberOfCenters : number of cluster centers (number of rows)
+ * biomFile_name   : path of the file to write
+ *
+ * When the file cannot be opened, an error is printed on standard error and
+ * nothing is written. As a side effect, columns_BIOM, columns_BIOM_size and
+ * cluster_weight_unique_ids are set on every cluster center.
+ */
+void printInBIOMformat(fastaSeqPtr* uniqSeqs, int count, int numberOfCenters, char* biomFile_name)
+{
+    int i, j, n;
+    FILE* biomFile;
+    struct tm* tm_info;
+    time_t timer;
+    char buffer_date[20];
+    fastaSeqPtr head;
+    fastaSeqPtr center;
+    fastaSeqPtr* seq;
+    int row_number;
+    BOOL first_center = TRUE;
+
+    biomFile = fopen(biomFile_name, "w");
+    if (biomFile == NULL)
+    {
+        fprintf(stderr, "\nCan't open BIOM output file.\n");
+        return;
+    }
+
+    // Build, for every center, the list of the columns (sequence ids) of its cluster.
+    // n counts every sequence id exactly once: it is the number of columns.
+    n = 0;
+
+    for (i=0; i<count; i++)
+    {
+        head = uniqSeqs[i];
+
+        if (head->cluster_center)
+        {
+            center = head;
+            center->cluster_weight_unique_ids = 0;
+            center->columns_BIOM = NULL;
+            center->columns_BIOM_size = 0;
+            // only the very first column of the file has no leading ','
+            appendBIOMColumn(center, head->accession_id, first_center ? FALSE : TRUE);
+            first_center = FALSE;
+        }
+        else
+        {
+            center = *(head->center);
+            appendBIOMColumn(center, head->accession_id, TRUE);
+        }
+        n++;
+
+        // identical sequences are stored right after their head, up to the
+        // next head or to the last sequence (next == NULL)
+        if (head->next != NULL)
+        {
+            for (j=1; (head+j)->uniqHead == FALSE; j++)
+            {
+                appendBIOMColumn(center, (head+j)->accession_id, TRUE);
+                n++;
+                if ((head+j)->next == NULL)
+                    break;
+            }
+        }
+    }
+
+    time(&timer);
+    tm_info = localtime(&timer);
+    strftime(buffer_date, 20, "%Y-%m-%dT%H:%M:%S", tm_info);
+
+    fprintf(biomFile, "{\"id\": \"None\",\"format\": \"Biological Observation Matrix 1.0.0\","
+                      "\"format_url\": \"http://biom-format.org\",\"type\": \"OTU table\","
+                      "\"generated_by\": \"SUMACLUST %s\",\"date\": \"%s\",\"matrix_type\": \"sparse\","
+                      "\"matrix_element_type\": \"int\",\"shape\": [%d, %d],",
+                      VERSION, buffer_date, numberOfCenters, n);
+
+    // print data : one [row, column, 1] entry per sequence id
+
+    row_number = 0;
+    n = 0;
+
+    fprintf(biomFile, "\"data\": [");
+
+    for (i=0; i<count; i++)
+    {
+        seq = uniqSeqs+i;
+        if ((*seq)->cluster_center)
+        {
+            for (j=0; j<(*seq)->cluster_weight_unique_ids; j++)
+            {
+                if ((row_number == (numberOfCenters - 1)) && (j == ((*seq)->cluster_weight_unique_ids - 1))) // last seq to print
+                    fprintf(biomFile, "[%d,%d,1]],", row_number, n);
+                else
+                    fprintf(biomFile, "[%d,%d,1],", row_number, n);
+                n++;
+            }
+            row_number++;
+        }
+    }
+
+    // print rows : one per cluster center
+
+    first_center = TRUE;
+
+    for (i=0; i<count; i++)
+    {
+        seq = uniqSeqs+i;
+        if ((*seq)->cluster_center)
+        {
+            if (first_center)
+            {
+                fprintf(biomFile, "\"rows\": [{\"id\": \"%s\", \"metadata\": null}", (*seq)->accession_id);
+                first_center = FALSE;
+            }
+            else
+                fprintf(biomFile, ",{\"id\": \"%s\", \"metadata\": null}", (*seq)->accession_id);
+        }
+    }
+
+    // print columns : the strings built above, written verbatim (never as a format)
+
+    fprintf(biomFile, "],\"columns\": [");
+    for (i=0; i<count; i++)
+    {
+        seq = uniqSeqs+i;
+        if ((*seq)->cluster_center)
+            fputs((*seq)->columns_BIOM, biomFile);
+    }
+    fprintf(biomFile, "]}");
+
+    fclose(biomFile);
+}
+
+
+/*
+ * Appends a tab followed by one sequence id to the OTU table line of a
+ * cluster center.
+ *
+ * center : cluster center owning the line_OTU_table string, which must
+ *          already be allocated; line_OTU_table_size holds its length plus
+ *          the terminating NUL
+ * id     : accession id to append
+ *
+ * Exits with status 1 when memory cannot be allocated.
+ */
+static void appendOTUtableId(fastaSeqPtr center, const char* id)
+{
+    size_t id_len = strlen(id);
+    size_t old_size = center->line_OTU_table_size;
+    size_t new_size = old_size + id_len + 1;
+    char* grown;
+
+    grown = realloc(center->line_OTU_table, new_size * sizeof(char));
+    if (grown == NULL)
+    {
+        fprintf(stderr, "\nERROR: out of memory while building the OTU table.\n\n");
+        exit(1);
+    }
+
+    grown[old_size - 1] = '\t';                 // replaces the former NUL
+    memcpy(grown + old_size, id, id_len + 1);   // copies the id and its NUL
+
+    center->line_OTU_table = grown;
+    center->line_OTU_table_size = new_size;
+}
+
+
+/*
+ * Writes the OTU map: one line per cluster, made of the id of the center
+ * followed by the tab-separated ids of all the sequences of the cluster
+ * (the center itself included).
+ *
+ * uniqSeqs          : array of the unique sequences (heads of identical groups)
+ * count             : number of entries in uniqSeqs
+ * OTUtableFile_name : path of the file to write
+ *
+ * When the file cannot be opened, an error is printed on standard error and
+ * nothing is written. As a side effect, line_OTU_table and
+ * line_OTU_table_size are set on every cluster center.
+ */
+void printInOTUtableFormat(fastaSeqPtr* uniqSeqs, int count, char* OTUtableFile_name)
+{
+    int i, j;
+    FILE* OTUtableFile;
+    fastaSeqPtr head;
+    fastaSeqPtr center;
+    size_t id_len;
+
+    OTUtableFile = fopen(OTUtableFile_name, "w");
+    if (OTUtableFile == NULL)
+    {
+        fprintf(stderr, "\nCan't open OTU table output file.\n");
+        return;
+    }
+
+    for (i=0; i<count; i++)
+    {
+        head = uniqSeqs[i];
+
+        if (head->cluster_center)
+        {
+            // the line starts with the id of the center (name of the OTU)...
+            center = head;
+            id_len = strlen(head->accession_id);
+            center->line_OTU_table = (char*) malloc((id_len + 1) * sizeof(char));
+            if (center->line_OTU_table == NULL)
+            {
+                fprintf(stderr, "\nERROR: out of memory while building the OTU table.\n\n");
+                exit(1);
+            }
+            strcpy(center->line_OTU_table, head->accession_id);
+            center->line_OTU_table_size = id_len + 1;
+        }
+        else
+            center = *(head->center);
+
+        // ...followed by the id of every member, the center being the first one
+        appendOTUtableId(center, head->accession_id);
+
+        // identical sequences are stored right after their head, up to the
+        // next head or to the last sequence (next == NULL)
+        if (head->next != NULL)
+        {
+            for (j=1; (head+j)->uniqHead == FALSE; j++)
+            {
+                appendOTUtableId(center, (head+j)->accession_id);
+                if ((head+j)->next == NULL)
+                    break;
+            }
+        }
+    }
+
+    // Print rows : the lines are written verbatim (never as a format)
+
+    for (i=0; i<count; i++)
+    {
+        head = uniqSeqs[i];
+        if (head->cluster_center)
+        {
+            fputs(head->line_OTU_table, OTUtableFile);
+            fputc('\n', OTUtableFile);
+        }
+    }
+
+    fclose(OTUtableFile);
+}
+
+
+/*
+ * Adds the "cluster", "cluster_score" and "cluster_center" fields to the
+ * header of *seq and of the identical sequences stored right after it, and
+ * prints each of them (header, then sequence) to output.
+ *
+ * seq    : unique sequence (head of a group of identical sequences)
+ * center : center of the cluster the sequence belongs to
+ * score  : score of the sequence with its center
+ * output : stream to print to
+ */
+void printSeq(fastaSeqPtr* seq, fastaSeqPtr* center, double score, FILE* output)
+{
+ int i;
+
+ char* score_n;
+ char* score_v;
+ char* cluster_n;
+ char* cluster_v;
+ char* center_n;
+ char* center_true;
+ char* center_false;
+ int id_size;
+
+ // Field names and values are heap-allocated and never freed in this function.
+ // NOTE(review): presumably table_header_add_field() keeps the pointers - confirm.
+ score_n = (char*) malloc(14*sizeof(char));
+ score_v = (char*) malloc(20*sizeof(char));
+
+ strcpy(score_n, "cluster_score");
+ sprintf(score_v,"%f", score);
+
+ id_size = strlen((*center)->accession_id);
+
+ cluster_n = (char*) malloc(8*sizeof(char));
+ cluster_v = (char*) malloc((id_size+1)*sizeof(char));
+
+ strcpy(cluster_n, "cluster");
+ strcpy(cluster_v, (*center)->accession_id);
+
+ center_n = (char*) malloc(15*sizeof(char));
+ strcpy(center_n, "cluster_center");
+ center_true = (char*) malloc(5*sizeof(char));
+ strcpy(center_true, "True");
+ center_false = (char*) malloc(6*sizeof(char));
+ strcpy(center_false, "False");
+
+ (*seq)->header = table_header_add_field((*seq)->header, cluster_n, cluster_v);
+ (*seq)->header = table_header_add_field((*seq)->header, score_n, score_v);
+ if ((*seq)->cluster_center)
+ (*seq)->header = table_header_add_field((*seq)->header, center_n, center_true);
+ else
+ (*seq)->header = table_header_add_field((*seq)->header, center_n, center_false);
+
+ printOnlyHeaderFromTable((*seq)->header, output);
+ printOnlySeqFromFastaSeqPtr((*seq), output);
+
+ // Identical sequences follow the head in memory until the next head (uniqHead)
+ // or the last sequence (next == NULL); they are never flagged as centers.
+ if ((*seq)->next != NULL)
+ {
+ for (i=1; ((((*seq)+i)->next != NULL) && (((*seq)+i)->uniqHead == FALSE)); i++)
+ {
+ ((*seq)+i)->header = table_header_add_field(((*seq)+i)->header, cluster_n, cluster_v);
+ ((*seq)+i)->header = table_header_add_field(((*seq)+i)->header, score_n, score_v);
+ ((*seq)+i)->header = table_header_add_field(((*seq)+i)->header, center_n, center_false);
+
+ printOnlyHeaderFromTable(((*seq)+i)->header, output);
+ printOnlySeqFromFastaSeqPtr(((*seq)+i), output);
+ }
+
+ if ((((*seq)+i)->next == NULL) && (((*seq)+i)->uniqHead == FALSE)) // last sequence
+ {
+ ((*seq)+i)->header = table_header_add_field(((*seq)+i)->header, cluster_n, cluster_v);
+ ((*seq)+i)->header = table_header_add_field(((*seq)+i)->header, score_n, score_v);
+ ((*seq)+i)->header = table_header_add_field(((*seq)+i)->header, center_n, center_false);
+
+ printOnlyHeaderFromTable(((*seq)+i)->header, output);
+ printOnlySeqFromFastaSeqPtr(((*seq)+i), output);
+ }
+ }
+}
+
+
+/*
+ * Stores on *seq the center of the cluster it belongs to and its score
+ * with that center.
+ */
+void putSeqInCluster(fastaSeqPtr* seq, fastaSeqPtr* center, double score)
+{
+    fastaSeqPtr member = *seq;
+
+    member->score = score;
+    member->center = center;
+}
+
+
+/*
+ * Single-threaded clustering: every sequence is compared with the centers
+ * found so far and either attached to one of them or made a new center.
+ *
+ * db         : array of the n sequences to cluster
+ * n          : number of sequences in db
+ * fastOption : TRUE to stop at the first center passing the threshold,
+ *              FALSE to test every center and keep the best score
+ * threshold  : similarity or distance threshold, see the message printed below
+ * normalize, reference, lcsmode : scoring options passed on to filters()
+ *              and alignForSumathings()
+ * max_ratio  : a center is only tested when count(sequence)/count(center)
+ *              is at most this value
+ *
+ * Returns the number of clusters created.
+ */
+int compare(fastaSeqPtr* db, int n, BOOL fastOption, double threshold, BOOL normalize, int reference, BOOL lcsmode,
+ double max_ratio)
+{
+ double score;
+ double scoremax;
+ double worstscore;
+ BOOL toCluster;
+ static BOOL first=TRUE; // static: stays FALSE for any later call of this function
+ int32_t i,j,k;
+ int center;
+ float p;
+ BOOL found;
+ int lmax, lmin;
+ int16_t* address;
+ int16_t* iseq1;
+ int16_t* iseq2;
+ int l1;
+ int l2;
+ char* s1;
+ char* s2;
+ int sizeForSeqs;
+ int LCSmin;
+
+ if (lcsmode || normalize)
+ fprintf(stderr,"Clustering sequences when similarity >= %lf\n", threshold);
+ else
+ fprintf(stderr,"Clustering sequences when distance <= %lf\n", threshold);
+
+ fprintf(stderr,"Aligning and clustering... \n");
+
+ // centers[0..k-1] hold the indices of the centers found so far; -1 marks the end of the list.
+ int* centers = (int*) malloc(n * sizeof(int));
+
+ for (i=0; i < n; i++)
+ centers[i] = -1;
+
+ k=0;
+ found = FALSE;
+
+ calculateMaxAndMinLen(db, n, &lmax, &lmin);
+
+ sizeForSeqs = prepareTablesForSumathings(lmax, lmin, threshold, normalize, reference, lcsmode, &address, &iseq1, &iseq2);
+
+ // Worst possible score: 0 for similarities, the longest length for raw distances.
+ if (lcsmode || normalize)
+ worstscore = 0.0;
+ else
+ worstscore = lmax;
+
+ for (i=0; i < n; i++)
+ {
+ if (i%100 == 0)
+ {
+ p = (i/(float)n)*100;
+ fprintf(stderr,"\rDone : %f %% %d clusters created",p,k);
+ }
+
+ // The very first sequence always becomes the first center.
+ if (first)
+ {
+ first = FALSE;
+ if (normalize && lcsmode)
+ score = 1.0;
+ else if (!lcsmode)
+ score = 0.0;
+ else
+ score = (*(db+i))->length;
+ (*(db+i))->cluster_center = TRUE;
+ putSeqInCluster(db+i, db+i, score);
+ centers[k] = i;
+ k++;
+ }
+
+ else
+ {
+ scoremax = worstscore;
+ center = 0;
+ found = FALSE;
+ toCluster = FALSE;
+ j=0;
+
+ s1 = (*(db+i))->sequence;
+ l1 = (*(db+i))->length;
+
+ // Fast mode stops at the first matching center, exact mode walks the whole list.
+ while (((found == FALSE) && (centers[j] != -1) && (fastOption == TRUE)) || ((fastOption == FALSE) && (centers[j] != -1)))
+ {
+ score = worstscore;
+
+ if ((double) ((*(db+i))->count) / (double) ((*(db+centers[j]))->count) <= max_ratio)
+ {
+ filters((*(db+i)), (*(db+centers[j])), threshold, normalize, reference, lcsmode, &score, &LCSmin);
+ }
+
+ // A score of -1.0 left by filters() requests the full alignment.
+ if (score == -1.0)
+ {
+ s2 = (*(db+centers[j]))->sequence;
+ l2 = (*(db+centers[j]))->length;
+
+ score = alignForSumathings(s1, iseq1, s2, iseq2, l1, l2, normalize, reference, lcsmode, address, sizeForSeqs, LCSmin);
+ }
+
+ // Keep this center when it passes the threshold and beats the best score so far.
+ if (((score >= threshold) && (lcsmode || normalize) && (score > scoremax)) || ((!lcsmode && !normalize) && (score <= threshold) && (score < scoremax)))
+ {
+ toCluster = TRUE;
+ scoremax = score;
+ center = centers[j];
+ if (fastOption == TRUE)
+ found = TRUE;
+ }
+ j++;
+ }
+
+ if (toCluster)
+ {
+ if (!lcsmode && normalize)
+ scoremax = 1.0 - scoremax;
+ (*(db+i))->cluster_center = FALSE;
+ putSeqInCluster(db+i, db+center, scoremax);
+ }
+ else
+ {
+ // No center matched: the sequence becomes a new center.
+ if (normalize && lcsmode)
+ score = 1.0;
+ else if (!lcsmode)
+ score = 0.0;
+ else
+ score = (*(db+i))->length;
+ (*(db+i))->cluster_center = TRUE;
+ putSeqInCluster(db+i, db+i, score);
+ centers[k] = i;
+ k++;
+ }
+ }
+ }
+ fprintf(stderr,"\rDone : 100 %% %d clusters created. \n",k);
+
+ free(centers);
+
+ // NOTE(review): the pointer adjustment presumably undoes an offset applied by
+ // prepareTablesForSumathings() - confirm against that function.
+ free(iseq1-sizeForSeqs+lmax);
+ free(iseq2-sizeForSeqs+lmax);
+
+ // NOTE(review): freeEverything() in mtcompare_sumaclust.c frees the address tables
+ // under a different condition ((reference == ALILEN) && (normalize || !lcsmode)) - confirm which one is right.
+ if (normalize && reference == ALILEN)
+ free(address);
+
+ return(k);
+}
+
+
+/*
+ * Computes the weight of every cluster and stores it as a "cluster_weight"
+ * field in the headers of the sequences.
+ *
+ * uniqSeqs : vector of pointers to the unique sequences
+ * n        : number of entries in uniqSeqs
+ *
+ * First pass: a cluster center starts with its own count as cluster weight,
+ * and every other sequence adds its count to the weight of its center.
+ * This assumes a center is visited before the sequences attached to it
+ * (otherwise the assignment would overwrite counts already accumulated).
+ *
+ * Second pass: the weight of the cluster each unique sequence belongs to is
+ * written into its header, and into the headers of the records stored right
+ * after it in memory, up to the next record flagged as a unique head or up
+ * to the record whose 'next' pointer is NULL (the last record of the set).
+ *
+ * NOTE(review): the same name/value strings are handed to
+ * table_header_add_field() for all those records; whether that function
+ * copies them or keeps the pointers is not visible from here.
+ *
+ * Exits the program with status 1 if memory cannot be allocated.
+ */
+void computeClusterWeights(fastaSeqPtr* uniqSeqs, int n)
+{
+	/* Room for the decimal representation of an int and its terminator. */
+	enum { CLUSTER_WEIGHT_VALUE_SIZE = 20 };
+	static const char field_name[] = "cluster_weight";
+
+	int          i;
+	int          cluster_weight;
+	fastaSeqPtr* seq;
+	fastaSeqPtr* center;
+	fastaSeqPtr  member;
+	char*        cluster_weight_n;
+	char*        cluster_weight_v;
+
+	/* First pass: accumulate the counts on the cluster centers. */
+	for (i=0; i<n; i++)
+	{
+		seq = uniqSeqs+i;
+
+		if ((*seq)->cluster_center)
+			(*seq)->cluster_weight = (*seq)->count;
+		else
+		{
+			center = (*seq)->center;
+			((*center)->cluster_weight) += (*seq)->count;
+		}
+	}
+
+	/* Second pass: write the weight of the cluster into the headers. */
+	for (i=0; i<n; i++)
+	{
+		seq = uniqSeqs+i;
+
+		if ((*seq)->cluster_center)
+			cluster_weight = (*seq)->cluster_weight;
+		else
+		{
+			center = (*seq)->center;
+			cluster_weight = (*center)->cluster_weight;
+		}
+
+		cluster_weight_n = (char*) malloc(sizeof field_name);
+		cluster_weight_v = (char*) malloc(CLUSTER_WEIGHT_VALUE_SIZE*sizeof(char));
+		if ((cluster_weight_n == NULL) || (cluster_weight_v == NULL))
+		{
+			fprintf(stderr, "\nERROR: Out of memory while computing cluster weights.\n\n");
+			exit(1);
+		}
+		strcpy(cluster_weight_n, field_name);
+		snprintf(cluster_weight_v, CLUSTER_WEIGHT_VALUE_SIZE, "%d", cluster_weight);
+
+		(*seq)->header = table_header_add_field((*seq)->header, cluster_weight_n, cluster_weight_v);
+
+		if ((*seq)->next != NULL) // not the last sequence
+		{
+			/* Propagate the field to the following records that are not
+			 * unique heads; the record whose 'next' is NULL is the last
+			 * one and ends the walk after being tagged. */
+			member = (*seq)+1;
+			while (member->uniqHead == FALSE)
+			{
+				member->header = table_header_add_field(member->header, cluster_weight_n, cluster_weight_v);
+				if (member->next == NULL)
+					break;
+				member++;
+			}
+		}
+	}
+}
+
+
+/*
+ * Entry point of sumaclust.
+ *
+ * Parses the command line, reads the sequences from the file named by the
+ * first non-option argument, sorts them, clusters them (single-threaded
+ * compare() or mt_compare_sumaclust() when -p is not 1), computes the
+ * cluster weights and prints the results.
+ *
+ * Options handled below:
+ *   -h        print help and exit
+ *   -l/-L/-a  reference length: shortest / largest sequence / alignment
+ *   -n/-r     normalize / do not normalize the score
+ *   -d        score expressed as a distance
+ *   -e        disable the fast option (see the comment on case 'e')
+ *   -t value  threshold (default 0.97)
+ *   -R value  maximum count ratio (default 1.0)
+ *   -p value  number of processors (default 1)
+ *   -s key    sorting key, -o reverse sorting
+ *   -g        sets onlyATGC to FALSE (cleanDB() is then applied)
+ *   -B file   BIOM output, -O file OTU table output
+ *   -f        no FASTA output, -F file FASTA output written to file
+ *
+ * Returns 0 on success; exits with status 1 on invalid threshold, empty
+ * dataset, allocation failure or when the FASTA output file can't be opened.
+ */
+int main(int argc, char** argv)
+{
+	/* Size of the buffers receiving option arguments; the "%1023s"
+	 * conversions below must stay in sync with it (size - 1). */
+	enum { OPTION_BUFFER_SIZE = 1024 };
+
+	int32_t carg = 0;
+	int32_t errflag = 0;
+	char* sort;
+	double threshold = 0.97;
+	double max_ratio = 1.0;
+	BOOL lcsmode = TRUE;
+	BOOL fastOption = TRUE;
+	BOOL normalize = TRUE;
+	BOOL reverse = FALSE;
+	BOOL onlyATGC = TRUE;
+	int reference = ALILEN;
+	int nproc = 1;
+	BOOL printBIOM = FALSE;
+	BOOL printOTUtable = FALSE;
+	BOOL printFASTA = TRUE;
+	BOOL printFASTAtofile = FALSE;
+	FILE* FASTA_output = stdout;
+	fastaSeqCount db;
+	int i,n;
+	fastaSeqPtr* uniqSeqs;
+	fastaSeqPtr* shrunkUniqSeqs;
+	char* biomFile_name;
+	char* OTUtableFile_name;
+	char* FASTA_file_name;
+	int numberOfCenters;
+
+
+	sort = malloc(OPTION_BUFFER_SIZE*sizeof(char));
+	biomFile_name = malloc(OPTION_BUFFER_SIZE*sizeof(char));
+	OTUtableFile_name = malloc(OPTION_BUFFER_SIZE*sizeof(char));
+	FASTA_file_name = malloc(OPTION_BUFFER_SIZE*sizeof(char));
+
+	if ((sort == NULL) || (biomFile_name == NULL) || (OTUtableFile_name == NULL) || (FASTA_file_name == NULL))
+	{
+		fprintf(stderr, "\nERROR: Out of memory.\n\n");
+		exit(1);
+	}
+
+	strcpy(sort, "count");
+
+
+	while ((carg = getopt(argc, argv, "hlLanrdet:p:s:ogB:O:R:fF:")) != -1) {
+		switch (carg) {
+		/* -------------------- */
+		case 'h': /* help */
+		/* -------------------- */
+			PrintHelp();
+			exit(0);
+			break;
+
+		/* -------------------------------------------------- */
+		case 'l': /* Normalize LCS/Error by the shortest sequence length*/
+		/* -------------------------------------------------- */
+			reference=MINLEN;
+			break;
+
+		/* -------------------------------------------------- */
+		case 'L': /* Normalize LCS/Error by the largest sequence length */
+		/* -------------------------------------------------- */
+			reference=MAXLEN;
+			break;
+
+		/* -------------------------------------------------- */
+		case 'a': /* Normalize LCS/Error by the alignment length */
+		/* -------------------------------------------------- */
+			reference=ALILEN;
+			break;
+
+		/* -------------------------------------------------- */
+		case 'n': /* Normalize LCS by the reference length */
+		/* -------------------------------------------------- */
+			normalize=TRUE;
+			break;
+
+		/* -------------------------------------------------- */
+		case 'r': /* No normalization */
+		/* -------------------------------------------------- */
+			normalize=FALSE;
+			break;
+
+		/* -------------------------------------------------- */
+		case 'd': /* Score is expressed in distance */
+		/* -------------------------------------------------- */
+			lcsmode=FALSE;
+			break;
+
+		/* ---------------------------------------------------------------------------------------------------------- */
+		case 'e': /* center with the best score > threshold is chosen, otherwise first center with a score > threshold */
+		/* ---------------------------------------------------------------------------------------------------------- */
+			fastOption=FALSE;
+			break;
+
+		/* ------------------------------------------------------------------- */
+		case 't': /* Clusters only pairs with similarity higher than (threshold) */
+		/* ------------------------------------------------------------------- */
+			sscanf(optarg,"%lf",&threshold);
+			break;
+
+		/* ------------------------------------------------------------------- */
+		case 'R': /* maximum ratio between counts of two sequences connected by an edge */
+		/* ------------------------------------------------------------------- */
+			sscanf(optarg,"%lf",&max_ratio);
+			break;
+
+		/* -------------------------------------------------- */
+		case 'p': /* number of processors to use */
+		/* -------------------------------------------------- */
+			sscanf(optarg,"%d",&nproc);
+			break;
+
+		/* -------------------------------------------------- */
+		case 's': /* Sorting option */
+		/* -------------------------------------------------- */
+			/* Field width keeps a long argument from overflowing the buffer. */
+			sscanf(optarg, "%1023s", sort);
+			sortingKey = sort;
+			break;
+
+		/* -------------------------------------------------- */
+		case 'o': /* reverse sorting */
+		/* -------------------------------------------------- */
+			reverse=TRUE;
+			break;
+
+		/* -------------------------------------------------- */
+		case 'g': /* replace n's with a's in sequences */
+		/* -------------------------------------------------- */
+			onlyATGC=FALSE;
+			break;
+
+		/* -------------------------------------------------- */
+		case 'B': /* file name to print results in BIOM format */
+		/* -------------------------------------------------- */
+			sscanf(optarg, "%1023s", biomFile_name);
+			printBIOM=TRUE;
+			break;
+
+		/* -------------------------------------------------- */
+		case 'O': /* file name to print results in OTU table format */
+		/* -------------------------------------------------- */
+			sscanf(optarg, "%1023s", OTUtableFile_name);
+			printOTUtable=TRUE;
+			break;
+
+		/* -------------------------------------------------- */
+		case 'f': /* don't print results in FASTA format */
+		/* -------------------------------------------------- */
+			printFASTA=FALSE;
+			break;
+
+		/* ---------------------------------------------- */
+		case 'F': /* file name to print results in FASTA format */
+		/* ---------------------------------------------- */
+			sscanf(optarg, "%1023s", FASTA_file_name);
+			printFASTAtofile=TRUE;
+			break;
+
+		case '?': /* invalid option */
+			errflag++;
+			break;
+		}
+	}
+
+	if (errflag)
+		ExitUsage(errflag);
+
+	fprintf(stderr,"===========================================================\n");
+	fprintf(stderr," SUMACLUST version %s\n",VERSION);
+#ifdef __SSE2__
+	fprintf(stderr," Alignment using SSE2 instructions.\n");
+#else
+	fprintf(stderr," Alignment using standard code, SSE2 unavailable.\n");
+#endif
+	fprintf(stderr,"===========================================================\n");
+
+	if ((threshold == 0.0) || (normalize && (threshold > 1.0)))
+	{
+		fprintf(stderr, "\nERROR: Please specify a threshold > 0, and < 1 when scores are normalized.\n\n");
+		exit(1);
+	}
+
+	fprintf(stderr,"Reading dataset...");
+	/* NOTE(review): argv[optind] is NULL when no file name is given; how
+	 * seq_readAllSeq2() treats a NULL name is not visible from here. */
+	db = seq_readAllSeq2(argv[optind], TRUE, onlyATGC);
+
+	fprintf(stderr,"\n%d sequences\n",db.count);
+
+	if (db.count == 0)
+	{
+		fprintf(stderr, "\nNo valid sequences. Exiting program.\n\n");
+		exit(1);
+	}
+
+	if (!onlyATGC)
+		(void)cleanDB(db);
+
+	/* A normalized distance threshold is given as a similarity on the
+	 * command line and converted here. */
+	if (!lcsmode && normalize)
+		threshold = 1.0 - threshold;
+
+	if (threshold > 0)
+		(void)hashDB(db);
+
+	addCounts(&db);
+
+	// first sorting of sequences to have good unique heads
+
+	if ((strcmp(sortingKey, "None") != 0) && (strcmp(sortingKey, "none") != 0))
+	{
+		if (reverse == FALSE)
+			qsort((void*) db.fastaSeqs, db.count, sizeof(fastaSeq), uniqSeqsDoubleSortFunction);
+		else
+			qsort((void*) db.fastaSeqs, db.count, sizeof(fastaSeq), uniqSeqsDoubleReverseSortFunction);
+	}
+
+	// getting the vector of unique seqs
+	uniqSeqs = (fastaSeqPtr*) malloc((db.count)*sizeof(fastaSeqPtr));
+	if (uniqSeqs == NULL)
+	{
+		fprintf(stderr, "\nERROR: Out of memory.\n\n");
+		exit(1);
+	}
+	n = uniqSeqsVector(&db, &uniqSeqs);
+
+	/* Shrink the vector to the number of unique sequences; on failure the
+	 * original (larger) block is still valid and simply kept. */
+	if (n > 0)
+	{
+		shrunkUniqSeqs = realloc(uniqSeqs, n*sizeof(fastaSeqPtr));
+		if (shrunkUniqSeqs != NULL)
+			uniqSeqs = shrunkUniqSeqs;
+	}
+
+	// putting a flag on the last sequence
+	/* 'next' points to the following record for every sequence but the
+	 * last one, whose NULL 'next' marks the end of the set. */
+	for (i=0; i<(db.count-1); i++)
+		((db.fastaSeqs)+i)->next = (db.fastaSeqs)+i+1;
+	((db.fastaSeqs)+(db.count)-1)->next = NULL;
+
+	// sorting unique sequences
+	if (strcmp(sortingKey, "count") == 0)
+	{
+		fprintf(stderr,"Sorting sequences by count...\n");
+		if (reverse == FALSE)
+			qsort((void*) uniqSeqs, n, sizeof(fastaSeqPtr), sortSeqsWithCounts);
+		else
+			qsort((void*) uniqSeqs, n, sizeof(fastaSeqPtr), reverseSortSeqsWithCounts);
+	}
+	else if ((strcmp(sortingKey, "None") != 0) && (strcmp(sortingKey, "none") != 0))
+	{
+		fprintf(stderr,"Sorting sequences by %s...\n", sortingKey);
+		if (reverse == FALSE)
+			qsort((void*) uniqSeqs, n, sizeof(fastaSeqPtr), sortSeqsP);
+		else
+			qsort((void*) uniqSeqs, n, sizeof(fastaSeqPtr), reverseSortSeqsP);
+	}
+
+	if (max_ratio > 0)
+		fprintf(stderr,"Maximum ratio between the counts of two sequences to connect them: %lf\n", max_ratio);
+
+	// Computing
+	if (nproc==1)
+		numberOfCenters = compare(uniqSeqs, n, fastOption, threshold, normalize, reference, lcsmode, max_ratio);
+	else
+		numberOfCenters = mt_compare_sumaclust(uniqSeqs, n, fastOption, threshold, normalize, reference, lcsmode, nproc, max_ratio);
+
+	// Computing cluster weights
+	computeClusterWeights(uniqSeqs, n);
+
+	// Printing results
+
+	// FASTA file
+	if (printFASTA)
+	{
+		if (printFASTAtofile)
+		{
+			FASTA_output = fopen(FASTA_file_name, "w");
+			if (FASTA_output == NULL)
+			{
+				/* Printing to a NULL stream would crash; stop here. */
+				fprintf(stderr, "\nCan't open FASTA output file.\n");
+				exit(1);
+			}
+		}
+
+		for (i=0; i<n; i++)
+		{
+			printSeq(uniqSeqs+i, (*(uniqSeqs+i))->center, (*(uniqSeqs+i))->score, FASTA_output);
+		}
+
+		/* Only a file opened here is closed; stdout is left alone. */
+		if (printFASTAtofile)
+			fclose(FASTA_output);
+
+		fprintf(stderr,"Done.\n");
+	}
+
+	// BIOM file
+	if (printBIOM)
+	{
+		fprintf(stderr,"Printing results in BIOM format...\n");
+		printInBIOMformat(uniqSeqs, n, numberOfCenters, biomFile_name);
+		fprintf(stderr,"Done.\n");
+	}
+
+	// OTU table file
+	if (printOTUtable)
+	{
+		fprintf(stderr,"Printing results in OTU table format...\n");
+		printInOTUtableFormat(uniqSeqs, n, OTUtableFile_name);
+		fprintf(stderr,"Done.\n");
+	}
+
+	// Freeing
+	for (i=0; i < db.count; i++)
+	{
+		free(((db.fastaSeqs)[i]).table);
+		free_header_table(((db.fastaSeqs)[i]).header);
+	}
+	free(db.fastaSeqs);
+	free(sort);
+	free(biomFile_name);
+	free(OTUtableFile_name);
+	free(FASTA_file_name);
+	free(uniqSeqs);
+
+	return(0);
+
+}
diff --git a/sumaclust.h b/sumaclust.h
new file mode 100644
index 0000000..873b641
--- /dev/null
+++ b/sumaclust.h
@@ -0,0 +1,34 @@
+/*
+ * sumaclust.h
+ *
+ * Created on: april 2, 2012
+ * Author: mercier
+ */
+
+
+#ifndef SUMACLUST_H_
+#define SUMACLUST_H_
+
+/*
+ * thread_control_t: bundle of state describing one clustering run.
+ *
+ * NOTE(review): this header does not show how the fields are used. The
+ * field names suggest the structure is shared between the worker threads of
+ * the multithreaded comparison (mt_compare_sumaclust) -- confirm against
+ * mtcompare_sumaclust.c before relying on the notes below.
+ *
+ * This header includes nothing itself: the fixed-width integer types, BOOL
+ * and fastaSeqPtr must already be declared by the including file.
+ */
+typedef struct {
+ int32_t next;
+ int32_t threads_number;
+ int* potential_nexts_list;
+ /* db, n, normalize, reference, lcsmode, threshold and max_ratio carry the
+  * same names as parameters of compare() in sumaclust.c -- presumably the
+  * same meaning; verify. */
+ fastaSeqPtr* db;
+ int n;
+ int normalize;
+ int reference;
+ BOOL lcsmode;
+ BOOL fast;
+ double threshold;
+ BOOL stop;
+ /* sizeForSeqs and worstscore are also names of locals in compare();
+  * addresses/iseqs1/iseqs2 look like per-thread counterparts of its
+  * address/iseq1/iseq2 buffers -- TODO confirm. */
+ int sizeForSeqs;
+ int16_t** addresses;
+ int16_t** iseqs1;
+ int16_t** iseqs2;
+ int seeds_counter;
+ double worstscore;
+ double max_ratio;
+ int64_t elapsedtime;
+} thread_control_t;
+
+#endif /* SUMACLUST_H_ */
diff --git a/sumaclust_user_manual.pdf b/sumaclust_user_manual.pdf
new file mode 100644
index 0000000..1ab0b51
Binary files /dev/null and b/sumaclust_user_manual.pdf differ
diff --git a/sumalibs/._global.mk b/sumalibs/._global.mk
new file mode 100644
index 0000000..6adb72e
Binary files /dev/null and b/sumalibs/._global.mk differ
diff --git a/sumalibs/Licence_CeCILL_V2-en.txt b/sumalibs/Licence_CeCILL_V2-en.txt
new file mode 100644
index 0000000..fcc8df2
--- /dev/null
+++ b/sumalibs/Licence_CeCILL_V2-en.txt
@@ -0,0 +1,506 @@
+
+CeCILL FREE SOFTWARE LICENSE AGREEMENT
+
+
+ Notice
+
+This Agreement is a Free Software license agreement that is the result
+of discussions between its authors in order to ensure compliance with
+the two main principles guiding its drafting:
+
+ * firstly, compliance with the principles governing the distribution
+ of Free Software: access to source code, broad rights granted to
+ users,
+ * secondly, the election of a governing law, French law, with which
+ it is conformant, both as regards the law of torts and
+ intellectual property law, and the protection that it offers to
+ both authors and holders of the economic rights over software.
+
+The authors of the CeCILL (for Ce[a] C[nrs] I[nria] L[ogiciel] L[ibre])
+license are:
+
+Commissariat à l'Energie Atomique - CEA, a public scientific, technical
+and industrial research establishment, having its principal place of
+business at 25 rue Leblanc, immeuble Le Ponant D, 75015 Paris, France.
+
+Centre National de la Recherche Scientifique - CNRS, a public scientific
+and technological establishment, having its principal place of business
+at 3 rue Michel-Ange, 75794 Paris cedex 16, France.
+
+Institut National de Recherche en Informatique et en Automatique -
+INRIA, a public scientific and technological establishment, having its
+principal place of business at Domaine de Voluceau, Rocquencourt, BP
+105, 78153 Le Chesnay cedex, France.
+
+
+ Preamble
+
+The purpose of this Free Software license agreement is to grant users
+the right to modify and redistribute the software governed by this
+license within the framework of an open source distribution model.
+
+The exercising of these rights is conditional upon certain obligations
+for users so as to preserve this status for all subsequent redistributions.
+
+In consideration of access to the source code and the rights to copy,
+modify and redistribute granted by the license, users are provided only
+with a limited warranty and the software's author, the holder of the
+economic rights, and the successive licensors only have limited liability.
+
+In this respect, the risks associated with loading, using, modifying
+and/or developing or reproducing the software by the user are brought to
+the user's attention, given its Free Software status, which may make it
+complicated to use, with the result that its use is reserved for
+developers and experienced professionals having in-depth computer
+knowledge. Users are therefore encouraged to load and test the
+suitability of the software as regards their requirements in conditions
+enabling the security of their systems and/or data to be ensured and,
+more generally, to use and operate it in the same conditions of
+security. This Agreement may be freely reproduced and published,
+provided it is not altered, and that no provisions are either added or
+removed herefrom.
+
+This Agreement may apply to any or all software for which the holder of
+the economic rights decides to submit the use thereof to its provisions.
+
+
+ Article 1 - DEFINITIONS
+
+For the purpose of this Agreement, when the following expressions
+commence with a capital letter, they shall have the following meaning:
+
+Agreement: means this license agreement, and its possible subsequent
+versions and annexes.
+
+Software: means the software in its Object Code and/or Source Code form
+and, where applicable, its documentation, "as is" when the Licensee
+accepts the Agreement.
+
+Initial Software: means the Software in its Source Code and possibly its
+Object Code form and, where applicable, its documentation, "as is" when
+it is first distributed under the terms and conditions of the Agreement.
+
+Modified Software: means the Software modified by at least one
+Contribution.
+
+Source Code: means all the Software's instructions and program lines to
+which access is required so as to modify the Software.
+
+Object Code: means the binary files originating from the compilation of
+the Source Code.
+
+Holder: means the holder(s) of the economic rights over the Initial
+Software.
+
+Licensee: means the Software user(s) having accepted the Agreement.
+
+Contributor: means a Licensee having made at least one Contribution.
+
+Licensor: means the Holder, or any other individual or legal entity, who
+distributes the Software under the Agreement.
+
+Contribution: means any or all modifications, corrections, translations,
+adaptations and/or new functions integrated into the Software by any or
+all Contributors, as well as any or all Internal Modules.
+
+Module: means a set of sources files including their documentation that
+enables supplementary functions or services in addition to those offered
+by the Software.
+
+External Module: means any or all Modules, not derived from the
+Software, so that this Module and the Software run in separate address
+spaces, with one calling the other when they are run.
+
+Internal Module: means any or all Module, connected to the Software so
+that they both execute in the same address space.
+
+GNU GPL: means the GNU General Public License version 2 or any
+subsequent version, as published by the Free Software Foundation Inc.
+
+Parties: mean both the Licensee and the Licensor.
+
+These expressions may be used both in singular and plural form.
+
+
+ Article 2 - PURPOSE
+
+The purpose of the Agreement is the grant by the Licensor to the
+Licensee of a non-exclusive, transferable and worldwide license for the
+Software as set forth in Article 5 hereinafter for the whole term of the
+protection granted by the rights over said Software.
+
+
+ Article 3 - ACCEPTANCE
+
+3.1 The Licensee shall be deemed as having accepted the terms and
+conditions of this Agreement upon the occurrence of the first of the
+following events:
+
+ * (i) loading the Software by any or all means, notably, by
+ downloading from a remote server, or by loading from a physical
+ medium;
+ * (ii) the first time the Licensee exercises any of the rights
+ granted hereunder.
+
+3.2 One copy of the Agreement, containing a notice relating to the
+characteristics of the Software, to the limited warranty, and to the
+fact that its use is restricted to experienced users has been provided
+to the Licensee prior to its acceptance as set forth in Article 3.1
+hereinabove, and the Licensee hereby acknowledges that it has read and
+understood it.
+
+
+ Article 4 - EFFECTIVE DATE AND TERM
+
+
+ 4.1 EFFECTIVE DATE
+
+The Agreement shall become effective on the date when it is accepted by
+the Licensee as set forth in Article 3.1.
+
+
+ 4.2 TERM
+
+The Agreement shall remain in force for the entire legal term of
+protection of the economic rights over the Software.
+
+
+ Article 5 - SCOPE OF RIGHTS GRANTED
+
+The Licensor hereby grants to the Licensee, who accepts, the following
+rights over the Software for any or all use, and for the term of the
+Agreement, on the basis of the terms and conditions set forth hereinafter.
+
+Besides, if the Licensor owns or comes to own one or more patents
+protecting all or part of the functions of the Software or of its
+components, the Licensor undertakes not to enforce the rights granted by
+these patents against successive Licensees using, exploiting or
+modifying the Software. If these patents are transferred, the Licensor
+undertakes to have the transferees subscribe to the obligations set
+forth in this paragraph.
+
+
+ 5.1 RIGHT OF USE
+
+The Licensee is authorized to use the Software, without any limitation
+as to its fields of application, with it being hereinafter specified
+that this comprises:
+
+ 1. permanent or temporary reproduction of all or part of the Software
+ by any or all means and in any or all form.
+
+ 2. loading, displaying, running, or storing the Software on any or
+ all medium.
+
+ 3. entitlement to observe, study or test its operation so as to
+ determine the ideas and principles behind any or all constituent
+ elements of said Software. This shall apply when the Licensee
+ carries out any or all loading, displaying, running, transmission
+ or storage operation as regards the Software, that it is entitled
+ to carry out hereunder.
+
+
+ 5.2 ENTITLEMENT TO MAKE CONTRIBUTIONS
+
+The right to make Contributions includes the right to translate, adapt,
+arrange, or make any or all modifications to the Software, and the right
+to reproduce the resulting software.
+
+The Licensee is authorized to make any or all Contributions to the
+Software provided that it includes an explicit notice that it is the
+author of said Contribution and indicates the date of the creation thereof.
+
+
+ 5.3 RIGHT OF DISTRIBUTION
+
+In particular, the right of distribution includes the right to publish,
+transmit and communicate the Software to the general public on any or
+all medium, and by any or all means, and the right to market, either in
+consideration of a fee, or free of charge, one or more copies of the
+Software by any means.
+
+The Licensee is further authorized to distribute copies of the modified
+or unmodified Software to third parties according to the terms and
+conditions set forth hereinafter.
+
+
+ 5.3.1 DISTRIBUTION OF SOFTWARE WITHOUT MODIFICATION
+
+The Licensee is authorized to distribute true copies of the Software in
+Source Code or Object Code form, provided that said distribution
+complies with all the provisions of the Agreement and is accompanied by:
+
+ 1. a copy of the Agreement,
+
+ 2. a notice relating to the limitation of both the Licensor's
+ warranty and liability as set forth in Articles 8 and 9,
+
+and that, in the event that only the Object Code of the Software is
+redistributed, the Licensee allows future Licensees unhindered access to
+the full Source Code of the Software by indicating how to access it, it
+being understood that the additional cost of acquiring the Source Code
+shall not exceed the cost of transferring the data.
+
+
+ 5.3.2 DISTRIBUTION OF MODIFIED SOFTWARE
+
+When the Licensee makes a Contribution to the Software, the terms and
+conditions for the distribution of the resulting Modified Software
+become subject to all the provisions of this Agreement.
+
+The Licensee is authorized to distribute the Modified Software, in
+source code or object code form, provided that said distribution
+complies with all the provisions of the Agreement and is accompanied by:
+
+ 1. a copy of the Agreement,
+
+ 2. a notice relating to the limitation of both the Licensor's
+ warranty and liability as set forth in Articles 8 and 9,
+
+and that, in the event that only the object code of the Modified
+Software is redistributed, the Licensee allows future Licensees
+unhindered access to the full source code of the Modified Software by
+indicating how to access it, it being understood that the additional
+cost of acquiring the source code shall not exceed the cost of
+transferring the data.
+
+
+ 5.3.3 DISTRIBUTION OF EXTERNAL MODULES
+
+When the Licensee has developed an External Module, the terms and
+conditions of this Agreement do not apply to said External Module, that
+may be distributed under a separate license agreement.
+
+
+ 5.3.4 COMPATIBILITY WITH THE GNU GPL
+
+The Licensee can include a code that is subject to the provisions of one
+of the versions of the GNU GPL in the Modified or unmodified Software,
+and distribute that entire code under the terms of the same version of
+the GNU GPL.
+
+The Licensee can include the Modified or unmodified Software in a code
+that is subject to the provisions of one of the versions of the GNU GPL,
+and distribute that entire code under the terms of the same version of
+the GNU GPL.
+
+
+ Article 6 - INTELLECTUAL PROPERTY
+
+
+ 6.1 OVER THE INITIAL SOFTWARE
+
+The Holder owns the economic rights over the Initial Software. Any or
+all use of the Initial Software is subject to compliance with the terms
+and conditions under which the Holder has elected to distribute its work
+and no one shall be entitled to modify the terms and conditions for the
+distribution of said Initial Software.
+
+The Holder undertakes that the Initial Software will remain ruled at
+least by this Agreement, for the duration set forth in Article 4.2.
+
+
+ 6.2 OVER THE CONTRIBUTIONS
+
+The Licensee who develops a Contribution is the owner of the
+intellectual property rights over this Contribution as defined by
+applicable law.
+
+
+ 6.3 OVER THE EXTERNAL MODULES
+
+The Licensee who develops an External Module is the owner of the
+intellectual property rights over this External Module as defined by
+applicable law and is free to choose the type of agreement that shall
+govern its distribution.
+
+
+ 6.4 JOINT PROVISIONS
+
+The Licensee expressly undertakes:
+
+ 1. not to remove, or modify, in any manner, the intellectual property
+ notices attached to the Software;
+
+ 2. to reproduce said notices, in an identical manner, in the copies
+ of the Software modified or not.
+
+The Licensee undertakes not to directly or indirectly infringe the
+intellectual property rights of the Holder and/or Contributors on the
+Software and to take, where applicable, vis-à-vis its staff, any and all
+measures required to ensure respect of said intellectual property rights
+of the Holder and/or Contributors.
+
+
+ Article 7 - RELATED SERVICES
+
+7.1 Under no circumstances shall the Agreement oblige the Licensor to
+provide technical assistance or maintenance services for the Software.
+
+However, the Licensor is entitled to offer this type of services. The
+terms and conditions of such technical assistance, and/or such
+maintenance, shall be set forth in a separate instrument. Only the
+Licensor offering said maintenance and/or technical assistance services
+shall incur liability therefor.
+
+7.2 Similarly, any Licensor is entitled to offer to its licensees, under
+its sole responsibility, a warranty, that shall only be binding upon
+itself, for the redistribution of the Software and/or the Modified
+Software, under terms and conditions that it is free to decide. Said
+warranty, and the financial terms and conditions of its application,
+shall be subject of a separate instrument executed between the Licensor
+and the Licensee.
+
+
+ Article 8 - LIABILITY
+
+8.1 Subject to the provisions of Article 8.2, the Licensee shall be
+entitled to claim compensation for any direct loss it may have suffered
+from the Software as a result of a fault on the part of the relevant
+Licensor, subject to providing evidence thereof.
+
+8.2 The Licensor's liability is limited to the commitments made under
+this Agreement and shall not be incurred as a result of in particular:
+(i) loss due the Licensee's total or partial failure to fulfill its
+obligations, (ii) direct or consequential loss that is suffered by the
+Licensee due to the use or performance of the Software, and (iii) more
+generally, any consequential loss. In particular the Parties expressly
+agree that any or all pecuniary or business loss (i.e. loss of data,
+loss of profits, operating loss, loss of customers or orders,
+opportunity cost, any disturbance to business activities) or any or all
+legal proceedings instituted against the Licensee by a third party,
+shall constitute consequential loss and shall not provide entitlement to
+any or all compensation from the Licensor.
+
+
+ Article 9 - WARRANTY
+
+9.1 The Licensee acknowledges that the scientific and technical
+state-of-the-art when the Software was distributed did not enable all
+possible uses to be tested and verified, nor for the presence of
+possible defects to be detected. In this respect, the Licensee's
+attention has been drawn to the risks associated with loading, using,
+modifying and/or developing and reproducing the Software which are
+reserved for experienced users.
+
+The Licensee shall be responsible for verifying, by any or all means,
+the suitability of the product for its requirements, its good working
+order, and for ensuring that it shall not cause damage to either persons
+or properties.
+
+9.2 The Licensor hereby represents, in good faith, that it is entitled
+to grant all the rights over the Software (including in particular the
+rights set forth in Article 5).
+
+9.3 The Licensee acknowledges that the Software is supplied "as is" by
+the Licensor without any other express or tacit warranty, other than
+that provided for in Article 9.2 and, in particular, without any warranty
+as to its commercial value, its secured, safe, innovative or relevant
+nature.
+
+Specifically, the Licensor does not warrant that the Software is free
+from any error, that it will operate without interruption, that it will
+be compatible with the Licensee's own equipment and software
+configuration, nor that it will meet the Licensee's requirements.
+
+9.4 The Licensor does not either expressly or tacitly warrant that the
+Software does not infringe any third party intellectual property right
+relating to a patent, software or any other property right. Therefore,
+the Licensor disclaims any and all liability towards the Licensee
+arising out of any or all proceedings for infringement that may be
+instituted in respect of the use, modification and redistribution of the
+Software. Nevertheless, should such proceedings be instituted against
+the Licensee, the Licensor shall provide it with technical and legal
+assistance for its defense. Such technical and legal assistance shall be
+decided on a case-by-case basis between the relevant Licensor and the
+Licensee pursuant to a memorandum of understanding. The Licensor
+disclaims any and all liability as regards the Licensee's use of the
+name of the Software. No warranty is given as regards the existence of
+prior rights over the name of the Software or as regards the existence
+of a trademark.
+
+
+ Article 10 - TERMINATION
+
+10.1 In the event of a breach by the Licensee of its obligations
+hereunder, the Licensor may automatically terminate this Agreement
+thirty (30) days after notice has been sent to the Licensee and has
+remained ineffective.
+
+10.2 A Licensee whose Agreement is terminated shall no longer be
+authorized to use, modify or distribute the Software. However, any
+licenses that it may have granted prior to termination of the Agreement
+shall remain valid subject to their having been granted in compliance
+with the terms and conditions hereof.
+
+
+ Article 11 - MISCELLANEOUS
+
+
+ 11.1 EXCUSABLE EVENTS
+
+Neither Party shall be liable for any or all delay, or failure to
+perform the Agreement, that may be attributable to an event of force
+majeure, an act of God or an outside cause, such as defective
+functioning or interruptions of the electricity or telecommunications
+networks, network paralysis following a virus attack, intervention by
+government authorities, natural disasters, water damage, earthquakes,
+fire, explosions, strikes and labor unrest, war, etc.
+
+11.2 Any failure by either Party, on one or more occasions, to invoke
+one or more of the provisions hereof, shall under no circumstances be
+interpreted as being a waiver by the interested Party of its right to
+invoke said provision(s) subsequently.
+
+11.3 The Agreement cancels and replaces any or all previous agreements,
+whether written or oral, between the Parties and having the same
+purpose, and constitutes the entirety of the agreement between said
+Parties concerning said purpose. No supplement or modification to the
+terms and conditions hereof shall be effective as between the Parties
+unless it is made in writing and signed by their duly authorized
+representatives.
+
+11.4 In the event that one or more of the provisions hereof were to
+conflict with a current or future applicable act or legislative text,
+said act or legislative text shall prevail, and the Parties shall make
+the necessary amendments so as to comply with said act or legislative
+text. All other provisions shall remain effective. Similarly, invalidity
+of a provision of the Agreement, for any reason whatsoever, shall not
+cause the Agreement as a whole to be invalid.
+
+
+ 11.5 LANGUAGE
+
+The Agreement is drafted in both French and English and both versions
+are deemed authentic.
+
+
+ Article 12 - NEW VERSIONS OF THE AGREEMENT
+
+12.1 Any person is authorized to duplicate and distribute copies of this
+Agreement.
+
+12.2 So as to ensure coherence, the wording of this Agreement is
+protected and may only be modified by the authors of the License, who
+reserve the right to periodically publish updates or new versions of the
+Agreement, each with a separate number. These subsequent versions may
+address new issues encountered by Free Software.
+
+12.3 Any Software distributed under a given version of the Agreement may
+only be subsequently distributed under the same version of the Agreement
+or a subsequent version, subject to the provisions of Article 5.3.4.
+
+
+ Article 13 - GOVERNING LAW AND JURISDICTION
+
+13.1 The Agreement is governed by French law. The Parties agree to
+endeavor to seek an amicable solution to any disagreements or disputes
+that may arise during the performance of the Agreement.
+
+13.2 Failing an amicable solution within two (2) months as from their
+occurrence, and unless emergency proceedings are necessary, the
+disagreements or disputes shall be referred to the Paris Courts having
+jurisdiction, by the more diligent Party.
+
+
+Version 2.0 dated 2006-09-05.
diff --git a/sumalibs/Licence_CeCILL_V2-fr.txt b/sumalibs/Licence_CeCILL_V2-fr.txt
new file mode 100644
index 0000000..1613fca
--- /dev/null
+++ b/sumalibs/Licence_CeCILL_V2-fr.txt
@@ -0,0 +1,512 @@
+
+CONTRAT DE LICENCE DE LOGICIEL LIBRE CeCILL
+
+
+ Avertissement
+
+Ce contrat est une licence de logiciel libre issue d'une concertation
+entre ses auteurs afin que le respect de deux grands principes préside à
+sa rédaction:
+
+ * d'une part, le respect des principes de diffusion des logiciels
+ libres: accès au code source, droits étendus conférés aux
+ utilisateurs,
+ * d'autre part, la désignation d'un droit applicable, le droit
+ français, auquel elle est conforme, tant au regard du droit de la
+ responsabilité civile que du droit de la propriété intellectuelle
+ et de la protection qu'il offre aux auteurs et titulaires des
+ droits patrimoniaux sur un logiciel.
+
+Les auteurs de la licence CeCILL (pour Ce[a] C[nrs] I[nria] L[ogiciel]
+L[ibre]) sont:
+
+Commissariat à l'Energie Atomique - CEA, établissement public de
+recherche à caractère scientifique, technique et industriel, dont le
+siège est situé 25 rue Leblanc, immeuble Le Ponant D, 75015 Paris.
+
+Centre National de la Recherche Scientifique - CNRS, établissement
+public à caractère scientifique et technologique, dont le siège est
+situé 3 rue Michel-Ange, 75794 Paris cedex 16.
+
+Institut National de Recherche en Informatique et en Automatique -
+INRIA, établissement public à caractère scientifique et technologique,
+dont le siège est situé Domaine de Voluceau, Rocquencourt, BP 105, 78153
+Le Chesnay cedex.
+
+
+ Préambule
+
+Ce contrat est une licence de logiciel libre dont l'objectif est de
+conférer aux utilisateurs la liberté de modification et de
+redistribution du logiciel régi par cette licence dans le cadre d'un
+modèle de diffusion en logiciel libre.
+
+L'exercice de ces libertés est assorti de certains devoirs à la charge
+des utilisateurs afin de préserver ce statut au cours des
+redistributions ultérieures.
+
+L'accessibilité au code source et les droits de copie, de modification
+et de redistribution qui en découlent ont pour contrepartie de n'offrir
+aux utilisateurs qu'une garantie limitée et de ne faire peser sur
+l'auteur du logiciel, le titulaire des droits patrimoniaux et les
+concédants successifs qu'une responsabilité restreinte.
+
+A cet égard l'attention de l'utilisateur est attirée sur les risques
+associés au chargement, à l'utilisation, à la modification et/ou au
+développement et à la reproduction du logiciel par l'utilisateur étant
+donné sa spécificité de logiciel libre, qui peut le rendre complexe à
+manipuler et qui le réserve donc à des développeurs ou des
+professionnels avertis possédant des connaissances informatiques
+approfondies. Les utilisateurs sont donc invités à charger et tester
+l'adéquation du logiciel à leurs besoins dans des conditions permettant
+d'assurer la sécurité de leurs systèmes et/ou de leurs données et, plus
+généralement, à l'utiliser et l'exploiter dans les mêmes conditions de
+sécurité. Ce contrat peut être reproduit et diffusé librement, sous
+réserve de le conserver en l'état, sans ajout ni suppression de clauses.
+
+Ce contrat est susceptible de s'appliquer à tout logiciel dont le
+titulaire des droits patrimoniaux décide de soumettre l'exploitation aux
+dispositions qu'il contient.
+
+
+ Article 1 - DEFINITIONS
+
+Dans ce contrat, les termes suivants, lorsqu'ils seront �crits avec une
+lettre capitale, auront la signification suivante:
+
+Contrat: d�signe le pr�sent contrat de licence, ses �ventuelles versions
+post�rieures et annexes.
+
+Logiciel: d�signe le logiciel sous sa forme de Code Objet et/ou de Code
+Source et le cas �ch�ant sa documentation, dans leur �tat au moment de
+l'acceptation du Contrat par le Licenci�.
+
+Logiciel Initial: d�signe le Logiciel sous sa forme de Code Source et
+�ventuellement de Code Objet et le cas �ch�ant sa documentation, dans
+leur �tat au moment de leur premi�re diffusion sous les termes du Contrat.
+
+Logiciel Modifi�: d�signe le Logiciel modifi� par au moins une
+Contribution.
+
+Code Source: d�signe l'ensemble des instructions et des lignes de
+programme du Logiciel et auquel l'acc�s est n�cessaire en vue de
+modifier le Logiciel.
+
+Code Objet: d�signe les fichiers binaires issus de la compilation du
+Code Source.
+
+Titulaire: d�signe le ou les d�tenteurs des droits patrimoniaux d'auteur
+sur le Logiciel Initial.
+
+Licenci�: d�signe le ou les utilisateurs du Logiciel ayant accept� le
+Contrat.
+
+Contributeur: d�signe le Licenci� auteur d'au moins une Contribution.
+
+Conc�dant: d�signe le Titulaire ou toute personne physique ou morale
+distribuant le Logiciel sous le Contrat.
+
+Contribution: d�signe l'ensemble des modifications, corrections,
+traductions, adaptations et/ou nouvelles fonctionnalit�s int�gr�es dans
+le Logiciel par tout Contributeur, ainsi que tout Module Interne.
+
+Module: d�signe un ensemble de fichiers sources y compris leur
+documentation qui permet de r�aliser des fonctionnalit�s ou services
+suppl�mentaires � ceux fournis par le Logiciel.
+
+Module Externe: d�signe tout Module, non d�riv� du Logiciel, tel que ce
+Module et le Logiciel s'ex�cutent dans des espaces d'adressage
+diff�rents, l'un appelant l'autre au moment de leur ex�cution.
+
+Module Interne: d�signe tout Module li� au Logiciel de telle sorte
+qu'ils s'ex�cutent dans le m�me espace d'adressage.
+
+GNU GPL: d�signe la GNU General Public License dans sa version 2 ou
+toute version ult�rieure, telle que publi�e par Free Software Foundation
+Inc.
+
+Parties: d�signe collectivement le Licenci� et le Conc�dant.
+
+Ces termes s'entendent au singulier comme au pluriel.
+
+
+ Article 2 - OBJET
+
+Le Contrat a pour objet la concession par le Conc�dant au Licenci� d'une
+licence non exclusive, cessible et mondiale du Logiciel telle que
+d�finie ci-apr�s � l'article 5 pour toute la dur�e de protection des droits
+portant sur ce Logiciel.
+
+
+ Article 3 - ACCEPTATION
+
+3.1 L'acceptation par le Licenci� des termes du Contrat est r�put�e
+acquise du fait du premier des faits suivants:
+
+ * (i) le chargement du Logiciel par tout moyen notamment par
+ t�l�chargement � partir d'un serveur distant ou par chargement �
+ partir d'un support physique;
+ * (ii) le premier exercice par le Licenci� de l'un quelconque des
+ droits conc�d�s par le Contrat.
+
+3.2 Un exemplaire du Contrat, contenant notamment un avertissement
+relatif aux sp�cificit�s du Logiciel, � la restriction de garantie et �
+la limitation � un usage par des utilisateurs exp�riment�s a �t� mis �
+disposition du Licenci� pr�alablement � son acceptation telle que
+d�finie � l'article 3.1 ci dessus et le Licenci� reconna�t en avoir pris
+connaissance.
+
+
+ Article 4 - ENTREE EN VIGUEUR ET DUREE
+
+
+ 4.1 ENTREE EN VIGUEUR
+
+Le Contrat entre en vigueur � la date de son acceptation par le Licenci�
+telle que d�finie en 3.1.
+
+
+ 4.2 DUREE
+
+Le Contrat produira ses effets pendant toute la dur�e l�gale de
+protection des droits patrimoniaux portant sur le Logiciel.
+
+
+ Article 5 - ETENDUE DES DROITS CONCEDES
+
+Le Conc�dant conc�de au Licenci�, qui accepte, les droits suivants sur
+le Logiciel pour toutes destinations et pour la dur�e du Contrat dans
+les conditions ci-apr�s d�taill�es.
+
+Par ailleurs, si le Conc�dant d�tient ou venait � d�tenir un ou
+plusieurs brevets d'invention prot�geant tout ou partie des
+fonctionnalit�s du Logiciel ou de ses composants, il s'engage � ne pas
+opposer les �ventuels droits conf�r�s par ces brevets aux Licenci�s
+successifs qui utiliseraient, exploiteraient ou modifieraient le
+Logiciel. En cas de cession de ces brevets, le Conc�dant s'engage �
+faire reprendre les obligations du pr�sent alin�a aux cessionnaires.
+
+
+ 5.1 DROIT D'UTILISATION
+
+Le Licenci� est autoris� � utiliser le Logiciel, sans restriction quant
+aux domaines d'application, �tant ci-apr�s pr�cis� que cela comporte:
+
+ 1. la reproduction permanente ou provisoire du Logiciel en tout ou
+ partie par tout moyen et sous toute forme.
+
+ 2. le chargement, l'affichage, l'ex�cution, ou le stockage du
+ Logiciel sur tout support.
+
+ 3. la possibilit� d'en observer, d'en �tudier, ou d'en tester le
+ fonctionnement afin de d�terminer les id�es et principes qui sont
+ � la base de n'importe quel �l�ment de ce Logiciel; et ceci,
+ lorsque le Licenci� effectue toute op�ration de chargement,
+ d'affichage, d'ex�cution, de transmission ou de stockage du
+ Logiciel qu'il est en droit d'effectuer en vertu du Contrat.
+
+
+ 5.2 DROIT D'APPORTER DES CONTRIBUTIONS
+
+Le droit d'apporter des Contributions comporte le droit de traduire,
+d'adapter, d'arranger ou d'apporter toute autre modification au Logiciel
+et le droit de reproduire le logiciel en r�sultant.
+
+Le Licenci� est autoris� � apporter toute Contribution au Logiciel sous
+r�serve de mentionner, de fa�on explicite, son nom en tant qu'auteur de
+cette Contribution et la date de cr�ation de celle-ci.
+
+
+ 5.3 DROIT DE DISTRIBUTION
+
+Le droit de distribution comporte notamment le droit de diffuser, de
+transmettre et de communiquer le Logiciel au public sur tout support et
+par tout moyen ainsi que le droit de mettre sur le march� � titre
+on�reux ou gratuit, un ou des exemplaires du Logiciel par tout proc�d�.
+
+Le Licenci� est autoris� � distribuer des copies du Logiciel, modifi� ou
+non, � des tiers dans les conditions ci-apr�s d�taill�es.
+
+
+ 5.3.1 DISTRIBUTION DU LOGICIEL SANS MODIFICATION
+
+Le Licenci� est autoris� � distribuer des copies conformes du Logiciel,
+sous forme de Code Source ou de Code Objet, � condition que cette
+distribution respecte les dispositions du Contrat dans leur totalit� et
+soit accompagn�e:
+
+ 1. d'un exemplaire du Contrat,
+
+ 2. d'un avertissement relatif � la restriction de garantie et de
+ responsabilit� du Conc�dant telle que pr�vue aux articles 8
+ et 9,
+
+et que, dans le cas o� seul le Code Objet du Logiciel est redistribu�,
+le Licenci� permette aux futurs Licenci�s d'acc�der facilement au Code
+Source complet du Logiciel en indiquant les modalit�s d'acc�s, �tant
+entendu que le co�t additionnel d'acquisition du Code Source ne devra
+pas exc�der le simple co�t de transfert des donn�es.
+
+
+ 5.3.2 DISTRIBUTION DU LOGICIEL MODIFIE
+
+Lorsque le Licenci� apporte une Contribution au Logiciel, les conditions
+de distribution du Logiciel Modifi� en r�sultant sont alors soumises �
+l'int�gralit� des dispositions du Contrat.
+
+Le Licenci� est autoris� � distribuer le Logiciel Modifi�, sous forme de
+code source ou de code objet, � condition que cette distribution
+respecte les dispositions du Contrat dans leur totalit� et soit
+accompagn�e:
+
+ 1. d'un exemplaire du Contrat,
+
+ 2. d'un avertissement relatif � la restriction de garantie et de
+ responsabilit� du Conc�dant telle que pr�vue aux articles 8
+ et 9,
+
+et que, dans le cas o� seul le code objet du Logiciel Modifi� est
+redistribu�, le Licenci� permette aux futurs Licenci�s d'acc�der
+facilement au code source complet du Logiciel Modifi� en indiquant les
+modalit�s d'acc�s, �tant entendu que le co�t additionnel d'acquisition
+du code source ne devra pas exc�der le simple co�t de transfert des donn�es.
+
+
+ 5.3.3 DISTRIBUTION DES MODULES EXTERNES
+
+Lorsque le Licenci� a d�velopp� un Module Externe les conditions du
+Contrat ne s'appliquent pas � ce Module Externe, qui peut �tre distribu�
+sous un contrat de licence diff�rent.
+
+
+ 5.3.4 COMPATIBILITE AVEC LA LICENCE GNU GPL
+
+Le Licenci� peut inclure un code soumis aux dispositions d'une des
+versions de la licence GNU GPL dans le Logiciel modifi� ou non et
+distribuer l'ensemble sous les conditions de la m�me version de la
+licence GNU GPL.
+
+Le Licenci� peut inclure le Logiciel modifi� ou non dans un code soumis
+aux dispositions d'une des versions de la licence GNU GPL et distribuer
+l'ensemble sous les conditions de la m�me version de la licence GNU GPL.
+
+
+ Article 6 - PROPRIETE INTELLECTUELLE
+
+
+ 6.1 SUR LE LOGICIEL INITIAL
+
+Le Titulaire est d�tenteur des droits patrimoniaux sur le Logiciel
+Initial. Toute utilisation du Logiciel Initial est soumise au respect
+des conditions dans lesquelles le Titulaire a choisi de diffuser son
+oeuvre et nul autre n'a la facult� de modifier les conditions de
+diffusion de ce Logiciel Initial.
+
+Le Titulaire s'engage � ce que le Logiciel Initial reste au moins r�gi
+par le Contrat et ce, pour la dur�e vis�e � l'article 4.2.
+
+
+ 6.2 SUR LES CONTRIBUTIONS
+
+Le Licenci� qui a d�velopp� une Contribution est titulaire sur celle-ci
+des droits de propri�t� intellectuelle dans les conditions d�finies par
+la l�gislation applicable.
+
+
+ 6.3 SUR LES MODULES EXTERNES
+
+Le Licenci� qui a d�velopp� un Module Externe est titulaire sur celui-ci
+des droits de propri�t� intellectuelle dans les conditions d�finies par
+la l�gislation applicable et reste libre du choix du contrat r�gissant
+sa diffusion.
+
+
+ 6.4 DISPOSITIONS COMMUNES
+
+Le Licenci� s'engage express�ment:
+
+ 1. � ne pas supprimer ou modifier de quelque mani�re que ce soit les
+ mentions de propri�t� intellectuelle appos�es sur le Logiciel;
+
+ 2. � reproduire � l'identique lesdites mentions de propri�t�
+ intellectuelle sur les copies du Logiciel modifi� ou non.
+
+Le Licenci� s'engage � ne pas porter atteinte, directement ou
+indirectement, aux droits de propri�t� intellectuelle du Titulaire et/ou
+des Contributeurs sur le Logiciel et � prendre, le cas �ch�ant, �
+l'�gard de son personnel toutes les mesures n�cessaires pour assurer le
+respect des dits droits de propri�t� intellectuelle du Titulaire et/ou
+des Contributeurs.
+
+
+ Article 7 - SERVICES ASSOCIES
+
+7.1 Le Contrat n'oblige en aucun cas le Conc�dant � la r�alisation de
+prestations d'assistance technique ou de maintenance du Logiciel.
+
+Cependant le Conc�dant reste libre de proposer ce type de services. Les
+termes et conditions d'une telle assistance technique et/ou d'une telle
+maintenance seront alors d�termin�s dans un acte s�par�. Ces actes de
+maintenance et/ou assistance technique n'engageront que la seule
+responsabilit� du Conc�dant qui les propose.
+
+7.2 De m�me, tout Conc�dant est libre de proposer, sous sa seule
+responsabilit�, � ses licenci�s une garantie, qui n'engagera que lui,
+lors de la redistribution du Logiciel et/ou du Logiciel Modifi� et ce,
+dans les conditions qu'il souhaite. Cette garantie et les modalit�s
+financi�res de son application feront l'objet d'un acte s�par� entre le
+Conc�dant et le Licenci�.
+
+
+ Article 8 - RESPONSABILITE
+
+8.1 Sous r�serve des dispositions de l'article 8.2, le Licenci� a la
+facult�, sous r�serve de prouver la faute du Conc�dant concern�, de
+solliciter la r�paration du pr�judice direct qu'il subirait du fait du
+Logiciel et dont il apportera la preuve.
+
+8.2 La responsabilit� du Conc�dant est limit�e aux engagements pris en
+application du Contrat et ne saurait �tre engag�e en raison notamment:
+(i) des dommages dus � l'inex�cution, totale ou partielle, de ses
+obligations par le Licenci�, (ii) des dommages directs ou indirects
+d�coulant de l'utilisation ou des performances du Logiciel subis par le
+Licenci� et (iii) plus g�n�ralement d'un quelconque dommage indirect. En
+particulier, les Parties conviennent express�ment que tout pr�judice
+financier ou commercial (par exemple perte de donn�es, perte de
+b�n�fices, perte d'exploitation, perte de client�le ou de commandes,
+manque � gagner, trouble commercial quelconque) ou toute action dirig�e
+contre le Licenci� par un tiers, constitue un dommage indirect et
+n'ouvre pas droit � r�paration par le Conc�dant.
+
+
+ Article 9 - GARANTIE
+
+9.1 Le Licenci� reconna�t que l'�tat actuel des connaissances
+scientifiques et techniques au moment de la mise en circulation du
+Logiciel ne permet pas d'en tester et d'en v�rifier toutes les
+utilisations ni de d�tecter l'existence d'�ventuels d�fauts. L'attention
+du Licenci� a �t� attir�e sur ce point sur les risques associ�s au
+chargement, � l'utilisation, la modification et/ou au d�veloppement et �
+la reproduction du Logiciel qui sont r�serv�s � des utilisateurs avertis.
+
+Il rel�ve de la responsabilit� du Licenci� de contr�ler, par tous
+moyens, l'ad�quation du produit � ses besoins, son bon fonctionnement et
+de s'assurer qu'il ne causera pas de dommages aux personnes et aux biens.
+
+9.2 Le Conc�dant d�clare de bonne foi �tre en droit de conc�der
+l'ensemble des droits attach�s au Logiciel (comprenant notamment les
+droits vis�s � l'article 5).
+
+9.3 Le Licenci� reconna�t que le Logiciel est fourni "en l'�tat" par le
+Conc�dant sans autre garantie, expresse ou tacite, que celle pr�vue �
+l'article 9.2 et notamment sans aucune garantie sur sa valeur commerciale,
+son caract�re s�curis�, innovant ou pertinent.
+
+En particulier, le Conc�dant ne garantit pas que le Logiciel est exempt
+d'erreur, qu'il fonctionnera sans interruption, qu'il sera compatible
+avec l'�quipement du Licenci� et sa configuration logicielle ni qu'il
+remplira les besoins du Licenci�.
+
+9.4 Le Conc�dant ne garantit pas, de mani�re expresse ou tacite, que le
+Logiciel ne porte pas atteinte � un quelconque droit de propri�t�
+intellectuelle d'un tiers portant sur un brevet, un logiciel ou sur tout
+autre droit de propri�t�. Ainsi, le Conc�dant exclut toute garantie au
+profit du Licenci� contre les actions en contrefa�on qui pourraient �tre
+diligent�es au titre de l'utilisation, de la modification, et de la
+redistribution du Logiciel. N�anmoins, si de telles actions sont
+exerc�es contre le Licenci�, le Conc�dant lui apportera son aide
+technique et juridique pour sa d�fense. Cette aide technique et
+juridique est d�termin�e au cas par cas entre le Conc�dant concern� et
+le Licenci� dans le cadre d'un protocole d'accord. Le Conc�dant d�gage
+toute responsabilit� quant � l'utilisation de la d�nomination du
+Logiciel par le Licenci�. Aucune garantie n'est apport�e quant �
+l'existence de droits ant�rieurs sur le nom du Logiciel et sur
+l'existence d'une marque.
+
+
+ Article 10 - RESILIATION
+
+10.1 En cas de manquement par le Licenci� aux obligations mises � sa
+charge par le Contrat, le Conc�dant pourra r�silier de plein droit le
+Contrat trente (30) jours apr�s notification adress�e au Licenci� et
+rest�e sans effet.
+
+10.2 Le Licenci� dont le Contrat est r�sili� n'est plus autoris� �
+utiliser, modifier ou distribuer le Logiciel. Cependant, toutes les
+licences qu'il aura conc�d�es ant�rieurement � la r�siliation du Contrat
+resteront valides sous r�serve qu'elles aient �t� effectu�es en
+conformit� avec le Contrat.
+
+
+ Article 11 - DISPOSITIONS DIVERSES
+
+
+ 11.1 CAUSE EXTERIEURE
+
+Aucune des Parties ne sera responsable d'un retard ou d'une d�faillance
+d'ex�cution du Contrat qui serait d� � un cas de force majeure, un cas
+fortuit ou une cause ext�rieure, telle que, notamment, le mauvais
+fonctionnement ou les interruptions du r�seau �lectrique ou de
+t�l�communication, la paralysie du r�seau li�e � une attaque
+informatique, l'intervention des autorit�s gouvernementales, les
+catastrophes naturelles, les d�g�ts des eaux, les tremblements de terre,
+le feu, les explosions, les gr�ves et les conflits sociaux, l'�tat de
+guerre...
+
+11.2 Le fait, par l'une ou l'autre des Parties, d'omettre en une ou
+plusieurs occasions de se pr�valoir d'une ou plusieurs dispositions du
+Contrat, ne pourra en aucun cas impliquer renonciation par la Partie
+int�ress�e � s'en pr�valoir ult�rieurement.
+
+11.3 Le Contrat annule et remplace toute convention ant�rieure, �crite
+ou orale, entre les Parties sur le m�me objet et constitue l'accord
+entier entre les Parties sur cet objet. Aucune addition ou modification
+aux termes du Contrat n'aura d'effet � l'�gard des Parties � moins
+d'�tre faite par �crit et sign�e par leurs repr�sentants d�ment habilit�s.
+
+11.4 Dans l'hypoth�se o� une ou plusieurs des dispositions du Contrat
+s'av�rerait contraire � une loi ou � un texte applicable, existants ou
+futurs, cette loi ou ce texte pr�vaudrait, et les Parties feraient les
+amendements n�cessaires pour se conformer � cette loi ou � ce texte.
+Toutes les autres dispositions resteront en vigueur. De m�me, la
+nullit�, pour quelque raison que ce soit, d'une des dispositions du
+Contrat ne saurait entra�ner la nullit� de l'ensemble du Contrat.
+
+
+ 11.5 LANGUE
+
+Le Contrat est rédigé en langue française et en langue anglaise, ces
+deux versions faisant également foi.
+
+
+ Article 12 - NOUVELLES VERSIONS DU CONTRAT
+
+12.1 Toute personne est autoris�e � copier et distribuer des copies de
+ce Contrat.
+
+12.2 Afin d'en pr�server la coh�rence, le texte du Contrat est prot�g�
+et ne peut �tre modifi� que par les auteurs de la licence, lesquels se
+r�servent le droit de publier p�riodiquement des mises � jour ou de
+nouvelles versions du Contrat, qui poss�deront chacune un num�ro
+distinct. Ces versions ult�rieures seront susceptibles de prendre en
+compte de nouvelles probl�matiques rencontr�es par les logiciels libres.
+
+12.3 Tout Logiciel diffus� sous une version donn�e du Contrat ne pourra
+faire l'objet d'une diffusion ult�rieure que sous la m�me version du
+Contrat ou une version post�rieure, sous r�serve des dispositions de
+l'article 5.3.4.
+
+
+ Article 13 - LOI APPLICABLE ET COMPETENCE TERRITORIALE
+
+13.1 Le Contrat est régi par la loi française. Les Parties conviennent
+de tenter de régler à l'amiable les différends ou litiges qui
+viendraient à se produire par suite ou à l'occasion du Contrat.
+
+13.2 A défaut d'accord amiable dans un délai de deux (2) mois à compter
+de leur survenance et sauf situation relevant d'une procédure d'urgence,
+les différends ou litiges seront portés par la Partie la plus diligente
+devant les Tribunaux compétents de Paris.
+
+
+Version 2.0 du 2006-09-05.
diff --git a/sumalibs/global.mk b/sumalibs/global.mk
new file mode 100644
index 0000000..c58123e
--- /dev/null
+++ b/sumalibs/global.mk
@@ -0,0 +1,10 @@
+
+# Build settings shared by the sumalibs sub-Makefiles (libfasta/Makefile
+# pulls this file in with "include ../global.mk").
+
+# C compiler and (currently empty) linker flags.
+CC=gcc
+LDFLAGS=
+
+# -O3: aggressive optimisation; -w: gcc option that silences all warnings.
+CFLAGS = -O3 -w
+
+# Running make without a goal builds "all", which each including Makefile
+# is expected to define.
+default: all
+
+# Pattern rule: compile one .c file into the matching .o file.
+# $(LIB) is not set in this file; it expands to nothing unless the
+# including Makefile or the command line defines it.
+%.o: %.c
+ $(CC) $(CFLAGS) -c -o $@ $< $(LIB)
diff --git a/sumalibs/libfasta/._Makefile b/sumalibs/libfasta/._Makefile
new file mode 100644
index 0000000..6adb72e
Binary files /dev/null and b/sumalibs/libfasta/._Makefile differ
diff --git a/sumalibs/libfasta/Makefile b/sumalibs/libfasta/Makefile
new file mode 100644
index 0000000..97cf5ec
--- /dev/null
+++ b/sumalibs/libfasta/Makefile
@@ -0,0 +1,33 @@
+
+# Builds the static library libfasta.a from the C sources listed below.
+
+# C sources that make up the library. fasta_header_parser.c is itself
+# generated from fasta_header_parser.l (see the flex rule further down).
+SOURCES = fasta_header_parser.c \
+ fasta_seq_writer.c \
+ fasta_header_handler.c \
+ header_mem_handler.c \
+ sequence.c
+
+SRCS=$(SOURCES)
+
+
+# One object file per source file.
+OBJECTS= $(patsubst %.c,%.o,$(SOURCES))
+
+LIBFILE = libfasta.a
+RANLIB = ranlib
+
+
+# Shared compiler settings, the "default" goal and the %.o pattern rule.
+include ../global.mk
+
+all: $(LIBFILE)
+
+# Regenerate the header scanner; -Pheader_yy replaces the default "yy"
+# symbol prefix and -t writes the generated scanner to stdout.
+fasta_header_parser.c: fasta_header_parser.l
+ flex -Pheader_yy -t $< > $@
+
+# NOTE(review): dic_parser.c is not listed in SOURCES, so nothing in this
+# Makefile triggers this rule; it also calls "lex" where the rule above
+# calls "flex". Confirm whether it is still needed.
+dic_parser.c: dic_parser.l
+ lex -Phashtable_yy -t $< > $@
+
+# Remove the object files and the library (plus any other *.a archive
+# present in this directory).
+clean:
+ rm -rf $(OBJECTS) $(LIBFILE)
+ rm -f *.a
+
+# Archive the objects; $? expands to the prerequisites newer than the
+# target, so only rebuilt objects are re-inserted. ranlib then refreshes
+# the archive index.
+$(LIBFILE): $(OBJECTS)
+ ar -cr $@ $?
+ $(RANLIB) $@
diff --git a/sumalibs/libfasta/fasta_header_handler.c b/sumalibs/libfasta/fasta_header_handler.c
new file mode 100644
index 0000000..f57d8c7
--- /dev/null
+++ b/sumalibs/libfasta/fasta_header_handler.c
@@ -0,0 +1,126 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "sequence.h"
+#include "fasta_header_parser.h"
+#include "fasta_header_handler.h"
+
+
+/*
+ * Appends a "; name=value" field to a heap-allocated header string.
+ *
+ * header       NUL-terminated string previously obtained from
+ *              malloc/calloc/realloc. It is resized with realloc(), so the
+ *              caller must continue with the returned pointer.
+ * name, value  NUL-terminated strings copied into the header.
+ *
+ * When the header ends with '.', that final dot is overwritten by the ';'
+ * separator instead of being kept.
+ *
+ * Returns the (possibly moved) header. Returns NULL if the reallocation
+ * fails; in that case the original buffer is left untouched and still
+ * belongs to the caller.
+ */
+char* char_header_add_field(char* header, char* name, char* value)
+{
+	size_t lheader = strlen(header);
+	size_t lname = strlen(name);
+	size_t lvalue = strlen(value);
+	char* grown;
+	char* tail;
+
+	/* Room for "; " + name + "=" + value + the terminating NUL. */
+	grown = realloc(header, lheader + lname + lvalue + 4);
+	if (grown == NULL)
+		return NULL;
+
+	/* Only look at the last character when there is one: an empty header
+	 * must not be indexed at position -1. */
+	tail = grown + lheader;
+	if (lheader > 0 && tail[-1] == '.')
+		tail--;
+
+	memcpy(tail, "; ", 2);
+	tail += 2;
+	memcpy(tail, name, lname);
+	tail += lname;
+	*tail++ = '=';
+	memcpy(tail, value, lvalue + 1);	/* +1 copies the NUL as well */
+
+	return grown;
+}
+
+
+/*
+ * Builds a copy of seq->rawheader with "name=value;" inserted right after
+ * its first space-delimited word (presumably the sequence identifier).
+ *
+ *   "id rest of header"  ->  "id name=value; rest of header"
+ *   "id"                 ->  "id name=value;"
+ *
+ * seq->rawheader itself is not modified.
+ *
+ * Returns a newly malloc'ed string that the caller must free, or NULL if
+ * the allocation fails.
+ */
+char* fastaSeqPtr_header_add_field(fastaSeqPtr seq, char* name, char* value)
+{
+	size_t lheader = strlen(seq->rawheader);
+	size_t lname = strlen(name);
+	size_t lvalue = strlen(value);
+	size_t lword = 0;
+	char* rawheader;
+	char* out;
+
+	/* first word + " " + name + "=" + value + ";" + remainder + NUL
+	 * needs lheader + lname + lvalue + 4 bytes; the historical +5 is kept. */
+	rawheader = malloc(lheader + lname + lvalue + 5);
+	if (rawheader == NULL)
+		return NULL;
+
+	/* Length of the first word, i.e. everything before the first space. */
+	while ((seq->rawheader[lword] != ' ') && (seq->rawheader[lword] != 0))
+		lword++;
+
+	memcpy(rawheader, seq->rawheader, lword);
+	out = rawheader + lword;
+	*out++ = ' ';
+	memcpy(out, name, lname);
+	out += lname;
+	*out++ = '=';
+	memcpy(out, value, lvalue);
+	out += lvalue;
+	*out++ = ';';
+
+	/* Remainder of the original header, starting at its separating space
+	 * when there is one. It is copied straight from the source string, so
+	 * no intermediate scratch buffer (and no sizing of it) is needed. */
+	strcpy(out, seq->rawheader + lword);
+
+	return(rawheader);
+}
+
+
+/*
+ * Appends a (name, value) pair to a parsed header table.
+ *
+ * header[0].value holds the number of fields as a decimal string; the
+ * fields themselves live in slots 1..count. The table is grown by one slot
+ * with realloc(), the new slot receives private copies of name and value,
+ * and the count string in slot 0 is replaced by the incremented count.
+ *
+ * The count string is stored in a freshly allocated buffer rather than
+ * printed over the old one: the capacity of the old buffer is not known
+ * here and could be exceeded when the count gains a digit. The old buffer
+ * is released with free(), as free_header_table() does for every slot.
+ *
+ * Returns the (possibly moved) table, which the caller must use from now
+ * on. Returns NULL if an allocation fails; the original table is then left
+ * unchanged.
+ */
+element_from_header* table_header_add_field(element_from_header* header, char* name, char* value)
+{
+	char count[32];		/* large enough for any decimal int */
+	char* name_copy;
+	char* value_copy;
+	char* count_copy;
+	element_from_header* grown = NULL;
+	int nbf = atoi(header[0].value) + 1;
+
+	sprintf(count, "%d", nbf);
+
+	/* Acquire everything first so that a failure leaves the table intact. */
+	name_copy = malloc(strlen(name) + 1);
+	value_copy = malloc(strlen(value) + 1);
+	count_copy = malloc(strlen(count) + 1);
+	if ((name_copy != NULL) && (value_copy != NULL) && (count_copy != NULL))
+		grown = realloc(header, (nbf + 1) * sizeof(element_from_header));
+
+	if (grown == NULL)
+	{
+		free(name_copy);
+		free(value_copy);
+		free(count_copy);
+		return NULL;
+	}
+
+	strcpy(name_copy, name);
+	strcpy(value_copy, value);
+	strcpy(count_copy, count);
+
+	grown[nbf].name = name_copy;
+	grown[nbf].value = value_copy;
+
+	free(grown[0].value);
+	grown[0].value = count_copy;
+
+	return(grown);
+}
+
+
+/*
+ * Releases a parsed header table: the name and value strings of every
+ * slot from 0 up to the field count stored in header[0].value, then the
+ * table itself.
+ */
+void free_header_table(element_from_header* header)
+{
+	/* Read the count before slot 0, which stores it, is released. */
+	int remaining = atoi(header[0].value);
+	element_from_header* entry = header;
+
+	/* Walks slots 0..count inclusive, in ascending order. */
+	for (; remaining >= 0; remaining--, entry++)
+	{
+		free(entry->name);
+		free(entry->value);
+	}
+
+	free(header);
+}
+
+
+/*
+ * Looks up a field by name in a parsed header table.
+ *
+ * Slots 1..count are scanned, count being the decimal number stored in
+ * header[0].value. Returns a pointer to the value string held by the table
+ * (not a copy), or a null pointer when no field carries that name.
+ */
+char* getItemFromHeader(char* name, element_from_header* header)
+{
+	char* found = 0;
+	int count = atoi(header[0].value);
+	int idx = 1;
+
+	while (idx <= count)
+	{
+		/* No early exit: if the name occurs several times, the last
+		 * occurrence is the one returned. */
+		if (!strcmp(header[idx].name, name))
+			found = header[idx].value;
+		idx++;
+	}
+
+	return found;
+}
+
+
+/*
+ * Replaces the value of every field called `name` in a parsed header
+ * table with a copy of newValue.
+ *
+ * Slots 1..count are scanned, count being the decimal number stored in
+ * header[0].value. Each matching value buffer is resized with realloc()
+ * to fit the new string.
+ *
+ * If a reallocation fails, the affected field keeps its previous value, a
+ * message is written to stderr and the scan continues.
+ */
+void changeValue(element_from_header* header, char* name, char* newValue)
+{
+	int i;
+	int nbf = atoi(header[0].value);
+	size_t needed = strlen(newValue) + 1;	/* including the NUL */
+
+	for (i = 1; i <= nbf; i++)
+	{
+		char* resized;
+
+		if (strcmp(header[i].name, name) != 0)
+			continue;
+
+		/* Keep the old pointer until realloc() has succeeded, so that a
+		 * failure neither loses the buffer nor leads to a write through
+		 * a null pointer. */
+		resized = realloc(header[i].value, needed);
+		if (resized == NULL)
+		{
+			fprintf(stderr, "changeValue: out of memory, field \"%s\" left unchanged\n", name);
+			continue;
+		}
+
+		memcpy(resized, newValue, needed);
+		header[i].value = resized;
+	}
+}
diff --git a/sumalibs/libfasta/fasta_header_handler.h b/sumalibs/libfasta/fasta_header_handler.h
new file mode 100644
index 0000000..e68b81a
--- /dev/null
+++ b/sumalibs/libfasta/fasta_header_handler.h
@@ -0,0 +1,23 @@
+
+/*
+ * Helpers for adding, reading, changing and freeing the fields of FASTA
+ * headers, either in their raw string form or as a parsed
+ * element_from_header table.
+ */
+#ifndef FASTA_HEADER_HANDLER_H_
+#define FASTA_HEADER_HANDLER_H_
+
+
+#include "sequence.h"
+
+
+/* Appends "; name=value" to a heap-allocated header string (arguments:
+ * header, name, value). A trailing '.' in the header is overwritten by the
+ * ';'. The header is resized with realloc(); use the returned pointer. */
+char* char_header_add_field(char*,char*,char*);
+
+/* Returns a newly malloc'ed copy of seq->rawheader with "name=value;"
+ * inserted after its first space-delimited word. The caller frees it. */
+char* fastaSeqPtr_header_add_field(fastaSeqPtr seq, char* name, char* value);
+
+/* NOTE(review): no definition of this function is visible in
+ * fasta_header_handler.c, and struct hashtable is not declared in this
+ * header itself. Confirm that it is still implemented somewhere. */
+element_from_header* table_header_add_dic(element_from_header* header, char* name, struct hashtable *hashtab);
+
+/* Grows the table by one slot holding copies of name and value, and
+ * rewrites the field count kept as a decimal string in header[0].value.
+ * The table is resized with realloc(); use the returned pointer. */
+element_from_header* table_header_add_field(element_from_header* header, char* name, char* value);
+
+/* Frees the name and value strings of every slot (0..count), then the
+ * table itself. */
+void free_header_table(element_from_header*);
+
+/* Returns the value of the field with the given name (the last one if the
+ * name occurs several times), or a null pointer if there is none. The
+ * pointer refers to the table's own storage. */
+char* getItemFromHeader(char*, element_from_header*);
+
+/* Replaces the value of every field called name with a copy of newValue. */
+void changeValue(element_from_header* header, char* name, char* newValue);
+
+#endif
diff --git a/sumalibs/libfasta/fasta_header_parser.c b/sumalibs/libfasta/fasta_header_parser.c
new file mode 100644
index 0000000..ad18723
--- /dev/null
+++ b/sumalibs/libfasta/fasta_header_parser.c
@@ -0,0 +1,1960 @@
+
+#line 3 "<stdout>"
+
+#define YY_INT_ALIGNED short int
+
+/* A lexical scanner generated by flex */
+
+#define yy_create_buffer header_yy_create_buffer
+#define yy_delete_buffer header_yy_delete_buffer
+#define yy_flex_debug header_yy_flex_debug
+#define yy_init_buffer header_yy_init_buffer
+#define yy_flush_buffer header_yy_flush_buffer
+#define yy_load_buffer_state header_yy_load_buffer_state
+#define yy_switch_to_buffer header_yy_switch_to_buffer
+#define yyin header_yyin
+#define yyleng header_yyleng
+#define yylex header_yylex
+#define yylineno header_yylineno
+#define yyout header_yyout
+#define yyrestart header_yyrestart
+#define yytext header_yytext
+#define yywrap header_yywrap
+#define yyalloc header_yyalloc
+#define yyrealloc header_yyrealloc
+#define yyfree header_yyfree
+
+#define FLEX_SCANNER
+#define YY_FLEX_MAJOR_VERSION 2
+#define YY_FLEX_MINOR_VERSION 5
+#define YY_FLEX_SUBMINOR_VERSION 35
+#if YY_FLEX_SUBMINOR_VERSION > 0
+#define FLEX_BETA
+#endif
+
+/* First, we deal with platform-specific or compiler-specific issues. */
+
+/* begin standard C headers. */
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+
+/* end standard C headers. */
+
+/* flex integer type definitions */
+
+#ifndef FLEXINT_H
+#define FLEXINT_H
+
+/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
+
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+
+/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
+ * if you want the limit (max/min) macros for int types.
+ */
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS 1
+#endif
+
+#include <inttypes.h>
+typedef int8_t flex_int8_t;
+typedef uint8_t flex_uint8_t;
+typedef int16_t flex_int16_t;
+typedef uint16_t flex_uint16_t;
+typedef int32_t flex_int32_t;
+typedef uint32_t flex_uint32_t;
+typedef uint64_t flex_uint64_t;
+#else
+typedef signed char flex_int8_t;
+typedef short int flex_int16_t;
+typedef int flex_int32_t;
+typedef unsigned char flex_uint8_t;
+typedef unsigned short int flex_uint16_t;
+typedef unsigned int flex_uint32_t;
+#endif /* ! C99 */
+
+/* Limits of integral types. */
+#ifndef INT8_MIN
+#define INT8_MIN (-128)
+#endif
+#ifndef INT16_MIN
+#define INT16_MIN (-32767-1)
+#endif
+#ifndef INT32_MIN
+#define INT32_MIN (-2147483647-1)
+#endif
+#ifndef INT8_MAX
+#define INT8_MAX (127)
+#endif
+#ifndef INT16_MAX
+#define INT16_MAX (32767)
+#endif
+#ifndef INT32_MAX
+#define INT32_MAX (2147483647)
+#endif
+#ifndef UINT8_MAX
+#define UINT8_MAX (255U)
+#endif
+#ifndef UINT16_MAX
+#define UINT16_MAX (65535U)
+#endif
+#ifndef UINT32_MAX
+#define UINT32_MAX (4294967295U)
+#endif
+
+#endif /* ! FLEXINT_H */
+
+#ifdef __cplusplus
+
+/* The "const" storage-class-modifier is valid. */
+#define YY_USE_CONST
+
+#else /* ! __cplusplus */
+
+/* C99 requires __STDC__ to be defined as 1. */
+#if defined (__STDC__)
+
+#define YY_USE_CONST
+
+#endif /* defined (__STDC__) */
+#endif /* ! __cplusplus */
+
+#ifdef YY_USE_CONST
+#define yyconst const
+#else
+#define yyconst
+#endif
+
+/* Returned upon end-of-file. */
+#define YY_NULL 0
+
+/* Promotes a possibly negative, possibly signed char to an unsigned
+ * integer for use as an array index. If the signed char is negative,
+ * we want to instead treat it as an 8-bit unsigned char, hence the
+ * double cast.
+ */
+#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c)
+
+/* Enter a start condition. This macro really ought to take a parameter,
+ * but we do it the disgusting crufty way forced on us by the ()-less
+ * definition of BEGIN.
+ */
+#define BEGIN (yy_start) = 1 + 2 *
+
+/* Translate the current start state into a value that can be later handed
+ * to BEGIN to return to the state. The YYSTATE alias is for lex
+ * compatibility.
+ */
+#define YY_START (((yy_start) - 1) / 2)
+#define YYSTATE YY_START
+
+/* Action number for EOF rule of a given start state. */
+#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)
+
+/* Special action meaning "start processing a new file". */
+#define YY_NEW_FILE header_yyrestart(header_yyin )
+
+#define YY_END_OF_BUFFER_CHAR 0
+
+/* Size of default input buffer. */
+#ifndef YY_BUF_SIZE
+#define YY_BUF_SIZE 16384
+#endif
+
+/* The state buf must be large enough to hold one state per character in the main buffer.
+ */
+#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type))
+
+#ifndef YY_TYPEDEF_YY_BUFFER_STATE
+#define YY_TYPEDEF_YY_BUFFER_STATE
+typedef struct yy_buffer_state *YY_BUFFER_STATE;
+#endif
+
+#ifndef YY_TYPEDEF_YY_SIZE_T
+#define YY_TYPEDEF_YY_SIZE_T
+typedef size_t yy_size_t;
+#endif
+
+extern yy_size_t header_yyleng;
+
+extern FILE *header_yyin, *header_yyout;
+
+#define EOB_ACT_CONTINUE_SCAN 0
+#define EOB_ACT_END_OF_FILE 1
+#define EOB_ACT_LAST_MATCH 2
+
+ #define YY_LESS_LINENO(n)
+
+/* Return all but the first "n" matched characters back to the input stream. */
+#define yyless(n) \
+ do \
+ { \
+ /* Undo effects of setting up header_yytext. */ \
+ int yyless_macro_arg = (n); \
+ YY_LESS_LINENO(yyless_macro_arg);\
+ *yy_cp = (yy_hold_char); \
+ YY_RESTORE_YY_MORE_OFFSET \
+ (yy_c_buf_p) = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \
+ YY_DO_BEFORE_ACTION; /* set up header_yytext again */ \
+ } \
+ while ( 0 )
+
+#define unput(c) yyunput( c, (yytext_ptr) )
+
+#ifndef YY_STRUCT_YY_BUFFER_STATE
+#define YY_STRUCT_YY_BUFFER_STATE
+struct yy_buffer_state
+ {
+ FILE *yy_input_file;
+
+ char *yy_ch_buf; /* input buffer */
+ char *yy_buf_pos; /* current position in input buffer */
+
+ /* Size of input buffer in bytes, not including room for EOB
+ * characters.
+ */
+ yy_size_t yy_buf_size;
+
+ /* Number of characters read into yy_ch_buf, not including EOB
+ * characters.
+ */
+ yy_size_t yy_n_chars;
+
+ /* Whether we "own" the buffer - i.e., we know we created it,
+ * and can realloc() it to grow it, and should free() it to
+ * delete it.
+ */
+ int yy_is_our_buffer;
+
+ /* Whether this is an "interactive" input source; if so, and
+ * if we're using stdio for input, then we want to use getc()
+ * instead of fread(), to make sure we stop fetching input after
+ * each newline.
+ */
+ int yy_is_interactive;
+
+ /* Whether we're considered to be at the beginning of a line.
+ * If so, '^' rules will be active on the next match, otherwise
+ * not.
+ */
+ int yy_at_bol;
+
+ int yy_bs_lineno; /**< The line count. */
+ int yy_bs_column; /**< The column count. */
+
+ /* Whether to try to fill the input buffer when we reach the
+ * end of it.
+ */
+ int yy_fill_buffer;
+
+ int yy_buffer_status;
+
+#define YY_BUFFER_NEW 0
+#define YY_BUFFER_NORMAL 1
+ /* When an EOF's been seen but there's still some text to process
+ * then we mark the buffer as YY_EOF_PENDING, to indicate that we
+ * shouldn't try reading from the input source any more. We might
+ * still have a bunch of tokens to match, though, because of
+ * possible backing-up.
+ *
+ * When we actually see the EOF, we change the status to "new"
+ * (via header_yyrestart()), so that the user can continue scanning by
+ * just pointing header_yyin at a new input file.
+ */
+#define YY_BUFFER_EOF_PENDING 2
+
+ };
+#endif /* !YY_STRUCT_YY_BUFFER_STATE */
+
+/* Stack of input buffers. */
+static size_t yy_buffer_stack_top = 0; /**< index of top of stack. */
+static size_t yy_buffer_stack_max = 0; /**< capacity of stack. */
+static YY_BUFFER_STATE * yy_buffer_stack = 0; /**< Stack as an array. */
+
+/* We provide macros for accessing buffer states in case in the
+ * future we want to put the buffer states in a more general
+ * "scanner state".
+ *
+ * Returns the top of the stack, or NULL.
+ */
+#define YY_CURRENT_BUFFER ( (yy_buffer_stack) \
+ ? (yy_buffer_stack)[(yy_buffer_stack_top)] \
+ : NULL)
+
+/* Same as previous macro, but useful when we know that the buffer stack is not
+ * NULL or when we need an lvalue. For internal use only.
+ */
+#define YY_CURRENT_BUFFER_LVALUE (yy_buffer_stack)[(yy_buffer_stack_top)]
+
+/* yy_hold_char holds the character lost when header_yytext is formed. */
+static char yy_hold_char;
+static yy_size_t yy_n_chars; /* number of characters read into yy_ch_buf */
+yy_size_t header_yyleng;
+
+/* Points to current character in buffer. */
+static char *yy_c_buf_p = (char *) 0;
+static int yy_init = 0; /* whether we need to initialize */
+static int yy_start = 0; /* start state number */
+
+/* Flag which is used to allow header_yywrap()'s to do buffer switches
+ * instead of setting up a fresh header_yyin. A bit of a hack ...
+ */
+static int yy_did_buffer_switch_on_eof;
+
+void header_yyrestart (FILE *input_file );
+void header_yy_switch_to_buffer (YY_BUFFER_STATE new_buffer );
+YY_BUFFER_STATE header_yy_create_buffer (FILE *file,int size );
+void header_yy_delete_buffer (YY_BUFFER_STATE b );
+void header_yy_flush_buffer (YY_BUFFER_STATE b );
+void header_yypush_buffer_state (YY_BUFFER_STATE new_buffer );
+void header_yypop_buffer_state (void );
+
+static void header_yyensure_buffer_stack (void );
+static void header_yy_load_buffer_state (void );
+static void header_yy_init_buffer (YY_BUFFER_STATE b,FILE *file );
+
+#define YY_FLUSH_BUFFER header_yy_flush_buffer(YY_CURRENT_BUFFER )
+
+YY_BUFFER_STATE header_yy_scan_buffer (char *base,yy_size_t size );
+YY_BUFFER_STATE header_yy_scan_string (yyconst char *yy_str );
+YY_BUFFER_STATE header_yy_scan_bytes (yyconst char *bytes,yy_size_t len );
+
+void *header_yyalloc (yy_size_t );
+void *header_yyrealloc (void *,yy_size_t );
+void header_yyfree (void * );
+
+#define yy_new_buffer header_yy_create_buffer
+
+#define yy_set_interactive(is_interactive) \
+ { \
+ if ( ! YY_CURRENT_BUFFER ){ \
+ header_yyensure_buffer_stack (); \
+ YY_CURRENT_BUFFER_LVALUE = \
+ header_yy_create_buffer(header_yyin,YY_BUF_SIZE ); \
+ } \
+ YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \
+ }
+
+#define yy_set_bol(at_bol) \
+ { \
+ if ( ! YY_CURRENT_BUFFER ){\
+ header_yyensure_buffer_stack (); \
+ YY_CURRENT_BUFFER_LVALUE = \
+ header_yy_create_buffer(header_yyin,YY_BUF_SIZE ); \
+ } \
+ YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \
+ }
+
+#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol)
+
+/* Begin user sect3 */
+
+typedef unsigned char YY_CHAR;
+
+FILE *header_yyin = (FILE *) 0, *header_yyout = (FILE *) 0;
+
+typedef int yy_state_type;
+
+extern int header_yylineno;
+
+int header_yylineno = 1;
+
+extern char *header_yytext;
+#define yytext_ptr header_yytext
+
+static yy_state_type yy_get_previous_state (void );
+static yy_state_type yy_try_NUL_trans (yy_state_type current_state );
+static int yy_get_next_buffer (void );
+static void yy_fatal_error (yyconst char msg[] );
+
+/* Done after the current pattern has been matched and before the
+ * corresponding action - sets up header_yytext.
+ */
+#define YY_DO_BEFORE_ACTION \
+ (yytext_ptr) = yy_bp; \
+ header_yyleng = (yy_size_t) (yy_cp - yy_bp); \
+ (yy_hold_char) = *yy_cp; \
+ *yy_cp = '\0'; \
+ (yy_c_buf_p) = yy_cp;
+
+#define YY_NUM_RULES 12
+#define YY_END_OF_BUFFER 13
+/* This struct is not used in this scanner,
+ but its presence is necessary. */
+struct yy_trans_info
+ {
+ flex_int32_t yy_verify;
+ flex_int32_t yy_nxt;
+ };
+static yyconst flex_int16_t yy_accept[29] =
+ { 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 13, 2,
+ 3, 1, 12, 4, 5, 7, 6, 8, 9, 10,
+ 11, 2, 3, 4, 5, 8, 9, 0
+ } ;
+
+static yyconst flex_int32_t yy_ec[256] =
+ { 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 2, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 3, 1,
+ 4, 5, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1
+ } ;
+
+static yyconst flex_int32_t yy_meta[6] =
+ { 0,
+ 1, 2, 3, 3, 4
+ } ;
+
+static yyconst flex_int16_t yy_base[36] =
+ { 0,
+ 0, 4, 23, 0, 9, 0, 14, 0, 27, 0,
+ 0, 29, 29, 0, 0, 29, 29, 0, 0, 29,
+ 29, 0, 0, 0, 0, 0, 0, 29, 19, 23,
+ 23, 7, 5, 3, 1
+ } ;
+
+static yyconst flex_int16_t yy_def[36] =
+ { 0,
+ 29, 29, 2, 3, 28, 5, 28, 7, 28, 30,
+ 31, 28, 28, 32, 33, 28, 28, 34, 35, 28,
+ 28, 30, 31, 32, 33, 34, 35, 0, 28, 28,
+ 28, 28, 28, 28, 28
+ } ;
+
+static yyconst flex_int16_t yy_nxt[35] =
+ { 0,
+ 28, 11, 27, 26, 12, 11, 25, 24, 12, 14,
+ 15, 16, 17, 13, 18, 19, 20, 21, 13, 10,
+ 10, 10, 10, 22, 23, 22, 28, 13, 9, 28,
+ 28, 28, 28, 28
+ } ;
+
+static yyconst flex_int16_t yy_chk[35] =
+ { 0,
+ 0, 1, 35, 34, 1, 2, 33, 32, 2, 5,
+ 5, 5, 5, 5, 7, 7, 7, 7, 7, 29,
+ 29, 29, 29, 30, 31, 30, 9, 3, 28, 28,
+ 28, 28, 28, 28
+ } ;
+
+static yy_state_type yy_last_accepting_state;
+static char *yy_last_accepting_cpos;
+
+extern int header_yy_flex_debug;
+int header_yy_flex_debug = 0;
+
+/* The intent behind this definition is that it'll catch
+ * any uses of REJECT which flex missed.
+ */
+#define REJECT reject_used_but_not_detected
+#define yymore() yymore_used_but_not_detected
+#define YY_MORE_ADJ 0
+#define YY_RESTORE_YY_MORE_OFFSET
+char *header_yytext;
+#line 1 "fasta_header_parser.l"
+/*
+ * If you modify this file and regenerate the .c from it, add -ll to the Makefile.
+ */
+
+
+
+#line 10 "fasta_header_parser.l"
+
+#include <stdlib.h>
+#include <string.h>
+#include "header_mem_handler.h"
+#include "fasta_header_handler.h"
+
+#define MEMALLOCATED 10
+#define BUFFER 5
+
+#define YY_DECL int header_parser(int *nbf, int *memory_allocated, element_from_header **p_header)
+
+
+#line 502 "<stdout>"
+
+#define INITIAL 0
+#define REGID 1
+#define REGNAME 2
+#define REGVAL 3
+
+#ifndef YY_NO_UNISTD_H
+/* Special case for "unistd.h", since it is non-ANSI. We include it way
+ * down here because we want the user's section 1 to have been scanned first.
+ * The user has a chance to override it with an option.
+ */
+#include <unistd.h>
+#endif
+
+#ifndef YY_EXTRA_TYPE
+#define YY_EXTRA_TYPE void *
+#endif
+
+static int yy_init_globals (void );
+
+/* Accessor methods to globals.
+ These are made visible to non-reentrant scanners for convenience. */
+
+int header_yylex_destroy (void );
+
+int header_yyget_debug (void );
+
+void header_yyset_debug (int debug_flag );
+
+YY_EXTRA_TYPE header_yyget_extra (void );
+
+void header_yyset_extra (YY_EXTRA_TYPE user_defined );
+
+FILE *header_yyget_in (void );
+
+void header_yyset_in (FILE * in_str );
+
+FILE *header_yyget_out (void );
+
+void header_yyset_out (FILE * out_str );
+
+yy_size_t header_yyget_leng (void );
+
+char *header_yyget_text (void );
+
+int header_yyget_lineno (void );
+
+void header_yyset_lineno (int line_number );
+
+/* Macros after this point can all be overridden by user definitions in
+ * section 1.
+ */
+
+#ifndef YY_SKIP_YYWRAP
+#ifdef __cplusplus
+extern "C" int header_yywrap (void );
+#else
+extern int header_yywrap (void );
+#endif
+#endif
+
+ static void yyunput (int c,char *buf_ptr );
+
+#ifndef yytext_ptr
+static void yy_flex_strncpy (char *,yyconst char *,int );
+#endif
+
+#ifdef YY_NEED_STRLEN
+static int yy_flex_strlen (yyconst char * );
+#endif
+
+#ifndef YY_NO_INPUT
+
+#ifdef __cplusplus
+static int yyinput (void );
+#else
+static int input (void );
+#endif
+
+#endif
+
+/* Amount of stuff to slurp up with each read. */
+#ifndef YY_READ_BUF_SIZE
+#define YY_READ_BUF_SIZE 8192
+#endif
+
+/* Copy whatever the last rule matched to the standard output. */
+#ifndef ECHO
+/* This used to be an fputs(), but since the string might contain NUL's,
+ * we now use fwrite().
+ */
+#define ECHO fwrite( header_yytext, header_yyleng, 1, header_yyout )
+#endif
+
+/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL,
+ * is returned in "result".
+ */
+#ifndef YY_INPUT
+#define YY_INPUT(buf,result,max_size) \
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
+ { \
+ int c = '*'; \
+ yy_size_t n; \
+ for ( n = 0; n < max_size && \
+ (c = getc( header_yyin )) != EOF && c != '\n'; ++n ) \
+ buf[n] = (char) c; \
+ if ( c == '\n' ) \
+ buf[n++] = (char) c; \
+ if ( c == EOF && ferror( header_yyin ) ) \
+ YY_FATAL_ERROR( "input in flex scanner failed" ); \
+ result = n; \
+ } \
+ else \
+ { \
+ errno=0; \
+ while ( (result = fread(buf, 1, max_size, header_yyin))==0 && ferror(header_yyin)) \
+ { \
+ if( errno != EINTR) \
+ { \
+ YY_FATAL_ERROR( "input in flex scanner failed" ); \
+ break; \
+ } \
+ errno=0; \
+ clearerr(header_yyin); \
+ } \
+ }\
+\
+
+#endif
+
+/* No semi-colon after return; correct usage is to write "yyterminate();" -
+ * we don't want an extra ';' after the "return" because that will cause
+ * some compilers to complain about unreachable statements.
+ */
+#ifndef yyterminate
+#define yyterminate() return YY_NULL
+#endif
+
+/* Number of entries by which start-condition stack grows. */
+#ifndef YY_START_STACK_INCR
+#define YY_START_STACK_INCR 25
+#endif
+
+/* Report a fatal error. */
+#ifndef YY_FATAL_ERROR
+#define YY_FATAL_ERROR(msg) yy_fatal_error( msg )
+#endif
+
+/* end tables serialization structures and prototypes */
+
+/* Default declaration of generated scanner - a define so the user can
+ * easily add parameters.
+ */
+#ifndef YY_DECL
+#define YY_DECL_IS_OURS 1
+
+extern int header_yylex (void);
+
+#define YY_DECL int header_yylex (void)
+#endif /* !YY_DECL */
+
+/* Code executed at the beginning of each rule, after header_yytext and header_yyleng
+ * have been set up.
+ */
+#ifndef YY_USER_ACTION
+#define YY_USER_ACTION
+#endif
+
+/* Code executed at the end of each rule. */
+#ifndef YY_BREAK
+#define YY_BREAK break;
+#endif
+
+#define YY_RULE_SETUP \
+ YY_USER_ACTION
+
+/** The main scanner function which does all the work.
+ */
+YY_DECL /* expands to: int header_parser(int *nbf, int *memory_allocated, element_from_header **p_header) */
+{
+ register yy_state_type yy_current_state; /* current state while walking the transition tables */
+ register char *yy_cp, *yy_bp; /* scan position / start of the current run in yy_ch_buf */
+ register int yy_act; /* action number selected for the current match */
+ /* The locals below come from the user section of fasta_header_parser.l. */
+#line 32 "fasta_header_parser.l"
+
+
+ int i; /* passed by address to store_in_field()/store_in_header_table(); reset to 0 by rule 2 */
+ int size_needed; /* bytes needed to copy the text matched by rule 2, terminator included */
+ int free_size; /* passed by address to malloc_field() and to the store_* helpers */
+ char* field; /* scratch buffer obtained from malloc_field() in rule 2 and freed by the REGVAL/REGNAME EOF actions */
+
+
+#line 696 "<stdout>"
+ /* One-time setup on the first call: default start state, default streams, initial input buffer. */
+ if ( !(yy_init) )
+ {
+ (yy_init) = 1;
+
+#ifdef YY_USER_INIT
+ YY_USER_INIT;
+#endif
+
+ if ( ! (yy_start) )
+ (yy_start) = 1; /* first start state */
+
+ if ( ! header_yyin )
+ header_yyin = stdin;
+
+ if ( ! header_yyout )
+ header_yyout = stdout;
+
+ if ( ! YY_CURRENT_BUFFER ) {
+ header_yyensure_buffer_stack ();
+ YY_CURRENT_BUFFER_LVALUE =
+ header_yy_create_buffer(header_yyin,YY_BUF_SIZE );
+ }
+
+ header_yy_load_buffer_state( );
+ }
+
+ while ( 1 ) /* loops until end-of-file is reached */
+ {
+ yy_cp = (yy_c_buf_p);
+
+ /* Support of header_yytext. */
+ *yy_cp = (yy_hold_char);
+
+ /* yy_bp points to the position in yy_ch_buf of the start of
+ * the current run.
+ */
+ yy_bp = yy_cp;
+
+ yy_current_state = (yy_start);
+yy_match: /* consume characters one at a time through the yy_base/yy_chk/yy_nxt tables */
+ do
+ {
+ register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)]; /* equivalence class of the current character */
+ if ( yy_accept[yy_current_state] )
+ {
+ (yy_last_accepting_state) = yy_current_state;
+ (yy_last_accepting_cpos) = yy_cp;
+ }
+ while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
+ {
+ yy_current_state = (int) yy_def[yy_current_state];
+ if ( yy_current_state >= 29 )
+ yy_c = yy_meta[(unsigned int) yy_c];
+ }
+ yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
+ ++yy_cp;
+ }
+ while ( yy_base[yy_current_state] != 29 );
+
+yy_find_action: /* map the state reached to an action, backing up to the last accepting state if needed */
+ yy_act = yy_accept[yy_current_state];
+ if ( yy_act == 0 )
+ { /* have to back up */
+ yy_cp = (yy_last_accepting_cpos);
+ yy_current_state = (yy_last_accepting_state);
+ yy_act = yy_accept[yy_current_state];
+ }
+
+ YY_DO_BEFORE_ACTION;
+
+do_action: /* This label is used only to access EOF actions. */
+
+ switch ( yy_act )
+ { /* beginning of action switch */
+ case 0: /* must back up */
+ /* undo the effects of YY_DO_BEFORE_ACTION */
+ *yy_cp = (yy_hold_char);
+ yy_cp = (yy_last_accepting_cpos);
+ yy_current_state = (yy_last_accepting_state);
+ goto yy_find_action;
+
+case 1: /* rule 1 (traced as <INITIAL>{SUP} in the commented-out printf): switch to start condition REGID */
+YY_RULE_SETUP
+#line 40 "fasta_header_parser.l"
+{
+ /*printf("\n<INITIAL>{SUP},%s",header_yytext);*/
+ BEGIN(REGID);
+ }
+ YY_BREAK
+case 2: /* rule 2: store the matched text as the value of a field named "id" and start a fresh scratch buffer */
+/* rule 2 can match eol */
+YY_RULE_SETUP
+#line 45 "fasta_header_parser.l"
+{
+ i=0;
+
+ field = malloc_field(&free_size);
+ (*p_header)[*nbf].name = (char*) malloc(3*sizeof(char)); /* room for "id" plus terminator */
+ strcpy(((*p_header)[*nbf]).name,"id");
+
+ size_needed = strlen(header_yytext)+1;
+ (*p_header)[*nbf].value = (char*) malloc(sizeof(char)*size_needed);
+ strcpy(((*p_header)[*nbf]).value,header_yytext);
+
+ (*nbf)++;
+ }
+ YY_BREAK
+case 3: /* rule 3: switch to start condition REGNAME */
+YY_RULE_SETUP
+#line 60 "fasta_header_parser.l"
+{
+ BEGIN(REGNAME);
+ }
+ YY_BREAK
+case 4: /* rule 4 (traced as <REGNAME>{WORD}): append the matched text to field */
+/* rule 4 can match eol */
+YY_RULE_SETUP
+#line 64 "fasta_header_parser.l"
+{
+ /*fprintf(stderr,"\n<REGNAME>{WORD} **%s**",header_yytext);*/
+ field = store_in_field(field,header_yytext,&free_size,&i);
+ }
+ YY_BREAK
+case 5: /* rule 5 (traced as <REGNAME>{SPACE}): append the matched text only when i is non-zero */
+YY_RULE_SETUP
+#line 69 "fasta_header_parser.l"
+{
+ /*fprintf(stderr,"\n<REGNAME>{SPACE} **%s**",header_yytext);*/
+ if (i != 0)
+ field = store_in_field(field,header_yytext,&free_size,&i);
+ }
+ YY_BREAK
+case 6: /* rule 6 (traced as <REGNAME>{EQUAL}): hand field to store_in_header_table() for the name slot, then switch to REGVAL */
+YY_RULE_SETUP
+#line 75 "fasta_header_parser.l"
+{
+ /*fprintf(stderr,"\n<REGNAME>{EQUAL},%s",header_yytext);*/
+ field = store_in_header_table(field, &((*p_header)[*nbf].name), &free_size, &i);
+ BEGIN(REGVAL);
+ }
+ YY_BREAK
+case 7: /* rule 7 (traced as <REGNAME>{SEP}): text ended by a separator without {EQUAL} becomes the value of a field named "definition" */
+YY_RULE_SETUP
+#line 81 "fasta_header_parser.l"
+{
+ /*fprintf(stderr,"\n<REGNAME>{SEP},%s",header_yytext);*/
+ (*p_header)[*nbf].name = (char*) malloc(19*sizeof(char)); /* 19 bytes reserved; "definition" needs 11 */
+ strcpy((*p_header)[*nbf].name,"definition");
+ field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i);
+ p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated); /* NOTE(review): helper defined elsewhere; presumably advances *nbf and grows the table - confirm */
+ BEGIN(REGNAME);
+ }
+ YY_BREAK
+case 8: /* rule 8 (traced as <REGVAL>{WORD}): append the matched text to field */
+/* rule 8 can match eol */
+YY_RULE_SETUP
+#line 90 "fasta_header_parser.l"
+{
+ /*fprintf(stderr,"\n<REGVAL>{WORD} **%s**\n",header_yytext);*/
+ field = store_in_field(field,header_yytext,&free_size,&i);
+ }
+ YY_BREAK
+case 9: /* rule 9 (traced as <REGVAL>{SPACE}): append the matched text to field unconditionally */
+YY_RULE_SETUP
+#line 95 "fasta_header_parser.l"
+{
+ /*fprintf(stderr,"\n<REGVAL>{SPACE} **%s**\n",header_yytext);*/
+ field = store_in_field(field,header_yytext,&free_size,&i);
+ }
+ YY_BREAK
+case 10: /* rule 10 (traced as <REGVAL>{SEP}): hand field to store_in_header_table() for the value slot, then go back to REGNAME */
+YY_RULE_SETUP
+#line 100 "fasta_header_parser.l"
+{
+ /*fprintf(stderr,"\n<REGVAL>{SEP},%s\n",header_yytext);*/
+
+ field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i);
+ p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated);
+ BEGIN(REGNAME);
+ }
+ YY_BREAK
+case 11: /* rule 11: no action; the warning about a missing ';' separator is commented out */
+YY_RULE_SETUP
+#line 109 "fasta_header_parser.l"
+{
+ /*fprintf(stderr, "\nWarning : separator ';' probably missing in header after %s",(*p_header)[*nbf].name);*/
+ }
+ YY_BREAK
+case YY_STATE_EOF(REGVAL): /* input ended while in REGVAL: store the pending value, finish the table, free field, reset to INITIAL */
+#line 113 "fasta_header_parser.l"
+{
+ field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i);
+ p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated);
+ end_header_table(p_header, *nbf);
+
+ free(field);
+ BEGIN(INITIAL);
+ return 0;
+ }
+ YY_BREAK
+case YY_STATE_EOF(REGNAME): /* input ended while in REGNAME: pending text in field is not stored (that code is commented out); finish the table, free field, reset to INITIAL */
+#line 123 "fasta_header_parser.l"
+{
+ /*(*p_header)[*nbf].name = (char*) malloc(sizeof(char)*19);
+ strcpy((*p_header)[*nbf].name,"other_informations");
+ field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i);
+ p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated);
+ */
+ end_header_table(p_header, *nbf);
+
+ free(field);
+ BEGIN(INITIAL);
+ return 0;
+ }
+ YY_BREAK
+case 12: /* rule 12: default rule, copies the matched text to header_yyout */
+YY_RULE_SETUP
+#line 136 "fasta_header_parser.l"
+ECHO;
+ YY_BREAK
+#line 918 "<stdout>"
+case YY_STATE_EOF(INITIAL):
+case YY_STATE_EOF(REGID):
+ yyterminate(); /* NOTE(review): unlike the REGVAL/REGNAME EOF actions, this path does not free field, call end_header_table() or BEGIN(INITIAL) - confirm input can never end in REGID after rule 2 has run */
+
+ case YY_END_OF_BUFFER:
+ {
+ /* Amount of text matched not including the EOB char. */
+ int yy_amount_of_matched_text = (int) (yy_cp - (yytext_ptr)) - 1;
+
+ /* Undo the effects of YY_DO_BEFORE_ACTION. */
+ *yy_cp = (yy_hold_char);
+ YY_RESTORE_YY_MORE_OFFSET
+
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW )
+ {
+ /* We're scanning a new file or input source. It's
+ * possible that this happened because the user
+ * just pointed header_yyin at a new source and called
+ * header_yylex(). If so, then we have to assure
+ * consistency between YY_CURRENT_BUFFER and our
+ * globals. Here is the right place to do so, because
+ * this is the first action (other than possibly a
+ * back-up) that will match for the new input source.
+ */
+ (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
+ YY_CURRENT_BUFFER_LVALUE->yy_input_file = header_yyin;
+ YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL;
+ }
+
+ /* Note that here we test for yy_c_buf_p "<=" to the position
+ * of the first EOB in the buffer, since yy_c_buf_p will
+ * already have been incremented past the NUL character
+ * (since all states make transitions on EOB to the
+ * end-of-buffer state). Contrast this with the test
+ * in input().
+ */
+ if ( (yy_c_buf_p) <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] )
+ { /* This was really a NUL. */
+ yy_state_type yy_next_state;
+
+ (yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text;
+
+ yy_current_state = yy_get_previous_state( );
+
+ /* Okay, we're now positioned to make the NUL
+ * transition. We couldn't have
+ * yy_get_previous_state() go ahead and do it
+ * for us because it doesn't know how to deal
+ * with the possibility of jamming (and we don't
+ * want to build jamming into it because then it
+ * will run more slowly).
+ */
+
+ yy_next_state = yy_try_NUL_trans( yy_current_state );
+
+ yy_bp = (yytext_ptr) + YY_MORE_ADJ;
+
+ if ( yy_next_state )
+ {
+ /* Consume the NUL. */
+ yy_cp = ++(yy_c_buf_p);
+ yy_current_state = yy_next_state;
+ goto yy_match;
+ }
+
+ else
+ {
+ yy_cp = (yy_c_buf_p);
+ goto yy_find_action;
+ }
+ }
+
+ else switch ( yy_get_next_buffer( ) )
+ {
+ case EOB_ACT_END_OF_FILE:
+ {
+ (yy_did_buffer_switch_on_eof) = 0;
+
+ if ( header_yywrap( ) )
+ {
+ /* Note: because we've taken care in
+ * yy_get_next_buffer() to have set up
+ * header_yytext, we can now set up
+ * yy_c_buf_p so that if some total
+ * hoser (like flex itself) wants to
+ * call the scanner after we return the
+ * YY_NULL, it'll still work - another
+ * YY_NULL will get returned.
+ */
+ (yy_c_buf_p) = (yytext_ptr) + YY_MORE_ADJ;
+
+ yy_act = YY_STATE_EOF(YY_START); /* dispatch to the EOF action of the current start condition */
+ goto do_action;
+ }
+
+ else
+ {
+ if ( ! (yy_did_buffer_switch_on_eof) )
+ YY_NEW_FILE;
+ }
+ break;
+ }
+
+ case EOB_ACT_CONTINUE_SCAN:
+ (yy_c_buf_p) =
+ (yytext_ptr) + yy_amount_of_matched_text;
+
+ yy_current_state = yy_get_previous_state( );
+
+ yy_cp = (yy_c_buf_p);
+ yy_bp = (yytext_ptr) + YY_MORE_ADJ;
+ goto yy_match;
+
+ case EOB_ACT_LAST_MATCH:
+ (yy_c_buf_p) =
+ &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)];
+
+ yy_current_state = yy_get_previous_state( );
+
+ yy_cp = (yy_c_buf_p);
+ yy_bp = (yytext_ptr) + YY_MORE_ADJ;
+ goto yy_find_action;
+ }
+ break;
+ }
+
+ default:
+ YY_FATAL_ERROR(
+ "fatal flex scanner internal error--no action found" );
+ } /* end of action switch */
+ } /* end of scanning one token */
+} /* end of header_yylex */
+
+/* yy_get_next_buffer - try to read in a new buffer
+ *
+ * Returns a code representing an action:
+ * EOB_ACT_LAST_MATCH - text was matched before the end of input; process it first
+ * EOB_ACT_CONTINUE_SCAN - continue scanning from current position
+ * EOB_ACT_END_OF_FILE - end of file
+ */
+static int yy_get_next_buffer (void)
+{
+ register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf; /* start of the buffer: target of the carry-over copy */
+ register char *source = (yytext_ptr); /* start of the partially matched text */
+ register int number_to_move, i;
+ int ret_val;
+
+ if ( (yy_c_buf_p) > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] )
+ YY_FATAL_ERROR(
+ "fatal flex scanner internal error--end of buffer missed" );
+
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 )
+ { /* Don't try to fill the buffer, so this is an EOF. */
+ if ( (yy_c_buf_p) - (yytext_ptr) - YY_MORE_ADJ == 1 )
+ {
+ /* We matched a single character, the EOB, so
+ * treat this as a final EOF.
+ */
+ return EOB_ACT_END_OF_FILE;
+ }
+
+ else
+ {
+ /* We matched some text prior to the EOB, first
+ * process it.
+ */
+ return EOB_ACT_LAST_MATCH;
+ }
+ }
+
+ /* Try to read more data. */
+
+ /* First move last chars to start of buffer. */
+ number_to_move = (int) ((yy_c_buf_p) - (yytext_ptr)) - 1;
+
+ for ( i = 0; i < number_to_move; ++i )
+ *(dest++) = *(source++);
+
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING )
+ /* don't do the read, it's not guaranteed to return an EOF,
+ * just force an EOF
+ */
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) = 0;
+
+ else
+ {
+ yy_size_t num_to_read =
+ YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1;
+
+ while ( num_to_read <= 0 ) /* yy_size_t is size_t (unsigned), so this only tests for == 0 */
+ { /* Not enough room in the buffer - grow it. */
+
+ /* just a shorter name for the current buffer */
+ YY_BUFFER_STATE b = YY_CURRENT_BUFFER;
+
+ int yy_c_buf_p_offset =
+ (int) ((yy_c_buf_p) - b->yy_ch_buf);
+
+ if ( b->yy_is_our_buffer )
+ {
+ yy_size_t new_size = b->yy_buf_size * 2;
+
+ if ( new_size <= 0 ) /* unsigned: true only when new_size is exactly 0 */
+ b->yy_buf_size += b->yy_buf_size / 8;
+ else
+ b->yy_buf_size *= 2;
+
+ b->yy_ch_buf = (char *)
+ /* Include room in for 2 EOB chars. */
+ header_yyrealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 );
+ }
+ else
+ /* Can't grow it, we don't own it. */
+ b->yy_ch_buf = 0;
+
+ if ( ! b->yy_ch_buf )
+ YY_FATAL_ERROR(
+ "fatal error - scanner input buffer overflow" );
+
+ (yy_c_buf_p) = &b->yy_ch_buf[yy_c_buf_p_offset]; /* re-point into the buffer at the saved offset after the reallocation */
+
+ num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size -
+ number_to_move - 1;
+
+ }
+
+ if ( num_to_read > YY_READ_BUF_SIZE )
+ num_to_read = YY_READ_BUF_SIZE;
+
+ /* Read in more data. */
+ YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]),
+ (yy_n_chars), num_to_read );
+
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
+ }
+
+ if ( (yy_n_chars) == 0 )
+ {
+ if ( number_to_move == YY_MORE_ADJ ) /* nothing read and nothing carried over */
+ {
+ ret_val = EOB_ACT_END_OF_FILE;
+ header_yyrestart(header_yyin );
+ }
+
+ else
+ {
+ ret_val = EOB_ACT_LAST_MATCH;
+ YY_CURRENT_BUFFER_LVALUE->yy_buffer_status =
+ YY_BUFFER_EOF_PENDING;
+ }
+ }
+
+ else
+ ret_val = EOB_ACT_CONTINUE_SCAN;
+
+ if ((yy_size_t) ((yy_n_chars) + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) {
+ /* Extend the array by 50%, plus the number we really need. */
+ yy_size_t new_size = (yy_n_chars) + number_to_move + ((yy_n_chars) >> 1);
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) header_yyrealloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size );
+ if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" );
+ }
+
+ (yy_n_chars) += number_to_move;
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] = YY_END_OF_BUFFER_CHAR; /* terminate the data with two end-of-buffer characters */
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] = YY_END_OF_BUFFER_CHAR;
+
+ (yytext_ptr) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0];
+
+ return ret_val;
+}
+
+/* yy_get_previous_state - recompute the state reached just before the EOB
+ * char by replaying the transition tables over the text from yytext_ptr up
+ * to yy_c_buf_p; the last accepting state/position are refreshed on the way.
+ */
+
+    static yy_state_type yy_get_previous_state (void)
+{
+    register yy_state_type state = (yy_start);
+    register char *scan = (yytext_ptr) + YY_MORE_ADJ;
+
+    while ( scan < (yy_c_buf_p) )
+        {
+        /* A NUL byte in the text is given equivalence class 1. */
+        register YY_CHAR cls = 1;
+
+        if ( *scan )
+            cls = yy_ec[YY_SC_TO_UI(*scan)];
+
+        /* Remember the most recent accepting state before moving on. */
+        if ( yy_accept[state] )
+            {
+            (yy_last_accepting_state) = state;
+            (yy_last_accepting_cpos) = scan;
+            }
+
+        /* Fall back through yy_def until a state has an entry for cls. */
+        while ( yy_chk[yy_base[state] + cls] != state )
+            {
+            state = (int) yy_def[state];
+            if ( state >= 29 )
+                cls = yy_meta[(unsigned int) cls];
+            }
+
+        state = yy_nxt[yy_base[state] + (unsigned int) cls];
+        ++scan;
+        }
+
+    return state;
+}
+
+/* yy_try_NUL_trans - attempt the transition taken on the NUL character
+ *
+ * synopsis
+ * next_state = yy_try_NUL_trans( current_state );
+ *
+ * Returns 0 when the transition leads to state 28 (a jam), otherwise the
+ * state reached. The last accepting state/position are refreshed first.
+ */
+    static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state )
+{
+    register char *pos = (yy_c_buf_p);
+    register YY_CHAR cls = 1; /* equivalence class used for NUL */
+
+    if ( yy_accept[yy_current_state] )
+        {
+        (yy_last_accepting_state) = yy_current_state;
+        (yy_last_accepting_cpos) = pos;
+        }
+
+    /* Fall back through yy_def until a state has an entry for cls. */
+    while ( yy_chk[yy_base[yy_current_state] + cls] != yy_current_state )
+        {
+        yy_current_state = (int) yy_def[yy_current_state];
+        if ( yy_current_state >= 29 )
+            cls = yy_meta[(unsigned int) cls];
+        }
+    yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) cls];
+
+    if ( yy_current_state == 28 )
+        return 0;
+
+    return yy_current_state;
+}
+
+/* yyunput - push character c back in front of the current scan position.
+ *
+ * yy_bp is the start of the current token text; it is shifted together with
+ * the buffer contents when room has to be made at the front of the buffer.
+ * Aborts through YY_FATAL_ERROR when there is still no room after shifting.
+ */
+ static void yyunput (int c, register char * yy_bp )
+{
+ register char *yy_cp;
+
+ yy_cp = (yy_c_buf_p);
+
+ /* undo effects of setting up header_yytext */
+ *yy_cp = (yy_hold_char);
+
+ if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 )
+ { /* need to shift things up to make room */
+ /* +2 for EOB chars. */
+ register yy_size_t number_to_move = (yy_n_chars) + 2;
+ register char *dest = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[
+ YY_CURRENT_BUFFER_LVALUE->yy_buf_size + 2];
+ register char *source =
+ &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move];
+
+ /* Copy backwards so the contents end up flush with the end of the buffer. */
+ while ( source > YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
+ *--dest = *--source;
+
+ /* Both cursors move by the distance the data was shifted. */
+ yy_cp += (int) (dest - source);
+ yy_bp += (int) (dest - source);
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars =
+ (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_buf_size;
+
+ if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 )
+ YY_FATAL_ERROR( "flex scanner push-back overflow" );
+ }
+
+ *--yy_cp = (char) c;
+
+ (yytext_ptr) = yy_bp;
+ (yy_hold_char) = *yy_cp;
+ (yy_c_buf_p) = yy_cp;
+}
+
+#ifndef YY_NO_INPUT
+/* input - return the next character of the input, refilling the buffer
+ * through yy_get_next_buffer() when the end-of-buffer marker is reached.
+ * Returns 0 when the input is exhausted and header_yywrap() reports that no
+ * further input follows. The function is named yyinput under C++.
+ */
+#ifdef __cplusplus
+ static int yyinput (void)
+#else
+ static int input (void)
+#endif
+
+{
+ int c;
+
+ *(yy_c_buf_p) = (yy_hold_char);
+
+ if ( *(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR )
+ {
+ /* yy_c_buf_p now points to the character we want to return.
+ * If this occurs *before* the EOB characters, then it's a
+ * valid NUL; if not, then we've hit the end of the buffer.
+ */
+ if ( (yy_c_buf_p) < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] )
+ /* This was really a NUL. */
+ *(yy_c_buf_p) = '\0';
+
+ else
+ { /* need more input */
+ /* Remember the position relative to the token start: it is used to
+  * restore yy_c_buf_p after the refill.
+  */
+ yy_size_t offset = (yy_c_buf_p) - (yytext_ptr);
+ ++(yy_c_buf_p);
+
+ switch ( yy_get_next_buffer( ) )
+ {
+ case EOB_ACT_LAST_MATCH:
+ /* This happens because yy_g_n_b()
+ * sees that we've accumulated a
+ * token and flags that we need to
+ * try matching the token before
+ * proceeding. But for input(),
+ * there's no matching to consider.
+ * So convert the EOB_ACT_LAST_MATCH
+ * to EOB_ACT_END_OF_FILE.
+ */
+
+ /* Reset buffer status. */
+ header_yyrestart(header_yyin );
+
+ /*FALLTHROUGH*/
+
+ case EOB_ACT_END_OF_FILE:
+ {
+ if ( header_yywrap( ) )
+ return 0;
+
+ if ( ! (yy_did_buffer_switch_on_eof) )
+ YY_NEW_FILE;
+ /* More input is available: start over on the new buffer. */
+#ifdef __cplusplus
+ return yyinput();
+#else
+ return input();
+#endif
+ }
+
+ case EOB_ACT_CONTINUE_SCAN:
+ (yy_c_buf_p) = (yytext_ptr) + offset;
+ break;
+ }
+ }
+ }
+
+ c = *(unsigned char *) (yy_c_buf_p); /* cast for 8-bit char's */
+ *(yy_c_buf_p) = '\0'; /* preserve header_yytext */
+ (yy_hold_char) = *++(yy_c_buf_p);
+
+ return c;
+}
+#endif /* ifndef YY_NO_INPUT */
+
+/** Immediately switch to a different input stream.
+ * @param input_file A readable stream.
+ *
+ * The current buffer is re-initialised for @a input_file (its buffered
+ * characters are discarded by header_yy_init_buffer()) and then reloaded
+ * into the scanner globals.
+ *
+ * @note This function does not reset the start condition to @c INITIAL .
+ */
+ void header_yyrestart (FILE * input_file )
+{
+
+ /* No buffer yet: create one of the default size before initialising it. */
+ if ( ! YY_CURRENT_BUFFER ){
+ header_yyensure_buffer_stack ();
+ YY_CURRENT_BUFFER_LVALUE =
+ header_yy_create_buffer(header_yyin,YY_BUF_SIZE );
+ }
+
+ header_yy_init_buffer(YY_CURRENT_BUFFER,input_file );
+ header_yy_load_buffer_state( );
+}
+
+/** Switch to a different input buffer.
+ * @param new_buffer The new input buffer.
+ *
+ * The scan position of the buffer being left is saved into it first.
+ * Nothing happens when @a new_buffer is already the current buffer.
+ */
+ void header_yy_switch_to_buffer (YY_BUFFER_STATE new_buffer )
+{
+
+ /* TODO. We should be able to replace this entire function body
+ * with
+ * header_yypop_buffer_state();
+ * header_yypush_buffer_state(new_buffer);
+ */
+ header_yyensure_buffer_stack ();
+ if ( YY_CURRENT_BUFFER == new_buffer )
+ return;
+
+ if ( YY_CURRENT_BUFFER )
+ {
+ /* Flush out information for old buffer. */
+ *(yy_c_buf_p) = (yy_hold_char);
+ YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
+ }
+
+ /* Replace the top-of-stack slot and load the new buffer's state. */
+ YY_CURRENT_BUFFER_LVALUE = new_buffer;
+ header_yy_load_buffer_state( );
+
+ /* We don't actually know whether we did this switch during
+ * EOF (header_yywrap()) processing, but the only time this flag
+ * is looked at is after header_yywrap() is called, so it's safe
+ * to go ahead and always set it.
+ */
+ (yy_did_buffer_switch_on_eof) = 1;
+}
+
+/* Copy the current buffer's saved state (character count, scan position,
+ * input file) into the scanner globals and pick up the character under the
+ * scan position as the hold character.
+ */
+static void header_yy_load_buffer_state (void)
+{
+ (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
+ (yytext_ptr) = (yy_c_buf_p) = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos;
+ header_yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file;
+ (yy_hold_char) = *(yy_c_buf_p);
+}
+
+/** Allocate and initialize an input buffer state.
+ * @param file A readable stream.
+ * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE.
+ *
+ * Aborts through YY_FATAL_ERROR when either allocation fails.
+ *
+ * @return the allocated buffer state.
+ */
+ YY_BUFFER_STATE header_yy_create_buffer (FILE * file, int size )
+{
+ YY_BUFFER_STATE b;
+
+ b = (YY_BUFFER_STATE) header_yyalloc(sizeof( struct yy_buffer_state ) );
+ if ( ! b )
+ YY_FATAL_ERROR( "out of dynamic memory in header_yy_create_buffer()" );
+
+ b->yy_buf_size = size;
+
+ /* yy_ch_buf has to be 2 characters longer than the size given because
+ * we need to put in 2 end-of-buffer characters.
+ */
+ b->yy_ch_buf = (char *) header_yyalloc(b->yy_buf_size + 2 );
+ if ( ! b->yy_ch_buf )
+ YY_FATAL_ERROR( "out of dynamic memory in header_yy_create_buffer()" );
+
+ /* Marks the character array as allocated here, so that
+  * header_yy_delete_buffer() releases it.
+  */
+ b->yy_is_our_buffer = 1;
+
+ header_yy_init_buffer(b,file );
+
+ return b;
+}
+
+/** Destroy the buffer.
+ * @param b a buffer created with header_yy_create_buffer()
+ *
+ * A NULL @a b is ignored. The character array is released only when the
+ * buffer is flagged as owning it (yy_is_our_buffer).
+ */
+ void header_yy_delete_buffer (YY_BUFFER_STATE b )
+{
+
+ if ( ! b )
+ return;
+
+ /* Clear the stack slot so it does not keep pointing at freed memory. */
+ if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */
+ YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0;
+
+ if ( b->yy_is_our_buffer )
+ header_yyfree((void *) b->yy_ch_buf );
+
+ header_yyfree((void *) b );
+}
+
+#ifndef __cplusplus
+extern int isatty (int );
+#endif /* __cplusplus */
+
+/* Initializes or reinitializes a buffer.
+ * This function is sometimes called more than once on the same buffer,
+ * such as during a header_yyrestart() or at EOF.
+ *
+ * The buffer is flushed, attached to `file` and marked as refillable.
+ * errno is saved on entry and restored on exit.
+ */
+ static void header_yy_init_buffer (YY_BUFFER_STATE b, FILE * file )
+
+{
+ int oerrno = errno;
+
+ header_yy_flush_buffer(b );
+
+ b->yy_input_file = file;
+ b->yy_fill_buffer = 1;
+
+ /* If b is the current buffer, then header_yy_init_buffer was _probably_
+ * called from header_yyrestart() or through yy_get_next_buffer.
+ * In that case, we don't want to reset the lineno or column.
+ */
+ if (b != YY_CURRENT_BUFFER){
+ b->yy_bs_lineno = 1;
+ b->yy_bs_column = 0;
+ }
+
+ /* Interactive only when a file is given and isatty() reports a terminal. */
+ b->yy_is_interactive = file ? (isatty( fileno(file) ) > 0) : 0;
+
+ errno = oerrno;
+}
+
+/** Discard all buffered characters. On the next scan, YY_INPUT will be called.
+ * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER.
+ *
+ * A NULL @a b is ignored. When @a b is the current buffer the scanner
+ * globals are reloaded from it afterwards.
+ */
+ void header_yy_flush_buffer (YY_BUFFER_STATE b )
+{
+ if ( ! b )
+ return;
+
+ b->yy_n_chars = 0;
+
+ /* We always need two end-of-buffer characters. The first causes
+ * a transition to the end-of-buffer state. The second causes
+ * a jam in that state.
+ */
+ b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
+ b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
+
+ b->yy_buf_pos = &b->yy_ch_buf[0];
+
+ b->yy_at_bol = 1;
+ b->yy_buffer_status = YY_BUFFER_NEW;
+
+ if ( b == YY_CURRENT_BUFFER )
+ header_yy_load_buffer_state( );
+}
+
+/** Pushes the new state onto the stack. The new state becomes
+ * the current state. This function will allocate the stack
+ * if necessary.
+ * @param new_buffer The new state. A NULL value is ignored.
+ *
+ */
+void header_yypush_buffer_state (YY_BUFFER_STATE new_buffer )
+{
+ if (new_buffer == NULL)
+ return;
+
+ header_yyensure_buffer_stack();
+
+ /* This block is copied from header_yy_switch_to_buffer. */
+ if ( YY_CURRENT_BUFFER )
+ {
+ /* Flush out information for old buffer. */
+ *(yy_c_buf_p) = (yy_hold_char);
+ YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
+ }
+
+ /* Only push if top exists. Otherwise, replace top. */
+ if (YY_CURRENT_BUFFER)
+ (yy_buffer_stack_top)++;
+ YY_CURRENT_BUFFER_LVALUE = new_buffer;
+
+ /* copied from header_yy_switch_to_buffer. */
+ header_yy_load_buffer_state( );
+ (yy_did_buffer_switch_on_eof) = 1;
+}
+
+/** Removes and deletes the top of the stack, if present.
+ * The next element becomes the new top.
+ *
+ * Returns immediately when there is no current buffer.
+ */
+void header_yypop_buffer_state (void)
+{
+ if (!YY_CURRENT_BUFFER)
+ return;
+
+ header_yy_delete_buffer(YY_CURRENT_BUFFER );
+ YY_CURRENT_BUFFER_LVALUE = NULL;
+ if ((yy_buffer_stack_top) > 0)
+ --(yy_buffer_stack_top);
+
+ /* Reload the scanner globals from the buffer that is now on top, if any. */
+ if (YY_CURRENT_BUFFER) {
+ header_yy_load_buffer_state( );
+ (yy_did_buffer_switch_on_eof) = 1;
+ }
+}
+
+/* Allocates the stack if it does not exist.
+ * Guarantees space for at least one push.
+ * Aborts through YY_FATAL_ERROR when memory cannot be obtained.
+ */
+static void header_yyensure_buffer_stack (void)
+{
+ yy_size_t num_to_alloc;
+
+ if (!(yy_buffer_stack)) {
+
+ /* First allocation is for a single element (num_to_alloc is 1 below),
+ * since we don't know if this scanner will even need a stack. The
+ * stack is enlarged by the growth branch further down on a later call.
+ */
+ num_to_alloc = 1;
+ (yy_buffer_stack) = (struct yy_buffer_state**)header_yyalloc
+ (num_to_alloc * sizeof(struct yy_buffer_state*)
+ );
+ if ( ! (yy_buffer_stack) )
+ YY_FATAL_ERROR( "out of dynamic memory in header_yyensure_buffer_stack()" );
+
+ memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state*));
+
+ (yy_buffer_stack_max) = num_to_alloc;
+ (yy_buffer_stack_top) = 0;
+ return;
+ }
+
+ /* The top slot is the last one allocated: make room for another push. */
+ if ((yy_buffer_stack_top) >= ((yy_buffer_stack_max)) - 1){
+
+ /* Increase the buffer to prepare for a possible push. */
+ int grow_size = 8 /* arbitrary grow size */;
+
+ num_to_alloc = (yy_buffer_stack_max) + grow_size;
+ (yy_buffer_stack) = (struct yy_buffer_state**)header_yyrealloc
+ ((yy_buffer_stack),
+ num_to_alloc * sizeof(struct yy_buffer_state*)
+ );
+ if ( ! (yy_buffer_stack) )
+ YY_FATAL_ERROR( "out of dynamic memory in header_yyensure_buffer_stack()" );
+
+ /* zero only the new slots.*/
+ memset((yy_buffer_stack) + (yy_buffer_stack_max), 0, grow_size * sizeof(struct yy_buffer_state*));
+ (yy_buffer_stack_max) = num_to_alloc;
+ }
+}
+
+/** Setup the input buffer state to scan directly from a user-specified character buffer.
+ * @param base the character buffer; its last two bytes must both be
+ *        YY_END_OF_BUFFER_CHAR
+ * @param size the size in bytes of the character buffer, including those
+ *        two trailing bytes
+ *
+ * The buffer is scanned in place (no copy is made) and becomes the current
+ * buffer.
+ *
+ * @return the newly allocated buffer state object, or 0 when @a base does
+ *         not end with the two end-of-buffer bytes.
+ */
+YY_BUFFER_STATE header_yy_scan_buffer (char * base, yy_size_t size )
+{
+ YY_BUFFER_STATE b;
+
+ if ( size < 2 ||
+ base[size-2] != YY_END_OF_BUFFER_CHAR ||
+ base[size-1] != YY_END_OF_BUFFER_CHAR )
+ /* They forgot to leave room for the EOB's. */
+ return 0;
+
+ b = (YY_BUFFER_STATE) header_yyalloc(sizeof( struct yy_buffer_state ) );
+ if ( ! b )
+ YY_FATAL_ERROR( "out of dynamic memory in header_yy_scan_buffer()" );
+
+ b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */
+ b->yy_buf_pos = b->yy_ch_buf = base;
+ b->yy_is_our_buffer = 0;
+ b->yy_input_file = 0;
+ b->yy_n_chars = b->yy_buf_size;
+ b->yy_is_interactive = 0;
+ b->yy_at_bol = 1;
+ b->yy_fill_buffer = 0;
+ b->yy_buffer_status = YY_BUFFER_NEW;
+
+ header_yy_switch_to_buffer(b );
+
+ return b;
+}
+
+/** Setup the input buffer state to scan a string. The next call to header_yylex() will
+ * scan from a @e copy of @a yystr.
+ * @param yystr a NUL-terminated string to scan
+ *
+ * @return the newly allocated buffer state object.
+ * @note If you want to scan bytes that may contain NUL values, then use
+ * header_yy_scan_bytes() instead.
+ */
+YY_BUFFER_STATE header_yy_scan_string (yyconst char * yystr )
+{
+
+ /* The length passed on excludes the terminating NUL. */
+ return header_yy_scan_bytes(yystr,strlen(yystr) );
+}
+
+/** Setup the input buffer state to scan the given bytes. The next call to header_yylex() will
+ * scan from a @e copy of @a yybytes.
+ * @param yybytes the byte buffer to scan
+ * @param _yybytes_len the number of bytes in the buffer pointed to by @a yybytes.
+ *
+ * Aborts through YY_FATAL_ERROR when the copy cannot be allocated or the
+ * buffer state cannot be set up.
+ *
+ * @return the newly allocated buffer state object.
+ */
+YY_BUFFER_STATE header_yy_scan_bytes (yyconst char * yybytes, yy_size_t _yybytes_len )
+{
+ YY_BUFFER_STATE b;
+ char *buf;
+ yy_size_t n, i;
+
+ /* Get memory for full buffer, including space for trailing EOB's. */
+ n = _yybytes_len + 2;
+ buf = (char *) header_yyalloc(n );
+ if ( ! buf )
+ YY_FATAL_ERROR( "out of dynamic memory in header_yy_scan_bytes()" );
+
+ for ( i = 0; i < _yybytes_len; ++i )
+ buf[i] = yybytes[i];
+
+ buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR;
+
+ b = header_yy_scan_buffer(buf,n );
+ if ( ! b )
+ YY_FATAL_ERROR( "bad buffer in header_yy_scan_bytes()" );
+
+ /* It's okay to grow etc. this buffer, and we should throw it
+ * away when we're done.
+ */
+ b->yy_is_our_buffer = 1;
+
+ return b;
+}
+
+/* Exit status used by yy_fatal_error(); may be predefined to override it. */
+#ifndef YY_EXIT_FAILURE
+#define YY_EXIT_FAILURE 2
+#endif
+
+/* Print msg followed by a newline on stderr and terminate the process with
+ * status YY_EXIT_FAILURE. Does not return.
+ */
+static void yy_fatal_error (yyconst char* msg )
+{
+ (void) fprintf( stderr, "%s\n", msg );
+ exit( YY_EXIT_FAILURE );
+}
+
+/* Redefine yyless() so it works in section 3 code.
+ *
+ * yyless(n) keeps the first n characters of the current token: the scan
+ * position is moved to header_yytext + n, the token is re-terminated there
+ * and header_yyleng is set to n.
+ */
+
+#undef yyless
+#define yyless(n) \
+ do \
+ { \
+ /* Undo effects of setting up header_yytext. */ \
+ int yyless_macro_arg = (n); \
+ YY_LESS_LINENO(yyless_macro_arg);\
+ header_yytext[header_yyleng] = (yy_hold_char); \
+ (yy_c_buf_p) = header_yytext + yyless_macro_arg; \
+ (yy_hold_char) = *(yy_c_buf_p); \
+ *(yy_c_buf_p) = '\0'; \
+ header_yyleng = yyless_macro_arg; \
+ } \
+ while ( 0 )
+
+/* Accessor methods (get/set functions) to struct members. */
+
+/** Get the current line number.
+ *
+ * @return the value of header_yylineno.
+ */
+int header_yyget_lineno (void)
+{
+
+ return header_yylineno;
+}
+
+/** Get the input stream.
+ *
+ * @return the value of header_yyin.
+ */
+FILE *header_yyget_in (void)
+{
+ return header_yyin;
+}
+
+/** Get the output stream.
+ *
+ * @return the value of header_yyout.
+ */
+FILE *header_yyget_out (void)
+{
+ return header_yyout;
+}
+
+/** Get the length of the current token.
+ *
+ * @return the value of header_yyleng.
+ */
+yy_size_t header_yyget_leng (void)
+{
+ return header_yyleng;
+}
+
+/** Get the current token.
+ *
+ * @return the header_yytext pointer itself; no copy is made.
+ */
+
+char *header_yyget_text (void)
+{
+ return header_yytext;
+}
+
+/** Set the current line number.
+ * @param line_number the value stored into header_yylineno
+ *
+ */
+void header_yyset_lineno (int line_number )
+{
+
+ header_yylineno = line_number;
+}
+
+/** Set the input stream. This does not discard the current
+ * input buffer.
+ * @param in_str A readable stream; it is only stored into header_yyin.
+ *
+ * @see header_yy_switch_to_buffer
+ */
+void header_yyset_in (FILE * in_str )
+{
+ header_yyin = in_str ;
+}
+
+/** Set the output stream.
+ * @param out_str the stream stored into header_yyout.
+ */
+void header_yyset_out (FILE * out_str )
+{
+ header_yyout = out_str ;
+}
+
+/** Get the debug flag.
+ * @return the value of header_yy_flex_debug.
+ */
+int header_yyget_debug (void)
+{
+ return header_yy_flex_debug;
+}
+
+/** Set the debug flag.
+ * @param bdebug the value stored into header_yy_flex_debug.
+ */
+void header_yyset_debug (int bdebug )
+{
+ header_yy_flex_debug = bdebug ;
+}
+
+/* Reset the scanner globals (buffer stack bookkeeping, scan position,
+ * init flag, start state and the two streams) to their initial values.
+ * Nothing is allocated or freed here. Always returns 0.
+ */
+static int yy_init_globals (void)
+{
+ /* Initialization is the same as for the non-reentrant scanner.
+ * This function is called from header_yylex_destroy(), so don't allocate here.
+ */
+
+ (yy_buffer_stack) = 0;
+ (yy_buffer_stack_top) = 0;
+ (yy_buffer_stack_max) = 0;
+ (yy_c_buf_p) = (char *) 0;
+ (yy_init) = 0;
+ (yy_start) = 0;
+
+/* Defined in main.c */
+#ifdef YY_STDINIT
+ header_yyin = stdin;
+ header_yyout = stdout;
+#else
+ header_yyin = (FILE *) 0;
+ header_yyout = (FILE *) 0;
+#endif
+
+ /* For future reference: Set errno on error, since we are called by
+ * header_yylex_init()
+ */
+ return 0;
+}
+
+/* header_yylex_destroy is for both reentrant and non-reentrant scanners.
+ *
+ * Releases the current buffer and the buffer stack, then resets the scanner
+ * globals so that the scanner can be initialised again. Always returns 0.
+ */
+int header_yylex_destroy (void)
+{
+
+ /* Pop the buffer stack, destroying each element. */
+ /* NOTE(review): the slot is cleared before header_yypop_buffer_state() is
+  * called, and that function returns at once when the current buffer is
+  * NULL, so it looks as if only the top buffer is released here - confirm
+  * against the YY_CURRENT_BUFFER definition.
+  */
+ while(YY_CURRENT_BUFFER){
+ header_yy_delete_buffer(YY_CURRENT_BUFFER );
+ YY_CURRENT_BUFFER_LVALUE = NULL;
+ header_yypop_buffer_state();
+ }
+
+ /* Destroy the stack itself. */
+ header_yyfree((yy_buffer_stack) );
+ (yy_buffer_stack) = NULL;
+
+ /* Reset the globals. This is important in a non-reentrant scanner so the next time
+ * header_yylex() is called, initialization will occur. */
+ yy_init_globals( );
+
+ return 0;
+}
+
+/*
+ * Internal utility routines.
+ */
+
+#ifndef yytext_ptr
+/* Copy exactly n characters from s2 to s1. No terminator is appended and
+ * nothing is copied when n is zero or negative.
+ */
+static void yy_flex_strncpy (char* s1, yyconst char * s2, int n )
+{
+    int remaining;
+
+    for ( remaining = n; remaining > 0; --remaining )
+        *s1++ = *s2++;
+}
+#endif
+
+#ifdef YY_NEED_STRLEN
+/* Return the number of characters that precede the terminating NUL of s. */
+static int yy_flex_strlen (yyconst char * s )
+{
+    yyconst char *end = s;
+
+    while ( *end )
+        ++end;
+
+    return (int) (end - s);
+}
+#endif
+
+/* Allocation hook of the scanner: forwards to malloc() and returns its
+ * result unchanged (NULL on failure).
+ */
+void *header_yyalloc (yy_size_t size )
+{
+ return (void *) malloc( size );
+}
+
+/* Reallocation hook of the scanner: forwards to realloc() and returns its
+ * result unchanged (NULL on failure).
+ */
+void *header_yyrealloc (void * ptr, yy_size_t size )
+{
+ /* The cast to (char *) in the following accommodates both
+ * implementations that use char* generic pointers, and those
+ * that use void* generic pointers. It works with the latter
+ * because both ANSI C and C++ allow castless assignment from
+ * any pointer type to void*, and deal with argument conversions
+ * as though doing an assignment.
+ */
+ return (void *) realloc( (char *) ptr, size );
+}
+
+/* Release hook of the scanner: forwards to free(). */
+void header_yyfree (void * ptr )
+{
+ free( (char *) ptr ); /* see header_yyrealloc() for (char *) cast */
+}
+
+#define YYTABLES_NAME "yytables"
+
+#line 136 "fasta_header_parser.l"
+
+
+
+/* Tell the scanner that no further input follows the end of the current
+ * buffer: always returns 1.
+ */
+int header_yywrap(void)
+{
+    return 1;
+}
+
+/* header_parser_main - parse one FASTA header into a table of name/value pairs.
+ *
+ * h is the NUL-terminated header text; it is scanned from a copy made by
+ * header_yy_scan_string(). Entry 0 of the returned table is named
+ * "nbfields"; its value starts as the text "1" and is rewritten by the
+ * scanner through end_header_table(). The table and the strings stored in
+ * it are heap allocated.
+ *
+ * Aborts through YY_FATAL_ERROR when memory cannot be obtained.
+ */
+element_from_header* header_parser_main(char *h)
+{
+    int nbfields = 1;
+    int memory_allocated = MEMALLOCATED;
+    element_from_header* header;
+    char* nbfields_n;
+    char* nbfields_v;
+    YY_BUFFER_STATE state;
+
+    nbfields_n = (char*) malloc(sizeof("nbfields"));
+    /* The scanner later prints an int into this buffer: three decimal
+     * digits per byte of int, plus sign and terminator, always suffices.
+     */
+    nbfields_v = (char*) malloc(3*sizeof(int) + 2);
+    header = (element_from_header*) malloc(memory_allocated * sizeof(element_from_header));
+    if ( ! nbfields_n || ! nbfields_v || ! header )
+        YY_FATAL_ERROR( "out of dynamic memory in header_parser_main()" );
+
+    strcpy(nbfields_n, "nbfields");
+    strcpy(nbfields_v, "1");
+
+    header[0].name = nbfields_n;
+    header[0].value = nbfields_v;
+
+    state=header_yy_scan_string(h);
+
+    /* The scanner may reallocate the table, hence the address of header. */
+    header_parser(&nbfields, &memory_allocated, &header);
+
+    header_yy_delete_buffer(state);
+
+    return header;
+}
+
+
+
+
+
diff --git a/sumalibs/libfasta/fasta_header_parser.h b/sumalibs/libfasta/fasta_header_parser.h
new file mode 100644
index 0000000..985b460
--- /dev/null
+++ b/sumalibs/libfasta/fasta_header_parser.h
@@ -0,0 +1,13 @@
+
+#ifndef FASTA_HEADER_PARSER_H_
+#define FASTA_HEADER_PARSER_H_
+
+/* One name/value pair of a parsed FASTA header. */
+typedef struct {
+ char *name; /* field name */
+ void *value; /* field value; the header parser stores NUL-terminated strings here */
+}element_from_header;
+
+/* Parse the given header text and return the resulting table; entry 0 of
+ * the table is named "nbfields".
+ */
+element_from_header* header_parser_main(char*);
+
+
+#endif
diff --git a/sumalibs/libfasta/fasta_header_parser.l b/sumalibs/libfasta/fasta_header_parser.l
new file mode 100644
index 0000000..c270c64
--- /dev/null
+++ b/sumalibs/libfasta/fasta_header_parser.l
@@ -0,0 +1,178 @@
+/*
+ * Add -ll in Makefile if you modify this file to convert to .c
+ */
+
+/* Exclusive start conditions:
+ *   REGID   - entered after the leading '>' of the header
+ *   REGNAME - an attribute name (or free text) is being read
+ *   REGVAL  - the value following '=' is being read
+ */
+%x REGID
+%x REGNAME
+%x REGVAL
+
+%{
+
+#include <stdlib.h>
+#include <string.h>
+#include "header_mem_handler.h"
+#include "fasta_header_handler.h"
+
+/* Number of table entries allocated up front by header_parser_main(). */
+#define MEMALLOCATED 10
+#define BUFFER 5
+
+/* Signature of the generated scanner function. */
+#define YY_DECL int header_parser(int *nbf, int *memory_allocated, element_from_header **p_header)
+
+
+%}
+
+/* WORD: run of characters other than '>', blanks, '=' and ';'.
+ * WORDID: run of characters other than '>' and blanks.
+ */
+WORD [^>[:blank:]=;]+
+WORDID [^>[:blank:]]+
+SUP >
+EOL \n
+SEP ;
+SPACE [[:blank:]]+
+EQUAL =
+
+%%
+
+ /* Scanner-local state, set up on every call to header_parser():
+  *   field       - text accumulated for the name or value in progress
+  *   i           - write offset inside field
+  *   free_size   - room left in field, maintained by header_mem_handler.c
+  *   size_needed - scratch size used when copying the identifier
+  * Everything starts out empty so that no rule can read an unset pointer.
+  */
+ int i = 0;
+ int size_needed = 0;
+ int free_size = 0;
+ char* field = NULL;
+
+
+<INITIAL>{SUP} {
+    /*printf("\n<INITIAL>{SUP},%s",yytext);*/
+    BEGIN(REGID);
+ }
+
+<INITIAL,REGID>{WORDID} {
+    i=0;
+
+    /* Release a buffer left by an earlier identifier (free(NULL) is a no-op). */
+    free(field);
+    field = malloc_field(&free_size);
+
+    size_needed = strlen(yytext)+1;
+    (*p_header)[*nbf].name = (char*) malloc(sizeof("id"));
+    (*p_header)[*nbf].value = (char*) malloc(sizeof(char)*size_needed);
+    if ((*p_header)[*nbf].name == NULL || (*p_header)[*nbf].value == NULL)
+        YY_FATAL_ERROR( "out of dynamic memory in header_parser()" );
+
+    strcpy(((*p_header)[*nbf]).name,"id");
+    strcpy(((*p_header)[*nbf]).value,yytext);
+
+    /* Advance through the helper, as the other rules do, so that the
+     * table is enlarged when its last entry has just been used. */
+    p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated);
+ }
+
+
+<INITIAL,REGID>{SPACE} {
+    /* A header that begins with blanks gets here without an identifier:
+     * make sure the accumulator exists before REGNAME rules use it. */
+    if (field == NULL)
+        field = malloc_field(&free_size);
+    BEGIN(REGNAME);
+ }
+
+<REGNAME>{WORD} {
+    /*fprintf(stderr,"\n<REGNAME>{WORD} **%s**",yytext);*/
+    field = store_in_field(field,yytext,&free_size,&i);
+ }
+
+<REGNAME>{SPACE} {
+    /*fprintf(stderr,"\n<REGNAME>{SPACE} **%s**",yytext);*/
+    /* Blanks are kept only once some text has been stored. */
+    if (i != 0)
+        field = store_in_field(field,yytext,&free_size,&i);
+ }
+
+<REGNAME>{EQUAL} {
+    /*fprintf(stderr,"\n<REGNAME>{EQUAL},%s",yytext);*/
+    /* The accumulated text is the attribute name; its value follows. */
+    field = store_in_header_table(field, &((*p_header)[*nbf].name), &free_size, &i);
+    BEGIN(REGVAL);
+ }
+
+<REGNAME>{SEP} {
+    /*fprintf(stderr,"\n<REGNAME>{SEP},%s",yytext);*/
+    /* Text closed by ';' without any '=' is stored under the name "definition". */
+    (*p_header)[*nbf].name = (char*) malloc(sizeof("definition"));
+    if ((*p_header)[*nbf].name == NULL)
+        YY_FATAL_ERROR( "out of dynamic memory in header_parser()" );
+    strcpy((*p_header)[*nbf].name,"definition");
+    field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i);
+    p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated);
+    BEGIN(REGNAME);
+ }
+
+<REGVAL>{WORD} {
+    /*fprintf(stderr,"\n<REGVAL>{WORD} **%s**\n",yytext);*/
+    field = store_in_field(field,yytext,&free_size,&i);
+ }
+
+<REGVAL>{SPACE} {
+    /*fprintf(stderr,"\n<REGVAL>{SPACE} **%s**\n",yytext);*/
+    field = store_in_field(field,yytext,&free_size,&i);
+ }
+
+<REGVAL>{SEP} {
+    /*fprintf(stderr,"\n<REGVAL>{SEP},%s\n",yytext);*/
+
+    field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i);
+    p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated);
+    BEGIN(REGNAME);
+ }
+
+
+<REGVAL>{EQUAL} {
+    /*fprintf(stderr, "\nWarning : separator ';' probably missing in header after %s",(*p_header)[*nbf].name);*/
+ }
+
+<REGVAL><<EOF>> {
+    /* The input ended inside a value: store it as the last field. */
+    field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i);
+    p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated);
+    end_header_table(p_header, *nbf);
+
+    free(field);
+    BEGIN(INITIAL);
+    return 0;
+ }
+
+<REGNAME><<EOF>> {
+    /*(*p_header)[*nbf].name = (char*) malloc(sizeof(char)*19);
+    strcpy((*p_header)[*nbf].name,"other_informations");
+    field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i);
+    p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated);
+    */
+    end_header_table(p_header, *nbf);
+
+    free(field);
+    BEGIN(INITIAL);
+    return 0;
+ }
+
+<INITIAL,REGID><<EOF>> {
+    /* The header held at most an identifier. The field count written by
+     * header_parser_main() is left untouched; the accumulator is released
+     * and the start condition reset so the next header starts in INITIAL. */
+    free(field);
+    BEGIN(INITIAL);
+    return 0;
+ }
+
+%%
+
+/* Tell the scanner that no further input follows the end of the current
+ * buffer: always returns 1.
+ */
+int header_yywrap(void)
+{
+    return 1;
+}
+
+/* header_parser_main - parse one FASTA header into a table of name/value pairs.
+ *
+ * h is the NUL-terminated header text, handed to the scanner through
+ * yy_scan_string(). Entry 0 of the returned table is named "nbfields"; its
+ * value starts as the text "1" and is rewritten by the scanner through
+ * end_header_table(). The table and the strings stored in it are heap
+ * allocated.
+ *
+ * Aborts through YY_FATAL_ERROR when memory cannot be obtained.
+ */
+element_from_header* header_parser_main(char *h)
+{
+    int nbfields = 1;
+    int memory_allocated = MEMALLOCATED;
+    element_from_header* header;
+    char* nbfields_n;
+    char* nbfields_v;
+    YY_BUFFER_STATE state;
+
+    nbfields_n = (char*) malloc(sizeof("nbfields"));
+    /* The scanner later prints an int into this buffer: three decimal
+     * digits per byte of int, plus sign and terminator, always suffices.
+     */
+    nbfields_v = (char*) malloc(3*sizeof(int) + 2);
+    header = (element_from_header*) malloc(memory_allocated * sizeof(element_from_header));
+    if ( ! nbfields_n || ! nbfields_v || ! header )
+        YY_FATAL_ERROR( "out of dynamic memory in header_parser_main()" );
+
+    strcpy(nbfields_n, "nbfields");
+    strcpy(nbfields_v, "1");
+
+    header[0].name = nbfields_n;
+    header[0].value = nbfields_v;
+
+    state=yy_scan_string(h);
+
+    /* The scanner may reallocate the table, hence the address of header. */
+    header_parser(&nbfields, &memory_allocated, &header);
+
+    yy_delete_buffer(state);
+
+    return header;
+}
+
+
+
+
diff --git a/sumalibs/libfasta/fasta_seq_writer.c b/sumalibs/libfasta/fasta_seq_writer.c
new file mode 100644
index 0000000..ccee773
--- /dev/null
+++ b/sumalibs/libfasta/fasta_seq_writer.c
@@ -0,0 +1,76 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "sequence.h"
+#include "fasta_header_parser.h"
+
+
+/* Write seq->sequence to output as lines of at most 60 characters, each
+ * followed by a newline. An empty sequence produces one empty line.
+ * The sequence buffer is only read, never modified.
+ */
+void printOnlySeqFromFastaSeqPtr(fastaSeqPtr seq, FILE* output)
+{
+    const char *line = seq->sequence;
+    size_t remaining = strlen(line);
+
+    do
+    {
+        size_t width = remaining < 60 ? remaining : 60;
+
+        /* The precision bounds how much of the string is printed, so no
+         * terminator has to be patched into the sequence. */
+        fprintf(output, "%.*s\n", (int) width, line);
+        line += width;
+        remaining -= width;
+    } while (remaining > 0);
+}
+
+
+/* Write the NUL-terminated string seq to output as lines of at most 60
+ * characters, each followed by a newline. An empty string produces one
+ * empty line. The string is only read, never modified.
+ */
+void printOnlySeqFromChar(char* seq, FILE* output)
+{
+    const char *line = seq;
+    size_t remaining = strlen(seq);
+
+    do
+    {
+        size_t width = remaining < 60 ? remaining : 60;
+
+        /* The precision bounds how much of the string is printed, so no
+         * terminator has to be patched into the caller's buffer. */
+        fprintf(output, "%.*s\n", (int) width, line);
+        line += width;
+        remaining -= width;
+    } while (remaining > 0);
+}
+
+
+/* Write '>' followed by seq->rawheader and a newline to output. */
+void printOnlyHeaderFromFastaSeqPtr(fastaSeqPtr seq, FILE* output)
+{
+ fprintf(output,">%s\n",seq->rawheader);
+}
+
+
+/* Write a header line rebuilt from a parsed table.
+ *
+ * header[0].value gives the number of fields as decimal text and
+ * header[1].value is printed right after '>'. Every later field whose name
+ * is not "definition" is printed as "name=value; ". The value of the last
+ * field is printed after them when that field is named "definition".
+ */
+void printOnlyHeaderFromTable(element_from_header* header, FILE* output)
+{
+    const int last = atoi(header[0].value);
+    int field = 2;
+
+    fprintf(output, ">%s ", (char *) header[1].value);
+
+    while (field <= last)
+    {
+        if (strcmp(header[field].name, "definition") != 0)
+            fprintf(output, "%s=%s; ", header[field].name, (char *) header[field].value);
+        field++;
+    }
+
+    if (strcmp(header[last].name, "definition") == 0)
+        fprintf(output, "%s; ", (char *) header[last].value);
+
+    fprintf(output, "\n");
+}
+
+
+/* Write the header line of seq, then its sequence lines, to output. */
+void printHeaderAndSeqFromFastaSeqPtr(fastaSeqPtr seq, FILE* output)
+{
+ printOnlyHeaderFromFastaSeqPtr(seq, output);
+ printOnlySeqFromFastaSeqPtr(seq, output);
+}
diff --git a/sumalibs/libfasta/fasta_seq_writer.h b/sumalibs/libfasta/fasta_seq_writer.h
new file mode 100644
index 0000000..39f8212
--- /dev/null
+++ b/sumalibs/libfasta/fasta_seq_writer.h
@@ -0,0 +1,19 @@
+
+#ifndef FASTA_SEQ_WRITER_H_
+#define FASTA_SEQ_WRITER_H_
+
+#include "sequence.h"
+
+
+/* Write the sequence of a fastaSeq record in lines of at most 60 characters. */
+void printOnlySeqFromFastaSeqPtr(fastaSeqPtr, FILE*);
+
+/* Same as above for a plain NUL-terminated string. */
+void printOnlySeqFromChar(char*, FILE*);
+
+/* Write '>' followed by the raw header of the record and a newline. */
+void printOnlyHeaderFromFastaSeqPtr(fastaSeqPtr, FILE*);
+
+/* Write a header line rebuilt from a parsed header table. */
+void printOnlyHeaderFromTable(element_from_header*, FILE*);
+
+/* Write the raw header line followed by the sequence lines. */
+void printHeaderAndSeqFromFastaSeqPtr(fastaSeqPtr, FILE*);
+
+
+#endif
diff --git a/sumalibs/libfasta/header_mem_handler.c b/sumalibs/libfasta/header_mem_handler.c
new file mode 100644
index 0000000..a0b8e7c
--- /dev/null
+++ b/sumalibs/libfasta/header_mem_handler.c
@@ -0,0 +1,93 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "header_mem_handler.h"
+#include <string.h>
+
+/* Field buffers are allocated and enlarged in chunks of this many bytes. */
+#define FIELD_BUFFER 1024
+
+
+/* Report an allocation failure in function `where` and terminate. */
+static void field_out_of_memory(const char *where)
+{
+    fprintf(stderr, "%s: out of memory\n", where);
+    exit(EXIT_FAILURE);
+}
+
+
+/* Allocate an empty field buffer of FIELD_BUFFER bytes.
+ * *free_size receives the room available in it (FIELD_BUFFER).
+ * The process is terminated when the allocation fails.
+ */
+char* malloc_field(int *free_size)
+{
+    char* field = malloc(FIELD_BUFFER);
+
+    if (field == NULL)
+        field_out_of_memory("malloc_field");
+    field[0] = '\0';
+    *free_size = FIELD_BUFFER;
+    return field;
+}
+
+/* Number of FIELD_BUFFER chunks required to hold size_needed bytes;
+ * always at least one more chunk than size_needed / FIELD_BUFFER.
+ */
+int check_mem_field(int size_needed)
+{
+    return size_needed / FIELD_BUFFER + 1;
+}
+
+/* Resize field to number_of_chunks_to_alloc chunks and return the result of
+ * realloc() unchanged: NULL on failure, in which case the block passed in
+ * is still allocated.
+ */
+char* realloc_field(int number_of_chunks_to_alloc, char* field)
+{
+    size_t bytes = (size_t) number_of_chunks_to_alloc * FIELD_BUFFER;
+
+    return realloc(field, bytes);
+}
+
+/* Enlarge field so that size_needed more bytes (terminator included) fit
+ * after the text it already holds. *free_size receives the room that will
+ * remain once those bytes are stored. The current text is preserved.
+ * The process is terminated when the reallocation fails.
+ */
+char* check_and_realloc_field(char* field, int size_needed, int* free_size)
+{
+    int total = size_needed + (int) strlen(field);
+    int number_of_chunks_to_alloc = check_mem_field(total);
+    char* grown = realloc_field(number_of_chunks_to_alloc, field);
+
+    if (grown == NULL)
+        field_out_of_memory("check_and_realloc_field");
+    *free_size = number_of_chunks_to_alloc*FIELD_BUFFER - total + 1;
+    return grown;
+}
+
+
+/* Copy yytext, terminator included, into field at offset *i, enlarging the
+ * buffer first when less than strlen(yytext)+1 bytes are free.
+ * *i is advanced past the copied text (onto its terminator) and *free_size
+ * is reduced by the length of the text. Returns the possibly moved buffer.
+ */
+char* store_in_field(char* field, char* yytext, int* free_size, int* i)
+{
+    size_t text_len = strlen(yytext);
+    int size_needed = (int) text_len + 1;
+
+    if (size_needed > (*free_size))
+        field = check_and_realloc_field(field, size_needed, free_size);
+    else
+        (*free_size) -= (int) text_len;
+
+    memcpy(field + (*i), yytext, text_len + 1);
+    (*i) += (int) text_len;
+    return field;
+}
+
+
+/* Move the text accumulated in field into the header table.
+ *
+ * A heap copy of field is stored in *storing_place, *i is reset to 0, field
+ * is released and a fresh empty buffer (see malloc_field) is returned, with
+ * *free_size set for it. The process is terminated when the copy cannot be
+ * allocated.
+ */
+char* store_in_header_table(char* field, char** storing_place, int* free_size, int* i)
+{
+    size_t bytes = strlen(field) + 1;
+    char* copy = malloc(bytes);
+
+    if (copy == NULL)
+    {
+        fprintf(stderr, "store_in_header_table: out of memory\n");
+        exit(EXIT_FAILURE);
+    }
+    memcpy(copy, field, bytes);
+    *storing_place = copy;
+
+    (*i) = 0;
+    free(field);
+    return malloc_field(free_size);
+}
+
+
+/* Advance the field counter and make sure the table has an entry for it.
+ *
+ * *nbf is incremented; when it has reached (or passed) *memory_allocated
+ * the table *p_header is enlarged to *nbf + 1 entries and *memory_allocated
+ * updated. Returns p_header. The process is terminated when the table
+ * cannot be enlarged.
+ */
+element_from_header** check_and_realloc_mem_in_header_table(element_from_header** p_header, int* nbf, int* memory_allocated)
+{
+    (*nbf)++;
+
+    if (*nbf >= *memory_allocated)
+    {
+        int capacity = *nbf + 1;
+        /* Keep the old pointer until realloc is known to have succeeded. */
+        element_from_header* grown = realloc(*p_header, (size_t) capacity * sizeof(element_from_header));
+
+        if (grown == NULL)
+        {
+            fprintf(stderr, "check_and_realloc_mem_in_header_table: out of memory\n");
+            exit(EXIT_FAILURE);
+        }
+        *p_header = grown;
+        *memory_allocated = capacity;
+    }
+
+    return p_header;
+}
+
+/* Record the final number of fields in entry 0 of the table.
+ *
+ * The decimal text of nbf - 1 replaces the value of (*p_header)[0]. The
+ * value buffer is resized to fit that text, so it has to be heap allocated
+ * (header_parser_main() obtains it from malloc). The process is terminated
+ * when the buffer cannot be resized.
+ */
+void end_header_table(element_from_header** p_header, int nbf)
+{
+    char text[3 * sizeof(int) + 2];   /* enough for any int, sign and NUL */
+    int len = snprintf(text, sizeof text, "%d", nbf - 1);
+    char* value = realloc((*p_header)->value, (size_t) len + 1);
+
+    if (value == NULL)
+    {
+        fprintf(stderr, "end_header_table: out of memory\n");
+        exit(EXIT_FAILURE);
+    }
+    memcpy(value, text, (size_t) len + 1);
+    (*p_header)->value = value;
+}
diff --git a/sumalibs/libfasta/header_mem_handler.h b/sumalibs/libfasta/header_mem_handler.h
new file mode 100644
index 0000000..bfb591b
--- /dev/null
+++ b/sumalibs/libfasta/header_mem_handler.h
@@ -0,0 +1,22 @@
+#ifndef HEADER_MEM_HANDLER_H_
+#define HEADER_MEM_HANDLER_H_
+
+#include "fasta_header_parser.h"
+
+/* Allocate an empty field buffer; the int receives the room available in it. */
+char* malloc_field(int*);
+
+/* Number of allocation chunks required for the given number of bytes. */
+int check_mem_field(int);
+
+/* Resize a field buffer to the given number of chunks. */
+char* realloc_field(int, char*);
+
+/* Enlarge a field buffer so that the given number of extra bytes fits;
+ * the last argument receives the room left afterwards.
+ */
+char* check_and_realloc_field(char*, int, int*);
+
+/* Copy a token (second argument) into the field buffer (first argument) at
+ * the offset held by the last argument, updating the free room (third
+ * argument) and that offset.
+ */
+char* store_in_field(char*, char*, int*, int*);
+
+/* Store a copy of the field text at the given place and return a fresh,
+ * empty field buffer.
+ */
+char* store_in_header_table(char*, char**, int*, int*);
+
+/* Increment the field counter and enlarge the header table when needed. */
+element_from_header** check_and_realloc_mem_in_header_table(element_from_header**, int*, int*);
+
+/* Write nbf - 1 as decimal text into the value of the first table entry. */
+void end_header_table(element_from_header** p_header, int nbf);
+
+#endif
diff --git a/sumalibs/libfasta/sequence.c b/sumalibs/libfasta/sequence.c
new file mode 100644
index 0000000..2cf3d10
--- /dev/null
+++ b/sumalibs/libfasta/sequence.c
@@ -0,0 +1,454 @@
+/**
+ * FileName: sequence.c
+ * Authors: Tiayyba Riaz, Celine Mercier
+ * Description: C file for sequence reading and parsing
+ * **/
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "../libutils/utilities.h"
+#include "sequence.h"
+#include "../libfile/fileHandling.h"
+#include "fasta_header_handler.h"
+#include "fasta_header_parser.h"
+
+
+/*
+ * Function Name: seq_getNext(FILE *fp, char *fieldDelim, BOOL isStandardSeq, BOOL onlyATGC)
+ * Description:   Reads the next record from fp with seq_readNextFromFilebyLine(),
+ *                splits it at the first '\n' into a header line and a sequence
+ *                body, and returns a newly allocated fastaSeq filled from both.
+ *                The body is cleaned by seq_fillSeqOnlyATGC() or seq_fillSeq()
+ *                when isStandardSeq is set, by seq_fillDigitSeq() otherwise.
+ * Returns:       the new structure, or NULL when no record could be read or
+ *                the record contains no '\n'.
+ */
+
+fastaSeqPtr seq_getNext(FILE *fp, char *fieldDelim, BOOL isStandardSeq, BOOL onlyATGC)
+{
+    char *seq;
+    char *header;
+    char *strTemp;
+    fastaSeqPtr seqElem;
+    int seqLen;
+    size_t headerLen;
+
+    seq = seq_readNextFromFilebyLine(fp);
+    if (seq == NULL)
+        return NULL;
+
+    /* Find header separator \n; without it the record is unusable */
+    strTemp = strchr(seq, '\n');
+    if (strTemp == NULL)
+    {
+        /* the raw record used to be leaked on this path */
+        util_free((void *)seq);
+        return NULL;
+    }
+
+    headerLen = (size_t)(strTemp - seq);
+    seqLen = strlen(strTemp);
+
+    /* Separate header in header variable */
+    header = (char*) util_malloc((headerLen + 1)*sizeof(char), __FILE__, __LINE__);
+    memcpy(header, seq, headerLen);
+    header[headerLen] = '\0';
+
+    /* Get memory for new sequence structure element */
+    seqElem = (fastaSeqPtr) util_malloc(sizeof(fastaSeq), __FILE__, __LINE__);
+
+    /* Parse header and assign values to structure fields */
+    seq_fillHeader(header, fieldDelim, seqElem);
+
+    /* seq_fillHeader() keeps its own copies (strdup + accession_id buffer),
+     * so the temporary header can be released here instead of leaking */
+    util_free((void *)header);
+
+    /* Get clean sequence and assign to structure field */
+    if (isStandardSeq)
+    {
+        if (onlyATGC)
+            seq_fillSeqOnlyATGC(strTemp, seqElem, seqLen);
+        else
+            seq_fillSeq(strTemp, seqElem, seqLen);
+    }
+    else
+    {
+        seq_fillDigitSeq(strTemp, seqElem, seqLen);
+    }
+
+    /* The raw record is no longer needed */
+    util_free((void *)seq);
+
+    /* Return new sequence structure element */
+    return seqElem;
+}
+
+
+/*
+ * Function Name: seq_readNextFromFilebyLine(FILE* fp)
+ * Description:   Collects the text of the next record into one heap buffer.
+ *                A leading '>' is consumed; lines are then appended (newlines
+ *                included, in chunks of at most 499 characters) until a line
+ *                starts with '>' (pushed back for the next call), until
+ *                file_nextChar() yields '\0', or until file_nextLine() fails.
+ * Returns:       the buffer (possibly an empty string), or NULL when the
+ *                end-of-file indicator is already set on entry.
+ */
+char *seq_readNextFromFilebyLine(FILE* fp)
+{
+    char newc = '\0';
+    BOOL seqCompleted = FALSE;
+    char tempstr[500];
+    char* buffer;
+    size_t used = 0;     /* characters stored so far, excluding the NUL */
+    size_t chunk;
+
+    if (feof(fp)) return NULL;
+    newc = file_nextChar(fp);
+    if (newc != '>') ungetc(newc, fp);
+
+    buffer = util_malloc(1*sizeof(char), __FILE__, __LINE__);
+    buffer[0] = '\0';
+
+    while (!seqCompleted)
+    {
+        newc = file_nextChar(fp);
+        if (newc == '>' || newc == '\0')
+        {
+            seqCompleted = TRUE;
+            if (newc == '>')
+                ungetc(newc, fp); // Make sure next time we start from sequence delimiter >
+        }
+        else
+        {
+            ungetc(newc, fp);
+            if (file_nextLine(fp, tempstr, (int32_t) sizeof tempstr) != NULL)
+            {
+                /* Append at the tracked end instead of rescanning the whole
+                 * buffer with strlen()/strcat() for every chunk. */
+                chunk = strlen(tempstr);
+                buffer = util_realloc(buffer, used + chunk + 1, __FILE__, __LINE__);
+                memcpy(buffer + used, tempstr, chunk + 1);
+                used += chunk;
+            }
+            else
+            {
+                seqCompleted = TRUE;
+            }
+        }
+    }
+    return buffer;
+}
+
+
+/*
+ * Function Name: seq_fillHeader(char* header, char *fieldDelim, fastaSeqPtr seqElem)
+ * Description:   Stores a duplicate of the whole header line in
+ *                seqElem->rawheader and copies everything before the first
+ *                space (or the entire header when it has no space) into
+ *                seqElem->accession_id. fieldDelim is not consulted: the
+ *                separator is always ' '.
+ */
+void seq_fillHeader(char* header, char *fieldDelim, fastaSeqPtr seqElem)
+{
+    char* firstSpace;
+    char* id;
+    int idLen;
+
+    seqElem->rawheader = strdup(header);
+
+    firstSpace = strchr(header, ' ');
+    idLen = (firstSpace != NULL) ? (int)(firstSpace - header) : (int)strlen(header);
+
+    id = (char*) util_malloc(1+idLen*sizeof(char), __FILE__, __LINE__);
+    strncpy(id, header, idLen);
+    id[idLen] = '\0';
+
+    seqElem->accession_id = id;
+}
+
+
+/*
+ * Function Name: seq_fillSeq(char *seq, fastaSeqPtr seqElem, int seqLen)
+ * Description:   Keeps only the characters of seq found in "acgtACGT-nN",
+ *                lower-cases them, and stores the result in seqElem->sequence
+ *                (a strdup'ed string) with its length in seqElem->length.
+ *                seqLen is kept for interface compatibility only: the scratch
+ *                buffer is sized from strlen(seq), so it can never be too
+ *                small whatever the caller passes.
+ */
+void seq_fillSeq(char *seq, fastaSeqPtr seqElem, int seqLen)
+{
+    char* seqTemp;
+    char c;
+    int32_t index = 0, seqIndex = 0, len = strlen(seq);
+    char* seqAlphabets = "acgtACGT-nN";
+
+    (void) seqLen;
+    seqTemp = (char*) util_malloc((len+1)*sizeof(char), __FILE__, __LINE__);
+
+    while (index < len)
+    {
+        c = seq[index++];
+        if (strchr(seqAlphabets, c) != NULL)
+            seqTemp[seqIndex++] = tolower(c);
+    }
+    seqTemp[seqIndex] = '\0';
+    seqElem->length=seqIndex;
+    seqElem->sequence = strdup(seqTemp);
+
+    /* the scratch buffer was leaked before; strdup() made its own copy */
+    util_free((void *)seqTemp);
+}
+
+
+/*
+ * Function Name: seq_fillSeqOnlyATGC(char *seq, fastaSeqPtr seqElem, int seqLen)
+ * Description:   Like seq_fillSeq() but accepts only "acgtACGT". If any other
+ *                character than '\n' is met, the record is rejected:
+ *                seqElem->sequence becomes an empty string and length is 0.
+ *                Otherwise the lower-cased sequence and its length are stored.
+ *                seqLen is kept for interface compatibility only; the scratch
+ *                buffer is sized from strlen(seq).
+ */
+void seq_fillSeqOnlyATGC(char *seq, fastaSeqPtr seqElem, int seqLen)
+{
+    char* seqTemp;
+    char c;
+    int32_t index = 0, seqIndex = 0, len = strlen(seq);
+    char* seqAlphabets = "acgtACGT";
+    int notAllATGC = 0;
+
+    (void) seqLen;
+    seqTemp = (char*) util_malloc((len+1)*sizeof(char), __FILE__, __LINE__);
+
+    while (index < len)
+    {
+        c = seq[index++];
+        if (strchr(seqAlphabets, c) != NULL)
+            seqTemp[seqIndex++] = tolower(c);
+        else if (c != '\n')
+            notAllATGC = 1;
+    }
+
+    if (notAllATGC)
+    {
+        seqTemp[0] = '\0';
+        /* length used to stay uninitialised for rejected records */
+        seqElem->length = 0;
+    }
+    else
+    {
+        seqTemp[seqIndex] = '\0';
+        seqElem->length=seqIndex;
+    }
+    seqElem->sequence = strdup(seqTemp);
+
+    /* the scratch buffer was leaked before; strdup() made its own copy */
+    util_free((void *)seqTemp);
+}
+
+
+/*
+ * Function Name: seq_fillDigitSeq(char *seq, fastaSeqPtr seqElem, int seqLen)
+ * Description:   Keeps only the digits '0'..'9' and spaces of seq and stores
+ *                the result in seqElem->sequence (a strdup'ed string). Any
+ *                other character is silently skipped. seqElem->length is not
+ *                set by this function. seqLen is kept for interface
+ *                compatibility only; the scratch buffer is sized from
+ *                strlen(seq).
+ */
+void seq_fillDigitSeq(char *seq, fastaSeqPtr seqElem, int seqLen)
+{
+    char* seqTemp;
+    char c;
+    int32_t index = 0, seqIndex = 0, len = strlen(seq);
+
+    (void) seqLen;
+    seqTemp = (char*) util_malloc((len+1)*sizeof(char), __FILE__, __LINE__);
+
+    while (index < len)
+    {
+        c = seq[index++];
+        if ((c >= '0' && c <= '9') || c == ' ')
+            seqTemp[seqIndex++] = c;
+    }
+    seqTemp[seqIndex] = '\0';
+    seqElem->sequence = strdup(seqTemp);
+
+    /* the scratch buffer was leaked before; strdup() made its own copy */
+    util_free((void *)seqTemp);
+}
+
+
+/*
+ * Function Name: seq_readAllSeq2(char *fileName, BOOL isStandardSeq, BOOL onlyATGC)
+ * Description:   Reads every record of fileName (stdin when fileName is NULL
+ *                or "-") with seq_getNext() and returns them as an array with
+ *                its element count. Records whose cleaned sequence is empty
+ *                are dropped and counted; the number dropped is reported on
+ *                stderr. Exits with status 1 when the file cannot be opened;
+ *                exitIfEmptyFile() ends the program for an empty file.
+ */
+fastaSeqCount seq_readAllSeq2(char *fileName, BOOL isStandardSeq, BOOL onlyATGC)
+{
+    FILE* fp;
+    fastaSeqPtr seqPtr;
+    fastaSeqPtr seqPtrAr;
+
+    int32_t counter = 0;
+    int32_t slots = 1000;
+    fastaSeqCount allseqs;
+    int32_t discarded=0;
+
+    if ((fileName == NULL) || (strcmp(fileName, "-") == 0))
+        fp = stdin;
+    else
+        fp = file_open(fileName, TRUE);
+
+    /* Check the stream before anything touches it: exitIfEmptyFile() used to
+     * be called first and received NULL when the file could not be opened. */
+    if (fp == NULL)
+    {
+        fprintf(stderr, "\nCould not open file.\n");
+        exit(1);
+    }
+
+    if (fp != stdin)
+        exitIfEmptyFile(fp);
+
+    seqPtrAr = (fastaSeqPtr) util_malloc(slots*sizeof(fastaSeq), __FILE__, __LINE__);
+
+    seqPtr = seq_getNext(fp, " ", isStandardSeq, onlyATGC);
+
+    while (seqPtr != NULL)
+    {
+        if (counter == slots)
+        {
+            slots += 1000;
+            seqPtrAr = (fastaSeqPtr)util_realloc(seqPtrAr, slots*sizeof(fastaSeq), __FILE__, __LINE__);
+        }
+
+        if ((seqPtr->sequence)[0] != '\0')
+        {
+            /* shallow copy: the array element now owns the record's strings */
+            seqPtrAr[counter++] = *seqPtr;
+        }
+        else
+        {
+            discarded++;
+            /* nothing else references a dropped record's strings:
+             * sequence and rawheader come from strdup(), accession_id
+             * from util_malloc() */
+            free(seqPtr->sequence);
+            free(seqPtr->rawheader);
+            util_free((void *)seqPtr->accession_id);
+        }
+
+        util_free((void *)seqPtr);
+        seqPtr = seq_getNext(fp, " ", isStandardSeq, onlyATGC);
+    }
+    fclose(fp);
+
+    /* Trim the array to what is used; skip the trim when nothing was kept so
+     * that no zero-byte reallocation is requested. */
+    if (counter != slots && counter > 0)
+        seqPtrAr = (fastaSeqPtr)util_realloc(seqPtrAr, counter*sizeof(fastaSeq), __FILE__, __LINE__);
+
+    allseqs.count = counter;
+    allseqs.fastaSeqs = seqPtrAr;
+
+    if (discarded)
+        fprintf(stderr, "\nDiscarded %d sequences that did not contain only 'AaTtGgCc' characters.", discarded);
+
+    return allseqs;
+}
+
+
+/*
+ * Linear search for the record whose accession_id equals accid.
+ * Returns its index in allseqs->fastaSeqs, or -1 when there is none.
+ */
+int32_t seq_findSeqByAccId (char *accid, fastaSeqCountPtr allseqs)
+{
+    int32_t pos = 0;
+
+    while (pos < allseqs->count)
+    {
+        if (strcmp(accid, allseqs->fastaSeqs[pos].accession_id) == 0)
+            return pos;
+        pos++;
+    }
+    return -1;
+}
+
+
+/*
+ * Prints every record that has a sequence to stdout in FASTA form: a '>' line
+ * carrying the raw header (or the accession id when there is no raw header),
+ * followed by the sequence on one line.
+ */
+void seq_printSeqs (fastaSeqCountPtr allseq)
+{
+    int32_t n;
+
+    for (n = 0; n < allseq->count; n++)
+    {
+        fastaSeqPtr rec = allseq->fastaSeqs + n;
+        char *title;
+
+        if (rec->sequence == NULL)
+            continue;
+
+        title = rec->rawheader ? rec->rawheader : rec->accession_id;
+        printf (">%s\n", title);
+        printf ("%s\n", rec->sequence);
+    }
+}
+
+
+/*
+ * Replaces, in place, every character that is not 'a', 'c', 'g' or 't' with
+ * 'a' in each sequence of db, reports on stderr how many characters and
+ * sequences were touched, and returns db.count.
+ */
+int cleanDB(fastaSeqCount db) // replace not a/t/g/c with a's
+{
+    int32_t idx;
+    int32_t touchedSeqs = 0;
+    int32_t touchedBases = 0;
+
+    fprintf(stderr,"Cleaning dataset...");
+
+    for (idx = 0; idx < db.count; idx++)
+    {
+        char *base = db.fastaSeqs[idx].sequence;
+        int32_t before = touchedBases;
+
+        while (*base != 0)
+        {
+            if (strchr("acgt", *base) == NULL)
+            {
+                *base = 'a';
+                touchedBases++;
+            }
+            base++;
+        }
+
+        /* at least one substitution happened in this sequence */
+        if (touchedBases != before)
+            touchedSeqs++;
+    }
+
+    if (touchedSeqs)
+        fprintf(stderr," : %d nucleotides substituted in %d sequences\n",touchedBases,touchedSeqs);
+    else
+        fprintf(stderr," : Done\n");
+
+    return(db.count);
+}
+
+
+/*
+ * Parses the raw header of every record with header_parser_main() and stores
+ * the result in the record's `header`. When the parsed header has a "count"
+ * item, its atoi() value becomes the record's count; otherwise a
+ * "count" = "1" field is added through table_header_add_field() and the
+ * count is set to 1. The same two heap strings are passed for every record
+ * that needs the default field.
+ */
+void addCounts(fastaSeqCount* db)
+{
+    int idx;
+    char* found;
+    element_from_header* parsed;
+    char* defaultName = (char*) malloc(6*sizeof(char));
+    char* defaultValue = (char*) malloc(2*sizeof(char));
+
+    strcpy(defaultName, "count");
+    strcpy(defaultValue, "1");
+
+    for (idx = 0; idx < db->count; idx++)
+    {
+        fastaSeqPtr rec = db->fastaSeqs + idx;
+
+        parsed = header_parser_main(rec->rawheader);
+        found = getItemFromHeader("count", parsed);
+
+        if (found != NULL)
+        {
+            rec->count = atoi(found);
+        }
+        else    // no count field
+        {
+            parsed = table_header_add_field(parsed, defaultName, defaultValue);
+            rec->count = 1;
+        }
+        rec->header = parsed;
+    }
+}
+
+
+/*
+ * Collapses runs of consecutive records with identical sequences. The first
+ * record of each run is flagged uniqHead, accumulates the counts of the rest
+ * of the run, and has its address written to the next slot of *uniqSeqs;
+ * the others are flagged as not being heads. Returns the number of heads.
+ * Only neighbours are compared, so this presumably expects db to be sorted
+ * by sequence beforehand -- verify against the caller. Element 0 is accessed
+ * unconditionally.
+ */
+int uniqSeqsVector(fastaSeqCount* db, fastaSeqPtr** uniqSeqs)
+{
+    fastaSeqPtr all = db->fastaSeqs;
+    fastaSeqPtr* heads = *uniqSeqs;
+    int head = 0;      /* index of the current run's first record */
+    int cur;
+    int nUniq = 1;
+
+    heads[0] = all;
+    all[0].uniqHead = TRUE;
+
+    for (cur = 1; cur < db->count; cur++)
+    {
+        if (strcmp(all[head].sequence, all[cur].sequence) != 0)
+        {
+            all[cur].uniqHead = TRUE;
+            heads[nUniq] = all + cur;
+            nUniq++;
+            head = cur;
+        }
+        else
+        {
+            all[head].count += all[cur].count;
+            all[cur].uniqHead = FALSE;
+        }
+    }
+    return(nUniq);
+}
+
+
+/*
+ * Stores in *lmax the largest `length` among the n records pointed to by db
+ * (0 when n is 0 or no length is positive) and in *lmin the smallest length,
+ * starting the search from *lmax.
+ */
+void calculateMaxAndMinLen(fastaSeqPtr* db, int n, int* lmax, int* lmin)
+{
+    int idx;
+    int longest = 0;
+    int shortest;
+
+    for (idx = 0; idx < n; idx++)
+    {
+        if (db[idx]->length > longest)
+            longest = db[idx]->length;
+    }
+
+    shortest = longest;
+    for (idx = 0; idx < n; idx++)
+    {
+        if (db[idx]->length < shortest)
+            shortest = db[idx]->length;
+    }
+
+    *lmax = longest;
+    *lmin = shortest;
+}
+
+
+/*
+ * Same as calculateMaxAndMinLen() but walks the record array held in db:
+ * *lmax receives the largest `length` (0 when there is none above 0) and
+ * *lmin the smallest, starting the search from *lmax.
+ */
+void calculateMaxAndMinLenDB(fastaSeqCount db, int* lmax, int* lmin)
+{
+    int idx;
+    int longest = 0;
+    int shortest;
+
+    for (idx = 0; idx < db.count; idx++)
+    {
+        if (db.fastaSeqs[idx].length > longest)
+            longest = db.fastaSeqs[idx].length;
+    }
+
+    shortest = longest;
+    for (idx = 0; idx < db.count; idx++)
+    {
+        if (db.fastaSeqs[idx].length < shortest)
+            shortest = db.fastaSeqs[idx].length;
+    }
+
+    *lmax = longest;
+    *lmin = shortest;
+}
+
+
+/*
+ * Comparison callback over pointers to record pointers: returns the count of
+ * *s2 minus the count of *s1, which orders records by decreasing count.
+ */
+int sortSeqsWithCounts(const void **s1, const void **s2)
+{
+    fastaSeqPtr first = (fastaSeqPtr) *s1;
+    fastaSeqPtr second = (fastaSeqPtr) *s2;
+
+    return(second->count - first->count);
+}
+
+
+/*
+ * Comparison callback over pointers to record pointers: returns the count of
+ * *s1 minus the count of *s2, which orders records by increasing count.
+ */
+int reverseSortSeqsWithCounts(const void **s1, const void **s2)
+{
+    fastaSeqPtr first = (fastaSeqPtr) *s1;
+    fastaSeqPtr second = (fastaSeqPtr) *s2;
+
+    return(first->count - second->count);
+}
diff --git a/sumalibs/libfasta/sequence.h b/sumalibs/libfasta/sequence.h
new file mode 100644
index 0000000..fa2d782
--- /dev/null
+++ b/sumalibs/libfasta/sequence.h
@@ -0,0 +1,64 @@
+/**
+ * FileName: sequence.h
+ * Authors: Tiayyba Riaz, Celine Mercier
+ * Description: Prototypes and other declarations for sequences
+ * **/
+#ifndef SEQUENCE_H_
+#define SEQUENCE_H_
+
+#include <stdint.h>
+#include <stdio.h>
+#include "../libutils/utilities.h"
+#include "fasta_header_parser.h"
+
+
+/*
+ * One FASTA record. In sequence.c, accession_id and rawheader are set by
+ * seq_fillHeader(), sequence by the seq_fill*Seq() functions, header and count
+ * by addCounts(), and uniqHead by uniqSeqsVector(); the remaining fields are
+ * not written by the code in sequence.c.
+ * NOTE(review): `struct fastaSeqPtr` below names a struct tag, not the
+ * fastaSeqPtr typedef declared at the end of this definition, so `next` and
+ * `center` are pointers to an incomplete type here -- confirm how callers
+ * cast them.
+ */
+typedef struct {
+ char* accession_id; // identifier
+ char *rawheader; // not parsed header
+ element_from_header* header; // parsed header
+ char *sequence; // DNA sequence itself
+ int32_t length; // DNA sequence's length
+ int32_t count; // abundance of the sequence
+ unsigned char *table; // 4mer occurrence table build using function buildTable
+ int32_t over; // count of 4mer with occurrences greater than 255 (overflow)
+ struct fastaSeqPtr* next; // next unique sequence for example
+ BOOL cluster_center; // whether the sequence is a cluster center or not
+ int32_t cluster_weight; // cluster weight when sequence is cluster center
+ int32_t cluster_weight_unique_ids; // cluster weight when sequence is cluster center, counting the number sequence records
+ double score; // score with cluster center for example
+ struct fastaSeqPtr* center; // pointer to the sequence's cluster center
+ int32_t center_index; // index of the sequence's cluster center
+ BOOL uniqHead; // whether the sequence is a unique head or not
+ char* columns_BIOM; // to print in BIOM format
+ int columns_BIOM_size; // size allocated for columns_BIOM
+ char* line_OTU_table; // to print in OTU table format
+ int line_OTU_table_size; // size allocated for line_OTU_table
+ struct hashtable *sample_counts; // sample counts for sumaclean
+}fastaSeq,*fastaSeqPtr;
+
+
+/*
+ * A set of records: `fastaSeqs` is a contiguous array of fastaSeq elements
+ * and `count` is the number of elements in use (as filled in by
+ * seq_readAllSeq2()).
+ */
+typedef struct {
+ int32_t count;
+ fastaSeqPtr fastaSeqs;
+}fastaSeqCount, *fastaSeqCountPtr;
+
+
+/* Reading and parsing of records */
+fastaSeqPtr seq_getNext(FILE *fp, char *fieldDelim, BOOL isStandardSeq, BOOL onlyATGC);
+char *seq_readNextFromFilebyLine(FILE* fp);
+void seq_fillSeq(char *seq, fastaSeqPtr seqElem, int seqLen);
+void seq_fillSeqOnlyATGC(char *seq, fastaSeqPtr seqElem, int seqLen);
+void seq_fillDigitSeq(char *seq, fastaSeqPtr seqElem, int seqLen);
+void seq_fillHeader(char* header, char *fieldDelim, fastaSeqPtr seqElem);
+fastaSeqCount seq_readAllSeq2(char *fileName, BOOL isStandardSeq, BOOL onlyATGC);
+/* Lookup, printing and clean-up of a loaded set */
+int32_t seq_findSeqByAccId (char *accid, fastaSeqCountPtr allseqs);
+void seq_printSeqs (fastaSeqCountPtr allseq);
+int cleanDB(fastaSeqCount);
+void addCounts(fastaSeqCount* db);
+int uniqSeqsVector(fastaSeqCount* db, fastaSeqPtr** uniqSeqs);
+/* Length statistics and count-based comparison callbacks */
+void calculateMaxAndMinLen(fastaSeqPtr* db, int n, int* lmax, int* lmin);
+void calculateMaxAndMinLenDB(fastaSeqCount db, int* lmax, int* lmin);
+int sortSeqsWithCounts(const void **s1, const void **s2);
+int reverseSortSeqsWithCounts(const void **s1, const void **s2);
+/* NOTE(review): declared here, but no definition appears in the sequence.c
+ * added by this commit -- confirm where it is implemented. */
+void readSampleCounts(fastaSeqCount* db, char* key_name);
+
+#endif /*SEQUENCE_H_*/
diff --git a/sumalibs/libfile/._Makefile b/sumalibs/libfile/._Makefile
new file mode 100644
index 0000000..6adb72e
Binary files /dev/null and b/sumalibs/libfile/._Makefile differ
diff --git a/sumalibs/libfile/Makefile b/sumalibs/libfile/Makefile
new file mode 100644
index 0000000..fc12708
--- /dev/null
+++ b/sumalibs/libfile/Makefile
@@ -0,0 +1,25 @@
+
+# Builds the static library libfile.a from fileHandling.c.
+SOURCES = fileHandling.c
+
+
+SRCS=$(SOURCES)
+
+
+# One object file per C source.
+OBJECTS= $(patsubst %.c,%.o,$(SOURCES))
+
+LIBFILE= libfile.a
+RANLIB=ranlib
+
+
+# Shared settings; presumably also provides the .c -> .o rule (not visible here).
+include ../global.mk
+
+all: $(LIBFILE)
+
+clean:
+ rm -rf $(OBJECTS) $(LIBFILE)
+ rm -f *.P
+ rm -f *.a
+
+# $? lists only the prerequisites newer than the archive.
+$(LIBFILE): $(OBJECTS)
+ ar -cr $@ $?
+ $(RANLIB) $@
diff --git a/sumalibs/libfile/fileHandling.c b/sumalibs/libfile/fileHandling.c
new file mode 100644
index 0000000..ea12e09
--- /dev/null
+++ b/sumalibs/libfile/fileHandling.c
@@ -0,0 +1,92 @@
+/**
+ * FileName: fileHandling.c
+ * Authors: Tiayyba Riaz, Celine Mercier
+ * Description: C file for file handling functions
+ * **/
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../libutils/utilities.h"
+
+/*
+ * Function Name: file_open(char* fileName, BOOL abortOnError)
+ * Description:   Opens fileName for reading and returns the stream. A NULL
+ *                name triggers ERRORABORT when abortOnError is set and
+ *                otherwise yields NULL. A failing fopen() is not treated as
+ *                an abort condition: its NULL result is returned to the caller.
+ */
+FILE *file_open(char* fileName, BOOL abortOnError)
+{
+    if (fileName == NULL && abortOnError)
+        ERRORABORT(FILE_OPENING_ERROR, "File name not given.");
+
+    if (fileName != NULL)
+        return fopen(fileName, "r");
+
+    return NULL;
+}
+
+/*
+ * Function Name: file_openrw(char* fileName, BOOL abortOnError)
+ * Description:   Same contract as file_open() but opens with mode "w+"
+ *                (read/write, truncating or creating the file).
+ */
+FILE *file_openrw(char* fileName, BOOL abortOnError)
+{
+    if (fileName == NULL && abortOnError)
+        ERRORABORT(FILE_OPENING_ERROR, "File name not given.");
+
+    if (fileName != NULL)
+        return fopen(fileName, "w+");
+
+    return NULL;
+}
+
+/*
+ * Function Name: file_nextChar(FILE* fp)
+ * Description:   Reads and returns the next character of fp. Returns '\0'
+ *                when fp is NULL, when the end-of-file indicator is already
+ *                set, or when fgetc() itself reports EOF (end of input or a
+ *                read error).
+ */
+char file_nextChar(FILE* fp)
+{
+    int c;
+
+    if (fp == NULL || feof(fp))
+        return '\0';
+
+    /* Keep the result as int until EOF has been ruled out: casting EOF to
+     * char used to hand callers (char)-1 instead of the documented '\0'. */
+    c = fgetc(fp);
+    if (c == EOF)
+        return '\0';
+
+    return (char) c;
+}
+
+/*
+ * Function Name: file_nextLine(FILE *fp, char *buffer, int32_t bufferSize)
+ * Description:   Reads the next line of fp into buffer with fgets() (at most
+ *                bufferSize - 1 characters, newline kept) and returns buffer.
+ *                Returns NULL when fp is NULL, when the end-of-file indicator
+ *                is already set, or when fgets() returns NULL.
+ */
+char *file_nextLine(FILE *fp, char *buffer, int32_t bufferSize)
+{
+    if (fp == NULL || feof(fp))
+        return NULL;
+
+    return fgets(buffer, bufferSize, fp);
+}
+
+
+/*
+ * Function Name: exitIfEmptyFile(FILE *file)
+ * Description:   Ends the program with status 1 when the stream cannot be
+ *                positioned at its end or when its size is 0. Otherwise the
+ *                offset held on entry is restored. A NULL stream is ignored,
+ *                so that the caller's own "could not open" handling decides
+ *                what happens instead of ftell(NULL) being executed.
+ */
+void exitIfEmptyFile(FILE *file)
+{
+    long savedOffset;
+
+    if (file == NULL)
+        return;
+
+    savedOffset = ftell(file);
+    if (fseek(file, 0, SEEK_END) != 0)
+    {
+        fprintf(stderr, "\nError moving the offset in an input file\n");
+        exit(1);
+    }
+
+    if (ftell(file) == 0)
+    {
+        fprintf(stderr, "\nInput file is empty.\n");
+        exit(1);
+    }
+    fseek(file, savedOffset, SEEK_SET);
+}
+
diff --git a/sumalibs/libfile/fileHandling.h b/sumalibs/libfile/fileHandling.h
new file mode 100644
index 0000000..334d8c3
--- /dev/null
+++ b/sumalibs/libfile/fileHandling.h
@@ -0,0 +1,20 @@
+/**
+ * FileName: fileHandling.h
+ * Authors: Tiayyba Riaz, Celine Mercier
+ * Description: Header file for file handling functions
+ * **/
+
+
+#ifndef FILEHANDLING_H_
+#define FILEHANDLING_H_
+
+#include "../libutils/utilities.h"
+/* Prototypes */
+
+/* Opens fileName with mode "r"; NULL on a NULL name (unless aborting) or fopen failure. */
+FILE *file_open(char* fileName, BOOL abortOnError);
+/* Next character of fp. */
+char file_nextChar(FILE* fp);
+/* Next line of fp via fgets(); NULL when fp is NULL or at end of file. */
+char *file_nextLine(FILE *fp, char *buffer, int32_t bufferSize);
+/* Same as file_open() but with mode "w+". */
+FILE *file_openrw(char* fileName, BOOL abortOnError);
+/* exit(1) when the stream is empty or cannot be sought; offset restored otherwise. */
+void exitIfEmptyFile(FILE *file);
+
+#endif /*FILEHANDLING_H_*/
diff --git a/sumalibs/liblcs/._Makefile b/sumalibs/liblcs/._Makefile
new file mode 100644
index 0000000..6adb72e
Binary files /dev/null and b/sumalibs/liblcs/._Makefile differ
diff --git a/sumalibs/liblcs/Makefile b/sumalibs/liblcs/Makefile
new file mode 100644
index 0000000..43a787c
--- /dev/null
+++ b/sumalibs/liblcs/Makefile
@@ -0,0 +1,25 @@
+
+# Builds the static library liblcs.a. Only the two sources listed here are
+# compiled; the _lcs*.c / _lcs*.h files of this directory are not in SOURCES.
+SOURCES = sse_banded_LCS_alignment.c \
+ upperband.c
+
+SRCS=$(SOURCES)
+
+
+# One object file per C source.
+OBJECTS= $(patsubst %.c,%.o,$(SOURCES))
+
+LIBFILE= liblcs.a
+RANLIB=ranlib
+
+
+# Shared settings; presumably also provides the .c -> .o rule (not visible here).
+include ../global.mk
+
+all: $(LIBFILE)
+
+clean:
+ rm -rf $(OBJECTS) $(LIBFILE)
+ rm -f *.P
+ rm -f *.a
+
+# $? lists only the prerequisites newer than the archive.
+$(LIBFILE): $(OBJECTS)
+ ar -cr $@ $?
+ $(RANLIB) $@
diff --git a/sumalibs/liblcs/_lcs.ext.1.c b/sumalibs/liblcs/_lcs.ext.1.c
new file mode 100644
index 0000000..eeb1a21
--- /dev/null
+++ b/sumalibs/liblcs/_lcs.ext.1.c
@@ -0,0 +1,168 @@
+#include "_lcs.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+
+#include <stdio.h>
+
+
+
+// Allocate (or grow) a column able to hold 'length'+1 cells for both the
+// `data` and the `score` arrays. Cells are int8_t when mode8bits is set and
+// int16_t otherwise.
+//
+// column == NULL : a new column_t is created.
+// column != NULL : it is reused; its arrays are replaced only when the
+//                  requested byte size exceeds column->size. The new arrays
+//                  are uninitialised (old contents are not carried over).
+//
+// Returns the column, or NULL on allocation failure. On failure an existing
+// column is left untouched and still owned by the caller; a column created
+// by this call is freed.
+
+column_t* allocateColumn(int length,column_t *column, bool mode8bits)
+{
+    int size;
+    bool newc = false;
+
+    // Byte size of one array
+
+    size = (length+1) * ((mode8bits) ? sizeof(int8_t):sizeof(int16_t));
+
+    // If the pointer to the old column is NULL we allocate
+    // a new column
+
+    if (column==NULL)
+    {
+        column = malloc(sizeof(column_t));
+        if (!column)
+            return NULL;
+
+        column->size = 0;
+        column->data.shrt=NULL;
+        column->score.shrt=NULL;
+        newc = true;
+    }
+
+    // Otherwise we check if its size is sufficient
+    // or if it should be extended
+
+    if (size > column->size)
+    {
+        int16_t *newdata = malloc(size);
+        int16_t *newscore= malloc(size);
+
+        if (newdata==NULL || newscore==NULL)
+        {
+            fprintf(stderr,"Allocation Error on column for a size of %d\n" , size);
+
+            // Release whichever half succeeded; it used to be leaked
+            free(newdata);
+            free(newscore);
+
+            if (newc)
+                free(column);
+
+            return NULL;
+        }
+
+        // The previous arrays were dropped without being freed before
+        free(column->data.shrt);
+        free(column->score.shrt);
+
+        column->data.shrt = newdata;
+        column->score.shrt= newscore;
+        column->size = size;
+    }
+
+    return column;
+}
+
+// Releases a column together with its data and score arrays.
+// A NULL column is accepted and ignored.
+void freeColumn(column_p column)
+{
+    if (column == NULL)
+        return;
+
+    // free(NULL) is a no-op, so the arrays need no individual test
+    free(column->data.shrt);
+    free(column->score.shrt);
+    free(column);
+}
+
+// Default entry point: forwards to the 16-bit implementation fastLCSScore16()
+// and returns its result unchanged.
+int fastLCSScore(const char* seq1, const char* seq2,column_pp column,int32_t* lpath)
+{
+    int result = fastLCSScore16(seq1,seq2,column,lpath);
+
+    return result;
+}
+
+// Scalar longest-common-subsequence score of seq1 and seq2, computed with a
+// single rolling row of int32_t scores.
+//
+// ppcolumn : optional reusable work column. When given, *ppcolumn is used as
+//            the starting column and receives the column used by this call;
+//            when NULL a temporary column is allocated and freed.
+// lpath    : optional; receives the path value tracked next to the score.
+//            NOTE(review): path[] is only initialised, never written back in
+//            the main loop, and a "left" move does not add a step, so *lpath
+//            is probably not a true alignment length -- left as it was.
+//
+// Returns the LCS length, or -1 when the work column cannot be allocated
+// (same convention as the SSE implementation in _lcs_fast.h).
+int simpleLCS(const char* seq1, const char* seq2,column_pp ppcolumn,int32_t* lpath)
+{
+    int lseq1,lseq2; // length of the both sequences
+    int lcs;
+    int itmp; // tmp variables for swap
+    const char* stmp; //
+    int32_t *score;
+    int32_t *path;
+    column_t *column;
+    int32_t i,j;
+    int32_t sl,su,sd;
+    int32_t pl,pu,pd;
+
+    // Made seq1 the longest sequences
+    lseq1=strlen(seq1);
+    lseq2=strlen(seq2);
+
+    if (lseq1 < lseq2)
+    {
+        itmp=lseq1;
+        lseq1=lseq2;
+        lseq2=itmp;
+
+        stmp=seq1;
+        seq1=seq2;
+        seq2=stmp;
+    }
+
+    // one extra row and column for the empty prefix
+    lseq1++;
+    lseq2++;
+
+    if (ppcolumn)
+        column = *ppcolumn;
+    else
+        column=NULL;
+
+    // 2*lseq1 16-bit cells give room for lseq1 32-bit cells
+    column = allocateColumn(lseq1*2,column,0);
+
+    // The result used to be dereferenced without this check
+    if (column == NULL)
+        return -1;
+
+    score = (int32_t*) column->score.shrt;
+    path = (int32_t*) column->data.shrt;
+
+    memset(score,0,lseq1 * sizeof(int32_t));
+
+    for (j=0; j < lseq1; j++)
+        path[j]=j;
+
+    // Values of row 0, used as the result when the shorter sequence is
+    // empty and the loop below never runs (sl/pl used to be read
+    // uninitialised in that case)
+    sl = 0;
+    pl = path[lseq1-1];
+
+    for (i=1; i< lseq2; i++)
+    {
+        sl=0;
+        pl=i;
+        for (j=1; j < lseq1; j++)
+        {
+            // score[j-1] and score[j] still hold the previous row here
+            sd=score[j-1] + (seq2[i-1]==seq1[j-1] ? 1:0);
+            pd=path[j-1] + 1;
+
+            su=score[j];
+            pu=path[j] + 1;
+
+            // store the current row's value for column j-1
+            score[j-1]=sl;
+
+            if (su > sl) sl=su, pl=pu;
+            if (sd > sl) sl=sd, pl=pd;
+        }
+
+        // The last column was never written back, so the next row read a
+        // stale 0 as its "up" value and could under-count the LCS
+        score[lseq1-1]=sl;
+    }
+
+    lcs = sl;
+    if(lpath) *lpath=pl;
+
+    if (ppcolumn)
+        *ppcolumn=column;
+    else
+        freeColumn(column);
+
+    return lcs;
+}
+
diff --git a/sumalibs/liblcs/_lcs.ext.2.c b/sumalibs/liblcs/_lcs.ext.2.c
new file mode 100644
index 0000000..381dc6a
--- /dev/null
+++ b/sumalibs/liblcs/_lcs.ext.2.c
@@ -0,0 +1,34 @@
+#include "_lcs.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+
+#include <stdio.h>
+
+
+
+
+/*
+ * 16-bit instantiation of the template in _lcs_fast.h.
+ * The macros below are consumed by that header: FASTLCSSCORE becomes the name
+ * of the generated function (fastLCSScore16), VSIZE is the number of elements
+ * per SSE register, VTYPE/STYPE are the vector and scalar cell types, CMENB
+ * selects the matching member of the column_t unions, and VMODE is both
+ * tested with #if and passed to allocateColumn() as its mode8bits argument.
+ * The *_REG / SET_CONST / GET_* names map the generic operations onto the
+ * 16-bit _MM_* wrappers.
+ */
+#define VSIZE (8)
+#define VTYPE vInt16
+#define STYPE int16_t
+#define CMENB shrt
+#define VMODE false
+#define FASTLCSSCORE fastLCSScore16
+#define INSERT_REG _MM_INSERT_EPI16
+#define EXTRACT_REG _MM_EXTRACT_EPI16
+#define EQUAL_REG _MM_CMPEQ_EPI16
+#define GREATER_REG _MM_CMPGT_EPI16
+#define SMALLER_REG _MM_CMPLT_EPI16
+#define ADD_REG _MM_ADD_EPI16
+#define SUB_REG _MM_SUB_EPI16
+#define AND_REG _MM_AND_SI128
+#define ANDNOT_REG _MM_ANDNOT_SI128
+#define OR_REG _MM_OR_SI128
+#define SET_CONST _MM_SET1_EPI16
+#define GET_MAX _MM_MAX_EPI16
+#define GET_MIN _MM_MIN_EPI16
+#define MIN_SCORE INT16_MIN
+#define MAX_SCORE 32000
+
+/* Expands the template with the definitions above. */
+#include "_lcs_fast.h"
diff --git a/sumalibs/liblcs/_lcs.ext.3.c b/sumalibs/liblcs/_lcs.ext.3.c
new file mode 100644
index 0000000..5c3a150
--- /dev/null
+++ b/sumalibs/liblcs/_lcs.ext.3.c
@@ -0,0 +1,34 @@
+#include "_lcs.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+
+#include <stdio.h>
+
+
+
+
+/*
+ * 8-bit instantiation of the template in _lcs_fast.h.
+ * Same macro set as the 16-bit variant, with 16 elements per register,
+ * int8_t cells stored in the `byte` members of column_t, VMODE true (which
+ * enables the lanes 8..15 code in _lcs_fast.h) and fastLCSScore8 as the
+ * generated function name. Scores are limited to the int8_t range
+ * (MAX_SCORE 127).
+ */
+#define VSIZE (16)
+#define VTYPE vInt8
+#define STYPE int8_t
+#define CMENB byte
+#define VMODE true
+#define FASTLCSSCORE fastLCSScore8
+#define INSERT_REG _MM_INSERT_EPI8
+#define EXTRACT_REG _MM_EXTRACT_EPI8
+#define EQUAL_REG _MM_CMPEQ_EPI8
+#define GREATER_REG _MM_CMPGT_EPI8
+#define SMALLER_REG _MM_CMPLT_EPI8
+#define ADD_REG _MM_ADD_EPI8
+#define SUB_REG _MM_SUB_EPI8
+#define AND_REG _MM_AND_SI128
+#define ANDNOT_REG _MM_ANDNOT_SI128
+#define OR_REG _MM_OR_SI128
+#define SET_CONST _MM_SET1_EPI8
+#define GET_MAX _MM_MAX_EPI8
+#define GET_MIN _MM_MIN_EPI8
+#define MIN_SCORE INT8_MIN
+#define MAX_SCORE 127
+
+/* Expands the template with the definitions above. */
+#include "_lcs_fast.h"
diff --git a/sumalibs/liblcs/_lcs.h b/sumalibs/liblcs/_lcs.h
new file mode 100644
index 0000000..cfc032f
--- /dev/null
+++ b/sumalibs/liblcs/_lcs.h
@@ -0,0 +1,29 @@
+#include "../libsse/_sse.h"
+
+/* Local boolean emulation: bool is a char, false/true are comparison results
+ * (so they are also usable in #if expressions). */
+#define bool char
+#define false (1==0)
+#define true (1==1)
+
+/*
+ * Work column shared by the LCS routines: two arrays of equal byte size,
+ * viewed either as int16_t or as int8_t cells depending on the mode in use.
+ * `size` is the byte size recorded by allocateColumn().
+ * NOTE(review): allocateColumn() computes that size as an int and stores it
+ * in this int16_t -- confirm that sequence lengths keep it in range.
+ */
+typedef struct {
+ int16_t size;
+
+ union { int16_t *shrt;
+ int8_t *byte;
+ } data;
+
+ union { int16_t *shrt;
+ int8_t *byte;
+ } score;
+
+
+} column_t, **column_pp, *column_p;
+
+/* Creates the column when `column` is NULL, grows it when too small; NULL on failure. */
+column_p allocateColumn(int length,column_t *column, bool mode8bits);
+
+/* Frees a column and both of its arrays; NULL is accepted. */
+void freeColumn(column_p column);
+
+/* LCS scoring: SSE variants generated from _lcs_fast.h and the scalar fallback.
+ * ppcolumn is an optional reusable work column, lpath an optional output. */
+int fastLCSScore16(const char* seq1, const char* seq2,column_pp ppcolumn,int32_t* lpath);
+int fastLCSScore8(const char* seq1, const char* seq2,column_pp ppcolumn,int32_t* lpath);
+int simpleLCS(const char* seq1, const char* seq2,column_pp ppcolumn,int32_t* lpath);
+
+/* Forwards to fastLCSScore16(). */
+int fastLCSScore(const char* seq1, const char* seq2,column_pp column,int32_t* lpath);
diff --git a/sumalibs/liblcs/_lcs_fast.h b/sumalibs/liblcs/_lcs_fast.h
new file mode 100644
index 0000000..3d0ac00
--- /dev/null
+++ b/sumalibs/liblcs/_lcs_fast.h
@@ -0,0 +1,597 @@
+
+/*
+ * Print a SSE register for debug purpose
+ */
+
+#ifdef __SSE2__
+
+/*
+ * Debug helper: extracts every lane of register r with EXTRACT_REG and prints
+ * them on one line of stdout. Lanes 0..7 are always printed; lanes 8..15 are
+ * compiled in only when VMODE is true. Each lane index is written out
+ * literally rather than taken from a loop variable.
+ */
+static void printreg(VTYPE r)
+{
+ STYPE a0,a1,a2,a3,a4,a5,a6,a7;
+#if VMODE
+ STYPE a8,a9,a10,a11,a12,a13,a14,a15;
+#endif
+
+ a0= EXTRACT_REG(r,0);
+ a1= EXTRACT_REG(r,1);
+ a2= EXTRACT_REG(r,2);
+ a3= EXTRACT_REG(r,3);
+ a4= EXTRACT_REG(r,4);
+ a5= EXTRACT_REG(r,5);
+ a6= EXTRACT_REG(r,6);
+ a7= EXTRACT_REG(r,7);
+#if VMODE
+ a8= EXTRACT_REG(r,8);
+ a9= EXTRACT_REG(r,9);
+ a10= EXTRACT_REG(r,10);
+ a11= EXTRACT_REG(r,11);
+ a12= EXTRACT_REG(r,12);
+ a13= EXTRACT_REG(r,13);
+ a14= EXTRACT_REG(r,14);
+ a15= EXTRACT_REG(r,15);
+#endif
+
+/* The format string is assembled from adjacent literals so that the extra
+ * eight conversions exist only in the 16-lane build. */
+printf( "a00 :-> %7d %7d %7d %7d "
+ " %7d %7d %7d %7d "
+#if VMODE
+ "%7d %7d %7d %7d "
+ " %7d %7d %7d %7d "
+#endif
+ "\n"
+ , a0,a1,a2,a3,a4,a5,a6,a7
+#if VMODE
+ , a8,a9,a10,a11,a12,a13,a14,a15
+#endif
+);
+}
+
+/*
+ * Returns a copy of SSE register r in which lane p holds the value v.
+ * The switch turns the run-time lane number into a literal index for
+ * INSERT_REG (presumably because the underlying instruction needs a
+ * compile-time constant -- confirm in _sse.h). Lanes 8..15 exist only when
+ * VMODE is true. A lane number outside the handled range yields an all-zero
+ * register.
+ */
+
+static inline VTYPE insert_reg(VTYPE r, STYPE v, int p)
+{
+ switch (p) {
+ case 0: return INSERT_REG(r,v,0);
+ case 1: return INSERT_REG(r,v,1);
+ case 2: return INSERT_REG(r,v,2);
+ case 3: return INSERT_REG(r,v,3);
+ case 4: return INSERT_REG(r,v,4);
+ case 5: return INSERT_REG(r,v,5);
+ case 6: return INSERT_REG(r,v,6);
+ case 7: return INSERT_REG(r,v,7);
+#if VMODE
+ case 8: return INSERT_REG(r,v,8);
+ case 9: return INSERT_REG(r,v,9);
+ case 10: return INSERT_REG(r,v,10);
+ case 11: return INSERT_REG(r,v,11);
+ case 12: return INSERT_REG(r,v,12);
+ case 13: return INSERT_REG(r,v,13);
+ case 14: return INSERT_REG(r,v,14);
+ case 15: return INSERT_REG(r,v,15);
+#endif
+ }
+ return _MM_SETZERO_SI128();
+}
+
+/*
+ * Returns the value held in lane p of SSE register r.
+ * Mirror of insert_reg(): the switch supplies EXTRACT_REG with a literal lane
+ * index, lanes 8..15 exist only when VMODE is true, and a lane number outside
+ * the handled range yields 0.
+ */
+static inline STYPE extract_reg(VTYPE r, int p)
+{
+ switch (p) {
+ case 0: return EXTRACT_REG(r,0);
+ case 1: return EXTRACT_REG(r,1);
+ case 2: return EXTRACT_REG(r,2);
+ case 3: return EXTRACT_REG(r,3);
+ case 4: return EXTRACT_REG(r,4);
+ case 5: return EXTRACT_REG(r,5);
+ case 6: return EXTRACT_REG(r,6);
+ case 7: return EXTRACT_REG(r,7);
+#if VMODE
+ case 8: return EXTRACT_REG(r,8);
+ case 9: return EXTRACT_REG(r,9);
+ case 10: return EXTRACT_REG(r,10);
+ case 11: return EXTRACT_REG(r,11);
+ case 12: return EXTRACT_REG(r,12);
+ case 13: return EXTRACT_REG(r,13);
+ case 14: return EXTRACT_REG(r,14);
+ case 15: return EXTRACT_REG(r,15);
+#endif
+ }
+ return 0;
+}
+
+/*
+ * Helpers for the scoring function that follows; they rely on its local
+ * variables lseq1, lseq2, seq1 and seq2.
+ * GET_H_SYMBOLE / GET_V_SYMBOLE : character p-1 of the sequence, or a filler
+ *     (255 horizontally, 0 vertically) when p is 0 or not below the
+ *     sequence length + 1; the two fillers differ, so padding positions
+ *     never compare equal to each other.
+ * LSHIFT_SCORE   : shifts the register left by sizeof(STYPE) bytes, i.e. by
+ *     one lane.
+ * SET_H_SYMBOLE  : writes the horizontal symbol for position s into lane p.
+ * PUSH_V_SYMBOLE : shifts the register by one lane and writes the vertical
+ *     symbol for position s into lane 0.
+ * EQUAL          : per-lane 1 where f1 and f2 are equal, 0 elsewhere.
+ */
+#define GET_H_SYMBOLE(s,p) ((p && p < lseq1) ? (s)[(p)-1]:255)
+#define GET_V_SYMBOLE(s,p) ((p && p < lseq2) ? (s)[(p)-1]:0)
+
+#define LSHIFT_SCORE(r) { r = _MM_SLLI_SI128((r),sizeof(STYPE)); }
+#define SET_H_SYMBOLE(r,p,s) { r = insert_reg((r),(STYPE)GET_H_SYMBOLE(seq1,(s)),(p)); }
+#define PUSH_V_SYMBOLE(r,s) { r = insert_reg(_MM_SLLI_SI128((r),sizeof(STYPE)),(STYPE)GET_V_SYMBOLE(seq2,(s)),0); }
+#define EQUAL(f1,f2) _MM_AND_SI128(EQUAL_REG((f1),(f2)),SET_CONST(1))
+
+int FASTLCSSCORE(const char* seq1, const char* seq2,column_pp ppcolumn,int32_t* lpath)
+{
+ int lseq1,lseq2; // length of the both sequences
+
+ int itmp; // tmp variables for swap
+ const char* stmp; //
+
+ int nbands; // Number of bands of width eight in the score matrix
+ int lastband; // width of the last band
+
+ // Register for scanning the score matrix
+ VTYPE minus1;
+ VTYPE minus2;
+ VTYPE current;
+
+ VTYPE left;
+ VTYPE top;
+ VTYPE diag;
+
+
+ VTYPE sminus1;
+ VTYPE sminus2;
+ VTYPE scurrent;
+
+ VTYPE sleft;
+ VTYPE stop;
+ VTYPE sdiag;
+
+ VTYPE way;
+ VTYPE onevect;
+ VTYPE maxvect;
+
+ VTYPE fhseq; // The fragment of the horizontal sequence
+ // to consider for aligment
+ VTYPE fvseq; // The fragment of the horizontal sequence
+ // to consider for aligment
+ VTYPE match;
+
+ int band;
+ int line;
+ int limit;
+
+ int lcs;
+
+ int h;
+ int i;
+
+ column_t *column;
+
+
+ // Made seq1 the longest sequences
+ lseq1=strlen(seq1);
+ lseq2=strlen(seq2);
+
+ if (lseq1 < 10 || lseq2 < 10)
+ return simpleLCS(seq1,seq2,ppcolumn,lpath);
+
+ if (lseq1 < lseq2)
+ {
+ itmp=lseq1;
+ lseq1=lseq2;
+ lseq2=itmp;
+
+ stmp=seq1;
+ seq1=seq2;
+ seq2=stmp;
+ }
+
+ // we add one to both lengths for taking into
+ // account the extra line and column in the score
+ // matrix
+
+ lseq1++;
+ lseq2++;
+
+ // a band sized to the smallest sequence is allocated
+
+ if (ppcolumn)
+ column = *ppcolumn;
+ else
+ column=NULL;
+
+ column = allocateColumn(lseq2,column,VMODE);
+
+ // Check memory allocation
+ if (column == NULL)
+ return -1;
+
+ for (i=0; i<lseq2;i++)
+ {
+ column->data.CMENB[i]=MIN_SCORE;
+ column->score.CMENB[i]=-1;
+ }
+
+ nbands = lseq1 / VSIZE; // You have VSIZE element in one SSE register
+ // Alignment will be realized in nbands
+
+ lastband = lseq1 - (nbands * VSIZE); // plus one of width lastband except if
+ // lastband==0
+
+ if (lastband) nbands++;
+ else lastband=VSIZE;
+
+ lastband--;
+
+// printf("seq1 : %s seq2 : %s\n",seq1,seq2);
+
+
+ minus2 = SET_CONST(MIN_SCORE);
+ minus1 = _MM_SETZERO_SI128();
+
+ sminus1= _MM_SETZERO_SI128();
+ sminus2= _MM_SETZERO_SI128();
+ onevect= SET_CONST(1);
+ maxvect= SET_CONST(MAX_SCORE);
+
+ h=0;
+
+ fhseq = _MM_SETZERO_SI128();
+ fvseq = _MM_SETZERO_SI128();
+
+ //
+ // Beginning of the first band
+ //
+
+ for (line = 0; line < VSIZE; line++,h++) // avant VSIZE - 1
+ {
+// printf("line= %4d h= %4d\n",line,h);
+ SET_H_SYMBOLE(fhseq,line,h)
+ PUSH_V_SYMBOLE(fvseq,line)
+ minus2 = insert_reg(minus2,0,h);
+ minus1 = insert_reg(minus1,MIN_SCORE,line); // 0 avant
+ match = EQUAL(fhseq,fvseq);
+
+ if (lpath)
+ {
+ sminus2 = insert_reg(sminus2,line-1,line); // Je ne suis pas certain de l'initialisation
+ sminus1 = insert_reg(sminus1,0,line);
+ }
+
+// printreg(fvseq);
+// printreg(fhseq);
+// printreg(match);
+// printf("================================\n");
+
+ current = minus1; // The best score is the upper one
+ // It cannot be the best as set to MIN_SCORE
+
+ left = minus1;
+
+// printf("Vert = "); printreg(current);
+
+
+ LSHIFT_SCORE(minus1) // I shift minus1 so now I'll compare with the left position
+ minus1=insert_reg(minus1,(column)->data.CMENB[line],0);
+
+ top=minus1;
+
+ if (lpath)
+ {
+ sleft=sminus1; // I store the path length corresponding to the upper path
+ LSHIFT_SCORE(sminus1) // I shift to prepare the score coming from the left side
+ sminus1=insert_reg(sminus1,(column)->score.CMENB[line],0);
+ stop=sminus1;
+ sdiag=sminus2;
+
+ }
+
+// printf("Horz = "); printreg(minus1);
+
+ current = GET_MAX(current,minus1); // Look for the best between upper and left
+
+// printf("BstHV= "); printreg(current);
+//
+// printf("Diag = "); printreg(ADD_REG(minus2,match));
+
+ diag=minus2;
+
+ // minus2 = ; // Minus2 contains the diagonal score, so I add the match reward
+ // Diag score are setup to 0 so this one will win on the first iteration
+ current = GET_MAX(current,ADD_REG(minus2,match));
+
+ if (lpath)
+ {
+// printf("\n");
+// printf("current: ");
+// printreg(current);
+// printf("current: ");
+// printreg(SUB_REG(current,match));
+// printf("diag : ");
+// printreg(diag);
+// printf("left : ");
+// printreg(left);
+// printf("top : ");
+// printreg(top);
+
+
+ way = EQUAL_REG(SUB_REG(current,match),diag);
+ scurrent= OR_REG(AND_REG(way,sdiag),
+ ANDNOT_REG(way,maxvect));
+// printf("sdiag : ");
+// printreg(scurrent);
+ way = EQUAL_REG(current,left);
+ scurrent= GET_MIN(scurrent,OR_REG(AND_REG(way,sleft),
+ ANDNOT_REG(way,maxvect)));
+
+// printf("sleft : ");
+// printreg(scurrent);
+ way = EQUAL_REG(current,top);
+ scurrent= GET_MIN(scurrent,OR_REG(AND_REG(way,stop),
+ ANDNOT_REG(way,maxvect)));
+// printf("stop : ");
+// printreg(scurrent);
+
+ scurrent= ADD_REG(scurrent,onevect);
+
+ sminus2=sminus1;
+ sminus1=scurrent;
+ }
+// printf("line %d :Best = ",line); printreg(current);
+//
+// printf("================================\n");
+
+ minus2=minus1;
+ minus1=current;
+
+// printf("min2 = "); printreg(minus2);
+// printf("min1 = "); printreg(minus1);
+// printf("================================\n");
+
+// printf("\n");
+// printf("sdiag : ");
+// printreg(sminus2);
+// printf("scur : ");
+// printreg(scurrent);
+// printf("current: ");
+// printreg(current);
+// printf("%8s\n",seq1);
+// printf("%8s\n",seq2);
+// printf("================================\n");
+
+
+ } ///// <<<<<<<<------- Fin du debut de la premiere bande
+
+
+// printf("================================\n");
+
+ (column)->data.CMENB[lseq2-VSIZE+line]=EXTRACT_REG(current,VSIZE-1);
+
+
+ if (lpath)
+ (column)->score.CMENB[lseq2-VSIZE+line]=EXTRACT_REG(scurrent,VSIZE-1);
+
+
+
+ for (band=0; band < nbands; band++)
+ {
+// SET_H_SYMBOLE(fhseq,line,h)
+// minus2 = insert_reg(minus2,0,line);
+// minus1 = insert_reg(minus1,MIN_SCORE,line); // 0 avant
+// h++;
+
+ for (; line < lseq2; line++)
+ {
+// printf("Je tourne avec line= %d \n",line);
+ PUSH_V_SYMBOLE(fvseq,line)
+
+ match = EQUAL(fhseq,fvseq);
+
+// printreg(fvseq);
+// printreg(fhseq);
+// printreg(match);
+// printf("================================\n");
+
+ current = minus1;
+
+ left = minus1;
+
+ // Store the last current score in extra column
+ (column)->data.CMENB[line-VSIZE]=EXTRACT_REG(current,VSIZE-1);
+ LSHIFT_SCORE(minus1)
+ minus1=insert_reg(minus1,(column)->data.CMENB[line],0);
+
+ top = minus1;
+
+// printf("Vert = "); printreg(current);
+
+ if (lpath)
+ {
+ sleft= sminus1;
+ (column)->score.CMENB[line-VSIZE]=EXTRACT_REG(scurrent,VSIZE-1);
+ LSHIFT_SCORE(sminus1)
+ sminus1=insert_reg(sminus1,(column)->score.CMENB[line],0);
+ stop=sminus1;
+ sdiag=sminus2;
+ }
+
+// printf("line = %d --> get = %d\n",line,(column)->data.CMENB[line]);
+
+// printf("Horz = "); printreg(minus1);
+
+ current = GET_MAX(current,minus1);
+
+ diag=minus2;
+
+ current = GET_MAX(current,ADD_REG(minus2,match));
+
+ if (lpath)
+ {
+// printf("\n");
+// printf("current: ");
+// printreg(current);
+// printf("current: ");
+// printreg(SUB_REG(current,match));
+// printf("diag : ");
+// printreg(diag);
+// printf("left : ");
+// printreg(left);
+// printf("top : ");
+// printreg(top);
+
+ way = EQUAL_REG(SUB_REG(current,match),diag);
+ scurrent= OR_REG(AND_REG(way,sdiag),
+ ANDNOT_REG(way,maxvect));
+
+// printf("sdiag : ");
+// printreg(scurrent);
+
+ way = EQUAL_REG(current,left);
+ scurrent= GET_MIN(scurrent,OR_REG(AND_REG(way,sleft),
+ ANDNOT_REG(way,maxvect)));
+
+// printf("sleft : ");
+// printreg(scurrent);
+
+ way = EQUAL_REG(current,top);
+ scurrent= GET_MIN(scurrent,OR_REG(AND_REG(way,stop),
+ ANDNOT_REG(way,maxvect)));
+
+// printf("stop : ");
+// printreg(scurrent);
+
+ scurrent= ADD_REG(scurrent,onevect);
+
+ sminus2=sminus1;
+ sminus1=scurrent;
+ }
+
+ minus2=minus1;
+ minus1=current;
+
+// printf("\n");
+// printf("sdiag : ");
+// printreg(sminus2);
+// printf("scur : ");
+// printreg(scurrent);
+// printf("current: ");
+// printreg(current);
+// printf("%8s\n",seq1);
+// printf("%8s\n",seq2);
+ }
+// printf("================================\n");
+
+ // end of the band and beginnig of the next one
+
+ limit=(band==(nbands-1)) ? lastband:VSIZE;
+
+ for (line = 0; line < limit; line++,h++)
+ {
+// printf("Je fini avec line= %d \n",line);
+
+ SET_H_SYMBOLE(fhseq,line,h)
+ PUSH_V_SYMBOLE(fvseq,line)
+
+
+ minus2 = insert_reg(minus2,MIN_SCORE,line);
+ minus1 = insert_reg(minus1,MIN_SCORE,line);
+ current = minus1;
+ left=minus1;
+
+ match = EQUAL(fhseq,fvseq);
+
+ if (lpath)
+ {
+ sminus2 = insert_reg(sminus2,lseq2-VSIZE+line,line);
+ sminus1 = insert_reg(sminus1,h,line);
+ sleft= sminus1;
+ }
+
+
+// printf("\n");
+// printf("fhseq = "); printreg(fhseq);
+// printf("fvseq = "); printreg(fvseq);
+// printf("----------------------------------------------------------------\n");
+// printf("match = "); printreg(match);
+
+
+ (column)->data.CMENB[lseq2-VSIZE+line]=EXTRACT_REG(current,VSIZE-1);
+ LSHIFT_SCORE(minus1)
+ minus1=insert_reg(minus1,(column)->data.CMENB[line],0);
+ top=minus1;
+
+ current = GET_MAX(current,minus1);
+
+ if (lpath)
+ {
+ (column)->score.CMENB[lseq2-VSIZE+line]=EXTRACT_REG(scurrent,VSIZE-1);
+ LSHIFT_SCORE(sminus1)
+ sminus1=insert_reg(sminus1,(column)->score.CMENB[line],0);
+ stop=sminus1;
+ sdiag=sminus2;
+
+ way = EQUAL_REG(current,minus1);
+
+ scurrent= OR_REG(AND_REG(way,sminus1),
+ ANDNOT_REG(way,scurrent));
+ }
+
+
+ diag=minus2;
+
+ current = GET_MAX(current,ADD_REG(minus2,match));
+
+ if (lpath)
+ {
+ way = EQUAL_REG(SUB_REG(current,match),diag);
+ scurrent= OR_REG(AND_REG(way,sdiag),
+ ANDNOT_REG(way,maxvect));
+
+ way = EQUAL_REG(current,left);
+ scurrent= GET_MIN(scurrent,OR_REG(AND_REG(way,sleft),
+ ANDNOT_REG(way,maxvect)));
+
+ way = EQUAL_REG(current,top);
+ scurrent= GET_MIN(scurrent,OR_REG(AND_REG(way,stop),
+ ANDNOT_REG(way,maxvect)));
+
+ scurrent= ADD_REG(scurrent,onevect);
+
+ sminus2=sminus1;
+ sminus1=scurrent;
+ }
+
+// printf("currt = "); printreg(current);
+
+ minus2=minus1;
+ minus1=current;
+
+// printf("\n");
+// printf("sdiag : ");
+// printreg(sminus2);
+// printf("scur : ");
+// printreg(scurrent);
+// printf("current: ");
+// printreg(current);
+// printf("%8s\n",seq1);
+// printf("%8s\n",seq2);
+
+// printf("Je stocke line= %d la valeur %d\n",lseq2-VSIZE+line,(column)->data.CMENB[lseq2-VSIZE+line]);
+ }
+
+ }
+
+// printf("\n");
+// printf("line = %d, h= %d, lastband = %d\n",line,h,lastband);
+// printf("currt = "); printreg(current);
+ lcs = extract_reg(current,lastband);
+
+ if(lpath)
+ *lpath= extract_reg(scurrent,lastband);
+// printf("lastband = %d (%d) lcs = %d\n",lastband,lseq2,lcs);
+
+ if (ppcolumn)
+ *ppcolumn=column;
+ else
+ freeColumn(column);
+
+ return lcs;
+}
+
+#else
+int FASTLCSSCORE(const char* seq1, const char* seq2,column_pp ppcolumn,int32_t* lpath)
+{   /* Variant compiled in the #else branch (no __SSE2__): every argument is handed unchanged to simpleLCS(). */
+    return(simpleLCS(seq1, seq2, ppcolumn, lpath));
+}
+
+#endif /* __SSE2__ */
+
diff --git a/sumalibs/liblcs/banded_LCS_alignment.c b/sumalibs/liblcs/banded_LCS_alignment.c
new file mode 100644
index 0000000..0fae829
--- /dev/null
+++ b/sumalibs/liblcs/banded_LCS_alignment.c
@@ -0,0 +1,211 @@
+/*
+ * banded_LCS_alignment.c
+ *
+ * Created on: 7 nov. 2012
+ * Author: merciece
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "../libutils/utilities.h"
+
+
+typedef struct { // one cell of the matrix filled by banded_align()
+ int score; // best score found for this cell (sum of calculateScore() results along the path)
+ int l_path; // number of steps of the path that yields that score
+}infos;
+
+
+int calculateScore(char nuc1, char nuc2)
+{   /* Score of one pair of symbols: 1 when they are identical, 0 otherwise. */
+    return (nuc1 != nuc2) ? 0 : 1;
+}
+
+infos** banded_align(char *seq1, char *seq2, int l1, int l2, int bandLengthRight, int bandLengthLeft)
+{
+    /*
+     * Fill the banded dynamic-programming matrix of seq1 (rows) against seq2 (columns).
+     *
+     * seq1, seq2      : sequences to compare; characters 0..l1-1 and 0..l2-1 are read
+     * l1, l2          : sequence lengths; the matrix has (l1+1) x (l2+1) cells
+     * bandLengthLeft  : on row i the first column computed is max(1, i - bandLengthLeft)
+     * bandLengthRight : on row i the last column computed is min(l2, i + bandLengthRight)
+     *
+     * Each cell keeps the best score (number of matching pairs) and the length of
+     * the path that reaches it; between equal scores the shorter path is kept.
+     * Cells outside the band keep score 0 and, away from row 0 and column 0,
+     * path length 0.
+     *
+     * Every row from 1 to l1 is processed; there is no early termination.
+     *
+     * Returns the matrix, which the caller frees row by row and then as a whole,
+     * or NULL when memory cannot be allocated.
+     */
+
+    int i, j;
+    int start, end;
+    int diag_score, delete, insert;
+    int l_path, l_path_i, l_path_d;
+    int bestScore;
+    infos **matrix;
+
+    // from here on l1 and l2 are the matrix dimensions (sequence length + 1)
+    l1++;
+    l2++;
+
+    // Matrix initialization: calloc leaves every score and l_path at 0
+
+    matrix = calloc(l1, sizeof *matrix);
+    if (matrix == NULL)
+        return(NULL);
+
+    for (i = 0; i < l1; i++)
+    {
+        matrix[i] = calloc(l2, sizeof **matrix);
+        if (matrix[i] == NULL)
+        {
+            // release the rows obtained so far before giving up
+            while (i > 0)
+                free(matrix[--i]);
+            free(matrix);
+            return(NULL);
+        }
+    }
+
+    // Reaching cell (i,0) or (0,j) takes i or j steps
+
+    for (i = 0; i < l1; i++)
+        matrix[i][0].l_path = i;
+
+    for (j = 0; j < l2; j++)
+        matrix[0][j].l_path = j;
+
+    // Matrix initialized
+
+    for (i = 1; i < l1; i++)
+    {
+        // columns of row i that lie inside the band
+        start = i - bandLengthLeft;
+        if (start < 1)
+            start = 1;
+        end = i+bandLengthRight+1;
+        if (end > l2)
+            end = l2;
+
+        for (j = start; j < end; j++)
+        {
+            // candidates coming from the cell above and from the cell on the left
+            delete = matrix[i-1][j].score;
+            l_path_d = matrix[i-1][j].l_path + 1;
+            insert = matrix[i][j-1].score;
+            l_path_i = matrix[i][j-1].l_path + 1;
+
+            // diagonal candidate: one more point when the two symbols are equal
+            diag_score = calculateScore(seq1[i-1], seq2[j-1]);
+            bestScore = matrix[i-1][j-1].score + diag_score;
+            l_path = matrix[i-1][j-1].l_path + 1;
+
+            // a gap wins with a higher score, or with the same score and a shorter path
+            if ((insert > bestScore) || ((insert == bestScore) && (l_path_i < l_path)))
+            {
+                bestScore = insert;
+                l_path = l_path_i;
+            }
+
+            if ((delete > bestScore) || ((delete == bestScore) && (l_path_d < l_path)))
+            {
+                bestScore = delete;
+                l_path = l_path_d;
+            }
+
+            // store the winner
+            matrix[i][j].score = bestScore;
+            matrix[i][j].l_path = l_path;
+        }
+    }
+
+    return(matrix);
+}
+
+
+void calculateBandLength(int l1, int l2, double threshold, int* bandLengthRight, int* bandLengthLeft)
+{
+    /* Band widths: left = round(l1 - l1*threshold), right = round(l2 - l1*threshold). */
+    const double allowance = -l1 * threshold;
+    *bandLengthLeft  = round(allowance + l1);
+    *bandLengthRight = round(allowance + l2);
+}
+
+
+double calculateId(infos** matrix, int len1, int len2)
+{
+    /*
+     * Identity read from the cell matrix[len1][len2]:
+     * score divided by path length, or 0.0 when the score is 0.
+     *
+     * matrix     : matrix of cells holding a score and a path length
+     * len1, len2 : row and column index of the cell to read
+     */
+    const infos *last = &matrix[len1][len2];
+    const int matches = last->score;
+    const int steps = last->l_path;
+
+    if (matches == 0)
+        return(0.0);
+    return((double) matches / (double) steps);
+}
+
+
+double banded_lcs_align(int16_t* seq1, int16_t* seq2, int l1, int l2, double threshold, BOOL n, int ref, BOOL lcsmode, int16_t* address)
+{
+    /*
+     * Scalar banded alignment of two sequences stored as int16_t arrays.
+     *
+     * seq1      : first sequence, l1 symbols; it is copied back to front, so the
+     *             string given to banded_align() is seq1 reversed
+     * seq2      : second sequence, l2 symbols, copied in order
+     * threshold : passed to calculateBandLength() to size the band
+     * n, ref, lcsmode, address : not used here
+     *
+     * Returns calculateId() of the final matrix cell; exits the program when
+     * memory cannot be allocated.
+     */
+    double id;
+    int bandLengthRight, bandLengthLeft;
+    int i;
+    char *s1, *s2;
+    infos** matrix;
+
+    s1 = malloc((size_t)l1 + 1);
+    s2 = malloc((size_t)l2 + 1);
+    if ((s1 == NULL) || (s2 == NULL))
+    {
+        fprintf(stderr, "banded_lcs_align: memory allocation failed\n");
+        exit(EXIT_FAILURE);
+    }
+
+    // a single bound is enough: the comma in the old condition ignored "i>=0"
+    for (i = 0; i < l1; i++)
+        s1[l1-1-i] = (char) seq1[i];
+    for (i = 0; i < l2; i++)
+        s2[i] = (char) seq2[i];
+    s1[l1] = 0;
+    s2[l2] = 0;
+
+    calculateBandLength(l1, l2, threshold, &bandLengthRight, &bandLengthLeft);
+    matrix = banded_align(s1, s2, l1, l2, bandLengthRight, bandLengthLeft);
+    if (matrix == NULL)
+    {
+        fprintf(stderr, "banded_lcs_align: memory allocation failed\n");
+        exit(EXIT_FAILURE);
+    }
+
+    id = calculateId(matrix, l1, l2);
+    for (i = 0; i <= l1; i++)   // banded_align() returns l1+1 rows
+        free(matrix[i]);
+    free(matrix);
+    free(s1);
+    free(s2);
+    return(id);
+}
diff --git a/sumalibs/liblcs/banded_LCS_alignment.h b/sumalibs/liblcs/banded_LCS_alignment.h
new file mode 100644
index 0000000..14183c9
--- /dev/null
+++ b/sumalibs/liblcs/banded_LCS_alignment.h
@@ -0,0 +1,9 @@
+/*
+ * banded_LCS_alignment.h
+ *
+ * Created on: november 8, 2012
+ * Author: mercier
+ */
+
+
+double banded_lcs_align(int16_t* seq1, int16_t* seq2, int l1, int l2, double threshold, BOOL n, int ref, BOOL lcsmode, int16_t* address);
diff --git a/sumalibs/liblcs/sse_banded_LCS_alignment.c b/sumalibs/liblcs/sse_banded_LCS_alignment.c
new file mode 100644
index 0000000..425c6f7
--- /dev/null
+++ b/sumalibs/liblcs/sse_banded_LCS_alignment.c
@@ -0,0 +1,724 @@
+/*
+ * sse_banded_LCS_alignment.c
+ *
+ * Created on: 7 nov. 2012
+ * Author: celine mercier
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <stdint.h>
+#include "../libutils/utilities.h"
+#include "../libsse/_sse.h"
+
+
+
+/*static void printreg(__m128i r)
+{
+ int16_t a0,a1,a2,a3,a4,a5,a6,a7;
+
+ a0= _MM_EXTRACT_EPI16(r,0);
+ a1= _MM_EXTRACT_EPI16(r,1);
+ a2= _MM_EXTRACT_EPI16(r,2);
+ a3= _MM_EXTRACT_EPI16(r,3);
+ a4= _MM_EXTRACT_EPI16(r,4);
+ a5= _MM_EXTRACT_EPI16(r,5);
+ a6= _MM_EXTRACT_EPI16(r,6);
+ a7= _MM_EXTRACT_EPI16(r,7);
+
+fprintf(stderr, "a00 :-> %7d %7d %7d %7d "
+ " %7d %7d %7d %7d "
+ "\n"
+ , a0,a1,a2,a3,a4,a5,a6,a7
+ );
+}
+*/
+
+static inline int extract_reg(__m128i r, int p)
+{   /* Element p (0..7) of r as extracted by _MM_EXTRACT_EPI16; any other p yields 0. */
+    int lane = 0;
+
+    if      (p == 0) lane = _MM_EXTRACT_EPI16(r,0);
+    else if (p == 1) lane = _MM_EXTRACT_EPI16(r,1);
+    else if (p == 2) lane = _MM_EXTRACT_EPI16(r,2);
+    else if (p == 3) lane = _MM_EXTRACT_EPI16(r,3);
+    else if (p == 4) lane = _MM_EXTRACT_EPI16(r,4);
+    else if (p == 5) lane = _MM_EXTRACT_EPI16(r,5);
+    else if (p == 6) lane = _MM_EXTRACT_EPI16(r,6);
+    else if (p == 7) lane = _MM_EXTRACT_EPI16(r,7);
+    return(lane);
+}
+
+
+void sse_banded_align_lcs_and_ali_len(int16_t* seq1, int16_t* seq2, int l1, int l2, int bandLengthLeft, int bandLengthTotal, int16_t* address, double* lcs_length, int* ali_length)
+{ // Banded LCS computed diagonal by diagonal that also tracks an alignment length per cell; the two results are written to *lcs_length and *ali_length.
+ register int j;
+ int k1, k2;
+ int max, diff;
+ int l_reg, l_loc;
+ int line;
+ int numberOfRegistersPerLine;
+ int numberOfRegistersFor3Lines;
+ // parity flags of the current line and of bandLengthLeft (BLL)
+ BOOL even_line;
+ BOOL odd_line;
+ BOOL even_BLL;
+ BOOL odd_BLL;
+ // score registers: SSEregisters is split into diag, gap1 and gap2 groups of numberOfRegistersPerLine registers
+ um128* SSEregisters;
+ um128* p_diag;
+ um128* p_gap1;
+ um128* p_gap2;
+ um128* p_diag_j;
+ um128* p_gap1_j;
+ um128* p_gap2_j;
+ um128 current;
+ // alignment-length registers, organised like the score registers
+ um128* l_ali_SSEregisters;
+ um128* p_l_ali_diag;
+ um128* p_l_ali_gap1;
+ um128* p_l_ali_gap2;
+ um128* p_l_ali_diag_j;
+ um128* p_l_ali_gap1_j;
+ um128* p_l_ali_gap2_j;
+ um128 l_ali_current;
+ // symbols compared on the current line and their 0/1 match scores
+ um128 nucs1;
+ um128 nucs2;
+ um128 scores;
+ // comparison mask used to choose between candidates
+ um128 boolean_reg;
+
+ // Initialisations
+
+ odd_BLL = bandLengthLeft & 1;
+ even_BLL = !odd_BLL;
+
+ max = INT16_MAX - l1; // alignment length written into the lane that enters at an edge of the band
+
+ numberOfRegistersPerLine = bandLengthTotal / 8; // each register is filled from 8 consecutive int16_t values
+ numberOfRegistersFor3Lines = 3 * numberOfRegistersPerLine;
+
+ SSEregisters = (um128*) calloc(numberOfRegistersFor3Lines * 2, sizeof(um128)); // zeroed; first half = scores, second half = alignment lengths
+ l_ali_SSEregisters = SSEregisters + numberOfRegistersFor3Lines;
+
+ // prepare the SSE registers: the alignment lengths start from the values found in address
+
+ for (j=0; j<numberOfRegistersFor3Lines; j++)
+ l_ali_SSEregisters[j].i = _MM_LOAD_SI128(address+j*8);
+
+ p_diag = SSEregisters;
+ p_gap1 = SSEregisters+numberOfRegistersPerLine;
+ p_gap2 = SSEregisters+2*numberOfRegistersPerLine;
+
+ p_l_ali_diag = l_ali_SSEregisters;
+ p_l_ali_gap1 = l_ali_SSEregisters+numberOfRegistersPerLine;
+ p_l_ali_gap2 = l_ali_SSEregisters+2*numberOfRegistersPerLine;
+
+ // Loop on diagonals = 'lines' :
+ for (line=2; line <= l1+l2; line++)
+ {
+ odd_line = line & 1;
+ even_line = !odd_line;
+
+ // loop on the registers of a line :
+ for (j=0; j < numberOfRegistersPerLine; j++)
+ {
+ p_diag_j = p_diag+j;
+ p_gap1_j = p_gap1+j;
+ p_gap2_j = p_gap2+j;
+ p_l_ali_diag_j = p_l_ali_diag+j;
+ p_l_ali_gap1_j = p_l_ali_gap1+j;
+ p_l_ali_gap2_j = p_l_ali_gap2+j;
+
+ // comparing nucleotides for diagonal scores :
+
+ // k1 = position of the 1st nucleotide to align for seq1 and k2 = position of the 1st nucleotide to align for seq2
+ if (odd_line && odd_BLL)
+ k1 = (line / 2) + ((bandLengthLeft+1) / 2) - j*8;
+ else
+ k1 = (line / 2) + (bandLengthLeft/2) - j*8;
+
+ k2 = line - k1 - 1;
+ // seq1 is indexed from its end (seq1+l1-k1) and seq2 from its start (seq2+k2)
+ nucs1.i = _MM_LOADU_SI128(seq1+l1-k1);
+ nucs2.i = _MM_LOADU_SI128(seq2+k2);
+ // NOTE(review): these loads can start outside the l1 / l2 symbols; callers in this file put differing fillers (0 and 255) in front of the arrays, presumably so that such lanes never match - confirm
+/* fprintf(stderr, "\nnucs, r %d, k1 = %d, k2 = %d\n", j, k1, k2);
+ printreg(nucs1.i);
+ printreg(nucs2.i);
+*/
+
+ // computing diagonal score :
+ scores.i = _MM_AND_SI128(_MM_CMPEQ_EPI16(nucs1.i, nucs2.i), _MM_SET1_EPI16(1));
+ current.i = _MM_ADDS_EPU16(p_diag_j->i, scores.i);
+
+ // Computing alignment length: a gap candidate replaces the current choice when it scores higher, or scores the same with a smaller alignment length
+
+ l_ali_current.i = p_l_ali_diag_j->i;
+ boolean_reg.i = _MM_CMPGT_EPI16(p_gap1_j->i, current.i); // lanes where gap1 scores strictly higher
+ l_ali_current.i = _MM_OR_SI128(
+ _MM_AND_SI128(p_l_ali_gap1_j->i, boolean_reg.i),
+ _MM_ANDNOT_SI128(boolean_reg.i, l_ali_current.i));
+ current.i = _MM_OR_SI128(
+ _MM_AND_SI128(p_gap1_j->i, boolean_reg.i),
+ _MM_ANDNOT_SI128(boolean_reg.i, current.i));
+ boolean_reg.i = _MM_AND_SI128( // lanes where gap1 ties and has the smaller alignment length
+ _MM_CMPEQ_EPI16(p_gap1_j->i, current.i),
+ _MM_CMPLT_EPI16(p_l_ali_gap1_j->i, l_ali_current.i));
+ l_ali_current.i = _MM_OR_SI128(
+ _MM_AND_SI128(p_l_ali_gap1_j->i, boolean_reg.i),
+ _MM_ANDNOT_SI128(boolean_reg.i, l_ali_current.i));
+ current.i = _MM_OR_SI128(
+ _MM_AND_SI128(p_gap1_j->i, boolean_reg.i),
+ _MM_ANDNOT_SI128(boolean_reg.i, current.i));
+ boolean_reg.i = _MM_CMPGT_EPI16(p_gap2_j->i, current.i); // same two-step selection against gap2
+ l_ali_current.i = _MM_OR_SI128(
+ _MM_AND_SI128(p_l_ali_gap2_j->i, boolean_reg.i),
+ _MM_ANDNOT_SI128(boolean_reg.i, l_ali_current.i));
+ current.i = _MM_OR_SI128(
+ _MM_AND_SI128(p_gap2_j->i, boolean_reg.i),
+ _MM_ANDNOT_SI128(boolean_reg.i, current.i));
+ boolean_reg.i = _MM_AND_SI128( // lanes where gap2 ties and has the smaller alignment length
+ _MM_CMPEQ_EPI16(p_gap2_j->i, current.i),
+ _MM_CMPLT_EPI16(p_l_ali_gap2_j->i, l_ali_current.i));
+ l_ali_current.i = _MM_OR_SI128(
+ _MM_AND_SI128(p_l_ali_gap2_j->i, boolean_reg.i),
+ _MM_ANDNOT_SI128(boolean_reg.i, l_ali_current.i));
+ current.i = _MM_OR_SI128(
+ _MM_AND_SI128(p_gap2_j->i, boolean_reg.i),
+ _MM_ANDNOT_SI128(boolean_reg.i, current.i));
+
+
+/* fprintf(stderr, "\nline = %d", line);
+ fprintf(stderr, "\nDiag, r %d : ", j);
+ printreg((*(p_diag_j)).i);
+ fprintf(stderr, "Gap1 : ");
+ printreg((*(p_gap1_j)).i);
+ fprintf(stderr, "Gap2 : ");
+ printreg((*(p_gap2_j)).i);
+ fprintf(stderr, "current : ");
+ printreg(current.i);
+ fprintf(stderr, "L ALI\nDiag r %d : ", j);
+ printreg((*(p_l_ali_diag_j)).i);
+ fprintf(stderr, "Gap1 : ");
+ printreg((*(p_l_ali_gap1_j)).i);
+ fprintf(stderr, "Gap2 : ");
+ printreg((*(p_l_ali_gap2_j)).i);
+ fprintf(stderr, "current : ");
+ printreg(l_ali_current.i);
+*/
+
+ // diag = gap1 and gap1 = current
+ p_diag_j->i = p_gap1_j->i;
+ p_gap1_j->i = current.i;
+
+ // l_ali_diag = l_ali_gap1 and l_ali_gap1 = l_ali_current+1
+ p_l_ali_diag_j->i = p_l_ali_gap1_j->i;
+ p_l_ali_gap1_j->i = _MM_ADD_EPI16(l_ali_current.i, _MM_SET1_EPI16(1));
+ }
+
+ // shifts for gap2, to do only once all the registers of a line have been computed. Copy gap2 and then load it from the copy?
+
+ for (j=0; j < numberOfRegistersPerLine; j++)
+ {
+ if ((odd_line && even_BLL) || (even_line && odd_BLL)) // line and BLL of different parity: each gap2 lane takes the previous lane of gap1
+ {
+ p_gap2[j].i = _MM_LOADU_SI128((p_gap1[j].s16)-1);
+ p_l_ali_gap2[j].i = _MM_LOADU_SI128((p_l_ali_gap1[j].s16)-1);
+ if (j == 0) // first lane of the line: score 0 and alignment length max
+ {
+ p_gap2[j].i = _MM_INSERT_EPI16(p_gap2[j].i, 0, 0);
+ p_l_ali_gap2[j].i = _MM_INSERT_EPI16(p_l_ali_gap2[j].i, max, 0);
+ }
+ }
+ else // same parity: each gap2 lane takes the next lane of gap1
+ {
+ p_gap2[j].i = _MM_LOADU_SI128(p_gap1[j].s16+1);
+ p_l_ali_gap2[j].i = _MM_LOADU_SI128(p_l_ali_gap1[j].s16+1);
+ if (j == numberOfRegistersPerLine - 1) // last lane of the line: score 0 and alignment length max
+ {
+ p_gap2[j].i = _MM_INSERT_EPI16(p_gap2[j].i, 0, 7);
+ p_l_ali_gap2[j].i = _MM_INSERT_EPI16(p_l_ali_gap2[j].i, max, 7);
+ }
+ }
+ }
+ // end shifts for gap2
+
+ }
+
+/* /// Recovering LCS and alignment lengths \\\ */
+
+ // finding the location of the results in the registers :
+ diff = l1-l2;
+ if ((diff & 1) && odd_BLL)
+ l_loc = (int) floor((double)(bandLengthLeft) / (double)2) - floor((double)(diff) / (double)2);
+ else
+ l_loc = (int) floor((double)(bandLengthLeft) / (double)2) - ceil((double)(diff) / (double)2);
+
+ l_reg = (int)floor((double)l_loc/(double)8.0); // index of the register that holds the result
+ //fprintf(stderr, "\nl_reg = %d, l_loc = %d\n", l_reg, l_loc);
+ l_loc = l_loc - l_reg*8; // lane of the result inside that register
+
+ // extracting the results from the registers :
+ *lcs_length = extract_reg(p_gap1[l_reg].i, l_loc);
+ *ali_length = extract_reg(p_l_ali_gap1[l_reg].i, l_loc) - 1; // stored lengths are one too high: gap1 always receives l_ali_current+1
+
+ // freeing the registers
+ free(SSEregisters);
+}
+
+
+double sse_banded_align_just_lcs(int16_t* seq1, int16_t* seq2, int l1, int l2, int bandLengthLeft, int bandLengthTotal)
+{ // Banded LCS computed diagonal by diagonal, scores only; returns the LCS length as a double.
+ register int j;
+ int k1, k2;
+ int diff;
+ int l_reg, l_loc;
+ int16_t l_lcs;
+ int line;
+ int numberOfRegistersPerLine;
+ int numberOfRegistersFor3Lines;
+ // parity flags of the current line and of bandLengthLeft (BLL)
+ BOOL even_line;
+ BOOL odd_line;
+ BOOL even_BLL;
+ BOOL odd_BLL;
+ // score registers: SSEregisters is split into diag, gap1 and gap2 groups of numberOfRegistersPerLine registers
+ um128* SSEregisters;
+ um128* p_diag;
+ um128* p_gap1;
+ um128* p_gap2;
+ um128* p_diag_j;
+ um128* p_gap1_j;
+ um128* p_gap2_j;
+ um128 current;
+ // symbols compared on the current line and their 0/1 match scores
+ um128 nucs1;
+ um128 nucs2;
+ um128 scores;
+
+ // Initialisations
+
+ odd_BLL = bandLengthLeft & 1;
+ even_BLL = !odd_BLL;
+
+ numberOfRegistersPerLine = bandLengthTotal / 8; // k1 below advances by 8 symbols per register
+ numberOfRegistersFor3Lines = 3 * numberOfRegistersPerLine;
+
+ SSEregisters = malloc(numberOfRegistersFor3Lines * sizeof(um128));
+
+ // prepare the SSE registers: every score starts at zero
+
+ for (j=0; j<numberOfRegistersFor3Lines; j++)
+ (*(SSEregisters+j)).i = _MM_SETZERO_SI128();
+
+ p_diag = SSEregisters;
+ p_gap1 = SSEregisters+numberOfRegistersPerLine;
+ p_gap2 = SSEregisters+2*numberOfRegistersPerLine;
+
+ // Loop on diagonals = 'lines' :
+ for (line=2; line <= l1+l2; line++)
+ {
+ odd_line = line & 1;
+ even_line = !odd_line;
+
+ // loop on the registers of a line :
+ for (j=0; j < numberOfRegistersPerLine; j++)
+ {
+ p_diag_j = p_diag+j;
+ p_gap1_j = p_gap1+j;
+ p_gap2_j = p_gap2+j;
+
+ // comparing nucleotides for diagonal scores :
+
+ // k1 = position of the 1st nucleotide to align for seq1 and k2 = position of the 1st nucleotide to align for seq2
+ if (odd_line && odd_BLL)
+ k1 = (line / 2) + ((bandLengthLeft+1) / 2) - j*8;
+ else
+ k1 = (line / 2) + (bandLengthLeft/2) - j*8;
+
+ k2 = line - k1 - 1;
+ // seq1 is indexed from its end (seq1+l1-k1) and seq2 from its start (seq2+k2)
+ nucs1.i = _MM_LOADU_SI128(seq1+l1-k1);
+ nucs2.i = _MM_LOADU_SI128(seq2+k2);
+
+ // computing diagonal score :
+ scores.i = _MM_AND_SI128(_MM_CMPEQ_EPI16(nucs1.i, nucs2.i), _MM_SET1_EPI16(1));
+ current.i = _MM_ADDS_EPU16((*(p_diag_j)).i, scores.i);
+
+ // current = max(gap1, current)
+ current.i = _MM_MAX_EPI16((*(p_gap1_j)).i, current.i);
+
+ // current = max(gap2, current)
+ current.i = _MM_MAX_EPI16((*(p_gap2_j)).i, current.i);
+
+ // diag = gap1 and gap1 = current
+ (*(p_diag_j)).i = (*(p_gap1_j)).i;
+ (*(p_gap1_j)).i = current.i;
+ }
+
+ // shifts for gap2, to do only once all the registers of a line have been computed
+
+ for (j=0; j < numberOfRegistersPerLine; j++)
+ {
+ if ((odd_line && even_BLL) || (even_line && odd_BLL)) // line and BLL of different parity: each gap2 lane takes the previous lane of gap1
+ {
+ (*(p_gap2+j)).i = _MM_LOADU_SI128(((*(p_gap1+j)).s16)-1);
+ if (j == 0) // first lane of the line is set to 0
+ {
+ (*(p_gap2+j)).i = _MM_INSERT_EPI16((*(p_gap2+j)).i, 0, 0);
+ }
+ }
+ else // same parity: each gap2 lane takes the next lane of gap1
+ {
+ (*(p_gap2+j)).i = _MM_LOADU_SI128(((*(p_gap1+j)).s16)+1);
+ if (j == numberOfRegistersPerLine - 1) // last lane of the line is set to 0
+ {
+ (*(p_gap2+j)).i = _MM_INSERT_EPI16((*(p_gap2+j)).i, 0, 7);
+ }
+ }
+ }
+ // end shifts for gap2
+
+ }
+
+/* /// Recovering LCS and alignment lengths \\\ */
+
+ // finding the location of the results in the registers :
+ diff = l1-l2;
+ if ((diff & 1) && odd_BLL)
+ l_loc = (int) floor((double)(bandLengthLeft) / (double)2) - floor((double)(diff) / (double)2);
+ else
+ l_loc = (int) floor((double)(bandLengthLeft) / (double)2) - ceil((double)(diff) / (double)2);
+
+ l_reg = (int)floor((double)l_loc/(double)8.0); // index of the register that holds the result
+ //fprintf(stderr, "\nl_reg = %d, l_loc = %d\n", l_reg, l_loc);
+ l_loc = l_loc - l_reg*8; // lane of the result inside that register
+
+ // extracting LCS from the registers :
+ l_lcs = extract_reg((*(p_gap1+l_reg)).i, l_loc);
+
+ // freeing the registers
+ free(SSEregisters);
+
+ return((double) l_lcs);
+}
+
+
+void calculateBandLengths(int l1, int l2, int* bandLengthRight, int* bandLengthLeft, int LCSmin)
+{   /* Band lengths: how far l1 (left) and l2 (right) exceed LCSmin. Plain external function: a bare `inline` definition provides no linkable symbol in C99/C11. */
+    (*bandLengthLeft)  = l1 - LCSmin;
+    (*bandLengthRight) = l2 - LCSmin;
+}
+
+
+int calculateLCSmin(int l1, int l2, double threshold, BOOL normalize, int reference, BOOL lcsmode)
+{
+    /*
+     * LCS length derived from the threshold, truncated to int.
+     *
+     * l1, l2    : lengths; l2 is the reference length when reference is MINLEN,
+     *             l1 for any other reference
+     * threshold : multiplied by the reference length when normalize is set,
+     *             returned as is when only lcsmode is set
+     *
+     * Returns 0 unless threshold is strictly positive.
+     */
+    const int refLength = (reference == MINLEN) ? l2 : l1;
+
+    if (!(threshold > 0))
+        return(0);
+
+    if (normalize)
+        return((int) (threshold * refLength));
+
+    if (lcsmode)
+        return((int) threshold);
+
+    // otherwise the threshold is subtracted from the reference length
+    return((int) (refLength - threshold));
+}
+
+
+int calculateSSEBandLength(int bandLengthRight, int bandLengthLeft)
+{
+    /* (right + left) / 2 + 1 truncated to int, then rounded up to the next multiple of 8 */
+    int total = (double)(bandLengthRight + bandLengthLeft) / 2.0 + 1.0;
+    int remainder = total & 7;
+    return (remainder != 0) ? (total - remainder + 8) : total;
+}
+
+
+int calculateSizeToAllocate(int maxLen, int minLen, int LCSmin)
+{
+    /*
+     * Size in bytes of the int16_t work buffer: 3 lines of twice the left band
+     * length, rounded up to a multiple of 8, plus 16 extra values.
+     */
+    int size;
+    int notUsed;
+
+    calculateBandLengths(maxLen, minLen, &notUsed, &size, LCSmin); // size = left band length
+
+    size*= 2;   // max size = max left band length * 2
+    size = (size & (~ (int)7)) + (( size & (int)7) ? 8:0); // next multiple of 8
+    size*= 3;   // three lines
+    size+= 16;
+
+    return(size*sizeof(int16_t));
+}
+
+
+void iniSeq(int16_t* seq, int size, int16_t iniValue)
+{
+    /* Store iniValue in each of the first size elements of seq. */
+    int16_t *cell = seq;
+    int16_t *const stop = seq + (size_t)size;
+    while (cell < stop)
+        *cell++ = iniValue;
+}
+
+
+void putSeqInSeq(int16_t* seq, char* s, int l, BOOL reverse)
+{
+    /*
+     * Copy the first l characters of s into seq[0..l-1], one int16_t each; when
+     * reverse is set they are written in opposite order (seq[0] receives s[l-1]).
+     */
+    int16_t *const end = seq + (size_t)l;
+    int16_t *target = seq;
+    char *source = reverse ? (s + (size_t)l-1) : s;
+    const int step = reverse ? -1 : 1;
+    for (; target < end; target++, source += step)
+        *target = *source;
+}
+
+
+void initializeAddressWithGaps(int16_t* address, int bandLengthTotal, int bandLengthLeft, int l1)
+{
+    /*
+     * Fill the first 3 * bandLengthTotal values of address (bandLengthTotal being
+     * rounded down to a multiple of 8) with INT16_MAX - l1, then seed five cells:
+     * 1 for cell 0,0 on the first line, and 2 for two neighbouring cells on each
+     * of the two following lines (cells 0,1 and 1,0).
+     *
+     * bandLengthLeft : its half and its parity decide where the seeds are written
+     */
+
+    const int lineLength = (bandLengthTotal / 8) * 8;
+    const int parity = bandLengthLeft%2;
+    const int cell_00 = bandLengthLeft / 2;
+    const int cell_01 = lineLength + cell_00 + parity - 1;
+    const int cell_01_shifted = 2*lineLength + cell_00 - parity;
+    const int filler = INT16_MAX-l1;
+    int i;
+
+    // every value starts at the filler
+    for (i = 3*lineLength; i-- > 0; )
+        address[i] = filler;
+
+    // 0,0 set to 1 and 0,1 and 1,0 set to 2
+    address[cell_00] = 1;
+    address[cell_01] = 2;
+    address[cell_01+1] = 2;
+
+    address[cell_01_shifted] = 2;
+    address[cell_01_shifted+1] = 2;
+}
+
+
+double sse_banded_lcs_align(int16_t* seq1, int16_t* seq2, int l1, int l2, BOOL normalize, int reference, BOOL lcsmode, int16_t* address, int LCSmin)
+{
+    /*
+     * Banded SSE alignment of two int16_t-encoded sequences.
+     *
+     * l1, l2    : lengths of seq1 and seq2, used as the MAXLEN and MINLEN references
+     * normalize : when set, the LCS length is divided by the reference length
+     * lcsmode   : when neither it nor normalize is set, the result is the
+     *             reference length minus the LCS length
+     * reference : ALILEN, MAXLEN or MINLEN; other values leave the LCS length as is
+     * address   : buffer handed to initializeAddressWithGaps() when the alignment
+     *             length is required (reference ALILEN with normalize or !lcsmode)
+     * LCSmin    : minimum LCS length from which the band lengths are derived
+     */
+    double id;
+    int bandLengthRight, bandLengthLeft, bandLengthTotal;
+    int ali_length;
+    const int needAliLength = (reference == ALILEN) && (normalize || !lcsmode);
+
+    calculateBandLengths(l1, l2, &bandLengthRight, &bandLengthLeft, LCSmin);
+    bandLengthTotal = calculateSSEBandLength(bandLengthRight, bandLengthLeft);
+
+    if (!needAliLength)
+        id = sse_banded_align_just_lcs(seq1, seq2, l1, l2, bandLengthLeft, bandLengthTotal);
+    else
+    {
+        initializeAddressWithGaps(address, bandLengthTotal, bandLengthLeft, l1);
+        sse_banded_align_lcs_and_ali_len(seq1, seq2, l1, l2, bandLengthLeft, bandLengthTotal, address, &id, &ali_length);
+    }
+
+    if (normalize)
+    {
+        if (reference == ALILEN)
+            id = id / (double) ali_length;
+        else if (reference == MAXLEN)
+            id = id / (double) l1;
+        else if (reference == MINLEN)
+            id = id / (double) l2;
+    }
+    else if (!lcsmode)
+    {
+        if (reference == ALILEN)
+            id = ali_length - id;
+        else if (reference == MAXLEN)
+            id = l1 - id;
+        else if (reference == MINLEN)
+            id = l2 - id;
+    }
+    return(id);
+}
+
+
+double generic_sse_banded_lcs_align(char* seq1, char* seq2, double threshold, BOOL normalize, int reference, BOOL lcsmode, int16_t** address, int* buffer_size, int16_t** iseq1,
+ int16_t** iseq2, int* buffer_sizeS)
+{
+    /*
+     * Align two NUL-terminated sequences with the banded SSE implementation,
+     * growing the caller's work buffers first when they are too small.
+     *
+     * threshold    : replaced by 1 - threshold when normalize is TRUE and lcsmode is unset
+     * address      : in/out, band buffer, only resized when the alignment length is needed
+     * buffer_size  : size of *address, compared with the size required
+     * iseq1, iseq2 : in/out, int16_t buffers that receive the encoded sequences
+     * buffer_sizeS : number of int16_t available in *iseq1 and *iseq2
+     *
+     * Returns the value computed by sse_banded_lcs_align().
+     *
+     * NOTE(review): *iseq1 and *iseq2 are left pointing maxBLL elements inside their
+     * allocation and neither size is updated, so calling this twice on the same
+     * buffers looks unsafe - confirm how callers reuse and release them.
+     */
+    double id;
+    int l1, l2;
+    int lmax, lmin;
+    int sizeToAllocateForBand;
+    int maxBLL, notUsed;
+    int sizeToAllocateForSeqs;
+    int LCSmin;
+
+    l1 = strlen(seq1);
+    l2 = strlen(seq2);
+
+    // lmax must be the longer length (the two assignments used to be inverted)
+    lmax = (l2 > l1) ? l2 : l1;
+    lmin = (l2 > l1) ? l1 : l2;
+
+    if (!lcsmode && (normalize==TRUE))
+        threshold = 1.0 - threshold;
+
+    LCSmin = calculateLCSmin(lmax, lmin, threshold, normalize, reference, lcsmode);
+
+    // Allocating space for matrix band if the alignment must be computed
+    if ((reference == ALILEN) && ((lcsmode && normalize) || (!lcsmode)))
+    {
+        sizeToAllocateForBand = calculateSizeToAllocate(lmax, lmin, LCSmin);
+        // reallocating if needed; the result goes to the caller's pointer, not to the local parameter
+        if (sizeToAllocateForBand > (*buffer_size))
+            *address = reallocA16Address(*address, sizeToAllocateForBand);
+    }
+
+    // Allocating space for the int16_t arrays representing the sequences
+    calculateBandLengths(lmax, lmin, &notUsed, &maxBLL, LCSmin);
+    sizeToAllocateForSeqs = 2*maxBLL+lmax;
+
+    if (sizeToAllocateForSeqs > *buffer_sizeS)
+    {
+        (*(iseq1)) = realloc((*(iseq1)), sizeToAllocateForSeqs*sizeof(int16_t));
+        (*(iseq2)) = realloc((*(iseq2)), sizeToAllocateForSeqs*sizeof(int16_t));
+    }
+
+    // maxBLL filler values in front of each sequence: 0 for one, 255 for the other
+    iniSeq(*(iseq1), maxBLL, 0);
+    iniSeq(*(iseq2), maxBLL, 255);
+    *(iseq1) = *(iseq1)+maxBLL;
+    *(iseq2) = *(iseq2)+maxBLL;
+
+    // longest seq must be first argument of sse_align function
+    if (l2 > l1)
+    {
+        putSeqInSeq((*(iseq1)), seq2, l2, TRUE);
+        putSeqInSeq((*(iseq2)), seq1, l1, FALSE);
+        id = sse_banded_lcs_align(*(iseq1), *(iseq2), l2, l1, normalize, reference, lcsmode, *address, LCSmin);
+    }
+    else
+    {
+        putSeqInSeq((*(iseq1)), seq1, l1, TRUE);
+        putSeqInSeq((*(iseq2)), seq2, l2, FALSE);
+        id = sse_banded_lcs_align(*(iseq1), *(iseq2), l1, l2, normalize, reference, lcsmode, *address, LCSmin);
+    }
+    return(id);
+}
+
+
+int prepareTablesForSumathings(int lmax, int lmin, double threshold, BOOL normalize, int reference, BOOL lcsmode,
+ int16_t** address, int16_t** iseq1, int16_t** iseq2)
+{
+    /*
+     * Allocate the band buffer (only when the alignment length is needed) and two
+     * int16_t sequence buffers of 2*maxBLL + lmax elements. *iseq1 and *iseq2 are
+     * returned pointing maxBLL elements inside their blocks, behind 0 / 255 fillers.
+     * Returns maxBLL + lmax, the number of elements left from those pointers on.
+     */
+    int sizeToAllocateForBand;
+    int maxBLL, notUsed;
+    int sizeToAllocateForSeqs;
+    int LCSmin;
+
+    LCSmin = calculateLCSmin(lmax, lmin, threshold, normalize, reference, lcsmode);
+
+    // Allocating space for matrix band if the alignment must be computed
+    if ((reference == ALILEN) && (normalize || !lcsmode)) // checking if alignment must be computed
+    {
+        sizeToAllocateForBand = calculateSizeToAllocate(lmax, lmin, LCSmin);
+        (*(address)) = getA16Address(sizeToAllocateForBand);
+    }
+    // Allocating space for the int16_t arrays representing the sequences
+    calculateBandLengths(lmax, lmin, &notUsed, &maxBLL, LCSmin);
+    sizeToAllocateForSeqs = 2*maxBLL+lmax;
+    (*(iseq1)) = malloc(sizeToAllocateForSeqs*sizeof(int16_t));
+    (*(iseq2)) = malloc(sizeToAllocateForSeqs*sizeof(int16_t));
+    iniSeq(*(iseq1), maxBLL, 0);
+    iniSeq(*(iseq2), maxBLL, 255);
+    *(iseq1) = *(iseq1)+maxBLL;
+    *(iseq2) = *(iseq2)+maxBLL;
+
+    return(maxBLL+lmax);
+}
+
+
+double alignForSumathings(char* seq1, int16_t* iseq1, char* seq2, int16_t* iseq2, int l1, int l2,
+ BOOL normalize, int reference, BOOL lcsmode, int16_t* address, int sizeForSeqs, int LCSmin)
+{
+    /*
+     * Encode the two sequences into iseq1 / iseq2 and align them.
+     * The longer sequence (seq1 when both lengths are equal) goes reversed into
+     * iseq1, the other one in order into iseq2. Before that the first sizeForSeqs
+     * elements of iseq1 are set to 0 and those of iseq2 to 255.
+     * Returns the value computed by sse_banded_lcs_align().
+     */
+    const int swapped = (l2 > l1);
+    char* firstSeq  = swapped ? seq2 : seq1;
+    char* secondSeq = swapped ? seq1 : seq2;
+    const int firstLen  = swapped ? l2 : l1;
+    const int secondLen = swapped ? l1 : l2;
+
+    iniSeq(iseq1, sizeForSeqs, 0);
+    iniSeq(iseq2, sizeForSeqs, 255);
+    putSeqInSeq(iseq1, firstSeq, firstLen, TRUE);
+    putSeqInSeq(iseq2, secondSeq, secondLen, FALSE);
+
+    return(sse_banded_lcs_align(iseq1, iseq2, firstLen, secondLen, normalize, reference, lcsmode, address, LCSmin));
+}
+
diff --git a/sumalibs/liblcs/sse_banded_LCS_alignment.h b/sumalibs/liblcs/sse_banded_LCS_alignment.h
new file mode 100644
index 0000000..95f50b0
--- /dev/null
+++ b/sumalibs/liblcs/sse_banded_LCS_alignment.h
@@ -0,0 +1,24 @@
+/*
+ * sse_banded_LCS_alignment.h
+ *
+ * Created on: november 29, 2012
+ * Author: mercier
+ */
+
+#ifndef SSE_BANDED_LCS_ALIGNMENT_H_
+#define SSE_BANDED_LCS_ALIGNMENT_H_
+#include <stdint.h>
+
+double sse_banded_lcs_align(int16_t* seq1, int16_t* seq2, int l1, int l2, BOOL normalize, int reference, BOOL lcsmode, int16_t* address, int LCSmin);
+int calculateSizeToAllocate(int maxLen, int minLen, int LCSmin);
+void calculateThresholdFromErrorNumber(int error, int length, double* threshold);
+void iniSeq(int16_t* seq, int size, int16_t iniValue);
+void putSeqInSeq(int16_t* seq, char* s, int l, BOOL reverse);
+double generic_sse_banded_lcs_align(char* seq1, char* seq2, double threshold, BOOL normalize, int reference, BOOL lcsmode, int16_t** address, int* buffer_size, int16_t** iseq1,
+ int16_t** iseq2, int* buffer_sizeS);
+int prepareTablesForSumathings(int lmax, int lmin, double threshold, BOOL normalize, int reference, BOOL lcsmode,
+ int16_t** address, int16_t** iseq1, int16_t** iseq2);
+double alignForSumathings(char* seq1, int16_t* iseq1, char* seq2, int16_t* iseq2, int l1, int l2, BOOL normalize,
+ int reference, BOOL lcsmode, int16_t* address, int sizeForSeqs, int LCSmin);
+int calculateLCSmin(int l1, int l2, double threshold, BOOL normalize, int reference, BOOL lcsmode);
+#endif
diff --git a/sumalibs/liblcs/upperband.c b/sumalibs/liblcs/upperband.c
new file mode 100644
index 0000000..4948bd0
--- /dev/null
+++ b/sumalibs/liblcs/upperband.c
@@ -0,0 +1,382 @@
+#include "../libsse/_sse.h"
+#include <stdio.h>
+#include <math.h>
+#include "../libutils/utilities.h"
+#include "../libfasta/sequence.h"
+#include "sse_banded_LCS_alignment.h"
+
+
+/*
+ * Hash every 4-mer of a 16-character fragment in one pass.
+ *
+ * Each input byte is first reduced to a 2-bit code made of bits 1-2 of
+ * the character ((c >> 1) & 3). On return, byte i of the result holds
+ * code[i]<<6 | code[i+1]<<4 | code[i+2]<<2 | code[i+3], i.e. the 8-bit
+ * code of the 4-mer starting at position i. Bytes 13 to 15 have zeros
+ * shifted in for the missing characters and therefore do not describe
+ * complete 4-mers.
+ */
+inline static uchar_v hash4m128(uchar_v frag)
+{
+ uchar_v words;
+
+ vUInt8 mask_03= _MM_SET1_EPI8(0x03); // load the register with 16 copies of the same byte
+ vUInt8 mask_FC= _MM_SET1_EPI8(0xFC);
+
+ frag.m = _MM_SRLI_EPI64(frag.m,1); // logical right shift by one bit on the two 64-bit lanes
+ frag.m = _MM_AND_SI128(frag.m,mask_03); // AND over the whole 128 bits: keep the 2-bit code of each byte
+
+
+ /* Round 1: byte i = code[i]<<2 | code[i+1]. The 0xFC mask removes the
+  * bits that the 64-bit shift leaked in from the neighbouring byte. */
+ words.m= _MM_SLLI_EPI64(frag.m,2);
+ words.m= _MM_AND_SI128(words.m,mask_FC);
+ frag.m = _MM_SRLI_SI128(frag.m,1);
+ words.m= _MM_OR_SI128(words.m,frag.m);
+
+ /* Round 2: append code[i+2]. */
+ words.m= _MM_SLLI_EPI64(words.m,2);
+ words.m= _MM_AND_SI128(words.m,mask_FC);
+ frag.m = _MM_SRLI_SI128(frag.m,1);
+ words.m= _MM_OR_SI128(words.m,frag.m);
+
+ /* Round 3: append code[i+3]; every byte now holds a full 8-bit word. */
+ words.m= _MM_SLLI_EPI64(words.m,2);
+ words.m= _MM_AND_SI128(words.m,mask_FC);
+ frag.m = _MM_SRLI_SI128(frag.m,1);
+ words.m= _MM_OR_SI128(words.m,frag.m);
+
+ return words;
+}
+
+#ifdef __SSE2__
+
+/*
+ * Returns 1 when at least one of the 16 bytes of data is zero, else 0.
+ * SSE2 flavour: compare every byte with zero and test both 64-bit halves
+ * of the resulting mask.
+ */
+inline static int anyzerom128(vUInt8 data)
+{
+    uint64_v hits;
+
+    /* bytes equal to zero become 0xFF, every other byte becomes 0x00 */
+    hits.m = _MM_CMPEQ_EPI8(data, _MM_SETZERO_SI128());
+    return (int)((hits.c[0] | hits.c[1]) != 0);
+}
+
+#else
+
+/*
+ * Returns 1 when at least one of the 16 bytes of data is zero, else 0.
+ * Portable flavour used when SSE2 is not available.
+ */
+inline static int anyzerom128(vUInt8 data)
+{
+    um128 bytes;
+    int   i;
+
+    bytes.i = data;
+
+    /* All 16 lanes must be scanned: stopping after 8 would miss a string
+     * terminator sitting in the upper half of the register. */
+    for (i = 0; i < 16; i++)
+        if (bytes.u8[i] == 0)
+            return 1;
+
+    return 0;
+}
+
+#endif
+
+/*
+ * Copy the 16 bytes of a vector register into the memory pointed to by
+ * table (room for eight unsigned shorts is required).
+ */
+inline static void dumpm128(unsigned short *table,vUInt8 data)
+{
+    memcpy(table, &data, sizeof data);
+}
+
+/**
+ * Compute the 4-mer occurrence table of a DNA sequence.
+ *
+ * sequence : pointer to the NUL-terminated nucleotide sequence
+ * table    : pointer to a 256-cell unsigned char table that receives the
+ *            occurrence counts (one cell per 8-bit word code); it is
+ *            cleared before counting
+ * count    : when not NULL, receives the total number of words counted
+ *
+ * Nucleotides are encoded on two bits taken from their ASCII code
+ * (A : 00  C : 01  T : 10  G : 11), see hash4m128().
+ *
+ * Returns the number of word occurrences that could not be recorded
+ * because the matching cell was already saturated at 255.
+ */
+
+int buildTable(const char* sequence, unsigned char *table, int *count)
+{
+    int         overflow = 0;
+    int         wc = 0;
+    int         i;
+    size_t      remaining;
+    const char *s = sequence;
+    char        tail[16];
+    vUInt8      mask_00 = _MM_SETZERO_SI128();
+
+    uchar_v     frag;
+    uchar_v     words;
+    uchar_v     zero;
+
+    memset(table, 0, 256 * sizeof(unsigned char));
+    remaining = strlen(sequence);
+
+    /*
+     * Full chunks: 16 characters are loaded but only the 12 first words
+     * are consumed, then the window advances by 12. Bounding the loop
+     * with the string length guarantees that every 16-byte load stays
+     * inside the sequence.
+     */
+    while (remaining >= 16)
+    {
+        frag.m = _MM_LOADU_SI128((const vUInt8*)s);
+        words  = hash4m128(frag);
+
+        for (i = 0; i < 12; i++)
+        {
+            if (table[words.c[i]] < 255)
+                table[words.c[i]]++;
+            else
+                overflow++;
+        }
+
+        wc        += 12;
+        s         += 12;
+        remaining -= 12;
+    }
+
+    /*
+     * Tail: fewer than 16 characters are left. They are copied into a
+     * zero padded buffer so that the vector load never reads beyond the
+     * terminator of the caller's string.
+     */
+    memset(tail, 0, sizeof tail);
+    memcpy(tail, s, remaining);
+
+    frag.m = _MM_LOADU_SI128((const vUInt8*)tail);
+    zero.m = _MM_CMPEQ_EPI8(frag.m, mask_00);
+    words  = hash4m128(frag);
+
+    /* at least one complete word is needed; then count words until the
+     * one whose last character would be the terminator */
+    if (zero.c[0] + zero.c[1] + zero.c[2] + zero.c[3] == 0)
+        for (i = 0; zero.c[i + 3] == 0; i++, wc++)
+        {
+            if (table[words.c[i]] < 255)
+                table[words.c[i]]++;
+            else
+                overflow++;
+        }
+
+    if (count)
+        *count = wc;
+
+    return overflow;
+}
+
+/*
+ * Take the byte-wise unsigned minimum of two 16-byte table slices, widen
+ * the 16 minima to 16 bits and return the eight saturated sums
+ * low[k] + high[k].
+ */
+static inline vUInt16 partialminsum(vUInt8 ft1,vUInt8 ft2)
+{
+    const vUInt8 zeros  = _MM_SETZERO_SI128();
+    const vUInt8 lowest = _MM_MIN_EPU8(ft1, ft2);
+
+    return _MM_ADDS_EPU16(_MM_UNPACKLO_EPI8(lowest, zeros),
+                          _MM_UNPACKHI_EPI8(lowest, zeros));
+}
+
+/*
+ * Sum, over the 256 cells of two occurrence tables, the smaller of the
+ * two counts, then add the smaller of the two overflow counters.
+ *
+ * t1, t2       : 256-byte occurrence tables
+ * over1, over2 : overflow counters attached to each table
+ */
+int compareTable(unsigned char *t1, int over1, unsigned char* t2, int over2)
+{
+    const vUInt8 *lhs = (const vUInt8*)t1;
+    const vUInt8 *rhs = (const vUInt8*)t2;
+    ushort_v      acc;
+    int           chunk;
+
+    /* 16 slices of 16 cells each, accumulated with saturating 16-bit adds */
+    acc.m = partialminsum(_MM_LOADU_SI128(lhs), _MM_LOADU_SI128(rhs));
+
+    for (chunk = 1; chunk < 16; chunk++)
+        acc.m = _MM_ADDS_EPU16(acc.m,
+                               partialminsum(_MM_LOADU_SI128(lhs + chunk),
+                                             _MM_LOADU_SI128(rhs + chunk)));
+
+    /* horizontal reduction: lanes 4..7 onto 0..3, then lanes 2..3 onto 0..1 */
+    acc.m = _MM_ADDS_EPU16(acc.m, _MM_SRLI_SI128(acc.m, 8));
+    acc.m = _MM_ADDS_EPU16(acc.m, _MM_SRLI_SI128(acc.m, 4));
+
+    return acc.c[0] + acc.c[1] + ((over1 < over2) ? over1 : over2);
+}
+
+/*
+ * Threshold on the number of shared 4-mers for a given identity.
+ *
+ * wordcount : number of words; 3 is added to it before any computation
+ *             (presumably to get back to a sequence length - confirm
+ *             with the callers)
+ * identity  : identity level; floor(length * (1 - identity)) errors are
+ *             tolerated
+ *
+ * Returns 0 when the matched positions, spread over error + 1 segments,
+ * give segments shorter than 4.
+ */
+int threshold4(int wordcount,double identity)
+{
+    const int length   = wordcount + 3;
+    const int error    = (int)floor((double)length * ((double)1.0 - identity));
+    const int segments = error + 1;
+    const int matched  = length - error;
+    const int lmax     = matched / segments;
+
+    if (lmax < 4)
+        return 0;
+
+    return (lmax - 3) * segments + (matched % segments);
+}
+
+
+/*
+ * Number of 4-mers derived from a reference length and an LCS length.
+ *
+ * The lcs matched positions are spread over 2 * (reflen - lcs) + 1
+ * fragments as evenly as integer division allows; the result adds up
+ * what each fragment contributes (fragment length minus 3 or minus 2,
+ * never below 0).
+ */
+int thresholdLCS4(int32_t reflen,int32_t lcs)
+{
+    const int fragments = (reflen - lcs) * 2 + 1;
+    const int shortest  = lcs / fragments;
+    const int extra     = lcs - shortest * fragments;   /* fragments one longer */
+
+    return MAX(shortest - 2,0) * extra
+         + MAX(shortest - 3,0) * (fragments - extra);
+}
+
+
+/*
+ * Allocate and fill the 4-mer occurrence table of every sequence of db.
+ * The overflow counter returned by buildTable() is stored next to the
+ * table; the word count it reports is not kept.
+ *
+ * Returns the number of sequences in db.
+ */
+int hashDB(fastaSeqCount db)
+{
+    int32_t idx = 0;
+    int32_t words;
+
+    fprintf(stderr,"Indexing dataset...");
+
+    while (idx < db.count)
+    {
+        db.fastaSeqs[idx].table = util_malloc((256)*sizeof(unsigned char), __FILE__, __LINE__);
+        db.fastaSeqs[idx].over  = buildTable((const char*)(db.fastaSeqs[idx].sequence),
+                                             db.fastaSeqs[idx].table,
+                                             &words);
+        idx++;
+    }
+
+    fprintf(stderr," : Done\n");
+
+    return db.count;
+}
+
+
+/*
+ * 4-mer filter: tells whether two sequences may still reach the
+ * requested threshold.
+ *
+ * Sequences shorter than 12 are always accepted. The threshold is turned
+ * into a minimal LCS length (from a distance when lcsmode is false, from
+ * a ratio of the reference length when normalize is true); the pair is
+ * rejected when that length exceeds the shorter sequence, otherwise the
+ * shared 4-mer count is compared with thresholdLCS4().
+ */
+BOOL isPossible(fastaSeqPtr seq1, fastaSeqPtr seq2, double threshold, BOOL normalize, int reference, BOOL lcsmode)
+{
+    int32_t longest;
+    int32_t reflen;
+    int32_t lcs;
+
+    if (seq1->length < 12 || seq2->length < 12)
+        return TRUE;
+
+    longest = MAX(seq1->length,seq2->length);
+
+    reflen = longest;
+    if (reference!=ALILEN && reference!=MAXLEN)
+        reflen = MIN(seq1->length,seq2->length);
+
+    if (normalize)
+        lcs = (int32_t)ceil((double)reflen * (lcsmode ? threshold : 1. - threshold));
+    else
+        lcs = (int32_t)(lcsmode ? threshold : reflen - threshold);
+
+    if (lcs > MIN(seq1->length,seq2->length))
+        return FALSE;
+
+    return compareTable(seq1->table,seq1->over,seq2->table,seq2->over) >= thresholdLCS4(longest,lcs);
+}
+
+
+/*
+ * Variant of the 4-mer filter used by sumaclust and sumatra.
+ *
+ * l1 and l2 are the lengths handed over by the caller; filters() and
+ * filtersSumatra() always pass the longer sequence first. Pairs with a
+ * length below 12 are always accepted. With normalize set, threshold is
+ * used as given, whatever lcsmode is.
+ */
+BOOL isPossibleSumathings(fastaSeqPtr seq1, fastaSeqPtr seq2, int l1, int l2, double threshold, BOOL normalize, int reference, BOOL lcsmode)
+{
+    int32_t reflen;
+    int32_t lcs;
+
+    if (l1 < 12 || l2 < 12)
+        return TRUE;
+
+    reflen = (reference==ALILEN || reference==MAXLEN) ? l1 : l2;
+
+    if (normalize)
+        lcs = (int32_t)ceil((double)reflen * threshold);
+    else
+        lcs = (int32_t)((! lcsmode) ? reflen - threshold : threshold);
+
+    return compareTable(seq1->table,seq1->over,seq2->table,seq2->over) >= thresholdLCS4(l1,lcs);
+}
+
+
+/*
+ * Run the length filter then the 4-mer filter on a pair of sequences.
+ *
+ * *LCSmin always receives the value of calculateLCSmin() for the pair
+ * (longer length first). *score is set to -1 only when both filters are
+ * passed; it is left untouched otherwise, so the caller must initialise
+ * it.
+ */
+void filters(fastaSeqPtr seq1, fastaSeqPtr seq2, double threshold, BOOL normalize, int reference, BOOL lcsmode, double* score, int* LCSmin)
+{
+    fastaSeqPtr longer  = seq1;
+    fastaSeqPtr shorter = seq2;
+    int         llen    = seq1->length;
+    int         slen    = seq2->length;
+
+    if (llen < slen)
+    {   /* put the longer sequence first */
+        longer  = seq2;
+        shorter = seq1;
+        llen    = seq2->length;
+        slen    = seq1->length;
+    }
+
+    *LCSmin = calculateLCSmin(llen, slen, threshold, normalize, reference, lcsmode);
+
+    if (slen >= *LCSmin
+        && isPossibleSumathings(longer, shorter, llen, slen, threshold, normalize, reference, lcsmode)) // 4-mers filter
+        *score = -1.0;
+}
+
+
+/*
+ * Filters for sumatra.
+ *
+ * *score is set to:
+ *   -2 when the filters are not passed,
+ *   -1 when they are passed (or when threshold is 0),
+ *   the best possible score when both sequences are identical
+ *   (1 in normalized LCS mode, 0 in distance mode, the length of seq1
+ *   in raw LCS mode).
+ * *LCSmin is written only when the sequences differ.
+ */
+void filtersSumatra(fastaSeqPtr seq1, fastaSeqPtr seq2, double threshold, BOOL normalize, int reference, BOOL lcsmode, double* score, int* LCSmin)
+{
+    const int   len1    = seq1->length;
+    fastaSeqPtr longer  = seq1;
+    fastaSeqPtr shorter = seq2;
+    int         llen;
+    int         slen;
+
+    *score = -2.0;
+
+    if (strcmp(seq1->sequence, seq2->sequence) == 0) // the 2 sequences are identical
+    {
+        if (!lcsmode)
+            *score = 0.0;
+        else if (normalize)
+            *score = 1.0;
+        else
+            *score = len1;
+        return;
+    }
+
+    if (threshold == 0)
+    {
+        *LCSmin = 0;
+        return;
+    }
+
+    llen = len1;
+    slen = seq2->length;
+
+    if (llen < slen)
+    {   /* put the longer sequence first */
+        longer  = seq2;
+        shorter = seq1;
+        llen    = seq2->length;
+        slen    = len1;
+    }
+
+    *LCSmin = calculateLCSmin(llen, slen, threshold, normalize, reference, lcsmode);
+
+    if (slen >= *LCSmin
+        && isPossibleSumathings(longer, shorter, llen, slen, threshold, normalize, reference, lcsmode)) // 4-mers filter
+        *score = -1.0;
+}
diff --git a/sumalibs/liblcs/upperband.h b/sumalibs/liblcs/upperband.h
new file mode 100644
index 0000000..cded693
--- /dev/null
+++ b/sumalibs/liblcs/upperband.h
@@ -0,0 +1,18 @@
+
+#ifndef UPPERBAND_H_
+#define UPPERBAND_H_
+
+/*
+ * 4-mer occurrence tables and the filters built on them (upperband.c).
+ * The types fastaSeqPtr, fastaSeqCount, BOOL and int32_t must be declared
+ * before this header is included.
+ */
+
+/* occurrence tables */
+int buildTable(const char *sequence, unsigned char *table, int *count);
+int compareTable(unsigned char *t1, int over1, unsigned char* t2, int over2);
+int threshold4(int wordcount,double identity);
+int thresholdLCS4(int32_t reflen,int32_t lcs);
+
+
+/* filters; the parameter order of isPossible() matches its definition:
+ * threshold, normalize, reference, lcsmode */
+int hashDB(fastaSeqCount);
+BOOL isPossible(fastaSeqPtr seq1, fastaSeqPtr seq2, double threshold, BOOL normalize, int reference, BOOL lcsmode);
+BOOL isPossibleSumathings(fastaSeqPtr seq1, fastaSeqPtr seq2, int l1, int l2, double threshold, BOOL normalize, int reference, BOOL lcsmode);
+void filters(fastaSeqPtr seq1, fastaSeqPtr seq2, double threshold, BOOL normalize, int reference, BOOL lcsmode, double* score, int* LCSmin);
+void filtersSumatra(fastaSeqPtr seq1, fastaSeqPtr seq2, double threshold, BOOL normalize, int reference, BOOL lcsmode, double* score, int* LCSmin);
+#endif
+
diff --git a/sumalibs/libsse/_sse.h b/sumalibs/libsse/_sse.h
new file mode 100644
index 0000000..8754721
--- /dev/null
+++ b/sumalibs/libsse/_sse.h
@@ -0,0 +1,961 @@
+#ifndef _SSE_H_
+#define _SSE_H_
+
+#include <string.h>
+
+#include <inttypes.h>
+#ifdef __SSE2__
+#include <xmmintrin.h>
+#else
+typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+#endif /* __SSE2__ */
+
+#ifndef MAX
+#define MAX(x,y) (((x)>(y)) ? (x):(y))
+#define MIN(x,y) (((x)<(y)) ? (x):(y))
+#endif
+
+#define ALIGN __attribute__((aligned(16)))
+typedef __m128i vUInt8;
+typedef __m128i vInt8;
+
+typedef __m128i vUInt16;
+typedef __m128i vInt16;
+
+typedef __m128i vUInt64;
+
+/* A 128-bit register overlaid with arrays of 8, 16, 32 and 64-bit lanes;
+ * used by the portable _em_* fallbacks to reach individual lanes. */
+typedef union
+{
+ __m128i i;
+ int64_t s64[ 2];
+ int16_t s16[ 8];
+ int8_t s8 [16];
+ uint8_t u8 [16];
+ uint16_t u16[8 ];
+ uint32_t u32[4 ];
+ uint64_t u64[2 ];
+} um128;
+
+/* A 128-bit register (m) seen as 16 unsigned bytes (c). */
+typedef union
+ {
+ vUInt8 m;
+ uint8_t c[16];
+ } uchar_v;
+
+/* A 128-bit register (m) seen as 8 unsigned 16-bit lanes (c). */
+typedef union
+ {
+ vUInt16 m;
+ uint16_t c[8];
+ } ushort_v;
+
+/* A 128-bit register (m) seen as 2 unsigned 64-bit lanes (c). */
+typedef union
+ {
+ vUInt64 m;
+ uint64_t c[2];
+ } uint64_v;
+
+
+#ifdef __SSE2__
+
+/*
+ * Read byte lane p (0..15) of r with the 16-bit extract intrinsic only.
+ * Every case passes a literal lane number to _mm_extract_epi16, hence
+ * the switch. Any other value of p yields 0.
+ */
+static inline int8_t _s2_extract_epi8(__m128i r, const int p)
+{
+    int word;
+
+    if (p < 0 || p > 15)
+        return 0;
+
+    switch (p / 2) {
+    case 0:  word = _mm_extract_epi16(r,0); break;
+    case 1:  word = _mm_extract_epi16(r,1); break;
+    case 2:  word = _mm_extract_epi16(r,2); break;
+    case 3:  word = _mm_extract_epi16(r,3); break;
+    case 4:  word = _mm_extract_epi16(r,4); break;
+    case 5:  word = _mm_extract_epi16(r,5); break;
+    case 6:  word = _mm_extract_epi16(r,6); break;
+    default: word = _mm_extract_epi16(r,7); break;
+    }
+
+    /* even p: low byte of the word, odd p: high byte */
+    return (p & 1) ? (word >> 8) : (word & 0xFF);
+}
+
+/*
+ * Signed byte-wise maximum built from SSE2 primitives: select a where
+ * a > b, b elsewhere.
+ */
+static inline __m128i _s2_max_epi8(__m128i a, __m128i b)
+{
+    const __m128i a_wins = _mm_cmpgt_epi8(a, b);
+
+    return _mm_or_si128(_mm_and_si128(a, a_wins),
+                        _mm_andnot_si128(a_wins, b));
+}
+
+/*
+ * Signed byte-wise minimum built from SSE2 primitives: select a where
+ * a < b, b elsewhere.
+ */
+static inline __m128i _s2_min_epi8(__m128i a, __m128i b)
+{
+    const __m128i a_wins = _mm_cmplt_epi8(a, b);
+
+    return _mm_or_si128(_mm_and_si128(a, a_wins),
+                        _mm_andnot_si128(a_wins, b));
+}
+
+/*
+ * Return r with byte lane p (0..15) replaced by the low 8 bits of b,
+ * using the 16-bit extract/insert intrinsics only. Every case passes a
+ * literal lane number to the intrinsics, hence the switch. Any other
+ * value of p yields an all-zero register.
+ */
+static inline __m128i _s2_insert_epi8(__m128i r, int b, const int p)
+{
+#define PUT(lane) \
+    do { \
+        const int old    = _mm_extract_epi16(r,lane); \
+        const int merged = (p & 1) ? ((old & 0x00FF) | ((b << 8) & 0xFF00)) \
+                                   : ((old & 0xFF00) | (b & 0x00FF)); \
+        return _mm_insert_epi16(r,merged,lane); \
+    } while (0)
+
+    if (p >= 0 && p <= 15)
+        switch (p / 2) {
+        case 0:  PUT(0);
+        case 1:  PUT(1);
+        case 2:  PUT(2);
+        case 3:  PUT(3);
+        case 4:  PUT(4);
+        case 5:  PUT(5);
+        case 6:  PUT(6);
+        default: PUT(7);
+        }
+#undef PUT
+
+    return _mm_setzero_si128();
+}
+
+// Fill a SSE Register with 16 time the same 8bits integer value
+#define _MM_SET1_EPI8(x) _mm_set1_epi8(x)
+#define _MM_INSERT_EPI8(r,x,i) _s2_insert_epi8((r),(x),(i))
+#define _MM_CMPEQ_EPI8(x,y) _mm_cmpeq_epi8((x),(y))
+#define _MM_CMPGT_EPI8(x,y) _mm_cmpgt_epi8((x),(y))
+#define _MM_CMPLT_EPI8(x,y) _mm_cmplt_epi8((x),(y))
+#define _MM_MAX_EPI8(x,y) _s2_max_epi8((x),(y))
+#define _MM_MIN_EPI8(x,y) _s2_min_epi8((x),(y))
+#define _MM_ADD_EPI8(x,y) _mm_add_epi8((x),(y))
+#define _MM_SUB_EPI8(x,y) _mm_sub_epi8((x),(y))
+#define _MM_EXTRACT_EPI8(r,p) _s2_extract_epi8((r),(p))
+
+#define _MM_MIN_EPU8(x,y) _mm_min_epu8((x),(y))
+
+// Fill an SSE register with 8 copies of the same 16-bit integer value
+#define _MM_SET1_EPI16(x) _mm_set1_epi16(x)
+
+// 16-bit lane operations, mapped one to one onto the SSE2 intrinsics
+#define _MM_INSERT_EPI16(r,x,i) _mm_insert_epi16((r),(x),(i))
+#define _MM_CMPEQ_EPI16(x,y) _mm_cmpeq_epi16((x),(y))
+#define _MM_CMPGT_EPI16(x,y) _mm_cmpgt_epi16((x),(y))
+#define _MM_CMPGT_EPU16(x,y) _mm_cmpgt_epu16((x),(y)) // does not exist ?? - NOTE(review): no such SSE2 intrinsic seems to be available; confirm before using this macro
+#define _MM_CMPLT_EPI16(x,y) _mm_cmplt_epi16((x),(y))
+#define _MM_MAX_EPI16(x,y) _mm_max_epi16((x),(y))
+#define _MM_MIN_EPI16(x,y) _mm_min_epi16((x),(y))
+#define _MM_ADD_EPI16(x,y) _mm_add_epi16((x),(y))
+#define _MM_SUB_EPI16(x,y) _mm_sub_epi16((x),(y))
+#define _MM_EXTRACT_EPI16(r,p) _mm_extract_epi16((r),(p))
+#define _MM_UNPACKLO_EPI8(a,b) _mm_unpacklo_epi8((a),(b))
+#define _MM_UNPACKHI_EPI8(a,b) _mm_unpackhi_epi8((a),(b))
+#define _MM_ADDS_EPU16(x,y) _mm_adds_epu16((x),(y))
+
+// Multiplication
+#define _MM_MULLO_EPI16(x,y) _mm_mullo_epi16((x), (y))
+
+#define _MM_SRLI_EPI64(r,x) _mm_srli_epi64((r),(x))
+#define _MM_SLLI_EPI64(r,x) _mm_slli_epi64((r),(x))
+
+// Set a SSE Register to 0
+#define _MM_SETZERO_SI128() _mm_setzero_si128()
+
+#define _MM_AND_SI128(x,y) _mm_and_si128((x),(y))
+#define _MM_ANDNOT_SI128(x,y) _mm_andnot_si128((x),(y))
+#define _MM_OR_SI128(x,y) _mm_or_si128((x),(y))
+#define _MM_XOR_SI128(x,y) _mm_xor_si128((x),(y))
+#define _MM_SLLI_SI128(r,s) _mm_slli_si128((r),(s))
+#define _MM_SRLI_SI128(r,s) _mm_srli_si128((r),(s))
+
+// Load a SSE register from an unaligned address
+#define _MM_LOADU_SI128(x) _mm_loadu_si128(x)
+
+// Load a SSE register from an aligned address (/!\ not defined when SSE not available)
+#define _MM_LOAD_SI128(x) _mm_load_si128(x)
+
+// #define _MM_UNPACKLO_EPI8(x,y) _mm_unpacklo_epi8((x),(y))
+
+#else /* __SSE2__ Not defined */
+
+/* Portable fallback: fill the 16 byte lanes with the low 8 bits of x. */
+static inline __m128i _em_set1_epi8(int x)
+{
+    um128 filled;
+
+    memset(filled.u8, x & 0xFF, sizeof filled.u8);
+
+    return filled.i;
+}
+
+/* Portable fallback: return r with byte lane i replaced by the low 8 bits of x. */
+static inline __m128i _em_insert_epi8(__m128i r, int x, const int i)
+{
+    um128 reg;
+
+    reg.i = r;
+    reg.u8[i] = (uint8_t)(x & 0xFF);
+
+    return reg.i;
+}
+
+/* Portable fallback: byte lanes that are equal become 0xFF, the others 0. */
+static inline __m128i _em_cmpeq_epi8(__m128i a, __m128i b)
+{
+    um128 lhs;
+    um128 rhs;
+    um128 out;
+    int   k;
+
+    lhs.i = a;
+    rhs.i = b;
+
+    for (k = 0; k < 16; k++)
+        out.s8[k] = (lhs.s8[k] == rhs.s8[k]) ? 0xFF : 0;
+
+    return out.i;
+}
+
+/* Portable fallback: signed byte lanes where a > b become 0xFF, the others 0. */
+static inline __m128i _em_cmpgt_epi8(__m128i a, __m128i b)
+{
+    um128 lhs;
+    um128 rhs;
+    um128 out;
+    int   k;
+
+    lhs.i = a;
+    rhs.i = b;
+
+    for (k = 0; k < 16; k++)
+        out.s8[k] = (lhs.s8[k] > rhs.s8[k]) ? 0xFF : 0;
+
+    return out.i;
+}
+
+/* Portable fallback: signed byte lanes where a < b become 0xFF, the others 0. */
+static inline __m128i _em_cmplt_epi8(__m128i a, __m128i b)
+{
+    um128 lhs;
+    um128 rhs;
+    um128 out;
+    int   k;
+
+    lhs.i = a;
+    rhs.i = b;
+
+    for (k = 0; k < 16; k++)
+        out.s8[k] = (lhs.s8[k] < rhs.s8[k]) ? 0xFF : 0;
+
+    return out.i;
+}
+
+/* Portable fallback: signed maximum of each pair of byte lanes. */
+static inline __m128i _em_max_epi8(__m128i a, __m128i b)
+{
+    um128 lhs;
+    um128 rhs;
+    um128 out;
+    int   k;
+
+    lhs.i = a;
+    rhs.i = b;
+
+    for (k = 0; k < 16; k++)
+        out.s8[k] = MAX(lhs.s8[k],rhs.s8[k]);
+
+    return out.i;
+}
+
+/* Portable fallback: signed minimum of each pair of byte lanes. */
+static inline __m128i _em_min_epi8(__m128i a, __m128i b)
+{
+    um128 lhs;
+    um128 rhs;
+    um128 out;
+    int   k;
+
+    lhs.i = a;
+    rhs.i = b;
+
+    for (k = 0; k < 16; k++)
+        out.s8[k] = MIN(lhs.s8[k],rhs.s8[k]);
+
+    return out.i;
+}
+
+/* Portable fallback: sum of each pair of byte lanes, truncated to 8 bits. */
+static inline __m128i _em_add_epi8(__m128i a, __m128i b)
+{
+    um128 lhs;
+    um128 rhs;
+    um128 out;
+    int   k;
+
+    lhs.i = a;
+    rhs.i = b;
+
+    for (k = 0; k < 16; k++)
+        out.s8[k] = lhs.s8[k] + rhs.s8[k];
+
+    return out.i;
+}
+
+/*
+ * Portable fallback for _mm_sub_epi8: a - b on each of the 16 byte
+ * lanes, with wrap-around.
+ *
+ * The difference is computed on the unsigned view of the lanes so that
+ * the wrap-around is well defined; the resulting bit pattern is the same
+ * as for signed two's complement subtraction.
+ */
+static inline __m128i _em_sub_epi8(__m128i a, __m128i b)
+{
+    um128 lhs;
+    um128 rhs;
+    um128 out;
+    int   k;
+
+    lhs.i = a;
+    rhs.i = b;
+
+    for (k = 0; k < 16; k++)
+        out.u8[k] = (uint8_t)(lhs.u8[k] - rhs.u8[k]);
+
+    return out.i;
+}
+
+
+/* Portable fallback: value of byte lane i of r, as an int in 0..255. */
+static inline int _em_extract_epi8(__m128i r, const int i)
+{
+    um128 reg;
+
+    reg.i = r;
+
+    return reg.u8[i];
+}
+
+/* Portable fallback: unsigned minimum of each pair of byte lanes. */
+static inline __m128i _em_min_epu8(__m128i a, __m128i b)
+{
+    um128 lhs;
+    um128 rhs;
+    um128 out;
+    int   k;
+
+    lhs.i = a;
+    rhs.i = b;
+
+    for (k = 0; k < 16; k++)
+        out.u8[k] = MIN(lhs.u8[k],rhs.u8[k]);
+
+    return out.i;
+}
+
+/* Portable fallback: fill the 8 16-bit lanes with the low 16 bits of x. */
+static inline __m128i _em_set1_epi16(int x)
+{
+    um128 filled;
+    int   k;
+
+    for (k = 0; k < 8; k++)
+        filled.u16[k] = (uint16_t)(x & 0xFFFF);
+
+    return filled.i;
+}
+
+/* Portable fallback: return r with 16-bit lane i replaced by the low 16 bits of x. */
+static inline __m128i _em_insert_epi16(__m128i r, int x, const int i)
+{
+    um128 reg;
+
+    reg.i = r;
+    reg.u16[i] = (uint16_t)(x & 0xFFFF);
+
+    return reg.i;
+}
+
+/* Portable fallback: 16-bit lanes that are equal become 0xFFFF, the others 0. */
+static inline __m128i _em_cmpeq_epi16(__m128i a, __m128i b)
+{
+    um128 lhs;
+    um128 rhs;
+    um128 out;
+    int   k;
+
+    lhs.i = a;
+    rhs.i = b;
+
+    for (k = 0; k < 8; k++)
+        out.s16[k] = (lhs.s16[k] == rhs.s16[k]) ? 0xFFFF : 0;
+
+    return out.i;
+}
+
+/* Portable fallback: signed 16-bit lanes where a > b become 0xFFFF, the others 0. */
+static inline __m128i _em_cmpgt_epi16(__m128i a, __m128i b)
+{
+    um128 lhs;
+    um128 rhs;
+    um128 out;
+    int   k;
+
+    lhs.i = a;
+    rhs.i = b;
+
+    for (k = 0; k < 8; k++)
+        out.s16[k] = (lhs.s16[k] > rhs.s16[k]) ? 0xFFFF : 0;
+
+    return out.i;
+}
+
+/* Portable fallback: signed 16-bit lanes where a < b become 0xFFFF, the others 0. */
+static inline __m128i _em_cmplt_epi16(__m128i a, __m128i b)
+{
+    um128 lhs;
+    um128 rhs;
+    um128 out;
+    int   k;
+
+    lhs.i = a;
+    rhs.i = b;
+
+    for (k = 0; k < 8; k++)
+        out.s16[k] = (lhs.s16[k] < rhs.s16[k]) ? 0xFFFF : 0;
+
+    return out.i;
+}
+
+/* Portable fallback: signed maximum of each pair of 16-bit lanes. */
+static inline __m128i _em_max_epi16(__m128i a, __m128i b)
+{
+    um128 lhs;
+    um128 rhs;
+    um128 out;
+    int   k;
+
+    lhs.i = a;
+    rhs.i = b;
+
+    for (k = 0; k < 8; k++)
+        out.s16[k] = MAX(lhs.s16[k],rhs.s16[k]);
+
+    return out.i;
+}
+
+/* Portable fallback: signed minimum of each pair of 16-bit lanes. */
+static inline __m128i _em_min_epi16(__m128i a, __m128i b)
+{
+    um128 lhs;
+    um128 rhs;
+    um128 out;
+    int   k;
+
+    lhs.i = a;
+    rhs.i = b;
+
+    for (k = 0; k < 8; k++)
+        out.s16[k] = MIN(lhs.s16[k],rhs.s16[k]);
+
+    return out.i;
+}
+
+/* Portable fallback: sum of each pair of 16-bit lanes, truncated to 16 bits. */
+static inline __m128i _em_add_epi16(__m128i a, __m128i b)
+{
+    um128 lhs;
+    um128 rhs;
+    um128 out;
+    int   k;
+
+    lhs.i = a;
+    rhs.i = b;
+
+    for (k = 0; k < 8; k++)
+        out.s16[k] = lhs.s16[k] + rhs.s16[k];
+
+    return out.i;
+}
+
+/*
+ * Portable fallback for _mm_sub_epi16: a - b on each of the 8 16-bit
+ * lanes, with wrap-around.
+ *
+ * The difference is computed on the unsigned view of the lanes so that
+ * the wrap-around is well defined; the resulting bit pattern is the same
+ * as for signed two's complement subtraction.
+ */
+static inline __m128i _em_sub_epi16(__m128i a, __m128i b)
+{
+    um128 lhs;
+    um128 rhs;
+    um128 out;
+    int   k;
+
+    lhs.i = a;
+    rhs.i = b;
+
+    for (k = 0; k < 8; k++)
+        out.u16[k] = (uint16_t)(lhs.u16[k] - rhs.u16[k]);
+
+    return out.i;
+}
+
+/* Portable fallback: value of 16-bit lane i of r, as an int in 0..65535. */
+static inline int _em_extract_epi16(__m128i r, const int i)
+{
+    um128 reg;
+
+    reg.i = r;
+
+    return reg.u16[i];
+}
+
+/*
+ * Portable fallback for _mm_unpacklo_epi8: 16-bit lane z of the result
+ * is built from byte z of a (low half) and byte z of b (high half), for
+ * the 8 low bytes of each operand.
+ *
+ * The bytes are read through the unsigned view: reading them as signed
+ * would sign-extend any byte >= 0x80 and overwrite the high half of the
+ * lane.
+ */
+static inline __m128i _em_unpacklo_epi8(__m128i a, __m128i b)
+{
+    um128 lhs;
+    um128 rhs;
+    um128 out;
+    int   z;
+
+    lhs.i = a;
+    rhs.i = b;
+
+    for (z = 0; z < 8; z++)
+        out.u16[z] = (uint16_t)(((uint16_t)rhs.u8[z] << 8) | lhs.u8[z]);
+
+    return out.i;
+}
+
+/*
+ * Portable fallback for _mm_unpackhi_epi8: 16-bit lane z of the result
+ * is built from byte z+8 of a (low half) and byte z+8 of b (high half).
+ *
+ * The bytes are read through the unsigned view: reading them as signed
+ * would sign-extend any byte >= 0x80 and overwrite the high half of the
+ * lane.
+ */
+static inline __m128i _em_unpackhi_epi8(__m128i a, __m128i b)
+{
+    um128 lhs;
+    um128 rhs;
+    um128 out;
+    int   z;
+
+    lhs.i = a;
+    rhs.i = b;
+
+    for (z = 0; z < 8; z++)
+        out.u16[z] = (uint16_t)(((uint16_t)rhs.u8[z + 8] << 8) | lhs.u8[z + 8]);
+
+    return out.i;
+}
+
+/*
+ * Portable fallback for _mm_adds_epu16: unsigned sum of each pair of
+ * 16-bit lanes with saturation, i.e. a sum above 0xFFFF is clamped to
+ * 0xFFFF instead of wrapping around.
+ */
+static inline __m128i _em_adds_epu16(__m128i a, __m128i b)
+{
+    um128    lhs;
+    um128    rhs;
+    um128    out;
+    uint32_t sum;
+    int      k;
+
+    lhs.i = a;
+    rhs.i = b;
+
+    for (k = 0; k < 8; k++)
+    {
+        sum = (uint32_t)lhs.u16[k] + (uint32_t)rhs.u16[k];
+        out.u16[k] = (sum > 0xFFFF) ? 0xFFFF : (uint16_t)sum;
+    }
+
+    return out.i;
+}
+
+/*
+ * Portable fallback for _mm_srli_epi64: logical right shift of both
+ * 64-bit lanes by b bits, zeros being shifted in.
+ *
+ * The unsigned view of the lanes is shifted (a signed shift would
+ * replicate the sign bit), and a count outside 0..63 clears the lanes as
+ * the intrinsic does instead of invoking an undefined C shift.
+ */
+static inline __m128i _em_srli_epi64(__m128i a, int b)
+{
+    um128 x;
+
+    x.i = a;
+
+    if (b < 0 || b > 63)
+    {
+        x.u64[0] = 0;
+        x.u64[1] = 0;
+    }
+    else
+    {
+        x.u64[0] >>= b;
+        x.u64[1] >>= b;
+    }
+
+    return x.i;
+}
+
+/*
+ * Portable fallback for _mm_slli_epi64: left shift of both 64-bit lanes
+ * by b bits, zeros being shifted in.
+ *
+ * The unsigned view of the lanes is shifted (left-shifting a negative
+ * signed value is undefined in C), and a count outside 0..63 clears the
+ * lanes as the intrinsic does.
+ */
+static inline __m128i _em_slli_epi64(__m128i a, int b)
+{
+    um128 x;
+
+    x.i = a;
+
+    if (b < 0 || b > 63)
+    {
+        x.u64[0] = 0;
+        x.u64[1] = 0;
+    }
+    else
+    {
+        x.u64[0] <<= b;
+        x.u64[1] <<= b;
+    }
+
+    return x.i;
+}
+
+/* Portable fallback: a register with all 128 bits cleared. */
+static inline __m128i _em_setzero_si128()
+{
+    um128 cleared;
+
+    memset(&cleared, 0, sizeof cleared);
+
+    return cleared.i;
+}
+
+/* Portable fallback: bitwise AND of the two registers. */
+static inline __m128i _em_and_si128(__m128i a, __m128i b)
+{
+    um128 lhs;
+    um128 rhs;
+    um128 out;
+    int   k;
+
+    lhs.i = a;
+    rhs.i = b;
+
+    for (k = 0; k < 2; k++)
+        out.u64[k] = lhs.u64[k] & rhs.u64[k];
+
+    return out.i;
+}
+
+/* Portable fallback: (NOT a) AND b, bit by bit. */
+static inline __m128i _em_andnot_si128(__m128i a, __m128i b)
+{
+    um128 lhs;
+    um128 rhs;
+    um128 out;
+    int   k;
+
+    lhs.i = a;
+    rhs.i = b;
+
+    for (k = 0; k < 2; k++)
+        out.u64[k] = (~lhs.u64[k]) & rhs.u64[k];
+
+    return out.i;
+}
+
+/* Portable fallback: bitwise OR of the two registers. */
+static inline __m128i _em_or_si128(__m128i a, __m128i b)
+{
+    um128 lhs;
+    um128 rhs;
+    um128 out;
+    int   k;
+
+    lhs.i = a;
+    rhs.i = b;
+
+    for (k = 0; k < 2; k++)
+        out.u64[k] = lhs.u64[k] | rhs.u64[k];
+
+    return out.i;
+}
+
+/* Portable fallback: bitwise exclusive OR of the two registers. */
+static inline __m128i _em_xor_si128(__m128i a, __m128i b)
+{
+    um128 lhs;
+    um128 rhs;
+    um128 out;
+    int   k;
+
+    lhs.i = a;
+    rhs.i = b;
+
+    for (k = 0; k < 2; k++)
+        out.u64[k] = lhs.u64[k] ^ rhs.u64[k];
+
+    return out.i;
+}
+
+/*
+ * Portable fallback: move the 16 bytes of a towards the higher lane
+ * numbers by b lanes; the b lowest lanes are cleared.
+ */
+static inline __m128i _em_slli_si128(__m128i a, int b)
+{
+    um128 in;
+    um128 out;
+    int   k;
+
+    in.i = a;
+
+    for (k = 0; k < 16; k++)
+        out.u8[k] = (k >= b) ? in.u8[k - b] : 0;
+
+    return out.i;
+}
+
+/*
+ * Portable fallback: move the 16 bytes of a towards the lower lane
+ * numbers by b lanes; the b highest lanes are cleared.
+ */
+static inline __m128i _em_srli_si128(__m128i a, int b)
+{
+    um128 in;
+    um128 out;
+    int   k;
+
+    in.i = a;
+
+    for (k = 0; k < 16; k++)
+        out.u8[k] = ((b + k) > 15) ? 0 : in.u8[k + b];
+
+    return out.i;
+}
+
+/*
+ * Portable fallback: read 16 bytes from P, one byte at a time, so that
+ * no particular alignment of P is required.
+ */
+inline static __m128i _em_loadu_si128(__m128i const *P)
+{
+    um128 loaded;
+
+    memcpy(loaded.u8, P, sizeof loaded.u8);
+
+    return loaded.i;
+}
+
+
+#define _MM_SET1_EPI8(x) _em_set1_epi8(x)
+#define _MM_INSERT_EPI8(r,x,i) _em_insert_epi8((r),(x),(i))
+#define _MM_CMPEQ_EPI8(x,y) _em_cmpeq_epi8((x),(y))
+#define _MM_CMPGT_EPI8(x,y) _em_cmpgt_epi8((x),(y))
+#define _MM_CMPLT_EPI8(x,y) _em_cmplt_epi8((x),(y))
+#define _MM_MAX_EPI8(x,y) _em_max_epi8((x),(y))
+#define _MM_MIN_EPI8(x,y) _em_min_epi8((x),(y))
+#define _MM_ADD_EPI8(x,y) _em_add_epi8((x),(y))
+#define _MM_SUB_EPI8(x,y) _em_sub_epi8((x),(y))
+#define _MM_EXTRACT_EPI8(r,p) _em_extract_epi8((r),(p))
+
+#define _MM_MIN_EPU8(x,y) _em_min_epu8((x),(y))
+
+#define _MM_SET1_EPI16(x) _em_set1_epi16(x)
+#define _MM_INSERT_EPI16(r,x,i) _em_insert_epi16((r),(x),(i))
+#define _MM_CMPEQ_EPI16(x,y) _em_cmpeq_epi16((x),(y))
+#define _MM_CMPGT_EPI16(x,y) _em_cmpgt_epi16((x),(y))
+#define _MM_CMPLT_EPI16(x,y) _em_cmplt_epi16((x),(y))
+#define _MM_MAX_EPI16(x,y) _em_max_epi16((x),(y))
+#define _MM_MIN_EPI16(x,y) _em_min_epi16((x),(y))
+#define _MM_ADD_EPI16(x,y) _em_add_epi16((x),(y))
+#define _MM_SUB_EPI16(x,y) _em_sub_epi16((x),(y))
+#define _MM_EXTRACT_EPI16(r,p) _em_extract_epi16((r),(p))
+#define _MM_UNPACKLO_EPI8(a,b) _em_unpacklo_epi8((a),(b))
+#define _MM_UNPACKHI_EPI8(a,b) _em_unpackhi_epi8((a),(b))
+#define _MM_ADDS_EPU16(x,y) _em_adds_epu16((x),(y))
+
+#define _MM_SRLI_EPI64(r,x) _em_srli_epi64((r),(x))
+#define _MM_SLLI_EPI64(r,x) _em_slli_epi64((r),(x))
+
+#define _MM_SETZERO_SI128() _em_setzero_si128()
+
+#define _MM_AND_SI128(x,y) _em_and_si128((x),(y))
+#define _MM_ANDNOT_SI128(x,y) _em_andnot_si128((x),(y))
+#define _MM_OR_SI128(x,y) _em_or_si128((x),(y))
+#define _MM_XOR_SI128(x,y) _em_xor_si128((x),(y))
+#define _MM_SLLI_SI128(r,s) _em_slli_si128((r),(s))
+#define _MM_SRLI_SI128(r,s) _em_srli_si128((r),(s))
+
+#define _MM_LOADU_SI128(x) _em_loadu_si128(x)
+#define _MM_LOAD_SI128(x) _em_loadu_si128(x)
+
+
+#endif /* __SSE2__ */
+
+/* Bitwise NOT: XOR with a register whose byte lanes are all ones (-1,
+ * a value that fits the 8-bit lane, where 0xFFFF did not). */
+#define _MM_NOT_SI128(x) _MM_XOR_SI128((x),(_MM_SET1_EPI8(-1)))
+
+#endif
diff --git a/sumalibs/libutils/._Makefile b/sumalibs/libutils/._Makefile
new file mode 100644
index 0000000..6adb72e
Binary files /dev/null and b/sumalibs/libutils/._Makefile differ
diff --git a/sumalibs/libutils/Makefile b/sumalibs/libutils/Makefile
new file mode 100644
index 0000000..8428d77
--- /dev/null
+++ b/sumalibs/libutils/Makefile
@@ -0,0 +1,25 @@
+
+# C sources that make up the static library
+SOURCES = utilities.c \
+ debug.c
+
+SRCS=$(SOURCES)
+
+
+# one object file per source file
+OBJECTS= $(patsubst %.c,%.o,$(SOURCES))
+
+LIBFILE= libutils.a
+RANLIB=ranlib
+
+
+# settings shared with the other libraries of the parent directory
+include ../global.mk
+
+all: $(LIBFILE)
+
+# remove the objects, the library and the *.P files
+clean:
+ rm -rf $(OBJECTS) $(LIBFILE)
+ rm -f *.P
+ rm -f *.a
+
+# archive the objects that are newer than the library, then index it
+$(LIBFILE): $(OBJECTS)
+ ar -cr $@ $?
+ $(RANLIB) $@
diff --git a/sumalibs/libutils/debug.c b/sumalibs/libutils/debug.c
new file mode 100644
index 0000000..b0131d5
--- /dev/null
+++ b/sumalibs/libutils/debug.c
@@ -0,0 +1,32 @@
+/*
+ * debug.c
+ *
+ * Created on: 4 sept. 2012
+ * Author: coissac
+ */
+
+#include <stdlib.h>
+#include <inttypes.h>
+#include "debug.h"
+
+/*
+ * Format the `bits` low-order bits of i as a string of '0' and '1',
+ * most significant bit first.
+ *
+ * Returns NULL when bits is greater than 64. Otherwise returns a pointer
+ * to a static buffer that is overwritten by the next call, so the result
+ * must be used or copied before calling again.
+ */
+char* int2bin(int64_t i,size_t bits)
+{
+    static char str[65];
+    uint64_t    u;
+
+    if (bits > 64)
+        return NULL;
+
+    str[bits] = 0;
+
+    // work on the unsigned value: shifting a signed integer to the right
+    // is implementation-defined, and the conversion keeps all 64 bits
+    u = (uint64_t)i;
+
+    for(; bits--; u >>= 1)
+        str[bits] = u & 1 ? '1' : '0';
+
+    return str;
+}
+
+
+
diff --git a/sumalibs/libutils/debug.h b/sumalibs/libutils/debug.h
new file mode 100644
index 0000000..6462cb5
--- /dev/null
+++ b/sumalibs/libutils/debug.h
@@ -0,0 +1,25 @@
+/*
+ * debug.h
+ *
+ * Created on: 4 sept. 2012
+ * Author: coissac
+ */
+
+#ifndef DEBUG_H_
+#define DEBUG_H_
+
+#include <stddef.h>   /* size_t, used by int2bin() */
+#include <stdint.h>   /* int64_t */
+#include <stdio.h>    /* fprintf() and stderr, used by DEBUG() */
+
+#ifdef DEBUG
+#undef DEBUG
+#endif
+
+/*
+ * DEBUG(format, ...) prints a message prefixed with the file name and
+ * line number on stderr when DEBUG_ON is defined, and expands to nothing
+ * otherwise. At least one argument must follow the format string.
+ */
+#ifdef DEBUG_ON
+#define DEBUG(format,...) fprintf(stderr,"[%s:%d] : "format"\n",__FILE__,__LINE__,__VA_ARGS__)
+#else
+#define DEBUG(format,...)
+#endif
+
+/* binary representation of the `bits` low-order bits of i, in a static buffer */
+char * int2bin(int64_t i,size_t bits);
+
+
+#endif /* DEBUG_H_ */
diff --git a/sumalibs/libutils/utilities.c b/sumalibs/libutils/utilities.c
new file mode 100644
index 0000000..71c3ce6
--- /dev/null
+++ b/sumalibs/libutils/utilities.c
@@ -0,0 +1,230 @@
+/**
+ * FileName: utilities.c
+ * Author: Tiayyba Riaz
+ * Description: C file for miscellaneous functions and macros
+ * **/
+
+#include "utilities.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Function Name: errorAbort(int errorCode, char* errorMsg, char* fileName, int lineNumber)
+ * Description:   Prints the error code, the location (file and line) and
+ *                the message on standard error, then terminates the
+ *                program with abort(). Never returns.
+ */
+void errorAbort(int32_t errorCode, char* errorMsg, char* fileName, int32_t lineNumber)
+{
+    fprintf(stderr, "Error %d in file %s line %d : %s\n", errorCode, fileName, lineNumber, errorMsg);
+    abort();
+}
+
+/*
+ * Function Name: util_malloc(size_t chunksize, const char *filename, int32_t line)
+ * Description:   Allocates chunksize bytes of zero-filled memory. When
+ *                the allocation fails, reports the error with the given
+ *                file name and line on stderr and aborts.
+ */
+void *util_malloc(size_t chunksize, const char *filename, int32_t line)
+{
+    void *zeroed = calloc(1,chunksize);
+
+    if (zeroed == NULL)
+        errorAbort(MEM_ALLOC_ERROR,"Could not allocate memory.",filename,line);
+
+    return zeroed;
+}
+
+/*
+ * Function Name: util_realloc(void *chunk, size_t newsize, const char *filename, int32_t line)
+ * Description:   Wrapper around realloc: changes the size of the memory
+ *                object pointed to by chunk to newsize. When the memory
+ *                cannot be allocated, reports the error on stderr and
+ *                aborts.
+ */
+void *util_realloc(void *chunk, size_t newsize, const char *filename, int32_t line)
+{
+    void *resized = realloc(chunk,newsize);
+
+    if (resized == NULL)
+        errorAbort(MEM_ALLOC_ERROR,"Could not allocate memory.",filename,line);
+
+    return resized;
+}
+
+/*
+ * Function Name: util_free(void *chunk)
+ * Description:   Releases the memory block pointed to by chunk; a plain
+ *                wrapper around free().
+ */
+void util_free(void *chunk)
+{
+    free(chunk);
+}
+
+/*
+ * Function Name: util_findInArr(int32_t tempArr[], int seqNo, int32_t noOfSeqs)
+ * Description:   Linear search: returns TRUE when seqNo is one of the
+ *                noOfSeqs first values of tempArr, FALSE otherwise.
+ */
+BOOL util_findInArr(int32_t tempArr[], int seqNo, int32_t noOfSeqs)
+{
+    int32_t remaining;
+
+    for (remaining = noOfSeqs; remaining > 0; remaining--, tempArr++)
+        if (*tempArr == seqNo)
+            return TRUE;
+
+    return FALSE;
+}
+
+
+/**
+ *
+ * String handling utilities
+ *
+ **/
+
+/*
+ * Function Name: str_chopAtDelim(char *dest, char *src, char *delim, BOOL includeDelim)
+ * Description:   Copies into dest the part of src that precedes the
+ *                first occurrence of delim (followed by delim itself
+ *                when includeDelim is set) and terminates it.
+ *                Returns dest, or NULL when delim does not occur in src,
+ *                in which case dest is left untouched.
+ */
+char *str_chopAtDelim(char *dest, char *src, char *delim, BOOL includeDelim)
+{
+    char   *hit = strstr(src, delim);
+    int32_t len;
+
+    if (hit == NULL)
+        return NULL;
+
+    /* length of the prefix, extended to cover the delimiter on request */
+    len = hit - src;
+    if (includeDelim)
+        len += strlen(delim);
+
+    strncpy(dest, src, len);
+    dest[len] = '\0';
+
+    return dest;
+}
+
+/*
+ * Function Name: str_sepNameValue(char *name, char *value, char *src, char *delim)
+ * Description:   Splits src at the first occurrence of delim: the text
+ *                before it is copied into name, the text after it into
+ *                value. When delim does not occur, name receives the
+ *                whole of src and value the empty string.
+ *                Both outputs are always NUL-terminated; name must have
+ *                room for the prefix plus the terminator.
+ */
+void str_sepNameValue(char *name, char *value, char *src, char *delim)
+{
+    char  *sep = strstr(src, delim);
+    size_t prefix;
+
+    if (sep == NULL)
+    {
+        strcpy(name, src);
+        strcpy(value, "");
+        return;
+    }
+
+    prefix = (size_t)(sep - src);
+    memcpy(name, src, prefix);
+    name[prefix] = '\0';    /* a bare strncpy would leave name unterminated */
+
+    strcpy(value, sep + strlen(delim));
+}
+
+/*
+ * Function Name: str_isSpace(char ch)
+ * Description:   Returns 1 when ch is a space, a tabulation or a
+ *                newline, 0 otherwise.
+ */
+int str_isSpace (char ch)
+{
+    return ch == ' ' || ch == '\t' || ch == '\n';
+}
+
+/*
+ * Function Name: str_removeSpaces(char *src)
+ * Description:   Removes, in place, the characters recognised by
+ *                str_isSpace() from the start and the end of the string.
+ *                A NULL or empty string is left alone; a string made of
+ *                such characters only becomes the empty string.
+ */
+void str_removeSpaces(char *src)
+{
+    size_t first;
+    size_t last;
+    size_t kept;
+
+    /* the pointer is checked before anything reads through it */
+    if (src == NULL || src[0] == '\0')
+        return;
+
+    first = 0;
+    last  = strlen(src) - 1;
+
+    while (first < last && str_isSpace(src[first])) first++;
+    while (last > first && str_isSpace(src[last]))  last--;
+
+    /* both cursors met on a blank: there is nothing to keep, whatever
+     * the kind of blank (space, tabulation or newline) */
+    if (first == last && str_isSpace(src[first]))
+    {
+        src[0] = '\0';
+        return;
+    }
+
+    kept = last - first + 1;
+    if (first > 0)
+        memmove(src, src + first, kept);
+    src[kept] = '\0';
+}
+
+/*
+ * Function Name: str_strrstr(char *src, char *delim)
+ * Description:   Returns a pointer to the last occurrence of the string
+ *                delim in src, or NULL when there is none. Overlapping
+ *                occurrences are taken into account. For an empty delim
+ *                the end of src (its terminator) is returned.
+ */
+char *str_strrstr(char *src, char *delim)
+{
+    char *found = NULL;
+    char *probe;
+
+    /* strstr() matches an empty needle everywhere, terminator included:
+     * stepping one past each match would walk out of the string */
+    if (*delim == '\0')
+        return src + strlen(src);
+
+    for (probe = strstr(src, delim); probe != NULL; probe = strstr(probe + 1, delim))
+        found = probe;
+
+    return found;
+}
+
+
+/*
+ * Returns a 16-byte aligned address with at least size usable bytes
+ * behind it, or NULL when the allocation fails.
+ *
+ * 15 spare bytes are allocated so that moving the address up to the next
+ * multiple of 16 never pushes the end of the usable area past the end of
+ * the allocation.
+ *
+ * NOTE(review): the address returned may differ from the one malloc()
+ * produced, so it must not be handed to free() or realloc() as is.
+ */
+void* getA16Address(int size)
+{
+    unsigned char *raw;
+    uintptr_t      misalign;
+
+    raw = malloc((size_t)size + 15);
+    if (raw == NULL)
+        return NULL;
+
+    misalign = (uintptr_t)raw % 16;
+    if (misalign != 0)
+        raw += 16 - misalign;
+
+    return raw;
+}
+
+
+/*
+ * Resizes the buffer *address to at least size usable bytes and makes
+ * *address 16-byte aligned. A NULL *address is allocated from scratch.
+ * Returns address; on allocation failure *address is set to NULL.
+ *
+ * The buffer itself (*address) is reallocated, not the variable that
+ * holds its address. 15 spare bytes are requested so that aligning the
+ * address keeps size usable bytes inside the allocation; when the
+ * address has to move, the content is moved along with it.
+ *
+ * NOTE(review): once the address has been moved up for alignment it is
+ * no longer the one the allocator returned, and passing it again to this
+ * function (or to free) is not valid; also, on failure the previous
+ * buffer is not released. Both come from the interface and should be
+ * confirmed against the callers.
+ */
+void** reallocA16Address(void** address, int size)
+{
+    unsigned char *raw;
+    unsigned char *aligned;
+    uintptr_t      misalign;
+
+    raw = realloc(*address, (size_t)size + 15);
+    if (raw == NULL)
+    {
+        *address = NULL;
+        return address;
+    }
+
+    aligned  = raw;
+    misalign = (uintptr_t)raw % 16;
+    if (misalign != 0)
+    {
+        aligned = raw + (16 - misalign);
+        memmove(aligned, raw, (size_t)size);
+    }
+
+    *address = aligned;
+
+    return address;
+}
+
+
+
+
+
+
+
+
+
+
diff --git a/sumalibs/libutils/utilities.h b/sumalibs/libutils/utilities.h
new file mode 100644
index 0000000..36138f9
--- /dev/null
+++ b/sumalibs/libutils/utilities.h
@@ -0,0 +1,56 @@
+/**
+ * FileName: utilities.h
+ * Author: Tiayyba Riaz
+ * Description: Header file for miscellaneous functions and macros
+ * **/
+
+#ifndef UTILITIES_H_
+#define UTILITIES_H_
+
+#include <stdint.h>
+#include <stdio.h>
+#include <time.h>
+
+
+
+//static char *basecodes = "00100020000000000003000000";
+
+//#define BASEIDX(ch) basecodes[ch - 'a'] - 48
+
+#ifndef MAX
+#define MAX(x,y) (((x)>(y)) ? (x):(y))
+#define MIN(x,y) (((x)<(y)) ? (x):(y))
+#endif
+
+typedef char BOOL;
+#define TRUE (3==3)
+#define FALSE (!TRUE)
+#define ALILEN (0)
+#define MAXLEN (1)
+#define MINLEN (2)
+
+
+/* Error Codes */
+#define FILE_OPENING_ERROR (1)
+#define MEM_ALLOC_ERROR (2)
+
+/* Prototypes */
+void errorAbort(int32_t code, char* errorMsg, char* fileName, int32_t lineNumber);
+char *str_strrstr(char *src, char *delim);
+void str_removeSpaces(char *src);
+void str_sepNameValue(char *name, char *value, char *src, char *delim);
+char *str_chopAtDelim(char *dest, char *src, char *delim, BOOL includeDelim);
+void util_free(void *chunk);
+void *util_realloc(void *chunk, size_t newsize, const char *filename, int32_t line);
+void *util_malloc(size_t chunksize, const char *filename, int32_t line);
+BOOL util_findInArr(int32_t tempArr[], int seqNo, int32_t noOfSeqs);
+void* getA16Address(int size);
+void** reallocA16Address(void** address, int size);
+
+/* Macros */
+#define ERRORABORT(code, msg) errorAbort((code), (msg), __FILE__, __LINE__)
+
+#endif /*UTILITIES_H_*/
+
+
+
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/sumaclust.git
More information about the debian-med-commit
mailing list