[med-svn] [ecopcr] 01/05: Imported Upstream version 0.5.0
Andreas Tille
tille at debian.org
Wed Jun 24 17:35:01 UTC 2015
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository ecopcr.
commit dfe3c335f97d1f076bee19ac5110ed2340e34583
Author: Andreas Tille <tille at debian.org>
Date: Wed Jun 24 16:55:17 2015 +0200
Imported Upstream version 0.5.0
---
Licence_CeCILL_V2-en.txt | 506 +++++++++++++++++
Licence_CeCILL_V2-fr.txt | 512 +++++++++++++++++
VERSION | 1 +
doxyfile | 1237 +++++++++++++++++++++++++++++++++++++++++
src/Makefile | 103 ++++
src/ecofind.c | 373 +++++++++++++
src/ecogrep.c | 403 ++++++++++++++
src/ecoisundertaxon.c | 123 ++++
src/ecopcr.c | 695 +++++++++++++++++++++++
src/global.mk | 18 +
src/libapat/CODES/dft_code.h | 14 +
src/libapat/CODES/dna_code.h | 71 +++
src/libapat/CODES/prot_code.h | 51 ++
src/libapat/Gmach.h | 97 ++++
src/libapat/Gtypes.h | 104 ++++
src/libapat/Makefile | 24 +
src/libapat/apat.h | 173 ++++++
src/libapat/apat_parse.c | 369 ++++++++++++
src/libapat/apat_search.c | 339 +++++++++++
src/libapat/libstki.c | 379 +++++++++++++
src/libapat/libstki.h | 87 +++
src/libecoPCR/Makefile | 31 ++
src/libecoPCR/ecoError.c | 26 +
src/libecoPCR/ecoIOUtils.c | 122 ++++
src/libecoPCR/ecoMalloc.c | 79 +++
src/libecoPCR/ecoPCR.h | 269 +++++++++
src/libecoPCR/ecoapat.c | 199 +++++++
src/libecoPCR/ecodna.c | 156 ++++++
src/libecoPCR/ecofilter.c | 20 +
src/libecoPCR/econame.c | 61 ++
src/libecoPCR/ecorank.c | 52 ++
src/libecoPCR/ecoseq.c | 230 ++++++++
src/libecoPCR/ecotax.c | 351 ++++++++++++
src/libthermo/Makefile | 22 +
src/libthermo/nnparams.c | 619 +++++++++++++++++++++
src/libthermo/nnparams.h | 63 +++
tests/ecodb.rdx | Bin 0 -> 354 bytes
tests/ecodb.tdx | Bin 0 -> 10514540 bytes
tests/ecodb_001.sdx | Bin 0 -> 26051477 bytes
tools/ecoPCRFilter.py | 303 ++++++++++
tools/ecoPCRFormat.py | 651 ++++++++++++++++++++++
tools/ecoSort.py | 811 +++++++++++++++++++++++++++
42 files changed, 9744 insertions(+)
diff --git a/Licence_CeCILL_V2-en.txt b/Licence_CeCILL_V2-en.txt
new file mode 100644
index 0000000..fcc8df2
--- /dev/null
+++ b/Licence_CeCILL_V2-en.txt
@@ -0,0 +1,506 @@
+
+CeCILL FREE SOFTWARE LICENSE AGREEMENT
+
+
+ Notice
+
+This Agreement is a Free Software license agreement that is the result
+of discussions between its authors in order to ensure compliance with
+the two main principles guiding its drafting:
+
+ * firstly, compliance with the principles governing the distribution
+ of Free Software: access to source code, broad rights granted to
+ users,
+ * secondly, the election of a governing law, French law, with which
+ it is conformant, both as regards the law of torts and
+ intellectual property law, and the protection that it offers to
+ both authors and holders of the economic rights over software.
+
+The authors of the CeCILL (for Ce[a] C[nrs] I[nria] L[ogiciel] L[ibre])
+license are:
+
+Commissariat à l'Energie Atomique - CEA, a public scientific, technical
+and industrial research establishment, having its principal place of
+business at 25 rue Leblanc, immeuble Le Ponant D, 75015 Paris, France.
+
+Centre National de la Recherche Scientifique - CNRS, a public scientific
+and technological establishment, having its principal place of business
+at 3 rue Michel-Ange, 75794 Paris cedex 16, France.
+
+Institut National de Recherche en Informatique et en Automatique -
+INRIA, a public scientific and technological establishment, having its
+principal place of business at Domaine de Voluceau, Rocquencourt, BP
+105, 78153 Le Chesnay cedex, France.
+
+
+ Preamble
+
+The purpose of this Free Software license agreement is to grant users
+the right to modify and redistribute the software governed by this
+license within the framework of an open source distribution model.
+
+The exercising of these rights is conditional upon certain obligations
+for users so as to preserve this status for all subsequent redistributions.
+
+In consideration of access to the source code and the rights to copy,
+modify and redistribute granted by the license, users are provided only
+with a limited warranty and the software's author, the holder of the
+economic rights, and the successive licensors only have limited liability.
+
+In this respect, the risks associated with loading, using, modifying
+and/or developing or reproducing the software by the user are brought to
+the user's attention, given its Free Software status, which may make it
+complicated to use, with the result that its use is reserved for
+developers and experienced professionals having in-depth computer
+knowledge. Users are therefore encouraged to load and test the
+suitability of the software as regards their requirements in conditions
+enabling the security of their systems and/or data to be ensured and,
+more generally, to use and operate it in the same conditions of
+security. This Agreement may be freely reproduced and published,
+provided it is not altered, and that no provisions are either added or
+removed herefrom.
+
+This Agreement may apply to any or all software for which the holder of
+the economic rights decides to submit the use thereof to its provisions.
+
+
+ Article 1 - DEFINITIONS
+
+For the purpose of this Agreement, when the following expressions
+commence with a capital letter, they shall have the following meaning:
+
+Agreement: means this license agreement, and its possible subsequent
+versions and annexes.
+
+Software: means the software in its Object Code and/or Source Code form
+and, where applicable, its documentation, "as is" when the Licensee
+accepts the Agreement.
+
+Initial Software: means the Software in its Source Code and possibly its
+Object Code form and, where applicable, its documentation, "as is" when
+it is first distributed under the terms and conditions of the Agreement.
+
+Modified Software: means the Software modified by at least one
+Contribution.
+
+Source Code: means all the Software's instructions and program lines to
+which access is required so as to modify the Software.
+
+Object Code: means the binary files originating from the compilation of
+the Source Code.
+
+Holder: means the holder(s) of the economic rights over the Initial
+Software.
+
+Licensee: means the Software user(s) having accepted the Agreement.
+
+Contributor: means a Licensee having made at least one Contribution.
+
+Licensor: means the Holder, or any other individual or legal entity, who
+distributes the Software under the Agreement.
+
+Contribution: means any or all modifications, corrections, translations,
+adaptations and/or new functions integrated into the Software by any or
+all Contributors, as well as any or all Internal Modules.
+
+Module: means a set of sources files including their documentation that
+enables supplementary functions or services in addition to those offered
+by the Software.
+
+External Module: means any or all Modules, not derived from the
+Software, so that this Module and the Software run in separate address
+spaces, with one calling the other when they are run.
+
+Internal Module: means any or all Module, connected to the Software so
+that they both execute in the same address space.
+
+GNU GPL: means the GNU General Public License version 2 or any
+subsequent version, as published by the Free Software Foundation Inc.
+
+Parties: mean both the Licensee and the Licensor.
+
+These expressions may be used both in singular and plural form.
+
+
+ Article 2 - PURPOSE
+
+The purpose of the Agreement is the grant by the Licensor to the
+Licensee of a non-exclusive, transferable and worldwide license for the
+Software as set forth in Article 5 hereinafter for the whole term of the
+protection granted by the rights over said Software.
+
+
+ Article 3 - ACCEPTANCE
+
+3.1 The Licensee shall be deemed as having accepted the terms and
+conditions of this Agreement upon the occurrence of the first of the
+following events:
+
+ * (i) loading the Software by any or all means, notably, by
+ downloading from a remote server, or by loading from a physical
+ medium;
+ * (ii) the first time the Licensee exercises any of the rights
+ granted hereunder.
+
+3.2 One copy of the Agreement, containing a notice relating to the
+characteristics of the Software, to the limited warranty, and to the
+fact that its use is restricted to experienced users has been provided
+to the Licensee prior to its acceptance as set forth in Article 3.1
+hereinabove, and the Licensee hereby acknowledges that it has read and
+understood it.
+
+
+ Article 4 - EFFECTIVE DATE AND TERM
+
+
+ 4.1 EFFECTIVE DATE
+
+The Agreement shall become effective on the date when it is accepted by
+the Licensee as set forth in Article 3.1.
+
+
+ 4.2 TERM
+
+The Agreement shall remain in force for the entire legal term of
+protection of the economic rights over the Software.
+
+
+ Article 5 - SCOPE OF RIGHTS GRANTED
+
+The Licensor hereby grants to the Licensee, who accepts, the following
+rights over the Software for any or all use, and for the term of the
+Agreement, on the basis of the terms and conditions set forth hereinafter.
+
+Besides, if the Licensor owns or comes to own one or more patents
+protecting all or part of the functions of the Software or of its
+components, the Licensor undertakes not to enforce the rights granted by
+these patents against successive Licensees using, exploiting or
+modifying the Software. If these patents are transferred, the Licensor
+undertakes to have the transferees subscribe to the obligations set
+forth in this paragraph.
+
+
+ 5.1 RIGHT OF USE
+
+The Licensee is authorized to use the Software, without any limitation
+as to its fields of application, with it being hereinafter specified
+that this comprises:
+
+ 1. permanent or temporary reproduction of all or part of the Software
+ by any or all means and in any or all form.
+
+ 2. loading, displaying, running, or storing the Software on any or
+ all medium.
+
+ 3. entitlement to observe, study or test its operation so as to
+ determine the ideas and principles behind any or all constituent
+ elements of said Software. This shall apply when the Licensee
+ carries out any or all loading, displaying, running, transmission
+ or storage operation as regards the Software, that it is entitled
+ to carry out hereunder.
+
+
+ 5.2 ENTITLEMENT TO MAKE CONTRIBUTIONS
+
+The right to make Contributions includes the right to translate, adapt,
+arrange, or make any or all modifications to the Software, and the right
+to reproduce the resulting software.
+
+The Licensee is authorized to make any or all Contributions to the
+Software provided that it includes an explicit notice that it is the
+author of said Contribution and indicates the date of the creation thereof.
+
+
+ 5.3 RIGHT OF DISTRIBUTION
+
+In particular, the right of distribution includes the right to publish,
+transmit and communicate the Software to the general public on any or
+all medium, and by any or all means, and the right to market, either in
+consideration of a fee, or free of charge, one or more copies of the
+Software by any means.
+
+The Licensee is further authorized to distribute copies of the modified
+or unmodified Software to third parties according to the terms and
+conditions set forth hereinafter.
+
+
+ 5.3.1 DISTRIBUTION OF SOFTWARE WITHOUT MODIFICATION
+
+The Licensee is authorized to distribute true copies of the Software in
+Source Code or Object Code form, provided that said distribution
+complies with all the provisions of the Agreement and is accompanied by:
+
+ 1. a copy of the Agreement,
+
+ 2. a notice relating to the limitation of both the Licensor's
+ warranty and liability as set forth in Articles 8 and 9,
+
+and that, in the event that only the Object Code of the Software is
+redistributed, the Licensee allows future Licensees unhindered access to
+the full Source Code of the Software by indicating how to access it, it
+being understood that the additional cost of acquiring the Source Code
+shall not exceed the cost of transferring the data.
+
+
+ 5.3.2 DISTRIBUTION OF MODIFIED SOFTWARE
+
+When the Licensee makes a Contribution to the Software, the terms and
+conditions for the distribution of the resulting Modified Software
+become subject to all the provisions of this Agreement.
+
+The Licensee is authorized to distribute the Modified Software, in
+source code or object code form, provided that said distribution
+complies with all the provisions of the Agreement and is accompanied by:
+
+ 1. a copy of the Agreement,
+
+ 2. a notice relating to the limitation of both the Licensor's
+ warranty and liability as set forth in Articles 8 and 9,
+
+and that, in the event that only the object code of the Modified
+Software is redistributed, the Licensee allows future Licensees
+unhindered access to the full source code of the Modified Software by
+indicating how to access it, it being understood that the additional
+cost of acquiring the source code shall not exceed the cost of
+transferring the data.
+
+
+ 5.3.3 DISTRIBUTION OF EXTERNAL MODULES
+
+When the Licensee has developed an External Module, the terms and
+conditions of this Agreement do not apply to said External Module, that
+may be distributed under a separate license agreement.
+
+
+ 5.3.4 COMPATIBILITY WITH THE GNU GPL
+
+The Licensee can include a code that is subject to the provisions of one
+of the versions of the GNU GPL in the Modified or unmodified Software,
+and distribute that entire code under the terms of the same version of
+the GNU GPL.
+
+The Licensee can include the Modified or unmodified Software in a code
+that is subject to the provisions of one of the versions of the GNU GPL,
+and distribute that entire code under the terms of the same version of
+the GNU GPL.
+
+
+ Article 6 - INTELLECTUAL PROPERTY
+
+
+ 6.1 OVER THE INITIAL SOFTWARE
+
+The Holder owns the economic rights over the Initial Software. Any or
+all use of the Initial Software is subject to compliance with the terms
+and conditions under which the Holder has elected to distribute its work
+and no one shall be entitled to modify the terms and conditions for the
+distribution of said Initial Software.
+
+The Holder undertakes that the Initial Software will remain ruled at
+least by this Agreement, for the duration set forth in Article 4.2.
+
+
+ 6.2 OVER THE CONTRIBUTIONS
+
+The Licensee who develops a Contribution is the owner of the
+intellectual property rights over this Contribution as defined by
+applicable law.
+
+
+ 6.3 OVER THE EXTERNAL MODULES
+
+The Licensee who develops an External Module is the owner of the
+intellectual property rights over this External Module as defined by
+applicable law and is free to choose the type of agreement that shall
+govern its distribution.
+
+
+ 6.4 JOINT PROVISIONS
+
+The Licensee expressly undertakes:
+
+ 1. not to remove, or modify, in any manner, the intellectual property
+ notices attached to the Software;
+
+ 2. to reproduce said notices, in an identical manner, in the copies
+ of the Software modified or not.
+
+The Licensee undertakes not to directly or indirectly infringe the
+intellectual property rights of the Holder and/or Contributors on the
+Software and to take, where applicable, vis-à-vis its staff, any and all
+measures required to ensure respect of said intellectual property rights
+of the Holder and/or Contributors.
+
+
+ Article 7 - RELATED SERVICES
+
+7.1 Under no circumstances shall the Agreement oblige the Licensor to
+provide technical assistance or maintenance services for the Software.
+
+However, the Licensor is entitled to offer this type of services. The
+terms and conditions of such technical assistance, and/or such
+maintenance, shall be set forth in a separate instrument. Only the
+Licensor offering said maintenance and/or technical assistance services
+shall incur liability therefor.
+
+7.2 Similarly, any Licensor is entitled to offer to its licensees, under
+its sole responsibility, a warranty, that shall only be binding upon
+itself, for the redistribution of the Software and/or the Modified
+Software, under terms and conditions that it is free to decide. Said
+warranty, and the financial terms and conditions of its application,
+shall be subject of a separate instrument executed between the Licensor
+and the Licensee.
+
+
+ Article 8 - LIABILITY
+
+8.1 Subject to the provisions of Article 8.2, the Licensee shall be
+entitled to claim compensation for any direct loss it may have suffered
+from the Software as a result of a fault on the part of the relevant
+Licensor, subject to providing evidence thereof.
+
+8.2 The Licensor's liability is limited to the commitments made under
+this Agreement and shall not be incurred as a result of in particular:
+(i) loss due the Licensee's total or partial failure to fulfill its
+obligations, (ii) direct or consequential loss that is suffered by the
+Licensee due to the use or performance of the Software, and (iii) more
+generally, any consequential loss. In particular the Parties expressly
+agree that any or all pecuniary or business loss (i.e. loss of data,
+loss of profits, operating loss, loss of customers or orders,
+opportunity cost, any disturbance to business activities) or any or all
+legal proceedings instituted against the Licensee by a third party,
+shall constitute consequential loss and shall not provide entitlement to
+any or all compensation from the Licensor.
+
+
+ Article 9 - WARRANTY
+
+9.1 The Licensee acknowledges that the scientific and technical
+state-of-the-art when the Software was distributed did not enable all
+possible uses to be tested and verified, nor for the presence of
+possible defects to be detected. In this respect, the Licensee's
+attention has been drawn to the risks associated with loading, using,
+modifying and/or developing and reproducing the Software which are
+reserved for experienced users.
+
+The Licensee shall be responsible for verifying, by any or all means,
+the suitability of the product for its requirements, its good working
+order, and for ensuring that it shall not cause damage to either persons
+or properties.
+
+9.2 The Licensor hereby represents, in good faith, that it is entitled
+to grant all the rights over the Software (including in particular the
+rights set forth in Article 5).
+
+9.3 The Licensee acknowledges that the Software is supplied "as is" by
+the Licensor without any other express or tacit warranty, other than
+that provided for in Article 9.2 and, in particular, without any warranty
+as to its commercial value, its secured, safe, innovative or relevant
+nature.
+
+Specifically, the Licensor does not warrant that the Software is free
+from any error, that it will operate without interruption, that it will
+be compatible with the Licensee's own equipment and software
+configuration, nor that it will meet the Licensee's requirements.
+
+9.4 The Licensor does not either expressly or tacitly warrant that the
+Software does not infringe any third party intellectual property right
+relating to a patent, software or any other property right. Therefore,
+the Licensor disclaims any and all liability towards the Licensee
+arising out of any or all proceedings for infringement that may be
+instituted in respect of the use, modification and redistribution of the
+Software. Nevertheless, should such proceedings be instituted against
+the Licensee, the Licensor shall provide it with technical and legal
+assistance for its defense. Such technical and legal assistance shall be
+decided on a case-by-case basis between the relevant Licensor and the
+Licensee pursuant to a memorandum of understanding. The Licensor
+disclaims any and all liability as regards the Licensee's use of the
+name of the Software. No warranty is given as regards the existence of
+prior rights over the name of the Software or as regards the existence
+of a trademark.
+
+
+ Article 10 - TERMINATION
+
+10.1 In the event of a breach by the Licensee of its obligations
+hereunder, the Licensor may automatically terminate this Agreement
+thirty (30) days after notice has been sent to the Licensee and has
+remained ineffective.
+
+10.2 A Licensee whose Agreement is terminated shall no longer be
+authorized to use, modify or distribute the Software. However, any
+licenses that it may have granted prior to termination of the Agreement
+shall remain valid subject to their having been granted in compliance
+with the terms and conditions hereof.
+
+
+ Article 11 - MISCELLANEOUS
+
+
+ 11.1 EXCUSABLE EVENTS
+
+Neither Party shall be liable for any or all delay, or failure to
+perform the Agreement, that may be attributable to an event of force
+majeure, an act of God or an outside cause, such as defective
+functioning or interruptions of the electricity or telecommunications
+networks, network paralysis following a virus attack, intervention by
+government authorities, natural disasters, water damage, earthquakes,
+fire, explosions, strikes and labor unrest, war, etc.
+
+11.2 Any failure by either Party, on one or more occasions, to invoke
+one or more of the provisions hereof, shall under no circumstances be
+interpreted as being a waiver by the interested Party of its right to
+invoke said provision(s) subsequently.
+
+11.3 The Agreement cancels and replaces any or all previous agreements,
+whether written or oral, between the Parties and having the same
+purpose, and constitutes the entirety of the agreement between said
+Parties concerning said purpose. No supplement or modification to the
+terms and conditions hereof shall be effective as between the Parties
+unless it is made in writing and signed by their duly authorized
+representatives.
+
+11.4 In the event that one or more of the provisions hereof were to
+conflict with a current or future applicable act or legislative text,
+said act or legislative text shall prevail, and the Parties shall make
+the necessary amendments so as to comply with said act or legislative
+text. All other provisions shall remain effective. Similarly, invalidity
+of a provision of the Agreement, for any reason whatsoever, shall not
+cause the Agreement as a whole to be invalid.
+
+
+ 11.5 LANGUAGE
+
+The Agreement is drafted in both French and English and both versions
+are deemed authentic.
+
+
+ Article 12 - NEW VERSIONS OF THE AGREEMENT
+
+12.1 Any person is authorized to duplicate and distribute copies of this
+Agreement.
+
+12.2 So as to ensure coherence, the wording of this Agreement is
+protected and may only be modified by the authors of the License, who
+reserve the right to periodically publish updates or new versions of the
+Agreement, each with a separate number. These subsequent versions may
+address new issues encountered by Free Software.
+
+12.3 Any Software distributed under a given version of the Agreement may
+only be subsequently distributed under the same version of the Agreement
+or a subsequent version, subject to the provisions of Article 5.3.4.
+
+
+ Article 13 - GOVERNING LAW AND JURISDICTION
+
+13.1 The Agreement is governed by French law. The Parties agree to
+endeavor to seek an amicable solution to any disagreements or disputes
+that may arise during the performance of the Agreement.
+
+13.2 Failing an amicable solution within two (2) months as from their
+occurrence, and unless emergency proceedings are necessary, the
+disagreements or disputes shall be referred to the Paris Courts having
+jurisdiction, by the more diligent Party.
+
+
+Version 2.0 dated 2006-09-05.
diff --git a/Licence_CeCILL_V2-fr.txt b/Licence_CeCILL_V2-fr.txt
new file mode 100644
index 0000000..1613fca
--- /dev/null
+++ b/Licence_CeCILL_V2-fr.txt
@@ -0,0 +1,512 @@
+
+CONTRAT DE LICENCE DE LOGICIEL LIBRE CeCILL
+
+
+ Avertissement
+
+Ce contrat est une licence de logiciel libre issue d'une concertation
+entre ses auteurs afin que le respect de deux grands principes préside à
+sa rédaction:
+
+    * d'une part, le respect des principes de diffusion des logiciels
+      libres: accès au code source, droits étendus conférés aux
+      utilisateurs,
+    * d'autre part, la désignation d'un droit applicable, le droit
+      français, auquel elle est conforme, tant au regard du droit de la
+      responsabilité civile que du droit de la propriété intellectuelle
+      et de la protection qu'il offre aux auteurs et titulaires des
+      droits patrimoniaux sur un logiciel.
+
+Les auteurs de la licence CeCILL (pour Ce[a] C[nrs] I[nria] L[ogiciel]
+L[ibre]) sont:
+
+Commissariat à l'Energie Atomique - CEA, établissement public de
+recherche à caractère scientifique, technique et industriel, dont le
+siège est situé 25 rue Leblanc, immeuble Le Ponant D, 75015 Paris.
+
+Centre National de la Recherche Scientifique - CNRS, établissement
+public à caractère scientifique et technologique, dont le siège est
+situé 3 rue Michel-Ange, 75794 Paris cedex 16.
+
+Institut National de Recherche en Informatique et en Automatique -
+INRIA, établissement public à caractère scientifique et technologique,
+dont le siège est situé Domaine de Voluceau, Rocquencourt, BP 105, 78153
+Le Chesnay cedex.
+
+
+    Préambule
+
+Ce contrat est une licence de logiciel libre dont l'objectif est de
+conférer aux utilisateurs la liberté de modification et de
+redistribution du logiciel régi par cette licence dans le cadre d'un
+modèle de diffusion en logiciel libre.
+
+L'exercice de ces libertés est assorti de certains devoirs à la charge
+des utilisateurs afin de préserver ce statut au cours des
+redistributions ultérieures.
+
+L'accessibilité au code source et les droits de copie, de modification
+et de redistribution qui en découlent ont pour contrepartie de n'offrir
+aux utilisateurs qu'une garantie limitée et de ne faire peser sur
+l'auteur du logiciel, le titulaire des droits patrimoniaux et les
+concédants successifs qu'une responsabilité restreinte.
+
+A cet égard l'attention de l'utilisateur est attirée sur les risques
+associés au chargement, à l'utilisation, à la modification et/ou au
+développement et à la reproduction du logiciel par l'utilisateur étant
+donné sa spécificité de logiciel libre, qui peut le rendre complexe à
+manipuler et qui le réserve donc à des développeurs ou des
+professionnels avertis possédant des connaissances informatiques
+approfondies. Les utilisateurs sont donc invités à charger et tester
+l'adéquation du logiciel à leurs besoins dans des conditions permettant
+d'assurer la sécurité de leurs systèmes et/ou de leurs données et, plus
+généralement, à l'utiliser et l'exploiter dans les mêmes conditions de
+sécurité. Ce contrat peut être reproduit et diffusé librement, sous
+réserve de le conserver en l'état, sans ajout ni suppression de clauses.
+
+Ce contrat est susceptible de s'appliquer à tout logiciel dont le
+titulaire des droits patrimoniaux décide de soumettre l'exploitation aux
+dispositions qu'il contient.
+
+
+    Article 1 - DEFINITIONS
+
+Dans ce contrat, les termes suivants, lorsqu'ils seront écrits avec une
+lettre capitale, auront la signification suivante:
+
+Contrat: désigne le présent contrat de licence, ses éventuelles versions
+postérieures et annexes.
+
+Logiciel: désigne le logiciel sous sa forme de Code Objet et/ou de Code
+Source et le cas échéant sa documentation, dans leur état au moment de
+l'acceptation du Contrat par le Licencié.
+
+Logiciel Initial: désigne le Logiciel sous sa forme de Code Source et
+éventuellement de Code Objet et le cas échéant sa documentation, dans
+leur état au moment de leur première diffusion sous les termes du Contrat.
+
+Logiciel Modifié: désigne le Logiciel modifié par au moins une
+Contribution.
+
+Code Source: désigne l'ensemble des instructions et des lignes de
+programme du Logiciel et auquel l'accès est nécessaire en vue de
+modifier le Logiciel.
+
+Code Objet: désigne les fichiers binaires issus de la compilation du
+Code Source.
+
+Titulaire: désigne le ou les détenteurs des droits patrimoniaux d'auteur
+sur le Logiciel Initial.
+
+Licencié: désigne le ou les utilisateurs du Logiciel ayant accepté le
+Contrat.
+
+Contributeur: désigne le Licencié auteur d'au moins une Contribution.
+
+Concédant: désigne le Titulaire ou toute personne physique ou morale
+distribuant le Logiciel sous le Contrat.
+
+Contribution: désigne l'ensemble des modifications, corrections,
+traductions, adaptations et/ou nouvelles fonctionnalités intégrées dans
+le Logiciel par tout Contributeur, ainsi que tout Module Interne.
+
+Module: désigne un ensemble de fichiers sources y compris leur
+documentation qui permet de réaliser des fonctionnalités ou services
+supplémentaires à ceux fournis par le Logiciel.
+
+Module Externe: désigne tout Module, non dérivé du Logiciel, tel que ce
+Module et le Logiciel s'exécutent dans des espaces d'adressage
+différents, l'un appelant l'autre au moment de leur exécution.
+
+Module Interne: désigne tout Module lié au Logiciel de telle sorte
+qu'ils s'exécutent dans le même espace d'adressage.
+
+GNU GPL: désigne la GNU General Public License dans sa version 2 ou
+toute version ultérieure, telle que publiée par Free Software Foundation
+Inc.
+
+Parties: désigne collectivement le Licencié et le Concédant.
+
+Ces termes s'entendent au singulier comme au pluriel.
+
+
+    Article 2 - OBJET
+
+Le Contrat a pour objet la concession par le Concédant au Licencié d'une
+licence non exclusive, cessible et mondiale du Logiciel telle que
+définie ci-après à l'article 5 pour toute la durée de protection des droits
+portant sur ce Logiciel.
+
+
+    Article 3 - ACCEPTATION
+
+3.1 L'acceptation par le Licencié des termes du Contrat est réputée
+acquise du fait du premier des faits suivants:
+
+    * (i) le chargement du Logiciel par tout moyen notamment par
+      téléchargement à partir d'un serveur distant ou par chargement à
+      partir d'un support physique;
+    * (ii) le premier exercice par le Licencié de l'un quelconque des
+      droits concédés par le Contrat.
+
+3.2 Un exemplaire du Contrat, contenant notamment un avertissement
+relatif aux spécificités du Logiciel, à la restriction de garantie et à
+la limitation à un usage par des utilisateurs expérimentés a été mis à
+disposition du Licencié préalablement à son acceptation telle que
+définie à l'article 3.1 ci dessus et le Licencié reconnaît en avoir pris
+connaissance.
+
+
+ Article 4 - ENTREE EN VIGUEUR ET DUREE
+
+
+ 4.1 ENTREE EN VIGUEUR
+
+Le Contrat entre en vigueur à la date de son acceptation par le Licencié
+telle que définie en 3.1.
+
+
+ 4.2 DUREE
+
+Le Contrat produira ses effets pendant toute la durée légale de
+protection des droits patrimoniaux portant sur le Logiciel.
+
+
+ Article 5 - ETENDUE DES DROITS CONCEDES
+
+Le Conc�dant conc�de au Licenci�, qui accepte, les droits suivants sur
+le Logiciel pour toutes destinations et pour la dur�e du Contrat dans
+les conditions ci-apr�s d�taill�es.
+
+Par ailleurs, si le Conc�dant d�tient ou venait � d�tenir un ou
+plusieurs brevets d'invention prot�geant tout ou partie des
+fonctionnalit�s du Logiciel ou de ses composants, il s'engage � ne pas
+opposer les �ventuels droits conf�r�s par ces brevets aux Licenci�s
+successifs qui utiliseraient, exploiteraient ou modifieraient le
+Logiciel. En cas de cession de ces brevets, le Conc�dant s'engage �
+faire reprendre les obligations du pr�sent alin�a aux cessionnaires.
+
+
+ 5.1 DROIT D'UTILISATION
+
+Le Licenci� est autoris� � utiliser le Logiciel, sans restriction quant
+aux domaines d'application, �tant ci-apr�s pr�cis� que cela comporte:
+
+ 1. la reproduction permanente ou provisoire du Logiciel en tout ou
+ partie par tout moyen et sous toute forme.
+
+ 2. le chargement, l'affichage, l'ex�cution, ou le stockage du
+ Logiciel sur tout support.
+
+ 3. la possibilit� d'en observer, d'en �tudier, ou d'en tester le
+ fonctionnement afin de d�terminer les id�es et principes qui sont
+ � la base de n'importe quel �l�ment de ce Logiciel; et ceci,
+ lorsque le Licenci� effectue toute op�ration de chargement,
+ d'affichage, d'ex�cution, de transmission ou de stockage du
+ Logiciel qu'il est en droit d'effectuer en vertu du Contrat.
+
+
+ 5.2 DROIT D'APPORTER DES CONTRIBUTIONS
+
+Le droit d'apporter des Contributions comporte le droit de traduire,
+d'adapter, d'arranger ou d'apporter toute autre modification au Logiciel
+et le droit de reproduire le logiciel en r�sultant.
+
+Le Licenci� est autoris� � apporter toute Contribution au Logiciel sous
+r�serve de mentionner, de fa�on explicite, son nom en tant qu'auteur de
+cette Contribution et la date de cr�ation de celle-ci.
+
+
+ 5.3 DROIT DE DISTRIBUTION
+
+Le droit de distribution comporte notamment le droit de diffuser, de
+transmettre et de communiquer le Logiciel au public sur tout support et
+par tout moyen ainsi que le droit de mettre sur le march� � titre
+on�reux ou gratuit, un ou des exemplaires du Logiciel par tout proc�d�.
+
+Le Licenci� est autoris� � distribuer des copies du Logiciel, modifi� ou
+non, � des tiers dans les conditions ci-apr�s d�taill�es.
+
+
+ 5.3.1 DISTRIBUTION DU LOGICIEL SANS MODIFICATION
+
+Le Licenci� est autoris� � distribuer des copies conformes du Logiciel,
+sous forme de Code Source ou de Code Objet, � condition que cette
+distribution respecte les dispositions du Contrat dans leur totalit� et
+soit accompagn�e:
+
+ 1. d'un exemplaire du Contrat,
+
+ 2. d'un avertissement relatif � la restriction de garantie et de
+ responsabilit� du Conc�dant telle que pr�vue aux articles 8
+ et 9,
+
+et que, dans le cas o� seul le Code Objet du Logiciel est redistribu�,
+le Licenci� permette aux futurs Licenci�s d'acc�der facilement au Code
+Source complet du Logiciel en indiquant les modalit�s d'acc�s, �tant
+entendu que le co�t additionnel d'acquisition du Code Source ne devra
+pas exc�der le simple co�t de transfert des donn�es.
+
+
+ 5.3.2 DISTRIBUTION DU LOGICIEL MODIFIE
+
+Lorsque le Licenci� apporte une Contribution au Logiciel, les conditions
+de distribution du Logiciel Modifi� en r�sultant sont alors soumises �
+l'int�gralit� des dispositions du Contrat.
+
+Le Licenci� est autoris� � distribuer le Logiciel Modifi�, sous forme de
+code source ou de code objet, � condition que cette distribution
+respecte les dispositions du Contrat dans leur totalit� et soit
+accompagn�e:
+
+ 1. d'un exemplaire du Contrat,
+
+ 2. d'un avertissement relatif � la restriction de garantie et de
+ responsabilit� du Conc�dant telle que pr�vue aux articles 8
+ et 9,
+
+et que, dans le cas o� seul le code objet du Logiciel Modifi� est
+redistribu�, le Licenci� permette aux futurs Licenci�s d'acc�der
+facilement au code source complet du Logiciel Modifi� en indiquant les
+modalit�s d'acc�s, �tant entendu que le co�t additionnel d'acquisition
+du code source ne devra pas exc�der le simple co�t de transfert des donn�es.
+
+
+ 5.3.3 DISTRIBUTION DES MODULES EXTERNES
+
+Lorsque le Licenci� a d�velopp� un Module Externe les conditions du
+Contrat ne s'appliquent pas � ce Module Externe, qui peut �tre distribu�
+sous un contrat de licence diff�rent.
+
+
+ 5.3.4 COMPATIBILITE AVEC LA LICENCE GNU GPL
+
+Le Licenci� peut inclure un code soumis aux dispositions d'une des
+versions de la licence GNU GPL dans le Logiciel modifi� ou non et
+distribuer l'ensemble sous les conditions de la m�me version de la
+licence GNU GPL.
+
+Le Licenci� peut inclure le Logiciel modifi� ou non dans un code soumis
+aux dispositions d'une des versions de la licence GNU GPL et distribuer
+l'ensemble sous les conditions de la m�me version de la licence GNU GPL.
+
+
+ Article 6 - PROPRIETE INTELLECTUELLE
+
+
+ 6.1 SUR LE LOGICIEL INITIAL
+
+Le Titulaire est d�tenteur des droits patrimoniaux sur le Logiciel
+Initial. Toute utilisation du Logiciel Initial est soumise au respect
+des conditions dans lesquelles le Titulaire a choisi de diffuser son
+oeuvre et nul autre n'a la facult� de modifier les conditions de
+diffusion de ce Logiciel Initial.
+
+Le Titulaire s'engage � ce que le Logiciel Initial reste au moins r�gi
+par le Contrat et ce, pour la dur�e vis�e � l'article 4.2.
+
+
+ 6.2 SUR LES CONTRIBUTIONS
+
+Le Licenci� qui a d�velopp� une Contribution est titulaire sur celle-ci
+des droits de propri�t� intellectuelle dans les conditions d�finies par
+la l�gislation applicable.
+
+
+ 6.3 SUR LES MODULES EXTERNES
+
+Le Licenci� qui a d�velopp� un Module Externe est titulaire sur celui-ci
+des droits de propri�t� intellectuelle dans les conditions d�finies par
+la l�gislation applicable et reste libre du choix du contrat r�gissant
+sa diffusion.
+
+
+ 6.4 DISPOSITIONS COMMUNES
+
+Le Licenci� s'engage express�ment:
+
+ 1. � ne pas supprimer ou modifier de quelque mani�re que ce soit les
+ mentions de propri�t� intellectuelle appos�es sur le Logiciel;
+
+ 2. � reproduire � l'identique lesdites mentions de propri�t�
+ intellectuelle sur les copies du Logiciel modifi� ou non.
+
+Le Licenci� s'engage � ne pas porter atteinte, directement ou
+indirectement, aux droits de propri�t� intellectuelle du Titulaire et/ou
+des Contributeurs sur le Logiciel et � prendre, le cas �ch�ant, �
+l'�gard de son personnel toutes les mesures n�cessaires pour assurer le
+respect des dits droits de propri�t� intellectuelle du Titulaire et/ou
+des Contributeurs.
+
+
+ Article 7 - SERVICES ASSOCIES
+
+7.1 Le Contrat n'oblige en aucun cas le Conc�dant � la r�alisation de
+prestations d'assistance technique ou de maintenance du Logiciel.
+
+Cependant le Conc�dant reste libre de proposer ce type de services. Les
+termes et conditions d'une telle assistance technique et/ou d'une telle
+maintenance seront alors d�termin�s dans un acte s�par�. Ces actes de
+maintenance et/ou assistance technique n'engageront que la seule
+responsabilit� du Conc�dant qui les propose.
+
+7.2 De m�me, tout Conc�dant est libre de proposer, sous sa seule
+responsabilit�, � ses licenci�s une garantie, qui n'engagera que lui,
+lors de la redistribution du Logiciel et/ou du Logiciel Modifi� et ce,
+dans les conditions qu'il souhaite. Cette garantie et les modalit�s
+financi�res de son application feront l'objet d'un acte s�par� entre le
+Conc�dant et le Licenci�.
+
+
+ Article 8 - RESPONSABILITE
+
+8.1 Sous r�serve des dispositions de l'article 8.2, le Licenci� a la
+facult�, sous r�serve de prouver la faute du Conc�dant concern�, de
+solliciter la r�paration du pr�judice direct qu'il subirait du fait du
+Logiciel et dont il apportera la preuve.
+
+8.2 La responsabilit� du Conc�dant est limit�e aux engagements pris en
+application du Contrat et ne saurait �tre engag�e en raison notamment:
+(i) des dommages dus � l'inex�cution, totale ou partielle, de ses
+obligations par le Licenci�, (ii) des dommages directs ou indirects
+d�coulant de l'utilisation ou des performances du Logiciel subis par le
+Licenci� et (iii) plus g�n�ralement d'un quelconque dommage indirect. En
+particulier, les Parties conviennent express�ment que tout pr�judice
+financier ou commercial (par exemple perte de donn�es, perte de
+b�n�fices, perte d'exploitation, perte de client�le ou de commandes,
+manque � gagner, trouble commercial quelconque) ou toute action dirig�e
+contre le Licenci� par un tiers, constitue un dommage indirect et
+n'ouvre pas droit � r�paration par le Conc�dant.
+
+
+ Article 9 - GARANTIE
+
+9.1 Le Licenci� reconna�t que l'�tat actuel des connaissances
+scientifiques et techniques au moment de la mise en circulation du
+Logiciel ne permet pas d'en tester et d'en v�rifier toutes les
+utilisations ni de d�tecter l'existence d'�ventuels d�fauts. L'attention
+du Licenci� a �t� attir�e sur ce point sur les risques associ�s au
+chargement, � l'utilisation, la modification et/ou au d�veloppement et �
+la reproduction du Logiciel qui sont r�serv�s � des utilisateurs avertis.
+
+Il rel�ve de la responsabilit� du Licenci� de contr�ler, par tous
+moyens, l'ad�quation du produit � ses besoins, son bon fonctionnement et
+de s'assurer qu'il ne causera pas de dommages aux personnes et aux biens.
+
+9.2 Le Conc�dant d�clare de bonne foi �tre en droit de conc�der
+l'ensemble des droits attach�s au Logiciel (comprenant notamment les
+droits vis�s � l'article 5).
+
+9.3 Le Licenci� reconna�t que le Logiciel est fourni "en l'�tat" par le
+Conc�dant sans autre garantie, expresse ou tacite, que celle pr�vue �
+l'article 9.2 et notamment sans aucune garantie sur sa valeur commerciale,
+son caract�re s�curis�, innovant ou pertinent.
+
+En particulier, le Conc�dant ne garantit pas que le Logiciel est exempt
+d'erreur, qu'il fonctionnera sans interruption, qu'il sera compatible
+avec l'�quipement du Licenci� et sa configuration logicielle ni qu'il
+remplira les besoins du Licenci�.
+
+9.4 Le Conc�dant ne garantit pas, de mani�re expresse ou tacite, que le
+Logiciel ne porte pas atteinte � un quelconque droit de propri�t�
+intellectuelle d'un tiers portant sur un brevet, un logiciel ou sur tout
+autre droit de propri�t�. Ainsi, le Conc�dant exclut toute garantie au
+profit du Licenci� contre les actions en contrefa�on qui pourraient �tre
+diligent�es au titre de l'utilisation, de la modification, et de la
+redistribution du Logiciel. N�anmoins, si de telles actions sont
+exerc�es contre le Licenci�, le Conc�dant lui apportera son aide
+technique et juridique pour sa d�fense. Cette aide technique et
+juridique est d�termin�e au cas par cas entre le Conc�dant concern� et
+le Licenci� dans le cadre d'un protocole d'accord. Le Conc�dant d�gage
+toute responsabilit� quant � l'utilisation de la d�nomination du
+Logiciel par le Licenci�. Aucune garantie n'est apport�e quant �
+l'existence de droits ant�rieurs sur le nom du Logiciel et sur
+l'existence d'une marque.
+
+
+ Article 10 - RESILIATION
+
+10.1 En cas de manquement par le Licenci� aux obligations mises � sa
+charge par le Contrat, le Conc�dant pourra r�silier de plein droit le
+Contrat trente (30) jours apr�s notification adress�e au Licenci� et
+rest�e sans effet.
+
+10.2 Le Licenci� dont le Contrat est r�sili� n'est plus autoris� �
+utiliser, modifier ou distribuer le Logiciel. Cependant, toutes les
+licences qu'il aura conc�d�es ant�rieurement � la r�siliation du Contrat
+resteront valides sous r�serve qu'elles aient �t� effectu�es en
+conformit� avec le Contrat.
+
+
+ Article 11 - DISPOSITIONS DIVERSES
+
+
+ 11.1 CAUSE EXTERIEURE
+
+Aucune des Parties ne sera responsable d'un retard ou d'une d�faillance
+d'ex�cution du Contrat qui serait d� � un cas de force majeure, un cas
+fortuit ou une cause ext�rieure, telle que, notamment, le mauvais
+fonctionnement ou les interruptions du r�seau �lectrique ou de
+t�l�communication, la paralysie du r�seau li�e � une attaque
+informatique, l'intervention des autorit�s gouvernementales, les
+catastrophes naturelles, les d�g�ts des eaux, les tremblements de terre,
+le feu, les explosions, les gr�ves et les conflits sociaux, l'�tat de
+guerre...
+
+11.2 Le fait, par l'une ou l'autre des Parties, d'omettre en une ou
+plusieurs occasions de se pr�valoir d'une ou plusieurs dispositions du
+Contrat, ne pourra en aucun cas impliquer renonciation par la Partie
+int�ress�e � s'en pr�valoir ult�rieurement.
+
+11.3 Le Contrat annule et remplace toute convention ant�rieure, �crite
+ou orale, entre les Parties sur le m�me objet et constitue l'accord
+entier entre les Parties sur cet objet. Aucune addition ou modification
+aux termes du Contrat n'aura d'effet � l'�gard des Parties � moins
+d'�tre faite par �crit et sign�e par leurs repr�sentants d�ment habilit�s.
+
+11.4 Dans l'hypoth�se o� une ou plusieurs des dispositions du Contrat
+s'av�rerait contraire � une loi ou � un texte applicable, existants ou
+futurs, cette loi ou ce texte pr�vaudrait, et les Parties feraient les
+amendements n�cessaires pour se conformer � cette loi ou � ce texte.
+Toutes les autres dispositions resteront en vigueur. De m�me, la
+nullit�, pour quelque raison que ce soit, d'une des dispositions du
+Contrat ne saurait entra�ner la nullit� de l'ensemble du Contrat.
+
+
+ 11.5 LANGUE
+
+Le Contrat est r�dig� en langue fran�aise et en langue anglaise, ces
+deux versions faisant �galement foi.
+
+
+ Article 12 - NOUVELLES VERSIONS DU CONTRAT
+
+12.1 Toute personne est autoris�e � copier et distribuer des copies de
+ce Contrat.
+
+12.2 Afin d'en pr�server la coh�rence, le texte du Contrat est prot�g�
+et ne peut �tre modifi� que par les auteurs de la licence, lesquels se
+r�servent le droit de publier p�riodiquement des mises � jour ou de
+nouvelles versions du Contrat, qui poss�deront chacune un num�ro
+distinct. Ces versions ult�rieures seront susceptibles de prendre en
+compte de nouvelles probl�matiques rencontr�es par les logiciels libres.
+
+12.3 Tout Logiciel diffus� sous une version donn�e du Contrat ne pourra
+faire l'objet d'une diffusion ult�rieure que sous la m�me version du
+Contrat ou une version post�rieure, sous r�serve des dispositions de
+l'article 5.3.4.
+
+
+    Article 13 - LOI APPLICABLE ET COMPETENCE TERRITORIALE
+
+13.1 Le Contrat est régi par la loi française. Les Parties conviennent
+de tenter de régler à l'amiable les différends ou litiges qui
+viendraient à se produire par suite ou à l'occasion du Contrat.
+
+13.2 A défaut d'accord amiable dans un délai de deux (2) mois à compter
+de leur survenance et sauf situation relevant d'une procédure d'urgence,
+les différends ou litiges seront portés par la Partie la plus diligente
+devant les Tribunaux compétents de Paris.
+
+
+Version 2.0 du 2006-09-05.
diff --git a/VERSION b/VERSION
new file mode 100644
index 0000000..8f0916f
--- /dev/null
+++ b/VERSION
@@ -0,0 +1 @@
+0.5.0
diff --git a/doxyfile b/doxyfile
new file mode 100644
index 0000000..84eef52
--- /dev/null
+++ b/doxyfile
@@ -0,0 +1,1237 @@
+# Doxyfile 1.4.6
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project
+#
+# All text after a hash (#) is considered a comment and will be ignored
+# The format is:
+# TAG = value [value, ...]
+# For lists items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ")
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
+# by quotes) that should identify the project.
+
+PROJECT_NAME = ecoPCR
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number.
+# This could be handy for archiving the generated documentation or
+# if some version control system is used.
+
+PROJECT_NUMBER = 0.1
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
+# base path where the generated documentation will be put.
+# If a relative path is entered, it will be relative to the location
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY = doc/api
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
+# 4096 sub-directories (in 2 levels) under the output directory of each output
+# format and will distribute the generated files over these directories.
+# Enabling this option can be useful when feeding doxygen a huge amount of
+# source files, where putting all generated files in the same directory would
+# otherwise cause performance problems for the file system.
+
+CREATE_SUBDIRS = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# The default language is English, other supported languages are:
+# Brazilian, Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish,
+# Dutch, Finnish, French, German, Greek, Hungarian, Italian, Japanese,
+# Japanese-en (Japanese with English messages), Korean, Korean-en, Norwegian,
+# Polish, Portuguese, Romanian, Russian, Serbian, Slovak, Slovene, Spanish,
+# Swedish, and Ukrainian.
+
+OUTPUT_LANGUAGE = English
+
+# This tag can be used to specify the encoding used in the generated output.
+# The encoding is not always determined by the language that is chosen,
+# but also whether or not the output is meant for Windows or non-Windows users.
+# In case there is a difference, setting the USE_WINDOWS_ENCODING tag to YES
+# forces the Windows encoding (this is the default for the Windows binary),
+# whereas setting the tag to NO uses a Unix-style encoding (the default for
+# all platforms other than Windows).
+
+USE_WINDOWS_ENCODING = NO
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
+# include brief member descriptions after the members that are listed in
+# the file and class documentation (similar to JavaDoc).
+# Set to NO to disable this.
+
+BRIEF_MEMBER_DESC = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
+# the brief description of a member or function before the detailed description.
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+
+REPEAT_BRIEF = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator
+# that is used to form the text in various listings. Each string
+# in this list, if found as the leading text of the brief description, will be
+# stripped from the text and the result after processing the whole list, is
+# used as the annotated text. Otherwise, the brief description is used as-is.
+# If left blank, the following values are used ("$name" is automatically
+# replaced with the name of the entity): "The $name class" "The $name widget"
+# "The $name file" "is" "provides" "specifies" "contains"
+# "represents" "a" "an" "the"
+
+ABBREVIATE_BRIEF =
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# Doxygen will generate a detailed section even if there is only a brief
+# description.
+
+ALWAYS_DETAILED_SEC = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+
+INLINE_INHERITED_MEMB = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
+# path before files name in the file list and in the header files. If set
+# to NO the shortest path that makes the file name unique will be used.
+
+FULL_PATH_NAMES = YES
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
+# can be used to strip a user-defined part of the path. Stripping is
+# only done if one of the specified strings matches the left-hand part of
+# the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the
+# path to strip.
+
+STRIP_FROM_PATH =
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
+# the path mentioned in the documentation of a class, which tells
+# the reader which header file to include in order to use a class.
+# If left blank only the name of the header file containing the class
+# definition is used. Otherwise one should specify the include paths that
+# are normally passed to the compiler using the -I flag.
+
+STRIP_FROM_INC_PATH =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
+# (but less readable) file names. This can be useful if your file system
+# doesn't support long names like on DOS, Mac, or CD-ROM.
+
+SHORT_NAMES = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
+# will interpret the first line (until the first dot) of a JavaDoc-style
+# comment as the brief description. If set to NO, the JavaDoc
+# comments will behave just like the Qt-style comments (thus requiring an
+# explicit @brief command for a brief description.
+
+JAVADOC_AUTOBRIEF = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
+# treat a multi-line C++ special comment block (i.e. a block of //! or ///
+# comments) as a brief description. This used to be the default behaviour.
+# The new default is to treat a multi-line C++ comment block as a detailed
+# description. Set this tag to YES if you prefer the old behaviour instead.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the DETAILS_AT_TOP tag is set to YES then Doxygen
+# will output the detailed description near the top, like JavaDoc.
+# If set to NO, the detailed description appears after the member
+# documentation.
+
+DETAILS_AT_TOP = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
+# member inherits the documentation from any documented member that it
+# re-implements.
+
+INHERIT_DOCS = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
+# a new page for each member. If set to NO, the documentation of a member will
+# be part of the file/class/namespace that contains it.
+
+SEPARATE_MEMBER_PAGES = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab.
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
+TAB_SIZE = 8
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
+# sources only. Doxygen will then generate output that is more tailored for C.
+# For instance, some of the names that are used will be different. The list
+# of all members will be omitted, etc.
+
+OPTIMIZE_OUTPUT_FOR_C = YES
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
+# sources only. Doxygen will then generate output that is more tailored for Java.
+# For instance, namespaces will be presented as packages, qualified scopes
+# will look different, etc.
+
+OPTIMIZE_OUTPUT_JAVA = NO
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want to
+# include (a tag file for) the STL sources as input, then you should
+# set this tag to YES in order to let doxygen match functions declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string); v.s.
+# func(std::string) {}). This also make the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+
+BUILTIN_STL_SUPPORT = NO
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+
+DISTRIBUTE_GROUP_DOC = NO
+
+# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
+# the same type (for instance a group of public functions) to be put as a
+# subgroup of that type (e.g. under the Public Functions section). Set it to
+# NO to prevent subgrouping. Alternatively, this can be done per class using
+# the \nosubgrouping command.
+
+SUBGROUPING = YES
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# documentation are documented, even if no documentation was available.
+# Private class members and static file members will be hidden unless
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
+
+EXTRACT_ALL = YES
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
+# will be included in the documentation.
+
+EXTRACT_PRIVATE = NO
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file
+# will be included in the documentation.
+
+EXTRACT_STATIC = NO
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
+# defined locally in source files will be included in the documentation.
+# If set to NO only classes defined in header files are included.
+
+EXTRACT_LOCAL_CLASSES = YES
+
+# This flag is only useful for Objective-C code. When set to YES local
+# methods, which are defined in the implementation section but not in
+# the interface are included in the documentation.
+# If set to NO (the default) only methods in the interface are included.
+
+EXTRACT_LOCAL_METHODS = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
+# undocumented members of documented classes, files or namespaces.
+# If set to NO (the default) these members will be included in the
+# various overviews, but no documentation section is generated.
+# This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_MEMBERS = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy.
+# If set to NO (the default) these classes will be included in the various
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_CLASSES = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
+# friend (class|struct|union) declarations.
+# If set to NO (the default) these declarations will be included in the
+# documentation.
+
+HIDE_FRIEND_COMPOUNDS = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
+# documentation blocks found inside the body of a function.
+# If set to NO (the default) these blocks will be appended to the
+# function's detailed documentation block.
+
+HIDE_IN_BODY_DOCS = NO
+
+# The INTERNAL_DOCS tag determines if documentation
+# that is typed after a \internal command is included. If the tag is set
+# to NO (the default) then the documentation will be excluded.
+# Set it to YES to include the internal documentation.
+
+INTERNAL_DOCS = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
+# file names in lower-case letters. If set to YES upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+
+CASE_SENSE_NAMES = NO
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
+# will show members with their full class and namespace scopes in the
+# documentation. If set to YES the scope will be hidden.
+
+HIDE_SCOPE_NAMES = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
+# will put a list of the files that are included by a file in the documentation
+# of that file.
+
+SHOW_INCLUDE_FILES = YES
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
+# is inserted in the documentation for inline members.
+
+INLINE_INFO = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
+# will sort the (detailed) documentation of file and class members
+# alphabetically by member name. If set to NO the members will appear in
+# declaration order.
+
+SORT_MEMBER_DOCS = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
+# brief documentation of file, namespace and class members alphabetically
+# by member name. If set to NO (the default) the members will appear in
+# declaration order.
+
+SORT_BRIEF_DOCS = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
+# sorted by fully-qualified names, including namespaces. If set to
+# NO (the default), the class list will be sorted only by class name,
+# not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the
+# alphabetical list.
+
+SORT_BY_SCOPE_NAME = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or
+# disable (NO) the todo list. This list is created by putting \todo
+# commands in the documentation.
+
+GENERATE_TODOLIST = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or
+# disable (NO) the test list. This list is created by putting \test
+# commands in the documentation.
+
+GENERATE_TESTLIST = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or
+# disable (NO) the bug list. This list is created by putting \bug
+# commands in the documentation.
+
+GENERATE_BUGLIST = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
+# disable (NO) the deprecated list. This list is created by putting
+# \deprecated commands in the documentation.
+
+GENERATE_DEPRECATEDLIST = YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
+# the initial value of a variable or define consists of for it to appear in
+# the documentation. If the initializer consists of more lines than specified
+# here it will be hidden. Use a value of 0 to hide initializers completely.
+# The appearance of the initializer of individual variables and defines in the
+# documentation can be controlled using \showinitializer or \hideinitializer
+# command in the documentation regardless of this setting.
+
+MAX_INITIALIZER_LINES = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
+# at the bottom of the documentation of classes and structs. If set to YES the
+# list will mention the files that were used to generate the documentation.
+
+SHOW_USED_FILES = YES
+
+# If the sources in your project are distributed over multiple directories
+# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
+# in the documentation. The default is NO.
+
+SHOW_DIRECTORIES = NO
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from the
+# version control system). Doxygen will invoke the program by executing (via
+# popen()) the command <command> <input-file>, where <command> is the value of
+# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
+# provided by doxygen. Whatever the program writes to standard output
+# is used as the file version. See the manual for examples.
+
+FILE_VERSION_FILTER =
+
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
+QUIET = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated by doxygen. Possible values are YES and NO. If left blank
+# NO is used.
+
+WARNINGS = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
+# automatically be disabled.
+
+WARN_IF_UNDOCUMENTED = YES
+
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some
+# parameters in a documented function, or documenting parameters that
+# don't exist or using markup commands wrongly.
+
+WARN_IF_DOC_ERROR = YES
+
+# The WARN_NO_PARAMDOC option can be enabled to get warnings for
+# functions that are documented, but have no documentation for their parameters
+# or return value. If set to NO (the default) doxygen will only warn about
+# wrong or incomplete parameter documentation, but not about the absence of
+# documentation.
+
+WARN_NO_PARAMDOC = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that
+# doxygen can produce. The string should contain the $file, $line, and $text
+# tags, which will be replaced by the file and line number from which the
+# warning originated and the warning text. Optionally the format may contain
+# $version, which will be replaced by the version of the file (if it could
+# be obtained via FILE_VERSION_FILTER)
+
+WARN_FORMAT = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning
+# and error messages should be written. If left blank the output is written
+# to stderr.
+
+WARN_LOGFILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT =
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py
+
+FILE_PATTERNS =
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories
+# should be searched for input files as well. Possible values are YES and NO.
+# If left blank NO is used.
+
+RECURSIVE = NO
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# be excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE =
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
+# directories that are symbolic links (a Unix filesystem feature) are excluded
+# from the input.
+
+EXCLUDE_SYMLINKS = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank all files are included.
+
+EXAMPLE_PATTERNS =
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude
+# commands irrespective of the value of the RECURSIVE tag.
+# Possible values are YES and NO. If left blank NO is used.
+
+EXAMPLE_RECURSIVE = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain images that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command <filter> <input-file>, where <filter>
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
+# input file. Doxygen will then use the output that the filter program writes
+# to standard output. If FILTER_PATTERNS is specified, this tag will be
+# ignored.
+
+INPUT_FILTER =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis. Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match. The filters are a list of the form:
+# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
+# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER
+# is applied to all files.
+
+FILTER_PATTERNS =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will be used to filter the input files when producing source
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
+FILTER_SOURCE_FILES = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will
+# be generated. Documented entities will be cross-referenced with these sources.
+# Note: To get rid of all source code in the generated output, make sure also
+# VERBATIM_HEADERS is set to NO.
+
+SOURCE_BROWSER = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body
+# of functions and classes directly in the documentation.
+
+INLINE_SOURCES = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
+# doxygen to hide any special comment blocks from generated source code
+# fragments. Normal C and C++ comments will always remain visible.
+
+STRIP_CODE_COMMENTS = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES (the default)
+# then for each documented function all documented
+# functions referencing it will be listed.
+
+REFERENCED_BY_RELATION = YES
+
+# If the REFERENCES_RELATION tag is set to YES (the default)
+# then for each documented function all documented entities
+# called/used by that function will be listed.
+
+REFERENCES_RELATION = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code
+# will point to the HTML generated by the htags(1) tool instead of doxygen
+# built-in source browser. The htags tool is part of GNU's global source
+# tagging system (see http://www.gnu.org/software/global/global.html). You
+# will need version 4.8.6 or higher.
+
+USE_HTAGS = NO
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
+# will generate a verbatim copy of the header file for each class for
+# which an include is specified. Set to NO to disable this.
+
+VERBATIM_HEADERS = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
+# of all compounds will be generated. Enable this if the project
+# contains a lot of classes, structs, unions or interfaces.
+
+ALPHABETICAL_INDEX = NO
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
+# in which this list will be split (can be a number in the range [1..20])
+
+COLS_IN_ALPHA_INDEX = 5
+
+# In case all classes in a project start with a common prefix, all
+# classes will be put under the same header in the alphabetical index.
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
+# should be ignored while generating the index headers.
+
+IGNORE_PREFIX =
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
+# generate HTML output.
+
+GENERATE_HTML = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `html' will be used as the default path.
+
+HTML_OUTPUT = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
+# doxygen will generate files with .html extension.
+
+HTML_FILE_EXTENSION = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard header.
+
+HTML_HEADER =
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard footer.
+
+HTML_FOOTER =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
+# style sheet that is used by each HTML page. It can be used to
+# fine-tune the look of the HTML output. If the tag is left blank doxygen
+# will generate a default style sheet. Note that doxygen will try to copy
+# the style sheet file to the HTML output directory, so don't put your own
+# stylesheet in the HTML output directory as well, or it will be erased!
+
+HTML_STYLESHEET =
+
+# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
+# files or namespaces will be aligned in HTML using tables. If set to
+# NO a bullet list will be used.
+
+HTML_ALIGN_MEMBERS = YES
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
+# will be generated that can be used as input for tools like the
+# Microsoft HTML help workshop to generate a compressed HTML help file (.chm)
+# of the generated HTML documentation.
+
+GENERATE_HTMLHELP = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
+# be used to specify the file name of the resulting .chm file. You
+# can add a path in front of the file if the result should not be
+# written to the html output directory.
+
+CHM_FILE =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
+# be used to specify the location (absolute path including file name) of
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
+# the HTML help compiler on the generated index.hhp.
+
+HHC_LOCATION =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
+# controls if a separate .chi index file is generated (YES) or that
+# it should be included in the master .chm file (NO).
+
+GENERATE_CHI = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
+# controls whether a binary table of contents is generated (YES) or a
+# normal table of contents (NO) in the .chm file.
+
+BINARY_TOC = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members
+# to the contents of the HTML help documentation and to the tree view.
+
+TOC_EXPAND = NO
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index at
+# top of each HTML page. The value NO (the default) enables the index and
+# the value YES disables it.
+
+DISABLE_INDEX = NO
+
+# This tag can be used to set the number of enum values (range [1..20])
+# that doxygen will group on one line in the generated HTML documentation.
+
+ENUM_VALUES_PER_LINE = 4
+
+# If the GENERATE_TREEVIEW tag is set to YES, a side panel will be
+# generated containing a tree-like index structure (just like the one that
+# is generated for HTML Help). For this to work a browser that supports
+# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+,
+# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are
+# probably better off using the HTML help feature.
+
+GENERATE_TREEVIEW = NO
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
+# used to set the initial width (in pixels) of the frame in which the tree
+# is shown.
+
+TREEVIEW_WIDTH = 250
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
+# generate Latex output.
+
+GENERATE_LATEX = YES
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked. If left blank `latex' will be used as the default command name.
+
+LATEX_CMD_NAME = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
+# generate index for LaTeX. If left blank `makeindex' will be used as the
+# default command name.
+
+MAKEINDEX_CMD_NAME = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_LATEX = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, a4wide, letter, legal and
+# executive. If left blank a4wide will be used.
+
+PAPER_TYPE = a4wide
+
+# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS = NO
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
+USE_PDFLATEX = NO
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode.
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not
+# include the index chapters (such as File Index, Compound Index, etc.)
+# in the output.
+
+LATEX_HIDE_INDICES = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
+# The RTF output is optimized for Word 97 and may not look very pretty with
+# other RTF readers or editors.
+
+GENERATE_RTF = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `rtf' will be used as the default path.
+
+RTF_OUTPUT = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
+# RTF documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_RTF = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
+# will contain hyperlink fields. The RTF file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using WORD or other
+# programs which support those fields.
+# Note: wordpad (write) and others do not support links.
+
+RTF_HYPERLINKS = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# config file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+
+RTF_STYLESHEET_FILE =
+
+# Set optional variables used in the generation of an rtf document.
+# Syntax is similar to doxygen's config file.
+
+RTF_EXTENSIONS_FILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
+# generate man pages
+
+GENERATE_MAN = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `man' will be used as the default path.
+
+MAN_OUTPUT = man
+
+# The MAN_EXTENSION tag determines the extension that is added to
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION = .3
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
+# then it will generate one additional man file for each entity
+# documented in the real man page(s). These additional files
+# only source the real man page, but without them the man command
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will
+# generate an XML file that captures the structure of
+# the code including all documentation.
+
+GENERATE_XML = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `xml' will be used as the default path.
+
+XML_OUTPUT = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_SCHEMA =
+
+# The XML_DTD tag can be used to specify an XML DTD,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_DTD =
+
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
+# dump the program listings (including syntax highlighting
+# and cross-referencing information) to the XML output. Note that
+# enabling this will significantly increase the size of the XML output.
+
+XML_PROGRAMLISTING = YES
+
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
+# generate an AutoGen Definitions (see autogen.sf.net) file
+# that captures the structure of the code including all
+# documentation. Note that this feature is still experimental
+# and incomplete at the moment.
+
+GENERATE_AUTOGEN_DEF = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will
+# generate a Perl module file that captures the structure of
+# the code including all documentation. Note that this
+# feature is still experimental and incomplete at the
+# moment.
+
+GENERATE_PERLMOD = NO
+
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able
+# to generate PDF and DVI output from the Perl module output.
+
+PERLMOD_LATEX = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
+# nicely formatted so it can be parsed by a human reader. This is useful
+# if you want to understand what is going on. On the other hand, if this
+# tag is set to NO the size of the Perl module output will be much smaller
+# and Perl will parse it just the same.
+
+PERLMOD_PRETTY = YES
+
+# The names of the make variables in the generated doxyrules.make file
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
+# This is useful so different doxyrules.make files included by the same
+# Makefile don't overwrite each other's variables.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
+ENABLE_PREPROCESSING = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_DEFINED tags.
+
+EXPAND_ONLY_PREDEF = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files
+# in the INCLUDE_PATH (see below) will be searched if a #include is found.
+
+SEARCH_INCLUDES = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
+INCLUDE_FILE_PATTERNS =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED =
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all function-like macros that are alone
+# on a line, have an all uppercase name, and do not end with a semicolon. Such
+# function macros are typically used for boiler-plate code, and will confuse
+# the parser if not removed.
+
+SKIP_FUNCTION_MACROS = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles.
+# Optionally an initial location of the external documentation
+# can be added for each tagfile. The format of a tag file without
+# this location is as follows:
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where "loc1" and "loc2" can be relative or absolute paths or
+# URLs. If a location is present for each tag, the installdox tool
+# does not have to be run to correct the links.
+# Note that each tag file must have a unique name
+# (where the name does NOT include the path)
+# If a tag file is not located in the directory in which doxygen
+# is run, you must also specify the path to the tagfile here.
+
+TAGFILES =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create
+# a tag file that is based on the input files it reads.
+
+GENERATE_TAGFILE =
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed
+# in the class index. If set to NO only the inherited external classes
+# will be listed.
+
+ALLEXTERNALS = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will
+# be listed.
+
+EXTERNAL_GROUPS = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of `which perl').
+
+PERL_PATH = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
+# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base
+# or super classes. Setting the tag to NO turns the diagrams off. Note that
+# this option is superseded by the HAVE_DOT option below. This is only a
+# fallback. It is recommended to install and use dot, since it yields more
+# powerful graphs.
+
+CLASS_DIAGRAMS = YES
+
+# If set to YES, the inheritance and collaboration graphs will hide
+# inheritance and usage relations if the target is undocumented
+# or is not a class.
+
+HIDE_UNDOC_RELATIONS = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz, a graph visualization
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section
+# have no effect if this option is set to NO (the default)
+
+HAVE_DOT = NO
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect inheritance relations. Setting this tag to YES will force
+# the CLASS_DIAGRAMS tag to NO.
+
+CLASS_GRAPH = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect implementation dependencies (inheritance, containment, and
+# class references variables) of the class with other documented classes.
+
+COLLABORATION_GRAPH = YES
+
+# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for groups, showing the direct groups dependencies
+
+GROUP_GRAPHS = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+
+UML_LOOK = NO
+
+# If set to YES, the inheritance and collaboration graphs will show the
+# relations between templates and their instances.
+
+TEMPLATE_RELATIONS = NO
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
+# tags are set to YES then doxygen will generate a graph for each documented
+# file showing the direct and indirect include dependencies of the file with
+# other documented files.
+
+INCLUDE_GRAPH = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
+# documented header file showing the documented files that directly or
+# indirectly include this file.
+
+INCLUDED_BY_GRAPH = YES
+
+# If the CALL_GRAPH and HAVE_DOT tags are set to YES then doxygen will
+# generate a call dependency graph for every global function or class method.
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable call graphs for selected
+# functions only using the \callgraph command.
+
+CALL_GRAPH = NO
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
+# will show a graphical hierarchy of all classes instead of a textual one.
+
+GRAPHICAL_HIERARCHY = YES
+
+# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
+# then doxygen will show the dependencies a directory has on other directories
+# in a graphical way. The dependency relations are determined by the #include
+# relations between the files in the directories.
+
+DIRECTORY_GRAPH = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. Possible values are png, jpg, or gif
+# If left blank png will be used.
+
+DOT_IMAGE_FORMAT = png
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+
+DOT_PATH =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS =
+
+# The MAX_DOT_GRAPH_WIDTH tag can be used to set the maximum allowed width
+# (in pixels) of the graphs generated by dot. If a graph becomes larger than
+# this value, doxygen will try to truncate the graph, so that it fits within
+# the specified constraint. Beware that most browsers cannot cope with very
+# large images.
+
+MAX_DOT_GRAPH_WIDTH = 1024
+
+# The MAX_DOT_GRAPH_HEIGHT tag can be used to set the maximum allowed height
+# (in pixels) of the graphs generated by dot. If a graph becomes larger than
+# this value, doxygen will try to truncate the graph, so that it fits within
+# the specified constraint. Beware that most browsers cannot cope with very
+# large images.
+
+MAX_DOT_GRAPH_HEIGHT = 1024
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
+# graphs generated by dot. A depth value of 3 means that only nodes reachable
+# from the root by following a path via at most 3 edges will be shown. Nodes
+# that lay further from the root node will be omitted. Note that setting this
+# option to 1 or 2 may greatly reduce the computation time needed for large
+# code bases. Also note that a graph may be further truncated if the graph's
+# image dimensions are not sufficient to fit the graph (see MAX_DOT_GRAPH_WIDTH
+# and MAX_DOT_GRAPH_HEIGHT). If 0 is used for the depth value (the default),
+# the graph is not depth-constrained.
+
+MAX_DOT_GRAPH_DEPTH = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, which results in a white background.
+# Warning: Depending on the platform used, enabling this option may lead to
+# badly anti-aliased labels on the edges of a graph (i.e. they become hard to
+# read).
+
+DOT_TRANSPARENT = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10)
+# support this, this feature is disabled by default.
+
+DOT_MULTI_TARGETS = NO
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
+# generate a legend page explaining the meaning of the various boxes and
+# arrows in the dot generated graphs.
+
+GENERATE_LEGEND = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
+# remove the intermediate dot files that are used to generate
+# the various graphs.
+
+DOT_CLEANUP = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to the search engine
+#---------------------------------------------------------------------------
+
+# The SEARCHENGINE tag specifies whether or not a search engine should be
+# used. If set to NO the values of all tags below this one will be ignored.
+
+SEARCHENGINE = NO
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 0000000..9a0759c
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,103 @@
+EXEC=ecoPCR ecofind ecogrep
+
+PCR_SRC= ecopcr.c
+PCR_OBJ= $(patsubst %.c,%.o,$(PCR_SRC))
+
+FIND_SRC= ecofind.c
+FIND_OBJ= $(patsubst %.c,%.o,$(FIND_SRC))
+
+GREP_SRC= ecogrep.c
+GREP_OBJ= $(patsubst %.c,%.o,$(GREP_SRC))
+
+IUT_SRC= ecoisundertaxon.c
+IUT_OBJ= $(patsubst %.c,%.o,$(IUT_SRC))
+
+SRCS= $(PCR_SRC) $(FIND_SRC) $(IUT_SRC)
+
+LIB= -lecoPCR -lthermo -lapat -lz -lm
+
+LIBFILE= libapat/libapat.a \
+ libecoPCR/libecoPCR.a \
+ libthermo/libthermo.a
+
+
+include global.mk
+
+all: $(EXEC)
+
+
+########
+#
+# ecoPCR compilation
+#
+########
+
+# executable compilation and link
+
+ecoPCR: $(PCR_OBJ) $(LIBFILE)
+ $(CC) $(LDFLAGS) -o $@ $< $(LIBPATH) $(LIB)
+
+########
+#
+# ecofind compilation
+#
+########
+
+# executable compilation and link
+
+ecofind: $(FIND_OBJ) $(LIBFILE)
+ $(CC) $(LDFLAGS) -o $@ $< $(LIBPATH) $(LIB)
+
+########
+#
+# ecogrep compilation
+#
+########
+
+# executable compilation and link
+
+ecogrep: $(GREP_OBJ) $(LIBFILE)
+ $(CC) $(LDFLAGS) -o $@ $< $(LIBPATH) $(LIB)
+
+########
+#
+# IsUnderTaxon compilation
+#
+########
+
+# executable compilation and link
+
+ecoisundertaxon: $(IUT_OBJ) $(LIBFILE)
+ $(CC) $(LDFLAGS) -o $@ $< $(LIBPATH) $(LIB)
+
+########
+#
+# library compilation
+#
+########
+
+libapat/libapat.a:
+ $(MAKE) -C libapat
+
+libecoPCR/libecoPCR.a:
+ $(MAKE) -C libecoPCR
+
+libthermo/libthermo.a:
+ $(MAKE) -C libthermo
+
+
+########
+#
+# project management
+#
+########
+
+clean:
+ rm -f *.o
+ rm -f $(EXEC)
+ $(MAKE) -C libapat clean
+ $(MAKE) -C libecoPCR clean
+ $(MAKE) -C libthermo clean
+
+
+
\ No newline at end of file
diff --git a/src/ecofind.c b/src/ecofind.c
new file mode 100644
index 0000000..060c3ff
--- /dev/null
+++ b/src/ecofind.c
@@ -0,0 +1,373 @@
+#include "libecoPCR/ecoPCR.h"
+#include <regex.h>
+#include <string.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <stdio.h>
+#define VERSION "0.1"
+
+/**
+ * display the result
+ **/
+
+void displayPath(ecotx_t *taxon, ecotaxonomy_t *taxonomy){
+
+ if (taxon != taxon->parent){
+ displayPath(taxon->parent,taxonomy);
+ printf(";");
+ }
+ if (rank_index("no rank",taxonomy->ranks) != taxon->rank)
+ printf("%s:", taxonomy->ranks->label[taxon->rank]);
+ printf("%s", taxon->name);
+}
+
+
+static void printresult(ecotx_t *taxon,econame_t* name,ecotaxonomy_t *taxonomy, int32_t pathDisplay)
+{
+ char* rankname;
+ char* classname;
+ char* matchedname=taxon->name;
+
+ classname="scientific name";
+ if (name)
+ {
+ classname=name->classname;
+ matchedname=name->name;
+ }
+
+ rankname= taxonomy->ranks->label[taxon->rank];
+
+ printf("%10d \t| %15s \t|\t %-50s \t|\t %15s \t|\t %s",
+ taxon->taxid,
+ rankname,
+ matchedname,
+ classname,
+ taxon->name);
+ if (pathDisplay) {
+ printf("\t|\t");
+ displayPath(taxon, taxonomy);
+ }
+ printf("\n");
+}
+
+/**
+ * display header before printing any result
+ **/
+static void printheader(int32_t pathDisplay)
+{
+ printf("# %12s \t| %15s \t|\t %-50s \t|\t %-15s \t|\t %s%s\n#\n",
+ "taxonomy id",
+ "taxonomy rank",
+ "name",
+ "class name",
+ "scientific name",
+ pathDisplay ? "\t|\t path":"");
+}
+
+
+/**
+ * display son's list for given taxon
+ **/
+static void get_son(ecotaxonomy_t *taxonomy, ecotx_t *taxon, int32_t *count, char *rankname, int32_t pathDisplay)
+{
+ int32_t i;
+ ecotx_t *current_taxon;
+
+ for ( i = 0, current_taxon = taxonomy->taxons->taxon;
+ i < taxonomy->taxons->count;
+ i++, current_taxon++)
+ {
+
+ if (taxon != current_taxon && taxon->taxid == current_taxon->parent->taxid)
+ {
+ if (rankname == NULL || !strcmp(rankname,taxonomy->ranks->label[current_taxon->rank]))
+ {
+ printresult(current_taxon, NULL, taxonomy, pathDisplay);
+ (*count)++;
+ }
+ get_son(taxonomy,current_taxon,count,rankname, pathDisplay);
+ }
+ }
+}
+
+
+
+/**
+ * display list of rank filter option (-l option)
+ **/
+static void listfilteroptions(ecorankidx_t *ranks)
+{
+ int32_t i;
+
+ printf("#\n");
+
+ for ( i=0;
+ i < ranks->count;
+ i++)
+ {
+ printf("# %s \n",ranks->label[i]);
+ }
+
+ printf("#\n");
+}
+
+
+/* ---------------------------------------- */
+/* get back on given taxid taxonomic parent */
+/* and display it */
+/* ---------------------------------------- */
+void gettaxidparents(int32_t taxid, ecotaxonomy_t *taxonomy, char *rankname, int32_t pathDisplay)
+{
+ ecotx_t *next_parent;
+ int32_t c = 0;
+
+ next_parent = eco_findtaxonbytaxid(taxonomy, taxid);
+
+ printheader(pathDisplay);
+
+ printresult(next_parent, NULL,taxonomy, pathDisplay);
+
+ while ( strcmp(next_parent->name, "root") )
+ {
+ next_parent = next_parent->parent;
+ if (rankname == NULL || !strcmp(rankname,taxonomy->ranks->label[next_parent->rank]))
+ {
+ printresult(next_parent, NULL,taxonomy, pathDisplay);
+ c++;
+ }
+ }
+
+ printf("# %d parent(s) found\n#\n",c);
+}
+
+
+/**
+ * printout usage and exit
+ **/
+#define PP fprintf(stderr,
+
+static void ExitUsage(stat)
+ int stat;
+{
+ PP "usage: ecofind [-d database] [-h] [-l] [-P] [-r taxonomic rank] [-p taxid] [-s taxid] <taxon name pattern> ... \n");
+ PP "type \"ecofind -h\" for help\n");
+ if (stat)
+ exit(stat);
+}
+
+#undef PP
+
+/**
+ * printout help
+ **/
+#define PP fprintf(stdout,
+
+static void PrintHelp()
+{
+ PP "------------------------------------------\n");
+ PP " ecofind Version %s\n", VERSION);
+ PP "------------------------------------------\n");
+ PP "synopsis : searching for taxonomic and rank and\n");
+ PP " taxonomy id for given regular expression patterns\n\n");
+ PP "usage: ecofind [options] <patterns>\n");
+ PP "------------------------------------------\n");
+ PP "options:\n");
+ PP "-a : [A]ll enable the search on all alternative names and not only scientific names.\n\n");
+ PP "-d : [D]atabase containing the taxonomy.\n");
+ PP " To match the expected format, the database\n");
+ PP " has to be formated first by the ecoPCRFormat.py\n");
+ PP " program located in the tools directory.\n");
+ PP " Write the database radical without any extension.\n\n");
+ PP "-h : [H]elp - print <this> help\n\n");
+ PP "-l : [L]ist all taxonomic rank available for -r option\n\n");
+ PP "-P : [P]ath : add a column containing the full path for each displayed taxon\n\n");
+ PP "-p : [P]arents : specifiying this option displays all parental tree's information for the given taxid.\n\n");
+ PP "-r : [R]estrict to given taxonomic rank\n\n");
+ PP "-s : [S]ons: specifiying this option displays all subtree's information for the given taxid.\n\n");
+ PP "-P : Display taxonomic [P]ath as suplementary column in output\n\n");
+ PP "arguments:\n");
+ PP "<taxon> name pattern bearing regular expressions\n\n");
+ PP "------------------------------------------\n");
+ PP " http://www.grenoble.prabi.fr/trac/ecoPCR/\n");
+ PP "------------------------------------------\n\n");
+}
+
+/* ----------------------------------------------- */
+
+#define PATTERN_NUMBER 10
+#define PATTERN_LENGHT 40
+#define RESULT_LENGTH 100
+
+int main(int argc, char **argv)
+{
+ int32_t carg = 0;
+ int32_t nummatch = 0;
+ int32_t k,j = 0;
+ int32_t errflag = 0;
+ int32_t tax_count = 0;
+ int32_t alternative = 0;
+ char *prefix = NULL;
+ ecotaxonomy_t *taxonomy;
+ econame_t *name;
+ int32_t name_count;
+
+ int re_error;
+ int re_match;
+ regex_t re_preg;
+
+ int32_t uptree = 0;
+ int32_t subtree = 0;
+ char *rankname = NULL;
+ int32_t rankfilter = 1;
+ int32_t list = 0;
+ int32_t path = 0;
+
+ ecotx_t *subtree_parent;
+ int32_t count_son = 0;
+
+
+ while ((carg = getopt(argc, argv, "had:p:s:r:lP")) != -1) {
+ switch (carg) {
+ case 's': /* path to the database */
+ sscanf(optarg,"%d",&subtree);
+ break;
+
+ case 'r': /* rank filter */
+ rankname = ECOMALLOC(strlen(optarg)+1,"allocation rankname");
+ strcpy(rankname,optarg);
+ rankfilter = 0;
+ break;
+
+ case 'd': /* path to the database */
+ prefix = ECOMALLOC(strlen(optarg)+1,"allocation prefix");
+ strcpy(prefix,optarg);
+ break;
+
+ case 'l': /* list rank filter options */
+ list = 1;
+ break;
+
+ case 'P': /* Path output option */
+ path=1;
+ break;
+
+ case 'a': /* allow alternative names */
+ alternative = 1;
+ break;
+
+ case 'h': /* display help */
+ PrintHelp();
+ exit(0);
+ break;
+
+ case 'p': /* taxid */
+ sscanf(optarg,"%d",&uptree);
+ break;
+
+ case '?': /* bad option */
+ errflag++;
+ }
+ }
+
+ if ((argc - optind) < 1)
+ errflag++;
+
+ if (prefix == NULL)
+ {
+ prefix = getenv("ECOPCRDB");
+ if (prefix == NULL)
+ errflag++;
+ }
+
+ if (errflag && !uptree && !rankname && !subtree && !list)
+ ExitUsage(errflag);
+
+ /**
+ * load taxonomy using libecoPCR functions
+ **/
+ printf("# \n# opening %s database\n",prefix);
+
+ taxonomy = read_taxonomy(prefix,1);
+ tax_count = taxonomy->taxons->count;
+ name_count = taxonomy->names->count;
+
+
+ /* ---------------------------------------- */
+ /* list -r option possibility */
+ /* ---------------------------------------- */
+ if (list)
+ {
+ listfilteroptions(taxonomy->ranks);
+ return 0;
+ }
+
+ /* ---------------------------------------- */
+ /* display taxid parent if -t option */
+ /* specified in command line */
+ /* ---------------------------------------- */
+ if (uptree)
+ {
+ gettaxidparents(uptree,taxonomy,rankname, path);
+ return 0;
+ }
+
+ /* ---------------------------------------- */
+ /* display taxid sons if -s option */
+ /* specified in command line */
+ /* ---------------------------------------- */
+ if (subtree)
+ {
+ printheader(path);
+ subtree_parent = eco_findtaxonbytaxid(taxonomy,subtree);
+ printresult(subtree_parent, NULL,taxonomy, path);
+ get_son(taxonomy, subtree_parent,&count_son,rankname, path);
+ printf("# %d son(s) found\n#\n",count_son);
+ return 0;
+ }
+
+ printf("# %d taxons\n", tax_count);
+
+ /**
+ * parse taxonomy
+ **/
+ for (k=optind;k<argc;k++)
+ {
+ printf("#\n# searching for '%s' pattern\n",argv[k]);
+
+ re_error = regcomp (&re_preg, argv[k], REG_NOSUB | REG_EXTENDED | REG_ICASE);
+ if (re_error)
+ {
+ fprintf(stderr,"# misformed pattern '%s'\n",argv[k]);
+ exit(1);
+ }
+
+ nummatch=0;
+
+ printheader(path);
+
+ for (j=0,name=taxonomy->names->names;
+ j < name_count;
+ name++,j++)
+ {
+
+ if(rankname)
+ rankfilter = !(strcmp(rankname,taxonomy->ranks->label[name->taxon->rank]));
+
+ re_match = regexec (&re_preg, name->name, 0, NULL, 0);
+
+ if (!re_match && (alternative || name->is_scientificname) && rankfilter)
+ {
+ printresult(name->taxon,name,taxonomy, path);
+ nummatch++;
+ }
+
+ }
+
+ printf("# %d records found \n",nummatch);
+ regfree(&re_preg);
+ }
+
+ return 0;
+}
+
+
diff --git a/src/ecogrep.c b/src/ecogrep.c
new file mode 100644
index 0000000..8d45312
--- /dev/null
+++ b/src/ecogrep.c
@@ -0,0 +1,403 @@
+#include "libecoPCR/ecoPCR.h"
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+
+
+#define VERSION "0.1"
+
+/**
+ * Split one '|'-separated line of ecoPCR output and fill the fields
+ * ecogrep needs: accession (column 1), taxid (column 3), first
+ * primer site (column 14), second primer site (column 17) and the
+ * amplified sequence (column 21) — column numbers as documented in
+ * the ecoPCR help text, here as 0-based token indexes.
+ *
+ * NOTE(review): strtok() modifies `stream` in place, so the caller
+ * must keep a copy if the original line is needed afterwards (main()
+ * does, in `orig`). The strdup()'ed fields of the previous line are
+ * overwritten without being freed — looks like one set of strings is
+ * leaked per input line; confirm against new_ecoseq()/delete_ecoseq().
+ */
+void getLineContent(char *stream, ecoseq_t *seq, ecoseq_t *oligoseq_1, ecoseq_t *oligoseq_2){
+
+	int i;
+	char *buffer;
+
+	/* walk the '|'-separated tokens, keeping only the indexes of interest */
+	for( i=0, buffer = strtok(stream,"|");
+	     buffer != NULL;
+	     i++, buffer = strtok(NULL,"|"))
+	{
+		switch (i) {
+			case 0:
+				seq->AC = strdup(buffer);
+				break;
+			case 2:
+				sscanf(buffer,"%d",&seq->taxid);
+				break;
+			case 13:
+				oligoseq_1->SQ = strdup(buffer);
+				oligoseq_1->SQ_length = strlen(buffer);
+				break;
+			case 16:
+				oligoseq_2->SQ = strdup(buffer);
+				oligoseq_2->SQ_length = strlen(buffer);
+				break;
+			case 20:
+				seq->SQ = strdup(buffer);
+				seq->SQ_length = strlen(buffer);
+				break;
+			default:
+				break;
+		}
+	}
+}
+
+
+/**
+ * Free the first `num` entries of an array of heap-allocated strings.
+ * @param tab array of strings to release (the array itself is not freed)
+ * @param num number of entries in `tab`
+ */
+void freememory(char **tab, int32_t num){
+	int32_t i;
+	/* was `i < num - 1`, which skipped (and leaked) the last entry */
+	for (i=0;i<num;i++){
+		ECOFREE(tab[i],"Error in freememory function");
+	}
+}
+
+/**
+ * Check whether a pattern matches a sequence using the ManberAll
+ * approximate-matching function from libapat.
+ * @param seq     sequence to scan
+ * @param pattern pre-built pattern (from buildPattern()), or NULL
+ *
+ * @return int 1 if the pattern matches at least once, else 0
+ *
+ * NOTE(review): the SeqPtr returned by ecoseq2apatseq() is never
+ * released here — confirm whether this leaks once per call.
+ **/
+int ispatternmatching(ecoseq_t *seq, PatternPtr pattern){
+	if (pattern != NULL)
+	{
+		SeqPtr apatseq = NULL;
+		apatseq=ecoseq2apatseq(seq,apatseq,0);
+		/* ManberAll returns the number of hits; report match as boolean */
+		return ManberAll(apatseq,pattern,0,0,apatseq->seqlen) > 0;
+	}
+	else return 0;
+}
+
+/* ----------------------------------------------- */
+/* printout help */
+/* ----------------------------------------------- */
+#define PP fprintf(stdout,
+
+/* Print the ecogrep help text on stdout. */
+static void PrintHelp()
+{
+	PP "\n------------------------------------------\n");
+	PP " ecogrep Version %s\n", VERSION);
+	PP "------------------------------------------\n");
+	PP " synopsis : filtering ecoPCR result based on\n");
+	PP " taxonomic id filter and regular expression pattern\n");
+	PP " usage: ecogrep [options] filename\n");
+	PP "------------------------------------------\n");
+	PP " options:\n");
+	PP " -1 : [FIRST] : compare the given pattern with direct strand oligonucleotide\n\n");
+	PP " -2 : [SECOND] : compare the given pattern with reverse strand oligonucleotide\n\n");
+	PP " -d : [D]atabase containing taxonomic information\n\n");
+	PP " -e : [E]rrors : max error allowed in pattern match (option-1, -2 and -p) (0 by default)\n\n");
+	PP " -p : [P]attern : oligonucleotide pattern\n\n");
+	PP " -h : [H]elp : print <this> help\n\n");
+	PP " -i : [I]gnore subtree under given taxonomic id\n\n");
+	/* typo fix: was "taxomic" */
+	PP " -r : [R]estrict search to subtree under given taxonomic id\n\n");
+	PP " -v : in[V]ert the sense of matching, to select non-matching lines.\n\n");
+	PP " argument:\n");
+	/* typo fix: was "ouput" */
+	PP " ecoPCR output file name\n");
+	PP "------------------------------------------\n\n");
+	PP " http://www.grenoble.prabi.fr/trac/ecoPCR/\n");
+	PP "------------------------------------------\n\n");
+}
+
+#undef PP
+
+/* ----------------------------------------------- */
+/* printout usage and exit */
+/* ----------------------------------------------- */
+
+#define PP fprintf(stderr,
+
+/*
+ * Print a short usage message on stderr and exit with `stat`
+ * when it is non-zero.
+ * (Converted from an old K&R-style definition to a prototype-style
+ * definition; behavior is unchanged.)
+ */
+static void ExitUsage(int stat)
+{
+	PP "usage: ecogrep [-d database] [-p pattern] [-i taxid] [-r taxid] [-v] [-h] <file name>\n");
+	PP "type \"ecogrep -h\" for help\n");
+
+	if (stat)
+		exit(stat);
+}
+
+#undef PP
+
+/* ----------------------------------------------- */
+/* MAIN */
+/* ----------------------------------------------- */
+
+#define LINE_BUFF_SIZE 10000
+
+/**
+ * ecogrep entry point.
+ *
+ * Filters ecoPCR output lines by taxonomic id (-r restrict subtree /
+ * -i ignore subtree) and by approximate patterns on the amplified
+ * sequence (-p) and on the two primer sites (-1 / -2). Matching lines
+ * are echoed verbatim on stdout; -v inverts the selection.
+ * Reads the named file arguments, falling back to stdin when a file
+ * cannot be opened and stdin is not a terminal.
+ */
+int main(int argc, char **argv){
+	int32_t carg = 0;
+	int32_t r = 0;			// number of restricted taxid
+	int32_t i = 0;			// number of ignored taxid
+	int32_t v = 0;			// stores if -v mode is active
+	int32_t k = 0;			// file counter
+	int32_t errflag = 0;
+	int32_t error_max = 0;		// stores the error rate allowed by the user
+	int32_t matchingresult = 0;	// stores number of matching result
+
+	ecotaxonomy_t *taxonomy;	// stores the taxonomy
+
+	ecoseq_t *seq = NULL;		// stores sequence info
+	ecoseq_t *oligoseq_1 = NULL;	// stores the oligo_1 info
+	ecoseq_t *oligoseq_2 = NULL;	// stores the oligo_2 info
+
+	char *database = NULL;		// stores the database path (for taxonomy)
+
+	char *p = NULL;			// pattern for sequence
+	PatternPtr pattern = NULL;	// stores the build pattern for sequence
+	char *o1 = NULL;		// pattern for direct strand oligo
+	PatternPtr oligo_1 = NULL;	// stores the build pattern for direct strand oligo
+	char *o2 = NULL;		// pattern for reverse strand oligo
+	PatternPtr oligo_2 = NULL;	// stores the build pattern for reverse strand oligo
+
+	int32_t *restricted_taxid = NULL;	// stores the restricted taxid
+	int32_t *ignored_taxid = NULL;		// stores the ignored taxid
+
+	FILE *file = NULL;		// stores the data stream, stdin by default
+	// NOTE(review): sizeof(char *) over-allocates by a factor of
+	// sizeof(char *); sizeof(char) would suffice. Harmless but wasteful.
+	char *stream = ECOMALLOC(sizeof(char *)*LINE_BUFF_SIZE,"error stream buffer allocation");
+	char *orig = ECOMALLOC(sizeof(char *)*LINE_BUFF_SIZE,"error orig buffer allocation");
+
+	int is_ignored = 0;
+	int is_included = 0;
+	int is_matching = 0;
+	int match_o1 = 0;
+	int match_o2 = 0;
+	int good = 0;
+
+	seq = new_ecoseq();
+	oligoseq_1 = new_ecoseq();
+	oligoseq_2 = new_ecoseq();
+
+	/**
+	 * Parse command line options
+	 **/
+	while ((carg = getopt(argc, argv, "1:2:p:d:i:r:e:vh")) != -1) {
+
+		switch (carg) {
+			case '1':	/* direct strand oligo pattern */
+				o1 = ECOMALLOC(strlen(optarg)+1,
+						"Error on o1 allocation");
+				strcpy(o1,optarg);
+				break;
+
+			case '2':	/* reverse strand oligo pattern */
+				o2 = ECOMALLOC(strlen(optarg)+1,
+						"Error on o2 allocation");
+				strcpy(o2,optarg);
+				break;
+
+			case 'd':	/* taxonomy database radical */
+				database = ECOMALLOC(strlen(optarg)+1,
+						"Error on datafile allocation");
+				strcpy(database,optarg);
+				break;
+
+			case 'i':	/* accumulate ignored taxids */
+				ignored_taxid = ECOREALLOC( ignored_taxid,
+						sizeof(int32_t)*(i+1),
+						"Error on ignored_taxid reallocation");
+				sscanf(optarg,"%d",&ignored_taxid[i]);
+				i++;
+				break;
+
+			case 'r':	/* accumulate restricted taxids */
+				restricted_taxid = ECOREALLOC( restricted_taxid,
+						sizeof(int32_t)*(r+1),
+						"Error on restricted_taxid reallocation");
+				sscanf(optarg,"%d",&restricted_taxid[r]);
+				r++;
+				break;
+
+			case 'v':	/* invert selection */
+				v = 1;
+				break;
+
+			case 'h':
+				PrintHelp();
+				exit(0);
+				break;
+
+			case 'e':	/* max errors per pattern */
+				sscanf(optarg,"%d",&error_max);
+				break;
+
+			case 'p':	/* sequence pattern */
+				p = ECOMALLOC(strlen(optarg)+1,
+						"Error on pattern allocation");
+				strcpy(p,optarg);
+				break;
+
+			case '?':
+				errflag++;
+		}
+	}
+
+	/**
+	 * Check sequence pattern length and build it in PatternPtr format
+	 * (libapat encodes a pattern in a 32-bit word per symbol, hence
+	 * the 32-character limit).
+	 **/
+	if(p)
+	{
+		if (strlen(p) > 32){
+			printf("# Sorry, ecogrep doesn't handle pattern longer than 32 characters.\
+			\n# Please check it out : %s\n",p);
+			exit(EXIT_FAILURE);
+		}
+		else if ( (pattern = buildPattern(p,error_max)) == NULL)
+			exit(EXIT_FAILURE);
+	}
+
+
+
+	/**
+	 * Check o1 pattern length and build it in PatternPtr format
+	 **/
+	if(o1)
+	{
+		if (strlen(o1) > 32){
+			printf("# Sorry, ecogrep doesn't handle pattern longer than 32 characters.\
+			\n# Please check it out : %s\n",o1);
+			exit(EXIT_FAILURE);
+		}
+		else if ( (oligo_1 = buildPattern(o1,error_max)) == NULL)
+			exit(EXIT_FAILURE);
+	}
+
+	/**
+	 * Check o2 pattern length and build it in PatternPtr format
+	 **/
+	if(o2)
+	{
+		if (strlen(o2) > 32){
+			printf("# Sorry, ecogrep doesn't handle pattern longer than 32 characters.\
+			\n# Please check it out : %s\n",o2);
+			exit(EXIT_FAILURE);
+		}
+		else if ( (oligo_2 = buildPattern(o2,error_max)) == NULL)
+			exit(EXIT_FAILURE);
+	}
+
+	/**
+	 * try to get the database name from environment variable
+	 * if no database name specified in the -d option
+	 **/
+	if (database == NULL)
+	{
+		database = getenv("ECOPCRDB");
+		if (database == NULL)
+			errflag++;
+	}
+
+	/**
+	 * check at least one processing is asked
+	 * either patterns or taxid filters
+	 **/
+	if ( !p && !o1 && !o2 && restricted_taxid == NULL && ignored_taxid == NULL )
+	{
+		errflag++;
+	}
+	if (errflag)
+		ExitUsage(errflag);
+
+	/**
+	 * Get the taxonomy back
+	 **/
+	taxonomy = read_taxonomy(database,0);
+
+	/**
+	 * Parse the stream
+	 *
+	 * NOTE(review): the loop condition is `argc >= optind`, so on the
+	 * last iteration argv[optind] is the terminating NULL pointer and
+	 * fopen(NULL, "r") is relied upon to fail (triggering the stdin
+	 * fallback). Passing NULL to fopen is undefined behavior in ISO C —
+	 * confirm this is acceptable on the supported platforms.
+	 **/
+	for (k=0 ; argc >= optind ; optind++, k++){
+
+		matchingresult = 0;
+
+		if ( (file = fopen(argv[optind], "r")) == NULL)
+		{
+			/* fall back to stdin only when it is redirected/piped */
+			if (isatty(fileno(stdin)) == 0)
+			{
+				file = stdin;
+				printf("# Processing standard input...\n");
+			}
+			else
+				break;
+		}
+		else
+			printf("# Processing %s...\n",argv[optind]);
+
+		while( fgets(stream, LINE_BUFF_SIZE, file) != NULL ){
+
+			/* skip '#' comment lines of the ecoPCR output */
+			if (stream[0]!= '#')
+			{
+
+				stream[LINE_BUFF_SIZE-1]=0;
+
+				/* keep an untouched copy: getLineContent tokenizes in place */
+				strcpy(orig,stream);
+
+				getLineContent(stream,seq,oligoseq_1,oligoseq_2);
+
+				/* -----------------------------------------------*/
+				/* is ignored if at least one option -i           */
+				/* AND                                            */
+				/* if current sequence is son of taxid            */
+				/* -----------------------------------------------*/
+				is_ignored = ( (i > 0) && (eco_is_taxid_included( taxonomy,
+									ignored_taxid,
+									i,
+									seq->taxid))
+						);
+
+				/* -----------------------------------------------*/
+				/* is included if no -r option                    */
+				/* OR                                             */
+				/* if current sequence is son of taxid            */
+				/* -----------------------------------------------*/
+				is_included = ( (r == 0) || (eco_is_taxid_included( taxonomy,
+									restricted_taxid,
+									r,
+									seq->taxid))
+						);
+
+				/* ----------------------------------------------------------- */
+				/* match if no pattern or if pattern match current sequence    */
+				/* ----------------------------------------------------------- */
+				is_matching = ( !p || (ispatternmatching(seq,pattern)));
+
+				/* ---------------------------------------------------------------------------- */
+				/* match if no direct oligo pattern or if pattern match current direct oligo    */
+				/* ---------------------------------------------------------------------------- */
+				match_o1 = (!o1 || (ispatternmatching(oligoseq_1,oligo_1)));
+
+				/* ------------------------------------------------------------------------------- */
+				/* match if no reverse oligo pattern or if pattern match current reverse oligo     */
+				/* ------------------------------------------------------------------------------- */
+				match_o2 = (!o2 || (ispatternmatching(oligoseq_2,oligo_2)));
+
+				good = (is_included && is_matching && !is_ignored && match_o1 && match_o2);
+
+				/* -v inverts the selection */
+				if (v)
+					good=!good;
+
+				if ( good )
+				{
+					printf("%s",orig);
+					matchingresult++;
+				}
+			}
+		}
+		if ( file != stdin )
+			fclose(file);
+
+		printf("# %d matching result(s)\n#\n",matchingresult);
+	}
+
+	/**
+	 * clean and free before leaving
+	 * NOTE(review): the last two ECOFREE messages look copy-pasted
+	 * from the `stream` frees — cosmetic only.
+	 **/
+	ECOFREE(orig,"Error in free orig");
+	ECOFREE(stream,"Error in free stream");
+	ECOFREE(ignored_taxid,"Error in free stream");
+	ECOFREE(restricted_taxid,"Error in free stream");
+
+	return 0;
+}
diff --git a/src/ecoisundertaxon.c b/src/ecoisundertaxon.c
new file mode 100644
index 0000000..1e41659
--- /dev/null
+++ b/src/ecoisundertaxon.c
@@ -0,0 +1,123 @@
+#include "libecoPCR/ecoPCR.h"
+#include <getopt.h>
+#include <stdlib.h>
+
+#define VERSION "0.1"
+
+/* ----------------------------------------------- */
+/* printout verbose mode                           */
+/* ----------------------------------------------- */
+/* Print one taxon's id, rank index and scientific name on stdout
+ * (used by the -v verbose mode of ecoisundertaxon). */
+static void printTaxon(ecotx_t *taxon){
+	printf("# taxid : %d | rank : %d | name : %s \n\n",taxon->taxid, taxon->rank, taxon->name);
+}
+
+/* ----------------------------------------------- */
+/* printout help */
+/* ----------------------------------------------- */
+#define PP fprintf(stdout,
+
+/* Print the ecoisundertaxon help text on stdout. */
+static void PrintHelp()
+{
+	PP "\n------------------------------------------\n");
+	PP " ecoisundertaxon Version %s\n", VERSION);
+	PP "------------------------------------------\n");
+	PP " synopsis : searching relationship in taxonomy\n");
+	PP " usage: ecoisundertaxon [options] database\n");
+	PP "------------------------------------------\n");
+	PP " options:\n");
+	/* typo fix: was "taxomic" */
+	PP " -1 : [FIRST] taxonomic id of the hypothetical son\n\n");
+	PP " -2 : [SECOND] taxonomic id of the hypothetical parent\n\n");
+	PP " -h : [H]elp - print <this> help\n\n");
+	PP " -v : [V]erbose mode. Display taxonomic information for both\n");
+	PP "    : taxonomic id.\n\n");
+	PP "------------------------------------------\n");
+	PP " database : to match the expected format, the database\n");
+	/* typo fixes: was "formated" and a stray sentence-ending period */
+	PP " has to be formatted first by the ecoPCRFormat.py program located\n");
+	PP " in the tools directory. Type the radical only, leaving out the extension\n");
+	PP "------------------------------------------\n\n");
+	/* added the missing trailing newline so the closing rule starts on its own line */
+	PP " https://www.grenoble.prabi.fr/trac/ecoPCR/wiki\n");
+	PP "------------------------------------------\n\n");
+}
+
+#undef PP
+
+/* ----------------------------------------------- */
+/* printout usage and exit */
+/* ----------------------------------------------- */
+
+#define PP fprintf(stderr,
+
+/*
+ * Print a short usage message on stderr and exit with `stat`
+ * when it is non-zero.
+ * (Converted from an old K&R-style definition to a prototype-style
+ * definition; behavior is unchanged.)
+ */
+static void ExitUsage(int stat)
+{
+	PP "usage: ecoisundertaxon [-1 taxid] [-2 taxid] [-v] [-h] datafile\n");
+	PP "type \"ecoisundertaxon -h\" for help\n");
+
+	if (stat)
+		exit(stat);
+}
+
+#undef PP
+
+/* ----------------------------------------------- */
+/* MAIN */
+/* ----------------------------------------------- */
+
+/**
+ * ecoisundertaxon entry point.
+ *
+ * Reads the taxonomy from the database radical given as last argument
+ * and reports whether taxid_1 (-1) is a descendant of taxid_2 (-2).
+ * With -v, both taxa are also printed.
+ */
+int main(int argc, char **argv){
+	int32_t carg = 0;
+	int32_t taxid_1 = 0;		// hypothetical son taxid (-1)
+	int32_t taxid_2 = 0;		// hypothetical parent taxid (-2)
+	int32_t verbose = 0;
+	int32_t errflag = 0;
+	ecotaxonomy_t *taxonomy = NULL;
+	ecotx_t *son = NULL;
+	ecotx_t *parent = NULL;
+
+
+	while ((carg = getopt(argc, argv, "1:2:vh")) != -1) {
+		switch (carg) {
+			case '1':
+				sscanf(optarg,"%d",&taxid_1);
+				break;
+
+			case '2':
+				sscanf(optarg,"%d",&taxid_2);
+				break;
+
+			case 'v':
+				verbose = 1;
+				break;
+
+			case 'h':
+				PrintHelp();
+				exit(0);
+				break;
+
+			case '?':
+				errflag++;
+		}
+	}
+
+	/* exactly one non-option argument: the database radical */
+	if ((argc -= optind) != 1)
+		errflag++;
+
+	if (errflag)
+		ExitUsage(errflag);
+
+	taxonomy = read_taxonomy(argv[optind],0);
+
+	/* NOTE(review): eco_findtaxonbytaxid() presumably returns NULL for
+	 * an unknown taxid, in which case printTaxon()/eco_isundertaxon()
+	 * below would dereference NULL — confirm and guard if so. */
+	son = eco_findtaxonbytaxid(taxonomy, taxid_1);
+
+	if (verbose){
+		parent = eco_findtaxonbytaxid(taxonomy, taxid_2);
+		printTaxon(son);
+		printTaxon(parent);
+	}
+
+	if (eco_isundertaxon(son, taxid_2))
+		printf("# taxid_1 (%d) is son of taxid_2 (%d)\n",taxid_1, taxid_2);
+	else
+		printf("# taxid_1 (%d) is NOT son of taxid_2 (%d)\n",taxid_1, taxid_2);
+
+	return 0;
+}
\ No newline at end of file
diff --git a/src/ecopcr.c b/src/ecopcr.c
new file mode 100644
index 0000000..0b533bd
--- /dev/null
+++ b/src/ecopcr.c
@@ -0,0 +1,695 @@
+#include "libecoPCR/ecoPCR.h"
+#include "libthermo/nnparams.h"
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <getopt.h>
+
+
+#define VERSION "0.2"
+
+
+/* ----------------------------------------------- */
+/* printout help */
+/* ----------------------------------------------- */
+#define PP fprintf(stdout,
+
+/* Print the ecoPCR help text (options and output-column description) on stdout. */
+static void PrintHelp()
+{
+	PP "------------------------------------------\n");
+	PP " ecoPCR Version %s\n", VERSION);
+	PP "------------------------------------------\n");
+	/* typo fix: was "hybriding" */
+	PP "synopsis : searching for sequence and taxonomy hybridizing with given primers\n");
+	PP "usage: ecoPCR [options] <nucleotidic patterns>\n");
+	PP "------------------------------------------\n");
+	PP "options:\n");
+	PP "-a : Salt concentration in M for Tm computation (default 0.05 M)\n\n");
+	PP "-c : Consider that the database sequences are [c]ircular\n\n");
+	PP "-d : [D]atabase : to match the expected format, the database\n");
+	/* typo fixes: was "formated" and a stray sentence-ending period */
+	PP "     has to be formatted first by the ecoPCRFormat.py program located\n");
+	PP "     in the tools directory.\n");
+	PP "     ecoPCRFormat.py creates three file types :\n");
+	PP "         .sdx : contains the sequences\n");
+	PP "         .tdx : contains information concerning the taxonomy\n");
+	PP "         .rdx : contains the taxonomy rank\n\n");
+	PP "     ecoPCR needs all the file type. As a result, you have to write the\n");
+	PP "     database radical without any extension. For example /ecoPCRDB/gbmam\n\n");
+	PP "-D : Keeps the specified number of nucleotides on each side of the in silico \n");
+	PP "     amplified sequences (including the amplified DNA fragment plus the two target \n");
+	PP "     sequences of the primers).\n\n");
+	PP "-e : [E]rror : max errors allowed by oligonucleotide (0 by default)\n\n");
+	PP "-h : [H]elp - print <this> help\n\n");
+	PP "-i : [I]gnore the given taxonomy id.\n");
+	PP "     Taxonomy id are available using the ecofind program.\n");
+	PP "     see its help typing ecofind -h for more information.\n\n");
+	PP "-k : [K]ingdom mode : set the kingdom mode\n");
+	PP "     super kingdom mode by default.\n\n");
+	/* typo fixes: was "amplication" / "amplicationlength" */
+	PP "-l : minimum [L]ength : define the minimum amplification length. \n\n");
+	PP "-L : maximum [L]ength : define the maximum amplification length. \n\n");
+	PP "-m : Salt correction method for Tm computation (SANTALUCIA : 1\n");
+	PP "     or OWCZARZY:2, default=1)\n\n");
+	PP "-r : [R]estricts the search to the given taxonomic id.\n");
+	PP "     Taxonomy id are available using the ecofind program.\n");
+	PP "     see its help typing ecofind -h for more information.\n\n");
+	PP "\n");
+	PP "------------------------------------------\n");
+	PP "first argument : oligonucleotide for direct strand\n\n");
+	PP "second argument : oligonucleotide for reverse strand\n\n");
+	PP "------------------------------------------\n");
+	PP "Table result description : \n");
+	PP "column 1 : accession number\n");
+	PP "column 2 : sequence length\n");
+	PP "column 3 : taxonomic id\n");
+	PP "column 4 : rank\n");
+	PP "column 5 : species taxonomic id\n");
+	PP "column 6 : scientific name\n");
+	PP "column 7 : genus taxonomic id\n");
+	PP "column 8 : genus name\n");
+	PP "column 9 : family taxonomic id\n");
+	PP "column 10 : family name\n");
+	PP "column 11 : super kingdom taxonomic id\n");
+	PP "column 12 : super kingdom name\n");
+	PP "column 13 : strand (direct or reverse)\n");
+	PP "column 14 : first oligonucleotide\n");
+	PP "column 15 : number of errors for the first strand\n");
+	PP "column 16 : Tm for hybridization of primer 1 at this site\n");
+	PP "column 17 : second oligonucleotide\n");
+	PP "column 18 : number of errors for the second strand\n");
+	/* bug fix: column 19 documents the SECOND primer's Tm (cf. columns 17/18),
+	 * but the original text said "primer 1" */
+	PP "column 19 : Tm for hybridization of primer 2 at this site\n");
+	PP "column 20 : amplification length\n");
+	PP "column 21 : sequence\n");
+	PP "column 22 : definition\n");
+	PP "------------------------------------------\n");
+	PP " http://www.grenoble.prabi.fr/trac/ecoPCR/\n");
+	PP "------------------------------------------\n\n");
+	PP "\n");
+
+}
+
+#undef PP
+
+/* ----------------------------------------------- */
+/* printout usage and exit */
+/* ----------------------------------------------- */
+
+#define PP fprintf(stderr,
+
+/*
+ * Print a short usage message on stderr and exit with `stat`
+ * when it is non-zero.
+ * (Converted from an old K&R-style definition to a prototype-style
+ * definition; behavior is unchanged.)
+ */
+static void ExitUsage(int stat)
+{
+	PP "usage: ecoPCR [-d database] [-l value] [-L value] [-e value] [-r taxid] [-i taxid] [-k] oligo1 oligo2\n");
+	PP "type \"ecoPCR -h\" for help\n");
+
+	if (stat)
+		exit(stat);
+}
+
+#undef PP
+
+/**
+ * Print one amplification hit as a single result line whose columns
+ * are documented in PrintHelp().
+ *
+ * @param seq      database sequence carrying the hit
+ * @param primer1  direct-strand primer string (for Tm computation)
+ * @param primer2  reverse-strand primer string (for Tm computation)
+ * @param tparm    nearest-neighbour thermodynamic parameters
+ * @param o1, o2   patterns matched at the two primer sites; when
+ *                 strand=='R' the caller passes them swapped (o2,o1c)
+ * @param strand   'D' for direct, 'R' for reverse strand hits
+ * @param kingdom  non-zero => report kingdom instead of super-kingdom
+ * @param pos1     start of the first primer match
+ * @param pos2     position one past the second primer match
+ * @param err1     errors in the first primer match
+ * @param err2     errors in the second primer match
+ * @param taxonomy loaded taxonomy used to resolve ranks/names
+ * @param delta    flanking nucleotides to keep on each side (-D option)
+ */
+void printRepeat(ecoseq_t *seq,
+		char* primer1, char* primer2,
+		PNNParams tparm,
+		PatternPtr o1, PatternPtr o2,
+		char strand,
+		char kingdom,
+		int32_t pos1, int32_t pos2,
+		int32_t err1, int32_t err2,
+		ecotaxonomy_t *taxonomy,
+		int32_t delta)
+{
+	char *AC;
+	int32_t seqlength;
+	int32_t taxid;
+	int32_t species_taxid;
+	int32_t genus_taxid;
+	int32_t family_taxid;
+	int32_t superkingdom_taxid;
+	char *rank;
+	char *scientificName;
+	char *genus_name;
+	char *family_name;
+	char *superkingdom_name;
+
+	ecotx_t *taxon;
+	ecotx_t *main_taxon;
+
+	char oligo1[MAX_PAT_LEN+1];
+	char oligo2[MAX_PAT_LEN+1];
+
+	int32_t error1;
+	int32_t error2;
+	int32_t ldelta,rdelta;		// flanking lengths actually kept on the left/right
+
+	char *amplifia = NULL;
+	int32_t amplength;
+	double tm1,tm2;
+	double tm=0;
+
+	int32_t i;
+
+	AC = seq->AC;
+	seqlength = seq->SQ_length;
+
+
+	/* walk up the taxonomy to collect species / genus / family /
+	 * (super)kingdom ids and names; "###" and -1 mark missing ranks */
+	main_taxon = &taxonomy->taxons->taxon[seq->taxid];
+	taxid = main_taxon->taxid;
+	scientificName= main_taxon->name;
+	rank = taxonomy->ranks->label[main_taxon->rank];
+	taxon = eco_getspecies(main_taxon,taxonomy);
+	if (taxon)
+	{
+		species_taxid = taxon->taxid;
+		scientificName= taxon->name;
+	}
+	else
+		species_taxid = -1;
+
+	taxon = eco_getgenus((taxon) ? taxon:main_taxon,taxonomy);
+	if (taxon)
+	{
+		genus_taxid = taxon->taxid;
+		genus_name= taxon->name;
+	}
+	else
+	{
+		genus_taxid = -1;
+		genus_name = "###";
+	}
+
+	taxon = eco_getfamily((taxon) ? taxon:main_taxon,taxonomy);
+	if (taxon)
+	{
+		family_taxid = taxon->taxid;
+		family_name= taxon->name;
+	}
+	else
+	{
+		family_taxid = -1;
+		family_name = "###";
+	}
+
+	if (kingdom)
+		taxon = eco_getkingdom((taxon) ? taxon:main_taxon,taxonomy);
+	else
+		taxon = eco_getsuperkingdom((taxon) ? taxon:main_taxon,taxonomy);
+
+	if (taxon)
+	{
+		superkingdom_taxid = taxon->taxid;
+		superkingdom_name= taxon->name;
+	}
+	else
+	{
+		superkingdom_taxid = -1;
+		superkingdom_name = "###";
+	}
+
+
+	/* clip the requested flanks to the sequence boundaries */
+	ldelta=(pos1 <= delta)?pos1:delta;
+
+
+
+	/*rdelta=((pos2+delta)>=seqlength)?seqlength-pos2-1:delta; */
+	rdelta=((pos2+delta)>=seqlength)?seqlength-pos2:delta;
+
+	/* amplifia = flanks + both primer sites + amplified fragment */
+	amplifia = getSubSequence(seq->SQ,pos1-ldelta,pos2+rdelta);
+	amplength= strlen(amplifia)-rdelta-ldelta;
+
+	if (strand=='R')
+	{
+		/* reverse-strand hit: report everything on the complementary
+		 * strand; note o1/o2 arrive swapped from the caller */
+		ecoComplementSequence(amplifia);
+
+		strncpy(oligo1,amplifia + rdelta ,o2->patlen);
+
+		oligo1[o2->patlen]=0;
+		error1=err2;
+
+		strncpy(oligo2, amplifia + rdelta + amplength - o1->patlen,o1->patlen);
+		oligo2[o1->patlen]=0;
+		error2=err1;
+
+		if (delta==0)
+			amplifia+=o2->patlen;
+		else
+		{
+			/* after complementing, left and right flanks swap roles */
+			delta=ldelta;
+			ldelta=rdelta+o2->patlen;
+			rdelta=delta+o1->patlen;
+		}
+	}
+	else /* strand == 'D' */
+	{
+		strncpy(oligo1,amplifia+ldelta,o1->patlen);
+		oligo1[o1->patlen]=0;
+		error1=err1;
+
+		strncpy(oligo2,amplifia + ldelta + amplength - o2->patlen,o2->patlen);
+		oligo2[o2->patlen]=0;
+		error2=err2;
+
+		if (delta==0)
+			amplifia+=o1->patlen;
+		else
+		{
+			ldelta+=o1->patlen;
+			rdelta+=o2->patlen;
+		}
+
+	}
+
+
+	ecoComplementSequence(oligo2);
+	if(delta==0)
+		amplifia[amplength - o2->patlen - o1->patlen]=0;
+	else
+	{
+		/* with -D: lower-case the flanking bases (|=32 lower-cases
+		 * ASCII letters) and terminate after the right flank */
+		delta=ldelta+rdelta+amplength-o1->patlen-o2->patlen;
+		for (i=0;i<ldelta;i++)
+			amplifia[i]|=32;
+		for (i=1;i<=rdelta;i++)
+			amplifia[delta-i]|=32;
+
+		amplifia[delta]=0;
+
+	}
+
+	/* per-site melting temperatures, converted from Kelvin to Celsius */
+	tm1=nparam_CalcTwoTM(tparm,oligo1,primer1,o1->patlen) - 273.15;
+	tm2=nparam_CalcTwoTM(tparm,oligo2,primer2,o2->patlen) - 273.15;
+	tm = (tm1 < tm2) ? tm1:tm2;
+	printf("%-15s | %9d | %8d | %-20s | %8d | %-30s | %8d | %-30s | %8d | %-30s | %8d | %-30s | %c | %-32s | %2d | %5.2f | %-32s | %2d | %5.2f | %5d | %s | %s\n",
+			AC,
+			seqlength,
+			taxid,
+			rank,
+			species_taxid,
+			scientificName,
+			genus_taxid,
+			genus_name,
+			family_taxid,
+			family_name,
+			superkingdom_taxid,
+			superkingdom_name,
+			strand,
+			oligo1,
+			error1,
+			tm1,
+			oligo2,
+			error2,
+			tm2,
+			amplength - o1->patlen - o2->patlen,
+			amplifia,
+			seq->DE
+			);
+
+}
+
+/**
+ * ecoPCR entry point.
+ *
+ * Builds approximate patterns for the two primers (and their
+ * complements), iterates over every sequence of the formatted
+ * database, searches both strands for primer pairs whose amplified
+ * length satisfies -l/-L, and prints one result line per hit via
+ * printRepeat(). Sequences can be filtered by taxid (-r / -i) and
+ * treated as circular (-c).
+ */
+int main(int argc, char **argv)
+{
+	ecoseq_t *seq;
+	ecotaxonomy_t *taxonomy;
+	char *scname;
+	char head[11];
+	char tail[11];
+
+	int carg;
+
+	char *oligo1=NULL;
+	char *oligo2=NULL;
+
+	PatternPtr o1;		// direct primer pattern
+	PatternPtr o2;		// reverse primer pattern
+	PatternPtr o1c;		// complement of o1
+	PatternPtr o2c;		// complement of o2
+
+	int32_t delta=0;	// flanking length kept by -D
+	int32_t lmin=0;
+	int32_t lmax=0;
+	int32_t error_max=0;
+	int32_t errflag=0;
+	char kingdom_mode=0;
+
+	char *prefix = NULL;	// database radical
+
+	int32_t checkedSequence = 0;
+	int32_t positiveSequence= 0;
+	int32_t amplifiatCount = 0;
+
+	int32_t o1Hits;
+	int32_t o2Hits;
+	int32_t o1cHits;
+	int32_t o2cHits;
+
+	int32_t begin;
+	int32_t length;
+
+	SeqPtr apatseq=NULL;
+	StackiPtr stktmp;
+
+	int32_t i;
+	int32_t j;
+	int32_t posi;
+	int32_t posj;
+	int32_t erri;
+	int32_t errj;
+
+	int32_t *restricted_taxid = NULL;
+	int32_t *ignored_taxid = NULL;
+	int32_t r=0;		// number of -r taxids
+	int32_t g=0;		// number of -i taxids
+	int32_t circular=0;	// 0 = linear; else longest primer length (wrap margin)
+
+	int32_t saltmethod=SALT_METHOD_SANTALUCIA;
+	double salt=0.05;
+	CNNParams tparm;
+
+	while ((carg = getopt(argc, argv, "hcd:l:L:e:i:r:km:a:tD:")) != -1) {
+
+		switch (carg) {
+			/* -------------------- */
+			case 'd':	/* database name */
+			/* -------------------- */
+				prefix = ECOMALLOC(strlen(optarg)+1,
+						"Error on prefix allocation");
+				strcpy(prefix,optarg);
+				break;
+
+			/* -------------------- */
+			case 'h':	/* help */
+			/* -------------------- */
+				PrintHelp();
+				exit(0);
+				break;
+
+			/* ------------------------------------- */
+			case 'D':	/* flanking length to keep */
+			/* ------------------------------------- */
+				sscanf(optarg,"%d",&delta);
+				break;
+
+			/* ------------------------------------- */
+			case 'l':	/* min amplification length */
+			/* ------------------------------------- */
+				sscanf(optarg,"%d",&lmin);
+				break;
+
+			/* ------------------------------------- */
+			case 'L':	/* max amplification length */
+			/* ------------------------------------- */
+				sscanf(optarg,"%d",&lmax);
+				break;
+
+			/* -------------------- */
+			case 'e':	/* error max */
+			/* -------------------- */
+				sscanf(optarg,"%d",&error_max);
+				break;
+
+			/* -------------------- */
+			case 'k':	/* set the kingdom mode */
+				kingdom_mode = 1;	/* -------------------- */
+				break;
+
+			/* ------------------------------------------ */
+			case 'r':	/* stores the restricting search taxonomic id */
+			/* ------------------------------------------ */
+				restricted_taxid = ECOREALLOC(restricted_taxid,sizeof(int32_t)*(r+1),
+						"Error on restricted_taxid reallocation");
+				sscanf(optarg,"%d",&restricted_taxid[r]);
+				r++;
+				break;
+
+			/* --------------------------------- */
+			case 'i':	/* stores the taxonomic id to ignore */
+			/* --------------------------------- */
+				ignored_taxid = ECOREALLOC(ignored_taxid,sizeof(int32_t)*(g+1),
+						"Error on excluded_taxid reallocation");
+				sscanf(optarg,"%d",&ignored_taxid[g]);
+				g++;
+				break;
+
+			/* --------------------------------------------- */
+			case 'c':	/* treat database sequences as circular */
+			/* --------------------------------------------- */
+				circular = 1;
+				break;
+
+
+			/* --------------------------------- */
+			case 'm':	/* set salt method */
+			/* --------------------------------- */
+				sscanf(optarg,"%d",&(saltmethod));
+				break;
+
+			/* --------------------------------- */
+			case 'a':	/* set salt */
+			/* --------------------------------- */
+				sscanf(optarg,"%lf",&(salt));
+				break;
+
+			case '?':	/* bad option */
+			/* -------------------- */
+				errflag++;
+		}
+
+	}
+
+	/**
+	 * check the path to the database is given as last argument
+	 * (the two remaining arguments are the primer oligonucleotides)
+	 */
+	if ((argc -= optind) == 2)
+	{
+
+		oligo1 = ECOMALLOC(strlen(argv[optind])+1,
+				"Error on oligo1 allocation");
+		strcpy(oligo1,argv[optind]);
+		optind++;
+		oligo2 = ECOMALLOC(strlen(argv[optind])+1,
+				"Error on oligo1 allocation");
+		strcpy(oligo2,argv[optind]);
+
+		/* for circular sequences the wrap margin is the longest primer */
+		if (circular)
+		{
+			circular = strlen(oligo1);
+			if (strlen(oligo2)>(size_t)circular)
+				circular = strlen(oligo2);
+		}
+	}
+	else
+		errflag++;
+
+	/* fall back to the ECOPCRDB environment variable for the database */
+	if (prefix == NULL)
+	{
+		prefix = getenv("ECOPCRDB");
+		if (prefix == NULL)
+			errflag++;
+	}
+
+	nparam_InitParams(&tparm,DEF_CONC_PRIMERS,
+			DEF_CONC_PRIMERS,
+			salt,
+			saltmethod);
+
+	if (!oligo1 || !oligo2)
+		errflag++;
+
+	if (errflag)
+		ExitUsage(errflag);
+
+	/* build approximate patterns for both primers and their complements */
+	o1 = buildPattern(oligo1,error_max);
+	o2 = buildPattern(oligo2,error_max);
+
+	o1c = complementPattern(o1);
+	o2c = complementPattern(o2);
+
+	/* header of the result table */
+	printf("#@ecopcr-v2\n");
+	printf("#\n");
+	printf("# ecoPCR version %s\n",VERSION);
+	printf("# direct  strand oligo1 : %-32s ; oligo2c : %32s\n", o1->cpat,o2c->cpat);
+	printf("# reverse strand oligo2 : %-32s ; oligo1c : %32s\n", o2->cpat,o1c->cpat);
+	printf("# max error count by oligonucleotide : %d\n",error_max);
+
+	double tm,tm1,tm2;
+
+	/* optimal (self) Tm per primer, Kelvin -> Celsius */
+	tm1=nparam_CalcSelfTM(&tparm,o1->cpat,o1->patlen) - 273.15;
+	tm2=nparam_CalcSelfTM(&tparm,o2->cpat,o2->patlen) - 273.15;
+	tm = (tm1 < tm2) ? tm1:tm2;
+
+	printf("# optimal Tm for primers 1 : %5.2f\n",tm1);
+	printf("# optimal Tm for primers 2 : %5.2f\n",tm2);
+
+	printf("# database : %s\n",prefix);
+	if (lmin && lmax)
+		printf("# amplifiat length between [%d,%d] bp\n",lmin,lmax);
+	else if (lmin)
+		printf("# amplifiat length larger than %d bp\n",lmin);
+	else if (lmax)
+		printf("# amplifiat length smaller than %d bp\n",lmax);
+	if (kingdom_mode)
+		printf("# output in kingdom mode\n");
+	else
+		printf("# output in superkingdom mode\n");
+	if (circular)
+		printf("# DB sequences are considered as circular\n");
+	else
+		printf("# DB sequences are considered as linear\n");
+	printf("#\n");
+
+	taxonomy = read_taxonomy(prefix,0);
+
+	seq = ecoseq_iterator(prefix);
+
+	checkedSequence = 0;
+	positiveSequence= 0;
+	amplifiatCount = 0;
+
+	while(seq)
+	{
+		checkedSequence++;
+		/**
+		 * check if current sequence should be included
+		 * (passes -r restriction and is not under an -i taxid)
+		 **/
+		if ( (r == 0) ||
+				(eco_is_taxid_included(taxonomy,
+						restricted_taxid,
+						r,
+						taxonomy->taxons->taxon[seq->taxid].taxid)
+				)
+		   )
+			if ((g == 0) ||
+					!(eco_is_taxid_included(taxonomy,
+							ignored_taxid,
+							g,
+							taxonomy->taxons->taxon[seq->taxid].taxid)
+					)
+			   )
+			{
+
+				scname = taxonomy->taxons->taxon[seq->taxid].name;
+				strncpy(head,seq->SQ,10);
+				head[10]=0;
+				strncpy(tail,seq->SQ+seq->SQ_length-10,10);
+				tail[10]=0;
+
+				apatseq=ecoseq2apatseq(seq,apatseq,circular);
+
+				/* --- direct strand: o1 then o2c downstream --- */
+				o1Hits = ManberAll(apatseq,o1,0,0,apatseq->seqlen+apatseq->circular);
+				o2cHits= 0;
+
+				if (o1Hits)
+				{
+					stktmp = apatseq->hitpos[0];
+					begin = stktmp->val[0] + o1->patlen;
+
+					/* restrict the o2c scan window when -L bounds the amplicon */
+					if (lmax)
+						length= stktmp->val[stktmp->top-1] + o1->patlen - begin + lmax + o2->patlen;
+					else
+						length= apatseq->seqlen - begin;
+
+					if (circular)
+					{
+						begin = 0;
+						length=apatseq->seqlen+circular;
+					}
+					o2cHits = ManberAll(apatseq,o2c,1,begin,length);
+
+					/* pair every o1 hit with every downstream o2c hit */
+					if (o2cHits)
+						for (i=0; i < o1Hits;i++)
+						{
+							posi = apatseq->hitpos[0]->val[i];
+
+							if (posi < apatseq->seqlen)
+							{
+								erri = apatseq->hiterr[0]->val[i];
+								for (j=0; j < o2cHits; j++)
+								{
+									posj =apatseq->hitpos[1]->val[j];
+
+									if (posj < apatseq->seqlen)
+									{
+										posj+=o2c->patlen;
+										// printf("coucou %d %d %d\n",posi,posj,apatseq->seqlen);
+										errj =apatseq->hiterr[1]->val[j];
+										length = 0;
+										if (posj > posi)
+											length=posj - posi - o1->patlen - o2->patlen;
+										/* posj < posi: amplicon wraps around a circular sequence */
+										if (posj < posi)
+											length= posj + apatseq->seqlen - posi - o1->patlen - o2->patlen;
+										if (length &&	// test a non-null amplifia
+												(!lmin || (length >= lmin)) &&
+												(!lmax || (length <= lmax)))
+											printRepeat(seq,oligo1,oligo2,&tparm,o1,o2c,'D',kingdom_mode,posi,posj,erri,errj,taxonomy,delta);
+										//printf("%s\tD\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o1Hits,o2cHits,posi,posj,scname);
+
+									}
+								}
+							}
+						}
+				}
+
+				/* --- reverse strand: o2 then o1c downstream --- */
+				o2Hits = ManberAll(apatseq,o2,2,0,apatseq->seqlen);
+				o1cHits= 0;
+				if (o2Hits)
+				{
+					stktmp = apatseq->hitpos[2];
+					begin = stktmp->val[0] + o2->patlen;
+
+					if (lmax)
+						length= stktmp->val[stktmp->top-1] + o2->patlen - begin + lmax + o1->patlen;
+					else
+						length= apatseq->seqlen - begin;
+
+					if (circular)
+					{
+						begin = 0;
+						length=apatseq->seqlen+circular;
+					}
+
+					o1cHits = ManberAll(apatseq,o1c,3,begin,length);
+
+					if (o1cHits)
+						for (i=0; i < o2Hits;i++)
+						{
+							posi = apatseq->hitpos[2]->val[i];
+
+							if (posi < apatseq->seqlen)
+							{
+								erri = apatseq->hiterr[2]->val[i];
+								for (j=0; j < o1cHits; j++)
+								{
+									posj=apatseq->hitpos[3]->val[j];
+									if (posj < apatseq->seqlen)
+									{
+										posj+=o1c->patlen;
+										errj=apatseq->hiterr[3]->val[j];
+
+										length = 0;
+										if (posj > posi)
+											length=posj - posi + 1 - o2->patlen - o1->patlen; /* - o1->patlen : suppress by <EC> */
+										if (posj < posi)
+											length= posj + apatseq->seqlen - posi - o1->patlen - o2->patlen;
+
+										if (length &&	// test a non-null amplifia
+												(!lmin || (length >= lmin)) &&
+												(!lmax || (length <= lmax)))
+											printRepeat(seq,oligo1,oligo2,&tparm,o2,o1c,'R',kingdom_mode,posi,posj,erri,errj,taxonomy,delta);
+										//printf("%s\tR\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o2Hits,o1cHits,posi,posj,scname);
+									}
+								}
+							}
+						}
+				}
+
+			} /* End of taxonomic selection */
+
+		delete_ecoseq(seq);
+
+		seq = ecoseq_iterator(NULL);
+	}
+
+	ECOFREE(restricted_taxid, "Error: could not free restricted_taxid\n");
+	ECOFREE(ignored_taxid, "Error: could not free excluded_taxid\n");
+
+	return 0;
+}
diff --git a/src/global.mk b/src/global.mk
new file mode 100644
index 0000000..6b1f018
--- /dev/null
+++ b/src/global.mk
@@ -0,0 +1,18 @@
+# Shared build settings included by the per-directory Makefiles.
+# Platform macro passed to the compiler as -D$(MACHINE)
+MACHINE=MAC_OS_X
+LIBPATH= -Llibapat -LlibecoPCR -Llibthermo
+# Emit a .d dependency file for the current source with gcc -M
+MAKEDEPEND = gcc -D$(MACHINE) -M $(CPPFLAGS) -o $*.d $<
+
+CC=gcc
+CFLAGS= -W -Wall -O2 -g
+
+default: all
+
+# Compile one object file
+%.o: %.c
+	$(CC) -D$(MACHINE) $(CFLAGS) -c -o $@ $<
+
+# Turn the compiler's -M output into a .P auto-dependency file
+# (adds the .P itself as a target, then discards the temporary .d)
+%.P : %.c
+	$(MAKEDEPEND)
+	@sed 's/\($*\)\.o[ :]*/\1.o $@ : /g' < $*.d > $@; \
+	rm -f $*.d; [ -s $@ ] || rm -f $@
+
+include $(SRCS:.c=.P)
\ No newline at end of file
diff --git a/src/libapat/CODES/dft_code.h b/src/libapat/CODES/dft_code.h
new file mode 100644
index 0000000..b9caf28
--- /dev/null
+++ b/src/libapat/CODES/dft_code.h
@@ -0,0 +1,14 @@
+/* ----------------------------------------------- */
+/* dft_pat_seq_code.h */
+/* default alphabet encoding for alpha */
+/* ----------------------------------------------- */
+
+ 0x00000001 /* A */, 0x00000002 /* B */, 0x00000004 /* C */,
+ 0x00000008 /* D */, 0x00000010 /* E */, 0x00000020 /* F */,
+ 0x00000040 /* G */, 0x00000080 /* H */, 0x00000100 /* I */,
+ 0x00000200 /* J */, 0x00000400 /* K */, 0x00000800 /* L */,
+ 0x00001000 /* M */, 0x00002000 /* N */, 0x00004000 /* O */,
+ 0x00008000 /* P */, 0x00010000 /* Q */, 0x00020000 /* R */,
+ 0x00040000 /* S */, 0x00080000 /* T */, 0x00100000 /* U */,
+ 0x00200000 /* V */, 0x00400000 /* W */, 0x00800000 /* X */,
+ 0x01000000 /* Y */, 0x02000000 /* Z */
diff --git a/src/libapat/CODES/dna_code.h b/src/libapat/CODES/dna_code.h
new file mode 100644
index 0000000..0febf41
--- /dev/null
+++ b/src/libapat/CODES/dna_code.h
@@ -0,0 +1,71 @@
+/* ----------------------------------------------- */
+/* dna_code.h */
+/* alphabet encoding for dna/rna */
+/* ----------------------------------------- */
+/* IUPAC encoding */
+/* ----------------------------------------- */
+/* G/A/T/C */
+/* U=T */
+/* R=AG */
+/* Y=CT */
+/* M=AC */
+/* K=GT */
+/* S=CG */
+/* W=AT */
+/* H=ACT */
+/* B=CGT */
+/* V=ACG */
+/* D=AGT */
+/* N=ACGT */
+/* X=ACGT */
+/* EFIJLOPQZ not recognized */
+/* ----------------------------------------- */
+/* dual encoding */
+/* ----------------------------------------- */
+/* A=ADHMNRVW */
+/* B=BCDGHKMNRSTUVWY */
+/* C=BCHMNSVY */
+/* D=ABDGHKMNRSTUVWY */
+/* G=BDGKNRSV */
+/* H=ABCDHKMNRSTUVWY */
+/* K=BDGHKNRSTUVWY */
+/* M=ABCDHMNRSVWY */
+/* N=ABCDGHKMNRSTUVWY */
+/* R=ABDGHKMNRSVW */
+/* S=BCDGHKMNRSVY */
+/* T=BDHKNTUWY */
+/* U=BDHKNTUWY */
+/* V=ABCDGHKMNRSVWY */
+/* W=ABDHKMNRTUVWY */
+/* X=ABCDGHKMNRSTUVWY */
+/* Y=BCDHKMNSTUVWY */
+/* EFIJLOPQZ not recognized */
+/* ----------------------------------------------- */
+
+#ifndef USE_DUAL
+
+ /* IUPAC */
+
+ 0x00000001 /* A */, 0x00080044 /* B */, 0x00000004 /* C */,
+ 0x00080041 /* D */, 0x00000000 /* E */, 0x00000000 /* F */,
+ 0x00000040 /* G */, 0x00080005 /* H */, 0x00000000 /* I */,
+ 0x00000000 /* J */, 0x00080040 /* K */, 0x00000000 /* L */,
+ 0x00000005 /* M */, 0x00080045 /* N */, 0x00000000 /* O */,
+ 0x00000000 /* P */, 0x00000000 /* Q */, 0x00000041 /* R */,
+ 0x00000044 /* S */, 0x00080000 /* T */, 0x00080000 /* U */,
+ 0x00000045 /* V */, 0x00080001 /* W */, 0x00080045 /* X */,
+ 0x00080004 /* Y */, 0x00000000 /* Z */
+
+#else
+ /* DUAL */
+
+ 0x00623089 /* A */, 0x017e34ce /* B */, 0x01243086 /* C */,
+ 0x017e34cb /* D */, 0x00000000 /* E */, 0x00000000 /* F */,
+ 0x0026244a /* G */, 0x017e348f /* H */, 0x00000000 /* I */,
+ 0x00000000 /* J */, 0x017e24ca /* K */, 0x00000000 /* L */,
+ 0x0166308f /* M */, 0x017e34cf /* N */, 0x00000000 /* O */,
+ 0x00000000 /* P */, 0x00000000 /* Q */, 0x006634cb /* R */,
+ 0x012634ce /* S */, 0x0158248a /* T */, 0x0158248a /* U */,
+ 0x016634cf /* V */, 0x017a348b /* W */, 0x017e34cf /* X */,
+ 0x017c348e /* Y */, 0x00000000 /* Z */
+#endif
diff --git a/src/libapat/CODES/prot_code.h b/src/libapat/CODES/prot_code.h
new file mode 100644
index 0000000..edcdfc1
--- /dev/null
+++ b/src/libapat/CODES/prot_code.h
@@ -0,0 +1,51 @@
+/* ----------------------------------------------- */
+/* prot_code.h */
+/* alphabet encoding for proteins */
+/* ----------------------------------------- */
+/* IUPAC encoding */
+/* ----------------------------------------- */
+/* B=DN */
+/* Z=EQ */
+/* X=any - {X} */
+/* JOU not recognized */
+/* ----------------------------------------- */
+/* dual encoding */
+/* ----------------------------------------- */
+/* B=BDN */
+/* D=BD */
+/* E=EZ */
+/* N=BN */
+/* Q=QZ */
+/* X=any - {X} */
+/* Z=EQZ */
+/* JOU not recognized */
+/* ----------------------------------------------- */
+
+#ifndef USE_DUAL
+
+ /* IUPAC */
+
+ 0x00000001 /* A */, 0x00002008 /* B */, 0x00000004 /* C */,
+ 0x00000008 /* D */, 0x00000010 /* E */, 0x00000020 /* F */,
+ 0x00000040 /* G */, 0x00000080 /* H */, 0x00000100 /* I */,
+ 0x00000000 /* J */, 0x00000400 /* K */, 0x00000800 /* L */,
+ 0x00001000 /* M */, 0x00002000 /* N */, 0x00000000 /* O */,
+ 0x00008000 /* P */, 0x00010000 /* Q */, 0x00020000 /* R */,
+ 0x00040000 /* S */, 0x00080000 /* T */, 0x00000000 /* U */,
+ 0x00200000 /* V */, 0x00400000 /* W */, 0x037fffff /* X */,
+ 0x01000000 /* Y */, 0x00010010 /* Z */
+
+#else
+ /* DUAL */
+
+ 0x00000001 /* A */, 0x0000200a /* B */, 0x00000004 /* C */,
+ 0x0000000a /* D */, 0x02000010 /* E */, 0x00000020 /* F */,
+ 0x00000040 /* G */, 0x00000080 /* H */, 0x00000100 /* I */,
+ 0x00000000 /* J */, 0x00000400 /* K */, 0x00000800 /* L */,
+ 0x00001000 /* M */, 0x00002002 /* N */, 0x00000000 /* O */,
+ 0x00008000 /* P */, 0x02010000 /* Q */, 0x00020000 /* R */,
+ 0x00040000 /* S */, 0x00080000 /* T */, 0x00000000 /* U */,
+ 0x00200000 /* V */, 0x00400000 /* W */, 0x037fffff /* X */,
+ 0x01000000 /* Y */, 0x02010010 /* Z */
+
+#endif
diff --git a/src/libapat/Gmach.h b/src/libapat/Gmach.h
new file mode 100644
index 0000000..8fb1c69
--- /dev/null
+++ b/src/libapat/Gmach.h
@@ -0,0 +1,97 @@
+/* ---------------------------------------------------------------- */
+/* Copyright (c) Atelier de BioInformatique */
+/* @file: Gmach.h */
+/* @desc: machine dependant setup */
+/* @+ *should* be included in all ABI softs */
+/* */
+/* @history: */
+/* @+ <Gloup> : Jul 95 : MWC first draft */
+/* @+ <Gloup> : Jan 96 : adapted to Pwg */
+/* @+ <Gloup> : Nov 00 : adapted to Mac_OS_X */
+/* ---------------------------------------------------------------- */
+
+#ifndef _H_Gmach
+
+ /* OS names */
+
+#define _H_Gmach
+
+ /* Macintosh Classic */
+ /* Think C environment */
+#ifdef THINK_C
+#define MACINTOSH
+#define MAC_OS_C
+#endif
+
+
+ /* Macintosh Classic */
+ /* Code-Warrior */
+#ifdef __MWERKS__
+#define MACINTOSH
+#define MAC_OS_C
+#endif
+
+ /* Macintosh OS-X */
+#ifdef MAC_OS_X
+#define MACINTOSH
+#define UNIX
+#define UNIX_BSD
+#endif
+
+ /* LINUX */
+#ifdef LINUX
+#define UNIX
+#define UNIX_BSD
+#endif
+
+ /* other Unix Boxes */
+ /* SunOS / Solaris */
+#ifdef SUN
+#define UNIX
+#ifdef SOLARIS
+#define UNIX_S7
+#else
+#define UNIX_BSD
+#endif
+#endif
+
+ /* SGI Irix */
+#ifdef SGI
+#define UNIX
+#define UNIX_S7
+#endif
+
+/* ansi setup */
+/* for unix machines see makefile */
+
+#ifndef PROTO
+#define PROTO 1
+#endif
+
+#ifndef ANSI_PROTO
+#define ANSI_PROTO PROTO
+#endif
+
+#ifndef ANSI_STR
+#define ANSI_STR 1
+#endif
+
+/* unistd.h header file */
+
+#ifdef UNIX
+#define HAS_UNISTD_H <unistd.h>
+#endif
+
+/* getopt.h header file */
+
+#ifdef MAC_OS_C
+#define HAS_GETOPT_H "getopt.h"
+#endif
+
+#ifdef SGI
+#define HAS_GETOPT_H <getopt.h>
+#endif
+
+
+
+#endif
diff --git a/src/libapat/Gtypes.h b/src/libapat/Gtypes.h
new file mode 100644
index 0000000..9bf5a93
--- /dev/null
+++ b/src/libapat/Gtypes.h
@@ -0,0 +1,104 @@
+/* ---------------------------------------------------------------- */
+/* Copyright (c) Atelier de BioInformatique */
+/* @file: Gtypes.h */
+/* @desc: general & machine dependant types */
+/* @+ *should* be included in all ABI softs */
+/* */
+/* @history: */
+/* @+ <Gloup> : Jan 91 : MWC first draft */
+/* @+ <Gloup> : Jul 95 : Gmach addition */
+/* ---------------------------------------------------------------- */
+
+#define _H_Gtypes
+
+#ifndef _H_Gmach
+#include "Gmach.h"
+#endif
+
+#ifndef NULL
+#include <stdio.h> /* is the official NULL here ? */
+#endif
+
+/* ==================================================== */
+/* constantes */
+/* ==================================================== */
+
+#ifndef PROTO
+#define PROTO 1 /* prototypes flag */
+#endif
+
+#ifdef MAC_OS_C
+#define Vrai true /* TC boolean values */
+#define Faux false /* */
+#else
+#define Vrai 0x1 /* bool values = TRUE */
+#define Faux 0x0 /* = FALSE */
+#endif
+
+#define Nil NULL /* nil pointer */
+
+#define kBigInt16 0x7fff /* plus grand 16 bits signe */
+#define kBigInt32 0x7fffffff /* plus grand 32 bits signe */
+#define kBigUInt16 0xffff /* plus grand 16 bits ~signe */
+#define kBigUInt32 0xffffffff /* plus grand 32 bits ~signe */
+
+#ifdef MAC_OS_C
+/* ==================================================== */
+/* Types (for Macintosh ThinK C || MWerks) */
+/* ==================================================== */
+
+ /* --- specific sizes --------- */
+typedef long Int32; /* Int32 = 32 bits signe */
+typedef unsigned long UInt32; /* UInt32 = 32 bits ~signe */
+typedef short Int16; /* Int16 = 16 bits signe */
+typedef unsigned short UInt16; /* UInt32 = 16 bits ~signe */
+typedef char Int8; /* Int8 = 8 bits signe */
+typedef unsigned char UInt8; /* UInt8 = 8 bits ~signe */
+
+ /* --- default types ---------- */
+
+typedef Boolean Bool; /* booleen */
+
+typedef long Int; /* 'natural' int (>= 32 bits) */
+
+typedef void *Ptr; /* pointeur */
+
+#elif ((defined SUN) || (defined SGI) || (defined UNIX))
+/* ==================================================== */
+/* Types (for Sun & Iris - 32 bits machines) */
+/* ==================================================== */
+
+ /* --- specific sizes --------- */
+typedef int Int32; /* Int32 = 32 bits signe */
+typedef unsigned int UInt32; /* UInt32 = 32 bits ~signe */
+typedef short Int16; /* Int16 = 16 bits signe */
+typedef unsigned short UInt16; /* UInt32 = 16 bits ~signe */
+typedef char Int8; /* Int8 = 8 bits signe */
+typedef unsigned char UInt8; /* UInt8 = 8 bits ~signe */
+
+ /* --- default types ---------- */
+
+typedef int Bool; /* booleen (int for ANSI) */
+
+typedef int Int; /* 'natural' int (>= 32 bits) */
+
+typedef void *Ptr; /* pointeur */
+
+#else
+/* ==================================================== */
+/* Types (for undefined machines) */
+/* ==================================================== */
+
+#error undefined MACHINE <please edit Gmach.h>
+
+#endif
+
+/* ==================================================== */
+/* special macro for prototypes */
+/* ==================================================== */
+
+#if PROTO
+#define P(s) s
+#else
+#define P(s) ()
+#endif
diff --git a/src/libapat/Makefile b/src/libapat/Makefile
new file mode 100644
index 0000000..b4dc9be
--- /dev/null
+++ b/src/libapat/Makefile
@@ -0,0 +1,24 @@
# Makefile for libapat -- approximate pattern matching library
# (Baeza-Yates/Gonnet + Manber bit-parallel search)

SOURCES = apat_parse.c \
          apat_search.c \
          libstki.c

SRCS=$(SOURCES)


OBJECTS= $(patsubst %.c,%.o,$(SOURCES))

LIBFILE= libapat.a
RANLIB=ranlib


# shared compiler settings and pattern rules
include ../global.mk

all: $(LIBFILE)

clean:
	rm -rf $(OBJECTS) $(LIBFILE)

# archive the objects and index the library
$(LIBFILE): $(OBJECTS)
	ar -cr $@ $?
	$(RANLIB) $@
diff --git a/src/libapat/apat.h b/src/libapat/apat.h
new file mode 100644
index 0000000..eaa06df
--- /dev/null
+++ b/src/libapat/apat.h
@@ -0,0 +1,173 @@
+/* ==================================================== */
+/* Copyright (c) Atelier de BioInformatique */
+/* Dec. 94 */
+/* File: apat.h */
+/* Purpose: pattern scan */
+/* History: */
+/* 28/12/94 : <Gloup> ascan first version */
+/* 14/05/99 : <Gloup> last revision */
+/* ==================================================== */
+
+#ifndef _H_Gtypes
+#include "Gtypes.h"
+#endif
+
+#ifndef _H_libstki
+#include "libstki.h"
+#endif
+
+#define H_apat
+
+/* ----------------------------------------------- */
+/* constantes */
+/* ----------------------------------------------- */
+
+#ifndef BUFSIZ
+#define BUFSIZ 1024 /* io buffer size */
+#endif
+
+#define MAX_NAME_LEN BUFSIZ /* max length of sequence name */
+
+#define ALPHA_LEN 26 /* alphabet length */
+ /* *DO NOT* modify */
+
+#define MAX_PATTERN 4 /* max # of patterns */
+ /* *DO NOT* modify */
+
+#define MAX_PAT_LEN 32 /* max pattern length */
+ /* *DO NOT* modify */
+
+#define MAX_PAT_ERR 32 /* max # of errors */
+ /* *DO NOT* modify */
+
+#define PATMASK 0x3ffffff /* mask for 26 symbols */
+ /* *DO NOT* modify */
+
+#define OBLIBIT 0x4000000 /* bit 27 to 1 -> oblig. pos */
+ /* *DO NOT* modify */
+
+ /* mask for position */
+#define ONEMASK 0x80000000 /* mask for highest position */
+
+ /* masks for Levenhstein edit */
+#define OPER_IDT 0x00000000 /* identity */
+#define OPER_INS 0x40000000 /* insertion */
+#define OPER_DEL 0x80000000 /* deletion */
+#define OPER_SUB 0xc0000000 /* substitution */
+
+#define OPER_SHFT 30 /* <unused> shift */
+
+ /* Levenhstein Opcodes */
+#define SOPER_IDT 0x0 /* identity */
+#define SOPER_INS 0x1 /* insertion */
+#define SOPER_DEL 0x2 /* deletion */
+#define SOPER_SUB 0x3 /* substitution */
+
+ /* Levenhstein Opcodes masks */
+#define OPERMASK 0xc0000000 /* mask for Opcodes */
+#define NOPERMASK 0x3fffffff /* negate of previous */
+
+ /* special chars in pattern */
+#define PATCHARS "[]!#"
+
+ /* 26 letter alphabet */
+ /* in alphabetical order */
+
+#define ORD_ALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+
+ /* protein alphabet */
+
+#define PROT_ALPHA "ACDEFGHIKLMNPQRSTVWY"
+
+ /* dna/rna alphabet */
+
+#define DNA_ALPHA "ABCDGHKMNRSTUVWXY"
+
+
+/* ----------------------------------------------- */
+/* data structures */
+/* ----------------------------------------------- */
+
+ /* -------------------- */
+typedef enum { /* data encoding */
+ /* -------------------- */
+ alpha = 0, /* [A-Z] */
+ dna, /* IUPAC DNA */
+ protein /* IUPAC proteins */
+} CodType;
+
+ /* -------------------- */
+typedef struct { /* sequence */
+ /* -------------------- */
+ char *name; /* sequence name */
+ Int32 seqlen; /* sequence length */
+ Int32 seqsiz; /* sequence buffer size */
+ Int32 datsiz; /* data buffer size */
+ Int32 circular;
+ UInt8 *data; /* data buffer */
+ char *cseq; /* sequence buffer */
+ StackiPtr hitpos[MAX_PATTERN]; /* stack of hit pos. */
+ StackiPtr hiterr[MAX_PATTERN]; /* stack of errors */
+} Seq, *SeqPtr;
+
+ /* -------------------- */
+typedef struct { /* pattern */
+ /* -------------------- */
+ int patlen; /* pattern length */
+ int maxerr; /* max # of errors */
+ char *cpat; /* pattern string */
+ Int32 *patcode; /* encoded pattern */
+ UInt32 *smat; /* S matrix */
+ UInt32 omask; /* oblig. bits mask */
+ Bool hasIndel; /* are indels allowed */
+ Bool ok; /* is pattern ok */
+} Pattern, *PatternPtr;
+
+/* ----------------------------------------------- */
+/* macros */
+/* ----------------------------------------------- */
+
+#ifndef NEW
+#define NEW(typ) (typ*)malloc(sizeof(typ))
+#define NEWN(typ, dim) (typ*)malloc((unsigned long)(dim) * sizeof(typ))
+#define REALLOC(typ, ptr, dim) (typ*)realloc((void *) (ptr), (unsigned long)(dim) * sizeof(typ))
+#define FREE(ptr) free((void *) ptr)
+#endif
+
+/* ----------------------------------------------- */
+/* prototypes */
+/* ----------------------------------------------- */
+
+ /* apat_seq.c */
+
+SeqPtr FreeSequence (SeqPtr pseq);
+SeqPtr NewSequence (void);
+int ReadNextSequence (SeqPtr pseq);
+int WriteSequence (FILE *filou , SeqPtr pseq);
+
+ /* apat_parse.c */
+
+Int32 *GetCode (CodType ctype);
+int CheckPattern (Pattern *ppat);
+int EncodePattern (Pattern *ppat, CodType ctype);
+int ReadPattern (Pattern *ppat);
+void PrintDebugPattern (Pattern *ppat);
+
+ /* apat_search.c */
+
+int CreateS (Pattern *ppat, Int32 lalpha);
+Int32 ManberNoErr (Seq *pseq , Pattern *ppat, int patnum,int begin,int length);
+Int32 ManberSub (Seq *pseq , Pattern *ppat, int patnum,int begin,int length);
+Int32 ManberIndel (Seq *pseq , Pattern *ppat, int patnum,int begin,int length);
+Int32 ManberAll (Seq *pseq , Pattern *ppat, int patnum,int begin,int length);
+Int32 NwsPatAlign (Seq *pseq , Pattern *ppat, Int32 nerr ,
+ Int32 *reslen , Int32 *reserr);
+
+ /* apat_sys.c */
+
+float UserCpuTime (int reset);
+float SysCpuTime (int reset);
+char *StrCpuTime (int reset);
+void Erreur (char *msg , int stat);
+int AccessFile (char *path, char *mode);
+
diff --git a/src/libapat/apat_parse.c b/src/libapat/apat_parse.c
new file mode 100644
index 0000000..43cda48
--- /dev/null
+++ b/src/libapat/apat_parse.c
@@ -0,0 +1,369 @@
+/* ==================================================== */
+/* Copyright (c) Atelier de BioInformatique */
+/* Mar. 92 */
+/* File: apat_parse.c */
+/* Purpose: Codage du pattern */
+/* History: */
+/* 00/07/94 : <Gloup> first version (stanford) */
+/* 00/11/94 : <Gloup> revised for DNA/PROTEIN */
+/* 30/12/94 : <Gloup> modified EncodePattern */
+/* for manber search */
+/* 14/05/99 : <Gloup> indels added */
+/* ==================================================== */
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "Gtypes.h"
+#include "apat.h"
+ /* -------------------- */
+ /* default char */
+ /* encodings */
+ /* -------------------- */
+
+static Int32 sDftCode[] = {
+
+#include "CODES/dft_code.h"
+
+};
+ /* -------------------- */
+ /* char encodings */
+ /* IUPAC */
+ /* -------------------- */
+
+ /* IUPAC Proteins */
+static Int32 sProtCode[] = {
+
+#include "CODES/prot_code.h"
+
+};
+ /* IUPAC Dna/Rna */
+static Int32 sDnaCode[] = {
+
+#include "CODES/dna_code.h"
+
+};
+
+
/* -------------------------------------------- */
/* internal replacement of gets                 */
/* reads one line from stdin into buffer and    */
/* strips trailing CR/LF characters.            */
/* returns buffer, or NULL on EOF/read error.   */
/* -------------------------------------------- */
static char *sGets(char *buffer, int size) {

    char *ebuf;

    /* fix: fgets() already reserves room for the final '\0',
       so the full buffer size may be passed; the original
       'size-1' wasted one byte without adding any safety */
    if (! fgets(buffer, size, stdin))
        return NULL;

    /* remove trailing line feed / carriage return */

    ebuf = buffer + strlen(buffer);

    while (--ebuf >= buffer) {
        if ((*ebuf == '\n') || (*ebuf == '\r'))
            *ebuf = '\000';
        else
            break;
    }

    return buffer;
}
+
+/* -------------------------------------------- */
+/* returns actual code associated to type */
+/* -------------------------------------------- */
+
+Int32 *GetCode(CodType ctype)
+{
+ Int32 *code = sDftCode;
+
+ switch (ctype) {
+ case dna : code = sDnaCode ; break;
+ case protein : code = sProtCode ; break;
+ default : code = sDftCode ; break;
+ }
+
+ return code;
+}
+
+/* -------------------------------------------- */
+
+#define BAD_IF(tst) if (tst) return 0
+
+int CheckPattern(Pattern *ppat)
+{
+ int lev;
+ char *pat;
+
+ pat = ppat->cpat;
+
+ BAD_IF (*pat == '#');
+
+ for (lev = 0; *pat ; pat++)
+
+ switch (*pat) {
+
+ case '[' :
+ BAD_IF (lev);
+ BAD_IF (*(pat+1) == ']');
+ lev++;
+ break;
+
+ case ']' :
+ lev--;
+ BAD_IF (lev);
+ break;
+
+ case '!' :
+ BAD_IF (lev);
+ BAD_IF (! *(pat+1));
+ BAD_IF (*(pat+1) == ']');
+ break;
+
+ case '#' :
+ BAD_IF (lev);
+ BAD_IF (*(pat-1) == '[');
+ break;
+
+ default :
+ if (! isupper(*pat))
+ return 0;
+ break;
+ }
+
+ return (lev ? 0 : 1);
+}
+
+#undef BAD_IF
+
+
/* -------------------------------------------- */
/* if the unit ending at 'pat' is followed by   */
/* the obligatory marker '#', step onto it so   */
/* the marker stays attached to its unit        */
/* -------------------------------------------- */
static char *skipOblig(char *pat)
{
    char next = *(pat + 1);

    return (next == '#') ? pat + 1 : pat;
}
+
+/* -------------------------------------------- */
+static char *splitPattern(char *pat)
+{
+ switch (*pat) {
+
+ case '[' :
+ for (; *pat; pat++)
+ if (*pat == ']')
+ return skipOblig(pat);
+ return NULL;
+ break;
+
+ case '!' :
+ return splitPattern(pat+1);
+ break;
+
+ }
+
+ return skipOblig(pat);
+}
+
+/* -------------------------------------------- */
+static Int32 valPattern(char *pat, Int32 *code)
+{
+ Int32 val;
+
+ switch (*pat) {
+
+ case '[' :
+ return valPattern(pat+1, code);
+ break;
+
+ case '!' :
+ val = valPattern(pat+1, code);
+ return (~val & PATMASK);
+ break;
+
+ default :
+ val = 0x0;
+ while (isupper(*pat)) {
+ val |= code[*pat - 'A'];
+ pat++;
+ }
+ return val;
+ }
+
+ return 0x0;
+}
+
+/* -------------------------------------------- */
+static Int32 obliBitPattern(char *pat)
+{
+ return (*(pat + strlen(pat) - 1) == '#' ? OBLIBIT : 0x0);
+}
+
+
+/* -------------------------------------------- */
+static int lenPattern(char *pat)
+{
+ int lpat;
+
+ lpat = 0;
+
+ while (*pat) {
+
+ if (! (pat = splitPattern(pat)))
+ return 0;
+
+ pat++;
+
+ lpat++;
+ }
+
+ return lpat;
+}
+
+/* -------------------------------------------- */
+/* Interface */
+/* -------------------------------------------- */
+
/* -------------------------------------------- */
/* encodes a pattern                            */
/* splits ppat->cpat into units and stores one  */
/* bit-mask per unit into ppat->patcode, with   */
/* OBLIBIT OR'ed in for obligatory positions.   */
/* returns the pattern length in units (also    */
/* stored in ppat->patlen), or 0 on error;      */
/* ppat->ok records success.                    */
/* -------------------------------------------- */
int EncodePattern(Pattern *ppat, CodType ctype)
{
    int pos, lpat;
    Int32 *code;
    char *pp, *pa, c;

    ppat->ok = Faux;

    code = GetCode(ctype);      /* letter -> bit-mask table for this alphabet */

    ppat->patlen = lpat = lenPattern(ppat->cpat);

    if (lpat <= 0)              /* malformed pattern */
        return 0;

    if (! (ppat->patcode = NEWN(Int32, lpat)))
        return 0;

    pa = pp = ppat->cpat;

    pos = 0;

    while (*pa) {

        pp = splitPattern(pa);  /* pp -> last char of current unit */

        c = *++pp;              /* save the char after the unit ... */

        *pp = '\000';           /* ... and NUL-terminate the unit in place */

        ppat->patcode[pos++] = valPattern(pa, code) | obliBitPattern(pa);

        *pp = c;                /* restore the saved character */

        pa = pp;                /* move on to the next unit */
    }

    ppat->ok = Vrai;

    return lpat;
}
+
/* -------------------------------------------- */
/* removes blanks (in place)                    */
/* compacts s by dropping every whitespace      */
/* character, then NUL-terminates the result.   */
/* returns s.                                   */
/* -------------------------------------------- */
static char *RemBlanks(char *s)
{
    char *sb, *sc;

    for (sb = sc = s ; *sb ; sb++)
        if (! isspace(*sb))
            *sc++ = *sb;

    /* fix: the compacted string was never terminated, so any
       string actually containing blanks kept trailing garbage
       from its original tail */
    *sc = '\000';

    return s;
}
+
+/* -------------------------------------------- */
+/* count non blanks */
+/* -------------------------------------------- */
+static Int32 CountAlpha(char *s)
+{
+ Int32 n;
+
+ for (n = 0 ; *s ; s++)
+ if (! isspace(*s))
+ n++;
+
+ return n;
+}
+
+
/* -------------------------------------------- */
/* reads a pattern from stdin                   */
/* expected line format: <pattern> #mis         */
/* where #mis is the allowed error count        */
/* (a negative value allows indels too);        */
/* lines starting with '/' are comments.        */
/* fills ppat->cpat / maxerr / hasIndel and     */
/* returns 1 on success, 0 on EOF or error      */
/* (ppat->ok records the outcome).              */
/* -------------------------------------------- */
int ReadPattern(Pattern *ppat)
{
    int val;
    char *spac;
    char buffer[BUFSIZ];

    ppat->ok = Vrai;

    if (! sGets(buffer, sizeof(buffer)))        /* EOF */
        return 0;

    if (*buffer == '/')                         /* comment line: try next */
        return ReadPattern(ppat);

    if (! CountAlpha(buffer))                   /* blank line: try next */
        return ReadPattern(ppat);

    /* locate the separator between pattern and error count */
    for (spac = buffer ; *spac ; spac++)
        if ((*spac == ' ') || (*spac == '\t'))
            break;

    ppat->ok = Faux;

    if (! *spac)                                /* no error count present */
        return 0;

    if (sscanf(spac, "%d", &val) != 1)          /* unparsable error count */
        return 0;

    ppat->hasIndel = (val < 0);                 /* negative => indels allowed */

    ppat->maxerr = ((val >= 0) ? val : -val);

    *spac = '\000';                             /* keep only the pattern part */

    (void) RemBlanks(buffer);

    /* duplicate the cleaned pattern string into the Pattern */
    if ((ppat->cpat = NEWN(char, strlen(buffer)+1)))
        strcpy(ppat->cpat, buffer);

    ppat->ok = (ppat->cpat != NULL);

    return (ppat->ok ? 1 : 0);
}
+
+/* -------------------------------------------- */
+/* ecrit un pattern - Debug - */
+/* -------------------------------------------- */
+void PrintDebugPattern(Pattern *ppat)
+{
+ int i;
+
+ printf("Pattern : %s\n", ppat->cpat);
+ printf("Encoding : \n\t");
+
+ for (i = 0 ; i < ppat->patlen ; i++) {
+ printf("0x%8.8x ", ppat->patcode[i]);
+ if (i%4 == 3)
+ printf("\n\t");
+ }
+ printf("\n");
+}
+
diff --git a/src/libapat/apat_search.c b/src/libapat/apat_search.c
new file mode 100644
index 0000000..f0dd394
--- /dev/null
+++ b/src/libapat/apat_search.c
@@ -0,0 +1,339 @@
+/* ==================================================== */
+/* Copyright (c) Atelier de BioInformatique */
+/* Dec. 94 */
+/* File: apat_search.c */
+/* Purpose: recherche du pattern */
+/* algorithme de Baeza-Yates/Gonnet */
+/* Manber (agrep) */
+/* History: */
+/* 07/12/94 : <MFS> first version */
+/* 28/12/94 : <Gloup> revised version */
+/* 14/05/99 : <Gloup> last revision */
+/* ==================================================== */
+
+#if 0
+#ifndef THINK_C
+#include <sys/types.h>
+#endif
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "Gtypes.h"
+#include "libstki.h"
+#include "apat.h"
+
+#define POP PopiOut
+#define PUSH PushiIn
+#define TOPCURS CursiToTop
+#define DOWNREAD ReadiDown
+
+#define KRONECK(x, msk) ((~x & msk) ? 0 : 1)
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+
/* -------------------------------------------- */
/* builds the S matrix of a pattern             */
/* smat[letter] gets bit k set when the letter  */
/* is accepted at pattern position k (counted   */
/* from the low-order bit = last position);     */
/* omask collects the obligatory positions.     */
/* returns 1 on success, 0 on allocation error  */
/* (ppat->ok records the outcome).              */
/* -------------------------------------------- */

int CreateS(Pattern *ppat, Int32 lalpha)
{
    Int32 i, j, indx;
    UInt32 pindx, amask, omask, *smat;

    ppat->ok = Faux;

    omask = 0x0L;

    if (! (smat = NEWN(UInt32, lalpha)))
        return 0;

    for (i = 0 ; i < lalpha ; i++)
        smat[i] = 0x0;

    /* walk the pattern backwards: amask is the register bit
       associated with the current pattern position */
    for (i = ppat->patlen - 1, amask = 0x1L ; i >= 0 ; i--, amask <<= 1) {

        indx = ppat->patcode[i];

        if (ppat->patcode[i] & OBLIBIT)         /* obligatory position */
            omask |= amask;

        /* set this position's bit for every letter the unit accepts */
        for (j = 0, pindx = 0x1L ; j < lalpha ; j++, pindx <<= 1)
            if (indx & pindx)
                smat[j] |= amask;
    }

    ppat->smat = smat;

    ppat->omask = omask;

    ppat->ok = Vrai;

    return 1;

}
+
/* -------------------------------------------- */
/* Baeza-Yates/Manber algorithm                 */
/* NoError                                      */
/* exact (0-error) bit-parallel scan of         */
/* pseq[begin .. begin+length) for pattern      */
/* slot patnum; each hit start position and a   */
/* 0 error count are pushed on the sequence's   */
/* per-pattern stacks.                          */
/* returns the total number of stacked hits.    */
/* -------------------------------------------- */
Int32 ManberNoErr(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
{
    UInt32 pos;
    UInt32 smask, r;
    UInt8 *data;
    StackiPtr *stkpos, *stkerr;
    UInt32 end;

    /* clamp the scan window to the (possibly circularly
       extended) sequence length */
    end = begin + length;
    end = (end <= (size_t)(pseq->seqlen+pseq->circular)) ? end:(size_t)(pseq->seqlen+pseq->circular);


    /* create local masks */

    smask = r = 0x1L << ppat->patlen;

    /* init. scan */
    data = pseq->data + begin;
    stkpos = pseq->hitpos + patnum;
    stkerr = pseq->hiterr + patnum;

    /* loop on text data */

    for (pos = begin ; pos < end ; pos++) {

        /* shift-and step: bit 0 becomes 1 when the whole
           pattern matches ending at this position */
        r = (r >> 1) & ppat->smat[*data++];

        if (r & 0x1L) {
            PUSH(stkpos, pos - ppat->patlen + 1);   /* hit start position */
            PUSH(stkerr, 0);                        /* exact match: 0 errors */
        }

        r |= smask;     /* re-arm the start-of-pattern bit */
    }

    return (*stkpos)->top;      /* aka # of hits */
}
+
/* -------------------------------------------- */
/* Baeza-Yates/Manber algorithm                 */
/* Substitution only                            */
/*                                              */
/* Note : r array is stored as :                */
/* 0 0 r(0,j) r(0,j+1) r(1,j) r(1,j+1) ...      */
/*                                              */
/* scans pseq[begin .. begin+length) allowing   */
/* up to ppat->maxerr substitutions; for each   */
/* end position only the lowest-error hit is    */
/* pushed on the per-pattern stacks.            */
/* returns the total number of stacked hits.    */
/* -------------------------------------------- */
Int32 ManberSub(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
{
    int e, emax, found;
    UInt32 pos;
    UInt32 smask, cmask, sindx;
    UInt32 *pr, r[2 * MAX_PAT_ERR + 2];
    UInt8 *data;
    StackiPtr *stkpos, *stkerr;
    UInt32 end;

    /* clamp the scan window to the (possibly circularly
       extended) sequence length */
    end = begin + length;
    end = (end <= (size_t)(pseq->seqlen+pseq->circular)) ? end:(size_t)(pseq->seqlen+pseq->circular);

    /* create local masks */
    emax = ppat->maxerr;

    r[0] = r[1] = 0x0;

    cmask = smask = 0x1L << ppat->patlen;

    /* one register pair per allowed error level */
    for (e = 0, pr = r + 3 ; e <= emax ; e++, pr += 2)
        *pr = cmask;

    cmask = ~ ppat->omask;      /* obligatory positions forbid errors */

    /* init. scan */
    data = pseq->data + begin;
    stkpos = pseq->hitpos + patnum;
    stkerr = pseq->hiterr + patnum;

    /* loop on text data */

    for (pos = begin ; pos < end ; pos++) {

        sindx = ppat->smat[*data++];

        for (e = found = 0, pr = r ; e <= emax ; e++, pr += 2) {

            pr[2] = pr[3] | smask;

            pr[3] = ((pr[0] >> 1) & cmask) /* sub */
                | ((pr[2] >> 1) & sindx); /* ident */

            if (pr[3] & 0x1L) { /* found */
                if (! found) {  /* keep only the lowest error level */
                    PUSH(stkpos, pos - ppat->patlen + 1);
                    PUSH(stkerr, e);
                }
                found++;
            }
        }
    }

    return (*stkpos)->top;      /* aka # of hits */
}
+
/* -------------------------------------------- */
/* Baeza-Yates/Manber algorithm                 */
/* Substitution + Indels                        */
/*                                              */
/* Note : r array is stored as :                */
/* 0 0 r(0,j) r(0,j+1) r(1,j) r(1,j+1) ...      */
/*                                              */
/* Warning: may return shifted pos.             */
/*                                              */
/* like ManberSub but the transition also       */
/* admits insertions and deletions; for each    */
/* end position only the lowest-error hit is    */
/* pushed on the per-pattern stacks.            */
/* returns the total number of stacked hits.    */
/* -------------------------------------------- */
Int32 ManberIndel(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
{
    int e, emax, found;
    UInt32 pos;
    UInt32 smask, cmask, sindx;
    UInt32 *pr, r[2 * MAX_PAT_ERR + 2];
    UInt8 *data;
    StackiPtr *stkpos, *stkerr;
    UInt32 end;

    /* clamp the scan window to the (possibly circularly
       extended) sequence length */
    end = begin + length;
    end = (end <= (size_t)(pseq->seqlen+pseq->circular)) ? end:(size_t)(pseq->seqlen+pseq->circular);

    /* create local masks */
    emax = ppat->maxerr;

    r[0] = r[1] = 0x0;

    cmask = smask = 0x1L << ppat->patlen;

    /* one register pair per error level; the initial masks widen
       with e since indels shift alignment boundaries */
    for (e = 0, pr = r + 3 ; e <= emax ; e++, pr += 2) {
        *pr = cmask;
        cmask = (cmask >> 1) | smask;
    }

    cmask = ~ ppat->omask;      /* obligatory positions forbid errors */

    /* init. scan */
    data = pseq->data + begin;
    stkpos = pseq->hitpos + patnum;
    stkerr = pseq->hiterr + patnum;

    /* loop on text data */

    for (pos = begin ; pos < end ; pos++) {

        sindx = ppat->smat[*data++];

        for (e = found = 0, pr = r ; e <= emax ; e++, pr += 2) {

            pr[2] = pr[3] | smask;

            pr[3] = (( pr[0] /* ins */
                | (pr[0] >> 1) /* sub */
                | (pr[1] >> 1)) /* del */
                & cmask)
                | ((pr[2] >> 1) & sindx); /* ident */

            if (pr[3] & 0x1L) { /* found */
                if (! found) {  /* keep only the lowest error level */
                    PUSH(stkpos, pos - ppat->patlen + 1);
                    PUSH(stkerr, e);
                }
                found++;
            }

        }
    }

    return (*stkpos)->top;      /* aka # of hits */
}
+
+/* -------------------------------------------- */
+/* Baeza-Yates/Manber algorithm */
+/* API call to previous functions */
+/* -------------------------------------------- */
+Int32 ManberAll(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
+{
+ if (ppat->maxerr == 0)
+ return ManberNoErr(pseq, ppat, patnum, begin, length);
+ else if (ppat->hasIndel)
+ return ManberIndel(pseq, ppat, patnum, begin, length);
+ else
+ return ManberSub(pseq, ppat, patnum, begin, length);
+}
+
+
+/* -------------------------------------------- */
+/* Alignement NWS */
+/* pour edition des hits */
+/* (avec substitution obligatoire aux bords) */
+/* -------------------------------------------- */
+
+Int32 NwsPatAlign(pseq, ppat, nerr, reslen, reserr)
+ Seq *pseq;
+ Pattern *ppat;
+ Int32 nerr, *reslen, *reserr;
+{
+ UInt8 *sseq, *px;
+ Int32 i, j, lseq, lpat, npos, dindel, dsub,
+ *pc, *pi, *pd, *ps;
+ UInt32 amask;
+
+ static Int32 sTab[(MAX_PAT_LEN+MAX_PAT_ERR+1) * (MAX_PAT_LEN+1)];
+
+ lseq = pseq->seqlen;
+
+ pc = sTab; /* |----|----| --> i */
+ pi = pc - 1; /* | ps | pd | | */
+ pd = pi - lseq; /* |----|----| | */
+ ps = pd - 1; /* | pi | pc | v j */
+ /* |---------| */
+
+ lseq = pseq->seqlen;
+ lpat = ppat->patlen;
+
+ sseq = pseq->data - 1;
+
+ amask = ONEMASK >> lpat;
+
+ for (j = 0 ; j <= lpat ; j++) {
+
+ for (i = 0 , px = sseq ; i <= lseq ; i++, px++) {
+
+ if (i && j) {
+ dindel = MIN(*pi, *pd) + 1;
+ dsub = *ps + KRONECK(ppat->smat[*px], amask);
+ *pc = MIN(dindel, dsub);
+ }
+ else if (i) /* j == 0 */
+ *pc = *pi + 1;
+ else if (j) /* i == 0 */
+ *pc = *pd + 1;
+ else /* root */
+ *pc = 0;
+
+ pc++;
+ pi++;
+ pd++;
+ ps++;
+ }
+
+ amask <<= 1;
+ }
+
+ pc--;
+
+ for (i = lseq, npos = 0 ; i >= 0 ; i--, pc--) {
+ if (*pc <= nerr) {
+ *reslen++ = i;
+ *reserr++ = *pc;
+ npos++;
+ }
+ }
+
+ return npos;
+}
diff --git a/src/libapat/libstki.c b/src/libapat/libstki.c
new file mode 100644
index 0000000..1ca9868
--- /dev/null
+++ b/src/libapat/libstki.c
@@ -0,0 +1,379 @@
+/* ==================================================== */
+/* Copyright (c) Atelier de BioInformatique */
+/* Mar. 92 */
+/* File: libstki.c */
+/* Purpose: A library to deal with 'stacks' of */
+/* integers */
+/* Note: 'stacks' are dynamic (i.e. size is */
+/* automatically readjusted when needed) */
+/* History: */
+/* 00/03/92 : <Gloup> first draft */
+/* 15/08/93 : <Gloup> revised version */
+/* 14/05/99 : <Gloup> last revision */
+/* ==================================================== */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "Gtypes.h"
+#include "libstki.h"
+
+
+/* ============================ */
+/* Constantes et Macros locales */
+/* ============================ */
+
+#define ExpandStack(stkh) ResizeStacki((stkh), (*stkh)->size << 1)
+
+#define ShrinkStack(stkh) ResizeStacki((stkh), (*stkh)->size >> 1)
+
+
+static Int16 sStkiLastError = kStkiNoErr;
+
+/* -------------------------------------------- */
+/* gestion des erreurs */
+/* get/reset erreur flag */
+/* */
+/* @function: StkiError */
+/* -------------------------------------------- */
+
+/* Return the last recorded stack error code; when reset is true,
+ also clear the flag back to kStkiNoErr. */
+Int16 StkiError(Bool reset)
+{
+ Int16 err;
+
+ err = sStkiLastError;
+
+ if (reset)
+ sStkiLastError = kStkiNoErr;
+
+ return err;
+
+} /* end of StkiError */
+
+/* -------------------------------------------- */
+/* creation d'un stack */
+/* */
+/* @function: NewStacki */
+/* -------------------------------------------- */
+
+/* Allocate a new integer stack with the given initial capacity.
+ Returns NULL (and sets kStkiMemErr for the value array) on
+ allocation failure. */
+StackiPtr NewStacki(Int32 size)
+{
+ StackiPtr stki;
+
+ if (! (stki = NEW(Stacki)))
+ return NULL;
+
+ stki->size = size;
+ stki->top = 0;
+ stki->cursor = 0;
+
+ if ( ! (stki->val = NEWN(Int32, size))) {
+ sStkiLastError = kStkiMemErr;
+ return FreeStacki(stki); /* frees the partial stack, returns NULL */
+ }
+
+ return stki;
+
+} /* end of NewStacki */
+
+
+/* -------------------------------------------- */
+/* liberation d'un stack */
+/* */
+/* @function: FreeStacki */
+/* -------------------------------------------- */
+
+/* Release a stack and its value array (NULL-safe).
+ Always returns NULL so callers can write `p = FreeStacki(p);`. */
+StackiPtr FreeStacki(StackiPtr stki)
+{
+ if (stki) {
+ if (stki->val)
+ FREE(stki->val);
+ FREE(stki);
+ }
+
+ return NULL;
+
+} /* end of FreeStacki */
+
+/* -------------------------------------------- */
+/* creation d'un vecteur de stacks */
+/* */
+/* @function: NewStackiVector */
+/* -------------------------------------------- */
+
+/* Allocate an array of vectSize stacks, each with initial capacity
+ stackSize. On partial failure the stacks already created are freed
+ and NULL is returned (kStkiMemErr is set for the array itself). */
+StackiHdle NewStackiVector(Int32 vectSize, Int32 stackSize)
+{
+ Int32 i;
+ StackiHdle stkh;
+
+ if (! (stkh = NEWN(StackiPtr, vectSize))) {
+ sStkiLastError = kStkiMemErr;
+ return NULL;
+ }
+
+ for (i = 0 ; i < vectSize ; i++)
+ if (! (stkh[i] = NewStacki(stackSize)))
+ return FreeStackiVector(stkh, i); /* free the i stacks built so far */
+
+ return stkh;
+
+} /* end of NewStackiVector */
+
+
+/* -------------------------------------------- */
+/* liberation d'un vecteur de stacks */
+/* */
+/* @function: FreeStackiVector */
+/* -------------------------------------------- */
+
+/* Free the first vectSize stacks of a stack vector, then the vector
+ itself (NULL-safe). Always returns NULL. */
+StackiHdle FreeStackiVector(StackiHdle stkh, Int32 vectSize)
+{
+ Int32 i;
+
+ if (stkh) {
+ for (i = 0 ; i < vectSize ; i++)
+ (void) FreeStacki(stkh[i]);
+ FREE(stkh);
+ }
+
+ return NULL;
+
+} /* end of FreeStackiVector */
+
+/* -------------------------------------------- */
+/* resize d'un stack */
+/* */
+/* @function: ResizeStacki */
+/* -------------------------------------------- */
+
+/* Resize a stack's value array to `size` entries. Returns the new
+ size, or 0 on failure (the original buffer is left intact by
+ realloc and kStkiMemErr is recorded). */
+Int32 ResizeStacki(StackiHdle stkh, Int32 size)
+{
+ Int32 resize = 0; /* assume error */
+ Int32 *val;
+
+ if ((val = REALLOC(Int32, (*stkh)->val, size))) {
+ (*stkh)->size = resize = size;
+ (*stkh)->val = val;
+ }
+
+ if (! resize)
+ sStkiLastError = kStkiMemErr;
+
+ return resize;
+
+} /* end of ResizeStacki */
+
+/* -------------------------------------------- */
+/* empilage(/lement) */
+/* */
+/* @function: PushiIn */
+/* -------------------------------------------- */
+
+/* Push a value, doubling the capacity first when the stack is full
+ (see ExpandStack). Returns Faux if the grow fails, Vrai otherwise. */
+Bool PushiIn(StackiHdle stkh, Int32 val)
+{
+ if (((*stkh)->top >= (*stkh)->size) && (! ExpandStack(stkh)))
+ return Faux;
+
+ (*stkh)->val[((*stkh)->top)++] = val;
+
+ return Vrai;
+
+} /* end of PushiIn */
+
+/* -------------------------------------------- */
+/* depilage(/lement) */
+/* */
+/* @function: PopiOut */
+/* -------------------------------------------- */
+
+/* Pop the top value into *val. Returns Faux on an empty stack.
+ When occupancy drops below half the capacity (and stays above
+ kMinStackiSize) the buffer is shrunk to limit memory use. */
+Bool PopiOut(StackiHdle stkh, Int32 *val)
+{
+ if ((*stkh)->top <= 0)
+ return Faux;
+
+ *val = (*stkh)->val[--((*stkh)->top)];
+
+ if ( ((*stkh)->top < ((*stkh)->size >> 1))
+ && ((*stkh)->top > kMinStackiSize))
+
+ (void) ShrinkStack(stkh);
+
+ return Vrai;
+
+} /* end of PopiOut */
+
+/* -------------------------------------------- */
+/* lecture descendante */
+/* */
+/* @function: ReadiDown */
+/* -------------------------------------------- */
+
+/* Non-destructive read: move the cursor one slot down and return the
+ value there. Returns Faux when the cursor reaches the bottom. */
+Bool ReadiDown(StackiPtr stki, Int32 *val)
+{
+ if (stki->cursor <= 0)
+ return Faux;
+
+ *val = stki->val[--(stki->cursor)];
+
+ return Vrai;
+
+} /* end of ReadiDown */
+
+/* -------------------------------------------- */
+/* lecture ascendante */
+/* */
+/* @function: ReadiUp */
+/* -------------------------------------------- */
+
+/* Non-destructive read: return the value at the cursor and move the
+ cursor one slot up. Returns Faux when the cursor reaches the top. */
+Bool ReadiUp(StackiPtr stki, Int32 *val)
+{
+ if (stki->cursor >= stki->top)
+ return Faux;
+
+ *val = stki->val[(stki->cursor)++];
+
+ return Vrai;
+
+} /* end of ReadiUp */
+
+/* -------------------------------------------- */
+/* remontee/descente du curseur */
+/* */
+/* @function: CursiToTop */
+/* @function: CursiToBottom */
+/* -------------------------------------------- */
+
+/* Place the read cursor at the stack top (one past the last value),
+ ready for a ReadiDown scan. */
+void CursiToTop(StackiPtr stki)
+{
+ stki->cursor = stki->top;
+
+} /* end of CursiToTop */
+
+/* Place the read cursor at the stack bottom (index 0), ready for a
+ ReadiUp scan (K&R-style declaration kept from upstream). */
+void CursiToBottom(stki)
+ StackiPtr stki;
+{
+ stki->cursor = 0;
+
+} /* end of CursiToBottom */
+
+/* -------------------------------------------- */
+/* echange des valeurs cursor <-> (top - 1) */
+/* */
+/* @function: CursiSwap */
+/* -------------------------------------------- */
+
+/* Swap the value under the cursor with the value at (top - 1).
+ No-op on an empty stack or a negative cursor.
+ NOTE(review): cursor == top is not rejected here, in which case
+ val[cursor] reads one slot past the last pushed value — confirm
+ callers always keep cursor < top before swapping. */
+void CursiSwap(StackiPtr stki)
+{
+ Int32 tmp;
+
+ if ((stki->top <= 0) || (stki->cursor < 0))
+ return;
+
+ tmp = stki->val[stki->cursor];
+ stki->val[stki->cursor] = stki->val[stki->top - 1];
+ stki->val[stki->top - 1] = tmp;
+
+} /* end of CursiSwap */
+
+/* -------------------------------------------- */
+/* Recherche d'une valeur en stack a partir du */
+/* curseur courant en descendant. */
+/* on laisse le curseur a l'endroit trouve */
+/* */
+/* @function: SearchDownStacki */
+/* -------------------------------------------- */
+
+/* Linear search downwards from the current cursor for value sval.
+ The cursor is left on the matching slot. Returns Vrai if found,
+ Faux once the bottom is reached without a match. */
+Bool SearchDownStacki(StackiPtr stki, Int32 sval)
+{
+ Int32 val;
+ Bool more;
+
+ while ((more = ReadiDown(stki, &val)))
+ if (val == sval)
+ break;
+
+ return more;
+
+} /* end of SearchDownStacki */
+
+/* -------------------------------------------- */
+/* Recherche dichotomique d'une valeur en stack */
+/* le stack est suppose trie par valeurs */
+/* croissantes. */
+/* on place le curseur a l'endroit trouve */
+/* */
+/* @function: BinSearchStacki */
+/* -------------------------------------------- */
+
+/* Binary search for sval; the stack is assumed sorted in ascending
+ order. On success the cursor is set on the matching slot and Vrai
+ is returned; otherwise Faux (cursor untouched). */
+Bool BinSearchStacki(StackiPtr stki, Int32 sval)
+{
+ Int32 midd, low, high, span;
+
+ low = 0;
+ high = stki->top - 1;
+
+ while (high >= low) {
+
+ midd = (high + low) / 2;
+
+ span = stki->val[midd] - sval;
+
+ if (span == 0) {
+ stki->cursor = midd;
+ return Vrai;
+ }
+
+ if (span > 0)
+ high = midd - 1;
+ else
+ low = midd + 1;
+ }
+
+ return Faux;
+
+} /* end of BinSearchStacki */
+
+/* -------------------------------------------- */
+/* teste l'egalite *physique* de deux stacks */
+/* */
+/* @function: SameStacki */
+/* -------------------------------------------- */
+
+/* Test element-wise equality of two stacks: same occupancy and same
+ values in the same order (memcmp over the used portion only). */
+Bool SameStacki(StackiPtr stki1, StackiPtr stki2)
+{
+ if (stki1->top != stki2->top)
+ return Faux;
+
+ return ((memcmp(stki1->val, stki2->val,
+ stki1->top * sizeof(Int32)) == 0) ? Vrai : Faux);
+
+} /* end of SameStacki */
+
+
+/* -------------------------------------------- */
+/* inverse l'ordre des elements dans un stack */
+/* */
+/* @function: ReverseStacki */
+/* -------------------------------------------- */
+
+/* Reverse the order of the values in place using two converging
+ pointers. Returns Faux on an empty stack, Vrai otherwise. */
+Bool ReverseStacki(StackiPtr stki)
+{
+ Int32 *t, *b, swp;
+
+ if (stki->top <= 0)
+ return Faux;
+
+ b = stki->val;
+ t = b + stki->top - 1;
+
+ while (t > b) {
+ swp = *t;
+ *t-- = *b;
+ *b++ = swp;
+ }
+
+ return Vrai;
+
+} /* end of ReverseStacki */
+
diff --git a/src/libapat/libstki.h b/src/libapat/libstki.h
new file mode 100644
index 0000000..6331ae7
--- /dev/null
+++ b/src/libapat/libstki.h
@@ -0,0 +1,87 @@
+/* ==================================================== */
+/* Copyright (c) Atelier de BioInformatique */
+/* Mar. 92 */
+/* File: libstki.h */
+/* Purpose: library of dynamic stacks holding */
+/* integer values */
+/* History: */
+/* 00/03/92 : <Gloup> first draft */
+/* 07/07/93 : <Gloup> complete revision */
+/* 10/03/94 : <Gloup> added xxxVector funcs */
+/* 14/05/99 : <Gloup> last revision */
+/* ==================================================== */
+
+#ifndef _H_Gtypes
+#include "Gtypes.h"
+#endif
+
+/* NOTE(review): _H_libstki is defined unconditionally; this header has
+ no #ifndef _H_libstki guard of its own, so double inclusion would
+ redefine the Stacki types below — confirm every includer tests
+ _H_libstki first (as apat.h appears to do). */
+#define _H_libstki
+
+/* ==================================================== */
+/* Constantes de dimensionnement */
+/* ==================================================== */
+
+#ifndef kMinStackiSize
+#define kMinStackiSize 2 /* taille mini stack */
+#endif
+
+
+#define kStkiNoErr 0 /* ok */
+#define kStkiMemErr 1 /* not enough memory */
+
+#define kStkiReset Vrai
+#define kStkiGet Faux
+
+/* ==================================================== */
+/* Macros standards */
+/* ==================================================== */
+
+/* thin malloc/realloc/free wrappers shared with other headers */
+#ifndef NEW
+#define NEW(typ) (typ*)malloc(sizeof(typ))
+#define NEWN(typ, dim) (typ*)malloc((unsigned long)(dim) * sizeof(typ))
+#define REALLOC(typ, ptr, dim) (typ*)realloc((void *) (ptr), (unsigned long)(dim) * sizeof(typ))
+#define FREE(ptr) free((Ptr) ptr)
+#endif
+
+
+/* ==================================================== */
+/* Types & Structures de donnees */
+/* ==================================================== */
+
+ /* -------------------- */
+ /* structure : pile */
+ /* -------------------- */
+typedef struct Stacki {
+ /* ---------------------*/
+ Int32 size; /* stack size */
+ Int32 top; /* current free pos. */
+ Int32 cursor; /* current cursor */
+ Int32 *val; /* values */
+ /* ---------------------*/
+} Stacki, *StackiPtr, **StackiHdle;
+
+
+
+/* ==================================================== */
+/* Prototypes (generated by mproto) */
+/* ==================================================== */
+
+ /* libstki.c */
+
+Int16 StkiError (Bool reset );
+StackiPtr NewStacki (Int32 size );
+StackiPtr FreeStacki (StackiPtr stki );
+StackiHdle NewStackiVector (Int32 vectSize, Int32 stackSize );
+StackiHdle FreeStackiVector (StackiHdle stkh, Int32 vectSize );
+Int32 ResizeStacki (StackiHdle stkh , Int32 size );
+Bool PushiIn (StackiHdle stkh , Int32 val );
+Bool PopiOut (StackiHdle stkh , Int32 *val );
+Bool ReadiDown (StackiPtr stki , Int32 *val );
+Bool ReadiUp (StackiPtr stki , Int32 *val );
+void CursiToTop (StackiPtr stki );
+void CursiToBottom (StackiPtr stki );
+void CursiSwap (StackiPtr stki );
+Bool SearchDownStacki (StackiPtr stki , Int32 sval );
+Bool BinSearchStacki (StackiPtr stki , Int32 sval );
+Bool SameStacki (StackiPtr stki1 , StackiPtr stki2 );
+Bool ReverseStacki (StackiPtr stki );
diff --git a/src/libecoPCR/Makefile b/src/libecoPCR/Makefile
new file mode 100644
index 0000000..08f3745
--- /dev/null
+++ b/src/libecoPCR/Makefile
@@ -0,0 +1,31 @@
+
+# Source files composing the libecoPCR static library.
+SOURCES = ecoapat.c \
+ ecodna.c \
+ ecoError.c \
+ ecoIOUtils.c \
+ ecoMalloc.c \
+ ecorank.c \
+ ecoseq.c \
+ ecotax.c \
+ ecofilter.c \
+ econame.c
+
+SRCS=$(SOURCES)
+
+# One object file per source file.
+OBJECTS= $(patsubst %.c,%.o,$(SOURCES))
+
+LIBFILE= libecoPCR.a
+RANLIB= ranlib
+
+
+# Shared build settings (compiler, flags, pattern rules) — see ../global.mk.
+include ../global.mk
+
+
+all: $(LIBFILE)
+
+clean:
+ rm -rf $(OBJECTS) $(LIBFILE)
+
+# $? expands to only the objects newer than the archive;
+# ar -cr adds/replaces just those members, then ranlib refreshes the index.
+$(LIBFILE): $(OBJECTS)
+ ar -cr $@ $?
+ $(RANLIB) $@
diff --git a/src/libecoPCR/ecoError.c b/src/libecoPCR/ecoError.c
new file mode 100644
index 0000000..00bbfa2
--- /dev/null
+++ b/src/libecoPCR/ecoError.c
@@ -0,0 +1,26 @@
+#include "ecoPCR.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * Print an error message on stderr and abort the program
+ * (abort, not exit: an abnormal-termination status / core dump
+ * is produced).
+ * @param error error number
+ * @param message the text explaining what's going on
+ * @param filename the file source where the program failed
+ * @param linenumber the line where it has failed
+ * filename and linenumber are filled in at pre-processing
+ * time by the ECOERROR macro (__FILE__ / __LINE__)
+ */
+void ecoError(int32_t error,
+ const char* message,
+ const char * filename,
+ int linenumber)
+{
+ fprintf(stderr,"Error %d in file %s line %d : %s\n",
+ error,
+ filename,
+ linenumber,
+ message);
+
+ abort();
+}
diff --git a/src/libecoPCR/ecoIOUtils.c b/src/libecoPCR/ecoIOUtils.c
new file mode 100644
index 0000000..8d7ce82
--- /dev/null
+++ b/src/libecoPCR/ecoIOUtils.c
@@ -0,0 +1,122 @@
+#include "ecoPCR.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+#define SWAPINT32(x) ((((x) << 24) & 0xFF000000) | (((x) << 8) & 0xFF0000) | \
+ (((x) >> 8) & 0xFF00) | (((x) >> 24) & 0xFF))
+
+
+/* Test host byte order by reading the first byte of an int32 set to 1.
+ NOTE(review): on a little-endian host that byte IS 1, so despite the
+ name this returns non-zero on LITTLE-endian machines. All callers use
+ it as "swap the big-endian on-disk value?", so the overall behaviour
+ is self-consistent — do not invert the test without auditing every
+ caller and the on-disk format written by ecoPCRFormat.py. */
+int32_t is_big_endian()
+{
+ int32_t i=1;
+
+ return (int32_t)((char*)&i)[0];
+}
+
+
+
+
+/* Reverse the byte order of a 32-bit integer (see SWAPINT32 above). */
+int32_t swap_int32_t(int32_t i)
+{
+ return SWAPINT32(i);
+}
+
+
+/**
+ * Read the next length-prefixed record from an ecoPCR database file.
+ *
+ * Each record is an int32 byte count followed by that many bytes of
+ * payload (the count is byte-swapped when the host order differs from
+ * the on-disk order). The payload is returned in a buffer owned by
+ * this function and reused across calls: it is only valid until the
+ * next call and must not be freed by the caller.
+ *
+ * @param *f the database file, positioned on a record
+ * @param recordSize out: the size read; must not be NULL
+ *
+ * @return pointer to the internal record buffer, or NULL at end of file
+ */
+void *read_ecorecord(FILE *f,int32_t *recordSize)
+{
+ static void *buffer =NULL;
+ /* capacity of `buffer`; must be static so it persists between calls.
+ Upstream declared it as a plain local stuck at 0 and never updated
+ it, which forced a realloc on every single record. */
+ static int32_t buffersize=0;
+ int32_t read;
+
+ if (!recordSize)
+ ECOERROR(ECO_ASSERT_ERROR,
+ "recordSize cannot be NULL");
+
+ read = fread(recordSize,
+ 1,
+ sizeof(int32_t),
+ f);
+
+ if (feof(f))
+ return NULL;
+
+ if (read != sizeof(int32_t))
+ ECOERROR(ECO_IO_ERROR,"Reading record size error");
+
+ if (is_big_endian())
+ *recordSize=swap_int32_t(*recordSize);
+
+ /* grow the reusable buffer only when the record does not fit */
+ if (buffersize < *recordSize)
+ {
+ if (buffer)
+ buffer = ECOREALLOC(buffer,*recordSize,
+ "Increase size of record buffer");
+ else
+ buffer = ECOMALLOC(*recordSize,
+ "Allocate record buffer");
+ buffersize = *recordSize;
+ }
+
+ read = fread(buffer,
+ 1,
+ *recordSize,
+ f);
+
+ if (read != *recordSize)
+ ECOERROR(ECO_IO_ERROR,"Reading record data error");
+
+ return buffer;
+}
+
+
+
+
+
+/**
+ * Open a record database file and read its leading record count.
+ * @param filename name of the database file (.sdx, .rdx, .tdx, .ndx)
+ * @param sequencecount out: number of records announced by the header
+ * (byte-swapped when needed, like record sizes)
+ * @param abort_on_open_error boolean: when true a failed open calls
+ * ECOERROR (fatal); when false it sets *sequencecount
+ * to 0 and returns NULL instead
+ * @return open FILE* positioned on the first record, or NULL
+ **/
+FILE *open_ecorecorddb(const char *filename,
+ int32_t *sequencecount,
+ int32_t abort_on_open_error)
+{
+ FILE *f;
+ int32_t read;
+
+ f = fopen(filename,"rb");
+
+ if (!f)
+ {
+ if (abort_on_open_error)
+ ECOERROR(ECO_IO_ERROR,"Cannot open file");
+ else
+ {
+ *sequencecount=0;
+ return NULL;
+ }
+ }
+
+ read = fread(sequencecount,
+ 1,
+ sizeof(int32_t),
+ f);
+
+ if (read != sizeof(int32_t))
+ ECOERROR(ECO_IO_ERROR,"Reading record size error");
+
+ if (is_big_endian())
+ *sequencecount=swap_int32_t(*sequencecount);
+
+ return f;
+}
+
diff --git a/src/libecoPCR/ecoMalloc.c b/src/libecoPCR/ecoMalloc.c
new file mode 100644
index 0000000..0ea8d3b
--- /dev/null
+++ b/src/libecoPCR/ecoMalloc.c
@@ -0,0 +1,79 @@
+#include "ecoPCR.h"
+#include <stdlib.h>
+
+static int eco_log_malloc = 0;
+
+/* Turn on logging of every eco_malloc/eco_realloc/eco_free call. */
+void eco_trace_memory_allocation()
+{
+ eco_log_malloc=1;
+}
+
+/* Turn off allocation logging (see eco_trace_memory_allocation). */
+void eco_untrace_memory_allocation()
+{
+ eco_log_malloc=0;
+}
+
+
+/* Allocate and zero-fill a chunk (calloc), aborting via ecoError on
+ failure. Callers normally go through the ECOMALLOC macro, which
+ supplies __FILE__/__LINE__. The zero-fill is relied on elsewhere
+ (e.g. read_rankidx leaves labels NUL-terminated by it). */
+void *eco_malloc(int32_t chunksize,
+ const char *error_message,
+ const char *filename,
+ int32_t line)
+{
+ void * chunk;
+
+ chunk = calloc(1,chunksize);
+
+ if (!chunk)
+ ecoError(ECO_MEM_ERROR,error_message,filename,line);
+
+ if (eco_log_malloc)
+ fprintf(stderr,
+ "Memory segment located at %p of size %d is allocated (file : %s [%d])",
+ chunk,
+ chunksize,
+ filename,
+ line);
+
+ return chunk;
+}
+
+/* Resize a chunk (plain realloc: grown bytes are NOT zeroed, unlike
+ eco_malloc), aborting via ecoError on failure. Callers normally go
+ through the ECOREALLOC macro. */
+void *eco_realloc(void *chunk,
+ int32_t newsize,
+ const char *error_message,
+ const char *filename,
+ int32_t line)
+{
+ void *newchunk;
+
+ newchunk = realloc(chunk,newsize);
+
+ if (!newchunk)
+ ecoError(ECO_MEM_ERROR,error_message,filename,line);
+
+ if (eco_log_malloc)
+ fprintf(stderr,
+ "Old memory segment %p is reallocated at %p with a size of %d (file : %s [%d])",
+ chunk,
+ newchunk,
+ newsize,
+ filename,
+ line);
+
+ return newchunk;
+}
+
+/* Free a chunk, optionally logging it. Callers normally go through
+ the ECOFREE macro. The log only prints the pointer *value* after
+ free (the freed memory itself is never dereferenced). */
+void eco_free(void *chunk,
+ const char *error_message,
+ const char *filename,
+ int32_t line)
+{
+ free(chunk);
+
+ if (eco_log_malloc)
+ fprintf(stderr,
+ "Memory segment %p is released => %s (file : %s [%d])",
+ chunk,
+ error_message,
+ filename,
+ line);
+}
diff --git a/src/libecoPCR/ecoPCR.h b/src/libecoPCR/ecoPCR.h
new file mode 100644
index 0000000..c5e2c69
--- /dev/null
+++ b/src/libecoPCR/ecoPCR.h
@@ -0,0 +1,269 @@
+#ifndef ECOPCR_H_
+#define ECOPCR_H_
+
+#include <stdio.h>
+#include <inttypes.h>
+
+#ifndef H_apat
+#include "../libapat/apat.h"
+#endif
+
+/*****************************************************
+ *
+ * Data type declarations
+ *
+ *****************************************************/
+
+/*
+ *
+ * Sequence types
+ *
+ */
+
+/* On-disk sequence record as read by read_ecorecord. The trailing
+ data[1] is the pre-C99 "flexible array tail" idiom: records are
+ allocated larger than sizeof so data[] holds the variable-length
+ payload (presumably DE text then compressed sequence — confirm
+ against ecoPCRFormat.py). */
+typedef struct {
+
+ int32_t taxid;
+ char AC[20];
+ int32_t DE_length;
+ int32_t SQ_length;
+ int32_t CSQ_length;
+
+ char data[1];
+
+} ecoseqformat_t;
+
+/* In-memory sequence: heap-allocated NUL-terminated strings. */
+typedef struct {
+ int32_t taxid;
+ int32_t SQ_length;
+ char *AC;
+ char *DE;
+ char *SQ;
+} ecoseq_t;
+
+/*
+ *
+ * Taxonomy taxon types
+ *
+ */
+
+
+/* On-disk taxon record (name[1] = variable-length tail, as above). */
+typedef struct {
+ int32_t taxid;
+ int32_t rank;
+ int32_t parent;
+ int32_t namelength;
+ char name[1];
+
+} ecotxformat_t;
+
+/* In-memory taxon node; parent links form the taxonomy tree. */
+typedef struct ecotxnode {
+ int32_t taxid;
+ int32_t rank;
+ struct ecotxnode *parent;
+ char *name;
+} ecotx_t;
+
+/* Counted taxon array (taxon[1] = variable-length tail). */
+typedef struct {
+ int32_t count;
+ ecotx_t taxon[1];
+} ecotxidx_t;
+
+
+/*
+ *
+ * Taxonomy rank types
+ *
+ */
+
+/* Counted array of rank labels, kept sorted for rank_index/bsearch. */
+typedef struct {
+ int32_t count;
+ char* label[1];
+} ecorankidx_t;
+
+/*
+ *
+ * Taxonomy name types
+ *
+ */
+
+/* On-disk name record; names[] holds the name then its class string. */
+typedef struct {
+ int32_t is_scientificname;
+ int32_t namelength;
+ int32_t classlength;
+ int32_t taxid;
+ char names[1];
+} econameformat_t;
+
+
+/* In-memory name entry, pointing back into the taxon table. */
+ typedef struct {
+ char *name;
+ char *classname;
+ int32_t is_scientificname;
+ struct ecotxnode *taxon;
+} econame_t;
+
+
+/* Counted name array (names[1] = variable-length tail). */
+typedef struct {
+ int32_t count;
+ econame_t names[1];
+} econameidx_t;
+
+
+/* A loaded taxonomy: ranks + optional alternative names + taxa. */
+ typedef struct {
+ ecorankidx_t *ranks;
+ econameidx_t *names;
+ ecotxidx_t *taxons;
+} ecotaxonomy_t;
+
+
+/*****************************************************
+ *
+ * Function declarations
+ *
+ *****************************************************/
+
+/*
+ *
+ * Low level system functions
+ *
+ */
+
+int32_t is_big_endian();
+int32_t swap_int32_t(int32_t);
+
+void *eco_malloc(int32_t chunksize,
+ const char *error_message,
+ const char *filename,
+ int32_t line);
+
+
+void *eco_realloc(void *chunk,
+ int32_t chunksize,
+ const char *error_message,
+ const char *filename,
+ int32_t line);
+
+void eco_free(void *chunk,
+ const char *error_message,
+ const char *filename,
+ int32_t line);
+
+void eco_trace_memory_allocation();
+void eco_untrace_memory_allocation();
+
+#define ECOMALLOC(size,error_message) \
+ eco_malloc((size),(error_message),__FILE__,__LINE__)
+
+#define ECOREALLOC(chunk,size,error_message) \
+ eco_realloc((chunk),(size),(error_message),__FILE__,__LINE__)
+
+#define ECOFREE(chunk,error_message) \
+ eco_free((chunk),(error_message),__FILE__,__LINE__)
+
+
+
+
+/*
+ *
+ * Error managment
+ *
+ */
+
+
+void ecoError(int32_t,const char*,const char *,int);
+
+#define ECOERROR(code,message) ecoError((code),(message),__FILE__,__LINE__)
+
+#define ECO_IO_ERROR (1)
+#define ECO_MEM_ERROR (2)
+#define ECO_ASSERT_ERROR (3)
+#define ECO_NOTFOUND_ERROR (4)
+
+
+/*
+ *
+ * Low level Disk access functions
+ *
+ */
+
+FILE *open_ecorecorddb(const char *filename,
+ int32_t *sequencecount,
+ int32_t abort_on_open_error);
+
+void *read_ecorecord(FILE *,int32_t *recordSize);
+
+
+
+/*
+ * Read function in internal binary format
+ */
+
+FILE *open_ecoseqdb(const char *filename,
+ int32_t *sequencecount);
+
+ecoseq_t *readnext_ecoseq(FILE *);
+
+ecorankidx_t *read_rankidx(const char *filename);
+
+econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy);
+
+
+
+ /**
+ * Read taxonomy data as formated by the ecoPCRFormat.py script.
+ *
+ * This function is normaly uses internaly by the read_taxonomy
+ * function and should not be called directly.
+ *
+ * @arg filename path to the *.tdx file of the reformated db
+ *
+ * @return pointer to a taxonomy index structure
+ */
+
+ecotxidx_t *read_taxonomyidx(const char *filename,const char *filename2);
+
+ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName);
+
+ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy, int32_t taxid);
+
+int eco_isundertaxon(ecotx_t *taxon, int other_taxid);
+
+ecoseq_t *ecoseq_iterator(const char *prefix);
+
+
+
+ecoseq_t *new_ecoseq();
+int32_t delete_ecoseq(ecoseq_t *);
+ecoseq_t *new_ecoseq_with_data( char *AC,
+ char *DE,
+ char *SQ,
+ int32_t taxid
+ );
+
+
+int32_t delete_taxon(ecotx_t *taxon);
+int32_t delete_taxonomy(ecotxidx_t *index);
+
+
+int32_t rank_index(const char* label,ecorankidx_t* ranks);
+
+int32_t delete_apatseq(SeqPtr pseq);
+PatternPtr buildPattern(const char *pat, int32_t error_max);
+PatternPtr complementPattern(PatternPtr pat);
+
+SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular);
+
+char *ecoComplementPattern(char *nucAcSeq);
+char *ecoComplementSequence(char *nucAcSeq);
+char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end);
+
+ecotx_t *eco_getspecies(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+ecotx_t *eco_getgenus(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+ecotx_t *eco_getfamily(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+ecotx_t *eco_getkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+
+int eco_is_taxid_ignored(int32_t *ignored_taxid, int32_t tab_len, int32_t taxid);
+int eco_is_taxid_included(ecotaxonomy_t *taxonomy, int32_t *included_taxid, int32_t tab_len, int32_t taxid);
+
+#endif /*ECOPCR_H_*/
diff --git a/src/libecoPCR/ecoapat.c b/src/libecoPCR/ecoapat.c
new file mode 100644
index 0000000..79a722e
--- /dev/null
+++ b/src/libecoPCR/ecoapat.c
@@ -0,0 +1,199 @@
+#include "../libapat/libstki.h"
+#include "../libapat/apat.h"
+
+#include "ecoPCR.h"
+
+#include <string.h>
+
+static void EncodeSequence(SeqPtr seq);
+static void UpperSequence(char *seq);
+
+/* -------------------------------------------- */
+/* uppercase sequence */
+/* -------------------------------------------- */
+
+#define IS_LOWER(c) (((c) >= 'a') && ((c) <= 'z'))
+#define TO_UPPER(c) ((c) - 'a' + 'A')
+
+/* Uppercase an ASCII NUL-terminated string in place (a-z only;
+ other characters are left untouched). */
+void UpperSequence(char *seq)
+{
+ char *cseq;
+
+ for (cseq = seq ; *cseq ; cseq++)
+ if (IS_LOWER(*cseq))
+ *cseq = TO_UPPER(*cseq);
+}
+
+#undef IS_LOWER
+#undef TO_UPPER
+
+
+
+
+/* -------------------------------------------- */
+/* encode sequence */
+/* IS_UPPER is slightly faster than isupper */
+/* -------------------------------------------- */
+
+#define IS_UPPER(c) (((c) >= 'A') && ((c) <= 'Z'))
+
+
+
+/* Encode seq->cseq into seq->data: 'A'..'Z' become 0..25, anything
+ else becomes 0. For circular sequences the first `circular` bases
+ are appended again so patterns can match across the origin.
+ Also resets all per-pattern hit/error stacks to empty. */
+void EncodeSequence(SeqPtr seq)
+{
+ int i;
+ UInt8 *data;
+ char *cseq;
+
+ data = seq->data;
+ cseq = seq->cseq;
+
+ while (*cseq) {
+
+ *data = (IS_UPPER(*cseq) ? *cseq - 'A' : 0x0);
+ data++;
+ cseq++;
+ }
+
+ /* wrap-around copy for circular sequences */
+ for (i=0,cseq=seq->cseq;i < seq->circular; i++,cseq++,data++)
+ *data = (IS_UPPER(*cseq) ? *cseq - 'A' : 0x0);
+
+ /* drop any hits left over from the previous sequence */
+ for (i = 0 ; i < MAX_PATTERN ; i++)
+ seq->hitpos[i]->top = seq->hiterr[i]->top = 0;
+
+}
+
+#undef IS_UPPER
+
+
+/* Convert an ecoseq_t into the apat Seq representation used by the
+ pattern matcher. When out is NULL a fresh Seq (with its hit/error
+ stacks) is allocated; otherwise `out` and its data buffer are
+ reused, growing the buffer if needed.
+ NOTE: out->name and out->cseq alias in->AC / in->SQ (no copy), so
+ `in` must outlive any use of the returned Seq. */
+SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular)
+{
+ int i;
+
+ if (!out)
+ {
+ out = ECOMALLOC(sizeof(Seq),
+ "Error in Allocation of a new Seq structure");
+
+ for (i = 0 ; i < MAX_PATTERN ; i++)
+ {
+
+ if (! (out->hitpos[i] = NewStacki(kMinStackiSize)))
+ ECOERROR(ECO_MEM_ERROR,"Error in hit stack Allocation");
+
+ if (! (out->hiterr[i] = NewStacki(kMinStackiSize)))
+ ECOERROR(ECO_MEM_ERROR,"Error in error stack Allocation");
+ }
+ }
+
+
+ out->name = in->AC;
+ out->seqsiz = out->seqlen = in->SQ_length;
+ out->circular = circular;
+
+ if (!out->data)
+ {
+ /* +circular: room for the wrap-around copy made by EncodeSequence */
+ out->data = ECOMALLOC((out->seqlen+circular) *sizeof(UInt8),
+ "Error in Allocation of a new Seq data member");
+ out->datsiz= out->seqlen+circular;
+ }
+ else if ((out->seqlen +circular) >= out->datsiz)
+ {
+ out->data = ECOREALLOC(out->data,(out->seqlen+circular),
+ "Error during Seq data buffer realloc");
+ out->datsiz= out->seqlen+circular;
+ }
+
+ out->cseq = in->SQ;
+
+ EncodeSequence(out);
+
+ return out;
+}
+
+/* Free an apat Seq: its encoded-data buffer, its hit/error stacks and
+ the structure itself (name/cseq are aliases and are NOT freed).
+ Returns 0 on success, 1 when pseq is NULL. */
+int32_t delete_apatseq(SeqPtr pseq)
+{
+ int i;
+
+ if (pseq) {
+
+ if (pseq->data)
+ ECOFREE(pseq->data,"Freeing sequence data buffer");
+
+ for (i = 0 ; i < MAX_PATTERN ; i++) {
+ if (pseq->hitpos[i]) FreeStacki(pseq->hitpos[i]);
+ if (pseq->hiterr[i]) FreeStacki(pseq->hiterr[i]);
+ }
+
+ ECOFREE(pseq,"Freeing apat sequence structure");
+
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Build and compile an apat Pattern from an IUPAC primer string:
+ copy + uppercase the text, then check, encode (DNA alphabet) and
+ compile it. Aborts via ECOERROR on any failure. No indels are
+ allowed; error_max is the substitution budget. */
+PatternPtr buildPattern(const char *pat, int32_t error_max)
+{
+ PatternPtr pattern;
+ int32_t patlen;
+
+ pattern = ECOMALLOC(sizeof(Pattern),
+ "Error in pattern allocation");
+
+ pattern->ok = Vrai;
+ pattern->hasIndel= Faux;
+ pattern->maxerr = error_max;
+ patlen = strlen(pat);
+
+ /* sizeof(char)*patlen+1 parses as patlen+1: room for the NUL */
+ pattern->cpat = ECOMALLOC(sizeof(char)*patlen+1,
+ "Error in sequence pattern allocation");
+
+ strncpy(pattern->cpat,pat,patlen);
+ pattern->cpat[patlen]=0;
+ UpperSequence(pattern->cpat);
+
+ if (!CheckPattern(pattern))
+ ECOERROR(ECO_ASSERT_ERROR,"Error in pattern checking");
+
+ if (! EncodePattern(pattern, dna))
+ ECOERROR(ECO_ASSERT_ERROR,"Error in pattern encoding");
+
+ if (! CreateS(pattern, ALPHA_LEN))
+ ECOERROR(ECO_ASSERT_ERROR,"Error in pattern compiling");
+
+ return pattern;
+
+}
+
+/* Build a new Pattern that is the reverse-complement of an existing
+ one (same indel flag and error budget), then check/encode/compile
+ it like buildPattern. Aborts via ECOERROR on any failure. */
+PatternPtr complementPattern(PatternPtr pat)
+{
+ PatternPtr pattern;
+
+ pattern = ECOMALLOC(sizeof(Pattern),
+ "Error in pattern allocation");
+
+ pattern->ok = Vrai;
+ pattern->hasIndel= pat->hasIndel;
+ pattern->maxerr = pat->maxerr;
+ pattern->patlen = pat->patlen;
+
+ pattern->cpat = ECOMALLOC(sizeof(char)*(strlen(pat->cpat)+1),
+ "Error in sequence pattern allocation");
+
+ strcpy(pattern->cpat,pat->cpat);
+
+ /* reverse-complement in place, keeping #/! modifiers anchored */
+ ecoComplementPattern(pattern->cpat);
+
+ if (!CheckPattern(pattern))
+ ECOERROR(ECO_ASSERT_ERROR,"Error in pattern checking");
+
+ if (! EncodePattern(pattern, dna))
+ ECOERROR(ECO_ASSERT_ERROR,"Error in pattern encoding");
+
+ if (! CreateS(pattern, ALPHA_LEN))
+ ECOERROR(ECO_ASSERT_ERROR,"Error in pattern compiling");
+
+ return pattern;
+
+}
diff --git a/src/libecoPCR/ecodna.c b/src/libecoPCR/ecodna.c
new file mode 100644
index 0000000..86d2012
--- /dev/null
+++ b/src/libecoPCR/ecodna.c
@@ -0,0 +1,156 @@
+#include <string.h>
+#include "ecoPCR.h"
+
+/*
+ * @doc: DNA alphabet (IUPAC)
+ */
+#define LX_BIO_DNA_ALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZ#![]"
+
+/*
+ * @doc: complementary DNA alphabet (IUPAC)
+ */
+#define LX_BIO_CDNA_ALPHA "TVGHEFCDIJMLKNOPQYSAABWXRZ#!]["
+
+
+static char sNuc[] = LX_BIO_DNA_ALPHA;
+static char sAnuc[] = LX_BIO_CDNA_ALPHA;
+
+static char LXBioBaseComplement(char nucAc);
+static char *LXBioSeqComplement(char *nucAcSeq);
+static char *reverseSequence(char *str,char isPattern);
+
+
+/* ---------------------------- */
+
+/* Complement one IUPAC base by table lookup (sNuc -> sAnuc at the
+ same offset); characters outside the alphabet pass through. */
+char LXBioBaseComplement(char nucAc)
+{
+ char *c;
+
+ if ((c = strchr(sNuc, nucAc)))
+ return sAnuc[(c - sNuc)];
+ else
+ return nucAc;
+}
+
+/* ---------------------------- */
+
+/* Complement every base of a NUL-terminated sequence in place
+ (no reversal here — see reverseSequence). Returns its argument. */
+char *LXBioSeqComplement(char *nucAcSeq)
+{
+ char *s;
+
+ for (s = nucAcSeq ; *s ; s++)
+ *s = LXBioBaseComplement(*s);
+
+ return nucAcSeq;
+}
+
+
+/* Reverse a NUL-terminated string in place. When isPattern is true,
+ additionally re-anchor the ecoPCR pattern modifier characters
+ ('#' and '!') which reversal detached from the base they annotate
+ (presumably '#'/'!' mark the base to their left in the original
+ orientation — TODO confirm against the pattern syntax docs).
+ NULL-safe; returns its argument. */
+char *reverseSequence(char *str,char isPattern)
+{
+ char *sb, *se, c;
+
+ if (! str)
+ return str;
+
+ sb = str;
+ se = str + strlen(str) - 1;
+
+ while(sb <= se) {
+ c = *sb;
+ *sb++ = *se;
+ *se-- = c;
+ }
+
+ sb = str;
+ se = str + strlen(str) - 1;
+
+ if (isPattern)
+ for (;sb < se; sb++)
+ {
+ if (*sb=='#')
+ {
+ if (((se - sb) > 2) && (*(sb+2)=='!'))
+ {
+ *sb='!';
+ sb+=2;
+ *sb='#';
+ }
+ else
+ {
+ *sb=*(sb+1);
+ sb++;
+ *sb='#';
+ }
+ }
+ else if (*sb=='!')
+ {
+ *sb=*(sb-1);
+ *(sb-1)='!';
+ }
+ }
+
+ return str;
+}
+
+/* Reverse-complement a pattern string in place (pattern mode keeps
+ the '#'/'!' modifiers anchored). Returns its argument. */
+char *ecoComplementPattern(char *nucAcSeq)
+{
+ return reverseSequence(LXBioSeqComplement(nucAcSeq),1);
+}
+
+/* Reverse-complement a plain sequence string in place.
+ Returns its argument. */
+char *ecoComplementSequence(char *nucAcSeq)
+{
+ return reverseSequence(LXBioSeqComplement(nucAcSeq),0);
+}
+
+
+char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end)
+/*
+ Extract the half-open subsequence [begin,end[ from nucAcSeq.
+ When begin >= end the sequence is treated as circular and the
+ result wraps through the origin: tail [begin,len[ then head [0,end[.
+ Returns a static buffer reused (and grown) across calls — valid
+ only until the next call, never freed by the caller.
+*/
+{
+ static char *buffer = NULL;
+ static int32_t buffSize= 0;
+ int32_t length;
+
+ if (begin < end)
+ {
+ length = end - begin;
+
+ if (length >= buffSize)
+ {
+ buffSize = length+1;
+ if (buffer)
+ buffer=ECOREALLOC(buffer,buffSize,
+ "Error in reallocating sub sequence buffer");
+ else
+ buffer=ECOMALLOC(buffSize,
+ "Error in allocating sub sequence buffer");
+
+ }
+
+ strncpy(buffer,nucAcSeq + begin,length);
+ buffer[length]=0;
+ }
+ else
+ {
+ /* circular case: total = (len - begin) + end */
+ length = end + strlen(nucAcSeq) - begin;
+
+ if (length >= buffSize)
+ {
+ buffSize = length+1;
+ if (buffer)
+ buffer=ECOREALLOC(buffer,buffSize,
+ "Error in reallocating sub sequence buffer");
+ else
+ buffer=ECOMALLOC(buffSize,
+ "Error in allocating sub sequence buffer");
+
+ }
+ strncpy(buffer,nucAcSeq+begin,length - end);
+ strncpy(buffer+(length-end),nucAcSeq ,end);
+ buffer[length]=0;
+ }
+
+ return buffer;
+}
+
diff --git a/src/libecoPCR/ecofilter.c b/src/libecoPCR/ecofilter.c
new file mode 100644
index 0000000..64276c0
--- /dev/null
+++ b/src/libecoPCR/ecofilter.c
@@ -0,0 +1,20 @@
+#include "ecoPCR.h"
+
+int eco_is_taxid_included( ecotaxonomy_t *taxonomy,
+ int32_t *restricted_taxid,
+ int32_t tab_len,
+ int32_t taxid)
+{
+ int i;
+ ecotx_t *taxon;
+
+ taxon = eco_findtaxonbytaxid(taxonomy, taxid);
+
+ if (taxon)
+ for (i=0; i < tab_len; i++)
+ if ( (taxon->taxid == restricted_taxid[i]) ||
+ (eco_isundertaxon(taxon, restricted_taxid[i])) )
+ return 1;
+
+ return 0;
+}
diff --git a/src/libecoPCR/econame.c b/src/libecoPCR/econame.c
new file mode 100644
index 0000000..835d79c
--- /dev/null
+++ b/src/libecoPCR/econame.c
@@ -0,0 +1,61 @@
+#include "ecoPCR.h"
+#include <string.h>
+#include <stdlib.h>
+
+static econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy);
+
+/* Load the whole name index (.ndx) file: read the record count from
+ the header, allocate the counted array (count-1 because names[1]
+ already holds one slot), then read each record. The taxonomy must
+ already be loaded so names can point at their taxon. */
+econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy)
+{
+
+ int32_t count;
+ FILE *f;
+ econameidx_t *indexname;
+ int32_t i;
+
+ f = open_ecorecorddb(filename,&count,1);
+
+ indexname = (econameidx_t*) ECOMALLOC(sizeof(econameidx_t) + sizeof(econame_t) * (count-1),"Allocate names");
+
+ indexname->count=count;
+
+ for (i=0; i < count; i++){
+ readnext_econame(f,(indexname->names)+i,taxonomy);
+ }
+
+ return indexname;
+}
+
+/* Read one name record into *name: byte-swap the header fields if
+ needed, copy out the name and class strings (NUL-terminated), and
+ link the entry to its taxon. Returns NULL at end of file.
+ NOTE(review): raw->taxid is used directly as an index into the
+ taxon array, not looked up as an NCBI taxid — presumably the
+ formatter stores array indexes here; confirm against
+ ecoPCRFormat.py. */
+econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy)
+{
+
+ econameformat_t *raw;
+ int32_t rs;
+
+ raw = read_ecorecord(f,&rs);
+
+ if (!raw)
+ return NULL;
+
+ if (is_big_endian())
+ {
+ raw->is_scientificname = swap_int32_t(raw->is_scientificname);
+ raw->namelength = swap_int32_t(raw->namelength);
+ raw->classlength = swap_int32_t(raw->classlength);
+ raw->taxid = swap_int32_t(raw->taxid);
+ }
+
+ name->is_scientificname=raw->is_scientificname;
+
+ name->name = ECOMALLOC((raw->namelength+1) * sizeof(char),"Allocate name");
+ strncpy(name->name,raw->names,raw->namelength);
+ name->name[raw->namelength]=0;
+
+ /* class string is stored right after the name in raw->names */
+ name->classname = ECOMALLOC((raw->classlength+1) * sizeof(char),"Allocate classname");
+ strncpy(name->classname,(raw->names+raw->namelength),raw->classlength);
+ name->classname[raw->classlength]=0;
+
+ name->taxon = taxonomy->taxons->taxon + raw->taxid;
+
+ return name;
+}
+
diff --git a/src/libecoPCR/ecorank.c b/src/libecoPCR/ecorank.c
new file mode 100644
index 0000000..4796088
--- /dev/null
+++ b/src/libecoPCR/ecorank.c
@@ -0,0 +1,52 @@
+#include "ecoPCR.h"
+#include <string.h>
+#include <stdlib.h>
+
+static int compareRankLabel(const void *label1, const void *label2);
+
+/* Load the rank index (.rdx) file: one NUL-terminated label per
+ record. The +1 over-allocation plus eco_malloc's calloc zero-fill
+ guarantees each label is NUL-terminated even though strncpy is
+ given exactly rs bytes. */
+ecorankidx_t *read_rankidx(const char *filename)
+{
+ int32_t count;
+ FILE *f;
+ ecorankidx_t *index;
+ int32_t i;
+ int32_t rs;
+ char *buffer;
+
+ f = open_ecorecorddb(filename,&count,1);
+
+ index = (ecorankidx_t*) ECOMALLOC(sizeof(ecorankidx_t) + sizeof(char*) * (count-1),
+ "Allocate rank index");
+
+ index->count=count;
+
+ for (i=0; i < count; i++)
+ {
+ buffer = read_ecorecord(f,&rs);
+ index->label[i]=(char*) ECOMALLOC(rs+1,
+ "Allocate rank label");
+ strncpy(index->label[i],buffer,rs);
+ }
+
+ return index;
+}
+
+/* Binary-search a rank label in the (sorted) rank table and return
+ its index. A miss is fatal (ECOERROR aborts); the trailing
+ return -1 is unreachable and only silences the compiler. */
+int32_t rank_index(const char* label,ecorankidx_t* ranks)
+{
+ char **rep;
+
+ rep = bsearch(label,ranks->label,ranks->count,sizeof(char*),compareRankLabel);
+
+ if (rep)
+ return rep-ranks->label;
+ else
+ ECOERROR(ECO_NOTFOUND_ERROR,"Rank label not found");
+
+ return -1;
+}
+
+
+/* bsearch comparator: the key is the label itself (char*), while each
+ table element is a pointer slot (char**) — hence the asymmetry. */
+int compareRankLabel(const void *label1, const void *label2)
+{
+ return strcmp((const char*)label1,*(const char**)label2);
+}
diff --git a/src/libecoPCR/ecoseq.c b/src/libecoPCR/ecoseq.c
new file mode 100644
index 0000000..fbea3e5
--- /dev/null
+++ b/src/libecoPCR/ecoseq.c
@@ -0,0 +1,230 @@
+#include "ecoPCR.h"
+#include <stdlib.h>
+#include <string.h>
+#include <zlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+
+static FILE *open_seqfile(const char *prefix,int32_t index);
+
+
+/**
+ * Allocate an empty ecoseq_t.
+ * All fields start zeroed (ECOMALLOC presumably zero-fills — the AC/DE/SQ
+ * pointers are later tested against NULL in delete_ecoseq).
+ */
+ecoseq_t *new_ecoseq()
+{
+ void *tmp;
+
+ tmp = ECOMALLOC(sizeof(ecoseq_t),"Allocate new ecoseq structure");
+
+ return tmp;
+}
+
+/**
+ * Free an ecoseq_t and its owned AC/DE/SQ string buffers.
+ *
+ * @param seq sequence to release (NULL is tolerated)
+ * @return 0 on success, 1 when seq was NULL
+ */
+int32_t delete_ecoseq(ecoseq_t * seq)
+{
+
+ if (seq)
+ {
+ if (seq->AC)
+ ECOFREE(seq->AC,"Free sequence AC");
+
+ if (seq->DE)
+ ECOFREE(seq->DE,"Free sequence DE");
+
+ if (seq->SQ)
+ ECOFREE(seq->SQ,"Free sequence SQ");
+
+ ECOFREE(seq,"Free sequence structure");
+
+ return 0;
+
+ }
+
+ return 1;
+}
+
+/**
+ * Build an ecoseq_t from copies of the supplied strings.
+ *
+ * @param AC accession number (may be NULL; copied)
+ * @param DE definition line (may be NULL; copied)
+ * @param SQ sequence data (may be NULL; copied)
+ * @param taxid_idx taxon index stored verbatim in tmp->taxid
+ * @return newly allocated sequence owning private copies of the strings
+ */
+ecoseq_t *new_ecoseq_with_data( char *AC,
+ char *DE,
+ char *SQ,
+ int32_t taxid_idx
+ )
+{
+ ecoseq_t *tmp;
+ int32_t lstr;
+ tmp = new_ecoseq();
+
+ tmp->taxid=taxid_idx;
+
+ if (AC)
+ {
+ lstr =strlen(AC);
+ tmp->AC=ECOMALLOC((lstr+1) * sizeof(char),
+ "Allocate sequence accession");
+ strcpy(tmp->AC,AC);
+ }
+
+ if (DE)
+ {
+ lstr =strlen(DE);
+ tmp->DE=ECOMALLOC((lstr+1) * sizeof(char),
+ "Allocate sequence definition");
+ strcpy(tmp->DE,DE);
+ }
+
+ if (SQ)
+ {
+ lstr =strlen(SQ);
+ tmp->SQ=ECOMALLOC((lstr+1) * sizeof(char),
+ "Allocate sequence data");
+ strcpy(tmp->SQ,SQ);
+ }
+ return tmp;
+
+}
+
+/**
+ * Open a sequence database file and retrieve its record count.
+ * Thin wrapper around open_ecorecorddb with the record-count check enabled.
+ * NOTE(review): marked "?? used ??" upstream — possibly dead code; callers
+ * appear to go through open_seqfile/ecoseq_iterator instead. Verify before
+ * removing.
+ */
+FILE *open_ecoseqdb(const char *filename,
+ int32_t *sequencecount)
+{
+ return open_ecorecorddb(filename,sequencecount,1);
+}
+
+/**
+ * Read and decode the next sequence record from an open .sdx file.
+ *
+ * The on-disk record holds byte-swappable lengths, the accession, the
+ * definition line and a zlib-compressed sequence; the sequence is
+ * uncompressed and upper-cased before being returned.
+ *
+ * @param f open sequence database file
+ * @return a freshly allocated ecoseq_t, or NULL at end of file
+ */
+ecoseq_t *readnext_ecoseq(FILE *f)
+{
+ char *compressed=NULL;
+
+ ecoseqformat_t *raw;
+ ecoseq_t *seq;
+ int32_t comp_status;
+ unsigned long int seqlength;
+ int32_t rs;
+ char *c;
+ int32_t i;
+
+ raw = read_ecorecord(f,&rs);
+
+ if (!raw)
+ return NULL;
+
+ /* Records are stored little-endian; swap on big-endian hosts. */
+ if (is_big_endian())
+ {
+ raw->CSQ_length = swap_int32_t(raw->CSQ_length);
+ raw->DE_length = swap_int32_t(raw->DE_length);
+ raw->SQ_length = swap_int32_t(raw->SQ_length);
+ raw->taxid = swap_int32_t(raw->taxid);
+ }
+
+ seq = new_ecoseq();
+
+ seq->taxid = raw->taxid;
+
+ /* NOTE(review): strncpy copies exactly strlen(raw->AC) bytes without a
+ * terminator; correctness presumably relies on ECOMALLOC zero-filling
+ * the (len+1)-byte buffers — confirm. */
+ seq->AC = ECOMALLOC(strlen(raw->AC) +1,
+ "Allocate Sequence Accesion number");
+ strncpy(seq->AC,raw->AC,strlen(raw->AC));
+
+
+ seq->DE = ECOMALLOC(raw->DE_length+1,
+ "Allocate Sequence definition");
+ strncpy(seq->DE,raw->data,raw->DE_length);
+
+ /* seqlength is unsigned long because uncompress() updates it in place. */
+ seqlength = seq->SQ_length = raw->SQ_length;
+
+ /* Compressed sequence data starts right after the definition line. */
+ compressed = raw->data + raw->DE_length;
+
+ seq->SQ = ECOMALLOC(seqlength+1,
+ "Allocate sequence buffer");
+
+ comp_status = uncompress((unsigned char*)seq->SQ,
+ &seqlength,
+ (unsigned char*)compressed,
+ raw->CSQ_length);
+
+ if (comp_status != Z_OK)
+ ECOERROR(ECO_IO_ERROR,"I cannot uncompress sequence data");
+
+ /* Normalise nucleotides to upper case. */
+ for (c=seq->SQ,i=0;i<seqlength;c++,i++)
+ *c=toupper(*c);
+
+
+ return seq;
+}
+
+/**
+ * Open one numbered sequence chunk of the database (.sdx file).
+ *
+ * Builds "<prefix>_<index padded to 3 digits>.sdx" and opens it through
+ * open_ecorecorddb (without the record-count assertion, so a missing
+ * chunk simply yields NULL — ecoseq_iterator uses that as a stop signal).
+ *
+ * @param prefix database radical, without extension
+ * @param index 1-based chunk number
+ * @return open FILE*, or NULL when the chunk does not exist
+ */
+FILE *open_seqfile(const char *prefix,int32_t index)
+{
+ char filename_buffer[1024];
+ int32_t filename_length;
+ FILE *input;
+ int32_t seqcount;
+
+ filename_length = snprintf(filename_buffer,
+ 1023,
+ "%s_%03d.sdx",
+ prefix,
+ index);
+
+ /* snprintf was given a size of 1023, so a return value of 1023 or more
+ * already means the name was truncated (the old test against 1024 let
+ * a truncated name through). */
+ if (filename_length >= 1023)
+ ECOERROR(ECO_ASSERT_ERROR,"file name is too long");
+
+ filename_buffer[filename_length]=0;
+
+ input=open_ecorecorddb(filename_buffer,&seqcount,0);
+
+ if (input)
+ fprintf(stderr,"# Reading file %s containing %d sequences...\n",
+ filename_buffer,
+ seqcount);
+
+ return input;
+}
+
+/**
+ * Iterate over every sequence of a multi-chunk database.
+ *
+ * Call once with a non-NULL prefix to (re)start the iteration; then call
+ * with NULL to get the following sequences. When one chunk is exhausted
+ * the next "<prefix>_NNN.sdx" file is opened transparently.
+ *
+ * Not reentrant: iteration state is kept in static variables, so only one
+ * database can be walked at a time.
+ *
+ * @param prefix database radical to start a new iteration, or NULL to continue
+ * @return next sequence, or NULL when all chunks are consumed
+ */
+ecoseq_t *ecoseq_iterator(const char *prefix)
+{
+ static FILE *current_seq_file= NULL;
+ static int32_t current_file_idx = 1;
+ static char current_prefix[1024];
+ ecoseq_t *seq;
+
+ if (prefix)
+ {
+ current_file_idx = 1;
+
+ if (current_seq_file)
+ fclose(current_seq_file);
+
+ strncpy(current_prefix,prefix,1023);
+ /* Fix: terminate at the last valid index; writing current_prefix[1024]
+ * was one byte past the end of the array. */
+ current_prefix[1023]=0;
+
+ current_seq_file = open_seqfile(current_prefix,
+ current_file_idx);
+
+ if (!current_seq_file)
+ return NULL;
+
+ }
+
+ seq = readnext_ecoseq(current_seq_file);
+
+ /* End of this chunk: try the next numbered file. */
+ if (!seq && feof(current_seq_file))
+ {
+ current_file_idx++;
+ fclose(current_seq_file);
+ current_seq_file = open_seqfile(current_prefix,
+ current_file_idx);
+
+
+ if (current_seq_file)
+ seq = readnext_ecoseq(current_seq_file);
+ }
+
+ return seq;
+}
diff --git a/src/libecoPCR/ecotax.c b/src/libecoPCR/ecotax.c
new file mode 100644
index 0000000..77a46b2
--- /dev/null
+++ b/src/libecoPCR/ecotax.c
@@ -0,0 +1,351 @@
+#include "ecoPCR.h"
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+static ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon);
+
+ /**
+ * Load the taxonomy index from a .tdx file plus an optional .ldx file
+ * of local (user-added) taxa.
+ *
+ * @param filename path of the main taxon index (.tdx, count checked)
+ * @param filename2 path of the local taxon index (.ldx, may hold 0 records)
+ * @return a single ecotxidx_t holding main taxa followed by local taxa
+ */
+ecotxidx_t *read_taxonomyidx(const char *filename,const char *filename2)
+{
+ int32_t count;
+ int32_t count2;
+ FILE *f;
+ FILE *f2;
+ ecotxidx_t *index;
+ int32_t i;
+
+ f = open_ecorecorddb(filename,&count,1);
+ f2 = open_ecorecorddb(filename2,&count2,0);
+
+
+
+ /* The struct already embeds one ecotx_t, hence (count+count2-1) extras. */
+ index = (ecotxidx_t*) ECOMALLOC(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count+count2-1),
+ "Allocate taxonomy");
+
+ index->count=count+count2;
+
+ /* readnext_ecotaxon stores the parent as an integer offset smuggled in
+ * the pointer field; convert it to a real pointer into this array. */
+ fprintf(stderr,"Reading %d taxa...\n",count);
+ for (i=0; i < count; i++){
+ readnext_ecotaxon(f,&(index->taxon[i]));
+ index->taxon[i].parent=index->taxon + (int32_t)index->taxon[i].parent;
+ }
+
+ if (count2>0)
+ fprintf(stderr,"Reading %d local taxa...\n",count2);
+ else
+ fprintf(stderr,"No local taxon\n");
+
+ for (i=0; i < count2; i++){
+ readnext_ecotaxon(f2,&(index->taxon[count+i]));
+ index->taxon[count+i].parent=index->taxon + (int32_t)index->taxon[count+i].parent;
+ }
+
+ return index;
+}
+
+
+/**
+ * Free a taxon index and every per-taxon scientific-name string.
+ *
+ * @param index taxonomy index to release (NULL tolerated)
+ * @return 0 on success, 1 when index was NULL
+ */
+int32_t delete_taxonomy(ecotxidx_t *index)
+{
+ int32_t i;
+
+ if (index)
+ {
+ for (i=0; i< index->count; i++)
+ if (index->taxon[i].name)
+ ECOFREE(index->taxon[i].name,"Free scientific name");
+
+ ECOFREE(index,"Free Taxonomy");
+
+ return 0;
+ }
+
+ return 1;
+}
+
+
+
+/**
+ * Free a single heap-allocated taxon and its name string.
+ * NOTE(review): only valid for taxa allocated individually — taxa living
+ * inside an ecotxidx_t array must be freed via delete_taxonomy instead.
+ *
+ * @param taxon taxon to release (NULL tolerated)
+ * @return 0 on success, 1 when taxon was NULL
+ */
+int32_t delete_taxon(ecotx_t *taxon)
+{
+ if (taxon)
+ {
+ if (taxon->name)
+ ECOFREE(taxon->name,"Free scientific name");
+
+ ECOFREE(taxon,"Free Taxon");
+
+ return 0;
+ }
+
+ return 1;
+}
+
+
+/**
+ * Read the next taxon record from the database into *taxon.
+ *
+ * The parent field is stored on disk as an integer index; it is stashed
+ * as-is in the parent pointer and converted into a real pointer by the
+ * caller (read_taxonomyidx) once the whole array exists.
+ *
+ * @param *f pointer to FILE type returned by fopen
+ * @param *taxon pointer to the structure to fill
+ *
+ * @return taxon on success, NULL at end of file
+ */
+ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon)
+{
+
+ ecotxformat_t *raw;
+ int32_t rs;
+
+ raw = read_ecorecord(f,&rs);
+
+ if (!raw)
+ return NULL;
+
+ /* Records are stored little-endian; swap on big-endian hosts. */
+ if (is_big_endian())
+ {
+ raw->namelength = swap_int32_t(raw->namelength);
+ raw->parent = swap_int32_t(raw->parent);
+ raw->rank = swap_int32_t(raw->rank);
+ raw->taxid = swap_int32_t(raw->taxid);
+ }
+
+ taxon->parent = (ecotx_t*)raw->parent;
+ taxon->taxid = raw->taxid;
+ taxon->rank = raw->rank;
+
+ /* NOTE(review): strncpy leaves the terminator to the presumably
+ * zero-filled ECOMALLOC buffer — confirm. */
+ taxon->name = ECOMALLOC((raw->namelength+1) * sizeof(char),
+ "Allocate taxon scientific name");
+
+ strncpy(taxon->name,raw->name,raw->namelength);
+
+ return taxon;
+}
+
+
+/**
+ * Load a complete taxonomy: ranks (.rdx), taxa (.tdx + .ldx) and,
+ * optionally, the alternative-name index (.ndx).
+ *
+ * @param prefix database radical, without extension
+ * @param readAlternativeName when non-zero, also load the .ndx name index
+ * @return newly allocated ecotaxonomy_t
+ */
+ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName)
+{
+ ecotaxonomy_t *tax;
+ char *filename;
+ char *filename2;
+ int buffsize;
+
+ tax = ECOMALLOC(sizeof(ecotaxonomy_t),
+ "Allocate taxonomy structure");
+
+ /* prefix + "." + 3-char extension + NUL fits well within +10. */
+ buffsize = strlen(prefix)+10;
+
+ filename = ECOMALLOC(buffsize,
+ "Allocate filename");
+ filename2= ECOMALLOC(buffsize,
+ "Allocate filename");
+
+ snprintf(filename,buffsize,"%s.rdx",prefix);
+
+ tax->ranks = read_rankidx(filename);
+
+ snprintf(filename,buffsize,"%s.tdx",prefix);
+ snprintf(filename2,buffsize,"%s.ldx",prefix);
+
+ tax->taxons = read_taxonomyidx(filename,filename2);
+
+ if (readAlternativeName)
+ {
+ snprintf(filename,buffsize,"%s.ndx",prefix);
+ tax->names=read_nameidx(filename,tax);
+ }
+ else
+ tax->names=NULL;
+
+ /* Fix: the scratch filename buffers were previously leaked. */
+ ECOFREE(filename,"Free filename");
+ ECOFREE(filename2,"Free filename");
+
+ return tax;
+
+}
+
+
+
+/**
+ * Free a taxonomy loaded by read_taxonomy.
+ *
+ * Fix: the taxon index is now released through delete_taxonomy (which also
+ * frees every per-taxon name string), and the optional name index is freed
+ * as well — both were previously leaked. NOTE(review): the per-label rank
+ * strings and per-name strings inside the indices are still not walked;
+ * a deeper cleanup would need dedicated destructors.
+ *
+ * @param taxonomy taxonomy to release (NULL tolerated)
+ * @return 0 on success, 1 when taxonomy was NULL
+ */
+int32_t delete_ecotaxonomy(ecotaxonomy_t *taxonomy)
+{
+ if (taxonomy)
+ {
+ if (taxonomy->ranks)
+ ECOFREE(taxonomy->ranks,"Free rank index");
+
+ if (taxonomy->names)
+ ECOFREE(taxonomy->names,"Free name index");
+
+ if (taxonomy->taxons)
+ delete_taxonomy(taxonomy->taxons);
+
+ ECOFREE(taxonomy,"Free taxonomy structure");
+
+ return 0;
+ }
+
+ return 1;
+}
+
+/**
+ * Walk up the parent chain from taxon until a node with the requested
+ * rank is found or the root is reached (the root is its own parent).
+ *
+ * @param taxon starting taxon
+ * @param rankidx rank position as returned by rank_index
+ * @return the ancestor (possibly taxon itself) at that rank, or NULL
+ */
+ecotx_t *eco_findtaxonatrank(ecotx_t *taxon,
+ int32_t rankidx)
+{
+ ecotx_t *current_taxon;
+ ecotx_t *next_taxon;
+
+ current_taxon = taxon;
+ next_taxon = current_taxon->parent;
+
+ while ((current_taxon!=next_taxon) && // stop at the root (self-parented)
+ (current_taxon->rank!=rankidx))
+ {
+ current_taxon = next_taxon;
+ next_taxon = current_taxon->parent;
+ }
+
+ if (current_taxon->rank==rankidx)
+ return current_taxon;
+ else
+ return NULL;
+}
+
+/**
+ * Get back information concerning a taxon from a taxonomic id.
+ * Linear scan of the whole taxon array (O(n) per call).
+ *
+ * @param *taxonomy the taxonomy database
+ * @param taxid the taxonomic id
+ *
+ * @return the matching ecotx_t, or NULL when the taxid is unknown
+ **/
+ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy,
+ int32_t taxid)
+{
+ ecotx_t *current_taxon;
+ int32_t taxoncount;
+ int32_t i;
+
+ taxoncount=taxonomy->taxons->count;
+
+ for (current_taxon=taxonomy->taxons->taxon,
+ i=0;
+ i < taxoncount;
+ i++,
+ current_taxon++){
+ if (current_taxon->taxid==taxid){
+ return current_taxon;
+ }
+ }
+
+ return (ecotx_t*)NULL;
+}
+
+/**
+ * Find out whether taxon descends from another taxon (identified by its
+ * taxid), walking the parent chain until the taxid matches or the node
+ * named "root" is reached.
+ *
+ * @param *taxon candidate descendant taxon
+ * @param other_taxid taxonomic id of the putative ancestor
+ *
+ * @return 1 when other_taxid matches an ancestor's taxid, else 0
+ **/
+int eco_isundertaxon(ecotx_t *taxon,
+ int other_taxid)
+{
+ ecotx_t *next_parent;
+
+ next_parent = taxon->parent;
+
+ while ( (other_taxid != next_parent->taxid) &&
+ (strcmp(next_parent->name, "root")) )
+ {
+ next_parent = next_parent->parent;
+ }
+
+ if (other_taxid == next_parent->taxid)
+ return 1;
+ else
+ return 0;
+}
+
+/*
+ * eco_getspecies / eco_getgenus / eco_getfamily / eco_getkingdom /
+ * eco_getsuperkingdom: return the ancestor of taxon at the named rank.
+ *
+ * Each helper caches the rank index in function-local statics, refreshed
+ * whenever a new (non-NULL) taxonomy pointer is supplied; pass NULL after
+ * the first call to reuse the cache. Because of the statics these helpers
+ * are not thread-safe. ECOERROR is raised if no taxonomy was ever given.
+ */
+
+/* Ancestor at rank "species", or NULL if the lineage has none. */
+ecotx_t *eco_getspecies(ecotx_t *taxon,
+ ecotaxonomy_t *taxonomy)
+{
+ static ecotaxonomy_t *tax=NULL;
+ static int32_t rankindex=-1;
+
+ if (taxonomy && tax!=taxonomy)
+ {
+ rankindex = rank_index("species",taxonomy->ranks);
+ tax=taxonomy;
+ }
+
+ if (!tax || rankindex < 0)
+ ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
+
+ return eco_findtaxonatrank(taxon,rankindex);
+}
+
+/* Ancestor at rank "genus", or NULL if the lineage has none. */
+ecotx_t *eco_getgenus(ecotx_t *taxon,
+ ecotaxonomy_t *taxonomy)
+{
+ static ecotaxonomy_t *tax=NULL;
+ static int32_t rankindex=-1;
+
+ if (taxonomy && tax!=taxonomy)
+ {
+ rankindex = rank_index("genus",taxonomy->ranks);
+ tax=taxonomy;
+ }
+
+ if (!tax || rankindex < 0)
+ ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
+
+ return eco_findtaxonatrank(taxon,rankindex);
+}
+
+
+/* Ancestor at rank "family", or NULL if the lineage has none. */
+ecotx_t *eco_getfamily(ecotx_t *taxon,
+ ecotaxonomy_t *taxonomy)
+{
+ static ecotaxonomy_t *tax=NULL;
+ static int32_t rankindex=-1;
+
+ if (taxonomy && tax!=taxonomy)
+ {
+ rankindex = rank_index("family",taxonomy->ranks);
+ tax=taxonomy;
+ }
+
+ if (!tax || rankindex < 0)
+ ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
+
+ return eco_findtaxonatrank(taxon,rankindex);
+}
+
+/* Ancestor at rank "kingdom", or NULL if the lineage has none. */
+ecotx_t *eco_getkingdom(ecotx_t *taxon,
+ ecotaxonomy_t *taxonomy)
+{
+ static ecotaxonomy_t *tax=NULL;
+ static int32_t rankindex=-1;
+
+ if (taxonomy && tax!=taxonomy)
+ {
+ rankindex = rank_index("kingdom",taxonomy->ranks);
+ tax=taxonomy;
+ }
+
+ if (!tax || rankindex < 0)
+ ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
+
+ return eco_findtaxonatrank(taxon,rankindex);
+}
+
+/* Ancestor at rank "superkingdom", or NULL if the lineage has none. */
+ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,
+ ecotaxonomy_t *taxonomy)
+{
+ static ecotaxonomy_t *tax=NULL;
+ static int32_t rankindex=-1;
+
+ if (taxonomy && tax!=taxonomy)
+ {
+ rankindex = rank_index("superkingdom",taxonomy->ranks);
+ tax=taxonomy;
+ }
+
+ if (!tax || rankindex < 0)
+ ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
+
+ return eco_findtaxonatrank(taxon,rankindex);
+}
diff --git a/src/libthermo/Makefile b/src/libthermo/Makefile
new file mode 100644
index 0000000..e038b92
--- /dev/null
+++ b/src/libthermo/Makefile
@@ -0,0 +1,22 @@
+
+SOURCES = nnparams.c
+
+SRCS=$(SOURCES)
+
+OBJECTS= $(patsubst %.c,%.o,$(SOURCES))
+
+LIBFILE= libthermo.a
+RANLIB= ranlib
+
+
+include ../global.mk
+
+
+all: $(LIBFILE)
+
+clean:
+ rm -rf $(OBJECTS) $(LIBFILE)
+
+$(LIBFILE): $(OBJECTS)
+ ar -cr $@ $?
+ $(RANLIB) $@
diff --git a/src/libthermo/nnparams.c b/src/libthermo/nnparams.c
new file mode 100644
index 0000000..71e71d5
--- /dev/null
+++ b/src/libthermo/nnparams.c
@@ -0,0 +1,619 @@
+/*
+ * nnparams.cpp
+ * PHunterLib
+ *
+ * Nearest Neighbor Model / Parameters
+ *
+ * Created by Tiayyba Riaz on 7/2/09.
+ *
+ */
+
+#include <memory.h>
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+#include"nnparams.h"
+
+static char bpencoder[] = { 1, // A
+ 0, // b
+ 2, // C
+ 0,0,0, // d, e, f
+ 3, // G
+ 0,0,0,0,0,0,0,0,0,0,0,0, // h,i,j,k,l,m,n,o,p,q,r,s
+ 4,0, // T,U
+ 0,0,0,0,0}; // v,w,x,y,z
+
+
+double forbidden_entropy;
+
+
+/* Initial entropy of duplex formation: the -5.9 cal/(mol*K) initiation
+ * term plus the concentration term R*ln(Ct) precomputed in rlogc. */
+double nparam_GetInitialEntropy(PNNParams nparm)
+{
+ return -5.9f+nparm->rlogc;
+}
+
+
+//Retrieve Enthalpy for given NN-Pair from parameter table.
+//x0,x1 / y0,y1 are already numeric base codes (1..5), not letters.
+double nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1)
+{
+ return ndH(x0,x1,y0,y1); //xx, yx are already numbers
+}
+
+
+//Retrieve Entropy for given NN-Pair from parameter table, applying the
+//configured salt correction (SantaLucia or Owczarzy) on top of the raw
+//table value. Inputs are numeric base codes (1..4 = ACGT, 5 = terminal).
+double nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1)
+{
+ //xx and yx are already numbers
+ char nx0=x0;//nparam_convertNum(x0);
+ char nx1=x1;//nparam_convertNum(x1);
+ char ny0=y0;//nparam_convertNum(y0);
+ char ny1=y1;//nparam_convertNum(y1);
+ double answer = ndS(nx0,nx1,ny0,ny1);
+ /*Salt correction Santalucia: 0.5*kfac per paired (non-gap) position*/
+ if (nparm->saltMethod == SALT_METHOD_SANTALUCIA) {
+ if(nx0!=5 && 1<= nx1 && nx1<=4) {
+ answer += 0.5*nparm->kfac;
+ }
+ if(ny1!=5 && 1<= ny0 && ny0<=4) {
+ answer += 0.5*nparm->kfac;
+ }
+ }
+ /*Salt correction Owczarzy: enthalpy- and GC-dependent log([K+]) term*/
+ if (nparm->saltMethod == SALT_METHOD_OWCZARZY) {
+ double logk = log(nparm->kplus);
+ answer += ndH(nx0,nx1,ny0,ny1)*((4.29 * nparm->gcContent-3.95)*0.00001*logk+ 0.0000094*logk*logk);
+ }
+ return answer;
+}
+
+/* PURPOSE: Return melting temperature TM for given entropy and enthalpy
+* Assuming a one-state transition and using the formula
+* TM = dH / (dS + R ln(Ct/4))
+* entropy = dS + R ln Ct/4 (must already be included!)
+* enthalpy = dH
+* where
+* dH = enthalpy
+* dS = entropy
+* R = Boltzmann factor
+* Ct = Strand Concentration
+*
+* PARAMETERS:
+* entropy and enthalpy
+*
+* RETURN VALUE:
+* temperature in Kelvin; 0 (absolute zero) when the model fails
+* (forbidden enthalpy, non-negative entropy, or a negative quotient)
+*/
+
+double nparam_CalcTM(double entropy,double enthalpy)
+{
+ double tm = 0; // absolute zero - return if model fails!
+ if (enthalpy>=forbidden_enthalpy) //||(entropy==-cfact))
+ return 0;
+ if (entropy<0) // avoid division by zero and model errors!
+ {
+ tm = enthalpy/entropy;// - kfac; //LKFEB
+ if (tm<0)
+ return 0;
+ }
+ return tm;
+}
+
+
+/**
+ * Initialise the nearest-neighbour parameter tables and concentration/salt
+ * terms of *nparm.
+ *
+ * @param nparm parameter block to fill
+ * @param c1,c2 strand concentrations (the excess-strand factor rlogc is
+ * derived from them)
+ * @param kp monovalent cation concentration [K+]
+ * @param sm salt-correction method (SALT_METHOD_SANTALUCIA or
+ * SALT_METHOD_OWCZARZY)
+ *
+ * Fix: the entropy entry ndS(0,5,x,0) — the partner of ndH(0,5,x,0) — was
+ * never set; ndS(x,0,0,5) was assigned twice instead (copy-paste slip).
+ */
+void nparam_InitParams(PNNParams nparm, double c1, double c2, double kp, int sm)
+{
+ nparm->Ct1 = c1;
+ nparm->Ct2 = c2;
+ nparm->kplus = kp;
+ int maxCT = 1;
+ if(nparm->Ct2 > nparm->Ct1)
+ {
+ maxCT = 2;
+ }
+ double ctFactor;
+ if(nparm->Ct1 == nparm->Ct2)
+ {
+ ctFactor = nparm->Ct1/2;
+ }
+ else if (maxCT == 1)
+ {
+ ctFactor = nparm->Ct1-nparm->Ct2/2;
+ }
+ else
+ {
+ ctFactor = nparm->Ct2-nparm->Ct1/2;
+ }
+ nparm->rlogc = R * log(ctFactor);
+ forbidden_entropy = nparm->rlogc;
+ nparm->kfac = 0.368 * log (nparm->kplus);
+ nparm->saltMethod = sm;
+ int x,y,a,b; // variables used as counters...
+
+ // Set all parameters to zero!
+ memset(nparm->dH,0,sizeof(nparm->dH));
+ memset(nparm->dS,0,sizeof(nparm->dS));
+
+ // Set all X-/Y-, -X/Y- and X-/-Y so, that TM will be VERY small!
+ for (x=1;x<=4;x++)
+ {
+ for (y=1;y<=4;y++)
+ {
+ ndH(0,x,y,0)=forbidden_enthalpy;
+ ndS(0,x,y,0)=forbidden_entropy;
+ ndH(x,0,0,y)=forbidden_enthalpy;
+ ndS(x,0,0,y)=forbidden_entropy;
+ ndH(x,0,y,0)=forbidden_enthalpy;
+ ndS(x,0,y,0)=forbidden_entropy;
+ // forbid X-/Y$ and X$/Y- etc., i.e. terminal must not be paired with gap!
+ ndH(x,5,y,0)=forbidden_enthalpy;
+ ndS(x,5,y,0)=forbidden_entropy;
+ ndH(x,0,y,5)=forbidden_enthalpy;
+ ndS(x,0,y,5)=forbidden_entropy;
+ ndH(5,x,0,y)=forbidden_enthalpy;
+ ndS(5,x,0,y)=forbidden_entropy;
+ ndH(0,x,5,y)=forbidden_enthalpy;
+ ndS(0,x,5,y)=forbidden_entropy;
+ // forbid X$/-Y etc.
+ ndH(x,5,0,y)=forbidden_enthalpy;
+ ndS(x,5,0,y)=forbidden_entropy;
+ ndH(x,0,5,y)=forbidden_enthalpy;
+ ndS(x,0,5,y)=forbidden_entropy;
+ ndH(5,x,y,0)=forbidden_enthalpy;
+ ndS(5,x,y,0)=forbidden_entropy;
+ ndH(0,x,y,5)=forbidden_enthalpy;
+ ndS(0,x,y,5)=forbidden_entropy;
+
+ }
+ // also, forbid x-/-- and --/x-, i.e. no two inner gaps paired
+ ndH(x,0,0,0)=forbidden_enthalpy;
+ ndS(x,0,0,0)=forbidden_entropy;
+ ndH(0,0,x,0)=forbidden_enthalpy;
+ ndS(0,0,x,0)=forbidden_entropy;
+ // x-/-$
+ ndH(x,0,0,5)=forbidden_enthalpy;
+ ndS(x,0,0,5)=forbidden_entropy;
+ ndH(5,0,0,x)=forbidden_enthalpy;
+ ndS(5,0,0,x)=forbidden_entropy;
+ ndH(0,5,x,0)=forbidden_enthalpy;
+ ndS(0,5,x,0)=forbidden_entropy; // fix: was ndS(x,0,0,5), duplicated above
+ ndH(0,x,5,0)=forbidden_enthalpy;
+ ndS(0,x,5,0)=forbidden_entropy;
+ }
+ // forbid --/--
+ ndH(0,0,0,0)=forbidden_enthalpy;
+ ndS(0,0,0,0)=forbidden_entropy;
+
+ ndH(5,0,0,0)=forbidden_enthalpy;
+ ndS(5,0,0,0)=forbidden_entropy;
+ ndH(0,0,5,0)=forbidden_enthalpy;
+ ndS(0,0,5,0)=forbidden_entropy;
+ ndH(0,5,5,0)=forbidden_enthalpy;
+ ndS(0,5,5,0)=forbidden_entropy;
+
+ // Interior loops (double Mismatches)
+ #define iloop_entropy -0.97f
+ #define iloop_enthalpy 0.0f
+ for (x=1; x<=4; x++)
+ for (y=1; y<=4; y++)
+ for (a=1; a<=4; a++)
+ for (b=1; b<=4; b++)
+ // AT and CG pair, and as A=1, C=2, G=3, T=4 this means
+ // we have Watson-Crick pairs if (x+a==5) and (y+b)==5.
+ if (!((x+a==5)||(y+b==5)))
+ {
+ // No watson-crick-pair, i.e. double mismatch!
+ // set enthalpy/entropy to loop expansion!
+ ndH(x,y,a,b) = iloop_enthalpy;
+ ndS(x,y,a,b) = iloop_entropy;
+ }
+
+ // xy/-- and --/xy (Bulge Loops of size > 1)
+ #define bloop_entropy -1.3f
+ #define bloop_enthalpy 0.0f
+ for (x=1; x<=4; x++)
+ for (y=1; y<=4; y++)
+ {
+ ndH(x,y,0,0) = bloop_enthalpy;
+ ndS(x,y,0,0) = bloop_entropy;
+ ndH(0,0,x,y) = bloop_enthalpy;
+ ndS(0,0,x,y) = bloop_entropy;
+ }
+
+ // x-/ya abd xa/y- as well as -x/ay and ax/-y
+ // bulge opening and closing parameters with
+ // adjacent matches / mismatches
+ // obulge_mism and cbulge_mism chosen so high to avoid
+ // AAAAAAAAA
+ // T--G----T
+ // being better than
+ // AAAAAAAAA
+ // TG------T
+ #define obulge_match_H (-2.66f * 1000)
+ #define obulge_match_S -14.22f
+ #define cbulge_match_H (-2.66f * 1000)
+ #define cbulge_match_S -14.22f
+ #define obulge_mism_H (0.0f * 1000)
+ #define obulge_mism_S -6.45f
+ #define cbulge_mism_H 0.0f
+ #define cbulge_mism_S -6.45f
+ for (x=1; x<=4; x++)
+ for (y=1; y<=4; y++)
+ for (a=1; a<=4; a++)
+ {
+ if (x+y==5) // other base pair matches!
+ {
+ ndH(x,0,y,a)=obulge_match_H; // bulge opening
+ ndS(x,0,y,a)=obulge_match_S;
+ ndH(x,a,y,0)=obulge_match_H;
+ ndS(x,a,y,0)=obulge_match_S;
+ ndH(0,x,a,y)=cbulge_match_H; // bulge closing
+ ndS(0,x,a,y)=cbulge_match_S;
+ ndH(a,x,0,y)=cbulge_match_H;
+ ndS(a,x,0,y)=cbulge_match_S;
+ }
+ else
+ { // mismatch in other base pair!
+ ndH(x,0,y,a)=obulge_mism_H; // bulge opening
+ ndS(x,0,y,a)=obulge_mism_S;
+ ndH(x,a,y,0)=obulge_mism_H;
+ ndS(x,a,y,0)=obulge_mism_S;
+ ndH(0,x,a,y)=cbulge_mism_H; // bulge closing
+ ndS(0,x,a,y)=cbulge_mism_S;
+ ndH(a,x,0,y)=cbulge_mism_H;
+ ndS(a,x,0,y)=cbulge_mism_S;
+ }
+ }
+
+ // Watson-Crick pairs (note that only ten are unique, as obviously
+ // 5'-AG-3'/3'-TC-5' = 5'-CT-3'/3'-GA-5' etc.
+ ndH(1,1,4,4)=-7.6f*1000; ndS(1,1,4,4)=-21.3f; // AA/TT 04
+ ndH(1,2,4,3)=-8.4f*1000; ndS(1,2,4,3)=-22.4f; // AC/TG adapted GT/CA
+ ndH(1,3,4,2)=-7.8f*1000; ndS(1,3,4,2)=-21.0f; // AG/TC adapted CT/GA
+ ndH(1,4,4,1)=-7.2f*1000; ndS(1,4,4,1)=-20.4f; // AT/TA 04
+ ndH(2,1,3,4)=-8.5f*1000; ndS(2,1,3,4)=-22.7f; // CA/GT 04
+ ndH(2,2,3,3)=-8.0f*1000; ndS(2,2,3,3)=-19.9f; // CC/GG adapted GG/CC
+ ndH(2,3,3,2)=-10.6f*1000; ndS(2,3,3,2)=-27.2f; // CG/GC 04
+ ndH(2,4,3,1)=-7.8f*1000; ndS(2,4,3,1)=-21.0f; // CT/GA 04
+ ndH(3,1,2,4)=-8.2f*1000; ndS(3,1,2,4)=-22.2f; // GA/CT 04
+ ndH(3,2,2,3)=-9.8f*1000; ndS(3,2,2,3)=-24.4f; // GC/CG 04
+ ndH(3,3,2,2)=-8.0f*1000; ndS(3,3,2,2)=-19.9f; // GG/CC 04
+ ndH(3,4,2,1)=-8.4f*1000; ndS(3,4,2,1)=-22.4f; // GT/CA 04
+ ndH(4,1,1,4)=-7.2f*1000; ndS(4,1,1,4)=-21.3f; // TA/AT 04
+ ndH(4,2,1,3)=-8.2f*1000; ndS(4,2,1,3)=-22.2f; // TC/AG adapted GA/CT
+ ndH(4,3,1,2)=-8.5f*1000; ndS(4,3,1,2)=-22.7f; // TG/AC adapted CA/GT
+ ndH(4,4,1,1)=-7.6f*1000; ndS(4,4,1,1)=-21.3f; // TT/AA adapted AA/TT
+
+ // A-C Mismatches (Values for pH 7.0)
+ ndH(1,1,2,4)=7.6f*1000; ndS(1,1,2,4)=20.2f; // AA/CT
+ ndH(1,1,4,2)=2.3f*1000; ndS(1,1,4,2)=4.6f; // AA/TC
+ ndH(1,2,2,3)=-0.7f*1000; ndS(1,2,2,3)=-3.8f; // AC/CG
+ ndH(1,2,4,1)=5.3f*1000; ndS(1,2,4,1)=14.6f; // AC/TA
+ ndH(1,3,2,2)=0.6f*1000; ndS(1,3,2,2)=-0.6f; // AG/CC
+ ndH(1,4,2,1)=5.3f*1000; ndS(1,4,2,1)=14.6f; // AT/CA
+ ndH(2,1,1,4)=3.4f*1000; ndS(2,1,1,4)=8.0f; // CA/AT
+ ndH(2,1,3,2)=1.9f*1000; ndS(2,1,3,2)=3.7f; // CA/GC
+ ndH(2,2,1,3)=5.2f*1000; ndS(2,2,1,3)=14.2f; // CC/AG
+ ndH(2,2,3,1)=0.6f*1000; ndS(2,2,3,1)=-0.6f; // CC/GA
+ ndH(2,3,1,2)=1.9f*1000; ndS(2,3,1,2)=3.7f; // CG/AC
+ ndH(2,4,1,1)=2.3f*1000; ndS(2,4,1,1)=4.6f; // CT/AA
+ ndH(3,1,2,2)=5.2f*1000; ndS(3,1,2,2)=14.2f; // GA/CC
+ ndH(3,2,2,1)=-0.7f*1000; ndS(3,2,2,1)=-3.8f; // GC/CA
+ ndH(4,1,1,2)=3.4f*1000; ndS(4,1,1,2)=8.0f; // TA/AC
+ ndH(4,2,1,1)=7.6f*1000; ndS(4,2,1,1)=20.2f; // TC/AA
+
+ // C-T Mismatches
+ ndH(1,2,4,4)=0.7f*1000; ndS(1,2,4,4)=0.2f; // AC/TT
+ ndH(1,4,4,2)=-1.2f*1000; ndS(1,4,4,2)=-6.2f; // AT/TC
+ ndH(2,1,4,4)=1.0f*1000; ndS(2,1,4,4)=0.7f; // CA/TT
+ ndH(2,2,3,4)=-0.8f*1000; ndS(2,2,3,4)=-4.5f; // CC/GT
+ ndH(2,2,4,3)=5.2f*1000; ndS(2,2,4,3)=13.5f; // CC/TG
+ ndH(2,3,4,2)=-1.5f*1000; ndS(2,3,4,2)=-6.1f; // CG/TC
+ ndH(2,4,3,2)=-1.5f*1000; ndS(2,4,3,2)=-6.1f; // CT/GC
+ ndH(2,4,4,1)=-1.2f*1000; ndS(2,4,4,1)=-6.2f; // CT/TA
+ ndH(3,2,2,4)=2.3f*1000; ndS(3,2,2,4)=5.4f; // GC/CT
+ ndH(3,4,2,2)=5.2f*1000; ndS(3,4,2,2)=13.5f; // GT/CC
+ ndH(4,1,2,4)=1.2f*1000; ndS(4,1,2,4)=0.7f; // TA/CT
+ ndH(4,2,2,3)=2.3f*1000; ndS(4,2,2,3)=5.4f; // TC/CG
+ ndH(4,2,1,4)=1.2f*1000; ndS(4,2,1,4)=0.7f; // TC/AT
+ ndH(4,3,2,2)=-0.8f*1000; ndS(4,3,2,2)=-4.5f; // TG/CC
+ ndH(4,4,2,1)=0.7f*1000; ndS(4,4,2,1)=0.2f; // TT/CA
+ ndH(4,4,1,2)=1.0f*1000; ndS(4,4,1,2)=0.7f; // TT/AC
+
+ // G-A Mismatches
+ ndH(1,1,3,4)=3.0f*1000; ndS(1,1,3,4)=7.4f; // AA/GT
+ ndH(1,1,4,3)=-0.6f*1000; ndS(1,1,4,3)=-2.3f; // AA/TG
+ ndH(1,2,3,3)=0.5f*1000; ndS(1,2,3,3)=3.2f; // AC/GG
+ ndH(1,3,3,2)=-4.0f*1000; ndS(1,3,3,2)=-13.2f; // AG/GC
+ ndH(1,3,4,1)=-0.7f*1000; ndS(1,3,4,1)=-2.3f; // AG/TA
+ ndH(1,4,3,1)=-0.7f*1000; ndS(1,4,3,1)=-2.3f; // AT/GA
+ ndH(2,1,3,3)=-0.7f*1000; ndS(2,1,3,3)=-2.3f; // CA/GG
+ ndH(2,3,3,1)=-4.0f*1000; ndS(2,3,3,1)=-13.2f; // CG/GA
+ ndH(3,1,1,4)=0.7f*1000; ndS(3,1,1,4)=0.7f; // GA/AT
+ ndH(3,1,2,3)=-0.6f*1000; ndS(3,1,2,3)=-1.0f; // GA/CG
+ ndH(3,2,1,3)=-0.6f*1000; ndS(3,2,1,3)=-1.0f; // GC/AG
+ ndH(3,3,1,2)=-0.7f*1000; ndS(3,3,1,2)=-2.3f; // GG/AC
+ ndH(3,3,2,1)=0.5f*1000; ndS(3,3,2,1)=3.2f; // GG/CA
+ ndH(3,4,1,1)=-0.6f*1000; ndS(3,4,1,1)=-2.3f; // GT/AA
+ ndH(4,1,1,3)=0.7f*1000; ndS(4,1,1,3)=0.7f; // TA/AG
+ ndH(4,3,1,1)=3.0f*1000; ndS(4,3,1,1)=7.4f; // TG/AA
+
+ // G-T Mismatches
+ ndH(1,3,4,4)=1.0f*1000; ndS(1,3,4,4)=0.9f; // AG/TT
+ ndH(1,4,4,3)=-2.5f*1000; ndS(1,4,4,3)=-8.3f; // AT/TG
+ ndH(2,3,3,4)=-4.1f*1000; ndS(2,3,3,4)=-11.7f; // CG/GT
+ ndH(2,4,3,3)=-2.8f*1000; ndS(2,4,3,3)=-8.0f; // CT/GG
+ ndH(3,1,4,4)=-1.3f*1000; ndS(3,1,4,4)=-5.3f; // GA/TT
+ ndH(3,2,4,3)=-4.4f*1000; ndS(3,2,4,3)=-12.3f; // GC/TG
+ ndH(3,3,2,4)=3.3f*1000; ndS(3,3,2,4)=10.4f; // GG/CT
+ ndH(3,3,4,2)=-2.8f*1000; ndS(3,3,4,2)=-8.0f; // GG/TC
+// ndH(3,3,4,4)=5.8f*1000; ndS(3,3,4,4)=16.3f; // GG/TT
+ ndH(3,4,2,3)=-4.4f*1000; ndS(3,4,2,3)=-12.3f; // GT/CG
+ ndH(3,4,4,1)=-2.5f*1000; ndS(3,4,4,1)=-8.3f; // GT/TA
+// ndH(3,4,4,3)=4.1f*1000; ndS(3,4,4,3)=9.5f; // GT/TG
+ ndH(4,1,3,4)=-0.1f*1000; ndS(4,1,3,4)=-1.7f; // TA/GT
+ ndH(4,2,3,3)=3.3f*1000; ndS(4,2,3,3)=10.4f; // TC/GG
+ ndH(4,3,1,4)=-0.1f*1000; ndS(4,3,1,4)=-1.7f; // TG/AT
+ ndH(4,3,3,2)=-4.1f*1000; ndS(4,3,3,2)=-11.7f; // TG/GC
+// ndH(4,3,3,4)=-1.4f*1000; ndS(4,3,3,4)=-6.2f; // TG/GT
+ ndH(4,4,1,3)=-1.3f*1000; ndS(4,4,1,3)=-5.3f; // TT/AG
+ ndH(4,4,3,1)=1.0f*1000; ndS(4,4,3,1)=0.9f; // TT/GA
+// ndH(4,4,3,3)=5.8f*1000; ndS(4,4,3,3)=16.3f; // TT/GG
+
+ // A-A Mismatches
+ ndH(1,1,1,4)=4.7f*1000; ndS(1,1,1,4)=12.9f; // AA/AT
+ ndH(1,1,4,1)=1.2f*1000; ndS(1,1,4,1)=1.7f; // AA/TA
+ ndH(1,2,1,3)=-2.9f*1000; ndS(1,2,1,3)=-9.8f; // AC/AG
+ ndH(1,3,1,2)=-0.9f*1000; ndS(1,3,1,2)=-4.2f; // AG/AC
+ ndH(1,4,1,1)=1.2f*1000; ndS(1,4,1,1)=1.7f; // AT/AA
+ ndH(2,1,3,1)=-0.9f*1000; ndS(2,1,3,1)=-4.2f; // CA/GA
+ ndH(3,1,2,1)=-2.9f*1000; ndS(3,1,2,1)=-9.8f; // GA/CA
+ ndH(4,1,1,1)=4.7f*1000; ndS(4,1,1,1)=12.9f; // TA/AA
+
+ // C-C Mismatches
+ ndH(1,2,4,2)=0.0f*1000; ndS(1,2,4,2)=-4.4f; // AC/TC
+ ndH(2,1,2,4)=6.1f*1000; ndS(2,1,2,4)=16.4f; // CA/CT
+ ndH(2,2,2,3)=3.6f*1000; ndS(2,2,2,3)=8.9f; // CC/CG
+ ndH(2,2,3,2)=-1.5f*1000; ndS(2,2,3,2)=-7.2f; // CC/GC
+ ndH(2,3,2,2)=-1.5f*1000; ndS(2,3,2,2)=-7.2f; // CG/CC
+ ndH(2,4,2,1)=0.0f*1000; ndS(2,4,2,1)=-4.4f; // CT/CA
+ ndH(3,2,2,2)=3.6f*1000; ndS(3,2,2,2)=8.9f; // GC/CC
+ ndH(4,2,1,2)=6.1f*1000; ndS(4,2,1,2)=16.4f; // TC/AC
+
+ // G-G Mismatches
+ ndH(1,3,4,3)=-3.1f*1000; ndS(1,3,4,3)=-9.5f; // AG/TG
+ ndH(2,3,3,3)=-4.9f*1000; ndS(2,3,3,3)=-15.3f; // CG/GG
+ ndH(3,1,3,4)=1.6f*1000; ndS(3,1,3,4)=3.6f; // GA/GT
+ ndH(3,2,3,3)=-6.0f*1000; ndS(3,2,3,3)=-15.8f; // GC/GG
+ ndH(3,3,2,3)=-6.0f*1000; ndS(3,3,2,3)=-15.8f; // GG/CG
+ ndH(3,3,3,2)=-4.9f*1000; ndS(3,3,3,2)=-15.3f; // GG/GC
+ ndH(3,4,3,1)=-3.1f*1000; ndS(3,4,3,1)=-9.5f; // GT/GA
+ ndH(4,3,1,3)=1.6f*1000; ndS(4,3,1,3)=3.6f; // TG/AG
+
+ // T-T Mismatches
+ ndH(1,4,4,4)=-2.7f*1000; ndS(1,4,4,4)=-10.8f; // AT/TT
+ ndH(2,4,3,4)=-5.0f*1000; ndS(2,4,3,4)=-15.8f; // CT/GT
+ ndH(3,4,2,4)=-2.2f*1000; ndS(3,4,2,4)=-8.4f; // GT/CT
+ ndH(4,1,4,4)=0.2f*1000; ndS(4,1,4,4)=-1.5f; // TA/TT
+ ndH(4,2,4,3)=-2.2f*1000; ndS(4,2,4,3)=-8.4f; // TC/TG
+ ndH(4,3,4,2)=-5.0f*1000; ndS(4,3,4,2)=-15.8f; // TG/TC
+ ndH(4,4,1,4)=0.2f*1000; ndS(4,4,1,4)=-1.5f; // TT/AT
+ ndH(4,4,4,1)=-2.7f*1000; ndS(4,4,4,1)=-10.8f; // TT/TA
+
+ // Dangling Ends
+ ndH(5,1,1,4)=-0.7f*1000; ndS(5,1,1,4)=-0.8f; // $A/AT
+ ndH(5,1,2,4)=4.4f*1000; ndS(5,1,2,4)=14.9f; // $A/CT
+ ndH(5,1,3,4)=-1.6f*1000; ndS(5,1,3,4)=-3.6f; // $A/GT
+ ndH(5,1,4,4)=2.9f*1000; ndS(5,1,4,4)=10.4f; // $A/TT
+ ndH(5,2,1,3)=-2.1f*1000; ndS(5,2,1,3)=-3.9f; // $C/AG
+ ndH(5,2,2,3)=-0.2f*1000; ndS(5,2,2,3)=-0.1f; // $C/CG
+ ndH(5,2,3,3)=-3.9f*1000; ndS(5,2,3,3)=-11.2f; // $C/GG
+ ndH(5,2,4,3)=-4.4f*1000; ndS(5,2,4,3)=-13.1f; // $C/TG
+ ndH(5,3,1,2)=-5.9f*1000; ndS(5,3,1,2)=-16.5f; // $G/AC
+ ndH(5,3,2,2)=-2.6f*1000; ndS(5,3,2,2)=-7.4f; // $G/CC
+ ndH(5,3,3,2)=-3.2f*1000; ndS(5,3,3,2)=-10.4f; // $G/GC
+ ndH(5,3,4,2)=-5.2f*1000; ndS(5,3,4,2)=-15.0f; // $G/TC
+ ndH(5,4,1,1)=-0.5f*1000; ndS(5,4,1,1)=-1.1f; // $T/AA
+ ndH(5,4,2,1)=4.7f*1000; ndS(5,4,2,1)=14.2f; // $T/CA
+ ndH(5,4,3,1)=-4.1f*1000; ndS(5,4,3,1)=-13.1f; // $T/GA
+ ndH(5,4,4,1)=-3.8f*1000; ndS(5,4,4,1)=-12.6f; // $T/TA
+ ndH(1,5,4,1)=-2.9f*1000; ndS(1,5,4,1)=-7.6f; // A$/TA
+ ndH(1,5,4,2)=-4.1f*1000; ndS(1,5,4,2)=-13.0f; // A$/TC
+ ndH(1,5,4,3)=-4.2f*1000; ndS(1,5,4,3)=-15.0f; // A$/TG
+ ndH(1,5,4,4)=-0.2f*1000; ndS(1,5,4,4)=-0.5f; // A$/TT
+ ndH(1,1,5,4)=0.2f*1000; ndS(1,1,5,4)=2.3f; // AA/$T
+ ndH(1,1,4,5)=-0.5f*1000; ndS(1,1,4,5)=-1.1f; // AA/T$
+ ndH(1,2,5,3)=-6.3f*1000; ndS(1,2,5,3)=-17.1f; // AC/$G
+ ndH(1,2,4,5)=4.7f*1000; ndS(1,2,4,5)=14.2f; // AC/T$
+ ndH(1,3,5,2)=-3.7f*1000; ndS(1,3,5,2)=-10.0f; // AG/$C
+ ndH(1,3,4,5)=-4.1f*1000; ndS(1,3,4,5)=-13.1f; // AG/T$
+ ndH(1,4,5,1)=-2.9f*1000; ndS(1,4,5,1)=-7.6f; // AT/$A
+ ndH(1,4,4,5)=-3.8f*1000; ndS(1,4,4,5)=-12.6f; // AT/T$
+ ndH(2,5,3,1)=-3.7f*1000; ndS(2,5,3,1)=-10.0f; // C$/GA
+ ndH(2,5,3,2)=-4.0f*1000; ndS(2,5,3,2)=-11.9f; // C$/GC
+ ndH(2,5,3,3)=-3.9f*1000; ndS(2,5,3,3)=-10.9f; // C$/GG
+ ndH(2,5,3,4)=-4.9f*1000; ndS(2,5,3,4)=-13.8f; // C$/GT
+ ndH(2,1,5,4)=0.6f*1000; ndS(2,1,5,4)=3.3f; // CA/$T
+ ndH(2,1,3,5)=-5.9f*1000; ndS(2,1,3,5)=-16.5f; // CA/G$
+ ndH(2,2,5,3)=-4.4f*1000; ndS(2,2,5,3)=-12.6f; // CC/$G
+ ndH(2,2,3,5)=-2.6f*1000; ndS(2,2,3,5)=-7.4f; // CC/G$
+ ndH(2,3,5,2)=-4.0f*1000; ndS(2,3,5,2)=-11.9f; // CG/$C
+ ndH(2,3,3,5)=-3.2f*1000; ndS(2,3,3,5)=-10.4f; // CG/G$
+ ndH(2,4,5,1)=-4.1f*1000; ndS(2,4,5,1)=-13.0f; // CT/$A
+ ndH(2,4,3,5)=-5.2f*1000; ndS(2,4,3,5)=-15.0f; // CT/G$
+ ndH(3,5,2,1)=-6.3f*1000; ndS(3,5,2,1)=-17.1f; // G$/CA
+ ndH(3,5,2,2)=-4.4f*1000; ndS(3,5,2,2)=-12.6f; // G$/CC
+ ndH(3,5,2,3)=-5.1f*1000; ndS(3,5,2,3)=-14.0f; // G$/CG
+ ndH(3,5,2,4)=-4.0f*1000; ndS(3,5,2,4)=-10.9f; // G$/CT
+ ndH(3,1,5,4)=-1.1f*1000; ndS(3,1,5,4)=-1.6f; // GA/$T
+ ndH(3,1,2,5)=-2.1f*1000; ndS(3,1,2,5)=-3.9f; // GA/C$
+ ndH(3,2,5,3)=-5.1f*1000; ndS(3,2,5,3)=-14.0f; // GC/$G
+ ndH(3,2,2,5)=-0.2f*1000; ndS(3,2,2,5)=-0.1f; // GC/C$
+ ndH(3,3,5,2)=-3.9f*1000; ndS(3,3,5,2)=-10.9f; // GG/$C
+ ndH(3,3,2,5)=-3.9f*1000; ndS(3,3,2,5)=-11.2f; // GG/C$
+ ndH(3,4,5,1)=-4.2f*1000; ndS(3,4,5,1)=-15.0f; // GT/$A
+ ndH(3,4,2,5)=-4.4f*1000; ndS(3,4,2,5)=-13.1f; // GT/C$
+ ndH(4,5,1,1)=0.2f*1000; ndS(4,5,1,1)=2.3f; // T$/AA
+ ndH(4,5,1,2)=0.6f*1000; ndS(4,5,1,2)=3.3f; // T$/AC
+ ndH(4,5,1,3)=-1.1f*1000; ndS(4,5,1,3)=-1.6f; // T$/AG
+ ndH(4,5,1,4)=-6.9f*1000; ndS(4,5,1,4)=-20.0f; // T$/AT
+ ndH(4,1,5,4)=-6.9f*1000; ndS(4,1,5,4)=-20.0f; // TA/$T
+ ndH(4,1,1,5)=-0.7f*1000; ndS(4,1,1,5)=-0.7f; // TA/A$
+ ndH(4,2,5,3)=-4.0f*1000; ndS(4,2,5,3)=-10.9f; // TC/$G
+ ndH(4,2,1,5)=4.4f*1000; ndS(4,2,1,5)=14.9f; // TC/A$
+ ndH(4,3,5,2)=-4.9f*1000; ndS(4,3,5,2)=-13.8f; // TG/$C
+ ndH(4,3,1,5)=-1.6f*1000; ndS(4,3,1,5)=-3.6f; // TG/A$
+ ndH(4,4,5,1)=-0.2f*1000; ndS(4,4,5,1)=-0.5f; // TT/$A
+ ndH(4,4,1,5)=2.9f*1000; ndS(4,4,1,5)=10.4f; // TT/A$
+
+ return;
+}
+
+/**
+ * Count the number of G and C bases in a NUL-terminated, upper-case
+ * sequence. Despite the name this returns a raw count, not a fraction.
+ *
+ * Fix: the accumulator was declared double and implicitly narrowed on
+ * return; it is now an int to match the declared return type.
+ */
+int nparam_CountGCContent(char * seq ) {
+ int lseq = strlen(seq);
+ int k;
+ int count = 0;
+ for( k=0;k<lseq;k++) {
+ if (seq[k] == 'G' || seq[k] == 'C' ) {
+ count+=1;
+ }
+ }
+ return count;
+}
+
+/**
+ * Normalise a sequence into upper-case ACGT in outseq.
+ *
+ * Accepts letters (upper/lower case) and the 2-bit numeric encoding
+ * '\0'..'\3' (which is presumably why a NUL byte maps to 'A' when an
+ * explicit len is given — confirm against callers). Any other character
+ * aborts the scan by clearing outseq[0], which doubles as the
+ * success flag tested by the loop condition and by callers.
+ *
+ * @param inseq input sequence
+ * @param outseq output buffer; caller must provide at least len+1
+ * (or strlen(inseq)+1) bytes
+ * @param len number of bases to convert, or 0 to use strlen(inseq)
+ */
+void nparam_CleanSeq (char* inseq, char* outseq, int len)
+{
+ int seqlen = strlen (inseq);
+ int i, j;
+
+ if (len != 0)
+ seqlen = len;
+
+ outseq[0]='x';
+
+ for (i = 0, j = 0; i < seqlen && outseq[0]; i++,j++)
+ {
+ switch (inseq[i])
+ {
+ case 'a':
+ case '\0':
+ case 'A':
+ outseq[j] = 'A'; break;
+ case 'c':
+ case '\1':
+ case 'C':
+ outseq[j] = 'C'; break;
+ case 'g':
+ case '\2':
+ case 'G':
+ outseq[j] = 'G'; break;
+ case 't':
+ case '\3':
+ case 'T':
+ outseq[j] = 'T'; break;
+ default:
+ outseq[0]=0;
+ }
+ }
+ outseq[j] = '\0';
+}
+
+//Calculate TM for given sequence against its perfect complement using the
+//nearest-neighbour tables in nparm; returns NaN when the sequence contains
+//characters nparam_CleanSeq cannot normalise.
+//NOTE(review): nseq is a fixed 50-byte buffer — sequences of len > 49
+//overflow it; confirm callers bound primer length. The NaN is produced by
+//type-punning an all-ones bit pattern, which is technically undefined
+//behaviour (nan() from <math.h> would be cleaner).
+double nparam_CalcSelfTM(PNNParams nparm, char* seq, int len)
+{
+ const unsigned long long minus1 = 0xFFFFFFFFFFFFFFFFLLU;
+ const double NaN = *((double*)&minus1);
+ double thedH = 0;
+ //double thedS = nparam_GetInitialEntropy(nparm);
+ double thedS = -5.9f+nparm->rlogc;
+ double mtemp;
+ char c1;
+ char c2;
+ char c3;
+ char c4;
+ unsigned int i;
+ char nseq[50];
+ char *useq = seq;
+
+ nparam_CleanSeq (seq, nseq, len);
+ if (!nseq[0])
+ return NaN;
+ useq = nseq;
+
+ /* Accumulate dH/dS over each nearest-neighbour dinucleotide step:
+ * c3,c4 encode the top strand, c1,c2 its complement. */
+ for ( i=1;i<len;i++)
+ {
+ c1 = GETREVCODE(useq[i-1]); //nparam_getComplement(seq[i-1],1);
+ c2 = GETREVCODE(useq[i]); //nparam_getComplement(seq[i],1);
+ c3 = GETNUMCODE(useq[i-1]);
+ c4 = GETNUMCODE(useq[i]);
+
+
+ thedH += nparm->dH[c3][c4][c1][c2];//nparam_GetEnthalpy(nparm, c3,c4,c1,c2);
+ thedS += nparam_GetEntropy(nparm, c3,c4,c1,c2);
+ }
+ //printf("------------------\n");
+ mtemp = nparam_CalcTM(thedS,thedH);
+ //fprintf(stderr,"Enthalpy: %f, entropy: %f, seq: %s rloc=%f\n", thedH, thedS, useq, nparm->rlogc);
+ //exit (0);
+ return mtemp;
+}
+
+/**
+ * Calculate TM for a duplex of two explicit strands (seq1 paired with
+ * seq2) over their first len bases, using the nearest-neighbour tables.
+ * Returns NaN when either sequence fails nparam_CleanSeq normalisation.
+ * NOTE(review): same 50-byte fixed-buffer and NaN-punning caveats as
+ * nparam_CalcSelfTM; both strands must be at least len bases long.
+ */
+double nparam_CalcTwoTM(PNNParams nparm, char* seq1, char* seq2, int len)
+{
+ const unsigned long long minus1 = 0xFFFFFFFFFFFFFFFFLLU;
+ const double NaN = *((double*)&minus1);
+ double thedH = 0;
+ //double thedS = nparam_GetInitialEntropy(nparm);
+ double thedS = -5.9f+nparm->rlogc;
+ double mtemp;
+ char c1;
+ char c2;
+ char c3;
+ char c4;
+ unsigned int i;
+ char nseq1[50];
+ char nseq2[50];
+ char *useq1;
+ char *useq2;
+
+ nparam_CleanSeq (seq1, nseq1, len);
+ if (!nseq1[0])
+ return NaN;
+ useq1 = nseq1;
+
+ nparam_CleanSeq (seq2, nseq2, len);
+ if (!nseq2[0])
+ return NaN;
+ useq2 = nseq2;
+
+ /* Accumulate dH/dS per dinucleotide step: c3,c4 from seq1 and the
+ * complement codes c1,c2 from seq2. */
+ for ( i=1;i<len;i++)
+ {
+ c1 = GETREVCODE(useq2[i-1]); //nparam_getComplement(seq[i-1],1);
+ c2 = GETREVCODE(useq2[i]); //nparam_getComplement(seq[i],1);
+ c3 = GETNUMCODE(useq1[i-1]);
+ c4 = GETNUMCODE(useq1[i]);
+
+ thedH += nparm->dH[c3][c4][c1][c2];//nparam_GetEnthalpy(nparm, c3,c4,c1,c2);
+ thedS += nparam_GetEntropy(nparm, c3,c4,c1,c2);
+ }
+ mtemp = nparam_CalcTM(thedS,thedH);
+ //if (mtemp == 0)
+ //{
+ // fprintf(stderr,"Enthalpy: %f, entropy: %f, seq: %s\n", thedH, thedS, useq);
+ //exit (0);
+ //}
+ return mtemp;
+}
+
+/**
+ * Quick GC-content melting-temperature estimate (no nearest-neighbour
+ * model): Tm = 64.9 + 41*(GC - 16.4)/length, on the whole NUL-terminated
+ * sequence. Expects upper-case bases (GC counting is case-sensitive).
+ */
+double calculateMeltingTemperatureBasic (char * seq) {
+ int gccount;
+ double temp;
+ int seqlen;
+
+ seqlen = strlen (seq);
+ gccount = nparam_CountGCContent (seq);
+ temp = 64.9 + 41*(gccount - 16.4)/seqlen;
+ return temp;
+}
diff --git a/src/libthermo/nnparams.h b/src/libthermo/nnparams.h
new file mode 100644
index 0000000..7934858
--- /dev/null
+++ b/src/libthermo/nnparams.h
@@ -0,0 +1,63 @@
/*
 * nnparams.h
 * PHunterLib
 *
 * Nearest Neighbor Model Parameters
 *
 * Created by Tiayyba Riaz on 02/07/09.
 *
 */

#ifndef NNPARAMS_H_
#define NNPARAMS_H_

#include <math.h>
#include <string.h>
//#include "../libecoprimer/ecoprimer.h"

/* Convenience accessors for the nearest-neighbour tables.
 * Fix: macro arguments are parenthesized so expressions such as
 * GETNUMCODE(*p++) or ndH(a+1,b,c,d) expand safely. */
#define ndH(a,b,c,d) nparm->dH[(int)(a)][(int)(b)][(int)(c)][(int)(d)]
#define ndS(a,b,c,d) nparm->dS[(int)(a)][(int)(b)][(int)(c)][(int)(d)]
/* Sentinel enthalpy marking a forbidden (non-pairing) transition. */
#define forbidden_enthalpy 1000000000000000000.0f
/* Gas constant, cal/(mol*K). */
#define R 1.987f
#define SALT_METHOD_SANTALUCIA 1
#define SALT_METHOD_OWCZARZY 2

/* Default primer/template concentrations (mol/l) and salt concentration. */
#define DEF_CONC_PRIMERS 0.0000008
#define DEF_CONC_SEQUENCES 0
#define DEF_SALT 0.05

/* Map an upper-case base letter to its numeric code / complement code.
 * bpencoder is defined in nnparams.c; indices assume 'A' <= a <= 'Z'.
 * Fix: arguments parenthesized, and the GETREVCODE expansion wrapped so
 * e.g. `2*GETREVCODE(x)` groups as intended. */
#define GETNUMCODE(a) bpencoder[(a) - 'A']
#define GETREVCODE(a) (5-bpencoder[(a) - 'A'])


extern double forbidden_entropy;


/* Parameter block for nearest-neighbour Tm computation. */
typedef struct CNNParams_st
{
	double Ct1;		/* strand-1 concentration (presumed; see nparam_InitParams) */
	double Ct2;		/* strand-2 concentration */
	double rlogc;		/* R*log(concentration) correction term */
	double kplus;		/* monovalent salt concentration */
	double kfac;		/* salt-correction factor */
	int saltMethod;		/* SALT_METHOD_SANTALUCIA or SALT_METHOD_OWCZARZY */
	double gcContent;
	double new_TM;
	double dH[6][6][6][6]; // A-C-G-T + gap + initiation (dangling end, $ sign)
	double dS[6][6][6][6];
}CNNParams, * PNNParams;

void nparam_InitParams(PNNParams nparm, double c1, double c2, double kp, int sm);
int nparam_CountGCContent(char * seq );
double nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1);
double nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1);
double nparam_CalcTM(double entropy,double enthalpy);
double nparam_CalcSelfTM(PNNParams nparm, char* seq, int len);
double nparam_CalcTwoTM(PNNParams nparm, char* seq1, char* seq2, int len);

double nparam_GetInitialEntropy(PNNParams nparm) ;
double calculateMeltingTemperatureBasic (char * seq);
//void getThermoProperties (ppair_t* pairs, size_t count, poptions_t options);

#endif
diff --git a/tests/ecodb.rdx b/tests/ecodb.rdx
new file mode 100644
index 0000000..db10ce6
Binary files /dev/null and b/tests/ecodb.rdx differ
diff --git a/tests/ecodb.tdx b/tests/ecodb.tdx
new file mode 100644
index 0000000..9cd8879
Binary files /dev/null and b/tests/ecodb.tdx differ
diff --git a/tests/ecodb_001.sdx b/tests/ecodb_001.sdx
new file mode 100644
index 0000000..7f73e97
Binary files /dev/null and b/tests/ecodb_001.sdx differ
diff --git a/tools/ecoPCRFilter.py b/tools/ecoPCRFilter.py
new file mode 100755
index 0000000..9bf0a51
--- /dev/null
+++ b/tools/ecoPCRFilter.py
@@ -0,0 +1,303 @@
+#!/usr/bin/env python
+
+import struct
+import sys
+import os
+import gzip
+
+
+#####
+#
+# Generic file function
+#
+#####
+
class Filter(object):
    """
    Reader for an ecoPCR binary taxonomy database (the <path>.tdx, .rdx
    and .ndx index files).

    Public services: findTaxonByTaxid, subTreeIterator,
    parentalTreeIterator, ecoPCRResultIterator, rankFilter,
    lastCommonTaxon.
    """


    def __init__(self,path):
        # `path` is the database prefix shared by the three index files
        self._path = path
        self._taxonFile = "%s.tdx" % self._path
        self._ranksFile = "%s.rdx" % self._path
        self._namesFile = "%s.ndx" % self._path
        self._taxonomy, self._index, self._ranks, self._name = self.__readNodeTable()


    def __universalOpen(self,file):
        """Open a path (transparently handling .gz) or pass through an
        already-open file object."""
        if isinstance(file,str):
            if file[-3:] == '.gz':
                rep = gzip.open(file)
            else:
                rep = open(file)
        else:
            rep = file
        return rep

    def __universalTell(self,file):
        """Byte position in the underlying file, even for gzip streams."""
        if isinstance(file, gzip.GzipFile):
            file=file.myfileobj
        return file.tell()

    def __fileSize(self,file):
        """Size in bytes of the underlying file; preserves the current
        read position."""
        if isinstance(file, gzip.GzipFile):
            file=file.myfileobj
        pos = file.tell()
        file.seek(0,2)
        length = file.tell()
        file.seek(pos,0)
        return length

    def __progressBar(self,pos,max,reset=False,delta=[]):
        """Draw a text progress bar on stderr; `delta` is a deliberate
        mutable default keeping the [start, now] timestamps across calls."""
        import time  # fix: `time` was used here but never imported by this module
        if reset:
            del delta[:]
        if not delta:
            delta.append(time.time())
            delta.append(time.time())

        delta[1]=time.time()
        elapsed = delta[1]-delta[0]
        percent = float(pos)/max * 100
        remain = time.strftime('%H:%M:%S',time.gmtime(elapsed / percent * (100-percent)))
        bar = '#' * int(percent/2)
        bar+= '|/-\\-'[pos % 5]
        bar+= ' ' * (50 - int(percent/2))
        sys.stderr.write('\r%5.1f %% |%s] remain : %s' %(percent,bar,remain))


    #####
    #
    # Iterator functions
    #
    #####


    def __ecoRecordIterator(self,file):
        """Yield raw records: the file starts with a big-endian uint32
        record count, then (size, payload) pairs."""
        file = self.__universalOpen(file)
        (recordCount,) = struct.unpack('> I',file.read(4))

        for i in xrange(recordCount):
            (recordSize,)=struct.unpack('>I',file.read(4))
            record = file.read(recordSize)
            yield record


    def __ecoNameIterator(self):
        """Yield (name, classname, taxon index) from the .ndx file."""
        for record in self.__ecoRecordIterator(self._namesFile):
            lrecord = len(record)
            lnames  = lrecord - 16
            (isScientificName,namelength,classLength,indextaxid,names)=struct.unpack('> I I I I %ds' % lnames, record)
            name=names[:namelength]
            classname=names[namelength:]
            yield (name,classname,indextaxid)


    def __ecoTaxonomicIterator(self):
        """Yield (taxid, rankid, parent index, name) from the .tdx file."""
        for record in self.__ecoRecordIterator(self._taxonFile):
            lrecord = len(record)
            lnames  = lrecord - 16
            (taxid,rankid,parentidx,nameLength,name)=struct.unpack('> I I I I %ds' % lnames, record)
            yield (taxid,rankid,parentidx,name)


    def __ecoSequenceIterator(self,file):
        """Yield sequence records from a .sdx file; the sequence body is
        stored zlib-compressed after the definition string."""
        for record in self.__ecoRecordIterator(file):
            lrecord = len(record)
            lnames  = lrecord - (4*4+20)
            (taxid,seqid,deflength,seqlength,cptseqlength,string)=struct.unpack('> I 20s I I I %ds' % lnames, record)
            de = string[:deflength]
            seq = gzip.zlib.decompress(string[deflength:])
            yield (taxid,seqid,deflength,seqlength,cptseqlength,de,seq)


    def __ecoRankIterator(self):
        """Yield rank names from the .rdx file."""
        for record in self.__ecoRecordIterator(self._ranksFile):
            yield record


    #####
    #
    # Indexes
    #
    #####

    def __ecoNameIndex(self):
        return [x for x in self.__ecoNameIterator()]

    def __ecoRankIndex(self):
        return [r for r in self.__ecoRankIterator()]

    def __ecoTaxonomyIndex(self):
        # `index` maps taxid -> position in the taxonomy list
        taxonomy = []
        index = {}
        for i, x in enumerate(self.__ecoTaxonomicIterator()):
            taxonomy.append(x)
            index[x[0]] = i
        return taxonomy, index

    def __readNodeTable(self):
        taxonomy, index = self.__ecoTaxonomyIndex()
        ranks = self.__ecoRankIndex()
        name = self.__ecoNameIndex()
        return taxonomy,index,ranks,name


    def findTaxonByTaxid(self,taxid):
        """Return the (taxid, rankid, parent index, name) row for `taxid`."""
        return self._taxonomy[self._index[taxid]]



    #####
    #
    # PUBLIC METHODS
    #
    #####


    def subTreeIterator(self, taxid):
        "return subtree for given taxonomic id "
        idx = self._index[taxid]
        yield self._taxonomy[idx]
        for t in self._taxonomy:
            if t[2] == idx:
                for subt in self.subTreeIterator(t[0]):
                    yield subt


    def parentalTreeIterator(self, taxid):
        """
        return parental tree for given taxonomic id starting from
        first ancester to the root.
        """
        taxon=self.findTaxonByTaxid(taxid)
        while taxon[2]!= 0:
            yield taxon
            taxon = self._taxonomy[taxon[2]]
        yield self._taxonomy[0]


    def ecoPCRResultIterator(self, file):
        "iteration on ecoPCR result file; yields one dict per hit"
        file = self.__universalOpen(file)
        data = ColumnFile(file,
                          sep='|',
                          types=(str,int,int,
                                 str,int,str,
                                 int,str,int,
                                 str,int,str,
                                 str,str,int,
                                 str,int,int,
                                 str,str),skip='#')

        for ac, sq_len, taxid,\
            rank, sp_taxid, species,\
            ge_taxid, genus, fa_taxid,\
            family, sk_taxid, s_kgdom,\
            strand, oligo_1, error_1,\
            oligo_2, error_2, amp_len,\
            sq_des, definition in data:

            yield {'ac':ac, 'sq_len':sq_len, 'taxid':taxid,
                   'rank':rank, 'sp_taxid':sp_taxid, 'species':species,
                   'ge_taxid':ge_taxid, 'genus':genus, 'fa_taxid':fa_taxid,
                   'family':family, 'sk_taxid':sk_taxid, 's_kgdom':s_kgdom,
                   'strand':strand, 'oligo_1':oligo_1, 'error_1':error_1,
                   'oligo_2':oligo_2, 'error_2':error_2, 'amp_len':amp_len,
                   'sq_des':sq_des, 'definition':definition}

    def rankFilter(self,rankid,filter):
        """True when rank id `rankid` names the rank `filter` (e.g. 'genus')."""
        return self._ranks[rankid] == filter


    def lastCommonTaxon(self,taxid_1, taxid_2):
        """Return the taxid of the deepest ancestor shared by both taxa."""
        t1 = [x[0] for x in self.parentalTreeIterator(taxid_1)]
        t2 = [x[0] for x in self.parentalTreeIterator(taxid_2)]
        t1.reverse()
        t2.reverse()
        # fix: the original picked the length with `t1 < t2 and len(t1) or
        # len(t2)`, a lexicographic list comparison that could select the
        # longer list and index out of range
        count = min(len(t1), len(t2))
        for i in range(count):
            if t1[i] != t2[i]:
                return t1[i-1]
        # fix: when one lineage is a prefix of the other the original fell
        # through and returned None; return the last common level instead
        # (consistent with the ecoSort.py implementation)
        return t1[count-1]
+
+
+
class ColumnFile(object):
    """
    Iterator over a delimited text stream, yielding one list of fields
    per line.  Lines whose first character equals `skip` are ignored;
    `types` is a tuple of converters applied column-wise (the last one
    is repeated for any extra columns).
    """

    def __init__(self,stream,sep=None,strip=True,types=None,skip=None):
        if isinstance(stream,str):
            self._stream = open(stream)
        elif hasattr(stream,'next'):
            self._stream = stream
        else:
            # fix: py2-only `raise ValueError,msg` syntax replaced by the
            # equivalent call form (also valid in py3)
            raise ValueError('stream must be string or an iterator')
        self._delimiter=sep
        self._strip=strip
        if types:
            self._types=[x for x in types]
            # bool('False') is True, so booleans need a dedicated parser
            for i in xrange(len(self._types)):
                if self._types[i] is bool:
                    self._types[i]=ColumnFile.str2bool
        else:
            self._types=None
        self._skip = skip

    def str2bool(x):
        """Parse 'T(rue)', 'V(rai)' or 'F(alse/aux)' style booleans."""
        return bool(eval(x.strip()[0].upper(),{'T':True,'V':True,'F':False}))

    str2bool = staticmethod(str2bool)


    def __iter__(self):
        return self

    def next(self):
        ligne = self._stream.next()
        # drop comment/header lines
        while ligne[0] == self._skip:
            ligne = self._stream.next()
        data = ligne.split(self._delimiter)
        if self._strip or self._types:
            data = [x.strip() for x in data]
        if self._types:
            it = self.endLessIterator(self._types)
            data = [x[1](x[0]) for x in ((y,it.next()) for y in data)]
        return data

    def endLessIterator(self,endedlist):
        """Yield every element, then repeat the last one forever."""
        for x in endedlist:
            yield x
        while(1):
            yield endedlist[-1]
+
+
+class Table(list):
+
+ def __init__(self, headers, types):
+ list.__init__(self)
+ self.headers = headers
+ self.types = types
+ self.lines = []
+
+ def printTable(self):
+ for h in self.headers:
+ print "\t%s\t|" % h,
+ print "\n"
+ for l in self.lines:
+ for c in l:
+ print "\t%s\t|" % c
+ print "\n"
+
+ def getColumn(self,n):
+ print "\t%s\n" % self.header[n]
+ for i in range(len(self.lines)):
+ print "\t%s\n" % i[n]
+
+
+
+
diff --git a/tools/ecoPCRFormat.py b/tools/ecoPCRFormat.py
new file mode 100755
index 0000000..3884001
--- /dev/null
+++ b/tools/ecoPCRFormat.py
@@ -0,0 +1,651 @@
+#!/usr/bin/env python
+
+import re
+import gzip
+import struct
+import sys
+import time
+import getopt
+
+try:
+ import psycopg2
+ _dbenable=True
+except ImportError:
+ _dbenable=False
+
+#####
+#
+#
+# Generic file function
+#
+#
+#####
+
def universalOpen(file):
    """Open `file` when it is a path (transparently un-gzipping *.gz);
    pass any already-open file-like object straight through."""
    if not isinstance(file, str):
        return file
    opener = gzip.open if file[-3:] == '.gz' else open
    return opener(file)
+
def universalTell(file):
    """Current byte position, looking through a gzip wrapper if present."""
    target = file.myfileobj if isinstance(file, gzip.GzipFile) else file
    return target.tell()
+
def fileSize(file):
    """Total size in bytes of the (possibly gzip-wrapped) file; the
    current read position is preserved."""
    if isinstance(file, gzip.GzipFile):
        file = file.myfileobj
    here = file.tell()
    file.seek(0, 2)
    total = file.tell()
    file.seek(here, 0)
    return total
+
def progressBar(pos,max,reset=False,delta=[]):
    """
    Draw a textual progress bar on stderr with an estimated remaining
    time.  `delta` is a deliberate mutable default keeping the
    [start, now] timestamps across calls; `reset=True` restarts it.
    Fix: pos == 0 made `percent` zero and raised ZeroDivisionError when
    estimating the remaining time.
    """
    if reset:
        del delta[:]
    if not delta:
        delta.append(time.time())
        delta.append(time.time())

    delta[1]=time.time()
    elapsed = delta[1]-delta[0]
    percent = float(pos)/max * 100
    if percent > 0:
        remain = time.strftime('%H:%M:%S',time.gmtime(elapsed / percent * (100-percent)))
    else:
        remain = '--:--:--'  # nothing processed yet: no estimate possible
    bar = '#' * int(percent/2)
    bar+= '|/-\\-'[pos % 5]
    bar+= ' ' * (50 - int(percent/2))
    sys.stderr.write('\r%5.1f %% |%s] remain : %s' %(percent,bar,remain))
+
+#####
+#
+#
+# NCBI Dump Taxonomy reader
+#
+#
+#####
+
def endLessIterator(endedlist):
    """Yield every element of `endedlist`, then repeat its last element
    forever (used to pad a per-column type list)."""
    for item in endedlist:
        yield item
    while True:
        yield endedlist[-1]
+
class ColumnFile(object):
    """
    Iterator over a delimited text stream, yielding one list of fields
    per line.  `types` is a tuple of converters applied column-wise
    (endLessIterator repeats the last one for extra columns).
    """

    def __init__(self,stream,sep=None,strip=True,types=None):
        if isinstance(stream,str):
            self._stream = open(stream)
        elif hasattr(stream,'next'):
            self._stream = stream
        else:
            # fix: py2-only `raise ValueError,msg` syntax replaced by the
            # equivalent call form (also valid in py3)
            raise ValueError('stream must be string or an iterator')
        self._delimiter=sep
        self._strip=strip
        if types:
            self._types=[x for x in types]
            # bool('False') is True, so booleans need a dedicated parser
            for i in xrange(len(self._types)):
                if self._types[i] is bool:
                    self._types[i]=ColumnFile.str2bool
        else:
            self._types=None

    def str2bool(x):
        """Parse 'T(rue)', 'V(rai)' or 'F(alse/aux)' style booleans."""
        return bool(eval(x.strip()[0].upper(),{'T':True,'V':True,'F':False}))

    str2bool = staticmethod(str2bool)


    def __iter__(self):
        return self

    def next(self):
        ligne = self._stream.next()
        data = ligne.split(self._delimiter)
        if self._strip or self._types:
            data = [x.strip() for x in data]
        if self._types:
            it = endLessIterator(self._types)
            data = [x[1](x[0]) for x in ((y,it.next()) for y in data)]
        return data
+
def taxonCmp(t1,t2):
    """cmp-style comparator ordering taxonomy rows by taxid (field 0)."""
    if t1[0] == t2[0]:
        return 0
    return -1 if t1[0] < t2[0] else +1
+
def bsearchTaxon(taxonomy,taxid):
    """
    Binary search in `taxonomy` (a list sorted by taxid, field 0).
    Returns the index of the row whose taxid matches, or None.
    Fix: the first midpoint was computed as `begin + end / 2` (missing
    parentheses — only correct because begin == 0), and plain `/` yields
    a float index under Python 3; floor division is used throughout.
    """
    taxCount = len(taxonomy)
    begin = 0
    end   = taxCount
    oldcheck = taxCount
    check = (begin + end) // 2
    while check != oldcheck and taxonomy[check][0] != taxid:
        if taxonomy[check][0] < taxid:
            begin = check
        else:
            end = check
        oldcheck = check
        check = (begin + end) // 2

    if taxonomy[check][0] == taxid:
        return check
    return None
+
+
+
def readNodeTable(file):
    """
    Parse an NCBI nodes.dmp dump file.

    Returns (taxonomy, ranks, index): taxonomy is a list of
    [taxid, rankid, parent_index] rows sorted by taxid, ranks maps
    rank name -> rank id, index maps taxid -> row position.
    """

    file = universalOpen(file)

    # nodes.dmp is '|'-separated; only taxid, parent taxid and rank
    # name (columns 0-2) are used below
    nodes = ColumnFile(file,
                    sep='|',
                    types=(int,int,str,
                           str,str,bool,
                           int,bool,int,
                           bool,bool,bool,str))
    print >>sys.stderr,"Reading taxonomy dump file..."
    # keep [taxid, rank name, parent taxid]; the last two fields are
    # rewritten to ids/indexes further down
    taxonomy=[[n[0],n[2],n[1]] for n in nodes]
    print >>sys.stderr,"List all taxonomy rank..."
    ranks =list(set(x[1] for x in taxonomy))
    ranks.sort()
    # py2 idiom: map(None, a, b) zips the two sequences -> {rank: id}
    ranks = dict(map(None,ranks,xrange(len(ranks))))

    print >>sys.stderr,"Sorting taxons..."
    taxonomy.sort(taxonCmp)

    print >>sys.stderr,"Indexing taxonomy..."
    index = {}
    for t in taxonomy:
        index[t[0]]=bsearchTaxon(taxonomy, t[0])

    print >>sys.stderr,"Indexing parent and rank..."
    # replace rank name by its id and parent taxid by its row index
    for t in taxonomy:
        t[1]=ranks[t[1]]
        t[2]=index[t[2]]


    return taxonomy,ranks,index
+
def nameIterator(file):
    """Yield (taxid, name, classname) for each row of an NCBI names.dmp
    file; the 'unique name' and trailing columns are discarded."""
    source = universalOpen(file)
    rows = ColumnFile(source,
                      sep='|',
                      types=(int, str,
                             str, str))
    for taxid, name, unique, classname, white in rows:
        yield taxid, name, classname
+
def mergedNodeIterator(file):
    """Yield (old taxid, current taxid) for each row of merged.dmp."""
    source = universalOpen(file)
    rows = ColumnFile(source,
                      sep='|',
                      types=(int, int, str))
    for taxid, current, white in rows:
        yield taxid, current
+
def deletedNodeIterator(file):
    """Yield each deleted taxid listed in delnodes.dmp."""
    source = universalOpen(file)
    rows = ColumnFile(source,
                      sep='|',
                      types=(int, str))
    for taxid, white in rows:
        yield taxid
+
def readTaxonomyDump(taxdir):
    """
    Load a full NCBI taxonomy dump directory (nodes.dmp, names.dmp,
    merged.dmp, delnodes.dmp).

    Returns (taxonomy, ranks, alternativeName, index); scientific names
    are appended to their taxonomy rows, merged taxids are aliased in
    `index`, and deleted taxids map to None.
    """
    taxonomy,ranks,index = readNodeTable('%s/nodes.dmp' % taxdir)

    print >>sys.stderr,"Adding scientific name..."

    alternativeName=[]
    for taxid,name,classname in nameIterator('%s/names.dmp' % taxdir):
        alternativeName.append((name,classname,index[taxid]))
        # the scientific name also becomes part of the taxonomy row itself
        if classname == 'scientific name':
            taxonomy[index[taxid]].append(name)

    print >>sys.stderr,"Adding taxid alias..."
    for taxid,current in mergedNodeIterator('%s/merged.dmp' % taxdir):
        index[taxid]=index[current]

    print >>sys.stderr,"Adding deleted taxid..."
    for taxid in deletedNodeIterator('%s/delnodes.dmp' % taxdir):
        index[taxid]=None

    return taxonomy,ranks,alternativeName,index
+
def readTaxonomyDB(dbname):
    """
    Load the taxonomy from a PostgreSQL database exposing the
    ncbi_taxonomy schema (requires psycopg2).

    Returns the same (taxonomy, ranks, alternativeName, index) structure
    as readTaxonomyDump.
    """
    connection = psycopg2.connect(database=dbname)

    cursor = connection.cursor()
    cursor.execute("select numid,rank,parent from ncbi_taxonomy.taxon")
    taxonomy=[list(x) for x in cursor]

    cursor.execute("select rank_class from ncbi_taxonomy.taxon_rank_class order by rank_class")
    ranks=cursor.fetchall()
    # py2 idiom: map(None, a, b) zips -> {rank name: rank id}
    ranks = dict(map(None,(x[0] for x in ranks),xrange(len(ranks))))

    print >>sys.stderr,"Sorting taxons..."
    taxonomy.sort(taxonCmp)

    print >>sys.stderr,"Indexing taxonomy..."
    index = {}
    for t in taxonomy:
        index[t[0]]=bsearchTaxon(taxonomy, t[0])

    print >>sys.stderr,"Indexing parent and rank..."
    # replace rank name by its id and parent taxid by its row index;
    # the root (taxid 1, parent None) becomes its own parent
    for t in taxonomy:
        t[1]=ranks[t[1]]
        try:
            t[2]=index[t[2]]
        except KeyError,e:
            if t[2] is None and t[0]==1:
                t[2]=index[t[0]]
            else:
                raise e

    cursor.execute("select taxid,name,category from ncbi_taxonomy.name")

    alternativeName=[]
    for taxid,name,classname in cursor:
        alternativeName.append((name,classname,index[taxid]))
        # the scientific name also becomes part of the taxonomy row itself
        if classname == 'scientific name':
            taxonomy[index[taxid]].append(name)

    cursor.execute("select old_numid,current_numid from ncbi_taxonomy.taxon_id_alias")

    print >>sys.stderr,"Adding taxid alias..."
    for taxid,current in cursor:
        if current is not None:
            index[taxid]=index[current]
        else:
            index[taxid]=None


    return taxonomy,ranks,alternativeName,index
+
+#####
+#
+#
+# Genbank/EMBL sequence reader
+#
+#
+#####
+
def entryIterator(file):
    """Yield one complete flat-file entry at a time: all text up to and
    including the '//' terminator line."""
    stream = universalOpen(file)
    pending = []
    for line in stream:
        pending.append(line)
        if line == '//\n':
            yield ''.join(pending)
            pending = []
+
def fastaEntryIterator(file):
    """Yield one complete fasta entry at a time, split on '>' header
    lines; the trailing entry is flushed at end of file."""
    stream = universalOpen(file)
    pending = []
    for line in stream:
        if line[0] == '>' and pending:
            yield ''.join(pending)
            pending = []
        pending.append(line)
    if pending:
        yield ''.join(pending)
+
_cleanSeq = re.compile('[ \n0-9]+')

def cleanSeq(seq):
    """Strip spaces, newlines and position digits from a raw sequence block."""
    return _cleanSeq.sub('',seq)


_gbParseID = re.compile('(?<=^LOCUS {7})[^ ]+(?= )',re.MULTILINE)
_gbParseDE = re.compile('(?<=^DEFINITION {2}).+?\. *$(?=[^ ])',re.MULTILINE+re.DOTALL)
_gbParseSQ = re.compile('(?<=^ORIGIN).+?(?=^//$)',re.MULTILINE+re.DOTALL)
_gbParseTX = re.compile('(?<= /db_xref="taxon:)[0-9]+(?=")')

def genbankEntryParser(entry):
    """Parse one Genbank flat-file entry into a dict with keys
    id / taxid / definition / sequence (taxid is None when absent)."""
    accession  = _gbParseID.findall(entry)[0]
    definition = ' '.join(_gbParseDE.findall(entry)[0].split())
    sequence   = cleanSeq(_gbParseSQ.findall(entry)[0].upper())
    taxids     = _gbParseTX.findall(entry)
    taxid      = int(taxids[0]) if taxids else None
    return {'id':accession,'taxid':taxid,'definition':definition,'sequence':sequence}

######################

_cleanDef = re.compile('[\nDE]')

def cleanDef(definition):
    """Remove newlines plus the letters 'D'/'E' (leftover EMBL 'DE' line
    prefixes; note this strips every capital D or E in the text)."""
    return _cleanDef.sub('',definition)

_emblParseID = re.compile('(?<=^ID {3})[^ ]+(?=;)',re.MULTILINE)
_emblParseDE = re.compile('(?<=^DE {3}).+?\. *$(?=[^ ])',re.MULTILINE+re.DOTALL)
_emblParseSQ = re.compile('(?<=^ ).+?(?=^//$)',re.MULTILINE+re.DOTALL)
_emblParseTX = re.compile('(?<= /db_xref="taxon:)[0-9]+(?=")')

def emblEntryParser(entry):
    """Parse one EMBL flat-file entry into the same dict shape as
    genbankEntryParser."""
    accession  = _emblParseID.findall(entry)[0]
    definition = ' '.join(cleanDef(_emblParseDE.findall(entry)[0]).split())
    sequence   = cleanSeq(_emblParseSQ.findall(entry)[0].upper())
    taxids     = _emblParseTX.findall(entry)
    taxid      = int(taxids[0]) if taxids else None
    return {'id':accession,'taxid':taxid,'definition':definition,'sequence':sequence}
+
+
+######################
+
_fastaSplit=re.compile(';\W*')

def parseFasta(seq):
    """Split one fasta entry into (id, sequence, definition, info)
    where info is a dict of 'key=value' fields from the title line and
    definition joins the remaining title fields."""
    lines = seq.split('\n')
    title = lines[0].strip()[1:].split(None,1)
    id = title[0]
    field = _fastaSplit.split(title[1]) if len(title) == 2 else []
    info = dict(x.split('=',1) for x in field if '=' in x)
    definition = ' '.join([x for x in field if '=' not in x])
    seq = ''.join(x.strip() for x in lines[1:]).upper()
    return id,seq,definition,info


def fastaEntryParser(entry):
    """Wrap parseFasta output into the common entry dict; taxid comes
    from an optional 'taxid=<n>' field of the title line."""
    id,seq,definition,info = parseFasta(entry)
    taxid = info.get('taxid',None)
    if taxid is not None:
        taxid = int(taxid)
    return {'id':id,'taxid':taxid,'definition':definition,'sequence':seq}
+
+
def sequenceIteratorFactory(entryParser,entryIterator):
    """Bind a low-level entry iterator to a parser; the returned
    function iterates parsed entry dicts over a file."""
    def sequenceIterator(file):
        return (entryParser(entry) for entry in entryIterator(file))
    return sequenceIterator
+
+
def taxonomyInfo(entry,connection):
    """
    Enrich a sequence entry dict in place with taxonomy columns fetched
    from a PostgreSQL ncbi_taxonomy schema: current taxid, species,
    genus, family ids and their scientific names.  Returns the entry.
    """
    taxid = entry['taxid']
    curseur = connection.cursor()
    # taxid is an int, so the %d interpolation below cannot inject SQL
    curseur.execute("""
          select taxid,species,genus,family,
                 taxonomy.scientificName(taxid) as sn,
                 taxonomy.scientificName(species) as species_sn,
                 taxonomy.scientificName(genus) as genus_sn,
                 taxonomy.scientificName(family) as family_sn
          from
                (
                 select alias as taxid,
                        taxonomy.getSpecies(alias) as species,
                        taxonomy.getGenus(alias)   as genus,
                        taxonomy.getFamily(alias)  as family
                  from taxonomy.aliases
                  where id=%d ) as tax
          """ % taxid)
    rep = curseur.fetchone()
    entry['current_taxid']=rep[0]
    entry['species']=rep[1]
    entry['genus']=rep[2]
    entry['family']=rep[3]
    entry['scientific_name']=rep[4]
    entry['species_sn']=rep[5]
    entry['genus_sn']=rep[6]
    entry['family_sn']=rep[7]
    return entry
+
+#####
+#
+#
+# Binary writer
+#
+#
+#####
+
def ecoSeqPacker(sq):
    """Serialize one sequence entry to the .sdx record layout: big-endian
    [record size | taxid | 20-byte id | def length | raw seq length |
    compressed length | definition | zlib-compressed sequence]; the
    leading size does not count its own 4 bytes."""
    compactseq = gzip.zlib.compress(sq['sequence'],9)
    cptseqlength = len(compactseq)
    delength = len(sq['definition'])

    totalSize = 4 + 20 + 4 + 4 + 4 + cptseqlength + delength
    layout = '> I I 20s I I I %ds %ds' % (delength,cptseqlength)

    packed = struct.pack(layout,
                         totalSize,
                         sq['taxid'],
                         sq['id'],
                         delength,
                         len(sq['sequence']),
                         cptseqlength,
                         sq['definition'],
                         compactseq)

    assert len(packed) == totalSize+4, "error in sequence packing"

    return packed
+
def ecoTaxPacker(tx):
    """Serialize one taxon row (taxid, rankid, parent index, name) to the
    .tdx record layout; the leading size excludes its own 4 bytes."""
    namelength = len(tx[3])
    totalSize = 4 + 4 + 4 + 4 + namelength
    return struct.pack('> I I I I I %ds' % namelength,
                       totalSize,
                       tx[0],
                       tx[1],
                       tx[2],
                       namelength,
                       tx[3])
+
def ecoRankPacker(rank):
    """Serialize one rank label as big-endian [length | bytes]."""
    length = len(rank)
    return struct.pack('> I %ds' % length, length, rank)
+
def ecoNamePacker(name):
    """Serialize one (name, classname, taxon index) triple to the .ndx
    record layout: [size | is-scientific flag | name length | class
    length | taxon index | name | classname]."""
    namelength  = len(name[0])
    classlength = len(name[1])
    totalSize = namelength + classlength + 4 + 4 + 4 + 4
    return struct.pack('> I I I I I %ds %ds' % (namelength,classlength),
                       totalSize,
                       int(name[1]=='scientific name'),
                       namelength,
                       classlength,
                       name[2],
                       name[0],
                       name[1])
+
def ecoSeqWriter(file,input,taxindex,parser):
    """
    Convert the sequence flat file `input` to the binary .sdx `file`,
    remapping each entry's taxid through `taxindex`.

    Returns the list of entry ids skipped because their taxid was
    missing or unknown.
    """
    output = open(file,'wb')
    input = universalOpen(input)
    inputsize = fileSize(input)
    entries = parser(input)
    seqcount=0
    skipped = []

    # placeholder record count; rewritten at the end once known
    output.write(struct.pack('> I',seqcount))

    progressBar(1, inputsize,reset=True)
    for entry in entries:
        if entry['taxid'] is not None:
            try:
                entry['taxid']=taxindex[entry['taxid']]
            except KeyError:
                entry['taxid']=None
            if entry['taxid'] is not None:
                seqcount+=1
                output.write(ecoSeqPacker(entry))
            else:
                skipped.append(entry['id'])
            where = universalTell(input)
            progressBar(where, inputsize)
            print >>sys.stderr," Readed sequences : %d     " % seqcount,
        else:
            skipped.append(entry['id'])

    print >>sys.stderr
    # rewrite the leading count now that all records are written
    output.seek(0,0)
    output.write(struct.pack('> I',seqcount))

    output.close()
    return skipped
+
+
def ecoTaxWriter(file,taxonomy):
    """Write the .tdx file: big-endian record count, then one packed
    record per taxon row."""
    out = open(file,'wb')
    out.write(struct.pack('> I',len(taxonomy)))
    for record in map(ecoTaxPacker, taxonomy):
        out.write(record)
    out.close()
+
def ecoRankWriter(file,ranks):
    """Write the .rdx file: record count, then each rank name in
    alphabetical order."""
    out = open(file,'wb')
    out.write(struct.pack('> I',len(ranks)))

    rankNames = ranks.keys()
    rankNames.sort()

    for label in rankNames:
        out.write(ecoRankPacker(label))

    out.close()
+
def nameCmp(n1,n2):
    """cmp-style comparator: case-insensitive ordering on the name field."""
    a = n1[0].upper()
    b = n2[0].upper()
    if a == b:
        return 0
    return -1 if a < b else 1
+
+
def ecoNameWriter(file,names):
    """Write the .ndx file: record count, then the name triples sorted
    case-insensitively (sorts `names` in place)."""
    out = open(file,'wb')
    out.write(struct.pack('> I',len(names)))

    names.sort(nameCmp)

    for triple in names:
        out.write(ecoNamePacker(triple))

    out.close()
+
def ecoDBWriter(prefix,taxonomy,seqFileNames,parser):
    """
    Write the complete ecoPCR database: <prefix>.rdx (ranks),
    <prefix>.tdx (taxa), <prefix>.ndx (names), plus one numbered
    <prefix>_NNN.sdx file per input sequence file.  Skipped entry ids
    are reported on stderr.
    """
    ecoRankWriter('%s.rdx' % prefix, taxonomy[1])
    ecoTaxWriter('%s.tdx' % prefix, taxonomy[0])
    ecoNameWriter('%s.ndx' % prefix, taxonomy[2])

    filecount = 0
    for filename in seqFileNames:
        filecount+=1
        sk=ecoSeqWriter('%s_%03d.sdx' % (prefix,filecount),
                        filename,
                        taxonomy[3],
                        parser)
        if sk:
            print >>sys.stderr,"Skipped entry :"
            print >>sys.stderr,sk
+
def ecoParseOptions(arguments):
    """
    Parse the command-line arguments.

    Returns (opt, filenames): opt holds the database prefix, taxonomy
    source settings and the entry parser; filenames are the remaining
    positional sequence files.
    """
    opt = {
        'prefix' : 'ecodb',
        'taxdir' : 'taxdump',
        'parser' : sequenceIteratorFactory(genbankEntryParser,
                                           entryIterator)
    }

    o,filenames = getopt.getopt(arguments,
                                'ht:T:n:gfe',
                                ['help',
                                 'taxonomy=',
                                 'taxonomy_db=',
                                 'name=',
                                 'genbank',
                                 'fasta',
                                 'embl'])

    for name,value in o:
        if name in ('-h','--help'):
            printHelp()
            exit()
        elif name in ('-t','--taxonomy'):
            opt['taxmod']='dump'
            opt['taxdir']=value
        elif name in ('-T','--taxonomy_db'):
            opt['taxmod']='db'
            opt['taxdb']=value
        elif name in ('-n','--name'):
            opt['prefix']=value
        elif name in ('-g','--genbank'):
            opt['parser']=sequenceIteratorFactory(genbankEntryParser,
                                                  entryIterator)
        elif name in ('-f','--fasta'):
            opt['parser']=sequenceIteratorFactory(fastaEntryParser,
                                                  fastaEntryIterator)
        elif name in ('-e','--embl'):
            opt['parser']=sequenceIteratorFactory(emblEntryParser,
                                                  entryIterator)
        else:
            # fix: py2-only `raise ValueError,msg` syntax replaced by the
            # equivalent call form (also valid in py3)
            raise ValueError('Unknown option %s' % name)

    return opt,filenames
+
def printHelp():
    """Print the command-line usage summary to stdout."""
    print "-----------------------------------"
    print " ecoPCRFormat.py"
    print "-----------------------------------"
    print "ecoPCRFormat.py [option] <argument>"
    print "-----------------------------------"
    print "-e    --embl        :[E]mbl format"
    print "-f    --fasta       :[F]asta format"
    print "-g    --genbank     :[G]enbank format"
    print "-h    --help        :[H]elp - print this help"
    print "-n    --name        :[N]ame of the new database created"
    print "-t    --taxonomy    :[T]axonomy - path to the taxonomy database"
    print "                    :bcp-like dump from GenBank taxonomy database."
    print "-----------------------------------"
+
if __name__ == '__main__':

    opt,filenames = ecoParseOptions(sys.argv[1:])

    # fix: 'taxmod' is only set when -t/-T is given; default to the dump
    # reader so the documented default taxdir ('taxdump') works instead
    # of raising KeyError (and `taxonomy` is always bound below)
    taxmod = opt.get('taxmod','dump')
    if taxmod=='dump':
        taxonomy = readTaxonomyDump(opt['taxdir'])
    elif taxmod=='db':
        taxonomy = readTaxonomyDB(opt['taxdb'])

    ecoDBWriter(opt['prefix'], taxonomy, filenames, opt['parser'])
diff --git a/tools/ecoSort.py b/tools/ecoSort.py
new file mode 100755
index 0000000..c7d6ec3
--- /dev/null
+++ b/tools/ecoSort.py
@@ -0,0 +1,811 @@
+#!/usr/bin/env python
+
+import struct
+import sys
+import os
+import gzip
+import re
+import string
+
+from reportlab.graphics.shapes import *
+from reportlab.graphics.charts.barcharts import VerticalBarChart
+from reportlab.graphics.charts.piecharts import Pie
+from reportlab.lib.styles import getSampleStyleSheet
+from reportlab.lib.units import cm
+from reportlab.lib import colors
+from reportlab.platypus import *
+
+
+#####
+#
+# Generic file function
+#
+#####
+
class Filter(object):
    """
    Reader for an ecoPCR binary taxonomy database, providing:
      * findTaxonByTaxid
      * subTreeIterator
      * parentalTreeIterator
      * ecoPCRResultIterator
      * rankFilter
      * lastCommonTaxon

    see each method for more informations
    """

    def __init__(self,path):
        # `path` is the database prefix shared by the three index files
        self._path = path
        self._taxonFile = "%s.tdx" % self._path
        self._ranksFile = "%s.rdx" % self._path
        self._namesFile = "%s.ndx" % self._path
        self._taxonomy, self._index, self._ranks, self._name = self.__readNodeTable()


    def __universalOpen(self,file):
        """Open a path (transparently handling .gz) or pass through an
        already-open file object."""
        if isinstance(file,str):
            if file[-3:] == '.gz':
                rep = gzip.open(file)
            else:
                rep = open(file)
        else:
            rep = file
        return rep

    def __universalTell(self,file):
        """Byte position in the underlying file, even for gzip streams."""
        if isinstance(file, gzip.GzipFile):
            file=file.myfileobj
        return file.tell()

    def __fileSize(self,file):
        """Size in bytes of the underlying file; preserves the current
        read position."""
        if isinstance(file, gzip.GzipFile):
            file=file.myfileobj
        pos = file.tell()
        file.seek(0,2)
        length = file.tell()
        file.seek(pos,0)
        return length

    def __progressBar(self,pos,max,reset=False,delta=[]):
        """Draw a text progress bar on stderr; `delta` is a deliberate
        mutable default keeping the [start, now] timestamps across calls."""
        import time  # fix: `time` was used here but never imported by this module
        if reset:
            del delta[:]
        if not delta:
            delta.append(time.time())
            delta.append(time.time())

        delta[1]=time.time()
        elapsed = delta[1]-delta[0]
        percent = float(pos)/max * 100
        remain = time.strftime('%H:%M:%S',time.gmtime(elapsed / percent * (100-percent)))
        bar = '#' * int(percent/2)
        bar+= '|/-\\-'[pos % 5]
        bar+= ' ' * (50 - int(percent/2))
        sys.stderr.write('\r%5.1f %% |%s] remain : %s' %(percent,bar,remain))


    #####
    #
    # Iterator functions
    #
    #####


    def __ecoRecordIterator(self,file):
        """Yield raw records: the file starts with a big-endian uint32
        record count, then (size, payload) pairs."""
        file = self.__universalOpen(file)
        (recordCount,) = struct.unpack('> I',file.read(4))

        for i in xrange(recordCount):
            (recordSize,)=struct.unpack('>I',file.read(4))
            record = file.read(recordSize)
            yield record


    def __ecoNameIterator(self):
        """Yield (name, classname, taxon index) from the .ndx file."""
        for record in self.__ecoRecordIterator(self._namesFile):
            lrecord = len(record)
            lnames  = lrecord - 16
            (isScientificName,namelength,classLength,indextaxid,names)=struct.unpack('> I I I I %ds' % lnames, record)
            name=names[:namelength]
            classname=names[namelength:]
            yield (name,classname,indextaxid)


    def __ecoTaxonomicIterator(self):
        """Yield (taxid, rankid, parent index, name) from the .tdx file."""
        for record in self.__ecoRecordIterator(self._taxonFile):
            lrecord = len(record)
            lnames  = lrecord - 16
            (taxid,rankid,parentidx,nameLength,name)=struct.unpack('> I I I I %ds' % lnames, record)
            yield (taxid,rankid,parentidx,name)


    def __ecoSequenceIterator(self,file):
        """Yield sequence records from a .sdx file; the sequence body is
        stored zlib-compressed after the definition string."""
        for record in self.__ecoRecordIterator(file):
            lrecord = len(record)
            lnames  = lrecord - (4*4+20)
            (taxid,seqid,deflength,seqlength,cptseqlength,string)=struct.unpack('> I 20s I I I %ds' % lnames, record)
            de = string[:deflength]
            seq = gzip.zlib.decompress(string[deflength:])
            yield (taxid,seqid,deflength,seqlength,cptseqlength,de,seq)


    def __ecoRankIterator(self):
        """Yield rank names from the .rdx file."""
        for record in self.__ecoRecordIterator(self._ranksFile):
            yield record


    #####
    #
    # Indexes
    #
    #####

    def __ecoNameIndex(self):
        return [x for x in self.__ecoNameIterator()]

    def __ecoRankIndex(self):
        return [r for r in self.__ecoRankIterator()]

    def __ecoTaxonomyIndex(self):
        # `index` maps taxid -> position in the taxonomy list
        taxonomy = []
        index = {}
        for i, x in enumerate(self.__ecoTaxonomicIterator()):
            taxonomy.append(x)
            index[x[0]] = i
        return taxonomy, index

    def __readNodeTable(self):
        taxonomy, index = self.__ecoTaxonomyIndex()
        ranks = self.__ecoRankIndex()
        name = self.__ecoNameIndex()
        return taxonomy,index,ranks,name


    #####
    #
    # PUBLIC METHODS
    #
    #####

    def findTaxonByTaxid(self,taxid):
        """
        Returns a list containing [taxid,rankid,parent_index,nameLength,name]
        It takes one argument : a taxonomic id
        """
        return self._taxonomy[self._index[taxid]]


    def subTreeIterator(self, taxid):
        """
        Returns subtree for given taxid from first child
        to last child. It takes one argument : a taxonomic id
        """
        idx = self._index[taxid]
        yield self._taxonomy[idx]
        for t in self._taxonomy:
            if t[2] == idx:
                for subt in self.subTreeIterator(t[0]):
                    yield subt


    def parentalTreeIterator(self, taxid):
        """
        return parental tree for given taxonomic id starting from
        first ancester to the root.
        """
        taxon=self.findTaxonByTaxid(taxid)
        while taxon[2]!= 0:
            yield taxon
            taxon = self._taxonomy[taxon[2]]
        yield self._taxonomy[0]


    def ecoPCRResultIterator(self, file):
        """
        iteration on ecoPCR result file"
        It returns a dictionnary
        """
        file = self.__universalOpen(file)
        data = ColumnFile(file,
                          sep='|',
                          types=(str,int,int,
                                 str,int,str,
                                 int,str,int,
                                 str,int,str,
                                 str,str,int,
                                 str,int,int,
                                 str,str),skip='#')


        for ac, sq_len, taxid,\
            rank, sp_taxid, species,\
            ge_taxid, genus, fa_taxid,\
            family, sk_taxid, s_kgdom,\
            strand, oligo_1, error_1,\
            oligo_2, error_2, amp_len,\
            sq_des, definition in data:

            yield {'ac':ac, 'sq_len':sq_len, 'taxid':taxid,
                   'rank':rank, 'sp_taxid':sp_taxid, 'species':species,
                   'ge_taxid':ge_taxid, 'genus':genus, 'fa_taxid':fa_taxid,
                   'family':family, 'sk_taxid':sk_taxid, 's_kgdom':s_kgdom,
                   'strand':strand, 'oligo_1':oligo_1, 'error_1':error_1,
                   'oligo_2':oligo_2, 'error_2':error_2, 'amp_len':amp_len,
                   'sq_des':sq_des, 'definition':definition}

    def rankFilter(self,rankid,filter):
        """
        boolean telling whether rankid match filter
        takes 2 arguments :
            1- rankid
            2- filter (i.e genus)
        """
        return self._ranks[rankid] == filter


    def lastCommonTaxon(self,taxid_1, taxid_2):
        """
        returns the last common parent for two given taxa.
        It takes 2 arguments :
            1- taxid 1
            2- taxid 2
        """
        t1 = [x[0] for x in self.parentalTreeIterator(taxid_1)]
        t2 = [x[0] for x in self.parentalTreeIterator(taxid_2)]
        t1.reverse()
        t2.reverse()
        # fix: the original picked the length with `t1 < t2 and len(t1) or
        # len(t2)`, a lexicographic list comparison that could select the
        # longer list and index out of range
        count = min(len(t1), len(t2))
        for i in range(count):
            if t1[i] != t2[i]:
                return t1[i-1]
        return t1[count-1]
+
+
+
+
+
class ColumnFile(object):
    """
    cut an ecoPCR file into a list: iterate one list of typed fields per
    line, skipping lines starting with `skip`.  getOligo extracts the
    primer sequences announced in ecoPCR comment lines.
    """
    def __init__(self,stream,sep=None,strip=True,types=None,skip=None):
        if isinstance(stream,str):
            self._stream = open(stream)
        elif hasattr(stream,'next'):
            self._stream = stream
        else:
            # fix: py2-only `raise ValueError,msg` syntax replaced by the
            # equivalent call form (also valid in py3)
            raise ValueError('stream must be string or an iterator')
        self._delimiter=sep
        self._strip=strip
        if types:
            self._types=[x for x in types]
            # bool('False') is True, so booleans need a dedicated parser
            for i in xrange(len(self._types)):
                if self._types[i] is bool:
                    self._types[i]=ColumnFile.str2bool
        else:
            self._types=None
        self._skip = skip
        self._oligo = {}  # primers collected by getOligo ('o1'/'o2')

    def str2bool(x):
        """Parse 'T(rue)', 'V(rai)' or 'F(alse/aux)' style booleans."""
        return bool(eval(x.strip()[0].upper(),{'T':True,'V':True,'F':False}))

    str2bool = staticmethod(str2bool)


    def __iter__(self):
        return self

    def next(self):
        ligne = self._stream.next()
        # drop comment/header lines
        while ligne[0] == self._skip:
            ligne = self._stream.next()
        data = ligne.split(self._delimiter)
        if self._strip or self._types:
            data = [x.strip() for x in data]
        if self._types:
            it = self.endLessIterator(self._types)
            data = [x[1](x[0]) for x in ((y,it.next()) for y in data)]
        return data

    def endLessIterator(self,endedlist):
        """Yield every element, then repeat the last one forever."""
        for x in endedlist:
            yield x
        while(1):
            yield endedlist[-1]

    def getOligo(self,line):
        """Record the direct/reverse primer announced by an ecoPCR
        comment line into self._oligo; always returns None."""
        if line[2:8] == 'direct':
            r = re.compile('(?<=direct  strand oligo1 : )[A-Z]+(?= *)')
            self._oligo['o1'] = r.findall(line)
        if line[2:9] == 'reverse':
            r = re.compile('(?<=reverse strand oligo2 : )[A-Z]+(?= *)')
            self._oligo['o2'] = r.findall(line)
        return None
+
+
+
+
+###########
+#
+# DATA STRUCTURE
+#
+###########
+
+
class ecoTable(list):
    """
    List subclass holding tabular results together with their column
    headers and column types.
    """
    def __init__(self, headers, types):
        list.__init__(self)
        self.headers = headers
        self.types = types

    def __setitem__(self, key, value):
        """
        Store *value* at *key*, first coercing each field with the
        corresponding column type (mutates *value* in place).
        """
        for idx in range(len(value)):
            value[idx] = self.types[idx](value[idx])
        list.__setitem__(self, key, value)

    def __getitem__(self, index):
        """
        Return a new ecoTable with the selected row(s): a slice keeps
        every selected row, a single index yields a one-row table.
        """
        selection = list.__getitem__(self, index)
        result = ecoTable(self.headers, self.types)
        if isinstance(index, slice):
            result.extend(selection)
        else:
            result.append(selection)
        return result

    def getColumns(self, columnList):
        """
        Return a new ecoTable restricted to the columns whose indices
        are listed in *columnList*.
        """
        picked_headers = [self.headers[c] for c in columnList]
        picked_types = [self.types[c] for c in columnList]
        result = ecoTable(picked_headers, picked_types)
        for row in self:
            result.append([row[c] for c in columnList])
        return result
+
+
+###########
+#
+# PARSE FUNCTIONS
+#
+###########
+
+def _parseOligoResult(filter,file,strand):
+ seq = {}
+
+ if strand == 'direct':
+ key = 'oligo_1'
+ elif strand == 'reverse':
+ key = 'oligo_2'
+
+ for s in filter.ecoPCRResultIterator(file):
+ o = s[key]
+ taxid = s['taxid']
+ if not seq.has_key(o):
+ seq[o] = [1,taxid]
+ else:
+ seq[o][0] = seq[o][0] + 1
+ seq[o][1] = filter.lastCommonTaxon(seq[o][1],taxid)
+ return seq
+
+
+def _parseTaxonomyResult(table):
+ tax = {}
+ for l in table:
+ taxid = l[2]
+ scName = l[3]
+ count = l[1]
+ if not tax.has_key(taxid):
+ tax[taxid] = [1,scName,count]
+ else:
+ tax[taxid][0] = tax[taxid][0] + 1
+ tax[taxid][2] = tax[taxid][2] + count
+ return tax
+
+
+def _sortTable(e1,e2):
+ e1 = e1[1]
+ e2 = e2[1]
+ if e1 < e2:
+ return 1
+ if e1 > e2:
+ return -1
+ return 0
+
+
+def _countOligoMismatch(o1,o2):
+ """
+ define mismatch between two oligonucleotids
+ return number of mismatch
+ """
+ mmatch = 0
+ if len(o1) < len(o2):
+ mmatch = int(len(o2) - len(o1))
+ for i in range(len(o1)):
+ try:
+ if o1[i] != o2[i]:
+ mmatch = mmatch + 1
+ except:
+ mmatch = mmatch + 1
+
+ return mmatch
+
+###########
+#
+# TOOLS FUNCTIONS
+#
+###########
+
def customSort(table, x, y):
    """
    Aggregate *table* into a two-column table: for every distinct
    value of column *x*, the values of column *y* are summed
    (columns are numbered from 1).

    Returns a new ecoTable.
    """
    x = x - 1
    y = y - 1
    heads = (table.headers[x], table.headers[y])
    kinds = (table.types[x], table.types[y])
    result = ecoTable(heads, kinds)

    totals = {}
    for row in table:
        if row[x] in totals:
            totals[row[x]] = totals[row[x]] + row[y]
        else:
            totals[row[x]] = row[y]

    for value, total in totals.items():
        result.append([value, total])

    return result
+
+
def countColumnOccurrence(table, x):
    """
    Build a two-column table counting the occurrences of each
    distinct value found in column *x* (numbered from 1) of *table*.

    Returns a new ecoTable.

    FIX: the result was instantiated with ``Table(h, t)`` — the
    reportlab class imported for the PDF report — instead of
    ``ecoTable(h, t)`` as in the analogous customSort(); calling it
    therefore built the wrong object.
    """
    x = x - 1
    h = (table.headers[x], "count")
    t = (table.types[x], int)
    cTable = ecoTable(h, t)

    tmp = {}
    for l in table:
        if l[x] in tmp:
            tmp[l[x]] = tmp[l[x]] + 1
        else:
            tmp[l[x]] = 1

    for k, v in tmp.items():
        cTable.append([k, v])

    return cTable
+
+
def buildSpecificityTable(table):
    """
    Build a (mismatch, taxon, count) table from an oligo table: rows
    are grouped by mismatch number (field 5) and taxon name (field 3),
    and their counts (field 1) are summed.

    Returns a new ecoTable.
    """
    speTable = ecoTable(("mismatch", "taxon", "count"), (int, str, int))

    grouped = {}
    for row in table:
        byTaxon = grouped.setdefault(row[5], {})
        if row[3] in byTaxon:
            byTaxon[row[3]] = byTaxon[row[3]] + row[1]
        else:
            byTaxon[row[3]] = row[1]

    for mismatch, byTaxon in grouped.items():
        for taxon, count in byTaxon.items():
            speTable.append([mismatch, taxon, count])

    return speTable
+
+
def buildOligoTable(table, file, filter, oligoRef, strand='direct'):
    """
    Fill and sort a Table object from an ecoPCR result file.

    Takes 5 arguments
    1- table    : Table object to fill
    2- file     : ecoPCR result file path
    3- filter   : Filter object
    4- oligoRef : the oligo used in ecoPCR
    5- strand   : the oligo type, 'direct' or 'reverse'
    """
    grouped = _parseOligoResult(filter, file, strand)

    for i, (oligo, info) in enumerate(grouped.items()):
        count, lctTaxid = info[0], info[1]
        taxon = filter.findTaxonByTaxid(info[1])
        scName = taxon[3]
        rank = filter._ranks[taxon[1]]
        mismatch = _countOligoMismatch(oligoRef, oligo)
        # append a placeholder, then assign through __setitem__ so the
        # typed ecoTable coerces the stored row
        table.append(0)
        table[i] = [oligo, count, lctTaxid, scName, rank, mismatch]

    # Python 2 cmp-style sort, descending on the count column
    table.sort(_sortTable)
+
+
def buildTaxonomicTable(table):
    """
    Build a taxonomic synthesis (scName, numOfOligo, numOfAmpl,
    taxid) from an oligo table, sorted descending on the oligo count.

    Returns a new ecoTable.
    """
    taxTable = ecoTable(("scName", "numOfOligo", "numOfAmpl", "taxid"),
                        (str, int, int, int))

    summary = _parseTaxonomyResult(table)

    for i, (taxid, info) in enumerate(summary.items()):
        numOfOligo, scName, numOfAmpl = info[0], info[1], info[2]
        # placeholder + assignment so ecoTable.__setitem__ applies the types
        taxTable.append(0)
        taxTable[i] = [scName, numOfOligo, numOfAmpl, taxid]

    # Python 2 cmp-style sort, descending on numOfOligo
    taxTable.sort(_sortTable)

    return taxTable
+
+def _parseSequenceResult(filter,file,id):
+ sequences = {}
+ idIndex = {}
+
+ if id == 'family':
+ key = 'fa_taxid'
+ elif id == 'genus':
+ key = 'ge_taxid'
+ else:
+ key = 'taxid'
+
+ for s in filter.ecoPCRResultIterator(file):
+ seq = s['sq_des']
+ id = s[key]
+ if not idIndex.has_key(id):
+ idIndex[id] = []
+ if not sequences.has_key(seq):
+ sequences[seq] = [id]
+ else:
+ sequences[seq].append(id)
+ return sequences, idIndex
+
+
+def _sameValuesInList(array):
+ for i in range(len(array)-1):
+ if array[i] != array[i+1]:
+ return False
+ return True
+
+
def _sortSequences(file, filter):
    """
    Score species by how unambiguously their sequences identify them.

    A sequence borne by a single species scores 1 for that species; a
    sequence shared between different species scores 0 for each of
    them. idIndex finally maps every species taxid to the percentage
    of its sequences that are species-specific.

    Returns (sequences, idIndex).

    Cleanup: removed the unused identified / non_identified /
    ambiguous locals the original declared and never used.
    """
    sequences, idIndex = _parseSequenceResult(filter, file, 'species')

    for s, id in sequences.items():
        if len(id) == 1 or _sameValuesInList(id):
            # sequence specific to one species
            idIndex[id[0]].append(1)
        else:
            # shared sequence: ambiguous for every species bearing it
            for e in id:
                idIndex[e].append(0)

    for id, values in idIndex.items():
        idIndex[id] = float(values.count(1)) / float(len(values)) * 100

    return sequences, idIndex
+
def getIntraSpeciesDiversity(table, file, filter):
    """
    Fill *table* with, for every species whose sequences are 100%
    species-specific, the number of distinct sequences and their list.

    Takes 3 arguments
    1- table  : Table object to fill with (taxid, num of seq, list of seq)
    2- file   : ecoPCR result file path
    3- filter : Filter object

    FIXES two crashes in the original:
    - the sequences dict was unpacked as ``seq`` but then referenced
      as ``sequences`` (NameError);
    - the three row fields were passed as separate arguments to
      list.append (TypeError) instead of as a single list.
    """
    intraDiv = {}

    sequences, idIndex = _sortSequences(file, filter)

    for id, percent in idIndex.items():
        if percent == 100:
            intraDiv[id] = [0, []]
            for sq, idList in sequences.items():
                if id in idList:
                    intraDiv[id][0] = intraDiv[id][0] + 1
                    intraDiv[id][1].append(sq)

    for id, values in intraDiv.items():
        table.append([id, values[0], values[1]])
+
+
+
+###########
+#
+# OUTPUT FUNCTIONS
+#
+###########
+
+def printTable(table):
+ """
+ Displays the content a of Table object
+ Take 1 arguments
+ 1- Table object
+ """
+
+ format = ("%20s | " * len(table.headers))[:-3]
+ print format % tuple([str(e) for e in table.headers ]) +"\n" + "-"*23*len(table.headers)
+ for l in table:
+ print format % tuple([str(e) for e in l ])
+ print "# %d results" % len(table)
+
+
def saveAsCSV(table, path):
    """
    Create a csv file from a Table object.
    Takes 2 arguments
    1- Table object
    2- path of the file-to-be

    Note: fields are joined naively with commas; values containing a
    comma are not quoted.

    Fixed: the file handle is now closed in a finally block even when
    a row fails to serialise (the original leaked it on error); the
    local no longer shadows the builtin ``file``.
    """
    out = open(path, 'w')
    try:
        out.write(','.join([str(e) for e in table.headers]) + "\n")
        for row in table:
            out.write(','.join([str(e) for e in row]) + "\n")
    finally:
        out.close()
+
+
def grepTable(table, col, pattern):
    """
    Filter a Table object with a regular expression.
    Takes 3 arguments :
    1- Table object
    2- number of the column to match against (numbered from 1)
    3- regular expression pattern (search is case-insensitive)

    Returns a new Table object holding the matching rows.
    """
    idx = col - 1
    matcher = re.compile(pattern, re.IGNORECASE)
    result = ecoTable(table.headers, table.types)
    result.extend(row for row in table if matcher.search(row[idx]))
    return result
+
+
+###########
+#
+# GRAPH FUNCTIONS
+#
+###########
+
class EcoGraph(object):
    """
    PDF report builder: collects charts (pies, histograms) as
    reportlab flowables and lays them out with makeReport().
    """

    def __init__(self):
        self._styles = getSampleStyleSheet()
        self._element = []
        title = Paragraph("EcoPCR report", self._styles['Title'])
        self._element.append(self._box(title))
        self._element.append(Spacer(0, 0.5 * cm))

    def _box(self, flt, center=True):
        """Wrap a flowable in a light-grey box, centred by default."""
        style = [('BOX', (0, 0), (-1, -1), 0.5, colors.lightgrey)]
        if center:
            style.append(('ALIGN', (0, 0), (-1, -1), 'CENTER'))
        return Table([[flt]], style=style)

    def _addChart(self, chart, title):
        """Append a titled chart drawing to the report elements."""
        drawing = Drawing(300, 250)
        drawing.add(chart)
        self._element.append(self._box(Paragraph(title, self._styles['Normal'])))
        self._element.append(self._box(drawing))
        self._element.append(Spacer(0, 0.5 * cm))

    def _formatData(self, table):
        """Split two-column rows into parallel (values, labels) lists."""
        labels = [row[0] for row in table]
        values = [row[1] for row in table]
        return values, labels

    def makePie(self, table, title):
        """Add a pie chart built from a (label, value) table."""
        values, labels = self._formatData(table)
        pie = Pie()
        pie.x = 100
        pie.y = 100
        pie.data = values
        pie.labels = labels
        self._addChart(pie, title)

    def makeHistogram(self, table, title):
        """Add a vertical bar chart built from a (label, value) table."""
        values, labels = self._formatData(table)
        series = [tuple(values)]

        histo = VerticalBarChart()
        histo.x = 10
        histo.y = 70
        histo.height = 150
        histo.width = 300
        histo.bars.strokeWidth = 1
        histo.barSpacing = 1
        histo.barLabels.dy = 5
        histo.barLabelFormat = '%d'
        # shrink the bar labels as the number of bars grows
        histo.barLabels.fontSize = 9 - (len(series[0]) / 10)
        histo.data = series

        histo.categoryAxis.labels.boxAnchor = 'e'
        histo.categoryAxis.labels.textAnchor = 'start'
        histo.categoryAxis.labels.dx = -40
        histo.categoryAxis.labels.dy = -50
        histo.categoryAxis.labels.angle = 45
        histo.categoryAxis.labels.width = 10
        histo.categoryAxis.labels.height = 4
        histo.categoryAxis.categoryNames = labels
        histo.categoryAxis.strokeWidth = 1
        histo.categoryAxis.labels.fontSize = 8

        # scale the value axis to the data range, ~10 steps, minimum 1
        histo.valueAxis.valueMin = min(series[0]) * 0.7
        histo.valueAxis.valueMax = max(series[0])
        step = (max(series[0]) - min(series[0])) / 10
        histo.valueAxis.valueStep = step > 1 and step or 1

        self._addChart(histo, title)

    def makeReport(self, path):
        """Lay out all collected elements into a PDF written at *path*."""
        doc = SimpleDocTemplate(path)
        doc.build(self._element)
+
+
+######################
+
+
def init():
    """
    Hard-wired demo entry point: builds the direct and reverse oligo
    tables plus the taxonomic synthesis from a test ecoPCR output.
    Returns (o1Table, o2Table, taxTable).
    """
    file = "/Users/bessiere/Documents/workspace/ecoPCR/src/toto.tmp"
    oligo = {'o1': 'ATGTTTAAAA', 'o2': 'ATGGGGGTATTG'}
    filter = Filter("/ecoPCRDB/gbmam")

    headers = ("oligo", "Num", "LCT Taxid", "Sc Name", "Rank", "distance")
    types = (str, int, int, str, str, int)

    tables = {}
    for name, strand in (('o1', 'direct'), ('o2', 'reverse')):
        tables[name] = ecoTable(headers, types)
        buildOligoTable(tables[name], file, filter, oligo[name], strand)

    taxTable = buildTaxonomicTable(tables['o1'])
    # computed as in the original; its result is not returned
    speTable = buildSpecificityTable(tables['o1'])

    return tables['o1'], tables['o2'], taxTable
+
+
+
def start():
    """
    Demo entry point computing the intra-species diversity table
    from a test ecoPCR output file.
    """
    file = "/Users/bessiere/Documents/workspace/ecoPCR/src/toto.tmp"
    filter = Filter("/ecoPCRDB/gbmam")

    diversity = ecoTable(("taxid", "num of seq", "list of seq"),
                         (int, int, list))

    getIntraSpeciesDiversity(diversity, file, filter)
+
+
+
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/ecopcr.git
More information about the debian-med-commit
mailing list