[med-svn] [malt] 01/02: New upstream version 0.0+20160916
Andreas Tille
tille at debian.org
Tue Oct 11 18:35:32 UTC 2016
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository malt.
commit 44f21de8cd606588500a5f00cade38aa1ffc4699
Author: Andreas Tille <tille at debian.org>
Date: Tue Oct 11 20:32:43 2016 +0200
New upstream version 0.0+20160916
---
.gitignore | 30 +
LICENSE | 539 +++++++++
README.md | 3 +
antbuild/build.xml | 95 ++
installer/License.txt | 18 +
installer/malt.install4j | 1496 +++++++++++++++++++++++
installer/malt2.install4j | 1566 +++++++++++++++++++++++++
resources/files/megan5PublicKey.txt | Bin 0 -> 443 bytes
resources/icons/malt-build.icns | Bin 0 -> 10383 bytes
resources/icons/malt-build16.png | Bin 0 -> 3991 bytes
resources/icons/malt-build32.png | Bin 0 -> 4263 bytes
resources/icons/malt-build48.pdf | Bin 0 -> 62140 bytes
resources/icons/malt-build48.png | Bin 0 -> 5475 bytes
resources/icons/malt-run.icns | Bin 0 -> 9737 bytes
resources/icons/malt-run16.png | Bin 0 -> 4029 bytes
resources/icons/malt-run32.png | Bin 0 -> 4200 bytes
resources/icons/malt-run48.pdf | Bin 0 -> 36102 bytes
resources/icons/malt-run48.png | Bin 0 -> 4421 bytes
resources/log4j.properties | 58 +
src/malt/AlignmentEngine.java | 649 ++++++++++
src/malt/DataForInnerLoop.java | 227 ++++
src/malt/ITextProducer.java | 28 +
src/malt/MaltBuild.java | 246 ++++
src/malt/MaltOptions.java | 367 ++++++
src/malt/MaltRun.java | 584 +++++++++
src/malt/Notes | 23 +
src/malt/TestIO.java | 326 +++++
src/malt/Version.java | 29 +
src/malt/align/AlignerOptions.java | 217 ++++
src/malt/align/BandedAligner.java | 1407 ++++++++++++++++++++++
src/malt/align/BlastStatisticsHelper.java | 216 ++++
src/malt/align/DNAScoringMatrix.java | 52 +
src/malt/align/IScoringMatrix.java | 42 +
src/malt/align/ProteinScoringMatrix.java | 373 ++++++
src/malt/align/SimpleAligner4DNA.java | 222 ++++
src/malt/analysis/OrganismsProfile.java | 205 ++++
src/malt/analysis/OrganismsProfileMerger.java | 301 +++++
src/malt/analysis/QueryItem.java | 50 +
src/malt/analysis/ReadMatchItem.java | 38 +
src/malt/data/BuildRow.java | 125 ++
src/malt/data/DNA5.java | 236 ++++
src/malt/data/IAlphabet.java | 80 ++
src/malt/data/INormalizer.java | 34 +
src/malt/data/ISequenceAccessor.java | 34 +
src/malt/data/ProteinAlphabet.java | 141 +++
src/malt/data/QuerySequence2MatchesCache.java | 268 +++++
src/malt/data/ReadMatch.java | 170 +++
src/malt/data/ReducedAlphabet.java | 219 ++++
src/malt/data/RefIndex2ClassId.java | 157 +++
src/malt/data/ReferencesDBAccess.java | 187 +++
src/malt/data/ReferencesDBBuilder.java | 292 +++++
src/malt/data/ReferencesHashTableAccess.java | 301 +++++
src/malt/data/ReferencesHashTableBuilder.java | 504 ++++++++
src/malt/data/Row.java | 117 ++
src/malt/data/SeedMatch.java | 140 +++
src/malt/data/SeedShape.java | 214 ++++
src/malt/data/SequenceType.java | 65 +
src/malt/data/Translator.java | 90 ++
src/malt/genes/GeneItem.java | 189 +++
src/malt/genes/GeneTableAccess.java | 209 ++++
src/malt/genes/GeneTableBuilder.java | 467 ++++++++
src/malt/io/BlastTextHelper.java | 111 ++
src/malt/io/FastAFileIteratorBytes.java | 254 ++++
src/malt/io/FastAReader.java | 307 +++++
src/malt/io/FastARecord.java | 139 +++
src/malt/io/FileWriterRanked.java | 266 +++++
src/malt/io/RMA6Writer.java | 217 ++++
src/malt/io/SAMHelper.java | 442 +++++++
src/malt/io/xml/DatasetType.java | 75 ++
src/malt/io/xml/GeneType.java | 209 ++++
src/malt/io/xml/GenesType.java | 82 ++
src/malt/io/xml/ObjectFactory.java | 169 +++
src/malt/io/xml/OrganismType.java | 237 ++++
src/malt/io/xml/OrganismsType.java | 84 ++
src/malt/io/xml/OutputSchema.xsd | 103 ++
src/malt/io/xml/ReadsType.java | 84 ++
src/malt/io/xml/RelativeAmount.java | 96 ++
src/malt/io/xml/ReportType.java | 98 ++
src/malt/io/xml/Taxonomy.java | 95 ++
src/malt/mapping/Mapping.java | 104 ++
src/malt/mapping/MappingManager.java | 115 ++
src/malt/sequence/Alphabet.java | 189 +++
src/malt/sequence/DNA5Alphabet.java | 67 ++
src/malt/sequence/FastAFileIteratorCode.java | 275 +++++
src/malt/sequence/ISeedExtractor.java | 64 +
src/malt/sequence/ProteinAlphabet.java | 46 +
src/malt/sequence/ProteinSequenceEncoder.java | 160 +++
src/malt/sequence/ReducedAlphabet.java | 228 ++++
src/malt/sequence/SeedShape2.java | 138 +++
src/malt/sequence/SequenceEncoder.java | 414 +++++++
src/malt/sequence/SequenceStore.java | 236 ++++
src/malt/util/FixedSizePriorityQueue.java | 104 ++
src/malt/util/MurmurHash3.java | 100 ++
src/malt/util/ProfileUtilities.java | 43 +
src/malt/util/ReusableByteBuffer.java | 128 ++
src/malt/util/TaxonomyUtilities.java | 151 +++
src/malt/util/Tester.java | 119 ++
src/malt/util/Utilities.java | 424 +++++++
tex/manual/Makefile | 10 +
tex/manual/definitions.tex | 189 +++
tex/manual/manual.bbl | 91 ++
tex/manual/manual.pdf | Bin 0 -> 202539 bytes
tex/manual/manual.tex | 640 ++++++++++
tex/manual/versioninfo.tex | 1 +
tex/manual/versioninfo_new.tex | 1 +
tex/manual/versioninfo_old.tex | 1 +
106 files changed, 20752 insertions(+)
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ce93214
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,30 @@
+# Class files
+class/
+*.class
+
+# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
+hs_err_pid*
+
+# intellij
+*.iml
+.idea
+
+# MacOS
+.DS_Store
+
+# LaTeX auxiliary files
+*.aux
+*.blg
+*.idx
+*.ilg
+*.ind
+*.log
+*.out
+*.toc
+
+# antbuild:
+antbuild/*.jar
+antbuild/*rename*
+antbuild/*shrink*
+antbuild/src/
+antbuild/classes/
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..ef5ee91
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,539 @@
+GNU GENERAL PUBLIC LICENSE
+
+Version 3, 29 June 2007
+
+Copyright © 2007 Free Software Foundation, Inc. <http://fsf.org/>
+
+Everyone is permitted to copy and distribute verbatim copies of this license
+document, but changing it is not allowed.
+
+Preamble
+
+The GNU General Public License is a free, copyleft license for software and
+other kinds of works.
+
+The licenses for most software and other practical works are designed to take
+away your freedom to share and change the works. By contrast, the GNU General
+Public License is intended to guarantee your freedom to share and change all
+versions of a program--to make sure it remains free software for all its users.
+We, the Free Software Foundation, use the GNU General Public License for most
+of our software; it applies also to any other work released this way by its
+authors. You can apply it to your programs, too.
+
+When we speak of free software, we are referring to freedom, not price. Our
+General Public Licenses are designed to make sure that you have the freedom to
+distribute copies of free software (and charge for them if you wish), that you
+receive source code or can get it if you want it, that you can change the
+software or use pieces of it in new free programs, and that you know you can do
+these things.
+
+To protect your rights, we need to prevent others from denying you these rights
+or asking you to surrender the rights. Therefore, you have certain
+responsibilities if you distribute copies of the software, or if you modify it:
+responsibilities to respect the freedom of others.
+
+For example, if you distribute copies of such a program, whether gratis or for
+a fee, you must pass on to the recipients the same freedoms that you received.
+You must make sure that they, too, receive or can get the source code. And you
+must show them these terms so they know their rights.
+
+Developers that use the GNU GPL protect your rights with two steps: (1) assert
+copyright on the software, and (2) offer you this License giving you legal
+permission to copy, distribute and/or modify it.
+
+For the developers' and authors' protection, the GPL clearly explains that
+there is no warranty for this free software. For both users' and authors' sake,
+the GPL requires that modified versions be marked as changed, so that their
+problems will not be attributed erroneously to authors of previous versions.
+
+Some devices are designed to deny users access to install or run modified
+versions of the software inside them, although the manufacturer can do so. This
+is fundamentally incompatible with the aim of protecting users' freedom to
+change the software. The systematic pattern of such abuse occurs in the area of
+products for individuals to use, which is precisely where it is most
+unacceptable. Therefore, we have designed this version of the GPL to prohibit
+the practice for those products. If such problems arise substantially in other
+domains, we stand ready to extend this provision to those domains in future
+versions of the GPL, as needed to protect the freedom of users.
+
+Finally, every program is threatened constantly by software patents. States
+should not allow patents to restrict development and use of software on
+general-purpose computers, but in those that do, we wish to avoid the special
+danger that patents applied to a free program could make it effectively
+proprietary. To prevent this, the GPL assures that patents cannot be used to
+render the program non-free.
+
+The precise terms and conditions for copying, distribution and modification
+follow.
+
+TERMS AND CONDITIONS
+
+0. Definitions.
+“This License” refers to version 3 of the GNU General Public License.
+
+“Copyright” also means copyright-like laws that apply to other kinds of works,
+such as semiconductor masks.
+
+“The Program” refers to any copyrightable work licensed under this License.
+Each licensee is addressed as “you”. “Licensees” and “recipients” may be
+individuals or organizations.
+
+To “modify” a work means to copy from or adapt all or part of the work in a
+fashion requiring copyright permission, other than the making of an exact copy.
+The resulting work is called a “modified version” of the earlier work or a work
+“based on” the earlier work.
+
+A “covered work” means either the unmodified Program or a work based on the
+Program.
+
+To “propagate” a work means to do anything with it that, without permission,
+would make you directly or secondarily liable for infringement under applicable
+copyright law, except executing it on a computer or modifying a private copy.
+Propagation includes copying, distribution (with or without modification),
+making available to the public, and in some countries other activities as well.
+
+To “convey” a work means any kind of propagation that enables other parties to
+make or receive copies. Mere interaction with a user through a computer
+network, with no transfer of a copy, is not conveying.
+
+An interactive user interface displays “Appropriate Legal Notices” to the
+extent that it includes a convenient and prominently visible feature that (1)
+displays an appropriate copyright notice, and (2) tells the user that there is
+no warranty for the work (except to the extent that warranties are provided),
+that licensees may convey the work under this License, and how to view a copy
+of this License. If the interface presents a list of user commands or options,
+such as a menu, a prominent item in the list meets this criterion.
+
+1. Source Code.
+The “source code” for a work means the preferred form of the work for making
+modifications to it. “Object code” means any non-source form of a work.
+
+A “Standard Interface” means an interface that either is an official standard
+defined by a recognized standards body, or, in the case of interfaces specified
+for a particular programming language, one that is widely used among developers
+working in that language.
+
+The “System Libraries” of an executable work include anything, other than the
+work as a whole, that (a) is included in the normal form of packaging a Major
+Component, but which is not part of that Major Component, and (b) serves only
+to enable use of the work with that Major Component, or to implement a Standard
+Interface for which an implementation is available to the public in source code
+form. A “Major Component”, in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system (if any) on
+which the executable work runs, or a compiler used to produce the work, or an
+object code interpreter used to run it.
+
+The “Corresponding Source” for a work in object code form means all the source
+code needed to generate, install, and (for an executable work) run the object
+code and to modify the work, including scripts to control those activities.
+However, it does not include the work's System Libraries, or general-purpose
+tools or generally available free programs which are used unmodified in
+performing those activities but which are not part of the work. For example,
+Corresponding Source includes interface definition files associated with source
+files for the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require, such as
+by intimate data communication or control flow between those subprograms and
+other parts of the work.
+
+The Corresponding Source need not include anything that users can regenerate
+automatically from other parts of the Corresponding Source.
+
+The Corresponding Source for a work in source code form is that same work.
+
+2. Basic Permissions.
+All rights granted under this License are granted for the term of copyright on
+the Program, and are irrevocable provided the stated conditions are met. This
+License explicitly affirms your unlimited permission to run the unmodified
+Program. The output from running a covered work is covered by this License only
+if the output, given its content, constitutes a covered work. This License
+acknowledges your rights of fair use or other equivalent, as provided by
+copyright law.
+
+You may make, run and propagate covered works that you do not convey, without
+conditions so long as your license otherwise remains in force. You may convey
+covered works to others for the sole purpose of having them make modifications
+exclusively for you, or provide you with facilities for running those works,
+provided that you comply with the terms of this License in conveying all
+material for which you do not control copyright. Those thus making or running
+the covered works for you must do so exclusively on your behalf, under your
+direction and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+Conveying under any other circumstances is permitted solely under the
+conditions stated below. Sublicensing is not allowed; section 10 makes it
+unnecessary.
+
+3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+No covered work shall be deemed part of an effective technological measure
+under any applicable law fulfilling obligations under article 11 of the WIPO
+copyright treaty adopted on 20 December 1996, or similar laws prohibiting or
+restricting circumvention of such measures.
+
+When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention is
+effected by exercising rights under this License with respect to the covered
+work, and you disclaim any intention to limit operation or modification of the
+work as a means of enforcing, against the work's users, your or third parties'
+legal rights to forbid circumvention of technological measures.
+
+4. Conveying Verbatim Copies.
+You may convey verbatim copies of the Program's source code as you receive it,
+in any medium, provided that you conspicuously and appropriately publish on
+each copy an appropriate copyright notice; keep intact all notices stating that
+this License and any non-permissive terms added in accord with section 7 apply
+to the code; keep intact all notices of the absence of any warranty; and give
+all recipients a copy of this License along with the Program.
+
+You may charge any price or no price for each copy that you convey, and you may
+offer support or warranty protection for a fee.
+
+5. Conveying Modified Source Versions.
+You may convey a work based on the Program, or the modifications to produce it
+from the Program, in the form of source code under the terms of section 4,
+provided that you also meet all of these conditions:
+
+a) The work must carry prominent notices stating that you modified it, and
+giving a relevant date.
+b) The work must carry prominent notices stating that it is released under this
+License and any conditions added under section 7. This requirement modifies the
+requirement in section 4 to “keep intact all notices”.
+c) You must license the entire work, as a whole, under this License to anyone
+who comes into possession of a copy. This License will therefore apply, along
+with any applicable section 7 additional terms, to the whole of the work, and
+all its parts, regardless of how they are packaged. This License gives no
+permission to license the work in any other way, but it does not invalidate
+such permission if you have separately received it.
+d) If the work has interactive user interfaces, each must display Appropriate
+Legal Notices; however, if the Program has interactive interfaces that do not
+display Appropriate Legal Notices, your work need not make them do so.
+A compilation of a covered work with other separate and independent works,
+which are not by their nature extensions of the covered work, and which are not
+combined with it such as to form a larger program, in or on a volume of a
+storage or distribution medium, is called an “aggregate” if the compilation and
+its resulting copyright are not used to limit the access or legal rights of the
+compilation's users beyond what the individual works permit. Inclusion of a
+covered work in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+6. Conveying Non-Source Forms.
+You may convey a covered work in object code form under the terms of sections 4
+and 5, provided that you also convey the machine-readable Corresponding Source
+under the terms of this License, in one of these ways:
+
+a) Convey the object code in, or embodied in, a physical product (including a
+physical distribution medium), accompanied by the Corresponding Source fixed on
+a durable physical medium customarily used for software interchange.
+b) Convey the object code in, or embodied in, a physical product (including a
+physical distribution medium), accompanied by a written offer, valid for at
+least three years and valid for as long as you offer spare parts or customer
+support for that product model, to give anyone who possesses the object code
+either (1) a copy of the Corresponding Source for all the software in the
+product that is covered by this License, on a durable physical medium
+customarily used for software interchange, for a price no more than your
+reasonable cost of physically performing this conveying of source, or (2)
+access to copy the Corresponding Source from a network server at no charge.
+c) Convey individual copies of the object code with a copy of the written offer
+to provide the Corresponding Source. This alternative is allowed only
+occasionally and noncommercially, and only if you received the object code with
+such an offer, in accord with subsection 6b.
+d) Convey the object code by offering access from a designated place (gratis or
+for a charge), and offer equivalent access to the Corresponding Source in the
+same way through the same place at no further charge. You need not require
+recipients to copy the Corresponding Source along with the object code. If the
+place to copy the object code is a network server, the Corresponding Source may
+be on a different server (operated by you or a third party) that supports
+equivalent copying facilities, provided you maintain clear directions next to
+the object code saying where to find the Corresponding Source. Regardless of
+what server hosts the Corresponding Source, you remain obligated to ensure that
+it is available for as long as needed to satisfy these requirements.
+e) Convey the object code using peer-to-peer transmission, provided you inform
+other peers where the object code and Corresponding Source of the work are
+being offered to the general public at no charge under subsection 6d.
+A separable portion of the object code, whose source code is excluded from the
+Corresponding Source as a System Library, need not be included in conveying the
+object code work.
+
+A “User Product” is either (1) a “consumer product”, which means any tangible
+personal property which is normally used for personal, family, or household
+purposes, or (2) anything designed or sold for incorporation into a dwelling.
+In determining whether a product is a consumer product, doubtful cases shall be
+resolved in favor of coverage. For a particular product received by a
+particular user, “normally used” refers to a typical or common use of that
+class of product, regardless of the status of the particular user or of the way
+in which the particular user actually uses, or expects or is expected to use,
+the product. A product is a consumer product regardless of whether the product
+has substantial commercial, industrial or non-consumer uses, unless such uses
+represent the only significant mode of use of the product.
+
+“Installation Information” for a User Product means any methods, procedures,
+authorization keys, or other information required to install and execute
+modified versions of a covered work in that User Product from a modified
+version of its Corresponding Source. The information must suffice to ensure
+that the continued functioning of the modified object code is in no case
+prevented or interfered with solely because modification has been made.
+
+If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as part of a
+transaction in which the right of possession and use of the User Product is
+transferred to the recipient in perpetuity or for a fixed term (regardless of
+how the transaction is characterized), the Corresponding Source conveyed under
+this section must be accompanied by the Installation Information. But this
+requirement does not apply if neither you nor any third party retains the
+ability to install modified object code on the User Product (for example, the
+work has been installed in ROM).
+
+The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates for a
+work that has been modified or installed by the recipient, or for the User
+Product in which it has been modified or installed. Access to a network may be
+denied when the modification itself materially and adversely affects the
+operation of the network or violates the rules and protocols for communication
+across the network.
+
+Corresponding Source conveyed, and Installation Information provided, in accord
+with this section must be in a format that is publicly documented (and with an
+implementation available to the public in source code form), and must require
+no special password or key for unpacking, reading or copying.
+
+7. Additional Terms.
+“Additional permissions” are terms that supplement the terms of this License by
+making exceptions from one or more of its conditions. Additional permissions
+that are applicable to the entire Program shall be treated as though they were
+included in this License, to the extent that they are valid under applicable
+law. If additional permissions apply only to part of the Program, that part may
+be used separately under those permissions, but the entire Program remains
+governed by this License without regard to the additional permissions.
+
+When you convey a copy of a covered work, you may at your option remove any
+additional permissions from that copy, or from any part of it. (Additional
+permissions may be written to require their own removal in certain cases when
+you modify the work.) You may place additional permissions on material, added
+by you to a covered work, for which you have or can give appropriate copyright
+permission.
+
+Notwithstanding any other provision of this License, for material you add to a
+covered work, you may (if authorized by the copyright holders of that material)
+supplement the terms of this License with terms:
+
+a) Disclaiming warranty or limiting liability differently from the terms of
+sections 15 and 16 of this License; or
+b) Requiring preservation of specified reasonable legal notices or author
+attributions in that material or in the Appropriate Legal Notices displayed by
+works containing it; or
+c) Prohibiting misrepresentation of the origin of that material, or requiring
+that modified versions of such material be marked in reasonable ways as
+different from the original version; or
+d) Limiting the use for publicity purposes of names of licensors or authors of
+the material; or
+e) Declining to grant rights under trademark law for use of some trade names,
+trademarks, or service marks; or
+f) Requiring indemnification of licensors and authors of that material by
+anyone who conveys the material (or modified versions of it) with contractual
+assumptions of liability to the recipient, for any liability that these
+contractual assumptions directly impose on those licensors and authors.
+All other non-permissive additional terms are considered “further restrictions”
+within the meaning of section 10. If the Program as you received it, or any
+part of it, contains a notice stating that it is governed by this License along
+with a term that is a further restriction, you may remove that term. If a
+license document contains a further restriction but permits relicensing or
+conveying under this License, you may add to a covered work material governed
+by the terms of that license document, provided that the further restriction
+does not survive such relicensing or conveying.
+
+If you add terms to a covered work in accord with this section, you must place,
+in the relevant source files, a statement of the additional terms that apply to
+those files, or a notice indicating where to find the applicable terms.
+
+Additional terms, permissive or non-permissive, may be stated in the form of a
+separately written license, or stated as exceptions; the above requirements
+apply either way.
+
+8. Termination.
+You may not propagate or modify a covered work except as expressly provided
+under this License. Any attempt otherwise to propagate or modify it is void,
+and will automatically terminate your rights under this License (including any
+patent licenses granted under the third paragraph of section 11).
+
+However, if you cease all violation of this License, then your license from a
+particular copyright holder is reinstated (a) provisionally, unless and until
+the copyright holder explicitly and finally terminates your license, and (b)
+permanently, if the copyright holder fails to notify you of the violation by
+some reasonable means prior to 60 days after the cessation.
+
+Moreover, your license from a particular copyright holder is reinstated
+permanently if the copyright holder notifies you of the violation by some
+reasonable means, this is the first time you have received notice of violation
+of this License (for any work) from that copyright holder, and you cure the
+violation prior to 30 days after your receipt of the notice.
+
+Termination of your rights under this section does not terminate the licenses
+of parties who have received copies or rights from you under this License. If
+your rights have been terminated and not permanently reinstated, you do not
+qualify to receive new licenses for the same material under section 10.
+
+9. Acceptance Not Required for Having Copies.
+You are not required to accept this License in order to receive or run a copy
+of the Program. Ancillary propagation of a covered work occurring solely as a
+consequence of using peer-to-peer transmission to receive a copy likewise does
+not require acceptance. However, nothing other than this License grants you
+permission to propagate or modify any covered work. These actions infringe
+copyright if you do not accept this License. Therefore, by modifying or
+propagating a covered work, you indicate your acceptance of this License to do
+so.
+
+10. Automatic Licensing of Downstream Recipients.
+Each time you convey a covered work, the recipient automatically receives a
+license from the original licensors, to run, modify and propagate that work,
+subject to this License. You are not responsible for enforcing compliance by
+third parties with this License.
+
+An “entity transaction” is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered work
+results from an entity transaction, each party to that transaction who receives
+a copy of the work also receives whatever licenses to the work the party's
+predecessor in interest had or could give under the previous paragraph, plus a
+right to possession of the Corresponding Source of the work from the
+predecessor in interest, if the predecessor has it or can get it with
+reasonable efforts.
+
+You may not impose any further restrictions on the exercise of the rights
+granted or affirmed under this License. For example, you may not impose a
+license fee, royalty, or other charge for exercise of rights granted under this
+License, and you may not initiate litigation (including a cross-claim or
+counterclaim in a lawsuit) alleging that any patent claim is infringed by
+making, using, selling, offering for sale, or importing the Program or any
+portion of it.
+
+11. Patents.
+A “contributor” is a copyright holder who authorizes use under this License of
+the Program or a work on which the Program is based. The work thus licensed is
+called the contributor's “contributor version”.
+
+A contributor's “essential patent claims” are all patent claims owned or
+controlled by the contributor, whether already acquired or hereafter acquired,
+that would be infringed by some manner, permitted by this License, of making,
+using, or selling its contributor version, but do not include claims that would
+be infringed only as a consequence of further modification of the contributor
+version. For purposes of this definition, “control” includes the right to grant
+patent sublicenses in a manner consistent with the requirements of this License.
+
+Each contributor grants you a non-exclusive, worldwide, royalty-free patent
+license under the contributor's essential patent claims, to make, use, sell,
+offer for sale, import and otherwise run, modify and propagate the contents of
+its contributor version.
+
+In the following three paragraphs, a “patent license” is any express agreement
+or commitment, however denominated, not to enforce a patent (such as an express
+permission to practice a patent or covenant not to sue for patent
+infringement). To “grant” such a patent license to a party means to make such
+an agreement or commitment not to enforce a patent against the party.
+
+If you convey a covered work, knowingly relying on a patent license, and the
+Corresponding Source of the work is not available for anyone to copy, free of
+charge and under the terms of this License, through a publicly available
+network server or other readily accessible means, then you must either (1)
+cause the Corresponding Source to be so available, or (2) arrange to deprive
+yourself of the benefit of the patent license for this particular work, or (3)
+arrange, in a manner consistent with the requirements of this License, to
+extend the patent license to downstream recipients. “Knowingly relying” means
+you have actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work in a
+country, would infringe one or more identifiable patents in that country that
+you have reason to believe are valid.
+
+If, pursuant to or in connection with a single transaction or arrangement, you
+convey, or propagate by procuring conveyance of, a covered work, and grant a
+patent license to some of the parties receiving the covered work authorizing
+them to use, propagate, modify or convey a specific copy of the covered work,
+then the patent license you grant is automatically extended to all recipients
+of the covered work and works based on it.
+
+A patent license is “discriminatory” if it does not include within the scope of
+its coverage, prohibits the exercise of, or is conditioned on the non-exercise
+of one or more of the rights that are specifically granted under this License.
+You may not convey a covered work if you are a party to an arrangement with a
+third party that is in the business of distributing software, under which you
+make payment to the third party based on the extent of your activity of
+conveying the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory patent
+license (a) in connection with copies of the covered work conveyed by you (or
+copies made from those copies), or (b) primarily for and in connection with
+specific products or compilations that contain the covered work, unless you
+entered into that arrangement, or that patent license was granted, prior to 28
+March 2007.
+
+Nothing in this License shall be construed as excluding or limiting any implied
+license or other defenses to infringement that may otherwise be available to
+you under applicable patent law.
+
+12. No Surrender of Others' Freedom.
+If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not excuse
+you from the conditions of this License. If you cannot convey a covered work so
+as to satisfy simultaneously your obligations under this License and any other
+pertinent obligations, then as a consequence you may not convey it at all. For
+example, if you agree to terms that obligate you to collect a royalty for
+further conveying from those to whom you convey the Program, the only way you
+could satisfy both those terms and this License would be to refrain entirely
+from conveying the Program.
+
+13. Use with the GNU Affero General Public License.
+Notwithstanding any other provision of this License, you have permission to
+link or combine any covered work with a work licensed under version 3 of the
+GNU Affero General Public License into a single combined work, and to convey
+the resulting work. The terms of this License will continue to apply to the
+part which is the covered work, but the special requirements of the GNU Affero
+General Public License, section 13, concerning interaction through a network
+will apply to the combination as such.
+
+14. Revised Versions of this License.
+The Free Software Foundation may publish revised and/or new versions of the GNU
+General Public License from time to time. Such new versions will be similar in
+spirit to the present version, but may differ in detail to address new problems
+or concerns.
+
+Each version is given a distinguishing version number. If the Program specifies
+that a certain numbered version of the GNU General Public License “or any later
+version” applies to it, you have the option of following the terms and
+conditions either of that numbered version or of any later version published by
+the Free Software Foundation. If the Program does not specify a version number
+of the GNU General Public License, you may choose any version ever published by
+the Free Software Foundation.
+
+If the Program specifies that a proxy can decide which future versions of the
+GNU General Public License can be used, that proxy's public statement of
+acceptance of a version permanently authorizes you to choose that version for
+the Program.
+
+Later license versions may give you additional or different permissions.
+However, no additional obligations are imposed on any author or copyright
+holder as a result of your choosing to follow a later version.
+
+15. Disclaimer of Warranty.
+THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE
+LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER
+PARTIES PROVIDE THE PROGRAM “AS IS” WITHOUT WARRANTY OF ANY KIND, EITHER
+EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE
+QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE
+DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+16. Limitation of Liability.
+IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY
+COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS
+PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL,
+INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE
+THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED
+INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE
+PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY
+HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+17. Interpretation of Sections 15 and 16.
+If the disclaimer of warranty and limitation of liability provided above cannot
+be given local legal effect according to their terms, reviewing courts shall
+apply local law that most closely approximates an absolute waiver of all civil
+liability in connection with the Program, unless a warranty or assumption of
+liability accompanies a copy of the Program in return for a fee.
+
+END OF TERMS AND CONDITIONS
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..1cc4226
--- /dev/null
+++ b/README.md
@@ -0,0 +1,3 @@
+# malt
+
+MALT - MEGAN alignment tool
diff --git a/antbuild/build.xml b/antbuild/build.xml
new file mode 100644
index 0000000..f8d9724
--- /dev/null
+++ b/antbuild/build.xml
@@ -0,0 +1,95 @@
+<!-- file build.xml in your project root directory -->
+
+<project name="project" default="jar" basedir=".">
+
+ <!-- edit the following lines to your needs: the three source trees that get merged, the local staging and output dirs, the jar name and the main class used by "run" -->
+ <property name="project_name" value="MALT"/>
+ <property name="jlodaSrcDir" value="../../jloda/src"/>
+ <property name="meganSrcDir" value="../../megan-ce/src"/>
+ <property name="maltSrcDir" value="../../malt/src"/>
+ <property name="srcDir" value="src"/>
+ <property name="classDir" value="classes"/>
+ <property name="jar" value="${project_name}.jar"/>
+ <property name="mainclass" value="malt.MaltBuild"/>
+
+ <!-- class path: every jar found in the listed jloda, megan-ce and malt jar dirs; referenced by "compile" and "run" -->
+
+ <path id="build.classpath">
+ <fileset dir="../../jloda/jars" includes="*.jar"/>
+ <fileset dir="../../jloda/jars/batik-1.8" includes="*.jar"/>
+ <fileset dir="../../megan-ce/jars" includes="*.jar"/>
+ <fileset dir="../../megan-ce/jars/megan6server" includes="*.jar"/>
+ <fileset dir="../../malt/jars/" includes="*.jar"/>
+ </path>
+
+ <!-- init: create the source staging dir, the class output dir and its resources subdir -->
+ <target name="init">
+ <mkdir dir="${srcDir}"/>
+ <mkdir dir="${classDir}"/>
+ <mkdir dir="${classDir}/resources"/>
+ </target>
+
+ <!-- copy resources: copy ../resources (minus .svn metadata) into the resources subdir of the class output dir -->
+ <target name="copy_resources" depends="init">
+ <copy todir="${classDir}/resources">
+ <fileset dir="../resources" excludes=".svn/**"/>
+ </copy>
+ </target>
+
+ <!-- copy sources: merge the jloda, megan-ce and malt source trees into the staging dir, leaving out the paths named in each excludes list -->
+ <target name="copy_sources" depends="copy_resources">
+ <copy todir="${srcDir}">
+ <fileset dir="${jlodaSrcDir}"
+ excludes=".svn/** jloda/matrix/** jloda/models/** test/** bioinf1/** gbi/** gitter/**"/>
+ <fileset dir="${meganSrcDir}" excludes=".svn/** test/** malt/**"/>
+ <fileset dir="${maltSrcDir}" excludes=".svn/** test/**"/>
+ </copy>
+ </target>
+
+ <!-- compile MALT: compile the staged sources at Java 1.7 source/target level with debug info; Ant's own runtime is kept off the class path -->
+ <target name="compile" depends="copy_sources">
+ <javac srcdir="${srcDir}"
+ destdir="${classDir}"
+ debug="on"
+ compiler="javac1.7"
+ classpathref="build.classpath"
+ source="1.7"
+ target="1.7"
+ includeantruntime="false"
+ />
+ </target>
+
+ <!-- create .jar: package only the listed class trees (plus a top-level log4j.properties, if one exists) from the class output dir -->
+ <target name="jar" depends="compile">
+ <jar jarfile="${jar}"
+ basedir="${classDir}"
+ includes="jloda/** megan/** log4j.properties malt/** rusch/**">
+ </jar>
+ </target>
+
+ <!-- run project: fork a JVM on the configured main class, passing only the help option; class path is build.classpath plus the built jar -->
+ <target name="run" depends="jar">
+ <java classname="${mainclass}" fork="true"
+ classpathref="build.classpath">
+ <classpath>
+ <pathelement location="${jar}"/>
+ </classpath>
+ <arg value="--help"/>
+ <jvmarg value="-Dapple.laf.useScreenMenuBar=true"/>
+ <jvmarg value="-server"/>
+ <jvmarg value="-d64"/>
+ <jvmarg value="-Xmx2000M"/>
+ <jvmarg value="-Duser.language=en"/>
+ <jvmarg value="-Duser.region=US"/>
+ </java>
+ </target>
+
+ <!-- removes all that has been built: compiled classes, the staged source copy and the jar; paths come from the properties above so edits there are honoured -->
+ <target name="clean">
+ <delete dir="${classDir}" includeEmptyDirs="true"/>
+ <delete dir="${srcDir}" includeEmptyDirs="true"/>
+ <delete file="${jar}"/>
+ </target>
+</project>
+
+ <!-- end file build.xml -->
diff --git a/installer/License.txt b/installer/License.txt
new file mode 100644
index 0000000..cd81f34
--- /dev/null
+++ b/installer/License.txt
@@ -0,0 +1,18 @@
+MALT - MEGAN ALignment Tool
+
+Copyright (c) 2016, Daniel H. Huson
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+For more info on this program, see <http://www-ab3.informatik.uni-tuebingen.de/software/malt/>.
diff --git a/installer/malt.install4j b/installer/malt.install4j
new file mode 100644
index 0000000..8835a82
--- /dev/null
+++ b/installer/malt.install4j
@@ -0,0 +1,1496 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<install4j version="6.1.1" transformSequenceNumber="5">
+ <directoryPresets config="../../megan6/jars/data.jar" />
+ <application name="MALT" distributionSourceDir="" applicationId="3229-5251-7410-5330" mediaDir="../../../builds" mediaFilePattern="${compiler:sys.shortName}_${compiler:sys.platform}_${compiler:sys.version}" compression="6" lzmaCompression="false" pack200Compression="false" excludeSignedFromPacking="true" commonExternalFiles="false" createMd5Sums="true" shrinkRuntime="true" shortName="MALT" publisher="Daniel Huson's Lab, University of Tuebingen" publisherWeb="www-ab.informatik.uni-tuebi [...]
+ <languages skipLanguageSelection="false" languageSelectionInPrincipalLanguage="false">
+ <principalLanguage id="en" customLocalizationFile="" />
+ <additionalLanguages />
+ </languages>
+ <searchSequence>
+ <registry />
+ <envVar name="JAVA_HOME" />
+ <envVar name="JDK_HOME" />
+ </searchSequence>
+ <variables>
+ <variable name="variable" value="" description="" category="" />
+ </variables>
+ <mergedProjects />
+ <codeSigning macEnabled="true" macPkcs12File="../../../../etc/Certificates.p12" windowsEnabled="false" windowsKeySource="pvkAndSpc" windowsPvkFile="" windowsSpcFile="" windowsPkcs12File="" />
+ </application>
+ <files keepModificationTimes="false" missingFilesStrategy="warn" globalExcludeSuffixes=".svn,.CVS,*.psd,*.java" defaultOverwriteMode="4" defaultUninstallMode="0" launcherOverwriteMode="3" defaultFileMode="644" defaultDirMode="755">
+ <filesets />
+ <roots />
+ <mountPoints>
+ <mountPoint id="1691242408" root="" location="class" mode="755" />
+ <mountPoint id="1691242409" root="" location="class/resources" mode="755" />
+ <mountPoint id="1691243191" root="" location="class/resources/icons" mode="755" />
+ <mountPoint id="1691242151" root="" location="jars" mode="755" />
+ <mountPoint id="1691242160" root="" location="" mode="755" />
+ </mountPoints>
+ <entries>
+ <fileEntry mountPoint="1691243191" file="../resources/icons/malt-build48.png" overwriteMode="4" shared="false" fileMode="644" uninstallMode="0" overrideFileMode="false" overrideOverwriteMode="false" overrideUninstallMode="false" />
+ <fileEntry mountPoint="1691243191" file="../resources/icons/malt-run48.png" overwriteMode="4" shared="false" fileMode="644" uninstallMode="0" overrideFileMode="false" overrideOverwriteMode="false" overrideUninstallMode="false" />
+ <fileEntry mountPoint="1691242408" file="../resources/log4j.properties" overwriteMode="4" shared="false" fileMode="644" uninstallMode="0" overrideFileMode="true" overrideOverwriteMode="true" overrideUninstallMode="true" />
+ <fileEntry mountPoint="1691242151" file="../antbuild/MALT.jar" overwriteMode="4" shared="false" fileMode="644" uninstallMode="0" overrideFileMode="false" overrideOverwriteMode="false" overrideUninstallMode="false" />
+ <dirEntry mountPoint="1691242151" file="../jars" overwriteMode="4" shared="false" fileMode="644" uninstallMode="0" overrideFileMode="false" overrideOverwriteMode="false" overrideUninstallMode="false" entryMode="direct" subDirectory="jars" excludeSuffixes="" dirMode="755" overrideDirMode="false">
+ <exclude />
+ </dirEntry>
+ <fileEntry mountPoint="1691242151" file="../../megan-ce/jars/data.jar" overwriteMode="4" shared="false" fileMode="644" uninstallMode="0" overrideFileMode="false" overrideOverwriteMode="false" overrideUninstallMode="false" />
+ <fileEntry mountPoint="1691242160" file="./License.txt" overwriteMode="4" shared="false" fileMode="644" uninstallMode="0" overrideFileMode="true" overrideOverwriteMode="true" overrideUninstallMode="true" />
+ <fileEntry mountPoint="1691242160" file="../tex/manual/manual.pdf" overwriteMode="4" shared="false" fileMode="644" uninstallMode="0" overrideFileMode="true" overrideOverwriteMode="true" overrideUninstallMode="true" />
+ </entries>
+ <components>
+ <component name="Manual" id="1691242193" customizedId="" displayDescription="false" hideHelpButton="false" selected="true" changeable="true" downloadable="false" hidden="false">
+ <description />
+ <include all="false">
+ <entry location="License.txt" fileType="regular" />
+ <entry location="manual.pdf" fileType="regular" />
+ </include>
+ <dependencies />
+ </component>
+ <component name="malt-build" id="1691243097" customizedId="" displayDescription="false" hideHelpButton="false" selected="true" changeable="true" downloadable="false" hidden="false">
+ <description />
+ <include all="false">
+ <entry location="class" fileType="regular" />
+ <entry location="jars" fileType="regular" />
+ <entry location="License.txt" fileType="regular" />
+ <entry location="malt-build-gui" fileType="launcher" />
+ <entry location="malt-build" fileType="launcher" />
+ </include>
+ <dependencies />
+ </component>
+ <component name="malt-run" id="1691243098" customizedId="" displayDescription="false" hideHelpButton="false" selected="true" changeable="true" downloadable="false" hidden="false">
+ <description />
+ <include all="false">
+ <entry location="class" fileType="regular" />
+ <entry location="jars" fileType="regular" />
+ <entry location="License.txt" fileType="regular" />
+ <entry location="malt-run-gui" fileType="launcher" />
+ <entry location="malt-run" fileType="launcher" />
+ </include>
+ <dependencies />
+ </component>
+ </components>
+ </files>
+ <launchers>
+ <launcher name="malt-build-gui" id="1691242235" customizedId="" external="false" excludeFromMenu="false" unixMode="755" menuName="" icnsFile="../resources/icons/malt-build.icns" customMacBundleIdentifier="false" macBundleIdentifier="" swtApp="false" fileset="" macBundleBinary="JavaApplicationStub" addMacEntitlements="false" macEntitlementsFile="" useCustomMacosExecutableName="false" customMacosExecutableName="">
+ <executable name="malt-build-gui" type="1" iconSet="true" iconFile="" executableDir="" redirectStderr="true" stderrFile="malt-build.log" stderrMode="overwrite" redirectStdout="true" stdoutFile="malt-build.log" stdoutMode="overwrite" failOnStderrOutput="true" executableMode="1" changeWorkingDirectory="false" workingDirectory="." singleInstance="false" serviceStartType="2" serviceDependencies="" serviceDescription="" jreLocation="" executionLevel="asInvoker" checkConsoleParameter="fa [...]
+ <versionInfo include="false" fileVersion="" fileDescription="" legalCopyright="" internalName="" productName="" />
+ </executable>
+ <splashScreen show="false" width="450" height="300" bitmapFile="" windowsNative="false" textOverlay="false">
+ <text>
+ <statusLine x="383" y="15" text="" fontSize="8" fontColor="0,0,0" bold="false" />
+ <versionLine x="17" y="10" text="MALT ${compiler:sys.version}" fontSize="12" fontColor="0,0,0" bold="false" />
+ </text>
+ </splashScreen>
+ <java mainClass="malt.MaltBuild" vmParameters="-server -Dapple.laf.useScreenMenuBar=true -Duser.language=en -Duser.region=US -XX:NewRatio=2" arguments="--argsGui" allowVMPassthroughParameters="true" preferredVM="server" bundleRuntime="true">
+ <classPath>
+ <directory location="class" failOnError="false" />
+ <scanDirectory location="jars" failOnError="false" />
+ </classPath>
+ <nativeLibraryDirectories />
+ <vmOptions />
+ </java>
+ <includedFiles />
+ <unextractableFiles />
+ <vmOptionsFile mode="template" overwriteMode="1" fileMode="644">
+ <content />
+ </vmOptionsFile>
+ <customScript mode="1" file="">
+ <content />
+ </customScript>
+ <infoPlist mode="1" file="">
+ <content />
+ </infoPlist>
+ <iconImageFiles>
+ <file path="../resources/icons/malt-build16.png" />
+ <file path="../resources/icons/malt-build32.png" />
+ <file path="../resources/icons/malt-build48.png" />
+ </iconImageFiles>
+ </launcher>
+ <launcher name="malt-run-gui" id="1691243093" customizedId="" external="false" excludeFromMenu="false" unixMode="755" menuName="" icnsFile="../resources/icons/malt-run.icns" customMacBundleIdentifier="false" macBundleIdentifier="" swtApp="false" fileset="" macBundleBinary="JavaApplicationStub" addMacEntitlements="false" macEntitlementsFile="" useCustomMacosExecutableName="false" customMacosExecutableName="">
+ <executable name="malt-run-gui" type="1" iconSet="true" iconFile="" executableDir="" redirectStderr="true" stderrFile="malt-run.log" stderrMode="overwrite" redirectStdout="true" stdoutFile="malt-run.log" stdoutMode="overwrite" failOnStderrOutput="true" executableMode="1" changeWorkingDirectory="false" workingDirectory="." singleInstance="false" serviceStartType="2" serviceDependencies="" serviceDescription="" jreLocation="" executionLevel="asInvoker" checkConsoleParameter="false" g [...]
+ <versionInfo include="false" fileVersion="" fileDescription="" legalCopyright="" internalName="" productName="" />
+ </executable>
+ <splashScreen show="false" width="0" height="0" bitmapFile="" windowsNative="false" textOverlay="false">
+ <text>
+ <statusLine x="20" y="20" text="" fontSize="8" fontColor="0,0,0" bold="false" />
+ <versionLine x="20" y="40" text="version ${compiler:sys.version}" fontSize="8" fontColor="0,0,0" bold="false" />
+ </text>
+ </splashScreen>
+ <java mainClass="malt.MaltRun" vmParameters="-server -Dapple.laf.useScreenMenuBar=true -Duser.language=en -Duser.region=US -XX:NewRatio=2" arguments="--argsGui" allowVMPassthroughParameters="true" preferredVM="" bundleRuntime="true">
+ <classPath>
+ <directory location="class" failOnError="false" />
+ <scanDirectory location="jars" failOnError="false" />
+ </classPath>
+ <nativeLibraryDirectories />
+ <vmOptions />
+ </java>
+ <includedFiles />
+ <unextractableFiles />
+ <vmOptionsFile mode="template" overwriteMode="0" fileMode="644">
+ <content />
+ </vmOptionsFile>
+ <customScript mode="1" file="">
+ <content />
+ </customScript>
+ <infoPlist mode="1" file="">
+ <content />
+ </infoPlist>
+ <iconImageFiles>
+ <file path="../resources/icons/malt-run16.png" />
+ <file path="../resources/icons/malt-run32.png" />
+ <file path="../resources/icons/malt-run48.png" />
+ </iconImageFiles>
+ </launcher>
+ <launcher name="malt-build" id="1691243204" customizedId="" external="false" excludeFromMenu="false" unixMode="755" menuName="" icnsFile="../resources/icons/malt-build.icns" customMacBundleIdentifier="false" macBundleIdentifier="" swtApp="false" fileset="" macBundleBinary="JavaApplicationStub" addMacEntitlements="false" macEntitlementsFile="" useCustomMacosExecutableName="false" customMacosExecutableName="">
+ <executable name="malt-build" type="1" iconSet="true" iconFile="" executableDir="" redirectStderr="false" stderrFile="error.log" stderrMode="overwrite" redirectStdout="false" stdoutFile="output.log" stdoutMode="overwrite" failOnStderrOutput="true" executableMode="2" changeWorkingDirectory="false" workingDirectory="." singleInstance="false" serviceStartType="2" serviceDependencies="" serviceDescription="" jreLocation="" executionLevel="asInvoker" checkConsoleParameter="false" global [...]
+ <versionInfo include="false" fileVersion="" fileDescription="" legalCopyright="" internalName="" productName="" />
+ </executable>
+ <splashScreen show="false" width="450" height="300" bitmapFile="" windowsNative="false" textOverlay="false">
+ <text>
+ <statusLine x="383" y="15" text="" fontSize="8" fontColor="0,0,0" bold="false" />
+ <versionLine x="17" y="10" text="MALT ${compiler:sys.version}" fontSize="12" fontColor="0,0,0" bold="false" />
+ </text>
+ </splashScreen>
+ <java mainClass="malt.MaltBuild" vmParameters="-server -Dapple.laf.useScreenMenuBar=true -Duser.language=en -Duser.region=US" arguments="" allowVMPassthroughParameters="true" preferredVM="server" bundleRuntime="true">
+ <classPath>
+ <directory location="class" failOnError="false" />
+ <scanDirectory location="jars" failOnError="false" />
+ </classPath>
+ <nativeLibraryDirectories />
+ <vmOptions />
+ </java>
+ <includedFiles />
+ <unextractableFiles />
+ <vmOptionsFile mode="template" overwriteMode="1" fileMode="644">
+ <content />
+ </vmOptionsFile>
+ <customScript mode="1" file="">
+ <content />
+ </customScript>
+ <infoPlist mode="1" file="">
+ <content />
+ </infoPlist>
+ <iconImageFiles>
+ <file path="../resources/icons/malt-build16.png" />
+ <file path="../resources/icons/malt-build32.png" />
+ <file path="../resources/icons/malt-build48.png" />
+ </iconImageFiles>
+ </launcher>
+ <launcher name="malt-run" id="1691243207" customizedId="" external="false" excludeFromMenu="false" unixMode="755" menuName="" icnsFile="../resources/icons/malt-run.icns" customMacBundleIdentifier="false" macBundleIdentifier="" swtApp="false" fileset="" macBundleBinary="JavaApplicationStub" addMacEntitlements="false" macEntitlementsFile="" useCustomMacosExecutableName="false" customMacosExecutableName="">
+ <executable name="malt-run" type="1" iconSet="true" iconFile="" executableDir="" redirectStderr="false" stderrFile="error.log" stderrMode="overwrite" redirectStdout="false" stdoutFile="output.log" stdoutMode="overwrite" failOnStderrOutput="true" executableMode="2" changeWorkingDirectory="false" workingDirectory="." singleInstance="false" serviceStartType="2" serviceDependencies="" serviceDescription="" jreLocation="" executionLevel="asInvoker" checkConsoleParameter="false" globalSi [...]
+ <versionInfo include="false" fileVersion="" fileDescription="" legalCopyright="" internalName="" productName="" />
+ </executable>
+ <splashScreen show="false" width="0" height="0" bitmapFile="" windowsNative="false" textOverlay="false">
+ <text>
+ <statusLine x="20" y="20" text="" fontSize="8" fontColor="0,0,0" bold="false" />
+ <versionLine x="20" y="40" text="version ${compiler:sys.version}" fontSize="8" fontColor="0,0,0" bold="false" />
+ </text>
+ </splashScreen>
+ <java mainClass="malt.MaltRun" vmParameters="-server -Dapple.laf.useScreenMenuBar=true -Duser.language=en -Duser.region=US" arguments="" allowVMPassthroughParameters="true" preferredVM="" bundleRuntime="true">
+ <classPath>
+ <directory location="class" failOnError="false" />
+ <scanDirectory location="jars" failOnError="false" />
+ </classPath>
+ <nativeLibraryDirectories />
+ <vmOptions />
+ </java>
+ <includedFiles />
+ <unextractableFiles />
+ <vmOptionsFile mode="template" overwriteMode="0" fileMode="644">
+ <content />
+ </vmOptionsFile>
+ <customScript mode="1" file="">
+ <content />
+ </customScript>
+ <infoPlist mode="1" file="">
+ <content />
+ </infoPlist>
+ <iconImageFiles>
+ <file path="../resources/icons/malt-run16.png" />
+ <file path="../resources/icons/malt-run32.png" />
+ <file path="../resources/icons/malt-run48.png" />
+ </iconImageFiles>
+ </launcher>
+ </launchers>
+ <installerGui installerType="1" addOnAppId="" suggestPreviousLocations="true" autoUpdateDescriptorUrl="http://www-ab.informatik.uni-tuebingen.de/data/software/malt/download/updates.xml" useAutoUpdateBaseUrl="false" autoUpdateBaseUrl="">
+ <staticMembers script="" />
+ <customCode />
+ <autoUpdate useMinUpdatableVersion="true" minUpdatableVersion="0" useMaxUpdatableVersion="false" maxUpdatableVersion="">
+ <commentFiles />
+ <customAttributes />
+ </autoUpdate>
+ <applications>
+ <application name="" id="installer" customizedId="" beanClass="com.install4j.runtime.beans.applications.InstallerApplication" enabled="true" commentSet="false" comment="" actionElevationType="none" fileset="" customIcnsFile="" customIcoFile="" macEntitlementsFile="" automaticLauncherIntegration="false" launchMode="startupFirstWindow" launchInNewProcess="false" launchSchedule="updateSchedule" allLaunchers="true">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.applications.InstallerApplication">
+ <void property="customWatermarkText">
+ <string>Daniel Huson's lab, University of Tübingen</string>
+ </void>
+ <void property="frameSizeClientArea">
+ <boolean>false</boolean>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <launcherIds />
+ <variables />
+ <startup>
+ <screen name="" id="15" customizedId="" beanClass="com.install4j.runtime.beans.screens.StartupScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.StartupScreen" />
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions>
+ <action name="" id="1691242625" customizedId="" beanClass="com.install4j.runtime.beans.actions.misc.RequestPrivilegesAction" enabled="true" commentSet="false" comment="" actionElevationType="none" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.misc.RequestPrivilegesAction" />
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ </actions>
+ <formComponents />
+ </screen>
+ </startup>
+ <screens>
+ <screen name="" id="1691242173" customizedId="" beanClass="com.install4j.runtime.beans.screens.WelcomeScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.WelcomeScreen" />
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions>
+ <action name="" id="1691242626" customizedId="" beanClass="com.install4j.runtime.beans.actions.misc.LoadResponseFileAction" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" multiExec="true" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.misc.LoadResponseFileAction" />
+ </java>
+ </serializedBean>
+ <condition>context.getBooleanVariable("sys.confirmedUpdateInstallation")</condition>
+ </action>
+ </actions>
+ <formComponents />
+ </screen>
+ <screen name="" id="1691242174" customizedId="" beanClass="com.install4j.runtime.beans.screens.LicenseScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.LicenseScreen" id="LicenseScreen0">
+ <void id="LocalizedExternalFile0" property="displayedTextFile">
+ <void property="languageIdToExternalFile">
+ <void method="put">
+ <string>en</string>
+ <object class="com.install4j.api.beans.ExternalFile">
+ <string>./License.txt</string>
+ </object>
+ </void>
+ </void>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions />
+ <formComponents />
+ </screen>
+ <screen name="" id="1691242175" customizedId="" beanClass="com.install4j.runtime.beans.screens.InstallationDirectoryScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.InstallationDirectoryScreen" />
+ </java>
+ </serializedBean>
+ <condition>!context.getBooleanVariable("sys.confirmedUpdateInstallation")</condition>
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions />
+ <formComponents />
+ </screen>
+ <screen name="" id="1691242176" customizedId="" beanClass="com.install4j.runtime.beans.screens.ComponentsScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.ComponentsScreen" />
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions />
+ <formComponents />
+ </screen>
+ <screen name="" id="1691242177" customizedId="" beanClass="com.install4j.runtime.beans.screens.StandardProgramGroupScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.StandardProgramGroupScreen">
+ <void property="programGroupName">
+ <string>${compiler:sys.fullName}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition>!context.getBooleanVariable("sys.confirmedUpdateInstallation")</condition>
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions />
+ <formComponents />
+ </screen>
+ <screen name="" id="1691242178" customizedId="" beanClass="com.install4j.runtime.beans.screens.FileAssociationsScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.FileAssociationsScreen" />
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions />
+ <formComponents />
+ </screen>
+ <screen name="" id="1691242179" customizedId="" beanClass="com.install4j.runtime.beans.screens.InstallationScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="true" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.InstallationScreen" />
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions>
+ <action name="" id="1691243478" customizedId="" beanClass="com.install4j.runtime.beans.actions.UninstallPreviousAction" enabled="true" commentSet="false" comment="" actionElevationType="none" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.UninstallPreviousAction" />
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <action name="" id="1691242180" customizedId="" beanClass="com.install4j.runtime.beans.actions.InstallFilesAction" enabled="true" commentSet="false" comment="" actionElevationType="elevated" rollbackBarrier="false" multiExec="false" failureStrategy="2" errorMessage="${i18n:FileCorrupted}">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.InstallFilesAction" />
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <action name="" id="1691242181" customizedId="" beanClass="com.install4j.runtime.beans.actions.desktop.CreateProgramGroupAction" enabled="true" commentSet="false" comment="" actionElevationType="elevated" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.desktop.CreateProgramGroupAction">
+ <void property="uninstallerMenuName">
+ <string>${i18n:UninstallerMenuEntry(${compiler:sys.fullName})}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition>!context.getBooleanVariable("sys.programGroupDisabled")</condition>
+ </action>
+ <action name="" id="1691242182" customizedId="" beanClass="com.install4j.runtime.beans.actions.desktop.RegisterAddRemoveAction" enabled="true" commentSet="false" comment="" actionElevationType="elevated" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.desktop.RegisterAddRemoveAction">
+ <void property="itemName">
+ <string>${compiler:sys.fullName} ${compiler:sys.version}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ </actions>
+ <formComponents />
+ </screen>
+ <screen name="" id="1691242744" customizedId="" beanClass="com.install4j.runtime.beans.screens.FormScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.FormScreen">
+ <void property="subTitle">
+ <string>Check for updates how often?</string>
+ </void>
+ <void property="title">
+ <string>MALT Update Scheduler</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions />
+ <formComponents>
+ <formComponent name="" id="1691242745" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.UpdateScheduleSelectorComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.UpdateScheduleSelectorComponent" />
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript />
+ </formComponent>
+ </formComponents>
+ </screen>
+ <screen name="" id="1691242944" customizedId="" beanClass="com.install4j.runtime.beans.screens.FormScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.FormScreen">
+ <void property="subTitle">
+ <string>Set maximum allowed memory usage for MALT</string>
+ </void>
+ <void property="title">
+ <string>Set MALT memory</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions>
+ <action name="" id="1691242947" customizedId="" beanClass="com.install4j.runtime.beans.actions.misc.AddVmOptionsAction" enabled="true" commentSet="false" comment="" actionElevationType="elevated" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.misc.AddVmOptionsAction">
+ <void property="launcherId">
+ <string>1691242235</string>
+ </void>
+ <void property="macosVmOptionsType">
+ <object class="java.lang.Enum" method="valueOf">
+ <class>com.install4j.runtime.beans.actions.misc.MacosVmOptionsType</class>
+ <string>OUTER</string>
+ </object>
+ </void>
+ <void property="vmOptions">
+ <array class="java.lang.String" length="1">
+ <void index="0">
+ <string>-Xmx${installer:myXmx}G</string>
+ </void>
+ </array>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <action name="" id="1691243401" customizedId="" beanClass="com.install4j.runtime.beans.actions.misc.AddVmOptionsAction" enabled="true" commentSet="false" comment="" actionElevationType="elevated" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.misc.AddVmOptionsAction">
+ <void property="launcherId">
+ <string>1691243093</string>
+ </void>
+ <void property="macosVmOptionsType">
+ <object class="java.lang.Enum" method="valueOf">
+ <class>com.install4j.runtime.beans.actions.misc.MacosVmOptionsType</class>
+ <string>OUTER</string>
+ </object>
+ </void>
+ <void property="vmOptions">
+ <array class="java.lang.String" length="1">
+ <void index="0">
+ <string>-Xmx${installer:myXmx}G</string>
+ </void>
+ </array>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <action name="" id="1691243399" customizedId="" beanClass="com.install4j.runtime.beans.actions.misc.AddVmOptionsAction" enabled="true" commentSet="false" comment="" actionElevationType="elevated" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.misc.AddVmOptionsAction">
+ <void property="launcherId">
+ <string>1691243204</string>
+ </void>
+ <void property="macosVmOptionsType">
+ <object class="java.lang.Enum" method="valueOf">
+ <class>com.install4j.runtime.beans.actions.misc.MacosVmOptionsType</class>
+ <string>OUTER</string>
+ </object>
+ </void>
+ <void property="vmOptions">
+ <array class="java.lang.String" length="1">
+ <void index="0">
+ <string>-Xmx${installer:myXmx}G</string>
+ </void>
+ </array>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <action name="" id="1691243400" customizedId="" beanClass="com.install4j.runtime.beans.actions.misc.AddVmOptionsAction" enabled="true" commentSet="false" comment="" actionElevationType="elevated" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.misc.AddVmOptionsAction">
+ <void property="launcherId">
+ <string>1691243207</string>
+ </void>
+ <void property="macosVmOptionsType">
+ <object class="java.lang.Enum" method="valueOf">
+ <class>com.install4j.runtime.beans.actions.misc.MacosVmOptionsType</class>
+ <string>OUTER</string>
+ </object>
+ </void>
+ <void property="vmOptions">
+ <array class="java.lang.String" length="1">
+ <void index="0">
+ <string>-Xmx${installer:myXmx}G</string>
+ </void>
+ </array>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ </actions>
+ <formComponents>
+ <formComponent name="" id="1691242946" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.NumberSpinnerComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.NumberSpinnerComponent">
+ <void property="helpText">
+ <string>Set maximum amount of memory that MALT can use. </string>
+ </void>
+ <void property="initialValue">
+ <int>64</int>
+ </void>
+ <void property="labelText">
+ <string>Set max memory usage (in gigabytes)</string>
+ </void>
+ <void property="maxValue">
+ <int>1024</int>
+ </void>
+ <void property="minValue">
+ <int>1</int>
+ </void>
+ <void property="stepSize">
+ <int>1</int>
+ </void>
+ <void property="variableName">
+ <string>myXmx</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript />
+ </formComponent>
+ </formComponents>
+ </screen>
+ <screen name="" id="1691242183" customizedId="" beanClass="com.install4j.runtime.beans.screens.FinishedScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="true" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.FinishedScreen" />
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions />
+ <formComponents />
+ </screen>
+ </screens>
+ </application>
+ <application name="" id="uninstaller" customizedId="" beanClass="com.install4j.runtime.beans.applications.UninstallerApplication" enabled="true" commentSet="false" comment="" actionElevationType="none" fileset="" customIcnsFile="" customIcoFile="" macEntitlementsFile="" automaticLauncherIntegration="false" launchMode="startupFirstWindow" launchInNewProcess="false" launchSchedule="updateSchedule" allLaunchers="true">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.applications.UninstallerApplication">
+ <void property="customMacosExecutableName">
+ <string>${i18n:UninstallerMenuEntry(${compiler:sys.fullName})}</string>
+ </void>
+ <void property="customWatermarkText">
+ <string>Daniel Huson's lab, University of Tübingen</string>
+ </void>
+ <void property="frameSizeClientArea">
+ <boolean>false</boolean>
+ </void>
+ <void property="useCustomMacosExecutableName">
+ <boolean>true</boolean>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <launcherIds />
+ <variables />
+ <startup>
+ <screen name="" id="16" customizedId="" beanClass="com.install4j.runtime.beans.screens.StartupScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.StartupScreen" />
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions>
+ <link name="" id="1691242627" customizedId="" beanClass="" enabled="true" targetId="1691242625" mergedProjectId="" />
+ </actions>
+ <formComponents />
+ </screen>
+ </startup>
+ <screens>
+ <screen name="" id="1691242184" customizedId="" beanClass="com.install4j.runtime.beans.screens.UninstallWelcomeScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.UninstallWelcomeScreen" />
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions />
+ <formComponents />
+ </screen>
+ <screen name="" id="1691242185" customizedId="" beanClass="com.install4j.runtime.beans.screens.UninstallationScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.UninstallationScreen" />
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions>
+ <action name="" id="1691242186" customizedId="" beanClass="com.install4j.runtime.beans.actions.UninstallFilesAction" enabled="true" commentSet="false" comment="" actionElevationType="elevated" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.UninstallFilesAction" />
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ </actions>
+ <formComponents />
+ </screen>
+ <screen name="" id="1691242187" customizedId="" beanClass="com.install4j.runtime.beans.screens.UninstallFailureScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="true" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.UninstallFailureScreen" />
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions />
+ <formComponents />
+ </screen>
+ <screen name="" id="1691242188" customizedId="" beanClass="com.install4j.runtime.beans.screens.UninstallSuccessScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="true" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.UninstallSuccessScreen" />
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions />
+ <formComponents />
+ </screen>
+ </screens>
+ </application>
+ <application name="Updater with silent version check" id="1691242905" customizedId="" beanClass="com.install4j.runtime.beans.applications.CustomApplication" enabled="true" commentSet="false" comment="" actionElevationType="none" fileset="" customIcnsFile="${compiler:sys.install4jHome}/resource/macos/updater.icns" customIcoFile="${compiler:sys.install4jHome}/resource/updater.ico" macEntitlementsFile="" automaticLauncherIntegration="true" launchMode="startupSync" launchInNewProcess=" [...]
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.applications.CustomApplication" id="CustomApplication0">
+ <void property="customIconImageFiles">
+ <void method="add">
+ <object class="com.install4j.api.beans.ExternalFile">
+ <string>${compiler:sys.install4jHome}/resource/updater_16.png</string>
+ </object>
+ </void>
+ <void method="add">
+ <object class="com.install4j.api.beans.ExternalFile">
+ <string>${compiler:sys.install4jHome}/resource/updater_32.png</string>
+ </object>
+ </void>
+ <void method="add">
+ <object class="com.install4j.api.beans.ExternalFile">
+ <string>${compiler:sys.install4jHome}/resource/updater_48.png</string>
+ </object>
+ </void>
+ </void>
+ <void property="customWatermarkText">
+ <string>Daniel Huson's lab, University of Tübingen</string>
+ </void>
+ <void property="executableName">
+ <string>automaticUpdater</string>
+ </void>
+ <void property="useCustomIcon">
+ <boolean>true</boolean>
+ </void>
+ <void property="windowTitle">
+ <string>${i18n:updater.WindowTitle("${compiler:sys.fullName}")}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <launcherIds>
+ <launcher id="1691242235" />
+ <launcher id="1691243093" />
+ <launcher id="1691243296" />
+ <launcher id="1691243297" />
+ </launcherIds>
+ <variables />
+ <startup>
+ <screen name="" id="1691242906" customizedId="" beanClass="com.install4j.runtime.beans.screens.StartupScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.StartupScreen" />
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions>
+ <action name="" id="1691242932" customizedId="" beanClass="com.install4j.runtime.beans.actions.update.CheckForUpdateAction" enabled="true" commentSet="false" comment="" actionElevationType="none" rollbackBarrier="false" multiExec="false" failureStrategy="2" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.update.CheckForUpdateAction">
+ <void property="showError">
+ <boolean>false</boolean>
+ </void>
+ <void property="url">
+ <string>${compiler:sys.updatesUrl}</string>
+ </void>
+ <void property="variable">
+ <string>updateDescriptor</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <action name="Update descriptor entry" id="1691242933" customizedId="" beanClass="com.install4j.runtime.beans.actions.control.SetVariableAction" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" multiExec="false" failureStrategy="2" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.control.SetVariableAction">
+ <void property="failIfNull">
+ <boolean>true</boolean>
+ </void>
+ <void property="script">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>((UpdateDescriptor)context.getVariable("updateDescriptor")).getPossibleUpdateEntry()</string>
+ </void>
+ </object>
+ </void>
+ <void property="variableName">
+ <string>updateDescriptorEntry</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <group name="Update available" id="1691242934" customizedId="" beanClass="com.install4j.runtime.beans.groups.ActionGroup" enabled="true" commentSet="false" comment="" actionElevationType="inherit">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.groups.ActionGroup">
+ <void property="conditionExpression">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>context.getVariable("updateDescriptorEntry") != null</string>
+ </void>
+ </object>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <beans>
+ <action name="New version" id="1691242935" customizedId="" beanClass="com.install4j.runtime.beans.actions.control.SetVariableAction" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.control.SetVariableAction">
+ <void property="script">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>((UpdateDescriptorEntry)context.getVariable("updateDescriptorEntry")).getNewVersion()</string>
+ </void>
+ </object>
+ </void>
+ <void property="variableName">
+ <string>updaterNewVersion</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <action name="Download size" id="1691242936" customizedId="" beanClass="com.install4j.runtime.beans.actions.control.SetVariableAction" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.control.SetVariableAction">
+ <void property="script">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>((UpdateDescriptorEntry)context.getVariable("updateDescriptorEntry")).getFileSizeVerbose()</string>
+ </void>
+ </object>
+ </void>
+ <void property="variableName">
+ <string>updaterDownloadSize</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <action name="Comment" id="1691242937" customizedId="" beanClass="com.install4j.runtime.beans.actions.control.SetVariableAction" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.control.SetVariableAction">
+ <void property="script">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>((UpdateDescriptorEntry)context.getVariable("updateDescriptorEntry")).getComment()</string>
+ </void>
+ </object>
+ </void>
+ <void property="variableName">
+ <string>updaterComment</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <action name="Download directory" id="1691242938" customizedId="" beanClass="com.install4j.runtime.beans.actions.control.SetVariableAction" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.control.SetVariableAction">
+ <void property="script">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>Util.getUserHome()</string>
+ </void>
+ </object>
+ </void>
+ <void property="variableName">
+ <string>updaterDownloadDir</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <action name="Download URL" id="1691242939" customizedId="" beanClass="com.install4j.runtime.beans.actions.control.SetVariableAction" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.control.SetVariableAction">
+ <void property="script">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>((UpdateDescriptorEntry)context.getVariable("updateDescriptorEntry")).getURL().toExternalForm()</string>
+ </void>
+ </object>
+ </void>
+ <void property="variableName">
+ <string>updaterDownloadUrl</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <action name="Archive" id="1691242940" customizedId="" beanClass="com.install4j.runtime.beans.actions.control.SetVariableAction" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.control.SetVariableAction">
+ <void property="script">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>((UpdateDescriptorEntry)context.getVariable("updateDescriptorEntry")).isArchive() ? Boolean.TRUE : Boolean.FALSE</string>
+ </void>
+ </object>
+ </void>
+ <void property="variableName">
+ <string>isArchive</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ </beans>
+ </group>
+ </actions>
+ <formComponents />
+ </screen>
+ </startup>
+ <screens>
+ <group name="Update available" id="1691242907" customizedId="" beanClass="com.install4j.runtime.beans.groups.ScreenGroup" enabled="true" commentSet="false" comment="" actionElevationType="inherit">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.groups.ScreenGroup">
+ <void property="conditionExpression">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>context.getVariable("updateDescriptorEntry") != null</string>
+ </void>
+ </object>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <beans>
+ <screen name="New version available" id="1691242908" customizedId="" beanClass="com.install4j.runtime.beans.screens.FormScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.FormScreen">
+ <void property="subTitle">
+ <string>${i18n:updater.NewVersionAvailableSubtitle("${compiler:sys.fullName}")}</string>
+ </void>
+ <void property="title">
+ <string>${i18n:updater.NewVersionAvailableTitle}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions />
+ <formComponents>
+ <formComponent name="" id="1691242909" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.KeyValuePairComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.KeyValuePairComponent">
+ <void property="labelText">
+ <string>${i18n:updater.CurrentVersionLabel}</string>
+ </void>
+ <void property="valueLabelColor">
+ <object class="java.awt.Color">
+ <int>128</int>
+ <int>0</int>
+ <int>0</int>
+ <int>255</int>
+ </object>
+ </void>
+ <void property="valueLabelFont">
+ <object class="java.awt.Font">
+ <string>dialog</string>
+ <int>1</int>
+ <int>0</int>
+ </object>
+ </void>
+ <void property="valueLabelText">
+ <string>${installer:sys.version}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript />
+ </formComponent>
+ <group name="" id="1691242910" customizedId="" beanClass="com.install4j.runtime.beans.groups.HorizontalFormComponentGroup" enabled="true" commentSet="false" comment="" actionElevationType="inherit">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.groups.HorizontalFormComponentGroup" />
+ </java>
+ </serializedBean>
+ <beans>
+ <formComponent name="" id="1691242911" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.KeyValuePairComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.KeyValuePairComponent">
+ <void property="labelText">
+ <string>${i18n:updater.NewVersionLabel}</string>
+ </void>
+ <void property="valueLabelColor">
+ <object class="java.awt.Color">
+ <int>0</int>
+ <int>128</int>
+ <int>0</int>
+ <int>255</int>
+ </object>
+ </void>
+ <void property="valueLabelFont">
+ <object class="java.awt.Font">
+ <string>dialog</string>
+ <int>1</int>
+ <int>0</int>
+ </object>
+ </void>
+ <void property="valueLabelText">
+ <string>${installer:updaterNewVersion}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript />
+ </formComponent>
+ <formComponent name="" id="1691242912" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.HyperlinkActionLabelComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="5" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.HyperlinkActionLabelComponent">
+ <void property="actionScript">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>context.goForward(1, false, false);</string>
+ </void>
+ </object>
+ </void>
+ <void property="hyperlinkText">
+ <string>${i18n:updater.ShowComments}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript> ((String)context.getVariable("updaterComment")).length() > 0</visibilityScript>
+ </formComponent>
+ </beans>
+ </group>
+ <formComponent name="" id="1691242913" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.SpacerComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.SpacerComponent" />
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript />
+ </formComponent>
+ <formComponent name="" id="1691242914" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.MultilineLabelComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.MultilineLabelComponent">
+ <void property="labelText">
+ <string>${i18n:updater.DownloadLocationLabel}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript />
+ </formComponent>
+ <formComponent name="" id="1691242915" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.DirectoryChooserComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.DirectoryChooserComponent">
+ <void property="initialFile">
+ <string>${installer:updaterDownloadDir}</string>
+ </void>
+ <void property="labelText">
+ <string>${i18n:updater.DownloadToLabel}</string>
+ </void>
+ <void property="manualEntryAllowed">
+ <boolean>false</boolean>
+ </void>
+ <void property="variableName">
+ <string>updaterDownloadLocation</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript />
+ </formComponent>
+ <formComponent name="" id="1691242916" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.KeyValuePairComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.KeyValuePairComponent">
+ <void property="labelText">
+ <string>${i18n:updater.DownloadSizeLabel}</string>
+ </void>
+ <void property="valueLabelText">
+ <string>${installer:updaterDownloadSize}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript />
+ </formComponent>
+ </formComponents>
+ </screen>
+ <screen name="Update message" id="1691242917" customizedId="" beanClass="com.install4j.runtime.beans.screens.CustomizableInfoScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.CustomizableInfoScreen">
+ <void property="displayedText">
+ <string>${installer:updaterComment}</string>
+ </void>
+ <void property="infoText">
+ <string>${i18n:updater.CommentsLabel}</string>
+ </void>
+ <void property="subTitle">
+ <string>${i18n:updater.CommentsSubTitle}</string>
+ </void>
+ <void property="textSource">
+ <object class="java.lang.Enum" method="valueOf">
+ <class>com.install4j.runtime.beans.screens.components.TextSource</class>
+ <string>DIRECT</string>
+ </object>
+ </void>
+ <void property="title">
+ <string>${i18n:updater.CommentsTitle}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition>false // This screen is only shown if the user clicks the "Show comments" hyperlink label in the previous screen.
+</condition>
+ <validation>if (context.isConsole()) {
+ context.goBackInHistory(1);
+}
+return true;</validation>
+ <preActivation>WizardContext wizardContext = context.getWizardContext();
+wizardContext.setNextButtonVisible(false);
+wizardContext.setCancelButtonVisible(false);</preActivation>
+ <postActivation />
+ <actions />
+ <formComponents />
+ </screen>
+ <screen name="Download new version" id="1691242918" customizedId="" beanClass="com.install4j.runtime.beans.screens.CustomizableProgressScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.CustomizableProgressScreen">
+ <void property="subTitle">
+ <string>${i18n:updater.DownloadSubTitle}</string>
+ </void>
+ <void property="title">
+ <string>${i18n:updater.DownloadTitle}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions>
+ <action name="Download location" id="1691242919" customizedId="" beanClass="com.install4j.runtime.beans.actions.control.SetVariableAction" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.control.SetVariableAction">
+ <void property="script">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>context.getVariable("updaterDownloadLocation") + File.separator + ((UpdateDescriptorEntry)context.getVariable("updateDescriptorEntry")).getFileName()</string>
+ </void>
+ </object>
+ </void>
+ <void property="variableName">
+ <string>updaterDownloadFile</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <action name="" id="1691242920" customizedId="" beanClass="com.install4j.runtime.beans.actions.net.DownloadFileAction" enabled="true" commentSet="false" comment="" actionElevationType="elevated" rollbackBarrier="false" multiExec="false" failureStrategy="2" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.net.DownloadFileAction">
+ <void property="targetFile">
+ <object class="java.io.File">
+ <string>${installer:updaterDownloadFile}</string>
+ </object>
+ </void>
+ <void property="url">
+ <string>${installer:updaterDownloadUrl}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <action name="" id="1691242921" customizedId="" beanClass="com.install4j.runtime.beans.actions.files.SetModeAction" enabled="true" commentSet="false" comment="" actionElevationType="elevated" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.files.SetModeAction">
+ <void property="files">
+ <array class="java.io.File" length="1">
+ <void index="0">
+ <object class="java.io.File">
+ <string>${installer:updaterDownloadFile}</string>
+ </object>
+ </void>
+ </array>
+ </void>
+ <void property="mode">
+ <string>755</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ </actions>
+ <formComponents />
+ </screen>
+ <screen name="Finish" id="1691242922" customizedId="" beanClass="com.install4j.runtime.beans.screens.BannerFormScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="true" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.BannerFormScreen">
+ <void property="infoText">
+ <string>${i18n:updater.FinishInfoText("${compiler:sys.fullName}")}</string>
+ </void>
+ <void property="title">
+ <string>${i18n:updater.FinishTitle}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions>
+ <group name="Execute installer" id="1691242923" customizedId="" beanClass="com.install4j.runtime.beans.groups.ActionGroup" enabled="true" commentSet="false" comment="" actionElevationType="inherit">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.groups.ActionGroup">
+ <void property="conditionExpression">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>((Integer)context.getVariable("updaterLaunchSelection")).intValue() == 0 && !context.getBooleanVariable("isArchive")</string>
+ </void>
+ </object>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <beans>
+ <action name="Set installer arguments" id="1691242924" customizedId="" beanClass="com.install4j.runtime.beans.actions.control.SetVariableAction" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.control.SetVariableAction">
+ <void property="script">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>if (context.isUnattended()) {
+ return new String[] {"-q", "-wait", "20"};
+} else if (context.isConsole()) {
+ return "-c";
+} else {
+ return "";
+}</string>
+ </void>
+ </object>
+ </void>
+ <void property="variableName">
+ <string>installerArguments</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <action name="" id="1691242925" customizedId="" beanClass="com.install4j.runtime.beans.actions.update.ShutdownCallingLauncherAction" enabled="true" commentSet="false" comment="" actionElevationType="none" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.update.ShutdownCallingLauncherAction" />
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <action name="" id="1691242926" customizedId="" beanClass="com.install4j.runtime.beans.actions.misc.RunExecutableAction" enabled="true" commentSet="false" comment="" actionElevationType="elevated" rollbackBarrier="false" multiExec="false" failureStrategy="2" errorMessage="${i18n:updater.LaunchError}">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.misc.RunExecutableAction">
+ <void property="arguments">
+ <array class="java.lang.String" length="1">
+ <void index="0">
+ <string>${installer:installerArguments}</string>
+ </void>
+ </array>
+ </void>
+ <void property="executable">
+ <object class="java.io.File">
+ <string>${installer:updaterDownloadFile}</string>
+ </object>
+ </void>
+ <void property="workingDirectory">
+ <object class="java.io.File">
+ <string>${installer:updaterDownloadLocation}</string>
+ </object>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ </beans>
+ </group>
+ </actions>
+ <formComponents>
+ <formComponent name="" id="1691242927" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.LabelComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.LabelComponent">
+ <void property="labelText">
+ <string>${i18n:updater.LaunchUpdaterQuestion}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript />
+ </formComponent>
+ <formComponent name="" id="1691242928" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.SpacerComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.SpacerComponent">
+ <void property="height">
+ <int>5</int>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript />
+ </formComponent>
+ <formComponent name="" id="1691242929" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.RadiobuttonsComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.RadiobuttonsComponent">
+ <void property="radioButtonLabels">
+ <array class="java.lang.String" length="2">
+ <void index="0">
+ <string>${i18n:updater.LaunchUpdaterLabel}</string>
+ </void>
+ <void index="1">
+ <string>${i18n:updater.DoNotLaunchUpdaterLabel}</string>
+ </void>
+ </array>
+ </void>
+ <void property="variableName">
+ <string>updaterLaunchSelection</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript>!context.getBooleanVariable("isArchive")</visibilityScript>
+ </formComponent>
+ <formComponent name="" id="1691242930" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.HyperlinkActionLabelComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.HyperlinkActionLabelComponent">
+ <void property="actionScript">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>Util.showPath((String)context.getVariable("updaterDownloadFile"));</string>
+ </void>
+ </object>
+ </void>
+ <void property="hyperlinkText">
+ <string>${i18n:updater.OpenContainingFolderLabel}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript>!context.isConsole()</visibilityScript>
+ </formComponent>
+ <formComponent name="" id="1691242931" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.ProgressComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.ProgressComponent">
+ <void property="detailVisible">
+ <boolean>false</boolean>
+ </void>
+ <void property="hideInitially">
+ <boolean>true</boolean>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript />
+ </formComponent>
+ </formComponents>
+ </screen>
+ </beans>
+ </group>
+ </screens>
+ </application>
+ </applications>
+ </installerGui>
+ <mediaSets>
+ <unixInstaller name="Unix Installer" id="1691242146" customizedId="" mediaFileName="" installDir="malt" overridePrincipalLanguage="true" jreBitType="all" runPostProcessor="false" postProcessor="" failOnPostProcessorError="false" useLegacyMediaFileIds="false" legacyMediaFileIds="" downloadURL="" includeAllDownloadableComponents="false" includedJRE="" manualJREEntry="false" bundleType="1" jreURL="" jreShared="false" directDownload="false" installOnlyIfNecessary="false" customInstallBas [...]
+ <excludedComponents />
+ <includedDownloadableComponents />
+ <excludedLaunchers>
+ <launcher id="1691242235" />
+ <launcher id="1691243093" />
+ </excludedLaunchers>
+ <excludedBeans />
+ <overriddenPrincipalLanguage id="en" customLocalizationFile="" />
+ <exclude />
+ <variables />
+ <autoUpdate useMinUpdatableVersion="false" minUpdatableVersion="" useMaxUpdatableVersion="false" maxUpdatableVersion="">
+ <commentFiles />
+ <customAttributes />
+ </autoUpdate>
+ <installerScript mode="1" file="">
+ <content />
+ </installerScript>
+ </unixInstaller>
+ <macosFolder name="Mac OS X Folder" id="1691242149" customizedId="" mediaFileName="" installDir="MALT" overridePrincipalLanguage="true" jreBitType="all" runPostProcessor="false" postProcessor="" failOnPostProcessorError="false" useLegacyMediaFileIds="false" legacyMediaFileIds="" downloadURL="" includeAllDownloadableComponents="false" includedJRE="macosx-amd64-1.8.0_92" manualJREEntry="false" bundleType="1" jreURL="" jreShared="false" directDownload="false" installOnlyIfNecessary="fal [...]
+ <excludedComponents />
+ <includedDownloadableComponents />
+ <excludedLaunchers />
+ <excludedBeans />
+ <overriddenPrincipalLanguage id="en" customLocalizationFile="" />
+ <exclude />
+ <variables />
+ <autoUpdate useMinUpdatableVersion="false" minUpdatableVersion="" useMaxUpdatableVersion="false" maxUpdatableVersion="">
+ <commentFiles />
+ <customAttributes />
+ </autoUpdate>
+ <topLevelFiles />
+ </macosFolder>
+ <windows name="Windows" id="1691242407" customizedId="" mediaFileName="${compiler:sys.shortName}_${compiler:sys.platform}_${compiler:sys.version}" installDir="Malt" overridePrincipalLanguage="true" jreBitType="64" runPostProcessor="false" postProcessor="" failOnPostProcessorError="false" useLegacyMediaFileIds="false" legacyMediaFileIds="" downloadURL="" includeAllDownloadableComponents="false" includedJRE="windows-amd64-1.8.0_92" manualJREEntry="false" bundleType="1" jreURL="" jreSha [...]
+ <excludedComponents />
+ <includedDownloadableComponents />
+ <excludedLaunchers />
+ <excludedBeans />
+ <overriddenPrincipalLanguage id="en" customLocalizationFile="" />
+ <exclude />
+ <variables />
+ <autoUpdate useMinUpdatableVersion="false" minUpdatableVersion="" useMaxUpdatableVersion="false" maxUpdatableVersion="">
+ <commentFiles />
+ <customAttributes />
+ </autoUpdate>
+ </windows>
+ </mediaSets>
+ <buildIds buildAll="false">
+ <mediaSet refId="1691242149" />
+ </buildIds>
+ <buildOptions verbose="false" faster="false" disableSigning="false" disableJreBundling="false" debug="false" />
+</install4j>
diff --git a/installer/malt2.install4j b/installer/malt2.install4j
new file mode 100644
index 0000000..7114ec1
--- /dev/null
+++ b/installer/malt2.install4j
@@ -0,0 +1,1566 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<install4j version="5.1.14" transformSequenceNumber="4">
+ <directoryPresets config="../resources/icons/malt-run48.png" />
+ <application name="MALT" distributionSourceDir="" applicationId="3229-5251-7410-5330" mediaDir="../../../builds" mediaFilePattern="${compiler:sys.shortName}_${compiler:sys.platform}_${compiler:sys.version}" compression="6" lzmaCompression="false" pack200Compression="false" excludeSignedFromPacking="true" commonExternalFiles="false" createMd5Sums="true" shrinkRuntime="true" shortName="MALT" publisher="Daniel Huson's Lab, University of Tuebingen" publisherWeb="www-ab.informatik.uni-tuebi [...]
+ <languages skipLanguageSelection="false" languageSelectionInPrincipalLanguage="false">
+ <principalLanguage id="en" customLocalizationFile="" />
+ <additionalLanguages />
+ </languages>
+ <searchSequence>
+ <registry />
+ <envVar name="JAVA_HOME" />
+ <envVar name="JDK_HOME" />
+ </searchSequence>
+ <variables>
+ <variable name="variable" value="" description="" category="" />
+ </variables>
+ <mergedProjects />
+ <codeSigning macEnabled="false" macPkcs12File="" windowsEnabled="false" windowsKeySource="pvkAndSpc" windowsPvkFile="" windowsSpcFile="" windowsPkcs12File="" />
+ </application>
+ <files keepModificationTimes="false" missingFilesStrategy="warn" globalExcludeSuffixes=".svn,.CVS,*.psd,*.java" defaultOverwriteMode="4" defaultUninstallMode="0" launcherOverwriteMode="3" defaultFileMode="644" defaultDirMode="755">
+ <filesets />
+ <roots />
+ <mountPoints>
+ <mountPoint id="1691242408" root="" location="class" mode="755" />
+ <mountPoint id="1691242409" root="" location="class/resources" mode="755" />
+ <mountPoint id="1691242410" root="" location="class/resources/files" mode="755" />
+ <mountPoint id="1691243191" root="" location="class/resources/icons" mode="755" />
+ <mountPoint id="1691242151" root="" location="jars" mode="755" />
+ <mountPoint id="1691242160" root="" location="" mode="755" />
+ </mountPoints>
+ <entries>
+ <fileEntry mountPoint="1691242410" file="../resources/files/megan5PublicKey.txt" overwriteMode="4" shared="false" fileMode="644" uninstallMode="0" overrideFileMode="false" overrideOverwriteMode="false" overrideUninstallMode="false" />
+ <fileEntry mountPoint="1691243191" file="../resources/icons/malt-build.icns" overwriteMode="4" shared="false" fileMode="644" uninstallMode="0" overrideFileMode="false" overrideOverwriteMode="false" overrideUninstallMode="false" />
+ <fileEntry mountPoint="1691243191" file="../resources/icons/malt-build16.png" overwriteMode="4" shared="false" fileMode="644" uninstallMode="0" overrideFileMode="false" overrideOverwriteMode="false" overrideUninstallMode="false" />
+ <fileEntry mountPoint="1691243191" file="../resources/icons/malt-build32.png" overwriteMode="4" shared="false" fileMode="644" uninstallMode="0" overrideFileMode="false" overrideOverwriteMode="false" overrideUninstallMode="false" />
+ <fileEntry mountPoint="1691243191" file="../resources/icons/malt-build48.png" overwriteMode="4" shared="false" fileMode="644" uninstallMode="0" overrideFileMode="false" overrideOverwriteMode="false" overrideUninstallMode="false" />
+ <fileEntry mountPoint="1691243191" file="../resources/icons/malt-run.icns" overwriteMode="4" shared="false" fileMode="644" uninstallMode="0" overrideFileMode="false" overrideOverwriteMode="false" overrideUninstallMode="false" />
+ <fileEntry mountPoint="1691243191" file="../resources/icons/malt-run16.png" overwriteMode="4" shared="false" fileMode="644" uninstallMode="0" overrideFileMode="false" overrideOverwriteMode="false" overrideUninstallMode="false" />
+ <fileEntry mountPoint="1691243191" file="../resources/icons/malt-run32.png" overwriteMode="4" shared="false" fileMode="644" uninstallMode="0" overrideFileMode="false" overrideOverwriteMode="false" overrideUninstallMode="false" />
+ <fileEntry mountPoint="1691243191" file="../resources/icons/malt-run48.png" overwriteMode="4" shared="false" fileMode="644" uninstallMode="0" overrideFileMode="false" overrideOverwriteMode="false" overrideUninstallMode="false" />
+ <fileEntry mountPoint="1691242408" file="../resources/log4j.properties" overwriteMode="4" shared="false" fileMode="644" uninstallMode="0" overrideFileMode="true" overrideOverwriteMode="true" overrideUninstallMode="true" />
+ <fileEntry mountPoint="1691242151" file="../jars/picard-1.105.jar" overwriteMode="4" shared="false" fileMode="644" uninstallMode="0" overrideFileMode="false" overrideOverwriteMode="false" overrideUninstallMode="false" />
+ <fileEntry mountPoint="1691242151" file="../jars/picard-license.txt" overwriteMode="4" shared="false" fileMode="644" uninstallMode="0" overrideFileMode="false" overrideOverwriteMode="false" overrideUninstallMode="false" />
+ <fileEntry mountPoint="1691242151" file="../antbuild/MALT.jar" overwriteMode="4" shared="false" fileMode="644" uninstallMode="0" overrideFileMode="false" overrideOverwriteMode="false" overrideUninstallMode="false" />
+ <fileEntry mountPoint="1691242160" file="./License.txt" overwriteMode="4" shared="false" fileMode="644" uninstallMode="0" overrideFileMode="true" overrideOverwriteMode="true" overrideUninstallMode="true" />
+ <fileEntry mountPoint="1691242160" file="../tex/manual/manual.pdf" overwriteMode="4" shared="false" fileMode="644" uninstallMode="0" overrideFileMode="true" overrideOverwriteMode="true" overrideUninstallMode="true" />
+ </entries>
+ <components>
+ <component name="Manual" id="1691242193" customizedId="" displayDescription="false" hideHelpButton="false" selected="true" changeable="true" downloadable="false" hidden="false">
+ <description />
+ <include all="false">
+ <entry location="License.txt" fileType="regular" />
+ <entry location="manual.pdf" fileType="regular" />
+ </include>
+ <dependencies />
+ </component>
+ <component name="malt-build" id="1691243097" customizedId="" displayDescription="false" hideHelpButton="false" selected="true" changeable="true" downloadable="false" hidden="false">
+ <description />
+ <include all="false">
+ <entry location="class" fileType="regular" />
+ <entry location="jars" fileType="regular" />
+ <entry location="License.txt" fileType="regular" />
+ <entry location="malt-build-gui" fileType="launcher" />
+ <entry location="malt-build" fileType="launcher" />
+ </include>
+ <dependencies />
+ </component>
+ <component name="malt-run" id="1691243098" customizedId="" displayDescription="false" hideHelpButton="false" selected="true" changeable="true" downloadable="false" hidden="false">
+ <description />
+ <include all="false">
+ <entry location="class" fileType="regular" />
+ <entry location="jars" fileType="regular" />
+ <entry location="License.txt" fileType="regular" />
+ <entry location="malt-run-gui" fileType="launcher" />
+ <entry location="malt-run" fileType="launcher" />
+ </include>
+ <dependencies />
+ </component>
+ <component name="malt2-build" id="1691243304" customizedId="" displayDescription="false" hideHelpButton="false" selected="true" changeable="true" downloadable="false" hidden="false">
+ <description />
+ <include all="false">
+ <entry location="class" fileType="regular" />
+ <entry location="jars" fileType="regular" />
+ <entry location="License.txt" fileType="regular" />
+ <entry location="malt2-build-gui" fileType="launcher" />
+ <entry location="malt2-build" fileType="launcher" />
+ </include>
+ <dependencies />
+ </component>
+ <component name="malt2-run" id="1691243305" customizedId="" displayDescription="false" hideHelpButton="false" selected="true" changeable="true" downloadable="false" hidden="false">
+ <description />
+ <include all="false">
+ <entry location="class" fileType="regular" />
+ <entry location="jars" fileType="regular" />
+ <entry location="License.txt" fileType="regular" />
+ <entry location="malt2-run-gui" fileType="launcher" />
+ <entry location="malt2-run" fileType="launcher" />
+ </include>
+ <dependencies />
+ </component>
+ </components>
+ </files>
+ <launchers>
+ <launcher name="malt-build-gui" id="1691242235" customizedId="" external="false" excludeFromMenu="false" unixMode="755" menuName="" icnsFile="../resources/icons/malt-build.icns" customMacBundleIdentifier="false" macBundleIdentifier="" swtApp="false" fileset="" macBundleBinary="JavaApplicationStub" addMacEntitlements="false" macEntitlementsFile="">
+ <executable name="malt-build-gui" type="1" iconSet="true" iconFile="" executableDir="" redirectStderr="true" stderrFile="malt-build.log" stderrMode="overwrite" redirectStdout="true" stdoutFile="malt-build.log" stdoutMode="overwrite" failOnStderrOutput="true" executableMode="1" changeWorkingDirectory="false" workingDirectory="." singleInstance="false" serviceStartType="2" serviceDependencies="" serviceDescription="" jreLocation="" executionLevel="asInvoker" checkConsoleParameter="fa [...]
+ <versionInfo include="false" fileVersion="" fileDescription="" legalCopyright="" internalName="" productName="" />
+ </executable>
+ <splashScreen show="false" autoOff="true" alwaysOnTop="true" width="450" height="300" bitmapFile="" java6SplashScreen="false">
+ <text>
+ <statusLine x="383" y="15" text="" font="Arial" fontSize="8" fontColor="0,0,0" fontWeight="500" />
+ <versionLine x="17" y="10" text="MALT ${compiler:sys.version}" font="Arial" fontSize="12" fontColor="0,0,0" fontWeight="500" />
+ </text>
+ </splashScreen>
+ <java mainClass="malt.MaltBuild" vmParameters="-server -Dapple.laf.useScreenMenuBar=true -Duser.language=en -Duser.region=US -XX:NewRatio=2" arguments="--argsGui" allowVMPassthroughParameters="true" preferredVM="server" bundleRuntime="true">
+ <classPath>
+ <directory location="class" failOnError="false" />
+ <scanDirectory location="jars" failOnError="false" />
+ </classPath>
+ <nativeLibraryDirectories />
+ </java>
+ <includedFiles />
+ <unextractableFiles />
+ <vmOptionsFile mode="template" overwriteMode="1" fileMode="644">
+ <content>-XX:NewRatio=2
+-Xmx${installer:myXmx}</content>
+ </vmOptionsFile>
+ <customScript mode="1" file="">
+ <content />
+ </customScript>
+ <infoPlist mode="1" file="">
+ <content />
+ </infoPlist>
+ <iconImageFiles>
+ <file path="../resources/icons/malt-build16.png" />
+ <file path="../resources/icons/malt-build32.png" />
+ <file path="../resources/icons/malt-build48.png" />
+ </iconImageFiles>
+ </launcher>
+ <launcher name="malt-run-gui" id="1691243093" customizedId="" external="false" excludeFromMenu="false" unixMode="755" menuName="" icnsFile="../resources/icons/malt-run.icns" customMacBundleIdentifier="false" macBundleIdentifier="" swtApp="false" fileset="" macBundleBinary="JavaApplicationStub" addMacEntitlements="false" macEntitlementsFile="">
+ <executable name="malt-run-gui" type="1" iconSet="true" iconFile="" executableDir="" redirectStderr="true" stderrFile="malt-run.log" stderrMode="overwrite" redirectStdout="true" stdoutFile="malt-run.log" stdoutMode="overwrite" failOnStderrOutput="true" executableMode="1" changeWorkingDirectory="false" workingDirectory="." singleInstance="false" serviceStartType="2" serviceDependencies="" serviceDescription="" jreLocation="" executionLevel="asInvoker" checkConsoleParameter="false" g [...]
+ <versionInfo include="false" fileVersion="" fileDescription="" legalCopyright="" internalName="" productName="" />
+ </executable>
+ <splashScreen show="false" autoOff="true" alwaysOnTop="true" width="0" height="0" bitmapFile="" java6SplashScreen="false">
+ <text>
+ <statusLine x="20" y="20" text="" font="Arial" fontSize="8" fontColor="0,0,0" fontWeight="500" />
+ <versionLine x="20" y="40" text="version ${compiler:sys.version}" font="Arial" fontSize="8" fontColor="0,0,0" fontWeight="500" />
+ </text>
+ </splashScreen>
+ <java mainClass="malt.MaltRun" vmParameters="-server -Dapple.laf.useScreenMenuBar=true -Duser.language=en -Duser.region=US -XX:NewRatio=2" arguments="--argsGui" allowVMPassthroughParameters="true" preferredVM="" bundleRuntime="true">
+ <classPath>
+ <directory location="class" failOnError="false" />
+ <scanDirectory location="jars" failOnError="false" />
+ </classPath>
+ <nativeLibraryDirectories />
+ </java>
+ <includedFiles />
+ <unextractableFiles />
+ <vmOptionsFile mode="template" overwriteMode="0" fileMode="644">
+ <content>-XX:NewRatio=2
+-Xmx${installer:myXmx}</content>
+ </vmOptionsFile>
+ <customScript mode="1" file="">
+ <content />
+ </customScript>
+ <infoPlist mode="1" file="">
+ <content />
+ </infoPlist>
+ <iconImageFiles>
+ <file path="../resources/icons/malt-run16.png" />
+ <file path="../resources/icons/malt-run32.png" />
+ <file path="../resources/icons/malt-run48.png" />
+ </iconImageFiles>
+ </launcher>
+ <launcher name="malt-build" id="1691243204" customizedId="" external="false" excludeFromMenu="false" unixMode="755" menuName="" icnsFile="../resources/icons/malt-build.icns" customMacBundleIdentifier="false" macBundleIdentifier="" swtApp="false" fileset="" macBundleBinary="JavaApplicationStub" addMacEntitlements="false" macEntitlementsFile="">
+ <executable name="malt-build" type="1" iconSet="true" iconFile="" executableDir="" redirectStderr="false" stderrFile="error.log" stderrMode="overwrite" redirectStdout="false" stdoutFile="output.log" stdoutMode="overwrite" failOnStderrOutput="true" executableMode="2" changeWorkingDirectory="false" workingDirectory="." singleInstance="false" serviceStartType="2" serviceDependencies="" serviceDescription="" jreLocation="" executionLevel="asInvoker" checkConsoleParameter="false" global [...]
+ <versionInfo include="false" fileVersion="" fileDescription="" legalCopyright="" internalName="" productName="" />
+ </executable>
+ <splashScreen show="false" autoOff="true" alwaysOnTop="true" width="450" height="300" bitmapFile="" java6SplashScreen="false">
+ <text>
+ <statusLine x="383" y="15" text="" font="Arial" fontSize="8" fontColor="0,0,0" fontWeight="500" />
+ <versionLine x="17" y="10" text="MALT ${compiler:sys.version}" font="Arial" fontSize="12" fontColor="0,0,0" fontWeight="500" />
+ </text>
+ </splashScreen>
+ <java mainClass="malt.MaltBuild" vmParameters="-server -Dapple.laf.useScreenMenuBar=true -Duser.language=en -Duser.region=US -XX:NewRatio=2" arguments="" allowVMPassthroughParameters="true" preferredVM="server" bundleRuntime="true">
+ <classPath>
+ <directory location="class" failOnError="false" />
+ <scanDirectory location="jars" failOnError="false" />
+ </classPath>
+ <nativeLibraryDirectories />
+ </java>
+ <includedFiles />
+ <unextractableFiles />
+ <vmOptionsFile mode="template" overwriteMode="1" fileMode="644">
+ <content>-XX:NewRatio=2
+-Xmx${installer:myXmx}</content>
+ </vmOptionsFile>
+ <customScript mode="1" file="">
+ <content />
+ </customScript>
+ <infoPlist mode="1" file="">
+ <content />
+ </infoPlist>
+ <iconImageFiles>
+ <file path="../resources/icons/malt-build16.png" />
+ <file path="../resources/icons/malt-build32.png" />
+ <file path="../resources/icons/malt-build48.png" />
+ </iconImageFiles>
+ </launcher>
+ <launcher name="malt-run" id="1691243207" customizedId="" external="false" excludeFromMenu="false" unixMode="755" menuName="" icnsFile="../resources/icons/malt-run.icns" customMacBundleIdentifier="false" macBundleIdentifier="" swtApp="false" fileset="" macBundleBinary="JavaApplicationStub" addMacEntitlements="false" macEntitlementsFile="">
+ <executable name="malt-run" type="1" iconSet="true" iconFile="" executableDir="" redirectStderr="false" stderrFile="error.log" stderrMode="overwrite" redirectStdout="false" stdoutFile="output.log" stdoutMode="overwrite" failOnStderrOutput="true" executableMode="2" changeWorkingDirectory="false" workingDirectory="." singleInstance="false" serviceStartType="2" serviceDependencies="" serviceDescription="" jreLocation="" executionLevel="asInvoker" checkConsoleParameter="false" globalSi [...]
+ <versionInfo include="false" fileVersion="" fileDescription="" legalCopyright="" internalName="" productName="" />
+ </executable>
+ <splashScreen show="false" autoOff="true" alwaysOnTop="true" width="0" height="0" bitmapFile="" java6SplashScreen="false">
+ <text>
+ <statusLine x="20" y="20" text="" font="Arial" fontSize="8" fontColor="0,0,0" fontWeight="500" />
+ <versionLine x="20" y="40" text="version ${compiler:sys.version}" font="Arial" fontSize="8" fontColor="0,0,0" fontWeight="500" />
+ </text>
+ </splashScreen>
+ <java mainClass="malt.MaltRun" vmParameters="-server -Dapple.laf.useScreenMenuBar=true -Duser.language=en -Duser.region=US -XX:NewRatio=2" arguments="" allowVMPassthroughParameters="true" preferredVM="" bundleRuntime="true">
+ <classPath>
+ <directory location="class" failOnError="false" />
+ <scanDirectory location="jars" failOnError="false" />
+ </classPath>
+ <nativeLibraryDirectories />
+ </java>
+ <includedFiles />
+ <unextractableFiles />
+ <vmOptionsFile mode="template" overwriteMode="0" fileMode="644">
+ <content>-XX:NewRatio=2
+-Xmx${installer:myXmx}</content>
+ </vmOptionsFile>
+ <customScript mode="1" file="">
+ <content />
+ </customScript>
+ <infoPlist mode="1" file="">
+ <content />
+ </infoPlist>
+ <iconImageFiles>
+ <file path="../resources/icons/malt-run16.png" />
+ <file path="../resources/icons/malt-run32.png" />
+ <file path="../resources/icons/malt-run48.png" />
+ </iconImageFiles>
+ </launcher>
+ <launcher name="malt2-build-gui" id="1691243296" customizedId="" external="false" excludeFromMenu="false" unixMode="755" menuName="" icnsFile="../resources/icons/malt-build.icns" customMacBundleIdentifier="false" macBundleIdentifier="" swtApp="false" fileset="" macBundleBinary="JavaApplicationStub" addMacEntitlements="false" macEntitlementsFile="">
+ <executable name="malt2-build-gui" type="1" iconSet="true" iconFile="" executableDir="" redirectStderr="true" stderrFile="malt-build.log" stderrMode="overwrite" redirectStdout="true" stdoutFile="malt-build.log" stdoutMode="overwrite" failOnStderrOutput="true" executableMode="1" changeWorkingDirectory="false" workingDirectory="." singleInstance="false" serviceStartType="2" serviceDependencies="" serviceDescription="" jreLocation="" executionLevel="asInvoker" checkConsoleParameter="f [...]
+ <versionInfo include="false" fileVersion="" fileDescription="" legalCopyright="" internalName="" productName="" />
+ </executable>
+ <splashScreen show="false" autoOff="true" alwaysOnTop="true" width="450" height="300" bitmapFile="" java6SplashScreen="false">
+ <text>
+ <statusLine x="383" y="15" text="" font="Arial" fontSize="8" fontColor="0,0,0" fontWeight="500" />
+ <versionLine x="17" y="10" text="MALT ${compiler:sys.version}" font="Arial" fontSize="12" fontColor="0,0,0" fontWeight="500" />
+ </text>
+ </splashScreen>
+ <java mainClass="malt.malt2.MaltBuild2" vmParameters="-server -Dapple.laf.useScreenMenuBar=true -Duser.language=en -Duser.region=US" arguments="--argsGui" allowVMPassthroughParameters="true" preferredVM="server" bundleRuntime="true">
+ <classPath>
+ <directory location="class" failOnError="false" />
+ <scanDirectory location="jars" failOnError="false" />
+ </classPath>
+ <nativeLibraryDirectories />
+ </java>
+ <includedFiles />
+ <unextractableFiles />
+ <vmOptionsFile mode="template" overwriteMode="1" fileMode="644">
+ <content>-XX:NewRatio=2
+-Xmx${installer:myXmx}</content>
+ </vmOptionsFile>
+ <customScript mode="1" file="">
+ <content />
+ </customScript>
+ <infoPlist mode="1" file="">
+ <content />
+ </infoPlist>
+ <iconImageFiles>
+ <file path="../resources/icons/malt-build16.png" />
+ <file path="../resources/icons/malt-build32.png" />
+ <file path="../resources/icons/malt-build48.png" />
+ </iconImageFiles>
+ </launcher>
+ <launcher name="malt2-run-gui" id="1691243297" customizedId="" external="false" excludeFromMenu="false" unixMode="755" menuName="" icnsFile="../resources/icons/malt-run.icns" customMacBundleIdentifier="false" macBundleIdentifier="" swtApp="false" fileset="" macBundleBinary="JavaApplicationStub" addMacEntitlements="false" macEntitlementsFile="">
+ <executable name="malt2-run-gui" type="1" iconSet="true" iconFile="" executableDir="" redirectStderr="true" stderrFile="malt-run.log" stderrMode="overwrite" redirectStdout="true" stdoutFile="malt-run.log" stdoutMode="overwrite" failOnStderrOutput="true" executableMode="1" changeWorkingDirectory="false" workingDirectory="." singleInstance="false" serviceStartType="2" serviceDependencies="" serviceDescription="" jreLocation="" executionLevel="asInvoker" checkConsoleParameter="false" [...]
+ <versionInfo include="false" fileVersion="" fileDescription="" legalCopyright="" internalName="" productName="" />
+ </executable>
+ <splashScreen show="false" autoOff="true" alwaysOnTop="true" width="0" height="0" bitmapFile="" java6SplashScreen="false">
+ <text>
+ <statusLine x="20" y="20" text="" font="Arial" fontSize="8" fontColor="0,0,0" fontWeight="500" />
+ <versionLine x="20" y="40" text="version ${compiler:sys.version}" font="Arial" fontSize="8" fontColor="0,0,0" fontWeight="500" />
+ </text>
+ </splashScreen>
+ <java mainClass="malt.malt2.MaltRun2" vmParameters="-server -Dapple.laf.useScreenMenuBar=true -Duser.language=en -Duser.region=US" arguments="--argsGui" allowVMPassthroughParameters="true" preferredVM="" bundleRuntime="true">
+ <classPath>
+ <directory location="class" failOnError="false" />
+ <scanDirectory location="jars" failOnError="false" />
+ </classPath>
+ <nativeLibraryDirectories />
+ </java>
+ <includedFiles />
+ <unextractableFiles />
+ <vmOptionsFile mode="template" overwriteMode="0" fileMode="644">
+ <content>-XX:NewRatio=2
+-Xmx${installer:myXmx}</content>
+ </vmOptionsFile>
+ <customScript mode="1" file="">
+ <content />
+ </customScript>
+ <infoPlist mode="1" file="">
+ <content />
+ </infoPlist>
+ <iconImageFiles>
+ <file path="../resources/icons/malt-run16.png" />
+ <file path="../resources/icons/malt-run32.png" />
+ <file path="../resources/icons/malt-run48.png" />
+ </iconImageFiles>
+ </launcher>
+ <launcher name="malt2-build" id="1691243298" customizedId="" external="false" excludeFromMenu="false" unixMode="755" menuName="" icnsFile="../resources/icons/malt-build.icns" customMacBundleIdentifier="false" macBundleIdentifier="" swtApp="false" fileset="" macBundleBinary="JavaApplicationStub" addMacEntitlements="false" macEntitlementsFile="">
+ <executable name="malt2-build" type="1" iconSet="true" iconFile="" executableDir="" redirectStderr="false" stderrFile="error.log" stderrMode="overwrite" redirectStdout="false" stdoutFile="output.log" stdoutMode="overwrite" failOnStderrOutput="true" executableMode="2" changeWorkingDirectory="false" workingDirectory="." singleInstance="false" serviceStartType="2" serviceDependencies="" serviceDescription="" jreLocation="" executionLevel="asInvoker" checkConsoleParameter="false" globa [...]
+ <versionInfo include="false" fileVersion="" fileDescription="" legalCopyright="" internalName="" productName="" />
+ </executable>
+ <splashScreen show="false" autoOff="true" alwaysOnTop="true" width="450" height="300" bitmapFile="" java6SplashScreen="false">
+ <text>
+ <statusLine x="383" y="15" text="" font="Arial" fontSize="8" fontColor="0,0,0" fontWeight="500" />
+ <versionLine x="17" y="10" text="MALT ${compiler:sys.version}" font="Arial" fontSize="12" fontColor="0,0,0" fontWeight="500" />
+ </text>
+ </splashScreen>
+ <java mainClass="malt.malt2.MaltBuild2" vmParameters="-server -Dapple.laf.useScreenMenuBar=true -Duser.language=en -Duser.region=US" arguments="" allowVMPassthroughParameters="true" preferredVM="server" bundleRuntime="true">
+ <classPath>
+ <directory location="class" failOnError="false" />
+ <scanDirectory location="jars" failOnError="false" />
+ </classPath>
+ <nativeLibraryDirectories />
+ </java>
+ <includedFiles />
+ <unextractableFiles />
+ <vmOptionsFile mode="template" overwriteMode="1" fileMode="644">
+ <content>-XX:NewRatio=2
+-Xmx${installer:myXmx}</content>
+ </vmOptionsFile>
+ <customScript mode="1" file="">
+ <content />
+ </customScript>
+ <infoPlist mode="1" file="">
+ <content />
+ </infoPlist>
+ <iconImageFiles>
+ <file path="../resources/icons/malt-build16.png" />
+ <file path="../resources/icons/malt-build32.png" />
+ <file path="../resources/icons/malt-build48.png" />
+ </iconImageFiles>
+ </launcher>
+ <launcher name="malt2-run" id="1691243299" customizedId="" external="false" excludeFromMenu="false" unixMode="755" menuName="" icnsFile="../resources/icons/malt-run.icns" customMacBundleIdentifier="false" macBundleIdentifier="" swtApp="false" fileset="" macBundleBinary="JavaApplicationStub" addMacEntitlements="false" macEntitlementsFile="">
+ <executable name="malt2-run" type="1" iconSet="true" iconFile="" executableDir="" redirectStderr="false" stderrFile="error.log" stderrMode="overwrite" redirectStdout="false" stdoutFile="output.log" stdoutMode="overwrite" failOnStderrOutput="true" executableMode="2" changeWorkingDirectory="false" workingDirectory="." singleInstance="false" serviceStartType="2" serviceDependencies="" serviceDescription="" jreLocation="" executionLevel="asInvoker" checkConsoleParameter="false" globalS [...]
+ <versionInfo include="false" fileVersion="" fileDescription="" legalCopyright="" internalName="" productName="" />
+ </executable>
+ <splashScreen show="false" autoOff="true" alwaysOnTop="true" width="0" height="0" bitmapFile="" java6SplashScreen="false">
+ <text>
+ <statusLine x="20" y="20" text="" font="Arial" fontSize="8" fontColor="0,0,0" fontWeight="500" />
+ <versionLine x="20" y="40" text="version ${compiler:sys.version}" font="Arial" fontSize="8" fontColor="0,0,0" fontWeight="500" />
+ </text>
+ </splashScreen>
+ <java mainClass="malt.malt2.MaltRun2" vmParameters="-server -Dapple.laf.useScreenMenuBar=true -Duser.language=en -Duser.region=US" arguments="" allowVMPassthroughParameters="true" preferredVM="" bundleRuntime="true">
+ <classPath>
+ <directory location="class" failOnError="false" />
+ <scanDirectory location="jars" failOnError="false" />
+ </classPath>
+ <nativeLibraryDirectories />
+ </java>
+ <includedFiles />
+ <unextractableFiles />
+ <vmOptionsFile mode="template" overwriteMode="0" fileMode="644">
+ <content>-XX:NewRatio=2
+-Xmx${installer:myXmx}</content>
+ </vmOptionsFile>
+ <customScript mode="1" file="">
+ <content />
+ </customScript>
+ <infoPlist mode="1" file="">
+ <content />
+ </infoPlist>
+ <iconImageFiles>
+ <file path="../resources/icons/malt-run16.png" />
+ <file path="../resources/icons/malt-run32.png" />
+ <file path="../resources/icons/malt-run48.png" />
+ </iconImageFiles>
+ </launcher>
+ </launchers>
+ <installerGui installerType="1" addOnAppId="" suggestPreviousLocations="true" autoUpdateDescriptorUrl="http://www-ab.informatik.uni-tuebingen.de/data/software/malt/download/updates.xml" useAutoUpdateBaseUrl="false" autoUpdateBaseUrl="">
+ <customCode />
+ <autoUpdate useMinUpdatableVersion="true" minUpdatableVersion="0" useMaxUpdatableVersion="false" maxUpdatableVersion="">
+ <commentFiles />
+ <customAttributes />
+ </autoUpdate>
+ <applications>
+ <application name="" id="installer" customizedId="" beanClass="com.install4j.runtime.beans.applications.InstallerApplication" enabled="true" commentSet="false" comment="" actionElevationType="none" fileset="" customIcnsFile="" customIcoFile="" macEntitlementsFile="" automaticLauncherIntegration="false" launchMode="startupFirstWindow" launchInNewProcess="false" launchSchedule="updateSchedule" allLaunchers="true">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.applications.InstallerApplication">
+ <void property="customWatermarkText">
+ <string>Daniel Huson's lab, University of Tübingen</string>
+ </void>
+ <void property="frameSizeClientArea">
+ <boolean>false</boolean>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <launcherIds />
+ <variables />
+ <startup>
+ <screen name="" id="15" customizedId="" beanClass="com.install4j.runtime.beans.screens.StartupScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.StartupScreen" />
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions>
+ <action name="" id="1691242625" customizedId="" beanClass="com.install4j.runtime.beans.actions.misc.RequestPrivilegesAction" enabled="true" commentSet="false" comment="" actionElevationType="none" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.misc.RequestPrivilegesAction" />
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ </actions>
+ <formComponents />
+ </screen>
+ </startup>
+ <screens>
+ <screen name="" id="1691242173" customizedId="" beanClass="com.install4j.runtime.beans.screens.WelcomeScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.WelcomeScreen" />
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions>
+ <action name="" id="1691242626" customizedId="" beanClass="com.install4j.runtime.beans.actions.misc.LoadResponseFileAction" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" multiExec="true" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.misc.LoadResponseFileAction" />
+ </java>
+ </serializedBean>
+ <condition>context.getBooleanVariable("sys.confirmedUpdateInstallation")</condition>
+ </action>
+ </actions>
+ <formComponents />
+ </screen>
+ <screen name="" id="1691242174" customizedId="" beanClass="com.install4j.runtime.beans.screens.LicenseScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.LicenseScreen">
+ <void property="displayedTextFile">
+ <void property="languageIdToExternalFile">
+ <void method="put">
+ <string>en</string>
+ <object class="com.install4j.api.beans.ExternalFile">
+ <string>./License.txt</string>
+ </object>
+ </void>
+ </void>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions />
+ <formComponents />
+ </screen>
+ <screen name="" id="1691242175" customizedId="" beanClass="com.install4j.runtime.beans.screens.InstallationDirectoryScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.InstallationDirectoryScreen" />
+ </java>
+ </serializedBean>
+ <condition>!context.getBooleanVariable("sys.confirmedUpdateInstallation")</condition>
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions />
+ <formComponents />
+ </screen>
+ <screen name="" id="1691242176" customizedId="" beanClass="com.install4j.runtime.beans.screens.ComponentsScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.ComponentsScreen" />
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions />
+ <formComponents />
+ </screen>
+ <screen name="" id="1691242177" customizedId="" beanClass="com.install4j.runtime.beans.screens.StandardProgramGroupScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.StandardProgramGroupScreen">
+ <void property="programGroupName">
+ <string>${compiler:sys.fullName}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition>!context.getBooleanVariable("sys.confirmedUpdateInstallation")</condition>
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions />
+ <formComponents />
+ </screen>
+ <screen name="" id="1691242178" customizedId="" beanClass="com.install4j.runtime.beans.screens.FileAssociationsScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.FileAssociationsScreen" />
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions />
+ <formComponents />
+ </screen>
+ <screen name="" id="1691242179" customizedId="" beanClass="com.install4j.runtime.beans.screens.InstallationScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="true" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.InstallationScreen" />
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions>
+ <action name="" id="1691242180" customizedId="" beanClass="com.install4j.runtime.beans.actions.InstallFilesAction" enabled="true" commentSet="false" comment="" actionElevationType="elevated" rollbackBarrier="false" multiExec="false" failureStrategy="2" errorMessage="${i18n:FileCorrupted}">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.InstallFilesAction" />
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <action name="" id="1691242181" customizedId="" beanClass="com.install4j.runtime.beans.actions.desktop.CreateProgramGroupAction" enabled="true" commentSet="false" comment="" actionElevationType="elevated" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.desktop.CreateProgramGroupAction">
+ <void property="uninstallerMenuName">
+ <string>${i18n:UninstallerMenuEntry(${compiler:sys.fullName})}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition>!context.getBooleanVariable("sys.programGroupDisabled")</condition>
+ </action>
+ <action name="" id="1691242182" customizedId="" beanClass="com.install4j.runtime.beans.actions.desktop.RegisterAddRemoveAction" enabled="true" commentSet="false" comment="" actionElevationType="elevated" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.desktop.RegisterAddRemoveAction">
+ <void property="itemName">
+ <string>${compiler:sys.fullName} ${compiler:sys.version}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ </actions>
+ <formComponents />
+ </screen>
+ <screen name="" id="1691242744" customizedId="" beanClass="com.install4j.runtime.beans.screens.FormScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.FormScreen">
+ <void property="subTitle">
+ <string>Check for updates how often?</string>
+ </void>
+ <void property="title">
+ <string>MALT Update Scheduler</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions />
+ <formComponents>
+ <formComponent name="" id="1691242745" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.UpdateScheduleSelectorComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.UpdateScheduleSelectorComponent" />
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript />
+ </formComponent>
+ </formComponents>
+ </screen>
+ <screen name="" id="1691242944" customizedId="" beanClass="com.install4j.runtime.beans.screens.FormScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.FormScreen">
+ <void property="subTitle">
+ <string>Set maximum allowed memory usage for MALT</string>
+ </void>
+ <void property="title">
+ <string>Set MALT memory</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition> !context.getMediaName().toLowerCase().contains("windows") || context.getMediaName().toLowerCase().contains("x64") || context.getMediaName().toLowerCase().contains("64bits") </condition>
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions>
+ <action name="" id="1691242947" customizedId="" beanClass="com.install4j.runtime.beans.actions.misc.AddVmOptionsAction" enabled="true" commentSet="false" comment="" actionElevationType="elevated" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.misc.AddVmOptionsAction">
+ <void property="launcherId">
+ <string>1691242235</string>
+ </void>
+ <void property="vmOptions">
+ <array class="java.lang.String" length="1">
+ <void index="0">
+ <string>-Xmx${installer:myXmx}M</string>
+ </void>
+ </array>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ </actions>
+ <formComponents>
+ <formComponent name="" id="1691242946" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.NumberSpinnerComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.NumberSpinnerComponent">
+ <void property="helpText">
+ <string>Set maximum amount of memory that MALT can use. </string>
+ </void>
+ <void property="initialValue">
+ <int>64000</int>
+ </void>
+ <void property="labelText">
+ <string>Set max memory usage (in megabytes)</string>
+ </void>
+ <void property="maxValue">
+ <int>512000</int>
+ </void>
+ <void property="minValue">
+ <int>4000</int>
+ </void>
+ <void property="stepSize">
+ <int>1000</int>
+ </void>
+ <void property="variableName">
+ <string>myXmx</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript />
+ </formComponent>
+ </formComponents>
+ </screen>
+ <screen name="" id="1691242183" customizedId="" beanClass="com.install4j.runtime.beans.screens.FinishedScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="true" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.FinishedScreen" />
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions />
+ <formComponents />
+ </screen>
+ </screens>
+ </application>
+ <application name="" id="uninstaller" customizedId="" beanClass="com.install4j.runtime.beans.applications.UninstallerApplication" enabled="true" commentSet="false" comment="" actionElevationType="none" fileset="" customIcnsFile="" customIcoFile="" macEntitlementsFile="" automaticLauncherIntegration="false" launchMode="startupFirstWindow" launchInNewProcess="false" launchSchedule="updateSchedule" allLaunchers="true">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.applications.UninstallerApplication">
+ <void property="customWatermarkText">
+ <string>Daniel Huson's lab, University of Tübingen</string>
+ </void>
+ <void property="frameSizeClientArea">
+ <boolean>false</boolean>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <launcherIds />
+ <variables />
+ <startup>
+ <screen name="" id="16" customizedId="" beanClass="com.install4j.runtime.beans.screens.StartupScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.StartupScreen" />
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions>
+ <link name="" id="1691242627" customizedId="" beanClass="" enabled="true" targetId="1691242625" mergedProjectId="" />
+ </actions>
+ <formComponents />
+ </screen>
+ </startup>
+ <screens>
+ <screen name="" id="1691242184" customizedId="" beanClass="com.install4j.runtime.beans.screens.UninstallWelcomeScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.UninstallWelcomeScreen" />
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions />
+ <formComponents />
+ </screen>
+ <screen name="" id="1691242185" customizedId="" beanClass="com.install4j.runtime.beans.screens.UninstallationScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.UninstallationScreen" />
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions>
+ <action name="" id="1691242186" customizedId="" beanClass="com.install4j.runtime.beans.actions.UninstallFilesAction" enabled="true" commentSet="false" comment="" actionElevationType="elevated" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.UninstallFilesAction" />
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ </actions>
+ <formComponents />
+ </screen>
+ <screen name="" id="1691242187" customizedId="" beanClass="com.install4j.runtime.beans.screens.UninstallFailureScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="true" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.UninstallFailureScreen" />
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions />
+ <formComponents />
+ </screen>
+ <screen name="" id="1691242188" customizedId="" beanClass="com.install4j.runtime.beans.screens.UninstallSuccessScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="true" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.UninstallSuccessScreen" />
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions />
+ <formComponents />
+ </screen>
+ </screens>
+ </application>
+ <application name="Updater with silent version check" id="1691242905" customizedId="" beanClass="com.install4j.runtime.beans.applications.CustomApplication" enabled="true" commentSet="false" comment="" actionElevationType="none" fileset="" customIcnsFile="${compiler:sys.install4jHome}/resource/macos/updater.icns" customIcoFile="${compiler:sys.install4jHome}/resource/updater.ico" macEntitlementsFile="" automaticLauncherIntegration="true" launchMode="startupSync" launchInNewProcess=" [...]
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.applications.CustomApplication">
+ <void property="customIconImageFiles">
+ <void method="add">
+ <object class="com.install4j.api.beans.ExternalFile">
+ <string>${compiler:sys.install4jHome}/resource/updater_16.png</string>
+ </object>
+ </void>
+ <void method="add">
+ <object class="com.install4j.api.beans.ExternalFile">
+ <string>${compiler:sys.install4jHome}/resource/updater_32.png</string>
+ </object>
+ </void>
+ <void method="add">
+ <object class="com.install4j.api.beans.ExternalFile">
+ <string>${compiler:sys.install4jHome}/resource/updater_48.png</string>
+ </object>
+ </void>
+ </void>
+ <void property="customWatermarkText">
+ <string>Daniel Huson's lab, University of Tübingen</string>
+ </void>
+ <void property="executableName">
+ <string>automaticUpdater</string>
+ </void>
+ <void property="useCustomIcon">
+ <boolean>true</boolean>
+ </void>
+ <void property="windowTitle">
+ <string>${i18n:updater.WindowTitle("${compiler:sys.fullName}")}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <launcherIds>
+ <launcher id="1691242235" />
+ <launcher id="1691243093" />
+ </launcherIds>
+ <variables />
+ <startup>
+ <screen name="" id="1691242906" customizedId="" beanClass="com.install4j.runtime.beans.screens.StartupScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.StartupScreen" />
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions>
+ <action name="" id="1691242932" customizedId="" beanClass="com.install4j.runtime.beans.actions.update.CheckForUpdateAction" enabled="true" commentSet="false" comment="" actionElevationType="none" rollbackBarrier="false" multiExec="false" failureStrategy="2" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.update.CheckForUpdateAction">
+ <void property="showError">
+ <boolean>false</boolean>
+ </void>
+ <void property="updateDescriptorUrl">
+ <string>${compiler:sys.updatesUrl}</string>
+ </void>
+ <void property="variable">
+ <string>updateDescriptor</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <action name="Update descriptor entry" id="1691242933" customizedId="" beanClass="com.install4j.runtime.beans.actions.control.SetVariableAction" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" multiExec="false" failureStrategy="2" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.control.SetVariableAction">
+ <void property="failIfNull">
+ <boolean>true</boolean>
+ </void>
+ <void property="script">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>((UpdateDescriptor)context.getVariable("updateDescriptor")).getPossibleUpdateEntry()</string>
+ </void>
+ </object>
+ </void>
+ <void property="variableName">
+ <string>updateDescriptorEntry</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <group name="Update available" id="1691242934" customizedId="" beanClass="com.install4j.runtime.beans.groups.ActionGroup" enabled="true" commentSet="false" comment="" actionElevationType="inherit">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.groups.ActionGroup">
+ <void property="conditionExpression">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>context.getVariable("updateDescriptorEntry") != null</string>
+ </void>
+ </object>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <beans>
+ <action name="New version" id="1691242935" customizedId="" beanClass="com.install4j.runtime.beans.actions.control.SetVariableAction" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.control.SetVariableAction">
+ <void property="script">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>((UpdateDescriptorEntry)context.getVariable("updateDescriptorEntry")).getNewVersion()</string>
+ </void>
+ </object>
+ </void>
+ <void property="variableName">
+ <string>updaterNewVersion</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <action name="Download size" id="1691242936" customizedId="" beanClass="com.install4j.runtime.beans.actions.control.SetVariableAction" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.control.SetVariableAction">
+ <void property="script">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>((UpdateDescriptorEntry)context.getVariable("updateDescriptorEntry")).getFileSizeVerbose()</string>
+ </void>
+ </object>
+ </void>
+ <void property="variableName">
+ <string>updaterDownloadSize</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <action name="Comment" id="1691242937" customizedId="" beanClass="com.install4j.runtime.beans.actions.control.SetVariableAction" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.control.SetVariableAction">
+ <void property="script">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>((UpdateDescriptorEntry)context.getVariable("updateDescriptorEntry")).getComment()</string>
+ </void>
+ </object>
+ </void>
+ <void property="variableName">
+ <string>updaterComment</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <action name="Download directory" id="1691242938" customizedId="" beanClass="com.install4j.runtime.beans.actions.control.SetVariableAction" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.control.SetVariableAction">
+ <void property="script">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>Util.getUserHome()</string>
+ </void>
+ </object>
+ </void>
+ <void property="variableName">
+ <string>updaterDownloadDir</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <action name="Download URL" id="1691242939" customizedId="" beanClass="com.install4j.runtime.beans.actions.control.SetVariableAction" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.control.SetVariableAction">
+ <void property="script">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>((UpdateDescriptorEntry)context.getVariable("updateDescriptorEntry")).getURL().toExternalForm()</string>
+ </void>
+ </object>
+ </void>
+ <void property="variableName">
+ <string>updaterDownloadUrl</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <action name="Archive" id="1691242940" customizedId="" beanClass="com.install4j.runtime.beans.actions.control.SetVariableAction" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.control.SetVariableAction">
+ <void property="script">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>((UpdateDescriptorEntry)context.getVariable("updateDescriptorEntry")).isArchive() ? Boolean.TRUE : Boolean.FALSE</string>
+ </void>
+ </object>
+ </void>
+ <void property="variableName">
+ <string>isArchive</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ </beans>
+ </group>
+ </actions>
+ <formComponents />
+ </screen>
+ </startup>
+ <screens>
+ <group name="Update available" id="1691242907" customizedId="" beanClass="com.install4j.runtime.beans.groups.ScreenGroup" enabled="true" commentSet="false" comment="" actionElevationType="inherit">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.groups.ScreenGroup">
+ <void property="conditionExpression">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>context.getVariable("updateDescriptorEntry") != null</string>
+ </void>
+ </object>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <beans>
+ <screen name="New version available" id="1691242908" customizedId="" beanClass="com.install4j.runtime.beans.screens.FormScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.FormScreen">
+ <void property="subTitle">
+ <string>${i18n:updater.NewVersionAvailableSubtitle("${compiler:sys.fullName}")}</string>
+ </void>
+ <void property="title">
+ <string>${i18n:updater.NewVersionAvailableTitle}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions />
+ <formComponents>
+ <formComponent name="" id="1691242909" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.KeyValuePairComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.KeyValuePairComponent">
+ <void property="labelText">
+ <string>${i18n:updater.CurrentVersionLabel}</string>
+ </void>
+ <void property="valueLabelColor">
+ <object class="java.awt.Color">
+ <int>128</int>
+ <int>0</int>
+ <int>0</int>
+ <int>255</int>
+ </object>
+ </void>
+ <void property="valueLabelFont">
+ <object class="java.awt.Font">
+ <string>dialog</string>
+ <int>1</int>
+ <int>0</int>
+ </object>
+ </void>
+ <void property="valueLabelText">
+ <string>${installer:sys.version}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript />
+ </formComponent>
+ <group name="" id="1691242910" customizedId="" beanClass="com.install4j.runtime.beans.groups.HorizontalFormComponentGroup" enabled="true" commentSet="false" comment="" actionElevationType="inherit">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.groups.HorizontalFormComponentGroup" />
+ </java>
+ </serializedBean>
+ <beans>
+ <formComponent name="" id="1691242911" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.KeyValuePairComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.KeyValuePairComponent">
+ <void property="labelText">
+ <string>${i18n:updater.NewVersionLabel}</string>
+ </void>
+ <void property="valueLabelColor">
+ <object class="java.awt.Color">
+ <int>0</int>
+ <int>128</int>
+ <int>0</int>
+ <int>255</int>
+ </object>
+ </void>
+ <void property="valueLabelFont">
+ <object class="java.awt.Font">
+ <string>dialog</string>
+ <int>1</int>
+ <int>0</int>
+ </object>
+ </void>
+ <void property="valueLabelText">
+ <string>${installer:updaterNewVersion}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript />
+ </formComponent>
+ <formComponent name="" id="1691242912" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.HyperlinkActionLabelComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="5" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.HyperlinkActionLabelComponent">
+ <void property="actionScript">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>context.goForward(1, false, false);</string>
+ </void>
+ </object>
+ </void>
+ <void property="hyperlinkText">
+ <string>${i18n:updater.ShowComments}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript> ((String)context.getVariable("updaterComment")).length() > 0</visibilityScript>
+ </formComponent>
+ </beans>
+ </group>
+ <formComponent name="" id="1691242913" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.SpacerComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.SpacerComponent" />
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript />
+ </formComponent>
+ <formComponent name="" id="1691242914" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.MultilineLabelComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.MultilineLabelComponent">
+ <void property="labelText">
+ <string>${i18n:updater.DownloadLocationLabel}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript />
+ </formComponent>
+ <formComponent name="" id="1691242915" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.DirectoryChooserComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.DirectoryChooserComponent">
+ <void property="initialFile">
+ <string>${installer:updaterDownloadDir}</string>
+ </void>
+ <void property="labelText">
+ <string>${i18n:updater.DownloadToLabel}</string>
+ </void>
+ <void property="manualEntryAllowed">
+ <boolean>false</boolean>
+ </void>
+ <void property="variableName">
+ <string>updaterDownloadLocation</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript />
+ </formComponent>
+ <formComponent name="" id="1691242916" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.KeyValuePairComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.KeyValuePairComponent">
+ <void property="labelText">
+ <string>${i18n:updater.DownloadSizeLabel}</string>
+ </void>
+ <void property="valueLabelText">
+ <string>${installer:updaterDownloadSize}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript />
+ </formComponent>
+ </formComponents>
+ </screen>
+ <screen name="Update message" id="1691242917" customizedId="" beanClass="com.install4j.runtime.beans.screens.CustomizableInfoScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.CustomizableInfoScreen">
+ <void property="displayedText">
+ <string>${installer:updaterComment}</string>
+ </void>
+ <void property="infoText">
+ <string>${i18n:updater.CommentsLabel}</string>
+ </void>
+ <void property="subTitle">
+ <string>${i18n:updater.CommentsSubTitle}</string>
+ </void>
+ <void property="textSource">
+ <object class="com.install4j.runtime.beans.screens.components.TextSource" field="DIRECT" />
+ </void>
+ <void property="title">
+ <string>${i18n:updater.CommentsTitle}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition>false // This screen is only shown if the user clicks the "Show comments" hyperlink label in the previous screen.
+</condition>
+ <validation>if (context.isConsole()) {
+ context.goBackInHistory(1);
+}
+return true;</validation>
+ <preActivation>WizardContext wizardContext = context.getWizardContext();
+wizardContext.setNextButtonVisible(false);
+wizardContext.setCancelButtonVisible(false);</preActivation>
+ <postActivation />
+ <actions />
+ <formComponents />
+ </screen>
+ <screen name="Download new version" id="1691242918" customizedId="" beanClass="com.install4j.runtime.beans.screens.CustomizableProgressScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="false" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.CustomizableProgressScreen">
+ <void property="subTitle">
+ <string>${i18n:updater.DownloadSubTitle}</string>
+ </void>
+ <void property="title">
+ <string>${i18n:updater.DownloadTitle}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions>
+ <action name="Download location" id="1691242919" customizedId="" beanClass="com.install4j.runtime.beans.actions.control.SetVariableAction" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.control.SetVariableAction">
+ <void property="script">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>context.getVariable("updaterDownloadLocation") + File.separator + ((UpdateDescriptorEntry)context.getVariable("updateDescriptorEntry")).getFileName()</string>
+ </void>
+ </object>
+ </void>
+ <void property="variableName">
+ <string>updaterDownloadFile</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <action name="" id="1691242920" customizedId="" beanClass="com.install4j.runtime.beans.actions.update.DownloadFileAction" enabled="true" commentSet="false" comment="" actionElevationType="elevated" rollbackBarrier="false" multiExec="false" failureStrategy="2" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.update.DownloadFileAction">
+ <void property="targetFile">
+ <string>${installer:updaterDownloadFile}</string>
+ </void>
+ <void property="url">
+ <string>${installer:updaterDownloadUrl}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <action name="" id="1691242921" customizedId="" beanClass="com.install4j.runtime.beans.actions.files.SetModeAction" enabled="true" commentSet="false" comment="" actionElevationType="elevated" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.files.SetModeAction">
+ <void property="files">
+ <array class="java.io.File" length="1">
+ <void index="0">
+ <object class="java.io.File">
+ <string>${installer:updaterDownloadFile}</string>
+ </object>
+ </void>
+ </array>
+ </void>
+ <void property="mode">
+ <string>755</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ </actions>
+ <formComponents />
+ </screen>
+ <screen name="Finish" id="1691242922" customizedId="" beanClass="com.install4j.runtime.beans.screens.BannerFormScreen" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" backButton="2" finishScreen="true" wizardIndexChangeType="unchanged" wizardIndexKey="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.screens.BannerFormScreen">
+ <void property="infoText">
+ <string>${i18n:updater.FinishInfoText("${compiler:sys.fullName}")}</string>
+ </void>
+ <void property="title">
+ <string>${i18n:updater.FinishTitle}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ <validation />
+ <preActivation />
+ <postActivation />
+ <actions>
+ <group name="Execute installer" id="1691242923" customizedId="" beanClass="com.install4j.runtime.beans.groups.ActionGroup" enabled="true" commentSet="false" comment="" actionElevationType="inherit">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.groups.ActionGroup">
+ <void property="conditionExpression">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>((Integer)context.getVariable("updaterLaunchSelection")).intValue() == 0 &amp;&amp; !context.getBooleanVariable("isArchive")</string>
+ </void>
+ </object>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <beans>
+ <action name="Set installer arguments" id="1691242924" customizedId="" beanClass="com.install4j.runtime.beans.actions.control.SetVariableAction" enabled="true" commentSet="false" comment="" actionElevationType="inherit" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.control.SetVariableAction">
+ <void property="script">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>if (context.isUnattended()) {
+ return new String[] {"-q", "-wait", "20"};
+} else if (context.isConsole()) {
+ return "-c";
+} else {
+ return "";
+}</string>
+ </void>
+ </object>
+ </void>
+ <void property="variableName">
+ <string>installerArguments</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <action name="" id="1691242925" customizedId="" beanClass="com.install4j.runtime.beans.actions.update.ShutdownCallingLauncherAction" enabled="true" commentSet="false" comment="" actionElevationType="none" rollbackBarrier="false" multiExec="false" failureStrategy="1" errorMessage="">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.update.ShutdownCallingLauncherAction" />
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ <action name="" id="1691242926" customizedId="" beanClass="com.install4j.runtime.beans.actions.misc.RunExecutableAction" enabled="true" commentSet="false" comment="" actionElevationType="elevated" rollbackBarrier="false" multiExec="false" failureStrategy="2" errorMessage="${i18n:updater.LaunchError}">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.actions.misc.RunExecutableAction">
+ <void property="arguments">
+ <array class="java.lang.String" length="1">
+ <void index="0">
+ <string>${installer:installerArguments}</string>
+ </void>
+ </array>
+ </void>
+ <void property="executable">
+ <object class="java.io.File">
+ <string>${installer:updaterDownloadFile}</string>
+ </object>
+ </void>
+ <void property="workingDirectory">
+ <object class="java.io.File">
+ <string>${installer:updaterDownloadLocation}</string>
+ </object>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <condition />
+ </action>
+ </beans>
+ </group>
+ </actions>
+ <formComponents>
+ <formComponent name="" id="1691242927" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.LabelComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.LabelComponent">
+ <void property="labelText">
+ <string>${i18n:updater.LaunchUpdaterQuestion}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript />
+ </formComponent>
+ <formComponent name="" id="1691242928" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.SpacerComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.SpacerComponent">
+ <void property="height">
+ <int>5</int>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript />
+ </formComponent>
+ <formComponent name="" id="1691242929" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.RadiobuttonsComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.RadiobuttonsComponent">
+ <void property="radioButtonLabels">
+ <array class="java.lang.String" length="2">
+ <void index="0">
+ <string>${i18n:updater.LaunchUpdaterLabel}</string>
+ </void>
+ <void index="1">
+ <string>${i18n:updater.DoNotLaunchUpdaterLabel}</string>
+ </void>
+ </array>
+ </void>
+ <void property="variableName">
+ <string>updaterLaunchSelection</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript>!context.getBooleanVariable("isArchive")</visibilityScript>
+ </formComponent>
+ <formComponent name="" id="1691242930" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.HyperlinkActionLabelComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.HyperlinkActionLabelComponent">
+ <void property="actionScript">
+ <object class="com.install4j.api.beans.ScriptProperty">
+ <void property="value">
+ <string>Util.showPath((String)context.getVariable("updaterDownloadFile"));</string>
+ </void>
+ </object>
+ </void>
+ <void property="hyperlinkText">
+ <string>${i18n:updater.OpenContainingFolderLabel}</string>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript>!context.isConsole()</visibilityScript>
+ </formComponent>
+ <formComponent name="" id="1691242931" customizedId="" beanClass="com.install4j.runtime.beans.formcomponents.ProgressComponent" enabled="true" commentSet="false" comment="" insetTop="" insetLeft="" insetBottom="" insetRight="" resetInitOnPrevious="false">
+ <serializedBean>
+ <java class="java.beans.XMLDecoder">
+ <object class="com.install4j.runtime.beans.formcomponents.ProgressComponent">
+ <void property="detailVisible">
+ <boolean>false</boolean>
+ </void>
+ <void property="hideInitially">
+ <boolean>true</boolean>
+ </void>
+ </object>
+ </java>
+ </serializedBean>
+ <initScript />
+ <visibilityScript />
+ </formComponent>
+ </formComponents>
+ </screen>
+ </beans>
+ </group>
+ </screens>
+ </application>
+ </applications>
+ </installerGui>
+ <mediaSets>
+ <unixInstaller name="Unix Installer" id="1691242146" customizedId="" mediaFileName="" installDir="malt" overridePrincipalLanguage="true" jreBitType="all" runPostProcessor="false" postProcessor="" failOnPostProcessorError="false" useLegacyMediaFileIds="false" legacyMediaFileIds="" includedJRE="" manualJREEntry="false" bundleType="1" jreURL="" jreShared="false" directDownload="false" installOnlyIfNecessary="false" customInstallBaseDir="" contentFilesType="1" downloadURL="">
+ <excludedLaunchers>
+ <launcher id="1691242363" />
+ </excludedLaunchers>
+ <excludedBeans />
+ <overriddenPrincipalLanguage id="en" customLocalizationFile="" />
+ <exclude />
+ <variables />
+ <autoUpdate useMinUpdatableVersion="false" minUpdatableVersion="" useMaxUpdatableVersion="false" maxUpdatableVersion="">
+ <commentFiles />
+ <customAttributes />
+ </autoUpdate>
+ <excludedComponents />
+ <includedDownloadableComponents />
+ <installerScript mode="1" file="">
+ <content />
+ </installerScript>
+ </unixInstaller>
+ <macosFolder name="Mac OS X Folder" id="1691242149" customizedId="" mediaFileName="" installDir="MALT" overridePrincipalLanguage="true" jreBitType="all" runPostProcessor="false" postProcessor="" failOnPostProcessorError="false" useLegacyMediaFileIds="false" legacyMediaFileIds="" includedJRE="macosx-amd64-1.7.0_60" manualJREEntry="false" bundleType="1" jreURL="" jreShared="false" directDownload="false" installOnlyIfNecessary="false" appleJre="false" requiredVmIdPrefix="" customInstall [...]
+ <excludedLaunchers>
+ <launcher id="1691242363" />
+ </excludedLaunchers>
+ <excludedBeans />
+ <overriddenPrincipalLanguage id="en" customLocalizationFile="" />
+ <exclude />
+ <variables />
+ <autoUpdate useMinUpdatableVersion="false" minUpdatableVersion="" useMaxUpdatableVersion="false" maxUpdatableVersion="">
+ <commentFiles />
+ <customAttributes />
+ </autoUpdate>
+ <excludedComponents>
+ <component id="1691242252" />
+ </excludedComponents>
+ <includedDownloadableComponents />
+ </macosFolder>
+ <windows name="Windows" id="1691242407" customizedId="" mediaFileName="${compiler:sys.shortName}_${compiler:sys.platform}_${compiler:sys.version}" installDir="Malt" overridePrincipalLanguage="true" jreBitType="64" runPostProcessor="false" postProcessor="" failOnPostProcessorError="false" useLegacyMediaFileIds="false" legacyMediaFileIds="" includedJRE="windows-amd64-1.7.0_60" manualJREEntry="false" bundleType="1" jreURL="" jreShared="false" directDownload="false" installOnlyIfNecessar [...]
+ <excludedLaunchers>
+ <launcher id="1691242363" />
+ </excludedLaunchers>
+ <excludedBeans />
+ <overriddenPrincipalLanguage id="en" customLocalizationFile="" />
+ <exclude />
+ <variables />
+ <autoUpdate useMinUpdatableVersion="false" minUpdatableVersion="" useMaxUpdatableVersion="false" maxUpdatableVersion="">
+ <commentFiles />
+ <customAttributes />
+ </autoUpdate>
+ <excludedComponents />
+ <includedDownloadableComponents />
+ </windows>
+ </mediaSets>
+ <buildIds buildAll="false">
+ <mediaSet refId="1691242149" />
+ </buildIds>
+ <buildOptions verbose="false" faster="false" disableSigning="false" debug="false" />
+</install4j>
+
diff --git a/resources/files/megan5PublicKey.txt b/resources/files/megan5PublicKey.txt
new file mode 100644
index 0000000..4dbf848
Binary files /dev/null and b/resources/files/megan5PublicKey.txt differ
diff --git a/resources/icons/malt-build.icns b/resources/icons/malt-build.icns
new file mode 100644
index 0000000..cec1a29
Binary files /dev/null and b/resources/icons/malt-build.icns differ
diff --git a/resources/icons/malt-build16.png b/resources/icons/malt-build16.png
new file mode 100644
index 0000000..dff41aa
Binary files /dev/null and b/resources/icons/malt-build16.png differ
diff --git a/resources/icons/malt-build32.png b/resources/icons/malt-build32.png
new file mode 100644
index 0000000..56bf495
Binary files /dev/null and b/resources/icons/malt-build32.png differ
diff --git a/resources/icons/malt-build48.pdf b/resources/icons/malt-build48.pdf
new file mode 100644
index 0000000..7a249d6
Binary files /dev/null and b/resources/icons/malt-build48.pdf differ
diff --git a/resources/icons/malt-build48.png b/resources/icons/malt-build48.png
new file mode 100644
index 0000000..512a949
Binary files /dev/null and b/resources/icons/malt-build48.png differ
diff --git a/resources/icons/malt-run.icns b/resources/icons/malt-run.icns
new file mode 100644
index 0000000..5353210
Binary files /dev/null and b/resources/icons/malt-run.icns differ
diff --git a/resources/icons/malt-run16.png b/resources/icons/malt-run16.png
new file mode 100644
index 0000000..ca613d4
Binary files /dev/null and b/resources/icons/malt-run16.png differ
diff --git a/resources/icons/malt-run32.png b/resources/icons/malt-run32.png
new file mode 100644
index 0000000..98e9fef
Binary files /dev/null and b/resources/icons/malt-run32.png differ
diff --git a/resources/icons/malt-run48.pdf b/resources/icons/malt-run48.pdf
new file mode 100644
index 0000000..d69741b
Binary files /dev/null and b/resources/icons/malt-run48.pdf differ
diff --git a/resources/icons/malt-run48.png b/resources/icons/malt-run48.png
new file mode 100644
index 0000000..707052c
Binary files /dev/null and b/resources/icons/malt-run48.png differ
diff --git a/resources/log4j.properties b/resources/log4j.properties
new file mode 100644
index 0000000..d49ad5a
--- /dev/null
+++ b/resources/log4j.properties
@@ -0,0 +1,58 @@
+#------------------------------------------------------------------------------
+#
+# The following properties set the logging levels and log appender. The
+# log4j.rootCategory variable defines the default log level and one or more
+# appenders. For the console, use 'S'. For the daily rolling file, use 'R'.
+# For an HTML formatted log, use 'H'.
+#
+# To override the default (rootCategory) log level, define a property of the
+# form (see below for available values):
+#
+# log4j.logger.<logger-name> = <level>
+#
+# Available logger names:
+# TODO
+#
+# Possible Log Levels:
+# FATAL, ERROR, WARN, INFO, DEBUG
+#
+#------------------------------------------------------------------------------
+log4j.rootCategory=FATAL, S
+
+log4j.logger.com.dappit.Dapper.parser=FATAL
+log4j.logger.org.w3c.tidy=FATAL
+
+#------------------------------------------------------------------------------
+#
+# The following properties configure the console (stdout) appender.
+# See http://logging.apache.org/log4j/docs/api/index.html for details.
+#
+#------------------------------------------------------------------------------
+log4j.appender.S = org.apache.log4j.ConsoleAppender
+log4j.appender.S.layout = org.apache.log4j.PatternLayout
+log4j.appender.S.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %c{1} [%p] %m%n
+
+#------------------------------------------------------------------------------
+#
+# The following properties configure the Daily Rolling File appender.
+# See http://logging.apache.org/log4j/docs/api/index.html for details.
+#
+#------------------------------------------------------------------------------
+log4j.appender.R = org.apache.log4j.DailyRollingFileAppender
+log4j.appender.R.File = logs/bensApps.log
+log4j.appender.R.Append = true
+log4j.appender.R.DatePattern = '.'yyy-MM-dd
+log4j.appender.R.layout = org.apache.log4j.PatternLayout
+log4j.appender.R.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %c{1} [%p] %m%n
+
+#------------------------------------------------------------------------------
+#
+# The following properties configure the Rolling File appender in HTML.
+# See http://logging.apache.org/log4j/docs/api/index.html for details.
+#
+#------------------------------------------------------------------------------
+log4j.appender.H = org.apache.log4j.RollingFileAppender
+log4j.appender.H.File = logs/bensApps.html
+log4j.appender.H.MaxFileSize = 100KB
+log4j.appender.H.Append = false
+log4j.appender.H.layout = org.apache.log4j.HTMLLayout
diff --git a/src/malt/AlignmentEngine.java b/src/malt/AlignmentEngine.java
new file mode 100644
index 0000000..dbb55fe
--- /dev/null
+++ b/src/malt/AlignmentEngine.java
@@ -0,0 +1,649 @@
+/**
+ * AlignmentEngine.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt;
+
+import jloda.util.Basic;
+import malt.align.AlignerOptions;
+import malt.align.BandedAligner;
+import malt.analysis.OrganismsProfile;
+import malt.data.*;
+import malt.io.*;
+import malt.mapping.MappingManager;
+import malt.util.FixedSizePriorityQueue;
+import malt.util.Utilities;
+import megan.parsers.blast.BlastMode;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * the main alignment engine. This runs in its own thread. It grabs the next read from the read queue and writes
+ * the output to the ranked output writer
+ * Daniel Huson, 8.2014
+ */
+public class AlignmentEngine {
+ private final int threadNumber; // used for output queuing
+
+ // general data structures:
+ private final MaltOptions maltOptions;
+ private final ReferencesDBAccess referencesDB;
+ private final ReferencesHashTableAccess[] tables;
+ private final SeedShape[] seedShapes;
+
+ private final int shift;
+
+ // io:
+ private final FastAReader fastAReader;
+ private final MaltOptions.MatchOutputFormat matchOutputFormat;
+ private final OutputStream organismsOutStream;
+ private final FileWriterRanked matchesWriter;
+ private final FileWriterRanked alignedReadsWriter;
+ private final FileWriterRanked unalignedReadsWriter;
+ private final RMA6Writer rmaWriter;
+
+ // parameters
+ private final double minRawScore;
+ private final double minBitScore;
+ private final double maxExpected;
+ private final double percentIdentity;
+
+ // xdrop heuristic:
+ private final int xDrop;
+ private final int minUngappedRawScore;
+
+ // keep track of all used references:
+ private final BitSet alignedReferenceIds;
+
+ private final OrganismsProfile organismsProfile;
+
+ // used for stats:
+ long countSequencesProcessed;
+ long countSequencesWithAlignments;
+ long countSeedMatches;
+ long countHashSeedMismatches;
+ long countAlignments;
+
+ // used in inner loop:
+ private final FixedSizePriorityQueue<ReadMatch> matchesQueue;
+ private final ReadMatch[] recycledMatchesArray;
+ private final BandedAligner aligner;
+ private final Map<Integer, SeedMatchArray> refIndex2ASeedMatches;
+
+ private final ReadMatch[] readMatchesForRefIndex;
+
+ private SeedMatchArray[] seedArrays; // used in innerloop to keep track of seedmatches per reference sequence
+ private int seedArraysLength = 0;
+
+ static private QuerySequence2MatchesCache querySequence2MatchesCache = null;
+
+ /**
+  * sets up one alignment engine. Each engine is intended to be run in a thread of its own
+  * @param threadNumber         number of this engine, handed to the ranked writers whenever output is written or skipped
+  * @param maltOptions          program options (output format, shift, score thresholds, per-query and per-reference limits)
+  * @param alignerOptions       options for the banded aligner and for the ungapped x-drop parameters
+  * @param referencesDB         access to the reference sequences and their headers
+  * @param tables               seed hash tables; one seed shape is taken from each of them
+  * @param fastAReader          source of the query reads
+  * @param matchesWriter        ranked writer for matches, may be null
+  * @param rmaWriter            RMA6 writer, may be null
+  * @param organismsOutStream   if non-null, an organisms profile is collected
+  * @param alignedReadsWriter   ranked writer for reads that have alignments, may be null
+  * @param unalignedReadsWriter ranked writer for reads that have no alignments, may be null
+  * @throws IOException declared by the signature; presumably raised by one of the components that are set up here
+  */
+ public AlignmentEngine(final int threadNumber, final MaltOptions maltOptions, AlignerOptions alignerOptions, final ReferencesDBAccess referencesDB,
+                        final ReferencesHashTableAccess[] tables, final FastAReader fastAReader,
+                        final FileWriterRanked matchesWriter, final RMA6Writer rmaWriter, final OutputStream organismsOutStream,
+                        final FileWriterRanked alignedReadsWriter, final FileWriterRanked unalignedReadsWriter) throws IOException {
+     // plain stores of the constructor arguments:
+     this.threadNumber = threadNumber;
+     this.maltOptions = maltOptions;
+     this.referencesDB = referencesDB;
+     this.tables = tables;
+     this.fastAReader = fastAReader;
+     this.matchesWriter = matchesWriter;
+     this.rmaWriter = rmaWriter;
+     this.organismsOutStream = organismsOutStream;
+     this.alignedReadsWriter = alignedReadsWriter;
+     this.unalignedReadsWriter = unalignedReadsWriter;
+
+     // values taken from the options:
+     this.matchOutputFormat = maltOptions.getMatchOutputFormat();
+     this.shift = maltOptions.getShift();
+     this.alignedReferenceIds = (maltOptions.isSparseSAM() ? null : new BitSet()); // references are not tracked for sparse SAM
+
+     // one seed shape per hash table:
+     this.seedShapes = new SeedShape[tables.length];
+     for (int tableIndex = 0; tableIndex < tables.length; tableIndex++) {
+         this.seedShapes[tableIndex] = tables[tableIndex].getSeedShape();
+     }
+
+     // aligner and score thresholds:
+     this.aligner = new BandedAligner(alignerOptions, maltOptions.getMode());
+     this.minRawScore = this.aligner.getRawScoreForBitScore(maltOptions.getMinBitScore());
+     this.minBitScore = maltOptions.getMinBitScore();
+     this.maxExpected = maltOptions.getMaxExpected();
+     this.percentIdentity = maltOptions.getMinProportionIdentity();
+
+     // parameters of the ungapped x-drop heuristic:
+     this.xDrop = alignerOptions.getUngappedXDrop(maltOptions.getMode());
+     this.minUngappedRawScore = alignerOptions.getUngappedMinRawScore(maltOptions.getMode());
+
+     // reusable data structures for the inner loop:
+     this.matchesQueue = new FixedSizePriorityQueue<>(maltOptions.getMaxAlignmentsPerQuery(), ReadMatch.createComparator());
+     this.recycledMatchesArray = new ReadMatch[maltOptions.getMaxAlignmentsPerQuery()];
+     this.refIndex2ASeedMatches = new HashMap<>(10000, 0.9f);
+     this.readMatchesForRefIndex = new ReadMatch[maltOptions.getMaxAlignmentsPerReference()];
+     for (int slot = 0; slot < this.readMatchesForRefIndex.length; slot++) {
+         this.readMatchesForRefIndex[slot] = new ReadMatch();
+     }
+
+     // the organisms profile is only kept when an organisms output stream was given:
+     this.organismsProfile = (organismsOutStream != null ? new OrganismsProfile(MappingManager.getTaxonomyMapping()) : null);
+     if (this.organismsProfile != null) this.organismsProfile.setTopPercent(maltOptions.getTopPercentLCA());
+     this.seedArrays = resizeAndConstructEntries(new SeedMatchArray[0], 1000, maltOptions.getMaxSeedsPerReference());
+ }
+
+ /**
+ * The main outer loop. Reads queries from the reader until it reports no more, looks up the seed matches for all frames, tables and offsets of each query and then calls the inner loop. Any exception is reported and ends the JVM with exit code 1
+ */
+ public void runOuterLoop() {
+ try {
+ final int maxFramesPerQuery = Utilities.getMaxFramesPerQuery(maltOptions.getMode(), maltOptions.isDoForward(), maltOptions.isDoReverse());
+
+ // setup the data-structure that carries per-query data into the inner loop (one instance per call of this method):
+ final DataForInnerLoop dataForInnerLoop = new DataForInnerLoop(maltOptions.getMode(), maltOptions.isDoForward(), maltOptions.isDoReverse(), maxFramesPerQuery, tables.length);
+
+ // setup buffers for seeds, one per frame and table
+ final byte[][][] seedBytes = new byte[maxFramesPerQuery][tables.length][];
+ for (int s = 0; s < maxFramesPerQuery; s++) {
+ for (int t = 0; t < tables.length; t++) {
+ seedBytes[s][t] = seedShapes[t].createBuffer(); // shape-specific buffer
+ }
+ }
+
+ // iterate over all available queries; the original author notes that reading from the shared reader is thread-safe
+ final FastARecord query = FastAReader.createFastARecord(1024, isWantQualityValues());
+ while (fastAReader.readAsFastA(query)) {
+ if (querySequence2MatchesCache != null && querySequence2MatchesCache.contains(query.getSequence(), query.getSequenceLength())) {
+ runInnerLoop(query, 0, null); // query is cached, no need to compute frames etc
+ } else {
+ // determine all frames to use:
+ dataForInnerLoop.computeFrames(query.getSequence(), query.getQualityValues(), query.getSequenceLength());
+
+ // find seed matches for all frames and using all seed tables; totalSize sums up what the table look-ups return
+ int totalSize = 0;
+ for (int s = 0; s < dataForInnerLoop.numberOfFrames; s++) { // for each frame of query
+ for (int t = 0; t < tables.length; t++) { // consider each seed table
+ final ReferencesHashTableAccess table = tables[t];
+ final SeedShape seedShape = table.getSeedShape();
+ int top = dataForInnerLoop.frameSequenceLength[s] - seedShape.getLength() + 1; // at offsets >= top the seed shape no longer fits into the frame sequence
+ for (int qOffset = 0; qOffset < dataForInnerLoop.frameSequenceLength[s]; qOffset += shift) { // consider all offsets
+ if (qOffset < top) {
+ final byte[] seed = seedShape.getSeed(dataForInnerLoop.frameSequence[s], qOffset, seedBytes[s][t]);
+ totalSize += table.lookup(seed, dataForInnerLoop.frameXTableXSeed2Reference[s][t][qOffset]);
+ } else
+ dataForInnerLoop.frameXTableXSeed2Reference[s][t][qOffset].setEmpty(); // the row is reused between queries, so it must not keep old content
+ }
+ }
+ }
+ // run the inner loop
+ runInnerLoop(query, totalSize, dataForInnerLoop);
+ }
+ }
+ } catch (Exception ex) {
+ Basic.caught(ex);
+ System.exit(1); // just die...
+ }
+ }
+
+ /**
+ * run the inner loop. This tries to extend all found seed matches into alignments, keeps the best ones per reference and per query and passes the result to all configured writers. If caching is used, first tries to find the matches in the cache
+ *
+ * @param query the read to process
+ * @param totalSize total number of seed look-up results found by the outer loop; nothing is computed if this is 0
+ * @param dataForInnerLoop frames and seed look-up results of the query; the outer loop passes null when the query is in the cache
+ * @throws IOException if a seed refers to a reference index beyond the number of reference sequences; presumably also raised by the writers
+ */
+ public void runInnerLoop(final FastARecord query, final int totalSize, final DataForInnerLoop dataForInnerLoop) throws IOException {
+ countSequencesProcessed++;
+
+ // if cache active and query found, use the cached matches:
+ ReadMatch[] matchesArray = (querySequence2MatchesCache != null ? querySequence2MatchesCache.get(query.getSequence(), query.getSequenceLength()) : null);
+ int numberOfMatches = (matchesArray != null ? matchesArray.length : 0);
+
+ if (matchesArray != null) // found in cache, only update the counts
+ {
+ if (numberOfMatches > 0) {
+ countAlignments += numberOfMatches;
+ countSequencesWithAlignments++;
+ }
+ } else // not found in cache, need to compute...
+ {
+ if (totalSize > 0) { // have some seeds to look at
+ try {
+ // keep a list of seed arrays that we reuse and reset here:
+ if (seedArraysLength > 0) {
+ for (int i = 0; i < seedArraysLength; i++) {
+ seedArrays[i].clear();
+ }
+ seedArraysLength = 0;
+ }
+
+ // determine all the seeds to be used, map each ref-index to its seeds, seeds know which frame of the query was used
+ for (int s = 0; s < dataForInnerLoop.numberOfFrames; s++) {
+ for (int t = 0; t < seedShapes.length; t++) { // for each seed-shape specific hash table
+ for (int qOffset = 0; qOffset < dataForInnerLoop.frameSequenceLength[s]; qOffset += shift) {
+ final Row matchLocations = dataForInnerLoop.frameXTableXSeed2Reference[s][t][qOffset]; // all locations of a particular seed
+ int seedMatchesUsed = 0;
+
+ for (int a = 0; a < matchLocations.size(); a += 2) {
+ countSeedMatches++;
+ final int refIndex = matchLocations.get(a);
+ final int refOffset = matchLocations.get(a + 1);
+
+ // todo: debugging
+ if (refIndex >= referencesDB.getNumberOfSequences()) {
+ System.err.println("matchLocations=" + matchLocations.toString());
+ throw new IOException("refIndex=" + refIndex + ": out of bounds: " + referencesDB.getNumberOfSequences());
+ }
+
+ final byte[] referenceSequence = referencesDB.getSequence(refIndex);
+
+ try {
+ if (seedShapes[t].equalSequences(dataForInnerLoop.frameSequence[s], qOffset, referenceSequence, refOffset)) {
+ if (seedMatchesUsed++ >= maltOptions.getMaxSeedsPerOffsetPerFrame()) {
+ break; // exceeded the maximum number of seeds per frame
+ }
+
+ SeedMatchArray set = refIndex2ASeedMatches.get(refIndex);
+ if (set == null) {
+ if (seedArraysLength >= seedArrays.length) {
+ // all pre-built seed arrays are in use, double their number:
+ seedArrays = resizeAndConstructEntries(seedArrays, 2 * seedArraysLength, maltOptions.getMaxSeedsPerReference());
+ }
+ set = seedArrays[seedArraysLength++];
+ refIndex2ASeedMatches.put(refIndex, set);
+ }
+ if (set.size() < maltOptions.getMaxSeedsPerReference()) {
+ set.setNext(qOffset, refOffset, s, seedShapes[t].getLength());
+ // (otherwise the seed is dropped: this reference already has its maximum number of seeds)
+ }
+ } else
+ countHashSeedMismatches++;
+ } catch (Exception ex) {
+ Basic.caught(ex);
+ }
+ }
+ }
+ }
+ }
+
+ // try to align each seed
+ for (Integer refIndex : refIndex2ASeedMatches.keySet()) {
+ SeedMatch previous = null;
+ final SeedMatchArray seedMatches = refIndex2ASeedMatches.get(refIndex);
+ seedMatches.sort();
+ int numberOfReadMatchesForRefIndex = 0; // we keep a short array of best hits for the given reference index
+
+ for (int i = 0; i < seedMatches.size(); i++) {
+ SeedMatch seedMatch = seedMatches.get(i);
+ if (!seedMatch.follows(previous)) { // ignore back-to-back matches
+ // todo: debugging
+ if (refIndex >= referencesDB.getNumberOfSequences()) {
+ System.err.println("seedMatch=" + seedMatch.toString());
+ throw new IOException("refIndex=" + refIndex + ": out of bounds: " + referencesDB.getNumberOfSequences());
+ }
+ final byte[] referenceSequence = referencesDB.getSequence(refIndex);
+ final byte[] sequence = dataForInnerLoop.frameSequence[seedMatch.getRank()];
+ int length = dataForInnerLoop.frameSequenceLength[seedMatch.getRank()];
+
+ if (aligner.quickCheck(sequence, length, referenceSequence, referenceSequence.length, seedMatch.getQueryOffset(), seedMatch.getReferenceOffset())) {
+ aligner.computeAlignment(sequence, length, referenceSequence, referenceSequence.length, seedMatch.getQueryOffset(), seedMatch.getReferenceOffset(), seedMatch.getSeedLength());
+ if (aligner.getRawScore() >= minRawScore) { // have found match with sufficient rawScore
+ // compute bitscore and expected score
+ aligner.computeBitScoreAndExpected();
+
+ if (aligner.getBitScore() >= minBitScore && aligner.getExpected() <= maxExpected) {
+ ReadMatch readMatch;
+ boolean foundPlaceToKeepThisMatch;
+ boolean incrementedNumberOfReadMatchesForRefIndex = false;
+
+ if (readMatchesForRefIndex.length == 1) { // only allowing one hit per reference...
+ readMatch = readMatchesForRefIndex[0];
+ if (numberOfReadMatchesForRefIndex == 0) { // the single slot is still free, claim it
+ numberOfReadMatchesForRefIndex = 1;
+ foundPlaceToKeepThisMatch = true;
+ incrementedNumberOfReadMatchesForRefIndex = true; // only a freshly claimed slot may be released again by the identity filter below
+ } else // the slot already holds a kept hit: only overwrite it with a strictly better one, never drop it
+ foundPlaceToKeepThisMatch = (aligner.getBitScore() > readMatch.getBitScore());
+ } else { //allow more than one hit
+ // ensure that this match does not overlap an existing match of same or better quality
+ boolean overlap = false;
+ for (int z = 0; z < numberOfReadMatchesForRefIndex; z++) {
+ readMatch = readMatchesForRefIndex[z];
+ if (readMatch.getBitScore() >= aligner.getBitScore() && readMatch.overlap(aligner.getStartReference(), aligner.getEndReference())) {
+ overlap = true;
+ break;
+ }
+ }
+ if (overlap)
+ continue;
+
+ // keep this match, if array not full:
+ if (numberOfReadMatchesForRefIndex < readMatchesForRefIndex.length) {
+ readMatch = readMatchesForRefIndex[numberOfReadMatchesForRefIndex++];
+ foundPlaceToKeepThisMatch = true;
+ incrementedNumberOfReadMatchesForRefIndex = true;
+ } else { // otherwise replace the first one found that has a lower bit score
+ foundPlaceToKeepThisMatch = false;
+ readMatch = null;
+ for (int z = 0; z < numberOfReadMatchesForRefIndex; z++) {
+ readMatch = readMatchesForRefIndex[z];
+ if (aligner.getBitScore() > readMatch.getBitScore()) {
+ foundPlaceToKeepThisMatch = true;
+ break;
+ }
+ }
+ }
+ }
+
+ if (foundPlaceToKeepThisMatch) {
+ byte[] text = null;
+ byte[] rma3Text = null;
+ if (matchesWriter != null) {
+ switch (matchOutputFormat) {
+ default:
+ case Text: {
+ text = aligner.getAlignmentText(dataForInnerLoop, seedMatch.getRank());
+ break;
+ }
+ case Tab: {
+ text = aligner.getAlignmentTab(dataForInnerLoop, null, referencesDB.getHeader(refIndex), seedMatch.getRank()); // don't pass queryHeader, it is added below
+ break;
+ }
+ case SAM: {
+ rma3Text = text = aligner.getAlignmentSAM(dataForInnerLoop, null, query.getSequence(), referencesDB.getHeader(refIndex), seedMatch.getRank()); // don't pass queryHeader, it is added below
+ break;
+ }
+ }
+ }
+ if (rmaWriter != null && rma3Text == null) {
+ rma3Text = aligner.getAlignmentSAM(dataForInnerLoop, null, query.getSequence(), referencesDB.getHeader(refIndex), seedMatch.getRank()); // don't pass queryHeader, it is added below
+ }
+ if (percentIdentity > 0) // need to filter by percent identity. Can't do this earlier because number of matches not known until alignment has been computed
+ {
+ if (text == null && rma3Text == null) // haven't computed alignment, so number of matches not yet computed
+ aligner.computeAlignmentByTraceBack(); // compute number of matches
+ if (aligner.getIdentities() < percentIdentity * aligner.getAlignmentLength()) { // too few identities
+ if (incrementedNumberOfReadMatchesForRefIndex)
+ numberOfReadMatchesForRefIndex--; // undo increment, won't be saving this match
+ continue;
+ }
+ }
+ readMatch.set(aligner.getBitScore(), refIndex, text, rma3Text, aligner.getStartReference(), aligner.getEndReference());
+ }
+ previous = seedMatch;
+ }
+ }
+ }
+ }
+ }
+ for (int z = 0; z < numberOfReadMatchesForRefIndex; z++) {
+ matchesQueue.add(readMatchesForRefIndex[z].getCopy());
+ }
+ }
+ } finally {
+ // erase the seed sets
+ refIndex2ASeedMatches.clear();
+ }
+ }
+
+ if (matchesQueue.size() > 0) {
+ countAlignments += matchesQueue.size();
+ countSequencesWithAlignments++;
+ numberOfMatches = matchesQueue.size();
+ for (int i = numberOfMatches - 1; i >= 0; i--) { // places matches into array ordered by descending score
+ recycledMatchesArray[i] = matchesQueue.poll();
+ }
+ matchesArray = recycledMatchesArray; // we reuse the matches array in the case that we are not using matches cache
+ }
+ // if use caching, save, even if no matches found!
+ if (querySequence2MatchesCache != null) {
+ querySequence2MatchesCache.put(query.getSequence(), query.getSequenceLength(), matchesArray, numberOfMatches); // ok to pass matchesArray==null when numberOfMatches==0
+ }
+ }
+
+ // output the alignments or skip the read (or output on skip, if negative filter...):
+ if (numberOfMatches > 0) {
+ if (matchesWriter != null) {
+ switch (matchOutputFormat) {
+ default:
+ case Text: {
+ byte[][] strings = new byte[3 * numberOfMatches + 1][];
+ strings[0] = BlastTextHelper.makeQueryLine(query);
+ for (int i = 0; i < numberOfMatches; i++) {
+ final ReadMatch readMatch = matchesArray[i];
+ strings[3 * i + 1] = referencesDB.getHeader(readMatch.getReferenceId());
+ strings[3 * i + 2] = ("\tLength=" + referencesDB.getSequenceLength(readMatch.getReferenceId()) + "\n").getBytes(); // plain concatenation: the digits must not depend on the default locale
+ strings[3 * i + 3] = readMatch.getText();
+ }
+ matchesWriter.writeByRank(threadNumber, query.getId(), strings);
+ break;
+ }
+ case SAM:
+ case Tab: {
+ byte[] queryNamePlusTab = BlastTextHelper.getQueryNamePlusTab(query);
+ byte[][] strings = new byte[2 * numberOfMatches][];
+ for (int i = 0; i < numberOfMatches; i++) {
+ ReadMatch readMatch = matchesArray[i];
+ strings[2 * i] = queryNamePlusTab;
+ strings[2 * i + 1] = readMatch.getText();
+ }
+ matchesWriter.writeByRank(threadNumber, query.getId(), strings);
+ break;
+ }
+ }
+ }
+ if (rmaWriter != null) {
+ rmaWriter.processMatches(query.getHeaderString(), query.getSequenceString(), matchesArray, numberOfMatches);
+ }
+
+ if (alignedReferenceIds != null) {
+ for (int i = 0; i < numberOfMatches; i++) {
+ final ReadMatch readMatch = matchesArray[i];
+ alignedReferenceIds.set(readMatch.getReferenceId());
+ }
+ }
+
+ if (organismsOutStream != null) {
+ organismsProfile.addRead(Utilities.getFirstWordSkipLeadingGreaterSign(query.getHeader()), numberOfMatches, matchesArray);
+ }
+
+ if (alignedReadsWriter != null) {
+ alignedReadsWriter.writeByRank(threadNumber, query.getId(), Utilities.getFirstWordEnsureLeadingGreaterSign(query.getHeader()), Utilities.copy0Terminated(query.getSequence()));
+ }
+ if (unalignedReadsWriter != null) {
+ unalignedReadsWriter.skipByRank(threadNumber, query.getId());
+ }
+ // matchesQueue.erase(); // not necessary because queue is consumed when building array
+ } else { // no match
+ if (matchesWriter != null) {
+ switch (matchOutputFormat) {
+ case Text: // report no-hits statement
+ matchesWriter.writeByRank(threadNumber, query.getId(), BlastTextHelper.makeQueryLine(query), BlastTextHelper.NO_HITS);
+ break;
+ default:
+ matchesWriter.skipByRank(threadNumber, query.getId());
+ break;
+ }
+ }
+ if (rmaWriter != null && maltOptions.isSaveUnalignedToRMA()) {
+ rmaWriter.processMatches(query.getHeaderString(), query.getSequenceString(), matchesArray, 0);
+ }
+ if (organismsOutStream != null) {
+ organismsProfile.addNoHitsRead();
+ }
+
+ if (alignedReadsWriter != null) {
+ alignedReadsWriter.skipByRank(threadNumber, query.getId());
+ }
+ if (unalignedReadsWriter != null) {
+ unalignedReadsWriter.writeByRank(threadNumber, query.getId(), Utilities.getFirstWordEnsureLeadingGreaterSign(query.getHeader()), Utilities.copy0Terminated(query.getSequence()));
+ }
+ }
+ }
+
+ /**
+  * to be called once the outer loop is done: completes the organisms profile, if one is being collected
+  */
+ public void finish() {
+     if (organismsOutStream == null)
+         return; // without an organisms output stream there is no profile
+     organismsProfile.finishAnalysis();
+ }
+
+ /**
+  * adds up the number of sequences processed by the given engines
+  *
+  * @param alignmentEngines the engines whose countSequencesProcessed counters are summed
+  * @return sum of the counters, 0 for an empty array
+  */
+ public static long getTotalSequencesProcessed(final AlignmentEngine[] alignmentEngines) {
+     long sum = 0;
+     for (int e = 0; e < alignmentEngines.length; e++) {
+         sum += alignmentEngines[e].countSequencesProcessed;
+     }
+     return sum;
+ }
+
+ /**
+  * adds up the number of sequences for which the given engines found at least one alignment
+  *
+  * @param alignmentEngines the engines whose countSequencesWithAlignments counters are summed
+  * @return sum of the counters, 0 for an empty array
+  */
+ public static long getTotalSequencesWithAlignments(final AlignmentEngine[] alignmentEngines) {
+     long sum = 0;
+     for (int e = 0; e < alignmentEngines.length; e++) {
+         sum += alignmentEngines[e].countSequencesWithAlignments;
+     }
+     return sum;
+ }
+
+ /**
+  * adds up the number of alignments counted by the given engines
+  *
+  * @param alignmentEngines the engines whose countAlignments counters are summed
+  * @return sum of the counters, 0 for an empty array
+  */
+ public static long getTotalAlignments(final AlignmentEngine[] alignmentEngines) {
+     long sum = 0;
+     for (int e = 0; e < alignmentEngines.length; e++) {
+         sum += alignmentEngines[e].countAlignments;
+     }
+     return sum;
+ }
+
+ public OrganismsProfile getOrganismsProfile() {
+ return organismsProfile; // null when no organisms output stream was passed to the constructor
+ }
+
+ public BitSet getAlignedReferenceIds() {
+ return alignedReferenceIds; // null when maltOptions.isSparseSAM() was true at construction; otherwise the inner loop sets the bit of every reference that has a reported match
+ }
+
+ /**
+  * creates an array of seed match arrays of the requested size that reuses the entries of the given array
+  * @param array     the current array; its entries (up to newSize of them) are carried over, not copied
+  * @param newSize   length of the array that is returned
+  * @param maxLength length passed to the constructor of every newly created SeedMatchArray
+  * @return new array; the slots beyond the length of the old array hold newly constructed entries
+  */
+ public SeedMatchArray[] resizeAndConstructEntries(SeedMatchArray[] array, int newSize, int maxLength) {
+     final SeedMatchArray[] resized = new SeedMatchArray[newSize];
+     System.arraycopy(array, 0, resized, 0, Math.min(newSize, array.length));
+     for (int pos = array.length; pos < newSize; pos++)
+         resized[pos] = new SeedMatchArray(maxLength);
+     return resized;
+ }
+
+ /**
+  * switches on the replicate query cache, which is shared by all engines. bits is handed to the cache and 2^bits is reported as its size
+  */
+ public static void activateReplicateQueryCaching(int bits) {
+     System.err.println("Using replicate query cache (cache size=" + (1L << bits) + ")"); // long shift: the int expression 1 << 31 would report a negative size
+     querySequence2MatchesCache = new QuerySequence2MatchesCache(bits);
+ }
+
+ /**
+  * asks the replicate query cache to report its statistics; does nothing if caching was never activated
+  */
+ public static void reportStats() {
+     final QuerySequence2MatchesCache cache = querySequence2MatchesCache;
+     if (cache != null) cache.reportStats();
+ }
+
+ /**
+  * should quality values be collected and saved?
+  *
+  * @return true, if mode is BLASTN, output format is SAM and input file is fastQ
+  */
+ private boolean isWantQualityValues() {
+     final boolean samOutputForBlastN = (maltOptions.getMode() == BlastMode.BlastN && maltOptions.getMatchOutputFormat() == MaltOptions.MatchOutputFormat.SAM);
+     return samOutputForBlastN && fastAReader.isFastQ();
+ }
+
+ /**
+  * an array of seed matches that is filled up to size and reused via clear(). Static, because it never uses the enclosing engine
+  */
+ static class SeedMatchArray {
+     int size; // number of entries currently in use
+     SeedMatch[] matches; // entries are expected to be pre-constructed, setNext only calls set on them
+
+     SeedMatchArray(int length) {
+         matches = SeedMatch.resizeAndConstructEntries(matches, length); // matches is still null at this point
+     }
+
+     public int size() {
+         return size;
+     }
+
+     public SeedMatch get(int i) {
+         return matches[i];
+     }
+
+     public SeedMatch setNext(int queryOffset, int referenceOffset, int rank, int seedLength) {
+         return matches[size++].set(queryOffset, referenceOffset, rank, seedLength); // no bounds check: the caller compares size() against the maximum first
+     }
+
+     public void clear() {
+         size = 0; // entries stay allocated and are overwritten by later setNext calls
+     }
+
+     public void sort() {
+         Arrays.sort(matches, 0, size, SeedMatch.getComparator()); // only the entries in use are sorted
+     }
+ }
+}
diff --git a/src/malt/DataForInnerLoop.java b/src/malt/DataForInnerLoop.java
new file mode 100644
index 0000000..e8cd43b
--- /dev/null
+++ b/src/malt/DataForInnerLoop.java
@@ -0,0 +1,227 @@
+/**
+ * DataForInnerLoop.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt;
+
+/**
+ * this contains all query specific data that is passed to the inner loop of the algorithm
+ * Daniel Huson, 8.2014
+ */
+
+import malt.data.DNA5;
+import malt.data.Row;
+import malt.data.Translator;
+import malt.util.Utilities;
+import megan.parsers.blast.BlastMode;
+
+import java.io.IOException;
+
+/**
+ * this contains all query specific data that is passed to the inner loop of the algorithm
+ */
+public class DataForInnerLoop {
+ private final BlastMode mode;
+ private final boolean doForward;
+ private final boolean doReverse;
+ final int maxNumberOfFrames;
+ final int numberOfTables;
+ final String[] positiveFrameInfoString;
+ final String[] negativeFrameInfoString;
+
+ private int maxQueryLength;
+ private int queryLength;
+ public int numberOfFrames;
+ private int[] frame;
+ public byte[][] frameSequence;
+ public byte[] qualityValues;
+ public int[] frameSequenceLength;
+ public Row[][][] frameXTableXSeed2Reference;
+
+ /**
+  * sets up the per-query data for the given mode. All buffers start with length 0 and grow in computeFrames
+  * @param mode              alignment mode; any mode other than BlastN and BlastX is set up like BlastP
+  * @param doForward         use the forward strand (evaluated here for BlastN)
+  * @param doReverse         use the reverse strand (evaluated here for BlastN)
+  * @param maxNumberOfFrames length of all per-frame arrays
+  * @param numberOfTables    number of seed tables
+  */
+ public DataForInnerLoop(BlastMode mode, boolean doForward, boolean doReverse, int maxNumberOfFrames, int numberOfTables) {
+     this.mode = mode;
+     this.doForward = doForward;
+     this.doReverse = doReverse;
+     this.maxNumberOfFrames = maxNumberOfFrames;
+     this.numberOfTables = numberOfTables;
+     // no query has been seen yet, so all per-query buffers start with length 0:
+     this.maxQueryLength = 0;
+     this.frame = new int[maxNumberOfFrames];
+     this.frameSequence = new byte[maxNumberOfFrames][0];
+     this.frameSequenceLength = new int[maxNumberOfFrames];
+     this.frameXTableXSeed2Reference = new Row[maxNumberOfFrames][numberOfTables][0];
+
+     // for BlastP and BlastN the frames never change, so we set them here once and for all:
+     switch (mode) {
+         default:
+         case BlastP: {
+             positiveFrameInfoString = new String[2]; // no frame info line for BlastP
+             negativeFrameInfoString = null;
+             frame[0] = 1;
+             numberOfFrames = 1;
+             break;
+         }
+         case BlastN: {
+             int used = 0; // number of frames set up so far
+             if (doForward) {
+                 frame[used++] = 1;
+                 positiveFrameInfoString = new String[]{null, " Strand = Plus / Plus\n"};
+             } else {
+                 positiveFrameInfoString = null;
+             }
+             if (doReverse) {
+                 frame[used++] = -1;
+                 negativeFrameInfoString = new String[]{null, " Strand = Minus / Plus\n"};
+             } else {
+                 negativeFrameInfoString = null;
+             }
+             numberOfFrames = used;
+             break;
+         }
+         case BlastX: { // frames are determined per query in computeFrames, only the info lines are set here
+             positiveFrameInfoString = new String[4];
+             negativeFrameInfoString = new String[4];
+             for (int f = 1; f <= 3; f++) {
+                 positiveFrameInfoString[f] = " Frame = +" + f + "\n";
+                 negativeFrameInfoString[f] = " Frame = -" + f + "\n";
+             }
+         }
+     }
+ }
+
+ /**
+  * prepares the frame sequences for a query and enlarges the per-query buffers if this query is longer than all before
+  * For BlastP there are no frames, for BlastN there can be up to two frames, +1 and -1,
+  * for BlastX the max number is 6
+  *
+  * @param query              query sequence
+  * @param queryQualityValues quality values, stored only in BlastN mode
+  * @param queryLength        number of letters of query to use
+  * @throws java.io.IOException if the mode is not BlastN, BlastP or BlastX
+  */
+ public void computeFrames(byte[] query, byte[] queryQualityValues, int queryLength) throws IOException {
+     this.queryLength = queryLength;
+     final boolean mustGrow = (maxQueryLength < queryLength); // maxQueryLength is only updated at the very end
+
+     switch (mode) {
+         case BlastN: {
+             int next = 0;
+             if (doForward) {
+                 frameSequence[next] = query; // forward strand: the query buffer itself, no copy
+                 frameSequenceLength[next] = queryLength;
+                 next++;
+             }
+             if (doReverse) {
+                 if (mustGrow)
+                     frameSequence[next] = new byte[queryLength];
+                 DNA5.getInstance().getReverseComplement(query, queryLength, frameSequence[next]);
+                 frameSequenceLength[next] = queryLength;
+             }
+             qualityValues = queryQualityValues;
+             break;
+         }
+         case BlastP:
+             frameSequence[0] = query;
+             frameSequenceLength[0] = queryLength;
+             break;
+         case BlastX:
+             if (mustGrow) { // don't worry about dividing by 3
+                 for (int r = 0; r < maxNumberOfFrames; r++)
+                     frameSequence[r] = new byte[queryLength];
+             }
+             numberOfFrames = Translator.getBestFrames(doForward, doReverse, query, queryLength, frame, frameSequence, frameSequenceLength);
+             break;
+         default:
+             throw new IOException("Unsupported mode: " + mode);
+     }
+     // resize arrays:
+     if (mustGrow) {
+         maxQueryLength = queryLength;
+         for (int r = 0; r < maxNumberOfFrames; r++) {
+             for (int t = 0; t < numberOfTables; t++) {
+                 frameXTableXSeed2Reference[r][t] = Utilities.resizeAndConstructEntries(frameXTableXSeed2Reference[r][t], maxQueryLength);
+             }
+         }
+     }
+ }
+
+ /**
+  * maps a start offset within a frame sequence to the start coordinate of the query that is written to output
+  * BlastN: startQuery + 1 for frame +1, otherwise queryLength - startQuery
+  * BlastX: 3 * startQuery + frame for positive frames, otherwise queryLength - 3 * startQuery + frame + 1
+  * BlastP and any other mode: startQuery + 1
+  *
+  * @param frameRank  index of the frame in the frame array
+  * @param startQuery start offset in the frame sequence
+  * @return start position in the query to report
+  */
+ public int getStartQueryForOutput(int frameRank, int startQuery) {
+     if (mode == BlastMode.BlastN)
+         return (frame[frameRank] == 1 ? startQuery + 1 : queryLength - startQuery);
+     if (mode == BlastMode.BlastX) {
+         final int f = frame[frameRank];
+         return (f > 0 ? 3 * startQuery + f : queryLength - 3 * startQuery + f + 1);
+     }
+     return startQuery + 1; // BlastP and all remaining modes
+ }
+
+ /**
+  * maps an end offset within a frame sequence to the end coordinate of the query that is written to output
+  * BlastN: endQuery for frame +1, otherwise queryLength - endQuery + 1
+  * BlastX: 3 * endQuery + frame - 1 for positive frames, otherwise queryLength - 3 * endQuery + frame + 2
+  * BlastP and any other mode: endQuery
+  *
+  * @param frameRank index of the frame in the frame array
+  * @param endQuery  end offset in the frame sequence
+  * @return end position in the query to report
+  */
+ public int getEndQueryForOutput(int frameRank, int endQuery) {
+     if (mode == BlastMode.BlastN)
+         return (frame[frameRank] == 1 ? endQuery : queryLength - endQuery + 1);
+     if (mode == BlastMode.BlastX) {
+         final int f = frame[frameRank];
+         return (f > 0 ? 3 * endQuery + f - 1 : queryLength - 3 * endQuery + f + 2);
+     }
+     return endQuery; // BlastP and all remaining modes
+ }
+
+ /**
+  * gets the frame info line for the frame at the given rank; positive and negative frames use separate tables
+  */
+ public String getFrameInfoLine(int frameRank) {
+     final int frameNumber = frame[frameRank];
+     return (frameNumber > 0 ? positiveFrameInfoString[frameNumber] : negativeFrameInfoString[-frameNumber]);
+ }
+
+ public int getFrameForFrameRank(int frameRank) {
+ return frame[frameRank]; // the frame number stored at this rank, e.g. 1 or -1 as set up by the constructor for BlastN
+ }
+
+ public byte[] getQualityValues() {
+ return qualityValues; // assigned by computeFrames in BlastN mode only
+ }
+}
+
diff --git a/src/malt/ITextProducer.java b/src/malt/ITextProducer.java
new file mode 100644
index 0000000..625568f
--- /dev/null
+++ b/src/malt/ITextProducer.java
@@ -0,0 +1,28 @@
+/**
+ * ITextProducer.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt;
+
+/**
+ * a callback interface whose single method returns a text as bytes
+ * Daniel Huson, 9.2014
+ */
+public interface ITextProducer {
+ byte[] getText(); // the text, as a byte array
+}
diff --git a/src/malt/MaltBuild.java b/src/malt/MaltBuild.java
new file mode 100644
index 0000000..db0aaa8
--- /dev/null
+++ b/src/malt/MaltBuild.java
@@ -0,0 +1,246 @@
+/**
+ * MaltBuild.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt;
+
+import jloda.util.*;
+import malt.data.*;
+import malt.genes.GeneTableBuilder;
+import malt.mapping.Mapping;
+import malt.util.Utilities;
+import megan.classification.Classification;
+import megan.classification.ClassificationManager;
+import megan.classification.IdMapper;
+import megan.classification.IdParser;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * build MALT index
+ * Daniel Huson, 8.2014
+ */
+public class MaltBuild {
+    /**
+     * Program entry point: sets up the program properties, runs the build and reports the total
+     * time and peak memory on stderr. Unless a message window is in use, the JVM exits with
+     * status 0 on success and status 1 on failure. No exception is propagated to the caller.
+     *
+     * @param args command-line arguments
+     */
+    public static void main(String[] args) {
+        try {
+            PeakMemoryUsageMonitor.start();
+            final MaltBuild maltBuild = new MaltBuild();
+            ResourceManager.setWarningMissingIcon(false);
+            ProgramProperties.setProgramIcon(ResourceManager.getIcon("malt-build48.png"));
+            ProgramProperties.setProgramName("MaltBuild");
+            ProgramProperties.setProgramVersion(Version.SHORT_DESCRIPTION);
+
+            maltBuild.run(args);
+
+            System.err.println("Total time: " + PeakMemoryUsageMonitor.getSecondsSinceStartString());
+            System.err.println("Peak memory: " + PeakMemoryUsageMonitor.getPeakUsageString());
+            if (!ArgsOptions.hasMessageWindow())
+                System.exit(0);
+            else
+                System.err.println("DONE - close window to quit");
+        } catch (Exception ex) {
+            // exceptions whose message starts with "Help" are not reported
+            if (ex.getMessage() == null || !ex.getMessage().startsWith("Help"))
+                Basic.caught(ex);
+            if (!ArgsOptions.hasMessageWindow())
+                System.exit(1);
+            else
+                System.err.println("DONE - close window to quit");
+        }
+    }
+
+    /**
+     * Parses the command-line options, loads the reference sequences, builds one hash table per
+     * seed shape, creates the classification mappings and saves everything to the index directory.
+     *
+     * @param args command-line arguments
+     * @throws UsageException if an unknown classification or an unsupported sequence type is requested
+     * @throws IOException    if the sequence type is undefined, or the index directory or reference file cannot be created
+     * @throws Exception      any other failure raised while building the index
+     */
+    public void run(String[] args) throws Exception {
+        // parse commandline options:
+        final ArgsOptions options = new ArgsOptions(args, ProgramProperties.getProgramName(), "MaltBuild", "Builds an index for MALT (MEGAN alignment tool)");
+        options.setAuthors("Daniel H. Huson");
+        options.setVersion(ProgramProperties.getProgramVersion());
+        options.setLicense("Copyright (C) 2016 Daniel H. Huson. This program comes with ABSOLUTELY NO WARRANTY.");
+
+        options.comment("Input:");
+        final List<String> inputFiles = options.getOptionMandatory("i", "input", "Input reference file(s)", new LinkedList<String>());
+        final SequenceType sequenceType = SequenceType.valueOfIgnoreCase(options.getOptionMandatory("s", "sequenceType", "Sequence type", SequenceType.values(), SequenceType.Protein.toString()));
+
+        options.comment("Output:");
+        final String indexDirectoryName = options.getOptionMandatory("d", "index", "Name of index directory", "");
+
+        options.comment("Performance:");
+        final int numberOfThreads = options.getOption("t", "threads", "Number of worker threads", Runtime.getRuntime().availableProcessors());
+        final int stepSize = options.getOption("st", "step", "Step size used to advance seed, values greater than 1 reduce index size and sensitivity", 1, 1, 100);
+
+        options.comment("Seed:");
+        String[] shapes = options.getOption("ss", "shapes", "Seed shape(s)", new String[]{"default"});
+        int maxHitsPerSeed = options.getOption("mh", "maxHitsPerSeed", "Maximum number of hits per seed", 1000);
+        final String proteinReduction;
+        if (sequenceType == SequenceType.Protein || options.isDoHelp())
+            proteinReduction = options.getOption("-pr", "proteinReduct", "Name or definition of protein alphabet reduction ("
+                    + Basic.toString(malt.data.ReducedAlphabet.reductions.keySet(), ",") + ")", "DIAMOND_11");
+        else
+            proteinReduction = "";
+
+        final String[] availableFNames = ClassificationManager.getAllSupportedClassifications().toArray(new String[ClassificationManager.getAllSupportedClassifications().size()]);
+        options.comment("Classification:");
+        String[] cNames = options.getOption("-c", "classify", "Classifications (any of " + Basic.toString(availableFNames, " ") + ")", new String[]{Classification.Taxonomy});
+        for (String cName : cNames) {
+            if (!ClassificationManager.getAllSupportedClassifications().contains(cName))
+                throw new UsageException("--classify: Unknown classification: " + cName);
+        }
+
+        // in help mode, walk through all classifications so that all their options get listed
+        if (options.isDoHelp())
+            cNames = availableFNames;
+
+        // default used when the option is not offered or not given; overwritten below by --parseTaxonNames
+        boolean parseTaxonNames = true;
+
+        final String[] gi2FNames = new String[cNames.length];
+        final String[] acc2FNames = new String[cNames.length];
+        final String[] synonyms2FNames = new String[cNames.length];
+
+        for (int i1 = 0; i1 < cNames.length; i1++) {
+            String cName = cNames[i1];
+            gi2FNames[i1] = options.getOption("-g2" + cName.toLowerCase(), "gi2" + cName.toLowerCase(), "GI-to-" + cName + " mapping file", "");
+            acc2FNames[i1] = options.getOption("-a2" + cName.toLowerCase(), "acc2" + cName.toLowerCase(), "Accession-to-" + cName + " mapping file", "");
+            synonyms2FNames[i1] = options.getOption("-s2" + cName.toLowerCase(), "syn2" + cName.toLowerCase(), "Synonyms-to-" + cName + " mapping file", "");
+
+            // keep the parsed value: it was previously discarded, so the option had no effect
+            if (cName.equalsIgnoreCase(Classification.Taxonomy))
+                parseTaxonNames = options.getOption("-tn", "parseTaxonNames", "Parse taxon names", true);
+        }
+
+        final String geneTableFile = options.getOption("-gif", "-geneInfoFile", "File containing gene information", "");
+
+        options.comment(ArgsOptions.OTHER);
+        ProgramProperties.put(IdParser.PROPERTIES_FIRST_WORD_IS_ACCESSION, options.getOption("-fwa", "firstWordIsAccession", "First word in reference header is accession number", ProgramProperties.get(IdParser.PROPERTIES_FIRST_WORD_IS_ACCESSION, true)));
+        ProgramProperties.put(IdParser.PROPERTIES_ACCESSION_TAGS, options.getOption("-atags", "accessionTags", "List of accession tags", ProgramProperties.get(IdParser.PROPERTIES_ACCESSION_TAGS, IdParser.ACCESSION_TAGS)));
+
+        final boolean saveFirstWordOfReferenceHeaderOnly = options.getOption("-fwo", "firstWordOnly", "Save only first word of reference header", false);
+        final int randomSeed = options.getOption("rns", "random", "Random number generator seed", 666);
+        final float hashTableLoadFactor = options.getOption("hsf", "hashScaleFactor", "Hash table scale factor", 0.9f, 0.1f, 1.0f);
+        //final boolean buildTableInMemory = options.getOption("btm", "buildTableInMemory", "Build the hash table in memory and then save (more memory, much faster)", true);
+        final boolean buildTableInMemory = true; // don't make this an option because it is really slow...
+        final boolean doBuildTables = !options.getOption("!xX", "xSkipTable", "Don't recompute index and tables, just compute profile support", false);
+
+        options.done();
+        Basic.setDebugMode(options.isVerbose());
+
+        if (sequenceType == null)
+            throw new IOException("Sequence type undefined");
+
+        System.err.println("Reference sequence type set to: " + sequenceType.toString());
+        final IAlphabet referenceAlphabet;
+        final IAlphabet seedAlphabet;
+
+        switch (sequenceType) {
+            case DNA:
+                if (shapes[0].equalsIgnoreCase("default")) {
+                    shapes = new String[]{SeedShape.SINGLE_DNA_SEED};
+                }
+                referenceAlphabet = DNA5.getInstance();
+                seedAlphabet = DNA5.getInstance();
+                break;
+            case Protein:
+                if (shapes[0].equalsIgnoreCase("default")) {
+                    shapes = SeedShape.PROTEIN_SEEDS;
+                }
+                referenceAlphabet = ProteinAlphabet.getInstance();
+                seedAlphabet = new ReducedAlphabet(proteinReduction);
+                break;
+            default:
+                throw new UsageException("Undefined sequence type: " + sequenceType);
+        }
+        System.err.println("Seed shape(s): " + Basic.toString(shapes, ", "));
+
+        final File indexDirectory = new File(indexDirectoryName);
+        if (doBuildTables) {
+            if (indexDirectory.exists()) {
+                Utilities.cleanIndexDirectory(indexDirectory);
+            } else {
+                if (!indexDirectory.mkdir())
+                    throw new IOException("mkdir failed: " + indexDirectoryName);
+            }
+        } else
+            System.err.println("NOT BUILDING INDEX OR TABLES");
+
+        final File referenceFile = new File(indexDirectory, "ref.idx");
+        // Only probe ref.idx when it will be rewritten at the end of this method. The probe deletes
+        // and recreates the file, so running it while skipping the tables would leave an existing
+        // reference index truncated to an empty file that is never saved again.
+        if (doBuildTables) {
+            if ((!referenceFile.exists() || referenceFile.delete()) && !referenceFile.createNewFile())
+                throw new IOException("Can't create file: " + referenceFile);
+        }
+
+        ReferencesHashTableBuilder.checkCanWriteFiles(indexDirectoryName, 0);
+
+        // load the reference file:
+        final ReferencesDBBuilder referencesDB = new ReferencesDBBuilder();
+        referencesDB.loadFastAFiles(inputFiles, referenceAlphabet);
+        System.err.println(String.format("Number of sequences:%12d", referencesDB.getNumberOfSequences()));
+        System.err.println(String.format("Number of letters: %12d", referencesDB.getNumberOfLetters()));
+
+        // generate hash table for each seed shape
+        if (doBuildTables) {
+            for (int tableNumber = 0; tableNumber < shapes.length; tableNumber++) {
+                final String shape = shapes[tableNumber];
+                final SeedShape seedShape = new SeedShape(seedAlphabet, shape);
+                System.err.println("BUILDING table (" + tableNumber + ")...");
+                final ReferencesHashTableBuilder hashTable = new ReferencesHashTableBuilder(sequenceType, seedAlphabet, seedShape,
+                        referencesDB.getNumberOfSequences(), referencesDB.getNumberOfLetters(), randomSeed, maxHitsPerSeed, hashTableLoadFactor, stepSize);
+                hashTable.buildTable(new File(indexDirectory, "table" + tableNumber + ".idx"), new File(indexDirectory, "table" + tableNumber + ".db"), referencesDB, numberOfThreads, buildTableInMemory);
+                hashTable.saveIndexFile(new File(indexDirectory, "index" + tableNumber + ".idx"));
+            }
+        }
+
+        // setup classification support
+        for (int i = 0; i < cNames.length; i++) {
+            final String cName = cNames[i];
+            final String cNameLowerCase = cName.toLowerCase();
+            // the taxonomy tree and map resources are named "ncbi", all others after the classification
+            final String sourceName = (cName.equals(Classification.Taxonomy) ? "ncbi" : cNameLowerCase);
+
+            ClassificationManager.ensureTreeIsLoaded(cName);
+            Basic.writeStreamToFile(ResourceManager.getFileAsStream(sourceName + ".tre"), new File(indexDirectory, cNameLowerCase + ".tre"));
+            Basic.writeStreamToFile(ResourceManager.getFileAsStream(sourceName + ".map"), new File(indexDirectory, cNameLowerCase + ".map"));
+
+            Utilities.loadMapping(synonyms2FNames[i], IdMapper.MapType.Synonyms, cName);
+            Utilities.loadMapping(acc2FNames[i], IdMapper.MapType.Accession, cName);
+            Utilities.loadMapping(gi2FNames[i], IdMapper.MapType.GI, cName);
+
+            final IdParser idParser = ClassificationManager.get(cName, true).getIdMapper().createIdParser();
+            if (cName.equals(Classification.Taxonomy))
+                idParser.setUseTextParsing(parseTaxonNames);
+            final Mapping mapping = Mapping.create(cName, referencesDB, idParser, new ProgressPercentage("Building " + cName + "-mapping..."));
+            mapping.save(new File(indexDirectory, cNameLowerCase + ".idx"));
+        }
+
+        if (doBuildTables) // don't write until after running classification mappers, as they add tags to reference sequences
+            referencesDB.save(new File(indexDirectory, "ref.idx"), new File(indexDirectory, "ref.db"), new File(indexDirectory, "ref.inf"), saveFirstWordOfReferenceHeaderOnly);
+
+        if (geneTableFile.length() > 0) {
+            GeneTableBuilder geneTableBuilder = new GeneTableBuilder();
+            geneTableBuilder.buildAndSaveGeneTable(referencesDB, geneTableFile, new File(indexDirectory, "gene-table.idx"), numberOfThreads);
+        }
+    }
+}
diff --git a/src/malt/MaltOptions.java b/src/malt/MaltOptions.java
new file mode 100644
index 0000000..e53fce0
--- /dev/null
+++ b/src/malt/MaltOptions.java
@@ -0,0 +1,367 @@
+/**
+ * MaltOptions.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt;
+
+import malt.data.IAlphabet;
+import megan.parsers.blast.BlastMode;
+
+/**
+ * container for the Malt options; each option is held in a field and exposed through accessors
+ * Daniel Huson, 8.2014
+ */
+public class MaltOptions {
+    /**
+     * formats in which matches can be written
+     */
+    public enum MatchOutputFormat {
+        SAM, Tab, Text;
+
+        /**
+         * finds the format whose name equals the given label, ignoring case
+         *
+         * @param label name to look up
+         * @return the matching format, or null if no format has that name
+         */
+        public static MatchOutputFormat valueOfIgnoreCase(String label) {
+            for (MatchOutputFormat format : values()) {
+                if (label.equalsIgnoreCase(format.toString())) {
+                    return format;
+                }
+            }
+            return null;
+        }
+    }
+
+    /**
+     * load data into memory, load data in pages on demand, use memory mapping
+     */
+    public enum MemoryMode {load, page, map}
+
+    // general
+    private String commandLine;
+    private BlastMode mode;
+    private IAlphabet queryAlphabet;
+    private boolean pairedReads = false;
+    private boolean useReplicateQueryCaching = false;
+
+    // performance
+    private int shift = 1;
+    private int numberOfThreads = 8;
+
+    // alignment filter
+    private int maxAlignmentsPerQuery = 25;
+    private int maxAlignmentsPerReference = 1;
+    private double minBitScore = 50;
+    private double maxExpected = 1;
+    private double minProportionIdentity = 0;
+
+    // query strands
+    private boolean doForward = true;
+    private boolean doReverse = true;
+
+    // seeds
+    private int maxSeedsPerReference = 20;
+    private int maxSeedsPerOffsetPerFrame = 100;
+
+    // output
+    private MatchOutputFormat matchOutputFormat = MatchOutputFormat.SAM;
+    private boolean sparseSAM = false;
+    private boolean saveUnalignedToRMA;
+    private boolean gzipMatches = true;
+    private boolean gzipOrganisms = true;
+    private boolean gzipAlignedReads = true;
+    private boolean gzipUnalignedReads = true;
+
+    // LCA
+    private boolean useWeightedLCA = false;
+    private float weightedLCAPercent = 80.0f;
+    private float topPercentLCA = 10;
+    private float minSupportPercentLCA = 0.001f;
+    private int minSupportLCA = 1;
+    private float minPercentIdentityLCA = 0.0f;
+    private boolean usePercentIdentityFilterLCA = false;
+
+    // ---------- general ----------
+
+    public String getCommandLine() {
+        return commandLine;
+    }
+
+    public void setCommandLine(String commandLine) {
+        this.commandLine = commandLine;
+    }
+
+    public BlastMode getMode() {
+        return mode;
+    }
+
+    public void setMode(BlastMode mode) {
+        this.mode = mode;
+    }
+
+    public IAlphabet getQueryAlphabet() {
+        return queryAlphabet;
+    }
+
+    public void setQueryAlphabet(IAlphabet queryAlphabet) {
+        this.queryAlphabet = queryAlphabet;
+    }
+
+    public boolean isPairedReads() {
+        return pairedReads;
+    }
+
+    public void setPairedReads(boolean pairedReads) {
+        this.pairedReads = pairedReads;
+    }
+
+    public boolean isUseReplicateQueryCaching() {
+        return useReplicateQueryCaching;
+    }
+
+    public void setUseReplicateQueryCaching(boolean useReplicateQueryCaching) {
+        this.useReplicateQueryCaching = useReplicateQueryCaching;
+    }
+
+    // ---------- performance ----------
+
+    /**
+     * gets the step by which the seed is shifted
+     *
+     * @return shift
+     */
+    public int getShift() {
+        return shift;
+    }
+
+    /**
+     * sets the step by which the seed is shifted
+     *
+     * @param shift new step
+     */
+    public void setShift(int shift) {
+        this.shift = shift;
+    }
+
+    /**
+     * gets the number of worker threads
+     *
+     * @return threads
+     */
+    public int getNumberOfThreads() {
+        return numberOfThreads;
+    }
+
+    /**
+     * sets the number of worker threads
+     *
+     * @param numberOfThreads new number of threads
+     */
+    public void setNumberOfThreads(int numberOfThreads) {
+        this.numberOfThreads = numberOfThreads;
+    }
+
+    // ---------- alignment filter ----------
+
+    public int getMaxAlignmentsPerQuery() {
+        return maxAlignmentsPerQuery;
+    }
+
+    public void setMaxAlignmentsPerQuery(int maxAlignmentsPerQuery) {
+        this.maxAlignmentsPerQuery = maxAlignmentsPerQuery;
+    }
+
+    public int getMaxAlignmentsPerReference() {
+        return maxAlignmentsPerReference;
+    }
+
+    public void setMaxAlignmentsPerReference(int maxAlignmentsPerReference) {
+        this.maxAlignmentsPerReference = maxAlignmentsPerReference;
+    }
+
+    public double getMinBitScore() {
+        return minBitScore;
+    }
+
+    public void setMinBitScore(double minBitScore) {
+        this.minBitScore = minBitScore;
+    }
+
+    public double getMaxExpected() {
+        return maxExpected;
+    }
+
+    public void setMaxExpected(double maxExpected) {
+        this.maxExpected = maxExpected;
+    }
+
+    public double getMinProportionIdentity() {
+        return minProportionIdentity;
+    }
+
+    public void setMinProportionIdentity(double minProportionIdentity) {
+        this.minProportionIdentity = minProportionIdentity;
+    }
+
+    // ---------- query strands ----------
+
+    public boolean isDoForward() {
+        return doForward;
+    }
+
+    public void setDoForward(boolean doForward) {
+        this.doForward = doForward;
+    }
+
+    public boolean isDoReverse() {
+        return doReverse;
+    }
+
+    public void setDoReverse(boolean doReverse) {
+        this.doReverse = doReverse;
+    }
+
+    // ---------- seeds ----------
+
+    public int getMaxSeedsPerReference() {
+        return maxSeedsPerReference;
+    }
+
+    public void setMaxSeedsPerReference(int maxSeedsPerReference) {
+        this.maxSeedsPerReference = maxSeedsPerReference;
+    }
+
+    public int getMaxSeedsPerOffsetPerFrame() {
+        return maxSeedsPerOffsetPerFrame;
+    }
+
+    public void setMaxSeedsPerOffsetPerFrame(int maxSeedsPerOffsetPerFrame) {
+        this.maxSeedsPerOffsetPerFrame = maxSeedsPerOffsetPerFrame;
+    }
+
+    // ---------- output ----------
+
+    public MatchOutputFormat getMatchOutputFormat() {
+        return matchOutputFormat;
+    }
+
+    public void setMatchOutputFormat(MatchOutputFormat matchOutputFormat) {
+        this.matchOutputFormat = matchOutputFormat;
+    }
+
+    /**
+     * sets the match output format by name, ignoring case; a name that matches no format results in null
+     *
+     * @param matchOutputFormat name of the format
+     */
+    public void setMatchOutputFormat(String matchOutputFormat) {
+        this.matchOutputFormat = MatchOutputFormat.valueOfIgnoreCase(matchOutputFormat);
+    }
+
+    /**
+     * gets the suffix to use for a matches output file: a dot followed by the lower-case mode name,
+     * extended by ".sam" or ".tab" for the SAM and Tab formats
+     *
+     * @return suffix
+     */
+    public String getMatchesOutputSuffix() {
+        final String modeSuffix = "." + mode.name().toLowerCase();
+        if (matchOutputFormat == MatchOutputFormat.SAM) {
+            return modeSuffix + ".sam";
+        }
+        if (matchOutputFormat == MatchOutputFormat.Tab) {
+            return modeSuffix + ".tab";
+        }
+        return modeSuffix;
+    }
+
+    public boolean isSparseSAM() {
+        return sparseSAM;
+    }
+
+    public void setSparseSAM(boolean sparseSAM) {
+        this.sparseSAM = sparseSAM;
+    }
+
+    public boolean isSaveUnalignedToRMA() {
+        return saveUnalignedToRMA;
+    }
+
+    public void setSaveUnalignedToRMA(boolean saveUnalignedToRMA) {
+        this.saveUnalignedToRMA = saveUnalignedToRMA;
+    }
+
+    public boolean isGzipMatches() {
+        return gzipMatches;
+    }
+
+    public void setGzipMatches(boolean gzipMatches) {
+        this.gzipMatches = gzipMatches;
+    }
+
+    public boolean isGzipOrganisms() {
+        return gzipOrganisms;
+    }
+
+    public void setGzipOrganisms(boolean gzipOrganisms) {
+        this.gzipOrganisms = gzipOrganisms;
+    }
+
+    public boolean isGzipAlignedReads() {
+        return gzipAlignedReads;
+    }
+
+    public void setGzipAlignedReads(boolean gzipAlignedReads) {
+        this.gzipAlignedReads = gzipAlignedReads;
+    }
+
+    public boolean isGzipUnalignedReads() {
+        return gzipUnalignedReads;
+    }
+
+    public void setGzipUnalignedReads(boolean gzipUnalignedReads) {
+        this.gzipUnalignedReads = gzipUnalignedReads;
+    }
+
+    // ---------- LCA ----------
+
+    public boolean isUseWeightedLCA() {
+        return useWeightedLCA;
+    }
+
+    public void setUseWeightedLCA(boolean useWeightedLCA) {
+        this.useWeightedLCA = useWeightedLCA;
+    }
+
+    public float getWeightedLCAPercent() {
+        return weightedLCAPercent;
+    }
+
+    public void setWeightedLCAPercent(float weightedLCAPercent) {
+        this.weightedLCAPercent = weightedLCAPercent;
+    }
+
+    public float getTopPercentLCA() {
+        return topPercentLCA;
+    }
+
+    public void setTopPercentLCA(float topPercentLCA) {
+        this.topPercentLCA = topPercentLCA;
+    }
+
+    public float getMinSupportPercentLCA() {
+        return minSupportPercentLCA;
+    }
+
+    public void setMinSupportPercentLCA(float minSupportPercentLCA) {
+        this.minSupportPercentLCA = minSupportPercentLCA;
+    }
+
+    public int getMinSupportLCA() {
+        return minSupportLCA;
+    }
+
+    public void setMinSupportLCA(int minSupportLCA) {
+        this.minSupportLCA = minSupportLCA;
+    }
+
+    public float getMinPercentIdentityLCA() {
+        return minPercentIdentityLCA;
+    }
+
+    public void setMinPercentIdentityLCA(float minPercentIdentityLCA) {
+        this.minPercentIdentityLCA = minPercentIdentityLCA;
+    }
+
+    public boolean isUsePercentIdentityFilterLCA() {
+        return usePercentIdentityFilterLCA;
+    }
+
+    public void setUsePercentIdentityFilterLCA(boolean usePercentIdentityFilterLCA) {
+        this.usePercentIdentityFilterLCA = usePercentIdentityFilterLCA;
+    }
+}
diff --git a/src/malt/MaltRun.java b/src/malt/MaltRun.java
new file mode 100644
index 0000000..05b3421
--- /dev/null
+++ b/src/malt/MaltRun.java
@@ -0,0 +1,584 @@
+/**
+ * MaltRun.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt;
+
+import jloda.util.*;
+import malt.align.AlignerOptions;
+import malt.align.BlastStatisticsHelper;
+import malt.align.DNAScoringMatrix;
+import malt.align.ProteinScoringMatrix;
+import malt.analysis.OrganismsProfileMerger;
+import malt.data.*;
+import malt.genes.GeneTableAccess;
+import malt.io.*;
+import malt.mapping.MappingManager;
+import malt.util.ProfileUtilities;
+import malt.util.Utilities;
+import megan.classification.Classification;
+import megan.classification.ClassificationManager;
+import megan.core.Document;
+import megan.parsers.blast.BlastMode;
+import megan.util.ReadMagnitudeParser;
+
+import javax.xml.bind.JAXBException;
+import java.io.*;
+import java.security.NoSuchAlgorithmException;
+import java.security.NoSuchProviderException;
+import java.security.spec.InvalidKeySpecException;
+import java.util.BitSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+/**
+ * the MALT metagenome alignment tool
+ * Daniel Huson, 8.2014
+ */
+public class MaltRun {
+ public static String version; // assigned in run() from Basic.getVersion(this.getClass())
+
+ private long totalReads = 0; // counter starting at 0; NOTE(review): presumably the number of reads processed - updated outside this view, confirm
+ private long totalAlignedReads = 0; // counter starting at 0; NOTE(review): presumably the number of reads with at least one alignment - confirm
+ private long totalAlignments = 0; // counter starting at 0; NOTE(review): presumably the number of alignments reported - confirm
+
+ /**
+ * run the MALT program
+ *
+ * @param args
+ * @throws jloda.util.UsageException
+ * @throws java.io.IOException
+ */
+ public static void main(String[] args) {
+ try {
+ PeakMemoryUsageMonitor.start();
+ MaltRun program = new MaltRun();
+ ResourceManager.setWarningMissingIcon(false);
+ ProgramProperties.setProgramIcon(ResourceManager.getIcon("malt-run48.png"));
+ ProgramProperties.setProgramName("MaltRun");
+ ProgramProperties.setProgramVersion(Version.SHORT_DESCRIPTION);
+
+ program.run(args);
+
+ System.err.println("Total time: " + PeakMemoryUsageMonitor.getSecondsSinceStartString());
+ System.err.println("Peak memory: " + PeakMemoryUsageMonitor.getPeakUsageString());
+ if (!ArgsOptions.hasMessageWindow())
+ System.exit(0);
+ else
+ System.err.println("DONE - close window to quit");
+ } catch (Exception ex) {
+ if (ex.getMessage() == null || !ex.getMessage().startsWith("Help"))
+ Basic.caught(ex);
+ if (!ArgsOptions.hasMessageWindow())
+ System.exit(1);
+ else
+ System.err.println("DONE - close window to quit");
+ }
+ }
+
+ /**
+ * run the program
+ *
+ * @param args
+ * @throws jloda.util.UsageException
+ * @throws java.io.IOException
+ */
+ public void run(final String[] args) throws UsageException, IOException, CanceledException, JAXBException, InvalidKeySpecException, NoSuchAlgorithmException, NoSuchProviderException {
+ version = Basic.getVersion(this.getClass());
+ final MaltOptions maltOptions = new MaltOptions();
+ final AlignerOptions alignerOptions = new AlignerOptions();
+
+ // parse commandline options:
+ final ArgsOptions options = new ArgsOptions(args, this, ProgramProperties.getProgramName(), "Aligns sequences using MALT (MEGAN alignment tool)");
+ options.setAuthors("Daniel H. Huson");
+ options.setVersion(ProgramProperties.getProgramVersion());
+ options.setLicense("Copyright (C) 2016 Daniel H. Huson. This program comes with ABSOLUTELY NO WARRANTY.");
+
+ options.comment("Mode:");
+ maltOptions.setMode(BlastMode.valueOfIgnoreCase(options.getOptionMandatory("m", "mode", "Program mode", BlastMode.values(), BlastMode.BlastX.toString())));
+ alignerOptions.setAlignmentType(options.getOption("at", "alignmentType", "Type of alignment to be performed", AlignerOptions.AlignmentMode.values(), alignerOptions.getAlignmentType().toString()));
+
+ SequenceType querySequenceType = Utilities.getQuerySequenceTypeFromMode(maltOptions.getMode());
+ SequenceType referenceSequenceType = Utilities.getReferenceSequenceTypeFromMode(maltOptions.getMode());
+ options.comment("Input:");
+ List<String> inputFileNames = options.getOptionMandatory("i", "inFile", "Input file(s) containing queries in FastA or FastQ format (gzip or zip ok)", new LinkedList<String>());
+ String indexDirectory = options.getOptionMandatory("d", "index", "Index directory as generated by MaltBuild", "");
+
+ options.comment("Output:");
+ final List<String> outputRMAFileNames = options.getOption("o", "output", "Output RMA file(s) or directory", new LinkedList<String>());
+ if (outputRMAFileNames.size() > 0 || options.isDoHelp())
+ maltOptions.setSaveUnalignedToRMA(options.getOption("iu", "includeUnaligned", "Include unaligned queries in RMA output file",false));
+
+ final List<String> outputMatchesFileNames = options.getOption("a", "alignments", "Output alignment file(s) or directory or STDOUT", new LinkedList<String>());
+ if (outputMatchesFileNames.size() > 0 || options.isDoHelp()) {
+ maltOptions.setMatchOutputFormat(options.getOption("f", "format", "Alignment output format", MaltOptions.MatchOutputFormat.values(), maltOptions.getMatchOutputFormat().toString()));
+ maltOptions.setGzipMatches(options.getOption("za", "gzipAlignments", "Compress alignments using gzip", maltOptions.isGzipMatches()));
+ }
+
+ if ((maltOptions.getMatchOutputFormat() == MaltOptions.MatchOutputFormat.SAM && maltOptions.getMode() == BlastMode.BlastN) || options.isDoHelp()) {
+ alignerOptions.setSamSoftClipping(options.getOption("ssc", "samSoftClip", "Use soft clipping in SAM files (BlastN mode only)", alignerOptions.isSamSoftClipping()));
+ }
+ if (maltOptions.getMatchOutputFormat() == MaltOptions.MatchOutputFormat.SAM || options.isDoHelp()) {
+ maltOptions.setSparseSAM(options.getOption("sps", "sparseSAM", "Produce sparse SAM format (smaller, faster, but only suitable for MEGAN)", maltOptions.isSparseSAM()));
+ }
+ final List<String> outputOrganismFileNames;
+ if (true) // do not allow organisms output
+ outputOrganismFileNames = new LinkedList<>();
+ else {
+ outputOrganismFileNames = options.getOption("oo", "outOrganism", "Organism profile XML output file(s) or directory or STDOUT", new LinkedList<String>());
+ if (outputOrganismFileNames.size() > 0 || options.isDoHelp()) {
+ maltOptions.setGzipOrganisms(options.getOption("zo", "gzipOrganism", "Compress organism output using gzip", maltOptions.isGzipOrganisms()));
+ }
+ }
+ final List<String> outputAlignedFileNames = options.getOption("oa", "outAligned", "Aligned reads output file(s) or directory or STDOUT", new LinkedList<String>());
+ if (outputAlignedFileNames.size() > 0 || options.isDoHelp()) {
+ maltOptions.setGzipAlignedReads(options.getOption("zal", "gzipAligned", "Compress aligned reads output using gzip", maltOptions.isGzipAlignedReads()));
+ }
+ final List<String> outputUnAlignedFileNames = options.getOption("ou", "outUnaligned", "Unaligned reads output file(s) or directory or STDOUT", new LinkedList<String>());
+ if (outputUnAlignedFileNames.size() > 0 || options.isDoHelp()) {
+ maltOptions.setGzipUnalignedReads(options.getOption("zul", "gzipUnaligned", "Compress unaligned reads output using gzip", maltOptions.isGzipUnalignedReads()));
+ }
+
+ options.comment("Performance:");
+ maltOptions.setNumberOfThreads(options.getOption("t", "numThreads", "Number of worker threads", Runtime.getRuntime().availableProcessors()));
+ final MaltOptions.MemoryMode memoryMode = MaltOptions.MemoryMode.valueOf(options.getOption("mem", "memoryMode", "Memory mode", MaltOptions.MemoryMode.values(), MaltOptions.MemoryMode.load.toString()));
+ final int maxNumberOfSeedShapes = options.getOption("mt", "maxTables", "Set the maximum number of seed tables to use (0=all)", 0);
+ maltOptions.setUseReplicateQueryCaching(options.getOption("rqc", "replicateQueryCache", "Cache results for replicated queries", false));
+
+ options.comment("Filter:");
+ maltOptions.setMinBitScore(options.getOption("b", "minBitScore", "Minimum bit score", maltOptions.getMinBitScore()));
+ maltOptions.setMaxExpected(options.getOption("e", "maxExpected", "Maximum expected score", maltOptions.getMaxExpected()));
+ maltOptions.setMinProportionIdentity(options.getOption("id", "minPercentIdentity", "Minimum percent identity", 100 * maltOptions.getMinProportionIdentity()) / 100.0);
+ maltOptions.setMaxAlignmentsPerQuery(options.getOption("mq", "maxAlignmentsPerQuery", "Maximum number of alignments per query", maltOptions.getMaxAlignmentsPerQuery()));
+ maltOptions.setMaxAlignmentsPerReference(options.getOption("mrf", "maxAlignmentsPerRef", "Maximum number of (non-overlapping) alignments per reference", maltOptions.getMaxAlignmentsPerReference()));
+
+ if ((maltOptions.getMode() == BlastMode.BlastN || options.isDoHelp())) {
+ options.comment("BlastN parameters:");
+ alignerOptions.setMatchScore(options.getOption("ma", "matchScore", "Match score", alignerOptions.getMatchScore()));
+ alignerOptions.setMismatchScore(options.getOption("mm", "mismatchScore", "Mismatch score", alignerOptions.getMismatchScore()));
+ alignerOptions.setLambda(options.getOption("la", "setLambda", "Parameter Lambda for BLASTN statistics", alignerOptions.getLambda()));
+ alignerOptions.setK(options.getOption("K", "setK", "Parameter K for BLASTN statistics", (float) alignerOptions.getK()));
+ }
+
+ String nameOfProteinScoringMatrix = null;
+ if (maltOptions.getMode() == BlastMode.BlastP || maltOptions.getMode() == BlastMode.BlastX || options.isDoHelp()) {
+ options.comment("BlastP and BlastX parameters:");
+ nameOfProteinScoringMatrix = options.getOption("psm", "subMatrix", "Protein substitution matrix to use", ProteinScoringMatrix.ScoringScheme.values(), ProteinScoringMatrix.ScoringScheme.BLOSUM62.toString());
+ }
+
+ if (querySequenceType == SequenceType.DNA || options.isDoHelp()) {
+ options.comment("DNA query parameters:");
+ maltOptions.setDoReverse(!options.getOption("fo", "forwardOnly", "Align query forward strand only", false));
+ maltOptions.setDoForward(!options.getOption("ro", "reverseOnly", "Align query reverse strand only", false));
+ }
+
+ options.comment("LCA parameters:");
+ final String[] cNames = (options.isDoHelp() ? ClassificationManager.getAllSupportedClassifications().toArray(new String[ClassificationManager.getAllSupportedClassifications().size()]) : MappingManager.determineAvailableMappings(indexDirectory));
+
+ if (false) {
+ for (String cName : cNames) {
+ final boolean useLCA = options.getOption("-l_" + cName.toLowerCase(), "lca_" + cName.toLowerCase(), "Use LCA for assigning to '" + cName + "' (otherwise 'best-hit')", ProgramProperties.get(cName + "UseLCA", cName.equals(Classification.Taxonomy)));
+ ProgramProperties.put(cName + "UseLCA", useLCA);
+ }
+ }
+
+ maltOptions.setTopPercentLCA(options.getOption("top", "topPercent", "Top percent value for LCA algorithm", maltOptions.getTopPercentLCA()));
+ maltOptions.setMinSupportPercentLCA(options.getOption("supp", "minSupportPercent", "Min support value for LCA algorithm as a percent of assigned reads (0==off)", maltOptions.getMinSupportPercentLCA()));
+ maltOptions.setMinSupportLCA(options.getOption("sup", "minSupport", "Min support value for LCA algorithm (overrides --minSupportPercent)", 0));
+ if (maltOptions.getMinSupportLCA() == 0) {
+ maltOptions.setMinSupportLCA(1);
+ } else if (maltOptions.getMinSupportLCA() > 0) {
+ maltOptions.setMinSupportPercentLCA(0); // if user sets minSupport,then turn of minSupportPercentLCA
+ if (options.isVerbose())
+ System.err.println("\t(--minSupportPercent: overridden, set to 0)");
+ }
+ maltOptions.setMinPercentIdentityLCA(options.getOption("mpi", "minPercentIdentityLCA", "Min percent identity used by LCA algorithm", maltOptions.getMinPercentIdentityLCA()));
+
+ maltOptions.setUsePercentIdentityFilterLCA(options.getOption("mif", "useMinPercentIdentityFilterLCA", "Use percent identity assignment filter", maltOptions.isUsePercentIdentityFilterLCA()));
+
+ maltOptions.setUseWeightedLCA(options.getOption("-wlca", "weightedLCA", "Use the weighted LCA for taxonomic assignment", Document.DEFAULT_WEIGHTED_LCA));
+ if (options.isDoHelp() || maltOptions.isUseWeightedLCA())
+ maltOptions.setWeightedLCAPercent(options.getOption("-wlp", "weightedLCAPercent", "Set the percent weight to cover", Document.DEFAULT_WEIGHTED_LCA_PERCENT));
+
+ ReadMagnitudeParser.setEnabled(options.getOption("mag", "magnitudes", "Reads have magnitudes (to be used in taxonomic or functional analysis)", false));
+
+ options.comment("Heuristics:");
+ maltOptions.setMaxSeedsPerOffsetPerFrame(options.getOption("spf", "maxSeedsPerFrame", "Maximum number of seed matches per offset per read frame", maltOptions.getMaxSeedsPerOffsetPerFrame()));
+ maltOptions.setMaxSeedsPerReference(options.getOption("spr", "maxSeedsPerRef", "Maximum number of seed matches per read and reference", maltOptions.getMaxSeedsPerReference()));
+ maltOptions.setShift(options.getOption("sh", "seedShift", "Seed shift", maltOptions.getShift()));
+
+ options.comment("Banded alignment parameters:");
+ alignerOptions.setGapOpenPenalty(options.getOption("go", "gapOpen", "Gap open penalty", referenceSequenceType == SequenceType.DNA ? 7 : 11));
+ alignerOptions.setGapExtensionPenalty(options.getOption("ge", "gapExtend", "Gap extension penalty", referenceSequenceType == SequenceType.DNA ? 3 : 1));
+ alignerOptions.setBand(options.getOption("bd", "band", "Band width/2 for banded alignment", alignerOptions.getBand()));
+
+ options.comment(ArgsOptions.OTHER);
+ int replicateQueryCacheBits = options.getOption("rqcb", "replicateQueryCacheBits", "Bits used for caching replicate queries (size is then 2^bits)", 20);
+ final boolean showAPart = options.getOption("xP", "xPart", "Show part of the table in human readable form for debugging", false);
+
+ options.done();
+ Basic.setDebugMode(options.isVerbose());
+
+ maltOptions.setCommandLine(Basic.toString(args, " "));
+
+ // END OF OPTIONS
+
+ if (replicateQueryCacheBits < 10 || replicateQueryCacheBits > 31)
+ throw new IOException("replicateQueryCacheBits: supported range is 10-31");
+
+ // make sure that the index contains the correct type of sequences:
+ {
+ SequenceType indexSequencesType = ReferencesHashTableAccess.getIndexSequenceType(indexDirectory);
+ if (referenceSequenceType != indexSequencesType)
+ throw new IOException("--mode " + maltOptions.getMode() + " not compatible with index containing sequences of type: " + indexSequencesType);
+ }
+
+ if (querySequenceType == SequenceType.Protein) {
+ maltOptions.setQueryAlphabet(ProteinAlphabet.getInstance());
+ } else if (querySequenceType == SequenceType.DNA) {
+ maltOptions.setQueryAlphabet(DNA5.getInstance());
+ } else
+ throw new UsageException("Undefined query sequence type: " + querySequenceType);
+
+ if (referenceSequenceType == SequenceType.Protein) {
+ alignerOptions.setScoringMatrix(ProteinScoringMatrix.create(nameOfProteinScoringMatrix));
+ alignerOptions.setReferenceIsDNA(false);
+ alignerOptions.setLambdaAndK(BlastStatisticsHelper.lookupLambdaAndK(nameOfProteinScoringMatrix, alignerOptions.getGapOpenPenalty(), alignerOptions.getGapExtensionPenalty()));
+ } else if (referenceSequenceType == SequenceType.DNA) {
+ alignerOptions.setScoringMatrix(new DNAScoringMatrix(alignerOptions.getMatchScore(), alignerOptions.getMismatchScore()));
+ alignerOptions.setReferenceIsDNA(true);
+ } else
+ throw new UsageException("Undefined reference sequence type: " + referenceSequenceType);
+
+ // check consistency of all options:
+ if (inputFileNames.size() == 0)
+ throw new UsageException("You must specify at least one input file");
+ Utilities.checkFileExists(new File(inputFileNames.iterator().next()));
+
+ if (!maltOptions.isDoForward() && !maltOptions.isDoReverse())
+ throw new UsageException("Illegal to specify both --forwardOnly and --reverseOnly");
+
+ Utilities.checkFileExists(new File(indexDirectory));
+
+ if (outputOrganismFileNames.size() > 0) {
+ try {
+ Utilities.checkFileExists(new File(indexDirectory, "gene-table.idx"));
+ } catch (IOException ex) {
+ throw new IOException("Specified index does not support '--outOrganisms': " + ex);
+ }
+ }
+
+ try {
+ ReferencesHashTableAccess.checkFilesExist(indexDirectory, 0);
+ } catch (IOException ex) {
+ throw new IOException("Index '" + indexDirectory + "' appears to be incomplete: " + ex);
+ }
+
+ // load the index:
+ System.err.println("--- LOADING ---:");
+ // load the reference file:
+ final ReferencesDBAccess referencesDB = new ReferencesDBAccess(memoryMode, new File(indexDirectory, "ref.idx"), new File(indexDirectory, "ref.db"), new File(indexDirectory, "ref.inf"));
+ alignerOptions.setReferenceDatabaseLength(referencesDB.getNumberOfLetters());
+
+ int numberOfTables = ReferencesHashTableAccess.determineNumberOfTables(indexDirectory);
+ if (maxNumberOfSeedShapes > 0 && maxNumberOfSeedShapes < numberOfTables) {
+ System.err.println("Using " + maxNumberOfSeedShapes + " of " + numberOfTables + " available seed shapes");
+ numberOfTables = maxNumberOfSeedShapes;
+ }
+
+ // load all tables:
+ final ReferencesHashTableAccess[] hashTables = new ReferencesHashTableAccess[numberOfTables];
+ for (int t = 0; t < numberOfTables; t++) {
+ System.err.println("LOADING table (" + t + ") ...");
+ hashTables[t] = new ReferencesHashTableAccess(memoryMode, indexDirectory, t);
+ System.err.println(String.format("Table size:%,15d", hashTables[t].size()));
+ if (showAPart)
+ hashTables[t].showAPart();
+ }
+ // table.show();
+
+ // load mapping files, if we are going to generate RMA
+ if ((outputRMAFileNames.size() > 0)) {
+ MappingManager.loadMappings(cNames, indexDirectory);
+ }
+
+ final GeneTableAccess geneTableAccess;
+ if (outputOrganismFileNames.size() > 0 && (new File(indexDirectory, "gene-table.idx")).exists())
+ geneTableAccess = new GeneTableAccess(new File(indexDirectory, "gene-table.idx"));
+ else
+ geneTableAccess = null;
+
+ // run alignment for each input file:
+ int fileNumber = 0;
+ System.err.println("--- ALIGNING ---:");
+ if (maltOptions.isUseReplicateQueryCaching())
+ AlignmentEngine.activateReplicateQueryCaching(replicateQueryCacheBits);
+
+
+ for (String inFile : inputFileNames) {
+ try {
+ if ((new File(inFile).exists())) {
+ String rmaOutputFile = getOutputFileName(fileNumber, inputFileNames, outputRMAFileNames, ".rma6", false);
+ String matchesOutputFile = getOutputFileName(fileNumber, inputFileNames, outputMatchesFileNames, maltOptions.getMatchesOutputSuffix(), maltOptions.isGzipMatches());
+ String organismProfileOutputFile = getOutputFileName(fileNumber, inputFileNames, outputOrganismFileNames, "-organisms.xml", maltOptions.isGzipOrganisms());
+ String alignedReadsOutputFile = getOutputFileName(fileNumber, inputFileNames, outputAlignedFileNames, "-aligned.fna", maltOptions.isGzipAlignedReads());
+ String unalignedReadsOutputFile = getOutputFileName(fileNumber, inputFileNames, outputUnAlignedFileNames, "-unaligned.fna", maltOptions.isGzipUnalignedReads());
+ launchAlignmentThreads(alignerOptions, maltOptions, inFile, rmaOutputFile, matchesOutputFile, organismProfileOutputFile,
+ alignedReadsOutputFile, unalignedReadsOutputFile, referencesDB, hashTables, geneTableAccess);
+ } else {
+ System.err.println("File not found: '" + inFile + "', skipped");
+ }
+ } catch (IOException ex) {
+ System.err.println("Exception for file: '" + inFile + "', skipped (" + ex + ")");
+
+ } finally {
+ fileNumber++;
+ }
+ }
+
+ // close everything:
+ referencesDB.close();
+ for (int t = 0; t < numberOfTables; t++) {
+ hashTables[t].close();
+ }
+
+ AlignmentEngine.reportStats();
+ if (inputFileNames.size() > 1) {
+ System.err.println(String.format("Number of input files: %10d", inputFileNames.size()));
+ System.err.println(String.format("Total num. of queries: %10d", totalReads));
+ System.err.println(String.format("Total aligned queries: %10d", totalAlignedReads));
+ System.err.println(String.format("Total num. alignments: %10d", totalAlignments));
+
+ }
+ }
+
+ /**
+  * Aligns all queries of one input file against the loaded index and writes the requested outputs.
+  * <p>
+  * One {@link AlignmentEngine} is created and run per configured thread; all engines share the same
+  * query reader and the same writers. Once every engine has finished, the writers are closed, a
+  * temporary SAM file (if one was used) is rewritten with header, @SQ lines and a read-group tag per
+  * line, the organisms profile (if requested) is merged and written, and the counts for this file are
+  * reported and added to the running totals.
+  *
+  * @param alignerOptions            aligner options handed to every alignment engine
+  * @param maltOptions               supplies the thread count, query alphabet, match output format and command line
+  * @param infile                    query file to align
+  * @param rmaOutputFile             RMA output file, or null if none is to be written
+  * @param matchesOutputFile         matches output file, or null if none is to be written
+  * @param organismProfileOutputFile organisms profile output file, or null if none is to be written
+  * @param alignedReadsOutputFile    output file for aligned reads, or null if none is to be written
+  * @param unalignedReadsOutputFile  output file for unaligned reads, or null if none is to be written
+  * @param referencesDB              reference sequences; also used to look up header and length for @SQ lines
+  * @param tables                    hash tables handed to every alignment engine
+  * @param geneTableAccess           gene table handed to the organisms profile merger (the caller may pass null)
+  * @throws java.io.IOException if reading the input or writing any output fails
+  * @throws JAXBException       declared by this method; presumably raised while writing the organisms profile (confirm)
+  */
+ public void launchAlignmentThreads(final AlignerOptions alignerOptions, final MaltOptions maltOptions, final String infile, final String rmaOutputFile,
+                                    final String matchesOutputFile, final String organismProfileOutputFile,
+                                    final String alignedReadsOutputFile, final String unalignedReadsOutputFile,
+                                    final ReferencesDBAccess referencesDB, final ReferencesHashTableAccess[] tables,
+                                    final GeneTableAccess geneTableAccess) throws IOException, JAXBException {
+
+     final ExecutorService executor = Executors.newFixedThreadPool(maltOptions.getNumberOfThreads());
+     final CountDownLatch countDownLatch = new CountDownLatch(maltOptions.getNumberOfThreads());
+
+     final FastAReader fastAReader = new FastAReader(infile, maltOptions.getQueryAlphabet(), new ProgressPercentage("+++++ Aligning file: " + infile));
+
+     // full (non-sparse) SAM output is first written to a temporary file, because the @SQ header lines
+     // can only be produced once it is known which references were actually aligned to
+     final String matchesOutputFileUsed;
+     final boolean usingTemporarySAMOutputFile;
+     if (matchesOutputFile != null && maltOptions.getMatchOutputFormat() == MaltOptions.MatchOutputFormat.SAM && !maltOptions.isSparseSAM()) {
+         matchesOutputFileUsed = Basic.getTemporaryFileName(matchesOutputFile);
+         usingTemporarySAMOutputFile = true;
+     } else {
+         matchesOutputFileUsed = matchesOutputFile;
+         usingTemporarySAMOutputFile = false;
+     }
+
+     final FileWriterRanked matchesWriter = (matchesOutputFileUsed != null ? new FileWriterRanked(matchesOutputFileUsed, maltOptions.getNumberOfThreads(), 1) : null);
+     final RMA6Writer rmaWriter = (rmaOutputFile != null ? new RMA6Writer(maltOptions, rmaOutputFile) : null);
+     final FileWriterRanked alignedReadsWriter = (alignedReadsOutputFile != null ? new FileWriterRanked(alignedReadsOutputFile, maltOptions.getNumberOfThreads(), 1) : null);
+     final FileWriterRanked unalignedReadsWriter = (unalignedReadsOutputFile != null ? new FileWriterRanked(unalignedReadsOutputFile, maltOptions.getNumberOfThreads(), 1) : null);
+
+     final OutputStream organismOutStream = (organismProfileOutputFile != null ? new BufferedOutputStream(new FileOutputStream(organismProfileOutputFile)) : null);
+
+     // the organisms profile is an output too, so only warn when it is absent as well
+     if (matchesWriter == null && rmaWriter == null && alignedReadsWriter == null && unalignedReadsWriter == null && organismOutStream == null)
+         System.err.println("Warning: no output specified");
+
+     if (matchesWriter != null) {
+         if (maltOptions.getMatchOutputFormat() == MaltOptions.MatchOutputFormat.Text)
+             matchesWriter.writeFirst(BlastTextHelper.getBlastTextHeader(maltOptions.getMode()));
+         else if (maltOptions.getMatchOutputFormat() == MaltOptions.MatchOutputFormat.SAM && !usingTemporarySAMOutputFile) {
+             matchesWriter.writeFirst(SAMHelper.getSAMHeader(maltOptions.getMode(), maltOptions.getCommandLine()));
+         }
+     }
+
+     final AlignmentEngine[] alignmentEngines = new AlignmentEngine[maltOptions.getNumberOfThreads()];
+
+     // launch the worker threads
+     for (int thread = 0; thread < maltOptions.getNumberOfThreads(); thread++) {
+         final int threadNumber = thread;
+         executor.execute(new Runnable() {
+             public void run() {
+                 try {
+                     alignmentEngines[threadNumber] = new AlignmentEngine(threadNumber, maltOptions, alignerOptions, referencesDB, tables, fastAReader,
+                             matchesWriter, rmaWriter, organismOutStream, alignedReadsWriter, unalignedReadsWriter);
+                     alignmentEngines[threadNumber].runOuterLoop();
+                     alignmentEngines[threadNumber].finish();
+                 } catch (Exception ex) {
+                     Basic.caught(ex);
+                     System.exit(1); // just die...
+                 } finally {
+                     countDownLatch.countDown();
+                 }
+             }
+         });
+     }
+
+     try {
+         countDownLatch.await(); // await completion of alignment threads
+     } catch (InterruptedException e) {
+         Basic.caught(e);
+     } finally {
+         // shut down threads:
+         executor.shutdownNow();
+         fastAReader.close();
+     }
+     if (matchesWriter != null) {
+         if (maltOptions.getMatchOutputFormat() == MaltOptions.MatchOutputFormat.Text)
+             matchesWriter.writeLast(BlastTextHelper.FILE_FOOTER_BLAST);
+         matchesWriter.close();
+         System.err.println("Alignments written to file: " + matchesOutputFileUsed);
+     }
+     if (rmaWriter != null) {
+         rmaWriter.close();
+         System.err.println("Analysis written to file: " + rmaOutputFile);
+     }
+
+     // if using temporary file: write the SAM header and @SQ lines, then copy all matches behind them
+     if (usingTemporarySAMOutputFile) {
+         final BufferedWriter w = new BufferedWriter(new OutputStreamWriter(Basic.getOutputStreamPossiblyZIPorGZIP(matchesOutputFile)));
+         w.write(SAMHelper.getSAMHeader(maltOptions.getMode(), maltOptions.getCommandLine()));
+
+         // prepend SQ lines
+         {
+             // union of the reference ids reported by all engines
+             final BitSet allIds = new BitSet();
+             for (AlignmentEngine engine : alignmentEngines) {
+                 allIds.or(engine.getAlignedReferenceIds());
+             }
+
+             if (allIds.cardinality() > 0) {
+                 // one progress step per set bit, so the maximum is cardinality(); size() would be the allocated bit count
+                 ProgressPercentage progress = new ProgressPercentage("Prepending @SQ lines to SAM file: " + matchesOutputFile, allIds.cardinality());
+                 for (int r = allIds.nextSetBit(0); r != -1; r = allIds.nextSetBit(r + 1)) {
+                     w.write("@SQ\tSN:" + (Basic.toString(Utilities.getFirstWordSkipLeadingGreaterSign(referencesDB.getHeader(r)))) + "\tLN:" + referencesDB.getSequenceLength(r));
+                     w.write('\n');
+                     progress.incrementProgress();
+                 }
+                 progress.close();
+             }
+         }
+
+         // copy matches, appending the read-group tag to every line
+         {
+             final FileInputIterator it = new FileInputIterator(matchesOutputFileUsed);
+             final ProgressPercentage progress = new ProgressPercentage("Copying from temporary file:", it.getMaximumProgress());
+
+             while (it.hasNext()) {
+                 w.write(it.next());
+                 w.write("\tRG:Z:1\n");
+                 progress.incrementProgress();
+             }
+             it.close();
+             progress.close();
+         }
+
+         w.close();
+         if (new File(matchesOutputFileUsed).delete())
+             System.err.println("Deleted temporary file: " + matchesOutputFileUsed);
+     }
+
+     if (organismOutStream != null) {
+         // merge the per-engine organisms profiles into one and write it out
+         OrganismsProfileMerger organismsProfileMerger = new OrganismsProfileMerger(MappingManager.getTaxonomyMapping(), geneTableAccess);
+         organismsProfileMerger.setName(Basic.getFileBaseName(Basic.getFileNameWithoutPath(infile)));
+         organismsProfileMerger.mergeAndCompute(ProfileUtilities.getOrganismsProfiles(alignmentEngines));
+         organismsProfileMerger.write(organismOutStream);
+         organismOutStream.close();
+     }
+     if (alignedReadsWriter != null) {
+         alignedReadsWriter.close();
+         System.err.println("Aligned reads written to file: " + alignedReadsOutputFile);
+     }
+     if (unalignedReadsWriter != null) {
+         unalignedReadsWriter.close();
+         System.err.println("Unaligned reads written to file: " + unalignedReadsOutputFile);
+     }
+
+     // per-file counts, also accumulated into the totals kept on this object
+     final long countReads = AlignmentEngine.getTotalSequencesProcessed(alignmentEngines);
+     totalReads += countReads;
+     final long countAlignedReads = AlignmentEngine.getTotalSequencesWithAlignments(alignmentEngines);
+     totalAlignedReads += countAlignedReads;
+     final long countAlignments = AlignmentEngine.getTotalAlignments(alignmentEngines);
+     totalAlignments += countAlignments;
+
+     System.err.println(String.format("Num. of queries: %10d", countReads));
+     System.err.println(String.format("Aligned queries: %10d", countAlignedReads));
+     System.err.println(String.format("Num. alignments: %10d", countAlignments));
+ }
+
+ /**
+ * creates the output file name
+ *
+ * @param fileNumber
+ * @param inFiles
+ * @param outFiles
+ * @param suffix
+ * @return
+ * @throws IOException
+ */
+ private String getOutputFileName(final int fileNumber, final List<String> inFiles, final List<String> outFiles, final String suffix, final boolean gzip) throws IOException {
+ if (outFiles.size() == 0)
+ return null;
+ if (outFiles.size() == 1) {
+ if (outFiles.get(0).equalsIgnoreCase("STDOUT")) {
+ return "STDOUT";
+ } else if (inFiles.size() == 1 && !Basic.isDirectory(outFiles.get(0))) {
+ String outfileName = outFiles.get(0);
+ if (gzip && !outfileName.endsWith(".gz"))
+ return outfileName + ".gz";
+ else
+ return outfileName;
+ } else {
+ if (!Basic.isDirectory(outFiles.get(0)))
+ throw new IOException("Specified output location does not exist or is not a directory: " + outFiles.get(0));
+ File infile = new File(inFiles.get(fileNumber));
+ String outfileName = Basic.getFileNameWithoutPath(inFiles.get(fileNumber));
+ if (Basic.isZIPorGZIPFile(outfileName))
+ outfileName = Basic.replaceFileSuffix(outfileName, "");
+ outfileName = Basic.replaceFileSuffix(outfileName, suffix);
+ File outfile = new File(outFiles.get(0), outfileName);
+ if (infile.equals(outfile))
+ throw new IOException("Output file equals input file: " + infile);
+ if (gzip && !outfile.toString().endsWith(".gz"))
+ return outfile.toString() + ".gz";
+ else
+ return outfile.toString();
+ }
+ } else {
+ if (inFiles.size() != outFiles.size())
+ throw new IOException("Number of output files=" + outFiles.size() + " must equal 1 or number of input files (" + inFiles.size() + ")");
+ if (gzip && !outFiles.get(fileNumber).endsWith(".gz"))
+ return outFiles.get(fileNumber) + ".gz";
+ else
+ return outFiles.get(fileNumber);
+ }
+ }
+}
+
+
diff --git a/src/malt/Notes b/src/malt/Notes
new file mode 100644
index 0000000..c1ecc65
--- /dev/null
+++ b/src/malt/Notes
@@ -0,0 +1,23 @@
+Difference between sass-n and malt:
+
+
+Weird gap spacing:
+
+In the following alignment C-T-C seems weird, but this is correct, as this scores better than C--T:
+
+ Score = 25.6 bits (27), Expected = 1e-05
+ Identities = 22/25 (88%), Gaps = 2/25 (8%)
+ Strand = Plus / Plus
+
+Query: 1 CCCACAACCC-T-CCACAAGGGGGG 23
+ |||||||||| | |||||| |||||
+Sbjct: 1 CCCACAACCCATCCCACAATGGGGG 25
+
+
+
+Be very careful with classes that have static components that are initialized upon first use.
+Such classes can cause problems when they are used in a multi-threaded context.
+
+
+
+
diff --git a/src/malt/TestIO.java b/src/malt/TestIO.java
new file mode 100644
index 0000000..393bd52
--- /dev/null
+++ b/src/malt/TestIO.java
@@ -0,0 +1,326 @@
+/**
+ * TestIO.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt;
+
+import jloda.util.Basic;
+
+import java.io.*;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.util.Random;
+
+/**
+ * Test IO
+ * Daniel Huson, 8.2014
+ */
+public class TestIO {
+ public static final byte[] MAGIC_NUMBER = "HEAT-IDX".getBytes();
+
+ /**
+  * Times writing and reading of a generated data set with the stream-based ("normal") and the
+  * memory-mapped ("faster") implementation, and checks that what is read back has the same shape.
+  * <p>
+  * The arguments are joined into one string that selects the steps to run by substring:
+  * "wn" normal write, "wf" faster write, "rn" normal read, "rf" faster read. Without arguments all
+  * four steps run. The file locations are hard-coded.
+  *
+  * @param args step selection, see above
+  * @throws IOException if a file operation fails or a result does not match the generated data
+  */
+ public static void main(String[] args) throws IOException {
+
+     final String choice = (args.length == 0 ? "wfrf wnrn" : Basic.toString(args, " "));
+
+     // create a test buffer
+     final int[][] arrays = createArrays();
+
+     if (choice.contains("wn")) {
+         // do the first test (the normal way of writing files)
+         final long begin = System.currentTimeMillis();
+         normalToFile("/Users/huson/tmp/heat/first.idx", arrays);
+         System.out.println("normal write: " + (System.currentTimeMillis() - begin));
+     }
+
+     if (choice.contains("wf")) {
+         // use the faster nio stuff
+         final long begin = System.currentTimeMillis();
+         fasterToFile("/Users/huson/tmp/heat/second.idx", arrays);
+         System.out.println("faster write: " + (System.currentTimeMillis() - begin));
+     }
+
+     System.out.println();
+
+     if (choice.contains("rn")) {
+         final long begin = System.currentTimeMillis();
+         final int[][] normalResults = normalFromFile("/Users/huson/tmp/heat/first.idx");
+         System.out.println("normal read: " + (System.currentTimeMillis() - begin));
+         if (normalResults != null)
+             checkSameLengths(arrays, normalResults, "normalResults");
+     }
+
+     if (choice.contains("rf")) {
+         // the clock starts here, so that verifying the normal results is not counted as read time
+         final long begin = System.currentTimeMillis();
+         final int[][] fasterResults = fasterFromFile("/Users/huson/tmp/heat/second.idx");
+         System.out.println("faster read: " + (System.currentTimeMillis() - begin));
+         if (fasterResults != null)
+             checkSameLengths(arrays, fasterResults, "fasterResults");
+     }
+ }
+
+ /**
+  * Checks that two arrays of arrays have the same outer length and the same length in every row,
+  * and reports success on stderr. Contents are not compared.
+  *
+  * @param expected the generated data
+  * @param actual   the data read back
+  * @param label    name of the result, used in messages
+  * @throws IOException if any length differs
+  */
+ private static void checkSameLengths(int[][] expected, int[][] actual, String label) throws IOException {
+     if (expected.length != actual.length)
+         throw new IOException("arrays.length=" + expected.length + "!= " + label + ".length: " + actual.length);
+     for (int i = 0; i < expected.length; i++) {
+         if (expected[i].length != actual[i].length) {
+             throw new IOException("array[" + i + "].length=" + expected[i].length + "!= " + label + "[" + i + "].length: " + actual[i].length);
+         }
+     }
+     System.err.println(label + " ok");
+ }
+
+ /**
+  * Writes the arrays to a series of memory-mapped files named after fileName with its suffix
+  * replaced by "-0.idx", "-1.idx", and so on.
+  * <p>
+  * Every file starts with the magic number and its file number; the first file additionally holds
+  * the total number of arrays. Each array is stored as its length followed by elements 1..length-1;
+  * element 0 is not stored (the generated test data keeps the length there).
+  *
+  * @param fileName name from which the per-file names are derived
+  * @param arrays   arrays to write
+  * @throws IOException if an existing file cannot be deleted or writing fails
+  */
+ public static void fasterToFile(String fileName, int[][] arrays) throws IOException {
+     // budget of ints per file, kept small so that the data is spread over several files
+     // (computed in long so that it cannot overflow)
+     final long maxNumberOfInts = 100L * arrays.length;
+
+     int start = 0;
+     int fileCount = 0;
+     while (start < arrays.length) {
+
+         // header ints: the file number, plus the total number of arrays in the first file
+         long numberOfInts = (fileCount == 0 ? 2 : 1);
+
+         // always take at least one array per file: otherwise an array that exceeds the budget would
+         // never be consumed and this loop would create empty files forever
+         int end = start;
+         while (end < arrays.length && (end == start || numberOfInts + Math.max(1, arrays[end].length) < maxNumberOfInts)) {
+             // an empty array still occupies one int, its length
+             numberOfInts += Math.max(1, arrays[end++].length);
+         }
+
+         final File file = new File(replaceFileSuffix(fileName, "-" + fileCount + ".idx"));
+         if (file.exists() && !file.delete())
+             throw new IOException("Failed to delete existing file: " + file);
+
+         final long size = 4 * numberOfInts + MAGIC_NUMBER.length;
+
+         // try-with-resources closes the file even if mapping or writing fails
+         try (RandomAccessFile out = new RandomAccessFile(file, "rw")) {
+             final FileChannel fc = out.getChannel();
+             final ByteBuffer buf = fc.map(FileChannel.MapMode.READ_WRITE, 0, size);
+
+             buf.put(MAGIC_NUMBER); // magic number comes first
+             buf.putInt(fileCount); // file number comes second
+             if (fileCount == 0)
+                 buf.putInt(arrays.length); // first file additionally contains total number of arrays
+
+             for (int i = start; i < end; i++) {
+                 final int[] array = arrays[i];
+                 final int length = array.length;
+                 buf.putInt(length);
+                 for (int j = 1; j < length; j++)
+                     buf.putInt(array[j]);
+             }
+         }
+         start = end;
+         fileCount++;
+     }
+ }
+
+ /**
+  * Reads arrays back from the series of memory-mapped files "-0.idx", "-1.idx", ... derived from
+  * fileName, until as many arrays have been read as the first file announces.
+  * <p>
+  * Each array is stored as its length followed by elements 1..length-1; element 0 of every
+  * non-empty array is set to the length.
+  *
+  * @param fileName name from which the per-file names are derived
+  * @return the arrays read
+  * @throws IOException if a file is missing, has the wrong magic number or file number, or holds
+  *                     more arrays than announced
+  */
+ public static int[][] fasterFromFile(String fileName) throws IOException {
+
+     int[][] arrays = null;
+     int theArraysLength = Integer.MAX_VALUE; // replaced by the real count once the first file is opened
+
+     int fileCount = 0;
+
+     int arrayNumber = 0; // which array are we reading
+
+     while (arrayNumber < theArraysLength) {
+         final File file = new File(replaceFileSuffix(fileName, "-" + fileCount + ".idx"));
+
+         // try-with-resources closes the file even if a check below fails
+         try (FileInputStream ins = new FileInputStream(file)) {
+             final FileChannel fc = ins.getChannel();
+
+             final ByteBuffer buf = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size());
+
+             final byte[] theMagicNumber = new byte[MAGIC_NUMBER.length];
+             buf.get(theMagicNumber, 0, theMagicNumber.length); // magic number comes first
+             if (!java.util.Arrays.equals(theMagicNumber, MAGIC_NUMBER))
+                 throw new IOException("Incorrect magic number in file: " + file);
+
+             final int theFileCount = buf.getInt();
+             if (theFileCount != fileCount)
+                 throw new IOException("Incorrect file count=" + theFileCount + ", expected: " + fileCount);
+
+             if (fileCount == 0) {
+                 theArraysLength = buf.getInt();
+                 arrays = new int[theArraysLength][];
+             }
+
+             while (buf.hasRemaining()) {
+                 if (arrayNumber >= theArraysLength)
+                     throw new IOException("File contains more arrays than announced (" + theArraysLength + "): " + file);
+                 final int length = buf.getInt();
+                 final int[] array = new int[length];
+                 arrays[arrayNumber++] = array;
+                 if (length > 0)
+                     array[0] = length; // element 0 is not stored in the file
+                 for (int i = 1; i < length; i++)
+                     array[i] = buf.getInt();
+             }
+         }
+         fileCount++;
+     }
+     return arrays;
+ }
+
+ private static void normalToFile(String fileName, int[][] arrays) throws IOException {
+ try (OutputStream outs = new BufferedOutputStream(new FileOutputStream(fileName))) {
+ outs.write(MAGIC_NUMBER, 0, MAGIC_NUMBER.length);
+ byte[] buffer = new byte[8];
+
+ writeInt(outs, arrays.length, buffer);
+ for (int[] array : arrays) {
+ int length = array.length;
+ writeInt(outs, length, buffer);
+ for (int i = 1; i < length; i++)
+ writeInt(outs, array[i], buffer);
+ }
+
+ }
+ }
+
+ private static int[][] normalFromFile(String fileName) throws IOException {
+ InputStream ins = new BufferedInputStream(new FileInputStream(fileName));
+ byte[] theMagicNumber = new byte[MAGIC_NUMBER.length];
+ ins.read(theMagicNumber);
+ byte[] buffer = new byte[8];
+ int theArraysLength = readInt(ins, buffer);
+ int[][] arrays = new int[theArraysLength][];
+ for (int i = 0; i < theArraysLength; i++) {
+ int length = readInt(ins, buffer);
+ int[] array = new int[length];
+ arrays[i] = array;
+ for (int j = 1; j < length; j++)
+ array[j] = readInt(ins, buffer);
+ }
+
+ ins.close();
+ return arrays;
+ }
+
+
+ private static int[][] createArrays() {
+ if (true) {
+ Random random = new Random(666);
+ int[][] arrays = new int[50000][];
+ for (int i = 0; i < arrays.length; i++) {
+ arrays[i] = new int[random.nextInt(1000) + 1];
+ arrays[i][0] = arrays[i].length;
+ for (int j = 1; j < arrays[i].length; j++)
+ arrays[i][j] = random.nextInt(100);
+ }
+ return arrays;
+ } else {
+ int[][] arrays = new int[10][];
+ for (int i = 0; i < arrays.length; i++) {
+ int length = i + 1;
+ int[] array = new int[length];
+ arrays[i] = array;
+ array[0] = length;
+ for (int j = 1; j < length; j++)
+ array[j] = j;
+ }
+ return arrays;
+ }
+ }
+
+ /**
+  * Writes an int to the stream as four bytes, most significant byte first.
+  *
+  * @param outs  stream to write to
+  * @param value value to write
+  * @param bytes scratch buffer; its first four bytes are overwritten
+  * @throws java.io.IOException if writing fails
+  */
+ public static void writeInt(OutputStream outs, int value, byte[] bytes) throws IOException {
+     for (int index = 0, shift = 24; index < 4; index++, shift -= 8)
+         bytes[index] = (byte) (value >> shift);
+     outs.write(bytes, 0, 4);
+ }
+
+ /**
+ * read an int from an input stream
+ *
+ * @param ins
+ * @param bytes
+ * @return long value
+ * @throws java.io.IOException
+ */
+ public static int readInt(InputStream ins, byte[] bytes) throws IOException {
+ if (ins.read(bytes, 0, 4) != 4)
+ throw new IOException("Read int: too few bytes");
+ return ((bytes[0] & 0xFF) << 24) | ((bytes[1] & 0xFF) << 16) | ((bytes[2] & 0xFF) << 8) | (bytes[3] & 0xFF);
+ }
+
+ /**
+  * Replaces the suffix of a file name; string variant of {@link #replaceFileSuffix(File, String)}.
+  *
+  * @param fileName  file name whose suffix is to be replaced
+  * @param newSuffix suffix to use instead
+  * @return new file name
+  */
+ public static String replaceFileSuffix(String fileName, String newSuffix) {
+     final File original = new File(fileName);
+     final File renamed = replaceFileSuffix(original, newSuffix);
+     return renamed.getPath();
+ }
+
+ /**
+  * Replaces the suffix of a file: the part of the file name from the last '.' on is removed and
+  * the new suffix is appended. The suffix is not appended if the remaining name already ends with it.
+  *
+  * @param file      file whose suffix is to be replaced
+  * @param newSuffix suffix to use instead; null is treated as the empty string
+  * @return new file, in the same parent directory
+  */
+ public static File replaceFileSuffix(File file, String newSuffix) {
+     // normalize first: the suffix is dereferenced by endsWith() below
+     final String suffix = (newSuffix != null ? newSuffix : "");
+     String name = getFileBaseName(file.getName());
+     if (!name.endsWith(suffix))
+         name = name + suffix;
+     return new File(file.getParent(), name);
+ }
+
+ /**
+  * Returns the name with its last ".suffix" removed. A name without a '.', or one whose only '.'
+  * is the first character, is returned unchanged; null yields null.
+  *
+  * @param name file name, may be null
+  * @return name without .suffix
+  */
+ public static String getFileBaseName(String name) {
+     if (name == null)
+         return null;
+     final int lastDot = name.lastIndexOf('.');
+     return lastDot > 0 ? name.substring(0, lastDot) : name;
+ }
+}
diff --git a/src/malt/Version.java b/src/malt/Version.java
new file mode 100644
index 0000000..9730616
--- /dev/null
+++ b/src/malt/Version.java
@@ -0,0 +1,29 @@
+/**
+ * Version.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt;
+
+/**
+ * Program name and the version line shown to the user for MALT.
+ * Daniel Huson, 2014
+ */
+public class Version {
+ /** program name */
+ public static final String NAME = "MALT";
+ /** program name followed by version number and build date */
+ public static final String SHORT_DESCRIPTION = NAME + " (version 0.3.8, built 2 Jun 2016)";
+}
diff --git a/src/malt/align/AlignerOptions.java b/src/malt/align/AlignerOptions.java
new file mode 100644
index 0000000..6c879e5
--- /dev/null
+++ b/src/malt/align/AlignerOptions.java
@@ -0,0 +1,217 @@
+/**
+ * AlignerOptions.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.align;
+
+import jloda.util.Pair;
+import megan.parsers.blast.BlastMode;
+
+/**
+ * Bundles all settings that an aligner needs: alignment mode, scoring scheme,
+ * band width, seed filter thresholds, BLAST statistics parameters and SAM output flags.
+ * Daniel Huson, 8.2014
+ */
+public class AlignerOptions {
+ /**
+ * the two supported alignment modes
+ */
+ public enum AlignmentMode {Local, SemiGlobal}
+
+ // alignment mode and kind of reference:
+ private AlignmentMode alignmentType = AlignmentMode.Local;
+ private boolean referenceIsDNA = true;
+
+ // seed filter thresholds; the value 0 means "not set, use the mode-specific default":
+ private int minSeedIdentities = 0;
+ private int ungappedXDrop = 0;
+ private int ungappedMinRawScore = 0;
+
+ // scoring scheme and band width:
+ private int matchScore = 2;
+ private int mismatchScore = -3;
+ private int gapOpenPenalty = 7;
+ private int gapExtensionPenalty = 3;
+ private int band = 4;
+ private IScoringMatrix scoringMatrix;
+
+ // two values for computing blast statistics (K is kept as its natural logarithm):
+ private double lambda = 0.625;
+ private double lnK = -0.89159811928378356416921953633132;
+
+ private long referenceDatabaseLength = 100000;
+
+ private boolean samSoftClipping = false;
+
+ // ---------- alignment mode ----------
+
+ public AlignmentMode getAlignmentType() {
+ return this.alignmentType;
+ }
+
+ public void setAlignmentType(AlignmentMode alignmentType) {
+ this.alignmentType = alignmentType;
+ }
+
+ /**
+ * sets the alignment mode by name
+ *
+ * @param alignmentType exact name of an AlignmentMode constant
+ */
+ public void setAlignmentType(String alignmentType) {
+ final AlignmentMode parsed = AlignmentMode.valueOf(alignmentType);
+ setAlignmentType(parsed);
+ }
+
+ public boolean isReferenceIsDNA() {
+ return this.referenceIsDNA;
+ }
+
+ public void setReferenceIsDNA(boolean referenceIsDNA) {
+ this.referenceIsDNA = referenceIsDNA;
+ }
+
+ // ---------- scoring scheme ----------
+
+ public int getGapOpenPenalty() {
+ return this.gapOpenPenalty;
+ }
+
+ public void setGapOpenPenalty(int gapOpenPenalty) {
+ this.gapOpenPenalty = gapOpenPenalty;
+ }
+
+ public int getGapExtensionPenalty() {
+ return this.gapExtensionPenalty;
+ }
+
+ public void setGapExtensionPenalty(int gapExtensionPenalty) {
+ this.gapExtensionPenalty = gapExtensionPenalty;
+ }
+
+ public int getMatchScore() {
+ return this.matchScore;
+ }
+
+ public void setMatchScore(int matchScore) {
+ this.matchScore = matchScore;
+ }
+
+ public int getMismatchScore() {
+ return this.mismatchScore;
+ }
+
+ public void setMismatchScore(int mismatchScore) {
+ this.mismatchScore = mismatchScore;
+ }
+
+ public int getBand() {
+ return this.band;
+ }
+
+ public void setBand(int band) {
+ this.band = band;
+ }
+
+ public IScoringMatrix getScoringMatrix() {
+ return this.scoringMatrix;
+ }
+
+ public void setScoringMatrix(IScoringMatrix scoringMatrix) {
+ this.scoringMatrix = scoringMatrix;
+ }
+
+ // ---------- BLAST statistics ----------
+
+ public long getReferenceDatabaseLength() {
+ return this.referenceDatabaseLength;
+ }
+
+ public void setReferenceDatabaseLength(long referenceDatabaseLength) {
+ this.referenceDatabaseLength = referenceDatabaseLength;
+ }
+
+ /**
+ * sets lambda and K from a pair and reports both values on stderr
+ *
+ * @param lambdaAndK first member is lambda, second member is K
+ */
+ public void setLambdaAndK(Pair<Double, Double> lambdaAndK) {
+ final Double newLambda = lambdaAndK.get1();
+ final Double newK = lambdaAndK.get2();
+ System.err.println("BLAST statistics parameters: lambda=" + newLambda + " k=" + newK);
+ this.lambda = newLambda;
+ this.lnK = Math.log(newK);
+ }
+
+ public double getLambda() {
+ return this.lambda;
+ }
+
+ public void setLambda(double lambda) {
+ this.lambda = lambda;
+ }
+
+ /**
+ * gets K, recomputed from the stored logarithm
+ *
+ * @return K
+ */
+ public double getK() {
+ return Math.exp(this.lnK);
+ }
+
+ public void setK(double K) {
+ this.lnK = Math.log(K);
+ }
+
+ public double getLnK() {
+ return this.lnK;
+ }
+
+ // ---------- seed filter thresholds ----------
+
+ /**
+ * gets the minimum number of seed identities. If no value has been set, a default for the given mode is used
+ *
+ * @param mode blast mode
+ * @return value that was set, else the default for the mode
+ */
+ public int getMinSeedIdentities(final BlastMode mode) {
+ if (minSeedIdentities != 0)
+ return minSeedIdentities;
+
+ switch (mode) {
+ case BlastP:
+ case BlastX:
+ return 10;
+ case BlastN:
+ return 0; // no need to set this, because BlastN seeds are always completely identical
+ default:
+ return minSeedIdentities;
+ }
+ }
+
+ public void setMinSeedIdentities(int minSeedIdentities) {
+ this.minSeedIdentities = minSeedIdentities;
+ }
+
+ /**
+ * gets the ungapped x-drop value. If no value has been set, a default for the given mode is used
+ *
+ * @param mode blast mode
+ * @return value that was set, else the default for the mode
+ */
+ public int getUngappedXDrop(final BlastMode mode) {
+ if (ungappedXDrop != 0)
+ return ungappedXDrop;
+
+ switch (mode) {
+ case BlastP:
+ case BlastX:
+ return 20;
+ case BlastN:
+ return 8; // todo: need to figure out best default
+ default:
+ return ungappedXDrop;
+ }
+ }
+
+ public void setUngappedXDrop(int ungappedXDrop) {
+ this.ungappedXDrop = ungappedXDrop;
+ }
+
+ /**
+ * gets the minimum ungapped raw score. If no value has been set, a default for the given mode is used
+ *
+ * @param mode blast mode
+ * @return value that was set, else the default for the mode
+ */
+ public int getUngappedMinRawScore(final BlastMode mode) {
+ if (ungappedMinRawScore != 0)
+ return ungappedMinRawScore;
+
+ switch (mode) {
+ case BlastP:
+ case BlastX:
+ case BlastN:
+ return 60; // todo: need to figure out best default for BlastN
+ default:
+ return ungappedMinRawScore;
+ }
+ }
+
+ public void setUngappedMinRawScore(int ungappedMinRawScore) {
+ this.ungappedMinRawScore = ungappedMinRawScore;
+ }
+
+ // ---------- SAM output ----------
+
+ public boolean isSamSoftClipping() {
+ return this.samSoftClipping;
+ }
+
+ public void setSamSoftClipping(boolean samSoftClipping) {
+ this.samSoftClipping = samSoftClipping;
+ }
+}
diff --git a/src/malt/align/BandedAligner.java b/src/malt/align/BandedAligner.java
new file mode 100644
index 0000000..6de942b
--- /dev/null
+++ b/src/malt/align/BandedAligner.java
@@ -0,0 +1,1407 @@
+/**
+ * BandedAligner.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.align;
+
+import jloda.util.Basic;
+import malt.DataForInnerLoop;
+import malt.data.DNA5;
+import malt.io.SAMHelper;
+import malt.util.ReusableByteBuffer;
+import malt.util.Utilities;
+import megan.parsers.blast.BlastMode;
+
+/**
+ * banded DNA aligner. Does both local and semiGlobal alignment
+ * Daniel Huson, 8.2014
+ */
+public class BandedAligner {
+ // BLAST statistics parameters; the constructor overwrites both with the values held by the aligner options
+ private double lambda = 0.625;
+ private double lnK = -0.89159811928378356416921953633132;
+ // natural logarithm of 2, used as divisor when computing the bit score
+ private final static double LN_2 = 0.69314718055994530941723212145818;
+ // score written into the first and last row by the semi-global alignment, so that paths never leave the band
+ private final static int MINUS_INFINITY = -100000000;
+
+ public static int ALIGNMENT_SEGMENT_LENGTH = 60; // length of alignment segment in text format output
+ private final static byte[] MID_TRACK_LEADING_SPACES = " ".getBytes(); // spaces used in text format output
+
+ // enters the computation of the expected value; the constructor overwrites it with the value held by the aligner options
+ private long referenceDatabaseLength = 10000000;
+
+ // current query and reference, set by computeAlignment()
+ private byte[] query;
+ private int queryLength;
+ private byte[] reference;
+ private int referenceLength;
+
+ // scoring scheme and band width, all taken from the aligner options in the constructor
+ private final int[][] scoringMatrix;
+ private final int gapOpenPenalty;
+ private final int gapExtensionPenalty;
+ private final int band;
+
+ // rawScore is set by the alignment methods; bitScore and expected are derived from it in computeBitScoreAndExpected()
+ private int rawScore;
+ private float bitScore = 0;
+ private double expected = 0;
+
+ // true if the mode is BlastN
+ private final boolean isDNAAlignment;
+
+ // alignment statistics; identities, mismatches, gaps and gapOpens are reset and counted in computeAlignmentByTraceBack()
+ private int identities;
+ private int mismatches;
+ private int gaps;
+ private int gapOpens;
+ private int alignmentLength;
+
+ private final BlastMode mode;
+ // true if the aligner options request semi-global rather than local alignment
+ private final boolean doSemiGlobal;
+
+ private int refOffset; // needed convert from row to position in reference
+
+ private int startQuery; // first alignment position of query
+ private int endQuery = -1; // last alignment position of query +1
+ private int startReference;
+ private int endReference;
+
+ // score matrices, indexed [column][row]: M holds scores of cells reached by a match or mismatch step,
+ // IRef and IQuery those of cells reached by the two kinds of gap step
+ private int[][] matrixM;
+ private int[][] matrixIRef;
+ private int[][] matrixIQuery;
+
+ // one trace-back matrix per score matrix, same indexing; each cell holds one of the byte codes below
+ private byte[][] traceBackM;
+ private byte[][] traceBackIRef;
+ private byte[][] traceBackIQuery;
+
+ // trace-back codes: X_FROM_Y means that the score of the cell in matrix X was obtained from matrix Y;
+ // DONE marks a cell at which the trace-back stops
+ private static final byte DONE = 9;
+ private static final byte M_FROM_M = 1;
+ private static final byte M_FROM_IRef = 2;
+ private static final byte M_FROM_IQuery = 3;
+ private static final byte IRef_FROM_M = 4;
+ private static final byte IRef_FROM_IRef = 5;
+ private static final byte IQuery_FROM_M = 6;
+ private static final byte IQuery_FROM_IQuery = 7;
+
+ // buffers:
+ // the three tracks of the alignment (query, mid line, reference), filled during trace-back and grown on demand
+ private byte[] queryTrack = new byte[1000];
+ private byte[] midTrack = new byte[1000];
+ private byte[] referenceTrack = new byte[1000];
+
+ private ReusableByteBuffer alignmentBuffer = new ReusableByteBuffer(10000);
+
+ // start position of the seed in the query and in the reference, set by computeAlignment()
+ private int queryPos;
+ private int refPos;
+
+ private final boolean samSoftClipping;
+
+ // new stuff:
+
+ private byte[][] alignment; // last computed alignment
+ private int seedLength;
+ // number of rows depends only on band width
+ // the constructor sets rows = 2 * band + 3, lastRowToFill = rows - 2 and middleRow = rows / 2
+ private final int rows;
+ private final int lastRowToFill;
+ private final int middleRow;
+
+ /**
+ * Sets up an aligner from the given options. The band geometry is fixed here,
+ * the matrices are only allocated once the first query is aligned.
+ *
+ * @param alignerOptions source of scoring matrix, gap penalties, band width, statistics parameters and SAM flags
+ * @param mode blast mode, BlastN selects DNA alignment
+ */
+ public BandedAligner(final AlignerOptions alignerOptions, final BlastMode mode) {
+ // scoring scheme:
+ this.scoringMatrix = alignerOptions.getScoringMatrix().getMatrix();
+ this.gapOpenPenalty = alignerOptions.getGapOpenPenalty();
+ this.gapExtensionPenalty = alignerOptions.getGapExtensionPenalty();
+
+ // mode of operation:
+ this.mode = mode;
+ this.isDNAAlignment = (mode == BlastMode.BlastN);
+ this.doSemiGlobal = (AlignerOptions.AlignmentMode.SemiGlobal == alignerOptions.getAlignmentType());
+ this.samSoftClipping = alignerOptions.isSamSoftClipping();
+
+ // values for computing bit score and expected value:
+ this.lambda = alignerOptions.getLambda();
+ this.lnK = alignerOptions.getLnK();
+ this.referenceDatabaseLength = alignerOptions.getReferenceDatabaseLength();
+
+ // band geometry:
+ this.band = alignerOptions.getBand();
+ this.rows = 2 * this.band + 3;
+ this.lastRowToFill = this.rows - 2;
+ this.middleRow = this.rows / 2; // half
+
+ // start with empty matrices, they are sized properly by the alignment methods
+ this.matrixM = new int[0][0];
+ this.matrixIRef = new int[0][0];
+ this.matrixIQuery = new int[0][0];
+ this.traceBackM = new byte[0][0];
+ this.traceBackIRef = new byte[0][0];
+ this.traceBackIQuery = new byte[0][0];
+ // todo: only use one traceback matrix
+ }
+
+ /**
+ * Stores the given query, reference and seed and then runs the banded alignment,
+ * semi-global if the aligner was set up that way, local otherwise.
+ * The alignment methods compute the raw score.
+ *
+ * @param query query sequence
+ * @param queryLength length of the query
+ * @param reference reference sequence
+ * @param referenceLength length of the reference
+ * @param queryPos position of the seed in the query
+ * @param refPos position of the seed in the reference
+ * @param seedLength length of the seed
+ */
+ public void computeAlignment(byte[] query, int queryLength, byte[] reference, int referenceLength, int queryPos, int refPos, int seedLength) {
+ this.query = query;
+ this.queryLength = queryLength;
+
+ this.reference = reference;
+ this.referenceLength = referenceLength;
+
+ this.queryPos = queryPos;
+ this.refPos = refPos;
+ this.seedLength = seedLength;
+
+ // reset the alignment coordinates
+ endReference = -1;
+ endQuery = -1;
+ startReference = -1;
+ startQuery = -1;
+
+ if (!doSemiGlobal) {
+ computeLocalAlignment();
+ } else {
+ computeSemiGlobalAlignment();
+ }
+ }
+
+ /**
+ * Performs a banded local alignment around the seed and stores the resulting raw score in rawScore.
+ * The raw score is the sum of three parts: the score of the seed interior (seed without its first and
+ * last position), the best score found in the first seed column on the middle row (matrix filled from
+ * left to right), and the best score found in the last seed column on the middle row (matrix filled
+ * from right to left).
+ * If the seed interior does not score positively, rawScore is set to 0 and nothing else is computed.
+ */
+ private void computeLocalAlignment() {
+ alignment = null; // will need to call alignmentByTraceBack to compute this
+
+ // cell (col,row) compares query[col-1] with reference[row+col+refOffset];
+ // with this offset the middle row runs along the diagonal of the seed
+ refOffset = refPos - queryPos - band - 2; // need this to compute index in reference sequence
+
+ final int cols = queryLength + 2; // query plus one col before and one after
+
+ final int firstSeedCol = queryPos + 1; // +1 because col=pos+1
+ final int lastSeedCol = queryPos + seedLength; // +1 because col=pos+1, but then -1 because want to be last in seed (not first after seed)
+
+ //if (lastSeedCol > queryLength)
+ // return; // too long
+
+ // ------- compute score that comes from seed (without first and last member)
+ rawScore = 0;
+ {
+ for (int col = firstSeedCol + 1; col < lastSeedCol; col++) {
+ final int refIndex = middleRow + col + refOffset;
+ rawScore += scoringMatrix[query[col - 1]][reference[refIndex]];
+ }
+ if (rawScore <= 0) {
+ rawScore = 0;
+ return;
+ }
+ }
+
+ // ------- resize matrices if necessary:
+ if (cols >= matrixM.length) { // all values will be 0
+ // resize:
+ matrixM = new int[cols][rows];
+ matrixIRef = new int[cols][rows];
+ matrixIQuery = new int[cols][rows];
+ traceBackM = new byte[cols][rows];
+ traceBackIRef = new byte[cols][rows];
+ traceBackIQuery = new byte[cols][rows];
+
+ // initialize first column:
+ for (int r = 1; r < rows; r++) {
+ // matrixM[0][r] = matrixIRef[0][r] = matrixIQuery[0][r] = 0;
+ traceBackM[0][r] = traceBackIRef[0][r] = traceBackIQuery[0][r] = DONE;
+ }
+ // initialize the first and last row:
+ for (int c = 0; c < cols; c++) {
+ // matrixM[c][0] = matrixIRef[c][0] = matrixIQuery[c][0] = matrixM[c][rows - 1] = matrixIRef[c][rows - 1] = matrixIQuery[c][rows - 1] = 0;
+ // every column c gets its border cells marked as DONE in all three trace-back matrices
+ // (the last row of IRef and IQuery used to be written at column 0 only)
+ traceBackM[c][0] = traceBackIRef[c][0] = traceBackIQuery[c][0] = traceBackM[c][rows - 1] = traceBackIRef[c][rows - 1] = traceBackIQuery[c][rows - 1] = DONE;
+ }
+ }
+
+
+ // ------- fill dynamic programming matrix from 0 to first column of seed:
+ {
+ final int firstCol = Math.max(1, -refOffset - 2 * band - 1); // the column for which refIndex(firstCol,bottom-to-last row)==0
+ if (firstCol > 1) {
+ final int prevCol = firstCol - 1;
+ final int secondToLastRow = rows - 2;
+ traceBackM[prevCol][secondToLastRow] = traceBackIRef[prevCol][secondToLastRow] = traceBackIQuery[prevCol][secondToLastRow] = DONE; // set previous column to done
+ matrixM[prevCol][secondToLastRow] = matrixIRef[prevCol][secondToLastRow] = matrixIQuery[prevCol][secondToLastRow] = 0;
+ }
+
+ // note that query pos is c-1, because c==0 is before start of query
+
+ for (int col = firstCol; col <= firstSeedCol; col++) { // we never modify the first column or the first or last row
+ for (int row = 1; row <= lastRowToFill; row++) {
+ final int refIndex = row + col + refOffset;
+
+ if (refIndex == -1) { // in column before reference starts, init
+ traceBackM[col][row] = traceBackIRef[col][row] = traceBackIQuery[col][row] = DONE;
+ matrixM[col][row] = matrixIRef[col][row] = matrixIQuery[col][row] = 0;
+ } else if (refIndex >= 0) //do the actual alignment:
+ {
+ // local alignment: a cell whose best score is not positive gets score 0 and trace-back DONE
+ int bestMScore = 0;
+ // match or mismatch
+ {
+ final int s = scoringMatrix[query[col - 1]][reference[refIndex]];
+
+ int score = matrixM[col - 1][row] + s;
+ if (score > 0) {
+ traceBackM[col][row] = M_FROM_M;
+ bestMScore = score;
+ }
+ score = matrixIRef[col - 1][row] + s;
+ if (score > bestMScore) {
+ traceBackM[col][row] = M_FROM_IRef;
+ bestMScore = score;
+ }
+ score = matrixIQuery[col - 1][row] + s;
+ if (score > bestMScore) {
+ traceBackM[col][row] = M_FROM_IQuery;
+ bestMScore = score;
+ }
+ if (bestMScore == 0) {
+ traceBackM[col][row] = DONE;
+ }
+ matrixM[col][row] = bestMScore;
+ }
+
+ // insertion in reference:
+ int bestIRefScore = 0;
+ {
+ int score = matrixM[col][row - 1] - gapOpenPenalty;
+
+ if (score > bestIRefScore) {
+ traceBackIRef[col][row] = IRef_FROM_M;
+ bestIRefScore = score;
+ }
+
+ score = matrixIRef[col][row - 1] - gapExtensionPenalty;
+ if (score > bestIRefScore) {
+ bestIRefScore = score;
+ traceBackIRef[col][row] = IRef_FROM_IRef;
+ }
+ if (bestIRefScore == 0) {
+ traceBackIRef[col][row] = DONE;
+ }
+ matrixIRef[col][row] = bestIRefScore;
+
+ }
+
+ // insertion in query:
+ int bestIQueryScore = 0;
+ {
+ int score = matrixM[col - 1][row + 1] - gapOpenPenalty;
+
+ if (score > bestIQueryScore) {
+ bestIQueryScore = score;
+ traceBackIQuery[col][row] = IQuery_FROM_M;
+ }
+
+ score = matrixIQuery[col - 1][row + 1] - gapExtensionPenalty;
+ if (score > bestIQueryScore) {
+ bestIQueryScore = score;
+ traceBackIQuery[col][row] = IQuery_FROM_IQuery;
+ }
+ if (bestIQueryScore == 0) {
+ traceBackIQuery[col][row] = DONE;
+ }
+ matrixIQuery[col][row] = bestIQueryScore;
+ }
+
+ }
+ // else refIndex < -1
+
+ }
+ }
+ }
+
+ // ------- fill dynamic programming matrix from end of query to last column of seed:
+ {
+ final int lastCol = Math.min(queryLength + 1, queryPos + referenceLength - refPos + 1); // last column, fill upto lastCol-1
+
+ // initial last column:
+
+ for (int row = 1; row < rows; row++) {
+ matrixM[lastCol][row] = matrixIRef[lastCol][row] = matrixIQuery[lastCol][row] = 0;
+ traceBackM[lastCol][row] = traceBackIRef[lastCol][row] = traceBackIQuery[lastCol][row] = DONE;
+ }
+
+ // note that col=pos-1, or pos=col+1, because c==0 is before start of query
+
+ // same recurrences as above, mirrored: each cell reads from column col+1 and from the rows on the other side
+ for (int col = lastCol - 1; col >= lastSeedCol; col--) { // we never modify the first column or the first or last row
+ for (int row = lastRowToFill; row >= 1; row--) {
+ final int refIndex = row + col + refOffset;
+
+ if (refIndex >= referenceLength) { // out of range of the alignment
+ traceBackM[col][row] = traceBackIRef[col][row] = traceBackIQuery[col][row] = DONE;
+ matrixM[col][row] = matrixIRef[col][row] = matrixIQuery[col][row] = 0;
+ } else if (refIndex >= 0 && refIndex < referenceLength) { // do the actual alignment:
+ int bestMScore = 0;
+ // match or mismatch
+ {
+ final int s = scoringMatrix[query[col - 1]][reference[refIndex]]; // pos in query=col-1
+
+ int score = matrixM[col + 1][row] + s;
+ if (score > 0) {
+ traceBackM[col][row] = M_FROM_M;
+ bestMScore = score;
+ }
+ score = matrixIRef[col + 1][row] + s;
+ if (score > bestMScore) {
+ traceBackM[col][row] = M_FROM_IRef;
+ bestMScore = score;
+ }
+ score = matrixIQuery[col + 1][row] + s;
+ if (score > bestMScore) {
+ traceBackM[col][row] = M_FROM_IQuery;
+ bestMScore = score;
+ }
+ if (bestMScore == 0) {
+ traceBackM[col][row] = DONE;
+ }
+ matrixM[col][row] = bestMScore;
+ }
+
+ // insertion in ref
+ int bestIRefScore = 0;
+ {
+ int score = matrixM[col][row + 1] - gapOpenPenalty;
+
+ if (score > bestIRefScore) {
+ traceBackIRef[col][row] = IRef_FROM_M;
+ bestIRefScore = score;
+ }
+
+ score = matrixIRef[col][row + 1] - gapExtensionPenalty;
+ if (score > bestIRefScore) {
+ bestIRefScore = score;
+ traceBackIRef[col][row] = IRef_FROM_IRef;
+ }
+ if (bestIRefScore == 0) {
+ traceBackIRef[col][row] = DONE;
+ }
+ matrixIRef[col][row] = bestIRefScore;
+
+ }
+
+ // insertion in query:
+ int bestIQueryScore = 0;
+ {
+ int score = matrixM[col + 1][row - 1] - gapOpenPenalty;
+
+ if (score > bestIQueryScore) {
+ bestIQueryScore = score;
+ traceBackIQuery[col][row] = IQuery_FROM_M;
+ }
+
+ score = matrixIQuery[col + 1][row - 1] - gapExtensionPenalty;
+ if (score > bestIQueryScore) {
+ bestIQueryScore = score;
+ traceBackIQuery[col][row] = IQuery_FROM_IQuery;
+ }
+ if (bestIQueryScore == 0) {
+ traceBackIQuery[col][row] = DONE;
+ }
+ matrixIQuery[col][row] = bestIQueryScore;
+ }
+
+ }
+ // else refIndex >referenceLength
+ }
+ }
+ }
+
+ // debugging output, permanently switched off:
+ if (false) {
+ {
+ System.err.println("queryPos: " + queryPos);
+ System.err.println("refPos: " + refPos);
+ System.err.println("seedLen.: " + seedLength);
+
+ System.err.println("Query:");
+ System.err.println(Basic.toString(query));
+ System.err.println("Reference:");
+ System.err.println(Basic.toString(reference));
+ }
+
+ {
+ System.err.println("SeedScore: " + rawScore);
+ int firstScore = Math.max(Math.max(matrixIQuery[firstSeedCol][middleRow], matrixIRef[firstSeedCol][middleRow]), matrixM[firstSeedCol][middleRow]);
+ System.err.println("FirstScore: " + firstScore);
+ int secondScore = Math.max(Math.max(matrixIQuery[lastSeedCol][middleRow], matrixIRef[lastSeedCol][middleRow]), matrixM[lastSeedCol][middleRow]);
+ System.err.println("secondScore: " + secondScore);
+ System.err.println("totalScore: " + (rawScore + firstScore + secondScore));
+ }
+ {
+ System.err.println("Matrix M:");
+ System.err.println(toString(matrixM, 0, cols, query));
+ System.err.println("Matrix IQuery:");
+ System.err.println(toString(matrixIQuery, 0, cols, query));
+ System.err.println("Matrix IRef:");
+ System.err.println(toString(matrixIRef, 0, cols, query));
+ }
+ }
+
+ // add the best score in the first seed column and the best score in the last seed column to the seed score
+ rawScore += Math.max(Math.max(matrixIQuery[firstSeedCol][middleRow], matrixIRef[firstSeedCol][middleRow]), matrixM[firstSeedCol][middleRow]);
+ rawScore += Math.max(Math.max(matrixIQuery[lastSeedCol][middleRow], matrixIRef[lastSeedCol][middleRow]), matrixM[lastSeedCol][middleRow]);
+ }
+
+ /**
+ * Performs a banded semi-global alignment around the seed and stores the resulting raw score in rawScore.
+ * The raw score is the sum of the score of the seed interior (seed without its first and last position),
+ * the best score found in the first seed column on the middle row (matrix filled from left to right) and
+ * the best score found in the last seed column on the middle row (matrix filled from right to left).
+ * In contrast to the local alignment, cell scores are not cut off at 0 and may become negative.
+ * If the seed interior does not score positively, rawScore is set to 0 and nothing else is computed.
+ */
+ private void computeSemiGlobalAlignment() {
+ alignment = null; // will need to call alignmentByTraceBack to compute this
+
+ // cell (col,row) compares query[col-1] with reference[row+col+refOffset];
+ // with this offset the middle row runs along the diagonal of the seed
+ refOffset = refPos - queryPos - band - 2; // need this to compute index in reference sequence
+
+ final int cols = queryLength + 2; // query plus one col before and one after
+
+ final int firstSeedCol = queryPos + 1; // +1 because col=pos+1
+ final int lastSeedCol = queryPos + seedLength; // +1 because col=pos+1, but then -1 because want to be last in seed (not first after seed)
+
+ //if (lastSeedCol > queryLength)
+ // return; // too long
+
+ // ------- compute score that comes from seed (without first and last member)
+ rawScore = 0;
+ {
+ for (int col = firstSeedCol + 1; col < lastSeedCol; col++) {
+ final int refIndex = middleRow + col + refOffset;
+ rawScore += scoringMatrix[query[col - 1]][reference[refIndex]];
+ }
+ if (rawScore <= 0) {
+ rawScore = 0;
+ return;
+ }
+ }
+
+ // ------- resize matrices if necessary:
+ if (cols >= matrixM.length) { // all values will be 0
+ // resize:
+ matrixM = new int[cols][rows];
+ matrixIRef = new int[cols][rows];
+ matrixIQuery = new int[cols][rows];
+ traceBackM = new byte[cols][rows];
+ traceBackIRef = new byte[cols][rows];
+ traceBackIQuery = new byte[cols][rows];
+
+ // initialize first column:
+ for (int r = 1; r < rows; r++) {
+ traceBackM[0][r] = traceBackIRef[0][r] = traceBackIQuery[0][r] = DONE;
+ matrixIQuery[0][r] = -gapOpenPenalty;
+ }
+ // initialize the first and last row:
+ for (int c = 0; c < cols; c++) {
+ matrixM[c][0] = matrixIRef[c][0] = matrixIQuery[c][0]
+ = matrixM[c][rows - 1] = matrixIRef[c][rows - 1] = matrixIQuery[c][rows - 1]
+ = MINUS_INFINITY; // must never go outside band
+ }
+ }
+
+ // ------- fill dynamic programming matrix from 0 to first column of seed:
+ {
+ final int firstCol = Math.max(1, -refOffset - 2 * band - 1); // the column for which refIndex(firstCol,bottom-to-last row)==0
+ if (firstCol > 1) {
+ final int prevCol = firstCol - 1;
+ final int secondToLastRow = rows - 2;
+ traceBackM[prevCol][secondToLastRow] = traceBackIRef[prevCol][secondToLastRow] = traceBackIQuery[prevCol][secondToLastRow] = DONE; // set previous column to done
+ matrixM[prevCol][secondToLastRow] = matrixIRef[prevCol][secondToLastRow] = matrixIQuery[prevCol][secondToLastRow] = 0;
+ }
+
+ // note that query pos is c-1, because c==0 is before start of query
+
+ for (int col = firstCol; col <= firstSeedCol; col++) { // we never modify the first column or the first or last row
+ for (int row = 1; row <= lastRowToFill; row++) {
+ final int refIndex = row + col + refOffset;
+ // note: this guard compares against the array length, whereas the second fill below compares against referenceLength
+ if (refIndex >= reference.length)
+ continue; // todo: debug this, sometimes happens, but shouldn't
+
+ if (refIndex == -1) { // in column before reference starts, init
+ traceBackM[col][row] = traceBackIRef[col][row] = traceBackIQuery[col][row] = DONE;
+ matrixM[col][row] = 0;
+ matrixIRef[col][row] = matrixIQuery[col][row] = -gapOpenPenalty;
+ } else if (refIndex >= 0) //do the actual alignment:
+ {
+ // starting at Integer.MIN_VALUE means that the first candidate is always taken,
+ // so every cell filled here gets a trace-back code other than DONE
+ int bestMScore = Integer.MIN_VALUE;
+ // match or mismatch
+ {
+ final int s = scoringMatrix[query[col - 1]][reference[refIndex]];
+
+ int score = matrixM[col - 1][row] + s;
+ if (score > bestMScore) {
+ traceBackM[col][row] = M_FROM_M;
+ bestMScore = score;
+ }
+ score = matrixIRef[col - 1][row] + s;
+ if (score > bestMScore) {
+ traceBackM[col][row] = M_FROM_IRef;
+ bestMScore = score;
+ }
+ score = matrixIQuery[col - 1][row] + s;
+ if (score > bestMScore) {
+ traceBackM[col][row] = M_FROM_IQuery;
+ bestMScore = score;
+ }
+ matrixM[col][row] = bestMScore;
+ }
+
+ // insertion in reference:
+ int bestIRefScore = Integer.MIN_VALUE;
+ {
+ int score = matrixM[col][row - 1] - gapOpenPenalty;
+
+ if (score > bestIRefScore) {
+ traceBackIRef[col][row] = IRef_FROM_M;
+ bestIRefScore = score;
+ }
+
+ score = matrixIRef[col][row - 1] - gapExtensionPenalty;
+ if (score > bestIRefScore) {
+ bestIRefScore = score;
+ traceBackIRef[col][row] = IRef_FROM_IRef;
+ }
+ matrixIRef[col][row] = bestIRefScore;
+ }
+
+ // insertion in query:
+ int bestIQueryScore = Integer.MIN_VALUE;
+ {
+ int score = matrixM[col - 1][row + 1] - gapOpenPenalty;
+
+ if (score > bestIQueryScore) {
+ bestIQueryScore = score;
+ traceBackIQuery[col][row] = IQuery_FROM_M;
+ }
+
+ score = matrixIQuery[col - 1][row + 1] - gapExtensionPenalty;
+ if (score > bestIQueryScore) {
+ bestIQueryScore = score;
+ traceBackIQuery[col][row] = IQuery_FROM_IQuery;
+ }
+ matrixIQuery[col][row] = bestIQueryScore;
+ }
+ }
+ // else refIndex < -1
+ }
+ }
+ }
+
+ // ------- fill dynamic programming matrix from end of query to last column of seed:
+ {
+ final int lastCol = Math.min(queryLength + 1, queryPos + referenceLength - refPos + 1); // last column, fill upto lastCol-1
+
+ // initial last column:
+
+ for (int row = 1; row < rows - 1; row++) { // no need to init first or last row...
+ matrixM[lastCol][row] = 0;
+ matrixIRef[lastCol][row] = matrixIQuery[lastCol][row] = -gapOpenPenalty;
+ traceBackM[lastCol][row] = traceBackIRef[lastCol][row] = traceBackIQuery[lastCol][row] = DONE;
+ }
+
+ // note that col=pos-1, or pos=col+1, because c==0 is before start of query
+
+ /*
+ System.err.println("lastSeedCol: " + lastSeedCol);
+ System.err.println("lastCol: " + lastCol);
+ System.err.println("lastRowToFill: " + lastRowToFill);
+ */
+
+ // same recurrences as above, mirrored: each cell reads from column col+1 and from the rows on the other side
+ for (int col = lastCol - 1; col >= lastSeedCol; col--) { // we never modify the first column or the first or last row
+ for (int row = lastRowToFill; row >= 1; row--) {
+ final int refIndex = row + col + refOffset;
+
+ if (refIndex >= referenceLength) { // out of range of the alignment
+ traceBackM[col][row] = traceBackIRef[col][row] = traceBackIQuery[col][row] = DONE;
+ matrixM[col][row] = matrixIRef[col][row] = matrixIQuery[col][row] = -gapOpenPenalty;
+ } else if (refIndex >= 0 && refIndex < referenceLength) { // do the actual alignment:
+ int bestMScore = Integer.MIN_VALUE;
+ // match or mismatch
+ {
+ final int s = scoringMatrix[query[col - 1]][reference[refIndex]]; // pos in query=col-1
+
+ int score = matrixM[col + 1][row] + s;
+ if (score > bestMScore) {
+ traceBackM[col][row] = M_FROM_M;
+ bestMScore = score;
+ }
+ score = matrixIRef[col + 1][row] + s;
+ if (score > bestMScore) {
+ traceBackM[col][row] = M_FROM_IRef;
+ bestMScore = score;
+ }
+ score = matrixIQuery[col + 1][row] + s;
+ if (score > bestMScore) {
+ traceBackM[col][row] = M_FROM_IQuery;
+ bestMScore = score;
+ }
+ matrixM[col][row] = bestMScore;
+ }
+
+ // insertion in ref
+ int bestIRefScore = Integer.MIN_VALUE;
+ {
+ int score = matrixM[col][row + 1] - gapOpenPenalty;
+
+ if (score > bestIRefScore) {
+ traceBackIRef[col][row] = IRef_FROM_M;
+ bestIRefScore = score;
+ }
+
+ score = matrixIRef[col][row + 1] - gapExtensionPenalty;
+ if (score > bestIRefScore) {
+ bestIRefScore = score;
+ traceBackIRef[col][row] = IRef_FROM_IRef;
+ }
+ matrixIRef[col][row] = bestIRefScore;
+ }
+
+ // insertion in query:
+ int bestIQueryScore = Integer.MIN_VALUE;
+ {
+ int score = matrixM[col + 1][row - 1] - gapOpenPenalty;
+
+ if (score > bestIQueryScore) {
+ bestIQueryScore = score;
+ traceBackIQuery[col][row] = IQuery_FROM_M;
+ }
+
+ score = matrixIQuery[col + 1][row - 1] - gapExtensionPenalty;
+ if (score > bestIQueryScore) {
+ bestIQueryScore = score;
+ traceBackIQuery[col][row] = IQuery_FROM_IQuery;
+ }
+ matrixIQuery[col][row] = bestIQueryScore;
+ }
+ }
+ // else refIndex >referenceLength
+ }
+ }
+ }
+
+ // debugging output, permanently switched off:
+ if (false) {
+ {
+ System.err.println("queryPos: " + queryPos);
+ System.err.println("refPos: " + refPos);
+ System.err.println("seedLen.: " + seedLength);
+
+ System.err.println("Query:");
+ System.err.println(Basic.toString(query));
+ System.err.println("Reference:");
+ System.err.println(Basic.toString(reference));
+ }
+
+ {
+ System.err.println("SeedScore: " + rawScore);
+ int firstScore = Math.max(Math.max(matrixIQuery[firstSeedCol][middleRow], matrixIRef[firstSeedCol][middleRow]), matrixM[firstSeedCol][middleRow]);
+ System.err.println("FirstScore: " + firstScore);
+ int secondScore = Math.max(Math.max(matrixIQuery[lastSeedCol][middleRow], matrixIRef[lastSeedCol][middleRow]), matrixM[lastSeedCol][middleRow]);
+ System.err.println("secondScore: " + secondScore);
+ System.err.println("totalScore: " + (rawScore + firstScore + secondScore));
+ }
+ {
+ System.err.println("Matrix M:");
+ System.err.println(toString(matrixM, 0, cols, query));
+ System.err.println("Matrix IQuery:");
+ System.err.println(toString(matrixIQuery, 0, cols, query));
+ System.err.println("Matrix IRef:");
+ System.err.println(toString(matrixIRef, 0, cols, query));
+ }
+ }
+
+ // add the best score in the first seed column and the best score in the last seed column to the seed score
+ rawScore += Math.max(Math.max(matrixIQuery[firstSeedCol][middleRow], matrixIRef[firstSeedCol][middleRow]), matrixM[firstSeedCol][middleRow]);
+ rawScore += Math.max(Math.max(matrixIQuery[lastSeedCol][middleRow], matrixIRef[lastSeedCol][middleRow]), matrixM[lastSeedCol][middleRow]);
+ }
+
+ /**
+ * Computes the bit score and the expected value from the raw score.
+ * For a positive raw score, bitScore = (lambda * rawScore - lnK) / ln(2) and
+ * expected = referenceDatabaseLength * queryLength * 2^(-bitScore).
+ * Otherwise the bit score is set to 0 and the expected value to Double.MAX_VALUE.
+ */
+ public void computeBitScoreAndExpected() {
+ if (rawScore > 0) {
+ bitScore = (float) ((lambda * rawScore - lnK) / LN_2);
+ // multiply in double arithmetic: as a long product, referenceDatabaseLength * queryLength
+ // could overflow before being converted to double
+ expected = (double) referenceDatabaseLength * queryLength * Math.pow(2, -bitScore);
+ } else {
+ bitScore = 0;
+ expected = Double.MAX_VALUE;
+ }
+ }
+
+ /**
+ * gets the alignment. Also sets the number of matches, mismatches and gaps
+ *
+ * @return alignment
+ */
+ public void computeAlignmentByTraceBack() {
+ if (rawScore <= 0) {
+ alignment = null;
+ return;
+ }
+
+ gaps = 0;
+ gapOpens = 0;
+ identities = 0;
+ mismatches = 0;
+
+ // get first part of alignment:
+ int length = 0;
+ {
+ int r = middleRow;
+ int c = queryPos + 1;
+
+ byte[][] traceBack;
+ traceBack = traceBackM;
+ if (matrixIRef[c][r] > matrixM[c][r]) {
+ traceBack = traceBackIRef;
+ if (matrixIQuery[c][r] > matrixIRef[c][r])
+ traceBack = traceBackIQuery;
+ } else if (matrixIQuery[c][r] > matrixM[c][r])
+ traceBack = traceBackIQuery;
+
+ loop:
+ while (true) {
+ int refIndex = r + c + refOffset;
+
+ switch (traceBack[c][r]) {
+ case DONE:
+ startQuery = c;
+ startReference = r + c + refOffset + 1;
+ break loop;
+ case M_FROM_M:
+ queryTrack[length] = query[c - 1];
+ referenceTrack[length] = reference[refIndex];
+ if (queryTrack[length] == referenceTrack[length]) {
+ if (isDNAAlignment)
+ midTrack[length] = '|';
+ else
+ midTrack[length] = queryTrack[length];
+ identities++;
+ } else {
+ if (isDNAAlignment || scoringMatrix[queryTrack[length]][referenceTrack[length]] <= 0)
+ midTrack[length] = ' ';
+ else
+ midTrack[length] = '+';
+ mismatches++;
+ }
+ c--;
+ traceBack = traceBackM;
+ break;
+ case M_FROM_IRef:
+ queryTrack[length] = query[c - 1];
+ referenceTrack[length] = reference[refIndex];
+ if (queryTrack[length] == referenceTrack[length]) {
+ if (isDNAAlignment)
+ midTrack[length] = '|';
+ else
+ midTrack[length] = queryTrack[length];
+ identities++;
+ } else {
+ if (isDNAAlignment || scoringMatrix[queryTrack[length]][referenceTrack[length]] <= 0)
+ midTrack[length] = ' ';
+ else
+ midTrack[length] = '+';
+ }
+ c--;
+ traceBack = traceBackIRef;
+ break;
+ case M_FROM_IQuery:
+ queryTrack[length] = query[c - 1];
+ referenceTrack[length] = reference[refIndex];
+ if (queryTrack[length] == referenceTrack[length]) {
+ if (isDNAAlignment)
+ midTrack[length] = '|';
+ else
+ midTrack[length] = queryTrack[length];
+ identities++;
+ } else {
+ if (isDNAAlignment || scoringMatrix[queryTrack[length]][referenceTrack[length]] <= 0)
+ midTrack[length] = ' ';
+ else
+ midTrack[length] = '+';
+ }
+ c--;
+ traceBack = traceBackIQuery;
+ break;
+ case IRef_FROM_M:
+ queryTrack[length] = '-';
+ referenceTrack[length] = reference[refIndex];
+ midTrack[length] = ' ';
+ r--;
+ traceBack = traceBackM;
+ gaps++;
+ gapOpens++;
+ break;
+ case IRef_FROM_IRef:
+ queryTrack[length] = '-';
+ referenceTrack[length] = reference[refIndex];
+ midTrack[length] = ' ';
+ r--;
+ traceBack = traceBackIRef;
+ gaps++;
+ break;
+ case IQuery_FROM_M:
+ queryTrack[length] = query[c - 1];
+ referenceTrack[length] = '-';
+ midTrack[length] = ' ';
+ c--;
+ r++;
+ traceBack = traceBackM;
+ gaps++;
+ gapOpens++;
+ break;
+ case IQuery_FROM_IQuery:
+ queryTrack[length] = query[c - 1];
+ referenceTrack[length] = '-';
+ midTrack[length] = ' ';
+ c--;
+ r++;
+ traceBack = traceBackIQuery;
+ gaps++;
+ break;
+ default:
+ throw new RuntimeException("Undefined trace-back state: " + traceBack[c][r]);
+ }
+ if (queryTrack[length] == '-' && referenceTrack[length] == '-')
+ System.err.println("gap-gap at: " + length);
+
+ if (++length >= queryTrack.length) {
+ queryTrack = grow(queryTrack);
+ midTrack = grow(midTrack);
+ referenceTrack = grow(referenceTrack);
+ }
+ } // end of loop
+
+ reverseInPlace(queryTrack, length);
+ reverseInPlace(midTrack, length);
+ reverseInPlace(referenceTrack, length);
+ }
+
+ // get second part of alignment:
+ {
+ for (int i = 1; i < seedLength - 1; i++) {
+ queryTrack[length] = query[queryPos + i];
+ referenceTrack[length] = reference[refPos + i];
+ if (queryTrack[length] == referenceTrack[length]) {
+ if (isDNAAlignment)
+ midTrack[length] = '|';
+ else
+ midTrack[length] = queryTrack[length];
+ identities++;
+ } else {
+ if (isDNAAlignment || scoringMatrix[queryTrack[length]][referenceTrack[length]] <= 0)
+ midTrack[length] = ' ';
+ else
+ midTrack[length] = '+';
+ mismatches++;
+ }
+ if (++length >= queryTrack.length) {
+ queryTrack = grow(queryTrack);
+ midTrack = grow(midTrack);
+ referenceTrack = grow(referenceTrack);
+ }
+ }
+ }
+
+
+ // get third part of alignment:
+ {
+ int r = middleRow;
+ int c = queryPos + seedLength; // +1 because col=pos+1, but -1 because want to be in last position of seed
+
+ byte[][] traceBack;
+ traceBack = traceBackM;
+ if (matrixIRef[c][r] > matrixM[c][r]) {
+ traceBack = traceBackIRef;
+ if (matrixIQuery[c][r] > matrixIRef[c][r])
+ traceBack = traceBackIQuery;
+ } else if (matrixIQuery[c][r] > matrixM[c][r])
+ traceBack = traceBackIQuery;
+
+ loop:
+ while (true) {
+ int refIndex = r + c + refOffset;
+
+ switch (traceBack[c][r]) {
+ case DONE:
+ endQuery = c-1;
+ endReference = r + c + refOffset + 1;
+ break loop;
+ case M_FROM_M:
+ queryTrack[length] = query[c - 1];
+ referenceTrack[length] = reference[refIndex];
+ if (queryTrack[length] == referenceTrack[length]) {
+ if (isDNAAlignment)
+ midTrack[length] = '|';
+ else
+ midTrack[length] = queryTrack[length];
+ identities++;
+ } else {
+ if (isDNAAlignment || scoringMatrix[queryTrack[length]][referenceTrack[length]] <= 0)
+ midTrack[length] = ' ';
+ else
+ midTrack[length] = '+';
+ mismatches++;
+ }
+ c++;
+ traceBack = traceBackM;
+ break;
+ case M_FROM_IRef:
+ queryTrack[length] = query[c - 1];
+ referenceTrack[length] = reference[refIndex];
+ if (queryTrack[length] == referenceTrack[length]) {
+ if (isDNAAlignment)
+ midTrack[length] = '|';
+ else
+ midTrack[length] = queryTrack[length];
+ identities++;
+ } else {
+ if (isDNAAlignment || scoringMatrix[queryTrack[length]][referenceTrack[length]] <= 0)
+ midTrack[length] = ' ';
+ else
+ midTrack[length] = '+';
+ }
+ c++;
+ traceBack = traceBackIRef;
+ break;
+ case M_FROM_IQuery:
+ queryTrack[length] = query[c - 1];
+ referenceTrack[length] = reference[refIndex];
+ if (queryTrack[length] == referenceTrack[length]) {
+ if (isDNAAlignment)
+ midTrack[length] = '|';
+ else
+ midTrack[length] = queryTrack[length];
+ identities++;
+ } else {
+ if (isDNAAlignment || scoringMatrix[queryTrack[length]][referenceTrack[length]] <= 0)
+ midTrack[length] = ' ';
+ else
+ midTrack[length] = '+';
+ }
+ c++;
+ traceBack = traceBackIQuery;
+ break;
+ case IRef_FROM_M:
+ queryTrack[length] = '-';
+ referenceTrack[length] = reference[refIndex];
+ midTrack[length] = ' ';
+ r++;
+ traceBack = traceBackM;
+ gaps++;
+ gapOpens++;
+ break;
+ case IRef_FROM_IRef:
+ queryTrack[length] = '-';
+ referenceTrack[length] = reference[refIndex];
+ midTrack[length] = ' ';
+ r++;
+ traceBack = traceBackIRef;
+ gaps++;
+ break;
+ case IQuery_FROM_M:
+ queryTrack[length] = query[c - 1];
+ referenceTrack[length] = '-';
+ midTrack[length] = ' ';
+ c++;
+ r--;
+ traceBack = traceBackM;
+ gaps++;
+ gapOpens++;
+ break;
+ case IQuery_FROM_IQuery:
+ queryTrack[length] = query[c - 1];
+ referenceTrack[length] = '-';
+ midTrack[length] = ' ';
+ c++;
+ r--;
+ traceBack = traceBackIQuery;
+ gaps++;
+ break;
+ default: {
+ throw new RuntimeException("Undefined trace-back state: " + traceBack[c][r]);
+ }
+ }
+ if (queryTrack[length] == '-' && referenceTrack[length] == '-')
+ System.err.println("gap-gap at: " + length);
+
+ if (++length >= queryTrack.length) {
+ queryTrack = grow(queryTrack);
+ midTrack = grow(midTrack);
+ referenceTrack = grow(referenceTrack);
+ }
+ } // end of loop
+ }
+
+ alignmentLength = length;
+ alignment = new byte[][]{copy(queryTrack, length), copy(midTrack, length), copy(referenceTrack, length)};
+ }
+
+ /**
+  * Gets the query start coordinate of the alignment.
+  * NOTE(review): in the code reviewed here the field is assigned when the trace-back of the
+  * first alignment part reaches its DONE state; this getter does not start a trace-back itself.
+  *
+  * @return current value of the startQuery field
+  */
+ public int getStartQuery() {
+ return startQuery;
+ }
+
+ /**
+  * Gets the query end coordinate of the alignment.
+  * NOTE(review): in the code reviewed here the field is assigned when the trace-back of the
+  * third alignment part reaches its DONE state; this getter does not start a trace-back itself.
+  *
+  * @return current value of the endQuery field
+  */
+ public int getEndQuery() {
+ return endQuery;
+ }
+
+ /**
+  * Gets the reference start coordinate of the alignment.
+  * Output code in this class adds 1 to this value before printing it.
+  *
+  * @return current value of the startReference field
+  */
+ public int getStartReference() {
+ return startReference;
+ }
+
+ /**
+  * Gets the reference end coordinate of the alignment.
+  * The tabular output in this class prints this value unchanged.
+  *
+  * @return current value of the endReference field
+  */
+ public int getEndReference() {
+ return endReference;
+ }
+
+ /**
+  * Gets the number of gap columns counted during trace-back
+  * (one per alignment column that has '-' in the query or reference track).
+  *
+  * @return current value of the gaps counter
+  */
+ public int getGaps() {
+ return gaps;
+ }
+
+ /**
+  * Gets the number of gap openings counted during trace-back, i.e. gap columns
+  * that were entered from a match state (IRef_FROM_M or IQuery_FROM_M).
+  *
+  * @return current value of the gapOpens counter
+  */
+ public int getGapOpens() {
+ return gapOpens;
+ }
+
+ /**
+  * Gets the number of alignment columns in which the query byte equals the reference byte,
+  * as tallied while the alignment tracks are built. Does not trigger the trace-back.
+  *
+  * @return current value of the identities counter
+  */
+ public int getIdentities() {
+ return identities;
+ }
+
+ /**
+  * Gets the percent identity of the alignment. If no alignment has been cached yet,
+  * the trace-back is run first.
+  *
+  * @return 100 * identities / alignment length, or 0 when the alignment length is 0
+  */
+ public float getPercentIdentity() {
+     if (alignment == null)
+         computeAlignmentByTraceBack();
+
+     // guard against division by zero for an empty alignment
+     if (getAlignmentLength() == 0)
+         return 0;
+
+     final float scaledIdentities = (float) (100 * getIdentities());
+     return scaledIdentities / (float) getAlignmentLength();
+ }
+
+ /**
+  * Gets the number of mismatch columns counted while the alignment tracks are built.
+  * NOTE(review): in the trace-back code reviewed here, only the M_FROM_M state and the seed
+  * region increment this counter; unequal letters reached via M_FROM_IRef or M_FROM_IQuery
+  * are not counted - confirm whether that is intended.
+  *
+  * @return current value of the mismatches counter
+  */
+ public int getMismatches() {
+ return mismatches;
+ }
+
+ /**
+  * Gets the raw alignment score currently stored in this aligner.
+  *
+  * @return current value of the rawScore field
+  */
+ public int getRawScore() {
+ return rawScore;
+ }
+
+ /**
+  * Gets the bit score currently stored in this aligner.
+  *
+  * @return current value of the bitScore field
+  */
+ public float getBitScore() {
+ return bitScore;
+ }
+
+ /**
+  * Gets the expected value (e-value) currently stored in this aligner.
+  * The output methods of this class print a literal "0.0" when this value is exactly 0.
+  *
+  * @return current value of the expected field
+  */
+ public double getExpected() {
+ return expected;
+ }
+
+ /**
+  * Gets the number of columns of the alignment, as recorded at the end of the trace-back.
+  * This getter does not trigger the trace-back itself.
+  *
+  * @return current value of the alignmentLength field
+  */
+ public int getAlignmentLength() {
+ return alignmentLength;
+ }
+
+ /**
+  * Gets the reference database length stored in this aligner.
+  *
+  * @return current value of the referenceDatabaseLength field
+  */
+ public long getReferenceDatabaseLength() {
+ return referenceDatabaseLength;
+ }
+
+ /**
+  * Sets the reference database length stored in this aligner.
+  *
+  * @param referenceDatabaseLength new value of the field; stored as given, no validation
+  */
+ public void setReferenceDatabaseLength(long referenceDatabaseLength) {
+ this.referenceDatabaseLength = referenceDatabaseLength;
+ }
+
+ /**
+ * reverse bytes
+ *
+ * @param array
+ * @return reversed bytes
+ */
+ private void reverseInPlace(byte[] array, int length) {
+ int top = length / 2;
+ for (int i = 0; i < top; i++) {
+ byte tmp = array[i];
+ int j = length - i - 1;
+ array[i] = array[j];
+ array[j]=tmp;
+ }
+ }
+
+ /**
+ * grow an array
+ *
+ * @param a
+ * @return larger array containing values
+ */
+ private byte[] grow(byte[] a) {
+ byte[] result = new byte[Math.max(2, 2 * a.length)];
+ System.arraycopy(a, 0, result, 0, a.length);
+ return result;
+ }
+
+ /**
+  * Copies the first {@code length} bytes of an array into a new array.
+  *
+  * @param array  source bytes
+  * @param length number of leading bytes to copy
+  * @return new array of exactly {@code length} bytes
+  */
+ public byte[] copy(byte[] array, int length) {
+     final byte[] duplicate = new byte[length];
+     System.arraycopy(array, 0, duplicate, 0, length);
+     return duplicate;
+ }
+
+ /**
+  * Copies the first {@code length} bytes of an array into a new array, in reverse order.
+  *
+  * @param array  source bytes
+  * @param length number of leading bytes to copy
+  * @return new array of {@code length} bytes whose first byte is {@code array[length - 1]}
+  */
+ public byte[] copyReverse(byte[] array, int length) {
+     final byte[] reversed = new byte[length];
+     int source = length - 1;
+     for (int target = 0; target < length; target++) {
+         reversed[target] = array[source--];
+     }
+     return reversed;
+ }
+
+ /**
+ * to string
+ *
+ * @param colRowMatrix
+ * @return
+ */
+ private String toString(int[][] colRowMatrix, int firstCol, int cols, byte[] query) {
+ StringBuilder buf = new StringBuilder();
+
+ buf.append(" |");
+ for (int i = firstCol; i < cols; i++) {
+ buf.append(String.format(" %3d", i));
+ }
+ buf.append("\n");
+ buf.append(" | ");
+ for (int i = firstCol + 1; i < cols; i++) {
+ buf.append(" ").append((char) query[i - 1]);
+ }
+ buf.append("\n");
+ buf.append("---+");
+ for (int i = firstCol; i < cols; i++) {
+ buf.append("----");
+ }
+ buf.append("\n");
+
+
+ int r = 0;
+ boolean hasRow = true;
+ while (hasRow) {
+ hasRow = false;
+ for (int i = firstCol; i < cols; i++) {
+ int[] aColRowMatrix = colRowMatrix[i];
+ if (aColRowMatrix.length > r) {
+ if (!hasRow) {
+ hasRow = true;
+ buf.append(String.format("%2d |", r));
+ }
+ int value = aColRowMatrix[r];
+ if (value <= MINUS_INFINITY)
+ buf.append(" -oo");
+ else
+ buf.append(String.format(" %3d", value));
+ }
+ }
+ buf.append("\n");
+ r++;
+ }
+ return buf.toString();
+ }
+
+ /**
+ * Gets the alignment as BLAST-style text: a score line, an identities/gaps line,
+ * an optional frame line and the alignment itself, cut into segments of
+ * ALIGNMENT_SEGMENT_LENGTH columns. Runs the trace-back first if no alignment is cached.
+ * The text is assembled in the shared alignmentBuffer and a copy of it is returned.
+ * NOTE(review): String.format is called without a Locale, so the decimal separator of the
+ * printed numbers follows the JVM default locale - confirm this is acceptable for the output.
+ *
+ * @param data supplies the frame info line and the query coordinates used for output
+ * @param frameRank frame rank passed through to the data object
+ * @return alignment text
+ */
+ public byte[] getAlignmentText(DataForInnerLoop data, int frameRank) {
+ if (alignment == null)
+ computeAlignmentByTraceBack();
+
+ alignmentBuffer.reset();
+
+ // an e-value of exactly 0 is printed as the literal 0.0 rather than through %g
+ if (getExpected() != 0)
+ alignmentBuffer.writeAsAscii(String.format(" Score = %.1f bits (%d), Expect = %.1g\n", getBitScore(), getRawScore(), getExpected()));
+ else
+ alignmentBuffer.writeAsAscii(String.format(" Score = %.1f bits (%d), Expect = 0.0\n", getBitScore(), getRawScore()));
+
+ if (isDNAAlignment)
+ alignmentBuffer.writeAsAscii(String.format(" Identities = %d/%d (%.0f%%), Gaps = %d/%d (%.0f%%)\n", getIdentities(), getAlignmentLength(),
+ (100.0 * (getIdentities()) / getAlignmentLength()), getGaps(), getAlignmentLength(), (100.0 * (getGaps()) / getAlignmentLength())));
+ else // protein alignment
+ {
+ // positives = every column whose mid-track character is not a blank
+ int numberOfPositives = getAlignmentLength() - Basic.countOccurrences(alignment[1], ' ');
+ alignmentBuffer.writeAsAscii(String.format(" Identities = %d/%d (%.0f%%), Positives = %d/%d (%.0f%%), Gaps = %d/%d (%.0f%%)\n",
+ getIdentities(), getAlignmentLength(), (100.0 * (getIdentities()) / getAlignmentLength()),
+ numberOfPositives, getAlignmentLength(), (100.0 * (numberOfPositives) / getAlignmentLength()),
+ getGaps(), getAlignmentLength(), (100.0 * (getGaps()) / getAlignmentLength())));
+ }
+
+ String frameInfo = data.getFrameInfoLine(frameRank);
+ if (frameInfo != null)
+ alignmentBuffer.writeAsAscii(frameInfo);
+
+ // number of query coordinate units per alignment letter: 1 for BlastN, 3 for all other modes
+ int qFactor;
+ if (mode == BlastMode.BlastN)
+ qFactor = 1;
+ else
+ qFactor = 3;
+
+ if (alignment != null) {
+ int qStart = data.getStartQueryForOutput(frameRank, startQuery);
+ // query coordinates count downwards when the output end lies before the output start
+ int qDirection = (data.getEndQueryForOutput(frameRank, endQuery) - qStart >= 0 ? 1 : -1);
+ int sStart = startReference + 1;
+
+ for (int pos = 0; pos < alignment[0].length; pos += ALIGNMENT_SEGMENT_LENGTH) {
+ // number of columns in this segment (the last segment may be shorter)
+ int add = Math.min(ALIGNMENT_SEGMENT_LENGTH, alignment[0].length - pos);
+ int qGaps = Utilities.countGaps(alignment[0], pos, add);
+ // gap columns do not advance the query coordinate
+ int qEnd = qStart + qFactor * qDirection * ((add - qGaps) - 1);
+ if (qFactor == 3) {
+ qEnd += 2 * qDirection;
+ }
+ alignmentBuffer.writeAsAscii(String.format("\nQuery: %9d ", qStart));
+ alignmentBuffer.write(alignment[0], pos, add);
+ alignmentBuffer.writeAsAscii(String.format(" %d\n", qEnd));
+ qStart = qEnd + qDirection;
+ alignmentBuffer.write(MID_TRACK_LEADING_SPACES);
+ alignmentBuffer.write(alignment[1], pos, add);
+ int sGaps = Utilities.countGaps(alignment[2], pos, add);
+ // gap columns do not advance the subject coordinate
+ int sEnd = sStart + (add - sGaps) - 1;
+ alignmentBuffer.writeAsAscii(String.format("\nSbjct: %9d ", sStart));
+ alignmentBuffer.write(alignment[2], pos, add);
+ alignmentBuffer.writeAsAscii(String.format(" %d\n", sEnd));
+ sStart = sEnd + 1;
+ }
+ }
+ return alignmentBuffer.makeCopy();
+ }
+
+ /**
+  * Gets the alignment as simple text, for debugging purposes.
+  * Delegates to {@code getAlignmentText} with a freshly built data object and frame rank 0.
+  *
+  * @return alignment text
+  */
+ public byte[] getAlignmentSimpleText() {
+     return getAlignmentText(new DataForInnerLoop(mode, true, false, 1, 1), 0);
+ }
+
+ /**
+ * get alignment in tabular format. If queryHeader==null, skips the first entry which is the query name
+ *
+ * @param data
+ * @param queryHeader
+ * @param referenceHeader
+ * @param frameRank
+ * @return tabular format without first field
+ */
+ public byte[] getAlignmentTab(final DataForInnerLoop data, final byte[] queryHeader, final byte[] referenceHeader, final int frameRank) {
+ if (alignment == null)
+ computeAlignmentByTraceBack();
+
+ int outputStartQuery = data.getStartQueryForOutput(frameRank, startQuery);
+ int outputEndQuery = data.getEndQueryForOutput(frameRank, endQuery);
+
+ // queryId, subjectId, percIdentity, alnLength, mismatchCount, gapOpenCount, queryStart, queryEnd, subjectStart, subjectEnd, eVal, bitScore
+ alignmentBuffer.reset();
+ if (queryHeader != null) {
+ int length = Utilities.getFirstWordSkipLeadingGreaterSign(queryHeader, queryTrack);
+ alignmentBuffer.write(queryTrack, 0, length);
+ alignmentBuffer.write('\t');
+ }
+ int length = Utilities.getFirstWordSkipLeadingGreaterSign(referenceHeader, queryTrack);
+ alignmentBuffer.write(queryTrack, 0, length);
+ alignmentBuffer.write('\t');
+ if (getExpected() == 0)
+ alignmentBuffer.writeAsAscii(String.format("%.1f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t0.0\t%d", ((100.0 * getIdentities()) / getAlignmentLength()), getAlignmentLength(),
+ getMismatches(), getGapOpens(), outputStartQuery, outputEndQuery, getStartReference() + 1, getEndReference(), Math.round(getBitScore())));
+ else
+ alignmentBuffer.writeAsAscii(String.format("%.1f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.1g\t%d", ((100.0 * getIdentities()) / getAlignmentLength()), getAlignmentLength(),
+ getMismatches(), getGapOpens(), outputStartQuery, outputEndQuery, getStartReference() + 1, getEndReference(), getExpected(), Math.round(getBitScore())));
+
+ return alignmentBuffer.makeCopy();
+ }
+
+ /**
+ * Gets the alignment as a SAM line. Runs the trace-back first if no alignment is cached.
+ * For a DNA alignment in a negative frame the reference start/end are swapped for output
+ * and the cached alignment rows are reverse(-complement)ed before the line is built.
+ *
+ * @param data supplies the frame for the frame rank, query output coordinates and quality values
+ * @param queryHeader query header
+ * @param querySequence query sequence
+ * @param referenceHeader reference header
+ * @param frameRank frame rank passed through to the data object
+ * @return SAM line
+ */
+ public byte[] getAlignmentSAM(final DataForInnerLoop data, final byte[] queryHeader, final byte[] querySequence, final byte[] referenceHeader, final int frameRank) {
+ if (alignment == null)
+ computeAlignmentByTraceBack();
+
+ final int frame = data.getFrameForFrameRank(frameRank);
+ final boolean queryIsReverseComplemented = isDNAAlignment && frame < 0;
+
+ final int outputStartReference;
+ final int outputEndReference;
+ if (queryIsReverseComplemented) {
+ outputStartReference = endReference + 1;
+ outputEndReference = startReference + 1;
+ // NOTE(review): the results of these calls are discarded, so they presumably modify the
+ // cached alignment rows in place; if so, a second call of this method (or a later call of
+ // another output method) would see the flipped rows - confirm.
+ DNA5.getInstance().reverseComplement(alignment[0]);
+ DNA5.getInstance().reverse(alignment[1]);
+ DNA5.getInstance().reverseComplement(alignment[2]);
+ } else {
+ outputStartReference = startReference + 1;
+ outputEndReference = endReference;
+ }
+ // only BlastX passes a query start taken from the data object; all other modes pass 0
+ int blastXQueryStart = 0;
+ if (mode == BlastMode.BlastX) {
+ blastXQueryStart = data.getStartQueryForOutput(frameRank, startQuery);
+ }
+
+ // NOTE(review): 100 * identities / alignmentLength is an integer division; it truncates the
+ // percentage and throws ArithmeticException if alignmentLength is 0.
+ return SAMHelper.createSAMLine(mode, queryHeader, querySequence, startQuery, blastXQueryStart, endQuery, queryLength, alignment[0], referenceHeader,
+ outputStartReference, outputEndReference, alignment[2], referenceLength, bitScore, rawScore, expected, 100 * identities / alignmentLength, frame, data.getQualityValues(), samSoftClipping).getBytes();
+ }
+
+ /**
+  * Maps a bit score to a raw score by computing (ln(2) * bitScore + ln(K)) / lambda
+  * and rounding the result down.
+  *
+  * @param bitScore bit score to convert
+  * @return raw score, rounded down to an int
+  */
+ public int getRawScoreForBitScore(double bitScore) {
+     final double scaled = LN_2 * bitScore + lnK;
+     final double raw = scaled / lambda;
+     return (int) Math.floor(raw);
+ }
+
+ private static final int minNumberOfExactMatches = 10;
+ private static final int windowForMinNumberOfExactMatches = 30;
+
+ /**
+  * Heuristically checks whether a good alignment is likely.
+  * Always accepts in BlastN mode. Otherwise it walks along the diagonal that starts at
+  * (queryPos, refPos), looks at up to windowForMinNumberOfExactMatches positions, and accepts
+  * as soon as minNumberOfExactMatches of them carry identical letters. The matching
+  * positions need not be adjacent.
+  *
+  * @param query           query sequence
+  * @param queryLength     number of valid letters in query
+  * @param reference       reference sequence
+  * @param referenceLength number of valid letters in reference
+  * @param queryPos        start position in the query
+  * @param refPos          start position in the reference
+  * @return true, if good alignment is likely
+  */
+ public boolean quickCheck(final byte[] query, final int queryLength, final byte[] reference, final int referenceLength, final int queryPos, final int refPos) {
+     if (mode == BlastMode.BlastN)
+         return true;
+
+     // too close to the end of either sequence to ever collect enough matches
+     final boolean queryTooShort = queryPos + minNumberOfExactMatches >= queryLength;
+     final boolean referenceTooShort = refPos + minNumberOfExactMatches >= referenceLength;
+     if (queryTooShort || referenceTooShort)
+         return false;
+
+     final int remaining = Math.min(queryLength - queryPos, referenceLength - refPos);
+     final int window = Math.min(windowForMinNumberOfExactMatches, remaining);
+
+     int matches = 0;
+     for (int offset = 0; offset < window; offset++) {
+         if (query[queryPos + offset] == reference[refPos + offset] && ++matches == minNumberOfExactMatches)
+             return true;
+     }
+     return false;
+ }
+}
diff --git a/src/malt/align/BlastStatisticsHelper.java b/src/malt/align/BlastStatisticsHelper.java
new file mode 100644
index 0000000..1f65045
--- /dev/null
+++ b/src/malt/align/BlastStatisticsHelper.java
@@ -0,0 +1,216 @@
+/**
+ * BlastStatisticsHelper.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.align;
+
+import jloda.util.Basic;
+import jloda.util.Pair;
+
+import java.io.IOException;
+
+/**
+ * Computes BLAST bit scores and e-values from raw alignment scores.
+ * The statistical parameters lambda and K are either given directly or looked up in a built-in
+ * table by BLOSUM matrix name and gap penalties.
+ * Daniel Huson, 8.2014
+ */
+public class BlastStatisticsHelper {
+    private long referenceLength;
+    private final double lnK;
+    private final double k;
+    private final double lambda;
+    // NOTE(review): the cast rounds ln(2) to float precision before it is stored as a double;
+    // kept as is so that computed scores do not change.
+    private final double LN2 = (float) Math.log(2);
+
+
+    /**
+     * lookup table, source: Blast book, appendix C.
+     * The first row is a header; every other row has six whitespace-separated fields:
+     * matrix name, gap open, gap extension, lambda, K, H.
+     */
+    private static final String[] table = new String[]
+            {
+                    "Matrix open extension lambda K H",
+                    "BLOSUM80 32767 32767 0.343 0.177 0.657",
+                    "BLOSUM80 25 2 0.342 0.170 0.660",
+                    "BLOSUM80 13 2 0.336 0.150 0.570",
+                    "BLOSUM80 9 2 0.319 0.110 0.420",
+                    "BLOSUM80 8 2 0.308 0.0900 0.350",
+                    "BLOSUM80 7 2 0.293 0.0700 0.270",
+                    "BLOSUM80 6 2 0.268 0.0450 0.190",
+                    "BLOSUM80 11 1 0.314 0.0950 0.350",
+                    "BLOSUM80 10 1 0.299 0.0710 0.270",
+                    "BLOSUM80 9 1 0.279 0.0480 0.200",
+                    "BLOSUM62 32767 32767 0.318 0.134 0.401",
+                    "BLOSUM62 11 2 0.297 0.0820 0.270",
+                    "BLOSUM62 10 2 0.291 0.0750 0.230",
+                    "BLOSUM62 9 2 0.279 0.0580 0.190",
+                    "BLOSUM62 8 2 0.264 0.0450 0.150",
+                    "BLOSUM62 7 2 0.239 0.0270 0.100",
+                    "BLOSUM62 6 2 0.201 0.0120 0.0610",
+                    "BLOSUM62 13 1 0.292 0.0710 0.230",
+                    "BLOSUM62 12 1 0.283 0.0590 0.190",
+                    "BLOSUM62 11 1 0.267 0.0410 0.140",
+                    "BLOSUM62 10 1 0.243 0.0240 0.100",
+                    "BLOSUM62 9 1 0.206 0.0100 0.0520",
+                    "BLOSUM50 32767 32767 0.232 0.112 0.336",
+                    "BLOSUM50 13 3 0.212 0.0630 0.190",
+                    "BLOSUM50 12 3 0.206 0.0550 0.170",
+                    "BLOSUM50 11 3 0.197 0.0420 0.140",
+                    "BLOSUM50 10 3 0.186 0.0310 0.110",
+                    "BLOSUM50 9 3 0.172 0.0220 0.0820",
+                    "BLOSUM50 16 2 0.215 0.0660 0.200",
+                    "BLOSUM50 15 2 0.210 0.0580 0.170",
+                    "BLOSUM50 14 2 0.202 0.0450 0.140",
+                    "BLOSUM50 13 2 0.193 0.0350 0.120",
+                    "BLOSUM50 12 2 0.181 0.0250 0.0950",
+                    "BLOSUM50 19 1 0.212 0.0570 0.180",
+                    "BLOSUM50 18 1 0.207 0.0500 0.150",
+                    "BLOSUM50 17 1 0.198 0.0370 0.120",
+                    "BLOSUM50 16 1 0.186 0.0250 0.100",
+                    "BLOSUM50 15 1 0.171 0.0150 0.0630",
+                    "BLOSUM45 32767 32767 0.229 0.0924 0.251",
+                    "BLOSUM45 13 3 0.207 0.0490 0.140",
+                    "BLOSUM45 12 3 0.199 0.0390 0.110",
+                    "BLOSUM45 11 3 0.190 0.0310 0.0950",
+                    "BLOSUM45 10 3 0.179 0.0230 0.0750",
+                    "BLOSUM45 16 2 0.210 0.0510 0.140",
+                    "BLOSUM45 15 2 0.203 0.0410 0.120",
+                    "BLOSUM45 14 2 0.195 0.0320 0.100",
+                    "BLOSUM45 13 2 0.185 0.0240 0.0840",
+                    "BLOSUM45 12 2 0.171 0.0160 0.0610",
+                    "BLOSUM45 19 1 0.205 0.0400 0.110",
+                    "BLOSUM45 18 1 0.198 0.0320 0.100",
+                    "BLOSUM45 17 1 0.189 0.0240 0.0790",
+                    "BLOSUM45 16 1 0.176 0.0160 0.0630",
+                    "BLOSUM90 32767 32767 0.335 0.190 0.755",
+                    "BLOSUM90 9 2 0.310 0.120 0.460",
+                    "BLOSUM90 8 2 0.300 0.0990 0.390",
+                    "BLOSUM90 7 2 0.283 0.0720 0.300",
+                    "BLOSUM90 6 2 0.259 0.0480 0.220",
+                    "BLOSUM90 11 1 0.302 0.0930 0.390",
+                    "BLOSUM90 10 1 0.290 0.0750 0.280",
+                    "BLOSUM90 9 1 0.265 0.0440 0.200"
+            };
+
+
+    /**
+     * constructor that looks up lambda and K for a BLOSUM matrix and gap penalties.
+     * Reports the chosen parameters on standard error.
+     *
+     * @param referenceLength     reference length used in the e-value computation
+     * @param blosumName          name of the BLOSUM matrix, matched case-insensitively
+     * @param gapOpenPenalty      gap open penalty
+     * @param gapExtensionPenalty gap extension penalty
+     * @throws IOException if the table has no entry for the given combination
+     */
+    public BlastStatisticsHelper(long referenceLength, String blosumName, int gapOpenPenalty, int gapExtensionPenalty) throws IOException {
+        this.referenceLength = referenceLength;
+        Pair<Double, Double> pair = lookupLambdaAndK(blosumName, gapOpenPenalty, gapExtensionPenalty);
+        this.lambda = pair.get1();
+        this.k = pair.get2();
+        this.lnK = (float) Math.log(k); // float rounding kept so that computed scores do not change
+        System.err.println("Blast-stats: matrix=" + blosumName + " gapOpen=" + gapOpenPenalty + " gapExtend=" + gapExtensionPenalty + " lambda=" + getLambda() + " k=" + getK());
+    }
+
+    /**
+     * constructor for explicitly given statistical parameters
+     *
+     * @param referenceLength reference length used in the e-value computation
+     * @param k               BLAST K value
+     * @param lambda          BLAST lambda value
+     */
+    public BlastStatisticsHelper(long referenceLength, float k, float lambda) {
+        this.referenceLength = referenceLength;
+        this.k = k;
+        this.lnK = (float) Math.log(k); // float rounding kept so that computed scores do not change
+        this.lambda = lambda;
+    }
+
+    /**
+     * set the reference length
+     *
+     * @param referenceLength reference length used in the e-value computation
+     */
+    public void setReferenceLength(long referenceLength) {
+        this.referenceLength = referenceLength;
+    }
+
+    /**
+     * get the bit score, computed as (lambda * alignmentScore - ln K) / ln 2
+     *
+     * @param alignmentScore raw alignment score
+     * @return bit score
+     */
+    public double getBitScore(int alignmentScore) {
+        return (lambda * alignmentScore - lnK) / LN2;
+    }
+
+    /**
+     * get the e-value, computed as K * referenceLength * queryLength * exp(-lambda * alignmentScore)
+     *
+     * @param queryLength    length of the query
+     * @param alignmentScore raw alignment score
+     * @return e-value
+     */
+    public double getExpect(int queryLength, int alignmentScore) {
+        return k * referenceLength * queryLength * Math.exp(-lambda * alignmentScore);
+    }
+
+    /**
+     * get blast's k value
+     *
+     * @return k
+     */
+    public double getK() {
+        return k;
+    }
+
+    /**
+     * get blast's lambda value
+     *
+     * @return lambda
+     */
+    public double getLambda() {
+        return lambda;
+    }
+
+    /**
+     * lookup the blast Lambda and K values for a given setting.
+     * Table rows are split on any run of whitespace, so both tab- and space-separated rows work.
+     * Rows whose penalty fields are not numbers (the header row) are skipped.
+     * If nothing matches, the known combinations are printed to standard error.
+     *
+     * @param blosumName name of the BLOSUM matrix, matched case-insensitively against the start of each row
+     * @param gapOpen    gap open penalty
+     * @param gapExtend  gap extension penalty
+     * @return pair whose first component is lambda and whose second component is K
+     * @throws IOException if the table has no entry for the given combination
+     */
+    public static Pair<Double, Double> lookupLambdaAndK(String blosumName, int gapOpen, int gapExtend) throws IOException {
+        blosumName = blosumName.toUpperCase();
+        for (String line : table) {
+            if (!line.startsWith(blosumName))
+                continue;
+            final String[] tokens = line.trim().split("\\s+");
+            if (tokens.length != 6)
+                continue;
+            try {
+                if (Integer.parseInt(tokens[1]) == gapOpen && Integer.parseInt(tokens[2]) == gapExtend)
+                    return new Pair<>(Double.parseDouble(tokens[3]), Double.parseDouble(tokens[4]));
+            } catch (NumberFormatException ignored) {
+                // not a data row (e.g. the header row matched a very short name): skip it
+            }
+        }
+        System.err.println("Known combinations of BLOSUM matrices and gap penalties:");
+        System.err.println(Basic.toString(table, "\n"));
+        throw new IOException("Can't determine BLAST statistics for given combination of BLOSUM matrix and gap penalties");
+    }
+}
diff --git a/src/malt/align/DNAScoringMatrix.java b/src/malt/align/DNAScoringMatrix.java
new file mode 100644
index 0000000..0ab4337
--- /dev/null
+++ b/src/malt/align/DNAScoringMatrix.java
@@ -0,0 +1,52 @@
+/**
+ * DNAScoringMatrix.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.align;
+
+/**
+ * Basic DNA scoring matrix: one score for identical letters, another for different letters.
+ * Daniel Huson, 8.2014
+ */
+public class DNAScoringMatrix implements IScoringMatrix {
+    private final int[][] matrix = new int[128][128];
+
+    /**
+     * Fills the 128 x 128 table: the diagonal gets the match score, all other cells the mismatch score.
+     *
+     * @param matchScore    score for two identical letters
+     * @param mismatchScore score for two different letters
+     */
+    public DNAScoringMatrix(int matchScore, int mismatchScore) {
+        for (int row = 0; row < 128; row++) {
+            for (int col = 0; col < 128; col++) {
+                matrix[row][col] = (row == col ? matchScore : mismatchScore);
+            }
+        }
+    }
+
+    /**
+     * get score for letters a and b by direct table lookup.
+     * Both letters are used as array indices, so they must lie in the range 0..127.
+     *
+     * @param a first letter
+     * @param b second letter
+     * @return score
+     */
+    @Override
+    public int getScore(byte a, byte b) {
+        final int[] rowForA = matrix[a];
+        return rowForA[b];
+    }
+
+    /**
+     * get the scoring matrix; this is the internal table itself, not a copy
+     *
+     * @return matrix
+     */
+    @Override
+    public int[][] getMatrix() {
+        return matrix;
+    }
+}
diff --git a/src/malt/align/IScoringMatrix.java b/src/malt/align/IScoringMatrix.java
new file mode 100644
index 0000000..02fcd5a
--- /dev/null
+++ b/src/malt/align/IScoringMatrix.java
@@ -0,0 +1,42 @@
+/**
+ * IScoringMatrix.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.align;
+
+/**
+ * Interface for a scoring matrix that assigns a score to a pair of sequence letters.
+ * Daniel Huson, 8.2014
+ */
+public interface IScoringMatrix {
+ /**
+ * Gets the score for aligning letters a and b.
+ * NOTE(review): the DNA implementation in this package uses both letters as indices into a
+ * 128 x 128 table, so callers should presumably pass 7-bit letter codes only - confirm.
+ *
+ * @param a first letter
+ * @param b second letter
+ * @return score for aligning a with b
+ */
+ int getScore(byte a, byte b);
+
+ /**
+ * Gets the scoring matrix as a two-dimensional array indexed by the two letters.
+ * NOTE(review): the DNA implementation in this package returns its internal table, not a
+ * copy; whether other implementations do the same is not visible here.
+ *
+ * @return matrix
+ */
+ int[][] getMatrix();
+}
diff --git a/src/malt/align/ProteinScoringMatrix.java b/src/malt/align/ProteinScoringMatrix.java
new file mode 100644
index 0000000..9c4e933
--- /dev/null
+++ b/src/malt/align/ProteinScoringMatrix.java
@@ -0,0 +1,373 @@
+/**
+ * ProteinScoringMatrix.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.align;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Arrays;
+
+/**
+ * A number of different BLOSUM matrices
+ * Daniel Huson, 11.2011
+ */
+public class ProteinScoringMatrix implements IScoringMatrix {
+ public enum ScoringScheme {BLOSUM45, BLOSUM50, BLOSUM62, BLOSUM80, BLOSUM90} // constant names are what create(String) passes to valueOf
+
+ private final int[][] matrix; // allocated as 128 x 128 in the constructor and pre-filled with -20
+
+ private static ProteinScoringMatrix BLOSUM90; // cache filled on first call of getBlosum90()
+ private static ProteinScoringMatrix BLOSUM80; // cache filled on first call of getBlosum80()
+ private static ProteinScoringMatrix BLOSUM62; // cache filled on first call of getBlosum62()
+ private static ProteinScoringMatrix BLOSUM50; // cache filled on first call of getBlosum50()
+ private static ProteinScoringMatrix BLOSUM45; // cache filled on first call of getBlosum45()
+
+ public final static String BLOSUM90_INPUT =
+ "A R N D C Q E G H I L K M F P S T W Y V B J Z X * \n" +
+ "A 5 -2 -2 -3 -1 -1 -1 0 -2 -2 -2 -1 -2 -3 -1 1 0 -4 -3 -1 -2 -2 -1 -1 -6 \n" +
+ "R -2 6 -1 -3 -5 1 -1 -3 0 -4 -3 2 -2 -4 -3 -1 -2 -4 -3 -3 -2 -3 0 -1 -6 \n" +
+ "N -2 -1 7 1 -4 0 -1 -1 0 -4 -4 0 -3 -4 -3 0 0 -5 -3 -4 5 -4 -1 -1 -6 \n" +
+ "D -3 -3 1 7 -5 -1 1 -2 -2 -5 -5 -1 -4 -5 -3 -1 -2 -6 -4 -5 5 -5 1 -1 -6 \n" +
+ "C -1 -5 -4 -5 9 -4 -6 -4 -5 -2 -2 -4 -2 -3 -4 -2 -2 -4 -4 -2 -4 -2 -5 -1 -6 \n" +
+ "Q -1 1 0 -1 -4 7 2 -3 1 -4 -3 1 0 -4 -2 -1 -1 -3 -3 -3 -1 -3 5 -1 -6 \n" +
+ "E -1 -1 -1 1 -6 2 6 -3 -1 -4 -4 0 -3 -5 -2 -1 -1 -5 -4 -3 1 -4 5 -1 -6 \n" +
+ "G 0 -3 -1 -2 -4 -3 -3 6 -3 -5 -5 -2 -4 -5 -3 -1 -3 -4 -5 -5 -2 -5 -3 -1 -6 \n" +
+ "H -2 0 0 -2 -5 1 -1 -3 8 -4 -4 -1 -3 -2 -3 -2 -2 -3 1 -4 -1 -4 0 -1 -6 \n" +
+ "I -2 -4 -4 -5 -2 -4 -4 -5 -4 5 1 -4 1 -1 -4 -3 -1 -4 -2 3 -5 3 -4 -1 -6 \n" +
+ "L -2 -3 -4 -5 -2 -3 -4 -5 -4 1 5 -3 2 0 -4 -3 -2 -3 -2 0 -5 4 -4 -1 -6 \n" +
+ "K -1 2 0 -1 -4 1 0 -2 -1 -4 -3 6 -2 -4 -2 -1 -1 -5 -3 -3 -1 -3 1 -1 -6 \n" +
+ "M -2 -2 -3 -4 -2 0 -3 -4 -3 1 2 -2 7 -1 -3 -2 -1 -2 -2 0 -4 2 -2 -1 -6 \n" +
+ "F -3 -4 -4 -5 -3 -4 -5 -5 -2 -1 0 -4 -1 7 -4 -3 -3 0 3 -2 -4 0 -4 -1 -6 \n" +
+ "P -1 -3 -3 -3 -4 -2 -2 -3 -3 -4 -4 -2 -3 -4 8 -2 -2 -5 -4 -3 -3 -4 -2 -1 -6 \n" +
+ "S 1 -1 0 -1 -2 -1 -1 -1 -2 -3 -3 -1 -2 -3 -2 5 1 -4 -3 -2 0 -3 -1 -1 -6 \n" +
+ "T 0 -2 0 -2 -2 -1 -1 -3 -2 -1 -2 -1 -1 -3 -2 1 6 -4 -2 -1 -1 -2 -1 -1 -6 \n" +
+ "W -4 -4 -5 -6 -4 -3 -5 -4 -3 -4 -3 -5 -2 0 -5 -4 -4 11 2 -3 -6 -3 -4 -1 -6 \n" +
+ "Y -3 -3 -3 -4 -4 -3 -4 -5 1 -2 -2 -3 -2 3 -4 -3 -2 2 8 -3 -4 -2 -3 -1 -6 \n" +
+ "V -1 -3 -4 -5 -2 -3 -3 -5 -4 3 0 -3 0 -2 -3 -2 -1 -3 -3 5 -4 1 -3 -1 -6 \n" +
+ "B -2 -2 5 5 -4 -1 1 -2 -1 -5 -5 -1 -4 -4 -3 0 -1 -6 -4 -4 5 -5 0 -1 -6 \n" +
+ "J -2 -3 -4 -5 -2 -3 -4 -5 -4 3 4 -3 2 0 -4 -3 -2 -3 -2 1 -5 4 -4 -1 -6 \n" +
+ "Z -1 0 -1 1 -5 5 5 -3 0 -4 -4 1 -2 -4 -2 -1 -1 -4 -3 -3 0 -4 5 -1 -6 \n" +
+ "X -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -6 \n" +
+ "* -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 1 \n";
+
+ public final static String BLOSUM80_INPUT =
+ "A R N D C Q E G H I L K M F P S T W Y V B J Z X * \n" +
+ "A 5 -2 -2 -2 -1 -1 -1 0 -2 -2 -2 -1 -1 -3 -1 1 0 -3 -2 0 -2 -2 -1 -1 -6 \n" +
+ "R -2 6 -1 -2 -4 1 -1 -3 0 -3 -3 2 -2 -4 -2 -1 -1 -4 -3 -3 -1 -3 0 -1 -6 \n" +
+ "N -2 -1 6 1 -3 0 -1 -1 0 -4 -4 0 -3 -4 -3 0 0 -4 -3 -4 5 -4 0 -1 -6 \n" +
+ "D -2 -2 1 6 -4 -1 1 -2 -2 -4 -5 -1 -4 -4 -2 -1 -1 -6 -4 -4 5 -5 1 -1 -6 \n" +
+ "C -1 -4 -3 -4 9 -4 -5 -4 -4 -2 -2 -4 -2 -3 -4 -2 -1 -3 -3 -1 -4 -2 -4 -1 -6 \n" +
+ "Q -1 1 0 -1 -4 6 2 -2 1 -3 -3 1 0 -4 -2 0 -1 -3 -2 -3 0 -3 4 -1 -6 \n" +
+ "E -1 -1 -1 1 -5 2 6 -3 0 -4 -4 1 -2 -4 -2 0 -1 -4 -3 -3 1 -4 5 -1 -6 \n" +
+ "G 0 -3 -1 -2 -4 -2 -3 6 -3 -5 -4 -2 -4 -4 -3 -1 -2 -4 -4 -4 -1 -5 -3 -1 -6 \n" +
+ "H -2 0 0 -2 -4 1 0 -3 8 -4 -3 -1 -2 -2 -3 -1 -2 -3 2 -4 -1 -4 0 -1 -6 \n" +
+ "I -2 -3 -4 -4 -2 -3 -4 -5 -4 5 1 -3 1 -1 -4 -3 -1 -3 -2 3 -4 3 -4 -1 -6 \n" +
+ "L -2 -3 -4 -5 -2 -3 -4 -4 -3 1 4 -3 2 0 -3 -3 -2 -2 -2 1 -4 3 -3 -1 -6 \n" +
+ "K -1 2 0 -1 -4 1 1 -2 -1 -3 -3 5 -2 -4 -1 -1 -1 -4 -3 -3 -1 -3 1 -1 -6 \n" +
+ "M -1 -2 -3 -4 -2 0 -2 -4 -2 1 2 -2 6 0 -3 -2 -1 -2 -2 1 -3 2 -1 -1 -6 \n" +
+ "F -3 -4 -4 -4 -3 -4 -4 -4 -2 -1 0 -4 0 6 -4 -3 -2 0 3 -1 -4 0 -4 -1 -6 \n" +
+ "P -1 -2 -3 -2 -4 -2 -2 -3 -3 -4 -3 -1 -3 -4 8 -1 -2 -5 -4 -3 -2 -4 -2 -1 -6 \n" +
+ "S 1 -1 0 -1 -2 0 0 -1 -1 -3 -3 -1 -2 -3 -1 5 1 -4 -2 -2 0 -3 0 -1 -6 \n" +
+ "T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -2 -1 -1 -2 -2 1 5 -4 -2 0 -1 -1 -1 -1 -6 \n" +
+ "W -3 -4 -4 -6 -3 -3 -4 -4 -3 -3 -2 -4 -2 0 -5 -4 -4 11 2 -3 -5 -3 -3 -1 -6 \n" +
+ "Y -2 -3 -3 -4 -3 -2 -3 -4 2 -2 -2 -3 -2 3 -4 -2 -2 2 7 -2 -3 -2 -3 -1 -6 \n" +
+ "V 0 -3 -4 -4 -1 -3 -3 -4 -4 3 1 -3 1 -1 -3 -2 0 -3 -2 4 -4 2 -3 -1 -6 \n" +
+ "B -2 -1 5 5 -4 0 1 -1 -1 -4 -4 -1 -3 -4 -2 0 -1 -5 -3 -4 5 -4 0 -1 -6 \n" +
+ "J -2 -3 -4 -5 -2 -3 -4 -5 -4 3 3 -3 2 0 -4 -3 -1 -3 -2 2 -4 3 -3 -1 -6 \n" +
+ "Z -1 0 0 1 -4 4 5 -3 0 -4 -3 1 -1 -4 -2 0 -1 -3 -3 -3 0 -3 5 -1 -6 \n" +
+ "X -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -6 \n" +
+ "* -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 1 \n";
+
+ public final static String BLOSUM62_INPUT =
+ "A R N D C Q E G H I L K M F P S T W Y V B Z X *\n" +
+ "A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4 \n" +
+ "R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4 \n" +
+ "N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4 \n" +
+ "D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4 \n" +
+ "C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4 \n" +
+ "Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4 \n" +
+ "E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 \n" +
+ "G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4 \n" +
+ "H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4 \n" +
+ "I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4 \n" +
+ "L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4 \n" +
+ "K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4 \n" +
+ "M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4 \n" +
+ "F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4 \n" +
+ "P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4 \n" +
+ "S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4 \n" +
+ "T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4 \n" +
+ "W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4 \n" +
+ "Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4 \n" +
+ "V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4 \n" +
+ "B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4 \n" +
+ "Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 \n" +
+ "X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4 \n" +
+ "* -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1 \n";
+
+ public final static String BLOSUM50_INPUT =
+ "A R N D C Q E G H I L K M F P S T W Y V B Z X * \n" +
+ "A 5 -2 -1 -2 -1 -1 -1 0 -2 -1 -2 -1 -1 -3 -1 1 0 -3 -2 0 -2 -1 -1 -5 \n" +
+ "R -2 7 -1 -2 -4 1 0 -3 0 -4 -3 3 -2 -3 -3 -1 -1 -3 -1 -3 -1 0 -1 -5 \n" +
+ "N -1 -1 7 2 -2 0 0 0 1 -3 -4 0 -2 -4 -2 1 0 -4 -2 -3 4 0 -1 -5 \n" +
+ "D -2 -2 2 8 -4 0 2 -1 -1 -4 -4 -1 -4 -5 -1 0 -1 -5 -3 -4 5 1 -1 -5 \n" +
+ "C -1 -4 -2 -4 13 -3 -3 -3 -3 -2 -2 -3 -2 -2 -4 -1 -1 -5 -3 -1 -3 -3 -2 -5 \n" +
+ "Q -1 1 0 0 -3 7 2 -2 1 -3 -2 2 0 -4 -1 0 -1 -1 -1 -3 0 4 -1 -5 \n" +
+ "E -1 0 0 2 -3 2 6 -3 0 -4 -3 1 -2 -3 -1 -1 -1 -3 -2 -3 1 5 -1 -5 \n" +
+ "G 0 -3 0 -1 -3 -2 -3 8 -2 -4 -4 -2 -3 -4 -2 0 -2 -3 -3 -4 -1 -2 -2 -5 \n" +
+ "H -2 0 1 -1 -3 1 0 -2 10 -4 -3 0 -1 -1 -2 -1 -2 -3 2 -4 0 0 -1 -5 \n" +
+ "I -1 -4 -3 -4 -2 -3 -4 -4 -4 5 2 -3 2 0 -3 -3 -1 -3 -1 4 -4 -3 -1 -5 \n" +
+ "L -2 -3 -4 -4 -2 -2 -3 -4 -3 2 5 -3 3 1 -4 -3 -1 -2 -1 1 -4 -3 -1 -5 \n" +
+ "K -1 3 0 -1 -3 2 1 -2 0 -3 -3 6 -2 -4 -1 0 -1 -3 -2 -3 0 1 -1 -5 \n" +
+ "M -1 -2 -2 -4 -2 0 -2 -3 -1 2 3 -2 7 0 -3 -2 -1 -1 0 1 -3 -1 -1 -5 \n" +
+ "F -3 -3 -4 -5 -2 -4 -3 -4 -1 0 1 -4 0 8 -4 -3 -2 1 4 -1 -4 -4 -2 -5 \n" +
+ "P -1 -3 -2 -1 -4 -1 -1 -2 -2 -3 -4 -1 -3 -4 10 -1 -1 -4 -3 -3 -2 -1 -2 -5 \n" +
+ "S 1 -1 1 0 -1 0 -1 0 -1 -3 -3 0 -2 -3 -1 5 2 -4 -2 -2 0 0 -1 -5 \n" +
+ "T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 2 5 -3 -2 0 0 -1 0 -5 \n" +
+ "W -3 -3 -4 -5 -5 -1 -3 -3 -3 -3 -2 -3 -1 1 -4 -4 -3 15 2 -3 -5 -2 -3 -5 \n" +
+ "Y -2 -1 -2 -3 -3 -1 -2 -3 2 -1 -1 -2 0 4 -3 -2 -2 2 8 -1 -3 -2 -1 -5 \n" +
+ "V 0 -3 -3 -4 -1 -3 -3 -4 -4 4 1 -3 1 -1 -3 -2 0 -3 -1 5 -4 -3 -1 -5 \n" +
+ "B -2 -1 4 5 -3 0 1 -1 0 -4 -4 0 -3 -4 -2 0 0 -5 -3 -4 5 2 -1 -5 \n" +
+ "Z -1 0 0 1 -3 4 5 -2 0 -3 -3 1 -1 -4 -1 0 -1 -2 -2 -3 2 5 -1 -5 \n" +
+ "X -1 -1 -1 -1 -2 -1 -1 -2 -1 -1 -1 -1 -1 -2 -2 -1 0 -3 -1 -1 -1 -1 -1 -5 \n" +
+ "* -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 1 \n";
+
+ public final static String BLOSUM45_INPUT =
+ "A R N D C Q E G H I L K M F P S T W Y V B J Z X * \n" +
+ "A 5 -2 -1 -2 -1 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -2 -2 0 -1 -1 -1 -1 -5 \n" +
+ "R -2 7 0 -1 -3 1 0 -2 0 -3 -2 3 -1 -2 -2 -1 -1 -2 -1 -2 -1 -3 1 -1 -5 \n" +
+ "N -1 0 6 2 -2 0 0 0 1 -2 -3 0 -2 -2 -2 1 0 -4 -2 -3 5 -3 0 -1 -5 \n" +
+ "D -2 -1 2 7 -3 0 2 -1 0 -4 -3 0 -3 -4 -1 0 -1 -4 -2 -3 6 -3 1 -1 -5 \n" +
+ "C -1 -3 -2 -3 12 -3 -3 -3 -3 -3 -2 -3 -2 -2 -4 -1 -1 -5 -3 -1 -2 -2 -3 -1 -5 \n" +
+ "Q -1 1 0 0 -3 6 2 -2 1 -2 -2 1 0 -4 -1 0 -1 -2 -1 -3 0 -2 4 -1 -5 \n" +
+ "E -1 0 0 2 -3 2 6 -2 0 -3 -2 1 -2 -3 0 0 -1 -3 -2 -3 1 -3 5 -1 -5 \n" +
+ "G 0 -2 0 -1 -3 -2 -2 7 -2 -4 -3 -2 -2 -3 -2 0 -2 -2 -3 -3 -1 -4 -2 -1 -5 \n" +
+ "H -2 0 1 0 -3 1 0 -2 10 -3 -2 -1 0 -2 -2 -1 -2 -3 2 -3 0 -2 0 -1 -5 \n" +
+ "I -1 -3 -2 -4 -3 -2 -3 -4 -3 5 2 -3 2 0 -2 -2 -1 -2 0 3 -3 4 -3 -1 -5 \n" +
+ "L -1 -2 -3 -3 -2 -2 -2 -3 -2 2 5 -3 2 1 -3 -3 -1 -2 0 1 -3 4 -2 -1 -5 \n" +
+ "K -1 3 0 0 -3 1 1 -2 -1 -3 -3 5 -1 -3 -1 -1 -1 -2 -1 -2 0 -3 1 -1 -5 \n" +
+ "M -1 -1 -2 -3 -2 0 -2 -2 0 2 2 -1 6 0 -2 -2 -1 -2 0 1 -2 2 -1 -1 -5 \n" +
+ "F -2 -2 -2 -4 -2 -4 -3 -3 -2 0 1 -3 0 8 -3 -2 -1 1 3 0 -3 1 -3 -1 -5 \n" +
+ "P -1 -2 -2 -1 -4 -1 0 -2 -2 -2 -3 -1 -2 -3 9 -1 -1 -3 -3 -3 -2 -3 -1 -1 -5 \n" +
+ "S 1 -1 1 0 -1 0 0 0 -1 -2 -3 -1 -2 -2 -1 4 2 -4 -2 -1 0 -2 0 -1 -5 \n" +
+ "T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -1 -1 2 5 -3 -1 0 0 -1 -1 -1 -5 \n" +
+ "W -2 -2 -4 -4 -5 -2 -3 -2 -3 -2 -2 -2 -2 1 -3 -4 -3 15 3 -3 -4 -2 -2 -1 -5 \n" +
+ "Y -2 -1 -2 -2 -3 -1 -2 -3 2 0 0 -1 0 3 -3 -2 -1 3 8 -1 -2 0 -2 -1 -5 \n" +
+ "V 0 -2 -3 -3 -1 -3 -3 -3 -3 3 1 -2 1 0 -3 -1 0 -3 -1 5 -3 2 -3 -1 -5 \n" +
+ "B -1 -1 5 6 -2 0 1 -1 0 -3 -3 0 -2 -3 -2 0 0 -4 -2 -3 5 -3 1 -1 -5 \n" +
+ "J -1 -3 -3 -3 -2 -2 -3 -4 -2 4 4 -3 2 1 -3 -2 -1 -2 0 2 -3 4 -2 -1 -5 \n" +
+ "Z -1 1 0 1 -3 4 5 -2 0 -3 -2 1 -1 -3 -1 0 -1 -2 -2 -3 1 -2 5 -1 -5 \n" +
+ "X -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -5 \n" +
+ "* -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 1 \n";
+
+ /**
+ * constructor
+ */
+ public ProteinScoringMatrix() {
+ matrix = new int[128][128];
+ for (int[] row : matrix) {
+ Arrays.fill(row, 0, row.length, -20);
+ }
+ }
+
+ /**
+  * Looks up the score stored for the letter pair (a, b).
+  * Both bytes are used directly as array indices.
+  *
+  * @param a first letter, selects the row
+  * @param b second letter, selects the column
+  * @return score, or -20, if matrix not defined for (a,b)
+  */
+ public int getScore(byte a, byte b) {
+     final int[] scoresForA = matrix[a];
+     return scoresForA[b];
+ }
+
+ /**
+  * Creates the scoring matrix for the scheme with the given name.
+  * The name is resolved with ScoringScheme.valueOf, so it must match an enum constant
+  * exactly; valueOf throws IllegalArgumentException for any other name.
+  *
+  * @param name name of a ScoringScheme constant, e.g. "BLOSUM62"
+  * @return protein scoring matrix
+  * @throws IOException declared because create(ScoringScheme) declares it
+  */
+ public static ProteinScoringMatrix create(String name) throws IOException {
+     final ScoringScheme scheme = ScoringScheme.valueOf(name);
+     return create(scheme);
+ }
+
+ /**
+  * Returns the scoring matrix that belongs to the given scheme by dispatching to the
+  * matching getBlosumXX() accessor.
+  *
+  * @param which scheme to obtain the matrix for
+  * @return protein scoring matrix
+  * @throws IOException if the scheme is not one of the handled constants
+  */
+ public static ProteinScoringMatrix create(ScoringScheme which) throws IOException {
+     final ProteinScoringMatrix result;
+     switch (which) {
+         case BLOSUM90:
+             result = getBlosum90();
+             break;
+         case BLOSUM80:
+             result = getBlosum80();
+             break;
+         case BLOSUM62:
+             result = getBlosum62();
+             break;
+         case BLOSUM50:
+             result = getBlosum50();
+             break;
+         case BLOSUM45:
+             result = getBlosum45();
+             break;
+         default:
+             throw new IOException("Unrecognized BLOSUM matrix: " + which);
+     }
+     return result;
+ }
+
+ /**
+  * Gets the BLOSUM90 matrix. The matrix is parsed from BLOSUM90_INPUT on the first call
+  * and cached in a static field for all later calls.
+  * The cache field is assigned only after load() has finished, so an unfinished or failed
+  * load never leaves a partially filled matrix cached for later callers.
+  *
+  * @return blosum 90
+  * @throws RuntimeException wrapping any exception raised while building the matrix
+  */
+ public static ProteinScoringMatrix getBlosum90() {
+     if (BLOSUM90 == null) {
+         final ProteinScoringMatrix loaded;
+         try {
+             loaded = new ProteinScoringMatrix();
+             loaded.load(new StringReader(BLOSUM90_INPUT));
+         } catch (Exception ex) {
+             throw new RuntimeException(ex);
+         }
+         BLOSUM90 = loaded; // publish only once the table is complete
+     }
+     return BLOSUM90;
+ }
+
+ /**
+  * Gets the BLOSUM80 matrix. The matrix is parsed from BLOSUM80_INPUT on the first call
+  * and cached in a static field for all later calls.
+  * The cache field is assigned only after load() has finished, so an unfinished or failed
+  * load never leaves a partially filled matrix cached for later callers.
+  *
+  * @return blosum 80
+  * @throws RuntimeException wrapping any exception raised while building the matrix
+  */
+ public static ProteinScoringMatrix getBlosum80() {
+     if (BLOSUM80 == null) {
+         final ProteinScoringMatrix loaded;
+         try {
+             loaded = new ProteinScoringMatrix();
+             loaded.load(new StringReader(BLOSUM80_INPUT));
+         } catch (Exception ex) {
+             throw new RuntimeException(ex);
+         }
+         BLOSUM80 = loaded; // publish only once the table is complete
+     }
+     return BLOSUM80;
+ }
+
+ /**
+  * Gets the BLOSUM62 matrix. The matrix is parsed from BLOSUM62_INPUT on the first call
+  * and cached in a static field for all later calls.
+  * The cache field is assigned only after load() has finished, so an unfinished or failed
+  * load never leaves a partially filled matrix cached for later callers.
+  *
+  * @return blosum 62
+  * @throws RuntimeException wrapping any exception raised while building the matrix
+  */
+ public static ProteinScoringMatrix getBlosum62() {
+     if (BLOSUM62 == null) {
+         final ProteinScoringMatrix loaded;
+         try {
+             loaded = new ProteinScoringMatrix();
+             loaded.load(new StringReader(BLOSUM62_INPUT));
+         } catch (Exception ex) {
+             throw new RuntimeException(ex);
+         }
+         BLOSUM62 = loaded; // publish only once the table is complete
+     }
+     return BLOSUM62;
+ }
+
+ /**
+  * Gets the BLOSUM50 matrix. The matrix is parsed from BLOSUM50_INPUT on the first call
+  * and cached in a static field for all later calls.
+  * The cache field is assigned only after load() has finished, so an unfinished or failed
+  * load never leaves a partially filled matrix cached for later callers.
+  *
+  * @return blosum 50
+  * @throws RuntimeException wrapping any exception raised while building the matrix
+  */
+ public static ProteinScoringMatrix getBlosum50() {
+     if (BLOSUM50 == null) {
+         final ProteinScoringMatrix loaded;
+         try {
+             loaded = new ProteinScoringMatrix();
+             loaded.load(new StringReader(BLOSUM50_INPUT));
+         } catch (Exception ex) {
+             throw new RuntimeException(ex);
+         }
+         BLOSUM50 = loaded; // publish only once the table is complete
+     }
+     return BLOSUM50;
+ }
+
+ /**
+  * Gets the BLOSUM45 matrix. The matrix is parsed from BLOSUM45_INPUT on the first call
+  * and cached in a static field for all later calls.
+  * The cache field is assigned only after load() has finished, so an unfinished or failed
+  * load never leaves a partially filled matrix cached for later callers.
+  *
+  * @return blosum 45
+  * @throws RuntimeException wrapping any exception raised while building the matrix
+  */
+ public static ProteinScoringMatrix getBlosum45() {
+     if (BLOSUM45 == null) {
+         final ProteinScoringMatrix loaded;
+         try {
+             loaded = new ProteinScoringMatrix();
+             loaded.load(new StringReader(BLOSUM45_INPUT));
+         } catch (Exception ex) {
+             throw new RuntimeException(ex);
+         }
+         BLOSUM45 = loaded; // publish only once the table is complete
+     }
+     return BLOSUM45;
+ }
+
+ /**
+ * load a matrix
+ *
+ * @param r0
+ * @throws java.io.IOException
+ */
+ public void load(Reader r0) throws IOException {
+ BufferedReader r = new BufferedReader(r0);
+
+ char[] mapPos2Char = null;
+ String aLine;
+ int cols = 0;
+
+ while ((aLine = r.readLine()) != null) {
+ aLine = aLine.trim();
+ if (aLine.length() == 0 || aLine.startsWith("#"))
+ continue;
+ if (mapPos2Char == null) // must be first line listing all letters
+ {
+ String[] tokens = aLine.split(" ");
+ cols = tokens.length;
+ if (tokens.length < 20)
+ throw new IOException("Expected >=20 tokens, got: " + tokens.length + " in line: " + aLine);
+ int count = 0;
+ mapPos2Char = new char[tokens.length];
+ for (String label : tokens) {
+ char c = Character.toUpperCase(label.charAt(0));
+ mapPos2Char[count++] = c;
+ }
+ } else // a definition line
+ {
+ String[] tokens = aLine.split(" ");
+ if (tokens.length != cols + 1)
+ throw new IOException("Expected " + (cols + 1) + " tokens, got: " + tokens.length + " in line: " + aLine);
+ char c = Character.toUpperCase(tokens[0].charAt(0));
+ for (int i = 1; i < tokens.length; i++) {
+ int value = Integer.parseInt(tokens[i]);
+ char d = mapPos2Char[i - 1];
+ matrix[c][d] = value;
+ matrix[c][Character.toLowerCase(d)] = value;
+ matrix[Character.toLowerCase(c)][d] = value;
+ matrix[Character.toLowerCase(c)][Character.toLowerCase(d)] = value;
+ }
+ }
+ }
+ }
+
+ @Override
+ public int[][] getMatrix() {
+ return matrix; // hands out the internal array itself, not a copy
+ }
+}
diff --git a/src/malt/align/SimpleAligner4DNA.java b/src/malt/align/SimpleAligner4DNA.java
new file mode 100644
index 0000000..ca11b12
--- /dev/null
+++ b/src/malt/align/SimpleAligner4DNA.java
@@ -0,0 +1,222 @@
+/*
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package malt.align;
+
+import jloda.util.Basic;
+import jloda.util.Single;
+import megan.parsers.blast.BlastMode;
+import megan.util.BoyerMoore;
+
+import java.util.Iterator;
+
+/**
+ * convenience class for aligning a DNA query into a DNA reference.
+ * Wraps a BandedAligner that is configured for semi-global BlastN alignment and adds
+ * acceptance thresholds (minimum raw score, minimum percent identity).
+ * Created by huson on 2/9/16.
+ */
+public class SimpleAligner4DNA {
+    public enum OverlapType {QuerySuffix2RefPrefix, QueryContainedInRef, QueryPrefix2RefSuffix, None} // what is query?
+
+    private final AlignerOptions alignerOptions;
+    private final BandedAligner bandedAligner;
+    private int minRawScore = 1;
+    private float minPercentIdentity = 0;
+
+    /**
+     * sets up semi-global alignment with a DNA scoring matrix built from the default
+     * match and mismatch scores of AlignerOptions
+     */
+    public SimpleAligner4DNA() {
+        alignerOptions = new AlignerOptions();
+        alignerOptions.setAlignmentType(AlignerOptions.AlignmentMode.SemiGlobal);
+        final DNAScoringMatrix defaultMatrix = new DNAScoringMatrix(alignerOptions.getMatchScore(), alignerOptions.getMismatchScore());
+        alignerOptions.setScoringMatrix(defaultMatrix);
+        bandedAligner = new BandedAligner(alignerOptions, BlastMode.BlastN);
+    }
+
+    /**
+     * compute a semi-global alignment between the query and the reference and test it
+     * against the configured thresholds
+     *
+     * @param query      query sequence
+     * @param reference  reference sequence
+     * @param queryPos   start of the seed in the query
+     * @param refPos     start of the seed in the reference
+     * @param seedLength length of the seed
+     * @return true, if the alignment reaches the min raw score and, when a min percent
+     * identity is set, also reaches that identity
+     */
+    public boolean computeAlignment(byte[] query, byte[] reference, int queryPos, int refPos, int seedLength) {
+        bandedAligner.computeAlignment(query, query.length, reference, reference.length, queryPos, refPos, seedLength);
+        if (bandedAligner.getRawScore() < minRawScore) {
+            return false;
+        }
+        // a min identity of 0 disables the identity check
+        return minPercentIdentity == 0 || bandedAligner.getPercentIdentity() >= minPercentIdentity;
+    }
+
+    /**
+     * set the scoring parameters on the aligner options
+     * NOTE(review): the BandedAligner was constructed earlier from the same options object;
+     * whether it picks up values changed here is not visible in this class - confirm.
+     *
+     * @param matchScore           score for a match
+     * @param mismatchScore        score for a mismatch
+     * @param gapOpenPenality      gap open penalty
+     * @param gapExtensionPenality gap extension penalty
+     */
+    public void setAlignmentParameters(int matchScore, int mismatchScore, int gapOpenPenality, int gapExtensionPenality) {
+        final DNAScoringMatrix scoringMatrix = new DNAScoringMatrix(matchScore, mismatchScore);
+        alignerOptions.setScoringMatrix(scoringMatrix);
+        alignerOptions.setGapOpenPenalty(gapOpenPenality);
+        alignerOptions.setGapExtensionPenalty(gapExtensionPenality);
+    }
+
+    /**
+     * get the min raw score that an alignment must attain to be accepted
+     *
+     * @return min raw score
+     */
+    public int getMinRawScore() {
+        return this.minRawScore;
+    }
+
+    /**
+     * set the min raw score that an alignment must attain to be accepted
+     *
+     * @param minRawScore new threshold
+     */
+    public void setMinRawScore(int minRawScore) {
+        this.minRawScore = minRawScore;
+    }
+
+    /**
+     * get the min percent identity; 0 means that identity is not checked
+     *
+     * @return min percent identity
+     */
+    public float getMinPercentIdentity() {
+        return this.minPercentIdentity;
+    }
+
+    /**
+     * set the min percent identity; 0 means that identity is not checked
+     *
+     * @param minPercentIdentity new threshold
+     */
+    public void setMinPercentIdentity(float minPercentIdentity) {
+        this.minPercentIdentity = minPercentIdentity;
+    }
+
+    /**
+     * get simple alignment text of the last computed alignment
+     *
+     * @return as string
+     */
+    public String getAlignmentString() {
+        final String text = Basic.toString(bandedAligner.getAlignmentSimpleText());
+        return text;
+    }
+
+    /**
+     * gets a position of the query in the reference, or reference.length if not contained.
+     * When the query must be contained with 100% identity, the result of an exact
+     * Boyer-Moore search is returned directly. Otherwise, consecutive k-mers of the query are
+     * used as seeds, every seed hit is aligned, and the hit with the highest raw score wins.
+     *
+     * @param query                query sequence
+     * @param reference            reference sequence
+     * @param queryMustBeContained if true, only alignments covering the whole query count
+     * @return pos or reference.length
+     */
+    public int getPositionInReference(byte[] query, byte[] reference, boolean queryMustBeContained) {
+        if (queryMustBeContained && getMinPercentIdentity() >= 100) {
+            final BoyerMoore exactSearch = new BoyerMoore(query, 0, query.length, 127);
+            return exactSearch.search(reference);
+        }
+
+        int bestScore = 0;
+        int bestQueryPos = 0;
+        int bestRefPos = 0;
+
+        final int k = Math.max(10, (int) (100.0 / (100.0 - minPercentIdentity + 1))); // determine smallest exact match that must be present
+        final int lastSeedStart = query.length - k;
+
+        for (int queryPos = 0; queryPos <= lastSeedStart; queryPos += k) {
+            final Iterator<Integer> seedHits = new BoyerMoore(query, queryPos, k, 127).iterator(reference);
+            while (seedHits.hasNext()) {
+                final int refPos = seedHits.next();
+
+                final boolean accepted;
+                if (queryMustBeContained) {
+                    // the hit must leave room for the whole query and the alignment must span it
+                    accepted = refPos <= reference.length - query.length
+                            && computeAlignment(query, reference, queryPos, refPos, k)
+                            && bandedAligner.getAlignmentLength() >= query.length;
+                } else {
+                    accepted = computeAlignment(query, reference, queryPos, refPos, k);
+                }
+
+                if (accepted && bandedAligner.getRawScore() > bestScore) {
+                    bestScore = bandedAligner.getRawScore();
+                    bestQueryPos = queryPos;
+                    bestRefPos = refPos;
+                }
+            }
+        }
+
+        if (bestScore <= 0) {
+            return reference.length;
+        }
+        // align the winning seed once more so that the aligner holds the best alignment
+        computeAlignment(query, reference, bestQueryPos, bestRefPos, k);
+        return bestRefPos;
+    }
+
+    /**
+     * gets the overlap type of the query in the reference
+     *
+     * @param query     query sequence
+     * @param reference reference sequence
+     * @param overlap   receives the overlap length, or 0 if there is no overlap
+     * @return type
+     */
+    public OverlapType getOverlap(byte[] query, byte[] reference, Single<Integer> overlap) {
+        if (getPositionInReference(query, reference, false) == reference.length) {
+            overlap.set(0);
+            return OverlapType.None;
+        }
+
+        if (bandedAligner.getStartQuery() > 0 && bandedAligner.getStartReference() == 0 && bandedAligner.getAlignmentLength() < reference.length) {
+            overlap.set(query.length - bandedAligner.getStartQuery());
+            return OverlapType.QuerySuffix2RefPrefix;
+        }
+        if (bandedAligner.getStartQuery() == 0 && bandedAligner.getStartReference() > 0 && bandedAligner.getAlignmentLength() < query.length) {
+            overlap.set(bandedAligner.getEndQuery());
+            return OverlapType.QueryPrefix2RefSuffix;
+        }
+        if (bandedAligner.getStartQuery() == 0 && bandedAligner.getEndQuery() == query.length) {
+            overlap.set(query.length);
+            return OverlapType.QueryContainedInRef;
+        }
+
+        overlap.set(0);
+        return OverlapType.None;
+    }
+
+    /**
+     * get the percent identity of the last alignment
+     *
+     * @return percent identity
+     */
+    public float getPercentIdentity() {
+        final float identity = bandedAligner.getPercentIdentity();
+        return identity;
+    }
+
+    /**
+     * test this class
+     *
+     * @param args not used
+     */
+    public static void main(String[] args) {
+        final SimpleAligner4DNA simpleAligner4DNA = new SimpleAligner4DNA();
+
+        final byte[] reference = "acttgcatcacgactacactgacacggctctttacatcggtatatcgctacacagtcacagactacacgtcacagcattt".getBytes();
+
+        final String[] queries = {
+                "ggtatatcgctacacagtcacagactacacgtcacagcataaaaaaaa",
+                "aaaaaaaaaaacttgcatcacgactacactgacacggctctttacatc",
+                "tatatcgctacacagtcacagactacacgtcacagc"
+        };
+
+        simpleAligner4DNA.setMinPercentIdentity(90);
+
+        for (int i = 0; i < queries.length; i++) {
+            final Single<Integer> overlap = new Single<>(0);
+            final OverlapType type = simpleAligner4DNA.getOverlap(queries[i].getBytes(), reference, overlap);
+            System.err.println("Overlap type: " + type + ", length=" + overlap);
+            System.err.println(simpleAligner4DNA.getAlignmentString());
+        }
+    }
+
+}
diff --git a/src/malt/analysis/OrganismsProfile.java b/src/malt/analysis/OrganismsProfile.java
new file mode 100644
index 0000000..401a0aa
--- /dev/null
+++ b/src/malt/analysis/OrganismsProfile.java
@@ -0,0 +1,205 @@
+/**
+ * OrganismsProfile.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.analysis;
+
+import malt.data.ReadMatch;
+import malt.mapping.Mapping;
+import malt.util.Utilities;
+import megan.algorithms.LCAAlgorithm;
+
+import java.util.*;
+
+/**
+ * an organism profile reports organisms, contained genes and contained reads.
+ * The weighted LCA is used to determine organisms. For each read, genes are ranked by reference weight.
+ * This class collects the reference weights: addRead() increments the weight of references
+ * based on the naive LCA of a read's top matches and records the read in a linked list of query items.
+ * Daniel Huson, 8.2014
+ */
+public class OrganismsProfile {
+    protected final Mapping taxonMapping;
+    protected final Map<Integer, Integer> refIndex2weight = new HashMap<>(100000);
+    protected final QueryItem head = new QueryItem(); // head of query item list
+    protected QueryItem tail = head; // tail of query item list
+
+    protected String name;
+
+    protected int totalReads;
+
+    private final Set<Integer> refIdAlreadySeenInAddRead = new HashSet<>(2000, 0.9f);
+
+    private final LCAAlgorithm lcaAlgorithm = new LCAAlgorithm();
+
+    // scratch buffers reused by addRead(); both are always resized together
+    protected int[] taxIds = new int[1000];
+    private int[] refIds = new int[1000];
+
+    protected double topPercentFactor = 0.9;
+
+    /**
+     * constructor
+     *
+     * @param taxonMapping mapping used by addRead() to look up the taxon id of a reference id
+     */
+    public OrganismsProfile(final Mapping taxonMapping) {
+        this.taxonMapping = taxonMapping;
+    }
+
+    /**
+     * add a read to the organism profile
+     *
+     * @param queryHeader     header of the read; its first word (without leading '>') becomes the query name
+     * @param numberOfMatches number of entries of readMatches to consider
+     * @param readMatches     matches of the read, expected in descending order of bit score
+     */
+    public void addRead(final byte[] queryHeader, final int numberOfMatches, final ReadMatch[] readMatches) {
+        final byte[] queryName = Utilities.getFirstWordSkipLeadingGreaterSign(queryHeader);
+
+        // increment reference weights using naive LCA algorithm
+        if (numberOfMatches == 0) { // no hits
+            addNoHitsRead();
+            return;
+        }
+        if (numberOfMatches == 1) { // exactly one hit, will use this
+            incrementReferenceWeight(readMatches[0].getReferenceId());
+            appendQueryItem(queryName, numberOfMatches, readMatches);
+            return;
+        }
+
+        // more than one hit.
+        // For each read, we store the set of references matched and after processing all reads in this way we
+        // then apply the weighted LCA to all such sets of references
+        if (refIds.length < numberOfMatches) { // resize if necessary; old content is not needed
+            final int newSize = Math.max(2 * refIds.length, numberOfMatches);
+            refIds = new int[newSize];
+            taxIds = new int[newSize];
+        }
+
+        final int numberOfMatchesToUse = collectTopMatches(numberOfMatches, readMatches);
+
+        // NOTE(review): the query items below are created with the filtered count but the
+        // unfiltered readMatches array; whether QueryItem expects that pairing is not visible here - confirm.
+        if (numberOfMatchesToUse == 0) {
+            addNoHitsRead(); // should never happen...
+        } else if (numberOfMatchesToUse == 1) { // only has one good match, increment reference weight
+            incrementReferenceWeight(refIds[0]);
+            appendQueryItem(queryName, numberOfMatchesToUse, readMatches);
+        } else { // compute naive LCA. Then increment weight for any reference whose taxon matches the LCA
+            final int lca = lcaAlgorithm.computeNaiveLCA(taxIds, numberOfMatchesToUse);
+            if (lca > 0) {
+                for (int i = 0; i < numberOfMatchesToUse; i++) {
+                    if (taxIds[i] == lca) {
+                        incrementReferenceWeight(refIds[i]);
+                    }
+                }
+                appendQueryItem(queryName, numberOfMatchesToUse, readMatches);
+            } else {
+                addNoHitsRead();
+            }
+        }
+    }
+
+    /**
+     * fills taxIds and refIds with the matches whose bit score is within the top-percent
+     * range of the best match, that have a taxon id greater than 0, and whose reference
+     * has not been used yet for this read
+     *
+     * @return number of entries written to taxIds and refIds
+     */
+    private int collectTopMatches(final int numberOfMatches, final ReadMatch[] readMatches) {
+        final double topScore = readMatches[0].getBitScore();
+        final double minScore = Math.min(topScore, topPercentFactor * topScore);
+        int numberOfMatchesToUse = 0;
+        refIdAlreadySeenInAddRead.clear();
+        for (int i = 0; i < numberOfMatches; i++) { // consider all matches in descending order of bit score
+            final ReadMatch match = readMatches[i];
+            if (match.getBitScore() < minScore)
+                break;
+            final int refId = match.getReferenceId();
+            final int taxId = taxonMapping.get(refId);
+            if (taxId > 0 && numberOfMatchesToUse < refIds.length) {
+                if (refIdAlreadySeenInAddRead.add(refId)) { // don't use more than one match to the same reference
+                    taxIds[numberOfMatchesToUse] = taxId;
+                    refIds[numberOfMatchesToUse++] = refId;
+                }
+            }
+        }
+        return numberOfMatchesToUse;
+    }
+
+    /**
+     * adds one to the weight of the given reference, starting at 1 for an unseen reference
+     */
+    private void incrementReferenceWeight(final int refId) {
+        final Integer weight = refIndex2weight.get(refId);
+        refIndex2weight.put(refId, weight == null ? 1 : weight + 1);
+    }
+
+    /**
+     * counts the read and appends a new query item for it to the end of the query item list
+     */
+    private void appendQueryItem(final byte[] queryName, final int numberOfMatches, final ReadMatch[] readMatches) {
+        totalReads++;
+        tail.next = new QueryItem(queryName, numberOfMatches, readMatches);
+        tail = tail.next;
+    }
+
+    /**
+     * skip a read: it is counted in the total, but no query item is recorded for it
+     */
+    public void addNoHitsRead() {
+        totalReads++;
+    }
+
+    /**
+     * returns an iterator over all query items, in the order in which they were added
+     *
+     * @return query item iterator
+     */
+    public Iterator<QueryItem> iterator() {
+        return new Iterator<QueryItem>() {
+            private QueryItem item = head.next;
+
+            @Override
+            public boolean hasNext() {
+                return item != null;
+            }
+
+            @Override
+            public QueryItem next() {
+                if (item == null) // honour the Iterator contract instead of failing with a NullPointerException
+                    throw new NoSuchElementException();
+                final QueryItem result = item;
+                item = item.next;
+                return result;
+            }
+
+            @Override
+            public void remove() {
+                // removal is not supported; deliberately kept as a no-op as before
+            }
+        };
+    }
+
+    /**
+     * get the top percent value, i.e. 100 * (1 - topPercentFactor)
+     *
+     * @return top percent
+     */
+    public double getTopPercent() {
+        return 100 * (1.0 - topPercentFactor);
+    }
+
+    /**
+     * set the top percent value; stored as the factor 1 - topPercent / 100 that addRead()
+     * multiplies with the best bit score to obtain the minimum bit score
+     *
+     * @param topPercent top percent
+     */
+    public void setTopPercent(double topPercent) {
+        this.topPercentFactor = 1.0 - topPercent / 100.0;
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    /**
+     * get the number of reads added so far, including reads without hits
+     *
+     * @return total number of reads
+     */
+    public long getTotalReads() {
+        return totalReads;
+    }
+
+    protected LCAAlgorithm getLcaAlgorithm() {
+        return lcaAlgorithm;
+    }
+
+    /**
+     * finish the analysis; does nothing in this class
+     */
+    public void finishAnalysis() {
+    }
+}
diff --git a/src/malt/analysis/OrganismsProfileMerger.java b/src/malt/analysis/OrganismsProfileMerger.java
new file mode 100644
index 0000000..de30847
--- /dev/null
+++ b/src/malt/analysis/OrganismsProfileMerger.java
@@ -0,0 +1,301 @@
+/**
+ * OrganismsProfileMerger.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.analysis;
+
+import jloda.util.Basic;
+import jloda.util.ProgressPercentage;
+import malt.genes.GeneItem;
+import malt.genes.GeneTableAccess;
+import malt.io.xml.*;
+import malt.mapping.Mapping;
+import malt.util.TaxonomyUtilities;
+import megan.algorithms.LCAAlgorithm;
+import megan.classification.Classification;
+import megan.classification.ClassificationManager;
+import megan.classification.data.Name2IdMap;
+
+import javax.xml.bind.JAXBException;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.util.*;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+/**
+ * merges all thread-specific organism profiles and generates output file
+ * Daniel Huson, 8.2014
+ */
+public class OrganismsProfileMerger extends OrganismsProfile {
+ private final Map<Integer, OrganismItem> taxonId2OrganismItem;
+
+ private final GeneTableAccess geneTableAccess;
+
+ private final int numberOfSyncObjects = 1024;
+ private final Object[] syncObjects = new Object[numberOfSyncObjects]; // use lots of objects to synchronize on so that threads don't in each others way
+
+ /**
+ * constructor
+ *
+ * @param taxonMapping
+ */
+ public OrganismsProfileMerger(Mapping taxonMapping, GeneTableAccess geneTableAccess) {
+ super(taxonMapping);
+ this.geneTableAccess = geneTableAccess;
+ taxonId2OrganismItem = new HashMap<>(10000, 1f);
+
+ // create the synchronization objects
+ for (int i = 0; i < numberOfSyncObjects; i++)
+ syncObjects[i] = new Object();
+ }
+
+ /**
+ * merge all thread-specific profiles and build final organism profiles
+ *
+ * @param profiles
+ */
+ public void mergeAndCompute(final OrganismsProfile[] profiles) {
+ final int maxGenesPerRead = 5;
+ final double proportionOfWeightToCover = 0.8; // todo: makes these options user accessible
+ // merge all refIndex to weight maps:
+ final Map<Integer, Integer> refIndex2weight = new HashMap<>(100000);
+ for (OrganismsProfile current : profiles) {
+ // merge refIndex2weight
+ for (Map.Entry<Integer, Integer> entry : current.refIndex2weight.entrySet()) {
+ if (entry.getValue() != null) {
+ Integer count = refIndex2weight.get(entry.getKey());
+ if (count == null)
+ refIndex2weight.put(entry.getKey(), entry.getValue());
+ else
+ refIndex2weight.put(entry.getKey(), count + entry.getValue());
+ }
+ }
+ totalReads += current.totalReads;
+ }
+
+ final ProgressPercentage progress = new ProgressPercentage("Computing organism profiles...", totalReads);
+
+ // one thread for each profile (as we can assume that each profile was computed in a separate thread)
+ int numberOfThreads = profiles.length;
+ final ExecutorService executor = Executors.newFixedThreadPool(numberOfThreads);
+ final CountDownLatch countDownLatch = new CountDownLatch(numberOfThreads);
+
+ // launch the worker threads
+ for (int thread = 0; thread < numberOfThreads; thread++) {
+ final int threadNumber = thread;
+ executor.execute(new Runnable() {
+ public void run() {
+ try {
+ final OrganismsProfile profile = profiles[threadNumber];
+ final LCAAlgorithm lcaAlgorithm = profiles[threadNumber].getLcaAlgorithm();
+ final HashMap<Integer, Integer> tax2weight = new HashMap<>(10000, 1f);
+
+ final GeneItem[] genes = new GeneItem[maxGenesPerRead];
+
+ // Consider all queries:
+ for (Iterator<QueryItem> it = profile.iterator(); it.hasNext(); ) {
+ QueryItem queryItem = it.next();
+ // prepare for weighted-LCA algorithm:
+ tax2weight.clear();
+ int numberOfTaxa = 0;
+ for (int i = 0; i < queryItem.readMatchItems.length; i++) {
+ ReadMatchItem readMatchItem = queryItem.readMatchItems[i];
+ int taxId = taxonMapping.get(readMatchItem.refIndex);
+ if (taxId > 0) {
+ taxIds[numberOfTaxa++] = taxId;
+ Integer add = refIndex2weight.get(readMatchItem.refIndex);
+ if (add == null)
+ add = 1;
+ Integer weight = tax2weight.get(taxId);
+ if (weight == null || weight < add) // keep best weight seen for this taxon
+ tax2weight.put(taxId, add);
+ }
+ }
+ /*
+ int totalWeight = 0; // compute total weight
+ for (Integer w : tax2weight.values()) {
+ totalWeight += w;
+ }
+ */
+
+ // apply weighted LCA algorithm:
+ final int lca = lcaAlgorithm.computeWeightedLCA(tax2weight, proportionOfWeightToCover);
+
+ // only report those reads that have a taxon assignment:
+ if (lca > 0) {
+ int numberOfGenes = geneTableAccess.getGenes(refIndex2weight, queryItem.readMatchItems, genes);
+
+ synchronized (syncObjects[Math.abs(lca) % numberOfSyncObjects]) {
+ OrganismItem organismItem = taxonId2OrganismItem.get(lca);
+ if (organismItem == null) {
+ organismItem = new OrganismItem();
+ taxonId2OrganismItem.put(lca, organismItem);
+ }
+ if (numberOfGenes > 0)
+ organismItem.genes.addAll(Arrays.asList(genes).subList(0, numberOfGenes));
+ organismItem.queryNames.add(queryItem.queryName);
+ }
+ }
+ progress.incrementProgress();
+ }
+ } catch (Exception ex)
+
+ {
+ Basic.caught(ex);
+ System.exit(1); // just die...
+ } finally {
+ countDownLatch.countDown();
+ }
+ }
+ });
+ }
+
+ try {
+ countDownLatch.await(); // await completion of threads
+ } catch (InterruptedException e) {
+ Basic.caught(e);
+ } finally {
+ // shut down threads:
+ executor.shutdownNow();
+ }
+
+ progress.close();
+ }
+
+ private class OrganismItem {
+ final List<GeneItem> genes;
+ final List<byte[]> queryNames;
+
+ OrganismItem() {
+ genes = new LinkedList<>();
+ queryNames = new LinkedList<>();
+ }
+
+ }
+
+ /**
+ * write the profile to a file
+ *
+ * @param outs
+ */
+ public void write(OutputStream outs) throws JAXBException, IOException {
+ // setup xml report
+ ReportType report = generateXMLReport();
+ // write to stream
+ writeReport(outs, report);
+ }
+
+ /**
+ * generates the XML report
+ *
+ * @return XML report
+ */
+ private ReportType generateXMLReport() {
+ ReportType report = new ReportType();
+ report.setOrganisms(new OrganismsType());
+ report.setDataset(new DatasetType());
+ if (name != null)
+ report.getDataset().setDatasetName(name);
+
+ int countOrganisms = 0;
+ long countGenes = 0;
+ long countReads = 0;
+
+ final Name2IdMap name2IdMap = ClassificationManager.get(Classification.Taxonomy, false).getName2IdMap();
+
+ for (Integer taxId : taxonId2OrganismItem.keySet()) {
+ OrganismItem organismItem = taxonId2OrganismItem.get(taxId);
+ if (organismItem != null) {
+ countOrganisms++;
+ final OrganismType organism = new OrganismType();
+ report.getOrganisms().getOrganism().add(organism);
+ organism.setTaxonomy(new Taxonomy());
+
+ organism.setOrganismName(name2IdMap.get(taxId));
+ String genus = TaxonomyUtilities.getContainingGenus(taxId);
+ if (genus != null)
+ organism.setGenus(genus);
+ final String species = TaxonomyUtilities.getContainingSpecies(taxId);
+ if (species != null)
+ organism.setSpecies(species);
+ final String strain = TaxonomyUtilities.getStrain(taxId);
+ if (strain != null)
+ organism.setStrain(strain);
+ organism.setOrganismName(name2IdMap.get(taxId));
+ organism.getTaxonomy().setTaxonId(BigInteger.valueOf(taxId));
+ organism.getTaxonomy().setValue(TaxonomyUtilities.getPath(taxId));
+ organism.getTaxonomy().setValue(TaxonomyUtilities.getPath(taxId));
+
+ organism.setRelativeAmount(new RelativeAmount());
+ organism.getRelativeAmount().setCount(BigInteger.valueOf(organismItem.queryNames.size()));
+ organism.getRelativeAmount().setValue(BigDecimal.valueOf(100.0 * (double) organismItem.queryNames.size() / (double) totalReads));
+
+ // add all genes to organism
+ organism.setGenes(new GenesType());
+ final Set<String> seen = new HashSet<>();
+ for (GeneItem geneItem : organismItem.genes) {
+ if (geneItem.getGeneName() != null) {
+ String geneName = Basic.toString(geneItem.getGeneName());
+ if (!seen.contains(geneName)) {
+ seen.add(geneName);
+ GeneType geneType = new GeneType();
+ geneType.setValue(geneName);
+ if (geneItem.getGiNumber() != 0)
+ geneType.setGi(BigInteger.valueOf(geneItem.getGiNumber()));
+ if (geneItem.getProduct() != null)
+ geneType.setProduct(Basic.toString(geneItem.getProduct()));
+ if (geneItem.getProteinId() != null)
+ geneType.setProteinId(Basic.toString(geneItem.getProteinId()));
+ organism.getGenes().getGene().add(geneType);
+ countGenes++;
+ }
+ }
+ }
+
+ // add all reads to organism
+ organism.setReads(new ReadsType());
+ for (byte[] queryName : organismItem.queryNames) {
+ organism.getReads().getSequence().add(Basic.toString(queryName));
+ }
+ countReads += organismItem.queryNames.size();
+ }
+ }
+ System.err.println("Organisms: " + countOrganisms + " genes: " + countGenes + " reads: " + countReads);
+ return report;
+ }
+
+ /**
+ * write the report
+ *
+ * @param outs
+ * @param report
+ * @throws javax.xml.bind.JAXBException
+ * @throws java.io.FileNotFoundException
+ */
+ private void writeReport(OutputStream outs, ReportType report) throws JAXBException, IOException {
+ javax.xml.bind.JAXBContext jaxbContext = javax.xml.bind.JAXBContext.newInstance("malt.io.xml");
+ // create a Marshaller and do marshal
+ javax.xml.bind.Marshaller marshaller = jaxbContext.createMarshaller();
+ marshaller.setProperty(javax.xml.bind.Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE);
+ marshaller.marshal(report, outs);
+ }
+}
diff --git a/src/malt/analysis/QueryItem.java b/src/malt/analysis/QueryItem.java
new file mode 100644
index 0000000..cdbe450
--- /dev/null
+++ b/src/malt/analysis/QueryItem.java
@@ -0,0 +1,50 @@
+/**
+ * QueryItem.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.analysis;
+
+import malt.data.ReadMatch;
+
+/**
+ * a single link in the linked list of queries
+ * Daniel Huson, 8.2014
+ */
+public class QueryItem {
+ final byte[] queryName;
+ final ReadMatchItem[] readMatchItems;
+ QueryItem next;
+
+ public QueryItem() {
+ queryName = null;
+ readMatchItems = null;
+ }
+
+ public QueryItem(final byte[] queryName, final int length, final ReadMatch[] readMatches) {
+ this.queryName = queryName;
+ this.readMatchItems = new ReadMatchItem[length];
+ for (int i = 0; i < length; i++) {
+ ReadMatch readMatch = readMatches[i];
+ this.readMatchItems[i] = new ReadMatchItem(readMatch.getReferenceId(), (float) readMatch.getBitScore(), readMatch.getStartRef(), readMatch.getEndRef());
+ }
+ }
+
+ public ReadMatchItem[] getReadMatchItems() {
+ return readMatchItems;
+ }
+}
diff --git a/src/malt/analysis/ReadMatchItem.java b/src/malt/analysis/ReadMatchItem.java
new file mode 100644
index 0000000..90290e9
--- /dev/null
+++ b/src/malt/analysis/ReadMatchItem.java
@@ -0,0 +1,38 @@
+/**
+ * ReadMatchItem.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.analysis;
+
+/**
+ * a single read match
+ * Daniel Huson, 8.2014
+ */
+public class ReadMatchItem {
+ final public int refIndex;
+ final public float score;
+ final public int refStart;
+ final public int refEnd;
+
+ public ReadMatchItem(int refIndex, float score, int refStart, int refEnd) {
+ this.refIndex = refIndex;
+ this.score = score;
+ this.refStart = refStart;
+ this.refEnd = refEnd;
+ }
+}
diff --git a/src/malt/data/BuildRow.java b/src/malt/data/BuildRow.java
new file mode 100644
index 0000000..253ad5f
--- /dev/null
+++ b/src/malt/data/BuildRow.java
@@ -0,0 +1,125 @@
+/**
+ * BuildRow.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.data;
+
+/**
+ * a row of numbers that are stored in a larger array.
+ * Daniel Huson, 8.2014
+ */
+
+public class BuildRow {
+ private int size;
+ private int offset;
+ private int[] containingArray;
+ private final int[] pair = new int[]{0, 0};
+
+ /**
+ * constructor
+ */
+ public BuildRow() {
+ }
+
+ /**
+ * Set the row. array[offset] must contain size, i.e. the number of integers to be used
+ * array[offset+1]... array[offset+size-1] are the numbers
+ *
+ * @param array array containing size followed by entries
+ * @param offset location of of size entry in array
+ */
+ public void set(int[] array, int offset) {
+ this.size = array[offset];
+ this.containingArray = array;
+ this.offset = offset + 1;
+ }
+
+ /**
+ * setting a single pair of numbers
+ *
+ * @param firstNumber
+ * @param secondNumber
+ */
+ public void setPair(int firstNumber, int secondNumber) {
+ size = 2;
+ offset = 0;
+ pair[0] = firstNumber;
+ pair[1] = secondNumber;
+ containingArray = pair;
+ }
+
+ /**
+ * set to empty
+ */
+ public void setEmpty() {
+ size = 0;
+ }
+
+ /**
+ * gets the number of int in this row
+ *
+ * @return size
+ */
+ public int size() {
+ return size;
+ }
+
+ /**
+ * use this to access numbers 0,..,size-1
+ *
+ * @param index
+ * @return item
+ */
+ public int get(int index) {
+ return containingArray[offset + index];
+ }
+
+ /**
+ * get array that contains numbers
+ *
+ * @return full row
+ */
+ public int[] getContainingArray() {
+ return containingArray;
+ }
+
+ /**
+ * get offset at which numbers start (position of size entry plus 1)
+ *
+ * @return offset
+ */
+ public int getOffset() {
+ return offset;
+ }
+
+ /**
+ * get string representation
+ *
+ * @return
+ */
+ public String toString() {
+ if (size > 0) {
+ StringBuilder buf = new StringBuilder();
+ buf.append("(").append(size()).append("): ");
+ for (int i = 0; i < size(); i++)
+ buf.append(" ").append(get(i));
+ return buf.toString();
+ } else
+ return "null";
+ }
+}
diff --git a/src/malt/data/DNA5.java b/src/malt/data/DNA5.java
new file mode 100644
index 0000000..404a1b9
--- /dev/null
+++ b/src/malt/data/DNA5.java
@@ -0,0 +1,236 @@
+/**
+ * DNA5.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.data;
+
+/**
+ * DNA characters
+ * Daniel Huson, 8.2014
+ */
+public class DNA5 implements IAlphabet {
+
+ final static private byte[] normalizedLetters = {
+ 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
+ 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
+ 'N', '-', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'A',
+ 'N', 'C', 'N', 'N', 'N', 'G', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'T', 'T', 'N', 'N',
+ 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'A', 'N', 'C', 'N', 'N', 'N', 'G', 'N', 'N', 'N', 'N', 'N', 'N',
+ 'N', 'N', 'N', 'N', 'N', 'N', 'T', 'T', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N'
+ };
+
+ final static private byte[] normalizedComplement = {
+ 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
+ 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
+ 'N', '-', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'T',
+ 'N', 'G', 'N', 'N', 'N', 'C', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'A', 'A', 'N', 'N',
+ 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'T', 'N', 'G', 'N', 'N', 'N', 'C', 'N', 'N', 'N', 'N', 'N', 'N',
+ 'N', 'N', 'N', 'N', 'N', 'N', 'A', 'A', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N'
+ };
+
+ private static DNA5 instance = new DNA5();
+
+ /**
+ * return an instance
+ *
+ * @return instance
+ */
+ public static DNA5 getInstance() {
+ return instance;
+ }
+
+ /**
+ * maps letter to 'A', 'C', 'G', 'T' or 'N'
+ *
+ * @param letter
+ * @return
+ */
+ public byte getNormalized(byte letter) {
+ return DNA5.normalizedLetters[letter];
+ }
+
+ /**
+ * normalize a sequence
+ * @param sequence
+ */
+ public void normalize(byte[] sequence) {
+ for (int i = 0; i < sequence.length; i++) {
+ sequence[i] = getNormalized(sequence[i]);
+ }
+ }
+
+ /**
+ * get complement of base
+ *
+ * @param letter
+ * @return
+ */
+ public byte getBaseComplement(byte letter) {
+ return normalizedComplement[letter];
+ }
+
+ /**
+ * do letters a and b correspond to the same base?
+ *
+ * @param a
+ * @param b
+ * @return true, if equalOverShorterOfBoth bases
+ */
+ public boolean equal(byte a, byte b) {
+ return normalizedLetters[a] == normalizedLetters[b];
+ }
+
+ /**
+ * do strings a and b correspond to the same DNA sequences?
+ *
+ * @param a
+ * @param b
+ * @return true, if equalOverShorterOfBoth DNA sequences
+ */
+ public boolean equal(byte[] a, byte[] b) {
+ if (a.length != b.length)
+ return false;
+ for (int i = 0; i < a.length; i++)
+ if (normalizedLetters[a[i]] != normalizedLetters[b[i]])
+ return false;
+ return true;
+ }
+
+
+ /**
+ * gets reverse complement of a DNA sequence
+ *
+ * @param sequence
+ * @return reverse complement
+ */
+ public byte[] getReverseComplement(byte[] sequence) {
+ byte[] result = new byte[sequence.length];
+ for (int i = 0; i < sequence.length; i++) {
+ result[i] = normalizedComplement[sequence[sequence.length - 1 - i]];
+ }
+ return result;
+ }
+
+ /**
+ * gets reverse complement of a DNA sequence
+ *
+ * @param sequence
+ * @param length
+ * @param reverseComplement
+ */
+ public void getReverseComplement(byte[] sequence, int length, byte[] reverseComplement) {
+ for (int i = 0; i < length; i++) {
+ reverseComplement[i] = normalizedComplement[sequence[length - 1 - i]];
+ }
+ }
+
+ /**
+ * gets reverse, but not complement, of a DNA sequence
+ *
+ * @param sequence
+ * @param length
+ * @param reverse
+ */
+ public void getReverseNotComplement(byte[] sequence, int length, byte[] reverse) {
+ for (int i = 0; i < length; i++) {
+ reverse[i] = sequence[length - 1 - i];
+ }
+ }
+
+ /**
+ * is this a protein alphabet?
+ *
+ * @return true, if protein
+ */
+ public boolean isProtein() {
+ return false;
+ }
+
+ /**
+ * is this a DNA alphabet?
+ *
+ * @return true, if DNA
+ */
+ public boolean isDNA() {
+ return true;
+ }
+
+ @Override
+ public String getName() {
+ return "DNA";
+ }
+
+ /**
+ * reverse complement in place
+ *
+ * @param bytes
+ */
+ public void reverseComplement(byte[] bytes) {
+ int top = (bytes.length + 1) / 2;
+ for (int i = 0; i < top; i++) {
+ int j = bytes.length - (i + 1);
+ byte tmp = bytes[i];
+ bytes[i] = getBaseComplement(bytes[j]);
+ bytes[j] = getBaseComplement(tmp);
+ }
+ }
+
+ /**
+ * reverse (but no complement) in place
+ *
+ * @param bytes
+ */
+ public void reverse(byte[] bytes) {
+ int top = bytes.length / 2;
+ for (int i = 0; i < top; i++) {
+ int j = bytes.length - (i + 1);
+ byte tmp = bytes[i];
+ bytes[i] = bytes[j];
+ bytes[j] = tmp;
+ }
+ }
+
+ /**
+ * size
+ *
+ * @return size
+ */
+ public int size() {
+ return 5;
+ }
+
+ /**
+ * a DNA seed is good it does not contain an N and contains at least two different letters
+ *
+ * @param word
+ * @param length
+ * @return
+ */
+ @Override
+ public boolean isGoodSeed(byte[] word, int length) {
+ byte a = word[0];
+ byte b = 0;
+ for (int i = 0; i < length; i++) {
+ if (word[i] == 'N')
+ return false;
+ else if (b == 0 && word[i] != a)
+ b = word[i];
+ }
+ return b != 0;
+ }
+}
diff --git a/src/malt/data/IAlphabet.java b/src/malt/data/IAlphabet.java
new file mode 100644
index 0000000..61d00f9
--- /dev/null
+++ b/src/malt/data/IAlphabet.java
@@ -0,0 +1,80 @@
+/**
+ * IAlphabet.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.data;
+
+/**
+ * DNA or protein alphabet
+ * Daniel Huson, 8.2014
+ */
+public interface IAlphabet extends INormalizer {
+ /**
+ * maps letter to normalized base or amino acid
+ *
+ * @param letter
+ * @return
+ */
+ byte getNormalized(byte letter);
+
+ /**
+ * do letters a and b correspond to the same base or amino acid?
+ *
+ * @param a
+ * @param b
+ * @return true, if equalOverShorterOfBoth bases
+ */
+ boolean equal(byte a, byte b);
+
+ /**
+ * is this a protein alphabet?
+ *
+ * @return true, if protein
+ */
+ boolean isProtein();
+
+ /**
+ * is this a DNA alphabet?
+ *
+ * @return true, if DNA
+ */
+ boolean isDNA();
+
+ /**
+ * gets the name of this alphabet
+ *
+ * @return name
+ */
+ String getName();
+
+ /**
+ * get the number of different letters
+ *
+ * @return size
+ */
+ int size();
+
+ /**
+ * is this word a good seed?
+ *
+ * @param word
+ * @param length
+ * @return true, if good
+ */
+ boolean isGoodSeed(byte[] word, int length);
+}
diff --git a/src/malt/data/INormalizer.java b/src/malt/data/INormalizer.java
new file mode 100644
index 0000000..a10cb63
--- /dev/null
+++ b/src/malt/data/INormalizer.java
@@ -0,0 +1,34 @@
+/**
+ * INormalizer.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.data;
+
+/**
+ * normalization interface
+ * Daniel Huson, 10.2014
+ */
+public interface INormalizer {
+ /**
+ * returns normalized letter
+ *
+ * @param letter
+ * @return normalized letter
+ */
+ byte getNormalized(byte letter);
+}
diff --git a/src/malt/data/ISequenceAccessor.java b/src/malt/data/ISequenceAccessor.java
new file mode 100644
index 0000000..50236db
--- /dev/null
+++ b/src/malt/data/ISequenceAccessor.java
@@ -0,0 +1,34 @@
+/**
+ * ISequenceAccessor.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.data;
+
+/**
+ * Seed accessor
+ * Daniel Huson, 2014
+ */
+public interface ISequenceAccessor {
+ int getNumberOfSequences();
+
+ byte[] getHeader(int index);
+
+ byte[] getSequence(int index);
+
+ void extendHeader(int index, String tag, Integer id);
+}
diff --git a/src/malt/data/ProteinAlphabet.java b/src/malt/data/ProteinAlphabet.java
new file mode 100644
index 0000000..274e6bc
--- /dev/null
+++ b/src/malt/data/ProteinAlphabet.java
@@ -0,0 +1,141 @@
+/**
+ * ProteinAlphabet.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.data;
+
+/**
+ * implements a protein alphabet
+ * Daniel Huson, 8.2014
+ */
+public class ProteinAlphabet implements IAlphabet {
+
+ final static private byte[] normalizedLetters = {
+ 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X',
+ 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', '*', 'X',
+ 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'A',
+ 'X', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'X', 'K', 'L', 'M', 'N', 'X', 'P', 'Q', 'R', 'S', 'T', 'X', 'V', 'W',
+ 'X', 'Y', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'A', 'X', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'X', 'K', 'L', 'M',
+ 'N', 'X', 'P', 'Q', 'R', 'S', 'T', 'X', 'V', 'W', 'X', 'Y', 'X', 'X', 'X', 'X', 'X', 'X'};
+
+ private static ProteinAlphabet instance = new ProteinAlphabet();
+
+ /**
+ * return an instance
+ *
+ * @return instance
+ */
+ public static ProteinAlphabet getInstance() {
+ return instance;
+ }
+
+ /**
+ * maps letter to normalized base or amino acid
+ *
+ * @param letter
+ * @return
+ */
+ public byte getNormalized(byte letter) {
+ return normalizedLetters[letter];
+ }
+
+ /**
+ * do letters a and b correspond to the same base or amino acid?
+ *
+ * @param a
+ * @param b
+ * @return true, if equalOverShorterOfBoth bases
+ */
+ public boolean equal(byte a, byte b) {
+ return normalizedLetters[a] == normalizedLetters[b];
+ }
+
+ @Override
+ public String getName() {
+ return "PROTEIN";
+ }
+
+ /**
+ * returns the used alphabet
+ *
+ * @return alphabet
+ */
+ public String toString() {
+ return "A C D E F G H I K [L*] M N P Q R S T V W X Y";
+ }
+
+ /**
+ * is this a protein alphabet?
+ *
+ * @return true, if protein
+ */
+ public boolean isProtein() {
+ return true;
+ }
+
+ /**
+ * is this a DNA alphabet?
+ *
+ * @return true, if DNA
+ */
+ public boolean isDNA() {
+ return false;
+ }
+
+ public int size() {
+ return 20;
+ }
+
+ /**
+ * a protein seed is a good seed if it contains more than 2 different letters and no unknown
+ *
+ * @param word
+ * @param length
+ * @return
+ */
+ @Override
+ public boolean isGoodSeed(byte[] word, int length) {
+ final byte a = word[0];
+ byte b = 0;
+ byte c = 0;
+
+ for (int i = 0; i < length; i++) {
+ final byte z = word[i];
+ if (z == 'X')
+ return false;
+ if (z != a) {
+ if (b == 0)
+ b = z;
+ else if (c == 0 && z != b)
+ c = z;
+ }
+ }
+ return b != 0 && c != 0;
+ }
+
+ public static void main(String[] args) {
+ for (int i = 0; i < 128; i++) {
+ char ch = Character.toUpperCase((char) i);
+ if ("ACDEFGHIKLMNPQRSTVWXY*".contains("" + ch))
+ System.err.print(" '" + ch + "',");
+ else
+ System.err.print(" 'X',");
+
+ }
+ }
+}
diff --git a/src/malt/data/QuerySequence2MatchesCache.java b/src/malt/data/QuerySequence2MatchesCache.java
new file mode 100644
index 0000000..de4f037
--- /dev/null
+++ b/src/malt/data/QuerySequence2MatchesCache.java
@@ -0,0 +1,268 @@
+/**
+ * QuerySequence2MatchesCache.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.data;
+
+import malt.util.MurmurHash3;
+
+/**
+ * hash table used for caching matches associated with a given read
+ * Created by huson on 7/9/14.
+ */
+public class QuerySequence2MatchesCache {
+ private final int hashMask;
+ private final Item[] hash2data;
+ private int randomNumberSeed = 666;
+
+ private final int numberOfSyncObjects = (1 << 10);
+ private final int syncObjectsMask = numberOfSyncObjects - 1;
+ // use lots of objects to synchronize on so that threads don't in each others way
+ private final Object[] syncTable = new Object[numberOfSyncObjects];
+
+ private long countGet = 0;
+ private long countPut = 0;
+
+ /**
+ * constructor
+ *
+ * @param bits
+ * @throws Exception
+ */
+ public QuerySequence2MatchesCache(int bits) {
+ if (bits > 31)
+ throw new RuntimeException("bits exceed 31");
+ hash2data = new Item[1 << bits];
+ hashMask = (1 << bits) - 1;
+
+ for (int i = 0; i < numberOfSyncObjects; i++) {
+ syncTable[i] = new Object();
+ }
+ }
+
+ /**
+ * put a copy into the cache
+ *
+ * @param sequence
+ * @param sequenceLength
+ * @param matches
+ * @param numberOfMatches
+ */
+ public void put(byte[] sequence, int sequenceLength, ReadMatch[] matches, int numberOfMatches) {
+ int hash = getHash(sequence, sequenceLength);
+ synchronized (syncTable[hash & syncObjectsMask]) {
+ if (hash2data[hash] == null) // check again here, because could have been set while waiting...
+ {
+ // System.err.println("Put: "+ Basic.toString(sequence, sequenceLength)+" hash: "+hash);
+ Item item = hash2data[hash];
+ if (item == null) {
+ hash2data[hash] = new Item(sequence, sequenceLength, matches, numberOfMatches);
+ countPut++;
+ } else if (item.addIfNew(sequence, sequenceLength, matches, numberOfMatches))
+ countPut++;
+ }
+ }
+ }
+
+ /**
+ * get the read matches associated with this sequence, if cached
+ *
+ * @param sequence
+ * @param sequenceLength
+ * @return associated read matches
+ */
+ public ReadMatch[] get(byte[] sequence, int sequenceLength) {
+ int hash = getHash(sequence, sequenceLength);
+ // System.err.println("Get: "+ Basic.toString(sequence, sequenceLength)+" hash: "+hash);
+
+ synchronized (syncTable[hash & syncObjectsMask]) {
+ Item item = hash2data[hash];
+ if (item != null) {
+ countGet++;
+ return item.getMatches(sequence, sequenceLength); // get matches if correct sequence found
+ }
+ return null;
+ }
+ }
+
+ /**
+ * for a given key, add the reference id and sequence offset to table
+ * uses very naive synchronization
+ *
+ * @param key
+ * @return hash value
+ */
+ private int getHash(byte[] key, int length) {
+ int value = MurmurHash3.murmurhash3x8632(key, 0, length, randomNumberSeed) & hashMask; // & also removes negative sign
+ if (value >= hash2data.length)
+ value %= hash2data.length;
+ return value;
+ }
+
+ /**
+ * report stats on usage of the table
+ */
+ public void reportStats() {
+ System.err.println("Replicate query cache: in=" + countPut + ", out=" + countGet);
+ }
+
+ /**
+ * determine whether cache contains this sequence
+ *
+ * @param sequence
+ * @param sequenceLength
+ * @return true, if sequence contained in cache
+ */
+ public boolean contains(byte[] sequence, int sequenceLength) {
+ int hash = getHash(sequence, sequenceLength);
+ // System.err.println("Get: "+ Basic.toString(sequence, sequenceLength)+" hash: "+hash);
+
+ synchronized (syncTable[hash & syncObjectsMask]) {
+ Item item = hash2data[hash];
+ return item != null && item.contains(sequence, sequenceLength);
+ }
+ }
+
+ /**
+ * hash table item
+ */
+ class Item {
+ private Item next;
+ private byte[] sequence;
+ private ReadMatch[] matches;
+
+ /**
+ * constructor
+ *
+ * @param sequence
+ * @param sequenceLength
+ * @param matches
+ * @param numberOfMatches
+ */
+ public Item(byte[] sequence, int sequenceLength, ReadMatch[] matches, int numberOfMatches) {
+ this.sequence = copy(sequence, sequenceLength);
+ this.matches = copy(matches, numberOfMatches);
+ }
+
+ /**
+ * add item if new
+ *
+ * @param sequence
+ * @param sequenceLength
+ * @param matches
+ * @param numberOfMatches
+ * @return true, if added, false if not
+ */
+ public boolean addIfNew(byte[] sequence, int sequenceLength, ReadMatch[] matches, int numberOfMatches) {
+ Item current = this;
+
+ while (current != null && !equal(sequence, sequenceLength, current.sequence, current.sequence.length)) {
+ if (current.next == null) {
+ current.next = new Item(sequence, sequenceLength, matches, numberOfMatches);
+ return true;
+ } else
+ current = current.next;
+ }
+ return false;
+ }
+
+ /**
+ * does this item contain this sequence
+ *
+ * @param sequence
+ * @param sequenceLength
+ * @return true, if this item or any chained to it equals the given one
+ */
+ public boolean contains(byte[] sequence, int sequenceLength) {
+ Item current = this;
+
+ while (current != null) {
+ if (equal(sequence, sequenceLength, current.sequence, current.sequence.length))
+ return true;
+ current = current.next;
+ }
+ return false;
+ }
+
+ /**
+ * get list of matches for the given sequence2
+ *
+ * @param sequence
+ * @param sequenceLength
+ * @return matches or null
+ */
+ public ReadMatch[] getMatches(byte[] sequence, int sequenceLength) {
+ Item current = this;
+
+ while (current != null) {
+ if (equal(sequence, sequenceLength, current.sequence, current.sequence.length))
+ return current.matches;
+ current = current.next;
+ }
+ return null;
+ }
+
+
+ /**
+ * check whether two strings are equalOverShorterOfBoth
+ *
+ * @param a
+ * @param aLength
+ * @param b
+ * @param bLength
+ * @return true, if equalOverShorterOfBoth
+ */
+ private boolean equal(byte[] a, int aLength, byte[] b, int bLength) {
+ if (aLength != bLength)
+ return false;
+ for (int i = 0; i < aLength; i++) {
+ if (a[i] != b[i])
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * copy a byte array
+ *
+ * @param array
+ * @param length
+ * @return copy
+ */
+ private byte[] copy(byte[] array, int length) {
+ byte[] tmp = new byte[length];
+ System.arraycopy(array, 0, tmp, 0, length);
+ return tmp;
+ }
+
+ /**
+ * copy a read match array. Makes a copy of each entry
+ *
+ * @param array
+ * @param length
+ * @return read match array copy
+ */
+ private ReadMatch[] copy(ReadMatch[] array, int length) {
+ ReadMatch[] tmp = new ReadMatch[length];
+ for (int i = 0; i < length; i++) {
+ tmp[i] = array[i].getCopy();
+ }
+ return tmp;
+ }
+ }
+}
diff --git a/src/malt/data/ReadMatch.java b/src/malt/data/ReadMatch.java
new file mode 100644
index 0000000..13c3861
--- /dev/null
+++ b/src/malt/data/ReadMatch.java
@@ -0,0 +1,170 @@
+/**
+ * ReadMatch.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.data;
+
+import jloda.util.Basic;
+
+import java.util.Comparator;
+
+/**
+ * A match for a given read
+ * Daniel Huson, 8.2014
+ */
+public
+/**
+ * a read match, consisting of a score, reference ID and the match text
+ */
+class ReadMatch {
+ private static long numberOfEntries = 0;
+ private long entryNumber; // used to make all matches unique
+
+ private float bitScore;
+ private float expected;
+ private int percentIdentity;
+ private int referenceId;
+ private byte[] text; // match text
+ private byte[] rma3Text;
+
+ private int startRef; // start position of match in reference sequence
+ private int endRef; // end position of match in reference sequence
+
+ /**
+ * constructor
+ */
+ public ReadMatch() {
+
+ }
+
+ /**
+ * returns a copy
+ *
+ * @return copy
+ */
+ public ReadMatch getCopy() {
+ return new ReadMatch(bitScore, expected, percentIdentity, referenceId, text, rma3Text, startRef, endRef);
+ }
+
+ /**
+ * constructor
+ *
+ * @param bitScore
+ * @param referenceId
+ * @param text
+ */
+ public ReadMatch(float bitScore, float expected, int percentIdentity, int referenceId, byte[] text, byte[] rma3Text, int startRef, int endRef) {
+ this.bitScore = bitScore;
+ this.expected = expected;
+ this.percentIdentity = percentIdentity;
+ this.referenceId = referenceId;
+ this.entryNumber = ++numberOfEntries;
+ this.text = text;
+ this.rma3Text = rma3Text;
+ this.startRef = startRef;
+ this.endRef = endRef;
+ }
+
+ /**
+ * reuse this object
+ *
+ * @param score
+ * @param referenceId
+ * @param text
+ */
+ public void set(float score, int referenceId, byte[] text, byte[] rma3Text, int startRef, int endRef) {
+ this.bitScore = score;
+ this.referenceId = referenceId;
+ this.entryNumber = ++numberOfEntries;
+ this.text = text;
+ this.rma3Text = rma3Text;
+ this.startRef = startRef;
+ this.endRef = endRef;
+ }
+
+ public float getBitScore() {
+ return bitScore;
+ }
+
+ public float getExpected() {
+ return expected;
+ }
+
+ public int getPercentIdentity() {
+ return percentIdentity;
+ }
+
+ public int getReferenceId() {
+ return referenceId;
+ }
+
+ public byte[] getText() {
+ return text;
+ }
+
+ public byte[] getRMA3Text() {
+ return rma3Text;
+ }
+
+ public int getStartRef() {
+ return startRef;
+ }
+
+ public int getEndRef() {
+ return endRef;
+ }
+
+ public String toString() {
+ return "RefId=" + referenceId + " bitScore=" + bitScore + " start=" + startRef + " end=" + endRef + " text=" + (text == null ? "null" : Basic.toString(text));
+ }
+
+ /**
+ * get comparator
+ */
+ static public Comparator<ReadMatch> createComparator() {
+ return new Comparator<ReadMatch>() {
+ public int compare(ReadMatch a, ReadMatch b) {
+ if (a.bitScore < b.bitScore)
+ return -1;
+ else if (a.bitScore > b.bitScore)
+ return 1;
+ else if (a.referenceId < b.referenceId)
+ return 1;
+ else if (a.referenceId > b.referenceId)
+ return -1;
+ else if (a.entryNumber < b.entryNumber)
+ return -1;
+ else if (a.entryNumber > b.entryNumber)
+ return 1;
+ else
+ return 0;
+ }
+ };
+ }
+
+ /**
+ * does this overlap the given reference coordinates?
+ *
+ * @param start
+ * @param end
+ * @return overlaps the given coordinates?
+ */
+ public boolean overlap(int start, int end) {
+ return !(Math.min(startRef, endRef) >= Math.max(start, end) || Math.max(startRef, endRef) <= Math.min(start, end));
+ }
+}
diff --git a/src/malt/data/ReducedAlphabet.java b/src/malt/data/ReducedAlphabet.java
new file mode 100644
index 0000000..099dd7e
--- /dev/null
+++ b/src/malt/data/ReducedAlphabet.java
@@ -0,0 +1,219 @@
+/**
+ * ReducedAlphabet.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.data;
+
+import jloda.util.Basic;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+
+/**
+ * implements a reduced protein alphabet
+ * Daniel Huson, 8.2014
+ */
+public class ReducedAlphabet implements IAlphabet {
+ private final String description;
+ private final int size;
+
+ private final byte[] normalizedLetters = {
+ 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X',
+ 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X',
+ 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X',
+ 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X',
+ 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X',
+ 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X', 'X'};
+
+ public static Map<String, String> reductions = new TreeMap<>();
+
+ static {
+ // From: Bioinformatics. 2009 June 1; 25(11): 1356–1362. Published online 2009 April 7. doi: 10.1093/bioinformatics/btp164:
+ reductions.put("GBMR4", "[ADKERNTSQ] [YFLIVMCWH*X] G P");
+ reductions.put("SDM12", "A D [KER] N [STQ] [YF] [LIVM*X] C W H G P");
+ reductions.put("HSDM17", "A D [KE] R N T S Q Y F [LIV*X] M C W H G P");
+ // Murphy, Lynne Reed and Wallqvist, Anders and Levy, Ronald M., 2000 :
+ reductions.put("BLOSUM50_4", "[LVIMC*X] [AGSTP] [FYW] [EDNQKRH]");
+ reductions.put("BLOSUM50_8", "[LVIMC*X] [AG] [ST] P [FYW] [EDNQ] [KR] H");
+ reductions.put("BLOSUM50_10", "[LVIM*X] C A G [ST] P [FYW] [EDNQ] [KR] H");
+ reductions.put("BLOSUM50_11", "[LVIM*X] C A G S T P [FYW] [EDNQ] [KR] H"); // this was produced from BLOSUM50_10 by separating S and T
+ reductions.put("BLOSUM50_15", "[LVIM*X] C A G S T P [FY] W E D N Q [KR] H");
+
+ reductions.put("DIAMOND_11", "[KREDQN*X] C G H [ILV] M F Y W P [STA]"); // DIAMOND default
+
+ // produced especially for MALT:
+ reductions.put("MALT_10", "[LVIM*X] C [AST] G P [WYF] [DEQ] N [RK] H");
+
+ reductions.put("UNREDUCED", "A D K E R N T S Q Y F [L*] I V M C W H G P");
+ }
+
+ /**
+ * constructs a reduction protein alphabet mapper
+ *
+ * @param reduction either name or definition of a reduction
+ */
+ public ReducedAlphabet(String reduction) throws IOException {
+ if (reduction.equalsIgnoreCase("default"))
+ reduction = "DIAMOND_11";
+
+ if (Basic.isOneWord(reduction)) {
+ if (!reductions.containsKey(reduction))
+ throw new IOException("Unknown protein reduction: " + reduction);
+ reduction = reductions.get(reduction);
+ }
+
+ StringBuilder buffer = new StringBuilder();
+ char group = 'A';
+ buffer.append("[");
+
+ boolean inWhiteSpace = true;
+ for (int i = 0; i < reduction.length(); i++) {
+ int ch = Character.toUpperCase(reduction.charAt(i));
+ if (Character.isWhitespace(ch)) {
+ if (!inWhiteSpace) {
+ group++;
+ buffer.append("] [");
+ inWhiteSpace = true;
+ }
+ } else {
+ if (inWhiteSpace)
+ inWhiteSpace = false;
+ if (Character.isLetter(ch) || ch == '*') {
+ normalizedLetters[Character.toLowerCase(ch)] = normalizedLetters[ch] = (byte) group;
+ buffer.append((char) ch);
+ }
+ }
+ }
+ buffer.append("]");
+ if (normalizedLetters['*'] == 0)
+ normalizedLetters['*'] = '*';
+ description = buffer.toString();
+ size = group - 'A' + 1;
+ }
+
+ /**
+ * maps letter to normalized base or amino acid
+ *
+ * @param letter
+ * @return
+ */
+ public byte getNormalized(byte letter) {
+ return normalizedLetters[letter];
+ }
+
+ /**
+ * do letters a and b correspond to the same base or amino acid?
+ *
+ * @param a
+ * @param b
+ * @return true, if equalOverShorterOfBoth bases
+ */
+ public boolean equal(byte a, byte b) {
+ return normalizedLetters[a] == normalizedLetters[b];
+ }
+
+ /**
+ * gets human-readable description of reduction
+ *
+ * @return string
+ */
+ public String toString() {
+ return description;
+ }
+
+ /**
+ * gets the name of this alphabet
+ *
+ * @return name
+ */
+ public String getName() {
+ return description;
+ }
+
+ /**
+ * is this a full protein alphabet?
+ *
+ * @return true, if protein
+ */
+ public boolean isProtein() {
+ return false;
+ }
+
+ /**
+ * size of alphabet
+ *
+ * @return
+ */
+ public int size() {
+ return size;
+ }
+
+ /**
+ * is this a DNA alphabet?
+ *
+ * @return true, if DNA
+ */
+ public boolean isDNA() {
+ return false;
+ }
+
+ /**
+ * a reduced protein seed good if it doesn't contain an X
+ *
+ * @param word
+ * @param length
+ * @return
+ */
+ @Override
+ public boolean isGoodSeed(byte[] word, int length) {
+ for (int i = 0; i < length; i++) {
+ if (word[i] == 'X')
+ return false;
+ }
+ return true;
+ }
+
+
+ public static void main(String[] args) throws IOException {
+ Set<Character> firstSet = null;
+ for (String name : reductions.keySet()) {
+ Set<Character> letters = new HashSet<>();
+
+ String def = reductions.get(name);
+ for (int i = 0; i < def.length(); i++) {
+ if (Character.isLetter(def.charAt(i)) || def.charAt(i) == '*')
+ letters.add(def.charAt(i));
+ }
+ System.err.println(name + ": " + letters.size() + ": " + Basic.toString(letters, ","));
+ if (firstSet == null)
+ firstSet = letters;
+ else {
+ for (Character ch : letters) {
+ if (!firstSet.contains(ch))
+ System.err.println("Unexpected letter: " + ch);
+ }
+ }
+ ReducedAlphabet alphabet = new ReducedAlphabet(name);
+ System.err.println("Alphabet: " + alphabet.toString());
+ System.err.println("Size: " + alphabet.size());
+ }
+ }
+}
diff --git a/src/malt/data/RefIndex2ClassId.java b/src/malt/data/RefIndex2ClassId.java
new file mode 100644
index 0000000..cb9f20e
--- /dev/null
+++ b/src/malt/data/RefIndex2ClassId.java
@@ -0,0 +1,157 @@
+/**
+ * RefIndex2ClassId.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.data;
+
+import jloda.util.Basic;
+import jloda.util.CanceledException;
+import jloda.util.ProgressPercentage;
+
+import java.io.*;
+
+/**
+ * maintains a mapping from reference indices to class ids (e.g. taxon ids or KEGG KOs)
+ * todo: mappings now start at 0, this breaks old Malt
+ * Daniel Huson, 8.2014
+ */
+public class RefIndex2ClassId {
+ private static final byte[] MAGIC_NUMBER = "MAClassV1.1.".getBytes();
+
+ private int maxRefId;
+ private int[] refIndex2ClassId;
+
+ public RefIndex2ClassId(int numberOfReferences) {
+ refIndex2ClassId = new int[numberOfReferences];
+ maxRefId = numberOfReferences;
+ }
+
+ /**
+ * put, indices start at 0
+ *
+ * @param refIndex
+ * @param classId
+ */
+ public void put(int refIndex, int classId) {
+ refIndex2ClassId[refIndex] = classId;
+ }
+
+ /**
+ * get, indices start at 0
+ *
+ * @param refIndex
+ * @return class id for given reference id
+ */
+ public int get(int refIndex) {
+ return refIndex2ClassId[refIndex];
+ }
+
+ /**
+ * save to a stream and then close the stream
+ *
+ * @param file
+ * @throws java.io.IOException
+ */
+ public void save(File file) throws IOException, CanceledException {
+ save(file, MAGIC_NUMBER);
+ }
+
+ /**
+ * save to a stream and then close the stream
+ *
+ * @param file
+ * @throws java.io.IOException
+ */
+ public void save(File file, byte[] magicNumber) throws IOException, CanceledException {
+ try (BufferedOutputStream outs = new BufferedOutputStream(new FileOutputStream(file)); ProgressPercentage progressListener = new ProgressPercentage("Writing file: " + file, maxRefId)) {
+ outs.write(magicNumber);
+
+ // number of entries
+ writeInt(outs, maxRefId);
+
+ // write headers and sequences:
+ for (int i = 0; i < maxRefId; i++) {
+ writeInt(outs, refIndex2ClassId[i]);
+ // System.err.println("write: "+i+" "+refIndex2ClassId[i]);
+ progressListener.incrementProgress();
+ }
+ }
+ }
+
+ /**
+ * constructor from a file
+ *
+ * @param file
+ */
+ public RefIndex2ClassId(File file) throws IOException, CanceledException {
+ this(file, MAGIC_NUMBER);
+ }
+
+ /**
+ * constructor from a file
+ *
+ * @param file
+ */
+ public RefIndex2ClassId(File file, byte[] magicNumber) throws IOException, CanceledException {
+ ProgressPercentage progressListener = null;
+ try (BufferedInputStream ins = new BufferedInputStream(new FileInputStream(file))) {
+ // check magic number:
+ Basic.readAndVerifyMagicNumber(ins, magicNumber);
+ maxRefId = readInt(ins);
+ progressListener = new ProgressPercentage("Reading file: " + file, maxRefId);
+ refIndex2ClassId = new int[maxRefId + 1];
+ // write headers and sequences:
+ for (int i = 0; i < maxRefId; i++) {
+ refIndex2ClassId[i] = readInt(ins);
+ // System.err.println("read: "+i+" "+refIndex2ClassId[i]);
+ progressListener.incrementProgress();
+ }
+ } finally {
+ if (progressListener != null)
+ progressListener.close();
+ }
+ }
+
+ /**
+ * read an int from an input stream
+ *
+ * @param ins
+ * @return long value
+ * @throws java.io.IOException
+ */
+ public static int readInt(InputStream ins) throws IOException {
+ return ((ins.read() & 0xFF) << 24)
+ + ((ins.read() & 0xFF) << 16)
+ + ((ins.read() & 0xFF) << 8)
+ + ((ins.read() & 0xFF));
+ }
+
+ /**
+ * writes an int value
+ *
+ * @param outs
+ * @param value
+ * @throws java.io.IOException
+ */
+ public static void writeInt(OutputStream outs, int value) throws IOException {
+ outs.write((byte) (value >> 24));
+ outs.write((byte) (value >> 16));
+ outs.write((byte) (value >> 8));
+ outs.write((byte) value);
+ }
+}
diff --git a/src/malt/data/ReferencesDBAccess.java b/src/malt/data/ReferencesDBAccess.java
new file mode 100644
index 0000000..0e33a1f
--- /dev/null
+++ b/src/malt/data/ReferencesDBAccess.java
@@ -0,0 +1,187 @@
+/**
+ * ReferencesDBAccess.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.data;
+
+import jloda.util.Basic;
+import jloda.util.CanceledException;
+import jloda.util.FileInputIterator;
+import malt.MaltOptions;
+import megan.io.*;
+import megan.io.experimental.ByteFileGetterPagedMemory;
+import megan.io.experimental.LongFileGetterPagedMemory;
+
+import java.io.Closeable;
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * accesses the references DB
+ * Daniel Huson, 3.2015
+ */
+public class ReferencesDBAccess implements Closeable {
+ private byte[][] headers;
+ private byte[][] sequences;
+
+ private int numberOfSequences;
+ private long numberOfLetters;
+
+ private static final int SYNC_BITMASK = 1023;// length of vector must be SYNC_BITMASK+1
+ private final Object[] syncObjects;
+
+ private final ILongGetter refIndex;
+ private final IByteGetter refDB;
+
+
+ /**
+ * construct from an input file
+ *
+ * @param refIndexFile
+ * @throws java.io.IOException
+ */
+ public ReferencesDBAccess(MaltOptions.MemoryMode memoryMode, File refIndexFile, File refDBFile, File refInfFile) throws IOException, CanceledException {
+ syncObjects = new Object[SYNC_BITMASK + 1];
+ for (int i = 0; i < syncObjects.length; i++) {
+ syncObjects[i] = new Object();
+ }
+
+ switch (memoryMode) {
+ default:
+ case load:
+ refIndex = new LongFileGetterInMemory(refIndexFile);
+ refDB = new ByteFileGetterInMemory(refDBFile);
+ break;
+ case page:
+ refIndex = new LongFileGetterPagedMemory(refIndexFile);
+ refDB = new ByteFileGetterPagedMemory(refDBFile);
+ break;
+ case map:
+ refIndex = new LongFileGetterMappedMemory(refIndexFile);
+ refDB = new ByteFileGetterMappedMemory(refDBFile);
+ break;
+ }
+
+ try (FileInputIterator it = new FileInputIterator(refInfFile)) {
+ while (it.hasNext()) {
+ String aLine = it.next();
+ if (aLine.startsWith("sequences")) {
+ numberOfSequences = Integer.parseInt(Basic.getTokenFromTabSeparatedLine(aLine, 1));
+ } else if (aLine.startsWith("letters")) {
+ numberOfLetters = Long.parseLong(Basic.getTokenFromTabSeparatedLine(aLine, 1));
+ }
+ }
+ }
+ System.err.println(String.format("Number of sequences:%,13d", numberOfSequences));
+ System.err.println(String.format("Number of letters:%,15d", numberOfLetters));
+
+ if (numberOfSequences != refIndex.limit())
+ throw new IOException("Expected " + numberOfSequences + "sequences , index contains: " + refIndex.limit());
+
+ headers = new byte[numberOfSequences][];
+ sequences = new byte[numberOfSequences][];
+ }
+
+ /**
+ * Get header string. Index starts at 0
+ *
+ * @param index
+ * @return header
+ */
+ public byte[] getHeader(int index) throws IOException {
+ byte[] array = headers[index];
+ if (array == null) {
+ synchronized (syncObjects[index & SYNC_BITMASK]) {
+ if (headers[index] == null) {
+ long dbIndex = refIndex.get(index);
+ dbIndex += 4 + refDB.getInt(dbIndex); // increment dbIndex by 4 plus length of sequence (to skip over sequence)
+ int headerLength = refDB.getInt(dbIndex);
+ dbIndex += 4;
+ array = new byte[headerLength];
+ refDB.get(dbIndex, array, 0, headerLength);
+ headers[index] = array;
+ } else
+ array = headers[index];
+ }
+ }
+ return array;
+ }
+
+ /**
+ * Get sequence. Index starts at 0
+ *
+ * @param index
+ * @return sequence
+ */
+ public byte[] getSequence(int index) throws IOException {
+ byte[] array = sequences[index];
+ if (array == null) {
+ synchronized (syncObjects[index & SYNC_BITMASK]) {
+ if (sequences[index] == null) {
+ long dbIndex = refIndex.get(index);
+ int sequenceLength = refDB.getInt(dbIndex);
+ dbIndex += 4;
+ array = new byte[sequenceLength];
+ refDB.get(dbIndex, array, 0, sequenceLength);
+ sequences[index] = array;
+ } else
+ array = sequences[index];
+ }
+ }
+ return array;
+ }
+
+ /**
+ * Get sequence length
+ *
+ * @param index
+ * @return sequence length
+ */
+ public int getSequenceLength(int index) throws IOException {
+ if (sequences[index] != null)
+ return sequences[index].length;
+ else
+ return refDB.getInt(refIndex.get(index));
+ }
+
+ /**
+ * number of sequences
+ *
+ * @return number of sequences
+ */
+ public int getNumberOfSequences() {
+ return numberOfSequences;
+ }
+
+ /**
+ * total number of letters
+ *
+ * @return number of letters
+ */
+ public long getNumberOfLetters() {
+ return numberOfLetters;
+ }
+
+ /**
+ * close
+ */
+ public void close() {
+ refIndex.close();
+ refDB.close();
+ }
+}
diff --git a/src/malt/data/ReferencesDBBuilder.java b/src/malt/data/ReferencesDBBuilder.java
new file mode 100644
index 0000000..9546dbd
--- /dev/null
+++ b/src/malt/data/ReferencesDBBuilder.java
@@ -0,0 +1,292 @@
+/**
+ * ReferencesDBBuilder.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.data;
+
+import jloda.util.CanceledException;
+import jloda.util.ProgressListener;
+import jloda.util.ProgressPercentage;
+import malt.io.FastAFileIteratorBytes;
+import megan.io.OutputWriter;
+
+import java.io.*;
+import java.util.List;
+
+/**
+ * builds the reference sequences database
+ * Daniel Huson, 8.2014
+ */
+public class ReferencesDBBuilder implements ISequenceAccessor {
+ private byte[][] headers;
+ private byte[][] sequences;
+ private int numberOfSequences;
+ private long numberOfLetters;
+
+ /**
+ * constructor: creates an empty builder with room for 10000 entries; add() doubles the arrays when they are full
+ */
+ public ReferencesDBBuilder() {
+ headers = new byte[10000][];
+ sequences = new byte[10000][];
+ numberOfSequences = 0;
+ numberOfLetters = 0;
+ }
+
+ /**
+ * Enlarges the header and sequence arrays to the given capacity.
+ * <p>
+ * Existing entries keep their positions. A request that does not exceed
+ * the current capacity of the header array is ignored, so the arrays are
+ * never shrunk by this method.
+ * <p>
+ * Used by loadFastAFiles to pre-size the arrays from an estimate.
+ *
+ * @param newSize requested capacity (number of entries)
+ */
+ public void grow(int newSize) {
+     if (newSize <= headers.length)
+         return; // nothing to do: already at least this large
+     headers = java.util.Arrays.copyOf(headers, newSize);
+     sequences = java.util.Arrays.copyOf(sequences, newSize);
+ }
+
+ /**
+ * Appends one header/sequence pair, doubling both arrays first when they are full.
+ * @param header header bytes; the array is stored as given, not copied
+ * @param sequence sequence bytes; the array is stored as given, and its length is added to the letter count
+ */
+ public void add(byte[] header, byte[] sequence) {
+     final int slot = numberOfSequences;
+     if (slot == sequences.length) {
+         headers = grow(headers);
+         sequences = grow(sequences);
+     }
+     headers[slot] = header;
+     sequences[slot] = sequence;
+     numberOfSequences = slot + 1;
+     numberOfLetters += sequence.length;
+ }
+
+ /**
+ * grow an array
+ *
+ * @param array
+ * @return bigger array
+ */
+ private byte[][] grow(byte[][] array) {
+ byte[][] result = new byte[Math.min(Integer.MAX_VALUE, 2 * Math.max(1, array.length))][];
+ System.arraycopy(array, 0, result, 0, array.length);
+ return result;
+ }
+
+ /**
+ * Get header bytes. Index starts at 0
+ *
+ * @param index 0-based position in the order in which entries were added
+ * @return header (the stored array itself, not a copy); null for a slot that has not been filled
+ */
+ public byte[] getHeader(int index) {
+ return headers[index];
+ }
+
+ /**
+ * Get sequence. Index starts at 0
+ *
+ * @param index 0-based position in the order in which entries were added
+ * @return sequence (the stored array itself, not a copy); null for a slot that has not been filled
+ */
+ public byte[] getSequence(int index) {
+ return sequences[index];
+ }
+
+ /**
+ * load a collection of fastA files, in the order given; each file gets its own ProgressPercentage
+ * @param fileNames names of the fastA files to read
+ * @param alphabet alphabet handed to the file iterator
+ * @throws IOException
+ * @throws CanceledException
+ */
+ public void loadFastAFiles(final List<String> fileNames, final IAlphabet alphabet) throws IOException, CanceledException {
+ long totalSize = 0;
+ for (String fileName : fileNames) {
+ totalSize += (new File(fileName)).length();
+ }
+ int guessNumberOfSequences = (int) Math.min(Integer.MAX_VALUE, totalSize / 1000L); // estimate: one sequence per 1000 bytes of input
+ grow(guessNumberOfSequences); // pre-size the arrays; add() still doubles them if the estimate is too small
+
+ for (String fileName : fileNames) {
+ loadFastAFile(fileName, new ProgressPercentage("Reading file: " + fileName), alphabet);
+ }
+ }
+
+ /**
+ * load data from a fastA file and append every header/sequence pair to this builder
+ * @param fileName name of the fastA file
+ * @param progressListener receives progress; it is closed at the end only if it is a ProgressPercentage
+ * @param alphabet alphabet handed to the file iterator
+ */
+ public void loadFastAFile(final String fileName, final ProgressListener progressListener, final IAlphabet alphabet) throws IOException, CanceledException {
+ FastAFileIteratorBytes it = new FastAFileIteratorBytes(fileName, alphabet);
+ progressListener.setMaximum(it.getMaximumProgress());
+ progressListener.setProgress(0);
+
+ try {
+ while (it.hasNext()) { // the iterator is consumed two items at a time: first a header, then its sequence
+ byte[] header = it.next();
+ if (it.hasNext()) { // a trailing header that is not followed by a sequence is silently dropped
+ byte[] sequence = it.next();
+ add(header, sequence);
+ progressListener.setProgress(it.getProgress());
+ }
+ }
+ } finally {
+ if (progressListener instanceof ProgressPercentage) // listeners of any other type are left open for the caller
+ progressListener.close();
+ it.close(); // NOTE(review): not reached if progressListener.close() throws
+ }
+ }
+
+ /**
+ * save all headers and sequences to a file, one header line followed by one sequence line per entry
+ * @param fileName name of the file to write
+ * @param progressListener receives one increment per entry; it is closed at the end only if it is a ProgressPercentage
+ * @throws IOException if the file cannot be opened or written; CanceledException as raised by the progress listener
+ */
+ public void saveFastAFile(String fileName, ProgressListener progressListener) throws IOException, CanceledException {
+     progressListener.setMaximum(numberOfSequences);
+     progressListener.setProgress(0);
+
+     // write the raw bytes: concatenating a byte[] with a String emits its identity string ("[B@...") instead of its content
+     try (OutputStream out = new BufferedOutputStream(new FileOutputStream(fileName), 8192)) {
+         for (int i = 0; i < numberOfSequences; i++) {
+             out.write(headers[i]);
+             out.write('\n');
+             out.write(sequences[i]);
+             out.write('\n');
+             progressListener.incrementProgress();
+         }
+     } finally {
+         if (progressListener instanceof ProgressPercentage)
+             progressListener.close();
+     }
+ }
+
+ /**
+ * Save the reference data as an index file, a data file and a small text file holding the totals
+ * @param refIndexFile receives, for each reference, the position (a long) of its record in refDBFile
+ * @param refDBFile receives, for each reference: sequence length (int), sequence, header length (int), header
+ * @param refInfFile receives the lines "sequences\t&lt;count&gt;" and "letters\t&lt;count&gt;"
+ * @param saveFirstWordOnly if true, only the first word of each header is stored (see getFirstWord)
+ * @throws IOException if a file cannot be written; CanceledException as raised by the progress listener
+ */
+ public void save(File refIndexFile, File refDBFile, File refInfFile, boolean saveFirstWordOnly) throws IOException, CanceledException {
+     // progress is incremented once per sequence below, so the maximum must be the number of sequences, not letters
+     final ProgressPercentage progress = new ProgressPercentage("Writing file: " + refIndexFile, numberOfSequences);
+     System.err.println("Writing file: " + refDBFile);
+     try (final OutputWriter refDBOutputStream = new OutputWriter(refDBFile); OutputWriter refIndexOutputStream = new OutputWriter(refIndexFile)) {
+         long dbFilePos = 0;
+         for (int i = 0; i < numberOfSequences; i++) {
+             refIndexOutputStream.writeLong(dbFilePos);
+
+             final byte[] sequence = sequences[i];
+             refDBOutputStream.writeInt(sequence.length);
+             refDBOutputStream.write(sequence, 0, sequence.length);
+             dbFilePos += 4 + sequence.length;
+
+             final byte[] header = (saveFirstWordOnly ? getFirstWord(headers[i]) : headers[i]);
+             refDBOutputStream.writeInt(header.length);
+             refDBOutputStream.write(header, 0, header.length);
+             dbFilePos += 4 + header.length;
+
+             progress.incrementProgress();
+         }
+     } finally {
+         progress.close();
+     }
+     try (final Writer writer = new FileWriter(refInfFile)) { // closed even when a write fails
+         writer.write("sequences\t" + numberOfSequences + "\n");
+         writer.write("letters\t" + numberOfLetters + "\n");
+     }
+ }
+
+ /**
+ * Returns the bytes that precede the first whitespace byte.
+ * Leading whitespace is not skipped, so input that starts with whitespace gives an empty array.
+ *
+ * @param str bytes to scan
+ * @return copy of the first word, or str itself when it contains no whitespace
+ */
+ static public byte[] getFirstWord(byte[] str) {
+     int end = 0;
+     // advance to the first whitespace byte, or to the end of the array
+     while (end < str.length && !Character.isWhitespace(str[end]))
+         end++;
+     if (end == str.length)
+         return str; // no whitespace found: hand back the input array itself
+     return java.util.Arrays.copyOf(str, end);
+ }
+
+ /**
+ * number of sequences
+ *
+ * @return number of header/sequence pairs added so far
+ */
+ public int getNumberOfSequences() {
+ return numberOfSequences;
+ }
+
+ /**
+ * total number of letters
+ *
+ * @return sum of the lengths of all sequences added so far
+ */
+ public long getNumberOfLetters() {
+ return numberOfLetters;
+ }
+
+ /**
+ * extend the header by the given tag. We use this to write the taxon id into a reference sequence
+ * The text "|tag id|" is inserted directly after the first word of the header; the leading '|' is left out if that word already ends in '|'
+ *
+ * @param index 0-based index of the header to extend
+ * @param tag text placed in front of the id
+ * @param id number appended to the tag
+ */
+ public void extendHeader(int index, String tag, Integer id) {
+     byte[] header = headers[index];
+     int pos = 0;
+     // the bounds check must come first: a header without any white space runs pos up to header.length
+     while (pos < header.length && Character.isWhitespace(header[pos])) // skip leading white space
+         pos++;
+     while (pos < header.length && !Character.isWhitespace(header[pos])) // go to next white space or end
+         pos++;
+     byte[] add;
+     if (pos > 0 && header[pos - 1] == '|') // pos is 0 only for an empty header
+         add = String.format("%s%d|", tag, id).getBytes();
+     else
+         add = String.format("|%s%d|", tag, id).getBytes();
+
+     byte[] newHeader = new byte[header.length + add.length];
+     System.arraycopy(header, 0, newHeader, 0, pos);
+     System.arraycopy(add, 0, newHeader, pos, add.length);
+     if (pos < header.length) { // copy whatever followed the first word
+         System.arraycopy(header, pos, newHeader, add.length + pos, header.length - pos);
+     }
+     headers[index] = newHeader;
+ }
+}
diff --git a/src/malt/data/ReferencesHashTableAccess.java b/src/malt/data/ReferencesHashTableAccess.java
new file mode 100644
index 0000000..2762894
--- /dev/null
+++ b/src/malt/data/ReferencesHashTableAccess.java
@@ -0,0 +1,301 @@
+/**
+ * ReferencesHashTableAccess.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.data;
+
+import jloda.util.Basic;
+import jloda.util.CanceledException;
+import jloda.util.ProgressPercentage;
+import malt.MaltOptions;
+import malt.util.MurmurHash3;
+import malt.util.Utilities;
+import megan.io.*;
+import megan.io.experimental.IntFileGetterPagedMemory;
+import megan.io.experimental.LongFileGetterPagedMemory;
+
+import java.io.*;
+
+/**
+ * hash table used for mapping k-mers to sequences and offsets (given by a pair of integers)
+ * Daniel Huson, 8.2014
+ */
+
+public class ReferencesHashTableAccess implements Closeable {
+ public static int BUFFER_SIZE = 8192; // benchmarking suggested that choosing a large size doesn't make a difference
+ private final ILongGetter tableIndexGetter; // each entry points to a row of integers that is contained in the data table
+
+ private final int tableSize;
+ private final int hashMask;
+
+ private final int randomNumberSeed;
+
+ private long theSize = 0; // counts items
+
+ private final IAlphabet seedAlphabet; // alphabet used by seeds
+ private final SeedShape seedShape; // seed shape that is saved and loaded from index
+
+ private IIntGetter tableDataGetter; // used for memory mapping
+
+ /**
+ * construct the table from the given directory
+ *
+ * @param indexDirectory
+ */
+ public ReferencesHashTableAccess(MaltOptions.MemoryMode memoryMode, String indexDirectory, int tableNumber) throws IOException, CanceledException {
+ final File indexFile = new File(indexDirectory, "index" + tableNumber + ".idx");
+ final File tableIndexFile = new File(indexDirectory, "table" + tableNumber + ".idx");
+ final File tableDataFile = new File(indexDirectory, "table" + tableNumber + ".db");
+
+ try (DataInputStream ins = new DataInputStream(new BufferedInputStream(new FileInputStream(indexFile), BUFFER_SIZE))) {
+ ProgressPercentage progress = new ProgressPercentage("Reading file: " + indexFile);
+ Basic.readAndVerifyMagicNumber(ins, ReferencesHashTableBuilder.MAGIC_NUMBER);
+
+ SequenceType referenceSequenceType = SequenceType.valueOf(ins.readInt());
+ System.err.println("Reference sequence type: " + referenceSequenceType.toString());
+ if (referenceSequenceType == SequenceType.Protein) {
+ int length = ins.readInt();
+ byte[] reductionBytes = new byte[length];
+ if (ins.read(reductionBytes, 0, length) != length)
+ throw new IOException("Read failed");
+ seedAlphabet = new ReducedAlphabet(Basic.toString(reductionBytes));
+ System.err.println("Protein reduction: " + seedAlphabet);
+ } else
+ seedAlphabet = DNA5.getInstance();
+
+ // get all sizes:
+ tableSize = ins.readInt();
+ // get mask used in hashing
+ hashMask = ins.readInt();
+
+ randomNumberSeed = ins.readInt();
+ theSize = ins.readLong();
+ final int stepSize = ins.readInt();
+ if (stepSize > 1)
+ System.err.println("Index was built using stepSize=" + stepSize);
+
+ {
+ int length = ins.readInt();
+ byte[] shapeBytes = new byte[length];
+ if (ins.read(shapeBytes, 0, length) != length)
+ throw new IOException("Read failed");
+ seedShape = new SeedShape(seedAlphabet, shapeBytes);
+ }
+
+ progress.reportTaskCompleted();
+ }
+
+ switch (memoryMode) {
+ default:
+ case load:
+ tableIndexGetter = new LongFileGetterInMemory(tableIndexFile);
+ tableDataGetter = new IntFileGetterInMemory(tableDataFile);
+ break;
+ case page:
+ tableIndexGetter = new LongFileGetterPagedMemory(tableIndexFile);
+ tableDataGetter = new IntFileGetterPagedMemory(tableDataFile);
+ break;
+ case map:
+ tableIndexGetter = new LongFileGetterMappedMemory(tableIndexFile);
+ tableDataGetter = new IntFileGetterMappedMemory(tableDataFile);
+ break;
+ }
+ }
+
+ /**
+ * Looks up all entries stored for a key and loads them into the given row; the row is set to empty if there are none.
+ * todo: re-implement this
+ * @param key seed bytes to look up
+ * @param row receives the entries
+ */
+ public int lookup(byte[] key, Row row) throws IOException {
+     final int hashValue = getHash(key);
+     final boolean inRange = hashValue >= 0 && hashValue < tableIndexGetter.limit();
+     if (inRange && setRow(tableIndexGetter.get(hashValue), row))
+         return row.size();
+     row.setEmpty();
+     return 0;
+ }
+
+ /**
+ * get the hash value: MurmurHash3 of the key, seeded with randomNumberSeed and masked with hashMask
+ *
+ * @param key bytes to hash
+ * @return hash value, always below Basic.MAX_ARRAY_SIZE
+ */
+ public int getHash(byte[] key) {
+ int value = MurmurHash3.murmurhash3x8632(key, 0, key.length, randomNumberSeed) & hashMask;
+ if (value >= Basic.MAX_ARRAY_SIZE) // only use modulo if we are on or above the maximum array size
+ value %= Basic.MAX_ARRAY_SIZE;
+ return value;
+ }
+
+ /**
+ * get the number of entries
+ *
+ * @return number of entries, as read from the index file by the constructor
+ */
+ public long size() {
+ return theSize;
+ }
+
+ /**
+ * get the seed shape associated with this table
+ *
+ * @return seed shape, as read from the index file by the constructor
+ */
+ public SeedShape getSeedShape() {
+ return seedShape;
+ }
+
+ /**
+ * show the beginning of the hash table (entries 0 to 50, at most 51 pairs per entry) in human readable form
+ *
+ * @throws java.io.IOException
+ */
+ public void show() throws IOException {
+     System.err.println("Table (" + tableSize + "):");
+
+     Row row = new Row();
+
+     // only entries 0..50 are printed, so stop there instead of stepping through the whole index
+     final long top = Math.min(51, tableIndexGetter.limit());
+     for (int z = 0; z < top; z++) {
+         System.err.print("hash " + z + " -> ");
+         if (setRow(tableIndexGetter.get(z), row)) {
+             System.err.print("(" + row.size() / 2 + ")");
+             for (int i = 0; i < row.size(); i += 2) {
+                 if (i > 100) {
+                     System.err.print(" ...");
+                     break;
+                 }
+                 System.err.print(" " + row.get(i) + "/" + row.get(i + 1));
+             }
+         }
+         System.err.println();
+     }
+ }
+
+ /**
+ * set the row for the given location, where location is an entry of the table index
+ *
+ * @param location 0 for an empty entry, a negative value for a singleton, a positive value for a position in the table data
+ * @param row receives the entries
+ * @return false, if location invalid
+ */
+ private boolean setRow(long location, Row row) throws IOException {
+ if (location == 0) // 0 marks a hash value that has no entries
+ return false;
+ if (location < 0) {
+ location = -location; // negated value holds the pair itself: first number in the high 32 bits, second in the low 32 bits
+ row.setPair((int) (location >> 32), (int) location); // is a singleton entry
+ } else {
+ int length = tableDataGetter.get(location); // length is number int's that follow this first int that tells us the length
+ if (row.tmpArray.length <= length) // need length + 1 slots: one for the length and one per number
+ row.tmpArray = new int[length + 1];
+ row.tmpArray[0] = length;
+ for (int i = 1; i <= length; i++) // copy the numbers out of the table data into the row's own buffer
+ row.tmpArray[i] = tableDataGetter.get(location + i);
+ row.set(row.tmpArray, 0);
+ }
+ return true;
+ }
+
+ /**
+ * get alphabet used for seeds. Note that the seed alphabet may differ from the query alphabet, e.g. when using a protein reduction alphabet for seeding
+ *
+ * @return seed alphabet: a ReducedAlphabet for protein references, otherwise the DNA5 instance (set in the constructor)
+ */
+ public IAlphabet getSeedAlphabet() {
+ return seedAlphabet;
+ }
+
+ /**
+ * Checks that the index directory and the three files of the given table exist.
+ *
+ * @param indexDirectory directory containing the index files
+ * @throws IOException
+ */
+ public static void checkFilesExist(String indexDirectory, int tableNumber) throws IOException {
+     Utilities.checkFileExists(new File(indexDirectory));
+     final String[] names = {"index" + tableNumber + ".idx", "table" + tableNumber + ".idx", "table" + tableNumber + ".db"};
+     for (String name : names)
+         Utilities.checkFileExists(new File(indexDirectory, name));
+ }
+
+ /**
+ * Counts the tables in an index by probing for index0.idx, index1.idx, ... until one is missing.
+ *
+ * @param indexDirectory directory containing the index files
+ * @return number of consecutively numbered tables found, starting at 0
+ */
+ public static int determineNumberOfTables(String indexDirectory) {
+     int count = 0;
+     for (; ; count++) {
+         if (!new File(indexDirectory, "index" + count + ".idx").exists())
+             return count;
+     }
+ }
+
+ /**
+ * show part of the hash table (entries 0 to 10, at most 51 pairs per entry) in human readable form
+ *
+ * @throws java.io.IOException
+ */
+ public void showAPart() throws IOException {
+     final Row row = new Row();
+
+     System.err.println("Seed table (" + tableIndexGetter.limit() + "):");
+     // only entries 0..10 are printed, so stop there instead of stepping through the whole index
+     final long top = Math.min(11, tableIndexGetter.limit());
+     for (int z = 0; z < top; z++) {
+         System.err.print("hash " + z + " -> ");
+         if (setRow(tableIndexGetter.get(z), row)) {
+             System.err.print("(" + row.size() / 2 + ")");
+             for (int i = 0; i < row.size(); i += 2) {
+                 if (i > 100) {
+                     System.err.print(" ...");
+                     break;
+                 }
+                 System.err.print(" " + row.get(i) + "/" + row.get(i + 1));
+             }
+         }
+         System.err.println();
+     }
+
+ }
+
+ /**
+ * read the reference sequence type from index0.idx in the given directory, after verifying the magic number
+ *
+ * @param indexDirectory directory containing the index files
+ */
+ public static SequenceType getIndexSequenceType(String indexDirectory) throws IOException, CanceledException {
+ File indexFile = new File(indexDirectory, "index0.idx");
+ try (DataInputStream ins = new DataInputStream(new BufferedInputStream(new FileInputStream(indexFile), 8192))) {
+ Basic.readAndVerifyMagicNumber(ins, ReferencesHashTableBuilder.MAGIC_NUMBER);
+ return SequenceType.valueOf(ins.readInt()); // the type is the first int after the magic number
+ }
+ }
+
+ public void close() { // closes the table index getter and then the table data getter
+ tableIndexGetter.close();
+ tableDataGetter.close();
+ }
+}
+
diff --git a/src/malt/data/ReferencesHashTableBuilder.java b/src/malt/data/ReferencesHashTableBuilder.java
new file mode 100644
index 0000000..fd95a61
--- /dev/null
+++ b/src/malt/data/ReferencesHashTableBuilder.java
@@ -0,0 +1,504 @@
+/**
+ * ReferencesHashTableBuilder.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.data;
+
+import jloda.util.Basic;
+import jloda.util.ProgressPercentage;
+import jloda.util.Single;
+import malt.util.MurmurHash3;
+import malt.util.Utilities;
+import megan.io.IntFilePutter;
+import megan.io.OutputWriter;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Random;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+/**
+ * builds the reference hash table
+ * Daniel Huson, 8.2014
+ */
+
+public class ReferencesHashTableBuilder {
+ public static byte[] MAGIC_NUMBER = "MATableV0.12.".getBytes();
+
+ private final SequenceType referenceSequenceType;
+ private final IAlphabet alphabet;
+
+ private long[] tableIndex; // main table index
+
+ private IntFilePutter tableDataPutter; // main table data
+
+ private final int tableSize;
+ private final int hashMask; // use bit mask rather than modulo (five times as fast)
+
+ private final int randomNumberSeed;
+
+ private long theSize = 0; // counts items
+
+ private final int maxHitsPerHash; // this was 10000000
+
+ private final SeedShape seedShape; // seed shape that is saved and loaded from index
+
+ private final int SYNC_BITMASK = 1023;
+ // use lots of objects to synchronize on so that threads don't get in each other's way
+ private final Object[] syncObjects = new Object[SYNC_BITMASK + 1];
+
+ private final int stepSize;
+
+ /**
+ * constructor: determines the table size and the hash mask from the estimated number of seeds
+ *
+ * @param referenceSequenceType type of the reference sequences
+ * @param alphabet alphabet used for the seeds
+ * @param seedShape seed shape
+ * @param numberOfSequences number of reference sequences
+ * @param numberOfLetters total number of letters in the reference sequences
+ * @param randomNumberSeed seed passed to the hash function
+ * @param maxHitPerSeed hash values that collect more than this many seeds are dropped when the table is allocated
+ * @param hashTableLoadFactor factor applied to the estimated number of entries before rounding up to a power of two
+ * @param stepSize distance between consecutive seed start positions
+ */
+ public ReferencesHashTableBuilder(SequenceType referenceSequenceType, IAlphabet alphabet, SeedShape seedShape,
+                                   int numberOfSequences, long numberOfLetters, int randomNumberSeed, int maxHitPerSeed, float hashTableLoadFactor, int stepSize) throws IOException {
+     this.referenceSequenceType = referenceSequenceType;
+     this.alphabet = alphabet;
+     this.seedShape = seedShape;
+     this.randomNumberSeed = randomNumberSeed;
+     this.stepSize = stepSize;
+
+     // total is numberOfLetters minus last letter of each sequence divided by stepSize
+     // the (long) cast keeps the product out of int arithmetic, where it overflows for large numbers of sequences
+     final long totalNumberOfSeeds = (long) (Math.ceil((numberOfLetters - ((long) numberOfSequences * (seedShape.getLength() - 1))) / stepSize));
+     // number of possible different seed values:
+     final long numberOfPossibleHashValues = (long) Math.ceil(Math.pow(alphabet.size(), seedShape.getWeight()));
+     System.err.println("Number of seeds in references: " + totalNumberOfSeeds);
+     long entriesPerTable = (long) (hashTableLoadFactor * Math.min(totalNumberOfSeeds, numberOfPossibleHashValues)); // assume only 90% are used
+
+     if (entriesPerTable >= Integer.MAX_VALUE / 2) {
+         tableSize = Basic.MAX_ARRAY_SIZE;
+         hashMask = Integer.MAX_VALUE;
+     } else {
+         long size = 1;
+         while (entriesPerTable > size) {
+             size *= 2;
+         }
+         tableSize = (int) size;
+         hashMask = tableSize - 1;
+     }
+
+     System.err.println("TableSize= " + tableSize);
+     System.err.println("hashMask.length= " + Integer.toBinaryString(hashMask).length());
+
+     maxHitsPerHash = maxHitPerSeed; // we use the same value because the actual number of seeds used is usually smaller than the table size
+     System.err.println("maxHitsPerHash set to: " + maxHitsPerHash);
+
+     final ProgressPercentage progress = new ProgressPercentage("Initializing arrays...");
+     for (int i = 0; i < syncObjects.length; i++) {
+         syncObjects[i] = new Object();
+     }
+
+     progress.reportTaskCompleted();
+ }
+
+ /**
+ * build the hash table: count the seeds, allocate the rows, fill them, shuffle them, then write the table index
+ * @param referencesDB reference sequences to index (tableIndexFile receives the table index, tableDataFile the rows)
+ * @param numberOfThreads number of worker threads requested (buildTableInMemory is handed on to IntFilePutter)
+ */
+ public void buildTable(final File tableIndexFile, final File tableDataFile, final ReferencesDBBuilder referencesDB, int numberOfThreads, boolean buildTableInMemory) throws IOException {
+ tableIndex = new long[tableSize];
+
+ countSeeds(referencesDB, numberOfThreads); // tableIndex[h] now holds the number of seeds with hash h
+ long limit = allocateTable(numberOfThreads); // counts are replaced by row positions; limit is the first unused position
+ tableDataPutter = new IntFilePutter(tableDataFile, limit + 1, buildTableInMemory); // limit+1 because we start with index 1
+ fillTable(referencesDB, numberOfThreads);
+ randomizeBuildRows(numberOfThreads);
+ saveTableIndex(tableIndex, tableIndexFile);
+ tableIndex = null; // drop the reference to the index array once it has been written
+ tableDataPutter.close(); // NOTE(review): not reached if one of the steps above throws
+ }
+
+ /**
+ * save the table index as a sequence of longs, one per table entry
+ * @param tableIndex the index to write
+ * @param tableIndexFile the file to write to
+ * @throws IOException
+ */
+ private void saveTableIndex(long[] tableIndex, File tableIndexFile) throws IOException {
+ final ProgressPercentage progress = new ProgressPercentage("Writing file: " + tableIndexFile, tableIndex.length);
+ try (OutputWriter outs = new OutputWriter(tableIndexFile)) {
+ for (long value : tableIndex) {
+ outs.writeLong(value);
+ progress.incrementProgress();
+ }
+ }
+ progress.close(); // skipped if writing throws
+ }
+
+ /**
+ * count the seeds. Here tableIndex holds the per-hash-value counts; allocateTable later replaces the counts by locations
+ *
+ * @param referencesDB reference sequences to scan
+ * @param numberOfThreads0 requested number of threads; at most one thread per sequence is used
+ */
+ private void countSeeds(final ReferencesDBBuilder referencesDB, int numberOfThreads0) {
+ final int numberOfThreads = Math.min(referencesDB.getNumberOfSequences(), numberOfThreads0);
+ final ExecutorService executor = Executors.newFixedThreadPool(numberOfThreads);
+
+ final CountDownLatch countDownLatch = new CountDownLatch(numberOfThreads);
+ final ProgressPercentage progressPercentage = new ProgressPercentage("Analysing seeds...", referencesDB.getNumberOfSequences());
+ final int[] countsForProgress = new int[numberOfThreads]; // one slot per thread, summed by the progress loop below
+ final long[] countLowComplexitySeeds = new long[numberOfThreads]; // one slot per thread
+
+ // launch the worker threads
+ for (int i = 0; i < numberOfThreads; i++) {
+ final int threadNumber = i;
+
+ executor.execute(new Runnable() {
+ public void run() {
+ try {
+ final byte[] seedBytes = seedShape.createBuffer();
+ for (int refIndex = threadNumber; refIndex < referencesDB.getNumberOfSequences(); refIndex += numberOfThreads) { // thread t handles sequences t, t+n, t+2n, ...
+ byte[] sequence = referencesDB.getSequence(refIndex);
+ int top = sequence.length - seedShape.getLength() + 1; // number of positions at which a complete seed fits
+ for (int pos = 0; pos < top; pos += stepSize) {
+ seedShape.getSeed(sequence, pos, seedBytes);
+ if (!Utilities.hasAtMostTwoLetters(seedBytes)) {
+ int hashValue = getHash(seedBytes);
+ synchronized (syncObjects[hashValue & SYNC_BITMASK]) {
+ if (tableIndex[hashValue] <= maxHitsPerHash) // count stops at maxHitsPerHash + 1, which allocateTable treats as "too many"
+ tableIndex[hashValue]++;
+ }
+ } else
+ countLowComplexitySeeds[threadNumber]++;
+ }
+ countsForProgress[threadNumber]++;
+ }
+ } finally {
+ countDownLatch.countDown();
+ }
+ }
+ });
+ }
+
+ // wait for jobs to complete:
+ while (countDownLatch.getCount() > 0) {
+ try {
+ Thread.sleep(100); // sleep and then report progress
+ } catch (InterruptedException e) {
+ Basic.caught(e);
+ break; // NOTE(review): stops waiting although the workers may still be running
+ }
+ progressPercentage.setProgress(Basic.getSum(countsForProgress));
+ }
+ progressPercentage.close();
+ System.err.println("Number of low-complexity seeds skipped: " + Basic.getSum(countLowComplexitySeeds));
+ executor.shutdown();
+ }
+
+ /**
+ * allocate the hash table: replaces each count in tableIndex by 0 (dropped), -1 (singleton) or the position of a row
+ *
+ * @param numberOfThreads0 requested number of threads; the return value is the first position after the last row
+ */
+ private long allocateTable(final int numberOfThreads0) throws IOException {
+ final int numberOfThreads = Math.min(tableSize, numberOfThreads0);
+ final ExecutorService executor = Executors.newFixedThreadPool(numberOfThreads);
+
+ final CountDownLatch countDownLatch = new CountDownLatch(numberOfThreads);
+ ProgressPercentage progressPercentage = new ProgressPercentage("Allocating hash table...", tableSize);
+ final int[] countsForProgress = new int[numberOfThreads];
+
+ final long[] totalKeys = new long[numberOfThreads]; // per thread: hash values that keep their seeds
+ final long[] totalSeeds = new long[numberOfThreads]; // per thread: seeds that will be stored
+ final long[] totalDropped = new long[numberOfThreads]; // per thread: sum of the (capped) counts of dropped hash values
+
+ final Single<Long> nextFreeIndex = new Single<>(1L); // positions start at 1 because 0 marks an empty entry
+
+ // launch the worker threads
+ for (int i = 0; i < numberOfThreads; i++) {
+ final int threadNumber = i;
+
+ executor.execute(new Runnable() {
+ public void run() {
+ try {
+ for (long index = threadNumber; index < tableSize; index += numberOfThreads) {
+ final long count = tableIndex[(int) index]; // here count is number of seeds that will be saved for given index
+
+ if (count > maxHitsPerHash) {
+ tableIndex[(int) index] = 0L; // need to overwrite the count
+ totalDropped[threadNumber] += count;
+ } else if (count > 1) {
+ totalSeeds[threadNumber] += count;
+ totalKeys[threadNumber]++;
+ synchronized (nextFreeIndex) { // rows are handed out in the order in which threads arrive here
+ final long location = nextFreeIndex.get();
+ tableIndex[(int) index] = location;
+ nextFreeIndex.set(location + 2 * count + 1); // reserve one int for the length plus two ints per seed
+ }
+ } else if (count == 1) { // will write refInd and offset directly into table, use value of -1 to indicate this
+ totalSeeds[threadNumber]++;
+ totalKeys[threadNumber]++;
+ tableIndex[(int) index] = -1L;
+ } else if (count < 0)
+ throw new IOException("negative count: " + count);
+ countsForProgress[threadNumber]++;
+ }
+ } catch (Exception ex) {
+ Basic.caught(ex);
+ System.exit(1); // any failure in a worker terminates the whole program
+ } finally {
+ countDownLatch.countDown();
+ }
+ }
+ });
+ }
+
+ // wait for jobs to complete:
+ while (countDownLatch.getCount() > 0) {
+ try {
+ Thread.sleep(100); // sleep and then report progress
+ } catch (InterruptedException e) {
+ Basic.caught(e);
+ break;
+ }
+ progressPercentage.setProgress(Basic.getSum(countsForProgress));
+ }
+ progressPercentage.reportTaskCompleted();
+ System.err.println(String.format("Total keys used: %12d", Basic.getSum(totalKeys)));
+ System.err.println(String.format("Total seeds matched:%12d", Basic.getSum(totalSeeds)));
+ System.err.println(String.format("Total seeds dropped:%12d", Basic.getSum(totalDropped)));
+ // shut down threads:
+ executor.shutdownNow();
+ return nextFreeIndex.get();
+ }
+
+
+ /**
+ * Fill the hash table: stores (reference index, position) for every seed whose hash value was kept by allocateTable
+ *
+ * @param referencesDB reference sequences to scan
+ * @param numberOfThreads0 requested number of threads; at most one thread per sequence is used
+ */
+ private void fillTable(final ReferencesDBBuilder referencesDB, int numberOfThreads0) {
+ final int numberOfThreads = Math.min(referencesDB.getNumberOfSequences(), numberOfThreads0);
+ final ExecutorService executor = Executors.newFixedThreadPool(numberOfThreads);
+
+ // populate the table
+ final ProgressPercentage progressPercentage = new ProgressPercentage("Filling hash table...", referencesDB.getNumberOfSequences());
+ final CountDownLatch countDownLatch = new CountDownLatch(numberOfThreads);
+ final int[] countsForProgress = new int[numberOfThreads];
+ final long[] counts = new long[numberOfThreads]; // per thread: seeds visited that are not low-complexity, stored or not
+
+ // launch the worker threads
+ for (int i = 0; i < numberOfThreads; i++) {
+ final int threadNumber = i;
+
+ executor.execute(new Runnable() {
+ public void run() {
+ try {
+ final byte[] seedBytes = seedShape.createBuffer();
+ for (int refIndex = threadNumber; refIndex < referencesDB.getNumberOfSequences(); refIndex += numberOfThreads) {
+ final byte[] sequence = referencesDB.getSequence(refIndex);
+ final int top = sequence.length - seedShape.getLength() + 1;
+ for (int pos = 0; pos < top; pos += stepSize) {
+ seedShape.getSeed(sequence, pos, seedBytes);
+ if (!Utilities.hasAtMostTwoLetters(seedBytes)) {
+ final int hashValue = getHash(seedBytes);
+
+ synchronized (syncObjects[hashValue & SYNC_BITMASK]) {
+ final long location = tableIndex[hashValue];
+ if (location == -1) { // has been marked as singleton, so store value directly
+ final long value = -(((long) refIndex << 32) | pos); // NOTE(review): refIndex 0 at pos 0 gives 0, which ReferencesHashTableAccess.setRow reads as "empty"
+ tableIndex[hashValue] = value;
+ } else if (location > 0) {
+ final int length = tableDataPutter.get(location); // number of ints already stored in this row
+ tableDataPutter.put(location, length + 2); // row grows by one pair
+ tableDataPutter.put(location + length + 1, refIndex);
+ tableDataPutter.put(location + length + 2, pos);
+ }
+ }
+ counts[threadNumber]++;
+ }
+ }
+ countsForProgress[threadNumber]++;
+ }
+ } finally {
+ countDownLatch.countDown();
+ }
+ }
+ });
+ }
+
+ // wait for jobs to complete:
+ while (countDownLatch.getCount() > 0) {
+ try {
+ Thread.sleep(100); // sleep and then report progress
+ } catch (InterruptedException e) {
+ Basic.caught(e);
+ break;
+ }
+ progressPercentage.setProgress(Basic.getSum(countsForProgress));
+ }
+ progressPercentage.reportTaskCompleted();
+
+ // shut down threads:
+ executor.shutdownNow();
+
+ theSize = Basic.getSum(counts); // this total is what saveIndexFile writes as the size
+ }
+
+ /**
+ * randomize the rows of the table, parallel version; rows holding a single pair are left as they are
+ *
+ * @param numberOfThreads number of worker threads to start
+ */
+ private void randomizeBuildRows(final int numberOfThreads) {
+ final ProgressPercentage progressPercentage = new ProgressPercentage("Randomizing rows...", tableSize);
+
+ final ExecutorService executor = Executors.newFixedThreadPool(numberOfThreads);
+ final CountDownLatch countDownLatch = new CountDownLatch(numberOfThreads);
+ final int[] countsForProgress = new int[numberOfThreads];
+
+ // launch the worker threads
+ for (int i = 0; i < numberOfThreads; i++) {
+ final int threadNumber = i;
+ executor.execute(new Runnable() {
+ public void run() {
+ try {
+ final Random random = new Random();
+ for (long index = threadNumber; index < tableSize; index += numberOfThreads) { // need to use long otherwise can get overflow
+ if (index < tableIndex.length) {
+ long location = tableIndex[(int) index];
+ if (location > 0) { // positive entries are row positions; 0 and negative entries have no row
+ int size = tableDataPutter.get(location); // number of ints in the row, two per pair
+ if (size > 2) {
+ random.setSeed(index * index); // use location in hash table as seed.
+ Utilities.randomizePairs(tableDataPutter, location + 1, size, random);
+ }
+ }
+ }
+ countsForProgress[threadNumber]++;
+ }
+ } catch (Exception ex) {
+ Basic.caught(ex); // the failure is reported and this worker stops; the other workers continue
+ } finally {
+ countDownLatch.countDown();
+ }
+ }
+ });
+ }
+
+ // wait for all tasks to be completed:
+ while (countDownLatch.getCount() > 0) {
+ try {
+ Thread.sleep(100); // sleep and then report progress
+ } catch (InterruptedException e) {
+ Basic.caught(e);
+ break;
+ }
+ progressPercentage.setProgress(Basic.getSum(countsForProgress));
+ }
+ progressPercentage.reportTaskCompleted();
+
+ // shut down threads:
+ executor.shutdownNow();
+ }
+
+
+ /**
+ * compute the hash value of a key: MurmurHash3 seeded with randomNumberSeed and masked with hashMask
+ * the method only reads final fields and does no synchronization of its own
+ *
+ * @param key bytes to hash
+ * @return hash value, always below Basic.MAX_ARRAY_SIZE
+ */
+ public int getHash(byte[] key) {
+ int value = MurmurHash3.murmurhash3x8632(key, 0, key.length, randomNumberSeed) & hashMask; // & also removes negative sign
+
+ if (value >= Basic.MAX_ARRAY_SIZE) // can only happen when hashMask is Integer.MAX_VALUE, i.e. for the largest table
+ value %= Basic.MAX_ARRAY_SIZE;
+ return value;
+ }
+
+ /**
+ * get the number of entries
+ *
+ * @return number of entries; 0 until fillTable has run
+ */
+ public long size() {
+ return theSize;
+ }
+
+ /**
+ * save master index file; the fields are written in the order in which the ReferencesHashTableAccess constructor reads them
+ *
+ * @param file the file to write
+ * @throws IOException
+ */
+ public void saveIndexFile(File file) throws IOException {
+ final ProgressPercentage progressPercentage = new ProgressPercentage("Writing file: " + file);
+
+ try (OutputWriter outs = new OutputWriter(file)) {
+ outs.write(MAGIC_NUMBER, 0, MAGIC_NUMBER.length);
+ outs.writeInt(SequenceType.rankOf(referenceSequenceType));
+ if (referenceSequenceType == SequenceType.Protein) { // only protein indices store the alphabet, as length + bytes
+ final byte[] bytes = alphabet.toString().getBytes();
+ outs.writeInt(bytes.length);
+ outs.write(bytes, 0, bytes.length);
+ }
+ outs.writeInt(tableSize);
+ outs.writeInt(hashMask);
+ outs.writeInt(randomNumberSeed);
+ outs.writeLong(theSize);
+ outs.writeInt(stepSize);
+
+ final byte[] shapeBytes = seedShape.getBytes(); // seed shape is stored as length + bytes
+ outs.writeInt(shapeBytes.length);
+ outs.write(shapeBytes, 0, shapeBytes.length);
+ } finally {
+ progressPercentage.reportTaskCompleted();
+
+ }
+ }
+
+ /**
+ * Checks that the three files of a table can be created, by deleting any existing file and creating an empty one.
+ *
+ * @param indexDirectory directory that is to contain the index files
+ * @param tableNumber number used in the file names
+ * @throws IOException if a file cannot be created
+ */
+ public static void checkCanWriteFiles(String indexDirectory, int tableNumber) throws IOException {
+     final String[] names = {"index" + tableNumber + ".idx", "table" + tableNumber + ".idx", "table" + tableNumber + ".db"};
+     for (String name : names) {
+         final File file = new File(indexDirectory, name);
+         // NOTE(review): an existing file that cannot be deleted passes this check without an exception
+         final boolean cleared = !file.exists() || file.delete();
+         if (cleared && !file.createNewFile())
+             throw new IOException("Can't create file: " + file);
+     }
+ }
+
+}
diff --git a/src/malt/data/Row.java b/src/malt/data/Row.java
new file mode 100644
index 0000000..f03a7f9
--- /dev/null
+++ b/src/malt/data/Row.java
@@ -0,0 +1,117 @@
+/**
+ * Row.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.data;
+
+/**
+ * a row of numbers that are stored in a larger array.
+ * Daniel Huson, 8.2014
+ */
+
+public class Row {
+ private int size;
+ private int offset;
+ private int[] containingArray;
+
+ public int[] tmpArray = new int[10000]; // // todo temporary array used during implementation of memory mapped index
+
+ /**
+ * constructor
+ */
+ public Row() {
+ }
+
+ /**
+ * Set the row. array[offset] must contain size, i.e. the number of integers to be used
+ * array[offset+1]... array[offset+size-1] are the numbers
+ *
+ * @param array array containing size followed by entries
+ * @param offset location of size entry in array
+ */
+ public void set(int[] array, int offset) {
+ this.size = array[offset];
+ this.containingArray = array;
+ this.offset = offset + 1;
+ }
+
+ /**
+ * setting a single pair of numbers
+ *
+ * @param refId
+ * @param position
+ */
+ public void setPair(int refId, int position) {
+ size = 2;
+ offset = 0;
+ tmpArray[0] = refId;
+ tmpArray[1] = position;
+ containingArray = tmpArray;
+ }
+
+ /**
+ * set to empty
+ */
+ public void setEmpty() {
+ size = 0;
+ }
+
+ /**
+ * gets the number of int in this row
+ *
+ * @return size
+ */
+ public int size() {
+ return size;
+ }
+
+ /**
+ * use this to access numbers 0,..,size-1
+ *
+ * @param index
+ * @return item
+ */
+ public int get(int index) {
+ return containingArray[offset + index];
+ }
+
+ /**
+ * get offset at which numbers start (position of size entry plus 1)
+ *
+ * @return offset
+ */
+ public int getOffset() {
+ return offset;
+ }
+
+ /**
+ * get string representation
+ *
+ * @return
+ */
+ public String toString() {
+ if (size > 0) {
+ final StringBuilder buf = new StringBuilder();
+ buf.append("(").append(size()).append("): ");
+ for (int i = 0; i < size(); i += 2)
+ buf.append(" ").append(get(i)).append("/").append(get(i + 1));
+ return buf.toString();
+ } else
+ return "null";
+ }
+}
diff --git a/src/malt/data/SeedMatch.java b/src/malt/data/SeedMatch.java
new file mode 100644
index 0000000..952c918
--- /dev/null
+++ b/src/malt/data/SeedMatch.java
@@ -0,0 +1,140 @@
+/**
+ * SeedMatch.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.data;
+
+/**
+ * A seed match as used in the inner loop of the alignment engine
+ * Daniel Huson, 8.2014
+ */
+
+import java.util.Comparator;
+
+/**
+ * a seed match, consisting of a location in a query and in a reference
+ */
+ public class SeedMatch {
+     private int queryOffset;
+     private int referenceOffset;
+     private int rank; // rank of frame. Frame is given by frame[rank]
+     private int seedLength;
+
+     // orders by query offset, then reference offset, then rank, then seed length
+     private static final Comparator<SeedMatch> comparator = new Comparator<SeedMatch>() {
+         public int compare(SeedMatch a, SeedMatch b) {
+             int result = Integer.compare(a.queryOffset, b.queryOffset);
+             if (result == 0)
+                 result = Integer.compare(a.referenceOffset, b.referenceOffset);
+             if (result == 0)
+                 result = Integer.compare(a.rank, b.rank);
+             if (result == 0)
+                 result = Integer.compare(a.seedLength, b.seedLength);
+             return result;
+         }
+     };
+
+     /**
+      * constructor (package-private); use {@link #resizeAndConstructEntries} to obtain instances elsewhere
+      */
+     SeedMatch() {
+     }
+
+     /**
+      * set all four values of the seed match
+      *
+      * @param queryOffset     location in the query
+      * @param referenceOffset location in the reference
+      * @param rank            rank of the frame
+      * @param seedLength      length of the seed
+      * @return this
+      */
+     public SeedMatch set(int queryOffset, int referenceOffset, int rank, int seedLength) {
+         this.seedLength = seedLength;
+         this.rank = rank;
+         this.referenceOffset = referenceOffset;
+         this.queryOffset = queryOffset;
+         return this;
+     }
+
+     public int getRank() {
+         return rank;
+     }
+
+     public int getQueryOffset() {
+         return queryOffset;
+     }
+
+     public int getReferenceOffset() {
+         return referenceOffset;
+     }
+
+     public int getSeedLength() {
+         return seedLength;
+     }
+
+     /**
+      * string representation: query offset and reference offset separated by a slash
+      *
+      * @return string
+      */
+     public String toString() {
+         return queryOffset + "/" + referenceOffset;
+     }
+
+     /**
+      * gets the comparator: compares first by query position and then by reference position;
+      * remaining ties are broken by rank and then by seed length
+      *
+      * @return comparator
+      */
+     static public Comparator<SeedMatch> getComparator() {
+         return comparator;
+     }
+
+     /**
+      * determines whether this seed follows the previous one. It is deemed to follow if both are in the
+      * same frame and their diagonals (reference offset minus query offset) differ by less than 3
+      *
+      * @param prev the previous seed match, may be null
+      * @return true if prev not null and in same frame and similar coordinates
+      */
+     public boolean follows(SeedMatch prev) {
+         if (prev == null || prev.rank != rank)
+             return false;
+         final int diagonal = referenceOffset - queryOffset;
+         final int prevDiagonal = prev.referenceOffset - prev.queryOffset;
+         return Math.abs(diagonal - prevDiagonal) < 3;
+     }
+
+     /**
+      * resize array: the entries of the old array that fit are carried over (same objects),
+      * all further cells of the new array are filled with newly constructed seed matches
+      *
+      * @param array   old array, may be null
+      * @param newSize length of the array to be returned
+      * @return new array
+      */
+     public static SeedMatch[] resizeAndConstructEntries(SeedMatch[] array, int newSize) {
+         final SeedMatch[] result = new SeedMatch[newSize];
+         final int kept = (array == null ? 0 : Math.min(newSize, array.length));
+         if (kept > 0)
+             System.arraycopy(array, 0, result, 0, kept);
+         for (int i = kept; i < newSize; i++)
+             result[i] = new SeedMatch();
+         return result;
+     }
+ }
diff --git a/src/malt/data/SeedShape.java b/src/malt/data/SeedShape.java
new file mode 100644
index 0000000..d321e37
--- /dev/null
+++ b/src/malt/data/SeedShape.java
@@ -0,0 +1,214 @@
+/**
+ * SeedShape.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.data;
+
+import java.io.IOException;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * seed shape
+ * Daniel Huson, 8.2014
+ */
+public class SeedShape {
+ private final String shape;
+ private final int[] map;
+ private final int length;
+ private final int weight;
+ private final IAlphabet alphabet;
+ private int jumpToFirstZero = -1;
+
+ // Source for all seed patterns: Ilie et al. BMC Genomics 2011, 12:280 http://www.biomedcentral.com/1471-2164/12/280
+ public static final String SINGLE_DNA_SEED = "111110111011110110111111";
+ public static final String SINGLE_PROTEIN_SEED = "111101101110111";
+ public static final String[] PROTEIN_SEEDS = new String[]{"111101101110111", "1111000101011001111", "11101001001000100101111", "11101001000010100010100111"};
+ private int id; // id is 0,..,number of seed shapes-1
+
+ /**
+ * constructor
+ *
+ * @param shape
+ * @throws IOException
+ */
+ public SeedShape(IAlphabet alphabet, String shape) throws IOException {
+ this(alphabet, shape.getBytes());
+ }
+
+ /**
+ * constructor
+ *
+ * @param shapeBytes
+ * @throws IOException
+ */
+ public SeedShape(IAlphabet alphabet, byte[] shapeBytes) throws IOException {
+ this.alphabet = alphabet;
+ StringBuilder buf = new StringBuilder();
+ for (byte a : shapeBytes) buf.append((char) a);
+ this.shape = buf.toString();
+ int pos = 0;
+ List<Integer> list = new LinkedList<>();
+ for (int i = 0; i < shapeBytes.length; i++) {
+ byte a = shapeBytes[i];
+ switch (a) {
+ case '0':
+ if (jumpToFirstZero == -1)
+ jumpToFirstZero = i;
+ break;
+ case '1':
+ list.add(pos);
+ break;
+ default:
+ throw new IOException("Illegal character '" + (char) a + "' in shape: " + this.shape);
+ }
+ pos++;
+ }
+ length = shapeBytes.length;
+ weight = list.size();
+ map = new int[weight];
+ int i = 0;
+ for (Integer value : list) {
+ map[i++] = value;
+ }
+ // System.err.println("Seed='" + toString()+"', length: " + getMaxIndex() + ", weight: " + getWeight());
+ // System.err.println("Map: " + Basic.toString(map, ","));
+ }
+
+ /**
+ * gets a spaced seed from the given sequence starting at the given offset
+ *
+ * @param sequence
+ * @param offset
+ * @param result if non-null, is used for result
+ * @return spaced seed
+ */
+ public byte[] getSeed(byte[] sequence, int offset, byte[] result) {
+ if (result == null)
+ result = new byte[weight];
+ for (int i = 0; i < weight; i++) {
+ result[i] = alphabet.getNormalized(sequence[offset + map[i]]);
+ }
+
+ // String seq=new String(sequence).substring(offset,offset+length);
+ // System.err.println("Sequence: "+seq+": seed: "+new String(result));
+ return result;
+ }
+
+ /**
+ * are query and reference equalOverShorterOfBoth sequences at given offset for the given seed shape?
+ *
+ * @param query
+ * @param qOffset
+ * @param reference
+ * @param rOffset
+ * @return true if equalOverShorterOfBoth for seed shape
+ */
+ public boolean equalSequences(byte[] query, int qOffset, byte[] reference, int rOffset) {
+ for (int i = 0; i < weight; i++) {
+ if (!alphabet.equal(query[qOffset + map[i]], reference[rOffset + map[i]])) // sequences are normalized, so ok to compare directly
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * string representation of shaped seed
+ *
+ * @return string
+ */
+ public String toString() {
+ return shape;
+ }
+
+ /**
+ * get bytes
+ *
+ * @return string as bytes
+ */
+ public byte[] getBytes() {
+ return shape.getBytes();
+ }
+
+ /**
+ * length of spaced seed
+ *
+ * @return length
+ */
+ public int getLength() {
+ return length;
+ }
+
+ /**
+ * weight of spaced seed
+ *
+ * @return weight
+ */
+ public int getWeight() {
+ return weight;
+ }
+
+ /**
+ * create correct size byte array for holding seed results
+ *
+ * @return bytes
+ */
+ public byte[] createBuffer() {
+ return new byte[getWeight()];
+ }
+
+ public IAlphabet getAlphabet() {
+ return alphabet;
+ }
+
+ /**
+ * compute the number of positions to jump over to get to first 0
+ *
+ * @return number of ones before first zero
+ */
+ public int getJumpToFirstZero() {
+ return jumpToFirstZero;
+ }
+
+ /**
+ * gets the expected number of seeds
+ *
+ * @param numberOfSequences
+ * @param numberOfLetters
+ * @return expected number of seeds
+ */
+ public long getEstimatedSeedCount(int numberOfSequences, long numberOfLetters, int numberOfJobs) {
+ return Math.max(1, numberOfLetters - numberOfSequences * (weight - 1)) / numberOfJobs;
+ }
+
+ public void setId(int id) {
+ this.id = id;
+ }
+
+ public int getId() {
+ return id;
+ }
+
+ public boolean[] getMask() {
+ boolean[] mask = new boolean[shape.length()];
+ for (int i = 0; i < shape.length(); i++)
+ if (shape.charAt(i) == '1')
+ mask[i] = true;
+ return mask;
+ }
+}
diff --git a/src/malt/data/SequenceType.java b/src/malt/data/SequenceType.java
new file mode 100644
index 0000000..a6bca7a
--- /dev/null
+++ b/src/malt/data/SequenceType.java
@@ -0,0 +1,65 @@
+/**
+ * SequenceType.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.data;
+
+/**
+ * sequence type
+ * Daniel Huson, 8.2014
+ */
+ public enum SequenceType {
+     DNA, // DNA sequence
+     Protein; // protein sequence
+
+     /**
+      * get rank, i.e. the position of the type in the declaration order of this enum
+      *
+      * @param sequenceType the type, may be null
+      * @return rank, or -1 if the type is null
+      */
+     public static int rankOf(SequenceType sequenceType) {
+         return sequenceType == null ? -1 : sequenceType.ordinal();
+     }
+
+     /**
+      * get type from rank
+      *
+      * @param rank position of the type in the declaration order
+      * @return the type with the given rank
+      * @throws ArrayIndexOutOfBoundsException if there is no type with that rank
+      */
+     public static SequenceType valueOf(int rank) {
+         final SequenceType[] all = values();
+         return all[rank];
+     }
+
+     /**
+      * get value ignoring case
+      *
+      * @param label name of a type in any mix of upper and lower case, must not be null
+      * @return value, or null if no type has that name
+      */
+     public static SequenceType valueOfIgnoreCase(String label) {
+         final SequenceType[] all = values();
+         for (int i = 0; i < all.length; i++) {
+             if (label.equalsIgnoreCase(all[i].toString()))
+                 return all[i];
+         }
+         return null;
+     }
+ }
diff --git a/src/malt/data/Translator.java b/src/malt/data/Translator.java
new file mode 100644
index 0000000..545bc3d
--- /dev/null
+++ b/src/malt/data/Translator.java
@@ -0,0 +1,90 @@
+/**
+ * Translator.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.data;
+
+import jloda.util.SequenceUtils;
+
+/**
+ * translate DNA sequences into protein sequences
+ * Daniel Huson, 8.2014
+ */
+ public class Translator {
+     /**
+      * translate a given DNA sequence into protein sequences, one per reading frame that looks like
+      * coding sequence. For each shift 0, 1, 2 the forward frame is tried first (if requested) and
+      * then the reverse frame (if requested). Accepted translations are stored consecutively in the
+      * output arrays; a rejected translation is overwritten by the next attempt.
+      *
+      * @param doForward        translate the forward frames?
+      * @param doReverse        translate the reverse frames?
+      * @param dnaSequence      the DNA sequence
+      * @param length           number of letters of dnaSequence to use
+      * @param frame            output: frame of each result, shift+1 for forward and -(shift+1) for reverse
+      * @param proteinSequences output: buffers that receive the translations
+      * @param proteinLengths   output: number of amino acids of each result
+      * @return number of sequences returned
+      */
+     public static int getBestFrames(boolean doForward, boolean doReverse, byte[] dnaSequence, int length, int[] frame, byte[][] proteinSequences, int[] proteinLengths) {
+         int count = 0;
+         for (int shift = 0; shift < 3; shift++) {
+             if (doForward) {
+                 final byte[] target = proteinSequences[count];
+                 int aminoAcids = 0;
+                 for (int pos = shift; pos < length - 2; pos += 3)
+                     target[aminoAcids++] = SequenceUtils.getAminoAcid(dnaSequence, pos);
+                 if (isPossibleCodingSequence(target, aminoAcids)) {
+                     frame[count] = shift + 1;
+                     proteinLengths[count] = aminoAcids;
+                     count++;
+                 }
+             }
+             if (doReverse) {
+                 final byte[] target = proteinSequences[count];
+                 int aminoAcids = 0;
+                 for (int pos = length - 3 - shift; pos >= 0; pos -= 3)
+                     target[aminoAcids++] = SequenceUtils.getAminoAcidReverse(dnaSequence, pos);
+                 if (isPossibleCodingSequence(target, aminoAcids)) {
+                     frame[count] = -(shift + 1);
+                     proteinLengths[count] = aminoAcids;
+                     count++;
+                 }
+             }
+         }
+         return count;
+     }
+
+     /**
+      * heuristically determine whether this looks like real coding sequence. A stop is the letter '*'
+      *
+      * @param sequence amino acid sequence
+      * @param length   number of letters of sequence to consider
+      * @return true, if there is a stop-free run of at least 20 amino acids or if whole sequence is stop-free
+      */
+     private static boolean isPossibleCodingSequence(byte[] sequence, int length) {
+         int run = 0; // length of the stop-free run that ends at the current position
+         for (int i = 0; i < length; i++) {
+             if (sequence[i] == '*') {
+                 run = 0;
+                 continue;
+             }
+             if (++run == 20)
+                 return true;
+         }
+         // no long run found: accept only if the run covers the whole sequence (true for length 0, too)
+         return run == length;
+     }
+ }
diff --git a/src/malt/genes/GeneItem.java b/src/malt/genes/GeneItem.java
new file mode 100644
index 0000000..c92463f
--- /dev/null
+++ b/src/malt/genes/GeneItem.java
@@ -0,0 +1,189 @@
+/**
+ * GeneItem.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.genes;
+
+import jloda.util.Basic;
+import megan.io.OutputWriter;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+
+/**
+ * a gene item
+ * Daniel Huson, 8.2014
+ */
+ public class GeneItem {
+     private long giNumber;
+     private byte[] product;
+     private byte[] geneName;
+     private byte[] proteinId;
+     private byte[] keggId;
+     private byte[] cogId;
+
+     public GeneItem() {
+     }
+
+     public long getGiNumber() {
+         return giNumber;
+     }
+
+     public void setGiNumber(long giNumber) {
+         this.giNumber = giNumber;
+     }
+
+     public byte[] getProduct() {
+         return product;
+     }
+
+     public void setProduct(byte[] product) {
+         this.product = product;
+     }
+
+     public byte[] getGeneName() {
+         return geneName;
+     }
+
+     public void setGeneName(byte[] geneName) {
+         this.geneName = geneName;
+     }
+
+     public byte[] getProteinId() {
+         return proteinId;
+     }
+
+     public void setProteinId(byte[] proteinId) {
+         this.proteinId = proteinId;
+     }
+
+     public byte[] getKeggId() {
+         return keggId;
+     }
+
+     public void setKeggId(byte[] keggId) {
+         this.keggId = keggId;
+     }
+
+     public byte[] getCogId() {
+         return cogId;
+     }
+
+     public void setCogId(byte[] cogId) {
+         this.cogId = cogId;
+     }
+
+     /**
+      * string representation listing all fields; unset fields are shown as "null"
+      *
+      * @return string
+      */
+     public String toString() {
+         return "gene=" + (geneName == null ? "null" : Basic.toString(geneName))
+                 + " gi=" + giNumber
+                 + ", product=" + (product == null ? "null" : Basic.toString(product))
+                 + ", proteinId=" + (proteinId == null ? "null" : Basic.toString(proteinId))
+                 + ", keggId=" + (keggId == null ? "null" : Basic.toString(keggId))
+                 + ", cogId=" + (cogId == null ? "null" : Basic.toString(cogId));
+     }
+
+     /**
+      * write: the gi number as a long, followed by product, gene name, protein id, KEGG id and COG id,
+      * each as an int length followed by that many bytes. A null or empty field is written as length 0
+      *
+      * @param outs destination
+      * @throws java.io.IOException if writing fails
+      */
+     public void write(OutputWriter outs) throws IOException {
+         outs.writeLong(giNumber);
+         writeBytes(outs, product);
+         writeBytes(outs, geneName);
+         writeBytes(outs, proteinId);
+         writeBytes(outs, keggId);
+         writeBytes(outs, cogId);
+     }
+
+     /**
+      * read the item in the layout produced by {@link #write}. A field of length 0 is set to null
+      *
+      * @param ins source
+      * @throws IOException if reading fails, the input ends early or a field length is negative
+      */
+     public void read(DataInputStream ins) throws IOException {
+         giNumber = ins.readLong();
+         product = readBytes(ins);
+         geneName = readBytes(ins);
+         proteinId = readBytes(ins);
+         keggId = readBytes(ins);
+         cogId = readBytes(ins);
+     }
+
+     /**
+      * writes one field as length plus bytes; null and empty are both written as length 0
+      */
+     private static void writeBytes(OutputWriter outs, byte[] bytes) throws IOException {
+         if (bytes == null || bytes.length == 0)
+             outs.writeInt(0);
+         else {
+             outs.writeInt(bytes.length);
+             outs.write(bytes, 0, bytes.length);
+         }
+     }
+
+     /**
+      * reads one field written as length plus bytes; length 0 yields null
+      */
+     private static byte[] readBytes(DataInputStream ins) throws IOException {
+         final int length = ins.readInt();
+         if (length == 0)
+             return null;
+         if (length < 0)
+             throw new IOException("read failed: negative length " + length);
+         final byte[] bytes = new byte[length];
+         // read() may legally deliver fewer bytes than requested; readFully blocks until all
+         // bytes have arrived and throws EOFException if the input ends first
+         ins.readFully(bytes, 0, length);
+         return bytes;
+     }
+ }
diff --git a/src/malt/genes/GeneTableAccess.java b/src/malt/genes/GeneTableAccess.java
new file mode 100644
index 0000000..37540d9
--- /dev/null
+++ b/src/malt/genes/GeneTableAccess.java
@@ -0,0 +1,209 @@
+/**
+ * GeneTableAccess.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.genes;
+
+import jloda.util.*;
+import malt.analysis.QueryItem;
+import malt.analysis.ReadMatchItem;
+import malt.data.ReadMatch;
+import net.sf.picard.util.IntervalTree;
+
+import java.io.*;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.Map;
+
+/**
+ * class used to access gene table
+ * Daniel Huson, 8.2014
+ */
+public class GeneTableAccess {
+ private final IntervalTree<GeneItem>[] refIndex2IntervalsTable;
+
+ final static private Comparator<ReadMatchItem>
+ unweightedComparator = new Comparator<ReadMatchItem>() {
+ public int compare(ReadMatchItem a, ReadMatchItem b) {
+ if (a.score > b.score)
+ return -1;
+ else if (a.score < b.score)
+ return 1;
+ else
+ return 0;
+ }
+ };
+
+
+ /**
+ * construct the gene table from the gene-table index file
+ *
+ * @param inputFile
+ * @throws IOException
+ */
+ public GeneTableAccess(File inputFile) throws IOException {
+
+ DataInputStream ins = new DataInputStream(new BufferedInputStream(new FileInputStream(inputFile)));
+
+ Basic.readAndVerifyMagicNumber(ins, GeneTableBuilder.MAGIC_NUMBER);
+
+ int tableLength = ins.readInt();
+ ProgressPercentage progress = new ProgressPercentage("Reading file: " + inputFile, tableLength);
+
+ long numberOfGeneLocations = 0;
+ refIndex2IntervalsTable = new IntervalTree[tableLength];
+
+ for (int refIndex = 0; refIndex < tableLength; refIndex++) {
+ int intervalsLength = ins.readInt();
+ if (intervalsLength > 0) {
+ IntervalTree<GeneItem> intervals = new IntervalTree<>();
+ for (int i = 0; i < intervalsLength; i++) {
+ int start = ins.readInt();
+ int end = ins.readInt();
+ GeneItem geneItem = new GeneItem();
+ geneItem.read(ins);
+ intervals.put(start, end, geneItem);
+ //System.err.println(refIndex+"("+start+"-"+end+") -> "+geneItem);
+ numberOfGeneLocations++;
+ }
+ refIndex2IntervalsTable[refIndex] = intervals;
+ }
+ progress.incrementProgress();
+ }
+ progress.close();
+ System.err.println("Number of gene locations: " + numberOfGeneLocations);
+ }
+
+ /**
+ * get genes associated with this read. Genes are reported by decreasing weight of the reference sequence and then decreasing bit score
+ *
+ * @param refIndex2weight
+ * @param matches
+ * @param genes
+ * @return number of genes returned in array "genes"
+ */
+ public int getGenes(final Map<Integer, Integer> refIndex2weight, final ReadMatchItem[] matches, GeneItem[] genes) {
+
+ ReadMatchItem[] sorted = new ReadMatchItem[matches.length];
+ System.arraycopy(matches, 0, sorted, 0, matches.length);
+
+ // sort matches by decreasing weight of reference sequence:
+ if (refIndex2weight != null) {
+ Arrays.sort(sorted, new Comparator<ReadMatchItem>() {
+ public int compare(ReadMatchItem a, ReadMatchItem b) {
+ Integer aWeight = refIndex2weight.get(a.refIndex);
+ Integer bWeight = refIndex2weight.get(b.refIndex);
+ if (aWeight != null) {
+ if (bWeight == null || bWeight < aWeight)
+ return -1;
+ else if (bWeight > aWeight)
+ return 1;
+ else {
+ if (a.score > b.score)
+ return -1;
+ else if (a.score < b.score)
+ return 1;
+ else
+ return 0;
+ }
+ } else if (bWeight != null)
+ return 1;
+ else { // both references have zero weight
+ if (a.score > b.score)
+ return -1;
+ else if (a.score < b.score)
+ return 1;
+ else
+ return 0;
+ }
+ }
+ });
+ } else {
+ Arrays.sort(sorted, unweightedComparator);
+ }
+
+ int numberOfGenes = 0;
+ loop:
+ for (ReadMatchItem match : sorted) {
+ if (match.refIndex < refIndex2IntervalsTable.length) {
+ IntervalTree<GeneItem> intervals = refIndex2IntervalsTable[match.refIndex];
+ if (intervals != null) {
+ for (Iterator<IntervalTree.Node<GeneItem>> it = intervals.iterator(match.refStart, match.refEnd); it.hasNext(); ) {
+ IntervalTree.Node<GeneItem> node = it.next();
+ genes[numberOfGenes++] = node.getValue();
+ if (numberOfGenes == genes.length)
+ break loop;
+ }
+ }
+ }
+ }
+ return numberOfGenes;
+ }
+
+ /**
+ * gets the KEGG id
+ *
+ * @param numberOfMatches
+ * @param readMatches
+ * @return kegg id or 0
+ */
+ public int getKegg(final int numberOfMatches, final ReadMatch[] readMatches) {
+ if (numberOfMatches > 0) {
+ final GeneItem[] genes = new GeneItem[100];
+ final QueryItem queryItem = new QueryItem(null, numberOfMatches, readMatches);
+ int numberOfGenes = getGenes(null, queryItem.getReadMatchItems(), genes);
+ for (int i = 0; i < numberOfGenes; i++) {
+ if (genes[i].getKeggId() != null)
+ return Basic.parseInt(Basic.toString(genes[i].getKeggId()));
+ }
+ }
+ return 0;
+ }
+
+ /**
+ * dump gene table to standard out
+ *
+ * @param args
+ */
+ public static void main(String[] args) throws IOException, UsageException, CanceledException {
+ args = new String[]{"-i", "/Users/huson/data/ma/index/gene-table.idx"};
+
+ final ArgsOptions options = new ArgsOptions(args, null, "GeneTableDump", "Dump gene table");
+ final String inputFile = options.getOptionMandatory("i", "input", "Gene table file", "index/gene-table.idx");
+ final String outputFile = options.getOption("o", "output", "Output file", Basic.replaceFileSuffix(inputFile, ".txt"));
+ options.done();
+
+ final GeneTableAccess geneTableAccess = new GeneTableAccess(new File(inputFile));
+
+ Writer w = new BufferedWriter(new FileWriter(outputFile));
+
+ for (int i = 0; i < geneTableAccess.refIndex2IntervalsTable.length; i++) {
+ final IntervalTree<GeneItem> tree = geneTableAccess.refIndex2IntervalsTable[i];
+ if (tree != null) {
+ w.write("RefIndex=" + i + "\n");
+ for (IntervalTree.Node<GeneItem> gene : tree) {
+ final GeneItem geneItem = gene.getValue();
+ w.write(geneItem.toString() + "\n");
+ }
+ w.write("----\n");
+ }
+ }
+ w.close();
+ }
+}
diff --git a/src/malt/genes/GeneTableBuilder.java b/src/malt/genes/GeneTableBuilder.java
new file mode 100644
index 0000000..6e0c910
--- /dev/null
+++ b/src/malt/genes/GeneTableBuilder.java
@@ -0,0 +1,467 @@
+/**
+ * GeneTableBuilder.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.genes;
+
+import jloda.util.Basic;
+import jloda.util.FileInputIterator;
+import jloda.util.ProgressPercentage;
+import malt.data.ReferencesDBBuilder;
+import megan.classification.ClassificationManager;
+import megan.classification.IdMapper;
+import megan.io.OutputWriter;
+import net.sf.picard.util.IntervalTree;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+
+/**
+ * Builds a table mapping reference indices and positions to genes
+ * Daniel Huson, 8.2014
+ */
+public class GeneTableBuilder {
+ final public static byte[] MAGIC_NUMBER = "MAGenesV0.3.".getBytes(); // written at the very start of the gene table file
+
+ private final int numberOfSyncObjects = 1024;
+ private final Object[] syncObjects = new Object[numberOfSyncObjects]; // use lots of objects to synchronize on so that threads don't get in each other's way
+ private final IdMapper keggMapper; // null if the KEGG GI map is not active
+ private final IdMapper cogMapper; // null if the EGGNOG GI map is not active
+
+ /**
+ * constructor
+ *
+ * @throws IOException
+ */
+ public GeneTableBuilder() throws IOException {
+ // create the synchronization objects
+ for (int i = 0; i < numberOfSyncObjects; i++)
+ syncObjects[i] = new Object();
+
+ if (ClassificationManager.get("KEGG", false).getIdMapper().isActiveMap(IdMapper.MapType.GI))
+ keggMapper = ClassificationManager.get("KEGG", false).getIdMapper();
+ else
+ keggMapper = null;
+ if (ClassificationManager.get("EGGNOG", false).getIdMapper().isActiveMap(IdMapper.MapType.GI))
+ cogMapper = ClassificationManager.get("EGGNOG", false).getIdMapper();
+ else
+ cogMapper = null;
+ }
+
+ /**
+ * Builds the gene table for the given references and then saves it to the index file.
+ *
+ * @param referencesDB the reference sequences
+ * @param inputTableFile name of the gene table input file
+ * @param indexFile the file that the table is written to
+ * @param numberOfThreads number of worker threads to use while building
+ * @throws IOException
+ */
+ public void buildAndSaveGeneTable(final ReferencesDBBuilder referencesDB, final String inputTableFile, final File indexFile, final int numberOfThreads) throws IOException {
+ System.err.println("Building gene table...");
+ final Map<Long, Integer> giToRef = computeGi2RefIndex(referencesDB, numberOfThreads);
+
+ final IntervalTree<GeneItem>[] geneTable = computeTable(referencesDB, giToRef, inputTableFile, numberOfThreads);
+ giToRef.clear(); // the mapping is not used any more once the table has been computed
+
+ writeTable(indexFile, geneTable);
+ }
+
+ /**
+ * Compute the GI to references mapping. Headers from which no positive GI number can be parsed are skipped.
+ *
+ * @param referencesDB the reference sequences whose headers are scanned for GI numbers
+ * @param numberOfThreads number of worker threads; thread t handles every numberOfThreads-th reference index, starting at t+1
+ * @return gi to reference index mapping
+ */
+ private Map<Long, Integer> computeGi2RefIndex(final ReferencesDBBuilder referencesDB, final int numberOfThreads) {
+ final Map<Long, Integer> gi2refIndex = new HashMap<>(referencesDB.getNumberOfSequences(), 1f);
+ final ExecutorService executor = Executors.newFixedThreadPool(numberOfThreads);
+ final CountDownLatch countDownLatch = new CountDownLatch(numberOfThreads);
+ final ProgressPercentage progress = new ProgressPercentage("Mapping GI numbers to references...", referencesDB.getNumberOfSequences());
+
+ // launch the worker threads
+ for (int thread = 0; thread < numberOfThreads; thread++) {
+ final int threadNumber = thread;
+ executor.execute(new Runnable() {
+ public void run() {
+ try {
+ for (int refIndex = threadNumber + 1; refIndex <= referencesDB.getNumberOfSequences(); refIndex += numberOfThreads) {
+ long gi = parseGI(Basic.toString(referencesDB.getHeader(refIndex))); // parsing is done outside of the lock
+ if (gi > 0) {
+ synchronized (gi2refIndex) { // HashMap is not thread-safe, so the concurrent puts must be serialized
+ gi2refIndex.put(gi, refIndex);
+ }
+ }
+ progress.incrementProgress();
+ }
+ } catch (Exception ex) {
+ Basic.caught(ex);
+ System.exit(1); // just die...
+ } finally {
+ countDownLatch.countDown();
+ }
+ }
+ });
+ }
+
+ try {
+ countDownLatch.await(); // await completion of the worker threads
+ } catch (InterruptedException e) {
+ Basic.caught(e);
+ } finally {
+ executor.shutdownNow(); // shut down threads
+ }
+ progress.close();
+ return gi2refIndex;
+ }
+
+ /**
+ * compute the gene location table
+ * The lines of the gene table file are handed out one at a time to numberOfThreads worker threads, which parse them using processALine.
+ * @param referencesDB the reference sequences
+ * @param gi2refIndex maps reference gi numbers to reference indices
+ * @param geneTableFile name of the gene table input file
+ * @param numberOfThreads number of worker threads
+ * @return array of interval trees indexed by reference index; entries for which no line got as far as creating a tree remain null
+ * @throws IOException
+ */
+ private IntervalTree<GeneItem>[] computeTable(final ReferencesDBBuilder referencesDB, final Map<Long, Integer> gi2refIndex, String geneTableFile, int numberOfThreads) throws IOException {
+ final IntervalTree<GeneItem>[] refIndex2Intervals = new IntervalTree[referencesDB.getNumberOfSequences() + 1]; // plus one because refindices start at 1
+ final FileInputIterator it = new FileInputIterator(geneTableFile);
+
+ final ProgressPercentage progress = new ProgressPercentage("Processing file: " + geneTableFile, (new File(geneTableFile)).length() / 100);
+
+ final ExecutorService executor = Executors.newFixedThreadPool(numberOfThreads);
+ final CountDownLatch countDownLatch = new CountDownLatch(numberOfThreads);
+
+ final long[] countLinesRead = new long[]{0L}; // only modified while holding the lock on refIndex2Intervals
+ final long[] countLinesParsed = new long[numberOfThreads]; // one counter per thread, so no locking is needed
+
+ // launch the worker threads
+ for (int thread = 0; thread < numberOfThreads; thread++) {
+ final int threadNumber = thread;
+ executor.execute(new Runnable() {
+ public void run() {
+ try {
+ while (true) { // the shared iterator is only ever touched inside the synchronized block below
+ String aLine;
+ synchronized (refIndex2Intervals) {
+ if (it.hasNext()) {
+ aLine = it.next();
+ progress.setProgress(it.getProgress());
+ countLinesRead[0]++;
+ } else
+ return; // input has finished
+
+ }
+ if (processALine(aLine, referencesDB, gi2refIndex, refIndex2Intervals)) {
+ countLinesParsed[threadNumber]++;
+ }
+ }
+
+ } catch (Exception ex) {
+ Basic.caught(ex);
+ System.exit(1); // just die...
+ } finally {
+ countDownLatch.countDown();
+ }
+ }
+ });
+ }
+
+ try {
+ countDownLatch.await(); // await completion of the worker threads
+ } catch (InterruptedException e) {
+ Basic.caught(e);
+ } finally {
+ // shut down threads:
+ executor.shutdownNow();
+ }
+ it.close();
+ progress.close();
+ System.err.println("Lines parsed: " + Basic.getSum(countLinesParsed) + " of " + countLinesRead[0]);
+ return refIndex2Intervals;
+ }
+
+ /**
+ * processes a line of input and adds genes to appropriate interval tree
+ * Format (six tab-separated fields): reference-gi-number coordinates gene-gi-number protein-id gene-name product
+ * @param aLine the input line
+ * @param referencesDB used to look up the sequence length when the coordinates field is "*"
+ * @param gi2refIndex maps reference gi numbers to reference indices
+ * @param refIndex2Intervals interval trees indexed by reference index; trees are created and filled by this method
+ * @return true, if successfully parsed
+ */
+ private boolean processALine(String aLine, final ReferencesDBBuilder referencesDB, final Map<Long, Integer> gi2refIndex, final IntervalTree<GeneItem>[] refIndex2Intervals) {
+
+ String[] tokens = aLine.split("\t");
+ if (tokens.length == 6) { // lines with any other number of fields are skipped
+ try {
+ GeneItem geneItem = new GeneItem();
+ long referenceGi = Basic.parseLong(tokens[0].trim());
+ if (referenceGi == 0)
+ return false;
+ Integer refIndex = gi2refIndex.get(referenceGi);
+ if (refIndex == null || refIndex == 0)
+ return false; // reference is unknown, skip
+
+ int[] location;
+ if (tokens[1].equals("*")) { // a "*" indicates to use the whole sequence
+ location = new int[]{1, referencesDB.getSequence(refIndex).length};
+ } else {
+ location = parseLocations(tokens[1]);
+ if (location == null || location.length == 0)
+ return false; // no locations, skip
+ }
+ if (tokens[2].equals("*")) { // a "*" indicates to use the same GI number as reference
+ geneItem.setGiNumber(referenceGi);
+ } else
+ geneItem.setGiNumber(Basic.parseLong(tokens[2].trim()));
+ // set ko number:
+ if (keggMapper != null)
+ {
+ Integer ko = keggMapper.getIdFromGI(geneItem.getGiNumber());
+ if (ko != null && ko != 0) {
+ geneItem.setKeggId(String.format("K%05d", ko).getBytes());
+ // System.err.println("gi: "+geneItem.getGiNumber()+" ko: "+ko);
+ }
+ }
+ // set cog:
+ if (cogMapper != null)
+ {
+ Integer cog = cogMapper.getIdFromGI(geneItem.getGiNumber());
+ if (cog != null && cog != 0) {
+ String name = cogMapper.getName2IdMap().get(cog);
+ if (name != null)
+ geneItem.setCogId(name.getBytes());
+ }
+ }
+
+ if (tokens.length > 3) // always true here, as there are exactly six tokens
+ geneItem.setProteinId(tokens[3].trim().getBytes());
+ if (tokens.length > 4)
+ geneItem.setGeneName(tokens[4].trim().getBytes());
+ if (tokens.length > 5)
+ geneItem.setProduct(tokens[5].trim().getBytes());
+
+ if (geneItem.getGiNumber() == 0 && geneItem.getProteinId().length == 0 && (geneItem.getGeneName() == null || geneItem.getGeneName().length == 0)
+ && (geneItem.getProduct() == null || geneItem.getProduct().length == 0))
+ return false; // no info, skip
+
+ synchronized (syncObjects[refIndex % numberOfSyncObjects]) { // all threads working on the same reference index use the same lock object
+ IntervalTree<GeneItem> intervals = refIndex2Intervals[refIndex];
+ if (intervals == null) {
+ intervals = new IntervalTree<>();
+ refIndex2Intervals[refIndex] = intervals;
+ }
+ int start = location[0];
+ int end = location[1];
+ if (start > 0 && end >= start + 50) { // intervals covering fewer than 51 positions are silently dropped
+ int length = end - start + 1;
+ if (length >= 20 && length <= 500000) // length >= 20 always holds here, so this only rejects overly long genes
+ intervals.put(start, end, geneItem);
+ else
+ System.err.println("Unrealistic gene coordinates: " + start + " - " + end + ", length= "
+ + (end - start + 1) + " for gi number=" + referenceGi);
+ }
+
+ if (location.length == 4) { // a second segment is present, it is stored with the same gene item
+ start = location[2];
+ end = location[3];
+ if (start > 0 && end >= start + 50) {
+ int length = end - start + 1;
+ if (length >= 20 && length <= 500000)
+ intervals.put(start, end, geneItem);
+ else
+ System.err.println("Unrealistic gene coordinates: " + start + " - " + end + ", length= "
+ + (end - start + 1) + " for gi number=" + referenceGi);
+ }
+ }
+ return location.length > 0; // note: true even if no interval was actually stored above
+ }
+ } catch (Exception ex) {
+ // deliberately ignored: a line that causes any exception is skipped without a message,
+ // and false is returned below
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Writes the table to the named file: MAGIC_NUMBER, the number of table entries and then, for each entry, the number of intervals followed by start, end and gene item of each interval.
+ *
+ * @param file the file to write to
+ * @param refIndex2Intervals the table; a null entry is written as zero intervals
+ * @throws IOException
+ */
+ private void writeTable(File file, final IntervalTree<GeneItem>[] refIndex2Intervals) throws IOException {
+
+ int refsWithTree = 0;
+ try (OutputWriter writer = new OutputWriter(file)) {
+ writer.write(MAGIC_NUMBER, 0, MAGIC_NUMBER.length);
+
+ writer.writeInt(refIndex2Intervals.length);
+
+ ProgressPercentage progressListener = new ProgressPercentage("Writing file: " + file, refIndex2Intervals.length);
+
+ for (IntervalTree<GeneItem> tree : refIndex2Intervals) {
+ if (tree != null) {
+ writer.writeInt(tree.size());
+ for (IntervalTree.Node<GeneItem> interval : tree) {
+ writer.writeInt(interval.getStart());
+ writer.writeInt(interval.getEnd());
+ interval.getValue().write(writer);
+ }
+ refsWithTree++;
+ } else {
+ writer.writeInt(0);
+ }
+ progressListener.incrementProgress();
+ }
+ progressListener.close();
+ }
+ System.err.println("Reference sequences with at least one gene: " + refsWithTree + " of " + refIndex2Intervals.length);
+ }
+
+ /**
+ * gets the gi number from a string: the first run of digits that follows the first occurrence of "gi|"
+ *
+ * @param string the string to parse, e.g. a reference header
+ * @return the gi number, or 0 if none is found
+ */
+ public static long parseGI(String string) {
+ try {
+ int a = string.indexOf("gi|");
+ if (a >= 0 && a + "gi|".length() < string.length()) {
+ a += "gi|".length();
+ while (a < string.length() && !Character.isDigit(string.charAt(a))) // bounds check first, so that charAt never runs past the end
+ a++;
+ int b = a;
+ while (b < string.length() && Character.isDigit(string.charAt(b))) {
+ b++;
+ }
+ if (a < b)
+ return Basic.parseLong(string.substring(a, b));
+ }
+ } catch (Exception ex) { // deliberately ignored: any failure (e.g. a null string) is reported as "no gi number"
+ }
+ return 0L;
+ }
+
+ /**
+ * parses the location of a gene.
+ * Possible formats
+ * START..END,START..END,...
+ * complement(START..END,..)
+ * join(START..END,START..END,...)
+ * complement(join(START..END,START..END,..))
+ * START and END are integer
+ * START can have prefix LABEL: - if it does, then we will ignore this entry
+ * In addition,
+ * START can have prefix <
+ * END can have prefix >
+ * Only the first two entries without a label are used.
+ * @param aLine the location string
+ * @return {start1, end1}, {start1, end1, start2, end2} or {start2, end2}; null if parentheses are unbalanced or no segment has length at least 20
+ */
+ private static int[] parseLocations(String aLine) {
+ if (Basic.countOccurrences(aLine, '(') != Basic.countOccurrences(aLine, ')'))
+ return null;
+ int a = aLine.lastIndexOf("("); // keep only the contents of the innermost parentheses
+ if (a != -1)
+ aLine = aLine.substring(a + 1, aLine.length());
+ int b = aLine.indexOf(")");
+ if (b != -1)
+ aLine = aLine.substring(0, b);
+
+ String[] tokens = aLine.split(",");
+ int count = 0;
+ int start1 = -1;
+ int end1 = 0;
+ int start2 = 0;
+ int end2 = -1;
+ for (String token : tokens) {
+ if (!token.contains(":")) { // entries with a LABEL: prefix are ignored
+ switch (count) {
+ case 0: {
+ start1 = getFirstNumber(token);
+ end1 = getLastNumber(token);
+ count++;
+ break;
+ }
+ case 1: {
+ start2 = getFirstNumber(token);
+ end2 = getLastNumber(token);
+ count++;
+ break;
+ }
+ }
+ if (count == 2)
+ break;
+ }
+ }
+ int length1 = (end1 - start1 + 1);
+ int length2 = (end2 - start2 + 1);
+
+ if (length1 >= 20) {
+ if (length2 >= 20 && start2 > 0 && end2 >= start2 + 50) { // the second segment is checked against its own start (start2, not start1)
+ return new int[]{start1, end1, start2, end2};
+ } else
+ return new int[]{start1, end1};
+ } else if (length2 >= 20) {
+ return new int[]{start2, end2};
+ } else
+ return null;
+ }
+
+ private static int getFirstNumber(String str) {
+ int a = 0;
+ while (a < str.length() && !Character.isDigit(str.charAt(a)))
+ a++;
+ int b = a;
+ while (b < str.length() && Character.isDigit(str.charAt(b)))
+ b++;
+ if (a < b)
+ return Integer.parseInt(str.substring(a, b));
+ else
+ return 0;
+ }
+
+ /** gets the last run of digits in str as an int, or 0 if str contains no digit */
+ private static int getLastNumber(String str) {
+ int b = str.length();
+ while (b > 0 && !Character.isDigit(str.charAt(b - 1)))
+ b--;
+ int a = b; // start at b (not b - 1): a digit-free string then gives a == b rather than substring(-1, 0)
+ while (a > 0 && Character.isDigit(str.charAt(a - 1)))
+ a--;
+ if (a < b)
+ return Integer.parseInt(str.substring(a, b));
+ return 0;
+ }
+}
+
+
diff --git a/src/malt/io/BlastTextHelper.java b/src/malt/io/BlastTextHelper.java
new file mode 100644
index 0000000..0f68afb
--- /dev/null
+++ b/src/malt/io/BlastTextHelper.java
@@ -0,0 +1,111 @@
+/**
+ * BlastTextHelper.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.io;
+
+
+import megan.parsers.blast.BlastMode;
+
+/**
+ * some methods to help generate BLAST text output
+ * Daniel Huson, 8.2014
+ */
+public class BlastTextHelper {
+ public static final String FILE_HEADER_BLASTN = "BLASTN output produced by MALT\n\n";
+ public static final String FILE_HEADER_BLASTX = "BLASTX output produced by MALT\n\n";
+ public static final String FILE_HEADER_BLASTP = "BLASTP output produced by MALT\n\n";
+ public static final String FILE_FOOTER_BLAST = "\nEOF\n";
+ public static final String NO_HITS_STRING = "***** No hits found ******";
+ public static final byte[] NO_HITS = NO_HITS_STRING.getBytes();
+ public static final String QUERY_EQUALS_STRING = "\nQuery= ";
+ private static final byte[] QUERY_EQUALS = QUERY_EQUALS_STRING.getBytes();
+ public static final String QUERY_LETTERS_FORMAT_STRING = "\n (%d letters)\n\n";
+ public static final String REFERENCE_LENGTH_FORMAT_STRING = "\n Length = %d\n\n";
+
+ /**
+ * make the query line
+ *
+ * @return query line
+ */
+ public static byte[] makeQueryLine(final FastARecord query) {
+ final byte[] header = query.getHeader();
+ int startOfFirstWord = (header.length > 0 && header[0] == '>' ? 1 : 0);
+ while (startOfFirstWord < header.length && Character.isWhitespace(header[startOfFirstWord])) {
+ startOfFirstWord++;
+ }
+ int endOfFirstWord = startOfFirstWord;
+ while (endOfFirstWord < header.length) {
+ if (Character.isWhitespace(header[endOfFirstWord]) || header[endOfFirstWord] == 0)
+ break;
+ else
+ endOfFirstWord++;
+ }
+ int lengthOfFirstWord = endOfFirstWord - startOfFirstWord;
+ final byte[] result = new byte[QUERY_EQUALS.length + lengthOfFirstWord + 1]; // add one for new-line
+ System.arraycopy(QUERY_EQUALS, 0, result, 0, QUERY_EQUALS.length);
+ System.arraycopy(query.getHeader(), startOfFirstWord, result, QUERY_EQUALS.length, lengthOfFirstWord);
+ result[result.length - 1] = '\n';
+ return result;
+ }
+
+ /**
+ * gets the appropriate header line
+ *
+ * @param mode the BLAST mode
+ * @return the file header for BlastN, BlastX or BlastP,
+ * and the string "unknown" for any other mode
+ */
+ public static String getBlastTextHeader(BlastMode mode) {
+ final String header;
+
+ switch (mode) {
+ case BlastN: header = FILE_HEADER_BLASTN; break;
+ case BlastX: header = FILE_HEADER_BLASTX; break;
+ case BlastP: header = FILE_HEADER_BLASTP; break;
+ default: header = "unknown";
+ }
+ return header;
+ }
+
+ /**
+ * get query name followed by tab
+ *
+ * @param query
+ * @return query name plus tab
+ */
+ public static byte[] getQueryNamePlusTab(final FastARecord query) {
+ final byte[] header = query.getHeader();
+ int startOfFirstWord = (header.length > 0 && header[0] == '>' ? 1 : 0);
+ while (startOfFirstWord < header.length && Character.isWhitespace(header[startOfFirstWord])) {
+ startOfFirstWord++;
+ }
+ int endOfFirstWord = startOfFirstWord;
+ while (endOfFirstWord < header.length) {
+ if (Character.isWhitespace(header[endOfFirstWord]) || header[endOfFirstWord] == 0)
+ break;
+ else
+ endOfFirstWord++;
+ }
+ int lengthOfFirstWord = endOfFirstWord - startOfFirstWord;
+ byte[] result = new byte[lengthOfFirstWord + 1]; // plus one for tab
+ System.arraycopy(header, startOfFirstWord, result, 0, lengthOfFirstWord);
+ result[lengthOfFirstWord] = '\t';
+ return result;
+ }
+}
diff --git a/src/malt/io/FastAFileIteratorBytes.java b/src/malt/io/FastAFileIteratorBytes.java
new file mode 100644
index 0000000..a250e01
--- /dev/null
+++ b/src/malt/io/FastAFileIteratorBytes.java
@@ -0,0 +1,254 @@
+/**
+ * FastAFileIteratorBytes.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.io;
+
+import jloda.util.Basic;
+import jloda.util.ICloseableIterator;
+import malt.data.INormalizer;
+
+import java.io.BufferedInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.Iterator;
+
+/**
+ * Reads in a multi-fastA (or fastQ) file and places all headers and sequences in byte arrays
+ * Daniel Huson, 8.2014
+ */
+public class FastAFileIteratorBytes implements Iterator<byte[]>, ICloseableIterator<byte[]> {
+ private final INormalizer normalizer;
+ private byte[] buffer = new byte[10000000];
+ private int length = 0;
+ private long position = 0;
+ private final long maxProgress;
+ private boolean expectingHeader = true;
+ private BufferedInputStream inputStream;
+ private boolean isFastQ = false;
+
+ private boolean ok = true; // haven't seen next() fail yet
+ private boolean nextIsLoaded = false; // have already loaded the next item
+
+ /**
+ * constructor
+ *
+ * @param fileName
+ * @throws FileNotFoundException
+ */
+ public FastAFileIteratorBytes(final String fileName, final INormalizer normalizer) throws IOException {
+ this.normalizer = normalizer;
+ inputStream = new BufferedInputStream(Basic.getInputStreamPossiblyZIPorGZIP(fileName), 8192);
+ maxProgress = Basic.guessUncompressedSizeOfFile(fileName);
+
+ try {
+ int value = inputStream.read();
+ isFastQ = (value == '@');
+ } catch (IOException e) {
+ }
+ }
+
+ /**
+ * has next header or sequence. If no item is loaded yet, this reads the next item (a header or a sequence, depending on expectingHeader) into the buffer.
+ *
+ * @return true if has a header or sequence; once false has been returned, all later calls return false, too
+ */
+ public boolean hasNext() {
+ if (!ok)
+ return false;
+ else if (nextIsLoaded)
+ return true; // item already in buffer, don't read again
+
+ try {
+ if (isFastQ) { // expect four lines per read
+ try {
+ length = 0;
+ if (expectingHeader) {
+ buffer[length++] = (byte) '>'; // headers are always reported with a leading '>'
+ int value = inputStream.read();
+ if (value == -1)
+ return ok = false;
+ if (value != '@') // the '@' itself is dropped (the very first one was already consumed by the constructor)
+ buffer[length++] = (byte) value;
+ length = readLineIntoBuffer(inputStream, length);
+ position += length;
+ return ok = (length > 1); // more than just the '>'
+ } else {
+ length = readLineIntoBuffer(inputStream, length);
+ if (length == 0)
+ return ok = false;
+ position += length;
+ position += skipLine(inputStream); // skip comment line
+ position += skipLine(inputStream); // skip quality line
+ return ok = true;
+ }
+ } catch (IOException e) {
+ return ok = false; // a read error ends the iteration
+ }
+ } else {
+ int value;
+ length = 0;
+ boolean first = true;
+ try {
+ while (true) {
+ value = inputStream.read();
+ if (expectingHeader) {
+ if (value == -1)
+ return ok = false;
+ if (first) {
+ first = false;
+ if (value != '>') // the '>' was consumed earlier (by the constructor or at the end of the previous sequence), so put it back
+ buffer[length++] = '>';
+ }
+ if (value == '\n' || value == '\r') { // end of header line
+ position += length;
+ return ok = (length > 0);
+ }
+ } else {
+ if (Character.isWhitespace(value))
+ continue; // skip white space
+ if (value == '>' || value == -1) { // start of next header or end of input ends the sequence
+ position += length;
+ return ok = (length > 0);
+ }
+ }
+ if (length >= buffer.length)
+ growBuffer();
+ buffer[length++] = (byte) value;
+ }
+ } catch (IOException e) {
+ return ok = false; // a read error ends the iteration
+ }
+ }
+ } finally {
+ nextIsLoaded = true; // set on every path that attempted a read, successful or not
+ }
+ }
+
+ /**
+ * get next header or sequence. Each successful call toggles between header and sequence; sequences are passed through the normalizer, headers are copied as they are.
+ *
+ * @return header or sequence, or null if there is no further item
+ */
+ public byte[] next() {
+ try {
+ if (!nextIsLoaded && !hasNext())
+ return null;
+
+ expectingHeader = !expectingHeader; // from here on, expectingHeader describes the item AFTER the one being returned
+ if (length > 0 || hasNext()) {
+ byte[] result = new byte[length];
+ if (expectingHeader) // was not expecting header when we entered this method, so this is sequence, so normalize:
+ {
+ for (int i = 0; i < length; i++)
+ result[i] = normalizer.getNormalized(buffer[i]);
+ } else
+ System.arraycopy(buffer, 0, result, 0, length);
+ length = 0;
+ return result;
+ }
+ return null;
+ } finally {
+ nextIsLoaded = false; // the buffered item has been handed out (or there was none)
+ }
+ }
+
+ /**
+ * Reads bytes up to the next '\r', '\n' or end of stream into the buffer, starting at offset.
+ * The terminating byte is consumed but not stored; the buffer is grown as needed.
+ *
+ * @param inputStream the stream to read from
+ * @param offset first position of the buffer to write to
+ * @return position of next available position in buffer
+ */
+ private int readLineIntoBuffer(BufferedInputStream inputStream, int offset) throws IOException {
+ int next = offset;
+ // read one byte at a time until the end of the line or of the stream
+ for (int c = inputStream.read(); c != '\r' && c != '\n' && c != -1; c = inputStream.read()) {
+ if (next >= buffer.length)
+ growBuffer(); // need to grow buffer
+ buffer[next++] = (byte) c;
+ }
+ return next;
+ }
+
+ /**
+ * grows the line buffer to twice its length, but to at most Integer.MAX_VALUE - 10 bytes
+ */
+ private void growBuffer() {
+ byte[] nextBuffer = new byte[(int) Math.min(Integer.MAX_VALUE - 10L, 2L * buffer.length)]; // 2L: double in long arithmetic, as an int product overflows to a negative size above 2^30 bytes
+ System.arraycopy(buffer, 0, nextBuffer, 0, buffer.length);
+ buffer = nextBuffer;
+ }
+
+ /**
+ * skip the current line
+ *
+ * @param inputStream the stream to read from
+ * @return number of bytes skipped, not counting the byte that ended the line
+ * @throws IOException
+ */
+ private int skipLine(BufferedInputStream inputStream) throws IOException {
+ int skipped = 0;
+ // the terminating '\r', '\n' or end-of-stream value is read, but not counted
+ for (int c = inputStream.read(); c != '\r' && c != '\n' && c != -1; c = inputStream.read()) {
+ skipped++;
+ }
+ return skipped;
+ }
+
+
+ public void remove() {
+ }
+
+ /**
+ * close the stream
+ *
+ * @throws IOException
+ */
+ public void close() throws IOException {
+ inputStream.close();
+ }
+
+ /**
+ * gets the maximum progress value
+ *
+ * @return maximum progress value
+ */
+ public long getMaximumProgress() {
+ return maxProgress;
+ }
+
+ /**
+ * gets the current progress value
+ *
+ * @return current progress value
+ */
+ public long getProgress() {
+ return position;
+ }
+
+ /**
+ * is the file we are reading actually a fastQ file?
+ *
+ * @return true, if fastQ
+ */
+ public boolean isFastQ() {
+ return isFastQ;
+ }
+}
diff --git a/src/malt/io/FastAReader.java b/src/malt/io/FastAReader.java
new file mode 100644
index 0000000..0e43bb4
--- /dev/null
+++ b/src/malt/io/FastAReader.java
@@ -0,0 +1,307 @@
+/**
+ * FastAReader.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.io;
+
+import jloda.util.Basic;
+import jloda.util.ProgressPercentage;
+import malt.data.DNA5;
+import malt.data.IAlphabet;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.concurrent.locks.ReentrantLock;
+
+/**
+ * Reads in a multifastA (or fastQ) file and places all headers and sequences in byte arrays. In addition, the headers and sequences are 0-terminated
+ * Daniel Huson, 8.2014
+ */
+public class FastAReader {
+ final public static int BUFFER_SIZE = 8192;
+
+ private final IAlphabet alphabet;
+
+ private long position = 0;
+ private long maxProgress = 0;
+ private int readCount = 0;
+ private final BufferedInputStream inputStream;
+ private boolean isFastQ = false;
+
+ private final ProgressPercentage progress;
+ private final ReentrantLock lock = new ReentrantLock();
+
+ /**
+ * constructor
+ *
+ * @param fileName
+ * @throws java.io.FileNotFoundException
+ */
+ public FastAReader(final String fileName, final IAlphabet alphabet) throws IOException {
+ this(fileName, alphabet, null);
+ }
+
+ /**
+ * constructor. Determines from the first byte whether the file is fastA ('>') or fastQ ('@') and then opens it for reading.
+ *
+ * @param fileName name of the input file
+ * @param alphabet used to normalize all sequence letters
+ * @param progress progress listener, may be null
+ * @throws IOException if the file cannot be read or does not start with '>' or '@'
+ */
+ public FastAReader(final String fileName, final IAlphabet alphabet, final ProgressPercentage progress) throws IOException {
+ this.alphabet = alphabet;
+
+ maxProgress = Basic.guessUncompressedSizeOfFile(fileName);
+
+ this.progress = progress;
+ if (progress != null) {
+ progress.setMaximum(maxProgress);
+ progress.setProgress(0);
+ }
+
+ // determine file type from the first byte; try-with-resources makes sure that
+ // the probe stream is closed even when the format check below throws:
+ try (InputStream tmp = new BufferedInputStream(Basic.getInputStreamPossiblyZIPorGZIP(fileName))) {
+ int value = tmp.read();
+ if (value != '@' && value != '>')
+ throw new IOException("Input file '" + fileName + "' does not appear to be in FastA or FastQ format, as it does not start with a '>' or '@'");
+ isFastQ = (value == '@');
+ }
+ inputStream = new BufferedInputStream(Basic.getInputStreamPossiblyZIPorGZIP(fileName), BUFFER_SIZE);
+ }
+
+ /**
+ * read the next record as fastA. Can be applied to both fastA and fastQ files.
+ * Header and sequence are both 0-terminated.
+ * This method is thread safe (all reading is done while holding the lock)
+ *
+ * @param fastARecord the record to fill; its arrays are replaced by larger ones if necessary
+ * @return true if read, false if no further record is available
+ * @throws IOException if reading fails or, when quality values are requested, their number differs from the sequence length
+ */
+ public boolean readAsFastA(FastARecord fastARecord) throws IOException {
+ lock.lock();
+ try {
+ if (isFastQ) { // expect four lines per read
+ readHeader(fastARecord); // read header
+ if (fastARecord.getHeaderLength() == 0)
+ return false; // done
+ fastARecord.getHeader()[0] = '>';
+ readSequence(fastARecord);
+ if (fastARecord.getSequenceLength() == 0)
+ return false; // done
+ skipLine(); // skip the line between sequence and quality values
+ if (fastARecord.isWantQualityValues()) {
+ if (readQualityValues(fastARecord) != fastARecord.sequenceLength)
+ throw new IOException("Error reading quality values: wrong number of bytes");
+ } else
+ skipLine();
+ fastARecord.setId(++readCount);
+ return true;
+ } else { // fastA
+ readHeader(fastARecord); // read header
+
+ byte[] sequence = fastARecord.sequence;
+ // read the sequence, which might be spread over multiple lines
+ int value;
+ int length = 0;
+ while (true) {
+ value = inputStream.read();
+ position++;
+ if (Character.isWhitespace(value))
+ continue; // skip white space
+ if (value == '>' || value == -1) { // start of next header (the '>' is consumed here) or end of input
+ // position was already advanced once per byte read above, so length must not be added to it again
+ sequence[length] = 0;
+ fastARecord.sequenceLength = length;
+ if (length == 0)
+ return false;
+ else {
+ fastARecord.setId(++readCount);
+ return true;
+ }
+ }
+ sequence[length++] = alphabet.getNormalized((byte) value);
+ if (length >= sequence.length) // grow now, so that there is always room for the terminating 0
+ sequence = fastARecord.sequence = grow(sequence);
+ }
+ }
+ } finally {
+ if (progress != null)
+ progress.setProgress(position);
+ lock.unlock();
+ }
+ }
+
+ /**
+ * Reads the rest of the current line into the header of the record and 0-terminates it.
+ * In a fastQ file, the first character of the line is replaced by '>'.
+ *
+ * @param fastARecord the record whose header and headerLength are set
+ * @throws IOException
+ */
+ private void readHeader(FastARecord fastARecord) throws IOException {
+ byte[] chars = fastARecord.header;
+ int count = 0;
+ int c = inputStream.read();
+ position++;
+ while (c != -1 && c != '\n' && c != '\r') {
+ // the first character of a fastQ header is stored as '>'
+ chars[count] = (byte) (count == 0 && isFastQ ? '>' : c);
+ count++;
+ c = inputStream.read();
+ position++;
+ if (count >= chars.length) // need to grow buffer
+ chars = fastARecord.header = grow(chars);
+ }
+ chars[count] = 0;
+ fastARecord.headerLength = count;
+ }
+
+ /**
+ * Reads the rest of the current line into the sequence of the record, normalizing
+ * each letter using the alphabet, and 0-terminates it.
+ *
+ * @param fastARecord the record whose sequence and sequenceLength are set
+ * @throws IOException
+ */
+ private void readSequence(FastARecord fastARecord) throws IOException {
+ byte[] letters = fastARecord.sequence;
+ int count = 0;
+ int c = inputStream.read();
+ position++;
+ while (c != -1 && c != '\n' && c != '\r') {
+ letters[count++] = alphabet.getNormalized((byte) c);
+ c = inputStream.read();
+ position++;
+ if (count >= letters.length) // need to grow buffer
+ letters = fastARecord.sequence = grow(letters);
+ }
+ letters[count] = 0;
+ fastARecord.sequenceLength = count;
+ }
+
+ /**
+ * Reads the rest of the current line into the quality values of the record
+ * and 0-terminates them.
+ *
+ * @param fastARecord the record whose qualityValues are set
+ * @return the number of letters read
+ * @throws IOException
+ */
+ private int readQualityValues(FastARecord fastARecord) throws IOException {
+ byte[] values = fastARecord.qualityValues;
+ int count = 0;
+ int c = inputStream.read();
+ position++;
+ while (c != -1 && c != '\n' && c != '\r') {
+ values[count++] = (byte) c;
+ c = inputStream.read();
+ position++;
+ if (count >= values.length) // need to grow buffer
+ values = fastARecord.qualityValues = grow(values);
+ }
+ values[count] = 0;
+ return count;
+ }
+
+ /**
+ * grow the array
+ *
+ * @param bytes
+ * @return bigger copy of array
+ */
+ private byte[] grow(byte[] bytes) {
+ byte[] result = new byte[Math.min(Integer.MAX_VALUE >> 1, 2 * bytes.length)];
+ System.arraycopy(bytes, 0, result, 0, bytes.length);
+ return result;
+ }
+
+ /**
+ * Skips the current line: reads up to and including the next '\r' or '\n', or to the end of the stream.
+ * The position is advanced once per read.
+ *
+ * @throws java.io.IOException
+ */
+ private void skipLine() throws IOException {
+ int value;
+ do {
+ value = inputStream.read();
+ position++;
+ } while (value != '\r' && value != '\n' && value != -1);
+ }
+
+ /**
+ * close the stream
+ *
+ * @throws java.io.IOException
+ */
+ public void close() throws IOException {
+ inputStream.close();
+ if (progress != null)
+ progress.reportTaskCompleted();
+ }
+
+ /**
+ * gets the maximum progress value
+ *
+ * @return maximum progress value
+ */
+ public long getMaximumProgress() {
+ return maxProgress;
+ }
+
+ /**
+ * gets the current progress value
+ *
+ * @return current progress value
+ */
+ public long getProgress() {
+ return position;
+ }
+
+ /**
+ * is the file we are reading actually a fastQ file?
+ *
+ * @return true, if fastQ
+ */
+ public boolean isFastQ() {
+ return isFastQ;
+ }
+
+ /**
+ * create a fastA record to be used with this reader
+ *
+ * @param initialLength
+ * @return fastA record
+ */
+ public static FastARecord createFastARecord(int initialLength, boolean wantQualityValues) {
+ return new FastARecord(initialLength, wantQualityValues);
+ }
+
+ public static void main(String[] args) throws IOException {
+ FastAReader reader = new FastAReader("/Users/huson/data/ma/input/more.fna", DNA5.getInstance());
+ FastARecord fastARecord = createFastARecord(1000, false);
+ while (reader.readAsFastA(fastARecord)) {
+ System.err.print(fastARecord);
+ }
+ }
+}
+
diff --git a/src/malt/io/FastARecord.java b/src/malt/io/FastARecord.java
new file mode 100644
index 0000000..0cae075
--- /dev/null
+++ b/src/malt/io/FastARecord.java
@@ -0,0 +1,139 @@
+/**
+ * FastARecord.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.io;
+
+import jloda.util.Basic;
+
+/**
+ * A simple fastA record consisting of an id, a header, a sequence and, optionally, quality values.
+ * Header and sequence are held in byte buffers that may be longer than their content; the number of
+ * valid bytes is tracked in {@code headerLength} and {@code sequenceLength}. Note that header and
+ * sequence are 0-terminated.
+ * Daniel Huson, 8.2014
+ */
+
+public class FastARecord {
+    /** capacity of the header buffer allocated by the two-argument constructor */
+    private static final int INITIAL_HEADER_CAPACITY = 1000;
+
+    // The fields are package-private; presumably they are filled in directly by the reader
+    // in this package -- verify against FastAReader.
+    int id;
+    int headerLength;
+    byte[] header;
+    int sequenceLength;
+    byte[] sequence;
+
+    byte[] qualityValues; // null when quality values are not wanted
+
+    /**
+     * constructor. Allocates no buffers: header, sequence and quality values remain null.
+     */
+    public FastARecord() {
+    }
+
+    /**
+     * constructor
+     *
+     * @param initialLength                initial capacity of the sequence buffer (and of the quality buffer, if requested)
+     * @param wantQualityValuesIfAvailable if true, a buffer for quality values is allocated
+     */
+    public FastARecord(int initialLength, boolean wantQualityValuesIfAvailable) {
+        header = new byte[INITIAL_HEADER_CAPACITY];
+        sequence = new byte[initialLength];
+        if (wantQualityValuesIfAvailable)
+            qualityValues = new byte[initialLength];
+    }
+
+    /**
+     * get the id
+     *
+     * @return id
+     */
+    public int getId() {
+        return id;
+    }
+
+    /**
+     * set the id
+     *
+     * @param id
+     */
+    public void setId(int id) {
+        this.id = id;
+    }
+
+    /**
+     * get header length
+     *
+     * @return number of valid bytes in the header buffer
+     */
+    public int getHeaderLength() {
+        return headerLength;
+    }
+
+    /**
+     * get the header
+     *
+     * @return header buffer (0-terminated); this is the internal array, not a copy
+     */
+    public byte[] getHeader() {
+        return header;
+    }
+
+    /**
+     * get the header as a string
+     *
+     * @return the first headerLength bytes of the header
+     */
+    public String getHeaderString() {
+        return Basic.toString(header, headerLength);
+    }
+
+    /**
+     * get the sequence length
+     *
+     * @return number of valid bytes in the sequence buffer
+     */
+    public int getSequenceLength() {
+        return sequenceLength;
+    }
+
+    /**
+     * get the sequence
+     *
+     * @return sequence buffer (0-terminated); this is the internal array, not a copy
+     */
+    public byte[] getSequence() {
+        return sequence;
+    }
+
+    /**
+     * set the sequence. The array is used as is (no copy is made) and a 0 terminator is
+     * written at position {@code length}, so the array must have room for at least
+     * {@code length + 1} bytes.
+     *
+     * @param sequence buffer holding the sequence
+     * @param length   number of valid bytes in the buffer
+     */
+    public void setSequence(byte[] sequence, int length) {
+        this.sequence = sequence;
+        this.sequence[length] = 0;
+        // keep the recorded length in step with the new content; otherwise getSequenceLength(),
+        // getSequenceString() and toString() would still report the length of the old sequence
+        this.sequenceLength = length;
+    }
+
+    /**
+     * get the sequence as a string
+     *
+     * @return the first sequenceLength bytes of the sequence
+     */
+    public String getSequenceString() {
+        return Basic.toString(sequence, sequenceLength);
+    }
+
+    /**
+     * get the quality values
+     *
+     * @return quality values buffer, or null if this record does not hold quality values
+     */
+    public byte[] getQualityValues() {
+        return qualityValues;
+    }
+
+    /**
+     * get the quality values as a string
+     * NOTE(review): the buffer is passed on even when it is null; check how Basic.toString handles that.
+     *
+     * @return the first sequenceLength bytes of the quality values
+     */
+    public String getQualityValuesString() {
+        return Basic.toString(qualityValues, sequenceLength);
+    }
+
+    /**
+     * get as string
+     *
+     * @return header and sequence, each followed by a new-line
+     */
+    @Override
+    public String toString() {
+        return Basic.toString(header, headerLength) + "\n" + Basic.toString(sequence, sequenceLength) + "\n";
+    }
+
+    /**
+     * does this record hold a buffer for quality values?
+     *
+     * @return true, if quality values buffer is present
+     */
+    public boolean isWantQualityValues() {
+        return qualityValues != null;
+    }
+}
diff --git a/src/malt/io/FileWriterRanked.java b/src/malt/io/FileWriterRanked.java
new file mode 100644
index 0000000..93bfba0
--- /dev/null
+++ b/src/malt/io/FileWriterRanked.java
@@ -0,0 +1,266 @@
+/**
+ * FileWriterRanked.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.io;
+
+import jloda.util.Basic;
+
+import java.io.*;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.CountDownLatch;
+
+/**
+ * writes byte strings to a file in increasing order of rank
+ * <p>
+ * Producers hand over items via writeByRank/skipByRank, each producer using its own wait queue.
+ * A collector thread moves items in rank order from the wait queues to a single output queue,
+ * and a writer thread writes the items on the output queue to the file (or to stdout).
+ * Daniel Huson, 8.2014
+ */
+public class FileWriterRanked {
+    final public static int QUEUE_LENGTH = 1000000;
+
+    // marks the end of the output queue; recognized by identity, not by content
+    private final static OutputItem SENTINEL = new OutputItem(0, null);
+
+    private final ArrayBlockingQueue<OutputItem> outputQueue;
+    private final ArrayBlockingQueue<OutputItem>[] threadSpecificWaitQueues;
+    private final Writer writer;
+    private final boolean isFile;
+    private final StringBuilder fileFooter;
+
+    private long nextRank; // only touched by the collector thread once it has been started
+    // written by the thread that calls close() and polled by the collector thread, hence volatile
+    private volatile boolean isClosing = false;
+    private final CountDownLatch hasFinishedOutput = new CountDownLatch(1);
+
+    private int queueHighWaterMark = 0;
+
+    /**
+     * constructor. Opens the output and starts the collector and the writer thread.
+     *
+     * @param fileName        name of output file; null or "STDOUT" (any case) means standard output
+     * @param numberOfThreads number of producer threads, one wait queue is set up for each
+     * @param smallestRank    value of first byte string to be written
+     * @throws java.io.IOException
+     */
+    public FileWriterRanked(String fileName, final int numberOfThreads, int smallestRank) throws IOException {
+        // one wait queue for each thread:
+        @SuppressWarnings("unchecked") // arrays of a generic type can only be created via the raw type
+        final ArrayBlockingQueue<OutputItem>[] waitQueues = new ArrayBlockingQueue[numberOfThreads];
+        for (int i = 0; i < waitQueues.length; i++)
+            waitQueues[i] = new ArrayBlockingQueue<>(QUEUE_LENGTH);
+        threadSpecificWaitQueues = waitQueues;
+        // the output queue:
+        outputQueue = new ArrayBlockingQueue<>(QUEUE_LENGTH);
+
+        // the output writer:
+        OutputStream outs;
+        if (fileName == null || fileName.equalsIgnoreCase("STDOUT")) {
+            isFile = false;
+            outs = System.out;
+        } else {
+            isFile = true;
+            outs = Basic.getOutputStreamPossiblyZIPorGZIP(fileName);
+        }
+        writer = new BufferedWriter(new OutputStreamWriter(outs), 10 * 1024 * 1024); // ten megabyte buffer, not sure whether this makes a difference
+
+        fileFooter = new StringBuilder();
+        nextRank = smallestRank;
+
+        // this thread collects output items in order from thread-specific waiting queues and places them on the output queue
+        final Thread thread1 = new Thread(new Runnable() {
+            public void run() {
+                try {
+                    while (true) {
+                        // sample the flag BEFORE scanning the queues: everything that was enqueued before
+                        // close() was called is then guaranteed to be seen by the scan below
+                        final boolean closing = isClosing;
+                        boolean allEmpty = true;
+                        for (ArrayBlockingQueue<OutputItem> queue : threadSpecificWaitQueues) {
+                            OutputItem item = queue.peek();
+                            while (item != null && item.rank == nextRank) {
+                                if (allEmpty) {
+                                    allEmpty = false;
+                                    if (queue.size() > queueHighWaterMark)
+                                        queueHighWaterMark = queue.size();
+                                }
+                                try {
+                                    outputQueue.put(item);
+                                } catch (InterruptedException e) {
+                                    Basic.caught(e);
+                                }
+                                nextRank++;
+                                queue.poll(); // remove the item just forwarded; don't use take(), don't want to block here...
+                                item = queue.peek(); // and look at its successor in the same queue
+                            }
+                        }
+                        if (allEmpty) {
+                            if (closing) {
+                                outputQueue.put(SENTINEL);
+                                return;
+                            } else
+                                try {
+                                    Thread.sleep(1);
+                                } catch (InterruptedException e) {
+                                    Basic.caught(e);
+                                }
+                        }
+                    }
+                } catch (InterruptedException ex) {
+                    Basic.caught(ex);
+                }
+            }
+        });
+        thread1.start();
+
+        // this thread writes output to file
+        final Thread thread2 = new Thread(new Runnable() {
+            public void run() {
+                boolean writeFailed = false;
+                try {
+                    while (true) {
+                        final OutputItem item = outputQueue.take();
+                        if (item == SENTINEL)
+                            return;
+                        // after a write error, keep draining the queue so that the collector and
+                        // the producers do not block forever on full queues
+                        if (writeFailed || item.strings == null)
+                            continue;
+                        try {
+                            writeStrings(item.strings);
+                        } catch (IOException ex) {
+                            Basic.caught(ex);
+                            writeFailed = true;
+                        }
+                    }
+                } catch (Exception ex) {
+                    Basic.caught(ex);
+                } finally {
+                    // always release close(), even if this thread ends because of an error
+                    hasFinishedOutput.countDown();
+                }
+            }
+        });
+        thread2.start();
+    }
+
+    /**
+     * writes zero-terminated byte strings, one per line
+     *
+     * @param strings
+     * @throws java.io.IOException
+     */
+    private void writeStrings(byte[][] strings) throws IOException {
+        for (byte[] string : strings) {
+            byte last = 0;
+            for (byte b : string) {
+                last = b;
+                if (b == 0)
+                    break; // zero-terminated byte string
+                writer.write((char) b);
+            }
+            if (last != '\t') // if this ends on a tab, don't add new line, it is the query-name for BlastTab or SAM
+                writer.write('\n');
+        }
+    }
+
+    /**
+     * close. Waits until all queued output has been written, then writes the footer (if any)
+     * and flushes the writer. The writer is only closed if output goes to a file.
+     *
+     * @throws java.io.IOException
+     */
+    public void close() throws IOException {
+        isClosing = true;
+        try {
+            hasFinishedOutput.await();
+        } catch (InterruptedException e) {
+            Basic.caught(e);
+        }
+        if (fileFooter.length() > 0)
+            writer.write(fileFooter.toString());
+        writer.flush();
+        if (isFile)
+            writer.close();
+        /*
+        if (queueHighWaterMark > 1) {
+            System.err.println("(outputQueueHighWaterMark: " + queueHighWaterMark+")");
+        }
+        */
+    }
+
+    /**
+     * Write byte strings to the out stream by rank.
+     * By rank means that output is generated only when all output of lower rank
+     * has already been written.
+     * Does not make a copy of the byte arrays. Because it is unclear when they will be written,
+     * the arrays must not be recycled or overwritten by the caller.
+     *
+     * @param threadId index of the wait queue to use, must be smaller than the number of threads
+     * @param rank     each call must have a different rank and no rank can be skipped
+     * @param strings  can be null
+     */
+    public void writeByRank(int threadId, long rank, byte[][] strings) {
+        try {
+            threadSpecificWaitQueues[threadId].put(new OutputItem(rank, strings));
+        } catch (InterruptedException e) {
+            Basic.caught(e);
+        }
+    }
+
+    /**
+     * write a header and body by rank. By rank means that output is generated only when all output of lower rank
+     * has already been written.
+     * Does not make a copy of the byte arrays, so they must not be recycled because it is unclear when they will be written
+     *
+     * @param threadId index of the wait queue to use
+     * @param rank
+     * @param header
+     * @param body
+     */
+    public void writeByRank(int threadId, long rank, byte[] header, byte[] body) {
+        try {
+            threadSpecificWaitQueues[threadId].put(new OutputItem(rank, new byte[][]{header, body}));
+        } catch (InterruptedException e) {
+            Basic.caught(e);
+        }
+    }
+
+    /**
+     * skip a rank. Queues an item without content so that output of higher rank is not held up
+     *
+     * @param threadId index of the wait queue to use
+     * @param rank
+     */
+    public void skipByRank(int threadId, int rank) {
+        try {
+            threadSpecificWaitQueues[threadId].put(new OutputItem(rank, null));
+        } catch (InterruptedException e) {
+            Basic.caught(e);
+        }
+    }
+
+    /**
+     * write this at the top of the file. The string is written immediately, so this only
+     * produces a header if it is called before any ranked output has been submitted
+     *
+     * @param string
+     * @throws java.io.IOException
+     */
+    public void writeFirst(String string) throws IOException {
+        writer.write(string);
+    }
+
+    /**
+     * write this at the end of the file. The string is collected and written on close
+     *
+     * @param string
+     * @throws java.io.IOException
+     */
+    public void writeLast(String string) throws IOException {
+        fileFooter.append(string);
+    }
+}
+
+/**
+ * An item of output: a rank together with the byte strings to be written for that rank
+ */
+class OutputItem {
+    long rank;
+    byte[][] strings; // may be null, meaning that there is nothing to write for this rank
+
+    OutputItem(long rank, byte[][] strings) {
+        this.rank = rank;
+        this.strings = strings;
+    }
+
+    /**
+     * get as string
+     *
+     * @return the rank followed by all strings
+     */
+    @Override
+    public String toString() {
+        final StringBuilder text = new StringBuilder("rank=").append(rank);
+        if (strings == null)
+            return text.toString();
+        for (int i = 0; i < strings.length; i++) {
+            text.append(Basic.toString(strings[i]));
+        }
+        return text.toString();
+    }
+}
diff --git a/src/malt/io/RMA6Writer.java b/src/malt/io/RMA6Writer.java
new file mode 100644
index 0000000..63ff3cd
--- /dev/null
+++ b/src/malt/io/RMA6Writer.java
@@ -0,0 +1,217 @@
+/**
+ * RMA6Writer.java
+ * Copyright (C) 2015 Daniel H. Huson
+ * <p>
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ * <p>
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * <p>
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * <p>
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.io;
+
+import jloda.util.Basic;
+import jloda.util.CanceledException;
+import jloda.util.ProgressPercentage;
+import malt.MaltOptions;
+import malt.Version;
+import malt.data.ReadMatch;
+import malt.mapping.MappingManager;
+import megan.classification.Classification;
+import megan.core.Document;
+import megan.core.SyncArchiveAndDataTable;
+import megan.data.IReadBlock;
+import megan.data.IReadBlockIterator;
+import megan.io.InputOutputReaderWriter;
+import megan.rma6.MatchLineRMA6;
+import megan.rma6.RMA6Connector;
+import megan.rma6.RMA6FileCreator;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * Create an RMA6 file from SAM data in Malt
+ * <p>
+ * Queries and their matches are added one by one using processMatches. Calling close
+ * finishes the file, links paired reads (if requested) and runs the MEGAN data processor on it.
+ * <p>
+ * Daniel Huson, 6.2015
+ */
+public class RMA6Writer {
+    private final RMA6FileCreator rma6FileCreator;
+    private final String rma6File;
+
+    private final String[] cNames; // names of the classifications
+
+    private final int maxMatchesPerQuery;
+    private final MaltOptions maltOptions;
+
+    private final MatchLineRMA6[] matches; // reused for every query
+
+    final int[][] match2classification2id; // reused for every query
+
+    // text buffers, reused for every query and grown on demand
+    private byte[] queryText = new byte[10000];
+    private byte[] matchesText = new byte[10000];
+
+    /**
+     * constructor. Creates the file, writes the header and starts the queries section
+     *
+     * @param maltOptions
+     * @param rma6File
+     * @throws IOException
+     */
+    public RMA6Writer(final MaltOptions maltOptions, String rma6File) throws IOException {
+        System.err.println("Starting file: " + rma6File);
+        this.maltOptions = maltOptions;
+        this.rma6File = rma6File;
+
+        maxMatchesPerQuery = maltOptions.getMaxAlignmentsPerQuery();
+
+        cNames = MappingManager.getCNames();
+        int taxonMapperIndex = Basic.getIndex(Classification.Taxonomy, Arrays.asList(cNames));
+
+        matches = new MatchLineRMA6[maxMatchesPerQuery];
+        for (int i = 0; i < matches.length; i++) {
+            matches[i] = new MatchLineRMA6(cNames.length, taxonMapperIndex);
+        }
+
+        match2classification2id = new int[maxMatchesPerQuery][cNames.length];
+
+        rma6FileCreator = new RMA6FileCreator(rma6File, true);
+        rma6FileCreator.writeHeader(Version.SHORT_DESCRIPTION, maltOptions.getMode(), cNames, false);
+
+        rma6FileCreator.startAddingQueries();
+    }
+
+    /**
+     * process the matches associated with a given query.
+     * This is used in malt1
+     *
+     * @param queryHeader
+     * @param querySequence
+     * @param matchesArray
+     * @param numberOfMatches number of entries of matchesArray to use; at most the maximum number of alignments per query are used
+     * @throws IOException
+     */
+    public synchronized void processMatches(String queryHeader, String querySequence, ReadMatch[] matchesArray, int numberOfMatches) throws IOException {
+        // setup query text: header, new-line, sequence, new-line
+        byte[] queryName = Basic.swallowLeadingGreaterSign(Basic.getFirstWord(queryHeader)).getBytes();
+        byte[] queryHeaderText = queryHeader.getBytes();
+        byte[] querySequenceText = querySequence.getBytes();
+        if (queryHeaderText.length + querySequenceText.length + 100 > queryText.length) {
+            queryText = new byte[100 + queryHeaderText.length + querySequenceText.length];
+        }
+        System.arraycopy(queryHeaderText, 0, queryText, 0, queryHeaderText.length);
+        int queryTextLength = queryHeaderText.length;
+        queryText[queryTextLength++] = '\n';
+        System.arraycopy(querySequenceText, 0, queryText, queryTextLength, querySequenceText.length);
+        queryTextLength += querySequenceText.length;
+        queryText[queryTextLength++] = '\n';
+
+        // setup matches text: one line per match consisting of query name, tab and match text
+        int matchesTextLength = 0;
+        numberOfMatches = Math.min(maxMatchesPerQuery, numberOfMatches);
+        for (int m = 0; m < numberOfMatches; m++) {
+            final ReadMatch match = matchesArray[m];
+            final byte[] matchText = match.getRMA3Text();
+
+            // grow the buffer, keeping what has been written so far
+            final int approximateLengthToAdd = matchesTextLength + matchText.length + queryName.length;
+            if (approximateLengthToAdd + 100 > matchesText.length) {
+                byte[] tmp = new byte[approximateLengthToAdd + 10000];
+                System.arraycopy(matchesText, 0, tmp, 0, matchesTextLength);
+                matchesText = tmp;
+            }
+            System.arraycopy(queryName, 0, matchesText, matchesTextLength, queryName.length);
+            matchesTextLength += queryName.length;
+            matchesText[matchesTextLength++] = '\t';
+
+            System.arraycopy(matchText, 0, matchesText, matchesTextLength, matchText.length);
+            matchesTextLength += matchText.length;
+            matchesText[matchesTextLength++] = '\n';
+
+            matches[m].setBitScore(match.getBitScore());
+            matches[m].setExpected(match.getExpected());
+            matches[m].setPercentIdentity(match.getPercentIdentity());
+            for (int i = 0; i < cNames.length; i++) {
+                final int id = MappingManager.getMapping(i).get(match.getReferenceId());
+                match2classification2id[m][i] = id;
+                matches[m].setFId(i, id);
+            }
+        }
+
+        rma6FileCreator.addQuery(queryText, queryTextLength, numberOfMatches, matchesText, matchesTextLength, match2classification2id, 0);
+    }
+
+    /**
+     * finish generation of RMA6 file
+     *
+     * @throws IOException if writing fails; a CanceledException is reported as an IOException as well
+     */
+    public void close() throws IOException {
+        try {
+            System.err.println("Finishing file: " + rma6File);
+
+            rma6FileCreator.endAddingQueries();
+            rma6FileCreator.writeClassifications(new String[0], null, null);
+            rma6FileCreator.close();
+
+            final boolean pairedReads = maltOptions.isPairedReads();
+            if (pairedReads) { // update paired reads info and then run dataprocessor
+                long count = 0;
+                try (InputOutputReaderWriter raf = new InputOutputReaderWriter(rma6File, "rw");
+                     IReadBlockIterator it = (new RMA6Connector(rma6File)).getAllReadsIterator(0, 1000, false, false)) {
+                    final ProgressPercentage progress = new ProgressPercentage("Linking paired reads");
+                    // set the range first and then start at 0 (the maximum used to be passed to
+                    // setProgress, which reported the task as finished before it had started)
+                    progress.setMaximum(it.getMaximumProgress());
+                    progress.setProgress(0);
+
+                    while (it.hasNext()) {
+                        final IReadBlock readBlock = it.next();
+                        if (readBlock.getMateUId() > 0) {
+                            if (readBlock.getMateUId() > readBlock.getUId())
+                                throw new IOException("Mate uid=" + readBlock.getMateUId() + ": too big");
+                            // the mate's uid is used as the file position at which the uid of this read is written
+                            raf.seek(readBlock.getMateUId());
+                            raf.writeLong(readBlock.getUId());
+                            count++;
+                        }
+                        progress.setProgress(it.getProgress());
+                    }
+                    progress.close();
+                    System.err.println(String.format("Number of pairs:%,14d", count));
+                }
+            }
+
+            // we need to run data processor
+
+            final Document doc = new Document();
+            doc.setTopPercent(maltOptions.getTopPercentLCA());
+            doc.setWeightedLCA(maltOptions.isUseWeightedLCA());
+            doc.setWeightedLCAPercent(maltOptions.getWeightedLCAPercent());
+            doc.setMinSupportPercent(maltOptions.getMinSupportPercentLCA());
+            doc.setMinSupport(maltOptions.getMinSupportLCA());
+            doc.setMaxExpected((float) maltOptions.getMaxExpected());
+            doc.setMinScore((float) maltOptions.getMinBitScore());
+            doc.setPairedReads(pairedReads);
+            doc.setMinPercentIdentity(maltOptions.getMinPercentIdentityLCA());
+            doc.setUseIdentityFilter(maltOptions.isUsePercentIdentityFilterLCA());
+
+            doc.getMeganFile().setFileFromExistingFile(rma6File, false);
+            doc.loadMeganFile();
+            doc.processReadHits();
+
+            // update and then save auxiliary data:
+            final String sampleName = Basic.replaceFileSuffix(Basic.getFileNameWithoutPath(rma6File), "");
+            SyncArchiveAndDataTable.syncRecomputedArchive2Summary(sampleName, "LCA", doc.getBlastMode(), doc.getParameterString(), new RMA6Connector(rma6File), doc.getDataTable(), 0);
+            doc.saveAuxiliaryData();
+        } catch (CanceledException ex) {
+            throw new IOException(ex); // this can't happen because ProgressPercent never throws CanceledException
+        }
+    }
+}
diff --git a/src/malt/io/SAMHelper.java b/src/malt/io/SAMHelper.java
new file mode 100644
index 0000000..57cfcf0
--- /dev/null
+++ b/src/malt/io/SAMHelper.java
@@ -0,0 +1,442 @@
+/**
+ * SAMHelper.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.io;
+
+import malt.data.DNA5;
+import megan.parsers.blast.BlastMode;
+
+/**
+ * helps to create a SAM line from an alignment
+ * Daniel Huson, 8.2014
+ */
+public class SAMHelper {
+    // Templates for the SAM file header, one per alignment mode. Each header line starts with '@'
+    // followed by a two-letter record type; %s is replaced by the command line.
+    private static final String FILE_HEADER_BLASTN_TEMPLATE = "@HD\tVN:1.5\tSO:unsorted\tGO:query\n@PG\tID:1\tPN:MALT\tCL:%s\tDS:BlastN\n@RG\tID:1\tPL:unknown\tSM:unknown\n@CO\tBlastN-like alignments\n" +
+            "@CO\tReporting AS: bitScore, ZR: rawScore, ZE: expected, ZI: percent identity, ZL: reference length\n";
+    private static final String FILE_HEADER_BLASTP_TEMPLATE = "@HD\tVN:1.5\tSO:unsorted\tGO:query\n@PG\tID:1\tPN:MALT\tCL:%s\tDS:BlastP\n@RG\tID:1\tPL:unknown\tSM:unknown\n@CO\tBlastP-like alignments\n" +
+            "@CO\tReporting AS: bitScore, ZR: rawScore, ZE: expected, ZI: percent identity, ZL: reference length\n";
+    private static final String FILE_HEADER_BLASTX_TEMPLATE = "@HD\tVN:1.5\tSO:unsorted\tGO:query\n@PG\tID:1\tPN:MALT\tCL:%s\tDS:BlastX\n@RG\tID:1\tPL:unknown\tSM:unknown\n@CO\tBlastX-like alignments\n" +
+            "@CO\tReporting AS: bitScore, ZR: rawScore, ZE: expected, ZI: percent identity, ZL: reference length, ZF: frame, ZS: query start DNA coordinate\n";
+
+    /*
+    The fields of a SAM line (index, name, type, regexp/range):
+    0  QNAME String [!-?A-~]{1,255}
+    1  FLAG  Int    [0,2^16-1]
+    2  RNAME String \*|[!-()+-<>-~][!-~]*
+    3  POS   Int    [0,2^29-1]
+    4  MAPQ  Int    [0,2^8-1]
+    5  CIGAR String \*|([0-9]+[MIDNSHPX=])+
+    6  RNEXT String \*|=|[!-()+-<>-~][!-~]*
+    7  PNEXT Int    [0,2^29-1]
+    8  TLEN  Int    [-2^29+1,2^29-1]
+    9  SEQ   String \*|[A-Za-z=.]+
+    10 QUAL  String [!-~]+
+    11 additional stuff including score and MD
+    */
+
+ /**
+ * creates a SAM line. If queryHeader==null, does not output the initial query token
+ *
+ * @param mode
+ * @param queryHeader
+ * @param queryStart
+ * @param queryStartBlastX
+ * @param queryEnd
+ * @param queryLength
+ * @param alignedQuery
+ * @param referenceHeader
+ * @param referenceStart
+ * @param referenceEnd
+ * @param alignedReference
+ * @param referenceLength
+ * @param bitScore
+ * @param rawScore
+ * @param expected
+ * @param percentIdentity
+ * @param frame
+ * @param softClipped
+ * @return
+ */
+ public static String createSAMLine(final BlastMode mode, final byte[] queryHeader, final byte[] querySequence, final int queryStart, final int queryStartBlastX, final int queryEnd, final int queryLength, final byte[] alignedQuery,
+ final byte[] referenceHeader, final int referenceStart, final int referenceEnd, final byte[] alignedReference, final int referenceLength,
+ final double bitScore, final int rawScore, final double expected, final float percentIdentity, int frame, final byte[] qualityValues, boolean softClipped) {
+
+ if (querySequence == null && softClipped)
+ softClipped = false;
+
+ final StringBuilder buffer = new StringBuilder();
+
+ // QNAME:
+ boolean first = true;
+ if (queryHeader != null) {
+ for (byte a : queryHeader) {
+ if (first && a == '>') {
+ first = false;
+ continue;
+ }
+ if (a == 0 || Character.isSpaceChar(a))
+ break;
+ buffer.append((char) a);
+ }
+ buffer.append('\t');
+ }
+
+ // FLAG
+ final boolean reverseComplemented = ((queryStart < queryEnd) != (referenceStart < referenceEnd));
+ final int queryOffset;
+
+ switch (mode) {
+ case BlastN:
+ if (reverseComplemented) {
+ queryOffset = queryLength - queryEnd;
+ buffer.append(0x10); // SEQ is reverse complemented
+ } else {
+ queryOffset = queryStart;
+ buffer.append(0);
+ }
+ break;
+ case BlastX:
+ if (reverseComplemented)
+ buffer.append(0x10); // SEQ is reverse complemented
+ else
+ buffer.append(0);
+ queryOffset = 0; // will explicitly save query start and query end
+ break;
+ default:
+ case BlastP:
+ queryOffset = queryStart;
+ buffer.append(0);
+ }
+ buffer.append('\t');
+
+ // RNAME:
+ first = true;
+ for (byte a : referenceHeader) {
+ if (first && a == '>') {
+ first = false;
+ continue;
+ }
+ if (a == 0 || Character.isSpaceChar(a))
+ break;
+ buffer.append((char) a);
+ }
+ buffer.append('\t');
+
+ // POS:
+ int pos = Math.min(referenceStart, referenceEnd);
+ buffer.append(pos);
+ buffer.append('\t');
+
+ // MAPQ
+ buffer.append("255"); // unknown
+ buffer.append('\t');
+
+ // CIGAR
+ appendCigar(alignedQuery, queryOffset, queryLength, alignedReference, reverseComplemented, softClipped, buffer);
+ buffer.append('\t');
+
+ // RNEXT
+ buffer.append("*"); // unknown
+ buffer.append('\t');
+
+ // PNEXT
+ buffer.append("0"); // unknown
+ buffer.append('\t');
+
+ // TLEN
+ buffer.append("0");
+ buffer.append('\t');
+
+ // SEQ
+ if (softClipped && querySequence != null) {
+ if (reverseComplemented) {
+ for (int i = queryLength - 1; i >= 0; i--) {
+ buffer.append((char) DNA5.getInstance().getBaseComplement(querySequence[i]));
+ }
+ } else {
+ for (int i = 0; i < queryLength; i++)
+ buffer.append((char) querySequence[i]);
+ }
+ } else {
+ if (reverseComplemented) {
+ for (int i = alignedQuery.length - 1; i >= 0; i--) {
+ byte a = alignedQuery[i];
+ if (a != '-')
+ buffer.append((char) DNA5.getInstance().getBaseComplement(a));
+ }
+ } else {
+ for (byte a : alignedQuery) {
+ if (a != '-')
+ buffer.append((char) a);
+ }
+ }
+ }
+ buffer.append('\t');
+
+ // QUAL
+ if (qualityValues == null)
+ buffer.append("*");
+ else {
+ if (softClipped) {
+ if (reverseComplemented) {
+ for (int i = queryLength - 1; i >= 0; i--)
+ buffer.append((char) qualityValues[i]);
+ } else {
+ for (int i = 0; i < queryLength; i++)
+ buffer.append((char) qualityValues[i]);
+ }
+ } else {
+ if (reverseComplemented) {
+ for (int i = queryStart; i < queryEnd; i++)
+ buffer.append((char) qualityValues[queryLength - (i + 1)]);
+ } else {
+ for (int i = queryStart; i < queryEnd; i++)
+ buffer.append((char) qualityValues[i]);
+ }
+ }
+ }
+ buffer.append('\t');
+
+ // optional stuff:
+ buffer.append(String.format("AS:i:%d\t", (int) Math.round(bitScore)));
+ buffer.append(String.format("NM:i:%d\t", computeEditDistance(alignedQuery, alignedReference)));
+ buffer.append(String.format("ZL:i:%d\t", referenceLength));
+ buffer.append(String.format("ZR:i:%d\t", rawScore));
+ buffer.append(String.format("ZE:f:%g\t", (float) expected));
+ buffer.append(String.format("ZI:i:%d\t", (int) Math.round(percentIdentity)));
+ if (mode == BlastMode.BlastX) {
+ buffer.append(String.format("ZF:i:%d\t", frame));
+ buffer.append(String.format("ZS:i:%d\t", queryStartBlastX));
+ }
+
+ appendMDString(alignedQuery, alignedReference, reverseComplemented, buffer);
+
+ return buffer.toString();
+ }
+
+ /**
+ * append the cigar string
+ *
+ * @param alignedQuery
+ * @param queryOffset
+ * @param queryLength
+ * @param alignedReference
+ * @param reverseComplemented
+ * @param softClipped
+ * @param buffer
+ */
+ private static void appendCigar(byte[] alignedQuery, int queryOffset, int queryLength, byte[] alignedReference, boolean reverseComplemented, boolean softClipped, StringBuilder buffer) {
+ int clip = (!reverseComplemented ? queryOffset : (queryLength - queryOffset - alignedQuery.length));
+ if (clip > 0) {
+ buffer.append(clip).append(softClipped ? "S" : "H");
+ }
+
+ if (reverseComplemented) {
+ char state = 'M'; // M in match, I insert, D deletion
+ int count = 0;
+ for (int i = alignedQuery.length - 1; i >= 0; i--) {
+ if (alignedQuery[i] == '-') {
+ if (state == 'D') {
+ count++;
+ } else if (count > 0) {
+ buffer.append(count).append(state);
+ state = 'D';
+ count = 1;
+ }
+ } else if (alignedReference[i] == '-') {
+ if (state == 'I') {
+ count++;
+ } else if (count > 0) {
+ buffer.append(count).append(state);
+ state = 'I';
+ count = 1;
+ }
+ } else { // match or mismatch
+ if (state == 'M') {
+ count++;
+ } else if (count > 0) {
+ buffer.append(count).append(state);
+ state = 'M';
+ count = 1;
+ }
+ }
+ }
+ if (count > 0) {
+ buffer.append(count).append(state);
+
+ }
+ } else {
+ char cigarState = 'M'; // M in match, D deletion, I insertion
+ int count = 0;
+ for (int i = 0; i < alignedQuery.length; i++) {
+ if (alignedQuery[i] == '-') {
+ if (cigarState == 'D') {
+ count++;
+ } else if (count > 0) {
+ buffer.append(count).append(cigarState);
+ cigarState = 'D';
+ count = 1;
+ }
+ } else if (alignedReference[i] == '-') {
+ if (cigarState == 'I') {
+ count++;
+ } else if (count > 0) {
+ buffer.append(count).append(cigarState);
+ cigarState = 'I';
+ count = 1;
+ }
+ } else { // match or mismatch
+ if (cigarState == 'M') {
+ count++;
+ } else if (count > 0) {
+ buffer.append(count).append(cigarState);
+ cigarState = 'M';
+ count = 1;
+ }
+ }
+ }
+ if (count > 0) {
+ buffer.append(count).append(cigarState);
+
+ }
+ }
+
+ clip = (reverseComplemented ? queryOffset : (queryLength - queryOffset - alignedQuery.length));
+ if (clip > 0) {
+ buffer.append(clip).append(softClipped ? "S" : "H");
+ }
+ }
+
+ /**
+ * append the MD string
+ *
+ * @param alignedQuery
+ * @param alignedReference
+ * @param reverseComplemented
+ * @param buffer
+ */
+ private static void appendMDString(final byte[] alignedQuery, final byte[] alignedReference, final boolean reverseComplemented, final StringBuilder buffer) {
+ buffer.append("MD:Z:");
+ if (reverseComplemented) {
+ int countMatches = 0;
+ boolean inDeletion = false;
+ for (int i = alignedQuery.length - 1; i >= 0; i--) {
+ if (alignedQuery[i] == '-') { // gap in query
+ if (countMatches > 0) {
+ buffer.append(countMatches);
+ countMatches = 0;
+ }
+ if (!inDeletion) {
+ buffer.append("^");
+ inDeletion = true;
+ }
+ buffer.append((char) (DNA5.getInstance().getBaseComplement(alignedReference[i])));
+ } else if (alignedReference[i] != '-') { // match or mismatch
+ if (alignedQuery[i] == alignedReference[i]) {
+ countMatches++;
+ } else {
+ if (inDeletion)
+ buffer.append(0);
+ if (countMatches > 0) {
+ buffer.append(countMatches);
+ countMatches = 0;
+ }
+ buffer.append((char) (DNA5.getInstance().getBaseComplement(alignedReference[i])));
+ }
+ if (inDeletion)
+ inDeletion = false;
+ }
+ // else alignedReference[i] == '-': this has no effect
+ }
+ if (countMatches > 0)
+ buffer.append(countMatches);
+ else if (inDeletion)
+ buffer.append(0);
+ } else {
+ int countMatches = 0;
+ boolean inDeletion = false;
+ for (int i = 0; i < alignedQuery.length; i++) {
+ if (alignedQuery[i] == '-') { // gap in query
+ if (countMatches > 0) {
+ buffer.append(countMatches);
+ countMatches = 0;
+ }
+ if (!inDeletion) {
+ buffer.append("^");
+ inDeletion = true;
+ }
+ buffer.append((char) alignedReference[i]);
+ } else if (alignedReference[i] != '-') { // match or mismatch
+ if (alignedQuery[i] == alignedReference[i]) {
+ countMatches++;
+ } else {
+ if (inDeletion)
+ buffer.append("0");
+ if (countMatches > 0) {
+ buffer.append(countMatches);
+ countMatches = 0;
+ }
+ buffer.append((char) alignedReference[i]);
+ }
+ if (inDeletion)
+ inDeletion = false;
+ }
+ // else alignedReference[i] == '-': this has no effect
+ }
+ if (countMatches > 0)
+ buffer.append(countMatches);
+ else if (inDeletion)
+ buffer.append(0);
+ }
+ }
+
+ /**
+ * compute edit distance from alignment
+ *
+ * @param alignedQuery
+ * @param alignedReference
+ * @return edit distance
+ */
+ private static int computeEditDistance(byte[] alignedQuery, byte[] alignedReference) {
+ int distance = 0;
+ for (int i = 0; i < alignedQuery.length; i++) {
+ if (alignedQuery[i] == '-' || alignedReference[i] == '-' || alignedQuery[i] != alignedReference[i])
+ distance++;
+ }
+ return distance;
+ }
+
+    /**
+     * gets the SAM file header for the given mode
+     *
+     * @param mode
+     * @param commandLine the command line to report in the header; null is reported as empty
+     * @return SAM header lines, or null if the mode is not BlastN, BlastP or BlastX
+     */
+    public static String getSAMHeader(BlastMode mode, String commandLine) {
+        final String template;
+        switch (mode) {
+            case BlastN:
+                template = FILE_HEADER_BLASTN_TEMPLATE;
+                break;
+            case BlastP:
+                template = FILE_HEADER_BLASTP_TEMPLATE;
+                break;
+            case BlastX:
+                template = FILE_HEADER_BLASTX_TEMPLATE;
+                break;
+            default:
+                return null;
+        }
+        final String reportedCommandLine = (commandLine == null ? "" : commandLine);
+        return String.format(template, reportedCommandLine);
+    }
+}
diff --git a/src/malt/io/xml/DatasetType.java b/src/malt/io/xml/DatasetType.java
new file mode 100644
index 0000000..406c925
--- /dev/null
+++ b/src/malt/io/xml/DatasetType.java
@@ -0,0 +1,75 @@
+/**
+ * DatasetType.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package malt.io.xml;
+
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlType;
+
+
+/**
+ * <p>Java class for DatasetType complex type.
+ * <p/>
+ * <p>The following schema fragment specifies the expected content contained within this class.
+ * <p/>
+ * <pre>
+ * <complexType name="DatasetType">
+ * <complexContent>
+ * <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ * <sequence>
+ * <element ref="{}datasetName"/>
+ * </sequence>
+ * </restriction>
+ * </complexContent>
+ * </complexType>
+ * </pre>
+ */
+@XmlAccessorType(XmlAccessType.FIELD)
+@XmlType(name = "DatasetType", propOrder = {
+        "datasetName"
+})
+public class DatasetType {
+
+    @XmlElement(required = true)
+    protected String datasetName; // bound to the required datasetName element
+
+    /**
+     * Gets the value of the datasetName property.
+     *
+     * @return possible object is
+     * {@link String }
+     */
+    public String getDatasetName() {
+        return datasetName;
+    }
+
+    /**
+     * Sets the value of the datasetName property.
+     *
+     * @param value allowed object is
+     *              {@link String }
+     */
+    public void setDatasetName(String value) {
+        this.datasetName = value;
+    }
+
+}
diff --git a/src/malt/io/xml/GeneType.java b/src/malt/io/xml/GeneType.java
new file mode 100644
index 0000000..93e5264
--- /dev/null
+++ b/src/malt/io/xml/GeneType.java
@@ -0,0 +1,209 @@
+/**
+ * GeneType.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package malt.io.xml;
+
+import javax.xml.bind.annotation.*;
+import java.math.BigInteger;
+
+
+/**
+ * <p>Java class for GeneType complex type.
+ * <p/>
+ * <p>The following schema fragment specifies the expected content contained within this class.
+ * <p/>
+ * <pre>
+ * <complexType name="GeneType">
+ * <simpleContent>
+ * <extension base="<http://www.w3.org/2001/XMLSchema>string">
+ * <attribute name="gi" type="{http://www.w3.org/2001/XMLSchema}positiveInteger" />
+ * <attribute name="ref" type="{http://www.w3.org/2001/XMLSchema}string" />
+ * <attribute name="protein_id" type="{http://www.w3.org/2001/XMLSchema}string" />
+ * <attribute name="product" type="{http://www.w3.org/2001/XMLSchema}string" />
+ * <attribute name="kegg" type="{http://www.w3.org/2001/XMLSchema}string" />
+ * <attribute name="cog" type="{http://www.w3.org/2001/XMLSchema}string" />
+ * </extension>
+ * </simpleContent>
+ * </complexType>
+ * </pre>
+ */
+@XmlAccessorType(XmlAccessType.FIELD)
+@XmlType(name = "GeneType", propOrder = {
+        "value"
+})
+public class GeneType {
+
+    @XmlValue
+    protected String value; // text content of the gene element
+    @XmlAttribute
+    @XmlSchemaType(name = "positiveInteger")
+    protected BigInteger gi;
+    @XmlAttribute
+    protected String ref;
+    @XmlAttribute(name = "protein_id")
+    protected String proteinId;
+    @XmlAttribute
+    protected String product;
+    @XmlAttribute
+    protected String kegg;
+    @XmlAttribute
+    protected String cog; // NOTE(review): OutputSchema.xsd declares this attribute as "eGGNOG", not "cog" - confirm
+
+    /**
+     * Gets the value of the value property.
+     *
+     * @return possible object is
+     * {@link String }
+     */
+    public String getValue() {
+        return value;
+    }
+
+    /**
+     * Sets the value of the value property.
+     *
+     * @param value allowed object is
+     *              {@link String }
+     */
+    public void setValue(String value) {
+        this.value = value;
+    }
+
+    /**
+     * Gets the value of the gi property.
+     *
+     * @return possible object is
+     * {@link BigInteger }
+     */
+    public BigInteger getGi() {
+        return gi;
+    }
+
+    /**
+     * Sets the value of the gi property.
+     *
+     * @param value allowed object is
+     *              {@link BigInteger }
+     */
+    public void setGi(BigInteger value) {
+        this.gi = value;
+    }
+
+    /**
+     * Gets the value of the ref property.
+     *
+     * @return possible object is
+     * {@link String }
+     */
+    public String getRef() {
+        return ref;
+    }
+
+    /**
+     * Sets the value of the ref property.
+     *
+     * @param value allowed object is
+     *              {@link String }
+     */
+    public void setRef(String value) {
+        this.ref = value;
+    }
+
+    /**
+     * Gets the value of the proteinId property.
+     *
+     * @return possible object is
+     * {@link String }
+     */
+    public String getProteinId() {
+        return proteinId;
+    }
+
+    /**
+     * Sets the value of the proteinId property.
+     *
+     * @param value allowed object is
+     *              {@link String }
+     */
+    public void setProteinId(String value) {
+        this.proteinId = value;
+    }
+
+    /**
+     * Gets the value of the product property.
+     *
+     * @return possible object is
+     * {@link String }
+     */
+    public String getProduct() {
+        return product;
+    }
+
+    /**
+     * Sets the value of the product property.
+     *
+     * @param value allowed object is
+     *              {@link String }
+     */
+    public void setProduct(String value) {
+        this.product = value;
+    }
+
+    /**
+     * Gets the value of the kegg property.
+     *
+     * @return possible object is
+     * {@link String }
+     */
+    public String getKegg() {
+        return kegg;
+    }
+
+    /**
+     * Sets the value of the kegg property.
+     *
+     * @param value allowed object is
+     *              {@link String }
+     */
+    public void setKegg(String value) {
+        this.kegg = value;
+    }
+
+    /**
+     * Gets the value of the cog property.
+     *
+     * @return possible object is
+     * {@link String }
+     */
+    public String getCog() {
+        return cog;
+    }
+
+    /**
+     * Sets the value of the cog property.
+     *
+     * @param value allowed object is
+     *              {@link String }
+     */
+    public void setCog(String value) {
+        this.cog = value;
+    }
+
+}
diff --git a/src/malt/io/xml/GenesType.java b/src/malt/io/xml/GenesType.java
new file mode 100644
index 0000000..3ec7318
--- /dev/null
+++ b/src/malt/io/xml/GenesType.java
@@ -0,0 +1,82 @@
+/**
+ * GenesType.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package malt.io.xml;
+
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+import javax.xml.bind.annotation.XmlType;
+import java.util.ArrayList;
+import java.util.List;
+
+
+/**
+ * <p>Java class for GenesType complex type.
+ * <p/>
+ * <p>The following schema fragment specifies the expected content contained within this class.
+ * <p/>
+ * <pre>
+ * <complexType name="GenesType">
+ * <complexContent>
+ * <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ * <sequence maxOccurs="unbounded" minOccurs="0">
+ * <element name="gene" type="{}GeneType"/>
+ * </sequence>
+ * </restriction>
+ * </complexContent>
+ * </complexType>
+ * </pre>
+ */
+@XmlAccessorType(XmlAccessType.FIELD)
+@XmlType(name = "GenesType", propOrder = {
+        "gene"
+})
+public class GenesType {
+
+    protected List<GeneType> gene; // created lazily by getGene()
+
+    /**
+     * Gets the value of the gene property.
+     * <p/>
+     * <p/>
+     * This accessor method returns a reference to the live list,
+     * not a snapshot. Therefore any modification you make to the
+     * returned list will be present inside the JAXB object.
+     * This is why there is not a <CODE>set</CODE> method for the gene property.
+     * <p/>
+     * <p/>
+     * For example, to add a new item, do as follows:
+     * <pre>
+     *    getGene().add(newItem);
+     * </pre>
+     * <p/>
+     * <p/>
+     * <p/>
+     * Objects of the following type(s) are allowed in the list
+     * {@link GeneType }
+     */
+    public List<GeneType> getGene() {
+        if (gene == null) {
+            gene = new ArrayList<>();
+        }
+        return this.gene;
+    }
+
+}
diff --git a/src/malt/io/xml/ObjectFactory.java b/src/malt/io/xml/ObjectFactory.java
new file mode 100644
index 0000000..96e9ab9
--- /dev/null
+++ b/src/malt/io/xml/ObjectFactory.java
@@ -0,0 +1,169 @@
+/**
+ * ObjectFactory.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package malt.io.xml;
+
+import javax.xml.bind.JAXBElement;
+import javax.xml.bind.annotation.XmlElementDecl;
+import javax.xml.bind.annotation.XmlRegistry;
+import javax.xml.namespace.QName;
+
+
+/**
+ * This object contains factory methods for each
+ * Java content interface and Java element interface
+ * generated in the malt.io.xml package.
+ * <p>An ObjectFactory allows you to programmatically
+ * construct new instances of the Java representation
+ * for XML content. The Java representation of XML
+ * content can consist of schema derived interfaces
+ * and classes representing the binding of schema
+ * type definitions, element declarations and model
+ * groups. Factory methods for each of these are
+ * provided in this class.
+ */
+@XmlRegistry
+public class ObjectFactory {
+
+    private final static QName _Species_QNAME = new QName("", "species");
+    private final static QName _Strain_QNAME = new QName("", "strain");
+    private final static QName _DatasetName_QNAME = new QName("", "datasetName");
+    private final static QName _OrganismsReport_QNAME = new QName("", "organismsReport");
+    private final static QName _OrganismName_QNAME = new QName("", "organismName");
+    private final static QName _Genus_QNAME = new QName("", "genus");
+
+    /**
+     * Create a new ObjectFactory that can be used to create new instances of schema derived classes for package: malt.io.xml
+     */
+    public ObjectFactory() {
+    }
+
+    /**
+     * Create an instance of {@link GeneType }
+     */
+    public GeneType createGeneType() {
+        return new GeneType();
+    }
+
+    /**
+     * Create an instance of {@link ReadsType }
+     */
+    public ReadsType createReadsType() {
+        return new ReadsType();
+    }
+
+    /**
+     * Create an instance of {@link RelativeAmount }
+     */
+    public RelativeAmount createRelativeAmount() {
+        return new RelativeAmount();
+    }
+
+    /**
+     * Create an instance of {@link DatasetType }
+     */
+    public DatasetType createDatasetType() {
+        return new DatasetType();
+    }
+
+    /**
+     * Create an instance of {@link ReportType }
+     */
+    public ReportType createReportType() {
+        return new ReportType();
+    }
+
+    /**
+     * Create an instance of {@link GenesType }
+     */
+    public GenesType createGenesType() {
+        return new GenesType();
+    }
+
+    /**
+     * Create an instance of {@link OrganismType }
+     */
+    public OrganismType createOrganismType() {
+        return new OrganismType();
+    }
+
+    /**
+     * Create an instance of {@link Taxonomy }
+     */
+    public Taxonomy createTaxonomy() {
+        return new Taxonomy();
+    }
+
+    /**
+     * Create an instance of {@link OrganismsType }
+     */
+    public OrganismsType createOrganismsType() {
+        return new OrganismsType();
+    }
+
+    /**
+     * Create an instance of {@link JAXBElement }{@code <}{@link String }{@code >}}
+     */
+    @XmlElementDecl(namespace = "", name = "species")
+    public JAXBElement<String> createSpecies(String value) {
+        return new JAXBElement<>(_Species_QNAME, String.class, null, value);
+    }
+
+    /**
+     * Create an instance of {@link JAXBElement }{@code <}{@link String }{@code >}}
+     */
+    @XmlElementDecl(namespace = "", name = "strain")
+    public JAXBElement<String> createStrain(String value) {
+        return new JAXBElement<>(_Strain_QNAME, String.class, null, value);
+    }
+
+    /**
+     * Create an instance of {@link JAXBElement }{@code <}{@link String }{@code >}}
+     */
+    @XmlElementDecl(namespace = "", name = "datasetName")
+    public JAXBElement<String> createDatasetName(String value) {
+        return new JAXBElement<>(_DatasetName_QNAME, String.class, null, value);
+    }
+
+    /**
+     * Create an instance of {@link JAXBElement }{@code <}{@link ReportType }{@code >}}
+     */
+    @XmlElementDecl(namespace = "", name = "organismsReport")
+    public JAXBElement<ReportType> createOrganismsReport(ReportType value) {
+        return new JAXBElement<>(_OrganismsReport_QNAME, ReportType.class, null, value);
+    }
+
+    /**
+     * Create an instance of {@link JAXBElement }{@code <}{@link String }{@code >}}
+     */
+    @XmlElementDecl(namespace = "", name = "organismName")
+    public JAXBElement<String> createOrganismName(String value) {
+        return new JAXBElement<>(_OrganismName_QNAME, String.class, null, value);
+    }
+
+    /**
+     * Create an instance of {@link JAXBElement }{@code <}{@link String }{@code >}}
+     */
+    @XmlElementDecl(namespace = "", name = "genus")
+    public JAXBElement<String> createGenus(String value) {
+        return new JAXBElement<>(_Genus_QNAME, String.class, null, value);
+    }
+
+}
diff --git a/src/malt/io/xml/OrganismType.java b/src/malt/io/xml/OrganismType.java
new file mode 100644
index 0000000..f8785ad
--- /dev/null
+++ b/src/malt/io/xml/OrganismType.java
@@ -0,0 +1,237 @@
+/**
+ * OrganismType.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package malt.io.xml;
+
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlType;
+
+
+/**
+ * <p>Java class for OrganismType complex type.
+ * <p/>
+ * <p>The following schema fragment specifies the expected content contained within this class.
+ * <p/>
+ * <pre>
+ * <complexType name="OrganismType">
+ * <complexContent>
+ * <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ * <sequence>
+ * <element ref="{}relativeAmount"/>
+ * <element ref="{}taxonomy"/>
+ * <element ref="{}organismName" minOccurs="0"/>
+ * <element ref="{}genus" minOccurs="0"/>
+ * <element ref="{}species" minOccurs="0"/>
+ * <element ref="{}strain" minOccurs="0"/>
+ * <element name="genes" type="{}GenesType" minOccurs="0"/>
+ * <element name="reads" type="{}ReadsType" minOccurs="0"/>
+ * </sequence>
+ * </restriction>
+ * </complexContent>
+ * </complexType>
+ * </pre>
+ */
+@XmlAccessorType(XmlAccessType.FIELD)
+@XmlType(name = "OrganismType", propOrder = {
+        "relativeAmount",
+        "taxonomy",
+        "organismName",
+        "genus",
+        "species",
+        "strain",
+        "genes",
+        "reads"
+})
+public class OrganismType {
+
+    @XmlElement(required = true)
+    protected RelativeAmount relativeAmount;
+    @XmlElement(required = true)
+    protected Taxonomy taxonomy;
+    protected String organismName;
+    protected String genus;
+    protected String species;
+    protected String strain;
+    protected GenesType genes;
+    protected ReadsType reads;
+
+    /**
+     * Gets the value of the relativeAmount property.
+     *
+     * @return possible object is
+     * {@link RelativeAmount }
+     */
+    public RelativeAmount getRelativeAmount() {
+        return relativeAmount;
+    }
+
+    /**
+     * Sets the value of the relativeAmount property.
+     *
+     * @param value allowed object is
+     *              {@link RelativeAmount }
+     */
+    public void setRelativeAmount(RelativeAmount value) {
+        this.relativeAmount = value;
+    }
+
+    /**
+     * Gets the value of the taxonomy property.
+     *
+     * @return possible object is
+     * {@link Taxonomy }
+     */
+    public Taxonomy getTaxonomy() {
+        return taxonomy;
+    }
+
+    /**
+     * Sets the value of the taxonomy property.
+     *
+     * @param value allowed object is
+     *              {@link Taxonomy }
+     */
+    public void setTaxonomy(Taxonomy value) {
+        this.taxonomy = value;
+    }
+
+    /**
+     * Gets the value of the organismName property.
+     *
+     * @return possible object is
+     * {@link String }
+     */
+    public String getOrganismName() {
+        return organismName;
+    }
+
+    /**
+     * Sets the value of the organismName property.
+     *
+     * @param value allowed object is
+     *              {@link String }
+     */
+    public void setOrganismName(String value) {
+        this.organismName = value;
+    }
+
+    /**
+     * Gets the value of the genus property.
+     *
+     * @return possible object is
+     * {@link String }
+     */
+    public String getGenus() {
+        return genus;
+    }
+
+    /**
+     * Sets the value of the genus property.
+     *
+     * @param value allowed object is
+     *              {@link String }
+     */
+    public void setGenus(String value) {
+        this.genus = value;
+    }
+
+    /**
+     * Gets the value of the species property.
+     *
+     * @return possible object is
+     * {@link String }
+     */
+    public String getSpecies() {
+        return species;
+    }
+
+    /**
+     * Sets the value of the species property.
+     *
+     * @param value allowed object is
+     *              {@link String }
+     */
+    public void setSpecies(String value) {
+        this.species = value;
+    }
+
+    /**
+     * Gets the value of the strain property.
+     *
+     * @return possible object is
+     * {@link String }
+     */
+    public String getStrain() {
+        return strain;
+    }
+
+    /**
+     * Sets the value of the strain property.
+     *
+     * @param value allowed object is
+     *              {@link String }
+     */
+    public void setStrain(String value) {
+        this.strain = value;
+    }
+
+    /**
+     * Gets the value of the genes property.
+     *
+     * @return possible object is
+     * {@link GenesType }
+     */
+    public GenesType getGenes() {
+        return genes;
+    }
+
+    /**
+     * Sets the value of the genes property.
+     *
+     * @param value allowed object is
+     *              {@link GenesType }
+     */
+    public void setGenes(GenesType value) {
+        this.genes = value;
+    }
+
+    /**
+     * Gets the value of the reads property.
+     *
+     * @return possible object is
+     * {@link ReadsType }
+     */
+    public ReadsType getReads() {
+        return reads;
+    }
+
+    /**
+     * Sets the value of the reads property.
+     *
+     * @param value allowed object is
+     *              {@link ReadsType }
+     */
+    public void setReads(ReadsType value) {
+        this.reads = value;
+    }
+
+}
diff --git a/src/malt/io/xml/OrganismsType.java b/src/malt/io/xml/OrganismsType.java
new file mode 100644
index 0000000..3230ad6
--- /dev/null
+++ b/src/malt/io/xml/OrganismsType.java
@@ -0,0 +1,84 @@
+/**
+ * OrganismsType.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package malt.io.xml;
+
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlType;
+import java.util.ArrayList;
+import java.util.List;
+
+
+/**
+ * <p>Java class for OrganismsType complex type.
+ * <p/>
+ * <p>The following schema fragment specifies the expected content contained within this class.
+ * <p/>
+ * <pre>
+ * <complexType name="OrganismsType">
+ * <complexContent>
+ * <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ * <sequence maxOccurs="unbounded">
+ * <element name="organism" type="{}OrganismType"/>
+ * </sequence>
+ * </restriction>
+ * </complexContent>
+ * </complexType>
+ * </pre>
+ */
+@XmlAccessorType(XmlAccessType.FIELD)
+@XmlType(name = "OrganismsType", propOrder = {
+        "organism"
+})
+public class OrganismsType {
+
+    @XmlElement(required = true)
+    protected List<OrganismType> organism; // created lazily by getOrganism()
+
+    /**
+     * Gets the value of the organism property.
+     * <p/>
+     * <p/>
+     * This accessor method returns a reference to the live list,
+     * not a snapshot. Therefore any modification you make to the
+     * returned list will be present inside the JAXB object.
+     * This is why there is not a <CODE>set</CODE> method for the organism property.
+     * <p/>
+     * <p/>
+     * For example, to add a new item, do as follows:
+     * <pre>
+     *    getOrganism().add(newItem);
+     * </pre>
+     * <p/>
+     * <p/>
+     * <p/>
+     * Objects of the following type(s) are allowed in the list
+     * {@link OrganismType }
+     */
+    public List<OrganismType> getOrganism() {
+        if (organism == null) {
+            organism = new ArrayList<>();
+        }
+        return this.organism;
+    }
+
+}
diff --git a/src/malt/io/xml/OutputSchema.xsd b/src/malt/io/xml/OutputSchema.xsd
new file mode 100644
index 0000000..661010d
--- /dev/null
+++ b/src/malt/io/xml/OutputSchema.xsd
@@ -0,0 +1,103 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
+ <!-- enumeration lists (none are defined in this schema) -->
+
+ <!-- definition of simple elements -->
+
+ <xs:element name="datasetName" type="xs:string"/>
+
+ <xs:element name="taxonomy">
+ <xs:complexType>
+ <xs:simpleContent>
+ <xs:extension base="xs:string">
+ <xs:attribute name="taxon_id" type="xs:positiveInteger"/>
+ </xs:extension>
+ </xs:simpleContent>
+ </xs:complexType>
+ </xs:element>
+
+
+ <!-- Organisms list fields: elements referenced from OrganismType below -->
+
+ <xs:element name="relativeAmount">
+ <xs:complexType>
+ <xs:simpleContent>
+ <xs:extension base="xs:decimal">
+ <xs:attribute name="count" type="xs:positiveInteger"/>
+ </xs:extension>
+ </xs:simpleContent>
+ </xs:complexType>
+ </xs:element>
+
+ <xs:element name="genus" type="xs:string"/>
+ <xs:element name="species" type="xs:string"/>
+ <xs:element name="strain" type="xs:string"/>
+ <xs:element name="organismName" type="xs:string"/>
+
+
+ <!-- Dataset: wraps the datasetName element -->
+
+ <xs:complexType name="DatasetType">
+ <xs:sequence>
+ <xs:element ref="datasetName"/>
+ </xs:sequence>
+ </xs:complexType>
+
+ <!-- Reads: one or more sequence strings -->
+
+ <xs:complexType name="ReadsType">
+ <xs:sequence minOccurs="1" maxOccurs="unbounded">
+ <xs:element name="sequence" type="xs:string"/>
+ </xs:sequence>
+ </xs:complexType>
+
+ <!-- Genes: gene text plus optional attributes. NOTE(review): GeneType.java binds an attribute named "cog" where this schema declares "eGGNOG"; confirm which name is intended -->
+
+ <xs:complexType name="GeneType">
+ <xs:simpleContent>
+ <xs:extension base="xs:string">
+ <xs:attribute name="gi" type="xs:positiveInteger"/>
+ <xs:attribute name="ref" type="xs:string"/>
+ <xs:attribute name="protein_id" type="xs:string"/>
+ <xs:attribute name="product" type="xs:string"/>
+ <xs:attribute name="kegg" type="xs:string"/>
+ <xs:attribute name="eGGNOG" type="xs:string"/>
+ </xs:extension>
+ </xs:simpleContent>
+ </xs:complexType>
+
+ <xs:complexType name="GenesType">
+ <xs:sequence minOccurs="0" maxOccurs="unbounded">
+ <xs:element name="gene" type="GeneType"/>
+ </xs:sequence>
+ </xs:complexType>
+
+ <!-- Organisms list: one OrganismType per organism, collected by OrganismsType and wrapped by ReportType -->
+
+ <xs:complexType name="OrganismType">
+ <xs:sequence>
+ <xs:element ref="relativeAmount"/>
+ <xs:element ref="taxonomy"/>
+ <xs:element ref="organismName" minOccurs="0"/>
+ <xs:element ref="genus" minOccurs="0" maxOccurs="1"/>
+ <xs:element ref="species" minOccurs="0" maxOccurs="1"/>
+ <xs:element ref="strain" minOccurs="0" maxOccurs="1"/>
+ <xs:element name="genes" type="GenesType" minOccurs="0"/>
+ <xs:element name="reads" type="ReadsType" minOccurs="0"/>
+ </xs:sequence>
+ </xs:complexType>
+
+ <xs:complexType name="OrganismsType">
+ <xs:sequence maxOccurs="unbounded">
+ <xs:element name="organism" type="OrganismType"/>
+ </xs:sequence>
+ </xs:complexType>
+
+ <xs:complexType name="ReportType">
+ <xs:sequence>
+ <xs:element name="dataset" type="DatasetType"/>
+ <xs:element name="organisms" type="OrganismsType"/>
+ </xs:sequence>
+ </xs:complexType>
+ <xs:element name="organismsReport" type="ReportType"/>
+</xs:schema>
\ No newline at end of file
diff --git a/src/malt/io/xml/ReadsType.java b/src/malt/io/xml/ReadsType.java
new file mode 100644
index 0000000..2f9b075
--- /dev/null
+++ b/src/malt/io/xml/ReadsType.java
@@ -0,0 +1,84 @@
+/**
+ * ReadsType.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package malt.io.xml;
+
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlType;
+import java.util.ArrayList;
+import java.util.List;
+
+
+/**
+ * <p>Java class for ReadsType complex type.
+ * <p/>
+ * <p>The following schema fragment specifies the expected content contained within this class.
+ * <p/>
+ * <pre>
+ * <complexType name="ReadsType">
+ * <complexContent>
+ * <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ * <sequence maxOccurs="unbounded">
+ * <element name="sequence" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ * </sequence>
+ * </restriction>
+ * </complexContent>
+ * </complexType>
+ * </pre>
+ */
+@XmlAccessorType(XmlAccessType.FIELD)
+@XmlType(name = "ReadsType", propOrder = {
+        "sequence"
+})
+public class ReadsType {
+
+    @XmlElement(required = true)
+    protected List<String> sequence; // created lazily by getSequence()
+
+    /**
+     * Gets the value of the sequence property.
+     * <p/>
+     * <p/>
+     * This accessor method returns a reference to the live list,
+     * not a snapshot. Therefore any modification you make to the
+     * returned list will be present inside the JAXB object.
+     * This is why there is not a <CODE>set</CODE> method for the sequence property.
+     * <p/>
+     * <p/>
+     * For example, to add a new item, do as follows:
+     * <pre>
+     *    getSequence().add(newItem);
+     * </pre>
+     * <p/>
+     * <p/>
+     * <p/>
+     * Objects of the following type(s) are allowed in the list
+     * {@link String }
+     */
+    public List<String> getSequence() {
+        if (sequence == null) {
+            sequence = new ArrayList<>();
+        }
+        return this.sequence;
+    }
+
+}
diff --git a/src/malt/io/xml/RelativeAmount.java b/src/malt/io/xml/RelativeAmount.java
new file mode 100644
index 0000000..af16fe9
--- /dev/null
+++ b/src/malt/io/xml/RelativeAmount.java
@@ -0,0 +1,96 @@
+/**
+ * RelativeAmount.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package malt.io.xml;
+
+import javax.xml.bind.annotation.*;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+
+
+/**
+ * <p>Java class for anonymous complex type.
+ * <p/>
+ * <p>The following schema fragment specifies the expected content contained within this class.
+ * <p/>
+ * <pre>
+ * <complexType>
+ * <simpleContent>
+ * <extension base="<http://www.w3.org/2001/XMLSchema>decimal">
+ * <attribute name="count" type="{http://www.w3.org/2001/XMLSchema}positiveInteger" />
+ * </extension>
+ * </simpleContent>
+ * </complexType>
+ * </pre>
+ */
+@XmlAccessorType(XmlAccessType.FIELD)
+@XmlType(name = "", propOrder = {
+        "value"
+})
+@XmlRootElement(name = "relativeAmount")
+public class RelativeAmount {
+
+    @XmlValue
+    protected BigDecimal value; // decimal text content of the relativeAmount element
+    @XmlAttribute
+    @XmlSchemaType(name = "positiveInteger")
+    protected BigInteger count;
+
+    /**
+     * Gets the value of the value property.
+     *
+     * @return possible object is
+     * {@link BigDecimal }
+     */
+    public BigDecimal getValue() {
+        return value;
+    }
+
+    /**
+     * Sets the value of the value property.
+     *
+     * @param value allowed object is
+     *              {@link BigDecimal }
+     */
+    public void setValue(BigDecimal value) {
+        this.value = value;
+    }
+
+    /**
+     * Gets the value of the count property.
+     *
+     * @return possible object is
+     * {@link BigInteger }
+     */
+    public BigInteger getCount() {
+        return count;
+    }
+
+    /**
+     * Sets the value of the count property.
+     *
+     * @param value allowed object is
+     *              {@link BigInteger }
+     */
+    public void setCount(BigInteger value) {
+        this.count = value;
+    }
+
+}
diff --git a/src/malt/io/xml/ReportType.java b/src/malt/io/xml/ReportType.java
new file mode 100644
index 0000000..83f714f
--- /dev/null
+++ b/src/malt/io/xml/ReportType.java
@@ -0,0 +1,98 @@
+/**
+ * ReportType.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package malt.io.xml;
+
+import javax.xml.bind.annotation.*;
+
+
+/**
+ * JAXB binding for the {@code organismsReport} root element (schema type ReportType),
+ * which consists of a dataset element followed by an organisms element.
+ * <p>
+ * The following schema fragment specifies the expected content contained within this class.
+ * <pre>
+ * &lt;complexType name="ReportType">
+ * &lt;complexContent>
+ * &lt;restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ * &lt;sequence>
+ * &lt;element name="dataset" type="{}DatasetType"/>
+ * &lt;element name="organisms" type="{}OrganismsType"/>
+ * &lt;/sequence>
+ * &lt;/restriction>
+ * &lt;/complexContent>
+ * &lt;/complexType>
+ * </pre>
+ */
+
+@XmlAccessorType(XmlAccessType.FIELD)
+@XmlType(name = "ReportType", propOrder = {
+ "dataset",
+ "organisms"
+})
+@XmlRootElement(name = "organismsReport")
+public class ReportType {
+
+ @XmlElement(required = true)
+ protected DatasetType dataset;
+ @XmlElement(required = true)
+ protected OrganismsType organisms;
+
+ /**
+ * Gets the value of the required dataset element.
+ *
+ * @return possible object is
+ * {@link DatasetType }
+ */
+ public DatasetType getDataset() {
+ return dataset;
+ }
+
+ /**
+ * Sets the value of the required dataset element.
+ *
+ * @param value allowed object is
+ * {@link DatasetType }
+ */
+ public void setDataset(DatasetType value) {
+ this.dataset = value;
+ }
+
+ /**
+ * Gets the value of the required organisms element.
+ *
+ * @return possible object is
+ * {@link OrganismsType }
+ */
+ public OrganismsType getOrganisms() {
+ return organisms;
+ }
+
+ /**
+ * Sets the value of the required organisms element.
+ *
+ * @param value allowed object is
+ * {@link OrganismsType }
+ */
+ public void setOrganisms(OrganismsType value) {
+ this.organisms = value;
+ }
+
+}
diff --git a/src/malt/io/xml/Taxonomy.java b/src/malt/io/xml/Taxonomy.java
new file mode 100644
index 0000000..735acb7
--- /dev/null
+++ b/src/malt/io/xml/Taxonomy.java
@@ -0,0 +1,95 @@
+/**
+ * Taxonomy.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package malt.io.xml;
+
+import javax.xml.bind.annotation.*;
+import java.math.BigInteger;
+
+
+/**
+ * JAXB binding for the {@code taxonomy} element: a string value that may carry a
+ * {@code taxon_id} attribute.
+ * <p>
+ * The following schema fragment specifies the expected content contained within this class.
+ * <pre>
+ * &lt;complexType>
+ * &lt;simpleContent>
+ * &lt;extension base="&lt;http://www.w3.org/2001/XMLSchema>string">
+ * &lt;attribute name="taxon_id" type="{http://www.w3.org/2001/XMLSchema}positiveInteger" />
+ * &lt;/extension>
+ * &lt;/simpleContent>
+ * &lt;/complexType>
+ * </pre>
+ */
+@XmlAccessorType(XmlAccessType.FIELD)
+@XmlType(name = "", propOrder = {
+ "value"
+})
+@XmlRootElement(name = "taxonomy")
+public class Taxonomy {
+
+ @XmlValue
+ protected String value;
+ @XmlAttribute(name = "taxon_id")
+ @XmlSchemaType(name = "positiveInteger")
+ protected BigInteger taxonId;
+
+ /**
+ * Gets the string content of the element.
+ *
+ * @return possible object is
+ * {@link String }
+ */
+ public String getValue() {
+ return value;
+ }
+
+ /**
+ * Sets the string content of the element.
+ *
+ * @param value allowed object is
+ * {@link String }
+ */
+ public void setValue(String value) {
+ this.value = value;
+ }
+
+ /**
+ * Gets the value of the taxon_id attribute.
+ *
+ * @return possible object is
+ * {@link BigInteger }
+ */
+ public BigInteger getTaxonId() {
+ return taxonId;
+ }
+
+ /**
+ * Sets the value of the taxon_id attribute.
+ *
+ * @param value allowed object is
+ * {@link BigInteger }
+ */
+ public void setTaxonId(BigInteger value) {
+ this.taxonId = value;
+ }
+
+}
diff --git a/src/malt/mapping/Mapping.java b/src/malt/mapping/Mapping.java
new file mode 100644
index 0000000..0b30730
--- /dev/null
+++ b/src/malt/mapping/Mapping.java
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.mapping;
+
+import jloda.util.Basic;
+import jloda.util.CanceledException;
+import jloda.util.ProgressListener;
+import jloda.util.ProgressPercentage;
+import malt.data.ISequenceAccessor;
+import malt.data.RefIndex2ClassId;
+import megan.classification.Classification;
+import megan.classification.IdParser;
+
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * Maintains mapping from Reference indices to classification
+ * Daniel Huson, 2.2016
+ */
+public class Mapping extends RefIndex2ClassId {
+ private final static String version = "V1.1";
+ private final String fName;
+
+ /**
+ * constructs a new table
+ * @param fName name of the classification, becomes part of the magic number of the saved file
+ * @param maxIndex passed on to the RefIndex2ClassId constructor
+ */
+ public Mapping(String fName, int maxIndex) {
+ super(maxIndex);
+ this.fName = fName;
+ }
+
+ /**
+ * computes the mapping for the given reference database (references whose class id is 0 are skipped)
+ * @param fName name of the classification
+ * @param referencesDB reference sequences, one header per sequence
+ * @param classificationMapper determines the class id of a header line
+ * @param progress progress listener; it is closed here if it is a ProgressPercentage
+ * @return the new mapping
+ */
+ public static Mapping create(String fName, ISequenceAccessor referencesDB, IdParser classificationMapper, ProgressListener progress) throws CanceledException, IOException {
+ final Mapping mapping = new Mapping(fName, referencesDB.getNumberOfSequences());
+ final String tag = Classification.createShortTag(fName);
+ progress.setMaximum(referencesDB.getNumberOfSequences());
+ progress.setProgress(0);
+ for (int i = 0; i < referencesDB.getNumberOfSequences(); i++) {
+ String header = Basic.toString(referencesDB.getHeader(i));
+ Integer classId = classificationMapper.getIdFromHeaderLine(header);
+ if (classId != null && classId != 0) { // a null id must not be unboxed
+ mapping.put(i, classId);
+ referencesDB.extendHeader(i, tag, classId);
+ }
+ progress.incrementProgress();
+ }
+ if (progress instanceof ProgressPercentage)
+ progress.close();
+ return mapping;
+ }
+
+ /**
+ * saves the mapping to a file, using a magic number that is derived from the classification name
+ *
+ * @param file
+ * @throws IOException
+ */
+ public void save(File file) throws IOException, CanceledException {
+ super.save(file, makeMagicNumber(fName));
+ }
+
+ /**
+ * construct from an existing file that was written for the same classification name
+ *
+ * @param file
+ * @throws IOException
+ * @throws CanceledException
+ */
+ public Mapping(String fName, File file) throws IOException, CanceledException {
+ super(file, makeMagicNumber(fName));
+ this.fName = fName;
+ }
+
+ /** builds the magic number; the explicit charset keeps the bytes independent of the platform default */
+ private static byte[] makeMagicNumber(String fName) {
+ return ("MA" + fName + version).getBytes(java.nio.charset.StandardCharsets.UTF_8);
+ }
+}
diff --git a/src/malt/mapping/MappingManager.java b/src/malt/mapping/MappingManager.java
new file mode 100644
index 0000000..7ebdeca
--- /dev/null
+++ b/src/malt/mapping/MappingManager.java
@@ -0,0 +1,115 @@
+/**
+ * MappingManager.java
+ * Copyright (C) 2015 Daniel H. Huson
+ * <p>
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ * <p>
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * <p>
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * <p>
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.mapping;
+
+import jloda.util.Basic;
+import jloda.util.CanceledException;
+import megan.classification.Classification;
+import megan.classification.ClassificationManager;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+
+/**
+ * manages MALT mapping files; all state is static and is set by loadMappings()
+ * Daniel Huson, 2.2016
+ */
+public class MappingManager {
+ private static String[] cNames;
+ private static int taxonomyIndex;
+ private static Mapping[] mappings;
+
+ /**
+ * load all mappings, one per classification name, each from the file {lower-case name}.idx
+ * @param cNames names of the classifications to load
+ * @param indexDirectory directory that contains the .idx files
+ * @throws IOException
+ * @throws CanceledException
+ */
+ public static void loadMappings(String[] cNames, String indexDirectory) throws IOException, CanceledException {
+ MappingManager.cNames = cNames;
+ mappings = new Mapping[cNames.length];
+ taxonomyIndex = -1;
+ for (int i = 0; i < cNames.length; i++) {
+ String cName = cNames[i];
+ if (cName.equals(Classification.Taxonomy))
+ taxonomyIndex = i;
+ String fileName = cName.toLowerCase() + ".idx";
+ ClassificationManager.ensureTreeIsLoaded(cName);
+ mappings[i] = new Mapping(cName, new File(indexDirectory, fileName));
+ }
+ }
+
+ /**
+ * get all names of loaded mappings
+ * @return the array that was passed to loadMappings() (not a copy)
+ */
+ public static String[] getCNames() {
+ return cNames;
+ }
+
+ /**
+ * gets the appropriate mapping for the given fID
+ *
+ * @param fID index of the classification in the array passed to loadMappings()
+ * @return mapping
+ */
+ public static Mapping getMapping(int fID) {
+ return mappings[fID];
+ }
+
+ /**
+ * gets the taxonomy mapping
+ * @return taxonomy mapping, or null if the taxonomy classification was not among the loaded names
+ */
+ public static Mapping getTaxonomyMapping() {
+ if (taxonomyIndex >= 0)
+ return getMapping(taxonomyIndex);
+ else
+ return null;
+ }
+
+ /**
+ * determine all available classifications, based on the .tre files found in the index directory
+ * @param indexDirectory
+ * @return supported classifications matching a .tre file name (ignoring case); empty if the directory cannot be listed
+ */
+ public static String[] determineAvailableMappings(String indexDirectory) {
+ File[] files = (new File(indexDirectory)).listFiles();
+ if (files != null) {
+
+ ArrayList<String> cNames = new ArrayList<>(files.length);
+ for (File file : files) {
+ String name = file.getName();
+ if (name.endsWith(".tre")) {
+ name = Basic.replaceFileSuffix(name, "");
+ for (String cName : ClassificationManager.getAllSupportedClassifications()) {
+ if (cName.equalsIgnoreCase(name))
+ cNames.add(cName);
+ }
+ }
+ }
+ return cNames.toArray(new String[cNames.size()]);
+ } else
+ return new String[0];
+
+ }
+}
diff --git a/src/malt/sequence/Alphabet.java b/src/malt/sequence/Alphabet.java
new file mode 100644
index 0000000..7ac37e0
--- /dev/null
+++ b/src/malt/sequence/Alphabet.java
@@ -0,0 +1,189 @@
+/**
+ * Alphabet.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.sequence;
+
+import malt.data.INormalizer;
+
+/**
+ * Alphabet base class: maps letters to integer codes and defines the bit layout used for them
+ * <p/>
+ * Created by huson on 9/30/14.
+ */
+public class Alphabet implements INormalizer {
+ protected final byte alphabetSize;
+ protected final long[] letter2code;
+ protected final byte[] letter2normalized;
+ protected final byte[] code2letter;
+ protected final int bitsPerLetter;
+ protected final int unusedBits;
+ protected final int lettersPerWord;
+ protected final long letterMask;
+ protected final byte undefinedLetterCode;
+ protected final String definitionString;
+ private final byte undefinedLetter; // returned by getNormalized() for bytes that have no table entry
+
+ /**
+ * constructor
+ *
+ * @param definitionString letter groups separated by blanks; square brackets are ignored
+ * @param undefinedLetter letter that all letters not listed in the definition are normalized to
+ */
+ public Alphabet(String definitionString, byte undefinedLetter) {
+ this.undefinedLetter = undefinedLetter;
+ boolean isUndefinedContained = (definitionString.indexOf(undefinedLetter) != -1);
+ // drop group brackets and collapse runs of blanks, so that split(" ") yields no empty groups
+ this.definitionString = definitionString.replace("[", "").replace("]", "").replaceAll(" +", " ").trim();
+ String[] letterGroups = this.definitionString.split(" ");
+ alphabetSize = (byte) (letterGroups.length + (isUndefinedContained ? 0 : 1));
+
+ // smallest number of bits b such that 2^b > alphabetSize
+ int numberOfBits = 1;
+ while ((1 << numberOfBits) <= alphabetSize)
+ numberOfBits++;
+ bitsPerLetter = numberOfBits;
+ letterMask = (1L << bitsPerLetter) - 1;
+ lettersPerWord = 64 / bitsPerLetter;
+ unusedBits = 64 - lettersPerWord * bitsPerLetter;
+
+ System.err.println("Alphabet: " + definitionString + " bits: " + bitsPerLetter);
+
+ code2letter = new byte[alphabetSize + 1];
+
+ undefinedLetterCode = alphabetSize;
+ letter2code = new long[127];
+ letter2normalized = new byte[127];
+
+ // by default, every letter is undefined
+ for (int i = 0; i < 127; i++) {
+ letter2code[i] = undefinedLetterCode;
+ letter2normalized[i] = undefinedLetter;
+ }
+ code2letter[undefinedLetterCode] = undefinedLetter;
+
+ // groups get the codes 1, 2, ...; all letters of a group share the code of their group
+ int bits = 1;
+ for (String letterGroup : letterGroups) {
+ for (int j = 0; j < letterGroup.length(); j++) {
+ int letter = Character.toLowerCase(letterGroup.charAt(j));
+ letter2code[letter] = bits;
+ letter = Character.toUpperCase(letterGroup.charAt(j));
+ letter2code[letter] = bits;
+ // NOTE(review): only the upper-case form gets a normalized letter, lower-case input normalizes to the undefined letter - confirm this is intended
+ letter2normalized[letter] = (byte) letterGroup.charAt(0);
+ if (j == 0)
+ code2letter[bits] = (byte) letter;
+ }
+ bits++;
+ }
+ }
+
+ /**
+ * gets the alphabet size: the number of letter groups, plus one if the undefined letter is not part of the definition
+ *
+ * @return alphabet size
+ */
+ public byte getAlphabetSize() {
+ return alphabetSize;
+ }
+
+ /**
+ * gets the number of bits used to encode a letter
+ *
+ * @return number of bits
+ */
+ public int getBitsPerLetter() {
+ return bitsPerLetter;
+ }
+
+ /**
+ * gets the letter to code mapping (the internal table with 127 entries, not a copy)
+ *
+ * @return letter to code
+ */
+ public long[] getLetter2Code() {
+ return letter2code;
+ }
+
+ /**
+ * gets the code to letter mapping (the internal table, not a copy)
+ *
+ * @return code to letter
+ */
+ public byte[] getCode2Letter() {
+ return code2letter;
+ }
+
+ /**
+ * gets the mask used for a single letter, i.e. the lowest bitsPerLetter bits set
+ *
+ * @return letter mask
+ */
+ public long getLetterMask() {
+ return letterMask;
+ }
+
+ /**
+ * gets the number of letters per 64-bit word
+ *
+ * @return letters per word
+ */
+ public int getLettersPerWord() {
+ return lettersPerWord;
+ }
+
+ /**
+ * gets the number of unused bits
+ *
+ * @return number of unused (per 64-bit word)
+ */
+ public int getUnusedBits() {
+ return unusedBits;
+ }
+
+ /**
+ * gets the code assigned to undefined letter
+ *
+ * @return code
+ */
+ public byte getUndefinedLetterCode() {
+ return undefinedLetterCode;
+ }
+
+ /**
+ * gets the definition string, without brackets and with single blanks between groups
+ *
+ * @return definition
+ */
+ public String getDefinitionString() {
+ return definitionString;
+ }
+
+ /**
+ * returns normalized letter
+ *
+ * @param letter
+ * @return normalized letter, or the undefined letter if the byte is not in the range 0..126
+ */
+ @Override
+ public byte getNormalized(byte letter) {
+ // negative bytes (non-ASCII input) and 127 have no entry in the 127-element table
+ return (letter >= 0 && letter < letter2normalized.length) ? letter2normalized[letter] : undefinedLetter;
+ }
+}
diff --git a/src/malt/sequence/DNA5Alphabet.java b/src/malt/sequence/DNA5Alphabet.java
new file mode 100644
index 0000000..0bc3096
--- /dev/null
+++ b/src/malt/sequence/DNA5Alphabet.java
@@ -0,0 +1,67 @@
+/**
+ * DNA5Alphabet.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.sequence;
+
+/**
+ * DNA5 alphabet
+ * Created by huson on 9/30/14.
+ */
+public class DNA5Alphabet extends Alphabet {
+ private static DNA5Alphabet instance;
+
+ // complement of each 7-bit letter, in upper case; everything except A, C, G, T, U (either case) and '-' maps to 'N'
+ final static private byte[] normalizedComplement = {
+ 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
+ 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
+ 'N', '-', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'T',
+ 'N', 'G', 'N', 'N', 'N', 'C', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'A', 'A', 'N', 'N',
+ 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'T', 'N', 'G', 'N', 'N', 'N', 'C', 'N', 'N', 'N', 'N', 'N', 'N',
+ 'N', 'N', 'N', 'N', 'N', 'N', 'A', 'A', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N'
+ };
+
+ /**
+ * gets the single instance of the DNA5 alphabet, creating it on first use
+ * @return instance
+ */
+ public static DNA5Alphabet getInstance() {
+ if (instance == null)
+ instance = new DNA5Alphabet();
+ return instance;
+ }
+
+ /** constructor: letter groups A, C, G and T/U, with N as the undefined letter */
+ private DNA5Alphabet() {
+ super("A C G TU", (byte) 'N');
+ }
+
+ /**
+ * computes the reverse complement of a sequence
+ * (the two arrays must be different, as the result is written while the input is still being read)
+ * @param sequence input sequence
+ * @param reverseComplement array that receives the result, must be at least as long as sequence
+ */
+ public static void reverseComplement(byte[] sequence, byte[] reverseComplement) {
+ for (int i = 0; i < sequence.length; i++) {
+ final byte letter = sequence[i];
+ // bytes without a table entry (negative values and 127) are complemented to 'N', like any other unknown letter
+ reverseComplement[sequence.length - (i + 1)] = (letter >= 0 && letter < normalizedComplement.length ? normalizedComplement[letter] : (byte) 'N');
+ }
+ }
+}
diff --git a/src/malt/sequence/FastAFileIteratorCode.java b/src/malt/sequence/FastAFileIteratorCode.java
new file mode 100644
index 0000000..eb136e8
--- /dev/null
+++ b/src/malt/sequence/FastAFileIteratorCode.java
@@ -0,0 +1,275 @@
+/**
+ * FastAFileIteratorCode.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.sequence;
+
+import jloda.util.Basic;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+
+/**
+ * Reads a multi-FastA or FastQ file; headers are returned as byte arrays, sequences in encoded form
+ * Daniel Huson, 8.2014
+ */
+public class FastAFileIteratorCode {
+ private final SequenceEncoder sequenceEncoder;
+ private byte[] buffer = new byte[10000000];
+ private int length = 0;
+ private long position = 0;
+ private final long maxProgress;
+ private boolean expectingHeader = true;
+ private BufferedInputStream inputStream;
+ private boolean isFastQ = false;
+
+ private boolean ok = true; // haven't seen next() fail yet
+ private boolean nextIsLoaded = false; // have already loaded the next item
+
+ /**
+ * constructor
+ *
+ * @param fileName name of the input file
+ * @param sequenceEncoder encoder used by nextSequenceCode()
+ * @throws IOException if opening the file or reading its first byte fails
+ */
+ public FastAFileIteratorCode(final String fileName, final SequenceEncoder sequenceEncoder) throws IOException {
+ this.sequenceEncoder = sequenceEncoder;
+ inputStream = new BufferedInputStream(Basic.getInputStreamPossiblyZIPorGZIP(fileName), 8192);
+ maxProgress = Basic.guessUncompressedSizeOfFile(fileName);
+ // The first byte decides the format ('@' means FastQ) and is consumed here; hasNext() puts a '>'
+ // in front of the first header. Read errors are propagated instead of being silently ignored.
+ isFastQ = (inputStream.read() == '@');
+ }
+
+ /**
+ * has next header or sequence; the item is loaded into the buffer as a side effect
+ * @return true if has a header or sequence
+ */
+ public boolean hasNext() {
+ if (!ok)
+ return false;
+ else if (nextIsLoaded)
+ return true;
+
+ try {
+ if (isFastQ) { // expect four lines per read
+ try {
+ length = 0;
+ if (expectingHeader) {
+ buffer[length++] = (byte) '>';
+ int value = inputStream.read();
+ if (value == -1)
+ return ok = false;
+ if (value != '@')
+ buffer[length++] = (byte) value;
+ length = readLineIntoBuffer(inputStream, length);
+ position += length;
+ return ok = (length > 1);
+ } else {
+ length = readLineIntoBuffer(inputStream, length);
+ if (length == 0)
+ return ok = false;
+ position += length;
+ position += skipLine(inputStream); // skip comment line
+ position += skipLine(inputStream); // skip quality line
+ return ok = true;
+ }
+ } catch (IOException e) {
+ return ok = false;
+ }
+ } else {
+ int value;
+ length = 0;
+ boolean first = true;
+ try {
+ while (true) {
+ value = inputStream.read();
+ if (expectingHeader) {
+ if (value == -1)
+ return ok = false;
+ if (first) {
+ first = false;
+ if (value != '>')
+ buffer[length++] = '>';
+ }
+ if (value == '\n' || value == '\r') {
+ position += length;
+ return ok = (length > 0);
+ }
+ } else {
+ if (Character.isWhitespace(value))
+ continue; // skip white space
+ if (value == '>' || value == -1) {
+ position += length;
+ return ok = (length > 0);
+ }
+ }
+ if (length >= buffer.length)
+ growBuffer();
+ buffer[length++] = (byte) value;
+ }
+ } catch (IOException e) {
+ return ok = false;
+ }
+ }
+ } finally {
+ nextIsLoaded = true;
+ }
+ }
+
+ /**
+ * get next header; if a sequence is pending, it is skipped first
+ * @return header, or null if there is none
+ */
+ public byte[] nextHeader() {
+ try {
+ if (!nextIsLoaded && !hasNext())
+ return null;
+
+ if (expectingHeader)
+ expectingHeader = false;
+ else
+ nextSequenceCode(); // skip sequence
+
+ if (length > 0 || hasNext()) {
+ byte[] result = new byte[length];
+ System.arraycopy(buffer, 0, result, 0, length);
+ length = 0;
+ return result;
+ }
+ return null;
+ } finally {
+ nextIsLoaded = false;
+ }
+ }
+
+ /**
+ * get next sequence in encoded form; if a header is pending, it is skipped first
+ * @return sequence, or null if there is none
+ */
+ public long[] nextSequenceCode() {
+ try {
+ if (!nextIsLoaded && !hasNext())
+ return null;
+
+ if (expectingHeader)
+ nextHeader(); // skip header
+ else
+ expectingHeader = true;
+
+ if (length > 0 || hasNext()) {
+ long[] result = sequenceEncoder.encode(buffer, length);
+ length = 0;
+ return result;
+ }
+ return null;
+ } finally {
+ nextIsLoaded = false;
+ }
+ }
+
+ /**
+ * read the next line into the buffer and consume its line end ("\n", "\r" or "\r\n")
+ * @param inputStream
+ * @param offset position in buffer at which the first byte of the line is stored
+ * @return position of next available position in buffer
+ */
+ private int readLineIntoBuffer(BufferedInputStream inputStream, int offset) throws IOException {
+ int value = inputStream.read();
+ while (value != '\r' && value != '\n' && value != -1) {
+ if (offset >= buffer.length) { // need to grow buffer
+ growBuffer();
+ }
+ buffer[offset++] = (byte) value;
+ value = inputStream.read();
+ }
+ skipLineFeedAfterCarriageReturn(inputStream, value);
+ return offset;
+ }
+
+ /**
+ * grows the line buffer to twice its length (at most Integer.MAX_VALUE - 10)
+ */
+ private void growBuffer() {
+ // computed in long: 2 * buffer.length overflows int once the buffer holds 2^30 bytes or more
+ byte[] nextBuffer = new byte[(int) Math.min(Integer.MAX_VALUE - 10L, 2L * buffer.length)];
+ System.arraycopy(buffer, 0, nextBuffer, 0, buffer.length);
+ buffer = nextBuffer;
+ }
+
+ /**
+ * skip the current line, including its line end ("\n", "\r" or "\r\n")
+ * @param inputStream
+ * @return number of bytes skipped, not counting the first byte of the line or the line end
+ */
+ private int skipLine(BufferedInputStream inputStream) throws IOException {
+ int skipped = 0;
+ int value = inputStream.read();
+ while (value != '\r' && value != '\n' && value != -1) {
+ value = inputStream.read();
+ skipped++;
+ }
+ skipLineFeedAfterCarriageReturn(inputStream, value);
+ return skipped;
+ }
+
+ /** if a line was ended by '\r', consumes a directly following '\n', so that "\r\n" is not read as an extra empty line */
+ private static void skipLineFeedAfterCarriageReturn(BufferedInputStream inputStream, int lineEnd) throws IOException {
+ if (lineEnd == '\r') {
+ inputStream.mark(1);
+ if (inputStream.read() != '\n')
+ inputStream.reset(); // lone '\r': the byte just read belongs to the next line
+ }
+ }
+
+ public void remove() {
+ }
+
+ /**
+ * close the stream
+ * @throws java.io.IOException
+ */
+ public void close() throws IOException {
+ inputStream.close();
+ }
+
+ /**
+ * gets the maximum progress value, which is the guessed uncompressed size of the file
+ * @return maximum progress value
+ */
+ public long getMaximumProgress() {
+ return maxProgress;
+ }
+
+ /**
+ * gets the current progress value
+ * @return current progress value
+ */
+ public long getProgress() {
+ return position;
+ }
+
+ /**
+ * is the file we are reading actually a fastQ file? (decided by whether the first byte of the file is '@')
+ * @return true, if fastQ
+ */
+ public boolean isFastQ() {
+ return isFastQ;
+ }
+}
diff --git a/src/malt/sequence/ISeedExtractor.java b/src/malt/sequence/ISeedExtractor.java
new file mode 100644
index 0000000..f10e844
--- /dev/null
+++ b/src/malt/sequence/ISeedExtractor.java
@@ -0,0 +1,64 @@
+/**
+ * ISeedExtractor.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.sequence;
+
+/**
+ * seed extractor: extracts seed codes from encoded sequences
+ * Daniel Huson, 2014
+ */
+public interface ISeedExtractor {
+ /** decodes a seed code into letters (presumably the inverse of getSeedCode - TODO confirm against the implementations) */
+ byte[] decodeSeed(long seedCode, int seedWeight);
+
+ /**
+ * extract a seed from a sequence code
+ * @param seedShape
+ * @param seedWeight
+ * @param sequenceCode
+ * @param pos
+ * @return seed
+ */
+ long getSeedCode(boolean[] seedShape, int seedWeight, long[] sequenceCode, int pos);
+
+ /**
+ * extract a seed from a sequence code
+ * @param seedShape
+ * @param seedWeight
+ * @param sequenceCode
+ * @param pos
+ * @param failValue value returned if sequence too short
+ * @return seed
+ */
+ long getSeedCode(boolean[] seedShape, int seedWeight, long[] sequenceCode, int pos, int failValue);
+
+ /**
+ * is this a good seed?
+ * @param seedCode
+ * @param seedWeight
+ * @return true, if good
+ */
+ boolean isGoodSeed(long seedCode, int seedWeight);
+
+ /**
+ * get the number of bits per letter
+ * @return bits
+ */
+ int getBitsPerLetter();
+}
diff --git a/src/malt/sequence/ProteinAlphabet.java b/src/malt/sequence/ProteinAlphabet.java
new file mode 100644
index 0000000..ab1611a
--- /dev/null
+++ b/src/malt/sequence/ProteinAlphabet.java
@@ -0,0 +1,46 @@
+/**
+ * ProteinAlphabet.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.sequence;
+
+/**
+ * protein alphabet: one letter group per amino acid (20 groups), with X as the undefined letter
+ * Daniel Huson, 2014
+ */
+public class ProteinAlphabet extends Alphabet {
+ private static ProteinAlphabet instance;
+
+ /**
+ * constructor, only called from getInstance()
+ */
+ private ProteinAlphabet() {
+ super("A C D E F G H I K L M N P Q R S T V W Y", (byte) 'X');
+ }
+
+ /**
+ * gets the single instance of the protein alphabet, creating it on first use
+ * @return instance
+ */
+ public static ProteinAlphabet getInstance() {
+ if (instance != null)
+ return instance;
+ instance = new ProteinAlphabet();
+ return instance;
+ }
+}
diff --git a/src/malt/sequence/ProteinSequenceEncoder.java b/src/malt/sequence/ProteinSequenceEncoder.java
new file mode 100644
index 0000000..c141098
--- /dev/null
+++ b/src/malt/sequence/ProteinSequenceEncoder.java
@@ -0,0 +1,160 @@
+/**
+ * ProteinSequenceEncoder.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.sequence;
+
+import jloda.util.Basic;
+import jloda.util.ProgressPercentage;
+import malt.data.SeedShape;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+/**
+ * test protein sequence encoder
+ * Created by huson on 9/30/14.
+ */
+public class ProteinSequenceEncoder {
+
+    /**
+     * test program: encodes a fixed protein sequence, decodes it again and prints the results of the
+     * letter iterators, of seed extraction (full and reduced alphabet) and of sorting the reduced seeds
+     * @param args not used
+     * @throws java.io.IOException
+     */
+    public static void main(String[] args) throws IOException {
+        SequenceEncoder encoder = new SequenceEncoder(ProteinAlphabet.getInstance());
+        ReducedAlphabet reducedAlphabet = new ReducedAlphabet(ProteinAlphabet.getInstance(), "DIAMOND_11");
+
+        byte[] sequence1 = "MKTKSSNNIKKIYYISSILVGIYLCWQIIIQIIFLMDNSIAILEAIGMVVFISVYSLAVAINGWILVGRMKKSSKKAQYE".getBytes();
+
+        System.err.println("set: " + Basic.toString(sequence1));
+        long[] encoded = encoder.encode(sequence1, sequence1.length, null);
+
+        byte[] sequence2 = encoder.decode(encoded);
+        System.err.println("get: " + Basic.toString(sequence2));
+
+        System.err.println("SAME: " + Basic.equal(sequence1, sequence2));
+
+        for (int i = 0; i < sequence2.length; i++) {
+            if (sequence2[i] != encoder.decode(encoder.getLetterCode(encoded, i))) // report both sides as letters, not as raw codes
+                System.err.println((char) sequence2[i] + " != " + (char) encoder.decode(encoder.getLetterCode(encoded, i)));
+        }
+
+        System.err.print("It.: ");
+        for (Iterator<Byte> it = encoder.getLetterIterator(encoded); it.hasNext(); ) {
+            System.err.print((char) (byte) it.next());
+        }
+        System.err.println();
+        int skip = 30;
+        System.err.print("It.: ");
+        for (int i = 0; i < skip; i++)
+            System.err.print(" ");
+        for (Iterator<Byte> it = encoder.getLetterIterator(encoded, skip); it.hasNext(); ) {
+            System.err.print((char) (byte) it.next());
+        }
+        System.err.println();
+        int rev = 30;
+        System.err.print("It.: ");
+        for (Iterator<Byte> it = encoder.getLetterReverseIterator(encoded, rev); it.hasNext(); ) {
+            System.err.print((char) (byte) it.next());
+        }
+        System.err.println(" reverse " + rev);
+
+
+        SeedShape2 seedShape = new SeedShape2(SeedShape.SINGLE_PROTEIN_SEED);
+
+        System.err.println("SeedShape: " + seedShape);
+
+        for (int i = 0; i < 5; i++) {
+            System.err.print("Span at " + i + ": ");
+            long[] span = encoder.getSeedSpanCode(seedShape.getLength(), encoded, i, null);
+            System.err.print(Basic.toString(encoder.decode(span)));
+            System.err.println();
+
+            long seedCode = encoder.getSeedCode(seedShape.getMask(), seedShape.getWeight(), encoded, i);
+            System.err.println("Full seed at " + i + ": " + Basic.toString(encoder.decodeSeed(seedCode, seedShape.getWeight()))
+                    + " " + Basic.toBinaryString(seedCode));
+
+            long reducedSeedCode = reducedAlphabet.getSeedCode(seedShape.getMask(), seedShape.getWeight(), encoded, i);
+            System.err.println("Reduced seed at " + i + ": " +
+                    Basic.toString(reducedAlphabet.decodeSeed(reducedSeedCode, seedShape.getWeight()))
+                    + " " + Basic.toBinaryString(reducedSeedCode));
+        }
+
+        int limit = sequence1.length - seedShape.getLength();
+        long[] seeds = new long[2 * limit]; // pairs: reduced seed code at even index, its position at odd index
+        for (int i = 0; i < limit; i++) {
+            seeds[2 * i] = reducedAlphabet.getSeedCode(seedShape.getMask(), seedShape.getWeight(), encoded, i);
+            seeds[2 * i + 1] = i;
+        }
+
+        seeds = ProteinSequenceEncoder.radixSort2(seeds, seeds.length, 64 - reducedAlphabet.unusedBits, reducedAlphabet.bitsPerLetter, new ProgressPercentage("Sorting..."));
+
+        for (int i = 0; i < seeds.length; i += 2) {
+            System.err.println(String.format("i=%3d pos=%3d seed=%s", i, seeds[i + 1], Basic.toString(reducedAlphabet.decodeSeed(seeds[i], seedShape.getWeight()))));
+        }
+    }
+
+    /**
+     * radix sort list of longs, using entries with even index as keys and entries with odd indices as associated values.
+     * Keys are ordered by their lowest (w / d) * d bits taken as an unsigned number; pairs with equal keys keep their order.
+     * @param array    key/value pairs; may be used as scratch space, so use only the returned array afterwards
+     * @param length   number of entries of array to sort (keys plus values), must be even
+     * @param w        number of bits to use (64 to sort full numbers)
+     * @param d        number of bits to consider at a time - in the case of 4-bit encoded letters: 4
+     * @param progress progress reporter or null
+     * @return sorted array; either the given array or a newly allocated one of size length
+     */
+    public static long[] radixSort2(long[] array, int length, int w, int d, final ProgressPercentage progress) {
+        if (length % 2 != 0)
+            throw new RuntimeException("radixSort2(length=" + length + "): length must be even");
+        final int steps = w / d;
+        final int buckets = 1 << d;
+        final long digitMask = buckets - 1;
+        long[] a = array;
+        long[] b = new long[length];
+        if (progress != null) {
+            progress.setMaximum(steps);
+            progress.setProgress(0);
+        }
+
+        for (int p = 0; p < steps; p++) {
+            final int digitShift = d * p;
+            final int[] c = new int[buckets];
+            // the next three for loops implement counting-sort
+            for (int i = 0; i < length; i += 2)
+                c[(int) ((a[i] >>> digitShift) & digitMask)]++;
+            for (int i = 1; i < buckets; i++)
+                c[i] += c[i - 1];
+            for (int i = length - 2; i >= 0; i -= 2) { // back to front, which keeps the sort stable
+                final int index = (--c[(int) ((a[i] >>> digitShift) & digitMask)]) << 1;
+                b[index] = a[i];
+                b[index + 1] = a[i + 1];
+            }
+            final long[] tmp = b; // swap arrays: this pass's output is the next pass's input
+            b = a;
+            a = tmp;
+            if (progress != null)
+                progress.setProgress(p + 1); // p + 1 passes are done, so the last pass reaches the maximum
+        }
+        return a;
+    }
+
+}
diff --git a/src/malt/sequence/ReducedAlphabet.java b/src/malt/sequence/ReducedAlphabet.java
new file mode 100644
index 0000000..cb3b109
--- /dev/null
+++ b/src/malt/sequence/ReducedAlphabet.java
@@ -0,0 +1,228 @@
+/**
+ * ReducedAlphabet.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.sequence;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.TreeMap;
+
+/**
+ * Reduced protein alphabet
+ * Daniel Huson, 9.2014
+ */
+public class ReducedAlphabet extends Alphabet implements ISeedExtractor {
+ private final Alphabet proteinAlphabet;
+ private final long[] proteinCode2ReducedCode;
+
+ private static Map<String, String> reductions;
+
+ /**
+ * constructor
+ *
+ * @param proteinAlphabet
+ * @param reductionDefinition
+ */
+ public ReducedAlphabet(final Alphabet proteinAlphabet, String reductionDefinition) throws IOException {
+ super(getReductionDefinition(reductionDefinition), (byte) 'X');
+ this.proteinAlphabet = proteinAlphabet;
+ proteinCode2ReducedCode = new long[proteinAlphabet.getCode2Letter().length];
+ for (int i = 1; i < proteinCode2ReducedCode.length; i++) {
+ proteinCode2ReducedCode[i] = getLetter2Code()[proteinAlphabet.getCode2Letter()[i]];
+ /*
+ System.err.println(Integer.toBinaryString(i)+" -> "+(char)proteinAlphabet.getCode2Letter()[i]+" -> "
+ +Integer.toBinaryString((int)proteinCode2ReducedCode[i]));
+ */
+ }
+ }
+
+ /**
+ * decode a sequenceCode
+ *
+ * @param sequenceCode
+ * @param bytes sequence
+ * @return sequence length
+ */
+ public int decode(long[] sequenceCode, byte[] bytes) {
+ int shift = 64 - bitsPerLetter;
+ int word = 0;
+ int length = 0;
+ while (true) {
+ byte bits = (byte) ((sequenceCode[word] & (letterMask << shift)) >>> shift);
+ if (bits == 0)
+ break;
+ bytes[length++] = code2letter[bits];
+
+ shift -= bitsPerLetter;
+ if (shift < 0) {
+ if (++word == sequenceCode.length)
+ break;
+ shift = 64 - bitsPerLetter;
+ }
+ }
+ return length;
+ }
+
+ /**
+ * decode a sequence
+ *
+ * @param seedCode
+ * @param seedWeight
+ * @return seed sequence
+ */
+ @Override
+ public byte[] decodeSeed(long seedCode, int seedWeight) {
+ byte[] sequence = new byte[seedWeight];
+ decode(new long[]{seedCode << (64 - seedWeight * bitsPerLetter)}, sequence);
+ return sequence;
+ }
+
+ /**
+ * gets a seed code using the reduced alphabet
+ *
+ * @param seedShape
+ * @param seedWeight
+ * @param sequenceCode
+ * @param pos
+ * @return reduced code
+ */
+ public long getSeedCode(final boolean[] seedShape, int seedWeight, long[] sequenceCode, int pos) {
+ return getSeedCode(seedShape, seedWeight, sequenceCode, pos, 0);
+ }
+
+ /**
+ * gets a seed code using the reduced alphabet
+ *
+ * @param seedShape
+ * @param seedWeight
+ * @param sequenceCode
+ * @param pos
+ * @param failValue value to return if seed extraction fails due to sequence being too short @return reduced code
+ */
+ public long getSeedCode(final boolean[] seedShape, int seedWeight, long[] sequenceCode, int pos, int failValue) {
+ // this code is a bit tricky:
+ // we need to use protein alphabet encoding parameters to step through the sequence code
+ // we need to use reduced protein alphabet encoding parameters to compute seed code
+ long seed = 0;
+ int seedShift = (seedWeight - 1) * bitsPerLetter;
+
+ int word = pos / proteinAlphabet.lettersPerWord;
+ int letterInWord = pos - proteinAlphabet.lettersPerWord * word;
+ int shift = 64 - (letterInWord + 1) * proteinAlphabet.bitsPerLetter;
+
+ for (boolean aSeedShape : seedShape) {
+ if (aSeedShape) {
+ long bits = (sequenceCode[word] & (proteinAlphabet.letterMask << shift)) >>> shift;
+ if (bits == 0)
+ return failValue;
+ bits = proteinCode2ReducedCode[(int) bits];
+ seed |= (bits << seedShift);
+ seedShift -= bitsPerLetter;
+ }
+ shift -= proteinAlphabet.bitsPerLetter;
+ if (shift < 0) {
+ shift = 64 - proteinAlphabet.bitsPerLetter;
+ word++;
+ if (word == sequenceCode.length)
+ return failValue;
+ }
+ }
+ return seed;
+ }
+
+    /**
+     * looks up the definition of a named reduction
+     *
+     * @param name name of a known reduction, or a definition string consisting of several space-separated groups
+     * @return definition registered under name; name itself if it is unknown but splits into more than one group
+     * @throws IOException if name is neither a known reduction nor a multi-group definition
+     */
+    public static String getReductionDefinition(String name) throws IOException {
+        if (reductions == null)
+            reductions = initReductions();
+
+        final String definition = reductions.get(name);
+        if (definition != null)
+            return definition;
+        if (name.split(" ").length <= 1)
+            throw new IOException("Unknown reduction: " + name);
+        return name; // not a known name, but it looks like a definition: use it as given
+    }
+
+    /**
+     * tests whether a seed is good, i.e. contains at least three different letters, none of them undefined.
+     * Letters are read from the lowest bits of the code upwards until the first zero letter code.
+     * @param seedCode   seed code to test
+     * @param seedWeight not used by this implementation
+     * @return true, if good
+     */
+    public boolean isGoodSeed(long seedCode, int seedWeight) {
+        byte first = 0;
+        byte second = 0;
+        byte third = 0;
+        for (int shift = 0; shift < 64; shift += bitsPerLetter) {
+            final byte letterCode = (byte) ((seedCode & (letterMask << shift)) >>> shift);
+            if (letterCode == 0)
+                break; // no more letters
+            if (letterCode == undefinedLetterCode)
+                return false;
+            if (first == 0) {
+                first = letterCode;
+            } else if (letterCode != first) {
+                if (second == 0)
+                    second = letterCode;
+                else if (letterCode != second && third == 0)
+                    third = letterCode;
+            }
+        }
+        return third != 0;
+    }
+
+ /**
+ * setup the set of all known reductions
+ *
+ * @return reductions
+ */
+ private static Map<String, String> initReductions() {
+ Map<String, String> reductions = new TreeMap<>();
+ // From: Bioinformatics. 2009 June 1; 25(11): 1356–1362. Published online 2009 April 7. doi: 10.1093/bioinformatics/btp164:
+ reductions.put("GBMR4", "[ADKERNTSQ] [YFLIVMCWH*X] G P");
+ reductions.put("SDM12", "A D [KER] N [STQ] [YF] [LIVM*X] C W H G P");
+ reductions.put("HSDM17", "A D [KE] R N T S Q Y F [LIV*X] M C W H G P");
+ // Murphy, Lynne Reed and Wallqvist, Anders and Levy, Ronald M., 2000 :
+ reductions.put("BLOSUM50_4", "[LVIMC*] [AGSTP] [FYW] [EDNQKRH]");
+ reductions.put("BLOSUM50_8", "[LVIMC*] [AG] [ST] P [FYW] [EDNQ] [KR] H");
+ reductions.put("BLOSUM50_10", "[LVIM*] C A G [ST] P [FYW] [EDNQ] [KR] H");
+ reductions.put("BLOSUM50_11", "[LVIM*] C A G S T P [FYW] [EDNQ] [KR] H"); // this was produced from BLOSUM50_10 by separating S and T
+ reductions.put("BLOSUM50_15", "[LVIM*] C A G S T P [FY] W E D N Q [KR] H");
+
+ reductions.put("DIAMOND_11", "[KREDQN*] C G H [ILV] M F Y W P [STA]"); // DIAMOND default
+
+ // produced especially for MALT:
+ reductions.put("MALT_10", "[LVIM*X] C [AST] G P [WYF] [DEQ] N [RK] H");
+
+ // use these together to get good sensitivity:
+ reductions.put("MALT_12A", " [LVMI*WYF] C [AST] G P D E Q N R K H");
+ reductions.put("MALT_12B", " [LVM*I] W Y F C A S T G P [DEQNRK] H");
+ reductions.put("MALT_12C", "[LVIM*] C [AST] G P [FY] [DE] W N Q [KR] H");
+
+ reductions.put("UNREDUCED", "A D K E R N T S Q Y F L I V M C W H G P *");
+ return reductions;
+ }
+}
diff --git a/src/malt/sequence/SeedShape2.java b/src/malt/sequence/SeedShape2.java
new file mode 100644
index 0000000..d95c346
--- /dev/null
+++ b/src/malt/sequence/SeedShape2.java
@@ -0,0 +1,138 @@
+/**
+ * SeedShape2.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.sequence;
+
+import java.io.IOException;
+
+/**
+ * seed shape
+ * Daniel Huson, 8.2014
+ */
+public class SeedShape2 {
+    private final boolean[] mask;
+    private final String shape;
+    private final int weight;
+    private int jumpToFirstZero = -1;
+
+    // Source for all seed patterns: Ilie et al. BMC Genomics 2011, 12:280 http://www.biomedcentral.com/1471-2164/12/280
+    public static final String SINGLE_DNA_SEED = "111110111011110110111111";
+    public static final String SINGLE_PROTEIN_SEED = "111101101110111";
+    public static final String[] PROTEIN_SEEDS = new String[]{"111101101110111", "1111000101011001111", "11101001001000100101111", "11101001000010100010100111"};
+    private int id; // id is 0,..,number of seed shapes-1
+
+    /**
+     * constructor
+     *
+     * @param shape pattern in which '0' marks an ignored position and any other character a used position
+     * @throws java.io.IOException not thrown by this code; declared for callers written against this signature
+     */
+    public SeedShape2(String shape) throws IOException {
+        this.shape = shape;
+        mask = new boolean[shape.length()];
+        int ones = 0;
+        for (int i = 0; i < shape.length(); i++) {
+            if (shape.charAt(i) != '0') {
+                mask[i] = true;
+                ones++;
+            } else {
+                if (jumpToFirstZero == -1)
+                    jumpToFirstZero = i;
+            }
+        }
+        weight = ones;
+    }
+
+    /**
+     * string representation of shaped seed
+     * @return the pattern string this shape was constructed from
+     */
+    @Override
+    public String toString() {
+        return shape;
+    }
+
+    /**
+     * get bytes
+     *
+     * @return pattern string as bytes in the platform's default charset
+     */
+    public byte[] getBytes() {
+        return shape.getBytes();
+    }
+
+    /**
+     * length of spaced seed
+     *
+     * @return length, i.e. the number of positions of the pattern
+     */
+    public int getLength() {
+        return mask.length;
+    }
+
+    /**
+     * weight of spaced seed
+     *
+     * @return weight, i.e. the number of positions of the pattern that are not '0'
+     */
+    public int getWeight() {
+        return weight;
+    }
+
+    /**
+     * create correct size byte array for holding seed results
+     *
+     * @return new array with one byte per used position
+     */
+    public byte[] createBuffer() {
+        return new byte[getWeight()];
+    }
+
+    /**
+     * compute the number of positions to jump over to get to first 0
+     *
+     * @return number of ones before first zero, or -1 if the shape contains no zero
+     */
+    public int getJumpToFirstZero() {
+        return jumpToFirstZero;
+    }
+
+    /**
+     * gets the expected number of seeds per job (computed in long arithmetic, the product can exceed the int range)
+     * @param numberOfSequences number of sequences
+     * @param numberOfLetters   total number of letters in all sequences
+     * @param numberOfJobs      number of jobs the seeds are divided among
+     * @return expected number of seeds, 0 if numberOfJobs is larger than the count
+     */
+    public long getMaxSeedCount(int numberOfSequences, long numberOfLetters, int numberOfJobs) {
+        return Math.max(1, numberOfLetters - (long) numberOfSequences * (weight - 1)) / numberOfJobs;
+    }
+
+    public void setId(int id) {
+        this.id = id;
+    }
+
+    public int getId() {
+        return id;
+    }
+
+    public boolean[] getMask() {
+        return mask;
+    }
+}
diff --git a/src/malt/sequence/SequenceEncoder.java b/src/malt/sequence/SequenceEncoder.java
new file mode 100644
index 0000000..764ac91
--- /dev/null
+++ b/src/malt/sequence/SequenceEncoder.java
@@ -0,0 +1,414 @@
+/**
+ * SequenceEncoder.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.sequence;
+
+import java.util.Iterator;
+
+/**
+ * sequence encoder
+ * Daniel Huson, 2014
+ */
+public class SequenceEncoder implements ISeedExtractor {
+ protected final int bitsPerLetter;
+ protected final int lettersPerWord;
+ protected final long letterMask;
+ protected final int unusedBits;
+ protected final long[] letter2code;
+ protected final byte[] code2letter;
+ protected final byte undefinedLetterCode;
+
+ /**
+ * constructor
+ *
+ * @param alphabet
+ */
+ public SequenceEncoder(final Alphabet alphabet) {
+ bitsPerLetter = alphabet.getBitsPerLetter();
+ lettersPerWord = 64 / bitsPerLetter;
+ letterMask = alphabet.getLetterMask();
+ unusedBits = alphabet.getUnusedBits();
+
+ letter2code = alphabet.getLetter2Code();
+ code2letter = alphabet.getCode2Letter();
+ undefinedLetterCode = alphabet.getUndefinedLetterCode();
+ }
+
+    /**
+     * Encodes the first {@code length} letters of a sequence, packing the letter codes into 64-bit words
+     * starting at the most significant bits of each word.
+     * A supplied array that is large enough is reused; it is zeroed first, because codes are OR-ed into
+     * the words and the decoding methods stop at the first all-zero letter code.
+     *
+     * @param sequence     letters to encode
+     * @param length       number of letters of sequence to encode
+     * @param sequenceCode array to use or null
+     * @return encoded sequence; this is sequenceCode if it was non-null and big enough, else a new array
+     */
+    public long[] encode(byte[] sequence, int length, long[] sequenceCode) {
+        // one word more than the full words needed, so that a zero letter code always follows the last letter
+        final int numberOfWords = length / lettersPerWord + 1;
+
+        if (sequenceCode == null || sequenceCode.length < numberOfWords)
+            sequenceCode = new long[numberOfWords];
+        else
+            java.util.Arrays.fill(sequenceCode, 0L); // drop leftovers of a previous use of this buffer
+
+        int shift = 64 - bitsPerLetter;
+        int word = 0;
+        for (int i = 0; i < length; i++) {
+            sequenceCode[word] |= letter2code[sequence[i]] << shift;
+
+            shift -= bitsPerLetter;
+            if (shift < 0) { // current word is full, continue at the top of the next one
+                shift = 64 - bitsPerLetter;
+                word++;
+            }
+        }
+        return sequenceCode;
+    }
+
+ /**
+ * encode a sequence
+ *
+ * @param sequence
+ * @param length
+ * @return encoded sequence
+ */
+ public long[] encode(byte[] sequence, int length) {
+ return encode(sequence, length, new long[length / lettersPerWord + 1]);
+ }
+
+ /**
+ * encode a sequence
+ *
+ * @param sequence
+ * @return encoded sequence
+ */
+ public long[] encode(byte[] sequence) {
+ return encode(sequence, sequence.length, new long[sequence.length / lettersPerWord + 1]);
+ }
+
+ /**
+ * encode a single letter
+ *
+ * @param letter
+ * @return code
+ */
+ public byte encode(byte letter) {
+ return (byte) letter2code[letter];
+ }
+
+ /**
+ * decode a sequenceCode
+ *
+ * @param sequenceCode
+ * @param bytes sequence
+ * @return sequence length
+ */
+ public int decode(long[] sequenceCode, byte[] bytes) {
+ int shift = 64 - bitsPerLetter;
+ int word = 0;
+ int length = 0;
+ while (true) {
+ byte bits = (byte) ((sequenceCode[word] & (letterMask << shift)) >>> shift);
+ if (bits == 0)
+ break;
+ bytes[length++] = code2letter[bits];
+
+ shift -= bitsPerLetter;
+ if (shift < 0) {
+ if (++word == sequenceCode.length)
+ break;
+ shift = 64 - bitsPerLetter;
+ }
+ }
+ return length;
+ }
+
+ /**
+ * decode a sequence
+ *
+ * @param sequenceCode
+ * @return sequence
+ */
+ public byte[] decode(long[] sequenceCode) {
+ byte[] sequence = new byte[computeLength(sequenceCode)];
+ decode(sequenceCode, sequence);
+ return sequence;
+ }
+
+    /**
+     * compute the length of the sequence. It is not stored explicitly.
+     * All words but the last are taken to be full; in the last word, letters are counted up to the first zero code.
+     *
+     * @param sequenceCode encoded sequence
+     * @return sequence length, 0 for an empty array
+     */
+    public int computeLength(long[] sequenceCode) {
+        if (sequenceCode.length == 0)
+            return 0; // there is no last word to inspect
+
+        final long lastWord = sequenceCode[sequenceCode.length - 1];
+        int length = lettersPerWord * (sequenceCode.length - 1); // assume all but last word are full
+        for (int shift = 64 - bitsPerLetter; shift >= 0; shift -= bitsPerLetter) {
+            if ((byte) ((lastWord & (letterMask << shift)) >>> shift) == 0)
+                break; // first unused letter slot
+            length++;
+        }
+        return length;
+    }
+
+ /**
+ * get a letter
+ *
+ * @param sequenceCode
+ * @param pos
+ * @return letter
+ */
+ public byte getLetterCode(long[] sequenceCode, int pos) {
+ int word = pos / lettersPerWord;
+ int letterInWord = pos - lettersPerWord * word;
+ int shift = 64 - (letterInWord + 1) * bitsPerLetter;
+ return (byte) ((sequenceCode[word] & (letterMask << shift)) >>> shift);
+
+ }
+
+ /**
+ * gets an getLetterCodeIterator over all letters
+ *
+ * @param sequenceCode
+ * @return getLetterCodeIterator
+ */
+ public Iterator<Byte> getLetterIterator(final long[] sequenceCode) {
+ return getLetterIterator(sequenceCode, 0);
+ }
+
+ /**
+ * gets an iterator over all letters.
+ * No check is performed to see whether pos is in range
+ *
+ * @param sequenceCode
+ * @return iterator
+ */
+ public Iterator<Byte> getLetterIterator(final long[] sequenceCode, final int pos) {
+ return new Iterator<Byte>() {
+ private int word = pos / lettersPerWord;
+ private int shift = 64 - ((pos - lettersPerWord * word) + 1) * bitsPerLetter;
+ byte bits = (byte) ((sequenceCode[word] & (letterMask << shift)) >>> shift);
+
+ public boolean hasNext() {
+ return bits > 0;
+ }
+
+ public Byte next() {
+ byte result = decode(bits);
+ // get next bits:
+ shift -= bitsPerLetter;
+ if (shift < 0) {
+ word++;
+ shift = 64 - bitsPerLetter;
+ }
+ if (word < sequenceCode.length)
+ bits = (byte) ((sequenceCode[word] & (letterMask << shift)) >>> shift);
+ else
+ bits = 0;
+ // else done
+ return result;
+ }
+
+ public void remove() {
+ }
+ };
+ }
+
+ /**
+ * gets a reverse iterator over all letters.
+ * No check is performed to see whether pos is in range
+ *
+ * @param sequenceCode
+ * @return iterator
+ */
+ public Iterator<Byte> getLetterReverseIterator(final long[] sequenceCode, final int pos) {
+ return new Iterator<Byte>() {
+ private int word = pos / lettersPerWord;
+ private int shift = 64 - ((pos - lettersPerWord * word) + 1) * bitsPerLetter;
+
+ byte bits = (byte) ((sequenceCode[word] & (letterMask << shift)) >>> shift);
+
+ public boolean hasNext() {
+ return bits > 0;
+ }
+
+ public Byte next() {
+ byte result = decode(bits);
+ shift += bitsPerLetter;
+ if (shift >= 64) {
+ shift = unusedBits;
+ word--;
+ }
+ if (word >= 0)
+ bits = (byte) ((sequenceCode[word] & (letterMask << shift)) >>> shift);
+ else
+ bits = 0;
+ return result;
+ }
+
+ public void remove() {
+ }
+ };
+ }
+
+ /**
+ * gets the code of the seed at a position (delegates to the five-argument variant with failValue 0)
+ * @param seedShape seed mask, the letters at the true positions are put into the code
+ * @param seedWeight expected to be the number of true positions in seedShape
+ * @param sequenceCode encoded sequence
+ * @param pos position in the sequence at which the seed starts
+ * @return seed code, or 0 if the seed could not be extracted
+ */
+ public long getSeedCode(final boolean[] seedShape, int seedWeight, long[] sequenceCode, int pos) {
+ return getSeedCode(seedShape, seedWeight, sequenceCode, pos, 0);
+ }
+
+ /**
+ * get the code for a given seed
+ *
+ * @param seedShape
+ * @param seedWeight
+ * @param sequenceCode
+ * @param pos
+ * @param failValue @return code
+ */
+ public long getSeedCode(boolean[] seedShape, int seedWeight, long[] sequenceCode, int pos, int failValue) {
+ long seed = 0;
+ int seedShift = (seedWeight - 1) * bitsPerLetter;
+
+ int word = pos / lettersPerWord;
+ int letterInWord = pos - lettersPerWord * word;
+ int shift = 64 - (letterInWord + 1) * bitsPerLetter;
+ for (boolean aSeedShape : seedShape) {
+ if (aSeedShape) {
+ long bits = (sequenceCode[word] & (letterMask << shift)) >>> shift;
+ if (bits == 0)
+ return failValue;
+ // System.err.println(Long.toBinaryString(bits));
+ seed |= (bits << seedShift);
+ seedShift -= bitsPerLetter;
+ }
+ shift -= bitsPerLetter;
+ if (shift < 0) {
+ shift = 64 - bitsPerLetter;
+ word++;
+ if (word == sequenceCode.length)
+ return failValue;
+ }
+ }
+ return seed;
+ }
+
+ /**
+ * decodes a seed code. For debugging only
+ *
+ * @param seedCode
+ * @return bytes for seed
+ */
+ public byte[] decodeSeed(long seedCode, int seedWeight) {
+ return decode(new long[]{seedCode << (64 - seedWeight * bitsPerLetter)});
+ }
+
+ /**
+ * decodes a letter code
+ *
+ * @param letterCode
+ * @return letter
+ */
+ public byte decode(byte letterCode) {
+ return code2letter[letterCode];
+ }
+
+ /**
+ * get the code for sequence spanned by a seed
+ *
+ * @param seedLength
+ * @param sequenceCode
+ * @param pos
+ * @param seedWords
+ * @return
+ */
+ public long[] getSeedSpanCode(int seedLength, long[] sequenceCode, int pos, long[] seedWords) {
+ if (seedWords == null)
+ seedWords = new long[1 + seedLength / lettersPerWord];
+ int seedWord = 0;
+
+ long seed = 0;
+ int seedShift = 64 - bitsPerLetter;
+ int word = pos / lettersPerWord;
+ int letterInWord = pos - lettersPerWord * word;
+ int shift = 64 - (letterInWord + 1) * bitsPerLetter;
+ for (int i = 0; i < seedLength; i++) {
+ long bits = (sequenceCode[word] & (letterMask << shift)) >>> shift;
+ seed |= (bits << seedShift);
+ seedShift -= bitsPerLetter;
+
+ shift -= bitsPerLetter;
+ if (shift < 0) {
+ shift = 64 - bitsPerLetter;
+ word++;
+ }
+ if (seedShift < 0) {
+ seedShift = 64 - bitsPerLetter;
+ seedWords[seedWord++] = seed;
+ seed = 0;
+ }
+ }
+ seedWords[seedWord] = seed;
+ return seedWords;
+ }
+
+ /**
+ * is this a good seed? Yes, if it contains at least two different letters and none is undefined
+ *
+ * @param seedCode
+ * @return true, if good
+ */
+ public boolean isGoodSeed(long seedCode, int seedWeight) {
+ int shift = 0;
+ byte a = 0;
+ byte b = 0;
+
+ while (shift < 64) {
+ byte bits = (byte) ((seedCode & (letterMask << shift)) >>> shift);
+ if (bits == 0)
+ break;
+ else if (bits == undefinedLetterCode)
+ return false;
+ if (a == 0)
+ a = bits;
+ else if (bits != a && b == 0)
+ b = bits;
+ shift += bitsPerLetter;
+ }
+ return b != 0;
+ }
+
+ public int getBitsPerLetter() {
+ return bitsPerLetter;
+ }
+}
diff --git a/src/malt/sequence/SequenceStore.java b/src/malt/sequence/SequenceStore.java
new file mode 100644
index 0000000..4dcd8cc
--- /dev/null
+++ b/src/malt/sequence/SequenceStore.java
@@ -0,0 +1,236 @@
+/**
+ * SequenceStore.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.sequence;
+
+
+import jloda.util.Basic;
+import jloda.util.CanceledException;
+import jloda.util.ProgressListener;
+import jloda.util.ProgressPercentage;
+import megan.io.OutputWriter;
+
+import java.io.DataInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+
+/**
+ * Sequence store using encoded sequenceCodes
+ * Created by huson on 10/1/14.
+ */
+public class SequenceStore {
+ public final static int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8;
+
+ private final SequenceEncoder sequenceEncoder;
+ private int numberOfSequences;
+ private byte[][] headers;
+ private long[][] sequenceCodes;
+
+ /**
+ * constructor
+ *
+ * @param sequenceEncoder
+ * @param size
+ */
+ public SequenceStore(final SequenceEncoder sequenceEncoder, final int size) {
+ this.sequenceEncoder = sequenceEncoder;
+ headers = new byte[size][];
+ sequenceCodes = new long[size][];
+ }
+
+    /**
+     * read up to numberToRead sequences from a fastA or fastQ file; the store is refilled from index 0
+     *
+     * @param it           iterator supplying a header and a sequence code per record
+     * @param numberToRead maximum number of sequences to read; nothing is read if this is not positive
+     * @param progress     progress listener, its maximum is set to numberToRead
+     * @return number of sequences read
+     * @throws CanceledException propagated from the calls on the progress listener
+     */
+    public int readFromFastA(FastAFileIteratorCode it, int numberToRead, ProgressListener progress) throws CanceledException {
+        progress.setMaximum(numberToRead);
+        progress.setProgress(0);
+        numberOfSequences = 0;
+        // the limit is tested before a record is consumed, so numberToRead <= 0 reads nothing
+        while (numberOfSequences < numberToRead && it.hasNext()) {
+            if (numberOfSequences >= headers.length)
+                grow();
+            headers[numberOfSequences] = it.nextHeader();
+            sequenceCodes[numberOfSequences] = it.nextSequenceCode();
+            numberOfSequences++;
+            progress.incrementProgress(); // counted for every record, including the last one
+        }
+        return numberOfSequences;
+    }
+
+    /**
+     * write to a file in binary format: the number of sequences, then for each sequence the header
+     * (length, bytes) followed by the sequence code (number of words, words)
+     *
+     * @param fileName file to write to
+     * @throws IOException if writing fails
+     */
+    public void write(final String fileName) throws IOException {
+        // both resources are closed by the try statement, so progress must not be closed explicitly as well
+        try (OutputWriter outs = new OutputWriter(new File(fileName));
+             ProgressPercentage progress = new ProgressPercentage("Writing file: " + fileName, numberOfSequences)) {
+            outs.writeInt(numberOfSequences);
+            for (int i = 0; i < numberOfSequences; i++) {
+                final byte[] header = headers[i];
+                outs.writeInt(header.length);
+                outs.write(header, 0, header.length);
+
+                final long[] sequenceCode = sequenceCodes[i];
+                outs.writeInt(sequenceCode.length);
+                for (long codeWord : sequenceCode)
+                    outs.writeLong(codeWord);
+                progress.incrementProgress();
+            }
+        }
+    }
+
+    /**
+     * Reads sequences from a binary file laid out as this class's write method produces it: a sequence
+     * count, then per sequence a length-prefixed header followed by a length-prefixed array of code words.
+     * NOTE(review): values are read big-endian via DataInputStream; confirm OutputWriter uses the same order.
+     * @param fileName path of the binary file to read
+     * @return number of sequences read
+     * @throws IOException if the file cannot be opened, is truncated or has a negative sequence count
+     */
+    public int read(final String fileName) throws IOException {
+        // try-with-resources closes the stream and the progress reporter even when a read fails
+        try (DataInputStream ins = new DataInputStream(new java.io.BufferedInputStream(new FileInputStream(fileName)))) {
+            final int count = ins.readInt();
+            if (count < 0)
+                throw new IOException("Corrupt file (negative sequence count): " + fileName);
+            if (headers.length < count) { // resize
+                headers = new byte[count][];
+                sequenceCodes = new long[count][];
+            }
+            numberOfSequences = count;
+            try (ProgressPercentage progress = new ProgressPercentage("Reading file: " + fileName, count)) {
+                for (int i = 0; i < count; i++) {
+                    // rows are allocated here: the store may be fresh (null rows) or hold rows of other sizes
+                    headers[i] = new byte[ins.readInt()];
+                    ins.readFully(headers[i]); // unlike read(), fails with EOFException on a truncated file
+                    sequenceCodes[i] = new long[ins.readInt()];
+                    for (int j = 0; j < sequenceCodes[i].length; j++)
+                        sequenceCodes[i][j] = ins.readLong();
+                    progress.incrementProgress();
+                }
+            }
+            return numberOfSequences;
+        }
+    }
+
+ /**
+ * get the number of sequences
+ *
+ * @return
+ */
+ public int getNumberOfSequences() {
+ return numberOfSequences;
+ }
+
+ /**
+ * gets the i-th header
+ *
+ * @param i
+ * @return header
+ */
+ public byte[] getHeader(int i) {
+ return headers[i];
+ }
+
+ /**
+ * gets the i-th sequence
+ *
+ * @param i
+ * @return sequence
+ */
+ public byte[] getSequence(int i) {
+ return sequenceEncoder.decode(sequenceCodes[i]);
+ }
+
+ /**
+ * gets the i-th sequence code
+ *
+ * @param i
+ * @return sequence code
+ */
+ public long[] getSequenceCode(int i) {
+ return sequenceCodes[i];
+ }
+
+ /**
+ * gets the sequence encoder
+ *
+ * @return
+ */
+ public SequenceEncoder getSequenceEncoder() {
+ return sequenceEncoder;
+ }
+
+    /**
+     * grow the data arrays to twice their size (at least 32 entries), capped at MAX_ARRAY_SIZE
+     *
+     * @throws IllegalStateException if the arrays are already at MAX_ARRAY_SIZE and cannot grow
+     */
+    private void grow() {
+        final int oldLength = headers.length;
+        final int newLength = (int) Math.min(MAX_ARRAY_SIZE, 2L * Math.max(16, oldLength));
+        // without this check a full store would "grow" to the same size and the caller's next write
+        // would fail with an uninformative ArrayIndexOutOfBoundsException
+        if (newLength <= oldLength)
+            throw new IllegalStateException("Too many sequences, cannot grow beyond: " + oldLength);
+        System.err.print("[Grow: " + oldLength + " -> " + newLength + "]");
+
+        headers = java.util.Arrays.copyOf(headers, newLength);
+        // both arrays are kept at the same length, see constructor and read()
+        sequenceCodes = java.util.Arrays.copyOf(sequenceCodes, newLength);
+    }
+
+ public static void main(String[] args) throws IOException, CanceledException {
+ String fileName = "/Users/huson/data/megan/ecoli/x.fna";
+
+ SequenceStore sequenceStore = new SequenceStore(new SequenceEncoder(DNA5Alphabet.getInstance()), 2000);
+ FastAFileIteratorCode fastAFileIteratorCode = new FastAFileIteratorCode(fileName, sequenceStore.getSequenceEncoder());
+ ProgressPercentage progress = new ProgressPercentage("Reading file: " + fileName);
+ sequenceStore.readFromFastA(fastAFileIteratorCode, 2000, progress);
+ progress.close();
+
+ System.err.println("Got:");
+ for (int i = 0; i < Math.min(5, sequenceStore.getNumberOfSequences()); i++) {
+ System.err.println(Basic.toString(sequenceStore.getHeader(i)));
+ System.err.println(Basic.toString(sequenceStore.getSequence(i)));
+ }
+
+ String binFile = "/Users/huson/tmp/x.idx";
+ sequenceStore.write(binFile);
+
+ sequenceStore.read(binFile);
+ System.err.println("Read: " + sequenceStore.numberOfSequences);
+ System.err.println("Got:");
+ for (int i = 0; i < Math.min(5, sequenceStore.getNumberOfSequences()); i++) {
+ System.err.println(Basic.toString(sequenceStore.getHeader(i)));
+ System.err.println(Basic.toString(sequenceStore.getSequence(i)));
+ }
+ }
+}
diff --git a/src/malt/util/FixedSizePriorityQueue.java b/src/malt/util/FixedSizePriorityQueue.java
new file mode 100644
index 0000000..811fd9e
--- /dev/null
+++ b/src/malt/util/FixedSizePriorityQueue.java
@@ -0,0 +1,104 @@
+/**
+ * FixedSizePriorityQueue.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.util;
+
+import java.util.Comparator;
+import java.util.PriorityQueue;
+
+/**
+ * A bounded priority queue that retains only the largest elements offered to it.
+ * <p>
+ * Elements are held in a {@link PriorityQueue} ordered by the supplied comparator, so the head
+ * of the queue is always the smallest element currently retained.
+ */
+public class FixedSizePriorityQueue<E> {
+    /** backing heap; its head is the smallest retained element */
+    private final PriorityQueue<E> heap;
+    private final Comparator<? super E> order;
+    private final int capacity;
+
+    /**
+     * Creates an empty queue that holds at most {@code maxSize} elements.
+     *
+     * @param maxSize    the maximum number of elements retained, must be positive
+     * @param comparator the ordering used to compare elements, must be non-null
+     * @throws IllegalArgumentException if {@code maxSize} is zero or negative
+     * @throws NullPointerException     if {@code comparator} is null
+     */
+    public FixedSizePriorityQueue(final int maxSize, final Comparator<? super E> comparator) {
+        if (maxSize <= 0) {
+            throw new IllegalArgumentException("maxSize = " + maxSize + "; expected a positive integer.");
+        }
+        if (comparator == null) {
+            throw new NullPointerException("Comparator is null.");
+        }
+        this.capacity = maxSize;
+        this.order = comparator;
+        this.heap = new PriorityQueue<>(maxSize, comparator);
+    }
+
+    /**
+     * Offers an element to the queue.
+     * <p>
+     * While fewer than {@code maxSize} elements are held, the element is simply added. Once the
+     * queue is full, the element is compared with the smallest retained element: if it is
+     * strictly greater, the smallest element is removed and the new one takes its place;
+     * if it is smaller or equal, the queue is left untouched.
+     *
+     * @param e the element to add, must be non-null
+     * @return true if the element was added, false if it was rejected
+     * @throws NullPointerException if {@code e} is null
+     */
+    public boolean add(final E e) {
+        if (e == null) {
+            throw new NullPointerException("e is null.");
+        }
+        final boolean full = heap.size() >= capacity;
+        if (full) {
+            final E smallest = heap.peek();
+            if (order.compare(e, smallest) <= 0) {
+                return false; // not better than what is already held
+            }
+            heap.poll(); // make room by dropping the smallest element
+        }
+        return heap.add(e);
+    }
+
+    /**
+     * @return the maximum number of elements this queue retains
+     */
+    public int getMaxSize() {
+        return capacity;
+    }
+
+    /**
+     * Removes and returns the smallest retained element.
+     *
+     * @return the smallest element, or null if the queue is empty
+     */
+    public E poll() {
+        return heap.poll();
+    }
+
+    /**
+     * @return the number of elements currently held
+     */
+    public int size() {
+        return heap.size();
+    }
+
+    /**
+     * Removes all elements.
+     */
+    public void clear() {
+        heap.clear();
+    }
+
+    /**
+     * Removes a single instance of the given entry, if present.
+     *
+     * @param entry the element to remove
+     * @return true if an element was removed
+     */
+    public boolean remove(E entry) {
+        return heap.remove(entry);
+    }
+
+    /**
+     * Gets the elements as a collection. This is the live backing queue, not a copy:
+     * changes made through it are visible here and are not subject to the size limit.
+     *
+     * @return the backing collection
+     */
+    public java.util.Collection<E> getCollection() {
+        return heap;
+    }
+}
diff --git a/src/malt/util/MurmurHash3.java b/src/malt/util/MurmurHash3.java
new file mode 100644
index 0000000..6c7414c
--- /dev/null
+++ b/src/malt/util/MurmurHash3.java
@@ -0,0 +1,100 @@
+/**
+ * MurmurHash3.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.util;
+/*
+ * This code is public domain.
+ *
+ * The MurmurHash3 algorithm was created by Austin Appleby and put into the public domain.
+ * See http://code.google.com/p/smhasher/
+ *
+ * This java port was authored by
+ * Yonik Seeley and was placed into the public domain per
+ * https://github.com/yonik/java_util/blob/master/src/util/hash/MurmurHash3.java.
+ */
+
+/**
+ * MurmurHash3 hashing.
+ * <p>
+ * This produces exactly the same hash values as the final C++ version of MurmurHash3 and is
+ * thus suitable for producing the same hash values across platforms.
+ * <p>
+ * The 32 bit x86 version of this hash should be the fastest variant for relatively short keys like ids.
+ * <p>
+ * Note - The x86 and x64 versions do _not_ produce the same results, as the
+ * algorithms are optimized for their respective platforms.
+ * <p>
+ * See also http://github.com/yonik/java_util for future updates to this file.
+ */
+public final class MurmurHash3 {
+    /**
+     * Returns the MurmurHash3_x86_32 hash of a range of bytes.
+     *
+     * @param data   array holding the bytes to hash
+     * @param offset index of the first byte to hash
+     * @param len    number of bytes to hash
+     * @param seed   initial value of the hash state
+     * @return the 32-bit hash
+     */
+    public static int murmurhash3x8632(byte[] data, int offset, int len, int seed) {
+        final int c1 = 0xcc9e2d51;
+        final int c2 = 0x1b873593;
+
+        int hash = seed;
+        // index just past the last complete 4-byte block
+        final int tailStart = offset + (len & ~3);
+
+        // body: consume complete 4-byte blocks, assembled in little-endian order
+        for (int pos = offset; pos < tailStart; pos += 4) {
+            int block = (data[pos] & 0xff)
+                    | ((data[pos + 1] & 0xff) << 8)
+                    | ((data[pos + 2] & 0xff) << 16)
+                    | (data[pos + 3] << 24);
+            block *= c1;
+            block = Integer.rotateLeft(block, 15);
+            block *= c2;
+
+            hash ^= block;
+            hash = Integer.rotateLeft(hash, 13);
+            hash = hash * 5 + 0xe6546b64;
+        }
+
+        // tail: the remaining one to three bytes, highest-indexed byte first
+        final int remaining = len & 0x03;
+        if (remaining != 0) {
+            int tail = 0;
+            if (remaining == 3) {
+                tail = (data[tailStart + 2] & 0xff) << 16;
+            }
+            if (remaining >= 2) {
+                tail |= (data[tailStart + 1] & 0xff) << 8;
+            }
+            tail |= data[tailStart] & 0xff;
+            tail *= c1;
+            tail = Integer.rotateLeft(tail, 15);
+            tail *= c2;
+            hash ^= tail;
+        }
+
+        // finalization
+        hash ^= len;
+
+        // fmix: final avalanche of the hash bits
+        hash ^= hash >>> 16;
+        hash *= 0x85ebca6b;
+        hash ^= hash >>> 13;
+        hash *= 0xc2b2ae35;
+        hash ^= hash >>> 16;
+
+        return hash;
+    }
+}
diff --git a/src/malt/util/ProfileUtilities.java b/src/malt/util/ProfileUtilities.java
new file mode 100644
index 0000000..a7ac5e9
--- /dev/null
+++ b/src/malt/util/ProfileUtilities.java
@@ -0,0 +1,43 @@
+/**
+ * ProfileUtilities.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.util;
+
+import malt.AlignmentEngine;
+import malt.analysis.OrganismsProfile;
+
+/**
+ * Helper methods for working with organism profiles.
+ * Daniel Huson, 8.2014
+ */
+public class ProfileUtilities {
+    /**
+     * Collects the organisms profile of every alignment engine.
+     *
+     * @param alignmentEngines the engines to query
+     * @return array holding, at each index, the profile reported by the engine at that index
+     */
+    public static OrganismsProfile[] getOrganismsProfiles(AlignmentEngine[] alignmentEngines) {
+        final OrganismsProfile[] result = new OrganismsProfile[alignmentEngines.length];
+        int index = 0;
+        for (AlignmentEngine engine : alignmentEngines) {
+            result[index++] = engine.getOrganismsProfile();
+        }
+        return result;
+    }
+}
diff --git a/src/malt/util/ReusableByteBuffer.java b/src/malt/util/ReusableByteBuffer.java
new file mode 100644
index 0000000..45e5391
--- /dev/null
+++ b/src/malt/util/ReusableByteBuffer.java
@@ -0,0 +1,128 @@
+/**
+ * ReusableByteBuffer.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.util;
+
+/**
+ * A growable byte buffer that can be emptied and refilled without reallocating.
+ * Daniel Huson, 8.2014
+ */
+public class ReusableByteBuffer {
+    private byte[] bytes;
+    /** number of bytes written since construction or the last reset */
+    private int pos = 0;
+
+    /**
+     * Creates a buffer with the given initial capacity.
+     *
+     * @param size initial capacity in bytes
+     */
+    public ReusableByteBuffer(int size) {
+        bytes = new byte[size];
+    }
+
+    /**
+     * Appends a string, storing each character as a single byte (the char is cast to byte).
+     *
+     * @param str the string to append
+     */
+    public void writeAsAscii(String str) {
+        final int count = str.length();
+        growIfFull(count, pos + count + 1024);
+        for (int i = 0; i < count; i++) {
+            bytes[pos++] = (byte) str.charAt(i);
+        }
+    }
+
+    /**
+     * Appends all bytes of an array.
+     *
+     * @param add the bytes to append
+     */
+    public void write(byte[] add) {
+        final int count = add.length;
+        growIfFull(count, pos + count + 1024);
+        System.arraycopy(add, 0, bytes, pos, count);
+        pos += count;
+    }
+
+    /**
+     * Appends a char as a single byte (the char is cast to byte).
+     *
+     * @param add the char to append
+     */
+    public void write(char add) {
+        growIfFull(1, pos + 1024);
+        bytes[pos++] = (byte) add;
+    }
+
+    /**
+     * Appends a single byte.
+     *
+     * @param add the byte to append
+     */
+    public void write(byte add) {
+        growIfFull(1, pos + 1024);
+        bytes[pos++] = add;
+    }
+
+    /**
+     * Appends a range of an array.
+     *
+     * @param add    source array
+     * @param offset index of the first byte to append
+     * @param length number of bytes to append
+     */
+    public void write(byte[] add, int offset, int length) {
+        growIfFull(length, pos + length + 1024);
+        System.arraycopy(add, offset, bytes, pos, length);
+        pos += length;
+    }
+
+    /**
+     * Empties the buffer; the allocated capacity is kept.
+     */
+    public void reset() {
+        pos = 0;
+    }
+
+    /**
+     * Returns a copy of the bytes written so far.
+     *
+     * @return new array containing exactly the written bytes
+     */
+    public byte[] makeCopy() {
+        final byte[] copy = new byte[pos];
+        System.arraycopy(bytes, 0, copy, 0, pos);
+        return copy;
+    }
+
+    /**
+     * Replaces the backing array with one of the given capacity when appending
+     * {@code incoming} bytes would reach or pass the end of the current array.
+     *
+     * @param incoming    number of bytes about to be appended
+     * @param newCapacity capacity of the replacement array
+     */
+    private void growIfFull(int incoming, int newCapacity) {
+        if (pos + incoming >= bytes.length) {
+            bytes = resize(bytes, newCapacity);
+        }
+    }
+
+    /**
+     * Copies an array into a new array of the given size.
+     *
+     * @param array   the source array
+     * @param newSize length of the new array
+     * @return new array holding as many leading source bytes as fit
+     */
+    private byte[] resize(byte[] array, int newSize) {
+        final byte[] larger = new byte[newSize];
+        System.arraycopy(array, 0, larger, 0, Math.min(newSize, array.length));
+        return larger;
+    }
+}
diff --git a/src/malt/util/TaxonomyUtilities.java b/src/malt/util/TaxonomyUtilities.java
new file mode 100644
index 0000000..4d483c5
--- /dev/null
+++ b/src/malt/util/TaxonomyUtilities.java
@@ -0,0 +1,151 @@
+/**
+ * TaxonomyUtilities.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.util;
+
+import jloda.graph.Node;
+import megan.classification.Classification;
+import megan.classification.ClassificationManager;
+import megan.viewer.TaxonomicLevels;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Simple utilities for looking things up in the taxonomy classification tree.
+ * Daniel Huson, 8.2014
+ */
+public class TaxonomyUtilities {
+
+    /**
+     * Gets the name of the species that contains the given taxon.
+     *
+     * @param taxonId id of the taxon to start from
+     * @return name of the first node of species rank found on the way up the tree
+     * (starting with the taxon itself), or null if there is none
+     */
+    public static String getContainingSpecies(Integer taxonId) {
+        return getContainingTaxonOfRank(taxonId, TaxonomicLevels.getSpeciesId());
+    }
+
+    /**
+     * Gets the name of the genus that contains the given taxon.
+     *
+     * @param taxonId id of the taxon to start from
+     * @return name of the first node of genus rank found on the way up the tree
+     * (starting with the taxon itself), or null if there is none
+     */
+    public static String getContainingGenus(Integer taxonId) {
+        return getContainingTaxonOfRank(taxonId, TaxonomicLevels.getGenusId());
+    }
+
+    /**
+     * Walks from the node of the given taxon towards the root and returns the name of the
+     * first node whose rank equals the requested rank.
+     *
+     * @param taxonId id of the taxon to start from
+     * @param rank    rank id to look for
+     * @return name of the matching node, or null if the taxon has no node or no node on the path matches
+     */
+    private static String getContainingTaxonOfRank(Integer taxonId, int rank) {
+        final Classification classification = ClassificationManager.get(Classification.Taxonomy, false);
+        Node v = classification.getFullTree().getTaxon2Node(taxonId);
+        while (v != null) {
+            final Integer id = (Integer) v.getInfo();
+            final int level = classification.getName2IdMap().getRank(id);
+            if (level == rank) {
+                return classification.getName2IdMap().get(id);
+            }
+            // step to the parent, or stop at a node without an incoming edge
+            v = (v.getInDegree() > 0 ? v.getFirstInEdge().getSource() : null);
+        }
+        return null;
+    }
+
+    /**
+     * Intended to get the strain of a taxon.
+     * <p>
+     * NOTE(review): as written, every path through this method ends in {@code return null};
+     * the loop only decides when to stop walking up the tree and never yields a name.
+     * The intended result cannot be determined from this file, so the logic is left as it was.
+     *
+     * @param taxonId id of the taxon to start from
+     * @return always null
+     */
+    public static String getStrain(int taxonId) {
+        final Classification classification = ClassificationManager.get(Classification.Taxonomy, false);
+        final int speciesId = TaxonomicLevels.getSpeciesId();
+        final int subspeciesId = TaxonomicLevels.getSubspeciesId();
+        Node v = classification.getFullTree().getTaxon2Node(taxonId);
+        while (v != null && v.getInDegree() > 0 && v.getInfo() != null) {
+            v = v.getFirstInEdge().getSource();
+            taxonId = (Integer) v.getInfo();
+            String name = classification.getName2IdMap().get(taxonId);
+            if (name != null && (name.equals("root") || name.equals("cellular organisms"))) {
+                break;
+            }
+            int level = classification.getName2IdMap().getRank(taxonId);
+            if (level > speciesId) {
+                break;
+            }
+            if (level > subspeciesId) {
+                break;
+            }
+            if (level != subspeciesId) {
+                break;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Gets the taxonomic path to the given taxon as a string of names separated by "; ".
+     * <p>
+     * The path is collected while walking from the taxon towards the root and is written out in
+     * the reverse order, so the entry collected last comes first. Nodes whose rank is 0 are left out.
+     * For taxon id 1 only the name of that taxon is returned.
+     *
+     * @param taxonId id of the taxon
+     * @return the path, or the empty string if no ranked node was found
+     */
+    public static String getPath(int taxonId) {
+        final Classification classification = ClassificationManager.get(Classification.Taxonomy, false);
+        final int genus = TaxonomicLevels.getGenusId();
+
+        if (taxonId == 1) { // root taxon
+            return classification.getName2IdMap().get(taxonId);
+        }
+
+        final List<Integer> path = new ArrayList<>();
+        Node v = classification.getFullTree().getTaxon2Node(taxonId);
+
+        while (v != null) {
+            taxonId = (Integer) v.getInfo();
+            if (classification.getName2IdMap().getRank(taxonId) != 0) { // ignore unranked nodes
+                path.add(taxonId);
+            }
+            v = (v.getInDegree() > 0 ? v.getFirstInEdge().getSource() : null);
+        }
+
+        final StringBuilder buf = new StringBuilder();
+        if (!path.isEmpty()) {
+            for (int i = path.size() - 1; i >= 0; i--) {
+                if (i < path.size() - 1) {
+                    buf.append("; ");
+                }
+                buf.append(classification.getName2IdMap().get(path.get(i)));
+            }
+            // NOTE(review): this inspects the entry collected last (the one written first),
+            // not the entry for the queried taxon at index 0 - confirm which one was meant
+            int level = classification.getName2IdMap().getRank(path.get(path.size() - 1));
+            if (level == genus) {
+                buf.append(".");
+            }
+        }
+        return buf.toString();
+    }
+}
diff --git a/src/malt/util/Tester.java b/src/malt/util/Tester.java
new file mode 100644
index 0000000..197d158
--- /dev/null
+++ b/src/malt/util/Tester.java
@@ -0,0 +1,119 @@
+/**
+ * Tester.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.util;
+
+/**
+ * Scratch program for experimenting with integer limits, hash-table masks and the cost of modulo.
+ * Daniel Huson, 2014
+ */
+public class Tester {
+
+    /**
+     * Prints a series of integer experiments to stderr and then times two loops of
+     * Integer.MAX_VALUE iterations each, one using modulo and one using bitwise-and.
+     * According to the original author's note, modulo takes about three times as long as
+     * bitwise-and or shift. Ends by calling System.exit(0).
+     *
+     * @param args ignored
+     */
+    public static void main(String[] args) {
+        System.err.println("MAX: " + Integer.MAX_VALUE + " bin: " + Integer.toBinaryString(Integer.MAX_VALUE));
+        System.err.println("MIN+1: " + (Integer.MIN_VALUE + 1));
+        System.err.println("-MIN+1: " + (-(Integer.MIN_VALUE + 1)) + " equals MAX? " + (Integer.MAX_VALUE == (-(Integer.MIN_VALUE + 1))));
+
+        printNormalizedHash(Integer.MAX_VALUE);
+        printNormalizedHash(Integer.MIN_VALUE);
+        printNormalizedHash(Integer.MIN_VALUE + 1);
+
+        /*
+        for(int i=0;i<32;i++) {
+            System.err.println("i="+i+" (1<<i)="+(1<<i)+" ((1<<i) -1)="+((1<<i)-1));
+        }
+        */
+
+        final long numberOfSeeds = 10000L;
+        System.err.println("numberOfSeeds: " + numberOfSeeds);
+
+        // smallest power of two that is not below the number of seeds, unless that would not fit
+        int tableSize;
+        if ((long) (0.9 * numberOfSeeds) >= Integer.MAX_VALUE) {
+            tableSize = Integer.MAX_VALUE;
+        } else {
+            tableSize = 1;
+            while (numberOfSeeds > tableSize) {
+                tableSize *= 2;
+            }
+        }
+        final int mask = tableSize - 1;
+
+        System.err.println("tableSize: " + tableSize);
+        System.err.println("mask: " + mask + " bits: " + Integer.toBinaryString(mask));
+
+        for (int i = tableSize - 5; i < tableSize + 5; i++) {
+            System.err.println("i=" + i + " i&mask=" + (i & mask));
+        }
+
+        // same range, negated: the value that is masked must be the one that is printed
+        for (int i = tableSize - 5; i < tableSize + 5; i++) {
+            System.err.println("i=" + (-i) + " (-i)&mask=" + ((-i) & mask));
+        }
+
+
+        final int top = Integer.MAX_VALUE >>> 8;
+        final int aMask = top;
+        System.err.println("top: " + top);
+        System.err.println("aMask: " + Integer.toBinaryString(aMask));
+
+        {
+            // timing run using modulo
+            long start = System.currentTimeMillis();
+
+            long sum = 0;
+            for (int i = 0; i < Integer.MAX_VALUE; i++) {
+                sum += (i % top);
+            }
+            System.err.println("Time: " + ((System.currentTimeMillis() - start) / 1000.0));
+            System.err.println("Sum: " + sum);
+        }
+
+        {
+            // timing run using bitwise-and
+            long start = System.currentTimeMillis();
+
+            long sum = 0;
+            for (int i = 0; i < Integer.MAX_VALUE; i++) {
+                sum += (i & aMask);
+            }
+            System.err.println("Time: " + ((System.currentTimeMillis() - start) / 1000.0));
+            System.err.println("Sum: " + sum);
+        }
+
+        System.exit(0);
+    }
+
+    /**
+     * Prints a hash value followed by what it is mapped to: Integer.MAX_VALUE and anything
+     * at or below Integer.MIN_VALUE + 1 become 0, every other value is kept.
+     *
+     * @param hash the value to show
+     */
+    private static void printNormalizedHash(int hash) {
+        System.err.print("hash: " + hash);
+        if ((long) hash == Integer.MAX_VALUE || (long) hash <= Integer.MIN_VALUE + 1) {
+            hash = 0;
+        }
+        System.err.println(" -> " + hash);
+    }
+}
diff --git a/src/malt/util/Utilities.java b/src/malt/util/Utilities.java
new file mode 100644
index 0000000..7fff135
--- /dev/null
+++ b/src/malt/util/Utilities.java
@@ -0,0 +1,424 @@
+/**
+ * Utilities.java
+ * Copyright (C) 2015 Daniel H. Huson
+ *
+ * (Some files contain contributions from other authors, who are then mentioned separately.)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package malt.util;
+
+import jloda.util.Basic;
+import jloda.util.UsageException;
+import malt.data.ReadMatch;
+import malt.data.Row;
+import malt.data.SequenceType;
+import megan.classification.IdMapper;
+import megan.classification.commandtemplates.LoadMappingFileCommand;
+import megan.io.IIntPutter;
+import megan.parsers.blast.BlastMode;
+
+import java.io.File;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.util.Random;
+
+/**
+ * Assorted static helpers: array shuffling and resizing, FastA header parsing,
+ * file checks and alignment-mode lookups.
+ * Daniel Huson, 8.2014
+ */
+public class Utilities {
+    /**
+     * Randomly permutes a range of an array of longs in place.
+     * <p>
+     * NOTE(review): the original comment cites Durstenfeld (1964), but the swap partner is drawn
+     * from [offset, i) and therefore is never i itself, whereas the textbook algorithm draws from
+     * [offset, i]. Left unchanged, because altering it would change the permutation produced
+     * for a given random seed - confirm whether that matters before changing it.
+     *
+     * @param array  the array to permute
+     * @param offset start of numbers to be randomized
+     * @param length number of numbers to be randomized
+     * @param random source of randomness
+     */
+    public static void randomize(long[] array, int offset, int length, Random random) {
+        for (int i = offset + length - 1; i >= offset + 1; i--) {
+            int j = random.nextInt(i - offset) + offset;
+            long tmp = array[i];
+            array[i] = array[j];
+            array[j] = tmp;
+        }
+    }
+
+    /**
+     * Randomly permutes a range of an array of integers in place, moving the numbers in
+     * consecutive pairs so that the two members of a pair stay together.
+     * The swap partner is chosen as in {@link #randomize(long[], int, int, Random)}.
+     *
+     * @param array  the array to permute
+     * @param offset start of numbers to be randomized
+     * @param length number of numbers to be randomized. Must be even for this to make sense
+     * @param random source of randomness
+     */
+    public static void randomizePairs(int[] array, int offset, int length, Random random) {
+        int end = offset + length / 2;
+        for (int i = end - 1; i >= offset + 1; i--) {
+            int j = random.nextInt(i - offset) + offset;
+            // i and j count pairs; the pair with number p starts at offset + 2 * (p - offset)
+            int i2 = 2 * i - offset;
+            int j2 = 2 * j - offset;
+            int tmp = array[i2];
+            array[i2] = array[j2];
+            array[j2] = tmp;
+            i2++;
+            j2++;
+            tmp = array[i2];
+            array[i2] = array[j2];
+            array[j2] = tmp;
+        }
+    }
+
+    /**
+     * Randomly permutes a range of an int putter in place, moving the numbers in consecutive
+     * pairs so that the two members of a pair stay together.
+     * The swap partner is chosen as in {@link #randomize(long[], int, int, Random)}.
+     *
+     * @param array  the putter to permute
+     * @param offset start of numbers to be randomized
+     * @param length number of numbers to be randomized. Must be even for this to make sense
+     * @param random source of randomness
+     */
+    public static void randomizePairs(IIntPutter array, long offset, int length, Random random) {
+        long end = offset + length / 2;
+        for (long i = end - 1; i >= offset + 1; i--) {
+            long j = random.nextInt((int) (i - offset)) + offset;
+            // i and j count pairs; the pair with number p starts at offset + 2 * (p - offset)
+            long i2 = 2 * i - offset;
+            long j2 = 2 * j - offset;
+            int tmp = array.get(i2);
+            array.put(i2, array.get(j2));
+            array.put(j2, tmp);
+            i2++;
+            j2++;
+            tmp = array.get(i2);
+            array.put(i2, array.get(j2));
+            array.put(j2, tmp);
+        }
+    }
+
+    /**
+     * Copies an array into a new array of the given size, truncating or zero-padding as needed.
+     *
+     * @param array   the source array
+     * @param newSize length of the new array
+     * @return new array
+     */
+    public static int[] resize(int[] array, int newSize) {
+        int[] result = new int[newSize];
+        System.arraycopy(array, 0, result, 0, Math.min(newSize, array.length));
+        return result;
+    }
+
+    /**
+     * Copies an array of rows into a new array of the given size. Existing rows are carried over
+     * by reference; every additional slot is filled with a newly constructed row.
+     *
+     * @param array   the source array
+     * @param newSize length of the new array
+     * @return new array
+     */
+    public static Row[] resizeAndConstructEntries(Row[] array, int newSize) {
+        Row[] result = new Row[newSize];
+        for (int i = array.length; i < newSize; i++) {
+            result[i] = new Row();
+        }
+        System.arraycopy(array, 0, result, 0, Math.min(newSize, array.length));
+        return result;
+    }
+
+    /**
+     * Copies an array of read matches into a new array of the given size. Existing entries are
+     * carried over by reference; additional slots are left null.
+     *
+     * @param array   the source array
+     * @param newSize length of the new array
+     * @return new array
+     */
+    public static ReadMatch[] resize(ReadMatch[] array, int newSize) {
+        ReadMatch[] result = new ReadMatch[newSize];
+        System.arraycopy(array, 0, result, 0, Math.min(newSize, array.length));
+        return result;
+    }
+
+    /**
+     * Gets the first word of a header. All leading '>' characters and whitespace are skipped;
+     * the word ends at the first whitespace or 0 byte, or at the end of the header.
+     *
+     * @param header the header bytes
+     * @return new array holding the first word
+     */
+    public static byte[] getFirstWordSkipLeadingGreaterSign(byte[] header) {
+        int start = 0;
+        while (start < header.length) {
+            byte ch = header[start];
+            if (ch != '>' && !Character.isWhitespace(ch)) {
+                break;
+            }
+            start++;
+        }
+        int finish = start;
+        while (finish < header.length) {
+            byte ch = header[finish];
+            if (ch == 0 || Character.isWhitespace(ch)) {
+                break;
+            }
+            finish++;
+        }
+        byte[] result = new byte[finish - start];
+        System.arraycopy(header, start, result, 0, finish - start);
+        return result;
+    }
+
+    /**
+     * Gets the first word of a header and makes sure that it starts with a greater sign.
+     * The word runs from the start of the header to the first whitespace or 0 byte,
+     * or to the end of the header if there is none.
+     * <p>
+     * A header that already starts with '>' and consists of a single word is returned as is
+     * (the same array, not a copy); in all other cases a new array is returned.
+     *
+     * @param header the header bytes
+     * @return first word, always starting with '>'
+     */
+    public static byte[] getFirstWordEnsureLeadingGreaterSign(byte[] header) {
+        final boolean hasLeadingGreaterSign = (header.length > 0 && header[0] == '>');
+        int length = 0;
+        while (length < header.length && header[length] != 0 && !Character.isWhitespace(header[length])) {
+            length++;
+        }
+        if (hasLeadingGreaterSign) {
+            if (length == header.length) {
+                return header; // nothing to cut off and nothing to add
+            }
+            byte[] result = new byte[length];
+            System.arraycopy(header, 0, result, 0, length);
+            return result;
+        }
+        // no leading '>': prepend one, also when the header consists of a single word
+        byte[] result = new byte[length + 1];
+        result[0] = '>';
+        System.arraycopy(header, 0, result, 1, length);
+        return result;
+    }
+
+
+    /**
+     * Copies a 0-terminated byte array.
+     *
+     * @param bytes the source bytes
+     * @return copy of the bytes before the first 0, or of all bytes if there is no 0
+     */
+    public static byte[] copy0Terminated(byte[] bytes) {
+        int length = 0;
+        while (length < bytes.length) {
+            if (bytes[length] == 0) {
+                break;
+            }
+            length++;
+        }
+        byte[] result = new byte[length];
+        System.arraycopy(bytes, 0, result, 0, length);
+        return result;
+    }
+
+    /**
+     * Gets the first word of a header and writes it to the start of result. At most one leading
+     * '>' is skipped, followed by any whitespace; the word ends at the first whitespace or 0 byte,
+     * or at the end of the header. The result array must be large enough to hold the word.
+     *
+     * @param header the header bytes
+     * @param result array that receives the word
+     * @return length of the first word
+     */
+    public static int getFirstWordSkipLeadingGreaterSign(byte[] header, byte[] result) {
+        int start = (header.length > 0 && header[0] == '>' ? 1 : 0);
+
+        while (start < header.length && Character.isWhitespace(header[start])) {
+            start++;
+        }
+        int end = start;
+        while (end < header.length && !Character.isWhitespace(header[end]) && header[end] != 0) {
+            end++;
+        }
+        int length = end - start;
+        if (length > 0) {
+            System.arraycopy(header, start, result, 0, length);
+        }
+        return length;
+    }
+
+    /**
+     * Checks that a file exists.
+     *
+     * @param file the file to check
+     * @throws IOException if the file does not exist
+     */
+    public static void checkFileExists(File file) throws IOException {
+        checkFileExists(file.getPath(), false);
+    }
+
+    /**
+     * Checks that a file exists, optionally accepting the same name with ".gz" appended.
+     *
+     * @param fileName           name of the file to check
+     * @param allowToAddGZSuffix if true, a file called fileName + ".gz" is accepted as well
+     * @throws IOException if no acceptable file exists
+     */
+    public static void checkFileExists(String fileName, boolean allowToAddGZSuffix) throws IOException {
+        if ((new File(fileName)).exists()) {
+            return; // ok
+        }
+        if (!allowToAddGZSuffix) {
+            throw new IOException("File not found: " + fileName);
+        } else if (!(new File(fileName + ".gz")).exists()) {
+            throw new IOException("File not found: " + fileName + " nor " + fileName + ".gz");
+        }
+    }
+
+    /**
+     * Removes all existing index files, that is, all files in the directory whose name
+     * ends on ".idx" or contains ".idx.".
+     *
+     * @param indexDirectory the directory to clean
+     * @throws IOException if the argument is not a directory or a file cannot be deleted
+     */
+    public static void cleanIndexDirectory(File indexDirectory) throws IOException {
+        if (!indexDirectory.isDirectory()) {
+            throw new IOException("Not a directory: " + indexDirectory);
+        }
+
+        File[] files = indexDirectory.listFiles(new FilenameFilter() {
+            @Override
+            public boolean accept(File file, String s) {
+                return s.endsWith(".idx") || s.contains(".idx.");
+            }
+        });
+        if (files != null) {
+            System.err.println("Deleting index files: " + files.length);
+            for (File file : files) {
+                if (!file.delete()) {
+                    throw new IOException("Failed to delete file: " + file);
+                }
+            }
+        }
+    }
+
+    /**
+     * Gets the query sequence type from the alignment program mode.
+     *
+     * @param mode the alignment mode
+     * @return DNA for BlastN and BlastX, protein for BlastP
+     * @throws IOException if the mode is none of the above
+     */
+    public static SequenceType getQuerySequenceTypeFromMode(BlastMode mode) throws IOException {
+        switch (mode) {
+            case BlastN:
+            case BlastX:
+                return SequenceType.DNA;
+            case BlastP:
+                return SequenceType.Protein;
+            default:
+                throw new IOException("Unsupported mode: " + mode);
+        }
+    }
+
+    /**
+     * Gets the reference sequence type from the alignment program mode.
+     *
+     * @param mode the alignment mode
+     * @return DNA for BlastN, protein for BlastX and BlastP
+     * @throws IOException if the mode is none of the above
+     */
+    public static SequenceType getReferenceSequenceTypeFromMode(BlastMode mode) throws IOException {
+        switch (mode) {
+            case BlastN:
+                return SequenceType.DNA;
+            case BlastX:
+            case BlastP:
+                return SequenceType.Protein;
+            default:
+                throw new IOException("Unsupported mode: " + mode);
+        }
+
+    }
+
+    /**
+     * Determines how many different frames are possible for a given query: one per requested
+     * strand for BlastN, three per requested strand for BlastX, and one for BlastP.
+     *
+     * @param mode         the alignment mode
+     * @param dnaDoForward whether the forward strand is used
+     * @param dnaDoReverse whether the reverse strand is used
+     * @return number of frames
+     * @throws IOException if the mode is not BlastN, BlastX or BlastP
+     */
+    public static int getMaxFramesPerQuery(BlastMode mode, boolean dnaDoForward, boolean dnaDoReverse) throws IOException {
+        switch (mode) {
+            case BlastN:
+                return (dnaDoForward ? 1 : 0) + (dnaDoReverse ? 1 : 0);
+            case BlastX:
+                return 3 * ((dnaDoForward ? 1 : 0) + (dnaDoReverse ? 1 : 0));
+            case BlastP:
+                return 1;
+            default:
+                throw new IOException("Unsupported mode: " + mode);
+        }
+    }
+
+    /**
+     * Counts the number of gaps ('-') in a range of a sequence.
+     *
+     * @param sequence the sequence bytes
+     * @param offset   index of the first byte to look at
+     * @param length   number of bytes to look at
+     * @return number of gaps
+     */
+    public static int countGaps(byte[] sequence, int offset, int length) {
+        int count = 0;
+        for (int i = 0; i < length; i++) {
+            int a = sequence[offset + i];
+            if (a == '-') {
+                count++;
+            }
+        }
+        return count;
+    }
+
+    /**
+     * Determines whether a sequence contains at most two different letters.
+     *
+     * @param seq the sequence bytes
+     * @return true if at most two different letters occur; an empty sequence yields true
+     */
+    public static boolean hasAtMostTwoLetters(byte[] seq) {
+        if (seq.length == 0) {
+            return true; // no letters at all
+        }
+        byte a = seq[0];
+        byte b = 0;
+
+        // find the first letter that differs from the first one
+        int pos = 1;
+        while (pos < seq.length) {
+            if (seq[pos] != a) {
+                b = seq[pos];
+                break;
+            }
+            pos++;
+        }
+        // any letter after that must be one of the two
+        while (pos < seq.length) {
+            if (seq[pos] != a && seq[pos] != b) {
+                return false;
+            }
+            pos++;
+        }
+        return true;
+    }
+
+    /**
+     * Gets the smallest power of two that is strictly greater than the given value.
+     *
+     * @param value the value to exceed
+     * @return the power of two, or Integer.MAX_VALUE if no such power of two is below Integer.MAX_VALUE
+     */
+    public static int getNextPowerOf2(int value) {
+        long i = 1;
+        for (; i < Integer.MAX_VALUE; i <<= 1) {
+            if (i > value) {
+                return (int) i;
+            }
+        }
+        return Integer.MAX_VALUE;
+    }
+
+    /**
+     * Gets a file for a given directory with a given name, if it exists. If gzippedOk, also tries adding .gz or replacing the suffix by .gz
+     *
+     * @param directory the directory to look in
+     * @param name      the file name
+     * @param gzippedOk whether gzipped variants of the name are acceptable
+     * @return file or null
+     */
+    public static File getFile(String directory, String name, boolean gzippedOk) {
+        File file = new File(directory, name);
+        if (file.exists()) {
+            return file;
+        }
+        if (gzippedOk) {
+            file = new File(directory, name + ".gz");
+            if (file.exists()) {
+                return file;
+            }
+            file = new File(directory, Basic.replaceFileSuffix(name, ".gz"));
+            if (file.exists()) {
+                return file;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Loads a mapping file by running a load-mapping command. Does nothing if the file name is empty.
+     * <p>
+     * NOTE(review): the file name is pasted into the command string between single quotes without
+     * any escaping, so a name containing a single quote will presumably break the command - confirm
+     * how the command parser treats quotes.
+     *
+     * @param fileName name of the mapping file
+     * @param mapType  type of the mapping
+     * @param cName    name of the classification
+     * @throws Exception if the command fails
+     */
+    public static void loadMapping(String fileName, IdMapper.MapType mapType, String cName) throws Exception {
+        if (fileName.length() > 0) {
+            (new LoadMappingFileCommand()).apply("load mapFile='" + fileName + "' mapType=" + mapType.toString() + " cName=" + cName + ";");
+        }
+    }
+}
diff --git a/tex/manual/Makefile b/tex/manual/Makefile
new file mode 100644
index 0000000..ca7ea5c
--- /dev/null
+++ b/tex/manual/Makefile
@@ -0,0 +1,10 @@
+# Build the manual from scratch: "clean" is a prerequisite, so every build starts
+# without stale output. The repeated pdflatex runs pick up the bibliography and the index.
+.PHONY: clean
+
+manual.pdf: manual.tex clean
+	pdflatex manual
+	bibtex manual
+	pdflatex manual
+	pdflatex manual
+	makeindex manual
+	pdflatex manual
+
+# remove generated files
+clean:
+	rm -f manual.dvi manual.idx manual.aux manual.ps manual.pdf
diff --git a/tex/manual/definitions.tex b/tex/manual/definitions.tex
new file mode 100644
index 0000000..26a3bd6
--- /dev/null
+++ b/tex/manual/definitions.tex
@@ -0,0 +1,189 @@
+% Show optional stuff:
+\newcommand{\optional}[1]{{#1}}
+% Hide optional stuff:
+\renewcommand{\optional}[1]{}
+
+% link stuff using hyperlinks:
+\newcommand{\link}[2]{\hyperlink{#2}{#1}}
+
+% a todo command:
+\newcommand{\todo}[1]{
+\optional{\marginpar{\raggedright \tiny \textbf{todo:} {#1}}}}
+
+% use \mylabel instead of \label to see labels in print out
+\newcommand{\mylabel}[1]{
+\optional{\marginpar{\raggedright\tiny{#1}}}
+\label{#1}
+\hypertarget{#1}{}
+}
+
+% use \target instead of \hypertarget to see targets in print out
+\newcommand{\target}[2]{
+\hypertarget{#2}{#1}
+\optional{\marginpar{\raggedright\tiny{#2}}}
+}
+
+% use \mysection instead of \section to produce labels and hyperlink targets
+\newcommand{\mysection}[1]{\section{#1}\mylabel{sec:#1}}
+
+% use \mysubsection instead of \subsection to produce labels and hyperlink targets
+\newcommand{\mysubsection}[1]{\subsection{#1}\mylabel{subsec:#1}}
+
+\newcommand{\menu}[1]{
+\link{{\tt #1}}{menu:#1}}
+
+\newcommand{\pmenu}[1]{
+\target{{\tt #1}}{menu:#1}
+\index{#1}\hspace{-0.3cm}
+}
+
+\newcommand{\program}[1]{
+\link{{\tt #1}}{program:#1}}
+
+\newcommand{\pprogram}[1]{
+\target{{\tt #1}}{program:#1}
+\index{#1}\hspace{-0.3cm}
+}
+
+\newcommand{\windowmenu}[2]{
+\link{{\tt #2}}{windowmenu:#1-#2}\index{#2}\index{#2}}
+
+\newcommand{\pwindowmenu}[2]
+{\target{{\tt#2}}{windowmenu:#1-#2}
+\index{#2}\index{#2}}
+
+\newcommand{\submenu}[1]{
+\link{{\tt #1}}{submenu:#1}}
+
+\newcommand{\psubmenu}[1]{
+\target{{\tt #1}}{submenu:#1}
+\index{#1}}
+
+\newcommand{\menuitem}[2]{
+\link{{\tt #1$\to$#2}}{menuitem:#1-#2}
+\index{#1$\to$#2}\index{#2}}
+
+\newcommand{\pmenuitem}[2]{\index{#1$\to$#2}\index{#2}
+\target{{\tt #1$\to$#2}}{menuitem:#1-#2}\hspace{-0.3cm}
+}
+
+\newcommand{\menuitemh}[3]{
+\link{{\tt #1$\to$#2$\to$#3}}{menuitem:#1-#2-#3}\index{#1$\to$#2$\to$#3}\index{#3}}
+
+\newcommand{\pmenuitemh}[3]
+{\target{{\tt#1$\to$#2$\to$#3}}{menuitem:#1-#2-#3}
+\index{#1$\to$#2$\to$#3}\index{#3}}
+
+\newcommand{\windowmenuitem}[3]{
+\link{{\tt #2$\to$#3}}{windowmenuitem:#1-#2-#3}\index{#2$\to$#3}\index{#3}}
+
+\newcommand{\pwindowmenuitem}[3]
+{\target{{\tt#2$\to$#3}}{windowmenuitem:#1-#2-#3}
+\index{#2$\to$#3}\index{#3}}
+
+\newcommand{\ppopupmenu}[1]{
+\target{{\tt #1}}{popupmenu:#1}
+\index{#1}}
+
+\newcommand{\popupmenu}[1]{
+\link{{\tt #1}}{popupmenu:#1}
+\index{#1}}
+
+\newcommand{\ppopupmenuitem}[2]{
+\target{{\tt #2}}{popupmenuitem:#1-#2}
+\index{#1$\to$#2}\index{#2}}
+
+\newcommand{\popupmenuitem}[2]{
+\link{{\tt #2}}{popupmenuitem:#1-#2}
+\index{#1$\to$#2}\index{#2}}
+
+\newcommand{\block}[1]{
+\link{{\tt #1}}{block:#1}}
+
+\newcommand{\pblock}[1]{
+\target{{\tt #1}}{block:#1}
+\index{#1}}
+
+\newcommand{\button}[1]{
+\link{{\tt #1}}{button:#1}}
+
+\newcommand{\pbutton}[1]{\hspace{-0.3cm}
+\target{{\tt #1}}{button:#1}
+\index{#1}\hspace{-0.3cm}}
+
+\newcommand{\method}[1]{
+\link{{\tt #1}}{method:#1}}
+
+\newcommand{\pmethod}[1]{
+\target{{\tt #1}}{method:#1}
+\index{#1}}
+
+\newcommand{\window}[1]{
+\link{{\tt #1}}{window:#1}}
+
+\newcommand{\pwindow}[1]{\hspace{-0.3cm}
+\target{{\tt #1}}{window:#1}
+\index{#1}\hspace{-0.3cm}}
+
+\newcommand{\tab}[2]{
+\link{{\tt #1:#2}}{tab:#1-#2}}
+
+\newcommand{\ptab}[2]{
+\target{{\tt #1:#2}}{tab:#1-#2}
+\index{#1:#2}}
+
+\newcommand{\tabtab}[3]{
+\link{{\tt #1:#2:#3}}{tabtab:#1-#2-#3}}
+
+\newcommand{\ptabtab}[3]{
+\target{{\tt #1:#2:#3}}{tabtab:#1-#2-#3}
+\index{#1:#2:#3}}
+
+\newcommand{\concept}[1]{
+\link{{#1}}{concept:#1}}
+
+\newcommand{\pconcept}[1]{\hspace{-0.3cm}
+\target{{\em #1}}{concept:#1}\index{#1}\hspace{-0.2cm}}
+
+%%% The following commands are to help make hyperlinks:
+
+% use this to emphasize a word and to put it into the index:
+\newcommand{\iem}[1]{{\em #1}\index{#1}}
+\newcommand{\irm}[1]{{#1}\index{#1}}
+\newcommand{\itt}[1]{{\tt #1}\index{#1}}
+\newcommand{\ibf}[1]{{\bf #1}\index{#1}}
+\newcommand{\iit}[1]{{\it #1}\index{#1}}
+\newcommand{\isc}[1]{{\sc #1}\index{#1}}
+
+% use this to emphasize a word and to put it into the index and link it
+% to its primary occurrence:
+\newcommand{\ieml}[1]{\link{{\em #1}}{#1}\index{#1}}
+\newcommand{\irml}[1]{\link{#1}{#1}\index{#1}}
+\newcommand{\ittl}[1]{\link{{\tt #1}}{#1}\index{#1}}
+\newcommand{\iitl}[1]{\link{{\it #1}}{#1}\index{#1}}
+\newcommand{\iscl}[1]{\link{{\sc #1}}{#1}\index{#1}}
+\newcommand{\il}[1]{\link{#1}{#1}\index{#1}}
+
+% use this to emphasize the primary occurrence of
+% word and to put it into the index, also make this occurrence of the word
+% a hyperlink target:
+\newcommand{\pem}[1]{{\em \hypertarget{#1}{#1}}\index{#1}
+\optional{{\marginpar{\raggedright\tiny{#1}}}}}
+
+
+% use this to ignore stuff:
+
+\newcommand{\ignore}[1]{}
+% some definitions:
+
+\def\kb{{\rm kb }}
+\def\bp{{\rm bp }}
+\def\Gb{{\rm Gb }}
+\def\Mb{{\rm Mb }}
+
+\def\this{.}
+
+% Headings
+%\pagestyle{myheadings}
+%\markboth{}{$ $Date: 2006-04-26 19:12:01 $ $\hfil User Manual \Megan v4b25}
+
diff --git a/tex/manual/manual.bbl b/tex/manual/manual.bbl
new file mode 100644
index 0000000..2acc67d
--- /dev/null
+++ b/tex/manual/manual.bbl
@@ -0,0 +1,91 @@
+\begin{thebibliography}{10}
+
+\bibitem{GeneOntology2000}
+M.~Ashburner, C.~A. Ball, J.~A. Blake, D.~Botstein, H.~Butler, J.~M. Cherry,
+ A.~P. Davis, K.~Dolinski, S.~S. Dwight, J.~T. Eppig, M.~A. Harris, D.~P.
+ Hill, L.~Issel-Tarver, A.~Kasarskis, S.~Lewis, J.~C. Matese, J.~E.
+ Richardson, M.~Ringwald, G.~M. Rubin, and G.~Sherlock.
+\newblock Gene ontology: tool for the unification of biology. the gene ontology
+ consortium.
+\newblock {\em Nat Genet}, 25(1):25--29, May 2000.
+
+\bibitem{Burkhardt01}
+Stefan Burkhardt and Juha K{\"a}rkk{\"a}inen.
+\newblock Better filtering with gapped q-grams.
+\newblock {\em Fundamenta Informaticae}, XXIII:1001--1018, 2001.
+
+\bibitem{ChaoPM92}
+Kun-Mao Chao, William~R. Pearson, and Webb Miller.
+\newblock Aligning two sequences within a specified diagonal band.
+\newblock {\em Computer Applications in the Biosciences}, 8(5):481--487, 1992.
+
+\bibitem{MEGAN2007}
+D.~H. Huson, A.~F. Auch, J.~Qi, and S.~C. Schuster.
+\newblock {MEGAN} analysis of metagenomic data.
+\newblock {\em Genome Res}, 17(3):377--386, March 2007.
+
+\bibitem{MEGAN2011}
+D.~H. Huson, S.~Mitra, N.~Weber, H.-J. Ruscheweyh, and S.~C. Schuster.
+\newblock Integrative analysis of environmental sequences using {MEGAN\,4}.
+\newblock {\em Genome Research}, 21:1552--1560, 2011.
+
+\bibitem{Ilie:2011fk}
+Lucian Ilie, Silvana Ilie, Shima Khoshraftar, and Anahita~Mansouri Bigvand.
+\newblock Seeds for effective oligonucleotide design.
+\newblock {\em BMC Genomics}, 12:280, 2011.
+
+\bibitem{Kanehisa2000}
+M.~Kanehisa and S.~Goto.
+\newblock {KEGG}: {Kyoto} encyclopedia of genes and genomes.
+\newblock {\em Nucleic Acids Res}, 28(1):27--30, Jan 2000.
+
+\bibitem{Ma02}
+Bin Ma, John Tromp, and Ming Li.
+\newblock {PatternHunter}: faster and more sensitive homology search.
+\newblock {\em Bioinformatics}, 18(3):440--445, 2002.
+
+\bibitem{Murphy2000}
+Lynne~Reed Murphy, Anders Wallqvist, and Ronald~M. Levy.
+\newblock Simplified amino acid alphabets for protein fold recognition and
+ implications for folding.
+\newblock {\em Protein Engineering}, 13:149--152(4), 2000.
+
+\bibitem{SSAHA}
+Z.~Ning, A.~J. Cox, and J.~C. Mullikin.
+\newblock {SSAHA}: a fast search method for large {DNA} databases.
+\newblock {\em Genome Res}, 11(10):1725--1729, 2001.
+
+\bibitem{SEED2005}
+Ross Overbeek, Tadhg Begley, Ralph~M Butler, Jomuna~V Choudhuri, Han-Yu Chuang,
+ Matthew Cohoon, Val\'erie de~Cr\'ecy-Lagard, Naryttza Diaz, Terry Disz,
+ Robert Edwards, Michael Fonstein, Ed~D Frank, Svetlana Gerdes, Elizabeth~M
+ Glass, Alexander Goesmann, Andrew Hanson, Dirk Iwata-Reuyl, Roy Jensen, Neema
+ Jamshidi, Lutz Krause, Michael Kubal, Niels Larsen, Burkhard Linke, Alice~C
+ McHardy, Folker Meyer, Heiko Neuweger, Gary Olsen, Robert Olson, Andrei
+ Osterman, Vasiliy Portnoy, Gordon~D Pusch, Dmitry~A Rodionov, Christian
+ R\"uckert, Jason Steiner, Rick Stevens, Ines Thiele, Olga Vassieva, Yuzhen
+ Ye, Olga Zagnitko, and Veronika Vonstein.
+\newblock The subsystems approach to genome annotation and its use in the
+ project to annotate 1000 genomes.
+\newblock {\em Nucleic Acids Res}, 33(17):5691--5702, 2005.
+
+\bibitem{eggNOG}
+Sean Powell, Damian Szklarczyk, Kalliopi Trachana, Alexander Roth, Michael
+ Kuhn, Jean Muller, Roland Arnold, Thomas Rattei, Ivica Letunic, Tobias
+ Doerks, Lars~Juhl Jensen, Christian von Mering, and Peer Bork.
+\newblock {eggNOG} v3.0: orthologous groups covering 1133 organisms at 41
+ different taxonomic ranges.
+\newblock {\em Nucleic Acids Research}, 40(Database-Issue):284--289, 2012.
+
+\bibitem{Tatusov1997}
+R.~L. Tatusov, E.~V. Koonin, and D.~J. Lipman.
+\newblock A genomic perspective on protein families.
+\newblock {\em Science}, 278(5338):631--637, Oct 1997.
+
+\bibitem{RapSearch2}
+Yongan Zhao, Haixu Tang, and Yuzhen Ye.
+\newblock {RAPSearch2}: a fast and memory-efficient protein similarity search
+ tool for next-generation sequencing data.
+\newblock {\em Bioinformatics}, 28(1):125--126, 2012.
+
+\end{thebibliography}
diff --git a/tex/manual/manual.pdf b/tex/manual/manual.pdf
new file mode 100644
index 0000000..1241776
Binary files /dev/null and b/tex/manual/manual.pdf differ
diff --git a/tex/manual/manual.tex b/tex/manual/manual.tex
new file mode 100644
index 0000000..1c0618d
--- /dev/null
+++ b/tex/manual/manual.tex
@@ -0,0 +1,640 @@
+%\listfiles
+\documentclass[11pt]{article}
+\usepackage{fullpage}
+\usepackage{amssymb}
+\usepackage{graphics, graphicx}
+\usepackage{fancyhdr}
+\usepackage{subfigure}
+\usepackage{ifthen}
+\usepackage{version}
+\usepackage{tocbibind}
+\usepackage{makeidx}
+\usepackage{xspace}
+\usepackage{placeins}
+
+%\usepackage{times}
+\usepackage{booktabs}
+\usepackage[colorlinks=true, pdfstartview=FitV, linkcolor=blue,
+ citecolor=blue, urlcolor=blue]{hyperref}
+\raggedbottom
+\sloppy
+
+\parindent=0pt
+\parskip=5pt
+
+\newcommand\MALT{{\sf MALT}\xspace}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\input versioninfo.tex
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\title{\bf User Manual for \MALT V\VERSION}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\author{Daniel H.~Huson}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\makeindex
+
+\input definitions.tex
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{document}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\maketitle
+
+%\hfil\includegraphics[height=4cm]{about.pdf}\hfil
+
+{\small
+\setcounter{tocdepth}{1}
+\tableofcontents
+}
+\newpage
+
+\ibf{License}:
+Copyright (c) 2015, Daniel H. Huson
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see \url{http://www.gnu.org/licenses}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mysection{Introduction}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+\MALT, an acronym for \iit{MEGAN alignment tool}, is a sequence alignment and analysis tool designed for processing high-throughput sequencing data, especially in the context of metagenomics.
+It is an extension of MEGAN6, the \iit{MEtaGenome ANalyzer}, and is designed to provide the input for MEGAN6,
+but can also be used independently of MEGAN6.
+
+The core of the program is a sequence alignment engine that aligns DNA or protein sequences
+to a {DNA or} protein reference database in either {BLASTN (DNA queries and DNA references),}
+BLASTX (DNA queries and protein references) or BLASTP (protein queries and protein references)
+mode. The engine uses a banded-alignment algorithm with affine gap scores
+and BLOSUM substitution matrices (in the case of protein alignments).
+The program can compute either local alignments
+(Smith-Waterman) or semi-global alignments (in which reads are aligned end-to-end into reference sequences), the latter being more appropriate for aligning metagenomic reads to references.
+
+By default, \MALT produces a MEGAN ``RMA6'' file that contains taxonomic and functional classifications of the reads
+that can be opened in MEGAN6.
+The taxonomic analysis uses the naive LCA algorithm (introduced in \cite{MEGAN2011}).
+
+Used as an alignment tool, \MALT can produce alignments in BLAST text format,
+BLAST-tab format or SAM format (both for DNA and protein alignments).
+In addition, the program can be used as a filter to obtain all reads that have a significant alignment, or
+do not have a significant alignment, to the given reference database.
+
+{
+\MALT can also be used to compute a taxonomic analysis of 16S sequences. Here the
+ability to compute a semi-global alignment rather than a local alignment is crucial.
+
+When provided with a listing of gene locations and annotations for a given database of DNA sequences, \MALT is able to predict genes based on BLASTN-style alignments.
+}
+
+\MALT actually consists of two programs, \program{malt-build} and
+\program{malt-run}.
+The \program{malt-build} program is first used to build an index for the given reference database. It
+can index arbitrarily large databases, provided the computer used has enough memory.
+For maximum speed, the program uses a hash table and thus requires a large-memory machine.
+The \program{malt-run} program is then used to perform alignments and analyses.
+
+\MALT does not use a new approach, but is rather a new carefully crafted implementation of existing approaches.
+The program uses spaced seeds rather than consecutive seeds \cite{Burkhardt01,Ma02}.
+It uses a hash table to store seed matches, see, for example, \cite{SSAHA}.
+ It uses a reduced alphabet to determine potential matches between protein sequences \cite{Murphy2000,RapSearch2}.
+ Finally, it uses a banded alignment algorithm \cite{ChaoPM92} that can compute both local and semi global alignments.
+
+Both programs make heavy use of parallelization and require a lot of memory. The ideal \pconcept{hardware requirements}
+are a Linux server with 64 cores and 512 GB of memory.
+
+\MALT performs alignment and analysis of high-throughput sequencing data in a high-throughput manner. Here are some examples:
+
+\begin{enumerate}
+\item
+Using the RefSeq microbial protein database (version 50, containing $10$ million protein sequences with a total length of $3.2$ billion amino acids), a BLASTX-style analysis of taxonomic and functional content of
+a collection of 11 million Illumina reads takes about $900$ wall-clock seconds (using 64 cores).
+The program found about $4.5$ million significant alignments covering about $15$\% of the total reads.
+{
+\item Using the Genbank DNA database (microbes and viruses,
+downloaded early 2013, containing about 2.3 million DNA sequences with a total length of
+11 billion nucleotides), a BLASTN-style analysis of one million reads takes about $70$ wall-clock seconds.
+The program finds about two million significant alignments covering one quarter of the total reads.
+\item Using the Silva database (\itt{SSURef\_NR99\_115\_tax\_silva.fasta}, containing $479,726$ DNA sequences with a total length of $690$ million nucleotides), the semi-global alignment of $5000$ 16S reads takes about 100 seconds (using 64 cores), producing
+about $100,000$ significant alignments.
+}
+\end{enumerate}
+
+This document provides both an introduction and a reference manual for \MALT.
+
+\pagebreak
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mysection{Getting Started}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+This section describes how to get started.
+
+Download the program from \url{http://www-ab.informatik.uni-tuebingen.de/software/malt},
+see Section~\ref{sec:Obtaining and Installing the Program}
+for details.
+
+
+First, use \program{malt-build} to build an index for \MALT. For example,
+to build an index for all viral proteins in RefSeq, download the following file:
+\url{ftp://ftp.ncbi.nlm.nih.gov/refseq//release/viral/viral.1.protein.faa.gz}
+
+
+Put this file in a single directory called {\tt references}, say. There is no need to unzip the file
+because \MALT is able to read zipped files. Also, in general, when using more than one file of reference sequences,
+there is no need to concatenate the files into one file, as \MALT can process multiple files.
+
+The program \program{malt-build} will be used to build an index for viral reference sequences. We will write the index
+directory to a directory called {\tt index}.
+In the parent directory of the {\tt references} directory, run \program{malt-build} as follows:
+{\footnotesize
+\begin{verbatim}
+set MALT=<path-to-malt-directory>
+malt-build -i references/*.* -d index -g2t $MALT/data/gi_taxid_prot-2014Jan04.bin \
+ -tre $MALT/data/ncbi.tre.gz -map $MALT/data/ncbi.tre.gz -L megan5-license.txt
+\end{verbatim}
+}
+
+The input files are specified using {\tt -i}, the index is specified using {\tt -d}.
+The option {\tt -g2t} is used to specify a GI to taxon-id mapping which will be used to identify the taxa associated with
+the reference sequences. A mapping file is supplied in the data directory of \MALT.
+The options {\tt -tre} and {\tt -map} are used to access the NCBI taxonomy, which is needed to perform a taxonomic analysis of the reads as they are aligned. Use {\tt -L} to explicitly provide a MEGAN5 license file to the program, if you have not previously used a licensed version of MEGAN5.
+
+Then, use \program{malt-run} to analyze a file of DNA reads. Assume that the DNA reads are
+contained in two files, {\tt reads1.fna} and {\tt reads2.fna}. Call the program as follows:
+{\footnotesize
+\begin{verbatim}
+malt-run -i reads1.fna reads2.fna -d index -m BlastX -o . -L megan5-license.txt
+\end{verbatim}
+}
+
+If either of the two programs aborts due to insufficient memory, then please edit the files {\tt malt-build-gui.vmoptions} and/or {\tt malt-run-gui.vmoptions} to allocate more memory to the programs.
+By default, for testing purposes, the memory reserved for the programs is set to $64GB$.
+For comparison against the NCBI-NR database, for example, you will need about $300GB$.
+
+All input files are specified using {\tt -i}. The index to use is specified using {\tt -d}. The option {\tt -m} defines the alignment mode of the program, in this
+case {\tt BlastX}. Use {\tt -at} to specify the alignment type. The option {\tt -om} is used to specify the output directory for matches.
+Here we specify the current directory ({\tt .}). The option {\tt --tax} requests that a taxonomic analysis of the reads be performed and {\tt -om .}
+requests that the resulting MEGAN file be written to the current directory.
+The file option {\tt -t} specifies the maximum number of threads.
+
+By default, \MALT uses memory mapping to access its index files. If you intend to align a large number of files in a single run of \MALT,
+then it may be more efficient to have the program preload the complete index. To achieve this, use the command-line option \itt{-mem false}.
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mysection{Obtaining and Installing the Program}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\MALT is written in Java and requires a 64-bit Java runtime environment
+version 7 or later, freely available from \url{http://www.java.org}.
+The Windows and MacOS X installers contain a suitable Java runtime environment that will be used if
+a suitable Java runtime environment cannot be found on the computer.
+
+\MALT is currently in ``open alpha testing'' and is available from:
+
+\url{http://www-ab.informatik.uni-tuebingen.de/software/malt}.
+
+There are three different installers that target major operating systems:
+\begin{itemize}
+\item \itt{MALT\_windows-x64\_\VERSION.exe} provides an installer for \irm{Windows}.
+\item \itt{MALT\_macos\_\VERSION.dmg} provides an installer for \irm{MacOS X}.
+\item \itt{MALT\_unix\_\VERSION.sh} provides an installer for \irm{Linux} and \irm{Unix}.
+\end{itemize}
+
+Download the installer that is appropriate for your computer. Please note that the \irm{memory requirement}
+of \MALT grows dramatically with the size of the reference database that you wish to employ.
+For example, to align sequences against the NR database requires that you have 512GB of main memory.
+
+Double-click on the downloaded installer program to start the interactive installation dialog.
+
+Alternatively, under Linux, change into the directory containing the installer and type
+
+{\tt ./\itt{MALT\_unix\_\VERSION.sh}}
+
+This will launch the \MALT installer in GUI mode. To install the program in non-gui console mode,
+type
+
+{\tt ./\itt{MALT\_unix\_\VERSION.sh} -c}
+
+Finally, when updating the installation under Linux, one can perform a completely
+\irm{non-interactive installation} like this (quiet mode):
+
+{\tt ./\itt{MALT\_unix\_\VERSION.sh} -q}
+
+The installation dialog will ask how much memory the program may use. Please set this variable carefully.
+If the amount needs to be changed after installation, then this can be done by editing the files
+ending on \itt{vmoptions} in the installation directory.
+
+Two copies of each of the programs \program{malt-build} and \program{malt-run} will be installed.
+The two copies named \itt{malt-build} and \itt{malt-run} are intended for non-interactive, command-line use.
+The two copies named \itt{malt-build-gui} and \itt{malt-run-gui} provide a very simple GUI interface.
+
+
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mysection{The MALT index builder}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+The first step in a \MALT analysis is to build an index for the given reference database. This is done
+using a program called \pprogram{malt-build}.
+
+In summary, \program{malt-build} takes a reference sequence database (represented by
+one or more FastA files, possibly in \itt{gzip} format) as input and produces an index that then can subsequently be used
+by the main analysis program \program{malt-run} as input.
+If \MALT is to be used as a taxonomic and/or functional analysis tool as well as an alignment tool, then
+in addition, \program{malt-build} must be provided with a number of mapping files that are used
+to map reference sequences to taxonomic or functional classes{, or to locate genes in DNA reference sequences}.
+
+The \program{malt-build} program is controlled by command-line options, as summarized in Figure~\ref{fig:malt-build-usage}.
+There are three options for determining input and output:
+\begin{itemize}
+\setlength{\itemindent}{30pt}
+\item [\itt{--input}] Use to specify all files that contain reference sequences. The files must be in FastA format and
+may be {\em gzipped} (in which case their names must end in \itt{.gz}).
+{
+\item[\itt{--sequenceType}] Use to specify whether the reference sequences are \itt{DNA} or \itt{Protein} sequences.
+(For \itt{RNA} sequences, use the DNA setting).
+}
+\item[\itt{--index}] Use to specify the name of the index directory. If the directory does not already exist then it will be created.
+If it already exists, then any previous index files will be overwritten.
+\end{itemize}
+
+
+There are two performance-related options:
+\begin{itemize}
+\setlength{\itemindent}{30pt}
+\item[\itt{--threads}] Use to set the number of threads to use in parallel computations. Default is 8. Set this to
+the number of available cores.
+\item[\itt{--step}] Use to set the step size used to advance the seed; values greater than 1 reduce index size and sensitivity. Default value: 1.
+\end{itemize}
+The most important performance-related option is the maximum amount of memory that \program{malt-build}
+is allowed to use. This cannot be set from within the program but rather is set during installation of the software.
+
+\MALT uses a seed-and-extend approach based on ``spaced seeds'' \cite{Burkhardt01,Ma02}. The following options control this:
+\begin{itemize}
+\setlength{\itemindent}{30pt}
+\item[\itt{--shapes}]
+{Use this to specify the seed shapes used. For DNA sequences, the \irm{default seed shape} is:
+{\tt 111110111011110110111111}.}
+For protein sequences, by default the program uses the following four shapes:
+{\tt 111101101110111}, {\tt 1111000101011001111}, {\tt 11101001001000100101111} and {\tt 11101001000010100010100111}.
+These seeds were suggested in \cite{Ilie:2011fk}, see \url{http://www.biomedcentral.com/content/supplementary/1471-2164-12-280-s1.pdf}.
+\item[\itt{--maxHitsPerSeed}] Use to specify the maximum number of hits per seed. The program uses this
+to calculate a maximum number of hits per hash value.
+\item[\itt{--proteinReduct}] Use this to specify the alphabet reduction in the case of protein reference sequences.
+By default, the program reduces amino acids to 8 different letters, grouped as follows:
+[LVIMC] [AG] [ST] [P] [FYW] [EDNQ] [KR] [H]. This is referred to as the \iit{BLOSUM50\_8} reduction in \MALT and
+was suggested in \cite{Murphy2000}.
+
+\end{itemize}
+
+MALT is able to generate RMA files that can be directly opened in MEGAN.
+\begin{itemize}
+\setlength{\itemindent}{30pt}
+\item[\itt{--classify}] Use this option to determine which classifications should be computed, such as Taxonomy, EGGNOG, INTERPRO2GO, KEGG and/or SEED.
+\end{itemize}
+
+There are numerous options that can be used to provide mapping files to \program{malt-build} for classification support.
+These are used
+by the program to map reference sequences or genes to taxonomic and/or functional classes.
+\begin{itemize}
+\setlength{\itemindent}{30pt}
+\item[\itt{-g2taxonomy}] \itt{-a2taxonomy} \itt{-s2taxonomy} Use to specify mapping files to map reference sequences to taxonomic identifiers (NCBI taxon integer ids).
+Use {\tt -g2taxonomy} for a file mapping GI numbers to taxon ids. Use {\tt -a2taxonomy} for a file mapping accessions (such as RefSeq identifiers) to taxon ids.
+Use {\tt -s2taxonomy} for a file that maps \pconcept{synonyms} to taxon ids. A synonym is any word that may occur in
+the header line of a reference sequence.
+\item[\itt{-g2interpro2go}] \itt{-r2interpro2go} \itt{-s2interpro2go} Use to specify mapping files to map reference sequences to InterPro numbers \cite{GeneOntology2000,Mitchell2015} .
+The detailed usage of three different options is analogous to above.
+\item[\itt{-g2seed}] \itt{-r2seed} \itt{-s2seed} Use to specify mapping files to map reference sequences to SEED \cite{SEED2005} classes.
+Unfortunately, the SEED classification does not assign numerical identifiers to classes. As a work-around,
+\program{malt-build} uses the numerical identifiers defined and used by \irm{MEGAN} \cite{MEGAN2011}.
+The detailed usage of three different options is analogous to above.
+\item[\itt{-g2eggnog}] \itt{-r2eggnog} \itt{-s2eggnog} Use to specify mapping files to map reference sequences to COG and NOG \cite{Tatusov1997,eggNOG} classes.
+Unfortunately, COG's and NOG's do not share the same space of numerical identifiers. As a work-around,
+\program{malt-build} uses the numerical identifiers defined and used by \irm{MEGAN} \cite{MEGAN2011}.
+The detailed usage of three different options is analogous to above.
+\item[\itt{-g2kegg}] \itt{-r2kegg} \itt{-s2kegg} Use to specify mapping files to map reference sequences to KEGG KO numbers \cite{Kanehisa2000} .
+The detailed usage of three different options is analogous to above.
+\ignore{
+\item[\itt{-gif}] Use this option specify a \concept{gene information file}. Such a file assigns maps genes to intervals
+in reference sequences, as described below. This is usually used when the reference sequences are genomes.
+}
+\end{itemize}
+
+
+There are a couple of other options:
+\begin{itemize}
+\setlength{\itemindent}{30pt}
+\item[\itt{--firstWordOnly}] Use to specify to save only the first word of each reference header. Default value: false.
+\item[\itt{--random}] Use to specify the seed used by the random number generator.
+\item[\itt{--verbose}] Use to run program in verbose mode.
+\item[\itt{--help}] Report command-line usage.
+\end{itemize}
+
+\begin{figure}[h]
+{\tiny
+\begin{verbatim}
+SYNOPSIS
+ MaltBuild [options]
+DESCRIPTION
+ Build an index for MALT (MEGAN alignment tool)
+OPTIONS
+ Input:
+ -i, --input [string(s)] Input reference file(s). Mandatory option.
+ -s, --sequenceType [string] Sequence type. Mandatory option. Legal values: DNA, Protein
+ Output:
+ -d, --index [string] Name of index directory. Mandatory option.
+ Performance:
+ -t, --threads [number] Number of worker threads. Default value: 8.
+ -st, --step [number] Step size used to advance seed, values greater than 1 reduce index size and sensitivity. Default value: 1.
+ Seed:
+ -ss, --shapes [string(s)] Seed shape(s). Default value(s): default.
+ -mh, --maxHitsPerSeed [number] Maximum number of hits per seed. Default value: 1000.
+ -pr, --proteinReduct [string] Name or definition of protein alphabet reduction (BLOSUM50_10,BLOSUM50_11,BLOSUM50_15,BLOSUM50_4,BLOSUM50_8,DIAMOND_11,GBMR4,HSDM17,MALT_10,SDM12,UNREDUCED). Default value: DIAMOND_11.
+ Classification:
+ -c, --classify [string(s)] Classifications (any of EGGNOG INTERPRO2GO KEGG SEED Taxonomy). Mandatory option.
+ -g2eggnog, --gi2eggnog [string] GI-to-EGGNOG mapping file.
+ -r2eggnog, --ref2eggnog [string] RefSeq-to-EGGNOG mapping file.
+ -s2eggnog, --syn2eggnog [string] Synonyms-to-EGGNOG mapping file.
+ -g2interpro2go, --gi2interpro2go [string] GI-to-INTERPRO2GO mapping file.
+ -r2interpro2go, --ref2interpro2go [string] RefSeq-to-INTERPRO2GO mapping file.
+ -s2interpro2go, --syn2interpro2go [string] Synonyms-to-INTERPRO2GO mapping file.
+ -g2kegg, --gi2kegg [string] GI-to-KEGG mapping file.
+ -r2kegg, --ref2kegg [string] RefSeq-to-KEGG mapping file.
+ -s2kegg, --syn2kegg [string] Synonyms-to-KEGG mapping file.
+ -g2seed, --gi2seed [string] GI-to-SEED mapping file.
+ -r2seed, --ref2seed [string] RefSeq-to-SEED mapping file.
+ -s2seed, --syn2seed [string] Synonyms-to-SEED mapping file.
+ -g2taxonomy, --gi2taxonomy [string] GI-to-Taxonomy mapping file.
+ -a2taxonomy, --ref2taxonomy [string] Accession-to-Taxonomy mapping file.
+ -s2taxonomy, --syn2taxonomy [string] Synonyms-to-Taxonomy mapping file.
+ -tn, --parseTaxonNames Parse taxon names. Default value: true.
+ -gif, -geneInfoFile [string] File containing gene information.
+ Other:
+ -fwo, --firstWordOnly Save only first word of reference header. Default value: false.
+ -rns, --random [number] Random number generator seed. Default value: 666.
+ -hsf, --hashScaleFactor [number] Hash table scale factor. Default value: 0.9.
+ -v, --verbose Echo commandline options and be verbose. Default value: false.
+ -h, --help Show program usage and quit.
+\end{verbatim}
+}
+\caption{Summary of command-line usage of malt-build.}\label{fig:malt-build-usage}
+\end{figure}
+
+\FloatBarrier
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\mysection{The MALT analyzer}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+In summary,
+the program \pprogram{malt-run} is used to align one or more files of input sequences (DNA or proteins) against
+an index representing a collection of reference {DNA or} protein sequences. In a preprocessing step, the index is computed
+using \program{malt-build}, as described above. Depending on the type of input and reference sequences,
+the program can be run in {BLASTN,} BLASTP or BLASTX mode.
+
+The \program{malt-run} program is controlled by command-line options (see Figure~\ref{fig:malt-run-usage}).
+The first options specify the program mode and alignment type.
+\begin{itemize}
+\setlength{\itemindent}{30pt}
+
+\item[\itt{--mode}] Use this to run the program in {\pconcept{BlastN mode},} \pconcept{BlastP mode} or
+\pconcept{BlastX mode}, that is, to align {DNA and DNA,} protein and protein, or DNA reads against protein references, respectively. Obviously, the first mode can only be used if the employed index contains DNA
+sequences whereas the latter two modes are only applicable to an index based on protein reference sequences.
+\item[\itt{--alignmentType}] Use this to specify the type of alignments to be performed.
+By default, this is set to \itt{Local} and the program performs \pconcept{local alignment} just like BLAST programs do.
+Alternatively, this can be set to \itt{SemiGlobal}, in which case the program will perform \pconcept{semi global alignment}
+in which reads are aligned end-to-end.
+\end{itemize}
+
+There are two options for specifying the input.
+\begin{itemize}
+\setlength{\itemindent}{30pt}
+\item [\itt{--inFile}] Use this to specify all input files. Input files must be in FastA or FastQ format and
+may be gzipped, in which case their names must end in \itt{.gz}.
+\item[\itt{--index}] Use this to specify the directory that contains the index built by \program{malt-build}.
+\end{itemize}
+
+There are a number of options for specifying the output generated by the program.
+\begin{itemize}
+\setlength{\itemindent}{30pt}
+\item[\itt{--output}] Use to specify the names or locations of the output RMA files.
+ If a single directory is specified, then one output file
+per input file is written to the specified directory. Alternatively, if one or more output files are named, then
+the number of output files must equal the number of input files, in which case the output for the first
+input file is written to the first output file, etc.
+\item[\itt{--includeUnaligned}] Use this to ensure that all unaligned queries are placed into the output RMA file. By default, only
+queries that have an alignment are included in the output RMA file.
+\item[\itt{--alignments}] Use to specify the files to which alignments should be written.
+ If a single directory is specified, then one output file
+per input file is written to the specified directory. Alternatively, if one or more output files are named, then
+the number of output files must equal the number of input files, in which case the output for the first
+input file is written to the first output file, etc. If the argument is the special value \itt{STDOUT} then output is written
+to standard-output rather than to a file. If this option is not supplied, then the program will not output any matches.
+\item[\itt{--format}] Determines the format used to report alignments. The default format is \itt{SAM}.
+ Other choices are \itt{Text} (full text BLAST matches) and \itt{Tab} (tabulated BLAST format).
+\item[\itt{--gzipAlignments}] Use this to specify whether alignment output should be gzipped. Default is true.
+\item[\itt{--outAligned}] Use this to specify that all reads that have at least one significant alignment to some reference
+sequence should be saved.
+File specification possibilities as for \itt{--alignments}.
+\item[\itt{--samSoftClip}] Request that SAM output uses soft clipping.
+\item[\itt{--sparseSAM}] Request a sparse version of SAM output. This is faster and uses less memory, but the files are not necessarily compatible with
+other SAM processing tools.
+\item[\itt{--gzipAligned}] Compress aligned reads output using gzip. Default value: true.
+\item[\itt{--outUnaligned}] Use this to specify that all reads that do not have any significant alignment to any reference
+sequence should be saved.
+File specification possibilities as for \itt{--alignments}.
+\item[\itt{--gzipUnaligned}] Compress unaligned reads output using gzip. Default value: true.
+\end{itemize}
+
+
+There are four performance-related options:
+\begin{itemize}
+\setlength{\itemindent}{30pt}
+\item[\itt{--numThreads}] Use to set the number of threads to use in parallel computations. Default is 8. Set this to
+the number of available cores.
+% -rqc, Cache results for replicated queries. (stray usage-output fragment; the option is documented under --replicateQueryCache in this list)
+\item[\itt{--memoryMode}] Load all indices into memory, load indices page by page when needed or use memory mapping (load, page or map).
+\item[\itt{--maxTables}] Use to set the maximum number of seed tables to use (0=all). Default value: 0.
+\item[\itt{--replicateQueryCache}] Use to turn on caching of replicated queries. This is especially useful for processing 16S datasets
+in which identical sequences occur multiple times. Turning on this feature does not change the output of the program, but can
+cause a significant speed-up. Default value: false.
+\end{itemize}
+The most important performance-related option is the maximum amount of memory that \program{malt-run}
+is allowed to use. This cannot be set from within the program but rather is set during installation of the software.
+
+The following options are used to filter matches by significance. Matches that do not meet all criteria specified are completely ignored.
+\begin{itemize}
+\setlength{\itemindent}{30pt}
+\item[\itt{--minBitScore}] Minimum bit score. Default value: 50.0.
+\item[\itt{--maxExpected}] Maximum expected score. Default value: 1.0.
+\item[{\itt{--minPercentIdentity}}] Minimum percent identity. Default value: 0.0.
+\item[{\itt{--maxAlignmentsPerQuery}}] Maximum number of alignments per query. Default value: 25.
+\item[{\itt{--maxAlignmentsPerRef}}] Maximum number of (non-overlapping) alignments per reference. Default value: 1.
+\MALT reports up to this many best scoring matches for each hit reference sequence.
+\end{itemize}
+
+{
+There are a number of options that are specific to the \concept{BlastN mode}. They are used to specify scoring and
+are also used in the computation of expected values.
+\begin{itemize}
+\setlength{\itemindent}{30pt}
+\item[\itt{--matchScore}] Use to specify the alignment match score. Default value: 2.
+\item[\itt{--mismatchScore}] Use to specify the alignment mis-match score. Default value: -3.
+\item[\itt{--setLambda}] Parameter Lambda \index{Lambda parameter} for \irm{BLASTN statistics}. Default value: 0.625.
+\item[\itt{--setK}] Parameter K \index{K parameter} for BLASTN statistics. Default value: 0.41.
+\end{itemize}
+}
+
+For \concept{BlastP mode} and \concept{BlastX mode} the user need only specify a substitution matrix. The Lambda and
+K values are set automatically.
+\begin{itemize}
+\setlength{\itemindent}{30pt}
+\item[\itt{--subMatrix}] Use to specify the protein substitution matrix to use. Default value: {\tt BLOSUM62}. Legal values:
+\itt{BLOSUM45}, \itt{BLOSUM50}, \itt{BLOSUM62}, \itt{BLOSUM80}, \itt{BLOSUM90}.
+\end{itemize}
+
+If the query sequences are DNA (or RNA) sequences, that is, if the program is running in {\concept{BlastN mode}}
+or \concept{BlastX mode}, then the following options are available.
+\begin{itemize}
+\setlength{\itemindent}{30pt}
+\item[\itt{--forwardOnly}] Use to align query forward strand only. Default value: false.
+\item[\itt{--reverseOnly}] Use to align query reverse strand only. Default value: false.
+\end{itemize}
+
+The program uses the LCA algorithm \cite{MEGAN2007} to assign reads to taxa. There are a number of options that control this.
+\begin{itemize}
+\setlength{\itemindent}{30pt}
+\item[\itt{lca\_taxonomy}] Use to specify that the LCA algorithm should be applied to the taxonomy classification. Similar switches are available to turn on the use of the LCA algorithm for other classifications.
+But using the LCA algorithms only makes sense when providing additional taxonomic classifications such as the RDP tree.
+\item[\itt{--topPercent}] Use to specify the \pconcept{top percent} value for LCA algorithm. Default value is 10\%. For each read,
+only those matches are used for taxonomic placement whose bit score is within 10\% of the best score for that read.
+\item[\itt{--minSupport}] Use to specify the \pconcept{min support} value for the LCA algorithm.
+\end{itemize}
+
+There are a number of options that control the heuristics used by \program{malt-run}.
+\begin{itemize}
+\setlength{\itemindent}{30pt}
+\item[{\itt{--maxSeedsPerFrame}}] Maximum number of seed matches per offset per read frame. Default value: 100.
+\item[{\itt{--maxSeedsPerRef}}] Maximum number of seed matches per read and reference. Default value: 20.
+\item[\itt{--seedShift}] Seed shift. Default value: 1.
+\end{itemize}
+
+The program uses a banded-aligner as described in \cite{ChaoPM92}. There are a number of associated options.
+\begin{itemize}
+\setlength{\itemindent}{30pt}
+\item[\itt{--gapOpen}] Use this to specify the gap open penalty. Default value: 7.
+\item[\itt{--gapExtend}] Use this to specify gap extension penalty. Default value: 3.
+\item[\itt{--band}] Use this to specify width/2 for banded alignment. Default value: 4.
+\end{itemize}
+
+There are a couple of other options:
+\begin{itemize}
+\setlength{\itemindent}{30pt}
+\item[\itt{--replicateQueryCacheBits}] Specify the number of bits used to cache replicate queries (default is 20).
+\item[\itt{--verbose}] Use to run program in verbose mode.
+\item[\itt{--help}] Report command-line usage.
+\end{itemize}
+
+ \begin{figure}[h]
+{\tiny
+\begin{verbatim}
+SYNOPSIS
+ MaltRun [options]
+DESCRIPTION
+ Align sequences using MALT (MEGAN alignment tool)
+OPTIONS
+ Mode:
+ -m, --mode [string] Program mode. Mandatory option. Legal values: Unknown, BlastN, BlastP, BlastX, Classifier
+ -at, --alignmentType [string] Type of alignment to be performed. Default value: Local. Legal values: Local, SemiGlobal
+ Input:
+ -i, --inFile [string(s)] Input file(s) containing queries in FastA or FastQ format. Mandatory option.
+ -d, --index [string] Index directory as generated by MaltBuild. Mandatory option.
+ Output:
+ -o, --output [string(s)] Output RMA file(s) or directory.
+ -iu, --includeUnaligned Include unaligned queries in RMA output file. Default value: false.
+ -a, --alignments [string(s)] Output alignment file(s) or directory or STDOUT.
+ -f, --format [string] Alignment output format. Default value: SAM. Legal values: SAM, Tab, Text
+ -za, --gzipAlignments Compress alignments using gzip. Default value: true.
+ -ssc, --samSoftClip Use soft clipping in SAM files (BlastN mode only). Default value: false.
+ -sps, --sparseSAM Produce sparse SAM format (smaller, faster, suitable for MEGAN). Default value: false.
+ -oa, --outAligned [string(s)] Aligned reads output file(s) or directory or STDOUT.
+ -zal, --gzipAligned Compress aligned reads output using gzip. Default value: true.
+ -ou, --outUnaligned [string(s)] Unaligned reads output file(s) or directory or STDOUT.
+ -zul, --gzipUnaligned Compress unaligned reads output using gzip. Default value: true.
+ Performance:
+ -t, --numThreads [number] Number of worker threads. Default value: 8.
+ -mem, --memoryMode [string] Memory mode. Default value: load. Legal values: load, page, map
+ -mt, --maxTables [number] Set the maximum number of seed tables to use (0=all). Default value: 0.
+ -rqc, --replicateQueryCache Cache results for replicated queries. Default value: false.
+ Filter:
+ -b, --minBitScore [number] Minimum bit score. Default value: 50.0.
+ -e, --maxExpected [number] Maximum expected score. Default value: 1.0.
+ -id, --minPercentIdentity [number] Minimum percent identity. Default value: 0.0.
+ -mq, --maxAlignmentsPerQuery [number] Maximum number of alignments per query. Default value: 25.
+ -mrf, --maxAlignmentsPerRef [number] Maximum number of (non-overlapping) alignments per reference. Default value: 1.
+ BlastN parameters:
+ -ma, --matchScore [number] Match score. Default value: 2.
+ -mm, --mismatchScore [number] Mismatch score. Default value: -3.
+ -la, --setLambda [number] Parameter Lambda for BLASTN statistics. Default value: 0.625.
+ -K, --setK [number] Parameter K for BLASTN statistics. Default value: 0.41.
+ BlastP and BlastX parameters:
+ -psm, --subMatrix [string] Protein substitution matrix to use. Default value: BLOSUM62. Legal values: BLOSUM45, BLOSUM50, BLOSUM62, BLOSUM80, BLOSUM90
+ DNA query parameters:
+ -fo, --forwardOnly Align query forward strand only. Default value: false.
+ -ro, --reverseOnly Align query reverse strand only. Default value: false.
+ LCA:
+ -wLCA, --useWeightedLCA Use the weighted-LCA algorithm. Default value: false.
+ -wLCAP, --weightedLCAPercent [number] Set the weighted-LCA percentage of weight to cover. Default value: 80.0.
+ -top, --topPercent [number] Top percent value for LCA algorithm. Default value: 10.0.
+ -supp, --minSupportPercent [number] Min support value for LCA algorithm as a percent of assigned reads (0==off). Default value: 0.001.
+ -sup, --minSupport [number] Min support value for LCA algorithm (overrides --minSupportPercent). Default value: 1.
+ -mpi, --minPercentIdentityLCA [number] Min percent identity used by LCA algorithm. Default: 0.
+ -mif, --useMinPercentIdentityFilterLCA Use min percent identity assignment filter (Species 99%, Genus 97%, Family 95%, Order 90%, Class 85%, Phylum 80%).
+ -mag, --magnitudes Reads have magnitudes (to be used in taxonomic or functional analysis). Default value: false.
+ Heuristics:
+ -spf, --maxSeedsPerFrame [number] Maximum number of seed matches per offset per read frame. Default value: 100.
+ -spr, --maxSeedsPerRef [number] Maximum number of seed matches per read and reference. Default value: 20.
+ -sh, --seedShift [number] Seed shift. Default value: 1.
+ Banded alignment parameters:
+ -go, --gapOpen [number] Gap open penalty. Default value: 11.
+ -ge, --gapExtend [number] Gap extension penalty. Default value: 1.
+ -bd, --band [number] Band width/2 for banded alignment. Default value: 4.
+ Other:
+ -rqcb, --replicateQueryCacheBits [number] Bits used for caching replicate queries (size is then 2^bits). Default value: 20.
+ -v, --verbose Echo commandline options and be verbose. Default value: false.
+ -h, --help Show program usage and quit.\end{verbatim}
+}
+\caption{Summary of command-line usage of {\tt malt-run}.}\label{fig:malt-run-usage}
+\end{figure}
+
+
+\FloatBarrier
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+{\small
+\bibliographystyle{plain}
+\bibliography{compbio-2012}
+}
+
+\printindex
+
+\end{document}
diff --git a/tex/manual/versioninfo.tex b/tex/manual/versioninfo.tex
new file mode 100644
index 0000000..c3276a4
--- /dev/null
+++ b/tex/manual/versioninfo.tex
@@ -0,0 +1 @@
+\def\VERSION{{0.3.6}}
diff --git a/tex/manual/versioninfo_new.tex b/tex/manual/versioninfo_new.tex
new file mode 100644
index 0000000..c3276a4
--- /dev/null
+++ b/tex/manual/versioninfo_new.tex
@@ -0,0 +1 @@
+\def\VERSION{{0.3.6}}
diff --git a/tex/manual/versioninfo_old.tex b/tex/manual/versioninfo_old.tex
new file mode 100644
index 0000000..34ffc48
--- /dev/null
+++ b/tex/manual/versioninfo_old.tex
@@ -0,0 +1 @@
+\def\VERSION{{0.3.5}}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/malt.git
More information about the debian-med-commit
mailing list