[med-svn] [kmc] 01/01: had to recreate the repo as I got confused with pristine-tar
Jorge Soares
jssoares-guest at moszumanska.debian.org
Tue Nov 18 12:52:20 UTC 2014
This is an automated email from the git hooks/post-receive script.
jssoares-guest pushed a commit to branch master
in repository kmc.
commit b90dee14dd18a417a7a9c878a39ae7423634b738
Author: Jorge Soares <j.s.soares at gmail.com>
Date: Tue Nov 18 12:52:01 2014 +0000
had to recreate the repo as I got confused with pristine-tar
---
LICENSE | 674 ++++++++++
README.md | 37 +
debian/changelog | 5 +
debian/compat | 1 +
debian/control | 39 +
debian/copyright | 23 +
debian/kmc.install | 1 +
debian/kmc.manpages | 1 +
...-libz2-include-statements-to-native-debian-libs | 30 +
...hange-kmc-zlib-and-libz2-to-deb-zlib-and-libbz2 | 18 +
debian/patches/debian-dependent-makefile.patch | 36 +
.../remove-all-zlib-libz2-include-statements | 15 +
.../patches/remove-call-to-asmlib-sub-in-kbsorter | 16 +
.../remove-embedded-libraries-from-makefile | 26 +
.../remove-gz-and-bz-flags-from-compilation | 13 +
debian/patches/remove-references-to-asmlib | 44 +
.../patches/remove-use-of-asmlib-subroutines.patch | 141 ++
debian/patches/removing-static-option-in-makefile | 1 +
debian/patches/series | 6 +
debian/rules | 25 +
debian/source/format | 1 +
debian/upstream/metadata | 12 +
debian/usage_to_man | 140 ++
debian/watch | 5 +
kmc_api/kmc_file.cpp | 667 ++++++++++
kmc_api/kmc_file.h | 122 ++
kmc_api/kmer_api.cpp | 23 +
kmc_api/kmer_api.h | 539 ++++++++
kmc_api/kmer_defs.h | 47 +
kmc_api/mmer.cpp | 49 +
kmc_api/mmer.h | 182 +++
kmc_api/stdafx.h | 4 +
kmc_dump/ReadMe.txt | 40 +
kmc_dump/kmc_dump.cpp | 146 +++
kmc_dump/kmc_dump.vcxproj | 169 +++
kmc_dump/nc_utils.cpp | 20 +
kmc_dump/nc_utils.h | 138 ++
kmc_dump/stdafx.cpp | 8 +
kmc_dump/stdafx.h | 26 +
kmc_dump/targetver.h | 8 +
kmc_dump_sample/ReadMe.txt | 40 +
kmc_dump_sample/kmc_dump_sample.cpp | 133 ++
kmc_dump_sample/kmc_dump_sample.vcxproj | 167 +++
kmc_dump_sample/stdafx.cpp | 8 +
kmc_dump_sample/stdafx.h | 26 +
kmc_dump_sample/targetver.h | 8 +
kmer_counter.sln | 62 +
kmer_counter/ReadMe.txt | 40 +
kmer_counter/defs.h | 125 ++
kmer_counter/fastq_reader.cpp | 475 +++++++
kmer_counter/fastq_reader.h | 123 ++
kmer_counter/kb_collector.h | 227 ++++
kmer_counter/kb_completer.cpp | 251 ++++
kmer_counter/kb_completer.h | 72 ++
kmer_counter/kb_reader.h | 228 ++++
kmer_counter/kb_sorter.h | 1362 ++++++++++++++++++++
kmer_counter/kb_storer.cpp | 268 ++++
kmer_counter/kb_storer.h | 91 ++
kmer_counter/kmc.h | 767 +++++++++++
kmer_counter/kmer.cpp | 18 +
kmer_counter/kmer.h | 1049 +++++++++++++++
kmer_counter/kmer_counter.cpp | 390 ++++++
kmer_counter/kmer_counter.vcxproj | 228 ++++
kmer_counter/kxmer_set.h | 118 ++
kmer_counter/mem_disk_file.cpp | 109 ++
kmer_counter/mem_disk_file.h | 41 +
kmer_counter/meta_oper.h | 45 +
kmer_counter/mmer.cpp | 49 +
kmer_counter/mmer.h | 182 +++
kmer_counter/params.h | 155 +++
kmer_counter/queues.h | 940 ++++++++++++++
kmer_counter/radix.cpp | 292 +++++
kmer_counter/radix.h | 44 +
kmer_counter/rev_byte.cpp | 15 +
kmer_counter/rev_byte.h | 29 +
kmer_counter/s_mapper.h | 166 +++
kmer_counter/splitter.h | 941 ++++++++++++++
kmer_counter/stdafx.cpp | 8 +
kmer_counter/stdafx.h | 28 +
kmer_counter/targetver.h | 8 +
kmer_counter/timer.cpp | 62 +
kmer_counter/timer.h | 58 +
makefile | 32 +
83 files changed, 12978 insertions(+)
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..1a0079f
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,674 @@
+GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too, receive
+or can get the source code. And you must show them these terms so they
+know their rights.
+
+ Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+ For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+ Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so. This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software. The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products. If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+ Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary. To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Use with the GNU Affero General Public License.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ {one line to give the program's name and a brief idea of what it does.}
+ Copyright (C) {year} {name of author}
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see {http://www.gnu.org/licenses/}.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+ Fastaq Copyright (C) 2013 Pathogen Genomics, Wellcome Trust Sanger Institute
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+{http://www.gnu.org/licenses/}.
+
+ The GNU General Public License does not permit incorporating your program
+into proprietary programs. If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License. But first, please read
+{http://www.gnu.org/philosophy/why-not-lgpl.html}.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..46ef143
--- /dev/null
+++ b/README.md
@@ -0,0 +1,37 @@
+kmc
+===
+KMC is a disk-based program for counting k-mers from (possibly gzipped) FASTQ/FASTA files.
+The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+Installation
+============
+Before compilation you need to install some libraries and modify makefile.
+
+The necessary libraries that should be installed on a computer are:
+* Boost version 1.51 or higher (for Boost/filesystem and Boost/thread libraries)
+ change BOOST_LIB and BOOST_H in makefile to the directories where Boost is installed.
+
+The following libraries come with KMC in a binary (64-bit compiled for x86 platform) form.
+If your system needs other binary formats, you should put the following libraries in kmer_counter/libs:
+* asmlib - for fast memcpy operation (http://www.agner.org/optimize/asmlib-instructions.pdf)
+* libbzip2 - for support for bzip2-compressed input FASTQ/FASTA files (http://www.bzip.org/)
+* zlib - for support for gzip-compressed input FASTQ/FASTA files (http://www.zlib.net/)
+
+If needed, you can also redefine maximal length of k-mer, which is 256 in the current version.
+Note: KMC is highly optimized and spends only as many bytes for k-mer (rounded up to 8) as
+necessary, so using large values of MAX_K does not affect the KMC performance for short k-mers.
+
+Some parts of KMC use C++11 features, so you need a compatible C++ compiler, e.g., gcc 4.7
+or higher.
+
+After that, you can run make to compile kmc and kmc_dump applications.
+
+
+Directory structure
+===================
+. - main directory of KMC (programs after compilation will be stored here)
+kmer_counter - source code of kmc program
+kmer_counter/libs - compiled binary versions of libraries used by KMC
+kmc_api - C++ source codes implementing API; must be used by any program that
+ wants to process databases produced by kmc
+kmc_dump - source codes of kmc_dump program listing k-mers in databases produced by kmc
diff --git a/debian/changelog b/debian/changelog
new file mode 100644
index 0000000..d941639
--- /dev/null
+++ b/debian/changelog
@@ -0,0 +1,5 @@
+kmc (2.0+dfsg-1) unstable; urgency=medium
+
+ * Initial release (Closes: #1234)
+
+ -- Jorge Soares <j.s.soares at gmail.com> Mon, 03 Nov 2014 14:46:54 +0200
diff --git a/debian/compat b/debian/compat
new file mode 100644
index 0000000..ec63514
--- /dev/null
+++ b/debian/compat
@@ -0,0 +1 @@
+9
diff --git a/debian/control b/debian/control
new file mode 100644
index 0000000..365cc7a
--- /dev/null
+++ b/debian/control
@@ -0,0 +1,39 @@
+Source: kmc
+Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
+Uploaders: Andreas Tille <tille at debian.org>,
+ Jorge Soares <j.s.soares at gmail.com>
+Section: science
+Priority: optional
+Build-Depends: debhelper (>= 9),
+ zlib1g-dev,
+ libbz2-dev,
+ libboost1.54-all-dev,
+ help2man,
+ d-shlibs
+Standards-Version: 3.9.6
+Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/kmc.git
+Homepage: https://github.com/js21/kmc
+
+Package: kmc
+Architecture: any
+Depends: ${shlibs:Depends},
+ ${misc:Depends}
+Description: Designed for counting k-mers (sequences of consecutive k symbols)
+ in a set of reads. K-mer counting is important for many bioinformatics
+ applications, e.g., developing de Bruijn graph assemblers.
+ Building de Bruijn graphs is a commonly used approach for genome
+ assembly with data from second-generation sequencers.
+ Unfortunately, sequencing errors (frequent in practice)
+ result in huge memory requirements for de Bruijn graphs, as well
+ as long build time. One of the popular approaches to handle this
+ problem is filtering the input reads in such a way that unique k-mers
+ (very likely obtained as a result of an error) are discarded.
+ .
+ Thus, KMC scans the raw reads and produces a compact representation
+ of all non-unique reads accompanied with number of their occurrences.
+ The algorithm implemented in KMC makes use mostly of disk space rather
+ than RAM, which allows one to use KMC even on rather typical personal
+ computers. When run on a high-end server (which is necessary for KMC
+ competitors) it outperforms them in both memory requirements and
+ speed of computation. The disk space necessary for computation is in
+ order of the size of input data (usually it is smaller).
\ No newline at end of file
diff --git a/debian/copyright b/debian/copyright
new file mode 100644
index 0000000..2f76f2e
--- /dev/null
+++ b/debian/copyright
@@ -0,0 +1,23 @@
+Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Upstream-Name: KMC
+Source: https://github.com/js21/kmc
+Files-Excluded: kmer_counter/libs/*
+
+Files: *
+Copyright: © 2012-2013 username <mail address>
+License: GPL-3
+ This package is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+ .
+ This package is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+ .
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ .
+ On Debian systems, the complete text of the GNU General
+ Public License version 3 can be found in "/usr/share/common-licenses/GPL-3".
diff --git a/debian/kmc.install b/debian/kmc.install
new file mode 100644
index 0000000..efa6632
--- /dev/null
+++ b/debian/kmc.install
@@ -0,0 +1 @@
+bin/*
\ No newline at end of file
diff --git a/debian/kmc.manpages b/debian/kmc.manpages
new file mode 100644
index 0000000..bf519d7
--- /dev/null
+++ b/debian/kmc.manpages
@@ -0,0 +1 @@
+man/*.1
diff --git a/debian/patches/change-all-zlib-libz2-include-statements-to-native-debian-libs b/debian/patches/change-all-zlib-libz2-include-statements-to-native-debian-libs
new file mode 100644
index 0000000..0355fbe
--- /dev/null
+++ b/debian/patches/change-all-zlib-libz2-include-statements-to-native-debian-libs
@@ -0,0 +1,30 @@
+Description: zlib and libbz2 are distributed with the KMC.
+This patch alters the include statements for these libraries,
+using the debian libs instead.--- a/kmer_counter/fastq_reader.h
++++ b/kmer_counter/fastq_reader.h
+@@ -16,8 +16,8 @@
+ #include <stdio.h>
+ #include <iostream>
+
+-#include "libs/zlib.h"
+-#include "libs/bzlib.h"
++#include "zlib.h"
++#include "bzlib.h"
+
+
+ using namespace std;
+Index: b/kmer_counter/fastq_reader.h
+===================================================================
+--- a/kmer_counter/fastq_reader.h
++++ b/kmer_counter/fastq_reader.h
+@@ -16,8 +16,8 @@
+ #include <stdio.h>
+ #include <iostream>
+
+-#include "libs/zlib.h"
+-#include "libs/bzlib.h"
++#include "zlib.h"
++#include "bzlib.h"
+
+
+ using namespace std;
diff --git a/debian/patches/change-kmc-zlib-and-libz2-to-deb-zlib-and-libbz2 b/debian/patches/change-kmc-zlib-and-libz2-to-deb-zlib-and-libbz2
new file mode 100644
index 0000000..80091d2
--- /dev/null
+++ b/debian/patches/change-kmc-zlib-and-libz2-to-deb-zlib-and-libbz2
@@ -0,0 +1,18 @@
+Description: KMC distributes zlib and libbz2.
+This changes the usage of the distributed libs
+and makes KMC use the native debian libs
+Index: b/kmer_counter/fastq_reader.h
+===================================================================
+--- a/kmer_counter/fastq_reader.h
++++ b/kmer_counter/fastq_reader.h
+@@ -16,8 +16,8 @@
+ #include <stdio.h>
+ #include <iostream>
+
+-#include "libs/zlib.h"
+-#include "libs/bzlib.h"
++#include "zlib.h"
++#include "bzlib.h"
+
+
+ using namespace std;
diff --git a/debian/patches/debian-dependent-makefile.patch b/debian/patches/debian-dependent-makefile.patch
new file mode 100644
index 0000000..8bb32d4
--- /dev/null
+++ b/debian/patches/debian-dependent-makefile.patch
@@ -0,0 +1,36 @@
+Description: Removes all local dependencies from the makefile and
+establishes links to the respective debian packages
+Index: b/makefile
+===================================================================
+--- a/makefile
++++ b/makefile
+@@ -1,7 +1,7 @@
+ all: kmc
+-
+-BOOST_LIB = /boost/boost_1_55_0/stage/lib
+-BOOST_H = /boost/boost_1_55_0
++
++BOOST_LIB = /usr/lib/x86_64-linux-gnu
++BOOST_H = /usr/include/boost
+
+ KMC_BIN_DIR = bin
+ KMC_MAIN_DIR = kmer_counter
+@@ -13,15 +13,15 @@ CFLAGS = -Wall -O3 -m64 -static -fopenmp
+ CLINK = -lm -static -fopenmp -O3 -std=c++11
+
+ .cpp.o:
+- $(CC) $(CFLAGS) -c $< -o $@
++ $(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@
+
+ kmc: $(KMC_MAIN_DIR)/kmer_counter.o $(KMC_MAIN_DIR)/mmer.o $(KMC_MAIN_DIR)/mem_disk_file.o $(KMC_MAIN_DIR)/rev_byte.o $(KMC_MAIN_DIR)/fastq_reader.o $(KMC_MAIN_DIR)/timer.o $(KMC_MAIN_DIR)/radix.o $(KMC_MAIN_DIR)/kb_completer.o $(KMC_MAIN_DIR)/kb_storer.o $(KMC_MAIN_DIR)/kmer.o
+ -mkdir -p $(KMC_BIN_DIR)
+- $(CC) $(CLINK) -o $(KMC_BIN_DIR)/$@ $(KMC_MAIN_DIR)/kmer_counter.o $(KMC_MAIN_DIR)/mem_disk_file.o $(KMC_MAIN_DIR)/rev_byte.o $(KMC_MAIN_DIR)/mmer.o $(KMC_MAIN_DIR)/fastq_reader.o $(KMC_MAIN_DIR)/timer.o $(KMC_MAIN_DIR)/radix.o $(KMC_MAIN_DIR)/kb_completer.o $(KMC_MAIN_DIR)/kb_storer.o $(KMC_MAIN_DIR)/kmer.o $(KMC_MAIN_DIR)/libs/alibelf64.a $(KMC_MAIN_DIR)/libs/libz.a $(KMC_MAIN_DIR)/libs/libbz2.a $(BOOST_LIB)/libboost_thread.a $(BOOST_LIB)/libboost_filesystem.a $(BOOST_LIB)/libboost_system.a
++ $(CC) $(CPPFLAGS) $(CLINK) -o $(KMC_BIN_DIR)/$@ $(KMC_MAIN_DIR)/kmer_counter.o $(KMC_MAIN_DIR)/mem_disk_file.o $(KMC_MAIN_DIR)/rev_byte.o $(KMC_MAIN_DIR)/mmer.o $(KMC_MAIN_DIR)/fastq_reader.o $(KMC_MAIN_DIR)/timer.o $(KMC_MAIN_DIR)/radix.o $(KMC_MAIN_DIR)/kb_completer.o $(KMC_MAIN_DIR)/kb_storer.o $(KMC_MAIN_DIR)/kmer.o $(KMC_MAIN_DIR)/libs/alibelf64.a $(KMC_MAIN_DIR)/libs/libz.a $(KMC_MAIN_DIR)/libs/libbz2.a $(BOOST_LIB)/libboost_thread.a $(BOOST_LIB)/libboost_filesystem.a $(BOOST_LIB [...]
+
+ kmc_dump: $(KMC_DUMP_DIR)/nc_utils.o $(KMC_API_DIR)/mmer.o $(KMC_DUMP_DIR)/kmc_dump.o $(KMC_API_DIR)/kmc_file.o $(KMC_API_DIR)/kmer_api.o
+ -mkdir -p $(KMC_BIN_DIR)
+- $(CC) $(CLINK) -o $(KMC_BIN_DIR)/$@ $(KMC_DUMP_DIR)/nc_utils.o $(KMC_API_DIR)/mmer.o $(KMC_DUMP_DIR)/kmc_dump.o $(KMC_API_DIR)/kmc_file.o $(KMC_API_DIR)/kmer_api.o
++ $(CC) $(CPPFLAGS) $(CLINK) -o $(KMC_BIN_DIR)/$@ $(KMC_DUMP_DIR)/nc_utils.o $(KMC_API_DIR)/mmer.o $(KMC_DUMP_DIR)/kmc_dump.o $(KMC_API_DIR)/kmc_file.o $(KMC_API_DIR)/kmer_api.o $(LDFLAGS)
+
+ clean:
+ -rm $(KMC_MAIN_DIR)/*.o
diff --git a/debian/patches/remove-all-zlib-libz2-include-statements b/debian/patches/remove-all-zlib-libz2-include-statements
new file mode 100644
index 0000000..019c8b3
--- /dev/null
+++ b/debian/patches/remove-all-zlib-libz2-include-statements
@@ -0,0 +1,15 @@
+Index: b/kmer_counter/fastq_reader.h
+===================================================================
+--- a/kmer_counter/fastq_reader.h
++++ b/kmer_counter/fastq_reader.h
+@@ -16,8 +16,8 @@
+ #include <stdio.h>
+ #include <iostream>
+
+-#include "libs/zlib.h"
+-#include "libs/bzlib.h"
++#include "zlib.h"
++#include "bzlib.h"
+
+
+ using namespace std;
diff --git a/debian/patches/remove-call-to-asmlib-sub-in-kbsorter b/debian/patches/remove-call-to-asmlib-sub-in-kbsorter
new file mode 100644
index 0000000..b085a86
--- /dev/null
+++ b/debian/patches/remove-call-to-asmlib-sub-in-kbsorter
@@ -0,0 +1,16 @@
+Description: Removes a call to set memory cache.
+asmlib subroutineIndex: kmc/kmer_counter/kb_sorter.h
+===================================================================
+Index: b/kmer_counter/kb_sorter.h
+===================================================================
+--- a/kmer_counter/kb_sorter.h
++++ b/kmer_counter/kb_sorter.h
+@@ -247,8 +247,6 @@ template <typename KMER_T, unsigned SIZE
+ uint64 tmp_n_rec;
+ CMemDiskFile *file;
+
+- SetMemcpyCacheLimit(8);
+-
+ // Process bins
+ while (!bq->completed())
+ {
diff --git a/debian/patches/remove-embedded-libraries-from-makefile b/debian/patches/remove-embedded-libraries-from-makefile
new file mode 100644
index 0000000..e39ec14
--- /dev/null
+++ b/debian/patches/remove-embedded-libraries-from-makefile
@@ -0,0 +1,26 @@
+Description: Removes the calls to the distributed
+libraries
+===================================================================
+Index: b/makefile
+===================================================================
+--- a/makefile
++++ b/makefile
+@@ -9,15 +9,15 @@ KMC_API_DIR = kmc_api
+ KMC_DUMP_DIR = kmc_dump
+
+ CC = g++
+-CFLAGS = -Wall -O3 -m64 -static -fopenmp -std=c++11 -I $(BOOST_H)
+-CLINK = -lm -static -fopenmp -O3 -std=c++11
++CFLAGS = -Wall -O3 -m64 -fopenmp -std=c++11 -I $(BOOST_H)
++CLINK = -lm -fopenmp -O3 -std=c++11
+
+ .cpp.o:
+ $(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@
+
+ kmc: $(KMC_MAIN_DIR)/kmer_counter.o $(KMC_MAIN_DIR)/mmer.o $(KMC_MAIN_DIR)/mem_disk_file.o $(KMC_MAIN_DIR)/rev_byte.o $(KMC_MAIN_DIR)/fastq_reader.o $(KMC_MAIN_DIR)/timer.o $(KMC_MAIN_DIR)/radix.o $(KMC_MAIN_DIR)/kb_completer.o $(KMC_MAIN_DIR)/kb_storer.o $(KMC_MAIN_DIR)/kmer.o
+ -mkdir -p $(KMC_BIN_DIR)
+- $(CC) $(CPPFLAGS) $(CLINK) -o $(KMC_BIN_DIR)/$@ $(KMC_MAIN_DIR)/kmer_counter.o $(KMC_MAIN_DIR)/mem_disk_file.o $(KMC_MAIN_DIR)/rev_byte.o $(KMC_MAIN_DIR)/mmer.o $(KMC_MAIN_DIR)/fastq_reader.o $(KMC_MAIN_DIR)/timer.o $(KMC_MAIN_DIR)/radix.o $(KMC_MAIN_DIR)/kb_completer.o $(KMC_MAIN_DIR)/kb_storer.o $(KMC_MAIN_DIR)/kmer.o $(KMC_MAIN_DIR)/libs/alibelf64.a $(KMC_MAIN_DIR)/libs/libz.a $(KMC_MAIN_DIR)/libs/libbz2.a $(BOOST_LIB)/libboost_thread.a $(BOOST_LIB)/libboost_filesystem.a $(BOOST_LIB [...]
++ $(CC) $(CPPFLAGS) $(CLINK) -o $(KMC_BIN_DIR)/$@ $(KMC_MAIN_DIR)/kmer_counter.o $(KMC_MAIN_DIR)/mem_disk_file.o $(KMC_MAIN_DIR)/rev_byte.o $(KMC_MAIN_DIR)/mmer.o $(KMC_MAIN_DIR)/fastq_reader.o $(KMC_MAIN_DIR)/timer.o $(KMC_MAIN_DIR)/radix.o $(KMC_MAIN_DIR)/kb_completer.o $(KMC_MAIN_DIR)/kb_storer.o $(KMC_MAIN_DIR)/kmer.o $(BOOST_LIB)/libboost_thread.a $(BOOST_LIB)/libboost_filesystem.a $(BOOST_LIB)/libboost_system.a $(LDFLAGS) -lz -lbz2
+
+ kmc_dump: $(KMC_DUMP_DIR)/nc_utils.o $(KMC_API_DIR)/mmer.o $(KMC_DUMP_DIR)/kmc_dump.o $(KMC_API_DIR)/kmc_file.o $(KMC_API_DIR)/kmer_api.o
+ -mkdir -p $(KMC_BIN_DIR)
diff --git a/debian/patches/remove-gz-and-bz-flags-from-compilation b/debian/patches/remove-gz-and-bz-flags-from-compilation
new file mode 100644
index 0000000..86df646
--- /dev/null
+++ b/debian/patches/remove-gz-and-bz-flags-from-compilation
@@ -0,0 +1,13 @@
+Description: Removing -lz and -lbz2 from compilation command.Index: kmc/makefile
+===================================================================
+--- kmc.orig/makefile
++++ kmc/makefile
+@@ -17,7 +17,7 @@ CLINK = -lm -static -fopenmp -O3 -std=c+
+
+ kmc: $(KMC_MAIN_DIR)/kmer_counter.o $(KMC_MAIN_DIR)/mmer.o $(KMC_MAIN_DIR)/mem_disk_file.o $(KMC_MAIN_DIR)/rev_byte.o $(KMC_MAIN_DIR)/fastq_reader.o $(KMC_MAIN_DIR)/timer.o $(KMC_MAIN_DIR)/radix.o $(KMC_MAIN_DIR)/kb_completer.o $(KMC_MAIN_DIR)/kb_storer.o $(KMC_MAIN_DIR)/kmer.o
+ -mkdir -p $(KMC_BIN_DIR)
+- $(CC) $(CPPFLAGS) $(CLINK) -o $(KMC_BIN_DIR)/$@ $(KMC_MAIN_DIR)/kmer_counter.o $(KMC_MAIN_DIR)/mem_disk_file.o $(KMC_MAIN_DIR)/rev_byte.o $(KMC_MAIN_DIR)/mmer.o $(KMC_MAIN_DIR)/fastq_reader.o $(KMC_MAIN_DIR)/timer.o $(KMC_MAIN_DIR)/radix.o $(KMC_MAIN_DIR)/kb_completer.o $(KMC_MAIN_DIR)/kb_storer.o $(KMC_MAIN_DIR)/kmer.o $(BOOST_LIB)/libboost_thread.a $(BOOST_LIB)/libboost_filesystem.a $(BOOST_LIB)/libboost_system.a $(LDFLAGS) -lz -lbz2
++ $(CC) $(CPPFLAGS) $(CLINK) -o $(KMC_BIN_DIR)/$@ $(KMC_MAIN_DIR)/kmer_counter.o $(KMC_MAIN_DIR)/mem_disk_file.o $(KMC_MAIN_DIR)/rev_byte.o $(KMC_MAIN_DIR)/mmer.o $(KMC_MAIN_DIR)/fastq_reader.o $(KMC_MAIN_DIR)/timer.o $(KMC_MAIN_DIR)/radix.o $(KMC_MAIN_DIR)/kb_completer.o $(KMC_MAIN_DIR)/kb_storer.o $(KMC_MAIN_DIR)/kmer.o $(BOOST_LIB)/libboost_thread.a $(BOOST_LIB)/libboost_filesystem.a $(BOOST_LIB)/libboost_system.a $(LDFLAGS)
+
+ kmc_dump: $(KMC_DUMP_DIR)/nc_utils.o $(KMC_API_DIR)/mmer.o $(KMC_DUMP_DIR)/kmc_dump.o $(KMC_API_DIR)/kmc_file.o $(KMC_API_DIR)/kmer_api.o
+ -mkdir -p $(KMC_BIN_DIR)
diff --git a/debian/patches/remove-references-to-asmlib b/debian/patches/remove-references-to-asmlib
new file mode 100644
index 0000000..9ae0eac
--- /dev/null
+++ b/debian/patches/remove-references-to-asmlib
@@ -0,0 +1,44 @@
+Description: Removes all - include "libs/asmlib.h".
+This is a customised subroutine optimisation library
+that gives KMC a speed performance increase of ~0.66%
+when compared to the native library.
+A decision was made together with upstream to remove
+this dependency for the Debian package.
+
+
+Index: b/kmer_counter/kmc.h
+===================================================================
+--- a/kmer_counter/kmc.h
++++ b/kmer_counter/kmc.h
+@@ -29,7 +29,6 @@
+ #include "kb_storer.h"
+ #include "s_mapper.h"
+ #include "splitter.h"
+-#include "libs/asmlib.h"
+ #include <boost/filesystem.hpp>
+
+ #ifdef DEVELOP_MODE
+Index: b/kmer_counter/mem_disk_file.cpp
+===================================================================
+--- a/kmer_counter/mem_disk_file.cpp
++++ b/kmer_counter/mem_disk_file.cpp
+@@ -10,7 +10,6 @@
+ */
+
+ #include "mem_disk_file.h"
+-#include "libs/asmlib.h"
+
+ //----------------------------------------------------------------------------------
+ // Constructor
+Index: b/kmer_counter/radix.h
+===================================================================
+--- a/kmer_counter/radix.h
++++ b/kmer_counter/radix.h
+@@ -16,7 +16,6 @@
+ #include <iostream>
+ #include <omp.h>
+ #include <algorithm>
+-#include "libs/asmlib.h"
+ #include "defs.h"
+ #include "queues.h"
+ #include <boost/static_assert.hpp>
diff --git a/debian/patches/remove-use-of-asmlib-subroutines.patch b/debian/patches/remove-use-of-asmlib-subroutines.patch
new file mode 100644
index 0000000..22a89fa
--- /dev/null
+++ b/debian/patches/remove-use-of-asmlib-subroutines.patch
@@ -0,0 +1,141 @@
+Description: Removes all usage of the "A_mem" subroutines.
+The A_mem type of calls are defined in the asmlib library.
+
+Index: b/kmer_counter/defs.h
+===================================================================
+--- a/kmer_counter/defs.h
++++ b/kmer_counter/defs.h
+@@ -73,8 +73,6 @@ typedef float count_t;
+ #define KMER_WORDS ((MAX_K + 31) / 32)
+
+ #ifdef _DEBUG
+-#define A_memcpy memcpy
+-#define A_memset memset
+ #endif
+
+
+Index: b/kmer_counter/kb_sorter.h
+===================================================================
+--- a/kmer_counter/kb_sorter.h
++++ b/kmer_counter/kb_sorter.h
+@@ -406,7 +406,7 @@ template <unsigned SIZE> void CKmerBinSo
+ template <unsigned SIZE> void CKmerBinSorter_Impl<CKmer<SIZE>, SIZE>::FromChildThread(CKmerBinSorter<CKmer<SIZE>, SIZE>& ptr, CKmer<SIZE>* thread_buffer, uint64 size)
+ {
+ lock_guard<mutex> lcx(ptr.expander_mtx);
+- A_memcpy(ptr.buffer_input + ptr.input_pos, thread_buffer, size * sizeof(CKmer<SIZE>));
++ memcpy(ptr.buffer_input + ptr.input_pos, thread_buffer, size * sizeof(CKmer<SIZE>));
+ ptr.input_pos += size;
+ }
+
+@@ -1359,4 +1359,4 @@ template <typename KMER_T, unsigned SIZE
+
+ #endif
+
+-// ***** EOF
+\ No newline at end of file
++// ***** EOF
+Index: b/kmer_counter/kb_storer.cpp
+===================================================================
+--- a/kmer_counter/kb_storer.cpp
++++ b/kmer_counter/kb_storer.cpp
+@@ -156,7 +156,7 @@ void CKmerBinStorer::PutBinToTmpFile(uin
+ {
+ buf = get<0>(*p);
+ size = get<1>(*p);
+- A_memcpy(tmp_buff + tmp_buff_pos, buf, size);
++ memcpy(tmp_buff + tmp_buff_pos, buf, size);
+ tmp_buff_pos += size;
+ pmm_bins->free(buf);
+ }
+Index: b/kmer_counter/mem_disk_file.cpp
+===================================================================
+--- a/kmer_counter/mem_disk_file.cpp
++++ b/kmer_counter/mem_disk_file.cpp
+@@ -78,7 +78,7 @@ size_t CMemDiskFile::Read(uchar * ptr, s
+ uint64 pos = 0;
+ for(auto& p : container)
+ {
+- A_memcpy(ptr + pos, p.first, p.second);
++ memcpy(ptr + pos, p.first, p.second);
+ pos += p.second;
+ delete[] p.first;
+ }
+@@ -97,7 +97,7 @@ size_t CMemDiskFile::Write(const uchar *
+ if(memory_mode)
+ {
+ uchar *buf = new uchar[size * count];
+- A_memcpy(buf, ptr, size * count);
++ memcpy(buf, ptr, size * count);
+ container.push_back(make_pair(buf, size * count));
+ return size * count;
+ }
+Index: b/kmer_counter/radix.cpp
+===================================================================
+--- a/kmer_counter/radix.cpp
++++ b/kmer_counter/radix.cpp
+@@ -72,7 +72,7 @@ void RadixOMP_uint8(uint32 *SourcePtr, u
+
+ ++privateByteCounter[byteValue];
+ }
+- A_memcpy(&ByteCounter[myID][0], privateByteCounter, sizeof(privateByteCounter));
++ memcpy(&ByteCounter[myID][0], privateByteCounter, sizeof(privateByteCounter));
+
+ #pragma omp barrier
+
+@@ -104,7 +104,7 @@ void RadixOMP_uint8(uint32 *SourcePtr, u
+ for (private_i = 0; private_i < 256; private_i++)
+ ByteCounter[myID][private_i] += globalHisto[private_i];
+
+- A_memcpy(privateByteCounter, &ByteCounter[myID][0], sizeof(privateByteCounter));
++ memcpy(privateByteCounter, &ByteCounter[myID][0], sizeof(privateByteCounter));
+
+ #pragma omp for schedule(static)
+ for(i = data_offset; i < SourceSize_in_bytes; i = i + rec_size)
+@@ -199,7 +199,7 @@ template<typename COUNTER_TYPE, typename
+ byteValue = *(reinterpret_cast<const uint8_t*>(&tempSource[i]) + ByteIndex);
+ ++privateByteCounter[byteValue];
+ }
+- A_memcpy(&ByteCounter[myID][0], privateByteCounter, sizeof(privateByteCounter));
++ memcpy(&ByteCounter[myID][0], privateByteCounter, sizeof(privateByteCounter));
+
+ #pragma omp barrier
+
+@@ -230,7 +230,7 @@ template<typename COUNTER_TYPE, typename
+ for (private_i = 0; private_i < 256; private_i++)
+ ByteCounter[myID][private_i] += globalHisto[private_i];
+
+- A_memcpy(privateByteCounter, &ByteCounter[myID][0], sizeof(privateByteCounter));
++ memcpy(privateByteCounter, &ByteCounter[myID][0], sizeof(privateByteCounter));
+
+
+ #pragma omp for schedule(static)
+@@ -245,7 +245,7 @@ template<typename COUNTER_TYPE, typename
+ privateByteCounter[byteValue]++;
+
+ if(index_x == (BUFFER_WIDTH -1))
+- A_memcpy ( &tempDest[privateByteCounter[byteValue] - (BUFFER_WIDTH)], &Buffer[byteValue * BUFFER_WIDTH], BUFFER_WIDTH *sizeof(uint64) );
++ memcpy ( &tempDest[privateByteCounter[byteValue] - (BUFFER_WIDTH)], &Buffer[byteValue * BUFFER_WIDTH], BUFFER_WIDTH *sizeof(uint64) );
+ } //end_for
+
+ INT_TYPE elemInBuffer;
+@@ -265,7 +265,7 @@ template<typename COUNTER_TYPE, typename
+ elemInBuffer = index_stop - index_start;
+
+ if(elemInBuffer != 0)
+- A_memcpy ( &tempDest[privateByteCounter[private_i] - elemInBuffer], &Buffer[private_i * BUFFER_WIDTH + (privateByteCounter[private_i] - elemInBuffer)%BUFFER_WIDTH], (elemInBuffer)*sizeof(uint64) );
++ memcpy ( &tempDest[privateByteCounter[private_i] - elemInBuffer], &Buffer[private_i * BUFFER_WIDTH + (privateByteCounter[private_i] - elemInBuffer)%BUFFER_WIDTH], (elemInBuffer)*sizeof(uint64) );
+
+ }
+ #pragma omp barrier
+Index: b/kmer_counter/kmc.h
+===================================================================
+--- a/kmer_counter/kmc.h
++++ b/kmer_counter/kmc.h
+@@ -164,7 +164,6 @@ template <typename KMER_T, unsigned SIZE
+
+ initialized = true;
+
+- SetMemcpyCacheLimit(8); // Sets the asmlib's memcpy function to make copy without use of cache memory
+ }
+
+ //----------------------------------------------------------------------------------
diff --git a/debian/patches/removing-static-option-in-makefile b/debian/patches/removing-static-option-in-makefile
new file mode 100644
index 0000000..4415469
--- /dev/null
+++ b/debian/patches/removing-static-option-in-makefile
@@ -0,0 +1 @@
+Description: Removes the static compilation option
\ No newline at end of file
diff --git a/debian/patches/series b/debian/patches/series
new file mode 100644
index 0000000..b081585
--- /dev/null
+++ b/debian/patches/series
@@ -0,0 +1,6 @@
+debian-dependent-makefile.patch
+remove-references-to-asmlib
+remove-use-of-asmlib-subroutines.patch
+change-kmc-zlib-and-libz2-to-deb-zlib-and-libbz2
+remove-call-to-asmlib-sub-in-kbsorter
+remove-embedded-libraries-from-makefile
diff --git a/debian/rules b/debian/rules
new file mode 100755
index 0000000..ad78fa4
--- /dev/null
+++ b/debian/rules
@@ -0,0 +1,25 @@
+#!/usr/bin/make -f
+
+export DH_VERBOSE := 1
+DEBVERS := $(shell dpkg-parsechangelog | sed -n -e 's/^Version: //p')
+VERSION := $(shell echo '$(DEBVERS)' | sed -e 's/^[[:digit:]]*://' -e 's/[~-].*//')
+OVERSION := $(shell echo '$(DEBVERS)' | sed -e 's/^[[:digit:]]*://' -e 's/[+].*//')
+
+
+mandir := $(CURDIR)/debian/man
+debfolder := $(CURDIR)/debian
+
+%:
+ dh $@
+
+override_dh_install:
+ dh_install
+
+override_dh_installman:
+ mkdir -p $(mandir)
+ $(debfolder)/usage_to_man
+ dh_installman --
+
+get-orig-source:
+ uscan --verbose --force-download --repack --compression gz --destdir=..
+ mv ../kmc_$(OVERSION).orig.tar.gz ../kmc_$(VERSION).orig.tar.gz
diff --git a/debian/source/format b/debian/source/format
new file mode 100644
index 0000000..46ebe02
--- /dev/null
+++ b/debian/source/format
@@ -0,0 +1 @@
+3.0 (quilt)
\ No newline at end of file
diff --git a/debian/upstream/metadata b/debian/upstream/metadata
new file mode 100644
index 0000000..d8b5812
--- /dev/null
+++ b/debian/upstream/metadata
@@ -0,0 +1,12 @@
+Reference:
+ Author:
+ Title:
+ Journal:
+ Year:
+ Volume:
+ Number:
+ Pages:
+ DOI:
+ PMID:
+ URL:
+ eprint:
diff --git a/debian/usage_to_man b/debian/usage_to_man
new file mode 100755
index 0000000..e69a4bb
--- /dev/null
+++ b/debian/usage_to_man
@@ -0,0 +1,140 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+use Data::Dumper;
+
+#Converts the usage output of the KMC executables (bin/kmc*) into man pages.
+#The man pages are written to the current working directory.
+
+createManPages();
+
+# Generate a section 1 man page for every file matching bin/kmc*.
+# Each file is run through help2man (with --no-discard-stderr), the roff
+# output is rewritten line by line and the result is saved as ./<name>.1.
+# Dies when the destination directory or a man page cannot be written.
+sub createManPages {
+
+	my $source      = 'bin';
+	my $destination = '.';
+
+	unless ( -d $destination ) {
+		# mkdir is a Perl builtin; wrapping it in system() would run its
+		# return value as a shell command instead of creating the directory.
+		mkdir $destination or die "Cannot create $destination: $!\n";
+	}
+
+	my @files = grep { -f $_ } glob("$source/kmc*");
+	return unless @files;
+
+	print "Creating manpages\n";
+	for my $file (@files) {
+
+		( my $filename = $file ) =~ s/^\Q$source\E\///;
+		my $man_file = $destination . '/' . $filename . '.1';
+
+		my $kmc_short_description = 'Count kmers in genomic sequences';
+		my $kmc_long_description = 'KMC—K-mer Counter is a utility designed for counting k-mers (sequences of consecutive k symbols) in a set of reads from genome sequencing projects.';
+
+		my $cmd = "help2man -N -m $filename -n $filename --no-discard-stderr $file | sed 's/usage://gi'";
+		my @output = `$cmd`;
+		chomp @output;
+
+		my $ss_param_seen  = 0;    # first '.SS "Parameters:"' heading already handled
+		my $ss_param_seen2 = 1;    # cleared by the first heading so the second one stops the loop
+		my $ip_tag_seen    = 0;    # first '.IP' tag already turned into the SYNOPSIS heading
+		my @lines_not_to_print;
+		my @lines_to_print;
+
+		for my $i ( 0 .. $#output ) {
+
+			my $output_line = $output[$i];
+
+			# A line two positions ahead is only rewritten when it exists;
+			# otherwise the array would grow and undefined lines would be processed.
+			my $has_line_after_next = ( $i + 2 <= $#output );
+
+			$output_line =~ s/^(\.TH\s)([a-zA-Z0-9-]+)(\s.*)/$1 $filename $3/;
+			$output_line =~ s/^(K\-Mer \\- )(kmc)/$2 - $kmc_short_description/;
+			if ( $output_line =~ m/^K\\-Mer Counter/ ) {
+				$output_line = $kmc_long_description;
+			}
+
+			if ( $output_line =~ m/^\.SS \"Parameters\:\"/ && !$ss_param_seen ) {
+				# Drop the heading and the three lines that follow it
+				push( @lines_not_to_print, $i, $i + 1, $i + 2, $i + 3 );
+
+				$ss_param_seen  = 1;
+				$ss_param_seen2 = 0;
+			}
+			elsif ( $output_line =~ m/^\.IP/ && !$ip_tag_seen ) {
+				$output_line = '.SH SYNOPSIS';
+				if ( $has_line_after_next && $output[ $i + 2 ] =~ /^kmc/ ) {
+					$output[ $i + 2 ] = ".PP\n" . $output[ $i + 2 ];
+				}
+				$ip_tag_seen = 1;
+			}
+			elsif ( $output_line =~ m/^\.SS ""/ ) {
+				push( @lines_not_to_print, $i, $i + 1, $i + 2, $i + 3 );
+			}
+			elsif ( $output_line =~ m/^Example:/ ) {
+				$output_line = '.SH EXAMPLES';
+				if ($has_line_after_next) {
+					$output[ $i + 2 ] = ".PP\n" . $output[ $i + 2 ];
+				}
+			}
+			elsif ( $output_line =~ m/^\.SS \"Parameters\:\"/ && $ss_param_seen && !$ss_param_seen2 ) {
+				# A second "Parameters:" heading ends the page; nothing after it is kept
+				last;
+			}
+
+			push( @lines_to_print, $output_line )
+				unless grep { $_ == $i } @lines_not_to_print;
+		}
+
+		print Dumper( \@lines_not_to_print );
+		open( my $man_fh, '>', $man_file ) or die "Cannot write $man_file: $!\n";
+		print {$man_fh} "$_\n" for @lines_to_print;
+		close($man_fh) or die "Cannot close $man_file: $!\n";
+		print "Manpage creation complete\n";
+	}
+}
+
+# Append the AUTHOR and COPYING sections to a man page.
+#   $man_fh   - file handle opened for writing
+#   $filename - program name interpolated into the AUTHOR paragraph
+sub writeAuthorAndCopyright {
+	my $man_fh   = shift;
+	my $filename = shift;
+
+	# Interpolating here-document: $filename is expanded, \@ gives a literal '@'
+	print {$man_fh} <<END_OF_AUTHOR_BLURB, "\n";
+.SH "AUTHOR"
+.sp
+$filename was originally written by Martin Hunt (mh12\@sanger.ac.uk)
+END_OF_AUTHOR_BLURB
+
+	# Non-interpolating here-document: the roff escapes are written verbatim
+	print {$man_fh} <<'END_OF_C_BLURB', "\n";
+.SH "COPYING"
+.sp
+Wellcome Trust Sanger Institute Copyright \(co 2013 Wellcome Trust Sanger Institute This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version\&.
+END_OF_C_BLURB
+
+}
diff --git a/debian/watch b/debian/watch
new file mode 100644
index 0000000..bf30b45
--- /dev/null
+++ b/debian/watch
@@ -0,0 +1,5 @@
+version=3
+
+opts=dversionmangle=s/[~\+]dfsg[0-9]*// \
+ https://github.com/js21/kmc/releases .*/archive/v(\d[\d.-]+)\.(?:tar(?:\.gz|\.bz2)?|tgz)
+
diff --git a/kmc_api/kmc_file.cpp b/kmc_api/kmc_file.cpp
new file mode 100755
index 0000000..de2cd9b
--- /dev/null
+++ b/kmc_api/kmc_file.cpp
@@ -0,0 +1,667 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#include "stdafx.h"
+#include "mmer.h"
+#include "kmc_file.h"
+#include <iostream>
+
+
+// Number of bytes (1 << 25) of *.kmc_suf read into sufix_file_buf at a time in listing mode
+uint64 CKMCFile::part_size = 1 << 25;
+
+// ----------------------------------------------------------------------------------
+// Open files *.kmc_pre & *.kmc_suf, read them to RAM, close files.
+// The file *.kmc_suf is opened for random access
+// IN	: file_name - the name of kmer_counter's output
+// RET	: true		- if successful; on failure every file and buffer acquired
+//					  so far is released and the object stays closed
+// ----------------------------------------------------------------------------------
+bool CKMCFile::OpenForRA(const std::string &file_name)
+{
+	uint64 size;
+
+	// Opening twice would overwrite (and leak) the buffers of the first database
+	if(is_opened)
+		return false;
+
+	if(file_pre || file_suf)
+		return false;
+
+	bool ok = OpenASingleFile(file_name + ".kmc_pre", file_pre, size, (char *)"KMCP")
+		&& ReadParamsFrom_prefix_file_buf(size);
+
+	if(file_pre)
+	{
+		fclose(file_pre);
+		file_pre = NULL;
+	}
+
+	ok = ok && OpenASingleFile(file_name + ".kmc_suf", file_suf, size, (char *)"KMCS");
+
+	if(ok)
+	{
+		sufix_file_buf = new uchar[size];
+		size_t result = fread(sufix_file_buf, 1, size, file_suf);
+
+		// An empty read fails as before; a short read would leave part of the buffer uninitialised
+		ok = (result != 0) && (result == size);
+	}
+
+	if(!ok)
+	{
+		// Close() only acts on an opened object, so mark it opened to let it
+		// release whatever was acquired before the failure
+		is_opened = opened_for_RA;
+		Close();
+		return false;
+	}
+
+	fclose(file_suf);
+	file_suf = NULL;
+
+	is_opened = opened_for_RA;
+	prefix_index = 0;
+	sufix_number = 0;
+	return true;
+}
+
+//----------------------------------------------------------------------------------
+// Open files *.kmc_pre & *.kmc_suf, read *.kmc_pre to RAM, close *.kmc_pre
+// *.kmc_suf is buffered
+// IN	: file_name - the name of kmer_counter's output
+// RET	: true		- if successful; on failure every file and buffer acquired
+//					  so far is released and the object stays closed
+//----------------------------------------------------------------------------------
+bool CKMCFile::OpenForListing(const std::string &file_name)
+{
+	uint64 size;
+
+	if(is_opened)
+		return false;
+
+	if(file_pre || file_suf)
+		return false;
+
+	bool ok = OpenASingleFile(file_name + ".kmc_pre", file_pre, size, (char *)"KMCP")
+		&& ReadParamsFrom_prefix_file_buf(size);
+
+	if(file_pre)
+	{
+		fclose(file_pre);
+		file_pre = NULL;
+	}
+
+	if(ok)
+		end_of_file = total_kmers == 0;
+
+	ok = ok && OpenASingleFile(file_name + ".kmc_suf", file_suf, size, (char *)"KMCS");
+
+	if(ok)
+	{
+		// Only the first block is read here; later blocks are read by Reload_sufix_file_buf()
+		sufix_file_buf = new uchar[part_size];
+		size_t result = fread(sufix_file_buf, 1, part_size, file_suf);
+		ok = (result != 0);
+	}
+
+	if(!ok)
+	{
+		// Close() only acts on an opened object, so mark it opened to let it
+		// release whatever was acquired before the failure (it also clears end_of_file)
+		is_opened = opened_for_listing;
+		Close();
+		return false;
+	}
+
+	is_opened = opened_for_listing;
+	prefix_index = 0;
+	sufix_number = 0;
+	index_in_partial_buf = 0;
+	return true;
+}
+//----------------------------------------------------------------------------------
+// Create a closed object: no file attached, no buffer allocated
+//----------------------------------------------------------------------------------
+CKMCFile::CKMCFile()
+	: is_opened(closed),
+	  end_of_file(false),
+	  file_pre(NULL),
+	  file_suf(NULL),
+	  prefix_file_buf(NULL),
+	  signature_map(NULL),
+	  sufix_file_buf(NULL)
+{
+}
+//----------------------------------------------------------------------------------
+// Close the files that are still open and free the buffers that are still allocated
+//----------------------------------------------------------------------------------
+CKMCFile::~CKMCFile()
+{
+	// fclose() must not be given a null handle, so both handles are tested first
+	if(file_pre != NULL)
+		fclose(file_pre);
+	if(file_suf != NULL)
+		fclose(file_suf);
+
+	// delete [] on a null pointer does nothing, so no test is needed
+	delete [] prefix_file_buf;
+	delete [] sufix_file_buf;
+	delete [] signature_map;
+}
+//----------------------------------------------------------------------------------
+// Open a file, recognize its size and check its marker. Auxiliary function.
+// IN	: file_name		- the name of a file to open
+//		  marker		- the 4 characters expected at both ends of the file
+// OUT	: file_handler	- the opened file, positioned just after the initial marker;
+//						  NULL when the function fails
+//		  size			- the size of the file without initial and terminal markers
+//						  (meaningful only when the function succeeds)
+// RET	: true			- if successful
+//----------------------------------------------------------------------------------
+bool CKMCFile::OpenASingleFile(const std::string &file_name, FILE *&file_handler, uint64 &size, char marker[])
+{
+	char _marker[4];
+
+	if((file_handler = my_fopen(file_name.c_str(), "rb")) == NULL)
+		return false;
+
+	my_fseek(file_handler, 0, SEEK_END);
+	size = my_ftell(file_handler);					//the size of a whole file
+
+	bool ok = false;
+
+	// A valid file holds at least the two 4-byte markers; anything shorter would make size wrap below
+	if(size >= 8)
+	{
+		// terminal marker
+		my_fseek(file_handler, -4, SEEK_CUR);
+		ok = (fread(_marker, 1, 4, file_handler) == 4) && (strncmp(marker, _marker, 4) == 0);
+
+		// initial marker
+		if(ok)
+		{
+			rewind(file_handler);
+			ok = (fread(_marker, 1, 4, file_handler) == 4) && (strncmp(marker, _marker, 4) == 0);
+		}
+	}
+
+	if(!ok)
+	{
+		// Never hand an open handle back on failure: the callers treat a non-NULL handle as "in use"
+		fclose(file_handler);
+		file_handler = NULL;
+		return false;
+	}
+
+	size = size - 8;								//the size of the file without initial and terminal markers
+	return true;
+}
+//-------------------------------------------------------------------------------------
+// Recognize current parameters from kmc_database. Auxiliary function.
+// Reads the header fields, then loads the prefix (LUT) area and the signature map
+// of the already opened *.kmc_pre file into newly allocated buffers.
+// IN	: the size of the file *.kmc_pre, without initial and terminal markers
+//		  (decreased by 4 on return, i.e. also without the header offset field)
+// RET	: true - if successful; false if the file is truncated or its header is inconsistent.
+//		  Buffers allocated before a failure stay owned by the object.
+//----------------------------------------------------------------------------------
+bool CKMCFile::ReadParamsFrom_prefix_file_buf(uint64 &size)
+{
+	my_fseek(file_pre, -8, SEEK_END);
+
+	// Only the first byte of the field in front of the terminal marker is read
+	int header_byte = fgetc(file_pre);
+	if(header_byte == EOF)
+		return false;
+	int64 header_offset = header_byte;
+
+	if(size < 4)
+		return false;
+	size = size - 4;	//file size without the size of header_offset (and without 2 markers)
+
+	my_fseek(file_pre, (0LL - (header_offset + 8)), SEEK_END);
+
+	// && evaluates left to right, so the fields are read in file order
+	bool header_ok =
+		fread(&kmer_length, 1, sizeof(uint32), file_pre) == sizeof(uint32) &&
+		fread(&mode, 1, sizeof(uint32), file_pre) == sizeof(uint32) &&
+		fread(&counter_size, 1, sizeof(uint32), file_pre) == sizeof(uint32) &&
+		fread(&lut_prefix_length, 1, sizeof(uint32), file_pre) == sizeof(uint32) &&
+		fread(&signature_len, 1, sizeof(uint32), file_pre) == sizeof(uint32) &&
+		fread(&min_count, 1, sizeof(uint32), file_pre) == sizeof(uint32) &&
+		fread(&max_count, 1, sizeof(uint32), file_pre) == sizeof(uint32) &&
+		fread(&total_kmers, 1, sizeof(uint64), file_pre) == sizeof(uint64);
+	if(!header_ok)
+		return false;
+
+	original_min_count = min_count;
+	original_max_count = max_count;
+
+	// The table sizes below are computed as 1 << (2 * length) in an int;
+	// lengths above 15 would make that shift undefined
+	if(signature_len > 15 || lut_prefix_length > 15 || lut_prefix_length > kmer_length)
+		return false;
+
+	signature_map_size = ((1 << (2 * signature_len)) + 1);
+
+	// Everything that follows the prefix area, except its last 8 bytes
+	uint64 tail_size = (uint64)signature_map_size * sizeof(uint32) + header_offset + 8;
+	if(size < tail_size)
+		return false;
+
+	uint64 lut_area_size_in_bytes = size - tail_size;
+	single_LUT_size = 1 << (2 * lut_prefix_length);
+	uint64 last_data_index = lut_area_size_in_bytes / sizeof(uint64);
+
+	rewind(file_pre);
+	my_fseek(file_pre, +4, SEEK_CUR);
+	prefix_file_buf_size = (lut_area_size_in_bytes + 8) / sizeof(uint64);		//reads without 4 bytes of a header_offset (and without markers)
+
+	// One spare element: if the byte count is not a multiple of 8 the read below still fits
+	prefix_file_buf = new uint64[prefix_file_buf_size + 1];
+	size_t lut_bytes = (size_t)(lut_area_size_in_bytes + 8);
+	if(fread(prefix_file_buf, 1, lut_bytes, file_pre) != lut_bytes)
+		return false;
+	prefix_file_buf[last_data_index] = total_kmers + 1;
+
+	signature_map = new uint32[signature_map_size];
+	size_t map_bytes = signature_map_size * sizeof(uint32);
+	if(fread(signature_map, 1, map_bytes, file_pre) != map_bytes)
+		return false;
+
+	sufix_size = (kmer_length - lut_prefix_length) / 4;
+
+	sufix_rec_size = sufix_size + counter_size;
+
+	return true;
+}
+//------------------------------------------------------------------------------------------
+// Check if kmer exists. Works only on a database opened for random access.
+// IN : kmer  - kmer
+// OUT: count - kmer's counter if kmer has been found (set even when the counter
+//				lies outside the current min_count/max_count range)
+// RET: true  - if kmer exists and its counter is between min_count and max_count
+//------------------------------------------------------------------------------------------
+bool CKMCFile::CheckKmer(CKmerAPI &kmer, float &count)
+{
+	if(is_opened != opened_for_RA)
+		return false;
+	if(end_of_file)
+		return false;
+
+	uint32 signature = kmer.get_signature(signature_len);
+	if(signature >= signature_map_size)
+		return false;
+
+	uint32 bin_start_pos = signature_map[signature];
+	bin_start_pos *= single_LUT_size;
+
+	//recognize a prefix:
+	uint64 pattern_prefix_value = kmer.kmer_data[0];
+
+	uint32 pattern_offset = (sizeof(pattern_prefix_value) * 8) - (lut_prefix_length * 2) - (kmer.byte_alignment * 2);
+
+	pattern_prefix_value = pattern_prefix_value >> pattern_offset;  //complements with 0
+	if(pattern_prefix_value >= prefix_file_buf_size)
+		return false;
+
+	// Both the entry of this prefix and the following one are read, so the
+	// whole index (bin start included) has to stay inside the array
+	uint64 lut_index = (uint64)bin_start_pos + pattern_prefix_value;
+	if(lut_index + 1 >= prefix_file_buf_size)
+		return false;
+
+	//look into the array with data
+	int64 index_start = prefix_file_buf[lut_index];
+	int64 index_stop = prefix_file_buf[lut_index + 1] - 1;
+
+	uchar *sufix_byte_ptr;
+	uint64 sufix = 0;
+
+	//sufix_offset is always 56
+	uint32 sufix_offset = 56;			// the offset of a sufix is for shifting the sufix towards MSB, to compare the sufix with a pattern
+										// Bytes of a pattern to search are always shifted towards MSB
+
+	bool found = false;
+
+	//binary search:
+
+	while (index_start <= index_stop)
+	{
+		int64 mid_index = (index_start + index_stop) / 2;
+		sufix_byte_ptr = &sufix_file_buf[mid_index * sufix_rec_size];
+
+		uint64 pattern = 0;
+
+		pattern_offset = (lut_prefix_length + kmer.byte_alignment ) * 2;
+
+		// Every probe compares from the first word of the kmer again. Without this
+		// reset a probe that had advanced into a later word left the following
+		// probes reading the wrong word (kmers longer than one 64-bit word).
+		uint32 row_index = 0;			// the number of a current row in an array kmer_data
+
+		for(uint32 a = 0; a < sufix_size; a ++)		//check byte by byte
+		{
+			pattern = kmer.kmer_data[row_index];
+			pattern = pattern << pattern_offset;
+			pattern = pattern & 0xff00000000000000;
+
+			sufix = sufix_byte_ptr[a];
+			sufix = sufix << sufix_offset;
+
+			if(pattern != sufix)
+				break;
+
+			pattern_offset += 8;
+
+			if (pattern_offset == 64)				//the end of a word
+			{
+				pattern_offset = 0;
+				row_index++;
+			}
+		}
+
+		if(pattern == sufix)
+		{
+			found = true;
+			break;
+		}
+		// pattern and sufix hold the first pair of bytes that differ
+		if( sufix < pattern )
+			index_start = mid_index + 1;
+		else
+			index_stop = mid_index - 1;
+	}
+
+	if(!found)
+		return false;
+
+	// The counter follows the sufix bytes, least significant byte first
+	sufix_byte_ptr += sufix_size;
+	uint32 int_counter;
+
+	int_counter = *sufix_byte_ptr;
+
+	for(uint32 b = 1; b < counter_size; b ++)
+	{
+		uint32 aux = 0x000000ff & *(sufix_byte_ptr + b);
+
+		aux = aux << 8 * ( b);
+		int_counter = aux | int_counter;
+	}
+
+	// mode 0: the counter is an integer; otherwise its bytes are copied into the float unchanged
+	if(mode == 0)
+		count = (float)int_counter;
+	else
+		memcpy(&count, &int_counter, counter_size);
+
+	return (count >= min_count) && (count <= max_count);
+}
+
+//-----------------------------------------------------------------------------------------------
+// Report whether the listing has reached the end of the database
+// RET: true - all kmers are listed
+//-----------------------------------------------------------------------------------------------
+bool CKMCFile::Eof()
+{
+	const bool all_listed = end_of_file;
+	return all_listed;
+}
+//-----------------------------------------------------------------------------------------------
+// Read next kmer whose counter lies between min_count and max_count.
+// Works only on a database opened for listing.
+// OUT: kmer - next kmer
+// OUT: count - kmer's counter
+// RET: true - if a kmer has been returned; false if the database is not opened
+// for listing or no further kmer is available
+//-----------------------------------------------------------------------------------------------
+bool CKMCFile::ReadNextKmer(CKmerAPI &kmer, float &count)
+{
+ uint32 int_counter;
+
+ if(is_opened != opened_for_listing)
+ return false;
+ // Each pass of this loop decodes one record; records whose counter is out of range are skipped
+ do
+ {
+ if(end_of_file)
+ return false;
+
+ // The next LUT entry holds the number of the first sufix of the next prefix
+ if(sufix_number == prefix_file_buf[prefix_index + 1])
+ {
+ prefix_index++;
+
+ // Skip entries equal to their successor (presumably prefixes without any kmer)
+ while (prefix_file_buf[prefix_index] == prefix_file_buf[prefix_index + 1])
+ prefix_index++;
+ }
+
+ // Number of bits to the right of the prefix in the first 64-bit word
+ uint32 off = (sizeof(prefix_index) * 8) - (lut_prefix_length * 2) - kmer.byte_alignment * 2;
+
+ uint64 temp_prefix = prefix_index << off; // shift prefix towards MSB
+
+ kmer.kmer_data[0] = temp_prefix; // store prefix in an object CKmerAPI
+
+ for(uint32 i = 1; i < kmer.no_of_rows; i++)
+ kmer.kmer_data[i] = 0;
+
+ //read sufix:
+ uint32 row_index = 0;
+ uint64 suf = 0;
+
+ // Position of the first sufix byte, directly below the prefix
+ off = off - 8;
+
+ for(uint32 a = 0; a < sufix_size; a ++)
+ {
+ // The current block is used up: load the next one from *.kmc_suf
+ if(index_in_partial_buf == part_size)
+ Reload_sufix_file_buf();
+
+ suf = sufix_file_buf[index_in_partial_buf++];
+ suf = suf << off;
+ kmer.kmer_data[row_index] = kmer.kmer_data[row_index] | suf;
+
+ if (off == 0) //the end of a word in kmer_data
+ {
+ off = 56;
+ row_index++;
+ }
+ else
+ off -=8;
+ }
+
+ //read counter (counter_size bytes, least significant byte first):
+ if(index_in_partial_buf == part_size)
+ Reload_sufix_file_buf();
+
+ int_counter = sufix_file_buf[index_in_partial_buf++];
+
+ for(uint32 b = 1; b < counter_size; b++)
+ {
+ if(index_in_partial_buf == part_size)
+ Reload_sufix_file_buf();
+
+ uint32 aux = 0x000000ff & sufix_file_buf[index_in_partial_buf++];
+ aux = aux << 8 * ( b);
+ int_counter = aux | int_counter;
+ }
+
+ // mode 0: the counter is an integer; otherwise its bytes are copied into the float unchanged
+ if(mode == 0)
+ count = (float)int_counter;
+ else
+ memcpy(&count, &int_counter, counter_size);
+
+ sufix_number++;
+
+ // The record just decoded was the last one; the next call returns false
+ if(sufix_number == total_kmers)
+ end_of_file = true;
+ }
+ while((count < min_count) || (count > max_count));
+
+ return true;
+}
+//-------------------------------------------------------------------------------
+// Refill the array "sufix_file_buf" with the next block of *.kmc_suf and start
+// reading it from its first byte. Listing mode only. Auxiliary function.
+//-------------------------------------------------------------------------------
+void CKMCFile::Reload_sufix_file_buf()
+{
+	const size_t block_bytes = (size_t) part_size;
+
+	index_in_partial_buf = 0;
+	fread (sufix_file_buf, 1, block_bytes, file_suf);
+}
+//-------------------------------------------------------------------------------
+// Release memory and close files in case they were opened
+// RET: true - if a database was opened and has now been closed
+//-------------------------------------------------------------------------------
+bool CKMCFile::Close()
+{
+	// Nothing to release when no database is opened
+	if(!is_opened)
+		return false;
+
+	if(file_pre != NULL)
+	{
+		fclose(file_pre);
+		file_pre = NULL;
+	}
+	if(file_suf != NULL)
+	{
+		fclose(file_suf);
+		file_suf = NULL;
+	}
+
+	is_opened = closed;
+	end_of_file = false;
+
+	// Pointers are reset so that a later Close() or the destructor does not free them again
+	delete [] prefix_file_buf;
+	delete [] sufix_file_buf;
+	delete [] signature_map;
+	prefix_file_buf = NULL;
+	sufix_file_buf = NULL;
+	signature_map = NULL;
+
+	return true;
+}
+//----------------------------------------------------------------------------------
+// Set initial values to enable listing kmers from the beginning. Only in listing mode
+// RET: true - if a file has been opened for listing
+//----------------------------------------------------------------------------------
+bool CKMCFile::RestartListing()
+{
+	if(is_opened != opened_for_listing)
+		return false;
+
+	// Go back to the first byte after the initial 4-byte marker and reload the first block
+	my_fseek(file_suf, 4, SEEK_SET);
+	fread(sufix_file_buf, 1, (size_t) part_size, file_suf);
+
+	index_in_partial_buf = 0;
+	sufix_number = 0;
+	prefix_index = 0;
+
+	end_of_file = (total_kmers == 0);
+
+	return true;
+}
+//----------------------------------------------------------------------------------------
+// Set the minimal value for a counter. Kmers with counters below this threshold are ignored
+// IN	: x		- minimal value for a counter
+// RET	: true	- if x is not below the value stored in the database and is below max_count
+//----------------------------------------------------------------------------------------
+bool CKMCFile::SetMinCount(uint32 x)
+{
+	const bool accepted = (x >= original_min_count) && (x < max_count);
+
+	if(accepted)
+		min_count = x;
+
+	return accepted;
+}
+
+//----------------------------------------------------------------------------------------
+// Return a value of min_count. Kmers with counters below this threshold are ignored
+// RET	: a value of min_count
+//----------------------------------------------------------------------------------------
+uint32 CKMCFile::GetMinCount()
+{
+	const uint32 current_min = min_count;
+	return current_min;
+}
+
+//----------------------------------------------------------------------------------------
+// Set the maximal value for a counter. Kmers with counters above this threshold are ignored
+// IN	: x		- maximal value for a counter
+// RET	: true	- if x is not above the value stored in the database and is above min_count
+//----------------------------------------------------------------------------------------
+bool CKMCFile::SetMaxCount(uint32 x)
+{
+	const bool accepted = (x <= original_max_count) && (x > min_count);
+
+	if(accepted)
+		max_count = x;
+
+	return accepted;
+}
+
+
+//----------------------------------------------------------------------------------------
+// Return a value of max_count. Kmers with counters above this threshold are ignored
+// RET	: a value of max_count
+//----------------------------------------------------------------------------------------
+uint32 CKMCFile::GetMaxCount()
+{
+	const uint32 current_max = max_count;
+	return current_max;
+}
+
+//----------------------------------------------------------------------------------------
+// Restore min_count and max_count to the values read from *.kmc_pre
+//----------------------------------------------------------------------------------------
+void CKMCFile::ResetMinMaxCounts()
+{
+	max_count = original_max_count;
+	min_count = original_min_count;
+}
+
+//----------------------------------------------------------------------------------------
+// Return the length of kmers
+// RET	: the length of kmers, as stored in the member kmer_length
+//----------------------------------------------------------------------------------------
+uint32 CKMCFile::KmerLength()
+{
+	const uint32 length = kmer_length;
+	return length;
+}
+
+//----------------------------------------------------------------------------------------
+// Check if kmer exists; same test as CheckKmer, the counter is discarded
+// IN	: kmer - kmer
+// RET	: true if kmer exists
+//----------------------------------------------------------------------------------------
+bool CKMCFile::IsKmer(CKmerAPI &kmer)
+{
+	float ignored_count;
+	return CheckKmer(kmer, ignored_count);
+}
+
+//-----------------------------------------------------------------------------------------
+// Check the total number of kmers between current min_count and max_count
+// RET	: total number of kmers or 0 if a database has not been opened
+// NOTE	: in listing mode, when the thresholds differ from the stored ones, the whole
+//		  database is read and the listing is restarted from the beginning afterwards
+//-----------------------------------------------------------------------------------------
+uint64 CKMCFile::KmerCount(void)
+{
+	if(!is_opened)
+		return 0;
+
+	// With the original thresholds every stored kmer counts
+	if((min_count == original_min_count) && (max_count == original_max_count))
+		return total_kmers;
+
+	uint64 aux_kmerCount = 0;
+
+	if(is_opened == opened_for_RA)
+	{
+		uchar *ptr = sufix_file_buf;
+
+		for(uint64 i = 0; i < total_kmers; i++)
+		{
+			// Skip the sufix bytes, then assemble the counter (least significant byte first)
+			ptr += sufix_size;
+			uint32 int_counter = *ptr;
+			ptr++;
+
+			for(uint32 b = 1; b < counter_size; b ++)
+			{
+				uint32 aux = 0x000000ff & *(ptr);
+				aux = aux << 8 * ( b);
+				int_counter = aux | int_counter;
+				ptr++;
+			}
+
+			// Same conversion as in CheckKmer: the counter is compared as a float.
+			// Comparing the copied bytes as an integer would misjudge non-integer counters.
+			float count = 0;
+			if(mode == 0)
+				count = (float)int_counter;
+			else
+				memcpy(&count, &int_counter, counter_size);
+
+			if((count >= min_count) && (count <= max_count))
+				aux_kmerCount++;
+		}
+	}
+	else //opened_for_listing
+	{
+		CKmerAPI kmer(kmer_length);
+		float count;
+
+		RestartListing();
+		// ReadNextKmer returns only kmers inside the range and false at the end,
+		// so each successful call is exactly one kmer to count
+		while(ReadNextKmer(kmer, count))
+			aux_kmerCount++;
+		RestartListing();
+	}
+
+	return aux_kmerCount;
+}
+//---------------------------------------------------------------------------------
+// Get current parameters from kmer_database
+// OUT	:	_kmer_length		- the length of kmers
+//			_mode				- mode
+//			_counter_size		- the size of a counter in bytes
+//			_lut_prefix_length	- the number of prefix's symbols cut from kmers
+//			_signature_len		- the value of the member signature_len
+//			_min_count			- the minimal number of kmer's appearances
+//			_max_count			- the maximal number of kmer's appearances
+//			_total_kmers		- the total number of kmers
+// RET	: true if kmer_database has been opened; otherwise the arguments are left untouched
+//---------------------------------------------------------------------------------
+bool CKMCFile::Info(uint32 &_kmer_length, uint32 &_mode, uint32 &_counter_size, uint32 &_lut_prefix_length, uint32 &_signature_len, uint32 &_min_count, uint32 &_max_count, uint64 &_total_kmers)
+{
+	if(!is_opened)
+		return false;
+
+	_kmer_length		= kmer_length;
+	_mode				= mode;
+	_counter_size		= counter_size;
+	_lut_prefix_length	= lut_prefix_length;
+	_signature_len		= signature_len;
+	_min_count			= min_count;
+	_max_count			= max_count;
+	_total_kmers		= total_kmers;
+
+	return true;
+}
+
+// ***** EOF
diff --git a/kmc_api/kmc_file.h b/kmc_api/kmc_file.h
new file mode 100755
index 0000000..deb00f6
--- /dev/null
+++ b/kmc_api/kmc_file.h
@@ -0,0 +1,122 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#ifndef _KMC_FILE_H
+#define _KMC_FILE_H
+
+#include "kmer_defs.h"
+#include "kmer_api.h"
+#include <string>
+
+// Reader of a KMC database stored in the pair of files *.kmc_pre and *.kmc_suf.
+// A database is opened either for random access (OpenForRA: CheckKmer, IsKmer)
+// or for listing (OpenForListing: ReadNextKmer, RestartListing, Eof).
+// NOTE(review): the class owns raw buffers and FILE handles but declares no copy
+// constructor or assignment operator - copying an object looks unsafe; confirm.
+class CKMCFile
+{
+	enum open_mode {closed, opened_for_RA, opened_for_listing};
+	open_mode is_opened;
+
+	bool end_of_file;
+
+	FILE *file_pre;
+	FILE *file_suf;
+
+	uint64* prefix_file_buf;
+	uint64 prefix_file_buf_size;
+	uint64 prefix_index;			// The current prefix's index in an array "prefix_file_buf", read from *.kmc_pre
+	uint32 single_LUT_size;			// The size of a single LUT (in no. of elements)
+
+	uint32* signature_map;
+	uint32 signature_map_size;
+
+	uchar* sufix_file_buf;
+	uint64 sufix_number;			// The sufix's number to be listed. 64 bits wide because it is compared
+									// with total_kmers and with entries of prefix_file_buf, which are 64-bit
+	uint64 index_in_partial_buf;	// The current byte's number in an array "sufix_file_buf", for listing mode
+
+	uint32 kmer_length;
+	uint32 mode;
+	uint32 counter_size;
+	uint32 lut_prefix_length;
+	uint32 signature_len;
+	uint32 min_count;
+	uint32 max_count;
+	uint64 total_kmers;
+
+	uint32 sufix_size;		// sufix's size in bytes
+	uint32 sufix_rec_size;	// sufix_size + counter_size
+
+	uint32 original_min_count;
+	uint32 original_max_count;
+
+	static uint64 part_size; // the size of a block read to sufix_file_buf, in listing mode
+
+	// Open a file, recognize its size and check its marker. Auxiliary function.
+	bool OpenASingleFile(const std::string &file_name, FILE *&file_handler, uint64 &size, char marker[]);
+
+	// Recognize current parameters. Auxiliary function.
+	bool ReadParamsFrom_prefix_file_buf(uint64 &size);
+
+	// Reload a contents of an array "sufix_file_buf" for listing mode. Auxiliary function.
+	void Reload_sufix_file_buf();
+
+public:
+
+	CKMCFile();
+	~CKMCFile();
+
+	// Open files *.kmc_pre & *.kmc_suf, read them to RAM, close files. *.kmc_suf is opened for random access
+	bool OpenForRA(const std::string &file_name);
+
+	// Open files *.kmc_pre & *.kmc_suf, read *.kmc_pre to RAM, *.kmc_suf is buffered
+	bool OpenForListing(const std::string& file_name);
+
+	// Return next kmer in CKmerAPI &kmer. Return its counter in float &count. Return true if not EOF
+	bool ReadNextKmer(CKmerAPI &kmer, float &count);
+
+	// Release memory and close files in case they were opened
+	bool Close();
+
+	// Set the minimal value for a counter. Kmers with counters below this threshold are ignored
+	bool SetMinCount(uint32 x);
+
+	// Return a value of min_count. Kmers with counters below this threshold are ignored
+	uint32 GetMinCount(void);
+
+	// Set the maximal value for a counter. Kmers with counters above this threshold are ignored
+	bool SetMaxCount(uint32 x);
+
+	// Return a value of max_count. Kmers with counters above this threshold are ignored
+	uint32 GetMaxCount(void);
+
+	// Return the total number of kmers between min_count and max_count
+	uint64 KmerCount(void);
+
+	// Return the length of kmers
+	uint32 KmerLength(void);
+
+	// Set initial values to enable listing kmers from the beginning. Only in listing mode
+	bool RestartListing(void);
+
+	// Return true if all kmers are listed
+	bool Eof(void);
+
+	// Return true if kmer exists. In this case return kmer's counter in count
+	bool CheckKmer(CKmerAPI &kmer, float &count);
+
+	// Return true if kmer exists
+	bool IsKmer(CKmerAPI &kmer);
+
+	// Set original (read from *.kmc_pre) values for min_count and max_count
+	void ResetMinMaxCounts(void);
+
+	// Get current parameters from kmer_database
+	bool Info(uint32 &_kmer_length, uint32 &_mode, uint32 &_counter_size, uint32 &_lut_prefix_length, uint32 &_signature_len, uint32 &_min_count, uint32 &_max_count, uint64 &_total_kmers);
+};
+
+#endif
+
+// ***** EOF
diff --git a/kmc_api/kmer_api.cpp b/kmc_api/kmer_api.cpp
new file mode 100755
index 0000000..b845b37
--- /dev/null
+++ b/kmc_api/kmer_api.cpp
@@ -0,0 +1,23 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz and Agnieszka Debudaj-Grabysz
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+
+#include "stdafx.h"
+#include "kmer_api.h"
+#include <vector>
+#include <math.h>
+
+using namespace std;
+
+// Symbol letters in code order: index 0..3 gives 'A', 'C', 'G', 'T'
+const char CKmerAPI::char_codes[] = {'A','C', 'G', 'T'};
+// Character -> code table; its size (256) comes from the declaration inside the class
+char CKmerAPI::num_codes[];
+// Static object whose constructor fills num_codes (-1 everywhere except A/C/G/T in both cases)
+CKmerAPI::_si CKmerAPI::_init;
+
+// ***** EOF
diff --git a/kmc_api/kmer_api.h b/kmc_api/kmer_api.h
new file mode 100755
index 0000000..0339459
--- /dev/null
+++ b/kmc_api/kmer_api.h
@@ -0,0 +1,539 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz and Agnieszka Debudaj-Grabysz
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#ifndef _KMER_API_H
+#define _KMER_API_H
+
+
+#include "kmer_defs.h"
+#include <string>
+#include <iostream>
+#include "mmer.h"
+class CKMCFile;
+
+class CKmerAPI
+{
+protected:
+
+ uint64 *kmer_data; // An array to store kmer's data. On 64 bits 32 symbols can be stored
+ // Data are shifted so that the suffix's symbols start on a byte boundary
+
+
+ uint32 kmer_length; // Kmer's length, in symbols
+ uchar byte_alignment; // A number of "empty" symbols placed before prefix to let sufix's symbols to start with a border of a byte
+
+ uint32 no_of_rows; // A number of 64-bits words allocated for kmer_data
+
+ friend class CKMCFile;
+
+public:
+ static const char char_codes[];
+ static char num_codes[256];
+ struct _si
+ { _si()
+ {
+ for(int i = 0; i < 256; i++)
+ num_codes[i] = -1;
+ num_codes['A'] = num_codes['a'] = 0;
+ num_codes['C'] = num_codes['c'] = 1;
+ num_codes['G'] = num_codes['g'] = 2;
+ num_codes['T'] = num_codes['t'] = 3;
+ }
+ } static _init;
+
+
+// ----------------------------------------------------------------------------------
+// The constructor creates kmer for the number of symbols equal to length.
+// The array kmer_data has the size of ceil((length + byte_alignment) / 32))
+// IN : length - a number of symbols of a kmer
+// ----------------------------------------------------------------------------------
+ inline CKmerAPI(uint32 length = 0)
+ {
+     if(length)
+     {
+         // Pad the front so that (length + byte_alignment) is a multiple of 4 symbols, i.e. whole bytes
+         if(length % 4)
+             byte_alignment = 4 - (length % 4);
+         else
+             byte_alignment = 0;
+         // Integer form of ceil((length + byte_alignment) / 32): 32 two-bit symbols fit in one 64-bit word
+         no_of_rows = (((length + byte_alignment) % 32) ? (length + byte_alignment) / 32 + 1 : (length + byte_alignment) / 32);
+         kmer_data = new uint64[no_of_rows];
+         memset(kmer_data, 0, sizeof(*kmer_data) * no_of_rows);
+     }
+     else
+     {
+         byte_alignment = 0; // was left uninitialised for an empty kmer, yet copied by the copy constructor and operator=
+         kmer_data = NULL;
+         no_of_rows = 0;
+     }
+     kmer_length = length;
+ };
+//-----------------------------------------------------------------------
+// The destructor
+//-----------------------------------------------------------------------
+ inline ~CKmerAPI()
+ {
+ if (kmer_data != NULL)
+ delete [] kmer_data;
+ };
+
+//-----------------------------------------------------------------------
+// The copy constructor
+//-----------------------------------------------------------------------
+ inline CKmerAPI(const CKmerAPI &kmer)
+ {
+ kmer_length = kmer.kmer_length;
+ byte_alignment = kmer.byte_alignment;
+ no_of_rows = kmer.no_of_rows;
+
+ kmer_data = new uint64[no_of_rows];
+
+ for(uint32 i = 0; i < no_of_rows; i++)
+ kmer_data[i] = kmer.kmer_data[i];
+
+ };
+
+//-----------------------------------------------------------------------
+// The operator =
+//-----------------------------------------------------------------------
+ inline CKmerAPI& operator=(const CKmerAPI &kmer)
+ {
+ if(kmer.kmer_length != kmer_length)
+ {
+ if(kmer_length && kmer_data)
+ delete [] kmer_data;
+
+ kmer_length = kmer.kmer_length;
+ byte_alignment = kmer.byte_alignment;
+ no_of_rows = kmer.no_of_rows;
+
+ kmer_data = new uint64[no_of_rows];
+ }
+
+ for(uint32 i = 0; i < no_of_rows; i++)
+ kmer_data[i] = kmer.kmer_data[i];
+
+ return *this;
+ };
+
+//-----------------------------------------------------------------------
+// The operator ==
+//-----------------------------------------------------------------------
+ inline bool operator==(const CKmerAPI &kmer)
+ {
+ if(kmer.kmer_length != kmer_length)
+ return false;
+
+ for(uint32 i = 0; i < no_of_rows; i++)
+ if(kmer.kmer_data[i] != kmer_data[i])
+ return false;
+
+ return true;
+
+ };
+
+//-----------------------------------------------------------------------
+// Operator < . If arguments differ in length a result is undefined
+//-----------------------------------------------------------------------
+ inline bool operator<(const CKmerAPI &kmer)
+ {
+ if(kmer.kmer_length != kmer_length)
+ return false;
+
+ for(uint32 i = 0; i < no_of_rows; i++)
+ if(kmer.kmer_data[i] > kmer_data[i])
+ return true;
+ else
+ if(kmer.kmer_data[i] < kmer_data[i])
+ return false;
+
+ return false;
+ };
+
+//-----------------------------------------------------------------------
+// Return a symbol of a kmer from an indicated position (numbered from 0).
+// The symbol is returned as an ASCII character A/C/G/T
+// IN : pos - a position of a symbol
+// RET : symbol - a symbol placed on a position pos
+//-----------------------------------------------------------------------
+ inline char get_asci_symbol(unsigned int pos)
+ {
+ if(pos >= kmer_length)
+ return 0;
+
+ uint32 current_row = (pos + byte_alignment) / 32;
+ uint32 current_pos = ((pos + byte_alignment) % 32) * 2;
+ uint64 mask = 0xc000000000000000 >> current_pos;
+ uint64 symbol = kmer_data[current_row] & mask;
+ symbol = symbol >> (64 - current_pos - 2);
+ return char_codes[symbol];
+
+ };
+
+ //-----------------------------------------------------------------------
+ // Return a symbol of a kmer from an indicated position (numbered from 0)
+ // The symbol is returned as a numerical value 0/1/2/3
+ // IN : pos - a position of a symbol
+ // RET : symbol - a symbol placed on a position pos
+ //-----------------------------------------------------------------------
+ inline uchar get_num_symbol(unsigned int pos)
+ {
+     if (pos >= kmer_length)
+         return 0;
+
+     uint32 current_row = (pos + byte_alignment) / 32;       // index of the 64-bit word holding the symbol
+     uint32 current_pos = ((pos + byte_alignment) % 32) * 2; // bit offset of the symbol, counted from the MSB
+     uint64 mask = 0xc000000000000000 >> current_pos;
+     uint64 symbol = kmer_data[current_row] & mask;
+     symbol = symbol >> (64 - current_pos - 2);              // symbol now sits in the two lowest bits
+     // Convert by value: reading the first byte through a pointer yields the low byte only on little-endian hosts
+     return static_cast<uchar>(symbol);
+
+ };
+
+//-----------------------------------------------------------------------
+// Convert kmer into string (an alphabet ACGT)
+// RET : string kmer
+//-----------------------------------------------------------------------
+ inline std::string to_string()
+ {
+ std::string string_kmer;
+ uchar *byte_ptr;
+ uchar c;
+ uchar temp_byte_alignment = byte_alignment;
+ uint32 cur_string_size = 0;
+
+ string_kmer.reserve(kmer_length + 1);
+ string_kmer.resize(kmer_length + 1);
+
+
+ for(uint32 row_counter = 0; row_counter < no_of_rows; row_counter++)
+ {
+ byte_ptr = reinterpret_cast<uchar*>(&kmer_data[row_counter]);
+
+ byte_ptr += 7; // shift a pointer towards a MSB
+
+ for(uint32 i = 0; (i < kmer_length) && (i < 32); i += 4) // 32 symbols of any "row" in kmer_data
+ {
+ if((i == 0) && temp_byte_alignment) // check if a byte_alignment placed before a prefix is to be skipped
+ temp_byte_alignment--;
+ else
+ {
+ c = 0xc0 & *byte_ptr; //11000000
+ c = c >> 6;
+ string_kmer[cur_string_size++] = char_codes[c];
+ if (cur_string_size == kmer_length) break;
+ }
+
+ if((i == 0) && temp_byte_alignment) // check if a byte_alignment placed before a prefix is to be skipped
+ temp_byte_alignment--;
+ else
+ {
+ c = 0x30 & *byte_ptr; //00110000
+ c = c >> 4;
+ string_kmer[cur_string_size++] = char_codes[c];
+ if (cur_string_size == kmer_length) break;
+ }
+
+ if((i == 0) && temp_byte_alignment) // check if a byte_alignment placed before a prefix is to be skipped
+ temp_byte_alignment--;
+ else
+ {
+ c = 0x0c & *byte_ptr; //00001100
+ c = c >> 2;
+ string_kmer[cur_string_size++] = char_codes[c];
+ if (cur_string_size == kmer_length) break;
+ }
+ // no need to check byte alignment as its length is at most 3
+ c = 0x03 & *byte_ptr; //00000011
+ string_kmer[cur_string_size++] = char_codes[c];
+ if (cur_string_size == kmer_length) break;
+
+ byte_ptr--;
+ }
+ }
+ string_kmer[cur_string_size] = '\0';
+ return string_kmer;
+ };
+//-----------------------------------------------------------------------
+// Convert kmer into string (an alphabet ACGT). The function assumes enough memory was allocated
+// OUT : str - string kmer.
+//-----------------------------------------------------------------------
+ inline void to_string(char *str)
+ {
+     uchar *byte_ptr;
+     uchar c;
+     uchar temp_byte_alignment = byte_alignment;
+     // Must be as wide as kmer_length: an 8-bit counter wraps to 0 at 256 symbols and never matches kmer_length
+     uint32 cur_string_size = 0;
+
+     for(uint32 row_counter = 0; row_counter < no_of_rows; row_counter++)
+     {
+         byte_ptr = reinterpret_cast<uchar*>(&kmer_data[row_counter]);
+
+         byte_ptr += 7; // point at the most significant byte (assumes a little-endian host)
+
+         for(uint32 i = 0; (i < kmer_length) && (i < 32); i += 4) // 32 symbols of any "row" in kmer_data
+         {
+
+             if((i == 0) && temp_byte_alignment) // check if a byte_alignment placed before a prefix is to be skipped
+                 temp_byte_alignment--;
+             else
+             {
+                 c = 0xc0 & *byte_ptr; //11000000
+                 c = c >> 6;
+                 str[cur_string_size++] = char_codes[c];
+                 if (cur_string_size == kmer_length) break;
+             }
+
+             if((i == 0) && temp_byte_alignment) // check if a byte_alignment placed before a prefix is to be skipped
+                 temp_byte_alignment--;
+             else
+             {
+                 c = 0x30 & *byte_ptr; //00110000
+                 c = c >> 4;
+                 str[cur_string_size++] = char_codes[c];
+                 if (cur_string_size == kmer_length) break;
+             }
+
+             if((i == 0) && temp_byte_alignment) // check if a byte_alignment placed before a prefix is to be skipped
+                 temp_byte_alignment--;
+             else
+             {
+                 c = 0x0c & *byte_ptr; //00001100
+                 c = c >> 2;
+                 str[cur_string_size++] = char_codes[c];
+                 if (cur_string_size == kmer_length) break;
+             }
+             // no need to check byte alignment as its length is at most 3
+             c = 0x03 & *byte_ptr; //00000011
+             str[cur_string_size++] = char_codes[c];
+             if (cur_string_size == kmer_length) break;
+
+             byte_ptr--;
+         }
+     }
+     str[cur_string_size] = '\0'; // the caller's buffer must hold kmer_length + 1 characters
+ };
+
+//-----------------------------------------------------------------------
+// Convert kmer into string (an alphabet ACGT)
+// OUT : str - string kmer
+//-----------------------------------------------------------------------
+ inline void to_string(std::string &str)
+ {
+ uchar *byte_ptr;
+ uchar c;
+ uchar temp_byte_alignment = byte_alignment;
+ uint32 cur_string_size = 0;
+
+ str.reserve(kmer_length + 1);
+ str.resize(kmer_length + 1);
+
+
+ for(uint32 row_counter = 0; row_counter < no_of_rows; row_counter++)
+ {
+ byte_ptr = reinterpret_cast<uchar*>(&kmer_data[row_counter]);
+
+ byte_ptr += 7; // shift a pointer towards a MSB
+
+ for(uint32 i = 0; (i < kmer_length) && (i < 32); i += 4) // 32 symbols of any "row" in kmer_data
+ {
+
+ if((i == 0) && temp_byte_alignment) // check if a byte_alignment placed before a prefix is to be skipped
+ temp_byte_alignment--;
+ else
+ {
+ c = 0xc0 & *byte_ptr; //11000000
+ c = c >> 6;
+ str[cur_string_size++] = char_codes[c];
+ if (cur_string_size == kmer_length) break;
+ }
+
+ if((i == 0) && temp_byte_alignment) // check if a byte_alignment placed before a prefix is to be skipped
+ temp_byte_alignment--;
+ else
+ {
+ c = 0x30 & *byte_ptr; //00110000
+ c = c >> 4;
+ str[cur_string_size++] = char_codes[c];
+ if (cur_string_size == kmer_length) break;
+ }
+
+ if((i == 0) && temp_byte_alignment) // check if a byte_alignment placed before a prefix is to be skipped
+ temp_byte_alignment--;
+ else
+ {
+ c = 0x0c & *byte_ptr; //00001100
+ c = c >> 2;
+ str[cur_string_size++] = char_codes[c];
+ if (cur_string_size == kmer_length) break;
+ }
+ // no need to check byte alignment as its length is at most 3
+ c = 0x03 & *byte_ptr; //00000011
+ str[cur_string_size++] = char_codes[c];
+ if (cur_string_size == kmer_length) break;
+
+ byte_ptr--;
+ }
+ }
+ str[cur_string_size] = '\0';
+ };
+
+//-----------------------------------------------------------------------
+// Convert a string of an alphabet ACGT into a kmer of a CKmerAPI
+// IN : kmer_string - a string of an alphabet ACGT
+// RET : true - if successful, false if the string contains a symbol other than A/C/G/T
+//-----------------------------------------------------------------------
+ inline bool from_string(std::string kmer_string)
+ {
+     unsigned char c_char;
+     uchar c_binary;
+     uchar temp_byte_alignment;
+
+     for (uint32 ii = 0; ii < kmer_string.size(); ++ii)
+     {
+         if (num_codes[(uchar)kmer_string[ii]] == (char)-1) // (char)-1 also matches where plain char is unsigned
+             return false;
+     }
+
+     // Reallocate only when the length changes; otherwise the existing buffer is reused
+     if(kmer_length != kmer_string.size())
+     {
+         if(kmer_length && kmer_data)
+             delete [] kmer_data;
+         kmer_data = NULL; // no dangling pointer or stale row count if the new kmer is empty
+         no_of_rows = 0;
+
+         kmer_length = (uint32)kmer_string.size();
+
+         if(kmer_length % 4)
+             byte_alignment = 4 - (kmer_length % 4);
+         else
+             byte_alignment = 0;
+
+         if(kmer_length != 0)
+         {
+             no_of_rows = (((kmer_length + byte_alignment) % 32) ? (kmer_length + byte_alignment) / 32 + 1 : (kmer_length + byte_alignment) / 32);
+             kmer_data = new uint64[no_of_rows];
+         }
+     }
+
+     if(kmer_data != NULL) // an empty kmer owns no buffer
+         memset(kmer_data, 0, sizeof(*kmer_data) * no_of_rows);
+     temp_byte_alignment = byte_alignment;
+     uint32 i = 0;           // symbol slots consumed so far, alignment padding included
+     uint32 i_in_string = 0; // next character of kmer_string to encode
+     uchar *byte_ptr;
+
+     for(uint32 row_index = 0; row_index < no_of_rows; row_index++)
+     {
+         byte_ptr = reinterpret_cast<uchar*>(&kmer_data[row_index]);
+         byte_ptr += 7; // shift a pointer towards a MSB
+
+         while(i < kmer_length)
+         {
+             if((i_in_string == 0) && temp_byte_alignment) // check if a byte_alignment placed before a prefix is to be skipped
+             {
+                 temp_byte_alignment--;
+                 i++;
+             }
+             else
+             {
+                 c_char = kmer_string[i_in_string];
+                 c_binary = num_codes[c_char];
+                 c_binary = c_binary << 6; //11000000
+                 *byte_ptr = *byte_ptr | c_binary;
+                 i++;
+                 i_in_string++;
+                 if(i_in_string == kmer_length) break;
+             }
+
+             if((i_in_string == 0) && temp_byte_alignment) // check if a byte_alignment placed before a prefix is to be skipped
+             {
+                 temp_byte_alignment--;
+                 i++;
+             }
+             else
+             {
+                 c_char = kmer_string[i_in_string];
+                 c_binary = num_codes[c_char];
+                 c_binary = c_binary << 4;
+                 *byte_ptr = *byte_ptr | c_binary;
+                 i++;
+                 i_in_string++;
+                 if(i_in_string == kmer_length) break;
+             }
+
+             // The test below must use i_in_string, not i (an earlier version tested i == 0; fix reported by a user)
+             if ((i_in_string == 0) && temp_byte_alignment) // check if a byte_alignment placed before a prefix is to be skipped
+             {
+                 temp_byte_alignment--;
+                 i++;
+             }
+             else
+             {
+                 c_char = kmer_string[i_in_string];
+                 c_binary = num_codes[c_char];
+                 c_binary = c_binary << 2;
+                 *byte_ptr = *byte_ptr | c_binary;
+                 i++;
+                 i_in_string++;
+                 if(i_in_string == kmer_length) break;
+             }
+
+             c_char = kmer_string[i_in_string];
+             c_binary = num_codes[c_char];
+             *byte_ptr = *byte_ptr | c_binary;
+             i++;
+             i_in_string++;
+             if(i_in_string == kmer_length) break;
+
+             if( i % 32 == 0)
+                 break; //check if a new "row" is to be started
+             byte_ptr--;
+         }
+     }
+     return true;
+ }
+//-----------------------------------------------------------------------
+// Counts a signature of an existing kmer
+// IN : sig_len - the length of a signature
+// RET : signature value
+//-----------------------------------------------------------------------
+ uint32 get_signature(uint32 sig_len)
+ {
+ uchar symb;
+ CMmer cur_mmr(sig_len);
+
+ for(uint32 i = 0; i < sig_len; ++i)
+ {
+ symb = get_num_symbol(i);
+ cur_mmr.insert(symb);
+ }
+ CMmer min_mmr(cur_mmr);
+ for (uint32 i = sig_len; i < kmer_length; ++i)
+ {
+ symb = get_num_symbol(i);
+ cur_mmr.insert(symb);
+
+ if (cur_mmr < min_mmr)
+ min_mmr = cur_mmr;
+ }
+ return min_mmr.get();
+ }
+
+};
+
+
+#endif
+
+// ***** EOF
diff --git a/kmc_api/kmer_defs.h b/kmc_api/kmer_defs.h
new file mode 100755
index 0000000..30c18b8
--- /dev/null
+++ b/kmc_api/kmer_defs.h
@@ -0,0 +1,47 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz and Agnieszka Debudaj-Grabysz
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+
+#ifndef _KMER_DEFS_H
+#define _KMER_DEFS_H
+
+#define KMC_VER "2.0"
+#define KMC_DATE "2014-07-04"
+
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+
+#ifndef WIN32
+ #include <stdint.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <math.h>
+ #include <string.h>
+
+ #define _TCHAR char
+ #define _tmain main
+
+ #define my_fopen fopen
+ #define my_fseek fseek
+ #define my_ftell ftell
+#else
+ #define my_fopen fopen
+ #define my_fseek _fseeki64
+ #define my_ftell _ftelli64
+#endif
+ //typedef unsigned char uchar;
+
+ typedef int int32;
+ typedef unsigned int uint32;
+ typedef long long int64;
+ typedef unsigned long long uint64;
+ typedef unsigned char uchar;
+#endif
+
+// ***** EOF
diff --git a/kmc_api/mmer.cpp b/kmc_api/mmer.cpp
new file mode 100755
index 0000000..b0ceb73
--- /dev/null
+++ b/kmc_api/mmer.cpp
@@ -0,0 +1,49 @@
+#include "stdafx.h"
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#include "../kmc_api/mmer.h"
+
+
+uint32 CMmer::norm5[];
+uint32 CMmer::norm6[];
+uint32 CMmer::norm7[];
+uint32 CMmer::norm8[];
+
+CMmer::_si CMmer::_init;
+
+
+//--------------------------------------------------------------------------
+CMmer::CMmer(uint32 _len)
+{
+    // Select the precomputed normalisation table for the signature length.
+    // Tables exist only for lengths 5..8 (norm5..norm8).
+    switch (_len)
+    {
+    case 5: norm = norm5; break;
+    case 6: norm = norm6; break;
+    case 7: norm = norm7; break;
+    case 8: norm = norm8; break;
+    default:
+        // Unsupported length: do not leave the table pointer indeterminate.
+        norm = NULL;
+        break;
+    }
+
+    len = _len;
+    // insert() ANDs str with this mask, keeping only its low 2 * _len bits.
+    mask = (1 << _len * 2) - 1;
+    str = 0;
+    // Gives get() and the comparison operators a defined value before the first insert().
+    current_val = 0;
+}
+
+//--------------------------------------------------------------------------
+
diff --git a/kmc_api/mmer.h b/kmc_api/mmer.h
new file mode 100755
index 0000000..6f12f0e
--- /dev/null
+++ b/kmc_api/mmer.h
@@ -0,0 +1,182 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#ifndef _MMER_H
+#define _MMER_H
+#include "kmer_defs.h"
+
+// *************************************************************************
+// *************************************************************************
+
+
+class CMmer
+{
+ uint32 str;
+ uint32 mask;
+ uint32 current_val;
+ uint32* norm;
+ uint32 len;
+ static uint32 norm5[1 << 10];
+ static uint32 norm6[1 << 12];
+ static uint32 norm7[1 << 14];
+ static uint32 norm8[1 << 16];
+
+ static bool is_allowed(uint32 mmer, uint32 len)
+ {
+ if ((mmer & 0x3f) == 0x3f) // TTT suffix
+ return false;
+ if ((mmer & 0x3f) == 0x3b) // TGT suffix
+ return false;
+ if ((mmer & 0x3c) == 0x3c) // TT* suffix (bits 5..2 set; the last symbol is not tested)
+ return false;
+ // Examine the two lowest symbols, then drop one symbol; repeated len - 3 times
+ for (uint32 j = 0; j < len - 3; ++j)
+ if ((mmer & 0xf) == 0) // AA inside
+ return false;
+ else
+ mmer >>= 2;
+
+ if (mmer == 0) // AAA prefix
+ return false;
+ if (mmer == 0x04) // ACA prefix
+ return false;
+ if ((mmer & 0xf) == 0) // *AA prefix
+ return false;
+
+ return true;
+ }
+
+ friend class CSignatureMapper;
+ struct _si
+ {
+ static uint32 get_rev(uint32 mmer, uint32 len)
+ {
+ uint32 rev = 0;
+ uint32 shift = len*2 - 2;
+ for(uint32 i = 0 ; i < len ; ++i)
+ {
+ rev += (3 - (mmer & 3)) << shift;
+ mmer >>= 2;
+ shift -= 2;
+ }
+ return rev;
+ }
+
+
+
+ static void init_norm(uint32* norm, uint32 len)
+ {
+ uint32 special = 1 << len * 2;
+ for(uint32 i = 0 ; i < special ; ++i)
+ {
+ uint32 rev = get_rev(i, len);
+ uint32 str_val = is_allowed(i, len) ? i : special;
+ uint32 rev_val = is_allowed(rev, len) ? rev : special;
+ norm[i] = MIN(str_val, rev_val);
+ }
+ }
+
+ _si()
+ {
+ init_norm(norm5, 5);
+ init_norm(norm6, 6);
+ init_norm(norm7, 7);
+ init_norm(norm8, 8);
+ }
+
+ }static _init;
+public:
+ CMmer(uint32 _len);
+ inline void insert(uchar symb);
+ inline uint32 get() const;
+ inline bool operator==(const CMmer& x);
+ inline bool operator<(const CMmer& x);
+ inline void clear();
+ inline bool operator<=(const CMmer& x);
+ inline void set(const CMmer& x);
+ inline void insert(char* seq);
+
+};
+
+
+
+//--------------------------------------------------------------------------
+inline void CMmer::insert(uchar symb)
+{
+ str <<= 2;
+ str += symb;
+ str &= mask;
+
+ current_val = norm[str];
+}
+
+//--------------------------------------------------------------------------
+inline uint32 CMmer::get() const
+{
+ return current_val;
+}
+
+//--------------------------------------------------------------------------
+inline bool CMmer::operator==(const CMmer& x)
+{
+ return current_val == x.current_val;
+}
+
+//--------------------------------------------------------------------------
+inline bool CMmer::operator<(const CMmer& x)
+{
+ return current_val < x.current_val;
+}
+
+//--------------------------------------------------------------------------
+inline void CMmer::clear()
+{
+ str = 0;
+}
+
+//--------------------------------------------------------------------------
+inline bool CMmer::operator<=(const CMmer& x)
+{
+ return current_val <= x.current_val;
+}
+
+//--------------------------------------------------------------------------
+inline void CMmer::set(const CMmer& x)
+{
+ str = x.str;
+ current_val = x.current_val;
+}
+
+//--------------------------------------------------------------------------
+inline void CMmer::insert(char* seq)
+{
+ switch (len)
+ {
+ case 5:
+ str = (seq[0] << 8) + (seq[1] << 6) + (seq[2] << 4) + (seq[3] << 2) + (seq[4]);
+ break;
+ case 6:
+ str = (seq[0] << 10) + (seq[1] << 8) + (seq[2] << 6) + (seq[3] << 4) + (seq[4] << 2) + (seq[5]);
+ break;
+ case 7:
+ str = (seq[0] << 12) + (seq[1] << 10) + (seq[2] << 8) + (seq[3] << 6) + (seq[4] << 4 ) + (seq[5] << 2) + (seq[6]);
+ break;
+ case 8:
+ str = (seq[0] << 14) + (seq[1] << 12) + (seq[2] << 10) + (seq[3] << 8) + (seq[4] << 6) + (seq[5] << 4) + (seq[6] << 2) + (seq[7]);
+ break;
+ default:
+ break;
+ }
+
+ current_val = norm[str];
+}
+
+
+#endif
\ No newline at end of file
diff --git a/kmc_api/stdafx.h b/kmc_api/stdafx.h
new file mode 100755
index 0000000..5b8d29d
--- /dev/null
+++ b/kmc_api/stdafx.h
@@ -0,0 +1,4 @@
+#include <stdio.h>
+#include <ext/algorithm>
+#include <iostream>
+using namespace std;
diff --git a/kmc_dump/ReadMe.txt b/kmc_dump/ReadMe.txt
new file mode 100755
index 0000000..5bb3111
--- /dev/null
+++ b/kmc_dump/ReadMe.txt
@@ -0,0 +1,40 @@
+========================================================================
+ CONSOLE APPLICATION : kmc_dump Project Overview
+========================================================================
+
+AppWizard has created this kmc_dump application for you.
+
+This file contains a summary of what you will find in each of the files that
+make up your kmc_dump application.
+
+
+kmc_dump.vcxproj
+ This is the main project file for VC++ projects generated using an Application Wizard.
+ It contains information about the version of Visual C++ that generated the file, and
+ information about the platforms, configurations, and project features selected with the
+ Application Wizard.
+
+kmc_dump.vcxproj.filters
+ This is the filters file for VC++ projects generated using an Application Wizard.
+ It contains information about the association between the files in your project
+ and the filters. This association is used in the IDE to show grouping of files with
+ similar extensions under a specific node (for e.g. ".cpp" files are associated with the
+ "Source Files" filter).
+
+kmc_dump.cpp
+ This is the main application source file.
+
+/////////////////////////////////////////////////////////////////////////////
+Other standard files:
+
+StdAfx.h, StdAfx.cpp
+ These files are used to build a precompiled header (PCH) file
+ named kmc_dump.pch and a precompiled types file named StdAfx.obj.
+
+/////////////////////////////////////////////////////////////////////////////
+Other notes:
+
+AppWizard uses "TODO:" comments to indicate parts of the source code you
+should add to or customize.
+
+/////////////////////////////////////////////////////////////////////////////
diff --git a/kmc_dump/kmc_dump.cpp b/kmc_dump/kmc_dump.cpp
new file mode 100755
index 0000000..9b653e2
--- /dev/null
+++ b/kmc_dump/kmc_dump.cpp
@@ -0,0 +1,146 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ This file demonstrates the example usage of kmc_api software.
+ It reads kmer_counter's output and prints kmers to an output file.
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#include "stdafx.h"
+#include <iostream>
+#include "../kmc_api/kmc_file.h"
+#include "nc_utils.h"
+
+
+void print_info(void);
+
+int _tmain(int argc, char* argv[])
+{
+ CKMCFile kmer_data_base;
+ int32 i;
+ uint32 min_count_to_set = 0;
+ uint32 max_count_to_set = 0;
+ std::string input_file_name;
+ std::string output_file_name;
+
+ FILE * out_file;
+ //------------------------------------------------------------
+ // Parse input parameters
+ //------------------------------------------------------------
+ if(argc < 3)
+ {
+ print_info();
+ return EXIT_FAILURE;
+ }
+
+ for(i = 1; i < argc; ++i)
+ {
+ if(argv[i][0] == '-')
+ {
+ if(strncmp(argv[i], "-ci", 3) == 0)
+ min_count_to_set = atoi(&argv[i][3]);
+ else if(strncmp(argv[i], "-cx", 3) == 0)
+ max_count_to_set = atoi(&argv[i][3]);
+ }
+ else
+ break;
+ }
+
+ if(argc - i < 2)
+ {
+ print_info();
+ return EXIT_FAILURE;
+ }
+
+ input_file_name = std::string(argv[i++]);
+ output_file_name = std::string(argv[i]);
+
+ if((out_file = fopen (output_file_name.c_str(),"wb")) == NULL)
+ {
+ print_info();
+ return EXIT_FAILURE;
+ }
+
+ setvbuf(out_file, NULL ,_IOFBF, 1 << 24);
+
+ //------------------------------------------------------------------------------
+ // Open kmer database for listing and print kmers within min_count and max_count
+ //------------------------------------------------------------------------------
+
+ if (!kmer_data_base.OpenForListing(input_file_name))
+ {
+ print_info();
+ return EXIT_FAILURE ;
+ }
+ else
+ {
+ uint32 _kmer_length;
+ uint32 _mode;
+ uint32 _counter_size;
+ uint32 _lut_prefix_length;
+ uint32 _signature_len;
+ uint32 _min_count;
+ uint32 _max_count;
+ uint64 _total_kmers;
+
+ kmer_data_base.Info(_kmer_length, _mode, _counter_size, _lut_prefix_length, _signature_len, _min_count, _max_count, _total_kmers);
+
+ float counter;
+ //std::string str;
+ char str[1024];
+ uint32 counter_len;
+
+ CKmerAPI kmer_object(_kmer_length);
+
+ if(min_count_to_set)
+ if (!(kmer_data_base.SetMinCount(min_count_to_set)))
+ return EXIT_FAILURE;
+ if(max_count_to_set)
+ if (!(kmer_data_base.SetMaxCount(max_count_to_set)))
+ return EXIT_FAILURE;
+
+ while (kmer_data_base.ReadNextKmer(kmer_object, counter))
+ {
+ kmer_object.to_string(str);
+
+ str[_kmer_length] = '\t';
+ if (_mode)
+ counter_len = CNumericConversions::Double2PChar(counter, 6, (uchar*)str + _kmer_length + 1);
+ else
+ counter_len = CNumericConversions::Int2PChar((uint64)counter, (uchar*)str + _kmer_length + 1);
+
+ str[_kmer_length + 1 + counter_len] = '\n';
+ fwrite(str, 1, _kmer_length + counter_len + 2, out_file);
+
+ /*if(_mode)
+ fprintf(out_file, "%s\t%f\n", str.c_str(), counter);
+ else
+ fprintf(out_file, "%s\t%d\n", str.c_str(), (int)counter);*/
+ }
+
+ fclose(out_file);
+ kmer_data_base.Close();
+ }
+
+ return EXIT_SUCCESS;
+}
+// -------------------------------------------------------------------------
+// Print execution options
+// -------------------------------------------------------------------------
+void print_info(void)
+{
+    // Usage text. -ci/-cx are passed to SetMinCount/SetMaxCount, so k-mers outside that range are excluded.
+    std::cout << "KMC dump ver. " << KMC_VER << " (" << KMC_DATE << ")\n";
+    std::cout << "\nUsage:\nkmc_dump [options] <kmc_database> <output_file>\n";
+    std::cout << "Parameters:\n" << "<kmc_database> - kmer_counter's output\n";
+    std::cout << "Options:\n";
+    std::cout << "-ci<value> - exclude k-mers occurring less than <value> times\n";
+    std::cout << "-cx<value> - exclude k-mers occurring more than <value> times\n";
+}
+
+// ***** EOF
diff --git a/kmc_dump/kmc_dump.vcxproj b/kmc_dump/kmc_dump.vcxproj
new file mode 100755
index 0000000..77ad066
--- /dev/null
+++ b/kmc_dump/kmc_dump.vcxproj
@@ -0,0 +1,169 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{8939AD12-23D5-469C-806B-DC3F98F8A514}</ProjectGuid>
+ <Keyword>Win32Proj</Keyword>
+ <RootNamespace>kmc_dump</RootNamespace>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <PlatformToolset>v120</PlatformToolset>
+ <CharacterSet>NotSet</CharacterSet>
+ </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v120</PlatformToolset>
+    <CharacterSet>NotSet</CharacterSet> <!-- same as the other three configurations; the sources work on narrow char strings -->
+  </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <PlatformToolset>v120</PlatformToolset>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>NotSet</CharacterSet>
+ <UseOfMfc>Static</UseOfMfc>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <PlatformToolset>v120</PlatformToolset>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>NotSet</CharacterSet>
+ <UseOfMfc>Static</UseOfMfc>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <LinkIncremental>true</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <LinkIncremental>true</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <LinkIncremental>false</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <LinkIncremental>false</LinkIncremental>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <PrecompiledHeader>Use</PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <ClCompile>
+ <PrecompiledHeader>Use</PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>Use</PrecompiledHeader>
+ <Optimization>Full</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>Use</PrecompiledHeader>
+ <Optimization>Full</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <Text Include="ReadMe.txt" />
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="..\kmc_api\kmc_file.h" />
+ <ClInclude Include="..\kmc_api\kmer_api.h" />
+ <ClInclude Include="..\kmc_api\kmer_defs.h" />
+ <ClInclude Include="..\kmc_api\mmer.h" />
+ <ClInclude Include="nc_utils.h" />
+ <ClInclude Include="stdafx.h" />
+ <ClInclude Include="targetver.h" />
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="..\kmc_api\kmc_file.cpp" />
+ <ClCompile Include="..\kmc_api\kmer_api.cpp" />
+ <ClCompile Include="..\kmc_api\mmer.cpp" />
+ <ClCompile Include="kmc_dump.cpp" />
+ <ClCompile Include="nc_utils.cpp" />
+ <ClCompile Include="stdafx.cpp">
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
+ </ClCompile>
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/kmc_dump/nc_utils.cpp b/kmc_dump/nc_utils.cpp
new file mode 100755
index 0000000..ec50bf4
--- /dev/null
+++ b/kmc_dump/nc_utils.cpp
@@ -0,0 +1,20 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ This file defines the static members of the CNumericConversions class
+ declared in nc_utils.h (lookup tables used for fast number-to-text conversion).
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-02-17
+*/
+
+#include "stdafx.h"
+#include "nc_utils.h"
+
+
+uchar CNumericConversions::digits[100000*5]; // storage for the digit lookup table declared in nc_utils.h
+int CNumericConversions::powOf10[30]; // storage for the power-of-ten table declared in nc_utils.h
+CNumericConversions::_si CNumericConversions::_init; // static object whose constructor fills both tables
\ No newline at end of file
diff --git a/kmc_dump/nc_utils.h b/kmc_dump/nc_utils.h
new file mode 100755
index 0000000..39aabbc
--- /dev/null
+++ b/kmc_dump/nc_utils.h
@@ -0,0 +1,138 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ This file declares CNumericConversions, a helper class with lookup tables
+ for fast conversion of integers and floating-point numbers to text.
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-02-17
+*/
+
+#include <string>
+#include "../kmc_api/kmer_defs.h"
+
+#ifndef _NC_UTILS_H
+#define _NC_UTILS_H
+class CNumericConversions { // Table-driven conversion of integers and floating-point values to decimal text.
+public:
+    static uchar digits[100000*5]; // for every i in 0..99999: its 5 zero-padded ASCII digits at offset i*5
+    static int powOf10[30]; // powOf10[i] is filled as 10^i by the _si constructor below
+    struct _si { // Helper type whose constructor fills both lookup tables (see the static _init member).
+        _si()
+        {
+            for(int i = 0; i < 100000; ++i)
+            {
+                int dig = i; // peeled one decimal digit at a time, least significant first
+
+                digits[i*5+4] = '0' + (dig % 10);
+                dig /= 10;
+                digits[i*5+3] = '0' + (dig % 10);
+                dig /= 10;
+                digits[i*5+2] = '0' + (dig % 10);
+                dig /= 10;
+                digits[i*5+1] = '0' + (dig % 10);
+                dig /= 10;
+                digits[i*5+0] = '0' + dig;
+            }
+            powOf10[0] = 1;
+            for(int i = 1 ; i < 30 ; ++i) // NOTE(review): with a 32-bit int, 10^i stops fitting at i = 10; only smaller exponents are reliable
+            {
+                powOf10[i] = powOf10[i-1]*10;
+            }
+        }
+    } static _init;
+
+    static int NDigits(uint64 val) // Returns the number of decimal digits of val, capped at 5 (callers pass values below 100000).
+    {
+        if(val >= 10000)
+            return 5;
+        else if(val >= 1000)
+            return 4;
+        else if(val >= 100)
+            return 3;
+        else if(val >= 10)
+            return 2;
+        else
+            return 1;
+    }
+
+    static int Int2PChar(uint64 val, uchar *str) // Writes val in decimal to str (no terminating NUL); returns the number of characters written.
+    {
+        if(val >= 1000000000000000ull) // 16 digits or more: leading group plus three 5-digit groups
+        {
+            uint64 dig1 = val / 1000000000000000ull;
+            val -= dig1 * 1000000000000000ull;
+            uint64 dig2 = val / 10000000000ull;
+            val -= dig2 * 10000000000ull;
+            uint64 dig3 = val / 100000ull;
+            uint64 dig4 = val - dig3 * 100000ull;
+
+            int ndig = NDigits(dig1);
+
+            memcpy(str, digits+dig1*5+(5-ndig), ndig); // leading group is copied without its zero padding
+            memcpy(str+ndig, digits+dig2*5, 5);
+            memcpy(str+ndig+5, digits+dig3*5, 5);
+            memcpy(str+ndig+10, digits+dig4*5, 5);
+
+            return ndig+15;
+        }
+        else if(val >= 10000000000ull) // 11..15 digits: leading group plus two 5-digit groups
+        {
+            uint64 dig1 = val / 10000000000ull;
+            val -= dig1 * 10000000000ull;
+            uint64 dig2 = val / 100000ull;
+            uint64 dig3 = val - dig2 * 100000ull;
+
+            int ndig = NDigits(dig1);
+
+            memcpy(str, digits+dig1*5+(5-ndig), ndig);
+            memcpy(str+ndig, digits+dig2*5, 5);
+            memcpy(str+ndig+5, digits+dig3*5, 5);
+
+            return ndig+10;
+        }
+        else if(val >= 100000ull) // 6..10 digits: leading group plus one 5-digit group
+        {
+            uint64 dig1 = val / 100000ull;
+            uint64 dig2 = val - dig1 * 100000ull;
+
+            int ndig = NDigits(dig1);
+
+            memcpy(str, digits+dig1*5+(5-ndig), ndig);
+            memcpy(str+ndig, digits+dig2*5, 5);
+
+            return ndig+5;
+        }
+        else // at most 5 digits: a single table lookup
+        {
+            int ndig = NDigits(val);
+
+            memcpy(str, digits+val*5+(5-ndig), ndig);
+
+            return ndig;
+        }
+    }
+
+    static int Double2PChar(double val, int prec, uchar *str) // Writes val as "<integer>.<prec digits>" to str (no NUL); returns the number of characters written.
+    {
+        double corrector = .5 / powOf10[prec]; // half a unit of the last printed digit, added so the output is rounded
+        val += corrector;
+        double ipart;
+        double fractPart = std::modf(val, &ipart);
+        uint64 intPart = (uint64)ipart; // 64-bit so that integer parts of 2^32 and above are not truncated
+        uint32 len = Int2PChar(intPart, str);
+        uint32 pos = len;
+        str[pos++] = '.';
+        for(int i = 0 ; i < prec ; ++i)
+        {
+            fractPart *= 10; // fractPart accumulates the digits emitted so far; the next digit is its last integer digit
+            str[pos++] = '0' + (uint32)fractPart % 10 ;
+        }
+        return len + prec + 1;
+    }
+};
+
+#endif
\ No newline at end of file
diff --git a/kmc_dump/stdafx.cpp b/kmc_dump/stdafx.cpp
new file mode 100755
index 0000000..666224a
--- /dev/null
+++ b/kmc_dump/stdafx.cpp
@@ -0,0 +1,8 @@
+// stdafx.cpp : source file that includes just the standard includes
+// kmc_dump.pch will be the pre-compiled header
+// stdafx.obj will contain the pre-compiled type information
+
+#include "stdafx.h"
+
+// TODO: reference any additional headers you need in STDAFX.H
+// and not in this file
diff --git a/kmc_dump/stdafx.h b/kmc_dump/stdafx.h
new file mode 100755
index 0000000..6ae71a9
--- /dev/null
+++ b/kmc_dump/stdafx.h
@@ -0,0 +1,26 @@
+#ifdef WIN32
+
+// stdafx.h : include file for standard system include files,
+// or project specific include files that are used frequently, but
+// are changed infrequently
+//
+
+#pragma once
+
+#include "targetver.h"
+
+#include <stdio.h>
+#include <tchar.h>
+
+
+
+// TODO: reference additional headers your program requires here
+
+#else
+
+#include <stdio.h>
+#include <ext/algorithm>
+#include <iostream>
+using namespace std;
+
+#endif
\ No newline at end of file
diff --git a/kmc_dump/targetver.h b/kmc_dump/targetver.h
new file mode 100755
index 0000000..90e767b
--- /dev/null
+++ b/kmc_dump/targetver.h
@@ -0,0 +1,8 @@
+#pragma once
+
+// Including SDKDDKVer.h defines the highest available Windows platform.
+
+// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
+// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
+
+#include <SDKDDKVer.h>
diff --git a/kmc_dump_sample/ReadMe.txt b/kmc_dump_sample/ReadMe.txt
new file mode 100755
index 0000000..2ef5fa9
--- /dev/null
+++ b/kmc_dump_sample/ReadMe.txt
@@ -0,0 +1,40 @@
+========================================================================
+ CONSOLE APPLICATION : kmc_dump_sample Project Overview
+========================================================================
+
+AppWizard has created this kmc_dump_sample application for you.
+
+This file contains a summary of what you will find in each of the files that
+make up your kmc_dump_sample application.
+
+
+kmc_dump_sample.vcxproj
+ This is the main project file for VC++ projects generated using an Application Wizard.
+ It contains information about the version of Visual C++ that generated the file, and
+ information about the platforms, configurations, and project features selected with the
+ Application Wizard.
+
+kmc_dump_sample.vcxproj.filters
+ This is the filters file for VC++ projects generated using an Application Wizard.
+ It contains information about the association between the files in your project
+ and the filters. This association is used in the IDE to show grouping of files with
+ similar extensions under a specific node (for e.g. ".cpp" files are associated with the
+ "Source Files" filter).
+
+kmc_dump_sample.cpp
+ This is the main application source file.
+
+/////////////////////////////////////////////////////////////////////////////
+Other standard files:
+
+StdAfx.h, StdAfx.cpp
+ These files are used to build a precompiled header (PCH) file
+ named kmc_dump_sample.pch and a precompiled types file named StdAfx.obj.
+
+/////////////////////////////////////////////////////////////////////////////
+Other notes:
+
+AppWizard uses "TODO:" comments to indicate parts of the source code you
+should add to or customize.
+
+/////////////////////////////////////////////////////////////////////////////
diff --git a/kmc_dump_sample/kmc_dump_sample.cpp b/kmc_dump_sample/kmc_dump_sample.cpp
new file mode 100755
index 0000000..552938f
--- /dev/null
+++ b/kmc_dump_sample/kmc_dump_sample.cpp
@@ -0,0 +1,133 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ This file demonstrates the example usage of kmc_api software.
+ It reads kmer_counter's output and prints kmers to an output file.
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#include "stdafx.h"
+#include <iostream>
+#include "../kmc_api/kmc_file.h"
+
+void print_info(void);
+
+int _tmain(int argc, char* argv[]) // Entry point: writes every k-mer of a KMC database and its counter to a text file, one per line.
+{
+    CKMCFile kmer_data_base;
+    int32 i;
+    uint32 min_count_to_set = 0; // value of -ci; 0 means SetMinCount is not called
+    uint32 max_count_to_set = 0; // value of -cx; 0 means SetMaxCount is not called
+    std::string input_file_name;
+    std::string output_file_name;
+
+    FILE * out_file;
+    //------------------------------------------------------------
+    // Parse input parameters
+    //------------------------------------------------------------
+    if(argc < 3)
+    {
+        print_info();
+        return EXIT_FAILURE;
+    }
+
+    for(i = 1; i < argc; ++i) // leading arguments starting with '-' are options; the first other argument ends option parsing
+    {
+        if(argv[i][0] == '-')
+        {
+            if(strncmp(argv[i], "-ci", 3) == 0)
+                min_count_to_set = atoi(&argv[i][3]); // the number follows the option name directly, e.g. -ci3
+            else if(strncmp(argv[i], "-cx", 3) == 0)
+                max_count_to_set = atoi(&argv[i][3]);
+        }
+        else
+            break;
+    }
+
+    if(argc - i < 2) // both <kmc_database> and <output_file> must still follow the options
+    {
+        print_info();
+        return EXIT_FAILURE;
+    }
+
+    input_file_name = std::string(argv[i++]);
+    output_file_name = std::string(argv[i]);
+
+    if((out_file = fopen (output_file_name.c_str(),"wb")) == NULL)
+    {
+        print_info();
+        return EXIT_FAILURE;
+    }
+
+    setvbuf(out_file, NULL ,_IOFBF, 1 << 24); // fully buffered output with a 16 MiB buffer
+
+    //------------------------------------------------------------------------------
+    // Open kmer database for listing and print kmers within min_count and max_count
+    //------------------------------------------------------------------------------
+
+    if (!kmer_data_base.OpenForListing(input_file_name))
+    {
+        print_info();
+        return EXIT_FAILURE ;
+    }
+    else
+    {
+        uint32 _kmer_length;
+        uint32 _mode;
+        uint32 _counter_size;
+        uint32 _lut_prefix_length;
+        uint32 _signature_len;
+        uint32 _min_count;
+        uint32 _max_count;
+        uint64 _total_kmers;
+
+        kmer_data_base.Info(_kmer_length, _mode, _counter_size, _lut_prefix_length, _signature_len, _min_count, _max_count, _total_kmers); // only _kmer_length and _mode are used below
+
+        float counter;
+        std::string str;
+
+        CKmerAPI kmer_object(_kmer_length);
+
+        if(min_count_to_set)
+            if (!(kmer_data_base.SetMinCount(min_count_to_set)))
+                return EXIT_FAILURE;
+        if(max_count_to_set)
+            if (!(kmer_data_base.SetMaxCount(max_count_to_set)))
+                return EXIT_FAILURE;
+
+        while (kmer_data_base.ReadNextKmer(kmer_object, counter))
+        {
+            kmer_object.to_string(str);
+
+            if(_mode) // non-zero mode: the counter is printed as a floating-point value
+                fprintf(out_file, "%s\t%f\n", str.c_str(), counter);
+            else // otherwise as an unsigned integer; 64-bit conversion so counts above INT_MAX do not overflow
+                fprintf(out_file, "%s\t%llu\n", str.c_str(), (unsigned long long)counter);
+        }
+
+        fclose(out_file);
+        kmer_data_base.Close();
+    }
+
+    return EXIT_SUCCESS;
+}
+// -------------------------------------------------------------------------
+// Print execution options
+// -------------------------------------------------------------------------
+void print_info(void) // Writes the program banner and the command-line usage text to standard output.
+{
+    std::cout << "KMC dump ver. " << KMC_VER << " (" << KMC_DATE << ")\n";
+    std::cout << "\nUsage:\nkmc_dump [options] <kmc_database> <output_file>\n";
+    std::cout << "Parameters:\n";
+    std::cout << "<kmc_database> - kmer_counter's output\n";
+    std::cout << "Options:\n";
+    std::cout << "-ci<value> - exclude k-mers occurring less than <value> times\n"; // value is passed to SetMinCount
+    std::cout << "-cx<value> - exclude k-mers occurring more than <value> times\n"; // value is passed to SetMaxCount
+}
+
+// ***** EOF
diff --git a/kmc_dump_sample/kmc_dump_sample.vcxproj b/kmc_dump_sample/kmc_dump_sample.vcxproj
new file mode 100755
index 0000000..4defb6c
--- /dev/null
+++ b/kmc_dump_sample/kmc_dump_sample.vcxproj
@@ -0,0 +1,167 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{17823F37-86DE-4E58-B354-B84DA9EDA6A1}</ProjectGuid>
+ <Keyword>Win32Proj</Keyword>
+ <RootNamespace>kmc_dump_sample</RootNamespace>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <PlatformToolset>v120</PlatformToolset>
+ <CharacterSet>NotSet</CharacterSet>
+ </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v120</PlatformToolset>
+    <CharacterSet>NotSet</CharacterSet> <!-- Unicode would map _tmain to wmain, but the entry point takes char* argv[] -->
+  </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <PlatformToolset>v120</PlatformToolset>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>NotSet</CharacterSet>
+ <UseOfMfc>Static</UseOfMfc>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <PlatformToolset>v120</PlatformToolset>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>NotSet</CharacterSet>
+ <UseOfMfc>Static</UseOfMfc>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <LinkIncremental>true</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <LinkIncremental>true</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <LinkIncremental>false</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <LinkIncremental>false</LinkIncremental>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <PrecompiledHeader>Use</PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <ClCompile>
+ <PrecompiledHeader>Use</PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>Use</PrecompiledHeader>
+ <Optimization>Full</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>Use</PrecompiledHeader>
+ <Optimization>Full</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <Text Include="ReadMe.txt" />
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="..\kmc_api\kmc_file.h" />
+ <ClInclude Include="..\kmc_api\kmer_api.h" />
+ <ClInclude Include="..\kmc_api\kmer_defs.h" />
+ <ClInclude Include="..\kmc_api\mmer.h" />
+ <ClInclude Include="stdafx.h" />
+ <ClInclude Include="targetver.h" />
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="..\kmc_api\kmc_file.cpp" />
+ <ClCompile Include="..\kmc_api\kmer_api.cpp" />
+ <ClCompile Include="..\kmc_api\mmer.cpp" />
+ <ClCompile Include="kmc_dump_sample.cpp" />
+ <ClCompile Include="stdafx.cpp">
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
+ </ClCompile>
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/kmc_dump_sample/stdafx.cpp b/kmc_dump_sample/stdafx.cpp
new file mode 100755
index 0000000..a798ebf
--- /dev/null
+++ b/kmc_dump_sample/stdafx.cpp
@@ -0,0 +1,8 @@
+// stdafx.cpp : source file that includes just the standard includes
+// kmc_dump_sample.pch will be the pre-compiled header
+// stdafx.obj will contain the pre-compiled type information
+
+#include "stdafx.h"
+
+// TODO: reference any additional headers you need in STDAFX.H
+// and not in this file
diff --git a/kmc_dump_sample/stdafx.h b/kmc_dump_sample/stdafx.h
new file mode 100755
index 0000000..6ae71a9
--- /dev/null
+++ b/kmc_dump_sample/stdafx.h
@@ -0,0 +1,26 @@
+#ifdef WIN32
+
+// stdafx.h : include file for standard system include files,
+// or project specific include files that are used frequently, but
+// are changed infrequently
+//
+
+#pragma once
+
+#include "targetver.h"
+
+#include <stdio.h>
+#include <tchar.h>
+
+
+
+// TODO: reference additional headers your program requires here
+
+#else
+
+#include <stdio.h>
+#include <ext/algorithm>
+#include <iostream>
+using namespace std;
+
+#endif
\ No newline at end of file
diff --git a/kmc_dump_sample/targetver.h b/kmc_dump_sample/targetver.h
new file mode 100755
index 0000000..90e767b
--- /dev/null
+++ b/kmc_dump_sample/targetver.h
@@ -0,0 +1,8 @@
+#pragma once
+
+// Including SDKDDKVer.h defines the highest available Windows platform.
+
+// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
+// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
+
+#include <SDKDDKVer.h>
diff --git a/kmer_counter.sln b/kmer_counter.sln
new file mode 100755
index 0000000..84ed1ff
--- /dev/null
+++ b/kmer_counter.sln
@@ -0,0 +1,62 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 2013
+VisualStudioVersion = 12.0.21005.1
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "kmer_counter", "kmer_counter\kmer_counter.vcxproj", "{8C8B90DA-28B7-4D82-81F3-C0E7CE52D59F}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "kmc_dump", "kmc_dump\kmc_dump.vcxproj", "{8939AD12-23D5-469C-806B-DC3F98F8A514}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "kmc_dump_sample", "kmc_dump_sample\kmc_dump_sample.vcxproj", "{17823F37-86DE-4E58-B354-B84DA9EDA6A1}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Mixed Platforms = Debug|Mixed Platforms
+ Debug|Win32 = Debug|Win32
+ Debug|x64 = Debug|x64
+ Release|Mixed Platforms = Release|Mixed Platforms
+ Release|Win32 = Release|Win32
+ Release|x64 = Release|x64
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {8C8B90DA-28B7-4D82-81F3-C0E7CE52D59F}.Debug|Mixed Platforms.ActiveCfg = Debug|Win32
+ {8C8B90DA-28B7-4D82-81F3-C0E7CE52D59F}.Debug|Mixed Platforms.Build.0 = Debug|Win32
+ {8C8B90DA-28B7-4D82-81F3-C0E7CE52D59F}.Debug|Win32.ActiveCfg = Debug|Win32
+ {8C8B90DA-28B7-4D82-81F3-C0E7CE52D59F}.Debug|Win32.Build.0 = Debug|Win32
+ {8C8B90DA-28B7-4D82-81F3-C0E7CE52D59F}.Debug|x64.ActiveCfg = Debug|x64
+ {8C8B90DA-28B7-4D82-81F3-C0E7CE52D59F}.Debug|x64.Build.0 = Debug|x64
+ {8C8B90DA-28B7-4D82-81F3-C0E7CE52D59F}.Release|Mixed Platforms.ActiveCfg = Release|Win32
+ {8C8B90DA-28B7-4D82-81F3-C0E7CE52D59F}.Release|Mixed Platforms.Build.0 = Release|Win32
+ {8C8B90DA-28B7-4D82-81F3-C0E7CE52D59F}.Release|Win32.ActiveCfg = Release|Win32
+ {8C8B90DA-28B7-4D82-81F3-C0E7CE52D59F}.Release|Win32.Build.0 = Release|Win32
+ {8C8B90DA-28B7-4D82-81F3-C0E7CE52D59F}.Release|x64.ActiveCfg = Release|x64
+ {8C8B90DA-28B7-4D82-81F3-C0E7CE52D59F}.Release|x64.Build.0 = Release|x64
+ {8939AD12-23D5-469C-806B-DC3F98F8A514}.Debug|Mixed Platforms.ActiveCfg = Debug|Win32
+ {8939AD12-23D5-469C-806B-DC3F98F8A514}.Debug|Mixed Platforms.Build.0 = Debug|Win32
+ {8939AD12-23D5-469C-806B-DC3F98F8A514}.Debug|Win32.ActiveCfg = Debug|Win32
+ {8939AD12-23D5-469C-806B-DC3F98F8A514}.Debug|Win32.Build.0 = Debug|Win32
+ {8939AD12-23D5-469C-806B-DC3F98F8A514}.Debug|x64.ActiveCfg = Debug|Win32
+ {8939AD12-23D5-469C-806B-DC3F98F8A514}.Release|Mixed Platforms.ActiveCfg = Release|Win32
+ {8939AD12-23D5-469C-806B-DC3F98F8A514}.Release|Mixed Platforms.Build.0 = Release|Win32
+ {8939AD12-23D5-469C-806B-DC3F98F8A514}.Release|Win32.ActiveCfg = Release|Win32
+ {8939AD12-23D5-469C-806B-DC3F98F8A514}.Release|Win32.Build.0 = Release|Win32
+ {8939AD12-23D5-469C-806B-DC3F98F8A514}.Release|x64.ActiveCfg = Release|x64
+ {8939AD12-23D5-469C-806B-DC3F98F8A514}.Release|x64.Build.0 = Release|x64
+ {17823F37-86DE-4E58-B354-B84DA9EDA6A1}.Debug|Mixed Platforms.ActiveCfg = Debug|Win32
+ {17823F37-86DE-4E58-B354-B84DA9EDA6A1}.Debug|Mixed Platforms.Build.0 = Debug|Win32
+ {17823F37-86DE-4E58-B354-B84DA9EDA6A1}.Debug|Win32.ActiveCfg = Debug|Win32
+ {17823F37-86DE-4E58-B354-B84DA9EDA6A1}.Debug|Win32.Build.0 = Debug|Win32
+ {17823F37-86DE-4E58-B354-B84DA9EDA6A1}.Debug|x64.ActiveCfg = Debug|Win32
+ {17823F37-86DE-4E58-B354-B84DA9EDA6A1}.Release|Mixed Platforms.ActiveCfg = Release|Win32
+ {17823F37-86DE-4E58-B354-B84DA9EDA6A1}.Release|Mixed Platforms.Build.0 = Release|Win32
+ {17823F37-86DE-4E58-B354-B84DA9EDA6A1}.Release|Win32.ActiveCfg = Release|Win32
+ {17823F37-86DE-4E58-B354-B84DA9EDA6A1}.Release|Win32.Build.0 = Release|Win32
+ {17823F37-86DE-4E58-B354-B84DA9EDA6A1}.Release|x64.ActiveCfg = Release|Win32
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+ GlobalSection(ExtensibilityGlobals) = postSolution
+ VisualSVNWorkingCopyRoot = .
+ EndGlobalSection
+EndGlobal
diff --git a/kmer_counter/ReadMe.txt b/kmer_counter/ReadMe.txt
new file mode 100755
index 0000000..a003e5d
--- /dev/null
+++ b/kmer_counter/ReadMe.txt
@@ -0,0 +1,40 @@
+========================================================================
+ CONSOLE APPLICATION : kmer_counter Project Overview
+========================================================================
+
+AppWizard has created this kmer_counter application for you.
+
+This file contains a summary of what you will find in each of the files that
+make up your kmer_counter application.
+
+
+kmer_counter.vcxproj
+ This is the main project file for VC++ projects generated using an Application Wizard.
+ It contains information about the version of Visual C++ that generated the file, and
+ information about the platforms, configurations, and project features selected with the
+ Application Wizard.
+
+kmer_counter.vcxproj.filters
+ This is the filters file for VC++ projects generated using an Application Wizard.
+ It contains information about the association between the files in your project
+ and the filters. This association is used in the IDE to show grouping of files with
+ similar extensions under a specific node (e.g. ".cpp" files are associated with the
+ "Source Files" filter).
+
+kmer_counter.cpp
+ This is the main application source file.
+
+/////////////////////////////////////////////////////////////////////////////
+Other standard files:
+
+StdAfx.h, StdAfx.cpp
+ These files are used to build a precompiled header (PCH) file
+ named kmer_counter.pch and a precompiled types file named StdAfx.obj.
+
+/////////////////////////////////////////////////////////////////////////////
+Other notes:
+
+AppWizard uses "TODO:" comments to indicate parts of the source code you
+should add to or customize.
+
+/////////////////////////////////////////////////////////////////////////////
diff --git a/kmer_counter/defs.h b/kmer_counter/defs.h
new file mode 100755
index 0000000..a187692
--- /dev/null
+++ b/kmer_counter/defs.h
@@ -0,0 +1,125 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#ifndef _DEFS_H
+#define _DEFS_H
+
+#define KMC_VER "2.0"
+#define KMC_DATE "2014-07-04"
+
+#define _CRT_SECURE_NO_WARNINGS
+// MIN/MAX pick the smaller/larger value, NORM clamps x to [lower, upper]; arguments may be evaluated more than once
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+#define NORM(x, lower, upper) ((x) < (lower) ? (lower) : (x) > (upper) ? (upper) : (x))
+// Shorthand for unsigned char, done by macro text substitution rather than a typedef
+#define uchar unsigned char
+
+#include <time.h>
+
+//#define DEBUG_MODE
+//#define DEVELOP_MODE
+
+#define USE_META_PROG
+
+#define KMER_X 3
+
+#define STATS_FASTQ_SIZE (1 << 28)
+
+#define EXPAND_BUFFER_RECS (1 << 16)
+
+
+#define MAX_BINS 512
+
+
+#ifndef MAX_K
+#define MAX_K 256
+#endif
+
+#define MIN_K 10
+
+#define MIN_MEM 4
+
+// Range of number of FASTQ/FASTA reading threads
+#define MIN_SF 1
+#define MAX_SF 32
+
+// Range of signature length
+#define MIN_SL 5
+#define MAX_SL 8
+
+// Range of number of splitting threads
+#define MIN_SP 1
+#define MAX_SP 64
+
+// Range of number of sorting threads
+#define MIN_SO 1
+#define MAX_SO 64
+
+// Range of number of threads per single sorting thread
+#define MIN_SR 1
+#define MAX_SR 16
+
+
+typedef float count_t;
+
+#define KMER_WORDS ((MAX_K + 31) / 32)
+
+#ifdef _DEBUG
+#define A_memcpy memcpy
+#define A_memset memset
+#endif
+
+
+// Choose between:
+//#define THREADS_BOOST // Boost threads
+//#define THREADS_NATIVE // C++11 threads
+
+#ifdef WIN32
+#define my_fopen fopen
+#define my_fseek _fseeki64
+#define my_ftell _ftelli64
+typedef int int32;
+typedef unsigned int uint32;
+typedef long long int64;
+typedef unsigned long long uint64;
+
+#define THREADS_BOOST
+//#define THREADS_NATIVE
+#else
+#define my_fopen fopen
+#define my_fseek fseek
+#define my_ftell ftell
+#define _TCHAR char
+#define _tmain main
+
+typedef int int32;
+typedef unsigned int uint32;
+typedef long long int64;
+typedef unsigned long long uint64;
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#define THREADS_BOOST
+//#define THREADS_NATIVE
+#endif
+
+
+const int32 MAX_STR_LEN = 32768;
+#define ALIGNMENT 0x100
+
+#define BYTE_LOG(x) (((x) < (1 << 8)) ? 1 : ((x) < (1 << 16)) ? 2 : ((x) < (1 << 24)) ? 3 : 4)
+
+#endif
+
+// ***** EOF
diff --git a/kmer_counter/fastq_reader.cpp b/kmer_counter/fastq_reader.cpp
new file mode 100755
index 0000000..373b344
--- /dev/null
+++ b/kmer_counter/fastq_reader.cpp
@@ -0,0 +1,475 @@
+#include "stdafx.h"
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#include <algorithm>
+#include <boost/filesystem.hpp>
+#include "defs.h"
+#include "fastq_reader.h"
+
+//************************************************************************************************************
+// CFastqReader - reader class
+//************************************************************************************************************
+
+uint64 CFastqReader::OVERHEAD_SIZE = 1 << 16;
+
+//----------------------------------------------------------------------------------
+// Constructor of FASTA/FASTQ reader
+// Parameters:
+// * _mm - pointer to memory monitor (to check the memory limits)
+CFastqReader::CFastqReader(CMemoryMonitor *_mm, CMemoryPool *_pmm_fastq, input_type _file_type, uint32 _gzip_buffer_size, uint32 _bzip2_buffer_size, int _kmer_len)
+    // Memory services supplied by the caller; only the pointers are stored
+    : mm(_mm),
+      pmm_fastq(_pmm_fastq),
+    // Record layout of the input and the k-mer length
+      file_type(_file_type),
+      kmer_len(_kmer_len),
+    // Compression mode starts as plain; SetNames() re-derives it from the file extension
+      mode(m_plain),
+    // No stream is open yet for any of the three formats
+      in(NULL),
+      in_gzip(NULL),
+      in_bzip2(NULL),
+      bzerror(BZ_OK),
+    // Default read size of 1 << 23 bytes; no buffer is held until OpenFiles() reserves one
+      part_size(1 << 23),
+      part(NULL),
+    // Buffer sizes later passed to gzbuffer() and setvbuf() in OpenFiles()
+      gzip_buffer_size(_gzip_buffer_size),
+      bzip2_buffer_size(_bzip2_buffer_size),
+    // Multi-line FASTA state: nothing carried over from a previous block yet
+      containsNextChromosome(false)
+{
+    // part_filled is not set here; OpenFiles() zeroes it
+}
+
+//----------------------------------------------------------------------------------
+// Destructor - close the files
+CFastqReader::~CFastqReader()
+{
+    // Close whichever stream belongs to the compression mode in effect
+    switch(mode) {
+    case m_plain:
+        if(in != NULL)
+            fclose(in);
+        break;
+    case m_gzip:
+        if(in_gzip != NULL)
+            gzclose(in_gzip);
+        break;
+    case m_bzip2:
+        // The bzip2 reader sits on top of a stdio stream: shut the reader first, then the stream
+        if(in != NULL)
+        {
+            BZ2_bzReadClose(&bzerror, in_bzip2);
+            fclose(in);
+        }
+        break;
+    }
+    // Hand the chunk buffer back to the pool if one is still held
+    if(part != NULL) pmm_fastq->free(part);
+}
+
+//----------------------------------------------------------------------------------
+// Set the name of the file to process
+bool CFastqReader::SetNames(string _input_file_name)
+{
+    input_file_name = _input_file_name;
+
+    // Pick the decompression mode from the file-name suffix; the name must be longer than the suffix itself
+    const size_t name_len = input_file_name.size();
+    const bool ends_gz  = name_len > 3 && input_file_name.compare(name_len - 3, 3, ".gz") == 0;
+    const bool ends_bz2 = name_len > 4 && input_file_name.compare(name_len - 4, 4, ".bz2") == 0;
+
+    mode = ends_gz ? m_gzip : (ends_bz2 ? m_bzip2 : m_plain);
+
+    // Always reports success
+    return true;
+}
+
+//----------------------------------------------------------------------------------
+// Set part size of the buffer
+bool CFastqReader::SetPartSize(uint64 _part_size)
+{
+    // The size may only change while no input stream is open
+    const bool stream_open = in || in_gzip || in_bzip2;
+    // Accepted sizes lie between 1 << 20 and 1 << 30 bytes, both inclusive
+    const bool size_in_range = !(_part_size < (1 << 20) || _part_size > (1 << 30));
+
+    if(stream_open || !size_in_range)
+        return false;
+    part_size = _part_size;
+    return true;
+}
+
+//----------------------------------------------------------------------------------
+// Open the file
+bool CFastqReader::OpenFiles()
+{
+    // Opens the file named via SetNames() in the current mode and reserves the first chunk buffer.
+    // Returns false when a stream is already open or the file cannot be opened; true otherwise.
+    if(in || in_gzip || in_bzip2)
+        return false;
+
+    // Uncompressed file
+    if(mode == m_plain)
+    {
+        if((in = fopen(input_file_name.c_str(), "rb")) == NULL)
+            return false;
+    }
+    // Gzip-compressed file
+    else if(mode == m_gzip)
+    {
+        if((in_gzip = gzopen(input_file_name.c_str(), "rb")) == NULL)
+            return false;
+        gzbuffer(in_gzip, gzip_buffer_size);
+    }
+    // Bzip2-compressed file: a stdio stream wrapped by a bzip2 reader
+    else if(mode == m_bzip2)
+    {
+        in = fopen(input_file_name.c_str(), "rb");
+        if(!in)
+            return false;
+        setvbuf(in, NULL, _IOFBF, bzip2_buffer_size);
+        if((in_bzip2 = BZ2_bzReadOpen(&bzerror, in, 0, 0, NULL, 0)) == NULL)
+        {
+            fclose(in);
+            in = NULL;    // the destructor closes any non-NULL 'in'; clear it so the stream is not closed twice
+            return false;
+        }
+    }
+
+    pmm_fastq->reserve(part);    // obtain the chunk buffer from the pool
+    part_filled = 0;
+    return true;
+}
+//----------------------------------------------------------------------------------
+// Read a part of the file in multi line fasta format
+bool CFastqReader::GetPartFromMultilneFasta(uchar *&_part, uint64 &_size)
+{ // Hands out the next block of a multi-line FASTA file with line breaks stripped from the sequence data
+ uint64 readed = 0;
+ // With nothing carried over from the previous call, EOF means there is no more data to return
+ if(!containsNextChromosome)
+ {
+ if(IsEof())
+ return false;
+ }
+ if(mode == m_plain) // new bytes are appended after the part_filled bytes carried over earlier
+ readed = fread(part+part_filled, 1, part_size-part_filled, in);
+ else if(mode == m_gzip)
+ readed = gzread(in_gzip, part+part_filled, (int) (part_size-part_filled));
+ else if(mode == m_bzip2)
+ readed = BZ2_bzRead(&bzerror, in_bzip2, part+part_filled, (int) (part_size-part_filled));
+ int64 total_filled = part_filled + readed;
+ int64 last_header_pos = 0; // offset, in the compacted data, of the last '>' seen
+ int64 pos = 0; // write cursor of the in-place compaction
+ for(int64 i = 0 ; i < total_filled ;++i )//find last '>' and remove EOLs
+ {
+ if(part[i] == '>')
+ {
+ int64 tmp = i;
+ SkipNextEOL(part,i,total_filled); // on success i moves to the start of the line after the header
+ copy(part+tmp, part+i, part+pos); // header line is kept verbatim, including its line break
+ last_header_pos = pos;
+ pos += i - tmp;
+ }
+ if(part[i] != '\n' && part[i] != '\r')
+ {
+ part[pos++] = part[i];
+ }
+ }
+ // Hand the compacted buffer to the caller; 'part' is re-reserved below and seeded with the carry-over
+ _part = part;
+ if(last_header_pos == 0)//data in block belong to one seq
+ {
+ part_filled = kmer_len - 1; // the last kmer_len-1 bytes are repeated at the start of the next block
+ _size = pos;
+ pmm_fastq->reserve(part);
+ copy(_part+_size-part_filled, _part+_size, part); // NOTE(review): looks like this assumes pos >= kmer_len-1 - confirm
+ containsNextChromosome = false;
+ }
+ else//next seq starts at last_header_pos
+ {
+ _size = last_header_pos;
+ part_filled = pos - last_header_pos;
+ pmm_fastq->reserve(part);
+ copy(_part + last_header_pos, _part + pos, part);
+ containsNextChromosome = true;
+ }
+ return true;
+}
+
+//----------------------------------------------------------------------------------
+// Read a part of the file
+bool CFastqReader::GetPart(uchar *&_part, uint64 &_size)
+{
+ if(!in && !in_gzip && !in_bzip2)
+ return false;
+ // Hands out the next chunk of input: _part receives the buffer and _size the number of bytes in it
+ // that the caller should process. Bytes after _size (an unfinished trailing record) are carried over
+ // into a newly reserved buffer. Returns false when no file is open or the input is exhausted.
+ if(file_type == multiline_fasta)
+ return GetPartFromMultilneFasta(_part,_size);
+
+ if(IsEof())
+ return false;
+ uint64 readed;
+ // NOTE(review): part_size bytes are requested at offset part_filled; the capacity of 'part' is not visible here - confirm it covers both
+ // Read data
+ if(mode == m_plain)
+ readed = fread(part+part_filled, 1, part_size, in);
+ else if(mode == m_gzip)
+ readed = gzread(in_gzip, part+part_filled, (int) part_size);
+ else if(mode == m_bzip2)
+ readed = BZ2_bzRead(&bzerror, in_bzip2, part+part_filled, (int) part_size);
+ else
+ readed = 0; // Never should be here
+
+ int64 total_filled = part_filled + readed;
+ int64 i;
+ // A carry-over of OVERHEAD_SIZE bytes or more from the previous call is treated as a malformed input file
+ if(part_filled >= OVERHEAD_SIZE)
+ {
+ cout << "Error: Wrong input file!\n";
+ exit(1);
+ }
+
+ if(IsEof())
+ {
+ _part = part;
+ _size = total_filled;
+ // The final chunk is handed over whole; clearing 'part' keeps the destructor from freeing it
+ part = NULL;
+ return true;
+ }
+
+ // Look for the end of the last complete record in a buffer
+ if(file_type == fasta) // FASTA files
+ {
+ // Looking for a FASTA record at the end of the area
+ int64 line_start[3];
+ int32 j;
+ // NOTE(review): the scan starts OVERHEAD_SIZE/2 bytes before the end; presumably total_filled is never smaller than that - confirm
+ i = total_filled - OVERHEAD_SIZE / 2;
+ for(j = 0; j < 3; ++j)
+ {
+ if(!SkipNextEOL(part, i, total_filled))
+ break;
+ line_start[j] = i;
+ }
+
+ _part = part;
+ if(j < 3) // fewer than three line starts found: hand out nothing, carry the whole buffer over
+ _size = 0;
+ else
+ {
+ int k;
+ for(k = 0; k < 2; ++k)
+ if(part[line_start[k]+0] == '>')
+ break;
+ // The chunk ends just before the first of the two candidate lines that begins with '>'
+ if(k == 2)
+ _size = 0;
+ else
+ _size = line_start[k];
+ }
+ }
+ else // FASTQ file
+ {
+ // Looking for a FASTQ record at the end of the area
+ int64 line_start[9];
+ int32 j;
+ // Nine consecutive line starts are collected so that each of the four candidates below has three following lines
+ i = total_filled - OVERHEAD_SIZE / 2;
+ for(j = 0; j < 9; ++j)
+ {
+ if(!SkipNextEOL(part, i, total_filled))
+ break;
+ line_start[j] = i;
+ }
+
+ _part = part;
+ if(j < 9)
+ _size = 0;
+ else
+ {
+ int k;
+ for(k = 0; k < 4; ++k)
+ {
+ if(part[line_start[k]+0] == '@' && part[line_start[k+2]+0] == '+') // line k starts with '@' and line k+2 with '+'
+ {
+ if(part[line_start[k+2]+1] == '\n' || part[line_start[k+2]+1] == '\r') // the '+' line is bare
+ break;
+ if(line_start[k+1]-line_start[k] == line_start[k+3]-line_start[k+2] && // or the '+' line has the same length as the '@' line
+ memcmp(part+line_start[k]+1, part+line_start[k+2]+1, line_start[k+3]-line_start[k+2]-1) == 0) // and the same text after the first character
+ break;
+ }
+ }
+ // No candidate matched: hand out nothing and carry the whole buffer over
+ if(k == 4)
+ _size = 0;
+ else
+ _size = line_start[k];
+ }
+ }
+ // Allocate new memory for the buffer
+ // (the caller keeps the old buffer via _part; the bytes past _size seed the new one)
+ pmm_fastq->reserve(part);
+ copy(_part+_size, _part+total_filled, part);
+ part_filled = total_filled - _size;
+
+ return true;
+}
+
+//----------------------------------------------------------------------------------
+// Skip to next EOL from the current position in a buffer
+bool CFastqReader::SkipNextEOL(uchar *part, int64 &pos, int64 max_pos)
+{
+    // Scan [pos, max_pos-2) for an EOL byte ('\n' or '\r') that is not followed by another EOL byte
+    for(int64 cur = pos; cur < max_pos - 2; ++cur)
+    {
+        if(part[cur] != '\n' && part[cur] != '\r')
+            continue;
+        if(part[cur + 1] == '\n' || part[cur + 1] == '\r')
+            continue;
+        pos = cur + 1;    // first byte after the line break
+        return true;
+    }
+    return false;         // no line break found; pos is left untouched
+}
+
+//----------------------------------------------------------------------------------
+// Check whether there is an EOF
+bool CFastqReader::IsEof()
+{
+    // Each mode has its own end-of-input test; any other mode value reports EOF
+    switch(mode)
+    {
+    case m_plain: return feof(in) != 0;
+    case m_gzip:  return gzeof(in_gzip) != 0;
+    case m_bzip2: return bzerror == BZ_STREAM_END;    // bzerror is updated by the bzip2 calls of this class
+    default:      return true;
+    }
+}
+
+
+
+//************************************************************************************************************
+// CWFastqReader - wrapper for multithreading purposes
+//************************************************************************************************************
+CWFastqReader::CWFastqReader(CKMCParams &Params, CKMCQueues &Queues)
+    // Shared services and queues are taken from Queues, tuning values from Params.
+    // Initialisers are listed in member declaration order.
+    : mm(Queues.mm),
+      pmm_fastq(Queues.pmm_fastq),
+      fqr(NULL),                                    // a reader is created per input file in operator()()
+      part_size(Params.fastq_buffer_size),
+      input_files_queue(Queues.input_files_queue),
+      part_queue(Queues.part_queue),
+      file_type(Params.file_type),
+      gzip_buffer_size(Params.gzip_buffer_size),
+      bzip2_buffer_size(Params.bzip2_buffer_size),
+      kmer_len(Params.p_k)
+{
+    // file_name stays empty until operator()() pops one from the input queue
+}
+
+//----------------------------------------------------------------------------------
+CWFastqReader::~CWFastqReader()
+{ // Nothing to release here: operator()() deletes each reader it creates
+}
+
+//----------------------------------------------------------------------------------
+void CWFastqReader::operator()()
+{
+    uchar *chunk;
+    uint64 chunk_len;
+
+    // Drain the queue of input file names, reading each file with a reader of its own
+    while(input_files_queue->pop(file_name))
+    {
+        fqr = new CFastqReader(mm, pmm_fastq, file_type, gzip_buffer_size, bzip2_buffer_size, kmer_len);
+        fqr->SetNames(file_name);
+        fqr->SetPartSize(part_size);
+
+        if(!fqr->OpenFiles())
+            cerr << "Error: Cannot open file " << file_name << "\n";
+        else    // forward every chunk the reader produces to the part queue
+            while(fqr->GetPart(chunk, chunk_len))
+                part_queue->push(chunk, chunk_len);
+
+        delete fqr;
+    }
+    // All input files handled: signal completion on the part queue
+    part_queue->mark_completed();
+}
+
+
+
+//************************************************************************************************************
+// CWStatsFastqReader - wrapper for multithreading purposes
+//************************************************************************************************************
+CWStatsFastqReader::CWStatsFastqReader(CKMCParams &Params, CKMCQueues &Queues)
+    // Shared services and queues are taken from Queues, tuning values from Params.
+    // Initialisers are listed in member declaration order.
+    : mm(Queues.mm),
+      pmm_fastq(Queues.pmm_fastq),
+      fqr(NULL),                                    // a reader is created per input file in operator()()
+      part_size(Params.fastq_buffer_size),
+      input_files_queue(Queues.input_files_queue),
+      stats_part_queue(Queues.stats_part_queue),
+      file_type(Params.file_type),
+      gzip_buffer_size(Params.gzip_buffer_size),
+      bzip2_buffer_size(Params.bzip2_buffer_size),
+      kmer_len(Params.p_k)
+{
+    // file_name stays empty until operator()() pops one from the input queue
+}
+
+//----------------------------------------------------------------------------------
+CWStatsFastqReader::~CWStatsFastqReader()
+{ // Nothing to release here: operator()() deletes each reader it creates
+}
+
+//----------------------------------------------------------------------------------
+void CWStatsFastqReader::operator()()
+{
+    // Feeds chunks of the input files to the stats queue until the files run out or the queue refuses a chunk
+    uchar *part;
+    uint64 part_filled;
+    bool finished = false;
+    // 'finished' is tested first so that no further file name is popped (and discarded) once we are done
+    while (!finished && input_files_queue->pop(file_name))
+    {
+        fqr = new CFastqReader(mm, pmm_fastq, file_type, gzip_buffer_size, bzip2_buffer_size, kmer_len);
+        fqr->SetNames(file_name);
+        fqr->SetPartSize(part_size);
+        if (fqr->OpenFiles())
+        {
+            while (fqr->GetPart(part, part_filled))
+            {
+                if (!stats_part_queue->push(part, part_filled))
+                {
+                    // The refused chunk was not taken by the queue: return it to the pool and stop reading
+                    finished = true;
+                    pmm_fastq->free(part);
+                    break;
+                }
+            }
+        }
+        else
+            cerr << "Error: Cannot open file " << file_name << "\n";
+        delete fqr;
+    }
+    stats_part_queue->mark_completed();
+}
+
+
+// ***** EOF
diff --git a/kmer_counter/fastq_reader.h b/kmer_counter/fastq_reader.h
new file mode 100755
index 0000000..e32c5f0
--- /dev/null
+++ b/kmer_counter/fastq_reader.h
@@ -0,0 +1,123 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#ifndef _FASTQ_READER_H
+#define _FASTQ_READER_H
+
+#include "defs.h"
+#include "params.h"
+#include <stdio.h>
+#include <iostream>
+
+#include "libs/zlib.h"
+#include "libs/bzlib.h"
+
+
+using namespace std;
+
+//************************************************************************************************************
+// FASTA/FASTQ reader class
+//************************************************************************************************************
+class CFastqReader {
+ typedef enum {m_plain, m_gzip, m_bzip2} t_mode; // how the input file is stored on disk
+
+ CMemoryMonitor *mm; // stored by the constructor
+ CMemoryPool *pmm_fastq; // pool the chunk buffers are reserved from and freed to
+
+ string input_file_name;
+ input_type file_type; // record layout; GetPart() distinguishes multiline_fasta, fasta and everything else (FASTQ)
+ int kmer_len; // multi-line FASTA blocks overlap by kmer_len-1 bytes
+ t_mode mode; // derived from the file-name suffix in SetNames()
+
+ FILE *in; // stdio stream; used directly in plain mode and underneath the bzip2 reader
+ gzFile_s *in_gzip; // gzip stream, gzip mode only
+ BZFILE *in_bzip2; // bzip2 reader layered on 'in', bzip2 mode only
+ int bzerror; // status written by the bzip2 calls; BZ_STREAM_END is read as EOF
+
+ uint64 part_size; // number of bytes requested per read
+
+ uchar *part; // buffer currently being filled; NULL when none is held
+ uint64 part_filled; // bytes at the start of 'part' carried over from the previous chunk
+
+ uint32 gzip_buffer_size; // passed to gzbuffer()
+ uint32 bzip2_buffer_size; // passed to setvbuf() on 'in'
+
+ bool containsNextChromosome; //for multiline_fasta processing
+ // Moves pos just past the next line break in part[pos, max_pos-2); false (pos unchanged) if none is found
+ bool SkipNextEOL(uchar *part, int64 &pos, int64 max_pos);
+ // End-of-input test for the active mode
+ bool IsEof();
+
+public:
+ CFastqReader(CMemoryMonitor *_mm, CMemoryPool *_pmm_fastq, input_type _file_type, uint32 _gzip_buffer_size, uint32 _bzip2_buffer_size, int _kmer_len);
+ ~CFastqReader(); // closes the open stream and frees the buffer still held, if any
+
+ static uint64 OVERHEAD_SIZE; // GetPart() looks for a record boundary in the last OVERHEAD_SIZE/2 bytes of a chunk
+ // Configuration and opening; SetPartSize() and OpenFiles() return false once a stream is open
+ bool SetNames(string _input_file_name);
+ bool SetPartSize(uint64 _part_size);
+ bool OpenFiles();
+ // Chunk retrieval: on success _part is the buffer and _size the number of bytes in it to process
+ bool GetPartFromMultilneFasta(uchar *&_part, uint64 &_size);
+ bool GetPart(uchar *&_part, uint64 &_size);
+};
+
+//************************************************************************************************************
+// Wrapper for FASTA/FASTQ reader class - for multithreading purposes
+//************************************************************************************************************
+class CWFastqReader {
+ CMemoryMonitor *mm; // passed on to every CFastqReader
+ CMemoryPool *pmm_fastq; // passed on to every CFastqReader
+ // Per-file state used by operator()()
+ CFastqReader *fqr; // reader for the file being processed; created and deleted per file
+ string file_name; // name popped most recently from input_files_queue
+ uint64 part_size; // chunk size given to each reader
+ CInputFilesQueue *input_files_queue; // source of input file names
+ CPartQueue *part_queue; // destination of the chunks read
+ input_type file_type;
+ uint32 gzip_buffer_size;
+ uint32 bzip2_buffer_size;
+ int kmer_len;
+
+public:
+ CWFastqReader(CKMCParams &Params, CKMCQueues &Queues);
+ ~CWFastqReader();
+ // Reads every queued input file, pushes its chunks to part_queue and finally marks that queue completed
+ void operator()();
+};
+
+
+
+//************************************************************************************************************
+// Wrapper for FASTA/FASTQ reader class (stats mode) - for multithreading purposes
+//************************************************************************************************************
+class CWStatsFastqReader {
+ CMemoryMonitor *mm; // passed on to every CFastqReader
+ CMemoryPool *pmm_fastq; // passed on to every CFastqReader; also receives chunks the stats queue refuses
+ // Per-file state used by operator()()
+ CFastqReader *fqr; // reader for the file being processed; created and deleted per file
+ string file_name; // name popped most recently from input_files_queue
+ uint64 part_size; // chunk size given to each reader
+ CInputFilesQueue *input_files_queue; // source of input file names
+ CStatsPartQueue *stats_part_queue; // destination of the chunks read
+ input_type file_type;
+ uint32 gzip_buffer_size;
+ uint32 bzip2_buffer_size;
+ int kmer_len;
+
+public:
+ CWStatsFastqReader(CKMCParams &Params, CKMCQueues &Queues);
+ ~CWStatsFastqReader();
+ // Pushes chunks to stats_part_queue until the files run out or a push is refused, then marks the queue completed
+ void operator()();
+};
+#endif
+
+// ***** EOF
diff --git a/kmer_counter/kb_collector.h b/kmer_counter/kb_collector.h
new file mode 100755
index 0000000..3e4ed45
--- /dev/null
+++ b/kmer_counter/kb_collector.h
@@ -0,0 +1,227 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#ifndef _KB_COLLECTOR_H
+#define _KB_COLLECTOR_H
+
+#include "defs.h"
+#include "params.h"
+#include "kmer.h"
+#include "queues.h"
+#include "radix.h"
+#include "rev_byte.h"
+#include <string>
+#include <algorithm>
+#include <numeric>
+#include <array>
+#include <vector>
+#include <stdio.h>
+
+using namespace std;
+
+
+
+//----------------------------------------------------------------------------------
+// Class collecting kmers belonging to a single bin
+class CKmerBinCollector
+{
+ enum comparision_state { kmer_smaller, rev_smaller, equals }; // outcome of comparing a k-mer prefix with its reverse-complement prefix
+ uint32 bin_no; // bin this collector writes to
+ CBinPartQueue *bin_part_queue; // receives full buffers in Flush()
+ CBinDesc *bd; // receives per-buffer statistics in Flush()
+ uint32 kmer_len;
+ uchar* buffer; // current output buffer, reserved from pmm_bins
+ uint32 buffer_size; // capacity of 'buffer' in bytes
+ uint32 buffer_pos; // bytes written to 'buffer' so far
+ CMemoryPool *pmm_bins;
+ uint32 n_recs; // k-mers added since the last buffer switch
+ uint32 n_plus_x_recs; // (k+x)-mer estimate added since the last buffer switch
+ uint32 n_super_kmers; // records added since the last buffer switch
+ int lowest_quality; // subtracted from each quality value in quake mode
+ uint32 max_x; // 0 disables (k+x)-mer accounting
+
+ uint32 kmer_bytes; // (kmer_len + 3) / 4
+ bool both_strands;
+ // Adds the (k+x)-mer estimate for one super k-mer when both strands are counted; DIVIDE_FACTOR is max_x + 1
+ template<unsigned DIVIDE_FACTOR> void update_n_plus_x_recs(char* seq, uint32 n);
+
+public:
+ CKmerBinCollector(CKMCQueues& Queues, CKMCParams& Params, uint32 _buffer_size, uint32 _bin_no);
+ void PutExtendedKmer(char* seq, uint32 n);
+ void PutExtendedKmer(char* seq, char* quals, uint32 n);//for quake mode
+ inline void Flush(); // pushes the buffer and its statistics; does not reset the counters itself
+};
+
+//----------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------
+//----------------------------------------------------------------------------------
+
+// 'inline' because this is defined in a header: including it from several translation units must not produce duplicate definitions
+inline CKmerBinCollector::CKmerBinCollector(CKMCQueues& Queues, CKMCParams& Params, uint32 _buffer_size, uint32 _bin_no)
+{
+    // Queues and pool taken from the shared Queues object
+    bin_part_queue = Queues.bpq;
+    bd = Queues.bd;
+    pmm_bins = Queues.pmm_bins;
+    // Counting parameters
+    kmer_len = Params.kmer_len;
+    lowest_quality = Params.lowest_quality;
+    max_x = Params.max_x;
+    both_strands = Params.both_strands;
+    kmer_bytes = (kmer_len + 3) / 4;    // kmer_len / 4, rounded up
+    // This collector's bin, its counters and its first (empty) buffer
+    bin_no = _bin_no;
+    buffer_size = _buffer_size;
+    buffer_pos = 0;
+    n_recs = n_super_kmers = n_plus_x_recs = 0;
+    pmm_bins->reserve(buffer);
+}
+//---------------------------------------------------------------------------------
+// Appends one super k-mer (n symbols, 2 bits each) to the bin buffer, flushing first when it would not fit.
+// 'inline' because this non-template member is defined in a header: without it, including the header
+// from more than one translation unit yields duplicate definitions at link time.
+inline void CKmerBinCollector::PutExtendedKmer(char* seq, uint32 n)
+{
+    uint32 bytes = 1 + (n + 3) / 4;    // one length byte + packed symbols, 4 per byte rounded up
+    if(buffer_pos + bytes > buffer_size)
+    {
+        // Record does not fit: publish the current buffer, then start an empty one with fresh counters
+        Flush();
+        pmm_bins->reserve(buffer);
+        buffer_pos = 0;
+        n_recs = 0;
+        n_super_kmers = 0;
+        n_plus_x_recs = 0;
+    }
+    // Record layout: [n - kmer_len][packed symbols, first symbol in the top two bits of each byte]
+    buffer[buffer_pos++] = n - kmer_len;
+    for(uint32 i = 0, j = 0 ; i < n / 4 ; ++i,j+=4)
+        buffer[buffer_pos++] = (seq[j] << 6) + (seq[j + 1] << 4) + (seq[j + 2] << 2) + seq[j + 3];
+    switch (n%4)    // 1-3 leftover symbols go into a final byte, filled from the top bits down
+    {
+    case 1:
+        buffer[buffer_pos++] = (seq[n-1] << 6);
+        break;
+    case 2:
+        buffer[buffer_pos++] = (seq[n-2] << 6) + (seq[n-1] << 4);
+        break;
+    case 3:
+        buffer[buffer_pos++] = (seq[n-3] << 6) + (seq[n-2] << 4) + (seq[n-1] << 2);
+        break;
+    }
+
+    ++n_super_kmers;
+    n_recs += n - kmer_len + 1;    // number of k-mers in a run of n symbols
+    if (max_x) ///for max_x = 0 k-mers (not k+x-mers) will be sorted
+    {
+        if (!both_strands)
+            n_plus_x_recs += 1 + (n - kmer_len) / (max_x + 1);
+        else
+        {
+            switch (max_x)    // the divide factor is a template argument, hence the dispatch; other max_x values add nothing
+            {
+            case 1: update_n_plus_x_recs<2>(seq, n); break;
+            case 2: update_n_plus_x_recs<3>(seq, n); break;
+            case 3: update_n_plus_x_recs<4>(seq, n); break;
+            }
+        }
+    }
+}
+
+//---------------------------------------------------------------------------------
+template<unsigned DIVIDE_FACTOR> void CKmerBinCollector::update_n_plus_x_recs(char* seq, uint32 n)
+{ // Adds to n_plus_x_recs while sliding over the k-mers of seq, comparing only one packed byte per strand
+ uchar kmer, rev;
+ uint32 kmer_pos = 4; // next symbol to shift into 'kmer'
+ uint32 rev_pos = kmer_len; // next symbol to shift into 'rev'
+ uint32 x;
+ // kmer: first four symbols of the first k-mer; rev: its last four symbols, each replaced by 3 - symbol, in reverse order
+ kmer = (seq[0] << 6) + (seq[1] << 4) + (seq[2] << 2) + seq[3];
+ rev = ((3 - seq[kmer_len - 1]) << 6) + ((3 - seq[kmer_len - 2]) << 4) + ((3 - seq[kmer_len - 3]) << 2) + (3 - seq[kmer_len - 4]);
+ // NOTE(review): indexes seq[3] and seq[kmer_len - 4], so this presumably relies on kmer_len >= 4 - confirm
+ x = 0; // length of the current run of k-mers that share the same non-equal state
+ comparision_state current_state, new_state;
+ if (kmer < rev)
+ current_state = kmer_smaller;
+ else if (rev < kmer)
+ current_state = rev_smaller;
+ else
+ current_state = equals;
+
+ // One iteration per remaining k-mer of the n-symbol run
+ for (uint32 i = 0; i < n - kmer_len; ++i)
+ {
+ rev >>= 2;
+ rev += (3 - seq[rev_pos++]) << 6; // the new last symbol, complemented, enters at the top
+ kmer <<= 2;
+ kmer += seq[kmer_pos++]; // the next symbol enters at the bottom; the oldest falls off the top
+
+ if (kmer < rev)
+ new_state = kmer_smaller;
+ else if (rev < kmer)
+ new_state = rev_smaller;
+ else
+ new_state = equals;
+
+ if (new_state == current_state)
+ {
+ if (current_state == equals)
+ ++n_plus_x_recs; // while the bytes tie, every k-mer adds one
+ else
+ ++x; // otherwise the run just grows
+ }
+ else
+ {
+ current_state = new_state;
+ n_plus_x_recs += 1 + x / DIVIDE_FACTOR; // close the finished run
+
+ x = 0;
+ }
+ }
+ n_plus_x_recs += 1 + x / DIVIDE_FACTOR; // close the last run
+}
+
+//---------------------------------------------------------------------------------
+// Quake-mode variant: one byte per symbol. 'inline' because it is defined in a header (avoids duplicate definitions at link time).
+inline void CKmerBinCollector::PutExtendedKmer(char* seq, char* quals, uint32 n)
+{
+    uint32 bytes = n + 1;    // one length byte plus one byte per symbol
+    if (buffer_pos + bytes > buffer_size)
+    {
+        // Record does not fit: publish the current buffer, then start an empty one with fresh counters
+        Flush();
+        pmm_bins->reserve(buffer);
+        buffer_pos = 0;
+        n_recs = 0;
+        n_super_kmers = 0;
+        n_plus_x_recs = 0;    // reset like the other overload, so Flush() never reports a stale value twice
+    }
+    n_recs += n - kmer_len + 1;
+    ++n_super_kmers;
+    buffer[buffer_pos++] = n - kmer_len;
+    for (uint32 i = 0; i < n; ++i)
+    {
+        // Quality relative to lowest_quality, capped at 63; symbol in the top two bits, quality added below
+        char qual = quals[i] - lowest_quality;
+        if (qual > 63) qual = 63;
+        buffer[buffer_pos++] = (seq[i] << 6) + qual;
+    }
+}
+
+//---------------------------------------------------------------------------------
+void CKmerBinCollector::Flush()
+{ // Publishes the current buffer and its statistics; resets no member itself
+ bin_part_queue->push(bin_no, buffer, buffer_pos, buffer_size);
+ bd->insert(bin_no, NULL, "", buffer_pos, n_recs, n_plus_x_recs, n_super_kmers);
+}
+
+#endif
+
+// ***** EOF
diff --git a/kmer_counter/kb_completer.cpp b/kmer_counter/kb_completer.cpp
new file mode 100755
index 0000000..c9e38ff
--- /dev/null
+++ b/kmer_counter/kb_completer.cpp
@@ -0,0 +1,251 @@
+#include "stdafx.h"
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+#include <algorithm>
+#include <numeric>
+#include <iostream>
+#include "kb_completer.h"
+
+using namespace std;
+
+extern uint64 total_reads;
+
+
+
+//************************************************************************************************************
+// CKmerBinCompleter
+//************************************************************************************************************
+
+//----------------------------------------------------------------------------------
+// Assign queues and monitors
+CKmerBinCompleter::CKmerBinCompleter(CKMCParams &Params, CKMCQueues &Queues)
+    // Initialisers are listed in member declaration order
+    : mm(Queues.mm),
+      file_name(Params.output_file_name),
+    // The two output files share the base name and differ only in extension
+      kmer_file_name(file_name + ".kmc_suf"),
+      lut_file_name(file_name + ".kmc_pre"),
+      kq(Queues.kq),
+      bd(Queues.bd),
+      s_mapper(Queues.s_mapper),
+      memory_bins(Queues.memory_bins),
+      lut_prefix_len(Params.lut_prefix_len),
+      kmer_t_size(Params.KMER_T_size),
+    // Cut-offs and sizes that ProcessBins() writes into the header
+      cutoff_min(Params.cutoff_min),
+      cutoff_max(Params.cutoff_max),
+      counter_max(Params.counter_max),
+      kmer_len(Params.kmer_len),
+      signature_len(Params.signature_len),
+      use_quake(Params.use_quake)
+{
+    // n_unique, n_cutoff_min, n_cutoff_max and n_total are not set here;
+    // ProcessBins() zeroes them before accumulating
+}
+
+//----------------------------------------------------------------------------------
+CKmerBinCompleter::~CKmerBinCompleter()
+{ // Nothing to release: the output files and the signature map are closed/freed inside ProcessBins()
+}
+
+//----------------------------------------------------------------------------------
+// Store sorted and compacted bins to the output file
+void CKmerBinCompleter::ProcessBins()
+{ // Writes all bins from the k-mer queue to the suffix file and builds the prefix (LUT) file with its trailing header
+ int32 bin_id;
+ uchar *data = NULL;
+ uint64 data_size = 0;
+ uchar *lut = NULL;
+ uint64 lut_size = 0;
+ uint64 counter_size = 0;
+ uint32 sig_map_size = (1 << (signature_len * 2)) + 1; // 4^signature_len slots plus one extra
+ uint32 *sig_map = new uint32[sig_map_size];
+ fill_n(sig_map, sig_map_size, 0);
+ uint32 lut_pos = 0; // ordinal of the bin being written, in output order
+ if(use_quake)
+ counter_size = 4;
+ else
+ counter_size = min(BYTE_LOG(cutoff_max), BYTE_LOG(counter_max)); // the smaller of the two BYTE_LOG values (1-4)
+
+ // Open output file
+ FILE *out_kmer = fopen(kmer_file_name.c_str(), "wb");
+ if(!out_kmer)
+ {
+ cout << "Error: Cannot create " << kmer_file_name << "\n";
+ exit(1);
+ return; // not reached: exit() does not return
+ }
+
+ FILE *out_lut = fopen(lut_file_name.c_str(), "wb");
+ if(!out_lut)
+ {
+ cout << "Error: Cannot create " << lut_file_name << "\n";
+ fclose(out_kmer);
+ exit(1);
+ return; // not reached: exit() does not return
+ }
+
+ uint64 _n_unique, _n_cutoff_min, _n_cutoff_max, _n_total; // per-bin values filled in by kq->pop()
+ uint64 n_recs = 0; // running sum of all LUT entries written so far
+
+ _n_unique = _n_cutoff_min = _n_cutoff_max = _n_total = 0;
+ n_unique = n_cutoff_min = n_cutoff_max = n_total = 0;
+
+ char s_kmc_pre[] = "KMCP";
+ char s_kmc_suf[] = "KMCS";
+
+ // Markers at the beginning
+ fwrite(s_kmc_pre, 1, 4, out_lut);
+ fwrite(s_kmc_suf, 1, 4, out_kmer);
+
+ // Process priority queue of ready-to-output bins
+ while(!kq->empty())
+ {
+ // Get the next bin
+ if (!kq->pop(bin_id, data, data_size, lut, lut_size, _n_unique, _n_cutoff_min, _n_cutoff_max, _n_total))
+ continue;
+
+ // Fetch the bin's descriptor (the values read are not used further in this function)
+ string name;
+ uint64 n_rec;
+ uint64 n_plus_x_recs;
+ uint64 n_super_kmers;
+ uint64 raw_size;
+ CMemDiskFile *file;
+
+ bd->read(bin_id, file, name, raw_size, n_rec, n_plus_x_recs, n_super_kmers);
+
+ uint64 lut_recs = lut_size / sizeof(uint64);
+
+ // Write bin data to the output file
+ fwrite(data, 1, data_size, out_kmer);
+ memory_bins->free(bin_id, CMemoryBins::mba_suffix);
+ // Turn the per-entry counts into running start offsets that continue across bins
+ uint64 *ulut = (uint64*) lut;
+ for(uint64 i = 0; i < lut_recs; ++i)
+ {
+ uint64 x = ulut[i];
+ ulut[i] = n_recs;
+ n_recs += x;
+ }
+ fwrite(lut, lut_recs, sizeof(uint64), out_lut); // size/count arguments are swapped relative to the other calls; same bytes are written
+ //fwrite(&n_rec, 1, sizeof(uint64), out_lut);
+ memory_bins->free(bin_id, CMemoryBins::mba_lut);
+
+ n_unique += _n_unique;
+ n_cutoff_min += _n_cutoff_min;
+ n_cutoff_max += _n_cutoff_max;
+ n_total += _n_total;
+ for (uint32 i = 0; i < sig_map_size; ++i) // every signature mapped to this bin points at the bin's ordinal
+ {
+ if (s_mapper->get_bin_id(i) == bin_id)
+ {
+ sig_map[i] = lut_pos;
+ }
+ }
+ ++lut_pos;
+ }
+
+ // Marker at the end
+ fwrite(s_kmc_suf, 1, 4, out_kmer);
+ fclose(out_kmer);
+ // One more LUT entry after all bins: the final running total
+ fwrite(&n_recs, 1, sizeof(uint64), out_lut);
+
+ //store signature mapping
+ fwrite(sig_map, sizeof(uint32), sig_map_size, out_lut);
+
+ // Store header
+ uint32 offset = 0; // number of header bytes written so far
+
+ store_uint(out_lut, kmer_len, 4); offset += 4;
+ store_uint(out_lut, (uint32) use_quake, 4); offset += 4; // mode: 0 (counting), 1 (Quake-compatible counting)
+ store_uint(out_lut, counter_size, 4); offset += 4;
+ store_uint(out_lut, lut_prefix_len, 4); offset += 4;
+ store_uint(out_lut, signature_len, 4); offset += 4;
+ store_uint(out_lut, cutoff_min, 4); offset += 4;
+ store_uint(out_lut, cutoff_max, 4); offset += 4;
+ store_uint(out_lut, n_unique - n_cutoff_min - n_cutoff_max, 8); offset += 8;
+
+ // Space for future use
+ for(int32 i = 0; i < 7; ++i)
+ {
+ store_uint(out_lut, 0, 4);
+ offset += 4;
+ }
+ // NOTE(review): 0x200 is presumably a format-version tag matching KMC 2.0 - confirm
+ store_uint(out_lut, 0x200, 4);
+ offset += 4;
+ // The header ends with its own size in bytes
+ store_uint(out_lut, offset, 4);
+
+ // Marker at the end
+ fwrite(s_kmc_pre, 1, 4, out_lut);
+ fclose(out_lut);
+ cout << "\n";
+
+ delete[] sig_map;
+}
+
+//----------------------------------------------------------------------------------
+// Return statistics
+void CKmerBinCompleter::GetTotal(uint64 &_n_unique, uint64 &_n_cutoff_min, uint64 &_n_cutoff_max, uint64 &_n_total)
+{ // Copies the four totals accumulated by ProcessBins() into the caller's variables
+ _n_unique = n_unique;
+ _n_cutoff_min = n_cutoff_min;
+ _n_cutoff_max = n_cutoff_max;
+ _n_total = n_total;
+}
+
+//----------------------------------------------------------------------------------
+// Store single unsigned integer in LSB fashion
+bool CKmerBinCompleter::store_uint(FILE *out, uint64 x, uint32 size)
+{
+    // Writes 'size' bytes of x, least significant first; false on a failed write. x is shifted one byte per step, so size > 8 never over-shifts.
+    for(uint32 i = 0; i < size; ++i, x >>= 8)
+        if(putc((int) (x & 0xFF), out) == EOF) return false;
+    return true;
+}
+
+
+//************************************************************************************************************
+// CWKmerBinCompleter
+//************************************************************************************************************
+
+//----------------------------------------------------------------------------------
+// Constructor
+CWKmerBinCompleter::CWKmerBinCompleter(CKMCParams &Params, CKMCQueues &Queues)
+{
+ kbc = new CKmerBinCompleter(Params, Queues); // heap-allocated; deleted in the destructor
+}
+
+//----------------------------------------------------------------------------------
+// Destructor
+CWKmerBinCompleter::~CWKmerBinCompleter()
+{
+ delete kbc; // releases the completer allocated in the constructor
+}
+
+//----------------------------------------------------------------------------------
+// Execution
+void CWKmerBinCompleter::operator()()
+{
+ kbc->ProcessBins(); // the whole job of this wrapper: run the completer once
+}
+
+//----------------------------------------------------------------------------------
+// Return statistics
+void CWKmerBinCompleter::GetTotal(uint64 &_n_unique, uint64 &_n_cutoff_min, uint64 &_n_cutoff_max, uint64 &_n_total)
+{
+    if(!kbc) return;    // no completer: the outputs are left as the caller passed them
+    kbc->GetTotal(_n_unique, _n_cutoff_min, _n_cutoff_max, _n_total);
+}
+
+// ***** EOF
diff --git a/kmer_counter/kb_completer.h b/kmer_counter/kb_completer.h
new file mode 100755
index 0000000..12770db
--- /dev/null
+++ b/kmer_counter/kb_completer.h
@@ -0,0 +1,72 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+#ifndef _KB_COMPLETER_H
+#define _KB_COMPLETER_H
+
+#include "defs.h"
+#include "params.h"
+#include "kmer.h"
+#include "radix.h"
+#include <string>
+#include <algorithm>
+#include <numeric>
+#include <array>
+#include <stdio.h>
+
+
+//************************************************************************************************************
+// CKmerBinCompleter - complete the sorted bins and store in a file
+//************************************************************************************************************
+class CKmerBinCompleter {
+ // Objects taken over from CKMCQueues / CKMCParams in the constructor (constructor body is not part of this header)
+ CMemoryMonitor *mm;
+ string file_name, kmer_file_name, lut_file_name;
+ CKmerQueue *kq;
+ CBinDesc *bd;
+ // NOTE(review): "s_mapper.h" is not included by this header - presumably CSignatureMapper arrives through another include; confirm
+ CSignatureMapper *s_mapper;
+
+ CMemoryBins *memory_bins;
+ uint32 lut_prefix_len;
+ // Statistics handed out by GetTotal()
+ uint64 n_unique, n_cutoff_min, n_cutoff_max, n_total;
+ uint32 kmer_t_size;
+ int32 cutoff_min, cutoff_max;
+ int32 counter_max;
+ int32 kmer_len;
+ int32 signature_len;
+ bool use_quake;
+
+ // Writes the `size` lowest bytes of x to `out`, least significant byte first
+ bool store_uint(FILE *out, uint64 x, uint32 size);
+
+public:
+ CKmerBinCompleter(CKMCParams &Params, CKMCQueues &Queues);
+ ~CKmerBinCompleter();
+
+ // Main processing routine; invoked by CWKmerBinCompleter::operator()
+ void ProcessBins();
+ // Copies n_unique, n_cutoff_min, n_cutoff_max and n_total into the output references
+ void GetTotal(uint64 &_n_unique, uint64 &_n_cutoff_min, uint64 &_n_cutoff_max, uint64 &_n_total);
+};
+
+
+//************************************************************************************************************
+// CWKmerBinCompleter - wrapper for multithreading purposes
+//************************************************************************************************************
+class CWKmerBinCompleter {
+ // Completer created with new in the constructor and deleted in the destructor.
+ // NOTE(review): no copy constructor / assignment is declared, so a copy of this wrapper
+ // would delete the same pointer twice - pass it to threads by reference; confirm at call sites.
+ CKmerBinCompleter *kbc;
+
+public:
+ CWKmerBinCompleter(CKMCParams &Params, CKMCQueues &Queues);
+ ~CWKmerBinCompleter();
+
+ // Runs kbc->ProcessBins()
+ void operator()();
+
+ // Forwards to kbc->GetTotal() when kbc is not null
+ void GetTotal(uint64 &_n_unique, uint64 &_n_cutoff_min, uint64 &_n_cutoff_max, uint64 &_n_total);
+};
+
+#endif
+
+// ***** EOF
diff --git a/kmer_counter/kb_reader.h b/kmer_counter/kb_reader.h
new file mode 100755
index 0000000..8afee21
--- /dev/null
+++ b/kmer_counter/kb_reader.h
@@ -0,0 +1,228 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#ifndef _KB_READER_H
+#define _KB_READER_H
+
+#include "defs.h"
+#include "params.h"
+#include "kmer.h"
+#include "s_mapper.h"
+#include "radix.h"
+#include <string>
+#include <algorithm>
+#include <numeric>
+#include <array>
+#include <vector>
+#include <stdio.h>
+#include <boost/filesystem.hpp>
+
+
+//************************************************************************************************************
+// CKmerBinReader - reader of bins from distribution phase
+//************************************************************************************************************
+template <typename KMER_T, unsigned SIZE> class CKmerBinReader {
+ // All pointers below are copied from CKMCQueues in the constructor; nothing is allocated here
+ CMemoryMonitor *mm;
+ CSignatureMapper* s_mapper;
+
+ CBinDesc *bd;
+ CBinQueue *bq;
+
+ CMemoryBins *memory_bins;
+
+ // Settings copied from CKMCParams in the constructor
+ int32 cutoff_min, cutoff_max;
+ int32 counter_max;
+ int32 kmer_len;
+ int32 lut_prefix_len;
+ uint32 max_x;
+
+ bool both_strands;
+ bool use_quake;
+
+ // Rounds x up to the nearest multiple of ALIGNMENT (ALIGNMENT is defined outside this header)
+ int64 round_up_to_alignment(int64 x)
+ {
+ return (x + ALIGNMENT-1) / ALIGNMENT * ALIGNMENT;
+ }
+
+public:
+ CKmerBinReader(CKMCParams &Params, CKMCQueues &Queues);
+ ~CKmerBinReader();
+
+ // Reads every bin, reserves its memory and pushes it to the bin queue
+ void ProcessBins();
+};
+
+
+//----------------------------------------------------------------------------------
+// Assign monitors and queues
+template <typename KMER_T, unsigned SIZE> CKmerBinReader<KMER_T, SIZE>::CKmerBinReader(CKMCParams &Params, CKMCQueues &Queues)
+{
+	// Shared objects borrowed from the queues container
+	mm          = Queues.mm;
+	bd          = Queues.bd;
+	bq          = Queues.bq;
+	memory_bins = Queues.memory_bins;
+	s_mapper    = Queues.s_mapper;
+
+	// Run parameters
+	kmer_len       = Params.kmer_len;
+	lut_prefix_len = Params.lut_prefix_len;
+	max_x          = Params.max_x;
+	cutoff_min     = Params.cutoff_min;
+	cutoff_max     = Params.cutoff_max;
+	counter_max    = Params.counter_max;
+	both_strands   = Params.both_strands;
+	use_quake      = Params.use_quake;
+}
+
+//----------------------------------------------------------------------------------
+template <typename KMER_T, unsigned SIZE> CKmerBinReader<KMER_T, SIZE>::~CKmerBinReader()
+{
+ // Nothing to release: the constructor only copies pointers and values, it allocates nothing
+}
+
+//----------------------------------------------------------------------------------
+// Read all bins from temporary HDD
+template <typename KMER_T, unsigned SIZE> void CKmerBinReader<KMER_T, SIZE>::ProcessBins()
+{
+	// For every bin (in the order given by bd->get_next_random_bin()):
+	//  - fetch its description,
+	//  - tell memory_bins how much memory each later stage needs for it,
+	//  - load the bin contents from its temporary file,
+	//  - push the bin (or an empty placeholder) to the queue of bins to process.
+	// On an unreadable or truncated bin file the process is terminated with exit(1).
+	uchar *data;
+	uint64 readed;
+
+	int32 bin_id;
+	CMemDiskFile *file;
+	string name;
+	uint64 size;
+	uint64 n_rec;
+	uint64 n_plus_x_recs;
+	uint32 buffer_size;
+	uint32 kmer_len;		// filled by bd->read(); shadows the member of the same name
+
+	bd->init_random();
+	while((bin_id = bd->get_next_random_bin()) >= 0)		// Get id of the next bin to read
+	{
+		bd->read(bin_id, file, name, size, n_rec, n_plus_x_recs, buffer_size, kmer_len);
+#ifdef DEBUG_MODE
+		// NOTE(review): c_disk is not declared in this function - confirm that this branch still builds
+		cout << bin_id << ": " << name << " " << c_disk << " " << size << " " << n_rec << "\n";
+#else
+		cout << "*";
+#endif
+		fflush(stdout);
+
+
+		// Reserve memory necessary to process the current bin at all next stages
+		uint64 input_kmer_size;
+		uint64 kxmer_counter_size;		// 64-bit: n_plus_x_recs * 4 does not fit in 32 bits for very large bins
+		uint32 kxmer_symbols;
+		if (max_x && !use_quake)
+		{
+			input_kmer_size = n_plus_x_recs * sizeof(KMER_T);
+			kxmer_counter_size = n_plus_x_recs * sizeof(uint32);
+			kxmer_symbols = kmer_len + max_x + 1;
+		}
+		else
+		{
+			input_kmer_size = n_rec * sizeof(KMER_T);
+			kxmer_counter_size = 0;
+			kxmer_symbols = kmer_len;
+		}
+		uint64 max_out_recs = (n_rec+1) / max(cutoff_min, 1);
+
+		uint64 counter_size = min(BYTE_LOG(cutoff_max), BYTE_LOG(counter_max));
+		if(KMER_T::QUALITY_SIZE > counter_size)
+			counter_size = KMER_T::QUALITY_SIZE;
+
+		uint32 kmer_symbols = kmer_len - lut_prefix_len;
+		uint64 kmer_bytes = kmer_symbols / 4;
+		uint64 out_buffer_size = max_out_recs * (kmer_bytes + counter_size);
+
+		uint32 rec_len = (kxmer_symbols + 3) / 4;
+
+		// The shift is done on a 64-bit operand; a plain int `1` overflows once 2 * lut_prefix_len reaches 31
+		uint64 lut_recs = (uint64) 1 << (2 * lut_prefix_len);
+		uint64 lut_size = lut_recs * sizeof(uint64);
+
+		memory_bins->init(bin_id, rec_len, round_up_to_alignment(size), round_up_to_alignment(input_kmer_size), round_up_to_alignment(out_buffer_size), round_up_to_alignment(kxmer_counter_size), round_up_to_alignment(lut_size));
+
+		// Process the bin if it is not empty
+		if(size > 0)
+		{
+			if (file == NULL)
+			{
+				cout << "Error: Cannot open temporary file: " << name << "\n"; fflush(stdout);
+				exit(1);
+			}
+			else
+				file->Rewind();
+
+			memory_bins->reserve(bin_id, data, CMemoryBins::mba_input_file);
+			readed = file->Read(data, 1, size);
+			if(readed != size)
+			{
+				cout << "Error: Corrupted file: " << name << "   " << "Real size : " << readed << "   " << "Should be : " << size << "\n";
+				fflush(stdout);
+				exit(1);
+			}
+
+			// Push bin data to a queue of bins to process
+			bq->push(bin_id, data, size, n_rec);
+		}
+		else
+			// Push empty bin to process (necessary, since all bin ids must be processed)
+			bq->push(bin_id, NULL, 0, 0);
+
+		// The file pointer is only validated above for non-empty bins,
+		// so guard the close against a missing file of an empty bin
+		if(file != NULL)
+			file->Close();
+	}
+	bq->mark_completed();
+
+	fflush(stdout);
+}
+
+
+//************************************************************************************************************
+// CWKmerBinReader - wrapper for multithreading purposes
+//************************************************************************************************************
+
+//----------------------------------------------------------------------------------
+template <typename KMER_T, unsigned SIZE> class CWKmerBinReader {
+ // Reader created with new in the constructor and deleted in the destructor.
+ // NOTE(review): no copy constructor / assignment is declared, so copying this wrapper
+ // would delete the same pointer twice - confirm it is always passed by reference.
+ CKmerBinReader<KMER_T, SIZE> *kbr;
+
+public:
+ CWKmerBinReader(CKMCParams &Params, CKMCQueues &Queues);
+ ~CWKmerBinReader();
+
+ // Runs kbr->ProcessBins()
+ void operator()();
+};
+
+//----------------------------------------------------------------------------------
+// Constructor
+template <typename KMER_T, unsigned SIZE> CWKmerBinReader<KMER_T, SIZE>::CWKmerBinReader(CKMCParams &Params, CKMCQueues &Queues)
+	: kbr(new CKmerBinReader<KMER_T, SIZE>(Params, Queues))	// the wrapper owns the reader it creates
+{
+}
+
+//----------------------------------------------------------------------------------
+// Destructor
+template <typename KMER_T, unsigned SIZE> CWKmerBinReader<KMER_T, SIZE>::~CWKmerBinReader()
+{
+	// Release the reader allocated in the constructor
+	delete this->kbr;
+}
+
+//----------------------------------------------------------------------------------
+// Execution
+template <typename KMER_T, unsigned SIZE> void CWKmerBinReader<KMER_T, SIZE>::operator()()
+{
+	// Thread entry point: all the work is done by the wrapped reader
+	CKmerBinReader<KMER_T, SIZE> &reader = *kbr;
+	reader.ProcessBins();
+}
+
+#endif
+
+// ***** EOF
diff --git a/kmer_counter/kb_sorter.h b/kmer_counter/kb_sorter.h
new file mode 100755
index 0000000..a3df2b0
--- /dev/null
+++ b/kmer_counter/kb_sorter.h
@@ -0,0 +1,1362 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#ifndef _KB_SORTER_H
+#define _KB_SORTER_H
+
+#define DEBUGG_INFO
+
+#include "defs.h"
+#include "params.h"
+#include "kmer.h"
+#include "radix.h"
+#include "s_mapper.h"
+#include <string>
+#include <algorithm>
+#include <numeric>
+#include <array>
+#include <vector>
+#include <stdio.h>
+
+#include "kxmer_set.h"
+#include "rev_byte.h"
+
+template<unsigned SIZE> class CKxmerExpander;
+//************************************************************************************************************
+template <typename KMER_T, unsigned SIZE> class CKmerBinSorter_Impl;
+
+//************************************************************************************************************
+// CKmerBinSorter - sorting of k-mers in a bin
+//************************************************************************************************************
+template <typename KMER_T, unsigned SIZE> class CKmerBinSorter {
+ // Guards input_pos / buffer_input while expander threads append their results (see FromChildThread)
+ mutable mutex expander_mtx;
+ // Number of records written to buffer_input so far
+ uint32 input_pos;
+ // Shared objects copied from CKMCQueues in the constructor
+ CMemoryMonitor *mm;
+ CBinDesc *bd;
+ CBinQueue *bq;
+ CKmerQueue *kq;
+ CMemoryPool *pmm_prob, *pmm_radix_buf, *pmm_expand;
+ CMemoryBins *memory_bins;
+
+ CKXmerSet<KMER_T, SIZE> kxmer_set;
+
+ int32 n_bins;
+ // Id of the bin currently being processed (set by bq->pop() in ProcessBins)
+ int32 bin_id;
+
+ // Description of the current bin, filled by bq->pop() and bd->read() in ProcessBins
+ uchar *data;
+ uint64 size;
+ uint64 n_rec;
+ uint64 n_plus_x_recs;
+ string desc;
+ uint32 buffer_size;
+ uint32 kmer_len;
+ uint32 max_x;
+
+ //KMC_2 : remove these variables later
+ uint64 sum_n_rec, sum_n_plus_x_rec;
+
+ //
+ int n_omp_threads;
+
+ bool both_strands;
+ bool use_quake;
+ CSignatureMapper* s_mapper;
+
+ uint64 n_unique, n_cutoff_min, n_cutoff_max, n_total;
+ int32 cutoff_min, cutoff_max;
+ int32 lut_prefix_len;
+ int32 counter_max;
+
+ // Work arrays; buffer_input and buffer_tmp are obtained from memory_bins in CKmerBinSorter_Impl::Expand
+ KMER_T *buffer_input, *buffer_tmp, *buffer;
+ uint32 *kxmer_counters;
+
+ //void Expand(uint64 tmp_size);
+ void Sort();
+
+ // The k-mer type dependent steps live in these helper classes, which need access to the private state
+ friend class CKmerBinSorter_Impl<KMER_T, SIZE>;
+ friend class CKxmerExpander<SIZE>;
+
+public:
+ static uint32 PROB_BUF_SIZE;
+ CKmerBinSorter(CKMCParams &Params, CKMCQueues &Queues, int thread_no);
+ ~CKmerBinSorter();
+
+
+ // Returns the debug counters sum_n_rec and sum_n_plus_x_rec
+ void GetDebugStats(uint64& _sum_n_recs, uint64& _sum_n_plus_x_recs)
+ {
+ _sum_n_recs = sum_n_rec;
+ _sum_n_plus_x_recs = sum_n_plus_x_rec;
+ }
+
+ // Pops bins from the bin queue and runs Expand, Sort and Compact on each of them
+ void ProcessBins();
+};
+
+// Initial value of PROB_BUF_SIZE for each instantiation: 1 << 14 = 16384.
+// NOTE(review): the unit (bytes vs. entries) is not visible in this file - confirm where the value is used.
+template <typename KMER_T, unsigned SIZE> uint32 CKmerBinSorter<KMER_T, SIZE>::PROB_BUF_SIZE = 1 << 14;
+
+
+//************************************************************************************************************
+// CKmerBinSorter_Impl - implementation of k-mer type- and size-dependent functions
+//************************************************************************************************************
+// Primary template: declares the interface only; the specializations for CKmer<SIZE> and
+// CKmerQuake<SIZE> declared below provide Compact and Expand for the respective k-mer type.
+template <typename KMER_T, unsigned SIZE> class CKmerBinSorter_Impl {
+public:
+ static void Compact(CKmerBinSorter<KMER_T, SIZE> &ptr);
+ static void Expand(CKmerBinSorter<KMER_T, SIZE> &ptr, uint64 tmp_size);
+ // NOTE(review): the name is spelled "Comapct" and neither specialization below declares it;
+ // it is left untouched because it is part of the declared interface
+ static void ComapctKXmers(CKmerBinSorter<KMER_T, SIZE> &ptr, uint64& compacted_count);
+};
+
+// Specialization for plain k-mers (CKmer). Only Compact and Expand are public;
+// the remaining functions are private helpers, also reachable by the friend CKxmerExpander.
+template <unsigned SIZE> class CKmerBinSorter_Impl<CKmer<SIZE>, SIZE> {
+ static uint64 FindFirstSymbOccur(CKmerBinSorter<CKmer<SIZE>, SIZE> &ptr, uint64 start_pos, uint64 end_pos, uint32 offset, uchar symb);
+ static void InitKXMerSet(CKmerBinSorter<CKmer<SIZE>, SIZE> &ptr, uint64 start_pos, uint64 end_pos, uint32 offset, uint32 depth);
+ static void CompactKxmers(CKmerBinSorter<CKmer<SIZE>, SIZE> &ptr);
+ static void PreCompactKxmers(CKmerBinSorter<CKmer<SIZE>, SIZE> &ptr, uint64& compacted_count);
+ static void CompactKmers(CKmerBinSorter<CKmer<SIZE>, SIZE> &ptr);
+ // Expand variants chosen by Expand(): "Kxmers" when ptr.max_x != 0, "Kmers" otherwise;
+ // "Both" when ptr.both_strands is set, "All" otherwise
+ static void ExpandKxmersAll(CKmerBinSorter<CKmer<SIZE>, SIZE>& ptr, uint64 tmp_size);
+ static void ExpandKxmersBoth(CKmerBinSorter<CKmer<SIZE>, SIZE>& ptr, uint64 tmp_size);
+ static void ExpandKmersAll(CKmerBinSorter<CKmer<SIZE>, SIZE>& ptr, uint64 tmp_size);
+ static void ExpandKmersBoth(CKmerBinSorter<CKmer<SIZE>, SIZE>& ptr, uint64 tmp_size);
+ // Reads the next 2-bit symbol from data_p and advances byte_shift / pos
+ static void GetNextSymb(uchar& symb, uchar& byte_shift, uint64& pos, uchar* data_p);
+ // Appends `size` records from a worker thread's buffer to ptr.buffer_input under ptr.expander_mtx
+ static void FromChildThread(CKmerBinSorter<CKmer<SIZE>, SIZE>& ptr, CKmer<SIZE>* thread_buffer, uint64 size);
+ // Worker routine run in a separate thread by ExpandKxmersBoth for the byte range [start_pos, end_pos)
+ static void ExpandKxmerBothParaller(CKmerBinSorter<CKmer<SIZE>, SIZE>& ptr, uint64 start_pos, uint64 end_pos);
+ friend class CKxmerExpander<SIZE>;
+public:
+ static void Compact(CKmerBinSorter<CKmer<SIZE>, SIZE> &ptr);
+ static void Expand(CKmerBinSorter<CKmer<SIZE>, SIZE> &ptr, uint64 tmp_size);
+};
+
+// Specialization for k-mers carrying a quality value (CKmerQuake)
+template <unsigned SIZE> class CKmerBinSorter_Impl<CKmerQuake<SIZE>, SIZE> {
+ // Lookup tables indexed by the 6-bit quality read from the input; inv_prob_qual holds the reciprocals of prob_qual
+ static double prob_qual[94];
+ static double inv_prob_qual[94];
+ // Threshold compared against the k-mer probability in Expand
+ static double MIN_PROB_QUAL_VALUE;
+public:
+ static void Compact(CKmerBinSorter<CKmerQuake<SIZE>, SIZE> &ptr);
+ static void Expand(CKmerBinSorter<CKmerQuake<SIZE>, SIZE> &ptr, uint64 tmp_size);
+};
+
+// K-mers with probability less than MIN_PROB_QUAL_VALUE will not be counted.
+// Expand keeps a k-mer when kmer_prob >= MIN_PROB_QUAL_VALUE, so the value 0 set here rejects nothing.
+template <unsigned SIZE> double CKmerBinSorter_Impl<CKmerQuake<SIZE>, SIZE>::MIN_PROB_QUAL_VALUE = 0.0000;
+
+
+// Probability table indexed by quality value q (0..93).
+// For q >= 2 the entries equal 1 - 10^(-q/10) (e.g. q = 10 -> 0.9, q = 20 -> 0.99);
+// the entries for q = 0 and q = 1 are fixed at 0.25.
+template <unsigned SIZE> double CKmerBinSorter_Impl<CKmerQuake<SIZE>, SIZE>::prob_qual[94] = {
+ 0.2500000000000000, 0.2500000000000000, 0.3690426555198070, 0.4988127663727280,
+ 0.6018928294465030, 0.6837722339831620, 0.7488113568490420, 0.8004737685031120,
+ 0.8415106807538890, 0.8741074588205830, 0.9000000000000000, 0.9205671765275720,
+ 0.9369042655519810, 0.9498812766372730, 0.9601892829446500, 0.9683772233983160,
+ 0.9748811356849040, 0.9800473768503110, 0.9841510680753890, 0.9874107458820580,
+ 0.9900000000000000, 0.9920567176527570, 0.9936904265551980, 0.9949881276637270,
+ 0.9960189282944650, 0.9968377223398320, 0.9974881135684900, 0.9980047376850310,
+ 0.9984151068075390, 0.9987410745882060, 0.9990000000000000, 0.9992056717652760,
+ 0.9993690426555200, 0.9994988127663730, 0.9996018928294460, 0.9996837722339830,
+ 0.9997488113568490, 0.9998004737685030, 0.9998415106807540, 0.9998741074588210,
+ 0.9999000000000000, 0.9999205671765280, 0.9999369042655520, 0.9999498812766370,
+ 0.9999601892829450, 0.9999683772233980, 0.9999748811356850, 0.9999800473768500,
+ 0.9999841510680750, 0.9999874107458820, 0.9999900000000000, 0.9999920567176530,
+ 0.9999936904265550, 0.9999949881276640, 0.9999960189282940, 0.9999968377223400,
+ 0.9999974881135680, 0.9999980047376850, 0.9999984151068080, 0.9999987410745880,
+ 0.9999990000000000, 0.9999992056717650, 0.9999993690426560, 0.9999994988127660,
+ 0.9999996018928290, 0.9999996837722340, 0.9999997488113570, 0.9999998004737680,
+ 0.9999998415106810, 0.9999998741074590, 0.9999999000000000, 0.9999999205671770,
+ 0.9999999369042660, 0.9999999498812770, 0.9999999601892830, 0.9999999683772230,
+ 0.9999999748811360, 0.9999999800473770, 0.9999999841510680, 0.9999999874107460,
+ 0.9999999900000000, 0.9999999920567180, 0.9999999936904270, 0.9999999949881280,
+ 0.9999999960189280, 0.9999999968377220, 0.9999999974881140, 0.9999999980047380,
+ 0.9999999984151070, 0.9999999987410750, 0.9999999990000000, 0.9999999992056720,
+ 0.9999999993690430, 0.9999999994988130 };
+
+// Reciprocals of the prob_qual entries (e.g. 1 / 0.25 = 4, 1 / 0.9 = 1.111...), indexed by quality value 0..93.
+// Expand multiplies by these values to remove the contribution of the symbol that leaves the k-mer window.
+template <unsigned SIZE> double CKmerBinSorter_Impl<CKmerQuake<SIZE>, SIZE>::inv_prob_qual[94] = {
+ 4.0000000000000000, 4.0000000000000000, 2.7097138638119600, 2.0047602375372500,
+ 1.6614253419825500, 1.4624752955742600, 1.3354498310601800, 1.2492601748462100,
+ 1.1883390465158700, 1.1440241012807300, 1.1111111111111100, 1.0862868300084900,
+ 1.0673449110735400, 1.0527631448218000, 1.0414613220148200, 1.0326554320337200,
+ 1.0257660789563300, 1.0203588353185700, 1.0161041657513100, 1.0127497641386300,
+ 1.0101010101010100, 1.0080068832818700, 1.0063496369454600, 1.0050371177272600,
+ 1.0039969839853900, 1.0031723093832600, 1.0025182118938000, 1.0019992513458400,
+ 1.0015874090662800, 1.0012605123027600, 1.0010010010010000, 1.0007949596936500,
+ 1.0006313557030000, 1.0005014385482300, 1.0003982657229900, 1.0003163277976500,
+ 1.0002512517547400, 1.0001995660501600, 1.0001585144420900, 1.0001259083921100,
+ 1.0001000100010000, 1.0000794391335500, 1.0000630997157700, 1.0000501212353700,
+ 1.0000398123020100, 1.0000316237766300, 1.0000251194952900, 1.0000199530212600,
+ 1.0000158491831200, 1.0000125894126100, 1.0000100001000000, 1.0000079433454400,
+ 1.0000063096132600, 1.0000050118974600, 1.0000039810875500, 1.0000031622876600,
+ 1.0000025118927400, 1.0000019952663000, 1.0000015848957000, 1.0000012589270000,
+ 1.0000010000010000, 1.0000007943288700, 1.0000006309577400, 1.0000005011874800,
+ 1.0000003981073300, 1.0000003162278700, 1.0000002511887100, 1.0000001995262700,
+ 1.0000001584893400, 1.0000001258925600, 1.0000001000000100, 1.0000000794328300,
+ 1.0000000630957400, 1.0000000501187300, 1.0000000398107200, 1.0000000316227800,
+ 1.0000000251188600, 1.0000000199526200, 1.0000000158489300, 1.0000000125892500,
+ 1.0000000100000000, 1.0000000079432800, 1.0000000063095700, 1.0000000050118700,
+ 1.0000000039810700, 1.0000000031622800, 1.0000000025118900, 1.0000000019952600,
+ 1.0000000015848900, 1.0000000012589300, 1.0000000010000000, 1.0000000007943300,
+ 1.0000000006309600, 1.0000000005011900 };
+
+
+//************************************************************************************************************
+// CKmerBinSorter
+//************************************************************************************************************
+
+//----------------------------------------------------------------------------------
+// Assign queues and monitors
+template <typename KMER_T, unsigned SIZE> CKmerBinSorter<KMER_T, SIZE>::CKmerBinSorter(CKMCParams &Params, CKMCQueues &Queues, int thread_no) : kxmer_set(Params.kmer_len)
+{
+	// Shared monitors, queues and memory pools
+	mm            = Queues.mm;
+	bd            = Queues.bd;
+	bq            = Queues.bq;
+	kq            = Queues.kq;
+	s_mapper      = Queues.s_mapper;
+	memory_bins   = Queues.memory_bins;
+	pmm_prob      = Queues.pmm_prob;
+	pmm_radix_buf = Queues.pmm_radix_buf;
+	pmm_expand    = Queues.pmm_expand;
+
+	// Run parameters
+	n_bins         = Params.n_bins;
+	max_x          = Params.max_x;
+	lut_prefix_len = Params.lut_prefix_len;
+	cutoff_min     = Params.cutoff_min;
+	cutoff_max     = Params.cutoff_max;
+	counter_max    = Params.counter_max;
+	both_strands   = Params.both_strands;
+	use_quake      = Params.use_quake;
+
+	// Number of threads assigned to this particular sorter
+	n_omp_threads = Params.n_omp_threads[thread_no];
+
+	// Debug counters start from zero
+	sum_n_rec = 0;
+	sum_n_plus_x_rec = 0;
+}
+
+//----------------------------------------------------------------------------------
+template <typename KMER_T, unsigned SIZE> CKmerBinSorter<KMER_T, SIZE>::~CKmerBinSorter()
+{
+ // Nothing to release: the constructor only copies pointers and values, it allocates nothing
+
+}
+
+//----------------------------------------------------------------------------------
+// Process the bins
+template <typename KMER_T, unsigned SIZE> void CKmerBinSorter<KMER_T, SIZE>::ProcessBins()
+{
+	CMemDiskFile *bin_file;
+	uint64 bin_bytes;
+	uint64 bin_recs;
+
+	SetMemcpyCacheLimit(8);
+
+	// Keep taking bins until the queue reports that no more will arrive
+	for (; !bq->completed(); )
+	{
+		// Take the next bin to sort; when none could be taken, re-check the queue state
+		if (bq->pop(bin_id, data, size, n_rec))
+		{
+			// Fetch the description of the bin
+			bd->read(bin_id, bin_file, desc, bin_bytes, bin_recs, n_plus_x_recs, buffer_size, kmer_len);
+
+			// Unpack the stored records into the input array; the raw bin data is not needed afterwards
+			CKmerBinSorter_Impl<KMER_T, SIZE>::Expand(*this, bin_bytes);
+			memory_bins->free(bin_id, CMemoryBins::mba_input_file);
+
+			// Sort the k-mers of the bin
+			Sort();
+
+			// Merge equal k-mers, which are neighbours after sorting
+			CKmerBinSorter_Impl<KMER_T, SIZE>::Compact(*this);
+		}
+	}
+
+	// Signal that no more k-mers will be produced
+	kq->mark_completed();
+}
+
+template <unsigned SIZE> inline void CKmerBinSorter_Impl<CKmer<SIZE>, SIZE>::GetNextSymb(uchar& symb, uchar& byte_shift, uint64& pos, uchar* data_p)
+{
+	// Extract the 2-bit symbol found at bit offset byte_shift of the current byte
+	symb = (data_p[pos] >> byte_shift) & 3;
+
+	// Symbols are consumed from the top bit pair (shift 6) down to the bottom one (shift 0)
+	if (byte_shift != 0)
+	{
+		byte_shift -= 2;
+		return;
+	}
+
+	// The byte is used up: continue with the top bit pair of the next byte
+	byte_shift = 6;
+	++pos;
+}
+
+template <unsigned SIZE> void CKmerBinSorter_Impl<CKmer<SIZE>, SIZE>::ExpandKmersAll(CKmerBinSorter<CKmer<SIZE>, SIZE>& ptr, uint64 tmp_size)
+{
+	// Unpacks every record of the bin into consecutive k-mers stored in ptr.buffer_input.
+	// Record layout: one byte with the number of additional symbols, then the packed
+	// symbols (four per byte) of the first k-mer followed by the additional symbols.
+	uchar *src = ptr.data;
+	const uint32 packed_bytes = (ptr.kmer_len + 3) / 4;
+	const uint32 align_shift = SIZE * 32 - ptr.kmer_len;
+
+	CKmer<SIZE> mask;
+	mask.set_n_1(ptr.kmer_len * 2);
+
+	CKmer<SIZE> cur;
+	cur.clear();
+
+	ptr.input_pos = 0;
+	uint64 pos = 0;
+	while (pos < tmp_size)
+	{
+		const uint32 extra = src[pos++];
+
+		// Load the packed first k-mer into the topmost bytes
+		uint32 dst_byte = 8 * SIZE - 1;
+		for (uint32 i = 0; i < packed_bytes; ++i, --dst_byte)
+			cur.set_byte(dst_byte, src[pos + i]);
+		pos += packed_bytes;
+
+		// When kmer_len is not a multiple of 4, the last loaded byte also holds the first additional symbols
+		uchar byte_shift = 6 - (ptr.kmer_len % 4) * 2;
+		if (byte_shift != 6)
+			--pos;
+
+		if (align_shift)
+			cur.SHR(align_shift);
+		cur.mask(mask);
+		ptr.buffer_input[ptr.input_pos++].set(cur);
+
+		// Each additional symbol yields the next k-mer of the read
+		for (uint32 i = 0; i < extra; ++i)
+		{
+			uchar symb;
+			GetNextSymb(symb, byte_shift, pos, src);
+			cur.SHL_insert_2bits(symb);
+			cur.mask(mask);
+			ptr.buffer_input[ptr.input_pos++].set(cur);
+		}
+
+		// Skip the partially used byte so that the next record starts on a byte boundary
+		if (byte_shift != 6)
+			++pos;
+	}
+}
+template <unsigned SIZE> void CKmerBinSorter_Impl<CKmer<SIZE>, SIZE>::ExpandKmersBoth(CKmerBinSorter<CKmer<SIZE>, SIZE>& ptr, uint64 tmp_size)
+{
+	// Unpacks every record of the bin and stores, for each k-mer, the smaller of the
+	// k-mer and its reverse-strand counterpart in ptr.buffer_input.
+	uchar *src = ptr.data;
+	const uint32 packed_bytes = (ptr.kmer_len + 3) / 4;
+	const uint32 rev_insert_shift = (ptr.kmer_len - 1) * 2;
+	const uint32 align_shift = SIZE * 32 - ptr.kmer_len;
+
+	CKmer<SIZE> mask;
+	mask.set_n_1(ptr.kmer_len * 2);
+
+	CKmer<SIZE> fwd, rev, canonical;
+
+	ptr.input_pos = 0;
+	uint64 pos = 0;
+	while (pos < tmp_size)
+	{
+		fwd.clear();
+		rev.clear();
+		const uint32 extra = src[pos++];
+
+		// Load the first k-mer: forward form into the top bytes, byte-reversed form into the bottom bytes
+		uint32 fwd_byte = 8 * SIZE - 1;
+		uint32 rev_byte = 0;
+		for (uint32 i = 0; i < packed_bytes; ++i, --fwd_byte, ++rev_byte)
+		{
+			const uchar packed = src[pos + i];
+			fwd.set_byte(fwd_byte, packed);
+			rev.set_byte(rev_byte, CRev_byte::lut[packed]);
+		}
+		pos += packed_bytes;
+
+		// When kmer_len is not a multiple of 4, the last loaded byte also holds the first additional symbols
+		uchar byte_shift = 6 - (ptr.kmer_len % 4) * 2;
+		if (byte_shift != 6)
+			--pos;
+
+		if (align_shift)
+			fwd.SHR(align_shift);
+
+		fwd.mask(mask);
+		rev.mask(mask);
+
+		canonical = fwd < rev ? fwd : rev;
+		ptr.buffer_input[ptr.input_pos++].set(canonical);
+
+		// Each additional symbol extends both forms and yields one more canonical k-mer
+		for (uint32 i = 0; i < extra; ++i)
+		{
+			uchar symb;
+			GetNextSymb(symb, byte_shift, pos, src);
+
+			fwd.SHL_insert_2bits(symb);
+			fwd.mask(mask);
+			rev.SHR_insert_2bits(3 - symb, rev_insert_shift);
+
+			canonical = fwd < rev ? fwd : rev;
+			ptr.buffer_input[ptr.input_pos++].set(canonical);
+		}
+
+		// Skip the partially used byte so that the next record starts on a byte boundary
+		if (byte_shift != 6)
+			++pos;
+	}
+}
+
+template <unsigned SIZE> void CKmerBinSorter_Impl<CKmer<SIZE>, SIZE>::FromChildThread(CKmerBinSorter<CKmer<SIZE>, SIZE>& ptr, CKmer<SIZE>* thread_buffer, uint64 size)
+{
+	// Only one worker at a time may append to the shared input array
+	lock_guard<mutex> guard(ptr.expander_mtx);
+
+	// Copy the worker's records right behind the ones already stored, then advance the fill position
+	CKmer<SIZE>* dest = ptr.buffer_input + ptr.input_pos;
+	A_memcpy(dest, thread_buffer, size * sizeof(CKmer<SIZE>));
+	ptr.input_pos += size;
+}
+
+// Worker routine: expands the records stored in ptr.data[start_pos, end_pos) into (k+x)-mers.
+// The results are collected in a private buffer taken from ptr.pmm_expand and appended to
+// ptr.buffer_input through FromChildThread whenever the private buffer fills up.
+// For every k-mer both the forward form and the reverse-strand form are tracked; a (k+x)-mer keeps
+// being extended for as long as the same form stays the smaller one and fewer than max_x symbols were appended.
+template<unsigned SIZE> void CKmerBinSorter_Impl<CKmer<SIZE>, SIZE>::ExpandKxmerBothParaller(CKmerBinSorter<CKmer<SIZE>, SIZE>& ptr, uint64 start_pos, uint64 end_pos)
+{
+ uchar* _raw_buffer;
+ ptr.pmm_expand->reserve(_raw_buffer);
+ CKmer<SIZE>* buffer = (CKmer<SIZE>*)_raw_buffer;
+
+ CKmer<SIZE> kmer, rev_kmer, kmer_mask;
+ CKmer<SIZE> kxmer_mask;
+ bool kmer_lower; //true if kmer is lower than its rev. comp
+ uint32 x, additional_symbols;
+ uchar symb;
+ uint32 kmer_bytes = (ptr.kmer_len + 3) / 4;
+ uint32 rev_shift = ptr.kmer_len * 2 - 2;
+ uchar *data_p = ptr.data;
+ kmer_mask.set_n_1(ptr.kmer_len * 2);
+ uint32 kmer_shr = SIZE * 32 - ptr.kmer_len;
+
+ // NOTE(review): kxmer_mask is initialised here but never used in this function
+ kxmer_mask.set_n_1((ptr.kmer_len + ptr.max_x + 1) * 2);
+
+ uint64 buffer_pos = 0;
+ uint64 pos = start_pos;
+
+ // One iteration per stored record
+ while (pos < end_pos)
+ {
+ kmer.clear();
+ rev_kmer.clear();
+ // The first byte of a record holds the number of symbols that follow the first k-mer
+ additional_symbols = data_p[pos++];
+
+ //building kmer
+ for (uint32 i = 0, kmer_pos = 8 * SIZE - 1, kmer_rev_pos = 0; i < kmer_bytes; ++i, --kmer_pos, ++kmer_rev_pos)
+ {
+ kmer.set_byte(kmer_pos, data_p[pos + i]);
+ rev_kmer.set_byte(kmer_rev_pos, CRev_byte::lut[data_p[pos + i]]);
+ }
+ pos += kmer_bytes;
+ // When kmer_len is not a multiple of 4 the last loaded byte also holds the first additional symbols
+ uchar byte_shift = 6 - (ptr.kmer_len % 4) * 2;
+ if (byte_shift != 6)
+ --pos;
+
+ if (kmer_shr)
+ kmer.SHR(kmer_shr);
+
+ kmer.mask(kmer_mask);
+ rev_kmer.mask(kmer_mask);
+
+ // Start the first (k+x)-mer of the record from the smaller form; x counts the symbols appended to it
+ kmer_lower = kmer < rev_kmer;
+ x = 0;
+ if (kmer_lower)
+ buffer[buffer_pos].set(kmer);
+ else
+ buffer[buffer_pos].set(rev_kmer);
+
+ uint32 symbols_left = additional_symbols;
+ while (symbols_left)
+ {
+ GetNextSymb(symb, byte_shift, pos, data_p);
+ kmer.SHL_insert_2bits(symb);
+ kmer.mask(kmer_mask);
+ rev_kmer.SHR_insert_2bits(3 - symb, rev_shift);
+ --symbols_left;
+
+ if (kmer_lower)
+ {
+ // The current (k+x)-mer is built from the forward form
+ if (kmer < rev_kmer)
+ {
+ // Orientation unchanged: append the symbol to the current (k+x)-mer
+ buffer[buffer_pos].SHL_insert_2bits(symb);
+ ++x;
+ if (x == ptr.max_x)
+ {
+ // Out of input: leave the loop, the record is closed after the loop
+ if (!symbols_left)
+ break;
+
+ // Close the full (k+x)-mer by storing x in its length field and flush the buffer if needed
+ buffer[buffer_pos++].set_2bits(x, ptr.kmer_len * 2 + ptr.max_x * 2);
+ if (buffer_pos >= EXPAND_BUFFER_RECS)
+ {
+ FromChildThread(ptr, buffer, buffer_pos);
+ buffer_pos = 0;
+ }
+ x = 0;
+
+ // Start a new (k+x)-mer from the next k-mer of the record
+ GetNextSymb(symb, byte_shift, pos, data_p);
+ kmer.SHL_insert_2bits(symb);
+ kmer.mask(kmer_mask);
+ rev_kmer.SHR_insert_2bits(3 - symb, rev_shift);
+ --symbols_left;
+
+ kmer_lower = kmer < rev_kmer;
+
+ if (kmer_lower)
+ buffer[buffer_pos].set(kmer);
+ else
+ buffer[buffer_pos].set(rev_kmer);
+ }
+ }
+ else
+ {
+ // Orientation changed: close the current (k+x)-mer and start a new one from the reverse form
+ buffer[buffer_pos++].set_2bits(x, ptr.kmer_len * 2 + ptr.max_x * 2);
+ if (buffer_pos >= EXPAND_BUFFER_RECS)
+ {
+ FromChildThread(ptr, buffer, buffer_pos);
+ buffer_pos = 0;
+ }
+ x = 0;
+
+ kmer_lower = false;
+ buffer[buffer_pos].set(rev_kmer);
+
+ }
+ }
+ else
+ {
+ // The current (k+x)-mer is built from the reverse form
+ if (!(kmer < rev_kmer))
+ {
+ // Orientation unchanged: store the complemented symbol in the slot just above the symbols stored so far
+ buffer[buffer_pos].set_2bits(3 - symb, ptr.kmer_len * 2 + x * 2);
+ ++x;
+ if (x == ptr.max_x)
+ {
+ // Out of input: leave the loop, the record is closed after the loop
+ if (!symbols_left)
+ break;
+
+ // Close the full (k+x)-mer by storing x in its length field and flush the buffer if needed
+ buffer[buffer_pos++].set_2bits(x, ptr.kmer_len * 2 + ptr.max_x * 2);
+ if (buffer_pos >= EXPAND_BUFFER_RECS)
+ {
+ FromChildThread(ptr, buffer, buffer_pos);
+ buffer_pos = 0;
+ }
+ x = 0;
+
+ // Start a new (k+x)-mer from the next k-mer of the record
+ GetNextSymb(symb, byte_shift, pos, data_p);
+ kmer.SHL_insert_2bits(symb);
+ kmer.mask(kmer_mask);
+ rev_kmer.SHR_insert_2bits(3 - symb, rev_shift);
+ --symbols_left;
+
+ kmer_lower = kmer < rev_kmer;
+
+ if (kmer_lower)
+ buffer[buffer_pos].set(kmer);
+ else
+ buffer[buffer_pos].set(rev_kmer);
+ }
+ }
+ else
+ {
+ // Orientation changed: close the current (k+x)-mer and start a new one from the forward form
+ buffer[buffer_pos++].set_2bits(x, ptr.kmer_len * 2 + ptr.max_x * 2);
+ if (buffer_pos >= EXPAND_BUFFER_RECS)
+ {
+ FromChildThread(ptr, buffer, buffer_pos);
+ buffer_pos = 0;
+ }
+ x = 0;
+
+ buffer[buffer_pos].set(kmer);
+ kmer_lower = true;
+ }
+ }
+
+ }
+ // Close the last (k+x)-mer of the record
+ buffer[buffer_pos++].set_2bits(x, ptr.kmer_len * 2 + ptr.max_x * 2);
+ if (buffer_pos >= EXPAND_BUFFER_RECS)
+ {
+ FromChildThread(ptr, buffer, buffer_pos);
+ buffer_pos = 0;
+ }
+ // Skip the partially used byte so that the next record starts on a byte boundary
+ if (byte_shift != 6)
+ ++pos;
+ }
+ // Hand over whatever is still waiting in the private buffer
+ if (buffer_pos)
+ {
+ FromChildThread(ptr, buffer, buffer_pos);
+ buffer_pos = 0;
+ }
+ ptr.pmm_expand->free(_raw_buffer);
+}
+
+
+template <unsigned SIZE> void CKmerBinSorter_Impl<CKmer<SIZE>, SIZE>::ExpandKxmersBoth(CKmerBinSorter<CKmer<SIZE>, SIZE>& ptr, uint64 tmp_size)
+{
+	// Splits the bin data into chunks of whole records and expands each chunk in its own thread
+	ptr.input_pos = 0;
+
+	const uint32 n_workers = ptr.n_omp_threads;
+	const uint64 chunk_bytes = (tmp_size + n_workers - 1) / n_workers;
+
+	vector<thread> workers;
+	uint32 chunks_started = 0;
+	uint64 chunk_begin = 0;
+	uint64 rec_pos = 0;
+
+	// Walk from record to record; a chunk ends at the first record boundary at or past its byte quota
+	while (rec_pos < tmp_size)
+	{
+		if (rec_pos >= (chunks_started + 1) * chunk_bytes)
+		{
+			workers.push_back(thread(ExpandKxmerBothParaller, std::ref(ptr), chunk_begin, rec_pos));
+			chunk_begin = rec_pos;
+			++chunks_started;
+		}
+
+		// Record size: one count byte plus the packed symbols of the k-mer and its additional symbols
+		rec_pos += 1 + (ptr.data[rec_pos] + ptr.kmer_len + 3) / 4;
+	}
+
+	// The remaining records form the last chunk
+	if (chunk_begin < rec_pos)
+	{
+		workers.push_back(thread(ExpandKxmerBothParaller, std::ref(ptr), chunk_begin, tmp_size));
+	}
+
+	for (auto& worker : workers)
+		worker.join();
+
+	// The number of records actually produced replaces the value read from the bin description
+	ptr.n_plus_x_recs = ptr.input_pos;
+}
+
+// Expands every record of the bin into (k+x)-mers without looking at the reverse strand.
+// Each produced entry holds a k-mer followed by up to max_x further symbols; the number of
+// further symbols is stored in a 2-bit field at bit position (kmer_len + max_x) * 2.
+template<unsigned SIZE> void CKmerBinSorter_Impl<CKmer<SIZE>, SIZE>::ExpandKxmersAll(CKmerBinSorter<CKmer<SIZE>, SIZE>& ptr, uint64 tmp_size)
+{
+ ptr.input_pos = 0;
+ uint64 pos = 0;
+ CKmer<SIZE> kmer_mask;
+
+ CKmer<SIZE> kxmer;
+ CKmer<SIZE> kxmer_mask;
+ kxmer_mask.set_n_1((ptr.kmer_len + ptr.max_x) * 2);
+ uchar *data_p = ptr.data;
+
+ kmer_mask.set_n_1(ptr.kmer_len * 2);
+ // One iteration per stored record
+ while (pos < tmp_size)
+ {
+ kxmer.clear();
+ // The first byte of a record holds the number of symbols that follow the first k-mer
+ uint32 additional_symbols = data_p[pos++];
+
+ uchar symb;
+
+ uint32 kmer_bytes = (ptr.kmer_len + 3) / 4;
+ //building kmer
+ for (uint32 i = 0, kmer_pos = 8 * SIZE - 1; i < kmer_bytes; ++i, --kmer_pos)
+ {
+ kxmer.set_byte(kmer_pos, data_p[pos + i]);
+ }
+
+ pos += kmer_bytes;
+ // When kmer_len is not a multiple of 4 the last loaded byte also holds the first additional symbols
+ uchar byte_shift = 6 - (ptr.kmer_len % 4) * 2;
+ if (byte_shift != 6)
+ --pos;
+ uint32 kmer_shr = SIZE * 32 - ptr.kmer_len;
+
+ if (kmer_shr)
+ kxmer.SHR(kmer_shr);
+
+ kxmer.mask(kmer_mask);
+ // First entry of the record: the k-mer plus at most max_x of the additional symbols
+ uint32 tmp = MIN(ptr.max_x, additional_symbols);
+
+ for (uint32 i = 0; i < tmp; ++i)
+ {
+ GetNextSymb(symb, byte_shift, pos, data_p);
+ kxmer.SHL_insert_2bits(symb);
+ }
+ kxmer.set_2bits(tmp, (ptr.kmer_len + ptr.max_x) * 2);
+
+ ptr.buffer_input[ptr.input_pos++].set(kxmer);
+ additional_symbols -= tmp;
+
+ // The remaining symbols are consumed in groups of max_x + 1, plus a shorter final group
+ uint32 kxmers_count = additional_symbols / (ptr.max_x + 1);
+ uint32 kxmer_rest = additional_symbols % (ptr.max_x + 1);
+
+ for (uint32 j = 0; j < kxmers_count; ++j)
+ {
+ for (uint32 i = 0; i < ptr.max_x + 1; ++i)
+ {
+ GetNextSymb(symb, byte_shift, pos, data_p);
+ kxmer.SHL_insert_2bits(symb);
+ }
+
+ // Keep the last kmer_len + max_x symbols and mark the entry as carrying max_x extra symbols
+ kxmer.mask(kxmer_mask);
+
+ kxmer.set_2bits(ptr.max_x, (ptr.kmer_len + ptr.max_x) * 2);
+
+ ptr.buffer_input[ptr.input_pos++].set(kxmer);
+ }
+ if (kxmer_rest)
+ {
+ uint32 i = 0;
+ // The first symbol of the final group completes the next plain k-mer ...
+ GetNextSymb(symb, byte_shift, pos, data_p);
+ kxmer.SHL_insert_2bits(symb);
+ kxmer.mask(kmer_mask);
+ --kxmer_rest;
+ // ... and the other kxmer_rest symbols are appended to it
+ for (; i < kxmer_rest; ++i)
+ {
+ GetNextSymb(symb, byte_shift, pos, data_p);
+ kxmer.SHL_insert_2bits(symb);
+ }
+
+ kxmer.set_2bits(kxmer_rest, (ptr.kmer_len + ptr.max_x) * 2);
+ ptr.buffer_input[ptr.input_pos++].set(kxmer);
+ }
+ // Skip the partially used byte so that the next record starts on a byte boundary
+ if (byte_shift != 6)
+ ++pos;
+ }
+}
+
+//----------------------------------------------------------------------------------
+// Uncompact the kmers
+template <unsigned SIZE> void CKmerBinSorter_Impl<CKmer<SIZE>, SIZE>::Expand(CKmerBinSorter<CKmer<SIZE>, SIZE>& ptr, uint64 tmp_size)
+{
+	// Obtain the input and temporary arrays assigned to the current bin
+	uchar *input_mem, *tmp_mem;
+	ptr.memory_bins->reserve(ptr.bin_id, input_mem, CMemoryBins::mba_input_array);
+	ptr.memory_bins->reserve(ptr.bin_id, tmp_mem, CMemoryBins::mba_tmp_array);
+
+	ptr.buffer_input = (CKmer<SIZE> *) input_mem;
+	ptr.buffer_tmp = (CKmer<SIZE> *) tmp_mem;
+
+	// Select the expander: (k+x)-mers when max_x is non-zero, plain k-mers otherwise;
+	// the "Both" variants are used when both strands are taken into account
+	const bool with_extension = ptr.max_x != 0;
+
+	if (with_extension && ptr.both_strands)
+		ExpandKxmersBoth(ptr, tmp_size);
+	else if (with_extension)
+		ExpandKxmersAll(ptr, tmp_size);
+	else if (ptr.both_strands)
+		ExpandKmersBoth(ptr, tmp_size);
+	else
+		ExpandKmersAll(ptr, tmp_size);
+}
+
+
+//----
+ // Unpacks a quality-aware (CKmerQuake) bin from ptr.data into ptr.buffer_input.
+ //
+ // Input layout, as read below: a sequence of records filling tmp_size bytes. Each
+ // record starts with one byte holding the number of "additional symbols", followed
+ // by kmer_len + additional_symbols bytes. In each of those bytes the two top bits
+ // are the symbol and the six low bits are the quality index.
+ //
+ // For every window of kmer_len consecutive symbols the product of prob_qual[] over
+ // the window is maintained; the k-mer is written to the output only when that
+ // product is at least MIN_PROB_QUAL_VALUE, and the product is stored in its
+ // `quality` field (as float). When ptr.both_strands is set, the smaller of the
+ // k-mer and kmer_rev (built from 3 - symb, inserted at the opposite end) is stored.
+ template <unsigned SIZE> void CKmerBinSorter_Impl<CKmerQuake<SIZE>, SIZE>::Expand(CKmerBinSorter<CKmerQuake<SIZE>, SIZE>& ptr, uint64 tmp_size)
+ {
+ uchar *data_p = ptr.data;
+ uchar *raw_buffer_input, *raw_buffer_tmp;
+
+ ptr.memory_bins->reserve(ptr.bin_id, raw_buffer_input, CMemoryBins::mba_input_array);
+ ptr.memory_bins->reserve(ptr.bin_id, raw_buffer_tmp, CMemoryBins::mba_tmp_array);
+
+
+ ptr.buffer_input = (CKmerQuake<SIZE> *) raw_buffer_input;
+ ptr.buffer_tmp = (CKmerQuake<SIZE> *) raw_buffer_tmp;
+ CKmerQuake<SIZE> current_kmer;
+ CKmerQuake<SIZE> kmer_rev;
+ CKmerQuake<SIZE> kmer_can;
+ kmer_rev.clear();
+ // Bit position passed to SHR_insert_2bits when extending kmer_rev
+ uint32 kmer_len_shift = (ptr.kmer_len - 1) * 2;
+ CKmerQuake<SIZE> kmer_mask;
+ kmer_mask.set_n_1(ptr.kmer_len * 2);
+
+ ptr.input_pos = 0;
+ uint64 pos = 0;
+
+ // Scratch array of per-position inverse probabilities, taken from (and returned
+ // to) ptr.pmm_prob. NOTE(review): its capacity is not visible here; it must hold
+ // kmer_len + additional_symbols entries per record - confirm against the pool setup.
+ double *inv_probs;
+ ptr.pmm_prob->reserve(inv_probs);
+ double kmer_prob;
+ uchar qual, symb;
+ uint32 inv_probs_pos;
+ if (ptr.both_strands)
+ while (pos < tmp_size)
+ {
+ // Record header: number of symbols following the first k-mer
+ uchar additional_symbols = data_p[pos++];
+ inv_probs_pos = 0;
+ kmer_prob = 1.0;
+
+ // First k-mer of the record: accumulate the probability product from scratch
+ for (uint32 i = 0; i < ptr.kmer_len; ++i)
+ {
+ symb = (data_p[pos] >> 6) & 3;
+ qual = data_p[pos++] & 63;
+
+ inv_probs[inv_probs_pos++] = inv_prob_qual[qual];
+
+ current_kmer.SHL_insert_2bits(symb);
+ kmer_rev.SHR_insert_2bits(3 - symb, kmer_len_shift);
+ kmer_prob *= prob_qual[qual];
+ }
+ current_kmer.mask(kmer_mask);
+ if (kmer_prob >= MIN_PROB_QUAL_VALUE)
+ {
+ kmer_can = current_kmer < kmer_rev ? current_kmer : kmer_rev;
+ kmer_can.quality = (float)kmer_prob;
+ ptr.buffer_input[ptr.input_pos++].set(kmer_can);
+ }
+ // Slide the window one symbol at a time over the rest of the record
+ for (uint32 i = 0; i < additional_symbols; ++i)
+ {
+ symb = (data_p[pos] >> 6) & 3;
+ qual = data_p[pos++] & 63;
+
+ current_kmer.SHL_insert_2bits(symb);
+ current_kmer.mask(kmer_mask);
+ kmer_rev.SHR_insert_2bits(3 - symb, kmer_len_shift);
+
+ // Multiply in the new symbol and by the stored inverse of the symbol
+ // that just left the window (kmer_len positions back)
+ kmer_prob *= prob_qual[qual] * inv_probs[inv_probs_pos - ptr.kmer_len];
+ inv_probs[inv_probs_pos++] = inv_prob_qual[qual];
+ if (kmer_prob >= MIN_PROB_QUAL_VALUE)
+ {
+ kmer_can = current_kmer < kmer_rev ? current_kmer : kmer_rev;
+ kmer_can.quality = (float)kmer_prob;
+ ptr.buffer_input[ptr.input_pos++].set(kmer_can);
+ }
+ }
+ }
+ else
+ // Same procedure without the reverse k-mer: k-mers are stored as read
+ while (pos < tmp_size)
+ {
+ uchar additional_symbols = data_p[pos++];
+ inv_probs_pos = 0;
+ kmer_prob = 1.0;
+
+ for (uint32 i = 0; i < ptr.kmer_len; ++i)
+ {
+ symb = (data_p[pos] >> 6) & 3;
+ qual = data_p[pos++] & 63;
+
+ inv_probs[inv_probs_pos++] = inv_prob_qual[qual];
+
+ current_kmer.SHL_insert_2bits(symb);
+ kmer_prob *= prob_qual[qual];
+ }
+ current_kmer.mask(kmer_mask);
+ if (kmer_prob >= MIN_PROB_QUAL_VALUE)
+ {
+ current_kmer.quality = (float)kmer_prob;
+ ptr.buffer_input[ptr.input_pos++].set(current_kmer);
+ }
+ for (uint32 i = 0; i < additional_symbols; ++i)
+ {
+ symb = (data_p[pos] >> 6) & 3;
+ qual = data_p[pos++] & 63;
+
+ current_kmer.SHL_insert_2bits(symb);
+ current_kmer.mask(kmer_mask);
+
+ kmer_prob *= prob_qual[qual] * inv_probs[inv_probs_pos - ptr.kmer_len];
+ inv_probs[inv_probs_pos++] = inv_prob_qual[qual];
+ if (kmer_prob >= MIN_PROB_QUAL_VALUE)
+ {
+ current_kmer.quality = (float)kmer_prob;
+ ptr.buffer_input[ptr.input_pos++].set(current_kmer);
+ }
+ }
+ }
+ ptr.pmm_prob->free(inv_probs);
+ }
+
+
+//----------------------------------------------------------------------------------
+// Sort the kmers
+ // Radix-sorts the expanded records of the current bin and points `buffer` at
+ // whichever of the two arrays holds the sorted result.
+ //
+ // In (k+x)-mer mode (max_x set and not Quake) n_plus_x_recs records are sorted
+ // with a key of kmer_len + max_x + 1 symbols; otherwise n_rec records with a key
+ // of kmer_len symbols. rec_len is the key length in bytes (4 symbols per byte,
+ // rounded up). The running debug totals are updated before sorting.
+ template <typename KMER_T, unsigned SIZE> void CKmerBinSorter<KMER_T, SIZE>::Sort()
+ {
+     uint64 sort_rec = n_rec;
+     uint32 rec_len = (kmer_len + 3) / 4;
+     if (max_x && !use_quake)
+     {
+         sort_rec = n_plus_x_recs;
+         rec_len = (kmer_len + max_x + 1 + 3) / 4;
+     }
+
+     sum_n_plus_x_rec += n_plus_x_recs;
+     sum_n_rec += n_rec;
+
+     // NOTE(review): the pointers are kept in named locals and read back after the
+     // call because the radix routines may take them by reference - confirm in radix.h.
+     // An odd rec_len selects the tmp array as the result, an even one the input array.
+     if (sizeof(KMER_T) == 8)
+     {
+         // Record is a single 64-bit word: sort it as such
+         uint64 *in64 = (uint64*)buffer_input;
+         uint64 *tmp64 = (uint64*)buffer_tmp;
+
+         RadixSort_buffer(pmm_radix_buf, in64, tmp64, sort_rec, rec_len, n_omp_threads);
+
+         buffer = (rec_len % 2) ? (KMER_T*)tmp64 : (KMER_T*)in64;
+     }
+     else
+     {
+         // General record: byte-wise radix sort over the `data` member
+         uint32 *in32 = (uint32*)buffer_input;
+         uint32 *tmp32 = (uint32*)buffer_tmp;
+
+         RadixSort_uint8(in32, tmp32, sort_rec, sizeof(KMER_T), offsetof(KMER_T, data), SIZE*sizeof(typename KMER_T::data_t), rec_len, n_omp_threads);
+
+         buffer = (rec_len % 2) ? (KMER_T*)tmp32 : (KMER_T*)in32;
+     }
+ }
+
+//----------------------------------------------------------------------------------
+ // Binary search for the position of the first occurrence of symbol 'symb' in [start_pos, end_pos). 'offset' selects which symbol of the (k+x)-mer is examined.
+ // Lower-bound binary search over ptr.buffer[start_pos, end_pos): returns the first
+ // index whose symbol at position `offset` is not smaller than `symb`, or end_pos
+ // when every symbol in the range is smaller. The range must be ordered by that
+ // symbol for the result to be meaningful.
+ template <unsigned SIZE> uint64 CKmerBinSorter_Impl<CKmer<SIZE>, SIZE>::FindFirstSymbOccur(CKmerBinSorter<CKmer<SIZE>, SIZE> &ptr, uint64 start_pos, uint64 end_pos, uint32 offset, uchar symb)
+ {
+     // Bit offset of the examined symbol inside the (k+x)-mer
+     const uint32 bit_pos = (ptr.kmer_len + ptr.max_x - offset) * 2;
+
+     uint64 lo = start_pos;
+     uint64 hi = end_pos;
+     while (lo < hi)
+     {
+         const uint64 mid = (lo + hi) / 2;
+         const uchar mid_symb = ptr.buffer[mid].get_2bits(bit_pos);
+         if (mid_symb < symb)
+             lo = mid + 1;
+         else
+             hi = mid;
+     }
+     return hi;
+ }
+
+//----------------------------------------------------------------------------------
+ // Registers the sub-range [start_pos, end_pos) in ptr.kxmer_set with a shift of
+ // max_x + 1 - offset, then, while `depth` allows, splits the range by the symbol
+ // at position `offset` into four groups (symbols 0..3) and recurses into each
+ // with offset + 1. Empty ranges are ignored.
+ // NOTE: `depth` is unsigned and decremented before the test, so callers must
+ // pass depth >= 1.
+ template<unsigned SIZE> void CKmerBinSorter_Impl<CKmer<SIZE>, SIZE>::InitKXMerSet(CKmerBinSorter<CKmer<SIZE>, SIZE> &ptr, uint64 start_pos, uint64 end_pos, uint32 offset, uint32 depth)
+ {
+     if (start_pos == end_pos)
+         return;
+
+     uint32 shr = ptr.max_x + 1 - offset;
+     ptr.kxmer_set.init_add(start_pos, end_pos, shr);
+
+     if (--depth == 0)
+         return;
+
+     // bounds[s] .. bounds[s + 1] delimits the records whose examined symbol is s
+     uint64 bounds[5];
+     bounds[0] = start_pos;
+     bounds[4] = end_pos;
+     for (uint32 s = 1; s < 4; ++s)
+         bounds[s] = FindFirstSymbOccur(ptr, bounds[s - 1], end_pos, offset, s);
+
+     for (uint32 s = 0; s < 4; ++s)
+         InitKXMerSet(ptr, bounds[s], bounds[s + 1], offset + 1, depth);
+ }
+
+//----------------------------------------------------------------------------------
+ // Collapses runs of identical (k+x)-mers in the sorted ptr.buffer, in place.
+ //
+ // On return ptr.buffer[0 .. compacted_count) holds one copy of every distinct
+ // (k+x)-mer, in the original order, and ptr.kxmer_counters[j] holds the number of
+ // records that were merged into entry j.
+ //
+ // Precondition: the buffer holds at least one record (buffer[0] is read
+ // unconditionally); CompactKxmers only calls this when ptr.n_plus_x_recs != 0.
+ template<unsigned SIZE> void CKmerBinSorter_Impl<CKmer<SIZE>, SIZE>::PreCompactKxmers(CKmerBinSorter<CKmer<SIZE>, SIZE> &ptr, uint64& compacted_count)
+ {
+     compacted_count = 0;
+
+     CKmer<SIZE> *act_kmer = &ptr.buffer[0];
+     ptr.kxmer_counters[compacted_count] = 1;
+
+     // The index is 64-bit: Sort() treats n_plus_x_recs as a 64-bit record count,
+     // and a 32-bit index would wrap around (and never terminate) past 2^32 records.
+     for (uint64 i = 1; i < ptr.n_plus_x_recs; ++i)
+     {
+         if (*act_kmer == ptr.buffer[i])
+             ++ptr.kxmer_counters[compacted_count];
+         else
+         {
+             // Run finished: store its representative and start a new run at i.
+             // The write position never passes i, so unread records are not clobbered.
+             ptr.buffer[compacted_count++] = *act_kmer;
+             ptr.kxmer_counters[compacted_count] = 1;
+             act_kmer = &ptr.buffer[i];
+         }
+     }
+     // Representative of the last run
+     ptr.buffer[compacted_count++] = *act_kmer;
+ }
+
+//----------------------------------------------------------------------------------
+ // Compacts a sorted bin of (k+x)-mers into k-mer/counter records and pushes the
+ // result to ptr.kq.
+ //
+ // Steps:
+ //  1. identical (k+x)-mers are merged (PreCompactKxmers),
+ //  2. ptr.kxmer_set is initialised with the sub-ranges of the buffer
+ //     (InitKXMerSet) and then yields k-mers one at a time through get_min(),
+ //  3. consecutive equal k-mers are summed and each distinct k-mer is either counted
+ //     as below cutoff_min / above cutoff_max or written to out_buffer.
+ //
+ // Output record: kmer_bytes bytes of the k-mer (get_byte(kmer_bytes-1) first, down
+ // to get_byte(0)), followed by counter_size bytes of the count, least significant
+ // byte first; counts above counter_max are clamped. lut[] receives, per value of
+ // kmer.remove_suffix(2 * kmer_symbols), the number of stored records.
+ template <unsigned SIZE> void CKmerBinSorter_Impl<CKmer<SIZE>, SIZE>::CompactKxmers(CKmerBinSorter<CKmer<SIZE>, SIZE> &ptr)
+ {
+     ptr.kxmer_set.clear();
+     ptr.kxmer_set.set_buffer(ptr.buffer);
+     ptr.n_unique = 0;
+     ptr.n_cutoff_min = 0;
+     ptr.n_cutoff_max = 0;
+     ptr.n_total = 0;
+
+     uint32 kmer_symbols = ptr.kmer_len - ptr.lut_prefix_len;
+     uint64 kmer_bytes = kmer_symbols / 4;
+     // Shift in 64 bits: `1 << n` is evaluated as int and overflows for n >= 31
+     uint64 lut_recs = (uint64)1 << (2 * ptr.lut_prefix_len);
+     uint64 lut_size = lut_recs * sizeof(uint64);
+
+     uchar *out_buffer = NULL;
+     uchar *raw_lut = NULL;
+
+     ptr.memory_bins->reserve(ptr.bin_id, out_buffer, CMemoryBins::mba_suffix);
+     ptr.memory_bins->reserve(ptr.bin_id, raw_lut, CMemoryBins::mba_lut);
+
+     uint64 *lut = (uint64*)raw_lut;
+     fill_n(lut, lut_recs, 0);
+
+     uint32 out_pos = 0;
+
+     if (ptr.n_plus_x_recs)
+     {
+         uchar* raw_kxmer_counters = NULL;
+         ptr.memory_bins->reserve(ptr.bin_id, raw_kxmer_counters, CMemoryBins::mba_kxmer_counters);
+         ptr.kxmer_counters = (uint32*)raw_kxmer_counters;
+         uint64 compacted_count;
+         PreCompactKxmers(ptr, compacted_count);
+
+         // pos[symb] is the first position where symb occurs as the first symbol of
+         // a (k+x)-mer; pos[symb + 1] is the first position where it no longer does.
+         uint64 pos[5];
+         pos[0] = 0;
+         pos[4] = compacted_count;
+         for (uint32 i = 1; i < 4; ++i)
+             pos[i] = FindFirstSymbOccur(ptr, pos[i - 1], compacted_count, 0, i);
+         for (uint32 i = 1; i < 5; ++i)
+             InitKXMerSet(ptr, pos[i - 1], pos[i], ptr.max_x + 2 - i, i);
+
+         uint64 counter_size = min(BYTE_LOG(ptr.cutoff_max), BYTE_LOG(ptr.counter_max));
+
+         // Accounts for one distinct k-mer with its final count: updates the
+         // statistics and, when the count lies within the cutoffs, stores the record.
+         auto account_kmer = [&](CKmer<SIZE> &kmer, uint32 count)
+         {
+             ptr.n_total += count;
+             ++ptr.n_unique;
+             if (count < (uint32)ptr.cutoff_min)
+             {
+                 ptr.n_cutoff_min++;
+                 return;
+             }
+             if (count > (uint32)ptr.cutoff_max)
+             {
+                 ptr.n_cutoff_max++;
+                 return;
+             }
+
+             lut[kmer.remove_suffix(2 * kmer_symbols)]++;
+             if (count > (uint32)ptr.counter_max)
+                 count = ptr.counter_max;
+
+             // Store compacted kmer
+             for (int32 j = (int32)kmer_bytes - 1; j >= 0; --j)
+                 out_buffer[out_pos++] = kmer.get_byte(j);
+             for (int32 j = 0; j < (int32)counter_size; ++j)
+                 out_buffer[out_pos++] = (count >> (j * 8)) & 0xFF;
+         };
+
+         uint64 counter_pos;
+         CKmer<SIZE> kmer, next_kmer;
+
+         // First k-mer
+         ptr.kxmer_set.get_min(counter_pos, kmer);
+         uint32 count = ptr.kxmer_counters[counter_pos];
+
+         // Remaining k-mers: sum equal neighbours, flush on every change
+         while (ptr.kxmer_set.get_min(counter_pos, next_kmer))
+         {
+             if (kmer == next_kmer)
+                 count += ptr.kxmer_counters[counter_pos];
+             else
+             {
+                 account_kmer(kmer, count);
+                 count = ptr.kxmer_counters[counter_pos];
+                 kmer = next_kmer;
+             }
+         }
+
+         // Last k-mer
+         account_kmer(kmer, count);
+
+         ptr.memory_bins->free(ptr.bin_id, CMemoryBins::mba_kxmer_counters);
+     }
+
+     // Push the sorted and compacted kmer bin to a priority queue in a form ready to be stored to HDD
+     ptr.kq->push(ptr.bin_id, out_buffer, out_pos, raw_lut, lut_size, ptr.n_unique, ptr.n_cutoff_min, ptr.n_cutoff_max, ptr.n_total);
+
+     if (ptr.buffer_input)
+     {
+         ptr.memory_bins->free(ptr.bin_id, CMemoryBins::mba_input_array);
+         ptr.memory_bins->free(ptr.bin_id, CMemoryBins::mba_tmp_array);
+     }
+     ptr.buffer = NULL;
+ }
+
+
+
+
+//----------------------------------------------------------------------------------
+ // Compacts a sorted bin of plain k-mers: equal neighbours are merged into a single
+ // k-mer with a counter, and the result is pushed to ptr.kq.
+ //
+ // Each distinct k-mer is counted in n_unique and then either counted as below
+ // cutoff_min, counted as above cutoff_max, or written to out_buffer. Both cutoffs
+ // are inclusive bounds of the kept range: a k-mer is kept when
+ // cutoff_min <= count <= cutoff_max. (The original code tested the final k-mer of
+ // a bin with `>= cutoff_max`, dropping a last k-mer whose count equalled
+ // cutoff_max; it is now treated like every other k-mer.)
+ //
+ // Output record: kmer_bytes bytes of the k-mer (get_byte(kmer_bytes-1) first, down
+ // to get_byte(0)), followed by counter_size bytes of the count, least significant
+ // byte first; counts above counter_max are clamped. lut[] receives, per value of
+ // remove_suffix(2 * kmer_symbols), the number of stored records.
+ template <unsigned SIZE> void CKmerBinSorter_Impl<CKmer<SIZE>, SIZE>::CompactKmers(CKmerBinSorter<CKmer<SIZE>, SIZE> &ptr)
+ {
+     uint32 kmer_symbols = ptr.kmer_len - ptr.lut_prefix_len;
+     uint64 kmer_bytes = kmer_symbols / 4;
+     // Shift in 64 bits: `1 << n` is evaluated as int and overflows for n >= 31
+     uint64 lut_recs = (uint64)1 << (2 * (ptr.lut_prefix_len));
+     uint64 lut_size = lut_recs * sizeof(uint64);
+
+     uint64 counter_size = min(BYTE_LOG(ptr.cutoff_max), BYTE_LOG(ptr.counter_max));
+
+     uchar *out_buffer;
+     uchar *raw_lut;
+
+     ptr.memory_bins->reserve(ptr.bin_id, out_buffer, CMemoryBins::mba_suffix);
+     ptr.memory_bins->reserve(ptr.bin_id, raw_lut, CMemoryBins::mba_lut);
+     uint64 *lut = (uint64*)raw_lut;
+     fill_n(lut, lut_recs, 0);
+
+     uint32 out_pos = 0;
+
+     ptr.n_unique = 0;
+     ptr.n_cutoff_min = 0;
+     ptr.n_cutoff_max = 0;
+     ptr.n_total = 0;
+
+     // Accounts for one distinct k-mer with its final count: updates the statistics
+     // and, when the count lies within the cutoffs, stores the record.
+     auto account_kmer = [&](CKmer<SIZE> *kmer, uint32 count)
+     {
+         ptr.n_unique++;
+         if (count < (uint32)ptr.cutoff_min)
+         {
+             ptr.n_cutoff_min++;
+             return;
+         }
+         if (count > (uint32)ptr.cutoff_max)
+         {
+             ptr.n_cutoff_max++;
+             return;
+         }
+
+         if (count > (uint32)ptr.counter_max)
+             count = ptr.counter_max;
+
+         // Store compacted kmer
+         for (int32 j = (int32)kmer_bytes - 1; j >= 0; --j)
+             out_buffer[out_pos++] = kmer->get_byte(j);
+         for (int32 j = 0; j < (int32)counter_size; ++j)
+             out_buffer[out_pos++] = (count >> (j * 8)) & 0xFF;
+
+         lut[kmer->remove_suffix(2 * kmer_symbols)]++;
+     };
+
+     if (ptr.n_rec) // non-empty bin
+     {
+         ptr.n_total = ptr.n_rec;
+
+         CKmer<SIZE> *act_kmer = &ptr.buffer[0];
+         uint32 count = 1;
+
+         for (uint64 i = 1; i < ptr.n_rec; ++i)
+         {
+             if (*act_kmer == ptr.buffer[i])
+             {
+                 count++;
+                 continue;
+             }
+
+             // A different k-mer starts here: flush the finished run
+             account_kmer(act_kmer, count);
+             act_kmer = &ptr.buffer[i];
+             count = 1;
+         }
+
+         // Last run of the bin, handled exactly like the others
+         account_kmer(act_kmer, count);
+     }
+
+     // Push the sorted and compacted kmer bin to a priority queue in a form ready to be stored to HDD
+     ptr.kq->push(ptr.bin_id, out_buffer, out_pos, raw_lut, lut_size, ptr.n_unique, ptr.n_cutoff_min, ptr.n_cutoff_max, ptr.n_total);
+
+     if (ptr.buffer_input)
+     {
+         ptr.memory_bins->free(ptr.bin_id, CMemoryBins::mba_input_array);
+         ptr.memory_bins->free(ptr.bin_id, CMemoryBins::mba_tmp_array);
+     }
+     ptr.buffer = NULL;
+ }
+
+
+
+
+//----------------------------------------------------------------------------------
+ // Dispatches compaction of the sorted bin: (k+x)-mer bins (max_x != 0) go through
+ // CompactKxmers, plain k-mer bins through CompactKmers.
+ template <unsigned SIZE> void CKmerBinSorter_Impl<CKmer<SIZE>, SIZE>::Compact(CKmerBinSorter<CKmer<SIZE>, SIZE> &ptr)
+ {
+     if (!ptr.max_x)
+     {
+         CompactKmers(ptr);
+         return;
+     }
+     CompactKxmers(ptr);
+ }
+//----------------------------------------------------------------------------------
+ // Compact the kmers - identical kmers (now at neighbouring positions) are compacted to a single kmer and counter
+ // Compacts a sorted bin of quality-aware k-mers: the `quality` values of equal
+ // neighbouring k-mers are summed (in double precision) into one counter per
+ // distinct k-mer, and the result is pushed to ptr.kq.
+ //
+ // Each distinct k-mer is counted in n_unique and then either counted as below
+ // cutoff_min, counted as above cutoff_max, or written to out_buffer.
+ //
+ // Output record: kmer_bytes bytes of the k-mer (get_byte(kmer_bytes-1) first, down
+ // to get_byte(0)), followed by the 4 bytes of the counter converted to float,
+ // least significant byte of its bit pattern first; counters above counter_max are
+ // clamped. lut[] receives, per value of remove_suffix(2 * kmer_symbols), the
+ // number of stored records.
+ template <unsigned SIZE> void CKmerBinSorter_Impl<CKmerQuake<SIZE>, SIZE>::Compact(CKmerBinSorter<CKmerQuake<SIZE>, SIZE> &ptr)
+ {
+     uint32 kmer_symbols = ptr.kmer_len - ptr.lut_prefix_len;
+     uint64 kmer_bytes = kmer_symbols / 4;
+     // Shift in 64 bits: `1 << n` is evaluated as int and overflows for n >= 31
+     uint64 lut_recs = (uint64)1 << (2 * (ptr.lut_prefix_len));
+     uint64 lut_size = lut_recs * sizeof(uint64);
+
+     uchar *out_buffer;
+     uchar *raw_lut;
+     ptr.memory_bins->reserve(ptr.bin_id, out_buffer, CMemoryBins::mba_suffix);
+     ptr.memory_bins->reserve(ptr.bin_id, raw_lut, CMemoryBins::mba_lut);
+     uint64 *lut = (uint64*)raw_lut;
+     fill_n(lut, lut_recs, 0);
+
+     uint32 out_pos = 0;
+
+     ptr.n_unique = 0;
+     ptr.n_cutoff_min = 0;
+     ptr.n_cutoff_max = 0;
+     ptr.n_total = 0;
+
+     // Accounts for one distinct k-mer with its accumulated counter: updates the
+     // statistics and, when the counter lies within the cutoffs, stores the record.
+     auto account_kmer = [&](CKmerQuake<SIZE> *kmer, double count)
+     {
+         ++ptr.n_unique;
+         if (count < (double) ptr.cutoff_min)
+         {
+             ++ptr.n_cutoff_min;
+             return;
+         }
+         if (count > (double) ptr.cutoff_max)
+         {
+             ++ptr.n_cutoff_max;
+             return;
+         }
+
+         if (count > (double) ptr.counter_max)
+             count = (double) ptr.counter_max;
+
+         // Store compacted kmer
+         for (int32 j = (int32) kmer_bytes - 1; j >= 0; --j)
+             out_buffer[out_pos++] = kmer->get_byte(j);
+
+         // The counter is stored as the raw bit pattern of a float
+         uint32 tmp;
+         float f_count = (float) count;
+         memcpy(&tmp, &f_count, 4);
+         for (int32 j = 0; j < 4; ++j)
+             out_buffer[out_pos++] = (tmp >> (j * 8)) & 0xFF;
+
+         lut[kmer->remove_suffix(2 * kmer_symbols)]++;
+     };
+
+     if (ptr.n_rec) // non-empty bin
+     {
+         ptr.n_total = ptr.n_rec;
+
+         CKmerQuake<SIZE> *act_kmer = &ptr.buffer[0];
+         double count = (double) act_kmer->quality;
+
+         for (uint64 i = 1; i < ptr.n_rec; ++i)
+         {
+             if (*act_kmer == ptr.buffer[i])
+             {
+                 count += ptr.buffer[i].quality;
+                 continue;
+             }
+
+             // A different k-mer starts here: flush the finished run
+             account_kmer(act_kmer, count);
+             act_kmer = &ptr.buffer[i];
+             count = act_kmer->quality;
+         }
+
+         // Last run of the bin
+         account_kmer(act_kmer, count);
+     }
+
+     // Push the sorted and compacted kmer bin to a priority queue in a form ready to be stored to HDD
+     ptr.kq->push(ptr.bin_id, out_buffer, out_pos, raw_lut, lut_size, ptr.n_unique, ptr.n_cutoff_min, ptr.n_cutoff_max, ptr.n_total);
+
+     if (ptr.buffer_input)
+     {
+         ptr.memory_bins->free(ptr.bin_id, CMemoryBins::mba_input_array);
+         ptr.memory_bins->free(ptr.bin_id, CMemoryBins::mba_tmp_array);
+     }
+     ptr.buffer = NULL;
+ }
+
+
+//************************************************************************************************************
+// CWKmerBinSorter - wrapper for multithreading purposes
+//************************************************************************************************************
+ // Callable wrapper around a heap-allocated CKmerBinSorter: operator() runs
+ // ProcessBins() on the wrapped sorter.
+ // NOTE(review): the wrapper owns `kbs` through a raw pointer and declares no copy
+ // constructor/assignment, so copying an instance would lead to a double delete -
+ // presumably it is handed to threads by reference; confirm at the call sites.
+ template <typename KMER_T, unsigned SIZE> class CWKmerBinSorter {
+ // Wrapped sorter; created in the constructor, deleted in the destructor
+ CKmerBinSorter<KMER_T, SIZE> *kbs;
+
+ public:
+ CWKmerBinSorter(CKMCParams &Params, CKMCQueues &Queues, int thread_no);
+ ~CWKmerBinSorter();
+ // Forwards to the wrapped sorter's GetDebugStats(), which fills both output arguments
+ void GetDebugStats(uint64& _sum_n_recs, uint64& _sum_n_plus_x_recs)
+ {
+ kbs->GetDebugStats(_sum_n_recs, _sum_n_plus_x_recs);
+ }
+ // Thread entry point
+ void operator()();
+ };
+
+//----------------------------------------------------------------------------------
+// Constructor
+ // Creates the wrapped sorter, forwarding all three arguments to its constructor.
+ template <typename KMER_T, unsigned SIZE> CWKmerBinSorter<KMER_T, SIZE>::CWKmerBinSorter(CKMCParams &Params, CKMCQueues &Queues, int thread_no)
+     : kbs(new CKmerBinSorter<KMER_T, SIZE>(Params, Queues, thread_no))
+ {
+ }
+
+//----------------------------------------------------------------------------------
+// Destructor
+ // Destroys the wrapped sorter created by the constructor.
+ template <typename KMER_T, unsigned SIZE> CWKmerBinSorter<KMER_T, SIZE>::~CWKmerBinSorter()
+ {
+ delete kbs;
+ }
+
+//----------------------------------------------------------------------------------
+// Execution
+ // Runs the wrapped sorter's ProcessBins(); returns when that call returns.
+ template <typename KMER_T, unsigned SIZE> void CWKmerBinSorter<KMER_T, SIZE>::operator()()
+ {
+ kbs->ProcessBins();
+ }
+
+#endif
+
+// ***** EOF
\ No newline at end of file
diff --git a/kmer_counter/kb_storer.cpp b/kmer_counter/kb_storer.cpp
new file mode 100755
index 0000000..bd9f0c0
--- /dev/null
+++ b/kmer_counter/kb_storer.cpp
@@ -0,0 +1,268 @@
+#include "stdafx.h"
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+#include <algorithm>
+#include <numeric>
+#include <iostream>
+#include <boost/lexical_cast.hpp>
+#include "kb_storer.h"
+
+using namespace std;
+
+extern uint64 total_reads;
+
+//************************************************************************************************************
+// CKmerBinStorer - storer for bins
+//************************************************************************************************************
+
+//----------------------------------------------------------------------------------
+// Constructor
+ // Copies the shared queue/pool handles and the storer limits, allocates the
+ // staging buffer (twice max_mem_single_package bytes) and an all-NULL table of
+ // per-bin part lists. Temporary files are not opened here; see OpenFiles().
+ CKmerBinStorer::CKmerBinStorer(CKMCParams &Params, CKMCQueues &Queues)
+ {
+     // Shared objects taken from the queues structure
+     pmm_bins = Queues.pmm_bins;
+     mm = Queues.mm;
+     q_part = Queues.bpq;
+     bd = Queues.bd;
+     s_mapper = Queues.s_mapper;
+
+     // Configuration
+     n_bins = Params.n_bins;
+     working_directory = Params.working_directory;
+     mem_mode = Params.mem_mode;
+     max_mem_buffer = Params.max_mem_storer;
+     max_mem_single_package = Params.max_mem_storer_pkg;
+
+     // Bookkeeping starts empty
+     files = NULL;
+     buf_sizes = NULL;
+     buffer_size_bytes = 0;
+     max_buf_size = 0;
+     max_buf_size_id = 0;
+     total_size = 0;
+
+     tmp_buff = new uchar[max_mem_single_package*2];
+
+     buffer = new elem_t*[n_bins];
+     fill_n(buffer, n_bins, (elem_t*)NULL);
+ }
+
+//----------------------------------------------------------------------------------
+// Destructor
+ // All cleanup is delegated to Release().
+ CKmerBinStorer::~CKmerBinStorer()
+ {
+ Release();
+ }
+
+//----------------------------------------------------------------------------------
+// Write ends of bins and release memory
+ // Frees everything the storer allocated: the per-bin part lists and their table,
+ // the `files` pointer array, the size table and the staging buffer.
+ //
+ // The CMemDiskFile objects themselves are NOT deleted: OpenFiles() hands each of
+ // them to the bin descriptor (bd->insert), so only the array of pointers is freed
+ // here. NOTE(review): ownership by `bd` is inferred from that call - confirm.
+ //
+ // Unlike the original, memory is released even when OpenFiles() was never called
+ // (the constructor already allocates `buffer` and `tmp_buff`), and every pointer is
+ // reset so that a repeated call is harmless. The trailing newline is still printed
+ // only when files had been opened.
+ void CKmerBinStorer::Release()
+ {
+     const bool files_were_open = (files != NULL);
+
+     if (buffer)
+     {
+         for (int i = 0; i < n_bins; ++i)
+             delete buffer[i];    // deleting NULL is a no-op
+
+         delete[] buffer;
+         buffer = NULL;
+     }
+
+     delete[] files;
+     files = NULL;
+
+     delete[] buf_sizes;
+     buf_sizes = NULL;
+
+     delete[] tmp_buff;
+     tmp_buff = NULL;
+
+     if (files_were_open)
+         cout << "\n";
+ }
+
+//----------------------------------------------------------------------------------
+ // Flush every buffered bin to its temporary file and release the per-bin buffers
+ // Writes the parts of every bin that has a part list to its temporary file, then
+ // deletes the lists (walking the table backwards) and resets the entries to NULL.
+ void CKmerBinStorer::ReleaseBuffer()
+ {
+     // Pass 1: flush
+     for (int bin = 0; bin < n_bins; ++bin)
+     {
+         if (buffer[bin])
+             PutBinToTmpFile(bin);
+     }
+
+     // Pass 2: drop the (now empty) lists, last bin first
+     int bin = n_bins;
+     while (--bin >= 0)
+     {
+         if (!buffer[bin])
+             continue;
+         delete buffer[bin];
+         buffer[bin] = NULL;
+     }
+ }
+
+//----------------------------------------------------------------------------------
+// Return name of a file related to a kmer of given id.
+ // Builds the path of the temporary file of bin `n`:
+ //     <working_directory>/kmc_<n zero-padded to 5 characters>.bin
+ //
+ // Side effect: a missing trailing separator ('/' or '\\') is appended to the
+ // working_directory member on first use.
+ //
+ // An empty working_directory is left empty, so the file name is relative to the
+ // current directory. The original dereferenced rbegin() of the empty string, which
+ // is undefined behaviour.
+ string CKmerBinStorer::GetName(int n)
+ {
+     string s_tmp = boost::lexical_cast<string>(n);
+     if (s_tmp.length() < 5)
+         s_tmp = string(5 - s_tmp.length(), '0') + s_tmp;
+
+     if (!working_directory.empty())
+     {
+         const char last = *working_directory.rbegin();
+         if (last != '/' && last != '\\')
+             working_directory += "/";
+     }
+     return working_directory + "kmc_" + s_tmp + ".bin";
+ }
+
+//----------------------------------------------------------------------------------
+ // Check whether it is necessary to store some bin to a HDD
+ // Flushes the currently largest bin to its temporary file when either the total
+ // buffered size has reached max_mem_buffer or the largest bin has reached
+ // max_mem_single_package. Afterwards the largest bin is determined again from
+ // buf_sizes (the lowest index wins among equal sizes).
+ void CKmerBinStorer::CheckBuffer()
+ {
+     const bool total_below_limit = buffer_size_bytes < max_mem_buffer;
+     const bool largest_below_limit = max_buf_size < max_mem_single_package;
+     if (total_below_limit && largest_below_limit)
+         return;
+
+     PutBinToTmpFile(max_buf_size_id);
+     buf_sizes[max_buf_size_id] = 0;
+
+     // max_element returns the first of several equal maxima, matching a scan
+     // that only replaces the candidate on a strictly greater value
+     uint64 *largest = max_element(buf_sizes, buf_sizes + n_bins);
+     max_buf_size = *largest;
+     max_buf_size_id = (uint32)(largest - buf_sizes);
+ }
+
+//----------------------------------------------------------------------------------
+// Send bin to temp file
+ // Writes all buffered parts of bin `n` to its temporary file.
+ //
+ // When the bin has a non-zero buffered size, each part's payload (its true size,
+ // not its allocated size) is copied back-to-back into tmp_buff, the part's memory
+ // is returned to pmm_bins, and the staging buffer is written with a single Write()
+ // call. A short write prints an error and terminates the process. The part list of
+ // the bin is cleared in every case; buffer[n] must therefore be non-NULL.
+ // buf_sizes[n] itself is not reset here.
+ void CKmerBinStorer::PutBinToTmpFile(uint32 n)
+ {
+     if(buf_sizes[n])
+     {
+         uint64 staged = 0;
+         for(auto &part : *buffer[n])
+         {
+             uchar *chunk = get<0>(part);
+             uint32 chunk_size = get<1>(part);
+             A_memcpy(tmp_buff + staged, chunk, chunk_size);
+             staged += chunk_size;
+             pmm_bins->free(chunk);
+         }
+
+         uint64 written = files[n]->Write(tmp_buff, 1, staged);
+         if(written != staged)
+         {
+             cout<<"Error while writing to temporary file " << n;
+             exit(1);
+         }
+         total_size += written;
+         buffer_size_bytes -= buf_sizes[n];
+     }
+     buffer[n]->clear();
+ }
+//
+
+
+//----------------------------------------------------------------------------------
+// Open temporary files for all bins
+ // Creates one CMemDiskFile per bin, zeroes the per-bin size table, opens every
+ // file under the name given by GetName() and registers it in the bin descriptor.
+ // Always returns true; the result of Open() is not inspected.
+ bool CKmerBinStorer::OpenFiles()
+ {
+     files = new CMemDiskFile*[n_bins];
+     for (int bin = 0; bin < n_bins; ++bin)
+         files[bin] = new CMemDiskFile(mem_mode);
+
+     buf_sizes = new uint64[n_bins];
+
+     for (int bin = 0; bin < n_bins; ++bin)
+     {
+         string f_name = GetName(bin);
+         buf_sizes[bin] = 0;
+
+         files[bin]->Open(f_name);
+
+         bd->insert(bin, files[bin], f_name, 0, 0, 0, 0);
+     }
+
+     return true;
+ }
+
+
+//----------------------------------------------------------------------------------
+//
+ // Main loop of the storer. Until the part queue reports completion, each popped
+ // part is appended to the list of its bin, the size counters (by allocated size)
+ // and the "largest bin" record are updated, and CheckBuffer() decides whether a
+ // bin has to be flushed. When the queue is done, everything still buffered is
+ // written out by ReleaseBuffer().
+ void CKmerBinStorer::ProcessQueue()
+ {
+     while(!q_part->completed())
+     {
+         int32 bin_id;
+         uchar *part;
+         uint32 true_size;
+         uint32 alloc_size;
+
+         if(!q_part->pop(bin_id, part, true_size, alloc_size))
+             continue;
+
+         // Lazily create the part list of this bin
+         elem_t *&bin_parts = buffer[bin_id];
+         if(!bin_parts)
+             bin_parts = new elem_t;
+         bin_parts->push_back(make_tuple(part, true_size, alloc_size));
+
+         buffer_size_bytes += alloc_size;
+         buf_sizes[bin_id] += alloc_size;
+
+         const uint64 bin_size = buf_sizes[bin_id];
+         if(bin_size > max_buf_size)
+         {
+             max_buf_size = bin_size;
+             max_buf_size_id = bin_id;
+         }
+
+         CheckBuffer();
+     }
+
+     // Write all remaining parts to the temporary files
+     ReleaseBuffer();
+ }
+
+
+//************************************************************************************************************
+// CWKmerBinStorer - wrapper
+//************************************************************************************************************
+
+//----------------------------------------------------------------------------------
+// Constructor
+ // Creates the wrapped storer and immediately opens its temporary files.
+ CWKmerBinStorer::CWKmerBinStorer(CKMCParams &Params, CKMCQueues &Queues)
+     : kbs(new CKmerBinStorer(Params, Queues))
+ {
+     kbs->OpenFiles();
+ }
+
+//----------------------------------------------------------------------------------
+ // Destructor
+ // Destroys the wrapped storer created by the constructor.
+ CWKmerBinStorer::~CWKmerBinStorer()
+ {
+ delete kbs;
+ }
+
+//----------------------------------------------------------------------------------
+// Execution
+ // Runs the wrapped storer's ProcessQueue(); returns when that call returns.
+ void CWKmerBinStorer::operator()()
+ {
+ kbs->ProcessQueue();
+ }
+
+// ***** EOF
diff --git a/kmer_counter/kb_storer.h b/kmer_counter/kb_storer.h
new file mode 100755
index 0000000..7e99cdd
--- /dev/null
+++ b/kmer_counter/kb_storer.h
@@ -0,0 +1,91 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#ifndef _KB_STORER_H
+#define _KB_STORER_H
+
+#include "defs.h"
+#include "params.h"
+#include "kmer.h"
+#include "radix.h"
+#include <string>
+#include <algorithm>
+#include <numeric>
+#include <array>
+#include <tuple>
+#include <stdio.h>
+
+using namespace std;
+
+//************************************************************************************************************
+// CKmerBinStorer - storer of bins of k-mers
+//************************************************************************************************************
+ // Collects parts of bins popped from the part queue, keeps them buffered per bin,
+ // and writes whole bins to per-bin temporary files when the configured memory
+ // limits are reached or the queue is completed.
+ // NOTE(review): `list` is used below but <list> is not included by this header;
+ // it presumably arrives through one of the project headers - confirm.
+ class CKmerBinStorer {
+ // Copied from Queues.mm in the constructor
+ CMemoryMonitor *mm;
+
+ // Number of bytes successfully written to temporary files so far
+ uint64 total_size;
+ // Pool that the memory of each written part is returned to
+ CMemoryPool *pmm_bins;
+ // Directory of the temporary files; GetName() appends a trailing '/' if missing
+ string working_directory;
+ int n_bins;
+ // Source queue of bin parts
+ CBinPartQueue *q_part;
+ // Bin descriptor that every opened file is registered in
+ CBinDesc *bd;
+ // Sum of the allocated sizes of all currently buffered parts
+ uint64 buffer_size_bytes;
+ // Flush threshold for buffer_size_bytes
+ uint64 max_mem_buffer;
+ // Flush threshold for the largest single bin
+ uint64 max_mem_single_package;
+
+ // Copied from Queues.s_mapper in the constructor
+ CSignatureMapper *s_mapper;
+ // Staging buffer (2 * max_mem_single_package bytes) a bin is assembled in before writing
+ uchar* tmp_buff;
+ // One temporary file per bin; NULL until OpenFiles() is called
+ CMemDiskFile** files;
+ // Allocated bytes currently buffered per bin; NULL until OpenFiles() is called
+ uint64 *buf_sizes;
+ // Size and index of the bin currently holding the most buffered bytes
+ uint64 max_buf_size;
+ uint32 max_buf_size_id;
+ // Passed to the CMemDiskFile constructor
+ bool mem_mode;
+
+ // Buffered parts of one bin: (data pointer, true size, allocated size)
+ typedef list<tuple<uchar *, uint32, uint32>> elem_t;
+ // Per-bin part lists, created lazily; NULL for bins without a list
+ elem_t** buffer;
+
+ void Release();
+ string GetName(int n);
+ void CheckBuffer();
+ void ReleaseBuffer();
+ void PutBinToTmpFile(uint32 n);
+
+ public:
+ // Returns, through _total, the number of bytes written to temporary files so far
+ void GetTotal(uint64& _total)
+ {
+ _total = total_size;
+ }
+ CKmerBinStorer(CKMCParams &Params, CKMCQueues &Queues);
+ ~CKmerBinStorer();
+
+ // Must be called before ProcessQueue(): allocates `files` and `buf_sizes`
+ bool OpenFiles();
+ void ProcessQueue();
+ };
+
+//************************************************************************************************************
+// CWKmerBinStorer - wrapper for multithreading purposes
+//************************************************************************************************************
+ // Callable wrapper around a heap-allocated CKmerBinStorer: operator() runs
+ // ProcessQueue() on the wrapped storer.
+ // NOTE(review): the wrapper owns `kbs` through a raw pointer and declares no copy
+ // constructor/assignment, so copying an instance would lead to a double delete -
+ // presumably it is handed to threads by reference; confirm at the call sites.
+ class CWKmerBinStorer {
+ // Wrapped storer; created in the constructor, deleted in the destructor
+ CKmerBinStorer *kbs;
+
+ public:
+ // Forwards to the wrapped storer's GetTotal()
+ void GetTotal(uint64& _total)
+ {
+ kbs->GetTotal(_total);
+ }
+ CWKmerBinStorer(CKMCParams &Params, CKMCQueues &Queues);
+ ~CWKmerBinStorer();
+
+ // Thread entry point
+ void operator()();
+ };
+
+#endif
+
+// ***** EOF
diff --git a/kmer_counter/kmc.h b/kmer_counter/kmc.h
new file mode 100755
index 0000000..9e9936f
--- /dev/null
+++ b/kmer_counter/kmc.h
@@ -0,0 +1,767 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#ifndef _KMC_H
+#define _KMC_H
+
+#include "defs.h"
+#include "params.h"
+#include "kmer.h"
+#include <iostream>
+#include <iomanip>
+#include <string>
+#include <vector>
+#include <numeric>
+#include "queues.h"
+#include "timer.h"
+#include "fastq_reader.h"
+#include "kb_collector.h"
+#include "kb_completer.h"
+#include "kb_reader.h"
+#include "kb_sorter.h"
+#include "kb_storer.h"
+#include "s_mapper.h"
+#include "splitter.h"
+#include "libs/asmlib.h"
+#include <boost/filesystem.hpp>
+
+#ifdef DEVELOP_MODE
+#include "develop.h"
+#endif
+
+using namespace std;
+
+
+// Top-level driver of the k-mer counter.
+// Template parameters: KMER_T - k-mer record type (its sizeof is stored in Params.KMER_T_size),
+// SIZE - forwarded to the bin reader/sorter templates, QUAKE_MODE - forwarded to the splitter templates.
+// Typical use as visible in this file: SetParams() -> Process() -> GetStats().
+template <typename KMER_T, unsigned SIZE, bool QUAKE_MODE> class CKMC {
+ bool initialized; // set by SetParams(); Process() refuses to run while false
+
+ // w1 is started at the beginning of Process() and stopped after stage 1; w2 covers stage 2.
+ // w0 covers stage 0 and heuristic_time the signature mapper Init() call.
+ CStopWatch w0, heuristic_time , w1, w2;
+
+ // Parameters (input and internal)
+ CKMCParams Params;
+
+ // Memory monitor and queues
+ CKMCQueues Queues;
+
+ // Thread groups
+ vector<thread> gr0_1, gr0_2; // stage 0: statistics readers (gr0_1) and statistics splitters (gr0_2)
+ vector<thread> gr1_1, gr1_2, gr1_3, gr1_4, gr1_5; // thread groups for 1st stage
+ vector<thread> gr2_1, gr2_2, gr2_3; // thread groups for 2nd stage
+
+ // Statistics filled in by Process() and returned by GetStats()
+ uint64 n_unique, n_cutoff_min, n_cutoff_max, n_total, n_reads, tmp_size, n_total_super_kmers;
+
+ // Threads
+ vector<CWStatsFastqReader*> w_stats_fastqs;
+ vector<CWStatsSplitter<false>*> w_stats_splitters;
+ vector<CWFastqReader*> w_fastqs;
+ vector<CWSplitter<QUAKE_MODE>*> w_splitters;
+ CWKmerBinStorer *w_storer;
+
+ CWKmerBinReader<KMER_T, SIZE>* w_reader;
+ vector<CWKmerBinSorter<KMER_T, SIZE>*> w_sorters;
+ CWKmerBinCompleter *w_completer;
+
+ // Choose reader/splitter counts (stage 1) and sorter/OpenMP thread counts (stage 2)
+ void SetThreads1Stage();
+ void SetThreads2Stage(vector<int64>& sorted_sizes);
+
+ // Split the memory budget between the stage-1 pools; returns false when too little is left for the storer
+ bool AdjustMemoryLimits();
+ void AdjustMemoryLimitsStage2();
+
+ // Print the configuration (verbose mode only)
+ void ShowSettingsStage1();
+ void ShowSettingsStage2();
+
+public:
+ CKMC();
+ ~CKMC();
+
+ void SetParams(CKMCParams &_Params);
+ bool Process();
+ void GetStats(double &time1, double &time2, uint64 &_n_unique, uint64 &_n_cutoff_min, uint64 &_n_cutoff_max, uint64 &_n_total, uint64 &_n_reads, uint64 &_tmp_size, uint64& _n_total_super_kmers);
+};
+
+
+//----------------------------------------------------------------------------------
+template <typename KMER_T, unsigned SIZE, bool QUAKE_MODE> CKMC<KMER_T, SIZE, QUAKE_MODE>::CKMC()
+{
+// OpenMP support is a must, so do not compile if it is not supported
+#if !defined(_OPENMP)
+    BOOST_STATIC_ASSERT_MSG(false, "You need to use OpenMP");
+#endif
+
+    // Process() refuses to run until SetParams() has flipped this flag.
+    initialized = false;
+
+    // Minimal defaults; SetParams() overwrites them.
+    Params.kmer_len = 0;
+    Params.n_readers = 1;
+    Params.n_splitters = 1;
+    Params.n_sorters = 1;
+    Queues.s_mapper = NULL;
+
+    // Give the statistics a defined value so that GetStats() never reads
+    // indeterminate memory when Process() has not run or returned early.
+    n_unique = n_cutoff_min = n_cutoff_max = n_total = 0;
+    n_reads = tmp_size = n_total_super_kmers = 0;
+
+    // The single-instance workers are only created inside Process().
+    w_storer = NULL;
+    w_reader = NULL;
+    w_completer = NULL;
+}
+
+//----------------------------------------------------------------------------------
+// Empty destructor: no member is released here. The worker objects and queues that
+// Process() allocates are deleted inside Process() itself.
+template <typename KMER_T, unsigned SIZE, bool QUAKE_MODE> CKMC<KMER_T, SIZE, QUAKE_MODE>::~CKMC()
+{
+}
+
+//----------------------------------------------------------------------------------
+// Set params of the k-mer counter
+template <typename KMER_T, unsigned SIZE, bool QUAKE_MODE> void CKMC<KMER_T, SIZE, QUAKE_MODE>::SetParams(CKMCParams &_Params)
+{
+    // Work on a private copy of the caller's settings; the derived (non p_*) fields are filled in below.
+    Params = _Params;
+
+    // k-mer length and the derived max_x (0 when the length is a multiple of 32)
+    Params.kmer_len = Params.p_k;
+    const bool len_is_multiple_of_32 = (Params.kmer_len % 32 == 0);
+    if (!len_is_multiple_of_32)
+        Params.max_x = MIN(31 - (Params.kmer_len % 32), KMER_X);
+    else
+        Params.max_x = 0;
+
+    Params.verbose = Params.p_verbose;
+
+    // Technical parameters related to temporary files
+    Params.signature_len = Params.p_p1;
+    Params.bin_part_size = 1 << 16;
+
+    // Thresholds for counters
+    Params.cutoff_min = Params.p_ci;
+    Params.cutoff_max = Params.p_cx;
+    Params.counter_max = Params.p_cs;
+    Params.use_quake = Params.p_quake;
+
+    Params.lowest_quality = Params.p_quality;
+    Params.both_strands = Params.p_both_strands;
+    Params.mem_mode = Params.p_mem_mode;
+
+    // Thread counts: taken verbatim only when all four were given, otherwise derived automatically.
+    const bool all_thread_counts_given = Params.p_sf && Params.p_sp && Params.p_so && Params.p_sr;
+    if (!all_thread_counts_given)
+    {
+        // Fall back to the hardware concurrency when no total thread count was requested
+        Params.n_threads = Params.p_t;
+        if (!Params.n_threads)
+            Params.n_threads = thread::hardware_concurrency();
+        SetThreads1Stage();
+    }
+    else
+    {
+        Params.n_readers = NORM(Params.p_sf, 1, 32);
+        Params.n_splitters = NORM(Params.p_sp, 1, 32);
+        Params.n_sorters = NORM(Params.p_sr, 1, 32);
+        // every sorter gets the same number of OpenMP threads
+        Params.n_omp_threads.assign(Params.n_sorters, NORM(Params.p_so, 1, 32));
+    }
+
+    // Memory limit: p_m is scaled by 10^9, with NORM bounds of MIN_MEM * 10^9 and 1024 * 10^9
+    Params.max_mem_size = NORM(((uint64)Params.p_m) * 1000000000ull, (uint64)MIN_MEM * 1000000000ull, 1024ull * 1000000000ull);
+
+    Params.file_type = Params.p_file_type;
+    Params.KMER_T_size = sizeof(KMER_T);
+
+    initialized = true;
+
+    SetMemcpyCacheLimit(8); // Sets the asmlib's memcpy function to make copy without use of cache memory
+}
+
+//----------------------------------------------------------------------------------
+// Choose the number of reader and splitter threads for stage 1.
+// Does nothing when the user supplied all four explicit thread counts.
+// Compressed input (.gz / .bz2) may get several readers (at most half of the cores, and only
+// as many as there are files larger than 5% of the largest one); otherwise one reader is used.
+// The remaining cores become splitters. Exits the process when an input file cannot be opened.
+template <typename KMER_T, unsigned SIZE, bool QUAKE_MODE> void CKMC<KMER_T, SIZE, QUAKE_MODE>::SetThreads1Stage()
+{
+    if (Params.p_sf && Params.p_sp && Params.p_sr && Params.p_so)
+        return;
+
+    // True when `name` ends with `suffix`; safe for names shorter than the suffix.
+    // (The previous code compared only the last 3 characters, so ".bz2" could never
+    // match, and names shorter than 3 characters were read out of range.)
+    auto has_suffix = [](const string &name, const string &suffix) {
+        return name.size() >= suffix.size() &&
+            name.compare(name.size() - suffix.size(), suffix.size(), suffix) == 0;
+    };
+
+    int cores = Params.n_threads;
+    bool gz_bz2 = false;
+    vector<uint64> file_sizes;
+
+    for (auto& p : Params.input_file_names)
+    {
+        if (has_suffix(p, ".gz") || has_suffix(p, ".bz2"))
+            gz_bz2 = true;
+
+        // The size of every input is needed below, so keep scanning even after a match
+        FILE* tmp = my_fopen(p.c_str(), "rb");
+        if (!tmp)
+        {
+            cout << "Cannot open file: " << p.c_str();
+            exit(1);
+        }
+        my_fseek(tmp, 0, SEEK_END);
+        file_sizes.push_back(my_ftell(tmp));
+        fclose(tmp);
+    }
+
+    if (gz_bz2)
+    {
+        // gz_bz2 implies at least one file was seen, so front() is valid here
+        sort(file_sizes.begin(), file_sizes.end(), greater<uint64>());
+        uint64 file_size_threshold = (uint64)(file_sizes.front() * 0.05);
+        int32 n_allowed_files = 0;
+        for (auto& p : file_sizes)
+            if (p > file_size_threshold)
+                ++n_allowed_files;
+        Params.n_readers = MIN(n_allowed_files, MAX(1, cores / 2));
+    }
+    else
+        Params.n_readers = 1;
+
+    Params.n_splitters = MAX(1, cores - Params.n_readers);
+}
+//----------------------------------------------------------------------------------
+// Choose the number of sorters and the OpenMP threads per sorter for stage 2.
+// sorted_sizes: estimated bin sizes; the caller in Process() passes them sorted in decreasing order.
+// Does nothing when the user supplied all four explicit thread counts.
+template<typename KMER_T, unsigned SIZE, bool QUAKE_MODE> void CKMC<KMER_T, SIZE, QUAKE_MODE>::SetThreads2Stage(vector<int64>& sorted_sizes)
+{
+    if (!Params.p_sf || !Params.p_sp || !Params.p_sr || !Params.p_so)
+    {
+        if (Params.n_threads == 1)
+        {
+            Params.n_sorters = 1;
+            Params.n_omp_threads.assign(1, 1);
+        }
+        else
+        {
+            // Size of the bin at the 10% position of the list, at least 1.
+            // An empty list used to be indexed out of range; treat it as "tiny bins".
+            int64 _10th_proc_bin_size = 1;
+            if (!sorted_sizes.empty())
+                _10th_proc_bin_size = MAX(sorted_sizes[int(sorted_sizes.size() * 0.1)], 1);
+
+            // As many sorters as such bins fit into memory, within NORM bounds 1 and n_threads
+            Params.n_sorters = NORM(Params.max_mem_size / _10th_proc_bin_size, 1, Params.n_threads);
+            Params.n_omp_threads.assign(Params.n_sorters, MAX(1, Params.n_threads / Params.n_sorters));
+
+            // Hand the threads left over by the integer division out one by one
+            int threads_left = Params.n_threads - Params.n_omp_threads.front() * Params.n_sorters;
+            for (uint32 i = 0; threads_left; --threads_left, ++i)
+                Params.n_omp_threads[i%Params.n_sorters]++;
+        }
+    }
+}
+
+// Compute the sizes of the stage-2 memory pools (radix buffers, Quake probabilities,
+// expand buffers) and store what remains of the global limit in Params.max_mem_stage2.
+template <typename KMER_T, unsigned SIZE, bool QUAKE_MODE> void CKMC<KMER_T, SIZE, QUAKE_MODE>::AdjustMemoryLimitsStage2()
+{
+    // Total number of OpenMP threads over all sorters
+    const int64 sum_n_omp_threads = accumulate(Params.n_omp_threads.begin(), Params.n_omp_threads.end(), (int64)0);
+
+    // Radix internal buffers: one part per OpenMP thread
+    Params.mem_part_pmm_radix_buf = (256 * BUFFER_WIDTH + ALIGNMENT) * sizeof(uint64);
+    Params.mem_tot_pmm_radix_buf = Params.mem_part_pmm_radix_buf * sum_n_omp_threads;
+
+    // Probability buffers: one part per sorter, Quake mode only
+    if (!Params.use_quake)
+    {
+        Params.mem_tot_pmm_prob = 0;
+        Params.mem_part_pmm_prob = 0;
+    }
+    else
+    {
+        Params.mem_part_pmm_prob = (CKmerBinSorter<KMER_T, SIZE>::PROB_BUF_SIZE + 1) * sizeof(double);
+        Params.mem_tot_pmm_prob = Params.n_sorters * Params.mem_part_pmm_prob;
+    }
+
+    // Expand buffers: one part per OpenMP thread, only for both-strands counting outside Quake mode
+    const bool needs_expand = !Params.use_quake && Params.both_strands;
+    if (!needs_expand)
+    {
+        Params.mem_tot_pmm_epxand = 0;
+        Params.mem_part_pmm_epxand = 0;
+    }
+    else
+    {
+        Params.mem_part_pmm_epxand = EXPAND_BUFFER_RECS * sizeof(KMER_T);
+        Params.mem_tot_pmm_epxand = sum_n_omp_threads * Params.mem_part_pmm_epxand;
+    }
+
+    // Whatever the pools do not take is the stage-2 budget
+    Params.max_mem_stage2 = Params.max_mem_size - Params.mem_tot_pmm_radix_buf - Params.mem_tot_pmm_prob - Params.mem_tot_pmm_epxand;
+}
+
+//----------------------------------------------------------------------------------
+// Adjust the memory limits for queues and other large data structures
+template <typename KMER_T, unsigned SIZE, bool QUAKE_MODE> bool CKMC<KMER_T, SIZE, QUAKE_MODE>::AdjustMemoryLimits()
+{
+    // Running budget: starts at the global limit; the consumers below are subtracted one by one.
+    // Returns false when less than 2^28 bytes remain for the storer.
+    int64 m_rest = Params.max_mem_size;
+
+    // Statistics pool: one counter per possible signature plus one
+    Params.mem_part_pmm_stats = ((1 << Params.signature_len * 2) + 1) * sizeof(uint32);
+    Params.mem_tot_pmm_stats = (Params.n_splitters + 1 + 1) * Params.mem_part_pmm_stats; //1 merged in main thread, 1 for sorting indices
+
+    // FASTQ buffers: shrink the buffer size step by step until the pool takes at most 17% of the budget.
+    // Each step either clears the lowest set bit or, for a power of two, reduces the size to 3/4.
+    Params.fastq_buffer_size = 32 << 20;
+    while (true)
+    {
+        if (Params.fastq_buffer_size & (Params.fastq_buffer_size-1))
+            Params.fastq_buffer_size &= Params.fastq_buffer_size - 1;
+        else
+            Params.fastq_buffer_size = Params.fastq_buffer_size / 2 + Params.fastq_buffer_size / 4;
+
+        Params.mem_part_pmm_fastq = Params.fastq_buffer_size + CFastqReader::OVERHEAD_SIZE;
+        Params.mem_tot_pmm_fastq = Params.mem_part_pmm_fastq * (Params.n_readers + Params.n_splitters + 96);
+
+        if (!(Params.mem_tot_pmm_fastq > m_rest * 0.17))
+            break;
+    }
+    m_rest -= Params.mem_tot_pmm_fastq;
+
+    // Decompression buffers of the readers: halve until they fit into a tenth of what is left
+    while (Params.n_readers * Params.gzip_buffer_size > m_rest / 10)
+        Params.gzip_buffer_size /= 2;
+    m_rest -= Params.n_readers * Params.gzip_buffer_size;
+
+    // Internal buffers of the bin collectors
+    m_rest -= Params.n_splitters * Params.bin_part_size * sizeof(KMER_T);
+
+    // Read buffers: two per splitter
+    Params.mem_part_pmm_reads = (CSplitter<QUAKE_MODE>::MAX_LINE_SIZE + 1) * sizeof(double);
+    Params.mem_tot_pmm_reads = Params.mem_part_pmm_reads * 2 * Params.n_splitters;
+    m_rest -= Params.mem_tot_pmm_reads;
+
+    // Max. memory for single package
+    Params.max_mem_storer_pkg = 1ll << 25;
+
+    // The bin-part pool receives everything that is left
+    Params.mem_part_pmm_bins = Params.bin_part_size;
+    Params.mem_tot_pmm_bins = m_rest;
+
+    // The storer's internal buffer takes 75% of that pool for limits of at least 16 * 2^30 bytes, 65% otherwise
+    const double storer_share = (Params.max_mem_size >= (16ll << 30)) ? 0.75 : 0.65;
+    Params.max_mem_storer = (int64) (Params.mem_tot_pmm_bins * storer_share);
+
+    return !(Params.max_mem_storer < (1ll << 28));
+}
+
+//----------------------------------------------------------------------------------
+// Show the settings of the KMC (in verbose mode only)
+// Prints the input parameters and the stage-1 configuration to stdout; silent unless Params.verbose is set.
+// Two misspelled labels were corrected: "Quake-compatibile" -> "Quake-compatible", "RAM olny mode" -> "RAM only mode".
+template <typename KMER_T, unsigned SIZE, bool QUAKE_MODE> void CKMC<KMER_T, SIZE, QUAKE_MODE>::ShowSettingsStage1()
+{
+    if(!Params.verbose)
+        return;
+
+    cout << "\n********** Used parameters: **********\n";
+
+    cout << "No. of input files : " << Params.input_file_names.size() << "\n";
+    cout << "Output file name : " << Params.output_file_name << "\n";
+    cout << "No. of working directories : " << 1 << "\n";
+    cout << "Input format : ";
+    switch (Params.file_type)
+    {
+    case fasta:
+        cout << "FASTA\n";
+        break;
+    case fastq:
+        cout << "FASTQ\n";
+        break;
+    case multiline_fasta:
+        cout << "MULTI LINE FASTA\n";
+        break;
+    }
+    cout << "\n";
+    cout << "k-mer length : " << Params.kmer_len << "\n";
+    cout << "Max. k-mer length : " << MAX_K << "\n";
+    cout << "Signature length : " << Params.signature_len << "\n";
+    cout << "Min. count threshold : " << Params.cutoff_min << "\n";
+    cout << "Max. count threshold : " << Params.cutoff_max << "\n";
+    cout << "Max. counter value : " << Params.counter_max << "\n";
+    cout << "Type of counters : " << (Params.use_quake ? "Quake-compatible\n" : "direct\n");
+    // The quality threshold is only printed in Quake mode
+    if(Params.use_quake)
+        cout << "Lowest quality value : " << Params.lowest_quality << "\n";
+    cout << "Both strands : " << (Params.both_strands ? "true\n" : "false\n");
+    cout << "RAM only mode : " << (Params.mem_mode ? "true\n" : "false\n");
+
+    cout << "\n******* Stage 1 configuration: *******\n";
+    cout << "\n";
+    cout << "No. of bins : " << Params.n_bins << "\n";
+    cout << "Bin part size : " << Params.bin_part_size << "\n";
+    cout << "Input buffer size : " << Params.fastq_buffer_size << "\n";
+    cout << "\n";
+
+    cout << "No. of readers : " << Params.n_readers << "\n";
+    cout << "No. of splitters : " << Params.n_splitters << "\n";
+    cout << "\n";
+
+    // Memory figures are printed in units of 10^6 bytes
+    cout << "Max. mem. size : " << setw(5) << (Params.max_mem_size / 1000000) << "MB\n";
+    cout << "Max. mem. per storer : " << setw(5) << (Params.max_mem_storer / 1000000) << "MB\n";
+    cout << "Max. mem. for single package : " << setw(5) << (Params.max_mem_storer_pkg / 1000000) << "MB\n";
+    cout << "\n";
+
+    cout << "Max. mem. for PMM (bin parts): " << setw(5) << (Params.mem_tot_pmm_bins / 1000000) << "MB\n";
+    cout << "Max. mem. for PMM (FASTQ) : " << setw(5) << (Params.mem_tot_pmm_fastq / 1000000) << "MB\n";
+    cout << "Max. mem. for PMM (reads) : " << setw(5) << (Params.mem_tot_pmm_reads / 1000000) << "MB\n";
+
+    cout << "\n";
+}
+
+//----------------------------------------------------------------------------------
+// Show the settings of the KMC (in verbose mode only)
+// Prints the stage-2 configuration to stdout; silent unless Params.verbose is set.
+template <typename KMER_T, unsigned SIZE, bool QUAKE_MODE> void CKMC<KMER_T, SIZE, QUAKE_MODE>::ShowSettingsStage2()
+{
+    if (!Params.verbose)
+        return;
+
+    cout << "\n******* Stage 2 configuration: *******\n";
+
+    cout << "No. of sorters : " << Params.n_sorters << "\n";
+    cout << "No. of sort. threads : ";
+    // Comma-separated list. Writing the separator before every element but the first
+    // avoids the unsigned "size() - 1" bound and back(), both of which misbehave on an empty vector.
+    for (size_t i = 0; i < Params.n_omp_threads.size(); ++i)
+    {
+        if (i)
+            cout << ", ";
+        cout << Params.n_omp_threads[i];
+    }
+    cout << "\n";
+
+    cout << "\n";
+
+    cout << "Max. mem. for 2nd stage : " << setw(5) << (Params.max_mem_stage2 / 1000000) << "MB\n";
+    cout << "\n";
+}
+//----------------------------------------------------------------------------------
+// Run the counter
+template <typename KMER_T, unsigned SIZE, bool QUAKE_MODE> bool CKMC<KMER_T, SIZE, QUAKE_MODE>::Process()
+{
+    // Runs the whole counting pipeline:
+    //   stage 0 - statistics readers/splitters feed the signature mapper,
+    //   stage 1 - readers, splitters and the bin storer distribute the input into bins,
+    //   stage 2 - bin reader, sorters and the completer process the bins,
+    // and finally removes the temporary bin files.
+    // Returns false when SetParams() was not called or AdjustMemoryLimits() fails, true otherwise.
+    int32 bin_id;
+    CMemDiskFile *file;
+    string name;
+    uint64 size;
+    uint64 n_rec;
+    uint64 n_plus_x_recs;
+    uint64 n_super_kmers;
+
+    if (!initialized)
+        return false;
+
+    if (!AdjustMemoryLimits())
+        return false;
+
+    w1.startTimer();
+
+    // Create monitors
+    // NOTE(review): within this file Params.max_mem_stage2 is only assigned later
+    // (AdjustMemoryLimitsStage2 and below), so the value passed here may not be meaningful yet - confirm.
+    Queues.mm = new CMemoryMonitor(Params.max_mem_stage2);
+
+    // Create queues
+    Queues.input_files_queue = new CInputFilesQueue(Params.input_file_names);
+    Queues.part_queue = new CPartQueue(Params.n_readers);
+    Queues.bpq = new CBinPartQueue(Params.n_splitters);
+    Queues.bd = new CBinDesc;
+    Queues.bq = new CBinQueue(1);
+
+    Queues.stats_part_queue = new CStatsPartQueue(Params.n_readers, STATS_FASTQ_SIZE);
+
+    // Create memory manager
+    Queues.pmm_bins = new CMemoryPool(Params.mem_tot_pmm_bins, Params.mem_part_pmm_bins);
+    Queues.pmm_fastq = new CMemoryPool(Params.mem_tot_pmm_fastq, Params.mem_part_pmm_fastq);
+    Queues.pmm_reads = new CMemoryPool(Params.mem_tot_pmm_reads, Params.mem_part_pmm_reads);
+    Queues.pmm_stats = new CMemoryPool(Params.mem_tot_pmm_stats, Params.mem_part_pmm_stats);
+
+    Queues.s_mapper = new CSignatureMapper(Queues.pmm_stats, Params.signature_len);
+
+    // ***** Stage 0 *****
+    w0.startTimer();
+    w_stats_splitters.resize(Params.n_splitters);
+
+    for (int i = 0; i < Params.n_splitters; ++i)
+    {
+        w_stats_splitters[i] = new CWStatsSplitter<false>(Params, Queues);
+        gr0_2.push_back(thread(std::ref(*w_stats_splitters[i])));
+    }
+
+    w_stats_fastqs.resize(Params.n_readers);
+
+    for (int i = 0; i < Params.n_readers; ++i)
+    {
+        w_stats_fastqs[i] = new CWStatsFastqReader(Params, Queues);
+        gr0_1.push_back(thread(std::ref(*w_stats_fastqs[i])));
+    }
+    for (auto p = gr0_1.begin(); p != gr0_1.end(); ++p)
+        p->join();
+    for (auto p = gr0_2.begin(); p != gr0_2.end(); ++p)
+        p->join();
+
+    // Zeroed statistics array handed to every statistics splitter's GetStats()
+    uint32 *stats;
+    Queues.pmm_stats->reserve(stats);
+    fill_n(stats, (1 << Params.signature_len * 2) + 1, 0);
+
+    for (int i = 0; i < Params.n_readers; ++i)
+        delete w_stats_fastqs[i];
+
+    for (int i = 0; i < Params.n_splitters; ++i)
+    {
+        w_stats_splitters[i]->GetStats(stats);
+        delete w_stats_splitters[i];
+    }
+
+    delete Queues.stats_part_queue;
+    Queues.stats_part_queue = NULL;
+    // A fresh input queue is created for stage 1
+    delete Queues.input_files_queue;
+    Queues.input_files_queue = new CInputFilesQueue(Params.input_file_names);
+
+    heuristic_time.startTimer();
+    Queues.s_mapper->Init(stats);
+    heuristic_time.stopTimer();
+
+    cout << "\n";
+
+    w0.stopTimer();
+
+    Queues.pmm_stats->free(stats);
+    Queues.pmm_stats->release();
+    delete Queues.pmm_stats;
+    Queues.pmm_stats = NULL;
+
+    Params.n_bins = Queues.s_mapper->get_max_bin_no() + 1;
+
+    // ***** Stage 1 *****
+    ShowSettingsStage1();
+
+    w_splitters.resize(Params.n_splitters);
+
+    for(int i = 0; i < Params.n_splitters; ++i)
+    {
+        w_splitters[i] = new CWSplitter<QUAKE_MODE>(Params, Queues);
+        gr1_2.push_back(thread(std::ref(*w_splitters[i])));
+    }
+
+    w_storer = new CWKmerBinStorer(Params, Queues);
+    gr1_3.push_back(thread(std::ref(*w_storer)));
+
+    w_fastqs.resize(Params.n_readers);
+    for(int i = 0; i < Params.n_readers; ++i)
+    {
+        w_fastqs[i] = new CWFastqReader(Params, Queues);
+        gr1_1.push_back(thread(std::ref(*w_fastqs[i])));
+    }
+
+    for(auto p = gr1_1.begin(); p != gr1_1.end(); ++p)
+        p->join();
+    for(auto p = gr1_2.begin(); p != gr1_2.end(); ++p)
+        p->join();
+
+    // Readers and splitters are done; their pools are released before waiting for the storer
+    Queues.pmm_fastq->release();
+    Queues.pmm_reads->release();
+
+    delete Queues.pmm_fastq;
+    delete Queues.pmm_reads;
+
+    for(auto p = gr1_3.begin(); p != gr1_3.end(); ++p)
+        p->join();
+
+    n_reads = 0;
+
+    // Stage-1 clean-up runs on two helper threads that are joined right below
+    thread *release_thr_st1_1 = new thread([&]{
+        for(int i = 0; i < Params.n_readers; ++i)
+            delete w_fastqs[i];
+
+        for(int i = 0; i < Params.n_splitters; ++i)
+        {
+            uint64 _n_reads;
+            w_splitters[i]->GetTotal(_n_reads);
+            n_reads += _n_reads;
+            delete w_splitters[i];
+        }
+
+        delete w_storer;
+    });
+
+    thread *release_thr_st1_2 = new thread([&]{
+        Queues.pmm_bins->release();
+        delete Queues.pmm_bins;
+    });
+
+    release_thr_st1_1->join();
+    release_thr_st1_2->join();
+
+    delete release_thr_st1_1;
+    delete release_thr_st1_2;
+
+    w1.stopTimer();
+    w2.startTimer();
+
+    // ***** End of Stage 1 *****
+
+    // Adjust RAM for 2nd stage
+    // Calculate LUT size: among prefix lengths 2..15 that leave a suffix length divisible by 4,
+    // pick the one with the smallest estimated suffix + LUT memory
+    uint32 best_lut_prefix_len = 0;
+    uint64 best_mem_amount = 1ull << 62;
+
+    for (Params.lut_prefix_len = 2; Params.lut_prefix_len < 16; ++Params.lut_prefix_len)
+    {
+        uint32 suffix_len = Params.kmer_len - Params.lut_prefix_len;
+        if (suffix_len % 4)
+            continue;
+
+        uint64 est_suf_mem = n_reads * suffix_len;
+        uint64 lut_mem = Params.n_bins * (1ull << (2 * Params.lut_prefix_len)) * sizeof(uint64);
+
+        if (est_suf_mem + lut_mem < best_mem_amount)
+        {
+            best_lut_prefix_len = Params.lut_prefix_len;
+            best_mem_amount = est_suf_mem + lut_mem;
+        }
+    }
+
+    Params.lut_prefix_len = best_lut_prefix_len;
+
+#ifdef DEVELOP_MODE
+    save_bins_stats(Queues, Params, sizeof(KMER_T), KMER_T::QUALITY_SIZE, n_reads);
+#endif
+
+    Queues.bd->reset_reading();
+    vector<int64> bin_sizes;
+
+    while((bin_id = Queues.bd->get_next_bin()) >= 0)
+    {
+        Queues.bd->read(bin_id, file, name, size, n_rec, n_plus_x_recs, n_super_kmers);
+        if (Params.max_x)
+            bin_sizes.push_back(n_plus_x_recs * 2 * sizeof(KMER_T)); // estimation of RAM for sorting bins
+        else
+            bin_sizes.push_back(n_rec * 2 * sizeof(KMER_T));
+    }
+
+    // Largest bins first
+    sort(bin_sizes.begin(), bin_sizes.end(), greater<int64>());
+
+    SetThreads2Stage(bin_sizes);
+    AdjustMemoryLimitsStage2();
+
+    Queues.kq = new CKmerQueue(Params.n_bins, Params.n_sorters);
+
+    // Cap the stage-2 budget at the total size of the 4 * n_sorters largest bins (at least 16 << 20).
+    // The loop is bounded by the number of bins actually present; the previous code read
+    // past the end of bin_sizes when fewer than 4 * n_sorters bins existed.
+    int64 stage2_size = 0;
+    for (size_t i = 0; i < bin_sizes.size() && i < (size_t)(4 * Params.n_sorters); ++i)
+        stage2_size += bin_sizes[i];
+    stage2_size = MAX(stage2_size, 16 << 20);
+    Params.max_mem_stage2 = MIN(Params.max_mem_stage2, stage2_size);
+
+    ShowSettingsStage2();
+
+    // ***** Stage 2 *****
+    Queues.bd->reset_reading();
+    Queues.pmm_radix_buf = new CMemoryPool(Params.mem_tot_pmm_radix_buf, Params.mem_part_pmm_radix_buf );
+    if (!Params.use_quake && Params.both_strands)
+        Queues.pmm_expand = new CMemoryPool(Params.mem_tot_pmm_epxand, Params.mem_part_pmm_epxand);
+    else
+        Queues.pmm_expand = NULL;
+    Queues.memory_bins = new CMemoryBins(Params.max_mem_stage2, Params.n_bins);
+    // NOTE(review): pmm_prob is not released or deleted anywhere in this function - confirm it is freed elsewhere.
+    if (Params.use_quake)
+        Queues.pmm_prob = new CMemoryPool(Params.mem_tot_pmm_prob, Params.mem_part_pmm_prob);
+    else
+        Queues.pmm_prob = NULL;
+    w_reader = new CWKmerBinReader<KMER_T, SIZE>(Params, Queues);
+    gr2_1.push_back(thread(std::ref(*w_reader)));
+
+    w_sorters.resize(Params.n_sorters);
+
+    for(int i = 0; i < Params.n_sorters; ++i)
+    {
+        w_sorters[i] = new CWKmerBinSorter<KMER_T, SIZE>(Params, Queues, i);
+        gr2_2.push_back(thread(std::ref(*w_sorters[i])));
+    }
+
+    w_completer = new CWKmerBinCompleter(Params, Queues);
+    gr2_3.push_back(thread(std::ref(*w_completer)));
+
+    for(auto p = gr2_1.begin(); p != gr2_1.end(); ++p)
+        p->join();
+    for(auto p = gr2_2.begin(); p != gr2_2.end(); ++p)
+        p->join();
+
+    for(auto p = gr2_3.begin(); p != gr2_3.end(); ++p)
+        p->join();
+
+    // ***** End of Stage 2 *****
+    w_completer->GetTotal(n_unique, n_cutoff_min, n_cutoff_max, n_total);
+
+    // Stage-2 clean-up runs on two helper threads while the temporary files are removed below
+    thread *release_thr_st2_1 = new thread([&]{
+        delete Queues.mm;
+        if (Queues.pmm_expand)
+        {
+            Queues.pmm_expand->release();
+            delete Queues.pmm_expand;
+        }
+        Queues.pmm_radix_buf->release();
+        Queues.memory_bins->release();
+        delete Queues.pmm_radix_buf;
+        delete Queues.memory_bins;
+    });
+
+    uint64 stat_n_plus_x_recs, stat_n_recs, stat_n_recs_tmp, stat_n_plus_x_recs_tmp;
+    stat_n_plus_x_recs = stat_n_recs = stat_n_recs_tmp = stat_n_plus_x_recs_tmp = 0;
+    thread *release_thr_st2_2 = new thread([&]{
+
+        delete w_reader;
+        for(int i = 0; i < Params.n_sorters; ++i)
+        {
+            w_sorters[i]->GetDebugStats(stat_n_recs_tmp, stat_n_plus_x_recs_tmp);
+            stat_n_plus_x_recs += stat_n_plus_x_recs_tmp;
+            stat_n_recs += stat_n_recs_tmp;
+            delete w_sorters[i];
+        }
+        delete w_completer;
+
+        delete Queues.input_files_queue;
+        delete Queues.bq;
+        delete Queues.part_queue;
+        delete Queues.bpq;
+        delete Queues.kq;
+    });
+
+    // ***** Removing temporary files *****
+
+    tmp_size = 0;
+    n_total_super_kmers = 0;
+    Queues.bd->reset_reading();
+    while((bin_id = Queues.bd->get_next_bin()) >= 0)
+    {
+        Queues.bd->read(bin_id, file, name, size, n_rec, n_plus_x_recs, n_super_kmers);
+#ifndef DEVELOP_MODE
+        boost::filesystem::remove(boost::filesystem::path(name));
+#endif // DEVELOP_MODE
+        tmp_size += size;
+        n_total_super_kmers += n_super_kmers;
+    }
+    delete Queues.bd;
+
+    release_thr_st2_1->join();
+    release_thr_st2_2->join();
+
+    //KMC_2
+    //cout << "n_recs: " << stat_n_recs << ", n_plus_x_recs: " << stat_n_plus_x_recs << "(" << (float)(stat_n_plus_x_recs) / stat_n_recs << ")\n";
+
+    delete release_thr_st2_1;
+    delete release_thr_st2_2;
+    delete Queues.s_mapper;
+    w2.stopTimer();
+
+    return true;
+}
+
+//----------------------------------------------------------------------------------
+// Return statistics
+template <typename KMER_T, unsigned SIZE, bool QUAKE_MODE> void CKMC<KMER_T, SIZE, QUAKE_MODE>::GetStats(double &time1,
+    double &time2, uint64 &_n_unique, uint64 &_n_cutoff_min, uint64 &_n_cutoff_max, uint64 &_n_total, uint64 &_n_reads, uint64 &_tmp_size, uint64& _n_total_super_kmers)
+{
+    // Counters gathered by Process(), copied out unchanged
+    _n_reads = n_reads;
+    _n_total = n_total;
+    _n_unique = n_unique;
+    _n_cutoff_min = n_cutoff_min;
+    _n_cutoff_max = n_cutoff_max;
+    _n_total_super_kmers = n_total_super_kmers;
+    _tmp_size = tmp_size;
+
+    // Elapsed times of the w1 and w2 stop watches
+    time1 = w1.getElapsedTime();
+    time2 = w2.getElapsedTime();
+}
+
+#endif
+
+// ***** EOF
diff --git a/kmer_counter/kmer.cpp b/kmer_counter/kmer.cpp
new file mode 100755
index 0000000..557e24e
--- /dev/null
+++ b/kmer_counter/kmer.cpp
@@ -0,0 +1,18 @@
+#include "stdafx.h"
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#include "defs.h"
+#include "kmer.h"
+
+// Explicit specialisations of the static QUALITY_SIZE member for SIZE == 1.
+// Standard C++ requires the "template<>" prefix on the definition of a static
+// data member of an explicitly specialised class template instance; without it
+// conforming compilers reject these lines.
+template<> uint32 CKmer<1>::QUALITY_SIZE = 0;
+template<> uint32 CKmerQuake<1>::QUALITY_SIZE = 4;
+
+// ***** EOF
diff --git a/kmer_counter/kmer.h b/kmer_counter/kmer.h
new file mode 100755
index 0000000..f91e9e6
--- /dev/null
+++ b/kmer_counter/kmer.h
@@ -0,0 +1,1049 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#ifndef _KMER_H
+#define _KMER_H
+
+// Important remark: there is no inheritance here to guarantee that all classes defined here are POD according to C++11
+
+#include "meta_oper.h"
+#include <string>
+
+// *************************************************************************
+// Ckmer class for k > 32 with classic kmer counting
+// k-mer stored in SIZE 64-bit words. data[SIZE-1] is the most significant word for
+// ordering (operator< compares from the highest index down). Positions given to the
+// *_2bits / set_bits helpers are bit offsets: word index = p >> 6, bit within word = p & 63.
+// Only declarations here; the definitions follow the struct.
+template<unsigned SIZE> struct CKmer {
+ unsigned long long data[SIZE];
+
+
+ typedef unsigned long long data_t;
+ static uint32 QUALITY_SIZE;
+
+ // Copy x into this object
+ inline void set(const CKmer<SIZE> &x);
+
+ // Copy x shifted right by 2*_shr bits and then AND-ed with _mask
+ inline void from_kxmer(const CKmer<SIZE>& x, uint32 _shr, const CKmer<SIZE>& _mask);
+
+ // Copy this object into the first SIZE words of x after zeroing x's last word
+ template<unsigned X_SIZE> inline void to_kxmer(CKmer<X_SIZE>& x);
+
+ inline void mask(const CKmer<SIZE> &x);
+ inline uint32 end_mask(const uint32 mask);
+ inline void set_2bits(const uint64 x, const uint32 p);
+ inline uchar get_2bits(const uint32 p);
+ // Byte accessors: p is a byte index (word = p >> 3)
+ inline uchar get_byte(const uint32 p);
+ inline void set_byte(const uint32 p, uchar x);
+ inline void set_bits(const uint32 p, const uint32 n, uint64 x);
+
+ inline void SHL_insert_2bits(const uint64 x);
+ inline void SHR_insert_2bits(const uint64 x, const uint32 p);
+
+ // Whole-value shifts by 2*p bits
+ inline void SHR(const uint32 p);
+ inline void SHL(const uint32 p);
+
+ inline uint64 remove_suffix(const uint32 n) const;
+ inline void set_n_1(const uint32 n);
+ inline void set_n_01(const uint32 n);
+
+ // Byte-wise serialisation of the n lowest bytes, highest byte first
+ inline void store(uchar *&buffer, int32 n);
+ inline void store(uchar *buffer, int32 p, int32 n);
+ inline void load(uchar *&buffer, int32 n);
+
+ inline bool operator==(const CKmer<SIZE> &x);
+ inline bool operator<(const CKmer<SIZE> &x);
+
+ inline void clear(void);
+
+ inline char get_symbol(int p);
+};
+
+// Generic definition of the static member: QUALITY_SIZE is 0 for every CKmer<SIZE>.
+template <unsigned SIZE> uint32 CKmer<SIZE>::QUALITY_SIZE = 0;
+
+// *********************************************************************
+template<unsigned SIZE> inline void CKmer<SIZE>::set(const CKmer<SIZE> &x)
+{
+    // Word-by-word copy of x into this object
+#ifdef USE_META_PROG
+    IterFwd([&](const int &word){
+        data[word] = x.data[word];
+    }, uint_<SIZE-1>());
+#else
+    uint32 word = 0;
+    while(word < SIZE)
+    {
+        data[word] = x.data[word];
+        ++word;
+    }
+#endif
+}
+
+
+// *********************************************************************
+template<unsigned SIZE>
+template<unsigned X_SIZE> inline void CKmer<SIZE>::to_kxmer(CKmer<X_SIZE>& x)
+{
+    // Zero the top word of the destination first, then copy the SIZE source words over
+    x.data[X_SIZE - 1] = 0;
+#ifdef USE_META_PROG
+    IterFwd([&](const int &word){
+        x.data[word] = data[word];
+    }, uint_<SIZE - 1>());
+#else
+    uint32 word = 0;
+    while (word < SIZE)
+    {
+        x.data[word] = data[word];
+        ++word;
+    }
+#endif
+}
+
+// *********************************************************************
+template<unsigned SIZE> inline void CKmer<SIZE>::from_kxmer(const CKmer<SIZE>& x, uint32 _shr, const CKmer<SIZE>& _mask)
+{
+    // Result = (x >> 2*_shr) & _mask. The unshifted case is handled separately so
+    // that the carry term is never shifted by a full 64 bits.
+    if (!_shr)
+    {
+#ifdef USE_META_PROG
+        IterFwd([&](const int &word){
+            data[word] = x.data[word];
+        }, uint_<SIZE - 1>());
+#else
+        for (uint32 word = 0; word < SIZE; ++word)
+            data[word] = x.data[word];
+#endif
+    }
+    else
+    {
+        // Every word but the last takes its own bits shifted down plus the low bits of the next word
+#ifdef USE_META_PROG
+        IterFwd([&](const int &word){
+            data[word] = x.data[word] >> (2 * _shr);
+            data[word] += x.data[word + 1] << (64 - 2 * _shr);
+        }, uint_<SIZE - 2>());
+#else
+        for (uint32 word = 0; word < SIZE - 1; ++word)
+        {
+            data[word] = x.data[word] >> (2 * _shr);
+            data[word] += x.data[word + 1] << (64 - 2 * _shr);
+        }
+#endif
+        data[SIZE - 1] = x.data[SIZE - 1] >> (2 * _shr);
+    }
+    mask(_mask);
+}
+
+
+// *********************************************************************
+template<unsigned SIZE> inline void CKmer<SIZE>::mask(const CKmer<SIZE> &x)
+{
+    // Bitwise AND with x, one word at a time
+#ifdef USE_META_PROG
+    IterFwd([&](const int &word){
+        data[word] &= x.data[word];
+    }, uint_<SIZE-1>());
+#else
+    uint32 word = 0;
+    while(word < SIZE)
+    {
+        data[word] &= x.data[word];
+        ++word;
+    }
+#endif
+}
+
+// *********************************************************************
+template<unsigned SIZE> inline uint32 CKmer<SIZE>::end_mask(const uint32 mask)
+{
+    // Lowest word AND-ed with the given mask
+    const data_t lowest_word = data[0];
+    return lowest_word & mask;
+}
+// *********************************************************************
+template<unsigned SIZE> inline void CKmer<SIZE>::set_2bits(const uint64 x, const uint32 p)
+{
+    // Adds x at bit offset p. Addition is used instead of OR, so the result only
+    // equals "setting" the bits when the target bits are currently zero.
+    const uint32 word = p >> 6;
+    const uint32 shift = p & 63;
+    data[word] += x << shift;
+}
+
+// Returns the two bits stored at bit offset p.
+template<unsigned SIZE> inline uchar CKmer<SIZE>::get_2bits(const uint32 p)
+{
+    const uint32 word = p >> 6;
+    const uint32 shift = p & 63;
+    return (data[word] >> shift) & 3;
+}
+// *********************************************************************
+template<unsigned SIZE> inline void CKmer<SIZE>::SHR_insert_2bits(const uint64 x, const uint32 p)
+{
+    // Shift the whole value right by 2 bits: each word receives the two low bits of
+    // its upper neighbour in its own (just vacated) top two bits, so + acts like |.
+#ifdef USE_META_PROG
+    IterFwd([&](const int &word){
+        data[word] >>= 2;
+        data[word] += data[word+1] << (64-2);
+    }, uint_<SIZE-2>());
+#else
+    uint32 word = 0;
+    while(word < SIZE-1)
+    {
+        data[word] >>= 2;
+        data[word] += data[word+1] << (64-2);
+        ++word;
+    }
+#endif
+    data[SIZE-1] >>= 2;
+
+    // Then add x at bit offset p
+    const uint32 target_word = p >> 6;
+    const uint32 target_shift = p & 63;
+    data[target_word] += x << target_shift;
+}
+
+
+
+// *********************************************************************
+// Shifts the whole multi-word value right by 2*p bits.
+template<unsigned SIZE> inline void CKmer<SIZE>::SHR(const uint32 p)
+{
+    // p == 0 must be a no-op. Without this guard the carry term below would be
+    // shifted by 64 bits, which is undefined for a 64-bit operand.
+    if(p == 0)
+        return;
+
+#ifdef USE_META_PROG
+    IterFwd([&](const int &i){
+        data[i] >>= 2*p;
+        // the vacated top bits are zero, so adding the carry equals OR-ing it in
+        data[i] += data[i+1] << (64-2*p);
+    }, uint_<SIZE-2>());
+#else
+    for(uint32 i = 0; i < SIZE-1; ++i)
+    {
+        data[i] >>= 2*p;
+        // the vacated top bits are zero, so adding the carry equals OR-ing it in
+        data[i] += data[i+1] << (64-2*p);
+    }
+#endif
+    data[SIZE-1] >>= 2*p;
+}
+
+// *********************************************************************
+// Shifts the whole multi-word value left by 2*p bits.
+template<unsigned SIZE> inline void CKmer<SIZE>::SHL(const uint32 p)
+{
+    // p == 0 must be a no-op. Without this guard the carry term below would be
+    // shifted by 64 bits, which is undefined for a 64-bit operand.
+    if(p == 0)
+        return;
+
+#ifdef USE_META_PROG
+    IterRev([&](const int &i){
+        data[i+1] <<= p*2;
+        // the vacated low bits are zero, so adding the carry equals OR-ing it in
+        data[i+1] += data[i] >> (64-p*2);
+    }, uint_<SIZE-2>());
+#else
+    // walk from the top word down so every word still sees its lower neighbour unshifted
+    for(uint32 i = SIZE-1; i > 0; --i)
+    {
+        data[i] <<= p*2;
+        // the vacated low bits are zero, so adding the carry equals OR-ing it in
+        data[i] += data[i-1] >> (64-p*2);
+    }
+#endif
+    data[0] <<= p*2;
+}
+
+// *********************************************************************
+template<unsigned SIZE> inline void CKmer<SIZE>::SHL_insert_2bits(const uint64 x)
+{
+    // Shift the whole value left by 2 bits, carrying the two top bits of every word
+    // into the (just vacated) two low bits of its upper neighbour.
+#ifdef USE_META_PROG
+    IterRev([&](const int &word){
+        data[word+1] <<= 2;
+        data[word+1] += data[word] >> (64-2);
+    }, uint_<SIZE-2>());
+#else
+    uint32 word = SIZE-1;
+    while(word > 0)
+    {
+        data[word] <<= 2;
+        data[word] += data[word-1] >> (64-2);
+        --word;
+    }
+#endif
+    // The lowest word is shifted last and x is added into its vacated low bits
+    data[0] <<= 2;
+    data[0] += x;
+}
+
+// *********************************************************************
+template<unsigned SIZE> inline uchar CKmer<SIZE>::get_byte(const uint32 p)
+{
+    // Byte p lives in word p / 8, at bit offset 8 * (p % 8)
+    const uint32 word = p >> 3;
+    const uint32 shift = (p & 7) << 3;
+    return (data[word] >> shift) & 0xFF;
+}
+
+// *********************************************************************
+template<unsigned SIZE> inline void CKmer<SIZE>::set_byte(const uint32 p, uchar x)
+{
+    // Adds x into byte p (word p / 8, bit offset 8 * (p % 8)). Addition is used
+    // instead of OR, so the byte is only "set" when it was zero before.
+    const uint32 word = p >> 3;
+    const uint32 shift = (p & 7) << 3;
+    data[word] += ((uint64) x) << shift;
+}
+
+// *********************************************************************
+template<unsigned SIZE> inline void CKmer<SIZE>::set_bits(const uint32 p, const uint32 n, uint64 x)
+{
+    // Adds the n-bit value x at bit offset p; the part that does not fit into the
+    // first word spills into the next one.
+    const uint32 first_word = p >> 6;
+    const uint32 last_word = (p+n-1) >> 6;
+    const uint32 shift = p & 63;
+
+    data[first_word] += x << shift;
+    if(first_word != last_word)
+        data[first_word + 1] += x >> (64 - shift);
+}
+
+// *********************************************************************
+template<unsigned SIZE> inline bool CKmer<SIZE>::operator==(const CKmer<SIZE> &x) {
+    // Equal when no word differs: advance past the matching words and check that the end was reached
+    uint32 word = 0;
+    while(word < SIZE && data[word] == x.data[word])
+        ++word;
+
+    return word == SIZE;
+}
+
+// *********************************************************************
+template<unsigned SIZE> inline bool CKmer<SIZE>::operator<(const CKmer<SIZE> &x) {
+    // Compare from the highest word down; the first differing word decides
+    uint32 word = SIZE;
+    while(word-- > 0)
+    {
+        if(data[word] != x.data[word])
+            return data[word] < x.data[word];
+    }
+    // all words equal
+    return false;
+}
+
+
+
+// *********************************************************************
+template<unsigned SIZE> inline void CKmer<SIZE>::clear(void) // set every word of the k-mer to zero
+{
+#ifdef USE_META_PROG
+ IterFwd([&](const int &i){ // presumably invoked for i = 0 .. SIZE-1, like the loop below -- confirm against IterFwd's definition
+ data[i] = 0;
+ }, uint_<SIZE-1>());
+#else
+ for(uint32 i = 0; i < SIZE; ++i)
+ data[i] = 0;
+#endif
+}
+
+// *********************************************************************
+template<unsigned SIZE> inline uint64 CKmer<SIZE>::remove_suffix(const uint32 n) const
+{	// Returns the 64 bits that start at bit n (zero-filled past the last word); requires n < 64*SIZE.
+	const uint32 p = n >> 6;	// index of the word holding bit n
+	const uint32 r = n & 63;	// offset of bit n inside that word
+
+	if(p == SIZE-1 || r == 0)	// nothing comes from data[p+1]; r == 0 would otherwise shift by 64, which is undefined
+		return data[p] >> r;
+	else
+		// 1 <= r <= 63: the two parts occupy disjoint bits, so '+' gives the same result as '|'
+		return (data[p+1] << (64-r)) + (data[p] >> r);
+}
+
+// *********************************************************************
+template<unsigned SIZE> inline void CKmer<SIZE>::set_n_1(const uint32 n)
+{
+	// Result: the n lowest bits are 1 and every other bit is 0.
+	clear();
+
+	const uint32 full_words = n / 64;	// words that become all ones
+	const uint32 rest_bits = n % 64;	// ones left over for the next word
+	for(uint32 w = 0; w < full_words; ++w)
+		data[w] = ~((uint64) 0);
+	if(rest_bits != 0)
+		data[full_words] = (1ull << rest_bits) - 1;
+}
+
+
+// *********************************************************************
+template<unsigned SIZE> inline void CKmer<SIZE>::set_n_01(const uint32 n)
+{
+	// Result: among the n lowest bits, every even-numbered bit (0, 2, 4, ...) is 1; all other bits are 0.
+	clear();
+
+	// stepping by two visits exactly the even positions below n
+	for(uint32 bit = 0; bit < n; bit += 2)
+		data[bit / 64] += (1ull << (bit % 64));
+}
+
+// *********************************************************************
+template<unsigned SIZE> inline void CKmer<SIZE>::store(uchar *&buffer, int32 n)
+{	// write bytes n-1 .. 0 (highest first) and leave buffer pointing just past them
+	for(int32 left = n; left > 0; --left)
+		*buffer++ = get_byte(left - 1);
+}
+
+// *********************************************************************
+template<unsigned SIZE> inline void CKmer<SIZE>::store(uchar *buffer, int32 p, int32 n)
+{	// write bytes n-1 .. 0 (highest first) into buffer[p], buffer[p+1], ...
+	for(int32 left = n; left > 0; --left, ++p)
+		buffer[p] = get_byte(left - 1);
+}
+
+// *********************************************************************
+template<unsigned SIZE> inline void CKmer<SIZE>::load(uchar *&buffer, int32 n)
+{	// inverse of store(buffer, n): the first byte read becomes byte n-1; buffer ends up just past the n bytes
+	clear();	// set_byte() adds into the word, so start from zero
+	for(int32 left = n; left > 0; --left)
+		set_byte(left - 1, *buffer++);
+}
+
+// *********************************************************************
+template<unsigned SIZE> inline char CKmer<SIZE>::get_symbol(int p)
+{
+	// Decode the 2-bit code of symbol p (for p >= 0: word p/32, bit offset 2*(p%32)).
+	const uint32 code = (data[p >> 5] >> (2*(p & 31))) & 0x03;
+	if(code == 0)
+		return 'A';
+	if(code == 1)
+		return 'C';
+	if(code == 2)
+		return 'G';
+	return 'T';
+}
+
+// *********************************************************************
+// *********************************************************************
+// *********************************************************************
+// *********************************************************************
+// Ckmer class for k <= 32 with classic kmer counting
+template<> struct CKmer<1> {
+ unsigned long long data; // the whole k-mer, kept in one word instead of an array
+
+
+ typedef unsigned long long data_t;
+ static uint32 QUALITY_SIZE;
+
+ void set(const CKmer<1> &x); // copy x.data
+
+ void from_kxmer(const CKmer<1>& x, uint32 _shr, const CKmer<1>& _mask); // data = (x.data >> 2*_shr) & _mask.data
+
+ template <unsigned X_SIZE> void to_kxmer(CKmer<X_SIZE>& x); // copy data into x (word 0 of a wider x, whose top word is zeroed)
+
+ void mask(const CKmer<1> &x); // data &= x.data
+ uint32 end_mask(const uint32 mask); // data & mask
+ void set_2bits(const uint64 x, const uint32 p); // add x << p into data
+ uchar get_2bits(const uint32 p); // (data >> p) & 3
+ uchar get_byte(const uint32 p); // byte number p of data, byte 0 being the lowest
+ void set_byte(const uint32 p, uchar x); // add x into byte position p
+ void set_bits(const uint32 p, const uint32 n, uint64 x); // add x << p into data; n is not used by this specialization
+
+ void SHL_insert_2bits(const uint64 x); // data = (data << 2) + x
+ void SHR_insert_2bits(const uint64 x, const uint32 p); // data = (data >> 2) + (x << p)
+
+ void SHR(const uint32 p); // shift right by 2*p bits
+ void SHL(const uint32 p); // shift left by 2*p bits
+
+ uint64 remove_suffix(const uint32 n) const; // data >> n
+ void set_n_1(const uint32 n); // set the n lowest bits, clear the rest
+ void set_n_01(const uint32 n); // set the even-numbered bits below n, clear the rest
+
+ void store(uchar *&buffer, int32 n); // write bytes n-1..0 and advance buffer
+ void store(uchar *buffer, int32 p, int32 n); // write bytes n-1..0 starting at buffer[p]
+ void load(uchar *&buffer, int32 n); // clear, then read n bytes (the first one read is byte n-1) and advance buffer
+
+ bool operator==(const CKmer<1> &x); // data == x.data
+ bool operator<(const CKmer<1> &x); // data < x.data
+
+ void clear(void); // data = 0
+
+ inline char get_symbol(int p); // letter A/C/G/T for the 2-bit code at bit 2*p
+};
+
+
+// *********************************************************************
+template <unsigned X_SIZE> inline void CKmer<1>::to_kxmer(CKmer<X_SIZE>&x)
+{	// copy this one-word k-mer into the wider x; only x's top word is zeroed, any words in between are left as they were
+	x.data[X_SIZE - 1] = 0ull;
+	x.data[0] = this->data;
+}
+
+// *********************************************************************
+template<> inline void CKmer<1>::to_kxmer(CKmer<1>& x)
+{	// same width on both sides: a plain copy of the single word
+	x.data = this->data;
+}
+
+
+// *********************************************************************
+inline void CKmer<1>::mask(const CKmer<1> &x)
+{	// keep only the bits that are also set in x
+	data = data & x.data;
+}
+
+
+// *********************************************************************
+inline uint32 CKmer<1>::end_mask(const uint32 mask)
+{	// bits of data selected by mask; mask is 32 bits wide, so the result fits the return type
+	return (uint32) (data & mask);
+}
+// *********************************************************************
+inline void CKmer<1>::set(const CKmer<1> &x)
+{	// become a copy of x
+	this->data = x.data;
+}
+
+// *********************************************************************
+inline void CKmer<1>::from_kxmer(const CKmer<1>& x, uint32 _shr, const CKmer<1>& _mask)
+{	// drop the 2*_shr lowest bits of x, then keep only the bits set in _mask
+	data = _mask.data & (x.data >> (2 * _shr));
+}
+
+
+// *********************************************************************
+inline void CKmer<1>::set_2bits(const uint64 x, const uint32 p)
+{
+	// x is added at bit offset p (not OR-ed); both give the same result only while the target bits are still zero
+	data = data + (x << p);
+}
+
+inline uchar CKmer<1>::get_2bits(const uint32 p)
+{	// the two bits found at bit offsets p and p+1
+	return (uchar) ((data >> p) & 0x03);
+}
+// *********************************************************************
+inline void CKmer<1>::SHR_insert_2bits(const uint64 x, const uint32 p)
+{
+	// Drop the 2 lowest bits, then add x at bit offset p.
+	// The addition equals an OR only while the bits x lands on are zero after the shift.
+	data = (data >> 2) + (x << p);
+}
+
+// *********************************************************************
+inline void CKmer<1>::SHR(const uint32 p)
+{	// shift right by p two-bit positions
+	data = data >> (2*p);
+}
+
+// *********************************************************************
+inline void CKmer<1>::SHL(const uint32 p)
+{	// shift left by p two-bit positions
+	data = data << (2*p);
+}
+// *********************************************************************
+inline void CKmer<1>::SHL_insert_2bits(const uint64 x)
+{
+	data <<= 2;	// make room: the 2 lowest bits are now zero
+	data += x;	// same as OR-ing x in, as long as x <= 3
+}
+
+// *********************************************************************
+inline uchar CKmer<1>::get_byte(const uint32 p)
+{	// byte number p of data, byte 0 being the lowest
+	return (data >> (8 * p)) & 0xFF;
+}
+
+// *********************************************************************
+inline void CKmer<1>::set_byte(const uint32 p, uchar x)
+{
+	// Adds (does not overwrite): equals a plain byte store only if byte p is currently zero, e.g. after clear().
+	data += ((uint64) x) << (8 * p);
+}
+
+// *********************************************************************
+inline void CKmer<1>::set_bits(const uint32 p, const uint32 n, uint64 x)
+{
+	// With a single word nothing can spill into a neighbour, so n is not used: x is simply added at bit offset p.
+	data = data + (x << p);
+}
+
+// *********************************************************************
+inline bool CKmer<1>::operator==(const CKmer<1> &x) {
+	return x.data == data;	// single-word comparison
+}
+
+// *********************************************************************
+inline bool CKmer<1>::operator<(const CKmer<1> &x) {
+	return x.data > data;	// unsigned comparison of the single word
+}
+
+// *********************************************************************
+inline void CKmer<1>::clear(void)
+{	// reset the k-mer to all zero bits
+	data = 0;
+}
+
+// *********************************************************************
+inline uint64 CKmer<1>::remove_suffix(const uint32 n) const
+{	// discard the n lowest bits; NOTE(review): n == 64 is not special-cased here, unlike in set_n_1 -- confirm callers keep n < 64
+	return (uint64) (data >> n);
+}
+
+// *********************************************************************
+inline void CKmer<1>::set_n_1(const uint32 n)
+{
+	// Result: the n lowest bits are 1, the rest 0.
+	// n == 64 is handled apart, so that 1ull is never shifted by 64.
+	const bool all_bits = (n == 64);
+	data = all_bits ? ~(0ull) : (1ull << n) - 1;
+}
+
+// *********************************************************************
+inline void CKmer<1>::set_n_01(const uint32 n)
+{
+	// Result: among the n lowest bits, every even-numbered bit (0, 2, 4, ...) is 1; all other bits are 0.
+	data = 0ull;
+
+	// stepping by two visits exactly the even positions below n
+	for(uint32 bit = 0; bit < n; bit += 2)
+		data += (1ull << bit);
+}
+
+// *********************************************************************
+inline void CKmer<1>::store(uchar *&buffer, int32 n)
+{	// write bytes n-1 .. 0 (highest first) and leave buffer pointing just past them
+	for(int32 left = n; left > 0; --left)
+		*buffer++ = get_byte(left - 1);
+}
+
+// *********************************************************************
+inline void CKmer<1>::store(uchar *buffer, int32 p, int32 n)
+{	// write bytes n-1 .. 0 (highest first) into buffer[p], buffer[p+1], ...
+	for(int32 left = n; left > 0; --left, ++p)
+		buffer[p] = get_byte(left - 1);
+}
+
+// *********************************************************************
+inline void CKmer<1>::load(uchar *&buffer, int32 n)
+{	// inverse of store(buffer, n): the first byte read becomes byte n-1; buffer ends up just past the n bytes
+	clear();	// set_byte() adds into data, so start from zero
+	for(int32 left = n; left > 0; --left)
+		set_byte(left - 1, *buffer++);
+}
+
+
+// *********************************************************************
+char CKmer<1>::get_symbol(int p)
+{
+	// Symbol p occupies the 2 bits starting at bit 2*p.
+	// Its code selects the letter: 0 -> 'A', 1 -> 'C', 2 -> 'G', 3 -> 'T'.
+	static const char alphabet[] = "ACGT";
+	const uint32 code = (data >> (2*p)) & 0x03;
+
+	// code is always in 0..3 thanks to the mask above
+	const char symbol = alphabet[code];
+
+	return symbol;
+}
+
+
+// *********************************************************************
+// *********************************************************************
+// *********************************************************************
+template<unsigned SIZE> struct CKmerQuake {
+ unsigned long long data[SIZE]; // k-mer bits; operator< treats the highest index as the most significant word
+ float quality; // value carried with the k-mer: copied by set(), zeroed by clear(), stored/loaded right after the k-mer bytes
+
+ typedef unsigned long long data_t;
+ static uint32 QUALITY_SIZE; // defined after the struct as sizeof(float)
+
+ inline void set(const CKmerQuake<SIZE> &x); // copy all words and quality from x
+ inline void mask(const CKmerQuake<SIZE> &x); // AND every word with the matching word of x; quality untouched
+ inline void set_2bits(const uint64 x, const uint32 p); // add x at bit position p
+ inline uchar get_byte(const uint32 p); // byte number p, byte 0 being the lowest byte of word 0
+ inline void set_byte(const uint32 p, uchar x); // add x into byte position p
+ inline void set_bits(const uint32 p, const uint32 n, uint64 x); // add x at bit position p, spilling into the next word when bits p..p+n-1 cross a word boundary
+
+ inline void SHL_insert_2bits(const uint64 x); // shift everything left by 2 bits, then add x into word 0
+ inline void SHR_insert_2bits(const uint64 x, const uint32 p); // shift everything right by 2 bits, then add x at bit position p
+
+ inline uint64 remove_suffix(const uint32 n); // the 64 bits starting at bit n
+ inline void set_n_1(const uint32 n); // set the n lowest bits, clear the rest, quality = 0
+ inline void set_n_01(const uint32 n); // set the even-numbered bits below n, clear the rest, quality = 0
+
+ inline void store(uchar *&buffer, int32 n); // write bytes n-1..0, then quality; advances buffer
+ inline void store(uchar *buffer, int32 p, int32 n); // same, starting at buffer[p]; buffer itself is not advanced
+ inline void load(uchar *&buffer, int32 n); // inverse of store(buffer, n)
+
+ inline bool operator==(const CKmerQuake<SIZE> &x); // compares the words only, not quality
+ inline bool operator<(const CKmerQuake<SIZE> &x); // compares the words only, not quality
+
+ inline void clear(void); // zero all words and quality
+
+ inline char get_symbol(int p); // letter A/C/G/T for the 2-bit code of symbol p
+};
+
+template <unsigned SIZE> uint32 CKmerQuake<SIZE>::QUALITY_SIZE = sizeof(float);
+
+// *********************************************************************
+template<unsigned SIZE> void CKmerQuake<SIZE>::set(const CKmerQuake<SIZE> &x) // copy all words and the quality value from x
+{
+#ifdef USE_META_PROG
+ IterFwd([&](const int &i){ // presumably invoked for i = 0 .. SIZE-1, like the loop below -- confirm against IterFwd's definition
+ data[i] = x.data[i];
+ }, uint_<SIZE-1>());
+#else
+ for(uint32 i = 0; i < SIZE; ++i)
+ data[i] = x.data[i];
+#endif
+ quality = x.quality;
+}
+
+// *********************************************************************
+template<unsigned SIZE> void CKmerQuake<SIZE>::mask(const CKmerQuake<SIZE> &x) // AND every word with the matching word of x; quality is left untouched
+{
+#ifdef USE_META_PROG
+ IterFwd([&](const int &i){ // presumably invoked for i = 0 .. SIZE-1, like the loop below -- confirm against IterFwd's definition
+ data[i] &= x.data[i];
+ }, uint_<SIZE-1>());
+#else
+ for(uint32 i = 0; i < SIZE; ++i)
+ data[i] &= x.data[i];
+#endif
+}
+
+// *********************************************************************
+template<unsigned SIZE> void CKmerQuake<SIZE>::set_2bits(const uint64 x, const uint32 p)
+{
+	// x is added (not OR-ed) at bit p: word p/64, offset p%64; both agree only while the target bits are still zero
+	data[p / 64] += x << (p % 64);
+}
+
+// *********************************************************************
+template<unsigned SIZE> void CKmerQuake<SIZE>::SHR_insert_2bits(const uint64 x, const uint32 p) // shift all SIZE words right by 2 bits, then add x at bit position p
+{
+#ifdef USE_META_PROG
+ IterFwd([&](const int &i){ // presumably invoked for i = 0 .. SIZE-2, mirroring the loop below -- confirm against IterFwd's definition
+ data[i] >>= 2;
+// carry: '+' acts as the former '|=' because the shift above zeroed the top 2 bits of data[i]
+ data[i] += data[i+1] << (64-2);
+ }, uint_<SIZE-2>());
+#else
+ for(uint32 i = 0; i < SIZE-1; ++i) // lowest word first, so data[i+1] is still unshifted when its low 2 bits are read
+ {
+ data[i] >>= 2;
+// carry: '+' acts as the former '|=' because the shift above zeroed the top 2 bits of data[i]
+ data[i] += data[i+1] << (64-2);
+ }
+#endif
+ data[SIZE-1] >>= 2;
+
+// formerly '|='; the addition gives the same result only while the bits x lands on are zero
+ data[p >> 6] += x << (p & 63);
+}
+
+// *********************************************************************
+template<unsigned SIZE> void CKmerQuake<SIZE>::SHL_insert_2bits(const uint64 x) // shift all SIZE words left by 2 bits, then add x into word 0
+{
+#ifdef USE_META_PROG
+ IterRev([&](const int &i){ // presumably visits i = SIZE-2 down to 0, mirroring the loop below -- confirm against IterRev's definition
+ data[i+1] <<= 2;
+// carry: '+' acts as the former '|=' because the shift above zeroed the low 2 bits of data[i+1]
+ data[i+1] += data[i] >> (64-2);
+ }, uint_<SIZE-2>());
+#else
+ for(uint32 i = SIZE-1; i > 0; --i) // highest word first, so data[i-1] is still unshifted when its top 2 bits are read
+ {
+ data[i] <<= 2;
+// carry: '+' acts as the former '|=' because the shift above zeroed the low 2 bits of data[i]
+ data[i] += data[i-1] >> (64-2);
+ }
+#endif
+ data[0] <<= 2;
+// formerly 'data[0] |= x'; both agree only while x fits in the 2 freed bits (x <= 3) -- callers are assumed to respect that
+ data[0] += x;
+}
+
+// *********************************************************************
+template<unsigned SIZE> uchar CKmerQuake<SIZE>::get_byte(const uint32 p)
+{	// byte p sits in word p/8 at bit offset 8*(p%8)
+	return (data[p / 8] >> (8 * (p % 8))) & 0xFF;
+}
+
+// *********************************************************************
+template<unsigned SIZE> void CKmerQuake<SIZE>::set_byte(const uint32 p, uchar x)
+{
+	// Adds (does not overwrite): the result equals a plain byte store only if byte p is currently zero, e.g. after clear().
+	const uint32 bit_offset = 8 * (p % 8);
+	data[p / 8] += ((uint64) x) << bit_offset;
+}
+
+// *********************************************************************
+template<unsigned SIZE> void CKmerQuake<SIZE>::set_bits(const uint32 p, const uint32 n, uint64 x)
+{
+	const uint32 word = p >> 6, shift = p & 63;	// word index and bit offset of position p
+	data[word] += x << shift;	// low part of x, added into the word holding bit p
+	// bits p .. p+n-1 straddle a word boundary: the remaining high part of x goes into the next word
+	if(word != ((p+n-1) >> 6))
+		data[word + 1] += x >> (64 - shift);
+}
+
+// *********************************************************************
+template<unsigned SIZE> bool CKmerQuake<SIZE>::operator==(const CKmerQuake<SIZE> &x) {
+	// equal when every one of the SIZE words matches; quality does not take part in the comparison
+	uint32 idx = 0;
+	while(idx < SIZE && data[idx] == x.data[idx])
+		++idx;
+	return idx == SIZE;
+}
+
+// *********************************************************************
+template<unsigned SIZE> bool CKmerQuake<SIZE>::operator<(const CKmerQuake<SIZE> &x) {
+	// compare word by word from the highest index down; the first differing word decides
+	for(uint32 w = SIZE; w-- > 0; )
+		if(data[w] != x.data[w])
+			return data[w] < x.data[w];
+
+	// all words equal (quality does not take part in the comparison)
+	return false;
+}
+
+// *********************************************************************
+template<unsigned SIZE> void CKmerQuake<SIZE>::clear(void) // zero every word of the k-mer and the quality value
+{
+#ifdef USE_META_PROG
+ IterFwd([&](const int &i){ // presumably invoked for i = 0 .. SIZE-1, like the loop below -- confirm against IterFwd's definition
+ data[i] = 0;
+ }, uint_<SIZE-1>());
+#else
+ for(uint32 i = 0; i < SIZE; ++i)
+ data[i] = 0;
+#endif
+ quality = 0.0;
+}
+
+// *********************************************************************
+template<unsigned SIZE> uint64 CKmerQuake<SIZE>::remove_suffix(const uint32 n)
+{	// Returns the 64 bits that start at bit n (zero-filled past the last word); requires n < 64*SIZE.
+	const uint32 p = n >> 6;	// index of the word holding bit n
+	const uint32 r = n & 63;	// offset of bit n inside that word
+
+	if(p == SIZE-1 || r == 0)	// nothing comes from data[p+1]; r == 0 would otherwise shift by 64, which is undefined
+		return data[p] >> r;
+	else
+		// 1 <= r <= 63: the two parts occupy disjoint bits, so '+' gives the same result as '|'
+		return (data[p+1] << (64-r)) + (data[p] >> r);
+}
+
+// *********************************************************************
+template<unsigned SIZE> void CKmerQuake<SIZE>::set_n_1(const uint32 n)
+{
+	// Result: the n lowest bits are 1, every other bit is 0, and quality is 0.
+	clear();
+
+	const uint32 full_words = n / 64;	// words that become all ones
+	const uint32 rest_bits = n % 64;	// ones left over for the next word
+	for(uint32 w = 0; w < full_words; ++w)
+		data[w] = ~((uint64) 0);
+	if(rest_bits != 0)
+		data[full_words] = (1ull << rest_bits) - 1;
+
+	quality = 0.0;	// clear() has already zeroed it; repeated for explicitness
+}
+
+// *********************************************************************
+template<unsigned SIZE> void CKmerQuake<SIZE>::set_n_01(const uint32 n)
+{
+	// Result: among the n lowest bits, every even-numbered bit (0, 2, 4, ...) is 1; all other bits and quality are 0.
+	clear();
+
+	// stepping by two visits exactly the even positions below n
+	for(uint32 bit = 0; bit < n; bit += 2)
+		data[bit / 64] += (1ull << (bit % 64));
+
+	quality = 0.0;	// clear() has already zeroed it; repeated for explicitness
+}
+
+// *********************************************************************
+template<unsigned SIZE> void CKmerQuake<SIZE>::store(uchar *&buffer, int32 n)
+{	// k-mer bytes n-1 .. 0 first (highest byte first) ...
+	for(int32 left = n; left > 0; --left)
+		*buffer++ = get_byte(left - 1);
+	// ... then the raw bytes of quality; buffer ends up just past everything written
+	memcpy(buffer, &quality, sizeof(quality));
+	buffer += sizeof(quality);
+}
+
+// *********************************************************************
+template<unsigned SIZE> void CKmerQuake<SIZE>::store(uchar *buffer, int32 p, int32 n)
+{	// k-mer bytes n-1 .. 0 go to buffer[p], buffer[p+1], ...
+	for(int32 left = n; left > 0; --left, ++p)
+		buffer[p] = get_byte(left - 1);
+	// the raw bytes of quality follow directly after the k-mer bytes
+	memcpy(buffer+p, &quality, sizeof(quality));
+}
+
+// *********************************************************************
+template<unsigned SIZE> void CKmerQuake<SIZE>::load(uchar *&buffer, int32 n)
+{	// inverse of store(buffer, n): the first byte read becomes byte n-1
+	clear();	// set_byte() adds into the word, so start from zero
+	for(int32 left = n; left > 0; --left)
+		set_byte(left - 1, *buffer++);
+	// the raw bytes of quality follow the k-mer bytes; buffer ends up just past them
+	memcpy(&quality, buffer, sizeof(quality));
+	buffer += sizeof(quality);
+}
+
+// *********************************************************************
+template<unsigned SIZE> char CKmerQuake<SIZE>::get_symbol(int p)
+{
+	// Symbol p is stored in 2 bits (for p >= 0: word p/32, bit offset 2*(p%32)).
+	// Its code selects the letter: 0 -> 'A', 1 -> 'C', 2 -> 'G', 3 -> 'T'.
+	static const char alphabet[] = "ACGT";
+	const uint32 code = (data[p >> 5] >> (2*(p & 31))) & 0x03;
+
+	// code is always in 0..3 thanks to the mask above
+	const char symbol = alphabet[code];
+
+	return symbol;
+}
+
+
+// *********************************************************************
+// *********************************************************************
+// *********************************************************************
+template<> struct CKmerQuake<1> {
+ unsigned long long data; // the whole k-mer, kept in one word instead of an array
+ float quality; // value carried with the k-mer: copied by set(), zeroed by clear(), stored/loaded right after the k-mer bytes
+
+ typedef unsigned long long data_t;
+ static uint32 QUALITY_SIZE;
+
+ void set(const CKmerQuake<1> &x); // copy data and quality from x
+ void mask(const CKmerQuake<1> &x); // data &= x.data; quality untouched
+ void set_2bits(const uint64 x, const uint32 p); // add x << p into data
+ uchar get_byte(const uint32 p); // byte number p of data, byte 0 being the lowest
+ void set_byte(const uint32 p, uchar x); // add x into byte position p
+ void set_bits(const uint32 p, const uint32 n, uint64 x); // add x << p into data; n is not used by this specialization
+
+ void SHL_insert_2bits(const uint64 x); // data = (data << 2) + x
+ void SHR_insert_2bits(const uint64 x, const uint32 p); // data = (data >> 2) + (x << p)
+
+ uint64 remove_suffix(const uint32 n); // data >> n
+ void set_n_1(const uint32 n); // set the n lowest bits, clear the rest, quality = 0
+ void set_n_01(const uint32 n); // set the even-numbered bits below n, clear the rest, quality = 0
+
+ void store(uchar *&buffer, int32 n); // write bytes n-1..0, then quality; advances buffer
+ void store(uchar *buffer, int32 p, int32 n); // same, starting at buffer[p]; buffer itself is not advanced
+ void load(uchar *&buffer, int32 n); // inverse of store(buffer, n)
+
+ bool operator==(const CKmerQuake<1> &x); // compares data only, not quality
+ bool operator<(const CKmerQuake<1> &x); // compares data only, not quality
+
+ void clear(void); // data = 0, quality = 0
+
+ inline char get_symbol(int p); // letter A/C/G/T for the 2-bit code at bit 2*p
+};
+
+// *********************************************************************
+inline void CKmerQuake<1>::set(const CKmerQuake<1> &x)
+{	// take over both members of x
+	quality = x.quality;
+	data = x.data;
+}
+
+// *********************************************************************
+inline void CKmerQuake<1>::mask(const CKmerQuake<1> &x)
+{	// keep only the bits that are also set in x; quality is left untouched
+	data = data & x.data;
+}
+
+// *********************************************************************
+inline void CKmerQuake<1>::set_2bits(const uint64 x, const uint32 p)
+{
+	// x is added at bit offset p (not OR-ed); both give the same result only while the target bits are still zero
+	data = data + (x << p);
+}
+
+// *********************************************************************
+inline void CKmerQuake<1>::SHR_insert_2bits(const uint64 x, const uint32 p)
+{
+	// Drop the 2 lowest bits, then add x at bit offset p.
+	// The addition equals an OR only while the bits x lands on are zero after the shift.
+	data = (data >> 2) + (x << p);
+}
+
+// *********************************************************************
+inline void CKmerQuake<1>::SHL_insert_2bits(const uint64 x)
+{
+	data <<= 2;	// make room: the 2 lowest bits are now zero
+	data += x;	// same as OR-ing x in, as long as x <= 3
+}
+
+// *********************************************************************
+inline uchar CKmerQuake<1>::get_byte(const uint32 p)
+{	// byte number p of data, byte 0 being the lowest
+	return (data >> (8 * p)) & 0xFF;
+}
+
+// *********************************************************************
+inline void CKmerQuake<1>::set_byte(const uint32 p, uchar x)
+{
+	// Adds (does not overwrite): equals a plain byte store only if byte p is currently zero, e.g. after clear().
+	data += ((uint64) x) << (8 * p);
+}
+
+// *********************************************************************
+inline void CKmerQuake<1>::set_bits(const uint32 p, const uint32 n, uint64 x)
+{
+	// With a single word nothing can spill into a neighbour, so n is not used: x is simply added at bit offset p.
+	data = data + (x << p);
+}
+
+// *********************************************************************
+inline bool CKmerQuake<1>::operator==(const CKmerQuake<1> &x) {
+	return x.data == data;	// quality does not take part in the comparison
+}
+
+// *********************************************************************
+inline bool CKmerQuake<1>::operator<(const CKmerQuake<1> &x) {
+	return x.data > data;	// quality does not take part in the comparison
+}
+
+// *********************************************************************
+inline void CKmerQuake<1>::clear(void)
+{	// reset both members to zero
+	quality = 0.0;
+	data = 0;
+}
+
+// *********************************************************************
+inline uint64 CKmerQuake<1>::remove_suffix(const uint32 n)
+{	// discard the n lowest bits; NOTE(review): n == 64 is not special-cased here, unlike in set_n_1 -- confirm callers keep n < 64
+	return (uint64) (data >> n);
+}
+
+// *********************************************************************
+inline void CKmerQuake<1>::set_n_1(const uint32 n)
+{
+	// Result: the n lowest bits are 1, the rest 0, and quality is 0.
+	// n == 64 is handled apart, so that 1ull is never shifted by 64.
+	const bool all_bits = (n == 64);
+	data = all_bits ? ~(0ull) : (1ull << n) - 1;
+
+	quality = 0.0;
+}
+
+// *********************************************************************
+inline void CKmerQuake<1>::set_n_01(const uint32 n)
+{
+	// Result: among the n lowest bits, every even-numbered bit (0, 2, 4, ...) is 1; all other bits and quality are 0.
+	data = 0ull;
+
+	// stepping by two visits exactly the even positions below n
+	for(uint32 bit = 0; bit < n; bit += 2)
+		data += (1ull << bit);
+
+	quality = 0.0;
+}
+
+// *********************************************************************
+inline void CKmerQuake<1>::store(uchar *&buffer, int32 n)
+{	// k-mer bytes n-1 .. 0 first (highest byte first) ...
+	for(int32 left = n; left > 0; --left)
+		*buffer++ = get_byte(left - 1);
+	// ... then the raw bytes of quality; buffer ends up just past everything written
+	memcpy(buffer, &quality, sizeof(quality));
+	buffer += sizeof(quality);
+}
+
+// *********************************************************************
+inline void CKmerQuake<1>::store(uchar *buffer, int32 p, int32 n)
+{	// k-mer bytes n-1 .. 0 go to buffer[p], buffer[p+1], ...
+	for(int32 left = n; left > 0; --left, ++p)
+		buffer[p] = get_byte(left - 1);
+	// the raw bytes of quality follow directly after the k-mer bytes
+	memcpy(buffer+p, &quality, sizeof(quality));
+}
+
+// *********************************************************************
+inline void CKmerQuake<1>::load(uchar *&buffer, int32 n)
+{	// inverse of store(buffer, n): the first byte read becomes byte n-1
+	clear();	// set_byte() adds into data, so start from zero
+	for(int32 left = n; left > 0; --left)
+		set_byte(left - 1, *buffer++);
+	// the raw bytes of quality follow the k-mer bytes; buffer ends up just past them
+	memcpy(&quality, buffer, sizeof(quality));
+	buffer += sizeof(quality);
+}
+
+// *********************************************************************
+char CKmerQuake<1>::get_symbol(int p)
+{
+	// Decode the 2-bit code of symbol p, found at bit offset 2*p.
+	const uint32 code = (data >> (2*p)) & 0x03;
+	if(code == 0)
+		return 'A';
+	if(code == 1)
+		return 'C';
+	if(code == 2)
+		return 'G';
+	return 'T';
+}
+
+#endif
+
+// ***** EOF
+
+
diff --git a/kmer_counter/kmer_counter.cpp b/kmer_counter/kmer_counter.cpp
new file mode 100755
index 0000000..bd91c37
--- /dev/null
+++ b/kmer_counter/kmer_counter.cpp
@@ -0,0 +1,390 @@
+#include "stdafx.h"
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#include <fstream>
+#include <string>
+#include <vector>
+#include <time.h>
+#include <functional>
+#include "timer.h"
+#include "kmc.h"
+#include "meta_oper.h"
+
+using namespace std;
+
+uint64 total_reads, total_fastq_size;
+
+void usage();
+bool parse_parameters(int argc, char *argv[]);
+
+CKMCParams Params;
+
+//----------------------------------------------------------------------------------
+// Application class
+// Template parameters:
+// * KMER_TPL - k-mer class
+// * SIZE - maximal size of the k-mer (divided by 32)
+template<template<unsigned X> class KMER_TPL, unsigned SIZE, bool QUAKE_MODE> class CApplication
+{
+ CApplication<KMER_TPL, SIZE - 1, QUAKE_MODE> *app_1; // application for the next smaller SIZE; always created, so the chain runs down to the SIZE=1 specialization
+ CKMC<KMER_TPL<SIZE>, SIZE, QUAKE_MODE> *kmc; // counter for this SIZE; non-NULL only when is_selected
+ int p_k; // copy of Params.p_k
+ bool is_selected; // true when (SIZE-1)*32 < p_k <= SIZE*32
+
+public:
+ CApplication(CKMCParams &Params) { // builds the whole chain; only the level whose range contains p_k gets a CKMC object
+ p_k = Params.p_k;
+ is_selected = p_k <= (int32) SIZE * 32 && p_k > ((int32) SIZE-1)*32;
+
+ app_1 = new CApplication<KMER_TPL, SIZE - 1, QUAKE_MODE>(Params); // created before kmc, whether or not this level is selected
+ if(is_selected)
+ {
+ kmc = new CKMC<KMER_TPL<SIZE>, SIZE, QUAKE_MODE>;
+ kmc->SetParams(Params);
+ }
+ else
+ {
+ kmc = NULL;
+ }
+ };
+ ~CApplication() { // releases the smaller-SIZE chain first, then this level's counter
+ delete app_1;
+ if (kmc) // the test is not required: deleting a null pointer does nothing
+ delete kmc;
+ }
+
+ void GetStats(double &time1, double &time2, uint64 &_n_unique, uint64 &_n_cutoff_min, uint64 &_n_cutoff_max, uint64 &_n_total, uint64 &_n_reads, uint64 &_tmp_size, uint64& _n_total_super_kmers) { // fills the outputs from the selected level's counter
+ if (is_selected)
+ {
+ kmc->GetStats(time1, time2, _n_unique, _n_cutoff_min, _n_cutoff_max, _n_total, _n_reads, _tmp_size, _n_total_super_kmers);
+ }
+ else
+ app_1->GetStats(time1, time2, _n_unique, _n_cutoff_min, _n_cutoff_max, _n_total, _n_reads, _tmp_size, _n_total_super_kmers); // not ours: pass the request down the chain
+ }
+
+ bool Process() { // runs the selected level's counter and returns its result
+ if (is_selected)
+ {
+ return kmc->Process();
+ }
+ else
+ return app_1->Process(); // not ours: pass the request down the chain
+ }
+};
+
+//----------------------------------------------------------------------------------
+// Specialization of the application class for the SIZE=1
+template<template<unsigned X> class KMER_TPL, bool QUAKE_MODE> class CApplication<KMER_TPL, 1, QUAKE_MODE>
+{
+ CKMC<KMER_TPL<1>, 1, QUAKE_MODE> *kmc; // counter for SIZE=1; non-NULL only when is_selected
+ int p_k; // never assigned or read in this specialization
+ bool is_selected; // true when Params.p_k <= 32
+
+public:
+ CApplication(CKMCParams &Params) { // end of the chain: no app_1 member here
+ is_selected = Params.p_k <= 32;
+ if(is_selected)
+ {
+ kmc = new CKMC<KMER_TPL<1>, 1, QUAKE_MODE>;
+ kmc->SetParams(Params);
+ }
+ else
+ {
+ kmc = NULL;
+ }
+ };
+ ~CApplication() {
+ if(kmc) // the test is not required: deleting a null pointer does nothing
+ delete kmc;
+ };
+
+ void GetStats(double &time1, double &time2, uint64 &_n_unique, uint64 &_n_cutoff_min, uint64 &_n_cutoff_max, uint64 &_n_total, uint64 &_n_reads, uint64 &_tmp_size, uint64& _n_total_super_kmers) { // when this level is not selected the output arguments are left unchanged
+ if (is_selected)
+ {
+ if(kmc)
+ kmc->GetStats(time1, time2, _n_unique, _n_cutoff_min, _n_cutoff_max, _n_total, _n_reads, _tmp_size, _n_total_super_kmers);
+ }
+ }
+
+ bool Process() { // runs the counter when selected; otherwise reports failure
+ if (is_selected)
+ {
+ return kmc->Process();
+ }
+ return false; // reached only when p_k > 32 arrives at the last level of the chain
+ }
+};
+
+
+//----------------------------------------------------------------------------------
+// Print the command-line help (version, usage, parameters, options, examples) to standard output
+void usage()
+{	// KMC_VER, KMC_DATE, MIN_K and MAX_K are defined elsewhere in the project
+	cout << "K-Mer Counter (KMC) ver. " << KMC_VER << " (" << KMC_DATE << ")\n";
+	cout << "Usage:\n kmc [options] <input_file_name> <output_file_name> <working_directory>\n";
+	cout << " kmc [options] <@input_file_names> <output_file_name> <working_directory>\n";
+	cout << "Parameters:\n";
+	cout << " input_file_name - single file in FASTQ format (gzipped or not)\n";
+	cout << " @input_file_names - file name with list of input files in FASTQ format (gzipped or not)\n";
+	cout << "Options:\n";
+	cout << " -v - verbose mode (shows all parameter settings); default: false\n";
+	cout << " -k<len> - k-mer length (k from " << MIN_K << " to " << MAX_K << "); default: 25\n";
+	cout << " -m<size> - max amount of RAM in GB (from 4 to 1024); default: 12\n";
+	cout << " -p<par> - signature length (5, 6, 7, 8); default: 7\n";
+	cout << " -f<a/q/m> - input in FASTA format (-fa), FASTQ format (-fq) or multi FASTA (-fm); default: FASTQ\n";
+	cout << " -q[value] - use Quake's compatible counting with [value] representing lowest quality (default: 33)\n";
+	cout << " -ci<value> - exclude k-mers occurring less than <value> times (default: 2)\n";
+	cout << " -cs<value> - maximal value of a counter (default: 255)\n";
+	cout << " -cx<value> - exclude k-mers occurring more than <value> times (default: 1e9)\n";
+	cout << " -b - turn off transformation of k-mers into canonical form\n";
+	cout << " -r - turn on RAM-only mode \n";
+	cout << " -t<value> - total number of threads (default: no. of CPU cores)\n";
+	cout << " -sf<value> - number of FASTQ reading threads\n";
+	cout << " -sp<value> - number of splitting threads\n";
+	cout << " -sr<value> - number of sorter threads\n";
+	cout << " -so<value> - number of threads per single sorter\n";
+	cout << "Example:\n";
+	cout << "kmc -k27 -m24 NA19238.fastq NA.res \\data\\kmc_tmp_dir\\\n";
+	cout << "kmc -k27 -q -m24 @files.lst NA.res \\data\\kmc_tmp_dir\\\n";
+}
+
+//----------------------------------------------------------------------------------
+// Parse the command line into the global Params; returns false (the caller then prints usage()) when it is malformed or incomplete
+bool parse_parameters(int argc, char *argv[])
+{
+	int i;
+	int tmp;
+
+	if(argc < 4)
+		return false;
+
+	for(i = 1 ; i < argc; ++i)
+	{
+		if(argv[i][0] != '-')
+			break;
+		// Number of threads
+		if(strncmp(argv[i], "-t", 2) == 0)
+			Params.p_t = atoi(&argv[i][2]);
+		// (no 'else' before the next test: an argument starting with "-t" matches none of the options below)
+		// k-mer length
+		if(strncmp(argv[i], "-k", 2) == 0)
+		{
+			tmp = atoi(&argv[i][2]);
+			if(tmp < MIN_K || tmp > MAX_K)
+			{
+				cout << "Wrong parameter: k must be from range <" << MIN_K << "," << MAX_K << ">\n";
+				return false;
+			}
+			else
+				Params.p_k = tmp;
+		}
+		// Memory limit
+		else if(strncmp(argv[i], "-m", 2) == 0)
+		{
+			tmp = atoi(&argv[i][2]);
+			if(tmp < MIN_MEM)
+			{
+				cout << "Wrong parameter: min memory must be at least " << MIN_MEM << "GB\n";
+				return false;
+			}
+			else
+				Params.p_m = tmp;
+		}
+		// Minimum counter threshold
+		else if(strncmp(argv[i], "-ci", 3) == 0)
+			Params.p_ci = atoi(&argv[i][3]);
+		// Maximum counter threshold
+		else if(strncmp(argv[i], "-cx", 3) == 0)
+			Params.p_cx = atoi(&argv[i][3]);
+		// Maximal counter value
+		else if(strncmp(argv[i], "-cs", 3) == 0)
+			Params.p_cs = atoi(&argv[i][3]);
+		// Quake mode (an optional value follows the flag directly, e.g. -q33)
+		else if(strncmp(argv[i], "-q", 2) == 0)
+		{
+			Params.p_quake = true;
+			if(strlen(argv[i]) > 2)
+				Params.p_quality = atoi(argv[i]+2);
+		}
+		// Set p1 (described as the signature length in usage())
+		else if (strncmp(argv[i], "-p", 2) == 0)
+		{
+			tmp = atoi(&argv[i][2]);
+			if (tmp < MIN_SL || tmp > MAX_SL)
+			{
+				cout << "Wrong parameter: p must be from range <" << MIN_SL << "," << MAX_SL << ">\n";
+				return false;
+			}
+			else
+				Params.p_p1 = tmp;
+		}
+		// FASTA input files
+		else if(strncmp(argv[i], "-fa", 3) == 0)
+			Params.p_file_type = fasta;
+		// FASTQ input files
+		else if(strncmp(argv[i], "-fq", 3) == 0)
+			Params.p_file_type = fastq;
+		else if(strncmp(argv[i], "-fm", 3) == 0)
+			Params.p_file_type = multiline_fasta;
+		else if(strncmp(argv[i], "-v", 2) == 0)
+			Params.p_verbose = true;
+		else if (strncmp(argv[i], "-r", 2) == 0)
+			Params.p_mem_mode = true;
+		else if(strncmp(argv[i], "-b", 2) == 0)
+			Params.p_both_strands = false;
+		// Number of reading threads
+		else if(strncmp(argv[i], "-sf", 3) == 0)
+		{
+			tmp = atoi(&argv[i][3]);
+			if(tmp < MIN_SF || tmp > MAX_SF)
+			{
+				cout << "Wrong parameter: number of reading threads must be from range <" << MIN_SF << "," << MAX_SF << ">\n";
+				return false;
+			}
+			else
+				Params.p_sf = tmp;
+		}
+		// Number of splitting threads
+		else if(strncmp(argv[i], "-sp", 3) == 0)
+		{
+			tmp = atoi(&argv[i][3]);
+			if(tmp < MIN_SP || tmp > MAX_SP)
+			{
+				cout << "Wrong parameter: number of splitting threads must be in range <" << MIN_SP << "," << MAX_SP << ">\n";
+				return false;
+			}
+			else
+				Params.p_sp = tmp;
+		}
+		// Number of sorting threads -- NOTE(review): usage() gives this description to -sr and "threads per single sorter" to -so; confirm which is right
+		else if(strncmp(argv[i], "-so", 3) == 0)
+		{
+			tmp = atoi(&argv[i][3]);
+			if(tmp < MIN_SO || tmp > MAX_SO)
+			{
+				cout << "Wrong parameter: number of sorter threads must be in range <" << MIN_SO << "," << MAX_SO << ">\n";
+				return false;
+			}
+			else
+				Params.p_so = tmp;
+		}
+		// Number of internal sorting threads (per single sorter) -- NOTE(review): usage() calls -sr the number of sorter threads; confirm
+		else if(strncmp(argv[i], "-sr", 3) == 0)
+		{
+			tmp = atoi(&argv[i][3]);
+			if(tmp < MIN_SR || tmp > MAX_SR)
+			{
+				cout << "Wrong parameter: number of sorting threads per single sorter must be in range <" << MIN_SR << "," << MAX_SR << ">\n";
+				return false;
+			}
+			else
+				Params.p_sr = tmp;
+		}
+	}	// an argument starting with '-' that matches no test above is skipped without a message
+
+	if(argc - i < 3)
+		return false;
+
+	string input_file_name = string(argv[i++]);
+	Params.output_file_name = string(argv[i++]);
+	Params.working_directory = string(argv[i++]);
+
+	Params.input_file_names.clear();
+	if(input_file_name[0] != '@')
+		Params.input_file_names.push_back(input_file_name);
+	else
+	{
+		ifstream in(input_file_name.c_str()+1);	// skip the leading '@': the rest names a file listing the inputs, one per line
+		if(!in.good())
+		{
+			cout << "Error: No " << input_file_name.c_str()+1 << " file\n";
+			return false;
+		}
+
+		string s;
+		while(getline(in, s))
+			if(s != "")
+				Params.input_file_names.push_back(s);
+
+		in.close();
+		random_shuffle(Params.input_file_names.begin(), Params.input_file_names.end());	// NOTE(review): std::random_shuffle was removed in C++17
+	}
+
+	return true;
+}
+
+//----------------------------------------------------------------------------------
+// Program entry point: parse the command line, run the counter for the chosen mode, then print timings and statistics
+int _tmain(int argc, _TCHAR* argv[])
+{
+ CStopWatch w0, w1; // not used anywhere else in this function
+ double time1, time2; // filled by GetStats(); printed as 1st/2nd stage times
+ uint64 n_unique, n_cutoff_min, n_cutoff_max, n_total, n_reads, tmp_size, n_total_super_kmers; // all filled by GetStats()
+
+ omp_set_num_threads(1);
+
+#ifdef WIN32
+ _setmaxstdio(2040);
+#endif
+
+ if(!parse_parameters(argc, argv))
+ {
+ usage();
+ return 0; // NOTE(review): the exit status is 0 although the command line was rejected -- confirm whether callers rely on it
+ }
+
+ if(Params.p_quake) // Quake mode: k-mers carry a quality value (CKmerQuake)
+ {
+ CApplication<CKmerQuake, KMER_WORDS, true> *app = new CApplication<CKmerQuake, KMER_WORDS, true>(Params);
+
+ if(!app->Process())
+ {
+ cout << "Not enough memory or some other error\n";
+ delete app;
+ return 0; // NOTE(review): the exit status is 0 although processing failed -- confirm whether callers rely on it
+ }
+ app->GetStats(time1, time2, n_unique, n_cutoff_min, n_cutoff_max, n_total, n_reads, tmp_size, n_total_super_kmers);
+ delete app;
+ }
+ else // plain counting (CKmer); same sequence of steps as the branch above
+ {
+ CApplication<CKmer, KMER_WORDS, false> *app = new CApplication<CKmer, KMER_WORDS, false>(Params);
+
+ if(!app->Process())
+ {
+ cout << "Not enough memory or some other error\n";
+ delete app;
+ return 0; // NOTE(review): the exit status is 0 although processing failed -- confirm whether callers rely on it
+ }
+ app->GetStats(time1, time2, n_unique, n_cutoff_min, n_cutoff_max, n_total, n_reads, tmp_size, n_total_super_kmers);
+ delete app;
+ }
+
+ cout << "1st stage: " << time1 << "s\n";
+ cout << "2nd stage: " << time2 << "s\n";
+ cout << "Total : " << (time1+time2) << "s\n";
+ // earlier variant, dividing by 2^20 instead of 10^6: cout << "Tmp size : " << tmp_size / (1 << 20) << "MB\n";
+ cout << "Tmp size : " << tmp_size / 1000000 << "MB\n";
+ cout << "\nStats:\n";
+ cout << " No. of k-mers below min. threshold : " << setw(12) << n_cutoff_min << "\n";
+ cout << " No. of k-mers above max. threshold : " << setw(12) << n_cutoff_max << "\n";
+ cout << " No. of unique k-mers : " << setw(12) << n_unique << "\n";
+ cout << " No. of unique counted k-mers : " << setw(12) << n_unique-n_cutoff_min-n_cutoff_max << "\n";
+ cout << " Total no. of k-mers : " << setw(12) << n_total << "\n";
+if(Params.p_file_type != multiline_fasta) // the same counter is labelled "reads" or "sequences" depending on the input type
+ cout << " Total no. of reads : " << setw(12) << n_reads << "\n";
+else
+ cout << " Total no. of sequences : " << setw(12) << n_reads << "\n";
+ cout << " Total no. of super-k-mers : " << setw(12) << n_total_super_kmers << "\n";
+ return 0;
+}
+
+// ***** EOF
diff --git a/kmer_counter/kmer_counter.vcxproj b/kmer_counter/kmer_counter.vcxproj
new file mode 100755
index 0000000..62569d0
--- /dev/null
+++ b/kmer_counter/kmer_counter.vcxproj
@@ -0,0 +1,228 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Debug|x64">
+ <Configuration>Debug</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|x64">
+ <Configuration>Release</Configuration>
+ <Platform>x64</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{8C8B90DA-28B7-4D82-81F3-C0E7CE52D59F}</ProjectGuid>
+ <Keyword>Win32Proj</Keyword>
+ <RootNamespace>kmer_counter</RootNamespace>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <CharacterSet>NotSet</CharacterSet>
+ <PlatformToolset>v120</PlatformToolset>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <CharacterSet>NotSet</CharacterSet>
+ <UseOfMfc>Static</UseOfMfc>
+ <PlatformToolset>v120</PlatformToolset>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>NotSet</CharacterSet>
+ <PlatformToolset>v120</PlatformToolset>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>NotSet</CharacterSet>
+ <UseOfMfc>Static</UseOfMfc>
+ <PlatformToolset>v120</PlatformToolset>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <LinkIncremental>true</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <LinkIncremental>true</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <LinkIncremental>false</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <LinkIncremental>false</LinkIncremental>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <PrecompiledHeader>Use</PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <OpenMPSupport>true</OpenMPSupport>
+ <AdditionalIncludeDirectories>C:\Kompilatory\boost\boost_1_48_0;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <AdditionalLibraryDirectories>C:\Kompilatory\boost\boost_1_48_0\stage\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+ <ClCompile>
+ <PrecompiledHeader>Use</PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>c:\boost_1_55_0;C:\Kompilatory\boost\boost_1_51_0;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <OpenMPSupport>true</OpenMPSupport>
+ <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+ <AdditionalOptions>/D "_VARIADIC_MAX=10" %(AdditionalOptions)</AdditionalOptions>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <AdditionalLibraryDirectories>C:\boost_1_55_0\stage\lib;C:\kompilatory\boost\boost_1_51_0\stage\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ <IgnoreAllDefaultLibraries>
+ </IgnoreAllDefaultLibraries>
+ <IgnoreSpecificDefaultLibraries>libcmt.lib</IgnoreSpecificDefaultLibraries>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>Use</PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>C:\Kompilatory\boost\boost_1_51_0;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ <AdditionalLibraryDirectories>C:\Kompilatory\boost\boost_1_48_0\stage\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>Use</PrecompiledHeader>
+ <Optimization>Full</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <MultiProcessorCompilation>true</MultiProcessorCompilation>
+ <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
+ <OpenMPSupport>true</OpenMPSupport>
+ <AdditionalIncludeDirectories>c:\boost_1_55_0;C:\Kompilatory\boost\boost_1_54_0;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <InlineFunctionExpansion>Default</InlineFunctionExpansion>
+ <EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
+ <AdditionalOptions>/D "_VARIADIC_MAX=10"</AdditionalOptions>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>false</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ <AdditionalLibraryDirectories>C:\boost_1_55_0\stage\lib;C:\Kompilatory\boost\boost_1_54_0\stage\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <None Include="ReadMe.txt" />
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="defs.h" />
+ <ClInclude Include="fastq_reader.h" />
+ <ClInclude Include="kb_collector.h" />
+ <ClInclude Include="kb_completer.h" />
+ <ClInclude Include="kb_reader.h" />
+ <ClInclude Include="kb_sorter.h" />
+ <ClInclude Include="kb_storer.h" />
+ <ClInclude Include="kmc.h" />
+ <ClInclude Include="kmer.h" />
+ <ClInclude Include="kxmer_set.h" />
+ <ClInclude Include="libs\asmlib.h" />
+ <ClInclude Include="libs\bzlib.h" />
+ <ClInclude Include="libs\bzlib_private.h" />
+ <ClInclude Include="libs\zconf.h" />
+ <ClInclude Include="libs\zlib.h" />
+ <ClInclude Include="mem_disk_file.h" />
+ <ClInclude Include="meta_oper.h" />
+ <ClInclude Include="mmer.h" />
+ <ClInclude Include="rev_byte.h" />
+ <ClInclude Include="s_mapper.h" />
+ <ClInclude Include="params.h" />
+ <ClInclude Include="queues.h" />
+ <ClInclude Include="radix.h" />
+ <ClInclude Include="splitter.h" />
+ <ClInclude Include="stdafx.h" />
+ <ClInclude Include="targetver.h" />
+ <ClInclude Include="timer.h" />
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="fastq_reader.cpp" />
+ <ClCompile Include="kb_completer.cpp" />
+ <ClCompile Include="kb_storer.cpp" />
+ <ClCompile Include="kmer.cpp" />
+ <ClCompile Include="kmer_counter.cpp" />
+ <ClCompile Include="mem_disk_file.cpp" />
+ <ClCompile Include="mmer.cpp" />
+ <ClCompile Include="radix.cpp" />
+ <ClCompile Include="rev_byte.cpp" />
+ <ClCompile Include="stdafx.cpp">
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
+ </ClCompile>
+ <ClCompile Include="timer.cpp" />
+ </ItemGroup>
+ <ItemGroup>
+ <Reference Include="System" />
+ <Reference Include="System.Data" />
+ <Reference Include="System.Drawing" />
+ <Reference Include="System.Windows.Forms" />
+ <Reference Include="System.Xml" />
+ </ItemGroup>
+ <ItemGroup>
+ <Library Include="libs\alibcof64.lib" />
+ <Library Include="libs\libbzip2.lib" />
+ <Library Include="libs\zlibstat.lib" />
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/kmer_counter/kxmer_set.h b/kmer_counter/kxmer_set.h
new file mode 100755
index 0000000..633169e
--- /dev/null
+++ b/kmer_counter/kxmer_set.h
@@ -0,0 +1,118 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+#ifndef _KXMER_SET_
+#define _KXMER_SET_
+#include "defs.h"
+#include <tuple>
+
+using namespace std;
+
+#define KXMER_SET_SIZE 1024 //KMC_2: temporarily
+
+
+
+// Min-heap over the current k-mers of several ranges of one buffer (presumably each range is sorted - verify against caller).
+// get_min() hands out the smallest k-mer and advances the range it came from.
+template <typename KMER_T, unsigned SIZE>
+class CKXmerSet
+{
+ typedef tuple<uint64, uint64, uint32> elem_desc_t; //start_pos, end_pos, shr
+ typedef pair<KMER_T, uint32> heap_elem_t; //kxmer val, desc_id
+ elem_desc_t data_desc[KXMER_SET_SIZE]; // range descriptors, indexed by desc_id
+ heap_elem_t data[KXMER_SET_SIZE]; // binary heap stored from index 1; data[0] is never used
+ uint32 pos; // first free heap slot; the heap is empty when pos == 1
+ uint32 desc_pos; // number of descriptors added since construction / clear()
+ KMER_T mask; // built with set_n_1(kmer_len * 2) and passed to from_kxmer()
+ // Input buffer; the constructor does not set it, so set_buffer() must run before init_add().
+ KMER_T* buffer;
+ // Replaces the root with the next k-mer of the root's range (or with the last heap element) and sifts it down.
+ inline void update_heap()
+ {
+ uint32 desc_id = data[1].second;
+ KMER_T kmer;
+ if (++get<0>(data_desc[desc_id]) < get<1>(data_desc[desc_id])) // advance the range; still inside [start, end)?
+ {
+ kmer.from_kxmer(buffer[get<0>(data_desc[desc_id])], get<2>(data_desc[desc_id]), mask);
+ }
+ else // range exhausted: shrink the heap and re-insert its last element from the root
+ {
+ kmer.set(data[--pos].first);
+ desc_id = data[pos].second;
+ }
+ // Sift down: move smaller children up until the slot for (kmer, desc_id) is found.
+ uint32 parent, less;
+ parent = less = 1;
+ while (true)
+ {
+ if (parent * 2 >= pos) // no children
+ break;
+ if (parent * 2 + 1 >= pos) // only a left child
+ less = parent * 2;
+ else if (data[parent * 2].first < data[parent * 2 + 1].first)
+ less = parent * 2;
+ else
+ less = parent * 2 + 1;
+ if (data[less].first < kmer)
+ {
+ data[parent] = data[less];
+ parent = less;
+ }
+ else
+ break;
+ }
+ data[parent] = make_pair(kmer, desc_id);
+ }
+
+public:
+ CKXmerSet(uint32 kmer_len) // starts with an empty heap; buffer is left unset
+ {
+ pos = 1;
+ mask.set_n_1(kmer_len * 2);
+ desc_pos = 0;
+ }
+ inline void init_add(uint64 start_pos, uint64 end_pos, uint32 shr) // registers a range and pushes its first k-mer
+ {
+ data_desc[desc_pos] = make_tuple(start_pos, end_pos, shr);
+ data[pos].first.from_kxmer(buffer[start_pos], shr, mask);
+ data[pos].second = desc_pos;
+ uint32 child_pos = pos++;
+ // Sift up. NOTE(review): neither pos nor desc_pos is checked against KXMER_SET_SIZE here.
+ while (child_pos > 1 && data[child_pos].first < data[child_pos / 2].first)
+ {
+ swap(data[child_pos], data[child_pos / 2]);
+ child_pos /= 2;
+ }
+ ++desc_pos;
+ }
+ inline void set_buffer(KMER_T* _buffer) // stores the pointer only; the buffer is not copied
+ {
+ buffer = _buffer;
+ }
+ inline void clear() // empties the heap and forgets all descriptors; buffer and mask are kept
+ {
+ pos = 1;
+ desc_pos = 0;
+ }
+ // Returns false when the heap is empty; otherwise outputs the smallest k-mer and its buffer position, then advances.
+ inline bool get_min(uint64& _pos, KMER_T& kmer)
+ {
+ if (pos <= 1)
+ return false;
+
+ kmer = data[1].first;
+ _pos = get<0>(data_desc[data[1].second]); // position read before update_heap() advances it
+ update_heap();
+ return true;
+ }
+};
+
+
+
+#endif
\ No newline at end of file
diff --git a/kmer_counter/mem_disk_file.cpp b/kmer_counter/mem_disk_file.cpp
new file mode 100755
index 0000000..ea89b94
--- /dev/null
+++ b/kmer_counter/mem_disk_file.cpp
@@ -0,0 +1,109 @@
+#include "stdafx.h"
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#include "mem_disk_file.h"
+#include "libs/asmlib.h"
+
+//----------------------------------------------------------------------------------
+// Constructor
+CMemDiskFile::CMemDiskFile(bool _memory_mode)
+    : memory_mode(_memory_mode), // selects the RAM or the disk variant
+      file(NULL)                 // no stream until Open() runs in disk mode
+{
+}
+
+//----------------------------------------------------------------------------------
+void CMemDiskFile::Open(const string& f_name)
+{
+    // Memory mode needs no backing file: Write() stores its data in RAM.
+    if(memory_mode)
+        return;
+
+    // Disk mode: open f_name with mode "wb+" (binary, read/write, creating
+    // the file or truncating an existing one).
+    file = fopen(f_name.c_str(), "wb+");
+    if(file == NULL)
+    {
+        cout << "Error: Cannot open temporary file " << f_name << "\n";
+        exit(1);
+    }
+
+    // Turn off stdio buffering for this stream.
+    setbuf(file, nullptr);
+}
+
+//----------------------------------------------------------------------------------
+void CMemDiskFile::Rewind()
+{
+    // Memory mode keeps no stream position, so there is nothing to reset.
+    if(memory_mode)
+        return;
+
+    // Disk mode: move the stream position of the temporary file back to its
+    // beginning. NOTE(review): `file` is not checked for NULL here, so Open()
+    // must have been called before.
+    rewind(file);
+}
+
+//----------------------------------------------------------------------------------
+int CMemDiskFile::Close()
+{
+    // Releases the storage. Returns 0 in memory mode or when no stream is open, else the result of fclose().
+    if(memory_mode)
+    {
+        for(auto& p : container) // free the chunks that Read() has not consumed
+            delete[] p.first;
+        container.clear();
+        return 0;
+    }
+    if(file == NULL) // never opened or already closed: fclose(NULL) is undefined behaviour
+        return 0;
+    const int ret = fclose(file);
+    file = NULL; // guards against a second fclose() on the same stream
+    return ret;
+}
+
+//----------------------------------------------------------------------------------
+size_t CMemDiskFile::Read(uchar * ptr, size_t size, size_t count) // memory mode returns bytes, disk mode returns fread()'s item count
+{
+ if(memory_mode) // RAM variant: size and count are not consulted in this branch
+ {
+ uint64 pos = 0; // bytes copied into ptr so far
+ for(auto& p : container) // p = (chunk buffer, chunk length in bytes)
+ {
+ A_memcpy(ptr + pos, p.first, p.second); // A_memcpy comes from libs/asmlib.h; used here as a memcpy
+ pos += p.second;
+ delete[] p.first; // the chunk is consumed, free it right after copying
+ }
+ container.clear(); // drop the now dangling chunk entries
+ return pos; // NOTE(review): ptr must be large enough for ALL stored chunks - no bound is checked
+ }
+ else
+ {
+ return fread(ptr, size, count, file); // disk variant: plain fread() on the temporary file
+ }
+}
+
+//----------------------------------------------------------------------------------
+size_t CMemDiskFile::Write(const uchar * ptr, size_t size, size_t count)
+{
+    // Disk variant: forward straight to fwrite() on the temporary file.
+    if(!memory_mode)
+        return fwrite(ptr, size, count, file);
+
+    // RAM variant: keep a private copy of the data as one more chunk.
+    const size_t n_bytes = size * count;
+    uchar *chunk = new uchar[n_bytes];
+    A_memcpy(chunk, ptr, n_bytes);
+    container.push_back(make_pair(chunk, n_bytes));
+
+    return n_bytes; // bytes stored (the disk variant returns fwrite()'s item count)
+}
diff --git a/kmer_counter/mem_disk_file.h b/kmer_counter/mem_disk_file.h
new file mode 100755
index 0000000..4f22cd8
--- /dev/null
+++ b/kmer_counter/mem_disk_file.h
@@ -0,0 +1,41 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#ifndef _MEM_DISK_FILE_H
+#define _MEM_DISK_FILE_H
+
+#include "defs.h"
+#include <string>
+#include <stdio.h>
+using namespace std;
+
+
+//************************************************************************************************************
+// CMemDiskFile - wrapper for FILE* or memory equivalent
+//************************************************************************************************************
+class CMemDiskFile
+{
+ bool memory_mode; // true: keep written data in RAM; false: use a temporary disk file
+ FILE* file; // stream of the temporary file (used in disk mode only)
+ typedef pair<uchar*, uint64> elem_t;//buf,size
+ typedef vector<elem_t> container_t;
+ // Chunks stored by Write() in memory mode, in write order.
+ container_t container;
+public:
+ CMemDiskFile(bool _memory_mode);
+ void Open(const string& f_name); // disk mode: opens f_name with "wb+"; memory mode: no-op
+ void Rewind(); // disk mode: rewind(file); memory mode: no-op
+ int Close(); // frees remaining chunks (memory mode) or closes the stream (disk mode)
+ size_t Read(uchar * ptr, size_t size, size_t count); // memory mode copies ALL chunks and ignores size/count
+ size_t Write(const uchar * ptr, size_t size, size_t count); // memory mode stores a copy of size*count bytes
+};
+
+#endif
+
diff --git a/kmer_counter/meta_oper.h b/kmer_counter/meta_oper.h
new file mode 100755
index 0000000..470c0b5
--- /dev/null
+++ b/kmer_counter/meta_oper.h
@@ -0,0 +1,45 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#ifndef _META_OPER_H
+#define _META_OPER_H
+
+//#include <functional>
+
+
+template <size_t N> struct uint_{ }; // empty tag type; N selects the overload below
+// Recursively instantiated loops: oper is invoked once for every index in 0..N.
+// IterFwd(oper, uint_<N>()) calls oper(0), oper(1), ..., oper(N) in that order.
+template <size_t N, typename Lambda>
+inline void IterFwd(const Lambda &oper, uint_<N>) {
+ IterFwd(oper, uint_<N-1>()); // first handle 0..N-1 ...
+ oper(N); // ... then N itself
+}
+// Recursion terminator for IterFwd: only index 0 is left.
+template <typename Lambda>
+inline void IterFwd(const Lambda &oper, uint_<0>) {
+ oper(0);
+}
+
+// IterRev(oper, uint_<N>()) calls oper(N), oper(N-1), ..., oper(0) in that order.
+template <size_t N, typename Lambda>
+inline void IterRev(const Lambda &oper, uint_<N>) {
+ oper(N); // N first ...
+ IterRev(oper, uint_<N-1>()); // ... then N-1 down to 0
+}
+// Recursion terminator for IterRev: only index 0 is left.
+template <typename Lambda>
+inline void IterRev(const Lambda &oper, uint_<0>) {
+ oper(0);
+}
+
+#endif
+
+// ***** EOF
diff --git a/kmer_counter/mmer.cpp b/kmer_counter/mmer.cpp
new file mode 100755
index 0000000..56c216b
--- /dev/null
+++ b/kmer_counter/mmer.cpp
@@ -0,0 +1,49 @@
+#include "stdafx.h"
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#include "mmer.h"
+
+
+uint32 CMmer::norm5[]; // out-of-class definitions of the static tables declared in mmer.h
+uint32 CMmer::norm6[];
+uint32 CMmer::norm7[];
+uint32 CMmer::norm8[];
+// Static helper object: its constructor (CMmer::_si::_si in mmer.h) fills the four tables above.
+CMmer::_si CMmer::_init;
+
+
+//--------------------------------------------------------------------------
+CMmer::CMmer(uint32 _len)
+{
+    // Bind the precomputed normalisation table that matches the m-mer length.
+    switch (_len)
+    {
+    case 5: norm = norm5; break;
+    case 6: norm = norm6; break;
+    case 7: norm = norm7; break;
+    case 8: norm = norm8; break;
+    default:
+        // Unsupported length: keep a null table instead of an indeterminate
+        // pointer, so a misuse fails deterministically in insert().
+        norm = nullptr;
+        break;
+    }
+    len = _len;
+    // Unsigned shift; 2 bits per symbol (_len <= 8 for all supported tables).
+    mask = (uint32(1) << (_len * 2)) - 1;
+    str = 0;
+    // current_val used to stay uninitialised until the first insert(); give it
+    // a defined value so get()/set()/comparisons never read garbage.
+    current_val = 0;
+}
+
+//--------------------------------------------------------------------------
+
diff --git a/kmer_counter/mmer.h b/kmer_counter/mmer.h
new file mode 100755
index 0000000..e38f056
--- /dev/null
+++ b/kmer_counter/mmer.h
@@ -0,0 +1,182 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#ifndef _MMER_H
+#define _MMER_H
+#include "defs.h"
+
+// *************************************************************************
+// *************************************************************************
+
+
+// m-mer (signature) of length 5..8 kept as a 2-bit-per-symbol integer, together with its normalised value.
+class CMmer
+{
+ uint32 str; // current window of symbols, 2 bits each, newest symbol in the low bits
+ uint32 mask; // keeps the low 2*len bits of str
+ uint32 current_val; // norm[str] as computed by the last insert() (or copied by set())
+ uint32* norm; // table for this length: one of norm5..norm8
+ uint32 len; // m-mer length in symbols
+ static uint32 norm5[1 << 10]; // one entry per possible 5-mer (2^10)
+ static uint32 norm6[1 << 12];
+ static uint32 norm7[1 << 14];
+ static uint32 norm8[1 << 16];
+ // Returns false for m-mers rejected as signatures. Symbol codes implied by the masks below: A=0, C=1, G=2, T=3.
+ static bool is_allowed(uint32 mmer, uint32 len)
+ {
+ if ((mmer & 0x3f) == 0x3f) // TTT suffix
+ return false;
+ if ((mmer & 0x3f) == 0x3b) // TGT suffix
+ return false;
+ if ((mmer & 0x3c) == 0x3c) // TT* suffix (mask 0x3c tests the two symbols before the last one; also covers TTT)
+ return false;
+ // Slide over the m-mer from its end, dropping one symbol per step, until 3 symbols are left.
+ for (uint32 j = 0; j < len - 3; ++j)
+ if ((mmer & 0xf) == 0) // AA inside
+ return false;
+ else
+ mmer >>= 2;
+ // Here mmer holds only the 3 leading symbols.
+ if (mmer == 0) // AAA prefix
+ return false;
+ if (mmer == 0x04) // ACA prefix
+ return false;
+ if ((mmer & 0xf) == 0) // *AA prefix
+ return false;
+
+ return true;
+ }
+
+ friend class CSignatureMapper;
+ struct _si // static initialiser: its constructor fills norm5..norm8
+ {
+ static uint32 get_rev(uint32 mmer, uint32 len) // reverse complement: symbol s becomes 3 - s, order reversed
+ {
+ uint32 rev = 0;
+ uint32 shift = len*2 - 2; // bit position of the leading symbol
+ for(uint32 i = 0 ; i < len ; ++i)
+ {
+ rev += (3 - (mmer & 3)) << shift;
+ mmer >>= 2;
+ shift -= 2; // wraps around after the last symbol; the value is not used any more then
+ }
+ return rev;
+ }
+ // Fills norm[i] for every m-mer i of the given length with the smaller of i and its reverse complement,
+ // counting only allowed ones; `special` (= 4^len, one past the largest m-mer) marks "neither is allowed".
+ static void init_norm(uint32* norm, uint32 len)
+ {
+ uint32 special = 1 << len * 2;
+ for(uint32 i = 0 ; i < special ; ++i)
+ {
+ uint32 rev = get_rev(i, len);
+ uint32 str_val = is_allowed(i, len) ? i : special;
+ uint32 rev_val = is_allowed(rev, len) ? rev : special;
+ norm[i] = MIN(str_val, rev_val);
+ }
+ }
+ // Runs once, when the static member _init is constructed.
+ _si()
+ {
+ init_norm(norm5, 5);
+ init_norm(norm6, 6);
+ init_norm(norm7, 7);
+ init_norm(norm8, 8);
+ }
+
+ }static _init;
+public:
+ CMmer(uint32 _len); // picks the table for _len (5..8) and clears the window
+ inline void insert(uchar symb); // appends one symbol and refreshes the normalised value
+ inline uint32 get() const; // normalised value of the current window
+ inline bool operator==(const CMmer& x); // comparisons use the normalised values only
+ inline bool operator<(const CMmer& x);
+ inline void clear(); // resets the window (str) only
+ inline bool operator<=(const CMmer& x);
+ inline void set(const CMmer& x); // copies window and normalised value from x
+ inline void insert(char* seq); // loads a whole m-mer from seq[0..len-1]
+
+};
+
+
+
+//--------------------------------------------------------------------------
+inline void CMmer::insert(uchar symb)
+{
+    // Shift the window left by one symbol (2 bits), append symb and drop
+    // whatever falls outside the mask.
+    str = ((str << 2) + symb) & mask;
+    // Refresh the cached normalised value for the new window.
+    current_val = norm[str];
+}
+
+//--------------------------------------------------------------------------
+inline uint32 CMmer::get() const
+{
+ return current_val; // value cached by the last insert() or set()
+}
+
+//--------------------------------------------------------------------------
+inline bool CMmer::operator==(const CMmer& x)
+{
+ return current_val == x.current_val; // compares the cached normalised values only, not str
+}
+
+//--------------------------------------------------------------------------
+inline bool CMmer::operator<(const CMmer& x)
+{
+ return current_val < x.current_val; // orders by the cached normalised values
+}
+
+//--------------------------------------------------------------------------
+inline void CMmer::clear()
+{
+ str = 0; // resets the symbol window only; current_val keeps its previous value
+}
+
+//--------------------------------------------------------------------------
+inline bool CMmer::operator<=(const CMmer& x)
+{
+ return current_val <= x.current_val; // orders by the cached normalised values
+}
+
+//--------------------------------------------------------------------------
+inline void CMmer::set(const CMmer& x)
+{
+ str = x.str; // copies the window and its cached value;
+ current_val = x.current_val; // len, mask and norm of *this are left as they are
+}
+
+//--------------------------------------------------------------------------
+inline void CMmer::insert(char* seq)
+{
+ switch (len) // packs seq[0..len-1] into str, seq[0] ending up in the most significant 2 bits
+ {
+ case 5:
+ str = (seq[0] << 8) + (seq[1] << 6) + (seq[2] << 4) + (seq[3] << 2) + (seq[4]);
+ break;
+ case 6:
+ str = (seq[0] << 10) + (seq[1] << 8) + (seq[2] << 6) + (seq[3] << 4) + (seq[4] << 2) + (seq[5]);
+ break;
+ case 7:
+ str = (seq[0] << 12) + (seq[1] << 10) + (seq[2] << 8) + (seq[3] << 6) + (seq[4] << 4 ) + (seq[5] << 2) + (seq[6]);
+ break;
+ case 8:
+ str = (seq[0] << 14) + (seq[1] << 12) + (seq[2] << 10) + (seq[3] << 8) + (seq[4] << 6) + (seq[5] << 4) + (seq[6] << 2) + (seq[7]);
+ break;
+ default: // any other len: str is left unchanged
+ break;
+ }
+ // assumes every seq[i] is a symbol code in 0..3 (no masking is applied) - TODO confirm against callers
+ current_val = norm[str];
+}
+
+
+#endif
\ No newline at end of file
diff --git a/kmer_counter/params.h b/kmer_counter/params.h
new file mode 100755
index 0000000..bc4efaa
--- /dev/null
+++ b/kmer_counter/params.h
@@ -0,0 +1,155 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#ifndef _PARAMS_H
+#define _PARAMS_H
+
+#include "defs.h"
+#include "queues.h"
+#include "s_mapper.h"
+#include <vector>
+#include <string>
+
+typedef enum {fasta, fastq, multiline_fasta} input_type; // kinds of input file; CKMCParams defaults to fastq
+
+using namespace std;
+
+// Structure for passing KMC parameters
+struct CKMCParams {
+ // NOTE: the constructor below assigns defaults only to the p_* fields and the two *_buffer_size fields.
+ // Input parameters
+ int p_m; // max. total RAM usage
+ int p_k; // k-mer length
+ int p_t; // no. of threads
+ int p_sf; // no. of reading threads
+ int p_sp; // no. of splitting threads
+ int p_so; // no. of OpenMP threads for sorting
+ int p_sr; // no. of sorting threads
+ int p_ci; // do not count k-mers occurring less than this many times
+ int p_cx; // do not count k-mers occurring more than this many times
+ int p_cs; // maximal counter value
+ bool p_quake; // use Quake-compatible counting
+ bool p_mem_mode; // use RAM instead of disk
+ int p_quality; // lowest quality
+ input_type p_file_type; // input file format (one of the input_type values)
+ bool p_verbose; // verbose mode
+ bool p_both_strands; // compute canonical k-mer representation
+ int p_p1; // signature length
+
+ // File names
+ vector<string> input_file_names;
+ string output_file_name;
+ string working_directory;
+ input_type file_type;
+
+ uint32 lut_prefix_len;
+
+ uint32 KMER_T_size;
+
+ // Memory sizes
+ int64 max_mem_size; // maximum amount of memory to be used in GBs; default: 30GB
+ int64 max_mem_storer; // maximum amount of memory for internal buffers of KmerStorer
+ int64 max_mem_stage2; // maximum amount of memory in stage 2
+ int64 max_mem_storer_pkg; // maximum amount of memory for single package
+
+ int64 mem_tot_pmm_bins; // maximal amount of memory per pool memory manager (PMM) of bin parts
+ int64 mem_part_pmm_bins; // maximal amount of memory per single part of memory maintained by PMM of bin parts
+ int64 mem_tot_pmm_fastq;
+ int64 mem_part_pmm_fastq;
+ int64 mem_part_pmm_reads;
+ int64 mem_tot_pmm_reads;
+ int64 mem_part_pmm_radix_buf;
+ int64 mem_tot_pmm_radix_buf;
+ int64 mem_part_pmm_prob;
+ int64 mem_tot_pmm_prob;
+ int64 mem_part_pmm_cnts_sort;
+ int64 mem_tot_pmm_stats;
+ int64 mem_part_pmm_stats;
+
+ int64 mem_tot_pmm_epxand; // (sic) identifier spelled "epxand"
+ int64 mem_part_pmm_epxand; // (sic) identifier spelled "epxand"
+
+ bool verbose;
+
+ int kmer_len; // kmer length
+ int signature_len;
+ int cutoff_min; // exclude k-mers occurring less than this many times
+ int cutoff_max; // exclude k-mers occurring more than this many times
+ int counter_max; // maximal counter value
+ bool use_quake; // use Quake's counting based on qualities
+ int lowest_quality; // lowest quality value
+ bool both_strands; // find canonical representation of each k-mer
+ bool mem_mode; // use RAM instead of disk
+
+ int n_bins; // number of bins; fixed: 448
+ int bin_part_size; // size of a bin part; fixed: 2^15
+ int fastq_buffer_size; // size of FASTQ file buffer; fixed: 2^23
+
+ int n_threads; // number of cores
+ int n_readers; // number of FASTQ readers; default: 1
+ int n_splitters; // number of splitters; default: 1
+ int n_sorters; // number of sorters; default: 1
+ vector<int> n_omp_threads;// number of OMP threads per sorters
+ uint32 max_x; //k+x-mers will be counted
+
+ uint32 gzip_buffer_size;
+ uint32 bzip2_buffer_size;
+ // Sets the defaults of the p_* input parameters and of the decompression buffer sizes.
+ CKMCParams()
+ {
+ p_m = 12;
+ p_k = 25;
+ p_t = 0;
+ p_sf = 0;
+ p_sp = 0;
+ p_so = 0;
+ p_sr = 0;
+ p_ci = 2;
+ p_cx = 1000000000;
+ p_cs = 255;
+ p_quake = false;
+ p_mem_mode = false;
+ p_quality = 33;
+ p_file_type = fastq;
+ p_verbose = false;
+ p_both_strands = true;
+ p_p1 = 7;
+
+ gzip_buffer_size = 64 << 20; // 64 * 2^20
+ bzip2_buffer_size = 64 << 20; // 64 * 2^20
+ }
+};
+
+// Structure for passing KMC queues and monitors to threads
+struct CKMCQueues
+{
+    // Signature mapper
+    CSignatureMapper* s_mapper;
+    // Memory monitors
+    CMemoryMonitor *mm;
+    // Queues
+    CInputFilesQueue *input_files_queue;
+    CPartQueue *part_queue;
+    CStatsPartQueue* stats_part_queue;
+    CBinPartQueue *bpq;
+    CBinDesc *bd;
+    CBinQueue *bq;
+    CKmerQueue *kq;
+    CMemoryPool *pmm_bins, *pmm_fastq, *pmm_reads, *pmm_radix_buf, *pmm_prob, *pmm_stats, *pmm_expand;
+    CMemoryBins *memory_bins;
+    // Every pointer starts out null (they used to be indeterminate), so an unset member is detectable.
+    CKMCQueues() : s_mapper(nullptr), mm(nullptr), input_files_queue(nullptr), part_queue(nullptr), stats_part_queue(nullptr),
+        bpq(nullptr), bd(nullptr), bq(nullptr), kq(nullptr), pmm_bins(nullptr), pmm_fastq(nullptr), pmm_reads(nullptr),
+        pmm_radix_buf(nullptr), pmm_prob(nullptr), pmm_stats(nullptr), pmm_expand(nullptr), memory_bins(nullptr) {}
+};
+
+#endif
+
+// ***** EOF
diff --git a/kmer_counter/queues.h b/kmer_counter/queues.h
new file mode 100755
index 0000000..9146ef5
--- /dev/null
+++ b/kmer_counter/queues.h
@@ -0,0 +1,940 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#ifndef _QUEUES_H
+#define _QUEUES_H
+
+#include "defs.h"
+#include <stdio.h>
+#include <iostream>
+#include <tuple>
+#include <queue>
+#include <list>
+#include <map>
+#include <string>
+#include "mem_disk_file.h"
+
+using namespace std;
+
+#ifdef THREADS_NATIVE // C++11 threads
+#include <thread>
+#include <mutex>
+#include <condition_variable>
+
+using std::thread;
+#else // Boost threads
+#include <boost/thread.hpp>
+#include <boost/thread/mutex.hpp>
+#include <boost/thread/condition_variable.hpp>
+
+using namespace boost;
+#endif
+
+//************************************************************************************************************
+class CInputFilesQueue {
+    typedef string elem_t;
+    typedef queue<elem_t, list<elem_t>> queue_t;
+
+    queue_t q;             // file names that have not been handed out yet
+    bool is_completed;     // becomes true once mark_completed() was called
+
+    mutable mutex mtx;     // protects q and is_completed
+
+public:
+    // Queues all given file names, in the order in which they are listed.
+    CInputFilesQueue(const vector<string> &file_names) {
+        unique_lock<mutex> lck(mtx);
+
+        is_completed = false;
+        for(const auto &name : file_names)
+            q.push(name);
+    }
+    ~CInputFilesQueue() {}
+
+    // True when no file name is waiting in the queue.
+    bool empty() {
+        lock_guard<mutex> lck(mtx);
+        return q.empty();
+    }
+    // True when the queue was marked as completed and has been drained.
+    bool completed() {
+        lock_guard<mutex> lck(mtx);
+        if(!q.empty())
+            return false;
+        return is_completed;
+    }
+    // Declares that the queue is finished.
+    void mark_completed() {
+        lock_guard<mutex> lck(mtx);
+        is_completed = true;
+    }
+    // Non-blocking: moves the oldest file name into file_name and returns true,
+    // or returns false (file_name untouched) when the queue is empty.
+    bool pop(string &file_name) {
+        lock_guard<mutex> lck(mtx);
+
+        const bool has_item = !q.empty();
+        if(has_item)
+        {
+            file_name = q.front();
+            q.pop();
+        }
+
+        return has_item;
+    }
+};
+
+//************************************************************************************************************
+class CPartQueue {
+    typedef pair<uchar *, uint64> elem_t;            // <part, size>
+    typedef queue<elem_t, list<elem_t>> queue_t;
+
+    queue_t q;
+    bool is_completed;     // only written by the constructor; not read in this class
+    int n_readers;         // producers that have not called mark_completed() yet
+
+    mutable mutex mtx;                    // protects all members above
+    condition_variable cv_queue_empty;    // signalled on new data and when the last reader finishes
+
+public:
+    CPartQueue(int _n_readers) {
+        unique_lock<mutex> lck(mtx);
+        n_readers = _n_readers;
+        is_completed = false;
+    }
+    ~CPartQueue() {}
+
+    // True when no part is waiting in the queue.
+    bool empty() {
+        lock_guard<mutex> lck(mtx);
+        return q.empty();
+    }
+    // True when all readers have finished and the queue has been drained.
+    bool completed() {
+        lock_guard<mutex> lck(mtx);
+        return n_readers == 0 && q.empty();
+    }
+    // Called once by every reader; the last call wakes all blocked consumers.
+    void mark_completed() {
+        lock_guard<mutex> lck(mtx);
+        if(--n_readers == 0)
+            cv_queue_empty.notify_all();
+    }
+    // Appends a part; consumers are woken when the queue was empty before.
+    void push(uchar *part, uint64 size) {
+        unique_lock<mutex> lck(mtx);
+
+        const bool wake_consumers = q.empty();
+        q.push(elem_t(part, size));
+
+        if(wake_consumers)
+            cv_queue_empty.notify_all();
+    }
+    // Blocks until a part is available or all readers have finished.
+    // Returns false (outputs untouched) when the queue is drained for good.
+    bool pop(uchar *&part, uint64 &size) {
+        unique_lock<mutex> lck(mtx);
+        while(q.empty() && n_readers != 0)
+            cv_queue_empty.wait(lck);
+
+        if(q.empty())
+            return false;
+
+        const elem_t &head = q.front();
+        part = head.first;
+        size = head.second;
+        q.pop();
+
+        return true;
+    }
+};
+
+//************************************************************************************************************
+class CStatsPartQueue
+{
+    typedef pair<uchar *, uint64> elem_t;            // <part, size>
+    typedef queue<elem_t, list<elem_t>> queue_t;
+
+    queue_t q;
+
+    mutable mutex mtx;                    // protects q, n_readers and bytes_to_read
+    condition_variable cv_queue_empty;    // signalled on new data and when the last reader finishes
+    int n_readers;                        // producers that have not called mark_completed() yet
+    int64 bytes_to_read;                  // remaining byte budget; push() is refused once it is <= 0
+public:
+    CStatsPartQueue(int _n_readers, int64 _bytes_to_read)
+    {
+        unique_lock<mutex> lck(mtx);
+        n_readers = _n_readers;
+        bytes_to_read = _bytes_to_read;
+    }
+
+    ~CStatsPartQueue() {}
+
+    // Called once by every reader; the last call wakes all blocked consumers.
+    void mark_completed() {
+        lock_guard<mutex> lck(mtx);
+        n_readers--;
+        if (!n_readers)
+            cv_queue_empty.notify_all();
+    }
+
+    // True when all readers have finished and the queue has been drained.
+    bool completed() {
+        lock_guard<mutex> lck(mtx);
+        return q.empty() && !n_readers;
+    }
+
+    // Appends a part and charges its size against the byte budget.
+    // Returns false, without queueing, once the budget is used up.
+    bool push(uchar *part, uint64 size) {
+        unique_lock<mutex> lck(mtx);
+
+        if (bytes_to_read <= 0)
+            return false;
+
+        bool was_empty = q.empty();
+        q.push(make_pair(part, size));
+        bytes_to_read -= size;
+        // notify_all, as in the other queues of this file: a notification is only
+        // sent on the empty -> non-empty transition, so with notify_one a second
+        // waiting consumer could stay asleep although a further item was queued
+        // before the first consumer ran.
+        if (was_empty)
+            cv_queue_empty.notify_all();
+
+        return true;
+    }
+
+    // Blocks until a part is available or all readers have finished.
+    // Returns false (outputs untouched) when the queue is drained for good.
+    bool pop(uchar *&part, uint64 &size) {
+        unique_lock<mutex> lck(mtx);
+        cv_queue_empty.wait(lck, [this]{return !this->q.empty() || !this->n_readers; });
+
+        if (q.empty())
+            return false;
+
+        part = q.front().first;
+        size = q.front().second;
+        q.pop();
+
+        return true;
+    }
+};
+
+//************************************************************************************************************
+class CBinPartQueue {
+    typedef tuple<int32, uchar *, uint32, uint32> elem_t;   // <bin id, part, true size, allocated size>
+    typedef queue<elem_t, list<elem_t>> queue_t;
+    queue_t q;
+
+    int n_writers;         // producers that have not called mark_completed() yet
+    bool is_completed;     // only written by the constructor; not read in this class
+
+    mutable mutex mtx;                    // protects all members above
+    condition_variable cv_queue_empty;    // signalled on new data and when the last writer finishes
+
+public:
+    CBinPartQueue(int _n_writers) {
+        lock_guard<mutex> lck(mtx);
+
+        is_completed = false;
+        n_writers = _n_writers;
+    }
+    ~CBinPartQueue() {}
+
+    // True when no part is waiting in the queue.
+    bool empty() {
+        lock_guard<mutex> lck(mtx);
+        return q.empty();
+    }
+    // True when all writers have finished and the queue has been drained.
+    bool completed() {
+        lock_guard<mutex> lck(mtx);
+        return n_writers == 0 && q.empty();
+    }
+    // Called once by every writer; the last call wakes all blocked consumers.
+    void mark_completed() {
+        lock_guard<mutex> lck(mtx);
+        if(--n_writers == 0)
+            cv_queue_empty.notify_all();
+    }
+    // Appends a bin part; consumers are woken when the queue was empty before.
+    void push(int32 bin_id, uchar *part, uint32 true_size, uint32 alloc_size) {
+        unique_lock<mutex> lck(mtx);
+
+        const bool wake_consumers = q.empty();
+        q.push(elem_t(bin_id, part, true_size, alloc_size));
+        if(wake_consumers)
+            cv_queue_empty.notify_all();
+    }
+    // Blocks until a part is available or all writers have finished.
+    // Returns false (outputs untouched) when the queue is drained for good.
+    bool pop(int32 &bin_id, uchar *&part, uint32 &true_size, uint32 &alloc_size) {
+        unique_lock<mutex> lck(mtx);
+        while(q.empty() && n_writers != 0)
+            cv_queue_empty.wait(lck);
+
+        if(q.empty())
+            return false;
+
+        const elem_t &head = q.front();
+        bin_id     = get<0>(head);
+        part       = get<1>(head);
+        true_size  = get<2>(head);
+        alloc_size = get<3>(head);
+        q.pop();
+
+        return true;
+    }
+};
+
+//************************************************************************************************************
+class CBinDesc {
+    // Per-bin record, by tuple index:
+    //   <0> desc           <1> size (accumulated)   <2> n_rec (accumulated)
+    //   <3> buffer_size    <4> kmer_len             <5> file
+    //   <6> n_plus_x_recs (accumulated)             <7> n_super_kmers (accumulated)
+    typedef tuple<string, int64, uint64, uint32, uint32, CMemDiskFile*, uint64, uint64> desc_t;
+    typedef map<int32, desc_t> map_t;
+
+    map_t m;
+    int32 bin_id;                 // iteration cursor shared by both get_next_* methods; -1 = not started
+
+    vector<int32> random_bins;    // visiting order built by init_random()
+
+    mutable mutex mtx;            // protects all members above
+
+public:
+    CBinDesc() {
+        lock_guard<mutex> lck(mtx);
+        bin_id = -1;
+    }
+    ~CBinDesc() {}
+
+    // Rewinds the iteration cursor, so the next get_next_* call starts over.
+    void reset_reading() {
+        lock_guard<mutex> lck(mtx);
+        bin_id = -1;
+    }
+
+    // True when no bin has been inserted.
+    bool empty() {
+        lock_guard<mutex> lck(mtx);
+        return m.empty();
+    }
+
+    // Builds the order used by get_next_random_bin(). Bins are ranked by
+    // descending record count (<2>). The top 60% and the bottom 20% are
+    // shuffled together; the remaining middle band is appended unshuffled.
+    void init_random()
+    {
+        lock_guard<mutex> lck(mtx);
+        vector<pair<int32, int64>> bin_sizes;
+        bin_sizes.reserve(m.size());
+
+        for (auto& p : m)
+            bin_sizes.push_back(make_pair(p.first, get<2>(p.second)));
+
+        sort(bin_sizes.begin(), bin_sizes.end(), [](const pair<int32, int64>& l, const pair<int32, int64>& r){
+            return l.second > r.second;
+        });
+
+        uint32 no_sort_start = uint32(0.6 * bin_sizes.size());
+        uint32 no_sort_end = uint32(0.8 * bin_sizes.size());
+
+        // Start from scratch, so a repeated call does not append a second
+        // order behind the first one.
+        random_bins.clear();
+        random_bins.reserve(bin_sizes.size());
+
+        for (uint32 i = 0; i < no_sort_start; ++i)
+            random_bins.push_back(bin_sizes[i].first);
+
+        for (uint32 i = no_sort_end; i < bin_sizes.size(); ++i)
+            random_bins.push_back(bin_sizes[i].first);
+
+        // NOTE(review): random_shuffle is deprecated since C++14 and removed in
+        // C++17; it is kept here so the produced order stays what it was.
+        random_shuffle(random_bins.begin(), random_bins.end());
+
+        for (uint32 i = no_sort_start; i < no_sort_end; ++i)
+            random_bins.push_back(bin_sizes[i].first);
+    }
+
+    // Returns the next bin id in the order built by init_random(), or -1000
+    // when that order is exhausted.
+    int32 get_next_random_bin()
+    {
+        lock_guard<mutex> lck(mtx);
+        if (bin_id == -1)
+            bin_id = 0;
+        else
+            ++bin_id;
+
+        // Check against the vector that is actually indexed (not the map), and
+        // reject a negative cursor: it is negative here when get_next_bin() ran
+        // to the end (-1000) without reset_reading(). -1000 is also returned
+        // when init_random() was never called or bins were inserted after it.
+        if (bin_id < 0 || bin_id >= (int32)random_bins.size())
+            return -1000;
+        return random_bins[bin_id];
+    }
+
+    // Returns the next bin id in ascending key order, or -1000 when there is
+    // no further bin.
+    int32 get_next_bin()
+    {
+        lock_guard<mutex> lck(mtx);
+        map_t::iterator p;
+        if(bin_id == -1)
+            p = m.begin();
+        else
+        {
+            p = m.find(bin_id);
+            if(p != m.end())
+                ++p;
+        }
+
+        if(p == m.end())
+            bin_id = -1000;
+        else
+            bin_id = p->first;
+
+        return bin_id;
+    }
+
+    // Creates the record of a bin, or updates an existing one: the counters
+    // are accumulated, desc/file are replaced only when desc is non-empty, and
+    // buffer_size/kmer_len are replaced only when buffer_size is non-zero.
+    void insert(int32 bin_id, CMemDiskFile *file, string desc, int64 size, uint64 n_rec, uint64 n_plus_x_recs, uint64 n_super_kmers, uint32 buffer_size = 0, uint32 kmer_len = 0) {
+        lock_guard<mutex> lck(mtx);
+
+        map_t::iterator p = m.find(bin_id);
+        if(p == m.end())
+        {
+            m[bin_id] = std::make_tuple(desc, size, n_rec, buffer_size, kmer_len, file, n_plus_x_recs, n_super_kmers);
+            return;
+        }
+
+        desc_t &d = p->second;      // reuse the lookup instead of repeating m[bin_id]
+        if(desc != "")
+        {
+            get<0>(d) = desc;
+            get<5>(d) = file;
+        }
+        get<1>(d) += size;
+        get<2>(d) += n_rec;
+        get<6>(d) += n_plus_x_recs;
+        get<7>(d) += n_super_kmers;
+        if(buffer_size)
+        {
+            get<3>(d) = buffer_size;
+            get<4>(d) = kmer_len;
+        }
+    }
+
+    // Reads the record of bin_id (variant that returns buffer_size and kmer_len).
+    // As before, an unknown bin_id creates a value-initialised record.
+    void read(int32 bin_id, CMemDiskFile *&file, string &desc, uint64 &size, uint64 &n_rec, uint64 &n_plus_x_recs, uint32 &buffer_size, uint32 &kmer_len) {
+        lock_guard<mutex> lck(mtx);
+
+        desc_t &d = m[bin_id];
+        desc = get<0>(d);
+        file = get<5>(d);
+        size = (uint64) get<1>(d);
+        n_rec = get<2>(d);
+        buffer_size = get<3>(d);
+        kmer_len = get<4>(d);
+        n_plus_x_recs = get<6>(d);
+    }
+
+    // Reads the record of bin_id (variant that returns n_super_kmers).
+    // As before, an unknown bin_id creates a value-initialised record.
+    void read(int32 bin_id, CMemDiskFile *&file, string &desc, uint64 &size, uint64 &n_rec, uint64 &n_plus_x_recs, uint64 &n_super_kmers) {
+        lock_guard<mutex> lck(mtx);
+
+        desc_t &d = m[bin_id];
+        desc = get<0>(d);
+        file = get<5>(d);
+        size = (uint64) get<1>(d);
+        n_rec = get<2>(d);
+        n_plus_x_recs = get<6>(d);
+        n_super_kmers = get<7>(d);
+    }
+};
+
+//************************************************************************************************************
+class CBinQueue {
+    typedef tuple<int32, uchar *, uint64, uint64> elem_t;   // <bin id, part, size, number of records>
+    typedef queue<elem_t, list<elem_t>> queue_t;
+    queue_t q;
+
+    int n_writers;         // producers that have not called mark_completed() yet
+
+    mutable mutex mtx;                    // protects q and n_writers
+    condition_variable cv_queue_empty;    // signalled on new data and when the last writer finishes
+
+public:
+    CBinQueue(int _n_writers) {
+        lock_guard<mutex> lck(mtx);
+        n_writers = _n_writers;
+    }
+    ~CBinQueue() {}
+
+    // True when no bin is waiting in the queue.
+    bool empty() {
+        lock_guard<mutex> lck(mtx);
+        return q.empty();
+    }
+    // True when all writers have finished and the queue has been drained.
+    bool completed() {
+        lock_guard<mutex> lck(mtx);
+        return n_writers == 0 && q.empty();
+    }
+    // Called once by every writer; the last call wakes all blocked consumers.
+    void mark_completed() {
+        lock_guard<mutex> lck(mtx);
+        if(--n_writers == 0)
+            cv_queue_empty.notify_all();
+    }
+    // Appends a bin; consumers are woken when the queue was empty before.
+    void push(int32 bin_id, uchar *part, uint64 size, uint64 n_rec) {
+        lock_guard<mutex> lck(mtx);
+        const bool wake_consumers = q.empty();
+        q.push(elem_t(bin_id, part, size, n_rec));
+        if(wake_consumers)
+            cv_queue_empty.notify_all();
+    }
+    // Blocks until a bin is available or all writers have finished.
+    // Returns false (outputs untouched) when the queue is drained for good.
+    bool pop(int32 &bin_id, uchar *&part, uint64 &size, uint64 &n_rec) {
+        unique_lock<mutex> lck(mtx);
+
+        while(q.empty() && n_writers != 0)
+            cv_queue_empty.wait(lck);
+
+        if(q.empty())
+            return false;
+
+        const elem_t &head = q.front();
+        bin_id = get<0>(head);
+        part   = get<1>(head);
+        size   = get<2>(head);
+        n_rec  = get<3>(head);
+        q.pop();
+
+        return true;
+    }
+};
+
+//************************************************************************************************************
+class CKmerQueue {
+    // <bin id, data, data size, lut, lut size, n_unique, n_cutoff_min, n_cutoff_max, n_total>
+    typedef tuple<int32, uchar*, uint64, uchar*, uint64, uint64, uint64, uint64, uint64> data_t;
+    typedef list<data_t> list_t;
+
+    int n_writers;                        // producers that have not called mark_completed() yet
+    mutable mutex mtx;                    // protects n_writers and l
+    condition_variable cv_queue_empty;    // signalled on every push, on drain and when the last writer finishes
+
+    list_t l;
+    int32 n_bins;                         // stored by the constructor; not used by the methods below
+public:
+    CKmerQueue(int32 _n_bins, int _n_writers) {
+        lock_guard<mutex> lck(mtx);
+        n_writers = _n_writers;
+        n_bins = _n_bins;
+    }
+    ~CKmerQueue() {
+    }
+
+    // True only when the list is drained AND all writers have finished.
+    bool empty() {
+        lock_guard<mutex> lck(mtx);
+        return n_writers == 0 && l.empty();
+    }
+    // Called once by every writer; the last call wakes all blocked consumers.
+    void mark_completed() {
+        lock_guard<mutex> lck(mtx);
+        if (--n_writers == 0)
+            cv_queue_empty.notify_all();
+    }
+    // Appends the result of one bin and wakes all waiting consumers.
+    void push(int32 bin_id, uchar *data, uint64 data_size, uchar *lut, uint64 lut_size, uint64 n_unique, uint64 n_cutoff_min, uint64 n_cutoff_max, uint64 n_total) {
+        lock_guard<mutex> lck(mtx);
+        l.push_back(data_t(bin_id, data, data_size, lut, lut_size, n_unique, n_cutoff_min, n_cutoff_max, n_total));
+        cv_queue_empty.notify_all();
+    }
+    // Blocks until a result is available or all writers have finished.
+    // Returns false (outputs untouched) when the list is drained for good.
+    bool pop(int32 &bin_id, uchar *&data, uint64 &data_size, uchar *&lut, uint64 &lut_size, uint64 &n_unique, uint64 &n_cutoff_min, uint64 &n_cutoff_max, uint64 &n_total) {
+        unique_lock<mutex> lck(mtx);
+        while (l.empty() && n_writers != 0)
+            cv_queue_empty.wait(lck);
+        if (l.empty())
+            return false;
+
+        const data_t &head = l.front();
+        bin_id       = get<0>(head);
+        data         = get<1>(head);
+        data_size    = get<2>(head);
+        lut          = get<3>(head);
+        lut_size     = get<4>(head);
+        n_unique     = get<5>(head);
+        n_cutoff_min = get<6>(head);
+        n_cutoff_max = get<7>(head);
+        n_total      = get<8>(head);
+
+        l.pop_front();
+
+        // The condition variable is also signalled when the list runs empty.
+        if (l.empty())
+            cv_queue_empty.notify_all();
+
+        return true;
+    }
+};
+
+
+
+
+//************************************************************************************************************
+class CMemoryMonitor {
+    uint64 max_memory;        // budget that increase() never exceeds
+    uint64 memory_in_use;     // sum of all granted, not yet returned amounts
+
+    mutable mutex mtx;                    // protects both counters
+    condition_variable cv_memory_full;    // signalled whenever memory is returned
+
+public:
+    CMemoryMonitor(uint64 _max_memory) {
+        lock_guard<mutex> lck(mtx);
+        memory_in_use = 0;
+        max_memory = _max_memory;
+    }
+    ~CMemoryMonitor() {
+    }
+
+    // Blocks until n more units fit into the budget, then accounts for them.
+    void increase(uint64 n) {
+        unique_lock<mutex> lck(mtx);
+        while(memory_in_use + n > max_memory)
+            cv_memory_full.wait(lck);
+        memory_in_use += n;
+    }
+    // Like increase(), but also proceeds as soon as nothing is in use at all,
+    // even when n alone is larger than the budget.
+    void force_increase(uint64 n) {
+        unique_lock<mutex> lck(mtx);
+        while(memory_in_use + n > max_memory && memory_in_use != 0)
+            cv_memory_full.wait(lck);
+        memory_in_use += n;
+    }
+    // Returns n units to the budget and wakes all waiting callers.
+    void decrease(uint64 n) {
+        lock_guard<mutex> lck(mtx);
+        memory_in_use -= n;
+        cv_memory_full.notify_all();
+    }
+    // Reports the budget and the amount currently in use.
+    void info(uint64 &_max_memory, uint64 &_memory_in_use)
+    {
+        lock_guard<mutex> lck(mtx);
+        _memory_in_use = memory_in_use;
+        _max_memory = max_memory;
+    }
+};
+
+//************************************************************************************************************
+class CMemoryPool {
+    int64 total_size;        // n_parts_total * part_size
+    int64 part_size;         // requested part size rounded up to a multiple of 16
+    int64 n_parts_total;
+    int64 n_parts_free;      // number of valid entries at the bottom of stack
+
+    uchar *buffer, *raw_buffer;   // raw_buffer: as allocated; buffer: first 64-byte aligned address in it
+    uint32 *stack;                // indices of the free parts
+
+    mutable mutex mtx;       // protects n_parts_free and stack
+    condition_variable cv;   // signalled whenever a part is returned
+
+    // Blocks until a part is free, takes its index off the stack and returns
+    // the address of that part.
+    uchar *take_part()
+    {
+        unique_lock<mutex> lck(mtx);
+        while(n_parts_free <= 0)
+            cv.wait(lck);
+
+        return buffer + stack[--n_parts_free]*part_size;
+    }
+
+    // Puts the index of the part starting at part back on the stack and wakes
+    // all waiting callers.
+    void give_back(uchar *part)
+    {
+        lock_guard<mutex> lck(mtx);
+
+        stack[n_parts_free++] = (uint32) ((part - buffer) / part_size);
+        cv.notify_all();
+    }
+
+public:
+    CMemoryPool(int64 _total_size, int64 _part_size) {
+        stack = NULL;
+        buffer = NULL;
+        raw_buffer = NULL;
+        prepare(_total_size, _part_size);
+    }
+    ~CMemoryPool() {
+        release();
+    }
+
+    // Drops any previous allocation and creates _total_size / _part_size parts.
+    void prepare(int64 _total_size, int64 _part_size) {
+        release();
+
+        n_parts_total = _total_size / _part_size;
+        n_parts_free = n_parts_total;
+        part_size = (_part_size + 15) / 16 * 16;        // to allow mapping pointer to int*
+        total_size = n_parts_total * part_size;
+
+        // 64 spare bytes leave room to move the start to a 64-byte boundary
+        raw_buffer = new uchar[total_size+64];
+        const uint64 misalignment = ((uint64) raw_buffer) % 64;
+        buffer = misalignment ? raw_buffer + (64 - misalignment) : raw_buffer;
+
+        stack = new uint32[n_parts_total];
+        for(uint32 idx = 0; idx < n_parts_total; ++idx)
+            stack[idx] = idx;
+    }
+
+    // Frees the buffer and the stack; safe to call when nothing is allocated.
+    void release(void) {
+        delete[] raw_buffer;        // delete[] of a null pointer does nothing
+        raw_buffer = NULL;
+        buffer = NULL;
+
+        delete[] stack;
+        stack = NULL;
+    }
+
+    // Allocate memory buffer - uchar*
+    void reserve(uchar* &part)
+    {
+        part = take_part();
+    }
+    // Allocate memory buffer - char*
+    void reserve(char* &part)
+    {
+        part = (char*) take_part();
+    }
+    // Allocate memory buffer - uint32*
+    void reserve(uint32* &part)
+    {
+        part = (uint32*) take_part();
+    }
+    // Allocate memory buffer - uint64*
+    void reserve(uint64* &part)
+    {
+        part = (uint64*) take_part();
+    }
+    // Allocate memory buffer - double*
+    void reserve(double* &part)
+    {
+        part = (double*) take_part();
+    }
+
+    // Deallocate memory buffer - uchar*
+    void free(uchar* part)
+    {
+        give_back(part);
+    }
+    // Deallocate memory buffer - char*
+    void free(char* part)
+    {
+        give_back((uchar*) part);
+    }
+    // Deallocate memory buffer - uint32*
+    void free(uint32* part)
+    {
+        give_back((uchar*) part);
+    }
+    // Deallocate memory buffer - uint64*
+    void free(uint64* part)
+    {
+        give_back((uchar*) part);
+    }
+    // Deallocate memory buffer - double*
+    void free(double* part)
+    {
+        give_back((uchar*) part);
+    }
+};
+
+
+class CMemoryBins { // Hands out per-bin working regions carved from one large buffer; init() blocks until a region fits
+ int64 total_size; // usable buffer size in bytes, set by prepare() and possibly enlarged by init()
+ int64 free_size; // total_size minus the sizes of all currently reserved regions
+
+ uint32 n_bins; // number of entries in bin_ptrs
+ // bin_ptrs_t fields: <0> region base, <1> input file, <2> input array, <3> tmp array, <4> suffix data, <5> LUT, <6> k+x-mer counters, <7> region size in bytes
+ typedef tuple<uchar*, uchar*, uchar*, uchar*, uchar*, uchar*, uchar*, int64> bin_ptrs_t;
+
+public:
+ typedef enum{ mba_input_file, mba_input_array, mba_tmp_array, mba_suffix, mba_kxmer_counters, mba_lut } mba_t; // selects the sub-buffer that reserve() and free() refer to
+
+private:
+ uchar *buffer, *raw_buffer; // raw_buffer: pointer returned by malloc; buffer: raw_buffer advanced to an ALIGNMENT boundary
+ bin_ptrs_t *bin_ptrs; // array of n_bins entries, indexed by bin id
+
+ list<pair<uint64, uint64>> list_reserved; // (offset, size) of the reserved regions, ordered by offset; the last entry is the (total_size, 0) guard
+ list<pair<uint32, uint64>> list_insert_order; // (bin id, offset) of the reserved regions in the order init() created them
+
+ mutable mutex mtx; // The mutex to synchronise on
+ condition_variable cv; // The condition to wait for
+
+public:
+ CMemoryBins(int64 _total_size, uint32 _n_bins) {
+ raw_buffer = NULL;
+ buffer = NULL;
+ bin_ptrs = NULL;
+ prepare(_total_size, _n_bins);
+ }
+ ~CMemoryBins() {
+ release();
+ }
+
+ int64 round_up_to_alignment(int64 x) // rounds x up to the next multiple of ALIGNMENT
+ {
+ return (x + ALIGNMENT - 1) / ALIGNMENT * ALIGNMENT;
+ }
+
+ void prepare(int64 _total_size, uint32 _n_bins) { // drops any previous allocation, then allocates the pointer table and the buffer
+ release();
+
+ n_bins = _n_bins;
+ bin_ptrs = new bin_ptrs_t[n_bins];
+
+ total_size = round_up_to_alignment(_total_size - n_bins * sizeof(bin_ptrs_t)); // the footprint of the pointer table is taken off the budget
+ free_size = total_size;
+
+ raw_buffer = (uchar*)malloc(total_size + ALIGNMENT); // NOTE(review): the malloc result is not checked for NULL
+ buffer = raw_buffer;
+ while (((uint64)buffer) % ALIGNMENT) // advance to the first ALIGNMENT-aligned address
+ buffer++;
+
+ list_reserved.clear();
+ list_insert_order.clear();
+ list_reserved.push_back(make_pair(total_size, 0)); // guard: zero-sized entry located at the end of the buffer
+ }
+
+ void release(void) { // frees the buffer and the pointer table; does nothing for pointers that are already NULL
+ if (raw_buffer)
+ ::free(raw_buffer);
+ raw_buffer = NULL;
+ buffer = NULL;
+
+ if (bin_ptrs)
+ delete[] bin_ptrs;
+ bin_ptrs = NULL;
+ }
+
+ // Prepare memory buffer for bin of given id: blocks until a large enough gap exists, reserves it and computes the sub-buffer pointers
+ void init(uint32 bin_id, uint32 sorting_phases, int64 file_size, int64 kxmers_size, int64 out_buffer_size, int64 kxmer_counter_size, int64 lut_size)
+ {
+ unique_lock<mutex> lck(mtx);
+ int64 part1_size;
+ int64 part2_size;
+ // The region is part1 followed by part2; which sub-buffers share a part depends on the parity of sorting_phases
+ if (sorting_phases % 2 == 0)
+ {
+ part1_size = kxmers_size + kxmer_counter_size;
+ part2_size = max(max(file_size, kxmers_size), out_buffer_size + lut_size);
+ }
+ else
+ {
+ part1_size = max(kxmers_size + kxmer_counter_size, file_size);
+ part2_size = max(kxmers_size, out_buffer_size + lut_size);
+ }
+ int64 req_size = part1_size + part2_size;
+ uint64 found_pos;
+ uint64 last_found_pos;
+
+ // Look for space to insert
+ cv.wait(lck, [&]() -> bool{ // predicate: returns true once found_pos holds a usable offset
+ found_pos = total_size;
+ if (!list_insert_order.empty()) // 1st attempt: directly behind the most recently created region
+ {
+ last_found_pos = list_insert_order.back().second;
+ for (auto p = list_reserved.begin(); p != list_reserved.end(); ++p)
+ if (p->first == last_found_pos)
+ {
+ uint64 last_end_pos = p->first + p->second;
+ ++p; // p now refers to the entry that follows that region
+ if (last_end_pos + req_size <= p->first)
+ {
+ found_pos = last_end_pos;
+ return true;
+ }
+ else
+ break;
+ }
+ }
+
+ uint64 prev_end_pos = 0; // 2nd attempt: first gap, scanning from offset 0, that is large enough
+
+ for (auto p = list_reserved.begin(); p != list_reserved.end(); ++p)
+ {
+ if (prev_end_pos + req_size <= p->first)
+ {
+ found_pos = prev_end_pos;
+ return true;
+ }
+ prev_end_pos = p->first + p->second;
+ }
+
+ // Reallocate memory for buffer if necessary: nothing is reserved and the request exceeds the whole buffer
+ if (list_insert_order.empty() && req_size > (int64)list_reserved.back().first)
+ {
+ ::free(raw_buffer);
+ total_size = round_up_to_alignment(req_size);
+ free_size = total_size;
+
+ raw_buffer = (uchar*)malloc(total_size + ALIGNMENT); // NOTE(review): the malloc result is not checked for NULL
+ buffer = raw_buffer;
+ while (((uint64)buffer) % ALIGNMENT)
+ buffer++;
+
+ list_reserved.back().first = total_size; // move the guard to the new end of the buffer
+ found_pos = 0;
+ return true;
+ }
+
+ return false; // no room yet: keep waiting until free() notifies
+ });
+
+ // Reserve found free space
+ list_insert_order.push_back(make_pair(bin_id, found_pos));
+ for (auto p = list_reserved.begin(); p != list_reserved.end(); ++p)
+ if (found_pos < p->first) // insert in front of the first entry with a larger offset, which keeps the list ordered
+ {
+ list_reserved.insert(p, make_pair(found_pos, req_size));
+ break;
+ }
+
+ uchar *base_ptr = get<0>(bin_ptrs[bin_id]) = buffer + found_pos;
+
+ if (sorting_phases % 2 == 0) // the result of sorting is in the same place as input
+ {
+ get<1>(bin_ptrs[bin_id]) = base_ptr + part1_size;
+ get<2>(bin_ptrs[bin_id]) = base_ptr;
+ get<3>(bin_ptrs[bin_id]) = base_ptr + part1_size;
+ }
+ else
+ {
+ get<1>(bin_ptrs[bin_id]) = base_ptr;
+ get<2>(bin_ptrs[bin_id]) = base_ptr + part1_size;
+ get<3>(bin_ptrs[bin_id]) = base_ptr;
+ }
+ get<4>(bin_ptrs[bin_id]) = base_ptr + part1_size; // data
+ get<5>(bin_ptrs[bin_id]) = get<4>(bin_ptrs[bin_id]) + out_buffer_size; // the LUT follows the output data
+ if (kxmer_counter_size)
+ get<6>(bin_ptrs[bin_id]) = base_ptr + kxmers_size; //kxmers counter
+ else
+ get<6>(bin_ptrs[bin_id]) = NULL;
+ free_size -= req_size;
+ get<7>(bin_ptrs[bin_id]) = req_size; // remembered so that free() can give the same amount back
+ }
+
+ void reserve(uint32 bin_id, uchar* &part, mba_t t) // returns in part the sub-buffer of kind t that init() computed for bin_id; part is left unchanged if t matches no branch
+ {
+ unique_lock<mutex> lck(mtx);
+ if (t == mba_input_file)
+ part = get<1>(bin_ptrs[bin_id]);
+ else if (t == mba_input_array)
+ part = get<2>(bin_ptrs[bin_id]);
+ else if (t == mba_tmp_array)
+ part = get<3>(bin_ptrs[bin_id]);
+ else if (t == mba_suffix)
+ part = get<4>(bin_ptrs[bin_id]);
+ else if (t == mba_lut)
+ part = get<5>(bin_ptrs[bin_id]);
+ else if (t == mba_kxmer_counters)
+ part = get<6>(bin_ptrs[bin_id]);
+ }
+
+ // Marks sub-buffer t of bin_id as released; once all six sub-buffers are released the whole region is given back
+ void free(uint32 bin_id, mba_t t)
+ {
+ unique_lock<mutex> lck(mtx);
+ if (t == mba_input_file)
+ get<1>(bin_ptrs[bin_id]) = NULL;
+ else if (t == mba_input_array)
+ get<2>(bin_ptrs[bin_id]) = NULL;
+ else if (t == mba_tmp_array)
+ get<3>(bin_ptrs[bin_id]) = NULL;
+ else if (t == mba_suffix)
+ get<4>(bin_ptrs[bin_id]) = NULL;
+ else if (t == mba_lut)
+ get<5>(bin_ptrs[bin_id]) = NULL;
+ else if (t == mba_kxmer_counters)
+ get<6>(bin_ptrs[bin_id]) = NULL;
+ // All six sub-buffer pointers are NULL: remove the region from both lists
+ if (!get<1>(bin_ptrs[bin_id]) && !get<2>(bin_ptrs[bin_id]) && !get<3>(bin_ptrs[bin_id]) && !get<4>(bin_ptrs[bin_id]) && !get<5>(bin_ptrs[bin_id]) && !get<6>(bin_ptrs[bin_id]))
+ {
+ for (auto p = list_reserved.begin(); p != list_reserved.end() && p->second != 0; ++p) // the scan stops at the first zero-sized entry (the guard)
+ {
+ if ((int64)p->first == get<0>(bin_ptrs[bin_id]) - buffer)
+ {
+ list_reserved.erase(p);
+ break;
+ }
+ }
+ for (auto p = list_insert_order.begin(); p != list_insert_order.end(); ++p)
+ if (p->first == bin_id)
+ {
+ list_insert_order.erase(p);
+ break;
+ }
+
+ get<0>(bin_ptrs[bin_id]) = NULL;
+ free_size += get<7>(bin_ptrs[bin_id]);
+ cv.notify_all(); // wake init() callers that are waiting for space
+ }
+ }
+};
+
+
+#endif
+
+// ***** EOF
diff --git a/kmer_counter/radix.cpp b/kmer_counter/radix.cpp
new file mode 100755
index 0000000..0d3d123
--- /dev/null
+++ b/kmer_counter/radix.cpp
@@ -0,0 +1,292 @@
+#include "stdafx.h"
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#include <stdio.h>
+#include "radix.h"
+
+//----------------------------------------------------------------------------------
+/*Parallel radix sort. The input data to be sorted are divided evenly among threads.
+ Each thread is responsible for building a local histogram to enable sorting keys
+ according to a given digit. Then a global histogram is created as a combination
+ of local ones and the write offset (location) to which each digit should be written
+ is computed. Finally, threads scatter the data to the appropriate locations.*/
+template<typename COUNTER_TYPE> // COUNTER_TYPE: integer type of the histogram counts and record positions (uint32 or uint64, chosen by RadixSort_uint8)
+void RadixOMP_uint8(uint32 *SourcePtr, uint32 *DestPtr, const int64 SourceSize, unsigned rec_size, unsigned data_offset, unsigned data_size, const unsigned n_phases, const unsigned n_threads)
+{
+/* SourceSize - number of records */
+/* rec_size - size of one record, in bytes */
+/* data_offset - offset, in bytes, of the first key byte inside a record */
+/* data_size - in bytes - not used now */
+ // ByteCounter[t][d]: count, later the destination position in records, of byte value d in the share of thread t
+ // NOTE(review): nothing in this function checks n_threads <= MAX_NUM_THREADS - confirm that callers guarantee it
+#ifdef WIN32
+ __declspec( align( WIN_ALIGNMENT ) ) COUNTER_TYPE ByteCounter[MAX_NUM_THREADS][256];
+#else
+ COUNTER_TYPE ByteCounter[MAX_NUM_THREADS][256] __attribute__((aligned(ALIGNMENT)));
+#endif
+ // globalHisto[d]: total count of byte value d, turned below into the exclusive prefix sum over d
+#ifdef WIN32
+ __declspec( align( WIN_ALIGNMENT ) ) COUNTER_TYPE globalHisto[256];
+#else
+ COUNTER_TYPE globalHisto[256] __attribute__((aligned(ALIGNMENT)));
+#endif
+ // Everything declared inside the parallel region is private to each thread
+#pragma omp parallel num_threads(n_threads)
+ {
+ int myID = omp_get_thread_num();
+ uint8_t ByteIndex = 0; // index, relative to data_offset, of the key byte the current phase sorts by
+ long long i;
+ COUNTER_TYPE prevSum;
+ COUNTER_TYPE temp;
+ uint32 n;
+
+ int private_i;
+ int byteValue;
+
+ int64 SourceSize_in_bytes = SourceSize * rec_size;
+
+ uint8_t *char_ptr_tempSource = (uint8_t*)(SourcePtr);
+ uint8_t *char_ptr_tempDest = (uint8_t*)(DestPtr);
+ uint8_t *char_tempPtr;
+
+#ifdef WIN32
+ __declspec( align( WIN_ALIGNMENT ) ) COUNTER_TYPE privateByteCounter[256] = {0};
+#else
+ __attribute__((aligned(ALIGNMENT))) COUNTER_TYPE privateByteCounter[256] = {0};
+#endif
+
+ for(uint32 privatePhaseCounter = 0; privatePhaseCounter < n_phases; privatePhaseCounter++) // one counting-sort pass per key byte
+ { // step 1: per-thread histogram of the current key byte
+ #pragma omp for private(i) schedule(static)
+ for(i = data_offset; i < SourceSize_in_bytes; i = i + rec_size)
+ {
+ byteValue = *(&char_ptr_tempSource[i] + ByteIndex);
+
+ ++privateByteCounter[byteValue];
+ }
+ A_memcpy(&ByteCounter[myID][0], privateByteCounter, sizeof(privateByteCounter)); // publish the counts of this thread in the shared table
+
+ #pragma omp barrier
+ // step 2: for each byte value, replace the count of every thread by the sum of the counts of the lower-numbered threads; the total goes to globalHisto
+ #pragma omp for schedule(static)
+ for(i = 0; i < 256; ++i)
+ {
+ prevSum = 0;
+ for(n = 0; n < n_threads; n++)
+ {
+ temp = ByteCounter[n][i];
+ ByteCounter[n][i] = prevSum;
+ prevSum += temp;
+ }
+ globalHisto[i] = prevSum;
+ }
+ // step 3, done by one thread: exclusive prefix sum of globalHisto over the 256 byte values
+ #pragma omp single
+ {
+ prevSum = 0;
+ for(i = 0; i < 256; ++i)
+ {
+ temp = globalHisto[i];
+ globalHisto[i] = prevSum;
+ prevSum += temp;
+ }
+ }
+
+ // step 4: offset among the threads + global offset of the byte value = first destination record index for this thread
+ for (private_i = 0; private_i < 256; private_i++)
+ ByteCounter[myID][private_i] += globalHisto[private_i];
+
+ A_memcpy(privateByteCounter, &ByteCounter[myID][0], sizeof(privateByteCounter));
+ // step 5: scatter. Same bounds and schedule(static) as the counting loop - presumably so that each thread moves exactly the records it counted (TODO confirm)
+ #pragma omp for schedule(static)
+ for(i = data_offset; i < SourceSize_in_bytes; i = i + rec_size)
+ {
+ byteValue = *(&char_ptr_tempSource[i] + ByteIndex);
+
+ memcpy(&char_ptr_tempDest[privateByteCounter[byteValue] * rec_size], &char_ptr_tempSource[i - data_offset], rec_size); // copy the whole record, which starts data_offset bytes before the key
+
+ (privateByteCounter[byteValue])++;
+ }
+
+
+ #pragma omp barrier
+
+ char_tempPtr = char_ptr_tempDest; // swap source and destination for the next phase (these pointers are local to the thread)
+ char_ptr_tempDest = char_ptr_tempSource;
+ char_ptr_tempSource = char_tempPtr;
+ ByteIndex++;
+ memset(privateByteCounter, 0, sizeof(privateByteCounter));
+ }
+ }
+}
+
+//----------------------------------------------------------------------------------
+// Sorts size records of rec_size bytes by n_phases key bytes that start
+// data_offset bytes into each record, using tmp_ptr as the second buffer.
+// Source and destination swap roles after every phase inside the worker, and
+// data_ptr / tmp_ptr themselves are not modified here.
+// NOTE(review): which buffer holds the final order therefore appears to depend
+// on the parity of n_phases - confirm against the callers.
+void RadixSort_uint8(uint32 *&data_ptr, uint32 *&tmp_ptr, uint64 size, unsigned rec_size, unsigned data_offset, unsigned data_size, const unsigned n_phases, const unsigned n_threads)
+{
+    // The worker keeps its per-thread histograms in a fixed array with
+    // MAX_NUM_THREADS rows, and an OpenMP num_threads clause must be positive.
+    // Keep the thread count inside [1, MAX_NUM_THREADS]; the sorted result does
+    // not depend on how many threads take part.
+    unsigned threads = n_threads;
+    if(threads < 1)
+        threads = 1;
+    else if(threads > MAX_NUM_THREADS)
+        threads = MAX_NUM_THREADS;
+
+    // 32-bit counters are only used while every byte offset (record index * rec_size) fits in 32 bits
+    if(size * rec_size >= (1ull << 32))
+        RadixOMP_uint8<uint64>(data_ptr, tmp_ptr, size, rec_size, data_offset, data_size, n_phases, threads);
+    else
+        RadixOMP_uint8<uint32>(data_ptr, tmp_ptr, size, rec_size, data_offset, data_size, n_phases, threads);
+}
+
+
+//----------------------------------------------------------------------------------
+/*Parallel radix sort. Parallelization scheme taken from
+ Satish, N., Kim, C., Chhugani, J., Nguyen, A.D., Lee, V.W., Kim, D., Dubey, P. (2010).
+ Fast Sort on CPUs and GPUs. A Case for Bandwidth Oblivious SIMD Sort.
+ Proc. of the 2010 Int. Conf. on Management of data, pp. 351-362.
+ The use of software-managed buffers in the writing phase reduces
+ the influence of irregular memory accesses. As the number of cache conflict misses
+ is reduced, better efficiency is reached.*/
+template<typename COUNTER_TYPE, typename INT_TYPE> // COUNTER_TYPE: type of counts and positions; INT_TYPE: signed type for the flush arithmetic (RadixSort_buffer uses uint64 with int64 and uint32 with int32)
+ void RadixOMP_buffer(CMemoryPool *pmm_radix_buf, uint64 *Source, uint64 *Dest, const int64 SourceSize, const unsigned n_phases, const unsigned n_threads)
+{
+#ifdef WIN32
+ __declspec( align( WIN_ALIGNMENT ) ) COUNTER_TYPE ByteCounter[MAX_NUM_THREADS][256];
+#else
+ COUNTER_TYPE ByteCounter[MAX_NUM_THREADS][256] __attribute__((aligned(ALIGNMENT)));
+#endif
+
+#ifdef WIN32
+ __declspec( align( WIN_ALIGNMENT ) ) COUNTER_TYPE globalHisto[256];
+#else
+ COUNTER_TYPE globalHisto[256] __attribute__((aligned(ALIGNMENT)));
+#endif
+ // NOTE(review): nothing in this function checks n_threads <= MAX_NUM_THREADS - confirm that callers guarantee it
+#pragma omp parallel num_threads(n_threads)
+ {
+ int myID = omp_get_thread_num();
+ uint8_t ByteIndex = 0; // which byte of each 64-bit element the current phase sorts by
+ long long i;
+ COUNTER_TYPE prevSum;
+ COUNTER_TYPE temp;
+
+ uint32 n;
+
+ int index_x;
+ int private_i;
+ int byteValue;
+ uint64 *tempSource = Source;
+ uint64 *tempDest = Dest;
+ uint64 *tempPtr;
+ // Per-thread staging area taken from the pool; indexed below as Buffer[byte value * BUFFER_WIDTH + slot]
+ uint64 *raw_Buffer;
+ pmm_radix_buf->reserve(raw_Buffer);
+ uint64 *Buffer = raw_Buffer;
+
+ while(((unsigned long long) Buffer) % ALIGNMENT) // advance to an ALIGNMENT-aligned address; NOTE(review): assumes a pool part has room for 256 * BUFFER_WIDTH elements plus this padding - confirm where the pool is created
+ Buffer++;
+
+#ifdef WIN32
+ __declspec( align( WIN_ALIGNMENT ) ) COUNTER_TYPE privateByteCounter[256] = {0};
+#else
+ __attribute__((aligned(ALIGNMENT))) COUNTER_TYPE privateByteCounter[256] = {0};
+#endif
+
+ for(uint32 privatePhaseCounter = 0; privatePhaseCounter < n_phases; privatePhaseCounter++) // one counting-sort pass per byte
+ { // step 1: per-thread histogram of the current byte
+ #pragma omp for private(i) schedule(static)
+ for(i = 0; i < SourceSize; ++i)
+ {
+ byteValue = *(reinterpret_cast<const uint8_t*>(&tempSource[i]) + ByteIndex);
+ ++privateByteCounter[byteValue];
+ }
+ A_memcpy(&ByteCounter[myID][0], privateByteCounter, sizeof(privateByteCounter)); // publish the counts of this thread in the shared table
+
+ #pragma omp barrier
+ // step 2: for each byte value, replace the count of every thread by the sum of the counts of the lower-numbered threads; the total goes to globalHisto
+ #pragma omp for schedule(static)
+ for(i = 0; i < 256; ++i)
+ {
+ prevSum = 0;
+ for(n = 0; n < n_threads; n++)
+ {
+ temp = ByteCounter[n][i];
+ ByteCounter[n][i] = prevSum;
+ prevSum += temp;
+ }
+ globalHisto[i] = prevSum;
+ }
+ // step 3, done by one thread: exclusive prefix sum of globalHisto over the 256 byte values
+ #pragma omp single
+ {
+ prevSum = 0;
+ for(i = 0; i < 256; ++i)
+ {
+ temp = globalHisto[i];
+ globalHisto[i] = prevSum;
+ prevSum += temp;
+ }
+ }
+ // step 4: ByteCounter[myID][d] becomes the first destination index of this thread for byte value d
+ for (private_i = 0; private_i < 256; private_i++)
+ ByteCounter[myID][private_i] += globalHisto[private_i];
+
+ A_memcpy(privateByteCounter, &ByteCounter[myID][0], sizeof(privateByteCounter)); // privateByteCounter is the running write position from here on
+
+ // step 5: scatter through the staging buffer; a buffer line is copied out as soon as its last slot is filled
+ #pragma omp for schedule(static)
+ for(i = 0; i < SourceSize; ++i)
+ {
+ byteValue = *(reinterpret_cast<const uint8_t*>(&tempSource[i]) + ByteIndex);
+
+ index_x = privateByteCounter[byteValue] % BUFFER_WIDTH; // slot inside the buffer line, derived from the absolute destination index
+
+ Buffer[byteValue * BUFFER_WIDTH + index_x] = tempSource[i];
+
+ privateByteCounter[byteValue]++;
+
+ if(index_x == (BUFFER_WIDTH -1)) // line full: copy BUFFER_WIDTH elements to the destination, ending at the current write position
+ A_memcpy ( &tempDest[privateByteCounter[byteValue] - (BUFFER_WIDTH)], &Buffer[byteValue * BUFFER_WIDTH], BUFFER_WIDTH *sizeof(uint64) );
+ } //end_for
+ // step 6: copy out what is still staged. NOTE(review): this presumably relies on the implicit barrier that ends the omp for above, so that whole-line copies of other threads cannot overwrite these elements afterwards - TODO confirm
+ INT_TYPE elemInBuffer;
+ INT_TYPE index_stop;
+ INT_TYPE index_start;
+ INT_TYPE elemWrittenIntoBuffer;
+
+ for(private_i = 0; private_i < 256; private_i++)
+ {
+ index_stop = privateByteCounter[private_i] % BUFFER_WIDTH; // slot just behind the last element written
+ index_start = ByteCounter[myID][private_i] % BUFFER_WIDTH; // slot of the first element written
+ elemWrittenIntoBuffer = privateByteCounter[private_i] - ByteCounter[myID][private_i]; // elements this thread wrote for this byte value
+
+ if((index_stop - elemWrittenIntoBuffer) <= 0) // this thread wrote at least index_stop elements: slots 0 .. index_stop-1 of the line are staged
+ elemInBuffer = index_stop;
+ else
+ elemInBuffer = index_stop - index_start; // fewer than index_stop written: elemInBuffer is the span from index_start to index_stop
+
+ if(elemInBuffer != 0)
+ A_memcpy ( &tempDest[privateByteCounter[private_i] - elemInBuffer], &Buffer[private_i * BUFFER_WIDTH + (privateByteCounter[private_i] - elemInBuffer)%BUFFER_WIDTH], (elemInBuffer)*sizeof(uint64) );
+
+ }
+ #pragma omp barrier
+
+ tempPtr = tempDest; // swap source and destination for the next phase (these pointers are local to the thread)
+ tempDest = tempSource;
+ tempSource = tempPtr;
+ ByteIndex++;
+ memset(privateByteCounter, 0, sizeof(privateByteCounter));
+ }
+ pmm_radix_buf->free(raw_Buffer); // give the staging area back to the pool
+ }
+}
+
+//----------------------------------------------------------------------------------
+// Sorts size 64-bit elements by their n_phases lowest-addressed bytes, using
+// tmp as the second buffer and one staging buffer per thread from pmm_radix_buf.
+// Source and destination swap roles after every phase inside the worker, and
+// data / tmp themselves are not modified here.
+// NOTE(review): which buffer holds the final order therefore appears to depend
+// on the parity of n_phases - confirm against the callers.
+void RadixSort_buffer(CMemoryPool *pmm_radix_buf, uint64 *&data, uint64 *&tmp, uint64 size, const unsigned n_phases, const unsigned n_threads)
+{
+    // The worker keeps its per-thread histograms in a fixed array with
+    // MAX_NUM_THREADS rows, and an OpenMP num_threads clause must be positive.
+    // Keep the thread count inside [1, MAX_NUM_THREADS]; the sorted result does
+    // not depend on how many threads take part.
+    unsigned threads = n_threads;
+    if(threads < 1)
+        threads = 1;
+    else if(threads > MAX_NUM_THREADS)
+        threads = MAX_NUM_THREADS;
+
+    // 32-bit counters and indices are only used while every element index fits in a signed 32-bit value
+    if(size >= (1ull << 31))
+        RadixOMP_buffer<uint64, int64>(pmm_radix_buf, data, tmp, size, n_phases, threads);
+    else
+        RadixOMP_buffer<uint32, int32>(pmm_radix_buf, data, tmp, size, n_phases, threads);
+}
+
+// ***** EOF
diff --git a/kmer_counter/radix.h b/kmer_counter/radix.h
new file mode 100755
index 0000000..bc9379b
--- /dev/null
+++ b/kmer_counter/radix.h
@@ -0,0 +1,44 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+#ifndef _RADIX_H
+#define _RADIX_H
+
+#include <cassert>
+#include <cstring>
+#include <cstdlib>
+#include <iostream>
+#include <omp.h>
+#include <algorithm>
+#include "libs/asmlib.h"
+#include "defs.h"
+#include "queues.h"
+#include <boost/static_assert.hpp>
+
+#ifdef WIN32
+typedef unsigned __int8 uint8_t;
+#else
+#include <stdint.h>
+#endif
+
+#define MAX_NUM_THREADS 32 // NOTE(review): presumably the upper bound on sorting threads - usage is not visible in this header
+#define BUFFER_WIDTH 32 // number of elements in each per-byte-value staging buffer used by the buffered radix sort
+#define ALIGNMENT 0x100 // 256; NOTE(review): presumably the byte alignment of the staging buffer - confirm in radix.cpp
+#define WIN_ALIGNMENT 64
+
+#define shift_BUFFER_WIDTH 5 // log2(BUFFER_WIDTH); keep in sync with BUFFER_WIDTH
+#define BUFFER_WIDTH_MINUS_1 31 // BUFFER_WIDTH - 1; keep in sync with BUFFER_WIDTH
+#define BUFFER_WIDTH_MUL_sizeof_UINT 256 // BUFFER_WIDTH * 8, i.e. the size in bytes of one staging buffer of 64-bit elements
+
+void RadixSort_uint8(uint32 *&data_ptr, uint32 *&tmp_ptr, uint64 size, unsigned rec_size, unsigned data_offset, unsigned data_size, const unsigned n_phases, const unsigned n_threads); // defined in radix.cpp (not visible here)
+void RadixSort_buffer(CMemoryPool *pmm_radix_buf, uint64 *&data, uint64 *&tmp, uint64 size, const unsigned n_phases, const unsigned n_threads); // sorts 64-bit elements; picks 32- or 64-bit instantiation from size
+
+#endif
+
+// ***** EOF
diff --git a/kmer_counter/rev_byte.cpp b/kmer_counter/rev_byte.cpp
new file mode 100755
index 0000000..9ec84ed
--- /dev/null
+++ b/kmer_counter/rev_byte.cpp
@@ -0,0 +1,15 @@
+#include "stdafx.h"
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#include "rev_byte.h"
+
+uchar CRev_byte::lut[256]; // storage for the lookup table that the _si constructor fills
+CRev_byte::_si CRev_byte::_init; // static object whose construction fills lut during static initialization
\ No newline at end of file
diff --git a/kmer_counter/rev_byte.h b/kmer_counter/rev_byte.h
new file mode 100755
index 0000000..61bbfb6
--- /dev/null
+++ b/kmer_counter/rev_byte.h
@@ -0,0 +1,29 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#ifndef _REV_BYTE_H
+#define _REV_BYTE_H
+
+#include "defs.h"
+// Lookup table over all byte values. A byte is treated as four 2-bit symbols;
+// lut[b] holds the same four symbols in reversed order, each replaced by
+// (3 - symbol). The table is filled once, when the static member _init is
+// constructed.
+struct CRev_byte
+{
+    // Helper whose constructor populates lut.
+    struct _si
+    {
+        _si()
+        {
+            for (uint32 byte = 0; byte < 256; ++byte)
+            {
+                uint32 reversed = 0;
+                // Walk the symbols from the lowest bit pair upwards; each one is
+                // pushed in from the right, so the first ends up in the top bits.
+                for (uint32 shift = 0; shift < 8; shift += 2)
+                    reversed = (reversed << 2) + (3 - ((byte >> shift) & 3));
+                lut[byte] = reversed;
+            }
+        }
+    };
+
+    static uchar lut[256];
+    static _si _init;
+};
+
+#endif
\ No newline at end of file
diff --git a/kmer_counter/s_mapper.h b/kmer_counter/s_mapper.h
new file mode 100755
index 0000000..93ee336
--- /dev/null
+++ b/kmer_counter/s_mapper.h
@@ -0,0 +1,166 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#ifndef _S_MAPPER_H
+#define _S_MAPPER_H
+#include "defs.h"
+#include "mmer.h"
+#include "params.h"
+
+#ifdef DEVELOP_MODE
+#include "develop.h"
+#endif
+
+
+// Maps signatures to bin numbers.
+//
+// The map has 4^signature_len + 1 entries: one per possible signature plus one
+// extra slot (special_signature). Every entry starts at -1; Init() assigns bin
+// numbers from per-signature occurrence statistics so that the bins end up with
+// roughly equal estimated sizes. The extra slot receives the last bin number,
+// which get_max_bin_no() reports.
+//
+// The object owns signature_map, so it is not copyable.
+class CSignatureMapper
+{
+    uint32 map_size;            // number of entries in signature_map
+    int32* signature_map;       // owned; signature -> bin number, -1 when unassigned
+    uint32 signature_len;
+    uint32 special_signature;   // index of the extra slot, 4^signature_len
+    CMemoryPool* pmm_stats;     // pool providing the temporary buffer used by Init()
+
+    // Orders signature ids by descending number of occurrences.
+    class Comp
+    {
+        uint32* signature_occurences;
+    public:
+        Comp(uint32* _signature_occurences) : signature_occurences(_signature_occurences){}
+        bool operator()(int i, int j) const
+        {
+            return signature_occurences[i] > signature_occurences[j];
+        }
+    };
+
+    // signature_map is released with delete[] in the destructor; an implicit
+    // copy would make two objects free the same array.
+    CSignatureMapper(const CSignatureMapper&) = delete;
+    CSignatureMapper& operator=(const CSignatureMapper&) = delete;
+
+public:
+    // Builds the signature -> bin map.
+    // stats[s] is the number of occurrences counted for signature s; it must
+    // provide map_size entries. Signatures rejected by CMmer::is_allowed keep
+    // the value -1.
+    void Init(uint32* stats)
+    {
+        // Signature ids sorted by descending occurrence count
+        uint32 *sorted;
+        pmm_stats->reserve(sorted);
+        for (uint32 i = 0; i < map_size ; ++i)
+            sorted[i] = i;
+        sort(sorted, sorted + map_size, Comp(stats));
+
+        // (signature, weight) pairs of the allowed signatures, heaviest first
+        list<pair<uint32, uint64>> _stats;
+        for (uint32 i = 0; i < map_size ; ++i)
+        {
+            if (CMmer::is_allowed(sorted[i], signature_len))
+                _stats.push_back(make_pair(sorted[i], stats[sorted[i]]));
+        }
+
+        list<pair<uint32, uint64>> group;
+        uint32 bin_no = 0;
+        //counting sum
+        double sum = 0.0;
+        for (auto &i : _stats)
+        {
+            i.second += 1000;   // every signature gets a constant extra weight
+            sum += i.second;
+        }
+
+        double mean = sum / MAX_BINS;
+        double max_bin_size = 1.1 * mean;
+        uint32 n = MAX_BINS - 1; //one is needed for disabled signatures
+        uint32 max_bins = MAX_BINS - 1;
+        // Each iteration closes exactly one bin: n is decremented and bin_no incremented together
+        while (_stats.size() > n)
+        {
+            pair<uint32, uint64>& max = _stats.front();
+
+            if (max.second > mean)
+            {
+                // The heaviest remaining signature exceeds the mean: it gets a bin of its own
+                signature_map[max.first] = bin_no++;
+                sum -= max.second;
+                mean = sum / (max_bins - bin_no);
+                max_bin_size = 1.1 * mean;
+
+                _stats.pop_front();
+                --n;
+            }
+            else
+            {
+                //heuristic: greedily collect signatures, heaviest first, while the bin stays below max_bin_size
+                group.clear();
+                double tmp_sum = 0.0;
+                for (auto it = _stats.begin(); it != _stats.end();)
+                {
+                    if (tmp_sum + it->second < max_bin_size)
+                    {
+                        tmp_sum += it->second;
+                        group.push_back(*it);
+                        it = _stats.erase(it);
+                    }
+                    else
+                        ++it;
+                }
+
+                for (auto i = group.begin(); i != group.end(); ++i)
+                {
+                    signature_map[i->first] = bin_no;
+                }
+                --n;
+                ++bin_no;
+
+                sum -= tmp_sum;
+                mean = sum / (max_bins - bin_no);
+                max_bin_size = 1.1 * mean;
+            }
+        }
+        // Whatever is left gets one bin per signature
+        if (_stats.size() > 0)
+        {
+            for (auto i = _stats.begin(); i != _stats.end(); ++i)
+            {
+                signature_map[i->first] = bin_no++;
+                cout << "rest bin: " << i->second << "\n";
+            }
+        }
+        // The extra slot takes the last bin number
+        signature_map[special_signature] = bin_no;
+        pmm_stats->free(sorted);
+
+#ifdef DEVELOP_MODE
+        map_log(signature_len, map_size, signature_map);
+#endif
+
+    }
+
+    // Allocates the map for signatures of length _signature_len and marks every entry as unassigned (-1).
+    CSignatureMapper(CMemoryPool* _pmm_stats, uint32 _signature_len)
+    {
+        pmm_stats = _pmm_stats;
+        signature_len = _signature_len;
+        special_signature = 1 << 2 * signature_len;
+        map_size = (1 << 2 * signature_len) + 1;
+        signature_map = new int32[map_size];
+        fill_n(signature_map, map_size, -1);
+    }
+
+    // Returns the bin number stored for the given signature (-1 if none was assigned). No range check is done.
+    inline int32 get_bin_id(uint32 signature)
+    {
+        return signature_map[signature];
+    }
+
+    // Advances _cur_id to the next bin number.
+    inline void next_bin_id(int32& _cur_id)
+    {
+        ++_cur_id;
+    }
+
+    // Always returns 1; the argument is ignored.
+    inline int bin_no_skip(int32 cur)
+    {
+        return 1;
+    }
+
+    // Returns the bin number stored in the extra slot, i.e. the highest bin number assigned by Init().
+    inline int32 get_max_bin_no()
+    {
+        return signature_map[special_signature];
+    }
+
+    ~CSignatureMapper()
+    {
+        delete [] signature_map;
+    }
+
+};
+
+#endif
\ No newline at end of file
diff --git a/kmer_counter/splitter.h b/kmer_counter/splitter.h
new file mode 100755
index 0000000..4ba332a
--- /dev/null
+++ b/kmer_counter/splitter.h
@@ -0,0 +1,941 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#ifndef _SPLITTER_H
+#define _SPLITTER_H
+
+#include "defs.h"
+#include "kmer.h"
+#include "kb_storer.h"
+#include "kb_collector.h"
+#include "kb_reader.h"
+#include "kb_sorter.h"
+#include "kb_completer.h"
+#include "queues.h"
+#include "s_mapper.h"
+#include "mmer.h"
+#include <stdio.h>
+#include <iostream>
+#include <vector>
+using namespace std;
+
+
+//************************************************************************************************************
+//************************************************************************************************************
+template <bool QUAKE_MODE> class CSplitter_Impl;
+
+//************************************************************************************************************
+// CSplitter class - splits k-mers into bins according to their signatures (bin chosen through the signature mapper)
+//************************************************************************************************************
+template <bool QUAKE_MODE> class CSplitter { // parses parts of input files and distributes extended k-mers to per-bin collectors; QUAKE_MODE selects the variant that also handles quality values
+ CMemoryMonitor *mm; // taken from Queues.mm
+
+ //CExKmer ex_kmer;
+ uchar *part; // input part currently being parsed; set by ProcessReads / CalcStats
+ uint64 part_size, part_pos; // size of part in bytes and the read cursor within it
+ CKmerBinCollector **bins; // one collector per bin; NULL until InitBins, released in the destructor
+ CBinPartQueue *bin_part_queue;
+ CBinDesc *bd;
+ CMemoryPool *pmm_reads; // pool providing the seq / quals buffers
+ int64 mem_part_pmm_bins;
+ int64 mem_part_pmm_reads; // used by GetSeq as the limit on symbols returned per call for multi-line FASTA
+
+ char codes[256]; // byte -> symbol code: A/a=0, C/c=1, G/g=2, T/t=3, everything else -1
+ bool use_quake;
+ input_type file_type;
+ int lowest_quality;
+ bool both_strands;
+
+ uint32 kmer_len;
+ //uint32 prefix_len;
+ uint32 signature_len;
+ uint32 n_bins; // set in InitBins; not initialized by the constructor
+ uint64 n_reads;// number of processed reads; for multi-line FASTA it counts sequences (header lines)
+
+ CSignatureMapper* s_mapper; // signature -> bin number mapping, not owned (taken from Queues)
+
+ inline bool GetSeq(char *seq, uint32 &seq_size);
+ inline bool GetSeq(char *seq, char *quals, uint32 &seq_size);
+
+
+
+ friend class CSplitter_Impl<QUAKE_MODE>; // the implementation class accesses the private members directly
+
+public:
+ inline void CalcStats(uchar* _part, uint64 _part_size, uint32* _stats);
+
+ static uint32 MAX_LINE_SIZE;
+
+ CSplitter(CKMCParams &Params, CKMCQueues &Queues);
+ void InitBins(CKMCParams &Params, CKMCQueues &Queues);
+ ~CSplitter(); // NOTE(review): deletes bins, but copy operations are left implicit - copying an instance after InitBins would delete the collectors twice
+
+ bool ProcessReads(uchar *_part, uint64 _part_size);
+ void Complete();
+
+ void GetTotal(uint64 &_n_reads);
+};
+
+template <bool QUAKE_MODE> uint32 CSplitter<QUAKE_MODE>::MAX_LINE_SIZE = 1 << 14; // 16384; not referenced in the visible part of this file
+
+
+//************************************************************************************************************
+// Implementation of ProcessReads and Complete methods for various types and sizes of kmer class
+//************************************************************************************************************
+template <bool QUAKE_MODE> class CSplitter_Impl { // primary template; both values of QUAKE_MODE are explicitly specialized below
+public:
+ static bool ProcessReads(CSplitter<QUAKE_MODE> &ptr, uchar *_part, uint64 _part_size);
+};
+
+template <> class CSplitter_Impl<false> { // variant without quality values (reads records with GetSeq(seq, seq_size))
+public:
+ static bool ProcessReads(CSplitter<false> &ptr, uchar *_part, uint64 _part_size);
+};
+
+template <> class CSplitter_Impl<true> { // variant with quality values (reads records with GetSeq(seq, quals, seq_size))
+public:
+ static bool ProcessReads(CSplitter<true> &ptr, uchar *_part, uint64 _part_size);
+};
+
+//----------------------------------------------------------------------------------
+// Return a single record from FASTA/FASTQ data.
+//
+// Parses the next record of the current part (bytes part[part_pos .. part_size))
+// and stores its symbols, translated through codes[], in seq. seq_size receives
+// the number of symbols stored and part_pos is advanced past the consumed bytes.
+//
+// Returns false when the part is exhausted, when the record is truncated or when
+// it does not start with the expected marker ('>' for FASTA, '@' for FASTQ).
+//
+// multiline_fasta: a header line, if present at part_pos, is skipped and counted
+// in n_reads. At most mem_part_pmm_reads symbols are returned per call; line
+// breaks go through codes[] like any other byte. When the call stops inside a
+// sequence, part_pos is moved back by kmer_len - 1 so that the k-mers spanning
+// the boundary are seen by the next call.
+//
+// NOTE(review): for fasta and fastq nothing limits the number of symbols written
+// to seq - the caller's buffer must be able to hold the longest line.
+template <bool QUAKE_MODE> bool CSplitter<QUAKE_MODE>::GetSeq(char *seq, uint32 &seq_size)
+{
+    uchar c = 0;
+    uint32 pos = 0;
+
+    if(file_type == fasta)
+    {
+        // Title
+        if(part_pos >= part_size)
+            return false;
+        c = part[part_pos++];
+        if(c != '>')
+            return false;
+        for(; part_pos < part_size;)
+        {
+            c = part[part_pos++];
+            if(c < 32)                  // line break
+                break;
+        }
+        if(part_pos >= part_size)
+            return false;
+
+        // Consume the second byte of a two-byte line ending, if there is one
+        c = part[part_pos++];
+        if(c >= 32)
+            part_pos--;
+        else if(part_pos >= part_size)
+            return false;
+
+        // Sequence
+        for(; part_pos < part_size;)
+        {
+            c = part[part_pos++];
+            if(c < 32)                  // line break
+                break;
+            seq[pos++] = codes[c];
+        }
+        seq_size = pos;
+
+        // A sequence that ends together with the part is still a complete record
+        if(part_pos >= part_size)
+            return true;
+
+        if(part[part_pos++] >= 32)
+            part_pos--;
+        else if(part_pos >= part_size)
+            return true;
+    }
+    else if(file_type == fastq)
+    {
+        // Title
+        if(part_pos >= part_size)
+            return false;
+        c = part[part_pos++];
+        if(c != '@')
+            return false;
+        for(; part_pos < part_size;)
+        {
+            c = part[part_pos++];
+            if(c < 32)                  // line break
+                break;
+        }
+        if(part_pos >= part_size)
+            return false;
+
+        c = part[part_pos++];
+        if(c >= 32)
+            part_pos--;
+        else if(part_pos >= part_size)
+            return false;
+
+        // Sequence
+        for(; part_pos < part_size;)
+        {
+            c = part[part_pos++];
+            if(c < 32)                  // line break
+                break;
+            seq[pos++] = codes[c];
+        }
+        if(part_pos >= part_size)
+            return false;
+
+        c = part[part_pos++];
+        if(c >= 32)
+            part_pos--;
+        else if(part_pos >= part_size)
+            return false;
+
+        // Plus
+        c = part[part_pos++];
+        if(part_pos >= part_size)
+            return false;
+        if(c != '+')
+            return false;
+        for(; part_pos < part_size;)
+        {
+            c = part[part_pos++];
+            if(c < 32)                  // line break
+                break;
+        }
+        if(part_pos >= part_size)
+            return false;
+
+        c = part[part_pos++];
+        if(c >= 32)
+            part_pos--;
+        else if(part_pos >= part_size)
+            return false;
+
+        // Quality: skipped, it has as many bytes as the sequence has symbols
+        part_pos += pos;
+        if(part_pos >= part_size)
+            return false;
+        c = part[part_pos++];
+        seq_size = pos;
+
+        if(part_pos >= part_size)
+            return true;
+
+        if(part[part_pos++] >= 32)
+            part_pos--;
+        else if(part_pos >= part_size)
+            return true;
+    }
+    else if(file_type == multiline_fasta)
+    {
+        if(part_pos >= part_size)
+            return false;
+        if(part[part_pos] == '>')       // need to omit the header
+        {
+            ++n_reads;
+            for(; part_pos < part_size && part[part_pos] != '\n' && part[part_pos] != '\r'; ++part_pos);    // find end of line
+            // Step over the line break (one or two bytes) without ever reading
+            // or moving past the end of the part.
+            if(part_pos < part_size)
+                ++part_pos;
+            if(part_pos < part_size && (part[part_pos] == '\n' || part[part_pos] == '\r'))
+                ++part_pos;
+        }
+        for(; part_pos < part_size && pos < mem_part_pmm_reads && part[part_pos] != '>';)
+        {
+            seq[pos++] = codes[part[part_pos++]];
+        }
+        seq_size = pos;
+        if(part_pos < part_size && part[part_pos] != '>')   // need to re-read the last k-1 symbols
+        {
+            part_pos -= kmer_len - 1;
+        }
+        return true;
+
+    }
+
+    return (c == '\n' || c == '\r');
+}
+
+//----------------------------------------------------------------------------------
+// Return a single record with quality codes from FASTQ data.
+//
+// Parses the next FASTQ record of the current part (bytes
+// part[part_pos .. part_size)). The symbols, translated through codes[], are
+// stored in seq and the raw bytes of the quality line in quals; seq_size
+// receives the number of symbols and part_pos is advanced past the consumed
+// bytes.
+//
+// Returns false for fasta / multiline_fasta input (no quality values), when the
+// part is exhausted, when the record is truncated or when the '@' / '+' markers
+// are missing.
+//
+// NOTE(review): nothing limits the number of bytes written to seq and quals -
+// the caller's buffers must be able to hold the longest line.
+template <bool QUAKE_MODE> bool CSplitter<QUAKE_MODE>::GetSeq(char *seq, char *quals, uint32 &seq_size)
+{
+    uchar c = 0;
+    uint32 pos = 0;
+
+    if(file_type == fasta || file_type == multiline_fasta)
+    {
+        return false;       // FASTA file does not store quality values
+    }
+    else
+    {
+        // Title
+        if(part_pos >= part_size)
+            return false;
+        c = part[part_pos++];
+        if(c != '@')
+            return false;
+        for(; part_pos < part_size;)
+        {
+            c = part[part_pos++];
+            if(c < 32)                  // line break
+                break;
+        }
+        if(part_pos >= part_size)
+            return false;
+
+        // Consume the second byte of a two-byte line ending, if there is one
+        c = part[part_pos++];
+        if(c >= 32)
+            part_pos--;
+        else if(part_pos >= part_size)
+            return false;
+
+        // Sequence
+        for(; part_pos < part_size;)
+        {
+            c = part[part_pos++];
+            if(c < 32)                  // line break
+                break;
+            seq[pos++] = codes[c];
+        }
+        if(part_pos >= part_size)
+            return false;
+
+        c = part[part_pos++];
+        if(c >= 32)
+            part_pos--;
+        else if(part_pos >= part_size)
+            return false;
+
+        // Plus
+        c = part[part_pos++];
+        if(part_pos >= part_size)
+            return false;
+        if(c != '+')
+            return false;
+        for(; part_pos < part_size;)
+        {
+            c = part[part_pos++];
+            if(c < 32)                  // line break
+                break;
+        }
+        if(part_pos >= part_size)
+            return false;
+
+        c = part[part_pos++];
+        if(c >= 32)
+            part_pos--;
+        else if(part_pos >= part_size)
+            return false;
+
+        // Quality: one byte per symbol. Make sure the whole line (and the byte
+        // following it) lies inside the part before copying, so that a
+        // truncated record is rejected without reading past the buffer.
+        part_pos += pos;
+        if(part_pos >= part_size)
+            return false;
+        copy(part + (part_pos - pos), part + part_pos, quals);
+
+        c = part[part_pos++];
+        seq_size = pos;
+
+        if(part_pos >= part_size)
+            return true;
+
+        if(part[part_pos++] >= 32)
+            part_pos--;
+        else if(part_pos >= part_size)
+            return true;
+    }
+
+    return (c == '\n' || c == '\r');
+}
+
+/* Counts, for one input part, how many k-mers fall under each signature.
+ * _part / _part_size describe the part to parse; _stats is indexed by the
+ * signature value (current_signature.get()) and is incremented in place, once
+ * per k-mer contained in each run of symbols that shares one signature.
+ * The scan follows the same steps as CSplitter_Impl<...>::ProcessReads, but
+ * only counts k-mers instead of storing them. Prints '*' to stdout when the
+ * part is done.
+ */
+template <bool QUAKE_MODE> void CSplitter<QUAKE_MODE>::CalcStats(uchar* _part, uint64 _part_size, uint32* _stats)
+{
+ part = _part;
+ part_size = _part_size;
+ part_pos = 0;
+
+ char *seq;
+ uint32 seq_size;
+ pmm_reads->reserve(seq); // buffer for the symbols of one record, returned to the pool at the end
+
+ uint32 signature_start_pos; // position in seq where the current signature starts
+ CMmer current_signature(signature_len), end_mmer(signature_len); // end_mmer tracks the m-mer ending at the current position
+
+ uint32 i;
+ uint32 len;//length of the extended k-mer (run of symbols) collected so far
+
+ while (GetSeq(seq, seq_size))
+ {
+ i = 0;
+ len = 0;
+ while (i + kmer_len - 1 < seq_size) // at least one whole k-mer can still start at i
+ {
+ bool contains_N = false;
+ //building first signature after 'N' or at the read beginning
+ for (uint32 j = 0; j < signature_len; ++j, ++i)
+ if (seq[i] < 0)//'N'
+ {
+ contains_N = true;
+ break;
+ }
+ //signature must be shorter than k-mer so if signature contains 'N', k-mer will contain it too
+ if (contains_N)
+ {
+ ++i; // restart right after the 'N'
+ continue;
+ }
+ len = signature_len;
+ signature_start_pos = i - signature_len;
+ current_signature.insert(seq + signature_start_pos);
+ end_mmer.set(current_signature);
+ for (; i < seq_size; ++i)
+ {
+ if (seq[i] < 0)//'N'
+ {
+ if (len >= kmer_len)
+ _stats[current_signature.get()] += 1 + len - kmer_len; // a run of len symbols contains len - kmer_len + 1 k-mers
+ len = 0;
+ ++i;
+ break;
+ }
+ end_mmer.insert(seq[i]);
+ if (end_mmer < current_signature)//signature at the end of current k-mer is lower than current
+ {
+ if (len >= kmer_len)
+ {
+ _stats[current_signature.get()] += 1 + len - kmer_len;
+ len = kmer_len - 1; // the next run starts with the last k-1 symbols of this one
+ }
+ current_signature.set(end_mmer);
+ signature_start_pos = i - signature_len + 1;
+ }
+ else if (end_mmer == current_signature) // same signature seen again: only its position moves
+ {
+ current_signature.set(end_mmer);
+ signature_start_pos = i - signature_len + 1;
+ }
+ else if (signature_start_pos + kmer_len - 1 < i)//need to find new signature
+ {
+ _stats[current_signature.get()] += 1 + len - kmer_len;
+ len = kmer_len - 1;
+ //looking for new signature
+ ++signature_start_pos;
+ //building first signature in current k-mer
+ end_mmer.insert(seq + signature_start_pos);
+ current_signature.set(end_mmer);
+ for (uint32 j = signature_start_pos + signature_len; j <= i; ++j)
+ {
+ end_mmer.insert(seq[j]);
+ if (end_mmer <= current_signature)
+ {
+ current_signature.set(end_mmer);
+ signature_start_pos = j - signature_len + 1;
+ }
+ }
+ }
+ ++len;
+ }
+ }
+ if (len >= kmer_len)//last one in read
+ _stats[current_signature.get()] += 1 + len - kmer_len;
+ }
+
+ putchar('*'); // progress mark, one per part
+ fflush(stdout);
+
+ pmm_reads->free(seq);
+}
+//----------------------------------------------------------------------------------
+// Assigns queues and monitors.
+// Copies the run parameters and the shared queue / pool pointers into the
+// splitter and builds the byte -> symbol code table. No bins are created here;
+// that is done by InitBins.
+// NOTE(review): codes[] stores -1 in plain char and the parsers test
+// "seq[i] < 0", which relies on char being signed on the target platform.
+template <bool QUAKE_MODE> CSplitter<QUAKE_MODE>::CSplitter(CKMCParams &Params, CKMCQueues &Queues)
+    : mm(Queues.mm),
+      part(NULL),
+      bins(NULL),
+      bin_part_queue(Queues.bpq),
+      bd(Queues.bd),
+      pmm_reads(Queues.pmm_reads),
+      mem_part_pmm_bins(Params.mem_part_pmm_bins),
+      mem_part_pmm_reads(Params.mem_part_pmm_reads),
+      use_quake(Params.use_quake),
+      file_type(Params.file_type),
+      lowest_quality(Params.lowest_quality),
+      both_strands(Params.both_strands),
+      kmer_len(Params.kmer_len),
+      signature_len(Params.signature_len),
+      n_reads(0),
+      s_mapper(Queues.s_mapper)
+{
+    // Prepare encoding of symbols: everything is invalid (-1) ...
+    for(char *entry = codes; entry != codes + 256; ++entry)
+        *entry = -1;
+
+    // ... except the four nucleotides, in either letter case
+    const char upper_case[] = "ACGT";
+    const char lower_case[] = "acgt";
+    for(int code = 0; code < 4; ++code)
+        codes[(uchar) upper_case[code]] = codes[(uchar) lower_case[code]] = code;
+}
+
+
+// Creates one CKmerBinCollector per bin (Params.n_bins of them, each with a
+// buffer of Params.bin_part_size) and registers every bin in the bin
+// descriptor table.
+template <bool QUAKE_MODE> void CSplitter<QUAKE_MODE>::InitBins(CKMCParams &Params, CKMCQueues &Queues)
+{
+    n_bins = Params.n_bins;
+    uint32 collector_buffer_size = Params.bin_part_size;
+
+    bins = new CKmerBinCollector*[n_bins];
+
+    uint32 bin_id = 0;
+    while (bin_id < n_bins)
+    {
+        bins[bin_id] = new CKmerBinCollector(Queues, Params, collector_buffer_size, bin_id);
+        bd->insert(bin_id, NULL, "", 0, 0, 0, 0, collector_buffer_size, kmer_len);
+        ++bin_id;
+    }
+}
+//----------------------------------------------------------------------------------
+// Release memory: deletes every bin collector and the array holding them.
+// Nothing to do when InitBins was never called.
+template <bool QUAKE_MODE> CSplitter<QUAKE_MODE>::~CSplitter()
+{
+    if (!bins)
+        return;
+
+    for (uint32 bin_id = 0; bin_id != n_bins; ++bin_id)
+        delete bins[bin_id];    // deleting a null pointer is a no-op
+
+    delete[] bins;
+}
+
+//----------------------------------------------------------------------------------
+// Finish the processing of input file: flushes every existing bin collector.
+// Does nothing when the bins were never created.
+template <bool QUAKE_MODE> void CSplitter<QUAKE_MODE>::Complete()
+{
+    if (!bins)
+        return;
+
+    for (uint32 bin_id = 0; bin_id != n_bins; ++bin_id)
+    {
+        CKmerBinCollector *collector = bins[bin_id];
+        if (collector)
+            collector->Flush();
+    }
+}
+
+//----------------------------------------------------------------------------------
+/* Process the reads from the given input file part.
+ * Forwards to CSplitter_Impl<QUAKE_MODE>::ProcessReads, passing this object,
+ * and returns its result.
+ */
+template <bool QUAKE_MODE> bool CSplitter<QUAKE_MODE>::ProcessReads(uchar *_part, uint64 _part_size)
+{
+ return CSplitter_Impl<QUAKE_MODE>::ProcessReads(*this, _part, _part_size);
+}
+
+//----------------------------------------------------------------------------------
+/* Return the number of reads processed by splitter.
+ * The current value of n_reads is written to the output parameter _n_reads.
+ */
+template <bool QUAKE_MODE> void CSplitter<QUAKE_MODE>::GetTotal(uint64 &_n_reads)
+{
+ _n_reads = n_reads;
+}
+
+
+//************************************************************************************************************
+// Implementation of specific splitter methods for various types and sizes of kmers
+//************************************************************************************************************
+
+//----------------------------------------------------------------------------------
+/* Process the reads from the given input file part (variant without quality values).
+ * Every record returned by ptr.GetSeq is scanned once. While scanning, the
+ * current signature is tracked and the read is cut into runs of symbols
+ * ("extended k-mers"); each finished run of at least kmer_len symbols is
+ * passed to the collector of the bin that ptr.s_mapper assigns to the run's
+ * signature. A symbol with a negative code ('N') ends the current run.
+ * A run is also closed when it reaches kmer_len + 255 symbols.
+ * ptr.n_reads is incremented per record, except for multiline_fasta.
+ * Prints '*' to stdout when the part is done. Always returns true.
+ */
+bool CSplitter_Impl<false>::ProcessReads(CSplitter<false> &ptr, uchar *_part, uint64 _part_size)
+{
+ ptr.part = _part;
+ ptr.part_size = _part_size;
+ ptr.part_pos = 0;
+
+ char *seq;
+ uint32 seq_size;
+ ptr.pmm_reads->reserve(seq); // buffer for the symbols of one record, returned to the pool at the end
+
+ uint32 signature_start_pos; // position in seq where the current signature starts
+ CMmer current_signature(ptr.signature_len), end_mmer(ptr.signature_len); // end_mmer tracks the m-mer ending at the current position
+ uint32 bin_no;
+
+ uint32 i;
+ uint32 len;//length of the extended k-mer collected so far
+
+ while (ptr.GetSeq(seq, seq_size))
+ {
+ if (ptr.file_type != multiline_fasta)
+ ptr.n_reads++;
+ i = 0;
+ len = 0;
+ while (i + ptr.kmer_len - 1 < seq_size) // at least one whole k-mer can still start at i
+ {
+ bool contains_N = false;
+ //building first signature after 'N' or at the read beginning
+ for (uint32 j = 0; j < ptr.signature_len; ++j, ++i)
+ if (seq[i] < 0)//'N'
+ {
+ contains_N = true;
+ break;
+ }
+ //signature must be shorter than k-mer so if signature contains 'N', k-mer will contain it too
+ if (contains_N)
+ {
+ ++i; // restart right after the 'N'
+ continue;
+ }
+ len = ptr.signature_len;
+ signature_start_pos = i - ptr.signature_len;
+ current_signature.insert(seq + signature_start_pos);
+ end_mmer.set(current_signature);
+ for (; i < seq_size; ++i)
+ {
+ if (seq[i] < 0)//'N'
+ {
+ if (len >= ptr.kmer_len)
+ {
+ bin_no = ptr.s_mapper->get_bin_id(current_signature.get());
+ ptr.bins[bin_no]->PutExtendedKmer(seq + i - len, len); // the run covers seq[i-len .. i-1]
+ }
+ len = 0;
+ ++i;
+ break;
+ }
+ end_mmer.insert(seq[i]);
+ if (end_mmer < current_signature)//signature at the end of current k-mer is lower than current
+ {
+ if (len >= ptr.kmer_len)
+ {
+ bin_no = ptr.s_mapper->get_bin_id(current_signature.get());
+ ptr.bins[bin_no]->PutExtendedKmer(seq + i - len, len);
+ len = ptr.kmer_len - 1; // the next run starts with the last k-1 symbols of this one
+ }
+ current_signature.set(end_mmer);
+ signature_start_pos = i - ptr.signature_len + 1;
+ }
+ else if (end_mmer == current_signature) // same signature seen again: only its position moves
+ {
+ current_signature.set(end_mmer);
+ signature_start_pos = i - ptr.signature_len + 1;
+ }
+ else if (signature_start_pos + ptr.kmer_len - 1 < i)//need to find new signature
+ {
+ bin_no = ptr.s_mapper->get_bin_id(current_signature.get());
+ ptr.bins[bin_no]->PutExtendedKmer(seq + i - len, len);
+ len = ptr.kmer_len - 1;
+ //looking for new signature
+ ++signature_start_pos;
+ //building first signature in current k-mer
+ end_mmer.insert(seq + signature_start_pos);
+ current_signature.set(end_mmer);
+ for (uint32 j = signature_start_pos + ptr.signature_len; j <= i; ++j)
+ {
+ end_mmer.insert(seq[j]);
+ if (end_mmer <= current_signature)
+ {
+ current_signature.set(end_mmer);
+ signature_start_pos = j - ptr.signature_len + 1;
+ }
+ }
+ }
+ ++len;
+ if (len == ptr.kmer_len + 255) //one byte is used to store counter of additional symbols in extended k-mer
+ {
+ bin_no = ptr.s_mapper->get_bin_id(current_signature.get());
+ ptr.bins[bin_no]->PutExtendedKmer(seq + i + 1 - len, len); // len already includes seq[i], hence the + 1
+ i -= ptr.kmer_len - 2; // restart at the first position of the next k-mer (break skips the loop's ++i)
+ len = 0;
+ break;
+ }
+
+ }
+ }
+ if (len >= ptr.kmer_len)//last one in read
+ {
+ bin_no = ptr.s_mapper->get_bin_id(current_signature.get());
+ ptr.bins[bin_no]->PutExtendedKmer(seq + i - len, len);
+ }
+ }
+
+ putchar('*'); // progress mark, one per part
+ fflush(stdout);
+
+ ptr.pmm_reads->free(seq);
+
+
+ return true;
+}
+
+//----------------------------------------------------------------------------------
+/* Process the reads from the given FASTQ file part (variant with quality values).
+ * Every record returned by ptr.GetSeq(seq, quals, seq_size) is scanned once.
+ * While scanning, the current signature is tracked and the read is cut into
+ * runs of symbols ("extended k-mers"); each finished run of at least kmer_len
+ * symbols is passed, together with the matching slice of quals, to the
+ * collector of the bin that ptr.s_mapper assigns to the run's signature.
+ * A symbol with a negative code ('N') ends the current run. A run is also
+ * closed when it reaches kmer_len + 255 symbols.
+ * Prints '*' to stdout when the part is done. Always returns true.
+ */
+bool CSplitter_Impl<true>::ProcessReads(CSplitter<true> &ptr, uchar *_part, uint64 _part_size)
+{
+ ptr.part = _part;
+ ptr.part_size = _part_size;
+ ptr.part_pos = 0;
+
+ char *seq;
+ char *quals;
+
+ ptr.pmm_reads->reserve(seq); // buffer for the symbols of one record
+ ptr.pmm_reads->reserve(quals); // buffer for the quality bytes of one record
+
+
+ uint32 seq_size;
+
+ uint32 signature_start_pos; // position in seq where the current signature starts
+ CMmer current_signature(ptr.signature_len), end_mmer(ptr.signature_len); // end_mmer tracks the m-mer ending at the current position
+ uint32 bin_no;
+
+ uint32 i;
+ uint32 len;//length of the extended k-mer collected so far
+
+
+
+ while (ptr.GetSeq(seq, quals, seq_size))
+ {
+ if (ptr.file_type != multiline_fasta)
+ ptr.n_reads++;
+ i = 0;
+ len = 0;
+ while (i + ptr.kmer_len - 1 < seq_size) // at least one whole k-mer can still start at i
+ {
+ bool contains_N = false;
+ //building first signature after 'N' or at the read beginning
+ for (uint32 j = 0; j < ptr.signature_len; ++j, ++i)
+ if (seq[i] < 0)//'N'
+ {
+ contains_N = true;
+ break;
+ }
+ //signature must be shorter than k-mer so if signature contains 'N', k-mer will contain it too
+ if (contains_N)
+ {
+ ++i; // restart right after the 'N'
+ continue;
+ }
+ len = ptr.signature_len;
+ signature_start_pos = i - ptr.signature_len;
+ current_signature.insert(seq + signature_start_pos);
+ end_mmer.set(current_signature);
+ for (; i < seq_size; ++i)
+ {
+ if (seq[i] < 0)//'N'
+ {
+ if (len >= ptr.kmer_len)
+ {
+ bin_no = ptr.s_mapper->get_bin_id(current_signature.get());
+ ptr.bins[bin_no]->PutExtendedKmer(seq + i - len, quals + i - len, len); // the run covers positions i-len .. i-1
+ }
+ len = 0;
+ ++i;
+ break;
+ }
+ end_mmer.insert(seq[i]);
+ if (end_mmer < current_signature)//signature at the end of current k-mer is lower than current
+ {
+ if (len >= ptr.kmer_len)
+ {
+ bin_no = ptr.s_mapper->get_bin_id(current_signature.get());
+ ptr.bins[bin_no]->PutExtendedKmer(seq + i - len, quals + i - len, len);
+ len = ptr.kmer_len - 1; // the next run starts with the last k-1 symbols of this one
+ }
+ current_signature.set(end_mmer);
+ signature_start_pos = i - ptr.signature_len + 1;
+ }
+ else if (end_mmer == current_signature) // same signature seen again: only its position moves
+ {
+ current_signature.set(end_mmer);
+ signature_start_pos = i - ptr.signature_len + 1;
+ }
+ else if (signature_start_pos + ptr.kmer_len - 1 < i)//need to find new signature
+ {
+ bin_no = ptr.s_mapper->get_bin_id(current_signature.get());
+ ptr.bins[bin_no]->PutExtendedKmer(seq + i - len, quals + i - len, len);
+ len = ptr.kmer_len - 1;
+ //looking for new signature
+ ++signature_start_pos;
+ //building first signature in current k-mer
+ end_mmer.insert(seq + signature_start_pos);
+ current_signature.set(end_mmer);
+ for (uint32 j = signature_start_pos + ptr.signature_len; j <= i; ++j)
+ {
+ end_mmer.insert(seq[j]);
+ if (end_mmer <= current_signature)
+ {
+ current_signature.set(end_mmer);
+ signature_start_pos = j - ptr.signature_len + 1;
+ }
+ }
+ }
+ ++len;
+ if (len == ptr.kmer_len + 255) //one byte is used to store counter of additional symbols in extended k-mer
+ {
+ bin_no = ptr.s_mapper->get_bin_id(current_signature.get());
+ ptr.bins[bin_no]->PutExtendedKmer(seq + i + 1 - len, quals + i + 1 - len, len); // len already includes position i, hence the + 1
+ i -= ptr.kmer_len - 2; // restart at the first position of the next k-mer (break skips the loop's ++i)
+ len = 0;
+ break;
+ }
+ }
+ }
+ if (len >= ptr.kmer_len)//last one in read
+ {
+ bin_no = ptr.s_mapper->get_bin_id(current_signature.get());
+ ptr.bins[bin_no]->PutExtendedKmer(seq + i - len, quals + i - len, len);
+ }
+ }
+
+
+ putchar('*'); // progress mark, one per part
+ fflush(stdout);
+
+
+ ptr.pmm_reads->free(seq);
+ ptr.pmm_reads->free(quals);
+
+ return true;
+}
+
+//************************************************************************************************************
+// CWSplitter class - wrapper for multithreading purposes
+//************************************************************************************************************
+
+//----------------------------------------------------------------------------------
+template <bool QUAKE_MODE> class CWSplitter { // callable wrapper: pops input parts from a queue and feeds them to its own CSplitter
+ CPartQueue *pq; // source of input parts
+ CBinPartQueue *bpq; // marked completed once this wrapper has finished
+ CMemoryPool *pmm_fastq; // pool the input parts are returned to after processing
+
+ CSplitter<QUAKE_MODE> *spl; // created in the constructor, deleted and set to NULL at the end of operator()
+ uint64 n_reads; // read count copied from spl; not initialized by the constructor
+
+public:
+ CWSplitter(CKMCParams &Params, CKMCQueues &Queues);
+ ~CWSplitter();
+
+ void operator()();
+ void GetTotal(uint64 &_n_reads);
+};
+
+//----------------------------------------------------------------------------------
+// Constructor: remembers the queues and the pool it works with, then creates
+// the splitter and lets it set up its bins.
+template <bool QUAKE_MODE> CWSplitter<QUAKE_MODE>::CWSplitter(CKMCParams &Params, CKMCQueues &Queues)
+    : pq(Queues.part_queue),
+      bpq(Queues.bpq),
+      pmm_fastq(Queues.pmm_fastq),
+      spl(new CSplitter<QUAKE_MODE>(Params, Queues))
+{
+    spl->InitBins(Params, Queues);
+}
+
+//----------------------------------------------------------------------------------
+/* Destructor. Does nothing: the splitter (spl) is deleted at the end of operator()().
+ * NOTE(review): if operator()() never runs, spl is not released here - confirm that cannot happen.
+ */
+template <bool QUAKE_MODE> CWSplitter<QUAKE_MODE>::~CWSplitter()
+{
+}
+
+//----------------------------------------------------------------------------------
+// Execution: processes input parts until the part queue reports completion,
+// then flushes the bins, marks the bin part queue as completed, stores the
+// final read count and destroys the splitter.
+template <bool QUAKE_MODE> void CWSplitter<QUAKE_MODE>::operator()()
+{
+    uchar *chunk;
+    uint64 chunk_size;
+
+    // Splitting parts
+    for(;;)
+    {
+        if(pq->completed())
+            break;
+
+        if(!pq->pop(chunk, chunk_size))
+            continue;       // nothing delivered this time, check for completion again
+
+        spl->ProcessReads(chunk, chunk_size);
+        pmm_fastq->free(chunk);
+    }
+
+    spl->Complete();
+    bpq->mark_completed();
+
+    // Keep the total, it must stay available after the splitter is gone
+    spl->GetTotal(n_reads);
+
+    delete spl;
+    spl = NULL;
+}
+
+//----------------------------------------------------------------------------------
+// Return statistics: the number of reads processed so far. While the splitter
+// still exists its current count is fetched first; afterwards the value saved
+// by operator()() is reported.
+template <bool QUAKE_MODE> void CWSplitter<QUAKE_MODE>::GetTotal(uint64 &_n_reads)
+{
+    if(spl != NULL)
+    {
+        spl->GetTotal(n_reads);
+    }
+
+    _n_reads = n_reads;
+}
+
+
+
+//************************************************************************************************************
+// CWStatsSplitter class - wrapper for multithreading purposes
+//************************************************************************************************************
+
+//----------------------------------------------------------------------------------
+template <bool QUAKE_MODE> class CWStatsSplitter { // callable wrapper: pops input parts from the statistics queue and accumulates per-signature k-mer counts
+ CStatsPartQueue *spq; // source of input parts
+ CMemoryPool *pmm_fastq, *pmm_stats; // pools for the input parts and for the stats array
+ uint32 *stats; // 4^signature_len + 1 counters, reserved from pmm_stats in the constructor and freed in the destructor
+ CSplitter<QUAKE_MODE> *spl; // created in the constructor, deleted and set to NULL at the end of operator()
+ uint32 signature_len;
+
+public:
+ CWStatsSplitter(CKMCParams &Params, CKMCQueues &Queues);
+ ~CWStatsSplitter();
+
+ void operator()();
+ void GetStats(uint32* _stats);
+};
+
+//----------------------------------------------------------------------------------
+// Constructor: remembers the queue and pools, creates the splitter (without
+// bins) and reserves a zeroed array of 4^signature_len + 1 counters.
+template <bool QUAKE_MODE> CWStatsSplitter<QUAKE_MODE>::CWStatsSplitter(CKMCParams &Params, CKMCQueues &Queues)
+    : spq(Queues.stats_part_queue),
+      pmm_fastq(Queues.pmm_fastq),
+      pmm_stats(Queues.pmm_stats),
+      spl(new CSplitter<QUAKE_MODE>(Params, Queues)),
+      signature_len(Params.signature_len)
+{
+    pmm_stats->reserve(stats);
+
+    const int n_counters = (1 << signature_len * 2) + 1;
+    fill_n(stats, n_counters, 0);
+}
+
+//----------------------------------------------------------------------------------
+/* Destructor: returns the stats array to pmm_stats.
+ * The splitter (spl) is not released here; it is deleted at the end of operator()().
+ */
+template <bool QUAKE_MODE> CWStatsSplitter<QUAKE_MODE>::~CWStatsSplitter()
+{
+ pmm_stats->free(stats);
+}
+
+//----------------------------------------------------------------------------------
+// Execution: accumulates signature statistics for every input part until the
+// statistics queue reports completion, then destroys the splitter.
+template <bool QUAKE_MODE> void CWStatsSplitter<QUAKE_MODE>::operator()()
+{
+    uchar *chunk;
+    uint64 chunk_size;
+
+    // Splitting parts
+    for (;;)
+    {
+        if (spq->completed())
+            break;
+
+        if (!spq->pop(chunk, chunk_size))
+            continue;       // nothing delivered this time, check for completion again
+
+        spl->CalcStats(chunk, chunk_size, stats);
+        pmm_fastq->free(chunk);
+    }
+
+    delete spl;
+    spl = NULL;
+}
+
+//----------------------------------------------------------------------------------
+// Adds the counters collected by this object to _stats, element by element.
+// _stats must provide 4^signature_len + 1 entries; its contents are
+// accumulated into, not overwritten.
+template <bool QUAKE_MODE> void CWStatsSplitter<QUAKE_MODE>::GetStats(uint32* _stats)
+{
+    const uint32 n_counters = (1 << signature_len * 2) + 1;
+
+    const uint32 *src = stats;
+    uint32 *dst = _stats;
+    for (uint32 remaining = n_counters; remaining != 0; --remaining)
+        *dst++ += *src++;
+}
+
+
+#endif
+
+// ***** EOF
diff --git a/kmer_counter/stdafx.cpp b/kmer_counter/stdafx.cpp
new file mode 100755
index 0000000..0d61b81
--- /dev/null
+++ b/kmer_counter/stdafx.cpp
@@ -0,0 +1,8 @@
+// stdafx.cpp : source file that includes just the standard includes
+// kmer_counter.pch will be the pre-compiled header
+// stdafx.obj will contain the pre-compiled type information
+
+#include "stdafx.h"
+
+// TODO: reference any additional headers you need in STDAFX.H
+// and not in this file
diff --git a/kmer_counter/stdafx.h b/kmer_counter/stdafx.h
new file mode 100755
index 0000000..c4769f1
--- /dev/null
+++ b/kmer_counter/stdafx.h
@@ -0,0 +1,28 @@
+#ifdef WIN32
+
+// stdafx.h : include file for standard system include files,
+// or project specific include files that are used frequently, but
+// are changed infrequently
+//
+
+#pragma once
+
+#include "targetver.h"
+
+#include <stdio.h>
+#include <tchar.h>
+#include <boost/thread.hpp>
+
+
+
+// TODO: reference additional headers your program requires here
+
+#else
+
+#include <stdio.h>
+#include <ext/algorithm>
+#include <iostream>
+using namespace std;
+using __gnu_cxx::copy_n;
+
+#endif
\ No newline at end of file
diff --git a/kmer_counter/targetver.h b/kmer_counter/targetver.h
new file mode 100755
index 0000000..90e767b
--- /dev/null
+++ b/kmer_counter/targetver.h
@@ -0,0 +1,8 @@
+#pragma once
+
+// Including SDKDDKVer.h defines the highest available Windows platform.
+
+// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
+// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
+
+#include <SDKDDKVer.h>
diff --git a/kmer_counter/timer.cpp b/kmer_counter/timer.cpp
new file mode 100755
index 0000000..4667cb5
--- /dev/null
+++ b/kmer_counter/timer.cpp
@@ -0,0 +1,77 @@
+#include "stdafx.h"
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ The source codes are based on codes written by Dennis and published:
+ http://allmybrain.com/2008/06/10/timing-cc-code-on-linux/
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#ifdef WIN32
+#include <windows.h>
+#endif
+
+#include <cstdio> // NULL
+#include "timer.h"
+
+
+#ifdef WIN32
+// ---- Windows: time stamps are performance-counter readings ----
+
+// Converts a tick count to seconds by dividing it by the counter frequency.
+double CStopWatch::LIToSecs( LARGE_INTEGER & L) {
+	const double ticks = (double)L.QuadPart;
+	const double ticks_per_second = (double)frequency.QuadPart;
+	return ticks / ticks_per_second;
+}
+
+// Zeroes both time stamps and reads the counter frequency.
+CStopWatch::CStopWatch(){
+	timer.stop.QuadPart = 0;
+	timer.start.QuadPart = 0;
+	QueryPerformanceFrequency( &frequency );
+}
+
+// Records the current counter reading as the start stamp.
+void CStopWatch::startTimer( ) {
+	QueryPerformanceCounter( &timer.start );
+}
+
+// Records the current counter reading as the stop stamp.
+void CStopWatch::stopTimer( ) {
+	QueryPerformanceCounter( &timer.stop );
+}
+
+// Returns the number of seconds between the start and the stop stamp.
+double CStopWatch::getElapsedTime() {
+	LARGE_INTEGER elapsed;
+	elapsed.QuadPart = timer.stop.QuadPart - timer.start.QuadPart;
+	return LIToSecs( elapsed );
+}
+
+#else
+// ---- Other platforms: time stamps come from gettimeofday() ----
+
+// Records the current time of day as the start stamp.
+void CStopWatch::startTimer( ) {
+	gettimeofday( &timer.start, NULL );
+}
+
+// Records the current time of day as the stop stamp.
+void CStopWatch::stopTimer( ) {
+	gettimeofday( &timer.stop, NULL );
+}
+
+// Returns the number of seconds between the start and the stop stamp.
+double CStopWatch::getElapsedTime() {
+	timeval elapsed;
+	timersub( &timer.stop, &timer.start, &elapsed );
+	const double whole_seconds = elapsed.tv_sec;
+	const double fraction = elapsed.tv_usec / 1000000.0; // 10^6 uSec per second
+	return whole_seconds + fraction;
+}
+
+#endif
\ No newline at end of file
diff --git a/kmer_counter/timer.h b/kmer_counter/timer.h
new file mode 100755
index 0000000..10b38d0
--- /dev/null
+++ b/kmer_counter/timer.h
@@ -0,0 +1,67 @@
+/*
+ This file is a part of KMC software distributed under GNU GPL 3 licence.
+ The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
+
+ The source codes are based on codes written by Dennis and published:
+ http://allmybrain.com/2008/06/10/timing-cc-code-on-linux/
+
+ Version: 2.0
+ Date : 2014-07-04
+*/
+
+#ifndef _TIMER_H
+#define _TIMER_H
+
+#ifdef WIN32
+#include <windows.h>
+
+// Start/stop time stamps, stored as performance-counter readings.
+typedef struct {
+	LARGE_INTEGER start;
+	LARGE_INTEGER stop;
+} stopWatch;
+
+// Stopwatch: getElapsedTime() returns the seconds between the stamps taken
+// by startTimer() and stopTimer().
+class CStopWatch {
+
+private:
+	stopWatch timer;
+	LARGE_INTEGER frequency;	// counter frequency, used to convert ticks to seconds
+	double LIToSecs( LARGE_INTEGER & L);
+public:
+	CStopWatch();
+	void startTimer( );
+	void stopTimer( );
+	double getElapsedTime();
+};
+
+#else
+#include <sys/time.h>
+
+// Start/stop time stamps, stored as timeval values.
+typedef struct {
+	timeval start;
+	timeval stop;
+} stopWatch;
+
+// Stopwatch: getElapsedTime() returns the seconds between the stamps taken
+// by startTimer() and stopTimer().
+class CStopWatch {
+
+private:
+	stopWatch timer;
+public:
+	// Value-initialise both time stamps to zero, matching the WIN32
+	// constructor, so that getElapsedTime() on a never-started stopwatch
+	// returns 0 instead of reading indeterminate values.
+	CStopWatch() : timer() {}
+	void startTimer( );
+	void stopTimer( );
+	double getElapsedTime();
+};
+
+#endif
+
+#endif
+// ***** EOF
diff --git a/makefile b/makefile
new file mode 100755
index 0000000..ec617aa
--- /dev/null
+++ b/makefile
@@ -0,0 +1,53 @@
+# Builds the KMC k-mer counter (kmc) and the database dump tool (kmc_dump)
+# into $(KMC_BIN_DIR).
+
+# Default goal: build both programs.
+all: kmc kmc_dump
+
+# kmc and kmc_dump are aliases for the binaries below.  They must be phony:
+# kmc_dump is also the name of a source directory, so as a file target make
+# would compare the directory's timestamp and could skip the link step.
+.PHONY: all kmc kmc_dump clean
+
+BOOST_LIB = /boost/boost_1_55_0/stage/lib
+BOOST_H = /boost/boost_1_55_0
+
+KMC_BIN_DIR = bin
+KMC_MAIN_DIR = kmer_counter
+KMC_API_DIR = kmc_api
+KMC_DUMP_DIR = kmc_dump
+
+CC = g++
+CFLAGS = -Wall -O3 -m64 -static -fopenmp -std=c++11 -I $(BOOST_H)
+CLINK = -lm -static -fopenmp -O3 -std=c++11
+
+# Objects linked into each program.
+KMC_OBJS := $(addprefix $(KMC_MAIN_DIR)/,kmer_counter.o mmer.o mem_disk_file.o \
+            rev_byte.o fastq_reader.o timer.o radix.o kb_completer.o kb_storer.o kmer.o)
+KMC_DUMP_OBJS := $(addprefix $(KMC_DUMP_DIR)/,nc_utils.o kmc_dump.o) \
+                 $(addprefix $(KMC_API_DIR)/,mmer.o kmc_file.o kmer_api.o)
+
+# Static libraries appended to the kmc link line (bundled ones first, then Boost).
+KMC_LIBS := $(addprefix $(KMC_MAIN_DIR)/libs/,alibelf64.a libz.a libbz2.a) \
+            $(addprefix $(BOOST_LIB)/,libboost_thread.a libboost_filesystem.a libboost_system.a)
+
+%.o: %.cpp
+	$(CC) $(CFLAGS) -c $< -o $@
+
+kmc: $(KMC_BIN_DIR)/kmc
+kmc_dump: $(KMC_BIN_DIR)/kmc_dump
+
+# The output directory is order-only: it has to exist, but its timestamp
+# must not trigger a relink.
+$(KMC_BIN_DIR)/kmc: $(KMC_OBJS) | $(KMC_BIN_DIR)
+	$(CC) $(CLINK) -o $@ $^ $(KMC_LIBS)
+
+$(KMC_BIN_DIR)/kmc_dump: $(KMC_DUMP_OBJS) | $(KMC_BIN_DIR)
+	$(CC) $(CLINK) -o $@ $^
+
+$(KMC_BIN_DIR):
+	mkdir -p $@
+
+clean:
+	rm -f $(KMC_MAIN_DIR)/*.o $(KMC_API_DIR)/*.o $(KMC_DUMP_DIR)/*.o
+	rm -rf $(KMC_BIN_DIR)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/kmc.git
More information about the debian-med-commit
mailing list