[med-svn] [bambamc] 01/02: Imported Upstream version 0.0.49
Andreas Tille
tille at debian.org
Sun Apr 13 06:42:29 UTC 2014
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository bambamc.
commit 43b63dc5d57ea088478c5ba95246ba8cf3351e14
Author: Andreas Tille <tille at debian.org>
Date: Sun Apr 13 07:56:58 2014 +0200
Imported Upstream version 0.0.49
---
.gitignore | 35 +
AUTHORS | 1 +
COPYING | 7 +
ChangeLog | 0
GPLv3 | 674 ++++++++++
INSTALL | 370 ++++++
Makefile.am | 7 +
NEWS | 0
README | 10 +
README.md | 4 +
bambamc.pc.in | 11 +
bumpversion.sh | 34 +
configure.in | 165 +++
m4/keepme.m4 | 0
src/Makefile.am | 46 +
src/bambamc/BamBam_BamAlignmentPut.c | 351 +++++
src/bambamc/BamBam_BamAlignmentPut.h | 65 +
src/bambamc/BamBam_BamCollationHash.c | 57 +
src/bambamc/BamBam_BamCollationHash.h | 35 +
src/bambamc/BamBam_BamCollationHashEntry.c | 128 ++
src/bambamc/BamBam_BamCollationHashEntry.h | 44 +
src/bambamc/BamBam_BamCollationOutputVector.c | 84 ++
src/bambamc/BamBam_BamCollationOutputVector.h | 40 +
src/bambamc/BamBam_BamCollationTempFileGenerator.c | 164 +++
src/bambamc/BamBam_BamCollationTempFileGenerator.h | 39 +
src/bambamc/BamBam_BamCollationVector.c | 189 +++
src/bambamc/BamBam_BamCollationVector.h | 44 +
src/bambamc/BamBam_BamCollator.c | 906 +++++++++++++
src/bambamc/BamBam_BamCollator.h | 93 ++
src/bambamc/BamBam_BamCollatorInterface.c | 228 ++++
src/bambamc/BamBam_BamCollatorInterface.h | 29 +
src/bambamc/BamBam_BamFileDecoder.c | 80 ++
src/bambamc/BamBam_BamFileDecoder.h | 35 +
src/bambamc/BamBam_BamFileHeader.c | 981 ++++++++++++++
src/bambamc/BamBam_BamFileHeader.h | 58 +
src/bambamc/BamBam_BamFlagBase.h | 50 +
src/bambamc/BamBam_BamHeaderInfo.c | 181 +++
src/bambamc/BamBam_BamHeaderInfo.h | 42 +
src/bambamc/BamBam_BamSingleAlignment.c | 1382 ++++++++++++++++++++
src/bambamc/BamBam_BamSingleAlignment.h | 114 ++
src/bambamc/BamBam_BamWriter.c | 117 ++
src/bambamc/BamBam_BamWriter.h | 64 +
src/bambamc/BamBam_BgzfCompressor.c | 325 +++++
src/bambamc/BamBam_BgzfCompressor.h | 72 +
src/bambamc/BamBam_CharBuffer.c | 112 ++
src/bambamc/BamBam_CharBuffer.h | 105 ++
src/bambamc/BamBam_Chromosome.c | 55 +
src/bambamc/BamBam_Chromosome.h | 35 +
src/bambamc/BamBam_CollatorState.h | 30 +
src/bambamc/BamBam_Config.h.in | 24 +
src/bambamc/BamBam_FastQRead.h | 33 +
src/bambamc/BamBam_FormatAlignment.c | 166 +++
src/bambamc/BamBam_FormatAlignment.h | 29 +
src/bambamc/BamBam_FormatNumber.c | 140 ++
src/bambamc/BamBam_FormatNumber.h | 31 +
src/bambamc/BamBam_GzipFileDecoder.c | 70 +
src/bambamc/BamBam_GzipFileDecoder.h | 34 +
src/bambamc/BamBam_GzipReader.c | 141 ++
src/bambamc/BamBam_GzipReader.h | 39 +
src/bambamc/BamBam_GzipWriter.c | 145 ++
src/bambamc/BamBam_GzipWriter.h | 38 +
src/bambamc/BamBam_Hash.c | 105 ++
src/bambamc/BamBam_Hash.h | 27 +
src/bambamc/BamBam_LineBuffer.c | 212 +++
src/bambamc/BamBam_LineBuffer.h | 44 +
src/bambamc/BamBam_LineParsing.c | 136 ++
src/bambamc/BamBam_LineParsing.h | 33 +
src/bambamc/BamBam_List.c | 92 ++
src/bambamc/BamBam_List.h | 39 +
src/bambamc/BamBam_ListNode.c | 97 ++
src/bambamc/BamBam_ListNode.h | 44 +
src/bambamc/BamBam_MergeHeapEntry.h | 30 +
src/bambamc/BamBam_SamBamFileDecoder.c | 91 ++
src/bambamc/BamBam_SamBamFileDecoder.h | 35 +
src/bambamc/BamBam_SamFileDecoder.c | 183 +++
src/bambamc/BamBam_SamFileDecoder.h | 44 +
src/bambamc/BamBam_SamInfo.c | 614 +++++++++
src/bambamc/BamBam_SamInfo.h | 76 ++
src/bambamc/BamBam_StrDup.c | 51 +
src/bambamc/BamBam_StrDup.h | 24 +
src/bambamc/BamBam_Unused.h | 29 +
src/test/bamreadertest.c | 145 ++
src/test/bamwritertest.c | 85 ++
ubuntu.sh.in | 5 +
84 files changed, 10829 insertions(+)
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b06d403
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,35 @@
+*.o
+*~
+INSTALL
+Makefile.in
+aclocal.m4
+autom4te.cache
+config.guess
+config.h.in
+config.sub
+configure
+depcomp
+doconfig.sh
+doconfig32.sh
+install-sh
+ltmain.sh
+missing
+recomp.sh
+src/Makefile.in
+Makefile
+compile
+config.h
+config.log
+config.status
+doconfig48.sh
+doconfig49.sh
+libtool
+m4/libtool.m4
+m4/ltoptions.m4
+m4/ltsugar.m4
+m4/ltversion.m4
+m4/lt~obsolete.m4
+maccombined.sh
+src/.deps/
+stamp-h1
+
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000..c11387b
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1 @@
+Please see the COPYING file.
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..1a8ce11
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,7 @@
+The hashing code in the source file src/bambamc/BamBam_Hash.c is by Bob
+Jenkins. This code is in the public domain (please see the corresponding
+remarks in src/bambamc/BamBam_Hash.c).
+
+The rest of the code is by German Tischler and distributed under version 3 of
+the GNU General Public License (see file GPLv3, which is part of the distribution).
+The single source files denote the respective copyright ownerships.
diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 0000000..e69de29
diff --git a/GPLv3 b/GPLv3
new file mode 100644
index 0000000..94a9ed0
--- /dev/null
+++ b/GPLv3
@@ -0,0 +1,674 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too, receive
+or can get the source code. And you must show them these terms so they
+know their rights.
+
+ Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+ For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+ Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so. This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software. The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products. If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+ Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary. To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Use with the GNU Affero General Public License.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+ <program> Copyright (C) <year> <name of author>
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+ The GNU General Public License does not permit incorporating your program
+into proprietary programs. If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License. But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/INSTALL b/INSTALL
new file mode 100644
index 0000000..007e939
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,370 @@
+Installation Instructions
+*************************
+
+Copyright (C) 1994-1996, 1999-2002, 2004-2013 Free Software Foundation,
+Inc.
+
+ Copying and distribution of this file, with or without modification,
+are permitted in any medium without royalty provided the copyright
+notice and this notice are preserved. This file is offered as-is,
+without warranty of any kind.
+
+Basic Installation
+==================
+
+ Briefly, the shell commands `./configure; make; make install' should
+configure, build, and install this package. The following
+more-detailed instructions are generic; see the `README' file for
+instructions specific to this package. Some packages provide this
+`INSTALL' file but do not implement all of the features documented
+below. The lack of an optional feature in a given package is not
+necessarily a bug. More recommendations for GNU packages can be found
+in *note Makefile Conventions: (standards)Makefile Conventions.
+
+ The `configure' shell script attempts to guess correct values for
+various system-dependent variables used during compilation. It uses
+those values to create a `Makefile' in each directory of the package.
+It may also create one or more `.h' files containing system-dependent
+definitions. Finally, it creates a shell script `config.status' that
+you can run in the future to recreate the current configuration, and a
+file `config.log' containing compiler output (useful mainly for
+debugging `configure').
+
+ It can also use an optional file (typically called `config.cache'
+and enabled with `--cache-file=config.cache' or simply `-C') that saves
+the results of its tests to speed up reconfiguring. Caching is
+disabled by default to prevent problems with accidental use of stale
+cache files.
+
+ If you need to do unusual things to compile the package, please try
+to figure out how `configure' could check whether to do them, and mail
+diffs or instructions to the address given in the `README' so they can
+be considered for the next release. If you are using the cache, and at
+some point `config.cache' contains results you don't want to keep, you
+may remove or edit it.
+
+ The file `configure.ac' (or `configure.in') is used to create
+`configure' by a program called `autoconf'. You need `configure.ac' if
+you want to change it or regenerate `configure' using a newer version
+of `autoconf'.
+
+ The simplest way to compile this package is:
+
+ 1. `cd' to the directory containing the package's source code and type
+ `./configure' to configure the package for your system.
+
+ Running `configure' might take a while. While running, it prints
+ some messages telling which features it is checking for.
+
+ 2. Type `make' to compile the package.
+
+ 3. Optionally, type `make check' to run any self-tests that come with
+ the package, generally using the just-built uninstalled binaries.
+
+ 4. Type `make install' to install the programs and any data files and
+ documentation. When installing into a prefix owned by root, it is
+ recommended that the package be configured and built as a regular
+ user, and only the `make install' phase executed with root
+ privileges.
+
+ 5. Optionally, type `make installcheck' to repeat any self-tests, but
+ this time using the binaries in their final installed location.
+ This target does not install anything. Running this target as a
+ regular user, particularly if the prior `make install' required
+ root privileges, verifies that the installation completed
+ correctly.
+
+ 6. You can remove the program binaries and object files from the
+ source code directory by typing `make clean'. To also remove the
+ files that `configure' created (so you can compile the package for
+ a different kind of computer), type `make distclean'. There is
+ also a `make maintainer-clean' target, but that is intended mainly
+ for the package's developers. If you use it, you may have to get
+ all sorts of other programs in order to regenerate files that came
+ with the distribution.
+
+ 7. Often, you can also type `make uninstall' to remove the installed
+ files again. In practice, not all packages have tested that
+ uninstallation works correctly, even though it is required by the
+ GNU Coding Standards.
+
+ 8. Some packages, particularly those that use Automake, provide `make
+ distcheck', which can be used by developers to test that all other
+ targets like `make install' and `make uninstall' work correctly.
+ This target is generally not run by end users.
+
+Compilers and Options
+=====================
+
+ Some systems require unusual options for compilation or linking that
+the `configure' script does not know about. Run `./configure --help'
+for details on some of the pertinent environment variables.
+
+ You can give `configure' initial values for configuration parameters
+by setting variables in the command line or in the environment. Here
+is an example:
+
+ ./configure CC=c99 CFLAGS=-g LIBS=-lposix
+
+ *Note Defining Variables::, for more details.
+
+Compiling For Multiple Architectures
+====================================
+
+ You can compile the package for more than one kind of computer at the
+same time, by placing the object files for each architecture in their
+own directory. To do this, you can use GNU `make'. `cd' to the
+directory where you want the object files and executables to go and run
+the `configure' script. `configure' automatically checks for the
+source code in the directory that `configure' is in and in `..'. This
+is known as a "VPATH" build.
+
+ With a non-GNU `make', it is safer to compile the package for one
+architecture at a time in the source code directory. After you have
+installed the package for one architecture, use `make distclean' before
+reconfiguring for another architecture.
+
+ On MacOS X 10.5 and later systems, you can create libraries and
+executables that work on multiple system types--known as "fat" or
+"universal" binaries--by specifying multiple `-arch' options to the
+compiler but only a single `-arch' option to the preprocessor. Like
+this:
+
+ ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
+ CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
+ CPP="gcc -E" CXXCPP="g++ -E"
+
+ This is not guaranteed to produce working output in all cases, you
+may have to build one architecture at a time and combine the results
+using the `lipo' tool if you have problems.
+
+Installation Names
+==================
+
+ By default, `make install' installs the package's commands under
+`/usr/local/bin', include files under `/usr/local/include', etc. You
+can specify an installation prefix other than `/usr/local' by giving
+`configure' the option `--prefix=PREFIX', where PREFIX must be an
+absolute file name.
+
+ You can specify separate installation prefixes for
+architecture-specific files and architecture-independent files. If you
+pass the option `--exec-prefix=PREFIX' to `configure', the package uses
+PREFIX as the prefix for installing programs and libraries.
+Documentation and other data files still use the regular prefix.
+
+ In addition, if you use an unusual directory layout you can give
+options like `--bindir=DIR' to specify different values for particular
+kinds of files. Run `configure --help' for a list of the directories
+you can set and what kinds of files go in them. In general, the
+default for these options is expressed in terms of `${prefix}', so that
+specifying just `--prefix' will affect all of the other directory
+specifications that were not explicitly provided.
+
+ The most portable way to affect installation locations is to pass the
+correct locations to `configure'; however, many packages provide one or
+both of the following shortcuts of passing variable assignments to the
+`make install' command line to change installation locations without
+having to reconfigure or recompile.
+
+ The first method involves providing an override variable for each
+affected directory. For example, `make install
+prefix=/alternate/directory' will choose an alternate location for all
+directory configuration variables that were expressed in terms of
+`${prefix}'. Any directories that were specified during `configure',
+but not in terms of `${prefix}', must each be overridden at install
+time for the entire installation to be relocated. The approach of
+makefile variable overrides for each directory variable is required by
+the GNU Coding Standards, and ideally causes no recompilation.
+However, some platforms have known limitations with the semantics of
+shared libraries that end up requiring recompilation when using this
+method, particularly noticeable in packages that use GNU Libtool.
+
+ The second method involves providing the `DESTDIR' variable. For
+example, `make install DESTDIR=/alternate/directory' will prepend
+`/alternate/directory' before all installation names. The approach of
+`DESTDIR' overrides is not required by the GNU Coding Standards, and
+does not work on platforms that have drive letters. On the other hand,
+it does better at avoiding recompilation issues, and works well even
+when some directory options were not specified in terms of `${prefix}'
+at `configure' time.
+
+Optional Features
+=================
+
+ If the package supports it, you can cause programs to be installed
+with an extra prefix or suffix on their names by giving `configure' the
+option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
+
+ Some packages pay attention to `--enable-FEATURE' options to
+`configure', where FEATURE indicates an optional part of the package.
+They may also pay attention to `--with-PACKAGE' options, where PACKAGE
+is something like `gnu-as' or `x' (for the X Window System). The
+`README' should mention any `--enable-' and `--with-' options that the
+package recognizes.
+
+ For packages that use the X Window System, `configure' can usually
+find the X include and library files automatically, but if it doesn't,
+you can use the `configure' options `--x-includes=DIR' and
+`--x-libraries=DIR' to specify their locations.
+
+ Some packages offer the ability to configure how verbose the
+execution of `make' will be. For these packages, running `./configure
+--enable-silent-rules' sets the default to minimal output, which can be
+overridden with `make V=1'; while running `./configure
+--disable-silent-rules' sets the default to verbose, which can be
+overridden with `make V=0'.
+
+Particular systems
+==================
+
+ On HP-UX, the default C compiler is not ANSI C compatible. If GNU
+CC is not installed, it is recommended to use the following options in
+order to use an ANSI C compiler:
+
+ ./configure CC="cc -Ae -D_XOPEN_SOURCE=500"
+
+and if that doesn't work, install pre-built binaries of GCC for HP-UX.
+
+ HP-UX `make' updates targets which have the same time stamps as
+their prerequisites, which makes it generally unusable when shipped
+generated files such as `configure' are involved. Use GNU `make'
+instead.
+
+ On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
+parse its `<wchar.h>' header file. The option `-nodtk' can be used as
+a workaround. If GNU CC is not installed, it is therefore recommended
+to try
+
+ ./configure CC="cc"
+
+and if that doesn't work, try
+
+ ./configure CC="cc -nodtk"
+
+ On Solaris, don't put `/usr/ucb' early in your `PATH'. This
+directory contains several dysfunctional programs; working variants of
+these programs are available in `/usr/bin'. So, if you need `/usr/ucb'
+in your `PATH', put it _after_ `/usr/bin'.
+
+ On Haiku, software installed for all users goes in `/boot/common',
+not `/usr/local'. It is recommended to use the following options:
+
+ ./configure --prefix=/boot/common
+
+Specifying the System Type
+==========================
+
+ There may be some features `configure' cannot figure out
+automatically, but needs to determine by the type of machine the package
+will run on. Usually, assuming the package is built to be run on the
+_same_ architectures, `configure' can figure that out, but if it prints
+a message saying it cannot guess the machine type, give it the
+`--build=TYPE' option. TYPE can either be a short name for the system
+type, such as `sun4', or a canonical name which has the form:
+
+ CPU-COMPANY-SYSTEM
+
+where SYSTEM can have one of these forms:
+
+ OS
+ KERNEL-OS
+
+ See the file `config.sub' for the possible values of each field. If
+`config.sub' isn't included in this package, then this package doesn't
+need to know the machine type.
+
+ If you are _building_ compiler tools for cross-compiling, you should
+use the option `--target=TYPE' to select the type of system they will
+produce code for.
+
+ If you want to _use_ a cross compiler, that generates code for a
+platform different from the build platform, you should specify the
+"host" platform (i.e., that on which the generated programs will
+eventually be run) with `--host=TYPE'.
+
+Sharing Defaults
+================
+
+ If you want to set default values for `configure' scripts to share,
+you can create a site shell script called `config.site' that gives
+default values for variables like `CC', `cache_file', and `prefix'.
+`configure' looks for `PREFIX/share/config.site' if it exists, then
+`PREFIX/etc/config.site' if it exists. Or, you can set the
+`CONFIG_SITE' environment variable to the location of the site script.
+A warning: not all `configure' scripts look for a site script.
+
+Defining Variables
+==================
+
+ Variables not defined in a site shell script can be set in the
+environment passed to `configure'. However, some packages may run
+configure again during the build, and the customized values of these
+variables may be lost. In order to avoid this problem, you should set
+them in the `configure' command line, using `VAR=value'. For example:
+
+ ./configure CC=/usr/local2/bin/gcc
+
+causes the specified `gcc' to be used as the C compiler (unless it is
+overridden in the site shell script).
+
+Unfortunately, this technique does not work for `CONFIG_SHELL' due to
+an Autoconf limitation. Until the limitation is lifted, you can use
+this workaround:
+
+ CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash
+
+`configure' Invocation
+======================
+
+ `configure' recognizes the following options to control how it
+operates.
+
+`--help'
+`-h'
+ Print a summary of all of the options to `configure', and exit.
+
+`--help=short'
+`--help=recursive'
+ Print a summary of the options unique to this package's
+ `configure', and exit. The `short' variant lists options used
+ only in the top level, while the `recursive' variant lists options
+ also present in any nested packages.
+
+`--version'
+`-V'
+ Print the version of Autoconf used to generate the `configure'
+ script, and exit.
+
+`--cache-file=FILE'
+ Enable the cache: use and save the results of the tests in FILE,
+ traditionally `config.cache'. FILE defaults to `/dev/null' to
+ disable caching.
+
+`--config-cache'
+`-C'
+ Alias for `--cache-file=config.cache'.
+
+`--quiet'
+`--silent'
+`-q'
+ Do not print messages saying which checks are being made. To
+ suppress all normal output, redirect it to `/dev/null' (any error
+ messages will still be shown).
+
+`--srcdir=DIR'
+ Look for the package's source code in directory DIR. Usually
+ `configure' can determine that directory automatically.
+
+`--prefix=DIR'
+ Use DIR as the installation prefix. *note Installation Names::
+ for more details, including other options available for fine-tuning
+ the installation locations.
+
+`--no-create'
+`-n'
+ Run the configure checks, but stop before creating any output
+ files.
+
+`configure' also accepts some other, not widely useful, options. Run
+`configure --help' for more details.
diff --git a/Makefile.am b/Makefile.am
new file mode 100644
index 0000000..1b308fa
--- /dev/null
+++ b/Makefile.am
@@ -0,0 +1,7 @@
+EXTRA_DIST = configure GPLv3
+# Recurse into src; aclocal additionally searches the m4 directory for macros.
+SUBDIRS = src
+ACLOCAL_AMFLAGS=-I m4
+# Install bambamc.pc into the pkgconfig directory below libdir.
+pkgconfigdir = $(libdir)/pkgconfig
+pkgconfig_DATA = bambamc.pc
diff --git a/NEWS b/NEWS
new file mode 100644
index 0000000..e69de29
diff --git a/README b/README
new file mode 100644
index 0000000..80b10a0
--- /dev/null
+++ b/README
@@ -0,0 +1,10 @@
+This package contains mainly a lightweight C implementation of the read name
+collation code from the larger bambam C++ project (see
+ftp://ftp.sanger.ac.uk/pub/users/gt1).
+
+The GitHub repository does not contain the final configure script but only
+the source files passed to autoconf/automake. Please use
+
+autoreconf -i -f
+
+to obtain a set of working configuration scripts.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..d944858
--- /dev/null
+++ b/README.md
@@ -0,0 +1,4 @@
+bambamc
+=======
+
+lightweight C implementation of name collating BAM file input and BAM file output
\ No newline at end of file
diff --git a/bambamc.pc.in b/bambamc.pc.in
new file mode 100644
index 0000000..e47d3d7
--- /dev/null
+++ b/bambamc.pc.in
@@ -0,0 +1,11 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+# pkg-config metadata template; configure fills in the names written between at-signs.
+Name: bambamc
+Description: bambamc
+Requires: @ZLIBREQ@
+Version: @VERSION@
+Libs: -L${libdir} -lbambamc @ZLIBPKGLIBS@
+Cflags: -I${includedir}
diff --git a/bumpversion.sh b/bumpversion.sh
new file mode 100644
index 0000000..663df56
--- /dev/null
+++ b/bumpversion.sh
@@ -0,0 +1,34 @@
+#! /bin/bash
+VERSION=`grep AC_INIT < configure.in | awk -F',' '{print $2}'`
+FIRST=`echo $VERSION | awk -F'.' '{print $1}'`
+SECOND=`echo $VERSION | awk -F'.' '{print $2}'`
+THIRD=`echo $VERSION | awk -F'.' '{print $3}'`
+NEXTTHIRD=`expr ${THIRD} + 1`
+
+awk -v first=${FIRST} -v second=${SECOND} -v third=${THIRD} '/^AC_INIT/ {gsub(first"."second"."third,first"."second"."third+1);print} ; !/^AC_INIT/{print}' < configure.in | \
+ awk -v first=${FIRST} -v second=${SECOND} -v third=${THIRD} '/^LIBRARY_VERSION=/ {gsub("="first"."third"."second,"="first":"third+1":"second);print} ; !/^LIBRARY_VERSION=/{print}' \
+ > configure.in.tmp
+mv configure.in.tmp configure.in
+
+pushd ../bambamc-debian
+git checkout master
+git pull
+pushd debian
+export DEBEMAIL=gt1 at sanger.ac.uk
+export DEBFULLNAME="German Tischler"
+dch --distribution unstable -v ${FIRST}.${SECOND}.${NEXTTHIRD}
+dch --release
+# dch --release -v ${FIRST}.${SECOND}.${NEXTTHIRD}-1
+popd
+git add debian/changelog
+git commit
+git push
+popd
+
+git add configure.in
+git commit
+git push
+
+TAG=bambamc_${FIRST}_${SECOND}_${NEXTTHIRD}
+git tag -a ${TAG} -m "bambamc version ${FIRST}_${SECOND}_${NEXTTHIRD}"
+git push origin ${TAG}
diff --git a/configure.in b/configure.in
new file mode 100644
index 0000000..5c06857
--- /dev/null
+++ b/configure.in
@@ -0,0 +1,165 @@
+AC_INIT(bambamc,0.0.49,[gt1@sanger.ac.uk],[bambamc],[http://www.sanger.ac.uk])
+LIBRARY_VERSION=0:49:0
+AC_CANONICAL_SYSTEM
+AC_PROG_LIBTOOL
+# Emit config.h, initialise automake and search the m4 directory for local macros.
+AM_CONFIG_HEADER(config.h)
+AM_INIT_AUTOMAKE
+AC_CONFIG_MACRO_DIR([m4])
+
+# check for c99 compiler with the user flags cleared; whatever the check leaves in CFLAGS and CPPFLAGS is kept in C99CFLAGS and C99CPPFLAGS and the user flags are restored afterwards
+CFLAGS_SAVE="${CFLAGS}"
+CPPFLAGS_SAVE="${CPPFLAGS}"
+CFLAGS=""
+CPPFLAGS=""
+AC_PROG_CC_C99
+C99CFLAGS="${CFLAGS}"
+C99CPPFLAGS="${CPPFLAGS}"
+CFLAGS="${CFLAGS_SAVE}"
+CPPFLAGS="${CPPFLAGS_SAVE}"
+
+# AC_MSG_NOTICE([C99: :${C99CFLAGS}: :${C99CPPFLAGS}:])
+
+AM_PROG_CC_C_O
+
+AC_LANG_ASSERT([C])
+
+AC_HEADER_STDC
+# Ask pkg-config for zlib; zlibpkg records whether a zlib module was found.
+PKG_PROG_PKG_CONFIG
+PKG_CHECK_MODULES([zlib],[zlib >= 0],[zlibpkg=yes],[zlibpkg=no])
+# Check that a program using zlib links, with pkg-config flags when available and plain -lz otherwise, then set ZLIBREQ, ZLIBCPPFLAGS, ZLIBLIBS and ZLIBPKGLIBS to match.
+if [[ "${zlibpkg}" = "yes" ]] ; then
+ PKG_CHECK_MODULES([zlib],[zlib >= 0])
+ # Link a small deflateInit program with the pkg-config flags appended, then restore LIBS and CPPFLAGS.
+ LIBSSAVE="${LIBS}"
+ LIBS="${LIBS} ${zlib_LIBS}"
+ CPPFLAGSSAVE="${CPPFLAGS}"
+ CPPFLAGS="${CPPFLAGS} ${zlib_CFLAGS}"
+ AC_MSG_CHECKING([whether we can compile a zlib program])
+ AC_TRY_LINK([#include <zlib.h>
+#include <string.h>
+],[
+ z_stream strm;
+ memset ( &strm , 0, sizeof(z_stream) );
+ strm.zalloc = Z_NULL;
+ strm.zfree = Z_NULL;
+ strm.opaque = Z_NULL;
+ deflateInit(&strm,Z_DEFAULT_COMPRESSION);
+ ],[zlib=yes],[zlib=no])
+ AC_MSG_RESULT([${zlib}])
+ LIBS="${LIBSSAVE}"
+ CPPFLAGS="${CPPFLAGSSAVE}"
+ # In this branch bambamc.pc names zlib through its Requires field, so ZLIBPKGLIBS is explicitly left empty.
+ if [[ "$zlib" = "no" ]] ; then
+ AC_MSG_ERROR([Required library zlib not found.]);
+ else
+ ZLIBREQ=zlib
+ ZLIBCPPFLAGS="${zlib_CFLAGS}"
+ ZLIBLIBS="${zlib_LIBS}"
+ ZLIBPKGLIBS=
+ fi
+else
+ AC_MSG_WARN([zlib pkgconfig file is not installed. Trying if -lz for LIBS is enough.])
+ # Same link test, with -lz appended to LIBS only.
+ LIBSSAVE="${LIBS}"
+ LIBS="${LIBS} -lz"
+ AC_MSG_CHECKING([whether we can compile a zlib program])
+ AC_TRY_LINK([#include <zlib.h>
+#include <string.h>
+],[
+ z_stream strm;
+ memset ( &strm , 0, sizeof(z_stream) );
+ strm.zalloc = Z_NULL;
+ strm.zfree = Z_NULL;
+ strm.opaque = Z_NULL;
+ deflateInit(&strm,Z_DEFAULT_COMPRESSION);
+ ],[zlib=yes],[zlib=no])
+ AC_MSG_RESULT([${zlib}])
+ LIBS="${LIBSSAVE}"
+ # Without a zlib pkg-config module bambamc.pc requires nothing and carries -lz directly in its Libs field.
+ if [[ "$zlib" = "no" ]] ; then
+ AC_MSG_ERROR([Required library zlib not found.]);
+ else
+ ZLIBREQ=
+ ZLIBCPPFLAGS=
+ ZLIBLIBS="-lz"
+ ZLIBPKGLIBS="-lz"
+ fi
+fi
+# Add warning options to CPPFLAGS when CC is exactly gcc or cl.exe.
+if test "${CC}" = "gcc" ; then
+ CPPFLAGS="${CPPFLAGS} -W -Wall"
+fi
+if test "${CC}" = "cl.exe" ; then
+ CPPFLAGS="${CPPFLAGS} -W1" # -Wall
+fi
+# Build-mode switches; optimization defaults to yes while debug, profile and fast default to no.
+AC_ARG_ENABLE(optimization,
+ AS_HELP_STRING([--enable-optimization],[use compiler optimization (default yes)]),
+ [optimization=${enableval}],[optimization=yes])
+AC_ARG_ENABLE(debug,
+ AS_HELP_STRING([--enable-debug],[use compiler debug flags (default no)]),
+ [debug=${enableval}],[debug=no])
+AC_ARG_ENABLE(profile,
+ AS_HELP_STRING([--enable-profile],[use compiler profiling flags (default no)]),
+ [profile=${enableval}],[profile=no])
+AC_ARG_ENABLE(fast,
+ AS_HELP_STRING([--enable-fast],[disable evaluation of assertions (default no)]),
+ [fast=${enableval}],[fast=no])
+# fast: define NDEBUG so that assertions are not evaluated.
+if test "${fast}" = "yes" ; then
+ CPPFLAGS="${CPPFLAGS} -DNDEBUG"
+fi
+# debug wins over profile, which wins over optimization; optimization flags are only added for the CC values listed in the case statement.
+if test "${debug}" = "yes" ; then
+ CFLAGS="${CFLAGS} -g -O0 -rdynamic"
+else
+ if test "${profile}" = "yes" ; then
+ CFLAGS="${CFLAGS} -g -pg -rdynamic"
+ else
+ if test "${optimization}" = "yes" ; then
+ case ${CC} in
+ gcc)
+ CFLAGS="${CFLAGS} -O3 -rdynamic"
+ ;;
+ *-mingw32msvc-gcc)
+ CFLAGS="${CFLAGS} -O3 -rdynamic"
+ ;;
+ cl.exe)
+ CFLAGS="${CFLAGS} -O2 -Ob2 -Ot -Oy"
+ ;;
+ esac
+ fi
+
+ fi
+fi
+# Probe for _mkdir and direct.h; HAVEWINDOWS_MKDIR becomes a define line when have_mkdir ends up yes and is empty otherwise.
+AC_CHECK_FUNC([_mkdir],[have_mkdir=yes],[have_mkdir=no])
+AC_CHECK_HEADER([direct.h],[have_direct_h=yes],[have_direct_h=no])
+# NOTE(review): the link test below only runs when both probes succeed; if _mkdir is found but direct.h is not, have_mkdir stays yes - confirm this is intended.
+if test "${have_mkdir}" = "yes" -a "${have_direct_h}" = "yes" ; then
+
+AC_MSG_CHECKING([whether we can compile a program using _mkdir])
+ AC_TRY_LINK([#include <direct.h>],[_mkdir("tmpdir");],[have_mkdir=yes],[have_mkdir=no])
+ AC_MSG_RESULT([${have_mkdir}])
+fi
+
+if test "${have_mkdir}" = "yes" ; then
+ HAVEWINDOWS_MKDIR="#define HAVEWINDOWS_MKDIR"
+else
+ HAVEWINDOWS_MKDIR=
+fi
+# Export the computed values as substitution variables and generate the output files.
+PACKAGE_NAME=${PACKAGE}
+PACKAGE_VERSION=${VERSION}
+
+AC_SUBST([PACKAGE_NAME])
+AC_SUBST([PACKAGE_VERSION])
+AC_SUBST([ZLIBCPPFLAGS])
+AC_SUBST([ZLIBLIBS])
+AC_SUBST([ZLIBPKGLIBS])
+AC_SUBST([ZLIBREQ])
+AC_SUBST([LIBRARY_VERSION])
+AC_SUBST([HAVEWINDOWS_MKDIR])
+AC_OUTPUT(Makefile src/Makefile ubuntu.sh bambamc.pc src/bambamc/BamBam_Config.h)
diff --git a/m4/keepme.m4 b/m4/keepme.m4
new file mode 100644
index 0000000..e69de29
diff --git a/src/Makefile.am b/src/Makefile.am
new file mode 100644
index 0000000..5a13403
--- /dev/null
+++ b/src/Makefile.am
@@ -0,0 +1,46 @@
+ACLOCAL_AMFLAGS=-I m4
+
+lib_LTLIBRARIES = libbambamc.la
+noinst_PROGRAMS = bamreadertest bamwritertest
+
+libbambamc_la_SOURCES = \
+ bambamc/BamBam_BamCollationHash.c bambamc/BamBam_BamCollatorInterface.c bambamc/BamBam_FormatAlignment.c \
+ bambamc/BamBam_BamCollationHashEntry.c bambamc/BamBam_FormatNumber.c \
+ bambamc/BamBam_BamCollationOutputVector.c bambamc/BamBam_BamHeaderInfo.c bambamc/BamBam_Hash.c \
+ bambamc/BamBam_BamCollationTempFileGenerator.c bambamc/BamBam_BamWriter.c bambamc/BamBam_List.c \
+ bambamc/BamBam_BamCollationVector.c bambamc/BamBam_CharBuffer.c bambamc/BamBam_ListNode.c \
+ bambamc/BamBam_BamCollator.c bambamc/BamBam_Chromosome.c \
+ bambamc/BamBam_GzipReader.c bambamc/BamBam_LineParsing.c bambamc/BamBam_BamFileHeader.c \
+ bambamc/BamBam_BamAlignmentPut.c bambamc/BamBam_BamSingleAlignment.c \
+ bambamc/BamBam_BamFileDecoder.c \
+ bambamc/BamBam_GzipWriter.c bambamc/BamBam_GzipFileDecoder.c \
+ bambamc/BamBam_BgzfCompressor.c bambamc/BamBam_LineBuffer.c \
+ bambamc/BamBam_SamInfo.c bambamc/BamBam_SamFileDecoder.c \
+ bambamc/BamBam_SamBamFileDecoder.c bambamc/BamBam_StrDup.c
+libbambamc_includedir=$(includedir)/bambamc
+libbambamc_include_HEADERS= \
+ bambamc/BamBam_BamAlignmentPut.h bambamc/BamBam_BamFlagBase.h bambamc/BamBam_FormatAlignment.h \
+ bambamc/BamBam_BamCollationHashEntry.h bambamc/BamBam_FormatNumber.h \
+ bambamc/BamBam_BamCollationHash.h bambamc/BamBam_BamHeaderInfo.h bambamc/BamBam_GzipReader.h \
+ bambamc/BamBam_BamCollationOutputVector.h bambamc/BamBam_BamSingleAlignment.h bambamc/BamBam_Hash.h \
+ bambamc/BamBam_BamCollationTempFileGenerator.h bambamc/BamBam_LineParsing.h \
+ bambamc/BamBam_BamCollationVector.h bambamc/BamBam_BamWriter.h bambamc/BamBam_List.h \
+ bambamc/BamBam_BamCollator.h bambamc/BamBam_CharBuffer.h bambamc/BamBam_ListNode.h \
+ bambamc/BamBam_BamCollatorInterface.h bambamc/BamBam_Chromosome.h bambamc/BamBam_MergeHeapEntry.h \
+ bambamc/BamBam_BamFileDecoder.h bambamc/BamBam_CollatorState.h bambamc/BamBam_Unused.h \
+ bambamc/BamBam_BamFileHeader.h bambamc/BamBam_FastQRead.h bambamc/BamBam_Config.h \
+ bambamc/BamBam_GzipWriter.h bambamc/BamBam_GzipFileDecoder.h \
+ bambamc/BamBam_BgzfCompressor.h bambamc/BamBam_LineBuffer.h \
+ bambamc/BamBam_SamInfo.h bambamc/BamBam_SamFileDecoder.h \
+ bambamc/BamBam_SamBamFileDecoder.h bambamc/BamBam_StrDup.h
+libbambamc_la_CPPFLAGS=${AM_CPPFLAGS}
+libbambamc_la_LDFLAGS=${ZLIBLDFLAGS} -version-info ${LIBRARY_VERSION}
+libbambamc_la_LIBADD=${ZLIBLIBS}
+
+bamreadertest_SOURCES = test/bamreadertest.c
+bamreadertest_CPPFLAGS = ${AM_CPPFLAGS}
+bamreadertest_LDADD = libbambamc.la ${ZLIBLIBS}
+
+bamwritertest_SOURCES = test/bamwritertest.c
+bamwritertest_CPPFLAGS = ${AM_CPPFLAGS}
+bamwritertest_LDADD = libbambamc.la ${ZLIBLIBS}
diff --git a/src/bambamc/BamBam_BamAlignmentPut.c b/src/bambamc/BamBam_BamAlignmentPut.c
new file mode 100644
index 0000000..f4ebca9
--- /dev/null
+++ b/src/bambamc/BamBam_BamAlignmentPut.c
@@ -0,0 +1,351 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#include <bambamc/BamBam_BamAlignmentPut.h>
+#include <bambamc/BamBam_BamFlagBase.h>
+
+BamBam_AlignmentPut * BamBam_AlignmentPut_Delete(BamBam_AlignmentPut * aput)
+{
+ if ( aput )
+ {
+ if ( aput->calignment )
+ {
+ BamBam_BamSingleAlignment_Delete(aput->calignment);
+ aput->calignment = 0;
+ }
+ BamBam_CharBuffer_Delete(aput->charbuffer); aput->charbuffer = 0;
+ free(aput);
+ }
+
+ return 0;
+}
+
+BamBam_AlignmentPut * BamBam_AlignmentPut_New()
+{
+ BamBam_AlignmentPut * aput = 0;
+ /* allocate the container; 0 is returned as soon as any allocation fails */
+ aput = (BamBam_AlignmentPut *)malloc(sizeof(BamBam_AlignmentPut));
+
+ if ( ! aput )
+ return 0;
+ /* clear both members first so that Delete never inspects an uninitialised pointer */
+ aput->charbuffer = 0; aput->calignment = 0;
+ aput->charbuffer = BamBam_CharBuffer_New();
+ if ( ! aput->charbuffer )
+ return BamBam_AlignmentPut_Delete(aput);
+
+ aput->calignment = BamBam_BamSingleAlignment_New();
+
+ if ( ! aput->calignment )
+ return BamBam_AlignmentPut_Delete(aput);
+
+ return aput;
+}
+
+/* reg2bin as defined in sam file format spec, for the 0 based half open interval [beg,end) */
+static int reg2bin(uint32_t beg, uint32_t end)
+{
+ end = (end > beg) ? (end-1) : beg; /* an empty interval counts as [beg,beg+1); a plain --end would wrap for end == 0 */
+ if (beg>>14 == end>>14) return ((1ul<<15)-1ul)/7ul + (beg>>14);
+ if (beg>>17 == end>>17) return ((1ul<<12)-1ul)/7ul + (beg>>17);
+ if (beg>>20 == end>>20) return ((1ul<<9)-1ul)/7ul + (beg>>20);
+ if (beg>>23 == end>>23) return ((1ul<<6)-1ul)/7ul + (beg>>23);
+ if (beg>>26 == end>>26) return ((1ul<<3)-1ul)/7ul + (beg>>26);
+ return 0;
+}
+
+int BamBam_CharBuffer_PutAlignmentC(
+ BamBam_AlignmentPut * aput,
+ /* flags */
+ int32_t const flags,
+ /* target (chromosome) id */
+ int32_t const tid,
+ /* position on chromosome (0 based) */
+ uint64_t const rpos,
+ /* mate target id */
+ int32_t const mtid,
+ /* position of mate on mate target id */
+ uint64_t const rmpos,
+ /* sequence name */
+ char const * name,
+ /* query sequence (read) */
+ char const * query,
+ /* quality string */
+ char const * qual,
+ /* cigar operations */
+ char const * cigar,
+ /* mapping quality */
+ int32_t const rqual,
+ /* insert size */
+ int32_t const isize
+ )
+{
+ /* encode one alignment record into aput->calignment, writing through aput->charbuffer; returns ret, which is < 0 on failure */
+ BamBam_CharBuffer * buffer = aput->charbuffer;
+ BamBam_BamSingleAlignment * alignment = aput->calignment;
+
+ int ret = 0;
+ uint64_t i = 0;
+ uint64_t const qlen = strlen(query);
+ uint64_t const quallen = qlen;
+ uint64_t const namelen = strlen(name);
+ uint64_t ncigar = 0;
+ uint64_t bin = 0;
+ int64_t endpos = rpos;
+ static char const cmap[5] = {1,2,4,8,15};
+ if ( namelen > 254 ) return -1; /* the name length field is a single byte and includes the terminating zero */
+ /*
+ * lend the storage of the alignment to the char buffer while writing:
+ * CharBuffer buffer,buffersize,bufferfill <-> BamBam_BamSingleAlignment data,dataav,dataused
+ */
+ assert ( ! buffer->buffer );
+ buffer->buffer = alignment->data;
+ buffer->buffersize = alignment->dataav;
+ buffer->bufferfill = alignment->dataused;
+ alignment->data = 0;
+ alignment->dataav = 0;
+ alignment->dataused = 0;
+
+ BamBam_CharBuffer_Reset(buffer);
+
+ /* core data; every failure leaves via finish so the storage is always handed back */
+ BamBam_CharBuffer_PushLE(buffer,(int32_t)tid,ret); if ( ret < 0 ) goto finish;
+ BamBam_CharBuffer_PushLE(buffer,(int32_t)rpos,ret); if ( ret < 0 ) goto finish;
+ /* bin_mq_nl, bin will be written later */
+ BamBam_CharBuffer_PushLE(buffer,(uint32_t)((rqual<<8)|(namelen+1)),ret); if ( ret < 0 ) goto finish;
+ /* flag_nc, number of cigar operations will be written later */
+ BamBam_CharBuffer_PushLE(buffer,(uint32_t)(flags<<16),ret); if ( ret < 0 ) goto finish;
+ /* length of query sequence */
+ BamBam_CharBuffer_PushLE(buffer,(int32_t)qlen,ret); if ( ret < 0 ) goto finish;
+ BamBam_CharBuffer_PushLE(buffer,(int32_t)mtid,ret); if ( ret < 0 ) goto finish;
+ BamBam_CharBuffer_PushLE(buffer,(int32_t)rmpos,ret); if ( ret < 0 ) goto finish;
+ BamBam_CharBuffer_PushLE(buffer,(int32_t)isize,ret); if ( ret < 0 ) goto finish;
+
+ /* copy read name */
+ BamBam_CharBuffer_PushString(buffer,name,ret); if ( ret < 0 ) goto finish;
+ /* terminating zero */
+ BamBam_CharBuffer_PushCharQuick(buffer,0,ret); if ( ret < 0 ) goto finish;
+
+ /* if cigar string is "*", then skip it */
+ if (
+ ret >= 0 &&
+ cigar[0] &&
+ (!cigar[1]) &&
+ (cigar[0] == '*')
+ )
+ {
+ ++cigar;
+ }
+ /* encode cigar string; endpos is advanced only by operations consuming reference bases */
+ while ( ret >= 0 && *cigar )
+ {
+ uint64_t num = 0;
+ uint32_t op = 0;
+ uint32_t store = 0;
+
+ assert ( isdigit((unsigned char)cigar[0]) );
+ while ( (*cigar) && isdigit((unsigned char)(*cigar)) )
+ {
+ num *= 10;
+ num += (*(cigar++)) - '0';
+ }
+
+ assert ( *cigar );
+ switch ( *cigar )
+ {
+ case 'M':
+ op = BAMBAMC_CMATCH;
+ endpos += num;
+ break;
+ case 'I':
+ op = BAMBAMC_CINS;
+ break;
+ case 'D':
+ op = BAMBAMC_CDEL;
+ endpos += num;
+ break;
+ case 'N':
+ op = BAMBAMC_CREF_SKIP;
+ endpos += num;
+ break;
+ case 'S':
+ op = BAMBAMC_CSOFT_CLIP;
+ break;
+ case 'H':
+ op = BAMBAMC_CHARD_CLIP;
+ break;
+ case 'P':
+ op = BAMBAMC_CPAD;
+ /* padding consumes neither query nor reference bases, endpos stays */
+ break;
+ case '=':
+ op = BAMBAMC_CEQUAL;
+ endpos += num;
+ break;
+ case 'X':
+ op = BAMBAMC_CDIFF;
+ endpos += num;
+ break;
+ default:
+ ret = -1; /* unknown operator or truncated cigar string: fail instead of storing garbage */
+ goto finish;
+ }
+
+ cigar++;
+
+ /* BAM cigar word: operation length in the upper 28 bits, operation code in the low 4 bits */
+
+ store = op | (num << 4);
+
+ BamBam_CharBuffer_PushLE(buffer,store,ret);
+
+ ++ncigar;
+ }
+
+ if ( ret < 0 )
+ goto finish;
+
+ /* put number of cigar operations (only after all pushes are known to have succeeded) */
+ buffer->buffer[12] = ((ncigar>>0) & 0xFFu);
+ buffer->buffer[13] = ((ncigar>>8) & 0xFFu);
+
+ /* encode query string */
+ for ( i = 0; i < qlen/2 && ret >= 0; ++i )
+ {
+ BamBam_CharBuffer_PushCharQuick(
+ buffer,
+ (cmap[(int)(BamBam_MapBase((int)(query[2*i+0])))] << 4)
+ |
+ (cmap[(int)(BamBam_MapBase((int)(query[2*i+1])))] << 0),
+ ret);
+ }
+ if ( qlen % 2 && ret >= 0 )
+ BamBam_CharBuffer_PushCharQuick(buffer,((cmap[(int)(BamBam_MapBase((int)(query[qlen-1])))] << 4)),ret);
+
+ if ( ret < 0 )
+ goto finish;
+
+ /* encode quality */
+ for ( i = 0; i < quallen && ret >= 0; ++i )
+ BamBam_CharBuffer_PushCharQuick(buffer,qual[i]-33,ret);
+
+ if ( ret < 0 )
+ goto finish;
+
+ /* set bin; an alignment covering no reference bases is binned as one base at rpos */
+ bin = reg2bin(rpos, (endpos > (int64_t)rpos) ? endpos : (endpos+1));
+ buffer->buffer[10] = (bin>>0)&0xFFu;
+ buffer->buffer[11] = (bin>>8)&0xFFu;
+finish:
+ /* swap buffers back, also on failure; a failed record is reported as empty */
+ alignment->data = buffer->buffer;
+ alignment->dataav = buffer->buffersize;
+ alignment->dataused = (ret < 0) ? 0 : buffer->bufferfill;
+ buffer->buffer = 0;
+ buffer->buffersize = 0;
+ buffer->bufferfill = 0;
+
+ return ret;
+}
+int BamBam_CharBuffer_PutAuxNumberC(BamBam_AlignmentPut * aput, char const * tag, char const type, void const * pvalue)
+{
+ /* append one numeric aux field (two letter tag, type code, value) to aput->calignment; returns ret, < 0 on failure */
+ BamBam_CharBuffer * buffer = aput->charbuffer;
+ BamBam_BamSingleAlignment * alignment = aput->calignment;
+ int ret = 0;
+ uint64_t const oldfill = alignment->dataused; /* fill before the append, restored if the append fails */
+ /*
+ * swap buffers: lend the storage of the alignment to the char buffer
+ */
+ assert ( ! buffer->buffer );
+ buffer->buffer = alignment->data;
+ buffer->buffersize = alignment->dataav;
+ buffer->bufferfill = alignment->dataused;
+ alignment->data = 0;
+ alignment->dataav = 0;
+ alignment->dataused = 0;
+
+ assert ( strlen(tag) == 2 );
+
+ /* put tag */
+ BamBam_CharBuffer_PushString(buffer,tag,ret); if ( ret < 0 ) goto finish;
+ /* put type */
+ BamBam_CharBuffer_PushCharQuick(buffer,type,ret); if ( ret < 0 ) goto finish;
+
+ switch ( type )
+ {
+ case 'A':
+ case 'c':
+ {
+ int8_t const value = *((int8_t const *)pvalue);
+ BamBam_CharBuffer_PushLE(buffer,value,ret);
+ break;
+ }
+ case 'C':
+ {
+ uint8_t const value = *((uint8_t const *)pvalue);
+ BamBam_CharBuffer_PushLE(buffer,value,ret);
+ break;
+ }
+ case 's':
+ {
+ int16_t const value = *((int16_t const *)pvalue);
+ BamBam_CharBuffer_PushLE(buffer,value,ret);
+ break;
+ }
+ case 'S':
+ {
+ uint16_t const value = *((uint16_t const *)pvalue);
+ BamBam_CharBuffer_PushLE(buffer,value,ret);
+ break;
+ }
+ case 'i':
+ {
+ int32_t const value = *((int32_t const *)pvalue);
+ BamBam_CharBuffer_PushLE(buffer,value,ret);
+ break;
+ }
+ case 'I':
+ {
+ uint32_t const value = *((uint32_t const *)pvalue);
+ BamBam_CharBuffer_PushLE(buffer,value,ret);
+ break;
+ }
+ case 'f':
+ {
+ uint32_t value = 0; memcpy(&value,pvalue,sizeof(value)); /* copy the 4 byte pattern without reading it through a pointer of another type */
+ BamBam_CharBuffer_PushLE(buffer,value,ret);
+ break;
+ }
+ default:
+ {
+ ret = -1; /* unsupported type code: fail instead of leaving a tag without a value */
+ break;
+ }
+ }
+finish:
+ /* swap buffers back, also on failure; a failed append is rolled back to the previous fill */
+ alignment->data = buffer->buffer;
+ alignment->dataav = buffer->buffersize;
+ alignment->dataused = (ret < 0) ? oldfill : buffer->bufferfill;
+ buffer->buffer = 0;
+ buffer->buffersize = 0;
+ buffer->bufferfill = 0;
+
+ return ret;
+}
diff --git a/src/bambamc/BamBam_BamAlignmentPut.h b/src/bambamc/BamBam_BamAlignmentPut.h
new file mode 100644
index 0000000..7640cd3
--- /dev/null
+++ b/src/bambamc/BamBam_BamAlignmentPut.h
@@ -0,0 +1,65 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#if ! defined(BAMBAM_BAMALIGNMENTPUT_H)
+#define BAMBAM_BAMALIGNMENTPUT_H
+
+#include <bambamc/BamBam_CharBuffer.h>
+#include <bambamc/BamBam_BamSingleAlignment.h>
+
+#include <assert.h>
+#include <ctype.h>
+
+#include <bambamc/BamBam_Config.h>
+
+typedef struct _BamBam_AlignmentPut
+{
+ BamBam_CharBuffer * charbuffer;
+ BamBam_BamSingleAlignment * calignment;
+} BamBam_AlignmentPut;
+
+extern BamBam_AlignmentPut * BamBam_AlignmentPut_New() BAMBAM_WARN_IF_UNUSEDRESULT;
+extern BamBam_AlignmentPut * BamBam_AlignmentPut_Delete(BamBam_AlignmentPut * aput);
+
+extern int BamBam_CharBuffer_PutAlignmentC(
+ BamBam_AlignmentPut * aput,
+ /* flags */
+ int32_t const flags,
+ /* target (chromosome) id */
+ int32_t const tid,
+ /* position on chromosome (0 based) */
+ uint64_t const rpos,
+ /* mate target id */
+ int32_t const mtid,
+ /* position of mate on mate target id */
+ uint64_t const rmpos,
+ /* sequence name */
+ char const * name,
+ /* query sequence (read) */
+ char const * query,
+ /* quality string */
+ char const * qual,
+ /* cigar operations */
+ char const * cigar,
+ /* mapping quality */
+ int32_t const rqual,
+ /* insert size */
+ int32_t const isize
+ ) BAMBAM_WARN_IF_UNUSEDRESULT;
+extern int BamBam_CharBuffer_PutAuxNumberC(BamBam_AlignmentPut * aput, char const * tag, char const type, void const * rvalue) BAMBAM_WARN_IF_UNUSEDRESULT;
+#endif
diff --git a/src/bambamc/BamBam_BamCollationHash.c b/src/bambamc/BamBam_BamCollationHash.c
new file mode 100644
index 0000000..e5c0950
--- /dev/null
+++ b/src/bambamc/BamBam_BamCollationHash.c
@@ -0,0 +1,57 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#include <bambamc/BamBam_BamCollationHash.h>
+
+void BamBam_BamCollationHash_Delete(BamBam_BamCollationHash * hash)
+{
+ if ( hash )
+ {
+ free(hash->entries);
+ free(hash);
+ }
+}
+
+BamBam_BamCollationHash * BamBam_BamCollationHash_New(unsigned int sizelog)
+{
+ BamBam_BamCollationHash * hash = 0;
+ uint64_t i;
+
+ hash = (BamBam_BamCollationHash *)malloc(sizeof(BamBam_BamCollationHash));
+
+ if ( ! hash )
+ return 0;
+
+ hash->entries = 0;
+ hash->tablesize = 1ull << sizelog;
+ hash->tablemask = hash->tablesize-1;
+
+ hash->entries = (BamBam_BamCollationHashEntry **)malloc(hash->tablesize*(sizeof(BamBam_BamCollationHashEntry *)));
+
+ if ( ! hash->entries )
+ {
+ free(hash);
+ return 0;
+ }
+
+ for ( i = 0; i < hash->tablesize; ++i )
+ hash->entries[i] = 0;
+
+ return hash;
+}
diff --git a/src/bambamc/BamBam_BamCollationHash.h b/src/bambamc/BamBam_BamCollationHash.h
new file mode 100644
index 0000000..f09ed52
--- /dev/null
+++ b/src/bambamc/BamBam_BamCollationHash.h
@@ -0,0 +1,35 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#if ! defined(BAMBAM_BAMCOLLATIONHASH_H)
+#define BAMBAM_BAMCOLLATIONHASH_H
+
+#include <bambamc/BamBam_Unused.h>
+#include <bambamc/BamBam_BamCollationHashEntry.h>
+
+typedef struct _BamBam_BamCollationHash
+{
+ BamBam_BamCollationHashEntry ** entries;
+ uint64_t tablesize;
+ uint64_t tablemask;
+} BamBam_BamCollationHash;
+
+extern BamBam_BamCollationHash * BamBam_BamCollationHash_New(unsigned int sizelog) BAMBAM_WARN_IF_UNUSEDRESULT;
+extern void BamBam_BamCollationHash_Delete(BamBam_BamCollationHash * hash);
+#endif
diff --git a/src/bambamc/BamBam_BamCollationHashEntry.c b/src/bambamc/BamBam_BamCollationHashEntry.c
new file mode 100644
index 0000000..da0b055
--- /dev/null
+++ b/src/bambamc/BamBam_BamCollationHashEntry.c
@@ -0,0 +1,128 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#include <bambamc/BamBam_BamCollationHashEntry.h>
+#include <bambamc/BamBam_BamFlagBase.h>
+#include <bambamc/BamBam_StrDup.h>
+
+int BamBam_BamHashEntry_Compare(BamBam_BamCollationHashEntry const * A, BamBam_BamCollationHashEntry const * B)
+{
+ int cmpres = strcmp(A->qname,B->qname);
+ int Amate1 = (BamBam_BamCollationHashEntry_GetFlags(A) & BAMBAMC_FREAD1) != 0;
+ int Bmate1 = (BamBam_BamCollationHashEntry_GetFlags(B) & BAMBAMC_FREAD1) != 0;
+
+ if ( cmpres != 0 )
+ return cmpres;
+ else if ( Amate1 < Bmate1 )
+ return -1;
+ else if ( Amate1 > Bmate1 )
+ return 1;
+ else
+ return 0;
+
+}
+
+int BamBam_BamHashEntry_CompareVerbose(BamBam_BamCollationHashEntry const * A, BamBam_BamCollationHashEntry const * B)
+{
+ int cmpres = strcmp(A->qname,B->qname);
+ int Amate1 = (BamBam_BamCollationHashEntry_GetFlags(A) & BAMBAMC_FREAD1) != 0;
+ int Bmate1 = (BamBam_BamCollationHashEntry_GetFlags(B) & BAMBAMC_FREAD1) != 0;
+
+ fprintf(stderr,"Comparing %s and %s result %d\n", A->qname, B->qname, cmpres);
+
+ if ( cmpres != 0 )
+ return cmpres;
+ else if ( Amate1 < Bmate1 )
+ return -1;
+ else if ( Amate1 > Bmate1 )
+ return 1;
+ else
+ return 0;
+
+}
+
+int BamBam_BamHashEntry_CompareVoidPtr(const void * VA, const void * VB)
+{
+ BamBam_BamCollationHashEntry const ** A = (BamBam_BamCollationHashEntry const **)VA;
+ BamBam_BamCollationHashEntry const ** B = (BamBam_BamCollationHashEntry const **)VB;
+ return BamBam_BamHashEntry_Compare(*A,*B);
+}
+
+void BamBam_BamCollationHashEntry_Delete(BamBam_BamCollationHashEntry * hashentry)
+{
+ if ( hashentry )
+ {
+ if ( hashentry->entry )
+ BamBam_BamSingleAlignment_Delete(hashentry->entry);
+ free(hashentry->qname);
+ free(hashentry);
+ }
+}
+
+uint32_t BamBam_BamCollationHashEntry_GetFlags(BamBam_BamCollationHashEntry const * hashentry)
+{
+ return BamBam_BamSingleAlignment_GetFlags(hashentry->entry);
+}
+
+BamBam_BamCollationHashEntry * BamBam_BamCollationHashEntry_NewDup(BamBam_BamSingleAlignment * alignment)
+{
+ BamBam_BamSingleAlignment * entry = 0;
+ BamBam_BamCollationHashEntry * hashentry = 0;
+ char const * qname = 0;
+
+ entry = BamBam_BamSingleAlignment_Clone(alignment);
+
+ if ( ! entry )
+ return 0;
+
+ hashentry = (BamBam_BamCollationHashEntry *)malloc(sizeof(BamBam_BamCollationHashEntry));
+
+ if ( ! hashentry )
+ {
+ BamBam_BamSingleAlignment_Delete(entry);
+ return 0;
+ }
+
+ hashentry->entry = entry;
+ hashentry->qnamelen = 0;
+ hashentry->qname = 0;
+
+ qname = BamBam_BamSingleAlignment_GetReadName(entry);
+
+ if ( ! qname )
+ {
+ BamBam_BamSingleAlignment_Delete(entry);
+ free(hashentry);
+ return 0;
+ }
+
+ hashentry->qname = BamBam_StrDup(qname);
+
+ if ( ! hashentry->qname )
+ {
+ BamBam_BamSingleAlignment_Delete(entry);
+ free(hashentry);
+ return 0;
+ }
+
+ hashentry->qnamelen = strlen(hashentry->qname);
+ hashentry->hashvalue = hashDefaultSeed((uint8_t const *)hashentry->qname,hashentry->qnamelen);
+
+ return hashentry;
+}
diff --git a/src/bambamc/BamBam_BamCollationHashEntry.h b/src/bambamc/BamBam_BamCollationHashEntry.h
new file mode 100644
index 0000000..cdd00da
--- /dev/null
+++ b/src/bambamc/BamBam_BamCollationHashEntry.h
@@ -0,0 +1,44 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#if ! defined(BAMBAM_BAMCOLLATIONHASHENTRY_H)
+#define BAMBAM_BAMCOLLATIONHASHENTRY_H
+
+#include <bambamc/BamBam_Unused.h>
+#include <bambamc/BamBam_BamCollationTempFileGenerator.h>
+#include <bambamc/BamBam_Hash.h>
+#include <bambamc/BamBam_BamSingleAlignment.h>
+
+#include <bambamc/BamBam_Config.h>
+
+typedef struct _BamBam_BamCollationHashEntry
+{
+ BamBam_BamSingleAlignment * entry;
+ char * qname;
+ unsigned int qnamelen;
+ uint32_t hashvalue;
+} BamBam_BamCollationHashEntry;
+
+extern int BamBam_BamHashEntry_Compare(BamBam_BamCollationHashEntry const * A, BamBam_BamCollationHashEntry const * B) BAMBAM_WARN_IF_UNUSEDRESULT;
+extern int BamBam_BamHashEntry_CompareVerbose(BamBam_BamCollationHashEntry const * A, BamBam_BamCollationHashEntry const * B) BAMBAM_WARN_IF_UNUSEDRESULT;
+extern int BamBam_BamHashEntry_CompareVoidPtr(const void * VA, const void * VB) BAMBAM_WARN_IF_UNUSEDRESULT;
+extern void BamBam_BamCollationHashEntry_Delete(BamBam_BamCollationHashEntry * hashentry);
+extern uint32_t BamBam_BamCollationHashEntry_GetFlags(BamBam_BamCollationHashEntry const * hashentry) BAMBAM_WARN_IF_UNUSEDRESULT;
+extern BamBam_BamCollationHashEntry * BamBam_BamCollationHashEntry_NewDup(BamBam_BamSingleAlignment * alignment) BAMBAM_WARN_IF_UNUSEDRESULT;
+#endif
diff --git a/src/bambamc/BamBam_BamCollationOutputVector.c b/src/bambamc/BamBam_BamCollationOutputVector.c
new file mode 100644
index 0000000..2aeb4cc
--- /dev/null
+++ b/src/bambamc/BamBam_BamCollationOutputVector.c
@@ -0,0 +1,84 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#include <bambamc/BamBam_BamCollationOutputVector.h>
+#include <assert.h>
+
+void BamBam_BamCollationOutputVector_PushFront(BamBam_BamCollationOutputVector * outputvector, BamBam_BamCollationHashEntry * entry)
+{
+ assert (outputvector->outputvectorfill < outputvector->outputvectorsize );
+ if ( outputvector->outputvectorlow == 0 )
+ {
+ outputvector->outputvectorlow += outputvector->outputvectorsize;
+ outputvector->outputvectorhigh += outputvector->outputvectorsize;
+ }
+ outputvector->outputvector[(--outputvector->outputvectorlow) % outputvector->outputvectorsize] =
+ entry;
+ outputvector->outputvectorfill++;
+}
+
+void BamBam_BamCollationOutputVector_PushBack(BamBam_BamCollationOutputVector * outputvector, BamBam_BamCollationHashEntry * entry)
+{
+ assert (outputvector->outputvectorfill < outputvector->outputvectorsize );
+ outputvector->outputvector[(outputvector->outputvectorhigh++) % outputvector->outputvectorsize] =
+ entry;
+ outputvector->outputvectorfill++;
+}
+
+BamBam_BamCollationHashEntry * BamBam_BamCollationOutputVector_PopFront(BamBam_BamCollationOutputVector * outputvector)
+{
+ assert ( outputvector->outputvectorfill );
+ BamBam_BamCollationHashEntry * entry = outputvector->outputvector[(outputvector->outputvectorlow++) % outputvector->outputvectorsize];
+ outputvector->outputvectorfill--;
+ return entry;
+}
+
+BamBam_BamCollationOutputVector * BamBam_BamCollationOutputVector_New(unsigned int const vectorsizelog)
+{
+ BamBam_BamCollationOutputVector * outputvector = 0;
+ /* create an empty ring buffer with 2^vectorsizelog slots; returns 0 if either allocation fails */
+ outputvector = (BamBam_BamCollationOutputVector *)malloc(sizeof(BamBam_BamCollationOutputVector));
+
+ if ( ! outputvector )
+ return 0;
+
+ outputvector->outputvectorsize = 1ull<<vectorsizelog;
+ outputvector->outputvectorlow = 0;
+ outputvector->outputvectorhigh = 0;
+ outputvector->outputvectorfill = 0;
+ outputvector->outputvector = (BamBam_BamCollationHashEntry **)malloc((outputvector->outputvectorsize)*sizeof(BamBam_BamCollationHashEntry *));
+ if ( ! outputvector->outputvector ) { free(outputvector); outputvector = 0; } /* never hand out a vector without its slot array */
+ return outputvector;
+}
+void BamBam_BamCollationOutputVector_Delete(BamBam_BamCollationOutputVector * outputvector)
+{
+ uint64_t i;
+ if ( ! outputvector ) return; /* accept 0 like the other _Delete functions; entries still queued are destroyed below */
+ for ( i = outputvector->outputvectorlow; i != outputvector->outputvectorhigh; ++i )
+ {
+ if ( outputvector->outputvector[i % outputvector->outputvectorsize] )
+ {
+ BamBam_BamCollationHashEntry_Delete(outputvector->outputvector[i%outputvector->outputvectorsize]);
+ outputvector->outputvector[i%outputvector->outputvectorsize] = 0;
+ }
+ }
+ /* release the slot array, then the vector object itself */
+ free(outputvector->outputvector);
+ free(outputvector);
+}
diff --git a/src/bambamc/BamBam_BamCollationOutputVector.h b/src/bambamc/BamBam_BamCollationOutputVector.h
new file mode 100644
index 0000000..e41cc64
--- /dev/null
+++ b/src/bambamc/BamBam_BamCollationOutputVector.h
@@ -0,0 +1,40 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#if ! defined(BAMBAM_BAMCOLLATIONOUTPUTVECTOR_H)
+#define BAMBAM_BAMCOLLATIONOUTPUTVECTOR_H
+
+#include <bambamc/BamBam_Unused.h>
+#include <bambamc/BamBam_BamCollationHashEntry.h>
+
+typedef struct _BamBam_BamCollationOutputVector
+{
+ BamBam_BamCollationHashEntry ** outputvector;
+ uint64_t outputvectorsize;
+ uint64_t outputvectorfill;
+ uint64_t outputvectorlow;
+ uint64_t outputvectorhigh;
+} BamBam_BamCollationOutputVector;
+
+extern void BamBam_BamCollationOutputVector_PushBack(BamBam_BamCollationOutputVector * outputvector, BamBam_BamCollationHashEntry * entry);
+extern void BamBam_BamCollationOutputVector_PushFront(BamBam_BamCollationOutputVector * outputvector, BamBam_BamCollationHashEntry * entry);
+extern BamBam_BamCollationHashEntry * BamBam_BamCollationOutputVector_PopFront(BamBam_BamCollationOutputVector * outputvector) BAMBAM_WARN_IF_UNUSEDRESULT;
+extern BamBam_BamCollationOutputVector * BamBam_BamCollationOutputVector_New(unsigned int const vectorsizelog) BAMBAM_WARN_IF_UNUSEDRESULT;
+extern void BamBam_BamCollationOutputVector_Delete(BamBam_BamCollationOutputVector * outputvector);
+#endif
diff --git a/src/bambamc/BamBam_BamCollationTempFileGenerator.c b/src/bambamc/BamBam_BamCollationTempFileGenerator.c
new file mode 100644
index 0000000..5d0538e
--- /dev/null
+++ b/src/bambamc/BamBam_BamCollationTempFileGenerator.c
@@ -0,0 +1,164 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#include <bambamc/BamBam_BamCollationTempFileGenerator.h>
+#include <bambamc/BamBam_StrDup.h>
+#include <string.h>
+#include <stdio.h>
+
+void BamBam_BamCollationTempFileGenerator_Print(FILE * file, BamBam_BamCollationTempFileGenerator const * gen)
+{
+ fprintf(file,"BamBam_BamCollationTempFileGenerator(prefix=%s,nextid=%llu,tempfilenames=",gen->prefix,(unsigned long long)gen->nextid);
+ BamBam_List_Print(file,gen->tempfilenames);
+ fprintf(file,")");
+}
+
+/* for getpid */
+#include <sys/types.h>
+#include <unistd.h>
+
+char * BamBam_ComputeTempFileName(char const * prefix, uint64_t const id)
+{
+ char * sid = 0;
+ char * spid = 0;
+ unsigned int tempnamelen = 0;
+ char * tempname = 0;
+
+ sid = BamBam_FormatNumberMinLen(id,6);
+
+ if ( ! sid )
+ return 0;
+
+ spid = BamBam_FormatNumber(getpid());
+
+ if ( ! spid )
+ {
+ free(sid);
+ return 0;
+ }
+
+ tempnamelen =
+ strlen(prefix) +
+ 1 + /* _ */
+ strlen(spid) +
+ 1 + /* _ */
+ strlen(sid) +
+ 1 /* NUL */
+ ;
+
+ tempname = (char *)malloc(tempnamelen);
+
+ if ( ! tempname )
+ {
+ free(sid);
+ free(spid);
+ return 0;
+ }
+
+ snprintf(tempname,tempnamelen,"%s_%s_%s", prefix, spid, sid);
+
+ free(sid);
+ free(spid);
+
+ return tempname;
+}
+
+void freeTempFile(void * vtmpfilename)
+{
+ if ( vtmpfilename )
+ {
+ char * tmpfilename = (char *)vtmpfilename;
+ unlink(tmpfilename);
+ free(tmpfilename);
+ }
+ else
+ {
+ fprintf(stderr,"freeTempFile() called for NULL pointer.\n");
+ }
+}
+
+char * BamBam_BamCollationTempFileGenerator_GetNextTempFileName(BamBam_BamCollationTempFileGenerator * gen)
+{
+ uint64_t id;
+ char * tmpfilename = 0;
+ BamBam_ListNode * listnode = 0;
+
+ id = (gen->nextid)++;
+
+ tmpfilename = BamBam_ComputeTempFileName(gen->prefix,id);
+
+ if ( ! tmpfilename )
+ return 0;
+
+ listnode = BamBam_ListNode_New();
+
+ if ( ! listnode )
+ {
+ free(tmpfilename);
+ return 0;
+ }
+
+ listnode->entry = tmpfilename;
+ listnode->bamBamListFreeFunction = freeTempFile;
+ listnode->bamBamListPrintFunction = BamBam_PrintStringFunction;
+ BamBam_ListNode_PushBack(gen->tempfilenames,listnode);
+
+ return tmpfilename;
+}
+
+BamBam_BamCollationTempFileGenerator * BamBam_BamCollationTempFileGenerator_New(char const * prefix)
+{
+ BamBam_BamCollationTempFileGenerator * gen = 0;
+
+ gen = (BamBam_BamCollationTempFileGenerator *)malloc(sizeof(BamBam_BamCollationTempFileGenerator));
+
+ if ( ! gen )
+ return 0;
+
+ gen->prefix = BamBam_StrDup(prefix);
+
+ if ( ! gen->prefix )
+ {
+ free(gen);
+ return 0;
+ }
+
+ gen->nextid = 0;
+
+ gen->tempfilenames = BamBam_List_New();
+
+ if ( ! gen->tempfilenames )
+ {
+ free(gen->prefix);
+ free(gen);
+ return 0;
+ }
+
+ return gen;
+}
+
+void BamBam_BamCollationTempFileGenerator_Delete(BamBam_BamCollationTempFileGenerator * gen)
+{
+ if ( gen )
+ {
+ BamBam_List_Delete(gen->tempfilenames);
+ free(gen->prefix);
+ free(gen);
+ }
+}
diff --git a/src/bambamc/BamBam_BamCollationTempFileGenerator.h b/src/bambamc/BamBam_BamCollationTempFileGenerator.h
new file mode 100644
index 0000000..f080b2b
--- /dev/null
+++ b/src/bambamc/BamBam_BamCollationTempFileGenerator.h
@@ -0,0 +1,39 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#if ! defined(BAMBAM_BAMCOLLATIONTEMPFILEGENERATOR_H)
+#define BAMBAM_BAMCOLLATIONTEMPFILEGENERATOR_H
+
+#include <bambamc/BamBam_Unused.h>
+#include <bambamc/BamBam_FormatNumber.h>
+#include <bambamc/BamBam_List.h>
+
+typedef struct _BamBam_BamCollationTempFileGenerator /* generator for numbered temporary file names */
+{
+ char * prefix; /* private copy of the prefix given to _New, passed on to BamBam_ComputeTempFileName */
+ uint64_t nextid; /* id used for the next name, incremented by every GetNextTempFileName call */
+ BamBam_List * tempfilenames; /* every name handed out so far; nodes use freeTempFile (unlink + free) as free function */
+} BamBam_BamCollationTempFileGenerator;
+
+extern void BamBam_BamCollationTempFileGenerator_Print(FILE * file, BamBam_BamCollationTempFileGenerator const * gen);
+extern char * BamBam_ComputeTempFileName(char const * prefix, uint64_t const id) BAMBAM_WARN_IF_UNUSEDRESULT;
+extern char * BamBam_BamCollationTempFileGenerator_GetNextTempFileName(BamBam_BamCollationTempFileGenerator * gen) BAMBAM_WARN_IF_UNUSEDRESULT; /* returned string is also stored in gen->tempfilenames; null on allocation failure */
+extern BamBam_BamCollationTempFileGenerator * BamBam_BamCollationTempFileGenerator_New(char const * prefix) BAMBAM_WARN_IF_UNUSEDRESULT; /* null on allocation failure */
+extern void BamBam_BamCollationTempFileGenerator_Delete(BamBam_BamCollationTempFileGenerator * gen); /* accepts null */
+#endif
diff --git a/src/bambamc/BamBam_BamCollationVector.c b/src/bambamc/BamBam_BamCollationVector.c
new file mode 100644
index 0000000..bb17ed6
--- /dev/null
+++ b/src/bambamc/BamBam_BamCollationVector.c
@@ -0,0 +1,189 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#include <bambamc/BamBam_BamCollationVector.h>
+#include <bambamc/BamBam_BamFlagBase.h>
+
+/**
+ * allocate a collation vector with room for 2^sizelog entry pointers, all
+ * initialised to null and with fill set to zero.
+ *
+ * returns null if memory cannot be obtained, if sizelog is too large for a
+ * 64 bit shift or if the table would not fit into size_t bytes.
+ **/
+BamBam_BamCollationVector * BamBam_BamCollationVector_New(unsigned int sizelog)
+{
+    BamBam_BamCollationVector * vector = 0;
+    uint64_t i;
+
+    /* shifting a 64 bit value by 64 or more bits is undefined */
+    if ( sizelog >= 64 )
+        return 0;
+
+    vector = (BamBam_BamCollationVector *)malloc(sizeof(BamBam_BamCollationVector));
+
+    if ( ! vector )
+        return 0;
+
+    vector->entries = 0;
+    vector->tablesize = 1ull << sizelog;
+    vector->fill = 0;
+
+    /* refuse table sizes whose byte count would wrap around in size_t */
+    if ( vector->tablesize > ((size_t)-1) / sizeof(BamBam_BamCollationHashEntry *) )
+    {
+        free(vector);
+        return 0;
+    }
+
+    vector->entries = (BamBam_BamCollationHashEntry **)malloc(((size_t)vector->tablesize)*(sizeof(BamBam_BamCollationHashEntry *)));
+
+    if ( ! vector->entries )
+    {
+        free(vector);
+        return 0;
+    }
+
+    for ( i = 0; i < vector->tablesize; ++i )
+        vector->entries[i] = 0;
+
+    return vector;
+}
+
+/**
+ * store entry in the next free slot of vector. returns 1 if the vector is
+ * full after the insertion and 0 otherwise. no bounds check is performed.
+ **/
+int BamBam_BamCollationVector_PushBack(BamBam_BamCollationVector * vector, BamBam_BamCollationHashEntry * entry)
+{
+    uint64_t const slot = vector->fill;
+
+    vector->entries[slot] = entry;
+    vector->fill = slot + 1;
+
+    return ( vector->fill == vector->tablesize ) ? 1 : 0;
+}
+
+/**
+ * delete all entries currently stored in vector and mark it as empty
+ **/
+void BamBam_BamCollationVector_Erase(BamBam_BamCollationVector * vector)
+{
+    uint64_t k = 0;
+
+    while ( k < vector->fill )
+    {
+        BamBam_BamCollationHashEntry_Delete(vector->entries[k]);
+        ++k;
+    }
+
+    vector->fill = 0;
+}
+
+/**
+ * sort the entries of vector, move complete pairs to outputvector and write
+ * the remaining entries to a fresh temporary file obtained from gen.
+ *
+ * two neighbouring entries (after sorting) form a pair if they carry the same
+ * query name and one is flagged as read 1 while the other is flagged as read 2.
+ * entries moved to outputvector are removed from vector; the entries which
+ * were written to the temporary file stay in vector (vector->fill is set to
+ * their number).
+ *
+ * returns 0 on success (including the case that nothing had to be written)
+ * and -1 if no temporary file name could be obtained, the compressor could
+ * not be created, an alignment could not be stored or the stream could not be
+ * terminated.
+ **/
+int BamBam_BamCollationVector_Sort(
+    BamBam_BamCollationVector * vector,
+    BamBam_BamCollationOutputVector * outputvector,
+    BamBam_BamCollationTempFileGenerator * gen
+)
+{
+    uint64_t i, j;
+    char * tmpfilename = 0;
+    BamBam_BgzfCompressor * gzipfile = 0;
+    int result = 0;
+
+    qsort (
+        &(vector->entries[0]),
+        vector->fill,
+        sizeof(BamBam_BamCollationHashEntry *),
+        BamBam_BamHashEntry_CompareVoidPtr
+    );
+
+    /* extract pairs, compact everything else to the front of the vector */
+    j = 0;
+    i = 0;
+    while ( i < vector->fill )
+    {
+        int ispair = 0;
+
+        if (
+            i+1 < vector->fill &&
+            strcmp(vector->entries[i]->qname,vector->entries[i+1]->qname) == 0
+        )
+        {
+            int const first1  = (BamBam_BamCollationHashEntry_GetFlags(vector->entries[i  ]) & BAMBAMC_FREAD1) != 0;
+            int const first2  = (BamBam_BamCollationHashEntry_GetFlags(vector->entries[i  ]) & BAMBAMC_FREAD2) != 0;
+            int const second1 = (BamBam_BamCollationHashEntry_GetFlags(vector->entries[i+1]) & BAMBAMC_FREAD1) != 0;
+            int const second2 = (BamBam_BamCollationHashEntry_GetFlags(vector->entries[i+1]) & BAMBAMC_FREAD2) != 0;
+
+            ispair = (first1 && second2) || (first2 && second1);
+        }
+
+        if ( ispair )
+        {
+            BamBam_BamCollationOutputVector_PushBack(outputvector,vector->entries[i  ]);
+            BamBam_BamCollationOutputVector_PushBack(outputvector,vector->entries[i+1]);
+            i += 2;
+        }
+        else
+        {
+            vector->entries[j++] = vector->entries[i];
+            i += 1;
+        }
+    }
+
+    vector->fill = j;
+
+    /* everything was paired up, no temporary file necessary */
+    if ( ! vector->fill )
+        return 0;
+
+    /* the name is kept in the list of gen, so it is not freed here */
+    tmpfilename = BamBam_BamCollationTempFileGenerator_GetNextTempFileName(gen);
+
+    if ( ! tmpfilename )
+        return -1;
+
+    gzipfile = BamBam_BgzfCompressor_New(tmpfilename,1);
+
+    if ( ! gzipfile )
+        return -1;
+
+    for ( i = 0; i < vector->fill; ++i )
+        if ( BamBam_BamSingleAlignment_StoreAlignmentBgzf(vector->entries[i]->entry,gzipfile) < 0 )
+        {
+            result = -1;
+            break;
+        }
+
+    /* only terminate the stream if all alignments were stored */
+    if ( result == 0 && BamBam_BgzfCompressor_Terminate(gzipfile) < 0 )
+        result = -1;
+
+    BamBam_BgzfCompressor_Delete(gzipfile);
+
+    return result;
+}
+
+/**
+ * release a collation vector including all entries still stored in it.
+ * a null pointer is ignored.
+ **/
+void BamBam_BamCollationVector_Delete(BamBam_BamCollationVector * vector)
+{
+    uint64_t k;
+
+    if ( vector == 0 )
+        return;
+
+    for ( k = 0; k < vector->fill; ++k )
+    {
+        BamBam_BamCollationHashEntry * const stored = vector->entries[k];
+
+        if ( stored != 0 )
+            BamBam_BamCollationHashEntry_Delete(stored);
+    }
+
+    free(vector->entries);
+    free(vector);
+}
diff --git a/src/bambamc/BamBam_BamCollationVector.h b/src/bambamc/BamBam_BamCollationVector.h
new file mode 100644
index 0000000..bffada4
--- /dev/null
+++ b/src/bambamc/BamBam_BamCollationVector.h
@@ -0,0 +1,44 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#if ! defined(BAMBAM_BAMCOLLATIONVECTOR_H)
+#define BAMBAM_BAMCOLLATIONVECTOR_H
+
+#include <bambamc/BamBam_Unused.h>
+#include <bambamc/BamBam_BamCollationHashEntry.h>
+#include <bambamc/BamBam_BamCollationOutputVector.h>
+#include <bambamc/BamBam_BamCollationTempFileGenerator.h>
+
+typedef struct _BamBam_BamCollationVector /* fixed size buffer of hash entries waiting to be sorted */
+{
+ BamBam_BamCollationHashEntry ** entries; /* array of tablesize entry pointers */
+ uint64_t tablesize; /* capacity, set to 1 << sizelog by _New */
+ uint64_t fill; /* number of slots in use, entries[0..fill-1] */
+} BamBam_BamCollationVector;
+
+extern BamBam_BamCollationVector * BamBam_BamCollationVector_New(unsigned int sizelog) BAMBAM_WARN_IF_UNUSEDRESULT; /* null on allocation failure */
+extern int BamBam_BamCollationVector_PushBack(BamBam_BamCollationVector * vector, BamBam_BamCollationHashEntry * entry) BAMBAM_WARN_IF_UNUSEDRESULT; /* non-zero if the vector is full afterwards */
+extern void BamBam_BamCollationVector_Erase(BamBam_BamCollationVector * vector); /* deletes the stored entries, resets fill */
+extern int BamBam_BamCollationVector_Sort(
+ BamBam_BamCollationVector * vector,
+ BamBam_BamCollationOutputVector * outputvector,
+ BamBam_BamCollationTempFileGenerator * gen
+) BAMBAM_WARN_IF_UNUSEDRESULT; /* pairs go to outputvector, the rest to a temp file from gen; 0 on success, -1 on failure */
+extern void BamBam_BamCollationVector_Delete(BamBam_BamCollationVector * vector); /* accepts null */
+#endif
diff --git a/src/bambamc/BamBam_BamCollator.c b/src/bambamc/BamBam_BamCollator.c
new file mode 100644
index 0000000..2da339f
--- /dev/null
+++ b/src/bambamc/BamBam_BamCollator.c
@@ -0,0 +1,906 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#include <bambamc/BamBam_BamCollator.h>
+#include <bambamc/BamBam_CharBuffer.h>
+#include <bambamc/BamBam_LineParsing.h>
+#include <bambamc/BamBam_StrDup.h>
+#include <assert.h>
+#include <ctype.h>
+
+/**
+ * exchange the file id and the hash entry pointer of two merge heap entries
+ **/
+void BamBam_BamCollator_MergeHeapEntrySwap(BamBam_MergeHeapEntry * A, BamBam_MergeHeapEntry * B)
+{
+    uint64_t const fileidA = A->fileid;
+    BamBam_BamCollationHashEntry * const hashentryA = A->hashentry;
+
+    A->fileid = B->fileid;
+    A->hashentry = B->hashentry;
+
+    B->fileid = fileidA;
+    B->hashentry = hashentryA;
+}
+
+/**
+ * debugging aid: print the query names stored in the merge heap to stderr,
+ * one line per heap slot in array order
+ **/
+void BamBam_BamCollator_MergeHeapPrint(BamBam_BamCollator * collator)
+{
+    uint64_t slot = 0;
+
+    while ( slot < collator->mergeheapfill )
+    {
+        fprintf(stderr,"HEAP[%llu]=%s\n",(unsigned long long)slot,collator->mergeheap[slot].hashentry->qname);
+        ++slot;
+    }
+}
+
+/**
+ * consistency check: assert that no heap element compares smaller than its
+ * parent (min heap property)
+ **/
+void BamBam_BamCollator_MergeHeapCheck(BamBam_BamCollator * collator)
+{
+    uint64_t child;
+
+    /* the root (slot 0) has no parent, start with slot 1 */
+    for ( child = 1; child < collator->mergeheapfill; ++child )
+    {
+        BamBam_MergeHeapEntry * Pchild = &(collator->mergeheap[child]);
+        BamBam_MergeHeapEntry * Pparent = &(collator->mergeheap[(child-1)/2]);
+
+        assert ( BamBam_BamHashEntry_Compare( Pparent->hashentry, Pchild->hashentry ) <= 0 );
+    }
+}
+
+/**
+ * return whichever of the heap slots p0 and p1 holds the smaller entry;
+ * p0 wins if both compare equal
+ **/
+static unsigned int BamBam_BamCollator_MergeHeapMinimum2(BamBam_BamCollator * collator, unsigned int p0, unsigned int p1)
+{
+    BamBam_BamCollationHashEntry * const entry0 = collator->mergeheap[p0].hashentry;
+    BamBam_BamCollationHashEntry * const entry1 = collator->mergeheap[p1].hashentry;
+
+    return ( BamBam_BamHashEntry_Compare( entry0, entry1 ) <= 0 ) ? p0 : p1;
+}
+
+/**
+ * return whichever of the heap slots p0, p1 and p2 holds the smallest entry.
+ * p1 and p2 are compared first, the winner is then compared with p0.
+ **/
+static unsigned int BamBam_BamCollator_MergeHeapMinimum3(BamBam_BamCollator * collator, unsigned int p0, unsigned int p1, unsigned int p2)
+{
+    unsigned int const smaller12 = BamBam_BamCollator_MergeHeapMinimum2(collator,p1,p2);
+
+    return BamBam_BamCollator_MergeHeapMinimum2(collator,p0,smaller12);
+}
+
+/**
+ * return a pointer to the root (minimum) of the merge heap.
+ * the heap must not be empty.
+ **/
+BamBam_MergeHeapEntry * BamBam_BamCollator_MergeHeapMinimum(BamBam_BamCollator * collator)
+{
+    assert ( collator->mergeheapfill );
+
+    /* the minimum is stored in the first array slot */
+    return collator->mergeheap;
+}
+
+/**
+ * remove the root of the merge heap. the hash entry stored at the root is
+ * deleted, the last heap element takes its place and is then moved down
+ * until no child compares smaller than its parent. the heap must not be
+ * empty.
+ **/
+void BamBam_BamCollator_MergeHeapDeleteMinimum(BamBam_BamCollator * collator)
+{
+    BamBam_MergeHeapEntry * root;
+    BamBam_MergeHeapEntry * tail;
+    unsigned int p = 0;
+
+    assert ( collator->mergeheapfill );
+
+    /* move the minimum to the last slot, delete it there and shrink the heap */
+    root = &(collator->mergeheap[0]);
+    tail = &(collator->mergeheap[collator->mergeheapfill-1]);
+
+    BamBam_BamCollator_MergeHeapEntrySwap(root,tail);
+    BamBam_BamCollationHashEntry_Delete(tail->hashentry);
+    collator->mergeheapfill -= 1;
+
+    /* sift the new root down */
+    for ( ; ; )
+    {
+        unsigned int const left = 2*p+1;
+        unsigned int const right = 2*p+2;
+        unsigned int smallestchild;
+
+        /* no children, we have reached a leaf */
+        if ( !(left < collator->mergeheapfill) )
+            break;
+
+        /* pick the smaller child, the left one wins ties */
+        if ( right < collator->mergeheapfill )
+            smallestchild = BamBam_BamCollator_MergeHeapMinimum2(collator,left,right);
+        else
+            smallestchild = left;
+
+        /* this node is not larger than its children, heap property holds */
+        if ( BamBam_BamCollator_MergeHeapMinimum2(collator,p,smallestchild) == p )
+            break;
+
+        BamBam_BamCollator_MergeHeapEntrySwap(&(collator->mergeheap[p]),&(collator->mergeheap[smallestchild]));
+        p = smallestchild;
+    }
+}
+
+/**
+ * insert hashentry (read from merge file fileid) into the merge heap.
+ * the element is appended to the heap array and moved towards the root as
+ * long as it compares smaller than its parent. the heap must have room for
+ * the new element (at most nummergefiles elements).
+ **/
+void BamBam_BamCollator_MergeHeapInsert(BamBam_BamCollator * collator, BamBam_BamCollationHashEntry * hashentry, unsigned int fileid)
+{
+    unsigned int p = collator->mergeheapfill++;
+
+    assert ( collator->mergeheapfill <= collator->nummergefiles );
+
+    collator->mergeheap[p].hashentry = hashentry;
+    collator->mergeheap[p].fileid = fileid;
+
+    /* sift up */
+    while ( p )
+    {
+        unsigned int const parent = (p-1)/2;
+        BamBam_MergeHeapEntry * Pchild = &(collator->mergeheap[p]);
+        BamBam_MergeHeapEntry * Pparent = &(collator->mergeheap[parent]);
+
+        /* parent is not larger, the element has found its place */
+        if ( !(BamBam_BamHashEntry_Compare( Pchild->hashentry, Pparent->hashentry ) < 0) )
+            break;
+
+        BamBam_BamCollator_MergeHeapEntrySwap(Pchild,Pparent);
+        p = parent;
+    }
+}
+
+#include <bambamc/BamBam_CollatorState.h>
+
+/**
+ * return the next alignment in collated order or null if no alignment is
+ * available (end of data or failure, see collator->state).
+ *
+ * while the collator is in state BAMBAM_COLLATE_READING_INPUT alignments are
+ * decoded from the input. unpaired reads go straight to the output vector.
+ * paired reads are parked in the hash table until their mate shows up; an
+ * entry pushed out of its hash slot by a non matching read is moved to the
+ * collation vector, which is sorted and spilled to a temporary file whenever
+ * it runs full. at the end of the input the remaining hash table entries are
+ * flushed the same way and the state changes to BAMBAM_COLLATE_MERGING.
+ *
+ * in state BAMBAM_COLLATE_MERGING the temporary files are opened (on first
+ * use) and merged via the merge heap. when the heap runs empty the state
+ * becomes BAMBAM_COLLATE_DONE.
+ *
+ * any error sets the state to BAMBAM_COLLATE_FAILED. a failure while
+ * flushing at the end of the input is no longer overwritten by the switch to
+ * the merging state.
+ **/
+BamBam_BamCollationHashEntry * BamBam_BamCollator_GetNextRead(BamBam_BamCollator * collator)
+{
+    while ( (collator->state==BAMBAM_COLLATE_READING_INPUT) && !collator->outputvector->outputvectorfill )
+    {
+        if ( (collator->alignment = BamBam_SamBamFileDecoder_DecodeAlignment(collator->decoder)) )
+        {
+            BamBam_BamCollationHashEntry * nhashentry = BamBam_BamCollationHashEntry_NewDup(collator->alignment);
+
+            if ( ! nhashentry )
+            {
+                collator->state = BAMBAM_COLLATE_FAILED;
+            }
+            else
+            {
+                uint32_t const nhashentryflags = BamBam_BamCollationHashEntry_GetFlags(nhashentry);
+
+                /* single end? */
+                if ( !(nhashentryflags & BAMBAMC_FPAIRED) )
+                {
+                    BamBam_BamCollationOutputVector_PushBack(collator->outputvector,nhashentry);
+                }
+                /* pair */
+                else
+                {
+                    uint64_t const slot = nhashentry->hashvalue & collator->hash->tablemask;
+                    BamBam_BamCollationHashEntry * const ohashentry = collator->hash->entries[slot];
+
+                    /* hash place is empty */
+                    if ( ! ohashentry )
+                    {
+                        collator->hash->entries[slot] = nhashentry;
+                    }
+                    else
+                    {
+                        uint32_t const ohashentryflags = BamBam_BamCollationHashEntry_GetFlags(ohashentry);
+
+                        if (
+                            ohashentry->hashvalue == nhashentry->hashvalue
+                            &&
+                            ! strcmp(ohashentry->qname,nhashentry->qname)
+                            &&
+                            (
+                                ((nhashentryflags & BAMBAMC_FREAD1) && (ohashentryflags & BAMBAMC_FREAD2))
+                                ||
+                                ((nhashentryflags & BAMBAMC_FREAD2) && (ohashentryflags & BAMBAMC_FREAD1))
+                            )
+                        )
+                        {
+                            /* found pair, output read 1 before read 2 */
+                            collator->hash->entries[slot] = 0;
+
+                            if ( (ohashentryflags & BAMBAMC_FREAD1) != 0 )
+                            {
+                                BamBam_BamCollationOutputVector_PushBack(collator->outputvector,ohashentry);
+                                BamBam_BamCollationOutputVector_PushBack(collator->outputvector,nhashentry);
+                            }
+                            else
+                            {
+                                BamBam_BamCollationOutputVector_PushBack(collator->outputvector,nhashentry);
+                                BamBam_BamCollationOutputVector_PushBack(collator->outputvector,ohashentry);
+                            }
+                        }
+                        else
+                        {
+                            /* not pair, push previous entry out */
+                            int const vecfull = BamBam_BamCollationVector_PushBack(collator->vector,ohashentry);
+
+                            if ( vecfull )
+                            {
+                                int const sortok = BamBam_BamCollationVector_Sort(collator->vector,collator->outputvector,collator->gen);
+
+                                if ( sortok < 0 )
+                                    collator->state = BAMBAM_COLLATE_FAILED;
+
+                                BamBam_BamCollationVector_Erase(collator->vector);
+                            }
+
+                            collator->hash->entries[slot] = nhashentry;
+                        }
+                    }
+                }
+            }
+        }
+        /* no more alignments in input file */
+        else
+        {
+            /* handle alignments left in hash table */
+            unsigned int i = 0;
+            int sortok;
+
+            for ( i = 0; collator->state != BAMBAM_COLLATE_FAILED && i < collator->hash->tablesize; ++i )
+                if ( collator->hash->entries[i] )
+                {
+                    int vecfull;
+                    BamBam_BamCollationHashEntry * hashentry = collator->hash->entries[i];
+                    collator->hash->entries[i] = 0;
+
+                    /* push entry out */
+                    vecfull = BamBam_BamCollationVector_PushBack(collator->vector,hashentry);
+
+                    if ( vecfull )
+                    {
+                        sortok = BamBam_BamCollationVector_Sort(collator->vector,collator->outputvector,collator->gen);
+
+                        if ( sortok < 0 )
+                            collator->state = BAMBAM_COLLATE_FAILED;
+
+                        BamBam_BamCollationVector_Erase(collator->vector);
+                    }
+                }
+
+            /* flush whatever is left in the vector */
+            sortok = BamBam_BamCollationVector_Sort(collator->vector,collator->outputvector,collator->gen);
+            if ( sortok < 0 )
+                collator->state = BAMBAM_COLLATE_FAILED;
+            BamBam_BamCollationVector_Erase(collator->vector);
+
+            /* only start merging if flushing worked, otherwise stay failed */
+            if ( collator->state != BAMBAM_COLLATE_FAILED )
+                collator->state = BAMBAM_COLLATE_MERGING;
+        }
+    }
+    while ( (collator->state==BAMBAM_COLLATE_MERGING) && !collator->outputvector->outputvectorfill )
+    {
+        /* tempfiles not yet open, open them */
+        if ( ! collator->mergefiles )
+        {
+            unsigned int numtempfiles;
+            unsigned int numalloc;
+            BamBam_ListNode * node = 0;
+            unsigned int k = 0;
+
+            numtempfiles = BamBam_List_Size(collator->gen->tempfilenames);
+            /*
+             * allocate at least one element: malloc(0) may return null, which
+             * must not be mistaken for an error if there are no temp files
+             */
+            numalloc = numtempfiles ? numtempfiles : 1;
+            collator->mergefiles = (BamBam_GzipFileDecoder **)malloc(numalloc * sizeof(BamBam_GzipFileDecoder *));
+
+            if ( ! collator->mergefiles )
+                collator->state = BAMBAM_COLLATE_FAILED;
+            else
+            {
+                collator->nummergefiles = numtempfiles;
+                for ( k = 0; k < numalloc; ++k )
+                    collator->mergefiles[k] = 0;
+
+                node = collator->gen->tempfilenames->first;
+
+                k = 0;
+                while ( node )
+                {
+                    char const * filename = (char const *)(node->entry);
+
+                    collator->mergefiles[k] = BamBam_GzipFileDecoder_New(filename);
+
+                    if ( ! collator->mergefiles[k] )
+                        collator->state = BAMBAM_COLLATE_FAILED;
+
+                    node = node->next;
+                    k++;
+                }
+
+                /* if we managed to open all temporary files, then allocate heap */
+                if ( collator->state != BAMBAM_COLLATE_FAILED )
+                {
+                    collator->mergeheap = (BamBam_MergeHeapEntry *)malloc(numalloc * sizeof(BamBam_MergeHeapEntry));
+
+                    if ( ! collator->mergeheap )
+                        collator->state = BAMBAM_COLLATE_FAILED;
+                    else
+                    {
+                        for ( k = 0; k < numalloc; ++k )
+                        {
+                            collator->mergeheap[k].hashentry = 0;
+                            collator->mergeheap[k].fileid = numtempfiles;
+                        }
+                        collator->mergeheapfill = 0;
+                    }
+
+                    /* seed the heap with the first alignment of each file */
+                    for ( k = 0; (collator->state != BAMBAM_COLLATE_FAILED) && k < numtempfiles; ++k )
+                    {
+                        if ( (collator->alignment = BamBam_GzipFileDecoder_DecodeAlignment(collator->mergefiles[k])) )
+                        {
+                            BamBam_BamCollationHashEntry * nhashentry = 0;
+                            nhashentry = BamBam_BamCollationHashEntry_NewDup(collator->alignment);
+
+                            if ( ! nhashentry )
+                                collator->state = BAMBAM_COLLATE_FAILED;
+                            else
+                                BamBam_BamCollator_MergeHeapInsert(collator,nhashentry,k);
+                        }
+                        else
+                        {
+                            fprintf(stderr,"WARNING: temp %llu file seems empty.\n", (unsigned long long)k);
+                        }
+                    }
+
+                    if ( collator->state != BAMBAM_COLLATE_FAILED )
+                    {
+                        /* BamBam_BamCollator_MergeHeapPrint(collator); */
+                        BamBam_BamCollator_MergeHeapCheck(collator);
+                    }
+                }
+            }
+        }
+        else
+        {
+            if ( collator->mergeheapfill )
+            {
+                BamBam_MergeHeapEntry * mini = BamBam_BamCollator_MergeHeapMinimum(collator);
+                BamBam_BamCollationHashEntry * hashentry = mini->hashentry;
+                unsigned int const fileid = mini->fileid;
+
+                BamBam_BamCollationOutputVector_PushBack(collator->outputvector,hashentry);
+
+                /* the entry now belongs to the output vector, detach it before the heap deletes its root */
+                mini->hashentry = 0;
+                BamBam_BamCollator_MergeHeapDeleteMinimum(collator);
+
+                /* refill the heap from the file the minimum came from */
+                if ( (collator->alignment = BamBam_GzipFileDecoder_DecodeAlignment(collator->mergefiles[fileid])) )
+                {
+                    BamBam_BamCollationHashEntry * nhashentry = 0;
+                    nhashentry = BamBam_BamCollationHashEntry_NewDup(collator->alignment);
+
+                    if ( ! nhashentry )
+                    {
+                        collator->state = BAMBAM_COLLATE_FAILED;
+                    }
+                    else
+                    {
+                        BamBam_BamCollator_MergeHeapInsert(collator,nhashentry,fileid);
+                    }
+                }
+            }
+            else
+                collator->state = BAMBAM_COLLATE_DONE;
+        }
+    }
+
+    if ( collator->outputvector->outputvectorfill )
+        return BamBam_BamCollationOutputVector_PopFront(collator->outputvector);
+    else
+        return 0;
+}
+
+enum tmpdirstate { BAMBAM_TMPDIR_FAILED = 1, BAMBAM_TMPDIR_CREATED = 2, BAMBAM_TMPDIR_WASPRESENT = 3 }; /* results of BamBam_CreateTempDir, kept in collator->tmpdirstate */
+
+#include <bambamc/BamBam_Config.h>
+
+#if defined(HAVEWINDOWS_MKDIR)
+#include <direct.h>
+#endif
+
+/* for stat() */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+
+/**
+ * make sure the directory tmpdirname exists.
+ *
+ * returns BAMBAM_TMPDIR_WASPRESENT if it already exists as a directory,
+ * BAMBAM_TMPDIR_CREATED if it did not exist and was created (mode 0700 where
+ * mkdir takes a mode) and BAMBAM_TMPDIR_FAILED if the name exists but is not
+ * a directory, cannot be examined or cannot be created.
+ **/
+static int BamBam_CreateTempDir(char const * tmpdirname)
+{
+    struct stat sb;
+
+    /* it exists, check whether it is a directory */
+    if ( stat(tmpdirname,&sb) == 0 )
+    {
+        if ( S_ISDIR(sb.st_mode) )
+            return BAMBAM_TMPDIR_WASPRESENT;
+        else
+            return BAMBAM_TMPDIR_FAILED;
+    }
+
+    /*
+     * stat failed for a reason other than a missing name (e.g. permissions);
+     * sb is not valid in this case and must not be inspected
+     */
+    if ( errno != ENOENT )
+        return BAMBAM_TMPDIR_FAILED;
+
+    #if defined(HAVEWINDOWS_MKDIR)
+    if ( ! _mkdir(tmpdirname) )
+    #else
+    if ( ! mkdir(tmpdirname, 0700) )
+    #endif
+    {
+        return BAMBAM_TMPDIR_CREATED;
+    }
+    else
+    {
+        return BAMBAM_TMPDIR_FAILED;
+    }
+}
+
+/**
+ * translate an input format name to an open mode string: "bam" yields "rb",
+ * "sam" yields "r", anything else yields null
+ **/
+static char const * BamBam_BamCollator_InputFormatToMode(char const * inputformat)
+{
+    char const * mode = 0;
+
+    if ( ! strcmp(inputformat,"bam") )
+        mode = "rb";
+    else if ( ! strcmp(inputformat,"sam") )
+        mode = "r";
+
+    return mode;
+}
+
+/**
+ * release a collator and everything it owns. a null pointer is accepted.
+ * always returns null, so callers can write "return Delete(collator);" on
+ * error paths.
+ *
+ * all entries still stored in the merge heap are deleted. if the temporary
+ * directory was created by the collator it is removed after the temporary
+ * file generator has been deleted, as deleting the generator is what unlinks
+ * the temporary files inside the directory.
+ **/
+BamBam_BamCollator * BamBam_BamCollator_Delete(BamBam_BamCollator * collator)
+{
+    if ( collator )
+    {
+        uint64_t k;
+
+        BamBam_BamHeaderInfo_Delete(collator->headerinfo);
+        BamBam_BamHeaderInfo_Delete(collator->parsedheaderinfo);
+        free(collator->headerline);
+        free(collator->bamheadertext);
+        free(collator->filteredbamheadertext);
+        free(collator->vn);
+        free(collator->so);
+        free(collator->bufferA);
+        free(collator->bufferB);
+
+        /* close the temporary files opened for merging */
+        if ( collator->mergefiles )
+        {
+            for ( k = 0; k < collator->nummergefiles; ++k )
+                if ( collator->mergefiles[k] )
+                {
+                    BamBam_GzipFileDecoder_Delete(collator->mergefiles[k]);
+                    collator->mergefiles[k] = 0;
+                }
+            free(collator->mergefiles);
+            collator->mergefiles = 0;
+        }
+
+        if ( collator->mergeheap )
+        {
+            /* drain the heap completely, each call deletes one entry */
+            while ( collator->mergeheapfill )
+                BamBam_BamCollator_MergeHeapDeleteMinimum(collator);
+            free(collator->mergeheap);
+            collator->mergeheap = 0;
+        }
+
+        if ( collator->decoder )
+            BamBam_SamBamFileDecoder_Delete(collator->decoder);
+
+        BamBam_BamCollationOutputVector_Delete(collator->outputvector);
+        /* deleting the generator unlinks the temporary files */
+        BamBam_BamCollationTempFileGenerator_Delete(collator->gen);
+        BamBam_BamCollationVector_Delete(collator->vector);
+        BamBam_BamCollationHash_Delete(collator->hash);
+
+        /* rmdir only works on an empty directory, so it has to come after the unlinking above */
+        if ( collator->tmpdirstate == BAMBAM_TMPDIR_CREATED && collator->tempdirname )
+            rmdir(collator->tempdirname);
+
+        free(collator->tempdirname);
+        free(collator->tempprefix);
+        BamBam_BamCollationHashEntry_Delete(collator->entryA);
+        BamBam_BamCollationHashEntry_Delete(collator->entryB);
+        free(collator);
+    }
+
+    return 0;
+}
+
+/**
+ * return a pointer to the first tab, newline or terminating null character
+ * at or after t
+ **/
+char const * BamBam_findNextTab(char const * t)
+{
+    char const * cursor;
+
+    for ( cursor = t; *cursor != 0; ++cursor )
+        if ( *cursor == '\t' || *cursor == '\n' )
+            break;
+
+    return cursor;
+}
+
+/**
+ * if t points to a tab return the position behind it, otherwise return t
+ **/
+char const * BamBam_skipTab(char const * t)
+{
+    return ( *t == '\t' ) ? (t+1) : t;
+}
+
+/**
+ * advance from t to the start of the next tab separated field: skip to the
+ * end of the current field and then over the tab, if there is one
+ **/
+char const * BamBam_nextField(char const * t)
+{
+    char const * const fieldend = BamBam_findNextTab(t);
+
+    return BamBam_skipTab(fieldend);
+}
+
+/**
+ * set up a collator reading alignments from inputfilename.
+ *
+ * tempdirname   directory for temporary files, created if it does not exist
+ * hashsizelog   size argument handed to BamBam_BamCollationHash_New
+ * vectorsizelog size argument handed to the collation and output vectors
+ * inputformat   "bam" or "sam", see BamBam_BamCollator_InputFormatToMode
+ * inputfilename handed to BamBam_SamBamFileDecoder_New
+ *
+ * the header text of the input is copied and filtered; VN and SO are taken
+ * from the @HD line if present (defaults "1.4" and "unknown"). two header
+ * info objects are built: parsedheaderinfo from the @SQ lines of the header
+ * text (only if the text is not empty) and headerinfo from the reference
+ * sequence table of the decoder.
+ *
+ * returns the new collator, or null on any failure (including a null
+ * tempdirname or inputformat); everything allocated so far is released via
+ * BamBam_BamCollator_Delete.
+ *
+ * NOTE(review): for an unknown inputformat mode is null and is passed on to
+ * BamBam_SamBamFileDecoder_New as before; whether the decoder copes with
+ * that cannot be seen from here.
+ **/
+BamBam_BamCollator * BamBam_BamCollator_New(
+    char const * tempdirname,
+    unsigned int const hashsizelog,
+    unsigned int const vectorsizelog,
+    char const * inputformat,
+    char const * inputfilename
+)
+{
+    BamBam_BamCollator * collator = 0;
+    size_t tempdirnamelen = 0;
+    size_t tempfileprefixlen = 0;
+    char const * tempadd = "/file";
+    char const * mode = 0;
+    char const * hfilters[] = { "HD", "SQ", 0 };
+    int i;
+
+    /* both strings are dereferenced below (strlen, strcmp) */
+    if ( ! tempdirname || ! inputformat )
+        return 0;
+
+    mode = BamBam_BamCollator_InputFormatToMode(inputformat);
+
+    tempdirnamelen = strlen(tempdirname);
+    tempfileprefixlen = tempdirnamelen + strlen(tempadd) + 1;
+
+    collator = (BamBam_BamCollator *)malloc(sizeof(BamBam_BamCollator));
+
+    if ( ! collator )
+        return BamBam_BamCollator_Delete(collator);
+
+    /* zero all members so that Delete can be called at any point below */
+    memset(collator,0,sizeof(BamBam_BamCollator));
+    collator->state = BAMBAM_COLLATE_READING_INPUT;
+    collator->tmpdirstate = BamBam_CreateTempDir(tempdirname);
+
+    if ( collator->tmpdirstate == BAMBAM_TMPDIR_FAILED )
+        return BamBam_BamCollator_Delete(collator);
+
+    collator->tempdirname = BamBam_StrDup(tempdirname);
+
+    if ( ! collator->tempdirname )
+        return BamBam_BamCollator_Delete(collator);
+
+    collator->tempprefix = (char *)malloc(tempfileprefixlen);
+
+    if ( ! collator->tempprefix )
+        return BamBam_BamCollator_Delete(collator);
+
+    snprintf(collator->tempprefix,tempfileprefixlen,"%s%s",tempdirname,tempadd);
+
+    collator->hash = BamBam_BamCollationHash_New(hashsizelog);
+
+    if ( ! collator->hash )
+        return BamBam_BamCollator_Delete(collator);
+
+    collator->vector = BamBam_BamCollationVector_New(vectorsizelog);
+
+    if ( ! collator->vector )
+        return BamBam_BamCollator_Delete(collator);
+
+    collator->gen = BamBam_BamCollationTempFileGenerator_New(collator->tempprefix);
+
+    if ( ! collator->gen )
+        return BamBam_BamCollator_Delete(collator);
+
+    collator->outputvector = BamBam_BamCollationOutputVector_New(vectorsizelog);
+
+    if ( ! collator->outputvector )
+        return BamBam_BamCollator_Delete(collator);
+
+    collator->decoder = BamBam_SamBamFileDecoder_New(inputfilename,mode);
+
+    if ( ! collator->decoder )
+        return BamBam_BamCollator_Delete(collator);
+
+    collator->bamheadertext = BamBam_StrDup(collator->decoder->header->headertext);
+
+    if ( ! collator->bamheadertext )
+        return BamBam_BamCollator_Delete(collator);
+
+    collator->filteredbamheadertext = BamBam_filterHeader(collator->bamheadertext,&hfilters[0]);
+
+    if ( ! collator->filteredbamheadertext )
+        return BamBam_BamCollator_Delete(collator);
+
+    /* parse header into header info structure, if we have a header text */
+    if ( collator->bamheadertext && (strlen(collator->bamheadertext) != 0) )
+    {
+        char const * s = collator->bamheadertext;
+
+        /* find the first @HD line and keep a copy of it */
+        while ( (! collator->headerline) && *s )
+        {
+            if ( BamBam_getLineLength(s) >= 4 && s[0] == '@' && s[1] == 'H' && s[2] == 'D' && s[3] == '\t' )
+            {
+                collator->headerline = (char *)malloc(BamBam_getLineLength(s)+1);
+
+                if ( ! collator->headerline )
+                    return BamBam_BamCollator_Delete(collator);
+
+                memset(collator->headerline,0,BamBam_getLineLength(s)+1);
+                memcpy(collator->headerline,s,BamBam_getLineLength(s));
+            }
+
+            s = BamBam_nextLine(s);
+        }
+
+        if ( collator->headerline )
+        {
+            char const * t = collator->headerline;
+
+            while ( *t && *t != '\n' )
+            {
+                char const * nexttab = BamBam_findNextTab(t);
+
+                /* version number */
+                if (
+                    nexttab-t >= 3
+                    &&
+                    t[0] == 'V' && t[1] == 'N' && t[2] == ':'
+                )
+                {
+                    /* a repeated VN field replaces the previous value */
+                    free(collator->vn);
+                    collator->vn = (char *)malloc((nexttab-t)-3+1);
+
+                    if ( ! collator->vn )
+                        return BamBam_BamCollator_Delete(collator);
+
+                    memset(collator->vn,0,(nexttab-t)-3+1);
+                    memcpy(collator->vn,t+3,(nexttab-t)-3);
+                }
+                /* sort order */
+                if (
+                    nexttab-t >= 3
+                    &&
+                    t[0] == 'S' && t[1] == 'O' && t[2] == ':'
+                )
+                {
+                    /* a repeated SO field replaces the previous value */
+                    free(collator->so);
+                    collator->so = (char *)malloc((nexttab-t)-3+1);
+
+                    if ( ! collator->so )
+                        return BamBam_BamCollator_Delete(collator);
+
+                    memset(collator->so,0,(nexttab-t)-3+1);
+                    memcpy(collator->so,t+3,(nexttab-t)-3);
+                }
+
+                t = BamBam_nextField(t);
+            }
+        }
+
+        if ( ! collator->vn )
+        {
+            collator->vn = BamBam_StrDup("1.4");
+            if ( ! collator->vn )
+                return BamBam_BamCollator_Delete(collator);
+        }
+        if ( ! collator->so )
+        {
+            collator->so = BamBam_StrDup("unknown");
+            if ( ! collator->so )
+                return BamBam_BamCollator_Delete(collator);
+        }
+
+        collator->parsedheaderinfo = BamBam_BamHeaderInfo_New(collator->vn, collator->so, collator->filteredbamheadertext);
+
+        if ( ! collator->parsedheaderinfo )
+            return BamBam_BamCollator_Delete(collator);
+
+        s = collator->bamheadertext;
+
+        /* register every @SQ line which has both a name and a length */
+        while ( *s )
+        {
+            if ( BamBam_getLineLength(s) >= 4 && s[0] == '@' && s[1] == 'S' && s[2] == 'Q' && s[3] == '\t' )
+            {
+                char const * t = s;
+                char * sn = 0;
+                long ln = -1;
+
+                while ( *t && *t != '\n' )
+                {
+                    char const * nexttab = BamBam_findNextTab(t);
+
+                    /* sequence name */
+                    if (
+                        nexttab-t >= 3
+                        &&
+                        t[0] == 'S' && t[1] == 'N' && t[2] == ':'
+                    )
+                    {
+                        /* a repeated SN field replaces the previous value */
+                        free(sn);
+                        sn = (char *)malloc((nexttab-t)-3+1);
+                        if ( ! sn )
+                            return BamBam_BamCollator_Delete(collator);
+                        memset(sn,0,(nexttab-t)-3+1);
+                        memcpy(sn,t+3,(nexttab-t)-3);
+                    }
+                    /* sequence length */
+                    if (
+                        nexttab-t >= 3
+                        &&
+                        t[0] == 'L' && t[1] == 'N' && t[2] == ':'
+                    )
+                    {
+                        char const * tt = t+3;
+
+                        ln = 0;
+
+                        /* ctype functions require values in the range of unsigned char */
+                        while ( isdigit((unsigned char)*tt) )
+                        {
+                            ln *= 10;
+                            ln += *tt-'0';
+                            tt++;
+                        }
+                    }
+
+                    t = BamBam_nextField(t);
+                }
+
+                if ( sn && ln != -1 )
+                {
+                    if ( BamBam_BamHeaderInfo_AddChromosome(collator->parsedheaderinfo,sn,ln) )
+                    {
+                        free(sn);
+                        return BamBam_BamCollator_Delete(collator);
+                    }
+                }
+
+                free(sn);
+            }
+
+            s = BamBam_nextLine(s);
+        }
+
+        if ( BamBam_BamHeaderInfo_ProduceHeaderText(collator->parsedheaderinfo) < 0 )
+            return BamBam_BamCollator_Delete(collator);
+    }
+
+    /* defaults for the case that there was no header text at all */
+    if ( ! collator->vn )
+    {
+        collator->vn = BamBam_StrDup("1.4");
+        if ( ! collator->vn )
+            return BamBam_BamCollator_Delete(collator);
+    }
+    if ( ! collator->so )
+    {
+        collator->so = BamBam_StrDup("unknown");
+        if ( ! collator->so )
+            return BamBam_BamCollator_Delete(collator);
+    }
+
+    collator->headerinfo = BamBam_BamHeaderInfo_New(collator->vn, collator->so, collator->filteredbamheadertext);
+
+    if ( ! collator->headerinfo )
+        return BamBam_BamCollator_Delete(collator);
+
+    /* second header info object, filled from the reference table of the decoder */
+    for ( i = 0; i < collator->decoder->header->n_ref; ++i )
+    {
+        if ( BamBam_BamHeaderInfo_AddChromosome(collator->headerinfo,
+            collator->decoder->header->chromosomevec[i]->name,collator->decoder->header->chromosomevec[i]->length)
+        )
+            return BamBam_BamCollator_Delete(collator);
+    }
+
+    if ( BamBam_BamHeaderInfo_ProduceHeaderText(collator->headerinfo) < 0 )
+        return BamBam_BamCollator_Delete(collator);
+
+    return collator;
+}
+
+/**
+ * fetch the next pair or orphan from the collator.
+ *
+ * returns 2 if a pair was found (*entryA is read 1, *entryB is read 2),
+ * 1 if a single alignment was found (stored in *entryB if it is flagged as
+ * read 2, in *entryA otherwise; the other pointer is null) and 0 if no
+ * alignments are left (both pointers are null).
+ *
+ * two alignments are always requested from the collator; if they do not
+ * form a pair, the second one is pushed back to the front of the output
+ * vector.
+ **/
+int BamBam_BamCollator_Get(BamBam_BamCollator * collator, BamBam_BamCollationHashEntry ** entryA, BamBam_BamCollationHashEntry ** entryB)
+{
+    BamBam_BamCollationHashEntry * first = 0;
+    BamBam_BamCollationHashEntry * second = 0;
+
+    *entryA = 0;
+    *entryB = 0;
+
+    first = BamBam_BamCollator_GetNextRead(collator);
+    second = BamBam_BamCollator_GetNextRead(collator);
+
+    /* no alignments left */
+    if ( first == 0 )
+        return 0;
+
+    if ( second != 0 )
+    {
+        /* same name? hash values are compared first as a cheap filter */
+        if ( (first->hashvalue == second->hashvalue) && (strcmp(first->qname,second->qname) == 0) )
+        {
+            int const first1  = (BamBam_BamCollationHashEntry_GetFlags(first ) & BAMBAMC_FREAD1) != 0;
+            int const first2  = (BamBam_BamCollationHashEntry_GetFlags(first ) & BAMBAMC_FREAD2) != 0;
+            int const second1 = (BamBam_BamCollationHashEntry_GetFlags(second) & BAMBAMC_FREAD1) != 0;
+            int const second2 = (BamBam_BamCollationHashEntry_GetFlags(second) & BAMBAMC_FREAD2) != 0;
+
+            /* pair */
+            if ( (first1 && second2) || (second1 && first2) )
+            {
+                *entryA = first1 ? first : second;
+                *entryB = first1 ? second : first;
+                return 2;
+            }
+        }
+
+        /* not a pair, return second alignment to queue */
+        BamBam_BamCollationOutputVector_PushFront(collator->outputvector,second);
+    }
+
+    /* orphan */
+    if ( BamBam_BamCollationHashEntry_GetFlags(first) & BAMBAMC_FREAD2 )
+        *entryB = first;
+    else
+        *entryA = first;
+
+    return 1;
+}
diff --git a/src/bambamc/BamBam_BamCollator.h b/src/bambamc/BamBam_BamCollator.h
new file mode 100644
index 0000000..1e65af5
--- /dev/null
+++ b/src/bambamc/BamBam_BamCollator.h
@@ -0,0 +1,93 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#if ! defined(BAMBAM_BAMCOLLATOR_H)
+#define BAMBAM_BAMCOLLATOR_H
+
+#include <bambamc/BamBam_Unused.h>
+#include <bambamc/BamBam_BamCollationHash.h>
+#include <bambamc/BamBam_BamCollationVector.h>
+#include <bambamc/BamBam_BamCollationOutputVector.h>
+#include <bambamc/BamBam_MergeHeapEntry.h>
+#include <bambamc/BamBam_BamHeaderInfo.h>
+#include <bambamc/BamBam_SamBamFileDecoder.h>
+#include <bambamc/BamBam_GzipFileDecoder.h>
+
+#include <bambamc/BamBam_Config.h>
+
+typedef struct _BamBam_BamCollator
+{
+ int state;
+ int tmpdirstate;
+ char * tempdirname;
+ char * tempprefix;
+ BamBam_BamCollationHash * hash;
+ BamBam_BamCollationVector * vector;
+ BamBam_BamCollationTempFileGenerator * gen;
+ BamBam_BamCollationOutputVector * outputvector; /* BamBam_BamCollator_Get pushes a read that is not the mate of its predecessor back to the front of this vector */
+
+ BamBam_SamBamFileDecoder * decoder; /* input decoder; BamBam_BamCollator_New reads the reference sequences from decoder->header->chromosomevec */
+
+ /* header text */
+ char * bamheadertext;
+ char * filteredbamheadertext;
+ char * headerline;
+ char * vn;
+ char * so;
+ BamBam_BamHeaderInfo * parsedheaderinfo;
+ BamBam_BamHeaderInfo * headerinfo; /* BamBam_BamCollator_New calls BamBam_BamHeaderInfo_ProduceHeaderText on this object */
+
+ BamBam_BamSingleAlignment * alignment;
+
+ uint64_t nummergefiles;
+ BamBam_GzipFileDecoder ** mergefiles;
+ BamBam_MergeHeapEntry * mergeheap;
+ uint64_t mergeheapfill;
+ /* bufferA/bufferB: FastQ text buffers that BamBam_ReadPair hands to BamBam_PutAlignmentFastQBuffer for the first and second read */
+ char * bufferA;
+ unsigned int bufferALength;
+ char * bufferB;
+ unsigned int bufferBLength;
+
+ int keepOrphans; /* set by BamBam_AllocBamCollator; BamBam_ReadPair reports orphaned mates only if this is non zero */
+ /* entryA/entryB: entries fetched by the last BamBam_ReadPair call; they are deleted at the start of the next call */
+ BamBam_BamCollationHashEntry * entryA;
+ BamBam_BamCollationHashEntry * entryB;
+} BamBam_BamCollator;
+
+extern void BamBam_BamCollator_MergeHeapEntrySwap(BamBam_MergeHeapEntry * A, BamBam_MergeHeapEntry * B);
+extern void BamBam_BamCollator_MergeHeapPrint(BamBam_BamCollator * collator);
+extern void BamBam_BamCollator_MergeHeapCheck(BamBam_BamCollator * collator);
+extern BamBam_MergeHeapEntry * BamBam_BamCollator_MergeHeapMinimum(BamBam_BamCollator * collator) BAMBAM_WARN_IF_UNUSEDRESULT;
+extern void BamBam_BamCollator_MergeHeapDeleteMinimum(BamBam_BamCollator * collator);
+extern void BamBam_BamCollator_MergeHeapInsert(BamBam_BamCollator * collator, BamBam_BamCollationHashEntry * hashentry, unsigned int fileid);
+extern BamBam_BamCollationHashEntry * BamBam_BamCollator_GetNextRead(BamBam_BamCollator * collator) BAMBAM_WARN_IF_UNUSEDRESULT; /* BamBam_BamCollator_Get treats a 0 result as "no alignments left" */
+extern BamBam_BamCollator * BamBam_BamCollator_Delete(BamBam_BamCollator * collator); /* NOTE(review): _New returns this call's result on its failure paths, so it presumably always yields 0 - confirm in BamBam_BamCollator.c */
+extern BamBam_BamCollator * BamBam_BamCollator_New(
+ char const * tempdirname,
+ unsigned int const hashsizelog,
+ unsigned int const vectorsizelog,
+ char const * inputformat,
+ char const * inputfilename
+) BAMBAM_WARN_IF_UNUSEDRESULT;
+extern int BamBam_BamCollator_Get( /* returns 2 = pair (entryA read 1, entryB read 2), 1 = single read in *entryA or *entryB, 0 = nothing left */
+ BamBam_BamCollator * collator,
+ BamBam_BamCollationHashEntry ** entryA,
+ BamBam_BamCollationHashEntry ** entryB) BAMBAM_WARN_IF_UNUSEDRESULT;
+#endif
diff --git a/src/bambamc/BamBam_BamCollatorInterface.c b/src/bambamc/BamBam_BamCollatorInterface.c
new file mode 100644
index 0000000..47a0d7d
--- /dev/null
+++ b/src/bambamc/BamBam_BamCollatorInterface.c
@@ -0,0 +1,228 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#include <bambamc/BamBam_BamCollatorInterface.h>
+
+#include <bambamc/BamBam_List.h>
+BamBam_List collatorList;
+
+#include <bambamc/BamBam_BamCollator.h>
+
+int BamBam_AllocBamCollator(char const * filename, char const * filetype, char const * tempdirname, int const keepOrphans)
+{
+ /* Create a collator for filename and append it to collatorList; returns its list index as handle, or -1 on failure. */
+ BamBam_BamCollator * collator = BamBam_BamCollator_New(tempdirname,16,16,filetype,filename);
+ BamBam_ListNode * node = 0;
+ /* plain int: -1 stored in an unsigned and converted back to int is implementation defined */
+ int id = -1;
+
+ if ( ! collator )
+ return id;
+
+ collator->keepOrphans = keepOrphans;
+ node = BamBam_ListNode_New();
+ if ( ! node )
+ {
+ BamBam_BamCollator_Delete(collator);
+ return id;
+ }
+
+ node->entry = collator;
+ /* the list size before the push is the index the new node gets */
+ id = (int)(BamBam_List_Size(&collatorList));
+ BamBam_ListNode_PushBack(&collatorList,node);
+
+ return id;
+}
+
+#include <assert.h>
+
+BamBam_BamCollator * BamBam_GetBamCollator(int id)
+{
+ /* Map a handle issued by BamBam_AllocBamCollator back to its collator. */
+ /* Returns 0 for an id outside the list; the stored entry itself is returned unchecked. */
+ BamBam_ListNode * cur = 0;
+ int steps = 0;
+
+ if ( id < 0 || id >= (int)(BamBam_List_Size(&collatorList)) )
+ return 0;
+
+ /* walk id links forward from the head of the list */
+ cur = collatorList.first;
+ for ( steps = id; steps != 0; --steps )
+ {
+ assert ( cur );
+ cur = cur->next;
+ }
+
+ if ( cur )
+ return cur->entry;
+
+ return 0;
+}
+
+void BamBam_FreeBamCollator(int const id)
+{
+ /* Destroy the collator behind handle id; ids outside the list are ignored. */
+ /* The list node is kept with a cleared entry, so other handles keep their index. */
+ BamBam_ListNode * cur = 0;
+ int remaining = id;
+
+ if ( id < 0 || id >= (int)(BamBam_List_Size(&collatorList)) )
+ return;
+
+ for ( cur = collatorList.first; remaining != 0; --remaining )
+ {
+ assert ( cur );
+ cur = cur->next;
+ }
+
+ if ( ! cur )
+ return;
+
+ BamBam_BamCollator_Delete((BamBam_BamCollator *)(cur->entry));
+ cur->entry = 0;
+}
+
+static void BamBam_ParseFastQ(char const * text, BamBam_FastQRead * reada, char const term)
+{
+ /* Split one four line FastQ record, whose lines end with term, into */
+ /* name, sequence and quality. The fields of reada point into text, */
+ /* nothing is copied. There is no end-of-buffer check besides term. */
+ char const * p = text;
+
+ /* '@' line: the name is everything after the marker */
+ assert ( *p == '@' );
+ reada->name = ++p;
+ for ( reada->namelength = 0; *p != term; ++p )
+ reada->namelength++;
+
+ assert ( *p == term );
+ ++p;
+
+ /* sequence line */
+ reada->seq = p;
+ for ( reada->seqlength = 0; *p != term; ++p )
+ reada->seqlength++;
+
+ assert ( *p == term );
+ ++p;
+
+ /* plus line: skipped without looking at its content */
+ while ( *p != term )
+ ++p;
+
+ assert ( *p == term );
+ ++p;
+
+ /* quality line: only its start is recorded, no length */
+ reada->qual = p;
+}
+
+#include <bambamc/BamBam_FormatAlignment.h>
+
+int BamBam_ReadPair(int const id, BamBam_FastQRead * const reada, BamBam_FastQRead * const readb, void ** aligna, void ** alignb, char const term)
+{
+ BamBam_BamCollator * collator = BamBam_GetBamCollator(id);
+ int getret = 0;
+ int ret = BAMBAM_ALIGNMENT_TYPE_UNDEFINED;
+ /* Deliver the next pair, single read or kept orphan of collator id as FastQ. Returns a BAMBAM_ALIGNMENT_TYPE_* code (UNDEFINED if id has no collator); aligna and alignb are optional outputs and may be 0. */
+ if ( !collator )
+ return ret;
+ /* loop until something reportable turns up: a dropped orphan leaves ret undefined and triggers another round */
+ while ( ret == BAMBAM_ALIGNMENT_TYPE_UNDEFINED )
+ {
+ if ( aligna )
+ *aligna = 0;
+ if ( alignb )
+ *alignb = 0;
+ BamBam_BamCollationHashEntry_Delete(collator->entryA); collator->entryA = 0;
+ BamBam_BamCollationHashEntry_Delete(collator->entryB); collator->entryB = 0;
+
+ getret = BamBam_BamCollator_Get(collator, &(collator->entryA), &(collator->entryB));
+
+ /* a pair */
+ if ( getret == 2 )
+ {
+ if ( aligna )
+ *aligna = collator->entryA->entry;
+ if ( alignb )
+ *alignb = collator->entryB->entry;
+ ret = BAMBAM_ALIGNMENT_TYPE_COMPLETE_PAIR;
+ }
+ /* single end */
+ else if (
+ getret == 1 && collator->entryA &&
+ (!((BamBam_BamCollationHashEntry_GetFlags(collator->entryA)) & BAMBAMC_FPAIRED))
+ )
+ {
+ if ( aligna )
+ *aligna = collator->entryA->entry;
+ ret = BAMBAM_ALIGNMENT_TYPE_SINGLE;
+ }
+ /* orphan, kept */
+ else if ( getret == 1 && collator->keepOrphans )
+ {
+ if ( collator->entryA )
+ {
+ if ( aligna )
+ *aligna = collator->entryA->entry;
+ ret = BAMBAM_ALIGNMENT_TYPE_ORPHAN1_PAIR;
+ }
+ else
+ {
+ if ( alignb ) *alignb = collator->entryB->entry; /* alignb is optional here as in every other branch; it used to be written unchecked */
+ ret = BAMBAM_ALIGNMENT_TYPE_ORPHAN2_PAIR;
+ }
+ }
+ /* orphan, not kept */
+ else if ( getret == 1 && ! collator->keepOrphans )
+ {
+ BamBam_BamCollationHashEntry_Delete(collator->entryA); collator->entryA = 0;
+ BamBam_BamCollationHashEntry_Delete(collator->entryB); collator->entryB = 0;
+ }
+ /* nothing */
+ else
+ {
+ assert ( getret == 0 );
+ ret = BAMBAM_ALIGNMENT_TYPE_NONE;
+ }
+ /* FastQ text is produced only for outputs the caller asked for; reada/readb then point into the collator's bufferA/bufferB */
+ if ( aligna && *aligna )
+ {
+ int const lena = BamBam_PutAlignmentFastQBuffer(*aligna,&(collator->bufferA),&(collator->bufferALength),term);
+
+ if ( lena < 0 )
+ ret = BAMBAM_ALIGNMENT_TYPE_NONE;
+ else
+ BamBam_ParseFastQ(collator->bufferA,reada,term);
+ }
+ if ( alignb && *alignb )
+ {
+ int const lenb = BamBam_PutAlignmentFastQBuffer(*alignb,&(collator->bufferB),&(collator->bufferBLength),term);
+
+ if ( lenb < 0 )
+ ret = BAMBAM_ALIGNMENT_TYPE_NONE;
+ else
+ BamBam_ParseFastQ(collator->bufferB,readb,term);
+ }
+ }
+
+ return ret;
+}
diff --git a/src/bambamc/BamBam_BamCollatorInterface.h b/src/bambamc/BamBam_BamCollatorInterface.h
new file mode 100644
index 0000000..02e49e9
--- /dev/null
+++ b/src/bambamc/BamBam_BamCollatorInterface.h
@@ -0,0 +1,29 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#if ! defined(BAMBAM_BAMCOLLATORINTERFACE_H)
+#define BAMBAM_BAMCOLLATORINTERFACE_H
+
+#include <bambamc/BamBam_Unused.h>
+#include <bambamc/BamBam_FastQRead.h>
+
+extern int BamBam_AllocBamCollator(char const * filename, char const * filetype, char const * tempdirname, int const keepOrphans) BAMBAM_WARN_IF_UNUSEDRESULT; /* returns a handle (index in the internal collator list); failure is reported as -1 */
+extern void BamBam_FreeBamCollator(int const id); /* destroys the collator behind handle id; ids outside the list are ignored */
+extern int BamBam_ReadPair(int const id, BamBam_FastQRead * const reada, BamBam_FastQRead * const readb, void ** aligna, void ** alignb, char const term) BAMBAM_WARN_IF_UNUSEDRESULT; /* returns a BAMBAM_ALIGNMENT_TYPE_* code; term is the line terminator of the generated FastQ text */
+#endif
diff --git a/src/bambamc/BamBam_BamFileDecoder.c b/src/bambamc/BamBam_BamFileDecoder.c
new file mode 100644
index 0000000..aa61b76
--- /dev/null
+++ b/src/bambamc/BamBam_BamFileDecoder.c
@@ -0,0 +1,80 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#include <bambamc/BamBam_BamFileDecoder.h>
+
+BamBam_BamSingleAlignment * BamBam_BamFileDecoder_DecodeAlignment(BamBam_BamFileDecoder * object)
+{
+ /* Load the next record from object->reader into object->alignment. */
+ /* Returns object->alignment itself (the same object is handed to the */
+ /* loader again on the next call), or 0 if the loader reports <= 0. */
+ int const loaded = BamBam_BamSingleAlignment_LoadAlignment(object->alignment,object->reader);
+
+ return ( loaded > 0 ) ? object->alignment : 0;
+}
+
+BamBam_BamFileDecoder * BamBam_BamFileDecoder_Delete(BamBam_BamFileDecoder * object)
+{
+ /* Destroy object together with its non 0 members; always returns 0, a 0 argument is accepted. */
+ if ( ! object )
+ return 0;
+ if ( object->alignment )
+ {
+ BamBam_BamSingleAlignment_Delete(object->alignment);
+ object->alignment = 0;
+ }
+ if ( object->reader )
+ {
+ BamBam_GzipReader_Delete(object->reader);
+ object->reader = 0;
+ }
+ if ( object->header )
+ {
+ BamBam_BamFileHeader_Delete(object->header);
+ object->header = 0;
+ }
+ free(object);
+ return 0;
+}
+
+BamBam_BamFileDecoder * BamBam_BamFileDecoder_New(char const * filename)
+{
+ /* Open filename as BAM: gzip reader, then header, then the reusable alignment object. Returns 0 on any failure. */
+ BamBam_BamFileDecoder * object = (BamBam_BamFileDecoder *)malloc(sizeof(BamBam_BamFileDecoder));
+
+ if ( ! object )
+ return BamBam_BamFileDecoder_Delete(object);
+
+ /* malloc() memory is indeterminate and _Delete inspects every member, */
+ /* so the members set later must be 0 before a failure path can run */
+ object->header = 0;
+ object->alignment = 0;
+ object->reader = BamBam_GzipReader_New(filename);
+ if ( ! object->reader )
+ return BamBam_BamFileDecoder_Delete(object);
+
+ object->header = BamBam_BamFileHeader_New_BAM(object->reader);
+ if ( ! object->header )
+ return BamBam_BamFileDecoder_Delete(object);
+
+ object->alignment = BamBam_BamSingleAlignment_New();
+ if ( ! object->alignment )
+ return BamBam_BamFileDecoder_Delete(object);
+
+ return object;
+}
diff --git a/src/bambamc/BamBam_BamFileDecoder.h b/src/bambamc/BamBam_BamFileDecoder.h
new file mode 100644
index 0000000..3ce2b72
--- /dev/null
+++ b/src/bambamc/BamBam_BamFileDecoder.h
@@ -0,0 +1,35 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#if ! defined(BAMBAM_BAMFILEDECODER_H)
+#define BAMBAM_BAMFILEDECODER_H
+
+#include <bambamc/BamBam_BamSingleAlignment.h>
+#include <bambamc/BamBam_BamFlagBase.h>
+
+typedef struct _BamBam_BamFileDecoder
+{
+ BamBam_GzipReader * reader; /* input stream, created from the file name in BamBam_BamFileDecoder_New */
+ BamBam_BamFileHeader * header; /* header read from reader by BamBam_BamFileHeader_New_BAM */
+ BamBam_BamSingleAlignment * alignment; /* single record object that DecodeAlignment loads into and returns */
+} BamBam_BamFileDecoder;
+
+extern BamBam_BamSingleAlignment * BamBam_BamFileDecoder_DecodeAlignment(BamBam_BamFileDecoder * object); /* returns object->alignment after loading the next record, or 0 when loading does not succeed */
+extern BamBam_BamFileDecoder * BamBam_BamFileDecoder_Delete(BamBam_BamFileDecoder * object); /* always returns 0; a 0 argument is accepted */
+extern BamBam_BamFileDecoder * BamBam_BamFileDecoder_New(char const * filename); /* returns 0 on failure */
+#endif
diff --git a/src/bambamc/BamBam_BamFileHeader.c b/src/bambamc/BamBam_BamFileHeader.c
new file mode 100644
index 0000000..d9b67f3
--- /dev/null
+++ b/src/bambamc/BamBam_BamFileHeader.c
@@ -0,0 +1,981 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#include <bambamc/BamBam_BamFileHeader.h>
+#include <bambamc/BamBam_LineParsing.h>
+#include <bambamc/BamBam_StrDup.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <ctype.h>
+
+static int chromosomeCompareNames(uint8_t const * unamea, uint8_t const * unameb)
+{
+ /* Natural order comparison: runs of decimal digits compare by numeric value, all other bytes by byte value. */
+ /* Returns <0, 0 or >0; only the sign is meaningful. Digit runs are compared as text (leading zeros skipped, */
+ /* longer run wins, then first differing digit), so runs of any length work without integer overflow. */
+ while ( *unamea && *unameb )
+ {
+ if ( isdigit(*unamea) && isdigit(*unameb) )
+ {
+ uint8_t const * enda;
+ uint8_t const * endb;
+
+ /* leading zeros do not change the value */
+ while ( *unamea == '0' ) unamea++;
+ while ( *unameb == '0' ) unameb++;
+
+ for ( enda = unamea; isdigit(*enda); ++enda ) { }
+ for ( endb = unameb; isdigit(*endb); ++endb ) { }
+
+ /* more significant digits means larger value */
+ if ( (enda-unamea) != (endb-unameb) )
+ return ((enda-unamea) < (endb-unameb)) ? -1 : 1;
+
+ /* same length: the first differing digit decides */
+ for ( ; unamea != enda; ++unamea, ++unameb )
+ if ( *unamea != *unameb )
+ return (int)(*unamea) - (int)(*unameb);
+ }
+ else if ( *unamea != *unameb )
+ return (int)(*unamea) - (int)(*unameb);
+ else
+ unamea++, unameb++;
+ }
+
+ return (int)(*unamea) - (int)(*unameb);
+}
+
+static int chromosomeNameComparison(void const * va, void const * vb)
+{
+ /* qsort() callback for arrays of BamBam_Chromosome pointers: */
+ /* each argument is the address of an array slot, so it is */
+ /* dereferenced once before the two names are compared. */
+ BamBam_Chromosome * const lhs = *((BamBam_Chromosome**)(va));
+ BamBam_Chromosome * const rhs = *((BamBam_Chromosome**)(vb));
+
+ return chromosomeCompareNames(
+ (uint8_t const *)(lhs->name),(uint8_t const *)(rhs->name));
+}
+
+
+BamBam_BamFileHeader * BamBam_BamFileHeader_Delete(BamBam_BamFileHeader * object)
+{ /* Free a header and the data it holds; accepts 0 and always returns 0 so callers can "return BamBam_BamFileHeader_Delete(header);" */
+ if ( object )
+ {
+ if ( object->headertext )
+ {
+ free(object->headertext);
+ object->headertext = 0;
+ }
+ if ( object->sortorder )
+ {
+ free(object->sortorder);
+ object->sortorder = 0;
+ }
+ if ( object->version )
+ {
+ free(object->version);
+ object->version = 0;
+ }
+ if ( object->headerlines )
+ {
+ char ** h = object->headerlines;
+ /* the array is 0 terminated; hdline is not freed separately because parseHeaderText points it at one of these lines */
+ for ( ; *h ; ++h )
+ free(*h);
+
+ free (object->headerlines);
+ object->headerlines = 0;
+ }
+ if ( object->chromosomevec ) /* only the pointer array is freed here, the chromosome objects hang off the chromosomes list */
+ {
+ free(object->chromosomevec);
+ object->chromosomevec = 0;
+ }
+ if ( object->sortedchromosomevec )
+ {
+ free(object->sortedchromosomevec);
+ object->sortedchromosomevec = 0;
+ }
+ if ( object->chromosomes )
+ {
+ BamBam_List_Delete(object->chromosomes); /* NOTE(review): presumably runs each node's bamBamListFreeFunction (chromosomeDelete) - confirm in BamBam_List.c */
+ }
+ if ( object->text )
+ {
+ free(object->text);
+ object->text = 0;
+ }
+ free(object);
+ }
+
+ return 0;
+}
+
+static void chromosomeDelete(void * chromosome)
+{ /* void* adapter around BamBam_Chromosome_Delete; installed as bamBamListFreeFunction on the chromosome list nodes */
+ BamBam_Chromosome_Delete((BamBam_Chromosome *)chromosome);
+}
+
+static BamBam_BamFileHeader * parseHeaderText(BamBam_BamFileHeader * header)
+{
+ int headerlines = 0;
+ char const * c = header->text;
+ unsigned int i;
+ char ** hc = 0;
+ /* Split header->text into headerlines and extract the @HD line with its VN: and SO: values. Returns header, or 0 after deleting header when an allocation fails. */
+ while ( *c )
+ {
+ headerlines++;
+ c = BamBam_nextLine(c);
+ }
+ /* first pass above only counted the lines; one extra slot holds the terminating 0 */
+ header->headerlines = (char **)malloc((headerlines+1) * sizeof(char const *));
+
+ if ( ! header->headerlines )
+ {
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ /* clear every slot first so that BamBam_BamFileHeader_Delete stops at the first unfilled one if a copy below fails */
+ for ( i = 0; (int)i < headerlines; ++i )
+ header->headerlines[i] = 0;
+ header->headerlines[headerlines] = 0;
+
+ headerlines = 0;
+ c = header->text;
+ /* second pass: store a private 0 terminated copy of every line */
+ while ( *c )
+ {
+ int const len = BamBam_getLineLength(c);
+
+ header->headerlines[headerlines] = malloc(len+1);
+
+ if ( ! header->headerlines[headerlines] )
+ return BamBam_BamFileHeader_Delete(header);
+
+ header->headerlines[headerlines][len] = 0;
+ memcpy(header->headerlines[headerlines],c,len);
+
+ headerlines++;
+ c = BamBam_nextLine(c);
+ }
+ /* remember the @HD line; the loop does not stop at a match, so the last such line wins */
+ for ( hc = header->headerlines; *hc; ++hc )
+ {
+ if ( strlen(*hc) >= 4 && !strncmp("@HD\t",*hc,4) )
+ header->hdline = *hc;
+ }
+ /* scan the @HD line for tab separated SO: and VN: fields and copy their values */
+ if ( header->hdline )
+ {
+ for ( c = header->hdline; *c; ++c )
+ if (
+ c[0] == '\t' &&
+ c[1] == 'S' &&
+ c[2] == 'O' &&
+ c[3] == ':' )
+ {
+ char const * d = c+4;
+ char const * e = d;
+
+ while ( *e != 0 && *e != '\t' )
+ ++e;
+
+ if ( e-d )
+ {
+ header->sortorder = (char *)malloc((e-d)+1);
+ if ( ! header->sortorder )
+ {
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ header->sortorder[e-d] = 0;
+ memcpy(header->sortorder,d,e-d);
+ }
+ }
+ else if (
+ c[0] == '\t' &&
+ c[1] == 'V' &&
+ c[2] == 'N' &&
+ c[3] == ':' )
+ {
+ char const * d = c+4;
+ char const * e = d;
+
+ while ( *e != 0 && *e != '\t' )
+ ++e;
+
+ if ( e-d )
+ {
+ header->version = (char *)malloc((e-d)+1);
+ if ( ! header->version )
+ {
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ header->version[e-d] = 0;
+ memcpy(header->version,d,e-d);
+ }
+ }
+ }
+ /* defaults used when there is no @HD line or it carries no (non empty) VN: or SO: value */
+ if ( ! header->version )
+ {
+ header->version = BamBam_StrDup("1.4");
+ if ( ! header->version )
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ if ( ! header->sortorder )
+ {
+ header->sortorder = BamBam_StrDup("unknown");
+ if ( ! header->sortorder )
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ return header;
+}
+
+static int findChromosomeByName(
+ BamBam_Chromosome const ** sc, unsigned int const numchr,
+ char const * name
+)
+{
+ /* Binary search for name in sc[0,numchr), which has to be ordered by */
+ /* chromosomeCompareNames. Returns the index within sc, or -1 if absent. */
+ int lo = 0;
+ int hi = numchr;
+ while ( lo < hi )
+ {
+ int const probe = lo + (hi-lo)/2;
+ int const cmp = chromosomeCompareNames((uint8_t const *)name,(uint8_t const *)(sc[probe]->name));
+
+ if ( cmp < 0 )
+ hi = probe;
+ else if ( cmp > 0 )
+ lo = probe+1;
+ else
+ return probe;
+ }
+ return -1;
+}
+
+int BamBam_BamFileHeader_FindChromosomeIdByName(BamBam_BamFileHeader const * header, char const * name)
+{ /* returns -1 if name is unknown; NOTE(review): the value is an index into sortedchromosomevec (name order), not into chromosomevec (file order) - confirm callers expect that */
+ return findChromosomeByName((BamBam_Chromosome const **)header->sortedchromosomevec,header->n_ref,name);
+}
+
+BamBam_Chromosome const * BamBam_BamFileHeader_FindChromosomeByName(BamBam_BamFileHeader const * header, char const * name)
+{
+ /* Look up a reference sequence by name. Returns 0 if the header */
+ /* has no sequence of that name; otherwise the result is the */
+ /* pointer stored in header->sortedchromosomevec, not a copy. */
+ int const idx = BamBam_BamFileHeader_FindChromosomeIdByName(header,name);
+
+ return ( idx >= 0 ) ? header->sortedchromosomevec[idx] : 0;
+}
+
+BamBam_BamFileHeader * BamBam_BamFileHeader_New_SAM(FILE * reader)
+{
+ BamBam_BamFileHeader * header = 0;
+ BamBam_CharBuffer * buffer = 0;
+ int headerTextComplete = 0;
+ int r = 0;
+ char ** hc = 0;
+ BamBam_ListNode * node = 0;
+ size_t i;
+ /* Read the leading '@' lines of a SAM text stream and build a header object from them (lines, @HD values, @SQ sequences). Returns 0 on failure. */
+ header = (BamBam_BamFileHeader *)malloc(sizeof(BamBam_BamFileHeader));
+
+ if ( ! header )
+ return BamBam_BamFileHeader_Delete(header);
+
+ memset(header,0,sizeof(BamBam_BamFileHeader));
+
+ buffer = BamBam_CharBuffer_New();
+
+ if ( ! buffer )
+ return BamBam_BamFileHeader_Delete(header);
+ /* collect consecutive lines starting with '@'; the first other byte ends the header and is pushed back onto the stream */
+ while ( !headerTextComplete )
+ {
+ int c = -1;
+
+ c = getc(reader);
+
+ if ( c < 0 || c != '@' )
+ {
+ headerTextComplete = 1;
+ if ( c >= 0 )
+ ungetc(c,reader);
+ }
+ else
+ {
+ while ( c >= 0 && c != '\n' )
+ {
+ BamBam_CharBuffer_PushCharQuick(buffer,c,r);
+
+ if ( r < 0 )
+ {
+ BamBam_CharBuffer_Delete(buffer);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ c = getc(reader);
+ }
+ /* end of input inside a header line (no terminating newline) is an error */
+ if ( c < 0 )
+ {
+ BamBam_CharBuffer_Delete(buffer);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ BamBam_CharBuffer_PushCharQuick(buffer,'\n',r);
+
+ if ( r < 0 )
+ {
+ BamBam_CharBuffer_Delete(buffer);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ }
+ }
+
+ BamBam_CharBuffer_PushCharQuick(buffer,0,r);
+
+ if ( r < 0 )
+ {
+ BamBam_CharBuffer_Delete(buffer);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ /* fprintf(stderr,"::HEADER::\n%s::ENDHEADER::\n",(char const *)buffer->buffer); */
+ /* hand the collected text over to the header; the buffer object is emptied before it is deleted */
+ header->text = (char *)buffer->buffer;
+ buffer->buffer = 0;
+ header->l_text = buffer->bufferfill;
+
+ BamBam_CharBuffer_Delete(buffer);
+
+ header = parseHeaderText(header);
+
+ if (! header )
+ return BamBam_BamFileHeader_Delete(header);
+
+ header->chromosomes = BamBam_List_New();
+
+ if ( !header->chromosomes )
+ return BamBam_BamFileHeader_Delete(header);
+ /* turn every @SQ line into a BamBam_Chromosome appended to header->chromosomes */
+ for ( hc = header->headerlines; *hc; ++hc )
+ {
+ if ( strlen(*hc) >= 4 && !strncmp("@SQ\t",*hc,4) )
+ {
+ BamBam_Chromosome * chr = 0;
+ BamBam_ListNode * node = 0;
+ char const * c = *hc;
+ c += 3;
+
+ char * sn = 0;
+ int32_t ln = -1;
+ /* walk the tab separated fields [c,ce) of the line */
+ while ( *c )
+ {
+ while ( *c && *c == '\t' )
+ ++c;
+ if ( *c )
+ {
+ char const * ce = c;
+
+ while ( *ce && *ce != '\t' )
+ ++ce;
+
+ if ( ce-c >= 3 && !strncmp(c,"SN:",3) )
+ {
+ /* a repeated SN: field replaces the earlier copy instead of leaking it; free(0) is a no-op */
+ free(sn);
+ sn = malloc((ce-c)-3+1);
+ if ( ! sn )
+ {
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ sn [ (ce-c)-3 ] = 0;
+ memcpy(sn,c+3,(ce-c)-3);
+ }
+ else if ( ce-c >= 3 && !strncmp(c,"LN:",3) )
+ {
+ char const * p = c+3;
+ /* accept decimal digits only and stop before int32_t overflows; */
+ /* a rejected value leaves ln < 0, which is treated as broken header below */
+ ln = 0;
+ for ( ; ln >= 0 && p != ce; ++p )
+ ln = ( isdigit((unsigned char)*p) && ln <= (2147483647-(*p-'0'))/10 )
+ ? (ln*10 + (*p-'0')) : -1;
+ }
+
+ c = ce;
+ }
+ }
+
+ /* missing information, broken header */
+ if ( ! sn || ln < 0 )
+ {
+ if ( sn )
+ {
+ free(sn);
+ sn = 0;
+ }
+
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ /* fprintf(stderr,"Seq %s %d\n", sn,ln); */
+
+ chr = BamBam_Chromosome_New(sn,ln);
+
+ free(sn);
+ sn = 0;
+
+ if ( ! chr )
+ {
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ chr->headerline = BamBam_StrDup(*hc);
+
+ if ( ! chr->headerline )
+ {
+ fprintf(stderr,"Failed to allocate memory for sequence meta data in BAM header.\n");
+ BamBam_Chromosome_Delete(chr);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ node = BamBam_ListNode_New();
+
+ if ( ! node )
+ {
+ fprintf(stderr,"Failed to allocate memory for sequence meta data in BAM header.\n");
+ BamBam_Chromosome_Delete(chr);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ node->entry = chr;
+ node->bamBamListFreeFunction = chromosomeDelete;
+
+ BamBam_ListNode_PushBack(header->chromosomes,node);
+ header->n_ref++;
+ }
+ }
+ /* ask for at least one slot: malloc(0) may legally return 0, which would turn a header without @SQ lines into a failure */
+ header->chromosomevec = (BamBam_Chromosome **)malloc((header->n_ref ? header->n_ref : 1) * sizeof(BamBam_Chromosome *));
+
+ if ( ! header->chromosomevec )
+ {
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ header->sortedchromosomevec = (BamBam_Chromosome **)malloc((header->n_ref ? header->n_ref : 1) * sizeof(BamBam_Chromosome *));
+
+ if ( ! header->sortedchromosomevec )
+ {
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ /* chromosomevec keeps the order of the @SQ lines, sortedchromosomevec is sorted by name below for binary search */
+ i = 0;
+ for ( node = header->chromosomes->first; node; node = node->next )
+ {
+ BamBam_Chromosome * chr = (BamBam_Chromosome *)node->entry;
+ header->sortedchromosomevec[i] = chr;
+ header->chromosomevec[i++] = chr;
+ }
+
+ qsort(header->sortedchromosomevec, header->n_ref, sizeof(BamBam_Chromosome *),chromosomeNameComparison);
+
+ #if 0
+ for ( i = 0; (int)i < (int)header->n_ref; ++i )
+ {
+ BamBam_Chromosome * chr = header->sortedchromosomevec[i];
+ fprintf(stderr,"seq[%d] = %s %d\n", (int)i, chr->name, (int)chr->length);
+ }
+ #endif
+
+ header->headertext = BamBam_StrDup(header->text);
+
+ if ( ! header->headertext )
+ {
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ return header;
+}
+
+BamBam_BamFileHeader * BamBam_BamFileHeader_New_BAM(BamBam_GzipReader * reader)
+{
+ BamBam_BamFileHeader * header = 0;
+ char magic[4];
+ static char const expMagic[4] = {'B','A','M',1};
+ unsigned int i;
+ int l;
+ char ** hc = 0;
+ BamBam_CharBuffer * htextbuf = 0;
+
+ header = (BamBam_BamFileHeader *)malloc(sizeof(BamBam_BamFileHeader));
+
+ if ( ! header )
+ return BamBam_BamFileHeader_Delete(header);
+
+ memset(header,0,sizeof(BamBam_BamFileHeader));
+
+ magic[0] = BamBam_GzipReader_Getc(reader);
+ magic[1] = BamBam_GzipReader_Getc(reader);
+ magic[2] = BamBam_GzipReader_Getc(reader);
+ magic[3] = BamBam_GzipReader_Getc(reader);
+
+ for ( i = 0; i < sizeof(expMagic)/sizeof(expMagic[0]); ++i )
+ if ( magic[i] != expMagic[i] )
+ {
+ fprintf(stderr,"Stream is not a BAM file (magic is wrong).\n");
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ if ( BamBam_GzipReader_GetInt32(reader,&(header->l_text)) )
+ {
+ fprintf(stderr,"Failed to read length of plain text in BAM header.\n");
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ header->text = (char *)malloc(header->l_text);
+
+ if ( header->l_text && (! header->text) )
+ {
+ fprintf(stderr,"Failed to allocate memory for plain text in BAM header.\n");
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ l = BamBam_GzipReader_Read(reader,header->text,header->l_text);
+
+ if ( l != header->l_text )
+ {
+ fprintf(stderr,"Failed to read plain text in BAM header.\n");
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ /* terminate by zero if not already so */
+ if ( (!l) || (header->text[l-1]) )
+ {
+ char * ztext = malloc(l+1);
+
+ if ( ! ztext )
+ {
+ fprintf(stderr,"Failed to allocate memory for plain text in BAM header.\n");
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ memcpy(ztext,header->text,l);
+ ztext[l] = 0;
+ free(header->text);
+ header->text = ztext;
+ header->l_text = l+1;
+ l += 1;
+ }
+
+ assert ( l == header->l_text );
+ assert ( (header->l_text != 0) && (header->text[header->l_text-1] == 0) );
+
+ if ( BamBam_GzipReader_GetInt32(reader,&(header->n_ref)) )
+ {
+ fprintf(stderr,"Failed to read number of references in BAM header.\n");
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ header->chromosomes = BamBam_List_New();
+
+ if ( ! header->chromosomes )
+ {
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ header->chromosomevec = (BamBam_Chromosome **)malloc(header->n_ref * sizeof(BamBam_Chromosome *));
+
+ if ( ! header->chromosomevec )
+ {
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ header->sortedchromosomevec = (BamBam_Chromosome **)malloc(header->n_ref * sizeof(BamBam_Chromosome *));
+
+ if ( ! header->sortedchromosomevec )
+ {
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ for ( i = 0; i < (unsigned int)(header->n_ref); ++i )
+ {
+ int32_t chrnamelen = -1;
+ char * chrname = 0;
+ int32_t chrlen = -1;
+ BamBam_Chromosome * chr = 0;
+ BamBam_ListNode * node = 0;
+
+ if ( BamBam_GzipReader_GetInt32(reader,&(chrnamelen)) )
+ {
+ fprintf(stderr,"Failed to read sequence name length in BAM header.\n");
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ chrname = (char*)malloc(chrnamelen);
+
+ if ( ! chrname )
+ {
+ fprintf(stderr,"Failed to allocate space for chromosome name while reading BAM header.\n");
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ if ( BamBam_GzipReader_Read(reader,chrname,chrnamelen) != chrnamelen )
+ {
+ fprintf(stderr,"Failed to read chromosome name while reading BAM header.\n");
+ free(chrname);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ if ( BamBam_GzipReader_GetInt32(reader,&(chrlen)) )
+ {
+ fprintf(stderr,"Failed to read sequence length in BAM header.\n");
+ free(chrname);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ chr = BamBam_Chromosome_New(chrname,chrlen);
+ free(chrname);
+
+ if ( !chr )
+ {
+ fprintf(stderr,"Failed to allocate memory for sequence meta data in BAM header.\n");
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ node = BamBam_ListNode_New();
+
+ if ( ! node )
+ {
+ fprintf(stderr,"Failed to allocate memory for sequence meta data in BAM header.\n");
+ BamBam_Chromosome_Delete(chr);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ node->entry = chr;
+ node->bamBamListFreeFunction = chromosomeDelete;
+
+ BamBam_ListNode_PushBack(header->chromosomes,node);
+
+ header->chromosomevec[i] = chr;
+ header->sortedchromosomevec[i] = chr;
+ }
+
+ qsort(header->sortedchromosomevec, header->n_ref, sizeof(BamBam_Chromosome *),chromosomeNameComparison);
+
+ /* check binary search */
+ for ( i = 0; i < (unsigned int)(header->n_ref); ++i )
+ {
+ int r = -1;
+
+ #if 0
+ fprintf(stderr,"%s\t%llu\n", header->sortedchromosomevec[i]->name, (unsigned long long)header->sortedchromosomevec[i]->length);
+ #endif
+
+ r = findChromosomeByName((BamBam_Chromosome const **)header->sortedchromosomevec,header->n_ref,header->sortedchromosomevec[i]->name);
+ assert ( r == (int)i );
+
+ /* fprintf(stderr,"%d == %d\n", r, i); */
+ }
+
+ header = parseHeaderText(header);
+
+ if ( ! header )
+ return BamBam_BamFileHeader_Delete(header);
+
+ htextbuf = BamBam_CharBuffer_New();
+
+ if ( ! htextbuf )
+ return BamBam_BamFileHeader_Delete(header);
+
+ if ( header->hdline )
+ {
+ int r = 0;
+ BamBam_CharBuffer * buffer = htextbuf;
+
+ BamBam_CharBuffer_PushString(buffer,header->hdline,r);
+ if ( r < 0 )
+ {
+ BamBam_CharBuffer_Delete(htextbuf);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ BamBam_CharBuffer_PushString(buffer,"\n",r);
+ if ( r < 0 )
+ {
+ BamBam_CharBuffer_Delete(htextbuf);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ }
+ else
+ {
+ int r = 0;
+ BamBam_CharBuffer * buffer = htextbuf;
+
+ BamBam_CharBuffer_PushString(buffer,"@HD\tVN:",r);
+ if ( r < 0 )
+ {
+ BamBam_CharBuffer_Delete(htextbuf);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ BamBam_CharBuffer_PushString(buffer,header->version,r);
+ if ( r < 0 )
+ {
+ BamBam_CharBuffer_Delete(htextbuf);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ BamBam_CharBuffer_PushString(buffer,"\tSO:",r);
+ if ( r < 0 )
+ {
+ BamBam_CharBuffer_Delete(htextbuf);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ BamBam_CharBuffer_PushString(buffer,header->sortorder,r);
+ if ( r < 0 )
+ {
+ BamBam_CharBuffer_Delete(htextbuf);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ BamBam_CharBuffer_PushString(buffer,"\n",r);
+ if ( r < 0 )
+ {
+ BamBam_CharBuffer_Delete(htextbuf);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ }
+
+ #if 0
+ for ( i = 0; i < (unsigned int)(header->n_ref); ++i )
+ {
+ BamBam_Chromosome const * chr = header->chromosomevec[i];
+ int r = 0;
+ BamBam_CharBuffer * buffer = htextbuf;
+
+ BamBam_CharBuffer_PushString(buffer,"@SQ\tSN:",r);
+ if ( r < 0 )
+ {
+ BamBam_CharBuffer_Delete(htextbuf);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ BamBam_CharBuffer_PushString(buffer,chr->name,r);
+ if ( r < 0 )
+ {
+ BamBam_CharBuffer_Delete(htextbuf);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ BamBam_CharBuffer_PushString(buffer,"\tLN:",r);
+ if ( r < 0 )
+ {
+ BamBam_CharBuffer_Delete(htextbuf);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ BamBam_CharBuffer_PushNumber(buffer,chr->length,r);
+ if ( r < 0 )
+ {
+ BamBam_CharBuffer_Delete(htextbuf);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ BamBam_CharBuffer_PushString(buffer,"\n",r);
+ if ( r < 0 )
+ {
+ BamBam_CharBuffer_Delete(htextbuf);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ }
+ #endif
+
+ /* copy rest of the lines without SQ and HD lines */
+ for ( hc = header->headerlines; *hc; ++hc )
+ {
+ /* fprintf(stderr, "Checking %s\n", *hc); */
+
+ if (
+ strlen(*hc) >= 4
+ && strncmp("@HD\t",*hc,4)
+ && strncmp("@SQ\t",*hc,4)
+ )
+ {
+ int r = 0;
+ BamBam_CharBuffer * buffer = htextbuf;
+
+ BamBam_CharBuffer_PushString(buffer,*hc,r);
+ if ( r < 0 )
+ {
+ BamBam_CharBuffer_Delete(htextbuf);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ BamBam_CharBuffer_PushString(buffer,"\n",r);
+ if ( r < 0 )
+ {
+ BamBam_CharBuffer_Delete(htextbuf);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ }
+ else if ( strlen(*hc) >= 4 && strncmp("@SQ\t",*hc,4) == 0 )
+ {
+ char const * line = *hc;
+
+ /* fprintf(stderr,"Here: %s\n", line); */
+
+ while ( *line )
+ {
+ char const * field = line;
+ char const * fielde = field;
+
+ while ( (*fielde) && (*fielde != '\t') )
+ ++fielde;
+
+ /* fprintf(stderr, "field length %d\n", (fielde-field)); */
+
+ if ( fielde-field >= 3 && field[0] == 'S' && field[1] == 'N' && field[2] == ':' )
+ {
+ char const * name = field + 3;
+ char const * namee = fielde;
+ char * cname = (char *)malloc(namee-name+1);
+ int r = -1, q = 0;
+ BamBam_CharBuffer * buffer = htextbuf;
+
+ if ( ! cname )
+ {
+ BamBam_CharBuffer_Delete(htextbuf);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ memcpy(cname,name,namee-name);
+ cname[namee-name] = 0;
+
+ r = findChromosomeByName((BamBam_Chromosome const **)header->sortedchromosomevec,header->n_ref,cname);
+
+ if ( r < 0 )
+ {
+ fprintf(stderr,"Chromosome %s in text index is not in binary header\n",cname);
+ free(cname);
+ BamBam_CharBuffer_Delete(htextbuf);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ assert ( strcmp(header->sortedchromosomevec[r]->name,cname) == 0 );
+
+ free(cname);
+ cname = 0;
+
+ header->sortedchromosomevec[r]->headerline = BamBam_StrDup(*hc);
+
+ if ( ! header->sortedchromosomevec[r]->headerline )
+ {
+ fprintf(stderr,"Unable to allocate space for header line\n");
+ BamBam_CharBuffer_Delete(htextbuf);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ BamBam_CharBuffer_PushString(buffer,*hc,q);
+ if ( q < 0 )
+ {
+ BamBam_CharBuffer_Delete(htextbuf);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ BamBam_CharBuffer_PushString(buffer,"\n",q);
+ if ( q < 0 )
+ {
+ BamBam_CharBuffer_Delete(htextbuf);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ }
+
+ line = fielde;
+ if ( *line )
+ {
+ assert ( *line == '\t' );
+ ++line;
+ }
+ }
+ }
+ }
+
+ for ( i = 0; i < (unsigned int)(header->n_ref); ++i )
+ if ( ! header->chromosomevec[i]->headerline )
+ {
+ /* fprintf(stderr,"Sequence %s is in binary header but not in text\n", header->chromosomevec[i]->name); */
+
+ BamBam_Chromosome const * chr = header->chromosomevec[i];
+ int r = 0;
+ BamBam_CharBuffer * buffer = htextbuf;
+
+ BamBam_CharBuffer_PushString(buffer,"@SQ\tSN:",r);
+ if ( r < 0 )
+ {
+ BamBam_CharBuffer_Delete(htextbuf);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ BamBam_CharBuffer_PushString(buffer,chr->name,r);
+ if ( r < 0 )
+ {
+ BamBam_CharBuffer_Delete(htextbuf);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ BamBam_CharBuffer_PushString(buffer,"\tLN:",r);
+ if ( r < 0 )
+ {
+ BamBam_CharBuffer_Delete(htextbuf);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ BamBam_CharBuffer_PushNumber(buffer,chr->length,r);
+ if ( r < 0 )
+ {
+ BamBam_CharBuffer_Delete(htextbuf);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ BamBam_CharBuffer_PushString(buffer,"\n",r);
+ if ( r < 0 )
+ {
+ BamBam_CharBuffer_Delete(htextbuf);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+ }
+
+ /* push terminator symbol */
+ if ( BamBam_CharBuffer_PushChar(htextbuf,0) < 0 )
+ {
+ BamBam_CharBuffer_Delete(htextbuf);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ header->headertext = BamBam_StrDup((char const *)(htextbuf->buffer));
+
+ if ( ! header->headertext )
+ {
+ BamBam_CharBuffer_Delete(htextbuf);
+ return BamBam_BamFileHeader_Delete(header);
+ }
+
+ BamBam_CharBuffer_Delete(htextbuf);
+
+ /* fprintf(stderr,"%s",header->headertext); */
+
+ return header;
+}
diff --git a/src/bambamc/BamBam_BamFileHeader.h b/src/bambamc/BamBam_BamFileHeader.h
new file mode 100644
index 0000000..fa81b43
--- /dev/null
+++ b/src/bambamc/BamBam_BamFileHeader.h
@@ -0,0 +1,58 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#if ! defined(BAMBAM_BAMFILEHEADER_H)
+#define BAMBAM_BAMFILEHEADER_H
+
+#include <stdint.h>
+#include <bambamc/BamBam_List.h>
+#include <bambamc/BamBam_Chromosome.h>
+#include <bambamc/BamBam_GzipReader.h>
+
+ /* parsed header of a BAM/SAM file as produced by the BamBam_BamFileHeader_New_* functions */
+ typedef struct _BamBam_BamFileHeader
+ {
+ /* length of original text */
+ int32_t l_text;
+ /* original text */
+ char * text;
+ /* number of chromosomes/references */
+ int32_t n_ref;
+ /* linked list of chromosomes */
+ BamBam_List * chromosomes;
+ /* chromosomes in numerical id order */
+ BamBam_Chromosome ** chromosomevec;
+ /* chromosomes in name sorted order */
+ BamBam_Chromosome ** sortedchromosomevec;
+ /* single header lines */
+ char ** headerlines;
+ /* HD header line */
+ char const * hdline;
+ /* sort order (coordinate or queryname) */
+ char * sortorder;
+ /* bam file version */
+ char * version;
+ /* processed header text */
+ char * headertext;
+ } BamBam_BamFileHeader;
+
+extern BamBam_BamFileHeader * BamBam_BamFileHeader_New_SAM(FILE * reader);
+extern BamBam_BamFileHeader * BamBam_BamFileHeader_New_BAM(BamBam_GzipReader * reader);
+extern BamBam_BamFileHeader * BamBam_BamFileHeader_Delete(BamBam_BamFileHeader * object);
+extern BamBam_Chromosome const * BamBam_BamFileHeader_FindChromosomeByName(BamBam_BamFileHeader const * header, char const * name);
+extern int BamBam_BamFileHeader_FindChromosomeIdByName(BamBam_BamFileHeader const * header, char const * name);
+#endif
diff --git a/src/bambamc/BamBam_BamFlagBase.h b/src/bambamc/BamBam_BamFlagBase.h
new file mode 100644
index 0000000..750487d
--- /dev/null
+++ b/src/bambamc/BamBam_BamFlagBase.h
@@ -0,0 +1,50 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#if ! defined(BAMBAM_BAMFLAGS)
+#define BAMBAM_BAMFLAGS
+
+ /*
+  * single bit masks for the flag word of an alignment record;
+  * the names follow the FLAG bits of the SAM specification, which is
+  * the authoritative source for their meaning
+  */
+ enum bam_flags
+ {
+     BAMBAMC_FPAIRED      = 0x0001u, /* bit 0 */
+     BAMBAMC_FPROPER_PAIR = 0x0002u, /* bit 1 */
+     BAMBAMC_FUNMAP       = 0x0004u, /* bit 2 */
+     BAMBAMC_FMUNMAP      = 0x0008u, /* bit 3 */
+     BAMBAMC_FREVERSE     = 0x0010u, /* bit 4 */
+     BAMBAMC_FMREVERSE    = 0x0020u, /* bit 5 */
+     BAMBAMC_FREAD1       = 0x0040u, /* bit 6 */
+     BAMBAMC_FREAD2       = 0x0080u, /* bit 7 */
+     BAMBAMC_FSECONDARY   = 0x0100u, /* bit 8 */
+     BAMBAMC_FQCFAIL      = 0x0200u, /* bit 9 */
+     BAMBAMC_FDUP         = 0x0400u  /* bit 10 */
+ };
+
+ /*
+  * numeric codes of the cigar operations; the letter given for each
+  * code is its position in the "MIDNSHP=X" table which the cigar decoder
+  * in BamBam_BamSingleAlignment.c indexes by the low 4 bits of an
+  * encoded operation
+  */
+ enum bambam_cigar_operators
+ {
+ /* 'M' */
+ BAMBAMC_CMATCH = 0,
+ /* 'I' */
+ BAMBAMC_CINS = 1,
+ /* 'D' */
+ BAMBAMC_CDEL = 2,
+ /* 'N' */
+ BAMBAMC_CREF_SKIP = 3,
+ /* 'S' */
+ BAMBAMC_CSOFT_CLIP = 4,
+ /* 'H' */
+ BAMBAMC_CHARD_CLIP = 5,
+ /* 'P' */
+ BAMBAMC_CPAD = 6,
+ /* '=' */
+ BAMBAMC_CEQUAL = 7,
+ /* 'X' */
+ BAMBAMC_CDIFF = 8
+ };
+#endif
+
diff --git a/src/bambamc/BamBam_BamHeaderInfo.c b/src/bambamc/BamBam_BamHeaderInfo.c
new file mode 100644
index 0000000..a3512d0
--- /dev/null
+++ b/src/bambamc/BamBam_BamHeaderInfo.c
@@ -0,0 +1,181 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#include <bambamc/BamBam_BamHeaderInfo.h>
+#include <bambamc/BamBam_StrDup.h>
+#include <string.h>
+#include <assert.h>
+
+ /*
+  * rebuild the header text in info->cb: one @HD line built from
+  * info->version and info->sortorder, one @SQ line (SN/LN) per entry of
+  * info->chrlist and finally info->plaintext if it is set
+  *
+  * returns -1 as soon as one of the append macros reports a negative
+  * status, otherwise the (non negative) status left by the last append
+  */
+ int BamBam_BamHeaderInfo_ProduceHeaderText(BamBam_BamHeaderInfo * info)
+ {
+     int ret = 0;
+     BamBam_ListNode * node = 0;
+     BamBam_CharBuffer * buffer = info->cb;
+
+     BamBam_CharBuffer_Reset(buffer);
+
+     BamBam_CharBuffer_PushString(buffer,"@HD\tVN:",ret);
+     if ( ret < 0 )
+         return -1;
+     BamBam_CharBuffer_PushString(buffer,info->version,ret);
+     if ( ret < 0 )
+         return -1;
+     BamBam_CharBuffer_PushString(buffer,"\tSO:",ret);
+     if ( ret < 0 )
+         return -1;
+     BamBam_CharBuffer_PushString(buffer,info->sortorder,ret);
+     if ( ret < 0 )
+         return -1;
+     BamBam_CharBuffer_PushString(buffer,"\n",ret);
+     if ( ret < 0 )
+         return -1;
+
+     for ( node = info->chrlist->first; node; node = node->next )
+     {
+         BamBam_Chromosome const * chr = (BamBam_Chromosome const *)(node->entry);
+
+         BamBam_CharBuffer_PushString(buffer,"@SQ\tSN:",ret);
+         if ( ret < 0 )
+             return -1;
+         BamBam_CharBuffer_PushString(buffer,chr->name,ret);
+         if ( ret < 0 )
+             return -1;
+         BamBam_CharBuffer_PushString(buffer,"\tLN:",ret);
+         if ( ret < 0 )
+             return -1;
+         BamBam_CharBuffer_PushNumber(buffer,chr->length,ret);
+         if ( ret < 0 )
+             return -1;
+         BamBam_CharBuffer_PushString(buffer,"\n",ret);
+         if ( ret < 0 )
+             return -1;
+     }
+
+     /*
+      * the append macro takes its status variable by name, so it must not
+      * be the body of an unbraced if; keep the macro call and its status
+      * check together inside one block
+      */
+     if ( info->plaintext )
+     {
+         BamBam_CharBuffer_PushString(buffer,info->plaintext,ret);
+         if ( ret < 0 )
+             return -1;
+     }
+
+     return ret;
+ }
+
+ /*
+  * release a header info object together with its strings, its chromosome
+  * list and its character buffer; a null pointer is accepted
+  *
+  * always returns a null pointer
+  */
+ BamBam_BamHeaderInfo * BamBam_BamHeaderInfo_Delete(BamBam_BamHeaderInfo * info)
+ {
+     if ( ! info )
+         return 0;
+
+     free(info->plaintext);
+     free(info->version);
+     free(info->sortorder);
+     BamBam_List_Delete(info->chrlist);
+     BamBam_CharBuffer_Delete(info->cb);
+     free(info);
+
+     return 0;
+ }
+
+ /*
+  * allocate a header info object holding private copies of version,
+  * sortorder and (if not null) plaintext, an empty chromosome list and an
+  * empty character buffer
+  *
+  * returns the new object or a null pointer if any allocation failed
+  */
+ BamBam_BamHeaderInfo * BamBam_BamHeaderInfo_New(char const * version, char const * sortorder, char const * plaintext)
+ {
+     /* zero filled, so every member not set below stays null */
+     BamBam_BamHeaderInfo * const fresh = (BamBam_BamHeaderInfo *)calloc(1,sizeof(BamBam_BamHeaderInfo));
+
+     if ( ! fresh )
+         return 0;
+
+     if ( ! (fresh->sortorder = BamBam_StrDup(sortorder)) )
+         return BamBam_BamHeaderInfo_Delete(fresh);
+
+     if ( ! (fresh->version = BamBam_StrDup(version)) )
+         return BamBam_BamHeaderInfo_Delete(fresh);
+
+     if ( plaintext && ! (fresh->plaintext = BamBam_StrDup(plaintext)) )
+         return BamBam_BamHeaderInfo_Delete(fresh);
+
+     if ( ! (fresh->cb = BamBam_CharBuffer_New()) )
+         return BamBam_BamHeaderInfo_Delete(fresh);
+
+     if ( ! (fresh->chrlist = BamBam_List_New()) )
+         return BamBam_BamHeaderInfo_Delete(fresh);
+
+     return fresh;
+ }
+
+ /* list free callback: destroy the chromosome stored in a list node, ignoring null entries */
+ static void BamBam_Chromosome_NodeDelete(void * node)
+ {
+     if ( ! node )
+         return;
+
+     BamBam_Chromosome_Delete((BamBam_Chromosome *)node);
+ }
+
+ /*
+  * create a chromosome from name and len and append it to info->chrlist;
+  * the list node is given the chromosome delete function as its free
+  * callback
+  *
+  * returns 0 on success and -1 if the chromosome or the list node could
+  * not be allocated
+  */
+ int BamBam_BamHeaderInfo_AddChromosome(BamBam_BamHeaderInfo * info, char const * name, uint64_t len)
+ {
+     BamBam_ListNode * listnode = 0;
+     BamBam_Chromosome * const chromosome = BamBam_Chromosome_New(name,len);
+
+     if ( ! chromosome )
+         return -1;
+
+     listnode = BamBam_ListNode_New();
+
+     if ( listnode )
+     {
+         listnode->entry = chromosome;
+         listnode->bamBamListFreeFunction = BamBam_Chromosome_NodeDelete;
+         listnode->bamBamListPrintFunction = 0;
+
+         BamBam_ListNode_PushBack(info->chrlist,listnode);
+
+         return 0;
+     }
+
+     /* no node to own the chromosome, so it is released here */
+     BamBam_Chromosome_Delete(chromosome);
+     return -1;
+ }
+
+ /*
+  * write the binary header to writer: the 4 byte magic, the length and
+  * bytes of the freshly produced header text, the number of chromosomes
+  * and then name and length of each chromosome in list order
+  *
+  * returns 0 on success and -1 as soon as one step reports failure
+  */
+ int BamBam_BamHeaderInfo_WriteBamHeader(BamBam_BamHeaderInfo * info, BamBam_BgzfCompressor * writer)
+ {
+     static char const magic[4] = { 'B', 'A', 'M', 1 };
+     int32_t const numseq = BamBam_List_Size(info->chrlist);
+     BamBam_ListNode const * cur = 0;
+
+     /* the steps run left to right and stop at the first failing one */
+     if (
+         BamBam_BamHeaderInfo_ProduceHeaderText(info) < 0
+         || BamBam_BgzfCompressor_Write(writer,(uint8_t const *)&magic[0],4) < 0
+         || BamBam_BgzfCompressor_PutInt32(writer,info->cb->bufferfill) < 0
+         || BamBam_BgzfCompressor_Write(writer,info->cb->buffer,info->cb->bufferfill) < 0
+         || BamBam_BgzfCompressor_PutInt32(writer,numseq) < 0
+     )
+         return -1;
+
+     cur = info->chrlist->first;
+     while ( cur )
+     {
+         BamBam_Chromosome const * chr = (BamBam_Chromosome *)cur->entry;
+         assert ( chr );
+
+         if ( BamBam_BgzfCompressor_PutLenStringZ(writer,chr->name) < 0 )
+             return -1;
+         if ( BamBam_BgzfCompressor_PutInt32(writer,chr->length) < 0 )
+             return -1;
+
+         cur = cur->next;
+     }
+
+     return 0;
+ }
diff --git a/src/bambamc/BamBam_BamHeaderInfo.h b/src/bambamc/BamBam_BamHeaderInfo.h
new file mode 100644
index 0000000..3a2f1c3
--- /dev/null
+++ b/src/bambamc/BamBam_BamHeaderInfo.h
@@ -0,0 +1,42 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#if ! defined(BAMBAM_BAMHEADERINFO_H)
+#define BAMBAM_BAMHEADERINFO_H
+
+#include <bambamc/BamBam_List.h>
+#include <bambamc/BamBam_CharBuffer.h>
+#include <bambamc/BamBam_Chromosome.h>
+#include <bambamc/BamBam_BgzfCompressor.h>
+
+ /* data needed to produce and write a BAM header */
+ typedef struct _BamBam_BamHeaderInfo
+ {
+ /* version string written to the VN field of the @HD line (owned copy) */
+ char * version;
+ /* sort order string written to the SO field of the @HD line (owned copy) */
+ char * sortorder;
+ /* additional header text appended after the @SQ lines, may be null (owned copy) */
+ char * plaintext;
+ /* list of BamBam_Chromosome entries in the order they were added */
+ BamBam_List * chrlist;
+ /* buffer receiving the header text produced by BamBam_BamHeaderInfo_ProduceHeaderText */
+ BamBam_CharBuffer * cb;
+ } BamBam_BamHeaderInfo;
+
+extern int BamBam_BamHeaderInfo_ProduceHeaderText(BamBam_BamHeaderInfo * info) BAMBAM_WARN_IF_UNUSEDRESULT;
+extern BamBam_BamHeaderInfo * BamBam_BamHeaderInfo_Delete(BamBam_BamHeaderInfo * info);
+extern BamBam_BamHeaderInfo * BamBam_BamHeaderInfo_New(char const * version, char const * sortorder, char const * plaintext) BAMBAM_WARN_IF_UNUSEDRESULT;
+extern int BamBam_BamHeaderInfo_AddChromosome(BamBam_BamHeaderInfo * info, char const * name, uint64_t len) BAMBAM_WARN_IF_UNUSEDRESULT;
+extern int BamBam_BamHeaderInfo_WriteBamHeader(BamBam_BamHeaderInfo * info, BamBam_BgzfCompressor * writer) BAMBAM_WARN_IF_UNUSEDRESULT;
+#endif
diff --git a/src/bambamc/BamBam_BamSingleAlignment.c b/src/bambamc/BamBam_BamSingleAlignment.c
new file mode 100644
index 0000000..c323cce
--- /dev/null
+++ b/src/bambamc/BamBam_BamSingleAlignment.c
@@ -0,0 +1,1382 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#include <bambamc/BamBam_BamSingleAlignment.h>
+#include <bambamc/BamBam_CharBuffer.h>
+#include <bambamc/BamBam_BamFlagBase.h>
+#include <assert.h>
+
+#if defined(_linux)
+#include <alloca.h>
+#endif
+
+#include <stdlib.h>
+
+ /* number of digits needed to print num in the given base; zero needs one digit */
+ static uint32_t getNumLength(uint32_t num, uint32_t const base)
+ {
+     uint32_t digits;
+
+     if ( num == 0 )
+         return 1;
+
+     for ( digits = 0; num != 0; num /= base )
+         ++digits;
+
+     return digits;
+ }
+
+
+ /*
+  * decode a little endian integer of length bytes (length <= 4) starting
+  * at D and return it as int32_t
+  *
+  * the bytes are combined in an unsigned accumulator: shifting a byte
+  * promoted to (signed) int by 24 bits is undefined for bytes >= 0x80
+  */
+ static int32_t decodeInt(uint8_t const * D, unsigned int length)
+ {
+     uint32_t bits = 0;
+     unsigned int i;
+
+     for ( i = 0; i < length; ++i )
+         bits |= ((uint32_t)D[i]) << (8*i);
+
+     return (int32_t)bits;
+ }
+
+ /*
+  * decode a little endian unsigned integer of length bytes (length <= 4)
+  * starting at D
+  *
+  * each byte is widened to uint32_t before shifting: shifting a byte
+  * promoted to (signed) int by 24 bits is undefined for bytes >= 0x80
+  */
+ static uint32_t decodeUInt(uint8_t const * D, unsigned int length)
+ {
+     uint32_t v = 0;
+     unsigned int i;
+
+     for ( i = 0; i < length; ++i )
+         v |= ((uint32_t)D[i]) << (8*i);
+
+     return v;
+ }
+
+
+ /* size in bytes of a fixed width value of aux type c, 0 if c is not a fixed width type */
+ static uint64_t getPrimLengthByType(uint8_t const c)
+ {
+     if ( c == 'A' || c == 'c' || c == 'C' )
+         return sizeof(int8_t);
+     if ( c == 's' || c == 'S' )
+         return sizeof(int16_t);
+     if ( c == 'i' || c == 'I' )
+         return sizeof(int32_t);
+     if ( c == 'f' )
+         return sizeof(float);
+
+     return 0;
+ }
+
+ /*
+  * length in bytes of the encoded aux field starting at D, where D[0] and
+  * D[1] are the tag and D[2] is the type character
+  *
+  * fixed width types: tag + type + value
+  * 'Z' and 'H'      : tag + type + text + terminating zero byte
+  * 'B'              : tag + type + element type + 4 byte count + elements
+  *
+  * returns -1 for an unknown type character
+  */
+ static int64_t getAuxLength(uint8_t const * D)
+ {
+     switch ( D[2] )
+     {
+         case 'A': case 'c': case 'C': case 's': case 'S': case 'i': case 'I': case 'f':
+             return 2+1+getPrimLengthByType(D[2]);
+         /* 'Z' strings are stored exactly like 'H' strings (zero terminated text) */
+         case 'H':
+         case 'Z':
+         {
+             uint64_t len = 2+1;
+             D += len;
+             while ( *D )
+                 len++, D++;
+             /* count the terminator */
+             len++;
+             return len;
+         }
+         case 'B':
+         {
+             uint8_t const eltype = D[3];
+             uint32_t const numel = decodeUInt(D+4,4);
+             return 2/*tag*/+1/*B*/+1/*type*/+4/* array length */+numel*getPrimLengthByType(eltype);
+         }
+         default:
+         {
+             return -1;
+         }
+     }
+ }
+ /* reinterpret the 32 bits decoded from an aux field as a float (store uvalue, read fvalue) */
+ union numberpun
+ {
+ float fvalue;
+ uint32_t uvalue;
+ };
+
+
+ /*
+  * append the text form of one fixed width value of aux type `type`
+  * stored at p to buffer; *width receives the number of bytes the value
+  * occupies (0 if type is not a fixed width type)
+  *
+  * returns a negative value on failure or unsupported type
+  */
+ static int auxPrimitiveToString(uint8_t const type, uint8_t const * p, BamBam_CharBuffer * buffer, unsigned int * width)
+ {
+     int r = 0;
+     int issigned = 0;
+     int32_t sv = 0;
+     uint32_t uv = 0;
+
+     switch ( type )
+     {
+         case 'A':
+         {
+             *width = 1;
+             return BamBam_CharBuffer_PushChar(buffer,p[0]);
+         }
+         case 'f':
+         {
+             /* fixed size scratch space instead of alloca, which would grow the stack for every array element */
+             char text[32];
+             char * tmpmem = text;
+             union numberpun np;
+
+             np.uvalue = decodeUInt(p,4);
+             memset(text,0,sizeof(text));
+             snprintf(text,sizeof(text)-1,"%f",np.fvalue);
+
+             BamBam_CharBuffer_PushString(buffer,tmpmem,r);
+
+             *width = 4;
+             return r;
+         }
+         case 'c': sv = ((int8_t const *)p)[0];   issigned = 1; *width = 1; break;
+         case 'C': uv = p[0];                                      *width = 1; break;
+         case 's': sv = (int16_t)decodeInt(p,2);   issigned = 1; *width = 2; break;
+         case 'S': uv = (uint16_t)decodeUInt(p,2);                 *width = 2; break;
+         case 'i': sv = decodeInt(p,4);            issigned = 1; *width = 4; break;
+         case 'I': uv = decodeUInt(p,4);                           *width = 4; break;
+         default:
+         {
+             *width = 0;
+             return -1;
+         }
+     }
+
+     if ( issigned )
+     {
+         if ( sv < 0 )
+         {
+             BamBam_CharBuffer_PushCharQuick(buffer,'-',r);
+             /* unsigned negation, well defined even for the smallest int32_t */
+             uv = (uint32_t)0 - (uint32_t)sv;
+         }
+         else
+         {
+             uv = (uint32_t)sv;
+         }
+     }
+
+     if ( r >= 0 )
+     {
+         BamBam_CharBuffer_PushNumber(buffer,uv,r);
+     }
+
+     return r;
+ }
+
+ /*
+  * append the text form of the value of the encoded aux field starting
+  * at D (D[0],D[1] tag, D[2] type, value from D[3]) to buffer
+  *
+  * arrays ('B') are written as element type followed by ",value" for
+  * each element
+  *
+  * returns a negative value on failure, for an unknown field type and
+  * for an array with an unknown element type
+  */
+ static int auxValueToString(uint8_t const * D, BamBam_CharBuffer * buffer)
+ {
+     int r = 0;
+     unsigned int width = 0;
+
+     switch ( D[2] )
+     {
+         case 'A': case 'c': case 'C': case 's': case 'S': case 'i': case 'I': case 'f':
+         {
+             r = auxPrimitiveToString(D[2],D+3,buffer,&width);
+             break;
+         }
+         case 'H':
+         case 'Z':
+         {
+             uint8_t const * p = D+3;
+             while ( *p && (r >= 0) )
+             {
+                 BamBam_CharBuffer_PushCharQuick(buffer,*(p++),r);
+             }
+             break;
+         }
+         case 'B':
+         {
+             uint8_t const type = D[3];
+             uint32_t const len = decodeUInt(D+4,4);
+             uint8_t const * p = D+8;
+             uint32_t i;
+
+             BamBam_CharBuffer_PushCharQuick(buffer,type,r);
+
+             for ( i = 0; (r >= 0) && i < len; ++i )
+             {
+                 BamBam_CharBuffer_PushCharQuick(buffer,',',r);
+                 if ( r < 0 )
+                     break;
+
+                 if ( type == 'Z' )
+                 {
+                     /* zero terminated text as element, kept as handled before */
+                     while ( *p && (r >= 0) )
+                     {
+                         BamBam_CharBuffer_PushCharQuick(buffer,*(p++),r);
+                     }
+                     if ( r >= 0 )
+                     {
+                         assert ( ! *p );
+                     }
+                     p += 1;
+                 }
+                 else
+                 {
+                     /* fails for an unknown element type, which ends the loop */
+                     r = auxPrimitiveToString(type,p,buffer,&width);
+                     p += width;
+                 }
+             }
+             break;
+         }
+         /* unknown data type */
+         default:
+         {
+             r = -1;
+         }
+     }
+
+     return r;
+ }
+
+ /*
+  * append the aux field starting at D to buffer in the form
+  * <tag char 1><tag char 2>:<type char>:<value>
+  *
+  * returns a negative value as soon as one append fails
+  */
+ static int auxToString(uint8_t const * D, BamBam_CharBuffer * buffer)
+ {
+     uint8_t prefix[5];
+     unsigned int k;
+     int r = 0;
+
+     prefix[0] = D[0];
+     prefix[1] = D[1];
+     prefix[2] = ':';
+     prefix[3] = D[2];
+     prefix[4] = ':';
+
+     for ( k = 0; r >= 0 && k < 5; ++k )
+     {
+         BamBam_CharBuffer_PushCharQuick(buffer,prefix[k],r);
+     }
+
+     if ( r >= 0 )
+         r = auxValueToString(D,buffer);
+
+     return r;
+ }
+
+ /*
+  * append the text form of the single encoded aux field at p to
+  * algn->auxbuffer; the buffer is emptied first if reset is non zero
+  *
+  * returns the status of the conversion (negative on failure)
+  */
+ int BamBam_BamSingleAlignment_DecodeAuxSingle(
+     BamBam_BamSingleAlignment * algn,
+     uint8_t const * p,
+     int reset)
+ {
+     if ( reset )
+     {
+         BamBam_CharBuffer_Reset(algn->auxbuffer);
+     }
+
+     return auxToString(p,algn->auxbuffer);
+ }
+
+ /*
+  * convert all aux fields of algn to text, separated by tabs, in
+  * algn->auxbuffer
+  *
+  * returns a pointer to the zero terminated text inside the buffer (the
+  * terminator is not counted in bufferfill) or a null pointer if a field
+  * is truncated, has an unknown type or an append failed
+  */
+ char const * BamBam_BamSingleAlignment_DecodeAux(BamBam_BamSingleAlignment * algn)
+ {
+     int r = 0;
+     uint8_t const * const dataa = BamBam_BamSingleAlignment_GetEncodedAux(algn);
+     uint8_t const * const datae = algn->data + algn->dataused;
+     uint8_t const * data = dataa;
+
+     BamBam_CharBuffer_Reset(algn->auxbuffer);
+
+     while ( r >= 0 && data < datae && *data )
+     {
+         int64_t fieldlen = -1;
+
+         /* tag and type character have to be inside the record */
+         if ( datae - data >= 3 )
+             fieldlen = getAuxLength(data);
+
+         /* never step backwards or stand still on a field we cannot measure */
+         if ( fieldlen <= 0 )
+         {
+             r = -1;
+             break;
+         }
+
+         if ( data != dataa )
+             r = BamBam_CharBuffer_PushChar(algn->auxbuffer,'\t');
+         if ( r >= 0 )
+             r = BamBam_BamSingleAlignment_DecodeAuxSingle(algn,data,0);
+
+         data += fieldlen;
+     }
+
+     if ( r >= 0 )
+     {
+         r = BamBam_CharBuffer_PushChar(algn->auxbuffer,0);
+         /* only uncount the terminator if it was actually stored */
+         if ( r >= 0 )
+             algn->auxbuffer->bufferfill -= 1;
+     }
+
+     if ( r >= 0 )
+         return (char const *)algn->auxbuffer->buffer;
+     else
+         return 0;
+ }
+
+ /*
+  * search the aux fields of algn for the two character tag
+  *
+  * returns a pointer to the start of the encoded field (its tag) or a
+  * null pointer if the tag is not present; the search also stops at a
+  * field which is truncated or has an unknown type
+  */
+ uint8_t const * BamBam_BamSingleAlignment_FindAux(BamBam_BamSingleAlignment const * algn, char const tag[2])
+ {
+     uint8_t const * data = BamBam_BamSingleAlignment_GetEncodedAux(algn);
+     uint8_t const * datae = algn->data + algn->dataused;
+
+     /* tag and type character have to be inside the record */
+     while ( datae - data >= 3 && *data )
+     {
+         int64_t fieldlen;
+
+         if ( data[0] == tag[0] && data[1] == tag[1] )
+             return data;
+
+         fieldlen = getAuxLength(data);
+
+         /* a field we cannot measure must not move the cursor backwards */
+         if ( fieldlen <= 0 )
+             break;
+
+         data += fieldlen;
+     }
+
+     return 0;
+ }
+
+ /*
+  * number of bytes covered by the aux fields of algn, counted from the
+  * start of the aux area up to the first zero tag byte, the end of the
+  * record data or the first field that is truncated or has an unknown
+  * type
+  */
+ uint64_t BamBam_BamSingleAlignment_GetAuxLength(BamBam_BamSingleAlignment const * algn)
+ {
+     uint8_t const * dataa = BamBam_BamSingleAlignment_GetEncodedAux(algn);
+     uint8_t const * data = dataa;
+     uint8_t const * datae = algn->data + algn->dataused;
+
+     /* tag and type character have to be inside the record */
+     while ( datae - data >= 3 && *data )
+     {
+         int64_t const fieldlen = getAuxLength(data);
+
+         /* a field we cannot measure must not move the cursor backwards */
+         if ( fieldlen <= 0 )
+             break;
+
+         data += fieldlen;
+     }
+
+     return data-dataa;
+ }
+
+ /*
+  * length of the text form of ncigar encoded cigar operations starting
+  * at D (4 bytes each): the decimal digits of the operation length
+  * (upper 28 bits) plus one operator character per operation
+  */
+ static uint32_t getCigarStringLength(uint8_t const * D, uint32_t ncigar)
+ {
+     uint32_t total = 0;
+
+     while ( ncigar-- )
+     {
+         uint32_t const oplen = decodeUInt(D,4) >> 4;
+
+         total += getNumLength(oplen,10) + 1;
+         D += 4;
+     }
+
+     return total;
+ }
+
+
+ /* reference id: signed 32 bit value at byte offset 0 of the record */
+ int32_t BamBam_BamSingleAlignment_GetRefId(BamBam_BamSingleAlignment const * data)
+ {
+     uint8_t const * const field = &data->data[0];
+     return decodeInt(field,4);
+ }
+
+ /* position: signed 32 bit value at byte offset 4 */
+ int32_t BamBam_BamSingleAlignment_GetPos(BamBam_BamSingleAlignment const * data)
+ {
+     uint8_t const * const field = &data->data[4];
+     return decodeInt(field,4);
+ }
+
+ /* packed bin/mq/nl word: unsigned 32 bit value at byte offset 8 */
+ uint32_t BamBam_BamSingleAlignment_GetBinMQNL(BamBam_BamSingleAlignment const * data)
+ {
+     uint8_t const * const field = &data->data[8];
+     return decodeUInt(field,4);
+ }
+
+ /* bin: upper 16 bits of the bin/mq/nl word */
+ uint32_t BamBam_BamSingleAlignment_GetBin(BamBam_BamSingleAlignment const * data)
+ {
+     uint32_t const packed = BamBam_BamSingleAlignment_GetBinMQNL(data);
+     return (packed >> 16) & 0xFFFFU;
+ }
+
+ /* mq: bits 8 to 15 of the bin/mq/nl word */
+ uint32_t BamBam_BamSingleAlignment_GetMQ(BamBam_BamSingleAlignment const * data)
+ {
+     uint32_t const packed = BamBam_BamSingleAlignment_GetBinMQNL(data);
+     return (packed >> 8) & 0xFFU;
+ }
+
+ /* nl: lowest 8 bits of the bin/mq/nl word (used below as byte length of the read name area) */
+ uint32_t BamBam_BamSingleAlignment_GetNL(BamBam_BamSingleAlignment const * data)
+ {
+     uint32_t const packed = BamBam_BamSingleAlignment_GetBinMQNL(data);
+     return packed & 0xFFU;
+ }
+
+ /* packed flag/nc word: unsigned 32 bit value at byte offset 12 */
+ uint32_t BamBam_BamSingleAlignment_GetFlagNC(BamBam_BamSingleAlignment const * data)
+ {
+     uint8_t const * const field = &data->data[12];
+     return decodeUInt(field,4);
+ }
+
+ /* flags: upper 16 bits of the flag/nc word */
+ uint32_t BamBam_BamSingleAlignment_GetFlags(BamBam_BamSingleAlignment const * data)
+ {
+     uint32_t const packed = BamBam_BamSingleAlignment_GetFlagNC(data);
+     return (packed >> 16) & 0xFFFFU;
+ }
+
+ /* nc: lower 16 bits of the flag/nc word (number of encoded cigar operations) */
+ uint32_t BamBam_BamSingleAlignment_GetNC(BamBam_BamSingleAlignment const * data)
+ {
+     uint32_t const packed = BamBam_BamSingleAlignment_GetFlagNC(data);
+     return packed & 0xFFFFU;
+ }
+
+ /* sequence length: signed 32 bit value at byte offset 16 */
+ int32_t BamBam_BamSingleAlignment_GetLSeq(BamBam_BamSingleAlignment const * data)
+ {
+     uint8_t const * const field = &data->data[16];
+     return decodeInt(field,4);
+ }
+
+ /* next reference id: signed 32 bit value at byte offset 20 */
+ int32_t BamBam_BamSingleAlignment_GetNextRefID(BamBam_BamSingleAlignment const * data)
+ {
+     uint8_t const * const field = &data->data[20];
+     return decodeInt(field,4);
+ }
+
+ /* next position: signed 32 bit value at byte offset 24 */
+ int32_t BamBam_BamSingleAlignment_GetNextPos(BamBam_BamSingleAlignment const * data)
+ {
+     uint8_t const * const field = &data->data[24];
+     return decodeInt(field,4);
+ }
+
+ /* template length: signed 32 bit value at byte offset 28 */
+ int32_t BamBam_BamSingleAlignment_GetTLen(BamBam_BamSingleAlignment const * data)
+ {
+     uint8_t const * const field = &data->data[28];
+     return decodeInt(field,4);
+ }
+
+ /* read name: starts at byte offset 32, directly after the fixed size fields */
+ char const * BamBam_BamSingleAlignment_GetReadName(BamBam_BamSingleAlignment const * data)
+ {
+     uint8_t const * const name = data->data + 32;
+     return (char const *)name;
+ }
+
+ /* encoded cigar: follows the read name area of GetNL() bytes */
+ uint8_t const * BamBam_BamSingleAlignment_GetEncodedCigar(BamBam_BamSingleAlignment const * data)
+ {
+     uint8_t const * const name = (uint8_t const *)BamBam_BamSingleAlignment_GetReadName(data);
+     return name + BamBam_BamSingleAlignment_GetNL(data);
+ }
+
+ /* encoded query: follows the GetNC() cigar operations of 4 bytes each */
+ uint8_t const * BamBam_BamSingleAlignment_GetEncodedQuery(BamBam_BamSingleAlignment const * data)
+ {
+     uint8_t const * const cigar = BamBam_BamSingleAlignment_GetEncodedCigar(data);
+     return cigar + 4 * BamBam_BamSingleAlignment_GetNC(data);
+ }
+
+ /* encoded qualities: follow the query, which packs two bases per byte */
+ uint8_t const * BamBam_BamSingleAlignment_GetEncodedQual(BamBam_BamSingleAlignment const * data)
+ {
+     uint8_t const * const query = BamBam_BamSingleAlignment_GetEncodedQuery(data);
+     return query + (BamBam_BamSingleAlignment_GetLSeq(data)+1)/2;
+ }
+
+ /* encoded aux fields: follow the qualities, which use one byte per base */
+ uint8_t const * BamBam_BamSingleAlignment_GetEncodedAux(BamBam_BamSingleAlignment const * data)
+ {
+     uint8_t const * const qual = BamBam_BamSingleAlignment_GetEncodedQual(data);
+     return qual + (BamBam_BamSingleAlignment_GetLSeq(data));
+ }
+
+ /*
+  * decode the 4 bit packed query sequence of algn into the zero
+  * terminated string algn->query, growing the buffer if necessary
+  *
+  * if rc is non zero the reverse complement is produced, i.e. the bases
+  * are written back to front using the complemented symbol table:
+  *
+  * M={A,C} -> K={G,T}
+  * R={A,G} -> Y={C,T}
+  * S={C,G} -> S={C,G}
+  * V={A,C,G} -> B={C,G,T}
+  * W={A,T} -> W={A,T}
+  * Y={C,T} -> R={A,G}
+  * H={A,C,T} -> D={A,G,T}
+  * K={G,T} -> M={A,C}
+  * D={A,G,T} -> H={A,C,T}
+  * B={C,G,T} -> V={A,C,G}
+  *
+  * returns the sequence length or -1 if the stored length is negative or
+  * memory allocation failed
+  */
+ int BamBam_BamSingleAlignment_DecodeQueryRc(BamBam_BamSingleAlignment * algn, int const rc)
+ {
+     static char const * mapping = "=ACMGRSVTWYHKDBN";
+     static char const * rmapping = "=TGKCYSBAWRDMHVN";
+     /* length of query */
+     int32_t const lseq = BamBam_BamSingleAlignment_GetLSeq(algn);
+     char const * const table = rc ? rmapping : mapping;
+     uint8_t const * in = 0;
+     int32_t i;
+
+     /* the length is read from the record; a negative value would index before the buffer */
+     if ( lseq < 0 )
+         return -1;
+
+     /* also allocate if there is no buffer yet, the terminator below needs one byte */
+     if ( ! algn->query || lseq > algn->queryspace )
+     {
+         free(algn->query);
+         algn->query = 0;
+         /* keep the recorded capacity in step with the buffer if malloc fails */
+         algn->queryspace = 0;
+         algn->query = (char *)malloc(((size_t)lseq)+1);
+         if ( ! algn->query )
+             return -1;
+         algn->queryspace = lseq;
+     }
+     algn->query[lseq] = 0;
+
+     in = BamBam_BamSingleAlignment_GetEncodedQuery(algn);
+
+     for ( i = 0; i < lseq; ++i )
+     {
+         /* even bases are in the high nibble, odd bases in the low nibble */
+         uint8_t const packed = in[i/2];
+         unsigned int const code = (i & 1) ? (packed & 0xF) : ((packed >> 4) & 0xF);
+
+         algn->query[rc ? (lseq-1-i) : i] = table[code];
+     }
+
+     return lseq;
+ }
+
+ /* decode the query sequence, reverse complemented if the reverse flag of algn is set */
+ int BamBam_BamSingleAlignment_DecodeQuery(BamBam_BamSingleAlignment * algn)
+ {
+     uint32_t const flags = BamBam_BamSingleAlignment_GetFlags(algn);
+
+     return BamBam_BamSingleAlignment_DecodeQueryRc(algn,(flags & BAMBAMC_FREVERSE) ? 1 : 0);
+ }
+
+ /*
+  * decode the quality values of algn into the zero terminated string
+  * algn->qual (each value plus 33), growing the buffer if necessary; if
+  * rc is non zero the values are written back to front
+  *
+  * returns the sequence length or -1 if the stored length is negative or
+  * memory allocation failed
+  */
+ int BamBam_BamSingleAlignment_DecodeQualRc(BamBam_BamSingleAlignment * algn, int const rc)
+ {
+     int32_t const lseq = BamBam_BamSingleAlignment_GetLSeq(algn);
+     uint8_t const * in = 0;
+     int32_t i;
+
+     /* the length is read from the record; a negative value would index before the buffer */
+     if ( lseq < 0 )
+         return -1;
+
+     /* also allocate if there is no buffer yet, the terminator below needs one byte */
+     if ( ! algn->qual || lseq > algn->qualspace )
+     {
+         free(algn->qual);
+         algn->qual = 0;
+         /* keep the recorded capacity in step with the buffer if malloc fails */
+         algn->qualspace = 0;
+         algn->qual = (char *)malloc(((size_t)lseq)+1);
+         if ( ! algn->qual )
+             return -1;
+         algn->qualspace = lseq;
+     }
+     algn->qual[lseq] = 0;
+
+     in = BamBam_BamSingleAlignment_GetEncodedQual(algn);
+
+     for ( i = 0; i < lseq; ++i )
+         algn->qual[rc ? (lseq-1-i) : i] = in[i]+33;
+
+     return lseq;
+ }
+
+ /* decode the quality string, reversed if the reverse flag of algn is set */
+ int BamBam_BamSingleAlignment_DecodeQual(BamBam_BamSingleAlignment * algn)
+ {
+     uint32_t const flags = BamBam_BamSingleAlignment_GetFlags(algn);
+
+     return BamBam_BamSingleAlignment_DecodeQualRc(algn,(flags & BAMBAMC_FREVERSE) ? 1 : 0);
+ }
+
+ /*
+  * write the text form "<length><operator>" of the encoded cigar
+  * operation at ecigar (4 bytes: length in the upper 28 bits, operator
+  * code in the lower 4 bits) to out
+  *
+  * returns the position following the written characters
+  */
+ static char * decodeCigarOperationInto(char * out, uint8_t const * ecigar)
+ {
+     static char const * opc = "MIDNSHP=X???????";
+     uint32_t const oppair = decodeUInt(ecigar,4);
+     uint32_t len = oppair >> 4;
+     /* a length of zero has one digit and is written as '0' by the loop below */
+     uint32_t const numlen = getNumLength(len,10);
+     char * pout = out + numlen;
+     uint32_t j;
+
+     /* digits are produced least significant first, so fill from the back */
+     for ( j = 0; j < numlen; ++j )
+     {
+         *(--pout) = '0'+(len%10);
+         len /= 10;
+     }
+     out += numlen;
+
+     *(out++) = opc[oppair & 0xF];
+
+     return out;
+ }
+
+ /*
+  * decode the cigar operations of algn into the zero terminated string
+  * algn->cigar, growing the buffer if necessary; if rc is non zero the
+  * operations are written in reverse order
+  *
+  * returns the length of the cigar string or -1 if memory allocation
+  * failed
+  */
+ int BamBam_BamSingleAlignment_DecodeCigarRc(BamBam_BamSingleAlignment * algn, int const rc)
+ {
+     uint8_t const * ecigar = BamBam_BamSingleAlignment_GetEncodedCigar(algn);
+     uint32_t const ncigar = BamBam_BamSingleAlignment_GetNC(algn);
+     int32_t const cigstrlen = getCigarStringLength(ecigar,ncigar);
+     uint32_t i;
+     char * out = 0;
+
+     /* also allocate if there is no buffer yet: a record without cigar operations still stores the terminator */
+     if ( ! algn->cigar || cigstrlen > algn->cigarspace )
+     {
+         free(algn->cigar);
+         algn->cigar = 0;
+         /* keep the recorded capacity in step with the buffer if malloc fails */
+         algn->cigarspace = 0;
+         algn->cigar = (char *)malloc(((size_t)cigstrlen)+1);
+         if ( ! algn->cigar )
+             return -1;
+         algn->cigarspace = cigstrlen;
+     }
+     algn->cigar[cigstrlen] = 0;
+
+     out = algn->cigar;
+
+     for ( i = 0; i < ncigar; ++i )
+     {
+         uint32_t const opindex = rc ? (ncigar-1-i) : i;
+
+         out = decodeCigarOperationInto(out,ecigar + 4*opindex);
+     }
+
+     return cigstrlen;
+ }
+
+/*
+ * Decode the cigar string of algn via BamBam_BamSingleAlignment_DecodeCigarRc.
+ * The operations are requested in reversed order exactly if the
+ * BAMBAMC_FREVERSE bit is set in the flags of the alignment. Returns the
+ * result of that call.
+ */
+int BamBam_BamSingleAlignment_DecodeCigar(BamBam_BamSingleAlignment * algn)
+{
+	uint32_t const flags = BamBam_BamSingleAlignment_GetFlags(algn);
+	int const reverse = (flags & BAMBAMC_FREVERSE) ? 1 : 0;
+
+	return BamBam_BamSingleAlignment_DecodeCigarRc(algn,reverse);
+}
+
+/*
+ * Decode query sequence, quality string and cigar string of algn in this
+ * order, handing rc to each of the three decoders.
+ *
+ * querylen: receives the result of the query decoder (also when it is negative)
+ * cigarlen: receives the result of the cigar decoder; only written if the
+ *           query and quality steps succeeded
+ *
+ * Returns the result of the cigar decoder on success. Returns the negative
+ * result of the query decoder if that step fails and -1 if the quality
+ * decoder does not return the same length as the query decoder.
+ */
+int32_t BamBam_BamSingleAlignment_DecodeQueryQualCigarRc(
+	BamBam_BamSingleAlignment * algn, int rc, int32_t * querylen, int32_t * cigarlen)
+{
+	int32_t const qlen = BamBam_BamSingleAlignment_DecodeQueryRc(algn,rc);
+	int32_t clen;
+
+	*querylen = qlen;
+	if ( qlen < 0 )
+		return qlen;
+
+	/* a failing quality decoder returns -1, which can never equal qlen here */
+	if ( BamBam_BamSingleAlignment_DecodeQualRc(algn,rc) != qlen )
+		return -1;
+
+	clen = BamBam_BamSingleAlignment_DecodeCigarRc(algn,rc);
+	*cigarlen = clen;
+
+	return clen;
+}
+
+
+/*
+ * Decode query, quality and cigar strings of algn via
+ * BamBam_BamSingleAlignment_DecodeQueryQualCigarRc. The reversed form is
+ * requested exactly if the BAMBAMC_FREVERSE bit is set in the flags of the
+ * alignment. querylen and cigarlen are passed through; the result of the
+ * call is returned unchanged.
+ */
+int32_t BamBam_BamSingleAlignment_DecodeQueryQualCigar(
+	BamBam_BamSingleAlignment * algn, int32_t * querylen, int32_t * cigarlen
+)
+{
+	uint32_t const flags = BamBam_BamSingleAlignment_GetFlags(algn);
+	int const reverse = (flags & BAMBAMC_FREVERSE) ? 1 : 0;
+
+	return BamBam_BamSingleAlignment_DecodeQueryQualCigarRc(algn,reverse,querylen,cigarlen);
+}
+
+/*
+ * Write the alignment block of data to writer: first the number of used
+ * bytes (data->dataused) as a 32 bit integer, then the block itself.
+ *
+ * Returns 0 if both writes succeed and -1 otherwise.
+ */
+int BamBam_BamSingleAlignment_StoreAlignment(BamBam_BamSingleAlignment const * data, BamBam_GzipWriter * writer)
+{
+	/* length of entry */
+	if ( BamBam_GzipWriter_PutInt32(writer,data->dataused) >= 0 )
+	{
+		char const * const payload = (char const *)(data->data);
+
+		/* entry; the writer has to report exactly dataused bytes */
+		if ( BamBam_GzipWriter_Write(writer,payload,data->dataused) == (int64_t)(data->dataused) )
+			return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Write the alignment block of data to the BGZF compressor writer: first
+ * data->dataused as four bytes, least significant byte first, then the
+ * block itself.
+ *
+ * Returns 0 on success and -1 if one of the two writes reports an error.
+ */
+int BamBam_BamSingleAlignment_StoreAlignmentBgzf(BamBam_BamSingleAlignment const * data, BamBam_BgzfCompressor * writer)
+{
+	uint8_t lenbytes[4];
+	unsigned int b;
+
+	/* serialise the block length byte by byte, lowest byte first */
+	for ( b = 0; b < 4; ++b )
+		lenbytes[b] = (uint8_t)((data->dataused >> (8*b)) & 0xFFu);
+
+	/* the block is only written if the length was written successfully */
+	if (
+		BamBam_BgzfCompressor_Write(writer,&lenbytes[0],4) < 0
+		||
+		BamBam_BgzfCompressor_Write(writer,data->data,data->dataused) < 0
+	)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Lookup table indexed by byte value: 1 if the byte is accepted inside a
+ * query name, 0 if it is rejected. Each row below covers 32 byte values.
+ * Accepted are the values 33..63 ('!'..'?') and 65..126 ('A'..'~').
+ * Rejected are the control characters 0..31, the space character (32),
+ * '@' (64), 127 and all values from 128 upwards.
+ */
+static uint8_t const bambamc_qnameValidTable[256] = {
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+};
+
+/* Store the validation failure code in data->valid and return -1. */
+static int BamBam_BamSingleAlignment_Reject(BamBam_BamSingleAlignment * data, bambamc_alignment_validity const code)
+{
+	data->valid = code;
+	return -1;
+}
+
+/*
+ * Check that the auxiliary fields stored in [auxc,ue) are well formed, i.e.
+ * that every field has a known value type and lies completely inside the
+ * block. Returns 1 if so and 0 otherwise.
+ */
+static int BamBam_BamSingleAlignment_AuxValid(uint8_t const * auxc, uint8_t const * const ue)
+{
+	while ( auxc != ue )
+	{
+		int64_t const avail = (int64_t)(ue-auxc);
+		uint64_t need = 0;
+
+		/* two bytes of tag and one byte of value type */
+		if ( avail < 3 )
+			return 0;
+
+		switch ( auxc[2] )
+		{
+			/* single printable character */
+			case 'A':
+				if ( avail < 3+1 || auxc[3] < '!' || auxc[3] > '~' )
+					return 0;
+				need = 3+1;
+				break;
+			/* one byte number */
+			case 'c':
+			case 'C':
+				need = 3+1;
+				break;
+			/* two byte number */
+			case 's':
+			case 'S':
+				need = 3+2;
+				break;
+			/* four byte number */
+			case 'i':
+			case 'I':
+			case 'f':
+				need = 3+4;
+				break;
+			/* array: element type (1 byte), element count (4 bytes), elements */
+			case 'B':
+			{
+				uint64_t alen = 0;
+				uint64_t elsize = 0;
+
+				if ( avail < 3+1/*data type*/+4/*array length*/ )
+					return 0;
+
+				alen = decodeUInt(auxc+4,4);
+
+				switch ( auxc[3] )
+				{
+					case 'c':
+					case 'C':
+						elsize = 1;
+						break;
+					case 's':
+					case 'S':
+						elsize = 2;
+						break;
+					case 'i':
+					case 'I':
+					case 'f':
+						elsize = 4;
+						break;
+					default:
+						return 0;
+				}
+
+				need = 3+1+4 + elsize*alen;
+				break;
+			}
+			/* zero terminated string / hex string */
+			case 'Z':
+			case 'H':
+			{
+				uint8_t const * p = auxc+3;
+
+				while ( p != ue && *p )
+					++p;
+
+				/* terminator byte 0 is not inside block */
+				if ( p == ue )
+					return 0;
+
+				need = (uint64_t)(p-auxc)+1;
+				break;
+			}
+			default:
+				return 0;
+		}
+
+		/* the complete field has to lie inside the block */
+		if ( (uint64_t)avail < need )
+			return 0;
+
+		auxc += need;
+	}
+
+	return 1;
+}
+
+/*
+ * Read the next alignment block from reader into data and validate it.
+ *
+ * data:   alignment object; its block buffer is enlarged if necessary
+ * reader: input the length prefixed alignment block is read from
+ *
+ * Returns
+ *   1 if an alignment was read and passed all checks,
+ *   0 if no more input is available (BamBam_GzipReader_Peek fails),
+ *  -1 on error.
+ * When a validation check fails data->valid holds the reason; on read or
+ * allocation errors data->valid stays bambamc_alignment_validity_ok.
+ *
+ * The reference sequence identifiers are not checked here, so the codes
+ * bambamc_alignment_validity_invalid_refseq and
+ * bambamc_alignment_validity_invalid_next_refseq are never produced by this
+ * function.
+ */
+int BamBam_BamSingleAlignment_LoadAlignment(BamBam_BamSingleAlignment * data, BamBam_GzipReader * reader)
+{
+	int32_t reclen = -1;
+	int32_t lseq = 0;
+	int32_t pos = 0;
+	uint32_t ncigar = 0;
+	uint32_t i = 0;
+	uint64_t ciglen = 0;
+	char const * ca = 0;
+	char const * cc = 0;
+	char const * ce = 0;
+	uint8_t const * ue = 0;
+	uint8_t const * ecigar = 0;
+	uint8_t const * quala = 0;
+	uint8_t const * qualc = 0;
+	uint8_t const * quale = 0;
+
+	if ( BamBam_GzipReader_Peek(reader) < 0 )
+	{
+		/* end of input */
+		return 0;
+	}
+
+	/* assume block is valid until it turns out otherwise */
+	data->valid = bambamc_alignment_validity_ok;
+
+	/* read length of alignment block */
+	if ( BamBam_GzipReader_GetInt32(reader,&reclen) < 0 )
+		return -1;
+
+	/* block needs to hold at least the fixed size part (this also rejects negative lengths) */
+	if ( reclen < 32 )
+		return BamBam_BamSingleAlignment_Reject(data,bambamc_alignment_validity_block_too_small);
+
+	/* increase size of memory block if needed */
+	if ( (! data->data) || data->dataav < (uint32_t)reclen )
+	{
+		free(data->data);
+		data->data = (uint8_t *)malloc((size_t)reclen);
+		if ( ! data->data )
+		{
+			/* do not leave a capacity behind that no longer has a buffer */
+			data->dataav = 0;
+			data->dataused = 0;
+			return -1;
+		}
+		data->dataav = (uint32_t)reclen;
+	}
+
+	/* read block */
+	if ( BamBam_GzipReader_Read(reader,(char*)data->data,reclen) != reclen )
+		return -1;
+
+	data->dataused = (uint32_t)reclen;
+
+	/* check query name and compute its length */
+	ca = BamBam_BamSingleAlignment_GetReadName(data);
+	ce = ((char const *)data->data) + data->dataused;
+	ue = ((uint8_t const *)data->data) + data->dataused;
+
+	for ( cc = ca; cc != ce && *cc; ++cc )
+	{
+		/* plain char may be signed, so convert before using it as an index */
+		if ( ! bambamc_qnameValidTable[(unsigned char)(*cc)] )
+			return BamBam_BamSingleAlignment_Reject(data,bambamc_alignment_validity_queryname_contains_illegal_symbols);
+	}
+
+	if ( cc == ce )
+		return BamBam_BamSingleAlignment_Reject(data,bambamc_alignment_validity_queryname_extends_over_block);
+
+	/* stored name length includes the terminator */
+	if ( (uint64_t)(cc-ca)+1 != (uint64_t)BamBam_BamSingleAlignment_GetNL(data) )
+		return BamBam_BamSingleAlignment_Reject(data,bambamc_alignment_validity_queryname_length_inconsistent);
+
+	if ( cc == ca )
+		return BamBam_BamSingleAlignment_Reject(data,bambamc_alignment_validity_queryname_empty);
+
+	/* check that cigar, sequence and quality data lie inside the block */
+	ecigar = BamBam_BamSingleAlignment_GetEncodedCigar(data);
+	ncigar = BamBam_BamSingleAlignment_GetNC(data);
+
+	if ( (uint64_t)(ue-ecigar) < ((uint64_t)ncigar)*sizeof(uint32_t) )
+		return BamBam_BamSingleAlignment_Reject(data,bambamc_alignment_validity_cigar_extends_over_block);
+
+	lseq = BamBam_BamSingleAlignment_GetLSeq(data);
+
+	/*
+	 * A negative sequence length can never fit into the block. It has to be
+	 * rejected explicitly, as it would pass the signed size comparisons
+	 * below and make the quality scan run outside of the block.
+	 */
+	if ( lseq < 0 )
+		return BamBam_BamSingleAlignment_Reject(data,bambamc_alignment_validity_sequence_extends_over_block);
+
+	/* two bases are stored per byte */
+	if ( (uint64_t)(ue-BamBam_BamSingleAlignment_GetEncodedQuery(data)) < (((uint64_t)lseq)+1)/2 )
+		return BamBam_BamSingleAlignment_Reject(data,bambamc_alignment_validity_sequence_extends_over_block);
+
+	if ( (uint64_t)(ue-BamBam_BamSingleAlignment_GetEncodedQual(data)) < (uint64_t)lseq )
+		return BamBam_BamSingleAlignment_Reject(data,bambamc_alignment_validity_quality_extends_over_block);
+
+	/* for mapped reads check the cigar operations against the sequence length */
+	if ( !(BamBam_BamSingleAlignment_GetFlags(data) & BAMBAMC_FUNMAP) )
+	{
+		for ( i = 0; i < ncigar; ++i )
+		{
+			uint32_t const oppair = decodeUInt(ecigar + i*sizeof(uint32_t),4);
+			uint32_t const len = (oppair>>4)&((1ul <<28)-1);
+			uint32_t const op = oppair & 0xF;
+
+			if ( op > (uint32_t)BAMBAMC_CDIFF )
+				return BamBam_BamSingleAlignment_Reject(data,bambamc_alignment_validity_unknown_cigar_op);
+
+			/* only these operations consume bases of the query sequence */
+			switch ( op )
+			{
+				case BAMBAMC_CMATCH:
+				case BAMBAMC_CINS:
+				case BAMBAMC_CSOFT_CLIP:
+				case BAMBAMC_CEQUAL:
+				case BAMBAMC_CDIFF:
+					ciglen += len;
+					break;
+				default:
+					break;
+			}
+		}
+
+		if ( ciglen != (uint64_t)lseq )
+			return BamBam_BamSingleAlignment_Reject(data,bambamc_alignment_validity_cigar_is_inconsistent_with_sequence_length);
+	}
+
+	/* check coordinate ranges */
+	pos = BamBam_BamSingleAlignment_GetPos(data);
+
+	if ( pos < -1 || pos > (((1ll<<29)-1)-1) )
+		return BamBam_BamSingleAlignment_Reject(data,bambamc_alignment_validity_invalid_mapping_position);
+
+	pos = BamBam_BamSingleAlignment_GetNextPos(data);
+
+	if ( pos < -1 || pos > (((1ll<<29)-1)-1) )
+		return BamBam_BamSingleAlignment_Reject(data,bambamc_alignment_validity_invalid_next_mapping_position);
+
+	pos = BamBam_BamSingleAlignment_GetTLen(data);
+
+	if ( pos < ((-(1ll<<29))+1) || pos > ((1ll<<29)-1) )
+		return BamBam_BamSingleAlignment_Reject(data,bambamc_alignment_validity_invalid_tlen);
+
+	/*
+	 * Check quality values: either all values are at most '~'-33, or the
+	 * first value is 255 and then all values have to be 255.
+	 */
+	quala = BamBam_BamSingleAlignment_GetEncodedQual(data);
+	quale = quala + lseq;
+
+	if ( lseq > 0 && quala[0] == 255 )
+	{
+		for ( qualc = quala; qualc != quale; ++qualc )
+			if ( *qualc != 255 )
+				return BamBam_BamSingleAlignment_Reject(data,bambamc_alignment_validity_invalid_quality_value);
+	}
+	else
+	{
+		for ( qualc = quala; qualc != quale; ++qualc )
+			if ( (int)(*qualc) > (int)('~'-33) )
+				return BamBam_BamSingleAlignment_Reject(data,bambamc_alignment_validity_invalid_quality_value);
+	}
+
+	/* check auxiliary fields, which take up the rest of the block */
+	if ( ! BamBam_BamSingleAlignment_AuxValid(BamBam_BamSingleAlignment_GetEncodedAux(data),ue) )
+		return BamBam_BamSingleAlignment_Reject(data,bambamc_alignment_validity_invalid_auxiliary_data);
+
+	return 1;
+}
+
+/*
+ * Release an alignment object together with all buffers it owns (aux
+ * buffer, decoded cigar/query/quality strings and the alignment block).
+ * A null pointer is accepted and ignored.
+ *
+ * Always returns a null pointer, so callers can write "return Delete(x);"
+ * or "x = Delete(x);".
+ */
+BamBam_BamSingleAlignment * BamBam_BamSingleAlignment_Delete(BamBam_BamSingleAlignment * data)
+{
+	if ( ! data )
+		return 0;
+
+	if ( data->auxbuffer )
+		BamBam_CharBuffer_Delete(data->auxbuffer);
+
+	/* free() ignores null pointers, so the member buffers need no guards */
+	free(data->cigar);
+	free(data->query);
+	free(data->qual);
+	free(data->data);
+
+	free(data);
+
+	return 0;
+}
+
+/*
+ * Allocate an empty alignment object: all members are zero except for
+ * auxbuffer, which is set to a newly created BamBam_CharBuffer.
+ *
+ * Returns the new object or a null pointer if an allocation fails. The
+ * object is released with BamBam_BamSingleAlignment_Delete.
+ */
+BamBam_BamSingleAlignment * BamBam_BamSingleAlignment_New()
+{
+	BamBam_BamSingleAlignment * const algn =
+		(BamBam_BamSingleAlignment *)calloc(1,sizeof(BamBam_BamSingleAlignment));
+
+	if ( algn )
+	{
+		algn->auxbuffer = BamBam_CharBuffer_New();
+
+		if ( algn->auxbuffer )
+			return algn;
+	}
+
+	/* failure: Delete copes with a null pointer as well as a partial object */
+	return BamBam_BamSingleAlignment_Delete(algn);
+}
+
+/*
+ * Create an alignment object holding a private copy of an alignment block.
+ *
+ * block:     pointer to the block to copy (not accessed if blocksize is 0)
+ * blocksize: number of bytes to copy; if 0 the object is left without a block
+ *
+ * Returns the new object or a null pointer if an allocation fails.
+ */
+BamBam_BamSingleAlignment * BamBam_BamSingleAlignment_NewClone(uint8_t const * block, uint32_t const blocksize)
+{
+	/* empty object including aux buffer */
+	BamBam_BamSingleAlignment * const algn = BamBam_BamSingleAlignment_New();
+
+	if ( ! algn )
+		return 0;
+
+	if ( blocksize == 0 )
+		return algn;
+
+	algn->data = (uint8_t *)malloc(blocksize);
+
+	if ( ! algn->data )
+		return BamBam_BamSingleAlignment_Delete(algn);
+
+	memcpy(algn->data,block,blocksize);
+	algn->dataused = blocksize;
+	algn->dataav = blocksize;
+
+	return algn;
+}
+
+/*
+ * Copy a decoded string buffer (query, quality or cigar) for Clone.
+ *
+ * The decoders allocate one byte more than the recorded capacity to hold the
+ * terminator, so the copy gets srcspace+1 bytes as well: srcspace bytes are
+ * copied and the last byte is set to 0. This keeps the copy terminated and
+ * large enough for the decoders to reuse it.
+ *
+ * Returns 0 on success (also if src is null, then nothing is done) and -1
+ * if srcspace is negative or the allocation fails.
+ */
+static int BamBam_BamSingleAlignment_CloneString(
+	char ** dst, int32_t * dstspace, char const * src, int32_t const srcspace
+)
+{
+	if ( ! src )
+		return 0;
+
+	if ( srcspace < 0 )
+		return -1;
+
+	*dst = (char *)malloc(((size_t)srcspace)+1);
+
+	if ( ! *dst )
+		return -1;
+
+	memcpy(*dst,src,(size_t)srcspace);
+	(*dst)[srcspace] = 0;
+	*dstspace = srcspace;
+
+	return 0;
+}
+
+/*
+ * Create a deep copy of the alignment object o: alignment block, decoded
+ * query/quality/cigar strings, validity code and aux buffer.
+ *
+ * Returns the copy, or a null pointer if o is null, if o is inconsistent
+ * (it records a block capacity but has no block) or if an allocation fails.
+ */
+BamBam_BamSingleAlignment * BamBam_BamSingleAlignment_Clone(BamBam_BamSingleAlignment const * o)
+{
+	BamBam_BamSingleAlignment * data = 0;
+
+	/* return null if o is null */
+	if ( ! o )
+		return 0;
+
+	/* return null if o claims to have space for alignment data but pointer is null */
+	if ( (!(o->data)) && o->dataav )
+		return 0;
+
+	data = (BamBam_BamSingleAlignment *)malloc(sizeof(BamBam_BamSingleAlignment));
+
+	if ( ! data )
+		return 0;
+
+	/* erase newly allocated space */
+	memset(data,0,sizeof(BamBam_BamSingleAlignment));
+
+	/* the copy carries the same validation result as the original */
+	data->valid = o->valid;
+
+	/* copy alignment data if there is any */
+	if ( o->data )
+	{
+		data->data = (uint8_t *)malloc(o->dataav);
+
+		if ( ! data->data )
+			return BamBam_BamSingleAlignment_Delete(data);
+
+		memcpy(data->data,o->data,o->dataav);
+		data->dataav = o->dataav;
+		data->dataused = o->dataused;
+	}
+
+	/* copy decoded strings if present */
+	if (
+		BamBam_BamSingleAlignment_CloneString(&(data->query),&(data->queryspace),o->query,o->queryspace) < 0
+		||
+		BamBam_BamSingleAlignment_CloneString(&(data->qual),&(data->qualspace),o->qual,o->qualspace) < 0
+		||
+		BamBam_BamSingleAlignment_CloneString(&(data->cigar),&(data->cigarspace),o->cigar,o->cigarspace) < 0
+	)
+		return BamBam_BamSingleAlignment_Delete(data);
+
+	if ( o->auxbuffer )
+	{
+		data->auxbuffer = BamBam_CharBuffer_Clone(o->auxbuffer);
+		if ( ! data->auxbuffer )
+			return BamBam_BamSingleAlignment_Delete(data);
+	}
+
+	return data;
+}
+
+/*
+ * Map an alignment validity code to a human readable description.
+ *
+ * code: value as stored in the valid member of an alignment object
+ *
+ * Returns a pointer to a constant string (never null). Values which are not
+ * part of bambamc_alignment_validity yield a generic message.
+ */
+char const * BamBam_Alignment_Validity_Str(bambamc_alignment_validity const code)
+{
+	switch ( code )
+	{
+		case bambamc_alignment_validity_ok:
+			return "Alignment valid";
+		case bambamc_alignment_validity_block_too_small:
+			return "Alignment block is too small to hold fixed size data";
+		case bambamc_alignment_validity_queryname_extends_over_block:
+			return "Null terminated query name extends beyond block boundary";
+		case bambamc_alignment_validity_queryname_length_inconsistent:
+			return "Length of null terminated query name is inconsistent with alignment header";
+		case bambamc_alignment_validity_cigar_extends_over_block:
+			return "Cigar data extends beyond block boundary";
+		case bambamc_alignment_validity_sequence_extends_over_block:
+			return "Sequence data extends beyond block boundary";
+		case bambamc_alignment_validity_quality_extends_over_block:
+			return "Quality data extends beyond block boundary";
+		case bambamc_alignment_validity_cigar_is_inconsistent_with_sequence_length:
+			return "Cigar operations are inconsistent with length of query sequence";
+		case bambamc_alignment_validity_unknown_cigar_op:
+			return "Unknown/invalid cigar operator";
+		case bambamc_alignment_validity_queryname_contains_illegal_symbols:
+			return "Query name contains illegal symbols";
+		case bambamc_alignment_validity_queryname_empty:
+			return "Query name is the empty string";
+		case bambamc_alignment_validity_invalid_mapping_position:
+			return "Invalid leftmost mapping position";
+		case bambamc_alignment_validity_invalid_next_mapping_position:
+			return "Invalid next segment mapping position";
+		case bambamc_alignment_validity_invalid_tlen:
+			return "Invalid observed template length";
+		case bambamc_alignment_validity_invalid_quality_value:
+			return "Quality string contains invalid quality value";
+		case bambamc_alignment_validity_invalid_refseq:
+			return "Invalid/unknown reference sequence identifier";
+		case bambamc_alignment_validity_invalid_next_refseq:
+			return "Invalid/unknown next segment reference sequence identifier";
+		case bambamc_alignment_validity_invalid_auxiliary_data:
+			return "Invalid auxiliary tag data";
+		/* the enumerator is spelled "valididy" in the header */
+		case bambamc_alignment_valididy_record_length_negative:
+			return "Alignment record length is negative";
+		default:
+			return "Unknown alignment validity value.";
+	}
+}
diff --git a/src/bambamc/BamBam_BamSingleAlignment.h b/src/bambamc/BamBam_BamSingleAlignment.h
new file mode 100644
index 0000000..69c9486
--- /dev/null
+++ b/src/bambamc/BamBam_BamSingleAlignment.h
@@ -0,0 +1,114 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#if ! defined(BAMBAM_BAMSINGLEALIGNMENT_H)
+#define BAMBAM_BAMSINGLEALIGNMENT_H
+
+#include <bambamc/BamBam_BamFileHeader.h>
+#include <bambamc/BamBam_CharBuffer.h>
+#include <bambamc/BamBam_GzipWriter.h>
+#include <bambamc/BamBam_BgzfCompressor.h>
+#include <string.h>
+
+/*
+ * Result codes of the alignment block validation. The code is stored in the
+ * valid member of BamBam_BamSingleAlignment and can be turned into a
+ * message with BamBam_Alignment_Validity_Str.
+ *
+ * NOTE: the last enumerator is spelled "valididy" (sic). The name is part
+ * of the public interface, so the spelling is kept as is.
+ */
+typedef enum _bambamc_alignment_validity {
+ bambamc_alignment_validity_ok = 0,
+ bambamc_alignment_validity_block_too_small = 1,
+ bambamc_alignment_validity_queryname_extends_over_block = 2,
+ bambamc_alignment_validity_queryname_length_inconsistent = 3,
+ bambamc_alignment_validity_cigar_extends_over_block = 4,
+ bambamc_alignment_validity_sequence_extends_over_block = 5,
+ bambamc_alignment_validity_quality_extends_over_block = 6,
+ bambamc_alignment_validity_cigar_is_inconsistent_with_sequence_length = 7,
+ bambamc_alignment_validity_unknown_cigar_op = 8,
+ bambamc_alignment_validity_queryname_contains_illegal_symbols = 9,
+ bambamc_alignment_validity_queryname_empty = 10,
+ bambamc_alignment_validity_invalid_mapping_position = 11,
+ bambamc_alignment_validity_invalid_next_mapping_position = 12,
+ bambamc_alignment_validity_invalid_tlen = 13,
+ bambamc_alignment_validity_invalid_quality_value = 14,
+ bambamc_alignment_validity_invalid_refseq = 15,
+ bambamc_alignment_validity_invalid_next_refseq = 16,
+ bambamc_alignment_validity_invalid_auxiliary_data = 17,
+ bambamc_alignment_valididy_record_length_negative = 18
+} bambamc_alignment_validity;
+
+
+/*
+ * A single alignment: the alignment block as read from or written to a
+ * file, plus buffers holding decoded string forms of some of its parts.
+ * Objects are created by BamBam_BamSingleAlignment_New/NewClone/Clone and
+ * released by BamBam_BamSingleAlignment_Delete.
+ */
+typedef struct _BamBam_BamSingleAlignment
+{
+ /* alignment block (without the preceding length field), null if none */
+ uint8_t * data;
+ /* number of bytes allocated for data */
+ uint32_t dataav;
+ /* number of bytes of data which are in use */
+ uint32_t dataused;
+
+ /* buffer for the decoded query sequence, null until first decoded */
+ char * query;
+ /* capacity recorded for query; presumably excludes the terminator like qualspace, TODO confirm */
+ int32_t queryspace;
+
+ /* buffer for the decoded, zero terminated quality string, null until first decoded */
+ char * qual;
+ /* capacity of qual in characters, not counting the terminator byte */
+ int32_t qualspace;
+
+ /* buffer for the decoded, zero terminated cigar string, null until first decoded */
+ char * cigar;
+ /* capacity of cigar in characters, not counting the terminator byte */
+ int32_t cigarspace;
+
+ /* result of the validation performed by BamBam_BamSingleAlignment_LoadAlignment */
+ bambamc_alignment_validity valid;
+
+ /* character buffer created along with the object; presumably used for decoding auxiliary fields */
+ BamBam_CharBuffer * auxbuffer;
+}
+BamBam_BamSingleAlignment;
+
+extern BamBam_BamSingleAlignment * BamBam_BamSingleAlignment_Delete(BamBam_BamSingleAlignment * data);
+extern BamBam_BamSingleAlignment * BamBam_BamSingleAlignment_New();
+extern BamBam_BamSingleAlignment * BamBam_BamSingleAlignment_NewClone(uint8_t const * block, uint32_t const blocksize);
+extern BamBam_BamSingleAlignment * BamBam_BamSingleAlignment_Clone(BamBam_BamSingleAlignment const * o);
+extern int BamBam_BamSingleAlignment_LoadAlignment(BamBam_BamSingleAlignment * data, BamBam_GzipReader * reader);
+extern int BamBam_BamSingleAlignment_StoreAlignment(BamBam_BamSingleAlignment const * data, BamBam_GzipWriter * writer);
+extern int BamBam_BamSingleAlignment_StoreAlignmentBgzf(BamBam_BamSingleAlignment const * data, BamBam_BgzfCompressor * writer);
+
+extern int32_t BamBam_BamSingleAlignment_GetRefId(BamBam_BamSingleAlignment const * data);
+extern int32_t BamBam_BamSingleAlignment_GetPos(BamBam_BamSingleAlignment const * data);
+extern uint32_t BamBam_BamSingleAlignment_GetBinMQNL(BamBam_BamSingleAlignment const * data);
+extern uint32_t BamBam_BamSingleAlignment_GetBin(BamBam_BamSingleAlignment const * data);
+extern uint32_t BamBam_BamSingleAlignment_GetMQ(BamBam_BamSingleAlignment const * data);
+extern uint32_t BamBam_BamSingleAlignment_GetNL(BamBam_BamSingleAlignment const * data);
+extern uint32_t BamBam_BamSingleAlignment_GetFlagNC(BamBam_BamSingleAlignment const * data);
+extern uint32_t BamBam_BamSingleAlignment_GetFlags(BamBam_BamSingleAlignment const * data);
+extern uint32_t BamBam_BamSingleAlignment_GetNC(BamBam_BamSingleAlignment const * data);
+extern int32_t BamBam_BamSingleAlignment_GetLSeq(BamBam_BamSingleAlignment const * data);
+extern int32_t BamBam_BamSingleAlignment_GetNextRefID(BamBam_BamSingleAlignment const * data);
+extern int32_t BamBam_BamSingleAlignment_GetNextPos(BamBam_BamSingleAlignment const * data);
+extern int32_t BamBam_BamSingleAlignment_GetTLen(BamBam_BamSingleAlignment const * data);
+extern char const * BamBam_BamSingleAlignment_GetReadName(BamBam_BamSingleAlignment const * data);
+extern uint8_t const * BamBam_BamSingleAlignment_GetEncodedCigar(BamBam_BamSingleAlignment const * data);
+extern uint8_t const * BamBam_BamSingleAlignment_GetEncodedQuery(BamBam_BamSingleAlignment const * data);
+extern uint8_t const * BamBam_BamSingleAlignment_GetEncodedQual(BamBam_BamSingleAlignment const * data);
+extern uint8_t const * BamBam_BamSingleAlignment_GetEncodedAux(BamBam_BamSingleAlignment const * data);
+extern int BamBam_BamSingleAlignment_DecodeQueryRc(BamBam_BamSingleAlignment * algn, int const rc);
+extern int BamBam_BamSingleAlignment_DecodeQualRc(BamBam_BamSingleAlignment * algn, int const rc);
+extern int BamBam_BamSingleAlignment_DecodeCigarRc(BamBam_BamSingleAlignment * algn, int const rc);
+extern int BamBam_BamSingleAlignment_DecodeQuery(BamBam_BamSingleAlignment * algn);
+extern int BamBam_BamSingleAlignment_DecodeQual(BamBam_BamSingleAlignment * algn);
+extern int BamBam_BamSingleAlignment_DecodeCigar(BamBam_BamSingleAlignment * algn);
+extern int32_t BamBam_BamSingleAlignment_DecodeQueryQualCigarRc(
+ BamBam_BamSingleAlignment * algn, int32_t rc, int32_t * querylen, int32_t * cigarlen);
+extern int32_t BamBam_BamSingleAlignment_DecodeQueryQualCigar(
+ BamBam_BamSingleAlignment * algn, int32_t * querylen, int32_t * cigarlen);
+extern int BamBam_BamSingleAlignment_DecodeAuxSingle(BamBam_BamSingleAlignment * algn,
+ uint8_t const * p, int reset);
+extern char const * BamBam_BamSingleAlignment_DecodeAux(BamBam_BamSingleAlignment * algn);
+extern uint8_t const * BamBam_BamSingleAlignment_FindAux(BamBam_BamSingleAlignment const * algn, char const tag[2]);
+extern uint64_t BamBam_BamSingleAlignment_GetAuxLength(BamBam_BamSingleAlignment const * algn);
+extern char const * BamBam_Alignment_Validity_Str(bambamc_alignment_validity const code);
+#endif
diff --git a/src/bambamc/BamBam_BamWriter.c b/src/bambamc/BamBam_BamWriter.c
new file mode 100644
index 0000000..9f5db7c
--- /dev/null
+++ b/src/bambamc/BamBam_BamWriter.c
@@ -0,0 +1,117 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#include <bambamc/BamBam_BamWriter.h>
+#include <bambamc/BamBam_BamAlignmentPut.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+/*
+ * Close and release a BAM writer. A null writer is accepted and ignored.
+ *
+ * writer:     writer to release
+ * termstatus: if not null and the writer owns a compressor, receives the
+ *             return value of BamBam_BgzfCompressor_Terminate; it is left
+ *             untouched otherwise
+ *
+ * Always returns a null pointer.
+ */
+BamBam_BamWriter * BamBam_BamWriter_Delete(BamBam_BamWriter * writer, int * termstatus)
+{
+	if ( ! writer )
+		return 0;
+
+	if ( writer->aput )
+		BamBam_AlignmentPut_Delete(writer->aput);
+
+	if ( writer->bgzf )
+	{
+		/* terminate the output stream before the compressor goes away */
+		int const status = BamBam_BgzfCompressor_Terminate(writer->bgzf);
+
+		if ( termstatus )
+			*termstatus = status;
+
+		BamBam_BgzfCompressor_Delete(writer->bgzf);
+	}
+
+	free( writer );
+
+	return 0;
+}
+
+/*
+ * Create a BAM writer: open a BGZF compressor for filename, write the BAM
+ * header described by info and set up the object used to assemble
+ * alignments.
+ *
+ * info:             header information to be written
+ * filename:         passed to BamBam_BgzfCompressor_New
+ * compressionLevel: compression level, 0 up to 9
+ *
+ * Returns the writer, or a null pointer if compressionLevel is out of
+ * range, an allocation fails, the compressor cannot be created or writing
+ * the header fails.
+ */
+BamBam_BamWriter * BamBam_BamWriter_New(
+	BamBam_BamHeaderInfo * info,
+	char const * filename,
+	int compressionLevel)
+{
+	BamBam_BamWriter * writer = 0;
+
+	if ( compressionLevel < 0 || compressionLevel > 9 )
+		return 0;
+
+	writer = (BamBam_BamWriter *)malloc(sizeof(BamBam_BamWriter));
+
+	if ( !writer )
+		return 0;
+
+	memset(writer,0,sizeof(BamBam_BamWriter));
+
+	writer->bgzf = BamBam_BgzfCompressor_New(filename,compressionLevel);
+
+	/* do not hand a missing compressor to the header writer */
+	if ( ! writer->bgzf )
+		return BamBam_BamWriter_Delete(writer,0);
+
+	if ( BamBam_BamHeaderInfo_WriteBamHeader(info,writer->bgzf) < 0 )
+		return BamBam_BamWriter_Delete(writer,0);
+
+	writer->aput = BamBam_AlignmentPut_New();
+
+	if ( ! writer->aput )
+		return BamBam_BamWriter_Delete(writer,0);
+
+	return writer;
+}
+/*
+ * Forward the given alignment fields together with writer->aput to
+ * BamBam_CharBuffer_PutAlignmentC and return its result.
+ *
+ * flags: flags
+ * tid:   target (chromosome) id
+ * rpos:  position on chromosome (0 based)
+ * mtid:  mate target id
+ * rmpos: position of mate on mate target id
+ * name:  sequence name
+ * query: query sequence (read)
+ * qual:  quality string
+ * cigar: cigar operations
+ * rqual: mapping quality
+ * isize: insert size
+ */
+int BamBam_BamWriter_PutAlignment(
+	BamBam_BamWriter * writer,
+	int32_t const flags,
+	int32_t const tid,
+	uint64_t const rpos,
+	int32_t const mtid,
+	uint64_t const rmpos,
+	char const * name,
+	char const * query,
+	char const * qual,
+	char const * cigar,
+	int32_t const rqual,
+	int32_t const isize
+	)
+{
+	int const status = BamBam_CharBuffer_PutAlignmentC(
+		writer->aput,
+		flags,tid,rpos,mtid,rmpos,
+		name,query,qual,cigar,
+		rqual,isize
+	);
+
+	return status;
+}
+/*
+ * Forward tag, type and rvalue together with writer->aput to
+ * BamBam_CharBuffer_PutAuxNumberC and return its result.
+ */
+int BamBam_BamWriter_PutAuxNumber(BamBam_BamWriter * writer, char const * tag, char const type, void const * rvalue)
+{
+	int const status = BamBam_CharBuffer_PutAuxNumberC(writer->aput,tag,type,rvalue);
+
+	return status;
+}
+/*
+ * Write the alignment held in writer->aput->calignment to the BGZF
+ * compressor of the writer. Returns the result of
+ * BamBam_BamSingleAlignment_StoreAlignmentBgzf (0 on success, -1 on error).
+ */
+int BamBam_BamWriter_Commit(BamBam_BamWriter * writer)
+{
+	int const status = BamBam_BamSingleAlignment_StoreAlignmentBgzf(
+		writer->aput->calignment,
+		writer->bgzf
+	);
+
+	return status;
+}
diff --git a/src/bambamc/BamBam_BamWriter.h b/src/bambamc/BamBam_BamWriter.h
new file mode 100644
index 0000000..6b74f01
--- /dev/null
+++ b/src/bambamc/BamBam_BamWriter.h
@@ -0,0 +1,64 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#if ! defined(BAMBAM_BAMWRITER_H)
+#define BAMBAM_BAMWRITER_H
+
+#include <bambamc/BamBam_BamAlignmentPut.h>
+#include <bambamc/BamBam_BamHeaderInfo.h>
+
+/*
+ * State of a BAM file writer as created by BamBam_BamWriter_New and
+ * released by BamBam_BamWriter_Delete.
+ */
+typedef struct _BamBam_BamWriter
+{
+ /* object the alignment fields and aux numbers are put into before a commit */
+ BamBam_AlignmentPut * aput;
+ /* BGZF compressor the header and the committed alignments are written to */
+ BamBam_BgzfCompressor * bgzf;
+} BamBam_BamWriter;
+
+extern BamBam_BamWriter * BamBam_BamWriter_Delete(BamBam_BamWriter * writer, int * termstatus);
+extern BamBam_BamWriter * BamBam_BamWriter_New(
+ BamBam_BamHeaderInfo * info,
+ char const * filename,
+ int compressionLevel) BAMBAM_WARN_IF_UNUSEDRESULT;
+extern int BamBam_BamWriter_PutAlignment(
+ BamBam_BamWriter * writer,
+ /* flags */
+ int32_t const flags,
+ /* target (chromosome) id */
+ int32_t const tid,
+ /* position on chromosome (0 based) */
+ uint64_t const rpos,
+ /* mate target id */
+ int32_t const mtid,
+ /* position of mate on mate target id */
+ uint64_t const rmpos,
+ /* sequence name */
+ char const * name,
+ /* query sequence (read) */
+ char const * query,
+ /* quality string */
+ char const * qual,
+ /* cigar operations */
+ char const * cigar,
+ /* mapping quality */
+ int32_t const rqual,
+ /* insert size */
+ int32_t const isize
+ ) BAMBAM_WARN_IF_UNUSEDRESULT;
+extern int BamBam_BamWriter_PutAuxNumber(BamBam_BamWriter * writer, char const * tag, char const type, void const * rvalue) BAMBAM_WARN_IF_UNUSEDRESULT;
+extern int BamBam_BamWriter_Commit(BamBam_BamWriter * writer);
+#endif
diff --git a/src/bambamc/BamBam_BgzfCompressor.c b/src/bambamc/BamBam_BgzfCompressor.c
new file mode 100644
index 0000000..03f14aa
--- /dev/null
+++ b/src/bambamc/BamBam_BgzfCompressor.c
@@ -0,0 +1,325 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#include <bambamc/BamBam_BgzfCompressor.h>
+
+/* template for the 18 byte header put in front of each compressed block;
+   BamBam_BgzfCompressor_FlushInternal copies it to the start of the output
+   buffer and then overwrites bytes 16 and 17 with the block size */
+static const char BamBam_GzipHeaderData[18] =
+{
+ (char)BAMBAMC_LZ_ID1, (char)BAMBAMC_LZ_ID2, BAMBAMC_LZ_CM, BAMBAMC_LZ_FEXTRA,
+ 0,0,0,0, /* modification time, not given */
+ 0, /* XFL */
+ (char)BAMBAMC_LZ_OS_UNKNOWN,
+ 6,0, /* XLEN, number 6 as little endian */
+ 'B', 'C', /* extra data block tag */
+ 2,0, /* length of extra field, 2 as little endian */
+ 0,0 /* extra field: blocksize-1 including block header and footer, to be filled */
+};
+
+/* number of bytes in front of the compressed payload (the header template above) */
+static unsigned int const BAMBAMC_LZ_HEADERSIZE = sizeof(BamBam_GzipHeaderData);
+/* number of bytes behind the compressed payload: 4 byte crc and 4 byte uncompressed size */
+static unsigned int const BAMBAMC_LZ_FOOTERSIZE = 8;
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+/**
+ * compress data from the front of the input buffer into a single block
+ * (header, deflate payload, crc32 and uncompressed size) and write that block
+ * to object->file. if the compressed data does not fit into the output buffer,
+ * the amount of input is reduced by up to 1024 bytes and compression is tried
+ * again. input which was not part of the written block is moved to the front
+ * of the input buffer and stays there for a later call.
+ *
+ * calling this with an empty input buffer writes a block with empty payload.
+ *
+ * @param object compressor object
+ * @param level compression level handed to deflateInit2
+ * @return number of bytes written for the block or -1 on failure (zlib setup
+ *         or compression error, failed fwrite, or input reduced to nothing)
+ **/
+static int BamBam_BgzfCompressor_FlushInternal(BamBam_BgzfCompressor * object, int level)
+{
+ /* number of input bytes we try to put into the block */
+ uint32_t insize = object->inbufferfill;
+ z_stream strm;
+
+ do
+ {
+ int zret = -1;
+
+ /* a fresh deflate stream is set up for every attempt */
+ memset ( &strm , 0, sizeof(z_stream) );
+ strm.zalloc = Z_NULL;
+ strm.zfree = Z_NULL;
+ strm.opaque = Z_NULL;
+ zret = deflateInit2(&strm, level, Z_DEFLATED, -15 /* window size */,
+ 8 /* mem level, gzip default */, Z_DEFAULT_STRATEGY);
+
+ if ( zret != Z_OK )
+ return -1;
+
+ /* compress behind the space reserved for the header, leave room for the footer */
+ strm.avail_in = insize;
+ strm.next_in = object->inbuffer;
+ strm.avail_out = BAMBAMC_LZ_MAXBUFSIZE - (BAMBAMC_LZ_HEADERSIZE+BAMBAMC_LZ_FOOTERSIZE);
+ strm.next_out = object->outbuffer + BAMBAMC_LZ_HEADERSIZE;
+
+ zret = deflate(&strm,Z_FINISH);
+
+ /* everything compressed, write block to file */
+ if ( zret == Z_STREAM_END )
+ {
+ /* size of compressed data */
+ uint32_t const payloadsize = (BAMBAMC_LZ_MAXBUFSIZE - (BAMBAMC_LZ_HEADERSIZE+BAMBAMC_LZ_FOOTERSIZE)) - strm.avail_out;
+ /* blocksize stored in extended header */
+ uint16_t const headblocksize = sizeof(BamBam_GzipHeaderData)/*header*/+8/*footer*/+payloadsize-1;
+ /* remaining uncompressed bytes */
+ uint32_t const rembytes = object->inbufferfill - insize;
+ /* pointer to uncompressed rest */
+ Bytef const * remp = object->inbuffer + insize;
+ /* empty crc */
+ uint32_t crc = crc32(0,0,0);
+ /* nextout ptr */
+ Bytef * nextout = strm.next_out;
+ /* total size of compressed block */
+ int32_t compsize = -1;
+
+ deflateEnd(&strm);
+
+ /* copy header into its place */
+ memcpy(object->outbuffer,BamBam_GzipHeaderData,sizeof(BamBam_GzipHeaderData));
+
+ /* put block size (2 byte little endian) */
+ object->outbuffer[16] = (headblocksize >> 0) & 0xFFu;
+ object->outbuffer[17] = (headblocksize >> 8) & 0xFFu;
+
+ /* compute crc */
+ crc = crc32(crc, object->inbuffer, insize);
+
+ /* put crc */
+ *(nextout++) = (crc >> 0) & 0xFFu;
+ *(nextout++) = (crc >> 8) & 0xFFu;
+ *(nextout++) = (crc >> 16) & 0xFFu;
+ *(nextout++) = (crc >> 24) & 0xFFu;
+
+ /* put uncompressed size */
+ *(nextout++) = (insize >> 0) & 0xFFu;
+ *(nextout++) = (insize >> 8) & 0xFFu;
+ *(nextout++) = (insize >> 16) & 0xFFu;
+ *(nextout++) = (insize >> 24) & 0xFFu;
+
+ /* write block */
+ compsize = nextout-object->outbuffer;
+ if ( fwrite(object->outbuffer,compsize,1,object->file) != 1 )
+ return -1;
+
+ /* move uncompressed rest (if any) to start of buffer */
+ if ( rembytes )
+ memmove(object->inbuffer,remp,rembytes);
+ object->inbufferfill = rembytes;
+
+ return compsize;
+ }
+ /* output buffer too small for compressed stream, reduce size of input (and try again) */
+ else if ( zret == Z_OK )
+ {
+ fprintf(stderr,"Bad compression, reducing.\n");
+ deflateEnd(&strm);
+ insize -= ((insize >= 1024) ? 1024 : insize);
+ }
+ /* other zlib error, give up */
+ else
+ {
+ deflateEnd(&strm);
+ return -1;
+ }
+ } while ( insize );
+
+ /* input size was reduced to zero without producing a block */
+ return -1;
+}
+
+/**
+ * write out all buffered input as compressed blocks
+ *
+ * @param object compressor object
+ * @return 0 on success, -1 as soon as writing a block fails
+ **/
+int BamBam_BgzfCompressor_Flush(BamBam_BgzfCompressor * object)
+{
+    /* one block is written per round until the input buffer is empty */
+    while ( object->inbufferfill != 0 )
+    {
+        if ( BamBam_BgzfCompressor_FlushInternal(object,object->level) < 0 )
+            return -1;
+    }
+
+    return 0;
+}
+
+/**
+ * finish the stream: write all buffered data, append an empty block (EOF
+ * block, default compression) and flush the underlying stream
+ *
+ * @param object compressor object
+ * @return 0 on success, -1 on failure
+ **/
+int BamBam_BgzfCompressor_Terminate(BamBam_BgzfCompressor * object)
+{
+    /* pending data first */
+    if ( BamBam_BgzfCompressor_Flush(object) < 0 )
+        return -1;
+
+    /* input buffer is empty now, so this writes a block without payload */
+    if ( BamBam_BgzfCompressor_FlushInternal(object,Z_DEFAULT_COMPRESSION) < 0 )
+        return -1;
+
+    /* push everything through the stdio layer */
+    return ( fflush(object->file) != 0 ) ? -1 : 0;
+}
+
+/**
+ * append len bytes from data to the input buffer; whenever the buffer is
+ * full, one block is compressed and written to make room
+ *
+ * @param object compressor object
+ * @param data bytes to be written
+ * @param len number of bytes in data
+ * @return 0 on success, -1 if writing a block failed
+ **/
+int BamBam_BgzfCompressor_Write(BamBam_BgzfCompressor * object, uint8_t const * data, uint32_t len)
+{
+    while ( len > 0 )
+    {
+        uint32_t const avail = BAMBAMC_LZ_MAXBUFSIZE - object->inbufferfill;
+        uint32_t chunk;
+
+        /* buffer is full: get rid of some data and look again */
+        if ( avail == 0 )
+        {
+            if ( BamBam_BgzfCompressor_FlushInternal(object,object->level) < 0 )
+                return -1;
+            continue;
+        }
+
+        /* take as much as fits */
+        chunk = (len > avail) ? avail : len;
+        assert ( chunk );
+
+        memcpy(object->inbuffer+object->inbufferfill,data,chunk);
+        object->inbufferfill += chunk;
+        data += chunk;
+        len -= chunk;
+    }
+
+    return 0;
+}
+
+
+/**
+ * second construction stage shared by the constructors: store the compression
+ * level and allocate input and output buffer (BAMBAMC_LZ_MAXBUFSIZE bytes each)
+ *
+ * @param object partially constructed compressor
+ * @param level compression level
+ * @return object on success; on allocation failure the object is destroyed
+ *         and a null pointer is returned
+ **/
+static BamBam_BgzfCompressor * BamBam_BgzfCompressor_Setup(BamBam_BgzfCompressor * object, int const level)
+{
+    object->level = level;
+
+    object->inbuffer = (Bytef *)malloc(BAMBAMC_LZ_MAXBUFSIZE);
+
+    if ( object->inbuffer )
+    {
+        object->outbuffer = (Bytef *)malloc(BAMBAMC_LZ_MAXBUFSIZE);
+
+        if ( object->outbuffer )
+        {
+            object->inbufferfill = 0;
+            return object;
+        }
+    }
+
+    /* one of the allocations failed */
+    return BamBam_BgzfCompressor_Delete(object);
+}
+
+/**
+ * create a compressor writing to standard output if filename is "-" and to
+ * the named file otherwise
+ *
+ * @param filename output file name or "-"
+ * @param level compression level
+ * @return compressor object or null pointer on failure
+ **/
+BamBam_BgzfCompressor * BamBam_BgzfCompressor_New(char const * filename, int const level)
+{
+    int const usestdout = ( strcmp(filename,"-") == 0 );
+
+    return usestdout
+        ? BamBam_BgzfCompressor_NewFP(stdout,level)
+        : BamBam_BgzfCompressor_NewFilename(filename,level);
+}
+
+/**
+ * create a compressor writing to a newly opened file (mode "wb"); the file is
+ * closed again by BamBam_BgzfCompressor_Delete
+ *
+ * @param filename name of output file
+ * @param level compression level
+ * @return compressor object or null pointer on failure
+ **/
+BamBam_BgzfCompressor * BamBam_BgzfCompressor_NewFilename(char const * filename, int const level)
+{
+    /* zero filled object, so that Delete can be called at any stage */
+    BamBam_BgzfCompressor * const object = (BamBam_BgzfCompressor *)calloc(1,sizeof(BamBam_BgzfCompressor));
+
+    if ( ! object )
+        return 0;
+
+    object->closefile = fopen(filename,"wb");
+
+    if ( ! object->closefile )
+        return BamBam_BgzfCompressor_Delete(object);
+
+    /* blocks are written to the file we have just opened */
+    object->file = object->closefile;
+
+    return BamBam_BgzfCompressor_Setup(object,level);
+}
+/**
+ * create a compressor writing to an already open stream; the stream is
+ * flushed but not closed by BamBam_BgzfCompressor_Delete
+ *
+ * @param rfile output stream
+ * @param level compression level
+ * @return compressor object or null pointer on failure (including rfile being null)
+ **/
+BamBam_BgzfCompressor * BamBam_BgzfCompressor_NewFP(FILE * rfile, int const level)
+{
+    /* zero filled object, closefile stays unset */
+    BamBam_BgzfCompressor * const object = (BamBam_BgzfCompressor *)calloc(1,sizeof(BamBam_BgzfCompressor));
+
+    if ( ! object )
+        return 0;
+
+    object->file = rfile;
+
+    if ( object->file )
+        return BamBam_BgzfCompressor_Setup(object,level);
+
+    /* no stream given */
+    return BamBam_BgzfCompressor_Delete(object);
+}
+/**
+ * destroy a compressor: flush the output stream, close the file if it was
+ * opened by the compressor itself and release all memory. buffered input is
+ * not compressed here, use BamBam_BgzfCompressor_Terminate before.
+ *
+ * @param object compressor object, may be a null pointer
+ * @return always a null pointer
+ **/
+BamBam_BgzfCompressor * BamBam_BgzfCompressor_Delete(BamBam_BgzfCompressor * object)
+{
+    if ( ! object )
+        return 0;
+
+    /* the guards stay: fflush(0) would flush every open stream, fclose(0) is invalid */
+    if ( object->file )
+        fflush(object->file);
+    if ( object->closefile )
+        fclose(object->closefile);
+
+    /* free accepts null pointers */
+    free(object->inbuffer);
+    free(object->outbuffer);
+    free(object);
+
+    return 0;
+}
+/**
+ * write a 32 bit number as four bytes, least significant byte first
+ *
+ * @param object compressor object
+ * @param v number to be written
+ * @return 0 on success, -1 on failure
+ **/
+int BamBam_BgzfCompressor_PutInt32(BamBam_BgzfCompressor * object, int32_t const v)
+{
+    /* split an unsigned copy: right shifting a negative signed value is
+       implementation-defined, the conversion to uint32_t is not */
+    uint32_t const u = (uint32_t)v;
+    uint8_t data[4];
+
+    data[0] = (uint8_t)((u >> 0) & 0xFFu);
+    data[1] = (uint8_t)((u >> 8) & 0xFFu);
+    data[2] = (uint8_t)((u >> 16) & 0xFFu);
+    data[3] = (uint8_t)((u >> 24) & 0xFFu);
+
+    return BamBam_BgzfCompressor_Write(object,&data[0],4);
+}
+/**
+ * write a string including its terminating zero byte
+ *
+ * @param object compressor object
+ * @param c zero terminated string
+ * @return 0 on success, -1 on failure
+ **/
+int BamBam_BgzfCompressor_PutStringZ(BamBam_BgzfCompressor * object, char const * c)
+{
+    /* number of bytes to write, terminator included */
+    uint32_t const nbytes = strlen(c) + 1;
+
+    return BamBam_BgzfCompressor_Write(object,(uint8_t const *)c,nbytes);
+}
+/**
+ * write a string prefixed by its length: first the length including the
+ * terminating zero byte as 32 bit number, then the string and its terminator
+ *
+ * @param object compressor object
+ * @param c zero terminated string
+ * @return 0 on success, -1 on failure
+ **/
+int BamBam_BgzfCompressor_PutLenStringZ(BamBam_BgzfCompressor * object, char const * c)
+{
+    uint32_t const len = strlen(c);
+
+    /* length field counts the terminator */
+    if ( BamBam_BgzfCompressor_PutInt32(object,len+1) < 0 )
+        return -1;
+
+    /* string data */
+    if ( BamBam_BgzfCompressor_PutStringZ(object,c) < 0 )
+        return -1;
+
+    return 0;
+}
diff --git a/src/bambamc/BamBam_BgzfCompressor.h b/src/bambamc/BamBam_BgzfCompressor.h
new file mode 100644
index 0000000..b8434f3
--- /dev/null
+++ b/src/bambamc/BamBam_BgzfCompressor.h
@@ -0,0 +1,72 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#if ! defined(BAMBAMC_BGZFCOMPRESSOR_H)
+#define BAMBAMC_BGZFCOMPRESSOR_H
+
+#include <bambamc/BamBam_Unused.h>
+#include <stdio.h>
+#include <zlib.h>
+#include <stdint.h>
+
+/* gzip/zlib header flags */
+enum bambamc_gzipheader_flags {
+    BAMBAMC_LZ_FTEXT    = 0x01, /* bit 0 */
+    BAMBAMC_LZ_FHCRC    = 0x02, /* bit 1 */
+    BAMBAMC_LZ_FEXTRA   = 0x04, /* bit 2 */
+    BAMBAMC_LZ_FNAME    = 0x08, /* bit 3 */
+    BAMBAMC_LZ_FCOMMENT = 0x10, /* bit 4 */
+    BAMBAMC_LZ_FRES0    = 0x20, /* bit 5 */
+    BAMBAMC_LZ_FRES1    = 0x40, /* bit 6 */
+    BAMBAMC_LZ_FRES2    = 0x80, /* bit 7 */
+    /* bits 5, 6 and 7 together */
+    BAMBAMC_LZ_FRES     = 0xE0
+};
+
+/* gzip header id (first two bytes of the block header) */
+#define BAMBAMC_LZ_ID1 0x1F
+#define BAMBAMC_LZ_ID2 0x8B
+/* compression method */
+#define BAMBAMC_LZ_CM 8 /* compression method: deflate */
+/* unknown operating system id */
+#define BAMBAMC_LZ_OS_UNKNOWN 255
+/* maximum buffer size: size in bytes of the compressor's input and output buffer */
+#define BAMBAMC_LZ_MAXBUFSIZE (64*1024)
+
+/* state of a block wise compressing writer */
+typedef struct _BamBam_BgzfCompressor
+{
+ /* stream the compressed blocks are written to */
+ FILE * file;
+ /* set only if the compressor opened the file itself (NewFilename);
+    this is the stream closed by Delete, it is null for NewFP */
+ FILE * closefile;
+
+ /* compression level used for regular blocks */
+ int level;
+
+ /* uncompressed data waiting to be written (BAMBAMC_LZ_MAXBUFSIZE bytes) */
+ Bytef * inbuffer;
+ /* number of bytes currently stored in inbuffer */
+ uint32_t inbufferfill;
+ /* space for assembling one compressed block (BAMBAMC_LZ_MAXBUFSIZE bytes) */
+ Bytef * outbuffer;
+} BamBam_BgzfCompressor;
+
+/* create compressor for file name; "-" selects standard output. returns null pointer on failure */
+extern BamBam_BgzfCompressor * BamBam_BgzfCompressor_New(char const * filename, int const level) BAMBAM_WARN_IF_UNUSEDRESULT;
+/* create compressor writing to a newly opened file. returns null pointer on failure */
+extern BamBam_BgzfCompressor * BamBam_BgzfCompressor_NewFilename(char const * filename, int const level) BAMBAM_WARN_IF_UNUSEDRESULT;
+/* create compressor writing to an open stream, which is not closed by Delete. returns null pointer on failure */
+extern BamBam_BgzfCompressor * BamBam_BgzfCompressor_NewFP(FILE * rfile, int const level) BAMBAM_WARN_IF_UNUSEDRESULT;
+/* flush stream, close file opened by NewFilename and free memory; always returns a null pointer */
+extern BamBam_BgzfCompressor * BamBam_BgzfCompressor_Delete(BamBam_BgzfCompressor * object);
+/* compress and write all buffered data; 0 on success, -1 on failure */
+extern int BamBam_BgzfCompressor_Flush(BamBam_BgzfCompressor * object) BAMBAM_WARN_IF_UNUSEDRESULT;
+/* buffer len bytes of data, writing blocks whenever the buffer is full; 0 on success, -1 on failure */
+extern int BamBam_BgzfCompressor_Write(BamBam_BgzfCompressor * object, uint8_t const * data, uint32_t len) BAMBAM_WARN_IF_UNUSEDRESULT;
+/* write v as four bytes, least significant byte first; 0 on success, -1 on failure */
+extern int BamBam_BgzfCompressor_PutInt32(BamBam_BgzfCompressor * object, int32_t const v) BAMBAM_WARN_IF_UNUSEDRESULT;
+/* write string c including the terminating zero byte; 0 on success, -1 on failure */
+extern int BamBam_BgzfCompressor_PutStringZ(BamBam_BgzfCompressor * object, char const * const c) BAMBAM_WARN_IF_UNUSEDRESULT;
+/* write length of c (terminator included) as 32 bit number followed by c and its terminator; 0 on success, -1 on failure */
+extern int BamBam_BgzfCompressor_PutLenStringZ(BamBam_BgzfCompressor * object, char const * const c) BAMBAM_WARN_IF_UNUSEDRESULT;
+/* write buffered data, append an empty block and flush the stream; 0 on success, -1 on failure */
+extern int BamBam_BgzfCompressor_Terminate(BamBam_BgzfCompressor * object) BAMBAM_WARN_IF_UNUSEDRESULT;
+#endif
diff --git a/src/bambamc/BamBam_CharBuffer.c b/src/bambamc/BamBam_CharBuffer.c
new file mode 100644
index 0000000..e4a0e61
--- /dev/null
+++ b/src/bambamc/BamBam_CharBuffer.c
@@ -0,0 +1,112 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#include <bambamc/BamBam_CharBuffer.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+/**
+ * create a copy of a buffer with the same capacity, fill state and content
+ *
+ * @param o buffer to be copied
+ * @return copy of o (to be freed using BamBam_CharBuffer_Delete) or null
+ *         pointer if memory allocation failed
+ **/
+BamBam_CharBuffer * BamBam_CharBuffer_Clone(BamBam_CharBuffer const * o)
+{
+    BamBam_CharBuffer * charbuffer = BamBam_CharBuffer_New();
+
+    if ( ! charbuffer )
+        return 0;
+
+    /* a buffer without capacity has no data block. skip allocation and copy
+       in this case: malloc(0) may return a null pointer, which would be
+       mistaken for an allocation failure, and memcpy must not be called
+       with a null source */
+    if ( o->buffersize )
+    {
+        charbuffer->buffer = (uint8_t *)malloc(o->buffersize);
+
+        if ( ! charbuffer->buffer )
+        {
+            BamBam_CharBuffer_Delete(charbuffer);
+            return 0;
+        }
+
+        memcpy(charbuffer->buffer,o->buffer,o->buffersize);
+    }
+
+    charbuffer->buffersize = o->buffersize;
+    charbuffer->bufferfill = o->bufferfill;
+
+    return charbuffer;
+}
+
+/**
+ * allocate an empty buffer object (no data block, capacity and fill are zero)
+ *
+ * @return new buffer or null pointer if memory allocation failed
+ **/
+BamBam_CharBuffer * BamBam_CharBuffer_New()
+{
+    /* calloc delivers the object zero filled */
+    return (BamBam_CharBuffer *)calloc(1,sizeof(BamBam_CharBuffer));
+}
+
+/* mark the buffer as empty; the allocated memory and the capacity are kept */
+void BamBam_CharBuffer_Reset(BamBam_CharBuffer * buffer)
+{
+ buffer->bufferfill = 0;
+}
+
+/**
+ * append one byte to the buffer. a full buffer is enlarged to twice its
+ * capacity (an empty one to capacity 1) first.
+ *
+ * @param buffer buffer to append to
+ * @param c byte to be appended
+ * @return 0 on success, -1 if the buffer could not be enlarged (the buffer is
+ *         left unchanged in this case)
+ **/
+int BamBam_CharBuffer_PushChar(BamBam_CharBuffer * buffer, uint8_t c)
+{
+    if ( buffer->bufferfill == buffer->buffersize )
+    {
+        uint64_t const newbuffersize = (buffer->buffersize) ? (2*buffer->buffersize) : 1;
+        uint8_t * newbuffer = 0;
+
+        /* refuse sizes which wrapped around or do not fit into size_t */
+        if (
+            newbuffersize <= buffer->buffersize
+            ||
+            (uint64_t)((size_t)newbuffersize) != newbuffersize
+        )
+            return -1;
+
+        /* realloc accepts the null pointer of a buffer which was never filled
+           (copying from it via memcpy would not be valid) and keeps the
+           old block intact on failure */
+        newbuffer = (uint8_t *)realloc(buffer->buffer,(size_t)newbuffersize);
+
+        if ( ! newbuffer )
+            return -1;
+
+        buffer->buffer = newbuffer;
+        buffer->buffersize = newbuffersize;
+    }
+
+    assert ( buffer->bufferfill < buffer->buffersize );
+
+    buffer->buffer [ buffer->bufferfill ++ ] = c;
+
+    return 0;
+}
+
+/* free a buffer object and its data block; a null pointer is ignored */
+void BamBam_CharBuffer_Delete(BamBam_CharBuffer * buffer)
+{
+    if ( ! buffer )
+        return;
+
+    free(buffer->buffer);
+    free(buffer);
+}
+
+/**
+ * map a base symbol to a number: A, C, G, T (upper case only) are mapped to
+ * 0, 1, 2, 3, every other symbol is mapped to 4
+ **/
+int BamBam_MapBase(uint8_t const c)
+{
+    if ( c == 'A' )
+        return 0;
+    if ( c == 'C' )
+        return 1;
+    if ( c == 'G' )
+        return 2;
+    if ( c == 'T' )
+        return 3;
+
+    return 4;
+}
+
diff --git a/src/bambamc/BamBam_CharBuffer.h b/src/bambamc/BamBam_CharBuffer.h
new file mode 100644
index 0000000..837bcc6
--- /dev/null
+++ b/src/bambamc/BamBam_CharBuffer.h
@@ -0,0 +1,105 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#if ! defined(BAMBAM_CHARBUFFER_H)
+#define BAMBAM_CHARBUFFER_H
+
+#include <stdint.h>
+
+#if defined(_linux)
+#include <alloca.h>
+#endif
+
+#include <stdlib.h>
+
+/* growable byte buffer */
+typedef struct _BamBam_CharBuffer
+{
+ /* data block, null pointer as long as nothing was stored */
+ uint8_t * buffer;
+ /* capacity of the data block in bytes */
+ uint64_t buffersize;
+ /* number of bytes in use */
+ uint64_t bufferfill;
+} BamBam_CharBuffer;
+
+#include <bambamc/BamBam_Unused.h>
+
+/* allocate an empty buffer; returns null pointer on failure */
+extern BamBam_CharBuffer * BamBam_CharBuffer_New() BAMBAM_WARN_IF_UNUSEDRESULT;
+/* append one byte, enlarging the buffer if necessary; 0 on success, -1 on failure */
+extern int BamBam_CharBuffer_PushChar(BamBam_CharBuffer * buffer, uint8_t c) BAMBAM_WARN_IF_UNUSEDRESULT;
+/* set fill state to zero, keeping the allocated memory */
+extern void BamBam_CharBuffer_Reset(BamBam_CharBuffer * buffer);
+/* free buffer object and data block; null pointer is accepted */
+extern void BamBam_CharBuffer_Delete(BamBam_CharBuffer * buffer);
+/* map A,C,G,T to 0,1,2,3 and everything else to 4 */
+extern int BamBam_MapBase(uint8_t const c);
+/* copy a buffer; returns null pointer on failure */
+extern BamBam_CharBuffer * BamBam_CharBuffer_Clone(BamBam_CharBuffer const * o) BAMBAM_WARN_IF_UNUSEDRESULT;
+
+/*
+ * append byte c to buffer, storing it directly if there is free space and
+ * calling BamBam_CharBuffer_PushChar otherwise.
+ *
+ * ret is only assigned when BamBam_CharBuffer_PushChar is called, so it needs
+ * to be initialised (to a non negative value) by the caller. buffer is
+ * evaluated more than once and thus must not have side effects. the
+ * expansion ends with a semicolon (kept for compatibility with existing uses),
+ * so the macro cannot be the body of an if which has an else branch.
+ */
+#define BamBam_CharBuffer_PushCharQuick(buffer,c,ret) \
+    do { \
+        if ((buffer)->bufferfill < (buffer)->buffersize) \
+        { \
+            (buffer)->buffer[(buffer)->bufferfill++] = (c); \
+        } \
+        else \
+            (ret) = BamBam_CharBuffer_PushChar((buffer),(c)); \
+    } while(0);
+
+/*
+ * append the number n to buffer byte by byte, least significant byte first;
+ * the number of bytes is sizeof(n). ret is set to -1 if appending fails and
+ * is not assigned otherwise, so it needs to be initialised (to a non negative
+ * value) by the caller. n and buffer are evaluated more than once.
+ */
+#define BamBam_CharBuffer_PushLE(buffer,n,ret) \
+    do { \
+        unsigned int BamBam_pushle_shift; \
+        for ( BamBam_pushle_shift = 0; BamBam_pushle_shift < 8*sizeof(n); BamBam_pushle_shift += 8 ) \
+        { \
+            BamBam_CharBuffer_PushCharQuick((buffer),(uint8_t)(((uint8_t)((n) >> BamBam_pushle_shift))&0xFFu),ret); \
+            if ( (ret) < 0 ) \
+            { \
+                (ret) = -1; \
+                break; \
+            } \
+        } \
+    } while(0);
+
+/*
+ * append the characters of the zero terminated string s to buffer (the
+ * terminator is not appended). appending stops as soon as ret is negative;
+ * ret is only assigned when the buffer has to be enlarged, so it needs to be
+ * initialised (to a non negative value) by the caller.
+ */
+#define BamBam_CharBuffer_PushString(buffer,s,ret) \
+    do { \
+        /* the name is chosen such that it does not hide a variable used in s */ \
+        char const * BamBam_pushstring_p = (s); \
+        while ( *BamBam_pushstring_p ) \
+        { \
+            BamBam_CharBuffer_PushCharQuick((buffer),*(BamBam_pushstring_p++),ret); \
+            if ( (ret) < 0 ) \
+            { \
+                break; \
+            } \
+        } \
+    } while(0);
+
+/*
+ * append the decimal representation of the unsigned number n (converted to
+ * uint64_t) to buffer. nothing is appended if ret is negative on entry and
+ * appending stops as soon as ret becomes negative.
+ *
+ * the digits are collected in a fixed size array (a 64 bit number has at most
+ * 20 decimal digits), so alloca is not required, and n is evaluated only once.
+ */
+#define BamBam_CharBuffer_PushNumber(buffer,n,ret) \
+    do { \
+        uint64_t BamBam_pushnumber_v = (n); \
+        char BamBam_pushnumber_digits[20]; \
+        unsigned int BamBam_pushnumber_len = 0; \
+        unsigned int BamBam_pushnumber_i; \
+        /* produce digits starting from the least significant one, zero yields a single 0 */ \
+        do { \
+            BamBam_pushnumber_digits[BamBam_pushnumber_len++] = (char)((BamBam_pushnumber_v % 10)+'0'); \
+            BamBam_pushnumber_v /= 10; \
+        } while ( BamBam_pushnumber_v ); \
+        /* append in reverse order, i.e. most significant digit first */ \
+        for ( BamBam_pushnumber_i = BamBam_pushnumber_len; (ret) >= 0 && BamBam_pushnumber_i > 0; --BamBam_pushnumber_i ) \
+            (ret) = BamBam_CharBuffer_PushChar((buffer),(uint8_t)BamBam_pushnumber_digits[BamBam_pushnumber_i-1]); \
+    } while(0);
+#endif
+
diff --git a/src/bambamc/BamBam_Chromosome.c b/src/bambamc/BamBam_Chromosome.c
new file mode 100644
index 0000000..0c6fe70
--- /dev/null
+++ b/src/bambamc/BamBam_Chromosome.c
@@ -0,0 +1,55 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#include <bambamc/BamBam_Chromosome.h>
+#include <bambamc/BamBam_StrDup.h>
+#include <stdlib.h>
+#include <string.h>
+
+/**
+ * create a chromosome object holding a copy of the given name and the given
+ * length; the header line is left unset
+ *
+ * @param rname name of chromosome
+ * @param rlength length of chromosome
+ * @return new object or null pointer if memory allocation failed
+ **/
+BamBam_Chromosome * BamBam_Chromosome_New(char const * rname, uint64_t const rlength)
+{
+    /* zero filled object */
+    BamBam_Chromosome * const chr = (BamBam_Chromosome *)calloc(1,sizeof(BamBam_Chromosome));
+
+    if ( chr )
+    {
+        chr->name = BamBam_StrDup(rname);
+
+        if ( chr->name )
+        {
+            chr->length = rlength;
+            return chr;
+        }
+
+        /* copying the name failed */
+        free(chr);
+    }
+
+    return 0;
+}
+
+/* free a chromosome object including its name and header line; a null pointer is ignored */
+void BamBam_Chromosome_Delete(BamBam_Chromosome * chr)
+{
+    if ( ! chr )
+        return;
+
+    free(chr->headerline);
+    free(chr->name);
+    free(chr);
+}
diff --git a/src/bambamc/BamBam_Chromosome.h b/src/bambamc/BamBam_Chromosome.h
new file mode 100644
index 0000000..564a28f
--- /dev/null
+++ b/src/bambamc/BamBam_Chromosome.h
@@ -0,0 +1,35 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#if ! defined(BAMBAM_CHROMOSOME_H)
+#define BAMBAM_CHROMOSOME_H
+
+#include <bambamc/BamBam_Unused.h>
+#include <stdint.h>
+
+/* name and length of a reference sequence */
+typedef struct _BamBam_Chromosome
+{
+ /* name (copy made by BamBam_Chromosome_New, freed by BamBam_Chromosome_Delete) */
+ char * name;
+ /* length */
+ uint64_t length;
+ /* header line; left as null pointer by BamBam_Chromosome_New, freed by
+    BamBam_Chromosome_Delete. presumably the header text line belonging
+    to this sequence -- confirm where it is set */
+ char * headerline;
+} BamBam_Chromosome;
+
+/* create object from name (which is copied) and length; returns null pointer on failure */
+BamBam_Chromosome * BamBam_Chromosome_New(char const * rname, uint64_t const rlength) BAMBAM_WARN_IF_UNUSEDRESULT;
+/* free object, name and header line; null pointer is accepted */
+void BamBam_Chromosome_Delete(BamBam_Chromosome * chr);
+#endif
diff --git a/src/bambamc/BamBam_CollatorState.h b/src/bambamc/BamBam_CollatorState.h
new file mode 100644
index 0000000..7e1fb2f
--- /dev/null
+++ b/src/bambamc/BamBam_CollatorState.h
@@ -0,0 +1,30 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#if ! defined(BAMBAM_COLLATORSTATE_H)
+#define BAMBAM_COLLATORSTATE_H
+
+/* states of a collator; the values are the ones the compiler assigns by default */
+enum BamBam_CollatorState
+{
+    BAMBAM_COLLATE_READING_INPUT = 0,
+    BAMBAM_COLLATE_MERGING = 1,
+    BAMBAM_COLLATE_DONE = 2,
+    BAMBAM_COLLATE_FAILED = 3
+};
+#endif
diff --git a/src/bambamc/BamBam_Config.h.in b/src/bambamc/BamBam_Config.h.in
new file mode 100644
index 0000000..35410b2
--- /dev/null
+++ b/src/bambamc/BamBam_Config.h.in
@@ -0,0 +1,24 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#if ! defined(BAMBAMC_CONFIG_H)
+#define BAMBAMC_CONFIG_H
+
+@HAVEWINDOWS_MKDIR@
+
+#endif
diff --git a/src/bambamc/BamBam_FastQRead.h b/src/bambamc/BamBam_FastQRead.h
new file mode 100644
index 0000000..7535ea6
--- /dev/null
+++ b/src/bambamc/BamBam_FastQRead.h
@@ -0,0 +1,33 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#if ! defined(FASTQREAD_H)
+#define FASTQREAD_H
+
+/* kinds of alignment sets */
+enum alignment_type
+{
+    BAMBAM_ALIGNMENT_TYPE_UNDEFINED = -2,
+    BAMBAM_ALIGNMENT_TYPE_NONE = -1,
+    BAMBAM_ALIGNMENT_TYPE_SINGLE = 1,
+    BAMBAM_ALIGNMENT_TYPE_COMPLETE_PAIR = 2,
+    BAMBAM_ALIGNMENT_TYPE_ORPHAN1_PAIR = 3,
+    BAMBAM_ALIGNMENT_TYPE_ORPHAN2_PAIR = 4
+};
+
+/* view on one FastQ entry; all pointers are to constant data, the code
+   setting them is not part of this header */
+typedef struct _BamBam_FastQRead
+{
+ /* read name and its length */
+ char const * name;
+ unsigned int namelength;
+ /* sequence and its length */
+ char const * seq;
+ unsigned int seqlength;
+ /* quality string; presumably seqlength symbols like seq -- confirm with producer */
+ char const * qual;
+} BamBam_FastQRead;
+#endif
diff --git a/src/bambamc/BamBam_FormatAlignment.c b/src/bambamc/BamBam_FormatAlignment.c
new file mode 100644
index 0000000..83922bd
--- /dev/null
+++ b/src/bambamc/BamBam_FormatAlignment.c
@@ -0,0 +1,166 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#include <bambamc/BamBam_FormatAlignment.h>
+#include <bambamc/BamBam_BamSingleAlignment.h>
+#include <bambamc/BamBam_BamFlagBase.h>
+#include <ctype.h>
+
+/* length of the FastQ name line: @, name, /1 or /2 for paired reads, line terminator */
+static uint64_t BamBam_GetFastqNameLineLength(unsigned int const qnamelen, uint32_t const flags)
+{
+    /* two extra symbols for the read number suffix of paired reads */
+    unsigned int const suffixlen = ( flags & BAMBAMC_FPAIRED ) ? 2 : 0;
+
+    return 1 /* @ */ + qnamelen + suffixlen + 1 /* \n */;
+}
+/* length of the FastQ sequence line: the bases and the line terminator */
+static uint64_t BamBam_GetFastqSeqLineLength(unsigned int const seqlen)
+{
+    unsigned int const linelen = seqlen + 1 /* \n */;
+    return linelen;
+}
+/* length of the FastQ plus line: + and the line terminator */
+static uint64_t BamBam_GetFastqPlusLineLength()
+{
+    enum { pluslen = 1, termlen = 1 };
+    return pluslen + termlen;
+}
+/* length of the FastQ quality line: one symbol per base and the line terminator */
+static uint64_t BamBam_GetFastqQualLineLength(unsigned int const seqlen)
+{
+    unsigned int const linelen = seqlen + 1 /* \n */;
+    return linelen;
+}
+
+/**
+ * number of bytes of a complete FastQ entry (name line, sequence line, plus
+ * line and quality line, each including its terminator)
+ *
+ * @param qnamelen length of read name
+ * @param seqlen length of sequence
+ * @param flags alignment flags (paired reads get a two symbol name suffix)
+ * @return length of entry in bytes
+ **/
+static uint64_t BamBam_GetFastqEntryLength(unsigned int const qnamelen, unsigned int const seqlen, uint32_t const flags)
+{
+    /* BamBam_GetFastqPlusLineLength has no parameters; calling it with an
+       argument as before is not valid C */
+    return
+        BamBam_GetFastqNameLineLength(qnamelen,flags) +
+        BamBam_GetFastqSeqLineLength(seqlen) +
+        BamBam_GetFastqPlusLineLength() +
+        BamBam_GetFastqQualLineLength(seqlen);
+}
+/**
+ * write the FastQ name line (@ line) to opc and return the pointer behind it.
+ * for paired reads /1 (read 1 flag set) or /2 (otherwise) is inserted in
+ * front of the first white space symbol of the name, or at the end of the
+ * name if it does not contain white space.
+ **/
+static char * BamBam_PutAtLine(
+    char const * qname,
+    unsigned int const qnamelen,
+    uint32_t const flags,
+    char * opc,
+    char const term
+)
+{
+    /* isspace results for all byte values, computed on first call */
+    static int spaceTable[256];
+    static int spaceTableInitialized = 0;
+
+    char const * const nameend = qname+qnamelen;
+
+    if ( ! spaceTableInitialized )
+    {
+        int sym = 0;
+        while ( sym < 256 )
+        {
+            spaceTable[sym] = isspace(sym);
+            ++sym;
+        }
+        spaceTableInitialized = 1;
+    }
+
+    *(opc++) = '@';
+
+    if ( flags & BAMBAMC_FPAIRED )
+    {
+        /* part of name in front of first white space */
+        for ( ; qname != nameend && !spaceTable[(int)((uint8_t)(*qname))]; ++qname )
+            *(opc++) = *qname;
+
+        /* read number suffix */
+        *(opc++) = '/';
+        *(opc++) = ( flags & BAMBAMC_FREAD1 ) ? '1' : '2';
+    }
+
+    /* rest of name (complete name for unpaired reads) */
+    for ( ; qname != nameend; ++qname )
+        *(opc++) = *qname;
+
+    *(opc++) = term;
+
+    return opc;
+}
+/* write the FastQ plus line (+ and terminator) to opc and return the pointer behind it */
+static char * BamBam_PutPlusLine(char * opc, char const term)
+{
+    opc[0] = '+';
+    opc[1] = term;
+    return opc + 2;
+}
+/**
+ * write an alignment as FastQ entry to opc and return the pointer behind the
+ * entry. the query and quality strings are read from alignment->query and
+ * alignment->qual (the caller decodes them before calling this function) and
+ * opc needs to provide sufficient space.
+ **/
+static char * BamBam_PutAlignmentFastQ(BamBam_BamSingleAlignment const * alignment, char * opc, char const term)
+{
+    char const * const readname = BamBam_BamSingleAlignment_GetReadName(alignment);
+    uint32_t const alflags = BamBam_BamSingleAlignment_GetFlags(alignment);
+    int32_t const lseq = BamBam_BamSingleAlignment_GetLSeq(alignment);
+
+    /* name */
+    opc = BamBam_PutAtLine(readname,strlen(readname),alflags,opc,term);
+
+    /* bases */
+    memcpy(opc,alignment->query,lseq);
+    opc += lseq;
+    *(opc++) = term;
+
+    /* plus */
+    opc = BamBam_PutPlusLine(opc,term);
+
+    /* base qualities */
+    memcpy(opc,alignment->qual,lseq);
+    opc += lseq;
+    *(opc++) = term;
+
+    return opc;
+}
+/* number of bytes the FastQ entry for alignment takes */
+static uint64_t BamBam_GetFastQAlignmentLength(BamBam_BamSingleAlignment const * alignment)
+{
+    int32_t const lseq = BamBam_BamSingleAlignment_GetLSeq(alignment);
+    uint32_t const alflags = BamBam_BamSingleAlignment_GetFlags(alignment);
+    char const * const readname = BamBam_BamSingleAlignment_GetReadName(alignment);
+    unsigned int const readnamelen = strlen(readname);
+
+    return BamBam_GetFastqEntryLength(readnamelen, lseq, alflags);
+}
+/**
+ * format an alignment as FastQ entry into a buffer managed via buffer and
+ * bufferlen. if the current buffer is too small it is freed and replaced by a
+ * new one of the required size.
+ *
+ * @param alignment alignment (query and quality string are decoded here)
+ * @param buffer pointer to buffer pointer, updated on reallocation
+ * @param bufferlen pointer to buffer size, updated on reallocation
+ * @param term line terminator
+ * @return number of bytes written to *buffer or -1 on failure (after a failed
+ *         allocation *buffer is a null pointer and *bufferlen is zero)
+ **/
+int BamBam_PutAlignmentFastQBuffer(
+    BamBam_BamSingleAlignment * alignment,
+    char ** buffer,
+    unsigned int * bufferlen,
+    char const term
+)
+{
+    unsigned int const required = BamBam_GetFastQAlignmentLength(alignment);
+
+    /* replace buffer if it is too small */
+    if ( *bufferlen < required )
+    {
+        free(*buffer);
+        *buffer = (char *)malloc(required);
+
+        if ( ! *buffer )
+        {
+            *bufferlen = 0;
+            return -1;
+        }
+
+        *bufferlen = required;
+    }
+
+    /* decode query first, then quality; stop on first failure */
+    if (
+        BamBam_BamSingleAlignment_DecodeQuery(alignment) < 0
+        ||
+        BamBam_BamSingleAlignment_DecodeQual(alignment) < 0
+    )
+        return -1;
+
+    return BamBam_PutAlignmentFastQ(alignment,*buffer,term) - *buffer;
+}
+
diff --git a/src/bambamc/BamBam_FormatAlignment.h b/src/bambamc/BamBam_FormatAlignment.h
new file mode 100644
index 0000000..ee0e6c2
--- /dev/null
+++ b/src/bambamc/BamBam_FormatAlignment.h
@@ -0,0 +1,29 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#if ! defined(BAMBAM_FORMATALIGNMENT_H)
+#define BAMBAM_FORMATALIGNMENT_H
+
+#include <bambamc/BamBam_Unused.h>
+#include <bambamc/BamBam_BamSingleAlignment.h>
+#include <bambamc/BamBam_Config.h>
+#include <stdint.h>
+
+/* format alignment as FastQ entry into *buffer using term as line terminator.
+   *buffer is freed and newly allocated if *bufferlen is too small (both are
+   updated). returns the number of bytes written or -1 on failure */
+extern int BamBam_PutAlignmentFastQBuffer(BamBam_BamSingleAlignment * alignment, char ** buffer, unsigned int * bufferlen, char const term);
+#endif
diff --git a/src/bambamc/BamBam_FormatNumber.c b/src/bambamc/BamBam_FormatNumber.c
new file mode 100644
index 0000000..4894de5
--- /dev/null
+++ b/src/bambamc/BamBam_FormatNumber.c
@@ -0,0 +1,140 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#include <bambamc/BamBam_FormatNumber.h>
+#include <bambamc/BamBam_StrDup.h>
+#include <stdio.h>
+
+#if defined(__linux__) /* compiler-defined on Linux; <alloca.h> declares alloca() there */
+#include <alloca.h>
+#endif
+
+#include <stdlib.h>
+
+/* Allocate the printf format "%0<numlen>llu" (zero padded, width numlen).
+   Returns a malloc'ed string the caller must free, or 0 if out of memory. */
+char * BamBam_ConstructNumberFormatString(int numlen)
+{
+    unsigned int capacity = 0;
+    char * format = 0;
+    int digits = 0;
+    int rest = 0;
+
+    /* count the decimal digits needed to print numlen (none for 0) */
+    for ( rest = numlen; rest != 0; rest /= 10 )
+        ++digits;
+
+    /* '%','0','l','l','u' and the terminator take 6 bytes; one byte of
+       slack covers a '-' sign */
+    capacity = 6 + digits + 1;
+    format = (char *)malloc(capacity);
+
+    if ( ! format )
+        return 0;
+
+    snprintf(format,capacity,"%%0%dllu",numlen);
+
+    return format;
+}
+
+/* Format num as decimal, left padded with zeros to at least minlen digits.
+   Returns a malloc'ed string the caller must free, or 0 if out of memory. */
+char * BamBam_FormatNumberMinLenAlt(uint64_t const num, int const minlen)
+{
+    int numlen = 0;      /* number of characters in the result */
+    uint64_t tnum = num; /* value left while peeling off digits */
+    char * s = 0;        /* result string */
+    char * t = 0;        /* write cursor, moves from the end towards s */
+
+    /* count decimal digits; zero still prints as one digit */
+    do
+    {
+        tnum /= 10;
+        numlen++;
+    } while ( tnum );
+
+    if ( numlen < minlen )
+        numlen = minlen;
+
+    /* heap allocate directly: an alloca() sized by the caller supplied
+       minlen could overflow the stack */
+    s = (char *)malloc((size_t)numlen+1);
+
+    if ( ! s )
+        return 0;
+    s[numlen] = 0;
+
+    /* fill in the digits starting from the least significant one; once
+       num is exhausted the remaining positions receive '0' padding */
+    t = s + numlen;
+    tnum = num;
+    while ( t != s )
+    {
+        *(--t) = '0' + (tnum%10);
+        tnum /= 10;
+    }
+
+    return s;
+}
+
+/* Format num as decimal, left padded with zeros to at least minlen digits;
+   a minlen below 2 never adds padding.
+   Returns a malloc'ed string, or 0 if out of memory.
+   The caller owns the result and releases it with free(). */
+char * BamBam_FormatNumberMinLen(uint64_t const num, int minlen)
+{
+    /* value left while counting digits */
+    uint64_t tnum = num;
+
+    /* number of characters in the result */
+    int numlen = 0;
+
+    /* result string */
+    char * smem = 0;
+
+    /* count decimal digits; zero still prints as one digit */
+    do
+    {
+        tnum /= 10;
+        numlen++;
+    } while ( tnum );
+
+    /* widen to the requested minimum */
+    if ( numlen < minlen )
+        numlen = minlen;
+
+    /* digits plus the terminating 0 */
+    smem = (char *)malloc((size_t)numlen+1);
+
+    if ( ! smem )
+        return 0;
+
+    /* '*' takes the field width from the argument list, so the format
+       string can be a literal: there is no second allocation that might
+       fail and no format text assembled at run time; numlen is at least
+       1 here, so the width is always positive */
+    snprintf(smem,(size_t)numlen+1,"%0*llu",numlen,(unsigned long long)num);
+
+    return smem;
+}
+
+char * BamBam_FormatNumber(uint64_t const num) /* decimal string of num without zero padding (minimum length -1); result comes straight from BamBam_FormatNumberMinLen */
+{
+ return BamBam_FormatNumberMinLen(num,-1);
+}
diff --git a/src/bambamc/BamBam_FormatNumber.h b/src/bambamc/BamBam_FormatNumber.h
new file mode 100644
index 0000000..c969bfe
--- /dev/null
+++ b/src/bambamc/BamBam_FormatNumber.h
@@ -0,0 +1,31 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#if ! defined(BAMBAM_FORMATNUMBER_H)
+#define BAMBAM_FORMATNUMBER_H
+
+#include <bambamc/BamBam_Unused.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+
+extern char * BamBam_ConstructNumberFormatString(int numlen) BAMBAM_WARN_IF_UNUSEDRESULT;
+extern char * BamBam_FormatNumberMinLen(uint64_t const num, int minlen) BAMBAM_WARN_IF_UNUSEDRESULT;
+extern char * BamBam_FormatNumber(uint64_t const num) BAMBAM_WARN_IF_UNUSEDRESULT;
+#endif
diff --git a/src/bambamc/BamBam_GzipFileDecoder.c b/src/bambamc/BamBam_GzipFileDecoder.c
new file mode 100644
index 0000000..2b5c597
--- /dev/null
+++ b/src/bambamc/BamBam_GzipFileDecoder.c
@@ -0,0 +1,70 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#include <bambamc/BamBam_GzipFileDecoder.h>
+
+/* Load the next alignment from the reader into the decoder's own record.
+   Returns that record if the load call reports a positive value, else 0. */
+BamBam_BamSingleAlignment * BamBam_GzipFileDecoder_DecodeAlignment(BamBam_GzipFileDecoder * object)
+{
+    BamBam_BamSingleAlignment * const record = object->alignment;
+    int const status = BamBam_BamSingleAlignment_LoadAlignment(record,object->reader);
+
+    return ( status > 0 ) ? record : 0;
+}
+
+/* Release a decoder together with the alignment record and reader it owns.
+   A null pointer is accepted. Always returns 0 so callers can reset their
+   handle with the result. */
+BamBam_GzipFileDecoder * BamBam_GzipFileDecoder_Delete(BamBam_GzipFileDecoder * object)
+{
+    if ( ! object )
+        return 0;
+
+    if ( object->alignment )
+        BamBam_BamSingleAlignment_Delete(object->alignment);
+    object->alignment = 0;
+
+    if ( object->reader )
+        BamBam_GzipReader_Delete(object->reader);
+    object->reader = 0;
+    free(object);
+    return 0;
+}
+
+/* Create a decoder for the gzip stream named filename. Returns 0 if the
+   object, its reader or its alignment record cannot be created. */
+BamBam_GzipFileDecoder * BamBam_GzipFileDecoder_New(char const * filename)
+{
+    BamBam_GzipFileDecoder * object = (BamBam_GzipFileDecoder *)malloc(sizeof(BamBam_GzipFileDecoder));
+
+    if ( ! object )
+        return 0;
+
+    /* malloc leaves the members indeterminate; clear alignment now because
+       the cleanup below inspects it even when no record exists yet */
+    object->alignment = 0;
+    object->reader = BamBam_GzipReader_New(filename);
+    if ( ! object->reader )
+        return BamBam_GzipFileDecoder_Delete(object);
+
+    object->alignment = BamBam_BamSingleAlignment_New();
+    if ( ! object->alignment )
+        return BamBam_GzipFileDecoder_Delete(object);
+    return object;
+}
diff --git a/src/bambamc/BamBam_GzipFileDecoder.h b/src/bambamc/BamBam_GzipFileDecoder.h
new file mode 100644
index 0000000..2fb0961
--- /dev/null
+++ b/src/bambamc/BamBam_GzipFileDecoder.h
@@ -0,0 +1,34 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#if ! defined(BAMBAM_GZIPFILEDECODER_H)
+#define BAMBAM_GZIPFILEDECODER_H
+
+#include <bambamc/BamBam_BamSingleAlignment.h>
+#include <bambamc/BamBam_BamFlagBase.h>
+
+typedef struct _BamBam_GzipFileDecoder /* decoder state: one input reader plus one alignment record */
+{
+ BamBam_GzipReader * reader; /* input stream; released by BamBam_GzipFileDecoder_Delete */
+ BamBam_BamSingleAlignment * alignment; /* record filled and returned by BamBam_GzipFileDecoder_DecodeAlignment; released by BamBam_GzipFileDecoder_Delete */
+} BamBam_GzipFileDecoder;
+
+extern BamBam_BamSingleAlignment * BamBam_GzipFileDecoder_DecodeAlignment(BamBam_GzipFileDecoder * object);
+extern BamBam_GzipFileDecoder * BamBam_GzipFileDecoder_Delete(BamBam_GzipFileDecoder * object);
+extern BamBam_GzipFileDecoder * BamBam_GzipFileDecoder_New(char const * filename);
+#endif
diff --git a/src/bambamc/BamBam_GzipReader.c b/src/bambamc/BamBam_GzipReader.c
new file mode 100644
index 0000000..8cb9ed6
--- /dev/null
+++ b/src/bambamc/BamBam_GzipReader.c
@@ -0,0 +1,141 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#include <bambamc/BamBam_GzipReader.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/* Open the gzip file filename for reading ("rb").
+   Returns a new reader, or 0 if allocation or gzopen fails. */
+BamBam_GzipReader * BamBam_GzipReader_New_Filename(char const * filename)
+{
+    BamBam_GzipReader * const reader = (BamBam_GzipReader *)malloc(sizeof(BamBam_GzipReader));
+
+    if ( reader )
+    {
+        memset(reader,0,sizeof(BamBam_GzipReader));
+        reader->file = gzopen(filename,"rb");
+
+        /* without a stream the half built object is torn down again */
+        if ( ! reader->file )
+            return BamBam_GzipReader_Delete(reader);
+    }
+
+    return reader;
+}
+
+/* Wrap the already open descriptor fd in a gzip reader (gzdopen, "rb").
+   Returns a new reader, or 0 if allocation or gzdopen fails. */
+BamBam_GzipReader * BamBam_GzipReader_New_Fd(int fd)
+{
+    BamBam_GzipReader * const reader = (BamBam_GzipReader *)malloc(sizeof(BamBam_GzipReader));
+
+    if ( ! reader )
+        return 0;
+
+    memset(reader,0,sizeof(BamBam_GzipReader));
+
+    reader->file = gzdopen(fd,"rb");
+
+    /* Delete frees the object and yields 0 when no stream was obtained */
+    if ( reader->file )
+        return reader;
+    return BamBam_GzipReader_Delete(reader);
+}
+
+/* Open filename for reading; a null name or "-" selects standard input. */
+BamBam_GzipReader * BamBam_GzipReader_New(char const * filename)
+{
+    int const usestdin = ( filename == 0 ) || ( strcmp(filename,"-") == 0 );
+
+    return usestdin ? BamBam_GzipReader_New_Fd(STDIN_FILENO) : BamBam_GzipReader_New_Filename(filename);
+}
+
+/* Close the gzip stream (if one is open) and free the reader.
+   A null pointer is accepted. Always returns 0. */
+BamBam_GzipReader * BamBam_GzipReader_Delete(BamBam_GzipReader * object)
+{
+    if ( ! object )
+        return 0;
+
+    if ( object->file )
+        gzclose(object->file);
+    object->file = 0;
+
+    free(object);
+    return 0;
+}
+
+/* Return the next byte of the stream without consuming it.
+   Returns -1 for a missing reader or stream, when no byte can be read,
+   or when the byte cannot be pushed back. */
+int BamBam_GzipReader_Peek(BamBam_GzipReader * object)
+{
+    int next = -1;
+
+    if ( object && object->file )
+    {
+        next = gzgetc(object->file);
+
+        /* a byte that cannot be pushed back is reported as failure */
+        if ( next >= 0 && gzungetc(next,object->file) < 0 )
+            next = -1;
+    }
+    return ( next < 0 ) ? -1 : next;
+}
+
+int BamBam_GzipReader_Getc(BamBam_GzipReader * object) /* consumes one byte; the gzgetc result is passed through unchanged; object and its stream are not checked for null */
+{
+ return gzgetc(object->file);
+}
+
+/* Read a little endian 32 bit signed integer into *v.
+   Returns 0 on success and -1 on a short read (*v is 0 then). */
+int BamBam_GzipReader_GetInt32(BamBam_GzipReader * object, int32_t * v)
+{
+    uint32_t bits = 0;
+    int status = 0;
+
+    /* assemble the value unsigned: a signed accumulator would shift bytes
+       >= 0x80 into the sign bit, which is undefined behaviour */
+    status = BamBam_GzipReader_GetUInt32(object,&bits);
+
+    *v = ( status < 0 ) ? 0 : (int32_t)bits;
+
+    return status;
+}
+/* Read a little endian 32 bit unsigned integer into *v.
+   Returns 0 on success, -1 as soon as a byte cannot be read. */
+int BamBam_GzipReader_GetUInt32(BamBam_GzipReader * object, uint32_t * v)
+{
+    unsigned int shift = 0;
+    *v = 0;
+    for ( shift = 0; shift < 32; shift += 8 )
+    {
+        int const byte = BamBam_GzipReader_Getc(object);
+        if ( byte < 0 )
+            return -1;
+        *v |= ((uint32_t)byte) << shift;
+    }
+    return 0;
+}
+int BamBam_GzipReader_Read(BamBam_GzipReader * object, char * c, int l) /* thin wrapper: hands buffer c and byte count l to gzread and returns its result unchanged */
+{
+ return gzread(object->file,c,l);
+}
diff --git a/src/bambamc/BamBam_GzipReader.h b/src/bambamc/BamBam_GzipReader.h
new file mode 100644
index 0000000..fbf4fa9
--- /dev/null
+++ b/src/bambamc/BamBam_GzipReader.h
@@ -0,0 +1,39 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#if ! defined(BAMBAMC_GZIPREADER_H)
+#define BAMBAMC_GZIPREADER_H
+
+#include <zlib.h>
+#include <stdint.h>
+
+typedef struct _BamBam_GzipReader /* reading side wrapper around one zlib stream */
+{
+ gzFile file; /* stream obtained from gzopen or gzdopen; closed by BamBam_GzipReader_Delete */
+} BamBam_GzipReader;
+
+extern BamBam_GzipReader * BamBam_GzipReader_New_Filename(char const * filename);
+extern BamBam_GzipReader * BamBam_GzipReader_New_Fd(int fd);
+extern BamBam_GzipReader * BamBam_GzipReader_New(char const * filename);
+extern BamBam_GzipReader * BamBam_GzipReader_Delete(BamBam_GzipReader * object);
+extern int BamBam_GzipReader_Peek(BamBam_GzipReader * object);
+extern int BamBam_GzipReader_Getc(BamBam_GzipReader * object);
+extern int BamBam_GzipReader_GetInt32(BamBam_GzipReader * object, int32_t * v);
+extern int BamBam_GzipReader_GetUInt32(BamBam_GzipReader * object, uint32_t * v);
+extern int BamBam_GzipReader_Read(BamBam_GzipReader * object, char * c, int l);
+#endif
diff --git a/src/bambamc/BamBam_GzipWriter.c b/src/bambamc/BamBam_GzipWriter.c
new file mode 100644
index 0000000..4471b28
--- /dev/null
+++ b/src/bambamc/BamBam_GzipWriter.c
@@ -0,0 +1,145 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#include <bambamc/BamBam_GzipWriter.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/* Create filename as a gzip file written at compression level 0..9.
+   Returns a new writer, or 0 for a level outside that range or if
+   allocation or gzopen fails. */
+BamBam_GzipWriter * BamBam_GzipWriter_New_Filename(char const * filename, int level)
+{
+    BamBam_GzipWriter * writer = 0;
+    char mode[4] = { 'w', 'b', '0', 0 };
+
+    if ( level < 0 || level > 9 )
+        return 0;
+
+    /* zlib takes the compression level as a digit appended to the mode */
+    mode[2] = level + '0';
+
+    writer = (BamBam_GzipWriter *)malloc(sizeof(BamBam_GzipWriter));
+    if ( writer )
+    {
+        memset(writer,0,sizeof(BamBam_GzipWriter));
+        writer->file = gzopen(filename,mode);
+        if ( ! writer->file )
+            return BamBam_GzipWriter_Delete(writer,0);
+    }
+    return writer;
+}
+
+/* Attach a gzip writer with compression level 0..9 to the open descriptor fd.
+   Returns a new writer, or 0 for a level outside that range or if
+   allocation or gzdopen fails. */
+BamBam_GzipWriter * BamBam_GzipWriter_New_Fd(int fd, int level)
+{
+    char mode[4] = { 'w', 'b', '0', 0 };
+    BamBam_GzipWriter * writer = 0;
+
+    if ( level < 0 || level > 9 )
+        return 0;
+
+    /* the level travels as the third character of the mode string */
+    mode[2] = level + '0';
+
+    writer = (BamBam_GzipWriter *)malloc(sizeof(BamBam_GzipWriter));
+
+    if ( ! writer )
+        return 0;
+
+    memset(writer,0,sizeof(BamBam_GzipWriter));
+    writer->file = gzdopen(fd,mode);
+
+    /* Delete frees the object and yields 0 when no stream was obtained */
+    return writer->file ? writer : BamBam_GzipWriter_Delete(writer,0);
+}
+
+/* Open filename for writing; a null name or "-" selects standard output. */
+BamBam_GzipWriter * BamBam_GzipWriter_New(char const * filename, int level)
+{
+    if ( !filename || !strcmp(filename,"-") )
+        return BamBam_GzipWriter_New_Fd(STDOUT_FILENO,level);
+    return BamBam_GzipWriter_New_Filename(filename,level);
+}
+
+/* Close the stream (if one is open) and free the writer; a null pointer
+   is accepted. If state is non null it receives 0, or -1 when gzclose
+   does not return Z_OK. Always returns 0. */
+BamBam_GzipWriter * BamBam_GzipWriter_Delete(BamBam_GzipWriter * object, int * state)
+{
+    /* outcome reported through state; stays 0 when nothing gets closed */
+    int closestatus = 0;
+
+    if ( object )
+    {
+        /* the result of gzclose is the only failure this function can
+           observe */
+        if ( object->file && gzclose(object->file) != Z_OK )
+            closestatus = -1;
+
+        object->file = 0;
+
+        free(object);
+    }
+
+    /* state is optional */
+    if ( state )
+        *state = closestatus;
+
+    return 0;
+}
+
+int BamBam_GzipWriter_Putc(BamBam_GzipWriter * object, int c) /* thin wrapper: writes c with gzputc and returns its result unchanged; object is not checked for null */
+{
+ return gzputc(object->file,c);
+}
+
+/* Write v as four bytes, least significant first.
+   Returns 0 on success, -1 as soon as a byte cannot be written. */
+int BamBam_GzipWriter_PutInt32(BamBam_GzipWriter * object, int32_t v)
+{
+    /* shift an unsigned copy: right shifting a negative signed value is
+       implementation defined */
+    uint32_t const bits = (uint32_t)v;
+    int i = 0;
+    for ( i = 0; i < 4; ++i )
+        if ( BamBam_GzipWriter_Putc(object, (bits >> (8*i)) & 0xFF) < 0 )
+            return -1;
+    return 0;
+}
+/* Write v as four bytes, least significant first.
+   Returns 0 on success, -1 as soon as a byte cannot be written. */
+int BamBam_GzipWriter_PutUInt32(BamBam_GzipWriter * object, uint32_t v)
+{
+    unsigned int shift = 0;
+
+    for ( shift = 0; shift < 32; shift += 8 )
+    {
+        if ( BamBam_GzipWriter_Putc(object, (v >> shift) & 0xFF) < 0 )
+            return -1;
+    }
+
+    return 0;
+}
+int BamBam_GzipWriter_Write(BamBam_GzipWriter * object, char const * c, int l) /* thin wrapper: hands buffer c and byte count l to gzwrite and returns its result unchanged */
+{
+ return gzwrite(object->file,c,l);
+}
diff --git a/src/bambamc/BamBam_GzipWriter.h b/src/bambamc/BamBam_GzipWriter.h
new file mode 100644
index 0000000..9512275
--- /dev/null
+++ b/src/bambamc/BamBam_GzipWriter.h
@@ -0,0 +1,38 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#if ! defined(BAMBAMC_GZIPWRITER_H)
+#define BAMBAMC_GZIPWRITER_H
+
+#include <zlib.h>
+#include <stdint.h>
+
+typedef struct _BamBam_GzipWriter /* writing side wrapper around one zlib stream */
+{
+ gzFile file; /* stream obtained from gzopen or gzdopen; closed by BamBam_GzipWriter_Delete */
+} BamBam_GzipWriter;
+
+extern BamBam_GzipWriter * BamBam_GzipWriter_New_Filename(char const * filename, int level);
+extern BamBam_GzipWriter * BamBam_GzipWriter_New_Fd(int fd, int level);
+extern BamBam_GzipWriter * BamBam_GzipWriter_New(char const * filename, int level);
+extern BamBam_GzipWriter * BamBam_GzipWriter_Delete(BamBam_GzipWriter * object, int * state);
+extern int BamBam_GzipWriter_Putc(BamBam_GzipWriter * object, int c);
+extern int BamBam_GzipWriter_PutInt32(BamBam_GzipWriter * object, int32_t const v);
+extern int BamBam_GzipWriter_PutUInt32(BamBam_GzipWriter * object, uint32_t const v);
+extern int BamBam_GzipWriter_Write(BamBam_GzipWriter * object, char const * c, int l);
+#endif
diff --git a/src/bambamc/BamBam_Hash.c b/src/bambamc/BamBam_Hash.c
new file mode 100644
index 0000000..0860553
--- /dev/null
+++ b/src/bambamc/BamBam_Hash.c
@@ -0,0 +1,105 @@
+/*
+ * This file contains an adaption of some of Bob Jenkins' hash functions
+ * For licensing please see Bob Jenkins's remarks below.
+ */
+#include <bambamc/BamBam_Hash.h>
+
+/*--------------------------------------------------------------------
+mix -- mix 3 32-bit values reversibly.
+For every delta with one or two bit set, and the deltas of all three
+high bits or all three low bits, whether the original value of a,b,c
+is almost all zero or is uniformly distributed,
+* If mix() is run forward or backward, at least 32 bits in a,b,c
+have at least 1/4 probability of changing.
+* If mix() is run forward, every bit of c will change between 1/3 and
+2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
+mix() was built out of 36 single-cycle latency instructions in a
+structure that could support 2x parallelism, like so:
+a -= b;
+a -= c; x = (c>>13);
+b -= c; a ^= x;
+b -= a; x = (a<<8);
+c -= a; b ^= x;
+c -= b; x = (b>>13);
+...
+Unfortunately, superscalar Pentiums and Sparcs can't take advantage
+of that parallelism. They've also turned some of those single-cycle
+latency instructions into multi-cycle latency instructions. Still,
+this is the fastest good hash I could find. There were about 2^^68
+to choose from. I only looked at a billion or so.
+--------------------------------------------------------------------*/
+#define mix(a,b,c) do { a -= b; a -= c; a ^= (c>>13); b -= c; b -= a; b ^= (a<<8); c -= a; c -= b; c ^= (b>>13); a -= b; a -= c; a ^= (c>>12); \
+ b -= c; b -= a; b ^= (a<<16); c -= a; c -= b; c ^= (b>>5); a -= b; a -= c; a ^= (c>>3); b -= c; b -= a; b ^= (a<<10); c -= a; c -= b; c ^= (b>>15); } while(0) /* no trailing ';' here: the caller's own semicolon ends the statement, which keeps mix() usable in if/else */
+
+/*
+--------------------------------------------------------------------
+hash() -- hash a variable-length key into a 32-bit value
+k : the key (the unaligned variable-length array of bytes)
+len : the length of the key, counting by bytes
+initval : the previous hash value, or any 4-byte seed
+Returns a 32-bit value. Every bit of the key affects every bit of
+the return value. Every 1-bit and 2-bit delta achieves avalanche.
+About 36+6len instructions.
+
+The best hash table sizes are powers of 2. There is no need to do
+mod a prime (mod is sooo slow!). If you need less than 32 bits,
+use a bitmask. For example, if you need only 10 bits, do
+h = (h & hashmask(10));
+In which case, the hash table should have hashsize(10) elements.
+
+If you are hashing n strings (uint8_t **)k, do it like this:
+for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
+
+By Bob Jenkins, 1996. bob_jenkins at burtleburtle.net. You may use this
+code any way you wish, private, educational, or commercial. It's free.
+
+See http://burtleburtle.net/bob/hash/evahash.html
+Use for hash table lookup, or anything where one collision in 2^32 is
+acceptable. Do NOT use for cryptographic purposes.
+--------------------------------------------------------------------*/
+/* Hash the length bytes at k into a 32 bit value; initval seeds the state
+   (pass the previous result to chain several keys). */
+uint32_t hash(uint8_t const * k, uint32_t length, uint32_t initval /* = 0xb979379e */)
+{
+    /* internal state: a and b start at the golden ratio (an arbitrary
+       value), c at the previous hash value */
+    uint32_t a = 0x9e3779b9;
+    uint32_t b = 0x9e3779b9;
+    uint32_t c = initval;
+    /* bytes of the key not yet consumed */
+    uint32_t rest = length;
+    uint32_t i = 0;
+
+    /* consume the key in 12 byte groups, read as three little endian words */
+    for ( ; rest >= 12; k += 12, rest -= 12 )
+    {
+        a += (k[0] +((uint32_t)k[1]<<8) +((uint32_t)k[2]<<16) +((uint32_t)k[3]<<24));
+        b += (k[4] +((uint32_t)k[5]<<8) +((uint32_t)k[6]<<16) +((uint32_t)k[7]<<24));
+        c += (k[8] +((uint32_t)k[9]<<8) +((uint32_t)k[10]<<16)+((uint32_t)k[11]<<24));
+        mix(a,b,c);
+    }
+
+    /* the low byte of c is reserved for the key length */
+    c += length;
+
+    /* fold in the last 0..11 bytes: bytes 0-3 go to a, bytes 4-7 to b and
+       bytes 8-10 to the upper three bytes of c */
+    for ( i = 0; i < rest; ++i )
+    {
+        uint32_t const byte = k[i];
+        if ( i < 4 )
+            a += byte << (8*i);
+        else if ( i < 8 )
+            b += byte << (8*(i-4));
+        else
+            c += byte << (8*(i-7));
+    }
+
+    mix(a,b,c);
+    return c;
+}
+
+uint32_t hashDefaultSeed(uint8_t const * k, uint32_t length) /* hash() of the length bytes at k with the fixed seed 0xb979379e */
+{
+ return hash(k,length,0xb979379e);
+}
diff --git a/src/bambamc/BamBam_Hash.h b/src/bambamc/BamBam_Hash.h
new file mode 100644
index 0000000..e73212e
--- /dev/null
+++ b/src/bambamc/BamBam_Hash.h
@@ -0,0 +1,27 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#if ! defined(EVAHASH_H)
+#define EVAHASH_H
+
+#include <bambamc/BamBam_Unused.h>
+#include <stdint.h>
+
+extern uint32_t hash(uint8_t const * k, uint32_t length, uint32_t initval /* = 0xb979379e */) BAMBAM_WARN_IF_UNUSEDRESULT;
+extern uint32_t hashDefaultSeed(uint8_t const * k, uint32_t length) BAMBAM_WARN_IF_UNUSEDRESULT;
+#endif
diff --git a/src/bambamc/BamBam_LineBuffer.c b/src/bambamc/BamBam_LineBuffer.c
new file mode 100644
index 0000000..c2c7b74
--- /dev/null
+++ b/src/bambamc/BamBam_LineBuffer.c
@@ -0,0 +1,212 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#include <bambamc/BamBam_LineBuffer.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <assert.h>
+
+/* Create a line buffer over the open stream rfile with initsize bytes of
+   buffer space and read the first chunk of input into it.
+   Returns 0 if rfile is null, an allocation fails or the read reports an
+   error. The stream itself is not closed by BamBam_LineBuffer_Delete. */
+BamBam_LineBuffer * BamBam_LineBuffer_New(FILE * rfile, int initsize)
+{
+    size_t got = 0;
+    BamBam_LineBuffer * lb = (BamBam_LineBuffer *)malloc(sizeof(BamBam_LineBuffer));
+
+    if ( ! lb )
+        return 0;
+
+    memset(lb,0,sizeof(BamBam_LineBuffer));
+
+    lb->file = rfile;
+    if ( ! lb->file )
+        return BamBam_LineBuffer_Delete(lb);
+
+    lb->buffer = (char *)malloc(initsize);
+    if ( ! lb->buffer )
+        return BamBam_LineBuffer_Delete(lb);
+
+    /* [bufferptra,bufferptre) is the whole buffer, bufferptrout marks the
+       next unreturned byte */
+    lb->bufsize = initsize;
+    lb->bufferptra = lb->buffer;
+    lb->bufferptrout = lb->buffer;
+    lb->bufferptre = lb->buffer + lb->bufsize;
+
+    /* prefill; bufferptrin marks the end of the bytes actually read */
+    got = fread(lb->bufferptra,1,lb->bufsize,lb->file);
+    lb->bufferptrin = lb->bufferptra + got;
+
+    if ( ferror(lb->file) )
+        return BamBam_LineBuffer_Delete(lb);
+
+    lb->eof = feof(lb->file) ? 1 : 0;
+    return lb;
+}
+
+/* Free the line buffer and the memory it owns; null is accepted.
+   The underlying FILE is left open. Always returns 0. */
+BamBam_LineBuffer * BamBam_LineBuffer_Delete(BamBam_LineBuffer * object)
+{
+    if ( ! object )
+        return 0;
+
+    /* free(0) is a no-op, so the members need no null checks */
+    free(object->buffer);
+    object->buffer = 0;
+
+    free(object->tmpbuf);
+    object->tmpbuf = 0;
+
+    free(object);
+
+    return 0;
+}
+
+int BamBam_LineBuffer_GetLine(BamBam_LineBuffer * object, char const **a, char const **e) /* yields the next line as [*a,*e) with *e pointing at its '\n'; returns 0, or -1 when no input is left or an allocation or read fails */
+{
+ while ( 1 )
+ {
+ /* scan cursor; starts at the first byte not yet handed out */
+ char * lineend = object->bufferptrout;
+
+ /* advance to the next '\n' or to the end of the buffered data */
+ while ( lineend != object->bufferptrin && *(lineend) != '\n' )
+ ++lineend;
+
+ /* we reached the end of the data currently in memory */
+ if ( lineend == object->bufferptrin )
+ {
+ /* reached end of file, return what we have */
+ if ( object->eof )
+ {
+ /* this is the last line we will return */
+ if ( object->bufferptrout != object->bufferptrin )
+ {
+ /* if file ends with a newline (NOTE(review): the scan above found no '\n' in this range, so this test looks unreachable - confirm) */
+ if ( object->bufferptrin[-1] == '\n' )
+ {
+ *a = object->bufferptrout;
+ *e = object->bufferptrin-1;
+ object->bufferptrout = object->bufferptrin;
+ return 0;
+ }
+ /* otherwise we append an artificial newline; this needs one extra byte, so the line moves into a fresh buffer */
+ else
+ {
+ uint64_t const numbytes = lineend - object->bufferptrout;
+ char * tmpbuf = (char *)malloc(numbytes+1);
+
+ if ( ! tmpbuf )
+ return -1;
+
+ memcpy(tmpbuf,object->bufferptrout,numbytes);
+ tmpbuf[numbytes] = '\n';
+
+ free(object->buffer); /* the copy replaces the old buffer, which object->buffer owned */
+
+ object->buffer = tmpbuf;
+ object->bufsize = numbytes+1;
+ object->bufferptra = tmpbuf;
+ object->bufferptre = tmpbuf + object->bufsize;
+ object->bufferptrin = object->bufferptre;
+ object->bufferptrout = object->bufferptre; /* everything is consumed: the next call returns -1 */
+
+ *a = object->bufferptra;
+ *e = object->bufferptre - 1;
+ return 0;
+ }
+ }
+ else
+ {
+ return -1; /* end of file and nothing left to hand out */
+ }
+ }
+ /* we need to read more data */
+ else
+ {
+ /* do we need to extend the buffer? (the unfinished line already fills it completely) */
+ if (
+ object->bufferptrout == object->bufferptra
+ &&
+ object->bufferptrin == object->bufferptre
+ )
+ {
+ unsigned int const newbufsize = object->bufsize ? 2*object->bufsize : 1;
+ char * newbuf = realloc(object->buffer,newbufsize);
+
+ if ( newbuf )
+ {
+ /* re-base every cursor onto the new allocation, which may have moved */
+ object->bufferptre = newbuf + newbufsize;
+ object->bufferptrout = newbuf + (object->bufferptrout - object->bufferptra);
+ object->bufferptrin = newbuf + (object->bufferptrin - object->bufferptra);
+ object->bufferptra = newbuf;
+ object->buffer = newbuf;
+ object->bufsize = newbufsize;
+ }
+ else
+ {
+ return -1; /* realloc failed; the old buffer is left in place */
+ }
+ }
+ else
+ {
+ /* move data to front and fill rest of buffer */
+ uint64_t const used = object->bufferptrin - object->bufferptrout;
+ uint64_t const unused = object->bufsize - used;
+ size_t iobytes = 0;
+
+ memmove(object->bufferptra, object->bufferptrout,used);
+
+ object->bufferptrout = object->bufferptra;
+ object->bufferptrin = object->bufferptrout + used;
+
+ iobytes = fread(object->bufferptrin,1,unused,object->file);
+
+ if ( ferror(object->file) )
+ return -1;
+ if ( feof(object->file) )
+ object->eof = 1;
+
+ /* extend the valid data by the bytes just read */
+
+ object->bufferptrin += iobytes;
+ }
+ }
+ }
+ else
+ {
+ *a = object->bufferptrout;
+ *e = lineend;
+ assert ( *lineend == '\n' );
+ object->bufferptrout = lineend+1; /* the next call starts behind the '\n' */
+ return 0;
+ }
+ }
+
+ return -1;
+}
+void BamBam_LineBuffer_PutBack(BamBam_LineBuffer * object, char const *a) /* resets the output position to a, so the next BamBam_LineBuffer_GetLine scans from there; a is not validated against the buffer */
+{
+ object->bufferptrout = (char *)a;
+}
diff --git a/src/bambamc/BamBam_LineBuffer.h b/src/bambamc/BamBam_LineBuffer.h
new file mode 100644
index 0000000..7b9c241
--- /dev/null
+++ b/src/bambamc/BamBam_LineBuffer.h
@@ -0,0 +1,44 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#if ! defined(BAMBAMC_LINEBUFFER_H)
+#define BAMBAMC_LINEBUFFER_H
+
+#include <bambamc/BamBam_Unused.h>
+#include <stdio.h>
+
+typedef struct _BamBam_LineBuffer /* line oriented read buffer over a stdio stream */
+{
+ FILE * file; /* input stream; read with fread, not closed by BamBam_LineBuffer_Delete */
+
+ char * buffer; /* heap storage for buffered input; freed by BamBam_LineBuffer_Delete */
+ unsigned int bufsize; /* size of buffer in bytes */
+ int eof; /* set to 1 once feof() reported the end of file */
+ char * tmpbuf; /* freed by BamBam_LineBuffer_Delete; not assigned anywhere in BamBam_LineBuffer.c */
+
+ char * bufferptra; /* start of buffer */
+ char * bufferptrin; /* end of the valid data read so far */
+ char * bufferptrout; /* first byte not yet returned by BamBam_LineBuffer_GetLine */
+ char * bufferptre; /* end of buffer (bufferptra + bufsize) */
+} BamBam_LineBuffer;
+
+extern BamBam_LineBuffer * BamBam_LineBuffer_New(FILE * rfile, int initsize) BAMBAM_WARN_IF_UNUSEDRESULT;
+extern int BamBam_LineBuffer_GetLine(BamBam_LineBuffer * object, char const **a, char const **e) BAMBAM_WARN_IF_UNUSEDRESULT;
+extern void BamBam_LineBuffer_PutBack(BamBam_LineBuffer * object, char const *a);
+extern BamBam_LineBuffer * BamBam_LineBuffer_Delete(BamBam_LineBuffer * object);
+#endif
diff --git a/src/bambamc/BamBam_LineParsing.c b/src/bambamc/BamBam_LineParsing.c
new file mode 100644
index 0000000..0940cef
--- /dev/null
+++ b/src/bambamc/BamBam_LineParsing.c
@@ -0,0 +1,136 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#include <bambamc/BamBam_LineParsing.h>
+#include <bambamc/BamBam_CharBuffer.h>
+#include <bambamc/BamBam_StrDup.h>
+#include <assert.h>
+#include <ctype.h>
+#include <string.h>
+
+/**
+ * Find the end of the line starting at s.
+ *
+ * @param s null terminated text
+ * @return pointer to the first newline character in s or, if there is none,
+ *         to the terminating null byte
+ **/
+char const * BamBam_findLineEnd(char const * s)
+{
+    char const * cur = s;
+
+    for ( ; *cur != 0; ++cur )
+        if ( *cur == '\n' )
+            break;
+
+    return cur;
+}
+/**
+ * Skip the white space starting at s.
+ *
+ * Note that every white space character is skipped, not only line
+ * terminators: empty lines and leading blanks of the following line are
+ * consumed as well.
+ *
+ * @param s null terminated text, typically positioned on a line end
+ * @return pointer to the first non white space character at or after s,
+ *         or to the terminating null byte
+ **/
+char const * BamBam_skipEndOfLine(char const * s)
+{
+    /* isspace() is only defined for unsigned char values and EOF, so bytes
+       above 0x7f must not be passed as (possibly negative) plain char */
+    while ( *s && isspace((unsigned char)*s) )
+        s++;
+
+    return s;
+}
+/**
+ * Advance from somewhere inside a line to the start of the following text.
+ *
+ * @param s null terminated text
+ * @return position after the end of the current line and any white space
+ *         following it (see BamBam_skipEndOfLine)
+ **/
+char const * BamBam_nextLine(char const * s)
+{
+    char const * const lineend = BamBam_findLineEnd(s);
+
+    return BamBam_skipEndOfLine(lineend);
+}
+/**
+ * Length of the line starting at s.
+ *
+ * @param s null terminated text
+ * @return number of characters before the first newline or the
+ *         terminating null byte
+ **/
+unsigned int BamBam_getLineLength(char const * s)
+{
+    char const * const lineend = BamBam_findLineEnd(s);
+
+    return lineend - s;
+}
+/**
+ * Append the line starting at s, followed by a newline, to buffer.
+ *
+ * @param s null terminated text; characters up to (not including) the
+ *          first newline or null byte are copied
+ * @param buffer character buffer receiving the line
+ * @return 0 on success, otherwise the first non-zero value returned by
+ *         BamBam_CharBuffer_PushChar
+ **/
+int BamBam_copyLine(char const * s, BamBam_CharBuffer * buffer)
+{
+    unsigned int const linelen = BamBam_getLineLength(s);
+    unsigned int done = 0;
+
+    while ( done < linelen )
+    {
+        int const status = BamBam_CharBuffer_PushChar(buffer,s[done]);
+
+        /* stop at the first failing push */
+        if ( status )
+            return status;
+
+        ++done;
+    }
+
+    /* always terminate the copied line, even if the source line was not */
+    return BamBam_CharBuffer_PushChar(buffer,'\n');
+}
+/**
+ * Decide whether a header line is to be kept.
+ *
+ * A line is kept if it starts with '@' and its tag (the characters between
+ * the '@' and the first tab, newline or end of text) equals none of the
+ * entries of filtered.
+ *
+ * @param s start of the line
+ * @param filtered null pointer terminated array of tags to be dropped
+ * @return 1 if the line is kept, 0 otherwise
+ **/
+int BamBam_isValidLine(char const * s, char const ** filtered)
+{
+    char const * tag;
+    size_t taglen;
+    char const ** cur;
+
+    /* empty text and lines which are not header lines are never kept */
+    if ( s[0] != '@' )
+        return 0;
+
+    tag = s+1;
+    /* the tag ends at the first tab, newline or at the end of the text */
+    taglen = strcspn(tag,"\n\t");
+
+    for ( cur = filtered; *cur; ++cur )
+        if ( strlen(*cur) == taglen && memcmp(*cur,tag,taglen) == 0 )
+            return 0;
+
+    return 1;
+}
+/**
+ * Produce a copy of header which contains only the lines accepted by
+ * BamBam_isValidLine for the given filters.
+ *
+ * Each kept line is terminated by a single newline in the result.
+ *
+ * @param header null terminated header text
+ * @param filters null pointer terminated array of header tags to drop
+ * @return string obtained from BamBam_StrDup (the caller presumably has to
+ *         free it), or null on failure
+ **/
+char * BamBam_filterHeader(char const * header, char const ** filters)
+{
+    char * result = 0;
+    char const * line = header;
+    BamBam_CharBuffer * const collected = BamBam_CharBuffer_New();
+
+    if ( ! collected )
+        return 0;
+
+    for ( ; *line; line = BamBam_nextLine(line) )
+        if ( BamBam_isValidLine(line,filters) && BamBam_copyLine(line,collected) < 0 )
+            goto cleanup;
+
+    /* append terminator so the collected data is a C string */
+    if ( BamBam_CharBuffer_PushChar(collected,0) < 0 )
+        goto cleanup;
+
+    result = BamBam_StrDup((char const *)(collected->buffer));
+
+    /* the temporary buffer is released on every path, result stays null on failure */
+    cleanup:
+    BamBam_CharBuffer_Delete(collected);
+
+    return result;
+}
diff --git a/src/bambamc/BamBam_LineParsing.h b/src/bambamc/BamBam_LineParsing.h
new file mode 100644
index 0000000..222841b
--- /dev/null
+++ b/src/bambamc/BamBam_LineParsing.h
@@ -0,0 +1,33 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#if ! defined(BAMBAM_LINEPARSING_H)
+#define BAMBAM_LINEPARSING_H
+
+#include <bambamc/BamBam_Unused.h>
+#include <bambamc/BamBam_CharBuffer.h>
+
+/* pointer to the first newline in s, or to the terminating null byte */
+extern char const * BamBam_findLineEnd(char const * s);
+/* skip all white space starting at s */
+extern char const * BamBam_skipEndOfLine(char const * s);
+/* BamBam_skipEndOfLine applied to BamBam_findLineEnd(s) */
+extern char const * BamBam_nextLine(char const * s);
+/* number of characters before the first newline or null byte */
+extern unsigned int BamBam_getLineLength(char const * s);
+/* append the line at s plus a newline to buffer; non-zero return on failure */
+extern int BamBam_copyLine(char const * s, BamBam_CharBuffer * buffer);
+/* 1 if s starts with '@' and its tag is not listed in the null terminated array filtered, 0 otherwise */
+extern int BamBam_isValidLine(char const * s, char const ** filtered);
+/* copy of header containing only the lines accepted by BamBam_isValidLine; null on failure */
+extern char * BamBam_filterHeader(char const * header, char const ** filters);
+#endif
diff --git a/src/bambamc/BamBam_List.c b/src/bambamc/BamBam_List.c
new file mode 100644
index 0000000..f02befc
--- /dev/null
+++ b/src/bambamc/BamBam_List.c
@@ -0,0 +1,92 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#include <bambamc/BamBam_List.h>
+
+/**
+ * Append listnode to the end of list.
+ *
+ * The next pointer of listnode is not modified; a node obtained from
+ * BamBam_ListNode_New has it set to null.
+ *
+ * @param list list to be extended
+ * @param listnode node becoming the new last element
+ **/
+void BamBam_ListNode_PushBack(BamBam_List * list, BamBam_ListNode * listnode)
+{
+    BamBam_ListNode * const tail = list->last;
+
+    if ( tail )
+        tail->next = listnode;
+    else
+        /* empty list: the node is the first element as well */
+        list->first = listnode;
+
+    list->last = listnode;
+}
+
+/**
+ * Insert listnode at the front of list.
+ *
+ * @param list list to be extended
+ * @param listnode node becoming the new first element
+ **/
+void BamBam_ListNode_PushFront(BamBam_List * list, BamBam_ListNode * listnode)
+{
+    if ( list->last )
+        /* non-empty list: chain the old head behind the new node */
+        listnode->next = list->first;
+    else
+        /* empty list: the node is the last element as well */
+        list->last = listnode;
+
+    list->first = listnode;
+}
+
+/**
+ * Allocate an empty list.
+ *
+ * @return new list with first and last set to null, or null if the
+ *         allocation failed; release with BamBam_List_Delete
+ **/
+BamBam_List * BamBam_List_New()
+{
+    BamBam_List * const list = (BamBam_List *)malloc(sizeof(BamBam_List));
+
+    if ( list )
+    {
+        list->first = 0;
+        list->last = 0;
+    }
+
+    return list;
+}
+
+/**
+ * Release a list including all of its nodes.
+ *
+ * @param list list to be released; null is accepted and ignored
+ **/
+void BamBam_List_Delete(BamBam_List * list)
+{
+    if ( ! list )
+        return;
+
+    /* release the node chain before the list head */
+    if ( list->first )
+        BamBam_ListNode_DeleteRec(list->first);
+
+    free (list);
+}
+
+/**
+ * Print all nodes of list to file using BamBam_ListNode_PrintRec.
+ *
+ * @param file output stream
+ * @param list list to be printed; nothing is printed for null
+ **/
+void BamBam_List_Print(FILE * file, BamBam_List const * list)
+{
+    if ( ! list )
+        return;
+
+    BamBam_ListNode_PrintRec(file,list->first);
+}
+
+/**
+ * Count the nodes of list by walking the chain (linear time).
+ *
+ * @param list list to be measured; null is treated like an empty list,
+ *             matching BamBam_List_Delete and BamBam_List_Print
+ * @return number of nodes reachable from list->first
+ **/
+uint64_t BamBam_List_Size(BamBam_List * list)
+{
+    uint64_t length = 0;
+    BamBam_ListNode * node;
+
+    if ( ! list )
+        return 0;
+
+    for ( node = list->first; node; node = node->next )
+        length++;
+
+    return length;
+}
\ No newline at end of file
diff --git a/src/bambamc/BamBam_List.h b/src/bambamc/BamBam_List.h
new file mode 100644
index 0000000..7e7e0b5
--- /dev/null
+++ b/src/bambamc/BamBam_List.h
@@ -0,0 +1,39 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#if ! defined(BAMBAM_LIST_HPP)
+#define BAMBAM_LIST_HPP
+
+#include <bambamc/BamBam_Unused.h>
+#include <bambamc/BamBam_ListNode.h>
+#include <stdint.h>
+
+/**
+ * Singly linked list of BamBam_ListNode objects.
+ * Both members are null for an empty list.
+ **/
+typedef struct _BamBam_List
+{
+ /* head of the node chain */
+ BamBam_ListNode * first;
+ /* tail of the node chain, kept so that appending does not need a traversal */
+ BamBam_ListNode * last;
+} BamBam_List;
+
+/* append listnode to the end of list */
+extern void BamBam_ListNode_PushBack(BamBam_List * list, BamBam_ListNode * listnode);
+/* insert listnode at the front of list */
+extern void BamBam_ListNode_PushFront(BamBam_List * list, BamBam_ListNode * listnode);
+/* allocate an empty list; returns null if the allocation fails */
+extern BamBam_List * BamBam_List_New() BAMBAM_WARN_IF_UNUSEDRESULT;
+/* release list and all of its nodes; null is ignored */
+extern void BamBam_List_Delete(BamBam_List * list);
+/* print all nodes of list to file; null is ignored */
+extern void BamBam_List_Print(FILE * file, BamBam_List const * list);
+/* number of nodes in list, determined by traversal */
+extern uint64_t BamBam_List_Size(BamBam_List * list) BAMBAM_WARN_IF_UNUSEDRESULT;
+#endif
diff --git a/src/bambamc/BamBam_ListNode.c b/src/bambamc/BamBam_ListNode.c
new file mode 100644
index 0000000..151f5bc
--- /dev/null
+++ b/src/bambamc/BamBam_ListNode.c
@@ -0,0 +1,97 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#include <bambamc/BamBam_ListNode.h>
+
+/**
+ * Print callback for list nodes whose entry is a C string.
+ *
+ * @param file output stream
+ * @param vstr entry to be printed, interpreted as char const *
+ **/
+void BamBam_PrintStringFunction(FILE * file, void const * vstr)
+{
+    fprintf(file,"%s",(char const *)vstr);
+}
+
+/**
+ * Print the entry of a single node via its print callback.
+ *
+ * Prints the placeholder "<nullnode>" for a null node and
+ * "<nullprintfunction>" for a node without print callback.
+ *
+ * @param file output stream
+ * @param node node to be printed, may be null
+ **/
+void BamBam_ListNode_Print(FILE * file, BamBam_ListNode const * node)
+{
+    if ( ! node )
+    {
+        fputs("<nullnode>",file);
+        return;
+    }
+
+    if ( ! node->bamBamListPrintFunction )
+    {
+        fputs("<nullprintfunction>",file);
+        return;
+    }
+
+    node->bamBamListPrintFunction(file,node->entry);
+}
+
+/**
+ * Print node and all of its successors, each as "Node(<entry>);".
+ *
+ * @param file output stream
+ * @param node first node to be printed; nothing is printed for null
+ **/
+void BamBam_ListNode_PrintRec(FILE * file, BamBam_ListNode const * node)
+{
+    BamBam_ListNode const * cur;
+
+    for ( cur = node; cur; cur = cur->next )
+    {
+        fprintf(file,"Node(");
+        BamBam_ListNode_Print(file,cur);
+        fprintf(file,");");
+    }
+}
+
+/**
+ * Allocate a list node with all members set to null.
+ *
+ * @return new node or null if the allocation failed; release with
+ *         BamBam_ListNode_Delete
+ **/
+BamBam_ListNode * BamBam_ListNode_New()
+{
+    BamBam_ListNode * const node = (BamBam_ListNode *)malloc(sizeof(BamBam_ListNode));
+
+    if ( node )
+    {
+        node->bamBamListPrintFunction = 0;
+        node->bamBamListFreeFunction = 0;
+        node->entry = 0;
+        node->next = 0;
+    }
+
+    return node;
+}
+
+/**
+ * Release a single node.
+ *
+ * If the node has a free callback it is applied to the entry first.
+ * Successor nodes are not touched.
+ *
+ * @param node node to be released; null is accepted and ignored
+ **/
+void BamBam_ListNode_Delete(BamBam_ListNode * node)
+{
+    if ( ! node )
+        return;
+
+    if ( node->bamBamListFreeFunction )
+        node->bamBamListFreeFunction(node->entry);
+
+    free(node);
+}
+
+/**
+ * Release node and all of its successors.
+ *
+ * The chain is walked iteratively so that the stack usage does not grow
+ * with the length of the list. Nodes are released from the head towards
+ * the tail, i.e. the free callbacks run in list order.
+ *
+ * @param node first node of the chain; null is accepted and ignored
+ **/
+void BamBam_ListNode_DeleteRec(BamBam_ListNode * node)
+{
+    while ( node )
+    {
+        /* remember the successor before the node is released */
+        BamBam_ListNode * const next = node->next;
+
+        node->next = 0;
+        BamBam_ListNode_Delete(node);
+
+        node = next;
+    }
+}
diff --git a/src/bambamc/BamBam_ListNode.h b/src/bambamc/BamBam_ListNode.h
new file mode 100644
index 0000000..f76fc83
--- /dev/null
+++ b/src/bambamc/BamBam_ListNode.h
@@ -0,0 +1,44 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#if ! defined(BAMBAM_LISTNODE_H)
+#define BAMBAM_LISTNODE_H
+
+#include <bambamc/BamBam_Unused.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* callback releasing the entry of a node */
+typedef void (*bamBamListFreeFunction)(void *);
+/* callback printing the entry of a node to file */
+typedef void (*bamBamListPrintFunction)(FILE * file, void const *);
+
+/**
+ * Node of a singly linked list carrying an untyped entry together with
+ * optional callbacks for releasing and printing it.
+ **/
+typedef struct _BamBam_ListNode
+{
+ /* successor in the list, null for the last node */
+ struct _BamBam_ListNode * next;
+ /* payload; passed to the callbacks below */
+ void * entry;
+ /* applied to entry by BamBam_ListNode_Delete if not null */
+ bamBamListFreeFunction bamBamListFreeFunction;
+ /* applied to entry by BamBam_ListNode_Print if not null */
+ bamBamListPrintFunction bamBamListPrintFunction;
+} BamBam_ListNode;
+
+/* print callback for entries which are C strings */
+extern void BamBam_PrintStringFunction(FILE * file, void const * vstr);
+/* print the entry of node, or a placeholder for a null node/missing callback */
+extern void BamBam_ListNode_Print(FILE * file, BamBam_ListNode const * node);
+/* print node and all successors, each as "Node(<entry>);" */
+extern void BamBam_ListNode_PrintRec(FILE * file, BamBam_ListNode const * node);
+/* allocate a node with all members null; returns null if the allocation fails */
+extern BamBam_ListNode * BamBam_ListNode_New() BAMBAM_WARN_IF_UNUSEDRESULT;
+/* release a single node (and its entry via the free callback, if set) */
+extern void BamBam_ListNode_Delete(BamBam_ListNode * node);
+/* release node and all of its successors */
+extern void BamBam_ListNode_DeleteRec(BamBam_ListNode * node);
+#endif
diff --git a/src/bambamc/BamBam_MergeHeapEntry.h b/src/bambamc/BamBam_MergeHeapEntry.h
new file mode 100644
index 0000000..59b267e
--- /dev/null
+++ b/src/bambamc/BamBam_MergeHeapEntry.h
@@ -0,0 +1,30 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#if ! defined(BAMBAM_MERGEHEAPENTRY_H)
+#define BAMBAM_MERGEHEAPENTRY_H
+
+#include <bambamc/BamBam_BamCollationHashEntry.h>
+
+/**
+ * Pairing of a collation hash entry with a file id.
+ *
+ * NOTE(review): judging by the name this is the element type of the heap
+ * used for merging temporary files, with fileid naming the file the entry
+ * was read from -- confirm against BamBam_BamCollator.c.
+ **/
+typedef struct _BamBam_MergeHeapEntry
+{
+ BamBam_BamCollationHashEntry * hashentry;
+ uint64_t fileid;
+} BamBam_MergeHeapEntry;
+#endif
diff --git a/src/bambamc/BamBam_SamBamFileDecoder.c b/src/bambamc/BamBam_SamBamFileDecoder.c
new file mode 100644
index 0000000..40fa2c8
--- /dev/null
+++ b/src/bambamc/BamBam_SamBamFileDecoder.c
@@ -0,0 +1,91 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#include <bambamc/BamBam_SamBamFileDecoder.h>
+
+/**
+ * Release a SAM/BAM decoder together with whichever underlying decoder it
+ * owns. The header member is not released separately here; it is set from
+ * the underlying decoder's header in BamBam_SamBamFileDecoder_New.
+ *
+ * @param object decoder to be released; null is accepted
+ * @return always null
+ **/
+BamBam_SamBamFileDecoder * BamBam_SamBamFileDecoder_Delete(BamBam_SamBamFileDecoder * object)
+{
+    if ( ! object )
+        return 0;
+
+    if ( object->bamdecoder )
+        BamBam_BamFileDecoder_Delete(object->bamdecoder);
+    if ( object->samdecoder )
+        BamBam_SamFileDecoder_Delete(object->samdecoder);
+
+    free(object);
+
+    return 0;
+}
+
+/**
+ * Read the next alignment from whichever decoder is active.
+ *
+ * @param object decoder, may be null
+ * @return result of the BAM decoder if present, otherwise result of the
+ *         SAM decoder if present, otherwise null
+ **/
+BamBam_BamSingleAlignment * BamBam_SamBamFileDecoder_DecodeAlignment(BamBam_SamBamFileDecoder * object)
+{
+    if ( ! object )
+        return 0;
+
+    /* the BAM decoder takes precedence if both should be set */
+    if ( object->bamdecoder )
+        return BamBam_BamFileDecoder_DecodeAlignment(object->bamdecoder);
+
+    if ( object->samdecoder )
+        return BamBam_SamFileDecoder_LoadAlignment(object->samdecoder);
+
+    return 0;
+}
+
+/**
+ * Check whether the string s contains the character c.
+ *
+ * @param s null terminated string
+ * @param c character searched for; the terminating null byte never matches
+ * @return 1 if c occurs in s, 0 otherwise
+ **/
+static int BamBam_SamBamFileDecoder_ContainsLetter(char const * s, char const c)
+{
+    char const * cur;
+
+    for ( cur = s; *cur; ++cur )
+        if ( *cur == c )
+            return 1;
+
+    return 0;
+}
+
+/**
+ * Open filename for decoding as BAM or SAM.
+ *
+ * @param filename name of the input file, passed on to the BAM or SAM
+ *                 decoder constructor
+ * @param mode mode string; it has to contain 'r'. If it contains 'b' the
+ *             file is decoded as BAM, otherwise as SAM
+ * @return new decoder, or null if mode lacks 'r', memory is exhausted or
+ *         the underlying decoder (or its header) cannot be set up
+ **/
+BamBam_SamBamFileDecoder * BamBam_SamBamFileDecoder_New(char const * filename, char const * mode)
+{
+    BamBam_SamBamFileDecoder * object = 0;
+    BamBam_BamFileHeader * header = 0;
+
+    /* only reading is supported */
+    if ( ! BamBam_SamBamFileDecoder_ContainsLetter(mode,'r') )
+        return 0;
+
+    /* zero initialised, so Delete can be used at any point below */
+    object = (BamBam_SamBamFileDecoder *)calloc(1,sizeof(BamBam_SamBamFileDecoder));
+
+    if ( ! object )
+        return 0;
+
+    if ( BamBam_SamBamFileDecoder_ContainsLetter(mode,'b') )
+    {
+        object->bamdecoder = BamBam_BamFileDecoder_New(filename);
+
+        if ( object->bamdecoder )
+            header = object->bamdecoder->header;
+    }
+    else
+    {
+        object->samdecoder = BamBam_SamFileDecoder_New(filename);
+
+        if ( object->samdecoder )
+            header = object->samdecoder->header;
+    }
+
+    /* neither a missing decoder nor a decoder without header is usable */
+    if ( ! header )
+        return BamBam_SamBamFileDecoder_Delete(object);
+
+    object->header = header;
+
+    return object;
+}
diff --git a/src/bambamc/BamBam_SamBamFileDecoder.h b/src/bambamc/BamBam_SamBamFileDecoder.h
new file mode 100644
index 0000000..65044b6
--- /dev/null
+++ b/src/bambamc/BamBam_SamBamFileDecoder.h
@@ -0,0 +1,35 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#if ! defined(BAMBAM_SAMBAMFILEDECODER_H)
+#define BAMBAM_SAMBAMFILEDECODER_H
+
+#include <bambamc/BamBam_BamFileDecoder.h>
+#include <bambamc/BamBam_SamFileDecoder.h>
+
+/**
+ * Decoder reading alignments from either a BAM or a SAM file.
+ * BamBam_SamBamFileDecoder_New sets exactly one of the two decoders.
+ **/
+typedef struct _BamBam_SamBamFileDecoder
+{
+ /* set if the input is decoded as BAM, null otherwise */
+ BamBam_BamFileDecoder * bamdecoder;
+ /* set if the input is decoded as SAM, null otherwise */
+ BamBam_SamFileDecoder * samdecoder;
+ /* copy of the header pointer of the active decoder */
+ BamBam_BamFileHeader * header;
+} BamBam_SamBamFileDecoder;
+
+/* read the next alignment from the active decoder; null if object is null or has no decoder */
+extern BamBam_BamSingleAlignment * BamBam_SamBamFileDecoder_DecodeAlignment(BamBam_SamBamFileDecoder * object);
+/* release object and its underlying decoder; always returns null */
+extern BamBam_SamBamFileDecoder * BamBam_SamBamFileDecoder_Delete(BamBam_SamBamFileDecoder * object);
+/* open filename; mode has to contain 'r', a 'b' in mode selects BAM instead of SAM; null on failure */
+extern BamBam_SamBamFileDecoder * BamBam_SamBamFileDecoder_New(char const * filename, char const * mode);
+#endif
diff --git a/src/bambamc/BamBam_SamFileDecoder.c b/src/bambamc/BamBam_SamFileDecoder.c
new file mode 100644
index 0000000..cc4fc87
--- /dev/null
+++ b/src/bambamc/BamBam_SamFileDecoder.c
@@ -0,0 +1,183 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#include <bambamc/BamBam_SamFileDecoder.h>
+
+/**
+ * Read the next SAM line and convert it to a BAM alignment.
+ *
+ * The line is split at tabs into the mandatory columns, parsed into
+ * object->saminfo and then encoded via object->aput. Columns following
+ * the mandatory ones are not looked at here.
+ *
+ * @param object SAM decoder
+ * @return the alignment stored in object->aput (not a fresh allocation
+ *         made by this function), or null if no line is available, the
+ *         line is defect or the alignment cannot be encoded
+ **/
+BamBam_BamSingleAlignment * BamBam_SamFileDecoder_LoadAlignment(BamBam_SamFileDecoder * object)
+{
+    char const * linestart = 0;
+    char const * lineend = 0;
+    char const * cur = 0;
+    unsigned int ncols = 0;
+
+    /* non-zero return means there is no further line */
+    if ( BamBam_LineBuffer_GetLine(object->lb,&linestart,&lineend) )
+        return 0;
+
+    assert ( *lineend == '\n' );
+
+    /* record start and end of each tab separated mandatory column */
+    for ( cur = linestart; cur != lineend && ncols < BAMBAMC_SAMFILEDECODER_MANDATORYCOLUMNS; ++ncols )
+    {
+        char const * const fieldstart = cur;
+
+        while ( cur != lineend && *cur != '\t' )
+            ++cur;
+
+        object->fields[ncols][0] = fieldstart;
+        object->fields[ncols][1] = cur;
+
+        /* skip over tab */
+        if ( cur != lineend )
+            ++cur;
+    }
+
+    if ( ncols != BAMBAMC_SAMFILEDECODER_MANDATORYCOLUMNS || BamBam_SamInfo_parseSamLine(&object->fields[0],object->saminfo) < 0 )
+    {
+        fprintf(stderr, "Defect SAM line: ");
+        fwrite(linestart,lineend-linestart,1,stderr);
+        fprintf(stderr,"\n");
+
+        return 0;
+    }
+
+    /* SAM positions are 1 based, the encoder is handed pos-1 and pnext-1 */
+    if (
+        BamBam_CharBuffer_PutAlignmentC(
+            object->aput,
+            object->saminfo->flag,
+            BamBam_BamFileHeader_FindChromosomeIdByName(object->header,object->saminfo->rname),
+            object->saminfo->pos-1,
+            BamBam_BamFileHeader_FindChromosomeIdByName(object->header,object->saminfo->rnext),
+            object->saminfo->pnext-1,
+            object->saminfo->qname,
+            object->saminfo->seq,
+            object->saminfo->qual,
+            object->saminfo->cigar,
+            object->saminfo->mapq,
+            object->saminfo->tlen
+        ) < 0
+    )
+        return 0;
+
+    return object->aput->calignment;
+}
+
+/**
+ * Release a SAM decoder and everything it owns.
+ *
+ * The input stream is closed only if it was opened by the decoder itself
+ * (closefile set); a stream handed to BamBam_SamFileDecoder_NewFd is left
+ * open.
+ *
+ * @param object decoder to be released; null is accepted
+ * @return always null
+ **/
+BamBam_SamFileDecoder * BamBam_SamFileDecoder_Delete(BamBam_SamFileDecoder * object)
+{
+    if ( ! object )
+        return 0;
+
+    if ( object->closefile )
+        fclose(object->closefile);
+    if ( object->header )
+        BamBam_BamFileHeader_Delete(object->header);
+    if ( object->lb )
+        BamBam_LineBuffer_Delete(object->lb);
+    if ( object->saminfo )
+        BamBam_SamInfo_Delete(object->saminfo);
+    if ( object->aput )
+        BamBam_AlignmentPut_Delete(object->aput);
+
+    free(object);
+
+    return 0;
+}
+
+/**
+ * Finish the construction of a zero initialised SAM decoder.
+ *
+ * Reads the SAM header from file and sets up line buffer, parse state and
+ * alignment encoder, in this order. On any failure object is released via
+ * BamBam_SamFileDecoder_Delete.
+ *
+ * @param object zero initialised decoder (closefile may already be set)
+ * @param file stream the SAM data is read from
+ * @return object on success, null on failure
+ **/
+BamBam_SamFileDecoder * BamBam_SamFileDecoder_NewInternal(BamBam_SamFileDecoder * object, FILE * file)
+{
+    BamBam_SamInfo_initSamCharTables();
+
+    /* && evaluates left to right and stops at the first failing step */
+    if (
+        (object->header = BamBam_BamFileHeader_New_SAM(file)) != 0
+        &&
+        (object->lb = BamBam_LineBuffer_New(file,1024)) != 0
+        &&
+        (object->saminfo = BamBam_SamInfo_New()) != 0
+        &&
+        (object->aput = BamBam_AlignmentPut_New()) != 0
+    )
+        return object;
+
+    return BamBam_SamFileDecoder_Delete(object);
+}
+
+/**
+ * Create a SAM decoder reading from the file named filename.
+ *
+ * The file is opened here and closed again by BamBam_SamFileDecoder_Delete.
+ *
+ * @param filename name of the SAM file
+ * @return new decoder, or null if memory is exhausted, the file cannot be
+ *         opened or the decoder cannot be set up
+ **/
+BamBam_SamFileDecoder * BamBam_SamFileDecoder_NewFilename(char const * filename)
+{
+    /* zero initialised, so Delete can be used at any point below */
+    BamBam_SamFileDecoder * const object = (BamBam_SamFileDecoder *)calloc(1,sizeof(BamBam_SamFileDecoder));
+
+    if ( ! object )
+        return 0;
+
+    object->closefile = fopen(filename,"rb");
+
+    if ( object->closefile )
+        return BamBam_SamFileDecoder_NewInternal(object,object->closefile);
+
+    return BamBam_SamFileDecoder_Delete(object);
+}
+
+/**
+ * Create a SAM decoder reading from an already open stream.
+ *
+ * closefile stays null, so the stream is not closed when the decoder is
+ * deleted.
+ *
+ * @param file stream the SAM data is read from
+ * @return new decoder, or null on failure
+ **/
+BamBam_SamFileDecoder * BamBam_SamFileDecoder_NewFd(FILE * file)
+{
+    BamBam_SamFileDecoder * const object = (BamBam_SamFileDecoder *)calloc(1,sizeof(BamBam_SamFileDecoder));
+
+    if ( ! object )
+        return 0;
+
+    return BamBam_SamFileDecoder_NewInternal(object,file);
+}
+
+/**
+ * Create a SAM decoder for filename; the name "-" selects standard input.
+ *
+ * @param filename name of the SAM file or "-"
+ * @return new decoder, or null on failure
+ **/
+BamBam_SamFileDecoder * BamBam_SamFileDecoder_New(char const * filename)
+{
+    int const usestdin = (strcmp(filename,"-") == 0);
+
+    return usestdin ? BamBam_SamFileDecoder_NewFd(stdin) : BamBam_SamFileDecoder_NewFilename(filename);
+}
diff --git a/src/bambamc/BamBam_SamFileDecoder.h b/src/bambamc/BamBam_SamFileDecoder.h
new file mode 100644
index 0000000..8454255
--- /dev/null
+++ b/src/bambamc/BamBam_SamFileDecoder.h
@@ -0,0 +1,44 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#if ! defined(BAMBAMC_SAMFILEDECODER_H)
+#define BAMBAMC_SAMFILEDECODER_H
+
+#include <bambamc/BamBam_LineBuffer.h>
+#include <bambamc/BamBam_SamInfo.h>
+#include <bambamc/BamBam_BamAlignmentPut.h>
+#include <stdio.h>
+
+/* number of tab separated columns every SAM alignment line has to provide */
+#define BAMBAMC_SAMFILEDECODER_MANDATORYCOLUMNS 11
+
+/**
+ * Decoder turning the lines of a SAM file into BAM alignments.
+ **/
+typedef struct _BamBam_SamFileDecoder
+{
+ /* input stream if it was opened by the decoder itself (closed on delete), null otherwise */
+ FILE * closefile;
+ /* header parsed from the start of the SAM input */
+ BamBam_BamFileHeader * header;
+ /* line reader for the alignment lines */
+ BamBam_LineBuffer * lb;
+ /* parsed values of the most recent line */
+ BamBam_SamInfo * saminfo;
+ /* start/end pointers of the mandatory columns of the most recent line */
+ BamBam_SamInfo_cptrpair fields[BAMBAMC_SAMFILEDECODER_MANDATORYCOLUMNS];
+ /* encoder holding the alignment returned by BamBam_SamFileDecoder_LoadAlignment */
+ BamBam_AlignmentPut * aput;
+} BamBam_SamFileDecoder;
+
+/* read and convert the next SAM line; null if no line is available or the line is defect */
+extern BamBam_BamSingleAlignment * BamBam_SamFileDecoder_LoadAlignment(BamBam_SamFileDecoder * object);
+/* release object and everything it owns; always returns null */
+extern BamBam_SamFileDecoder * BamBam_SamFileDecoder_Delete(BamBam_SamFileDecoder * object);
+/* create a decoder on an open stream; the stream is not closed on delete */
+extern BamBam_SamFileDecoder * BamBam_SamFileDecoder_NewFd(FILE * file);
+/* create a decoder on a named file; the file is closed on delete */
+extern BamBam_SamFileDecoder * BamBam_SamFileDecoder_NewFilename(char const * filename);
+/* create a decoder for filename, "-" selects standard input */
+extern BamBam_SamFileDecoder * BamBam_SamFileDecoder_New(char const * filename);
+#endif
diff --git a/src/bambamc/BamBam_SamInfo.c b/src/bambamc/BamBam_SamInfo.c
new file mode 100644
index 0000000..82efb43
--- /dev/null
+++ b/src/bambamc/BamBam_SamInfo.c
@@ -0,0 +1,614 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#include <bambamc/BamBam_SamInfo.h>
+#include <bambamc/BamBam_BamFlagBase.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+
+/**
+ * Release a SAM info object including all of its string buffers.
+ *
+ * @param object object to be released; null is accepted
+ * @return always null
+ **/
+BamBam_SamInfo * BamBam_SamInfo_Delete(BamBam_SamInfo * object)
+{
+    if ( ! object )
+        return 0;
+
+    /* free(NULL) is a no-op, so buffers which were never allocated need no guard */
+    free(object->qname);
+    free(object->rname);
+    free(object->cigar);
+    free(object->rnext);
+    free(object->seq);
+    free(object->qual);
+
+    free(object);
+
+    return 0;
+}
+
+/**
+ * Allocate a SAM info object with all members set to zero.
+ *
+ * @return new object or null if the allocation failed; release with
+ *         BamBam_SamInfo_Delete
+ **/
+BamBam_SamInfo * BamBam_SamInfo_New()
+{
+    /* calloc provides the all zero state: no buffers, all capacities 0 */
+    return (BamBam_SamInfo *)calloc(1,sizeof(BamBam_SamInfo));
+}
+
+/**
+ * Copy a string column of a SAM line into a reusable, null terminated buffer.
+ *
+ * @param field start and end pointer of the column
+ * @param str in/out: buffer receiving the copy; (re)allocated here if it
+ *            does not exist yet or is too small
+ * @param maxlen in/out: longest field length *str can hold (excluding the
+ *               terminator)
+ * @param defined out: bambamc_sam_field_undefined if the column is the
+ *                single character '*', bambamc_sam_field_defined otherwise.
+ *                Callers which validate only defined fields will now
+ *                actually run that validation.
+ * @return 0 on success, -1 if memory is exhausted (*str is left untouched
+ *         in this case)
+ **/
+static int BamBam_SamInfo_parseStringField(BamBam_SamInfo_cptrpair field, char ** str, unsigned int * maxlen, bambamc_sam_field_status * defined)
+{
+    unsigned int const fieldlen = field[1]-field[0];
+
+    /* undefined by default */
+    *defined = bambamc_sam_field_undefined;
+
+    /* allocate also if there is no buffer at all yet: an empty column on
+       first use has fieldlen == *maxlen == 0 but still needs room for the
+       terminator */
+    if ( (! *str) || fieldlen > *maxlen )
+    {
+        char * newstr = realloc(*str,fieldlen+1);
+        if ( ! newstr )
+            return -1;
+        *str = newstr;
+        *maxlen = fieldlen;
+    }
+
+    memcpy(*str,field[0],fieldlen);
+    (*str)[fieldlen] = 0;
+
+    /* a lone '*' denotes a missing value, everything else is a value */
+    if ( ! (fieldlen == 1 && field[0][0] == '*') )
+        *defined = bambamc_sam_field_defined;
+
+    return 0;
+}
+
+
+/**
+ * Parse a decimal integer column of a SAM line.
+ *
+ * Accepted is an optional leading '-' followed by decimal digits. As
+ * before, an empty column yields 0 and a lone "-" is rejected.
+ *
+ * @param field start and end pointer of the column
+ * @param str out: parsed value (only written on success)
+ * @param defined out: bambamc_sam_field_defined on success,
+ *                bambamc_sam_field_undefined otherwise
+ * @return 0 on success, -1 if the column contains a non digit character
+ *         or the value does not fit into an int32_t
+ **/
+static int BamBam_SamInfo_parseNumberField(BamBam_SamInfo_cptrpair field, int32_t * str, bambamc_sam_field_status * defined)
+{
+    char const * p = field[0];
+    uint32_t const fieldlen = field[1]-field[0];
+    int const negative = ( fieldlen > 1 && p[0] == '-' );
+    /* largest magnitude which is still representable after applying the sign */
+    int64_t const limit = negative ? ((int64_t)INT32_MAX + 1) : (int64_t)INT32_MAX;
+    /* accumulate in 64 bits so that the range check itself cannot overflow */
+    int64_t num = 0;
+
+    *defined = bambamc_sam_field_undefined;
+
+    if ( negative )
+        ++p;
+
+    for ( ; p != field[1]; ++p )
+    {
+        /* isdigit() is only defined for unsigned char values and EOF */
+        if ( ! isdigit((unsigned char)*p) )
+            return -1;
+
+        num = num * 10 + (*p-'0');
+
+        if ( num > limit )
+            return -1;
+    }
+
+    *defined = bambamc_sam_field_defined;
+    *str = (int32_t)(negative ? -num : num);
+
+    return 0;
+}
+
+/*
+ * Character class tables for validating SAM columns, indexed by character
+ * code; an entry is 1 if the character is allowed. The tables are filled by
+ * BamBam_SamInfo_initSamCharTables, which also sets the flags below.
+ */
+/* characters allowed in query names */
+static char BamBam_SamInfo_qnameValid[256];
+static char BamBam_SamInfo_qnameValidInitialised = 0;
+/* characters allowed as first character of a reference name */
+static char BamBam_SamInfo_rnameFirstValid[256];
+static char BamBam_SamInfo_rnameFirstValidInitialised = 0;
+/* characters allowed in the remainder of a reference name */
+static char BamBam_SamInfo_rnameOtherValid[256];
+static char BamBam_SamInfo_rnameOtherValidInitialised = 0;
+/* characters allowed in sequences */
+static char BamBam_SamInfo_seqValid[256];
+static char BamBam_SamInfo_seqValidInitialised = 0;
+/* characters allowed in quality strings */
+static char BamBam_SamInfo_qualValid[256];
+static char BamBam_SamInfo_qualValidInitialised = 0;
+/* set once all tables above have been filled */
+static int BamBam_SamInfo_tablesInitialised = 0;
+
+/* set table[c] = 1 for all character codes c in the inclusive range [from,to] */
+static void BamBam_SamInfo_markCharRange(char * table, int from, int to)
+{
+    int c;
+
+    for ( c = from; c <= to; ++c )
+        table[c] = 1;
+}
+
+/**
+ * Fill the character class tables used for validating SAM columns.
+ *
+ * The work is done on the first call only; later calls return immediately.
+ * There is no locking around the initialised flag.
+ **/
+void BamBam_SamInfo_initSamCharTables()
+{
+    if ( BamBam_SamInfo_tablesInitialised )
+        return;
+
+    memset(BamBam_SamInfo_qnameValid,0,sizeof(BamBam_SamInfo_qnameValid));
+    memset(BamBam_SamInfo_rnameFirstValid,0,sizeof(BamBam_SamInfo_rnameFirstValid));
+    memset(BamBam_SamInfo_rnameOtherValid,0,sizeof(BamBam_SamInfo_rnameOtherValid));
+    memset(BamBam_SamInfo_seqValid,0,sizeof(BamBam_SamInfo_seqValid));
+    memset(BamBam_SamInfo_qualValid,0,sizeof(BamBam_SamInfo_qualValid));
+
+    /* query name: '!'..'~' without '@' */
+    BamBam_SamInfo_markCharRange(BamBam_SamInfo_qnameValid,'!','?');
+    BamBam_SamInfo_markCharRange(BamBam_SamInfo_qnameValid,'A','~');
+
+    /* first character of a reference name: '!'..'~' without '*' and '=' */
+    BamBam_SamInfo_markCharRange(BamBam_SamInfo_rnameFirstValid,'!',')');
+    BamBam_SamInfo_markCharRange(BamBam_SamInfo_rnameFirstValid,'+','<');
+    BamBam_SamInfo_markCharRange(BamBam_SamInfo_rnameFirstValid,'>','~');
+
+    /* remaining characters of a reference name: '!'..'~' */
+    BamBam_SamInfo_markCharRange(BamBam_SamInfo_rnameOtherValid,'!','~');
+
+    /* sequence: letters, '=' and '.' */
+    BamBam_SamInfo_markCharRange(BamBam_SamInfo_seqValid,'A','Z');
+    BamBam_SamInfo_markCharRange(BamBam_SamInfo_seqValid,'a','z');
+    BamBam_SamInfo_seqValid['='] = 1;
+    BamBam_SamInfo_seqValid['.'] = 1;
+
+    /* quality string: '!'..'~' */
+    BamBam_SamInfo_markCharRange(BamBam_SamInfo_qualValid,'!','~');
+
+    BamBam_SamInfo_qnameValidInitialised = 1;
+    BamBam_SamInfo_rnameFirstValidInitialised = 1;
+    BamBam_SamInfo_rnameOtherValidInitialised = 1;
+    BamBam_SamInfo_seqValidInitialised = 1;
+    BamBam_SamInfo_qualValidInitialised = 1;
+
+    BamBam_SamInfo_tablesInitialised = 1;
+}
+
+/**
+ * Print a diagnostic for a SAM column that could not be parsed: the column
+ * index followed by the raw column text [a,e) and a newline, all on stderr.
+ **/
+static void BamBam_SamInfo_reportUnparsableColumn(unsigned int const col, char const * const a, char const * const e)
+{
+    fprintf(stderr,"Unable to parse column %u: ",col);
+    fwrite(a,e-a,1,stderr);
+    fprintf(stderr,"\n");
+}
+
+/**
+ * Parse and validate the eleven mandatory columns of one SAM alignment line.
+ *
+ * @param fields  one pointer pair per column; fields[i][0] points at the first
+ *                character of column i and fields[i][1] one past its last
+ *                character (columns 0..10 are read)
+ * @param saminfo record receiving the parsed values; the rnext and qual buffers
+ *                may be reallocated here (the other string buffers are handed to
+ *                BamBam_SamInfo_parseStringField, which presumably does the same)
+ * @return 0 on success; -1 if a column cannot be parsed, a value fails
+ *         validation or memory allocation fails. A diagnostic is written to
+ *         stderr before -1 is returned.
+ *
+ * On success an RNEXT of "=" has been replaced by a copy of RNAME, and an
+ * undefined QUAL accompanying a defined SEQ has been replaced by seqlen
+ * bytes of value 255.
+ **/
+int BamBam_SamInfo_parseSamLine(BamBam_SamInfo_cptrpair * fields, BamBam_SamInfo * saminfo)
+{
+    /* step 1: convert the raw column text, stopping at the first column that fails */
+    if ( BamBam_SamInfo_parseStringField(fields[0], &(saminfo->qname), &(saminfo->qnamemax), &(saminfo->qnamedefined)) < 0 )
+    {
+        BamBam_SamInfo_reportUnparsableColumn(0,fields[0][0],fields[0][1]);
+        return -1;
+    }
+    saminfo->qnamelen = fields[0][1] - fields[0][0];
+
+    if ( BamBam_SamInfo_parseNumberField(fields[1], &(saminfo->flag), &(saminfo->flagdefined)) < 0 )
+    {
+        BamBam_SamInfo_reportUnparsableColumn(1,fields[1][0],fields[1][1]);
+        return -1;
+    }
+    if ( BamBam_SamInfo_parseStringField(fields[2], &(saminfo->rname), &(saminfo->rnamemax), &(saminfo->rnamedefined)) < 0 )
+    {
+        BamBam_SamInfo_reportUnparsableColumn(2,fields[2][0],fields[2][1]);
+        return -1;
+    }
+    if ( BamBam_SamInfo_parseNumberField(fields[3], &(saminfo->pos), &(saminfo->posdefined)) < 0 )
+    {
+        BamBam_SamInfo_reportUnparsableColumn(3,fields[3][0],fields[3][1]);
+        return -1;
+    }
+    if ( BamBam_SamInfo_parseNumberField(fields[4], &(saminfo->mapq), &(saminfo->mapqdefined)) < 0 )
+    {
+        BamBam_SamInfo_reportUnparsableColumn(4,fields[4][0],fields[4][1]);
+        return -1;
+    }
+    if ( BamBam_SamInfo_parseStringField(fields[5], &(saminfo->cigar), &(saminfo->cigarmax), &(saminfo->cigardefined)) < 0 )
+    {
+        BamBam_SamInfo_reportUnparsableColumn(5,fields[5][0],fields[5][1]);
+        return -1;
+    }
+    if ( BamBam_SamInfo_parseStringField(fields[6], &(saminfo->rnext), &(saminfo->rnextmax), &(saminfo->rnextdefined)) < 0 )
+    {
+        BamBam_SamInfo_reportUnparsableColumn(6,fields[6][0],fields[6][1]);
+        return -1;
+    }
+    if ( BamBam_SamInfo_parseNumberField(fields[7], &(saminfo->pnext), &(saminfo->pnextdefined)) < 0 )
+    {
+        BamBam_SamInfo_reportUnparsableColumn(7,fields[7][0],fields[7][1]);
+        return -1;
+    }
+    if ( BamBam_SamInfo_parseNumberField(fields[8], &(saminfo->tlen), &(saminfo->tlendefined)) < 0 )
+    {
+        BamBam_SamInfo_reportUnparsableColumn(8,fields[8][0],fields[8][1]);
+        return -1;
+    }
+    if ( BamBam_SamInfo_parseStringField(fields[9], &(saminfo->seq), &(saminfo->seqmax), &(saminfo->seqdefined)) < 0 )
+    {
+        BamBam_SamInfo_reportUnparsableColumn(9,fields[9][0],fields[9][1]);
+        return -1;
+    }
+    saminfo->seqlen = fields[9][1] - fields[9][0];
+
+    if ( BamBam_SamInfo_parseStringField(fields[10], &(saminfo->qual), &(saminfo->qualmax), &(saminfo->qualdefined)) < 0 )
+    {
+        BamBam_SamInfo_reportUnparsableColumn(10,fields[10][0],fields[10][1]);
+        return -1;
+    }
+    /* fix: this used to overwrite seqlen, which left quallen unset and broke
+       both the seq/qual length comparison and the quality synthesis below */
+    saminfo->quallen = fields[10][1] - fields[10][0];
+
+    /*
+     * step 2: validate the parsed values column by column.
+     * The character tables are indexed through unsigned char so that bytes
+     * >= 0x80 cannot produce a negative index where plain char is signed
+     * (assumes the tables have 256 entries - TODO confirm at their definition).
+     */
+
+    /* QNAME: non-empty, only characters accepted by the qname table */
+    if ( saminfo->qnamedefined == bambamc_sam_field_defined )
+    {
+        int ok = 1;
+        char const * p = saminfo->qname;
+
+        while ( *p )
+        {
+            ok = ok && BamBam_SamInfo_qnameValid[(unsigned char)*p];
+            ++p;
+        }
+
+        if ( !ok )
+        {
+            fprintf(stderr,"Invalid name %s\n", saminfo->qname);
+            return -1;
+        }
+        if ( p == saminfo->qname )
+        {
+            fprintf(stderr,"Invalid name %s\n", saminfo->qname);
+            return -1;
+        }
+    }
+
+    /* FLAG: mandatory, must fit into 16 bits */
+    if ( saminfo->flagdefined == bambamc_sam_field_defined )
+    {
+        if ( saminfo->flag < 0 || saminfo->flag >= (int32_t)(1u<<16) )
+        {
+            fprintf(stderr,"Invalid flag field %d\n", saminfo->flag);
+            return -1;
+        }
+    }
+    else
+    {
+        fprintf(stderr,"Invalid undefined flag field.\n");
+        return -1;
+    }
+
+    /* RNAME: non-empty; the first character has its own table */
+    if ( saminfo->rnamedefined == bambamc_sam_field_defined )
+    {
+        int ok = 1;
+        char const * p = saminfo->rname;
+
+        if ( !*p )
+        {
+            fprintf(stderr,"Invalid empty rname field.\n");
+            return -1;
+        }
+
+        ok = ok && BamBam_SamInfo_rnameFirstValid[(unsigned char)*p];
+        ++p;
+
+        while ( *p )
+        {
+            ok = ok && BamBam_SamInfo_rnameOtherValid[(unsigned char)*p];
+            ++p;
+        }
+
+        if ( ! ok )
+        {
+            fprintf(stderr,"Invalid rname field %s.\n", saminfo->rname);
+            return -1;
+        }
+    }
+
+    /* POS: mandatory, in [0,2^29) */
+    if ( saminfo->posdefined == bambamc_sam_field_defined )
+    {
+        if ( saminfo->pos < 0 || saminfo->pos >= (int32_t)(1u<<29) )
+        {
+            fprintf(stderr,"Invalid pos field %d\n", saminfo->pos);
+            return -1;
+        }
+    }
+    else
+    {
+        fprintf(stderr,"Invalid undefined pos field\n");
+        return -1;
+    }
+
+    /* MAPQ: mandatory, in [0,2^8) */
+    if ( saminfo->mapqdefined == bambamc_sam_field_defined )
+    {
+        if ( saminfo->mapq < 0 || saminfo->mapq >= (int32_t)(1u<<8) )
+        {
+            fprintf(stderr,"Invalid mapping quality %d\n", saminfo->mapq);
+            return -1;
+        }
+    }
+    else
+    {
+        fprintf(stderr,"Invalid undefined mapping quality.\n");
+        return -1;
+    }
+
+    /* CIGAR: a sequence of <count><operator> items */
+    if ( saminfo->cigardefined == bambamc_sam_field_defined )
+    {
+        char const * p = saminfo->cigar;
+        /* number of query bases the cigar operations account for */
+        unsigned int exseqlen = 0;
+
+        while ( *p )
+        {
+            unsigned int num = 0;
+
+            /* <ctype.h> functions require an unsigned char value (or EOF) */
+            if ( ! isdigit((unsigned char)*p) )
+            {
+                fprintf(stderr,"Invalid cigar string: %s\n", saminfo->cigar);
+                return -1;
+            }
+            while ( isdigit((unsigned char)*p) )
+            {
+                num *= 10;
+                num += (*p)-'0';
+                ++p;
+            }
+
+            /* a string ending right after a count reaches the default
+               branch with the terminating NUL as "operator" */
+            switch ( *(p++) )
+            {
+                /* operators consuming query bases */
+                case 'M':
+                case 'I':
+                case 'S':
+                case '=':
+                case 'X':
+                    exseqlen += num;
+                    break;
+                /* operators not consuming query bases */
+                case 'D':
+                case 'N':
+                case 'H':
+                case 'P':
+                    break;
+                default:
+                    fprintf(stderr,"Invalid cigar operator %c\n", *(p-1));
+                    return -1;
+            }
+        }
+
+        /* for mapped reads the cigar has to cover the stored sequence.
+           NOTE(review): saminfo->seq is used here even if SEQ was parsed
+           as undefined - confirm what parseStringField stores in that case */
+        if ( ! (saminfo->flag & BAMBAMC_FUNMAP) )
+        {
+            if ( exseqlen != strlen(saminfo->seq) )
+            {
+                fprintf(stderr,"Invalid cigar string %s for sequence %s\n", saminfo->cigar, saminfo->seq);
+                return -1;
+            }
+        }
+    }
+
+    /* RNEXT: either the single character '=' or a valid reference name */
+    if ( saminfo->rnextdefined == bambamc_sam_field_defined )
+    {
+        int ok = 1;
+        char const * p = saminfo->rnext;
+
+        if ( !*p )
+        {
+            fprintf(stderr,"Invalid empty rnext field.\n");
+            return -1;
+        }
+
+        if ( ! ( *p == '=' && !p[1] ) )
+        {
+            ok = ok && BamBam_SamInfo_rnameFirstValid[(unsigned char)*p];
+            ++p;
+
+            while ( *p )
+            {
+                ok = ok && BamBam_SamInfo_rnameOtherValid[(unsigned char)*p];
+                ++p;
+            }
+
+            if ( ! ok )
+            {
+                fprintf(stderr,"Invalid rnext field %s.\n", saminfo->rnext);
+                return -1;
+            }
+        }
+    }
+
+    /* PNEXT: mandatory, in [0,2^29) */
+    if ( saminfo->pnextdefined == bambamc_sam_field_defined )
+    {
+        if ( saminfo->pnext < 0 || saminfo->pnext >= (int32_t)(1u<<29) )
+        {
+            fprintf(stderr,"Invalid pnext field %d\n", saminfo->pnext);
+            return -1;
+        }
+    }
+    else
+    {
+        fprintf(stderr,"Invalid undefined pnext field.\n");
+        return -1;
+    }
+
+    /* TLEN: mandatory, in [-2^29+1,2^29) */
+    if ( saminfo->tlendefined == bambamc_sam_field_defined )
+    {
+        if ( saminfo->tlen < ((-((int32_t)(1u<<29)))+1) || saminfo->tlen >= (int32_t)(1u<<29) )
+        {
+            fprintf(stderr,"Invalid tlen field %d\n", saminfo->tlen);
+            return -1;
+        }
+    }
+    else
+    {
+        fprintf(stderr,"Invalid undefined tlen field\n");
+        return -1;
+    }
+
+    /* SEQ: non-empty, only characters accepted by the sequence table */
+    if ( saminfo->seqdefined == bambamc_sam_field_defined )
+    {
+        int ok = 1;
+        char const * p = saminfo->seq;
+
+        while ( *p )
+        {
+            ok = ok && BamBam_SamInfo_seqValid[(unsigned char)*p];
+            ++p;
+        }
+
+        if ( !ok )
+        {
+            fprintf(stderr,"Invalid sequence string %s\n", saminfo->seq);
+            return -1;
+        }
+        if ( p == saminfo->seq )
+        {
+            fprintf(stderr,"Invalid empty sequence string %s\n", saminfo->seq);
+            return -1;
+        }
+    }
+
+    /* QUAL: non-empty, only characters accepted by the quality table */
+    if ( saminfo->qualdefined == bambamc_sam_field_defined )
+    {
+        int ok = 1;
+        char const * p = saminfo->qual;
+
+        while ( *p )
+        {
+            ok = ok && BamBam_SamInfo_qualValid[(unsigned char)*p];
+            ++p;
+        }
+
+        if ( !ok )
+        {
+            fprintf(stderr,"Invalid quality string %s\n", saminfo->qual);
+            return -1;
+        }
+        if ( p == saminfo->qual )
+        {
+            fprintf(stderr,"Invalid empty quality string %s\n", saminfo->qual);
+            return -1;
+        }
+    }
+
+    /* SEQ and QUAL have to be of equal length when both are present */
+    if (
+        saminfo->qualdefined == bambamc_sam_field_defined
+        &&
+        saminfo->seqdefined == bambamc_sam_field_defined
+    )
+    {
+        if ( saminfo->seqlen != saminfo->quallen )
+        {
+            fprintf(stderr,"Sequence length %u does not match length of quality string %u\n", saminfo->seqlen, saminfo->quallen);
+            return -1;
+        }
+    }
+
+    /* step 3: SEQ without QUAL - synthesise seqlen quality bytes of value 255 */
+    if (
+        saminfo->seqdefined == bambamc_sam_field_defined
+        &&
+        saminfo->qualdefined == bambamc_sam_field_undefined
+    )
+    {
+        if ( saminfo->qualmax < saminfo->seqlen )
+        {
+            /* keep the old pointer until realloc is known to have succeeded */
+            char * newqual = realloc(saminfo->qual,saminfo->seqlen+1);
+            if ( ! newqual )
+            {
+                fprintf(stderr,"Failed to allocate memory for quality string.\n");
+                return -1;
+            }
+            saminfo->qual = newqual;
+            saminfo->qualmax = saminfo->seqlen;
+        }
+
+        saminfo->qual[saminfo->seqlen] = 0;
+        memset(saminfo->qual,255,saminfo->seqlen);
+    }
+
+    /* step 4: an RNEXT of "=" means "same as RNAME"; store the name itself */
+    if ( saminfo->rnamedefined && saminfo->rnextdefined && saminfo->rnext && saminfo->rnext[0] == '=' && saminfo->rnext[1] == 0 )
+    {
+        unsigned int const rnamelen = strlen(saminfo->rname);
+        if ( saminfo->rnextmax < rnamelen )
+        {
+            char * newrnext = realloc(saminfo->rnext,rnamelen+1);
+            if ( ! newrnext )
+            {
+                fprintf(stderr,"Failed to allocate memory for rnext.\n");
+                return -1;
+            }
+            saminfo->rnext = newrnext;
+            saminfo->rnextmax = rnamelen;
+        }
+        /* fix: the copy used to sit inside the branch above, so "=" was left
+           unresolved whenever the buffer was already large enough */
+        strcpy(saminfo->rnext,saminfo->rname);
+    }
+
+    return 0;
+}
diff --git a/src/bambamc/BamBam_SamInfo.h b/src/bambamc/BamBam_SamInfo.h
new file mode 100644
index 0000000..618026b
--- /dev/null
+++ b/src/bambamc/BamBam_SamInfo.h
@@ -0,0 +1,76 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#if ! defined(BAMBAMC_SAMINFO_H)
+#define BAMBAMC_SAMINFO_H
+
+#include <stdint.h>
+
+typedef enum _bambamc_sam_field_status { bambamc_sam_field_undefined = 0, bambamc_sam_field_defined = 1 } bambamc_sam_field_status; /* tells whether a parsed SAM column carries a value (defined) or not (undefined) */
+typedef char const * BamBam_SamInfo_cptrpair[2]; /* text range of one column: [0] points at its first character, [1] one past its last character */
+
+typedef struct _BamBam_SamInfo /* values of the eleven mandatory columns of one SAM alignment line, as filled in by BamBam_SamInfo_parseSamLine; strings are NUL terminated */
+{
+ char * qname; /* column 0: query name */
+ unsigned int qnamemax; /* presumably the capacity of qname without the terminator, as for rnextmax - confirm in parseStringField */
+ bambamc_sam_field_status qnamedefined; /* whether column 0 held a value */
+ unsigned int qnamelen; /* length of the column 0 text */
+
+ int32_t flag; /* column 1: flags; accepted range is [0,2^16) */
+ bambamc_sam_field_status flagdefined; /* parseSamLine rejects lines where this is undefined */
+
+ char * rname; /* column 2: reference sequence name */
+ unsigned int rnamemax; /* presumably the capacity of rname without the terminator - confirm in parseStringField */
+ bambamc_sam_field_status rnamedefined; /* whether column 2 held a value */
+
+ int32_t pos; /* column 3: position; accepted range is [0,2^29) */
+ bambamc_sam_field_status posdefined; /* parseSamLine rejects lines where this is undefined */
+
+ int32_t mapq; /* column 4: mapping quality; accepted range is [0,2^8) */
+ bambamc_sam_field_status mapqdefined; /* parseSamLine rejects lines where this is undefined */
+
+ char * cigar; /* column 5: cigar string, a sequence of <count><operator> items */
+ unsigned int cigarmax; /* presumably the capacity of cigar without the terminator - confirm in parseStringField */
+ bambamc_sam_field_status cigardefined; /* whether column 5 held a value */
+
+ char * rnext; /* column 6: reference name of the next segment; "=" refers to rname */
+ unsigned int rnextmax; /* capacity of rnext in characters, not counting the terminator */
+ bambamc_sam_field_status rnextdefined; /* whether column 6 held a value */
+
+ int32_t pnext; /* column 7: position of the next segment; accepted range is [0,2^29) */
+ bambamc_sam_field_status pnextdefined; /* parseSamLine rejects lines where this is undefined */
+
+ int32_t tlen; /* column 8: template length; accepted range is [-2^29+1,2^29) */
+ bambamc_sam_field_status tlendefined; /* parseSamLine rejects lines where this is undefined */
+
+ char * seq; /* column 9: sequence */
+ unsigned int seqmax; /* presumably the capacity of seq without the terminator - confirm in parseStringField */
+ bambamc_sam_field_status seqdefined; /* whether column 9 held a value */
+ unsigned int seqlen; /* sequence length; compared with quallen when both seq and qual are defined */
+
+ char * qual; /* column 10: quality string; filled with seqlen bytes of value 255 when seq is defined but qual is not */
+ unsigned int qualmax; /* capacity of qual in characters, not counting the terminator */
+ bambamc_sam_field_status qualdefined; /* whether column 10 held a value */
+ unsigned int quallen; /* quality string length; compared with seqlen when both seq and qual are defined */
+} BamBam_SamInfo;
+
+extern BamBam_SamInfo * BamBam_SamInfo_Delete(BamBam_SamInfo * object); /* presumably releases object and its string buffers; meaning of the returned pointer to be confirmed in BamBam_SamInfo.c */
+extern BamBam_SamInfo * BamBam_SamInfo_New(); /* presumably allocates an empty record, null pointer on failure - confirm in BamBam_SamInfo.c */
+extern void BamBam_SamInfo_initSamCharTables(); /* sets up the character validity tables consulted by BamBam_SamInfo_parseSamLine */
+extern int BamBam_SamInfo_parseSamLine(BamBam_SamInfo_cptrpair * fields, BamBam_SamInfo * saminfo); /* parses and validates columns 0..10 given as text ranges; returns 0 on success, -1 (with a message on stderr) otherwise */
+#endif
diff --git a/src/bambamc/BamBam_StrDup.c b/src/bambamc/BamBam_StrDup.c
new file mode 100644
index 0000000..4360ffb
--- /dev/null
+++ b/src/bambamc/BamBam_StrDup.c
@@ -0,0 +1,51 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#include <bambamc/BamBam_StrDup.h>
+#include <string.h>
+#include <stdlib.h>
+
+/**
+ * Create a heap allocated copy of the NUL terminated string s.
+ *
+ * @param s string to copy (must not be a null pointer)
+ * @return newly allocated copy which the caller releases with free(),
+ *         or a null pointer if the allocation failed
+ **/
+char * BamBam_StrDup(char const * s)
+{
+    /* number of bytes to copy, terminator included */
+    size_t const nbytes = strlen(s) + 1;
+    char * const copy = (char *)malloc(nbytes);
+
+    if ( copy )
+        memcpy(copy,s,nbytes);
+
+    return copy;
+}
+
+/**
+ * Create a heap allocated string holding sa immediately followed by sb.
+ *
+ * @param sa first part (must not be a null pointer)
+ * @param sb second part (must not be a null pointer)
+ * @return newly allocated concatenation which the caller releases with free(),
+ *         or a null pointer if the allocation failed
+ **/
+char * BamBam_StrCat(char const * sa, char const * sb)
+{
+    size_t const headlen = strlen(sa);
+    /* the tail is copied together with its terminator */
+    size_t const tailbytes = strlen(sb) + 1;
+    char * const joined = (char *)malloc(headlen + tailbytes);
+
+    if ( joined )
+    {
+        memcpy(joined,sa,headlen);
+        memcpy(joined + headlen,sb,tailbytes);
+    }
+
+    return joined;
+}
diff --git a/src/bambamc/BamBam_StrDup.h b/src/bambamc/BamBam_StrDup.h
new file mode 100644
index 0000000..7fac441
--- /dev/null
+++ b/src/bambamc/BamBam_StrDup.h
@@ -0,0 +1,24 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#if ! defined(BAMBAMC_STRDUP_H)
+#define BAMBAMC_STRDUP_H
+
+extern char * BamBam_StrDup(char const * s); /* returns a malloc'ed copy of s, or a null pointer if allocation fails; the caller frees the result */
+extern char * BamBam_StrCat(char const * sa, char const * sb); /* returns a malloc'ed string holding sa followed by sb, or a null pointer if allocation fails; the caller frees the result */
+#endif
diff --git a/src/bambamc/BamBam_Unused.h b/src/bambamc/BamBam_Unused.h
new file mode 100644
index 0000000..e85c59b
--- /dev/null
+++ b/src/bambamc/BamBam_Unused.h
@@ -0,0 +1,29 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#if ! defined(BAMBAM_UNUSED_H)
+#define BAMBAM_UNUSED_H
+
+#if defined(__GNUC__) /* function attribute asking the compiler to warn when a caller ignores the return value */
+ #define BAMBAM_WARN_IF_UNUSEDRESULT __attribute__ ((warn_unused_result))
+#else /* other compilers: the macro expands to nothing */
+ #define BAMBAM_WARN_IF_UNUSEDRESULT
+#endif
+
+#endif
diff --git a/src/test/bamreadertest.c b/src/test/bamreadertest.c
new file mode 100644
index 0000000..abdef47
--- /dev/null
+++ b/src/test/bamreadertest.c
@@ -0,0 +1,145 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+#include <bambamc/BamBam_LineBuffer.h>
+#include <bambamc/BamBam_BamCollator.h>
+#include <bambamc/BamBam_FormatAlignment.h>
+#include <bambamc/BamBam_SamBamFileDecoder.h>
+#include <assert.h>
+
+/**
+ * Run a collator over "-" (format "bam", temporary prefix "tmpdir") and write
+ * every result consisting of two entries as two FastQ records on stdout.
+ * Results with any other entry count are dropped.
+ *
+ * @return EXIT_FAILURE if the collator could not be created, 0 otherwise
+ **/
+int runCollationTest()
+{
+    /* FastQ text buffers, reused across calls to BamBam_PutAlignmentFastQBuffer */
+    char * fqA = 0;
+    char * fqB = 0;
+    unsigned int fqAlen = 0;
+    unsigned int fqBlen = 0;
+    BamBam_BamCollationHashEntry * first = 0;
+    BamBam_BamCollationHashEntry * second = 0;
+    int numEntries = 0;
+    BamBam_BamCollator * collator = BamBam_BamCollator_New("tmpdir",16,16,"bam","-");
+
+    if ( ! collator )
+    {
+        fprintf(stderr,"Failed to open bam file.\n");
+        return EXIT_FAILURE;
+    }
+
+    /* a return value of zero from BamBam_BamCollator_Get ends the loop */
+    while ( (numEntries = BamBam_BamCollator_Get(collator,&first,&second)) != 0 )
+    {
+        if ( numEntries == 2 )
+        {
+            int writtenA;
+            int writtenB;
+
+            assert ( first );
+            assert ( second );
+
+            writtenA = BamBam_PutAlignmentFastQBuffer(first->entry,&fqA,&fqAlen,'\n');
+            writtenB = BamBam_PutAlignmentFastQBuffer(second->entry,&fqB,&fqBlen,'\n');
+
+            /* emit the pair only if both conversions succeeded */
+            if ( ! ( writtenA < 0 || writtenB < 0 ) )
+            {
+                fwrite(fqA,writtenA,1,stdout);
+                fwrite(fqB,writtenB,1,stdout);
+            }
+        }
+
+        BamBam_BamCollationHashEntry_Delete(first);
+        BamBam_BamCollationHashEntry_Delete(second);
+    }
+
+    free(fqA);
+    free(fqB);
+    BamBam_BamCollator_Delete(collator);
+
+    return 0;
+}
+
+/**
+ * Read stdin through a BamBam_LineBuffer and echo every line on stderr:
+ * leading lines starting with '@' are labelled as header lines, all
+ * remaining lines as non header lines. The number of lines echoed is
+ * reported at the end.
+ **/
+void lineBufferTest()
+{
+    char const * pa = 0;
+    char const * pe = 0;
+    unsigned int numlines = 0;
+    BamBam_LineBuffer * lb = BamBam_LineBuffer_New(stdin,1024);
+    assert ( lb );
+
+    /* header section: stops at the first line not starting with '@' */
+    while ( ! BamBam_LineBuffer_GetLine(lb,&pa,&pe) )
+    {
+        if ( pe != pa && pa[0] == '@' )
+        {
+            fprintf(stderr,"Header line: ");
+            fwrite(pa,pe-pa,1,stderr);
+            fprintf(stderr,"\n");
+            /* fix: the counter was never incremented, so the report below always said 0 */
+            ++numlines;
+        }
+        else
+        {
+            /* not counted here: the line is handed back and presumably
+               returned again by the next GetLine call below */
+            BamBam_LineBuffer_PutBack(lb,pa);
+            break;
+        }
+    }
+    /* everything after the header section */
+    while ( ! BamBam_LineBuffer_GetLine(lb,&pa,&pe) )
+    {
+        fprintf(stderr,"Non header line: ");
+        fwrite(pa,pe-pa,1,stderr);
+        fprintf(stderr,"\n");
+        ++numlines;
+    }
+
+    BamBam_LineBuffer_Delete(lb);
+
+    fprintf(stderr,"number of lines is %u\n", numlines);
+}
+
+/**
+ * Decode all alignments from "-" (opened with mode "rb") and print each read
+ * name on stderr. After every 1024*1024 alignments the number of such blocks
+ * processed so far is printed as well.
+ **/
+void samBamSamTest()
+{
+    uint64_t numAlignments = 0;
+    BamBam_BamSingleAlignment * cur = 0;
+    BamBam_SamBamFileDecoder * decoder = BamBam_SamBamFileDecoder_New("-","rb");
+    assert ( decoder );
+
+    for ( ; ; )
+    {
+        cur = BamBam_SamBamFileDecoder_DecodeAlignment(decoder);
+
+        /* a null pointer ends the loop */
+        if ( ! cur )
+            break;
+
+        fprintf(stderr,"%s\n", BamBam_BamSingleAlignment_GetReadName(cur));
+
+        ++numAlignments;
+        if ( numAlignments % (1024*1024) == 0 )
+            fprintf(stderr,"%d\n", (int)(numAlignments/(1024*1024)) );
+    }
+
+    BamBam_SamBamFileDecoder_Delete(decoder);
+}
+
+#if defined(_WIN32) /* SET_BINARY_MODE(handle): switch a stdio stream to binary mode on Windows */
+# include <io.h>
+# include <fcntl.h>
+# define SET_BINARY_MODE(handle) _setmode(_fileno(handle), O_BINARY)
+#else /* elsewhere the macro does nothing */
+# define SET_BINARY_MODE(handle) ((void)0)
+#endif
+
+/**
+ * Test driver: puts stdin and stdout into binary mode and runs the collation
+ * test. lineBufferTest() and samBamSamTest() are alternative drivers defined
+ * in this file which are not run by default.
+ *
+ * @return the result of runCollationTest(), so that a failure to set up the
+ *         collator is visible in the exit status (it was previously discarded
+ *         and the program always reported success)
+ **/
+int main(/* int argc, char * argv[] */)
+{
+    SET_BINARY_MODE(stdin);
+    SET_BINARY_MODE(stdout);
+
+    return runCollationTest();
+}
diff --git a/src/test/bamwritertest.c b/src/test/bamwritertest.c
new file mode 100644
index 0000000..36d8b6c
--- /dev/null
+++ b/src/test/bamwritertest.c
@@ -0,0 +1,85 @@
+/**
+ bambamc
+ Copyright (C) 2009-2013 German Tischler
+ Copyright (C) 2011-2013 Genome Research Limited
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+**/
+
+#include <bambamc/BamBam_BamHeaderInfo.h>
+#include <bambamc/BamBam_BamWriter.h>
+#include <bambamc/BamBam_BamAlignmentPut.h>
+#include <assert.h>
+
+/**
+ * Write a minimal BAM file to filename: a header created with
+ * BamBam_BamHeaderInfo_New("1.4","unknown",0) and the chromosome entry
+ * "chr1"/10000, followed by one alignment named "readname" which carries an
+ * "AS" auxiliary number of type 'i' with value 61.
+ * Every step is checked with assert.
+ *
+ * @param filename output file name handed to BamBam_BamWriter_New
+ * @return always 0
+ **/
+int testLibBamFree(char const * filename)
+{
+    int const compressionLevel = 1;
+    int auxValue = 61;
+    int closeStatus = -1;
+    int rc = -1;
+    BamBam_BamWriter * bamWriter = 0;
+    BamBam_BamHeaderInfo * headerInfo = BamBam_BamHeaderInfo_New("1.4","unknown",0);
+
+    assert ( headerInfo );
+
+    rc = BamBam_BamHeaderInfo_AddChromosome(headerInfo, "chr1",10000);
+    assert ( rc == 0 );
+
+    bamWriter = BamBam_BamWriter_New(headerInfo,filename,compressionLevel);
+    assert ( bamWriter );
+
+    /* the one alignment record of the file */
+    rc = BamBam_BamWriter_PutAlignment(
+        bamWriter,
+        0,
+        0,
+        5000,
+        -1,
+        0,
+        "readname",
+        "ACGTTGCA",
+        "HHHHHHHH",
+        "8M",
+        60,
+        100);
+    assert ( rc >= 0 );
+
+    /* auxiliary field attached to the alignment before it is committed */
+    rc = BamBam_BamWriter_PutAuxNumber(bamWriter,"AS",'i',&auxValue);
+    assert ( rc >= 0 );
+
+    rc = BamBam_BamWriter_Commit(bamWriter);
+    assert ( rc >= 0 );
+
+    /* deleting the writer reports its final status through closeStatus */
+    BamBam_BamWriter_Delete(bamWriter,&closeStatus);
+    assert ( closeStatus >= 0 );
+    BamBam_BamHeaderInfo_Delete(headerInfo);
+
+    return 0;
+}
+
+#if defined(_WIN32) /* SET_BINARY_MODE(handle): switch a stdio stream to binary mode on Windows */
+# include <io.h>
+# include <fcntl.h>
+# define SET_BINARY_MODE(handle) _setmode(_fileno(handle), O_BINARY)
+#else /* elsewhere the macro does nothing */
+# define SET_BINARY_MODE(handle) ((void)0)
+#endif
+
+/**
+ * Test driver: puts stdin and stdout into binary mode and writes the test
+ * BAM file to "-".
+ *
+ * @return the result of testLibBamFree("-")
+ **/
+int main()
+{
+    SET_BINARY_MODE(stdin);
+    SET_BINARY_MODE(stdout);
+
+    /* the unreachable "return 0;" that used to follow this statement was removed */
+    return testLibBamFree("-");
+}
diff --git a/ubuntu.sh.in b/ubuntu.sh.in
new file mode 100755
index 0000000..54da142
--- /dev/null
+++ b/ubuntu.sh.in
@@ -0,0 +1,5 @@
+make distclean # remove the results of a previous configure/build run; the script does not stop if a step fails
+sh configure --prefix=/usr # configure for installation below /usr
+make # build; the command below then hands the result to checkinstall under the configured package name and version
+sudo checkinstall --pkgname @PACKAGE_NAME@ --pkgversion @PACKAGE_VERSION@ --backup=no --default --deldoc \
+ --maintainer="German Tischler"
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/bambamc.git
More information about the debian-med-commit
mailing list