[med-svn] [nanook] 01/01: Manpage for version 1.26 as automatically created
Andreas Tille
tille at debian.org
Fri Sep 1 13:59:18 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch manpages
in repository nanook.
commit bbaaa37424a017b8d02a095ce2eee770e394afe7
Author: Andreas Tille <tille at debian.org>
Date: Fri Sep 1 15:58:03 2017 +0200
Manpage for version 1.26 as automatically created
---
.gitignore | 8 -
Dockerfile | 26 -
HDF5License.txt | 92 --
LICENSE | 674 ---------------
README.md | 5 -
bin/get_contig_stats.pl | 253 ------
bin/nanook | 21 -
bin/nanook_get_read_stats.pl | 59 --
bin/nanook_get_tracking.pl | 157 ----
bin/nanook_plot_comparison.R | 100 ---
bin/nanook_plot_comparison_reference.R | 215 -----
bin/nanook_plot_lengths.R | 87 --
bin/nanook_plot_reference.R | 475 ----------
bin/nanook_split_fasta | 83 --
bin/slurmit | 62 --
debian/compat | 1 -
debian/control | 41 -
debian/copyright | 27 -
debian/install | 1 -
debian/jlibs | 1 -
debian/links | 1 -
debian/manifest | 4 -
debian/nanook.1 | 19 +-
debian/patches/series | 1 -
debian/patches/set_jar_path_in_bin.patch | 22 -
debian/rules | 12 -
debian/source/format | 1 -
debian/upstream/metadata | 12 -
debian/watch | 4 -
src/nanook/Alignment.java | 146 ----
src/nanook/AlignmentFileParser.java | 83 --
src/nanook/AlignmentFileStats.java | 24 -
src/nanook/AlignmentInfo.java | 178 ----
src/nanook/AlignmentMerger.java | 411 ---------
src/nanook/AlignmentsTableFile.java | 146 ----
src/nanook/BLASRParser.java | 50 --
src/nanook/BWAParser.java | 71 --
src/nanook/BlastHandler.java | 202 -----
src/nanook/BlastMerger.java | 86 --
src/nanook/CIGARString.java | 285 ------
src/nanook/ComparisonReportWriter.java | 196 -----
src/nanook/DirectoryWatcher.java | 184 ----
src/nanook/DirectoryWatcherNative.java | 219 -----
src/nanook/Fast5File.java | 492 -----------
src/nanook/FastAQBlastMerger.java | 138 ---
src/nanook/FastAQFile.java | 103 ---
src/nanook/FileWatcher.java | 177 ----
src/nanook/FileWatcherItem.java | 31 -
src/nanook/GCCounter.java | 93 --
src/nanook/GraphMapParser.java | 75 --
src/nanook/KmerAbundance.java | 59 --
src/nanook/KmerMotifStatistic.java | 211 -----
src/nanook/KmerTable.java | 75 --
src/nanook/LastParser.java | 70 --
src/nanook/MAFAlignmentLine.java | 98 ---
src/nanook/MAFParser.java | 131 ---
src/nanook/MarginAlignParser.java | 71 --
src/nanook/MergedFastAQFile.java | 45 -
src/nanook/MotifStatistics.java | 217 -----
src/nanook/NanoOK.java | 428 ---------
src/nanook/NanoOKLog.java | 76 --
src/nanook/NanoOKOptions.java | 1389 ------------------------------
src/nanook/OverallStats.java | 39 -
src/nanook/ParserRunnable.java | 163 ----
src/nanook/ProcessLogger.java | 202 -----
src/nanook/RGraphPlotter.java | 145 ----
src/nanook/RGraphRunnable.java | 69 --
src/nanook/ReadAligner.java | 226 -----
src/nanook/ReadExtractor.java | 189 ----
src/nanook/ReadExtractorRunnable.java | 83 --
src/nanook/ReadFileMerger.java | 95 --
src/nanook/ReadLengthsSummaryFile.java | 64 --
src/nanook/ReadParser.java | 123 ---
src/nanook/ReadProcessor.java | 263 ------
src/nanook/ReadProcessorRunnable.java | 369 --------
src/nanook/ReadSet.java | 353 --------
src/nanook/ReadSetStats.java | 625 --------------
src/nanook/ReadStats.java | 6 -
src/nanook/ReferenceSequence.java | 190 ----
src/nanook/ReferenceSequenceStats.java | 548 ------------
src/nanook/References.java | 345 --------
src/nanook/SAMParser.java | 236 -----
src/nanook/SampleChecker.java | 281 ------
src/nanook/SampleComparer.java | 154 ----
src/nanook/SampleReportWriter.java | 822 ------------------
src/nanook/SequenceCoverage.java | 139 ---
src/nanook/SequenceLogo.java | 133 ---
src/nanook/SequenceReader.java | 319 -------
src/nanook/SystemCommandRunnable.java | 93 --
src/nanook/WatcherLog.java | 65 --
src/nanook/WatcherRunnable.java | 106 ---
91 files changed, 8 insertions(+), 15161 deletions(-)
diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index e4fecd6..0000000
--- a/.gitignore
+++ /dev/null
@@ -1,8 +0,0 @@
-build
-build.xml
-manifest.mf
-nbproject
-dist/README.TXT
-
-
-install-packages.R
diff --git a/Dockerfile b/Dockerfile
deleted file mode 100644
index 19ca29c..0000000
--- a/Dockerfile
+++ /dev/null
@@ -1,26 +0,0 @@
-# NanoOK Dockerfile
-FROM ubuntu:14.04
-MAINTAINER Richard Leggett <richard.leggett at earlham.ac.uk>
-
-RUN echo "deb http://cran.cnr.berkeley.edu/bin/linux/ubuntu trusty/" >> /etc/apt/sources.list
-RUN apt-get update
-RUN apt-get install -y --force-yes r-base
-RUN apt-get install -y --force-yes r-cran-ggplot2
-RUN apt-get install -y hdf5-tools
-RUN apt-get install -y texlive
-RUN apt-get install -y texlive-latex-extra
-RUN apt-get install -y default-jre
-RUN apt-get install -y git
-ADD http://last.cbrc.jp/last-761.zip /usr/
-RUN cd /usr ; unzip last-761 ; cd last-761 ; make ; make install
-RUN cd /usr ; git clone https://github.com/lh3/bwa.git
-RUN cd /usr/bwa ; make ; cp bwa /usr/local/bin
-RUN cd /usr ; git clone https://github.com/TGAC/NanoOK
-ENV NANOOK_DIR="/usr/NanoOK"
-RUN echo "export PATH=/usr/NanoOK/bin:${PATH}" >> ~/.bashrc
-RUN Rscript -e "install.packages('ggplot2', repos='https://cran.ma.imperial.ac.uk/')"
-RUN Rscript -e "install.packages('ggmap', repos='https://cran.ma.imperial.ac.uk/')"
-RUN Rscript -e "install.packages('plyr', repos='https://cran.ma.imperial.ac.uk/')"
-RUN Rscript -e "install.packages('scales', repos='https://cran.ma.imperial.ac.uk/')"
-RUN Rscript -e "install.packages('gridExtra', repos='https://cran.ma.imperial.ac.uk/')"
-RUN Rscript -e "install.packages('reshape', repos='https://cran.ma.imperial.ac.uk/')"
\ No newline at end of file
diff --git a/HDF5License.txt b/HDF5License.txt
deleted file mode 100644
index b6eee1e..0000000
--- a/HDF5License.txt
+++ /dev/null
@@ -1,92 +0,0 @@
-
-Copyright Notice and License Terms for
-HDF5 (Hierarchical Data Format 5) Software Library and Utilities
------------------------------------------------------------------------------
-
-HDF5 (Hierarchical Data Format 5) Software Library and Utilities
-Copyright 2006-2015 by The HDF Group.
-
-NCSA HDF5 (Hierarchical Data Format 5) Software Library and Utilities
-Copyright 1998-2006 by the Board of Trustees of the University of Illinois.
-
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted for any purpose (including commercial purposes)
-provided that the following conditions are met:
-
-1. Redistributions of source code must retain the above copyright notice,
- this list of conditions, and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions, and the following disclaimer in the documentation
- and/or materials provided with the distribution.
-
-3. In addition, redistributions of modified forms of the source or binary
- code must carry prominent notices stating that the original code was
- changed and the date of the change.
-
-4. All publications or advertising materials mentioning features or use of
- this software are asked, but not required, to acknowledge that it was
- developed by The HDF Group and by the National Center for Supercomputing
- Applications at the University of Illinois at Urbana-Champaign and
- credit the contributors.
-
-5. Neither the name of The HDF Group, the name of the University, nor the
- name of any Contributor may be used to endorse or promote products derived
- from this software without specific prior written permission from
- The HDF Group, the University, or the Contributor, respectively.
-
-DISCLAIMER:
-THIS SOFTWARE IS PROVIDED BY THE HDF GROUP AND THE CONTRIBUTORS
-"AS IS" WITH NO WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED. In no
-event shall The HDF Group or the Contributors be liable for any damages
-suffered by the users arising out of the use of this software, even if
-advised of the possibility of such damage.
-
------------------------------------------------------------------------------
------------------------------------------------------------------------------
-
-Contributors: National Center for Supercomputing Applications (NCSA) at
-the University of Illinois, Fortner Software, Unidata Program Center (netCDF),
-The Independent JPEG Group (JPEG), Jean-loup Gailly and Mark Adler (gzip),
-and Digital Equipment Corporation (DEC).
-
------------------------------------------------------------------------------
-
-Portions of HDF5 were developed with support from the Lawrence Berkeley
-National Laboratory (LBNL) and the United States Department of Energy
-under Prime Contract No. DE-AC02-05CH11231.
-
------------------------------------------------------------------------------
-
-Portions of HDF5 were developed with support from the University of
-California, Lawrence Livermore National Laboratory (UC LLNL).
-The following statement applies to those portions of the product and must
-be retained in any redistribution of source code, binaries, documentation,
-and/or accompanying materials:
-
- This work was partially produced at the University of California,
- Lawrence Livermore National Laboratory (UC LLNL) under contract
- no. W-7405-ENG-48 (Contract 48) between the U.S. Department of Energy
- (DOE) and The Regents of the University of California (University)
- for the operation of UC LLNL.
-
- DISCLAIMER:
- This work was prepared as an account of work sponsored by an agency of
- the United States Government. Neither the United States Government nor
- the University of California nor any of their employees, makes any
- warranty, express or implied, or assumes any liability or responsibility
- for the accuracy, completeness, or usefulness of any information,
- apparatus, product, or process disclosed, or represents that its use
- would not infringe privately- owned rights. Reference herein to any
- specific commercial products, process, or service by trade name,
- trademark, manufacturer, or otherwise, does not necessarily constitute
- or imply its endorsement, recommendation, or favoring by the United
- States Government or the University of California. The views and
- opinions of authors expressed herein do not necessarily state or reflect
- those of the United States Government or the University of California,
- and shall not be used for advertising or product endorsement purposes.
------------------------------------------------------------------------------
-
-
diff --git a/LICENSE b/LICENSE
deleted file mode 100644
index 70566f2..0000000
--- a/LICENSE
+++ /dev/null
@@ -1,674 +0,0 @@
-GNU GENERAL PUBLIC LICENSE
- Version 3, 29 June 2007
-
- Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
- Preamble
-
- The GNU General Public License is a free, copyleft license for
-software and other kinds of works.
-
- The licenses for most software and other practical works are designed
-to take away your freedom to share and change the works. By contrast,
-the GNU General Public License is intended to guarantee your freedom to
-share and change all versions of a program--to make sure it remains free
-software for all its users. We, the Free Software Foundation, use the
-GNU General Public License for most of our software; it applies also to
-any other work released this way by its authors. You can apply it to
-your programs, too.
-
- When we speak of free software, we are referring to freedom, not
-price. Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-them if you wish), that you receive source code or can get it if you
-want it, that you can change the software or use pieces of it in new
-free programs, and that you know you can do these things.
-
- To protect your rights, we need to prevent others from denying you
-these rights or asking you to surrender the rights. Therefore, you have
-certain responsibilities if you distribute copies of the software, or if
-you modify it: responsibilities to respect the freedom of others.
-
- For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must pass on to the recipients the same
-freedoms that you received. You must make sure that they, too, receive
-or can get the source code. And you must show them these terms so they
-know their rights.
-
- Developers that use the GNU GPL protect your rights with two steps:
-(1) assert copyright on the software, and (2) offer you this License
-giving you legal permission to copy, distribute and/or modify it.
-
- For the developers' and authors' protection, the GPL clearly explains
-that there is no warranty for this free software. For both users' and
-authors' sake, the GPL requires that modified versions be marked as
-changed, so that their problems will not be attributed erroneously to
-authors of previous versions.
-
- Some devices are designed to deny users access to install or run
-modified versions of the software inside them, although the manufacturer
-can do so. This is fundamentally incompatible with the aim of
-protecting users' freedom to change the software. The systematic
-pattern of such abuse occurs in the area of products for individuals to
-use, which is precisely where it is most unacceptable. Therefore, we
-have designed this version of the GPL to prohibit the practice for those
-products. If such problems arise substantially in other domains, we
-stand ready to extend this provision to those domains in future versions
-of the GPL, as needed to protect the freedom of users.
-
- Finally, every program is threatened constantly by software patents.
-States should not allow patents to restrict development and use of
-software on general-purpose computers, but in those that do, we wish to
-avoid the special danger that patents applied to a free program could
-make it effectively proprietary. To prevent this, the GPL assures that
-patents cannot be used to render the program non-free.
-
- The precise terms and conditions for copying, distribution and
-modification follow.
-
- TERMS AND CONDITIONS
-
- 0. Definitions.
-
- "This License" refers to version 3 of the GNU General Public License.
-
- "Copyright" also means copyright-like laws that apply to other kinds of
-works, such as semiconductor masks.
-
- "The Program" refers to any copyrightable work licensed under this
-License. Each licensee is addressed as "you". "Licensees" and
-"recipients" may be individuals or organizations.
-
- To "modify" a work means to copy from or adapt all or part of the work
-in a fashion requiring copyright permission, other than the making of an
-exact copy. The resulting work is called a "modified version" of the
-earlier work or a work "based on" the earlier work.
-
- A "covered work" means either the unmodified Program or a work based
-on the Program.
-
- To "propagate" a work means to do anything with it that, without
-permission, would make you directly or secondarily liable for
-infringement under applicable copyright law, except executing it on a
-computer or modifying a private copy. Propagation includes copying,
-distribution (with or without modification), making available to the
-public, and in some countries other activities as well.
-
- To "convey" a work means any kind of propagation that enables other
-parties to make or receive copies. Mere interaction with a user through
-a computer network, with no transfer of a copy, is not conveying.
-
- An interactive user interface displays "Appropriate Legal Notices"
-to the extent that it includes a convenient and prominently visible
-feature that (1) displays an appropriate copyright notice, and (2)
-tells the user that there is no warranty for the work (except to the
-extent that warranties are provided), that licensees may convey the
-work under this License, and how to view a copy of this License. If
-the interface presents a list of user commands or options, such as a
-menu, a prominent item in the list meets this criterion.
-
- 1. Source Code.
-
- The "source code" for a work means the preferred form of the work
-for making modifications to it. "Object code" means any non-source
-form of a work.
-
- A "Standard Interface" means an interface that either is an official
-standard defined by a recognized standards body, or, in the case of
-interfaces specified for a particular programming language, one that
-is widely used among developers working in that language.
-
- The "System Libraries" of an executable work include anything, other
-than the work as a whole, that (a) is included in the normal form of
-packaging a Major Component, but which is not part of that Major
-Component, and (b) serves only to enable use of the work with that
-Major Component, or to implement a Standard Interface for which an
-implementation is available to the public in source code form. A
-"Major Component", in this context, means a major essential component
-(kernel, window system, and so on) of the specific operating system
-(if any) on which the executable work runs, or a compiler used to
-produce the work, or an object code interpreter used to run it.
-
- The "Corresponding Source" for a work in object code form means all
-the source code needed to generate, install, and (for an executable
-work) run the object code and to modify the work, including scripts to
-control those activities. However, it does not include the work's
-System Libraries, or general-purpose tools or generally available free
-programs which are used unmodified in performing those activities but
-which are not part of the work. For example, Corresponding Source
-includes interface definition files associated with source files for
-the work, and the source code for shared libraries and dynamically
-linked subprograms that the work is specifically designed to require,
-such as by intimate data communication or control flow between those
-subprograms and other parts of the work.
-
- The Corresponding Source need not include anything that users
-can regenerate automatically from other parts of the Corresponding
-Source.
-
- The Corresponding Source for a work in source code form is that
-same work.
-
- 2. Basic Permissions.
-
- All rights granted under this License are granted for the term of
-copyright on the Program, and are irrevocable provided the stated
-conditions are met. This License explicitly affirms your unlimited
-permission to run the unmodified Program. The output from running a
-covered work is covered by this License only if the output, given its
-content, constitutes a covered work. This License acknowledges your
-rights of fair use or other equivalent, as provided by copyright law.
-
- You may make, run and propagate covered works that you do not
-convey, without conditions so long as your license otherwise remains
-in force. You may convey covered works to others for the sole purpose
-of having them make modifications exclusively for you, or provide you
-with facilities for running those works, provided that you comply with
-the terms of this License in conveying all material for which you do
-not control copyright. Those thus making or running the covered works
-for you must do so exclusively on your behalf, under your direction
-and control, on terms that prohibit them from making any copies of
-your copyrighted material outside their relationship with you.
-
- Conveying under any other circumstances is permitted solely under
-the conditions stated below. Sublicensing is not allowed; section 10
-makes it unnecessary.
-
- 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
-
- No covered work shall be deemed part of an effective technological
-measure under any applicable law fulfilling obligations under article
-11 of the WIPO copyright treaty adopted on 20 December 1996, or
-similar laws prohibiting or restricting circumvention of such
-measures.
-
- When you convey a covered work, you waive any legal power to forbid
-circumvention of technological measures to the extent such circumvention
-is effected by exercising rights under this License with respect to
-the covered work, and you disclaim any intention to limit operation or
-modification of the work as a means of enforcing, against the work's
-users, your or third parties' legal rights to forbid circumvention of
-technological measures.
-
- 4. Conveying Verbatim Copies.
-
- You may convey verbatim copies of the Program's source code as you
-receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy an appropriate copyright notice;
-keep intact all notices stating that this License and any
-non-permissive terms added in accord with section 7 apply to the code;
-keep intact all notices of the absence of any warranty; and give all
-recipients a copy of this License along with the Program.
-
- You may charge any price or no price for each copy that you convey,
-and you may offer support or warranty protection for a fee.
-
- 5. Conveying Modified Source Versions.
-
- You may convey a work based on the Program, or the modifications to
-produce it from the Program, in the form of source code under the
-terms of section 4, provided that you also meet all of these conditions:
-
- a) The work must carry prominent notices stating that you modified
- it, and giving a relevant date.
-
- b) The work must carry prominent notices stating that it is
- released under this License and any conditions added under section
- 7. This requirement modifies the requirement in section 4 to
- "keep intact all notices".
-
- c) You must license the entire work, as a whole, under this
- License to anyone who comes into possession of a copy. This
- License will therefore apply, along with any applicable section 7
- additional terms, to the whole of the work, and all its parts,
- regardless of how they are packaged. This License gives no
- permission to license the work in any other way, but it does not
- invalidate such permission if you have separately received it.
-
- d) If the work has interactive user interfaces, each must display
- Appropriate Legal Notices; however, if the Program has interactive
- interfaces that do not display Appropriate Legal Notices, your
- work need not make them do so.
-
- A compilation of a covered work with other separate and independent
-works, which are not by their nature extensions of the covered work,
-and which are not combined with it such as to form a larger program,
-in or on a volume of a storage or distribution medium, is called an
-"aggregate" if the compilation and its resulting copyright are not
-used to limit the access or legal rights of the compilation's users
-beyond what the individual works permit. Inclusion of a covered work
-in an aggregate does not cause this License to apply to the other
-parts of the aggregate.
-
- 6. Conveying Non-Source Forms.
-
- You may convey a covered work in object code form under the terms
-of sections 4 and 5, provided that you also convey the
-machine-readable Corresponding Source under the terms of this License,
-in one of these ways:
-
- a) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by the
- Corresponding Source fixed on a durable physical medium
- customarily used for software interchange.
-
- b) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by a
- written offer, valid for at least three years and valid for as
- long as you offer spare parts or customer support for that product
- model, to give anyone who possesses the object code either (1) a
- copy of the Corresponding Source for all the software in the
- product that is covered by this License, on a durable physical
- medium customarily used for software interchange, for a price no
- more than your reasonable cost of physically performing this
- conveying of source, or (2) access to copy the
- Corresponding Source from a network server at no charge.
-
- c) Convey individual copies of the object code with a copy of the
- written offer to provide the Corresponding Source. This
- alternative is allowed only occasionally and noncommercially, and
- only if you received the object code with such an offer, in accord
- with subsection 6b.
-
- d) Convey the object code by offering access from a designated
- place (gratis or for a charge), and offer equivalent access to the
- Corresponding Source in the same way through the same place at no
- further charge. You need not require recipients to copy the
- Corresponding Source along with the object code. If the place to
- copy the object code is a network server, the Corresponding Source
- may be on a different server (operated by you or a third party)
- that supports equivalent copying facilities, provided you maintain
- clear directions next to the object code saying where to find the
- Corresponding Source. Regardless of what server hosts the
- Corresponding Source, you remain obligated to ensure that it is
- available for as long as needed to satisfy these requirements.
-
- e) Convey the object code using peer-to-peer transmission, provided
- you inform other peers where the object code and Corresponding
- Source of the work are being offered to the general public at no
- charge under subsection 6d.
-
- A separable portion of the object code, whose source code is excluded
-from the Corresponding Source as a System Library, need not be
-included in conveying the object code work.
-
- A "User Product" is either (1) a "consumer product", which means any
-tangible personal property which is normally used for personal, family,
-or household purposes, or (2) anything designed or sold for incorporation
-into a dwelling. In determining whether a product is a consumer product,
-doubtful cases shall be resolved in favor of coverage. For a particular
-product received by a particular user, "normally used" refers to a
-typical or common use of that class of product, regardless of the status
-of the particular user or of the way in which the particular user
-actually uses, or expects or is expected to use, the product. A product
-is a consumer product regardless of whether the product has substantial
-commercial, industrial or non-consumer uses, unless such uses represent
-the only significant mode of use of the product.
-
- "Installation Information" for a User Product means any methods,
-procedures, authorization keys, or other information required to install
-and execute modified versions of a covered work in that User Product from
-a modified version of its Corresponding Source. The information must
-suffice to ensure that the continued functioning of the modified object
-code is in no case prevented or interfered with solely because
-modification has been made.
-
- If you convey an object code work under this section in, or with, or
-specifically for use in, a User Product, and the conveying occurs as
-part of a transaction in which the right of possession and use of the
-User Product is transferred to the recipient in perpetuity or for a
-fixed term (regardless of how the transaction is characterized), the
-Corresponding Source conveyed under this section must be accompanied
-by the Installation Information. But this requirement does not apply
-if neither you nor any third party retains the ability to install
-modified object code on the User Product (for example, the work has
-been installed in ROM).
-
- The requirement to provide Installation Information does not include a
-requirement to continue to provide support service, warranty, or updates
-for a work that has been modified or installed by the recipient, or for
-the User Product in which it has been modified or installed. Access to a
-network may be denied when the modification itself materially and
-adversely affects the operation of the network or violates the rules and
-protocols for communication across the network.
-
- Corresponding Source conveyed, and Installation Information provided,
-in accord with this section must be in a format that is publicly
-documented (and with an implementation available to the public in
-source code form), and must require no special password or key for
-unpacking, reading or copying.
-
- 7. Additional Terms.
-
- "Additional permissions" are terms that supplement the terms of this
-License by making exceptions from one or more of its conditions.
-Additional permissions that are applicable to the entire Program shall
-be treated as though they were included in this License, to the extent
-that they are valid under applicable law. If additional permissions
-apply only to part of the Program, that part may be used separately
-under those permissions, but the entire Program remains governed by
-this License without regard to the additional permissions.
-
- When you convey a copy of a covered work, you may at your option
-remove any additional permissions from that copy, or from any part of
-it. (Additional permissions may be written to require their own
-removal in certain cases when you modify the work.) You may place
-additional permissions on material, added by you to a covered work,
-for which you have or can give appropriate copyright permission.
-
- Notwithstanding any other provision of this License, for material you
-add to a covered work, you may (if authorized by the copyright holders of
-that material) supplement the terms of this License with terms:
-
- a) Disclaiming warranty or limiting liability differently from the
- terms of sections 15 and 16 of this License; or
-
- b) Requiring preservation of specified reasonable legal notices or
- author attributions in that material or in the Appropriate Legal
- Notices displayed by works containing it; or
-
- c) Prohibiting misrepresentation of the origin of that material, or
- requiring that modified versions of such material be marked in
- reasonable ways as different from the original version; or
-
- d) Limiting the use for publicity purposes of names of licensors or
- authors of the material; or
-
- e) Declining to grant rights under trademark law for use of some
- trade names, trademarks, or service marks; or
-
- f) Requiring indemnification of licensors and authors of that
- material by anyone who conveys the material (or modified versions of
- it) with contractual assumptions of liability to the recipient, for
- any liability that these contractual assumptions directly impose on
- those licensors and authors.
-
- All other non-permissive additional terms are considered "further
-restrictions" within the meaning of section 10. If the Program as you
-received it, or any part of it, contains a notice stating that it is
-governed by this License along with a term that is a further
-restriction, you may remove that term. If a license document contains
-a further restriction but permits relicensing or conveying under this
-License, you may add to a covered work material governed by the terms
-of that license document, provided that the further restriction does
-not survive such relicensing or conveying.
-
- If you add terms to a covered work in accord with this section, you
-must place, in the relevant source files, a statement of the
-additional terms that apply to those files, or a notice indicating
-where to find the applicable terms.
-
- Additional terms, permissive or non-permissive, may be stated in the
-form of a separately written license, or stated as exceptions;
-the above requirements apply either way.
-
- 8. Termination.
-
- You may not propagate or modify a covered work except as expressly
-provided under this License. Any attempt otherwise to propagate or
-modify it is void, and will automatically terminate your rights under
-this License (including any patent licenses granted under the third
-paragraph of section 11).
-
- However, if you cease all violation of this License, then your
-license from a particular copyright holder is reinstated (a)
-provisionally, unless and until the copyright holder explicitly and
-finally terminates your license, and (b) permanently, if the copyright
-holder fails to notify you of the violation by some reasonable means
-prior to 60 days after the cessation.
-
- Moreover, your license from a particular copyright holder is
-reinstated permanently if the copyright holder notifies you of the
-violation by some reasonable means, this is the first time you have
-received notice of violation of this License (for any work) from that
-copyright holder, and you cure the violation prior to 30 days after
-your receipt of the notice.
-
- Termination of your rights under this section does not terminate the
-licenses of parties who have received copies or rights from you under
-this License. If your rights have been terminated and not permanently
-reinstated, you do not qualify to receive new licenses for the same
-material under section 10.
-
- 9. Acceptance Not Required for Having Copies.
-
- You are not required to accept this License in order to receive or
-run a copy of the Program. Ancillary propagation of a covered work
-occurring solely as a consequence of using peer-to-peer transmission
-to receive a copy likewise does not require acceptance. However,
-nothing other than this License grants you permission to propagate or
-modify any covered work. These actions infringe copyright if you do
-not accept this License. Therefore, by modifying or propagating a
-covered work, you indicate your acceptance of this License to do so.
-
- 10. Automatic Licensing of Downstream Recipients.
-
- Each time you convey a covered work, the recipient automatically
-receives a license from the original licensors, to run, modify and
-propagate that work, subject to this License. You are not responsible
-for enforcing compliance by third parties with this License.
-
- An "entity transaction" is a transaction transferring control of an
-organization, or substantially all assets of one, or subdividing an
-organization, or merging organizations. If propagation of a covered
-work results from an entity transaction, each party to that
-transaction who receives a copy of the work also receives whatever
-licenses to the work the party's predecessor in interest had or could
-give under the previous paragraph, plus a right to possession of the
-Corresponding Source of the work from the predecessor in interest, if
-the predecessor has it or can get it with reasonable efforts.
-
- You may not impose any further restrictions on the exercise of the
-rights granted or affirmed under this License. For example, you may
-not impose a license fee, royalty, or other charge for exercise of
-rights granted under this License, and you may not initiate litigation
-(including a cross-claim or counterclaim in a lawsuit) alleging that
-any patent claim is infringed by making, using, selling, offering for
-sale, or importing the Program or any portion of it.
-
- 11. Patents.
-
- A "contributor" is a copyright holder who authorizes use under this
-License of the Program or a work on which the Program is based. The
-work thus licensed is called the contributor's "contributor version".
-
- A contributor's "essential patent claims" are all patent claims
-owned or controlled by the contributor, whether already acquired or
-hereafter acquired, that would be infringed by some manner, permitted
-by this License, of making, using, or selling its contributor version,
-but do not include claims that would be infringed only as a
-consequence of further modification of the contributor version. For
-purposes of this definition, "control" includes the right to grant
-patent sublicenses in a manner consistent with the requirements of
-this License.
-
- Each contributor grants you a non-exclusive, worldwide, royalty-free
-patent license under the contributor's essential patent claims, to
-make, use, sell, offer for sale, import and otherwise run, modify and
-propagate the contents of its contributor version.
-
- In the following three paragraphs, a "patent license" is any express
-agreement or commitment, however denominated, not to enforce a patent
-(such as an express permission to practice a patent or covenant not to
-sue for patent infringement). To "grant" such a patent license to a
-party means to make such an agreement or commitment not to enforce a
-patent against the party.
-
- If you convey a covered work, knowingly relying on a patent license,
-and the Corresponding Source of the work is not available for anyone
-to copy, free of charge and under the terms of this License, through a
-publicly available network server or other readily accessible means,
-then you must either (1) cause the Corresponding Source to be so
-available, or (2) arrange to deprive yourself of the benefit of the
-patent license for this particular work, or (3) arrange, in a manner
-consistent with the requirements of this License, to extend the patent
-license to downstream recipients. "Knowingly relying" means you have
-actual knowledge that, but for the patent license, your conveying the
-covered work in a country, or your recipient's use of the covered work
-in a country, would infringe one or more identifiable patents in that
-country that you have reason to believe are valid.
-
- If, pursuant to or in connection with a single transaction or
-arrangement, you convey, or propagate by procuring conveyance of, a
-covered work, and grant a patent license to some of the parties
-receiving the covered work authorizing them to use, propagate, modify
-or convey a specific copy of the covered work, then the patent license
-you grant is automatically extended to all recipients of the covered
-work and works based on it.
-
- A patent license is "discriminatory" if it does not include within
-the scope of its coverage, prohibits the exercise of, or is
-conditioned on the non-exercise of one or more of the rights that are
-specifically granted under this License. You may not convey a covered
-work if you are a party to an arrangement with a third party that is
-in the business of distributing software, under which you make payment
-to the third party based on the extent of your activity of conveying
-the work, and under which the third party grants, to any of the
-parties who would receive the covered work from you, a discriminatory
-patent license (a) in connection with copies of the covered work
-conveyed by you (or copies made from those copies), or (b) primarily
-for and in connection with specific products or compilations that
-contain the covered work, unless you entered into that arrangement,
-or that patent license was granted, prior to 28 March 2007.
-
- Nothing in this License shall be construed as excluding or limiting
-any implied license or other defenses to infringement that may
-otherwise be available to you under applicable patent law.
-
- 12. No Surrender of Others' Freedom.
-
- If conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot convey a
-covered work so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you may
-not convey it at all. For example, if you agree to terms that obligate you
-to collect a royalty for further conveying from those to whom you convey
-the Program, the only way you could satisfy both those terms and this
-License would be to refrain entirely from conveying the Program.
-
- 13. Use with the GNU Affero General Public License.
-
- Notwithstanding any other provision of this License, you have
-permission to link or combine any covered work with a work licensed
-under version 3 of the GNU Affero General Public License into a single
-combined work, and to convey the resulting work. The terms of this
-License will continue to apply to the part which is the covered work,
-but the special requirements of the GNU Affero General Public License,
-section 13, concerning interaction through a network will apply to the
-combination as such.
-
- 14. Revised Versions of this License.
-
- The Free Software Foundation may publish revised and/or new versions of
-the GNU General Public License from time to time. Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
- Each version is given a distinguishing version number. If the
-Program specifies that a certain numbered version of the GNU General
-Public License "or any later version" applies to it, you have the
-option of following the terms and conditions either of that numbered
-version or of any later version published by the Free Software
-Foundation. If the Program does not specify a version number of the
-GNU General Public License, you may choose any version ever published
-by the Free Software Foundation.
-
- If the Program specifies that a proxy can decide which future
-versions of the GNU General Public License can be used, that proxy's
-public statement of acceptance of a version permanently authorizes you
-to choose that version for the Program.
-
- Later license versions may give you additional or different
-permissions. However, no additional obligations are imposed on any
-author or copyright holder as a result of your choosing to follow a
-later version.
-
- 15. Disclaimer of Warranty.
-
- THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
-APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
-HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
-OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
-IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
- 16. Limitation of Liability.
-
- IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGES.
-
- 17. Interpretation of Sections 15 and 16.
-
- If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
-
- END OF TERMS AND CONDITIONS
-
- How to Apply These Terms to Your New Programs
-
- If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
- To do so, attach the following notices to the program. It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
- {one line to give the program's name and a brief idea of what it does.}
- Copyright (C) {year} {name of author}
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-Also add information on how to contact you by electronic and paper mail.
-
- If the program does terminal interaction, make it output a short
-notice like this when it starts in an interactive mode:
-
- {project} Copyright (C) {year} {fullname}
- This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
- This is free software, and you are welcome to redistribute it
- under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License. Of course, your program's commands
-might be different; for a GUI interface, you would use an "about box".
-
- You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU GPL, see
-<http://www.gnu.org/licenses/>.
-
- The GNU General Public License does not permit incorporating your program
-into proprietary programs. If your program is a subroutine library, you
-may consider it more useful to permit linking proprietary applications with
-the library. If this is what you want to do, use the GNU Lesser General
-Public License instead of this License. But first, please read
-<http://www.gnu.org/philosophy/why-not-lgpl.html>.
\ No newline at end of file
diff --git a/README.md b/README.md
deleted file mode 100644
index 9a5aa1d..0000000
--- a/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-![NanoOK](https://documentation.tgac.ac.uk/download/thumbnails/7209095/nanook-01.jpg?version=1&modificationDate=1447675247000&api=v2)
-
-Full documentation can be found at https://documentation.tgac.ac.uk/display/NANOOK/NanoOK
-
-Contact richard.leggett at earlham.ac.uk for more information or for comments/bug reports.
diff --git a/bin/get_contig_stats.pl b/bin/get_contig_stats.pl
deleted file mode 100755
index 11d9f27..0000000
--- a/bin/get_contig_stats.pl
+++ /dev/null
@@ -1,253 +0,0 @@
-#!/usr/bin/perl -w
-
-# Script: get_contig_stats.pl
-# Purpose: Calculate mean, N50, average etc. stats for a FASTA file
-# Author: Richard Leggett
-
-use warnings;
-use strict;
-use Getopt::Long;
-
-my $inputfile;
-my $longerthan;
-my %contig_lengths;
-my $type = 0;
-my $id = "";
-my $contig_length = 0;
-my $total_length = 0;
-my $shortest;
-my $longest;
-my $cumulative = 0;
-my $counter = 0;
-my $n50;
-my $n50count;
-my $n90;
-my $n90count;
-my @lengths;
-my @length_counts;
-my $help_requested;
-my $is_fasta;
-my $is_fastq;
-my $histogram;
-my $length_file;
-my $length_fh;
-my %hist_counts;
-
-&GetOptions(
-'i|input:s' => \$inputfile,
-'g|histogram:s' => \$histogram,
-'h|help' => \$help_requested,
-'l|longerthan:s' => \$longerthan,
-'a|fasta' => \$is_fasta,
-'q|fastq' => \$is_fastq,
-'r|lengthfile:s' => \$length_file
-);
-
-if (defined $help_requested) {
- print "\nGet contig stats on a FASTA file.\n\n";
- print "Usage: get_contig_stats.pl <-f filename> [-l lengths]\n\n";
- print "Options:\n";
- print " -i | -input input file\n";
- print " -a | -fasta input file is FASTA (default)\n";
- print " -g | -histogram filename to output length histogram\n";
- print " -q | -fastq input file is FASTQ\n";
- print " -r | -lengthfile filename to output lengths\n";
- print " -l | -longerthan list of comma separated lengths for which you wish\n";
- print " to know number of contigs >= to - eg. 76,151\n\n";
-
- exit;
-}
-
-die "You must specify -input or -i\n" if not defined $inputfile;
-
-$is_fasta = 1 if not defined $is_fastq;
-
-if (defined $length_file) {
- open($length_fh, ">".$length_file) or die "Can't open $length_file\n";
-}
-
-if (defined $longerthan) {
- @lengths = split(/,/, $longerthan);
- for (my $i=0; $i<@lengths; $i++) {
- $length_counts[$i] = 0;
- }
-}
-
-if (defined $is_fastq) {
- read_fastq($inputfile);
-} else {
- read_fasta($inputfile);
-}
-
-if (defined $length_fh) {
- close($length_fh);
-}
-
-foreach $id (sort {$contig_lengths{$b} <=> $contig_lengths{$a}} keys %contig_lengths)
-{
- my $contig_length = $contig_lengths{$id};
- $cumulative += $contig_length;
- $counter++;
-
- if (not defined $n50) {
- if ($cumulative >= ($total_length * 0.5)) {
- $n50 = $contig_length;
- $n50count = $counter;
- }
- }
-
- if (not defined $n90) {
- if ($cumulative >= ($total_length * 0.9)) {
- $n90 = $contig_length;
- $n90count = $counter;
- }
- }
-}
-
-my $mean = $cumulative / $counter;
-
-my $header_string="NumContigs\tTotalSum\tMeanLength\tShortest\tLongest\tN50Length\tN50Count\tN90Length\tN90Count";
-
-print "NumContigs:\t", $counter, "\n";
-print "TotalSum:\t", $cumulative, "\n";
-printf "MeanLength:\t%.2f\n", $mean;
-print "Shortest:\t", $shortest, "\n";
-print "Longest:\t", $longest, "\n";
-print "N50Length:\t", $n50, "\n";
-print "N50Count:\t", $n50count, "\n";
-print "N90Length:\t", $n90, "\n";
-print "N90Count:\t", $n90count, "\n";
-
-if (defined $longerthan) {
- for (my $i=0; $i<@lengths; $i++) {
- $header_string = $header_string."\tGE".$lengths[$i]."Count";
- print "GE",$lengths[$i],"Count:\t", $length_counts[$i], "\n";
- }
-}
-
-print "Headings:\t", $header_string, "\n";
-print "AllFields:\t", $counter, "\t", $cumulative, "\t";
-printf "%.2f\t", $mean;
-print $shortest, "\t", $longest, "\t", $n50, "\t", $n50count, "\t", $n90, "\t", $n90count;
-
-if (defined $longerthan) {
- for (my $i=0; $i<@lengths; $i++) {
- print "\t", $length_counts[$i];
- }
-}
-
-print "\n";
-
-if (defined $histogram) {
- output_histogram();
-}
-
-
-sub read_fasta
-{
- my $filename = $_[0];
- open(INPUTFILE, $filename) or die "Can't open $filename\n";
-
-
- while(<INPUTFILE>) {
- chomp(my $line = $_);
-
- if ($line =~ /^>(\S+)/) {
- if ($contig_length > 0) {
- store_length($id, $contig_length);
- }
-
- $contig_length = 0;
- $id = $1;
- } else {
- $contig_length += length($line);
- }
- }
-
- if ($contig_length > 0) {
- store_length($id, $contig_length);
- }
-
- close(INPUTFILE);
-}
-
-sub read_fastq
-{
- my $filename = $_[0];
- open(INPUTFILE, $filename) or die "Can't open $filename\n";
-
- while(<INPUTFILE>) {
- chomp(my $seq_header = $_);
- chomp(my $sequence = <INPUTFILE>);
- chomp(my $qual_header = <INPUTFILE>);
- chomp(my $qualities = <INPUTFILE>);
- my @fields = split(/ /, $seq_header);
- my $id = substr $fields[0], 1;
- my $contig_length = length($sequence);
-
- store_length($id, $contig_length);
- }
-
- close(INPUTFILE);
-}
-
-sub store_length
-{
- my $id = $_[0];
- my $contig_length = $_[1];
-
- if (defined $contig_lengths{$id}) {
- my $new_id = $id;
- my $counter = 0;
- do {
- $counter++;
- $new_id = $id."_duplicate_".$counter;
- } while (defined $contig_lengths{$new_id});
- $id = $new_id;
- print "Found duplicate ID - used new ID $id\n";
- }
-
- $contig_lengths{$id} = $contig_length;
- $total_length += $contig_length;
-
- if ((not defined $longest) || ($contig_length > $longest)) {
- $longest = $contig_length;
- }
-
- if ((not defined $shortest) || ($contig_length < $shortest)) {
- $shortest = $contig_length;
- }
-
- if (defined $longerthan) {
- for (my $i=0; $i<@lengths; $i++) {
- if ($contig_length >= $lengths[$i]) {
- $length_counts[$i]++;
- }
- }
- }
-
- if (defined $hist_counts{$contig_length}) {
- $hist_counts{$contig_length}++;
- } else {
- $hist_counts{$contig_length}=1;
- }
-
- if (defined $length_fh) {
- print $length_fh $id, "\t", $contig_length, "\n";
- }
-}
-
-sub output_histogram
-{
- open(my $output_fh, ">".$histogram) or die "Can't open $histogram\n";
-
- for (my $i=1; $i<=$longest; $i++) {
- if (defined $hist_counts{$i}) {
- print $output_fh $i, "\t", $hist_counts{$i}, "\n";
- } else {
- print $output_fh $i, "\t0\n";
- }
- }
-
- close($output_fh);
-}
diff --git a/bin/nanook b/bin/nanook
deleted file mode 100755
index 16fe060..0000000
--- a/bin/nanook
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/sh
-
-JAVA_ARGS="-Xmx2048m"
-
-if [ -z "$NANOOK_DIR" ] ; then
- echo "Error: You must set NANOOK_DIR before running."
- exit 1
-fi
-
-JARFILE=${NANOOK_DIR}/dist/NanoOK.jar
-
-if [ ! -f ${JARFILE} ] ; then
- echo "Error: Can't find NanoOK.jar - it needs to be inside the dist subdirectory of the directory pointed to by NANOOK_DIR which is currently ${NANOOK_DIR}"
- exit 1
-fi
-
-# If your library path (DYLD_LIBRARY_PATH on MAC) doesn't include HDF5 libraries,
-# you can manually set it here by adding a -Djava.library.path=/path/to/lib/dir
-# to the java command...
-
-exec java ${JAVA_ARGS} -jar ${JARFILE} "$@"
diff --git a/bin/nanook_get_read_stats.pl b/bin/nanook_get_read_stats.pl
deleted file mode 100755
index 06be937..0000000
--- a/bin/nanook_get_read_stats.pl
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/usr/bin/perl
-#
-# Program: nanotools_extract_reads
-# Author: Richard Leggett
-# Contact: richard.leggett at tgac.ac.uk
-
-use strict;
-use warnings;
-use Getopt::Long;
-
-my $sample;
-my $help_requested;
-my $basedir="/Users/leggettr/Documents/Projects/Nanopore/";
-
-&GetOptions(
-'b|basedir:s' => \$basedir,
-'s|sample:s' => \$sample,
-'h|help' => \$help_requested
-);
-
-print "\nnanotools_get_read_stats\n\n";
-
-if (defined $help_requested) {
- print "Get stats on read lengths.\n\n";
- print "Usage: nanotools_get_read_stats.pl <-s sample> [-b directory]\n\n";
- print "Options:\n";
- print " -s | -sample Sample name\n";
- print " -b | -basedir Base directory containing all sample directories\n";
- print "\n";
-
- exit;
-}
-
-die "You must specify a sample name" if not defined $sample;
-
-print "Base directory: $basedir\n";
-print "Sample: $sample\n";
-
-print "Merging template reads\n";
-system("find ${basedir}/${sample}/fasta/Template -name '*BaseCalled_Template.fasta' | xargs cat > ${basedir}/${sample}/fasta/all_Template.fasta");
-print "Merging complement reads\n";
-system("find ${basedir}/${sample}/fasta/Complement -name '*BaseCalled_Complement.fasta' | xargs cat > ${basedir}/${sample}/fasta/all_Complement.fasta");
-print "Merging 2D reads\n";
-system("find ${basedir}/${sample}/fasta/2D -name '*BaseCalled_2D.fasta' | xargs cat > ${basedir}/${sample}/fasta/all_2D.fasta");
-
-print "Generating stats for template reads\n";
-system("get_contig_stats.pl -i ${basedir}/${sample}/fasta/all_Template.fasta -a -g ${basedir}/${sample}/analysis/all_Template_fasta_hist.txt -r ${basedir}/${sample}/analysis/all_Template_lengths.txt -l 500,1000,1500,2000,2500,3000,3500,4000,4500,5000 > ${basedir}/${sample}/analysis/all_Template_stats.txt");
-print "Generating stats for complement reads\n";
-system("get_contig_stats.pl -i ${basedir}/${sample}/fasta/all_Complement.fasta -a -g ${basedir}/${sample}/analysis/all_Complement_fasta_hist.txt -r ${basedir}/${sample}/analysis/all_Complement_lengths.txt -l 500,1000,1500,2000,2500,3000,3500,4000,4500,5000 > ${basedir}/${sample}/analysis/all_Complement_stats.txt");
-print "Generating stats for 2D reads\n";
-system("get_contig_stats.pl -i ${basedir}/${sample}/fasta/all_2D.fasta -a -g ${basedir}/${sample}/analysis/all_2D_fasta_hist.txt -r ${basedir}/${sample}/analysis/all_2D_lengths.txt -l 500,1000,1500,2000,2500,3000,3500,4000,4500,5000 > ${basedir}/${sample}/analysis/all_2D_stats.txt");
-
-system("echo \"\" >> ${basedir}/${sample}/summary.txt");
-system("cat ${basedir}/${sample}/all_Template_stats.txt | grep 'Headings:' | sed 's/Headings:/ ReadType/' >> ${basedir}/${sample}/summary.txt");
-system("cat ${basedir}/${sample}/all_Template_stats.txt | grep 'AllFields:' | sed 's/AllFields:/ Template/' >> ${basedir}/${sample}/summary.txt");
-system("cat ${basedir}/${sample}/all_Complement_stats.txt | grep 'AllFields:' | sed 's/AllFields:/Complement/' >> ${basedir}/${sample}/summary.txt");
-system("cat ${basedir}/${sample}/all_2D_stats.txt | grep 'AllFields:' | sed 's/AllFields:/ 2D/' >> ${basedir}/${sample}/summary.txt");
-
-print "DONE\n";
\ No newline at end of file
diff --git a/bin/nanook_get_tracking.pl b/bin/nanook_get_tracking.pl
deleted file mode 100644
index 32297e5..0000000
--- a/bin/nanook_get_tracking.pl
+++ /dev/null
@@ -1,157 +0,0 @@
-#!/usr/bin/perl
-#
-# Program: nanotools_extract_reads
-# Author: Richard Leggett
-# Contact: richard.leggett at tgac.ac.uk
-
-use strict;
-use warnings;
-use Getopt::Long;
-
-my $sample;
-my $help_requested;
-my $basedir="/Users/leggettr/Documents/Projects/Nanopore/";
-my @channels;
-my @count_in_time;
-
-&GetOptions(
-'b|basedir:s' => \$basedir,
-'s|sample:s' => \$sample,
-'h|help' => \$help_requested
-);
-
-print "\nnanotools_extract_reads\n\n";
-
-if (defined $help_requested) {
- print "Get tracking information.\n\n";
- print "Usage: nanotools_get_tracking.pl <-s sample> [-b directory]\n\n";
- print "Options:\n";
- print " -s | -sample Sample name\n";
- print " -b | -basedir Base directory containing all sample directories\n";
- print "\n";
- print "Sample directories should be inside the base directory. Within each sample\n";
- print "directory, there should be a fast5 directory containing the input files.\n";
- print "\n";
-
- exit;
-}
-
-die "You must specify a sample name" if not defined $sample;
-
-print "Base directory: $basedir\n";
-print "Sample: $sample\n";
-
-my $in_dir = $basedir."/".$sample."/fast5";
-my $out_fasta;
-my $out_fastq;
-
-
-if ((-d $in_dir."/pass") && (-d $in_dir."/fail")) {
- print "Got pass/fail directory\n";
- process_directory($in_dir."/pass");
- #process_directory($in_dir."/fail");
-} else {
- print "Got all-in-one directory\n";
- process_directory($in_dir);
-}
-
-print "\nAnalysing...\n\n";
-
-sub process_directory {
- my $input_dir = $_[0];
- my $total_reads = 0;
- my $total_2d = 0;
- my $total_template = 0;
- my $total_complement = 0;
- my $datatype_2d = "\/Analyses\/Basecall\_2D\_000\/BaseCalled\_2D\/Fastq";
- my $datatype_template = "\/Analyses\/Basecall\_2D\_000\/BaseCalled\_template\/Fastq";
- my $datatype_complement = "\/Analyses\/Basecall\_2D\_000\/BaseCalled\_complement\/Fastq";
-
- print "Processing reads\n";
- print " In: ", $input_dir, "\n";
-
- opendir(DIR, $input_dir) or die $!;
- while (my $file = readdir(DIR)) {
- next unless ($file =~ m/\.fast5$/);
- my $channel;
- my $template_time;
- my $complement_time;
-
- print "Extracting $file\n";
-
- if ($file =~ /_ch(\d+)_/) {
- $channel = $1;
- }
-
- my $pathname = "${input_dir}/${file}";
- my @dump = `h5dump -a /Analyses/Basecall_2D_000/BaseCalled_template/Events/start_time ${pathname}`;
- for (my $i=0; $i<@dump; $i++) {
- if ($dump[$i] =~ /\(0\)\: (\S+)/) {
- $template_time = $1;
- }
- }
-
- #@dump = `h5dump -a /Analyses/Basecall_2D_000/BaseCalled_complement/Events/start_time ${pathname}`;
- #for (my $i=0; $i<@dump; $i++) {
- # if ($dump[$i] =~ /\(0\)\: (\S+)/) {
- # $complement_time = $1;
- # }
- #}
-
- #if ((defined $channel) && (defined $template_time)) {
- # print $channel, "\t", $template_time, "\n";
- #}
-
- if (defined $channels[$channel]) {
- $channels[$channel] = $channels[$channel].",".$template_time;
- } else {
- $channels[$channel] = $template_time;
- }
-
- }
- closedir(DIR);
-}
-
-print "Channel\tTime\tDifference\tMean\n";
-
-for (my $c=0; $c<512; $c++) {
- if (defined $channels[$c]) {
- my @times = split(/,/, $channels[$c]);
- my @sorted_times = sort {$a <=> $b} @times;
- my $total;
- my $count = 0;
-
- for (my $i=0; $i<@times; $i++) {
- my $difference = 0;
- my $mean = 0;
-
- if ($i > 0) {
- $difference = $sorted_times[$i] - $sorted_times[$i-1];
- $total += $difference;
- $mean = $total / $i;
- }
-
- if ($sorted_times[$i] < (60*60*12)) {
- $count++;
- }
-
- print $c, "\t", $sorted_times[$i], "\t", $difference, "\t", $mean, "\n";
- }
-
- $count_in_time[$c] = $count;
- }
-}
-
-print "\n";
-print "Channel\tCount\n";
-my $total = 0;
-my $n = 0;
-for (my $c=0; $c<512; $c++) {
- if (defined $channels[$c]) {
- print $c, "\t", $count_in_time[$c], "\n";
- $total += $count_in_time[$c];
- $n++;
- }
-}
-
-print "\nMean: ".($total / $n)."\n";
\ No newline at end of file
diff --git a/bin/nanook_plot_comparison.R b/bin/nanook_plot_comparison.R
deleted file mode 100755
index e3cd79d..0000000
--- a/bin/nanook_plot_comparison.R
+++ /dev/null
@@ -1,100 +0,0 @@
-library(ggplot2)
-library(scales)
-library(grid)
-library(gridExtra)
-library(reshape2)
-
-# Filenames
-args <- commandArgs(TRUE)
-analysisdir <- args[1];
-graphsdir <- args[2];
-samplelist <- args[3];
-outdir <- args[4];
-format <- args[5];
-
-types = c("2D", "Template", "Complement");
-colours = c("#68B5B9", "#CF746D", "#91A851");
-
-if (format=="png") {
- textsize <- c(40)
- pointsize <- c(5)
- pointalpha <- c(0.5)
- pointshape <- c(1)
- pointwidth <- c(3)
- xvjust <- c(1.2)
- yvjust <- c(1.8)
-} else {
- textsize <- c(10)
- pointsize <- c(2)
- pointalpha <-c(0.4)
- pointshape <- c(1)
- pointwidth <- c(1)
- xvjust <- c(0.2)
- yvjust <- c(0.8)
-}
-
-
-data_samples = read.table(samplelist, header=TRUE);
-
-for (t in 1:3) {
- colourcode = colours[t];
-
- # Gather data for box plots of length
- df <- data.frame();
- listOfDataFrames <- NULL;
- count <- c(1);
- for (i in 1:nrow(data_samples)) {
- type = types[t];
- sampledir <- data_samples[i, "SampleDir"];
- filename_lengths <- paste(sampledir, "/", analysisdir, "/", "all_",type,"_lengths.txt", sep="");
- data_lengths = read.table(filename_lengths, col.name=c("name", "length"));
- #df$size <- data_lengths$length;
- thisid <- data_samples[i, "SampleName"];
- #paste(data_samples[i, "SampleName"], type, sep="_");
- thisid
- listOfDataFrames[[count]] <- data.frame(Sample=thisid, Length=data_lengths$length);
- count <- count + 1;
- }
-
- # Read lengths
- imagewidth <- 1 + (nrow(data_samples) * 0.5);
- df <- do.call("rbind", listOfDataFrames);
- output_file <- paste(graphsdir, "/", type, "_lengths.pdf", sep="");
- message(output_file);
- pdf(output_file, width=imagewidth, height = 4);
- print(ggplot(df, aes(x=Sample, y=Length, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]));
- garbage <- dev.off();
-
- # Bar stacked plot of mapping
- imagewidth <- 1 + (nrow(data_samples) * 0.5) + 1.5;
- filename_maps <- paste(outdir, "/", type,"_map_summary.txt", sep="");
- message(filename_maps)
- #filename_maps <- c("~/temp/2D_map_summary.txt");
- data_maps = read.table(filename_maps, header=TRUE);
- df <- melt(data_maps, id.var="Sample")
- output_file <- paste(graphsdir, "/", type, "_maps.pdf", sep="");
- message(output_file);
- pdf(output_file, width=imagewidth, height = 4);
- print(ggplot(df, aes(x = Sample, y = value, fill = variable)) + geom_bar(stat = "identity") + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("%"));
- garbage <- dev.off();
-
- imagewidth <- 1 + (nrow(data_samples) * 0.5);
-
- # Number of reads
- filename_comparison <- paste(outdir, "/", type,"_comparison.txt", sep="");
- data_comparison = read.table(filename_comparison, header=TRUE);
- output_file <- paste(graphsdir, "/", type, "_number_of_reads.pdf", sep="");
- message(output_file);
- pdf(output_file, width=imagewidth, height = 4);
- print(ggplot(data_comparison, aes(x=data_comparison$Name, y=data_comparison$NumReads)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Sample") + ylab("Number of reads") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-x [...]
- garbage <- dev.off();
-
- # Total bases
- data_comparison = read.table(filename_comparison, header=TRUE);
- output_file <- paste(graphsdir, "/", type, "_total_bases.pdf", sep="");
- message(output_file);
- pdf(output_file, width=imagewidth, height = 4);
- print(ggplot(data_comparison, aes(x=data_comparison$Name, y=data_comparison$TotalBases)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Sample") + ylab("Total bases") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvj [...]
- garbage <- dev.off();
-
-}
diff --git a/bin/nanook_plot_comparison_reference.R b/bin/nanook_plot_comparison_reference.R
deleted file mode 100755
index 103ab22..0000000
--- a/bin/nanook_plot_comparison_reference.R
+++ /dev/null
@@ -1,215 +0,0 @@
-library(ggplot2)
-library(scales)
-library(grid)
-library(gridExtra)
-
-# Positional command-line arguments:
-#   1 analysis directory name (joined under each sample directory below)
-#   2 directory the graphs are written to
-#   3 sample list file (read as a table with a header row)
-#   4 output directory
-#   5 reference id
-#   6 image format
-args <- commandArgs(trailingOnly=TRUE)
-analysisdir <- args[1]
-graphsdir <- args[2]
-samplelist <- args[3]
-outdir <- args[4]
-reference <- args[5]
-format <- args[6]
-
-# Read types that are plotted, and a colour vector of the same length.
-types <- c("2D", "Template", "Complement")
-colours <- c("#68B5B9", "#CF746D", "#91A851")
-
-# Plot styling: "png" gets larger text and points than any other format.
-if (format == "png") {
-    textsize <- 40
-    pointsize <- 5
-    pointalpha <- 0.5
-    pointshape <- 1
-    pointwidth <- 3
-    xvjust <- 1.2
-    yvjust <- 1.8
-} else {
-    textsize <- 10
-    pointsize <- 2
-    pointalpha <- 0.4
-    pointshape <- 1
-    pointwidth <- 1
-    xvjust <- 0.2
-    yvjust <- 0.8
-}
-
-
-data_samples <- read.table(samplelist, header=TRUE)
-
-# Image width: 1 plus 0.5 for every sample row.
-imagewidth <- 1 + nrow(data_samples) * 0.5
-
-# Query identity
-# For each read type, collect QueryPercentIdentity from every sample's
-# <reference>_<type>_alignments.txt table and draw one boxplot per sample,
-# followed by a second copy with the y axis limited to 60-100.
-for (t in 1:3) {
-    # Assigned once per type: it is needed for the output names below even
-    # when the sample loop does not run.
-    type <- types[t];
-    listOfDataFrames <- list();
-    count <- c(1);
-    # seq_len() gives zero iterations for an empty sample list, whereas
-    # 1:nrow() would iterate over c(1, 0).
-    for (i in seq_len(nrow(data_samples))) {
-        sampledir <- data_samples[i, "SampleDir"];
-        filename_data <- paste(sampledir, "/", analysisdir, "/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
-        message(filename_data);
-        if (file.exists(filename_data)) {
-            data_field = read.table(filename_data, header=TRUE);
-            message(nrow(data_field));
-            if (nrow(data_field) > 0) {
-                thisid <- data_samples[i, "SampleName"];
-                listOfDataFrames[[count]] <- data.frame(Sample=thisid, Variable=data_field$QueryPercentIdentity);
-                count <- count + 1;
-            }
-        }
-    }
-
-    # With no usable alignments rbind returns NULL, which cannot be plotted;
-    # skip this type instead of opening a PDF device and then failing.
-    if (length(listOfDataFrames) == 0) {
-        message("No alignment data for ", type, ", skipping query identity plots");
-        next;
-    }
-
-    df <- do.call("rbind", listOfDataFrames);
-    output_file <- paste(graphsdir, "/", reference, "_", type, "_query_identity.pdf", sep="");
-    message(output_file);
-    pdf(output_file, width=imagewidth, height = 4);
-    print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Read identity %"));
-    garbage <- dev.off();
-
-    # Same plot, y axis restricted to 60-100.
-    output_file <- paste(graphsdir, "/", reference, "_", type, "_query_identity_zoom.pdf", sep="");
-    message(output_file);
-    pdf(output_file, width=imagewidth, height = 4);
-    print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Read identity %") + scale_y_continuous(limits=c(60, 100)));
-    garbage <- dev.off();
-}
-
-# Query GC
-# For each read type, collect QueryGC from every sample's
-# <reference>_<type>_alignments.txt table and draw one boxplot per sample.
-for (t in 1:3) {
-    # Assigned once per type: it is needed for the output name below even
-    # when the sample loop does not run.
-    type <- types[t];
-    listOfDataFrames <- list();
-    count <- c(1);
-    # seq_len() gives zero iterations for an empty sample list, whereas
-    # 1:nrow() would iterate over c(1, 0).
-    for (i in seq_len(nrow(data_samples))) {
-        sampledir <- data_samples[i, "SampleDir"];
-        filename_data <- paste(sampledir, "/", analysisdir ,"/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
-        if (file.exists(filename_data)) {
-            data_field = read.table(filename_data, header=TRUE);
-            message(nrow(data_field));
-            if (nrow(data_field) > 0) {
-                thisid <- data_samples[i, "SampleName"];
-                listOfDataFrames[[count]] <- data.frame(Sample=thisid, Variable=data_field$QueryGC);
-                count <- count + 1;
-            }
-        }
-    }
-
-    # With no usable alignments rbind returns NULL, which cannot be plotted;
-    # skip this type instead of opening a PDF device and then failing.
-    if (length(listOfDataFrames) == 0) {
-        message("No alignment data for ", type, ", skipping query GC plot");
-        next;
-    }
-
-    df <- do.call("rbind", listOfDataFrames);
-    output_file <- paste(graphsdir, "/", reference, "_", type, "_query_gc.pdf", sep="");
-    message(output_file);
-    pdf(output_file, width=imagewidth, height = 4);
-    print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Read GC %"));
-    garbage <- dev.off();
-}
-
-# Best perfect kmer
-# For each read type, collect LongestPerfectKmer from every sample's
-# <reference>_<type>_alignments.txt table and draw one boxplot per sample.
-for (t in 1:3) {
-    # Assigned once per type: it is needed for the output name below even
-    # when the sample loop does not run.
-    type <- types[t];
-    listOfDataFrames <- list();
-    count <- c(1);
-    # seq_len() gives zero iterations for an empty sample list, whereas
-    # 1:nrow() would iterate over c(1, 0).
-    for (i in seq_len(nrow(data_samples))) {
-        sampledir <- data_samples[i, "SampleDir"];
-        filename_data <- paste(sampledir, "/", analysisdir, "/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
-        if (file.exists(filename_data)) {
-            data_field = read.table(filename_data, header=TRUE);
-            if (nrow(data_field) > 0) {
-                thisid <- data_samples[i, "SampleName"];
-                listOfDataFrames[[count]] <- data.frame(Sample=thisid, Variable=data_field$LongestPerfectKmer);
-                count <- count + 1;
-            }
-        }
-    }
-
-    # With no usable alignments rbind returns NULL, which cannot be plotted;
-    # skip this type instead of opening a PDF device and then failing.
-    if (length(listOfDataFrames) == 0) {
-        message("No alignment data for ", type, ", skipping best perfect kmer plot");
-        next;
-    }
-
-    df <- do.call("rbind", listOfDataFrames);
-    output_file <- paste(graphsdir, "/", reference, "_", type, "_best_perfect_kmer.pdf", sep="");
-    message(output_file);
-    pdf(output_file, width=imagewidth, height = 4);
-    print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Best perfect kmer"));
-    garbage <- dev.off();
-}
-
-#PercentQueryAligned
-# For each read type, collect PercentQueryAligned from every sample's
-# <reference>_<type>_alignments.txt table and draw one boxplot per sample,
-# followed by a second copy with the y axis limited to 75-100.
-for (t in 1:3) {
-    # Assigned once per type: it is needed for the output names below even
-    # when the sample loop does not run.
-    type <- types[t];
-    listOfDataFrames <- list();
-    count <- c(1);
-    # seq_len() gives zero iterations for an empty sample list, whereas
-    # 1:nrow() would iterate over c(1, 0).
-    for (i in seq_len(nrow(data_samples))) {
-        sampledir <- data_samples[i, "SampleDir"];
-        filename_data <- paste(sampledir, "/", analysisdir, "/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
-        if (file.exists(filename_data)) {
-            data_field = read.table(filename_data, header=TRUE);
-            if (nrow(data_field) > 0) {
-                thisid <- data_samples[i, "SampleName"];
-                listOfDataFrames[[count]] <- data.frame(Sample=thisid, Variable=data_field$PercentQueryAligned);
-                count <- count + 1;
-            }
-        }
-    }
-
-    # With no usable alignments rbind returns NULL, which cannot be plotted;
-    # skip this type instead of opening a PDF device and then failing.
-    if (length(listOfDataFrames) == 0) {
-        message("No alignment data for ", type, ", skipping percent query aligned plots");
-        next;
-    }
-
-    df <- do.call("rbind", listOfDataFrames);
-    output_file <- paste(graphsdir, "/", reference, "_", type, "_percent_query_aligned.pdf", sep="");
-    message(output_file);
-    pdf(output_file, width=imagewidth, height = 4);
-    print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("% read aligned"));
-    garbage <- dev.off();
-
-    # Same plot, y axis restricted to 75-100.
-    output_file <- paste(graphsdir, "/", reference, "_", type, "_percent_query_aligned_zoom.pdf", sep="");
-    message(output_file);
-    pdf(output_file, width=imagewidth, height = 4);
-    print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("% read aligned") + scale_y_continuous(limits=c(75, 100)));
-    garbage <- dev.off();
-}
-
-#AlignmentSize
-# For each read type, collect AlignmentSize from every sample's
-# <reference>_<type>_alignments.txt table and draw one boxplot per sample.
-for (t in 1:3) {
-    # Assigned once per type: it is needed for the output name below even
-    # when the sample loop does not run.
-    type <- types[t];
-    listOfDataFrames <- list();
-    count <- c(1);
-    # seq_len() gives zero iterations for an empty sample list, whereas
-    # 1:nrow() would iterate over c(1, 0).
-    for (i in seq_len(nrow(data_samples))) {
-        sampledir <- data_samples[i, "SampleDir"];
-        filename_data <- paste(sampledir, "/", analysisdir, "/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
-        if (file.exists(filename_data)) {
-            data_field = read.table(filename_data, header=TRUE);
-            if (nrow(data_field) > 0) {
-                thisid <- data_samples[i, "SampleName"];
-                listOfDataFrames[[count]] <- data.frame(Sample=thisid, Variable=data_field$AlignmentSize);
-                count <- count + 1;
-            }
-        }
-    }
-
-    # With no usable alignments rbind returns NULL, which cannot be plotted;
-    # skip this type instead of opening a PDF device and then failing.
-    if (length(listOfDataFrames) == 0) {
-        message("No alignment data for ", type, ", skipping alignment size plot");
-        next;
-    }
-
-    df <- do.call("rbind", listOfDataFrames);
-    output_file <- paste(graphsdir, "/", reference, "_", type, "_alignment_size.pdf", sep="");
-    message(output_file);
-    pdf(output_file, width=imagewidth, height = 4);
-    print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Alignment size"));
-    garbage <- dev.off();
-}
-
-#AlignmentPercentIdentity
-#for (t in 1:3) {
-# df <- data.frame();
-# listOfDataFrames <- NULL;
-# count <- c(1);
-# for (i in 1:nrow(data_samples)) {
-# type = types[t];
-# sampledir <- data_samples[i, "SampleDir"];
-# filename_data <- paste(sampledir, "/analysis/", reference, "/", reference, "_",type,"_alignments.txt", sep="");
-# if (file.exists(filename_data)) {
-# data_field = read.table(filename_data, header=TRUE);
-# thisid <- data_samples[i, "SampleName"];
-# message(thisid);
-# listOfDataFrames[[count]] <- data.frame(Sample=thisid, Variable=data_field$AlignmentPercentIdentity);
-# count <- count + 1;
-# }
-# }
-#
-# df <- do.call("rbind", listOfDataFrames);
-# output_file <- paste(outdir, "/graphs/", reference, "_", type, "_alignment_identity.pdf", sep="");
-# message(output_file);
-# pdf(output_file, width=imagewidth, height = 4);
-# print(ggplot(df, aes(x=Sample, y=Variable, fill=Sample)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Alignment identity %"));
-# garbage <- dev.off();
-#}
diff --git a/bin/nanook_plot_lengths.R b/bin/nanook_plot_lengths.R
deleted file mode 100755
index 7c237c2..0000000
--- a/bin/nanook_plot_lengths.R
+++ /dev/null
@@ -1,87 +0,0 @@
-library(ggplot2)
-library(scales)
-library(grid)
-
-# Positional command-line arguments:
-#   1 analysis directory (input tables are read from here)
-#   2 directory the graphs are written to
-#   3 image format
-args <- commandArgs(trailingOnly=TRUE)
-analysisdir <- args[1]
-graphsdir <- args[2]
-format <- args[3]
-
-# Read types and the colour used for each (indexed together in the loop below).
-types <- c("2D", "Template", "Complement")
-colours <- c("#68B5B9", "#CF746D", "#91A851")
-
-# Plot styling: "png" gets larger text and points than any other format.
-if (format == "png") {
-    textsize <- 40
-    pointsize <- 5
-    pointalpha <- 0.5
-    pointshape <- 1
-    pointwidth <- 3
-    xvjust <- 1.2
-    yvjust <- 1.8
-} else {
-    textsize <- 14
-    pointsize <- 2
-    pointalpha <- 0.4
-    pointshape <- 1
-    pointwidth <- 1
-    xvjust <- 0.2
-    yvjust <- 0.8
-}
-
-# For each of the three read types, draw two graphs from tables in analysisdir:
-#   * a histogram of read lengths from all_<type>_lengths.txt
-#   * a scatter of the nk21 column against Length from all_<type>_kmers.txt
-# Output goes to graphsdir as PNG (1200x800) when format is "png", otherwise
-# as PDF (6x4). A missing file or a table with at most one row only produces
-# a WARNING line; the loop then carries on.
-for (t in 1:3) {
- type = types[t];
- colourcode = colours[t];
- #cat(type, " ", colourcode, "\n");
-
- # Count vs length
- filename_lengths <- paste(analysisdir, "/", "all_",type,"_lengths.txt", sep="");
- filename_kmers <- paste(analysisdir, "/", "all_",type,"_kmers.txt", sep="");
-
- if (file.exists(filename_lengths)) {
- # Table is read without a header; the two columns are named here.
- data_lengths = read.table(filename_lengths, col.name=c("name", "length"))
-
- # Note: a table with exactly one row is also reported as "No data".
- if (nrow(data_lengths) > 1) {
- if (format=="png") {
- lengths_png <- paste(graphsdir, "/", "all_",type,"_lengths.png", sep="");
- png(lengths_png, width=1200, height=800)
- # Histogram with 1000-wide bins, x axis limited to 0-35000.
- print(ggplot(data_lengths, aes(x=data_lengths$length), xlab="Length") + geom_histogram(binwidth=1000, fill=colourcode) + xlab("Length") +ylab("Count") + scale_x_continuous(limits=c(0, 35000)) + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
- } else {
- lengths_pdf <- paste(graphsdir, "/", "all_",type,"_lengths.pdf", sep="");
- pdf(lengths_pdf, width=6, height=4)
- # Same histogram as the PNG branch.
- print(ggplot(data_lengths, aes(x=data_lengths$length), xlab="Length") + geom_histogram(binwidth=1000, fill=colourcode) + xlab("Length") +ylab("Count") + scale_x_continuous(limits=c(0, 35000)) + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
- }
- # Close the device opened above; the return value is discarded.
- garbage <- dev.off()
- } else {
- cat("WARNING: No data in ", filename_lengths, "\n");
- }
- } else {
- cat("WARNING: Couldn't find", filename_lengths, "\n");
- }
-
- # Number of perfect 21mers versus length scatter
- if (file.exists(filename_kmers)) {
- # Unlike the lengths table, this read is wrapped in try() so that a
- # read failure is reported as a warning instead of stopping the script.
- data_kmers = try(read.table(filename_kmers, header=TRUE), silent=TRUE)
-
- if (inherits(data_kmers, "try-error")) {
- cat("WARNING: Couldn't read", filename_kmers,"\n");
- } else {
- if (nrow(data_kmers) > 1) {
- if (format=="png") {
- kmers_png <- paste(graphsdir, "/", "all_",type,"_21mers.png", sep="");
- png(kmers_png, width=1200, height=800)
- # NOTE(review): the next statement is cut off ("[...]") in this copy of the patch.
- print(ggplot(data_kmers, aes(x=data_kmers$Length, y=data_kmers$nk21), xlab="Read length") + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Number of perfect 21mers") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(breaks=seq(0, 40000, 4000)) + scale_y_continuous(breaks=seq(0, 400, 20)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_t [...]
- #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth))
- } else {
- kmers_pdf <- paste(graphsdir, "/", "all_",type,"_21mers.pdf", sep="");
- pdf(kmers_pdf, width=6, height=4)
- # NOTE(review): the next statement is cut off ("[...]") in this copy of the patch.
- print(ggplot(data_kmers, aes(x=data_kmers$Length, y=data_kmers$nk21), xlab="Read length") + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Number of perfect 21mers") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(breaks=seq(0, 40000, 4000)) + scale_y_continuous(breaks=seq(0, 400, 20)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjus [...]
- }
- garbage <- dev.off()
- } else {
- cat("WARNING: No data in ", filename_kmers, "\n");
- }
- }
- } else {
- cat("WARNING: Couldn't find", filename_kmers, "\n");
- }
-}
diff --git a/bin/nanook_plot_reference.R b/bin/nanook_plot_reference.R
deleted file mode 100755
index 43ef583..0000000
--- a/bin/nanook_plot_reference.R
+++ /dev/null
@@ -1,475 +0,0 @@
-library(ggplot2)
-library(scales)
-library(grid)
-library(gridExtra)
-
-# Positional command-line arguments:
-#   1 analysis directory, 2 graphs directory, 3 reference id, 4 image format
-args <- commandArgs(trailingOnly=TRUE)
-analysisdir <- args[1]
-graphsdir <- args[2]
-refid <- args[3]
-format <- args[4]
-
-# Round x up to the next multiple of `to` (default 10).
-# A value that is already an exact multiple is returned unchanged.
-roundUp <- function(x,to=10)
-{
-    whole_steps <- x %/% to
-    # TRUE (counted as 1) when x is not an exact multiple of `to`.
-    has_remainder <- (x %% to) != 0
-    to * (whole_steps + has_remainder)
-}
-
-# Read types and the colour used for each (indexed together in the loops below).
-types <- c("Template", "Complement", "2D")
-colours <- c("#CF746D", "#91A851", "#68B5B9")
-
-# Plot styling: "png" gets larger text and points than any other format.
-if (format == "png") {
-    textsize <- 40
-    pointsize <- 5
-    pointalpha <- 0.5
-    pointshape <- 1
-    pointwidth <- 3
-    xvjust <- 1.2
-    yvjust <- 1.8
-} else {
-    textsize <- 14
-    pointsize <- 2
-    pointalpha <- 0.4
-    pointshape <- 1
-    pointwidth <- 1
-    xvjust <- 0.2
-    yvjust <- 0.8
-}
-
-# Plot GC% vs position
-# Reads <analysisdir>/<refid>/<refid>_gc.txt and draws a black line graph
-# titled "GC content" with the y axis fixed to 0-100, written under
-# <graphsdir>/<refid>/ as PNG (1600x400) when format is "png", otherwise as
-# PDF (16x4). A missing file or a table with at most one row only produces a
-# WARNING line.
-data_gc_filename <- paste(analysisdir, "/", refid, "/", refid, "_gc.txt", sep="");
-
-if (file.exists(data_gc_filename)) {
- # The second column is named "Coverage" here but is plotted as "GC %".
- data_gc = read.table(data_gc_filename, col.name=c("Position", "Coverage"))
-
- # Note: a table with exactly one row is also reported as "No data".
- if (nrow(data_gc) > 1) {
- if (format=="png") {
- png_gc <- paste(graphsdir, "/", refid, "/", refid, "_gc.png", sep="");
- cat("Writing", png_gc, "\n");
- png(png_gc, width=1600, height=400)
- print(ggplot(data_gc, aes(x=data_gc$Position, y=data_gc$Coverage)) + geom_line(color="black") + ggtitle("GC content") + theme(text = element_text(size=textsize)) + xlab("Position") + ylab("GC %") + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
- } else {
- pdf_gc <- paste(graphsdir, "/", refid, "/", refid, "_gc.pdf", sep="");
- cat("Writing", pdf_gc, "\n");
- pdf(pdf_gc, width=16, height=4)
- # NOTE(review): the next statement is cut off ("[...]") in this copy of the patch.
- print(ggplot(data_gc, aes(x=data_gc$Position, y=data_gc$Coverage)) + geom_line(color="black") + ggtitle("GC content") + theme(text = element_text(size=textsize)) + xlab("Position") + ylab("GC %") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text( [...]
- }
- # Close the device opened above; the return value is discarded.
- garbage <- dev.off()
- } else {
- cat("WARNING: No data in ", data_gc_filename, "\n");
- }
-} else {
- cat("WARNING: Couldn't find", data_gc_filename, "\n");
-}
-
-# Accumulators for the per-type identity data frames appended further down.
-listOfDataFrames <- NULL
-count <- 1
-
-# Work out longest kmer
-#   maxk     - largest Size found in any type's cumulative perfect kmer table;
-#              10 is added once all tables have been scanned.
-#   cum_maxk - largest Size whose Perfect value is above 5; rounded up with
-#              roundUp() once all tables have been scanned.
-maxk <- 0
-cum_maxk <- 0
-for (t in 1:3) {
-    type <- types[t]
-    data_perfect_cumulative_filename <- paste(analysisdir, "/", refid, "/", refid, "_",type,"_cumulative_perfect_kmers.txt", sep="")
-
-    # A missing table is skipped without a message.
-    if (!file.exists(data_perfect_cumulative_filename)) {
-        next
-    }
-
-    cat("Reading", data_perfect_cumulative_filename, "\n")
-    data_perfect_cumulative <- read.table(data_perfect_cumulative_filename, col.name=c("Size", "n", "Perfect"))
-    cat("Read", nrow(data_perfect_cumulative), "rows\n")
-
-    if (nrow(data_perfect_cumulative) == 0) {
-        cat("WARNING: No data in ", data_perfect_cumulative_filename, "\n")
-        next
-    }
-
-    for (i in 1:length(data_perfect_cumulative$Perfect)) {
-        kmer_size <- data_perfect_cumulative$Size[i]
-        if (kmer_size > maxk) {
-            maxk <- kmer_size
-        }
-        if (kmer_size > cum_maxk && data_perfect_cumulative$Perfect[i] > 5) {
-            cum_maxk <- kmer_size
-        }
-    }
-}
-maxk <- maxk + 10
-cum_maxk <- roundUp(cum_maxk)
-cat("max k", maxk, "\n")
-cat("cum_max k", cum_maxk, "\n")
-
-
-for (t in 1:3) {
- type = types[t];
- colourcode = colours[t];
-
- # Plot coverage vs position
- data_coverage_filename <- paste(analysisdir, "/", refid, "/", refid, "_",type,"_coverage.txt", sep="");
- cat("Reading", data_coverage_filename, "\n");
-
- if (file.exists(data_coverage_filename)) {
- data_coverage = read.table(data_coverage_filename, col.name=c("Position", "Coverage"))
- if (nrow(data_coverage) > 0) {
- if (format=="png") {
- png_coverage <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_coverage.png", sep="");
- cat("Writing", png_coverage, "\n");
- png(png_coverage, width=1600, height=400)
- print(ggplot(data_coverage, aes(x=data_coverage$Position, y=data_coverage$Coverage)) + geom_line(color=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Position") + ylab("Mean coverage") + expand_limits(y = 0) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
- } else {
- pdf_coverage <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_coverage.pdf", sep="");
- cat("Writing", pdf_coverage, "\n");
- pdf(pdf_coverage, width=16, height=4)
- print(ggplot(data_coverage, aes(x=data_coverage$Position, y=data_coverage$Coverage)) + geom_line(color=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Position") + ylab("Mean coverage") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + expand_limits(y = 0) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=ele [...]
- }
- garbage <- dev.off()
- } else {
- cat("WARNING: No data in ", data_coverage_filename, "\n");
- }
- } else {
- cat("WARNING: Couldn't find", data_coverage_filename, "\n");
- }
-
- # Plot % reads with perfect kmer vs kmer size
- data_perfect_cumulative_filename <- paste(analysisdir, "/", refid, "/", refid, "_",type,"_cumulative_perfect_kmers.txt", sep="");
-
- if (file.exists(data_perfect_cumulative_filename)) {
- cat("Reading", data_perfect_cumulative_filename, "\n");
- data_perfect_cumulative = read.table(data_perfect_cumulative_filename, col.name=c("Size", "n", "Perfect"))
- if (nrow(data_perfect_cumulative) > 0) {
- if (format=="png") {
- png_perfect_cumulative <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_cumulative_perfect_kmers.png", sep="");
- cat("Writing", png_perfect_cumulative, "\n");
- png(png_perfect_cumulative, width=1200, height=800)
- print(ggplot(data_perfect_cumulative, aes(x=data_perfect_cumulative$Size, y=data_perfect_cumulative$Perfect)) + geom_bar(stat="identity", width=0.7, fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("kmer size") + ylab("% reads with perfect kmer") + scale_x_continuous(limits=c(0, cum_maxk), breaks=seq(0,cum_maxk,by=50)) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.titl [...]
- } else {
- pdf_perfect_cumulative <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_cumulative_perfect_kmers.pdf", sep="");
- cat("Writing", pdf_perfect_cumulative, "\n");
- pdf(pdf_perfect_cumulative, width=6, height=4)
- print(ggplot(data_perfect_cumulative, aes(x=data_perfect_cumulative$Size, y=data_perfect_cumulative$Perfect)) + geom_bar(stat="identity", width=0.7, fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("kmer size") + ylab("% reads with perfect kmer") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + scale_x_continuous(limits=c(0, cum_maxk) [...]
- }
- garbage <- dev.off()
- } else {
- cat("WARNING: No data in ", data_perfect_cumulative_filename, "\n");
- }
- } else {
- cat("WARNING: Couldn't find", data_perfect_cumulative_filename, "\n");
- }
-
- # Plot %reads vs best perfect kmer
- #data_perfect_best_filename <- paste(analysisdir, "/", refid, "/", refid, "_",type,"_best_perfect_kmers.txt", sep="");
- #if (format=="png") {
- # png_perfect_best <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_best_perfect_kmers.png", sep="");
- # png(png_perfect_best, width=1200, height=800)
- # data_perfect_best = read.table(data_perfect_best_filename, col.name=c("Size", "n", "Perfect"))
- # print(ggplot(data_perfect_best, aes(x=data_perfect_best$Size, y=data_perfect_best$Perfect)) + geom_bar(stat="identity", width=0.7, fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Best perfect kmer") + ylab("% reads") + scale_x_continuous(limits=c(0, 140)) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vju [...]
- #} else {
- # pdf_perfect_best <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_best_perfect_kmers.pdf", sep="");
- # pdf(pdf_perfect_best, width=6, height=4)
- # data_perfect_best = read.table(data_perfect_best_filename, col.name=c("Size", "n", "Perfect"))
- # print(ggplot(data_perfect_best, aes(x=data_perfect_best$Size, y=data_perfect_best$Perfect)) + geom_bar(stat="identity", width=0.7, fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Best perfect kmer") + ylab("% reads") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + scale_x_continuous(limits=c(0, 140)) + theme(text = element_text(size=texts [...]
- #}
- #garbage <- dev.off()
-
- # ========== Indels files ==========
-
- # Insertions
- data_insertions_filename <- paste(analysisdir, "/", refid, "/", refid, "_",type,"_insertions.txt", sep="");
- cat("Reading", data_insertions_filename, "\n");
- if (file.exists(data_insertions_filename)) {
- data_insertions = read.table(data_insertions_filename, col.name=c("Size", "Percent"))
- if (nrow(data_insertions) > 0) {
- if (format=="png") {
- png_insertions <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_insertions.png", sep="");
- cat("Writing", png_insertions, "\n");
- png(png_insertions, width=1200, height=800)
- print(ggplot(data_insertions, aes(x=data_insertions$Size, y=data_insertions$Percent)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Insertion size") + ylab("%") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
- } else {
- pdf_insertions <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_insertions.pdf", sep="");
- cat("Writing", pdf_insertions, "\n");
- pdf(pdf_insertions, width=6, height=4)
- print(ggplot(data_insertions, aes(x=data_insertions$Size, y=data_insertions$Percent)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Insertion size") + ylab("%") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vju [...]
- }
- garbage <- dev.off()
- } else {
- cat("WARNING: No data in ", data_insertions_filename, "\n");
- }
- } else {
- cat("WARNING: Couldn't find", data_insertions_filename, "\n");
- }
-
- # Deletions
- data_deletions_filename <- paste(analysisdir, "/", refid, "/", refid, "_",type,"_deletions.txt", sep="");
- cat("Reading", data_deletions_filename, "\n");
- if (file.exists(data_deletions_filename)) {
- data_deletions = read.table(data_deletions_filename, col.name=c("Size", "Percent"))
- if (nrow(data_deletions) > 0) {
- if (format=="png") {
- png_deletions <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_deletions.png", sep="");
- cat("Writing", png_deletions, "\n");
- png(png_deletions, width=1200, height=800)
- print(ggplot(data_deletions, aes(x=data_deletions$Size, y=data_deletions$Percent)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Deletion size") + ylab("%") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
- } else {
- pdf_deletions <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_deletions.pdf", sep="");
- cat("Writing", pdf_deletions, "\n");
- pdf(pdf_deletions, width=6, height=4)
- print(ggplot(data_deletions, aes(x=data_deletions$Size, y=data_deletions$Percent)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Deletion size") + ylab("%") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=- [...]
- }
- garbage <- dev.off()
- } else {
- cat("WARNING: No data in ", data_deletions_filename, "\n");
- }
- } else {
- cat("WARNING: Couldn't find", data_deletions_filename, "\n");
- }
-
- # ========== Alignments file ==========
-
- input_filename <- paste(analysisdir, "/", refid, "/", refid, "_",type,"_alignments.txt", sep="");
- cat("Reading", input_filename, "\n");
-
- if (file.exists(input_filename)) {
- data_alignments = read.table(input_filename, header=TRUE);
- if (nrow(data_alignments) > 1) {
- # Length vs Identity histograms
- if (format=="png") {
- identity_hist_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_hist.png", sep="")
- cat("Writing", identity_hist_png, "\n");
- png(identity_hist_png, width=1200, height=800)
- print(ggplot(data_alignments, aes(x=data_alignments$QueryPercentIdentity)) + geom_histogram(fill=colourcode) + xlab("Read identity %") +ylab("Count") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) )
- } else {
- identity_hist_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_hist.pdf", sep="")
- cat("Writing", identity_hist_pdf, "\n");
- pdf(identity_hist_pdf, width=6, height=4)
- print(ggplot(data_alignments, aes(x=data_alignments$QueryPercentIdentity)) + geom_histogram(fill=colourcode) + xlab("Read identity %") +ylab("Count") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
- }
- garbage <- dev.off()
-
- # GC histogram
- if (format=="png") {
- identity_hist_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_GC_hist.png", sep="")
- cat("Writing", identity_hist_png, "\n");
- png(identity_hist_png, width=1200, height=800)
- print(ggplot(data_alignments, aes(x=data_alignments$QueryGC)) + geom_histogram(fill=colourcode, binwidth=1) + xlab("GC %") +ylab("Read count") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) + scale_x_continuous(limits=c(0, 100)) )
- } else {
- identity_hist_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_GC_hist.pdf", sep="")
- cat("Writing", identity_hist_pdf, "\n");
- pdf(identity_hist_pdf, width=6, height=4)
- print(ggplot(data_alignments, aes(x=data_alignments$QueryGC)) + geom_histogram(fill=colourcode, binwidth = 1) + xlab("GC %") +ylab("Read count") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) + scale_x_continuous(limits=c(0, 100)))
- }
- garbage <- dev.off()
-
- # Identity vs Length Scatter plots
- if (format=="png") {
- identity_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_scatter.png", sep="");
- cat("Writing", identity_scatter_pdf, "\n");
- png(identity_scatter_pdf, width=1200, height=800)
- print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$QueryPercentIdentity)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Length") +ylab("Read identity %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vju [...]
- #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth))
- } else {
- identity_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_scatter.pdf", sep="");
- cat("Writing", identity_scatter_pdf, "\n");
- pdf(identity_scatter_pdf, width=6, height=4)
- print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$QueryPercentIdentity)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Length") +ylab("Read identity %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
- }
- garbage <- dev.off()
-
- # Identity vs Length heatmap
- if (format=="png") {
- identity_heatmap_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_heatmap.png", sep="");
- cat("Writing", identity_heatmap_png, "\n");
- png(identity_heatmap_png, width=1200, height=800)
- print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$QueryPercentIdentity)) + geom_bin2d(drop=TRUE, binwidth=c(500,2)) + xlab("Length") +ylab("Read identity %") + ggtitle(type) + theme(text = element_text(size=(textsize*0.75))) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) + scale_fill_gradient [...]
- #limits=c(0,50), breaks=seq(0, 40, by=10), colours=rainbow(4)
- } else {
- identity_heatmap_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_heatmap.pdf", sep="");
- cat("Writing", identity_heatmap_pdf, "\n");
- pdf(identity_heatmap_pdf, width=6, height=4)
- print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$QueryPercentIdentity)) + geom_bin2d(drop=TRUE, binwidth=c(500,2)) + xlab("Length") +ylab("Read identity %") + ggtitle(type) + theme(text = element_text(size=(textsize*0.75))) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))+ scale_fill_gradientn [...]
- }
- garbage <- dev.off()
-
- # Identity vs Length heatmap zoomed
- if (format=="png") {
- identity_heatmap_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_heatmap_zoom.png", sep="");
- cat("Writing", identity_heatmap_png, "\n");
- png(identity_heatmap_png, width=1200, height=800)
- print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$QueryPercentIdentity)) + geom_bin2d(drop=TRUE, binwidth=c(500,1)) + xlab("Length") +ylab("Read identity %") + ggtitle(type) + theme(text = element_text(size=(textsize*0.75))) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)) + scale_fill_gradientn(colours=rev(rainbow(n=30, end=4/6)), [...]
- #limits=c(0,50), breaks=seq(0, 40, by=10), colours=rainbow(4)
- } else {
- identity_heatmap_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_length_vs_identity_heatmap_zoom.pdf", sep="");
- cat("Writing", identity_heatmap_pdf, "\n");
- pdf(identity_heatmap_pdf, width=6, height=4)
- print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$QueryPercentIdentity)) + geom_bin2d(drop=TRUE, binwidth=c(500,1)) + xlab("Length") +ylab("Read identity %") + ggtitle(type) + theme(text = element_text(size=(textsize*0.75))) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust))+ scale_fill_gradientn(colours=rev(rainbow(n=30, end=4/6)), n [...]
- }
- garbage <- dev.off()
-
- # Identity boxplot
- listOfDataFrames[[count]] <- data.frame(Readset=type, Variable=data_alignments$QueryPercentIdentity);
- count <- count + 1;
-
- # Alignment identity vs. Fraction of read aligned scatter plots
- if (format=="png") {
- aid_scatter_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_read_fraction_vs_alignment_identity_scatter.png", sep="");
- cat("Writing", aid_scatter_png, "\n");
- png(aid_scatter_png, width=1200, height=800)
- print(ggplot(data_alignments, aes(x=data_alignments$PercentQueryAligned, y=data_alignments$AlignmentPercentIdentity)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Percentage of read aligned") +ylab("Alignment identity %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 105)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axi [...]
- #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth))
- } else {
- aid_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_read_fraction_vs_alignment_identity_scatter.pdf", sep="");
- cat("Writing", aid_scatter_pdf, "\n");
- pdf(aid_scatter_pdf, width=6, height=4)
- print(ggplot(data_alignments, aes(x=data_alignments$PercentQueryAligned, y=data_alignments$AlignmentPercentIdentity)) + geom_point(shape=pointshape, alpha=0.4, color=colourcode) + xlab("Percentage of read aligned") +ylab("Alignment identity %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 105)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text( [...]
- }
- garbage <- dev.off()
-
- # Query identity vs. Fraction of read aligned scatter plots
- if (format=="png") {
- qid_scatter_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_read_fraction_vs_query_identity_scatter.png", sep="");
- cat("Writing", qid_scatter_png, "\n");
- png(qid_scatter_png, width=1200, height=800)
- print(ggplot(data_alignments, aes(x=data_alignments$PercentQueryAligned, y=data_alignments$QueryPercentIdentity)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Percentage of read aligned") +ylab("Alignment identity %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 105)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.ti [...]
- #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth))
- } else {
- qid_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_read_fraction_vs_query_identity_scatter.pdf", sep="");
- cat("Writing", qid_scatter_pdf, "\n");
- pdf(qid_scatter_pdf, width=6, height=4)
- print(ggplot(data_alignments, aes(x=data_alignments$PercentQueryAligned, y=data_alignments$QueryPercentIdentity)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Percentage of read aligned") +ylab("Alignment identity %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 105)) + scale_y_continuous(limits=c(0, 100)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_te [...]
- }
- garbage <- dev.off()
-
- # Best perfect sequence vs. length scatters
- if (format=="png") {
- best_perf_scatter_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_longest_perfect_vs_length_scatter.png", sep="");
- cat("Writing", best_perf_scatter_png, "\n");
- png(best_perf_scatter_png, width=1200, height=800)
- print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$LongestPerfectKmer)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Longest perfect kmer") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
- #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth))
- } else {
- best_perf_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_longest_perfect_vs_length_scatter.pdf", sep="");
- cat("Writing", best_perf_scatter_pdf, "\n");
- pdf(best_perf_scatter_pdf, width=6, height=4)
- print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$LongestPerfectKmer)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Longest perfect kmer") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
- }
- garbage <- dev.off()
-
- # Best perfect sequence vs. length scatters zoomed
- if (format=="png") {
- best_perf_zoom_scatter_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_longest_perfect_vs_length_zoom_scatter.png", sep="");
- cat("Writing", best_perf_zoom_scatter_png, "\n");
- png(best_perf_zoom_scatter_png, width=1200, height=800)
- print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$LongestPerfectKmer)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Longest perfect kmer") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 10000)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=elemen [...]
- #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth))
- } else {
- best_perf_zoom_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_longest_perfect_vs_length_zoom_scatter.pdf", sep="");
- cat("Writing", best_perf_zoom_scatter_pdf, "\n");
- pdf(best_perf_zoom_scatter_pdf, width=6, height=4)
- print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$LongestPerfectKmer)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Longest perfect kmer") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 10000)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
- }
- garbage <- dev.off()
-
- # Plot %reads vs best perfect kmer
- cat(data_alignments$LongestPerfectKmer);
-
- hdf <- hist(breaks=seq(0,maxk,by=10), x=data_alignments$LongestPerfectKmer, plot=FALSE, right=FALSE); # bins are 0-9, 10-19, 20-29 etc.
- hdf$density = hdf$counts/sum(hdf$counts)*100
- tdf <- data.frame(Pos=hdf$mids, Counts=hdf$density);
-
- if (format=="png") {
- png_perfect_best <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_best_perfect_kmers.png", sep="");
- cat("Writing", png_perfect_best, "\n");
- png(png_perfect_best, width=1200, height=800)
- print(ggplot(tdf, aes(Pos, Counts)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Best perfect kmer") + ylab("% reads") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + scale_x_continuous(limits=c(0, cum_maxk), breaks=seq(0,cum_maxk,by=50)) + theme(text = element_text(size=textsize)) + theme(plot.margin [...]
- } else {
- pdf_perfect_best <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_best_perfect_kmers.pdf", sep="");
- cat("Writing", pdf_perfect_best, "\n");
- pdf(pdf_perfect_best, width=6, height=4)
- print(ggplot(tdf, aes(Pos, Counts)) + geom_bar(stat="identity", fill=colourcode) + ggtitle(type) + theme(text = element_text(size=textsize)) + xlab("Best perfect kmer") + ylab("% reads") + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.y=element_text(vjust=0.2)) + theme(axis.title.x=element_text(vjust=-0.2)) + scale_x_continuous(limits=c(0, cum_maxk), breaks=seq(0,cum_maxk,by=50)) + theme(text = element_text(size=textsize)) + theme(plot.margin [...]
- }
- garbage <- dev.off()
-
-
- # Number of perfect 21mers verses length scatter
- if (format=="png") {
- nk21_scatter_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_nk21_vs_length_scatter.png", sep="");
- cat("Writing", nk21_scatter_png, "\n");
- png(nk21_scatter_png, width=1200, height=800)
- print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$nk21)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Number of perfect 21mers") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
- #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth))
- } else {
- nk21_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_nk21_vs_length_scatter.pdf", sep="");
- cat("Writing", nk21_scatter_pdf, "\n");
- pdf(nk21_scatter_pdf, width=6, height=4)
- print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$nk21)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Number of perfect 21mers") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
- }
- garbage <- dev.off()
-
- # Mean perfect sequence vs. length scatters
- #mean_perf_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_mean_perfect_vs_length_scatter.pdf", sep="");
- #pdf(mean_perf_scatter_pdf, height=4, width=6)
- #print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$MeanPerfectKmer), xlab="Read length") + geom_point(shape=pointshape, alpha=pointalpha) + xlab("Read length") +ylab("Mean perfect kmer") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 10000)))
- #garbage <- dev.off()
-
- # Percentage of read aligned vs read length
- if (format=="png") {
- output_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_percent_aligned_vs_length_scatter.png", sep="");
- cat("Writing", output_png, "\n");
- png(output_png, width=1200, height=800)
- print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$PercentQueryAligned)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Percentage of read aligned") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
- #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth))
- } else {
- output_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_percent_aligned_vs_length_scatter.pdf", sep="");
- cat("Writing", output_pdf, "\n");
- pdf(output_pdf, width=6, height=4)
- print(ggplot(data_alignments, aes(x=data_alignments$QueryLength, y=data_alignments$PercentQueryAligned)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Read length") +ylab("Percentage of read aligned") + ggtitle(type) + theme(text = element_text(size=textsize)) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text(vjust=-xvjust)) + theme(axis.title.y=element_text(vjust=yvjust)))
- }
- garbage <- dev.off()
- } else {
- cat("WARNING: No data in ", input_filename, "\n");
- }
- } else {
- cat("WARNING: Couldn't find", input_filename, "\n");
- }
-
- # ========== Kmer file ==========
-
- # Kmer abundance with labels
- input_kmers <- paste(analysisdir, "/", refid, "/", refid, "_",type,"_kmers.txt", sep="");
- cat("Reading", input_kmers, "\n");
-
- if (file.exists(input_kmers)) {
- data_kmers = read.table(input_kmers, header=TRUE);
- if (nrow(data_kmers) > 1) {
- if (format=="png") {
- kmer_scatter_png <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_kmer_scatter.png", sep="");
- cat("Writing", kmer_scatter_png, "\n");
- png(kmer_scatter_png, width=1200, height=1200)
- print(ggplot(data_kmers, aes(x=data_kmers$RefPc, y=data_kmers$ReadPc)) + geom_point(shape=pointshape, size=pointsize, alpha=pointalpha, color=colourcode) + xlab("Reference abundance %") +ylab("Reads abundance %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 0.3)) + scale_y_continuous(limits=c(0, 0.3)) + geom_text(aes(label=data_kmers$Kmer), size=4) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title [...]
- #grid.edit("geom_point.points", grep = TRUE, gp = gpar(lwd = pointwidth))
- } else {
- kmer_scatter_pdf <- paste(graphsdir, "/", refid, "/", refid, "_",type,"_kmer_scatter.pdf", sep="");
- cat("Writing", kmer_scatter_pdf, "\n");
- pdf(kmer_scatter_pdf, width=6, height=6)
- print(ggplot(data_kmers, aes(x=data_kmers$RefPc, y=data_kmers$ReadPc)) + geom_point(shape=pointshape, alpha=pointalpha, color=colourcode) + xlab("Reference abundance %") +ylab("Reads abundance %") + ggtitle(type) + theme(text = element_text(size=textsize)) + scale_x_continuous(limits=c(0, 0.3)) + scale_y_continuous(limits=c(0, 0.3)) + geom_text(aes(label=data_kmers$Kmer), size=1) + theme(plot.margin = unit(c(0.02,0.02,0.04,0.02), "npc")) + theme(axis.title.x=element_text( [...]
- }
- garbage <- dev.off()
- } else {
- cat("WARNING: No data in ", input_kmers, "\n");
- }
- } else {
- cat("WARNING: Couldn't find", input_kmers, "\n");
- }
-}
-
-# Identity boxplot
-df <- do.call("rbind", listOfDataFrames);
-if (format=="png") {
- output_file <- paste(graphsdir, "/", refid, "/", refid, "_query_identity_boxplot.png", sep="");
- cat("Writing", output_file, "\n");
- png(output_file, width=1200, height=800);
- print(ggplot(df, aes(x=Readset, y=Variable, fill=Readset)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Read identity %") + scale_y_continuous(limits=c(70, 100)));
-} else {
- output_file <- paste(graphsdir, "/", refid, "/", refid, "_query_identity_boxplot.pdf", sep="");
- cat("Writing", output_file, "\n");
- pdf(output_file, width=6, height = 4);
- print(ggplot(df, aes(x=Readset, y=Variable, fill=Readset)) + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill=FALSE) + theme(text = element_text(size=textsize)) + ggtitle(types[t]) + ylab("Read identity %") + scale_y_continuous(limits=c(70, 100)));
-}
-garbage <- dev.off();
diff --git a/bin/nanook_split_fasta b/bin/nanook_split_fasta
deleted file mode 100755
index 6e474e9..0000000
--- a/bin/nanook_split_fasta
+++ /dev/null
@@ -1,83 +0,0 @@
-#!/usr/bin/perl
-#
-# Program: nanotools_split_fasta
-# Purpose: Split FASTA file into separate files for each read
-# Author: Richard Leggett
-# Contact: richard.leggett at tgac.ac.uk
-
-use strict;
-use warnings;
-use Getopt::Long;
-
-my $input_file;
-my $output_dir;
-my $help_requested;
-my %ids;
-my $count = 0;
-
-&GetOptions(
-'i|input:s' => \$input_file,
-'o|outputdir:s' => \$output_dir,
-'h|help' => \$help_requested
-);
-
-if (defined $help_requested) {
- print "\nnanotools_split_fasta\n\n";
- print "Split a multi-read FASTA into separate files.\n\n";
- print "Usage: nanotools_split_fasta.pl <-i input> [-o output_dir]\n\n";
- print "Options:\n";
- print " -i | -input Input FASTA file\n";
- print " -o | -outputdir Output directory\n";
- print "\n";
-
- exit;
-}
-
-die "You must specify an input file\n" if not defined $input_file;
-die "You must specify an output directory\n" if not defined $output_dir;
-
-my $fh;
-
-local $| = 1;
-
-open(INPUTFILE, $input_file) or die "Can't open input ".$input_file."\n";
-
-while(<INPUTFILE>) {
- my $line = $_;
-
- if ($line =~ /^>(\S+)/) {
- my $id = $1;
-
- if (not defined $ids{$id}) {
- $ids{$id} = 1;
-
- if (defined $fh) {
- close($fh);
- }
-
- my $out_filename = $output_dir."/".$id.".fasta";
- $count++;
- #print "Writing $out_filename\n";
-
- if (($count % 10) == 0) {
- print "\r$count";
- }
-
- open($fh, ">".$out_filename) or die "Can't open output ".$out_filename."\n";
- } else {
- print "WARNING: Repeat ID $id\n";
- }
- }
-
- if (defined $fh) {
- print $fh $line;
- } else {
- print "Eeek\n";
- }
-}
-
-if (defined $fh) {
- close($fh);
-}
-
-close(INPUTFILE);
diff --git a/bin/slurmit b/bin/slurmit
deleted file mode 100644
index 7d07a88..0000000
--- a/bin/slurmit
+++ /dev/null
@@ -1,62 +0,0 @@
-#!/bin/bash
-
-commandtorun=""
-nodes=1
-ntasks=1
-mem=2G
-maxtime="6-23:00"
-outfile=""
-partition=""
-cpuspertask=1
-
-function usage
-{
-cat << EOF
-
-Submit commands to SLURM
-
-Usage: slurmit [options] "command to execute"
-
-Submission script for SLURM
-
-OPTIONS:
- -h Show this message
- -c Number of processors per task (--cpus-per-task parameter) (dedault 1)
- -m memory required per node (--mem parameter) (defualt "2G")
- -n maximum number of tasks (--ntasks parameter) (default 1)
- -o Output file (stdout and stderr) (default undefined)
- -p Parition (e.g. "tgac-medium") (default undefined)
- -t Time limit (--time parameter) (default "6-23:00")
- -N minimum number of nodes (--nodes parameter) (default 1)
-
-Example: slurmit -o logfile.txt "ls -l"
-
-Don't forget to backslash dollar variables, as appropriate.
-
-EOF
-}
-
-
-while getopts c:hm:n:o:p:t:N: OPTION
-do
- case $OPTION in
- c) cpuspertask=$OPTARG;;
- h) usage ; exit 1 ;;
- m) mem=$OPTARG;;
- n) ntasks=$OPTARG;;
- o) outfile=" -o $OPTARG";;
- p) partition=" -p $OPTARG";;
- t) maxtime=$OPTARG;;
- N) nodes=$OPTARG;;
- esac
-done
-shift $((OPTIND-1))
-
-commandtorun=$@
-
-if [ "$commandtorun" == "" ] ; then
- echo "You must specify a command to run"
- exit
-fi
-
-sbatch --nodes ${nodes} --cpus-per-task=${cpuspertask} --ntasks ${ntasks} --time ${maxtime} --mem ${mem}${outfile}${partition} --wrap="echo \"SLURM job output\" ; echo "" ; echo \"Command: ${commandtorun}\" ; echo \"Job ID: \${SLURM_JOB_ID}\" ; echo -n \"Start time: \" ; date ; printf \"%0.s-\" {1..70} ; printf \"\n\n\" ; ${commandtorun} ; printf \"\n\" ; printf \"%0.s-\" {1..70} ; printf \"\n\n\" ; sstat -j \${SLURM_JOB_ID}.batch ; printf \"\n\" ; echo \"SLURM ended\"; echo -n \"End tim [...]
diff --git a/debian/compat b/debian/compat
deleted file mode 100644
index f599e28..0000000
--- a/debian/compat
+++ /dev/null
@@ -1 +0,0 @@
-10
diff --git a/debian/control b/debian/control
deleted file mode 100644
index a740977..0000000
--- a/debian/control
+++ /dev/null
@@ -1,41 +0,0 @@
-Source: nanook
-Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
-Uploaders: Andreas Tille <tille at debian.org>
-Section: science
-Priority: optional
-Build-Depends: debhelper (>= 10),
- default-jdk,
- javahelper,
- libcommons-io-java
-Standards-Version: 4.1.0
-Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/nanook.git
-Vcs-Git: https://anonscm.debian.org/git/debian-med/nanook.git
-Homepage: https://documentation.tgac.ac.uk/display/NANOOK/NanoOK
-
-Package: nanook
-Architecture: all
-Depends: ${java:Depends},
- ${misc:Depends},
- default-jre-headless,
- r-cran-ggplot2,
- r-cran-scales,
- r-cran-gridextra,
- r-cran-reshape,
- texlive-latex-base,
- hdf5-tools,
- last-align
-Recommends: blasr,
- bwa
-Suggests: default-jre
-Description: pre- and post-alignment analysis of nanopore sequencing data
- NanoOK is a flexible, multi-reference software for pre- and post-
- alignment analysis of nanopore sequencing data, quality and error
- profiles.
- .
- NanoOK (pronounced na-nook) is a tool for extraction, alignment and
- analysis of Nanopore reads. NanoOK will extract reads as FASTA or FASTQ
- files, align them (with a choice of alignment tools), then generate a
- comprehensive multi-page PDF report containing yield, accuracy and
- quality analysis. Along the way, it generates plain text files which can
- be used for further analysis, as well as graphs suitable for inclusion
- in presentations and papers.
diff --git a/debian/copyright b/debian/copyright
deleted file mode 100644
index 582ad47..0000000
--- a/debian/copyright
+++ /dev/null
@@ -1,27 +0,0 @@
-Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
-Upstream-Name: NanoOK
-Upstream-Contact: Richard M. Leggett <richard.leggett at earlham.ac.uk>
-Source: https://github.com/TGAC/NanoOK
-Files-Excluded: *.jar
-
-Files: *
-Copyright: 2015-2017 Richard M. Leggett
-License: GPL-3+
-
-Files: debian/*
-Copyright: 2017 Andreas Tille <tille at debian.org>
-License: GPL-3+
-
-License: GPL-3+
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
- .
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- .
- On Debian systems you can find the full text of the GNU General Public
- License version 3 or later at /usr/share/common-licenses/GPL-3.
diff --git a/debian/install b/debian/install
deleted file mode 100644
index d997d3b..0000000
--- a/debian/install
+++ /dev/null
@@ -1 +0,0 @@
-bin usr/share/nanook
diff --git a/debian/jlibs b/debian/jlibs
deleted file mode 100644
index 62fb97c..0000000
--- a/debian/jlibs
+++ /dev/null
@@ -1 +0,0 @@
-nanook.jar
diff --git a/debian/links b/debian/links
deleted file mode 100644
index 175907e..0000000
--- a/debian/links
+++ /dev/null
@@ -1 +0,0 @@
-usr/share/nanook/bin/nanook usr/bin/nanook
diff --git a/debian/manifest b/debian/manifest
deleted file mode 100644
index 63e0377..0000000
--- a/debian/manifest
+++ /dev/null
@@ -1,4 +0,0 @@
-usr/share/java/nanook.jar:
- Main-Class: nanook.NanoOK
- Manifest-Version: 1.0
- Class-Path: /usr/share/java/commons-io.jar
diff --git a/debian/nanook.1 b/debian/nanook.1
index 72c26ac..2e27dcf 100644
--- a/debian/nanook.1
+++ b/debian/nanook.1
@@ -2,18 +2,15 @@
.TH NANOOK "1" "September 2017" "nanook 1.26" "User Commands"
.SH NAME
nanook \- flexible, multi-reference software for pre- and post-alignment analysis of nanopore sequencing data, quality and error profiles
-.SH SYNOPSIS
-.B nanook
-\fI<extract|align|analyse|compare|process>\fR [options]
.SH DESCRIPTION
- NanoOK (pronounced na-nook) is a tool for extraction, alignment and
- analysis of Nanopore reads. NanoOK will extract reads as FASTA or FASTQ
- files, align them (with a choice of alignment tools), then generate a
- comprehensive multi-page PDF report containing yield, accuracy and
- quality analysis. Along the way, it generates plain text files which can
- be used for further analysis, as well as graphs suitable for inclusion
- in presentations and papers.
-.SH OPTIONS
+NanoOK v1.26
+.PP
+Comments/bugs to: richard.leggett at earlham.ac.uk
+Follow NanoOK on twitter: @NanoOK_Software
+.PP
+Scripts dir: \fI\,/usr/share/nanook/bin\/\fP
+.PP
+Syntax nanook <extract|align|analyse|compare|process> [options]
.SS "extract options:"
.HP
\fB\-s\fR|\-sample <dir> specifies sample directory
diff --git a/debian/patches/series b/debian/patches/series
deleted file mode 100644
index 91eadaa..0000000
--- a/debian/patches/series
+++ /dev/null
@@ -1 +0,0 @@
-set_jar_path_in_bin.patch
diff --git a/debian/patches/set_jar_path_in_bin.patch b/debian/patches/set_jar_path_in_bin.patch
deleted file mode 100644
index fb36152..0000000
--- a/debian/patches/set_jar_path_in_bin.patch
+++ /dev/null
@@ -1,22 +0,0 @@
-Author: Andreas Tille <tille at debian.org>
-Last-Update: Fri, 01 Sep 2017 14:39:50 +0200
-Description: Set internal pathes to fit Debian package layout
-
---- a/bin/nanook
-+++ b/bin/nanook
-@@ -2,12 +2,14 @@
-
- JAVA_ARGS="-Xmx2048m"
-
-+export NANOOK_DIR=/usr/share/nanook
-+
- if [ -z "$NANOOK_DIR" ] ; then
- echo "Error: You must set NANOOK_DIR before running."
- exit 1
- fi
-
--JARFILE=${NANOOK_DIR}/dist/NanoOK.jar
-+JARFILE=/usr/share/java/nanook.jar
-
- if [ ! -f ${JARFILE} ] ; then
- echo "Error: Can't find NanoOK.jar - it needs to be inside the dist subdirectory of the directory pointed to by NANOOK_DIR which is currently ${NANOOK_DIR}"
diff --git a/debian/rules b/debian/rules
deleted file mode 100755
index 4316c3c..0000000
--- a/debian/rules
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/make -f
-
-# DH_VERBOSE := 1
-export LC_ALL=C.UTF-8
-
-export CLASSPATH=/usr/share/java/commons-io.jar
-
-%:
- dh $@ --with javahelper
-
-override_dh_auto_build:
- jh_build -J nanook.jar src/nanook
diff --git a/debian/source/format b/debian/source/format
deleted file mode 100644
index 163aaf8..0000000
--- a/debian/source/format
+++ /dev/null
@@ -1 +0,0 @@
-3.0 (quilt)
diff --git a/debian/upstream/metadata b/debian/upstream/metadata
deleted file mode 100644
index c4e20f7..0000000
--- a/debian/upstream/metadata
+++ /dev/null
@@ -1,12 +0,0 @@
-Reference:
- Author: Richard M. Leggett and Darren Heavens and Mario Caccamo and Matthew D. Clark and Robert P. Davey
- Title: "NanoOK: multi-reference alignment analysis of nanopore sequencing data, quality and error profiles"
- Journal: Bioinformatics
- Year: 2016
- Volume: 32
- Number: 1
- Pages: 142-144
- DOI: 10.1093/bioinformatics/btv540
- PMID: 26382197
- URL: https://academic.oup.com/bioinformatics/article/32/1/142/1743578/NanoOK-multi-reference-alignment-analysis-of
- eprint: https://academic.oup.com/bioinformatics/article-pdf/32/1/142/16920033/btv540.pdf
diff --git a/debian/watch b/debian/watch
deleted file mode 100644
index 06d513c..0000000
--- a/debian/watch
+++ /dev/null
@@ -1,4 +0,0 @@
-version=4
-
-opts="repacksuffix=+dfsg,dversionmangle=s/\+dfsg//g,repack,compression=xz" \
- https://github.com/TGAC/NanoOK/releases .*/archive/v at ANY_VERSION@@ARCHIVE_EXT@
diff --git a/src/nanook/Alignment.java b/src/nanook/Alignment.java
deleted file mode 100644
index f790fdb..0000000
--- a/src/nanook/Alignment.java
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-
-/**
- * Generic class to represent alignment
- * @author Richard Leggett
- */
-public class Alignment implements Comparable {
- private int score;
- private String queryName;
- private int querySequenceSize;
- private int queryStart;
- private int queryAlignmentSize;
- private int queryEnd;
- private String queryStrand;
- private String hitName;
- private int hitSequenceSize;
- private int hitStart;
- private int hitAlignmentSize;
- private int hitEnd;
- private String hitStrand;
- private String queryString;
- private String hitString;
- boolean fIsCIGAR;
-
- public Alignment(int s, String qName, int qSize, int qStart, int qAlnSize, String qs, String hName, int hSize, int hStart, int hAlnSize, String hs, boolean cigar) {
- score = s;
- queryName = qName;
- querySequenceSize = qSize;
- queryStart = qStart;
- queryAlignmentSize = qAlnSize;
- queryEnd = qStart + qAlnSize - 1;
- queryString = qs;
- hitName = hName;
- hitSequenceSize = hSize;
- hitStart = hStart;
- hitAlignmentSize = hAlnSize;
- hitEnd = hStart + hAlnSize - 1;
- hitString = hs;
- fIsCIGAR = cigar;
- queryStrand = "+";
- hitStrand = "+";
- }
-
- public void setQueryStrand(String s) {
- queryStrand = s;
- }
-
- public void setHitStrand(String s) {
- hitStrand = s;
- }
-
- public String getQueryStrand() {
- return queryStrand;
- }
-
- public String getHitStrand() {
- return hitStrand;
- }
-
- public int getScore() {
- return score;
- }
-
- public String getQueryName() {
- return queryName;
- }
-
- public int getQuerySequenceSize() {
- return querySequenceSize;
- }
-
- public int getQueryStart() {
- return queryStart;
- }
-
- public int getQueryAlignmentSize() {
- return queryAlignmentSize;
- }
-
- public int getQuertEnd() {
- return queryEnd;
- }
-
- public String getQueryString() {
- return queryString;
- }
-
- public String getHitName() {
- return hitName;
- }
-
- public int getHitSequenceSize() {
- return hitSequenceSize;
- }
-
- public int getHitStart() {
- return hitStart;
- }
-
- public int getHitAlignmentSize() {
- return hitAlignmentSize;
- }
-
- public int getHitEnd() {
- return hitEnd;
- }
-
- public String getHitString() {
- return hitString;
- }
-
- public boolean isCIGAR() {
- return fIsCIGAR;
- }
-
- public void writeMafFile(String filename) {
- try {
- PrintWriter pw = new PrintWriter(new FileWriter(filename));
- pw.printf("s %24s %5d %5d %s %5d %s", hitName, hitStart, hitAlignmentSize, hitStrand, hitSequenceSize, hitString);
- pw.println("");
- pw.printf("s %24s %5d %5d %s %5d %s", queryName, queryStart, queryAlignmentSize, queryStrand, querySequenceSize, queryString);
- pw.println("");
- pw.close();
- } catch (IOException e) {
- System.out.println("ReportWriter exception:");
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- @Override
- public int compareTo(Object o) {
- return ((Alignment)o).getScore() - score;
- }
-}
diff --git a/src/nanook/AlignmentFileParser.java b/src/nanook/AlignmentFileParser.java
deleted file mode 100644
index 27d01fb..0000000
--- a/src/nanook/AlignmentFileParser.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * Interface for parsers of alignment files.
- *
- * @author Richard Leggett
- */
-
-public interface AlignmentFileParser {
- /**
- * Get identifier for the alignment program
- * @return ID in lower case e.g. "last"
- */
- public String getProgramID();
-
- /**
- * Get file extension of alignments
- * @return
- */
- public String getAlignmentFileExtension();
-
- /**
- * Get format of input reads expected
- * @return NanoOKOptions.FASTA or NanoOKOptions.FASTQ
- */
- public int getReadFormat();
-
- /**
- * Set alignment parameters to run executable
- * @return
- */
- public void setAlignmentParams(String p);
-
- /**
- * Get command to run aligner
- * @param query query file
- * @param output output file
- * @param reference reference file
- * @return
- */
- public String getRunCommand(String query, String output, String reference);
-
- /**
- * Parse an alignment file.
- * @param filename the filename of the alignments file
- * @param summaryFile the name of an alignments table summary file to write
- * @return
- */
- int parseFile(String filename, AlignmentsTableFile summaryFile, ReadSetStats overallStats);
-
- /**
- * Sort alignments by score
- */
- void sortAlignments();
-
- /**
- * Get highest scoring set of alignments (ie. highest scoring reference)
- * @return an List of Alignment objects
- */
- List<Alignment> getHighestScoringSet();
-
- /**
- * Return true if this aligner outputs to Stdout and not a file
- * @return true or false
- */
- public boolean outputsToStdout();
-
- /**
- * Check index files are present before aligning
- * @param referenceFile name of FASTA file
- */
- public void checkForIndex(String referenceFile);
-}
diff --git a/src/nanook/AlignmentFileStats.java b/src/nanook/AlignmentFileStats.java
deleted file mode 100644
index 0c152fd..0000000
--- a/src/nanook/AlignmentFileStats.java
+++ /dev/null
@@ -1,24 +0,0 @@
-package nanook;
-
-import java.io.File;
-
-public class AlignmentFileStats {
- private String alignmentPathname;
- private int nAlignments = 0;
-
- public AlignmentFileStats(String p) {
- alignmentPathname = p;
- }
-
- public void markNoAlignments() {
- nAlignments = 0;
- }
-
- public void legacyActions(AlignmentsTableFile nonAlignedSummaryFile, ReadSetStats overallStats) {
- if (nAlignments == 0) {
- String leafName = new File(alignmentPathname).getName();
- nonAlignedSummaryFile.writeNoAlignmentMessage(leafName);
- overallStats.addReadWithoutAlignment();
- }
- }
-}
diff --git a/src/nanook/AlignmentInfo.java b/src/nanook/AlignmentInfo.java
deleted file mode 100644
index 230ef7b..0000000
--- a/src/nanook/AlignmentInfo.java
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-/**
- * Class to hold information about an alignment.
- *
- * @author Richard Leggett
- */
-public class AlignmentInfo {
- private String hitName;
- private int hitSize;
- private String queryName;
- private int querySize;
- private int identicalBases;
- private int longest;
- private double meanPerfectKmer;
- private int total;
- private int count;
- private int alignmentSize;
- private int alignmentSizeMinusIndels;
- private double queryIdentity;
- private double alignmentIdentity;
- private double alignmentIdentityMinusIndels;
- private double percentQueryAligned;
- private int queryAlignmentSize;
- int kSizes[];
- int kCounts[];
- int nk;
-
-
- /**
- * Constructor.
- *
- * @param hn hit name
- * @param hs hit size
- * @param qn query name
- * @param qs query size
- * @param ib number of identical bases
- * @param l longest perfect kmer
- * @param t sum of perfect kmers
- * @param c count of perfect kmers
- * @param as alignment size
- * @param ad alignment size minus indels
- * @param qas query alignment size
- */
- public AlignmentInfo(String hn, int hs, String qn, int qs, int ib, int l, int t, int c, int as, int ami, int qas) {
- hitName = hn;
- hitSize = hs;
- querySize = qs;
- queryName = qn;
- identicalBases = ib;
- longest = l;
- total = t;
- count = c;
- meanPerfectKmer = (double)t / (double)c;
- alignmentSize = as;
- alignmentSizeMinusIndels = ami;
- queryAlignmentSize = qas;
- queryIdentity = (100.0 * (double)identicalBases) / (double)querySize;
- alignmentIdentity = (100.0 * (double)identicalBases) / (double)alignmentSize;
- alignmentIdentityMinusIndels = (100.0 * (double)identicalBases) / (double)alignmentSizeMinusIndels;
- //percentQueryAligned = (100.0 * (double)alignmentSize) / (double)querySize;
- percentQueryAligned = (100.0 * (double)queryAlignmentSize) / (double)querySize;
- }
-
- /**
- * Get identical bases count.
- * @return number of identical bases
- */
- public int getIdenticalBases() {
- return identicalBases;
- }
-
- /**
- * Get longest perfect kmer.
- * @return longest perfect kmer
- */
- public int getLongestPerfectKmer() {
- return longest;
- }
-
- /**
- * Get alignment size.
- * @return alignment size, in bases
- */
- public int getAlignmentSize() {
- return alignmentSize;
- }
-
- /**
- * Get query identity.
- * @return query identity percent
- */
- public double getQueryId() {
- return queryIdentity;
- }
-
- public String getQueryName() {
- return queryName;
- }
-
- public String getHitName() {
- return hitName;
- }
-
- /**
- * Get alignment identity.
- * @return alignment identity percent
- */
- public double getAlignmentId() {
- return alignmentIdentity;
- }
-
- /**
- * Get alignment identity.
- * @return alignment identity percent
- */
- public double getAlignmentIdMinusIndels() {
- return alignmentIdentityMinusIndels;
- }
-
- /**
- * Get mean perfect kmer size.
- * @return mean perfect kmer size, in bases
- */
- public double getMeanPerfectKmer() {
- return meanPerfectKmer;
- }
-
- /**
- * Get query size.
- * @return size of query, in bases.
- */
- public int getQuerySize() {
- return querySize;
- }
-
- /**
- * Get hit size.
- * @return size of hit, in bases
- */
- public int getHitSize() {
- return hitSize;
- }
-
- /**
- * Get percentage of query aligned.
- * @return percentage of hit sequence aligned
- */
- public double getPercentQueryAligned() {
- return percentQueryAligned;
- }
-
- public void addkCounts(int n, int[] s, int[] c) {
- nk = n;
- kSizes = s;
- kCounts = c;
- }
-
- public String getkCounts() {
- String s="";
-
- for (int i=0; i<nk; i++) {
- s = s + Integer.toString(kCounts[i]);
- if (i != (nk-1)) {
- s = s + "\t";
- }
- }
-
- return s;
- }
-}
diff --git a/src/nanook/AlignmentMerger.java b/src/nanook/AlignmentMerger.java
deleted file mode 100644
index cf41c91..0000000
--- a/src/nanook/AlignmentMerger.java
+++ /dev/null
@@ -1,411 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-/**
- * Class to merge alignments
- *
- * @author Richard Leggett
- */
-public class AlignmentMerger {
- private ReferenceSequence reference;
- private ReadSetStats overallStats;
- private NanoOKOptions options;
- private int readLength;
- private int[] covered;
- private int deletionSize = 0;
- private int insertionSize = 0;
- private String errorKmer = "";
- private int type;
- private int kmerTotal = 0;
- private int kmerCount = 0;
- private int currentPerfectKmerSize = 0;
- private int longestPerfectKmer = 0;
- private int overallQueryStart = -1;
- private int overallQueryEnd = -1;
- private int overallHitStart = -1;
- private int overallHitEnd = -1;
- private int hitSeqSize = 0;
- private int querySeqSize = 0;
- private String queryName = null;
- private String hitName = null;
- private int identicalBases = 0;
- private int alignmentSize = 0;
- private int alignmentSizeWithoutIndels = 0;
-
- // Bodge for speed - need to change way AlignmentInfo works
- int kSizes[] = {15, 17, 19, 21, 23, 25};
- int kCounts[] = {0, 0, 0, 0, 0, 0};
- int nk = 6;
-
- /**
- * Constructor
- * @param r the reference, as a ReferenceSequence object
- * @param l the read length
- * @param s the read set stats for this read set
- * @param t the type number of read (defined in NanoOKOptions)
- */
- public AlignmentMerger(NanoOKOptions o, ReferenceSequence r, int l, ReadSetStats s, int t) {
- options = o;
- reference = r;
- readLength = l;
- overallStats = s;
- type = t;
-
- covered = new int[readLength];
-
- options.getLog().println("");
- options.getLog().println("New AlignmentMerger");
- options.getLog().println("");
- }
-
- /**
- * Helper method to check if to store insertion or deletion (and store it).
- * @param reference Reference object this alignment relates to
- * @param errorKmer The perfect sequence before this error
- */
- private void checkStoreInsertionsOrDeletions() {
- if (deletionSize > 0) {
- reference.getStatsByType(type).addDeletionError(deletionSize, errorKmer, overallStats); // Reference
- deletionSize = 0;
- }
-
- if (insertionSize > 0) {
- reference.getStatsByType(type).addInsertionError(insertionSize, errorKmer, overallStats); // Reference
- insertionSize = 0;
- }
-
- errorKmer = "";
- }
-
- private void storePerfectKmerLength() {
- // Store perfect kmers
- if (currentPerfectKmerSize > 0) {
- reference.getStatsByType(type).addPerfectKmer(currentPerfectKmerSize); // Reference
-
- // Bodge - need to change
- for (int l=0; l<nk; l++) {
- if (currentPerfectKmerSize >= kSizes[l]) {
- kCounts[l]++;
- }
- }
-
- kmerTotal+=currentPerfectKmerSize;
- kmerCount++;
-
- if (currentPerfectKmerSize > longestPerfectKmer) {
- longestPerfectKmer = currentPerfectKmerSize;
- }
-
- currentPerfectKmerSize = 0;
- }
- }
-
- /**
- * Merge in a new alignment
- * @param a a Alignment
- */
- public void addAlignment(Alignment a) {
- String hitSeq = a.getHitString();
- String querySeq = a.getQueryString();
- int hitSize = hitSeq.length();
- int querySize = querySeq.length();
- int loopFrom = 0;
- int loopTo = hitSize <= querySize ? hitSize:querySize;
- int queryPos = a.getQueryStart();
- int hitPos = a.getHitStart();
- String currentKmer = "";
- AlignmentInfo ai;
- boolean mergeAlignment = true;
-
- // Deal with hit and query names
- if (queryName == null) {
- queryName = a.getQueryName();
- hitName = a.getHitName();
- querySeqSize = a.getQuerySequenceSize();
- hitSeqSize = a.getHitSequenceSize();
- }
-
- if (! hitName.equals(a.getHitName())) {
- System.out.println("Hit name ("+hitName+") doesn't match ("+a.getHitName()+")!");
- System.exit(1);
- }
-
- if (! queryName.equals(a.getQueryName())) {
- System.out.println("Query name ("+queryName+") doesn't match ("+a.getQueryName()+")!");
- System.exit(1);
- }
-
- options.getLog().println("Merging new block");
- options.getLog().println(" queryPos = "+queryPos);
- options.getLog().println(" hitPos = "+hitPos);
- options.getLog().println(" querySize = "+querySize);
- options.getLog().println(" hitSize = "+hitSize);
-
- // Check for new block too far from current block
- if ((overallHitStart != -1) && (hitPos < overallHitStart)) {
- int remainingQuerySequence = a.getQuerySequenceSize() - (overallQueryEnd - overallQueryStart);
- int maximumDistance = remainingQuerySequence * 2;
- if ((overallHitStart - hitPos) > maximumDistance) {
- options.getLog().println("WARNING: hitPos too far (>"+maximumDistance+") from overallHitStart ("+overallHitStart+")");
- mergeAlignment = false;
- }
- }
-
- if ((overallHitEnd != -1) && (hitPos > overallHitEnd)) {
- int remainingQuerySequence = a.getQuerySequenceSize() - (overallQueryEnd - overallQueryStart);
- int maximumDistance = remainingQuerySequence * 2;
-
- if ((hitPos - overallHitEnd) > maximumDistance) {
- options.getLog().println("WARNING: hitPos too far from (>"+maximumDistance+") from overallHitEnd ("+overallHitEnd+")");
- mergeAlignment = false;
- }
- }
-
- if (overallQueryStart >= 0) {
- int queryDistanceFromStart = Math.abs(queryPos - overallQueryStart);
- int hitDistanceFromStart = Math.abs(hitPos - overallHitStart);
- int difference = Math.abs(queryDistanceFromStart - hitDistanceFromStart);
- options.getLog().println("queryDistanceFromStart = "+queryDistanceFromStart);
- options.getLog().println("hitDistanceFromStart = "+hitDistanceFromStart);
- options.getLog().println("difference = " + difference);
-
- if (difference > (queryDistanceFromStart * 0.2)) {
- options.getLog().println("WARNING: query offset too far from hit offet - extra alignment ignored");
- mergeAlignment = false;
- }
- }
-
- if (mergeAlignment) {
- // Store alignment size
- if ((overallQueryStart == -1) || (queryPos < overallQueryStart)) {
- overallQueryStart = queryPos;
- options.getLog().println("Modifying overallQueryStart = "+overallQueryStart);
- }
- if ((overallHitStart == -1) || (hitPos < overallHitStart)) {
- overallHitStart = hitPos;
- options.getLog().println("Modifying overallHitStart = "+overallHitStart);
- }
-
- // Expect these to be equal
- if (hitSize != querySize) {
- System.out.println("hitSize not equal to querySize");
- }
-
- currentPerfectKmerSize = 0;
- insertionSize = 0;
- deletionSize = 0;
- errorKmer = "";
-
- // If alignment starts in middle of area already covered, move to end
- if (covered[queryPos] == 1) {
- while((loopFrom < loopTo) && (covered[queryPos] == 1)) {
- if (hitSeq.charAt(loopFrom)== '-') {
- queryPos++;
- } else if (querySeq.charAt(loopFrom) == '-') {
- hitPos++;
- } else {
- queryPos++;
- hitPos++;
- }
- loopFrom++;
- }
- }
-
- options.getLog().println(" loopFrom = "+loopFrom);
- options.getLog().println(" loopTo = "+loopTo);
-
- for (int i=loopFrom; i<loopTo; i++) {
- // If we've ventured into previously covered territory, break
- if (covered[queryPos] == 1) {
- break;
- }
-
- // Identical bases
- if (hitSeq.charAt(i) == querySeq.charAt(i)) {
- // Check if there are any insertions or deletions to store
- checkStoreInsertionsOrDeletions();
-
- currentPerfectKmerSize++;
- currentKmer += querySeq.charAt(i);
-
- // If reached end, store perfect sequence length
- if (i == (loopTo-1)) {
- storePerfectKmerLength();
- }
-
- // Mark this position and move on
- identicalBases++;
- covered[queryPos]= 1;
- queryPos++;
- hitPos++;
- alignmentSizeWithoutIndels++;
- } else {
- // An insertion or deletion or substitution, so store perfect sequence length, if we have some
- if (currentPerfectKmerSize > 0) {
- storePerfectKmerLength();
- }
-
- // Insertion
- if (hitSeq.charAt(i) == '-') {
- // If new insertion, check if we have a previous deletion we were tracking
- // And store the current perfect kmer as the one associated with this insertion
- if (insertionSize == 0) {
- checkStoreInsertionsOrDeletions();
- errorKmer = currentKmer;
- }
-
- // Keep track of insertion size
- insertionSize++;
-
- // Keep track of position
- queryPos++;
- }
-
- // Deletion
- else if (querySeq.charAt(i) == '-') {
- // If new deletion, check if we have a previous insertion we were tracking
- // And store the current perfect kmer as the one associated with this deletion
- if (deletionSize == 0) {
- checkStoreInsertionsOrDeletions();
- errorKmer = currentKmer;
- }
-
- // Keep track of size
- deletionSize++;
-
- // Keep track of position
- hitPos++;
- }
-
- // Substitution
- else {
- // Check if previous insertion or deletion we were tracking
- checkStoreInsertionsOrDeletions();
-
- // Store current perfect kmer associated with this substitution
- errorKmer = currentKmer;
-
- // Store substitution
- reference.getStatsByType(type).addSubstitutionError(errorKmer, hitSeq.charAt(i), querySeq.charAt(i), overallStats); // Reference
-
- // Mark this position and move on
- covered[queryPos] = 1;
- queryPos++;
- hitPos++;
- alignmentSizeWithoutIndels++;
- }
-
- // Reset current kmer
- currentKmer = "";
- }
-
- alignmentSize++;
- }
-
- options.getLog().println(" queryPos = " + queryPos);
- options.getLog().println(" hitPos = " + hitPos);
-
- if ((overallQueryEnd == -1) || (queryPos > overallQueryEnd)) {
- overallQueryEnd = queryPos;
- options.getLog().println("Modifying overallQueryEnd = "+overallQueryEnd);
- }
- if ((overallHitEnd == -1) || (hitPos > overallHitEnd)) {
- overallHitEnd = hitPos;
- options.getLog().println("Modifying overallHitEnd = "+overallHitEnd);
- }
-
- //reference.getStatsByType(type).addCoverage(a.getHitStart(), a.getHitAlignmentSize()); // Reference
- }
- }
-
- /**
- * Declare end of alignment merge
- * @return an AlignmentInfo object
- */
- public AlignmentInfo endMergeAndStoreStats() {
- AlignmentInfo ai = new AlignmentInfo(hitName,
- hitSeqSize,
- queryName,
- querySeqSize,
- identicalBases,
- longestPerfectKmer,
- kmerTotal,
- kmerCount,
- alignmentSize,
- alignmentSizeWithoutIndels,
- overallQueryEnd - overallQueryStart);
-
- ai.addkCounts(nk, kSizes, kCounts);
-
- overallStats.writekCounts(queryName, querySeqSize, nk, kSizes, kCounts); // ReadSetStats
- overallStats.addReadWithAlignment(); // ReadSetStats
- overallStats.addReadBestKmer(longestPerfectKmer); // ReadSetStats
-
- reference.getStatsByType(type).addAlignmentStats(querySeqSize, alignmentSize, alignmentSizeWithoutIndels, identicalBases, "?", "?"); // Reference
- reference.getStatsByType(type).addReadBestKmer(longestPerfectKmer); // Reference
-
- return ai;
- }
-
- /**
- * Get query start position of merged alignment
- * @return start position
- */
- public int getOverallQueryStart() {
- return overallQueryStart;
- }
-
- /**
- * Get query end position of merged alignment
- * @return end position
- */
- public int getOverallQueryEnd() {
- return overallQueryEnd;
- }
-
- /**
- * Get hit start position of merged alignment
- * @return start position
- */
- public int getOverallHitStart() {
- return overallHitStart;
- }
-
- /**
- * Get hit end position of merged alignment
- * @return end position
- */
- public int getOverallHitEnd() {
- return overallHitEnd;
- }
-
- /**
- * Get size of query covered by merged alignment
- * @return size of alignment
- */
- public int getOverallQuerySize() {
- return overallQueryEnd - overallQueryStart;
- }
-
- /**
- * Get size of hit covered by merged alignment
- * @return size of hit alignment
- */
- public int getOverallHitSize() {
- return overallHitEnd - overallHitStart;
- }
-
- /**
- * Get size of alignment without indels
- * @return size
- */
- public int getAlignmentSize() {
- return alignmentSizeWithoutIndels;
- }
-}
diff --git a/src/nanook/AlignmentsTableFile.java b/src/nanook/AlignmentsTableFile.java
deleted file mode 100644
index fd258ee..0000000
--- a/src/nanook/AlignmentsTableFile.java
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.*;
-
-/**
- * Represents alignment summary file written by tool and used for graph plotting.
- *
- * @author Richard Leggett
- */
-public class AlignmentsTableFile implements Serializable {
- private static final long serialVersionUID = NanoOK.SERIAL_VERSION;
- private String filename;
- private transient PrintWriter pw = null;
- private int count = 0;
-
- /**
- * Constructor.
- * @param f filename of output file
- */
- public AlignmentsTableFile(String f) {
- filename = f;
- writeHeader();
- }
-
- private synchronized void openFile(boolean append) {
- try {
- pw = new PrintWriter(new FileWriter(filename, append));
- } catch (IOException e) {
- System.out.println("AlignmentsTableFile exception");
- e.printStackTrace();
- }
- }
-
- /**
- * Write header row to file.
- */
- private synchronized void writeHeader() {
- openFile(false);
- pw.print("Filename\t");
- pw.print("QueryName\t");
- pw.print("QueryGC\t");
- pw.print("QueryStart\t");
- pw.print("QueryBasesCovered\t");
- pw.print("QueryStrand\t");
- pw.print("QueryLength\t");
- pw.print("HitName\t");
- pw.print("HitStart\t");
- pw.print("HitBasesCovered\t");
- pw.print("HitStrand\t");
- pw.print("HitLength\t");
- pw.print("AlignmentSize\t");
- pw.print("IdenticalBases\t");
- pw.print("AlignmentPercentIdentity\t");
- pw.print("QueryPercentIdentity\t");
- pw.print("LongestPerfectKmer\t");
- pw.print("MeanPerfectKmer\t");
- pw.print("PercentQueryAligned\t");
- pw.print("nk15\tnk17\tnk19\tnk21\tnk23\tnk25");
- pw.println("");
- pw.close();
- }
-
- /**
- * Write an alignment line.
- * @param alignmentFilename filename of alignment
- * @param hitLine hit object
- * @param queryLine query object
- * @param ais AlignmentInfo statistics
- */
- public synchronized void writeAlignment(ReadSetStats stats, String alignmentFilename, MAFAlignmentLine hitLine, MAFAlignmentLine queryLine, AlignmentInfo ais) {
- String outputLine = String.format("%s\t%s\t%.2f\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%s\t%d\t%d\t%d\t%.2f\t%.2f\t%d\t%.2f\t%.2f\t%s",
- alignmentFilename,
- queryLine.getName(),
- stats.getGC(alignmentFilename, ais.getQueryName()),
- queryLine.getStart(),
- queryLine.getAlnSize(),
- queryLine.getStrand(),
- queryLine.getSeqSize(),
- hitLine.getName(),
- hitLine.getStart(),
- hitLine.getAlnSize(),
- hitLine.getStrand(),
- hitLine.getSeqSize(),
- ais.getAlignmentSize(),
- ais.getIdenticalBases(),
- ais.getAlignmentId(),
- ais.getQueryId(),
- ais.getLongestPerfectKmer(),
- ais.getMeanPerfectKmer(),
- ais.getPercentQueryAligned(),
- ais.getkCounts());
-
- openFile(true);
- pw.println(outputLine);
- pw.close();
-
- count++;
- }
-
- public synchronized void writeMergedAlignment(ReadSetStats stats, String alignmentFilename, AlignmentMerger merger, AlignmentInfo ais) {
- String outputLine = String.format("%s\t%s\t%.2f\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%s\t%d\t%d\t%d\t%.2f\t%.2f\t%d\t%.2f\t%.2f\t%s",
- alignmentFilename,
- ais.getQueryName(),
- stats.getGC(alignmentFilename, ais.getQueryName()),
- merger.getOverallQueryStart(),
- merger.getOverallQuerySize(),
- "+",
- ais.getQuerySize(),
- ais.getHitName(),
- merger.getOverallHitStart(),
- merger.getOverallHitSize(),
- "+",
- ais.getHitSize(),
- ais.getAlignmentSize(),
- ais.getIdenticalBases(),
- ais.getAlignmentId(),
- ais.getQueryId(),
- ais.getLongestPerfectKmer(),
- ais.getMeanPerfectKmer(),
- ais.getPercentQueryAligned(),
- ais.getkCounts());
-
- openFile(true);
- pw.println(outputLine);
- pw.close();
-
- count++;
- }
-
- /**
- * Used when no alignment found for this query.
- * @param alignmentFilename - alignment filename
- */
- public synchronized void writeNoAlignmentMessage(String alignmentFilename) {
- openFile(true);
- pw.println(alignmentFilename+"\tNO ALIGNMENTS");
- pw.close();
- }
-}
diff --git a/src/nanook/BLASRParser.java b/src/nanook/BLASRParser.java
deleted file mode 100644
index 727ede5..0000000
--- a/src/nanook/BLASRParser.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-/**
- * Parser for BLASR files
- * @author Richard Leggett
- */
-public class BLASRParser extends SAMParser implements AlignmentFileParser {
- private String alignmentParams = "";
-
- public BLASRParser(NanoOKOptions o, References r) {
- super(o, r);
- }
-
- public String getProgramID() {
- return "blasr";
- }
-
- public int getReadFormat() {
- return NanoOKOptions.FASTA;
- }
-
- public void setAlignmentParams(String p) {
- alignmentParams = p;
- }
-
- public String getRunCommand(String query, String output, String reference) {
- String command = "blasr " + query + " " + reference + " -sam -out " + output;
-
- if (alignmentParams.length() > 0) {
- command = command + alignmentParams;
- }
-
- return command;
- }
-
- public boolean outputsToStdout() {
- return false;
- }
-
- public void checkForIndex(String referenceFile) {
- return;
- }
-}
diff --git a/src/nanook/BWAParser.java b/src/nanook/BWAParser.java
deleted file mode 100644
index eff6fae..0000000
--- a/src/nanook/BWAParser.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.File;
-
-/**
- * Parser for BWA files
- * @author Richard Leggett
- */
-public class BWAParser extends SAMParser implements AlignmentFileParser {
- private String alignmentParams = "-x ont2d";
- private NanoOKOptions options;
-
- public BWAParser(NanoOKOptions o, References r) {
- super(o, r);
- options = o;
- }
-
- public String getProgramID() {
- return "bwa";
- }
-
- public int getReadFormat() {
- int or = options.getReadFormat();
- return or;
-
- //return NanoOKOptions.FASTA;
- }
-
- public void setAlignmentParams(String p) {
- alignmentParams = p;
- }
-
- public boolean outputsToStdout() {
- return true;
- }
-
- public String getRunCommand(String query, String output, String reference) {
- //reference = reference.replaceAll("\\.fasta$", "");
- //reference = reference.replaceAll("\\.fa$", "");
-
- return "bwa mem " + alignmentParams + " " + reference + " " + query;
- }
-
- public void checkForIndex(String referenceFile) {
- String[] files = {referenceFile + ".fasta.bwt",
- referenceFile + ".fasta.pac"};
-
- for (int i=0; i<files.length; i++) {
- File f = new File(files[i]);
-
- if (!f.exists()) {
- System.out.println("");
- System.out.println("Error:");
- System.out.println("Can't find file " + f.getPath());
- System.out.println("Have you indexed the reference with bwa index?");
- System.out.println("Will continue but anticipate failure at analyse stage.");
- System.out.println("");
- return;
- }
- }
-
- return;
- }
-}
diff --git a/src/nanook/BlastHandler.java b/src/nanook/BlastHandler.java
deleted file mode 100644
index 544f8c4..0000000
--- a/src/nanook/BlastHandler.java
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett (richard.leggett at earlham.ac.uk)
- *
- * Copyright 2015-17 Earlham Institute
- */
-
-package nanook;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.util.ArrayList;
-
-public class BlastHandler {
- private NanoOKOptions options = null;
- private int type;
- private int passfail;
- private int nSeqs = 0;
- private int fileCounter = 0;
- private ArrayList<String> mergeList = new ArrayList<String>();
-
- public BlastHandler(NanoOKOptions o, int t, int pf) {
- options = o;
- type = t;
- passfail = pf;
- if (options.getFileCounterOffset() > 0) {
- fileCounter = options.getFileCounterOffset();
- System.out.println("File offset "+fileCounter);
- }
- }
-
- private void writeMeganFile() {
- ArrayList<String> blastProcesses = options.getBlastProcesses();
- String meganDir = options.getSampleDirectory() + File.separator + "megan";
- File f = new File(meganDir);
-
- if (!f.exists()) {
- f.mkdir();
- }
-
- for (int i=0; i<blastProcesses.size(); i++) {
- String[] params = blastProcesses.get(i).split(",");
- if (params.length == 5) {
- String blastName = params[0];
- String blastTool = params[1];
- String blastDb = params[2];
- String memory = params[3];
- String queue = params[4];
- String cmdPathname = meganDir + File.separator + "all_" + NanoOKOptions.getTypeFromInt(type) +
- "_" + NanoOKOptions.getPassFailFromInt(passfail) + "_" + Integer.toString(fileCounter) + ".cmds";
- String meganPathname = meganDir + File.separator + "all_" + NanoOKOptions.getTypeFromInt(type) +
- "_" + NanoOKOptions.getPassFailFromInt(passfail) + "_" + Integer.toString(fileCounter) + ".rma";
- String slurmPathname = meganDir + File.separator + "all_" + NanoOKOptions.getTypeFromInt(type) +
- "_" + NanoOKOptions.getPassFailFromInt(passfail) + "_" + Integer.toString(fileCounter) + ".slurm.sh";
- String slurmLogname = meganDir + File.separator + "all_" + NanoOKOptions.getTypeFromInt(type) +
- "_" + NanoOKOptions.getPassFailFromInt(passfail) + "_" + Integer.toString(fileCounter) + ".slurm.log";
-
- try {
- options.getLog().println("Writing MEGAN command file " + cmdPathname);
- PrintWriter pw = new PrintWriter(new FileWriter(cmdPathname));
- pw.println("setprop MaxNumberCores=4;");
- String blastFileString="";
- String fastaFileString="";
-
- for (int fc=0; fc<=fileCounter; fc++) {
- String fileName = "all_" + NanoOKOptions.getTypeFromInt(type) + "_" + NanoOKOptions.getPassFailFromInt(passfail) + "_" + Integer.toString(fc);
- String fastaPathname = options.getReadDir() + "_chunks" + File.separator + fileName + (options.getReadFormat() == NanoOKOptions.FASTA ? ".fasta":".fastq");
- String blastPathname = options.getSampleDirectory() + File.separator +
- blastTool + "_" + blastName + File.separator +
- fileName + "_" + blastTool + "_" + blastName + ".txt";
- if (blastFileString != "") {
- blastFileString += ",";
- fastaFileString += ",";
- }
- fastaFileString = fastaFileString + "'" + fastaPathname + "'";
- blastFileString = blastFileString + "'" + blastPathname + "'";
- }
-
- pw.print("import blastFile="+blastFileString+" fastaFile="+fastaFileString +" meganFile="+meganPathname);
- pw.println(" maxMatches=100 maxExpected=0.001 minSupport=1 minComplexity=0;");
- pw.println("quit;");
- pw.close();
-
- pw = new PrintWriter(new FileWriter(slurmPathname));
- pw.print("slurmit -p TempProject4 -c 4 -o " + slurmLogname + " -m \"8G\" \"source MEGAN-5.11.3 ; ");
- pw.println("xvfb-run -d MEGAN -g -c " + cmdPathname + " -L /tgac/workarea/group-si/BAMBI_Pt1/megan_support/MEGAN5-academic-license.txt\"");
- pw.close();
- } catch (Exception e) {
- System.out.println("writeMeganFile exception");
- e.printStackTrace();
- }
- }
- }
- }
-
- private void runBlasts(String inputPathname) {
- String formatString = "'6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore stitle'";
- ArrayList<String> blastProcesses = options.getBlastProcesses();
- File iff = new File(inputPathname);
- String fileName = iff.getName();
- String filePrefix = fileName;
-
- if (filePrefix.contains(".")) {
- filePrefix = fileName.substring(0, fileName.lastIndexOf('.'));
- }
-
- for (int i=0; i<blastProcesses.size(); i++) {
- String[] params = blastProcesses.get(i).split(",");
- if (params.length == 5) {
- String blastName = params[0];
- String blastTool = params[1];
- String blastDb = params[2];
- String memory = params[3];
- String queue = params[4];
- String outputBlast = options.getSampleDirectory() + File.separator +
- blastTool + "_" + blastName + File.separator +
- filePrefix + "_" + blastTool + "_" + blastName + ".txt";
- String commandFile = options.getSampleDirectory() + File.separator +
- blastTool + "_" + blastName + File.separator +
- filePrefix + "_" + blastTool + "_" + blastName + ".sh";
- String logFile = options.getLogsDir() + File.separator +
- blastTool + "_" + blastName + File.separator +
- filePrefix + "_" + blastTool + "_" + blastName + ".log";
-
- options.getLog().println(" BLAST input: " + inputPathname);
- options.getLog().println(" BLAST output: " + outputBlast);
- options.getLog().println("BLAST command: " + commandFile);
- options.getLog().println(" BLAST log: " + logFile);
-
- try {
- options.getLog().println("Writing blast command file "+commandFile);
- PrintWriter pw = new PrintWriter(new FileWriter(commandFile));
- // TODO: -task option shouldn't be hardcoded
- pw.write(blastTool + " -db " + blastDb + " -query " + inputPathname + " -evalue 0.001 -show_gis -task blastn -out " + outputBlast + " -outfmt "+formatString);
- pw.close();
-
- options.getLog().println("Submitting blast command file to SLURM "+commandFile);
- ProcessLogger pl = new ProcessLogger();
- String[] commands = {"slurmit",
- "-o", logFile,
- "-p", queue,
- "-m", memory,
- "sh "+commandFile};
- pl.runCommandToLog(commands, options.getLog());
- } catch (IOException e) {
- System.out.println("runBlast exception");
- e.printStackTrace();
- }
- } else {
- System.out.println("Badly formatted BLAST process: "+blastProcesses.get(i));
- }
- }
- }
-
- private String mergeInputFiles() {
- String mergedPathname = options.getReadDir() +
- "_chunks" + File.separator +
- "all_" + NanoOKOptions.getTypeFromInt(type) + "_" + NanoOKOptions.getPassFailFromInt(passfail) + "_" +
- Integer.toString(fileCounter) +
- (options.getReadFormat() == NanoOKOptions.FASTA ? ".fasta":".fastq");
-
- options.getLog().println("Writing merged file "+mergedPathname);
-
- try {
- PrintWriter pw = new PrintWriter(new FileWriter(mergedPathname));
-
- for (int i=0; i<mergeList.size(); i++) {
- BufferedReader br = new BufferedReader(new FileReader(mergeList.get(i)));
- String line;
- while ((line = br.readLine()) != null) {
- pw.println(line);
- }
- br.close();
- }
- pw.close();
- } catch (IOException e) {
- System.out.println("mergeFiles exception");
- e.printStackTrace();
- }
- return mergedPathname;
- }
-
- public synchronized void addRead(String readFilename) {
- mergeList.add(readFilename);
- nSeqs++;
- if (nSeqs == options.getReadsPerBlast()) {
- options.getLog().println("Merging files (nSeqs = "+nSeqs+")");
- String mergedPathname = mergeInputFiles();
- runBlasts(mergedPathname);
- writeMeganFile();
-
- //options.getThreadExecutor().execute(new FastAQMerger(options, mergedFilename, mergeList, fileCounter));
- mergeList = new ArrayList();
- fileCounter++;
- nSeqs = 0;
- }
- }
-}
diff --git a/src/nanook/BlastMerger.java b/src/nanook/BlastMerger.java
deleted file mode 100644
index 6443ba0..0000000
--- a/src/nanook/BlastMerger.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * To change this license header, choose License Headers in Project Properties.
- * To change this template file, choose Tools | Templates
- * and open the template in the editor.
- */
-package nanook;
-
-import java.io.BufferedReader;
-import java.io.FileNotFoundException;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.text.DateFormat;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-
-public class BlastMerger {
- private transient PrintWriter pw = null;
- private String filename = null;
-
- public BlastMerger(NanoOKOptions options) {
- }
-
- public synchronized void open(String f, boolean clearLogs) {
- if (clearLogs) {
- filename = f + ".blast.txt";
- } else {
- DateFormat df = new SimpleDateFormat("ddMMyy_HHmmss");
- Date dateobj = new Date();
- filename = f + "_" + df.format(dateobj).toString()+".blast.txt";
- }
- System.out.println("Opening "+filename);
-
- try {
- pw = new PrintWriter(new FileWriter(filename, true));
- } catch (IOException e) {
- System.out.println("NanoOKLog exception");
- e.printStackTrace();
- }
- }
-
- public synchronized void mergeFile(String fileToMerge) {
- try {
- String line;
- BufferedReader br = new BufferedReader(new FileReader(fileToMerge));
- while ((line = br.readLine()) != null) {
- if (!line.startsWith("#")) {
- pw.println(line);
- }
- }
- pw.flush();
- br.close();
- } catch (Exception e) {
- System.out.println("BlastMerger exception");
- e.printStackTrace();
- }
- }
-
- public synchronized void close() {
- if (pw != null) {
- pw.close();
- }
- }
-
- public synchronized void print(String s) {
- if (pw != null) {
- pw.print(s);
- pw.flush();
- }
- }
-
- public synchronized void println(String s) {
- if (pw != null) {
- pw.println(s);
- pw.flush();
- }
- }
-
- public synchronized PrintWriter getPrintWriter() {
- return pw;
- }
-}
diff --git a/src/nanook/CIGARString.java b/src/nanook/CIGARString.java
deleted file mode 100644
index 733baa7..0000000
--- a/src/nanook/CIGARString.java
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.util.ArrayList;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * Represent and parse a CIGAR string
- *
- * @author Richard Leggett
- */
-public class CIGARString {
- private StringBuilder queryString = new StringBuilder("");
- private StringBuilder hitString = new StringBuilder("");
- private String hitFilename;
- private String cigarString;
- private String querySeq;
- private String queryFilename;
- private String queryID;
- private ReferenceSequence hitReference;
- private int queryStart;
- private int hitStart;
- private int queryAlnSize;
- private int hitAlnSize;
-
- /**
- * Constructor
- * @param cs
- * @param qseq
- * @param qf
- * @param hs hit start position (0-based)
- * @param hf
- * @param hr
- */
- public CIGARString(String cs, String qseq, String qf, String qi, int hs, String hf, ReferenceSequence hr) {
- cigarString = cs;
- querySeq = qseq;
- queryFilename = qf;
- queryID = qi;
- hitStart = hs;
- hitFilename = hf;
- hitReference = hr;
- queryStart = 0;
-
- //trimCIGAR(cs, qseq);
- }
-
- /**
- * Attempt at handling marginAlign CIGAR strings
- * Needs work!
- * @param cs
- * @param qseq
- * @return
- */
- private String trimCIGAR(String cs, String qseq) {
- //System.out.println("Old cigar: "+cs);
-
- boolean foundStart = false;
- int trimQueryStart = 0;
- int trimCigarStart = 0;
- int trimCigarEnd = 0;
- int trimQueryEnd = 0;
- Pattern outPattern = Pattern.compile("(\\d+)\\S");
- Matcher outMatcher = outPattern.matcher(cs);
- ArrayList<String> tags = new ArrayList();
- while (outMatcher.find()) {
- tags.add(outMatcher.group(0));
- }
-
-
- for (int i=0; i<tags.size(); i++) {
- String tag = tags.get(i);
- int n = Integer.parseInt(tag.substring(0, tag.length()-1));
- String c = tag.substring(tag.length()-1);
-
- if (c.equals("I")) {
- queryStart += n;
- trimQueryStart += n;
- trimCigarStart += tag.length();
- } else if (c.equals("D")) {
- hitStart += n;
- trimCigarStart += tag.length();
- } else {
- break;
- }
- }
-
- for (int i=tags.size()-1; i>0; i--) {
- String tag = tags.get(i);
- int n = Integer.parseInt(tag.substring(0, tag.length()-1));
- String c = tag.substring(tag.length()-1);
-
- if (c.equals("I")) {
- trimQueryEnd += n;
- trimCigarEnd += tag.length();
- } else if (c.equals("D")) {
- trimCigarEnd += tag.length();
- } else {
- break;
- }
- }
-
- cigarString = cs.substring(trimCigarStart, cs.length()-trimCigarEnd);
- querySeq = qseq.substring(trimQueryStart, qseq.length()-trimQueryEnd);
-
- //System.out.println("New cigar: "+cigarString);
- return cigarString;
- }
-
- public boolean processString() {
- String value = "";
- SequenceReader r = new SequenceReader(true);
- r.indexFASTAFile(hitFilename, null, true);
- int l = 3*querySeq.length();
- String hitSeq = r.getSubSequence(hitReference.getId(), hitStart, hitStart+l);
- int hitPtr = 0;
- int queryPtr = 0;
- boolean displayResult = false;
- boolean donePreClipping = false;
- int tagCtr = 0;
- int i = 0;
- boolean continueParsing = true;
- int totalCount = 0;
- int delCount = 0;
- int insCount = 0;
- int matchCount = 0;
- boolean processed = true;
-
- //System.out.println("Query filename: "+queryFilename);
- //System.out.println("CIGAR: "+cigarString);
- //System.out.println(" Hit: "+hitSeq.length()+" "+hitSeq);
- //System.out.println("Query: "+querySeq.length()+" "+querySeq);
-
- hitAlnSize = 0;
- queryAlnSize = 0;
- hitAlnSize = 0;
- while ((i<cigarString.length()) && (continueParsing)) {
- //for (int i=0; i<cigarString.length(); i++) {
- //System.out.println("hitPtr="+hitPtr+" queryPtr="+queryPtr);
- //System.out.println("Query: " + queryString.toString());
- //System.out.println(" Hit: " + hitString.toString());
- char c = cigarString.charAt(i);
-
- if (Character.isDigit(c)) {
- value = value + c;
- } else {
- int n = Integer.parseInt(value);
- totalCount += n;
- //System.out.println(n + " " + c);
- switch(c) {
- case 'M':
- case '=':
- case 'X':
- //System.out.println(hitString.length() + " " + hitPtr);
- //System.out.println("Hit up: " + hitSeq.substring(hitPtr));
- queryString.append(querySeq.substring(queryPtr, queryPtr + n));
- hitString.append(hitSeq.substring(hitPtr, hitPtr + n));
- queryPtr += n;
- hitPtr += n;
- queryAlnSize += n;
- hitAlnSize += n;
- donePreClipping = true;
- matchCount+=n;
- break;
- case 'I':
- if (n > 100) {
- // DEBUG MODE TURNS OFF THIS
- System.out.println("");
- System.out.println("Error: large I ("+n+") - read "+queryID+" ignored");
- processed = false;
- continueParsing = false;
- } else {
- queryString.append(querySeq.substring(queryPtr, queryPtr + n));
- for (int j=0; j<n; j++) {
- hitString.append('-');
- }
- queryPtr += n;
- queryAlnSize += n;
- }
- donePreClipping = true;
- insCount+=n;
- break;
- case 'D':
- if (n > 100) {
- System.out.println("Error: large D ("+n+") - read "+queryID+" ignored");
- processed = false;
- continueParsing = false;
- } else {
- hitString.append(hitSeq.substring(hitPtr, hitPtr + n));
- for (int j=0; j<n; j++) {
- queryString.append('-');
- }
- hitPtr += n;
- hitAlnSize += n;
- }
- donePreClipping = true;
- delCount+=n;
- break;
- case 'N':
- System.out.println("Warning: encountered N in CIGAR format!");
- System.out.println("");
- displayResult = true;
- hitString.append(hitSeq.substring(hitPtr, hitPtr + n));
- for (int j=0; j<n; j++) {
- queryString.append('-');
- }
- queryPtr += n;
- hitPtr += n;
- donePreClipping = true;
- break;
- case 'S':
- //System.out.println("Warnning: encountered S in CIGAR format!");
- queryPtr += n;
- if (!donePreClipping) {
- queryStart += n;
- }
- displayResult = true;
- break;
- case 'H':
- //System.out.println("Warning: encountered H in CIGAR format!");
- if (!donePreClipping) {
- queryStart += n;
- } else {
- //System.out.println("Warning: hard clipping at end");
- }
- displayResult = true;
- break;
- case 'P':
- System.out.println("Warning: encountered P in CIGAR format!");
- System.out.println("");
- displayResult = true;
- donePreClipping = true;
- break;
- default:
- System.out.println("Unrecognised character in CIGAR string: "+c);
- processed = false;
- break;
- }
- value="";
- tagCtr++;
- //System.out.println("qseq="+querySeq.length()+" matchCount="+matchCount+" insCount="+insCount+" delCount="+delCount+" totalCount="+totalCount);
- //System.out.println("Query: "+queryString.toString());
- //System.out.println(" Hit: "+hitString.toString());
- }
-
- i++;
- //System.out.println("i="+i+" and length="+cigarString.length());
- }
-
- //if (displayResult) {
- //System.out.println(queryFilename);
- //System.out.println("Query: " + queryString.toString());
- //System.out.println(" Hit: " + hitString.toString());
- //System.exit(1);
- //}
- return processed;
- }
-
- public int getQueryStart() {
- return queryStart;
- }
-
- public int getQueryAlnSize() {
- return queryAlnSize;
- }
-
- public int getHitAlnSize() {
- return hitAlnSize;
- }
-
- public String getQueryString() {
- return queryString.toString();
- }
-
- public String getHitString() {
- return hitString.toString();
- }
-}
diff --git a/src/nanook/ComparisonReportWriter.java b/src/nanook/ComparisonReportWriter.java
deleted file mode 100644
index fc8a84d..0000000
--- a/src/nanook/ComparisonReportWriter.java
+++ /dev/null
@@ -1,196 +0,0 @@
-package nanook;
-
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.util.ArrayList;
-
-/**
- *
- * @author Richard Leggett
- */
-public class ComparisonReportWriter {
- private NanoOKOptions options;
- private PrintWriter pw = null;
- private SampleComparer sampleComparer = null;
-
- public ComparisonReportWriter(NanoOKOptions o, SampleComparer sc) {
- options = o;
- sampleComparer = sc;
- }
-
- /**
- * Check if graphic file exists and only insert if it does
- * @param preTex LaTeX before filename
- * @param filename the file
- * @param postTex LaTeX after filename
- */
- private void includeGraphicsIfExists(int type, String preTex, String filename, String postTex) {
- if (options.isProcessingReadType(type)) {
- String fullFilename = filename + "." + options.getImageFormat();
- File f = new File(fullFilename);
-
- if (f.exists()) {
- pw.print(preTex);
- pw.print(fullFilename);
- pw.println(postTex);
- } else {
- pw.print(" ");
- }
- }
- }
-
- /**
- * Open the .tex file.
- */
- public void open() {
- try {
- pw = new PrintWriter(new FileWriter(options.getLatexDir() + File.separator + "comparison.tex"));
- writeLaTeXHeader();
- } catch (IOException e) {
- System.out.println("ReportWriter exception:");
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- /**
- * Write end of LaTeX file.
- */
- private void writeLaTeXFooter() {
- pw.println("\\end{document}");
- }
-
- /**
- * Close the .tex file.
- */
- public void close() {
- writeLaTeXFooter();
- pw.close();
- }
-
- /**
- * Write the top of the LaTeX document.
- */
- private void writeLaTeXHeader() {
- pw.println("\\documentclass[a4paper,11pt,oneside]{article}");
- pw.println("\\usepackage{graphicx}");
- pw.println("\\usepackage{url}");
- pw.println("\\usepackage{multirow}");
- pw.println("\\usepackage{rotating}");
- pw.println("\\usepackage{color}");
- pw.println("\\usepackage[compact]{titlesec}");
- pw.println("\\usepackage[portrait,top=1cm, bottom=2cm, left=1cm, right=1cm]{geometry}");
- pw.println("\\usepackage{float}");
- pw.println("\\restylefloat{table}");
- pw.println("\\begin{document}");
- pw.println("\\renewcommand*{\\familydefault}{\\sfdefault}");
- pw.println("\\section*{\\large{NanoOK comparison report}}");
- }
-
- private void writeLengthSection() {
- String graphSize = "height=5.2cm";
- int type = options.getSpecifiedType();
-
- pw.println("\\subsection*{Read lengths}");
- includeGraphicsIfExists(type, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + NanoOKOptions.getTypeFromInt(type)+"_lengths", "} \\\\");
-
- pw.println("\\vspace{-3mm}");
- pw.println("\\subsection*{Number of reads}");
- includeGraphicsIfExists(type, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + NanoOKOptions.getTypeFromInt(type)+"_number_of_reads", "} \\\\");
-
- pw.println("\\vspace{-3mm}");
- pw.println("\\subsection*{Total bases}");
- includeGraphicsIfExists(type, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + NanoOKOptions.getTypeFromInt(type)+"_total_bases", "} \\\\");
-
- pw.println("\\vspace{-3mm}");
- pw.println("\\subsection*{Alignment summary}");
- includeGraphicsIfExists(type, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + NanoOKOptions.getTypeFromInt(type)+"_maps", "} \\\\");
- }
-
- public void writeReferenceSection(ReferenceSequence refSeq) {
- String id = refSeq.getName().replaceAll("_", " ");
- String graphSize = "height=6cm";
- int type = options.getSpecifiedType();
-
- pw.println("\\clearpage");
- pw.println("\\subsection*{" + id + " identity}");
- includeGraphicsIfExists(type, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + "_" + NanoOKOptions.getTypeFromInt(type)+"_query_identity", "} \\\\");
- includeGraphicsIfExists(type, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + "_" + NanoOKOptions.getTypeFromInt(type)+"_query_identity_zoom", "} \\\\");
- includeGraphicsIfExists(type, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + "_" + NanoOKOptions.getTypeFromInt(type)+"_percent_query_aligned", "} \\\\");
- includeGraphicsIfExists(type, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + "_" + NanoOKOptions.getTypeFromInt(type)+"_percent_query_aligned_zoom", "} \\\\");
-
- pw.println("\\subsection*{" + id + " best perfect kmer}");
- includeGraphicsIfExists(type, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + "_" + NanoOKOptions.getTypeFromInt(type)+"_best_perfect_kmer", "} \\\\");
-
- pw.println("\\subsection*{" + id + " GC}");
- includeGraphicsIfExists(type, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + "_" + NanoOKOptions.getTypeFromInt(type)+"_query_gc", "} \\\\");
-
- for (int ou=0; ou<2; ou++) {
- if (ou == 0) {
- pw.println("\\subsection*{" + id + " " + NanoOKOptions.getTypeFromInt(type) + " Over-represented 5-mers}");
- } else {
- pw.println("\\subsection*{" + id + " " + NanoOKOptions.getTypeFromInt(type) + " Under-represented 5-mers}");
- }
-
- pw.println("\\vspace{-3mm}");
- pw.println("\\begin{table}[H]");
- pw.println("{\\footnotesize");
- pw.println("\\fontsize{8pt}{10pt}\\selectfont");
- pw.println("\\tabcolsep=0.15cm");
- pw.println("\\begin{tabular}{|c|c c c c c c c c c c|}");
- pw.println("\\cline{1-11}");
- pw.println("Sample & 1 & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 \\\\");
- pw.println("\\cline{1-11}");
- for (int i=0; i<sampleComparer.getNumberOfSamples(); i++) {
- OverallStats os = sampleComparer.getSample(i);
- ReferenceSequence rs = os.getStatsByType(type).getOptions().getReferences().getReferenceById(refSeq.getId());
- rs.getStatsByType(type).sortKmerAbundance();
- ArrayList<KmerAbundance> ka = rs.getStatsByType(type).getKmerAbundance();
- pw.print(sampleComparer.getSampleName(i).replaceAll("_", "\\\\_"));
- for (int j=0; j<10; j++) {
- KmerAbundance ko;
-
- if (ou == 0) {
- ko = ka.get(j);
- } else {
- ko = ka.get(ka.size() - 1 - j);
- }
- pw.print(" & "+ko.getKmer());
- }
- pw.println(" \\\\");
- }
- pw.println("\\cline{1-11}");
- pw.println("\\end{tabular}");
- pw.println("}");
- pw.println("\\end{table}");
- }
- }
-
- private void writeReferenceSection() {
- ArrayList<ReferenceSequence> sortedRefs = options.getReferences().getSortedReferences();
- for (int i=0; i<sortedRefs.size(); i++) {
- ReferenceSequence rs = sortedRefs.get(i);
-
- if ((options.debugMode() && (!rs.getName().equalsIgnoreCase("DNA_CS")))) {
- writeReferenceSection(rs);
- }
- }
- }
-
- public void writeReport() {
- open();
- writeLengthSection();
- writeReferenceSection();
- close();
- }
-
- public void makePDF() {
- ProcessLogger pl = new ProcessLogger();
- String command = "pdflatex -interaction=nonstopmode -output-directory " + options.getLatexDir() + " " + options.getLatexDir() + File.separator + "comparison.tex";
- String logFilename = options.getLogsDir() + File.separator + "pdflatex_output_log_comparison.txt";
- System.out.println("pdflatex output " + logFilename);
- pl.runAndLogCommand(command, logFilename, false);
- }
-}
diff --git a/src/nanook/DirectoryWatcher.java b/src/nanook/DirectoryWatcher.java
deleted file mode 100644
index 8f2d1b4..0000000
--- a/src/nanook/DirectoryWatcher.java
+++ /dev/null
@@ -1,184 +0,0 @@
-package nanook;
-
-import java.io.*;
-import java.util.*;
-import java.io.File;
-import java.nio.file.*;
-import static java.nio.file.StandardWatchEventKinds.*;
-import static java.nio.file.LinkOption.*;
-import java.nio.file.attribute.*;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.util.ArrayList;
-import java.util.concurrent.*;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import org.apache.commons.io.monitor.FileAlterationListener;
-import org.apache.commons.io.monitor.FileAlterationListenerAdaptor;
-import org.apache.commons.io.monitor.FileAlterationMonitor;
-import org.apache.commons.io.monitor.FileAlterationObserver;
-
-
-public class DirectoryWatcher implements FileAlterationListener {
- private WatchService watcher = null;
- private final Map<WatchKey,Path> keys;
- private NanoOKOptions options;
- private ReadAligner aligner;
- private AlignmentFileParser parser;
- private boolean keepWatching = true;
-
- public DirectoryWatcher(NanoOKOptions o, ReadAligner a, AlignmentFileParser p) {
- options = o;
- aligner = a;
- parser = p;
-
- keys = new HashMap<WatchKey,Path>();
-
-
- }
-
- public void onStop(FileAlterationObserver observer) {};
- public void onStart(FileAlterationObserver observer) {};
- public void onFileDelete(File file) {};
- public void onFileChange(File file) {};
- public void onDirectoryDelete(File directory) {};
- public void onDirectoryCreate(File directory) {};
- public void onDirectoryChange(File directory) {};
-
- public void onFileCreate(File file) {
- Path child = file.toPath();
-
- if (file.getName().toString().equals("stop")) {
- keepWatching = false;
- System.out.println("Stopping...");
- } else if (file.getName().toString().endsWith(".fast5")) {
- // print out event
- System.out.println("Got new file " + file.getName());
- String pf = child.getName(child.getNameCount() - 2).toString();
- String fastaqDir = child.getParent().getParent().getParent().toString() + File.separator + "fasta" + File.separator + pf;
- String alignDir = options.getAlignerDir() + File.separator + pf;
- String logDir = options.getLogsDir() + File.separator + options.getAligner() + File.separator + pf;
-
- options.getThreadExecutor().execute(new WatcherRunnable(options, child.getParent().toString(), child.getFileName().toString(), pf, fastaqDir, alignDir, parser));
- }
-
- }
-
- private void checkAndMakeDirectory(String dir) {
- File f = new File(dir);
- if (f.exists()) {
- if (!f.isDirectory()) {
- System.out.println("Error: " + dir + " is a file, not a directory!");
- System.exit(1);
- }
- } else {
- System.out.println("Making directory " + dir);
- f.mkdir();
- }
- }
-
- private void makeDirs(String pf) {
- checkAndMakeDirectory(options.getReadDir() + File.separator + pf);
- checkAndMakeDirectory(options.getLogsDir() + File.separator + "blastn_card" + File.separator + pf);
- checkAndMakeDirectory(options.getLogsDir() + File.separator + "blastn_nt" + File.separator + pf);
- checkAndMakeDirectory(options.getSampleDirectory() + File.separator + "blastn_card" + File.separator + pf);
- checkAndMakeDirectory(options.getSampleDirectory() + File.separator + "blastn_nt" + File.separator + pf);
-
- // Make output Template, Complement and 2D directories
- for (int t=0; t<3; t++) {
- if (options.isProcessingReadType(t)) {
- checkAndMakeDirectory(options.getReadDir() + File.separator + pf + File.separator + NanoOKOptions.getTypeFromInt(t));
- checkAndMakeDirectory(options.getSampleDirectory() + File.separator + "blastn_card" + File.separator + pf + File.separator + NanoOKOptions.getTypeFromInt(t));
- checkAndMakeDirectory(options.getSampleDirectory() + File.separator + "blastn_nt" + File.separator + pf + File.separator + NanoOKOptions.getTypeFromInt(t));
- checkAndMakeDirectory(options.getLogsDir() + File.separator + "blastn_card" + File.separator + pf + File.separator + NanoOKOptions.getTypeFromInt(t));
- checkAndMakeDirectory(options.getLogsDir() + File.separator + "blastn_nt" + File.separator + pf + File.separator + NanoOKOptions.getTypeFromInt(t));
- }
- }
- }
-
- public void watch() {
- checkAndMakeDirectory(options.getFast5Dir());
- checkAndMakeDirectory(options.getFast5Dir() + File.separator + "pass");
- checkAndMakeDirectory(options.getFast5Dir() + File.separator + "fail");
- checkAndMakeDirectory(options.getLogsDir());
- checkAndMakeDirectory(options.getReadDir());
- checkAndMakeDirectory(options.getLogsDir() + File.separator + "blastn_card");
- checkAndMakeDirectory(options.getLogsDir() + File.separator + "blastn_bacteria");
- checkAndMakeDirectory(options.getLogsDir() + File.separator + "blastn_nt");
- checkAndMakeDirectory(options.getSampleDirectory() + File.separator + "blastn_card");
- checkAndMakeDirectory(options.getSampleDirectory() + File.separator + "blastn_bacteria");
- checkAndMakeDirectory(options.getSampleDirectory() + File.separator + "blastn_nt");
-
- System.out.println("Opening logs");
- options.getWatcherReadLog().open(options.getLogsDir() + File.separator + "watcher_reads", options.clearLogsOnStart());
- options.getWatcherCardFileLog().open(options.getLogsDir() + File.separator + "watcher_CARD_files", options.clearLogsOnStart());
- options.getWatcherCardCommandLog().open(options.getLogsDir() + File.separator + "watcher_CARD_commands", options.clearLogsOnStart());
- options.getWatcherntFileLog().open(options.getLogsDir() + File.separator + "watcher_nt_files", options.clearLogsOnStart());
- options.getWatcherntCommandLog().open(options.getLogsDir() + File.separator + "watcher_nt_commands", options.clearLogsOnStart());
-
- options.getMergerCardPass().open(options.getSampleDirectory() + File.separator + "blastn_card" + File.separator + "all_pass_blastn_card", options.clearLogsOnStart());
- options.getMergerCardFail().open(options.getSampleDirectory() + File.separator + "blastn_card" + File.separator + "all_fail_blastn_card", options.clearLogsOnStart());
- //options.getMergerntPass().open(options.getSampleDirectory() + File.separator + "blastn_nt" + File.separator + "all_pass_blastn_nt", options.clearLogsOnStart());
- //options.getMergerntFail().open(options.getSampleDirectory() + File.separator + "blastn_nt" + File.separator + "all_fail_blastn_nt", options.clearLogsOnStart());
-
- System.out.println("Watching for new files...");
- try {
- FileAlterationMonitor monitor = new FileAlterationMonitor(500);
-
- watcher = FileSystems.getDefault().newWatchService();
-
- if (options.isProcessingPassReads()) {
- String dirName = options.getFast5Dir() + File.separator + "pass";
- Path passDir = Paths.get(dirName);
-
- options.openMergedFile(options.getReadDir() + File.separator + options.getSample()+"_pass_1d", NanoOKOptions.TYPE_TEMPLATE, NanoOKOptions.READTYPE_PASS);
- options.openMergedFile(options.getReadDir() + File.separator + options.getSample()+"_pass_2d", NanoOKOptions.TYPE_2D, NanoOKOptions.READTYPE_PASS);
-
- FileAlterationObserver observer = new FileAlterationObserver(dirName);
- observer.addListener(this);
- monitor.addObserver(observer);
-
- System.out.println("Watching "+dirName);
- makeDirs("pass");
- }
-
- if (options.isProcessingFailReads()) {
- String dirName = options.getFast5Dir() + File.separator + "fail";
- Path failDir = Paths.get(dirName);
-
- options.openMergedFile(options.getReadDir() + File.separator + options.getSample()+"_fail_1d", NanoOKOptions.TYPE_TEMPLATE, NanoOKOptions.READTYPE_FAIL);
- options.openMergedFile(options.getReadDir() + File.separator + options.getSample()+"_fail_2d", NanoOKOptions.TYPE_2D, NanoOKOptions.READTYPE_FAIL);
-
- FileAlterationObserver observer = new FileAlterationObserver(dirName);
- observer.addListener(this);
- monitor.addObserver(observer);
-
- System.out.println("Watching "+dirName);
- makeDirs("fail");
- }
-
- System.out.println("Waiting...\n");
- monitor.start();
- while (keepWatching) {
- Thread.sleep(1000);
- }
- monitor.stop();
- } catch (Exception e) {
- System.out.println("ReadExtractor exception:");
- e.printStackTrace();
- System.exit(1);
- }
-
- System.out.println("Closing logs");
- options.getWatcherReadLog().close();
- options.getWatcherCardFileLog().close();
- options.getWatcherCardCommandLog().close();
- options.getWatcherntFileLog().close();
- options.getWatcherntCommandLog().close();
- options.getMergerCardPass().close();
- options.getMergerntPass().close();
- options.getMergerCardFail().close();
- options.getMergerntFail().close();
- }
-}
diff --git a/src/nanook/DirectoryWatcherNative.java b/src/nanook/DirectoryWatcherNative.java
deleted file mode 100644
index 7dc10b7..0000000
--- a/src/nanook/DirectoryWatcherNative.java
+++ /dev/null
@@ -1,219 +0,0 @@
-package nanook;
-
-import java.io.*;
-import java.util.*;
-import java.io.File;
-import java.nio.file.*;
-import static java.nio.file.StandardWatchEventKinds.*;
-import static java.nio.file.LinkOption.*;
-import java.nio.file.attribute.*;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.util.ArrayList;
-import java.util.concurrent.*;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import org.apache.commons.io.monitor.FileAlterationListener;
-import org.apache.commons.io.monitor.FileAlterationListenerAdaptor;
-import org.apache.commons.io.monitor.FileAlterationMonitor;
-import org.apache.commons.io.monitor.FileAlterationObserver;
-
-
-public class DirectoryWatcherNative implements FileAlterationListener {
- private WatchService watcher = null;
- private final Map<WatchKey,Path> keys;
- private NanoOKOptions options;
- private ReadAligner aligner;
- private AlignmentFileParser parser;
- private boolean keepWatching = true;
-
- public DirectoryWatcherNative(NanoOKOptions o, ReadAligner a, AlignmentFileParser p) {
- options = o;
- aligner = a;
- parser = p;
-
- keys = new HashMap<WatchKey,Path>();
- }
-
- @SuppressWarnings("unchecked")
- static <T> WatchEvent<T> cast(WatchEvent<?> event) {
- return (WatchEvent<T>)event;
- }
-
- public void onFileCreate(File file) {
- Path child = file.toPath();
- System.out.println("Created: "+file.getName());
-
- if (file.getName().toString().equals("stop")) {
- keepWatching = false;
- System.out.println("Stopping...");
- } else if (file.getName().toString().endsWith(".fast5")) {
- // print out event
- System.out.println("Got new file " + file.getName());
- String pf = child.getName(child.getNameCount() - 2).toString();
- String fastaqDir = child.getParent().getParent().getParent().toString() + File.separator + "fasta" + File.separator + pf;
- String alignDir = options.getAlignerDir() + File.separator + pf;
- String logDir = options.getLogsDir() + File.separator + options.getAligner() + File.separator + pf;
-
- //executor.execute(new WatcherRunnable(options, child.getParent().toString(), child.getFileName().toString(), fastaqDir, alignDir, parser));
- }
-
- }
-
- /**
- * Process all events for keys queued to the watcher
- */
- void processEvents() {
- System.out.println("Waiting...\n");
- while (keepWatching) {
-
- // wait for key to be signalled
- WatchKey key;
- try {
- key = watcher.take();
- } catch (InterruptedException x) {
- return;
- }
-
- Path dir = keys.get(key);
- if (dir == null) {
- System.err.println("WatchKey not recognized!!");
- continue;
- }
-
- for (WatchEvent<?> event: key.pollEvents()) {
- WatchEvent.Kind kind = event.kind();
-
- // Context for directory entry event is the file name of entry
- WatchEvent<Path> ev = cast(event);
- Path name = ev.context();
- Path child = dir.resolve(name);
-
- System.out.println("File "+child.getFileName().toString());
- if (child.getFileName().toString().equals("stop")) {
- keepWatching = false;
- System.out.println("Stopping...");
- } else if (child.getFileName().toString().endsWith(".fast5")) {
- // print out event
- System.out.println("Got new file " + child);
- String pf = child.getName(child.getNameCount() - 2).toString();
- String fastaqDir = child.getParent().getParent().getParent().toString() + File.separator + "fasta" + File.separator + pf;
- String alignDir = options.getAlignerDir() + File.separator + pf;
- String logDir = options.getLogsDir() + File.separator + options.getAligner() + File.separator + pf;
-
- //executor.execute(new WatcherRunnable(options, child.getParent().toString(), child.getFileName().toString(), fastaqDir, alignDir, parser));
- }
- }
-
- // reset key and remove from set if directory no longer accessible
- boolean valid = key.reset();
- if (!valid) {
- keys.remove(key);
-
- // all directories are inaccessible
- if (keys.isEmpty()) {
- break;
- }
- }
- }
- }
-
- private void checkAndMakeDirectory(String dir) {
- File f = new File(dir);
- if (f.exists()) {
- if (!f.isDirectory()) {
- System.out.println("Error: " + dir + " is a file, not a directory!");
- System.exit(1);
- }
- } else {
- System.out.println("Making directory " + dir);
- f.mkdir();
- }
- }
-
- private void makeDirs(String pf) {
- checkAndMakeDirectory(options.getReadDir());
- checkAndMakeDirectory(options.getReadDir() + File.separator + pf);
- checkAndMakeDirectory(options.getLogsDir());
- checkAndMakeDirectory(options.getLogsDir() + File.separator + "blastn_card");
- checkAndMakeDirectory(options.getLogsDir() + File.separator + "blastn_nt");
- checkAndMakeDirectory(options.getLogsDir() + File.separator + "blastn_card" + File.separator + pf);
- checkAndMakeDirectory(options.getLogsDir() + File.separator + "blastn_nt" + File.separator + pf);
- checkAndMakeDirectory(options.getSampleDirectory() + File.separator + "blastn_card");
- checkAndMakeDirectory(options.getSampleDirectory() + File.separator + "blastn_card" + File.separator + pf);
- checkAndMakeDirectory(options.getSampleDirectory() + File.separator + "blastn_nt");
- checkAndMakeDirectory(options.getSampleDirectory() + File.separator + "blastn_nt" + File.separator + pf);
-
- // Make output Template, Complement and 2D directories
- for (int t=0; t<3; t++) {
- if (options.isProcessingReadType(t)) {
- checkAndMakeDirectory(options.getReadDir() + File.separator + pf + File.separator + NanoOKOptions.getTypeFromInt(t));
- checkAndMakeDirectory(options.getSampleDirectory() + File.separator + "blastn_card" + File.separator + pf + File.separator + NanoOKOptions.getTypeFromInt(t));
- checkAndMakeDirectory(options.getSampleDirectory() + File.separator + "blastn_nt" + File.separator + pf + File.separator + NanoOKOptions.getTypeFromInt(t));
- checkAndMakeDirectory(options.getLogsDir() + File.separator + "blastn_card" + File.separator + pf + File.separator + NanoOKOptions.getTypeFromInt(t));
- checkAndMakeDirectory(options.getLogsDir() + File.separator + "blastn_nt" + File.separator + pf + File.separator + NanoOKOptions.getTypeFromInt(t));
- }
- }
- }
-
- public void watch() {
- System.out.println("Opening logs");
- options.getWatcherReadLog().open(options.getLogsDir() + File.separator + "watcher_reads", options.clearLogsOnStart());
- options.getWatcherCardFileLog().open(options.getLogsDir() + File.separator + "watcher_CARD_files", options.clearLogsOnStart());
- options.getWatcherCardCommandLog().open(options.getLogsDir() + File.separator + "watcher_CARD_commands", options.clearLogsOnStart());
- options.getWatcherntFileLog().open(options.getLogsDir() + File.separator + "watcher_nt_files", options.clearLogsOnStart());
- options.getWatcherntCommandLog().open(options.getLogsDir() + File.separator + "watcher_nt_commands", options.clearLogsOnStart());
-
- System.out.println("Watching for new files...");
- try {
- watcher = FileSystems.getDefault().newWatchService();
-
- if (options.isProcessingPassReads()) {
- String dirName = options.getFast5Dir() + File.separator + "pass";
- Path passDir = Paths.get(dirName);
-
- FileAlterationObserver observer = new FileAlterationObserver(dirName);
- FileAlterationMonitor monitor = new FileAlterationMonitor(500);
- observer.addListener(this);
- monitor.addObserver(observer);
- monitor.start();
-
- System.out.println("Watching "+dirName);
- makeDirs("pass");
- //WatchKey passKey = passDir.register(watcher, ENTRY_CREATE);
- //keys.put(passKey, passDir);
- }
-
- if (options.isProcessingFailReads()) {
- String dirName = options.getFast5Dir() + File.separator + "fail";
- Path failDir = Paths.get(dirName);
- System.out.println("Watching "+dirName);
- makeDirs("fail");
- //WatchKey failKey = failDir.register(watcher, ENTRY_CREATE);
- //keys.put(failKey, failDir);
- }
-
- this.processEvents();
- } catch (Exception e) {
- System.out.println("ReadExtractor exception:");
- e.printStackTrace();
- System.exit(1);
- }
-
- System.out.println("Closing logs");
- options.getWatcherReadLog().close();
- options.getWatcherCardFileLog().close();
- options.getWatcherCardCommandLog().close();
- options.getWatcherntFileLog().close();
- options.getWatcherntCommandLog().close();
- }
-
- public void onStop(FileAlterationObserver observer) {};
- public void onStart(FileAlterationObserver observer) {};
- public void onFileDelete(File file) {};
- public void onFileChange(File file) {};
- public void onDirectoryDelete(File directory) {};
- public void onDirectoryCreate(File directory) {};
- public void onDirectoryChange(File directory) {};
-}
diff --git a/src/nanook/Fast5File.java b/src/nanook/Fast5File.java
deleted file mode 100644
index 209e699..0000000
--- a/src/nanook/Fast5File.java
+++ /dev/null
@@ -1,492 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.File;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.Hashtable;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * Representation of a FAST5 file
- * @author leggettr
- */
-public class Fast5File {
- private NanoOKOptions options;
- private String filename = null;
- private HashSet<String> groups = new HashSet();
- private HashSet<String> datasets = new HashSet();
- private NanoOKLog log;
- private boolean oldFormat = false;
- private boolean isCorrupt = false;
- private int highestBasecall1D = -1;
- private int highestBasecall2D = -1;
- private double meanQScore = 0;
-
- /**
- * Constructor
- * @param f
- */
- public Fast5File(NanoOKOptions o, String f) {
- options = o;
- filename = f;
- log = options.getLog();
- indexFile();
- }
-
- /**
- * Index groups and datasets
- */
- public void indexFile() {
- boolean[] typesAvailable = new boolean[3];
- ProcessLogger pl = new ProcessLogger();
- ArrayList<String> response;
-
- log.println("Indexing file "+filename);
-
- response = pl.getCommandOutput("h5dump -n "+filename, true, true);
- for (int i=0; i<response.size(); i++) {
- String s = response.get(i).trim();
- String[] cols = s.split("(\\s+)");
- if (cols[0].equals("dataset")) {
- datasets.add(cols[1]);
- } else if (cols[0].equals("group")) {
- groups.add(cols[1]);
- if (cols[1].startsWith("/Analyses/Basecall_2D_")) {
- Pattern outPattern = Pattern.compile("^/Analyses/Basecall_2D_(\\d+)$");
- Matcher outMatcher = outPattern.matcher(cols[1]);
- if (outMatcher.find()) {
- int index = Integer.parseInt(outMatcher.group(1));
- if (index > highestBasecall2D) {
- highestBasecall2D = index;
- }
- }
- } else if (cols[1].startsWith("/Analyses/Basecall_1D_")) {
- Pattern outPattern = Pattern.compile("^/Analyses/Basecall_1D_(\\d+)$");
- Matcher outMatcher = outPattern.matcher(cols[1]);
- if (outMatcher.find()) {
- int index = Integer.parseInt(outMatcher.group(1));
- if (index > highestBasecall1D) {
- highestBasecall1D = index;
- }
- }
- }
- }
- }
-
- // Old format files did not have separate Basecall_1D section
- if ((highestBasecall1D == -1) && (highestBasecall2D == -1)) {
- isCorrupt = true;
- log.println("Error: couldn't find Basecall_1D or Basecall_2D in "+filename);
- } else if ((highestBasecall1D == -1) && (highestBasecall2D >= 0)) {
- oldFormat = true;
- highestBasecall1D = highestBasecall2D;
- } else {
- if ((highestBasecall1D >=0) && (highestBasecall2D >=0)) {
- if (highestBasecall1D != highestBasecall2D) {
- //isCorrupt = true;
- log.println("Warning: Basecall_1D and Basecall_2D highest indicies not the same in "+filename);
- }
- }
- }
-
- log.println(" Highest1D: "+highestBasecall1D+" Highest2D: "+highestBasecall2D);
- }
-
- public double getMeanQAttribute(String attribute) {
- ProcessLogger pl = new ProcessLogger();
- ArrayList<String> response = pl.getCommandOutput("h5dump -a "+attribute+" "+filename, true, true);
- double meanq = 0;
-
- // Look for value beginning (0):
- int l;
- for (l=0; l<response.size(); l++) {
- String line = response.get(l);
- if (line.contains("(0):")) {
- meanq = Double.parseDouble(line.substring(line.indexOf("(0):") + 5));
- }
- }
-
- return meanq;
- }
-
-
- /**
- * Get the FASTQ data out of the dataset
- *
- * @param inputFilename
- * @param dataset
- * @return
- */
- public FastAQFile getFastqFromDataset(String dataset) {
- ProcessLogger pl = new ProcessLogger();
- ArrayList<String> response = pl.getCommandOutput("h5dump -d "+dataset+" "+filename, true, true);
- FastAQFile ff = null;
-
- // Look for start of FASTQ section
- int l;
- for (l=0; l<response.size(); l++) {
- if (response.get(l).contains("\"@")) {
- break;
- }
- }
-
- // Parse FASTQ portion with regex
- if (l < response.size()) {
- String id = null;
- String seq = null;
- String qual = null;
-
- // Header row
- Pattern outPattern = Pattern.compile("@(.+)");
- Matcher outMatcher = outPattern.matcher(response.get(l));
- if (outMatcher.find()) {
- id = outMatcher.group(1);
- }
-
- // Sequence
- outPattern = Pattern.compile("(\\s*)(\\S+)");
- outMatcher = outPattern.matcher(response.get(l+1));
- if (outMatcher.find()) {
- seq = outMatcher.group(2);
- }
-
- // Qualities
- outPattern = Pattern.compile("(\\s*)(\\S+)");
- outMatcher = outPattern.matcher(response.get(l+3));
- if (outMatcher.find()) {
- qual = outMatcher.group(2);
- }
-
- // Fix IDs
- if (id != null) {
- outPattern = Pattern.compile("00000000-0000-0000-0000-000000000000(.+)");
- outMatcher = outPattern.matcher(id);
- if (outMatcher.find()) {
- if (options.fixIDs()) {
- id = id.replaceAll("^00000000-0000-0000-0000-000000000000_", "");
- id = id.replaceAll(" ", "");
- } else {
- System.out.println("Warning: " + id + " is non-unqiue. Recommend re-running with -fixids option.");
- System.out.println("");
- }
- }
- }
-
- if ((id != null) && (seq != null) && (qual != null)) {
- ff = new FastAQFile(id, seq, qual);
- }
- }
-
- return ff;
- }
-
- public double getMeanQ(int index, int type) {
- String meanQAttributePath = null;
- String indexString;
- double meanQ = 0;
-
- log.println(" Trying to get mean Q type "+type+" from "+filename+" with index "+index);
-
- if (!isCorrupt) {
- if (index == -1) {
- if (type == NanoOKOptions.TYPE_2D) {
- index = highestBasecall2D;
- } else {
- index = highestBasecall1D;
- }
- } else {
- int highestIndex = highestBasecall2D;
-
- if (type != NanoOKOptions.TYPE_2D) {
- highestIndex = highestBasecall1D;
- }
-
- if (index > highestIndex) {
- log.println("Error: index higher than highest Basecall available");
- isCorrupt = true;
- }
- }
- }
-
- if (!isCorrupt) {
- // Make string for group
- indexString = String.format("%03d", index);
-
- // Build path to dataset
- if (type == NanoOKOptions.TYPE_2D) {
- meanQAttributePath = "/Analyses/Basecall_2D_"+indexString+"/Summary/basecall_2d/mean_qscore";
- } else {
- // Now look if we are new format (with Basecall_1D_XXX)
- if (oldFormat) {
- // Old format
- if (type == NanoOKOptions.TYPE_TEMPLATE) {
- meanQAttributePath = "/Analyses/Basecall_2D_"+indexString+"/Summary/basecall_1d_template/mean_qscore";
- } else if (type == NanoOKOptions.TYPE_COMPLEMENT) {
- meanQAttributePath = "/Analyses/Basecall_2D_"+indexString+"/Summary/basecall_1d_complement/mean_qscore";
- } else {
- System.out.println("Error: bad type in getFastq");
- System.exit(1);
- }
- } else {
- // New format
- if (type == NanoOKOptions.TYPE_TEMPLATE) {
- meanQAttributePath = "/Analyses/Basecall_1D_"+indexString+"/Summary/basecall_1d_template/mean_qscore";
- } else if (type == NanoOKOptions.TYPE_COMPLEMENT) {
- meanQAttributePath = "/Analyses/Basecall_1D_"+indexString+"/Summary/basecall_1d_complement/mean_qscore";
- } else {
- System.out.println("Error: bad type in getFastq");
- System.exit(1);
- }
- }
- }
- }
-
- if (meanQAttributePath != null) {
- log.println(" Path: "+meanQAttributePath);
- meanQ = getMeanQAttribute(meanQAttributePath);
- log.println(" MeanQ: "+meanQ);
- }
-
- return meanQ;
- }
-
- /**
- * Get a FastQ/A file for given (Basecall_) index and type (2D/Template/Complement)
- * @param index
- * @param type
- * @return
- */
- public FastAQFile getFastq(int index, int type) {
- String fastqDatasetPath = null;
- String indexString;
- FastAQFile ff = null;
-
- log.println(" Trying to get FASTQ type "+type+" from "+filename+" with index "+index);
-
- if (!isCorrupt) {
- if (index == -1) {
- if (type == NanoOKOptions.TYPE_2D) {
- index = highestBasecall2D;
- } else {
- index = highestBasecall1D;
- }
- } else {
- int highestIndex = highestBasecall2D;
-
- if (type != NanoOKOptions.TYPE_2D) {
- highestIndex = highestBasecall1D;
- }
-
- if (index > highestIndex) {
- log.println("Error: index higher than highest Basecall available");
- isCorrupt = true;
- }
- }
- }
-
- if (!isCorrupt) {
- // Make string for group
- indexString = String.format("%03d", index);
-
- // Build path to dataset
- if (type == NanoOKOptions.TYPE_2D) {
- fastqDatasetPath = "/Analyses/Basecall_2D_"+indexString+"/BaseCalled_2D/Fastq";
- } else {
- // Now look if we are new format (with Basecall_1D_XXX)
- if (oldFormat) {
- // Old format
- if (type == NanoOKOptions.TYPE_TEMPLATE) {
- fastqDatasetPath = "/Analyses/Basecall_2D_"+indexString+"/BaseCalled_template/Fastq";
- } else if (type == NanoOKOptions.TYPE_COMPLEMENT) {
- fastqDatasetPath = "/Analyses/Basecall_2D_"+indexString+"/BaseCalled_complement/Fastq";
- } else {
- System.out.println("Error: bad type in getFastq");
- System.exit(1);
- }
- } else {
- // New format
- if (type == NanoOKOptions.TYPE_TEMPLATE) {
- fastqDatasetPath = "/Analyses/Basecall_1D_"+indexString+"/BaseCalled_template/Fastq";
- } else if (type == NanoOKOptions.TYPE_COMPLEMENT) {
- fastqDatasetPath = "/Analyses/Basecall_1D_"+indexString+"/BaseCalled_complement/Fastq";
- } else {
- System.out.println("Error: bad type in getFastq");
- System.exit(1);
- }
- }
- }
- }
-
- if (fastqDatasetPath != null) {
- log.println(" Path: "+fastqDatasetPath);
- if (datasets.contains(fastqDatasetPath)) {
- log.println(" Found data: "+fastqDatasetPath);
- ff = getFastqFromDataset(fastqDatasetPath);
- } else {
- log.println(" Not there: "+fastqDatasetPath);
- }
- }
-
- return ff;
- }
-
- /**
- * Print list of groups
- */
- public void printGroups() {
- for (String s : groups) {
- System.out.println(s);
- }
- }
-
-// JNI Library version
-// /**
-// * Get FASTQ section out of FAST5 file
-// * @param pathname path to FAST5 file
-// * @param type type of read
-// * @return multi-line String
-// */
-// public String getFastq(String pathname, int type) {
-// H5File file = null;
-// String[] fastq = null;
-//
-// // Open a file using default properties.
-// try {
-// file = new H5File(pathname, FileFormat.READ);
-//
-// // Find basecall group
-// H5Group grp;
-// String groupPath = new String();
-// String datasetPath = null;
-// String indexString;
-// int index = -1;
-// int i = 0;
-//
-// // Default behaviour is to find latest
-// if (options.getBasecallIndex() == -1) {
-// do {
-// indexString = String.format("%03d", i);
-// grp = (H5Group)file.get("/Analyses/Basecall_2D_" + indexString);
-// if (grp != null) {
-// index=i;
-// i++;
-// }
-// } while (grp != null);
-// } else {
-// // User has specified index - check it exists
-// indexString = String.format("%03d", options.getBasecallIndex());
-// grp = (H5Group)file.get("/Analyses/Basecall_2D_" + indexString);
-// if (grp != null) {
-// index=i;
-// }
-// }
-//
-// // index will = -1 if we didn't find any group
-// if (index >=0) {
-// // Make string for group
-// indexString = String.format("%03d", index);
-//
-// // Build path to dataset
-// if (type == NanoOKOptions.TYPE_2D) {
-// datasetPath = "/Analyses/Basecall_2D_"+indexString+"/BaseCalled_2D/Fastq";
-// } else {
-// // Now look if we are new format (with Basecall_1D_XXX)
-// grp = (H5Group)file.get("/Analyses/Basecall_1D_"+indexString);
-// if (grp == null) {
-// // Old format
-// if (type == NanoOKOptions.TYPE_TEMPLATE) {
-// datasetPath = "/Analyses/Basecall_2D_"+indexString+"/BaseCalled_template/Fastq";
-// } else if (type == NanoOKOptions.TYPE_COMPLEMENT) {
-// datasetPath = "/Analyses/Basecall_2D_"+indexString+"/BaseCalled_complement/Fastq";
-// } else {
-// System.out.println("Error: bad type in getFastq");
-// System.exit(1);
-// }
-// } else {
-// // New format
-// if (type == NanoOKOptions.TYPE_TEMPLATE) {
-// datasetPath = "/Analyses/Basecall_1D_"+indexString+"/BaseCalled_template/Fastq";
-// } else if (type == NanoOKOptions.TYPE_COMPLEMENT) {
-// datasetPath = "/Analyses/Basecall_1D_"+indexString+"/BaseCalled_complement/Fastq";
-// } else {
-// System.out.println("Error: bad type in getFastq");
-// System.exit(1);
-// }
-// }
-// }
-//
-// //System.out.println("Path: "+datasetPath);
-// Dataset ds = (Dataset)file.get(datasetPath);
-// if (ds == null) {
-// System.out.println("No dataset at "+datasetPath);
-// } else {
-// fastq = (String[])ds.getData();
-// }
-// }
-//
-// file.close();
-// } catch (Exception e) {
-// e.printStackTrace();
-// }
-//
-// if (fastq == null) {
-// return null;
-// } else {
-// return fastq[0];
-// }
-// }
-// /**
-// * Dump an individual read
-// * @param inputFilename filename of FAST5 file
-// * @param type type of read
-// */
-// private void dumpRead(String inputFilename, int type, String outputDir) {
-// String outName = new File(inputFilename).getName();
-//
-// String fastqDatafield = null; //getFastq(inputFilename, type);
-// if (fastqDatafield != null) {
-// String [] lines = fastqDatafield.split("\n");
-//
-// String id = null;
-// String seq = lines[1];
-// String qual = lines[3];
-//
-// if (lines[0].startsWith("@")) {
-// id = lines[0].substring(1);
-//
-// // Fix IDs
-// Pattern outPattern = Pattern.compile("00000000-0000-0000-0000-000000000000(.+)");
-// Matcher outMatcher = outPattern.matcher(id);
-// if (outMatcher.find()) {
-// if (options.fixIDs()) {
-// id = id.replaceAll("^00000000-0000-0000-0000-000000000000_", "");
-// id = id.replaceAll(" ", "");
-// } else {
-// System.out.println("Warning: " + id + " is non-unqiue. Recommend re-running with -fixids option.");
-// System.out.println("");
-// }
-// }
-// } else {
-// System.out.println("Couldn't parse "+inputFilename);
-// }
-//
-// if (id != null) {
-// if (options.getReadFormat() == NanoOKOptions.FASTA) {
-// writeFastaFile(id, seq, outputDir + File.separator + NanoOKOptions.getTypeFromInt(type) + File.separator + outName + "_BaseCalled_" + NanoOKOptions.getTypeFromInt(type) + ".fasta");
-// } else if (options.getReadFormat() == NanoOKOptions.FASTQ) {
-// writeFastqFile(id, seq, qual, outputDir + File.separator + NanoOKOptions.getTypeFromInt(type) + File.separator + outName + "_BaseCalled_" + NanoOKOptions.getTypeFromInt(type) + ".fastq");
-// }
-// }
-// } else {
-// System.out.println("Error: couldn't find payload in " + inputFilename);
-// }
-// }
-}
diff --git a/src/nanook/FastAQBlastMerger.java b/src/nanook/FastAQBlastMerger.java
deleted file mode 100644
index eb55c86..0000000
--- a/src/nanook/FastAQBlastMerger.java
+++ /dev/null
@@ -1,138 +0,0 @@
-package nanook;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.util.ArrayList;
-
-public class FastAQBlastMerger implements Runnable {
- private NanoOKOptions options;
- private ArrayList<String> listOfFiles;
- private String mergedFilePrefix;
- private int fileCounter;
-
- public FastAQBlastMerger(NanoOKOptions o, String m, ArrayList a, int fc) {
- options = o;
- mergedFilePrefix = m;
- listOfFiles = a;
- fileCounter = fc;
- }
-
- private void runBlastBacteria() {
- File iff = new File(mergedFilePrefix);
- String inputFasta = mergedFilePrefix + "_" + fileCounter + ".fasta";
- String outputBlast = options.getSampleDirectory() + File.separator + "blastn_bacteria" + File.separator + iff.getName() + "_" + fileCounter + "_blast_bacteria.txt";
- String commandFile = options.getSampleDirectory() + File.separator + "blastn_bacteria" + File.separator + iff.getName() + "_" + fileCounter + "_blast_bacteria.sh";
- String logFile = options.getSampleDirectory() + File.separator + "blastn_bacteria" + File.separator + iff.getName() + "_" + fileCounter + "_blast_bacteria.log";
- String formatString = "'6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore stitle'";
-
- try {
- System.out.println("Writing blast command file "+commandFile);
- PrintWriter pw = new PrintWriter(new FileWriter(commandFile));
- pw.write("blastn -db "+options.getBacteriaPath()+" -query " + inputFasta + " -evalue 0.001 -show_gis -out " + outputBlast + " -outfmt "+formatString);
- pw.close();
-
- options.getLog().println("Submitting blast command file to SLURM "+commandFile);
- ProcessLogger pl = new ProcessLogger();
- String[] commands = {"slurmit",
- "-o", logFile,
- "-p", "Nanopore",
- "-m", "8G",
- "sh "+commandFile};
- pl.runCommandToLog(commands, options.getLog());
- } catch (IOException e) {
- System.out.println("runBlast exception");
- e.printStackTrace();
- }
- }
-
- private void runBlastnt() {
- File iff = new File(mergedFilePrefix);
- String inputFasta = mergedFilePrefix + "_" + fileCounter + ".fasta";
- String outputBlast = options.getSampleDirectory() + File.separator + "blastn_nt" + File.separator + iff.getName() + "_" + fileCounter + "_blast_nt.txt";
- String commandFile = options.getSampleDirectory() + File.separator + "blastn_nt" + File.separator + iff.getName() + "_" + fileCounter + "_blast_nt.sh";
- String logFile = options.getSampleDirectory() + File.separator + "blastn_nt" + File.separator + iff.getName() + "_" + fileCounter + "_blast_nt.log";
- String formatString = "'6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore stitle'";
-
- try {
- System.out.println("Writing blast command file "+commandFile);
- PrintWriter pw = new PrintWriter(new FileWriter(commandFile));
- pw.write("blastn -db "+options.getntPath()+" -query " + inputFasta + " -evalue 0.001 -show_gis -out " + outputBlast + " -outfmt "+formatString);
- pw.close();
-
- options.getLog().println("Submitting blast command file to SLURM "+commandFile);
- ProcessLogger pl = new ProcessLogger();
- String[] commands = {"slurmit",
- "-o", logFile,
- "-p", "tgac-medium",
- "-m", "16G",
- "sh "+commandFile};
- pl.runCommandToLog(commands, options.getLog());
- } catch (IOException e) {
- System.out.println("runBlast exception");
- e.printStackTrace();
- }
- }
-
- private void runBlastCard() {
- File iff = new File(mergedFilePrefix);
- String inputFasta = mergedFilePrefix + "_" + fileCounter + ".fasta";
- String outputBlast = options.getSampleDirectory() + File.separator + "blastn_card" + File.separator + iff.getName() + "_" + fileCounter + "_blast_card.txt";
- String commandFile = options.getSampleDirectory() + File.separator + "blastn_card" + File.separator + iff.getName() + "_" + fileCounter + "_blast_card.sh";
- String logFile = options.getSampleDirectory() + File.separator + "blastn_card" + File.separator + iff.getName() + "_" + fileCounter + "_blast_card.log";
- String formatString = "'6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore stitle'";
-
- try {
- System.out.println("Writing blast command file "+commandFile);
- PrintWriter pw = new PrintWriter(new FileWriter(commandFile));
- pw.write("blastn -db "+options.getCardPath()+" -query " + inputFasta + " -evalue 0.001 -show_gis -out " + outputBlast + " -outfmt "+formatString);
- pw.close();
-
- options.getLog().println("Submitting blast command file to SLURM "+commandFile);
- ProcessLogger pl = new ProcessLogger();
- String[] commands = {"slurmit",
- "-o", logFile,
- "-p", "Nanopore",
- "-m", "4G",
- "sh "+commandFile};
- pl.runCommandToLog(commands, options.getLog());
- } catch (IOException e) {
- System.out.println("runBlast exception");
- e.printStackTrace();
- }
- }
-
- private void mergeFiles() {
- String mergedFile = mergedFilePrefix + "_" + fileCounter + ".fasta";
-
- options.getLog().println("Writing merged file "+mergedFile);
-
- try {
- PrintWriter pw = new PrintWriter(new FileWriter(mergedFile));
-
- for (int i=0; i<listOfFiles.size(); i++) {
- BufferedReader br = new BufferedReader(new FileReader(listOfFiles.get(i)));
- String line;
- while ((line = br.readLine()) != null) {
- pw.println(line);
- }
- br.close();
- }
- pw.close();
- } catch (IOException e) {
- System.out.println("mergeFiles exception");
- e.printStackTrace();
- }
-
- }
-
- public void run() {
- mergeFiles();
- runBlastBacteria();
- runBlastCard();
- runBlastnt();
- }
-}
diff --git a/src/nanook/FastAQFile.java b/src/nanook/FastAQFile.java
deleted file mode 100644
index 039e7fc..0000000
--- a/src/nanook/FastAQFile.java
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * To change this license header, choose License Headers in Project Properties.
- * To change this template file, choose Tools | Templates
- * and open the template in the editor.
- */
-package nanook;
-
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-
-/**
- * Represent FASTA/FASTQ file
- * @author leggettr
- */
-public class FastAQFile {
- private String id;
- private String sequence;
- private String qualities;
-
- /**
- * Constructor
- * @param i id
- * @param s sequence string
- * @param q qualities string
- */
- public FastAQFile(String i, String s, String q) {
- id = i;
- sequence = s;
- qualities = q;
- }
-
- public void writeFastqToHandle(PrintWriter pw) {
- pw.print("@");
- pw.println(id);
- pw.println(sequence);
- pw.println("+");
- pw.println(qualities);
- }
-
- /**
- * Write as FASTQ file
- * @param filename output filename
- */
- public synchronized void writeFastq(String filename) {
- PrintWriter pw;
-
- try {
- pw = new PrintWriter(new FileWriter(filename));
- pw.print("@");
- pw.println(id);
- pw.println(sequence);
- pw.println("+");
- pw.println(qualities);
- pw.close();
- } catch (IOException e) {
- System.out.println("writeFastaFile exception");
- e.printStackTrace();
- }
- }
-
- public void writeFastaToHandle(PrintWriter pw, String fast5Path) {
- pw.print(">");
- pw.print(id);
- if (fast5Path != null) {
- pw.print(" "+fast5Path);
- }
- pw.println("");
- pw.println(sequence);
- }
-
- /**
- * Write as FASTA file
- *
- * @param filename output filename
- */
- public void writeFasta(String filename, String fast5Path) {
- PrintWriter pw;
-
- try {
- pw = new PrintWriter(new FileWriter(filename));
- pw.print(">");
- pw.print(id);
- if (fast5Path != null) {
- pw.print(" "+fast5Path);
- }
- pw.println("");
- pw.println(sequence);
- pw.close();
- } catch (IOException e) {
- System.out.println("writeFastaFile exception");
- e.printStackTrace();
- }
- }
-
- public int getLength() {
- return sequence.length();
- }
-
- public String getID() {
- return id;
- }
-}
diff --git a/src/nanook/FileWatcher.java b/src/nanook/FileWatcher.java
deleted file mode 100644
index 0c28e98..0000000
--- a/src/nanook/FileWatcher.java
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett (richard.leggett at earlham.ac.uk)
- *
- * Copyright 2015-17 Earlham Institute
- */
-
-package nanook;
-
-import java.io.File;
-import java.util.*;
-import java.util.Hashtable;
-import java.util.LinkedList;
-import java.util.concurrent.ThreadPoolExecutor;
-
-public class FileWatcher {
- private NanoOKOptions options;
- private int filesToProcess = 0;
- private int filesProcessed = 0;
- private int lastCompleted = -1;
- private long lastFileTime = System.nanoTime();
- private long secsSinceLast = 0;
- private ArrayList<FileWatcherItem> batchContainersToWatch = new ArrayList();
- private ArrayList<FileWatcherItem> fileDirsToWatch = new ArrayList();
- private Hashtable<String, Integer> batchDirs = new Hashtable();
- private Hashtable<String, Integer> allFiles = new Hashtable();
- private LinkedList<FileWatcherItem> pendingFiles = new LinkedList<FileWatcherItem>();
-
- public FileWatcher(NanoOKOptions o) {
- options = o;
- }
-
- //public FileWatcher(NanoOKOptions o, String d) {
- // options = o;
- // fileDirsToWatch.add(new FileWatcherDir(d, pf));
- //}
-
- public void addBatchContainer(String d, int pf) {
- options.getLog().println("Added batch dir: "+d);
- batchContainersToWatch.add(new FileWatcherItem(d, pf));
- }
-
- public void addWatchDir(String d, int pf) {
- options.getLog().println("Added watch dir: "+d);
- fileDirsToWatch.add(new FileWatcherItem(d, pf));
- }
-
- public synchronized void addPendingFile(String s, int pf) {
- pendingFiles.add(new FileWatcherItem(s, pf));
- filesToProcess++;
- }
-
- public synchronized FileWatcherItem getPendingFile() {
- if (pendingFiles.size() > 0) {
- filesProcessed++;
- return pendingFiles.removeFirst();
- } else {
- return null;
- }
- }
-
- public void writeProgress() {
- long e = 0;
- long s = NanoOKOptions.PROGRESS_WIDTH;
-
- if (filesToProcess > 0) {
- e = NanoOKOptions.PROGRESS_WIDTH * filesProcessed / filesToProcess;
- s = NanoOKOptions.PROGRESS_WIDTH - e;
- }
-
- System.out.print("\rProcessing [");
- for (int i=0; i<e; i++) {
- System.out.print("=");
- }
- for (int i=0; i<s; i++) {
- System.out.print(" ");
- }
- System.out.print("] " + filesProcessed +"/" + filesToProcess);
- lastCompleted = filesProcessed;
- }
-
- private void checkForNewBatchDirs() {
- int count = 0;
- for (int i=0; i<batchContainersToWatch.size(); i++) {
- FileWatcherItem dir = batchContainersToWatch.get(i);
- int pf = dir.getPassOrFail();
- String dirName = dir.getPathname();
- File d = new File(dirName);
- File[] listOfFiles = d.listFiles();
-
- options.getLog().println("Scanning for new batch dirs "+dirName);
-
- if (listOfFiles == null) {
- options.getLog().println("Directory "+dirName+" doesn't exist");
- } else if (listOfFiles.length <= 0) {
- options.getLog().println("Directory "+dirName+" empty");
- } else {
- for (File file : listOfFiles) {
- if (file.isDirectory()) {
- if (!file.getName().startsWith(("."))) {
- if (!batchDirs.containsKey(file.getPath())) {
- count++;
- options.getLog().println("Got batch dir "+file.getPath());
- batchDirs.put(file.getPath(), 1);
- fileDirsToWatch.add(new FileWatcherItem(file.getPath(), pf));
- }
- }
- }
- }
- }
- }
- }
-
- public void scan() {
- int count = 0;
-
- if (options.usingBatchDirs()) {
- checkForNewBatchDirs();
- }
-
- for (int i=0; i<fileDirsToWatch.size(); i++) {
- FileWatcherItem dir = fileDirsToWatch.get(i);
- String dirName = dir.getPathname();
- File d = new File(dirName);
- File[] listOfFiles = d.listFiles();
-
- options.getLog().println("Scanning "+dirName);
-
- if (listOfFiles == null) {
- options.getLog().println("Directory "+dirName+" doesn't exist");
- } else if (listOfFiles.length <= 0) {
- options.getLog().println("Directory "+dirName+" empty");
- } else {
- for (File file : listOfFiles) {
- if (file.isFile()) {
- if (!file.getName().startsWith(("."))) {
- if (!allFiles.containsKey(file.getPath())) {
- count++;
- options.getLog().println("Got file "+file.getPath());
- allFiles.put(file.getPath(), 1);
- this.addPendingFile(file.getPath(), dir.getPassOrFail());
- }
- }
- }
- }
- }
- }
-
- options.getLog().println("Found "+count + " new files.");
-
- if (count == 0) {
- long timeSince = System.nanoTime() - lastFileTime;
- secsSinceLast = timeSince / 1000000000;
- options.getLog().println("Not seen file for " + (secsSinceLast) + "s");
- } else {
- lastFileTime = System.nanoTime();
- }
- }
-
- public long getSecsSinceLastFile() {
- return secsSinceLast;
- }
-
- public int getPendingFiles() {
- return pendingFiles.size();
- }
-
- public boolean timedOut() {
- if (pendingFiles.size() == 0) {
- if (secsSinceLast >= options.getFileWatcherTimeout()) {
- return true;
- }
- }
-
- return false;
- }
-}
diff --git a/src/nanook/FileWatcherItem.java b/src/nanook/FileWatcherItem.java
deleted file mode 100644
index 844bbc4..0000000
--- a/src/nanook/FileWatcherItem.java
+++ /dev/null
@@ -1,31 +0,0 @@
-package nanook;
-
-public class FileWatcherItem {
- private String pathname;
- private int passOrFail;
-
- public FileWatcherItem(String p, int pf) {
- pathname = p;
- passOrFail = pf;
- }
-
- public String getPathname() {
- return pathname;
- }
-
- public int getPassOrFail() {
- return passOrFail;
- }
-
- public boolean isPass() {
- return passOrFail == NanoOKOptions.READTYPE_PASS ? true: false;
- }
-
- public boolean isFail() {
- return passOrFail == NanoOKOptions.READTYPE_FAIL ? true: false;
- }
-
- public boolean isCombined() {
- return passOrFail == NanoOKOptions.READTYPE_COMBINED ? true: false;
- }
-}
diff --git a/src/nanook/GCCounter.java b/src/nanook/GCCounter.java
deleted file mode 100644
index ac38869..0000000
--- a/src/nanook/GCCounter.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-/**
- * Count GC content in references
- *
- * @author Richard Leggett
- */
-public class GCCounter {
- PrintWriter pw = null;
- int binSize = 0;
- int currentGCPosition = 0;
- int currentGCCounter = 0;
- int currentGC = 0;
- int counts[];
-
- public GCCounter(int bs, String outputFilename) {
- binSize = bs;
- counts = new int[binSize*2];
- currentGCPosition = binSize;
-
- try {
- pw = new PrintWriter(new FileWriter(outputFilename));
- } catch (IOException e) {
- System.out.println("GCCounter exception:");
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- /**
- * Store GC
- */
- private void storeGC() {
- int gc = 0;
- double pc;
-
- for (int i=0; i<binSize; i++) {
- gc += counts[i];
- }
-
- pc = (100.0 * (double)gc) / (double)binSize;
- if (pw != null) {
- pw.println(currentGCPosition + "\t" + pc);
- }
- }
-
- /**
- * Close file
- */
- public void closeFile() {
- pw.close();
- }
-
- /**
- * Process sequence string
- * @param line
- */
- public void addString(String line) {
- for (int i=0; i<line.length(); i++) {
- if ((line.charAt(i) == 'G') || (line.charAt(i) == 'C') || (line.charAt(i) == 'g') || (line.charAt(i) == 'c')) {
- counts[currentGCCounter] = 1;
- currentGC++;
- } else {
- counts[currentGCCounter] = 0;
- }
- currentGCCounter++;
-
- if (currentGCCounter == (binSize*2)) {
- storeGC();
-
- currentGCCounter = 0;
- for (int j=binSize; j<(binSize*2); j++) {
- counts[currentGCCounter++] = counts[j];
- }
- currentGCPosition += binSize;
- currentGC = 0;
- }
- }
- }
-}
diff --git a/src/nanook/GraphMapParser.java b/src/nanook/GraphMapParser.java
deleted file mode 100644
index 46ba9fd..0000000
--- a/src/nanook/GraphMapParser.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.File;
-
-/**
- * Parser for BWA files
- * @author Richard Leggett
- */
-public class GraphMapParser extends SAMParser implements AlignmentFileParser {
- private String alignmentParams = "";
- private NanoOKOptions options;
-
- public GraphMapParser(NanoOKOptions o, References r) {
- super(o, r);
- options = o;
- }
-
- public String getProgramID() {
- return "graphmap";
- }
-
- public int getReadFormat() {
- int or = options.getReadFormat();
- return or;
-
- //return NanoOKOptions.FASTA;
- }
-
- public void setAlignmentParams(String p) {
- alignmentParams = p;
- }
-
- public boolean outputsToStdout() {
- return false;
- }
-
- public String getRunCommand(String query, String output, String reference) {
- //reference = reference.replaceAll("\\.fasta$", "");
- //reference = reference.replaceAll("\\.fa$", "");
- String command = "graphmap align -v 0 -r " + reference + " -d " + query + " -o " + output;
- if (alignmentParams.length() > 0 ) {
- command = command + " " + alignmentParams;
- }
-
- return command;
- }
-
- public void checkForIndex(String referenceFile) {
- /*String[] files = {referenceFile + ".fasta.bwt",
- referenceFile + ".fasta.pac"};
-
- for (int i=0; i<files.length; i++) {
- File f = new File(files[i]);
-
- if (!f.exists()) {
- System.out.println("");
- System.out.println("Error:");
- System.out.println("Can't find file " + f.getPath());
- System.out.println("Have you indexed the reference with bwa index?");
- System.out.println("Will continue but anticipate failure at analyse stage.");
- System.out.println("");
- return;
- }
- }*/
-
- return;
- }
-}
diff --git a/src/nanook/KmerAbundance.java b/src/nanook/KmerAbundance.java
deleted file mode 100644
index c17af9d..0000000
--- a/src/nanook/KmerAbundance.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.Serializable;
-
-/**
- * Holds the reference and read abundance of a single kmer, together with
- * the difference between them (read minus reference).
- *
- * @author Richard Leggett
- */
-public class KmerAbundance implements Comparable, Serializable {
-    private static final long serialVersionUID = NanoOK.SERIAL_VERSION;
-    private String kmer;
-    private double refAbundance;
-    private double readAbundance;
-    private double difference;
-
-    /**
-     * Constructor.
-     * @param k the kmer
-     * @param ref abundance in the reference
-     * @param read abundance in the reads
-     */
-    public KmerAbundance(String k, double ref, double read) {
-        kmer = k;
-        refAbundance = ref;
-        readAbundance = read;
-        difference = read - ref;
-    }
-
-    /**
-     * Get the kmer.
-     * @return kmer String
-     */
-    public String getKmer() {
-        return kmer;
-    }
-
-    /**
-     * Get abundance in the reference.
-     * @return reference abundance
-     */
-    public double getRefAbundance() {
-        return refAbundance;
-    }
-
-    /**
-     * Get abundance in the reads.
-     * @return read abundance
-     */
-    public double getReadAbundance() {
-        return readAbundance;
-    }
-
-    /**
-     * Get read abundance minus reference abundance.
-     * @return difference
-     */
-    public double getDifference() {
-        return difference;
-    }
-
-    /**
-     * Order by difference, largest first.
-     * @param o KmerAbundance to compare with
-     * @return -1 if this difference is larger than o's, 1 if smaller, otherwise 0
-     */
-    public int compareTo(Object o) {
-        double delta = ((KmerAbundance)o).getDifference() - difference;
-        return (delta < 0) ? -1 : ((delta > 0) ? 1 : 0);
-    }
-}
diff --git a/src/nanook/KmerMotifStatistic.java b/src/nanook/KmerMotifStatistic.java
deleted file mode 100644
index 474c4d9..0000000
--- a/src/nanook/KmerMotifStatistic.java
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.Serializable;
-import java.util.*;
-
-/**
- * Class to store kmer motif statistics: how often each motif of a fixed
- * size has been seen, the percentage each represents, and per-position
- * base counts used to draw sequence logos.
- *
- * @author Richard Leggett
- */
-public class KmerMotifStatistic implements Serializable {
-    private static final long serialVersionUID = NanoOK.SERIAL_VERSION;
-    public final static int TYPE_TOP = 1;
-    public final static int TYPE_BOTTOM = 2;
-    // Number of motifs that contribute to a top/bottom logo
-    private static final int LOGO_MOTIFS = 10;
-    private int kSize;
-    private Hashtable<String, Integer> motifs = new Hashtable<String, Integer>();
-    private Hashtable<String, Double> motifsPercent = new Hashtable<String, Double>();
-    private int totalCount = 0;
-    // Indexed [base][position], with base rows in the order A, C, G, T
-    private int[][] baseCounts;
-
-    /**
-     * Constructor
-     * @param s - kmer size
-     */
-    public KmerMotifStatistic(int s) {
-        kSize = s;
-        baseCounts = new int[4][kSize];
-    }
-
-    /**
-     * Add a motif to store, incrementing its count and the overall total.
-     * @param kmer motif to store
-     */
-    public void addMotif(String kmer) {
-        Integer currentCount = motifs.get(kmer);
-        int updated = (currentCount == null) ? 1 : currentCount + 1;
-
-        motifs.put(kmer, updated);
-        totalCount++;
-    }
-
-    /**
-     * Parse motif, updating count of bases seen at each position.
-     * Characters other than A, C, G and T are ignored.
-     * @param motif - kmer motif
-     * @param count - count of number of times seen
-     */
-    private void updateBaseCounts(String motif, int count) {
-        for (int i=0; i<motif.length(); i++) {
-            switch(motif.charAt(i)) {
-                case 'A': baseCounts[0][i]+=count; break;
-                case 'C': baseCounts[1][i]+=count; break;
-                case 'G': baseCounts[2][i]+=count; break;
-                case 'T': baseCounts[3][i]+=count; break;
-            }
-        }
-    }
-
-    /**
-     * Calculate percent each motif has been seen, relative to the total
-     * number of motifs added so far.
-     */
-    public void calculateMotifs() {
-        for (Map.Entry<String, Integer> entry : motifs.entrySet()) {
-            double percent = (100.0 * (double)entry.getValue()) / (double)totalCount;
-            motifsPercent.put(entry.getKey(), percent);
-        }
-    }
-
-    /**
-     * Update motif base counts for top 10 motifs.
-     * Prints an error and does nothing if fewer than 10 motifs are stored.
-     */
-    public void calculateTopBaseCounts() {
-        ArrayList<Map.Entry<String, Integer>> list = getSortedMotifCounts();
-
-        if (list.size() < LOGO_MOTIFS) {
-            System.out.println("Error: motif list smaller than 10");
-            return;
-        }
-
-        // The size check above guarantees every index is valid
-        for (int i=0; i<LOGO_MOTIFS; i++) {
-            Map.Entry<String, Integer> entry = list.get(i);
-            updateBaseCounts(entry.getKey(), entry.getValue());
-        }
-    }
-
-    /**
-     * Update motif bases counts for bottom 10 motifs.
-     * Prints an error and does nothing if fewer than 10 motifs are stored.
-     */
-    public void calculateBottomBaseCounts() {
-        ArrayList<Map.Entry<String, Integer>> list = getSortedMotifCounts();
-
-        if (list.size() < LOGO_MOTIFS) {
-            System.out.println("Error: motif list smaller than 10");
-            return;
-        }
-
-        // Walk backwards from the least frequent motif
-        for (int i=0; i<LOGO_MOTIFS; i++) {
-            Map.Entry<String, Integer> entry = list.get(list.size() - 1 - i);
-            updateBaseCounts(entry.getKey(), entry.getValue());
-        }
-    }
-
-    /**
-     * Write a top 10 or bottom 10 logo image. Base counts are reset before
-     * being recalculated, so successive calls do not accumulate.
-     * Exits the program if type is not recognised.
-     * @param type TYPE_TOP for Top 10 or TYPE_BOTTOM for bottom 10
-     * @param filename PNG output filename
-     */
-    public void writeLogoImage(int type, String filename) {
-        baseCounts = new int[4][kSize];
-        if (type == TYPE_TOP) {
-            calculateTopBaseCounts();
-        } else if (type == TYPE_BOTTOM) {
-            calculateBottomBaseCounts();
-        } else {
-            System.out.println("Error: wrong type to writeLogoImage");
-            System.exit(1);
-        }
-
-        SequenceLogo sl = new SequenceLogo(kSize);
-        for (int i=0; i<kSize; i++) {
-            sl.addBase(i, baseCounts[0][i], baseCounts[1][i], baseCounts[2][i], baseCounts[3][i]);
-        }
-        sl.drawImage();
-        sl.saveImage(filename);
-    }
-
-    /**
-     * Return ArrayList of motif counts, sorted with the highest count first.
-     * @return sorted motifs
-     */
-    public ArrayList<Map.Entry<String, Integer>> getSortedMotifCounts() {
-        ArrayList<Map.Entry<String, Integer>> list =
-            new ArrayList<Map.Entry<String, Integer>>(motifs.entrySet());
-
-        Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
-            public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2){
-                return o2.getValue().compareTo(o1.getValue());
-            }});
-
-        return list;
-    }
-
-    /**
-     * Return ArrayList of motif percentages, sorted with the highest first.
-     * Percentages are recalculated on every call so that motifs added after
-     * an earlier call are reflected in the result.
-     * @return sorted motifs
-     */
-    public ArrayList<Map.Entry<String, Double>> getSortedMotifPercentages() {
-        // Recompute unconditionally: a cached table goes stale as soon as
-        // addMotif changes a count or the total.
-        calculateMotifs();
-
-        ArrayList<Map.Entry<String, Double>> list =
-            new ArrayList<Map.Entry<String, Double>>(motifsPercent.entrySet());
-
-        Collections.sort(list, new Comparator<Map.Entry<String, Double>>() {
-            public int compare(Map.Entry<String, Double> o1, Map.Entry<String, Double> o2){
-                return o2.getValue().compareTo(o1.getValue());
-            }});
-
-        return list;
-    }
-
-    /**
-     * Write motif counts to stdout: the first ten (or fewer) sorted entries,
-     * numbered from 1, followed by the whole sorted list.
-     */
-    public void outputMotifCounts() {
-        ArrayList<Map.Entry<String, Integer>> list = getSortedMotifCounts();
-        int shown = Math.min(LOGO_MOTIFS, list.size());
-
-        for (int i=0; i<shown; i++) {
-            System.out.println((i + 1) + ". " + list.get(i).getKey() + "\t" + list.get(i).getValue());
-        }
-
-        System.out.println(list);
-    }
-
-    /**
-     * Get total motif count.
-     * @return total motif count
-     */
-    public int getTotalMotifCount() {
-        return totalCount;
-    }
-}
diff --git a/src/nanook/KmerTable.java b/src/nanook/KmerTable.java
deleted file mode 100644
index afffa3b..0000000
--- a/src/nanook/KmerTable.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.Serializable;
-import java.util.Hashtable;
-import java.util.Set;
-
-/**
- * Table of kmer occurrence counts, used for 5-mer comparison.
- *
- * @author Richard Leggett
- */
-public class KmerTable implements Serializable {
-    private static final long serialVersionUID = NanoOK.SERIAL_VERSION;
-    private int kmerSize = 5;
-    private Hashtable<String, Integer> counts = new Hashtable();
-
-    /**
-     * Constructor.
-     * @param k kmer size
-     */
-    public KmerTable(int k) {
-        kmerSize = k;
-    }
-
-    /**
-     * Increment the count stored for a kmer, starting from zero if unseen.
-     * @param kmer kmer to count
-     */
-    public synchronized void countKmer(String kmer) {
-        Integer seen = counts.get(kmer);
-        int updated = (seen == null) ? 1 : seen + 1;
-        counts.put(kmer, updated);
-    }
-
-    /**
-     * Print every kmer and its count to stdout, tab separated.
-     */
-    public void writeKmerTable() {
-        System.out.println("");
-        System.out.println("Writing kmer table...");
-
-        for (String kmer : counts.keySet()) {
-            int occurrences = counts.get(kmer);
-            System.out.println(kmer + "\t" + occurrences);
-        }
-
-        System.out.println("");
-    }
-
-    /**
-     * Get kmer size.
-     * @return kmer size
-     */
-    public int getKmerSize() {
-        return kmerSize;
-    }
-
-    /**
-     * Get the set of kmers that have been counted.
-     * @return key set of the underlying table
-     */
-    public Set<String> getKeys() {
-        return counts.keySet();
-    }
-
-    /**
-     * Get the count for a kmer.
-     * @param kmer kmer to look up
-     * @return count, or 0 if the kmer has not been seen
-     */
-    public int get(String kmer) {
-        Integer stored = counts.get(kmer);
-        return (stored == null) ? 0 : stored;
-    }
-
-    /**
-     * Get the underlying table.
-     * @return the Hashtable of counts itself (not a copy)
-     */
-    public Hashtable getTable() {
-        return counts;
-    }
-}
diff --git a/src/nanook/LastParser.java b/src/nanook/LastParser.java
deleted file mode 100644
index 1581e5b..0000000
--- a/src/nanook/LastParser.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.File;
-
-/**
- * Alignment handler for LAST: builds the lastal command line and inherits
- * alignment-file parsing from MAFParser.
- *
- * @author Richard Leggett
- */
-public class LastParser extends MAFParser implements AlignmentFileParser {
-    private String alignmentParams = "-s 2 -T 0 -Q 0 -a 1";
-
-    /**
-     * Constructor.
-     * @param o NanoOKOptions object
-     * @param r References object
-     */
-    public LastParser(NanoOKOptions o, References r) {
-        super(o, r);
-    }
-
-    /**
-     * Get the identifier of the aligner.
-     * @return the String "last"
-     */
-    public String getProgramID() {
-        return "last";
-    }
-
-    /**
-     * Get the read format this aligner is given.
-     * @return NanoOKOptions.FASTA
-     */
-    public int getReadFormat() {
-        return NanoOKOptions.FASTA;
-    }
-
-    /**
-     * Replace the default lastal parameters.
-     * @param p parameter string
-     */
-    public void setAlignmentParams(String p) {
-        alignmentParams = p;
-    }
-
-    /**
-     * Build the lastal command. A trailing .fasta or .fa is removed from the
-     * reference name; the output filename is not part of the command.
-     * @param query read file to align
-     * @param output alignment file (unused here)
-     * @param reference reference filename
-     * @return command line as a single String
-     */
-    public String getRunCommand(String query, String output, String reference) {
-        String database = reference.replaceAll("\\.fasta$", "").replaceAll("\\.fa$", "");
-        return "lastal " + alignmentParams + " " + database + " " + query;
-    }
-
-    /**
-     * Report whether the aligner writes alignments to stdout.
-     * @return always true
-     */
-    public boolean outputsToStdout() {
-        return true;
-    }
-
-    /**
-     * Check that the .bck and .suf index files exist for a reference and
-     * print advice for the first one that is missing. Execution continues
-     * either way.
-     * @param referenceFile reference name the index suffixes are appended to
-     */
-    public void checkForIndex(String referenceFile) {
-        String[] suffixes = {".bck", ".suf"};
-
-        for (String suffix : suffixes) {
-            File indexFile = new File(referenceFile + suffix);
-
-            if (indexFile.exists()) {
-                continue;
-            }
-
-            // Report only the first missing file
-            System.out.println("");
-            System.out.println("Error:");
-            System.out.println("Can't find file " + indexFile.getPath());
-            System.out.println("1. Have you indexed the reference with lastdb?");
-            System.out.println("2. Have you made sure that the output prefix is the same name as the reference file, apart from the .fasta or .fa extension?");
-            System.out.println(" e.g. lastdb -Q 0 referencename referencename.fasta");
-            System.out.println("Will continue but anticipate failure at analyse stage.");
-            System.out.println("");
-            return;
-        }
-    }
-}
diff --git a/src/nanook/MAFAlignmentLine.java b/src/nanook/MAFAlignmentLine.java
deleted file mode 100644
index ab3f84e..0000000
--- a/src/nanook/MAFAlignmentLine.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-/**
- * One sequence line of an alignment block in a LAST (MAF) file, split into
- * its seven whitespace-separated fields.
- *
- * @author Richard Leggett
- */
-public class MAFAlignmentLine {
-    private static final int EXPECTED_FIELDS = 7;
-    private String name;
-    private int start;
-    private int alnSize;
-    private String strand;
-    private int seqSize;
-    private String alignment;
-
-    /**
-     * Constructor. Exits the program if the line does not have exactly
-     * seven fields.
-     * @param s - alignment line string
-     */
-    public MAFAlignmentLine(String s) {
-        String[] fields = s.split("\\s+");
-
-        if (fields.length != EXPECTED_FIELDS) {
-            System.out.println("Error: can't understand alignment file format.");
-            System.exit(1);
-            return;
-        }
-
-        // fields[0] is the line type marker and is not stored
-        name = fields[1];
-        start = Integer.parseInt(fields[2]);
-        alnSize = Integer.parseInt(fields[3]);
-        strand = fields[4];
-        seqSize = Integer.parseInt(fields[5]);
-        alignment = fields[6];
-    }
-
-    /**
-     * Get name (ID) of sequence.
-     * @return name, as String
-     */
-    public String getName() {
-        return name;
-    }
-
-    /**
-     * Get start position of alignment.
-     * @return start position
-     */
-    public int getStart() {
-        return start;
-    }
-
-    /**
-     * Get end position of alignment, inclusive.
-     * @return start position plus alignment size, minus one
-     */
-    public int getEnd() {
-        int lastOffset = alnSize - 1;
-        return start + lastOffset;
-    }
-
-    /**
-     * Get alignment size.
-     * @return alignment size, in bases
-     */
-    public int getAlnSize() {
-        return alnSize;
-    }
-
-    /**
-     * Get strand.
-     * @return strand, "+" or "-"
-     */
-    public String getStrand() {
-        return strand;
-    }
-
-    /**
-     * Get sequence size.
-     * @return sequence size, in bases.
-     */
-    public int getSeqSize() {
-        return seqSize;
-    }
-
-    /**
-     * Get alignment string.
-     * @return alignment string
-     */
-    public String getAlignment() {
-        return alignment;
-    }
-}
diff --git a/src/nanook/MAFParser.java b/src/nanook/MAFParser.java
deleted file mode 100644
index 1379cf1..0000000
--- a/src/nanook/MAFParser.java
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.*;
-import java.util.ArrayList;
-import java.util.Collections;
-
-/**
- * Base class for parsers of LAST aligner (MAF) files. Holds the alignments
- * read from the most recently parsed file.
- *
- * @author Richard Leggett
- */
-public abstract class MAFParser {
-    private NanoOKOptions options;
-    private References references;
-    private SampleReportWriter report;
-    ArrayList<Alignment> alignments;
-    String leafName;
-
-    /**
-     * Constructor.
-     * @param o NanoOKOptions object
-     * @param r References object
-     */
-    public MAFParser(NanoOKOptions o, References r) {
-        options = o;
-        references = r;
-    }
-
-    /**
-     * Get file extension of alignment files
-     * @return the String ".maf"
-     */
-    public String getAlignmentFileExtension() {
-        return ".maf";
-    }
-
-    /**
-     * Parse a LAST file, replacing any previously stored alignments. Each
-     * "a score=" line is expected to be followed by a hit line and then a
-     * query line. Any exception is logged and terminates the program.
-     * @param filename filename to parse
-     * @param nonAlignedSummaryFile an AlignmentTableFile to output details of anything that doesn't align to
-     * @param overallStats ReadSetStats notified when the file contains no alignments
-     * @return number of alignments parsed
-     */
-    public int parseFile(String filename, AlignmentsTableFile nonAlignedSummaryFile, ReadSetStats overallStats) {
-        alignments = new ArrayList<Alignment>();
-        leafName = new File(filename).getName();
-
-        try {
-            BufferedReader reader = new BufferedReader(new FileReader(filename));
-            String line;
-
-            while ((line = reader.readLine()) != null) {
-                if (!line.startsWith("a score=")) {
-                    continue;
-                }
-
-                // Score is the first space-separated token after "a score="
-                String[] fields = line.substring(8).split(" ");
-                int score = Integer.parseInt(fields[0]);
-
-                // Order matters: the hit line comes before the query line
-                MAFAlignmentLine hit = new MAFAlignmentLine(reader.readLine());
-                MAFAlignmentLine query = new MAFAlignmentLine(reader.readLine());
-
-                alignments.add(new Alignment(score,
-                                             query.getName(),
-                                             query.getSeqSize(),
-                                             query.getStart(),
-                                             query.getAlnSize(),
-                                             query.getAlignment(),
-                                             hit.getName(),
-                                             hit.getSeqSize(),
-                                             hit.getStart(),
-                                             hit.getAlnSize(),
-                                             hit.getAlignment(),
-                                             false));
-            }
-            reader.close();
-
-            if (alignments.isEmpty()) {
-                nonAlignedSummaryFile.writeNoAlignmentMessage(leafName);
-                overallStats.addReadWithoutAlignment();
-            }
-        } catch (Exception e) {
-            System.out.println("parseFile Exception:");
-            e.printStackTrace();
-            options.getLog().println("Exception parsing "+filename);
-            options.getLog().close();
-            System.exit(1);
-        }
-
-        return alignments.size();
-    }
-
-    /**
-     * Sort the stored alignments using Alignment's natural ordering.
-     */
-    public void sortAlignments() {
-        if (!alignments.isEmpty()) {
-            Collections.sort(alignments);
-        }
-    }
-
-    /**
-     * Get the alignments whose hit name matches that of the first stored
-     * alignment (the highest scoring one, once sorted).
-     * @return list of matching Alignment objects, empty if none are stored
-     */
-    public ArrayList getHighestScoringSet() {
-        ArrayList<Alignment> best = new ArrayList<Alignment>();
-
-        if (alignments.isEmpty()) {
-            return best;
-        }
-
-        String topHitName = alignments.get(0).getHitName();
-        // Result is not used here; the lookup is kept in case it has side effects
-        ReferenceSequence topReference = references.getReferenceById(topHitName);
-
-        for (Alignment candidate : alignments) {
-            if (candidate.getHitName().equals(topHitName)) {
-                best.add(candidate);
-            }
-        }
-
-        return best;
-    }
-}
diff --git a/src/nanook/MarginAlignParser.java b/src/nanook/MarginAlignParser.java
deleted file mode 100644
index 7f82b99..0000000
--- a/src/nanook/MarginAlignParser.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.File;
-
-/**
- * Alignment handler for marginAlign: builds the marginAlign command line
- * and inherits alignment-file parsing from SAMParser.
- *
- * @author Richard Leggett
- */
-public class MarginAlignParser extends SAMParser implements AlignmentFileParser {
-    private String alignmentParams="";
-
-    /**
-     * Constructor.
-     * @param o NanoOKOptions object
-     * @param r References object
-     */
-    public MarginAlignParser(NanoOKOptions o, References r) {
-        super(o, r);
-    }
-
-    /**
-     * Get the identifier of the aligner.
-     * @return the String "marginalign"
-     */
-    public String getProgramID() {
-        return "marginalign";
-    }
-
-    /**
-     * Get the read format this aligner is given.
-     * @return NanoOKOptions.FASTQ
-     */
-    public int getReadFormat() {
-        return NanoOKOptions.FASTQ;
-    }
-
-    /**
-     * Set extra parameters inserted after the program name.
-     * @param p parameter string
-     */
-    public void setAlignmentParams(String p) {
-        alignmentParams = p;
-    }
-
-    /**
-     * Delete a job tree directory, if it exists, by running rm -rf.
-     * @param dirName directory to remove
-     */
-    private void removeJobTree(String dirName) {
-        File jobTreeDir = new File(dirName);
-
-        if (!jobTreeDir.exists() || !jobTreeDir.isDirectory()) {
-            return;
-        }
-
-        System.out.println("Removing "+dirName);
-        // NOTE(review): dirName is concatenated into the command unquoted, so
-        // names containing spaces may not be handled as intended - confirm
-        // how ProcessLogger.runCommand tokenises its argument.
-        new ProcessLogger().runCommand("rm -rf "+dirName);
-    }
-
-    /**
-     * Build the marginAlign command. As a side effect, any existing job tree
-     * directory for this output (output + ".jobTree") is removed first.
-     * @param query read file to align
-     * @param output alignment file to write
-     * @param reference reference file
-     * @return command line as a single String
-     */
-    public String getRunCommand(String query, String output, String reference) {
-        String jobTreeDir = output + ".jobTree";
-        StringBuilder cmd = new StringBuilder("marginAlign ");
-
-        removeJobTree(jobTreeDir);
-
-        if (!alignmentParams.isEmpty()) {
-            cmd.append(" ").append(alignmentParams).append(" ");
-        }
-
-        cmd.append(query).append(" ").append(reference).append(" ").append(output);
-        cmd.append(" --jobTree ").append(jobTreeDir);
-
-        return cmd.toString();
-    }
-
-    /**
-     * Report whether the aligner writes alignments to stdout.
-     * @return always false
-     */
-    public boolean outputsToStdout() {
-        return false;
-    }
-
-    /**
-     * Check for reference index files. No check is performed for this aligner.
-     * @param referenceFile reference filename (unused)
-     */
-    public void checkForIndex(String referenceFile) {
-        // Intentionally empty.
-    }
-}
diff --git a/src/nanook/MergedFastAQFile.java b/src/nanook/MergedFastAQFile.java
deleted file mode 100644
index 18eb424..0000000
--- a/src/nanook/MergedFastAQFile.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * To change this license header, choose License Headers in Project Properties.
- * To change this template file, choose Tools | Templates
- * and open the template in the editor.
- */
-package nanook;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.PrintWriter;
-import java.util.ArrayList;
-
-/**
- * Collects read filenames into batches and submits each full batch to the
- * thread executor as a FastAQBlastMerger task.
- * @author leggettr
- */
-public class MergedFastAQFile {
-    private NanoOKOptions options;
-    private String mergedFilename;
-    private int nSeqs = 0;
-    private int seqsPerFile = 500;
-    private int fileCounter = 0;
-    private ArrayList mergeList = new ArrayList();
-
-    /**
-     * Constructor. The batch size is taken from options.getReadsPerBlast().
-     * @param o NanoOKOptions object
-     * @param f merged filename passed on to each FastAQBlastMerger
-     */
-    public MergedFastAQFile(NanoOKOptions o, String f) {
-        options = o;
-        mergedFilename = f;
-        seqsPerFile = options.getReadsPerBlast();
-    }
-
-    /**
-     * Add a read file to the current batch. When the batch reaches the
-     * configured size it is submitted for merging and a new batch is started.
-     * @param readFilename filename of read to add
-     * @param fast5Path path of originating fast5 file (not used here)
-     */
-    public synchronized void addFile(String readFilename, String fast5Path) {
-        mergeList.add(readFilename);
-        nSeqs++;
-
-        if (nSeqs != seqsPerFile) {
-            return;
-        }
-
-        System.out.println("Adding new thread...");
-        FastAQBlastMerger merger = new FastAQBlastMerger(options, mergedFilename, mergeList, fileCounter);
-        options.getThreadExecutor().execute(merger);
-
-        // The submitted task keeps the old list; start afresh for the next batch
-        mergeList = new ArrayList();
-        fileCounter++;
-        nSeqs = 0;
-    }
-}
diff --git a/src/nanook/MotifStatistics.java b/src/nanook/MotifStatistics.java
deleted file mode 100644
index 99bdac7..0000000
--- a/src/nanook/MotifStatistics.java
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.Hashtable;
-import java.util.Map;
-import static nanook.NanoOKOptions.TYPE_2D;
-import static nanook.NanoOKOptions.TYPE_COMPLEMENT;
-import static nanook.NanoOKOptions.TYPE_TEMPLATE;
-
-/**
- * Container for insertion, deletion and substitution motif statistics, each
- * held at kmer sizes 3, 4 and 5.
- *
- * @author Richard Leggett
- */
-public class MotifStatistics implements Serializable {
-    private static final long serialVersionUID = NanoOK.SERIAL_VERSION;
-    // Smallest and largest kmer sizes tracked; array slot for size k is k - MIN_K
-    private static final int MIN_K = 3;
-    private static final int MAX_K = 5;
-    private KmerMotifStatistic[] insertionMotifs = new KmerMotifStatistic[3];
-    private KmerMotifStatistic[] deletionMotifs = new KmerMotifStatistic[3];
-    private KmerMotifStatistic[] substitutionMotifs = new KmerMotifStatistic[3];
-
-    /**
-     * Constructor. Creates one KmerMotifStatistic per error type and kmer size.
-     */
-    public MotifStatistics() {
-        for (int k=MIN_K; k<=MAX_K; k++) {
-            insertionMotifs[k-MIN_K] = new KmerMotifStatistic(k);
-            deletionMotifs[k-MIN_K] = new KmerMotifStatistic(k);
-            substitutionMotifs[k-MIN_K] = new KmerMotifStatistic(k);
-        }
-    }
-
-    /**
-     * Given a stretch of perfect sequence, store its trailing k bases as a
-     * motif for each k that is strictly shorter than the sequence.
-     * @param motif KmerMotifStatistic array to add to
-     * @param kmer perfect sequence to get motifs from
-     */
-    public void addMotifs(KmerMotifStatistic[] motif, String kmer) {
-        int length = kmer.length();
-
-        if (length < MIN_K) {
-            return;
-        }
-
-        for (int k=MIN_K; k<=MAX_K; k++) {
-            // NOTE(review): a sequence of exactly k bases is not stored for
-            // size k, although the guard above lets length 3 through -
-            // confirm whether >= was intended.
-            if (length > k) {
-                motif[k-MIN_K].addMotif(kmer.substring(length - k));
-            }
-        }
-    }
-
-    /**
-     * Add a insertion motif.
-     * @param kmer motif to add
-     */
-    public void addInsertionMotifs(String kmer) {
-        addMotifs(insertionMotifs, kmer);
-    }
-
-    /**
-     * Add a deletion motif.
-     * @param kmer motif to add
-     */
-    public void addDeletionMotifs(String kmer) {
-        addMotifs(deletionMotifs, kmer);
-    }
-
-    /**
-     * Add a substitution motif
-     * @param kmer motif to add
-     */
-    public void addSubstitutionMotifs(String kmer) {
-        addMotifs(substitutionMotifs, kmer);
-    }
-
-    /**
-     * Output motif counts to screen (debugging).
-     * @param motif KmerMotifStatistic array to get counts from
-     */
-    private void outputMotifCounts(KmerMotifStatistic[] motif) {
-        for (int slot=0; slot<=MAX_K-MIN_K; slot++) {
-            System.out.println("k="+(slot + MIN_K));
-            motif[slot].outputMotifCounts();
-        }
-    }
-
-    /**
-     * Output motif counts for all types (debugging).
-     */
-    public void outputAllMotifCounts() {
-        System.out.println("Outputtng motif data");
-
-        System.out.println("Insertions");
-        outputMotifCounts(insertionMotifs);
-
-        System.out.println("Deletions");
-        outputMotifCounts(deletionMotifs);
-
-        System.out.println("Substitutions");
-        outputMotifCounts(substitutionMotifs);
-    }
-
-    /**
-     * Get a sorted list of insertion motif counts at given kmer size.
-     * @param k kmer size required
-     * @return ArrayList of counts.
-     */
-    public ArrayList<Map.Entry<String, Integer>> getSortedInsertionMotifCounts(int k) {
-        KmerMotifStatistic stat = insertionMotifs[k-MIN_K];
-        return stat.getSortedMotifCounts();
-    }
-
-    /**
-     * Get a sorted list of deletion motif counts at given kmer size.
-     * @param k kmer size required
-     * @return ArrayList of counts.
-     */
-    public ArrayList<Map.Entry<String, Integer>> getSortedDeletionMotifCounts(int k) {
-        KmerMotifStatistic stat = deletionMotifs[k-MIN_K];
-        return stat.getSortedMotifCounts();
-    }
-
-    /**
-     * Get a sorted list of substitution motif counts at given kmer size.
-     * @param k kmer size required
-     * @return ArrayList of counts.
-     */
-    public ArrayList<Map.Entry<String, Integer>> getSortedSubstitutionMotifCounts(int k) {
-        KmerMotifStatistic stat = substitutionMotifs[k-MIN_K];
-        return stat.getSortedMotifCounts();
-    }
-
-    /**
-     * Get a sorted list of insertion motif percentages at given kmer size.
-     * @param k kmer size required
-     * @return ArrayList of percentages.
-     */
-    public ArrayList<Map.Entry<String, Double>> getSortedInsertionMotifPercentages(int k) {
-        KmerMotifStatistic stat = insertionMotifs[k-MIN_K];
-        return stat.getSortedMotifPercentages();
-    }
-
-    /**
-     * Get a sorted list of deletion motif percentages at given kmer size.
-     * @param k kmer size required
-     * @return ArrayList of percentages.
-     */
-    public ArrayList<Map.Entry<String, Double>> getSortedDeletionMotifPercentages(int k) {
-        KmerMotifStatistic stat = deletionMotifs[k-MIN_K];
-        return stat.getSortedMotifPercentages();
-    }
-
-    /**
-     * Get a sorted list of substitution motif percentages at given kmer size.
-     * @param k kmer size required
-     * @return ArrayList of percentages.
-     */
-    public ArrayList<Map.Entry<String, Double>> getSortedSubstitutionMotifPercentages(int k) {
-        KmerMotifStatistic stat = substitutionMotifs[k-MIN_K];
-        return stat.getSortedMotifPercentages();
-    }
-
-    /**
-     * Write insertion logo image (via KmerMotifStatistic object)
-     * @param type either TYPE_TOP or TYPE_BOTTOM (Top 10 or bottom 10)
-     * @param filename image filename
-     * @param k kmer size
-     */
-    public void writeInsertionLogoImage(int type, String filename, int k) {
-        KmerMotifStatistic stat = insertionMotifs[k-MIN_K];
-        stat.writeLogoImage(type, filename);
-    }
-
-    /**
-     * Write deletion logo image (via KmerMotifStatistic object)
-     * @param type either TYPE_TOP or TYPE_BOTTOM (Top 10 or bottom 10)
-     * @param filename image filename
-     * @param k kmer size
-     */
-    public void writeDeletionLogoImage(int type, String filename, int k) {
-        KmerMotifStatistic stat = deletionMotifs[k-MIN_K];
-        stat.writeLogoImage(type, filename);
-    }
-
-    /**
-     * Write substitution logo image (via KmerMotifStatistic object)
-     * @param type either TYPE_TOP or TYPE_BOTTOM (Top 10 or bottom 10)
-     * @param filename image filename
-     * @param k kmer size
-     */
-    public void writeSubstitutionLogoImage(int type, String filename, int k) {
-        KmerMotifStatistic stat = substitutionMotifs[k-MIN_K];
-        stat.writeLogoImage(type, filename);
-    }
-
-    /**
-     * Get total count of motifs seen. Exits the program if the error type
-     * is not recognised.
-     * @param errorType type of error - TYPE_INSERTION etc.
-     * @param k kmer size
-     * @return count
-     */
-    public int getTotalMotifCounts(int errorType, int k) {
-        switch(errorType) {
-            case NanoOKOptions.TYPE_INSERTION:
-                return insertionMotifs[k-MIN_K].getTotalMotifCount();
-            case NanoOKOptions.TYPE_DELETION:
-                return deletionMotifs[k-MIN_K].getTotalMotifCount();
-            case NanoOKOptions.TYPE_SUBSTITUTION:
-                return substitutionMotifs[k-MIN_K].getTotalMotifCount();
-            default:
-                System.out.println("Error: bad error type in getTotalMotifCounts");
-                System.exit(1);
-                return 0;
-        }
-    }
-}
diff --git a/src/nanook/NanoOK.java b/src/nanook/NanoOK.java
deleted file mode 100644
index 3150627..0000000
--- a/src/nanook/NanoOK.java
+++ /dev/null
@@ -1,428 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett (richard.leggett at earlham.ac.uk)
- *
- * Copyright 2015-17 Earlham Institute
- */
-
-package nanook;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.ObjectOutputStream;
-import java.util.ArrayList;
-import java.util.Locale;
-import java.util.Set;
-import java.util.concurrent.ThreadPoolExecutor;
-//import ncsa.hdf.object.FileFormat;
-//import ncsa.hdf.object.h5.H5File;
-
-/**
- * Entry class for tool.
- *
- * @author Richard Leggett
- */
-public class NanoOK {
- public final static String VERSION_STRING = "v1.26";
- public final static long SERIAL_VERSION = 3L;
-
- /**
-     * Check for program dependencies - Rscript, pdflatex and h5dump - by
-     * running each with --version and printing either the version line
-     * found or an error message. A missing tool does not stop execution.
-     */
-    public static void checkDependencies() {
-        ProcessLogger logger = new ProcessLogger();
-
-        // Rscript: the last line with the expected prefix is reported
-        String rVersion = null;
-        ArrayList<String> rOutput = logger.checkCommand("Rscript --version");
-        if (rOutput != null) {
-            for (String line : rOutput) {
-                if (line.startsWith("R scripting front-end")) {
-                    rVersion = line;
-                }
-            }
-        }
-        System.out.println((rVersion != null)
-                           ? rVersion
-                           : "*** ERROR: Couldn't find Rscript - is R installed? ***");
-
-        // pdflatex: the first line mentioning pdfTeX is reported
-        String pdflatexVersion = null;
-        ArrayList<String> texOutput = logger.checkCommand("pdflatex --version");
-        if (texOutput != null) {
-            for (String line : texOutput) {
-                if (line.contains("pdfTeX")) {
-                    pdflatexVersion = line;
-                    break;
-                }
-            }
-        }
-        System.out.println((pdflatexVersion != null)
-                           ? pdflatexVersion
-                           : "*** ERROR: Couldn't find pdflatex - is TeX installed? ***");
-
-        // h5dump: the last line with the expected prefix is reported
-        String hVersion = null;
-        ArrayList<String> h5Output = logger.checkCommand("h5dump --version");
-        if (h5Output != null) {
-            for (String line : h5Output) {
-                if (line.startsWith("h5dump")) {
-                    hVersion = line;
-                }
-            }
-        }
-        System.out.println((hVersion != null)
-                           ? hVersion
-                           : "*** ERROR: Couldn't find h5dump - is H5 Tools installed? ***");
-
-        System.out.println("");
-    }
-
- /**
-     * Developer test: draw a default SequenceLogo and save it to a
-     * hard-coded path on the author's desktop.
-     */
-    public static void testLogo() {
-        final String outputPath = "/Users/leggettr/Desktop/logo.png";
-        SequenceLogo sequenceLogo = new SequenceLogo();
-
-        sequenceLogo.drawImage();
-        sequenceLogo.saveImage(outputPath);
-    }
-
- /**
-     * Developer test of the SequenceReader class: index a hard-coded FASTA
-     * file and print four subsequences of one of its entries.
-     */
-    public static void testSequenceReader() {
-        final String sequenceId = "gi|223667766|ref|NZ_DS264586.1|";
-        // Start/end pairs passed to getSubSequence, in the order printed
-        final int[][] ranges = {{0, 499}, {0, 9}, {200, 209}, {200, 214}};
-
-        SequenceReader reader = new SequenceReader(true);
-        reader.indexFASTAFile("/Users/leggettr/Documents/Projects/Nanopore/test.fasta", null, true);
-
-        for (int[] range : ranges) {
-            String sub = reader.getSubSequence(sequenceId, range[0], range[1]);
-            System.out.println("String (" + range[0] + "," + range[1] + ") = ["+sub+"]");
-        }
-    }
-
- /**
-     * Developer test: load the references, then parse a hard-coded SAM file
-     * with BWAParser into a 2D ReadSetStats.
-     * @param options program options
-     * @param references references passed to the parser
-     */
-    public static void testSamToLast(NanoOKOptions options, References references) {
-        BWAParser samParser = new BWAParser(options, references);
-        AlignmentsTableFile unalignedTable = new AlignmentsTableFile("atf.txt");
-        ReadSetStats stats = new ReadSetStats(options, NanoOKOptions.TYPE_2D);
-
-        options.getReferences().loadReferences();
-        samParser.parseFile("/Users/leggettr/Desktop/test.fasta.sam", unalignedTable, stats);
-    }
-
- /**
-     * Developer test of the LAST parser. Currently only constructs the
-     * parser and a summary table; the parse call itself is disabled.
-     * @param options program options
-     * @param overallStats overall statistics (unused while parsing is disabled)
-     * @param references references passed to the parser
-     */
-    public static void testParser(NanoOKOptions options, OverallStats overallStats, References references) {
-        AlignmentFileParser lastParser = new LastParser(options, references);
-        AlignmentsTableFile unalignedTable = new AlignmentsTableFile("blob.txt");
-        // Parsing of a sample .maf file was previously invoked here and is disabled.
-    }
-
- /**
-     * Developer test of fast5 reading: extract template FASTQ from two
-     * hard-coded fast5 files, write whichever were obtained to ff.fq and
-     * fg.fq, then exit the program.
-     * @param options program options
-     */
-    public static void testHDF(NanoOKOptions options) {
-        Fast5File first = new Fast5File(options, "/Users/leggettr/Desktop/TEST12345_ch1_file0.fast5");
-        Fast5File second = new Fast5File(options, "/Users/leggettr/Documents/Projects/Nanopore/NanoOK_lambda_test/fast5/pass/N79596_Lambda8kbp_LCv4_test_3559_1_ch37_file38_strand.fast5");
-
-        // Both reads are extracted before either is written
-        FastAQFile firstFastq = first.getFastq(-1, NanoOKOptions.TYPE_TEMPLATE);
-        FastAQFile secondFastq = second.getFastq(-1, NanoOKOptions.TYPE_TEMPLATE);
-
-        if (firstFastq != null) {
-            firstFastq.writeFastq("ff.fq");
-        }
-        if (secondFastq != null) {
-            secondFastq.writeFastq("fg.fq");
-        }
-
-        System.exit(0);
-    }
-
- private static void analyse(NanoOKOptions options) throws InterruptedException {
- OverallStats overallStats = new OverallStats(options);
- options.getReferences().setOverallStats(overallStats);
-
- options.getSampleChecker().checkReadDirectory();
-
- // Load reference data
- options.getReferences().loadReferences();
- options.setReadFormat(options.getParser().getReadFormat());
- options.initialiseAlignmentSummaryFile();
-
- System.out.println("");
-
- // Parse all reads sets
- if (options.doParseAlignments()) {
- ReadLengthsSummaryFile summary = new ReadLengthsSummaryFile(options.getLengthSummaryFilename());
- summary.open(options.getSample());
-
- for (int type = 0; type<3; type++) {
- if (options.isProcessingReadType(type)) {
- System.out.println("Parsing " + NanoOKOptions.getTypeFromInt(type));
- ReadSet readSet = new ReadSet(type, options, overallStats.getStatsByType(type));
- int nReads = readSet.processReads();
-
- if (nReads < 1) {
- System.out.println("Error: unable to find any " + NanoOKOptions.getTypeFromInt(type) + " reads to process.");
- System.out.println("");
- System.exit(1);
- }
-
- int nReadsWithAlignments = readSet.getStats().getNumberOfReadsWithAlignments();
- if (nReadsWithAlignments < 1) {
- System.out.println("");
- System.out.println("Error: unable to find any " + NanoOKOptions.getTypeFromInt(type) + " alignments to process.");
- System.out.println("Common reasons for this:");
- System.out.println("1. Failure to index the reference with the alignment tool, resulting in alignment files of 0 bytes");
- System.out.println("2. Wrong reference specified to the align stage, resulting in no alignments");
- System.out.println("3. When indexing with LAST, the output prefix needs to be the same as the reference FASTA file, minus the .fasta extension");
- System.out.println(" e.g. lastdb -Q 0 referencename referencename.fasta");
- System.out.println("");
- System.exit(1);
- } else if (nReadsWithAlignments < 400) {
- System.out.println("Warning: not many alignments ("+nReadsWithAlignments+") found to process.");
- }
-
- summary.addReadSetStats(overallStats.getStatsByType(type));
- overallStats.getStatsByType(type).closeKmersFile();
- overallStats.getStatsByType(type).writeSubstitutionStats();
- overallStats.getStatsByType(type).writeErrorMotifStats();
-
- int ignoredDuplicates = overallStats.getStatsByType(type).getIgnoredDuplicates();
- if (ignoredDuplicates > 0) {
- System.out.println(ignoredDuplicates + " ignored duplicate read IDs.");
- }
-
- System.out.println("");
-
- }
- }
- summary.close();
-
- // Write files
- System.out.println("Writing analysis files");
- Set<String> ids = options.getReferences().getAllIds();
- int allCount = 3; //ids.size() * 3;
- int counter = 1;
- for (int type=0; type<3; type++) {
- long completed = counter;
- long total = allCount;
- long e = 0;
- long s = NanoOKOptions.PROGRESS_WIDTH;
-
- if (total > 0) {
- e = NanoOKOptions.PROGRESS_WIDTH * completed / total;
- s = NanoOKOptions.PROGRESS_WIDTH - e;
- }
-
- System.out.print("\r[");
- for (int i=0; i<e; i++) {
- System.out.print("=");
- }
- for (int i=0; i<s; i++) {
- System.out.print(" ");
- }
- System.out.print("] " + completed +"/" + total);
- options.getReferences().writeReferenceStatFiles(type);
- options.getReferences().writeReferenceSummary(type);
- counter++;
- }
- System.out.println("");
-
- System.out.println("Writing object");
- try {
- FileOutputStream fos = new FileOutputStream(options.getAnalysisDir() + File.separator + "OverallStats.ser");
- ObjectOutputStream oos = new ObjectOutputStream(fos);
- oos.writeObject(overallStats);
- oos.close();
- } catch (Exception e) {
- System.out.println("Exception trying to write object:");
- e.printStackTrace();
- }
-
- }
-
- // Plot graphs
- if (options.doPlotGraphs()) {
- System.out.println("");
- System.out.println("Plotting graphs");
- RGraphPlotter plotter = new RGraphPlotter(options);
- plotter.plot(false);
- }
-
- // Make report
- if (options.doMakeReport()) {
- System.out.println("");
- System.out.println("Making report");
- SampleReportWriter rw = new SampleReportWriter(options, overallStats);
- rw.writeReport();
-
- if (options.doMakePDF()) {
- System.out.println("");
- System.out.println("Making PDF");
- rw.makePDF();
- }
- }
-
- System.out.println("");
- System.out.println("Done");
- }
-
- private static void extract(NanoOKOptions options) throws InterruptedException {
- ReadExtractor re = new ReadExtractor(options);
- re.createDirectories();
- re.extract();
- }
-
- private static void align(NanoOKOptions options) throws InterruptedException {
- AlignmentFileParser parser = options.getParser();
- parser.checkForIndex(options.getReferenceFile().substring(0, options.getReferenceFile().lastIndexOf('.')));
- ReadAligner aligner = new ReadAligner(options, parser);
- options.setReadFormat(parser.getReadFormat());
- aligner.createDirectories();
- aligner.align();
- }
-
- private static void compare(NanoOKOptions options) throws InterruptedException {
- System.out.println("Comparing");
- SampleComparer comparer = new SampleComparer(options);
- comparer.loadSamples();
- comparer.compareSamples();
-
- options.setReferences(comparer.getSample(0).getStatsByType(0).getOptions().getReferences());
-
- System.out.println("");
- System.out.println("Plotting graphs");
- RGraphPlotter plotter = new RGraphPlotter(options);
- plotter.plot(true);
-
- System.out.println("");
- System.out.println("Making PDF");
- ComparisonReportWriter crw = new ComparisonReportWriter(options, comparer);
- crw.writeReport();
- crw.makePDF();
- }
-
-    /**
-     * Run watch mode: check the aligner index for the reference, prepare the
-     * aligner directories and start a DirectoryWatcher.
-     *
-     * Argument parsing only validates the reference for the align and analyse
-     * modes, so it is checked again here. A missing reference is reported and
-     * ends the program with status 1 instead of failing with a
-     * NullPointerException. A reference name without an extension is used
-     * unchanged as the index prefix.
-     *
-     * @param options program options
-     * @throws InterruptedException propagated from the watcher
-     */
-    private static void watch(NanoOKOptions options) throws InterruptedException {
-        String referenceFile = options.getReferenceFile();
-        if (referenceFile == null) {
-            System.out.println("Error: You must specify a reference");
-            System.exit(1);
-        }
-
-        // Strip the final extension, if there is one, to obtain the index prefix
-        int extensionStart = referenceFile.lastIndexOf('.');
-        String indexPrefix = (extensionStart >= 0) ? referenceFile.substring(0, extensionStart) : referenceFile;
-
-        AlignmentFileParser parser = options.getParser();
-        parser.checkForIndex(indexPrefix);
-        ReadAligner aligner = new ReadAligner(options, parser);
-        options.setReadFormat(parser.getReadFormat());
-        aligner.createDirectories();
-
-        DirectoryWatcher dw = new DirectoryWatcher(options, aligner, parser);
-        dw.watch();
-    }
-
- private static void process(NanoOKOptions options) throws InterruptedException {
- ReadProcessor rp = new ReadProcessor(options);
- options.makeDirectories();
- options.initialiseReadMerger();
- rp.process();
- }
-
- private static void memoryReport() {
- Runtime runtime = Runtime.getRuntime();
- long mb = 1024 * 1024;
- long totalMem = runtime.totalMemory() / mb;
- long maxMem = runtime.maxMemory() / mb;
- long freeMem = runtime.freeMemory() / mb;
- System.out.println("totalMem: " + totalMem + "Mb");
- System.out.println(" maxMem: " + maxMem + "Mb");
- System.out.println(" freeMem: " + freeMem + "Mb");
- }
-
- /**
- * Entry to tool.
- * @param args command line arguments
- */
- public static void main(String[] args) throws InterruptedException {
- System.out.println("");
- System.out.println("NanoOK " + VERSION_STRING);
- System.out.println("");
- System.out.println("Comments/bugs to: richard.leggett at earlham.ac.uk");
- System.out.println("Follow NanoOK on twitter: @NanoOK_Software");
- System.out.println("");
-
- NanoOKOptions options = new NanoOKOptions();
-
- Locale.setDefault(new Locale("en", "US"));
-
- // Parse command line
- options.parseArgs(args);
-
- // Check dependencies
- System.out.println("");
- System.out.println("Checking dependencies");
- checkDependencies();
-
- //testHDF(options);
- //System.exit(0);
-
- File logsDir = new File(options.getLogsDir());
- if (!logsDir.exists()) {
- logsDir.mkdir();
- }
-
-
- if (options.getRunMode() == NanoOKOptions.MODE_EXTRACT) {
- //extract(options);
- process(options);
- } else if (options.getRunMode() == NanoOKOptions.MODE_ALIGN) {
- //align(options);
- process(options);
- } else if (options.getRunMode() == NanoOKOptions.MODE_ANALYSE) {
- options.checkAnalysisDirectoryStructure();
- analyse(options);
- //scan(options);
- } else if (options.getRunMode() == NanoOKOptions.MODE_COMPARE) {
- compare(options);
- } else if (options.getRunMode() == NanoOKOptions.MODE_WATCH) {
- watch(options);
- } else if (options.getRunMode() == NanoOKOptions.MODE_PROCESS) {
- process(options);
- }
-
- //memoryReport();
-
- options.getLog().close();
-
- options.getThreadExecutor().shutdown();
-
- if (options.getReturnValue() != 0) {
- System.out.println("Exiting with error code");
- System.exit(options.getReturnValue());
- }
- }
-}
diff --git a/src/nanook/NanoOKLog.java b/src/nanook/NanoOKLog.java
deleted file mode 100644
index 16c6290..0000000
--- a/src/nanook/NanoOKLog.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett (richard.leggett at earlham.ac.uk)
- *
- * Copyright 2015-17 Earlham Institute
- */
-
-package nanook;
-
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.io.Serializable;
-import java.util.Calendar;
-import java.util.GregorianCalendar;
-
-/**
- * Simple timestamped log file.
- *
- * Every write is a no-op until {@link #open(String)} has succeeded, so
- * callers may log unconditionally. The writer is transient and is therefore
- * not carried across serialisation. Methods that touch the writer are
- * synchronized.
- *
- * @author Richard Leggett
- */
-public class NanoOKLog implements Serializable {
-    private static final long serialVersionUID = NanoOK.SERIAL_VERSION;
-    private transient PrintWriter pw = null;
-
-    public NanoOKLog() {
-    }
-
-    /**
-     * Open (and truncate) the log file. A log that is already open is closed
-     * first so its file handle is not leaked. On failure the exception is
-     * reported on standard output and logging stays disabled.
-     *
-     * @param filename path of the log file to create
-     */
-    public synchronized void open(String filename) {
-        if (pw != null) {
-            pw.close();
-            pw = null;
-        }
-
-        try {
-            pw = new PrintWriter(new FileWriter(filename, false));
-        } catch (IOException e) {
-            System.out.println("NanoOKLog exception");
-            e.printStackTrace();
-        }
-    }
-
-    /**
-     * Close the log file if one is open.
-     */
-    public synchronized void close() {
-        if (pw != null) {
-            pw.close();
-        }
-    }
-
-    /**
-     * Format the current local time.
-     *
-     * @return time as day/month/year hours:minutes:seconds
-     */
-    public String getTime() {
-        GregorianCalendar timeNow = new GregorianCalendar();
-        // Calendar.MONTH is zero-based, hence the +1
-        return String.format("%d/%d/%d %02d:%02d:%02d",
-                             timeNow.get(Calendar.DAY_OF_MONTH),
-                             timeNow.get(Calendar.MONTH)+1,
-                             timeNow.get(Calendar.YEAR),
-                             timeNow.get(Calendar.HOUR_OF_DAY),
-                             timeNow.get(Calendar.MINUTE),
-                             timeNow.get(Calendar.SECOND));
-    }
-
-    /**
-     * Currently does nothing: print and println already prefix every
-     * message with the time.
-     */
-    public synchronized void writeTimeStamp() {
-        // Intentionally empty
-    }
-
-    /**
-     * Write a message prefixed with the current time, without a line break.
-     *
-     * @param s message text
-     */
-    public synchronized void print(String s) {
-        if (pw != null) {
-            pw.print(getTime() + " " + s);
-        }
-    }
-
-    /**
-     * Write a message prefixed with the current time, followed by a line break.
-     *
-     * @param s message text
-     */
-    public synchronized void println(String s) {
-        if (pw != null) {
-            pw.println(getTime() + " " + s);
-        }
-    }
-
-    /**
-     * Get the underlying writer.
-     *
-     * @return the writer, or null if no log file has been opened
-     */
-    public synchronized PrintWriter getPrintWriter() {
-        return pw;
-    }
-}
diff --git a/src/nanook/NanoOKOptions.java b/src/nanook/NanoOKOptions.java
deleted file mode 100644
index de8376e..0000000
--- a/src/nanook/NanoOKOptions.java
+++ /dev/null
@@ -1,1389 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett (richard.leggett at earlham.ac.uk)
- *
- * Copyright 2015-17 Earlham Institute
- */
-
-package nanook;
-
-import java.io.*;
-import java.util.ArrayList;
-import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.ThreadPoolExecutor;
-import java.util.concurrent.TimeUnit;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-/**
- * Representation of program options and some global constants.
- *
- * @author Richard Leggett
- */
-public class NanoOKOptions implements Serializable {
- private static final long serialVersionUID = NanoOK.SERIAL_VERSION;
- public final static int MAX_KMER = 20000;
- public final static int MAX_READ_LENGTH = 1000000;
- public final static int MAX_READS = 1000000;
- public final static int MODE_EXTRACT = 1;
- public final static int MODE_ALIGN = 2;
- public final static int MODE_ANALYSE = 3;
- public final static int MODE_COMPARE = 4;
- public final static int MODE_WATCH = 5;
- public final static int MODE_PROCESS = 6;
- public final static int FASTA = 1;
- public final static int FASTQ = 2;
- public final static int TYPE_TEMPLATE = 0;
- public final static int TYPE_COMPLEMENT = 1;
- public final static int TYPE_2D = 2;
- public final static int TYPE_ALL = -1;
- public final static int TYPE_INSERTION = 0;
- public final static int TYPE_DELETION = 1;
- public final static int TYPE_SUBSTITUTION = 2;
- public final static int READTYPE_COMBINED = 0;
- public final static int READTYPE_PASS = 1;
- public final static int READTYPE_FAIL = 2;
- public final static int MIN_ALIGNMENTS = 10;
- public final static int PROGRESS_WIDTH = 50;
- private References references = new References(this);
- private String referenceFile=null;
- private String sampleDirectory = null;
- private String sampleName = null;
- private String scriptsDir="/Users/leggettr/Documents/github/nanotools/scripts";
- private String aligner="last";
- private String alignerParams="";
- private String scheduler="system";
- private String sampleList = null;
- private String comparisonDir = null;
- private String bacteriaPath = null;
- private String ntPath = null;
- private String cardPath = null;
- private String processFile = null;
- private int coverageBinSize = 100;
- private boolean processPassReads = true;
- private boolean processFailReads = true;
- private boolean parseAlignments = true;
- private boolean plotGraphs = true;
- private boolean makeReport = true;
- private boolean makePDF = true;
- private int maxReads = 0;
- private boolean process2DReads = true;
- private boolean processTemplateReads = true;
- private boolean processComplementReads = true;
- private boolean fixIDs = false;
- private boolean fixRandom = false;
- private boolean doKmerCounting = true;
- private boolean showAlignerCommand = false;
- private boolean extractingReads = false;
- private boolean aligningReads = false;
- private boolean parsingReads = false;
- private boolean blastingReads = false;
- private boolean mergeFastaFiles = false;
- private boolean force = false;
- private double minQForPass = -1;
- private int runMode = 0;
- private int readFormat = FASTA;
- private int numThreads = 1;
- private int fileWatcherTimeout = 10;
- private String jobQueue = "";
- private NanoOKLog logFile = new NanoOKLog();
- private String imageFormat = "pdf";
- private int specifiedType = TYPE_2D;
- private String readsDir = "fast5";
- private int returnValue = 0;
- private int basecallIndex = -1;
- private boolean outputFast5Path = true;
- private int readsPerBlast = 500;
- private boolean clearLogsOnStart = true;
- private transient WatcherLog watcherReadLog = new WatcherLog(this);
- private transient WatcherLog watcherCardFileLog = new WatcherLog(this);
- private transient WatcherLog watcherntFileLog = new WatcherLog(this);
- private transient WatcherLog watcherCardCommandLog = new WatcherLog(this);
- private transient WatcherLog watcherntCommandLog = new WatcherLog(this);
- private transient BlastMerger mergerCardPass = new BlastMerger(this);
- private transient BlastMerger mergerntPass = new BlastMerger(this);
- private transient BlastMerger mergerCardFail = new BlastMerger(this);
- private transient BlastMerger mergerntFail = new BlastMerger(this);
- private transient MergedFastAQFile mergedPass2D;
- private transient MergedFastAQFile mergedPass1D;
- private transient MergedFastAQFile mergedFail1D;
- private transient MergedFastAQFile mergedFail2D;
- private transient ThreadPoolExecutor executor;
- private transient BlastHandler[][] blastHandlers = new BlastHandler[3][2];
- private transient ArrayList<String> blastProcesses = new ArrayList<String>();
- private int fileCounterOffset = 0;
- private transient ReadFileMerger readFileMerger;
- private transient SampleChecker sampleChecker = new SampleChecker(this);
-
- public NanoOKOptions() {
- String value = System.getenv("NANOOK_DIR");
-
- if (value != null) {
- scriptsDir = value + File.separator + "bin";
- } else {
- System.out.println("*** WARNING: You should set NANOOK_DIR. Default value unlikely to work. ***");
- System.out.println("");
- }
-
- System.out.println("Scripts dir: "+scriptsDir);
- }
-
-    /**
-     * Get the reference collection.
-     * @return the References object currently held
-     */
-    public References getReferences() {
-        return this.references;
-    }
-
-    /**
-     * Replace the reference collection.
-     * @param r the References object to hold from now on
-     */
-    public void setReferences(References r) {
-        this.references = r;
-    }
-
-    /**
-     * Store the value later returned by getReturnValue().
-     * @param r return value to store
-     */
-    public void setReturnValue(int r) {
-        this.returnValue = r;
-    }
-
-    /**
-     * Get the stored return value.
-     * @return the value last passed to setReturnValue(), initially 0
-     */
-    public int getReturnValue() {
-        return this.returnValue;
-    }
-
- /**
- * Parse command line arguments.
- * @param args array of command line arguments
- */
- public void parseArgs(String[] args) {
- int i=0;
-
- if (args.length <= 1) {
- System.out.println("");
- System.out.println("Syntax nanook <extract|align|analyse|compare|process> [options]");
- System.out.println("");
- System.out.println("extract options:");
- System.out.println(" -s|-sample <dir> specifies sample directory");
- System.out.println(" -f|-reads specifies alternative dir for FAST5 files (default fast5)");
- System.out.println(" Can be absolute (beginning with /) or relative");
- System.out.println(" e.g. -f reads/downloads if replicating Metrichor file structure");
- System.out.println(" -a|-fasta specifies FASTA file extraction (default)");
- System.out.println(" -q|-fastq specifies FASTQ file extraction");
- System.out.println(" -basecallindex specifies the index of the analysis (default: latest)");
- //System.out.println(" -printpath to output FAST5 path in FASTA read header");
- System.out.println(" -mergereads to generate merged FASTA files in addition to single read files");
- System.out.println(" -minquality <value> to set the minimum quality for a 'pass' read");
- System.out.println("");
- System.out.println("align options:");
- System.out.println(" -s|-sample <dir> specifies sample directory");
- System.out.println(" -r|-reference <path> specifies path to reference database");
- System.out.println(" -aligner <name> specifies the aligner (default last)");
- System.out.println(" -alignerparams <params> specifies paramters to the aligner");
- System.out.println(" -showaligns echoes aligner commands to screen");
- System.out.println("");
- System.out.println("analyse options:");
- System.out.println(" -s|-sample <dir> specifies sample directory");
- System.out.println(" -r|-reference <path> specifies path to reference database");
- System.out.println(" -aligner <name> specifies the aligner (default last)");
- System.out.println(" -coveragebin <int> specifies coverage bin size (default 100)");
- System.out.println(" -bitmaps to output bitmap PNG graphs instead of PDF");
- System.out.println("");
- System.out.println("compare options:");
- System.out.println(" -l|-samplelist <file> specifies a sample list file");
- System.out.println(" -o|-outputdir <directory> specifies an output directory");
- System.out.println(" -type <2d|template|complement> specifies an output directory");
- System.out.println("");
- System.out.println("process options:");
- System.out.println(" -process <file> specifies a process file");
- System.out.println("");
- //System.out.println("Sample type options:");
- //System.out.println(" -barcoding if reads are barcoded and sorted into subdirs");
- //System.out.println(" -batchdirs if using MinKNOW 1.4.2 or above with separate batch_ directories");
- //System.out.println("");
- System.out.println("Read type options:");
- System.out.println(" -passonly to analyse only pass reads");
- System.out.println(" -failonly to analyse only fail reads");
- System.out.println(" -2donly to analyse only 2D reads");
- System.out.println(" -templateonly to analyse just Template reads");
- System.out.println(" -complementonly to analyse just Complement reads");
- System.out.println("");
- System.out.println("Other options:");
- System.out.println(" -t|-numthreads <number> specifies the number of threads to use (default 1)");
- System.out.println(" -log <filename> enables debug logging to file");
- System.out.println(" -force to force NanoOK to ignore warnings");
- System.out.println(" -timeout to set the number of seconds before giving up waiting for new reads (default 2)");
- System.out.println("");
- System.out.println("Valid aligners: last, bwa, blasr, marginalign, graphmap");
- System.out.println("");
- System.exit(0);
- }
-
- parseAlignments = true;
- plotGraphs = true;
- makeReport = true;
-
- if (args[i].equals("extract")) {
- runMode = MODE_EXTRACT;
- extractingReads = true;
- aligningReads = false;
- parsingReads = false;
- blastingReads = false;
- mergeFastaFiles = false;
- fileWatcherTimeout = 2;
- } else if (args[i].equals("align")) {
- runMode = MODE_ALIGN;
- extractingReads = false;
- aligningReads = true;
- parsingReads = false;
- blastingReads = false;
- mergeFastaFiles = false;
- fileWatcherTimeout = 2;
- } else if (args[i].equals("analyse") || args[i].equals("analyze")) {
- runMode = MODE_ANALYSE;
- extractingReads = false;
- aligningReads = false;
- parsingReads = true;
- blastingReads = false;
- mergeFastaFiles = false;
- fileWatcherTimeout = 2;
- } else if (args[i].equals("compare")) {
- runMode = MODE_COMPARE;
- } else if (args[i].equals("watch")) {
- runMode = MODE_WATCH;
- } else if ((args[i].equals("process")) || (args[i].equals("scan"))) {
- runMode = MODE_PROCESS;
- } else {
- System.out.println("Unknonwn mode " + args[i] + " - must be extract, align or analyse");
- System.exit(1);
- }
- i++;
-
- while (i < (args.length)) {
- if (args[i].equalsIgnoreCase("-coveragebin")) {
- coverageBinSize = Integer.parseInt(args[i+1]);
- i+=2;
- } else if (args[i].equalsIgnoreCase("-batchdirs")) {
- System.out.println("-batchdirs option ignore - now detected automatically.");
- i++;
- } else if (args[i].equalsIgnoreCase("-timeout")) {
- fileWatcherTimeout = Integer.parseInt(args[i+1]);
- i+=2;
- } else if (args[i].equalsIgnoreCase("-fileoffset")) {
- fileCounterOffset = Integer.parseInt(args[i+1]);
- i+=2;
- } else if (args[i].equalsIgnoreCase("-reference") || args[i].equalsIgnoreCase("-r")) {
- referenceFile = args[i+1];
- i+=2;
- } else if (args[i].equalsIgnoreCase("-process")) {
- processFile = args[i+1];
- readProcessFile();
- i+=2;
- } else if (args[i].equalsIgnoreCase("-bacteria")) {
- bacteriaPath = args[i+1];
- i+=2;
- } else if (args[i].equalsIgnoreCase("-nt")) {
- ntPath = args[i+1];
- i+=2;
- } else if (args[i].equalsIgnoreCase("-card")) {
- cardPath = args[i+1];
- i+=2;
- } else if (args[i].equalsIgnoreCase("-force")) {
- force = true;
- i++;
- } else if (args[i].equalsIgnoreCase("-sample") | args[i].equalsIgnoreCase("-s")) {
- sampleDirectory = args[i+1];
- i+=2;
- } else if (args[i].equalsIgnoreCase("-samplelist") | args[i].equalsIgnoreCase("-l")) {
- sampleList = args[i+1];
- i+=2;
- } else if (args[i].equalsIgnoreCase("-outputdir") | args[i].equalsIgnoreCase("-o")) {
- comparisonDir = args[i+1];
- i+=2;
- } else if (args[i].equalsIgnoreCase("-reads") | args[i].equalsIgnoreCase("-f")) {
- readsDir = args[i+1];
- if (readsDir.endsWith("/")) {
- readsDir = readsDir.substring(0, readsDir.length()-1);
- }
- i+=2;
- } else if (args[i].equalsIgnoreCase("-maxreads")) {
- maxReads = Integer.parseInt(args[i+1]);
- i+=2;
- } else if (args[i].equalsIgnoreCase("-log")) {
- logFile.open(args[i+1]);
- i+=2;
- } else if (args[i].equalsIgnoreCase("-type")) {
- if (args[i+1].equalsIgnoreCase("template")) {
- specifiedType = TYPE_TEMPLATE;
- } else if (args[i+1].equalsIgnoreCase("complement")) {
- specifiedType = TYPE_COMPLEMENT;
- } else if (args[i+1].equalsIgnoreCase("2d")) {
- specifiedType = TYPE_2D;
- }
- i+=2;
- } else if (args[i].equalsIgnoreCase("-nofail") || args[i].equalsIgnoreCase("-passonly")) {
- processPassReads = true;
- processFailReads = false;
- i++;
- } else if (args[i].equalsIgnoreCase("-nopass") || args[i].equalsIgnoreCase("-failonly")) {
- processPassReads = false;
- processFailReads = true;
- i++;
- } else if (args[i].equalsIgnoreCase("-fasta") || args[i].equalsIgnoreCase("-a")) {
- //if (runMode == MODE_EXTRACT) {
- readFormat = FASTA;
- //}
- i++;
- } else if (args[i].equalsIgnoreCase("-fastq") || args[i].equalsIgnoreCase("-q")) {
- //if (runMode == MODE_EXTRACT) {
- readFormat = FASTQ;
- //}
- i++;
- } else if (args[i].equalsIgnoreCase("-2donly")) {
- process2DReads = true;
- processTemplateReads = false;
- processComplementReads = false;
- i++;
- } else if ((args[i].equalsIgnoreCase("-1d")) ||
- (args[i].equalsIgnoreCase("-templateonly")) ) {
- process2DReads = false;
- processTemplateReads = true;
- processComplementReads = false;
- i++;
- } else if (args[i].equalsIgnoreCase("-complementonly")) {
- process2DReads = false;
- processTemplateReads = false;
- processComplementReads = true;
- i++;
- } else if (args[i].equalsIgnoreCase("-printpath")) {
- outputFast5Path = true;
- i++;
- } else if (args[i].equalsIgnoreCase("-bitmaps")) {
- imageFormat = "png";
- i++;
- } else if (args[i].equalsIgnoreCase("-fixids")) {
- fixIDs = true;
- i++;
- } else if (args[i].equalsIgnoreCase("-showaligns")) {
- showAlignerCommand = true;
- i++;
- } else if (args[i].equalsIgnoreCase("-deterministic")) {
- fixRandom = true;
- i++;
- } else if (args[i].equalsIgnoreCase("-aligner")) {
- aligner = args[i+1];
- i+=2;
- } else if (args[i].equalsIgnoreCase("-alignerparams")) {
- alignerParams = args[i+1];
- System.out.println("Alignment parameters: "+alignerParams);
- i+=2;
- } else if (args[i].equalsIgnoreCase("-scheduler")) {
- scheduler = args[i+1];
- i+=2;
- } else if (args[i].equalsIgnoreCase("-queue")) {
- jobQueue = args[i+1];
- i+=2;
- } else if (args[i].equalsIgnoreCase("-readsperblast")) {
- readsPerBlast = Integer.parseInt(args[i+1]);
- i+=2;
- } else if (args[i].equalsIgnoreCase("-basecallindex")) {
- basecallIndex = Integer.parseInt(args[i+1]);
- i+=2;
- } else if (args[i].equalsIgnoreCase("-numthreads") || args[i].equalsIgnoreCase("-t")) {
- numThreads = Integer.parseInt(args[i+1]);
- i+=2;
- } else if (args[i].equalsIgnoreCase("-subdirs") || args[i].equalsIgnoreCase("-barcoding")) {
- System.out.println("-barcoding option ignore - now detected automatically.");
- i++;
- } else if (args[i].equalsIgnoreCase("-keeplogs")) {
- clearLogsOnStart = false;
- i++;
- } else if (args[i].equalsIgnoreCase("-mergereads")) {
- mergeFastaFiles = true;
- i++;
- } else if (args[i].equalsIgnoreCase("-minquality")) {
- minQForPass = Double.parseDouble(args[i+1]);
- i+=2;
- } else {
- System.out.println("Unknown parameter: " + args[i]);
- System.exit(0);
- }
- }
-
- if ((runMode == MODE_ALIGN) || (runMode == MODE_ANALYSE)) {
- if (referenceFile == null) {
- System.out.println("Error: You must specify a reference");
- System.exit(1);
- }
- if (!referenceFile.endsWith(".fa") && !referenceFile.endsWith(".fasta")) {
- System.out.println("Error: reference must specify a .fa or .fasta file");
- System.exit(1);
- }
- }
-
- if (runMode == MODE_PROCESS) {
- if (processFile == null) {
- System.out.println("Error: you must specify a process file");
- System.exit(1);
- }
- }
-
- if (runMode == MODE_COMPARE) {
- if (comparisonDir == null) {
- System.out.println("Error: you must specify an output dir for the comparison");
- System.exit(1);
- } else {
- checkAndMakeComparisonDirs();
- }
- } else {
- if (sampleDirectory == null) {
- System.out.println("Error: You must specify a sample");
- System.exit(1);
- } else {
- File s = new File(sampleDirectory);
- if (!s.exists()) {
- System.out.println("Error: sample directory doesn't exist");
- System.exit(1);
- }
-
- if (!s.isDirectory()) {
- System.out.println("Error: sample doesn't point to a directory");
- System.exit(1);
- }
-
- sampleDirectory = s.getAbsolutePath();
-
- sampleName = s.getName();
- }
- }
-
- initialiseBlastHandlers();
-
- System.out.println("Number of cores: "+Runtime.getRuntime().availableProcessors());
-
- executor = new ThreadPoolExecutor(numThreads, numThreads, 10, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>());
- }
-
-    /**
-     * Get the aligner name.
-     * @return aligner name as String
-     */
-    public String getAligner() {
-        return this.aligner;
-    }
-
-    /**
-     * Get the extra parameters for the aligner.
-     * @return parameter String, empty unless set on the command line
-     */
-    public String getAlignerParams() {
-        return this.alignerParams;
-    }
-
-    /**
-     * Set the read format.
-     * @param f read format constant (FASTA or FASTQ)
-     */
-    public void setReadFormat(int f) {
-        this.readFormat = f;
-    }
-
-    /**
-     * Get the sample name, which parseArgs takes from the last element of
-     * the sample directory path.
-     * @return sample name as String
-     */
-    public String getSample() {
-        return this.sampleName;
-    }
-
-    /**
-     * Get the reference file name given on the command line.
-     * @return filename as String, or null if none was specified
-     */
-    public String getReferenceFile() {
-        return this.referenceFile;
-    }
-
-    /**
-     * Get the bin size used for coverage graphs.
-     * @return bin size (100 unless changed with -coveragebin)
-     */
-    public int getCoverageBinSize() {
-        return this.coverageBinSize;
-    }
-
-    /**
-     * Convert a read type constant into its display name.
-     * @param n read type (TYPE_TEMPLATE, TYPE_COMPLEMENT or TYPE_2D)
-     * @return "Template", "Complement" or "2D"; "Unknown" for any other value
-     */
-    public static String getTypeFromInt(int n) {
-        switch(n) {
-            case TYPE_TEMPLATE:
-                return "Template";
-            case TYPE_COMPLEMENT:
-                return "Complement";
-            case TYPE_2D:
-                return "2D";
-            default:
-                return "Unknown";
-        }
-    }
-
-    /**
-     * Convert a pass/fail constant into its lower-case name.
-     * @param n READTYPE_PASS or READTYPE_FAIL
-     * @return "pass" or "fail"; "Unknown" for any other value
-     */
-    public static String getPassFailFromInt(int n) {
-        if (n == READTYPE_PASS) {
-            return "pass";
-        }
-        if (n == READTYPE_FAIL) {
-            return "fail";
-        }
-        return "Unknown";
-    }
-
-    /**
-     * Convert an error type constant into its display name.
-     * @param n error type (TYPE_INSERTION, TYPE_DELETION or TYPE_SUBSTITUTION)
-     * @return "Insertion", "Deletion" or "Substitution"; "Unknown" for any other value
-     */
-    public static String getErrorTypeFromInt(int n) {
-        switch(n) {
-            case TYPE_INSERTION:
-                return "Insertion";
-            case TYPE_DELETION:
-                return "Deletion";
-            case TYPE_SUBSTITUTION:
-                return "Substitution";
-            default:
-                return "Unknown";
-        }
-    }
-
-    /**
-     * Create any of the analysis, Unaligned, graphs, motifs and LaTeX
-     * directories that do not exist yet. Parents are listed before their
-     * children because each directory is made with a single-level mkdir.
-     */
-    public void checkAnalysisDirectoryStructure() {
-        final File[] requiredDirs = {
-            new File(getAnalysisDir()),
-            new File(getAnalysisDir()+File.separator+"Unaligned"),
-            new File(getGraphsDir()),
-            new File(getGraphsDir() + File.separator + "motifs"),
-            new File(getLatexDir())
-        };
-
-        for (File dir : requiredDirs) {
-            if (!dir.exists()) {
-                dir.mkdir();
-            }
-        }
-    }
-
-    /**
-     * Create any of the comparison output, graphs, LaTeX, logs and logs/R
-     * directories that do not exist yet, in that order.
-     */
-    public void checkAndMakeComparisonDirs() {
-        final String[] requiredPaths = {
-            comparisonDir,
-            this.getGraphsDir(),
-            this.getLatexDir(),
-            this.getLogsDir(),
-            this.getLogsDir()+File.separator+"R"
-        };
-
-        for (String path : requiredPaths) {
-            File dir = new File(path);
-            if (!dir.exists()) {
-                dir.mkdir();
-            }
-        }
-    }
-
-    /**
-     * Create the per-reference subdirectories of the analysis and graphs
-     * directories if they do not exist yet.
-     * @param reference name of reference, used as the subdirectory name
-     */
-    public void checkAndMakeReferenceAnalysisDir(String reference) {
-        final String[] parentDirs = { getAnalysisDir(), getGraphsDir() };
-
-        for (String parent : parentDirs) {
-            File referenceDir = new File(parent + File.separator + reference);
-            if (!referenceDir.exists()) {
-                referenceDir.mkdir();
-            }
-        }
-    }
-
-    /**
-     * Create the alignment summary file, or empty it if it already exists.
-     * An I/O failure is reported and ends the program with status 1.
-     */
-    public void initialiseAlignmentSummaryFile() {
-        final String summaryPath = this.getAlignmentSummaryFilename();
-
-        try {
-            // Opening without append truncates the file; nothing is written
-            PrintWriter emptyWriter = new PrintWriter(new FileWriter(summaryPath));
-            emptyWriter.close();
-        } catch (IOException e) {
-            System.out.println("initialiseAlignmentSummaryFile exception:");
-            e.printStackTrace();
-            System.exit(1);
-        }
-    }
-
-    /**
-     * Get the path of the alignment summary file (all_summary.txt in the
-     * analysis directory).
-     * @return filename as String
-     */
-    public String getAlignmentSummaryFilename() {
-        final String analysisDir = getAnalysisDir();
-        return analysisDir + File.separator + "all_summary.txt";
-    }
-
-    /**
-     * Get the path of the read length summary file (length_summary.txt in
-     * the analysis directory).
-     * @return filename as String
-     */
-    public String getLengthSummaryFilename() {
-        final String analysisDir = getAnalysisDir();
-        return analysisDir + File.separator + "length_summary.txt";
-    }
-
-    /**
-     * Get the directory holding the helper scripts.
-     * @return directory name as String
-     */
-    public String getScriptsDir() {
-        return this.scriptsDir;
-    }
-
-    /**
-     * Get the sample directory.
-     * @return directory name as String
-     */
-    public String getSampleDirectory() {
-        return this.sampleDirectory;
-    }
-
-    /**
-     * Get the graphs directory. In compare mode this is "graphs" inside the
-     * comparison directory; otherwise it is "graphs" plus the analysis
-     * suffix inside the sample directory.
-     * @return directory name as String
-     */
-    public String getGraphsDir() {
-        if (runMode != MODE_COMPARE) {
-            return sampleDirectory + File.separator + "graphs" + getAnalysisSuffix();
-        }
-        return comparisonDir + File.separator + "graphs";
-    }
-
-    /**
-     * Get the FASTA directory inside the sample directory.
-     * @return directory name as String
-     */
-    public String getFastaDir() {
-        final String sampleRoot = sampleDirectory;
-        return sampleRoot + File.separator + "fasta";
-    }
-
-    /**
-     * Get the FASTQ directory inside the sample directory.
-     * @return directory name as String
-     */
-    public String getFastqDir() {
-        final String sampleRoot = sampleDirectory;
-        return sampleRoot + File.separator + "fastq";
-    }
-
- public String getFast5Dir() {
- // Check for full path
- if ((readsDir.startsWith("/")) || (readsDir.startsWith("~")) || (readsDir.startsWith("."))) {
- return readsDir;
- } else {
- return sampleDirectory + File.separator + readsDir;
- }
- }
-
-    /**
-     * Get the directory holding reads in the current read format: the FASTQ
-     * directory when the format is FASTQ, otherwise the FASTA directory.
-     * @return directory name as String
-     */
-    public String getReadDir() {
-        return (readFormat == FASTQ) ? getFastqDir() : getFastaDir();
-    }
-
-    /**
-     * Get the name of the current read format.
-     * @return "FASTQ" when the format is FASTQ, otherwise "FASTA"
-     */
-    public String getExpectedReadFormat() {
-        return (readFormat == FASTQ) ? "FASTQ" : "FASTA";
-    }
-
-    /**
-     * Get the aligner output directory: a subdirectory of the sample
-     * directory named after the configured aligner.
-     * @return directory name as String
-     */
-    public String getAlignerDir() {
-        final String sampleRoot = sampleDirectory;
-        return sampleRoot + File.separator + aligner;
-    }
-
-    /**
-     * Get the parser output directory: a subdirectory of the sample
-     * directory named after the configured aligner with "_parse" appended.
-     * @return directory name as String
-     */
-    public String getParserDir() {
-        final String sampleRoot = sampleDirectory;
-        return sampleRoot + File.separator + aligner+"_parse";
-    }
-
-    /**
-     * Get the LaTeX directory: "latex" plus the analysis suffix, inside the
-     * comparison directory in compare mode and inside the sample directory
-     * otherwise.
-     * @return directory name as String
-     */
-    public String getLatexDir() {
-        final String parent = (runMode == MODE_COMPARE) ? comparisonDir : sampleDirectory;
-        return parent + File.separator + "latex" + getAnalysisSuffix();
-    }
-
-    /**
-     * Get the logs directory: "logs" inside the comparison directory in
-     * compare mode and inside the sample directory otherwise.
-     * @return directory name as String
-     */
-    public String getLogsDir() {
-        final String parent = (runMode == MODE_COMPARE) ? comparisonDir : sampleDirectory;
-        return parent + File.separator + "logs";
-    }
-
- //public boolean isPassFailFast5Dir() {
- // File passDir = new File(getFast5Dir() + File.separator + "pass");
- // File failDir = new File(getFast5Dir() + File.separator + "fail");
- // boolean rc = false;
-
- // if (((passDir.exists() && passDir.isDirectory()) || (failDir.exists() && failDir.isDirectory()))) {
- // rc = true;
- // }
-
- // return rc;
- //}
-
-
-
- //public boolean isPassFailReadDir() {
- // File passDir = new File(getReadDir() + File.separator + "pass");
- // File failDir = new File(getReadDir() + File.separator + "pass");
- // boolean rc = false;
- //
- // if (((passDir.exists() && passDir.isDirectory()) || failDir.exists() && failDir.isDirectory())) {
- // rc = true;
- // }
- //
- //
- //
- // return rc;
- //}
-
- /**
-  * Build the suffix that is appended to output directory names.
-  * It starts with "_" and the aligner name, then adds "_passfail",
-  * "_passonly" or "_failonly" according to which of pass/fail reads are
-  * processed, and finally "_2donly" when neither template nor complement
-  * reads are processed.
-  * @return suffix as String
-  */
- public String getAnalysisSuffix() {
-     // A StringBuilder replaces the redundant new String(...) wrapper and repeated +=.
-     StringBuilder suffix = new StringBuilder("_").append(aligner);
-
-     if (processPassReads && processFailReads) {
-         suffix.append("_passfail");
-     } else if (processPassReads) {
-         suffix.append("_passonly");
-     } else if (processFailReads) {
-         suffix.append("_failonly");
-     }
-
-     if (!processTemplateReads && !processComplementReads) {
-         suffix.append("_2donly");
-     }
-
-     return suffix.toString();
- }
-
-
- /**
- * Get analysis directory.
- * @return directory name as String
- */
- public String getAnalysisDir() {
- return sampleDirectory + File.separator + "analysis" + getAnalysisSuffix();
- }
-
- /**
- * Get LaTeX filename.
- * @return filename as String
- */
- public String getTexFilename() {
- return sampleDirectory + File.separator + "latex" + getAnalysisSuffix() + File.separator + sampleName + ".tex";
- }
-
- /**
- * Check if processing "pass" reads.
- * @return true to process
- */
- public boolean isProcessingPassReads() {
- return processPassReads;
- }
-
- /**
- * Check if processing "fail" reads.
- * @return true to process
- */
- public boolean isProcessingFailReads() {
- return processFailReads;
- }
-
- public boolean isProcessingComplementReads() {
- return processComplementReads;
- }
-
- public boolean isProcessingTemplateReads() {
- return processTemplateReads;
- }
-
- public boolean isProcessing2DReads() {
- return process2DReads;
- }
-
- /**
-  * Check whether a given read type is being processed.
-  * @param type one of the TYPE_ constants
-  * @return true for TYPE_ALL, the matching process flag for template,
-  *         complement and 2D, and false for any other value
-  */
- public boolean isProcessingReadType(int type) {
-     switch (type) {
-         case TYPE_ALL:
-             return true;
-         case TYPE_TEMPLATE:
-             return processTemplateReads;
-         case TYPE_COMPLEMENT:
-             return processComplementReads;
-         case TYPE_2D:
-             return process2DReads;
-         default:
-             return false;
-     }
- }
-
- public int getNumberOfTypes() {
- int t = 0;
- if (processTemplateReads) t++;
- if (processComplementReads) t++;
- if (process2DReads) t++;
- return t;
- }
-
- /**
- * Check if to parse alignments or not
- * @return true to parse
- */
- public boolean doParseAlignments() {
- return parseAlignments;
- }
-
- /**
- * Check if to plot graphs or not
- * @return true to plot
- */
- public boolean doPlotGraphs() {
- return plotGraphs;
- }
-
- /**
- * Check if to make report or not
- * @return true to make report
- */
- public boolean doMakeReport() {
- return makeReport;
- }
-
- /**
- * Check if to make PDF or not
- * @return true to make PDF
- */
- public boolean doMakePDF() {
- return makePDF;
- }
-
- /**
- * Get maximum number of reads (used for debugging)
- * @return maximum number of reads
- */
- public int getMaxReads() {
- return maxReads;
- }
-
- public int getReadFormat() {
- return readFormat;
- }
-
- public int getRunMode() {
- return runMode;
- }
-
- public boolean fixIDs() {
- return fixIDs;
- }
-
- public boolean fixRandom() {
- return fixRandom;
- }
-
- public String getScheduler() {
- return scheduler;
- }
-
- public int getNumberOfThreads() {
- return numThreads;
- }
-
- public String getQueue() {
- return jobQueue;
- }
-
- public NanoOKLog getLog() {
- return logFile;
- }
-
- public boolean isBarcoded() {
- return sampleChecker.usingBarcodes();
- }
-
- /**
-  * Get the right parser for the configured aligner.
-  * Prints "Aligner unknown!" and exits with status 1 if the aligner name
-  * is not one of last, bwa, blasr, marginalign or graphmap.
-  * @return a new parser; the aligner parameters are passed to it unless
-  *         they are the empty string
-  */
- public AlignmentFileParser getParser() {
-     AlignmentFileParser parser = null;
-
-     switch(aligner) {
-         case "last":
-             parser = new LastParser(this, references);
-             break;
-         case "bwa":
-             parser = new BWAParser(this, references);
-             break;
-         case "blasr":
-             parser = new BLASRParser(this, references);
-             break;
-         case "marginalign":
-             parser = new MarginAlignParser(this, references);
-             break;
-         case "graphmap":
-             parser = new GraphMapParser(this, references);
-             break;
-         default:
-             System.out.println("Aligner unknown!");
-             System.out.println("");
-             System.exit(1);
-             break;
-     }
-
-     // Compare contents, not references: "!=" only skipped the call when
-     // alignerParams happened to be the very same interned "" object.
-     // A null value is still passed through, exactly as before.
-     if (!"".equals(alignerParams)) {
-         parser.setAlignmentParams(alignerParams);
-     }
-
-     return parser;
- }
-
- public boolean doKmerCounting() {
- return doKmerCounting;
- }
-
- public String getImageFormat() {
- return imageFormat;
- }
-
- public String getSampleList() {
- return sampleList;
- }
-
- public String getComparisonDir() {
- return comparisonDir;
- }
-
- public int getSpecifiedType() {
- return specifiedType;
- }
-
- public boolean showAlignerCommand() {
- return showAlignerCommand;
- }
-
- public int getBasecallIndex() {
- return basecallIndex;
- }
-
- public boolean outputFast5Path() {
- return outputFast5Path;
- }
-
- /**
-  * Get the bacteria path. Prints an error and exits with status 1 if no
-  * path has been set.
-  * @return bacteria path as String
-  */
- public String getBacteriaPath() {
-     if (bacteriaPath == null) {
-         // The message previously named the nt path (copy-paste from getntPath).
-         System.out.println("Error: no bacteria path set.\n");
-         System.exit(1);
-     }
-
-     return bacteriaPath;
- }
-
- /**
-  * Get the nt path. Prints an error and exits with status 1 if no path
-  * has been set.
-  * @return nt path as String
-  */
- public String getntPath() {
-     if (ntPath != null) {
-         return ntPath;
-     }
-
-     System.out.println("Error: no nt path set.\n");
-     System.exit(1);
-     return ntPath;
- }
-
- /**
-  * Get the CARD path. Prints an error and exits with status 1 if no path
-  * has been set.
-  * @return CARD path as String
-  */
- public String getCardPath() {
-     if (cardPath != null) {
-         return cardPath;
-     }
-
-     System.out.println("Error: no CARD path set.\n");
-     System.exit(1);
-     return cardPath;
- }
-
- public WatcherLog getWatcherReadLog() {
- return watcherReadLog;
- }
-
- public WatcherLog getWatcherCardFileLog() {
- return watcherCardFileLog;
- }
-
- public WatcherLog getWatcherCardCommandLog() {
- return watcherCardCommandLog;
- }
-
- public WatcherLog getWatcherntFileLog() {
- return watcherntFileLog;
- }
-
- public WatcherLog getWatcherntCommandLog() {
- return watcherntCommandLog;
- }
-
- public BlastMerger getMergerCardPass() {
- return mergerCardPass;
- }
-
- public BlastMerger getMergerntPass() {
- return mergerntPass;
- }
-
- public BlastMerger getMergerCardFail() {
- return mergerCardFail;
- }
-
- public BlastMerger getMergerntFail() {
- return mergerntFail;
- }
-
-
- public boolean clearLogsOnStart() {
- return clearLogsOnStart;
- }
-
- public void openMergedFile(String filename, int type, int pf) {
- if (pf == NanoOKOptions.READTYPE_PASS) {
- if (type == NanoOKOptions.TYPE_TEMPLATE) {
- mergedPass1D = new MergedFastAQFile(this, filename);
- } else if (type == NanoOKOptions.TYPE_2D) {
- mergedPass2D = new MergedFastAQFile(this, filename);
- }
- } else if (pf == NanoOKOptions.READTYPE_FAIL) {
- if (type == NanoOKOptions.TYPE_TEMPLATE) {
- mergedFail1D = new MergedFastAQFile(this, filename);
- } else if (type == NanoOKOptions.TYPE_2D) {
- mergedFail2D = new MergedFastAQFile(this, filename);
- }
- }
- }
-
- public MergedFastAQFile getMergedFile(int type, int pf) {
- if (pf == NanoOKOptions.READTYPE_PASS) {
- if (type == NanoOKOptions.TYPE_TEMPLATE) {
- return mergedPass1D;
- } else if (type == NanoOKOptions.TYPE_2D) {
- return mergedPass2D;
- }
- } else if (pf == NanoOKOptions.READTYPE_FAIL) {
- if (type == NanoOKOptions.TYPE_TEMPLATE) {
- return mergedFail1D;
- } else if (type == NanoOKOptions.TYPE_2D) {
- return mergedFail2D;
- }
- }
-
- return null;
- }
-
- public int getReadsPerBlast() {
- return readsPerBlast;
- }
-
- public ThreadPoolExecutor getThreadExecutor() {
- return executor;
- }
-
- public boolean keepRunning() {
- return true;
- }
-
- public boolean isExtractingReads() {
- return extractingReads;
- }
-
- public boolean isAligningRead() {
- return aligningReads;
- }
-
- public boolean isParsingRead() {
- return parsingReads;
- }
-
- public boolean isBlastingRead() {
- return blastingReads;
- }
-
- public int getFileWatcherTimeout() {
- return fileWatcherTimeout;
- }
-
- /**
-  * Make sure a directory exists, creating it (and any missing parents)
-  * when it does not. Exits with status 1 if the path exists but is a
-  * file, or if the directory cannot be created.
-  * @param dir path of the directory
-  */
- private void checkAndMakeDirectory(String dir) {
-     File f = new File(dir);
-
-     if (f.exists()) {
-         if (!f.isDirectory()) {
-             System.out.println("Error: " + dir + " is a file, not a directory!");
-             System.exit(1);
-         }
-         return;
-     }
-
-     System.out.println("Making directory " + dir);
-     // Creation failure is only reported through the return value, which
-     // used to be ignored. isDirectory() is re-checked so that a directory
-     // created concurrently by someone else is not treated as an error.
-     if (!f.mkdirs() && !f.isDirectory()) {
-         System.out.println("Error: unable to make directory " + dir);
-         System.exit(1);
-     }
- }
-
- /**
-  * Make a directory plus one child directory for each of the read types
-  * 0 to 2 that is being processed. Child names come from getTypeFromInt().
-  * @param dirname path of the parent directory
-  */
- private void checkAndMakeDirectoryWithChildren(String dirname) {
-     checkAndMakeDirectory(dirname);
-
-     int readType = 0;
-     while (readType < 3) {
-         if (isProcessingReadType(readType)) {
-             String child = dirname + File.separator + NanoOKOptions.getTypeFromInt(readType);
-             checkAndMakeDirectory(child);
-         }
-         readType++;
-     }
- }
-
- // Directory structure
- // fast5
- // - pass
- // - BC01
- // - BC02
- // - fail
- // - unaligned
- // fasta
- // - pass
- // - BC01
- // - 2D
- // - Template
- // - Complement
- // - BC02
- // ...
- // - fail
- /**
-  * Make the directories needed by the enabled processing stages: the logs
-  * directory always, then the read, aligner, parser and BLAST directories
-  * according to which stages are switched on.
-  */
- public void makeDirectories() {
-     checkAndMakeDirectory(this.getLogsDir());
-
-     if (this.isExtractingReads()) {
-         checkAndMakeDirectory(this.getReadDir());
-
-         if (this.isBlastingRead()) {
-             checkAndMakeDirectory(this.getReadDir() + "_chunks");
-         }
-     }
-
-     if (this.isAligningRead()) {
-         checkAndMakeDirectory(this.getAlignerDir());
-         checkAndMakeDirectory(this.getLogsDir() + File.separator + this.getAligner());
-     }
-
-     if (this.isParsingRead()) {
-         checkAndMakeDirectory(this.getParserDir());
-     }
-
-     if (this.isBlastingRead()) {
-         for (String blastProcess : blastProcesses) {
-             // Five comma-separated fields: name, tool, db, memory, queue.
-             // Only name and tool are needed here; entries with any other
-             // field count are skipped.
-             String[] params = blastProcess.split(",");
-             if (params.length == 5) {
-                 String blastName = params[0];
-                 String blastTool = params[1];
-                 String blastDirName = blastTool + "_" + blastName + File.separator;
-                 checkAndMakeDirectory(getSampleDirectory() + File.separator + blastDirName);
-                 checkAndMakeDirectory(getLogsDir() + File.separator + blastDirName);
-             }
-         }
-     }
- }
-
- /**
-  * Read the process file and apply its settings.
-  * Each line is split on ":" into a key and a value. Lines of one
-  * character or fewer are skipped, keys are matched case-insensitively,
-  * and unrecognised keys are reported unless they start with "#".
-  * Any exception, or a recognised key with no value, ends the program
-  * with exit status 1.
-  */
- void readProcessFile() {
-     System.out.println("\nReading process file "+processFile);
-
-     // try-with-resources closes the reader on every path; it was never closed before.
-     try (BufferedReader br = new BufferedReader(new FileReader(processFile))) {
-         String line;
-
-         while ((line = br.readLine()) != null) {
-             if (line.length() <= 1) {
-                 continue;
-             }
-
-             String[] tokens = line.split(":");
-             // A line made only of ':' characters splits into an empty array.
-             String key = (tokens.length > 0) ? tokens[0] : "";
-
-             if (key.equalsIgnoreCase("Extract")) {
-                 extractingReads = true;
-                 System.out.println("  Extract "+processFileValue(tokens));
-             } else if (key.equalsIgnoreCase("Fast5Dir")) {
-                 readsDir = processFileValue(tokens);
-                 System.out.println("  Fast5Dir "+readsDir);
-             } else if (key.equalsIgnoreCase("Aligner")) {
-                 // NOTE(review): a second "Aligner" branch that assigned
-                 // aligner = tokens[1] followed "Analysis" but could never be
-                 // reached, so it has been dropped. Confirm whether this key
-                 // should also set the aligner name.
-                 aligningReads = true;
-                 System.out.println("  Aligner "+processFileValue(tokens));
-             } else if (key.equalsIgnoreCase("Reference")) {
-                 referenceFile = processFileValue(tokens);
-                 System.out.println("  Reference "+referenceFile);
-             } else if (key.equalsIgnoreCase("Sample")) {
-                 sampleDirectory = processFileValue(tokens);
-                 System.out.println("  Sample "+sampleDirectory);
-             } else if (key.equalsIgnoreCase("Analysis")) {
-                 parsingReads = true;
-                 System.out.println("  Analysis "+processFileValue(tokens));
-             } else if (key.equalsIgnoreCase("Blast")) {
-                 blastingReads = true;
-                 String blastSpec = processFileValue(tokens);
-                 blastProcesses.add(blastSpec);
-                 System.out.println("  Blast "+blastSpec);
-             } else if (key.equalsIgnoreCase("ReadsPerBlast")) {
-                 readsPerBlast = Integer.parseInt(processFileValue(tokens));
-                 System.out.println("  ReadsPerBlast "+readsPerBlast);
-             } else if (!key.startsWith("#")) {
-                 System.out.println("Unknown token "+key);
-             }
-         }
-     } catch (Exception e) {
-         System.out.println("readProcessFile Exception:");
-         e.printStackTrace();
-         System.exit(1);
-     }
-
-     System.out.println("");
- }
-
- /**
-  * Return the value part of a split process-file line, exiting with a
-  * clear message (rather than an array index exception) when it is missing.
-  * @param tokens the line split on ":"; must hold at least the key
-  * @return the second token
-  */
- private static String processFileValue(String[] tokens) {
-     if (tokens.length < 2) {
-         System.out.println("Error: no value given for " + tokens[0] + " in process file");
-         System.exit(1);
-     }
-     return tokens[1];
- }
-
- public void initialiseBlastHandlers() {
- for (int t=0; t<3; t++) {
- for (int pf=NanoOKOptions.READTYPE_PASS; pf<=NanoOKOptions.READTYPE_FAIL; pf++) {
- blastHandlers[t][pf-1] = new BlastHandler(this, t, pf);
- }
- }
- }
-
- public BlastHandler getBlastHandler(int t, int pf) {
- return blastHandlers[t][pf-1];
- }
-
- public ArrayList<String> getBlastProcesses() {
- return blastProcesses;
- }
-
- public int getFileCounterOffset() {
- return fileCounterOffset;
- }
-
- public boolean mergeFastaFiles() {
- return mergeFastaFiles;
- }
-
- public ReadFileMerger getReadFileMerger() {
- return readFileMerger;
- }
-
- public boolean usingBatchDirs() {
- return sampleChecker.usingBatchDirs();
- }
-
- public boolean doForce() {
- return force;
- }
-
- public SampleChecker getSampleChecker() {
- return sampleChecker;
- }
-
- public boolean usingPassFailDirs() {
- return sampleChecker.usingPassFailDirs();
- }
-
- public double getMinQ() {
- return minQForPass;
- }
-
- public void initialiseReadMerger() {
- readFileMerger = new ReadFileMerger(this);
- }
-
- public boolean debugMode() {
- return false;
- }
-}
diff --git a/src/nanook/OverallStats.java b/src/nanook/OverallStats.java
deleted file mode 100644
index db5e183..0000000
--- a/src/nanook/OverallStats.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.Serializable;
-
-/**
- * Represents overall (as opposed to per reference) stats for Template, Complement and 2D reads.
- *
- * @author Richard Leggett
- */
-public class OverallStats implements Serializable {
- private static final long serialVersionUID = NanoOK.SERIAL_VERSION;
- private ReadSetStats[] readStats = new ReadSetStats[3];
-
- /**
- * Constructor.
- * @param o NanoOKOptions structure
- */
- public OverallStats(NanoOKOptions o) {
- for (int t=0; t<3; t++) {
- readStats[t] = new ReadSetStats(o, t);
- }
- }
-
- /**
- * Get a set of stats (for either Template, Complement or 2D reads)
- * @param t integer type - see defs in NanoOKOptions
- * @return ReadSetStats object
- */
- public ReadSetStats getStatsByType(int t) {
- return readStats[t];
- }
-}
diff --git a/src/nanook/ParserRunnable.java b/src/nanook/ParserRunnable.java
deleted file mode 100644
index 7bc2826..0000000
--- a/src/nanook/ParserRunnable.java
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.File;
-import java.util.List;
-import java.util.Random;
-
-/**
- * Enables multi-threading of parsing
- *
- * @author Richard Leggett
- */
-public class ParserRunnable implements Runnable
-{
-    private final NanoOKOptions options;
-    private final ReadSetStats stats;
-    private final String readPath;
-    private final String alignmentPath;
-    private final AlignmentsTableFile nonAlignedSummary;
-    private ReferenceSequence readReference = null;
-    private SequenceReader sr;
-    private final int type;
-    private final int passfail;
-
-    /**
-     * Constructor.
-     * @param o program options
-     * @param s stats object that read lengths and alignments are recorded against
-     * @param rp path of the read file
-     * @param ap path of the alignment file
-     * @param t read type, used in run() to find the kmer table
-     * @param pf pass/fail value, passed to ReadSetStats.addReadFile()
-     * @param nas summary table passed to the alignment parser
-     */
-    public ParserRunnable(NanoOKOptions o, ReadSetStats s, String rp, String ap, int t, int pf, AlignmentsTableFile nas) {
-        options = o;
-        readPath = rp;
-        alignmentPath = ap;
-        stats = s;
-        type = t;
-        passfail = pf;
-        nonAlignedSummary = nas;
-    }
-
-    /**
-     * Pick top alignment from sorted list. List is sorted in order of score, but if there are
-     * matching scores, we pick one at random (unless random choice is fixed in the options).
-     * @param al list of alignments; must not be empty
-     * @return index
-     */
-    private int pickTopAlignment(List<Alignment> al) {
-        int index = 0;
-        int topScore = al.get(0).getScore();
-        int countSame = 0;
-
-        if (!options.fixRandom()) {
-            // Find out how many have the same score
-            while ((countSame < al.size()) && (al.get(countSame).getScore() == topScore)) {
-                countSame++;
-            }
-
-            if (countSame > 1) {
-                Random rn = new Random();
-                index = rn.nextInt(countSame);
-            }
-        }
-
-        return index;
-    }
-
-    /**
-     * Parse the alignment file, merge the alignments from the chosen top
-     * alignment onwards, and store the merged result against the hit reference.
-     * Any exception is reported, logged, and recorded as return value 1.
-     */
-    private void parseAlignment()
-    {
-        try {
-            File file = new File(alignmentPath);
-            AlignmentFileParser parser = options.getParser();
-
-            options.getLog().println("");
-            options.getLog().println("> New file " + file.getName());
-            options.getLog().println("");
-
-            int nAlignments = parser.parseFile(alignmentPath, nonAlignedSummary, stats);
-
-            if (nAlignments > 0) {
-                parser.sortAlignments();
-                List<Alignment> al = parser.getHighestScoringSet();
-                int topAlignment = pickTopAlignment(al);
-                // Look the chosen alignment up once instead of on every use.
-                Alignment top = al.get(topAlignment);
-                String readReferenceName = top.getHitName();
-
-                options.getLog().println("Query size = " + top.getQuerySequenceSize());
-                options.getLog().println("  Hit size = " + top.getHitSequenceSize());
-
-                readReference = options.getReferences().getReferenceById(readReferenceName);
-                AlignmentMerger merger = new AlignmentMerger(options, readReference, top.getQuerySequenceSize(), stats, stats.getType());
-                // NOTE(review): merging starts at the chosen index, so alignments
-                // before it in the list are not merged - confirm this is intended.
-                for (int i=topAlignment; i<al.size(); i++) {
-                    merger.addAlignment(al.get(i));
-                }
-                AlignmentInfo ais = merger.endMergeAndStoreStats();
-                readReference.getStatsByType(stats.getType()).addCoverage(merger.getOverallHitStart(), merger.getOverallHitEnd()-merger.getOverallHitStart()+1);
-                readReference.getStatsByType(stats.getType()).getAlignmentsTableFile().writeMergedAlignment(stats, file.getName(), merger, ais);
-            }
-        } catch (Exception e) {
-            System.out.println("Error parsing alignment "+ alignmentPath);
-            options.setReturnValue(1);
-            options.getLog().println("Error parsing alignment " + alignmentPath);
-            e.printStackTrace();
-        }
-    }
-
-    /**
-     * Parse a FASTA or FASTQ file, noting length of reads etc.
-     * Exits with status 1 if a read ID starts with the all-zero UUID or is
-     * longer than 100 characters.
-     */
-    private void readQueryFile() {
-        int nReadsInFile;
-
-        sr = new SequenceReader(true);
-
-        if (options.getReadFormat() == NanoOKOptions.FASTQ) {
-            nReadsInFile = sr.indexFASTQFile(readPath);
-        } else {
-            nReadsInFile = sr.indexFASTAFile(readPath, null, true);
-        }
-
-        if (nReadsInFile > 1) {
-            System.out.println("Warning: File "+readPath+" has more than 1 read. NanoOK can't currently handle this.");
-        }
-
-        for (int i=0; i<sr.getSequenceCount(); i++) {
-            String id = sr.getID(i);
-
-            if (id.startsWith("00000000-0000-0000-0000-000000000000")) {
-                System.out.println("Error:");
-                System.out.println(readPath);
-                System.out.println("The reads in this file do not have unique IDs because they were generated when MinKNOW was producing UUIDs, but Metrichor was not using them. To fix, run nanook_extract_reads with the -fixids option.");
-                System.exit(1);
-            }
-
-            if (id.length() > 100) {
-                System.out.println("Problem id " + id);
-                System.out.println("Read path: " + readPath);
-                System.exit(1);
-            }
-            stats.addLength(readPath, id, sr.getLength(i), sr.getGC(i));
-        }
-    }
-
-    /**
-     * Entry point to thread: index the read file, count it, parse the
-     * alignment, and store kmers when a reference was found and kmer
-     * counting is enabled.
-     */
-    @Override
-    public void run() {
-        readQueryFile();
-        stats.addReadFile(passfail);
-        parseAlignment();
-        if ((readReference != null) && (options.doKmerCounting())) {
-            sr.storeKmers(0, readReference.getStatsByType(type).getReadKmerTable());
-        }
-    }
-}
diff --git a/src/nanook/ProcessLogger.java b/src/nanook/ProcessLogger.java
deleted file mode 100644
index a2b65b0..0000000
--- a/src/nanook/ProcessLogger.java
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileWriter;
-import java.io.InputStreamReader;
-import java.io.PrintWriter;
-import java.lang.ProcessBuilder.Redirect;
-import java.util.ArrayList;
-
-/**
- * Execute a system process and log result to a file
- *
- * @author Richard Leggett
- */
-public class ProcessLogger {
- private boolean writeStdio = true;
- private boolean writeStderr = true;
- private boolean writeHeadings = true;
-
- public ArrayList getCommandOutput(String[] command, boolean stdin, boolean stderr) {
- ArrayList outputLines = new ArrayList();
-
- try {
- Process p = Runtime.getRuntime().exec(command);
- // ?? p.waitFor();
-
- if (stdin) {
- BufferedReader stdInput = new BufferedReader(new InputStreamReader(p.getInputStream()));
- String s = null;
- while ((s = stdInput.readLine()) != null) {
- outputLines.add(s);
- }
- }
-
- if (stderr) {
- BufferedReader stdError = new BufferedReader(new InputStreamReader(p.getErrorStream()));
- String s = null;
- while ((s = stdError.readLine()) != null) {
- outputLines.add(s);
- }
- }
- } catch (Exception e) {
- System.out.println("\nProcessLogger exception:");
- e.printStackTrace();
- System.exit(1);
- }
-
- return outputLines;
- }
-
- public void runCommandToLog(String[] command, NanoOKLog log) {
- ArrayList<String> response = getCommandOutput(command, true, true);
- for (int i=0; i<response.size(); i++) {
- log.println(response.get(i));
- }
- }
-
- public void runCommand(String[] command) {
- ArrayList<String> response = getCommandOutput(command, true, true);
- for (int i=0; i<response.size(); i++) {
- System.out.println(response.get(i));
- }
- }
-
- public ArrayList getCommandOutput(String command, boolean stdin, boolean stderr) {
- ArrayList outputLines = new ArrayList();
-
- try {
- Process p = Runtime.getRuntime().exec(command);
- // ?? p.waitFor();
-
- if (stdin) {
- BufferedReader stdInput = new BufferedReader(new InputStreamReader(p.getInputStream()));
- String s = null;
- while ((s = stdInput.readLine()) != null) {
- outputLines.add(s);
- }
- }
-
- if (stderr) {
- BufferedReader stdError = new BufferedReader(new InputStreamReader(p.getErrorStream()));
- String s = null;
- while ((s = stdError.readLine()) != null) {
- outputLines.add(s);
- }
- }
- } catch (Exception e) {
- System.out.println("ProcessLogger exception:");
- e.printStackTrace();
- System.exit(1);
- }
-
- return outputLines;
- }
-
- public void runCommand(String command) {
- ArrayList<String> response = getCommandOutput(command, true, true);
- for (int i=0; i<response.size(); i++) {
- System.out.println(response.get(i));
- }
- }
-
- public ArrayList checkCommand(String command) {
- ArrayList outputLines;
- boolean isOk = true;
-
- try {
- Process p = Runtime.getRuntime().exec(command);
- BufferedReader stdInput = new BufferedReader(new InputStreamReader(p.getInputStream()));
- BufferedReader stdError = new BufferedReader(new InputStreamReader(p.getErrorStream()));
- String s = null;
-
- outputLines = new ArrayList();
- while ((s = stdInput.readLine()) != null) {
- outputLines.add(s);
- }
- while ((s = stdError.readLine()) != null) {
- outputLines.add(s);
- }
- } catch (Exception e) {
- outputLines = null;
- }
-
- return outputLines;
- }
-
- private synchronized void writeLog(Process p, String command, String logFilename, boolean fAppend) {
- try {
- PrintWriter pw = new PrintWriter(new FileWriter(logFilename, fAppend));
- BufferedReader stdInput = new BufferedReader(new InputStreamReader(p.getInputStream()));
- BufferedReader stdError = new BufferedReader(new InputStreamReader(p.getErrorStream()));
-
- if (fAppend && writeHeadings) {
- pw.println("");
- pw.println("---");
- pw.println("");
- }
-
- if (writeHeadings) {
- pw.println("Running "+command);
- }
-
- // read the output from the command
- if (writeHeadings) {
- pw.println("");
- pw.println("Stdout:");
- }
-
- if (writeStdio) {
- String s = null;
- while ((s = stdInput.readLine()) != null) {
- pw.println(s);
- }
- }
-
- // read any errors from the attempted command
- if (writeHeadings) {
- pw.println("");
- pw.println("Stderr:");
- }
-
- if (writeStderr) {
- String s = null;
- while ((s = stdError.readLine()) != null) {
- pw.println(s);
- }
- }
-
- pw.close();
- } catch (Exception e) {
- System.out.println("ProcessLogger exception:");
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- public void runAndLogCommand(String command, String logFilename, boolean fAppend) {
- try {
- Process p = Runtime.getRuntime().exec(command);
- writeLog(p, command, logFilename, fAppend);
- p.waitFor();
- } catch (Exception e) {
- System.out.println("ProcessLogger exception:");
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- public void setWriteFormat(boolean headings, boolean io, boolean err) {
- writeHeadings = headings;
- writeStdio = io;
- writeStderr = err;
- }
-}
diff --git a/src/nanook/RGraphPlotter.java b/src/nanook/RGraphPlotter.java
deleted file mode 100644
index bacaa11..0000000
--- a/src/nanook/RGraphPlotter.java
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.*;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Set;
-import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.ThreadPoolExecutor;
-import java.util.concurrent.TimeUnit;
-
-/**
- * Executes command to plot graphs with R.
- *
- * @author Richard Leggett
- */
-public class RGraphPlotter {
-    private ThreadPoolExecutor executor;
-    private NanoOKOptions options;
-    private long lastCompleted = -1;
-    private String logDirectory;
-
-    /**
-     * Constructor. Creates the thread pool and makes the R log directory
-     * (logs directory + "R" + analysis suffix) if it does not exist.
-     * @param o NanoOKOptions object
-     */
-    public RGraphPlotter(NanoOKOptions o) {
-        options = o;
-        executor = new ThreadPoolExecutor(options.getNumberOfThreads(), options.getNumberOfThreads(), 10, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>());
-        logDirectory = options.getLogsDir() + File.separator + "R" + options.getAnalysisSuffix();
-        File f = new File(logDirectory);
-        if (!f.exists()) {
-            // Only claim success when the directory was really created.
-            if (f.mkdir()) {
-                options.getLog().println("Made directory " + logDirectory);
-            } else {
-                options.getLog().println("Unable to make directory " + logDirectory);
-            }
-        }
-    }
-
-    /**
-     * Write progress bar to standard output. Nothing is printed unless the
-     * completed task count has changed since the last call.
-     */
-    private void writeProgress() {
-        long completed = executor.getCompletedTaskCount();
-        long total = executor.getTaskCount();
-        long e = 0;
-        long s = NanoOKOptions.PROGRESS_WIDTH;
-
-        if (total > 0) {
-            e = NanoOKOptions.PROGRESS_WIDTH * completed / total;
-            s = NanoOKOptions.PROGRESS_WIDTH - e;
-        }
-
-        if (completed != lastCompleted) {
-            System.out.print("\r[");
-            for (int i=0; i<e; i++) {
-                System.out.print("=");
-            }
-            for (int i=0; i<s; i++) {
-                System.out.print(" ");
-            }
-            System.out.print("] " + completed +"/" + total);
-            lastCompleted = completed;
-        }
-    }
-
-    /**
-     * Queue one Rscript run on the thread pool.
-     * @param fComparison true when plotting a comparison; the analysis
-     *        directory is then passed by name only, and the sample list and
-     *        comparison directory are added as arguments
-     * @param scriptName script file name inside the scripts directory
-     * @param logPrefix start of the log file name
-     * @param refName reference name added as an argument and to the log
-     *        file name, or null for none
-     */
-    public void runScript(boolean fComparison, String scriptName, String logPrefix, String refName) {
-        ArrayList<String> args = new ArrayList<String>();
-        String logFilename = logDirectory + File.separator + logPrefix;
-
-        args.add("Rscript");
-        args.add(options.getScriptsDir() + File.separator + scriptName);
-
-        if (fComparison) {
-            File f = new File(options.getAnalysisDir());
-            args.add(f.getName());
-        } else {
-            args.add(options.getAnalysisDir());
-        }
-
-        args.add(options.getGraphsDir());
-
-        if (fComparison) {
-            args.add(options.getSampleList());
-            args.add(options.getComparisonDir());
-        }
-
-        if (refName != null) {
-            args.add(refName);
-            logFilename = logFilename + "_"+refName;
-        }
-
-        args.add(options.getImageFormat());
-
-        options.getLog().println("Running Rscript "+scriptName);
-        options.getLog().println("Log file is "+logFilename);
-        executor.execute(new RGraphRunnable("Rscript", args, logFilename + ".txt"));
-        writeProgress();
-    }
-
-    /**
-     * Execute plot commands: one lengths/comparison script, then one
-     * reference script per reference, and wait for all of them to finish.
-     * @param fComparison true to run the comparison scripts
-     * @throws InterruptedException if interrupted while waiting for the scripts
-     */
-    public void plot(boolean fComparison) throws InterruptedException {
-        if (fComparison) {
-            runScript(fComparison, "nanook_plot_comparison.R", "plot_lengths", null);
-        } else {
-            runScript(fComparison, "nanook_plot_lengths.R", "plot_lengths", null);
-        }
-
-        Set<String> ids = options.getReferences().getAllIds();
-        for (String id : ids) {
-            ReferenceSequence rs = options.getReferences().getReferenceById(id);
-            String name = rs.getName();
-            if (fComparison) {
-                runScript(fComparison, "nanook_plot_comparison_reference.R", "plot_reference", name);
-            } else {
-                // Outside comparison mode a reference is only plotted once it
-                // has more than MIN_ALIGNMENTS alignments.
-                if (rs.getTotalNumberOfAlignments() > NanoOKOptions.MIN_ALIGNMENTS) {
-                    runScript(fComparison, "nanook_plot_reference.R", "plot_reference", name);
-                }
-            }
-            writeProgress();
-        }
-
-        // That's all - wait for all threads to finish
-        executor.shutdown();
-        while (!executor.isTerminated()) {
-            writeProgress();
-            Thread.sleep(100);
-        }
-
-        writeProgress();
-        System.out.println("");
-    }
-}
diff --git a/src/nanook/RGraphRunnable.java b/src/nanook/RGraphRunnable.java
deleted file mode 100644
index c16dd3a..0000000
--- a/src/nanook/RGraphRunnable.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.util.List;
-
-/**
- * Enable multi-threading of R plotting
- *
- * @author Richard Leggett
- */
-public class RGraphRunnable implements Runnable {
-    private String command;
-    private String logFilename;
-    private List<String> args;
-
-    /**
-     * Constructor.
-     * @param cmd command name (stored but not used when running)
-     * @param a full argument list handed to ProcessBuilder, program first
-     * @param log file that the process output is redirected to
-     */
-    public RGraphRunnable(String cmd, List<String> a, String log) {
-        command = cmd;
-        args = a;
-        logFilename = log;
-    }
-
-    /**
-     * Scan a log file for R's missing-package message and print a hint for
-     * each line that contains it. Exits with status 1 on any exception.
-     * @param filename log file to scan
-     */
-    public void checkLogForErrors(String filename) {
-        // try-with-resources closes the reader even when reading fails.
-        try (BufferedReader br = new BufferedReader(new FileReader(filename))) {
-            String line;
-
-            while ((line = br.readLine()) != null) {
-                if (line.contains("there is no package called")) {
-                    System.out.println("");
-                    System.out.println("R error - have you installed all the package dependencies? See documentation for help.");
-                    System.out.println(line);
-                }
-            }
-        } catch (Exception e) {
-            System.out.println("Exception:");
-            e.printStackTrace();
-            System.exit(1);
-        }
-    }
-
-    /**
-     * Run the process with stderr merged into stdout and both redirected to
-     * the log file, wait for it to end, then scan the log for errors.
-     * Exits with status 1 on any exception.
-     */
-    @Override
-    public void run() {
-        try {
-            ProcessBuilder pb = new ProcessBuilder(args);
-            pb.redirectErrorStream(true);
-            pb.redirectOutput(ProcessBuilder.Redirect.to(new File(logFilename))); //appendTo
-            Process p = pb.start();
-            p.waitFor();
-            checkLogForErrors(logFilename);
-        } catch (Exception e) {
-            System.out.println("RGraphRunnable exception:");
-            e.printStackTrace();
-            System.exit(1);
-        }
-    }
-}
diff --git a/src/nanook/ReadAligner.java b/src/nanook/ReadAligner.java
deleted file mode 100644
index db06c74..0000000
--- a/src/nanook/ReadAligner.java
+++ /dev/null
@@ -1,226 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett (richard.leggett at earlham.ac.uk)
- *
- * Copyright 2015-17 Earlham Institute
- */
-
-package nanook;
-
-import java.io.File;
-import java.io.FileWriter;
-import java.io.PrintWriter;
-import java.util.ArrayList;
-import java.util.concurrent.*;
-
-/**
- * Align reads
- *
- * @author Richard Leggett
- */
-public class ReadAligner {
- private NanoOKOptions options;
- private AlignmentFileParser parser;
- private ThreadPoolExecutor executor;
- private long lastCompleted = -1;
-
- /**
- * Constructor
- * @param o program options
- */
- public ReadAligner(NanoOKOptions o, AlignmentFileParser afp) {
- options = o;
- parser = afp;
-
- executor = new ThreadPoolExecutor(options.getNumberOfThreads(), options.getNumberOfThreads(), 10, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>());
- }
-
- /**
- * Write progress
- */
- private void writeProgress() {
- long completed = executor.getCompletedTaskCount();
- long total = executor.getTaskCount();
- long e = 0;
- long s = NanoOKOptions.PROGRESS_WIDTH;
-
- if (total > 0) {
- e = NanoOKOptions.PROGRESS_WIDTH * completed / total;
- s = NanoOKOptions.PROGRESS_WIDTH - e;
- }
-
- if (completed != lastCompleted) {
- System.out.print("\rAlignment [");
- for (int i=0; i<e; i++) {
- System.out.print("=");
- }
- for (int i=0; i<s; i++) {
- System.out.print(" ");
- }
- System.out.print("] " + completed +"/" + total);
- lastCompleted = completed;
- }
- }
-
- private void checkAndMakeDir(String dir) {
- File f = new File(dir);
- if (f.exists()) {
- if (!f.isDirectory()) {
- System.out.println("Error: " + dir + " is a file, not a directory!");
- System.exit(1);
- }
- } else {
- //System.out.println("Making directory " + dir);
- f.mkdir();
- }
- }
-
- /**
- * Create directories for output
- */
- public void createDirectories() {
- checkAndMakeDir(options.getAlignerDir());
- checkAndMakeDir(options.getLogsDir());
- checkAndMakeDir(options.getLogsDir() + File.separator + options.getAligner());
- }
-
- public boolean isValidReadFile(String filename) {
- boolean isValid = false;
-
- //System.out.println(filename);
-
- if (parser.getReadFormat() == NanoOKOptions.FASTA) {
- if (filename.endsWith(".fa") || filename.endsWith(".fasta")) {
- isValid = true;
- }
- } else if (parser.getReadFormat() == NanoOKOptions.FASTQ) {
- if (filename.endsWith(".fq") || filename.endsWith(".fastq")) {
- isValid = true;
- }
- }
-
- return isValid;
- }
-
- private void checkReferenceSizesFile(String referenceFile) {
- String sizesFilename = referenceFile + ".sizes";
- File sizesFile = new File(sizesFilename);
- if (!sizesFile.exists()) {
- System.out.println("");
- System.out.println("Generating .sizes file for reference. You may want to edit the display names.");
- SequenceReader sr = new SequenceReader(false);
- sr.indexFASTAFile(referenceFile, sizesFilename , false);
- }
- }
-
- private void processDirectory(String readsDir, String alignDir, String logDirName, boolean allowSubdir, boolean processThisDir) {
- String reference = options.getReferenceFile();
-
- checkReferenceSizesFile(reference);
- checkAndMakeDir(alignDir);
- checkAndMakeDir(logDirName);
-
- if (allowSubdir) {
- File inputDir = new File(readsDir);
- File[] listOfFiles = inputDir.listFiles();
- for (File file : listOfFiles) {
- if (file.isDirectory() &&
- (!file.getName().equals("2D")) &&
- (!file.getName().equals("Template")) &&
- (!file.getName().equals("Complement"))) {
- processDirectory(readsDir + File.separator + file.getName(),
- alignDir + File.separator + file.getName(),
- logDirName + File.separator + file.getName(),
- false,
- true);
- }
- }
- }
-
- if (processThisDir) {
- for (int t=0; t<3; t++) {
- if (options.isProcessingReadType(t)) {
- String inputDirName = readsDir + File.separator + NanoOKOptions.getTypeFromInt(t);
- String outputDirName = alignDir + File.separator + NanoOKOptions.getTypeFromInt(t);
-
- checkAndMakeDir(outputDirName);
-
- File inputDir = new File(inputDirName);
- File[] listOfFiles = inputDir.listFiles();
-
- if (listOfFiles == null) {
- System.out.println("");
- System.out.println("Directory "+inputDirName+" doesn't exist. Have you extracted reads as "+options.getExpectedReadFormat()+ " (some aligners require FASTA, some FASTQ)?");
- } else if (listOfFiles.length <= 0) {
- System.out.println("");
- System.out.println("Directory "+inputDirName+" empty. Have you extracted reads as "+options.getExpectedReadFormat()+ " (some aligners require FASTA, some FASTQ)?");
- } else {
- int readCount = 0;
- for (File file : listOfFiles) {
- if (file.isFile()) {
- if (isValidReadFile(file.getName())) {
- String inPath = inputDirName + File.separator + file.getName();
- String outPath = outputDirName + File.separator + file.getName() + parser.getAlignmentFileExtension();
- String logFile = logDirName + File.separator + file.getName() + ".log";
- String command = parser.getRunCommand(inPath, outPath, reference);
- if (options.showAlignerCommand()) {
- System.out.println("Running: " + command);
- }
- executor.execute(new SystemCommandRunnable(options, null, command, parser.outputsToStdout() ? outPath:null, logFile));
- writeProgress();
- readCount++;
- }
- }
- }
-
- if (readCount == 0) {
- System.out.print("Error: unable to find any ");
- if (parser.getReadFormat() == NanoOKOptions.FASTA) {
- System.out.print("FASTA");
- } else if (parser.getReadFormat() == NanoOKOptions.FASTQ) {
- System.out.print("FASTQ");
- }
- System.out.println(" files to align");
- System.out.println("");
- System.exit(1);
- }
- }
- }
- }
- }
- }
-
- public void align() throws InterruptedException {
- if (options.usingPassFailDirs()) {
- if (options.isProcessingPassReads()) {
- processDirectory(options.getReadDir() + File.separator + "pass",
- options.getAlignerDir() + File.separator + "pass",
- options.getLogsDir() + File.separator + options.getAligner() + File.separator + "pass",
- options.isBarcoded(),
- options.isBarcoded() ? false:true);
- }
-
- if (options.isProcessingFailReads()) {
- processDirectory(options.getReadDir() + File.separator + "fail",
- options.getAlignerDir() + File.separator + "fail",
- options.getLogsDir() + File.separator + options.getAligner() + File.separator + "fail",
- options.isBarcoded(),
- true);
- }
- } else {
- processDirectory(options.getReadDir(), options.getAlignerDir(), options.getLogsDir() + File.separator + options.getAligner(), false, true);
- }
-
- // That's all - wait for all threads to finish
- executor.shutdown();
- while (!executor.isTerminated()) {
- writeProgress();
- Thread.sleep(100);
- }
-
- writeProgress();
- System.out.println("");
- System.out.println("");
- System.out.println("DONE");
- }
-}
diff --git a/src/nanook/ReadExtractor.java b/src/nanook/ReadExtractor.java
deleted file mode 100644
index 30dd1f5..0000000
--- a/src/nanook/ReadExtractor.java
+++ /dev/null
@@ -1,189 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett (richard.leggett at earlham.ac.uk)
- *
- * Copyright 2015-17 Earlham Institute
- */
-
-package nanook;
-
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.util.ArrayList;
-import java.util.concurrent.*;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * Read extractor
- *
- * @author Richard Leggett
- */
-public class ReadExtractor {
- private NanoOKOptions options;
- private ThreadPoolExecutor executor;
- private long lastCompleted = -1;
-
- /**
- * Constructor
- * @param o program options
- */
- public ReadExtractor(NanoOKOptions o) {
- options = o;
-
- System.out.println("ERROR: ReadExtractor class deprecated.");
- System.exit(1);
-
- //executor = Executors.newFixedThreadPool(options.getNumberOfThreads());
- executor = new ThreadPoolExecutor(options.getNumberOfThreads(), options.getNumberOfThreads(), 10, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>());
- }
-
- /**
- * Write progress of extraction
- */
- private void writeProgress() {
- long completed = executor.getCompletedTaskCount();
- long total = executor.getTaskCount();
- long e = 0;
- long s = NanoOKOptions.PROGRESS_WIDTH;
-
- if (total > 0) {
- e = NanoOKOptions.PROGRESS_WIDTH * completed / total;
- s = NanoOKOptions.PROGRESS_WIDTH - e;
- }
-
- if (completed != lastCompleted) {
- System.out.print("\rExtraction [");
- for (int i=0; i<e; i++) {
- System.out.print("=");
- }
- for (int i=0; i<s; i++) {
- System.out.print(" ");
- }
- System.out.print("] " + completed +"/" + total);
- lastCompleted = completed;
- }
- }
-
- /**
- * Create directories for output
- */
- public void createDirectories() {
- File f = new File(options.getReadDir());
- if (f.exists()) {
- if (!f.isDirectory()) {
- System.out.println("Error: "+options.getReadDir()+" is a file, not a directory!");
- System.exit(1);
- }
- } else {
- //System.out.println("Making directory "+options.getReadDir());
- f.mkdir();
- }
-
- }
-
- /**
- * Process a directory and extract reads
- * @param inputDirName input directory name
- * @param outputDirName output directory name
- */
- private void processDirectory(String inputDirName, String outputDirName, boolean allowSubdir, boolean processThisDir) {
- File f = new File(outputDirName);
-
- options.getLog().println("Processing directory");
- options.getLog().println("Input dir name: "+inputDirName);
- options.getLog().println("Output dir name: "+outputDirName);
- options.getLog().println("allowSubdir: "+allowSubdir);
- options.getLog().println("processThisDir: "+processThisDir);
-
- // Make directory
- if (! f.exists()) {
- f.mkdir();
- }
-
- if (processThisDir) {
- // Make output Template, Complement and 2D directories
- for (int t=0; t<3; t++) {
- if (options.isProcessingReadType(t)) {
- f = new File(outputDirName + File.separator + NanoOKOptions.getTypeFromInt(t));
- if (! f.exists()) {
- f.mkdir();
- }
- }
- }
- }
-
- File inputDir = new File(inputDirName);
- File[] listOfFiles = inputDir.listFiles();
-
- if (listOfFiles == null) {
- System.out.println("");
- System.out.println("Directory "+inputDirName+" doesn't exist");
- } else if (listOfFiles.length <= 0) {
- System.out.println("");
- System.out.println("Directory "+inputDirName+" empty");
- } else {
- for (File file : listOfFiles) {
- if (file.isFile() && processThisDir) {
- if (file.getName().endsWith(".fast5")) {
- options.getLog().println("Got file "+file.getName());
- executor.execute(new ReadExtractorRunnable(options, inputDirName, file.getName(), outputDirName));
- writeProgress();
- }
- } else if (file.isDirectory() && allowSubdir) {
- processDirectory(inputDirName + File.separator + file.getName(),
- outputDirName + File.separator + file.getName(),
- false,
- true);
- }
- }
- }
- }
-
- /**
- * Extract reads
- */
- public void extract() throws InterruptedException {
- if (options.usingPassFailDirs()) {
- if (options.isProcessingPassReads()) {
- processDirectory(options.getFast5Dir() + File.separator + "pass",
- options.getReadDir() + File.separator + "pass",
- options.isBarcoded(),
- options.isBarcoded() ? false:true);
- }
-
- if (options.isProcessingFailReads()) {
- processDirectory(options.getFast5Dir() + File.separator + "fail",
- options.getReadDir() + File.separator + "fail",
- options.isBarcoded(),
- true);
- }
- } else {
- processDirectory(options.getFast5Dir(), options.getReadDir(), false, true);
- }
-
- // That's all - wait for all threads to finish
- executor.shutdown();
- while (!executor.isTerminated()) {
- writeProgress();
- Thread.sleep(100);
- }
-
- writeProgress();
-
- System.out.println("");
-
- options.getReadFileMerger().closeFiles();
- if (options.mergeFastaFiles()) {
- System.out.println("");
- options.getReadFileMerger().writeMergedFiles();
- }
-
- System.out.println("");
- System.out.println("DONE");
- }
-}
-
-
diff --git a/src/nanook/ReadExtractorRunnable.java b/src/nanook/ReadExtractorRunnable.java
deleted file mode 100644
index 72fb7c7..0000000
--- a/src/nanook/ReadExtractorRunnable.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.util.ArrayList;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import java.util.EnumSet;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
-/**
- * Enable multi-threading of read extraction
- *
- * @author Richard Leggett
- */
-public class ReadExtractorRunnable implements Runnable {
- public final static String TYPE_STRING_TEMPLATE = "/Analyses/Basecall_2D_000/BaseCalled_template/Fastq";
- public final static String TYPE_STRING_COMPLEMENT = "/Analyses/Basecall_2D_000/BaseCalled_complement/Fastq";
- public final static String TYPE_STRING_2D = "/Analyses/Basecall_2D_000/BaseCalled_2D/Fastq";
- private String[] typeStrings = {TYPE_STRING_TEMPLATE, TYPE_STRING_COMPLEMENT, TYPE_STRING_2D};
- public NanoOKOptions options;
- public String inDir;
- public String filename;
- public String outDir;
-
- public ReadExtractorRunnable(NanoOKOptions o, String in, String file, String out) {
- options = o;
- inDir = in;
- filename = file;
- outDir = out;
-
- System.out.println("Error: Entered deprecated ReadExtractorRunnable!");
- System.exit(1);
- }
-
- /**
- * Extract reads of each type from file
- * @param inDir input directory
- * @param filename filename
- * @param outDir output directory
- */
- public void run() {
- String inputPathname = inDir + File.separator + filename;
- Fast5File inputFile = new Fast5File(options, inputPathname);
- //String outName = new File(inputPathname).getName();
- String filePrefix = ReadProcessorRunnable.getFilePrefixFromPathname(inputPathname);
-
- for (int t=0; t<3; t++) {
- if (options.isProcessingReadType(t)) {
- FastAQFile ff = inputFile.getFastq(options.getBasecallIndex(), t);
- if (ff != null) {
- if (options.getReadFormat() == NanoOKOptions.FASTA) {
- //String fastaqPathname = outDir + File.separator + NanoOKOptions.getTypeFromInt(t) + File.separator + outName + "_BaseCalled_" + NanoOKOptions.getTypeFromInt(t) + ".fasta";
- String fastaqPathname = outDir + File.separator + NanoOKOptions.getTypeFromInt(t) + File.separator + filePrefix + "_BaseCalled_" + NanoOKOptions.getTypeFromInt(t) + ".fasta";
- ff.writeFasta(fastaqPathname, options.outputFast5Path() ? inputPathname:null);
- if (options.mergeFastaFiles()) {
- options.getReadFileMerger().addReadFile(fastaqPathname, t, 0, "", 0, 0);
- }
- } else if (options.getReadFormat() == NanoOKOptions.FASTQ) {
- //String fastaqPathname = outDir + File.separator + NanoOKOptions.getTypeFromInt(t) + File.separator + outName + "_BaseCalled_" + NanoOKOptions.getTypeFromInt(t) + ".fastq";
- String fastaqPathname = outDir + File.separator + NanoOKOptions.getTypeFromInt(t) + File.separator + filePrefix + "_BaseCalled_" + NanoOKOptions.getTypeFromInt(t) + ".fastq";
- ff.writeFastq(fastaqPathname);
- if (options.mergeFastaFiles()) {
- options.getReadFileMerger().addReadFile(fastaqPathname, t, 0, "", 0, 0);
- }
- }
- }
- }
- }
- }
-}
diff --git a/src/nanook/ReadFileMerger.java b/src/nanook/ReadFileMerger.java
deleted file mode 100644
index d998856..0000000
--- a/src/nanook/ReadFileMerger.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015-2017 The Earlham Institute (formerly The Genome Analysis Centre)
- */
-
-package nanook;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.util.ArrayList;
-
-public class ReadFileMerger {
- private NanoOKOptions options;
- private ArrayList<String>[][] readFiles = new ArrayList[2][3];
- private PrintWriter[][] summaryFiles = new PrintWriter[2][3];
-
- public ReadFileMerger(NanoOKOptions o) {
- options = o;
-
- try {
- for (int pf = 0; pf<2; pf++) {
- for (int type=0; type<3; type++) {
- readFiles[pf][type] = new ArrayList<String>();
-
- String pathname = options.getReadDir() + File.separator +
- options.getSample() + "_all_" +
- NanoOKOptions.getTypeFromInt(type) + "_" +
- NanoOKOptions.getPassFailFromInt(pf + 1) +
- ".stats";
-
- options.getLog().println("Opening stats file "+pathname);
- summaryFiles[pf][type] = new PrintWriter(new FileWriter(pathname));
- }
- }
- } catch (Exception e) {
- System.out.println("ReadFileMerger exception");
- e.printStackTrace();
- }
- }
-
- public synchronized void addReadFile(String pathname, int type, int pf, String readID, int readLength, double meanQ) {
- if (options.mergeFastaFiles()) {
- readFiles[pf-1][type].add(pathname);
- }
-
- summaryFiles[pf-1][type].println(pathname+"\t"+readID+"\t"+readLength+"\t"+meanQ);
- }
-
- public void writeMergedFiles() {
- for (int pf = 0; pf<2; pf++) {
- for (int type=0; type<3; type++) {
- if (readFiles[pf][type].size() > 0) {
- String outputPathname = options.getReadDir() + File.separator +
- options.getSample() + "_all_" +
- NanoOKOptions.getTypeFromInt(type) + "_" +
- NanoOKOptions.getPassFailFromInt(pf + 1) +
- (options.getReadFormat() == NanoOKOptions.FASTA ? ".fasta":".fastq");
-
- System.out.println("Writing " + outputPathname);
-
- try {
- PrintWriter pw = new PrintWriter(new FileWriter(outputPathname));
- for (int i=0; i<readFiles[pf][type].size(); i++) {
- BufferedReader br = new BufferedReader(new FileReader(readFiles[pf][type].get(i)));
- String line;
- while ((line = br.readLine()) != null) {
- pw.println(line);
- }
- br.close();
- }
- pw.close();
- } catch (IOException e) {
- System.out.println("writeMergedFiles exception");
- e.printStackTrace();
- }
- }
- }
- }
- }
-
- public void closeFiles() {
- options.getLog().println("Closing stats files");
- for (int pf = 0; pf<2; pf++) {
- for (int type=0; type<3; type++) {
- summaryFiles[pf][type].close();
- }
- }
- }
-}
diff --git a/src/nanook/ReadLengthsSummaryFile.java b/src/nanook/ReadLengthsSummaryFile.java
deleted file mode 100644
index 17e3486..0000000
--- a/src/nanook/ReadLengthsSummaryFile.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.*;
-
-/**
- * Represents a summary file containing basic information on read lengths, N50 etc. for the three different read types.
- *
- * @author Richard Leggett
- */
-public class ReadLengthsSummaryFile {
- private PrintWriter pw;
- private String filename;
-
- /**
- * Constructor.
- * @param f filename of output file
- */
- public ReadLengthsSummaryFile(String f) {
- filename = f;
- }
-
- /**
- * Open output file.
- * @param sample sample name
- */
- public void open(String sample) {
- try {
- pw = new PrintWriter(new FileWriter(filename));
- pw.println("Nanotools report - "+sample);
- pw.println("");
- pw.println("Length summary");
- pw.println("");
- pw.printf("%-10s %-8s %-10s %-10s %-8s %-8s %-8s %-8s %-8s %-8s", "Type", "NumReads", "TotalBases", "Mean", "Long", "Short", "N50", "N50Count", "N90", "N90Count");
- pw.println("");
- } catch (IOException e) {
- System.out.println("ReadLengthsSummaryFile exception:");
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- /**
- * Output read stats for a particular type (Template, Complement, 2D).
- * @param r ReadSetStats object for the type
- */
- public void addReadSetStats(ReadSetStats r) {
- pw.printf("%-10s %-8d %-10d %-10.2f %-8d %-8d %-8d %-8d %-8d %-8d", r.getTypeString(), r.getNumReads(), r.getTotalBases(), r.getMeanLength(), r.getLongest(), r.getShortest(), r.getN50(), r.getN50Count(), r.getN90(), r.getN90Count());
- pw.println("");
- }
-
- /**
- * Close output file.
- */
- public void close() {
- pw.close();
- }
-}
diff --git a/src/nanook/ReadParser.java b/src/nanook/ReadParser.java
deleted file mode 100644
index 80c4ce3..0000000
--- a/src/nanook/ReadParser.java
+++ /dev/null
@@ -1,123 +0,0 @@
-
-/*
- TO DO:
- - Use the AlignmentFileStats structure to store ALL alignment stats and write this to the separate alignment files.
- - This requires rewriting the current parsers and methods.
-*/
-package nanook;
-
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.util.List;
-
-/**
- *
- * @author leggettr
- */
-public class ReadParser {
- private NanoOKOptions options;
- private SequenceReader sr;
-
-
- public ReadParser(NanoOKOptions o) {
- options = o;
- }
-
- /**
- * Parse a FASTA or FASTQ file, noting length of reads etc.
- */
- private void readQueryFile(String readPath, PrintWriter pw) {
- int nReadsInFile;
-
- sr = new SequenceReader(true);
-
- if (options.getReadFormat() == NanoOKOptions.FASTQ) {
- nReadsInFile = sr.indexFASTQFile(readPath);
- } else {
- nReadsInFile = sr.indexFASTAFile(readPath, null, true);
- }
-
- if (nReadsInFile > 1) {
- System.out.println("Warning: File "+readPath+" has more than 1 read. NanoOK can't currently handle this.");
- }
-
- for (int i=0; i<sr.getSequenceCount(); i++) {
- String id = sr.getID(i);
-
- if (id.startsWith("00000000-0000-0000-0000-000000000000")) {
- System.out.println("Error:");
- System.out.println(readPath);
- System.out.println("The reads in this file do not have unique IDs because they were generated when MinKNOW was producing UUIDs, but Metrichor was not using them. To fix, run nanook_extract_reads with the -fixids option.");
- System.exit(1);
- }
-
- //stats.addLength(readPath, id, sr.getLength(i), sr.getGC(i));
- pw.printf("Read:%s\t%d\t%.2f\n", id, sr.getLength(i), sr.getGC(i));
- }
- }
-
- /**
- * Parse alignment
- */
- private void parseAlignment(String alignmentPath)
- {
-// try {
-// File file = new File(alignmentPath);
-// AlignmentFileParser parser = options.getParser();
-//
-// options.getLog().println("");
-// options.getLog().println("> New file " + file.getName());
-// options.getLog().println("");
-//
-// int nAlignments = parser.parseFile(alignmentPath, nonAlignedSummary, stats);
-//
-// if (nAlignments > 0) {
-// parser.sortAlignments();
-// List<Alignment> al = parser.getHighestScoringSet();
-// int topAlignment = pickTopAlignment(al);
-// String readReferenceName = al.get(topAlignment).getHitName();
-//
-// options.getLog().println("Query size = " + al.get(topAlignment).getQuerySequenceSize());
-// options.getLog().println(" Hit size = " + al.get(topAlignment).getHitSequenceSize());
-//
-// readReference = options.getReferences().getReferenceById(readReferenceName);
-// AlignmentMerger merger = new AlignmentMerger(options, readReference, al.get(topAlignment).getQuerySequenceSize(), stats, stats.getType());
-// for (int i=topAlignment; i<al.size(); i++) {
-// Alignment a = al.get(i);
-// merger.addAlignment(a);
-// }
-// AlignmentInfo ais = merger.endMergeAndStoreStats();
-// readReference.getStatsByType(stats.getType()).getAlignmentsTableFile().writeMergedAlignment(stats, file.getName(), merger, ais);
-// }
-// } catch (Exception e) {
-// System.out.println("Error parsing alignment "+ alignmentPath);
-// options.setReturnValue(1);
-// options.getLog().println("Error parsing alignment " + alignmentPath);
-// e.printStackTrace();
-// }
- }
-
- public void parse(String fastaqPathname, String alignmentPathname, String parserPathname) {
- try {
- PrintWriter pw = new PrintWriter(new FileWriter(parserPathname, false));
- pw.println("NanoOKVersion:"+NanoOK.VERSION_STRING);
- pw.println("FastAQPath:"+fastaqPathname);
- pw.println("AlignmentPath:"+alignmentPathname);
- pw.println("Aligner:"+options.getAligner());
-
- readQueryFile(fastaqPathname, pw);
- //stats.addReadFile(passfail);
- //parseAlignment();
- //if ((readReference != null) && (options.doKmerCounting())) {
- // sr.storeKmers(0, readReference.getStatsByType(type).getReadKmerTable());
- //}
-
- pw.close();
- } catch (IOException e) {
- System.out.println("parseAlignment exception");
- e.printStackTrace();
- }
- }
-}
diff --git a/src/nanook/ReadProcessor.java b/src/nanook/ReadProcessor.java
deleted file mode 100644
index d3b72f0..0000000
--- a/src/nanook/ReadProcessor.java
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett (richard.leggett at earlham.ac.uk)
- *
- * Copyright 2015-17 Earlham Institute
- */
-
-package nanook;
-
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.util.ArrayList;
-import java.util.concurrent.*;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * Read extractor
- *
- * @author Richard Leggett
- */
-public class ReadProcessor {
- private NanoOKOptions options;
- private ThreadPoolExecutor executor;
- private long lastCompleted = -1;
- FileWatcher fw = null;
-
- /**
- * Constructor
- * @param o program options
- */
- public ReadProcessor(NanoOKOptions o) {
- options = o;
- fw = new FileWatcher(options);
-
- //executor = Executors.newFixedThreadPool(options.getNumberOfThreads());
- executor = new ThreadPoolExecutor(options.getNumberOfThreads(), options.getNumberOfThreads(), 10, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>());
- }
-
- /**
- * Write progress of extraction
- */
- private void writeProgress() {
- long completed = executor.getCompletedTaskCount();
- long total = executor.getTaskCount();
- long e = 0;
- long s = NanoOKOptions.PROGRESS_WIDTH;
-
- if (total > 0) {
- e = NanoOKOptions.PROGRESS_WIDTH * completed / total;
- s = NanoOKOptions.PROGRESS_WIDTH - e;
- }
-
- if (completed != lastCompleted) {
- System.out.print("\rExtraction [");
- for (int i=0; i<e; i++) {
- System.out.print("=");
- }
- for (int i=0; i<s; i++) {
- System.out.print(" ");
- }
- System.out.print("] " + completed +"/" + total);
- lastCompleted = completed;
- }
- }
-
- /**
- * Process a directory and extract reads
- * @param inputDirName input directory name
- * @param outputDirName output directory name
- */
- private void processDirectory(String inputDirName, boolean allowSubdir, boolean processThisDir, int pf) {
- options.getLog().println("Processing directory");
- options.getLog().println("Input dir name: "+inputDirName);
- options.getLog().println("allowSubdir: "+allowSubdir);
- options.getLog().println("processThisDir: "+processThisDir);
-
- if (processThisDir) {
- if (options.usingBatchDirs()) {
- fw.addBatchContainer(inputDirName, pf);
- } else {
- fw.addWatchDir(inputDirName, pf);
- }
- } else {
- File inputDir = new File(inputDirName);
- File[] listOfFiles = inputDir.listFiles();
-
- if (listOfFiles == null) {
- options.getLog().println("Directory "+inputDirName+" doesn't exist");
- } else if (listOfFiles.length <= 0) {
- options.getLog().println("Directory "+inputDirName+" empty");
- } else {
- for (File file : listOfFiles) {
- if (file.isDirectory() && allowSubdir) {
- processDirectory(inputDirName + File.separator + file.getName(),
- false,
- true,
- pf);
- }
- }
- }
- }
- }
-
- private void addDirsForExtract() {
- if (options.usingPassFailDirs()) {
- if (options.isProcessingPassReads()) {
- processDirectory(options.getFast5Dir() + File.separator + "pass",
- options.isBarcoded(),
- options.isBarcoded() ? false:true,
- NanoOKOptions.READTYPE_PASS);
- }
-
- if (options.isProcessingFailReads()) {
- processDirectory(options.getFast5Dir() + File.separator + "fail",
- options.isBarcoded(),
- true,
- NanoOKOptions.READTYPE_FAIL);
- }
- } else {
- processDirectory(options.getFast5Dir(),
- options.isBarcoded(),
- options.isBarcoded() ? false:true,
- NanoOKOptions.READTYPE_COMBINED);
- }
- }
-
- private void addDirsForAlign() {
- // If using batch dirs, then we go sample/fasta/2D/pass/batch_XXX
- // or sample/fasta/2D/pass/barcodeXXX/batch_XXX
- // If using old style, then we go sample/fasta/pass/2D
- // or sample/fasta/pass/2D/barcodeXXX/barcodeXXX
-
- for (int t=0; t<3; t++) {
- if (options.isProcessingReadType(t)) {
- if (options.usingPassFailDirs()) {
- if (options.isProcessingPassReads()) {
- //if (options.usingBatchDirs()) {
- processDirectory(options.getReadDir() + File.separator + "pass" + File.separator + NanoOKOptions.getTypeFromInt(t),
- options.isBarcoded(),
- options.isBarcoded() ? false:true,
- NanoOKOptions.READTYPE_PASS);
- //} else {
- // processDirectory(options.getReadDir() + File.separator + "pass" + File.separator + NanoOKOptions.getTypeFromInt(t),
- // options.isBarcoded(),
- // options.isBarcoded() ? false:true);
- //}
- }
-
- if (options.isProcessingFailReads()) {
- //if (options.usingBatchDirs()) {
- processDirectory(options.getReadDir() + File.separator + "fail" + File.separator + NanoOKOptions.getTypeFromInt(t),
- options.isBarcoded(),
- options.isBarcoded() ? false:true,
- NanoOKOptions.READTYPE_FAIL);
- //} else {
- // processDirectory(options.getReadDir() + File.separator + "fail" + File.separator + NanoOKOptions.getTypeFromInt(t),
- // options.isBarcoded(),
- // true);
- //}
- }
- } else {
- processDirectory(options.getReadDir() + File.separator + NanoOKOptions.getTypeFromInt(t),
- options.isBarcoded(),
- options.isBarcoded() ? false:true,
- NanoOKOptions.READTYPE_COMBINED);
- }
- }
- }
- }
-
- private void addDirsForParse() {
- // If using batch dirs, then we go sample/last/2D/pass/batch_XXX
- // If using old style, then we go sample/last/pass/2D
- for (int t=0; t<3; t++) {
- if (options.isProcessingReadType(t)) {
- if (options.usingPassFailDirs()) {
- if (options.isProcessingPassReads()) {
- //if (options.usingBatchDirs()) {
- processDirectory(options.getAlignerDir() + File.separator + "pass" + File.separator + NanoOKOptions.getTypeFromInt(t),
- options.isBarcoded(),
- options.isBarcoded() ? false:true,
- NanoOKOptions.READTYPE_PASS);
- //} else {
- // processDirectory(options.getAlignerDir() + File.separator + "pass" + File.separator + NanoOKOptions.getTypeFromInt(t),
- // options.isBarcoded(),
- // options.isBarcoded() ? false:true);
- //}
- }
-
- if (options.isProcessingFailReads()) {
- //if (options.usingBatchDirs()) {
- processDirectory(options.getAlignerDir() + File.separator + "pass" + File.separator + NanoOKOptions.getTypeFromInt(t),
- options.isBarcoded(),
- options.isBarcoded() ? false:true,
- NanoOKOptions.READTYPE_FAIL);
- //} else {
- // processDirectory(options.getAlignerDir() + File.separator + "fail" + File.separator + NanoOKOptions.getTypeFromInt(t),
- // options.isBarcoded(),
- // true);
- //}
- }
- } else {
- processDirectory(options.getAlignerDir() + File.separator + NanoOKOptions.getTypeFromInt(t),
- options.isBarcoded(),
- options.isBarcoded() ? false:true,
- NanoOKOptions.READTYPE_COMBINED);
- }
- }
- }
- }
-
- /**
- * Extract reads
- */
- public void process() throws InterruptedException {
- String baseDir = "";
-
- if (options.isExtractingReads()) {
- options.getSampleChecker().checkFast5Directory();
- addDirsForExtract();
- } else if (options.isAligningRead()) {
- options.getSampleChecker().checkReadDirectory();
- addDirsForAlign();
- } else if (options.isParsingRead()) {
- options.getSampleChecker().checkReadDirectory();
- addDirsForParse();
- }
-
-
- for (int i=0; i<options.getNumberOfThreads(); i++) {
- executor.execute(new ReadProcessorRunnable(options, fw));
- }
-
- // Now keep scanning
- while (!fw.timedOut()) {
- fw.scan();
- fw.writeProgress();
- Thread.sleep(500);
- fw.writeProgress();
- Thread.sleep(500);
- }
- fw.writeProgress();
-
- // That's all - wait for all threads to finish
- executor.shutdown();
-
- options.getReadFileMerger().closeFiles();
- if (options.mergeFastaFiles()) {
- System.out.println("");
- options.getReadFileMerger().writeMergedFiles();
- }
-
- //writeProgress();
- System.out.println("");
- System.out.println("");
- System.out.println("DONE");
- }
-}
-
-
diff --git a/src/nanook/ReadProcessorRunnable.java b/src/nanook/ReadProcessorRunnable.java
deleted file mode 100644
index 29b2959..0000000
--- a/src/nanook/ReadProcessorRunnable.java
+++ /dev/null
@@ -1,369 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett (richard.leggett at earlham.ac.uk)
- *
- * Copyright 2015-17 Earlham Institute
- */
-
-package nanook;
-
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.util.ArrayList;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import java.util.EnumSet;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-/**
- * Enable multi-threading of read extraction
- *
- * @author Richard Leggett
- */
-public class ReadProcessorRunnable implements Runnable {
- public final static String TYPE_STRING_TEMPLATE = "/Analyses/Basecall_2D_000/BaseCalled_template/Fastq";
- public final static String TYPE_STRING_COMPLEMENT = "/Analyses/Basecall_2D_000/BaseCalled_complement/Fastq";
- public final static String TYPE_STRING_2D = "/Analyses/Basecall_2D_000/BaseCalled_2D/Fastq";
- private String[] typeStrings = {TYPE_STRING_TEMPLATE, TYPE_STRING_COMPLEMENT, TYPE_STRING_2D};
- public NanoOKOptions options;
- public FileWatcher fileWatcher;
- public boolean isNewStyleDir;
-
- public ReadProcessorRunnable(NanoOKOptions o, FileWatcher f) {
- options = o;
- fileWatcher = f;
- }
-
- //private String getFastaqDirFromFast5Name(String fast5Pathname, int type) {
- // File f = new File(fast5Pathname);
- // String inDir = f.getParent();
- // String outDir = options.getReadDir();
-
- // if (!inDir.startsWith(options.getFast5Dir())) {
- // System.out.println("Something wrong with fast5 filename - shouldn't get to this code. Please contact richard.leggett at earlham.ac.uk");
- // System.exit(1);
- // }
-
- // If using batch dirs, then we go sample/fasta/2D/pass/batch_XXX
- // If using old style, then we go sample/fasta/pass/2D
- //if (options.usingBatchDirs()) {
- // outDir = outDir + File.separator + NanoOKOptions.getTypeFromInt(type) + inDir.substring(options.getFast5Dir().length());
- //} else {
- // outDir = outDir + inDir.substring(options.getFast5Dir().length()) + File.separator + NanoOKOptions.getTypeFromInt(type);
- //}
-
- //options.getLog().println(" In: "+fast5Pathname);
- //options.getLog().println(" OutDir: "+outDir);
-
- // return outDir;
- //}
-
- /**
-  * Map a read pathname under the read directory onto the equivalent
-  * pathname under the aligner directory, creating the parent directory of
-  * the result if it does not exist. Exits the program if the read pathname
-  * does not start with the read directory.
-  *
-  * @param fastaqPathname pathname of a read file
-  * @return pathname below the aligner directory (no extension added)
-  */
- private String getAlignmentPathnameFromFastaqName(String fastaqPathname) {
-     String normalisedPath = new File(fastaqPathname).getPath();
-     String readDir = options.getReadDir();
-
-     if (!fastaqPathname.startsWith(readDir)) {
-         System.out.println("Something wrong with read filename - shouldn't get to this code. Please contact richard.leggett at earlham.ac.uk");
-         System.out.println("FastaPathname: "+fastaqPathname);
-         System.out.println("ReadDir: "+readDir);
-         System.exit(1);
-     }
-
-     // Same relative path, re-rooted at the aligner directory
-     String alignmentPathname = options.getAlignerDir() + normalisedPath.substring(readDir.length());
-     File targetDir = new File(alignmentPathname).getParentFile();
-
-     if (!targetDir.exists()) {
-         options.getLog().println("Making directory " + targetDir.getPath());
-         targetDir.mkdirs();
-     }
-
-     return alignmentPathname;
- }
-
- /**
-  * Map a read pathname under the read directory onto a log pathname under
-  * logs-dir/aligner-name, creating the parent directory of the result if it
-  * does not exist. Exits the program if the read pathname does not start
-  * with the read directory.
-  *
-  * @param fastaqPathname pathname of a read file
-  * @return pathname of the aligner log for that read
-  */
- private String getAlignmentLogPathnameFromFastaqName(String fastaqPathname) {
-     String normalisedPath = new File(fastaqPathname).getPath();
-     String readDir = options.getReadDir();
-
-     if (!fastaqPathname.startsWith(readDir)) {
-         System.out.println("Something wrong with read filename - shouldn't get to this code. Please contact richard.leggett at earlham.ac.uk");
-         System.out.println("FastaPathname: "+fastaqPathname);
-         System.out.println("ReadDir: "+readDir);
-         System.exit(1);
-     }
-
-     String logRoot = options.getLogsDir() + File.separator + options.getAligner();
-     String logPathname = logRoot + normalisedPath.substring(readDir.length());
-     File targetDir = new File(logPathname).getParentFile();
-
-     if (!targetDir.exists()) {
-         options.getLog().println("Making directory " + targetDir.getPath());
-         targetDir.mkdirs();
-     }
-
-     return logPathname;
- }
-
- /**
-  * Map an alignment pathname under the aligner directory onto the
-  * equivalent pathname under the parser directory, creating the parent
-  * directory of the result if it does not exist. Exits the program if the
-  * alignment pathname does not start with the aligner directory.
-  *
-  * @param alignmentPathname pathname of an alignment file
-  * @return pathname below the parser directory (no extension added)
-  */
- private String getParserPathnameFromAlignmentName(String alignmentPathname) {
-     String normalisedPath = new File(alignmentPathname).getPath();
-     String alignerDir = options.getAlignerDir();
-
-     if (!alignmentPathname.startsWith(alignerDir)) {
-         System.out.println("Something wrong with read filename - shouldn't get to this code. Please contact richard.leggett at earlham.ac.uk");
-         System.exit(1);
-     }
-
-     String parserPathname = options.getParserDir() + normalisedPath.substring(alignerDir.length());
-     File targetDir = new File(parserPathname).getParentFile();
-
-     if (!targetDir.exists()) {
-         options.getLog().println("Making directory " + targetDir.getPath());
-         targetDir.mkdirs();
-     }
-
-     return parserPathname;
- }
-
- /**
-  * Recover the read pathname from an alignment pathname: re-root the path
-  * from the aligner directory to the read directory and drop everything
-  * from the last '.' onwards. Exits the program if the alignment pathname
-  * does not start with the aligner directory.
-  *
-  * @param alignmentPathname pathname of an alignment file
-  * @return pathname of the corresponding read file
-  */
- private String getFastaqPathnameFromAlignmentName(String alignmentPathname) {
-     String normalisedPath = new File(alignmentPathname).getPath();
-     String alignerDir = options.getAlignerDir();
-
-     if (!alignmentPathname.startsWith(alignerDir)) {
-         System.out.println("Something wrong with read filename - shouldn't get to this code. Please contact richard.leggett at earlham.ac.uk");
-         System.exit(1);
-     }
-
-     int extensionStart = normalisedPath.lastIndexOf('.');
-     String relativePath = normalisedPath.substring(alignerDir.length(), extensionStart);
-
-     return options.getReadDir() + relativePath;
- }
-
- /**
-  * Return the leaf name of a pathname with its last extension removed.
-  * A name with no '.', or whose only '.' is the first character, is
-  * returned unchanged.
-  *
-  * @param pathname pathname of a file
-  * @return leaf name up to (not including) the last '.'
-  */
- public static String getFilePrefixFromPathname(String pathname) {
-     String leafName = new File(pathname).getName();
-     int dot = leafName.lastIndexOf(".");
-
-     return (dot > 0) ? leafName.substring(0, dot) : leafName;
- }
-
- /**
-  * Run an external command through a ProcessLogger.
-  *
-  * @param command command line to run
-  * @param outPath file to capture the command's output in, or null when
-  *                the command writes its own output file
-  */
- private void runCommandLocal(String command, String outPath) {
-     ProcessLogger processLogger = new ProcessLogger();
-
-     if (outPath == null) {
-         processLogger.runCommand(command);
-         return;
-     }
-
-     // outPath is only supplied for aligners that just write to screen (e.g. BWA)
-     processLogger.setWriteFormat(false, true, false);
-     processLogger.runAndLogCommand(command, outPath, false);
- }
-
- /**
-  * Parse one alignment file with a ReadParser.
-  * The output pathname is the parser-directory equivalent of the alignment
-  * pathname plus ".txt"; the read pathname is derived from the alignment
-  * pathname as well.
-  *
-  * @param alignmentPathname pathname of the alignment file to parse
-  */
- public void runParse(String alignmentPathname) {
- String parsedPathname = getParserPathnameFromAlignmentName(alignmentPathname) + ".txt";
- String fastaqPathname = getFastaqPathnameFromAlignmentName(alignmentPathname);
- ReadParser rp = new ReadParser(options);
-
- options.getLog().println("Parsing file "+ alignmentPathname);
- options.getLog().println(" to "+ parsedPathname);
-
- rp.parse(fastaqPathname, alignmentPathname, parsedPathname);
- }
-
- /**
-  * Align one read file against the reference using the command supplied by
-  * the configured alignment parser, then parse the result if parsing is
-  * enabled.
-  *
-  * @param fastaqPathname pathname of the read file to align
-  */
- public void runAlign(String fastaqPathname) {
- String reference = options.getReferenceFile();
- AlignmentFileParser parser = options.getParser();
-
- // NOTE(review): filePrefix is computed but never used in this method.
- String filePrefix = getFilePrefixFromPathname(fastaqPathname);
- String alignmentPathname = getAlignmentPathnameFromFastaqName(fastaqPathname) + parser.getAlignmentFileExtension();
- String alignmentLogPathname = getAlignmentLogPathnameFromFastaqName(fastaqPathname);
-
- options.getLog().println("Aligning file "+fastaqPathname);
- options.getLog().println(" to "+alignmentPathname);
- options.getLog().println(" with log "+alignmentLogPathname);
-
- String command = parser.getRunCommand(fastaqPathname, alignmentPathname, reference);
- if (options.showAlignerCommand()) {
- System.out.println("Running: " + command);
- }
- // Only pass an output path when the aligner writes its result to stdout
- runCommandLocal(command, parser.outputsToStdout() ? alignmentPathname:null);
- if (options.isParsingRead()) {
- runParse(alignmentPathname);
- }
- }
-
- /**
-  * Hand a read file to the BLAST handler for its read type and pass/fail
-  * class. The class is taken from the pathname: a "fail" directory
-  * component means fail, anything else is treated as pass.
-  *
-  * @param fastaqPathname pathname of the read file
-  * @param type read type integer (defined in NanoOKOptions)
-  */
- public void addToBlast(String fastaqPathname, int type) {
-     // Read paths are built with File.separator, so also match the platform
-     // separator; a literal "/fail/" never matches where it is not '/'.
-     String platformFailDir = File.separator + "fail" + File.separator;
-     boolean isFail = fastaqPathname.contains("/fail/") || fastaqPathname.contains(platformFailDir);
-     int pf = isFail ? NanoOKOptions.READTYPE_FAIL : NanoOKOptions.READTYPE_PASS;
-
-     options.getBlastHandler(type, pf).addRead(fastaqPathname);
- }
-
- /**
-  * Build the output read pathname for a fast5 file and make sure its
-  * directory exists. The result is
-  * read-dir/(pass|fail)/type/sub-dirs/prefix_BaseCalled_type.(fasta|fastq).
-  * Exits the program if the fast5 file is not below the fast5 directory.
-  *
-  * @param fast5Pathname pathname of the input fast5 file
-  * @param t read type integer (defined in NanoOKOptions)
-  * @param inputPF pass/fail class of the input directory
-  * @param outputPF pass/fail class to write the read under
-  * @return pathname of the FASTA/FASTQ file to write
-  */
- private String getFastaqFilename(String fast5Pathname, int t, int inputPF, int outputPF) {
-     String fast5ParentDir = new File(fast5Pathname).getParent();
-     String fast5Root = options.getFast5Dir();
-
-     if (!fast5ParentDir.startsWith(fast5Root)) {
-         System.out.println("Something wrong with fast5 filename - shouldn't get to this code. Please contact richard.leggett at earlham.ac.uk");
-         System.exit(1);
-     }
-
-     // Unless the input is combined, also skip the 5 characters of /pass or /fail
-     int suffixStart = (inputPF == NanoOKOptions.READTYPE_COMBINED) ? fast5Root.length() : fast5Root.length() + 5;
-     String suffixDirs = fast5ParentDir.substring(suffixStart);
-
-     String passOrFail = (outputPF == NanoOKOptions.READTYPE_FAIL) ? "fail" : "pass";
-     String typeName = NanoOKOptions.getTypeFromInt(t);
-     String fastaqDir = options.getReadDir() + File.separator + passOrFail + File.separator + typeName + suffixDirs;
-
-     File outputDir = new File(fastaqDir);
-     if (!outputDir.exists()) {
-         options.getLog().println("Making directory " + fastaqDir);
-         outputDir.mkdirs();
-     }
-
-     String fileExtension = options.getReadFormat() == NanoOKOptions.FASTA ? ".fasta":".fastq";
-
-     return fastaqDir + File.separator + getFilePrefixFromPathname(fast5Pathname) + "_BaseCalled_" + typeName + fileExtension;
- }
-
- /**
-  * Extract reads from one fast5 file.
-  * For each of the three read type indices that is enabled in the options,
-  * the read is fetched from the file, assigned a pass/fail output class,
-  * registered with the read file merger and written as FASTA or FASTQ.
-  * If enabled, the written file is then queued for BLAST and/or aligned.
-  *
-  * @param fast5Pathname pathname of the fast5 file
-  * @param inputPF pass/fail class of the input file (READTYPE_COMBINED when
-  *                the input is not split into pass and fail)
-  */
- public void runExtract(String fast5Pathname, int inputPF) {
- Fast5File inputFile = new Fast5File(options, fast5Pathname);
- int outputPF;
-
- options.getLog().println("Extracting file "+fast5Pathname);
-
- // presumably t = 0, 1, 2 are Template, Complement and 2D - verify against NanoOKOptions
- for (int t=0; t<3; t++) {
- if (options.isProcessingReadType(t)) {
- FastAQFile ff = inputFile.getFastq(options.getBasecallIndex(), t);
- double meanQ = 0;
-
- // A null result means no read of this type was obtained; it is skipped silently
- if (ff != null) {
- // If pass/fail not assigned, default to pass directory output
- if (inputPF == NanoOKOptions.READTYPE_COMBINED) {
- outputPF = NanoOKOptions.READTYPE_PASS;
- } else {
- outputPF = inputPF;
- }
-
- // Have we set a min quality threshold? In which case, test...
- meanQ = inputFile.getMeanQ(options.getBasecallIndex(), t);
- if (options.getMinQ() >= 0) {
- // A mean quality of exactly 0 is treated as "not available" and
- // leaves the class chosen above unchanged
- if (meanQ == 0) {
- options.getLog().println(" Couldn't get mean quality value");
- } else {
- // The threshold overrides the input directory's pass/fail class
- if (meanQ >= options.getMinQ()) {
- outputPF = NanoOKOptions.READTYPE_PASS;
- } else {
- outputPF = NanoOKOptions.READTYPE_FAIL;
- }
- }
- options.getLog().println(" Mean quality " + meanQ + " output class " + (outputPF == NanoOKOptions.READTYPE_PASS ? "pass":"fail"));
- }
-
- String fastaqPathname = getFastaqFilename(fast5Pathname, t, inputPF, outputPF);
- options.getLog().println(" Writing "+fastaqPathname);
-
- // Registered with the merger before the read file itself is written
- options.getReadFileMerger().addReadFile(fastaqPathname, t, outputPF, ff.getID(), ff.getLength(), meanQ);
-
- if (options.getReadFormat() == NanoOKOptions.FASTA) {
- ff.writeFasta(fastaqPathname, options.outputFast5Path() ? fast5Pathname:null);
- } else {
- ff.writeFastq(fastaqPathname);
- }
-
- if (options.isBlastingRead()) {
- addToBlast(fastaqPathname, t);
- }
-
- if (options.isAligningRead()) {
- runAlign(fastaqPathname);
- }
- }
- }
- }
- }
-
- /**
-  * Worker loop: repeatedly take the next pending file from the file watcher
-  * and extract, align or parse it according to the options, until the file
-  * watcher times out or this thread is interrupted.
-  */
- public void run() {
-     while (!fileWatcher.timedOut()) {
-         FileWatcherItem fwi = null;
-
-         // Get next file to process
-         while ((fwi == null) && !fileWatcher.timedOut()) {
-             fwi = fileWatcher.getPendingFile();
-             if (fwi == null) {
-                 try {
-                     Thread.sleep(500);
-                 } catch (InterruptedException ex) {
-                     Logger.getLogger(ReadProcessorRunnable.class.getName()).log(Level.SEVERE, null, ex);
-                     // Restore the interrupt flag and stop, rather than
-                     // swallowing the request and carrying on polling.
-                     Thread.currentThread().interrupt();
-                     options.getLog().println("Thread exiting");
-                     return;
-                 }
-             }
-         }
-
-         if (fwi != null) {
-             String nextPathname = fwi.getPathname();
-             String lowerPathname = nextPathname.toLowerCase();
-             int pf = fwi.getPassOrFail();
-
-             // Check valid filename
-             if (options.isExtractingReads()) {
-                 if (lowerPathname.endsWith(".fast5")) {
-                     runExtract(nextPathname, pf);
-                 } else {
-                     options.getLog().println("Invalid "+nextPathname);
-                 }
-             } else if (options.isAligningRead()) {
-                 if (lowerPathname.endsWith(".fasta") || lowerPathname.endsWith(".fastq")) {
-                     runAlign(nextPathname);
-                 }
-             } else if (options.isParsingRead()) {
-                 if (lowerPathname.endsWith(options.getParser().getAlignmentFileExtension())) {
-                     runParse(nextPathname);
-                 }
-             }
-         }
-     }
-
-     options.getLog().println("Thread exiting");
- }
-}
diff --git a/src/nanook/ReadSet.java b/src/nanook/ReadSet.java
deleted file mode 100644
index bc6981b..0000000
--- a/src/nanook/ReadSet.java
+++ /dev/null
@@ -1,353 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett (richard.leggett at earlham.ac.uk)
- *
- * Copyright 2015-17 Earlham Institute
- */
-
-package nanook;
-
-import java.io.BufferedReader;
-import java.io.*;
-import java.util.*;
-import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.ThreadPoolExecutor;
-import java.util.concurrent.TimeUnit;
-
-/**
- * Represents a read set (Template reads, Complement reads, or 2D reads).
- *
- * @author Richard Leggett
- */
-public class ReadSet {
- public final static int MAX_READ_DIRS = 1000;
- private ThreadPoolExecutor queryExecutor;
- private NanoOKOptions options;
- private ReadSetStats stats;
- private int type;
- private int nFastaFiles=0;
- private String typeString;
- private long lastCompleted = -1;
-
-
- /**
-  * Constructor
-  * @param t type (defined in NanoOKOptions)
-  * @param o NanoOKOptions object
-  * @param s set of stats to associate with this read set
-  */
- public ReadSet(int t, NanoOKOptions o, ReadSetStats s) {
-     options = o;
-     type = t;
-     stats = s;
-     // Set the type name up front so getTypeString() never returns null,
-     // whichever processReads variant is (or is not) called afterwards.
-     typeString = NanoOKOptions.getTypeFromInt(t);
-
-     queryExecutor = new ThreadPoolExecutor(options.getNumberOfThreads(), options.getNumberOfThreads(), 10, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>());
- }
-
- /**
-  * Redraw the one-line progress bar for an executor. Nothing is printed
-  * when the completed task count has not changed since the last redraw.
-  *
-  * @param tpe executor whose completed/total task counts are shown
-  */
- private void writeProgress(ThreadPoolExecutor tpe) {
-     long done = tpe.getCompletedTaskCount();
-     long submitted = tpe.getTaskCount();
-
-     if (done == lastCompleted) {
-         return;
-     }
-
-     long filled = 0;
-     long blank = NanoOKOptions.PROGRESS_WIDTH;
-     if (submitted > 0) {
-         filled = NanoOKOptions.PROGRESS_WIDTH * done / submitted;
-         blank = NanoOKOptions.PROGRESS_WIDTH - filled;
-     }
-
-     StringBuilder bar = new StringBuilder("\r[");
-     for (long i=0; i<filled; i++) {
-         bar.append("=");
-     }
-     for (long i=0; i<blank; i++) {
-         bar.append(" ");
-     }
-     bar.append("] ").append(done).append("/").append(submitted);
-
-     System.out.print(bar.toString());
-     lastCompleted = done;
- }
-
- /**
-  * Check if a filename has a read extension matching the configured read
-  * format: .fastq/.fq for FASTQ, otherwise .fasta/.fa.
-  * @param f filename
-  * @return true if the extension matches the read format
-  */
- private boolean isValidReadExtension(String f) {
-     if (options.getReadFormat() == NanoOKOptions.FASTQ) {
-         return f.endsWith(".fastq") || f.endsWith(".fq");
-     }
-
-     return f.endsWith(".fasta") || f.endsWith(".fa");
- }
-
- /**
-  * Gather length statistics on reads and parse alignments for read
-  * directories that hold the read files directly.
-  * <p>
-  * Builds the list of read/alignment directory pairs (pass and/or fail, one
-  * per sub-directory when barcoded), submits a ParserRunnable for every read
-  * file that has a matching alignment file, waits for the executor to
-  * finish and then writes and calculates the statistics.
-  *
-  * @return number of read files submitted for parsing
-  * @throws InterruptedException if interrupted while waiting for the parser tasks
-  */
- public int processReadsOld() throws InterruptedException {
-     AlignmentFileParser parser = options.getParser();
-     String[] readDirs = new String[MAX_READ_DIRS];
-     String[] alignerDirs = new String[MAX_READ_DIRS];
-     int readTypes[] = new int[MAX_READ_DIRS];
-     int nDirs = 0;
-     int maxReads = options.getMaxReads();
-     String outputFilename = options.getAnalysisDir() + File.separator + "Unaligned" + File.separator + options.getTypeFromInt(type) + "_nonaligned.txt";
-     AlignmentsTableFile nonAlignedSummary = new AlignmentsTableFile(outputFilename);
-     boolean reachedMaxReads = false;
-
-     nFastaFiles=0;
-
-     stats.openLengthsFile();
-
-     if (options.usingPassFailDirs()) {
-         for (int pf=NanoOKOptions.READTYPE_PASS; pf<=NanoOKOptions.READTYPE_FAIL; pf++) {
-             String passOrFail="";
-
-             if ((pf == NanoOKOptions.READTYPE_PASS) && (options.isProcessingPassReads())) {
-                 passOrFail="pass";
-             } else if ((pf == NanoOKOptions.READTYPE_FAIL) && (options.isProcessingFailReads())) {
-                 passOrFail="fail";
-             }
-
-             // Compare string content, not object identity
-             if (!passOrFail.isEmpty()) {
-                 if (options.isBarcoded()) {
-                     File inputDir = new File(options.getReadDir() + File.separator + passOrFail + File.separator + options.getTypeFromInt(type));
-                     File[] listOfFiles = inputDir.listFiles();
-
-                     // listFiles() returns null when the directory is missing or unreadable
-                     if (listOfFiles == null) {
-                         System.out.println("");
-                         System.out.println("Directory "+inputDir.getPath()+" doesn't exist");
-                         continue;
-                     }
-
-                     for (File file : listOfFiles) {
-                         if (file.isDirectory()) {
-                             if (nDirs == MAX_READ_DIRS) {
-                                 System.out.println("Error: too many directories.\n");
-                                 System.exit(1);
-                             }
-                             readDirs[nDirs] = inputDir.getPath() + File.separator + file.getName();
-                             alignerDirs[nDirs] = options.getAlignerDir() + File.separator + passOrFail + File.separator + options.getTypeFromInt(type) + File.separator + file.getName();
-                             readTypes[nDirs++] = pf;
-                         }
-                     }
-                 } else {
-                     readDirs[nDirs] = options.getReadDir() + File.separator + passOrFail + File.separator + options.getTypeFromInt(type);
-                     alignerDirs[nDirs] = options.getAlignerDir() + File.separator + passOrFail + File.separator + options.getTypeFromInt(type);
-                     readTypes[nDirs++] = pf;
-                 }
-             }
-         }
-     } else {
-         readDirs[nDirs] = options.getReadDir();
-         alignerDirs[nDirs] = options.getAlignerDir();
-         readTypes[nDirs] = NanoOKOptions.READTYPE_COMBINED;
-         nDirs++;
-     }
-
-     // Stop visiting directories once the read limit is hit; breaking out of
-     // the file loop alone would let every later directory add more reads.
-     for (int dirIndex=0; (dirIndex<nDirs) && !reachedMaxReads; dirIndex++) {
-         String inputDir = readDirs[dirIndex];
-         String alignDir = alignerDirs[dirIndex];
-         File folder = new File(inputDir);
-         File[] listOfFiles = folder.listFiles();
-
-         if (listOfFiles == null) {
-             System.out.println("");
-             System.out.println("Directory "+inputDir+" doesn't exist");
-         } else if (listOfFiles.length <= 0) {
-             System.out.println("");
-             System.out.println("Directory "+inputDir+" empty");
-         } else {
-             for (File file : listOfFiles) {
-                 if (file.isFile() && isValidReadExtension(file.getName())) {
-                     String alignmentFilename = alignDir + File.separator + file.getName() + parser.getAlignmentFileExtension();
-
-                     if (new File(alignmentFilename).exists()) {
-                         queryExecutor.execute(new ParserRunnable(options, stats, file.getAbsolutePath(), alignmentFilename, type, readTypes[dirIndex], nonAlignedSummary));
-                         writeProgress(queryExecutor);
-
-                         nFastaFiles++;
-                         if ((maxReads > 0) && (nFastaFiles >= maxReads)) {
-                             reachedMaxReads = true;
-                             break;
-                         }
-                     } else {
-                         System.out.println("Error: Read ignored, can't find alignment "+alignmentFilename);
-                     }
-                 }
-             }
-         }
-     }
-
-     // That's all - wait for all threads to finish
-     queryExecutor.shutdown();
-     while (!queryExecutor.isTerminated()) {
-         writeProgress(queryExecutor);
-         Thread.sleep(100);
-     }
-
-     writeProgress(queryExecutor);
-     System.out.println("");
-
-     stats.closeLengthsFile();
-     stats.writeSummaryFile();
-     stats.calculateStats();
-
-     return nFastaFiles;
- }
-
- /**
-  * Gather length statistics on reads and parse alignments for read
-  * directories whose read files sit one level down in sub-directories
-  * (batch directories).
-  * <p>
-  * Builds the list of read/alignment directory pairs (pass and/or fail, one
-  * per sub-directory when barcoded), submits a ParserRunnable for every read
-  * file that has a matching alignment file, waits for the executor to
-  * finish and then writes and calculates the statistics.
-  *
-  * @return number of read files submitted for parsing
-  * @throws InterruptedException if interrupted while waiting for the parser tasks
-  */
- public int processReadsBatch() throws InterruptedException {
-     AlignmentFileParser parser = options.getParser();
-     String[] readDirs = new String[MAX_READ_DIRS];
-     String[] alignerDirs = new String[MAX_READ_DIRS];
-     int readTypes[] = new int[MAX_READ_DIRS];
-     int nDirs = 0;
-     int maxReads = options.getMaxReads();
-     String outputFilename = options.getAnalysisDir() + File.separator + "Unaligned" + File.separator + options.getTypeFromInt(type) + "_nonaligned.txt";
-     AlignmentsTableFile nonAlignedSummary = new AlignmentsTableFile(outputFilename);
-     boolean reachedMaxReads = false;
-
-     nFastaFiles=0;
-
-     typeString = options.getTypeFromInt(type);
-
-     stats.openLengthsFile();
-
-     for (int pf=NanoOKOptions.READTYPE_PASS; pf<=NanoOKOptions.READTYPE_FAIL; pf++) {
-         String passOrFail="";
-
-         if ((pf == NanoOKOptions.READTYPE_PASS) && (options.isProcessingPassReads())) {
-             passOrFail="pass";
-         } else if ((pf == NanoOKOptions.READTYPE_FAIL) && (options.isProcessingFailReads())) {
-             passOrFail="fail";
-         }
-
-         // Compare string content, not object identity
-         if (!passOrFail.isEmpty()) {
-             if (options.isBarcoded()) {
-                 File inputDir = new File(options.getReadDir() + File.separator + passOrFail + File.separator + typeString);
-                 File[] listOfFiles = inputDir.listFiles();
-
-                 // listFiles() returns null when the directory is missing or unreadable
-                 if (listOfFiles == null) {
-                     System.out.println("");
-                     System.out.println("Directory "+inputDir.getPath()+" doesn't exist");
-                     continue;
-                 }
-
-                 for (File file : listOfFiles) {
-                     if (file.isDirectory()) {
-                         if (nDirs == MAX_READ_DIRS) {
-                             System.out.println("Error: too many directories.\n");
-                             System.exit(1);
-                         }
-                         readDirs[nDirs] = options.getReadDir() + File.separator + passOrFail + File.separator + typeString + File.separator + file.getName();
-                         alignerDirs[nDirs] = options.getAlignerDir() + File.separator + passOrFail + File.separator + typeString + File.separator + file.getName();
-                         readTypes[nDirs++] = pf;
-                     }
-                 }
-             } else {
-                 readDirs[nDirs] = options.getReadDir() + File.separator + passOrFail + File.separator + typeString;
-                 alignerDirs[nDirs] = options.getAlignerDir() + File.separator + passOrFail + File.separator + typeString;
-                 readTypes[nDirs++] = pf;
-             }
-         }
-     }
-
-     // Dirs should be e.g.
-     // inputDir = sample/fasta/Template/pass
-     // alignDir = sample/last/Template/pass
-     // Stop visiting directories once the read limit is hit; breaking out of
-     // the innermost loop alone would let later directories add more reads.
-     for (int dirIndex=0; (dirIndex<nDirs) && !reachedMaxReads; dirIndex++) {
-         String inputDir = readDirs[dirIndex];
-         String alignDir = alignerDirs[dirIndex];
-         File folder = new File(inputDir);
-         File[] listOfFilesTop = folder.listFiles();
-
-         options.getLog().println("Input: "+inputDir);
-         options.getLog().println("Align: "+alignDir);
-
-         // Now list of files should contain batch_XXX directories
-         if (listOfFilesTop == null) {
-             System.out.println("");
-             System.out.println("Directory "+inputDir+" doesn't exist");
-         } else if (listOfFilesTop.length <= 0) {
-             System.out.println("");
-             System.out.println("Directory "+inputDir+" empty");
-         } else {
-             for (File topLevelFile : listOfFilesTop) {
-                 if (reachedMaxReads) {
-                     break;
-                 }
-
-                 options.getLog().println(" Got dir "+ topLevelFile.getName());
-                 if (!topLevelFile.isDirectory()) {
-                     continue;
-                 }
-
-                 // Now go through reads in directory
-                 File[] listOfFiles = topLevelFile.listFiles();
-                 if (listOfFiles == null) {
-                     // Could not be listed (e.g. I/O error) - nothing to submit
-                     continue;
-                 }
-
-                 for (File file : listOfFiles) {
-                     if (file.isFile() && isValidReadExtension(file.getName())) {
-                         String alignmentFilename = alignDir + File.separator + topLevelFile.getName() + File.separator + file.getName() + parser.getAlignmentFileExtension();
-
-                         if (new File(alignmentFilename).exists()) {
-                             queryExecutor.execute(new ParserRunnable(options, stats, file.getAbsolutePath(), alignmentFilename, type, readTypes[dirIndex], nonAlignedSummary));
-                             writeProgress(queryExecutor);
-
-                             nFastaFiles++;
-                             if ((maxReads > 0) && (nFastaFiles >= maxReads)) {
-                                 reachedMaxReads = true;
-                                 break;
-                             }
-                         } else {
-                             System.out.println("Error: Read ignored, can't find alignment "+alignmentFilename);
-                         }
-                     }
-                 }
-             }
-         }
-     }
-
-     // That's all - wait for all threads to finish
-     queryExecutor.shutdown();
-     while (!queryExecutor.isTerminated()) {
-         writeProgress(queryExecutor);
-         Thread.sleep(100);
-     }
-
-     writeProgress(queryExecutor);
-     System.out.println("");
-
-     stats.closeLengthsFile();
-     stats.writeSummaryFile();
-     stats.calculateStats();
-
-     return nFastaFiles;
- }
-
- /**
-  * Process the reads with the variant that matches the directory layout
-  * selected in the options.
-  *
-  * @return number of read files submitted for parsing
-  * @throws InterruptedException if interrupted while waiting for the parser tasks
-  */
- public int processReads() throws InterruptedException {
-     return options.usingBatchDirs() ? processReadsBatch() : processReadsOld();
- }
-
- /**
- * Get type of this read set.
- * @return a String (e.g. "Template")
- */
- public String getTypeString() {
- return typeString;
- }
-
- /**
- * Get stats object.
- * @return a ReadSetStats object
- */
- public ReadSetStats getStats() {
- return stats;
- }
-}
diff --git a/src/nanook/ReadSetStats.java b/src/nanook/ReadSetStats.java
deleted file mode 100644
index ab85a65..0000000
--- a/src/nanook/ReadSetStats.java
+++ /dev/null
@@ -1,625 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.Hashtable;
-import java.util.Map;
-
-/**
- * Represent statistics about a read set (for example Template read set).
- *
- * @author Richard Leggett
- */
-public class ReadSetStats implements Serializable {
- private static final long serialVersionUID = NanoOK.SERIAL_VERSION;
- private NanoOKOptions options;
- private transient PrintWriter pwLengths = null;
- private transient PrintWriter pwKmers = null;
- private String typeString = "";
- private int longest = 0;
- private int shortest = NanoOKOptions.MAX_READ_LENGTH;
- private long basesSum = 0;
- private double meanLength = 0;
- private int n50 = 0;
- private int n50Count = 0;
- private int n90 = 0;
- private int n90Count = 0;
- private int[] lengths = new int[NanoOKOptions.MAX_READ_LENGTH];
- private Hashtable<String,Integer> readLengths = new Hashtable();
- private Hashtable<String,Double> readGC = new Hashtable();
- private int nReads = 0;
- private int nReadFiles = 0;
- private int nPassFiles = 0;
- private int nFailFiles = 0;
- private int nReadsWithAlignments = 0;
- private int nReadsWithoutAlignments = 0;
- private int[] readBestPerfectKmer = new int[NanoOKOptions.MAX_KMER];
- private int[] readCumulativeBestPerfectKmer = new int[NanoOKOptions.MAX_KMER];
- private MotifStatistics motifStats = new MotifStatistics();
- private int substitutionErrors[][] = new int[4][4];
- private int nSubstitutions = 0;
- private int nInsertions = 0;
- private int nDeletions = 0;
- private int ignoredDuplicates = 0;
- private int type;
-
- /**
-  * Constructor
-  * @param o NanoOKOptions object
-  * @param t Type integer (defined in NanoOKOptions)
-  */
- public ReadSetStats(NanoOKOptions o, int t) {
-     options = o;
-     type = t;
-     typeString = NanoOKOptions.getTypeFromInt(t);
-     // readBestPerfectKmer and readCumulativeBestPerfectKmer need no explicit
-     // reset: they are freshly allocated by their field initialisers, and
-     // Java zero-fills new int arrays.
- }
-
- /**
-  * Open the text files that store read lengths and kmer counts for this
-  * read set, and write the header line of the kmers file. Exits the
-  * program if either file cannot be opened.
-  */
- public void openLengthsFile() {
-     String filePrefix = options.getAnalysisDir() + File.separator + "all_" + typeString;
-     String lengthsFilename = filePrefix + "_lengths.txt";
-     String kmersFilename = filePrefix + "_kmers.txt";
-
-     options.getLog().println("Opening "+lengthsFilename);
-     options.getLog().println("Opening "+kmersFilename);
-
-     try {
-         pwLengths = new PrintWriter(new FileWriter(lengthsFilename));
-         pwKmers = new PrintWriter(new FileWriter(kmersFilename));
-         pwKmers.println("Id\tLength\tnk15\tnk17\tnk19\tnk21\tnk23\tnk25");
-     } catch (IOException e) {
-         System.out.println("openLengthsFile exception:");
-         e.printStackTrace();
-         System.exit(1);
-     }
- }
-
- /**
- * Close the read lengths file.
- */
- public void closeLengthsFile() {
- pwLengths.close();
- }
-
- /**
- * Close the kmers file
- */
- public void closeKmersFile() {
- pwKmers.close();
- }
-
- /**
-  * Calculate the mean read length and the N50/N90 values and counts.
-  * Lengths are walked from the longest read downwards; N50 (N90) is the
-  * length at which the running total of bases first reaches 50% (90%) of
-  * all bases, and the matching count is the number of reads seen so far.
-  */
- public void calculateStats() {
-     // Running base total must be a long: basesSum is a long and a run can
-     // hold more than Integer.MAX_VALUE bases, which would wrap an int and
-     // corrupt N50/N90.
-     long total = 0;
-     int c = 0;
-
-     meanLength = (double)basesSum / (double)nReads;
-
-     for (int i=longest; i>0; i--) {
-         for (int j=0; j<lengths[i]; j++) {
-             total += i;
-             c++;
-
-             if ((n50 == 0) && ((double)total >= ((double)basesSum * 0.5))) {
-                 n50 = i;
-                 n50Count = c;
-             }
-
-             if ((n90 == 0) && ((double)total >= ((double)basesSum * 0.9))) {
-                 n90 = i;
-                 n90Count = c;
-             }
-         }
-     }
- }
-
- /**
- * Update count of read files.
- * @param type
- */
- public synchronized void addReadFile(int type) {
- nReadFiles++;
-
- if (type == NanoOKOptions.READTYPE_PASS) {
- nPassFiles++;
- } else if (type == NanoOKOptions.READTYPE_FAIL) {
- nFailFiles++;
- }
- }
-
- /**
- * Get number of read files in pass directory
- * @return Number of files in pass directory
- */
- public synchronized int getNumberOfPassFiles() {
- return nPassFiles;
- }
-
- /**
- * Get number of read files in fail directory
- * @return Number of files in fail directory
- */
- public synchronized int getNumberOfFailFiles() {
- return nFailFiles;
- }
-
- /**
- * Get type
- * @return type
- */
- public int getType() {
- return type;
- }
-
- /**
- * Get type as a string.
- * @return type String
- */
- public String getTypeString() {
- return typeString;
- }
-
- /**
- * Get mean length of reads in this read set.
- * @return length
- */
- public synchronized double getMeanLength() {
- return meanLength;
- }
-
- /**
- * Get longest read in this read set.
- * @return length
- */
- public synchronized int getLongest() {
- return longest;
- }
-
- /**
- * Get shortest read in this read set.
- * @return length
- */
- public synchronized int getShortest() {
- return shortest;
- }
-
- /**
- * Get N50 for this read set.
- * @return N50 length
- */
- public synchronized int getN50() {
- return n50;
- }
-
- /**
- * Get N50 count - number of reads of length N50 or greater.
- * @return count
- */
- public synchronized int getN50Count() {
- return n50Count;
- }
-
- /**
- * Get N90 for this read set.
- * @return N90 length
- */
- public synchronized int getN90() {
- return n90;
- }
-
- /**
- * Get N90 count - number of reads of length N90 or greater.
- * @return count
- */
- public synchronized int getN90Count() {
- return n90Count;
- }
-
- /**
- * Get number of reads.
- * @return number of reads
- */
- public synchronized int getNumReads() {
- return nReads;
- }
-
- /**
- * Get total bases represented by read set.
- * @return number of bases
- */
- public synchronized long getTotalBases() {
- return basesSum;
- }
-
- /**
- * Get number of read files.
- * @return number of files
- */
- public synchronized int getNumReadFiles() {
- return nReadFiles;
- }
-
- /**
-  * Get the key prefix for a read or alignment file: its leaf name with any
-  * ':' replaced by '_' and cut at the first ".fa". The ':' is replaced
-  * because callers join this prefix and the read ID with ':'.
-  *
-  * @param path pathname of a read or alignment file
-  * @return leaf name up to the first ".fa", or the whole leaf name if it
-  *         contains no ".fa"
-  */
- private String getPrefix(String path) {
-     // Strings are immutable, so the replaced value has to be kept
-     String leafname = new File(path).getName().replace(':', '_');
-     int extensionStart = leafname.indexOf(".fa");
-
-     // Without this check a name lacking ".fa" made substring(0, -1) throw
-     return (extensionStart >= 0) ? leafname.substring(0, extensionStart) : leafname;
- }
-
- /**
-  * Record one read: write its ID and length to the lengths file, remember
-  * its length and GC under a key made of the file prefix and the read ID,
-  * and update the length histogram, longest/shortest and the totals.
-  * @param readPath pathname of the read file (used to build the key)
-  * @param id ID of read
-  * @param l length
-  * @param gc GC value stored for the read
-  */
- public synchronized void addLength(String readPath, String id, int l, double gc) {
- // The lengths file gets the bare read ID, before it is prefixed below
- pwLengths.println(id + "\t" + l);
- id = getPrefix(readPath) + ":"+id;
-
- // NOTE(review): a duplicate ID is not stored in the maps again, but it is
- // still counted in lengths[], basesSum and nReads below - confirm intended.
- if (readLengths.containsKey(id)) {
- System.out.println("Error: Read ID "+id+" already seen. This occurrance ignored.");
- ignoredDuplicates++;
- } else {
- readLengths.put(id, l);
- readGC.put(id, gc);
- }
-
- if (l < NanoOKOptions.MAX_READ_LENGTH) {
- lengths[l]++;
- if (l > longest) {
- longest = l;
- }
-
- if (l < shortest) {
- shortest = l;
- }
- } else {
- // NOTE(review): the message says "ignored", but the read is only left out
- // of the histogram; it still adds to basesSum and nReads - confirm intended.
- System.out.println("Error: unexpectedly long ("+l+") read ignored - "+readPath);
- }
-
- basesSum += l;
- nReads++;
- }
-
- /**
-  * Get length of read
-  * @param alignmentFile pathname of the alignment file (used to build the key)
-  * @param id of read
-  * @return length, in bases, or -1 if the read is not known
-  */
- public synchronized int getReadLength(String alignmentFile, String id) {
-     Integer stored = readLengths.get(getPrefix(alignmentFile) + ":"+id);
-
-     return (stored == null) ? -1 : stored.intValue();
- }
-
- /**
-  * Get GC of read
-  * @param alignmentFile pathname of the alignment file (used to build the key)
-  * @param id of read
-  * @return stored GC value, or 50.0 (with a warning) if the read is not known
-  */
- public synchronized double getGC(String alignmentFile, String id) {
-     Double stored = readGC.get(getPrefix(alignmentFile) + ":"+id);
-
-     if (stored != null) {
-         return stored;
-     }
-
-     System.out.println("Warning: couldn't get GC from " + alignmentFile + " - assumed 50%");
-     return 50.0;
- }
-
- /**
- * Store a read with an alignment.
- */
- public synchronized void addReadWithAlignment() {
- nReadsWithAlignments++;
- }
-
- /**
- * Store a read without an alignment.
- */
- public synchronized void addReadWithoutAlignment() {
- nReadsWithoutAlignments++;
- }
-
- /**
-  * Store best perfect kmer for a read: count it at its exact size and in
-  * every cumulative bin below that size. Exits the program if the size is
-  * not below NanoOKOptions.MAX_KMER.
-  * @param bestKmer length of best perfect kmer
-  */
- public synchronized void addReadBestKmer(int bestKmer) {
-     if (bestKmer >= NanoOKOptions.MAX_KMER) {
-         System.out.println("Error: the unlikely event of a best kmer size of "+bestKmer+" has happened! (Max "+NanoOKOptions.MAX_KMER+")");
-         System.exit(1);
-     }
-
-     readBestPerfectKmer[bestKmer]++;
-
-     int size = 0;
-     while (size < bestKmer) {
-         readCumulativeBestPerfectKmer[size]++;
-         size++;
-     }
- }
-
- /**
- * Get number of reads in this read set.
- * @return number of reads.
- */
- public synchronized int getNumberOfReads() {
- return nReads;
- }
-
- /**
- * Get number of reads with alignments in this read set.
- * @return number of reads
- */
- public synchronized int getNumberOfReadsWithAlignments() {
- return nReadsWithAlignments;
- }
-
- /**
- * Get number of reads without alignments in this read set.
- * @return number of reads
- */
- public synchronized int getNumberOfReadsWithoutAlignments() {
- return nReadsWithoutAlignments;
- }
-
- /**
- * Get percentage of reads with alignments
- * @return percentage of reads
- */
- public synchronized double getPercentOfReadsWithAlignments() {
- return (100.0 * (double)nReadsWithAlignments) / (double)nReads;
- }
-
- /**
- * Get percentage of reads without alignments
- * @return percentage of reads
- */
- public synchronized double getPercentOfReadsWithoutAlignments() {
- return (100.0 * (double)nReadsWithoutAlignments) / (double)nReads;
- }
-
- /**
- * Print statistics to screen.
- */
- public synchronized void printStats() {
- System.out.println("Parse " + typeString + " alignments");
- System.out.println(typeString + " reads: " + nReads);
- System.out.println(typeString + " reads with alignments: " + nReadsWithAlignments);
- System.out.println(typeString + " reads without alignments: " + nReadsWithoutAlignments);
- }
-
- /**
- * Write a short summary file for this read set.
- * @param filename output filename
- */
- public void writeSummaryFile() {
- String filename = options.getAlignmentSummaryFilename();
- try {
- PrintWriter pw = new PrintWriter(new FileWriter(filename, true));
- pw.println("");
- pw.printf("%s alignments", typeString);
- pw.println("");
- pw.println("");
- pw.printf("Num reads: %d", nReads);
- pw.println("");
- pw.printf("Num reads with alignments: %d", nReadsWithAlignments);
- pw.println("");
- pw.printf("Num reads without alignments: %d", nReadsWithoutAlignments);
- pw.println("");
- pw.close();
- } catch (IOException e) {
- System.out.println("writeSummaryFile exception:");
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- /**
- * Store a deletion error.
- * @param size size of deletion
- * @param kmer kmer prior to error
- */
- public synchronized void addDeletionError(int size, String kmer) {
- motifStats.addDeletionMotifs(kmer);
- nDeletions++;
- }
-
- /**
- * Store an insertion error.
- * @param size size of insertion
- * @param kmer kmer prior to error
- */
- public synchronized void addInsertionError(int size, String kmer) {
- motifStats.addInsertionMotifs(kmer);
- nInsertions++;
- }
-
- /**
- * Store a substitution error.
- * @param kmer kmer prior to error
- * @param refChar reference base
- * @param subChar substituted base
- */
- public synchronized void addSubstitutionError(String kmer, char refChar, char subChar) {
- int r = -1;
- int s = -1;
-
- motifStats.addSubstitutionMotifs(kmer);
-
- switch(refChar) {
- case 'A': r=0; break;
- case 'C': r=1; break;
- case 'G': r=2; break;
- case 'T': r=3; break;
- default: break; //System.out.println("Warning: Unknown base ("+refChar+") in reference"); break;
- }
-
- switch(subChar) {
- case 'A': s=0; break;
- case 'C': s=1; break;
- case 'G': s=2; break;
- case 'T': s=3; break;
- default: System.out.println("Warning: Unknown base ("+refChar+") in read"); break;
- }
-
- if ((r >= 0) && (s >= 0)) {
- nSubstitutions++;
- substitutionErrors[r][s]++;
- }
- }
-
- /**
- * Get substitution error matrix (A, C, G, T vs A, C, G, T).
- * @return Substitution error matrix
- */
- public synchronized int[][] getSubstitutionErrors() {
- return substitutionErrors;
- }
-
- /**
- * Get number of substitutions.
- * @return number
- */
- public synchronized int getNumberOfSubstitutions() {
- return nSubstitutions;
- }
-
- /**
- * Write motif stats to screen.
- */
- public synchronized void outputMotifStats() {
- motifStats.outputAllMotifCounts();
- }
-
- /**
- * Get motif statistics.
- * @return MotifStatistics object
- */
- public synchronized MotifStatistics getMotifStatistics() {
- return motifStats;
- }
-
- public synchronized void writekCounts(String id, int length, int nk, int[] s, int[] kCounts) {
- pwKmers.print(id+"\t"+Integer.toString(length));
- for (int i=0; i<nk; i++) {
- pwKmers.print("\t"+Integer.toString(kCounts[i]));
- }
- pwKmers.println("");
- }
-
- /**
- * Get options
- */
- public NanoOKOptions getOptions() {
- return options;
- }
-
- /**
- * Write substitution stats to a file
- */
- public void writeSubstitutionStats() {
- String filenamePc = options.getAnalysisDir() + File.separator + "all_" + NanoOKOptions.getTypeFromInt(type) + "_substitutions_percent.txt";
- String bases[] = {"A","C","G","T"};
- try {
- PrintWriter pwPc = new PrintWriter(new FileWriter(filenamePc));
- pwPc.println("\tSubA\tSubC\tSubG\tSubT");
- for (int r=0; r<4; r++) {
- pwPc.print("Ref"+bases[r]);
- for (int s=0; s<4; s++) {
- double pc = 0;
-
- if (substitutionErrors[r][s] > 0) {
- pc = (100.0 * (double)substitutionErrors[r][s]) / nSubstitutions;
- }
- pwPc.printf("\t%.2f", pc);
- }
- pwPc.println("");
- }
- pwPc.close();
- } catch (IOException e) {
- System.out.println("writeSubstitutionStats exception:");
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- /**
- * Write error motif stats to a file
- */
- public void writeErrorMotifStats() {
- try {
- for (int t=0; t<3; t++) {
- for (int n=3; n<=5; n++) {
- ArrayList<Map.Entry<String, Double>> motifs = null;
- String typeString = "";
- String filename = "";
-
- if (t == 0) {
- typeString = "insertion";
- motifs = motifStats.getSortedInsertionMotifPercentages(n);
- } else if (t == 1) {
- typeString = "deletion";
- motifs = motifStats.getSortedDeletionMotifPercentages(n);
- } else {
- typeString = "substitution";
- motifs = motifStats.getSortedSubstitutionMotifPercentages(n);
- }
-
- filename = options.getAnalysisDir() + File.separator + "all_" + NanoOKOptions.getTypeFromInt(type) + "_"+typeString+"_"+n+"mer_motifs.txt";
- PrintWriter pw = new PrintWriter(new FileWriter(filename));
- pw.println("Kmer\tPercentage");
-
- for (int i=0; i<motifs.size(); i++) {
- pw.printf("%s\t%.4f", motifs.get(i).getKey(), motifs.get(i).getValue());
- pw.println("");
- }
- pw.close();
- }
- }
- } catch (IOException e) {
- System.out.println("writeSubstitutionStats exception:");
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- public int getIgnoredDuplicates() {
- return ignoredDuplicates;
- }
-}
\ No newline at end of file
diff --git a/src/nanook/ReadStats.java b/src/nanook/ReadStats.java
deleted file mode 100644
index 3856b9c..0000000
--- a/src/nanook/ReadStats.java
+++ /dev/null
@@ -1,6 +0,0 @@
-
-package nanook;
-
-public class ReadStats {
-
-}
diff --git a/src/nanook/ReferenceSequence.java b/src/nanook/ReferenceSequence.java
deleted file mode 100644
index f9e3f46..0000000
--- a/src/nanook/ReferenceSequence.java
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.io.Serializable;
-import java.util.HashSet;
-import java.util.Hashtable;
-import java.util.Set;
-
-/**
- * Represents a sequence (contig) within a reference.
- *
- * @author Richard leggett
- */
-public class ReferenceSequence implements Comparable, Serializable {
- private static final long serialVersionUID = NanoOK.SERIAL_VERSION;
- private String id = null;
- private String name = null;
- private int size = 0;
- private int binSize = 500;
- private ReferenceSequenceStats referenceStats[] = new ReferenceSequenceStats[3];
- private KmerTable refKmerTable = new KmerTable(5);
-
- /**
- * Constructor
- * @param i sequence ID
- * @param s size (length) of sequence
- * @param n display name (may be difference to ID in file)
- */
- public ReferenceSequence(String i, int s, String n) {
- id = i;
- size = s;
- name = n;
-
- float b = size / 100;
-
- // Make a multiple of 10, 100 or 500...
- if (size < 50000) {
- binSize = 10 * (1 + Math.round(b / 10));
- } else if (size < 500000) {
- binSize = 100 * (1 + Math.round(b / 100));
- } else {
- binSize = 500 * (1 + Math.round(b / 500));
- }
-
- for (int t=0; t<3; t++) {
- referenceStats[t] = new ReferenceSequenceStats(size, name);
- }
- }
-
- /**
- * Open alignment summary files for each reference for each type (Template, Complement, 2D).
- *
- * @param analysisDir directory to write files to
- */
- public void openAlignmentSummaryFiles(NanoOKOptions options) {
- for (int t=0; t<3; t++) {
- if (options.isProcessingReadType(t)) {
- referenceStats[t].openAlignmentsTableFile(options.getAnalysisDir() + File.separator + name + File.separator + name + "_" + NanoOKOptions.getTypeFromInt(t) + "_alignments.txt");
- }
- }
- }
-
- /**
- * Get stats for a particular type (Template, Complement, 2D).
- * @param t integer type
- * @return ReferenceSequenceStats object
- */
- public ReferenceSequenceStats getStatsByType(int t) {
- return referenceStats[t];
- }
-
- /**
- * Get ID for this sequence.
- * @return ID String
- */
- public String getId() {
- return id;
- }
-
- /**
- * Get display name for this sequence.
- * @return name String
- */
- public String getName() {
- return name;
- }
-
- /**
- * Get size (length) of this sequence.
- * @return length
- */
- public int getSize() {
- return size;
- }
-
- /**
- * Get bin size for graph plotting
- * @return size (nt)
- */
- public int getBinSize() {
- return binSize;
- }
-
- public int compareTo(Object o) {
- ReferenceSequence r = (ReferenceSequence)o;
- return name.compareTo(r.getName());
- }
-
- /**
- * Get kmer table
- * @return
- */
- public KmerTable getKmerTable() {
- return refKmerTable;
- }
-
- /**
- *
- */
- public void writeKmerFile(int type, String filename) {
- KmerTable readKmerTable = referenceStats[type].getReadKmerTable();
-
- try {
- PrintWriter pw = new PrintWriter(new FileWriter(filename));
- pw.println("Kmer\tRefCount\tReadCount\tRefPc\tReadPc");
-
- Set<String> refKeys = refKmerTable.getKeys();
- Set<String> readKeys = readKmerTable.getKeys();
- HashSet<String> allKeys = new HashSet();
- int refTotal = 0;
- int readTotal = 0;
-
- for (String kmer : refKeys) {
- refTotal += refKmerTable.get(kmer);
- allKeys.add(kmer);
- }
-
- int count = 0;
- for (String kmer : readKeys) {
- readTotal += readKmerTable.get(kmer);
- if (! allKeys.contains(kmer)) {
- allKeys.add(kmer);
- count++;
- }
- }
-
- for (String kmer : allKeys) {
- int refCount = refKmerTable.get(kmer);
- int readCount = readKmerTable.get(kmer);
- double refPc = 0;
- double readPc = 0;
-
- if (refCount > 0) {
- refPc = (100 * refCount) / (double)refTotal;
- }
-
- if (readCount > 0) {
- readPc = (100 * readCount) / (double)readTotal;
- }
-
- referenceStats[type].addKmerAbundance(kmer, refPc, readPc);
-
- pw.printf("%s\t%d\t%d\t%.4f\t%.4f", kmer, refCount, readCount, refPc, readPc);
- pw.println("");
- }
-
- pw.close();
- } catch (IOException e) {
- System.out.println("Exception:");
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- public int getTotalNumberOfAlignments() {
- return referenceStats[0].getNumberOfReadsWithAlignments() +
- referenceStats[1].getNumberOfReadsWithAlignments() +
- referenceStats[2].getNumberOfReadsWithAlignments();
- }
-}
diff --git a/src/nanook/ReferenceSequenceStats.java b/src/nanook/ReferenceSequenceStats.java
deleted file mode 100644
index 39133e8..0000000
--- a/src/nanook/ReferenceSequenceStats.java
+++ /dev/null
@@ -1,548 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.Collections;
-
-/**
- * Stores stats for each reference sequence, one object per read type (Template, Complement, 2D).
- *
- * @author Richard Leggett
- */
-public class ReferenceSequenceStats implements Serializable {
- private static final long serialVersionUID = NanoOK.SERIAL_VERSION;
- private static final int MAX_INDEL = 100;
- private int size;
- private String name;
- private SequenceCoverage cov;
- //int[] coverage;
- private int[] perfectKmerCounts = new int[NanoOKOptions.MAX_KMER];
- private int[] readBestPerfectKmer = new int[NanoOKOptions.MAX_KMER];
- private int[] readCumulativeBestPerfectKmer = new int[NanoOKOptions.MAX_KMER];
- private int longestPerfectKmer = 0;
- private int nReadsWithAlignments = 0;
- private int totalReadBases = 0;
- private int totalAlignedBases = 0;
- private int totalAlignedBasesWithoutIndels = 0;
- private int totalIdentical = 0;
- private int nDeletionErrors = 0;
- private int nInsertionErrors = 0;
- private int nSubstitutionErrors = 0;
- private int nInsertedBases = 0;
- private int nDeletedBases = 0;
- private int largestInsertion = 0;
- private int largestDeletion = 0;
- private int insertionSizes[] = new int[MAX_INDEL];
- private int deletionSizes[] = new int[MAX_INDEL];
- private int alignedPositiveStrand = 0;
- private int alignedNegativeStrand = 0;
- private long totalBases = 0;
- private long totalReads = 0;
- private KmerTable readKmerTable = new KmerTable(5);
- private AlignmentsTableFile atf;
- private ArrayList<KmerAbundance> kmerAbundance = new ArrayList();
-
- /**
- * Constructor.
- * @param size size (length) of reference
- * @param n name of reference
- */
- public ReferenceSequenceStats(int s, String n) {
- size = s;
- name = n;
- cov = new SequenceCoverage(size);
- //coverage = new int[size];
- }
-
- /**
- * Create an alignments table file.
- * @param filename flename
- */
- public void openAlignmentsTableFile(String filename) {
- atf = new AlignmentsTableFile(filename);
- }
-
- /**
- * Get the associated AlignmentsTableFile object
- * @return an AlignmentsTableFile
- */
- public AlignmentsTableFile getAlignmentsTableFile() {
- return atf;
- }
-
- /**
- * Get number of reads with alignments.
- * @return number of reads
- */
- public synchronized int getNumberOfReadsWithAlignments() {
- return nReadsWithAlignments;
- }
-
- /**
- * Get longest perfect kmer length.
- * @return length longest perfect kmer, in bases
- */
- public synchronized int getLongestPerfectKmer() {
- return longestPerfectKmer;
- }
-
- /**
- * Store all perfect kmer sizes for later analysis.
- * @param size size of kmer
- */
- public synchronized void addPerfectKmer(int size) {
- if (size >= NanoOKOptions.MAX_KMER) {
- System.out.println("Error: very unlikely situation with perfect kmer of size " + size + " (Max " + NanoOKOptions.MAX_KMER + ")");
- System.exit(1);
- }
-
- perfectKmerCounts[size]++;
-
- if (size > longestPerfectKmer) {
- longestPerfectKmer = size;
- }
- }
-
- /**
- * Increment coverage between two points.
- * @param start start position
- * @param size size
- */
- public synchronized void addCoverage(int start, int size) {
- cov.addCoverage(start, size);
- //for (int i=start; i<(start+size); i++) {
- // coverage[i]++;
- //}
- }
-
- /**
- * Store best perfect kmer length for each read.
- * @param bestKmer length of best perfect kmer
- */
- public synchronized void addReadBestKmer(int bestKmer) {
- readBestPerfectKmer[bestKmer]++;
-
- for (int i=1; i<=bestKmer; i++) {
- readCumulativeBestPerfectKmer[i]++;
- }
-
- nReadsWithAlignments++;
- }
-
- /**
- * Write coverage file for later graph plotting.
- * @param filename output filename
- * @param binSize bin size
- */
- public void writeCoverageData(String filename, int binSize) {
- cov.writeCoverageData(filename, binSize);
-// try {
-// PrintWriter pw = new PrintWriter(new FileWriter(filename));
-// for (int i=0; i<(size-binSize); i+=binSize) {
-// int count = 0;
-// for (int j=0; j<binSize; j++) {
-// count += coverage[i+j];
-// }
-// pw.printf("%d\t%.2f", i, ((double)count / (double)binSize));
-// pw.println("");
-// }
-// pw.close();
-// } catch (IOException e) {
-// System.out.println("writeCoverageData exception:");
-// e.printStackTrace();
-// System.exit(1);
-// }
- }
-
- /**
- * Write data for perfect kmer histogram.
- * @param filename output filename
- */
- public void writePerfectKmerHist(String filename) {
- try {
- PrintWriter pw = new PrintWriter(new FileWriter(filename));
- for (int i=1; i<=longestPerfectKmer; i++) {
- pw.printf("%d\t%d", i, perfectKmerCounts[i]);
- pw.println("");
- }
- pw.close();
- } catch (IOException e) {
- System.out.println("writePerfectKmerHist exception:");
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- /**
- * Write data for best perfect kmer histogram.
- * @param filename output filename
- */
- public void writeBestPerfectKmerHist(String filename) {
- try {
- PrintWriter pw = new PrintWriter(new FileWriter(filename));
- for (int i=1; i<=longestPerfectKmer; i++) {
- double pc = 0;
-
- if ((readBestPerfectKmer[i] > 0) && (nReadsWithAlignments > 0)) {
- pc = ((double)100.0 * readBestPerfectKmer[i]) / (double)nReadsWithAlignments;
- }
-
- pw.printf("%d\t%d\t%.2f", i, readBestPerfectKmer[i], pc);
- pw.println("");
- }
- pw.close();
- } catch (IOException e) {
- System.out.println("writeBestPerfectKmerHist exception:");
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- /**
- * Write data for best perfect kmer cumulative histogram.
- * @param filename output filename
- */
- public void writeBestPerfectKmerHistCumulative(String filename) {
- int nr = 0;
-
- for (int i=1; i<=longestPerfectKmer; i++) {
- nr += readBestPerfectKmer[i];
- }
-
- if (nReadsWithAlignments != nr) {
- System.out.println("Discrepancy: "+nr+" not equal to "+nReadsWithAlignments);
- }
-
- try {
- PrintWriter pw = new PrintWriter(new FileWriter(filename));
- for (int i=1; i<=longestPerfectKmer; i++) {
- double pc = 0;
-
- if ((readCumulativeBestPerfectKmer[i]> 0) && (nReadsWithAlignments > 0)){
- pc = ((double)100.0 * readCumulativeBestPerfectKmer[i]) / (double)nr; //(double)nReadsWithAlignments;
- }
-
- pw.printf("%d\t%d\t%.2f", i, readCumulativeBestPerfectKmer[i], pc);
- pw.println("");
- }
- pw.close();
- } catch (IOException e) {
- System.out.println("writeBestPerfectKmerHistCumulative exception:");
- e.printStackTrace();
- System.exit(1);
- }
- }
- /**
- * Write a line to the reference sequence summary file.
- * @param pw PrintWriter object to write with
- * @param format format string for output
- */
- public void writeSummary(PrintWriter pw, String format) {
- pw.printf(format, name, size, nReadsWithAlignments, longestPerfectKmer);
- pw.println("");
- }
-
- /**
- * Get mean read length
- * @return mean read length
- */
- public synchronized double getMeanReadLength() {
- if (nReadsWithAlignments > 0) {
- return (double)totalReadBases / (double)nReadsWithAlignments;
- } else {
- return 0.0;
- }
- }
-
- /**
- * Store alignment stats.
- * @param querySize query size
- * @param alignedSize number of aligned bases
- * @param identicalBases number of identical bases
- */
- public synchronized void addAlignmentStats(int querySize, int alignedSize, int alignedSizeMinusIndels, int identicalBases, String hitStrand, String queryStrand) {
- totalAlignedBases += alignedSize;
- //System.out.println("\nAlignedBases " + alignedSize);
- totalAlignedBasesWithoutIndels += alignedSizeMinusIndels;
- totalReadBases += querySize;
- totalIdentical += identicalBases;
-
- if (hitStrand.equals("+")) {
- if (queryStrand.equals("+")) {
- alignedPositiveStrand++;
- } else if (queryStrand.equals("-")) {
- alignedNegativeStrand++;
- }
- }
- }
-
- /**
- * Store a deletion error.
- * @param size - size of deletion
- * @param kmer - kmer before error
- * @param stats - ReadSetStats associated with the error
- */
- public synchronized void addDeletionError(int size, String kmer, ReadSetStats stats) {
- //System.out.println("Delete " + size);
- if (size >= MAX_INDEL) {
- System.out.println("Error: indel much larger than expected ("+size+") - possible parsing error");
- System.out.println("");
- } else {
- nDeletionErrors++;
- nDeletedBases += size;
- deletionSizes[size]++;
- if (size > largestDeletion) {
- largestDeletion = size;
- }
- stats.addDeletionError(size, kmer);
- }
- }
-
- /**
- * Store an insertion error.
- * @param size - size of insertion
- * @param kmer - kmer before error
- * @param stats - ReadSetStats associated with the error
- */
- public synchronized void addInsertionError(int size, String kmer, ReadSetStats stats) {
- //System.out.println("Insert " + size);
- if (size >= MAX_INDEL) {
- System.out.println("Error: indel much larger than expected ("+size+") - possible parsing error");
- System.out.println("");
- } else {
- nInsertionErrors++;
- nInsertedBases += size;
- insertionSizes[size]++;
- if (size > largestInsertion) {
- largestInsertion = size;
- }
- stats.addInsertionError(size, kmer);
- }
- }
-
- /**
- * Get the mean deletion size
- * @return size, as double
- */
- public synchronized double getMeanDeletionSize() {
- return (double)nDeletedBases / (double)nDeletionErrors;
- }
-
- /**
- * Get the mean insertion size
- * @return size, as double
- */
- public synchronized double getMeanInsertionSize() {
- return (double)nInsertedBases / (double)nInsertionErrors;
- }
-
- /**
- * Store a substitution error.
- * @param kmer - kmer before error
- * @param refChar - reference base
- * @param subChar - substituted base
- * @param stats - ReadSetStats associated with the error
- */
- public synchronized void addSubstitutionError(String kmer, char refChar, char subChar, ReadSetStats stats) {
- nSubstitutionErrors++;
- //System.out.println("Kmer before substitution "+kmer);
- stats.addSubstitutionError(kmer, refChar, subChar);
- }
-
- /**
- * Get percent identity of aligned bases.
- * @return identity
- */
- public synchronized double getAlignedPercentIdentical() {
- if ((totalIdentical == 0) || (totalAlignedBases == 0)) {
- return 0;
- } else {
- return (100.0 * totalIdentical) / totalAlignedBases;
- }
- }
-
- /**
- * Get percent identity of aligned bases.
- * @return identity
- */
- public synchronized double getAlignedPercentIdenticalWithoutIndels() {
- if ((totalIdentical == 0) || (totalAlignedBasesWithoutIndels == 0)) {
- return 0;
- } else {
- return (100.0 * totalIdentical) / totalAlignedBasesWithoutIndels;
- }
- }
-
- /**
- * Get percent identity of read.
- * @return identity
- */
- public synchronized double getReadPercentIdentical() {
- if ((totalIdentical == 0) || (totalReadBases == 0)) {
- return 0;
- } else {
- return (100.0 * totalIdentical) / totalReadBases;
- }
- }
-
- /**
- * Getnumber of insertion errors.
- * @return number
- */
- public synchronized int getNumberOfInsertionErrors() {
- return nInsertionErrors;
- }
-
- /**
- * Get number of deletion errors.
- * @return number
- */
- public synchronized int getNumberOfDeletionErrors() {
- return nDeletionErrors;
- }
-
- /**
- * Get number of substitution errors.
- * @return number
- */
- public synchronized int getNumberOfSubstitutionErrors() {
- return nSubstitutionErrors;
- }
-
- /**
- * Get percentage of insertion errors
- * @return percentage
- */
- public synchronized double getPercentInsertionErrors() {
- if ((nInsertedBases == 0) || (totalAlignedBases == 0)) {
- return 0;
- } else {
- return (100.0 * nInsertedBases) / (totalAlignedBases);
- }
- }
-
- /**
- * Get percentage of deletion errors
- * @return percentage
- */
- public synchronized double getPercentDeletionErrors() {
- if ((nDeletedBases == 0) || (totalAlignedBases == 0)) {
- return 0;
- } else {
- return (100.0 * nDeletedBases) / (totalAlignedBases);
- }
- }
-
- /**
- * Get percentage of substitution errors
- * @return percentage
- */
- public synchronized double getPercentSubstitutionErrors() {
- if ((nSubstitutionErrors == 0) || (totalAlignedBases == 0)) {
- return 0;
- } else {
- return (100.0 * nSubstitutionErrors) / (totalAlignedBases);
- }
- }
-
- /**
- * Get the number of aligned bases
- * @return number of bases
- */
- public synchronized int getTotalAlignedBases() {
- return totalAlignedBases;
- }
-
- /**
- * Write a file of insertion stats for plotting.
- * @param filename output filename
- */
- public void writeInsertionStats(String filename) {
- try {
- PrintWriter pw = new PrintWriter(new FileWriter(filename));
- for (int i=1; i<=largestInsertion; i++) {
- //pw.println(i + "\t" + insertionSizes[i]);
- pw.printf("%d\t%.4f", i, (100.0 * (double)insertionSizes[i]/(double)nInsertionErrors));
- pw.println("");
- }
- pw.close();
- } catch (IOException e) {
- System.out.println("writeInsertionStats exception:");
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- /**
- * Write a file of deletion stats for plotting.
- * @param filename output filename
- */
- public void writeDeletionStats(String filename) {
- try {
- PrintWriter pw = new PrintWriter(new FileWriter(filename));
- for (int i=1; i<=largestDeletion; i++) {
- //pw.println(i + "\t" + deletionSizes[i]);
- pw.printf("%d\t%.4f", i, (100.0 * (double)deletionSizes[i]/(double)nDeletionErrors));
- pw.println("");
- }
- pw.close();
- } catch (IOException e) {
- System.out.println("writeDeletionStats exception:");
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- /**
- * Get percent of reads aligned on +ve strand
- * @return count
- */
- public synchronized double getAlignedPositiveStrandPercent() {
- if (alignedPositiveStrand > 0) {
- return (100.0 * (double)alignedPositiveStrand)/(double)(alignedPositiveStrand + alignedNegativeStrand);
- } else {
- return 0;
- }
- }
-
- /**
- * Get percent of reads aligned on -ve strand
- * @return count
- */
- public synchronized double getAlignedNegativeStrandPercent() {
- if (alignedNegativeStrand > 0) {
- return (100.0 * (double)alignedNegativeStrand)/(double)(alignedPositiveStrand + alignedNegativeStrand);
- } else {
- return 0;
- }
- }
-
- public KmerTable getReadKmerTable() {
- return readKmerTable;
- }
-
- public void addKmerAbundance(String kmer, double refAbundance, double readAbundance) {
- kmerAbundance.add(new KmerAbundance(kmer, refAbundance, readAbundance));
- }
-
- public void sortKmerAbundance() {
- Collections.sort(kmerAbundance);
- for (int i=0; i<10; i++) {
- KmerAbundance k = kmerAbundance.get(i);
- }
- }
-
- public ArrayList getKmerAbundance() {
- return kmerAbundance;
- }
-}
diff --git a/src/nanook/References.java b/src/nanook/References.java
deleted file mode 100644
index a48d80e..0000000
--- a/src/nanook/References.java
+++ /dev/null
@@ -1,345 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.*;
-import java.util.*;
-
-/**
- * Represents the set of references (sequences) used for the analysis.
- *
- * @author Richard Leggett
- */
-public class References implements Serializable {
- private static final long serialVersionUID = NanoOK.SERIAL_VERSION;
- private NanoOKOptions options;
- private File sizesFile;
- private Hashtable<String,ReferenceSequence> referenceSeqIds = new Hashtable();
- private Hashtable<String,ReferenceSequence> referenceSeqNames = new Hashtable();
- private int longestId = 0;
- private OverallStats overallStats = null;
-
- /**
- * Constructor
- * @param o a NanoOKOptions object
- */
- public References(NanoOKOptions o)
- {
- options = o;
- }
-
- public void setOverallStats(OverallStats s) {
- overallStats = s;
- }
-
- public void readSizesFile() {
- sizesFile = new File(options.getReferenceFile()+".sizes");
-
- if (sizesFile.exists()) {
- System.out.println("Using .sizes file "+sizesFile.getName());
- System.out.println("Note: if you have changed the reference file, you need to delete the .sizes file and re-run.\n");
- } else {
- int extensionIndex = options.getReferenceFile().lastIndexOf('.');
- if (extensionIndex > 0) {
- String minusExtension = options.getReferenceFile().substring(0, extensionIndex);
- sizesFile = new File(minusExtension + ".sizes");
- }
- }
-
- if (!sizesFile.exists()) {
- System.out.println("Error: can't read sizes file.");
- System.out.println("Generating .sizes file for reference. You may want to edit the display names.");
- SequenceReader sr = new SequenceReader(false);
- sr.indexFASTAFile(options.getReferenceFile(), options.getReferenceFile()+".sizes" , false);
- sizesFile = new File(options.getReferenceFile()+".sizes");
- }
-
- System.out.println("Reading reference sizes and making directories");
-
- try
- {
- BufferedReader br = new BufferedReader(new FileReader(sizesFile));
- String line = br.readLine();
- while (line != null) {
- if (!line.startsWith("#") && (!line.startsWith("SequenceID"))) {
- String[] values = line.split("\\t");
- int size = Integer.parseInt(values[1]);
-
- ReferenceSequence refSeqById = referenceSeqIds.get(values[0]);
- if (refSeqById != null) {
- System.out.println("Error: reference contig ID "+values[0]+" occurs more than once.");
- System.exit(1);
- }
-
- ReferenceSequence refSeqByName = referenceSeqNames.get(values[2]);
- if (refSeqByName != null) {
- System.out.println("Error: reference contig name "+values[2]+" occurs more than once.");
- System.exit(1);
- }
-
- System.out.println("\t" + values[2] + "\t" + size);
-
- refSeqById = new ReferenceSequence(values[0], size, values[2]);
- options.checkAndMakeReferenceAnalysisDir(refSeqById.getName());
- referenceSeqIds.put(values[0], refSeqById);
- referenceSeqNames.put(values[2], refSeqById);
- refSeqById.openAlignmentSummaryFiles(options);
-
- if (values[0].length() > longestId) {
- longestId = values[0].length();
- }
- }
-
- line = br.readLine();
- }
- br.close();
- } catch (Exception e) {
- System.out.println("NanotoolsReferences Exception:");
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- /**
- * Read reference FASTA file
- */
- private void readReferenceFile() {
- ReferenceSequence currentRef = null;
- KmerTable refKmerTable = null;
- GCCounter gcc = null;
-
- try
- {
- BufferedReader br = new BufferedReader(new FileReader(options.getReferenceFile()));
- String line;
- String id = null;
- String name = null;
- String seq = "";
- String previousKmerString = "";
-
- System.out.println("");
- System.out.println("Calculating reference GC");
-
- do {
- line = br.readLine();
- if (line != null) {
- line = line.trim();
- }
-
- // New ID
- if ((line == null) || (line.startsWith(">"))) {
- if (id != null) {
- if (gcc != null) {
- gcc.closeFile();
- }
- }
-
- if (line != null) {
- String[] parts = line.substring(1).split("(\\s+)");
- id = parts[0];
- currentRef = getReferenceById(id);
- System.out.println("\t" + currentRef.getName());
- refKmerTable = currentRef.getKmerTable();
- gcc = new GCCounter(currentRef.getBinSize(), options.getAnalysisDir() + File.separator + currentRef.getName() + File.separator + currentRef.getName() + "_gc.txt");
- }
- }
- // Continuing sequence read
- else if ((line != null) && (currentRef != null)) {
- if (!line.equals("")) {
- String kmerSeq = previousKmerString + line;
- int k = refKmerTable.getKmerSize();
-
- // Store kmers
- for (int o=0; o<kmerSeq.length() - k; o++) {
- refKmerTable.countKmer(kmerSeq.substring(o, o+5));
- }
-
- // Store end k-1 bases for start of next kmer
- if (line.length() > k) {
- previousKmerString = line.substring(line.length() - k + 1);
- } else {
- previousKmerString = "";
- }
-
- // Now for GC graph
- gcc.addString(line);
- }
- }
- } while (line != null);
-
- br.close();
- } catch (Exception e) {
- System.out.println("readFasta Exception:");
- e.printStackTrace();
- System.exit(1);
- }
-
- }
-
- /**
- * Load references
- */
- public void loadReferences() {
- readSizesFile();
- readReferenceFile();
- }
-
- /**
- * Get a ReferenceSequence object from sequence ID.
- */
- public ReferenceSequence getReferenceById(String id) {
- ReferenceSequence r = referenceSeqIds.get(id);
-
- if (r == null) {
- System.out.println("");
- System.out.println("Error: Couldn't find reference for "+id + ". This can occur if you have changed the refernce file, but not deleted the .sizes file associated with it. Try deleting reference.fasta.sizes and re-running.");
- System.exit(1);
- }
-
- return r;
- }
-
- /**
- * Return set of all reference sequence IDs.
- * @return a String set
- */
- public Set<String> getAllIds() {
- return referenceSeqIds.keySet();
- }
-
- /**
- * Return sorted set of all reference sequence IDs.
- * @return a String set
- */
- public ArrayList getSortedReferences() {
- ArrayList sortedReferences = new ArrayList();
- Set<String> keys = referenceSeqIds.keySet();
-
- for(String id : keys) {
- sortedReferences.add(referenceSeqIds.get(id));
- }
- Collections.sort(sortedReferences);
-
- return sortedReferences;
- }
-
-
- /**
- * Initiate writing of all statistics data files used to generate graphs.
- * @param type a type, as defined in NanoOKOptions (for example TYPE_TEMPLATE)
- */
- public void writeReferenceStatFiles(int type) {
- Set<String> keys = referenceSeqIds.keySet();
-
- for(String id : keys) {
- ReferenceSequence ref = referenceSeqIds.get(id);
- ref.getStatsByType(type).writeCoverageData(options.getAnalysisDir() + File.separator + ref.getName() + File.separator + ref.getName() + "_" + options.getTypeFromInt(type) + "_coverage.txt", ref.getBinSize());
- ref.getStatsByType(type).writePerfectKmerHist(options.getAnalysisDir() + File.separator + ref.getName() + File.separator + ref.getName() + "_" + options.getTypeFromInt(type) + "_all_perfect_kmers.txt");
- ref.getStatsByType(type).writeBestPerfectKmerHist(options.getAnalysisDir() + File.separator + ref.getName() + File.separator + ref.getName() + "_" + options.getTypeFromInt(type) + "_best_perfect_kmers.txt");
- ref.getStatsByType(type).writeBestPerfectKmerHistCumulative(options.getAnalysisDir() + File.separator + ref.getName() + File.separator + ref.getName() + "_" + options.getTypeFromInt(type) + "_cumulative_perfect_kmers.txt");
- ref.getStatsByType(type).writeInsertionStats(options.getAnalysisDir() + File.separator + ref.getName() + File.separator + ref.getName() + "_" + options.getTypeFromInt(type) + "_insertions.txt");
- ref.getStatsByType(type).writeDeletionStats(options.getAnalysisDir() + File.separator + ref.getName() + File.separator + ref.getName() + "_" + options.getTypeFromInt(type) + "_deletions.txt");
- ref.writeKmerFile(type, options.getAnalysisDir() + File.separator + ref.getName() + File.separator + ref.getName() + "_" + options.getTypeFromInt(type) + "_kmers.txt");
- }
- }
-
- /**
- * Get the length of the longest ID - used for formatting output.
- * @return length of longest sequence ID
- */
- public int getLongestIdLength() {
- return longestId;
- }
-
- /**
- * Get number of references.
- * @return number of references
- */
- public int getNumberOfReferences() {
- return referenceSeqIds.size();
- }
-
- /**
- * Write reference summary text file.
- * @param type type from NanoOKOptions
- */
- public void writeReferenceSummary(int type) {
- try {
- String filename = options.getAnalysisDir() + File.separator + "all_" + NanoOKOptions.getTypeFromInt(type) + "_alignment_summary.txt";
- PrintWriter pw = new PrintWriter(new FileWriter(filename));
- String formatString = "%-"+longestId+"s %-12s %-10s %-10s %-10s %-12s %-10s %-10s";
- //pw.printf(formatString, "ID", "Size", "ReadsAlign", "PcReads", "MeanLen", "TotalBases", "MeanCov", "LongPerfKm");
- pw.print("ID\tSize\tReadsAlign\tPcReads\tMeanLen\tTotalBases\tMeanCov\tLongPerfKm");
- pw.println("");
-
- //List<String> keys = new ArrayList<String>(referenceSeqIds.keySet());
- //Collections.sort(keys);
- //for(String id : keys) {
- // referenceSeqIds.get(id).getStatsByType(type).writeSummary(pw, "%-"+longestId+"s %-12d %-10d %-10.2f %-10d");
- //}
-
- formatString = "%s\t%d\t%d\t%.2f\t%.2f\t%d\t%.2f\t%d";
- ArrayList<ReferenceSequence> sortedRefs = getSortedReferences();
- for (int i=0; i<sortedRefs.size(); i++) {
- ReferenceSequence r = sortedRefs.get(i);
- ReferenceSequenceStats refStats = r.getStatsByType(type);
- pw.printf(formatString,
- r.getName(),
- r.getSize(),
- refStats.getNumberOfReadsWithAlignments(),
- 100.0 * (double)refStats.getNumberOfReadsWithAlignments() / (double)overallStats.getStatsByType(type).getNumberOfReads(),
- refStats.getMeanReadLength(),
- refStats.getTotalAlignedBases(),
- (double)refStats.getTotalAlignedBases() / r.getSize(),
- refStats.getLongestPerfectKmer());
- pw.println("");
- }
-
-
- pw.close();
- } catch (IOException e) {
- System.out.println("writeReferenceSummary exception:");
- e.printStackTrace();
- System.exit(1);
- }
- }
-
-// /**
-// * Write reference summary to LaTeX report.
-// * @param type type from NanoOKOptions
-// * @param pw handle to LaTeX file
-// */
-// public void writeTexSummary(int type, PrintWriter pw) {
-// pw.println("\\begin{table}[H]");
-// pw.println("{\\footnotesize");
-// pw.println("\\fontsize{9pt}{11pt}\\selectfont");
-// pw.println("\\begin{tabular}{l c c c c c c c}");
-// pw.println(" & & {\\bf Number of} & {\\bf \\% of} & {\\bf Mean read} & {\\bf Aligned} & {\\bf Mean} & {\\bf Longest} \\\\");
-// pw.println("{\\bf ID} & {\\bf Size} & {\\bf Reads} & {\\bf Reads} & {\\bf length} & {\\bf bases} & {\\bf coverage} & {\\bf Perf Kmer} \\\\");
-// ArrayList<ReferenceSequence> sortedRefs = getSortedReferences();
-// for (int i=0; i<sortedRefs.size(); i++) {
-// ReferenceSequence r = sortedRefs.get(i);
-// ReferenceSequenceStats refStats = r.getStatsByType(type);
-// if ((sortedRefs.size() < 100) || (refStats.getNumberOfReadsWithAlignments() > 0)) {
-// pw.printf("%s & %d & %d & %.2f & %.2f & %d & %.2f & %d \\\\",
-// r.getName().replaceAll("_", " "),
-// r.getSize(),
-// refStats.getNumberOfReadsWithAlignments(),
-// 100.0 * (double)refStats.getNumberOfReadsWithAlignments() / (double)overallStats.getStatsByType(type).getNumberOfReads(),
-// refStats.getMeanReadLength(),
-// refStats.getTotalAlignedBases(),
-// (double)refStats.getTotalAlignedBases() / r.getSize(),
-// refStats.getLongestPerfectKmer());
-// pw.println("");
-// }
-// }
-// pw.println("\\end{tabular}");
-// pw.println("}");
-// pw.println("\\end{table}");
-// }
-}
diff --git a/src/nanook/SAMParser.java b/src/nanook/SAMParser.java
deleted file mode 100644
index 75f020e..0000000
--- a/src/nanook/SAMParser.java
+++ /dev/null
@@ -1,236 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Hashtable;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * Parser for SAM format files.
- *
- * @author Richard Leggett
- */
-public abstract class SAMParser {
- private NanoOKOptions options;
- private References references;
- private SampleReportWriter report;
- private String programID = null;
- ArrayList<Alignment> alignments;
- private Hashtable<String,Integer> referenceSizes;
- String leafName;
-
- /**
- * Parse a SAM file.
- * @param filename filename to parse
- * @param nonAlignedSummaryFile an AlignmentTableFile to output details of anything that doesn't align to
- * @return number of alignments parsed
- */
- public SAMParser(NanoOKOptions o, References r) {
- options = o;
- references = r;
- }
-
- /**
- * Get file extension of alignment files
- * @return
- */
- public String getAlignmentFileExtension() {
- return ".sam";
- }
-
-
- private void processReferenceTag(String s) {
- Pattern pattern = Pattern.compile("@SQ(\\s+)SN:(\\S+)(\\s+)LN:(\\S+)");
- Matcher matcher = pattern.matcher(s);
- if (matcher.find()) {
- String refID = matcher.group(2);
- int size = Integer.parseInt(matcher.group(4));
- if (referenceSizes.containsKey(refID)) {
- System.out.println("Warning: Reference "+refID+" already seen.");
- } else {
- referenceSizes.put(refID, size);
- }
- } else {
- System.out.println("Warning: Badly formated tag: " + s);
- }
- }
-
- /**
- * Process @PG tag in SAM file
- * @param s
- */
- private void processProgramTag(String s) {
- Pattern pattern = Pattern.compile("(\\s+)ID:(\\S+)(\\s+)");
- Matcher matcher = pattern.matcher(s);
- if (matcher.find()) {
- programID = matcher.group(2);
- }
- }
-
- /**
- * Process an alignment line from a SAM file
- * @param s the line
- * @param outputFilename .maf file to write
- * @return ]
- */
- private Alignment processAlignmentLine(String alignmentFile, String s, String outputFilename, ReadSetStats overallStats) {
- String[] cols = s.split("\t");
- String queryName = cols[0];
- int flags = Integer.parseInt(cols[1]);
- String hitName = cols[2];
- int hitStart = Integer.parseInt(cols[3]) - 1; // SAM is 1-based, Last and NanoOK 0-based
- int mapQuality = Integer.parseInt(cols[4]);
- String cigar = cols[5];
- String rNext = cols[6];
- int pNext = Integer.parseInt(cols[7]);
- int tLen = Integer.parseInt(cols[8]);
- String seq = cols[9];
- String qual = cols[10];
- boolean mapped = ((flags & 0x04) == 0x04) ? false:true;
- int queryStart;
- Alignment al = null;
-
- //System.out.println("Alignment file "+alignmentFile);
- //System.out.println("CIGAR string "+cigar);
-
- if (options.getAligner().equals("blasr")) {
- queryName = cols[0].substring(0, cols[0].lastIndexOf("/"));
- }
-
- if (mapped) {
- ReferenceSequence readReference = references.getReferenceById(hitName);
- if (readReference != null) {
- int readLength = overallStats.getReadLength(alignmentFile, queryName);
- if (readLength != -1) {
- CIGARString cs = new CIGARString(cigar, seq, leafName, queryName, hitStart, options.getReferenceFile(), readReference);
- if (cs.processString()) {
- //System.out.println("hitName "+hitName);
- al = new Alignment(mapQuality,
- queryName,
- readLength,
- cs.getQueryStart(),
- cs.getQueryAlnSize(),
- cs.getQueryString(),
- hitName,
- readReference.getSize(),
- hitStart,
- cs.getHitAlnSize(),
- cs.getHitString(),
- false);
-
- // Check for reverse complement
- if ((flags & 0x10) == 0x10) {
- al.setQueryStrand("-");
- }
-
- al.writeMafFile(outputFilename);
-
- }
-
- } else {
- System.out.println("Error: can't find read length for ["+queryName+"]");
- System.exit(1);
- }
- } else {
- System.out.println("");
- System.out.println("Error: Couldn't find reference "+hitName);
- }
- }
-
- return al;
- }
-
- public int parseFile(String filename, AlignmentsTableFile nonAlignedSummaryFile, ReadSetStats overallStats) {
- alignments = new ArrayList();
- referenceSizes = new Hashtable();
- leafName = new File(filename).getName();
-
- // Read all alignmnets and put into an ArrayList
- try
- {
- options.getLog().println("Got file");
- BufferedReader br = new BufferedReader(new FileReader(filename));
- String line;
-
- do {
- line = br.readLine();
- if (line != null) {
- if (line.startsWith("@SQ")) {
- processReferenceTag(line);
- } else if (line.startsWith("@PG")) {
- processProgramTag(line);
- } else if (!line.startsWith("@")) {
- options.getLog().println("Got line");
- Alignment al = processAlignmentLine(filename, line, filename+".last", overallStats);
- if (al != null) {
- alignments.add(al);
- }
- options.getLog().println("Added");
- }
- }
- } while (line != null);
- br.close();
-
- options.getLog().println("Finished file");
-
- if (alignments.size() == 0) {
- nonAlignedSummaryFile.writeNoAlignmentMessage(leafName);
- overallStats.addReadWithoutAlignment();
- }
-
- } catch (Exception e) {
- System.out.println("parseFile Exception:");
- e.printStackTrace();
- options.getLog().println("Exception parsing "+filename);
- options.getLog().close();
- System.exit(1);
- }
-
- options.getLog().println("Returning");
-
- return alignments.size();
- }
-
- /**
- * Sort alignments in order of score
- */
- public void sortAlignments() {
- if (alignments.size() > 0) {
- Collections.sort(alignments);
- }
- }
-
- /**
- * Get the set of alignments that match the highest scoring reference
- */
- public ArrayList getHighestScoringSet() {
- ArrayList hss = new ArrayList();
-
- if (alignments.size() > 0) {
- String readReferenceName = alignments.get(0).getHitName();
- ReferenceSequence readReference = references.getReferenceById(readReferenceName);
- for (int i=0; i<alignments.size(); i++) {
- Alignment a = alignments.get(i);
- if (a.getHitName().equals(readReferenceName)) {
- hss.add(a);
- }
- }
- }
-
- return hss;
- }
-}
diff --git a/src/nanook/SampleChecker.java b/src/nanook/SampleChecker.java
deleted file mode 100644
index 32d241a..0000000
--- a/src/nanook/SampleChecker.java
+++ /dev/null
@@ -1,281 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett (richard.leggett at earlham.ac.uk)
- *
- * Copyright 2015-17 Earlham Institute
- */
-package nanook;
-
-import java.io.File;
-import java.util.ArrayList;
-
-public class SampleChecker {
- private NanoOKOptions options;
- private boolean haveChecked = false;
- private boolean usingBarcodes = false;
- private boolean usingBatchDirs = false;
- private boolean usingPassFailDirs = false;
-
- public SampleChecker(NanoOKOptions o) {
- options = o;
- }
-
- private boolean dirExists(String dir) {
- File d = new File(dir);
- return d.exists();
- }
-
- private boolean checkIfDirHasSubdirs(String dir) {
- File d = new File(dir);
- File[] listOfFiles = d.listFiles();
- boolean contains = false;
-
- if (listOfFiles == null) {
- contains = false;
- } else if (listOfFiles.length <= 0) {
- contains = false;
- } else {
- for (File file : listOfFiles) {
- if (file.isDirectory()) {
- contains = true;
- break;
- }
- }
- }
-
- return contains;
- }
-
- private void checkForBarcodeAndBatch(String dir) {
- File d = new File(dir);
- File[] listOfFiles = d.listFiles();
- boolean contains = false;
- if (listOfFiles == null) {
- contains = false;
- } else if (listOfFiles.length <= 0) {
- contains = false;
- } else {
- boolean foundSubDir = false;
- for (File file : listOfFiles) {
- if (file.isDirectory()) {
- if (file.getName().startsWith("BC") || file.getName().startsWith("barcode")) {
- usingBarcodes = true;
- if (usingBatchDirs == false) {
- if (checkIfDirHasSubdirs(file.getPath())) {
- usingBatchDirs = true;
- }
- }
- //checkForBarcodeAndBatch(file.getPath());
- } else if (file.getName().startsWith("batch_")) {
- usingBatchDirs = true;
- break;
- } else {
- foundSubDir = true;
- break;
- }
- }
- }
-
- if ((usingBarcodes == false) && (usingBatchDirs == false) && (foundSubDir == true)) {
- System.out.println("Found subdirectory, assuming batched output");
- usingBatchDirs = true;
- }
- }
- }
-
- private void showDirectoryType() {
- System.out.println(" Using pass/fail dirs: " + (usingPassFailDirs?"yes":"no"));
- System.out.println(" Using batch dirs: " + (usingBatchDirs?"yes":"no"));
- System.out.println(" Using barcodes: " + (usingBarcodes?"yes":"no"));
- System.out.println("");
- }
-
- public void checkFast5Directory() {
- String passDir = options.getFast5Dir() + File.separator + "pass";
- String failDir = options.getFast5Dir() + File.separator + "fail";
-
- System.out.println("Checking FAST5 directory structure...");
-
- File f = new File(options.getFast5Dir());
- if (!f.exists()) {
- System.out.println("Error: can't find FAST5 directory "+options.getFast5Dir());
- System.exit(1);
- }
-
- if ((options.isProcessingPassReads()) && (dirExists(passDir))) {
- usingPassFailDirs = true;
- checkForBarcodeAndBatch(passDir);
- } else if ((options.isProcessingFailReads()) && (dirExists(failDir))) {
- usingPassFailDirs = true;
- checkForBarcodeAndBatch(failDir);
- } else {
- checkForBarcodeAndBatch(options.getFast5Dir());
- //File[] listOfFiles = f.listFiles();
- //usingPassFailDirs = false;
- //usingBatchDirs = false;
- //for (File file : listOfFiles) {
- // if (file.isDirectory()) {
- // usingBatchDirs = true;
- // break;
- // }
- //}
- }
-
- showDirectoryType();
- }
-
- public void checkReadDirectory() {
- boolean gotOne = false;
- ArrayList<String> al = new ArrayList<String>();
-
- System.out.println("Checking FASTA/Q directory structure...");
-
- File f = new File(options.getReadDir());
- if (!f.exists()) {
- System.out.println("Error: can't find read directory "+options.getReadDir());
- System.exit(1);
- }
-
- // Check for MinKNOW 1.4.2 and above
- if ((options.isProcessingPassReads()) && (options.isProcessing2DReads())) {
- al.add(new String(options.getReadDir() + File.separator + "pass" + File.separator + "2D"));
- }
- if ((options.isProcessingPassReads()) && (options.isProcessingTemplateReads())) {
- al.add(new String(options.getReadDir() + File.separator + "pass" + File.separator + "Template"));
- }
- if ((options.isProcessingPassReads()) && (options.isProcessingComplementReads())) {
- al.add(new String(options.getReadDir() + File.separator + "pass" + File.separator + "Complement"));
- }
- if ((options.isProcessingFailReads()) && (options.isProcessing2DReads())) {
- al.add(new String(options.getReadDir() + File.separator + "fail" + File.separator + "2D"));
- }
- if ((options.isProcessingFailReads()) && (options.isProcessingTemplateReads())) {
- al.add(new String(options.getReadDir() + File.separator + "fail" + File.separator + "Template"));
- }
- if ((options.isProcessingFailReads()) && (options.isProcessingComplementReads())) {
- al.add(new String(options.getReadDir() + File.separator + "fail" + File.separator + "Complement"));
- }
- for (int i=0; i<al.size(); i++) {
- if (dirExists(al.get(i))) {
- gotOne = true;
- usingPassFailDirs = true;
- checkForBarcodeAndBatch(al.get(i));
- }
- }
-
- // Original - no pass/fail dirs, no barcodes, no batch
- // Or Albacore - with separate directories
- if (gotOne == false) {
- System.out.println("Error: FASTA/Q directory structure not understood.");
- System.out.println("This may be because it was created with an earlier version of NanoOK.");
- System.out.println("NanoOK now expects the following structures:");
- System.out.println(" sampledir/fasta/pass/Template/*.fast5");
- System.out.println(" or sampledir/fasta/pass/Template/batch_XXX/*.fast5");
- System.out.println(" or sampledir/fasta/pass/Template/0/*.fast5");
- System.out.println("etc.");
- System.exit(0);
- }
-
- showDirectoryType();
- }
-
- public void checkReadDirectorOld() {
- boolean gotOne = false;
- ArrayList<String> al = new ArrayList<String>();
-
- System.out.println("Checking FASTA/Q directory structure...");
-
- File f = new File(options.getReadDir());
- if (!f.exists()) {
- System.out.println("Error: can't find read directory "+options.getReadDir());
- System.exit(1);
- }
-
- // Check for MinKNOW 1.4.2 and above
- if ((options.isProcessingPassReads()) && (options.isProcessing2DReads())) {
- al.add(new String(options.getReadDir() + File.separator + "2D" + File.separator + "pass"));
- }
- if ((options.isProcessingPassReads()) && (options.isProcessingTemplateReads())) {
- al.add(new String(options.getReadDir() + File.separator + "Template" + File.separator + "pass"));
- }
- if ((options.isProcessingPassReads()) && (options.isProcessingComplementReads())) {
- al.add(new String(options.getReadDir() + File.separator + "Complement" + File.separator + "pass"));
- }
- if ((options.isProcessingFailReads()) && (options.isProcessing2DReads())) {
- al.add(new String(options.getReadDir() + File.separator + "2D" + File.separator + "fail"));
- }
- if ((options.isProcessingFailReads()) && (options.isProcessingTemplateReads())) {
- al.add(new String(options.getReadDir() + File.separator + "Template" + File.separator + "fail"));
- }
- if ((options.isProcessingFailReads()) && (options.isProcessingComplementReads())) {
- al.add(new String(options.getReadDir() + File.separator + "Complement" + File.separator + "fail"));
- }
- for (int i=0; i<al.size(); i++) {
- if (dirExists(al.get(i))) {
- gotOne = true;
- usingPassFailDirs = true;
- checkForBarcodeAndBatch(al.get(i));
- }
- }
-
- // MinKNOW pre 1.4.2 and after intro of pass/fail dirs
- // Barcode dirs will only be for pass reads
- if (gotOne == false) {
- if ((options.isProcessingPassReads()) && (dirExists(options.getReadDir() + File.separator + "pass"))) {
- gotOne = true;
- usingBatchDirs = false;
- usingPassFailDirs = true;
- checkForBarcodeAndBatch(options.getReadDir() + File.separator + "pass");
- } else if ((options.isProcessingFailReads()) && (dirExists(options.getReadDir() + File.separator + "fail"))) {
- gotOne = true;
- usingBatchDirs = false;
- usingPassFailDirs = true;
- }
- }
-
- // Albacore - we end up with sample/fasta/2D/0 etc.
- if (gotOne == false) {
- al.clear();
- if (options.isProcessing2DReads()) {
- al.add(new String(options.getReadDir() + File.separator + "2D"));
- }
- if (options.isProcessingTemplateReads()) {
- al.add(new String(options.getReadDir() + File.separator + "Template"));
- }
- if (options.isProcessingComplementReads()) {
- al.add(new String(options.getReadDir() + File.separator + "Complement"));
- }
- for (int i=0; i<al.size(); i++) {
- if (dirExists(al.get(i))) {
- gotOne = true;
- checkForBarcodeAndBatch(al.get(i));
- }
- }
- }
-
- // Original - no pass/fail dirs, no barcodes, no batch
- // Or Albacore - with separate directories
- if (gotOne == false) {
- usingBatchDirs = false;
- usingPassFailDirs = false;
- }
-
- showDirectoryType();
- }
-
- public boolean haveChecked() {
- return haveChecked;
- }
-
- public boolean usingBarcodes() {
- return usingBarcodes;
- }
-
- public boolean usingBatchDirs() {
- return usingBatchDirs;
- }
-
- public boolean usingPassFailDirs() {
- return usingPassFailDirs;
- }
-}
diff --git a/src/nanook/SampleComparer.java b/src/nanook/SampleComparer.java
deleted file mode 100644
index 78c555c..0000000
--- a/src/nanook/SampleComparer.java
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.InvalidClassException;
-import java.io.ObjectInputStream;
-import java.io.PrintWriter;
-import java.util.ArrayList;
-
-/**
- *
- * @author Richard Leggett
- */
-public class SampleComparer {
- private NanoOKOptions options;
- private ArrayList<String> sampleNames = new ArrayList();
- private ArrayList<OverallStats> sampleStats = new ArrayList();
-
- public SampleComparer(NanoOKOptions o) {
- options = o;
- }
-
- private void readSample(String sample, String name) {
- try {
- FileInputStream fis = new FileInputStream(sample + File.separator + "analysis" + options.getAnalysisSuffix() + File.separator + "OverallStats.ser");
- ObjectInputStream ois = new ObjectInputStream(fis);
- OverallStats os = (OverallStats)ois.readObject();
- sampleNames.add(name);
- sampleStats.add(os);
- ois.close();
- } catch (Exception e) {
- if (e instanceof InvalidClassException) {
- System.out.println("The saved data is incompatible with this version of NanoOK. You must re-run nanook analyse on all your samples before running compare.");
- } else {
- System.out.println("Exception trying to read object:");
- e.printStackTrace();
- }
- System.exit(1);
- }
- }
-
- public void loadSamples() {
- try
- {
- BufferedReader br = new BufferedReader(new FileReader(options.getSampleList()));
- String line;
-
- do {
- line = br.readLine();
- if (line != null) {
- if (!line.startsWith("SampleDir")) {
- String[] fields = line.split("\t");
- if (fields.length != 2) {
- System.out.println("Error: invalid format for sample list file. This file should be two fields, tab separated.");
- System.exit(1);
- } else {
- readSample(fields[0], fields[1]);
- }
- }
- }
- } while (line != null);
- br.close();
- } catch (Exception e) {
- System.out.println("parseFile Exception:");
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- public void compareSamples() {
- try {
- for (int type = 0; type<3; type++) {
- if (options.isProcessingReadType(type)) {
- String filename = options.getComparisonDir() + File.separator + NanoOKOptions.getTypeFromInt(type) + "_comparison.txt";
- PrintWriter pw = new PrintWriter(new FileWriter(filename, false));
-
- pw.println("Name\tNumReads\tTotalBases\tMeanLen\tLongest\tShortest\tN50\tN50Count\tN90\tN90Count");
-
- for (int i=0; i<sampleStats.size(); i++) {
- String name = sampleNames.get(i);
- OverallStats overallStats = sampleStats.get(i);
- ReadSetStats r = overallStats.getStatsByType(type);
-
- pw.printf("%s\t%d\t%d\t%.2f\t%d\t%d\t%d\t%d\t%d\t%d",
- name, r.getNumReads(), r.getTotalBases(), r.getMeanLength(), r.getLongest(), r.getShortest(), r.getN50(), r.getN50Count(), r.getN90(), r.getN90Count());
- pw.println("");
- }
-
- pw.close();
-
- filename = options.getComparisonDir() + File.separator + NanoOKOptions.getTypeFromInt(type) + "_map_summary.txt";
- pw = new PrintWriter(new FileWriter(filename, false));
- References refs = sampleStats.get(0).getStatsByType(type).getOptions().getReferences();
- ArrayList<ReferenceSequence> sortedRefs = refs.getSortedReferences();
- pw.print("Sample");
- for (int i=0; i<sortedRefs.size(); i++) {
- ReferenceSequence rs = sortedRefs.get(i);
- pw.print("\t" + rs.getName());
- }
- pw.println("\tUnaligned");
- for (int i=0; i<sampleStats.size(); i++) {
- String name = sampleNames.get(i);
- OverallStats overallStats = sampleStats.get(i);
- pw.print(name);
- for (int j=0; j<sortedRefs.size(); j++) {
- ReferenceSequence rs = overallStats.getStatsByType(type).getOptions().getReferences().getReferenceById(sortedRefs.get(j).getId());
- double value = 0.0;
-
- if (rs.getStatsByType(type).getNumberOfReadsWithAlignments() > 0) {
- value = 100.0 * (double)rs.getStatsByType(type).getNumberOfReadsWithAlignments() / (double)overallStats.getStatsByType(type).getNumberOfReads();
- }
-
- pw.printf("\t%.4f", value);
- }
-
- double value = 0;
- if (overallStats.getStatsByType(type).getNumberOfReadsWithoutAlignments() > 0) {
- value = 100.0 * (double)overallStats.getStatsByType(type).getNumberOfReadsWithoutAlignments() / (double)overallStats.getStatsByType(type).getNumberOfReads();
- }
- pw.printf("\t%.4f", value);
- pw.println("");
- }
- pw.close();
- }
- }
- } catch (IOException e) {
- System.out.println("AlignmentsTableFile exception");
- e.printStackTrace();
- }
- }
-
- public int getNumberOfSamples() {
- return sampleStats.size();
- }
-
- public OverallStats getSample(int i) {
- return sampleStats.get(i);
- }
-
- public String getSampleName(int i) {
- return sampleNames.get(i);
- }
-}
diff --git a/src/nanook/SampleReportWriter.java b/src/nanook/SampleReportWriter.java
deleted file mode 100644
index a27d76c..0000000
--- a/src/nanook/SampleReportWriter.java
+++ /dev/null
@@ -1,822 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.*;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-/**
- * Writes a LaTeX report file.
- *
- * @author Richard Leggett
- */
-public class SampleReportWriter {
- private static final int LONGTABLE_THRESHOLD = 25;
- private NanoOKOptions options;
- private References references;
- private OverallStats overallStats;
- private PrintWriter pw;
- private String sample;
-
- /**
- * Constructor.
- * @param o a NanoOKOptions object
- * @param r the references
- * @param s overall statistics
- */
- public SampleReportWriter(NanoOKOptions o, OverallStats s) {
- options = o;
- references = options.getReferences();
- overallStats = s;
- sample = o.getSample().replaceAll("_", "\\\\_");
- }
-
- /**
- * Open the .tex file.
- */
- public void open() {
- try {
- pw = new PrintWriter(new FileWriter(options.getTexFilename()));
- writeLaTeXHeader();
- } catch (IOException e) {
- System.out.println("ReportWriter exception:");
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- /**
- * Close the .tex file.
- */
- public void close() {
- writeLaTeXFooter();
- pw.close();
- }
-
- /**
- * Write the top of the LaTeX document.
- */
- private void writeLaTeXHeader() {
- pw.println("\\documentclass[a4paper,11pt,oneside]{article}");
- pw.println("\\usepackage{graphicx}");
- pw.println("\\usepackage{url}");
- pw.println("\\usepackage{multirow}");
- pw.println("\\usepackage{rotating}");
- pw.println("\\usepackage{color}");
- pw.println("\\usepackage[compact]{titlesec}");
- pw.println("\\usepackage[portrait,top=1cm, bottom=2cm, left=1cm, right=1cm]{geometry}");
- pw.println("\\usepackage{float}");
- if (references.getNumberOfReferences() >= LONGTABLE_THRESHOLD) {
- pw.println("\\usepackage{longtable}");
- }
- pw.println("\\restylefloat{table}");
- pw.println("\\begin{document}");
- pw.println("\\renewcommand*{\\familydefault}{\\sfdefault}");
- pw.println("\\normalfont");
- pw.println("\\section*{\\large{NanoOK report for " + sample + "}}");
- }
-
- /**
- * Add the pass/fail section
- */
- public void addPassFailSection() {
- if (options.usingPassFailDirs()) {
- pw.println("\\subsection*{Pass and fail counts}");
- pw.println("\\vspace{-3mm}");
- pw.println("\\begin{table}[H]");
- pw.println("{\\footnotesize");
- pw.println("\\fontsize{9pt}{11pt}\\selectfont");
- pw.println("\\begin{tabular}{l c c}");
- pw.println("{\\bf Type} & {\\bf Pass} & {\\bf Fail} \\\\");
-
- for (int type = 0; type<3; type++) {
- if (options.isProcessingReadType(type)) {
- ReadSetStats r = overallStats.getStatsByType(type);
- pw.printf("%s & %d & %d \\\\", r.getTypeString(), r.getNumberOfPassFiles(), r.getNumberOfFailFiles());
- pw.println("");
- }
- }
-
- pw.println("\\end{tabular}");
- pw.println("}");
- pw.println("\\end{table}");
- }
- }
-
- /**
- * Add the read lengths section.
- */
- public void addLengthsSection() {
- String graphWidth = "width=.3\\linewidth";
-
- if (options.getNumberOfTypes() == 1) {
- graphWidth = "width=.4\\linewidth";
- }
-
- pw.println("\\subsection*{Read lengths}");
- pw.println("\\vspace{-3mm}");
-
- pw.println("\\begin{table}[H]");
- pw.println("{\\footnotesize");
- pw.println("\\fontsize{9pt}{11pt}\\selectfont");
- pw.println("\\begin{tabular}{l c c c c c c c c c}");
- pw.println("{\\bf Type} & {\\bf NumReads} & {\\bf TotalBases} & {\\bf Mean} & {\\bf Longest} & {\\bf Shortest} & {\\bf N50} & {\\bf N50Count} & {\\bf N90} & {\\bf N90Count} \\\\");
-
- for (int type = 0; type<3; type++) {
- if (options.isProcessingReadType(type)) {
- ReadSetStats r = overallStats.getStatsByType(type);
- pw.printf("%s & %d & %d & %.2f & %d & %d & %d & %d & %d & %d \\\\", r.getTypeString(), r.getNumReads(), r.getTotalBases(), r.getMeanLength(), r.getLongest(), r.getShortest(), r.getN50(), r.getN50Count(), r.getN90(), r.getN90Count());
- pw.println("");
- }
- }
-
- pw.println("\\end{tabular}");
- pw.println("}");
- pw.println("\\end{table}");
- pw.println("\\vspace{-10mm}");
- pw.println("\\begin{figure}[H]");
- pw.println("\\centering");
-
-
-
- includeGraphicsIfExists(NanoOKOptions.TYPE_TEMPLATE, "\\includegraphics["+graphWidth+"]{", options.getGraphsDir() + File.separator + "all_Template_lengths", "}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_COMPLEMENT, "\\includegraphics["+graphWidth+"]{", options.getGraphsDir() + File.separator + "all_Complement_lengths", "}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_2D, "\\includegraphics["+graphWidth+"]{", options.getGraphsDir() + File.separator + "all_2D_lengths", "}");
-
-
- //pw.println("\\includegraphics[width=.3\\linewidth]{" + options.getGraphsDir() + File.separator + "all_Template_lengths.pdf}");
- //pw.println("\\includegraphics[width=.3\\linewidth]{" + options.getGraphsDir() + File.separator + "all_Complement_lengths.pdf}");
- //pw.println("\\includegraphics[width=.3\\linewidth]{" + options.getGraphsDir() + File.separator + "all_2D_lengths.pdf}");
- pw.println("\\end{figure}");
-
- }
-
- /**
- * Write the alignments section to the report.
- * @param stats a ReadSetStats object
- */
- public void writeAlignmentsSection(ReadSetStats stats) {
- //if ((stats.getTypeString() == "Template") || (references.getNumberOfReferences() > 8)) {
- // pw.println("\\clearpage");
- //}
- pw.println("\\subsection*{" + stats.getTypeString() + " alignments}");
- pw.println("\\vspace{-3mm}");
- pw.println("\\begin{table}[H]");
- pw.println("{\\footnotesize");
- pw.println("\\fontsize{9pt}{11pt}\\selectfont");
- pw.println("\\begin{tabular}{l c c}");
- pw.println("Number of reads & " + stats.getNumberOfReads() + " & \\\\");
- pw.printf("Number of reads with alignments & %d & (%.2f\\%%) \\\\", stats.getNumberOfReadsWithAlignments(), stats.getPercentOfReadsWithAlignments());
- pw.println("");
- pw.printf("Number of reads without alignments & %d & (%.2f\\%%) \\\\", stats.getNumberOfReadsWithoutAlignments(), stats.getPercentOfReadsWithoutAlignments());
- pw.println("");
- pw.println("\\end{tabular}");
- pw.println("}");
- pw.println("\\end{table}");
- pw.println("\\vspace{-10mm}");
- }
-
- /**
- * Check if graphic file exists and only insert if it does
- * @param preTex LaTeX before filename
- * @param filename the file
- * @param postTex LaTeX after filename
- */
- private void includeGraphicsIfExists(int type, String preTex, String filename, String postTex) {
- if (options.isProcessingReadType(type)) {
- String fullFilename = filename + "." + options.getImageFormat();
- File f = new File(fullFilename);
-
- if (f.exists()) {
- pw.print(preTex);
- pw.print(fullFilename);
- pw.println(postTex);
- } else {
- System.out.println("Can't find " + fullFilename);
- pw.print(" ");
- }
- }
- }
-
- /**
- * Write a section for a reference sequence.
- * @param refSeq reference to write
- */
- public void writeReferenceSection(ReferenceSequence refSeq) {
- String id = refSeq.getName().replaceAll("_", " ");
- String[] lines = new String[10];
- String newLineTag=" \\\\";
- String graphSize;
-
- if (options.getNumberOfTypes() == 1) {
- newLineTag = "";
- }
-
- pw.println("\\subsection*{" + id + " error analysis}");
- pw.println("\\vspace{-3mm}");
- pw.println("\\begin{table}[H]");
- pw.println("{\\footnotesize");
- pw.println("\\fontsize{9pt}{11pt}\\selectfont");
- pw.println("\\begin{tabular}{l c c c}");
-
-
- lines[0] = "";
- lines[1] = "Overall base identity (excluding indels)";
- lines[2] = "Aligned base identity (excluding indels)";
- lines[3] = "Identical bases per 100 aligned bases (including indels)";
- lines[4] = "Inserted bases per 100 aligned bases (including indels)";
- lines[5] = "Deleted bases per 100 aligned bases (including indels)";
- lines[6] = "Substitutions per 100 aligned bases (including indels)";
- lines[7] = "Mean insertion size";
- lines[8] = "Mean deletion size";
- for (int type=0; type<3; type++) {
- if (options.isProcessingReadType(type)) {
- lines[0] += " & " + NanoOKOptions.getTypeFromInt(type);
- lines[1] += String.format(" & %.2f\\%%", refSeq.getStatsByType(type).getReadPercentIdentical());
- lines[2] += String.format(" & %.2f\\%%", refSeq.getStatsByType(type).getAlignedPercentIdenticalWithoutIndels());
- lines[3] += String.format(" & %.2f\\%%", refSeq.getStatsByType(type).getAlignedPercentIdentical());
- lines[4] += String.format(" & %.2f\\%%", refSeq.getStatsByType(type).getPercentInsertionErrors());
- lines[5] += String.format(" & %.2f\\%%", refSeq.getStatsByType(type).getPercentDeletionErrors());
- lines[6] += String.format(" & %.2f\\%%", refSeq.getStatsByType(type).getPercentSubstitutionErrors());
- lines[7] += String.format(" & %.2f", refSeq.getStatsByType(type).getMeanInsertionSize());
- lines[8] += String.format(" & %.2f", refSeq.getStatsByType(type).getMeanDeletionSize());
- }
- }
-
- for (int i=0; i<=8; i++) {
- lines[i] += " \\\\";
- pw.println(lines[i]);
- }
-
- pw.println("\\end{tabular}");
- pw.println("}");
- pw.println("\\end{table}");
-
- if (options.getNumberOfTypes() == 1) {
- graphSize = "width=.4\\linewidth";
- } else {
- graphSize = "height=3.5cm";
- }
-
- pw.println("\\vspace{-5mm}");
- pw.println("\\begin{figure}[H]");
- pw.println("\\centering");
- includeGraphicsIfExists(NanoOKOptions.TYPE_TEMPLATE, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_Template_insertions", "}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_COMPLEMENT, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_Complement_insertions", "}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_2D, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_2D_insertions", "}"+newLineTag);
- includeGraphicsIfExists(NanoOKOptions.TYPE_TEMPLATE, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_Template_deletions", "}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_COMPLEMENT, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_Complement_deletions", "}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_2D, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_2D_deletions", "}");
- pw.println("\\end{figure}");
-
- pw.println("\\subsection*{" + id + " read identity}");
- pw.println("\\vspace{-3mm}");
- pw.println("\\begin{figure}[H]");
- pw.println("\\centering");
- includeGraphicsIfExists(NanoOKOptions.TYPE_TEMPLATE, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_Template_length_vs_identity_hist", "}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_COMPLEMENT, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_Complement_length_vs_identity_hist", "}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_2D, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_2D_length_vs_identity_hist", "}"+newLineTag);
- includeGraphicsIfExists(NanoOKOptions.TYPE_TEMPLATE, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_Template_length_vs_identity_scatter", "}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_COMPLEMENT, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_Complement_length_vs_identity_scatter", "}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_2D, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_2D_length_vs_identity_scatter", "}"+newLineTag);
- includeGraphicsIfExists(NanoOKOptions.TYPE_TEMPLATE, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_Template_read_fraction_vs_alignment_identity_scatter", "}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_COMPLEMENT, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_Complement_read_fraction_vs_alignment_identity_scatter", "}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_2D, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_2D_read_fraction_vs_alignment_identity_scatter", "}");
- if (options.getNumberOfTypes() > 1) {
- pw.println("\\end{figure}");
- pw.println("\\begin{figure}[H]");
- pw.println("\\centering");
- }
- includeGraphicsIfExists(NanoOKOptions.TYPE_TEMPLATE, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_Template_percent_aligned_vs_length_scatter", "}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_COMPLEMENT, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_Complement_percent_aligned_vs_length_scatter", "}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_2D, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_2D_percent_aligned_vs_length_scatter", "}");
- pw.println("\\end{figure}");
-
- if (options.getNumberOfTypes() == 1) {
- graphSize = "width=.4\\linewidth";
- } else {
- graphSize = "height=3.5cm";
- }
-
- pw.println("\\subsection*{" + id + " perfect kmers}");
- pw.println("\\vspace{-3mm}");
- pw.println("\\begin{figure}[H]");
- pw.println("\\centering");
- includeGraphicsIfExists(NanoOKOptions.TYPE_TEMPLATE, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_Template_cumulative_perfect_kmers", "}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_COMPLEMENT, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_Complement_cumulative_perfect_kmers", "}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_2D, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_2D_cumulative_perfect_kmers", "}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_TEMPLATE, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_Template_best_perfect_kmers", "}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_COMPLEMENT, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_Complement_best_perfect_kmers", "}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_2D, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_2D_best_perfect_kmers", "}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_TEMPLATE, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_Template_longest_perfect_vs_length_scatter", "}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_COMPLEMENT, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_Complement_longest_perfect_vs_length_scatter", "}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_2D, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_2D_longest_perfect_vs_length_scatter", "}");
- pw.println("\\end{figure}");
-
- if (options.getNumberOfTypes() == 1) {
- graphSize = "width=.7\\linewidth";
- } else {
- graphSize = "height=2cm";
- }
-
- pw.println("\\subsection*{" + id + " coverage}");
- pw.println("\\vspace{-3mm}");
- pw.println("\\begin{figure}[H]");
- pw.println("\\centering");
- includeGraphicsIfExists(NanoOKOptions.TYPE_TEMPLATE, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_Template_coverage", "} \\\\");
- includeGraphicsIfExists(NanoOKOptions.TYPE_COMPLEMENT, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_Complement_coverage", "} \\\\");
- includeGraphicsIfExists(NanoOKOptions.TYPE_2D, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_2D_coverage", "} \\\\");
- includeGraphicsIfExists(NanoOKOptions.TYPE_ALL, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_gc", "}");
- pw.println("\\end{figure}");
-
- if (options.getNumberOfTypes() == 1) {
- pw.println("\\clearpage");
- }
-
- if (options.getNumberOfTypes() == 1) {
- graphSize = "width=.7\\linewidth";
- } else {
- graphSize = "height=8cm";
- }
-
- pw.println("\\subsection*{" + id + " 5-mer analysis}");
-
- String[] overRepLines = new String[10];
- String[] underRepLines = new String[10];
- for (int i=0; i<10; i++) {
- overRepLines[i] = Integer.toString(i+1);
- underRepLines[i] = Integer.toString(i+1);
- for (int type=0; type<3; type++) {
- if (options.isProcessingReadType(type)) {
- if (i == 0) {
- refSeq.getStatsByType(type).sortKmerAbundance();
- }
-
- ArrayList<KmerAbundance> ka = refSeq.getStatsByType(type).getKmerAbundance();
- KmerAbundance ko = ka.get(i);
- KmerAbundance ku = ka.get(ka.size() - 1 - i);
- overRepLines[i] += String.format(" & %s & %.3f & %.3f & %.3f", ko.getKmer(), ko.getRefAbundance(), ko.getReadAbundance(), ko.getDifference());
- underRepLines[i] += String.format(" & %s & %.3f & %.3f & %.3f", ku.getKmer(), ku.getRefAbundance(), ku.getReadAbundance(), ku.getDifference());
- }
- }
- overRepLines[i] += " \\\\";
- underRepLines[i] += " \\\\";
- }
-
- pw.println("\\subsection*{Under-represented 5-mers}");
- pw.println("\\vspace{-3mm}");
- writeKmerTable(underRepLines);
- pw.println("\\vspace{-3mm}");
- pw.println("\\subsection*{Over-represented 5-mers}");
- pw.println("\\vspace{-3mm}");
- writeKmerTable(overRepLines);
- pw.println("\\vspace{-8mm}");
-
- pw.println("\\begin{figure}[H]");
- pw.println("\\centering");
- includeGraphicsIfExists(NanoOKOptions.TYPE_TEMPLATE, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_Template_kmer_scatter", "} ");
- includeGraphicsIfExists(NanoOKOptions.TYPE_COMPLEMENT, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_Complement_kmer_scatter", "} \\\\");
- includeGraphicsIfExists(NanoOKOptions.TYPE_2D, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_2D_kmer_scatter", "} \\\\");
- pw.println("\\end{figure}");
-
- if (options.getNumberOfTypes() == 1) {
- graphSize = "width=.4\\linewidth";
- } else {
- graphSize = "height=3.5cm";
- }
- pw.println("\\subsection*{" + id + " GC content}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_TEMPLATE, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_Template_GC_hist", "}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_COMPLEMENT, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_Complement_GC_hist", "}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_2D, "\\includegraphics["+graphSize+"]{", options.getGraphsDir() + File.separator + refSeq.getName() + File.separator + refSeq.getName() + "_2D_GC_hist", "}");
- }
-
- private void writeKmerTable(String[] lines) {
- pw.println("\\begin{table}[H]");
- pw.println("{\\footnotesize");
- pw.println("\\fontsize{7pt}{9pt}\\selectfont");
- pw.print("\\begin{tabular}{|c");
- int colCount = 1;
- for (int type=0; type<3; type++) {
- if (options.isProcessingReadType(type)) {
- pw.print("|c c c c");
- colCount+=4;
- }
- }
- pw.println("|}");
- pw.println("\\cline{1-"+colCount+"}");
- for (int type=0; type<3; type++) {
- if (options.isProcessingReadType(type)) {
- pw.print(" & \\multicolumn{4}{c|}{" + NanoOKOptions.getTypeFromInt(type) + "}");
- }
- }
- pw.println(" \\\\");
- pw.print("Rank");
- for (int type=0; type<3; type++) {
- if (options.isProcessingReadType(type)) {
- pw.print(" & kmer & Ref \\% & Read \\% & Diff \\%");
- }
- }
- pw.println(" \\\\");
- pw.println("\\cline{1-"+colCount+"}");
-
- for (int i=0; i<10; i++) {
- pw.println(lines[i]);
- }
- pw.println("\\cline{1-"+colCount+"}");
- pw.println("\\end{tabular}");
- pw.println("}");
- pw.println("\\end{table}");
- }
-
- /**
- * Write Top 10 or Bottom 10 moitf section.
- * @param listType either TYPE_TOP or TYPE_BOTTOM
- * @param k kmer size
- */
- public void writeMotifRange(int listType, int k, int colCount) {
- ArrayList<Map.Entry<String, Double>>[] insertionMotifs = new ArrayList[3];
- ArrayList<Map.Entry<String, Double>>[] deletionMotifs = new ArrayList[3];
- ArrayList<Map.Entry<String, Double>>[] substitutionMotifs = new ArrayList[3];
- String logoTypeString = new String("Unknown");
-
- for (int type=0; type<3; type++) {
- if (options.isProcessingReadType(type)) {
- String typeString;
- if (listType == KmerMotifStatistic.TYPE_TOP) {
- typeString = overallStats.getStatsByType(type).getTypeString() + "_top";
- } else if (listType == KmerMotifStatistic.TYPE_BOTTOM) {
- typeString = overallStats.getStatsByType(type).getTypeString() + "_bottom";
- } else {
- typeString = overallStats.getStatsByType(type).getTypeString() + "_unknown";
- }
-
- insertionMotifs[type] = overallStats.getStatsByType(type).getMotifStatistics().getSortedInsertionMotifPercentages(k);
- deletionMotifs[type] = overallStats.getStatsByType(type).getMotifStatistics().getSortedDeletionMotifPercentages(k);
- substitutionMotifs[type] = overallStats.getStatsByType(type).getMotifStatistics().getSortedSubstitutionMotifPercentages(k);
-
- overallStats.getStatsByType(type).getMotifStatistics().writeInsertionLogoImage(listType, options.getGraphsDir() + File.separator + "motifs" + File.separator + "logo_insertion_" + typeString + "_k" + k + ".png", k);
- overallStats.getStatsByType(type).getMotifStatistics().writeDeletionLogoImage(listType, options.getGraphsDir() + File.separator + "motifs" + File.separator + "logo_deletion_" + typeString + "_k" + k + ".png", k);
- overallStats.getStatsByType(type).getMotifStatistics().writeSubstitutionLogoImage(listType, options.getGraphsDir() + File.separator + "motifs" + File.separator + "logo_substitution_" + typeString + "_k" + k + ".png", k);
- }
- }
-
- for (int i=0; i<10; i++) {
- if (listType == KmerMotifStatistic.TYPE_TOP) {
- pw.print(i+1);
- } else {
- pw.print("-"+(10-i));
- }
- for (int type=0; type<3; type++) {
- if (options.isProcessingReadType(type)) {
- int insertionPos = i;
- int deletionPos = i;
- int substitutionPos = i;
-
- if (listType == KmerMotifStatistic.TYPE_BOTTOM) {
- insertionPos = insertionMotifs[type].size() - 10 + i;
- deletionPos = deletionMotifs[type].size() - 10 + i;
- substitutionPos = substitutionMotifs[type].size() - 10 + i;
- }
-
- if ((insertionMotifs[type].size() > insertionPos) && (insertionPos >=0)) {
- pw.printf(" & %s (%.2f\\%%)", insertionMotifs[type].get(insertionPos).getKey(), insertionMotifs[type].get(insertionPos).getValue());
- } else {
- pw.print(" &");
- }
-
- if ((deletionMotifs[type].size() > deletionPos) && (deletionPos >=0)) {
- pw.printf(" & %s (%.2f\\%%)", deletionMotifs[type].get(deletionPos).getKey(), deletionMotifs[type].get(deletionPos).getValue());
- } else {
- pw.print(" &");
- }
-
- if ((substitutionMotifs[type].size() > substitutionPos) && (substitutionPos >=0)) {
- pw.printf(" & %s (%.2f\\%%)", substitutionMotifs[type].get(substitutionPos).getKey(), substitutionMotifs[type].get(substitutionPos).getValue());
- } else {
- pw.print(" &");
- }
- }
- }
-
- if (i == 0) {
- if (listType == KmerMotifStatistic.TYPE_TOP) {
- pw.print(" & \\multirow{10}{*}{\\rotatebox[origin=c]{90}{Most common}}");
- } else if (listType == KmerMotifStatistic.TYPE_BOTTOM) {
- pw.print(" & \\multirow{10}{*}{\\rotatebox[origin=c]{90}{Least common}}");
- } else {
- pw.print(" & \\multirow{10}{*}{\\rotatebox[origin=c]{90}{Unknown}}");
- }
- }
- pw.println("\\\\");
- }
-
- pw.println("\\cline{1-"+colCount+"}");
- pw.println("\\rule{0pt}{0.6cm}");
- pw.print(" ");
-
- for (int type=0; type<3; type++) {
- if (options.isProcessingReadType(type)) {
- String typeString;
- if (listType == KmerMotifStatistic.TYPE_TOP) {
- typeString = overallStats.getStatsByType(type).getTypeString() + "_top";
- } else if (listType == KmerMotifStatistic.TYPE_BOTTOM) {
- typeString = overallStats.getStatsByType(type).getTypeString() + "_bottom";
- } else {
- typeString = overallStats.getStatsByType(type).getTypeString() + "_unknown";
- }
-
- pw.print(" & \\includegraphics[height=0.5cm]{" + options.getGraphsDir()+File.separator + "motifs" + File.separator + "logo_insertion_" + typeString + "_k" + k + ".png}");
- pw.print(" & \\includegraphics[height=0.5cm]{" + options.getGraphsDir()+File.separator + "motifs" + File.separator + "logo_deletion_" + typeString + "_k" + k + ".png}");
- pw.print(" & \\includegraphics[height=0.5cm]{" + options.getGraphsDir()+File.separator + "motifs" + File.separator + "logo_substitution_" + typeString + "_k" + k + ".png}");
- }
- }
-
- pw.println(" \\\\");
- }
-
- /**
- * Write motif section of report.
- */
- public void writeMotifSection() {
- pw.println("\\subsection*{Kmer motifs before errors}");
-
- for (int k=3; k<=5; k++) {
- int colCount = 1;
-
- pw.println("\\subsection*{"+k+"-mer error motif analysis}");
- pw.println("\\vspace{-3mm}");
- pw.println("\\begin{table}[H]");
- pw.println("{\\footnotesize");
- pw.println("\\fontsize{6pt}{8pt}\\selectfont");
- pw.println("\\tabcolsep=0.15cm");
- pw.print("\\begin{tabular}{|c");
- for (int type=0; type<3; type++) {
- if (options.isProcessingReadType(type)) {
- pw.print("|c c c");
- colCount+=3;
- }
- }
- pw.println("|c}");
- pw.println("\\cline{1-"+colCount+"}");
- //pw.println("& \\multicolumn{3}{c|}{Template} & \\multicolumn{3}{c|}{Complement} & \\multicolumn{3}{c|}{2D} & \\\\");
- for (int type=0; type<3; type++) {
- if (options.isProcessingReadType(type)) {
- pw.print(" & \\multicolumn{3}{c|}{" + NanoOKOptions.getTypeFromInt(type) + "}");
- }
- }
- pw.println(" & \\\\");
- //pw.println("Rank & Insertion & Deletion & Substitution & Insertion & Deletion & Substitution & Insertion & Deletion & Substitution & \\\\");
- pw.print("Rank");
- for (int type=0; type<3; type++) {
- if (options.isProcessingReadType(type)) {
- pw.print(" & Insertion & Deletion & Substitution");
- }
- }
- pw.println(" & \\\\");
- pw.println("\\cline{1-"+colCount+"}");
- writeMotifRange(KmerMotifStatistic.TYPE_TOP, k, colCount);
- pw.println("\\cline{1-"+colCount+"}");
- writeMotifRange(KmerMotifStatistic.TYPE_BOTTOM, k, colCount);
- pw.println("\\cline{1-"+colCount+"}");
- pw.println("\\end{tabular}");
- pw.println("}");
- pw.println("\\end{table}");
- pw.println("\\vspace{-9mm}");
- pw.printf("{\\fontsize{8}{8}\\textsf{Kmer space for %d-mers: %d \\hspace{5mm} Random chance for any given %d-mer: %.2f\\%%}}", k, (int)Math.pow(4, k), k, 100.0/Math.pow(4, k));
- pw.println("");
- pw.println("\\vspace{5mm}");
- }
- }
-
- /**
- * Convert integer (0, 1, 2, 3) to base (A, C, G, T)
- * @param i number to convert
- * @return base character
- */
- private char intToBase(int i) {
- char c;
-
- switch(i) {
- case 0: c = 'A'; break;
- case 1: c = 'C'; break;
- case 2: c = 'G'; break;
- case 3: c = 'T'; break;
- default: c = 'N'; break;
- }
-
- return c;
- }
-
- /**
- * Write section to report on substitution errors.
- */
- public void writeSubstitutionErrorsSection()
- {
- pw.println("\\subsection*{All reference substitutions}");
- pw.println("\\vspace{-3mm}");
-
- pw.println("\\begin{table}[H]");
- pw.println("{\\footnotesize");
- pw.println("\\fontsize{8pt}{10pt}\\selectfont");
- //pw.println("\\begin{tabular}{|c c|c c c c|c c c c|c c c c|}");
- pw.print("\\begin{tabular}{|c c");
- for (int type=0; type<3; type++) {
- if (options.isProcessingReadType(type)) {
- pw.print("|c c c c");
- }
- }
- pw.println("|}");
- pw.println("\\hline");
- //pw.println(" & & \\multicolumn{4}{c|}{Template substituted \\%} & \\multicolumn{4}{c|}{Complement substituted \\%} & \\multicolumn{4}{c|}{2D substituted \\%} \\\\");
- pw.print(" &");
- for (int type=0; type<3; type++) {
- if (options.isProcessingReadType(type)) {
- pw.print(" & \\multicolumn{4}{c|}{" + NanoOKOptions.getTypeFromInt(type) + " substituted \\%}");
- }
- }
- pw.println(" \\\\");
- //pw.println(" & & a & c & g & t & a & c & g & t & a & c & g & t \\\\");
- pw.print(" &");
- for (int type=0; type<3; type++) {
- if (options.isProcessingReadType(type)) {
- pw.print(" & a & c & g & t");
- }
- }
- pw.println(" \\\\");
-
- pw.println("\\hline");
-
- for (int r=0; r<4; r++) {
- if (r == 0) {
- pw.print("\\multirow{4}{*}{\\rotatebox[origin=c]{90}{Reference}} & ");
- } else {
- pw.print(" & ");
- }
- pw.print(intToBase(r));
- for (int type=0; type<3; type++) {
- if (options.isProcessingReadType(type)) {
- int subs[][] = overallStats.getStatsByType(type).getSubstitutionErrors();
- double nSubstitutions = (double)overallStats.getStatsByType(type).getNumberOfSubstitutions();
- for (int s=0; s<4; s++) {
- double pc = (100.0 * (double)subs[r][s]) / nSubstitutions;
- pw.printf(" & %.2f", pc);
- }
- }
- }
- pw.println("\\\\");
- }
- pw.println("\\hline");
- pw.println("\\end{tabular}");
- pw.println("}");
- pw.println("\\end{table}");
- }
-
- private void writeOverallKmerSection() {
- String graphWidth = "width=.3\\linewidth";
-
- if (options.getNumberOfTypes() == 1) {
- graphWidth = "width=.5\\linewidth";
- }
-
- pw.println("\\subsection*{All reference 21mer analysis}");
- pw.println("\\vspace{-3mm}");
- pw.println("\\begin{figure}[H]");
- pw.println("\\centering");
- includeGraphicsIfExists(NanoOKOptions.TYPE_TEMPLATE, "\\includegraphics["+graphWidth+"]{", options.getGraphsDir() + File.separator + "all_Template_21mers", "}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_COMPLEMENT, "\\includegraphics["+graphWidth+"]{", options.getGraphsDir() + File.separator + "all_Complement_21mers", "}");
- includeGraphicsIfExists(NanoOKOptions.TYPE_2D, "\\includegraphics["+graphWidth+"]{", options.getGraphsDir() + File.separator + "all_2D_21mers", "}");
- pw.println("\\end{figure}");
- }
-
- /**
- * Add sections for each reference sequence.
- * @param refs reference sequences
- */
- public void addAllReferenceSections() {
- ArrayList<ReferenceSequence> sortedRefs = references.getSortedReferences();
- for (int i=0; i<sortedRefs.size(); i++) {
- ReferenceSequence rs = sortedRefs.get(i);
-
- if (rs.getTotalNumberOfAlignments() > NanoOKOptions.MIN_ALIGNMENTS) {
- if ((options.getNumberOfTypes() > 1) || (references.getNumberOfReferences() > 1)) {
- pw.println("\\clearpage");
- }
-
- writeReferenceSection(rs);
- }
- }
- }
-
- /**
- * Write end of LaTeX file.
- */
- private void writeLaTeXFooter() {
- pw.println("\\end{document}");
- }
-
- /**
- * Get handle to PrintWriter.
- * @return a PrintWriter object
- */
- public PrintWriter getPrintWriter() {
- return pw;
- }
-
- /**
- * Write the LaTeX report.
- */
- public void writeReport() {
- open();
- addPassFailSection();
- addLengthsSection();
-
- for (int type=0; type<3; type++) {
- if (options.isProcessingReadType(type)) {
- writeAlignmentsSection(overallStats.getStatsByType(type));
- // references.writeReferenceStatFiles(type);
- // references.writeReferenceSummary(type);
- writeAlignmentSummary(type, pw);
- }
- }
-
- addAllReferenceSections();
- //Set<String> ids = references.getAllIds();
- //for (String id : ids) {
- // writeReferenceSection(references.getReferenceById(id));
- //}
-
- if ((options.getNumberOfTypes() > 1) || (references.getNumberOfReferences() > 1)) {
- pw.println("\\clearpage");
- }
- writeOverallKmerSection();
- writeSubstitutionErrorsSection();
- writeMotifSection();
-
- writeLaTeXFooter();
- close();
- }
-
- /**
- * Write reference summary to LaTeX report.
- * @param type type from NanoOKOptions
- * @param pw handle to LaTeX file
- */
- public void writeAlignmentSummary(int type, PrintWriter pw) {
- if (references.getNumberOfReferences() < LONGTABLE_THRESHOLD) {
- pw.println("\\begin{table}[H]");
- }
- pw.println("{\\footnotesize");
- if (references.getNumberOfReferences() < LONGTABLE_THRESHOLD) {
- pw.println("\\fontsize{9pt}{11pt}\\selectfont");
- pw.println("\\begin{tabular}{l c c c c c c c}");
- } else {
- pw.println("\\begin{longtable}[l]{l c c c c c c c}");
- }
- pw.println(" & & {\\bf Number of} & {\\bf \\% of} & {\\bf Mean read} & {\\bf Aligned} & {\\bf Mean} & {\\bf Longest} \\\\");
- pw.println("{\\bf ID} & {\\bf Size} & {\\bf Reads} & {\\bf Reads} & {\\bf length} & {\\bf bases} & {\\bf coverage} & {\\bf Perf Kmer} \\\\");
- ArrayList<ReferenceSequence> sortedRefs = references.getSortedReferences();
- for (int i=0; i<sortedRefs.size(); i++) {
- ReferenceSequence r = sortedRefs.get(i);
- ReferenceSequenceStats refStats = r.getStatsByType(type);
- if ((sortedRefs.size() < 100) || (refStats.getNumberOfReadsWithAlignments() > 0)) {
- pw.printf("%s & %d & %d & %.2f & %.2f & %d & %.2f & %d \\\\",
- r.getName().replaceAll("_", " "),
- r.getSize(),
- refStats.getNumberOfReadsWithAlignments(),
- 100.0 * (double)refStats.getNumberOfReadsWithAlignments() / (double)overallStats.getStatsByType(type).getNumberOfReads(),
- refStats.getMeanReadLength(),
- refStats.getTotalAlignedBases(),
- (double)refStats.getTotalAlignedBases() / r.getSize(),
- refStats.getLongestPerfectKmer());
- pw.println("");
- }
- }
- if (references.getNumberOfReferences() < LONGTABLE_THRESHOLD) {
- pw.println("\\end{tabular}");
- } else {
- pw.println("\\end{longtable}");
- }
- pw.println("}");
- if (references.getNumberOfReferences() < LONGTABLE_THRESHOLD) {
- pw.println("\\end{table}");
- }
- }
-
-
- public void makePDF() {
- ProcessLogger pl = new ProcessLogger();
- String command = "pdflatex -interaction=nonstopmode -output-directory " +options.getLatexDir() + " " + options.getLatexDir() + File.separator + options.getSample() + ".tex";
- String logFilename = options.getLogsDir() + File.separator + "pdflatex_output_log" + options.getAnalysisSuffix() + ".txt";
- System.out.println("pdflatex output " + logFilename);
- pl.runAndLogCommand(command, logFilename, false);
- }
-}
diff --git a/src/nanook/SequenceCoverage.java b/src/nanook/SequenceCoverage.java
deleted file mode 100644
index e1610f1..0000000
--- a/src/nanook/SequenceCoverage.java
+++ /dev/null
@@ -1,139 +0,0 @@
-package nanook;
-
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.io.Serializable;
-
-/**
- * Represent reference coverage
- *
- * @author Richard Leggett
- */
-public class SequenceCoverage implements Serializable {
- private static final long serialVersionUID = NanoOK.SERIAL_VERSION;
- private int[] coverage;
- private int numBins = 1000;
- private int genomeSize = 0;
- private int binSize = 1;
- private boolean binEarly = false;
-
- public SequenceCoverage(int s) {
- genomeSize = s;
-
- // Approx hundred bins for coverage
- float b = genomeSize / 100;
-
- // Make a multiple of 10, 100 or 500...
- if (genomeSize < 50000) {
- binSize = 10 * (1 + Math.round(b / 10));
- } else if (genomeSize < 500000) {
- binSize = 100 * (1 + Math.round(b / 100));
- } else {
- binSize = 500 * (1 + Math.round(b / 500));
- }
-
- //binSize=50;
-
- numBins = (int) Math.ceil(genomeSize / (double)binSize);
-
-
- // Bin early for large genomes
- if (genomeSize < 10000000) {
- binEarly = false;
- } else {
- binEarly = true;
- }
-
- // Force this for now
- binEarly = true;
-
- if (binEarly) {
- coverage = new int[numBins];
- } else {
- coverage = new int[genomeSize];
- }
- }
-
- /**
- * Increment coverage between two points.
- * @param start start position
- * @param size size
- */
- public synchronized void addCoverage(int start, int size) {
- for (int i=start; i<(start+size); i++) {
- if (binEarly) {
- int b = i/binSize;
- if (b < numBins) {
- coverage[b]++;
- }
- } else {
- if (i < genomeSize) {
- coverage[i]++;
- }
- }
- }
- }
-
- /**
- * Write coverage file for later graph plotting.
- * @param filename output filename
- * @param binSize bin size
- */
- private synchronized void binAndWriteCoverageData(String filename, int pbinSize) {
- try {
- PrintWriter pw = new PrintWriter(new FileWriter(filename));
- for (int i=0; i<(genomeSize-pbinSize); i+=pbinSize) {
- int count = 0;
- for (int j=0; j<pbinSize; j++) {
- count += coverage[i+j];
- }
- pw.printf("%d\t%.2f", i, ((double)count / (double)pbinSize));
- pw.println("");
- }
- pw.close();
- } catch (IOException e) {
- System.out.println("writeCoverageData exception:");
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- /**
- * Write coverage file for later graph plotting.
- * @param filename output filename
- * @param binSize bin size
- */
- private synchronized void writeBinnedCoverageData(String filename) {
- try {
- PrintWriter pw = new PrintWriter(new FileWriter(filename));
- for (int i=0; i<numBins-1; i++) {
- double c = (double)coverage[i] / (double)binSize;
- if (i == (numBins - 1)) {
- c = (double)coverage[i] / (double)(genomeSize - (i*binSize));
- }
- pw.printf("%d\t%.2f", i*binSize, c);
- pw.println("");
- }
- pw.close();
- } catch (IOException e) {
- System.out.println("writeCoverageData exception:");
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- /**
- * Write coverage file for later graph plotting.
- * @param filename output filename
- * @param binSize bin size
- */
- public synchronized void writeCoverageData(String filename, int binSize) {
- if (binEarly) {
- writeBinnedCoverageData(filename);
- } else {
- binAndWriteCoverageData(filename, binSize);
- }
- }
-
-}
diff --git a/src/nanook/SequenceLogo.java b/src/nanook/SequenceLogo.java
deleted file mode 100644
index 5786306..0000000
--- a/src/nanook/SequenceLogo.java
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.awt.Color;
-import java.awt.Font;
-import java.awt.FontMetrics;
-import java.awt.Graphics;
-import java.awt.Graphics2D;
-import java.awt.geom.AffineTransform;
-import java.awt.image.BufferedImage;
-import java.io.File;
-import javax.imageio.ImageIO;
-
-/**
- * Create sequence logo (for error motifs etc.)
- *
- * @author Richard Leggett
- */
-public class SequenceLogo {
- private BufferedImage bImage;
- private int size = 0;
- private double[][] counts;
- private String[] bases = {"A", "C", "G", "T"};
- private Color[] baseColours = {Color.GREEN, Color.BLUE, Color.YELLOW, Color.RED};
- private int imageWidth = 0;
- private int imageHeight = 0;
- private int charWidth = 0;
- private int charHeight = 0;
-
- /**
- * Constructor
- * @param size size (in bases)
- */
- public SequenceLogo(int s) {
- size = s;
- counts = new double[4][size];
- }
-
- /**
- * Debugging constructor
- */
- public SequenceLogo() {
- this(6);
- this.addBase(0, 25, 25, 25, 25);
- this.addBase(1, 25, 25, 25, 25);
- this.addBase(2, 50, 0, 0, 50);
- this.addBase(3, 100, 0, 0, 0);
- this.addBase(4, 10, 10, 30, 50);
- this.addBase(5, 33, 33, 0, 34);
- }
-
- /**
- * Set relative counts at a given position in the logo.
- * @param position position (0-offset)
- * @param a number of As
- * @param c number of Cs
- * @param g number of Gs
- * @param t number of Ts
- */
- public void addBase(int position, int a, int c, int g, int t) {
- if (position < size) {
- counts[0][position] = (double)a / (double)(a + c + g + t);
- counts[1][position] = (double)c / (double)(a + c + g + t);
- counts[2][position] = (double)g / (double)(a + c + g + t);
- counts[3][position] = (double)t / (double)(a + c + g + t);
- } else {
- System.out.println("Warning: bad index passed to addBase.");
- }
- }
-
- /**
- * Draw the logo image.
- */
- public void drawImage() {
- // Create temporary image to work out sizing
- bImage = new BufferedImage(100, 100, BufferedImage.TYPE_INT_RGB);
- Graphics2D g = bImage.createGraphics();
- AffineTransform stretch;
- Font f = new Font("Arial", Font.BOLD, 40);
- FontMetrics metrics = g.getFontMetrics(f);
- g.setFont(f);
- charWidth = metrics.charWidth('G');
- charHeight = metrics.charWidth('G');
-
- // Re-create image at right size
- imageWidth = size * charWidth;
- imageHeight = charHeight*4;
- bImage = new BufferedImage(imageWidth, imageHeight, BufferedImage.TYPE_INT_RGB);
- g = bImage.createGraphics();
- g.setFont(f);
- g.setColor(Color.WHITE);
- g.fillRect(0, 0, imageWidth, imageHeight);
- //System.out.println("imagesize " + imageWidth + ", " + imageHeight);
-
- for (int i=0; i<size; i++) {
- double drawY = (double)imageHeight;
- for (int j=0; j<4; j++) {
- if (counts[j][i] > 0.0) {
- double yStretch = counts[j][i] * 4;
- int drawX = i * charWidth;
- stretch = AffineTransform.getScaleInstance(1.0, yStretch);
- g.setTransform(stretch);
- g.setColor(baseColours[j]);
- //System.out.println(bases[j] + " at "+drawX+", "+drawY+" with stretch "+yStretch);
- g.drawString(bases[j], drawX, (int)(drawY / yStretch));
- drawY -= (yStretch * (double)charHeight);
- }
- }
- //System.out.println("");
- }
-
- }
-
- /**
- * Save the logo as an image.
- * @param filename output filename
- */
- public void saveImage(String filename) {
- try {
- ImageIO.write(bImage, "PNG", new File(filename));
- }
- catch (Exception e)
- {
- System.out.println(e);
- }
- }
-}
diff --git a/src/nanook/SequenceReader.java b/src/nanook/SequenceReader.java
deleted file mode 100644
index c20c76d..0000000
--- a/src/nanook/SequenceReader.java
+++ /dev/null
@@ -1,319 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.BufferedReader;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.PrintWriter;
-import java.util.ArrayList;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * Read FASTA files
- *
- * @author Richard Leggett
- */
-public class SequenceReader {
- private ArrayList<String> seqIDs = new ArrayList();
- private ArrayList<Integer> seqLengths = new ArrayList();
- private ArrayList<String> sequence = new ArrayList();
- private ArrayList<Double> gcPc = new ArrayList();
- private int nSeqs = 0;
- private boolean cacheSequence = false;
- private String currentFilename;
-
- public SequenceReader(boolean cache) {
- cacheSequence = cache;
- }
-
- public int countGC(String s) {
- int g = s.length() - s.replace("G", "").length();
- int c = s.length() - s.replace("C", "").length();
-
- return g + c;
- }
-
- public int indexFASTQFile(String filename) {
- currentFilename = filename;
-
- try
- {
- BufferedReader br = new BufferedReader(new FileReader(filename));
- String line;
- String id = null;
- int contigLength = 0;
- int readsInThisFile = 0;
- boolean gotRead;
- int gc = 0;
-
- do {
- String sh = br.readLine();
- String s = br.readLine();
- String qh = br.readLine();
- String q = br.readLine();
- gotRead = false;
- if ((sh != null) && (s != null) && (qh != null) & (q != null)) {
- if (sh.startsWith("@")) {
- if (qh.startsWith("+")) {
- String sequenceHeader = sh.trim();
- String seq = s.trim();
- String[] parts = sequenceHeader.substring(1).split("(\\s+)");
- id = parts[0];
-
- if (id != null) {
- seqIDs.add(id);
- seqLengths.add(seq.length());
- gcPc.add(new Double(100.0 * (double)countGC(seq) / (double)seq.length()));
- if (cacheSequence) {
- sequence.add(seq);
- }
- nSeqs++;
- gotRead = true;
- }
-
- }
- }
- }
- } while (gotRead);
-
- br.close();
- } catch (Exception e) {
- System.out.println("readFasta Exception:");
- e.printStackTrace();
- System.exit(1);
- }
-
- return nSeqs;
- }
-
- private String makeName(String line, String id) {
- String name = id;
- Pattern p = Pattern.compile(">gi\\|(\\S+)\\|(\\S+)\\|(\\S+)\\| (\\S+) (\\S+)");
- Matcher m = p.matcher(line);
-
- if (m.find()) {
- name = m.group(4) + "_" + m.group(5);
- }
-
- name=name.replaceAll("\\.", "_");
- name=name.replaceAll(" ", "_");
- name=name.replaceAll("\\|", "_");
-
- return name;
- }
-
- /**
- * Parse a FASTA file
- * @param filename filename of FASTA file
- */
- public int indexFASTAFile(String filename, String indexFilename, boolean storeIds) {
- currentFilename = filename;
-
- try
- {
- BufferedReader br = new BufferedReader(new FileReader(filename));
- PrintWriter pw = null;
- String line;
- String id = null;
- String name = null;
- int contigLength = 0;
- int readsInThisFile = 0;
- //String seq = "";
- StringBuilder seq = new StringBuilder(100000);
- int gc = 0;
-
- if (indexFilename != null) {
- pw = new PrintWriter(new FileWriter(indexFilename, false));
- }
-
- do {
- line = br.readLine();
- if (line != null) {
- line = line.trim();
- }
-
- if ((line == null) || (line.startsWith(">"))) {
- if (id != null) {
- if (storeIds) {
- double gcpc = 100.0*(double)gc / (double)contigLength;
- seqIDs.add(id);
- seqLengths.add(contigLength);
- gcPc.add(new Double(gcpc));
- }
-
- if (pw != null) {
- pw.printf("%s\t%d\t%s", id, contigLength, name);
- pw.println("");
- }
-
- if (cacheSequence) {
- sequence.add(seq.toString());
- }
- nSeqs++;
- seq = new StringBuilder(100000);
- }
-
- if (line != null) {
- String[] parts = line.substring(1).split("(\\s+)");
- id = parts[0];
- name = makeName(line, id);
- }
-
- contigLength = 0;
- gc = 0;
- } else if (line != null) {
- contigLength += line.length();
- gc += countGC(line);
-
- if (cacheSequence) {
- seq.append(line);
- //seq = seq + line;
- }
- }
- } while (line != null);
-
- br.close();
- if (pw != null) {
- pw.close();
- }
- } catch (Exception e) {
- System.out.println("readFasta Exception:");
- e.printStackTrace();
- System.exit(1);
- }
-
- return nSeqs;
- }
-
- public int getSequenceCount() {
- return nSeqs;
- }
-
- public String getID(int i) {
- return seqIDs.get(i);
- }
-
- public int getLength(int i) {
- return seqLengths.get(i);
- }
-
- public double getGC(int i) {
- return gcPc.get(i);
- }
-
- public String getSubSequence(String id, int start, int end) {
- int index = -1;
- String seq = "";
-
- for (int i=0; i<nSeqs; i++) {
- if (seqIDs.get(i).equals(id)) {
- index = i;
- break;
- }
- }
-
- if (index == -1) {
- System.out.println("Error: can't find ID " + id);
- System.exit(1);
- }
-
- if (cacheSequence) {
- if (start < 0) {
- System.out.println("Warning: invalid index ("+start+") in SequenceReader");
- start = 0;
- }
- if (end >= sequence.get(index).length()) {
- //System.out.println("Warning: invalid index ("+end+") in SequenceReader");
- end = sequence.get(index).length() - 1;
- }
- seq = sequence.get(index).substring(start, end+1);
- } else {
- try
- {
- BufferedReader br = new BufferedReader(new FileReader(currentFilename));
- StringBuilder ssb = new StringBuilder("");
- String line;
- boolean foundId = false;
- int position = 0;
-
- do {
- line = br.readLine();
- if (line != null) {
- line = line.trim();
- }
-
-
- if (line != null) {
- if (line.startsWith(">")) {
- if (foundId) {
- // If we've found the ID we were after, then this new one means we can stop
- break;
- } else {
- String[] parts = line.substring(1).split("(\\s+)");
- String thisid = parts[0];
-
- // Check for ID we're after
- if (thisid.equals(id)) {
- foundId = true;
- }
- }
- } else {
- if (foundId) {
- int fStart = position;
- int fEnd = position + line.length() - 1;
-
- //System.out.println("fStart = "+fStart+" fEnd = "+fEnd);
-
- if (fEnd >= start) {
- int cutStart = (fStart >= start) ? 0:start-position;
- int cutEnd = (fEnd <= end) ? (line.length() - 1):end-position;
-
- //System.out.println(cutStart + " " +cutEnd+"["+line+"]");
- ssb.append(line.substring(cutStart, cutEnd+1));
-
- // Got all we wanted?
- if (fEnd >= end) {
- break;
- }
- }
- // Keep track of position
- position = position + line.length();
- }
- }
- }
- } while (line != null);
-
- br.close();
-
- seq = ssb.toString();
- } catch (Exception e) {
- System.out.println("readFasta Exception:");
- e.printStackTrace();
- System.exit(1);
- }
- }
-
- return seq;
- }
-
- public void storeKmers(int index, KmerTable t) {
- String seq = sequence.get(index);
- if (seq != null) {
- int k = t.getKmerSize();
-
- for (int o=0; o<seq.length() - k; o++) {
- t.countKmer(seq.substring(o, o+5));
- }
- } else {
- System.out.println("Need to handle the non-cached case");
- }
- }
-
-}
diff --git a/src/nanook/SystemCommandRunnable.java b/src/nanook/SystemCommandRunnable.java
deleted file mode 100644
index cfef515..0000000
--- a/src/nanook/SystemCommandRunnable.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.File;
-import java.util.LinkedList;
-
-/**
- * Enable multi-threading of system commands
- *
- * @author Richard Leggett
- */
- public class SystemCommandRunnable implements Runnable {
-     NanoOKOptions options;
-     private String message;
-     private String command;
-     private String logFile;
-     private String outFile;
-
-     /**
-      * Constructor
-      * @param ops program options
-      * @param msg message to print before the command is run (may be null)
-      * @param com command to run
-      * @param out file to receive the command's output (may be null)
-      * @param log log file, only used by the LSF scheduler
-      */
-     public SystemCommandRunnable(NanoOKOptions ops, String msg, String com, String out, String log) {
-         this.options = ops;
-         this.message = msg;
-         this.command = com;
-         this.outFile = out;
-         this.logFile = log;
-     }
-
-     /**
-      * Prepare a command for the LSF scheduler.
-      * The bsub command line is assembled but nothing is submitted; only
-      * the command (with any output redirection) is printed.
-      * @param command command to run
-      * @param outPath file to redirect output to, or null
-      * @param log LSF log file
-      */
-     private void runCommandLSF(String command, String outPath, String log) {
-         // outPath only non-null if aligner will only write to screen (yes, BWA, I'm talking about you)
-         String redirected = (outPath == null) ? command : command + " > " + outPath;
-
-         // Make the LSF command
-         String lsfCommand = "bsub -n " + options.getNumberOfThreads()
-                           + " -q " + options.getQueue()
-                           + " -oo " + log
-                           + " -R \"rusage[mem=8000] span[hosts=1]\" \"" + redirected + "\"";
-         System.out.println(redirected);
-     }
-
-     /**
-      * Run a command on this machine through a ProcessLogger.
-      * @param command command to run
-      * @param outPath file handed to the logger along with the command, or null
-      */
-     private void runCommandLocal(String command, String outPath) {
-         ProcessLogger logger = new ProcessLogger();
-
-         if (outPath == null) {
-             logger.runCommand(command);
-             return;
-         }
-
-         // outPath only non-null if aligner will only write to screen (yes, BWA, I'm talking about you)
-         logger.setWriteFormat(false, true, false);
-         logger.runAndLogCommand(command, outPath, false);
-     }
-
-     /**
-      * Dispatch the command according to the scheduler named in the options.
-      * An unrecognised scheduler ends the program.
-      * @param command command to run
-      * @param outPath file to receive output, or null
-      * @param log log file
-      */
-     private void runCommand(String command, String outPath, String log) {
-         String scheduler = options.getScheduler();
-
-         if (scheduler.equals("screen")) {
-             System.out.println(command);
-         } else if (scheduler.equals("lsf")) {
-             runCommandLSF(command, outPath, log);
-         } else if (scheduler.equals("system")) {
-             runCommandLocal(command, outPath);
-         } else {
-             System.out.println("Error: scheduler " + options.getScheduler() + " not recognised.");
-             System.exit(1);
-         }
-     }
-
-     /**
-      * Print the message, if there is one, then run the command.
-      */
-     public void run() {
-         if (message != null) {
-             System.out.println(message);
-         }
-
-         runCommand(command, outFile, logFile);
-     }
- }
diff --git a/src/nanook/WatcherLog.java b/src/nanook/WatcherLog.java
deleted file mode 100644
index 769a1d0..0000000
--- a/src/nanook/WatcherLog.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * To change this license header, choose License Headers in Project Properties.
- * To change this template file, choose Tools | Templates
- * and open the template in the editor.
- */
-package nanook;
-
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.text.DateFormat;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-
-
- public class WatcherLog {
-     private transient PrintWriter pw = null;
-     private String filename = null;
-
-     /**
-      * Constructor. The options are not used.
-      * @param options program options
-      */
-     public WatcherLog(NanoOKOptions options) {
-     }
-
-     /**
-      * Open the log file for appending.
-      * @param f path of the log file without its extension
-      * @param clearLogs if true the file is called f.log, otherwise the
-      *                  current date and time are added to the name
-      */
-     public synchronized void open(String f, boolean clearLogs) {
-         String suffix = "";
-         if (!clearLogs) {
-             DateFormat stampFormat = new SimpleDateFormat("ddMMyy_HHmmss");
-             suffix = "_" + stampFormat.format(new Date()).toString();
-         }
-         filename = f + suffix + ".log";
-
-         System.out.println("Opening "+filename);
-
-         try {
-             pw = new PrintWriter(new FileWriter(filename, true));
-         } catch (IOException e) {
-             System.out.println("WatcherLog exception");
-             e.printStackTrace();
-         }
-     }
-
-     /**
-      * Close the log, if it was opened.
-      */
-     public synchronized void close() {
-         if (pw == null) {
-             return;
-         }
-         pw.close();
-     }
-
-     /**
-      * Write text to the log and flush it. Ignored if the log is not open.
-      * @param s text to write
-      */
-     public synchronized void print(String s) {
-         if (pw == null) {
-             return;
-         }
-         pw.print(s);
-         pw.flush();
-     }
-
-     /**
-      * Write a line to the log and flush it. Ignored if the log is not open.
-      * @param s line to write
-      */
-     public synchronized void println(String s) {
-         if (pw == null) {
-             return;
-         }
-         pw.println(s);
-         pw.flush();
-     }
-
-     /**
-      * @return the underlying writer, or null if the log is not open
-      */
-     public synchronized PrintWriter getPrintWriter() {
-         return pw;
-     }
- }
diff --git a/src/nanook/WatcherRunnable.java b/src/nanook/WatcherRunnable.java
deleted file mode 100644
index e91439c..0000000
--- a/src/nanook/WatcherRunnable.java
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Program: NanoOK
- * Author: Richard M. Leggett
- *
- * Copyright 2015 The Genome Analysis Centre (TGAC)
- */
-
-package nanook;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.PrintWriter;
-import java.util.ArrayList;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import java.util.EnumSet;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
-/**
- * Enable multi-threading of read extraction
- *
- * @author Richard Leggett
- */
-public class WatcherRunnable implements Runnable {
- public final static String TYPE_STRING_TEMPLATE = "/Analyses/Basecall_2D_000/BaseCalled_template/Fastq";
- public final static String TYPE_STRING_COMPLEMENT = "/Analyses/Basecall_2D_000/BaseCalled_complement/Fastq";
- public final static String TYPE_STRING_2D = "/Analyses/Basecall_2D_000/BaseCalled_2D/Fastq";
- private String[] typeStrings = {TYPE_STRING_TEMPLATE, TYPE_STRING_COMPLEMENT, TYPE_STRING_2D};
- private NanoOKOptions options;
- private AlignmentFileParser parser;
- private String inDir;
- private String filename;
- private String fastaqDir;
- private String alignDir;
- private String passOrFail;
- private int readType;
-
- public WatcherRunnable(NanoOKOptions o, String in, String file, String pf, String out, String ad, AlignmentFileParser p) {
- options = o;
- inDir = in;
- filename = file;
- passOrFail = pf;
- fastaqDir = out;
- alignDir = ad;
- parser = p;
-
- if (passOrFail.equals("pass")) {
- readType = NanoOKOptions.READTYPE_PASS;
- } else if (passOrFail.equals("fail")) {
- readType = NanoOKOptions.READTYPE_FAIL;
- } else {
- System.out.println("Error in WatcherRunnable - not pass or fail!");
- System.exit(1);
- }
- }
-
- /**
- * Extract reads of each type from file
- * @param inDir input directory
- * @param filename filename
- * @param outDir output directory
- */
- public void run() {
- String inputPathname = inDir + File.separator + filename;
- Fast5File inputFile = new Fast5File(options, inputPathname);
- String outName = new File(inputPathname).getName();
-
- //for (int t=0; t<3; t++) {
- int t;
- if (options.isProcessing2DReads()) {
- t = NanoOKOptions.TYPE_2D;
- } else {
- t = NanoOKOptions.TYPE_TEMPLATE;
- }
-
- if (options.isProcessingReadType(t)) {
- FastAQFile ff = inputFile.getFastq(options.getBasecallIndex(), t);
- if (ff != null) {
- String readFilename = null;
- String readPathname = null;
- if (options.getReadFormat() == NanoOKOptions.FASTA) {
- readFilename = outName + "_BaseCalled_" + NanoOKOptions.getTypeFromInt(t) + ".fasta";
- readPathname = fastaqDir + File.separator + NanoOKOptions.getTypeFromInt(t) + File.separator + readFilename;
- System.out.println(" Writing "+readPathname);
- options.getWatcherReadLog().println(readPathname);
- ff.writeFasta(readPathname, options.outputFast5Path() ? inputPathname:null);
- options.getMergedFile(t, readType).addFile(readPathname, options.outputFast5Path() ? inputPathname:null);
- } else if (options.getReadFormat() == NanoOKOptions.FASTQ) {
- readFilename = outName + "_BaseCalled_" + NanoOKOptions.getTypeFromInt(t) + ".fastq";
- readPathname = fastaqDir + File.separator + NanoOKOptions.getTypeFromInt(t) + File.separator + readFilename;
- System.out.println(" Writing "+readPathname);
- options.getWatcherReadLog().println(readPathname);
- ff.writeFastq(readPathname);
- options.getMergedFile(t, readType).addFile(readPathname, options.outputFast5Path() ? inputPathname:null);
- }
- }
- }
- //}
- }
-}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/nanook.git
More information about the debian-med-commit
mailing list