[med-svn] [mhap] 01/02: Imported Upstream version 1.6+dfsg
Afif Elghraoui
afif-guest at moszumanska.debian.org
Sun Aug 23 19:53:43 UTC 2015
This is an automated email from the git hooks/post-receive script.
afif-guest pushed a commit to branch master
in repository mhap.
commit d216c8f5c326f06b72c8ce8a79379c8b9ea4e0e7
Author: Afif Elghraoui <afif at ghraoui.name>
Date: Sun Aug 23 11:35:30 2015 -0700
Imported Upstream version 1.6+dfsg
---
.gitignore | 8 +
LICENSE.txt | 204 ++
NOTICE.txt | 1 +
README.md | 19 +
build.xml | 377 +++
docs/Makefile | 177 +
docs/make.bat | 242 ++
docs/source/conf.py | 261 ++
docs/source/contact.rst | 28 +
docs/source/index.rst | 23 +
docs/source/installation.rst | 85 +
docs/source/quickstart.rst | 73 +
docs/source/utilities.rst | 36 +
.../edu/umd/marbl/mhap/align/AlignElement.java | 40 +
.../marbl/mhap/align/AlignElementDoubleSketch.java | 169 +
.../umd/marbl/mhap/align/AlignElementSketch.java | 114 +
.../umd/marbl/mhap/align/AlignElementString.java | 78 +
.../java/edu/umd/marbl/mhap/align/Aligner.java | 320 ++
.../java/edu/umd/marbl/mhap/align/Alignment.java | 289 ++
.../umd/marbl/mhap/impl/AbstractMatchSearch.java | 342 ++
.../java/edu/umd/marbl/mhap/impl/FastaData.java | 231 ++
.../java/edu/umd/marbl/mhap/impl/MatchResult.java | 116 +
.../umd/marbl/mhap/impl/MhapRuntimeException.java | 61 +
.../mhap/impl/MinHashBitSequenceSubSketches.java | 259 ++
.../edu/umd/marbl/mhap/impl/MinHashSearch.java | 331 ++
.../java/edu/umd/marbl/mhap/impl/OverlapInfo.java | 64 +
.../java/edu/umd/marbl/mhap/impl/Sequence.java | 108 +
.../java/edu/umd/marbl/mhap/impl/SequenceId.java | 132 +
.../edu/umd/marbl/mhap/impl/SequenceSketch.java | 206 ++
.../marbl/mhap/impl/SequenceSketchStreamer.java | 365 +++
src/main/java/edu/umd/marbl/mhap/main/.gitignore | 5 +
.../java/edu/umd/marbl/mhap/main/AlignmentTry.java | 118 +
.../java/edu/umd/marbl/mhap/main/EstimateROC.java | 815 +++++
.../edu/umd/marbl/mhap/main/GetHistogramStats.java | 103 +
.../edu/umd/marbl/mhap/main/KmerStatSimulator.java | 481 +++
.../java/edu/umd/marbl/mhap/main/MhapMain.java | 607 ++++
.../java/edu/umd/marbl/mhap/math/BasicMath.java | 906 ++++++
.../java/edu/umd/marbl/mhap/math/FastMath.java | 3379 ++++++++++++++++++++
.../umd/marbl/mhap/math/MathRuntimeException.java | 85 +
.../umd/marbl/mhap/sketch/AbstractBitSketch.java | 135 +
.../edu/umd/marbl/mhap/sketch/BitVectorIndex.java | 206 ++
.../edu/umd/marbl/mhap/sketch/ClassicCounter.java | 100 +
.../marbl/mhap/sketch/CosineDistanceSketch.java | 71 +
.../java/edu/umd/marbl/mhap/sketch/CountMin.java | 154 +
.../java/edu/umd/marbl/mhap/sketch/Counter.java | 41 +
.../java/edu/umd/marbl/mhap/sketch/Filter.java | 34 +
.../edu/umd/marbl/mhap/sketch/FrequencyCounts.java | 120 +
.../java/edu/umd/marbl/mhap/sketch/HashUtils.java | 291 ++
.../umd/marbl/mhap/sketch/MinHashBitSketch.java | 92 +
.../edu/umd/marbl/mhap/sketch/MinHashSketch.java | 257 ++
.../umd/marbl/mhap/sketch/OrderedNGramHashes.java | 460 +++
.../java/edu/umd/marbl/mhap/sketch/SimHash.java | 103 +
.../java/edu/umd/marbl/mhap/sketch/Sketch.java | 37 +
.../marbl/mhap/sketch/SketchRuntimeException.java | 63 +
src/main/java/edu/umd/marbl/mhap/utils/.gitignore | 4 +
.../edu/umd/marbl/mhap/utils/CharacterHash.java | 51 +
.../java/edu/umd/marbl/mhap/utils/CyclicHash.java | 72 +
.../edu/umd/marbl/mhap/utils/HashCodeUtil.java | 192 ++
.../java/edu/umd/marbl/mhap/utils/HitCounter.java | 55 +
.../java/edu/umd/marbl/mhap/utils/Interval.java | 81 +
.../edu/umd/marbl/mhap/utils/IntervalNode.java | 167 +
.../edu/umd/marbl/mhap/utils/IntervalTree.java | 168 +
.../marbl/mhap/utils/LimitedSizeCollection.java | 222 ++
.../umd/marbl/mhap/utils/MersenneTwisterFast.java | 1530 +++++++++
.../java/edu/umd/marbl/mhap/utils/PackageInfo.java | 87 +
src/main/java/edu/umd/marbl/mhap/utils/Pair.java | 101 +
.../edu/umd/marbl/mhap/utils/ParseOptions.java | 369 +++
.../marbl/mhap/utils/RandomSequenceGenerator.java | 156 +
.../java/edu/umd/marbl/mhap/utils/ReadBuffer.java | 45 +
.../edu/umd/marbl/mhap/utils/SortablePair.java | 63 +
src/main/java/edu/umd/marbl/mhap/utils/Utils.java | 661 ++++
src/main/resources/edu/umd/marbl/mhap/README | 1 +
.../edu/umd/marbl/mhap/matrix/score_matrix.txt | 25 +
73 files changed, 17472 insertions(+)
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..3038ef8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,8 @@
+/Utils$Pair.class
+/Utils$ToProtein.class
+/Utils$Translate.class
+/Utils.class
+/buildMulti.class
+/bin
+/target
+/classes/
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..24074a5
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,204 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright 2012 Konstantin Berlin
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+
diff --git a/NOTICE.txt b/NOTICE.txt
new file mode 100644
index 0000000..7673893
--- /dev/null
+++ b/NOTICE.txt
@@ -0,0 +1 @@
+Nothing here
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..003d328
--- /dev/null
+++ b/README.md
@@ -0,0 +1,19 @@
+# MHAP
+
+MinHash alignment process (MHAP pronounced MAP): locality sensitive hashing to detect overlaps and utilities. This is the development branch; please use the [latest tagged release](https://github.com/marbl/MHAP/releases/tag/v1.0).
+
+## Build
+
+You must have a recent [JDK](http://www.oracle.com/technetwork/java/javase/downloads/jdk7-downloads-1880260.html "JDK") and [Apache ANT](http://ant.apache.org/ "ANT") available. To check out and build, run:
+
+ git clone https://github.com/marbl/MHAP.git
+ cd MHAP
+ ant
+
+For a quick user guide, run:
+
+ cd target
+ java -jar mhap-1.6.jar
+
+## Docs
+For the full documentation, please see http://mhap.readthedocs.org/en/
diff --git a/build.xml b/build.xml
new file mode 100755
index 0000000..0570315
--- /dev/null
+++ b/build.xml
@@ -0,0 +1,377 @@
+<?xml version = '1.0' encoding = 'utf-8'?>
+
+<project name="MHAP" default="main" basedir=".">
+
+ <!-- ========== Component Declarations ==================================== -->
+
+ <property name="component.version" value="1.6"/>
+
+ <!-- The copyright holder(s) of this component -->
+ <property name="copyright.name" value="Konstantin Berlin and Sergey Koren"/>
+
+ <!-- The name of this component -->
+ <property name="component.name" value="mhap"/>
+
+ <!-- The primary package name of this component -->
+ <property name="component.package" value="edu.umd.marbl.mhap"/>
+
+ <!-- The title of this component -->
+ <property name="component.title" value="MHAP"/>
+
+ <!-- The base directory for component sources -->
+ <property name="source.home" value="src/main/java"/>
+
+ <!-- The base directory for component resources -->
+ <property name="source.resources" value="src/main/resources"/>
+
+ <!-- The base directory for opencl files -->
+ <property name="source.opencl" value="src/main/opencl"/>
+
+ <!-- The base directory for matlab files -->
+ <property name="source.matlab" value="src/main/matlab"/>
+
+ <!-- The base directory for cc files -->
+ <property name="source.cc" value="src/main/cc"/>
+
+ <!-- The base directory for unit test sources -->
+ <property name="test.home" value="src/test/java"/>
+
+ <!-- The base directory for unit test resources -->
+ <property name="test.resources" value="src/test/resources"/>
+
+ <!-- The base directory for compilation targets -->
+ <property name="build.home" value="target"/>
+
+ <!-- The base directory for distribution targets -->
+ <property name="dist.home" value="${build.home}/dist"/>
+
+ <!-- The directory for library containing the dependency jar files -->
+ <property name="lib.dir" value="lib"/>
+
+ <!-- The directory that contains the binary files -->
+ <property name="bin.dir" value="bin"/>
+
+ <!-- The base directory for test reports -->
+ <property name="test.reports" value="${build.home}/test-reports"/>
+
+ <!-- Base file name for dist files -->
+ <property name="final.name" value="${component.name}-${component.version}"/>
+
+ <!-- Directory where binary release files are staged -->
+ <property name="stage.bin.dir" value="${dist.home}/stage-bin"/>
+
+ <!-- Directory where source release files are staged -->
+ <property name="stage.src.dir" value="${dist.home}/stage-src"/>
+
+ <!-- ========== Compiler Defaults ========================================= -->
+
+ <!-- Should Java compilations set the 'debug' compiler option? -->
+ <property name="compile.debug" value="false"/>
+
+ <!-- Should Java compilations set the 'deprecation' compiler option? -->
+ <property name="compile.deprecation" value="false"/>
+
+ <!-- Should Java compilations set the 'optimize' compiler option? -->
+ <property name="compile.optimize" value="true"/>
+
+ <!-- JDK level -->
+ <property name="compile.source" value="1.8"/>
+ <property name="compile.target" value="1.8"/>
+
+ <!-- Base compile classpath -->
+
+ <!-- Generate path for binary distribution dependencies -->
+ <path id="compile.classpath">
+ <fileset dir="${lib.dir}" includes="**/*.jar"/>
+ <pathelement location="${build.home}/classes"/>
+ </path>
+
+ <!-- ========== Test Execution Defaults =================================== -->
+
+
+ <!-- Construct unit test classpath -->
+ <path id="test.classpath">
+ <fileset dir="${lib.dir}" includes="**/*.jar"/>
+ <pathelement location="${build.home}/classes"/>
+ <pathelement location="${build.home}/test-classes"/>
+ <pathelement location="${junit.jar}"/>
+ </path>
+
+ <!-- Should the build fail if there are test failures? -->
+ <property name="test.failonerror" value="false"/>
+
+ <!-- ========== Executable Targets ======================================== -->
+
+ <target name="clean" description="Clean build and distribution directories">
+ <delete dir="${build.home}"/>
+ </target>
+
+
+ <target name="init"
+ description="Initialize and evaluate conditionals">
+ <echo message="-------- ${component.title} ${component.version} --------"/>
+ <filter token="name" value="${component.name}"/>
+ <filter token="package" value="${component.package}"/>
+ <filter token="version" value="${component.version}"/>
+ <filter token="compilesource" value="${compile.source}"/>
+ <filter token="compiletarget" value="${compile.target}"/>
+ <tstamp/>
+ <mkdir dir="${build.home}"/>
+ <mkdir dir="${build.home}/classes"/>
+ <mkdir dir="${build.home}/classes/${bin.dir}"/>
+ <mkdir dir="${build.home}/classes/properties"/>
+ <mkdir dir="${build.home}/test-classes"/>
+ <copy todir="${build.home}/classes/">
+ <fileset dir="${source.resources}" />
+ </copy>
+ </target>
+
+ <!-- ========== Build Info File =========================================== -->
+ <target name="buildinfo">
+ <tstamp>
+ <format property="builtat" pattern="MM/dd/yyyy hh:mm aa" timezone="America/New_York"/>
+ </tstamp>
+
+ <propertyfile file="${build.home}/classes/properties/mhap.properties"
+ comment="This file is automatically generated - DO NOT EDIT">
+ <entry key="buildtime" value="${builtat}"/>
+ <entry key="builder" value="${whoami}"/>
+ <entry key="component.version" value="${component.version}"/>
+ <entry key="system" value="${buildsystem}"/>
+ </propertyfile>
+ </target>
+
+ <!-- ========== Compile Targets =========================================== -->
+
+ <target name="compile" depends="init,copy-lib,buildinfo" description="Compile">
+
+ <javac srcdir="${source.home}"
+ destdir="${build.home}/classes"
+ source="${compile.source}"
+ target="${compile.target}"
+ debug="${compile.debug}"
+ deprecation="${compile.deprecation}"
+ includeantruntime="false"
+ optimize="${compile.optimize}">
+ <classpath refid="compile.classpath"/>
+ </javac>
+ </target>
+
+
+ <!-- ========== Unit Test Targets ========================================= -->
+
+ <target name="compile.tests" depends="compile" description="Compile unit tests.">
+
+ <javac srcdir="${test.home}"
+ destdir="${build.home}/test-classes"
+ source="${compile.source}"
+ target="${compile.target}"
+ debug="${compile.debug}"
+ deprecation="${compile.deprecation}"
+ includeantruntime="false"
+ optimize="${compile.optimize}">
+ <classpath refid="test.classpath"/>
+ </javac>
+
+ <copy todir="${build.home}/test-classes">
+ <fileset dir="${test.resources}">
+ </fileset>
+ </copy>
+
+ <copy todir="${build.home}">
+ <fileset dir="${source.matlab}" />
+ </copy>
+
+ </target>
+
+ <target name="test" depends="compile.tests"
+ description="Run unit tests">
+ <mkdir dir="${test.reports}"/>
+ <junit printsummary="true"
+ errorProperty="test.failed"
+ failureProperty="test.failed"
+ fork="true"
+ showOutput="true">
+ <formatter type="plain"/>
+ <classpath refid="test.classpath"/>
+ <!-- If test.entry is defined, run a single test, otherwise run all valid tests -->
+ <!-- N.B. test.entry must be the full path to the test class, for example:
+ -->
+ <test name="${test.entry}" todir="${test.reports}" if="test.entry"/>
+ <batchtest todir="${test.reports}" unless="test.entry">
+ <fileset dir="${test.home}">
+ <include name="**/*Test.java"/>
+ </fileset>
+ </batchtest>
+ </junit>
+ <fail message="There were test failures.">
+ <condition>
+ <and>
+ <istrue value="${test.failonerror}"/>
+ <isset property="test.failed"/>
+ </and>
+ </condition>
+ </fail>
+ </target>
+
+
+
+ <!-- ========== Produce JavaDocs ========================================== -->
+
+ <target name="javadoc" depends="compile" description="Create component Javadoc documentation">
+ <mkdir dir="${build.home}/apidocs"/> <!-- API docs for ${component.package} are written here -->
+ <tstamp> <!-- current.year feeds the copyright footer passed as 'bottom' -->
+ <format property="current.year" pattern="yyyy"/>
+ </tstamp>
+ <javadoc sourcepath="${source.home}"
+ destdir="${build.home}/apidocs"
+ packagenames="${component.package}.*"
+ author="true"
+ private="true"
+ version="true"
+ doctitle="&lt;h1&gt;${component.title} ${component.version}&lt;/h1&gt;"
+ windowtitle="${component.title} ${component.version}"
+ bottom="Copyright (c) 2011-${current.year} ${copyright.name}"
+ classpathref="compile.classpath">
+ <link href="http://java.sun.com/j2se/1.6.0/docs/api/"/>
+ </javadoc>
+ </target>
+
+
+ <!-- ========== Create Jar ================================================ -->
+
+ <target name="jar" depends="compile" description="Create jar file">
+ <!-- Bundle the license files under META-INF, write the manifest, then package ${build.home}/classes -->
+ <copy file="LICENSE.txt" tofile="${build.home}/classes/META-INF/LICENSE.txt"/>
+ <copy file="NOTICE.txt" tofile="${build.home}/classes/META-INF/NOTICE.txt"/>
+
+ <manifest file="${build.home}/MANIFEST.MF">
+ <attribute name="Specification-Title" value="${component.title}"/>
+ <attribute name="Specification-Version" value="${component.version}"/>
+ <attribute name="Specification-Vendor" value="${copyright.name}"/>
+ <attribute name="Implementation-Title" value="${component.title}"/>
+ <attribute name="Implementation-Version" value="${component.version}"/>
+ <attribute name="Implementation-Vendor" value="${copyright.name}"/>
+ <attribute name="Implementation-Vendor-Id" value=""/>
+ <attribute name="X-Compile-Source-JDK" value="${compile.source}"/>
+ <attribute name="X-Compile-Target-JDK" value="${compile.target}"/>
+ </manifest>
+
+ <jar jarfile="${build.home}/${final.name}.jar"
+ basedir="${build.home}/classes"
+ manifest="${build.home}/MANIFEST.MF"/>
+ </target>
+
+
+ <!-- ========== Distribution Target =========================================== -->
+
+ <target name="dist" depends="clean,jar,javadoc" description="Create distribution artifacts">
+
+ <mkdir dir="${dist.home}"/>
+
+ <!-- jar(s) -->
+ <copy todir="${dist.home}">
+ <fileset dir=".">
+ <include name="RELEASE-NOTES.txt"/>
+ </fileset>
+ <fileset dir="${build.home}">
+ <include name="*.jar"/>
+ </fileset>
+ </copy>
+
+ <!-- Binary Distro -->
+ <mkdir dir="${stage.bin.dir}/${final.name}"/>
+ <copy todir="${stage.bin.dir}/${final.name}">
+ <fileset dir=".">
+ <include name="LICENSE.txt"/>
+ <include name="NOTICE.txt"/>
+ </fileset>
+ <fileset dir="${build.home}">
+ <include name="*.jar"/>
+ </fileset>
+ </copy>
+ <copy todir="${stage.bin.dir}/${final.name}/apidocs">
+ <fileset dir="${build.home}/apidocs" />
+ </copy>
+
+ <!-- Source Distro -->
+ <mkdir dir="${stage.src.dir}/${final.name}-src"/>
+ <copy todir="${stage.src.dir}/${final.name}-src">
+ <fileset dir=".">
+ <include name="*.xml"/>
+ <include name="*.txt"/>
+ <include name="*.html"/>
+ </fileset>
+ </copy>
+ <copy todir="${stage.src.dir}/${final.name}-src/src">
+ <fileset dir="src" excludes="mantissa/**,experimental/**" />
+ </copy>
+ <zip zipfile="${dist.home}/${final.name}.zip" basedir="${stage.bin.dir}"/>
+ <zip zipfile="${dist.home}/${final.name}-src.zip" basedir="${stage.src.dir}"/>
+ <tar tarfile="${dist.home}/${final.name}.tar" basedir="${stage.bin.dir}" longfile="gnu"/>
+ <tar tarfile="${dist.home}/${final.name}-src.tar" basedir="${stage.src.dir}" longfile="gnu"/>
+ <gzip src="${dist.home}/${final.name}.tar" zipfile="${dist.home}/${final.name}.tar.gz"/>
+ <gzip src="${dist.home}/${final.name}-src.tar" zipfile="${dist.home}/${final.name}-src.tar.gz"/>
+
+ <!-- clean up staging directories -->
+ <delete dir="${stage.bin.dir}"/>
+ <delete dir="${stage.src.dir}"/>
+
+ </target>
+
+ <property name="params" value=""/>
+ <property name="javac.exe" value="javac"/>
+ <property name="java.exe" value="java"/>
+
+ <target name="copy-lib">
+ <mkdir dir="${build.home}/${lib.dir}"/>
+ <copy todir="${build.home}/${lib.dir}">
+ <fileset dir="${lib.dir}"/>
+ </copy>
+ </target>
+
+ <path id="dep.runtime">
+ <fileset dir="./${lib.dir}">
+ <include name="**/*.jar" />
+ </fileset>
+ </path>
+ <property name="dep_cp" value="${toString:dep.runtime}" />
+
+ <target name="main" depends="jar-mhap">
+ </target>
+
+ <target name="jar-mhap" depends="clean,compile">
+ <!-- Builds the runnable jar (Main-Class set to MhapMain) and a tar that bundles it with the copied libraries -->
+ <copy file="LICENSE.txt" tofile="${build.home}/classes/META-INF/LICENSE.txt"/>
+ <copy file="NOTICE.txt" tofile="${build.home}/classes/META-INF/NOTICE.txt"/>
+ <!-- Derive the manifest Class-Path value from compile.classpath, relative to the jar file named here -->
+ <manifestclasspath property="lib.list" jarfile="mhap-${component.version}.jar">
+ <classpath refid="compile.classpath"/>
+ </manifestclasspath>
+
+ <manifest file="${build.home}/MANIFEST.MF">
+ <attribute name="Specification-Title" value="${component.title}"/>
+ <attribute name="Specification-Version" value="${component.version}"/>
+ <attribute name="Specification-Vendor" value="${copyright.name}"/>
+ <attribute name="Implementation-Title" value="${component.title}"/>
+ <attribute name="Implementation-Version" value="${component.version}"/>
+ <attribute name="Implementation-Vendor" value="${copyright.name}"/>
+ <attribute name="Implementation-Vendor-Id" value=""/>
+ <attribute name="X-Compile-Source-JDK" value="${compile.source}"/>
+ <attribute name="X-Compile-Target-JDK" value="${compile.target}"/>
+ <attribute name="Main-Class" value="edu.umd.marbl.mhap.main.MhapMain" />
+ <attribute name="Class-Path" value="${lib.list}" />
+ </manifest>
+
+ <jar jarfile="${build.home}/mhap-${component.version}.jar"
+ basedir="${build.home}/classes"
+ manifest="${build.home}/MANIFEST.MF"/>
+ <!-- Archive the jar together with ${build.home}/${lib.dir} (populated by copy-lib) -->
+ <tar destfile="${build.home}/mhap-${component.version}.tar"
+ basedir="${build.home}"
+ includes="${lib.dir}/**, mhap-${component.version}.jar"
+ />
+
+ </target>
+
+</project>
\ No newline at end of file
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..85957e3
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,177 @@
+# Makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS =
+SPHINXBUILD = sphinx-build-2.7
+PAPER =
+BUILDDIR = build
+
+# User-friendly check for sphinx-build
+ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
+$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
+endif
+
+# Internal variables.
+PAPEROPT_a4 = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
+# the i18n builder cannot share the environment and doctrees with the others
+I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
+
+.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf latexpdfja text man texinfo info changes linkcheck doctest gettext xml pseudoxml
+
+help:
+ @echo "Please use \`make <target>' where <target> is one of"
+ @echo " html to make standalone HTML files"
+ @echo " dirhtml to make HTML files named index.html in directories"
+ @echo " singlehtml to make a single large HTML file"
+ @echo " pickle to make pickle files"
+ @echo " json to make JSON files"
+ @echo " htmlhelp to make HTML files and a HTML help project"
+ @echo " qthelp to make HTML files and a qthelp project"
+ @echo " devhelp to make HTML files and a Devhelp project"
+ @echo " epub to make an epub"
+ @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+ @echo " latexpdf to make LaTeX files and run them through pdflatex"
+ @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
+ @echo " text to make text files"
+ @echo " man to make manual pages"
+ @echo " texinfo to make Texinfo files"
+ @echo " info to make Texinfo files and run them through makeinfo"
+ @echo " gettext to make PO message catalogs"
+ @echo " changes to make an overview of all changed/added/deprecated items"
+ @echo " xml to make Docutils-native XML files"
+ @echo " pseudoxml to make pseudoxml-XML files for display purposes"
+ @echo " linkcheck to check all external links for integrity"
+ @echo " doctest to run all doctests embedded in the documentation (if enabled)"
+
+clean:
+ rm -rf $(BUILDDIR)/*
+
+html:
+ $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
+ @echo
+ @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+
+dirhtml:
+ $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
+ @echo
+ @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
+
+singlehtml:
+ $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
+ @echo
+ @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
+
+pickle:
+ $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
+ @echo
+ @echo "Build finished; now you can process the pickle files."
+
+json:
+ $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
+ @echo
+ @echo "Build finished; now you can process the JSON files."
+
+htmlhelp:
+ $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
+ @echo
+ @echo "Build finished; now you can run HTML Help Workshop with the" \
+ ".hhp project file in $(BUILDDIR)/htmlhelp."
+
+qthelp:
+ $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
+ @echo
+ @echo "Build finished; now you can run "qcollectiongenerator" with the" \
+ ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
+ @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/MinHashAlignmentProcessMHAP.qhcp"
+ @echo "To view the help file:"
+ @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/MinHashAlignmentProcessMHAP.qhc"
+
+devhelp:
+ $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
+ @echo
+ @echo "Build finished."
+ @echo "To view the help file:"
+ @echo "# mkdir -p $$HOME/.local/share/devhelp/MinHashAlignmentProcessMHAP"
+ @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/MinHashAlignmentProcessMHAP"
+ @echo "# devhelp"
+
+epub:
+ $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
+ @echo
+ @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
+
+latex:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+ @echo
+ @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
+ @echo "Run \`make' in that directory to run these through (pdf)latex" \
+ "(use \`make latexpdf' here to do that automatically)."
+
+latexpdf:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+ @echo "Running LaTeX files through pdflatex..."
+ $(MAKE) -C $(BUILDDIR)/latex all-pdf
+ @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+latexpdfja:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+ @echo "Running LaTeX files through platex and dvipdfmx..."
+ $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
+ @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+text:
+ $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
+ @echo
+ @echo "Build finished. The text files are in $(BUILDDIR)/text."
+
+man:
+ $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
+ @echo
+ @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
+
+texinfo:
+ $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+ @echo
+ @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
+ @echo "Run \`make' in that directory to run these through makeinfo" \
+ "(use \`make info' here to do that automatically)."
+
+info:
+ $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+ @echo "Running Texinfo files through makeinfo..."
+ $(MAKE) -C $(BUILDDIR)/texinfo info
+ @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
+
+gettext:
+ $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
+ @echo
+ @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
+
+changes:
+ $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
+ @echo
+ @echo "The overview file is in $(BUILDDIR)/changes."
+
+linkcheck:
+ $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
+ @echo
+ @echo "Link check complete; look for any errors in the above output " \
+ "or in $(BUILDDIR)/linkcheck/output.txt."
+
+doctest:
+ $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
+ @echo "Testing of doctests in the sources finished, look at the " \
+ "results in $(BUILDDIR)/doctest/output.txt."
+
+xml:
+ $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
+ @echo
+ @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
+
+pseudoxml:
+ $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
+ @echo
+ @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 0000000..f8d1c1a
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,242 @@
+@ECHO OFF
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+ set SPHINXBUILD=sphinx-build
+)
+set BUILDDIR=build
+set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source
+set I18NSPHINXOPTS=%SPHINXOPTS% source
+if NOT "%PAPER%" == "" (
+ set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
+ set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
+)
+
+if "%1" == "" goto help
+
+if "%1" == "help" (
+ :help
+ echo.Please use `make ^<target^>` where ^<target^> is one of
+ echo. html to make standalone HTML files
+ echo. dirhtml to make HTML files named index.html in directories
+ echo. singlehtml to make a single large HTML file
+ echo. pickle to make pickle files
+ echo. json to make JSON files
+ echo. htmlhelp to make HTML files and a HTML help project
+ echo. qthelp to make HTML files and a qthelp project
+ echo. devhelp to make HTML files and a Devhelp project
+ echo. epub to make an epub
+ echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
+ echo. text to make text files
+ echo. man to make manual pages
+ echo. texinfo to make Texinfo files
+ echo. gettext to make PO message catalogs
+ echo. changes to make an overview over all changed/added/deprecated items
+ echo. xml to make Docutils-native XML files
+ echo. pseudoxml to make pseudoxml-XML files for display purposes
+ echo. linkcheck to check all external links for integrity
+ echo. doctest to run all doctests embedded in the documentation if enabled
+ goto end
+)
+
+if "%1" == "clean" (
+ for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
+ del /q /s %BUILDDIR%\*
+ goto end
+)
+
+
+%SPHINXBUILD% 2> nul
+if errorlevel 9009 (
+ echo.
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+ echo.installed, then set the SPHINXBUILD environment variable to point
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
+ echo.may add the Sphinx directory to PATH.
+ echo.
+ echo.If you don't have Sphinx installed, grab it from
+ echo.http://sphinx-doc.org/
+ exit /b 1
+)
+
+if "%1" == "html" (
+ %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The HTML pages are in %BUILDDIR%/html.
+ goto end
+)
+
+if "%1" == "dirhtml" (
+ %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
+ goto end
+)
+
+if "%1" == "singlehtml" (
+ %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
+ goto end
+)
+
+if "%1" == "pickle" (
+ %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished; now you can process the pickle files.
+ goto end
+)
+
+if "%1" == "json" (
+ %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished; now you can process the JSON files.
+ goto end
+)
+
+if "%1" == "htmlhelp" (
+ %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished; now you can run HTML Help Workshop with the ^
+.hhp project file in %BUILDDIR%/htmlhelp.
+ goto end
+)
+
+if "%1" == "qthelp" (
+ %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished; now you can run "qcollectiongenerator" with the ^
+.qhcp project file in %BUILDDIR%/qthelp, like this:
+ echo.^> qcollectiongenerator %BUILDDIR%\qthelp\MinHashAlignmentProcessMHAP.qhcp
+ echo.To view the help file:
+ echo.^> assistant -collectionFile %BUILDDIR%\qthelp\MinHashAlignmentProcessMHAP.qhc
+ goto end
+)
+
+if "%1" == "devhelp" (
+ %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished.
+ goto end
+)
+
+if "%1" == "epub" (
+ %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The epub file is in %BUILDDIR%/epub.
+ goto end
+)
+
+if "%1" == "latex" (
+ %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
+ goto end
+)
+
+if "%1" == "latexpdf" (
+ %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
+ cd %BUILDDIR%/latex
+ make all-pdf
+ cd %~dp0
+ echo.
+ echo.Build finished; the PDF files are in %BUILDDIR%/latex.
+ goto end
+)
+
+if "%1" == "latexpdfja" (
+ %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
+ cd %BUILDDIR%/latex
+ make all-pdf-ja
+ cd %~dp0
+ echo.
+ echo.Build finished; the PDF files are in %BUILDDIR%/latex.
+ goto end
+)
+
+if "%1" == "text" (
+ %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The text files are in %BUILDDIR%/text.
+ goto end
+)
+
+if "%1" == "man" (
+ %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The manual pages are in %BUILDDIR%/man.
+ goto end
+)
+
+if "%1" == "texinfo" (
+ %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
+ goto end
+)
+
+if "%1" == "gettext" (
+ %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
+ goto end
+)
+
+if "%1" == "changes" (
+ %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.The overview file is in %BUILDDIR%/changes.
+ goto end
+)
+
+if "%1" == "linkcheck" (
+ %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Link check complete; look for any errors in the above output ^
+or in %BUILDDIR%/linkcheck/output.txt.
+ goto end
+)
+
+if "%1" == "doctest" (
+ %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Testing of doctests in the sources finished, look at the ^
+results in %BUILDDIR%/doctest/output.txt.
+ goto end
+)
+
+if "%1" == "xml" (
+ %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The XML files are in %BUILDDIR%/xml.
+ goto end
+)
+
+if "%1" == "pseudoxml" (
+ %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
+ goto end
+)
+
+:end
diff --git a/docs/source/conf.py b/docs/source/conf.py
new file mode 100644
index 0000000..e4f9473
--- /dev/null
+++ b/docs/source/conf.py
@@ -0,0 +1,261 @@
+# -*- coding: utf-8 -*-
+#
+# MinHash Alignment Process (MHAP) documentation build configuration file, created by
+# sphinx-quickstart on Sun Jul 13 18:13:46 2014.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import sys
+import os
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#sys.path.insert(0, os.path.abspath('.'))
+
+# -- General configuration ------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+ 'sphinx.ext.todo',
+ 'sphinx.ext.mathjax',
+]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix of source filenames.
+source_suffix = '.rst'
+
+# The encoding of source files.
+#source_encoding = 'utf-8-sig'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'MinHash Alignment Process (MHAP)'
+copyright = u'2014, Sergey Koren and Konstantin Berlin'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = '1.0'
+# The full version, including alpha/beta/rc tags.
+release = '1.0'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#today = ''
+# Else, today_fmt is used as the format for a strftime call.
+#today_fmt = '%B %d, %Y'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+exclude_patterns = []
+
+# The reST default role (used for this markup: `text`) to use for all
+# documents.
+#default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+#modindex_common_prefix = []
+
+# If true, keep warnings as "system message" paragraphs in the built documents.
+#keep_warnings = False
+
+
+# -- Options for HTML output ----------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+html_theme = 'default'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further. For a list of options available for each theme, see the
+# documentation.
+#html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+#html_theme_path = []
+
+# The name for this set of Sphinx documents. If None, it defaults to
+# "<project> v<release> documentation".
+#html_title = None
+
+# A shorter title for the navigation bar. Default is the same as html_title.
+#html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#html_logo = None
+
+# The name of an image file (within the static path) to use as favicon of the
+# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# Add any extra paths that contain custom files (such as robots.txt or
+# .htaccess) here, relative to this directory. These files are copied
+# directly to the root of the documentation.
+#html_extra_path = []
+
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+#html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#html_additional_pages = {}
+
+# If false, no module index is generated.
+#html_domain_indices = True
+
+# If false, no index is generated.
+#html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+#html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#html_show_sourcelink = True
+
+# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
+#html_show_sphinx = True
+
+# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
+#html_show_copyright = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it. The value of this option must be the
+# base URL from which the finished HTML is served.
+#html_use_opensearch = ''
+
+# This is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = None
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'MinHashAlignmentProcessMHAPdoc'
+
+
+# -- Options for LaTeX output ---------------------------------------------
+
+latex_elements = {
+# The paper size ('letterpaper' or 'a4paper').
+#'papersize': 'letterpaper',
+
+# The font size ('10pt', '11pt' or '12pt').
+#'pointsize': '10pt',
+
+# Additional stuff for the LaTeX preamble.
+#'preamble': '',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+# author, documentclass [howto, manual, or own class]).
+latex_documents = [
+ ('index', 'MinHashAlignmentProcessMHAP.tex', u'MinHash Alignment Process (MHAP) Documentation',
+ u'Sergey Koren and Konstantin Berlin', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#latex_use_parts = False
+
+# If true, show page references after internal links.
+#latex_show_pagerefs = False
+
+# If true, show URL addresses after external links.
+#latex_show_urls = False
+
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+
+# If false, no module index is generated.
+#latex_domain_indices = True
+
+
+# -- Options for manual page output ---------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+ ('index', 'minhashalignmentprocessmhap', u'MinHash Alignment Process (MHAP) Documentation',
+ [u'Sergey Koren and Konstantin Berlin'], 1)
+]
+
+# If true, show URL addresses after external links.
+#man_show_urls = False
+
+
+# -- Options for Texinfo output -------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+# dir menu entry, description, category)
+texinfo_documents = [
+ ('index', 'MinHashAlignmentProcessMHAP', u'MinHash Alignment Process (MHAP) Documentation',
+ u'Sergey Koren and Konstantin Berlin', 'MinHashAlignmentProcessMHAP', 'One line description of project.',
+ 'Miscellaneous'),
+]
+
+# Documents to append as an appendix to all manuals.
+#texinfo_appendices = []
+
+# If false, no module index is generated.
+#texinfo_domain_indices = True
+
+# How to display URL addresses: 'footnote', 'no', or 'inline'.
+#texinfo_show_urls = 'footnote'
+
+# If true, do not generate a @detailmenu in the "Top" node's menu.
+#texinfo_no_detailmenu = False
diff --git a/docs/source/contact.rst b/docs/source/contact.rst
new file mode 100644
index 0000000..9f4c351
--- /dev/null
+++ b/docs/source/contact.rst
@@ -0,0 +1,28 @@
+############
+Contact
+############
+
+Bugs, feature requests, comments:
+=================================
+
+If you encounter any problems/bugs, please check the known issues pages::
+
+https://github.com/marbl/MHAP/issues
+
+If not, please report the issue either using the contact information below or
+by submitting a new issue online.
+
+Please include information on your run::
+
+ 1) any output produced by MHAP
+
+ 2) sample data, if possible, to reproduce the issue
+
+Who to contact to report bugs, forward complaints, feature requests:
+
+Konstantin Berlin: kberlin at umd.edu
+-------------------------------------
+
+Sergey Koren: sergek at umd.edu
+----------------------------
+
diff --git a/docs/source/index.rst b/docs/source/index.rst
new file mode 100644
index 0000000..efc07ad
--- /dev/null
+++ b/docs/source/index.rst
@@ -0,0 +1,23 @@
+=============================================================================
+MinHash Alignment Process (MHAP): a probabilistic sequence overlap algorithm.
+=============================================================================
+
+=================
+Overview
+=================
+MHAP (pronounced MAP) is a reference implementation of a probabilistic
+sequence overlapping algorithm. It is designed to efficiently detect all overlaps
+between noisy long-read sequence data. It efficiently estimates Jaccard similarity
+by compressing sequences to their representative fingerprints composed of min-mers (minimum k-mers).
+
+MHAP is included within the Celera Assembler `PBcR <http://wgs-assembler.sourceforge.net/wiki/index.php?title=PBcR>`_ pipeline. The Celera Assembler can be downloaded `here <https://sourceforge.net/projects/wgs-assembler/files/wgs-assembler/wgs-8.3/>`_.
+
+Contents:
+
+.. toctree::
+ :maxdepth: 2
+
+ installation
+ quickstart
+ utilities
+ contact
diff --git a/docs/source/installation.rst b/docs/source/installation.rst
new file mode 100644
index 0000000..a93c238
--- /dev/null
+++ b/docs/source/installation.rst
@@ -0,0 +1,85 @@
+############
+Installation
+############
+
+Before you start
+=================
+MHAP requires a recent version of the `JVM <http://www.oracle.com/technetwork/java/javase/downloads/jre8-downloads-2133155.html>`_ (1.8+). JDK 1.7 or earlier will not work. If you would like to build the code from source, you need to have the `JDK <http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html>`_ and the `ANT <http://ant.apache.org/>`_ build system available.
+
+Prerequisites
+==============
+ * java (1.8+)
+ * ant (1.8.2+)
+
+Here is a list of currently supported Operating Systems:
+
+1. Mac OSX (10.7 or newer)
+2. Linux 64-bit (tested on CentOS, Fedora, RedHat, OpenSUSE and Ubuntu)
+3. Windows (XP or newer)
+
+Installation
+======================
+Pre-compiled
+-----------------
+
+The pre-compiled version is recommended for users who want to run MHAP without doing development. To download a pre-compiled tar, run:
+
+.. code-block:: bash
+
+ $ wget https://github.com/marbl/MHAP/releases/download/1.6/mhap-1.6.tar.gz
+
+If ``wget`` is not available, you can use ``curl`` instead:
+
+.. code-block:: bash
+
+ $ curl -L https://github.com/marbl/MHAP/releases/download/1.6/mhap-1.6.tar.gz > mhap-1.6.tar.gz
+
+Then run
+
+.. code-block:: bash
+
+ $ tar xvzf mhap-1.6.tar.gz
+
+Source
+-----------------
+
+To build the code from the release:
+
+.. code-block:: bash
+
+ $ wget https://github.com/marbl/MHAP/archive/1.6.zip
+
+If you see a certificate not trusted error, you can add the following option to wget:
+
+.. code-block:: bash
+
+ $ --no-check-certificate
+
+If ``wget`` is not available, you can use ``curl`` instead:
+
+.. code-block:: bash
+
+ $ curl -L https://github.com/marbl/MHAP/archive/1.6.zip > 1.6.zip
+
+You can also browse the https://github.com/marbl/MHAP/tree/1.6
+and click on Downloads.
+
+Once downloaded, extract to unpack:
+
+.. code-block:: bash
+
+ $ unzip 1.6.zip
+
+Change to MHAP directory:
+
+.. code-block:: bash
+
+ $ cd MHAP-1.6
+
+Once inside the MHAP directory, run:
+
+.. code-block:: bash
+
+ $ ant
+
+This will compile the program and create a target/mhap-1.6.jar file which you can use to run MHAP. The quick-start instructions assume you are in the target directory when running the program. You can also use the target/mhap-1.6.tar file to copy MHAP to a different system or directory.
diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
new file mode 100644
index 0000000..018724c
--- /dev/null
+++ b/docs/source/quickstart.rst
@@ -0,0 +1,73 @@
+############
+Quick Start
+############
+
+Running MHAP
+-----------------
+
+Running MHAP provides command-line documentation if you run it without parameters. Assuming you have followed the `installation instructions <installation.html>`_, you can run:
+
+.. code-block:: bash
+
+ $ java -jar mhap-1.6.jar
+
+MHAP has two main usage modes. The first finds all overlaps between the input sequences. The second only constructs an index which can be subsequently reused.
+
+Finding overlaps
+-----------------
+
+.. code-block:: bash
+
+ $ java -Xmx32g -server -jar mhap-1.6.jar -s<fasta/dat from/self file> [-q<fasta/dat to file or directory>] [-f<kmer filter list, must be sorted>]
+
+Both the -s and -q options can accept either FastA sequences or binary dat files (generated as described below). The -q option can accept either a file or a directory, in which case all FastA/dat files in the specified directory will be used. By default, only the sequences specified by -s are indexed and the sequences in -q are streamed against the constructed index. Since MHAP is written in Java, the memory usage can be high. Generally, 32GB of RAM is sufficient to index 20K sequences. [...]
+
+The optional -f flag provides a file of repetitive k-mers which should not be selected as min-mers. The file is a two-column tab-delimited input specifying the kmer and the fraction of total kmers the k-mer comprises. For example:
+
+.. code-block:: bash
+
+ $ head kmers.ignore
+ GGGGGGGGGGGGG 0.0005
+
+means the k-mer GGGGGGGGGGGGG represents 0.05% of the k-mers in the dataset (so if there are 100,000 total k-mers, it occurs 50 times).
+
+Constructing binary index
+-------------------------
+
+.. code-block:: bash
+
+ $ java -Xmx32g -server -jar mhap-1.6.jar -p<directory of fasta files> -q <output directory> [-f<kmer filter list, must be sorted>]
+
+In this use case, files in the -p directory will be converted to binary dat files in the -q directory. Subsequent runs using the dat files (instead of FastA files) will be faster as the sequences no longer need to be indexed, only loaded into memory.
+
+Output
+-----------------
+MHAP outputs overlaps in a format similar to BLASR's M4 format. Example output::
+
+ [A ID] [B ID] [Jaccard score] [# shared min-mers] [0=A fwd, 1=A rc] [A start] [A end] [A length] [0=B fwd, 1=B rc] [B start] [B end] [B length]
+
+An example of output from a small dataset is below::
+
+ 155 11 87.83225 206 0 69 1693 1704 0 1208 2831 5871
+ 155 15 85.08692 163 0 16 1041 1704 1 67 1088 2935
+ 155 27 87.11507 159 0 455 1678 1704 0 0 1225 1862
+
+In this case sequence 155 overlaps 11, 15, and 27.
+
+Options
+-----------------
+The full list of options is available via command-line help (--help or -h). Below is a list of commonly used options.
+
+ -k [int] K-mer size, default=16
+ --num-hashes [int] Sketch size, higher=more sensitive but more memory usage and runtime, default=1024
+ --num-min-matches [int] The number of hashes that must match before performing local alignment, default=3
+ --pacbio_fast [boolean] Set all the parameters for the PacBio fast setting. This is the current best guidance, and could change at any time without warning, default = false
+ --pacbio_sensitive [boolean] Set all the parameters for the PacBio sensitive settings. This is the current best guidance, and could change at any time without warning, default = false
+ --min-store-length [int length (in bp)] The minimum sequence length to index. Sequences shorter than this are ignored in the index, default=0
+ --threshold [double] The threshold for percentage of matching min-mers for a hit to be considered significant. Lowering will output more overlaps but increase false positives, higher will reduce overlaps but remove false positives, default=0.04
+ --filter-threshold [double] The cutoff at which the k-mer in the k-mer filter file is considered repetitive. This value for a specific k-mer is specified in the second column in the filter file. If no filter file is provided, this option is ignored, default = 1.0E-5
+ --max-shift [double] The fraction of the overlap size by which the overlap sizes in two sequences may differ, default=0.2
+ --num-threads [int] The number of threads to use for computation, default (2 x #cores on system)
+ --no-self Do not compute self-matches for sequences in the -s file, default=false
+ --store-full-id Output full sequence ID from the input FastA file. Otherwise, the output is the position of the sequence in the file (i.e. first sequence gets ID=1, second gets ID=2, and so on), default=false
+
diff --git a/docs/source/utilities.rst b/docs/source/utilities.rst
new file mode 100644
index 0000000..d58ed1e
--- /dev/null
+++ b/docs/source/utilities.rst
@@ -0,0 +1,36 @@
+############
+Utilities
+############
+
+Using MHAP extras
+-----------------
+
+In addition to the main overlapping algorithm, MHAP includes several utilities for validating overlaps and simulating data.
+
+Validating overlaps
+-------------------
+
+Assuming you have a mapping of sequences to a truth (such as a reference genome) in BLASR's M4 format, you can validate overlaps using MHAP's EstimateROC utility which will compute PPV/Sensitivity/Specificity:
+
+.. code-block:: bash
+
+ $ java -cp mhap-1.6.jar edu.umd.marbl.mhap.main.EstimateROC <reference mapping M4> <overlaps M4/MHAP> <fasta of sequences> [minimum overlap length to evaluate] [number of random trials] [use dynamic programming] [verbose]
+
+The default minimum overlap length is 2000 and the default number of trials is 10000. This will estimate sensitivity/specificity to within 1%. The number of trials can be increased at the expense of runtime. Specifying 0 will examine all possible N^2 overlap pairs. If dynamic programming is turned on (by typing true for the parameter), overlaps not present in the reference mapping will be confirmed if a Smith-Waterman alignment can identify the overlap specified.
+
+Simulating Data
+-----------------
+
+MHAP includes a tool to simulate sequencing data with random error as well as estimate Jaccard similarity for the simulated data.
+
+.. code-block:: bash
+
+ $ java -cp mhap-1.6.jar edu.umd.marbl.mhap.main.KmerStatSimulator <# sequences> <sequence length (bp)> <insertion error rate> <deletion error rate> <substitution error rate> [reference genome]
+
+The error rates must be between 0 and 1 and are additive. Specifying 10% insertion, 2% deletion, and 1% substitution will result in sequences with a 13% error rate. If no reference sequence is given, completely random sequences are generated and errors added. Otherwise, random sequences are drawn from the reference and errors added. Errors are added randomly with no bias.
+
+.. code-block:: bash
+
+ $ java -cp mhap-1.6.jar edu.umd.marbl.mhap.main.KmerStatSimulator <# trials> <kmer size> <sequence length> <overlap length> <insertion error rate> <deletion error rate> <substitution error rate> [one-sided error] [reference genome] [kmer filter]
+
+This usage will output a distribution of Jaccard similarity between a pair of overlapping sequences with the specified error rate (when using the specified k-mer size) and two random sequences of the same length. If no reference sequence is given, completely random sequences are generated and errors added, otherwise sequences are drawn from the reference. When one-sided error is specified (by typing true for the parameter), only one of the two sequences will have error simulated, matchin [...]
diff --git a/src/main/java/edu/umd/marbl/mhap/align/AlignElement.java b/src/main/java/edu/umd/marbl/mhap/align/AlignElement.java
new file mode 100644
index 0000000..3abc3db
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/align/AlignElement.java
@@ -0,0 +1,40 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.align;
+
+public interface AlignElement<S extends AlignElement<S>> // S is the implementing type itself, so an element is only scored against its own kind
+{
+ public int length(); // number of positions; Aligner sizes its matrices as (length()+1) per side
+ public double similarityScore(S e, int i, int j); // score for pairing position i of this element with position j of e
+ // String renderings (whole element, one position, one position paired with a match); the indexed forms are used by Alignment when printing.
+ @Override
+ public String toString();
+ public String toString(int i);
+ public String toString(S match, int i, int j);
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/align/AlignElementDoubleSketch.java b/src/main/java/edu/umd/marbl/mhap/align/AlignElementDoubleSketch.java
new file mode 100644
index 0000000..a50c6ee
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/align/AlignElementDoubleSketch.java
@@ -0,0 +1,169 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.align;
+
+import edu.umd.marbl.mhap.impl.OverlapInfo;
+import edu.umd.marbl.mhap.sketch.Sketch;
+
+public final class AlignElementDoubleSketch<T extends Sketch<T>> implements AlignElement<AlignElementDoubleSketch<T>>
+{ // Alignable element backed by a sketch array consumed two at a time: position i covers elements[2*i] and, when present, elements[2*i+1].
+ private final T[] elements; // caller-supplied array, stored by reference (not copied)
+ private final int seqLength; // returned by getSequenceLength(); used to clamp end coordinates in getOverlapInfo
+ private final int stepSize; // factor that converts a sketch index into a coordinate in getOverlapInfo
+ // Stores the array reference and the two parameters without validation.
+ public AlignElementDoubleSketch(T[] sketchArray, int stepSize, int seqLength)
+ {
+ this.elements = sketchArray;
+ this.stepSize = stepSize;
+ this.seqLength = seqLength;
+ }
+ // Aligns this element against b with aligner.localAlignOneSkip and converts the resulting index range into an OverlapInfo; a negative score yields an all-zero OverlapInfo.
+ public OverlapInfo getOverlapInfo(Aligner<AlignElementDoubleSketch<T>> aligner, AlignElementDoubleSketch<T> b)
+ {
+ Alignment<AlignElementDoubleSketch<T>> alignment = localAlignOneSkip(aligner, b);
+ // Alignment indices are in paired units; doubling maps them back to indices into the elements arrays.
+ int a1 = alignment.getA1()*2;
+ int a2 = alignment.getA2()*2;
+ int b1 = alignment.getB1()*2;
+ int b2 = alignment.getB2()*2;
+
+ if (alignment.getScore()<0.0)
+ return new OverlapInfo(0.0, 0.0, 0, 0, 0, 0);
+ // For the start and end positions, find which sketch of the pair matched best (1: this side's second sketch, -1: b's second sketch, 0: the two first sketches).
+ int offsetStart = similarityOffset(b, alignment.getA1(), alignment.getB1());
+ int offsetEnd = similarityOffset(b, alignment.getA2(), alignment.getB2());
+ // Move the corresponding boundary forward by one sketch when a second sketch was the better match.
+ if (offsetStart>0)
+ a1++;
+ else
+ if (offsetStart<0)
+ b1++;
+ if (offsetEnd>0)
+ a2++;
+ else
+ if (offsetEnd<0)
+ b2++;
+ // Scale sketch indices by each side's step size; end coordinates cover the whole last step but are clamped to getSequenceLength()-1.
+ a1 = a1*this.stepSize;
+ a2 = Math.min(getSequenceLength()-1, (a2*this.stepSize+this.stepSize-1));
+
+ b1 = b1*b.stepSize;
+ b2 = Math.min(b.getSequenceLength()-1, (b2*b.stepSize+b.stepSize-1));
+
+ double score = alignment.getScore();
+
+ //int overlapSize = Math.max(a2-a1, b2-b1);
+ //double relOverlapSize = (double)overlapSize/(double)this.stepSize;
+ //score = score/relOverlapSize;
+ // NOTE(review): the first argument is the raw score divided by 100000; the meaning of OverlapInfo's parameters is not visible here - confirm against OverlapInfo.
+ return new OverlapInfo(score/100000.0, score, a1, a2, b1, b2);
+ }
+ // Returns the sequence length passed to the constructor.
+ public int getSequenceLength()
+ {
+ return this.seqLength;
+ }
+ // Returns the sketch at a raw index into the backing array (not a paired position index).
+ public T getSketch(int index)
+ {
+ return this.elements[index];
+ }
+ // Returns the step size passed to the constructor.
+ public int getStepSize()
+ {
+ return this.stepSize;
+ }
+ // Number of alignment positions: elements.length/2 rounded up, because sketches are consumed in pairs.
+ @Override
+ public int length()
+ {
+ int val = this.elements.length/2;
+ if (this.elements.length%2!=0)
+ val++;
+
+ return val;
+ }
+ // Delegates to aligner.localAlignOneSkip(this, b).
+ public Alignment<AlignElementDoubleSketch<T>> localAlignOneSkip(Aligner<AlignElementDoubleSketch<T>> aligner, AlignElementDoubleSketch<T> b)
+ {
+ return aligner.localAlignOneSkip(this, b);
+ }
+ // Best similarity among the sketch pairs (2i,2j), (2i+1,2j) and (2i,2j+1); the (2i+1,2j+1) combination is not evaluated.
+ @Override
+ public double similarityScore(AlignElementDoubleSketch<T> e, int i, int j)
+ {
+ double max = this.elements[2*i].similarity(e.elements[2*j]);
+
+ if ((2*i+1)<this.elements.length)
+ max = Math.max(max, this.elements[2*i+1].similarity(e.elements[2*j]));
+ if ((2*j+1)<e.elements.length)
+ max = Math.max(max, this.elements[2*i].similarity(e.elements[2*j+1]));
+
+ return max;
+ }
+ // Same comparisons as similarityScore, but reports which pair won: 0 for (2i,2j), 1 for (2i+1,2j), -1 for (2i,2j+1). Ties keep the earlier candidate because the comparison is strict.
+ private int similarityOffset(AlignElementDoubleSketch<T> e, int i, int j)
+ {
+ double max = this.elements[2*i].similarity(e.elements[2*j]);
+ int diff = 0;
+
+ if ((2*i+1)<this.elements.length)
+ {
+ double val = this.elements[2*i+1].similarity(e.elements[2*j]);
+ if (max<val)
+ {
+ max = val;
+ diff = 1;
+ }
+ }
+ if ((2*j+1)<e.elements.length)
+ {
+ double val = this.elements[2*i].similarity(e.elements[2*j+1]);
+ if (max<val)
+ {
+ max = val; // value is not read again after this point
+ diff = -1;
+ }
+ }
+
+ return diff;
+ }
+ // Ignores all three arguments and returns toString(), which this class does not override.
+ @Override
+ public String toString(AlignElementDoubleSketch<T> match, int i, int j)
+ {
+ return toString();
+ }
+ // Returns the string form of the sketch at raw array index i (not a paired position index).
+ @Override
+ public String toString(int i)
+ {
+ return this.elements[i].toString();
+ }
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/align/AlignElementSketch.java b/src/main/java/edu/umd/marbl/mhap/align/AlignElementSketch.java
new file mode 100644
index 0000000..4538716
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/align/AlignElementSketch.java
@@ -0,0 +1,114 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.align;
+
+import edu.umd.marbl.mhap.impl.OverlapInfo;
+import edu.umd.marbl.mhap.sketch.Sketch;
+
+public final class AlignElementSketch<T extends Sketch<T>> implements AlignElement<AlignElementSketch<T>>
+{ // Alignable element backed by a sketch array, one sketch per alignment position.
+ private final T[] elements; // caller-supplied array, stored by reference (not copied)
+ private final int seqLength; // returned by getSequenceLength(); used to clamp end coordinates in getOverlapInfo
+ private final int stepSize; // factor that converts a sketch index into a coordinate in getOverlapInfo
+ // Stores the array reference and the two parameters without validation.
+ public AlignElementSketch(T[] sketchArray, int stepSize, int seqLength)
+ {
+ this.elements = sketchArray;
+ this.stepSize = stepSize;
+ this.seqLength = seqLength;
+ }
+ // Aligns this element against b with aligner.localAlignOneSkip and converts the resulting index range into an OverlapInfo.
+ public OverlapInfo getOverlapInfo(Aligner<AlignElementSketch<T>> aligner, AlignElementSketch<T> b)
+ {
+ Alignment<AlignElementSketch<T>> alignment = localAlignOneSkip(aligner, b);
+ // NOTE(review): these four initial values are overwritten below before being read.
+ int a1 = alignment.getA1();
+ int a2 = alignment.getA2();
+ int b1 = alignment.getB1();
+ int b2 = alignment.getB2();
+ // Scale sketch indices by each side's step size; end coordinates cover the whole last step but are clamped to getSequenceLength()-1.
+ a1 = alignment.getA1()*this.stepSize;
+ a2 = Math.min(getSequenceLength()-1, alignment.getA2()*this.stepSize+this.stepSize-1);
+
+ b1 = alignment.getB1()*b.stepSize;
+ b2 = Math.min(b.getSequenceLength()-1, alignment.getB2()*b.stepSize+b.stepSize-1);
+
+ double score = alignment.getScore();
+
+ //int overlapSize = Math.max(a2-a1, b2-b1);
+ //double relOverlapSize = (double)overlapSize/(double)this.stepSize;
+ //score = score/relOverlapSize;
+ // NOTE(review): the first argument is the raw score divided by 100000; the meaning of OverlapInfo's parameters is not visible here - confirm against OverlapInfo.
+ return new OverlapInfo(score/100000.0, score, a1, a2, b1, b2);
+ }
+ // Returns the sequence length passed to the constructor.
+ public int getSequenceLength()
+ {
+ return this.seqLength;
+ }
+ // Returns the sketch at the given index of the backing array.
+ public T getSketch(int index)
+ {
+ return this.elements[index];
+ }
+ // Returns the step size passed to the constructor.
+ public int getStepSize()
+ {
+ return this.stepSize;
+ }
+ // Number of alignment positions, equal to the number of sketches.
+ @Override
+ public int length()
+ {
+ return this.elements.length;
+ }
+ // Delegates to aligner.localAlignOneSkip(this, b).
+ public Alignment<AlignElementSketch<T>> localAlignOneSkip(Aligner<AlignElementSketch<T>> aligner, AlignElementSketch<T> b)
+ {
+ return aligner.localAlignOneSkip(this, b);
+ }
+ // Similarity between sketch i of this element and sketch j of e, as computed by the sketch's own similarity method.
+ @Override
+ public double similarityScore(AlignElementSketch<T> e, int i, int j)
+ {
+ return this.elements[i].similarity(e.elements[j]);
+ }
+ // Ignores all three arguments and returns toString(), which this class does not override.
+ @Override
+ public String toString(AlignElementSketch<T> match, int i, int j)
+ {
+ return toString();
+ }
+ // Returns the string form of the sketch at index i.
+ @Override
+ public String toString(int i)
+ {
+ return this.elements[i].toString();
+ }
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/align/AlignElementString.java b/src/main/java/edu/umd/marbl/mhap/align/AlignElementString.java
new file mode 100644
index 0000000..c478703
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/align/AlignElementString.java
@@ -0,0 +1,78 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.align;
+
+public class AlignElementString implements AlignElement<AlignElementString>
+{ // Alignable element over the characters of a String, scored 1.0 for equal characters and -1.0 otherwise.
+ private final double EXACT_MATCH_SCORE = 1.0; // returned when the compared characters are equal; declared per instance (not static)
+ // Returned when the compared characters differ; also declared per instance.
+ private final double MISMATCH_SCORE = -1.0;
+ private final char[] s; // characters of the wrapped string
+ // Copies the characters of s into an internal array; a null s causes a NullPointerException from toCharArray().
+ public AlignElementString(String s)
+ {
+ this.s = s.toCharArray();
+ }
+ // Number of characters.
+ @Override
+ public int length()
+ {
+ return s.length;
+ }
+ // Compares character i of this string with character j of e using ==, i.e. exact (case-sensitive) char equality.
+ @Override
+ public double similarityScore(AlignElementString e, int i, int j)
+ {
+ if (this.s[i]==e.s[j])
+ return EXACT_MATCH_SCORE;
+ else
+ return MISMATCH_SCORE;
+ }
+
+ /* (non-Javadoc)
+ * @see java.lang.Object#toString()
+ */
+ @Override
+ public String toString()
+ {
+ return new String(s);
+ }
+ // Returns character i as a one-character string; match and j are ignored.
+ @Override
+ public String toString(AlignElementString match, int i, int j)
+ {
+ return ""+s[i];
+ }
+ // Returns character i as a one-character string.
+ @Override
+ public String toString(int i)
+ {
+ return ""+s[i];
+ }
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/align/Aligner.java b/src/main/java/edu/umd/marbl/mhap/align/Aligner.java
new file mode 100644
index 0000000..3a03e88
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/align/Aligner.java
@@ -0,0 +1,320 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.align;
+
+import java.util.ArrayList;
+import java.util.Collections;
+
+import edu.umd.marbl.mhap.align.Alignment.Operation;
+
+public final class Aligner<S extends AlignElement<S>>
+{ // Dynamic-programming aligner over AlignElement sequences; scoring parameters are held as floats.
+ private final float gapOpen; // added to the score when a gap is opened (a penalty is therefore a negative value)
+ private final float gapExtend; // added when a gap is extended; only read by localAlignSmithWaterGotoh
+ private final boolean storePath; // when true, the alignment methods also trace back and record the operation list
+ private final float scoreOffset; // constant added to every similarity score
+ // Narrows the double parameters to float and stores them.
+ public Aligner(boolean storePath, double gapOpen, double gapExtend, double scoreOffset)
+ {
+ this.gapOpen = (float)gapOpen;
+ this.gapExtend = (float)gapExtend;
+ this.storePath = storePath;
+ this.scoreOffset = (float)scoreOffset;
+ }
+
+ /*
+ public Alignment<S> localAlignSmithWater(S a, S b)
+ {
+ if (a.length()==0 && b.length()==0)
+ return null;
+ else
+ if (a.length()==0 || b.length()==0)
+ return null;
+
+ float[][] scores = new float[a.length()+1][b.length()+1];
+
+ for (int i=1; i<=a.length(); i++)
+ for (int j=1; j<=b.length(); j++)
+ {
+ float hNext = scores[i-1][j-1]+Math.min(0.0f, (float)a.similarityScore(b, i-1, j-1));
+
+ float hDeletion = scores[i-1][j]+this.gapOpen;
+ float hInsertion = scores[i][j-1]+this.gapOpen;
+
+ //adjustments for end
+ //if (i==a.length())
+ // hInsertion = hInsertion-this.gapOpen;
+ //if (j==b.length())
+ // hDeletion = hDeletion-this.gapOpen;
+
+ float value = Math.max(Math.max(Math.max(0.0f, hNext), hDeletion), hInsertion);
+
+ scores[i][j] = value;
+ }
+
+ double bestValue = scores[a.length()-1][b.length()-1];
+ double score = bestValue/(double)Math.max(a.length(), b.length());
+
+ //if (a.length()<500)
+ // System.err.println(edu.umd.marbl.mhap.utils.Utils.toString(scores));
+
+ if (storePath)
+ {
+ //figure out the path
+ ArrayList<Alignment.Operation> backOperations = new ArrayList<>(a.length()+b.length());
+
+
+ int i = a.length();
+ int j = b.length();
+ while (i>0 || j>0)
+ {
+ if (i==0)
+ {
+ backOperations.add(Operation.DELETE);
+ j--;
+ }
+ else
+ if (j==0)
+ {
+ backOperations.add(Operation.INSERT);
+ i--;
+ }
+ else
+ if (scores[i-1][j-1]>=scores[i-1][j] && scores[i-1][j-1]>=scores[i][j-1])
+ {
+ backOperations.add(Operation.MATCH);
+ i--;
+ j--;
+ }
+ else
+ if (scores[i-1][j]>=scores[i-1][j-1])
+ {
+ backOperations.add(Operation.INSERT);
+ i--;
+ }
+ else
+ {
+ backOperations.add(Operation.DELETE);
+ j--;
+ }
+ }
+
+ return new Alignment<S>(a, b, score, this.gapOpen, backOperations);
+ }
+
+ return new Alignment<S>(a, b, score, this.gapOpen, null);
+ }
+ */
+ // Affine-gap alignment (D: best score per cell, P/Q: best score ending in a gap along i / along j). NOTE(review): cell scores are not floored at 0 here; only the running maximum starts at 0.
+ public Alignment<S> localAlignSmithWaterGotoh(S a, S b)
+ {
+ float[][] D = new float[a.length()+1][b.length()+1];
+ float[][] P = new float[a.length()+1][b.length()+1];
+ float[][] Q = new float[a.length()+1][b.length()+1];
+ // Border cells: D is 0 and P/Q are -infinity, so no gap can be extended from the border. Cell [0][0] keeps the array default of 0 in all three matrices.
+ for (int i=1; i<=a.length(); i++)
+ {
+ D[i][0] = 0.0f;
+ P[i][0] = Float.NEGATIVE_INFINITY;
+ Q[i][0] = Float.NEGATIVE_INFINITY;
+ }
+ for (int j=1; j<=b.length(); j++)
+ {
+ D[0][j] = 0.0f;
+ P[0][j] = Float.NEGATIVE_INFINITY;
+ Q[0][j] = Float.NEGATIVE_INFINITY;
+ }
+ // Fill the matrices and remember the highest-scoring cell (the first one found on ties, since the comparison is strict).
+ float maxValue = 0.0f;
+ int maxI = 0;
+ int maxJ = 0;
+ for (int i=1; i<=a.length(); i++) {
+ for (int j=1; j<=b.length(); j++)
+ {
+ P[i][j] = Math.max(D[i-1][j]+this.gapOpen, P[i-1][j]+this.gapExtend);
+ Q[i][j] = Math.max(D[i][j-1]+this.gapOpen, Q[i][j-1]+this.gapExtend);
+
+ float score = D[i-1][j-1]+(float)a.similarityScore(b, i-1, j-1)+this.scoreOffset;
+
+ //compute the actual score
+ D[i][j] = Math.max(score, Math.max(P[i][j], Q[i][j]));
+
+ if (D[i][j] > maxValue) {
+ maxValue = D[i][j];
+ maxI = i;
+ maxJ = j;
+ }
+ }
+ }
+
+ float score = maxValue;
+ // End indices are the best cell converted to 0-based; start indices stay 0 unless the path is stored and traced back below.
+ int a1 = 0;
+ int a2 = Math.max(0, maxI-1);
+ int b1 = 0;
+ int b2 = Math.max(0, maxJ-1);
+
+ if (storePath)
+ {
+ //figure out the path
+ ArrayList<Alignment.Operation> backOperations = new ArrayList<>(a.length()+b.length());
+ int i = a.length();
+ while (i > maxI) { // positions of a after the best cell are recorded as DELETE
+ backOperations.add(Operation.DELETE);
+ i--;
+ }
+ // Trace back from the best cell. NOTE(review): the "|| j==0" and "|| i==0" tests cannot be true inside this loop because of the loop condition.
+ i = maxI;
+ int j = maxJ;
+ while (i>0 && j>0)
+ {
+ if ((P[i][j]>=Q[i][j] && P[i][j]==D[i][j]) || j==0)
+ {
+ backOperations.add(Operation.DELETE);
+ i--;
+ }
+ else
+ if (Q[i][j]==D[i][j] || i==0)
+ {
+ backOperations.add(Operation.INSERT);
+ j--;
+ }
+ else
+ {
+ backOperations.add(Operation.MATCH);
+ i--;
+ j--;
+ }
+ }
+ a1 = i;
+ b1 = j;
+ while (i > 0) { // NOTE(review): unlike localAlignOneSkip, remaining j positions are not padded with INSERT here
+ backOperations.add(Operation.DELETE);
+ i--;
+ }
+
+ //reverse the direction
+ Collections.reverse(backOperations);
+
+ return new Alignment<S>(a, b, a1, a2, b1, b2, score, this.gapOpen, backOperations);
+ }
+ // Path not requested: the operation list is null and the start indices are reported as 0.
+ return new Alignment<S>(a, b, a1, a2, b1, b2, score, this.gapOpen, null);
+ }
+ // Alignment variant that uses gapOpen only (gapExtend is not read) and whose best cell must lie on the last row or last column, i.e. the alignment runs to the end of a or of b.
+ public Alignment<S> localAlignOneSkip(S a, S b)
+ {
+ float[][] D = new float[a.length()+1][b.length()+1];
+ float[][] P = new float[a.length()+1][b.length()+1];
+ float[][] S = new float[a.length()+1][b.length()+1];
+ // D holds purely diagonal (match-only) scores, P the best score one gap step away from a D cell, S the best of the two.
+ float maxValue = 0.0f;
+ int maxI = 0;
+ int maxJ = 0;
+ for (int i=1; i<=a.length(); i++) {
+ for (int j=1; j<=b.length(); j++)
+ {
+ float sim = (float)a.similarityScore(b, i-1, j-1)+this.scoreOffset;
+
+ P[i][j] = Math.max(D[i-1][j]+this.gapOpen, D[i][j-1]+this.gapOpen);
+ D[i][j] = D[i-1][j-1]+sim;
+
+ S[i][j] = Math.max(P[i][j], D[i][j]);
+ if (i==a.length())
+ S[i][j] = Math.max(S[i][j], P[i][j-1]+this.gapOpen);
+ if (j==b.length())
+ S[i][j] = Math.max(S[i][j], P[i-1][j]+this.gapOpen);
+
+ // Only cells on the last row or last column may become the best cell.
+ if (S[i][j] > maxValue && (i==a.length() || j==b.length()))
+ {
+ maxValue = S[i][j];
+ maxI = i;
+ maxJ = j;
+ }
+ }
+ }
+
+ float score = maxValue;
+ // End indices are the best cell converted to 0-based; start indices stay 0 unless the path is stored and traced back below.
+ int a1 = 0;
+ int a2 = Math.max(0, maxI-1);
+ int b1 = 0;
+ int b2 = Math.max(0, maxJ-1);
+
+ if (this.storePath)
+ {
+ //figure out the path
+ ArrayList<Alignment.Operation> backOperations = new ArrayList<>(a.length()+b.length());
+ // Trace back from the best cell by comparing S against the gap-step scores using exact float equality.
+ int i = maxI;
+ int j = maxJ;
+ while (i>0 && j>0)
+ {
+ if (S[i][j]==D[i-1][j]+this.gapOpen)
+ {
+ backOperations.add(Operation.DELETE);
+ i--;
+ }
+ else
+ if (S[i][j]==D[i][j-1]+this.gapOpen)
+ {
+ backOperations.add(Operation.INSERT);
+ j--;
+ }
+ else
+ {
+ backOperations.add(Operation.MATCH);
+ i--;
+ j--;
+ }
+ }
+ // The aligned region starts where the trace-back reached a border; whatever remains of a or b is padded with gap operations.
+ a1 = i;
+ b1 = j;
+ while (i > 0)
+ {
+ backOperations.add(Operation.DELETE);
+ i--;
+ }
+ while (j > 0)
+ {
+ backOperations.add(Operation.INSERT);
+ j--;
+ }
+
+ //reverse the direction
+ Collections.reverse(backOperations);
+
+ return new Alignment<S>(a, b, a1, a2, b1, b2, score, this.gapOpen, backOperations);
+ }
+ // Path not requested: the operation list is null and the start indices are reported as 0.
+ return new Alignment<S>(a, b, a1, a2, b1, b2, score, this.gapOpen, null);
+ }
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/align/Alignment.java b/src/main/java/edu/umd/marbl/mhap/align/Alignment.java
new file mode 100644
index 0000000..353f7ed
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/align/Alignment.java
@@ -0,0 +1,289 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.align;
+
+import java.util.Iterator;
+import java.util.List;
+
+public final class Alignment<S extends AlignElement<S>>
+{ // Result of aligning two AlignElements: a score, index ranges in a and b, and optionally the list of edit operations.
+ public enum Operation
+ {
+ MATCH, // consumes one position of a and one of b
+ INSERT, // consumes one position of b only
+ DELETE; // consumes one position of a only
+ }
+ // NOTE: operations may be null - Aligner passes null when path storing is disabled.
+ private final double score;
+ //private final double gapOpen;
+ private final List<Operation> operations;
+ private final S a;
+ private final S b;
+ private int a1; // start index in a, as supplied at construction
+ private int a2; // end index in a, as supplied at construction
+ private int b1; // start index in b, as supplied at construction
+ private int b2; // end index in b, as supplied at construction
+ // Stores the arguments; gapOpen is accepted but not kept (its field is commented out).
+ protected Alignment(S a, S b, int a1, int a2, int b1, int b2, double score, double gapOpen, List<Operation> operations)
+ {
+ this.score = score;
+ this.operations = operations;
+ this.a = a;
+ this.b = b;
+ this.a1 = a1;
+ this.a2 = a2;
+ this.b1 = b1;
+ this.b2 = b2;
+ //this.gapOpen = gapOpen;
+ }
+ // Average similarity over the MATCH operations; 0.0 when there are fewer than minMatches matches or the summed score is not positive. Needs a stored path: a null operations list causes a NullPointerException.
+ public double getOverlapScore(int minMatches)
+ {
+ int i = 0;
+ int j = 0;
+
+ Iterator<Operation> iter = this.operations.iterator();
+
+ if (!iter.hasNext())
+ return 0.0;
+
+ //remove the start
+ Operation o = iter.next();
+ while (o==Operation.DELETE)
+ {
+ i++;
+
+ if (iter.hasNext())
+ o = iter.next();
+ else
+ return 0.0;
+ }
+ if (i==0)
+ {
+ while (o==Operation.INSERT) // NOTE(review): leading INSERTs are skipped without advancing j - confirm this is intended
+ {
+ if (iter.hasNext())
+ o = iter.next();
+ else
+ return 0.0;
+ }
+ }
+ // Walk the remaining operations: DELETE advances i, INSERT advances j, MATCH adds the similarity at (i, j) and advances both.
+ double score = 0.0;
+ int count = 0;
+ while (true)
+ {
+ if (o == Operation.DELETE)
+ {
+ i++;
+ }
+ else
+ if (o == Operation.INSERT)
+ {
+ //count++;
+ j++;
+ }
+ else
+ {
+ score = score + a.similarityScore(b, i, j);
+ count++;
+ i++;
+ j++;
+ }
+
+ if (iter.hasNext())
+ o = iter.next();
+ else
+ break;
+ }
+
+ //System.err.println(this.operations);
+ //System.err.println("HI="+count+" "+minMatches);
+
+ if (count<minMatches)
+ return 0.0;
+
+ if (score<=0.0)
+ return 0.0;
+
+ return score/(double)(count);
+ }
+ // Returns the stored operation list (may be null).
+ protected List<Operation> getOperations()
+ {
+ return this.operations;
+ }
+ // Returns the alignment score supplied at construction.
+ public double getScore()
+ {
+ return this.score;
+ }
+ // Returns the start index in a.
+ public int getA1()
+ {
+ return this.a1;
+ }
+ // Returns the end index in a.
+ public int getA2()
+ {
+ return this.a2;
+ }
+ // Returns the start index in b.
+ public int getB1()
+ {
+ return this.b1;
+ }
+ // Returns the end index in b.
+ public int getB2()
+ {
+ return this.b2;
+ }
+ // Renders a along the alignment: a's text for MATCH and DELETE, and one dash per character of b's text for INSERT. Needs a stored path (operations must not be null).
+ public String outputAlignmentSelf()
+ {
+ StringBuilder str = new StringBuilder();
+
+ int i = 0;
+ int j = 0;
+
+ int count = 0;
+ while(i<a.length() || j<b.length())
+ {
+ Operation o;
+ if (count<this.operations.size())
+ o = this.operations.get(count);
+ else // past the stored operations: consume what is left of a first, then what is left of b
+ if (i<a.length())
+ o = Operation.DELETE;
+ else
+ o = Operation.INSERT;
+
+ if (o == Operation.DELETE)
+ {
+ String aStr;
+ if (j>=b.length())
+ aStr = a.toString(i);
+ else
+ aStr = a.toString(b, i, j);
+
+ str.append(aStr);
+ i++;
+ }
+ else
+ if (o == Operation.INSERT)
+ {
+ String bStr;
+ if (i>=a.length())
+ bStr = b.toString(j);
+ else
+ bStr = b.toString(a, j, i);
+ for (int space=0; space<bStr.length(); space++)
+ str.append("-");
+ j++;
+ }
+ else
+ {
+ str.append(a.toString(b, i, j));
+ i++;
+ j++;
+ }
+
+ count++;
+ }
+
+ return str.toString();
+ }
+ // Renders b along the alignment: b's text for MATCH and INSERT, and one dash per character of a's text for DELETE. Needs a stored path (operations must not be null).
+ public String outputAlignmentOther()
+ {
+ StringBuilder str = new StringBuilder();
+
+ int i = 0;
+ int j = 0;
+ int count = 0;
+ while(i<a.length() || j<b.length())
+ {
+ Operation o;
+ if (count<this.operations.size())
+ o = this.operations.get(count);
+ else // past the stored operations: consume what is left of a first, then what is left of b
+ if (i<a.length())
+ o = Operation.DELETE;
+ else
+ o = Operation.INSERT;
+
+ if (o == Operation.INSERT)
+ {
+ String bStr;
+ if (i>=a.length())
+ bStr = b.toString(j);
+ else
+ bStr = b.toString(a, j, i);
+
+ str.append(bStr);
+ j++;
+ }
+ else
+ if (o == Operation.DELETE)
+ {
+ String aStr;
+ if (j>=b.length())
+ aStr = a.toString(i);
+ else
+ aStr = a.toString(b, i, j);
+
+ for (int space=0; space<aStr.length(); space++)
+ str.append("-");
+ i++;
+ }
+ else
+ {
+ str.append(b.toString(a, j, i));
+ i++;
+ j++;
+ }
+
+ count++;
+ }
+
+ return str.toString();
+ }
+ // Concatenates both renderings, labelled "Sequence 1: " and "Sequence 2: ", each followed by a newline.
+ public String outputAlignment()
+ {
+ StringBuilder str = new StringBuilder();
+
+ str.append("Sequence 1: ");
+ str.append(outputAlignmentSelf()+"\n");
+ str.append("Sequence 2: ");
+ str.append(outputAlignmentOther()+"\n");
+
+ return str.toString();
+ }
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/impl/AbstractMatchSearch.java b/src/main/java/edu/umd/marbl/mhap/impl/AbstractMatchSearch.java
new file mode 100644
index 0000000..496a0ee
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/impl/AbstractMatchSearch.java
@@ -0,0 +1,342 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2014 by Konstantin Berlin and Sergey Koren
+ * University Of Maryland
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.impl;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
+
+import edu.umd.marbl.mhap.utils.ReadBuffer;
+import edu.umd.marbl.mhap.utils.Utils;
+
+public abstract class AbstractMatchSearch
+{
+ private final AtomicLong matchesProcessed;
+ protected final int numThreads;
+
+ private final AtomicLong sequencesSearched;
+ private final boolean storeResults;
+
+ public final static int NUM_ELEMENTS_PER_OUTPUT = 20000;
+ protected final static BufferedWriter STD_OUT_BUFFER = new BufferedWriter(new OutputStreamWriter(System.out),
+ Utils.BUFFER_BYTE_SIZE);
+
+ public AbstractMatchSearch(int numThreads, boolean storeResults)
+ {
+ this.numThreads = numThreads;
+ this.storeResults = storeResults;
+ this.matchesProcessed = new AtomicLong();
+ this.sequencesSearched = new AtomicLong();
+ }
+
+ protected void addData(final SequenceSketchStreamer data)
+ {
+ // figure out number of cores
+ ExecutorService execSvc = Executors.newFixedThreadPool(this.numThreads);
+
+ final AtomicInteger counter = new AtomicInteger();
+ for (int iter = 0; iter < this.numThreads; iter++)
+ {
+ Runnable task = new Runnable()
+ {
+ @Override
+ public void run()
+ {
+ try
+ {
+ ReadBuffer buf = new ReadBuffer();
+ SequenceSketch seqHashes = data.dequeue(false, buf);
+ while (seqHashes != null)
+ {
+ addSequence(seqHashes);
+
+ int currCount = counter.incrementAndGet();
+ if (currCount % 5000 == 0)
+ System.err.println("Current # sequences stored: " + currCount + "...");
+
+ seqHashes = data.dequeue(false, buf);
+ }
+ }
+ catch (IOException e)
+ {
+ throw new MhapRuntimeException(e);
+ }
+ }
+ };
+
+ // enqueue the task
+ execSvc.execute(task);
+ }
+
+ // shutdown the service
+ execSvc.shutdown();
+ try
+ {
+ execSvc.awaitTermination(365L, TimeUnit.DAYS);
+ }
+ catch (InterruptedException e)
+ {
+ execSvc.shutdownNow();
+ throw new MhapRuntimeException("Unable to finish all tasks.");
+ }
+ }
+
+ protected abstract boolean addSequence(SequenceSketch seqHashes);
+
+ public ArrayList<MatchResult> findMatches()
+ {
+ // figure out number of cores
+ ExecutorService execSvc = Executors.newFixedThreadPool(this.numThreads);
+
+ // allocate the storage and get the list of valeus
+ final ArrayList<MatchResult> combinedList = new ArrayList<MatchResult>();
+ final ConcurrentLinkedQueue<SequenceId> seqList = new ConcurrentLinkedQueue<SequenceId>(
+ getStoredForwardSequenceIds());
+
+ // for each thread create a task
+ for (int iter = 0; iter < this.numThreads; iter++)
+ {
+ Runnable task = new Runnable()
+ {
+ @Override
+ public void run()
+ {
+ List<MatchResult> localMatches = new ArrayList<MatchResult>();
+
+ // get next sequence
+ SequenceId nextSequence = seqList.poll();
+
+ while (nextSequence != null)
+ {
+ SequenceSketch sequenceHashes = getStoredSequenceHash(nextSequence);
+
+ // only search the forward sequences
+ localMatches.addAll(findMatches(sequenceHashes, true));
+
+ // record search
+ AbstractMatchSearch.this.sequencesSearched.getAndIncrement();
+
+ // get next sequence
+ nextSequence = seqList.poll();
+
+ // output stored results
+ if (nextSequence == null || localMatches.size() >= NUM_ELEMENTS_PER_OUTPUT)
+ {
+ // count the number of matches
+ AbstractMatchSearch.this.matchesProcessed.getAndAdd(localMatches.size());
+
+ if (AbstractMatchSearch.this.storeResults)
+ {
+ // combine the results
+ synchronized (combinedList)
+ {
+ combinedList.addAll(localMatches);
+ }
+ }
+ else
+ outputResults(localMatches);
+
+ localMatches.clear();
+ }
+ }
+ }
+ };
+
+ // enqueue the task
+ execSvc.execute(task);
+ }
+
+ // shutdown the service
+ execSvc.shutdown();
+ try
+ {
+ execSvc.awaitTermination(365L, TimeUnit.DAYS);
+ }
+ catch (InterruptedException e)
+ {
+ execSvc.shutdownNow();
+ throw new MhapRuntimeException("Unable to finish all tasks.");
+ }
+
+ flushOutput();
+
+ return combinedList;
+ }
+
+ protected abstract List<MatchResult> findMatches(SequenceSketch hashes, boolean toSelf);
+
+ public ArrayList<MatchResult> findMatches(final SequenceSketchStreamer data) throws IOException
+ {
+ // figure out number of cores
+ ExecutorService execSvc = Executors.newFixedThreadPool(this.numThreads);
+
+ // allocate the storage and get the list of valeus
+ final ArrayList<MatchResult> combinedList = new ArrayList<MatchResult>();
+
+ // for each thread create a task
+ for (int iter = 0; iter < this.numThreads; iter++)
+ {
+ Runnable task = new Runnable()
+ {
+ @Override
+ public void run()
+ {
+ List<MatchResult> localMatches = new ArrayList<MatchResult>();
+
+ try
+ {
+ ReadBuffer buf = new ReadBuffer();
+
+ SequenceSketch sequenceHashes = data.dequeue(true, buf);
+
+ while (sequenceHashes != null)
+ {
+ // only search the forward sequences
+ localMatches.addAll(findMatches(sequenceHashes, false));
+
+ // record search
+ AbstractMatchSearch.this.sequencesSearched.getAndIncrement();
+
+ // get the sequence hashes
+ sequenceHashes = data.dequeue(true, buf);
+
+ // output stored results
+ if (sequenceHashes == null || localMatches.size() >= NUM_ELEMENTS_PER_OUTPUT)
+ {
+ // count the number of matches
+ AbstractMatchSearch.this.matchesProcessed.getAndAdd(localMatches.size());
+
+ if (AbstractMatchSearch.this.storeResults)
+ {
+ // combine the results
+ synchronized (combinedList)
+ {
+ combinedList.addAll(localMatches);
+ }
+ }
+ else
+ outputResults(localMatches);
+
+ localMatches.clear();
+ }
+ }
+ }
+ catch (IOException e)
+ {
+ throw new MhapRuntimeException(e);
+ }
+ }
+ };
+
+ // enqueue the task
+ execSvc.execute(task);
+ }
+
+ // shutdown the service
+ execSvc.shutdown();
+ try
+ {
+ execSvc.awaitTermination(365L, TimeUnit.DAYS);
+ }
+ catch (InterruptedException e)
+ {
+ execSvc.shutdownNow();
+ throw new MhapRuntimeException("Unable to finish all tasks.");
+ }
+
+ flushOutput();
+
+ return combinedList;
+ }
+
+ protected void flushOutput()
+ {
+ try
+ {
+ STD_OUT_BUFFER.flush();
+ }
+ catch (IOException e)
+ {
+ throw new MhapRuntimeException(e);
+ }
+ }
+
+ public long getMatchesProcessed()
+ {
+ return this.matchesProcessed.get();
+ }
+
+ /**
+ * @return the sequencesSearched
+ */
+ public long getNumberSequencesSearched()
+ {
+ return this.sequencesSearched.get();
+ }
+
+ public abstract List<SequenceId> getStoredForwardSequenceIds();
+
+ public abstract SequenceSketch getStoredSequenceHash(SequenceId id);
+
+ protected void outputResults(List<MatchResult> matches)
+ {
+ if (this.storeResults || matches.isEmpty())
+ return;
+
+ try
+ {
+ synchronized (STD_OUT_BUFFER)
+ {
+ for (MatchResult currResult : matches)
+ {
+ STD_OUT_BUFFER.write(currResult.toString());
+ STD_OUT_BUFFER.newLine();
+ }
+
+ STD_OUT_BUFFER.flush();
+ }
+ }
+ catch (IOException e)
+ {
+ throw new MhapRuntimeException(e);
+ }
+ }
+
+ public abstract int size();
+
+}
\ No newline at end of file
diff --git a/src/main/java/edu/umd/marbl/mhap/impl/FastaData.java b/src/main/java/edu/umd/marbl/mhap/impl/FastaData.java
new file mode 100644
index 0000000..2cf2f6d
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/impl/FastaData.java
@@ -0,0 +1,231 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2014 by Konstantin Berlin and Sergey Koren
+ * University Of Maryland
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.impl;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.Locale;
+import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.atomic.AtomicLong;
+
+import edu.umd.marbl.mhap.utils.Utils;
+
+/**
+ * Lazily reads sequences from a FASTA file into a queue that callers drain
+ * with {@link #dequeue()}.
+ * <p>
+ * An instance either wraps an open file reader, or (when produced by
+ * {@link #clone()}) wraps only an in-memory copy of the queue and has no
+ * reader at all.
+ */
+public class FastaData implements Cloneable
+{
+  // null for instances created from an in-memory queue
+  private final BufferedReader fileReader;
+  // added to the running count when numbering sequence ids
+  private final int offset;
+  // look-ahead line: the header of the next record, or null if none was read yet
+  private String lastLine;
+  private AtomicLong numberProcessed;
+  private boolean readFullFile;
+  // sequences read from the file but not yet dequeued
+  private final ConcurrentLinkedQueue<Sequence> sequenceList;
+
+  // handed to Utils.getFile when opening the input; presumably the accepted file suffixes
+  private static final String[] fastaSuffix = { "fna", "contigs", "contig", "final", "fasta", "fa" };
+
+  /**
+   * Creates a reader-less instance holding a copy of the given queue.
+   */
+  private FastaData(ConcurrentLinkedQueue<Sequence> seqList)
+  {
+    this.sequenceList = new ConcurrentLinkedQueue<Sequence>(seqList);
+    this.fileReader = null;
+    this.lastLine = null;
+    this.readFullFile = true;
+    this.numberProcessed = new AtomicLong(this.sequenceList.size());
+    this.offset = 0;
+  }
+
+  /**
+   * Opens the given FASTA file; nothing is read until sequences are requested.
+   *
+   * @param file
+   *          the file to open
+   * @param offset
+   *          value added to the 1-based position of each sequence to form its id
+   * @throws IOException
+   *           declared for callers
+   * @throws MhapRuntimeException
+   *           if the file cannot be opened
+   */
+  public FastaData(String file, int offset) throws IOException
+  {
+    try
+    {
+      this.fileReader = Utils.getFile(file, fastaSuffix);
+    }
+    catch (Exception e)
+    {
+      throw new MhapRuntimeException(e);
+    }
+
+    this.offset = offset;
+    this.lastLine = null;
+    this.readFullFile = false;
+    this.numberProcessed = new AtomicLong(0);
+    this.sequenceList = new ConcurrentLinkedQueue<Sequence>();
+  }
+
+  /**
+   * Reads the rest of the file into the queue and returns a reader-less copy
+   * holding the sequences currently queued. Sequences already dequeued from
+   * this instance are not part of the copy.
+   *
+   * @see java.lang.Object#clone()
+   */
+  @Override
+  public synchronized FastaData clone()
+  {
+    // enqueue all the data
+    try
+    {
+      enqueueFullFile();
+    }
+    catch (IOException e)
+    {
+      throw new MhapRuntimeException(e);
+    }
+
+    return new FastaData(this.sequenceList);
+  }
+
+  /**
+   * Removes and returns the next sequence, reading one more record from the
+   * file if the queue is empty.
+   *
+   * @return the next sequence, or null when nothing is left
+   * @throws IOException
+   *           if reading the file fails
+   */
+  public Sequence dequeue() throws IOException
+  {
+    Sequence seq;
+    synchronized (this.sequenceList)
+    {
+      if (this.sequenceList.isEmpty())
+      {
+        enqueueNextSequenceInFile();
+      }
+
+      // get the sequence
+      seq = this.sequenceList.poll();
+    }
+
+    return seq;
+  }
+
+  /**
+   * Reads every remaining record of the file into the queue.
+   *
+   * @throws IOException
+   *           if reading the file fails
+   */
+  public void enqueueFullFile() throws IOException
+  {
+    while (enqueueNextSequenceInFile())
+    {
+    }
+  }
+
+  /**
+   * Reads one record (a header line starting with {@code >} plus all following
+   * lines up to the next header or end of file) and appends it to the queue.
+   * The sequence text is upper-cased. The reader is closed once the end of the
+   * file is reached.
+   *
+   * @return true if a sequence was enqueued, false if there is nothing more to read
+   * @throws IOException
+   *           if reading the file fails
+   * @throws MhapRuntimeException
+   *           if a record does not start with {@code >}
+   */
+  private boolean enqueueNextSequenceInFile() throws IOException
+  {
+    // instances built from an in-memory queue have no reader; locking on it
+    // below would throw NullPointerException instead of reporting "no more data"
+    if (this.fileReader == null)
+      return false;
+
+    synchronized (this.fileReader)
+    {
+      if (this.readFullFile)
+        return false;
+
+      // try to read the next line
+      if (this.lastLine == null)
+      {
+        this.lastLine = this.fileReader.readLine();
+
+        // there is no next line
+        if (this.lastLine == null)
+        {
+          this.fileReader.close();
+          this.readFullFile = true;
+          return false;
+        }
+      }
+
+      // process the header
+      if (!this.lastLine.startsWith(">"))
+        throw new MhapRuntimeException("Next sequence does not start with >. Invalid format.");
+
+      // keep the first whitespace/comma-delimited token of the header, if ids are stored
+      String header = null;
+      if (SequenceId.STORE_FULL_ID)
+        header = this.lastLine.substring(1).split("[\\s,]+", 2)[0];
+
+      //read the first line of the sequence
+      this.lastLine = this.fileReader.readLine();
+
+      StringBuilder fastaSeq = new StringBuilder();
+      while (true)
+      {
+        // end of file or the next header terminates the current record
+        if (this.lastLine == null || this.lastLine.startsWith(">"))
+        {
+          //generate sequence id
+          SequenceId id;
+          if (SequenceId.STORE_FULL_ID)
+            id = new SequenceId(this.numberProcessed.intValue() + this.offset + 1, true, header);
+          else
+            id = new SequenceId(this.numberProcessed.intValue() + this.offset + 1);
+
+          Sequence seq = new Sequence(fastaSeq.toString().toUpperCase(Locale.ENGLISH), id);
+
+          // enqueue sequence
+          this.sequenceList.add(seq);
+          this.numberProcessed.getAndIncrement();
+
+          if (this.lastLine == null)
+          {
+            this.fileReader.close();
+            this.readFullFile = true;
+          }
+
+          return true;
+        }
+
+        // append the last line
+        fastaSeq.append(this.lastLine);
+        this.lastLine = this.fileReader.readLine();
+      }
+    }
+
+  }
+
+  /**
+   * @return the number of sequences read so far (for a copy: the number of
+   *         sequences it was created with)
+   */
+  public int getNumberProcessed()
+  {
+    return this.numberProcessed.intValue();
+  }
+
+  /**
+   * Looks up a sequence among those currently queued, by linear scan. If no
+   * queued sequence has the given id, the complement id is tried and the
+   * reverse complement of that sequence is returned.
+   *
+   * @return the matching sequence, or null if none is queued
+   */
+  public Sequence getSequence(SequenceId id)
+  {
+    if (id.isForward())
+    {
+      for (Sequence seq : this.sequenceList)
+        if (seq.getId().equals(id))
+          return seq;
+    }
+
+    id = id.complimentId();
+    for (Sequence seq : this.sequenceList)
+      if (seq.getId().equals(id))
+        return seq.getReverseCompliment();
+
+    return null;
+  }
+
+  /**
+   * @return true when the queue is empty and the file has been read completely
+   */
+  public boolean isEmpty()
+  {
+    return this.sequenceList.isEmpty() && this.readFullFile;
+  }
+
+  /**
+   * Closes the file reader, if this instance has one.
+   *
+   * @see java.lang.Object#finalize()
+   */
+  @Override
+  protected void finalize() throws Throwable
+  {
+    super.finalize();
+    // copies created from an in-memory queue never had a reader
+    if (this.fileReader != null)
+      this.fileReader.close();
+  }
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/impl/MatchResult.java b/src/main/java/edu/umd/marbl/mhap/impl/MatchResult.java
new file mode 100644
index 0000000..3848873
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/impl/MatchResult.java
@@ -0,0 +1,116 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2014 by Konstantin Berlin and Sergey Koren
+ * University Of Maryland
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.impl;
+
+
+/**
+ * Immutable record of one reported overlap between a "from" and a "to"
+ * sequence.
+ * <p>
+ * The natural ordering sorts by descending score and is not consistent with
+ * {@code equals}, which this class does not override.
+ */
+public final class MatchResult implements Comparable<MatchResult>
+{
+  private final SequenceId fromId;
+  private final SequenceId toId;
+  private final int a1;
+  private final int a2;
+  private final int b1;
+  private final int b2;
+  private final double score;
+  private final double rawScore;
+  private final int fromLength;
+  private final int toLength;
+
+  /**
+   * @param fromId
+   *          id of the first sequence
+   * @param toId
+   *          id of the second sequence
+   * @param overlap
+   *          overlap coordinates and scores; for a sequence whose id is not
+   *          forward the coordinates are mirrored against its length
+   * @param fromLength
+   *          length of the first sequence
+   * @param toLength
+   *          length of the second sequence
+   */
+  protected MatchResult(SequenceId fromId, SequenceId toId, OverlapInfo overlap, int fromLength, int toLength)
+  {
+    this.fromId = fromId;
+    this.toId = toId;
+
+    this.fromLength = fromLength;
+    this.toLength = toLength;
+
+    // mirroring swaps the roles of start and end, hence a2 feeds a1 and vice versa
+    this.a1 = getFromId().isForward() ? overlap.a1 : fromLength-overlap.a2-1;
+    this.a2 = getFromId().isForward() ? overlap.a2 : fromLength-overlap.a1-1;
+    this.b1 = getToId().isForward() ? overlap.b1 : toLength-overlap.b2-1;
+    this.b2 = getToId().isForward() ? overlap.b2 : toLength-overlap.b1-1;
+
+    this.rawScore = overlap.rawScore;
+
+    // the stored score is capped at 1.0
+    if (overlap.score>1.0)
+      this.score = 1.0;
+    else
+      this.score = overlap.score;
+  }
+
+  /**
+   * @return the fromId
+   */
+  public SequenceId getFromId()
+  {
+    return this.fromId;
+  }
+
+  /**
+   * @return the toId
+   */
+  public SequenceId getToId()
+  {
+    return this.toId;
+  }
+
+  /**
+   * @return the score, never greater than 1.0
+   */
+  public double getScore()
+  {
+    return this.score;
+  }
+
+  /**
+   * Orders results so that higher scores come first.
+   */
+  @Override
+  public int compareTo(MatchResult o)
+  {
+    return -Double.compare(this.score, o.score);
+  }
+
+  /**
+   * Formats the result as one space-separated line: from header, to header,
+   * {@code (1-score)*100}, raw score, then for each sequence a 0/1 flag (1 when
+   * the id is not forward), the two overlap coordinates and the length.
+   * <p>
+   * The locale is fixed so the decimal separator is always a period,
+   * regardless of the JVM's default locale.
+   */
+  @Override
+  public String toString()
+  {
+    return String.format(java.util.Locale.ENGLISH, "%s %s %.6f %.6f %d %d %d %d %d %d %d %d",
+        getFromId().getHeader(),
+        getToId().getHeader(),
+        (1.0-getScore())*100.0,
+        this.rawScore,
+        getFromId().isForward() ? 0 : 1,
+        this.a1,
+        this.a2,
+        this.fromLength,
+        getToId().isForward() ? 0 : 1,
+        this.b1,
+        this.b2,
+        this.toLength);
+  }
+
+
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/impl/MhapRuntimeException.java b/src/main/java/edu/umd/marbl/mhap/impl/MhapRuntimeException.java
new file mode 100644
index 0000000..4d3beca
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/impl/MhapRuntimeException.java
@@ -0,0 +1,61 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2014 by Konstantin Berlin and Sergey Koren
+ * University Of Maryland
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.impl;
+
+/**
+ * Unchecked exception type for failures raised by the MHAP code. It adds no
+ * state of its own; each constructor forwards to the matching
+ * {@link RuntimeException} constructor.
+ */
+public class MhapRuntimeException extends RuntimeException
+{
+  private static final long serialVersionUID = 56387323839744808L;
+
+  /**
+   * Creates an exception without message or cause.
+   */
+  public MhapRuntimeException()
+  {
+  }
+
+  /**
+   * @param message
+   *          the detail message
+   */
+  public MhapRuntimeException(String message)
+  {
+    super(message);
+  }
+
+  /**
+   * @param cause
+   *          the underlying failure being wrapped
+   */
+  public MhapRuntimeException(Throwable cause)
+  {
+    super(cause);
+  }
+
+  /**
+   * @param message
+   *          the detail message
+   * @param cause
+   *          the underlying failure being wrapped
+   */
+  public MhapRuntimeException(String message, Throwable cause)
+  {
+    super(message, cause);
+  }
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/impl/MinHashBitSequenceSubSketches.java b/src/main/java/edu/umd/marbl/mhap/impl/MinHashBitSequenceSubSketches.java
new file mode 100644
index 0000000..004feac
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/impl/MinHashBitSequenceSubSketches.java
@@ -0,0 +1,259 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.impl;
+
+import java.io.DataInputStream;
+import java.io.EOFException;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Map.Entry;
+
+import edu.umd.marbl.mhap.align.AlignElementDoubleSketch;
+import edu.umd.marbl.mhap.align.Aligner;
+import edu.umd.marbl.mhap.sketch.HashUtils;
+import edu.umd.marbl.mhap.sketch.MinHashBitSketch;
+import edu.umd.marbl.mhap.sketch.SketchRuntimeException;
+import edu.umd.marbl.mhap.utils.HitCounter;
+
+/**
+ * A sequence represented as an ordered series of MinHash bit sketches, one per
+ * fixed-size window, wrapped in an alignable element so two sequences can be
+ * compared window by window.
+ */
+public final class MinHashBitSequenceSubSketches
+{
+  private final AlignElementDoubleSketch<MinHashBitSketch> alignmentSketch;
+
+  /**
+   * Computes {@code numHashes} minimum hash values over the n-grams of
+   * {@code seq}, where an n-gram that occurs {@code w} times contributes
+   * {@code w} pseudo-random draws per hash slot.
+   *
+   * @param seq
+   *          the text to sketch
+   * @param nGramSize
+   *          n-gram length
+   * @param numHashes
+   *          number of minimum values to produce
+   * @return the minimum drawn value for each hash slot
+   * @throws SketchRuntimeException
+   *           if {@code seq} is shorter than {@code nGramSize}
+   */
+  private final static int[] computeNgramMinHashesWeighted(String seq, final int nGramSize, final int numHashes)
+  {
+    final int numberNGrams = seq.length() - nGramSize + 1;
+
+    if (numberNGrams < 1)
+      throw new SketchRuntimeException("N-gram size bigger than string length.");
+
+    // get the kmer hashes
+    final long[] kmerHashes = HashUtils.computeSequenceHashesLong(seq, nGramSize, 0);
+
+    //now compute the counts of occurrence of each distinct kmer hash
+    HashMap<Long, HitCounter> hitMap = new LinkedHashMap<>(kmerHashes.length);
+    for (long kmer : kmerHashes)
+    {
+      HitCounter counter = hitMap.get(kmer);
+      if (counter==null)
+      {
+        counter = new HitCounter(1);
+        hitMap.put(kmer, counter);
+      }
+      else
+        counter.addHit();
+    }
+
+    int[] best = new int[numHashes];
+    Arrays.fill(best, Integer.MAX_VALUE);
+
+    for (Entry<Long, HitCounter> kmer : hitMap.entrySet())
+    {
+      long key = kmer.getKey();
+      int weight = kmer.getValue().count;
+
+      //seed the generator with the low 32 bits of the kmer hash
+      int x = (int)key;
+      for (int word = 0; word < numHashes; word++)
+      {
+        for (int count = 0; count<weight; count++)
+        {
+          // XORShift Random Number Generators
+          // NOTE(review): x is an int, so the shift distance 35 is reduced to
+          // 35 & 31 = 3 by the language. The constants look like the 64-bit
+          // xorshift triple; they are left untouched here because changing
+          // them would change every sketch value produced.
+          x ^= (x << 21);
+          x ^= (x >>> 35);
+          x ^= (x << 4);
+
+          if (x < best[word])
+            best[word] = x;
+        }
+      }
+    }
+
+    return best;
+  }
+
+  /**
+   * Cuts {@code seq} into consecutive windows of {@code stepSize} characters
+   * and computes the weighted min-hashes of each. If the length is not a
+   * multiple of {@code stepSize}, the final window is shifted back so that it
+   * is still {@code stepSize} long (or starts at 0 for short input) and
+   * therefore overlaps its predecessor.
+   *
+   * @return one min-hash array of length {@code numWords*64} per window
+   */
+  private static int[][] computeWindowMinHashes(String seq, int nGramSize, int stepSize, int numWords)
+  {
+    int remainder = seq.length()%stepSize;
+
+    //get number of windows, counting a trailing partial one
+    int numSequence = (seq.length()-remainder)/stepSize;
+
+    if (remainder>0)
+      numSequence++;
+
+    int start = 0;
+    int[][] sketches = new int[numSequence][];
+    for (int iter=0; iter<numSequence; iter++)
+    {
+      int end = Math.min(seq.length(), start+stepSize);
+      int currStart = Math.max(0, end-stepSize);
+
+      //compute minhashes
+      sketches[iter] = computeNgramMinHashesWeighted(seq.substring(currStart, end), nGramSize, numWords*64);
+
+      start += stepSize;
+    }
+
+    return sketches;
+  }
+
+  /**
+   * Builds one bit sketch per window of {@code stepSize} characters.
+   *
+   * @param seq
+   *          the sequence text
+   * @param nGramSize
+   *          n-gram length used inside each window
+   * @param stepSize
+   *          window length
+   * @param numWords
+   *          sketch size; {@code numWords*64} min-hashes are computed per
+   *          window
+   * @return the per-window sketches, in sequence order
+   */
+  public final static MinHashBitSketch[] computeSequences(String seq, int nGramSize, int stepSize, int numWords)
+  {
+    int[][] minHashes = computeWindowMinHashes(seq, nGramSize, stepSize, numWords);
+
+    //make sketches out of them
+    MinHashBitSketch[] sequence = new MinHashBitSketch[minHashes.length];
+    for (int iter=0; iter<minHashes.length; iter++)
+      sequence[iter] = new MinHashBitSketch(minHashes[iter]);
+
+    return sequence;
+  }
+
+  /**
+   * Like {@link #computeSequences}, but each sketch covers its own window
+   * merged with the following one (element-wise minimum of the two min-hash
+   * arrays). The last window has no successor and is sketched on its own.
+   *
+   * @return the merged per-window sketches, in sequence order
+   */
+  public final static MinHashBitSketch[] computeSequencesDouble(String seq, int nGramSize, int stepSize, int numWords)
+  {
+    int[][] sketches = computeWindowMinHashes(seq, nGramSize, stepSize, numWords);
+
+    MinHashBitSketch[] sequence = new MinHashBitSketch[sketches.length];
+    for (int iter=0; iter<sketches.length; iter++)
+    {
+      //merge with the next window so the sketch covers double the length
+      if ((iter+1)<sketches.length)
+        sequence[iter] = new MinHashBitSketch(union(sketches[iter], sketches[iter+1]));
+      else
+        sequence[iter] = new MinHashBitSketch(sketches[iter]);
+    }
+
+    return sequence;
+  }
+
+  /**
+   * Aligns this sequence of sketches against another one.
+   *
+   * @param aligner
+   *          the aligner to use
+   * @param b
+   *          the other sequence
+   * @return the overlap reported by the alignment element
+   */
+  public OverlapInfo getOverlapInfo(Aligner<AlignElementDoubleSketch<MinHashBitSketch>> aligner, MinHashBitSequenceSubSketches b)
+  {
+    return this.alignmentSketch.getOverlapInfo(aligner, b.alignmentSketch);
+  }
+
+  /**
+   * Reads one instance in the layout written by {@link #getAsByteArray()}:
+   * four ints (sketch count, words per sketch, step size, sequence length)
+   * followed by the sketch words as longs.
+   *
+   * @return the instance, or null if the stream ends before a full record was
+   *         read (this includes a record that is cut off part-way)
+   * @throws IOException
+   *           if reading fails for any other reason
+   */
+  public final static MinHashBitSequenceSubSketches fromByteStream(DataInputStream input) throws IOException
+  {
+    try
+    {
+      int numSketches = input.readInt();
+      int numWordsPerSketch = input.readInt();
+      int stepSize = input.readInt();
+      int seqLength = input.readInt();
+
+      MinHashBitSketch[] sequence = new MinHashBitSketch[numSketches];
+
+      for (int iter=0; iter<numSketches; iter++)
+      {
+        long[] bits = new long[numWordsPerSketch];
+        for (int word=0; word<numWordsPerSketch; word++)
+          bits[word] = input.readLong();
+
+        sequence[iter] = new MinHashBitSketch(bits);
+      }
+
+      return new MinHashBitSequenceSubSketches(sequence, stepSize, seqLength);
+
+    }
+    catch (EOFException e)
+    {
+      return null;
+    }
+  }
+
+  /**
+   * Wraps already computed sketches.
+   */
+  protected MinHashBitSequenceSubSketches(MinHashBitSketch[] sketches, int stepSize, int seqLength)
+  {
+    this.alignmentSketch = new AlignElementDoubleSketch<>(sketches, stepSize, seqLength);
+  }
+
+  /**
+   * Sketches the given sequence using double-width windows.
+   *
+   * @param seq
+   *          the sequence text
+   * @param kmerSize
+   *          n-gram length used inside each window
+   * @param stepSize
+   *          window length
+   * @param numWords
+   *          sketch size passed on to {@link #computeSequencesDouble}
+   */
+  public MinHashBitSequenceSubSketches(String seq, int kmerSize, int stepSize, int numWords)
+  {
+    this.alignmentSketch = new AlignElementDoubleSketch<>(computeSequencesDouble(seq, kmerSize, stepSize, numWords), stepSize, seq.length());
+  }
+
+  /**
+   * Merges two min-hash arrays by taking the element-wise minimum. The result
+   * has the length of the first array.
+   */
+  private static int[] union(int[] minHashes1, int[] minHashes2)
+  {
+    int[] newHashes = new int[minHashes1.length];
+
+    for (int iter=0; iter<newHashes.length; iter++)
+      newHashes[iter] = Math.min(minHashes1[iter], minHashes2[iter]);
+
+    return newHashes;
+  }
+
+  /**
+   * Serializes this instance: four ints (sketch count, words per sketch, step
+   * size, sequence length) followed by every word of every sketch as a long.
+   * The word count is taken from the first sketch.
+   *
+   * @return the serialized bytes
+   */
+  public byte[] getAsByteArray()
+  {
+    int numSketches = this.alignmentSketch.length();
+    int numWordsPerSketch = this.alignmentSketch.getSketch(0).numberOfWords();
+
+    // 8 bytes per word plus the four 4-byte header fields
+    ByteBuffer bb = ByteBuffer.allocate(8*numWordsPerSketch*numSketches+4*4);
+
+    //store the size
+    bb.putInt(numSketches);
+    bb.putInt(numWordsPerSketch);
+    bb.putInt(this.alignmentSketch.getStepSize());
+    bb.putInt(this.alignmentSketch.getSequenceLength());
+
+    //store the array
+    for (int sketchIndex=0; sketchIndex<numSketches; sketchIndex++)
+    {
+      MinHashBitSketch sketch = this.alignmentSketch.getSketch(sketchIndex);
+      for (int word=0; word<numWordsPerSketch; word++)
+        bb.putLong(sketch.getWord(word));
+    }
+
+    return bb.array();
+  }
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/impl/MinHashSearch.java b/src/main/java/edu/umd/marbl/mhap/impl/MinHashSearch.java
new file mode 100644
index 0000000..9073477
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/impl/MinHashSearch.java
@@ -0,0 +1,331 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2014 by Konstantin Berlin and Sergey Koren
+ * University Of Maryland
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.impl;
+
+import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;
+import it.unimi.dsi.fastutil.objects.Object2ObjectOpenHashMap;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.concurrent.atomic.AtomicLong;
+
+import edu.umd.marbl.mhap.align.AlignElementDoubleSketch;
+import edu.umd.marbl.mhap.align.Aligner;
+import edu.umd.marbl.mhap.sketch.MinHashBitSketch;
+import edu.umd.marbl.mhap.sketch.MinHashSketch;
+import edu.umd.marbl.mhap.utils.HitCounter;
+
+public final class MinHashSearch extends AbstractMatchSearch
+{
+ private final double acceptScore;
+
+ private final ArrayList<Map<Integer, ArrayList<SequenceId>>> hashes;
+ private final double maxShift;
+ private final AtomicLong minhashSearchTime;
+ private final AtomicLong sortMergeSearchTime;
+ private final int minStoreLength;
+ private final AtomicLong numberElementsProcessed;
+
+ private final Aligner<AlignElementDoubleSketch<MinHashBitSketch>> aligner;
+
+ private final AtomicLong numberSequencesFullyCompared;
+ private final AtomicLong numberSequencesHit;
+ private final AtomicLong numberSequencesMinHashed;
+
+ private final int numMinMatches;
+ private final double alignmentScore;
+ private final Map<SequenceId, SequenceSketch> sequenceVectorsHash;
+
+
+ public MinHashSearch(SequenceSketchStreamer data, int numHashes, int numMinMatches, int numThreads,
+ boolean storeResults, int minStoreLength, double maxShift, double acceptScore, double alignmentOffset, double alignmentScore) throws IOException
+ { // Loads all sequences from data, allocates numHashes lookup tables, then hands data to addData (defined outside this class).
+ super(numThreads, storeResults);
+
+ this.minStoreLength = minStoreLength;
+ this.numMinMatches = numMinMatches;
+ this.maxShift = maxShift;
+ this.acceptScore = acceptScore;
+ this.numberSequencesHit = new AtomicLong();
+ this.numberSequencesFullyCompared = new AtomicLong();
+ this.numberSequencesMinHashed = new AtomicLong();
+ this.numberElementsProcessed = new AtomicLong();
+ this.minhashSearchTime = new AtomicLong();
+ this.sortMergeSearchTime = new AtomicLong();
+
+ // load the whole input first (fwdOnly=false), so the maps below can be sized from the sequence count
+ data.enqueueFullFile(false, this.numThreads);
+
+ //store the bit aligner
+ this.aligner = new Aligner<AlignElementDoubleSketch<MinHashBitSketch>>(true, 0.0, 0.0, alignmentOffset);
+ this.alignmentScore = alignmentScore;
+
+ // sequence id -> sketch, presized to the number of sequences loaded above
+ this.sequenceVectorsHash = new Object2ObjectOpenHashMap<>(data.getNumberProcessed());
+
+ this.hashes = new ArrayList<>(numHashes);
+ for (int iter = 0; iter < numHashes; iter++)
+ {
+ // one table per min-hash position: hash value -> ids of the sequences having that value
+ Map<Integer,ArrayList<SequenceId>> map = new Int2ObjectOpenHashMap<ArrayList<SequenceId>>(data.getNumberProcessed());
+
+ this.hashes.add(map);
+ }
+
+ addData(data);
+
+
+ System.err.println("Stored "+this.sequenceVectorsHash.size()+" sequences in the index.");
+ }
+
+ @Override
+ public boolean addSequence(SequenceSketch currHash)
+ {
+ // Registers currHash under its sequence id, then files that id under each of its min-hash
+ // values. Throws MhapRuntimeException on a hash-count mismatch or an already-stored id.
+ final int[] minHashValues = currHash.getMinHashes().getMinHashArray();
+ final int numTables = this.hashes.size();
+ if (minHashValues.length != numTables)
+ throw new MhapRuntimeException("Number of MinHashes of the sequence does not match current settings.");
+
+ final SequenceId id = currHash.getSequenceId();
+
+ // store the sketch itself, guarded by the map's own monitor
+ synchronized (this.sequenceVectorsHash)
+ {
+ SequenceSketch previous = this.sequenceVectorsHash.put(id, currHash);
+ if (previous != null)
+ {
+ // undo the overwrite so the earlier entry survives, then report the clash
+ this.sequenceVectorsHash.put(id, previous);
+ throw new MhapRuntimeException("Sequence ID already exists in the hash table.");
+ }
+ }
+
+ // table i receives the i-th min-hash value of this sequence
+ for (int table=0; table<numTables; table++)
+ {
+ final Map<Integer, ArrayList<SequenceId>> bucketsForTable = this.hashes.get(table);
+ final int hashVal = minHashValues[table];
+
+ // fetch or create the bucket while holding the table's monitor
+ ArrayList<SequenceId> bucket;
+ synchronized (bucketsForTable)
+ {
+ bucket = bucketsForTable.computeIfAbsent(hashVal, k-> new ArrayList<SequenceId>(2));
+ }
+
+ // the bucket has its own monitor, taken only for the append
+ synchronized (bucket)
+ {
+ bucket.add(id);
+ }
+ }
+
+ this.numberSequencesMinHashed.getAndIncrement();
+
+ return true;
+ }
+
+ @Override
+ public List<MatchResult> findMatches(SequenceSketch seqHashes, boolean toSelf)
+ {
+ // Looks up every min-hash of seqHashes in the index, then scores each candidate that has at least numMinMatches hits; toSelf enables the self-comparison filters. Returns the accepted matches.
+ long startTime = System.nanoTime();
+
+ MinHashSketch minHash = seqHashes.getMinHashes();
+
+ if (this.hashes.size() != minHash.numHashes())
+ throw new MhapRuntimeException("Number of hashes does not match. Stored size " + this.hashes.size()
+ + ", input size " + minHash.numHashes() + ".");
+
+ //estimate size; with nothing processed yet the ratio would be Infinity, which casts to Integer.MAX_VALUE
+ long numLookups = this.getNumberSequencesSearched();
+ long numProcessed = this.numberElementsProcessed.get();
+ int mapSize = (numProcessed > 0) ? Math.max(256, (int)(4.0*(double)numLookups/(double)numProcessed)) : 256;
+ // NOTE(review): this is lookups per processed element; hits per lookup would be the inverse ratio - confirm intent
+ Map<SequenceId, HitCounter> bestSequenceHit = new Object2ObjectOpenHashMap<>(mapSize);
+ int[] minHashes = minHash.getMinHashArray();
+
+ int hashIndex = 0;
+ for (Map<Integer,ArrayList<SequenceId>> currHash : this.hashes)
+ {
+ ArrayList<SequenceId> currentHashMatchList = currHash.get(minHashes[hashIndex]);
+
+ // if some matches exist add them
+ if (currentHashMatchList != null)
+ {
+ this.numberElementsProcessed.getAndAdd(currentHashMatchList.size());
+
+ for (SequenceId sequenceId : currentHashMatchList)
+ {
+ bestSequenceHit.compute(sequenceId, (k,v)-> (v==null) ? new HitCounter(1) : v.addHit());
+ }
+ }
+
+ hashIndex++;
+ }
+
+ //record the search time
+ long minHashEndTime = System.nanoTime();
+ this.minhashSearchTime.getAndAdd(minHashEndTime - startTime);
+
+ //record number of hash matches processed
+ this.numberSequencesHit.getAndAdd(bestSequenceHit.size());
+
+ // compute the proper counts for all sets and remove below threshold
+ ArrayList<MatchResult> matches = new ArrayList<MatchResult>(32);
+
+ for (Entry<SequenceId, HitCounter> match : bestSequenceHit.entrySet())
+ {
+ //get the match id
+ SequenceId matchId = match.getKey();
+
+ // in self mode never match a sequence against itself, on either strand (same header id)
+ if (toSelf && matchId.getHeaderId() == seqHashes.getSequenceId().getHeaderId())
+ continue;
+
+ //see if the hit number is high enough
+ if (match.getValue().count >= this.numMinMatches)
+ {
+ SequenceSketch matchedHashes = this.sequenceVectorsHash.get(match.getKey());
+ if (matchedHashes==null)
+ throw new MhapRuntimeException("Hashes not found for given id.");
+
+ //never process short to short
+ if (matchedHashes.getSequenceLength()<this.minStoreLength && seqHashes.getSequenceLength()<this.minStoreLength)
+ continue;
+
+ //never process long to long in self, with greater id
+ if (toSelf
+ && matchId.getHeaderId() > seqHashes.getSequenceId().getHeaderId()
+ && matchedHashes.getSequenceLength()>=this.minStoreLength
+ && seqHashes.getSequenceLength()>=this.minStoreLength)
+ continue;
+
+ //never do short to long
+ if (toSelf
+ && matchedHashes.getSequenceLength()<this.minStoreLength
+ && seqHashes.getSequenceLength()>=this.minStoreLength)
+ continue;
+
+ //compute the direct hash score
+ OverlapInfo result;
+ boolean accept;
+ if (seqHashes.useAlignment())
+ {
+ result = seqHashes.getAlignmentSequence().getOverlapInfo(this.aligner, matchedHashes.getAlignmentSequence());
+ accept = result.rawScore>this.alignmentScore;
+ }
+ else
+ {
+ result = seqHashes.getOrderedHashes().getOverlapInfo(matchedHashes.getOrderedHashes(), this.maxShift);
+ accept = result.score >= this.acceptScore;
+ }
+
+ //increment the counter
+ this.numberSequencesFullyCompared.getAndIncrement();
+
+ //if score is good add
+ if (accept)
+ {
+ MatchResult currResult = new MatchResult(seqHashes.getSequenceId(), matchId, result, seqHashes.getSequenceLength(), matchedHashes.getSequenceLength());
+
+ // add to list
+ matches.add(currResult);
+ }
+ }
+ }
+
+ //record the search time
+ //TODO not clear why not working. Perhaps everything is too fast?
+ long endTime = System.nanoTime();
+ this.sortMergeSearchTime.getAndAdd(endTime-minHashEndTime);
+
+ return matches;
+ }
+
+ public double getMinHashSearchTime()
+ { // nanoseconds accumulated by the bucket-lookup phase of findMatches, scaled to seconds
+ return this.minhashSearchTime.longValue() * 1.0e-9;
+ }
+
+ public double getSortMergeTime()
+ { // nanoseconds accumulated by the candidate-scoring phase of findMatches, scaled to seconds
+ return this.sortMergeSearchTime.longValue() * 1.0e-9;
+ }
+
+
+ public long getNumberElementsProcessed()
+ { // sum of the sizes of all bucket lists visited by findMatches
+ return this.numberElementsProcessed.get();
+ }
+
+ public long getNumberSequenceHashed()
+ { // count of addSequence calls that ran to completion
+ return this.numberSequencesMinHashed.get();
+ }
+
+ public long getNumberSequencesFullyCompared()
+ { // count of candidates for which findMatches computed an overlap score
+ return this.numberSequencesFullyCompared.get();
+ }
+
+ public long getNumberSequencesHit()
+ { // sum, over all findMatches calls, of the distinct ids that shared at least one hash with the query
+ return this.numberSequencesHit.get();
+ }
+
+ @Override
+ public List<SequenceId> getStoredForwardSequenceIds()
+ {
+ // gather the ids of all stored sketches whose id is flagged as forward
+ final ArrayList<SequenceId> forwardIds = new ArrayList<SequenceId>(this.sequenceVectorsHash.size());
+ this.sequenceVectorsHash.values().forEach(stored -> {
+ if (stored.getSequenceId().isForward()) forwardIds.add(stored.getSequenceId());
+ });
+ return forwardIds;
+ }
+
+ @Override
+ public SequenceSketch getStoredSequenceHash(SequenceId id)
+ { // plain map lookup keyed by id
+ return this.sequenceVectorsHash.get(id);
+ }
+
+ @Override
+ public int size()
+ { // number of stored sketches (entries of the id -> sketch map)
+ return this.sequenceVectorsHash.size();
+ }
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/impl/OverlapInfo.java b/src/main/java/edu/umd/marbl/mhap/impl/OverlapInfo.java
new file mode 100644
index 0000000..ac0f101
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/impl/OverlapInfo.java
@@ -0,0 +1,64 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.impl;
+
+ public final class OverlapInfo
+ {
+ // Immutable value holder: two scores and the a1/a2/b1/b2 coordinates of one overlap result.
+ public final double score, rawScore;
+ public final int a1, a2;
+ public final int b1, b2;
+
+ /** Stores every argument in the field of the same name (the a-coordinates precede the b-coordinates). */
+ public OverlapInfo(double score, double rawScore, int a1, int a2, int b1, int b2)
+ {
+ this.a1 = a1;
+ this.a2 = a2;
+ this.b1 = b1;
+ this.b2 = b2;
+ this.rawScore = rawScore;
+ this.score = score;
+ }
+
+ /** Bracketed summary of score and coordinates; rawScore is not part of it. */
+ @Override
+ public String toString()
+ {
+ StringBuilder text = new StringBuilder("[score=").append(this.score);
+ text.append(", a1=").append(this.a1).append(" a2=").append(this.a2);
+ text.append(", b1=").append(this.b1).append(" b2=").append(this.b2);
+ return text.append("]").toString();
+ }
+
+ /** Score followed by a1, a2, b1, b2, separated by ", ". */
+ public String toBlasrString()
+ {
+ return String.join(", ", String.valueOf(this.score), String.valueOf(this.a1), String.valueOf(this.a2), String.valueOf(this.b1), String.valueOf(this.b2));
+ }
+ }
diff --git a/src/main/java/edu/umd/marbl/mhap/impl/Sequence.java b/src/main/java/edu/umd/marbl/mhap/impl/Sequence.java
new file mode 100644
index 0000000..4744cb2
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/impl/Sequence.java
@@ -0,0 +1,108 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2014 by Konstantin Berlin and Sergey Koren
+ * University Of Maryland
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.impl;
+
+import edu.umd.marbl.mhap.utils.Utils;
+
+ public final class Sequence
+ {
+ private final String sequence;
+ private final SequenceId id;
+
+ /** Decodes integer codes 0, 1, 2, 3 into A, C, G, T; any other code raises a RuntimeException. */
+ public Sequence(int[] sequence, SequenceId id)
+ {
+ this.id = id;
+
+ StringBuilder s = new StringBuilder(sequence.length);
+ for (int iter=0; iter<sequence.length; iter++)
+ {
+ switch(sequence[iter])
+ {
+ // code 0 must decode to A: with "U" here the alphabet would be U/C/G/T and contain no adenine
+ case 0 : s.append('A'); break;
+ case 1 : s.append('C'); break;
+ case 2 : s.append('G'); break;
+ case 3 : s.append('T'); break;
+ default : throw new RuntimeException("Unknown integer value "+sequence[iter]+" at position "+iter+".");
+ }
+ }
+
+ this.sequence = s.toString();
+ }
+
+ /** Wraps an existing sequence string without copying or validating it. */
+ public Sequence(String sequence, SequenceId id)
+ {
+ this.sequence = sequence;
+ this.id = id;
+ }
+
+ /** Returns the stored sequence string. */
+ public String getSquenceString()
+ {
+ return this.sequence;
+ }
+
+ public SequenceId getId()
+ {
+ return this.id;
+ }
+
+ /** Returns a new Sequence holding Utils.rc of this string, with the id switched to the other strand. */
+ public Sequence getReverseCompliment()
+ {
+ return new Sequence(Utils.rc(this.sequence), this.id.complimentId());
+ }
+
+ /** Returns the kmerSize characters starting at index. */
+ public String getKmer(int index, int kmerSize)
+ {
+ return this.sequence.substring(index, index+kmerSize);
+ }
+
+ /** Number of k-mer start positions; not clamped, so it is zero or negative when the sequence is shorter than kmerSize. */
+ public int numKmers(int kmerSize)
+ {
+ return this.sequence.length()-kmerSize+1;
+ }
+
+ public int length()
+ {
+ return this.sequence.length();
+ }
+
+ /** Two-line rendering: ">" plus the id, a newline, then the sequence. */
+ @Override
+ public String toString()
+ {
+ return ">"+this.id+"\n"+this.sequence;
+ }
+ }
diff --git a/src/main/java/edu/umd/marbl/mhap/impl/SequenceId.java b/src/main/java/edu/umd/marbl/mhap/impl/SequenceId.java
new file mode 100644
index 0000000..e07f344
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/impl/SequenceId.java
@@ -0,0 +1,132 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2014 by Konstantin Berlin and Sergey Koren
+ * University Of Maryland
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.impl;
+
+import java.io.Serializable;
+
+public final class SequenceId implements Serializable
+{
+ /**
+ *
+ */
+ private static final long serialVersionUID = 2181572437818064822L;
+ private final int id;
+ private final boolean isFwd;
+ private final String strId;
+
+ public static boolean STORE_FULL_ID = false;
+
+ public SequenceId(int id)
+ {
+ this(id, true);
+ }
+
+ public SequenceId(int id, boolean isFwd)
+ {
+ this.id = id;
+ this.isFwd = isFwd;
+ this.strId = null;
+ }
+
+ public SequenceId(int id, boolean isFwd, String strId)
+ {
+ this.id = id;
+ this.isFwd = isFwd;
+ this.strId = strId;
+ }
+
+ public SequenceId createOffset(int offset)
+ {
+ return new SequenceId(this.id+offset, this.isFwd, this.strId);
+ }
+
+ public SequenceId complimentId()
+ {
+ return new SequenceId(this.id, !this.isFwd, this.strId);
+ }
+
+ /* (non-Javadoc)
+ * @see java.lang.Object#equals(java.lang.Object)
+ */
+ @Override
+ public boolean equals(Object obj)
+ {
+ if (this == obj)
+ return true;
+ if (obj == null)
+ return false;
+ if (getClass() != obj.getClass())
+ return false;
+ SequenceId other = (SequenceId) obj;
+
+ return (this.id==other.id) && (this.isFwd == other.isFwd);
+ }
+
+ public boolean isForward()
+ {
+ return this.isFwd;
+ }
+
+ public int getHeaderId()
+ {
+ return this.id;
+ }
+
+ public String getHeader()
+ {
+ if (this.strId!=null)
+ return this.strId;
+
+ return String.valueOf(this.id);
+ }
+
+ /* (non-Javadoc)
+ * @see java.lang.Object#hashCode()
+ */
+ @Override
+ public int hashCode()
+ {
+ return this.isFwd? this.id : -this.id;
+ }
+
+ /* (non-Javadoc)
+ * @see java.lang.Object#toString()
+ */
+ @Override
+ public String toString()
+ {
+ return ""+getHeader()+(this.isFwd ? "(fwd)" : "(rev)");
+ }
+
+ public String toStringInt()
+ {
+ return ""+getHeader()+(this.isFwd ? " 1" : " 0");
+ }
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/impl/SequenceSketch.java b/src/main/java/edu/umd/marbl/mhap/impl/SequenceSketch.java
new file mode 100644
index 0000000..8465ff4
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/impl/SequenceSketch.java
@@ -0,0 +1,206 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2014 by Konstantin Berlin and Sergey Koren
+ * University Of Maryland
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.impl;
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.Serializable;
+
+import edu.umd.marbl.mhap.sketch.FrequencyCounts;
+import edu.umd.marbl.mhap.sketch.MinHashSketch;
+import edu.umd.marbl.mhap.sketch.OrderedNGramHashes;
+
+public final class SequenceSketch implements Serializable
+{
+ /**
+ *
+ */
+ private static final long serialVersionUID = -3155689614837922443L;
+
+ private final SequenceId id;
+ private final MinHashSketch mainHashes;
+ private final OrderedNGramHashes orderedHashes;
+ private final MinHashBitSequenceSubSketches alignmentSketches;
+ private final int sequenceLength;
+
+ public final static double SHIFT_CONSENSUS_PERCENTAGE = 0.75;
+ public final static int BIT_SKETCH_SIZE = 16;
+ public final static int SUBSEQUENCE_SIZE = 200;
+ public final static int BIT_KMER_SIZE = 7;
+
+ public static SequenceSketch fromByteStream(DataInputStream input, int offset, boolean useAlignment) throws IOException
+ {
+ try
+ {
+ // Fields are read in the order getAsByteArray writes them; offset is added to the stored id.
+
+ // strand flag (written with writeBoolean)
+ boolean isFwd = input.readBoolean();
+
+ // numeric header id (written with writeInt); a textual header is not part of the stream
+ SequenceId id = new SequenceId(input.readInt() + offset, isFwd);
+
+ // sequence length (written with writeInt)
+ int sequenceLength = input.readInt();
+
+ // main min-hash sketch
+ MinHashSketch mainHashes = MinHashSketch.fromByteStream(input);
+
+ if (mainHashes == null)
+ throw new MhapRuntimeException("Unexpected data read error.");
+ // exactly one trailing section is read; useAlignment selects which one is expected
+ OrderedNGramHashes orderedHashes = null;
+ MinHashBitSequenceSubSketches alignmentSketch = null;
+ if (useAlignment)
+ {
+ alignmentSketch = MinHashBitSequenceSubSketches.fromByteStream(input);
+ if (alignmentSketch == null)
+ throw new MhapRuntimeException("Unexpected data read when reading alignment sketches.");
+ }
+ else
+ {
+ orderedHashes = OrderedNGramHashes.fromByteStream(input);
+ if (orderedHashes == null)
+ throw new MhapRuntimeException("Unexpected data read error when reading ordered k-mers.");
+ }
+
+ return new SequenceSketch(id, sequenceLength, mainHashes, orderedHashes, alignmentSketch);
+
+ }
+ catch (EOFException e)
+ { // an EOFException reaching this point ends the read with null instead of an error
+ return null;
+ }
+ }
+
+ public SequenceSketch(SequenceId id, int sequenceLength, MinHashSketch mainHashes, OrderedNGramHashes orderedHashes, MinHashBitSequenceSubSketches alignmentSketch)
+ { // plain field assignment; fromByteStream passes null for whichever of orderedHashes / alignmentSketch it did not read
+ this.sequenceLength = sequenceLength;
+ this.id = id;
+ this.mainHashes = mainHashes;
+ this.orderedHashes = orderedHashes;
+ this.alignmentSketches = alignmentSketch;
+ }
+
+ public SequenceSketch(Sequence seq, int kmerSize, int numHashes, int orderedKmerSize, boolean storeHashes,
+ FrequencyCounts kmerFilter, boolean weighted, boolean useAlignment)
+ { // builds the main min-hash sketch plus either alignment sub-sketches or ordered hashes; storeHashes is not read here
+ this.sequenceLength = seq.length();
+ this.id = seq.getId();
+ this.mainHashes = new MinHashSketch(seq.getSquenceString(), kmerSize, numHashes, kmerFilter, weighted);
+
+ if (useAlignment)
+ {
+ this.orderedHashes = null;
+ this.alignmentSketches = new MinHashBitSequenceSubSketches(seq.getSquenceString(), BIT_KMER_SIZE, SUBSEQUENCE_SIZE, BIT_SKETCH_SIZE);
+ }
+ else
+ {
+ this.orderedHashes = new OrderedNGramHashes(seq.getSquenceString(), orderedKmerSize);
+ this.alignmentSketches = null;
+ }
+ }
+
+ public SequenceSketch createOffset(int offset)
+ { // new sketch with a shifted id; the hash objects are shared with this instance, not copied
+ return new SequenceSketch(this.id.createOffset(offset), this.sequenceLength, this.mainHashes, this.orderedHashes, this.alignmentSketches);
+ }
+
+ public byte[] getAsByteArray()
+ { // Serialized form: strand flag, header id, sequence length, main hashes, then ordered hashes and/or alignment sketches when present.
+ byte[] mainHashesBytes = this.mainHashes.getAsByteArray();
+
+ byte[] orderedHashesBytes = null;
+ byte[] alignmentSketchesBytes = null;
+ if (this.orderedHashes!=null)
+ orderedHashesBytes = this.orderedHashes.getAsByteArray();
+ if (this.alignmentSketches!=null)
+ alignmentSketchesBytes = alignmentSketches.getAsByteArray();
+
+ //presize for the complete record: the boolean and the two ints written first take 1+4+4 bytes
+ final int headerBytes = 1+4+4;
+ ByteArrayOutputStream bos = new ByteArrayOutputStream(headerBytes+mainHashesBytes.length
+ +(orderedHashesBytes==null ? 0 : orderedHashesBytes.length)
+ +(alignmentSketchesBytes==null ? 0 : alignmentSketchesBytes.length));
+ DataOutputStream dos = new DataOutputStream(bos);
+
+ try
+ {
+ dos.writeBoolean(this.id.isForward());
+ dos.writeInt(this.id.getHeaderId());
+ dos.writeInt(this.sequenceLength);
+ dos.write(mainHashesBytes);
+ if (orderedHashesBytes!=null)
+ dos.write(orderedHashesBytes);
+ if (alignmentSketchesBytes!=null)
+ dos.write(alignmentSketchesBytes);
+
+ dos.flush();
+ return bos.toByteArray();
+ }
+ catch (IOException e)
+ { // not expected in practice: the target is an in-memory buffer
+ throw new MhapRuntimeException("Unexpected IO error.");
+ }
+ }
+
+ public boolean useAlignment()
+ { // true when alignment sub-sketches are present
+ return this.alignmentSketches!=null;
+ }
+
+ public MinHashSketch getMinHashes()
+ { // the main min-hash sketch
+ return this.mainHashes;
+ }
+
+ public OrderedNGramHashes getOrderedHashes()
+ { // null when the sketch was built or read with useAlignment set
+ return this.orderedHashes;
+ }
+
+ public MinHashBitSequenceSubSketches getAlignmentSequence()
+ { // null when the sketch was built or read without useAlignment
+ return this.alignmentSketches;
+ }
+
+ public SequenceId getSequenceId()
+ { // the id this sketch was created with
+ return this.id;
+ }
+
+ public int getSequenceLength()
+ { // length recorded at construction: seq.length(), or the value read from the byte stream
+ return this.sequenceLength;
+ }
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/impl/SequenceSketchStreamer.java b/src/main/java/edu/umd/marbl/mhap/impl/SequenceSketchStreamer.java
new file mode 100644
index 0000000..42e243d
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/impl/SequenceSketchStreamer.java
@@ -0,0 +1,365 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2014 by Konstantin Berlin and Sergey Koren
+ * University Of Maryland
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.impl;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.EOFException;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.util.Iterator;
+import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicLong;
+
+import edu.umd.marbl.mhap.sketch.FrequencyCounts;
+import edu.umd.marbl.mhap.utils.ReadBuffer;
+import edu.umd.marbl.mhap.utils.Utils;
+
+public class SequenceSketchStreamer
+{
+ private final DataInputStream buffInput;
+ private final FastaData fastaData;
+ private final FrequencyCounts kmerFilter;
+ private final int kmerSize;
+ private final AtomicLong numberProcessed;
+ private final int numHashes;
+ private final int offset;
+ private final boolean weighted;
+ private final boolean useAlignment;
+
+ private final int orderedKmerSize;
+ private boolean readClosed;
+ private final boolean readingFasta;
+ private final ConcurrentLinkedQueue<SequenceSketch> sequenceHashList;
+
+ public SequenceSketchStreamer(String file, int offset, boolean useAlignment) throws FileNotFoundException
+ { // binary mode: opens file for buffered reading of stored sketches; the FASTA-only settings stay at 0 / null
+ this.fastaData = null;
+ this.readingFasta = false;
+ this.sequenceHashList = new ConcurrentLinkedQueue<SequenceSketch>();
+ this.numberProcessed = new AtomicLong();
+ this.kmerFilter = null;
+ this.weighted = true;
+
+ this.kmerSize = 0;
+ this.numHashes = 0;
+ this.orderedKmerSize = 0;
+ this.readClosed = false;
+ this.offset = offset;
+ this.useAlignment = useAlignment;
+
+ this.buffInput = new DataInputStream(new BufferedInputStream(new FileInputStream(file), Utils.BUFFER_BYTE_SIZE));
+ }
+
+ public SequenceSketchStreamer(String file, int kmerSize, int numHashes, int orderedKmerSize,
+ FrequencyCounts kmerFilter, boolean weighted, int offset, boolean useAlignment) throws IOException
+ { // FASTA mode: sketches are computed from the sequences FastaData supplies; buffInput stays null
+ this.fastaData = new FastaData(file, offset);
+ this.readingFasta = true;
+ this.sequenceHashList = new ConcurrentLinkedQueue<SequenceSketch>();
+ this.numberProcessed = new AtomicLong();
+
+ this.weighted = weighted;
+ this.kmerFilter = kmerFilter;
+ this.kmerSize = kmerSize;
+ this.numHashes = numHashes;
+ this.orderedKmerSize = orderedKmerSize;
+ this.buffInput = null;
+ this.readClosed = false;
+ this.offset = offset;
+ this.useAlignment = useAlignment;
+ }
+
+ public SequenceSketch dequeue(boolean fwdOnly, ReadBuffer buf) throws IOException
+ { // tries to load one more sketch into the queue first, then polls the queue; null means the queue was empty
+ enqueue(fwdOnly, buf);
+
+ return this.sequenceHashList.poll();
+ }
+
+ private boolean enqueue(boolean fwdOnly, ReadBuffer buf) throws IOException
+ {
+ SequenceSketch seqHashes;
+ if (this.readingFasta)
+ {
+ Sequence seq = this.fastaData.dequeue();
+
+ // compute the hashes
+ seqHashes = null;
+ if (seq != null)
+ seqHashes = getSketch(seq);
+
+ if (seqHashes == null)
+ return false;
+ processAddition(seqHashes);
+
+ this.sequenceHashList.add(seqHashes);
+
+ // fasta files are all fwd
+ if (!fwdOnly)
+ {
+ // compute the hashes
+ seqHashes = getSketch(seq.getReverseCompliment());
+
+ this.sequenceHashList.add(seqHashes);
+ processAddition(seqHashes);
+ }
+ }
+ else
+ {
+ // read the binary file
+ seqHashes = readFromBinary(buf, fwdOnly);
+ while (seqHashes != null && fwdOnly && !seqHashes.getSequenceId().isForward())
+ {
+ seqHashes = readFromBinary(buf, fwdOnly);
+ }
+
+ // do nothing and return
+ // record
+ if (seqHashes == null)
+ return false;
+
+ processAddition(seqHashes);
+
+ this.sequenceHashList.add(seqHashes);
+
+ }
+
+ return true;
+ }
+
+ public synchronized void enqueueFullFile(final boolean fwdOnly, int numThreads) throws IOException
+ {
+ // Drains the whole input into the queue using numThreads workers and waits for them to finish.
+ ExecutorService execSvc = Executors.newFixedThreadPool(numThreads);
+
+ // every worker runs the same loop until enqueue reports that nothing is left
+ for (int iter = 0; iter < numThreads; iter++)
+ {
+ Runnable task = new Runnable()
+ {
+ @Override
+ public void run()
+ {
+ ReadBuffer buf = new ReadBuffer();
+ try
+ {
+ while (enqueue(fwdOnly, buf))
+ {
+ }
+ }
+ catch (IOException e)
+ {
+ // NOTE(review): thrown on a pool thread started via execute(), so the caller never sees it
+ throw new MhapRuntimeException(e);
+ }
+ }
+ };
+
+ execSvc.execute(task);
+ }
+
+ // shutdown the service
+ execSvc.shutdown();
+ try
+ {
+ execSvc.awaitTermination(365L, TimeUnit.DAYS);
+ }
+ catch (InterruptedException e)
+ {
+ execSvc.shutdownNow();
+ Thread.currentThread().interrupt(); // restore the flag that was cleared when the exception was thrown
+ throw new MhapRuntimeException("Unable to finish all tasks.");
+ }
+ }
+
+ public Iterator<SequenceSketch> getDataIterator()
+ { // iterator over the sketches currently held in the queue
+ return this.sequenceHashList.iterator();
+ }
+
+ public int getFastaProcessed()
+ { // 0 in binary mode, where no FastaData exists; otherwise the count reported by FastaData
+ if (this.fastaData == null)
+ return 0;
+
+ return this.fastaData.getNumberProcessed();
+ }
+
+ public SequenceSketch getSketch(Sequence seq)
+ {
+ // sketch seq with this streamer's settings; the storeHashes argument is always passed as false
+ return new SequenceSketch(seq, this.kmerSize, this.numHashes, this.orderedKmerSize, false, this.kmerFilter, this.weighted, this.useAlignment);
+ }
+
+ public int getNumberProcessed()
+ { // the long counter maintained by processAddition, narrowed to int
+ return this.numberProcessed.intValue();
+ }
+
+ protected void processAddition(SequenceSketch seqHashes)
+ {
+ // Counts one processed sequence and logs progress at every 5000th one.
+ // The total comes from the atomic increment itself: re-reading the counter afterwards lets
+ // concurrent callers see the same total twice, or jump past a multiple of 5000.
+ long numProcessed = this.numberProcessed.incrementAndGet();
+ if (numProcessed % 5000 == 0)
+ System.err.println("Current # sequences loaded and processed from file: " + numProcessed + "...");
+ }
+
+ protected SequenceSketch readFromBinary(ReadBuffer buf, boolean fwdOnly) throws IOException
+ { // Reads the next record (1-byte strand flag, 4-byte length, payload) and decodes it; returns null once the end of the file has been reached.
+ byte[] byteArray = null;
+ synchronized (this.buffInput)
+ {
+ if (this.readClosed)
+ return null;
+
+ try
+ {
+ boolean keepReading = true;
+ while (keepReading)
+ {
+ byte isFwd = this.buffInput.readByte();
+
+ if (!fwdOnly || isFwd == 1)
+ keepReading = false;
+
+ // get the size in bytes
+ int byteSize = this.buffInput.readInt();
+
+ // get a buffer of at least that size from the caller's ReadBuffer
+ byteArray = buf.getBuffer(byteSize);
+ // NOTE(review): presumably the buffer is reused between calls and may be longer than byteSize - confirm
+
+ // read exactly that many bytes: read() may return fewer, readFully() fills the range or throws EOFException
+ this.buffInput.readFully(byteArray, 0, byteSize);
+ }
+ }
+ catch (EOFException e)
+ {
+ this.buffInput.close();
+ this.readClosed = true;
+
+ return null;
+ }
+ }
+
+ // get as byte array stream
+ SequenceSketch seqHashes = SequenceSketch.fromByteStream(new DataInputStream(
+ new ByteArrayInputStream(byteArray)), this.offset, this.useAlignment);
+
+ return seqHashes;
+ }
+
+ public void writeToBinary(String file, final boolean fwdOnly, int numThreads) throws IOException
+ { // Writes every sketch this streamer yields to file as records of: strand byte, 4-byte payload length, payload.
+ OutputStream output = null;
+ try
+ {
+ output = new BufferedOutputStream(new FileOutputStream(file), Utils.BUFFER_BYTE_SIZE);
+ final OutputStream finalOutput = output;
+
+ // worker pool of numThreads threads
+ ExecutorService execSvc = Executors.newFixedThreadPool(numThreads);
+
+ // for each thread create a task
+ for (int iter = 0; iter < numThreads; iter++)
+ {
+ Runnable task = new Runnable()
+ {
+ @Override
+ public void run()
+ {
+ SequenceSketch seqHashes;
+ ReadBuffer buf = new ReadBuffer();
+ try
+ {
+ seqHashes = dequeue(fwdOnly, buf);
+ while (seqHashes != null)
+ {
+ byte[] byteArray = seqHashes.getAsByteArray();
+ int arraySize = byteArray.length;
+ byte isFwd = seqHashes.getSequenceId().isForward() ? (byte) 1 : (byte) 0;
+
+ // 5-byte record prefix: strand byte followed by the payload length
+ byte[] byteSize = ByteBuffer.allocate(5).put(isFwd).putInt(arraySize).array();
+ // prefix and payload must stay adjacent in the file, so both writes share one lock
+ synchronized (finalOutput)
+ {
+ finalOutput.write(byteSize);
+ finalOutput.write(byteArray);
+ }
+
+ seqHashes = dequeue(fwdOnly, buf);
+ }
+ }
+ catch (IOException e)
+ {
+ throw new MhapRuntimeException(e);
+ }
+ }
+ };
+
+ // enqueue the task
+ execSvc.execute(task);
+ }
+
+ // shutdown the service
+ execSvc.shutdown();
+ try
+ {
+ execSvc.awaitTermination(365L, TimeUnit.DAYS);
+ }
+ catch (InterruptedException e)
+ {
+ execSvc.shutdownNow();
+ Thread.currentThread().interrupt(); // restore the flag that was cleared when the exception was thrown
+ throw new MhapRuntimeException("Unable to finish all tasks.");
+ }
+
+ finalOutput.flush();
+ }
+ finally
+ {
+ if (output != null)
+ output.close();
+ }
+ }
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/main/.gitignore b/src/main/java/edu/umd/marbl/mhap/main/.gitignore
new file mode 100644
index 0000000..48d6f58
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/main/.gitignore
@@ -0,0 +1,5 @@
+/buildMulti.class
+/Utils$Pair.class
+/Utils$ToProtein.class
+/Utils$Translate.class
+/Utils.class
diff --git a/src/main/java/edu/umd/marbl/mhap/main/AlignmentTry.java b/src/main/java/edu/umd/marbl/mhap/main/AlignmentTry.java
new file mode 100644
index 0000000..a81f838
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/main/AlignmentTry.java
@@ -0,0 +1,118 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.main;
+
+import edu.umd.marbl.mhap.align.AlignElementDoubleSketch;
+import edu.umd.marbl.mhap.align.AlignElementString;
+import edu.umd.marbl.mhap.align.Aligner;
+import edu.umd.marbl.mhap.align.Alignment;
+import edu.umd.marbl.mhap.impl.MinHashBitSequenceSubSketches;
+import edu.umd.marbl.mhap.impl.OverlapInfo;
+import edu.umd.marbl.mhap.sketch.MinHashBitSketch;
+import edu.umd.marbl.mhap.sketch.OrderedNGramHashes;
+import edu.umd.marbl.mhap.utils.RandomSequenceGenerator;
+
+public class AlignmentTry
+{
+
+ public static void main(String[] args)
+ {
+ String a = "bcdefghij1234567890";
+ String b = "abcdefghij1234567890";
+ // the two literals above are overwritten right away: a becomes a random 2000-character sequence, b its [800, 1800) slice
+ RandomSequenceGenerator generator = new RandomSequenceGenerator();
+ a = generator.generateRandomSequence(2000);
+ b = a.substring(800, 1800);
+ a = generator.addPacBioError(a);
+ b = generator.addPacBioError(b);
+ //b = generator.generateRandomSequence(1400);
+ //b = a;
+
+ Aligner<AlignElementString> aligner = new Aligner<AlignElementString>(true, -2.0, -1*Float.MAX_VALUE, 0.0);
+ // first experiment: align the two raw strings and print score, alignment and coordinates
+ Alignment<AlignElementString> alignment = aligner.localAlignSmithWaterGotoh(new AlignElementString(a), new AlignElementString(b));
+
+ System.err.println(alignment.getOverlapScore(5));
+
+ System.out.println(alignment.outputAlignment());
+
+ System.err.println("A1="+alignment.getA1());
+ System.err.println("B1="+alignment.getB1());
+ System.err.println("A2="+alignment.getA2());
+ System.err.println("B2="+alignment.getB2());
+ // second experiment: compare the sub-sketch representations of the same two strings, in both argument orders
+ MinHashBitSequenceSubSketches m1 = new MinHashBitSequenceSubSketches(a, 7, 200, 20);
+ MinHashBitSequenceSubSketches m2 = new MinHashBitSequenceSubSketches(b, 7, 200, 20);
+
+ OverlapInfo info = m1.getOverlapInfo(new Aligner<AlignElementDoubleSketch<MinHashBitSketch>>(true, 0.00, 0.0, -0.52), m2);
+
+ System.err.println("Compressed=");
+ System.err.println(info.rawScore);
+ System.err.println(info.a1);
+ System.err.println(info.b1);
+ System.err.println(info.a2);
+ System.err.println(info.b2);
+
+ OverlapInfo info2 = m2.getOverlapInfo(new Aligner<AlignElementDoubleSketch<MinHashBitSketch>>(true, 0.00, 0.0, -0.52), m1);
+ System.err.println("Swap=");
+ System.err.println(info2.rawScore);
+ System.err.println(info2.a1);
+ System.err.println(info2.b1);
+ System.err.println(info2.a2);
+ System.err.println(info2.b2);
+
+ // NOTE(review): the JVM exits here with status 1, so none of the statements below ever run
+ System.exit(1);
+
+ OrderedNGramHashes hashes1 = new OrderedNGramHashes(a, 10);
+ OrderedNGramHashes hashes2 = new OrderedNGramHashes(b, 10);
+
+ System.err.println("Ordered=");
+ System.err.println(hashes1.getOverlapInfo(hashes2, .2).a1);
+ System.err.println(hashes1.getOverlapInfo(hashes2, .2).b1);
+ System.err.println(hashes1.getOverlapInfo(hashes2, .2).a2);
+ System.err.println(hashes1.getOverlapInfo(hashes2, .2).b2);
+
+ /*
+ SimHash s1 = new SimHash(a, kmerSize, 100);
+ SimHash s2 = new SimHash(b, kmerSize, 100);
+
+ MinHashSketch h1 = new MinHashSketch(a, kmerSize, 8000);
+ MinHashSketch h2 = new MinHashSketch(b, kmerSize, 8000);
+
+ MinHashBitSketch hb1 = new MinHashBitSketch(a, kmerSize, 100);
+ MinHashBitSketch hb2 = new MinHashBitSketch(b, kmerSize, 100);
+
+ System.err.println(s1.jaccard(s2));
+ System.err.println(h1.jaccard(h2));
+ System.err.println(hb1.jaccard(hb2));
+ */
+ }
+
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/main/EstimateROC.java b/src/main/java/edu/umd/marbl/mhap/main/EstimateROC.java
new file mode 100755
index 0000000..4b6978f
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/main/EstimateROC.java
@@ -0,0 +1,815 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2014 by Konstantin Berlin and Sergey Koren
+ * University Of Maryland
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.main;
+
+import jaligner.Alignment;
+import jaligner.SmithWatermanGotoh;
+import jaligner.NeedlemanWunschGotoh;
+import jaligner.matrix.MatrixLoader;
+import jaligner.matrix.MatrixLoaderException;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.Calendar;
+import java.util.GregorianCalendar;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ForkJoinPool;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import java.util.stream.Stream;
+
+import edu.umd.marbl.mhap.impl.FastaData;
+import edu.umd.marbl.mhap.impl.Sequence;
+import edu.umd.marbl.mhap.utils.IntervalTree;
+import edu.umd.marbl.mhap.utils.Utils;
+
+public class EstimateROC {
+ private static final boolean ALIGN_SW = true; // computeDP: true selects SmithWatermanGotoh, false NeedlemanWunschGotoh
+ private static final double MIN_REF_OVERLAP_DIFFERENCE = 0.8; // processReference skips mappings whose read-span / reference-span ratio is below this
+ private static double MIN_IDENTITY = 0.70; // computeDP score threshold; main overrides it from args[7]
+ private static final double REF_IDENTITY_ADJUSTMENT = 0.1; // added to MIN_IDENTITY to obtain MIN_REF_IDENTITY
+ private static double MIN_REF_IDENTITY = MIN_IDENTITY + REF_IDENTITY_ADJUSTMENT; // processReference compares the identity column against this value * 100
+ private static double MIN_OVERLAP_DIFFERENCE = 0.30; // overlapMatches rejects overlaps whose relative length difference exceeds this; args[8]
+ private static final int DEFAULT_NUM_TRIALS = 10000;
+ private static final int DEFAULT_MIN_OVL = 2000;
+ private static final boolean DEFAULT_DO_DP = false;
+ private static boolean DEBUG = false; // main sets it from args[6]
+
+ private static class Pair { // two int coordinates; size() counts both end points
+ public int first;
+ public int second;
+ /** Stores the two coordinates exactly as given, without ordering them. */
+ public Pair(int startInRef, int endInRef) {
+ first = startInRef;
+ second = endInRef;
+ }
+ /** Number of positions between the smaller and the larger coordinate, both inclusive. */
+ @SuppressWarnings("unused")
+ public int size() {
+ int lo = Math.min(first, second);
+ return (Math.max(first, second) - lo + 1);
+ }
+ }
+
+ private static class Overlap {
+ // start/end coordinates of the overlap on the first (a) and second (b) sequence
+ public int afirst;
+ public int bfirst;
+ public int asecond;
+ public int bsecond;
+
+ // orientation flag and the identifiers of the two sequences
+ public boolean isFwd;
+ public String id1;
+ public String id2;
+
+ public Overlap() {
+ // nothing to initialise: callers assign the public fields directly
+ }
+
+ /**
+ * Average of the spans covered on the two sequences, rounded to the nearest integer.
+ * Each span is the distance between its two coordinates, so their order does not matter.
+ */
+ public int getSize() {
+ double spanA = (double)Math.max(this.afirst, this.asecond) - (double)Math.min(this.afirst, this.asecond);
+ double spanB = (double)Math.max(this.bfirst, this.bsecond) - (double)Math.min(this.bfirst, this.bsecond);
+ return (int)Math.round((spanA + spanB) / 2);
+ }
+
+ /**
+ * One-line rendering of the orientation, both ids and both coordinate pairs.
+ */
+ @Override
+ public String toString() {
+ return "Overlap Fwd=" + this.isFwd
+ + " Aid=" + this.id1
+ + " (" + this.afirst + ", " + this.asecond
+ + "), Bid=" + this.id2
+ + " (" + this.bfirst + ", " + this.bsecond
+ + ")";
+ }
+ }
+
+ private static Random generator = null; // shared by all instances; (re)created by the three-argument constructor
+ public static int seed = 0; // seed handed to the generator
+
+ private HashMap<String, IntervalTree<Integer>> clusters = new HashMap<String, IntervalTree<Integer>>(); // reference name -> interval tree of read indices
+ private HashMap<String, String> seqToChr = new HashMap<String, String>(10000000); // read id -> reference name
+ private HashMap<String, Integer> seqToScore = new HashMap<String, Integer>(10000000); // read id -> score of the kept mapping
+ private HashMap<String, Pair> seqToPosition = new HashMap<String, Pair>(10000000); // read id -> reference start/end
+ private HashMap<Integer, String> seqToName = new HashMap<Integer, String>(10000000); // dense index -> read id
+ private HashMap<String, Integer> seqNameToIndex = new HashMap<String, Integer>(10000000); // read id -> dense index
+ private HashMap<String, Integer> ovlNames = new HashMap<String, Integer>(10000000*10); // pair key -> overlap length
+ private HashMap<String, Overlap> ovlInfo = new HashMap<String, Overlap>(10000000*10); // pair key -> parsed overlap
+ private HashMap<Integer, String> ovlToName = new HashMap<Integer, String>(10000000*10); // dense index -> pair key
+
+ private int minOvlLen = DEFAULT_MIN_OVL;
+ private int numTrials = DEFAULT_NUM_TRIALS;
+ private boolean doDP = false;
+ private long tp = 0; // confusion-matrix counters filled by the estimate* methods
+ private long fn = 0;
+ private long tn = 0;
+ private long fp = 0;
+ private double ppv = 0;
+ private Sequence[] dataSeq = null; // sequences read by loadFasta; null until it has run
+
+ /** Prints the positional command-line arguments understood by main to standard error. */
+ public static void printUsage() {
+ System.err.println("This program uses random sampling to estimate PPV/Sensitivity/Specificity");
+ System.err.println("The sequences in the fasta file used to generate the truth must be sequentially numbered from 1 to N!");
+ System.err.println("\t1. A blasr M4 file mapping sequences to a reference (or reference subset)");
+ System.err.println("\t2. All-vs-all mappings of same sequences in CA ovl format");
+ System.err.println("\t3. Fasta sequences sequentially numbered from 1 to N.");
+ System.err.println("\t4. Minimum overlap length (default: " + DEFAULT_MIN_OVL + ")");
+ System.err.println("\t5. Number of random trials, 0 means full compute (default : " + DEFAULT_NUM_TRIALS + ")");
+ System.err.println("\t6. Compute DP during PPV true/false");
+ System.err.println("\t7. Debug output true/false");
+ // arguments 8 and 9 are read by main (args[7], args[8]) but were missing from the usage text
+ System.err.println("\t8. Minimum identity, as a fraction (default: " + MIN_IDENTITY + ")");
+ System.err.println("\t9. Maximum relative overlap length difference (default: " + MIN_OVERLAP_DIFFERENCE + ")");
+ }
+
+ public static void main(String[] args) throws Exception {
+ if (args.length < 3) { // the reference mapping, the overlap file and the fasta file are mandatory
+ printUsage();
+ System.exit(1);
+ }
+ EstimateROC g = null;
+ if (args.length > 5) { // pick the constructor matching the number of optional arguments supplied
+ g = new EstimateROC(Integer.parseInt(args[3]), Integer.parseInt(args[4]), Boolean.parseBoolean(args[5]));
+ } else if (args.length > 4) {
+ g = new EstimateROC(Integer.parseInt(args[3]), Integer.parseInt(args[4]));
+ } else if (args.length > 3) {
+ g = new EstimateROC(Integer.parseInt(args[3]));
+ } else {
+ g = new EstimateROC();
+ }
+ if (args.length > 6) {
+ DEBUG = Boolean.parseBoolean(args[6]);
+ }
+ if (args.length > 7) {
+ MIN_IDENTITY = Double.parseDouble(args[7]);
+ MIN_REF_IDENTITY = MIN_IDENTITY + REF_IDENTITY_ADJUSTMENT; // keep the derived threshold in step with the override
+ }
+ if (args.length > 8) {
+ MIN_OVERLAP_DIFFERENCE = Double.parseDouble(args[8]);
+ }
+ // echo the effective settings before any file is read
+ System.err.println("Running, reference: " + args[0] + " matches: " + args[1]);
+ System.err.println("Number trials: " + (g.numTrials == 0 ? "all" : g.numTrials));
+ System.err.println("Minimum ovl: " + g.minOvlLen);
+ System.err.println("Minimum acceptable %" + MIN_IDENTITY);
+ System.err.println("Minimum acceptable shift " + MIN_OVERLAP_DIFFERENCE);
+ System.err.println("Minimum overlap to ref %" + MIN_REF_IDENTITY);
+
+ // load and cluster reference
+ System.err.print("Loading reference...");
+ long startTime = System.nanoTime();
+ long totalTime = startTime;
+ g.processReference(args[0]);
+ System.err.println("done " + (System.nanoTime() - startTime) * 1.0e-9 + "s.");
+
+ // load fasta
+ System.err.print("Loading fasta...");
+ startTime = System.nanoTime();
+ g.loadFasta(args[2]);
+ System.err.println("done " + (System.nanoTime() - startTime) * 1.0e-9 + "s.");
+
+ // load matches (after the fasta, so sequence lengths are available to the overlap parser)
+ System.err.print("Loading matches...");
+ startTime = System.nanoTime();
+ g.processOverlaps(args[1]);
+ System.err.println("done " + (System.nanoTime() - startTime) * 1.0e-9 + "s.");
+
+ if (g.numTrials == 0) { // zero trials selects the exhaustive all-pairs computation
+ System.err.print("Computing full statistics O(" + g.seqToName.size() + "^2) operations!...");
+ startTime = System.nanoTime();
+ g.fullEstimate();
+ System.err.println("done " + (System.nanoTime() - startTime) * 1.0e-9 + "s.");
+ } else {
+ System.err.print("Computing sensitivity...");
+ startTime = System.nanoTime();
+ g.estimateSensitivity();
+ System.err.println("done " + (System.nanoTime() - startTime) * 1.0e-9 + "s.");
+
+ // now estimate FP/TN by picking random match and checking reference
+ // mapping
+ System.err.print("Computing specificity...");
+ startTime = System.nanoTime();
+ g.estimateSpecificity();
+ System.err.println("done " + (System.nanoTime() - startTime) * 1.0e-9 + "s.");
+
+ // last but not least PPV, pick random subset of our matches and see what percentage are true
+ System.err.print("Computing PPV...");
+ startTime = System.nanoTime();
+ g.estimatePPV();
+ System.err.println("done " + (System.nanoTime() - startTime) * 1.0e-9 + "s.");
+ }
+ System.err.println("Total time: " + (System.nanoTime() - totalTime) * 1.0e-9 + "s.");
+ // results go to stdout, progress and timing above go to stderr
+ System.out.println("Estimated sensitivity:\t"
+ + Utils.DECIMAL_FORMAT.format((double) g.tp / (double)(g.tp + g.fn)));
+ System.out.println("Estimated specificity:\t"
+ + Utils.DECIMAL_FORMAT.format((double) g.tn / (double)(g.fp + g.tn)));
+ System.out.println("Estimated PPV:\t "
+ + Utils.DECIMAL_FORMAT.format(g.ppv));
+ }
+
+ public EstimateROC() {
+ this(DEFAULT_MIN_OVL);
+ }
+ // each shorter constructor supplies the next default and delegates to the longer one
+ public EstimateROC(int minOvlLen) {
+ this(minOvlLen, DEFAULT_NUM_TRIALS);
+ }
+
+ public EstimateROC(int minOvlLen, int numTrials) {
+ this(minOvlLen, numTrials, DEFAULT_DO_DP);
+ }
+ /** Stores the three settings and (re)creates the shared random generator from the static seed. */
+ @SuppressWarnings("unused")
+ public EstimateROC(int minOvlLen, int numTrials, boolean doDP) {
+ this.doDP = doDP;
+ this.numTrials = numTrials;
+ this.minOvlLen = minOvlLen;
+ if (false) {
+ GregorianCalendar now = new GregorianCalendar();
+ int sec = now.get(Calendar.SECOND);
+ int min = now.get(Calendar.MINUTE);
+ int hour = now.get(Calendar.HOUR_OF_DAY);
+ int day = now.get(Calendar.DAY_OF_MONTH);
+ int month = now.get(Calendar.MONTH);
+ int year = now.get(Calendar.YEAR);
+ seed = year + 65 * (month + 12 * (day + 31 * (hour + 24 * (min + 60 * sec))));
+ }
+ // the clock-based seeding above is switched off, so the generator always starts from the static seed
+ generator = new Random(seed);
+ }
+
+ private static int getSequenceId(String id) {
+ final int oneBased = Integer.parseInt(id); // names are the 1-based numbers of the fasta records
+ return oneBased - 1;
+ }
+ private static String getOvlName(String id, String id2) { // same key for (a, b) and (b, a)
+ final boolean inOrder = id.compareTo(id2) <= 0;
+ return (inOrder ? id : id2) + "_" + (inOrder ? id2 : id);
+ }
+ private String pickRandomSequence() {
+ // seqToName is keyed by the dense indices 0..size-1, so any drawn index is a valid key
+ return this.seqToName.get(generator.nextInt(this.seqToName.size()));
+ }
+
+ private String pickRandomMatch() {
+ // ovlToName is keyed by the dense indices 0..size-1, so any drawn index is a valid key
+ return this.ovlToName.get(generator.nextInt(this.ovlToName.size()));
+ }
+
+ /** Reference-range overlap of two mapped reads as reported by Utils.getRangeOverlap; exits the JVM when they map to different references. */
+ private int getOverlapSize(String id, String id2) {
+ // positions are only comparable when both reads map to the same reference sequence
+ if (!this.seqToChr.get(id).equalsIgnoreCase(this.seqToChr.get(id2))) {
+ System.err.println("Error: comparing wrong chromosomes betweeen sequences " + id + " and sequence " + id2);
+ System.exit(1);
+ }
+
+ Pair posA = this.seqToPosition.get(id);
+ Pair posB = this.seqToPosition.get(id2);
+ return Utils.getRangeOverlap(posA.first, posA.second, posB.first, posB.second);
+ }
+
+ /** Ids of all other reads whose reference range overlaps the given read's range by at least min. */
+ private HashSet<String> getSequenceMatches(String id, int min) {
+ final String chr = this.seqToChr.get(id);
+ final Pair self = this.seqToPosition.get(id);
+ final HashSet<String> result = new HashSet<String>();
+
+ // candidates are the read indices the interval tree returns for this read's reference range
+ for (Integer candidate : this.clusters.get(chr).get(self.first, self.second)) {
+ String id2 = this.seqToName.get(candidate);
+ Pair other = this.seqToPosition.get(id2);
+ if (!chr.equalsIgnoreCase(this.seqToChr.get(id2))) {
+ System.err.println("Error: comparing wrong chromosomes betweeen sequences " + id + " and sequence in its cluster " + id2);
+ System.exit(1);
+ }
+
+ int overlap = Utils.getRangeOverlap(self.first, self.second, other.first, other.second);
+ // keep sufficiently overlapping reads, never the query read itself
+ if (overlap < min || id.equalsIgnoreCase(id2)) {
+ continue;
+ }
+ result.add(id2);
+ }
+
+ return result;
+ }
+
+ private Overlap getOverlapInfo(String line) { // parses one overlap line; the format is recognised by its column count
+ Overlap overlap = new Overlap();
+ String[] splitLine = line.trim().split("\\s+");
+
+ try {
+ // CA format: 6 or 7 columns (id1, id2, orientation, a-offset, b-offset, score)
+ if (splitLine.length == 7 || splitLine.length == 6) {
+ overlap.id1 = splitLine[0];
+ overlap.id2 = splitLine[1];
+ @SuppressWarnings("unused")
+ double score = Double.parseDouble(splitLine[5]) * 5;
+ int aoffset = Integer.parseInt(splitLine[3]);
+ int boffset = Integer.parseInt(splitLine[4]);
+ overlap.isFwd = "N".equalsIgnoreCase(splitLine[2]);
+ if (this.dataSeq != null) { // coordinates are derived from the offsets and need the sequence lengths
+ int alen = this.dataSeq[Integer.parseInt(overlap.id1)-1].length();
+ int blen = this.dataSeq[Integer.parseInt(overlap.id2)-1].length();
+ overlap.afirst = Math.max(0, aoffset);
+ overlap.asecond = Math.min(alen, alen + boffset);
+ overlap.bfirst = -1*Math.min(0, aoffset);
+ overlap.bsecond = Math.min(blen, blen - boffset);
+ }
+ // mhap format: 12 columns with explicit start/end coordinates
+ } else if (splitLine.length == 12) {
+ overlap.id1 = splitLine[0];
+ overlap.id2 = splitLine[1];
+ @SuppressWarnings("unused")
+ double score = Double.parseDouble(splitLine[2]);
+ overlap.isFwd = Integer.parseInt(splitLine[8]) == 0;
+ if (this.dataSeq != null) {
+ int alen = this.dataSeq[getSequenceId(overlap.id1)].length();
+ int blen = this.dataSeq[getSequenceId(overlap.id2)].length();
+ overlap.afirst = Integer.parseInt(splitLine[5]);
+ overlap.asecond = Integer.parseInt(splitLine[6]);
+ overlap.bfirst = Integer.parseInt(splitLine[9]);
+ overlap.bsecond = Integer.parseInt(splitLine[10]);
+ if (overlap.asecond > alen) { // clamp the end coordinates to the sequence lengths
+ overlap.asecond = alen;
+ }
+ if (overlap.bsecond > blen) {
+ overlap.bsecond = blen;
+ }
+ }
+ // blasr format: 13 columns and no '[' anywhere in the line
+ } else if (splitLine.length == 13 && !line.contains("[")) {
+ overlap.afirst = Integer.parseInt(splitLine[5]);
+ overlap.asecond = Integer.parseInt(splitLine[6]);
+ overlap.bfirst = Integer.parseInt(splitLine[9]);
+ overlap.bsecond = Integer.parseInt(splitLine[10]);
+ overlap.isFwd = (Integer.parseInt(splitLine[8]) == 0);
+ if (!overlap.isFwd) { // reverse hits: b coordinates are flipped using the length in column 11
+ overlap.bsecond = Integer.parseInt(splitLine[11]) - Integer.parseInt(splitLine[9]);
+ overlap.bfirst = Integer.parseInt(splitLine[11]) - Integer.parseInt(splitLine[10]);
+ }
+ overlap.id1 = splitLine[0];
+ if (overlap.id1.indexOf("/") != -1) { // keep only the part of the id before the first '/'
+ overlap.id1 = overlap.id1.substring(0,
+ splitLine[0].indexOf("/"));
+ }
+ if (overlap.id1.indexOf(",") != -1) { // for comma-separated ids the second field is used
+ overlap.id1 = overlap.id1.split(",")[1];
+ }
+ overlap.id2 = splitLine[1];
+ if (overlap.id2.indexOf(",") != -1) {
+ overlap.id2 = overlap.id2.split(",")[1];
+ }
+ if (this.dataSeq != null) {
+ int alen = this.dataSeq[getSequenceId(overlap.id1)].length();
+ int blen = this.dataSeq[getSequenceId(overlap.id2)].length();
+ if (overlap.asecond > alen) {
+ overlap.asecond = alen;
+ }
+ if (overlap.bsecond > blen) {
+ overlap.bsecond = blen;
+ }
+ }
+ // bracketed format, e.g.: 1 1,182 n [ 4,746.. 8,108] x [ 0.. 3,896] : < 982 diffs ( 34 trace pts)
+ } else if (splitLine.length >= 13 && splitLine.length <= 18) {
+ overlap.id1 = splitLine[0].replaceAll(",", "");
+ overlap.id2 = splitLine[1].replaceAll(",", "");
+ overlap.isFwd = (splitLine[2].equalsIgnoreCase("n"));
+ String[] splitTwo = line.split("\\[");
+ String aInfo = splitTwo[1].substring(0, splitTwo[1].indexOf("]"));
+ String bInfo = splitTwo[2].substring(0, splitTwo[2].indexOf("]"));
+ String[] aSplit = aInfo.replaceAll(",", "").split("\\.\\."); // drop thousands separators, then split "start..end"
+ String[] bSplit = bInfo.replaceAll(",", "").split("\\.\\.");
+ overlap.afirst=Integer.parseInt(aSplit[0].trim());
+ overlap.asecond=Integer.parseInt(aSplit[1].trim());
+ overlap.bfirst=Integer.parseInt(bSplit[0].trim());
+ overlap.bsecond=Integer.parseInt(bSplit[1].trim());
+ if (!overlap.isFwd) { // NOTE(review): dataSeq is dereferenced here without the null check the other branches use
+ overlap.bsecond = this.dataSeq[getSequenceId(overlap.id2)].length() - Integer.parseInt(bSplit[0].trim());
+ overlap.bfirst = this.dataSeq[getSequenceId(overlap.id2)].length() - Integer.parseInt(bSplit[1].trim());
+ }
+ }
+ } catch (NumberFormatException e) { // the warning is printed and the partially filled Overlap is still returned
+ System.err.println("Warning: could not parse input line: " + line
+ + " " + e.getMessage());
+ }
+
+ return overlap;
+ }
+
+ private void loadFasta(String file) throws IOException {
+ FastaData data = new FastaData(file, 0);
+ data.enqueueFullFile();
+ // the array is sized from the count reported once the full file has been enqueued
+ this.dataSeq = new Sequence[data.getNumberProcessed()];
+ for (int slot = 0; !data.isEmpty(); slot++) {
+ this.dataSeq[slot] = data.dequeue();
+ }
+ }
+
+ /**
+ * Loads the overlap file, keeping for every unordered read pair the longest overlap seen so far.
+ * Lines that yield no ids, self overlaps and reads without a reference mapping are skipped.
+ */
+ private void processOverlaps(String file) throws Exception {
+ // try-with-resources closes the reader even when reading or parsing throws
+ try (BufferedReader bf = new BufferedReader(new InputStreamReader(
+ new FileInputStream(file)))) {
+ String line = null;
+ int counter = 0;
+ while ((line = bf.readLine()) != null) {
+ Overlap ovl = getOverlapInfo(line);
+ int ovlLen = ovl.getSize();
+ String id = ovl.id1;
+ String id2 = ovl.id2;
+
+ if (id == null || id2 == null) {
+ continue;
+ }
+ if (id.equalsIgnoreCase(id2)) {
+ continue;
+ }
+ if (this.seqToChr.get(id) == null || this.seqToChr.get(id2) == null) {
+ continue;
+ }
+ String ovlName = getOvlName(id, id2);
+ Integer knownLen = this.ovlNames.get(ovlName);
+ if (knownLen != null && ovlLen < knownLen) {
+ continue;
+ }
+
+ // a pair seen before keeps its sampling index; only its length and info are refreshed
+ if (knownLen == null) {
+ this.ovlToName.put(counter, ovlName);
+ counter++;
+ }
+ this.ovlNames.put(ovlName, ovlLen);
+ this.ovlInfo.put(ovlName, ovl);
+
+ if (counter % 100000 == 0) {
+ System.err.println("Loaded " + counter);
+ }
+ }
+ }
+ System.err.print("Processed " + this.ovlNames.size() + " overlaps");
+ if (this.ovlNames.isEmpty()) {
+ System.err
+ .println("Error: No sequence matches to reference loaded!");
+ System.exit(1);
+ }
+ }
+
+ /**
+ * Loads read-to-reference mappings from a blasr M4 file, one mapping per line, for example:
+ * 18903/0_100 ref000001|lambda_NEB3011 -462 96.9697 0 0 99 100 0 2 101 48502 254
+ * Columns read: 0 read id, 1 reference name, 2 score, 3 identity, 4 read strand, 5-7 read start/end/length,
+ * 8 reference strand, 9-11 reference start/end/length. Per read, the mapping with the lowest score
+ * that passes the identity and span filters is kept and then added to its reference's interval tree.
+ **/
+ @SuppressWarnings("unused")
+ private void processReference(String file) throws Exception {
+ try (BufferedReader bf = new BufferedReader(new InputStreamReader(
+ new FileInputStream(file)))) { // closed automatically, also when a line fails to parse
+ String line = null;
+ int counter = 0;
+ while ((line = bf.readLine()) != null) {
+ String[] splitLine = line.trim().split("\\s+");
+
+ String id = splitLine[0];
+ if (id.indexOf("/") != -1) {
+ id = id.substring(0, splitLine[0].indexOf("/"));
+ }
+ if (id.indexOf(",") != -1) {
+ id = id.split(",")[1];
+ }
+ double idy = Double.parseDouble(splitLine[3]);
+ int start = Integer.parseInt(splitLine[5]);
+ int end = Integer.parseInt(splitLine[6]);
+ int length = Integer.parseInt(splitLine[7]);
+ int seqIsFwd = Integer.parseInt(splitLine[4]);
+ if (seqIsFwd != 0) {
+ System.err.println("Error: malformed line, first sequences should always be in fwd orientation");
+ System.exit(1);
+ }
+ int startInRef = Integer.parseInt(splitLine[9]);
+ int endInRef = Integer.parseInt(splitLine[10]);
+ int refLen = Integer.parseInt(splitLine[11]);
+ int isRev = Integer.parseInt(splitLine[8]);
+ int score = Integer.parseInt(splitLine[2]);
+ if (isRev == 1) { // reverse-strand hits: mirror the interval using the reference length
+ int tmp = refLen - endInRef;
+ endInRef = refLen - startInRef;
+ startInRef = tmp;
+ }
+ if (idy < MIN_REF_IDENTITY*100) {
+ continue;
+ }
+ double diff = ((double)(end - start) / (double)(endInRef-startInRef));
+ if (diff < MIN_REF_OVERLAP_DIFFERENCE) {
+ continue;
+ }
+ String chr = splitLine[1];
+ if (!this.clusters.containsKey(chr)) {
+ this.clusters.put(chr, new IntervalTree<Integer>());
+ }
+ if (this.seqToPosition.containsKey(id)) {
+ if (score < this.seqToScore.get(id)) {
+ // a lower score replaces the mapping kept so far; the read keeps its index
+ this.seqToPosition.put(id, new Pair(startInRef, endInRef));
+ this.seqToChr.put(id, chr);
+ this.seqToScore.put(id, score);
+ }
+ } else {
+ this.seqToPosition.put(id, new Pair(startInRef, endInRef));
+ this.seqToChr.put(id, chr);
+ this.seqToName.put(counter, id);
+ this.seqNameToIndex.put(id, counter);
+ this.seqToScore.put(id, score);
+ counter++;
+ }
+ }
+ }
+ for (String id : this.seqToPosition.keySet()) {
+ String chr = this.seqToChr.get(id);
+ if (!this.clusters.containsKey(chr)) {
+ this.clusters.put(chr, new IntervalTree<Integer>());
+ }
+ Pair p = this.seqToPosition.get(id);
+ this.clusters.get(chr).addInterval(p.first, p.second,
+ this.seqNameToIndex.get(id));
+ }
+
+ System.err.print("Processed " + this.clusters.size() + " chromosomes, "
+ + this.seqToPosition.size() + " sequences matching ref");
+ if (this.seqToPosition.isEmpty()) {
+ System.err
+ .println("Error: No sequence matches to reference loaded!");
+ System.exit(1);
+ }
+ }
+
+ private boolean overlapExists(String id, String id2) {
+ final String key = getOvlName(id, id2); // order-independent pair key
+ return this.ovlNames.containsKey(key);
+ }
+ /** True when an overlap was reported for the pair and its size is within MIN_OVERLAP_DIFFERENCE (relative) of the reference overlap. */
+ private boolean overlapMatches(String id, String m) {
+ // look the overlap up first: with none reported there is nothing to compare, and getOverlapSize would exit the JVM for reads on different references
+ Overlap ovl = this.ovlInfo.get(getOvlName(id, m));
+ if (ovl == null) {
+ return false;
+ }
+ int refOverlap = getOverlapSize(id, m);
+ int diff = Math.abs(ovl.getSize() - refOverlap);
+ double diffPercent = (double)diff / (double)refOverlap;
+ if (DEBUG) { System.err.println("Overlap " + ovl + " " + ovl.getSize() + " versus ref " + refOverlap + " " + " diff is " + diff + "(" + diffPercent + ")"); }
+ // written as a negation so that a NaN ratio (0 / 0) still counts as a match, as it always has
+ return !(diffPercent > MIN_OVERLAP_DIFFERENCE);
+ }
+
+ private void checkMatches(String id, HashSet<String> matches) {
+ for (String m : matches) {
+ if (!overlapMatches(id, m)) {
+ this.fn++;
+ if (DEBUG) {
+ System.err.println("Overlap between sequences: " + id + ", " + m + " is missing.");
+ System.err.println(">" + id + " reference location " + this.seqToChr.get(id) + " " + this.seqToPosition.get(id).first + ", " + this.seqToPosition.get(id).second);
+ System.err.println(this.dataSeq[Integer.parseInt(id)-1].getSquenceString());
+ System.err.println(">" + m + " reference location " + this.seqToChr.get(m) + " " + this.seqToPosition.get(m).first + ", " + this.seqToPosition.get(m).second);
+ System.err.println(this.dataSeq[Integer.parseInt(m)-1].getSquenceString());
+ }
+ } else {
+ this.tp++;
+ }
+ }
+ }
+
+ /**
+ * Fraction of aligned columns in which both rows carry the same non-gap character,
+ * relative to the length of the shorter row.
+ *
+ * @param alignment alignment whose two character rows are compared column by column
+ * @return matches divided by the shorter row length
+ */
+ private static double getScore(Alignment alignment) {
+ final char gap = '-';
+ char[] rowA = alignment.getSequence1();
+ char[] rowB = alignment.getSequence2();
+
+ // a column beyond the end of either row can never count as a match,
+ // so scanning the shared prefix yields the same tally as scanning the longer row
+ int shared = Math.min(rowA.length, rowB.length);
+ int matches = 0;
+ for (int col = 0; col < shared; col++) {
+ boolean sameChar = rowA[col] == rowB[col];
+ if (sameChar && rowA[col] != gap) {
+ matches++;
+ }
+ }
+
+ // an empty row gives 0 / 0.0, i.e. NaN
+ return (matches / (double)shared);
+ }
+
+
+ private boolean computeDP(String id, String id2) { // re-aligns the stored overlap region of a pair with jaligner
+ if (this.doDP == false) {
+ return false;
+ }
+ Logger logger = null;
+ if (ALIGN_SW) {
+ logger = Logger.getLogger(SmithWatermanGotoh.class.getName());
+ } else {
+ logger = Logger.getLogger(NeedlemanWunschGotoh.class.getName());
+ }
+ logger.setLevel(Level.OFF); // switch off logging for the aligner class and the matrix loader
+ logger = Logger.getLogger(MatrixLoader.class.getName());
+ logger.setLevel(Level.OFF);
+ Overlap ovl = this.ovlInfo.get(getOvlName(id, id2)); // NOTE(review): dereferenced below without a null check
+ System.err.println("Aligning sequence " + ovl.id1 + " to " + ovl.id2 + " " + ovl.bfirst + " to " + ovl.bsecond + " and " + ovl.isFwd + " and " + ovl.afirst + " " + ovl.asecond);
+ // only the overlapping substrings are aligned; for reverse overlaps the second one is passed through Utils.rc first
+ jaligner.Sequence s1 = new jaligner.Sequence(this.dataSeq[getSequenceId(ovl.id1)].getSquenceString().substring(ovl.afirst, ovl.asecond));
+ jaligner.Sequence s2 = null;
+ if (ovl.isFwd) {
+ s2 = new jaligner.Sequence(this.dataSeq[getSequenceId(ovl.id2)].getSquenceString().substring(ovl.bfirst, ovl.bsecond));
+ } else {
+ s2 = new jaligner.Sequence(Utils.rc(this.dataSeq[getSequenceId(ovl.id2)].getSquenceString().substring(ovl.bfirst, ovl.bsecond)));
+ }
+ Alignment alignment;
+ try {
+ if (ALIGN_SW) {
+ alignment = SmithWatermanGotoh.align(s1, s2, MatrixLoader.load("MATCH"), 2f, 1f);
+ } else {
+ alignment = NeedlemanWunschGotoh.align(s1, s2, MatrixLoader.load("MATCH"), 2f, 1f);
+ }
+ } catch (MatrixLoaderException e) { // a matrix that cannot be loaded counts as "no valid alignment"
+ return false;
+ }
+ double score = getScore(alignment); // alignment.getIdentity() / 100;
+ if (DEBUG) {
+ System.err.println(alignment.getSummary());
+ System.err.println("My score: " + score);
+ System.err.println (new jaligner.formats.Pair().format(alignment));
+ }
+ return (score > MIN_IDENTITY && alignment.getLength() > this.minOvlLen); // both the identity and the length threshold must be exceeded
+ }
+
+ private void estimateSensitivity() {
+ // TP/FN are estimated by sampling reads at random and checking every reference neighbour of each
+ for (int trial = 0; trial < this.numTrials; trial++) {
+ String id;
+ HashSet<String> matches;
+
+ // redraw until the sampled read has at least one reference neighbour
+ do {
+ id = pickRandomSequence();
+ matches = getSequenceMatches(id, this.minOvlLen);
+ } while (matches == null || matches.size() == 0);
+
+ if (DEBUG) { System.err.println("Estimated sensitivity trial #" + trial + " " + id + " matches " + matches); }
+ checkMatches(id, matches);
+ }
+ }
+
+ private void estimateSpecificity() {
+ // FP/TN are estimated from random pairs of distinct reads
+ for (int trial = 0; trial < this.numTrials; trial++) {
+ String id = pickRandomSequence();
+ String other = pickRandomSequence();
+ while (id.equalsIgnoreCase(other)) {
+ other = pickRandomSequence();
+ }
+
+ HashSet<String> matches = getSequenceMatches(id, 0);
+
+ // pairs the reference places together are counted in neither bucket
+ if (matches.contains(other)) {
+ continue;
+ }
+ if (overlapExists(id, other)) {
+ this.fp++;
+ } else {
+ this.tn++;
+ }
+ }
+ }
+
+ /** Samples numTrials reported overlaps of at least minOvlLen and records in ppv the share confirmed by the reference or by DP. */
+ private void estimatePPV() throws InterruptedException, ExecutionException {
+ AtomicInteger numTP = new AtomicInteger();
+ ForkJoinPool forkJoinPool = new ForkJoinPool(Runtime.getRuntime().availableProcessors()/2+1);
+ try {
+ forkJoinPool.submit(() ->
+ Stream.iterate(0, i->i+1).limit(this.numTrials).parallel().forEach(i-> {
+ int ovlLen = 0;
+ String[] ovl = null;
+ String ovlName = null;
+ while (ovlLen < this.minOvlLen) {
+ // pick an overlap
+ ovlName = pickRandomMatch();
+ Overlap o = this.ovlInfo.get(ovlName);
+ ovlLen = Utils.getRangeOverlap(o.afirst, o.asecond, o.bfirst, o.bsecond);
+ }
+ if (ovlName == null) {
+ System.err.println("Could not find any computed overlaps > " + this.minOvlLen);
+ System.exit(1);
+ } else {
+ ovl = ovlName.split("_");
+ String id = ovl[0];
+ String id2 = ovl[1];
+
+ HashSet<String> matches = getSequenceMatches(id, 0);
+ if (matches.contains(id2)) {
+ numTP.getAndIncrement();
+ } else if (computeDP(id, id2)) {
+ numTP.getAndIncrement();
+ } else {
+ if (DEBUG) { System.err.println("Overlap between sequences: " + id + ", " + id2 + " is not correct."); }
+ }
+ }
+ })
+ ).get();
+ } finally {
+ // the pool is private to this call: release its worker threads whether or not the trials succeeded
+ forkJoinPool.shutdown();
+ }
+ // now our formula for PPV. Estimate percent of our matches which are true
+ this.ppv = numTP.doubleValue() / (double)this.numTrials;
+ }
+
+ /**
+ * Exhaustive variant of the estimate: classifies every unordered pair of mapped reads as TP, FN, TN or FP
+ * and derives the PPV from the resulting counts.
+ */
+ @SuppressWarnings("cast")
+ private void fullEstimate() {
+ for (int i = 0; i < this.seqToName.size(); i++) {
+ String id = this.seqToName.get(i);
+ // the reference neighbours depend only on id, so they are looked up once per outer read, on first use
+ HashSet<String> matches = null;
+ for (int j = i+1; j < this.seqToName.size(); j++) {
+ String id2 = this.seqToName.get(j);
+ if (id == null || id2 == null) { continue; }
+ if (matches == null) { matches = getSequenceMatches(id, 0); }
+
+ if (!overlapMatches(id, id2)) {
+ if (!matches.contains(id2)) {
+ this.tn++;
+ } else if (getOverlapSize(id, id2) > this.minOvlLen) {
+ this.fn++;
+ }
+ } else if (matches.contains(id2) || computeDP(id, id2)) {
+ // confirmed by the reference, or failing that by the DP alignment
+ this.tp++;
+ } else {
+ this.fp++;
+ }
+ }
+ }
+ this.ppv = (double) this.tp / ((double)this.tp+(double)this.fp);
+ }
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/main/GetHistogramStats.java b/src/main/java/edu/umd/marbl/mhap/main/GetHistogramStats.java
new file mode 100755
index 0000000..a6224e8
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/main/GetHistogramStats.java
@@ -0,0 +1,103 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2014 by Konstantin Berlin and Sergey Koren
+ * University Of Maryland
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.main;
+
+import java.io.BufferedReader;
+import java.util.TreeMap;
+
+import edu.umd.marbl.mhap.utils.Utils;
+
+ public class GetHistogramStats { // mean, standard deviation and cumulative cutoff of a "<value> <count>" histogram file
+ private static final int NUM_SD = 7; // stdev multiplier used for the last column of toString()
+ private TreeMap<Integer, Long> histogram = new TreeMap<Integer, Long>(); // value -> count, iterated in ascending value order
+ private double percent = 0.99; // cumulative fraction at which process() picks the cutoff
+ private double mean = 0;
+ private double stdev = 0;
+ private long cut = 0;
+
+ public GetHistogramStats(String fileName, double p) {
+ // Loads whitespace-separated "<value> <count>" lines from fileName and remembers the cutoff fraction p.
+ // try-with-resources closes the reader even when a line fails to parse (it used to leak in that case).
+ try (BufferedReader bf = Utils.getFile(fileName, "hist")) {
+ String line = null;
+ while ((line = bf.readLine()) != null) {
+ String[] split = line.trim().split("\\s+");
+ int val = Integer.parseInt(split[0]);
+ long count = Long.parseLong(split[1]);
+ this.histogram.put(val, count);
+ }
+ this.percent = p;
+ } catch (Exception e) {
+ // best effort, as before: report the failure and keep whatever was read so far
+ e.printStackTrace();
+ }
+ }
+
+ public void process() throws NumberFormatException {
+ // Computes mean and stdev with a count-weighted running update (one step per bin, not one per observation),
+ // then sets cut to the first value whose cumulative value*count share of the total exceeds percent.
+ double variance = 0;
+ double sum = 0;
+ long total = 0;
+
+ for (int val : this.histogram.keySet()) {
+ long count = this.histogram.get(val);
+ if (count <= 0) { continue; } // bins without observations contribute nothing to mean, variance or sum
+ total += count;
+ double delta = (val - this.mean);
+ this.mean += delta * count / total;
+ variance += count * delta * (val - this.mean);
+ sum += (double) val * count;
+ }
+ this.stdev = (total > 0) ? Math.sqrt(variance / total) : 0; // population stdev; empty input gives 0 instead of NaN
+
+ double runningSum = 0;
+ for (int val : this.histogram.keySet()) {
+ long count = this.histogram.get(val);
+ runningSum += (double) val * count;
+ if ((runningSum / sum) > this.percent) {
+ this.cut = val;
+ break;
+ }
+ }
+ }
+
+ @Override
+ public String toString() { // tab-separated: mean, stdev, an empty column, cut, mean + NUM_SD * stdev
+ return Utils.DECIMAL_FORMAT.format(this.mean) + "\t" + Utils.DECIMAL_FORMAT.format(this.stdev) + "\t" + "\t" + this.cut
+ + "\t" + Utils.DECIMAL_FORMAT.format(this.mean + NUM_SD * this.stdev);
+ }
+
+ public static void main(String[] args) throws NumberFormatException { // args: <histogram file> <cutoff fraction>
+ GetHistogramStats s = new GetHistogramStats(args[0], Double.parseDouble(args[1]));
+ s.process();
+ System.out.println(s.toString());
+ }
+ }
diff --git a/src/main/java/edu/umd/marbl/mhap/main/KmerStatSimulator.java b/src/main/java/edu/umd/marbl/mhap/main/KmerStatSimulator.java
new file mode 100644
index 0000000..860ddc6
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/main/KmerStatSimulator.java
@@ -0,0 +1,481 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2014 by Konstantin Berlin and Sergey Koren
+ * University Of Maryland
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.main;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.ListIterator;
+import java.util.Random;
+import java.util.GregorianCalendar;
+import java.util.Calendar;
+import java.util.HashSet;
+import java.io.BufferedReader;
+import java.io.PrintStream;
+
+import edu.umd.marbl.mhap.impl.FastaData;
+import edu.umd.marbl.mhap.sketch.MinHashSketch;
+import edu.umd.marbl.mhap.utils.Utils;
+
+public class KmerStatSimulator {
+ private boolean verbose = false;
+ private int kmer = -1;
+ private int overlap = 100;
+
+ private ArrayList<Double> randomJaccard = new ArrayList<Double>();
+ private ArrayList<Double> randomMinHash = new ArrayList<Double>();
+ private ArrayList<Double> randomMerCounts = new ArrayList<Double>();
+ private String reference = null;
+ private double requestedLength = 5000;
+
+ private double sharedCount = 0;
+ private ArrayList<Double> sharedJaccard = new ArrayList<Double>();
+ private ArrayList<Double> sharedMinHash = new ArrayList<Double>();
+ private ArrayList<Double> sharedMerCounts = new ArrayList<Double>();
+ private HashMap<String, Integer> skipMers = new HashMap<String, Integer>();
+
+ private int totalTrials = 10000;
+ private boolean halfError = false;
+
+ private static Random generator = null;
+ public static int seed = 0;
+
+ public static void main(String[] args) throws Exception {
+ // Argument count picks the mode: 5-6 arguments run "Usage 2" (sequence simulation only, kmer stays -1),
+ // 7 or more run the full comparison mode; anything shorter prints the usage text and exits.
+ if (args.length < 5) {
+ printUsage();
+ System.exit(1);
+ }
+ final boolean compareMode = args.length >= 7;
+
+ KmerStatSimulator sim = new KmerStatSimulator();
+ sim.totalTrials = Integer.parseInt(args[0]);
+
+ if (!compareMode) {
+ // <#trials> <seq length> <insertion> <del> <subst> [reference genome]
+ sim.requestedLength = Double.parseDouble(args[1]);
+ if (args.length > 5) {
+ sim.reference = args[5];
+ }
+ sim.simulate(Double.parseDouble(args[2]), Double.parseDouble(args[3]),
+ Double.parseDouble(args[4]));
+ return;
+ }
+
+ // <#trials> <kmer size> <seq length> <overlap length> <insertion> <del> <subst> [only one sequence error] [reference genome] [kmers to ignore]
+ sim.requestedLength = Double.parseDouble(args[2]);
+ sim.kmer = Integer.parseInt(args[1]);
+ sim.overlap = Integer.parseInt(args[3]);
+ if (args.length > 7) {
+ sim.halfError = Boolean.parseBoolean(args[7]);
+ }
+ if (args.length > 8) {
+ sim.reference = args[8];
+ }
+ if (sim.overlap > sim.requestedLength) {
+ System.err.println("Cannot have overlap > sequence length");
+ System.exit(1);
+ }
+ if (args.length > 9) {
+ sim.loadSkipMers(args[9]);
+ }
+
+ // rates are parsed last, in insertion / deletion / substitution order
+ sim.simulate(Double.parseDouble(args[4]), Double.parseDouble(args[5]),
+ Double.parseDouble(args[6]));
+ }
+
+ public static void printUsage() {
+ System.err
+ .println("Example usage: simulateSharedKmers <#trials> <kmer size> <seq length> <overlap length> <insertion> <del> <subst> [only one sequence error] [reference genome] [kmers to ignore]");
+ System.err
+ .println("Usage 2: simulateSharedKmers <#trials> <seq length> <insertion> <del> <subst> [reference genome]");
+ }
+
+ @SuppressWarnings("unused")
+ public KmerStatSimulator() {
+ if (false) { // never executes: clock-based seeding is switched off, so seed keeps whatever value it already has
+ GregorianCalendar now = new GregorianCalendar();
+ int second = now.get(Calendar.SECOND);
+ int minute = now.get(Calendar.MINUTE);
+ int hour = now.get(Calendar.HOUR_OF_DAY);
+ int day = now.get(Calendar.DAY_OF_MONTH);
+ int month = now.get(Calendar.MONTH);
+ int year = now.get(Calendar.YEAR);
+ seed = year + 65 * (month + 12 * (day + 31 * (hour + 24 * (minute + 60 * second))));
+ }
+
+ generator = new Random(seed); // generator is a static field, so every new instance replaces the shared generator
+ }
+
+ private void loadSkipMers(String file) throws Exception {
+ // Reads whitespace-separated "<k-mer> <count>" lines into skipMers; the reader is now closed even when a line fails to parse.
+ try (BufferedReader bf = Utils.getFile(file, "repeats")) {
+ String line = null;
+ while ((line = bf.readLine()) != null) {
+ String[] split = line.trim().split("\\s+");
+ String mer = split[0].trim();
+ int count = Integer.parseInt(split[1]);
+ this.skipMers.put(mer, count);
+ }
+ }
+ }
+
+ private String buildRandomSequence(int length) {
+ // Concatenates `length` bases, each drawn by getRandomBase(null), i.e. with no base excluded.
+ StringBuilder bases = new StringBuilder();
+ for (int remaining = length; remaining > 0; remaining--) {
+ bases.append(getRandomBase(null));
+ }
+ return bases.toString();
+ }
+
+ public double compareKmers(String first, String second) {
+ // Returns (distinct k-mers of `second` that were kept from `first`) / (distinct k-mers seen in either sequence).
+ // k-mers listed in skipMers never count as shared but still enter the denominator; sharedCount stores the numerator.
+ HashSet<String> kept = new HashSet<String>(first.length());
+ HashSet<String> union = new HashSet<String>(first.length()+second.length());
+ HashSet<String> common = new HashSet<String>(first.length());
+
+ final int lastFirst = first.length() - this.kmer;
+ for (int start = 0; start <= lastFirst; start++) {
+ String mer = first.substring(start, start + this.kmer);
+ union.add(mer);
+ if (!this.skipMers.containsKey(mer)) {
+ kept.add(mer);
+ }
+ }
+
+ final int lastSecond = second.length() - this.kmer;
+ for (int start = 0; start <= lastSecond; start++) {
+ String mer = second.substring(start, start + this.kmer);
+ if (!kept.contains(mer)) { union.add(mer); } else { common.add(mer); }
+ }
+ this.sharedCount = common.size();
+ return common.size() / (double) union.size();
+ }
+
+ public double compareMinHash(String first, String second) {
+ // Builds a MinHashSketch for each sequence with identical arguments and returns the value of jaccard() between them.
+ MinHashSketch firstSketch = new MinHashSketch(first, this.kmer, 1256, null, true);
+ MinHashSketch secondSketch = new MinHashSketch(second, this.kmer, 1256, null, true);
+ return firstSketch.jaccard(secondSketch);
+ }
+
+ private char getRandomBase(Character toExclude) {
+ // Picks one of A, C, G, T with equal probability, consuming one generator.nextDouble() per attempt.
+ // A draw equal to toExclude is rejected and redrawn; passing null excludes nothing.
+ char base;
+ do {
+ double draw = generator.nextDouble();
+
+ // quartiles of [0, 1): [0, .25) -> A, [.25, .5) -> C, [.5, .75) -> G, [.75, 1) -> T
+ if (draw >= 0.75) {
+ base = 'T';
+ } else if (draw >= 0.5) {
+ base = 'G';
+ } else if (draw >= 0.25) {
+ base = 'C';
+ } else {
+ base = 'A';
+ }
+
+ } while (toExclude != null && toExclude.charValue() == base);
+
+ return base;
+ }
+
+ @SuppressWarnings("unused")
+ private String getSequence(int firstLen, int firstPos, String sequence, // convenience overload of the 10-argument getSequence
+ double errorRate, StringBuilder profile, StringBuilder realErrorStr) {
+ return getSequence(firstLen, firstPos, sequence, errorRate, profile,
+ realErrorStr, 0.792, 0.122, 0.086, true); // fixed insertionRate, deletionRate, substitutionRate; trimRight = true
+ }
+
+ private String getSequence(int seqLength, int firstPos, String sequence,
+ double errorRate, StringBuilder profile,
+ StringBuilder realErrorStr, double insertionRate,
+ double deletionRate, double substitutionRate, boolean trimRight) {
+
+ StringBuilder firstSeq = new StringBuilder();
+ firstSeq.append(sequence.substring(firstPos,
+ Math.min(sequence.length(), firstPos + 2 * seqLength)));
+
+ if (firstSeq.length() < 2 * seqLength) {
+ firstSeq.append(sequence.substring(
+ 0,
+ Math.min(sequence.length(),
+ (2 * seqLength - firstSeq.length()))));
+ }
+
+ //use a linked list for insertions
+ LinkedList<Character> modifiedSequence = new LinkedList<>();
+ for (char a : firstSeq.toString().toCharArray())
+ modifiedSequence.add(a);
+
+ // now mutate
+ int realError = 0;
+ ListIterator<Character> iter = modifiedSequence.listIterator();
+ while (iter.hasNext()) {
+ char i = iter.next();
+
+ if (generator.nextDouble() < errorRate) {
+ double errorType = generator.nextDouble();
+ if (errorType < substitutionRate) { // mismatch
+ // switch base
+
+ iter.set(getRandomBase(i));
+
+ //firstSeq.setCharAt(i, getRandomBase(firstSeq.charAt(i)));
+ //System.err.println("sub");
+ realError++;
+ i++;
+ } else if (errorType < insertionRate + substitutionRate) { // insert
+
+ iter.previous();
+ iter.add(getRandomBase(null));
+ //firstSeq.insert(i, getRandomBase(null));
+ // profile.insert(i+1,"X");
+ realError++;
+ //i += 2;
+ } else { // delete
+
+ iter.remove();
+ // firstSeq.setCharAt(i, 'D');
+ // profile.setCharAt(i, '-');
+ //System.err.println("delete");
+ realError++;
+ }
+ } else {
+ //i++;
+ }
+ }
+
+ firstSeq = new StringBuilder();
+ for (char c : modifiedSequence)
+ firstSeq.append(c);
+
+ realErrorStr.append((double) realError / seqLength);
+
+ if (trimRight) {
+ return firstSeq.substring(0, seqLength).toString();
+ }
+
+ return firstSeq.substring(firstSeq.length()-seqLength, firstSeq.length()).toString();
+ }
+
+ private void outputStats(ArrayList<Double> values, PrintStream out) {
+ double mean = 0.0;
+ double variance = 0.0;
+
+ int N = 0;
+ for (double d : values) {
+ N++;
+ mean += d;
+ }
+ mean = mean/N;
+
+ N = 0;
+ for (double d : values) {
+ N++;
+ variance += (d-mean)*(d-mean);
+ }
+
+ variance /= (N-1);
+
+ double stdev = Math.sqrt(variance);
+ out.print(mean + "\t" + stdev);
+ }
+
+ public void simulate(double insertionRate, double delRate, double subRate)
+ throws Exception {
+ double errorRate = insertionRate + delRate + subRate;
+ double insertionPercentage = insertionRate / errorRate;
+ double deletionPercentage = delRate / errorRate;
+ double subPercentage = subRate / errorRate;
+
+ if (errorRate < 0 || errorRate > 1) {
+ System.err.println("Error rate must be between 0 and 1");
+ System.exit(1);
+ }
+ System.err.println("Started...");
+
+ String[] sequences = null;
+ if (this.reference != null) {
+ FastaData data = new FastaData(this.reference, 0);
+ data.enqueueFullFile();
+ sequences = new String[data.getNumberProcessed()];
+ int i = 0;
+ while (!data.isEmpty())
+ sequences[i++] = data.dequeue().getSquenceString().toUpperCase().replace("N", "");
+ }
+ System.err.println("Loaded reference");
+
+ for (int i = 0; i < this.totalTrials; i++) {
+ if (i % 100 == 0) {
+ System.err.println("Done " + i + "/" + this.totalTrials);
+ }
+ int sequenceLength = (int) this.requestedLength;
+ int firstPos = 0;
+
+ String sequence = null;
+ int seqID = 0;
+ if (this.reference != null) {
+ sequence = null;
+ while (sequence == null
+ || sequence.length() < 4 * sequenceLength) {
+ // pick a sequence from our reference
+ seqID = generator.nextInt(sequences.length);
+ sequence = sequences[seqID];
+ }
+
+ // now pick a position
+ firstPos = generator.nextInt(sequence.length());
+ } else {
+ sequence = buildRandomSequence(sequenceLength * 4);
+ }
+
+ // simulate sequence with error
+ StringBuilder firstAdj = new StringBuilder();
+ StringBuilder errors = new StringBuilder();
+ String firstSeq = getSequence(sequenceLength, firstPos, sequence,
+ errorRate, firstAdj, errors, insertionPercentage,
+ deletionPercentage, subPercentage, false);
+
+ if (this.kmer < 0) { // we were only asked to simulate sequences not compare
+ System.out.println(">s" + i + " " + seqID + " " + (firstPos+sequenceLength));
+ System.out.println(Utils.convertToFasta(firstSeq));
+ continue;
+ }
+
+ // compare number of shared kmers out of total to another sequence
+ // from
+ // same position
+ int offset = (int) ((this.requestedLength * 2) - this.overlap);
+ int secondPos = (firstPos + offset) % sequence.length();
+ String secondSeq = getSequence(sequenceLength, secondPos, sequence,
+ (this.halfError ? 0 : errorRate), firstAdj, errors, (this.halfError ? 0 :insertionPercentage),
+ (this.halfError ? 0 : deletionPercentage), (this.halfError ? 0 : subPercentage), true);
+ if (this.verbose) {
+ System.err.println("Given seq " + firstPos + " of len " + sequence.length() + " and offset " + secondPos + " due to offset " + offset);
+ System.err.println(">" + seqID + "_" + firstPos + "\n" + firstSeq);
+ System.err.println(">" + seqID + "_" + secondPos + "\n" + secondSeq);
+ }
+ if (firstSeq.length() != secondSeq.length() || firstSeq.length() != this.requestedLength) {
+ System.err.println("Error wrong length first: " + firstSeq.length() + " second: " + secondSeq.length() + " requested " + this.requestedLength);
+ System.exit(1);
+ }
+ this.sharedJaccard.add(compareKmers(firstSeq, secondSeq));
+ this.sharedMinHash.add(compareMinHash(firstSeq, secondSeq));
+ this.sharedMerCounts.add(this.sharedCount);
+
+ // compare number of shared kmers out of total to another sequence
+ // from a
+ // non-overlapping position
+ // get a non-overlapping position
+ if (this.reference != null) {
+ sequence = null;
+ int secondSeqID = 0;
+ while (sequence == null
+ || sequence.length() < 2 * sequenceLength) {
+ secondSeqID = generator.nextInt(sequences.length);
+ sequence = sequences[secondSeqID];
+ }
+ secondPos = generator.nextInt(sequence.length());
+ while (seqID == secondSeqID && Utils
+ .getRangeOverlap(firstPos, firstPos + sequenceLength,
+ secondPos, secondPos + sequenceLength) > 0) {
+ secondPos = generator.nextInt(sequence.length());
+ }
+ // generate error for second sequence
+ secondSeq = getSequence(sequenceLength, secondPos, sequence,
+ (this.halfError ? 0 : errorRate), firstAdj, errors, (this.halfError ? 0 : insertionPercentage),
+ (this.halfError ? 0 : deletionPercentage), (this.halfError ? 0 : subPercentage), true);
+ } else {
+ secondPos = 0;
+ secondSeq = buildRandomSequence(sequenceLength);
+ }
+
+ if (firstSeq.length() != secondSeq.length() || firstSeq.length() != this.requestedLength) {
+ System.err.println("Error wrong length " + firstSeq.length());
+ System.exit(1);
+ }
+ // System.err.println("First: "+firstSeq.length());
+ // System.err.println("Second: "+secondSeq.length());
+
+ this.randomJaccard.add(compareKmers(firstSeq, secondSeq));
+ this.randomMinHash.add(compareMinHash(firstSeq, secondSeq));
+ this.randomMerCounts.add(this.sharedCount);
+ }
+
+ if (this.randomJaccard.size() != this.randomMerCounts.size()
+ || this.sharedJaccard.size() != this.sharedMerCounts.size()
+ || this.sharedJaccard.size() != this.randomJaccard.size()) {
+ System.err.println("Error trial number not consistent!");
+ }
+
+ if (this.sharedMerCounts.size() == 0) {
+ return;
+ }
+
+ for (int i = 0; i < this.totalTrials; i++) {
+ System.out.println(this.sharedMerCounts.get(i) + "\t"
+ + this.sharedJaccard.get(i) + "\t"
+ + this.sharedMinHash.get(i) + "\t"
+ + this.randomMerCounts.get(i) + "\t"
+ + this.randomJaccard.get(i) + "\t"
+ + this.randomMinHash.get(i));
+ }
+ System.out.print("Shared mer counts stats: ");
+ outputStats(this.sharedMerCounts, System.out);
+ System.out.println();
+
+ System.out.print("Shared jaccard stats: ");
+ outputStats(this.sharedJaccard, System.out);
+ System.out.println();
+
+ System.out.print("Shared MinHash jaccard stats: ");
+ outputStats(this.sharedMinHash, System.out);
+ System.out.println();
+
+ System.out.print("Random mer counts stats: ");
+ outputStats(this.randomMerCounts, System.out);
+ System.out.println();
+
+ System.out.print("Random jaccard stats: ");
+ outputStats(this.randomJaccard, System.out);
+ System.out.println();
+
+ System.out.print("Random MinHash jaccard stats: ");
+ outputStats(this.randomMinHash, System.out);
+ System.out.println();
+ }
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/main/MhapMain.java b/src/main/java/edu/umd/marbl/mhap/main/MhapMain.java
new file mode 100644
index 0000000..0e41e6c
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/main/MhapMain.java
@@ -0,0 +1,607 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2014 by Konstantin Berlin and Sergey Koren
+ * University Of Maryland
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.main;
+
+import java.io.File;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Locale;
+import edu.umd.marbl.mhap.impl.MhapRuntimeException;
+import edu.umd.marbl.mhap.impl.MinHashSearch;
+import edu.umd.marbl.mhap.impl.SequenceId;
+import edu.umd.marbl.mhap.impl.SequenceSketchStreamer;
+import edu.umd.marbl.mhap.sketch.FrequencyCounts;
+import edu.umd.marbl.mhap.utils.PackageInfo;
+import edu.umd.marbl.mhap.utils.ParseOptions;
+import edu.umd.marbl.mhap.utils.Utils;
+
+public final class MhapMain
+{
+ private final double acceptScore;
+ private final String inFile;
+ private final int kmerSize;
+ private final double maxShift;
+ private final int minStoreLength;
+ private final boolean noSelf;
+ private final int numHashes;
+ private final int numMinMatches;
+ protected final int numThreads;
+ private final String processFile;
+ private final String toFile;
+ private final boolean weighted;
+ private final boolean useAlignment;
+ private final double alignmentOffset;
+ private final double alignmentScore;
+
+ private final FrequencyCounts kmerFilter;
+
+ private static final double DEFAULT_ACCEPT_SCORE = 0.04;
+
+ private static final double DEFAULT_FILTER_CUTOFF = 1.0e-5;
+
+ private static final int DEFAULT_KMER_SIZE = 16;
+
+ private static final double DEFAULT_MAX_SHIFT_PERCENT = 0.2;
+
+ private static final int DEFAULT_MIN_STORE_LENGTH = 0;
+
+ private static final int DEFAULT_NUM_MIN_MATCHES = 3;
+
+ private static final double DEFAULT_BIT_ALIGNMENT_MISMATCH_PANELTY = -0.535;
+
+ private static final double DEFAULT_BIT_ALIGNMENT_SCORE = 1.0e-6;
+
+ private static final int DEFAULT_NUM_THREADS = Runtime.getRuntime().availableProcessors();
+
+ private static final int DEFAULT_NUM_WORDS = 512;
+
+ private static final int DEFAULT_ORDERED_KMER_SIZE = 12;
+
+ public static void main(String[] args) throws Exception
+ {
+ // Entry point: registers the command line options, applies the sequencing-technology presets,
+ // validates files and numeric ranges (printing a message and exiting with status 1 on a violation),
+ // prints the effective settings to stderr and finally runs MhapMain.computeMain().
+ // set the locale
+ Locale.setDefault(Locale.US);
+
+ ParseOptions options = new ParseOptions();
+ options.addStartTextLine("MHAP: MinHash Alignment Protocol. A tool for finding overlaps of long-read sequences (such as PacBio or Nanopore) in bioinformatics.");
+ options.addStartTextLine("\tVersion: "+PackageInfo.VERSION+", Build time: "+PackageInfo.BUILD_TIME);
+ options.addStartTextLine("\tUsage 1 (direct execution): java -server -Xmx<memory> -jar <MHAP jar> -s<fasta/dat from/self file> [-q<fasta/dat to file>] [-f<kmer filter list, must be sorted>]");
+ options.addStartTextLine("\tUsage 2 (generate precomputed binaries): java -server -Xmx<memory> -jar <MHAP jar> -p<directory of fasta files> -q <output directory> [-f<kmer filter list, must be sorted>]");
+ options.addOption("-s", "Usage 1 only. The FASTA or binary dat file (see Usage 2) of reads that will be stored in a box, and that all subsequent reads will be compared to.", "");
+ options.addOption("-q", "Usage 1: The FASTA file of reads, or a directory of files, that will be compared to the set of reads in the box (see -s). Usage 2: The output directory for the binary formatted dat files.", "");
+ options.addOption("-p", "Usage 2 only. The directory containing FASTA files that should be converted to binary format for storage.", "");
+ options.addOption("-f", "k-mer filter file used for filtering out highly repetative k-mers. Must be sorted in descending order of frequency (second column).", "");
+ options.addOption("-k", "[int], k-mer size used for MinHashing. The k-mer size for second stage filter is seperate, and cannot be modified.", DEFAULT_KMER_SIZE);
+ options.addOption("--num-hashes", "[int], number of min-mers to be used in MinHashing.", DEFAULT_NUM_WORDS);
+ options.addOption("--threshold", "[double], the threshold similarity score cutoff for the second stage sort-merge filter. This is based on the average number of k-mers matching in the overlapping region.", DEFAULT_ACCEPT_SCORE);
+ options.addOption("--filter-threshold", "[double], the cutoff at which the k-mer in the k-mer filter file is considered repetitive. This value for a specific k-mer is specified in the second column in the filter file. If no filter file is provided, this option is ignored.", DEFAULT_FILTER_CUTOFF);
+ options.addOption("--max-shift", "[double], region size to the left and right of the estimated overlap, as derived from the median shift and sequence length, where a k-mer matches are still considered valid. Second stage filter only.", DEFAULT_MAX_SHIFT_PERCENT);
+ options.addOption("--num-min-matches", "[int], minimum # min-mer that must be shared before computing second stage filter. Any sequences below that value are considered non-overlapping.", DEFAULT_NUM_MIN_MATCHES);
+ options.addOption("--num-threads", "[int], number of threads to use for computation. Typically set to 2 x #cores.", DEFAULT_NUM_THREADS);
+ options.addOption("--weighted", "Perform weighted MinHashing.", false);
+ //options.addOption("--alignment", "Perform sudo-alignment instead of ordered k-mer merging.", false);
+ options.addOption("--alignment", "Experimental option.", false);
+ options.addOption("--alignment-offset", "The offset to account for the variance in the alignment match score.", DEFAULT_BIT_ALIGNMENT_MISMATCH_PANELTY);
+ options.addOption("--alignment-score", "The cutoff score for alignment matches.", DEFAULT_BIT_ALIGNMENT_SCORE);
+ options.addOption("--min-store-length", "[int], The minimum length of the read that is stored in the box. Used to filter out short reads from FASTA file.", DEFAULT_MIN_STORE_LENGTH);
+ options.addOption("--no-self", "Do not compute the overlaps between sequences inside a box. Should be used when the to and from sequences are coming from different files.", false);
+ options.addOption("--store-full-id", "Store full IDs as seen in FASTA file, rather than storing just the sequence position in the file. Some FASTA files have long IDS, slowing output of results. This options is ignored when using compressed file format.", false);
+ options.addOption("--pacbio-fast", "Set all the parameters for the PacBio fast setting. This is the current best guidance, and could change at any time without warning.", false);
+ options.addOption("--pacbio-sensitive", "Set all the parameters for the PacBio sensitive settings. This is the current best guidance, and could change at any time without warning.", false);
+ options.addOption("--nanopore-fast", "Set all the parameters for the Nanopore fast settings. This is the current best guidance, and could change at any time without warning.", false);
+
+ if (!options.process(args))
+ System.exit(0);
+
+ //at most one preset may be chosen; --nanopore-fast combined with a PacBio preset used to be silently ignored
+ if ((options.get("--pacbio-fast").getBoolean() ? 1 : 0) + (options.get("--pacbio-sensitive").getBoolean() ? 1 : 0) + (options.get("--nanopore-fast").getBoolean() ? 1 : 0) > 1)
+ {
+ System.out.println("Two default sequence type parameters cannot be set at the same time.");
+ System.out.println(options.helpMenuString());
+ System.exit(1);
+ }
+
+ if (options.get("--pacbio-fast").getBoolean()) //presets only fill in values the user did not set explicitly
+ {
+ if (!options.get("-k").isSet())
+ options.setOptions("-k", 16);
+
+ if (!options.get("--num-min-matches").isSet())
+ options.setOptions("--num-min-matches", 3);
+
+ if (!options.get("--num-hashes").isSet())
+ options.setOptions("--num-hashes", 512);
+ }
+ else
+ if (options.get("--pacbio-sensitive").getBoolean())
+ {
+ if (!options.get("-k").isSet())
+ options.setOptions("-k", 16);
+
+ if (!options.get("--num-min-matches").isSet())
+ options.setOptions("--num-min-matches", 2);
+
+ if (!options.get("--num-hashes").isSet())
+ options.setOptions("--num-hashes", 768);
+ }
+ else
+ if (options.get("--nanopore-fast").getBoolean())
+ {
+ if (!options.get("-k").isSet())
+ options.setOptions("-k", 16);
+
+ if (!options.get("--num-min-matches").isSet())
+ options.setOptions("--num-min-matches", 2);
+
+ if (!options.get("--num-hashes").isSet())
+ options.setOptions("--num-hashes", 768);
+ }
+
+ if (options.get("-s").getString().isEmpty() && options.get("-p").getString().isEmpty())
+ {
+ System.out.println("Please set the -s or the -p options. See options below:");
+ System.out.println(options.helpMenuString());
+ System.exit(1);
+ }
+
+ if (!options.get("-p").getString().isEmpty() && options.get("-q").getString().isEmpty() )
+ {
+ System.out.println("Please set the -q option. See options below:");
+ System.out.println(options.helpMenuString());
+ System.exit(1);
+ }
+
+ //check for file existence
+ if (!options.get("-p").getString().isEmpty() && !new File(options.get("-p").getString()).exists())
+ {
+ System.out.println("Could not find requested file/folder: "+options.get("-p").getString());
+ System.exit(1);
+ }
+
+ //check for file existence
+ if (!options.get("-s").getString().isEmpty() && !new File(options.get("-s").getString()).exists())
+ {
+ System.out.println("Could not find requested file/folder: "+options.get("-s").getString());
+ System.exit(1);
+ }
+
+ //check for file existence
+ if (!options.get("-q").getString().isEmpty() && !new File(options.get("-q").getString()).exists())
+ {
+ System.out.println("Could not find requested file/folder: "+options.get("-q").getString());
+ System.exit(1);
+ }
+
+ //check for file existence
+ if (!options.get("-f").getString().isEmpty() && !new File(options.get("-f").getString()).exists())
+ {
+ System.out.println("Could not find requested file/folder: "+options.get("-f").getString());
+ System.exit(1);
+ }
+
+ //check range
+ if (options.get("--num-threads").getInteger()<=0)
+ {
+ System.out.println("Number of threads must be positive.");
+ System.exit(1);
+ }
+
+ //check range
+ if (options.get("-k").getInteger()<=0)
+ {
+ System.out.println("k-mer size must be positive.");
+ System.exit(1);
+ }
+
+ //check range
+ if (options.get("--num-min-matches").getInteger()<=0)
+ {
+ System.out.println("Minimum number of matches must be positive.");
+ System.exit(1);
+ }
+
+ //check range
+ if (options.get("--min-store-length").getInteger()<0)
+ {
+ System.out.println("The minimum read length stored must be >=0.");
+ System.exit(1);
+ }
+
+ //check range
+ if (options.get("--max-shift").getDouble()<-1.0)
+ {
+ System.out.println("The minimum shift must be greater than -1.");
+ System.exit(1);
+ }
+
+ //check range
+ if (options.get("--threshold").getDouble()<0.0)
+ {
+ System.out.println("The second stage filter cutoff must be >=0.");
+ System.exit(1);
+ }
+
+ //check other options
+ //TODO move into the class
+ SequenceId.STORE_FULL_ID = options.get("--store-full-id").getBoolean();
+
+ //printing the options used
+ System.err.println("Running with these settings:");
+ System.err.println("Version = "+PackageInfo.VERSION);
+ System.err.println("Build time = "+PackageInfo.BUILD_TIME);
+ System.err.println(options);
+
+ // start the main program
+ MhapMain main = new MhapMain(options);
+
+ //execute main computation code
+ main.computeMain();
+ }
+
+
+ /**
+  * Creates the driver from the parsed command line options.
+  * <p>
+  * Copies the run settings out of {@code options} into the instance fields
+  * and, when a k-mer filter file is given with {@code -f}, loads that filter
+  * using the {@code --filter-threshold} value and the configured k-mer size.
+  *
+  * @param options
+  *            the parsed command line options
+  * @throws IOException
+  *             declared by the original signature
+  * @throws MhapRuntimeException
+  *             if the k-mer filter file cannot be loaded; the underlying
+  *             exception is attached as the cause
+  */
+ public MhapMain(ParseOptions options) throws IOException
+ {
+     // input/output locations and execution settings
+     this.processFile = options.get("-p").getString();
+     this.inFile = options.get("-s").getString();
+     this.toFile = options.get("-q").getString();
+     this.noSelf = options.get("--no-self").getBoolean();
+     this.numThreads = options.get("--num-threads").getInteger();
+
+     // sketching and matching parameters
+     this.numHashes = options.get("--num-hashes").getInteger();
+     this.kmerSize = options.get("-k").getInteger();
+     this.numMinMatches = options.get("--num-min-matches").getInteger();
+     this.minStoreLength = options.get("--min-store-length").getInteger();
+     this.maxShift = options.get("--max-shift").getDouble();
+     this.acceptScore = options.get("--threshold").getDouble();
+     this.weighted = options.get("--weighted").getBoolean();
+     this.useAlignment = options.get("--alignment").getBoolean();
+     this.alignmentOffset = options.get("--alignment-offset").getDouble();
+     this.alignmentScore = options.get("--alignment-score").getDouble();
+
+     // the k-mer filter is optional: an empty path means "no filter"
+     String kmerFilterPath = options.get("-f").getString();
+
+     if (kmerFilterPath.isEmpty())
+     {
+         this.kmerFilter = null;
+     }
+     else
+     {
+         long filterReadStart = System.nanoTime();
+         System.err.println("Reading in filter file " + kmerFilterPath + ".");
+         try
+         {
+             this.kmerFilter = Utils.createKmerFilter(kmerFilterPath, options.get("--filter-threshold").getDouble(), this.kmerSize, 0);
+         }
+         catch (Exception e)
+         {
+             // any failure while loading is reported as a parse problem, cause preserved
+             throw new MhapRuntimeException("Could not parse k-mer filter file.", e);
+         }
+
+         double elapsedSeconds = (System.nanoTime() - filterReadStart) * 1.0e-9;
+         System.err.println("Time (s) to read filter file: " + elapsedSeconds);
+     }
+ }
+
+ /*
+ public FrequencyCounts recordFastaKmerCounts(String file, double filterCutoff) throws IOException
+ {
+ System.err.println("Computing k-mer counts...");
+
+ final FastaData data = new FastaData(this.inFile, 0);
+
+ final CountMin<Long> countMin = new CountMin<>(1.0e-5, 1.0-1.0e-5, 0);
+ //System.err.println(countMin.getDepth()+" "+countMin.getWidth());
+
+ // figure out number of cores
+ ExecutorService execSvc = Executors.newFixedThreadPool(this.numThreads);
+
+ final AtomicInteger counter = new AtomicInteger();
+ for (int iter = 0; iter < this.numThreads; iter++)
+ {
+ Runnable task = new Runnable()
+ {
+ @Override
+ public void run()
+ {
+ try
+ {
+ Sequence seq = data.dequeue();
+ while (seq != null)
+ {
+ //get the kmers integers
+ long[] kmerHashes = HashUtils.computeSequenceHashesLong(seq.getSquenceString(), MhapMain.this.kmerSize, 0);
+
+ //store the values
+ for (long val : kmerHashes)
+ countMin.add(val);
+
+ //get the kmers integers for reverse compliment
+ kmerHashes = HashUtils.computeSequenceHashesLong(seq.getReverseCompliment().getSquenceString(), MhapMain.this.kmerSize, 0);
+
+ //store the values
+ for (long val : kmerHashes)
+ countMin.add(val);
+
+ int currCount = counter.addAndGet(2);
+ if (currCount % 5000 == 0)
+ System.err.println("Kmers counted for " + currCount + " sequences (including reverse compliment)...");
+
+ seq = data.dequeue();
+ }
+ }
+ catch (IOException e)
+ {
+ throw new MhapRuntimeException(e);
+ }
+ }
+ };
+
+ // enqueue the task
+ execSvc.execute(task);
+ }
+
+ // shutdown the service
+ execSvc.shutdown();
+ try
+ {
+ execSvc.awaitTermination(365L, TimeUnit.DAYS);
+ }
+ catch (InterruptedException e)
+ {
+ execSvc.shutdownNow();
+ throw new MhapRuntimeException("Unable to finish all tasks.");
+ }
+
+ System.err.println("Computed k-mer counts for "+counter.get()+" sequences.");
+
+ return new NGramCounts(countMin, counter.get(), filterCutoff);
+ }
+ */
+
+ /**
+  * Runs the main computation in one of two modes.
+  * <p>
+  * <b>Process mode</b> (a non-empty {@code processFile}): every input file
+  * (the file itself, or each non-hidden entry when it is a directory) is read
+  * through a {@link SequenceSketchStreamer} and written to the target
+  * directory {@code toFile} as {@code <name-without-extension>.dat}; the
+  * method then returns.
+  * <p>
+  * <b>Search mode</b> (otherwise): {@code inFile} is loaded into a
+  * {@link MinHashSearch}. Without a {@code toFile} the stored sequences are
+  * matched against themselves. With a {@code toFile} the self-match is run
+  * unless {@code noSelf} is set, and then every to-file (in sorted order) is
+  * streamed against the stored sequences. Timing information and final
+  * statistics are written to {@code System.err}.
+  *
+  * @throws IOException
+  *             if reading or writing any of the files fails
+  * @throws MhapRuntimeException
+  *             if a required file or directory is missing or unreadable
+  */
+ public void computeMain() throws IOException
+ {
+     long startTotalTime = System.nanoTime();
+     long startTime = System.nanoTime();
+     long processTime = System.nanoTime();
+
+     //if processing a file or directory into binary files
+     if (this.processFile!=null && !this.processFile.isEmpty())
+     {
+         System.err.println("Processing FASTA files for binary compression...");
+
+         File file = new File(this.processFile);
+         if (!file.exists())
+             throw new MhapRuntimeException("Process file does not exist.");
+
+         if (this.toFile==null || this.toFile.isEmpty())
+             throw new MhapRuntimeException("Target directory must be defined.");
+
+         File toDirectory = new File(this.toFile);
+         if (!toDirectory.exists() || !toDirectory.isDirectory())
+             throw new MhapRuntimeException("Target directory doesn't exist.");
+
+         for (File pf : listVisibleFiles(file))
+         {
+             startTime = System.nanoTime();
+
+             SequenceSketchStreamer seqStreamer = getSequenceHashStreamer(pf.getAbsolutePath(), 0);
+
+             //strip the extension, if any (a leading dot is not an extension)
+             String outputString = pf.getName();
+             int i = outputString.lastIndexOf('.');
+             if (i>0)
+                 outputString = outputString.substring(0, i);
+
+             //combine with the directory name
+             outputString = toDirectory.getPath()+File.separator+outputString+".dat";
+
+             //store the file to disk
+             seqStreamer.writeToBinary(outputString, false, this.numThreads);
+
+             System.err.println("Processed "+seqStreamer.getNumberProcessed()+" sequences (fwd and rev).");
+             System.err.println("Read, hashed, and stored file "+pf.getPath()+" to "+outputString+".");
+             System.err.println("Time (s): " + (System.nanoTime() - startTime)*1.0e-9);
+         }
+
+         System.err.println("Total time (s): " + (System.nanoTime() - startTotalTime)*1.0e-9);
+
+         return;
+     }
+
+     System.err.println("Processing files for storage in reverse index...");
+
+     // read and index the kmers
+     int seqNumberProcessed = 0;
+
+     //create search object
+     SequenceSketchStreamer seqStreamer = getSequenceHashStreamer(this.inFile, seqNumberProcessed);
+     MinHashSearch hashSearch = getMatchSearch(seqStreamer);
+
+     seqNumberProcessed += seqStreamer.getNumberProcessed()/2;
+     System.err.println("Processed "+seqStreamer.getNumberProcessed()+" unique sequences (fwd and rev).");
+     System.err.println("Time (s) to read and hash from file: " + (System.nanoTime() - processTime)*1.0e-9);
+
+     long startTotalScoringTime = System.nanoTime();
+
+     //System.err.println("Press Enter...");
+     //System.in.read();
+
+     // now that we have the hash constructed, go through all sequences to recompute their min and score their matches
+     if (this.toFile==null || this.toFile.isEmpty())
+     {
+         startTime = System.nanoTime();
+         hashSearch.findMatches();
+         System.err.println("Time (s) to score and output to self: " + (System.nanoTime() - startTime)*1.0e-9);
+     }
+     else
+     {
+         File file = new File(this.toFile);
+
+         if (!file.exists())
+             throw new MhapRuntimeException("To-file does not exist.");
+
+         ArrayList<File> toFiles = listVisibleFiles(file);
+
+         //sort the files in alphabetical order
+         Collections.sort(toFiles);
+
+         //first perform to self
+         startTime = System.nanoTime();
+         if (!this.noSelf)
+         {
+             hashSearch.findMatches();
+             System.out.flush();
+             System.err.println("Time (s) to score and output to self: " + (System.nanoTime() - startTime)*1.0e-9);
+         }
+
+         //now do the same for all to-files
+         for (File cf : toFiles)
+         {
+             // read and index the kmers
+             seqStreamer = getSequenceHashStreamer(cf.getAbsolutePath(), seqNumberProcessed);
+             System.err.println("Opened fasta file "+cf.getCanonicalPath()+".");
+
+             //match the file
+             startTime = System.nanoTime();
+             hashSearch.findMatches(seqStreamer);
+
+             //flush to get the output
+             System.out.flush();
+
+             seqNumberProcessed += seqStreamer.getNumberProcessed();
+             System.err.println("Processed "+seqStreamer.getNumberProcessed()+" to sequences.");
+             System.err.println("Time (s) to score, hash to-file, and output: " + (System.nanoTime() - startTime)*1.0e-9);
+         }
+     }
+
+     //flush output
+     System.out.flush();
+
+     //output time
+     System.err.println("Total scoring time (s): " + (System.nanoTime() - startTotalScoringTime)*1.0e-9);
+     System.err.println("Total time (s): " + (System.nanoTime() - startTotalTime)*1.0e-9);
+
+     //output final stats
+     outputFinalStat(hashSearch);
+ }
+
+ /**
+  * Expands a path into the list of files to work on: the path itself when it
+  * is not a directory, otherwise every directory entry whose name does not
+  * start with a dot.
+  *
+  * @param path
+  *            an existing file or directory
+  * @return a new, modifiable list of files (in the order reported by the
+  *         file system)
+  * @throws MhapRuntimeException
+  *             if the directory content cannot be listed
+  */
+ private static ArrayList<File> listVisibleFiles(File path)
+ {
+     ArrayList<File> files = new ArrayList<>();
+
+     //if not a directory just add the file
+     if (!path.isDirectory())
+     {
+         files.add(path);
+         return files;
+     }
+
+     //read the directory content, skipping hidden (dot) entries
+     File[] entries = path.listFiles(new FilenameFilter()
+     {
+         @Override
+         public boolean accept(File dir, String name)
+         {
+             return !name.startsWith(".");
+         }
+     });
+
+     //listFiles returns null (not an empty array) when an I/O error occurs
+     if (entries == null)
+         throw new MhapRuntimeException("Could not list the content of directory "+path.getPath()+".");
+
+     for (File entry : entries)
+         files.add(entry);
+
+     return files;
+ }
+
+ /**
+  * Builds the search structure over the sequences delivered by the given
+  * streamer, configured with this instance's hash count, minimum match count,
+  * thread count, minimum stored length, maximum shift, acceptance score and
+  * alignment settings.
+  *
+  * @param hashStreamer
+  *            the source of sequence sketches to store
+  * @return the newly constructed search object
+  * @throws IOException
+  *             if the search constructor reports an I/O failure
+  */
+ public MinHashSearch getMatchSearch(SequenceSketchStreamer hashStreamer) throws IOException
+ {
+     MinHashSearch search = new MinHashSearch(
+             hashStreamer,
+             this.numHashes,
+             this.numMinMatches,
+             this.numThreads,
+             false,
+             this.minStoreLength,
+             this.maxShift,
+             this.acceptScore,
+             this.alignmentOffset,
+             this.alignmentScore);
+
+     return search;
+ }
+
+ public SequenceSketchStreamer getSequenceHashStreamer(String file, int offset) throws IOException
+ {
+ SequenceSketchStreamer seqStreamer;
+ if (file.endsWith(".dat"))
+ seqStreamer = new SequenceSketchStreamer(file, offset, this.useAlignment);
+ else
+ seqStreamer = new SequenceSketchStreamer(file, this.kmerSize, this.numHashes,
+ DEFAULT_ORDERED_KMER_SIZE, this.kmerFilter, this.weighted, offset, this.useAlignment);
+
+ return seqStreamer;
+ }
+
+ /**
+  * Prints the summary statistics of a finished search to {@code System.err}
+  * and flushes the stream.
+  * <p>
+  * All ratios are computed in double precision. In particular the product
+  * "stored sequences x lookups" is formed from doubles, so it cannot overflow
+  * an integral type for large data sets. Ratios with a zero denominator print
+  * as {@code NaN} or {@code Infinity}.
+  *
+  * @param matchSearch
+  *            the search object whose counters are reported
+  */
+ protected void outputFinalStat(MinHashSearch matchSearch)
+ {
+     System.err.println("MinHash search time (s): " + matchSearch.getMinHashSearchTime());
+     //System.err.println("Sort-merge search time (s): " + matchSearch.getSortMergeTime());
+     System.err.println("Total matches found: " + matchSearch.getMatchesProcessed());
+
+     // read each counter once and convert to double before any arithmetic
+     double matches = (double) matchSearch.getMatchesProcessed();
+     double lookups = (double) matchSearch.getNumberSequencesSearched();
+     double elements = (double) matchSearch.getNumberElementsProcessed();
+     double hits = (double) matchSearch.getNumberSequencesHit();
+     double fullyCompared = (double) matchSearch.getNumberSequencesFullyCompared();
+     double stored = (double) matchSearch.size();
+
+     System.err.println("Average number of matches per lookup: " + (matches / lookups));
+     System.err.println("Average number of table elements processed per lookup: " + (elements / lookups));
+     System.err.println("Average number of table elements processed per match: " + (elements / matches));
+     System.err.println("Average % of hashed sequences hit per lookup: " + (hits / (stored * lookups) * 100.0));
+     System.err.println("Average % of hashed sequences hit that are matches: " + (matches / hits * 100.0));
+     System.err.println("Average % of hashed sequences fully compared that are matches: " + (matches / fullyCompared * 100.0));
+     System.err.flush();
+ }
+}
\ No newline at end of file
diff --git a/src/main/java/edu/umd/marbl/mhap/math/BasicMath.java b/src/main/java/edu/umd/marbl/mhap/math/BasicMath.java
new file mode 100644
index 0000000..40b9e73
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/math/BasicMath.java
@@ -0,0 +1,906 @@
+/*
+ * ARMOR package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2012 by Konstantin Berlin
+ * University Of Maryland
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.math;
+
+/**
+ * The Class BasicMath.
+ */
+public final class BasicMath
+{
+
+ /** The Constant PI. */
+ public static final double PI = Math.PI;
+
+ /** The Constant TWOPI. */
+ public static final double TWOPI = 2.0 * PI;
+
+ public static double abs(double a)
+ {
+ return Math.abs(a);
+ }
+
+ public static double[] abs(double[] a)
+ {
+ double[] val = new double[a.length];
+
+ for (int iter = 0; iter < a.length; iter++)
+ val[iter] = BasicMath.abs(a[iter]);
+
+ return val;
+ }
+
+ /**
+ * Acos.
+ *
+ * @param x
+ * the x
+ * @return the double
+ */
+ public static double acos(double x)
+ {
+ return FastMath.acos(x);
+ }
+
+ /**
+ * Adds the.
+ *
+ * @param a
+ * the a
+ * @param b
+ * the b
+ * @return the double[]
+ */
+ public final static double[] add(final double[] a, final double b)
+ {
+ double[] val = new double[a.length];
+
+ for (int iter = 0; iter < a.length; iter++)
+ val[iter] = a[iter] + b;
+
+ return val;
+ }
+
+ /**
+ * Adds the.
+ *
+ * @param a
+ * the a
+ * @param b
+ * the b
+ * @return the double[]
+ */
+ public final static double[] add(final double[] a, final double[] b)
+ {
+ double[] val = new double[a.length];
+
+ for (int iter = 0; iter < a.length; iter++)
+ val[iter] = a[iter] + b[iter];
+
+ return val;
+ }
+
+ /**
+ * Angle.
+ *
+ * @param a
+ * the a
+ * @param b
+ * the b
+ * @return the double
+ */
+ public final static double angle(final double[] a, final double[] b)
+ {
+ double angle = acos(normalizedDotProduct(a, b));
+
+ return angle;
+ }
+
+ public static double angleAbsolute(double[] a, double[] b)
+ {
+ return Math.min(Math.abs(angle(a, b)), Math.abs(angle(a, BasicMath.mult(b, -1.0))));
+ }
+
+ /**
+ * Asin.
+ *
+ * @param x
+ * the x
+ * @return the double
+ */
+ public final static double asin(final double x)
+ {
+ return FastMath.asin(x);
+ }
+
+ public final static double[][] catColumns(final double[][] A, final double[][] B)
+ {
+ if (A.length != B.length)
+ throw new MathRuntimeException("Number of rows must be equal in A and B.");
+
+ double[][] C = new double[A.length][A[0].length + B[0].length];
+
+ for (int row = 0; row < C.length; row++)
+ {
+ for (int column = 0; column < A[row].length; column++)
+ C[row][column] = A[row][column];
+
+ for (int column = 0; column < B[row].length; column++)
+ C[row][A[row].length + column] = B[row][column];
+ }
+
+ return C;
+ }
+
+ /**
+ * Closest power of two.
+ *
+ * @param a
+ * the a
+ * @return the int
+ */
+ public final static int closestPowerOfTwo(final int a)
+ {
+ int power = a == 0 ? 0 : 32 - Integer.numberOfLeadingZeros(a - 1);
+
+ return 1 << power;
+ }
+
+ /**
+ * Cos.
+ *
+ * @param angle
+ * the angle
+ * @return the double
+ */
+ public final static double cos(final double angle)
+ {
+ return FastMath.cos(angle);
+ }
+
+ /**
+ * Creates the identity matrix.
+ *
+ * @param m
+ * the m
+ * @param n
+ * the n
+ * @return the double[][]
+ */
+ public final static double[][] createIdentityMatrix(final int m, final int n)
+ {
+ double[][] A = new double[m][n];
+
+ for (int iterRow = 0; iterRow < A.length; iterRow++)
+ {
+ for (int iterColumn = 0; iterColumn < A[iterRow].length; iterColumn++)
+ {
+ A[iterRow][iterColumn] = 0.0;
+ if (iterRow == iterColumn)
+ A[iterRow][iterColumn] = 1.0;
+ }
+ }
+
+ return A;
+ }
+
+ /**
+ * Cube.
+ *
+ * @param a
+ * the a
+ * @return the double
+ */
+ public final static double cube(final double a)
+ {
+ return a * a * a;
+ }
+
+ public final static double det(final double[][] A)
+ {
+ if (A == null || A.length != 3 || A[0].length != 3)
+ throw new MathRuntimeException("Currently can only compute determinant of 3x3 matrix.");
+
+ double det = A[0][0] * (A[1][1] * A[2][2] - A[2][1] * A[1][2]) - A[1][0]
+ * (A[0][1] * A[2][2] - A[2][1] * A[0][2]) + A[2][0] * (A[0][1] * A[1][2] - A[1][1] * A[0][2]);
+
+ return det;
+ }
+
+ /**
+ * Divide.
+ *
+ * @param a
+ * the a
+ * @param b
+ * the b
+ * @return the double[]
+ */
+ public final static double[] divide(final double[] a, final double b)
+ {
+ double[] val = new double[a.length];
+
+ for (int iter = 0; iter < a.length; iter++)
+ val[iter] = a[iter] / b;
+
+ return val;
+ }
+
+ /**
+ * Divide.
+ *
+ * @param a
+ * the a
+ * @param b
+ * the b
+ * @return the double[]
+ */
+ public final static double[] divide(final double[] a, final double[] b)
+ {
+ double[] val = new double[a.length];
+
+ for (int iter = 0; iter < a.length; iter++)
+ val[iter] = a[iter] / b[iter];
+
+ return val;
+ }
+
+ /**
+ * Dot product.
+ *
+ * @param a
+ * the a
+ * @param b
+ * the b
+ * @return the double
+ */
+ public final static double dotProduct(final double[] a, final double[] b)
+ {
+ if (a.length != b.length)
+ throw new MathRuntimeException("Vector lengths must be equal.");
+
+ double val = 0.0;
+
+ for (int iter = 0; iter < a.length; iter++)
+ val += a[iter] * b[iter];
+
+ return val;
+ }
+
+ /**
+ * Euclidean distance.
+ *
+ * @param x1
+ * the x1
+ * @param y1
+ * the y1
+ * @param z1
+ * the z1
+ * @param x2
+ * the x2
+ * @param y2
+ * the y2
+ * @param z2
+ * the z2
+ * @return the double
+ */
+ public final static double euclideanDistance(double x1, double y1, double z1, double x2, double y2, double z2)
+ {
+ return sqrt(euclideanDistanceSquared(x1, y1, z1, x2, y2, z2));
+ }
+
+ /**
+ * Euclidean distance squared.
+ *
+ * @param x1
+ * the x1
+ * @param x2
+ * the x2
+ * @return the double
+ */
+ public final static double euclideanDistanceSquared(final double x1, final double x2)
+ {
+ double xdif = x2 - x1;
+
+ return xdif * xdif;
+ }
+
+ /**
+ * Euclidean distance squared.
+ *
+ * @param x1
+ * the x1
+ * @param y1
+ * the y1
+ * @param x2
+ * the x2
+ * @param y2
+ * the y2
+ * @return the double
+ */
+ public final static double euclideanDistanceSquared(double x1, double y1, double x2, double y2)
+ {
+ double xdif = x2 - x1;
+ double ydif = y2 - y1;
+
+ return (xdif * xdif + ydif * ydif);
+ }
+
+ /**
+ * Euclidean distance squared.
+ *
+ * @param x1
+ * the x1
+ * @param y1
+ * the y1
+ * @param z1
+ * the z1
+ * @param x2
+ * the x2
+ * @param y2
+ * the y2
+ * @param z2
+ * the z2
+ * @return the double
+ */
+ public final static double euclideanDistanceSquared(double x1, double y1, double z1, double x2, double y2, double z2)
+ {
+ double xdif = x2 - x1;
+ double ydif = y2 - y1;
+ double zdif = z2 - z1;
+
+ return (xdif * xdif + ydif * ydif + zdif * zdif);
+ }
+
+ public static boolean hasNaN(double[] x)
+ {
+ for (double val : x)
+ if (Double.isNaN(val))
+ return true;
+
+ return false;
+ }
+
+ /**
+ * Checks if is identity matrix.
+ *
+ * @param A
+ * the a
+ * @return true, if is identity matrix
+ */
+ public static boolean isIdentityMatrix(double[][] A)
+ {
+ if (A == null)
+ return false;
+
+ if (A.length != A[0].length)
+ return false;
+
+ for (int iterRow = 0; iterRow < A.length; iterRow++)
+ for (int iterColumn = 0; iterColumn < A[iterRow].length; iterColumn++)
+ {
+ if (iterRow == iterColumn)
+ {
+ if (A[iterRow][iterColumn] != 1.0)
+ return false;
+ }
+ else if (A[iterRow][iterColumn] != 0.0)
+ return false;
+
+ }
+
+ return true;
+ }
+
+ public static boolean isNonNegative(double[] x)
+ {
+ for (double val : x)
+ if (val < 0)
+ return false;
+
+ return true;
+ }
+
+ /*
+ * public static long nearestPow2(long x) { double logX =
+ * Math.log10(x)/Math.log10(2); if (Math.round(logX)<=logX) return x; else
+ * return (int)Math.pow(2, Math.floor(logX+1)); }
+ */
+
+ public final static double laplanceProbabilty(double x, double b)
+ {
+ return 1.0/(2.0*b)*Math.exp(-Math.abs(x)/b);
+ }
+
+ /**
+ * Matrix to array.
+ *
+ * @param A
+ * the a
+ * @return the double[]
+ */
+ public static double[] matrixToArray(double A[][])
+ {
+ double[] val = new double[A.length * A[0].length];
+
+ for (int iterRow = 0; iterRow < A.length; iterRow++)
+ {
+ for (int iterColumn = 0; iterColumn < A[iterRow].length; iterColumn++)
+ val[iterRow * A[0].length] = A[iterRow][iterColumn];
+ }
+
+ return val;
+ }
+
+ /**
+ * Max.
+ *
+ * @param a
+ * the a
+ * @return the double
+ */
+ public final static double max(final double[] a)
+ {
+ double val = a[0];
+ for (double elem : a)
+ val = Math.max(val, elem);
+
+ return val;
+ }
+
+ /**
+ * Min.
+ *
+ * @param a
+ * the a
+ * @return the double
+ */
+ public final static double min(final double[] a)
+ {
+ double val = a[0];
+ for (double elem : a)
+ val = Math.min(val, elem);
+
+ return val;
+ }
+
+ /**
+ * Mult.
+ *
+ * @param a
+ * the a
+ * @param b
+ * the b
+ * @return the double[]
+ */
+ public final static double[] mult(final double[] a, final double b)
+ {
+ double[] val = new double[a.length];
+
+ for (int iter = 0; iter < a.length; iter++)
+ val[iter] = a[iter] * b;
+
+ return val;
+ }
+
+ /**
+ * Mult.
+ *
+ * @param a
+ * the a
+ * @param b
+ * the b
+ * @return the double[]
+ */
+ public final static double[] mult(final double[] a, final double[] b)
+ {
+ if (a == null || b == null)
+ throw new MathRuntimeException("Arrays cannot be null.");
+
+ if (a.length != b.length)
+ throw new MathRuntimeException("Arrays must be of equal length.");
+
+ double[] val = new double[a.length];
+
+ for (int iter = 0; iter < a.length; iter++)
+ val[iter] = a[iter] * b[iter];
+
+ return val;
+ }
+
+ /**
+ * Mult.
+ *
+ * @param A
+ * the a
+ * @param b
+ * the b
+ * @return the double[][]
+ */
+ public final static double[][] mult(final double[][] A, final double b)
+ {
+ double[][] X = new double[A.length][A[0].length];
+
+ for (int iterRow = 0; iterRow < A.length; iterRow++)
+ {
+ for (int iterColumn = 0; iterColumn < A[iterRow].length; iterColumn++)
+ {
+ X[iterRow][iterColumn] = A[iterRow][iterColumn] * b;
+ }
+ }
+
+ return X;
+ }
+
+ public final static double[] mult(final double[][] A, final double[] b)
+ {
+ if (A == null || b == null)
+ throw new java.lang.NullPointerException("Values cannot be null.");
+
+ if (A[0].length != b.length)
+ throw new MathRuntimeException("Matrix dimension [" + A.length + ", " + A[0].length
+ + "] does not match vector length " + b.length + ".");
+
+ double[] x = new double[A.length];
+
+ for (int iterRow = 0; iterRow < A.length; iterRow++)
+ {
+ x[iterRow] = 0.0;
+ for (int iterColumn = 0; iterColumn < A[iterRow].length; iterColumn++)
+ {
+ x[iterRow] += A[iterRow][iterColumn] * b[iterColumn];
+ }
+ }
+
+ return x;
+ }
+
+ public final static double[][] mult(final double[][] A, final double[][] B)
+ {
+ if (A == null || B == null)
+ throw new java.lang.NullPointerException("Matrices cannot be null.");
+
+ if (A[0].length != B.length)
+ throw new MathRuntimeException("Matrices' dimensions do not match.");
+
+ double[][] C = new double[A.length][B[0].length];
+
+ for (int row = 0; row < A.length; row++)
+ {
+ for (int col = 0; col < B[0].length; col++)
+ {
+ C[row][col] = 0.0;
+ for (int iter = 0; iter < B.length; iter++)
+ C[row][col] += A[row][iter] * B[iter][col];
+ }
+ }
+
+ return C;
+ }
+
+ public final static double[] multTranspose(final double[][] A, final double[] x)
+ {
+ double[] value = new double[A[0].length];
+
+ for (int iterRow = 0; iterRow < A[0].length; iterRow++)
+ {
+ value[iterRow] = 0.0;
+ for (int iterColumn = 0; iterColumn < A.length; iterColumn++)
+ {
+ value[iterRow] += A[iterColumn][iterRow] * x[iterColumn];
+ }
+ }
+
+ return value;
+ }
+
+ public final static double[][] multTranspose(double[][] A, double[][] B)
+ {
+ if (A == null || B == null)
+ throw new java.lang.NullPointerException("Matrices cannot be null.");
+
+ if (A.length != B.length)
+ throw new MathRuntimeException("Matrices' dimensions do not match.");
+
+ double[][] C = new double[A[0].length][B[0].length];
+
+ for (int colA = 0; colA < A[0].length; colA++)
+ {
+ for (int colB = 0; colB < B[0].length; colB++)
+ {
+ C[colA][colB] = 0.0;
+ for (int iter = 0; iter < A.length; iter++)
+ C[colA][colB] += A[iter][colA] * B[iter][colB];
+ }
+ }
+
+ return C;
+ }
+
+ /**
+ * Nearest multiple.
+ *
+ * @param n
+ * the n
+ * @param base
+ * the base
+ * @return the int
+ */
+ public final static int nearestMultiple(int n, int base)
+ {
+ int x = n / base;
+
+ if (x * base == n)
+ return n;
+ else
+ return x * base + base;
+ }
+
+ public final static int[] nonZeroIndicies(final double[] x, final double absTolerance)
+ {
+ // count the number of elements
+ int size = 0;
+ for (int iter = 0; iter < x.length; iter++)
+ if (Math.abs(x[iter]) > absTolerance)
+ size++;
+
+ // record the elements
+ int[] list = new int[size];
+ size = 0;
+ for (int iter = 0; iter < x.length; iter++)
+ if (Math.abs(x[iter]) > absTolerance)
+ {
+ list[size] = iter;
+ size++;
+ }
+
+ return list;
+ }
+
+ public final static double[] nonZeroValues(final double[] x, final double absTolerance)
+ {
+ int[] list = nonZeroIndicies(x, absTolerance);
+ double[] xnew = new double[list.length];
+
+ int count = 0;
+ for (int index : list)
+ {
+ xnew[count] = x[index];
+ count++;
+ }
+
+ return xnew;
+ }
+
+ /*
+ * public static double[] legendrePolynomial(int n, double x) throws
+ * ArithmeticException { double P[] = new double[n + 1];
+ *
+ * P[0] = 1.0; P[1] = x;
+ *
+ * for (int m = 1; m < n - 1; m++) { P[m + 1] = ((2.0 * m + 1.0) * x * P[m]
+ * - m * P[m - 1]) / (m + 1.0); }
+ *
+ * return P; }
+ */
+
+ /*
+ * static public double[] normalizedlegendrePolynomial(int n, double x)
+ * throws ArithmeticException { double[] P = legendrePolynomial(n, x);
+ *
+ * double norm = BasicMath.sqrt(n + .5); double sign = -1;
+ *
+ * for (int m = 0; m < P.length; m++) { P[m] = sign * P[m] / norm; sign =
+ * -sign; }
+ *
+ * return P; }
+ */
+
+ /**
+ * Norm.
+ *
+ * @param a
+ * the a
+ * @return the double
+ */
+ public static double norm(double[] a)
+ {
+ return BasicMath.sqrt(normSquared(a));
+ }
+
+ public final static double normalizedDotProduct(final double[] a, final double[] b)
+ {
+ return dotProduct(a, b) / (norm(a) * norm(b));
+ }
+
+ /**
+ * Norm squared.
+ *
+ * @param a
+ * the a
+ * @return the double
+ */
+ public final static double normSquared(final double[] a)
+ {
+ double r = 0.0;
+
+ for (double elem : a)
+ r += elem * elem;
+
+ return r;
+ }
+
+ /**
+ * Round to nearest.
+ *
+ * @param x
+ * the x
+ * @param n
+ * the n
+ * @return the double
+ */
+ public final static double roundToNearest(final double x, final int n)
+ {
+ double shift = Math.pow(10, n);
+ return Math.round(x * shift) / shift;
+ }
+
+ /**
+ * Sin.
+ *
+ * @param angle
+ * the angle
+ * @return the double
+ */
+ public final static double sin(final double angle)
+ {
+ return FastMath.sin(angle);
+ }
+
+ /**
+ * Sinc.
+ *
+ * @param x
+ * the x
+ * @return the double
+ */
+ public final static double sinc(final double x)
+ {
+ return (x == 0 || (x < 1.0e-8 && x > -1.0e-8)) ? 1.0 : sin(x) / x;
+ }
+
+ /**
+ * Sqrt.
+ *
+ * @param a
+ * the a
+ * @return the double
+ */
+ public final static double sqrt(final double a)
+ {
+ return FastMath.sqrt(a);
+ }
+
+ /**
+ * Square.
+ *
+ * @param a
+ * the a
+ * @return the double
+ */
+ public final static double square(final double a)
+ {
+ return a * a;
+ }
+
+ /**
+ * Square.
+ *
+ * @param a
+ * the a
+ * @return the double[]
+ */
+ public final static double[] square(final double[] a)
+ {
+ double[] val = new double[a.length];
+
+ for (int iter = 0; iter < a.length; iter++)
+ val[iter] = a[iter] * a[iter];
+
+ return val;
+ }
+
+ /**
+ * Subtract.
+ *
+ * @param a
+ * the a
+ * @param b
+ * the b
+ * @return the double[]
+ */
+ public final static double[] subtract(final double[] a, final double b)
+ {
+ double[] val = new double[a.length];
+
+ for (int iter = 0; iter < a.length; iter++)
+ val[iter] = a[iter] - b;
+
+ return val;
+ }
+
+ /**
+ * Subtract.
+ *
+ * @param a
+ * the a
+ * @param b
+ * the b
+ * @return the double[]
+ */
+ public final static double[] subtract(final double[] a, final double[] b)
+ {
+ if (a.length != b.length)
+ throw new MathRuntimeException("Vectors must be of same length.");
+
+ double[] val = new double[a.length];
+
+ for (int iter = 0; iter < a.length; iter++)
+ val[iter] = a[iter] - b[iter];
+
+ return val;
+ }
+
+ public final static double sum(final double[] a)
+ {
+ if (a == null)
+ return 0.0;
+
+ double sum = 0.0;
+ for (double val : a)
+ sum += val;
+
+ return sum;
+ }
+
+ public final static double[][] transpose(double[][] A)
+ {
+ if (A == null)
+ return null;
+
+ double[][] At = new double[A[0].length][A.length];
+
+ for (int row = 0; row < A.length; row++)
+ for (int col = 0; col < A[row].length; col++)
+ At[col][row] = A[row][col];
+
+ return At;
+ }
+
+}
\ No newline at end of file
diff --git a/src/main/java/edu/umd/marbl/mhap/math/FastMath.java b/src/main/java/edu/umd/marbl/mhap/math/FastMath.java
new file mode 100644
index 0000000..3a470b3
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/math/FastMath.java
@@ -0,0 +1,3379 @@
+/*
+ * Copyright 2012 Jeff Hain
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * =============================================================================
+ * Notice of fdlibm package this program is partially derived from:
+ *
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * =============================================================================
+ */
+package edu.umd.marbl.mhap.math;
+
+/**
+ * Class providing math treatments that: - are meant to be faster than those of
+ * java.lang.Math class (depending on JVM or JVM options, they might be slower),
+ * - are still somehow accurate and robust (handling of NaN and such), - do not
+ * (or not directly) generate objects at run time (no "new").
+ *
+ * Other than optimized treatments, a valuable feature of this class is the
+ * presence of angles normalization methods, derived from those used in
+ * java.lang.Math (for which, sadly, no API is provided, letting everyone with
+ * the terrible responsibility to write their own ones).
+ *
+ * Non-redefined methods of java.lang.Math class are also available, for easy
+ * replacement.
+ *
+ * Use of look-up tables: around 1 MB total, initialized on class load.
+ *
+ * - Methods with the same signature as the Math ones are meant to return
+ * "good" approximations over the whole range.
+ * - Methods whose name ends with "Fast" are meant to return "good"
+ * approximations on a reduced range only.
+ * - Methods whose name ends with "Quick" are meant to be quick, but do not
+ * return a good approximation, and might only work on a reduced range.
+ *
+ * Properties:
+ *
+ * - jodk.fastmath.strict (boolean, default is true): If true, non-redefined
+ * Math methods which results could vary between Math and StrictMath, delegate
+ * to StrictMath, and if false, to Math. Default is true to ensure consistency
+ * across various architectures.
+ *
+ * - jodk.fastmath.usejdk (boolean, default is false): If true, redefined Math
+ * methods, as well as their "Fast" or "Quick" terminated counterparts, delegate
+ * to StrictMath or Math, depending on jodk.fastmath.strict property.
+ *
+ * - jodk.fastmath.fastlog (boolean, default is true): If true, using redefined
+ * log(double), if false using StrictMath.log(double) or Math.log(double),
+ * depending on jodk.fastmath.strict property. Default is true because
+ * jodk.fastmath.strict is true by default, and StrictMath.log(double) seems
+ * usually slow.
+ *
+ * - jodk.fastmath.fastsqrt (boolean, default is false): If true, using
+ * redefined sqrt(double), if false using StrictMath.sqrt(double) or
+ * Math.sqrt(double), depending on jodk.fastmath.strict property. Default is
+ * false because StrictMath.sqrt(double) seems to usually delegate to hardware
+ * sqrt.
+ *
+ * Unless jodk.fastmath.strict is false and jodk.fastmath.usejdk is true, these
+ * treatments are consistent across various architectures, for constants and
+ * look-up tables are computed with StrictMath, or exact Math methods.
+ *
+ * --- words, words, words ---
+ *
+ * "0x42BE0000 percents of the folks out there are completely clueless about
+ * floating-point."
+ *
+ * The difference between precision and accuracy: "3.177777777777777 is a
+ * precise (16 digits) but inaccurate (only correct up to the second digit)
+ * approximation of PI=3.141592653589793(etc.)."
+ */
+public strictfp final class FastMath
+{
+
+ /*
+ * For trigonometric functions, use of look-up tables and Taylor-Lagrange
+ * formula with 4 derivatives (more take longer to compute and don't add
+ * much accuracy, less require larger tables (which use more memory, take
+ * more time to initialize, and are slower to access (at least on the
+ * machine they were developed on))).
+ *
+ * For angles reduction of cos/sin/tan functions: - for small values,
+ * instead of reducing angles, and then computing the best index for look-up
+ * tables, we compute this index right away, and use it for reduction, - for
+ * large values, treatments derived from fdlibm package are used, as done in
+ * java.lang.Math. They are faster but still "slow", so if you work with
+ * large numbers and need speed over accuracy for them, you might want to
+ * use normalizeXXXFast treatments before your function, or modify
+ * cos/sin/tan so that they call the fast normalization treatments instead
+ * of the accurate ones. NB: If an angle is huge (like PI*1e20), in double
+ * precision format its last digits are zeros, which most likely is not the
+ * case for the intended value, and doing an accurate reduction on a very
+ * inaccurate value is most likely pointless. But it gives some sort of
+ * coherence that could be needed in some cases.
+ *
+ * Multiplication on double appears to be about as fast (or not much slower)
+ * than call to <double_array>[<index>], and regrouping some doubles in a
+ * private class, to use index only once, does not seem to speed things up,
+ * so: - for uniformly tabulated values, to retrieve the parameter
+ * corresponding to an index, we recompute it rather than using an array to
+ * store it, - for cos/sin, we recompute derivatives divided by (multiplied
+ * by inverse of) factorial each time, rather than storing them in arrays.
+ *
+ * Lengths of look-up tables are usually of the form 2^n+1, for their values
+ * to be of the form (<a_constant> * k/2^n, k in 0 .. 2^n), so that
+ * particular values (PI/2, etc.) are "exactly" computed, as well as for
+ * other reasons.
+ *
+ * Most math treatments I could find on the web, including "fast" ones,
+ * usually take care of special cases (NaN, etc.) at the beginning, and then
+ * deal with the general case, which adds a useless overhead for the general
+ * (and common) case. In this class, special cases are only dealt with when
+ * needed, and if the general case does not already handle them.
+ */
+
+ // --------------------------------------------------------------------------
+ // CONFIGURATION
+ // --------------------------------------------------------------------------
+
+ // When a method delegates to the JDK, selects StrictMath (true) or Math
+ // (false); see the USE_JDK_MATH branches of the public methods.
+ // NOTE(review): looks like the hard-coded counterpart of the
+ // jodk.fastmath.strict property described in the class comment - confirm.
+ private static final boolean STRICT_MATH = true;
+
+ // When true, the redefined methods simply delegate to StrictMath or Math
+ // (depending on STRICT_MATH) instead of using the look-up tables.
+ private static final boolean USE_JDK_MATH = false;
+
+ // Disabled flag, kept for reference. It was documented as:
+ // "Used for both log(double) and log10(double)."
+ // (Written as a plain comment so that it is not picked up as the Javadoc
+ // of the next field.)
+ // private static final boolean USE_REDEFINED_LOG = true;
+
+ // NOTE(review): presumably selects the redefined sqrt(double) over the JDK
+ // one (cf. jodk.fastmath.fastsqrt in the class comment) - its use is not
+ // visible in this part of the file, confirm.
+ private static final boolean USE_REDEFINED_SQRT = false;
+
+ // Set it to true if FastMath.sqrt(double) is slow (more tables, but less
+ // calls to FastMath.sqrt(double)).
+ // When false, the asin "pow" tables are allocated with size zero.
+ private static final boolean USE_POWTABS_FOR_ASIN = false;
+
+ // --------------------------------------------------------------------------
+ // GENERAL CONSTANTS
+ // --------------------------------------------------------------------------
+
+ /**
+ * High approximation of PI, which is further from PI than the low
+ * approximation Math.PI: PI ~= 3.14159265358979323846... Math.PI ~=
+ * 3.141592653589793 FastMath.PI_SUP ~= 3.1415926535897936
+ */
+ public static final double PI_SUP = Math.nextUp(Math.PI);
+
+ private static final double ONE_DIV_F2 = 1 / 2.0;
+ private static final double ONE_DIV_F3 = 1 / 6.0;
+ private static final double ONE_DIV_F4 = 1 / 24.0;
+
+ private static final double TWO_POW_24 = Double.longBitsToDouble(0x4170000000000000L);
+ private static final double TWO_POW_N24 = Double.longBitsToDouble(0x3E70000000000000L);
+
+ private static final double TWO_POW_26 = Double.longBitsToDouble(0x4190000000000000L);
+ private static final double TWO_POW_N26 = Double.longBitsToDouble(0x3E50000000000000L);
+
+ // First double value (from zero) such that (value+-1/value == value).
+ private static final double TWO_POW_27 = Double.longBitsToDouble(0x41A0000000000000L);
+ private static final double TWO_POW_N27 = Double.longBitsToDouble(0x3E40000000000000L);
+
+ private static final double TWO_POW_N28 = Double.longBitsToDouble(0x3E30000000000000L);
+
+ private static final double TWO_POW_52 = Double.longBitsToDouble(0x4330000000000000L);
+
+ private static final double TWO_POW_N54 = Double.longBitsToDouble(0x3C90000000000000L);
+
+ private static final double TWO_POW_N55 = Double.longBitsToDouble(0x3C80000000000000L);
+
+ private static final double TWO_POW_66 = Double.longBitsToDouble(0x4410000000000000L);
+
+ private static final double TWO_POW_450 = Double.longBitsToDouble(0x5C10000000000000L);
+ private static final double TWO_POW_N450 = Double.longBitsToDouble(0x23D0000000000000L);
+
+ private static final double TWO_POW_750 = Double.longBitsToDouble(0x6ED0000000000000L);
+ private static final double TWO_POW_N750 = Double.longBitsToDouble(0x1110000000000000L);
+
+ // Smallest double normal value.
+ private static final double MIN_DOUBLE_NORMAL = Double.longBitsToDouble(0x0010000000000000L); // 2.2250738585072014E-308
+
+ private static final int MIN_DOUBLE_EXPONENT = -1074;
+ private static final int MAX_DOUBLE_EXPONENT = 1023;
+
+ private static final int MAX_FLOAT_EXPONENT = 127;
+
+ private static final double LOG_2 = StrictMath.log(2.0);
+ private static final double LOG_TWO_POW_27 = StrictMath.log(TWO_POW_27);
+ private static final double LOG_DOUBLE_MAX_VALUE = StrictMath.log(Double.MAX_VALUE);
+
+ private static final double INV_LOG_10 = 1.0 / StrictMath.log(10.0);
+
+ // private static final double DOUBLE_BEFORE_60 = Math.nextAfter(60.0, 0.0);
+
+ // --------------------------------------------------------------------------
+ // CONSTANTS FOR NORMALIZATIONS
+ // --------------------------------------------------------------------------
+
+ /*
+ * Table of constants for 1/(2*PI), 282 Hex digits (enough for normalizing
+ * doubles). 1/(2*PI) approximation = sum of
+ * ONE_OVER_TWOPI_TAB[i]*2^(-24*(i+1)).
+ */
+ private static final double ONE_OVER_TWOPI_TAB[] = { 0x28BE60, 0xDB9391, 0x054A7F, 0x09D5F4, 0x7D4D37, 0x7036D8,
+ 0xA5664F, 0x10E410, 0x7F9458, 0xEAF7AE, 0xF1586D, 0xC91B8E, 0x909374, 0xB80192, 0x4BBA82, 0x746487,
+ 0x3F877A, 0xC72C4A, 0x69CFBA, 0x208D7D, 0x4BAED1, 0x213A67, 0x1C09AD, 0x17DF90, 0x4E6475, 0x8E60D4,
+ 0xCE7D27, 0x2117E2, 0xEF7E4A, 0x0EC7FE, 0x25FFF7, 0x816603, 0xFBCBC4, 0x62D682, 0x9B47DB, 0x4D9FB3,
+ 0xC9F2C2, 0x6DD3D1, 0x8FD9A7, 0x97FA8B, 0x5D49EE, 0xB1FAF9, 0x7C5ECF, 0x41CE7D, 0xE294A4, 0xBA9AFE,
+ 0xD7EC47 };
+
+ /*
+ * Constants for 2*PI. Only the 23 most significant bits of each mantissa
+ * are used. 2*PI approximation = sum of TWOPI_TAB<i>.
+ */
+ private static final double TWOPI_TAB0 = Double.longBitsToDouble(0x401921FB40000000L);
+ private static final double TWOPI_TAB1 = Double.longBitsToDouble(0x3E94442D00000000L);
+ private static final double TWOPI_TAB2 = Double.longBitsToDouble(0x3D18469880000000L);
+ private static final double TWOPI_TAB3 = Double.longBitsToDouble(0x3B98CC5160000000L);
+ private static final double TWOPI_TAB4 = Double.longBitsToDouble(0x3A101B8380000000L);
+
+ // 53 bits of 2/pi.
+ private static final double INVPIO2 = Double.longBitsToDouble(0x3FE45F306DC9C883L); // 6.36619772367581382433e-01
+ // First 33 bits of pi/2.
+ private static final double PIO2_HI = Double.longBitsToDouble(0x3FF921FB54400000L); // 1.57079632673412561417e+00
+ // pi/2 - PIO2_HI.
+ private static final double PIO2_LO = Double.longBitsToDouble(0x3DD0B4611A626331L); // 6.07710050650619224932e-11
+ // 1/(2*pi), and 2*pi split into a high and a low part, all obtained by
+ // scaling the corresponding pi/2 constants by 4.
+ private static final double INVTWOPI = INVPIO2 / 4;
+ private static final double TWOPI_HI = 4 * PIO2_HI;
+ private static final double TWOPI_LO = 4 * PIO2_LO;
+
+ // fdlibm uses 2^19*PI/2 here, but we normalize with % 2*PI instead of %
+ // PI/2,
+ // and we can bear some more error.
+ private static final double NORMALIZE_ANGLE_MAX_MEDIUM_DOUBLE = StrictMath.pow(2, 20) * (2 * Math.PI);
+
+ /**
+ * 2*Math.PI, normalized into [-PI,PI]. Computed using
+ * normalizeMinusPiPi(double).
+ */
+ private static final double TWO_MATH_PI_IN_MINUS_PI_PI = -2.449293598153844E-16;
+
+ // --------------------------------------------------------------------------
+ // CONSTANTS AND TABLES FOR COS, SIN
+ // --------------------------------------------------------------------------
+
+ private static final int SIN_COS_TABS_SIZE = (1 << getTabSizePower(11)) + 1;
+ private static final double SIN_COS_DELTA_HI = TWOPI_HI / (SIN_COS_TABS_SIZE - 1);
+ private static final double SIN_COS_DELTA_LO = TWOPI_LO / (SIN_COS_TABS_SIZE - 1);
+ private static final double SIN_COS_INDEXER = 1 / (SIN_COS_DELTA_HI + SIN_COS_DELTA_LO);
+ private static final double[] sinTab = new double[SIN_COS_TABS_SIZE];
+ private static final double[] cosTab = new double[SIN_COS_TABS_SIZE];
+
+ // Max abs value for fast modulo, above which we use regular angle
+ // normalization.
+ // This value must be < (Integer.MAX_VALUE / SIN_COS_INDEXER), to stay in
+ // range of int type.
+ // The higher it is, the higher the error, but also the faster it is for
+ // lower values.
+ // If you set it to ((Integer.MAX_VALUE / SIN_COS_INDEXER) * 0.99), worse
+ // accuracy on double range is about 1e-10.
+ private static final double SIN_COS_MAX_VALUE_FOR_INT_MODULO = ((Integer.MAX_VALUE >> 9) / SIN_COS_INDEXER) * 0.99;
+
+ // --------------------------------------------------------------------------
+ // CONSTANTS AND TABLES FOR TAN
+ // --------------------------------------------------------------------------
+
+ // We use the following formula:
+ // 1) tan(-x) = -tan(x)
+ // 2) tan(x) = 1/tan(PI/2-x)
+ // ---> we only have to compute tan(x) on [0,A] with PI/4<=A<PI/2.
+
+ // We use indexing past look-up tables, so that indexing information
+ // allows for fast recomputation of angle in [0,PI/2] range.
+ private static final int TAN_VIRTUAL_TABS_SIZE = (1 << getTabSizePower(12)) + 1;
+
+ // Must be >= 45deg, and supposed to be >= 51.4deg, as fdlibm code is not
+ // supposed to work with values inferior to that (51.4deg is about
+ // (PI/2-Double.longBitsToDouble(0x3FE5942800000000L))).
+ private static final double TAN_MAX_VALUE_FOR_TABS = Math.toRadians(77.0);
+
+ private static final int TAN_TABS_SIZE = (int) ((TAN_MAX_VALUE_FOR_TABS / (Math.PI / 2)) * (TAN_VIRTUAL_TABS_SIZE - 1)) + 1;
+ private static final double TAN_DELTA_HI = PIO2_HI / (TAN_VIRTUAL_TABS_SIZE - 1);
+ private static final double TAN_DELTA_LO = PIO2_LO / (TAN_VIRTUAL_TABS_SIZE - 1);
+ private static final double TAN_INDEXER = 1 / (TAN_DELTA_HI + TAN_DELTA_LO);
+ private static final double[] tanTab = new double[TAN_TABS_SIZE];
+ private static final double[] tanDer1DivF1Tab = new double[TAN_TABS_SIZE];
+ private static final double[] tanDer2DivF2Tab = new double[TAN_TABS_SIZE];
+ private static final double[] tanDer3DivF3Tab = new double[TAN_TABS_SIZE];
+ private static final double[] tanDer4DivF4Tab = new double[TAN_TABS_SIZE];
+
+ // Max abs value for fast modulo, above which we use regular angle
+ // normalization.
+ // This value must be < (Integer.MAX_VALUE / TAN_INDEXER), to stay in range
+ // of int type.
+ // The higher it is, the higher the error, but also the faster it is for
+ // lower values.
+ private static final double TAN_MAX_VALUE_FOR_INT_MODULO = (((Integer.MAX_VALUE >> 9) / TAN_INDEXER) * 0.99);
+
+ // --------------------------------------------------------------------------
+ // CONSTANTS AND TABLES FOR ACOS, ASIN
+ // --------------------------------------------------------------------------
+
+ // We use the following formula:
+ // 1) acos(x) = PI/2 - asin(x)
+ // 2) asin(-x) = -asin(x)
+ // ---> we only have to compute asin(x) on [0,1].
+ // For values not close to +-1, we use look-up tables;
+ // for values near +-1, we use code derived from fdlibm.
+
+ // Supposed to be >= sin(77.2deg), as fdlibm code is supposed to work with
+ // values > 0.975,
+ // but seems to work well enough as long as value >= sin(25deg).
+ private static final double ASIN_MAX_VALUE_FOR_TABS = StrictMath.sin(Math.toRadians(73.0));
+
+ private static final int ASIN_TABS_SIZE = (1 << getTabSizePower(13)) + 1;
+ private static final double ASIN_DELTA = ASIN_MAX_VALUE_FOR_TABS / (ASIN_TABS_SIZE - 1);
+ private static final double ASIN_INDEXER = 1 / ASIN_DELTA;
+ private static final double[] asinTab = new double[ASIN_TABS_SIZE];
+ private static final double[] asinDer1DivF1Tab = new double[ASIN_TABS_SIZE];
+ private static final double[] asinDer2DivF2Tab = new double[ASIN_TABS_SIZE];
+ private static final double[] asinDer3DivF3Tab = new double[ASIN_TABS_SIZE];
+ private static final double[] asinDer4DivF4Tab = new double[ASIN_TABS_SIZE];
+
+ private static final double ASIN_MAX_VALUE_FOR_POWTABS = StrictMath.sin(Math.toRadians(88.6));
+ private static final int ASIN_POWTABS_POWER = 84;
+
+ private static final double ASIN_POWTABS_ONE_DIV_MAX_VALUE = 1 / ASIN_MAX_VALUE_FOR_POWTABS;
+ private static final int ASIN_POWTABS_SIZE = USE_POWTABS_FOR_ASIN ? (1 << getTabSizePower(12)) + 1 : 0;
+ private static final int ASIN_POWTABS_SIZE_MINUS_ONE = ASIN_POWTABS_SIZE - 1;
+ private static final double[] asinParamPowTab = new double[ASIN_POWTABS_SIZE];
+ private static final double[] asinPowTab = new double[ASIN_POWTABS_SIZE];
+ private static final double[] asinDer1DivF1PowTab = new double[ASIN_POWTABS_SIZE];
+ private static final double[] asinDer2DivF2PowTab = new double[ASIN_POWTABS_SIZE];
+ private static final double[] asinDer3DivF3PowTab = new double[ASIN_POWTABS_SIZE];
+ private static final double[] asinDer4DivF4PowTab = new double[ASIN_POWTABS_SIZE];
+
+ private static final double ASIN_PIO2_HI = Double.longBitsToDouble(0x3FF921FB54442D18L); // 1.57079632679489655800e+00
+ private static final double ASIN_PIO2_LO = Double.longBitsToDouble(0x3C91A62633145C07L); // 6.12323399573676603587e-17
+ private static final double ASIN_PS0 = Double.longBitsToDouble(0x3fc5555555555555L); // 1.66666666666666657415e-01
+ private static final double ASIN_PS1 = Double.longBitsToDouble(0xbfd4d61203eb6f7dL); // -3.25565818622400915405e-01
+ private static final double ASIN_PS2 = Double.longBitsToDouble(0x3fc9c1550e884455L); // 2.01212532134862925881e-01
+ private static final double ASIN_PS3 = Double.longBitsToDouble(0xbfa48228b5688f3bL); // -4.00555345006794114027e-02
+ private static final double ASIN_PS4 = Double.longBitsToDouble(0x3f49efe07501b288L); // 7.91534994289814532176e-04
+ private static final double ASIN_PS5 = Double.longBitsToDouble(0x3f023de10dfdf709L); // 3.47933107596021167570e-05
+ private static final double ASIN_QS1 = Double.longBitsToDouble(0xc0033a271c8a2d4bL); // -2.40339491173441421878e+00
+ private static final double ASIN_QS2 = Double.longBitsToDouble(0x40002ae59c598ac8L); // 2.02094576023350569471e+00
+ private static final double ASIN_QS3 = Double.longBitsToDouble(0xbfe6066c1b8d0159L); // -6.88283971605453293030e-01
+ private static final double ASIN_QS4 = Double.longBitsToDouble(0x3fb3b8c5b12e9282L); // 7.70381505559019352791e-02
+
+ // --------------------------------------------------------------------------
+ // CONSTANTS AND TABLES FOR ATAN
+ // --------------------------------------------------------------------------
+
+ // We use the formula atan(-x) = -atan(x)
+ // ---> we only have to compute atan(x) on [0,+infinity[.
+ // For values corresponding to angles not close to +-PI/2, we use look-up
+ // tables;
+ // for values corresponding to angles near +-PI/2, we use code derived from
+ // fdlibm.
+
+ // Supposed to be >= tan(67.7deg), as fdlibm code is supposed to work with
+ // values > 2.4375.
+ private static final double ATAN_MAX_VALUE_FOR_TABS = StrictMath.tan(Math.toRadians(74.0));
+
+ private static final int ATAN_TABS_SIZE = (1 << getTabSizePower(12)) + 1;
+ private static final double ATAN_DELTA = ATAN_MAX_VALUE_FOR_TABS / (ATAN_TABS_SIZE - 1);
+ private static final double ATAN_INDEXER = 1 / ATAN_DELTA;
+ private static final double[] atanTab = new double[ATAN_TABS_SIZE];
+ private static final double[] atanDer1DivF1Tab = new double[ATAN_TABS_SIZE];
+ private static final double[] atanDer2DivF2Tab = new double[ATAN_TABS_SIZE];
+ private static final double[] atanDer3DivF3Tab = new double[ATAN_TABS_SIZE];
+ private static final double[] atanDer4DivF4Tab = new double[ATAN_TABS_SIZE];
+
+ private static final double ATAN_HI3 = Double.longBitsToDouble(0x3ff921fb54442d18L); // 1.57079632679489655800e+00
+ // atan(inf)hi
+ private static final double ATAN_LO3 = Double.longBitsToDouble(0x3c91a62633145c07L); // 6.12323399573676603587e-17
+ // atan(inf)lo
+ private static final double ATAN_AT0 = Double.longBitsToDouble(0x3fd555555555550dL); // 3.33333333333329318027e-01
+ private static final double ATAN_AT1 = Double.longBitsToDouble(0xbfc999999998ebc4L); // -1.99999999998764832476e-01
+ private static final double ATAN_AT2 = Double.longBitsToDouble(0x3fc24924920083ffL); // 1.42857142725034663711e-01
+ private static final double ATAN_AT3 = Double.longBitsToDouble(0xbfbc71c6fe231671L); // -1.11111104054623557880e-01
+ private static final double ATAN_AT4 = Double.longBitsToDouble(0x3fb745cdc54c206eL); // 9.09088713343650656196e-02
+ private static final double ATAN_AT5 = Double.longBitsToDouble(0xbfb3b0f2af749a6dL); // -7.69187620504482999495e-02
+ private static final double ATAN_AT6 = Double.longBitsToDouble(0x3fb10d66a0d03d51L); // 6.66107313738753120669e-02
+ private static final double ATAN_AT7 = Double.longBitsToDouble(0xbfadde2d52defd9aL); // -5.83357013379057348645e-02
+ private static final double ATAN_AT8 = Double.longBitsToDouble(0x3fa97b4b24760debL); // 4.97687799461593236017e-02
+ private static final double ATAN_AT9 = Double.longBitsToDouble(0xbfa2b4442c6a6c2fL); // -3.65315727442169155270e-02
+ private static final double ATAN_AT10 = Double.longBitsToDouble(0x3f90ad3ae322da11L); // 1.62858201153657823623e-02
+
+ // --------------------------------------------------------------------------
+ // CONSTANTS AND TABLES FOR EXP AND EXPM1
+ // --------------------------------------------------------------------------
+
+ private static final double EXP_OVERFLOW_LIMIT = Double.longBitsToDouble(0x40862E42FEFA39EFL); // 7.09782712893383973096e+02
+ private static final double EXP_UNDERFLOW_LIMIT = Double.longBitsToDouble(0xC0874910D52D3051L); // -7.45133219101941108420e+02
+ private static final double EXP_MIN_INT_LIMIT = -705;
+ private static final int EXP_LO_DISTANCE_TO_ZERO_POT = 0;
+ private static final int EXP_LO_DISTANCE_TO_ZERO = (1 << EXP_LO_DISTANCE_TO_ZERO_POT);
+ private static final int EXP_LO_TAB_SIZE_POT = getTabSizePower(11);
+ private static final int EXP_LO_TAB_SIZE = (1 << EXP_LO_TAB_SIZE_POT) + 1;
+ private static final int EXP_LO_TAB_MID_INDEX = ((EXP_LO_TAB_SIZE - 1) / 2);
+ private static final int EXP_LO_INDEXING = EXP_LO_TAB_MID_INDEX / EXP_LO_DISTANCE_TO_ZERO;
+ private static final int EXP_LO_INDEXING_DIV_SHIFT = EXP_LO_TAB_SIZE_POT - 1 - EXP_LO_DISTANCE_TO_ZERO_POT;
+ private static final double[] expHiTab = new double[1 + (int) EXP_OVERFLOW_LIMIT];
+ private static final double[] expHiInvTab = new double[1 - (int) EXP_UNDERFLOW_LIMIT];
+ private static final double[] expLoPosTab = new double[EXP_LO_TAB_SIZE];
+ private static final double[] expLoNegTab = new double[EXP_LO_TAB_SIZE];
+
+ // --------------------------------------------------------------------------
+ // CONSTANTS FOR QUICK EXP
+ // --------------------------------------------------------------------------
+
+ private static final double EXP_QUICK_A = TWO_POW_52 / LOG_2;
+ private static final double EXP_QUICK_B = MAX_DOUBLE_EXPONENT * TWO_POW_52;
+ private static final double EXP_QUICK_C = Math.ceil((StrictMath.log(LOG_2 + 2 / Math.E) - LOG_2 - StrictMath
+ .log(LOG_2)) * EXP_QUICK_A);
+
+ // --------------------------------------------------------------------------
+ // CONSTANTS AND TABLES FOR LOG AND LOG1P
+ // --------------------------------------------------------------------------
+
+ private static final int LOG_BITS = getTabSizePower(12);
+ private static final int LOG_TAB_SIZE = (1 << LOG_BITS);
+ private static final double[] logXLogTab = new double[LOG_TAB_SIZE];
+ private static final double[] logXTab = new double[LOG_TAB_SIZE];
+ private static final double[] logXInvTab = new double[LOG_TAB_SIZE];
+
+ // --------------------------------------------------------------------------
+ // TABLE FOR POWERS OF TWO
+ // --------------------------------------------------------------------------
+
+ private static final double[] twoPowTab = new double[(MAX_DOUBLE_EXPONENT - MIN_DOUBLE_EXPONENT) + 1];
+
+ // --------------------------------------------------------------------------
+ // CONSTANTS AND TABLES FOR SQRT
+ // --------------------------------------------------------------------------
+
+ private static final int SQRT_LO_BITS = getTabSizePower(12);
+ private static final int SQRT_LO_TAB_SIZE = (1 << SQRT_LO_BITS);
+ private static final double[] sqrtXSqrtHiTab = new double[MAX_DOUBLE_EXPONENT - MIN_DOUBLE_EXPONENT + 1];
+ private static final double[] sqrtXSqrtLoTab = new double[SQRT_LO_TAB_SIZE];
+ private static final double[] sqrtSlopeHiTab = new double[MAX_DOUBLE_EXPONENT - MIN_DOUBLE_EXPONENT + 1];
+ private static final double[] sqrtSlopeLoTab = new double[SQRT_LO_TAB_SIZE];
+
+ // --------------------------------------------------------------------------
+ // CONSTANTS AND TABLES FOR CBRT
+ // --------------------------------------------------------------------------
+
+ private static final int CBRT_LO_BITS = getTabSizePower(12);
+ private static final int CBRT_LO_TAB_SIZE = (1 << CBRT_LO_BITS);
+ // For CBRT_LO_BITS = 12:
+ // cbrtXCbrtLoTab[0] = 1.0.
+ // cbrtXCbrtLoTab[1] = cbrt(1. 000000000000
+ // 1111111111111111111111111111111111111111b)
+ // cbrtXCbrtLoTab[2] = cbrt(1. 000000000001
+ // 1111111111111111111111111111111111111111b)
+ // cbrtXCbrtLoTab[3] = cbrt(1. 000000000010
+ // 1111111111111111111111111111111111111111b)
+ // cbrtXCbrtLoTab[4] = cbrt(1. 000000000011
+ // 1111111111111111111111111111111111111111b)
+ // etc.
+ private static final double[] cbrtXCbrtHiTab = new double[MAX_DOUBLE_EXPONENT - MIN_DOUBLE_EXPONENT + 1];
+ private static final double[] cbrtXCbrtLoTab = new double[CBRT_LO_TAB_SIZE];
+ private static final double[] cbrtSlopeHiTab = new double[MAX_DOUBLE_EXPONENT - MIN_DOUBLE_EXPONENT + 1];
+ private static final double[] cbrtSlopeLoTab = new double[CBRT_LO_TAB_SIZE];
+
+ // --------------------------------------------------------------------------
+ // PUBLIC TREATMENTS
+ // --------------------------------------------------------------------------
+
+ /**
+  * Computes the cosine of an angle from the sine/cosine look-up tables,
+  * refined with a 4th-order Taylor expansion around the nearest tabulated
+  * angle.
+  *
+  * @param angle
+  *            Angle in radians.
+  * @return Angle cosine.
+  */
+ public static double cos(double angle)
+ {
+     if (USE_JDK_MATH)
+     {
+         return STRICT_MATH ? StrictMath.cos(angle) : Math.cos(angle);
+     }
+
+     // cos is even: only the magnitude of the angle matters.
+     double reduced = Math.abs(angle);
+     if (reduced > SIN_COS_MAX_VALUE_FOR_INT_MODULO)
+     {
+         // Too large for the int-based indexing below, so reduce first
+         // (faster than using normalizeZeroTwoPi).
+         reduced = remainderTwoPi(reduced);
+         if (reduced < 0.0)
+         {
+             reduced += 2 * Math.PI;
+         }
+     }
+
+     // Nearest table slot; possibly still outside the tables range.
+     final int rawIndex = (int) (reduced * SIN_COS_INDEXER + 0.5);
+     // Offset from the tabulated angle, using the split (hi + lo) step.
+     final double delta = (reduced - rawIndex * SIN_COS_DELTA_HI) - rawIndex * SIN_COS_DELTA_LO;
+     // Bring the index within tables range. The last value of each table
+     // is the same as the first, so it is ignored (tabs size minus one)
+     // for the modulo: rawIndex % (SIN_COS_TABS_SIZE-1).
+     final int slot = rawIndex & (SIN_COS_TABS_SIZE - 2);
+     final double c = cosTab[slot];
+     final double s = sinTab[slot];
+
+     // Taylor terms evaluated from the innermost one outwards; the
+     // operation order matches the single nested expression form.
+     double poly = s * ONE_DIV_F3 + delta * c * ONE_DIV_F4;
+     poly = -c * ONE_DIV_F2 + delta * poly;
+     poly = -s + delta * poly;
+     return c + delta * poly;
+ }
+
+ /**
+  * Quick cosine: a plain table look-up at the nearest tabulated angle,
+  * without any refinement. Accuracy is about 1.6e-3 (PI/&lt;look-up tabs
+  * size&gt;) for |angle| &lt; 6588397.0 (Integer.MAX_VALUE *
+  * (2*PI/&lt;look-up tabs size&gt;)), and there is no accuracy at all for
+  * larger values.
+  *
+  * @param angle
+  *            Angle in radians.
+  * @return Angle cosine.
+  */
+ public static double cosQuick(double angle)
+ {
+     if (USE_JDK_MATH)
+     {
+         return STRICT_MATH ? StrictMath.cos(angle) : Math.cos(angle);
+     }
+
+     // cos is even, so index on the magnitude of the angle.
+     final double magnitude = Math.abs(angle);
+     final int rawIndex = (int) (magnitude * SIN_COS_INDEXER + 0.5);
+     // Wrap into the table (rawIndex % (SIN_COS_TABS_SIZE-1)).
+     final int slot = rawIndex & (SIN_COS_TABS_SIZE - 2);
+     return cosTab[slot];
+ }
+
+ /**
+ * @param angle
+ * Angle in radians.
+ * @return Angle sine.
+ */
+ public static double sin(double angle)
+ {
+ if (USE_JDK_MATH)
+ {
+ return STRICT_MATH ? StrictMath.sin(angle) : Math.sin(angle);
+ }
+ boolean negateResult;
+ if (angle < 0.0)
+ {
+ angle = -angle;
+ negateResult = true;
+ }
+ else
+ {
+ negateResult = false;
+ }
+ if (angle > SIN_COS_MAX_VALUE_FOR_INT_MODULO)
+ {
+ // Faster than using normalizeZeroTwoPi.
+ angle = remainderTwoPi(angle);
+ if (angle < 0.0)
+ {
+ angle += 2 * Math.PI;
+ }
+ }
+ int index = (int) (angle * SIN_COS_INDEXER + 0.5);
+ double delta = (angle - index * SIN_COS_DELTA_HI) - index * SIN_COS_DELTA_LO;
+ index &= (SIN_COS_TABS_SIZE - 2); // index % (SIN_COS_TABS_SIZE-1)
+ double indexSin = sinTab[index];
+ double indexCos = cosTab[index];
+ double result = indexSin
+ + delta
+ * (indexCos + delta
+ * (-indexSin * ONE_DIV_F2 + delta * (-indexCos * ONE_DIV_F3 + delta * indexSin * ONE_DIV_F4)));
+ return negateResult ? -result : result;
+ }
+
+ /**
+  * Quick sine: a plain look-up in the cosine table at the angle shifted by
+  * PI/2, without any refinement. Accuracy is about 1.6e-3 (PI/&lt;look-up
+  * tabs size&gt;) for |angle| &lt; 6588397.0 (Integer.MAX_VALUE *
+  * (2*PI/&lt;look-up tabs size&gt;)), and there is no accuracy at all for
+  * larger values.
+  *
+  * @param angle
+  *            Angle in radians.
+  * @return Angle sine.
+  */
+ public static double sinQuick(double angle)
+ {
+     if (USE_JDK_MATH)
+     {
+         return STRICT_MATH ? StrictMath.sin(angle) : Math.sin(angle);
+     }
+
+     // sin(x) == cos(|x - PI/2|), so the cosine table can be reused.
+     final double shifted = Math.abs(angle - Math.PI / 2);
+     final int rawIndex = (int) (shifted * SIN_COS_INDEXER + 0.5);
+     // Wrap into the table (rawIndex % (SIN_COS_TABS_SIZE-1)).
+     final int slot = rawIndex & (SIN_COS_TABS_SIZE - 2);
+     return cosTab[slot];
+ }
+
+ /**
+ * Computes the tangent of an angle from look-up tables, refined with a
+ * 4th-order Taylor expansion around the nearest tabulated angle.
+ *
+ * The tables only cover part of [0,PI/2]; larger angles within that range
+ * are handled through tan(x) = 1/tan(PI/2-x), and negative angles through
+ * tan(-x) = -tan(x).
+ *
+ * @param angle
+ * Angle in radians.
+ * @return Angle tangent.
+ */
+ public static double tan(double angle)
+ {
+ if (USE_JDK_MATH)
+ {
+ return STRICT_MATH ? StrictMath.tan(angle) : Math.tan(angle);
+ }
+ // Above this bound the int index computed below could leave the int
+ // range, so the angle is reduced first.
+ if (Math.abs(angle) > TAN_MAX_VALUE_FOR_INT_MODULO)
+ {
+ // Faster than using normalizeMinusHalfPiHalfPi.
+ // NOTE(review): presumably remainderTwoPi returns a value in [-PI,PI],
+ // which is then folded into [-PI/2,PI/2] using the PI period of tan -
+ // confirm against remainderTwoPi.
+ angle = remainderTwoPi(angle);
+ if (angle < -Math.PI / 2)
+ {
+ angle += Math.PI;
+ }
+ else if (angle > Math.PI / 2)
+ {
+ angle -= Math.PI;
+ }
+ }
+ // tan(-x) = -tan(x): work on the opposite angle and remember the sign.
+ boolean negateResult;
+ if (angle < 0.0)
+ {
+ angle = -angle;
+ negateResult = true;
+ }
+ else
+ {
+ negateResult = false;
+ }
+ // Nearest (virtual) table index, and offset from the corresponding
+ // tabulated angle, using the split (hi + lo) step.
+ int index = (int) (angle * TAN_INDEXER + 0.5);
+ double delta = (angle - index * TAN_DELTA_HI) - index * TAN_DELTA_LO;
+ // index modulo PI, i.e. 2*(virtual tab size minus one):
+ // index % (2*(TAN_VIRTUAL_TABS_SIZE-1))
+ index &= (2 * (TAN_VIRTUAL_TABS_SIZE - 1) - 1);
+ // Here, index is in [0,2*(TAN_VIRTUAL_TABS_SIZE-1)-1], i.e. indicates
+ // an angle in [0,PI[.
+ if (index > (TAN_VIRTUAL_TABS_SIZE - 1))
+ {
+ // Index past PI/2: mirror it around PI (index becomes that of
+ // PI-angle), which flips the sign of both the offset and the result.
+ index = (2 * (TAN_VIRTUAL_TABS_SIZE - 1)) - index;
+ delta = -delta;
+ negateResult = !negateResult;
+ }
+ double result;
+ if (index < TAN_TABS_SIZE)
+ {
+ // Index covered by the actual tables: direct Taylor expansion.
+ result = tanTab[index]
+ + delta
+ * (tanDer1DivF1Tab[index] + delta
+ * (tanDer2DivF2Tab[index] + delta
+ * (tanDer3DivF3Tab[index] + delta * tanDer4DivF4Tab[index])));
+ }
+ else
+ { // angle in ]TAN_MAX_VALUE_FOR_TABS,TAN_MAX_VALUE_FOR_INT_MODULO], or
+ // angle is NaN
+ // Using tan(angle) == 1/tan(PI/2-angle) formula: changing angle
+ // (index and delta), and inverting.
+ // The offset changes sign with the angle, hence the "- delta" terms.
+ index = (TAN_VIRTUAL_TABS_SIZE - 1) - index;
+ result = 1 / (tanTab[index] - delta
+ * (tanDer1DivF1Tab[index] - delta
+ * (tanDer2DivF2Tab[index] - delta
+ * (tanDer3DivF3Tab[index] - delta * tanDer4DivF4Tab[index]))));
+ }
+ return negateResult ? -result : result;
+ }
+
+ /**
+  * Computes the arccosine through the identity acos(x) = PI/2 - asin(x).
+  *
+  * @param value
+  *            Value in [-1,1].
+  * @return Value arccosine, in radians, in [0,PI].
+  */
+ public static double acos(double value)
+ {
+     if (USE_JDK_MATH)
+     {
+         return STRICT_MATH ? StrictMath.acos(value) : Math.acos(value);
+     }
+
+     final double halfPi = Math.PI / 2;
+     final double arcSine = FastMath.asin(value);
+     return halfPi - arcSine;
+ }
+
+ /**
+ * If value is not NaN and is outside [-1,1] range, closest value in this
+ * range is used.
+ *
+ * @param value
+ * Value in [-1,1].
+ * @return Value arccosine, in radians, in [0,PI].
+ */
+ public static double acosInRange(double value)
+ {
+ if (value <= -1)
+ {
+ return Math.PI;
+ }
+ else if (value >= 1)
+ {
+ return 0.0;
+ }
+ else
+ {
+ return FastMath.acos(value);
+ }
+ }
+
+ /**
+ * Computes the arcsine of a value.
+ *
+ * Delegates to the JDK when USE_JDK_MATH is set. Otherwise works on the
+ * magnitude of the value and picks one of three evaluations: a table
+ * lookup with a degree-4 polynomial correction, an optional second set of
+ * tables indexed through a power of the value, or a rational
+ * approximation derived from fdlibm. Returns NaN for NaN or for a
+ * magnitude above 1.
+ *
+ * @param value
+ * Value in [-1,1].
+ * @return Value arcsine, in radians, in [-PI/2,PI/2].
+ */
+ @SuppressWarnings("unused")
+ public static double asin(double value)
+ {
+ if (USE_JDK_MATH)
+ {
+ return STRICT_MATH ? StrictMath.asin(value) : Math.asin(value);
+ }
+ // asin is odd: work on the magnitude and restore the sign at the end.
+ boolean negateResult;
+ if (value < 0.0)
+ {
+ value = -value;
+ negateResult = true;
+ }
+ else
+ {
+ negateResult = false;
+ }
+ if (value <= ASIN_MAX_VALUE_FOR_TABS)
+ {
+ // Nearest table entry (rounding via +0.5) and offset from it.
+ int index = (int) (value * ASIN_INDEXER + 0.5);
+ double delta = value - index * ASIN_DELTA;
+ // Degree-4 polynomial in delta, Horner form.
+ double result = asinTab[index]
+ + delta
+ * (asinDer1DivF1Tab[index] + delta
+ * (asinDer2DivF2Tab[index] + delta
+ * (asinDer3DivF3Tab[index] + delta * asinDer4DivF4Tab[index])));
+ return negateResult ? -result : result;
+ }
+ else if (USE_POWTABS_FOR_ASIN && (value <= ASIN_MAX_VALUE_FOR_POWTABS))
+ {
+ // Index derived from a power of the normalized value; the offset is
+ // taken from the tabulated parameter for that index.
+ int index = (int) (FastMath.powFast(value * ASIN_POWTABS_ONE_DIV_MAX_VALUE, ASIN_POWTABS_POWER)
+ * ASIN_POWTABS_SIZE_MINUS_ONE + 0.5);
+ double delta = value - asinParamPowTab[index];
+ double result = asinPowTab[index]
+ + delta
+ * (asinDer1DivF1PowTab[index] + delta
+ * (asinDer2DivF2PowTab[index] + delta
+ * (asinDer3DivF3PowTab[index] + delta * asinDer4DivF4PowTab[index])));
+ return negateResult ? -result : result;
+ }
+ else
+ { // value > ASIN_MAX_VALUE_FOR_TABS, or value is NaN
+ // This part is derived from fdlibm.
+ if (value < 1.0)
+ {
+ // Rational approximation p/q in t = (1-value)/2, combined with
+ // sqrt(t) and subtracted from PI/2 (held as a hi/lo pair).
+ double t = (1.0 - value) * 0.5;
+ double p = t
+ * (ASIN_PS0 + t * (ASIN_PS1 + t * (ASIN_PS2 + t * (ASIN_PS3 + t * (ASIN_PS4 + t * ASIN_PS5)))));
+ double q = 1.0 + t * (ASIN_QS1 + t * (ASIN_QS2 + t * (ASIN_QS3 + t * ASIN_QS4)));
+ double s = FastMath.sqrt(t);
+ double z = s + s * (p / q);
+ double result = ASIN_PIO2_HI - ((z + z) - ASIN_PIO2_LO);
+ return negateResult ? -result : result;
+ }
+ else
+ { // value >= 1.0, or value is NaN
+ if (value == 1.0)
+ {
+ return negateResult ? -Math.PI / 2 : Math.PI / 2;
+ }
+ else
+ {
+ // Magnitude above 1, or NaN.
+ return Double.NaN;
+ }
+ }
+ }
+ }
+
+ /**
+ * If value is not NaN and is outside [-1,1] range, closest value in this
+ * range is used.
+ *
+ * @param value
+ * Value in [-1,1].
+ * @return Value arcsine, in radians, in [-PI/2,PI/2].
+ */
+ public static double asinInRange(double value)
+ {
+ if (value <= -1)
+ {
+ return -Math.PI / 2;
+ }
+ else if (value >= 1)
+ {
+ return Math.PI / 2;
+ }
+ else
+ {
+ return FastMath.asin(value);
+ }
+ }
+
+ /**
+ * Computes the arctangent of a value.
+ *
+ * Delegates to the JDK when USE_JDK_MATH is set. Otherwise works on the
+ * magnitude of the value: a magnitude of exactly 1.0 yields PI/4, small
+ * magnitudes use a table lookup with a degree-4 polynomial correction,
+ * larger ones use a polynomial in -1/value derived from fdlibm, and
+ * magnitudes of at least TWO_POW_66 yield PI/2. NaN yields NaN.
+ *
+ * @param value
+ * A double value.
+ * @return Value arctangent, in radians, in [-PI/2,PI/2].
+ */
+ public static double atan(double value)
+ {
+ if (USE_JDK_MATH)
+ {
+ return STRICT_MATH ? StrictMath.atan(value) : Math.atan(value);
+ }
+ // atan is odd: work on the magnitude and restore the sign at the end.
+ boolean negateResult;
+ if (value < 0.0)
+ {
+ value = -value;
+ negateResult = true;
+ }
+ else
+ {
+ negateResult = false;
+ }
+ if (value == 1.0)
+ {
+ // We want "exact" result for 1.0.
+ return negateResult ? -Math.PI / 4 : Math.PI / 4;
+ }
+ else if (value <= ATAN_MAX_VALUE_FOR_TABS)
+ {
+ // Nearest table entry (rounding via +0.5) and offset from it.
+ int index = (int) (value * ATAN_INDEXER + 0.5);
+ double delta = value - index * ATAN_DELTA;
+ // Degree-4 polynomial in delta, Horner form.
+ double result = atanTab[index]
+ + delta
+ * (atanDer1DivF1Tab[index] + delta
+ * (atanDer2DivF2Tab[index] + delta
+ * (atanDer3DivF3Tab[index] + delta * atanDer4DivF4Tab[index])));
+ return negateResult ? -result : result;
+ }
+ else
+ { // value > ATAN_MAX_VALUE_FOR_TABS, or value is NaN
+ // This part is derived from fdlibm.
+ if (value < TWO_POW_66)
+ {
+ // Polynomial in x = -1/value, split into the even-indexed (s1)
+ // and odd-indexed (s2) ATAN_AT coefficients.
+ double x = -1 / value;
+ double x2 = x * x;
+ double x4 = x2 * x2;
+ double s1 = x2
+ * (ATAN_AT0 + x4
+ * (ATAN_AT2 + x4 * (ATAN_AT4 + x4 * (ATAN_AT6 + x4 * (ATAN_AT8 + x4 * ATAN_AT10)))));
+ double s2 = x4 * (ATAN_AT1 + x4 * (ATAN_AT3 + x4 * (ATAN_AT5 + x4 * (ATAN_AT7 + x4 * ATAN_AT9))));
+ double result = ATAN_HI3 - ((x * (s1 + s2) - ATAN_LO3) - x);
+ return negateResult ? -result : result;
+ }
+ else
+ { // value >= 2^66, or value is NaN
+ if (Double.isNaN(value))
+ {
+ return Double.NaN;
+ }
+ else
+ {
+ return negateResult ? -Math.PI / 2 : Math.PI / 2;
+ }
+ }
+ }
+ }
+
+ /**
+ * For special values for which multiple conventions could be adopted,
+ * behaves like Math.atan2(double,double).
+ *
+ * @param y
+ * Coordinate on y axis.
+ * @param x
+ * Coordinate on x axis.
+ * @return Angle from x axis positive side to (x,y) position, in radians, in
+ * [-PI,PI]. Angle measure is positive when going from x axis to y
+ * axis (positive sides).
+ */
+ public static double atan2(double y, double x)
+ {
+ if (USE_JDK_MATH)
+ {
+ return STRICT_MATH ? StrictMath.atan2(y, x) : Math.atan2(y, x);
+ }
+ if (x > 0.0)
+ {
+ if (y == 0.0)
+ {
+ return (1 / y == Double.NEGATIVE_INFINITY) ? -0.0 : 0.0;
+ }
+ if (x == Double.POSITIVE_INFINITY)
+ {
+ if (y == Double.POSITIVE_INFINITY)
+ {
+ return Math.PI / 4;
+ }
+ else if (y == Double.NEGATIVE_INFINITY)
+ {
+ return -Math.PI / 4;
+ }
+ else if (y > 0.0)
+ {
+ return 0.0;
+ }
+ else if (y < 0.0)
+ {
+ return -0.0;
+ }
+ else
+ {
+ return Double.NaN;
+ }
+ }
+ else
+ {
+ return FastMath.atan(y / x);
+ }
+ }
+ else if (x < 0.0)
+ {
+ if (y == 0.0)
+ {
+ return (1 / y == Double.NEGATIVE_INFINITY) ? -Math.PI : Math.PI;
+ }
+ if (x == Double.NEGATIVE_INFINITY)
+ {
+ if (y == Double.POSITIVE_INFINITY)
+ {
+ return 3 * Math.PI / 4;
+ }
+ else if (y == Double.NEGATIVE_INFINITY)
+ {
+ return -3 * Math.PI / 4;
+ }
+ else if (y > 0.0)
+ {
+ return Math.PI;
+ }
+ else if (y < 0.0)
+ {
+ return -Math.PI;
+ }
+ else
+ {
+ return Double.NaN;
+ }
+ }
+ else if (y > 0.0)
+ {
+ return Math.PI / 2 + FastMath.atan(-x / y);
+ }
+ else if (y < 0.0)
+ {
+ return -Math.PI / 2 - FastMath.atan(x / y);
+ }
+ else
+ {
+ return Double.NaN;
+ }
+ }
+ else if (x == 0.0)
+ {
+ if (y == 0.0)
+ {
+ if (1 / x == Double.NEGATIVE_INFINITY)
+ {
+ return (1 / y == Double.NEGATIVE_INFINITY) ? -Math.PI : Math.PI;
+ }
+ else
+ {
+ return (1 / y == Double.NEGATIVE_INFINITY) ? -0.0 : 0.0;
+ }
+ }
+ if (y > 0.0)
+ {
+ return Math.PI / 2;
+ }
+ else if (y < 0.0)
+ {
+ return -Math.PI / 2;
+ }
+ else
+ {
+ return Double.NaN;
+ }
+ }
+ else
+ {
+ return Double.NaN;
+ }
+ }
+
+ /**
+ * @param value
+ * A double value.
+ * @return Value hyperbolic cosine.
+ */
+ public static double cosh(double value)
+ {
+ if (USE_JDK_MATH)
+ {
+ return STRICT_MATH ? StrictMath.cosh(value) : Math.cosh(value);
+ }
+ // cosh(x) = (exp(x)+exp(-x))/2
+ if (value < 0.0)
+ {
+ value = -value;
+ }
+ if (value < LOG_TWO_POW_27)
+ {
+ if (value < TWO_POW_N27)
+ {
+ // cosh(x)
+ // = (exp(x)+exp(-x))/2
+ // = ((1+x+x^2/2!+...) + (1-x+x^2/2!-...))/2
+ // = 1+x^2/2!+x^4/4!+...
+ // For value of x small in magnitude, the sum of the terms does
+ // not add to 1.
+ return 1;
+ }
+ else
+ {
+ double t = FastMath.exp(value);
+ return 0.5 * (t + 1 / t);
+ }
+ }
+ else if (value < LOG_DOUBLE_MAX_VALUE)
+ {
+ return 0.5 * FastMath.exp(value);
+ }
+ else
+ {
+ double t = FastMath.exp(value * 0.5);
+ return (0.5 * t) * t;
+ }
+ }
+
+ /**
+ * @param value
+ * A double value.
+ * @return Value hyperbolic sine.
+ */
+ public static double sinh(double value)
+ {
+ if (USE_JDK_MATH)
+ {
+ return STRICT_MATH ? StrictMath.sinh(value) : Math.sinh(value);
+ }
+ // sinh(x) = (exp(x)-exp(-x))/2
+ double h;
+ if (value < 0.0)
+ {
+ value = -value;
+ h = -0.5;
+ }
+ else
+ {
+ h = 0.5;
+ }
+ if (value < 22.0)
+ {
+ if (value < TWO_POW_N28)
+ {
+ return (h < 0.0) ? -value : value;
+ }
+ else
+ {
+ double t = FastMath.expm1(value);
+ // Might be more accurate, if value < 1: return
+ // h*((t+t)-t*t/(t+1.0)).
+ return h * (t + t / (t + 1.0));
+ }
+ }
+ else if (value < LOG_DOUBLE_MAX_VALUE)
+ {
+ return h * FastMath.exp(value);
+ }
+ else
+ {
+ double t = FastMath.exp(value * 0.5);
+ return (h * t) * t;
+ }
+ }
+
+ /**
+  * Computes the hyperbolic tangent,
+  * tanh(x) = sinh(x)/cosh(x) = (exp(2*x)-1)/(exp(2*x)+1),
+  * through the JDK when USE_JDK_MATH is set and through FastMath.expm1
+  * otherwise.
+  *
+  * @param value
+  *            A double value.
+  * @return Value hyperbolic tangent.
+  */
+ public static double tanh(double value)
+ {
+     if (USE_JDK_MATH)
+     {
+         return STRICT_MATH ? StrictMath.tanh(value) : Math.tanh(value);
+     }
+     // tanh is odd: work on the magnitude and restore the sign at the end.
+     final boolean negative = value < 0.0;
+     final double magnitude = negative ? -value : value;
+
+     if (!(magnitude < 22.0))
+     {
+         // magnitude >= 22.0 gives 1.0 (signed); NaN gives NaN.
+         final double limit = (magnitude != magnitude) ? Double.NaN : 1.0;
+         return negative ? -limit : limit;
+     }
+     if (magnitude < TWO_POW_N55)
+     {
+         // Tiny magnitude: x*(1+x) on the signed argument.
+         return negative ? -magnitude * (1.0 - magnitude) : magnitude * (1.0 + magnitude);
+     }
+
+     final double z;
+     if (magnitude >= 1)
+     {
+         z = 1.0 - 2.0 / (FastMath.expm1(magnitude + magnitude) + 2.0);
+     }
+     else
+     {
+         final double t = FastMath.expm1(-(magnitude + magnitude));
+         z = -t / (t + 2.0);
+     }
+     return negative ? -z : z;
+ }
+
+ /**
+ * Computes e^value.
+ *
+ * Delegates to the JDK when USE_JDK_MATH is set. Otherwise splits the
+ * argument into an integer part, a tabulated fractional part and a small
+ * remainder, and multiplies two table entries by a degree-4 polynomial
+ * in the remainder. Returns +infinity above EXP_OVERFLOW_LIMIT, 0.0
+ * below EXP_UNDERFLOW_LIMIT, and NaN for NaN.
+ *
+ * @param value
+ * A double value.
+ * @return e^value.
+ */
+ public static double exp(double value)
+ {
+ if (USE_JDK_MATH)
+ {
+ return STRICT_MATH ? StrictMath.exp(value) : Math.exp(value);
+ }
+ // exp(x) = exp([x])*exp(y)
+ // with [x] the integer part of x, and y = x-[x]
+ // ===>
+ // We find an approximation of y, called z.
+ // ===>
+ // exp(x) = exp([x])*(exp(z)*exp(epsilon))
+ // ===>
+ // We have exp([x]) and exp(z) pre-computed in tables, we "just" have to
+ // compute exp(epsilon).
+ //
+ // We use the same indexing (cast to int) to compute x integer part and
+ // the table index corresponding to z, to avoid two int casts.
+ // Also, to optimize index multiplication and division, we use powers of
+ // two, so that we can do it with bits shifts.
+ if (value >= 0.0)
+ {
+ if (value > EXP_OVERFLOW_LIMIT)
+ {
+ return Double.POSITIVE_INFINITY;
+ }
+ // i first holds value scaled by EXP_LO_INDEXING; the shift extracts
+ // the integer part, and what is left in i indexes the "lo" table.
+ int i = (int) (value * EXP_LO_INDEXING);
+ int valueInt = (i >> EXP_LO_INDEXING_DIV_SHIFT);
+ i -= (valueInt << EXP_LO_INDEXING_DIV_SHIFT);
+ // epsilon: what remains after removing both tabulated parts.
+ double delta = (value - valueInt) - i * (1.0 / EXP_LO_INDEXING);
+ // exp([x]) * exp(z) * (1 + d + d^2/2 + d^3/6 + d^4/24)
+ return expHiTab[valueInt]
+ * (expLoPosTab[i + EXP_LO_TAB_MID_INDEX] * (1 + delta
+ * (1 + delta * (1.0 / 2 + delta * (1.0 / 6 + delta * (1.0 / 24))))));
+ }
+ else
+ { // value < 0.0, or value is NaN
+ if (!(value >= EXP_UNDERFLOW_LIMIT))
+ { // value < EXP_UNDERFLOW_LIMIT, or value is NaN
+ return (value < EXP_UNDERFLOW_LIMIT) ? 0.0 : Double.NaN;
+ }
+ // TODO JVM bug with -server option: test with values of all
+ // magnitudes is very slow, if using (int)x instead of -(int)-x or
+ // (int)(long)x (which give the same result).
+ // The guessed cause is that when the same expression is used to
+ // define "i" in both sides of the above "else", some (disastrous)
+ // optimization is done which factorizes it above the first "if"
+ // statement, making it computed all the time, without the
+ // protecting "sub-ifs".
+ // Since cast from double to int with huge values is extremely slow,
+ // this makes this whole treatment extremely slow for huge values.
+ // The solution is therefore to modify a bit the expression for the
+ // "optimization" not to occur.
+ int i = -(int) -(value * EXP_LO_INDEXING);
+ // Same split as in the positive branch, with the shift applied to
+ // the negated index.
+ int valueInt = -((-i) >> EXP_LO_INDEXING_DIV_SHIFT);
+ i -= ((valueInt) << EXP_LO_INDEXING_DIV_SHIFT);
+ double delta = (value - valueInt) - i * (1.0 / EXP_LO_INDEXING);
+ // The "hi" factor comes from expHiInvTab, indexed by -valueInt.
+ double tmp = expHiInvTab[-valueInt]
+ * (expLoPosTab[i + EXP_LO_TAB_MID_INDEX] * (1 + delta
+ * (1 + delta * (1.0 / 2 + delta * (1.0 / 6 + delta * (1.0 / 24))))));
+ // We took care not to compute with subnormal values.
+ return (valueInt >= EXP_MIN_INT_LIMIT) ? tmp : tmp * TWO_POW_N54;
+ }
+ }
+
+ /**
+  * Quick exp, with a max relative error of about 3e-2 for |value| < 700.0 or
+  * so, and no accuracy at all outside this range. Derived from a note by
+  * Nicol N. Schraudolph, IDSIA, 1998.
+  *
+  * @param value
+  *            A double value.
+  * @return e^value.
+  */
+ public static double expQuick(double value)
+ {
+     if (USE_JDK_MATH)
+     {
+         return STRICT_MATH ? StrictMath.exp(value) : Math.exp(value);
+     }
+     /*
+      * Casting a double to long, even for values in long range, is slower
+      * than casting a double in int range to int and then widening to long.
+      * So only the 32 high bits of the result (sign, exponent and highest
+      * mantissa bits) are computed, as an int, and then shifted into place.
+      */
+     final double highBitsAsDouble = EXP_QUICK_A / (1L << 32) * value + (EXP_QUICK_B - EXP_QUICK_C)
+             / (1L << 32);
+     final int highBits = (int) highBitsAsDouble;
+     return Double.longBitsToDouble(((long) highBits) << 32);
+ }
+
+ /**
+ * Much more accurate than exp(value)-1, for values close to zero.
+ *
+ * @param value
+ * A double value.
+ * @return e^value-1.
+ */
+ public static double expm1(double value)
+ {
+ if (USE_JDK_MATH)
+ {
+ return STRICT_MATH ? StrictMath.expm1(value) : Math.expm1(value);
+ }
+ // If value is far from zero, we use exp(value)-1.
+ //
+ // If value is close to zero, we use the following formula:
+ // exp(value)-1
+ // = exp(valueApprox)*exp(epsilon)-1
+ // = exp(valueApprox)*(exp(epsilon)-exp(-valueApprox))
+ // = exp(valueApprox)*(1+epsilon+epsilon^2/2!+...-exp(-valueApprox))
+ // = exp(valueApprox)*((1-exp(-valueApprox))+epsilon+epsilon^2/2!+...)
+ // exp(valueApprox) and exp(-valueApprox) being stored in tables.
+
+ if (Math.abs(value) < EXP_LO_DISTANCE_TO_ZERO)
+ {
+ // Taking int part instead of rounding, which takes too long.
+ int i = (int) (value * EXP_LO_INDEXING);
+ double delta = value - i * (1.0 / EXP_LO_INDEXING);
+ return expLoPosTab[i + EXP_LO_TAB_MID_INDEX]
+ * (expLoNegTab[i + EXP_LO_TAB_MID_INDEX] + delta
+ * (1 + delta * (1.0 / 2 + delta * (1.0 / 6 + delta * (1.0 / 24 + delta * (1.0 / 120))))));
+ }
+ else
+ {
+ return FastMath.exp(value) - 1;
+ }
+ }
+
+ /**
+  * Computes the natural logarithm of a value.
+  *
+  * Delegates to the JDK when USE_JDK_MATH is set, like the other methods
+  * of this class that have a JDK equivalent. Otherwise uses a table
+  * lookup on the leading mantissa bits plus a short series, with a
+  * dedicated series for values close to 1.0.
+  *
+  * @param value
+  *            A double value.
+  * @return Value logarithm (base e): NaN for negative or NaN input,
+  *         -infinity for zero, +infinity for +infinity.
+  */
+ public static double log(double value)
+ {
+     if (USE_JDK_MATH)
+     {
+         return STRICT_MATH ? StrictMath.log(value) : Math.log(value);
+     }
+     if (value > 0.0)
+     {
+         if (value == Double.POSITIVE_INFINITY)
+         {
+             return Double.POSITIVE_INFINITY;
+         }
+
+         // For normal values not close to 1.0, we use the following formula:
+         // log(value)
+         // = log(2^exponent*1.mantissa)
+         // = log(2^exponent) + log(1.mantissa)
+         // = exponent * log(2) + log(1.mantissa)
+         // = exponent * log(2) + log(1.mantissaApprox) +
+         //   log(1.mantissa/1.mantissaApprox)
+         // = exponent * log(2) + log(1.mantissaApprox) + log(1+epsilon)
+         // = exponent * log(2) + log(1.mantissaApprox) +
+         //   epsilon-epsilon^2/2+epsilon^3/3-epsilon^4/4+...
+         // with:
+         // 1.mantissaApprox <= 1.mantissa,
+         // log(1.mantissaApprox) in table,
+         // epsilon = (1.mantissa/1.mantissaApprox)-1
+         //
+         // To avoid bad relative error for small results,
+         // values close to 1.0 are treated aside, with the formula:
+         // log(x) = z*(2+z^2*((2.0/3)+z^2*((2.0/5)+z^2*((2.0/7)+...))))
+         // with z=(x-1)/(x+1)
+
+         double h;
+         if (value > 0.95)
+         {
+             if (value < 1.14)
+             {
+                 double z = (value - 1.0) / (value + 1.0);
+                 double z2 = z * z;
+                 return z
+                         * (2 + z2
+                                 * ((2.0 / 3) + z2
+                                         * ((2.0 / 5) + z2 * ((2.0 / 7) + z2 * ((2.0 / 9) + z2 * ((2.0 / 11)))))));
+             }
+             h = 0.0;
+         }
+         else if (value < MIN_DOUBLE_NORMAL)
+         {
+             // Ensuring value is normal.
+             value *= TWO_POW_52;
+             // log(x*2^52) = log(x) + 52*ln(2)
+             // so log(x) = log(x*2^52) - 52*ln(2)
+             h = -52 * LOG_2;
+         }
+         else
+         {
+             h = 0.0;
+         }
+
+         // High 32 bits of the double: sign, exponent and top mantissa bits.
+         int valueBitsHi = (int) (Double.doubleToRawLongBits(value) >> 32);
+         int valueExp = (valueBitsHi >> 20) - MAX_DOUBLE_EXPONENT;
+         // Getting the first LOG_BITS bits of the mantissa.
+         int xIndex = ((valueBitsHi << 12) >>> (32 - LOG_BITS));
+
+         // 1.mantissa/1.mantissaApprox - 1
+         double z = (value * twoPowTab[-valueExp - MIN_DOUBLE_EXPONENT]) * logXInvTab[xIndex] - 1;
+
+         // log(1+z) ~= z - z^2/2 + z^3/3
+         z *= (1 - z * ((1.0 / 2) - z * ((1.0 / 3))));
+
+         return h + valueExp * LOG_2 + (logXLogTab[xIndex] + z);
+
+     }
+     else if (value == 0.0)
+     {
+         return Double.NEGATIVE_INFINITY;
+     }
+     else
+     { // value < 0.0, or value is NaN
+         return Double.NaN;
+     }
+ }
+
+ /**
+  * Quick log, with a max relative error of about 2.8e-4 for values in
+  * ]0,+infinity[, and no accuracy at all outside this range.
+  *
+  * @param value
+  *            A double value.
+  * @return Approximate value logarithm (base e).
+  */
+ public static double logQuick(double value)
+ {
+     if (USE_JDK_MATH)
+     {
+         return STRICT_MATH ? StrictMath.log(value) : Math.log(value);
+     }
+     /*
+      * The inverse of Schraudolph's method for exp is very inaccurate near
+      * 1 and not that fast (even using floats), especially with the extra
+      * tests needed near 1, so it is not used. Instead, a simplified
+      * version of log's algorithm is used:
+      * log(value) ~= exponent * log(2) + log(1.mantissaApprox)
+      */
+
+     // Correction added to the result when value had to be rescaled.
+     double h = 0.0;
+     if (value > 0.87)
+     {
+         if (value < 1.16)
+         {
+             // Near 1.0: first term of the series in (x-1)/(x+1).
+             return 2.0 * (value - 1.0) / (value + 1.0);
+         }
+     }
+     else if (value < MIN_DOUBLE_NORMAL)
+     {
+         // Scale by 2^52 and compensate with -52*log(2).
+         value *= TWO_POW_52;
+         h = -52 * LOG_2;
+     }
+
+     final int highBits = (int) (Double.doubleToRawLongBits(value) >> 32);
+     final int exponent = (highBits >> 20) - MAX_DOUBLE_EXPONENT;
+     // First LOG_BITS bits of the mantissa.
+     final int tabIndex = ((highBits << 12) >>> (32 - LOG_BITS));
+
+     return h + exponent * LOG_2 + logXLogTab[tabIndex];
+ }
+
+ /**
+  * Computes the base 10 logarithm of a value.
+  *
+  * Delegates to the JDK when USE_JDK_MATH is set, like the other methods
+  * of this class that have a JDK equivalent. Otherwise scales the natural
+  * logarithm computed by log(double).
+  *
+  * @param value
+  *            A double value.
+  * @return Value logarithm (base 10).
+  */
+ public static double log10(double value)
+ {
+     if (USE_JDK_MATH)
+     {
+         return STRICT_MATH ? StrictMath.log10(value) : Math.log10(value);
+     }
+     // INV_LOG_10 is < 1, but there is no risk of log(double)
+     // overflow (positive or negative) while the end result shouldn't,
+     // since log(Double.MIN_VALUE) and log(Double.MAX_VALUE) have
+     // magnitudes of just a few hundreds.
+     return log(value) * INV_LOG_10;
+ }
+
+ /**
+ * Computes log(1+value). Much more accurate than log(1+value), for values
+ * close to zero.
+ *
+ * Delegates to the JDK when USE_JDK_MATH is set. Returns NaN for values
+ * below -1.0 or NaN, -infinity for -1.0, and +infinity for +infinity.
+ *
+ * @param value
+ * A double value.
+ * @return Logarithm (base e) of (1+value).
+ */
+ public static double log1p(double value)
+ {
+ if (USE_JDK_MATH)
+ {
+ return STRICT_MATH ? StrictMath.log1p(value) : Math.log1p(value);
+ }
+
+ if (value > -1.0)
+ {
+ if (value == Double.POSITIVE_INFINITY)
+ {
+ return Double.POSITIVE_INFINITY;
+ }
+
+ // ln'(x) = 1/x
+ // so
+ // log(x+epsilon) ~= log(x) + epsilon/x
+ //
+ // Let u be 1+value rounded:
+ // 1+value = u+epsilon
+ //
+ // log(1+value)
+ // = log(u+epsilon)
+ // ~= log(u) + epsilon/u
+ // We compute log(u) as done in log(double), and then add the
+ // corrective term.
+
+ double valuePlusOne = 1.0 + value;
+ if (valuePlusOne == 1.0)
+ {
+ // value is too small to change 1.0: log(1+value) ~= value.
+ return value;
+ }
+ else if (Math.abs(value) < 0.15)
+ {
+ // Series in z = value/(value+2), i.e. (x-1)/(x+1) with x = 1+value.
+ double z = value / (value + 2.0);
+ double z2 = z * z;
+ return z
+ * (2 + z2
+ * ((2.0 / 3) + z2
+ * ((2.0 / 5) + z2 * ((2.0 / 7) + z2 * ((2.0 / 9) + z2 * ((2.0 / 11)))))));
+ }
+
+ // High 32 bits of valuePlusOne, with the sign bit masked off.
+ int valuePlusOneBitsHi = (int) (Double.doubleToRawLongBits(valuePlusOne) >> 32) & 0x7FFFFFFF;
+ int valuePlusOneExp = (valuePlusOneBitsHi >> 20) - MAX_DOUBLE_EXPONENT;
+ // Getting the first LOG_BITS bits of the mantissa.
+ int xIndex = ((valuePlusOneBitsHi << 12) >>> (32 - LOG_BITS));
+
+ // 1.mantissa/1.mantissaApprox - 1
+ double z = (valuePlusOne * twoPowTab[-valuePlusOneExp - MIN_DOUBLE_EXPONENT]) * logXInvTab[xIndex] - 1;
+
+ // log(1+z) ~= z - z^2/2 + z^3/3
+ z *= (1 - z * ((1.0 / 2) - z * (1.0 / 3)));
+
+ // Adding epsilon/valuePlusOne to z,
+ // with
+ // epsilon = value - (valuePlusOne-1)
+ // (valuePlusOne + epsilon ~= 1+value (not rounded))
+
+ return valuePlusOneExp * LOG_2 + logXLogTab[xIndex] + (z + (value - (valuePlusOne - 1)) / valuePlusOne);
+ }
+ else if (value == -1.0)
+ {
+ return Double.NEGATIVE_INFINITY;
+ }
+ else
+ { // value < -1.0, or value is NaN
+ return Double.NaN;
+ }
+ }
+
+ /**
+  * Raises a value to a power. 1e-13ish accuracy (or better) on whole
+  * double range.
+  *
+  * @param value
+  *            A double value.
+  * @param power
+  *            A power.
+  * @return value^power.
+  */
+ public static double pow(double value, double power)
+ {
+     if (USE_JDK_MATH)
+     {
+         return STRICT_MATH ? StrictMath.pow(value, power) : Math.pow(value, power);
+     }
+     if (power == 0.0)
+     {
+         return 1.0;
+     }
+     if (power == 1.0)
+     {
+         return value;
+     }
+     if (!(value <= 0.0))
+     { // value > 0.0, or value is NaN
+         return FastMath.exp(power * FastMath.log(value));
+     }
+
+     // From here on, value <= 0.0: the result depends on whether power is
+     // an integer, and on its parity.
+     // parity: 0 if power is not an integer, 1 if even integer, -1 if odd
+     // integer.
+     final double powerMagnitude = Math.abs(power);
+     final int parity;
+     if (powerMagnitude >= (TWO_POW_52 * 2))
+     {
+         // The binary digit just before comma is outside mantissa,
+         // thus it is always 0: power is an even integer.
+         parity = 1;
+     }
+     else if (powerMagnitude <= (double) Integer.MAX_VALUE)
+     {
+         // Casting to int when the magnitude permits, as it is faster than
+         // casting to long.
+         final int asInt = (int) power;
+         // A mismatch means power is not an integer (it cannot be NaN
+         // here, due to the test against Integer.MAX_VALUE).
+         parity = (power != (double) asInt) ? 0 : (((asInt & 1) == 0) ? 1 : -1);
+     }
+     else
+     {
+         final long asLong = (long) power;
+         if (power == (double) asLong)
+         {
+             parity = ((asLong & 1) == 0) ? 1 : -1;
+         }
+         else
+         { // power is not an integer, or is NaN
+             if (power != power)
+             {
+                 return Double.NaN;
+             }
+             parity = 0;
+         }
+     }
+     final boolean oddPower = parity < 0;
+
+     if (value == 0.0)
+     {
+         if (power < 0.0)
+         {
+             // 1/value keeps the sign of the zero for odd powers.
+             return oddPower ? 1 / value : Double.POSITIVE_INFINITY;
+         }
+         // power > 0.0 (0 and NaN cases already treated)
+         return oddPower ? value : 0.0;
+     }
+
+     // value < 0.0
+     if (value == Double.NEGATIVE_INFINITY)
+     {
+         if (oddPower)
+         {
+             return (power < 0.0) ? -0.0 : Double.NEGATIVE_INFINITY;
+         }
+         // power even integer, or not an integer
+         return (power < 0.0) ? 0.0 : Double.POSITIVE_INFINITY;
+     }
+     if (parity == 0)
+     {
+         // Negative finite value raised to a non-integer power.
+         return Double.NaN;
+     }
+     return parity * FastMath.exp(power * FastMath.log(-value));
+ }
+
+ /**
+  * Quick pow, with a max relative error of about 3.5e-2 for |a^b| < 1e10, of
+  * about 0.17 for |a^b| < 1e50, and worse accuracy above.
+  *
+  * @param value
+  *            A double value, in ]0,+infinity[ (strictly positive and
+  *            finite).
+  * @param power
+  *            A double value.
+  * @return value^power.
+  */
+ public static double powQuick(double value, double power)
+ {
+     if (!USE_JDK_MATH)
+     {
+         // value^power = exp(power * log(value)), using the quick log.
+         return FastMath.exp(power * FastMath.logQuick(value));
+     }
+     if (STRICT_MATH)
+     {
+         return StrictMath.pow(value, power);
+     }
+     return Math.pow(value, power);
+ }
+
+ /**
+ * This treatment is somehow accurate for low values of |power|, and for
+ * |power*getExponent(value)| < 1023 or so (to stay away from double extreme
+ * magnitudes (large and small)).
+ *
+ * @param value
+ * A double value.
+ * @param power
+ * A power.
+ * @return value^power.
+ */
+ public static double powFast(double value, int power)
+ {
+ if (USE_JDK_MATH)
+ {
+ return STRICT_MATH ? StrictMath.pow(value, power) : Math.pow(value, power);
+ }
+ if (power > 5)
+ { // Most common case first.
+ double oddRemains = 1.0;
+ do
+ {
+ // Test if power is odd.
+ if ((power & 1) != 0)
+ {
+ oddRemains *= value;
+ }
+ value *= value;
+ power >>= 1; // power = power / 2
+ }
+ while (power > 5);
+ // Here, power is in [3,5]: faster to finish outside the loop.
+ if (power == 3)
+ {
+ return oddRemains * value * value * value;
+ }
+ else
+ {
+ double v2 = value * value;
+ if (power == 4)
+ {
+ return oddRemains * v2 * v2;
+ }
+ else
+ { // power == 5
+ return oddRemains * v2 * v2 * value;
+ }
+ }
+ }
+ else if (power >= 0)
+ { // power in [0,5]
+ if (power < 3)
+ { // power in [0,2]
+ if (power == 2)
+ { // Most common case first.
+ return value * value;
+ }
+ else if (power != 0)
+ { // faster than == 1
+ return value;
+ }
+ else
+ { // power == 0
+ return 1.0;
+ }
+ }
+ else
+ { // power in [3,5]
+ if (power == 3)
+ {
+ return value * value * value;
+ }
+ else
+ { // power in [4,5]
+ double v2 = value * value;
+ if (power == 4)
+ {
+ return v2 * v2;
+ }
+ else
+ { // power == 5
+ return v2 * v2 * value;
+ }
+ }
+ }
+ }
+ else
+ { // power < 0
+ // Opposite of Integer.MIN_VALUE does not exist as int.
+ if (power == Integer.MIN_VALUE)
+ {
+ // Integer.MAX_VALUE = -(power+1)
+ return 1.0 / (FastMath.powFast(value, Integer.MAX_VALUE) * value);
+ }
+ else
+ {
+ return 1.0 / FastMath.powFast(value, -power);
+ }
+ }
+ }
+
+ /**
+ * Returns the exact result, provided it's in double range.
+ *
+ * @param power
+ * A power.
+ * @return 2^power.
+ */
+ public static double twoPow(int power)
+ {
+ /*
+ * Using table, to go faster than NumbersUtils.twoPow(int).
+ */
+ if (power >= 0)
+ {
+ if (power <= MAX_DOUBLE_EXPONENT)
+ {
+ return twoPowTab[power - MIN_DOUBLE_EXPONENT];
+ }
+ else
+ {
+ // Overflow.
+ return Double.POSITIVE_INFINITY;
+ }
+ }
+ else
+ {
+ if (power >= MIN_DOUBLE_EXPONENT)
+ {
+ return twoPowTab[power - MIN_DOUBLE_EXPONENT];
+ }
+ else
+ {
+ // Underflow.
+ return 0.0;
+ }
+ }
+ }
+
+ /**
+ * Computes the square root of a value.
+ *
+ * Delegates to the JDK when USE_JDK_MATH is set or USE_REDEFINED_SQRT is
+ * not. Otherwise builds an initial guess from tables indexed by the
+ * exponent and leading mantissa bits, then refines it with three
+ * Newton-style steps that reuse the initial slope. Returns NaN for
+ * negative or NaN input, the input itself for zero, and +infinity for
+ * +infinity.
+ *
+ * @param value
+ * A double value.
+ * @return Value square root.
+ */
+ @SuppressWarnings("unused")
+ public static double sqrt(double value)
+ {
+ if (USE_JDK_MATH || (!USE_REDEFINED_SQRT))
+ {
+ return STRICT_MATH ? StrictMath.sqrt(value) : Math.sqrt(value);
+ }
+ // See cbrt for comments, sqrt uses the same ideas.
+
+ if (!(value > 0.0))
+ { // value <= 0.0, or value is NaN
+ return (value == 0.0) ? value : Double.NaN;
+ }
+ else if (value == Double.POSITIVE_INFINITY)
+ {
+ return Double.POSITIVE_INFINITY;
+ }
+
+ // h is the final multiplier: 2 to undo the halving of the result (see
+ // "value *= 0.25" below), times 2^-26 when value was scaled by 2^52.
+ double h;
+ if (value < MIN_DOUBLE_NORMAL)
+ {
+ value *= TWO_POW_52;
+ h = 2 * TWO_POW_N26;
+ }
+ else
+ {
+ h = 2.0;
+ }
+
+ // High 32 bits of the double: exponent and top mantissa bits.
+ int valueBitsHi = (int) (Double.doubleToRawLongBits(value) >> 32);
+ int valueExponentIndex = (valueBitsHi >> 20) + (-MAX_DOUBLE_EXPONENT - MIN_DOUBLE_EXPONENT);
+ // First SQRT_LO_BITS bits of the mantissa.
+ int xIndex = ((valueBitsHi << 12) >>> (32 - SQRT_LO_BITS));
+
+ // Initial guess and slope, each the product of an exponent-indexed and
+ // a mantissa-indexed table entry.
+ double result = sqrtXSqrtHiTab[valueExponentIndex] * sqrtXSqrtLoTab[xIndex];
+ double slope = sqrtSlopeHiTab[valueExponentIndex] * sqrtSlopeLoTab[xIndex];
+ // (result/2)^2 = value/4
+ value *= 0.25;
+
+ // Three refinement steps with the same slope.
+ result += (value - result * result) * slope;
+ result += (value - result * result) * slope;
+ return h * (result + (value - result * result) * slope);
+ }
+
+ /**
+ * Computes the cubic root of a value.
+ *
+ * Delegates to the JDK when USE_JDK_MATH is set. Otherwise builds an
+ * initial guess from tables indexed by the exponent and leading mantissa
+ * bits, then refines it with three Newton-style steps that reuse the
+ * initial slope. Infinities, NaN and zeros (with their sign) are
+ * returned unchanged.
+ *
+ * @param value
+ * A double value.
+ * @return Value cubic root.
+ */
+ public static double cbrt(double value)
+ {
+ if (USE_JDK_MATH)
+ {
+ return STRICT_MATH ? StrictMath.cbrt(value) : Math.cbrt(value);
+ }
+ // h is the final multiplier: it carries the result sign, the factor 2
+ // that undoes the halving of the result, and 2^-26 when value had to be
+ // scaled by 2^78 to become normal.
+ double h;
+ if (value < 0.0)
+ {
+ if (value == Double.NEGATIVE_INFINITY)
+ {
+ return Double.NEGATIVE_INFINITY;
+ }
+ value = -value;
+ // Making sure value is normal.
+ if (value < MIN_DOUBLE_NORMAL)
+ {
+ value *= (TWO_POW_52 * TWO_POW_26);
+ // h = <result_sign> * <result_multiplicator_to_avoid_overflow>
+ // / <cbrt(value_multiplicator_to_avoid_subnormal)>
+ h = -2 * TWO_POW_N26;
+ }
+ else
+ {
+ h = -2.0;
+ }
+ }
+ else
+ {
+ if (!(value < Double.POSITIVE_INFINITY))
+ { // value is +infinity, or value is NaN
+ return value;
+ }
+ // Making sure value is normal.
+ if (value < MIN_DOUBLE_NORMAL)
+ {
+ if (value == 0.0)
+ {
+ // cbrt(0.0) = 0.0, cbrt(-0.0) = -0.0
+ return value;
+ }
+ value *= (TWO_POW_52 * TWO_POW_26);
+ h = 2 * TWO_POW_N26;
+ }
+ else
+ {
+ h = 2.0;
+ }
+ }
+
+ // Normal value is (2^<value exponent> * <a value in [1,2[>).
+ // First member cubic root is computed, and multiplied with an
+ // approximation of the cubic root of the second member, to end up with
+ // a good guess of the result before using Newton's (or Archimedes's)
+ // method.
+ // To compute the cubic root approximation, we use the formula
+ // "cbrt(value) = cbrt(x) * cbrt(value/x)",
+ // choosing x as close to value as possible but inferior to it, so that
+ // cbrt(value/x) is close to 1
+ // (we could iterate on this method, using value/x as new value for each
+ // iteration, but finishing with Newton's method is faster).
+
+ // Shift and cast into an int, which overall is faster than working with
+ // a long.
+ int valueBitsHi = (int) (Double.doubleToRawLongBits(value) >> 32);
+ int valueExponentIndex = (valueBitsHi >> 20) + (-MAX_DOUBLE_EXPONENT - MIN_DOUBLE_EXPONENT);
+ // Getting the first CBRT_LO_BITS bits of the mantissa.
+ int xIndex = ((valueBitsHi << 12) >>> (32 - CBRT_LO_BITS));
+ double result = cbrtXCbrtHiTab[valueExponentIndex] * cbrtXCbrtLoTab[xIndex];
+ double slope = cbrtSlopeHiTab[valueExponentIndex] * cbrtSlopeLoTab[xIndex];
+
+ // Lowering values to avoid overflows when using Newton's method
+ // (we will then just have to return twice the result).
+ // result^3 = value
+ // (result/2)^3 = value/8
+ value *= 0.125;
+ // No need to divide result here, as division is factorized in result
+ // computation tables.
+ // result *= 0.5;
+
+ // Newton's method, looking for y = x^(1/p):
+ // y(n) = y(n-1) + (x-y(n-1)^p) * slope(y(n-1))
+ // y(n) = y(n-1) + (x-y(n-1)^p) * (1/p)*(x(n-1)^(1/p-1))
+ // y(n) = y(n-1) + (x-y(n-1)^p) * (1/p)*(x(n-1)^((1-p)/p))
+ // with x(n-1)=y(n-1)^p, i.e.:
+ // y(n) = y(n-1) + (x-y(n-1)^p) * (1/p)*(y(n-1)^(1-p))
+ //
+ // For p=3:
+ // y(n) = y(n-1) + (x-y(n-1)^3) * (1/(3*y(n-1)^2))
+
+ // To save time, we don't recompute the slope between Newton's method
+ // steps, as initial slope is good enough for a few iterations.
+ //
+ // NB: slope = 1/(3*trueResult*trueResult)
+ // As we have result = trueResult/2 (to avoid overflows), we have:
+ // slope = 4/(3*result*result)
+ // = (4/3)*resultInv*resultInv
+ // with newResultInv = 1/newResult
+ // = 1/(oldResult+resultDelta)
+ // = (oldResultInv)*1/(1+resultDelta/oldResult)
+ // = (oldResultInv)*1/(1+resultDelta*oldResultInv)
+ // ~= (oldResultInv)*(1-resultDelta*oldResultInv)
+ // ===> Successive slopes could be computed without division, if needed,
+ // by computing resultInv (instead of slope right away) and retrieving
+ // slopes from it.
+
+ // Three refinement steps with the same slope.
+ result += (value - result * result * result) * slope;
+ result += (value - result * result * result) * slope;
+ return h * (result + (value - result * result * result) * slope);
+ }
+
+ /**
+ * Returns dividend - divisor * n, where n is the mathematical integer
+ * closest to dividend/divisor. If dividend/divisor is equally close to
+ * surrounding integers, we choose n to be the integer of smallest
+ * magnitude, which makes this treatment differ from
+ * Math.IEEEremainder(double,double), where n is chosen to be the even
+ * integer. Note that the choice of n is not done considering the double
+ * approximation of dividend/divisor, because it could cause result to be
+ * outside [-|divisor|/2,|divisor|/2] range. The practical effect is that if
+ * multiple results would be possible, we always choose the result that is
+ * the closest to (and has the same sign as) the dividend. Ex. : - for
+ * (-3.0,2.0), this method returns -1.0, whereas Math.IEEEremainder returns
+ * 1.0. - for (-5.0,2.0), both this method and Math.IEEEremainder return
+ * -1.0.
+ *
+ * If the remainder is zero, its sign is the same as the sign of the first
+ * argument. If either argument is NaN, or the first argument is infinite,
+ * or the second argument is positive zero or negative zero, then the result
+ * is NaN. If the first argument is finite and the second argument is
+ * infinite, then the result is the same as the first argument.
+ *
+ * NB: - Modulo operator (%) returns a value in ]-|divisor|,|divisor|[,
+ * which sign is the same as dividend. - As for modulo operator, the sign of
+ * the divisor has no effect on the result.
+ *
+ * @param dividend
+ * Dividend.
+ * @param divisor
+ * Divisor.
+ * @return Remainder of dividend/divisor, i.e. a value in
+ * [-|divisor|/2,|divisor|/2].
+ */
+ public static double remainder(double dividend, double divisor)
+ {
+ if (USE_JDK_MATH)
+ {
+ // no Math equivalent (differs from IEEEremainder(double,double))
+ }
+ if (Double.isInfinite(divisor))
+ {
+ if (Double.isInfinite(dividend))
+ {
+ return Double.NaN;
+ }
+ else
+ {
+ return dividend;
+ }
+ }
+ double value = dividend % divisor;
+ if (Math.abs(value + value) > Math.abs(divisor))
+ {
+ return value + ((value > 0.0) ? -Math.abs(divisor) : Math.abs(divisor));
+ }
+ else
+ {
+ return value;
+ }
+ }
+
+ /**
+ * @param angle
+ * Angle in radians.
+ * @return The same angle, in radians, but in [-Math.PI,Math.PI].
+ */
+ public static double normalizeMinusPiPi(double angle)
+ {
+ // Not modifying values in output range.
+ if ((angle >= -Math.PI) && (angle <= Math.PI))
+ {
+ return angle;
+ }
+ double angleMinusPiPiOrSo = remainderTwoPi(angle);
+ if (angleMinusPiPiOrSo < -Math.PI)
+ {
+ return -Math.PI;
+ }
+ else if (angleMinusPiPiOrSo > Math.PI)
+ {
+ return Math.PI;
+ }
+ else
+ {
+ return angleMinusPiPiOrSo;
+ }
+ }
+
+ /**
+ * Not accurate for large values.
+ *
+ * @param angle
+ * Angle in radians.
+ * @return The same angle, in radians, but in [-Math.PI,Math.PI].
+ */
+ public static double normalizeMinusPiPiFast(double angle)
+ {
+ // Not modifying values in output range.
+ if ((angle >= -Math.PI) && (angle <= Math.PI))
+ {
+ return angle;
+ }
+ double angleMinusPiPiOrSo = remainderTwoPiFast(angle);
+ if (angleMinusPiPiOrSo < -Math.PI)
+ {
+ return -Math.PI;
+ }
+ else if (angleMinusPiPiOrSo > Math.PI)
+ {
+ return Math.PI;
+ }
+ else
+ {
+ return angleMinusPiPiOrSo;
+ }
+ }
+
+ /**
+ * @param angle
+ * Angle in radians.
+ * @return The same angle, in radians, but in [0,2*Math.PI].
+ */
+ public static double normalizeZeroTwoPi(double angle)
+ {
+ // Not modifying values in output range.
+ if ((angle >= 0.0) && (angle <= 2 * Math.PI))
+ {
+ return angle;
+ }
+ double angleMinusPiPiOrSo = remainderTwoPi(angle);
+ if (angleMinusPiPiOrSo < 0.0)
+ {
+ // Not a problem if angle is slightly < -Math.PI,
+ // since result ends up around PI, which is not near output range
+ // borders.
+ return angleMinusPiPiOrSo + 2 * Math.PI;
+ }
+ else
+ {
+ // Not a problem if angle is slightly > Math.PI,
+ // since result ends up around PI, which is not near output range
+ // borders.
+ return angleMinusPiPiOrSo;
+ }
+ }
+
+ /**
+ * Not accurate for large values.
+ *
+ * @param angle
+ * Angle in radians.
+ * @return The same angle, in radians, but in [0,2*Math.PI].
+ */
+ public static double normalizeZeroTwoPiFast(double angle)
+ {
+ // Not modifying values in output range.
+ if ((angle >= 0.0) && (angle <= 2 * Math.PI))
+ {
+ return angle;
+ }
+ double angleMinusPiPiOrSo = remainderTwoPiFast(angle);
+ if (angleMinusPiPiOrSo < 0.0)
+ {
+ // Not a problem if angle is slightly < -Math.PI,
+ // since result ends up around PI, which is not near output range
+ // borders.
+ return angleMinusPiPiOrSo + 2 * Math.PI;
+ }
+ else
+ {
+ // Not a problem if angle is slightly > Math.PI,
+ // since result ends up around PI, which is not near output range
+ // borders.
+ return angleMinusPiPiOrSo;
+ }
+ }
+
+ /**
+ * @param angle
+ * Angle in radians.
+ * @return Angle value modulo PI, in radians, in [-Math.PI/2,Math.PI/2].
+ */
+ public static double normalizeMinusHalfPiHalfPi(double angle)
+ {
+ // Not modifying values in output range.
+ if ((angle >= -Math.PI / 2) && (angle <= Math.PI / 2))
+ {
+ return angle;
+ }
+ double angleMinusPiPiOrSo = remainderTwoPi(angle);
+ if (angleMinusPiPiOrSo < -Math.PI / 2)
+ {
+ // Not a problem if angle is slightly < -Math.PI,
+ // since result ends up around zero, which is not near output range
+ // borders.
+ return angleMinusPiPiOrSo + Math.PI;
+ }
+ else if (angleMinusPiPiOrSo > Math.PI / 2)
+ {
+ // Not a problem if angle is slightly > Math.PI,
+ // since result ends up around zero, which is not near output range
+ // borders.
+ return angleMinusPiPiOrSo - Math.PI;
+ }
+ else
+ {
+ return angleMinusPiPiOrSo;
+ }
+ }
+
+ /**
+ * Not accurate for large values.
+ *
+ * @param angle
+ * Angle in radians.
+ * @return Angle value modulo PI, in radians, in [-Math.PI/2,Math.PI/2].
+ */
+ public static double normalizeMinusHalfPiHalfPiFast(double angle)
+ {
+ // Not modifying values in output range.
+ if ((angle >= -Math.PI / 2) && (angle <= Math.PI / 2))
+ {
+ return angle;
+ }
+ double angleMinusPiPiOrSo = remainderTwoPiFast(angle);
+ if (angleMinusPiPiOrSo < -Math.PI / 2)
+ {
+ // Not a problem if angle is slightly < -Math.PI,
+ // since result ends up around zero, which is not near output range
+ // borders.
+ return angleMinusPiPiOrSo + Math.PI;
+ }
+ else if (angleMinusPiPiOrSo > Math.PI / 2)
+ {
+ // Not a problem if angle is slightly > Math.PI,
+ // since result ends up around zero, which is not near output range
+ // borders.
+ return angleMinusPiPiOrSo - Math.PI;
+ }
+ else
+ {
+ return angleMinusPiPiOrSo;
+ }
+ }
+
+ /**
+ * Returns sqrt(x^2+y^2) without intermediate overflow or underflow.
+ *
+ * Both arguments are taken in absolute value and ordered; when they are
+ * very large or very small they are rescaled by a power of two before
+ * squaring, and the square root is scaled back afterwards.
+ *
+ * @param x
+ * A value.
+ * @param y
+ * A value.
+ * @return sqrt(x^2+y^2); +infinity if either argument is infinite, else
+ * NaN if either argument is NaN.
+ */
+ public static double hypot(double x, double y)
+ {
+ if (USE_JDK_MATH)
+ {
+ return STRICT_MATH ? StrictMath.hypot(x, y) : Math.hypot(x, y);
+ }
+ double smaller = Math.abs(x);
+ double larger = Math.abs(y);
+ if (larger < smaller)
+ {
+ // Ordering the magnitudes.
+ final double swapped = smaller;
+ smaller = larger;
+ larger = swapped;
+ }
+ else if (!(larger >= smaller))
+ { // Neither comparison holds: at least one NaN.
+ final boolean anyInfinite = (smaller == Double.POSITIVE_INFINITY) || (larger == Double.POSITIVE_INFINITY);
+ return anyInfinite ? Double.POSITIVE_INFINITY : Double.NaN;
+ }
+ if (larger - smaller == larger)
+ { // smaller magnitude too small to subtract from the larger one
+ return larger;
+ }
+ double scaleBack = 1.0;
+ if (smaller > TWO_POW_450)
+ { // 2^450 < smaller < larger: scaling down before squaring
+ smaller *= TWO_POW_N750;
+ larger *= TWO_POW_N750;
+ scaleBack = TWO_POW_750;
+ }
+ else if (larger < TWO_POW_N450)
+ { // smaller < larger < 2^-450: scaling up before squaring
+ smaller *= TWO_POW_750;
+ larger *= TWO_POW_750;
+ scaleBack = TWO_POW_N750;
+ }
+ return scaleBack * FastMath.sqrt(smaller * smaller + larger * larger);
+ }
+
+ /**
+ * Computes the ceiling of a float, as the negated floor of the negated
+ * value.
+ *
+ * @param value
+ * A float value.
+ * @return Ceiling of value.
+ */
+ public static float ceil(float value)
+ {
+ if (!USE_JDK_MATH)
+ {
+ return -FastMath.floor(-value);
+ }
+ // TODO use Math.ceil(float) if exists
+ final double jdkCeiling = Math.ceil((double) value);
+ return (float) jdkCeiling;
+ }
+
+ /**
+ * Computes the ceiling of a double, as the negated floor of the negated
+ * value. Supposed to behave like Math.ceil(double), for safe
+ * interchangeability.
+ *
+ * @param value
+ * A double value.
+ * @return Ceiling of value.
+ */
+ public static double ceil(double value)
+ {
+ if (!USE_JDK_MATH)
+ {
+ return -FastMath.floor(-value);
+ }
+ return Math.ceil(value);
+ }
+
+ /**
+ * @param value
+ * A float value.
+ * @return Floor of value.
+ */
+ public static float floor(float value)
+ {
+ if (USE_JDK_MATH)
+ {
+ // TODO use Math.floor(float) if exists
+ return (float) Math.floor((double) value);
+ }
+ int exp = FastMath.getExponent(value);
+ if (exp < 0)
+ {
+ if (value < 0.0f)
+ {
+ return -1.0f;
+ }
+ else
+ { // value in [0.0f,1.0f[
+ return 0.0f * value; // 0.0f, or -0.0f if value is -0.0f
+ }
+ }
+ else
+ {
+ if (exp < 24)
+ {
+ int valueBits = Float.floatToRawIntBits(value);
+ int anteCommaDigits = valueBits & (0xFF800000 >> exp);
+ if ((value < 0.0f) && (anteCommaDigits != valueBits))
+ {
+ return Float.intBitsToFloat(anteCommaDigits) - 1.0f;
+ }
+ else
+ {
+ return Float.intBitsToFloat(anteCommaDigits);
+ }
+ }
+ else
+ {
+ return value;
+ }
+ }
+ }
+
+ /**
+ * Supposed to behave like Math.floor(double), for safe interchangeability.
+ *
+ * @param value
+ * A double value.
+ * @return Floor of value.
+ */
+ public static double floor(double value)
+ {
+ if (USE_JDK_MATH)
+ {
+ return Math.floor(value);
+ }
+ // Faster than to work directly on bits.
+ if (Math.abs(value) <= (double) Integer.MAX_VALUE)
+ {
+ if (value > 0.0)
+ {
+ return (double) (int) value;
+ }
+ else if (value < 0.0)
+ {
+ double anteCommaDigits = (double) (int) value;
+ if (value != anteCommaDigits)
+ {
+ return anteCommaDigits - 1.0;
+ }
+ else
+ {
+ return anteCommaDigits;
+ }
+ }
+ else
+ { // value is +-0.0 (not NaN due to test against Integer.MAX_VALUE)
+ return value;
+ }
+ }
+ else if (Math.abs(value) < TWO_POW_52)
+ {
+ // We split the value in two:
+ // high part, which is a mathematical integer,
+ // and the rest, for which we can get rid of the
+ // post comma digits by casting into an int.
+ double highPart = ((int) (value * TWO_POW_N26)) * TWO_POW_26;
+ if (value > 0.0)
+ {
+ return highPart + (double) ((int) (value - highPart));
+ }
+ else
+ {
+ double anteCommaDigits = highPart + (double) ((int) (value - highPart));
+ if (value != anteCommaDigits)
+ {
+ return anteCommaDigits - 1.0;
+ }
+ else
+ {
+ return anteCommaDigits;
+ }
+ }
+ }
+ else
+ { // abs(value) >= 2^52, or value is NaN
+ return value;
+ }
+ }
+
+ /**
+ * Rounds a float to the nearest int, as the floor of (value + 0.5f).
+ * Supposed to behave like Math.round(float), for safe interchangeability.
+ *
+ * @param value
+ * A float value.
+ * @return Value rounded to nearest int.
+ */
+ public static int round(float value)
+ {
+ if (USE_JDK_MATH)
+ {
+ return Math.round(value);
+ }
+ // Using "(float)(value+0.5)" as floor argument would be more accurate
+ // for values in [8388609.0f,16777216.0f] (i.e. [0x800001,0x1000000]),
+ // but would not give same results than Math.round(float).
+ final float shifted = value + 0.5f;
+ return (int) FastMath.floor(shifted);
+ }
+
+ /**
+ * Supposed to behave like Math.round(double), for safe interchangeability.
+ *
+ * @param value
+ * A double value.
+ * @return Value rounded to nearest long.
+ */
+ public static long round(double value)
+ {
+ if (USE_JDK_MATH)
+ {
+ return Math.round(value);
+ }
+ // Would be more coherent with rint, to call rint(double) instead of
+ // floor(double), but that would not give same results than
+ // Math.round(double).
+ double roundedValue = FastMath.floor(value + 0.5);
+ if (Math.abs(roundedValue) <= (double) Integer.MAX_VALUE)
+ {
+ // Faster with intermediary cast in int.
+ return (long) (int) roundedValue;
+ }
+ else
+ {
+ return (long) roundedValue;
+ }
+ }
+
+ /**
+ * Extracts the 8 exponent bits of a float and removes the bias
+ * (MAX_FLOAT_EXPONENT) from them.
+ *
+ * @param value
+ * A float value.
+ * @return Value unbiased exponent.
+ */
+ public static int getExponent(float value)
+ {
+ if (USE_JDK_MATH)
+ {
+ return Math.getExponent(value);
+ }
+ final int rawBits = Float.floatToRawIntBits(value);
+ final int biasedExponent = (rawBits >> 23) & 0xFF;
+ return biasedExponent - MAX_FLOAT_EXPONENT;
+ }
+
+ /**
+ * Extracts the 11 exponent bits of a double and removes the bias
+ * (MAX_DOUBLE_EXPONENT) from them.
+ *
+ * @param value
+ * A double value.
+ * @return Value unbiased exponent.
+ */
+ public static int getExponent(double value)
+ {
+ if (USE_JDK_MATH)
+ {
+ return Math.getExponent(value);
+ }
+ final long rawBits = Double.doubleToRawLongBits(value);
+ final int biasedExponent = ((int) (rawBits >> 52)) & 0x7FF;
+ return biasedExponent - MAX_DOUBLE_EXPONENT;
+ }
+
+ /**
+ * Converts radians into degrees with a single multiplication by
+ * (180/Math.PI).
+ *
+ * Gives same result as Math.toDegrees for some particular values like
+ * Math.PI/2, Math.PI or 2*Math.PI, but is faster (no division).
+ *
+ * @param angrad
+ * Angle value in radians.
+ * @return Angle value in degrees.
+ */
+ public static double toDegrees(double angrad)
+ {
+ if (USE_JDK_MATH)
+ {
+ return Math.toDegrees(angrad);
+ }
+ final double degreesPerRadian = 180 / Math.PI;
+ return angrad * degreesPerRadian;
+ }
+
+ /**
+ * Converts degrees into radians with a single multiplication by
+ * (Math.PI/180).
+ *
+ * Gives same result as Math.toRadians for some particular values like 90.0,
+ * 180.0 or 360.0, but is faster (no division).
+ *
+ * @param angdeg
+ * Angle value in degrees.
+ * @return Angle value in radians.
+ */
+ public static double toRadians(double angdeg)
+ {
+ if (USE_JDK_MATH)
+ {
+ return Math.toRadians(angdeg);
+ }
+ final double radiansPerDegree = Math.PI / 180;
+ return angdeg * radiansPerDegree;
+ }
+
+ /**
+ * Converts an angle given as sign, degrees, minutes and seconds into
+ * radians, going through its value in decimal degrees.
+ *
+ * @param sign
+ * Sign of the angle: true for positive, false for negative.
+ * @param degrees
+ * Degrees, in [0,180].
+ * @param minutes
+ * Minutes, in [0,59].
+ * @param seconds
+ * Seconds, in [0.0,60.0[.
+ * @return Angle in radians.
+ */
+ public static double toRadians(boolean sign, int degrees, int minutes, double seconds)
+ {
+ final double decimalDegrees = FastMath.toDegrees(sign, degrees, minutes, seconds);
+ return FastMath.toRadians(decimalDegrees);
+ }
+
+ /**
+ * Converts an angle given as sign, degrees, minutes and seconds into
+ * decimal degrees.
+ *
+ * @param sign
+ * Sign of the angle: true for positive, false for negative.
+ * @param degrees
+ * Degrees, in [0,180].
+ * @param minutes
+ * Minutes, in [0,59].
+ * @param seconds
+ * Seconds, in [0.0,60.0[.
+ * @return Angle in degrees.
+ */
+ public static double toDegrees(boolean sign, int degrees, int minutes, double seconds)
+ {
+ // Folding seconds into minutes, then minutes into degrees.
+ final double secondsAsMinutes = (1.0 / 60) * seconds;
+ final double minutesAsDegrees = (1.0 / 60) * (minutes + secondsAsMinutes);
+ final double magnitude = degrees + minutesAsDegrees;
+ return sign ? magnitude : -magnitude;
+ }
+
+ /**
+ * NB: Since 2*Math.PI < 2*PI, a span of 2*Math.PI does not mean full
+ * angular range. ex.: isInClockwiseDomain(0.0, 2*Math.PI, -1e-20) returns
+ * false. ---> For full angular range, use a span > 2*Math.PI, like 2*PI_SUP
+ * constant of this class.
+ *
+ * @param startAngRad
+ * An angle, in radians.
+ * @param angSpanRad
+ * An angular span, >= 0.0, in radians.
+ * @param angRad
+ * An angle, in radians.
+ * @return True if angRad is in the clockwise angular domain going from
+ * startAngRad, over angSpanRad, extremities included, false
+ * otherwise.
+ */
+ public static boolean isInClockwiseDomain(double startAngRad, double angSpanRad, double angRad)
+ {
+ if (Math.abs(angRad) < -TWO_MATH_PI_IN_MINUS_PI_PI)
+ {
+ // special case for angular values of small magnitude
+ if (angSpanRad < 0.0)
+ {
+ // empty domain
+ return false;
+ }
+ else if (angSpanRad <= 2 * Math.PI)
+ { // angSpanRad is in [0.0,2*Math.PI]
+ startAngRad = FastMath.normalizeMinusPiPi(startAngRad);
+ double endAngRad = FastMath.normalizeMinusPiPi(startAngRad + angSpanRad);
+ //
+ if (startAngRad <= endAngRad)
+ {
+ return (angRad >= startAngRad) && (angRad <= endAngRad);
+ }
+ else
+ {
+ return (angRad >= startAngRad) || (angRad <= endAngRad);
+ }
+ }
+ else if (angSpanRad != angSpanRad)
+ { // angSpanRad is NaN
+ return false;
+ }
+ else
+ { // angSpanRad > 2*Math.PI
+ // we know angRad is not NaN, due to a previous test
+ return true;
+ }
+ }
+ else
+ {
+ // general case
+ return (FastMath.normalizeZeroTwoPi(angRad - startAngRad) <= angSpanRad);
+ }
+ }
+
+ public static final double E = Math.E; // same value as java.lang.Math.E
+ public static final double PI = Math.PI; // same value as java.lang.Math.PI
+
+ public static double abs(double a) // plain pass-through to Math.abs
+ {
+ return Math.abs(a);
+ }
+
+ public static float abs(float a) // plain pass-through to Math.abs
+ {
+ return Math.abs(a);
+ }
+
+ public static long abs(long a) // plain pass-through to Math.abs
+ {
+ return Math.abs(a);
+ }
+
+ public static double copySign(double magnitude, double sign) // plain pass-through to Math.copySign
+ {
+ return Math.copySign(magnitude, sign);
+ }
+
+ public static float copySign(float magnitude, float sign) // plain pass-through to Math.copySign
+ {
+ return Math.copySign(magnitude, sign);
+ }
+
+ public static double IEEEremainder(double f1, double f2) // plain pass-through to Math.IEEEremainder
+ {
+ return Math.IEEEremainder(f1, f2);
+ }
+
+ public static double max(double a, double b) // plain pass-through to Math.max
+ {
+ return Math.max(a, b);
+ }
+
+ public static float max(float a, float b) // plain pass-through to Math.max
+ {
+ return Math.max(a, b);
+ }
+
+ public static int max(int a, int b) // plain pass-through to Math.max
+ {
+ return Math.max(a, b);
+ }
+
+ public static long max(long a, long b) // plain pass-through to Math.max
+ {
+ return Math.max(a, b);
+ }
+
+ public static double min(double a, double b) // plain pass-through to Math.min
+ {
+ return Math.min(a, b);
+ }
+
+ public static float min(float a, float b) // plain pass-through to Math.min
+ {
+ return Math.min(a, b);
+ }
+
+ public static int min(int a, int b) // plain pass-through to Math.min
+ {
+ return Math.min(a, b);
+ }
+
+ public static long min(long a, long b) // plain pass-through to Math.min
+ {
+ return Math.min(a, b);
+ }
+
+ public static double nextAfter(double start, double direction) // plain pass-through to Math.nextAfter
+ {
+ return Math.nextAfter(start, direction);
+ }
+
+ public static float nextAfter(float start, float direction) // plain pass-through to Math.nextAfter
+ {
+ return Math.nextAfter(start, direction);
+ }
+
+ public static double nextUp(double d) // plain pass-through to Math.nextUp
+ {
+ return Math.nextUp(d);
+ }
+
+ public static float nextUp(float f) // plain pass-through to Math.nextUp
+ {
+ return Math.nextUp(f);
+ }
+
+ public static double random() // random value from StrictMath or Math, depending on STRICT_MATH
+ {
+ // StrictMath and Math use different RNG instances,
+ // so their random() methods are not equivalent.
+ return STRICT_MATH ? StrictMath.random() : Math.random();
+ }
+
+ public static double rint(double a) // plain pass-through to Math.rint
+ {
+ return Math.rint(a);
+ }
+
+ public static double scalb(double d, int scaleFactor) // plain pass-through to Math.scalb
+ {
+ return Math.scalb(d, scaleFactor);
+ }
+
+ public static float scalb(float f, int scaleFactor) // plain pass-through to Math.scalb
+ {
+ return Math.scalb(f, scaleFactor);
+ }
+
+ public static double signum(double d) // plain pass-through to Math.signum
+ {
+ return Math.signum(d);
+ }
+
+ public static float signum(float f) // plain pass-through to Math.signum
+ {
+ return Math.signum(f);
+ }
+
+ public static double ulp(double d) // plain pass-through to Math.ulp
+ {
+ return Math.ulp(d);
+ }
+
+ public static float ulp(float f) // plain pass-through to Math.ulp
+ {
+ return Math.ulp(f);
+ }
+
+ // --------------------------------------------------------------------------
+ // PRIVATE TREATMENTS
+ // --------------------------------------------------------------------------
+
+ /**
+ * FastMath is non-instantiable: this private, empty constructor is only
+ * here to prevent instantiation from outside of the class.
+ */
+ private FastMath()
+ {
+ }
+
+ /**
+ * Use look-up tables size power through this method, to make sure it is
+ * small in case java.lang.Math is directly used.
+ *
+ * @param tabSizePower
+ * Wanted size power for a look-up table.
+ * @return The specified power, capped to 2 if USE_JDK_MATH is true.
+ */
+ private static int getTabSizePower(int tabSizePower)
+ {
+ if (USE_JDK_MATH)
+ {
+ return Math.min(2, tabSizePower);
+ }
+ return tabSizePower;
+ }
+
+ /**
+ * Remainder using an accurate definition of PI. Derived from a fdlibm
+ * treatment called __ieee754_rem_pio2.
+ *
+ * This method can return values slightly (like one ULP or so) outside
+ * [-Math.PI,Math.PI] range.
+ *
+ * @param angle
+ * Angle in radians.
+ * @return Remainder of (angle % (2*PI)), which is in [-PI,PI] range.
+ */
+ private static double remainderTwoPi(double angle)
+ {
+ if (USE_JDK_MATH)
+ {
+ double y = STRICT_MATH ? StrictMath.sin(angle) : Math.sin(angle);
+ double x = STRICT_MATH ? StrictMath.cos(angle) : Math.cos(angle);
+ return STRICT_MATH ? StrictMath.atan2(y, x) : Math.atan2(y, x);
+ }
+ boolean negateResult;
+ if (angle < 0.0)
+ {
+ negateResult = true;
+ angle = -angle;
+ }
+ else
+ {
+ negateResult = false;
+ }
+ if (angle <= NORMALIZE_ANGLE_MAX_MEDIUM_DOUBLE)
+ {
+ double fn = (double) (int) (angle * INVTWOPI + 0.5);
+ double result = (angle - fn * TWOPI_HI) - fn * TWOPI_LO;
+ return negateResult ? -result : result;
+ }
+ else if (angle < Double.POSITIVE_INFINITY)
+ {
+ // Reworking exponent to have a value < 2^24.
+ long lx = Double.doubleToRawLongBits(angle);
+ long exp = ((lx >> 52) & 0x7FF) - 1046;
+ double z = Double.longBitsToDouble(lx - (exp << 52));
+
+ double x0 = (double) ((int) z);
+ z = (z - x0) * TWO_POW_24;
+ double x1 = (double) ((int) z);
+ double x2 = (z - x1) * TWO_POW_24;
+
+ double result = subRemainderTwoPi(x0, x1, x2, (int) exp, (x2 == 0) ? 2 : 3);
+ return negateResult ? -result : result;
+ }
+ else
+ { // angle is +infinity or NaN
+ return Double.NaN;
+ }
+ }
+
+ /**
+ * Not accurate for large values.
+ *
+ * This method can return values slightly (like one ULP or so) outside
+ * [-Math.PI,Math.PI] range.
+ *
+ * @param angle
+ * Angle in radians.
+ * @return Remainder of (angle % (2*PI)), which is in [-PI,PI] range.
+ */
+ private static double remainderTwoPiFast(double angle)
+ {
+ if (USE_JDK_MATH)
+ {
+ return remainderTwoPi(angle);
+ }
+ boolean negateResult;
+ if (angle < 0.0)
+ {
+ negateResult = true;
+ angle = -angle;
+ }
+ else
+ {
+ negateResult = false;
+ }
+ // - We don't bother with values higher than (2*PI*(2^52)),
+ // since they are spaced by 2*PI or more from each other.
+ // - For large values, we don't use % because it might be very slow,
+ // and we split computation in two, because cast from double to int
+ // with large numbers might be very slow also.
+ if (angle <= TWO_POW_26 * (2 * Math.PI))
+ {
+ double fn = (double) (int) (angle * INVTWOPI + 0.5);
+ double result = (angle - fn * TWOPI_HI) - fn * TWOPI_LO;
+ return negateResult ? -result : result;
+ }
+ else if (angle <= TWO_POW_52 * (2 * Math.PI))
+ {
+ // 1) Computing remainder of angle modulo TWO_POW_26*(2*PI).
+ double fn = (double) (int) (angle * (INVTWOPI / TWO_POW_26) + 0.5);
+ double result = (angle - fn * (TWOPI_HI * TWO_POW_26)) - fn * (TWOPI_LO * TWO_POW_26);
+ // Here, result is in [-TWO_POW_26*Math.PI,TWO_POW_26*Math.PI].
+ if (result < 0.0)
+ {
+ result = -result;
+ negateResult = !negateResult;
+ }
+ // 2) Computing remainder of angle modulo 2*PI.
+ fn = (double) (int) (result * INVTWOPI + 0.5);
+ result = (result - fn * TWOPI_HI) - fn * TWOPI_LO;
+ return negateResult ? -result : result;
+ }
+ else if (angle < Double.POSITIVE_INFINITY)
+ {
+ return 0.0;
+ }
+ else
+ { // angle is +infinity or NaN
+ return Double.NaN;
+ }
+ }
+
+ /**
+ * Remainder using an accurate definition of PI. Derived from a fdlibm
+ * treatment called __kernel_rem_pio2.
+ *
+ * The value is given as two or three parts (x0, x1, x2) and an exponent
+ * e0. Factors are read from ONE_OVER_TWOPI_TAB at indexes derived from e0,
+ * the partial products are cut into pieces of 24 bits (iq0..iq4), and the
+ * result is rebuilt from these pieces with the TWOPI_TAB0..TWOPI_TAB4
+ * constants. The result is negated when the internal flag ih is not zero.
+ *
+ * @param x0
+ * Most significant part of the value, as an integer < 2^24, in
+ * double precision format. Must be >= 0.
+ * @param x1
+ * Following significant part of the value, as an integer < 2^24,
+ * in double precision format.
+ * @param x2
+ * Least significant part of the value, as an integer < 2^24, in
+ * double precision format.
+ * @param e0
+ * Exponent of x0 (value is (2^e0)*(x0+(2^-24)*(x1+(2^-24)*x2))).
+ * Must be >= -20.
+ * @param nx
+ * Number of significant parts to take into account. Must be 2 or
+ * 3.
+ * @return Remainder of (value % (2*PI)), which is in [-PI,PI] range.
+ */
+ private static double subRemainderTwoPi(double x0, double x1, double x2, int e0, int nx)
+ {
+ int ih;
+ double z, fw;
+ double f0, f1, f2, f3, f4, f5, f6 = 0.0, f7;
+ double q0, q1, q2, q3, q4, q5;
+ int iq0, iq1, iq2, iq3, iq4;
+
+ final int jx = nx - 1; // jx in [1,2] (nx in [2,3])
+ // Could use a table to avoid division, but the gain isn't worth it most
+ // likely...
+ final int jv = (e0 - 3) / 24; // We do not handle the case (e0-3 < -23).
+ int q = e0 - ((jv << 4) + (jv << 3)) - 24; // e0-24*(jv+1)
+
+ final int j = jv + 4; // index of the last table entry used by the first pass
+ if (jx == 1)
+ {
+ f5 = (j >= 0) ? ONE_OVER_TWOPI_TAB[j] : 0.0;
+ f4 = (j >= 1) ? ONE_OVER_TWOPI_TAB[j - 1] : 0.0;
+ f3 = (j >= 2) ? ONE_OVER_TWOPI_TAB[j - 2] : 0.0;
+ f2 = (j >= 3) ? ONE_OVER_TWOPI_TAB[j - 3] : 0.0;
+ f1 = (j >= 4) ? ONE_OVER_TWOPI_TAB[j - 4] : 0.0;
+ f0 = (j >= 5) ? ONE_OVER_TWOPI_TAB[j - 5] : 0.0;
+
+ q0 = x0 * f1 + x1 * f0;
+ q1 = x0 * f2 + x1 * f1;
+ q2 = x0 * f3 + x1 * f2;
+ q3 = x0 * f4 + x1 * f3;
+ q4 = x0 * f5 + x1 * f4;
+ }
+ else
+ { // jx == 2
+ f6 = (j >= 0) ? ONE_OVER_TWOPI_TAB[j] : 0.0;
+ f5 = (j >= 1) ? ONE_OVER_TWOPI_TAB[j - 1] : 0.0;
+ f4 = (j >= 2) ? ONE_OVER_TWOPI_TAB[j - 2] : 0.0;
+ f3 = (j >= 3) ? ONE_OVER_TWOPI_TAB[j - 3] : 0.0;
+ f2 = (j >= 4) ? ONE_OVER_TWOPI_TAB[j - 4] : 0.0;
+ f1 = (j >= 5) ? ONE_OVER_TWOPI_TAB[j - 5] : 0.0;
+ f0 = (j >= 6) ? ONE_OVER_TWOPI_TAB[j - 6] : 0.0;
+
+ q0 = x0 * f2 + x1 * f1 + x2 * f0;
+ q1 = x0 * f3 + x1 * f2 + x2 * f1;
+ q2 = x0 * f4 + x1 * f3 + x2 * f2;
+ q3 = x0 * f5 + x1 * f4 + x2 * f3;
+ q4 = x0 * f6 + x1 * f5 + x2 * f4;
+ }
+
+ z = q4; // cutting the products into pieces of 24 bits, fw being carried to the next product
+ fw = (double) ((int) (TWO_POW_N24 * z));
+ iq0 = (int) (z - TWO_POW_24 * fw);
+ z = q3 + fw;
+ fw = (double) ((int) (TWO_POW_N24 * z));
+ iq1 = (int) (z - TWO_POW_24 * fw);
+ z = q2 + fw;
+ fw = (double) ((int) (TWO_POW_N24 * z));
+ iq2 = (int) (z - TWO_POW_24 * fw);
+ z = q1 + fw;
+ fw = (double) ((int) (TWO_POW_N24 * z));
+ iq3 = (int) (z - TWO_POW_24 * fw);
+ z = q0 + fw;
+
+ // Here, q is in [-25,2] range or so, so we can use the table right
+ // away.
+ double twoPowQ = twoPowTab[q - MIN_DOUBLE_EXPONENT]; // 2^q
+
+ z = (z * twoPowQ) % 8.0;
+ z -= (double) ((int) z); // keeping the fractional part only
+ if (q > 0)
+ {
+ iq3 &= 0xFFFFFF >> q;
+ ih = iq3 >> (23 - q);
+ }
+ else if (q == 0)
+ {
+ ih = iq3 >> 23;
+ }
+ else if (z >= 0.5)
+ {
+ ih = 2;
+ }
+ else
+ {
+ ih = 0;
+ }
+ if (ih > 0)
+ {
+ int carry; // 1 if any of the pieces was not zero, else 0
+ if (iq0 != 0)
+ {
+ carry = 1;
+ iq0 = 0x1000000 - iq0;
+ iq1 = 0x0FFFFFF - iq1;
+ iq2 = 0x0FFFFFF - iq2;
+ iq3 = 0x0FFFFFF - iq3;
+ }
+ else
+ {
+ if (iq1 != 0)
+ {
+ carry = 1;
+ iq1 = 0x1000000 - iq1;
+ iq2 = 0x0FFFFFF - iq2;
+ iq3 = 0x0FFFFFF - iq3;
+ }
+ else
+ {
+ if (iq2 != 0)
+ {
+ carry = 1;
+ iq2 = 0x1000000 - iq2;
+ iq3 = 0x0FFFFFF - iq3;
+ }
+ else
+ {
+ if (iq3 != 0)
+ {
+ carry = 1;
+ iq3 = 0x1000000 - iq3;
+ }
+ else
+ {
+ carry = 0;
+ }
+ }
+ }
+ }
+ if (q > 0)
+ {
+ switch (q)
+ {
+ case 1:
+ iq3 &= 0x7FFFFF;
+ break;
+ case 2:
+ iq3 &= 0x3FFFFF;
+ break;
+ }
+ }
+ if (ih == 2)
+ {
+ z = 1.0 - z;
+ if (carry != 0)
+ {
+ z -= twoPowQ;
+ }
+ }
+ }
+
+ if (z == 0.0) // nothing left in z: redoing the treatment with one more table entry and one more piece (iq4)
+ {
+ if (jx == 1)
+ {
+ f6 = ONE_OVER_TWOPI_TAB[jv + 5];
+ q5 = x0 * f6 + x1 * f5;
+ }
+ else
+ { // jx == 2
+ f7 = ONE_OVER_TWOPI_TAB[jv + 5];
+ q5 = x0 * f7 + x1 * f6 + x2 * f5;
+ }
+
+ z = q5;
+ fw = (double) ((int) (TWO_POW_N24 * z));
+ iq0 = (int) (z - TWO_POW_24 * fw);
+ z = q4 + fw;
+ fw = (double) ((int) (TWO_POW_N24 * z));
+ iq1 = (int) (z - TWO_POW_24 * fw);
+ z = q3 + fw;
+ fw = (double) ((int) (TWO_POW_N24 * z));
+ iq2 = (int) (z - TWO_POW_24 * fw);
+ z = q2 + fw;
+ fw = (double) ((int) (TWO_POW_N24 * z));
+ iq3 = (int) (z - TWO_POW_24 * fw);
+ z = q1 + fw;
+ fw = (double) ((int) (TWO_POW_N24 * z));
+ iq4 = (int) (z - TWO_POW_24 * fw);
+ z = q0 + fw;
+
+ z = (z * twoPowQ) % 8.0;
+ z -= (double) ((int) z);
+ if (q > 0)
+ {
+ // some parentheses for Eclipse formatter's weaknesses with bits
+ // shifts
+ iq4 &= (0xFFFFFF >> q);
+ ih = (iq4 >> (23 - q));
+ }
+ else if (q == 0)
+ {
+ ih = iq4 >> 23;
+ }
+ else if (z >= 0.5)
+ {
+ ih = 2;
+ }
+ else
+ {
+ ih = 0;
+ }
+ if (ih > 0)
+ {
+ if (iq0 != 0)
+ {
+ iq0 = 0x1000000 - iq0;
+ iq1 = 0x0FFFFFF - iq1;
+ iq2 = 0x0FFFFFF - iq2;
+ iq3 = 0x0FFFFFF - iq3;
+ iq4 = 0x0FFFFFF - iq4;
+ }
+ else
+ {
+ if (iq1 != 0)
+ {
+ iq1 = 0x1000000 - iq1;
+ iq2 = 0x0FFFFFF - iq2;
+ iq3 = 0x0FFFFFF - iq3;
+ iq4 = 0x0FFFFFF - iq4;
+ }
+ else
+ {
+ if (iq2 != 0)
+ {
+ iq2 = 0x1000000 - iq2;
+ iq3 = 0x0FFFFFF - iq3;
+ iq4 = 0x0FFFFFF - iq4;
+ }
+ else
+ {
+ if (iq3 != 0)
+ {
+ iq3 = 0x1000000 - iq3;
+ iq4 = 0x0FFFFFF - iq4;
+ }
+ else
+ {
+ if (iq4 != 0)
+ {
+ iq4 = 0x1000000 - iq4;
+ }
+ }
+ }
+ }
+ }
+ if (q > 0)
+ {
+ switch (q)
+ {
+ case 1:
+ iq4 &= 0x7FFFFF;
+ break;
+ case 2:
+ iq4 &= 0x3FFFFF;
+ break;
+ }
+ }
+ }
+ fw = twoPowQ * TWO_POW_N24; // q -= 24, so initializing fw with
+ // ((2^q)*(2^-24)=2^(q-24))
+ }
+ else
+ {
+ // Here, q is in [-25,-2] range or so, so we could use twoPow's
+ // table right away with
+ // iq4 = (int)(z*twoPowTab[-q-TWO_POW_TAB_MIN_POW]);
+ // but tests show using division is faster...
+ iq4 = (int) (z / twoPowQ);
+ fw = twoPowQ;
+ }
+
+ q4 = fw * (double) iq4; // turning the pieces back into doubles, each one scaled 2^-24 below the previous one
+ fw *= TWO_POW_N24;
+ q3 = fw * (double) iq3;
+ fw *= TWO_POW_N24;
+ q2 = fw * (double) iq2;
+ fw *= TWO_POW_N24;
+ q1 = fw * (double) iq1;
+ fw *= TWO_POW_N24;
+ q0 = fw * (double) iq0;
+ fw *= TWO_POW_N24; // NOTE(review): this value is not read, fw is overwritten by the next statement
+
+ fw = TWOPI_TAB0 * q4; // from here on, fw accumulates the products with the TWOPI_TAB constants
+ fw += TWOPI_TAB0 * q3 + TWOPI_TAB1 * q4;
+ fw += TWOPI_TAB0 * q2 + TWOPI_TAB1 * q3 + TWOPI_TAB2 * q4;
+ fw += TWOPI_TAB0 * q1 + TWOPI_TAB1 * q2 + TWOPI_TAB2 * q3 + TWOPI_TAB3 * q4;
+ fw += TWOPI_TAB0 * q0 + TWOPI_TAB1 * q1 + TWOPI_TAB2 * q2 + TWOPI_TAB3 * q3 + TWOPI_TAB4 * q4;
+
+ return (ih == 0) ? fw : -fw; // negated result if ih is not zero
+ }
+
+ // --------------------------------------------------------------------------
+ // STATIC INITIALIZATIONS
+ // --------------------------------------------------------------------------
+
+ /**
+ * Initializes look-up tables.
+ *
+ * Tables are filled in this order: sin and cos, tan, asin (plus its "pow"
+ * tables if USE_POWTABS_FOR_ASIN is true), atan, exp, log, twoPow, sqrt
+ * and cbrt. Reference values are computed with StrictMath. The tables
+ * named *DerNDivFNTab hold the N-th derivative at the tabulated point,
+ * multiplied by ONE_DIV_FN for N >= 2.
+ *
+ * Might use some FastMath methods in there, not to spend an hour in it, but
+ * must take care not to use methods which look-up tables have not yet been
+ * initialized, or that are not accurate enough.
+ */
+ static
+ {
+
+ // sin and cos
+
+ final int SIN_COS_PI_INDEX = (SIN_COS_TABS_SIZE - 1) / 2;
+ final int SIN_COS_PI_MUL_2_INDEX = 2 * SIN_COS_PI_INDEX;
+ final int SIN_COS_PI_MUL_0_5_INDEX = SIN_COS_PI_INDEX / 2;
+ final int SIN_COS_PI_MUL_1_5_INDEX = 3 * SIN_COS_PI_INDEX / 2;
+ for (int i = 0; i < SIN_COS_TABS_SIZE; i++)
+ {
+ // angle: in [0,2*PI].
+ double angle = i * SIN_COS_DELTA_HI + i * SIN_COS_DELTA_LO;
+ double sinAngle = StrictMath.sin(angle);
+ double cosAngle = StrictMath.cos(angle);
+ // For indexes corresponding to null cosine or sine, we make sure
+ // the value is zero
+ // and not an epsilon. This allows for a much better accuracy for
+ // results close to zero.
+ if (i == SIN_COS_PI_INDEX)
+ {
+ sinAngle = 0.0;
+ }
+ else if (i == SIN_COS_PI_MUL_2_INDEX)
+ {
+ sinAngle = 0.0;
+ }
+ else if (i == SIN_COS_PI_MUL_0_5_INDEX)
+ {
+ cosAngle = 0.0;
+ }
+ else if (i == SIN_COS_PI_MUL_1_5_INDEX)
+ {
+ cosAngle = 0.0;
+ }
+ sinTab[i] = sinAngle;
+ cosTab[i] = cosAngle;
+ }
+
+ // tan
+
+ for (int i = 0; i < TAN_TABS_SIZE; i++)
+ {
+ // angle: in [0,TAN_MAX_VALUE_FOR_TABS].
+ double angle = i * TAN_DELTA_HI + i * TAN_DELTA_LO;
+ tanTab[i] = StrictMath.tan(angle);
+ double cosAngle = StrictMath.cos(angle);
+ double sinAngle = StrictMath.sin(angle);
+ double cosAngleInv = 1 / cosAngle;
+ double cosAngleInv2 = cosAngleInv * cosAngleInv;
+ double cosAngleInv3 = cosAngleInv2 * cosAngleInv;
+ double cosAngleInv4 = cosAngleInv2 * cosAngleInv2;
+ double cosAngleInv5 = cosAngleInv3 * cosAngleInv2;
+ tanDer1DivF1Tab[i] = cosAngleInv2; // tan' = 1/cos^2
+ tanDer2DivF2Tab[i] = ((2 * sinAngle) * cosAngleInv3) * ONE_DIV_F2; // tan'' = 2*sin/cos^3
+ tanDer3DivF3Tab[i] = ((2 * (1 + 2 * sinAngle * sinAngle)) * cosAngleInv4) * ONE_DIV_F3; // tan''' = 2*(1+2*sin^2)/cos^4
+ tanDer4DivF4Tab[i] = ((8 * sinAngle * (2 + sinAngle * sinAngle)) * cosAngleInv5) * ONE_DIV_F4; // tan'''' = 8*sin*(2+sin^2)/cos^5
+ }
+
+ // asin
+
+ for (int i = 0; i < ASIN_TABS_SIZE; i++)
+ {
+ // x: in [0,ASIN_MAX_VALUE_FOR_TABS].
+ double x = i * ASIN_DELTA;
+ asinTab[i] = StrictMath.asin(x);
+ double oneMinusXSqInv = 1.0 / (1 - x * x);
+ double oneMinusXSqInv0_5 = StrictMath.sqrt(oneMinusXSqInv);
+ double oneMinusXSqInv1_5 = oneMinusXSqInv0_5 * oneMinusXSqInv;
+ double oneMinusXSqInv2_5 = oneMinusXSqInv1_5 * oneMinusXSqInv;
+ double oneMinusXSqInv3_5 = oneMinusXSqInv2_5 * oneMinusXSqInv;
+ asinDer1DivF1Tab[i] = oneMinusXSqInv0_5; // asin' = (1-x^2)^-0.5
+ asinDer2DivF2Tab[i] = (x * oneMinusXSqInv1_5) * ONE_DIV_F2; // asin'' = x*(1-x^2)^-1.5
+ asinDer3DivF3Tab[i] = ((1 + 2 * x * x) * oneMinusXSqInv2_5) * ONE_DIV_F3; // asin''' = (1+2*x^2)*(1-x^2)^-2.5
+ asinDer4DivF4Tab[i] = ((5 + 2 * x * (2 + x * (5 - 2 * x))) * oneMinusXSqInv3_5) * ONE_DIV_F4; // NOTE(review): asin'''' is 3*x*(3+2*x^2)*(1-x^2)^-3.5, which this polynomial does not match - confirm intent
+ }
+
+ if (USE_POWTABS_FOR_ASIN)
+ {
+ for (int i = 0; i < ASIN_POWTABS_SIZE; i++)
+ {
+ // x: in [0,ASIN_MAX_VALUE_FOR_POWTABS].
+ double x = StrictMath.pow(i * (1.0 / ASIN_POWTABS_SIZE_MINUS_ONE), 1.0 / ASIN_POWTABS_POWER)
+ * ASIN_MAX_VALUE_FOR_POWTABS;
+ asinParamPowTab[i] = x;
+ asinPowTab[i] = StrictMath.asin(x);
+ double oneMinusXSqInv = 1.0 / (1 - x * x);
+ double oneMinusXSqInv0_5 = StrictMath.sqrt(oneMinusXSqInv);
+ double oneMinusXSqInv1_5 = oneMinusXSqInv0_5 * oneMinusXSqInv;
+ double oneMinusXSqInv2_5 = oneMinusXSqInv1_5 * oneMinusXSqInv;
+ double oneMinusXSqInv3_5 = oneMinusXSqInv2_5 * oneMinusXSqInv;
+ asinDer1DivF1PowTab[i] = oneMinusXSqInv0_5;
+ asinDer2DivF2PowTab[i] = (x * oneMinusXSqInv1_5) * ONE_DIV_F2;
+ asinDer3DivF3PowTab[i] = ((1 + 2 * x * x) * oneMinusXSqInv2_5) * ONE_DIV_F3;
+ asinDer4DivF4PowTab[i] = ((5 + 2 * x * (2 + x * (5 - 2 * x))) * oneMinusXSqInv3_5) * ONE_DIV_F4; // NOTE(review): same polynomial as in the asin tables loop - confirm against asin''''
+ }
+ }
+
+ // atan
+
+ for (int i = 0; i < ATAN_TABS_SIZE; i++)
+ {
+ // x: in [0,ATAN_MAX_VALUE_FOR_TABS].
+ double x = i * ATAN_DELTA;
+ double onePlusXSqInv = 1.0 / (1 + x * x);
+ double onePlusXSqInv2 = onePlusXSqInv * onePlusXSqInv;
+ double onePlusXSqInv3 = onePlusXSqInv2 * onePlusXSqInv;
+ double onePlusXSqInv4 = onePlusXSqInv2 * onePlusXSqInv2;
+ atanTab[i] = StrictMath.atan(x);
+ atanDer1DivF1Tab[i] = onePlusXSqInv; // atan' = 1/(1+x^2)
+ atanDer2DivF2Tab[i] = (-2 * x * onePlusXSqInv2) * ONE_DIV_F2; // atan'' = -2*x/(1+x^2)^2
+ atanDer3DivF3Tab[i] = ((-2 + 6 * x * x) * onePlusXSqInv3) * ONE_DIV_F3; // atan''' = (-2+6*x^2)/(1+x^2)^3
+ atanDer4DivF4Tab[i] = ((24 * x * (1 - x * x)) * onePlusXSqInv4) * ONE_DIV_F4; // atan'''' = 24*x*(1-x^2)/(1+x^2)^4
+ }
+
+ // exp
+
+ for (int i = 0; i < EXP_LO_TAB_SIZE; i++)
+ {
+ // x: starts at -EXP_LO_DISTANCE_TO_ZERO, with steps of 1/EXP_LO_INDEXING.
+ double x = -EXP_LO_DISTANCE_TO_ZERO + i / (double) EXP_LO_INDEXING;
+ // exp(x)
+ expLoPosTab[i] = StrictMath.exp(x);
+ // 1-exp(-x), accurately computed
+ expLoNegTab[i] = -StrictMath.expm1(-x);
+ }
+ for (int i = 0; i <= (int) EXP_OVERFLOW_LIMIT; i++)
+ {
+ expHiTab[i] = StrictMath.exp(i);
+ }
+ for (int i = 0; i <= -(int) EXP_UNDERFLOW_LIMIT; i++)
+ {
+ // We take care not to compute with subnormal values.
+ if ((double) -i >= EXP_MIN_INT_LIMIT)
+ {
+ expHiInvTab[i] = StrictMath.exp(-i);
+ }
+ else
+ {
+ expHiInvTab[i] = StrictMath.exp(54 * LOG_2 - i); // exponent shifted up by 54*LOG_2 - presumably to store exp(-i)*2^54, TODO confirm against the code reading this table
+ }
+ }
+
+ // log
+
+ for (int i = 0; i < LOG_TAB_SIZE; i++)
+ {
+ // Exact to use inverse of tab size, since it is a power of two.
+ double x = 1 + i * (1.0 / LOG_TAB_SIZE);
+ logXLogTab[i] = StrictMath.log(x);
+ logXTab[i] = x;
+ logXInvTab[i] = 1 / x;
+ }
+
+ // twoPow
+
+ for (int i = MIN_DOUBLE_EXPONENT; i <= MAX_DOUBLE_EXPONENT; i++)
+ {
+ twoPowTab[i - MIN_DOUBLE_EXPONENT] = StrictMath.pow(2.0, i); // 2^i, stored at index (i - MIN_DOUBLE_EXPONENT)
+ }
+
+ // sqrt
+
+ for (int i = MIN_DOUBLE_EXPONENT; i <= MAX_DOUBLE_EXPONENT; i++)
+ {
+ double twoPowExpDiv2 = StrictMath.pow(2.0, i * 0.5);
+ sqrtXSqrtHiTab[i - MIN_DOUBLE_EXPONENT] = twoPowExpDiv2 * 0.5; // Half
+ // sqrt,
+ // to
+ // avoid
+ // overflows.
+ sqrtSlopeHiTab[i - MIN_DOUBLE_EXPONENT] = 1 / twoPowExpDiv2;
+ }
+ sqrtXSqrtLoTab[0] = 1.0;
+ sqrtSlopeLoTab[0] = 1.0;
+ final long SQRT_LO_MASK = (0x3FF0000000000000L | (0x000FFFFFFFFFFFFFL >> SQRT_LO_BITS)); // bits of 1.0, with the mantissa bits below the SQRT_LO_BITS highest ones all set
+ for (int i = 1; i < SQRT_LO_TAB_SIZE; i++)
+ {
+ long xBits = SQRT_LO_MASK | (((long) (i - 1)) << (52 - SQRT_LO_BITS)); // (i-1) goes into the SQRT_LO_BITS highest mantissa bits
+ double sqrtX = StrictMath.sqrt(Double.longBitsToDouble(xBits));
+ sqrtXSqrtLoTab[i] = sqrtX;
+ sqrtSlopeLoTab[i] = 1 / sqrtX;
+ }
+
+ // cbrt
+
+ for (int i = MIN_DOUBLE_EXPONENT; i <= MAX_DOUBLE_EXPONENT; i++)
+ {
+ double twoPowExpDiv3 = StrictMath.pow(2.0, i / 3.0);
+ cbrtXCbrtHiTab[i - MIN_DOUBLE_EXPONENT] = twoPowExpDiv3 * 0.5; // Half
+ // cbrt,
+ // to
+ // avoid
+ // overflows.
+ double tmp = 1 / twoPowExpDiv3;
+ cbrtSlopeHiTab[i - MIN_DOUBLE_EXPONENT] = (4.0 / 3) * tmp * tmp;
+ }
+ cbrtXCbrtLoTab[0] = 1.0;
+ cbrtSlopeLoTab[0] = 1.0;
+ final long CBRT_LO_MASK = (0x3FF0000000000000L | (0x000FFFFFFFFFFFFFL >> CBRT_LO_BITS)); // bits of 1.0, with the mantissa bits below the CBRT_LO_BITS highest ones all set
+ for (int i = 1; i < CBRT_LO_TAB_SIZE; i++)
+ {
+ long xBits = CBRT_LO_MASK | (((long) (i - 1)) << (52 - CBRT_LO_BITS)); // (i-1) goes into the CBRT_LO_BITS highest mantissa bits
+ double cbrtX = StrictMath.cbrt(Double.longBitsToDouble(xBits));
+ cbrtXCbrtLoTab[i] = cbrtX;
+ cbrtSlopeLoTab[i] = 1 / (cbrtX * cbrtX);
+ }
+ }
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/math/MathRuntimeException.java b/src/main/java/edu/umd/marbl/mhap/math/MathRuntimeException.java
new file mode 100644
index 0000000..6506ba4
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/math/MathRuntimeException.java
@@ -0,0 +1,85 @@
+/*
+ * ARMOR package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2012 by Konstantin Berlin
+ * University Of Maryland
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.math;
+
+/**
+ * Unchecked exception declared in the math package. It carries no state of its
+ * own and simply mirrors the four standard {@link RuntimeException} constructors.
+ */
+public class MathRuntimeException extends RuntimeException
+{
+    /** Serialization version identifier. */
+    private static final long serialVersionUID = 6939427297903213601L;
+
+    /**
+     * Creates an exception with neither a detail message nor a cause.
+     */
+    public MathRuntimeException()
+    {
+        super();
+    }
+
+    /**
+     * Creates an exception with a detail message.
+     *
+     * @param message
+     *            the detail message, later available through {@link #getMessage()}
+     */
+    public MathRuntimeException(String message)
+    {
+        super(message);
+    }
+
+    /**
+     * Creates an exception with a detail message and an underlying cause.
+     *
+     * @param message
+     *            the detail message, later available through {@link #getMessage()}
+     * @param cause
+     *            the underlying failure, later available through {@link #getCause()}
+     */
+    public MathRuntimeException(String message, Throwable cause)
+    {
+        super(message, cause);
+    }
+
+    /**
+     * Creates an exception that wraps an underlying cause.
+     *
+     * @param cause
+     *            the underlying failure, later available through {@link #getCause()}
+     */
+    public MathRuntimeException(Throwable cause)
+    {
+        super(cause);
+    }
+
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/sketch/AbstractBitSketch.java b/src/main/java/edu/umd/marbl/mhap/sketch/AbstractBitSketch.java
new file mode 100644
index 0000000..32bc20f
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/sketch/AbstractBitSketch.java
@@ -0,0 +1,135 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2014 by Konstantin Berlin and Sergey Koren
+ * University Of Maryland
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.sketch;
+
+/**
+ * Base class for sketches whose payload is a bit vector packed into an array of
+ * 64-bit words. Bit {@code i} lives in word {@code i / 64} at position
+ * {@code i % 64}, counted from the least significant bit.
+ *
+ * @param <T>
+ *            the concrete sketch type, used so that comparisons are only made
+ *            between sketches of the same kind
+ */
+public abstract class AbstractBitSketch<T extends AbstractBitSketch<T>> implements Sketch<T>,
+        Comparable<T>
+{
+    /** Packed bits; the array is stored as given, not copied. */
+    protected final long[] bits;
+    /**
+     *
+     */
+    private static final long serialVersionUID = -3392030412388403092L;
+
+    /**
+     * @param bits
+     *            the packed bit words; the reference is kept, so later changes
+     *            made by the caller are visible through this sketch
+     */
+    protected AbstractBitSketch(long[] bits)
+    {
+        this.bits = bits;
+    }
+
+    /**
+     * Orders sketches word by word, starting at word 0, using signed
+     * {@code long} comparison of each word.
+     *
+     * @param sim
+     *            the sketch to compare with; it must have at least as many
+     *            words as this one
+     * @return -1, 0 or 1
+     */
+    @Override
+    public int compareTo(final T sim)
+    {
+        for (int bitIndex = 0; bitIndex < this.bits.length; bitIndex++)
+        {
+            if (this.bits[bitIndex] < sim.bits[bitIndex])
+                return -1;
+            if (this.bits[bitIndex] > sim.bits[bitIndex])
+                return 1;
+        }
+
+        return 0;
+    }
+
+    /**
+     * Tests a single bit.
+     *
+     * @param index
+     *            zero-based bit index in {@code [0, numberOfBits())}
+     * @return {@code true} if the bit is set
+     */
+    public final boolean getBit(long index)
+    {
+        int arrayIndex = (int)(index/64L);
+        int bitPos = (int)(index%64L);
+
+        // The mask must be built with a long shift. An int shift only uses the
+        // low five bits of the distance, so positions 32..63 would alias
+        // positions 0..31, and position 31 would sign-extend into bits 32..63.
+        long mask = 1L<<bitPos;
+
+        return (bits[arrayIndex] & mask) != 0L;
+    }
+
+    /**
+     * @return the backing word array itself (not a copy)
+     */
+    public final long[] getBits()
+    {
+        return this.bits;
+    }
+
+    /**
+     * Counts the bit positions at which both sketches hold the same value
+     * (both 0 or both 1), i.e. the total number of bits minus the Hamming
+     * distance.
+     *
+     * @param sh
+     *            the sketch to compare with
+     * @return the number of agreeing bit positions
+     * @throws SketchRuntimeException
+     *             if the two sketches have a different number of words
+     */
+    public final int getIntersectionCount(final T sh)
+    {
+        if (this.bits.length != sh.bits.length)
+            throw new SketchRuntimeException("Size of bits in tables must match.");
+
+        int count = 0;
+        for (int longIndex = 0; longIndex < this.bits.length; longIndex++)
+        {
+            // a set bit in the xor marks a position where the sketches differ
+            final long xor = this.bits[longIndex] ^ sh.bits[longIndex];
+
+            count += Long.bitCount(xor);
+        }
+
+        return this.bits.length * 64 - count;
+    }
+
+    /**
+     * @param index
+     *            zero-based word index
+     * @return the 64-bit word stored at that index
+     */
+    public long getWord(int index)
+    {
+        return this.bits[index];
+    }
+
+    /**
+     * @return the total number of bits, which is 64 times the number of words
+     */
+    public long numberOfBits()
+    {
+        // multiply as long so the product cannot overflow before widening
+        return this.bits.length*64L;
+    }
+
+    /**
+     * @return the number of 64-bit words in the sketch
+     */
+    public int numberOfWords()
+    {
+        return this.bits.length;
+    }
+
+    /**
+     * Fraction of bit positions at which the two sketches agree.
+     *
+     * @param v
+     *            the sketch to compare with
+     * @return a value in {@code [0, 1]}
+     */
+    @Override
+    public final double similarity(T v)
+    {
+        int count = getIntersectionCount(v);
+
+        return (double)count/(double) this.numberOfBits();
+    }
+
+    /**
+     * Renders every word as 64 characters of '0' and '1', most significant bit
+     * first, with the words concatenated in array order. Note that this is the
+     * reverse of the per-word bit numbering used by {@link #getBit(long)}.
+     */
+    @Override
+    public String toString()
+    {
+        StringBuilder s = new StringBuilder();
+        for (int longIndex = 0; longIndex < this.bits.length; longIndex++)
+        {
+            long mask = 1L << 63;
+
+            for (int bit = 63; bit >= 0; bit--)
+            {
+                if ((this.bits[longIndex] & mask) == 0)
+                    s.append("0");
+                else
+                    s.append("1");
+
+                mask = mask >>> 1;
+            }
+        }
+
+        return s.toString();
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/edu/umd/marbl/mhap/sketch/BitVectorIndex.java b/src/main/java/edu/umd/marbl/mhap/sketch/BitVectorIndex.java
new file mode 100644
index 0000000..27c82b6
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/sketch/BitVectorIndex.java
@@ -0,0 +1,206 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.sketch;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+
+import edu.umd.marbl.mhap.utils.MersenneTwisterFast;
+import edu.umd.marbl.mhap.utils.Pair;
+import edu.umd.marbl.mhap.utils.SortablePair;
+
+/**
+ * Index over bit sketches that answers approximate neighbor queries by bit
+ * sampling: each of several hash tables keys every sketch by the values of a
+ * small, randomly chosen set of bit positions. A query collects the buckets it
+ * falls into in every table and then scores those candidates exactly.
+ *
+ * @param <T>
+ *            type of the item stored with each sketch
+ * @param <B>
+ *            type of the bit sketch
+ */
+public final class BitVectorIndex<T,B extends AbstractBitSketch<B>>
+{
+    /** For every hash table, the bit positions sampled to form its key. */
+    private final long bitsUsed[][];
+    /** One map per hash table, from sampled-bit key to the pairs in that bucket. */
+    private final ArrayList<HashMap<Integer,ArrayList<Pair<T,B>>>> hashList;
+    private final HashMap<T,B> indexedWords;
+    private final double minSimilarity;
+
+    /**
+     * Builds the index over the given pairs.
+     *
+     * @param valuePairs
+     *            the (item, sketch) pairs to index; the number of bits is taken
+     *            from the first pair's sketch
+     * @param minSimilarity
+     *            the lowest similarity the index is sized to detect
+     * @param confidence
+     *            the target probability that a pair at {@code minSimilarity}
+     *            shares a bucket in at least one table
+     */
+    public BitVectorIndex(List<Pair<T,B>> valuePairs, double minSimilarity, double confidence)
+    {
+        this.minSimilarity = minSimilarity;
+
+        //number of bits sampled per table; fixed for now, should go off the valuePairs list
+        int b = 10;
+
+        //probability of a hit in numIndexes when using b: confidence = 1-(1-minSimilarity^b)^(numIndexes)
+        //solve for numIndexes: numIndexes = log(1-confidence)/log(1-minSimilarity^b)
+
+        //figure out the number of tables
+        int numIndexes = (int)Math.ceil(Math.log(1.0-confidence)/Math.log(1.0-Math.pow(this.minSimilarity, (double)b)));
+
+        //allocate the memory
+        this.bitsUsed = new long[numIndexes][b];
+
+        //now generate random bit positions
+        MersenneTwisterFast rand = new MersenneTwisterFast();
+
+        //get number of bits
+        long numBits = 1;
+        if (!valuePairs.isEmpty())
+            numBits = valuePairs.get(0).y.numberOfBits();
+
+        //generate the bits
+        for (int index=0; index<numIndexes; index++)
+            for (int bit=0; bit<b; bit++)
+                this.bitsUsed[index][bit] = rand.nextLong(numBits);
+
+        //allocate the memory
+        this.hashList = new ArrayList<>(numIndexes);
+        for (int iter=0; iter<numIndexes; iter++)
+            this.hashList.add(new HashMap<>(valuePairs.size()));
+
+        this.indexedWords = new HashMap<>(valuePairs.size());
+
+        //encode all data in parallel
+        valuePairs.parallelStream().forEach(pair-> {
+
+            //get the lookup positions
+            int[] lookupPositions = lookupPositions(pair.y);
+
+            int count = 0;
+            for(HashMap<Integer,ArrayList<Pair<T,B>>> map : this.hashList)
+            {
+                //get the array list
+                ArrayList<Pair<T,B>> list;
+                synchronized (map)
+                {
+                    list = map.computeIfAbsent(lookupPositions[count], key-> new ArrayList<>(1));
+                }
+
+                //add the pair to the index
+                synchronized(list)
+                {
+                    list.add(pair);
+                }
+
+                count++;
+            }
+
+            //add the word
+            synchronized (this.indexedWords)
+            {
+                this.indexedWords.put(pair.x, pair.y);
+            }
+        });
+    }
+
+    /**
+     * @return the number of bit positions sampled for each hash table
+     */
+    public int getBitsPerHash()
+    {
+        return bitsUsed[0].length;
+    }
+
+    /**
+     * @return a read-only view of every indexed item and its sketch
+     */
+    public Map<T,B> getIndexedItems()
+    {
+        return Collections.unmodifiableMap(this.indexedWords);
+    }
+
+    /**
+     * Finds indexed items whose sketch similarity to the query is at least
+     * {@code minSimilarity}.
+     *
+     * @param sketch
+     *            the query sketch
+     * @param minSimilarity
+     *            the score threshold; must not be lower than the value the
+     *            index was built with
+     * @return the matching items with their scores, in no particular order
+     * @throws SketchRuntimeException
+     *             if the threshold is lower than the one the index was built for
+     */
+    public List<SortablePair<Double,T>> getNeighbors(B sketch, double minSimilarity)
+    {
+        if (minSimilarity<this.minSimilarity)
+            throw new SketchRuntimeException("Similarity request threshold below the ability of the indexer to compute.");
+
+        int[] lookupPositions = lookupPositions(sketch);
+
+        //now get a large hashset of items
+        HashSet<Pair<T,B>> set = new HashSet<>();
+
+        //The table number is the loop index itself, so a table with no bucket
+        //for this key cannot shift the keys used for the tables that follow.
+        for (int index=0; index<this.hashList.size(); index++)
+        {
+            ArrayList<Pair<T,B>> list = this.hashList.get(index).get(lookupPositions[index]);
+
+            //add all the elements
+            if (list!=null)
+                set.addAll(list);
+        }
+
+        ArrayList<SortablePair<Double,T>> returnList = new ArrayList<SortablePair<Double,T>>();
+
+        //now do direct compare
+        for (Pair<T,B> pair : set)
+        {
+            double score = pair.y.similarity(sketch);
+
+            if (score>=minSimilarity)
+                returnList.add(new SortablePair<>(score, pair.x));
+        }
+
+        return returnList;
+    }
+
+    /**
+     * @return the number of hash tables in the index
+     */
+    public int getNumberOfIndexes()
+    {
+        return this.hashList.size();
+    }
+
+    /**
+     * @param word
+     *            an indexed item
+     * @return the sketch stored for that item, or {@code null} if it is not indexed
+     */
+    public B getSketch(T word)
+    {
+        return this.indexedWords.get(word);
+    }
+
+    /**
+     * @return {@code true} if nothing has been indexed
+     */
+    public boolean isEmpty()
+    {
+        return indexedWords.isEmpty();
+    }
+
+    /**
+     * Computes the bucket key of a sketch in every hash table. The key for a
+     * table packs the sampled bits into an int, the first sampled position
+     * going to the least significant bit.
+     *
+     * @param bits
+     *            the sketch to key
+     * @return one key per hash table, in table order
+     */
+    private int[] lookupPositions(B bits)
+    {
+        int numIndexes = hashList.size();
+
+        int[] returnValues = new int[numIndexes];
+        for (int index=0; index<numIndexes; index++)
+        {
+            long[] usedBits = bitsUsed[index];
+
+            int val = 0b0;
+            int mask = 0b1;
+            for (int bit=0; bit<usedBits.length; bit++)
+            {
+                if (bits.getBit(usedBits[bit]))
+                    val = val | mask;
+
+                mask = mask<<1;
+            }
+
+            returnValues[index] = val;
+        }
+
+        return returnValues;
+    }
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/sketch/ClassicCounter.java b/src/main/java/edu/umd/marbl/mhap/sketch/ClassicCounter.java
new file mode 100644
index 0000000..5251fe8
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/sketch/ClassicCounter.java
@@ -0,0 +1,100 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.sketch;
+
+import java.util.HashMap;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.LongAdder;
+
+/**
+ * Exact counter that keeps one {@link LongAdder} per distinct object in a hash
+ * map. Access to the map is guarded by the map's monitor; the adders themselves
+ * are updated outside the lock.
+ *
+ * @param <T>
+ *            the type of the counted objects
+ */
+public final class ClassicCounter<T extends Object> implements Counter<T>
+{
+    private final HashMap<Object, LongAdder> map;
+    /** Sum of all increments passed to {@link #add(Object, long)}. */
+    private final LongAdder numAdditions;
+    /** Largest accumulated count observed for any single object. */
+    private final AtomicLong maxCount;
+
+    /**
+     * @param size
+     *            the initial capacity of the underlying map
+     */
+    public ClassicCounter(int size)
+    {
+        this.map = new HashMap<>(size);
+        this.maxCount = new AtomicLong();
+        this.numAdditions = new LongAdder();
+    }
+
+    /**
+     * @param obj
+     *            the object to look up
+     * @return the accumulated count for the object, or 0 if it was never added
+     */
+    @Override
+    public long getCount(Object obj)
+    {
+        LongAdder adder;
+
+        // read under the same lock add() uses, since HashMap is not safe to
+        // read while another thread inserts
+        synchronized (this.map)
+        {
+            adder = this.map.get(obj);
+        }
+
+        if (adder==null)
+            return 0;
+
+        return adder.longValue();
+    }
+
+    /**
+     * Adds one occurrence of the object.
+     *
+     * @param obj
+     *            the object to count
+     */
+    @Override
+    public void add(Object obj)
+    {
+        add(obj, 1);
+    }
+
+    /**
+     * @return the largest accumulated count seen for any single object, or 0
+     *         if nothing has been added
+     */
+    @Override
+    public long maxCount()
+    {
+        return this.maxCount.longValue();
+    }
+
+    /**
+     * Adds {@code count} occurrences of the object.
+     *
+     * @param obj
+     *            the object to count
+     * @param count
+     *            the increment to apply
+     */
+    @Override
+    public void add(Object obj, long count)
+    {
+        LongAdder adder = null;
+        synchronized (this.map)
+        {
+            adder = this.map.get(obj);
+            if (adder==null)
+            {
+                adder = new LongAdder();
+                this.map.put(obj, adder);
+            }
+        }
+
+        adder.add(count);
+
+        // Track the largest running total rather than the largest single
+        // increment, which would stay at 1 for plain add(obj) calls. The
+        // total is read after this thread's own add, so it includes it.
+        this.maxCount.accumulateAndGet(adder.longValue(), Math::max);
+
+        this.numAdditions.add(count);
+    }
+
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/sketch/CosineDistanceSketch.java b/src/main/java/edu/umd/marbl/mhap/sketch/CosineDistanceSketch.java
new file mode 100644
index 0000000..52a494c
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/sketch/CosineDistanceSketch.java
@@ -0,0 +1,71 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.sketch;
+
+import edu.umd.marbl.mhap.math.BasicMath;
+
+/**
+ * Bit sketch of a real vector built from random projections: every bit records
+ * whether the vector has a positive dot product with one random Gaussian
+ * vector.
+ */
+public final class CosineDistanceSketch extends AbstractBitSketch<CosineDistanceSketch>
+{
+    /**
+     *
+     */
+    private static final long serialVersionUID = -6501138603779963996L;
+
+    /**
+     * Computes the packed sign bits of the projections.
+     *
+     * @param vector
+     *            the vector to sketch
+     * @param numWords
+     *            the number of 64-bit words to produce
+     * @param seed
+     *            the base seed; each bit derives its own seed from it
+     * @return the packed bits, with bit {@code b} of word {@code w} at position
+     *         {@code b} counted from the least significant bit
+     */
+    private static long[] getCuts(double[] vector, int numWords, int seed)
+    {
+        long[] bitVector = new long[numWords];
+
+        for (int word=0; word<numWords; word++)
+        {
+            long currBitLong = 0b0;
+
+            long mask = 0b1;
+            for (int bit=0; bit<64; bit++)
+            {
+                // Give every (word, bit) its own seed. The earlier form
+                // seed+(word+1)*bit reused seeds across words (bit 0 of every
+                // word got exactly seed), so, assuming the generator is
+                // deterministic in its seed, those bits were duplicates.
+                // Word 0 is unaffected: both forms reduce to seed+bit.
+                double[] rVec = HashUtils.randomGuassianVector(vector.length, seed+word*64+bit);
+                double proj = BasicMath.dotProduct(vector, rVec);
+
+                if (proj>0.0)
+                    currBitLong = currBitLong | mask;
+
+                mask = mask<<1;
+            }
+
+            bitVector[word] = currBitLong;
+        }
+
+        return bitVector;
+    }
+
+    /**
+     * @param vector
+     *            the vector to sketch
+     * @param numWords
+     *            the number of 64-bit words in the sketch
+     * @param seed
+     *            the base seed for the random projection vectors
+     */
+    public CosineDistanceSketch(double[] vector, int numWords, int seed)
+    {
+        super(getCuts(vector,numWords,seed));
+    }
+
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/sketch/CountMin.java b/src/main/java/edu/umd/marbl/mhap/sketch/CountMin.java
new file mode 100644
index 0000000..efd49fe
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/sketch/CountMin.java
@@ -0,0 +1,154 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.sketch;
+
+import java.util.concurrent.atomic.LongAdder;
+
+/**
+ * Approximate counter backed by a {@code depth} x {@code width} table of
+ * {@link LongAdder} cells. An object is hashed once per row, every selected
+ * cell is incremented on add, and a lookup reports the smallest of the
+ * selected cells.
+ *
+ * @param <T>
+ *            the type of the counted objects
+ */
+public final class CountMin<T extends Object> implements Counter<T>
+{
+    private final LongAdder[][] countTable;
+    private final int depth;
+    private final int seed;
+
+    private final LongAdder totalAdded;
+    private final int width;
+
+    /**
+     * Sizes the table from an error bound and a confidence level:
+     * {@code width = ceil(2 / eps)} and
+     * {@code depth = ceil(-log2(1 - confidence))}.
+     *
+     * @param eps
+     *            the error parameter that determines the width
+     * @param confidence
+     *            the confidence level that determines the depth
+     * @param seed
+     *            the seed passed to the hash computation
+     */
+    public CountMin(double eps, double confidence, int seed)
+    {
+        // 2/w = eps ; w = 2/eps
+        // 1/2^depth <= 1-confidence ; depth >= -log2 (1-confidence)
+        this((int) Math.ceil(-Math.log(1.0 - confidence) / Math.log(2)), (int) Math.ceil(2.0 / eps), seed);
+    }
+
+    /**
+     * Creates a table with explicit dimensions, every cell starting at zero.
+     *
+     * @param depth
+     *            the number of rows, i.e. hashes computed per object
+     * @param width
+     *            the number of cells per row
+     * @param seed
+     *            the seed passed to the hash computation
+     */
+    public CountMin(int depth, int width, int seed)
+    {
+        this.depth = depth;
+        this.width = width;
+        this.seed = seed;
+
+        this.totalAdded = new LongAdder();
+        this.countTable = new LongAdder[depth][width];
+
+        // give every cell its own adder
+        for (LongAdder[] row : this.countTable)
+            for (int col = 0; col < row.length; col++)
+                row[col] = new LongAdder();
+    }
+
+    /**
+     * Maps a hash value to a cell index within a row. The sign bit is cleared
+     * first so that the remainder is never negative.
+     */
+    private int column(int hash)
+    {
+        return ((hash << 1) >>> 1) % this.width;
+    }
+
+    /**
+     * Adds one occurrence of the object.
+     *
+     * @param obj
+     *            the object to count
+     */
+    @Override
+    public void add(T obj)
+    {
+        add(obj, 1);
+    }
+
+    /**
+     * Adds {@code increment} occurrences of the object.
+     *
+     * @param obj
+     *            the object to count
+     * @param increment
+     *            the amount to add; must be positive
+     * @throws SketchRuntimeException
+     *             if {@code increment} is zero or negative
+     */
+    @Override
+    public void add(T obj, long increment)
+    {
+        if (increment <= 0)
+            throw new SketchRuntimeException("Positive value expected for increment.");
+
+        // one hash per row
+        final int[] rowHashes = HashUtils.computeHashesInt(obj, this.depth, this.seed);
+
+        for (int row = 0; row < this.depth; row++)
+            this.countTable[row][column(rowHashes[row])].add(increment);
+
+        // keep the running total of everything added
+        this.totalAdded.add(increment);
+    }
+
+    /**
+     * Reports the smallest value among the cells the object hashes to.
+     *
+     * @param obj
+     *            the object to look up
+     * @return the minimum of the selected cells, or {@code Long.MAX_VALUE}
+     *         when the table has no rows
+     */
+    @Override
+    public long getCount(Object obj)
+    {
+        // one hash per row
+        final int[] rowHashes = HashUtils.computeHashesInt(obj, this.depth, this.seed);
+
+        long smallest = Long.MAX_VALUE;
+        for (int row = 0; row < this.depth; row++)
+            smallest = Math.min(smallest, this.countTable[row][column(rowHashes[row])].longValue());
+
+        return smallest;
+    }
+
+    /**
+     * @return the number of rows in the table
+     */
+    public int getDepth()
+    {
+        return this.depth;
+    }
+
+    /**
+     * @return the number of cells per row
+     */
+    public int getWidth()
+    {
+        return this.width;
+    }
+
+    /**
+     * Not supported by this implementation.
+     *
+     * @throws SketchRuntimeException
+     *             always
+     */
+    @Override
+    public long maxCount()
+    {
+        throw new SketchRuntimeException("Method not implemented.");
+    }
+
+    /**
+     * @return the sum of all increments added so far
+     */
+    public long totalAdded()
+    {
+        return this.totalAdded.longValue();
+    }
+
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/sketch/Counter.java b/src/main/java/edu/umd/marbl/mhap/sketch/Counter.java
new file mode 100644
index 0000000..6b8764c
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/sketch/Counter.java
@@ -0,0 +1,41 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.sketch;
+
+/**
+ * Contract for objects that keep a tally per item. In this package it is
+ * implemented by ClassicCounter and CountMin.
+ *
+ * @param <T>
+ *            the type of the counted items
+ */
+public interface Counter<T extends Object> extends Filter
+{
+ /**
+ * @param obj
+ * the item to look up
+ * @return the count recorded for the item
+ */
+ public long getCount(T obj);
+
+ /**
+ * Records one occurrence of the item; both implementations in this package
+ * delegate to {@code add(obj, 1)}.
+ *
+ * @param obj
+ * the item to count
+ */
+ public void add(T obj);
+
+ /**
+ * @return the largest count held for any item; not every implementation
+ * supports this (CountMin throws SketchRuntimeException)
+ */
+ public long maxCount();
+
+ /**
+ * Records {@code count} occurrences of the item.
+ *
+ * @param obj
+ * the item to count
+ * @param count
+ * the amount to add
+ */
+ public void add(T obj, long count);
+
+}
\ No newline at end of file
diff --git a/src/main/java/edu/umd/marbl/mhap/sketch/Filter.java b/src/main/java/edu/umd/marbl/mhap/sketch/Filter.java
new file mode 100644
index 0000000..4415eed
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/sketch/Filter.java
@@ -0,0 +1,34 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.sketch;
+
+/**
+ * Marker interface that declares no methods. Within this package it is the
+ * parent interface of Counter.
+ */
+public interface Filter
+{
+
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/sketch/FrequencyCounts.java b/src/main/java/edu/umd/marbl/mhap/sketch/FrequencyCounts.java
new file mode 100644
index 0000000..523601b
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/sketch/FrequencyCounts.java
@@ -0,0 +1,120 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.sketch;
+
+import java.util.HashMap;
+import java.util.Map;
+
+
+/**
+ * Immutable table of document-frequency ratios keyed by hash, with helpers
+ * that turn a ratio into an inverse-document-frequency (idf) weight. Hashes
+ * missing from the table are treated as having the filter cutoff as their
+ * ratio.
+ */
+public final class FrequencyCounts
+{
+    private final double filterCutoff;
+    private final Map<Long,Double> fractionCounts;
+    private final double maxIdfValue;
+    private final double maxValue;
+    private final double minIdfValue;
+    private final double minValue;
+
+    /**
+     * @param fractionCounts
+     *            the ratio for every known hash; the map is copied
+     * @param filterCutoff
+     *            the ratio assumed for hashes that are not in the map, which
+     *            also serves as the smallest ratio
+     */
+    public FrequencyCounts(Map<Long,Double> fractionCounts, double filterCutoff)
+    {
+        this.filterCutoff = filterCutoff;
+        this.fractionCounts = new HashMap<>(fractionCounts);
+
+        // largest stored ratio; negative infinity when the table is empty
+        this.maxValue = this.fractionCounts.values().stream()
+                .mapToDouble(Double::doubleValue)
+                .max()
+                .orElse(Double.NEGATIVE_INFINITY);
+        this.minValue = this.filterCutoff;
+
+        // idf falls as the ratio grows, so the extremes swap places
+        this.minIdfValue = idf(this.maxValue);
+        this.maxIdfValue = idf(this.minValue);
+    }
+
+    /**
+     * @param hash
+     *            the hash to look up
+     * @return {@code true} if a ratio is stored for the hash
+     */
+    public boolean contains(long hash)
+    {
+        return this.fractionCounts.containsKey(hash);
+    }
+
+    /**
+     * @param hash
+     *            the hash to look up
+     * @return the stored ratio, or the filter cutoff when the hash is unknown
+     */
+    public double documentFrequencyRatio(long hash)
+    {
+        final Double stored = this.fractionCounts.get(hash);
+
+        return (stored != null) ? stored : this.minValue;
+    }
+
+    /**
+     * @return the cutoff given at construction
+     */
+    public double getFilterCutoff()
+    {
+        return this.filterCutoff;
+    }
+
+    /**
+     * @param freq
+     *            a document-frequency ratio
+     * @return the natural log of the largest stored ratio divided by {@code freq}
+     */
+    public double idf(double freq)
+    {
+        return Math.log(this.maxValue/freq);
+    }
+
+    /**
+     * @param hash
+     *            the hash to look up
+     * @return the idf of the hash's ratio, with unknown hashes using the cutoff
+     */
+    public double idf(long hash)
+    {
+        return idf(documentFrequencyRatio(hash));
+    }
+
+    /**
+     * Maps the idf of a hash linearly from {@code [minIdf(), maxIdf()]} onto
+     * {@code [1, maxValue]}. The result is not rounded. The scale is not
+     * guarded against zero, which occurs when {@code maxValue} is 1 or when
+     * {@code maxIdf()} equals {@code minIdf()}.
+     *
+     * @param hash
+     *            the hash to look up
+     * @param maxValue
+     *            the upper end of the target range, also returned for unknown hashes
+     * @return the rescaled idf
+     */
+    public double idfDiscrete(long hash, int maxValue)
+    {
+        final Double ratio = this.fractionCounts.get(hash);
+
+        // unknown hashes get the top of the range
+        if (ratio == null)
+            return maxValue;
+
+        final double rawIdf = idf(ratio.doubleValue());
+
+        // idf units per step of the target range
+        final double scale = (maxIdf()-minIdf())/(double)(maxValue-1.0);
+
+        return 1.0+(rawIdf-minIdf())/scale;
+    }
+
+    /**
+     * @param hash
+     *            the hash to look up
+     * @return the reciprocal of the hash's ratio
+     */
+    public double inverseDocumentFrequency(long hash)
+    {
+        return 1.0/documentFrequencyRatio(hash);
+    }
+
+    /**
+     * @return the idf of the filter cutoff
+     */
+    public double maxIdf()
+    {
+        return this.maxIdfValue;
+    }
+
+    /**
+     * @return the idf of the largest stored ratio
+     */
+    public double minIdf()
+    {
+        return this.minIdfValue;
+    }
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/sketch/HashUtils.java b/src/main/java/edu/umd/marbl/mhap/sketch/HashUtils.java
new file mode 100644
index 0000000..336af3b
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/sketch/HashUtils.java
@@ -0,0 +1,291 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.sketch;
+
+import java.nio.ByteBuffer;
+
+import com.google.common.hash.HashCode;
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hasher;
+import com.google.common.hash.Hashing;
+
+import edu.umd.marbl.mhap.math.BasicMath;
+import edu.umd.marbl.mhap.utils.MersenneTwisterFast;
+
+public class HashUtils
+{
+ /**
+  * Hashes a string into {@code numWords} 64-bit values using 128-bit murmur3.
+  * Each digest supplies two consecutive words; the hash seed for a pair is
+  * {@code seed} plus the index of the pair's first word.
+  *
+  * @param item string to hash
+  * @param numWords number of 64-bit hash values to produce
+  * @param seed base seed
+  * @return array of {@code numWords} hash values
+  */
+ public static long[] computeHashes(String item, int numWords, int seed)
+ {
+     final long[] result = new long[numWords];
+
+     int index = 0;
+     while (index < numWords)
+     {
+         final Hasher hasher = Hashing.murmur3_128(seed + index).newHasher();
+         hasher.putUnencodedChars(item);
+
+         // split the 16-byte digest into its two longs
+         final ByteBuffer digest = ByteBuffer.wrap(hasher.hash().asBytes());
+         result[index] = digest.getLong(0);
+
+         // for an odd numWords the second half of the last digest is dropped
+         if (index + 1 < numWords)
+             result[index + 1] = digest.getLong(8);
+
+         index += 2;
+     }
+
+     return result;
+ }
+
+ /**
+  * Dispatches to the type-specific 32-bit hashing routine for the runtime
+  * type of {@code obj}.
+  *
+  * @param obj value to hash; must be an Integer, Long, Double, Float or String
+  * @param numWords number of hash values to produce
+  * @param seed seed passed to the hashing routine
+  * @return array of {@code numWords} hash values
+  * @throws SketchRuntimeException if {@code obj} is null or of an unsupported type
+  */
+ public final static int[] computeHashesInt(Object obj, int numWords, int seed)
+ {
+     // report null explicitly; otherwise obj.getClass() below would fail with a bare NullPointerException
+     if (obj == null)
+         throw new SketchRuntimeException("Cannot hash a null object.");
+
+     if (obj instanceof Integer)
+         return computeHashesIntInt((Integer) obj, numWords, seed);
+     if (obj instanceof Long)
+         return computeHashesIntLong((Long) obj, numWords, seed);
+     if (obj instanceof Double)
+         return computeHashesIntDouble((Double) obj, numWords, seed);
+     if (obj instanceof Float)
+         return computeHashesIntFloat((Float) obj, numWords, seed);
+     if (obj instanceof String)
+         return computeHashesIntString((String) obj, numWords, seed);
+
+     throw new SketchRuntimeException("Cannot hash class type " + obj.getClass().getCanonicalName());
+ }
+
+ /**
+  * Produces {@code numWords} 32-bit murmur3 hashes of a double. The slot
+  * index is hashed together with the value so each slot differs.
+  *
+  * @param obj value to hash
+  * @param numWords number of hash values to produce
+  * @param seed murmur3 seed
+  * @return array of {@code numWords} hash values
+  */
+ public final static int[] computeHashesIntDouble(double obj, int numWords, int seed)
+ {
+     final int[] result = new int[numWords];
+     final HashFunction murmur = Hashing.murmur3_32(seed);
+
+     for (int index = 0; index < result.length; index++)
+         result[index] = murmur.newHasher().putDouble(obj).putInt(index).hash().asInt();
+
+     return result;
+ }
+
+ /**
+  * Produces {@code numWords} 32-bit murmur3 hashes of a float. The slot
+  * index is hashed together with the value so each slot differs.
+  *
+  * @param obj value to hash
+  * @param numWords number of hash values to produce
+  * @param seed murmur3 seed
+  * @return array of {@code numWords} hash values
+  */
+ public final static int[] computeHashesIntFloat(float obj, int numWords, int seed)
+ {
+     final int[] result = new int[numWords];
+     final HashFunction murmur = Hashing.murmur3_32(seed);
+
+     for (int index = 0; index < result.length; index++)
+         result[index] = murmur.newHasher().putFloat(obj).putInt(index).hash().asInt();
+
+     return result;
+ }
+
+ /**
+  * Produces {@code numWords} 32-bit murmur3 hashes of an int. The slot
+  * index is hashed together with the value so each slot differs.
+  *
+  * @param obj value to hash
+  * @param numWords number of hash values to produce
+  * @param seed murmur3 seed
+  * @return array of {@code numWords} hash values
+  */
+ public final static int[] computeHashesIntInt(int obj, int numWords, int seed)
+ {
+     final int[] result = new int[numWords];
+     final HashFunction murmur = Hashing.murmur3_32(seed);
+
+     for (int index = 0; index < result.length; index++)
+         result[index] = murmur.newHasher().putInt(obj).putInt(index).hash().asInt();
+
+     return result;
+ }
+
+ /**
+  * Produces {@code numWords} 32-bit murmur3 hashes of a long. The slot
+  * index is hashed together with the value so each slot differs.
+  *
+  * @param obj value to hash
+  * @param numWords number of hash values to produce
+  * @param seed murmur3 seed
+  * @return array of {@code numWords} hash values
+  */
+ public final static int[] computeHashesIntLong(long obj, int numWords, int seed)
+ {
+     final int[] result = new int[numWords];
+     final HashFunction murmur = Hashing.murmur3_32(seed);
+
+     for (int index = 0; index < result.length; index++)
+         result[index] = murmur.newHasher().putLong(obj).putInt(index).hash().asInt();
+
+     return result;
+ }
+
+ /**
+  * Produces {@code numWords} 32-bit murmur3 hashes of a string. The slot
+  * index is hashed together with the characters so each slot differs.
+  *
+  * @param obj string to hash
+  * @param numWords number of hash values to produce
+  * @param seed murmur3 seed
+  * @return array of {@code numWords} hash values
+  */
+ public final static int[] computeHashesIntString(String obj, int numWords, int seed)
+ {
+     final int[] result = new int[numWords];
+     final HashFunction murmur = Hashing.murmur3_32(seed);
+
+     for (int index = 0; index < result.length; index++)
+         result[index] = murmur.newHasher().putUnencodedChars(obj).putInt(index).hash().asInt();
+
+     return result;
+ }
+
+ /**
+  * Computes {@code numWords} hash values for every n-gram of a sequence.
+  * Each n-gram is hashed once with murmur3; the remaining values come from
+  * repeatedly applying an xorshift step to that base hash.
+  *
+  * @param seq sequence to hash
+  * @param nGramSize n-gram length
+  * @param numWords number of hash values per n-gram
+  * @param seed seed of the base hash
+  * @return one row of {@code numWords} values per n-gram, in sequence order
+  * @throws SketchRuntimeException if the sequence is shorter than {@code nGramSize}
+  */
+ public final static long[][] computeNGramHashes(final String seq, final int nGramSize, final int numWords, final int seed)
+ {
+     if (seq.length()-nGramSize+1 < 1)
+         throw new SketchRuntimeException("N-gram size bigger than string length.");
+
+     // one base hash per n-gram
+     final long[] baseHashes = computeSequenceHashesLong(seq, nGramSize, seed);
+
+     final long[][] result = new long[baseHashes.length][numWords];
+
+     for (int gram = 0; gram < baseHashes.length; gram++)
+     {
+         final long[] row = result[gram];
+
+         // xorshift state, started from the n-gram's base hash
+         long state = baseHashes[gram];
+
+         for (int word = 0; word < row.length; word++)
+         {
+             state ^= (state << 21);
+             state ^= (state >>> 35);
+             state ^= (state << 4);
+             row[word] = state;
+         }
+     }
+
+     return result;
+ }
+
+ /**
+  * Computes {@code numWords} murmur3 hash values for every n-gram of a
+  * sequence, hashing the n-gram together with the word index for each value.
+  *
+  * @param seq sequence to hash
+  * @param nGramSize n-gram length
+  * @param numWords number of hash values per n-gram
+  * @param seed murmur3 seed
+  * @return one row of {@code numWords} values per n-gram, in sequence order
+  * @throws SketchRuntimeException if {@code nGramSize} exceeds the sequence length by more than one
+  */
+ public final static long[][] computeNGramHashesExact(final String seq, final int nGramSize, final int numWords, final int seed)
+ {
+     final int numberNGrams = seq.length() - nGramSize + 1;
+
+     // a negative count used to surface as NegativeArraySizeException; a count of
+     // zero still yields an empty result, as before
+     if (numberNGrams < 0)
+         throw new SketchRuntimeException("N-gram size bigger than string length.");
+
+     HashFunction hf = Hashing.murmur3_128(seed);
+
+     long[][] hashes = new long[numberNGrams][numWords];
+     for (int iter = 0; iter < hashes.length; iter++)
+     {
+         String subStr = seq.substring(iter, iter + nGramSize);
+
+         for (int word=0; word<numWords; word++)
+         {
+             HashCode hc = hf.newHasher().putUnencodedChars(subStr).putInt(word).hash();
+             hashes[iter][word] = hc.asLong();
+         }
+     }
+
+     return hashes;
+ }
+
+ /**
+  * Computes one 32-bit murmur3 hash (seed 0) for every n-gram of a sequence.
+  *
+  * @param seq sequence to hash
+  * @param nGramSize n-gram length
+  * @return hash of each n-gram, in sequence order
+  * @throws SketchRuntimeException if {@code nGramSize} exceeds the sequence length by more than one
+  */
+ public final static int[] computeSequenceHashes(final String seq, final int nGramSize)
+ {
+     final int numberNGrams = seq.length() - nGramSize + 1;
+
+     // a negative count used to surface as NegativeArraySizeException; a count of
+     // zero still yields an empty result, as before
+     if (numberNGrams < 0)
+         throw new SketchRuntimeException("N-gram size bigger than string length.");
+
+     HashFunction hf = Hashing.murmur3_32(0);
+
+     int[] hashes = new int[numberNGrams];
+     for (int iter = 0; iter < hashes.length; iter++)
+     {
+         HashCode hc = hf.newHasher().putUnencodedChars(seq.substring(iter, iter + nGramSize)).hash();
+         hashes[iter] = hc.asInt();
+     }
+
+     return hashes;
+ }
+
+ /**
+  * Computes one 64-bit value, taken from a 128-bit murmur3 digest, for every
+  * n-gram of a sequence.
+  *
+  * @param seq sequence to hash
+  * @param nGramSize n-gram length
+  * @param seed murmur3 seed
+  * @return hash of each n-gram, in sequence order
+  * @throws SketchRuntimeException if {@code nGramSize} exceeds the sequence length by more than one
+  */
+ public final static long[] computeSequenceHashesLong(final String seq, final int nGramSize, final int seed)
+ {
+     final int numberNGrams = seq.length() - nGramSize + 1;
+
+     // a negative count used to surface as NegativeArraySizeException; a count of
+     // zero still yields an empty result, as before
+     if (numberNGrams < 0)
+         throw new SketchRuntimeException("N-gram size bigger than string length.");
+
+     HashFunction hf = Hashing.murmur3_128(seed);
+
+     long[] hashes = new long[numberNGrams];
+     for (int iter = 0; iter < hashes.length; iter++)
+     {
+         HashCode hc = hf.newHasher().putUnencodedChars(seq.substring(iter, iter + nGramSize)).hash();
+         hashes[iter] = hc.asLong();
+     }
+
+     return hashes;
+ }
+
+ /**
+  * Draws {@code n} Gaussian values from a generator seeded with {@code seed}
+  * and scales the vector by the reciprocal of its norm.
+  *
+  * @param n vector length
+  * @param seed generator seed
+  * @return the scaled vector, or the raw draws when the norm is below 1.0e-10
+  */
+ public static double[] randomGuassianVector(int n, int seed)
+ {
+     final MersenneTwisterFast generator = new MersenneTwisterFast(seed);
+
+     final double[] result = new double[n];
+     for (int index = 0; index < result.length; index++)
+         result[index] = generator.nextGaussian();
+
+     // skip the scaling when the vector is numerically zero
+     final double length = BasicMath.norm(result);
+
+     return (length<1.0e-10) ? result : BasicMath.mult(result, 1.0/length);
+ }
+
+ /**
+  * Draws {@code n} Gaussian values from a generator whose seed array is
+  * derived from four murmur3 hashes of {@code str}, then scales the vector
+  * by the reciprocal of its norm.
+  *
+  * @param str string that determines the generator seed
+  * @param n vector length
+  * @param seed base seed; hash number {@code i} uses seed {@code seed*4+i}
+  * @return the scaled vector, or the raw draws when the norm is below 1.0e-10
+  */
+ public static double[] randomStringGuassianVector(String str, int n, int seed)
+ {
+     // four 32-bit hashes of the string form the generator's seed array
+     final int[] seedWords = new int[4];
+     for (int index = 0; index < seedWords.length; index++)
+         seedWords[index] = Hashing.murmur3_32(seed*4+index).newHasher().putUnencodedChars(str).hash().asInt();
+
+     final MersenneTwisterFast generator = new MersenneTwisterFast(seedWords);
+
+     final double[] result = new double[n];
+     for (int index = 0; index < result.length; index++)
+         result[index] = generator.nextGaussian();
+
+     // skip the scaling when the vector is numerically zero
+     final double length = BasicMath.norm(result);
+
+     return (length<1.0e-10) ? result : BasicMath.mult(result, 1.0/length);
+ }
+
+ /**
+  * Private constructor; the class exposes only static methods.
+  */
+ private HashUtils()
+ {
+ }
+
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/sketch/MinHashBitSketch.java b/src/main/java/edu/umd/marbl/mhap/sketch/MinHashBitSketch.java
new file mode 100644
index 0000000..de0f91a
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/sketch/MinHashBitSketch.java
@@ -0,0 +1,92 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.sketch;
+
+public final class MinHashBitSketch extends AbstractBitSketch<MinHashBitSketch>
+{
+ /**
+ *
+ */
+ private static final long serialVersionUID = -44448450811302477L;
+
+ /**
+  * Packs the lowest bit of each min-hash into 64-bit words, 64 hashes per
+  * word, with earlier hashes ending up in the more significant bits. Hashes
+  * beyond the last full group of 64 are ignored.
+  *
+  * @param minHashes min-hash values
+  * @return {@code minHashes.length/64} packed words
+  */
+ private final static long[] getAsBits(int[] minHashes)
+ {
+     final long[] packed = new long[minHashes.length/64];
+
+     // index of the next min-hash to consume
+     int next = 0;
+     for (int word = 0; word < packed.length; word++)
+     {
+         long current = 0L;
+
+         for (int bit = 0; bit < 64; bit++)
+         {
+             // shift in only the last bit of the hash
+             current = (current << 1) | (minHashes[next] & 1L);
+             next++;
+         }
+
+         packed[word] = current;
+     }
+
+     return packed;
+ }
+
+ /**
+  * Creates a sketch from already packed bit words, which are passed to the
+  * superclass constructor unchanged.
+  *
+  * @param bits packed bit words
+  */
+ public MinHashBitSketch(long[] bits)
+ {
+ super(bits);
+ }
+
+ /**
+  * Creates a sketch from min-hash values by packing them with
+  * {@code getAsBits} before handing them to the superclass.
+  *
+  * @param minHashes min-hash values
+  */
+ public MinHashBitSketch(int[] minHashes)
+ {
+ super(getAsBits(minHashes));
+ }
+
+ /**
+  * Creates a sketch of a sequence by computing a {@code MinHashSketch} with
+  * {@code numWords*64} hashes and packing its min-hash array with {@code getAsBits}.
+  *
+  * @param seq sequence to sketch
+  * @param nGramSize n-gram length used for the min-hash
+  * @param numWords number of 64-bit words in the resulting sketch
+  */
+ public MinHashBitSketch(String seq, int nGramSize, int numWords)
+ {
+ super(getAsBits(new MinHashSketch(seq, nGramSize, numWords*64).getMinHashArray()));
+ }
+
+ /**
+  * Estimates the Jaccard similarity from the fraction of the intersection
+  * count over the number of bits: a fraction of 0.5 maps to 0 and a fraction
+  * of 1 maps to 1, with negative results clamped to 0.
+  *
+  * @param sh sketch to compare against
+  * @return similarity estimate, never below 0
+  */
+ public final double jaccard(final MinHashBitSketch sh)
+ {
+     final double matchFraction = (double)getIntersectionCount(sh)/(double) this.numberOfBits();
+
+     return Math.max(0.0, (matchFraction- 0.5) * 2.0);
+ }
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/sketch/MinHashSketch.java b/src/main/java/edu/umd/marbl/mhap/sketch/MinHashSketch.java
new file mode 100644
index 0000000..22711b2
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/sketch/MinHashSketch.java
@@ -0,0 +1,257 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2014 by Konstantin Berlin and Sergey Koren
+ * University Of Maryland
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.sketch;
+
+import it.unimi.dsi.fastutil.longs.Long2ObjectLinkedOpenHashMap;
+
+import java.io.DataInputStream;
+import java.io.EOFException;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Map.Entry;
+
+import edu.umd.marbl.mhap.utils.HitCounter;
+
+public final class MinHashSketch implements Sketch<MinHashSketch>
+{
+ private final int[] minHashes;
+ /**
+ *
+ */
+ private static final long serialVersionUID = 8846482698636860862L;
+
+ /**
+  * Computes a min-hash signature of a sequence, optionally weighting each
+  * distinct k-mer.
+  * <p>
+  * Every distinct k-mer hash seeds an xorshift generator. For each signature
+  * slot the generator is advanced {@code weight} times, and the k-mer whose
+  * draw is the smallest seen for that slot wins it. The generator state
+  * carries over from one slot to the next. The value stored for a slot is the
+  * low 32 bits of the winning k-mer hash for even slots and the high 32 bits
+  * for odd slots.
+  * <p>
+  * Weights: without a filter the weight is the k-mer's occurrence count when
+  * {@code weighted}, otherwise 1. With a filter and {@code weighted}, the
+  * weight is the occurrence count times {@code kmerFilter.idfDiscrete(key, 3)},
+  * rounded, and at least 1. With a filter and not {@code weighted}, k-mers
+  * contained in the filter are skipped.
+  *
+  * @param seq sequence to sketch
+  * @param nGramSize k-mer length
+  * @param numHashes number of signature slots
+  * @param kmerFilter optional frequency table; may be null
+  * @param weighted whether k-mers are weighted
+  * @return signature array of length {@code Math.max(1, numHashes)}
+  * @throws SketchRuntimeException if the sequence is shorter than {@code nGramSize}
+  */
+ private final static int[] computeNgramMinHashesWeighted(String seq, final int nGramSize, final int numHashes,
+         FrequencyCounts kmerFilter, boolean weighted)
+ {
+     final int numberNGrams = seq.length() - nGramSize + 1;
+
+     if (numberNGrams < 1)
+         throw new SketchRuntimeException("N-gram size bigger than string length.");
+
+     // get the kmer hashes
+     final long[] kmerHashes = HashUtils.computeSequenceHashesLong(seq, nGramSize, 0);
+
+     // count the occurrences of each distinct k-mer hash
+     Long2ObjectLinkedOpenHashMap<HitCounter> hitMap = new Long2ObjectLinkedOpenHashMap<HitCounter>(kmerHashes.length);
+     for (long kmer : kmerHashes)
+     {
+         HitCounter counter = hitMap.get(kmer);
+         if (counter==null)
+         {
+             counter = new HitCounter(1);
+             hitMap.put(kmer, counter);
+         }
+         else
+             counter.addHit();
+     }
+
+     //allocate the space
+     int[] hashes = new int[Math.max(1,numHashes)];
+     long[] best = new long[numHashes];
+     Arrays.fill(best, Long.MAX_VALUE);
+
+     //go through all the k-mers and find the min values
+     for (Entry<Long, HitCounter> kmer : hitMap.entrySet())
+     {
+         long key = kmer.getKey();
+         int weight = kmer.getValue().count;
+
+         if (!weighted)
+             weight = 1;
+
+         if (kmerFilter!=null)
+         {
+             if (weighted)
+             {
+                 //compute the td part
+                 double td = (double)weight;
+
+                 //compute the idf part
+                 double idf = kmerFilter.idfDiscrete(key, 3);
+
+                 //compute td-idf
+                 weight = (int)Math.round(td*idf);
+                 if (weight<1)
+                     weight = 1;
+             }
+             else
+             {
+                 // unweighted filtering: drop k-mers present in the filter
+                 if (kmerFilter.contains(key))
+                     weight = 0;
+             }
+         }
+
+         if (weight<=0)
+             continue;
+
+         //set the initial shift value
+         long x = key;
+         for (int word = 0; word < numHashes; word++)
+         {
+             for (int count = 0; count<weight; count++)
+             {
+                 // XORShift Random Number Generators
+                 x ^= (x << 21);
+                 x ^= (x >>> 35);
+                 x ^= (x << 4);
+
+                 if (x < best[word])
+                 {
+                     best[word] = x;
+
+                     // even slots keep the low half of the key, odd slots the high half
+                     if (word%2==0)
+                         hashes[word] = (int)key;
+                     else
+                         hashes[word] = (int)(key>>>32);
+                 }
+             }
+         }
+     }
+
+     return hashes;
+ }
+
+ /**
+  * Reads a sketch written as an int count followed by that many int values.
+  *
+  * @param input stream to read from
+  * @return the sketch, or null if the end of the stream is hit while reading
+  * @throws IOException on any other read failure
+  */
+ public static MinHashSketch fromByteStream(DataInputStream input) throws IOException
+ {
+     final int[] values;
+
+     try
+     {
+         // the leading int is the number of hashes that follow
+         values = new int[input.readInt()];
+
+         for (int index = 0; index < values.length; index++)
+             values[index] = input.readInt();
+     }
+     catch (EOFException e)
+     {
+         return null;
+     }
+
+     return new MinHashSketch(values);
+ }
+
+ /**
+  * Wraps an existing min-hash array; the array is stored as-is, without copying.
+  *
+  * @param minHashes min-hash values
+  */
+ private MinHashSketch(int[] minHashes)
+ {
+ this.minHashes = minHashes;
+ }
+
+ /**
+  * Sketches a string with no frequency filter and with weighting enabled.
+  *
+  * @param str string to sketch
+  * @param nGramSize n-gram length
+  * @param numHashes number of min-hash values
+  */
+ public MinHashSketch(String str, int nGramSize, int numHashes)
+ {
+ this.minHashes = MinHashSketch.computeNgramMinHashesWeighted(str, nGramSize, numHashes, null, true);
+ }
+
+ /**
+  * Sketches a sequence using the given frequency filter and weighting mode.
+  *
+  * @param seq sequence to sketch
+  * @param nGramSize n-gram length
+  * @param numHashes number of min-hash values
+  * @param freqFilter frequency table passed through to the min-hash computation
+  * @param weighted whether k-mers are weighted
+  */
+ public MinHashSketch(String seq, int nGramSize, int numHashes, FrequencyCounts freqFilter, boolean weighted)
+ {
+ this.minHashes = MinHashSketch.computeNgramMinHashesWeighted(seq, nGramSize, numHashes, freqFilter, weighted);
+ }
+
+ /**
+  * Serializes the sketch as an int count followed by the min-hash values,
+  * four bytes each.
+  *
+  * @return the serialized bytes
+  */
+ public byte[] getAsByteArray()
+ {
+     final ByteBuffer buffer = ByteBuffer.allocate(4*(1+this.minHashes.length));
+
+     // length header, then the values in order
+     buffer.putInt(this.minHashes.length);
+     for (int value : this.minHashes)
+         buffer.putInt(value);
+
+     return buffer.array();
+ }
+
+ /**
+  * @return the internal min-hash array itself (not a copy)
+  */
+ public final int[] getMinHashArray()
+ {
+ return this.minHashes;
+ }
+
+ /**
+  * Estimates the Jaccard similarity as the fraction of positions at which
+  * the two min-hash arrays hold the same value.
+  *
+  * @param h sketch to compare against
+  * @return number of equal positions divided by the array length
+  * @throws SketchRuntimeException if the two sketches differ in length
+  */
+ public final double jaccard(MinHashSketch h)
+ {
+     final int size = this.minHashes.length;
+
+     if (h.minHashes.length!=size)
+         throw new SketchRuntimeException("MinHashes must be of same length in order to be compared.");
+
+     int matches = 0;
+     for (int index = 0; index < size; index++)
+         if (this.minHashes[index]==h.minHashes[index])
+             matches++;
+
+     return (double)matches/(double)size;
+ }
+
+ /**
+  * @return the number of min-hash values in this sketch
+  */
+ public final int numHashes()
+ {
+ return this.minHashes.length;
+ }
+
+ /**
+  * Returns the similarity to another sketch, which is its Jaccard estimate.
+  *
+  * @param sh sketch to compare against
+  * @return result of {@code jaccard(sh)}
+  */
+ @Override
+ public double similarity(MinHashSketch sh)
+ {
+ return jaccard(sh);
+ }
+
+ /**
+  * @return the text "MinHash " followed by the min-hash array
+  * @see java.lang.Object#toString()
+  */
+ @Override
+ public String toString()
+ {
+     return "MinHash "+Arrays.toString(this.minHashes);
+ }
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/sketch/OrderedNGramHashes.java b/src/main/java/edu/umd/marbl/mhap/sketch/OrderedNGramHashes.java
new file mode 100644
index 0000000..03ed327
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/sketch/OrderedNGramHashes.java
@@ -0,0 +1,460 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2014 by Konstantin Berlin and Sergey Koren
+ * University Of Maryland
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.sketch;
+
+import it.unimi.dsi.fastutil.ints.IntArrays;
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.Arrays;
+
+import edu.umd.marbl.mhap.impl.OverlapInfo;
+import edu.umd.marbl.mhap.utils.Utils;
+
+public final class OrderedNGramHashes
+{
+ /**
+  * Immutable pair of ints that is ordered by its first component only.
+  */
+ private static final class SortableIntPair implements Comparable<SortableIntPair>, Serializable
+ {
+ // first component; the only one used for ordering
+ public final int x;
+ // second component; ignored by compareTo
+ public final int y;
+ /**
+ *
+ */
+ private static final long serialVersionUID = 2525278831423582446L;
+
+ /**
+  * @param x first component
+  * @param y second component
+  */
+ public SortableIntPair(int x, int y)
+ {
+ this.x = x;
+ this.y = y;
+ }
+
+ /**
+  * Compares by {@code x} alone, so pairs with equal {@code x} and
+  * different {@code y} compare as equal.
+  */
+ @Override
+ public int compareTo(SortableIntPair p)
+ {
+ return Integer.compare(this.x, p.x);
+ }
+
+ /* (non-Javadoc)
+ * @see java.lang.Object#toString()
+ */
+ @Override
+ public String toString()
+ {
+ return "["+x + ", " + y + "]";
+ }
+
+ }
+
+ private final int[][] orderedHashes;
+ private final int seqLength;
+
+ public final static int REDUCTION = 4;
+
+ /**
+  * Allocates the storage for {@code size} entries of two ints each.
+  *
+  * @param size number of entries
+  * @return zero-filled {@code size} x 2 array
+  */
+ private final static int[][] allocateMemory(int size)
+ {
+     return new int[size][2];
+ }
+
+ /**
+  * Reads an instance written as: sequence length, entry count, then two
+  * ints per entry.
+  *
+  * @param input stream to read from
+  * @return the instance, or null if the end of the stream is hit while reading
+  * @throws IOException on any other read failure
+  */
+ public final static OrderedNGramHashes fromByteStream(DataInputStream input) throws IOException
+ {
+     final int length;
+     final int[][] entries;
+
+     try
+     {
+         // header: sequence length, then number of entries
+         length = input.readInt();
+         entries = allocateMemory(input.readInt());
+
+         for (int[] entry : entries)
+         {
+             entry[0] = input.readInt();
+             entry[1] = input.readInt();
+         }
+     }
+     catch (EOFException e)
+     {
+         return null;
+     }
+
+     return new OrderedNGramHashes(length, entries);
+ }
+
+ /**
+  * Wraps already computed data; the array is stored as-is, without copying.
+  *
+  * @param seqLength value stored as the sequence length
+  * @param orderedHashes entries of two ints each
+  */
+ private OrderedNGramHashes(int seqLength, int[][] orderedHashes)
+ {
+ this.seqLength = seqLength;
+ this.orderedHashes = orderedHashes;
+ }
+
+ /**
+  * Computes the ordered hashes of a sequence.
+  *
+  * @param seq sequence to process
+  * @param kmerSize k-mer length
+  * @throws SketchRuntimeException if the sequence is shorter than {@code kmerSize}
+  */
+ public OrderedNGramHashes(String seq, int kmerSize)
+ {
+ // note: the stored length is the number of k-mers, not the number of characters
+ this.seqLength = seq.length() - kmerSize + 1;
+
+ if (this.seqLength<=0)
+ throw new SketchRuntimeException("Sequence length must be greater or equal to kmerSize.");
+
+ this.orderedHashes = getFullHashes(seq, kmerSize);
+ }
+
+ /**
+  * Serializes this instance as: sequence length, entry count, then the two
+  * ints of every entry, four bytes each.
+  *
+  * @return the serialized bytes
+  * @throws SketchRuntimeException if writing to the in-memory stream fails
+  */
+ public byte[] getAsByteArray()
+ {
+     // exact output size: two header ints plus two ints per entry; presizing
+     // avoids repeated growth of the buffer. Computed in long and clamped so
+     // the capacity can never overflow into a negative value.
+     final long exactBytes = 8L * ((long) size() + 1L);
+     ByteArrayOutputStream bos = new ByteArrayOutputStream((int) Math.min(exactBytes, (long) Integer.MAX_VALUE - 8L));
+     DataOutputStream dos = new DataOutputStream(bos);
+
+     try
+     {
+         dos.writeInt(this.seqLength);
+         dos.writeInt(size());
+         for (int iter = 0; iter < this.orderedHashes.length; iter++)
+         {
+             dos.writeInt(this.orderedHashes[iter][0]);
+             dos.writeInt(this.orderedHashes[iter][1]);
+         }
+
+         dos.flush();
+         return bos.toByteArray();
+     }
+     catch (IOException e)
+     {
+         throw new SketchRuntimeException("Unexpected IO error.");
+     }
+ }
+
+ /**
+  * @param index entry index
+  * @return the first component (the hash) of the entry at {@code index}
+  */
+ public int getHash(int index)
+ {
+ return this.orderedHashes[index][0];
+ }
+
+ private int[][] storeAsArray(SortableIntPair[] completeHashAsPair)
+ {
+ // allocate the memory
+ int[][] completeHash = allocateMemory(completeHashAsPair.length);
+
+ for (int iter = 0; iter < completeHashAsPair.length; iter++)
+ {
+ completeHash[iter][0] = completeHashAsPair[iter].x;
+ completeHash[iter][1] = completeHashAsPair[iter].y;
+ }
+
+ return completeHash;
+ }
+
+ private int[][] getFullHashes(String seq, int subKmerSize)
+ {
+ int cutoff = (int) ((long) Integer.MIN_VALUE + ((long) Integer.MAX_VALUE - (long) Integer.MIN_VALUE)
+ / (long) REDUCTION);
+
+ // compute just direct hash of sequence
+ int[] hashes = HashUtils.computeSequenceHashes(seq, subKmerSize);
+
+ int count = 0;
+ for (int val : hashes)
+ if (val <= cutoff)
+ count++;
+
+ int[] cutHashes = new int[count];
+ int[] perm = new int[count];
+ int[] pos = new int[count];
+
+ count = 0;
+ for (int iter = 0; iter < hashes.length; iter++)
+ if (hashes[iter] <= cutoff)
+ {
+ cutHashes[count] = hashes[iter];
+ perm[count] = count;
+ pos[count] = iter;
+
+ count++;
+ }
+
+ //sort the array
+ IntArrays.radixSortIndirect(perm, cutHashes, true);
+
+ SortableIntPair[] completeHashAsPair = new SortableIntPair[count];
+ for (int iter=0; iter<count; iter++)
+ {
+ int index = perm[iter];
+ completeHashAsPair[iter] = new SortableIntPair(cutHashes[index], pos[index]);
+ }
+
+ //System.err.println(Arrays.toString(completeHashAsPair));
+ // sort the results, sort in place so no need to look at second
+ //Arrays.sort(completeHashAsPair);
+
+ return storeAsArray(completeHashAsPair);
+ }
+
+ /**
+  * Estimates the overlap between this sequence and another from their
+  * hash-ordered k-mer lists.
+  * <p>
+  * The two lists are merged; every pair of equal hashes contributes a
+  * position shift (position in {@code s} minus position in this sequence).
+  * The median shift defines the overlap region and the allowed deviation
+  * from the median. With a positive {@code maxShiftPercent} the merge is run
+  * twice, the second time restricted to the region found by the first;
+  * with a non-positive value it is run once and the absolute value is used.
+  * Matches within the allowed deviation are counted and their extreme
+  * positions are turned into edge estimates.
+  *
+  * @param s the other sequence's hashes
+  * @param maxShiftPercent allowed shift deviation as a fraction of the overlap size;
+  *        a non-positive value selects a single pass
+  * @return overlap information; an all-zero result when there are no matches
+  *         or at most one valid match
+  */
+ public OverlapInfo getOverlapInfo(OrderedNGramHashes s, double maxShiftPercent)
+ {
+     int[][] allKmerHashes = this.orderedHashes;
+
+     // get the kmers of the second sequence
+     int[][] sAllKmerHashes = s.orderedHashes;
+
+     // get sizes
+     int size1 = this.size();
+     int size2 = s.size();
+
+     int kmerSize1 = this.seqLength;
+     int kmerSize2 = s.seqLength;
+
+     // init the ok regions
+     int valid1Lower = 0;
+     int valid1Upper = kmerSize1;
+     int valid2Lower = 0;
+     int valid2Upper = kmerSize2;
+
+     int medianShift = 0;
+     int overlapSize = Math.min(kmerSize1, kmerSize2);
+     int absMaxShiftInOverlap = Math.max(kmerSize1, kmerSize2);
+
+     int count = 0;
+     int[] posShift = new int[Math.min(size1, size2) / 8 + 1];
+     int[] pos1Index = new int[posShift.length];
+     int[] pos2Index = new int[posShift.length];
+
+     // check the repeat flag
+     int numScoringRepeats = 2;
+     if (maxShiftPercent <= 0)
+     {
+         numScoringRepeats = 1;
+         maxShiftPercent = Math.abs(maxShiftPercent);
+     }
+
+     // refine multiple times to get better interval estimate
+     for (int repeat = 0; repeat < numScoringRepeats; repeat++)
+     {
+         // init counters
+         count = 0;
+         int i1 = 0;
+         int i2 = 0;
+
+         // init the loop storage
+         int hash1 = 0;
+         int hash2 = 0;
+         int pos1;
+         int pos2;
+
+         // perform merge operation to get the shift and the kmer count
+         while (true)
+         {
+             if (i1>=allKmerHashes.length)
+                 break;
+             if (i2>=sAllKmerHashes.length)
+                 break;
+
+             // get the values in the array
+             hash1 = allKmerHashes[i1][0];
+             pos1 = allKmerHashes[i1][1];
+
+             hash2 = sAllKmerHashes[i2][0];
+             pos2 = sAllKmerHashes[i2][1];
+
+             if (hash1 < hash2 || pos1 < valid1Lower || pos1 >= valid1Upper)
+                 i1++;
+             else if (hash2 < hash1 || pos2 < valid2Lower || pos2 >= valid2Upper)
+                 i2++;
+             else
+             {
+                 // check if current shift makes sense positionally
+                 int currShift = pos2 - pos1;
+                 if (Math.abs(currShift - medianShift) > absMaxShiftInOverlap)
+                 {
+                     // do not record this shift and increase counter
+                     i2++;
+                     continue;
+                 }
+
+                 // adjust array size if needed
+                 if (posShift.length <= count)
+                 {
+                     posShift = Arrays.copyOf(posShift, posShift.length * 2);
+                     pos1Index = Arrays.copyOf(pos1Index, pos1Index.length * 2);
+                     pos2Index = Arrays.copyOf(pos2Index, pos2Index.length * 2);
+                 }
+
+                 // compute the shift
+                 posShift[count] = currShift;
+                 pos1Index[count] = pos1;
+                 pos2Index[count] = pos2;
+
+                 // if first round, store only first hit
+                 if (repeat == 0)
+                     i1++;
+                 i2++;
+
+                 count++;
+             }
+         }
+
+         if (count <= 0)
+             return new OverlapInfo(0.0, 0, 0, 0, 0, 0);
+
+         // pick out only the matches that are best
+         if (repeat > 0)
+         {
+             int reducedCount = -1;
+
+             // copy over only the best values
+             for (int iter = 0; iter < count; iter++)
+             {
+                 if (reducedCount >= 0 && pos1Index[reducedCount] == pos1Index[iter])
+                 {
+                     // if better, record it
+                     if (Math.abs(posShift[reducedCount] - medianShift) > Math.abs(posShift[iter] - medianShift))
+                     {
+                         pos1Index[reducedCount] = pos1Index[iter];
+                         pos2Index[reducedCount] = pos2Index[iter];
+                         posShift[reducedCount] = posShift[iter];
+                     }
+                 }
+                 else
+                 {
+                     // add the new data
+                     reducedCount++;
+                     pos1Index[reducedCount] = pos1Index[iter];
+                     pos2Index[reducedCount] = pos2Index[iter];
+                     posShift[reducedCount] = posShift[iter];
+                 }
+             }
+
+             count = reducedCount + 1;
+         }
+
+         if (count <= 0)
+             medianShift = 0;
+         else
+             medianShift = Utils.quickSelect(Arrays.copyOf(posShift, count), count / 2, count);
+
+         // get the actual overlap size
+         int leftPosition = Math.max(0, -medianShift);
+         int rightPosition = Math.min(kmerSize1, kmerSize2 - medianShift);
+         overlapSize = Math.max(this.seqLength - kmerSize1, rightPosition - leftPosition);
+
+         // compute the max possible allowed shift in kmers
+         absMaxShiftInOverlap = Math.min(Math.max(kmerSize1, kmerSize2),
+                 (int) ((double) overlapSize * maxShiftPercent));
+
+         // get the updated borders
+         valid1Lower = Math.max(0, -medianShift - absMaxShiftInOverlap);
+         valid1Upper = Math.min(kmerSize1, kmerSize2 - medianShift + absMaxShiftInOverlap);
+         valid2Lower = Math.max(0, medianShift - absMaxShiftInOverlap);
+         valid2Upper = Math.min(kmerSize2, kmerSize1 + medianShift + absMaxShiftInOverlap);
+     }
+
+     // storage for edge computation
+     int leftEdge1 = Integer.MAX_VALUE;
+     int leftEdge2 = Integer.MAX_VALUE;
+     int rightEdge1 = Integer.MIN_VALUE;
+     int rightEdge2 = Integer.MIN_VALUE;
+
+     // count only the shifts in the correct place
+     int validCount = 0;
+     for (int iter = 0; iter < count; iter++)
+     {
+         int pos1 = pos1Index[iter];
+         int pos2 = pos2Index[iter];
+
+         // take only valid values
+         if (Math.abs(posShift[iter] - medianShift) > absMaxShiftInOverlap)
+             continue;
+
+         // get the edges
+         if (pos1 < leftEdge1)
+             leftEdge1 = pos1;
+         if (pos2 < leftEdge2)
+             leftEdge2 = pos2;
+         if (pos1 > rightEdge1)
+             rightEdge1 = pos1;
+         if (pos2 > rightEdge2)
+             rightEdge2 = pos2;
+
+         validCount++;
+     }
+
+     if (validCount <= 1)
+         return new OverlapInfo(0.0, 0, 0, 0, 0, 0);
+
+     // compute the score
+     double score = (double) validCount / (double) (overlapSize);
+
+     // get edge info uniformly minimum variance unbiased (UMVU) estimators
+     // a = (n*a-b)/(n-1)
+     // b = (n*b-a)/(n-1)
+     // The products are formed in double: validCount * edge can exceed the int
+     // range for long sequences with many matches.
+     final double n = (double) validCount;
+     int a1 = Math.max(0, (int) Math.round((n * leftEdge1 - rightEdge1) / (double) (validCount - 1)));
+     int b1 = Math.max(0, (int) Math.round((n * leftEdge2 - rightEdge2) / (double) (validCount - 1)));
+     int a2 = Math.min(this.seqLength,
+             (int) Math.round((n * rightEdge1 - leftEdge1) / (double) (validCount - 1)));
+     int b2 = Math.min(s.seqLength,
+             (int) Math.round((n * rightEdge2 - leftEdge2) / (double) (validCount - 1)));
+
+     // the score is scaled by REDUCTION, the factor used when subsampling the hashes
+     return new OverlapInfo(score * (double) REDUCTION, validCount, a1, a2, b1, b2);
+ }
+
+ /**
+  * @return the number of stored (hash, position) entries
+  */
+ public int size()
+ {
+ return this.orderedHashes.length;
+ }
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/sketch/SimHash.java b/src/main/java/edu/umd/marbl/mhap/sketch/SimHash.java
new file mode 100644
index 0000000..c060d61
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/sketch/SimHash.java
@@ -0,0 +1,103 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2014 by Konstantin Berlin and Sergey Koren
+ * University Of Maryland
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.sketch;
+
+
+public final class SimHash extends AbstractBitSketch<SimHash>
+{
+    /** Serialization version identifier. */
+    private static final long serialVersionUID = -2655482279264410602L;
+
+    /**
+     * Folds a set of per-object hash words into one bit signature by majority vote.
+     * For every bit position, each object votes +1 when its bit is set and -1 when
+     * it is clear; the output bit is set only when the vote total is strictly positive.
+     *
+     * @param hashes   one row per hashed object; the first {@code numWords} entries of each row are read
+     * @param numWords number of 64-bit words in the signature
+     * @return the packed signature, {@code numWords} words long
+     */
+    private static final long[] recordHashes(final long[][] hashes, final int numWords)
+    {
+        final int[] votes = new int[numWords * 64];
+
+        // tally the votes of every object, word by word
+        for (final long[] row : hashes)
+        {
+            for (int word = 0; word < numWords; word++)
+            {
+                final long current = row[word];
+                final int base = word * 64;
+
+                for (int bit = 0; bit < 64; bit++)
+                    votes[base + bit] += ((current >>> bit) & 1L) == 0L ? -1 : 1;
+            }
+        }
+
+        // pack the winning bits back into words
+        final long[] signature = new long[numWords];
+        for (int word = 0; word < numWords; word++)
+        {
+            final int base = word * 64;
+            long packed = 0L;
+
+            for (int bit = 0; bit < 64; bit++)
+            {
+                if (votes[base + bit] > 0)
+                    packed |= 1L << bit;
+            }
+
+            signature[word] = packed;
+        }
+
+        return signature;
+    }
+
+    /**
+     * Builds the signature of a string from the hashes of its n-grams.
+     *
+     * @param string      the text to sketch
+     * @param nGramSize   n-gram length handed to the hash computation
+     * @param numberWords number of 64-bit words in the signature
+     */
+    public SimHash(String string, int nGramSize, int numberWords)
+    {
+        super(recordHashes(HashUtils.computeNGramHashesExact(string, nGramSize, numberWords, 0), numberWords));
+    }
+
+    /**
+     * Converts the intersection count with another signature into a similarity estimate.
+     * The ratio of the intersection count to the number of bits is rescaled so that
+     * 0.5 maps to 0 and 1.0 maps to 1; negative results are clamped to 0.
+     *
+     * @param sh the signature to compare against
+     * @return the rescaled estimate, never below 0.0
+     */
+    public final double jaccard(final SimHash sh)
+    {
+        final double matchFraction = (double) getIntersectionCount(sh) / (double) this.numberOfBits();
+
+        return Math.max(0.0, (matchFraction - 0.5) * 2.0);
+    }
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/sketch/Sketch.java b/src/main/java/edu/umd/marbl/mhap/sketch/Sketch.java
new file mode 100644
index 0000000..2d5ed16
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/sketch/Sketch.java
@@ -0,0 +1,37 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2014 by Konstantin Berlin and Sergey Koren
+ * University Of Maryland
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.sketch;
+
+import java.io.Serializable;
+
+/**
+ * A serializable sketch that can be compared against another sketch of the
+ * same concrete type.
+ *
+ * @param <T> the implementing sketch type (self-bounded)
+ */
+public interface Sketch<T extends Sketch<T>> extends Serializable
+{
+ /**
+  * Computes a similarity score between this sketch and another one.
+  *
+  * @param sh the sketch to compare against
+  * @return the similarity score; its scale is defined by the implementation
+  */
+ double similarity(T sh);
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/sketch/SketchRuntimeException.java b/src/main/java/edu/umd/marbl/mhap/sketch/SketchRuntimeException.java
new file mode 100644
index 0000000..5ae50d5
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/sketch/SketchRuntimeException.java
@@ -0,0 +1,63 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.sketch;
+
+/**
+ * Unchecked exception type of the sketch package. Every constructor simply
+ * forwards its arguments to the matching {@link RuntimeException} constructor.
+ */
+public class SketchRuntimeException extends RuntimeException
+{
+    /** Serialization version identifier. */
+    private static final long serialVersionUID = 8422390842382501317L;
+
+    /** Creates an exception with neither a message nor a cause. */
+    public SketchRuntimeException()
+    {
+        super();
+    }
+
+    /**
+     * @param message the detail message
+     */
+    public SketchRuntimeException(String message)
+    {
+        super(message);
+    }
+
+    /**
+     * @param cause the underlying failure
+     */
+    public SketchRuntimeException(Throwable cause)
+    {
+        super(cause);
+    }
+
+    /**
+     * @param message the detail message
+     * @param cause   the underlying failure
+     */
+    public SketchRuntimeException(String message, Throwable cause)
+    {
+        super(message, cause);
+    }
+
+    /**
+     * @param message            the detail message
+     * @param cause              the underlying failure
+     * @param enableSuppression  whether suppressed exceptions may be recorded
+     * @param writableStackTrace whether the stack trace is writable
+     */
+    public SketchRuntimeException(String message, Throwable cause, boolean enableSuppression,
+            boolean writableStackTrace)
+    {
+        super(message, cause, enableSuppression, writableStackTrace);
+    }
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/utils/.gitignore b/src/main/java/edu/umd/marbl/mhap/utils/.gitignore
new file mode 100644
index 0000000..cb1017b
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/utils/.gitignore
@@ -0,0 +1,4 @@
+/Utils$Pair.class
+/Utils$ToProtein.class
+/Utils$Translate.class
+/Utils.class
diff --git a/src/main/java/edu/umd/marbl/mhap/utils/CharacterHash.java b/src/main/java/edu/umd/marbl/mhap/utils/CharacterHash.java
new file mode 100644
index 0000000..70dd8f4
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/utils/CharacterHash.java
@@ -0,0 +1,51 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.utils;
+
+import java.util.Random;
+
+public class CharacterHash
+{
+ public int hashvalues[] = new int[1 << 16];
+
+ static CharacterHash charhash = new CharacterHash();
+
+ public static CharacterHash getInstance()
+ {
+ return charhash;
+ }
+
+ public CharacterHash()
+ {
+ Random r = new Random(1);
+ for (int k = 0; k < this.hashvalues.length; ++k)
+ this.hashvalues[k] = r.nextInt();
+ }
+
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/utils/CyclicHash.java b/src/main/java/edu/umd/marbl/mhap/utils/CyclicHash.java
new file mode 100644
index 0000000..ceb7ff8
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/utils/CyclicHash.java
@@ -0,0 +1,72 @@
+/**
+ * Daniel Lemire, Owen Kaser: Recursive n-gram hashing is pairwise independent, at best, Computer Speech & Language, Volume 24, Issue 4, October 2010, Pages 698-710 http://arxiv.org/abs/0705.4676
+ */
+package edu.umd.marbl.mhap.utils;
+
+/**
+ * Rolling hash over a window of {@code n} characters, built from 32-bit
+ * rotations and XORs of per-character values taken from {@link CharacterHash}.
+ */
+public final class CyclicHash
+{
+    /** Current hash of the window. */
+    public int hashvalue;
+
+    int myr;
+
+    /** Window length in characters. */
+    int n;
+
+    private final static CharacterHash hasher = CharacterHash.getInstance();
+
+    /** Width in bits of the hash value. */
+    public final static int wordsize = 32;
+
+    /** Rotates {@code x} left by one bit. */
+    private final static int fastleftshift1(int x)
+    {
+        return Integer.rotateLeft(x, 1);
+    }
+
+    /**
+     * Hashes a whole sequence from scratch, without any rolling state.
+     * This is purely for testing purposes.
+     *
+     * @param s the characters to hash
+     * @return the hash of all characters of {@code s}, consumed left to right
+     */
+    public final static int nonRollingHash(CharSequence s)
+    {
+        int accumulated = 0;
+        final int length = s.length();
+        for (int pos = 0; pos < length; ++pos)
+            accumulated = fastleftshift1(accumulated) ^ hasher.hashvalues[s.charAt(pos)];
+
+        return accumulated;
+    }
+
+    /**
+     * @param myn the length in characters of the blocks you want to hash
+     * @throws IllegalArgumentException if {@code myn} exceeds {@link #wordsize}
+     */
+    public CyclicHash(int myn)
+    {
+        this.n = myn;
+        if (myn > wordsize)
+            throw new IllegalArgumentException();
+    }
+
+    /**
+     * Adds a new character without removing one (useful to initiate the hasher).
+     * To get a strongly universal hash value, you have to ignore the last or
+     * first (n-1) bits.
+     *
+     * @param c the character entering the window
+     * @return the updated hash value
+     */
+    public final int eat(char c)
+    {
+        this.hashvalue = fastleftshift1(this.hashvalue) ^ hasher.hashvalues[c];
+        return this.hashvalue;
+    }
+
+    /** Rotates {@code x} left by the window length {@code n}. */
+    private final int fastleftshiftn(int x)
+    {
+        return Integer.rotateLeft(x, this.n);
+    }
+
+    /**
+     * Removes the oldest character and adds a new one.
+     * To get a strongly universal hash value, you have to ignore the last or
+     * first (n-1) bits.
+     *
+     * @param outchar the character leaving the window
+     * @param inchar  the character entering the window
+     * @return the updated hash value
+     */
+    public final int update(char outchar, char inchar)
+    {
+        final int leaving = fastleftshiftn(hasher.hashvalues[outchar]);
+        final int entering = hasher.hashvalues[inchar];
+
+        this.hashvalue = fastleftshift1(this.hashvalue) ^ leaving ^ entering;
+        return this.hashvalue;
+    }
+
+}
\ No newline at end of file
diff --git a/src/main/java/edu/umd/marbl/mhap/utils/HashCodeUtil.java b/src/main/java/edu/umd/marbl/mhap/utils/HashCodeUtil.java
new file mode 100644
index 0000000..2a88cb9
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/utils/HashCodeUtil.java
@@ -0,0 +1,192 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.utils;
+
+import java.lang.reflect.Array;
+
+/**
+ * Static helpers for building an {@code int} hash code one field at a time.
+ * Each overload multiplies the incoming seed by an odd prime and adds a term
+ * derived from the value, so calls can be chained by feeding one result in as
+ * the next seed.
+ */
+public final class HashCodeUtil {
+
+    /** Odd prime by which the seed is multiplied before a term is added. */
+    private static final int fODD_PRIME_NUMBER = 37;
+
+    /** Seed constant made available to callers. */
+    public static final int SEED = 23;
+
+    /**
+     * @param aSeed the running hash
+     * @return the seed multiplied by the odd prime
+     */
+    private static int firstTerm(int aSeed) {
+        return aSeed * fODD_PRIME_NUMBER;
+    }
+
+    /**
+     * Mixes a boolean into the hash.
+     *
+     * @param aSeed    the running hash
+     * @param aBoolean contributes 1 when true, 0 when false
+     * @return the updated hash
+     */
+    public static int hash(int aSeed, boolean aBoolean) {
+        final int term = aBoolean ? 1 : 0;
+        return firstTerm(aSeed) + term;
+    }
+
+    /**
+     * Mixes a single character into the hash.
+     *
+     * @param aSeed the running hash
+     * @param aChar the character whose numeric value is added
+     * @return the updated hash
+     */
+    public static int hash(int aSeed, char aChar) {
+        return firstTerm(aSeed) + aChar;
+    }
+
+    /**
+     * Hashes a slice of a character array. Every character contributes
+     * {@code 37 * aSeed + c} to a plain sum, so the result does not depend on
+     * the order of the characters.
+     *
+     * @param aSeed     the seed applied to every character
+     * @param charArray the source characters
+     * @param start     index of the first character to include
+     * @param size      number of characters to include
+     * @return the summed hash; 0 when {@code size} is not positive
+     */
+    public final static int hash(int aSeed, char[] charArray, int start, int size)
+    {
+        final int seedTerm = firstTerm(aSeed);
+
+        int total = 0;
+        for (int offset = 0; offset < size; offset++)
+            total += seedTerm + charArray[start + offset];
+
+        return total;
+    }
+
+    /**
+     * Mixes a double into the hash through its IEEE 754 bit pattern.
+     *
+     * @param aSeed   the running hash
+     * @param aDouble the value to mix in
+     * @return the updated hash
+     */
+    public static int hash(int aSeed, double aDouble) {
+        final long bits = Double.doubleToLongBits(aDouble);
+        return hash(aSeed, bits);
+    }
+
+    /**
+     * Mixes a float into the hash through its IEEE 754 bit pattern.
+     *
+     * @param aSeed  the running hash
+     * @param aFloat the value to mix in
+     * @return the updated hash
+     */
+    public static int hash(int aSeed, float aFloat) {
+        final int bits = Float.floatToIntBits(aFloat);
+        return hash(aSeed, bits);
+    }
+
+    /**
+     * Mixes an int into the hash. Byte and short arguments also end up here
+     * through implicit widening.
+     *
+     * @param aSeed the running hash
+     * @param aInt  the value to add
+     * @return the updated hash
+     */
+    public static int hash(int aSeed, int aInt) {
+        return firstTerm(aSeed) + aInt;
+    }
+
+    /**
+     * Mixes a long into the hash by XOR-folding its upper half onto its lower half.
+     *
+     * @param aSeed the running hash
+     * @param aLong the value to mix in
+     * @return the updated hash
+     */
+    public static int hash(int aSeed, long aLong) {
+        final int folded = (int) (aLong ^ (aLong >>> 32));
+        return firstTerm(aSeed) + folded;
+    }
+
+    /**
+     * Mixes an arbitrary object into the hash. {@code null} counts as the int 0,
+     * a non-array object contributes its {@code hashCode()}, and an array is
+     * walked element by element with this same method (recursively, so nested
+     * arrays are handled too).
+     *
+     * @param aSeed   the running hash
+     * @param aObject the object, array or {@code null} to mix in
+     * @return the updated hash; equal to {@code aSeed} for an empty array
+     */
+    public static int hash(int aSeed, Object aObject) {
+        if (aObject == null) {
+            return hash(aSeed, 0);
+        }
+
+        if (!aObject.getClass().isArray()) {
+            return hash(aSeed, aObject.hashCode());
+        }
+
+        int running = aSeed;
+        final int length = Array.getLength(aObject);
+        for (int idx = 0; idx < length; ++idx) {
+            // elements come back as Object, so this re-enters the Object overload
+            running = hash(running, Array.get(aObject, idx));
+        }
+        return running;
+    }
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/utils/HitCounter.java b/src/main/java/edu/umd/marbl/mhap/utils/HitCounter.java
new file mode 100644
index 0000000..70d544c
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/utils/HitCounter.java
@@ -0,0 +1,55 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.utils;
+
+/**
+ * Minimal mutable counter whose running total is exposed through the public
+ * {@link #count} field.
+ */
+public final class HitCounter
+{
+    /** Running total of recorded hits. */
+    public int count;
+
+    /** Creates a counter that starts at zero. */
+    public HitCounter()
+    {
+        this(0);
+    }
+
+    /**
+     * @param count the initial total
+     */
+    public HitCounter(int count)
+    {
+        this.count = count;
+    }
+
+    /**
+     * Records one hit.
+     *
+     * @return this counter, so that calls can be chained
+     */
+    public HitCounter addHit()
+    {
+        this.count += 1;
+        return this;
+    }
+
+    /**
+     * Records several hits at once.
+     *
+     * @param counts the amount added to the total
+     */
+    public void addHits(int counts)
+    {
+        this.count = this.count + counts;
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/edu/umd/marbl/mhap/utils/Interval.java b/src/main/java/edu/umd/marbl/mhap/utils/Interval.java
new file mode 100644
index 0000000..a839414
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/utils/Interval.java
@@ -0,0 +1,81 @@
+package edu.umd.marbl.mhap.utils;
+
+/**
+ * An interval described by a start and an end coordinate, carrying one piece
+ * of associated data. Intervals are ordered by start, then by end.
+ * @author Kevin Dolan
+ *
+ * @param <Type> The type of data being stored
+ */
+public class Interval<Type> implements Comparable<Interval<Type>> {
+
+    private long start;
+    private long end;
+    private Type data;
+
+    /**
+     * @param start the start coordinate
+     * @param end   the end coordinate
+     * @param data  the data attached to the interval
+     */
+    public Interval(long start, long end, Type data) {
+        this.start = start;
+        this.end = end;
+        this.data = data;
+    }
+
+    /** @return the start coordinate */
+    public long getStart() {
+        return this.start;
+    }
+
+    /** @param start the new start coordinate */
+    public void setStart(long start) {
+        this.start = start;
+    }
+
+    /** @return the end coordinate */
+    public long getEnd() {
+        return this.end;
+    }
+
+    /** @param end the new end coordinate */
+    public void setEnd(long end) {
+        this.end = end;
+    }
+
+    /** @return the data attached to the interval */
+    public Type getData() {
+        return this.data;
+    }
+
+    /** @param data the new data to attach */
+    public void setData(Type data) {
+        this.data = data;
+    }
+
+    /**
+     * Tests whether a point lies strictly inside this interval. Both end
+     * points are excluded: a point equal to start or end is not contained.
+     *
+     * @param time the point to test
+     * @return true if {@code start < time < end}
+     */
+    public boolean contains(long time) {
+        return this.start < time && time < this.end;
+    }
+
+    /**
+     * Tests whether this interval overlaps another one. Intervals that merely
+     * touch at an end point do not count as intersecting.
+     *
+     * @param other the interval to test against
+     * @return true if the other interval ends after this one starts and starts before this one ends
+     */
+    public boolean intersects(Interval<?> other) {
+        return this.start < other.getEnd() && other.getStart() < this.end;
+    }
+
+    /**
+     * Orders intervals by start coordinate and breaks ties by end coordinate.
+     * The attached data takes no part in the comparison.
+     *
+     * @param other the interval to compare with
+     * @return -1 if this interval sorts first, 1 if it sorts last, 0 if both start and end are equal
+     */
+    @Override
+    public int compareTo(Interval<Type> other) {
+        final long otherStart = other.getStart();
+        if (this.start != otherStart)
+            return this.start < otherStart ? -1 : 1;
+
+        final long otherEnd = other.getEnd();
+        if (this.end != otherEnd)
+            return this.end < otherEnd ? -1 : 1;
+
+        return 0;
+    }
+
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/utils/IntervalNode.java b/src/main/java/edu/umd/marbl/mhap/utils/IntervalNode.java
new file mode 100644
index 0000000..0a2307c
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/utils/IntervalNode.java
@@ -0,0 +1,167 @@
+package edu.umd.marbl.mhap.utils;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.Map.Entry;
+
+/**
+ * The Node class contains the interval tree information for one single node:
+ * the intervals that span the node's center point, plus links to the subtrees
+ * holding the intervals that lie entirely to the left or right of it.
+ *
+ * @author Kevin Dolan
+ */
+public class IntervalNode<Type> {
+
+    /** Intervals stored at this node, grouped by (start, end) and sorted by that key. */
+    private SortedMap<Interval<Type>, List<Interval<Type>>> intervals;
+    private long center;
+    private IntervalNode<Type> leftNode;
+    private IntervalNode<Type> rightNode;
+
+    /** Creates an empty node centered at 0 with no children. */
+    public IntervalNode() {
+        this.intervals = new TreeMap<Interval<Type>, List<Interval<Type>>>();
+        this.center = 0;
+        this.leftNode = null;
+        this.rightNode = null;
+    }
+
+    /**
+     * Builds the node, and recursively its subtrees, from a list of intervals.
+     * The center is the median of all distinct end points. Intervals ending
+     * before the center go to the left subtree, intervals starting after it go
+     * to the right subtree, and the remaining ones are kept at this node.
+     * An empty list yields an empty node, equivalent to {@link #IntervalNode()}.
+     *
+     * @param intervalList the intervals to distribute
+     */
+    public IntervalNode(List<Interval<Type>> intervalList) {
+
+        this.intervals = new TreeMap<Interval<Type>, List<Interval<Type>>>();
+
+        SortedSet<Long> endpoints = new TreeSet<Long>();
+
+        for(Interval<Type> interval: intervalList) {
+            endpoints.add(interval.getStart());
+            endpoints.add(interval.getEnd());
+        }
+
+        // Without end points there is no median: getMedian would return null and
+        // unboxing it below would throw a NullPointerException.
+        if(endpoints.isEmpty()) {
+            this.center = 0;
+            return;
+        }
+
+        long median = getMedian(endpoints);
+        this.center = median;
+
+        List<Interval<Type>> left = new ArrayList<Interval<Type>>();
+        List<Interval<Type>> right = new ArrayList<Interval<Type>>();
+
+        for(Interval<Type> interval : intervalList) {
+            if(interval.getEnd() < median)
+                left.add(interval);
+            else if(interval.getStart() > median)
+                right.add(interval);
+            else {
+                // intervals comparing equal (same start and end) share one posting list
+                List<Interval<Type>> posting = this.intervals.get(interval);
+                if(posting == null) {
+                    posting = new ArrayList<Interval<Type>>();
+                    this.intervals.put(interval, posting);
+                }
+                posting.add(interval);
+            }
+        }
+
+        if(left.size() > 0)
+            this.leftNode = new IntervalNode<Type>(left);
+        if(right.size() > 0)
+            this.rightNode = new IntervalNode<Type>(right);
+    }
+
+    /**
+     * Perform a stabbing query on the node
+     * @param time the time to query at
+     * @return all intervals containing time, as decided by {@link Interval#contains(long)}
+     */
+    public List<Interval<Type>> stab(long time) {
+        List<Interval<Type>> result = new ArrayList<Interval<Type>>();
+
+        for(Entry<Interval<Type>, List<Interval<Type>>> entry : this.intervals.entrySet()) {
+            if(entry.getKey().contains(time))
+                result.addAll(entry.getValue());
+            else if(entry.getKey().getStart() > time)
+                break; // keys are sorted by start, so no later key can contain time
+        }
+
+        if(time < this.center && this.leftNode != null)
+            result.addAll(this.leftNode.stab(time));
+        else if(time > this.center && this.rightNode != null)
+            result.addAll(this.rightNode.stab(time));
+        return result;
+    }
+
+    /**
+     * Perform an interval intersection query on the node
+     * @param target the interval to intersect
+     * @return all intervals intersecting target, as decided by {@link Interval#intersects(Interval)}
+     */
+    public List<Interval<Type>> query(Interval<?> target) {
+        List<Interval<Type>> result = new ArrayList<Interval<Type>>();
+
+        for(Entry<Interval<Type>, List<Interval<Type>>> entry : this.intervals.entrySet()) {
+            if(entry.getKey().intersects(target))
+                result.addAll(entry.getValue());
+            else if(entry.getKey().getStart() > target.getEnd())
+                break; // keys are sorted by start, so no later key can intersect
+        }
+
+        // unlike a stabbing query, a target may reach into both subtrees
+        if(target.getStart() < this.center && this.leftNode != null)
+            result.addAll(this.leftNode.query(target));
+        if(target.getEnd() > this.center && this.rightNode != null)
+            result.addAll(this.rightNode.query(target));
+        return result;
+    }
+
+    /** @return the center point of this node */
+    public long getCenter() {
+        return this.center;
+    }
+
+    /** @param center the new center point */
+    public void setCenter(long center) {
+        this.center = center;
+    }
+
+    /** @return the left subtree, or null if there is none */
+    public IntervalNode<Type> getLeft() {
+        return this.leftNode;
+    }
+
+    /** @param left the new left subtree */
+    public void setLeft(IntervalNode<Type> left) {
+        this.leftNode = left;
+    }
+
+    /** @return the right subtree, or null if there is none */
+    public IntervalNode<Type> getRight() {
+        return this.rightNode;
+    }
+
+    /** @param right the new right subtree */
+    public void setRight(IntervalNode<Type> right) {
+        this.rightNode = right;
+    }
+
+    /**
+     * @param set the set to look on
+     * @return the median of the set, not interpolated (the element at index
+     *         size/2 in sorted order), or null if the set is empty
+     */
+    private Long getMedian(SortedSet<Long> set) {
+        int i = 0;
+        int middle = set.size() / 2;
+        for(Long point : set) {
+            if(i == middle)
+                return point;
+            i++;
+        }
+        return null;
+    }
+
+    /**
+     * @return the center followed by every key stored at this node together
+     *         with the intervals grouped under it; children are not included
+     */
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        sb.append(this.center + ": ");
+        for(Entry<Interval<Type>, List<Interval<Type>>> entry : this.intervals.entrySet()) {
+            sb.append("[" + entry.getKey().getStart() + "," + entry.getKey().getEnd() + "]:{");
+            for(Interval<Type> interval : entry.getValue()) {
+                sb.append("("+interval.getStart()+","+interval.getEnd()+","+interval.getData()+")");
+            }
+            sb.append("} ");
+        }
+        return sb.toString();
+    }
+
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/utils/IntervalTree.java b/src/main/java/edu/umd/marbl/mhap/utils/IntervalTree.java
new file mode 100644
index 0000000..91c255c
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/utils/IntervalTree.java
@@ -0,0 +1,168 @@
+package edu.umd.marbl.mhap.utils;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * An Interval Tree is essentially a map from intervals to objects, which
+ * can be queried for all data associated with a particular interval of
+ * time. Intervals are collected in a list and the tree is rebuilt lazily,
+ * on the next query or explicit call to {@link #build()}.
+ * @author Kevin Dolan
+ *
+ * @param <Type> the type of objects to associate
+ */
+public class IntervalTree<Type> {
+
+    private IntervalNode<Type> head;
+    private List<Interval<Type>> intervalList;
+    private boolean inSync;
+    private int size;
+
+    /**
+     * Instantiate a new interval tree with no intervals
+     */
+    public IntervalTree() {
+        this.head = new IntervalNode<Type>();
+        this.intervalList = new ArrayList<Interval<Type>>();
+        this.inSync = true;
+        this.size = 0;
+    }
+
+    /**
+     * Instantiate and build an interval tree with a preset list of intervals.
+     * The list is copied, so later changes to it do not affect the tree.
+     * @param intervalList the list of intervals to use
+     */
+    public IntervalTree(List<Interval<Type>> intervalList) {
+        this.head = new IntervalNode<Type>(intervalList);
+        this.intervalList = new ArrayList<Interval<Type>>(intervalList);
+        this.inSync = true;
+        this.size = intervalList.size();
+    }
+
+    /**
+     * Perform a stabbing query, returning the associated data
+     * Will rebuild the tree if out of sync
+     * @param time the time to stab
+     * @return the data associated with all intervals that contain time
+     */
+    public List<Type> get(long time) {
+        return dataOf(getIntervals(time));
+    }
+
+    /**
+     * Perform a stabbing query, returning the interval objects
+     * Will rebuild the tree if out of sync
+     * @param time the time to stab
+     * @return all intervals that contain time
+     */
+    public List<Interval<Type>> getIntervals(long time) {
+        build();
+        return this.head.stab(time);
+    }
+
+    /**
+     * Perform an interval query, returning the associated data
+     * Will rebuild the tree if out of sync
+     * @param start the start of the interval to check
+     * @param end the end of the interval to check
+     * @return the data associated with all intervals that intersect target
+     */
+    public List<Type> get(long start, long end) {
+        return dataOf(getIntervals(start, end));
+    }
+
+    /**
+     * Perform an interval query, returning the interval objects
+     * Will rebuild the tree if out of sync
+     * @param start the start of the interval to check
+     * @param end the end of the interval to check
+     * @return all intervals that intersect target
+     */
+    public List<Interval<Type>> getIntervals(long start, long end) {
+        build();
+        final Interval<Type> target = new Interval<Type>(start, end, null);
+        return this.head.query(target);
+    }
+
+    /**
+     * Add an interval object to the interval tree's list
+     * Will not rebuild the tree until the next query or call to build
+     * @param interval the interval object to add
+     */
+    public void addInterval(Interval<Type> interval) {
+        this.intervalList.add(interval);
+        this.inSync = false;
+    }
+
+    /**
+     * Add an interval object to the interval tree's list
+     * Will not rebuild the tree until the next query or call to build
+     * @param begin the beginning of the interval
+     * @param end the end of the interval
+     * @param data the data to associate
+     */
+    public void addInterval(long begin, long end, Type data) {
+        final Interval<Type> added = new Interval<Type>(begin, end, data);
+        this.intervalList.add(added);
+        this.inSync = false;
+    }
+
+    /**
+     * Determine whether this interval tree is currently a reflection of all intervals in the interval list
+     * @return true if no changes have been made since the last build
+     */
+    public boolean inSync() {
+        return this.inSync;
+    }
+
+    /**
+     * Build the interval tree to reflect the list of intervals,
+     * Will not run if this is currently in sync
+     */
+    public void build() {
+        if(this.inSync)
+            return;
+
+        this.head = new IntervalNode<Type>(this.intervalList);
+        this.inSync = true;
+        this.size = this.intervalList.size();
+    }
+
+    /**
+     * @return the number of entries in the currently built interval tree
+     */
+    public int currentSize() {
+        return this.size;
+    }
+
+    /**
+     * @return the number of entries in the interval list, equal to currentSize() if inSync()
+     */
+    public int listSize() {
+        return this.intervalList.size();
+    }
+
+    /**
+     * @return one line per node, each indented by one tab per tree level,
+     *         with a node printed before its left and right subtrees
+     */
+    @Override
+    public String toString() {
+        return nodeString(this.head,0);
+    }
+
+    /**
+     * Collects the data payload of every interval, keeping the order of the input.
+     * @param found the intervals returned by a query
+     * @return the data of each interval
+     */
+    private List<Type> dataOf(List<Interval<Type>> found) {
+        List<Type> payloads = new ArrayList<Type>();
+        for(Interval<Type> interval : found)
+            payloads.add(interval.getData());
+        return payloads;
+    }
+
+    /**
+     * Renders a subtree as text.
+     * @param node the root of the subtree, may be null
+     * @param level the depth of node, used as the number of leading tabs
+     * @return the rendered subtree, or the empty string for a null node
+     */
+    private String nodeString(IntervalNode<Type> node, int level) {
+        if(node == null)
+            return "";
+
+        StringBuilder text = new StringBuilder();
+        for(int depth = 0; depth < level; depth++)
+            text.append("\t");
+        text.append(node).append("\n");
+        text.append(nodeString(node.getLeft(), level + 1));
+        text.append(nodeString(node.getRight(), level + 1));
+        return text.toString();
+    }
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/utils/LimitedSizeCollection.java b/src/main/java/edu/umd/marbl/mhap/utils/LimitedSizeCollection.java
new file mode 100644
index 0000000..3f15eee
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/utils/LimitedSizeCollection.java
@@ -0,0 +1,222 @@
+/*
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2013 by Konstantin Berlin
+ * University Of Maryland
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.utils;
+
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.PriorityQueue;
+
+/**
+ * A bounded collection that keeps only the {@code maxSize} best elements offered to it.
+ * <p>
+ * Elements are held in a {@link PriorityQueue} whose head is always the <em>worst</em>
+ * retained element, so a full collection only has to compare a newcomer against the head
+ * to decide whether it displaces anything. With {@link Priority#MIN_VALUES} (the default)
+ * the smallest elements are kept; with {@link Priority#MAX_VALUES} the largest are kept.
+ * <p>
+ * {@code null} elements are never stored. Instances are not synchronized.
+ *
+ * @param <T> element type, ranked by its natural ordering
+ */
+public final class LimitedSizeCollection<T extends Comparable<T>> implements Collection<T>
+{
+    /** Selects which end of the natural ordering counts as "best". */
+    public enum Priority
+    {
+        MAX_VALUES, MIN_VALUES;
+    }
+
+    /** Best retained element, or null when the collection is empty. */
+    private T best;
+    /** Maximum number of retained elements; values below 1 mean nothing is ever stored. */
+    private int maxSize;
+    /** Ranks elements so that a larger result means "better"; the queue head is the worst. */
+    private final Comparator<T> order;
+    private final PriorityQueue<T> queue;
+
+    /**
+     * Creates a collection that keeps the {@code maxSize} smallest elements.
+     *
+     * @param maxSize maximum number of retained elements
+     */
+    public LimitedSizeCollection(int maxSize)
+    {
+        this(maxSize, Priority.MIN_VALUES);
+    }
+
+    /**
+     * Creates a collection that keeps the {@code maxSize} best elements.
+     *
+     * @param maxSize maximum number of retained elements; zero or negative yields a
+     *            collection that rejects every element
+     * @param priority {@link Priority#MIN_VALUES} to keep the smallest elements, anything
+     *            else to keep the largest
+     */
+    public LimitedSizeCollection(int maxSize, Priority priority)
+    {
+        if (priority == Priority.MIN_VALUES)
+        {
+            // reversed natural order: the largest value sits at the queue head (worst)
+            this.order = new Comparator<T>()
+            {
+                @Override
+                public final int compare(T s1, T s2)
+                {
+                    return s2.compareTo(s1);
+                }
+            };
+        }
+        else
+        {
+            this.order = new Comparator<T>()
+            {
+                @Override
+                public final int compare(T s1, T s2)
+                {
+                    return s1.compareTo(s2);
+                }
+            };
+        }
+
+        // PriorityQueue rejects an initial capacity below 1, but a non-positive maxSize is
+        // a legal "store nothing" configuration (see add), so clamp the capacity only.
+        this.queue = new PriorityQueue<T>(Math.max(1, maxSize), this.order);
+        this.maxSize = maxSize;
+        this.best = null;
+    }
+
+    /**
+     * Offers an element. It is stored when there is room, or when it is strictly better
+     * than the current worst element, which is then evicted.
+     *
+     * @param o element to offer; null is ignored
+     * @return true if the element was stored, false if it was rejected
+     */
+    @Override
+    public boolean add(T o)
+    {
+        if (o == null)
+            return false;
+
+        if (this.maxSize <= 0)
+            return false;
+
+        // if can fit just add
+        if (this.queue.size() < this.maxSize)
+        {
+            this.queue.add(o);
+        }
+        else if (this.order.compare(o, this.queue.peek()) > 0)
+        {
+            this.queue.add(o);
+            this.queue.poll();
+        }
+        else
+            return false;
+
+        if (this.best == null || this.order.compare(o, this.best) > 0)
+        {
+            this.best = o;
+        }
+
+        return true;
+    }
+
+    /**
+     * Offers every element of {@code c} in iteration order.
+     *
+     * @return true if at least one element was stored
+     */
+    @Override
+    public boolean addAll(Collection<? extends T> c)
+    {
+        boolean changed = false;
+        for (T elem : c)
+        {
+            if (add(elem))
+                changed = true;
+        }
+
+        return changed;
+    }
+
+    @Override
+    public void clear()
+    {
+        this.best = null;
+
+        this.queue.clear();
+    }
+
+    @Override
+    public boolean contains(Object o)
+    {
+        return this.queue.contains(o);
+    }
+
+    @Override
+    public boolean containsAll(Collection<?> c)
+    {
+        return this.queue.containsAll(c);
+    }
+
+    /**
+     * @return the best retained element, or null if the collection is empty
+     */
+    public T getBest()
+    {
+        return this.best;
+    }
+
+    /**
+     * @return the live backing queue (not a copy); modifying it directly bypasses the
+     *         size limit and the best-element bookkeeping
+     */
+    public Collection<T> getCollection()
+    {
+        return this.queue;
+    }
+
+    /**
+     * @return the worst retained element (the next one to be evicted), or null if empty
+     */
+    public T getWorst()
+    {
+        return this.queue.peek();
+    }
+
+    @Override
+    public boolean isEmpty()
+    {
+        return this.queue.isEmpty();
+    }
+
+    /**
+     * @return true if a further element can only be stored by evicting the worst one
+     */
+    public boolean isFull()
+    {
+        return this.queue.size() >= this.maxSize;
+    }
+
+    /**
+     * @return an iterator over the backing queue; elements are not returned in sorted order
+     */
+    @Override
+    public Iterator<T> iterator()
+    {
+        return this.queue.iterator();
+    }
+
+    /** Not supported. */
+    @Override
+    public boolean remove(Object o)
+    {
+        throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Removes every retained element that is contained in {@code c}.
+     *
+     * @return true if at least one element was removed
+     */
+    @Override
+    public boolean removeAll(Collection<?> c)
+    {
+        boolean changed = this.queue.removeAll(c);
+
+        // the previous best may be gone; re-derive it from what is left
+        this.best = findBest();
+
+        return changed;
+    }
+
+    /**
+     * Removes and returns the worst retained element.
+     *
+     * @return the removed element, or null if the collection was empty
+     */
+    public T removeWorst()
+    {
+        T worst = this.queue.poll();
+
+        // best only leaves together with the worst when it was the last element or tied with it
+        if (worst == this.best || this.queue.isEmpty())
+            this.best = findBest();
+
+        return worst;
+    }
+
+    /** Not supported. */
+    @Override
+    public boolean retainAll(Collection<?> c)
+    {
+        throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Changes the size limit, evicting the worst elements until the collection fits.
+     *
+     * @param maxSize new maximum number of retained elements; zero or negative empties
+     *            the collection and makes it reject every further element
+     */
+    public void setSize(int maxSize)
+    {
+        this.maxSize = maxSize;
+
+        boolean evicted = false;
+        // the isEmpty guard keeps a negative limit from polling an empty queue forever
+        while (!this.queue.isEmpty() && this.queue.size() > this.maxSize)
+        {
+            this.queue.poll();
+            evicted = true;
+        }
+
+        if (evicted)
+            this.best = findBest();
+    }
+
+    @Override
+    public int size()
+    {
+        return this.queue.size();
+    }
+
+    @Override
+    public Object[] toArray()
+    {
+        return this.queue.toArray();
+    }
+
+    @Override
+    public <Y> Y[] toArray(Y[] a)
+    {
+        return this.queue.toArray(a);
+    }
+
+    /** Scans the retained elements for the best one; returns null when empty. */
+    private T findBest()
+    {
+        T top = null;
+        for (T candidate : this.queue)
+        {
+            if (top == null || this.order.compare(candidate, top) > 0)
+                top = candidate;
+        }
+
+        return top;
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/edu/umd/marbl/mhap/utils/MersenneTwisterFast.java b/src/main/java/edu/umd/marbl/mhap/utils/MersenneTwisterFast.java
new file mode 100644
index 0000000..dfb1c8f
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/utils/MersenneTwisterFast.java
@@ -0,0 +1,1530 @@
+package edu.umd.marbl.mhap.utils;
+
+import java.io.*;
+import java.util.*;
+
+/**
+ * <h3>MersenneTwister and MersenneTwisterFast</h3>
+ * <p>
+ * <b>Version 20</b>, based on version MT19937(99/10/29) of the Mersenne
+ * Twister algorithm found at <a
+ * href="http://www.math.keio.ac.jp/matumoto/emt.html"> The Mersenne Twister
+ * Home Page</a>, with the initialization improved using the new 2002/1/26
+ * initialization algorithm By Sean Luke, October 2004.
+ *
+ * <p>
+ * <b>MersenneTwister</b> is a drop-in subclass replacement for
+ * java.util.Random. It is properly synchronized and can be used in a
+ * multithreaded environment. On modern VMs such as HotSpot, it is approximately
+ * 1/3 slower than java.util.Random.
+ *
+ * <p>
+ * <b>MersenneTwisterFast</b> is not a subclass of java.util.Random. It has the
+ * same public methods as Random does, however, and it is algorithmically
+ * identical to MersenneTwister. MersenneTwisterFast has hard-code inlined all
+ * of its methods directly, and made all of them final (well, the ones of
+ * consequence anyway). Further, these methods are <i>not</i> synchronized, so
+ * the same MersenneTwisterFast instance cannot be shared by multiple threads.
+ * But all this helps MersenneTwisterFast achieve well over twice the speed of
+ * MersenneTwister. java.util.Random is about 1/3 slower than
+ * MersenneTwisterFast.
+ *
+ * <h3>About the Mersenne Twister</h3>
+ * <p>
+ * This is a Java version of the C-program for MT19937: Integer version. The
+ * MT19937 algorithm was created by Makoto Matsumoto and Takuji Nishimura, who
+ * ask: "When you use this, send an email to: matumoto at math.keio.ac.jp with an
+ * appropriate reference to your work". Indicate that this is a translation of
+ * their algorithm into Java.
+ *
+ * <p>
+ * <b>Reference. </b> Makoto Matsumoto and Takuji Nishimura, "Mersenne Twister:
+ * A 623-Dimensionally Equidistributed Uniform Pseudo-Random Number Generator",
+ * <i>ACM Transactions on Modeling and Computer Simulation,</i> Vol. 8, No. 1,
+ * January 1998, pp 3--30.
+ *
+ * <h3>About this Version</h3>
+ *
+ * <p>
+ * <b>Changes since V19:</b> nextFloat(boolean, boolean) now returns float, not
+ * double.
+ *
+ * <p>
+ * <b>Changes since V18:</b> Removed old final declarations, which used to
+ * potentially speed up the code, but no longer.
+ *
+ * <p>
+ * <b>Changes since V17:</b> Removed vestigial references to &= 0xffffffff which
+ * stemmed from the original C code. The C code could not guarantee that ints
+ * were 32 bit, hence the masks. The vestigial references in the Java code were
+ * likely optimized out anyway.
+ *
+ * <p>
+ * <b>Changes since V16:</b> Added nextDouble(includeZero, includeOne) and
+ * nextFloat(includeZero, includeOne) to allow for half-open, fully-closed, and
+ * fully-open intervals.
+ *
+ * <p>
+ * <b>Changes Since V15:</b> Added serialVersionUID to quiet compiler warnings
+ * from Sun's overly verbose compilers as of JDK 1.5.
+ *
+ * <p>
+ * <b>Changes Since V14:</b> made strictfp, with StrictMath.log and
+ * StrictMath.sqrt in nextGaussian instead of Math.log and Math.sqrt. This is
+ * largely just to be safe, as it presently makes no difference in the speed,
+ * correctness, or results of the algorithm.
+ *
+ * <p>
+ * <b>Changes Since V13:</b> clone() method CloneNotSupportedException removed.
+ *
+ * <p>
+ * <b>Changes Since V12:</b> clone() method added.
+ *
+ * <p>
+ * <b>Changes Since V11:</b> stateEquals(...) method added. MersenneTwisterFast
+ * is equal to other MersenneTwisterFasts with identical state; likewise
+ * MersenneTwister is equal to other MersenneTwister with identical state. This
+ * isn't equals(...) because that requires a contract of immutability to compare
+ * by value.
+ *
+ * <p>
+ * <b>Changes Since V10:</b> A documentation error suggested that setSeed(int[])
+ * required an int[] array 624 long. In fact, the array can be any non-zero
+ * length. The new version also checks for this fact.
+ *
+ * <p>
+ * <b>Changes Since V9:</b> readState(stream) and writeState(stream) provided.
+ *
+ * <p>
+ * <b>Changes Since V8:</b> setSeed(int) was only using the first 28 bits of the
+ * seed; it should have been 32 bits. For small-number seeds the behavior is
+ * identical.
+ *
+ * <p>
+ * <b>Changes Since V7:</b> A documentation error in MersenneTwisterFast (but
+ * not MersenneTwister) stated that nextDouble selects uniformly from the
+ * full-open interval [0,1]. It does not. nextDouble's contract is identical
+ * across MersenneTwisterFast, MersenneTwister, and java.util.Random, namely,
+ * selection in the half-open interval [0,1). That is, 1.0 should not be
+ * returned. A similar contract exists in nextFloat.
+ *
+ * <p>
+ * <b>Changes Since V6:</b> License has changed from LGPL to BSD. New timing
+ * information to compare against java.util.Random. Recent versions of HotSpot
+ * have helped Random increase in speed to the point where it is faster than
+ * MersenneTwister but slower than MersenneTwisterFast (which should be the
+ * case, as it's a less complex algorithm but is synchronized).
+ *
+ * <p>
+ * <b>Changes Since V5:</b> New empty constructor made to work the same as
+ * java.util.Random -- namely, it seeds based on the current time in
+ * milliseconds.
+ *
+ * <p>
+ * <b>Changes Since V4:</b> New initialization algorithms. (See <a
+ * href="http://www.math.keio.ac.jp/matumoto/MT2002/emt19937ar.html">
+ * http://www.math.keio.ac.jp/matumoto/MT2002/emt19937ar.html</a>.)
+ *
+ * <p>
+ * The MersenneTwister code is based on standard MT19937 C/C++ code by Takuji
+ * Nishimura, with suggestions from Topher Cooper and Marc Rieffel, July 1997.
+ * The code was originally translated into Java by Michael Lecuyer, January
+ * 1999, and the original code is Copyright (c) 1999 by Michael Lecuyer.
+ *
+ * <h3>Java notes</h3>
+ *
+ * <p>
+ * This implementation implements the bug fixes made in Java 1.2's version of
+ * Random, which means it can be used with earlier versions of Java. See <a
+ * href=
+ * "http://www.javasoft.com/products/jdk/1.2/docs/api/java/util/Random.html">
+ * the JDK 1.2 java.util.Random documentation</a> for further documentation on
+ * the random-number generation contracts made. Additionally, there's an
+ * undocumented bug in the JDK java.util.Random.nextBytes() method, which this
+ * code fixes.
+ *
+ * <p>
+ * Just like java.util.Random, this generator accepts a long seed but doesn't
+ * use all of it. java.util.Random uses 48 bits. The Mersenne Twister instead
+ * uses 32 bits (int size). So it's best if your seed does not exceed the int
+ * range.
+ *
+ * <p>
+ * MersenneTwister can be used reliably on JDK version 1.1.5 or above. Earlier
+ * Java versions have serious bugs in java.util.Random; only MersenneTwisterFast
+ * (and not MersenneTwister nor java.util.Random) should be used with them.
+ *
+ * <h3>License</h3>
+ *
+ * Copyright (c) 2003 by Sean Luke. <br>
+ * Portions copyright (c) 1993 by Michael Lecuyer. <br>
+ * All rights reserved. <br>
+ *
+ * <p>
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * <ul>
+ * <li>Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * <li>Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * <li>Neither the name of the copyright owners, their employers, nor the names
+ * of its contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * </ul>
+ * <p>
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * @version 20
+ */
+
+// Note: this class is hard-inlined in all of its methods. This makes some of
+// the methods well-nigh unreadable in their complexity. In fact, the Mersenne
+// Twister is fairly easy code to understand: if you're trying to get a handle
+// on the code, I strongly suggest looking at MersenneTwister.java first.
+// -- Sean
+
+public strictfp class MersenneTwisterFast implements Serializable, Cloneable
+{
+ // Serialization: UID locked as of Version 15
+ private static final long serialVersionUID = -8219700664442619525L;
+
+ // Period parameters
+ private static final int N = 624; // number of 32-bit words in the state vector mt[]
+ private static final int M = 397; // offset of the word mixed in when mt[] is regenerated
+ private static final int MATRIX_A = 0x9908b0df; // constant vector a
+ private static final int UPPER_MASK = 0x80000000; // most significant w-r bits
+ private static final int LOWER_MASK = 0x7fffffff; // least significant r bits
+
+ // Tempering parameters
+ private static final int TEMPERING_MASK_B = 0x9d2c5680;
+ private static final int TEMPERING_MASK_C = 0xefc60000;
+
+ private int mt[]; // the array for the state vector
+ // Index of the next unread word in mt[]; every generator method regenerates
+ // the whole array first when mti >= N. (Historical note from the C code:
+ // mti==N+1 means mt[N] is not initialized.)
+ private int mti;
+ private int mag01[]; // {0x0, MATRIX_A}; indexed with (y & 0x1) during regeneration
+
+ // a good initial seed (of int size, though stored in a long)
+ // private static final long GOOD_SEED = 4357;
+
+ // Saved Gaussian value and its validity flag. The flag is cleared by
+ // setSeed(long) and both are persisted by readState/writeState; presumably
+ // they are consumed by nextGaussian -- confirm against that method.
+ private double __nextNextGaussian;
+ private boolean __haveNextNextGaussian;
+
+    /**
+     * Returns a copy of this generator that shares no mutable state with it:
+     * the state vector and the mag01 table are duplicated, every other field
+     * is copied by {@code Object.clone()}.
+     */
+    @Override
+    public Object clone()
+    {
+        final MersenneTwisterFast copy;
+        try
+        {
+            copy = (MersenneTwisterFast) super.clone();
+        }
+        catch (CloneNotSupportedException impossible)
+        {
+            // should never happen
+            throw new InternalError();
+        }
+
+        copy.mt = mt.clone();
+        copy.mag01 = mag01.clone();
+        return copy;
+    }
+
+    /**
+     * Compares the generator state of this instance with another
+     * MersenneTwisterFast: the read position, the mag01 table and the state
+     * vector must all match. The saved Gaussian value is not part of the
+     * comparison.
+     *
+     * @param o object to compare against
+     * @return true if {@code o} is a MersenneTwisterFast in the same state
+     */
+    public boolean stateEquals(Object o)
+    {
+        if (this == o)
+        {
+            return true;
+        }
+        if (!(o instanceof MersenneTwisterFast))
+        {
+            return false;
+        }
+
+        final MersenneTwisterFast that = (MersenneTwisterFast) o;
+        return mti == that.mti
+                && Arrays.equals(mag01, that.mag01)
+                && Arrays.equals(mt, that.mt);
+    }
+
+    /**
+     * Restores the generator from a stream, reading in order: every word of
+     * the state vector, every word of mag01, the read position, the saved
+     * Gaussian value and its flag. This is the layout produced by writeState.
+     *
+     * @param stream source of the saved state
+     * @throws IOException if the stream cannot be read
+     */
+    public void readState(DataInputStream stream) throws IOException
+    {
+        final int stateWords = mt.length;
+        int i = 0;
+        while (i < stateWords)
+        {
+            mt[i] = stream.readInt();
+            i++;
+        }
+
+        final int magWords = mag01.length;
+        i = 0;
+        while (i < magWords)
+        {
+            mag01[i] = stream.readInt();
+            i++;
+        }
+
+        mti = stream.readInt();
+        __nextNextGaussian = stream.readDouble();
+        __haveNextNextGaussian = stream.readBoolean();
+    }
+
+    /**
+     * Saves the generator to a stream, writing in order: every word of the
+     * state vector, every word of mag01, the read position, the saved
+     * Gaussian value and its flag.
+     *
+     * @param stream destination for the state
+     * @throws IOException if the stream cannot be written
+     */
+    public void writeState(DataOutputStream stream) throws IOException
+    {
+        for (int word : mt)
+        {
+            stream.writeInt(word);
+        }
+
+        for (int word : mag01)
+        {
+            stream.writeInt(word);
+        }
+
+        stream.writeInt(mti);
+        stream.writeDouble(__nextNextGaussian);
+        stream.writeBoolean(__haveNextNextGaussian);
+    }
+
+    /**
+     * Creates a generator seeded with the current time in milliseconds
+     * ({@code System.currentTimeMillis()}).
+     */
+    public MersenneTwisterFast()
+    {
+        this(System.currentTimeMillis());
+    }
+
+    /**
+     * Creates a generator from the given seed. The seed is declared as a long,
+     * but it is best to pass a value that fits in an int.
+     *
+     * @param seed the seed, handed to {@code setSeed(long)}
+     */
+    public MersenneTwisterFast(long seed)
+    {
+        this.setSeed(seed);
+    }
+
+    /**
+     * Creates a generator seeded from an array of integers. The array must
+     * have a non-zero length. Every element is mixed into the state; an array
+     * shorter than 624 elements is reused in a wrap-around fashion.
+     *
+     * @param array the seed material, handed to {@code setSeed(int[])}
+     */
+    public MersenneTwisterFast(int[] array)
+    {
+        this.setSeed(array);
+    }
+
+    /**
+     * Initializes the pseudo random number generator from a single seed. Only
+     * the low-order 32 bits of the seed are used, so do not rely on the upper
+     * half of the long. On return the state vector is freshly filled, the
+     * saved Gaussian value is discarded, and the read position is at the end
+     * of the vector, so the first draw regenerates the state.
+     *
+     * @param seed the seed; only its low 32 bits matter
+     */
+
+    synchronized public void setSeed(long seed)
+    {
+        // Due to a bug in java.util.Random clear up to 1.2, we're
+        // doing our own Gaussian variable.
+        __haveNextNextGaussian = false;
+
+        final int[] state = new int[N];
+        // the narrowing cast keeps exactly the low 32 bits of the seed
+        state[0] = (int) seed;
+        for (int i = 1; i < N; i++)
+        {
+            /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
+            final int previous = state[i - 1];
+            state[i] = 1812433253 * (previous ^ (previous >>> 30)) + i;
+        }
+
+        mt = state;
+        mag01 = new int[] { 0x0, MATRIX_A };
+        // nothing has been drawn from the new state yet
+        mti = N;
+    }
+
+ /**
+ * Sets the seed of the MersenneTwister using an array of integers. Your
+ * array must have a non-zero length. Every element of the array is mixed
+ * into the state: the first pass runs max(624, array.length) times, and an
+ * array shorter than 624 is reused in a wrap-around fashion.
+ * <p>
+ * The state is first initialized with setSeed(19650218), then perturbed by
+ * two mixing passes; finally mt[0] is forced to 0x80000000 so the state
+ * can never be all zero.
+ *
+ * @param array the seed material; a null array fails with a
+ * NullPointerException on the length check
+ * @throws IllegalArgumentException if the array has zero length
+ */
+
+ synchronized public void setSeed(int[] array)
+ {
+ if (array.length == 0)
+ throw new IllegalArgumentException("Array length must be greater than zero");
+ int i, j, k;
+ setSeed(19650218);
+ i = 1;
+ j = 0;
+ k = (N > array.length ? N : array.length);
+ // first pass: fold the seed array into the state, i walks mt[1..N-1]
+ // cyclically and j walks the array cyclically
+ for (; k != 0; k--)
+ {
+ mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >>> 30)) * 1664525)) + array[j] + j; /* non linear */
+ // mt[i] &= 0xffffffff; /* for WORDSIZE > 32 machines */
+ i++;
+ j++;
+ if (i >= N)
+ {
+ mt[0] = mt[N - 1];
+ i = 1;
+ }
+ if (j >= array.length)
+ j = 0;
+ }
+ // second pass: N - 1 further mixing steps, continuing from the current i
+ for (k = N - 1; k != 0; k--)
+ {
+ mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >>> 30)) * 1566083941)) - i; /* non linear */
+ // mt[i] &= 0xffffffff; /* for WORDSIZE > 32 machines */
+ i++;
+ if (i >= N)
+ {
+ mt[0] = mt[N - 1];
+ i = 1;
+ }
+ }
+ mt[0] = 0x80000000; /* MSB is 1; assuring non-zero initial array */
+ }
+
+ /**
+ * Returns the next pseudorandom int, using all 32 bits of one state word.
+ * <p>
+ * If every word of the state vector has been consumed ({@code mti >= N})
+ * the whole vector is regenerated in place first. The next word is then
+ * read, tempered, and returned. The regeneration and tempering code is
+ * repeated inline in every generator method of this class.
+ *
+ * @return the tempered state word
+ */
+ public int nextInt()
+ {
+ int y;
+
+ if (mti >= N) // generate N words at one time
+ {
+ int kk;
+ final int[] mt = this.mt; // locals are slightly faster
+ final int[] mag01 = this.mag01; // locals are slightly faster
+
+ // words 0 .. N-M-1 are combined with the word M places ahead
+ for (kk = 0; kk < N - M; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ // words N-M .. N-2 wrap around to already regenerated words (M - N is negative)
+ for (; kk < N - 1; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ // the last word pairs with mt[0]
+ y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+ mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1];
+
+ mti = 0;
+ }
+
+ y = mt[mti++];
+ y ^= y >>> 11; // TEMPERING_SHIFT_U(y)
+ y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y)
+ y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y)
+ y ^= (y >>> 18); // TEMPERING_SHIFT_L(y)
+
+ return y;
+ }
+
+ /**
+ * Returns the next pseudorandom short. One state word is consumed
+ * (regenerating the state vector first when it is exhausted) and the
+ * upper 16 bits of the tempered word are returned.
+ *
+ * @return the upper 16 bits of the tempered state word
+ */
+ public short nextShort()
+ {
+ int y;
+
+ if (mti >= N) // generate N words at one time
+ {
+ int kk;
+ final int[] mt = this.mt; // locals are slightly faster
+ final int[] mag01 = this.mag01; // locals are slightly faster
+
+ for (kk = 0; kk < N - M; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ for (; kk < N - 1; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+ mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1];
+
+ mti = 0;
+ }
+
+ y = mt[mti++];
+ y ^= y >>> 11; // TEMPERING_SHIFT_U(y)
+ y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y)
+ y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y)
+ y ^= (y >>> 18); // TEMPERING_SHIFT_L(y)
+
+ return (short) (y >>> 16);
+ }
+
+ /**
+ * Returns the next pseudorandom char. One state word is consumed
+ * (regenerating the state vector first when it is exhausted) and the
+ * upper 16 bits of the tempered word are returned.
+ *
+ * @return the upper 16 bits of the tempered state word
+ */
+ public char nextChar()
+ {
+ int y;
+
+ if (mti >= N) // generate N words at one time
+ {
+ int kk;
+ final int[] mt = this.mt; // locals are slightly faster
+ final int[] mag01 = this.mag01; // locals are slightly faster
+
+ for (kk = 0; kk < N - M; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ for (; kk < N - 1; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+ mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1];
+
+ mti = 0;
+ }
+
+ y = mt[mti++];
+ y ^= y >>> 11; // TEMPERING_SHIFT_U(y)
+ y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y)
+ y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y)
+ y ^= (y >>> 18); // TEMPERING_SHIFT_L(y)
+
+ return (char) (y >>> 16);
+ }
+
+ /**
+ * Returns the next pseudorandom boolean. One state word is consumed
+ * (regenerating the state vector first when it is exhausted); the result
+ * is the most significant bit of the tempered word.
+ *
+ * @return true if the top bit of the tempered state word is set
+ */
+ public boolean nextBoolean()
+ {
+ int y;
+
+ if (mti >= N) // generate N words at one time
+ {
+ int kk;
+ final int[] mt = this.mt; // locals are slightly faster
+ final int[] mag01 = this.mag01; // locals are slightly faster
+
+ for (kk = 0; kk < N - M; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ for (; kk < N - 1; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+ mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1];
+
+ mti = 0;
+ }
+
+ y = mt[mti++];
+ y ^= y >>> 11; // TEMPERING_SHIFT_U(y)
+ y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y)
+ y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y)
+ y ^= (y >>> 18); // TEMPERING_SHIFT_L(y)
+
+ return (boolean) ((y >>> 31) != 0);
+ }
+
+ /**
+ * This generates a coin flip with a probability <tt>probability</tt> of
+ * returning true, else returning false. <tt>probability</tt> must be
+ * between 0.0 and 1.0, inclusive. Not as precise a random real event as
+ * nextBoolean(double), but twice as fast. To explicitly use this, remember
+ * you may need to cast to float first.
+ * <p>
+ * A probability of exactly 0.0 or 1.0 is answered immediately without
+ * consuming a state word. Otherwise one word is consumed and its upper 24
+ * bits, scaled into [0,1), are compared against <tt>probability</tt>.
+ *
+ * @param probability chance of returning true, between 0.0 and 1.0 inclusive
+ * @return true with the given probability
+ * @throws IllegalArgumentException if <tt>probability</tt> is below 0.0 or above 1.0
+ */
+
+ public boolean nextBoolean(float probability)
+ {
+ int y;
+
+ if (probability < 0.0f || probability > 1.0f)
+ throw new IllegalArgumentException("probability must be between 0.0 and 1.0 inclusive.");
+ if (probability == 0.0f)
+ return false; // fix half-open issues
+ else if (probability == 1.0f)
+ return true; // fix half-open issues
+ if (mti >= N) // generate N words at one time
+ {
+ int kk;
+ final int[] mt = this.mt; // locals are slightly faster
+ final int[] mag01 = this.mag01; // locals are slightly faster
+
+ for (kk = 0; kk < N - M; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ for (; kk < N - 1; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+ mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1];
+
+ mti = 0;
+ }
+
+ y = mt[mti++];
+ y ^= y >>> 11; // TEMPERING_SHIFT_U(y)
+ y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y)
+ y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y)
+ y ^= (y >>> 18); // TEMPERING_SHIFT_L(y)
+
+ // upper 24 bits of y divided by 2^24 gives a float in [0,1)
+ return (y >>> 8) / ((float) (1 << 24)) < probability;
+ }
+
+ /**
+ * This generates a coin flip with a probability <tt>probability</tt> of
+ * returning true, else returning false. <tt>probability</tt> must be
+ * between 0.0 and 1.0, inclusive.
+ * <p>
+ * A probability of exactly 0.0 or 1.0 is answered immediately without
+ * consuming any state. Otherwise two state words are consumed: the upper
+ * 26 bits of the first and the upper 27 bits of the second form a 53-bit
+ * fraction in [0,1) that is compared against <tt>probability</tt>.
+ *
+ * @param probability chance of returning true, between 0.0 and 1.0 inclusive
+ * @return true with the given probability
+ * @throws IllegalArgumentException if <tt>probability</tt> is below 0.0 or above 1.0
+ */
+
+ public boolean nextBoolean(double probability)
+ {
+ int y;
+ int z;
+
+ if (probability < 0.0 || probability > 1.0)
+ throw new IllegalArgumentException("probability must be between 0.0 and 1.0 inclusive.");
+ if (probability == 0.0)
+ return false; // fix half-open issues
+ else if (probability == 1.0)
+ return true; // fix half-open issues
+ if (mti >= N) // generate N words at one time
+ {
+ int kk;
+ final int[] mt = this.mt; // locals are slightly faster
+ final int[] mag01 = this.mag01; // locals are slightly faster
+
+ for (kk = 0; kk < N - M; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ for (; kk < N - 1; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+ mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1];
+
+ mti = 0;
+ }
+
+ y = mt[mti++];
+ y ^= y >>> 11; // TEMPERING_SHIFT_U(y)
+ y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y)
+ y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y)
+ y ^= (y >>> 18); // TEMPERING_SHIFT_L(y)
+
+ // second word: the first draw may have used up the last state word
+ if (mti >= N) // generate N words at one time
+ {
+ int kk;
+ final int[] mt = this.mt; // locals are slightly faster
+ final int[] mag01 = this.mag01; // locals are slightly faster
+
+ for (kk = 0; kk < N - M; kk++)
+ {
+ z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + M] ^ (z >>> 1) ^ mag01[z & 0x1];
+ }
+ for (; kk < N - 1; kk++)
+ {
+ z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + (M - N)] ^ (z >>> 1) ^ mag01[z & 0x1];
+ }
+ z = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+ mt[N - 1] = mt[M - 1] ^ (z >>> 1) ^ mag01[z & 0x1];
+
+ mti = 0;
+ }
+
+ z = mt[mti++];
+ z ^= z >>> 11; // TEMPERING_SHIFT_U(z)
+ z ^= (z << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(z)
+ z ^= (z << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(z)
+ z ^= (z >>> 18); // TEMPERING_SHIFT_L(z)
+
+ /* derived from nextDouble documentation in jdk 1.2 docs, see top */
+ return ((((long) (y >>> 6)) << 27) + (z >>> 5)) / (double) (1L << 53) < probability;
+ }
+
+ /**
+ * Returns the next pseudorandom byte. One state word is consumed
+ * (regenerating the state vector first when it is exhausted) and the
+ * upper 8 bits of the tempered word are returned.
+ *
+ * @return the upper 8 bits of the tempered state word
+ */
+ public byte nextByte()
+ {
+ int y;
+
+ if (mti >= N) // generate N words at one time
+ {
+ int kk;
+ final int[] mt = this.mt; // locals are slightly faster
+ final int[] mag01 = this.mag01; // locals are slightly faster
+
+ for (kk = 0; kk < N - M; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ for (; kk < N - 1; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+ mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1];
+
+ mti = 0;
+ }
+
+ y = mt[mti++];
+ y ^= y >>> 11; // TEMPERING_SHIFT_U(y)
+ y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y)
+ y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y)
+ y ^= (y >>> 18); // TEMPERING_SHIFT_L(y)
+
+ return (byte) (y >>> 24);
+ }
+
+ /**
+ * Fills the given array with pseudorandom bytes. Each byte consumes one
+ * full state word (regenerating the state vector whenever it is
+ * exhausted) and receives the upper 8 bits of the tempered word, exactly
+ * as nextByte() would produce them.
+ *
+ * @param bytes the array to fill; an empty array consumes no state
+ */
+ public void nextBytes(byte[] bytes)
+ {
+ int y;
+
+ for (int x = 0; x < bytes.length; x++)
+ {
+ if (mti >= N) // generate N words at one time
+ {
+ int kk;
+ final int[] mt = this.mt; // locals are slightly faster
+ final int[] mag01 = this.mag01; // locals are slightly faster
+
+ for (kk = 0; kk < N - M; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ for (; kk < N - 1; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+ mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1];
+
+ mti = 0;
+ }
+
+ y = mt[mti++];
+ y ^= y >>> 11; // TEMPERING_SHIFT_U(y)
+ y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y)
+ y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y)
+ y ^= (y >>> 18); // TEMPERING_SHIFT_L(y)
+
+ bytes[x] = (byte) (y >>> 24);
+ }
+ }
+
+ /**
+ * Returns the next pseudorandom long. Two state words are consumed
+ * (regenerating the state vector before either draw when it is
+ * exhausted): the first tempered word is shifted into the upper 32 bits
+ * and the second is added to it as a signed, sign-extended value.
+ *
+ * @return a long built from two consecutive tempered state words
+ */
+ public long nextLong()
+ {
+ int y;
+ int z;
+
+ if (mti >= N) // generate N words at one time
+ {
+ int kk;
+ final int[] mt = this.mt; // locals are slightly faster
+ final int[] mag01 = this.mag01; // locals are slightly faster
+
+ for (kk = 0; kk < N - M; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ for (; kk < N - 1; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+ mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1];
+
+ mti = 0;
+ }
+
+ y = mt[mti++];
+ y ^= y >>> 11; // TEMPERING_SHIFT_U(y)
+ y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y)
+ y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y)
+ y ^= (y >>> 18); // TEMPERING_SHIFT_L(y)
+
+ // second word: the first draw may have used up the last state word
+ if (mti >= N) // generate N words at one time
+ {
+ int kk;
+ final int[] mt = this.mt; // locals are slightly faster
+ final int[] mag01 = this.mag01; // locals are slightly faster
+
+ for (kk = 0; kk < N - M; kk++)
+ {
+ z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + M] ^ (z >>> 1) ^ mag01[z & 0x1];
+ }
+ for (; kk < N - 1; kk++)
+ {
+ z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + (M - N)] ^ (z >>> 1) ^ mag01[z & 0x1];
+ }
+ z = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+ mt[N - 1] = mt[M - 1] ^ (z >>> 1) ^ mag01[z & 0x1];
+
+ mti = 0;
+ }
+
+ z = mt[mti++];
+ z ^= z >>> 11; // TEMPERING_SHIFT_U(z)
+ z ^= (z << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(z)
+ z ^= (z << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(z)
+ z ^= (z >>> 18); // TEMPERING_SHIFT_L(z)
+
+ return (((long) y) << 32) + (long) z;
+ }
+
+ /**
+ * Returns a long drawn uniformly from 0 to n-1. Suffice it to say, n must
+ * be > 0, or an IllegalArgumentException is raised.
+ * <p>
+ * Each attempt consumes two state words, combines them as nextLong() does
+ * and drops the lowest bit to obtain a non-negative 63-bit value, which is
+ * reduced modulo n. The attempt is repeated while the check
+ * {@code bits - val + (n - 1)} overflows to a negative number.
+ *
+ * @param n exclusive upper bound, must be positive
+ * @return a value in the range 0 to n-1
+ * @throws IllegalArgumentException if n is zero or negative
+ */
+ public long nextLong(long n)
+ {
+ if (n <= 0)
+ throw new IllegalArgumentException("n must be positive, got: " + n);
+
+ long bits, val;
+ do
+ {
+ int y;
+ int z;
+
+ if (mti >= N) // generate N words at one time
+ {
+ int kk;
+ final int[] mt = this.mt; // locals are slightly faster
+ final int[] mag01 = this.mag01; // locals are slightly faster
+
+ for (kk = 0; kk < N - M; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ for (; kk < N - 1; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+ mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1];
+
+ mti = 0;
+ }
+
+ y = mt[mti++];
+ y ^= y >>> 11; // TEMPERING_SHIFT_U(y)
+ y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y)
+ y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y)
+ y ^= (y >>> 18); // TEMPERING_SHIFT_L(y)
+
+ // second word: the first draw may have used up the last state word
+ if (mti >= N) // generate N words at one time
+ {
+ int kk;
+ final int[] mt = this.mt; // locals are slightly faster
+ final int[] mag01 = this.mag01; // locals are slightly faster
+
+ for (kk = 0; kk < N - M; kk++)
+ {
+ z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + M] ^ (z >>> 1) ^ mag01[z & 0x1];
+ }
+ for (; kk < N - 1; kk++)
+ {
+ z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + (M - N)] ^ (z >>> 1) ^ mag01[z & 0x1];
+ }
+ z = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+ mt[N - 1] = mt[M - 1] ^ (z >>> 1) ^ mag01[z & 0x1];
+
+ mti = 0;
+ }
+
+ z = mt[mti++];
+ z ^= z >>> 11; // TEMPERING_SHIFT_U(z)
+ z ^= (z << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(z)
+ z ^= (z << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(z)
+ z ^= (z >>> 18); // TEMPERING_SHIFT_L(z)
+
+ // unsigned shift clears the sign bit, leaving a non-negative value
+ bits = (((((long) y) << 32) + (long) z) >>> 1);
+ val = bits % n;
+ }
+ while (bits - val + (n - 1) < 0);
+ return val;
+ }
+
+ /**
+ * Returns a random double in the half-open range from [0.0,1.0). Thus 0.0
+ * is a valid result but 1.0 is not.
+ * <p>
+ * Two tempered 32-bit words are drawn (regenerating the state array first
+ * whenever mti has reached N). The upper 26 bits of the first word and the
+ * upper 27 bits of the second are joined into a 53-bit integer, which is
+ * then divided by 2^53.
+ *
+ * @return a value that is at least 0.0 and strictly less than 1.0
+ */
+ public double nextDouble()
+ {
+ // y and z hold the two 32-bit words that make up the result
+ int y;
+ int z;
+
+ if (mti >= N) // generate N words at one time
+ {
+ int kk;
+ final int[] mt = this.mt; // locals are slightly faster
+ final int[] mag01 = this.mag01; // locals are slightly faster
+
+ for (kk = 0; kk < N - M; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ for (; kk < N - 1; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+ mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1];
+
+ mti = 0;
+ }
+
+ // first word: read the next state entry and temper it
+ y = mt[mti++];
+ y ^= y >>> 11; // TEMPERING_SHIFT_U(y)
+ y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y)
+ y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y)
+ y ^= (y >>> 18); // TEMPERING_SHIFT_L(y)
+
+ // the draw above may have consumed the last state entry, so the
+ // exhaustion check is repeated before the second word is read
+ if (mti >= N) // generate N words at one time
+ {
+ int kk;
+ final int[] mt = this.mt; // locals are slightly faster
+ final int[] mag01 = this.mag01; // locals are slightly faster
+
+ for (kk = 0; kk < N - M; kk++)
+ {
+ z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + M] ^ (z >>> 1) ^ mag01[z & 0x1];
+ }
+ for (; kk < N - 1; kk++)
+ {
+ z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + (M - N)] ^ (z >>> 1) ^ mag01[z & 0x1];
+ }
+ z = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+ mt[N - 1] = mt[M - 1] ^ (z >>> 1) ^ mag01[z & 0x1];
+
+ mti = 0;
+ }
+
+ // second word: read the next state entry and temper it
+ z = mt[mti++];
+ z ^= z >>> 11; // TEMPERING_SHIFT_U(z)
+ z ^= (z << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(z)
+ z ^= (z << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(z)
+ z ^= (z >>> 18); // TEMPERING_SHIFT_L(z)
+
+ /* derived from nextDouble documentation in jdk 1.2 docs, see top */
+ // 26 bits of y above 27 bits of z give a 53-bit integer, scaled by 2^53
+ return ((((long) (y >>> 6)) << 27) + (z >>> 5)) / (double) (1L << 53);
+ }
+
+ /**
+  * Draws a double from the unit interval, where the caller chooses whether
+  * each endpoint may be returned:
+  *
+  * <ul>
+  * <li>nextDouble(false, false) gives a value in (0.0, 1.0)
+  * <li>nextDouble(true, false) gives a value in [0.0, 1.0)
+  * <li>nextDouble(false, true) gives a value in (0.0, 1.0]
+  * <li>nextDouble(true, true) gives a value in [0.0, 1.0]
+  * </ul>
+  *
+  * <p>
+  * Candidates are taken from {@link #nextDouble()} and rejected until one
+  * satisfies the requested interval, so every value the base method can
+  * produce remains reachable.
+  *
+  * @param includeZero whether 0.0 is an acceptable result
+  * @param includeOne whether 1.0 is an acceptable result
+  * @return a double within the requested interval
+  */
+ public double nextDouble(boolean includeZero, boolean includeOne)
+ {
+     while (true)
+     {
+         // start from the half-open interval [0.0, 1.0)
+         double candidate = nextDouble();
+
+         // only when 1.0 is wanted: a coin flip moves the candidate into
+         // [1.0, 2.0), of which exactly 1.0 survives the check below
+         if (includeOne)
+         {
+             if (nextBoolean())
+                 candidate += 1.0;
+         }
+
+         // anything beyond 1.0 is never a valid result
+         if (candidate > 1.0)
+             continue;
+
+         // 0.0 is only valid when the caller asked for it
+         if (candidate == 0.0 && !includeZero)
+             continue;
+
+         return candidate;
+     }
+ }
+
+ /**
+ * Returns a random double computed with the polar rejection scheme visible
+ * below: two values v1 and v2 are drawn from [-1.0, 1.0) until
+ * s = v1*v1 + v2*v2 is non-zero and less than 1, and both are then scaled
+ * by sqrt(-2 * ln(s) / s). Presumably the result is normally distributed
+ * with mean 0 and standard deviation 1, as for
+ * java.util.Random.nextGaussian() -- confirm against the class
+ * documentation.
+ * <p>
+ * Each computation yields two results. The second one is stored in
+ * __nextNextGaussian and handed out by the following call without
+ * touching the generator state.
+ *
+ * @return the cached value if one is pending, otherwise a freshly computed
+ * one
+ */
+ public double nextGaussian()
+ {
+ if (__haveNextNextGaussian)
+ {
+ // hand out the value left over from the previous computation
+ __haveNextNextGaussian = false;
+ return __nextNextGaussian;
+ }
+ else
+ {
+ double v1, v2, s;
+ do
+ {
+ // y and z are combined into v1; a and b are combined into v2
+ int y;
+ int z;
+ int a;
+ int b;
+
+ if (mti >= N) // generate N words at one time
+ {
+ int kk;
+ final int[] mt = this.mt; // locals are slightly faster
+ final int[] mag01 = this.mag01; // locals are slightly
+ // faster
+
+ for (kk = 0; kk < N - M; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ for (; kk < N - 1; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+ mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1];
+
+ mti = 0;
+ }
+
+ // first word
+ y = mt[mti++];
+ y ^= y >>> 11; // TEMPERING_SHIFT_U(y)
+ y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y)
+ y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y)
+ y ^= (y >>> 18); // TEMPERING_SHIFT_L(y)
+
+ if (mti >= N) // generate N words at one time
+ {
+ int kk;
+ final int[] mt = this.mt; // locals are slightly faster
+ final int[] mag01 = this.mag01; // locals are slightly
+ // faster
+
+ for (kk = 0; kk < N - M; kk++)
+ {
+ z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + M] ^ (z >>> 1) ^ mag01[z & 0x1];
+ }
+ for (; kk < N - 1; kk++)
+ {
+ z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + (M - N)] ^ (z >>> 1) ^ mag01[z & 0x1];
+ }
+ z = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+ mt[N - 1] = mt[M - 1] ^ (z >>> 1) ^ mag01[z & 0x1];
+
+ mti = 0;
+ }
+
+ // second word
+ z = mt[mti++];
+ z ^= z >>> 11; // TEMPERING_SHIFT_U(z)
+ z ^= (z << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(z)
+ z ^= (z << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(z)
+ z ^= (z >>> 18); // TEMPERING_SHIFT_L(z)
+
+ if (mti >= N) // generate N words at one time
+ {
+ int kk;
+ final int[] mt = this.mt; // locals are slightly faster
+ final int[] mag01 = this.mag01; // locals are slightly
+ // faster
+
+ for (kk = 0; kk < N - M; kk++)
+ {
+ a = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + M] ^ (a >>> 1) ^ mag01[a & 0x1];
+ }
+ for (; kk < N - 1; kk++)
+ {
+ a = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + (M - N)] ^ (a >>> 1) ^ mag01[a & 0x1];
+ }
+ a = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+ mt[N - 1] = mt[M - 1] ^ (a >>> 1) ^ mag01[a & 0x1];
+
+ mti = 0;
+ }
+
+ // third word
+ a = mt[mti++];
+ a ^= a >>> 11; // TEMPERING_SHIFT_U(a)
+ a ^= (a << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(a)
+ a ^= (a << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(a)
+ a ^= (a >>> 18); // TEMPERING_SHIFT_L(a)
+
+ if (mti >= N) // generate N words at one time
+ {
+ int kk;
+ final int[] mt = this.mt; // locals are slightly faster
+ final int[] mag01 = this.mag01; // locals are slightly
+ // faster
+
+ for (kk = 0; kk < N - M; kk++)
+ {
+ b = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + M] ^ (b >>> 1) ^ mag01[b & 0x1];
+ }
+ for (; kk < N - 1; kk++)
+ {
+ b = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + (M - N)] ^ (b >>> 1) ^ mag01[b & 0x1];
+ }
+ b = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+ mt[N - 1] = mt[M - 1] ^ (b >>> 1) ^ mag01[b & 0x1];
+
+ mti = 0;
+ }
+
+ // fourth word
+ b = mt[mti++];
+ b ^= b >>> 11; // TEMPERING_SHIFT_U(b)
+ b ^= (b << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(b)
+ b ^= (b << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(b)
+ b ^= (b >>> 18); // TEMPERING_SHIFT_L(b)
+
+ /*
+ * derived from nextDouble documentation in jdk 1.2 docs, see
+ * top
+ */
+ // each pair of words becomes a 53-bit fraction in [0.0, 1.0),
+ // which is then mapped onto [-1.0, 1.0)
+ v1 = 2 * (((((long) (y >>> 6)) << 27) + (z >>> 5)) / (double) (1L << 53)) - 1;
+ v2 = 2 * (((((long) (a >>> 6)) << 27) + (b >>> 5)) / (double) (1L << 53)) - 1;
+ s = v1 * v1 + v2 * v2;
+ }
+ // s == 0 is excluded because the multiplier below divides by s
+ while (s >= 1 || s == 0);
+ double multiplier = StrictMath.sqrt(-2 * StrictMath.log(s) / s);
+ // keep the second result for the next call
+ __nextNextGaussian = v2 * multiplier;
+ __haveNextNextGaussian = true;
+ return v1 * multiplier;
+ }
+ }
+
+ /**
+ * Returns a random float in the half-open range from [0.0f,1.0f). Thus 0.0f
+ * is a valid result but 1.0f is not.
+ * <p>
+ * One tempered 32-bit word is drawn (regenerating the state array first
+ * when mti has reached N); its upper 24 bits are divided by 2^24.
+ *
+ * @return a value that is at least 0.0f and strictly less than 1.0f
+ */
+ public float nextFloat()
+ {
+ int y;
+
+ if (mti >= N) // generate N words at one time
+ {
+ int kk;
+ final int[] mt = this.mt; // locals are slightly faster
+ final int[] mag01 = this.mag01; // locals are slightly faster
+
+ for (kk = 0; kk < N - M; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ for (; kk < N - 1; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+ mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1];
+
+ mti = 0;
+ }
+
+ // read the next state entry and temper it
+ y = mt[mti++];
+ y ^= y >>> 11; // TEMPERING_SHIFT_U(y)
+ y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y)
+ y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y)
+ y ^= (y >>> 18); // TEMPERING_SHIFT_L(y)
+
+ // the unsigned shift keeps the upper 24 bits; dividing by 2^24 scales
+ // them into [0.0f, 1.0f)
+ return (y >>> 8) / ((float) (1 << 24));
+ }
+
+ /**
+  * Draws a float from the unit interval, where the caller chooses whether
+  * each endpoint may be returned:
+  *
+  * <ul>
+  * <li>nextFloat(false, false) gives a value in (0.0f, 1.0f)
+  * <li>nextFloat(true, false) gives a value in [0.0f, 1.0f)
+  * <li>nextFloat(false, true) gives a value in (0.0f, 1.0f]
+  * <li>nextFloat(true, true) gives a value in [0.0f, 1.0f]
+  * </ul>
+  *
+  * <p>
+  * Candidates are taken from {@link #nextFloat()} and rejected until one
+  * satisfies the requested interval, so every value the base method can
+  * produce remains reachable.
+  *
+  * @param includeZero whether 0.0f is an acceptable result
+  * @param includeOne whether 1.0f is an acceptable result
+  * @return a float within the requested interval
+  */
+ public float nextFloat(boolean includeZero, boolean includeOne)
+ {
+     while (true)
+     {
+         // start from the half-open interval [0.0f, 1.0f)
+         float candidate = nextFloat();
+
+         // only when 1.0f is wanted: a coin flip moves the candidate into
+         // [1.0f, 2.0f), of which exactly 1.0f survives the check below
+         if (includeOne)
+         {
+             if (nextBoolean())
+                 candidate += 1.0f;
+         }
+
+         // anything beyond 1.0f is never a valid result
+         if (candidate > 1.0f)
+             continue;
+
+         // 0.0f is only valid when the caller asked for it
+         if (candidate == 0.0f && !includeZero)
+             continue;
+
+         return candidate;
+     }
+ }
+
+ /**
+ * Returns an integer drawn uniformly from 0 to n-1.
+ * <p>
+ * When n is a power of two, a single tempered word is drawn and the result
+ * is taken from the high bits of its 31-bit unsigned form. Otherwise
+ * 31-bit candidates are drawn and reduced modulo n, repeating while
+ * bits - val + (n - 1) overflows to a negative number.
+ *
+ * @param n the exclusive upper bound; must be > 0
+ * @return a value between 0 (inclusive) and n (exclusive)
+ * @throws IllegalArgumentException if n is not positive
+ */
+ public int nextInt(int n)
+ {
+ if (n <= 0)
+ throw new IllegalArgumentException("n must be positive, got: " + n);
+
+ if ((n & -n) == n) // i.e., n is a power of 2
+ {
+ int y;
+
+ if (mti >= N) // generate N words at one time
+ {
+ int kk;
+ final int[] mt = this.mt; // locals are slightly faster
+ final int[] mag01 = this.mag01; // locals are slightly faster
+
+ for (kk = 0; kk < N - M; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ for (; kk < N - 1; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+ mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1];
+
+ mti = 0;
+ }
+
+ // read the next state entry and temper it
+ y = mt[mti++];
+ y ^= y >>> 11; // TEMPERING_SHIFT_U(y)
+ y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y)
+ y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y)
+ y ^= (y >>> 18); // TEMPERING_SHIFT_L(y)
+
+ // multiply the 31-bit value by n in 64-bit arithmetic and keep the
+ // part above bit 31, i.e. the high bits of the drawn word
+ return (int) ((n * (long) (y >>> 1)) >> 31);
+ }
+
+ int bits, val;
+ do
+ {
+ int y;
+
+ if (mti >= N) // generate N words at one time
+ {
+ int kk;
+ final int[] mt = this.mt; // locals are slightly faster
+ final int[] mag01 = this.mag01; // locals are slightly faster
+
+ for (kk = 0; kk < N - M; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ for (; kk < N - 1; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+ mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1];
+ }
+ y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+ mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1];
+
+ mti = 0;
+ }
+
+ // read the next state entry and temper it
+ y = mt[mti++];
+ y ^= y >>> 11; // TEMPERING_SHIFT_U(y)
+ y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y)
+ y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y)
+ y ^= (y >>> 18); // TEMPERING_SHIFT_L(y)
+
+ // the unsigned shift yields a non-negative 31-bit candidate
+ bits = (y >>> 1);
+ val = bits % n;
+ }
+ // retry while the expression overflows to a negative number
+ while (bits - val + (n - 1) < 0);
+ return val;
+ }
+
+ /**
+ * Tests the code.
+ * <p>
+ * Prints generator output to standard output for manual inspection: first
+ * 1000 unsigned ints from an array-seeded generator (to be compared with
+ * the reference output named below), then a timing comparison against
+ * java.util.Random, then 1000 values from each of the typed accessors, with
+ * the generator re-seeded from SEED before every section.
+ *
+ * @param args not used
+ */
+ public static void main(String args[])
+ {
+ int j;
+
+ MersenneTwisterFast r;
+
+ // CORRECTNESS TEST
+ // COMPARE WITH
+ // http://www.math.keio.ac.jp/matumoto/CODES/MT2002/mt19937ar.out
+
+ r = new MersenneTwisterFast(new int[] { 0x123, 0x234, 0x345, 0x456 });
+ System.out.println("Output of MersenneTwisterFast with new (2002/1/26) seeding mechanism");
+ for (j = 0; j < 1000; j++)
+ {
+ // first, convert the int from signed to "unsigned"
+ long l = (long) r.nextInt();
+ if (l < 0)
+ l += 4294967296L; // add 2^32 so a negative int maps to its unsigned value
+ String s = String.valueOf(l);
+ while (s.length() < 10)
+ s = " " + s; // buffer
+ System.out.print(s + " ");
+ if (j % 5 == 4)
+ System.out.println();
+ }
+
+ // SPEED TEST
+
+ final long SEED = 4357;
+
+ // xx accumulates the drawn values and is printed so the loops have a
+ // visible result
+ int xx;
+ long ms;
+ System.out.println("\nTime to test grabbing 100000000 ints");
+
+ Random rr = new Random(SEED);
+ xx = 0;
+ ms = System.currentTimeMillis();
+ for (j = 0; j < 100000000; j++)
+ xx += rr.nextInt();
+ System.out.println("java.util.Random: " + (System.currentTimeMillis() - ms) + " Ignore this: " + xx);
+
+ r = new MersenneTwisterFast(SEED);
+ ms = System.currentTimeMillis();
+ xx = 0;
+ for (j = 0; j < 100000000; j++)
+ xx += r.nextInt();
+ System.out.println("Mersenne Twister Fast: " + (System.currentTimeMillis() - ms) + " Ignore this: "
+ + xx);
+
+ // TEST TO COMPARE TYPE CONVERSION BETWEEN
+ // MersenneTwisterFast.java AND MersenneTwister.java
+
+ // Every section below prints its values in rows of fixed width and
+ // then ends the output with a line break.
+
+ System.out.println("\nGrab the first 1000 booleans");
+ r = new MersenneTwisterFast(SEED);
+ for (j = 0; j < 1000; j++)
+ {
+ System.out.print(r.nextBoolean() + " ");
+ if (j % 8 == 7)
+ System.out.println();
+ }
+ if (!(j % 8 == 7))
+ System.out.println();
+
+ System.out.println("\nGrab 1000 booleans of increasing probability using nextBoolean(double)");
+ r = new MersenneTwisterFast(SEED);
+ for (j = 0; j < 1000; j++)
+ {
+ // the argument rises from 0.0 at j = 0 to 1.0 at j = 999
+ System.out.print(r.nextBoolean((double) (j / 999.0)) + " ");
+ if (j % 8 == 7)
+ System.out.println();
+ }
+ if (!(j % 8 == 7))
+ System.out.println();
+
+ System.out.println("\nGrab 1000 booleans of increasing probability using nextBoolean(float)");
+ r = new MersenneTwisterFast(SEED);
+ for (j = 0; j < 1000; j++)
+ {
+ System.out.print(r.nextBoolean((float) (j / 999.0f)) + " ");
+ if (j % 8 == 7)
+ System.out.println();
+ }
+ if (!(j % 8 == 7))
+ System.out.println();
+
+ byte[] bytes = new byte[1000];
+ System.out.println("\nGrab the first 1000 bytes using nextBytes");
+ r = new MersenneTwisterFast(SEED);
+ r.nextBytes(bytes);
+ for (j = 0; j < 1000; j++)
+ {
+ System.out.print(bytes[j] + " ");
+ if (j % 16 == 15)
+ System.out.println();
+ }
+ if (!(j % 16 == 15))
+ System.out.println();
+
+ byte b;
+ System.out.println("\nGrab the first 1000 bytes -- must be same as nextBytes");
+ r = new MersenneTwisterFast(SEED);
+ for (j = 0; j < 1000; j++)
+ {
+ System.out.print((b = r.nextByte()) + " ");
+ // flag any byte that differs from the array filled by nextBytes above
+ if (b != bytes[j])
+ System.out.print("BAD ");
+ if (j % 16 == 15)
+ System.out.println();
+ }
+ if (!(j % 16 == 15))
+ System.out.println();
+
+ System.out.println("\nGrab the first 1000 shorts");
+ r = new MersenneTwisterFast(SEED);
+ for (j = 0; j < 1000; j++)
+ {
+ System.out.print(r.nextShort() + " ");
+ if (j % 8 == 7)
+ System.out.println();
+ }
+ if (!(j % 8 == 7))
+ System.out.println();
+
+ System.out.println("\nGrab the first 1000 ints");
+ r = new MersenneTwisterFast(SEED);
+ for (j = 0; j < 1000; j++)
+ {
+ System.out.print(r.nextInt() + " ");
+ if (j % 4 == 3)
+ System.out.println();
+ }
+ if (!(j % 4 == 3))
+ System.out.println();
+
+ System.out.println("\nGrab the first 1000 ints of different sizes");
+ r = new MersenneTwisterFast(SEED);
+ int max = 1;
+ for (j = 0; j < 1000; j++)
+ {
+ System.out.print(r.nextInt(max) + " ");
+ // double the bound each time; restart at 1 once it overflows
+ max *= 2;
+ if (max <= 0)
+ max = 1;
+ if (j % 4 == 3)
+ System.out.println();
+ }
+ if (!(j % 4 == 3))
+ System.out.println();
+
+ System.out.println("\nGrab the first 1000 longs");
+ r = new MersenneTwisterFast(SEED);
+ for (j = 0; j < 1000; j++)
+ {
+ System.out.print(r.nextLong() + " ");
+ if (j % 3 == 2)
+ System.out.println();
+ }
+ if (!(j % 3 == 2))
+ System.out.println();
+
+ System.out.println("\nGrab the first 1000 longs of different sizes");
+ r = new MersenneTwisterFast(SEED);
+ long max2 = 1;
+ for (j = 0; j < 1000; j++)
+ {
+ System.out.print(r.nextLong(max2) + " ");
+ // double the bound each time; restart at 1 once it overflows
+ max2 *= 2;
+ if (max2 <= 0)
+ max2 = 1;
+ if (j % 4 == 3)
+ System.out.println();
+ }
+ if (!(j % 4 == 3))
+ System.out.println();
+
+ System.out.println("\nGrab the first 1000 floats");
+ r = new MersenneTwisterFast(SEED);
+ for (j = 0; j < 1000; j++)
+ {
+ System.out.print(r.nextFloat() + " ");
+ if (j % 4 == 3)
+ System.out.println();
+ }
+ if (!(j % 4 == 3))
+ System.out.println();
+
+ System.out.println("\nGrab the first 1000 doubles");
+ r = new MersenneTwisterFast(SEED);
+ for (j = 0; j < 1000; j++)
+ {
+ System.out.print(r.nextDouble() + " ");
+ if (j % 3 == 2)
+ System.out.println();
+ }
+ if (!(j % 3 == 2))
+ System.out.println();
+
+ System.out.println("\nGrab the first 1000 gaussian doubles");
+ r = new MersenneTwisterFast(SEED);
+ for (j = 0; j < 1000; j++)
+ {
+ System.out.print(r.nextGaussian() + " ");
+ if (j % 3 == 2)
+ System.out.println();
+ }
+ if (!(j % 3 == 2))
+ System.out.println();
+
+ }
+}
\ No newline at end of file
diff --git a/src/main/java/edu/umd/marbl/mhap/utils/PackageInfo.java b/src/main/java/edu/umd/marbl/mhap/utils/PackageInfo.java
new file mode 100644
index 0000000..8e74d54
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/utils/PackageInfo.java
@@ -0,0 +1,87 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
+ * University Of Maryland
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.utils;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Properties;
+
+/**
+ * The Class PackageInfo.
+ */
+public final class PackageInfo
+{
+
+ private static Properties properties = getProjectProperties();
+
+ private static Properties getProjectProperties()
+ {
+ Properties initialProperties = new Properties();
+
+ initialProperties = getFileProperties("/properties/mhap.properties", new Properties());
+ Properties properties = getFileProperties("/mhap.properties", initialProperties);
+
+ return properties;
+ }
+
+ private static Properties getFileProperties(String file, Properties originalProperties)
+ {
+ Properties property = new Properties(originalProperties);
+ try
+ {
+ InputStream in = PackageInfo.class.getClass().getResourceAsStream(file);
+
+ if (in != null)
+ {
+ property.load(in);
+ in.close();
+ }
+ }
+ catch (IOException e)
+ {
+ return property;
+ }
+
+ return property;
+ }
+
+ public static final String VERSION = properties.getProperty("component.version", "unknown");
+ public static final String BUILD_TIME = properties.getProperty("buildtime", "unknown");
+
+ public static Properties getProperties()
+ {
+ return properties;
+ }
+
+ public static String getVersionTag()
+ {
+ return VERSION + " " + BUILD_TIME;
+ }
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/utils/Pair.java b/src/main/java/edu/umd/marbl/mhap/utils/Pair.java
new file mode 100644
index 0000000..737afc3
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/utils/Pair.java
@@ -0,0 +1,101 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2014 by Konstantin Berlin and Sergey Koren
+ * University Of Maryland
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.utils;
+
+import java.io.Serializable;
+
+public class Pair<A,B> implements Serializable
+{
+ public final A x;
+
+ public final B y;
+ /**
+ *
+ */
+ private static final long serialVersionUID = -5782450990742961765L;
+
+ public Pair(A x, B y)
+ {
+ this.x = x;
+ this.y = y;
+ }
+
+ /* (non-Javadoc)
+ * @see java.lang.Object#equals(java.lang.Object)
+ */
+ @Override
+ public boolean equals(Object obj)
+ {
+ if (this == obj)
+ return true;
+ if (obj == null)
+ return false;
+ if (getClass() != obj.getClass())
+ return false;
+
+ Pair<?, ?> other = (Pair<?,?>) obj;
+
+ if (this.x == null)
+ {
+ if (other.x != null)
+ return false;
+ }
+ else if (!this.x.equals(other.x))
+ return false;
+ if (this.y == null)
+ {
+ if (other.y != null)
+ return false;
+ }
+ else if (!this.y.equals(other.y))
+ return false;
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ return 31 * hashcode(this.x) + hashcode(this.y);
+ }
+
+ // todo move this to a helper class.
+ private static int hashcode(Object o) {
+ return o == null ? 0 : o.hashCode();
+ }
+
+ /* (non-Javadoc)
+ * @see java.lang.Object#toString()
+ */
+ @Override
+ public String toString()
+ {
+ return "[x=" + this.x + ", y=" + this.y + "]";
+ }
+}
\ No newline at end of file
diff --git a/src/main/java/edu/umd/marbl/mhap/utils/ParseOptions.java b/src/main/java/edu/umd/marbl/mhap/utils/ParseOptions.java
new file mode 100644
index 0000000..2af034c
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/utils/ParseOptions.java
@@ -0,0 +1,369 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.utils;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+
+/**
+ * The Class ParseOptions.
+ */
+public class ParseOptions
+{
+ public class Option<T extends Object>
+ {
+ protected final Class<T> objectClass;
+
+ private final String description;
+
+ private final String flag;
+
+ private Object value;
+
+ private boolean wasSet;
+
+ private boolean required;
+
+ @SuppressWarnings("unchecked")
+ private Option(String flag, String description, T defaultValue)
+ {
+ this.flag = flag;
+ this.description = description;
+ this.value = defaultValue;
+ this.wasSet = false;
+ this.required = false;
+ this.objectClass = (Class<T>) defaultValue.getClass();
+ }
+
+ private Option(String flag, String description, boolean required, Class<T> objectClass)
+ {
+ this.flag = parseFlag(flag);
+ this.description = description;
+ this.value = null;
+ this.wasSet = false;
+ this.required = required;
+ this.objectClass = objectClass;
+ }
+
+ private String parseFlag(String flag)
+ {
+ flag = flag.trim();
+ if (flag.startsWith("-"))
+ flag = flag.substring(1);
+
+ return flag;
+ }
+
+ public boolean isBoolean()
+ {
+ return Boolean.class.isAssignableFrom(this.objectClass);
+ }
+
+ public boolean isDouble()
+ {
+ return Double.class.isAssignableFrom(this.objectClass);
+ }
+
+ public boolean isInteger()
+ {
+ return Integer.class.isAssignableFrom(this.objectClass);
+ }
+
+ public boolean isString()
+ {
+ return String.class.isAssignableFrom(this.objectClass);
+ }
+
+ public boolean getBoolean()
+ {
+ return (Boolean) this.value;
+ }
+
+ public String getString()
+ {
+ if (this.value == null)
+ return "";
+
+ return (String) this.value;
+ }
+
+ public double getDouble()
+ {
+ return (Double) this.value;
+ }
+
+ public int getInteger()
+ {
+ return (Integer) this.value;
+ }
+
+ public void setValue(Object value)
+ {
+ if (!this.objectClass.isInstance(value))
+ throw new RuntimeException("Incompatable value type for flag \"" + this.flag + "\" of type "
+ + this.objectClass.getName() + ": " + value.getClass().getName());
+
+ this.value = value;
+ this.wasSet = true;
+ }
+
+ public String getFlag()
+ {
+ return this.flag;
+ }
+
+ public boolean isSet()
+ {
+ return this.wasSet;
+ }
+
+ public boolean isRequired()
+ {
+ return this.required;
+ }
+
+ public Class<T> getType()
+ {
+ return this.objectClass;
+ }
+ }
+
+ private final ArrayList<String> startText;
+ private final HashMap<String, Option<?>> optionsByFlag;
+ private final HashMap<String, Option<?>> optionsByName;
+
+ public ParseOptions()
+ {
+ this.startText = new ArrayList<>();
+ this.optionsByFlag = new HashMap<String, Option<?>>();
+ this.optionsByName = new HashMap<String, Option<?>>();
+ addOption("-h", "Displays the help menu.", false);
+ addOption("--help", "Displays the help menu.", false);
+ addOption("--version", "Displays the version and build time.", false);
+ }
+
+ public void addStartTextLine(String text)
+ {
+ this.startText.add(text);
+ }
+
+ public <T> void addOption(String flag, String description, T defaultValue)
+ {
+ flag = parseFlag(flag);
+ if (this.optionsByFlag.get(flag) != null)
+ return;
+
+ Option<T> option = new Option<T>(flag, description, defaultValue);
+ this.optionsByFlag.put(flag, option);
+ this.optionsByName.put(flag, option);
+ }
+
+ public <T> void setOptions(String flag, T defaultValue)
+ {
+ Option<T> option = new Option<T>(flag, getFlag(flag).description, defaultValue);
+ this.optionsByFlag.put(flag, option);
+ this.optionsByName.put(flag, option);
+ }
+
+ public <T> void addRequiredOption(String flag, String description, Class<T> objectClass)
+ {
+ flag = parseFlag(flag);
+
+ Option<T> option = new Option<T>(flag, description, true, objectClass);
+ this.optionsByFlag.put(flag, option);
+ this.optionsByName.put(flag, option);
+ }
+
+ public String parseFlag(String flag)
+ {
+ return flag;
+ }
+
+ public boolean process(String[] args)
+ {
+ try
+ {
+ parse(args);
+ if (needsHelp())
+ {
+ System.out.println(helpMenuString());
+ return false;
+ }
+ else
+ if (needsVersion())
+ {
+ System.out.println("MHAP Version = "+PackageInfo.VERSION+", Build time = "+PackageInfo.BUILD_TIME);
+ return false;
+ }
+
+ checkParameters();
+ }
+ catch (Exception e)
+ {
+ System.out.println(e.getMessage());
+ System.out.println(helpMenuString());
+ return false;
+ }
+
+ return true;
+ }
+
+ public Option<?> get(String name) throws RuntimeException
+ {
+ Option<?> option = this.optionsByName.get(name);
+
+ if (option == null)
+ throw new RuntimeException("Invalid option name \"" + name + "\".");
+
+ return option;
+ }
+
+ public Option<?> getFlag(String flag) throws RuntimeException
+ {
+ Option<?> option = this.optionsByFlag.get(flag);
+
+ if (option == null)
+ throw new RuntimeException("Invalid flag \"" + flag + "\".");
+
+ return option;
+ }
+
+ @Override
+ public String toString()
+ {
+ StringBuilder menuString = new StringBuilder();
+
+ // sort the list
+ ArrayList<String> list = new ArrayList<String>(this.optionsByFlag.keySet());
+ Collections.sort(list);
+
+ for (String key : list)
+ {
+ Option<?> currOption = this.optionsByFlag.get(key);
+ menuString.append("" + currOption.flag + " = ");
+ menuString.append("" + currOption.value);
+ menuString.append("\n");
+ }
+
+ return menuString.toString();
+ }
+
+ public String helpMenuString()
+ {
+ StringBuilder menuString = new StringBuilder();
+ for (String str : this.startText)
+ menuString.append(str+"\n");
+
+ // sort the list
+ ArrayList<String> list = new ArrayList<String>(this.optionsByFlag.keySet());
+ Collections.sort(list);
+
+ for (String key : list)
+ {
+ Option<?> currOption = this.optionsByFlag.get(key);
+ menuString.append("\t\t" + currOption.flag + ", ");
+ if (currOption.isRequired())
+ menuString.append("*required, ");
+ else
+ {
+ if (currOption.isString())
+ menuString.append("default = \"" + currOption.value + "\"");
+ else
+ menuString.append("default = " + currOption.value);
+ }
+ menuString.append("\n");
+ menuString.append("\t\t\t" + currOption.description);
+ menuString.append("\n");
+ }
+
+ return menuString.toString();
+ }
+
+ public void checkParameters()
+ {
+ for (Option<?> option : this.optionsByFlag.values())
+ if (option.required && !option.wasSet)
+ throw new RuntimeException("Required option flag \"" + option.flag + "\" was not set.");
+ }
+
+ public boolean needsHelp()
+ {
+ return get("--help").getBoolean() || get("-h").getBoolean();
+ }
+
+ public boolean needsVersion()
+ {
+ return get("--version").getBoolean();
+ }
+
+
+ public void parse(String[] args) throws RuntimeException
+ {
+ for (int iter = 0; iter < args.length; iter++)
+ {
+ String flag = args[iter].trim();
+
+ if (!flag.startsWith("-"))
+ throw new RuntimeException("Unknown parameter in command line: " + flag);
+
+ flag = parseFlag(flag);
+
+ Option<?> option = getFlag(flag);
+ if (option == null)
+ throw new RuntimeException("Unknown flag \"" + flag + "\".");
+ else if (option.isBoolean())
+ option.setValue(true);
+ else if (iter + 1 < args.length && !args[iter + 1].startsWith("-"))
+ {
+ if (option.isDouble())
+ {
+ option.setValue(new Double(args[iter + 1]));
+ iter++;
+ }
+ else if (option.isInteger())
+ {
+ option.setValue(new Integer(args[iter + 1]));
+ iter++;
+ }
+ else if (option.isString())
+ {
+ option.setValue(args[iter + 1]);
+ iter++;
+ }
+ else
+ throw new RuntimeException("Cannot parse flag \"" + option.getFlag() + "\" of type "
+ + option.getType().getName() + ".");
+ }
+ else
+ throw new RuntimeException("Not value provided for flag \"" + option.getFlag() + "\" of type "
+ + option.getType().getName() + ".");
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/main/java/edu/umd/marbl/mhap/utils/RandomSequenceGenerator.java b/src/main/java/edu/umd/marbl/mhap/utils/RandomSequenceGenerator.java
new file mode 100644
index 0000000..8ccec2b
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/utils/RandomSequenceGenerator.java
@@ -0,0 +1,156 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.utils;
+
+import java.util.LinkedList;
+import java.util.ListIterator;
+
+import edu.umd.marbl.mhap.impl.MhapRuntimeException;
+
+/**
+ * Produces random strings over the alphabet A, C, G, T and applies random
+ * insertion, deletion and substitution errors to existing strings. All
+ * randomness comes from a single MersenneTwisterFast instance.
+ */
+public final class RandomSequenceGenerator
+{
+    private final MersenneTwisterFast randGenerator;
+
+    /** Creates a generator backed by a default-constructed MersenneTwisterFast. */
+    public RandomSequenceGenerator()
+    {
+        this.randGenerator = new MersenneTwisterFast();
+    }
+
+    /**
+     * Creates a generator backed by a MersenneTwisterFast built from the given seed.
+     *
+     * @param seed the value handed to the MersenneTwisterFast constructor
+     */
+    public RandomSequenceGenerator(int seed)
+    {
+        this.randGenerator = new MersenneTwisterFast(seed);
+    }
+
+    /**
+     * Draws one of A, C, G, T, each covering a quarter of the [0,1) draw
+     * range, and redraws until the result differs from {@code toExclude}.
+     *
+     * @param toExclude a base that must not be returned, or null for no restriction
+     * @return the drawn base
+     */
+    private final char getRandomBase(Character toExclude)
+    {
+        while (true)
+        {
+            double draw = this.randGenerator.nextDouble();
+
+            char base;
+            if (draw < 0.25)
+                base = 'A';
+            else if (draw < 0.5)
+                base = 'C';
+            else if (draw < 0.75)
+                base = 'G';
+            else
+                base = 'T';
+
+            if (toExclude == null || toExclude.charValue() != base)
+                return base;
+        }
+    }
+
+    /**
+     * Builds a string of {@code length} independently drawn bases.
+     *
+     * @param length number of bases to generate
+     * @return the generated sequence
+     */
+    public String generateRandomSequence(int length)
+    {
+        StringBuilder str = new StringBuilder(length);
+
+        for (int iter = 0; iter < length; iter++)
+            str.append(getRandomBase(null));
+
+        return str.toString();
+    }
+
+    /**
+     * Applies {@link #addError(String, double, double, double)} with insertion
+     * rate 0.1188, deletion rate 0.0183 and substitution rate 0.0129.
+     *
+     * @param str the sequence to corrupt
+     * @return the corrupted copy
+     */
+    public String addPacBioError(String str)
+    {
+        return addError(str, 0.1188, 0.0183, 0.0129);
+    }
+
+    /**
+     * Returns a copy of {@code str} with random errors applied base by base.
+     * <p>
+     * For each base an error is triggered with probability equal to the sum of
+     * the three rates. Given that an error is triggered, its kind is chosen in
+     * proportion to the individual rates, so each kind occurs per trial with
+     * exactly the rate requested. (Previously the kind was chosen against the
+     * unscaled rates, which sent most errors into the deletion branch.)
+     * <ul>
+     * <li>substitution: the base is replaced by a different random base;</li>
+     * <li>insertion: a random base is emitted in front of the current base and
+     * the current base is then tried again, so several insertions may pile up
+     * in front of one base;</li>
+     * <li>deletion: the base is dropped.</li>
+     * </ul>
+     * NOTE: an insertion rate of 1.0 never lets a base finish and therefore
+     * does not terminate.
+     *
+     * @param str the sequence to corrupt
+     * @param insertionRate per-trial insertion probability, non-negative
+     * @param deletionRate per-trial deletion probability, non-negative
+     * @param substitutionRate per-trial substitution probability, non-negative
+     * @return the corrupted copy
+     * @throws MhapRuntimeException if a rate is negative or the rates sum to more than 1.00001
+     */
+    public String addError(String str, double insertionRate, double deletionRate, double substitutionRate)
+    {
+        if (insertionRate < 0.0 || deletionRate < 0.0 || substitutionRate < 0.0)
+            throw new MhapRuntimeException("Error rate cannot be negative.");
+
+        if (insertionRate + deletionRate + substitutionRate > 1.00001)
+            throw new MhapRuntimeException("Error rate must be less than or equal to 1.0.");
+
+        double errorRate = insertionRate + deletionRate + substitutionRate;
+
+        StringBuilder mutated = new StringBuilder(str.length());
+        for (int pos = 0; pos < str.length(); pos++)
+        {
+            char current = str.charAt(pos);
+
+            // keep trying the same base until it is emitted, substituted or deleted
+            boolean resolved = false;
+            while (!resolved)
+            {
+                if (randGenerator.nextDouble() < errorRate)
+                {
+                    // scale the draw into [0, errorRate) so that the comparisons
+                    // against the absolute rates select kinds proportionally
+                    double errorType = randGenerator.nextDouble() * errorRate;
+                    if (errorType < substitutionRate)
+                    { // mismatch: switch base
+                        mutated.append(getRandomBase(current));
+                        resolved = true;
+                    }
+                    else if (errorType < insertionRate + substitutionRate)
+                    { // insert in front of the current base, then retry the base
+                        mutated.append(getRandomBase(null));
+                    }
+                    else
+                    { // delete
+                        resolved = true;
+                    }
+                }
+                else
+                {
+                    mutated.append(current);
+                    resolved = true;
+                }
+            }
+        }
+
+        return mutated.toString();
+    }
+
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/utils/ReadBuffer.java b/src/main/java/edu/umd/marbl/mhap/utils/ReadBuffer.java
new file mode 100644
index 0000000..8b9ee71
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/utils/ReadBuffer.java
@@ -0,0 +1,45 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2014 by Konstantin Berlin and Sergey Koren
+ * University Of Maryland
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.utils;
+
+/**
+ * Holds a single byte array that is handed out on request and replaced by a
+ * larger one only when a request exceeds its current length.
+ */
+public final class ReadBuffer
+{
+    /** The array currently handed out; starts with two elements. */
+    private byte[] buff = new byte[2];
+
+    /**
+     * Returns an array with at least {@code size} elements. The same array is
+     * returned on every call until a larger size is requested; the replacement
+     * array is freshly allocated, so earlier contents are not carried over.
+     * The returned array may be longer than {@code size}.
+     *
+     * @param size minimum number of elements required
+     * @return the internal array (not a copy)
+     */
+    public final byte[] getBuffer(int size)
+    {
+        final boolean tooSmall = size > this.buff.length;
+        if (tooSmall)
+        {
+            this.buff = new byte[size];
+        }
+
+        return this.buff;
+    }
+
+
+}
\ No newline at end of file
diff --git a/src/main/java/edu/umd/marbl/mhap/utils/SortablePair.java b/src/main/java/edu/umd/marbl/mhap/utils/SortablePair.java
new file mode 100644
index 0000000..86adea6
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/utils/SortablePair.java
@@ -0,0 +1,63 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2014 by Konstantin Berlin and Sergey Koren
+ * University Of Maryland
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.utils;
+
+/**
+ * A {@link Pair} whose natural ordering is determined solely by its first
+ * component; the second component takes no part in comparisons.
+ *
+ * @param <ComparableType> type of the first component, used for ordering
+ * @param <AnyType> type of the second component
+ */
+public class SortablePair<ComparableType extends Comparable<ComparableType>, AnyType> extends
+        Pair<ComparableType, AnyType> implements Comparable<SortablePair<ComparableType, AnyType>>
+{
+    /** Serialization version identifier. */
+    private static final long serialVersionUID = 2817516347839329908L;
+
+    /**
+     * Creates a pair from the two given components.
+     *
+     * @param x
+     *            the first component, which defines the ordering
+     * @param y
+     *            the second component
+     */
+    public SortablePair(ComparableType x, AnyType y)
+    {
+        super(x, y);
+    }
+
+    /**
+     * Compares this pair with another by delegating to the first components.
+     *
+     * @param pair
+     *            the pair to compare against
+     * @return the result of {@code this.x.compareTo(pair.x)}
+     */
+    @Override
+    public final int compareTo(SortablePair<ComparableType, AnyType> pair)
+    {
+        final ComparableType otherKey = pair.x;
+        return this.x.compareTo(otherKey);
+    }
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/utils/Utils.java b/src/main/java/edu/umd/marbl/mhap/utils/Utils.java
new file mode 100644
index 0000000..5e31022
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/utils/Utils.java
@@ -0,0 +1,661 @@
+/*
+ * MHAP package
+ *
+ * This software is distributed "as is", without any warranty, including
+ * any implied warranty of merchantability or fitness for a particular
+ * use. The authors assume no responsibility for, and shall not be liable
+ * for, any special, indirect, or consequential damages, or any damages
+ * whatsoever, arising out of or in connection with the use of this
+ * software.
+ *
+ * Copyright (c) 2014 by Konstantin Berlin and Sergey Koren
+ * University Of Maryland
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package edu.umd.marbl.mhap.utils;
+
+import java.text.DecimalFormat;
+import java.text.NumberFormat;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Random;
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.FileReader;
+
+import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
+
+import edu.umd.marbl.mhap.impl.MhapRuntimeException;
+import edu.umd.marbl.mhap.sketch.FrequencyCounts;
+import edu.umd.marbl.mhap.sketch.HashUtils;
+
+public final class Utils
+{
+
+ public enum ToProtein
+ {
+ AAA("K"), AAC("N"), AAG("K"), AAT("N"), ACA("T"), ACC("T"), ACG("T"), ACT("T"), AGA("R"), AGC("S"), AGG("R"), AGT(
+ "S"), ATA("I"), ATC("I"), ATG("M"), ATT("I"), CAA("Q"), CAC("H"), CAG("Q"), CAT("H"), CCA("P"), CCC("P"), CCG(
+ "P"), CCT("P"), CGA("R"), CGC("R"), CGG("R"), CGT("R"), CTA("L"), CTC("L"), CTG("L"), CTT("L"), GAA("E"), GAC(
+ "D"), GAG("E"), GAT("D"), GCA("A"), GCC("A"), GCG("A"), GCT("A"), GGA("G"), GGC("G"), GGG("G"), GGT("G"), GTA(
+ "V"), GTC("V"), GTG("V"), GTT("V"), TAA("X"), TAC("Y"), TAG("X"), TAT("Y"), TCA("S"), TCC("S"), TCG("S"), TCT(
+ "S"), TGA("X"), TGC("C"), TGG("W"), TGT("C"), TTA("L"), TTC("F"), TTG("L"), TTT("F");
+
+ /*
+ * Ala/A GCU, GCC, GCA, GCG Leu/L UUA, UUG, CUU, CUC, CUA, CUG Arg/R
+ * CGU, CGC, CGA, CGG, AGA, AGG Lys/K AAA, AAG Asn/N AAU, AAC Met/M AUG
+ * Asp/D GAU, GAC Phe/F UUU, UUC Cys/C UGU, UGC Pro/P CCU, CCC, CCA, CCG
+ * Gln/Q CAA, CAG Ser/S UCU, UCC, UCA, UCG, AGU, AGC Glu/E GAA, GAG
+ * Thr/T ACU, ACC, ACA, ACG Gly/G GGU, GGC, GGA, GGG Trp/W UGG His/H
+ * CAU, CAC Tyr/Y UAU, UAC Ile/I AUU, AUC, AUA Val/V GUU, GUC, GUA, GUG
+ * START AUG STOP UAG, UGA, UAA
+ */
+ private String other;
+
+ ToProtein(String other)
+ {
+ this.other = other;
+ }
+
+ public String getProtein()
+ {
+ return this.other;
+ }
+ }
+
+ public enum Translate
+ {
+ A("T"), B("V"), C("G"), D("H"), G("C"), H("D"), K("M"), M("K"), N("N"), R("Y"), S("S"), T("A"), V("B"), W("W"), Y(
+ "R");
+
+ private String other;
+
+ Translate(String other)
+ {
+ this.other = other;
+ }
+
+ public String getCompliment()
+ {
+ return this.other;
+ }
+ }
+
+ public static final int BUFFER_BYTE_SIZE = 8388608; // 8MB
+ public static final NumberFormat DECIMAL_FORMAT = new DecimalFormat("############.########");
+ public static final int FASTA_LINE_LENGTH = 60;
+
+ public static final int MBYTES = 1048576;
+
+ public static int checkForEnd(String line, int brackets)
+ {
+ if (line.startsWith("{"))
+ {
+ brackets++;
+ }
+ if (line.startsWith("}"))
+ {
+ brackets--;
+ }
+ if (brackets == 0)
+ {
+ return -1;
+ }
+
+ return brackets;
+ }
+
+ // add new line breaks every FASTA_LINE_LENGTH characters
+ public final static String convertToFasta(String supplied)
+ {
+ StringBuffer converted = new StringBuffer();
+ int i = 0;
+ String[] split = supplied.trim().split("\\s+");
+ if (split.length > 1)
+ { // process as a qual
+ int size = 0;
+ for (i = 0; i < split.length; i++)
+ {
+ converted.append(split[i]);
+ size += split[i].length();
+ if (i != (split.length - 1))
+ {
+ if (size >= FASTA_LINE_LENGTH)
+ {
+ size = 0;
+ converted.append("\n");
+ }
+ else
+ {
+ converted.append(" ");
+ }
+ }
+ }
+ }
+ else
+ {
+ for (i = 0; (i + FASTA_LINE_LENGTH) < supplied.length(); i += FASTA_LINE_LENGTH)
+ {
+ converted.append(supplied.substring(i, i + FASTA_LINE_LENGTH));
+ converted.append("\n");
+ }
+ converted.append(supplied.substring(i, supplied.length()));
+ }
+ return converted.toString();
+ }
+
+ public final static int countLetterInRead(String fasta, String letter)
+ {
+ return countLetterInRead(fasta, letter, false);
+ }
+
+ public final static int countLetterInRead(String fasta, String letter, Boolean caseSensitive)
+ {
+ String ungapped = Utils.getUngappedRead(fasta);
+ int len = ungapped.length();
+ if (len == 0)
+ {
+ return -1;
+ }
+
+ int increment = letter.length();
+ int count = 0;
+
+ for (int i = 0; i <= ungapped.length() - increment; i += increment)
+ {
+ if (letter.equals(ungapped.substring(i, i + increment)) && caseSensitive)
+ {
+ count++;
+ }
+ if (letter.equalsIgnoreCase(ungapped.substring(i, i + increment)) && !caseSensitive)
+ {
+ count++;
+ }
+ }
+ return count;
+ }
+
+ public final static FrequencyCounts createKmerFilter(String fileName, double maxFraction, int kmerSize, int seed)
+ throws IOException
+ {
+ File file = new File(fileName);
+
+ // make sure don't leak resources
+ try (BufferedReader bf = new BufferedReader(new FileReader(file), BUFFER_BYTE_SIZE);)
+ {
+ // generate hashset
+ HashMap<Long, Double> values = new HashMap<>();
+
+ String line = bf.readLine();
+ while (line != null)
+ {
+ String[] str = line.split("\\s+", 3);
+
+ if (str.length < 2)
+ throw new MhapRuntimeException(
+ "K-mer filter file must have at least two column [k-mer k-mer_fraction].");
+
+ double percent = Double.parseDouble(str[1]);
+
+ // if greater, add to hashset
+ if (percent > maxFraction)
+ {
+ long[] minHash = HashUtils.computeSequenceHashesLong(str[0], kmerSize, seed);
+
+ if (minHash.length > 1)
+ throw new MhapRuntimeException("K-mer filter file size greater than the specified k-mer size.");
+
+ for (long val : minHash)
+ values.put(val, percent);
+ }
+ else
+ break;
+
+ // read the next line
+ line = bf.readLine();
+ }
+ return new FrequencyCounts(values, maxFraction);
+ }
+ }
+
+ public final static int[] errorString(int[] s, double readError)
+ {
+ int[] snew = s.clone();
+
+ Random generator = new Random();
+ for (int iter = 0; iter < s.length; iter++)
+ {
+ if (generator.nextDouble() < readError)
+ while (snew[iter] == s[iter])
+ snew[iter] = generator.nextInt(3);
+ }
+
+ return snew;
+ }
+
+ public final static BufferedReader getFile(String fileName, String postfix) throws IOException
+ {
+ String[] array = new String[1];
+ array[0] = postfix;
+
+ return getFile(fileName, array);
+ }
+
+ public final static BufferedReader getFile(String fileName, String[] postfix) throws IOException
+ {
+ if (fileName.endsWith("bz2"))
+ {
+ BZip2CompressorInputStream bzIn = new BZip2CompressorInputStream(new BufferedInputStream(new FileInputStream(fileName), BUFFER_BYTE_SIZE));
+ return new BufferedReader(new InputStreamReader(bzIn));
+
+ // open file as a pipe
+ //System.err.println("Running command " + "bzip2 -dc " + new File(fileName).getAbsolutePath() + " |");
+ //Process p = Runtime.getRuntime().exec("bzip2 -dc " + new File(fileName).getAbsolutePath() + " |");
+ //bf = new BufferedReader(new InputStreamReader(p.getInputStream()), BUFFER_BYTE_SIZE);
+ //System.err.println(bf.ready());
+ }
+ else if (fileName.endsWith("gz"))
+ {
+ GzipCompressorInputStream bzIn = new GzipCompressorInputStream(new BufferedInputStream(new FileInputStream(fileName), BUFFER_BYTE_SIZE));
+ return new BufferedReader(new InputStreamReader(bzIn));
+
+ // open file as a pipe
+ //System.err.println("Runnning comand " + "gzip -dc " + new File(fileName).getAbsolutePath() + " |");
+ //Process p = Runtime.getRuntime().exec("gzip -dc " + new File(fileName).getAbsolutePath() + " |");
+ //bf = new BufferedReader(new InputStreamReader(p.getInputStream()), BUFFER_BYTE_SIZE);
+ //System.err.println(bf.ready());
+ }
+ else
+ {
+ int i = 0;
+ for (i = 0; i < postfix.length; i++)
+ {
+ if (fileName.endsWith(postfix[i]))
+ return new BufferedReader(new FileReader(fileName), BUFFER_BYTE_SIZE);
+ }
+
+ throw new IOException("Unknown file format of file " + fileName+".");
+ }
+ }
+
+ public final static String getID(String line)
+ {
+ String ids[] = line.split(":");
+ int commaPos = ids[1].indexOf(",");
+ if (commaPos != -1)
+ {
+ return ids[1].substring(1, commaPos).trim();
+ }
+ else
+ {
+ return ids[1];
+ }
+ }
+
+ public final static double getLetterPercentInRead(String fasta, String letter)
+ {
+ int ungappedLen = getUngappedRead(fasta).length();
+ int count = countLetterInRead(fasta, letter);
+
+ return count / (double) ungappedLen;
+ }
+
+ public final static int getOvlSize(int readA, int readB, int ahang, int bhang)
+ {
+ if ((ahang <= 0 && bhang >= 0) || (ahang >= 0 && bhang <= 0))
+ {
+ return -1;
+ }
+
+ if (ahang < 0)
+ {
+ return readA - Math.abs(bhang);
+ }
+ else
+ {
+ return readA - ahang;
+ }
+ }
+
+ public final static int getRangeOverlap(int startA, int endA, int startB, int endB)
+ {
+ int minA = Math.min(startA, endA);
+ int minB = Math.min(startB, endB);
+ int maxA = Math.max(startA, endA);
+ int maxB = Math.max(startB, endB);
+
+ int start = Math.max(minA, minB);
+ int end = Math.min(maxA, maxB);
+
+ return (end - start + 1);
+ }
+
+ public final static String getUngappedRead(String fasta)
+ {
+ fasta = fasta.replaceAll("N", "");
+ fasta = fasta.replaceAll("-", "");
+
+ assert (fasta.length() >= 0);
+
+ return fasta;
+ }
+
+ public final static String getValue(String line, String key)
+ {
+ if (line.startsWith(key))
+ {
+ return line.split(":")[1];
+ }
+
+ return null;
+ }
+
+ public final static <H> double hashEfficiency(HashMap<Integer, ArrayList<H>> c)
+ {
+ double e = hashEnthropy(c);
+ double log2inv = 1.0 / Math.log(2);
+ double scaling = Math.log(c.size()) * log2inv;
+
+ return e / scaling;
+ }
+
+ public final static <H> double hashEnthropy(HashMap<Integer, ArrayList<H>> c)
+ {
+ double sum = 0.0;
+ double log2inv = 1.0 / Math.log(2);
+
+ double[] p = new double[c.size()];
+ int size = 0;
+ int count = 0;
+ for (ArrayList<H> elem : c.values())
+ {
+ size += elem.size();
+ p[count++] = elem.size();
+ }
+
+ for (int iter = 0; iter < p.length; iter++)
+ {
+ double val = p[iter] / (double) size;
+ sum -= val * Math.log(val) * log2inv;
+ }
+
+ return sum;
+ }
+
+ public final static boolean isAContainedInB(int startA, int endA, int startB, int endB)
+ {
+ int minA = Math.min(startA, endA);
+ int minB = Math.min(startB, endB);
+ int maxA = Math.max(startA, endA);
+ int maxB = Math.max(startB, endB);
+
+ return (minB < minA && maxB > maxA);
+ }
+
+ public final static Pair<Double, Double> linearRegression(int[] a, int[] b, int size)
+ {
+ // take one pass and compute means
+ int xy = 0;
+ int x = 0;
+ int y = 0;
+ int x2 = 0;
+
+ for (int iter = 0; iter < size; iter++)
+ {
+ xy += a[iter] * b[iter];
+ x += a[iter];
+ y += b[iter];
+ x2 += a[iter] * a[iter];
+ }
+
+ double Ninv = 1.0 / (double) size;
+
+ double beta = ((double) xy - Ninv * (double) (x * y)) / ((double) x2 - Ninv * (double) (x * x));
+ double alpha = Ninv * ((double) y - beta * (double) x);
+
+ return new Pair<Double, Double>(alpha, beta);
+ }
+
+ public final static double mean(double[] a, int size)
+ {
+ double x = 0.0;
+ for (int iter = 0; iter < size; iter++)
+ x += a[iter];
+
+ return x / (double) size;
+ }
+
+ public final static double mean(int[] a, int size)
+ {
+ int x = 0;
+ for (int iter = 0; iter < size; iter++)
+ x += a[iter];
+
+ return x / (double) size;
+ }
+
+ public final static double pearsonCorr(int[] a, int[] b, int size)
+ {
+ if (size < 2)
+ return 0.0;
+
+ double meana = mean(a, size);
+ double meanb = mean(b, size);
+ double stda = std(a, size, meana);
+ double stdb = std(b, size, meanb);
+
+ double r = 0.0;
+ for (int iter = 0; iter < size; iter++)
+ {
+ r += ((double) a[iter] - meana) * ((double) b[iter] - meanb) / (stda * stdb);
+ }
+
+ return r / (double) (size - 1);
+ }
+
+ // adapted form
+ // http://blog.teamleadnet.com/2012/07/quick-select-algorithm-find-kth-element.html
+ public final static int quickSelect(int[] array, int k, int length)
+ {
+ if (array == null || length <= k)
+ return Integer.MAX_VALUE;
+
+ int from = 0;
+ int to = length - 1;
+
+ // if from == to we reached the kth element
+ while (from < to)
+ {
+ int r = from;
+ int w = to;
+ int mid = array[(r + w) / 2];
+
+ // stop if the reader and writer meets
+ while (r < w)
+ {
+ if (array[r] >= mid)
+ {
+ // put the large values at the end
+ int tmp = array[w];
+ array[w] = array[r];
+ array[r] = tmp;
+ w--;
+ }
+ else
+ {
+ // the value is smaller than the pivot, skip
+ r++;
+ }
+ }
+
+ // if we stepped up (r++) we need to step one down
+ if (array[r] > mid)
+ r--;
+
+ // the r pointer is on the end of the first k elements
+ if (k <= r)
+ {
+ to = r;
+ }
+ else
+ {
+ from = r + 1;
+ }
+ }
+
+ return array[k];
+ }
+
+ public final static String rc(String supplied)
+ {
+ StringBuilder st = new StringBuilder();
+ for (int i = supplied.length() - 1; i >= 0; i--)
+ {
+ char theChar = supplied.charAt(i);
+
+ if (theChar != '-')
+ {
+ Translate t = Translate.valueOf(Character.toString(theChar).toUpperCase());
+ st.append(t.getCompliment());
+ }
+ else
+ {
+ st.append("-");
+ }
+ }
+ return st.toString();
+ }
+
+ public final static double std(double[] a, int size, double mean)
+ {
+ double x = 0.0;
+ for (int iter = 0; iter < size; iter++)
+ {
+ double val = a[iter] - mean;
+ x += val * val;
+ }
+
+ return Math.sqrt(x / (double) (size - 1));
+ }
+
+ public final static double std(int[] a, int size, double mean)
+ {
+ double x = 0.0;
+ for (int iter = 0; iter < size; iter++)
+ {
+ double val = (double) a[iter] - mean;
+ x += val * val;
+ }
+
+ return Math.sqrt(x / (double) (size - 1));
+ }
+
+ public final static String toProtein(String genome, boolean isReversed, int frame)
+ {
+ StringBuilder result = new StringBuilder();
+
+ if (isReversed)
+ {
+ genome = rc(genome);
+ }
+ genome = genome.replaceAll("-", "");
+
+ for (int i = frame; i < (genome.length() - 3); i += 3)
+ {
+ String codon = genome.substring(i, i + 3);
+ String protein = ToProtein.valueOf(codon).getProtein();
+ result.append(protein);
+ }
+
+ return result.toString();
+ }
+
+ public static String toString(double[][] A)
+ {
+ StringBuilder s = new StringBuilder();
+
+ s.append("[");
+
+ for (double[] a : A)
+ {
+ if (a != null)
+ {
+ for (int iter = 0; iter < a.length - 1; iter++)
+ s.append("" + a[iter] + ",");
+
+ if (a.length > 0)
+ s.append("" + a[a.length - 1]);
+ }
+ s.append("\n");
+ }
+ s.append("]");
+
+ return new String(s);
+ }
+
+ public static String toString(float[][] A)
+ {
+ StringBuilder s = new StringBuilder();
+
+ s.append("[");
+
+ for (float[] a : A)
+ {
+ if (a != null)
+ {
+ for (int iter = 0; iter < a.length - 1; iter++)
+ s.append("" + a[iter] + ",");
+
+ if (a.length > 0)
+ s.append("" + a[a.length - 1]);
+ }
+ s.append("\n");
+ }
+ s.append("]");
+
+ return new String(s);
+ }
+
+ public static String toString(long[][] A)
+ {
+ StringBuilder s = new StringBuilder();
+
+ s.append("[");
+
+ for (long[] a : A)
+ {
+ if (a != null)
+ {
+ for (int iter = 0; iter < a.length - 1; iter++)
+ s.append("" + a[iter] + ",");
+
+ if (a.length > 0)
+ s.append("" + a[a.length - 1]);
+ }
+ s.append("\n");
+ }
+ s.append("]");
+
+ return new String(s);
+ }
+}
diff --git a/src/main/resources/edu/umd/marbl/mhap/README b/src/main/resources/edu/umd/marbl/mhap/README
new file mode 100644
index 0000000..91cb2e5
--- /dev/null
+++ b/src/main/resources/edu/umd/marbl/mhap/README
@@ -0,0 +1 @@
+<empty>
\ No newline at end of file
diff --git a/src/test/resources/edu/umd/marbl/mhap/matrix/score_matrix.txt b/src/test/resources/edu/umd/marbl/mhap/matrix/score_matrix.txt
new file mode 100644
index 0000000..3171b23
--- /dev/null
+++ b/src/test/resources/edu/umd/marbl/mhap/matrix/score_matrix.txt
@@ -0,0 +1,25 @@
+ A R N B D C Q Z E G H I L K M F P S T W Y V X *
+A 5 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6
+R -6 5 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6
+N -6 -6 5 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6
+B -6 -6 -6 5 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6
+D -6 -6 -6 -6 5 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6
+C -6 -6 -6 -6 -6 5 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6
+Q -6 -6 -6 -6 -6 -6 5 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6
+Z -6 -6 -6 -6 -6 -6 -6 5 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6
+E -6 -6 -6 -6 -6 -6 -6 -6 5 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6
+G -6 -6 -6 -6 -6 -6 -6 -6 -6 5 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6
+H -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 5 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6
+I -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 5 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6
+L -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 5 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6
+K -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 5 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6
+M -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 5 -6 -6 -6 -6 -6 -6 -6 -6 -6
+F -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 5 -6 -6 -6 -6 -6 -6 -6 -6
+P -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 5 -6 -6 -6 -6 -6 -6 -6
+S -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 5 -6 -6 -6 -6 -6 -6
+T -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 5 -6 -6 -6 -6 -6
+W -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 5 -6 -6 -6 -6
+Y -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 5 -6 -6 -6
+V -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 5 -6 -6
+X -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 0 -6
+* -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 0
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/mhap.git
More information about the debian-med-commit
mailing list