[med-svn] [mhap] 06/09: Imported Upstream version 2.0+dfsg
Afif Elghraoui
afif at moszumanska.debian.org
Fri Mar 25 06:05:19 UTC 2016
This is an automated email from the git hooks/post-receive script.
afif pushed a commit to branch master
in repository mhap.
commit a4d148f2cf9d3e9a15e69803032b578dee073897
Author: Afif Elghraoui <afif at ghraoui.name>
Date: Thu Mar 24 21:54:35 2016 -0700
Imported Upstream version 2.0+dfsg
---
README.md | 25 +-
build.xml | 377 ---
docs/source/conf.py | 4 +-
docs/source/contact.rst | 2 +-
docs/source/index.rst | 2 +-
docs/source/installation.rst | 32 +-
docs/source/quickstart.rst | 52 +-
docs/source/utilities.rst | 20 +-
pom.xml | 137 +
.../marbl/mhap/align/AlignElementDoubleSketch.java | 5 +-
.../java/edu/umd/marbl/mhap/align/Aligner.java | 43 +-
.../java/edu/umd/marbl/mhap/impl/FastaData.java | 74 +-
.../java/edu/umd/marbl/mhap/impl/MatchResult.java | 2 +-
.../mhap/impl/MinHashBitSequenceSubSketches.java | 103 +-
.../edu/umd/marbl/mhap/impl/MinHashSearch.java | 25 +-
.../java/edu/umd/marbl/mhap/impl/OverlapInfo.java | 15 +-
.../java/edu/umd/marbl/mhap/impl/SequenceId.java | 14 +-
.../edu/umd/marbl/mhap/impl/SequenceSketch.java | 82 +-
.../marbl/mhap/impl/SequenceSketchStreamer.java | 10 +-
.../java/edu/umd/marbl/mhap/main/AlignmentTry.java | 7 +-
.../java/edu/umd/marbl/mhap/main/EstimateROC.java | 128 +-
.../edu/umd/marbl/mhap/main/KmerStatSimulator.java | 10 +
.../java/edu/umd/marbl/mhap/main/MhapMain.java | 151 +-
.../java/edu/umd/marbl/mhap/math/BasicMath.java | 10 +-
.../java/edu/umd/marbl/mhap/math/FastMath.java | 3379 --------------------
.../umd/marbl/mhap/sketch/AbstractBitSketch.java | 2 +-
.../java/edu/umd/marbl/mhap/sketch/BottomHash.java | 72 +
.../java/edu/umd/marbl/mhap/sketch/CountMin.java | 20 -
.../umd/marbl/mhap/sketch/OrderedNGramHashes.java | 747 +++--
...NGramHashes.java => OrderedNGramHashesOld.java} | 12 +-
.../edu/umd/marbl/mhap/utils/CharacterHash.java | 51 -
.../java/edu/umd/marbl/mhap/utils/CyclicHash.java | 72 -
.../java/edu/umd/marbl/mhap/utils/PackageInfo.java | 87 -
.../edu/umd/marbl/mhap/utils/ParseOptions.java | 2 +-
34 files changed, 1013 insertions(+), 4761 deletions(-)
diff --git a/README.md b/README.md
index 003d328..51b300a 100644
--- a/README.md
+++ b/README.md
@@ -1,19 +1,36 @@
# MHAP
-MinHash alignment process (MHAP pronounced MAP): locality sensitive hashing to detect overlaps and utilities. This is the development branch, please use the [latest tagged](https://github.com/marbl/MHAP/releases/tag/v1.0).
+MinHash alignment process (MHAP pronounced MAP): locality sensitive hashing to detect overlaps and utilities. This is the development branch, please use the [latest tagged](https://github.com/marbl/MHAP/releases/tag/v2.0).
## Build
-You must have a recent [JDK](http://www.oracle.com/technetwork/java/javase/downloads/jdk7-downloads-1880260.html "JDK") and [Apache ANT](http://ant.apache.org/ "ANT") available. To checkout and build run:
+You must have a recent [JDK](http://www.oracle.com/technetwork/java/javase/downloads/index.html "JDK") and [Apache Maven](http://maven.apache.org/ "MAVEN") available. To checkout and build run:
git clone https://github.com/marbl/MHAP.git
cd MHAP
- ant
+ maven install
For a quick user-quide, run:
cd target
- java -jar mhap-1.0.jar
+ java -jar mhap-2.0.jar
## Docs
For the full documentation information please see http://mhap.readthedocs.org/en/
+
+## Cite
+ - Berlin K, Koren S, Chin CS, Drake PJ, Landolin JM, Phillippy AM [Assembling Large Genomes with Single-Molecule Sequencing and Locality Sensitive Hashing](http://www.nature.com/nbt/journal/v33/n6/abs/nbt.3238.html "nb"). Nature Biotechnology. (2015).
+
+## License
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/build.xml b/build.xml
deleted file mode 100755
index 0570315..0000000
--- a/build.xml
+++ /dev/null
@@ -1,377 +0,0 @@
-<?xml version = '1.0' encoding = 'utf-8'?>
-
-<project name="MHAP" default="main" basedir=".">
-
- <!-- ========== Component Declarations ==================================== -->
-
- <property name="component.version" value="1.6"/>
-
- <!-- The name of this component -->
- <property name="copyright.name" value="Konstantin Berlin and Sergey Koren"/>
-
- <!-- The name of this component -->
- <property name="component.name" value="mhap"/>
-
- <!-- The primary package name of this component -->
- <property name="component.package" value="edu.umd.marbl.mhap"/>
-
- <!-- The title of this component -->
- <property name="component.title" value="MHAP"/>
-
- <!-- The base directory for component sources -->
- <property name="source.home" value="src/main/java"/>
-
- <!-- The base directory for component resources -->
- <property name="source.resources" value="src/main/resources"/>
-
- <!-- The base directory for opencl files -->
- <property name="source.opencl" value="src/main/opencl"/>
-
- <!-- The base directory for matlab files -->
- <property name="source.matlab" value="src/main/matlab"/>
-
- <!-- The base directory for cc files -->
- <property name="source.cc" value="src/main/cc"/>
-
- <!-- The base directory for unit test sources -->
- <property name="test.home" value="src/test/java"/>
-
- <!-- The base directory for unit test resources -->
- <property name="test.resources" value="src/test/resources"/>
-
- <!-- The base directory for compilation targets -->
- <property name="build.home" value="target"/>
-
- <!-- The base directory for distribution targets -->
- <property name="dist.home" value="${build.home}/dist"/>
-
- <!-- The directory for library containing the dependency jar files -->
- <property name="lib.dir" value="lib"/>
-
- <!-- The directory that contains the binary files -->
- <property name="bin.dir" value="bin"/>
-
- <!-- The base directory for test reports -->
- <property name="test.reports" value="${build.home}/test-reports"/>
-
- <!-- Base file name for dist files -->
- <property name="final.name" value="${component.name}-${component.version}"/>
-
- <!-- Directory where binary release files are staged -->
- <property name="stage.bin.dir" value="${dist.home}/stage-bin"/>
-
- <!-- Directory where source release files are staged -->
- <property name="stage.src.dir" value="${dist.home}/stage-src"/>
-
- <!-- ========== Compiler Defaults ========================================= -->
-
- <!-- Should Java compilations set the 'debug' compiler option? -->
- <property name="compile.debug" value="false"/>
-
- <!-- Should Java compilations set the 'deprecation' compiler option? -->
- <property name="compile.deprecation" value="false"/>
-
- <!-- Should Java compilations set the 'optimize' compiler option? -->
- <property name="compile.optimize" value="true"/>
-
- <!-- JDK level -->
- <property name="compile.source" value="1.8"/>
- <property name="compile.target" value="1.8"/>
-
- <!-- Base compile classpath -->
-
- <!-- Generate path for binary distrubtion dependencies -->
- <path id="compile.classpath">
- <fileset dir="${lib.dir}" includes="**/*.jar"/>
- <pathelement location="${build.home}/classes"/>
- </path>
-
- <!-- ========== Test Execution Defaults =================================== -->
-
-
- <!-- Construct unit test classpath -->
- <path id="test.classpath">
- <fileset dir="${lib.dir}" includes="**/*.jar"/>
- <pathelement location="${build.home}/classes"/>
- <pathelement location="${build.home}/test-classes"/>
- <pathelement location="${junit.jar}"/>
- </path>
-
- <!-- Should the build fail if there are test failures? -->
- <property name="test.failonerror" value="false"/>
-
- <!-- ========== Executable Targets ======================================== -->
-
- <target name="clean" description="Clean build and distribution directories">
- <delete dir="${build.home}"/>
- </target>
-
-
- <target name="init"
- description="Initialize and evaluate conditionals">
- <echo message="-------- ${component.title} ${component.version} --------"/>
- <filter token="name" value="${component.name}"/>
- <filter token="package" value="${component.package}"/>
- <filter token="version" value="${component.version}"/>
- <filter token="compilesource" value="${compile.source}"/>
- <filter token="compiletarget" value="${compile.target}"/>
- <tstamp/>
- <mkdir dir="${build.home}"/>
- <mkdir dir="${build.home}/classes"/>
- <mkdir dir="${build.home}/classes/${bin.dir}"/>
- <mkdir dir="${build.home}/classes/properties"/>
- <mkdir dir="${build.home}/test-classes"/>
- <copy todir="${build.home}/classes/">
- <fileset dir="${source.resources}" />
- </copy>
- </target>
-
- <!-- ========== Build Info File =========================================== -->
- <target name="buildinfo">
- <tstamp>
- <format property="builtat" pattern="MM/dd/yyyy hh:mm aa" timezone="America/New_York"/>
- </tstamp>
-
- <propertyfile file="${build.home}/classes/properties/mhap.properties"
- comment="This file is automatically generated - DO NOT EDIT">
- <entry key="buildtime" value="${builtat}"/>
- <entry key="builder" value="${whoami}"/>
- <entry key="component.version" value="${component.version}"/>
- <entry key="system" value="${buildsystem}"/>
- </propertyfile>
- </target>
-
- <!-- ========== Compile Targets =========================================== -->
-
- <target name="compile" depends="init,copy-lib,buildinfo" description="Compile">
-
- <javac srcdir="${source.home}"
- destdir="${build.home}/classes"
- source="${compile.source}"
- target="${compile.target}"
- debug="${compile.debug}"
- deprecation="${compile.deprecation}"
- includeantruntime="false"
- optimize="${compile.optimize}">
- <classpath refid="compile.classpath"/>
- </javac>
- </target>
-
-
- <!-- ========== Unit Test Targets ========================================= -->
-
- <target name="compile.tests" depends="compile" description="Compile unit tests.">
-
- <javac srcdir="${test.home}"
- destdir="${build.home}/test-classes"
- source="${compile.source}"
- target="${compile.target}"
- debug="${compile.debug}"
- deprecation="${compile.deprecation}"
- includeantruntime="false"
- optimize="${compile.optimize}">
- <classpath refid="test.classpath"/>
- </javac>
-
- <copy todir="${build.home}/test-classes">
- <fileset dir="${test.resources}">
- </fileset>
- </copy>
-
- <copy todir="${build.home}">
- <fileset dir="${source.matlab}" />
- </copy>
-
- </target>
-
- <target name="test" depends="compile.tests"
- description="Run unit tests">
- <mkdir dir="${test.reports}"/>
- <junit printsummary="true"
- errorProperty="test.failed"
- failureProperty="test.failed"
- fork="true"
- showOutput="true">
- <formatter type="plain"/>
- <classpath refid="test.classpath"/>
- <!-- If test.entry is defined, run a single test, otherwise run all valid tests -->
- <!-- N.B. test.entry must be the full path to the test class, for example:
- -->
- <test name="${test.entry}" todir="${test.reports}" if="test.entry"/>
- <batchtest todir="${test.reports}" unless="test.entry">
- <fileset dir="${test.home}">
- <include name="**/*Test.java"/>
- </fileset>
- </batchtest>
- </junit>
- <fail message="There were test failures.">
- <condition>
- <and>
- <istrue value="${test.failonerror}"/>
- <isset property="test.failed"/>
- </and>
- </condition>
- </fail>
- </target>
-
-
-
- <!-- ========== Produce JavaDocs ========================================== -->
-
- <target name="javadoc" depends="compile" description="Create component Javadoc documentation">
- <mkdir dir="${build.home}/apidocs"/>
- <tstamp>
- <format property="current.year" pattern="yyyy"/>
- </tstamp>
- <javadoc sourcepath="${source.home}"
- destdir="${build.home}/apidocs"
- packagenames="edu.umd.umiacs.armor.*"
- author="true"
- private="true"
- version="true"
- doctitle="<h1>${component.title} ${}</h1>"
- windowtitle="${component.title} ${}"
- bottom="Copyright (c) 2011-${current.year} ${copyright.name}"
- classpathref="compile.classpath">
- <link href="http://java.sun.com/j2se/1.6.0/docs/api/"/>
- </javadoc>
- </target>
-
-
- <!-- ========== Create Jar ================================================ -->
-
- <target name="jar" depends="compile" description="Create jar file">
-
- <copy file="LICENSE.txt" tofile="${build.home}/classes/META-INF/LICENSE.txt"/>
- <copy file="NOTICE.txt" tofile="${build.home}/classes/META-INF/NOTICE.txt"/>
-
- <manifest file="${build.home}/MANIFEST.MF">
- <attribute name="Specification-Title" value="${component.title}"/>
- <attribute name="Specification-Version" value="${}"/>
- <attribute name="Specification-Vendor" value="${copyright.name}"/>
- <attribute name="Implementation-Title" value="${component.title}"/>
- <attribute name="Implementation-Version" value="${}"/>
- <attribute name="Implementation-Vendor" value="${copyright.name}"/>
- <attribute name="Implementation-Vendor-Id" value=""/>
- <attribute name="X-Compile-Source-JDK" value="${compile.source}"/>
- <attribute name="X-Compile-Target-JDK" value="${compile.target}"/>
- </manifest>
-
- <jar jarfile="${build.home}/${final.name}.jar"
- basedir="${build.home}/classes"
- manifest="${build.home}/MANIFEST.MF"/>
- </target>
-
-
- <!-- ========== Distribution Target =========================================== -->
-
- <target name="dist" depends="clean,jar,javadoc" description="Create distribution artifacts">
-
- <mkdir dir="${dist.home}"/>
-
- <!-- jar(s) -->
- <copy todir="${dist.home}">
- <fileset dir=".">
- <include name="RELEASE-NOTES.txt"/>
- </fileset>
- <fileset dir="${build.home}">
- <include name="*.jar"/>
- </fileset>
- </copy>
-
- <!-- Binary Distro -->
- <mkdir dir="${stage.bin.dir}/${final.name}"/>
- <copy todir="${stage.bin.dir}/${final.name}">
- <fileset dir=".">
- <include name="LICENSE.txt"/>
- <include name="NOTICE.txt"/>
- </fileset>
- <fileset dir="${build.home}">
- <include name="*.jar"/>
- </fileset>
- </copy>
- <copy todir="${stage.bin.dir}/${final.name}/apidocs">
- <fileset dir="${build.home}/apidocs" />
- </copy>
-
- <!-- Source Distro -->
- <mkdir dir="${stage.src.dir}/${final.name}-src"/>
- <copy todir="${stage.src.dir}/${final.name}-src">
- <fileset dir=".">
- <include name="*.xml"/>
- <include name="*.txt"/>
- <include name="*.html"/>
- </fileset>
- </copy>
- <copy todir="${stage.src.dir}/${final.name}-src/src">
- <fileset dir="src" excludes="mantissa/**,experimental/**" />
- </copy>
- <zip zipfile="${dist.home}/${final.name}.zip" basedir="${stage.bin.dir}"/>
- <zip zipfile="${dist.home}/${final.name}-src.zip" basedir="${stage.src.dir}"/>
- <tar tarfile="${dist.home}/${final.name}.tar" basedir="${stage.bin.dir}" longfile="gnu"/>
- <tar tarfile="${dist.home}/${final.name}-src.tar" basedir="${stage.src.dir}" longfile="gnu"/>
- <gzip src="${dist.home}/${final.name}.tar" zipfile="${dist.home}/${final.name}.tar.gz"/>
- <gzip src="${dist.home}/${final.name}-src.tar" zipfile="${dist.home}/${final.name}-src.tar.gz"/>
-
- <!-- clean up staging directories -->
- <delete dir="${stage.bin.dir}"/>
- <delete dir="${stage.src.dir}"/>
-
- </target>
-
- <property name="params" value=""/>
- <property name="javac.exe" value="javac"/>
- <property name="java.exe" value="java"/>
-
- <target name="copy-lib">
- <mkdir dir="${build.home}/${lib.dir}"/>
- <copy todir="${build.home}/${lib.dir}">
- <fileset dir="${lib.dir}"/>
- </copy>
- </target>
-
- <path id="dep.runtime">
- <fileset dir="./${lib.dir}">
- <include name="**/*.jar" />
- </fileset>
- </path>
- <property name="dep_cp" value="${toString:dep.runtime}" />
-
- <target name="main" depends="jar-mhap">
- </target>
-
- <target name="jar-mhap" depends="clean,compile">
-
- <copy file="LICENSE.txt" tofile="${build.home}/classes/META-INF/LICENSE.txt"/>
- <copy file="NOTICE.txt" tofile="${build.home}/classes/META-INF/NOTICE.txt"/>
-
- <manifestclasspath property="lib.list" jarfile="mhap-${component.version}.jar">
- <classpath refid="compile.classpath"/>
- </manifestclasspath>
-
- <manifest file="${build.home}/MANIFEST.MF">
- <attribute name="Specification-Title" value="${component.title}"/>
- <attribute name="Specification-Version" value="${}"/>
- <attribute name="Specification-Vendor" value="${copyright.name}"/>
- <attribute name="Implementation-Title" value="${component.title}"/>
- <attribute name="Implementation-Version" value="${}"/>
- <attribute name="Implementation-Vendor" value="${copyright.name}"/>
- <attribute name="Implementation-Vendor-Id" value=""/>
- <attribute name="X-Compile-Source-JDK" value="${compile.source}"/>
- <attribute name="X-Compile-Target-JDK" value="${compile.target}"/>
- <attribute name="Main-Class" value="edu.umd.marbl.mhap.main.MhapMain" />
- <attribute name="Class-Path" value="${lib.list}" />
- </manifest>
-
- <jar jarfile="${build.home}/mhap-${component.version}.jar"
- basedir="${build.home}/classes"
- manifest="${build.home}/MANIFEST.MF"/>
-
- <tar destfile="${build.home}/mhap-${component.version}.tar"
- basedir="${build.home}"
- includes="${lib.dir}/**, mhap-${component.version}.jar"
- />
-
- </target>
-
-</project>
\ No newline at end of file
diff --git a/docs/source/conf.py b/docs/source/conf.py
index e4f9473..0da72ed 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -54,9 +54,9 @@ copyright = u'2014, Sergey Koren and Konstantin Berlin'
# built documents.
#
# The short X.Y version.
-version = '1.0'
+version = '2.0'
# The full version, including alpha/beta/rc tags.
-release = '1.0'
+release = '2.0'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
diff --git a/docs/source/contact.rst b/docs/source/contact.rst
index 9f4c351..5367156 100644
--- a/docs/source/contact.rst
+++ b/docs/source/contact.rst
@@ -20,7 +20,7 @@ Please include information on your run::
Who to contact to report bugs, forward complaints, feature requests:
-Konstantin Berlin: kberlin at umd.edu
+Konstantin Berlin: kberlin at gmail.com
----------------------------
Sergey Koren: sergek at umd.edu
diff --git a/docs/source/index.rst b/docs/source/index.rst
index efc07ad..b37a2a9 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -10,7 +10,7 @@ sequence overlapping algorithm. Designed to efficiently detect all overlaps
between noisy long-read sequence data. It efficiently estimates Jaccard similarity
by compressing sequences to their representative fingerprints composed on min-mers (minimum k-mer).
-MHAP is included within the Celera Assembler `PBcR <http://wgs-assembler.sourceforge.net/wiki/index.php?title=PBcR>`_ pipeline. The Celera Assembler can be downloaded `here <https://sourceforge.net/projects/wgs-assembler/files/wgs-assembler/wgs-8.3/>`_.
+MHAP is included within the `Canu <http://canu.readthedocs.org/>`_ assembler. Canu can be downloaded `here <https://github.com/marbl/canu/releases>`_.
Contents:
diff --git a/docs/source/installation.rst b/docs/source/installation.rst
index a93c238..283bc04 100644
--- a/docs/source/installation.rst
+++ b/docs/source/installation.rst
@@ -4,12 +4,14 @@ Installation
Before your start
=================
-MHAP requires a recent version of the `JVM <http://www.oracle.com/technetwork/java/javase/downloads/jre8-downloads-2133155.html>`_ (1.8+). JDK 1.7 or earlier will not work. If you would like to build the code from source, you need to have the `JDK <http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html>`_ and the `ANT <http://ant.apache.org/>`_ build system available.
+MHAP requires a recent version of the `JVM <http://www.oracle.com/technetwork/java/javase/downloads/jre7-downloads-1880261.html>`_ (1.8u6+). JDK 1.7 or earlier will not work. If you would like to build the code from source, you need to have the `JDK <http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html>`_ and the `Maven <https://maven.apache.org>`_ build system available.
Prerequisites
==============
- * java (1.8+)
- * ant (1.8.2+)
+ * java (1.8u6+)
+ * maven (3.0+)
+
+If you have not already installed the dependencies using maven, you will need an internet connection to do so during maven installation.
Here is a list of currently supported Operating Systems:
@@ -26,19 +28,19 @@ The pre-compiled version is recommended to users who want to run MHAP, without d
.. code-block:: bash
- $ wget https://github.com/marbl/MHAP/releases/download/1.6/mhap-1.6.tar.gz
+ $ wget https://github.com/marbl/MHAP/releases/download/v2.0/mhap-2.0.tar.gz
And if ``wget`` not available, you can use ``curl`` instead:
.. code-block:: bash
- $ curl -L https://github.com/marbl/MHAP/releases/download/1.6/mhap-1.6.tar.gz > mhap-1.6.tar.gz
+ $ curl -L https://github.com/marbl/MHAP/releases/download/v2.0/mhap-2.0.tar.gz > mhap-2.0.tar.gz
Then run
.. code-block:: bash
- $ tar xvzf mhap-1.6.tar.gz
+ $ tar xvzf mhap-2.0.tar.gz
Source
-----------------
@@ -47,7 +49,7 @@ To build the code from the release:
.. code-block:: bash
- $ wget https://github.com/marbl/MHAP/archive/1.6.zip
+ $ wget https://github.com/marbl/MHAP/archive/v2.0.zip
If you see a certificate not trusted error, you can add the following option to wget:
@@ -59,27 +61,27 @@ And if ``wget`` not available, you can use ``curl`` instead:
.. code-block:: bash
- $ curl -L https://github.com/marbl/MHAP/archive/1.6.zip > 1.6.zip
+ $ curl -L https://github.com/marbl/MHAP/archive/v2.0.zip > v2.0.zip
-You can also browse the https://github.com/marbl/MHAP/tree/1.6
+You can also browse the https://github.com/marbl/MHAP/tree/v2.0
and click on Downloads.
Once downloaded, extract to unpack:
.. code-block:: bash
- $ unzip 1.6.zip
+ $ unzip v2.0.zip
-Change to MHAP directory:
+Change to MetAMOS directory:
.. code-block:: bash
- $ cd MHAP-1.6
+ $ cd MHAP-2.0
-Once inside the MHAP directory, run:
+Once inside the MetAMOS directory, run:
.. code-block:: bash
- $ ant
+ $ maven install
-This will compile the program and create a target/mhap-1.6.jar file which you can use to run MHAP. The quick-start instructions assume you are in the target directory when running the program. You can also use the target/mhap-1.6.tar file to copy MHAP to a different system or directory.
+This will compile the program and create a target/mhap-2.0.jar file which you can use to run MHAP. The quick-start instructions assume you are in the target directory when running the program. You can also use the target/mhap-2.0.tar file to copy MHAP to a different system or directory. If you would like to run the `validation utilties <utilities.html>`_ you must also download and build the `SSW Library <https://github.com/mengyao/Complete-Striped-Smith-Waterman-Library>`_. Follow the in [...]
diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
index 018724c..8b95733 100644
--- a/docs/source/quickstart.rst
+++ b/docs/source/quickstart.rst
@@ -9,7 +9,7 @@ Running MHAP provides command-line documenation if you run it without parameters
.. code-block:: bash
- $ java -jar mhap-1.6.jar
+ $ java -jar mhap-2.0.jar
MHAP has two main usage modes, the main finds all overlaps between the input sequences. The second only constructs an index which can be subsequently reused.
@@ -18,9 +18,9 @@ Finding overlaps
.. code-block:: bash
- $ java -Xmx32g -server -jar mhap-1.6.jar -s<fasta/dat from/self file> [-q<fasta/dat to file or directory>] [-f<kmer filter list, must be sorted>]
+ $ java -Xmx32g -server -jar mhap-2.0.jar -s<fasta/dat from/self file> [-q<fasta/dat to file or directory>] [-f<kmer filter list, must be sorted>]
-Both the -s and -q options can accept either FastA sequences or binary dat files (generated as described below). The -q option can accept either a file or a directory, in which case all FastA/dat files in the specified directory will be used. By default, only the sequences specified by -s are indexed and the sequences in -q are streamed against the constructed index. Since MHAP is written in Java, the memory usage can be high. Generally, 32GB of RAM is sufficient to index 20K sequences. [...]
+Both the -s and -q options can accept either FastA sequences or binary dat files (generated as described below). The -q option can accept either a file or a directory, in which case all FastA/dat files in the specified directory will be used. By default, only the sequences specified by -s are indexed and the sequences in -q are streamed against the constructed index. Since MHAP is written in Java, the memory usage can be high. Generally, 32GB of RAM is sufficient to index 40K sequences. [...]
The optional -f flag provides a file of repetitive k-mers which should not be selected as min-mers. The file is a two-column tab-delimited input specifying the kmer and the fraction of total kmers the k-mer comprises. For example:
@@ -36,38 +36,44 @@ Constructing binary index
.. code-block:: bash
- $ java -Xmx32g -server -jar mhap-1.6.jar -p<directory of fasta files> -q <output directory> [-f<kmer filter list, must be sorted>]
+ $ java -Xmx32g -server -jar mhap-2.0.jar -p<directory of fasta files> -q <output directory> [-f<kmer filter list, must be sorted>]
-In this use case, files in the -p directory will be converted to binary dat files in the -q directory. Subsequent runs using the dat files (instead of FastA files) will be faster as the sequences no longer need to be indexed, only loaded into memory.
+In this use case, files in the -p directory will be converted to binary sketch files in the -q directory. Subsequent runs using these files (instead of FastA files) will be faster as the sequences no longer need to be sketched, only loaded into memory.
Output
-----------------
MHAP outputs overlaps in a format similar to BLASR's M4 format. Example output::
- [A ID] [B ID] [Jaccard score] [# shared min-mers] [0=A fwd, 1=A rc] [A start] [A end] [A length] [0=B fwd, 1=B rc] [B start] [B end] [B length]
+ [A ID] [B ID] [% error] [# shared min-mers] [0=A fwd, 1=A rc] [A start] [A end] [A length] [0=B fwd, 1=B rc] [B start] [B end] [B length]
An example of output from a small dataset is below::
- 155 11 87.83225 206 0 69 1693 1704 0 1208 2831 5871
- 155 15 85.08692 163 0 16 1041 1704 1 67 1088 2935
- 155 27 87.11507 159 0 455 1678 1704 0 0 1225 1862
+ 155 11 0.164156 206 0 69 1693 1704 0 1208 2831 5871
+ 155 15 0.157788 163 0 16 1041 1704 1 67 1088 2935
+ 155 27 0.185483 159 0 455 1678 1704 0 0 1225 1862
-In this case sequence 155 overlaps 11, 15, and 27.
+In this case sequence 155 overlaps 11, 15, and 27. The error percent is computed from the Jaccard estimate using `mash distance <http://www.biorxiv.org/content/early/2015/10/26/029827.abstract>`_.
Options
-----------------
The full list of options is available via command-line help (--help or -h). Below is a list of commonly used options.
- -k [int] K-mer size, default=16
- --num-hashes [int] Sketch size, higher=more sensitive but more memory usage and runtime, default=1024
- --num-min-matches [int] The number of hashes that maches before performing local alignment, default=3
- --pacbio_fast [boolean] Set all the parameters for the PacBio fast setting. This is the current best guidance, and could change at any time without warning, default = false
- --pacbio_sensitive [boolean] Set all the parameters for the PacBio sensitive settings. This is the current best guidance, and could change at any time without warning, default = false
- --min-store-length [int length (in bp)] The minimum sequence length to index. Sequences shorter than this are ignored in the index, default=0
- --threshold [int] The threshold for percentage of matching min-mers for a hit to be considered significant. Lowering will output more overlaps but increase false positives, higher will reduce overlaps but remove false positives, default=0.04
- --filter-threshold [double] The cutoff at which the k-mer in the k-mer filter file is considered repetitive. This value for a specific k-mer is specified in the second column in the filter file. If no filter file is provided, this option is ignored, default = 1.0E-5
- --max-shift [double] The fraction of the overlap size by which the overlap sizes in two sequences may differ, default=0.2
- --num-threads [int] The number of threads to use for computation, default (2 x #cores on system)
- --no-self Do not compute self-matches for sequences in the -s file, default=false
- --store-full-id Output full sequence ID from the input FastA file. Otherwise, the output is the position of the sequence in the file (i.e. first sequence gets ID=1, second gets ID=2, and so on), default=false
-
+ -h Displays the help menu.
+ --version Displays the version.
+ --pacbio-fast Set all the parameters for the PacBio fast setting. This is the current best guidance, and could change at any time without warning, default = false.
+ --pacbio-sensitive Set all the parameters for the PacBio sensitive settings. This is the current best guidance, and could change at any time without warning, default = false.
+ --nanopore Set all the parameters for the Nanopore settings. This is the current best guidance, and could change at any time without warning, default = false.
+ -k [int], k-mer size used for MinHashing. The k-mer size for second stage filter is seperate, default = 16.
+ --num-hashes [int], number of min-mers to be used in MinHashing, default = 512.
+ --num-min-matches [int], minimum # min-mer that must be shared before computing second stage filter. Any sequences below that value are considered non-overlapping, default = 3.
+ --threshold [double], the threshold cutoff for the second stage sort-merge filter. This is based on the identity score computed from the Jaccard distance of k-mers (size given by ordered-kmer-size) in the overlapping regions, default = 0.78.
+ --filter-threshold [double], the cutoff at which the k-mer in the k-mer filter file is considered repetitive. This value for a specific k-mer is specified in the second column in the filter file. If no filter file is provided, this option is ignored, default = 1.0E-5.
+ --weighted Perform weighted MinHashing using tf-idf scaling which biases repetitive k-mers to higher hash values. default=false.
+ --max-shift [double], region size to the left and right of the estimated overlap, as derived from the median shift and sequence length, where a k-mer matches are still considered valid. Second stage filter only, default = 0.2.
+ --min-store-length [int], The minimum length of the read that is stored in the box. Used to filter out short reads from FASTA file, default = 0.
+ --no-self Do not compute the overlaps between sequences inside a box. Should be used when the to and from sequences are coming from different files, default = false.
+ --num-threads [int], number of threads to use for computation. Typically set to #cores, , default = 8.
+ --ordered-kmer-size [int], The size of k-mers used in the ordered second stage filter, , default = 12.
+ --ordered-sketch-size [int], The sketch size for second stage filter, default = 1536.
+ --store-full-id Store full IDs as seen in FASTA file, rather than storing just the sequence position in the file. Some FASTA files have long IDS, slowing output of results. This options is ignored when using compressed file format, default = false.
+ -f [string], k-mer filter file used for filtering out highly repetative k-mers. Must be sorted in descending order of frequency (second column), default = "".
diff --git a/docs/source/utilities.rst b/docs/source/utilities.rst
index d58ed1e..59b9470 100644
--- a/docs/source/utilities.rst
+++ b/docs/source/utilities.rst
@@ -14,9 +14,21 @@ Assuming you have a mapping of sequences to a truth (such as a reference genome)
.. code-block:: bash
- $ java -cp mhap-1.6.jar edu.umd.marbl.mhap.main.EstimateROC <reference mapping M4> <overlaps M4/MHAP> <fasta of sequences> [minimum overlap length to evaluate] [number of random trials] [use dynamic programming] [verbose]
+ $ java -cp mhap-2.0.jar edu.umd.marbl.mhap.main.EstimateROC <reference mapping M4> <overlaps M4/MHAP> <fasta of sequences> [minimum overlap length to evaluate] [number of random trials] [use dynamic programming] [verbose]
-The default minimum overlap length is 2000 and default number of trials is 10000. This will estimate sensitivity/specificity to within 1%. It can be increased at the expense of runtime. Specifying 0 will examine all possible N^2 overlap pairs. If the dynamic programming is turned on (by typing true for the parameter), overlaps not present in the reference mapping will be confirmed if a Smith-Watermann alignment can identify the overlap specified.
+The default minimum overlap length is 2000 and default number of trials is 10000. This will estimate sensitivity/specificity to within 1%. It can be increased at the expense of runtime. Specifying 0 will examine all possible N^2 overlap pairs.
+
+If the dynamic programming is turned on (by typing true for the parameter), overlaps not present in the reference mapping will be confirmed if a Smith-Watermann alignment can identify the overlap specified. This step requires the `SSW Library <https://github.com/mengyao/Complete-Striped-Smith-Waterman-Library>`_ to be separately compiled and installed:
+
+.. code-block:: bash
+
+ $ wget https://github.com/mengyao/Complete-Striped-Smith-Waterman-Library/archive/master.zip
+ $ unzip master.gip && cd Complete-Striped-Smith-Waterman-Library-master/src
+ $ make all
+ $ cd /full/path/to/mhap/target/lib
+ $ ln -s /full/path/to/Complete-Striped-Smith-Waterman-Library-master/src/libsswjni.so
+
+Now, you can run the EstimateROC command above.
Simulating Data
-----------------
@@ -25,12 +37,12 @@ MHAP includes a tool to simulate sequencing data with random error as well as es
.. code-block:: bash
- $ java -cp mhap-1.6.jar edu.umd.marbl.mhap.main.KmerStatSimulator <# sequences> <sequence length (bp)> <insertion error rate> <deletion error rate> <substitution error rate> [reference genome]
+ $ java -cp mhap-2.0.jar edu.umd.marbl.mhap.main.KmerStatSimulator <# sequences> <sequence length (bp)> <insertion error rate> <deletion error rate> <substitution error rate> [reference genome]
The error rates must be between 0 and 1 and are additive. Specifying 10% insertion, 2% deletion, and 1% substitution will result in sequences with a 13% error rate. If no reference sequence is given, completely random sequences are generated and errors added. Otherwise, random sequences are drawn from the reference and errors added. Errors are added randomly with no bias.
.. code-block:: bash
- $ java -cp mhap-1.6.jar edu.umd.marbl.mhap.main.KmerStatSimulator <# trials> <kmer size> <sequence length> <overlap length> <insertion error rate> <deletion error rate> <substitution error rate> [one-sided error] [reference genome] [kmer filter]
+ $ java -cp mhap-2.0.jar edu.umd.marbl.mhap.main.KmerStatSimulator <# trials> <kmer size> <sequence length> <overlap length> <insertion error rate> <deletion error rate> <substitution error rate> [one-sided error] [reference genome] [kmer filter]
This usage will output a distribution of Jaccard similarity between a pair of overlapping sequences with the specified error rate (when using the specified k-mer size) and two random sequences of the same length. If no reference sequence is given, completely random sequences are generated and errors added, otherwise sequences are drawn from the reference. When one-sided error is specified (by typing true for the parameter), only one of the two sequences will have error simulated, matchin [...]
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..363549c
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,137 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>mhap</groupId>
+ <artifactId>mhap</artifactId>
+ <version>2.0</version>
+ <name>MinHash Alignment Process</name>
+ <build>
+ <resources>
+ <resource>
+ <directory>src/main/java</directory>
+ <excludes>
+ <exclude>**/*.java</exclude>
+ </excludes>
+ </resource>
+ </resources>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>3.1</version>
+ <configuration>
+ <source>1.8</source>
+ <target>1.8</target>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>copy-dependencies</goal>
+ </goals>
+ <configuration>
+ <outputDirectory>${project.build.directory}/lib</outputDirectory>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <version>2.4.2</version>
+ <executions>
+ <execution>
+ <phase>install</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <minimizeJar>true</minimizeJar>
+ <transformers>
+ <transformer
+ implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+ <mainClass>edu.umd.marbl.mhap.main.MhapMain</mainClass>
+ </transformer>
+ </transformers>
+ </configuration>
+ </execution>
+ </executions>
+ <configuration>
+ <filters>
+ <filter>
+ <artifact>*:*</artifact>
+ <excludes>
+ <exclude>META-INF/*.SF</exclude>
+ <exclude>META-INF/*.DSA</exclude>
+ <exclude>META-INF/*.RSA</exclude>
+ </excludes>
+ </filter>
+ </filters>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <version>2.4</version>
+ <configuration>
+ <archive>
+ <manifest>
+ <addDefaultImplementationEntries>true</addDefaultImplementationEntries>
+ <addClasspath>true</addClasspath>
+ <classpathPrefix>lib</classpathPrefix>
+ <mainClass>edu.umd.marbl.mhap.main.MhapMain</mainClass>
+ </manifest>
+ </archive>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>3.3</version>
+ <configuration>
+ <source />
+ <target />
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+ <repositories>
+ <repository>
+ <id>in-project</id>
+ <name>In Project Repo</name>
+ <url>file://${project.basedir}/lib</url>
+ </repository>
+ </repositories>
+ <dependencies>
+ <dependency>
+ <groupId>it.unimi.dsi</groupId>
+ <artifactId>fastutil</artifactId>
+ <version>7.0.9</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-compress</artifactId>
+ <version>1.10</version>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ <version>19.0</version>
+ </dependency>
+ <dependency>
+ <groupId>com.jaligner</groupId>
+ <artifactId>jaligner</artifactId>
+ <version>1.0</version>
+ </dependency>
+ <dependency>
+ <groupId>com.ssw</groupId>
+ <artifactId>ssw</artifactId>
+ <version>1.0</version>
+ </dependency>
+ </dependencies>
+ <url>https://github.com/marbl/MHAP</url>
+ <description>MinHash alignment process (MHAP pronounced MAP): locality sensitive hashing to detect overlaps and utilities.</description>
+</project>
\ No newline at end of file
diff --git a/src/main/java/edu/umd/marbl/mhap/align/AlignElementDoubleSketch.java b/src/main/java/edu/umd/marbl/mhap/align/AlignElementDoubleSketch.java
index a50c6ee..a6ee2ed 100644
--- a/src/main/java/edu/umd/marbl/mhap/align/AlignElementDoubleSketch.java
+++ b/src/main/java/edu/umd/marbl/mhap/align/AlignElementDoubleSketch.java
@@ -54,7 +54,7 @@ public final class AlignElementDoubleSketch<T extends Sketch<T>> implements Alig
int b2 = alignment.getB2()*2;
if (alignment.getScore()<0.0)
- return new OverlapInfo(0.0, 0.0, 0, 0, 0, 0);
+ return new OverlapInfo(0.0, 0.0, a1, a2, b1, b2);
int offsetStart = similarityOffset(b, alignment.getA1(), alignment.getB1());
int offsetEnd = similarityOffset(b, alignment.getA2(), alignment.getB2());
@@ -79,8 +79,11 @@ public final class AlignElementDoubleSketch<T extends Sketch<T>> implements Alig
double score = alignment.getScore();
//int overlapSize = Math.max(a2-a1, b2-b1);
+ //if (overlapSize<2000)
+ // return new OverlapInfo(0.0, 0.0, 0, 0, 0, 0);
//double relOverlapSize = (double)overlapSize/(double)this.stepSize;
//score = score/relOverlapSize;
+
return new OverlapInfo(score/100000.0, score, a1, a2, b1, b2);
}
diff --git a/src/main/java/edu/umd/marbl/mhap/align/Aligner.java b/src/main/java/edu/umd/marbl/mhap/align/Aligner.java
index 3a03e88..38da943 100644
--- a/src/main/java/edu/umd/marbl/mhap/align/Aligner.java
+++ b/src/main/java/edu/umd/marbl/mhap/align/Aligner.java
@@ -184,13 +184,8 @@ public final class Aligner<S extends AlignElement<S>>
{
//figure out the path
ArrayList<Alignment.Operation> backOperations = new ArrayList<>(a.length()+b.length());
- int i = a.length();
- while (i > maxI) {
- backOperations.add(Operation.DELETE);
- i--;
- }
- i = maxI;
+ int i = maxI;
int j = maxJ;
while (i>0 && j>0)
{
@@ -243,16 +238,16 @@ public final class Aligner<S extends AlignElement<S>>
float sim = (float)a.similarityScore(b, i-1, j-1)+this.scoreOffset;
P[i][j] = Math.max(D[i-1][j]+this.gapOpen, D[i][j-1]+this.gapOpen);
- D[i][j] = D[i-1][j-1]+sim;
+ D[i][j] = S[i-1][j-1]+sim;
S[i][j] = Math.max(P[i][j], D[i][j]);
if (i==a.length())
- S[i][j] = Math.max(S[i][j], P[i][j-1]+this.gapOpen);
+ S[i][j] = Math.max(S[i][j], S[i][j-1]);
if (j==b.length())
- S[i][j] = Math.max(S[i][j], P[i-1][j]+this.gapOpen);
+ S[i][j] = Math.max(S[i][j], S[i-1][j]);
- if (S[i][j] > maxValue && (i==a.length() || j==b.length()))
+ if (S[i][j] > maxValue && (i==a.length() || j==b.length()))
{
maxValue = S[i][j];
maxI = i;
@@ -314,7 +309,33 @@ public final class Aligner<S extends AlignElement<S>>
return new Alignment<S>(a, b, a1, a2, b1, b2, score, this.gapOpen, backOperations);
}
+ else
+ {
+ int i = maxI;
+ int j = maxJ;
+ while (i>0 && j>0)
+ {
+ if (S[i-1][j]>S[i][j-1] && S[i-1][j]>S[i-1][j-1])
+ {
+ i--;
+ }
+ else
+ if (S[i][j-1]>S[i-1][j-1])
+ {
+ j--;
+ }
+ else
+ {
+ i--;
+ j--;
+ }
+ }
+
+ a1 = i;
+ b1 = j;
+
+ return new Alignment<S>(a, b, a1, a2, b1, b2, score, this.gapOpen, null);
+ }
- return new Alignment<S>(a, b, a1, a2, b1, b2, score, this.gapOpen, null);
}
}
diff --git a/src/main/java/edu/umd/marbl/mhap/impl/FastaData.java b/src/main/java/edu/umd/marbl/mhap/impl/FastaData.java
index 2cf2f6d..0e7f65a 100644
--- a/src/main/java/edu/umd/marbl/mhap/impl/FastaData.java
+++ b/src/main/java/edu/umd/marbl/mhap/impl/FastaData.java
@@ -40,7 +40,7 @@ import edu.umd.marbl.mhap.utils.Utils;
public class FastaData implements Cloneable
{
private final BufferedReader fileReader;
- private final int offset;
+ private final long offset;
private String lastLine;
private AtomicLong numberProcessed;
private boolean readFullFile;
@@ -59,7 +59,7 @@ public class FastaData implements Cloneable
this.offset = 0;
}
- public FastaData(String file, int offset) throws IOException
+ public FastaData(String file, long offset) throws IOException
{
try
{
@@ -124,6 +124,9 @@ public class FastaData implements Cloneable
private boolean enqueueNextSequenceInFile() throws IOException
{
+ StringBuilder fastaSeq = new StringBuilder();
+ String header = null;
+
synchronized (this.fileReader)
{
if (this.readFullFile)
@@ -148,45 +151,53 @@ public class FastaData implements Cloneable
throw new MhapRuntimeException("Next sequence does not start with >. Invalid format.");
// process the current header
- String header = null;
if (SequenceId.STORE_FULL_ID)
header = this.lastLine.substring(1).split("[\\s,]+", 2)[0];
//read the first line of the sequence
this.lastLine = this.fileReader.readLine();
- StringBuilder fastaSeq = new StringBuilder();
while (true)
{
- if (this.lastLine == null || this.lastLine.startsWith(">"))
+ if (this.lastLine!=null && !this.lastLine.startsWith(">"))
{
- //generate sequence id
- SequenceId id;
- if (SequenceId.STORE_FULL_ID)
- id = new SequenceId(this.numberProcessed.intValue() + this.offset + 1, true, header);
- else
- id = new SequenceId(this.numberProcessed.intValue() + this.offset + 1);
-
- Sequence seq = new Sequence(fastaSeq.toString().toUpperCase(Locale.ENGLISH), id);
-
- // enqueue sequence
- this.sequenceList.add(seq);
- this.numberProcessed.getAndIncrement();
-
- if (this.lastLine == null)
- {
- this.fileReader.close();
- this.readFullFile = true;
- }
-
- return true;
+ // append the last line
+ fastaSeq.append(this.lastLine);
+ this.lastLine = this.fileReader.readLine();
}
-
- // append the last line
- fastaSeq.append(this.lastLine);
- this.lastLine = this.fileReader.readLine();
+ else
+ if (this.lastLine == null)
+ {
+ this.fileReader.close();
+ this.readFullFile = true;
+ break;
+ }
+ else
+ break;
}
+ }
+
+ String fastaSeqSring = fastaSeq.toString();
+ if (!fastaSeqSring.isEmpty())
+ {
+ long index = this.numberProcessed.incrementAndGet();
+
+ //generate sequence id
+ SequenceId id;
+ if (SequenceId.STORE_FULL_ID)
+ id = new SequenceId(index + this.offset, true, header);
+ else
+ id = new SequenceId(index + this.offset);
+
+ Sequence seq = new Sequence(fastaSeq.toString().toUpperCase(Locale.ENGLISH), id);
+
+ // enqueue sequence
+ this.sequenceList.add(seq);
+
+ return true;
}
+ else
+ return false;
}
@@ -214,7 +225,10 @@ public class FastaData implements Cloneable
public boolean isEmpty()
{
- return this.sequenceList.isEmpty() && this.readFullFile;
+ synchronized (this.fileReader)
+ {
+ return this.sequenceList.isEmpty() && this.readFullFile;
+ }
}
/*
diff --git a/src/main/java/edu/umd/marbl/mhap/impl/MatchResult.java b/src/main/java/edu/umd/marbl/mhap/impl/MatchResult.java
index 3848873..1042613 100644
--- a/src/main/java/edu/umd/marbl/mhap/impl/MatchResult.java
+++ b/src/main/java/edu/umd/marbl/mhap/impl/MatchResult.java
@@ -100,7 +100,7 @@ public final class MatchResult implements Comparable<MatchResult>
return String.format("%s %s %.6f %.6f %d %d %d %d %d %d %d %d",
getFromId().getHeader(),
getToId().getHeader(),
- (1.0-getScore())*100.0,
+ 1.0-getScore(),
this.rawScore,
getFromId().isForward() ? 0 : 1,
this.a1,
diff --git a/src/main/java/edu/umd/marbl/mhap/impl/MinHashBitSequenceSubSketches.java b/src/main/java/edu/umd/marbl/mhap/impl/MinHashBitSequenceSubSketches.java
index 004feac..24aa93b 100644
--- a/src/main/java/edu/umd/marbl/mhap/impl/MinHashBitSequenceSubSketches.java
+++ b/src/main/java/edu/umd/marbl/mhap/impl/MinHashBitSequenceSubSketches.java
@@ -32,80 +32,15 @@ import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.nio.ByteBuffer;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.Map.Entry;
-
import edu.umd.marbl.mhap.align.AlignElementDoubleSketch;
import edu.umd.marbl.mhap.align.Aligner;
-import edu.umd.marbl.mhap.sketch.HashUtils;
import edu.umd.marbl.mhap.sketch.MinHashBitSketch;
-import edu.umd.marbl.mhap.sketch.SketchRuntimeException;
-import edu.umd.marbl.mhap.utils.HitCounter;
+import edu.umd.marbl.mhap.sketch.MinHashSketch;
public final class MinHashBitSequenceSubSketches
{
private final AlignElementDoubleSketch<MinHashBitSketch> alignmentSketch;
- private final static int[] computeNgramMinHashesWeighted(String seq, final int nGramSize, final int numHashes)
- {
- final int numberNGrams = seq.length() - nGramSize + 1;
-
- if (numberNGrams < 1)
- throw new SketchRuntimeException("N-gram size bigger than string length.");
-
- // get the kmer hashes
- final long[] kmerHashes = HashUtils.computeSequenceHashesLong(seq, nGramSize, 0);
-
- //now compute the counts of occurance
- HashMap<Long, HitCounter> hitMap = new LinkedHashMap<>(kmerHashes.length);
- int maxCount = 0;
- for (long kmer : kmerHashes)
- {
- HitCounter counter = hitMap.get(kmer);
- if (counter==null)
- {
- counter = new HitCounter(1);
- hitMap.put(kmer, counter);
- }
- else
- counter.addHit();
-
- if (maxCount<counter.count)
- maxCount = counter.count;
- }
-
- int[] best = new int[numHashes];
- Arrays.fill(best, Integer.MAX_VALUE);
-
- for (Entry<Long, HitCounter> kmer : hitMap.entrySet())
- {
- long key = kmer.getKey();
- int weight = kmer.getValue().count;
-
- //set the initial shift value
- int x = (int)key;
- for (int word = 0; word < numHashes; word++)
- {
- for (int count = 0; count<weight; count++)
- {
- // XORShift Random Number Generators
- x ^= (x << 21);
- x ^= (x >>> 35);
- x ^= (x << 4);
-
- int intX = (int)x;
-
- if (intX < best[word])
- best[word] = intX;
- }
- }
- }
-
- return best;
- }
-
public final static MinHashBitSketch[] computeSequences(String seq, int nGramSize, int stepSize, int numWords)
{
int remainder = seq.length()%stepSize;
@@ -125,7 +60,7 @@ public final class MinHashBitSequenceSubSketches
int currStart = Math.max(0, end-stepSize);
//compute minhashes
- int[] sketch = computeNgramMinHashesWeighted(seq.substring(currStart, end), nGramSize, numWords*64);
+ int[] sketch = new MinHashSketch(seq.substring(currStart, end), nGramSize, numWords*64).getMinHashArray();
sequence[iter] = new MinHashBitSketch(sketch);
@@ -140,43 +75,27 @@ public final class MinHashBitSequenceSubSketches
int remainder = seq.length()%stepSize;
//get number of sequence
- int numSequence = (seq.length()-remainder)/stepSize;
+ int numSequence = (seq.length()-remainder)/stepSize-1;
- if (remainder>0)
+ //make sure big engough
+ if (remainder>=stepSize/2 && remainder>=nGramSize)
numSequence++;
//make sketches out of them
int start = 0;
- int[][] sketches = new int[numSequence][numWords*64];
+ MinHashBitSketch[] sketches = new MinHashBitSketch[numSequence];
for (int iter=0; iter<numSequence; iter++)
{
- int end = Math.min(seq.length(), start+stepSize);
- int currStart = Math.max(0, end-stepSize);
+ int end = Math.min(seq.length(), start+stepSize*2);
+ int currStart = Math.max(0, end-stepSize*2);
//compute minhashes
- sketches[iter] = computeNgramMinHashesWeighted(seq.substring(currStart, end), nGramSize, numWords*64);
+ sketches[iter] = new MinHashBitSketch(new MinHashSketch(seq.substring(currStart, end), nGramSize, numWords*64).getMinHashArray());
start += stepSize;
}
- MinHashBitSketch[] sequence = new MinHashBitSketch[numSequence];
- for (int iter=0; iter<sketches.length; iter++)
- {
- //now convert in sequence double the length
- if ((iter+1)<sketches.length)
- {
- sequence[iter] = new MinHashBitSketch(union(sketches[iter], sketches[iter+1]));
- if ((iter+2)<sketches.length)
- sequence[iter+1] = new MinHashBitSketch(union(sketches[iter+1], sketches[iter+2]));
- else
- sequence[iter+1] = new MinHashBitSketch(sketches[iter+1]);
- }
- else
- sequence[iter] = new MinHashBitSketch(sketches[iter]);
-
- }
-
- return sequence;
+ return sketches;
}
public OverlapInfo getOverlapInfo(Aligner<AlignElementDoubleSketch<MinHashBitSketch>> aligner, MinHashBitSequenceSubSketches b)
@@ -223,6 +142,7 @@ public final class MinHashBitSequenceSubSketches
this.alignmentSketch = new AlignElementDoubleSketch<>(computeSequencesDouble(seq, kmerSize, stepSize, numWords), stepSize, seq.length());
}
+ /*
private static int[] union(int[] minHashes1, int[] minHashes2)
{
int[] newHashes = new int[minHashes1.length];
@@ -232,6 +152,7 @@ public final class MinHashBitSequenceSubSketches
return newHashes;
}
+ */
public byte[] getAsByteArray()
{
diff --git a/src/main/java/edu/umd/marbl/mhap/impl/MinHashSearch.java b/src/main/java/edu/umd/marbl/mhap/impl/MinHashSearch.java
index 9073477..1ea3f2b 100644
--- a/src/main/java/edu/umd/marbl/mhap/impl/MinHashSearch.java
+++ b/src/main/java/edu/umd/marbl/mhap/impl/MinHashSearch.java
@@ -39,9 +39,6 @@ import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.atomic.AtomicLong;
-import edu.umd.marbl.mhap.align.AlignElementDoubleSketch;
-import edu.umd.marbl.mhap.align.Aligner;
-import edu.umd.marbl.mhap.sketch.MinHashBitSketch;
import edu.umd.marbl.mhap.sketch.MinHashSketch;
import edu.umd.marbl.mhap.utils.HitCounter;
@@ -56,16 +53,12 @@ public final class MinHashSearch extends AbstractMatchSearch
private final int minStoreLength;
private final AtomicLong numberElementsProcessed;
- private final Aligner<AlignElementDoubleSketch<MinHashBitSketch>> aligner;
-
private final AtomicLong numberSequencesFullyCompared;
private final AtomicLong numberSequencesHit;
private final AtomicLong numberSequencesMinHashed;
private final int numMinMatches;
- private final double alignmentScore;
private final Map<SequenceId, SequenceSketch> sequenceVectorsHash;
-
public MinHashSearch(SequenceSketchStreamer data, int numHashes, int numMinMatches, int numThreads,
boolean storeResults, int minStoreLength, double maxShift, double acceptScore, double alignmentOffset, double alignmentScore) throws IOException
@@ -86,10 +79,6 @@ public final class MinHashSearch extends AbstractMatchSearch
// enqueue full file, since have to know full size
data.enqueueFullFile(false, this.numThreads);
- //store the bit aligner
- this.aligner = new Aligner<AlignElementDoubleSketch<MinHashBitSketch>>(true, 0.0, 0.0, alignmentOffset);
- this.alignmentScore = alignmentScore;
-
//this.sequenceVectorsHash = new HashMap<>(data.getNumberProcessed());
this.sequenceVectorsHash = new Object2ObjectOpenHashMap<>(data.getNumberProcessed());
@@ -240,18 +229,8 @@ public final class MinHashSearch extends AbstractMatchSearch
continue;
//compute the direct hash score
- OverlapInfo result;
- boolean accept;
- if (seqHashes.useAlignment())
- {
- result = seqHashes.getAlignmentSequence().getOverlapInfo(this.aligner, matchedHashes.getAlignmentSequence());
- accept = result.rawScore>this.alignmentScore;
- }
- else
- {
- result = seqHashes.getOrderedHashes().getOverlapInfo(matchedHashes.getOrderedHashes(), this.maxShift);
- accept = result.score >= this.acceptScore;
- }
+ OverlapInfo result = seqHashes.getOrderedHashes().getOverlapInfo(matchedHashes.getOrderedHashes(), this.maxShift);
+ boolean accept = result.score >= this.acceptScore;
//increment the counter
this.numberSequencesFullyCompared.getAndIncrement();
diff --git a/src/main/java/edu/umd/marbl/mhap/impl/OverlapInfo.java b/src/main/java/edu/umd/marbl/mhap/impl/OverlapInfo.java
index ac0f101..ef135db 100644
--- a/src/main/java/edu/umd/marbl/mhap/impl/OverlapInfo.java
+++ b/src/main/java/edu/umd/marbl/mhap/impl/OverlapInfo.java
@@ -30,12 +30,14 @@ package edu.umd.marbl.mhap.impl;
public final class OverlapInfo
{
- public final double score;
- public final double rawScore;
public final int a1;
- public final int b1;
public final int a2;
+ public final int b1;
public final int b2;
+ public final double rawScore;
+ public final double score;
+
+ public static OverlapInfo EMPTY = new OverlapInfo(0.0, 0.0, 0, 0, 0, 0);
public OverlapInfo(double score, double rawScore, int a1, int a2, int b1, int b2)
{
@@ -46,7 +48,7 @@ public final class OverlapInfo
this.b1 = b1;
this.b2 = b2;
}
-
+
/* (non-Javadoc)
* @see java.lang.Object#toString()
*/
@@ -55,10 +57,5 @@ public final class OverlapInfo
{
return "[score="+this.score+", a1="+this.a1+" a2="+this.a2+", b1="+this.b1+" b2="+this.b2+"]";
}
-
- public String toBlasrString()
- {
- return ""+this.score+", "+this.a1+", "+this.a2+", "+this.b1+", "+this.b2;
- }
}
diff --git a/src/main/java/edu/umd/marbl/mhap/impl/SequenceId.java b/src/main/java/edu/umd/marbl/mhap/impl/SequenceId.java
index e07f344..672b2c1 100644
--- a/src/main/java/edu/umd/marbl/mhap/impl/SequenceId.java
+++ b/src/main/java/edu/umd/marbl/mhap/impl/SequenceId.java
@@ -37,32 +37,32 @@ public final class SequenceId implements Serializable
*
*/
private static final long serialVersionUID = 2181572437818064822L;
- private final int id;
+ private final long id;
private final boolean isFwd;
private final String strId;
public static boolean STORE_FULL_ID = false;
- public SequenceId(int id)
+ public SequenceId(long id)
{
this(id, true);
}
- public SequenceId(int id, boolean isFwd)
+ public SequenceId(long id, boolean isFwd)
{
this.id = id;
this.isFwd = isFwd;
this.strId = null;
}
- public SequenceId(int id, boolean isFwd, String strId)
+ public SequenceId(long id, boolean isFwd, String strId)
{
this.id = id;
this.isFwd = isFwd;
this.strId = strId;
}
- public SequenceId createOffset(int offset)
+ public SequenceId createOffset(long offset)
{
return new SequenceId(this.id+offset, this.isFwd, this.strId);
}
@@ -94,7 +94,7 @@ public final class SequenceId implements Serializable
return this.isFwd;
}
- public int getHeaderId()
+ public long getHeaderId()
{
return this.id;
}
@@ -113,7 +113,7 @@ public final class SequenceId implements Serializable
@Override
public int hashCode()
{
- return this.isFwd? this.id : -this.id;
+ return this.isFwd? (int)this.id : -(int)this.id;
}
/* (non-Javadoc)
diff --git a/src/main/java/edu/umd/marbl/mhap/impl/SequenceSketch.java b/src/main/java/edu/umd/marbl/mhap/impl/SequenceSketch.java
index 8465ff4..9c9dfcc 100644
--- a/src/main/java/edu/umd/marbl/mhap/impl/SequenceSketch.java
+++ b/src/main/java/edu/umd/marbl/mhap/impl/SequenceSketch.java
@@ -50,12 +50,11 @@ public final class SequenceSketch implements Serializable
private final SequenceId id;
private final MinHashSketch mainHashes;
private final OrderedNGramHashes orderedHashes;
- private final MinHashBitSequenceSubSketches alignmentSketches;
+ //private final MinHashBitSequenceSubSketches alignmentSketches;
private final int sequenceLength;
- public final static double SHIFT_CONSENSUS_PERCENTAGE = 0.75;
- public final static int BIT_SKETCH_SIZE = 16;
- public final static int SUBSEQUENCE_SIZE = 200;
+ public final static int BIT_SKETCH_SIZE = 20;
+ public final static int SUBSEQUENCE_SIZE = 50;
public final static int BIT_KMER_SIZE = 7;
public static SequenceSketch fromByteStream(DataInputStream input, int offset, boolean useAlignment) throws IOException
@@ -68,7 +67,7 @@ public final class SequenceSketch implements Serializable
boolean isFwd = input.readBoolean();
// dos.writeInt(this.id.getHeaderId());
- SequenceId id = new SequenceId(input.readInt() + offset, isFwd);
+ SequenceId id = new SequenceId(input.readLong() + offset, isFwd);
//dos.writeInt(this.sequenceLength);
int sequenceLength = input.readInt();
@@ -80,21 +79,11 @@ public final class SequenceSketch implements Serializable
throw new MhapRuntimeException("Unexpected data read error.");
OrderedNGramHashes orderedHashes = null;
- MinHashBitSequenceSubSketches alignmentSketch = null;
- if (useAlignment)
- {
- alignmentSketch = MinHashBitSequenceSubSketches.fromByteStream(input);
- if (alignmentSketch == null)
- throw new MhapRuntimeException("Unexpected data read when reading alignment sketches.");
- }
- else
- {
- orderedHashes = OrderedNGramHashes.fromByteStream(input);
- if (orderedHashes == null)
- throw new MhapRuntimeException("Unexpected data read error when reading ordered k-mers.");
- }
-
- return new SequenceSketch(id, sequenceLength, mainHashes, orderedHashes, alignmentSketch);
+ orderedHashes = OrderedNGramHashes.fromByteStream(input);
+ if (orderedHashes == null)
+ throw new MhapRuntimeException("Unexpected data read error when reading ordered k-mers.");
+
+ return new SequenceSketch(id, sequenceLength, mainHashes, orderedHashes);
}
catch (EOFException e)
@@ -103,67 +92,44 @@ public final class SequenceSketch implements Serializable
}
}
- public SequenceSketch(SequenceId id, int sequenceLength, MinHashSketch mainHashes, OrderedNGramHashes orderedHashes, MinHashBitSequenceSubSketches alignmentSketch)
+ public SequenceSketch(SequenceId id, int sequenceLength, MinHashSketch mainHashes, OrderedNGramHashes orderedHashes)
{
this.sequenceLength = sequenceLength;
this.id = id;
this.mainHashes = mainHashes;
this.orderedHashes = orderedHashes;
- this.alignmentSketches = alignmentSketch;
}
- public SequenceSketch(Sequence seq, int kmerSize, int numHashes, int orderedKmerSize, boolean storeHashes,
- FrequencyCounts kmerFilter, boolean weighted, boolean useAlignment)
+ public SequenceSketch(Sequence seq, int kmerSize, int numHashes, int orderedKmerSize, int orderedSketchSize, FrequencyCounts kmerFilter, boolean weighted, boolean useAlignment)
{
this.sequenceLength = seq.length();
this.id = seq.getId();
this.mainHashes = new MinHashSketch(seq.getSquenceString(), kmerSize, numHashes, kmerFilter, weighted);
- if (useAlignment)
- {
- this.orderedHashes = null;
- this.alignmentSketches = new MinHashBitSequenceSubSketches(seq.getSquenceString(), BIT_KMER_SIZE, SUBSEQUENCE_SIZE, BIT_SKETCH_SIZE);
- }
- else
- {
- this.orderedHashes = new OrderedNGramHashes(seq.getSquenceString(), orderedKmerSize);
- this.alignmentSketches = null;
- }
+ this.orderedHashes = new OrderedNGramHashes(seq.getSquenceString(), orderedKmerSize, orderedSketchSize);
}
public SequenceSketch createOffset(int offset)
{
- return new SequenceSketch(this.id.createOffset(offset), this.sequenceLength, this.mainHashes, this.orderedHashes, this.alignmentSketches);
+ return new SequenceSketch(this.id.createOffset(offset), this.sequenceLength, this.mainHashes, this.orderedHashes);
}
public byte[] getAsByteArray()
{
- byte[] mainHashesBytes = this.mainHashes.getAsByteArray();
-
- byte[] orderedHashesBytes = null;
- byte[] alignmentSketchesBytes = null;
- if (this.orderedHashes!=null)
- orderedHashesBytes = this.orderedHashes.getAsByteArray();
- if (this.alignmentSketches!=null)
- alignmentSketchesBytes = alignmentSketches.getAsByteArray();
-
- //get size
+ byte[] mainHashesBytes = this.mainHashes.getAsByteArray();
+ byte[] orderedHashesBytes = this.orderedHashes.getAsByteArray();
- ByteArrayOutputStream bos = new ByteArrayOutputStream(mainHashesBytes.length
- +(orderedHashesBytes==null ? 0 : orderedHashesBytes.length)
- +(alignmentSketchesBytes==null ? 0 : alignmentSketchesBytes.length));
+ //get size
+ ByteArrayOutputStream bos = new ByteArrayOutputStream(mainHashesBytes.length+orderedHashesBytes.length);
DataOutputStream dos = new DataOutputStream(bos);
try
{
dos.writeBoolean(this.id.isForward());
- dos.writeInt(this.id.getHeaderId());
+ dos.writeLong(this.id.getHeaderId());
dos.writeInt(this.sequenceLength);
dos.write(mainHashesBytes);
- if (orderedHashesBytes!=null)
- dos.write(orderedHashesBytes);
- if (alignmentSketchesBytes!=null)
- dos.write(alignmentSketchesBytes);
+ dos.write(orderedHashesBytes);
dos.flush();
return bos.toByteArray();
@@ -174,11 +140,6 @@ public final class SequenceSketch implements Serializable
}
}
- public boolean useAlignment()
- {
- return this.alignmentSketches!=null;
- }
-
public MinHashSketch getMinHashes()
{
return this.mainHashes;
@@ -189,11 +150,6 @@ public final class SequenceSketch implements Serializable
return this.orderedHashes;
}
- public MinHashBitSequenceSubSketches getAlignmentSequence()
- {
- return this.alignmentSketches;
- }
-
public SequenceId getSequenceId()
{
return this.id;
diff --git a/src/main/java/edu/umd/marbl/mhap/impl/SequenceSketchStreamer.java b/src/main/java/edu/umd/marbl/mhap/impl/SequenceSketchStreamer.java
index 42e243d..5b1f603 100644
--- a/src/main/java/edu/umd/marbl/mhap/impl/SequenceSketchStreamer.java
+++ b/src/main/java/edu/umd/marbl/mhap/impl/SequenceSketchStreamer.java
@@ -64,6 +64,7 @@ public class SequenceSketchStreamer
private final boolean useAlignment;
private final int orderedKmerSize;
+ private final int orderedSketchSize;
private boolean readClosed;
private final boolean readingFasta;
private final ConcurrentLinkedQueue<SequenceSketch> sequenceHashList;
@@ -80,6 +81,7 @@ public class SequenceSketchStreamer
this.kmerSize = 0;
this.numHashes = 0;
this.orderedKmerSize = 0;
+ this.orderedSketchSize = 0;
this.readClosed = false;
this.offset = offset;
this.useAlignment = useAlignment;
@@ -87,19 +89,20 @@ public class SequenceSketchStreamer
this.buffInput = new DataInputStream(new BufferedInputStream(new FileInputStream(file), Utils.BUFFER_BYTE_SIZE));
}
- public SequenceSketchStreamer(String file, int kmerSize, int numHashes, int orderedKmerSize,
+ public SequenceSketchStreamer(String file, int kmerSize, int numHashes, int orderedKmerSize, int orderedSketchSize,
FrequencyCounts kmerFilter, boolean weighted, int offset, boolean useAlignment) throws IOException
{
this.fastaData = new FastaData(file, offset);
this.readingFasta = true;
this.sequenceHashList = new ConcurrentLinkedQueue<SequenceSketch>();
this.numberProcessed = new AtomicLong();
-
this.weighted = weighted;
+
this.kmerFilter = kmerFilter;
this.kmerSize = kmerSize;
this.numHashes = numHashes;
this.orderedKmerSize = orderedKmerSize;
+ this.orderedSketchSize = orderedSketchSize;
this.buffInput = null;
this.readClosed = false;
this.offset = offset;
@@ -225,7 +228,7 @@ public class SequenceSketchStreamer
public SequenceSketch getSketch(Sequence seq)
{
// compute the hashes
- return new SequenceSketch(seq, this.kmerSize, this.numHashes, this.orderedKmerSize, false, this.kmerFilter, this.weighted, this.useAlignment);
+ return new SequenceSketch(seq, this.kmerSize, this.numHashes, this.orderedKmerSize, this.orderedSketchSize, this.kmerFilter, this.weighted, this.useAlignment);
}
public int getNumberProcessed()
@@ -266,7 +269,6 @@ public class SequenceSketchStreamer
// allocate the array
byteArray = buf.getBuffer(byteSize);
- // byteArray = new byte[byteSize];
// read that many bytes
this.buffInput.read(byteArray, 0, byteSize);
diff --git a/src/main/java/edu/umd/marbl/mhap/main/AlignmentTry.java b/src/main/java/edu/umd/marbl/mhap/main/AlignmentTry.java
index a81f838..f5009e5 100644
--- a/src/main/java/edu/umd/marbl/mhap/main/AlignmentTry.java
+++ b/src/main/java/edu/umd/marbl/mhap/main/AlignmentTry.java
@@ -35,7 +35,6 @@ import edu.umd.marbl.mhap.align.Alignment;
import edu.umd.marbl.mhap.impl.MinHashBitSequenceSubSketches;
import edu.umd.marbl.mhap.impl.OverlapInfo;
import edu.umd.marbl.mhap.sketch.MinHashBitSketch;
-import edu.umd.marbl.mhap.sketch.OrderedNGramHashes;
import edu.umd.marbl.mhap.utils.RandomSequenceGenerator;
public class AlignmentTry
@@ -90,14 +89,16 @@ public class AlignmentTry
System.exit(1);
- OrderedNGramHashes hashes1 = new OrderedNGramHashes(a, 10);
- OrderedNGramHashes hashes2 = new OrderedNGramHashes(b, 10);
+ //OrderedNGramHashes hashes1 = new OrderedNGramHashes(a, 10, 1024);
+ //OrderedNGramHashes hashes2 = new OrderedNGramHashes(b, 10, 1024);
+ /*
System.err.println("Ordered=");
System.err.println(hashes1.getOverlapInfo(hashes2, .2).a1);
System.err.println(hashes1.getOverlapInfo(hashes2, .2).b1);
System.err.println(hashes1.getOverlapInfo(hashes2, .2).a2);
System.err.println(hashes1.getOverlapInfo(hashes2, .2).b2);
+ */
/*
SimHash s1 = new SimHash(a, kmerSize, 100);
diff --git a/src/main/java/edu/umd/marbl/mhap/main/EstimateROC.java b/src/main/java/edu/umd/marbl/mhap/main/EstimateROC.java
index 4b6978f..a55be7d 100755
--- a/src/main/java/edu/umd/marbl/mhap/main/EstimateROC.java
+++ b/src/main/java/edu/umd/marbl/mhap/main/EstimateROC.java
@@ -29,13 +29,8 @@
*/
package edu.umd.marbl.mhap.main;
-import jaligner.Alignment;
-import jaligner.SmithWatermanGotoh;
-import jaligner.NeedlemanWunschGotoh;
-import jaligner.matrix.MatrixLoader;
-import jaligner.matrix.MatrixLoaderException;
-
import java.io.BufferedReader;
+import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
@@ -49,17 +44,23 @@ import java.util.Random;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.atomic.AtomicInteger;
-import java.util.logging.Level;
-import java.util.logging.Logger;
import java.util.stream.Stream;
import edu.umd.marbl.mhap.impl.FastaData;
import edu.umd.marbl.mhap.impl.Sequence;
import edu.umd.marbl.mhap.utils.IntervalTree;
import edu.umd.marbl.mhap.utils.Utils;
+import jaligner.NeedlemanWunschGotoh;
+import jaligner.SmithWatermanGotoh;
+import jaligner.matrix.MatrixLoader;
+import jaligner.matrix.MatrixLoaderException;
+import ssw.Aligner;
+
public class EstimateROC {
private static final boolean ALIGN_SW = true;
+ private static final boolean ALIGN_JALIGN = false;
+ private static int[][] MATCH_MATRIX = new int[128][128];
private static final double MIN_REF_OVERLAP_DIFFERENCE = 0.8;
private static double MIN_IDENTITY = 0.70;
private static final double REF_IDENTITY_ADJUSTMENT = 0.1;
@@ -280,6 +281,27 @@ public class EstimateROC {
}
generator = new Random(seed);
+
+ if (!EstimateROC.ALIGN_JALIGN) {
+ try {
+ File f = new File(System.getProperty("java.class.path"));
+ File dir = f.getAbsoluteFile().getParentFile();
+ String path = dir.toString();
+ System.err.println("Loaded file from path " + path);
+ System.load(path + java.io.File.separator + "lib" + java.io.File.separator + "libsswjni.so");
+
+ // now initialize matrix
+ for (int i = 0; i < 128; i++) {
+ for (int j = 0; j < 128; j++) {
+ if (i == j) MATCH_MATRIX[i][j] = 1;
+ else MATCH_MATRIX[i][j] = -1;
+ }
+ }
+ } catch (Exception e) {
+ System.err.println("Error: could not load DP library: " + e);
+ System.exit(1);
+ }
+ }
}
private static int getSequenceId(String id) {
@@ -629,11 +651,10 @@ public class EstimateROC {
}
}
- private static double getScore(Alignment alignment) {
+ private static double getScore(jaligner.Alignment alignment, int ovlLen) {
char[] sequence1 = alignment.getSequence1();
char[] sequence2 = alignment.getSequence2();
int length = Math.max(sequence1.length, sequence2.length);
- int ovlLen = Math.min(sequence1.length, sequence2.length);
char GAP = '-';
@SuppressWarnings("unused")
int errors = 0;
@@ -655,48 +676,79 @@ public class EstimateROC {
}
}
return (matches / (double)ovlLen);
+
+ }
+
+ private static double getScore(ssw.Alignment alignment, int ovlLen) {
+ // the result is a cigar string of the format 3M1I9M1I9M1D10M1I13M1I9M1D1M2D45M1D6M1D5
+ int matches = 0;
+ for (int i = 0; i < alignment.cigar.length(); i++) {
+ if (alignment.cigar.charAt(i) == 'M') {
+ // get the match length
+ int s = i-1;
+ while (s >= 0 && Character.isDigit(alignment.cigar.charAt(s))) {
+ s--;
+ }
+ s++;
+ matches += Integer.parseInt(alignment.cigar.substring(s, i));
+ }
+ }
+ return matches/ (double) ovlLen;
}
private boolean computeDP(String id, String id2) {
if (this.doDP == false) {
return false;
}
- Logger logger = null;
- if (ALIGN_SW) {
- logger = Logger.getLogger(SmithWatermanGotoh.class.getName());
- } else {
- logger = Logger.getLogger(NeedlemanWunschGotoh.class.getName());
- }
- logger.setLevel(Level.OFF);
- logger = Logger.getLogger(MatrixLoader.class.getName());
- logger.setLevel(Level.OFF);
+
Overlap ovl = this.ovlInfo.get(getOvlName(id, id2));
System.err.println("Aligning sequence " + ovl.id1 + " to " + ovl.id2 + " " + ovl.bfirst + " to " + ovl.bsecond + " and " + ovl.isFwd + " and " + ovl.afirst + " " + ovl.asecond);
- jaligner.Sequence s1 = new jaligner.Sequence(this.dataSeq[getSequenceId(ovl.id1)].getSquenceString().substring(ovl.afirst, ovl.asecond));
- jaligner.Sequence s2 = null;
+ String s1 = this.dataSeq[getSequenceId(ovl.id1)].getSquenceString().substring(ovl.afirst, ovl.asecond);
+ String s2 = null;
+
if (ovl.isFwd) {
- s2 = new jaligner.Sequence(this.dataSeq[getSequenceId(ovl.id2)].getSquenceString().substring(ovl.bfirst, ovl.bsecond));
+ s2 = this.dataSeq[getSequenceId(ovl.id2)].getSquenceString().substring(ovl.bfirst, ovl.bsecond);
} else {
- s2 = new jaligner.Sequence(Utils.rc(this.dataSeq[getSequenceId(ovl.id2)].getSquenceString().substring(ovl.bfirst, ovl.bsecond)));
+ s2 = Utils.rc(this.dataSeq[getSequenceId(ovl.id2)].getSquenceString().substring(ovl.bfirst, ovl.bsecond));
}
- Alignment alignment;
- try {
- if (ALIGN_SW) {
- alignment = SmithWatermanGotoh.align(s1, s2, MatrixLoader.load("MATCH"), 2f, 1f);
- } else {
- alignment = NeedlemanWunschGotoh.align(s1, s2, MatrixLoader.load("MATCH"), 2f, 1f);
+ int ovlLen = Math.min(s1.length(), s2.length());
+
+ double score = 0;
+ int length = 0;
+ if (ALIGN_JALIGN) {
+ jaligner.Sequence js1 = new jaligner.Sequence(s1);
+ jaligner.Sequence js2 = new jaligner.Sequence(s2);
+ jaligner.Alignment alignment = null;
+
+ try {
+ if (ALIGN_SW) {
+ alignment = SmithWatermanGotoh.align(js1, js2, MatrixLoader.load("MATCH"), 2f, 1f);
+ } else {
+ alignment = NeedlemanWunschGotoh.align(js1, js2, MatrixLoader.load("MATCH"), 2f, 1f);
+ }
+ } catch (MatrixLoaderException e) {
+ return false;
+ }
+ score = getScore(alignment, ovlLen);
+ length = alignment.getLength();
+ if (DEBUG) {
+ System.err.println(alignment.getSummary());
+ System.err.println("My score: " + score);
+ System.err.println (new jaligner.formats.Pair().format(alignment));
}
- } catch (MatrixLoaderException e) {
- return false;
}
- double score = getScore(alignment); // alignment.getIdentity() / 100;
- if (DEBUG) {
- System.err.println(alignment.getSummary());
- System.err.println("My score: " + score);
- System.err.println (new jaligner.formats.Pair().format(alignment));
+ else {
+ ssw.Alignment alignment = Aligner.align(s1.getBytes(), s2.getBytes(), MATCH_MATRIX, 2, 1, true);
+ score = getScore(alignment, ovlLen);
+ length = Math.max(alignment.read_end1-alignment.read_begin1, alignment.ref_end1 - alignment.ref_begin1);
+ if (DEBUG) {
+ System.err.println(alignment.toString());
+ System.err.println("My score: " + score);
+ }
}
- return (score > MIN_IDENTITY && alignment.getLength() > this.minOvlLen);
+
+ return (score > MIN_IDENTITY && length > this.minOvlLen);
}
private void estimateSensitivity() {
@@ -743,7 +795,7 @@ public class EstimateROC {
AtomicInteger numTP = new AtomicInteger();
- ForkJoinPool forkJoinPool = new ForkJoinPool(Runtime.getRuntime().availableProcessors()/2+1);
+ ForkJoinPool forkJoinPool = new ForkJoinPool(Runtime.getRuntime().availableProcessors());
forkJoinPool.submit(() ->
Stream.iterate(0, i->i+1).limit(this.numTrials).parallel().forEach(i-> {
diff --git a/src/main/java/edu/umd/marbl/mhap/main/KmerStatSimulator.java b/src/main/java/edu/umd/marbl/mhap/main/KmerStatSimulator.java
index 860ddc6..d873465 100644
--- a/src/main/java/edu/umd/marbl/mhap/main/KmerStatSimulator.java
+++ b/src/main/java/edu/umd/marbl/mhap/main/KmerStatSimulator.java
@@ -41,7 +41,9 @@ import java.io.BufferedReader;
import java.io.PrintStream;
import edu.umd.marbl.mhap.impl.FastaData;
+import edu.umd.marbl.mhap.sketch.BottomHash;
import edu.umd.marbl.mhap.sketch.MinHashSketch;
+import edu.umd.marbl.mhap.sketch.OrderedNGramHashes;
import edu.umd.marbl.mhap.utils.Utils;
public class KmerStatSimulator {
@@ -184,6 +186,13 @@ public class KmerStatSimulator {
}
public double compareMinHash(String first, String second) {
+ BottomHash h1 = new BottomHash(first, this.kmer, 1256);
+ BottomHash h2 = new BottomHash(second, this.kmer, 1256);
+
+ return h1.jaccard(h2);
+ }
+
+ public double compareMinHash2(String first, String second) {
MinHashSketch h1 = new MinHashSketch(first, this.kmer, 1256, null, true);
MinHashSketch h2 = new MinHashSketch(second, this.kmer, 1256, null, true);
@@ -450,6 +459,7 @@ public class KmerStatSimulator {
System.out.println(this.sharedMerCounts.get(i) + "\t"
+ this.sharedJaccard.get(i) + "\t"
+ this.sharedMinHash.get(i) + "\t"
+ + OrderedNGramHashes.jaccardToIdentity(this.sharedMinHash.get(i), this.kmer) + "\t"
+ this.randomMerCounts.get(i) + "\t"
+ this.randomJaccard.get(i) + "\t"
+ this.randomMinHash.get(i));
diff --git a/src/main/java/edu/umd/marbl/mhap/main/MhapMain.java b/src/main/java/edu/umd/marbl/mhap/main/MhapMain.java
index 0e41e6c..1d5b470 100644
--- a/src/main/java/edu/umd/marbl/mhap/main/MhapMain.java
+++ b/src/main/java/edu/umd/marbl/mhap/main/MhapMain.java
@@ -40,14 +40,16 @@ import edu.umd.marbl.mhap.impl.MinHashSearch;
import edu.umd.marbl.mhap.impl.SequenceId;
import edu.umd.marbl.mhap.impl.SequenceSketchStreamer;
import edu.umd.marbl.mhap.sketch.FrequencyCounts;
-import edu.umd.marbl.mhap.utils.PackageInfo;
import edu.umd.marbl.mhap.utils.ParseOptions;
import edu.umd.marbl.mhap.utils.Utils;
public final class MhapMain
{
private final double acceptScore;
+ private final double alignmentOffset;
+ private final double alignmentScore;
private final String inFile;
+ private final FrequencyCounts kmerFilter;
private final int kmerSize;
private final double maxShift;
private final int minStoreLength;
@@ -55,30 +57,29 @@ public final class MhapMain
private final int numHashes;
private final int numMinMatches;
protected final int numThreads;
+ private final int orderedKmerSize;
+ private final int orderedSketchSize;
private final String processFile;
private final String toFile;
- private final boolean weighted;
private final boolean useAlignment;
- private final double alignmentOffset;
- private final double alignmentScore;
-
- private final FrequencyCounts kmerFilter;
+ private final boolean weighted;
+
+ //private static final double DEFAULT_OVERLAP_ACCEPT_SCORE = 0.024;
+ private static final double DEFAULT_OVERLAP_ACCEPT_SCORE = 0.78;
- private static final double DEFAULT_ACCEPT_SCORE = 0.04;
+ private static final double DEFAULT_BIT_ALIGNMENT_MISMATCH_PANELTY = -0.530;
+
+ private static final double DEFAULT_BIT_ALIGNMENT_SCORE = 1.0e-6;
private static final double DEFAULT_FILTER_CUTOFF = 1.0e-5;
private static final int DEFAULT_KMER_SIZE = 16;
private static final double DEFAULT_MAX_SHIFT_PERCENT = 0.2;
-
+
private static final int DEFAULT_MIN_STORE_LENGTH = 0;
private static final int DEFAULT_NUM_MIN_MATCHES = 3;
-
- private static final double DEFAULT_BIT_ALIGNMENT_MISMATCH_PANELTY = -0.535;
-
- private static final double DEFAULT_BIT_ALIGNMENT_SCORE = 1.0e-6;
private static final int DEFAULT_NUM_THREADS = Runtime.getRuntime().availableProcessors();
@@ -86,6 +87,8 @@ public final class MhapMain
private static final int DEFAULT_ORDERED_KMER_SIZE = 12;
+ private static final int DEFAULT_ORDERED_SKETCH_SIZE = 1536;
+
public static void main(String[] args) throws Exception
{
// set the locale
@@ -93,7 +96,7 @@ public final class MhapMain
ParseOptions options = new ParseOptions();
options.addStartTextLine("MHAP: MinHash Alignment Protocol. A tool for finding overlaps of long-read sequences (such as PacBio or Nanopore) in bioinformatics.");
- options.addStartTextLine("\tVersion: "+PackageInfo.VERSION+", Build time: "+PackageInfo.BUILD_TIME);
+ options.addStartTextLine("\tVersion: "+MhapMain.class.getPackage().getImplementationVersion());
options.addStartTextLine("\tUsage 1 (direct execution): java -server -Xmx<memory> -jar <MHAP jar> -s<fasta/dat from/self file> [-q<fasta/dat to file>] [-f<kmer filter list, must be sorted>]");
options.addStartTextLine("\tUsage 2 (generate precomputed binaries): java -server -Xmx<memory> -jar <MHAP jar> -p<directory of fasta files> -q <output directory> [-f<kmer filter list, must be sorted>]");
options.addOption("-s", "Usage 1 only. The FASTA or binary dat file (see Usage 2) of reads that will be stored in a box, and that all subsequent reads will be compared to.", "");
@@ -102,22 +105,24 @@ public final class MhapMain
options.addOption("-f", "k-mer filter file used for filtering out highly repetative k-mers. Must be sorted in descending order of frequency (second column).", "");
options.addOption("-k", "[int], k-mer size used for MinHashing. The k-mer size for second stage filter is seperate, and cannot be modified.", DEFAULT_KMER_SIZE);
options.addOption("--num-hashes", "[int], number of min-mers to be used in MinHashing.", DEFAULT_NUM_WORDS);
- options.addOption("--threshold", "[double], the threshold similarity score cutoff for the second stage sort-merge filter. This is based on the average number of k-mers matching in the overlapping region.", DEFAULT_ACCEPT_SCORE);
+ //options.addOption("--threshold", "[double], the threshold cutoff for the second stage sort-merge filter. This is based on the Jaccard distance of k-mers (size given by ordered-kmer-size) in the overlapping regions.", DEFAULT_OVERLAP_ACCEPT_SCORE);
+ options.addOption("--threshold", "[double], the threshold cutoff for the second stage sort-merge filter. This is based on the identity score computed from the Jaccard distance of k-mers (size given by ordered-kmer-size) in the overlapping regions.", DEFAULT_OVERLAP_ACCEPT_SCORE);
options.addOption("--filter-threshold", "[double], the cutoff at which the k-mer in the k-mer filter file is considered repetitive. This value for a specific k-mer is specified in the second column in the filter file. If no filter file is provided, this option is ignored.", DEFAULT_FILTER_CUTOFF);
options.addOption("--max-shift", "[double], region size to the left and right of the estimated overlap, as derived from the median shift and sequence length, where a k-mer matches are still considered valid. Second stage filter only.", DEFAULT_MAX_SHIFT_PERCENT);
options.addOption("--num-min-matches", "[int], minimum # min-mer that must be shared before computing second stage filter. Any sequences below that value are considered non-overlapping.", DEFAULT_NUM_MIN_MATCHES);
- options.addOption("--num-threads", "[int], number of threads to use for computation. Typically set to 2 x #cores.", DEFAULT_NUM_THREADS);
+ options.addOption("--num-threads", "[int], number of threads to use for computation. Typically set to #cores.", DEFAULT_NUM_THREADS);
options.addOption("--weighted", "Perform weighted MinHashing.", false);
- //options.addOption("--alignment", "Perform sudo-alignment instead of ordered k-mer merging.", false);
options.addOption("--alignment", "Experimental option.", false);
- options.addOption("--alignment-offset", "The offset to account for the variance in the alignment match score.", DEFAULT_BIT_ALIGNMENT_MISMATCH_PANELTY);
- options.addOption("--alignment-score", "The cutoff score for alignment matches.", DEFAULT_BIT_ALIGNMENT_SCORE);
+ options.addOption("--alignment-offset", "The offset to account for the variance in the alignment match score. Experimental option.", DEFAULT_BIT_ALIGNMENT_MISMATCH_PANELTY);
+ options.addOption("--alignment-score", "The cutoff score for alignment matches. Experimental option.", DEFAULT_BIT_ALIGNMENT_SCORE);
+ options.addOption("--ordered-kmer-size", "The size of k-mers used in the ordered second stage filter.", DEFAULT_ORDERED_KMER_SIZE);
+ options.addOption("--ordered-sketch-size", "The sketch size for second stage filter.", DEFAULT_ORDERED_SKETCH_SIZE);
options.addOption("--min-store-length", "[int], The minimum length of the read that is stored in the box. Used to filter out short reads from FASTA file.", DEFAULT_MIN_STORE_LENGTH);
options.addOption("--no-self", "Do not compute the overlaps between sequences inside a box. Should be used when the to and from sequences are coming from different files.", false);
options.addOption("--store-full-id", "Store full IDs as seen in FASTA file, rather than storing just the sequence position in the file. Some FASTA files have long IDS, slowing output of results. This options is ignored when using compressed file format.", false);
options.addOption("--pacbio-fast", "Set all the parameters for the PacBio fast setting. This is the current best guidance, and could change at any time without warning.", false);
options.addOption("--pacbio-sensitive", "Set all the parameters for the PacBio sensitive settings. This is the current best guidance, and could change at any time without warning.", false);
- options.addOption("--nanopore-fast", "Set all the parameters for the Nanopore fast settings. This is the current best guidance, and could change at any time without warning.", false);
+ options.addOption("--nanopore", "Set all the parameters for the Nanopore settings. This is the current best guidance, and could change at any time without warning.", false);
if (!options.process(args))
System.exit(0);
@@ -153,7 +158,7 @@ public final class MhapMain
options.setOptions("--num-hashes", 768);
}
else
- if (options.get("--nanopore-fast").getBoolean())
+ if (options.get("--nanopore").getBoolean())
{
if (!options.get("-k").isSet())
options.setOptions("-k", 16);
@@ -163,6 +168,9 @@ public final class MhapMain
if (!options.get("--num-hashes").isSet())
options.setOptions("--num-hashes", 768);
+
+ if (!options.get("--threshold").isSet())
+ options.setOptions("--threshold", 0.70);
}
if (options.get("-s").getString().isEmpty() && options.get("-p").getString().isEmpty())
@@ -243,9 +251,9 @@ public final class MhapMain
}
//check range
- if (options.get("--threshold").getDouble()<0.0)
+ if (options.get("--threshold").getDouble()<0.0 || options.get("--threshold").getDouble()>1.0)
{
- System.out.println("The second stage filter cutoff must be >=0.");
+ System.out.println("The second stage filter threshold must be 0<=threshold<=1.0.");
System.exit(1);
}
@@ -259,8 +267,6 @@ public final class MhapMain
//printing the options used
System.err.println("Running with these settings:");
- System.err.println("Version = "+PackageInfo.VERSION);
- System.err.println("Build time = "+PackageInfo.BUILD_TIME);
System.err.println(options);
// start the main program
@@ -289,6 +295,8 @@ public final class MhapMain
this.useAlignment = options.get("--alignment").getBoolean();
this.alignmentOffset = options.get("--alignment-offset").getDouble();
this.alignmentScore = options.get("--alignment-score").getDouble();
+ this.orderedKmerSize = options.get("--ordered-kmer-size").getInteger();
+ this.orderedSketchSize = options.get("--ordered-sketch-size").getInteger();
// read in the kmer filter set
String filterFile = options.get("-f").getString();
@@ -314,82 +322,6 @@ public final class MhapMain
}
- /*
- public FrequencyCounts recordFastaKmerCounts(String file, double filterCutoff) throws IOException
- {
- System.err.println("Computing k-mer counts...");
-
- final FastaData data = new FastaData(this.inFile, 0);
-
- final CountMin<Long> countMin = new CountMin<>(1.0e-5, 1.0-1.0e-5, 0);
- //System.err.println(countMin.getDepth()+" "+countMin.getWidth());
-
- // figure out number of cores
- ExecutorService execSvc = Executors.newFixedThreadPool(this.numThreads);
-
- final AtomicInteger counter = new AtomicInteger();
- for (int iter = 0; iter < this.numThreads; iter++)
- {
- Runnable task = new Runnable()
- {
- @Override
- public void run()
- {
- try
- {
- Sequence seq = data.dequeue();
- while (seq != null)
- {
- //get the kmers integers
- long[] kmerHashes = HashUtils.computeSequenceHashesLong(seq.getSquenceString(), MhapMain.this.kmerSize, 0);
-
- //store the values
- for (long val : kmerHashes)
- countMin.add(val);
-
- //get the kmers integers for reverse compliment
- kmerHashes = HashUtils.computeSequenceHashesLong(seq.getReverseCompliment().getSquenceString(), MhapMain.this.kmerSize, 0);
-
- //store the values
- for (long val : kmerHashes)
- countMin.add(val);
-
- int currCount = counter.addAndGet(2);
- if (currCount % 5000 == 0)
- System.err.println("Kmers counted for " + currCount + " sequences (including reverse compliment)...");
-
- seq = data.dequeue();
- }
- }
- catch (IOException e)
- {
- throw new MhapRuntimeException(e);
- }
- }
- };
-
- // enqueue the task
- execSvc.execute(task);
- }
-
- // shutdown the service
- execSvc.shutdown();
- try
- {
- execSvc.awaitTermination(365L, TimeUnit.DAYS);
- }
- catch (InterruptedException e)
- {
- execSvc.shutdownNow();
- throw new MhapRuntimeException("Unable to finish all tasks.");
- }
-
- System.err.println("Computed k-mer counts for "+counter.get()+" sequences.");
-
- return new NGramCounts(countMin, counter.get(), filterCutoff);
- }
- */
-
public void computeMain() throws IOException
{
long startTotalTime = System.nanoTime();
@@ -423,20 +355,17 @@ public final class MhapMain
else
{
//read the directory content
- File[] fileList = file.listFiles(new FilenameFilter()
+ File[] fileList = file.listFiles((dir,name) ->
{
- @Override
- public boolean accept(File dir, String name)
- {
- if (!name.startsWith("."))
- return true;
+ if (!name.startsWith("."))
+ return true;
- return false;
- }
+ return false;
});
- for (File cf : fileList)
- processFiles.add(cf);
+ if (fileList!=null)
+ for (File cf : fileList)
+ processFiles.add(cf);
}
for (File pf : processFiles)
@@ -580,7 +509,7 @@ public final class MhapMain
seqStreamer = new SequenceSketchStreamer(file, offset, this.useAlignment);
else
seqStreamer = new SequenceSketchStreamer(file, this.kmerSize, this.numHashes,
- DEFAULT_ORDERED_KMER_SIZE, this.kmerFilter, this.weighted, offset, this.useAlignment);
+ this.orderedKmerSize, this.orderedSketchSize, this.kmerFilter, this.weighted, offset, this.useAlignment);
return seqStreamer;
}
diff --git a/src/main/java/edu/umd/marbl/mhap/math/BasicMath.java b/src/main/java/edu/umd/marbl/mhap/math/BasicMath.java
index 40b9e73..71d79be 100644
--- a/src/main/java/edu/umd/marbl/mhap/math/BasicMath.java
+++ b/src/main/java/edu/umd/marbl/mhap/math/BasicMath.java
@@ -65,7 +65,7 @@ public final class BasicMath
*/
public static double acos(double x)
{
- return FastMath.acos(x);
+ return Math.acos(x);
}
/**
@@ -136,7 +136,7 @@ public final class BasicMath
*/
public final static double asin(final double x)
{
- return FastMath.asin(x);
+ return Math.asin(x);
}
public final static double[][] catColumns(final double[][] A, final double[][] B)
@@ -181,7 +181,7 @@ public final class BasicMath
*/
public final static double cos(final double angle)
{
- return FastMath.cos(angle);
+ return Math.cos(angle);
}
/**
@@ -780,7 +780,7 @@ public final class BasicMath
*/
public final static double sin(final double angle)
{
- return FastMath.sin(angle);
+ return Math.sin(angle);
}
/**
@@ -804,7 +804,7 @@ public final class BasicMath
*/
public final static double sqrt(final double a)
{
- return FastMath.sqrt(a);
+ return Math.sqrt(a);
}
/**
diff --git a/src/main/java/edu/umd/marbl/mhap/math/FastMath.java b/src/main/java/edu/umd/marbl/mhap/math/FastMath.java
deleted file mode 100644
index 3a470b3..0000000
--- a/src/main/java/edu/umd/marbl/mhap/math/FastMath.java
+++ /dev/null
@@ -1,3379 +0,0 @@
-/*
- * Copyright 2012 Jeff Hain
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * =============================================================================
- * Notice of fdlibm package this program is partially derived from:
- *
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * =============================================================================
- */
-package edu.umd.marbl.mhap.math;
-
-/**
- * Class providing math treatments that: - are meant to be faster than those of
- * java.lang.Math class (depending on JVM or JVM options, they might be slower),
- * - are still somehow accurate and robust (handling of NaN and such), - do not
- * (or not directly) generate objects at run time (no "new").
- *
- * Other than optimized treatments, a valuable feature of this class is the
- * presence of angles normalization methods, derived from those used in
- * java.lang.Math (for which, sadly, no API is provided, letting everyone with
- * the terrible responsibility to write their own ones).
- *
- * Non-redefined methods of java.lang.Math class are also available, for easy
- * replacement.
- *
- * Use of look-up tables: around 1 Mo total, and initialized on class load.
- *
- * - Methods with same signature than Math ones, are meant to return "good"
- * approximations on all range. - Methods terminating with "Fast" are meant to
- * return "good" approximation on a reduced range only. - Methods terminating
- * with "Quick" are meant to be quick, but do not return a good approximation,
- * and might only work on a reduced range.
- *
- * Properties:
- *
- * - jodk.fastmath.strict (boolean, default is true): If true, non-redefined
- * Math methods which results could vary between Math and StrictMath, delegate
- * to StrictMath, and if false, to Math. Default is true to ensure consistency
- * across various architectures.
- *
- * - jodk.fastmath.usejdk (boolean, default is false): If true, redefined Math
- * methods, as well as their "Fast" or "Quick" terminated counterparts, delegate
- * to StrictMath or Math, depending on jodk.fastmath.strict property.
- *
- * - jodk.fastmath.fastlog (boolean, default is true): If true, using redefined
- * log(double), if false using StrictMath.log(double) or Math.log(double),
- * depending on jodk.fastmath.strict property. Default is true because
- * jodk.fastmath.strict is true by default, and StrictMath.log(double) seems
- * usually slow.
- *
- * - jodk.fastmath.fastsqrt (boolean, default is false): If true, using
- * redefined sqrt(double), if false using StrictMath.sqrt(double) or
- * Math.sqrt(double), depending on jodk.fastmath.strict property. Default is
- * false because StrictMath.sqrt(double) seems to usually delegate to hardware
- * sqrt.
- *
- * Unless jodk.fastmath.strict is false and jodk.fastmath.usejdk is true, these
- * treatments are consistent across various architectures, for constants and
- * look-up tables are computed with StrictMath, or exact Math methods.
- *
- * --- words, words, words ---
- *
- * "0x42BE0000 percents of the folks out there are completely clueless about
- * floating-point."
- *
- * The difference between precision and accuracy: "3.177777777777777 is a
- * precise (16 digits) but inaccurate (only correct up to the second digit)
- * approximation of PI=3.141592653589793(etc.)."
- */
-public strictfp final class FastMath
-{
-
- /*
- * For trigonometric functions, use of look-up tables and Taylor-Lagrange
- * formula with 4 derivatives (more take longer to compute and don't add
- * much accuracy, less require larger tables (which use more memory, take
- * more time to initialize, and are slower to access (at least on the
- * machine they were developed on))).
- *
- * For angles reduction of cos/sin/tan functions: - for small values,
- * instead of reducing angles, and then computing the best index for look-up
- * tables, we compute this index right away, and use it for reduction, - for
- * large values, treatments derived from fdlibm package are used, as done in
- * java.lang.Math. They are faster but still "slow", so if you work with
- * large numbers and need speed over accuracy for them, you might want to
- * use normalizeXXXFast treatments before your function, or modify
- * cos/sin/tan so that they call the fast normalization treatments instead
- * of the accurate ones. NB: If an angle is huge (like PI*1e20), in double
- * precision format its last digits are zeros, which most likely is not the
- * case for the intended value, and doing an accurate reduction on a very
- * inaccurate value is most likely pointless. But it gives some sort of
- * coherence that could be needed in some cases.
- *
- * Multiplication on double appears to be about as fast (or not much slower)
- * than call to <double_array>[<index>], and regrouping some doubles in a
- * private class, to use index only once, does not seem to speed things up,
- * so: - for uniformly tabulated values, to retrieve the parameter
- * corresponding to an index, we recompute it rather than using an array to
- * store it, - for cos/sin, we recompute derivatives divided by (multiplied
- * by inverse of) factorial each time, rather than storing them in arrays.
- *
- * Lengths of look-up tables are usually of the form 2^n+1, for their values
- * to be of the form (<a_constant> * k/2^n, k in 0 .. 2^n), so that
- * particular values (PI/2, etc.) are "exactly" computed, as well as for
- * other reasons.
- *
- * Most math treatments I could find on the web, including "fast" ones,
- * usually take care of special cases (NaN, etc.) at the beginning, and then
- * deal with the general case, which adds a useless overhead for the general
- * (and common) case. In this class, special cases are only dealt with when
- * needed, and if the general case does not already handle them.
- */
-
- // --------------------------------------------------------------------------
- // CONFIGURATION
- // --------------------------------------------------------------------------
-
- private static final boolean STRICT_MATH = true;
-
- private static final boolean USE_JDK_MATH = false;
-
- /**
- * Used for both log(double) and log10(double).
- */
- // private static final boolean USE_REDEFINED_LOG = true;
-
- private static final boolean USE_REDEFINED_SQRT = false;
-
- // Set it to true if FastMath.sqrt(double) is slow (more tables, but less
- // calls to FastMath.sqrt(double)).
- private static final boolean USE_POWTABS_FOR_ASIN = false;
-
- // --------------------------------------------------------------------------
- // GENERAL CONSTANTS
- // --------------------------------------------------------------------------
-
- /**
- * High approximation of PI, which is further from PI than the low
- * approximation Math.PI: PI ~= 3.14159265358979323846... Math.PI ~=
- * 3.141592653589793 FastMath.PI_SUP ~= 3.1415926535897936
- */
- public static final double PI_SUP = Math.nextUp(Math.PI);
-
- private static final double ONE_DIV_F2 = 1 / 2.0;
- private static final double ONE_DIV_F3 = 1 / 6.0;
- private static final double ONE_DIV_F4 = 1 / 24.0;
-
- private static final double TWO_POW_24 = Double.longBitsToDouble(0x4170000000000000L);
- private static final double TWO_POW_N24 = Double.longBitsToDouble(0x3E70000000000000L);
-
- private static final double TWO_POW_26 = Double.longBitsToDouble(0x4190000000000000L);
- private static final double TWO_POW_N26 = Double.longBitsToDouble(0x3E50000000000000L);
-
- // First double value (from zero) such as (value+-1/value == value).
- private static final double TWO_POW_27 = Double.longBitsToDouble(0x41A0000000000000L);
- private static final double TWO_POW_N27 = Double.longBitsToDouble(0x3E40000000000000L);
-
- private static final double TWO_POW_N28 = Double.longBitsToDouble(0x3E30000000000000L);
-
- private static final double TWO_POW_52 = Double.longBitsToDouble(0x4330000000000000L);
-
- private static final double TWO_POW_N54 = Double.longBitsToDouble(0x3C90000000000000L);
-
- private static final double TWO_POW_N55 = Double.longBitsToDouble(0x3C80000000000000L);
-
- private static final double TWO_POW_66 = Double.longBitsToDouble(0x4410000000000000L);
-
- private static final double TWO_POW_450 = Double.longBitsToDouble(0x5C10000000000000L);
- private static final double TWO_POW_N450 = Double.longBitsToDouble(0x23D0000000000000L);
-
- private static final double TWO_POW_750 = Double.longBitsToDouble(0x6ED0000000000000L);
- private static final double TWO_POW_N750 = Double.longBitsToDouble(0x1110000000000000L);
-
- // Smallest double normal value.
- private static final double MIN_DOUBLE_NORMAL = Double.longBitsToDouble(0x0010000000000000L); // 2.2250738585072014E-308
-
- private static final int MIN_DOUBLE_EXPONENT = -1074;
- private static final int MAX_DOUBLE_EXPONENT = 1023;
-
- private static final int MAX_FLOAT_EXPONENT = 127;
-
- private static final double LOG_2 = StrictMath.log(2.0);
- private static final double LOG_TWO_POW_27 = StrictMath.log(TWO_POW_27);
- private static final double LOG_DOUBLE_MAX_VALUE = StrictMath.log(Double.MAX_VALUE);
-
- private static final double INV_LOG_10 = 1.0 / StrictMath.log(10.0);
-
- // private static final double DOUBLE_BEFORE_60 = Math.nextAfter(60.0, 0.0);
-
- // --------------------------------------------------------------------------
- // CONSTANTS FOR NORMALIZATIONS
- // --------------------------------------------------------------------------
-
- /*
- * Table of constants for 1/(2*PI), 282 Hex digits (enough for normalizing
- * doubles). 1/(2*PI) approximation = sum of
- * ONE_OVER_TWOPI_TAB[i]*2^(-24*(i+1)).
- */
- private static final double ONE_OVER_TWOPI_TAB[] = { 0x28BE60, 0xDB9391, 0x054A7F, 0x09D5F4, 0x7D4D37, 0x7036D8,
- 0xA5664F, 0x10E410, 0x7F9458, 0xEAF7AE, 0xF1586D, 0xC91B8E, 0x909374, 0xB80192, 0x4BBA82, 0x746487,
- 0x3F877A, 0xC72C4A, 0x69CFBA, 0x208D7D, 0x4BAED1, 0x213A67, 0x1C09AD, 0x17DF90, 0x4E6475, 0x8E60D4,
- 0xCE7D27, 0x2117E2, 0xEF7E4A, 0x0EC7FE, 0x25FFF7, 0x816603, 0xFBCBC4, 0x62D682, 0x9B47DB, 0x4D9FB3,
- 0xC9F2C2, 0x6DD3D1, 0x8FD9A7, 0x97FA8B, 0x5D49EE, 0xB1FAF9, 0x7C5ECF, 0x41CE7D, 0xE294A4, 0xBA9AFE,
- 0xD7EC47 };
-
- /*
- * Constants for 2*PI. Only the 23 most significant bits of each mantissa
- * are used. 2*PI approximation = sum of TWOPI_TAB<i>.
- */
- private static final double TWOPI_TAB0 = Double.longBitsToDouble(0x401921FB40000000L);
- private static final double TWOPI_TAB1 = Double.longBitsToDouble(0x3E94442D00000000L);
- private static final double TWOPI_TAB2 = Double.longBitsToDouble(0x3D18469880000000L);
- private static final double TWOPI_TAB3 = Double.longBitsToDouble(0x3B98CC5160000000L);
- private static final double TWOPI_TAB4 = Double.longBitsToDouble(0x3A101B8380000000L);
-
- private static final double INVPIO2 = Double.longBitsToDouble(0x3FE45F306DC9C883L); // 6.36619772367581382433e-01
- // 53
- // bits
- // of
- // 2/pi
- private static final double PIO2_HI = Double.longBitsToDouble(0x3FF921FB54400000L); // 1.57079632673412561417e+00
- // first
- // 33
- // bits
- // of
- // pi/2
- private static final double PIO2_LO = Double.longBitsToDouble(0x3DD0B4611A626331L); // 6.07710050650619224932e-11
- // pi/2
- // -
- // PIO2_HI
- private static final double INVTWOPI = INVPIO2 / 4;
- private static final double TWOPI_HI = 4 * PIO2_HI;
- private static final double TWOPI_LO = 4 * PIO2_LO;
-
- // fdlibm uses 2^19*PI/2 here, but we normalize with % 2*PI instead of %
- // PI/2,
- // and we can bear some more error.
- private static final double NORMALIZE_ANGLE_MAX_MEDIUM_DOUBLE = StrictMath.pow(2, 20) * (2 * Math.PI);
-
- /**
- * 2*Math.PI, normalized into [-PI,PI]. Computed using
- * normalizeMinusPiPi(double).
- */
- private static final double TWO_MATH_PI_IN_MINUS_PI_PI = -2.449293598153844E-16;
-
- // --------------------------------------------------------------------------
- // CONSTANTS AND TABLES FOR COS, SIN
- // --------------------------------------------------------------------------
-
- private static final int SIN_COS_TABS_SIZE = (1 << getTabSizePower(11)) + 1;
- private static final double SIN_COS_DELTA_HI = TWOPI_HI / (SIN_COS_TABS_SIZE - 1);
- private static final double SIN_COS_DELTA_LO = TWOPI_LO / (SIN_COS_TABS_SIZE - 1);
- private static final double SIN_COS_INDEXER = 1 / (SIN_COS_DELTA_HI + SIN_COS_DELTA_LO);
- private static final double[] sinTab = new double[SIN_COS_TABS_SIZE];
- private static final double[] cosTab = new double[SIN_COS_TABS_SIZE];
-
- // Max abs value for fast modulo, above which we use regular angle
- // normalization.
- // This value must be < (Integer.MAX_VALUE / SIN_COS_INDEXER), to stay in
- // range of int type.
- // The higher it is, the higher the error, but also the faster it is for
- // lower values.
- // If you set it to ((Integer.MAX_VALUE / SIN_COS_INDEXER) * 0.99), worse
- // accuracy on double range is about 1e-10.
- private static final double SIN_COS_MAX_VALUE_FOR_INT_MODULO = ((Integer.MAX_VALUE >> 9) / SIN_COS_INDEXER) * 0.99;
-
- // --------------------------------------------------------------------------
- // CONSTANTS AND TABLES FOR TAN
- // --------------------------------------------------------------------------
-
- // We use the following formula:
- // 1) tan(-x) = -tan(x)
- // 2) tan(x) = 1/tan(PI/2-x)
- // ---> we only have to compute tan(x) on [0,A] with PI/4<=A<PI/2.
-
- // We use indexing past look-up tables, so that indexing information
- // allows for fast recomputation of angle in [0,PI/2] range.
- private static final int TAN_VIRTUAL_TABS_SIZE = (1 << getTabSizePower(12)) + 1;
-
- // Must be >= 45deg, and supposed to be >= 51.4deg, as fdlibm code is not
- // supposed to work with values inferior to that (51.4deg is about
- // (PI/2-Double.longBitsToDouble(0x3FE5942800000000L))).
- private static final double TAN_MAX_VALUE_FOR_TABS = Math.toRadians(77.0);
-
- private static final int TAN_TABS_SIZE = (int) ((TAN_MAX_VALUE_FOR_TABS / (Math.PI / 2)) * (TAN_VIRTUAL_TABS_SIZE - 1)) + 1;
- private static final double TAN_DELTA_HI = PIO2_HI / (TAN_VIRTUAL_TABS_SIZE - 1);
- private static final double TAN_DELTA_LO = PIO2_LO / (TAN_VIRTUAL_TABS_SIZE - 1);
- private static final double TAN_INDEXER = 1 / (TAN_DELTA_HI + TAN_DELTA_LO);
- private static final double[] tanTab = new double[TAN_TABS_SIZE];
- private static final double[] tanDer1DivF1Tab = new double[TAN_TABS_SIZE];
- private static final double[] tanDer2DivF2Tab = new double[TAN_TABS_SIZE];
- private static final double[] tanDer3DivF3Tab = new double[TAN_TABS_SIZE];
- private static final double[] tanDer4DivF4Tab = new double[TAN_TABS_SIZE];
-
- // Max abs value for fast modulo, above which we use regular angle
- // normalization.
- // This value must be < (Integer.MAX_VALUE / TAN_INDEXER), to stay in range
- // of int type.
- // The higher it is, the higher the error, but also the faster it is for
- // lower values.
- private static final double TAN_MAX_VALUE_FOR_INT_MODULO = (((Integer.MAX_VALUE >> 9) / TAN_INDEXER) * 0.99);
-
- // --------------------------------------------------------------------------
- // CONSTANTS AND TABLES FOR ACOS, ASIN
- // --------------------------------------------------------------------------
-
- // We use the following formula:
- // 1) acos(x) = PI/2 - asin(x)
- // 2) asin(-x) = -asin(x)
- // ---> we only have to compute asin(x) on [0,1].
- // For values not close to +-1, we use look-up tables;
- // for values near +-1, we use code derived from fdlibm.
-
- // Supposed to be >= sin(77.2deg), as fdlibm code is supposed to work with
- // values > 0.975,
- // but seems to work well enough as long as value >= sin(25deg).
- private static final double ASIN_MAX_VALUE_FOR_TABS = StrictMath.sin(Math.toRadians(73.0));
-
- private static final int ASIN_TABS_SIZE = (1 << getTabSizePower(13)) + 1;
- private static final double ASIN_DELTA = ASIN_MAX_VALUE_FOR_TABS / (ASIN_TABS_SIZE - 1);
- private static final double ASIN_INDEXER = 1 / ASIN_DELTA;
- private static final double[] asinTab = new double[ASIN_TABS_SIZE];
- private static final double[] asinDer1DivF1Tab = new double[ASIN_TABS_SIZE];
- private static final double[] asinDer2DivF2Tab = new double[ASIN_TABS_SIZE];
- private static final double[] asinDer3DivF3Tab = new double[ASIN_TABS_SIZE];
- private static final double[] asinDer4DivF4Tab = new double[ASIN_TABS_SIZE];
-
- private static final double ASIN_MAX_VALUE_FOR_POWTABS = StrictMath.sin(Math.toRadians(88.6));
- private static final int ASIN_POWTABS_POWER = 84;
-
- private static final double ASIN_POWTABS_ONE_DIV_MAX_VALUE = 1 / ASIN_MAX_VALUE_FOR_POWTABS;
- private static final int ASIN_POWTABS_SIZE = USE_POWTABS_FOR_ASIN ? (1 << getTabSizePower(12)) + 1 : 0;
- private static final int ASIN_POWTABS_SIZE_MINUS_ONE = ASIN_POWTABS_SIZE - 1;
- private static final double[] asinParamPowTab = new double[ASIN_POWTABS_SIZE];
- private static final double[] asinPowTab = new double[ASIN_POWTABS_SIZE];
- private static final double[] asinDer1DivF1PowTab = new double[ASIN_POWTABS_SIZE];
- private static final double[] asinDer2DivF2PowTab = new double[ASIN_POWTABS_SIZE];
- private static final double[] asinDer3DivF3PowTab = new double[ASIN_POWTABS_SIZE];
- private static final double[] asinDer4DivF4PowTab = new double[ASIN_POWTABS_SIZE];
-
- private static final double ASIN_PIO2_HI = Double.longBitsToDouble(0x3FF921FB54442D18L); // 1.57079632679489655800e+00
- private static final double ASIN_PIO2_LO = Double.longBitsToDouble(0x3C91A62633145C07L); // 6.12323399573676603587e-17
- private static final double ASIN_PS0 = Double.longBitsToDouble(0x3fc5555555555555L); // 1.66666666666666657415e-01
- private static final double ASIN_PS1 = Double.longBitsToDouble(0xbfd4d61203eb6f7dL); // -3.25565818622400915405e-01
- private static final double ASIN_PS2 = Double.longBitsToDouble(0x3fc9c1550e884455L); // 2.01212532134862925881e-01
- private static final double ASIN_PS3 = Double.longBitsToDouble(0xbfa48228b5688f3bL); // -4.00555345006794114027e-02
- private static final double ASIN_PS4 = Double.longBitsToDouble(0x3f49efe07501b288L); // 7.91534994289814532176e-04
- private static final double ASIN_PS5 = Double.longBitsToDouble(0x3f023de10dfdf709L); // 3.47933107596021167570e-05
- private static final double ASIN_QS1 = Double.longBitsToDouble(0xc0033a271c8a2d4bL); // -2.40339491173441421878e+00
- private static final double ASIN_QS2 = Double.longBitsToDouble(0x40002ae59c598ac8L); // 2.02094576023350569471e+00
- private static final double ASIN_QS3 = Double.longBitsToDouble(0xbfe6066c1b8d0159L); // -6.88283971605453293030e-01
- private static final double ASIN_QS4 = Double.longBitsToDouble(0x3fb3b8c5b12e9282L); // 7.70381505559019352791e-02
-
- // --------------------------------------------------------------------------
- // CONSTANTS AND TABLES FOR ATAN
- // --------------------------------------------------------------------------
-
- // We use the formula atan(-x) = -atan(x)
- // ---> we only have to compute atan(x) on [0,+infinity[.
- // For values corresponding to angles not close to +-PI/2, we use look-up
- // tables;
- // for values corresponding to angles near +-PI/2, we use code derived from
- // fdlibm.
-
- // Supposed to be >= tan(67.7deg), as fdlibm code is supposed to work with
- // values > 2.4375.
- private static final double ATAN_MAX_VALUE_FOR_TABS = StrictMath.tan(Math.toRadians(74.0));
-
- private static final int ATAN_TABS_SIZE = (1 << getTabSizePower(12)) + 1;
- private static final double ATAN_DELTA = ATAN_MAX_VALUE_FOR_TABS / (ATAN_TABS_SIZE - 1);
- private static final double ATAN_INDEXER = 1 / ATAN_DELTA;
- private static final double[] atanTab = new double[ATAN_TABS_SIZE];
- private static final double[] atanDer1DivF1Tab = new double[ATAN_TABS_SIZE];
- private static final double[] atanDer2DivF2Tab = new double[ATAN_TABS_SIZE];
- private static final double[] atanDer3DivF3Tab = new double[ATAN_TABS_SIZE];
- private static final double[] atanDer4DivF4Tab = new double[ATAN_TABS_SIZE];
-
- private static final double ATAN_HI3 = Double.longBitsToDouble(0x3ff921fb54442d18L); // 1.57079632679489655800e+00
- // atan(inf)hi
- private static final double ATAN_LO3 = Double.longBitsToDouble(0x3c91a62633145c07L); // 6.12323399573676603587e-17
- // atan(inf)lo
- private static final double ATAN_AT0 = Double.longBitsToDouble(0x3fd555555555550dL); // 3.33333333333329318027e-01
- private static final double ATAN_AT1 = Double.longBitsToDouble(0xbfc999999998ebc4L); // -1.99999999998764832476e-01
- private static final double ATAN_AT2 = Double.longBitsToDouble(0x3fc24924920083ffL); // 1.42857142725034663711e-01
- private static final double ATAN_AT3 = Double.longBitsToDouble(0xbfbc71c6fe231671L); // -1.11111104054623557880e-01
- private static final double ATAN_AT4 = Double.longBitsToDouble(0x3fb745cdc54c206eL); // 9.09088713343650656196e-02
- private static final double ATAN_AT5 = Double.longBitsToDouble(0xbfb3b0f2af749a6dL); // -7.69187620504482999495e-02
- private static final double ATAN_AT6 = Double.longBitsToDouble(0x3fb10d66a0d03d51L); // 6.66107313738753120669e-02
- private static final double ATAN_AT7 = Double.longBitsToDouble(0xbfadde2d52defd9aL); // -5.83357013379057348645e-02
- private static final double ATAN_AT8 = Double.longBitsToDouble(0x3fa97b4b24760debL); // 4.97687799461593236017e-02
- private static final double ATAN_AT9 = Double.longBitsToDouble(0xbfa2b4442c6a6c2fL); // -3.65315727442169155270e-02
- private static final double ATAN_AT10 = Double.longBitsToDouble(0x3f90ad3ae322da11L); // 1.62858201153657823623e-02
-
- // --------------------------------------------------------------------------
- // CONSTANTS AND TABLES FOR EXP AND EXPM1
- // --------------------------------------------------------------------------
-
- private static final double EXP_OVERFLOW_LIMIT = Double.longBitsToDouble(0x40862E42FEFA39EFL); // 7.09782712893383973096e+02
- private static final double EXP_UNDERFLOW_LIMIT = Double.longBitsToDouble(0xC0874910D52D3051L); // -7.45133219101941108420e+02
- private static final double EXP_MIN_INT_LIMIT = -705;
- private static final int EXP_LO_DISTANCE_TO_ZERO_POT = 0;
- private static final int EXP_LO_DISTANCE_TO_ZERO = (1 << EXP_LO_DISTANCE_TO_ZERO_POT);
- private static final int EXP_LO_TAB_SIZE_POT = getTabSizePower(11);
- private static final int EXP_LO_TAB_SIZE = (1 << EXP_LO_TAB_SIZE_POT) + 1;
- private static final int EXP_LO_TAB_MID_INDEX = ((EXP_LO_TAB_SIZE - 1) / 2);
- private static final int EXP_LO_INDEXING = EXP_LO_TAB_MID_INDEX / EXP_LO_DISTANCE_TO_ZERO;
- private static final int EXP_LO_INDEXING_DIV_SHIFT = EXP_LO_TAB_SIZE_POT - 1 - EXP_LO_DISTANCE_TO_ZERO_POT;
- private static final double[] expHiTab = new double[1 + (int) EXP_OVERFLOW_LIMIT];
- private static final double[] expHiInvTab = new double[1 - (int) EXP_UNDERFLOW_LIMIT];
- private static final double[] expLoPosTab = new double[EXP_LO_TAB_SIZE];
- private static final double[] expLoNegTab = new double[EXP_LO_TAB_SIZE];
-
- // --------------------------------------------------------------------------
- // CONSTANTS FOR QUICK EXP
- // --------------------------------------------------------------------------
-
- private static final double EXP_QUICK_A = TWO_POW_52 / LOG_2;
- private static final double EXP_QUICK_B = MAX_DOUBLE_EXPONENT * TWO_POW_52;
- private static final double EXP_QUICK_C = Math.ceil((StrictMath.log(LOG_2 + 2 / Math.E) - LOG_2 - StrictMath
- .log(LOG_2)) * EXP_QUICK_A);
-
- // --------------------------------------------------------------------------
- // CONSTANTS AND TABLES FOR LOG AND LOG1P
- // --------------------------------------------------------------------------
-
- private static final int LOG_BITS = getTabSizePower(12);
- private static final int LOG_TAB_SIZE = (1 << LOG_BITS);
- private static final double[] logXLogTab = new double[LOG_TAB_SIZE];
- private static final double[] logXTab = new double[LOG_TAB_SIZE];
- private static final double[] logXInvTab = new double[LOG_TAB_SIZE];
-
- // --------------------------------------------------------------------------
- // TABLE FOR POWERS OF TWO
- // --------------------------------------------------------------------------
-
- private static final double[] twoPowTab = new double[(MAX_DOUBLE_EXPONENT - MIN_DOUBLE_EXPONENT) + 1];
-
- // --------------------------------------------------------------------------
- // CONSTANTS AND TABLES FOR SQRT
- // --------------------------------------------------------------------------
-
- private static final int SQRT_LO_BITS = getTabSizePower(12);
- private static final int SQRT_LO_TAB_SIZE = (1 << SQRT_LO_BITS);
- private static final double[] sqrtXSqrtHiTab = new double[MAX_DOUBLE_EXPONENT - MIN_DOUBLE_EXPONENT + 1];
- private static final double[] sqrtXSqrtLoTab = new double[SQRT_LO_TAB_SIZE];
- private static final double[] sqrtSlopeHiTab = new double[MAX_DOUBLE_EXPONENT - MIN_DOUBLE_EXPONENT + 1];
- private static final double[] sqrtSlopeLoTab = new double[SQRT_LO_TAB_SIZE];
-
- // --------------------------------------------------------------------------
- // CONSTANTS AND TABLES FOR CBRT
- // --------------------------------------------------------------------------
-
- private static final int CBRT_LO_BITS = getTabSizePower(12);
- private static final int CBRT_LO_TAB_SIZE = (1 << CBRT_LO_BITS);
- // For CBRT_LO_BITS = 12:
- // cbrtXCbrtLoTab[0] = 1.0.
- // cbrtXCbrtLoTab[1] = cbrt(1. 000000000000
- // 1111111111111111111111111111111111111111b)
- // cbrtXCbrtLoTab[2] = cbrt(1. 000000000001
- // 1111111111111111111111111111111111111111b)
- // cbrtXCbrtLoTab[3] = cbrt(1. 000000000010
- // 1111111111111111111111111111111111111111b)
- // cbrtXCbrtLoTab[4] = cbrt(1. 000000000011
- // 1111111111111111111111111111111111111111b)
- // etc.
- private static final double[] cbrtXCbrtHiTab = new double[MAX_DOUBLE_EXPONENT - MIN_DOUBLE_EXPONENT + 1];
- private static final double[] cbrtXCbrtLoTab = new double[CBRT_LO_TAB_SIZE];
- private static final double[] cbrtSlopeHiTab = new double[MAX_DOUBLE_EXPONENT - MIN_DOUBLE_EXPONENT + 1];
- private static final double[] cbrtSlopeLoTab = new double[CBRT_LO_TAB_SIZE];
-
- // --------------------------------------------------------------------------
- // PUBLIC TREATMENTS
- // --------------------------------------------------------------------------
-
- /**
- * @param angle
- * Angle in radians.
- * @return Angle cosine.
- */
- public static double cos(double angle)
- {
- if (USE_JDK_MATH)
- {
- return STRICT_MATH ? StrictMath.cos(angle) : Math.cos(angle);
- }
- angle = Math.abs(angle);
- if (angle > SIN_COS_MAX_VALUE_FOR_INT_MODULO)
- {
- // Faster than using normalizeZeroTwoPi.
- angle = remainderTwoPi(angle);
- if (angle < 0.0)
- {
- angle += 2 * Math.PI;
- }
- }
- // index: possibly outside tables range.
- int index = (int) (angle * SIN_COS_INDEXER + 0.5);
- double delta = (angle - index * SIN_COS_DELTA_HI) - index * SIN_COS_DELTA_LO;
- // Making sure index is within tables range.
- // Last value of each table is the same than first, so we ignore it
- // (tabs size minus one) for modulo.
- index &= (SIN_COS_TABS_SIZE - 2); // index % (SIN_COS_TABS_SIZE-1)
- double indexCos = cosTab[index];
- double indexSin = sinTab[index];
- return indexCos
- + delta
- * (-indexSin + delta
- * (-indexCos * ONE_DIV_F2 + delta * (indexSin * ONE_DIV_F3 + delta * indexCos * ONE_DIV_F4)));
- }
-
- /**
- * Quick cosine, with accuracy of about 1.6e-3 (PI/<look-up tabs size>) for
- * |angle| < 6588397.0 (Integer.MAX_VALUE * (2*PI/<look-up tabs size>)), and
- * no accuracy at all for larger values.
- *
- * @param angle
- * Angle in radians.
- * @return Angle cosine.
- */
- public static double cosQuick(double angle)
- {
- if (USE_JDK_MATH)
- {
- return STRICT_MATH ? StrictMath.cos(angle) : Math.cos(angle);
- }
- return cosTab[((int) (Math.abs(angle) * SIN_COS_INDEXER + 0.5)) & (SIN_COS_TABS_SIZE - 2)];
- }
-
- /**
- * @param angle
- * Angle in radians.
- * @return Angle sine.
- */
- public static double sin(double angle)
- {
- if (USE_JDK_MATH)
- {
- return STRICT_MATH ? StrictMath.sin(angle) : Math.sin(angle);
- }
- boolean negateResult;
- if (angle < 0.0)
- {
- angle = -angle;
- negateResult = true;
- }
- else
- {
- negateResult = false;
- }
- if (angle > SIN_COS_MAX_VALUE_FOR_INT_MODULO)
- {
- // Faster than using normalizeZeroTwoPi.
- angle = remainderTwoPi(angle);
- if (angle < 0.0)
- {
- angle += 2 * Math.PI;
- }
- }
- int index = (int) (angle * SIN_COS_INDEXER + 0.5);
- double delta = (angle - index * SIN_COS_DELTA_HI) - index * SIN_COS_DELTA_LO;
- index &= (SIN_COS_TABS_SIZE - 2); // index % (SIN_COS_TABS_SIZE-1)
- double indexSin = sinTab[index];
- double indexCos = cosTab[index];
- double result = indexSin
- + delta
- * (indexCos + delta
- * (-indexSin * ONE_DIV_F2 + delta * (-indexCos * ONE_DIV_F3 + delta * indexSin * ONE_DIV_F4)));
- return negateResult ? -result : result;
- }
-
- /**
- * Quick sine, with accuracy of about 1.6e-3 (PI/<look-up tabs size>) for
- * |angle| < 6588397.0 (Integer.MAX_VALUE * (2*PI/<look-up tabs size>)), and
- * no accuracy at all for larger values.
- *
- * @param angle
- * Angle in radians.
- * @return Angle sine.
- */
- public static double sinQuick(double angle)
- {
- if (USE_JDK_MATH)
- {
- return STRICT_MATH ? StrictMath.sin(angle) : Math.sin(angle);
- }
- return cosTab[((int) (Math.abs(angle - Math.PI / 2) * SIN_COS_INDEXER + 0.5)) & (SIN_COS_TABS_SIZE - 2)];
- }
-
- /**
- * @param angle
- * Angle in radians.
- * @return Angle tangent.
- */
- public static double tan(double angle)
- {
- if (USE_JDK_MATH)
- {
- return STRICT_MATH ? StrictMath.tan(angle) : Math.tan(angle);
- }
- if (Math.abs(angle) > TAN_MAX_VALUE_FOR_INT_MODULO)
- {
- // Faster than using normalizeMinusHalfPiHalfPi.
- angle = remainderTwoPi(angle);
- if (angle < -Math.PI / 2)
- {
- angle += Math.PI;
- }
- else if (angle > Math.PI / 2)
- {
- angle -= Math.PI;
- }
- }
- boolean negateResult;
- if (angle < 0.0)
- {
- angle = -angle;
- negateResult = true;
- }
- else
- {
- negateResult = false;
- }
- int index = (int) (angle * TAN_INDEXER + 0.5);
- double delta = (angle - index * TAN_DELTA_HI) - index * TAN_DELTA_LO;
- // index modulo PI, i.e. 2*(virtual tab size minus one).
- index &= (2 * (TAN_VIRTUAL_TABS_SIZE - 1) - 1); // index %
- // (2*(TAN_VIRTUAL_TABS_SIZE-1))
- // Here, index is in [0,2*(TAN_VIRTUAL_TABS_SIZE-1)-1], i.e. indicates
- // an angle in [0,PI[.
- if (index > (TAN_VIRTUAL_TABS_SIZE - 1))
- {
- index = (2 * (TAN_VIRTUAL_TABS_SIZE - 1)) - index;
- delta = -delta;
- negateResult = !negateResult;
- }
- double result;
- if (index < TAN_TABS_SIZE)
- {
- result = tanTab[index]
- + delta
- * (tanDer1DivF1Tab[index] + delta
- * (tanDer2DivF2Tab[index] + delta
- * (tanDer3DivF3Tab[index] + delta * tanDer4DivF4Tab[index])));
- }
- else
- { // angle in ]TAN_MAX_VALUE_FOR_TABS,TAN_MAX_VALUE_FOR_INT_MODULO], or
- // angle is NaN
- // Using tan(angle) == 1/tan(PI/2-angle) formula: changing angle
- // (index and delta), and inverting.
- index = (TAN_VIRTUAL_TABS_SIZE - 1) - index;
- result = 1 / (tanTab[index] - delta
- * (tanDer1DivF1Tab[index] - delta
- * (tanDer2DivF2Tab[index] - delta
- * (tanDer3DivF3Tab[index] - delta * tanDer4DivF4Tab[index]))));
- }
- return negateResult ? -result : result;
- }
-
- /**
- * @param value
- * Value in [-1,1].
- * @return Value arccosine, in radians, in [0,PI].
- */
- public static double acos(double value)
- {
- if (USE_JDK_MATH)
- {
- return STRICT_MATH ? StrictMath.acos(value) : Math.acos(value);
- }
- return Math.PI / 2 - FastMath.asin(value);
- }
-
- /**
- * If value is not NaN and is outside [-1,1] range, closest value in this
- * range is used.
- *
- * @param value
- * Value in [-1,1].
- * @return Value arccosine, in radians, in [0,PI].
- */
- public static double acosInRange(double value)
- {
- if (value <= -1)
- {
- return Math.PI;
- }
- else if (value >= 1)
- {
- return 0.0;
- }
- else
- {
- return FastMath.acos(value);
- }
- }
-
- /**
- * @param value
- * Value in [-1,1].
- * @return Value arcsine, in radians, in [-PI/2,PI/2].
- */
- @SuppressWarnings("unused")
- public static double asin(double value)
- {
- if (USE_JDK_MATH)
- {
- return STRICT_MATH ? StrictMath.asin(value) : Math.asin(value);
- }
- boolean negateResult;
- if (value < 0.0)
- {
- value = -value;
- negateResult = true;
- }
- else
- {
- negateResult = false;
- }
- if (value <= ASIN_MAX_VALUE_FOR_TABS)
- {
- int index = (int) (value * ASIN_INDEXER + 0.5);
- double delta = value - index * ASIN_DELTA;
- double result = asinTab[index]
- + delta
- * (asinDer1DivF1Tab[index] + delta
- * (asinDer2DivF2Tab[index] + delta
- * (asinDer3DivF3Tab[index] + delta * asinDer4DivF4Tab[index])));
- return negateResult ? -result : result;
- }
- else if (USE_POWTABS_FOR_ASIN && (value <= ASIN_MAX_VALUE_FOR_POWTABS))
- {
- int index = (int) (FastMath.powFast(value * ASIN_POWTABS_ONE_DIV_MAX_VALUE, ASIN_POWTABS_POWER)
- * ASIN_POWTABS_SIZE_MINUS_ONE + 0.5);
- double delta = value - asinParamPowTab[index];
- double result = asinPowTab[index]
- + delta
- * (asinDer1DivF1PowTab[index] + delta
- * (asinDer2DivF2PowTab[index] + delta
- * (asinDer3DivF3PowTab[index] + delta * asinDer4DivF4PowTab[index])));
- return negateResult ? -result : result;
- }
- else
- { // value > ASIN_MAX_VALUE_FOR_TABS, or value is NaN
- // This part is derived from fdlibm.
- if (value < 1.0)
- {
- double t = (1.0 - value) * 0.5;
- double p = t
- * (ASIN_PS0 + t * (ASIN_PS1 + t * (ASIN_PS2 + t * (ASIN_PS3 + t * (ASIN_PS4 + t * ASIN_PS5)))));
- double q = 1.0 + t * (ASIN_QS1 + t * (ASIN_QS2 + t * (ASIN_QS3 + t * ASIN_QS4)));
- double s = FastMath.sqrt(t);
- double z = s + s * (p / q);
- double result = ASIN_PIO2_HI - ((z + z) - ASIN_PIO2_LO);
- return negateResult ? -result : result;
- }
- else
- { // value >= 1.0, or value is NaN
- if (value == 1.0)
- {
- return negateResult ? -Math.PI / 2 : Math.PI / 2;
- }
- else
- {
- return Double.NaN;
- }
- }
- }
- }
-
- /**
- * If value is not NaN and is outside [-1,1] range, closest value in this
- * range is used.
- *
- * @param value
- * Value in [-1,1].
- * @return Value arcsine, in radians, in [-PI/2,PI/2].
- */
- public static double asinInRange(double value)
- {
- if (value <= -1)
- {
- return -Math.PI / 2;
- }
- else if (value >= 1)
- {
- return Math.PI / 2;
- }
- else
- {
- return FastMath.asin(value);
- }
- }
-
- /**
- * @param value
- * A double value.
- * @return Value arctangent, in radians, in [-PI/2,PI/2].
- */
- public static double atan(double value)
- {
- if (USE_JDK_MATH)
- {
- return STRICT_MATH ? StrictMath.atan(value) : Math.atan(value);
- }
- boolean negateResult;
- if (value < 0.0)
- {
- value = -value;
- negateResult = true;
- }
- else
- {
- negateResult = false;
- }
- if (value == 1.0)
- {
- // We want "exact" result for 1.0.
- return negateResult ? -Math.PI / 4 : Math.PI / 4;
- }
- else if (value <= ATAN_MAX_VALUE_FOR_TABS)
- {
- int index = (int) (value * ATAN_INDEXER + 0.5);
- double delta = value - index * ATAN_DELTA;
- double result = atanTab[index]
- + delta
- * (atanDer1DivF1Tab[index] + delta
- * (atanDer2DivF2Tab[index] + delta
- * (atanDer3DivF3Tab[index] + delta * atanDer4DivF4Tab[index])));
- return negateResult ? -result : result;
- }
- else
- { // value > ATAN_MAX_VALUE_FOR_TABS, or value is NaN
- // This part is derived from fdlibm.
- if (value < TWO_POW_66)
- {
- double x = -1 / value;
- double x2 = x * x;
- double x4 = x2 * x2;
- double s1 = x2
- * (ATAN_AT0 + x4
- * (ATAN_AT2 + x4 * (ATAN_AT4 + x4 * (ATAN_AT6 + x4 * (ATAN_AT8 + x4 * ATAN_AT10)))));
- double s2 = x4 * (ATAN_AT1 + x4 * (ATAN_AT3 + x4 * (ATAN_AT5 + x4 * (ATAN_AT7 + x4 * ATAN_AT9))));
- double result = ATAN_HI3 - ((x * (s1 + s2) - ATAN_LO3) - x);
- return negateResult ? -result : result;
- }
- else
- { // value >= 2^66, or value is NaN
- if (Double.isNaN(value))
- {
- return Double.NaN;
- }
- else
- {
- return negateResult ? -Math.PI / 2 : Math.PI / 2;
- }
- }
- }
- }
-
- /**
- * For special values for which multiple conventions could be adopted,
- * behaves like Math.atan2(double,double).
- *
- * @param y
- * Coordinate on y axis.
- * @param x
- * Coordinate on x axis.
- * @return Angle from x axis positive side to (x,y) position, in radians, in
- * [-PI,PI]. Angle measure is positive when going from x axis to y
- * axis (positive sides).
- */
- public static double atan2(double y, double x)
- {
- if (USE_JDK_MATH)
- {
- return STRICT_MATH ? StrictMath.atan2(y, x) : Math.atan2(y, x);
- }
- if (x > 0.0)
- {
- if (y == 0.0)
- {
- return (1 / y == Double.NEGATIVE_INFINITY) ? -0.0 : 0.0;
- }
- if (x == Double.POSITIVE_INFINITY)
- {
- if (y == Double.POSITIVE_INFINITY)
- {
- return Math.PI / 4;
- }
- else if (y == Double.NEGATIVE_INFINITY)
- {
- return -Math.PI / 4;
- }
- else if (y > 0.0)
- {
- return 0.0;
- }
- else if (y < 0.0)
- {
- return -0.0;
- }
- else
- {
- return Double.NaN;
- }
- }
- else
- {
- return FastMath.atan(y / x);
- }
- }
- else if (x < 0.0)
- {
- if (y == 0.0)
- {
- return (1 / y == Double.NEGATIVE_INFINITY) ? -Math.PI : Math.PI;
- }
- if (x == Double.NEGATIVE_INFINITY)
- {
- if (y == Double.POSITIVE_INFINITY)
- {
- return 3 * Math.PI / 4;
- }
- else if (y == Double.NEGATIVE_INFINITY)
- {
- return -3 * Math.PI / 4;
- }
- else if (y > 0.0)
- {
- return Math.PI;
- }
- else if (y < 0.0)
- {
- return -Math.PI;
- }
- else
- {
- return Double.NaN;
- }
- }
- else if (y > 0.0)
- {
- return Math.PI / 2 + FastMath.atan(-x / y);
- }
- else if (y < 0.0)
- {
- return -Math.PI / 2 - FastMath.atan(x / y);
- }
- else
- {
- return Double.NaN;
- }
- }
- else if (x == 0.0)
- {
- if (y == 0.0)
- {
- if (1 / x == Double.NEGATIVE_INFINITY)
- {
- return (1 / y == Double.NEGATIVE_INFINITY) ? -Math.PI : Math.PI;
- }
- else
- {
- return (1 / y == Double.NEGATIVE_INFINITY) ? -0.0 : 0.0;
- }
- }
- if (y > 0.0)
- {
- return Math.PI / 2;
- }
- else if (y < 0.0)
- {
- return -Math.PI / 2;
- }
- else
- {
- return Double.NaN;
- }
- }
- else
- {
- return Double.NaN;
- }
- }
-
- /**
- * @param value
- * A double value.
- * @return Value hyperbolic cosine.
- */
- public static double cosh(double value)
- {
- if (USE_JDK_MATH)
- {
- return STRICT_MATH ? StrictMath.cosh(value) : Math.cosh(value);
- }
- // cosh(x) = (exp(x)+exp(-x))/2
- if (value < 0.0)
- {
- value = -value;
- }
- if (value < LOG_TWO_POW_27)
- {
- if (value < TWO_POW_N27)
- {
- // cosh(x)
- // = (exp(x)+exp(-x))/2
- // = ((1+x+x^2/2!+...) + (1-x+x^2/2!-...))/2
- // = 1+x^2/2!+x^4/4!+...
- // For value of x small in magnitude, the sum of the terms does
- // not add to 1.
- return 1;
- }
- else
- {
- double t = FastMath.exp(value);
- return 0.5 * (t + 1 / t);
- }
- }
- else if (value < LOG_DOUBLE_MAX_VALUE)
- {
- return 0.5 * FastMath.exp(value);
- }
- else
- {
- double t = FastMath.exp(value * 0.5);
- return (0.5 * t) * t;
- }
- }
-
- /**
- * @param value
- * A double value.
- * @return Value hyperbolic sine.
- */
- public static double sinh(double value)
- {
- if (USE_JDK_MATH)
- {
- return STRICT_MATH ? StrictMath.sinh(value) : Math.sinh(value);
- }
- // sinh(x) = (exp(x)-exp(-x))/2
- double h;
- if (value < 0.0)
- {
- value = -value;
- h = -0.5;
- }
- else
- {
- h = 0.5;
- }
- if (value < 22.0)
- {
- if (value < TWO_POW_N28)
- {
- return (h < 0.0) ? -value : value;
- }
- else
- {
- double t = FastMath.expm1(value);
- // Might be more accurate, if value < 1: return
- // h*((t+t)-t*t/(t+1.0)).
- return h * (t + t / (t + 1.0));
- }
- }
- else if (value < LOG_DOUBLE_MAX_VALUE)
- {
- return h * FastMath.exp(value);
- }
- else
- {
- double t = FastMath.exp(value * 0.5);
- return (h * t) * t;
- }
- }
-
- /**
- * @param value
- * A double value.
- * @return Value hyperbolic tangent.
- */
- public static double tanh(double value)
- {
- if (USE_JDK_MATH)
- {
- return STRICT_MATH ? StrictMath.tanh(value) : Math.tanh(value);
- }
- // tanh(x) = sinh(x)/cosh(x)
- // = (exp(x)-exp(-x))/(exp(x)+exp(-x))
- // = (exp(2*x)-1)/(exp(2*x)+1)
- boolean negateResult;
- if (value < 0.0)
- {
- value = -value;
- negateResult = true;
- }
- else
- {
- negateResult = false;
- }
- double z;
- if (value < 22.0)
- {
- if (value < TWO_POW_N55)
- {
- return negateResult ? -value * (1.0 - value) : value * (1.0 + value);
- }
- else if (value >= 1)
- {
- z = 1.0 - 2.0 / (FastMath.expm1(value + value) + 2.0);
- }
- else
- {
- double t = FastMath.expm1(-(value + value));
- z = -t / (t + 2.0);
- }
- }
- else
- {
- z = (value != value) ? Double.NaN : 1.0;
- }
- return negateResult ? -z : z;
- }
-
- /**
- * @param value
- * A double value.
- * @return e^value.
- */
- public static double exp(double value)
- {
- if (USE_JDK_MATH)
- {
- return STRICT_MATH ? StrictMath.exp(value) : Math.exp(value);
- }
- // exp(x) = exp([x])*exp(y)
- // with [x] the integer part of x, and y = x-[x]
- // ===>
- // We find an approximation of y, called z.
- // ===>
- // exp(x) = exp([x])*(exp(z)*exp(epsilon))
- // ===>
- // We have exp([x]) and exp(z) pre-computed in tables, we "just" have to
- // compute exp(epsilon).
- //
- // We use the same indexing (cast to int) to compute x integer part and
- // the
- // table index corresponding to z, to avoid two int casts.
- // Also, to optimize index multiplication and division, we use powers of
- // two,
- // so that we can do it with bits shifts.
- if (value >= 0.0)
- {
- if (value > EXP_OVERFLOW_LIMIT)
- {
- return Double.POSITIVE_INFINITY;
- }
- int i = (int) (value * EXP_LO_INDEXING);
- int valueInt = (i >> EXP_LO_INDEXING_DIV_SHIFT);
- i -= (valueInt << EXP_LO_INDEXING_DIV_SHIFT);
- double delta = (value - valueInt) - i * (1.0 / EXP_LO_INDEXING);
- return expHiTab[valueInt]
- * (expLoPosTab[i + EXP_LO_TAB_MID_INDEX] * (1 + delta
- * (1 + delta * (1.0 / 2 + delta * (1.0 / 6 + delta * (1.0 / 24))))));
- }
- else
- { // value < 0.0, or value is NaN
- if (!(value >= EXP_UNDERFLOW_LIMIT))
- { // value < EXP_UNDERFLOW_LIMIT, or value is NaN
- return (value < EXP_UNDERFLOW_LIMIT) ? 0.0 : Double.NaN;
- }
- // TODO JVM bug with -server option: test with values of all
- // magnitudes
- // is very slow, if using (int)x instead of -(int)-x or (int)(long)x
- // (which give the same result).
- // The guessed cause is that when the same expression is used to
- // define "i" in
- // both sides of the above "else", some (desastrous) optimization is
- // done which factorizes
- // it above the first "if" statement, making it computed all the
- // time, without the protecting "sub-ifs".
- // Since cast from double to int with huge values is extremely slow,
- // this makes this whole treatment extremely slow for huge values.
- // The solution is therefore to modify a bit the expression for the
- // "optimization" not to occur.
- int i = -(int) -(value * EXP_LO_INDEXING);
- int valueInt = -((-i) >> EXP_LO_INDEXING_DIV_SHIFT);
- i -= ((valueInt) << EXP_LO_INDEXING_DIV_SHIFT);
- double delta = (value - valueInt) - i * (1.0 / EXP_LO_INDEXING);
- double tmp = expHiInvTab[-valueInt]
- * (expLoPosTab[i + EXP_LO_TAB_MID_INDEX] * (1 + delta
- * (1 + delta * (1.0 / 2 + delta * (1.0 / 6 + delta * (1.0 / 24))))));
- // We took care not to compute with subnormal values.
- return (valueInt >= EXP_MIN_INT_LIMIT) ? tmp : tmp * TWO_POW_N54;
- }
- }
-
- /**
- * Quick exp, with a max relative error of about 3e-2 for |value| < 700.0 or
- * so, and no accuracy at all outside this range. Derived from a note by
- * Nicol N. Schraudolph, IDSIA, 1998.
- *
- * @param value
- * A double value.
- * @return e^value.
- */
- public static double expQuick(double value)
- {
- if (USE_JDK_MATH)
- {
- return STRICT_MATH ? StrictMath.exp(value) : Math.exp(value);
- }
- /*
- * Cast of double values, even in long range, into long, is slower than
- * from double to int for values in int range, and then from int to
- * long. For that reason, we only work with integer values in int range
- * (corresponding to the 32 first bits of the long, containing sign,
- * exponent, and highest significant bits of double's mantissa), and
- * cast twice.
- */
- return Double.longBitsToDouble(((long) (int) (EXP_QUICK_A / (1L << 32) * value + (EXP_QUICK_B - EXP_QUICK_C)
- / (1L << 32))) << 32);
- }
-
- /**
- * Much more accurate than exp(value)-1, for values close to zero.
- *
- * @param value
- * A double value.
- * @return e^value-1.
- */
- public static double expm1(double value)
- {
- if (USE_JDK_MATH)
- {
- return STRICT_MATH ? StrictMath.expm1(value) : Math.expm1(value);
- }
- // If value is far from zero, we use exp(value)-1.
- //
- // If value is close to zero, we use the following formula:
- // exp(value)-1
- // = exp(valueApprox)*exp(epsilon)-1
- // = exp(valueApprox)*(exp(epsilon)-exp(-valueApprox))
- // = exp(valueApprox)*(1+epsilon+epsilon^2/2!+...-exp(-valueApprox))
- // = exp(valueApprox)*((1-exp(-valueApprox))+epsilon+epsilon^2/2!+...)
- // exp(valueApprox) and exp(-valueApprox) being stored in tables.
-
- if (Math.abs(value) < EXP_LO_DISTANCE_TO_ZERO)
- {
- // Taking int part instead of rounding, which takes too long.
- int i = (int) (value * EXP_LO_INDEXING);
- double delta = value - i * (1.0 / EXP_LO_INDEXING);
- return expLoPosTab[i + EXP_LO_TAB_MID_INDEX]
- * (expLoNegTab[i + EXP_LO_TAB_MID_INDEX] + delta
- * (1 + delta * (1.0 / 2 + delta * (1.0 / 6 + delta * (1.0 / 24 + delta * (1.0 / 120))))));
- }
- else
- {
- return FastMath.exp(value) - 1;
- }
- }
-
- /**
- * @param value
- * A double value.
- * @return Value logarithm (base e).
- */
- public static double log(double value)
- {
- if (value > 0.0)
- {
- if (value == Double.POSITIVE_INFINITY)
- {
- return Double.POSITIVE_INFINITY;
- }
-
- // For normal values not close to 1.0, we use the following formula:
- // log(value)
- // = log(2^exponent*1.mantissa)
- // = log(2^exponent) + log(1.mantissa)
- // = exponent * log(2) + log(1.mantissa)
- // = exponent * log(2) + log(1.mantissaApprox) +
- // log(1.mantissa/1.mantissaApprox)
- // = exponent * log(2) + log(1.mantissaApprox) + log(1+epsilon)
- // = exponent * log(2) + log(1.mantissaApprox) +
- // epsilon-epsilon^2/2+epsilon^3/3-epsilon^4/4+...
- // with:
- // 1.mantissaApprox <= 1.mantissa,
- // log(1.mantissaApprox) in table,
- // epsilon = (1.mantissa/1.mantissaApprox)-1
- //
- // To avoid bad relative error for small results,
- // values close to 1.0 are treated aside, with the formula:
- // log(x) = z*(2+z^2*((2.0/3)+z^2*((2.0/5))+z^2*((2.0/7))+...)))
- // with z=(x-1)/(x+1)
-
- double h;
- if (value > 0.95)
- {
- if (value < 1.14)
- {
- double z = (value - 1.0) / (value + 1.0);
- double z2 = z * z;
- return z
- * (2 + z2
- * ((2.0 / 3) + z2
- * ((2.0 / 5) + z2 * ((2.0 / 7) + z2 * ((2.0 / 9) + z2 * ((2.0 / 11)))))));
- }
- h = 0.0;
- }
- else if (value < MIN_DOUBLE_NORMAL)
- {
- // Ensuring value is normal.
- value *= TWO_POW_52;
- // log(x*2^52)
- // = log(x)-ln(2^52)
- // = log(x)-52*ln(2)
- h = -52 * LOG_2;
- }
- else
- {
- h = 0.0;
- }
-
- int valueBitsHi = (int) (Double.doubleToRawLongBits(value) >> 32);
- int valueExp = (valueBitsHi >> 20) - MAX_DOUBLE_EXPONENT;
- // Getting the first LOG_BITS bits of the mantissa.
- int xIndex = ((valueBitsHi << 12) >>> (32 - LOG_BITS));
-
- // 1.mantissa/1.mantissaApprox - 1
- double z = (value * twoPowTab[-valueExp - MIN_DOUBLE_EXPONENT]) * logXInvTab[xIndex] - 1;
-
- z *= (1 - z * ((1.0 / 2) - z * ((1.0 / 3))));
-
- return h + valueExp * LOG_2 + (logXLogTab[xIndex] + z);
-
- }
- else if (value == 0.0)
- {
- return Double.NEGATIVE_INFINITY;
- }
- else
- { // value < 0.0, or value is NaN
- return Double.NaN;
- }
- }
-
- /**
- * Quick log, with a max relative error of about 2.8e-4 for values in
- * ]0,+infinity[, and no accuracy at all outside this range.
- */
- public static double logQuick(double value)
- {
- if (USE_JDK_MATH)
- {
- return STRICT_MATH ? StrictMath.log(value) : Math.log(value);
- }
- /*
- * Inverse of Schraudolph's method for exp, is very inaccurate near 1,
- * and not that fast (even using floats), especially with added if's to
- * deal with values near 1, so we don't use it, and use a simplified
- * version of our log's redefined algorithm.
- */
-
- // Simplified version of log's redefined algorithm:
- // log(value) ~= exponent * log(2) + log(1.mantissaApprox)
-
- double h;
- if (value > 0.87)
- {
- if (value < 1.16)
- {
- return 2.0 * (value - 1.0) / (value + 1.0);
- }
- h = 0.0;
- }
- else if (value < MIN_DOUBLE_NORMAL)
- {
- value *= TWO_POW_52;
- h = -52 * LOG_2;
- }
- else
- {
- h = 0.0;
- }
-
- int valueBitsHi = (int) (Double.doubleToRawLongBits(value) >> 32);
- int valueExp = (valueBitsHi >> 20) - MAX_DOUBLE_EXPONENT;
- int xIndex = ((valueBitsHi << 12) >>> (32 - LOG_BITS));
-
- return h + valueExp * LOG_2 + logXLogTab[xIndex];
- }
-
- /**
- * @param value
- * A double value.
- * @return Value logarithm (base 10).
- */
- public static double log10(double value)
- {
- // INV_LOG_10 is < 1, but there is no risk of log(double)
- // overflow (positive or negative) while the end result shouldn't,
- // since log(Double.MIN_VALUE) and log(Double.MAX_VALUE) have
- // magnitudes of just a few hundreds.
- return log(value) * INV_LOG_10;
- }
-
- /**
- * Much more accurate than log(1+value), for values close to zero.
- *
- * @param value
- * A double value.
- * @return Logarithm (base e) of (1+value).
- */
- public static double log1p(double value)
- {
- if (USE_JDK_MATH)
- {
- return STRICT_MATH ? StrictMath.log1p(value) : Math.log1p(value);
- }
-
- if (value > -1.0)
- {
- if (value == Double.POSITIVE_INFINITY)
- {
- return Double.POSITIVE_INFINITY;
- }
-
- // ln'(x) = 1/x
- // so
- // log(x+epsilon) ~= log(x) + epsilon/x
- //
- // Let u be 1+value rounded:
- // 1+value = u+epsilon
- //
- // log(1+value)
- // = log(u+epsilon)
- // ~= log(u) + epsilon/value
- // We compute log(u) as done in log(double), and then add the
- // corrective term.
-
- double valuePlusOne = 1.0 + value;
- if (valuePlusOne == 1.0)
- {
- return value;
- }
- else if (Math.abs(value) < 0.15)
- {
- double z = value / (value + 2.0);
- double z2 = z * z;
- return z
- * (2 + z2
- * ((2.0 / 3) + z2
- * ((2.0 / 5) + z2 * ((2.0 / 7) + z2 * ((2.0 / 9) + z2 * ((2.0 / 11)))))));
- }
-
- int valuePlusOneBitsHi = (int) (Double.doubleToRawLongBits(valuePlusOne) >> 32) & 0x7FFFFFFF;
- int valuePlusOneExp = (valuePlusOneBitsHi >> 20) - MAX_DOUBLE_EXPONENT;
- // Getting the first LOG_BITS bits of the mantissa.
- int xIndex = ((valuePlusOneBitsHi << 12) >>> (32 - LOG_BITS));
-
- // 1.mantissa/1.mantissaApprox - 1
- double z = (valuePlusOne * twoPowTab[-valuePlusOneExp - MIN_DOUBLE_EXPONENT]) * logXInvTab[xIndex] - 1;
-
- z *= (1 - z * ((1.0 / 2) - z * (1.0 / 3)));
-
- // Adding epsilon/valuePlusOne to z,
- // with
- // epsilon = value - (valuePlusOne-1)
- // (valuePlusOne + epsilon ~= 1+value (not rounded))
-
- return valuePlusOneExp * LOG_2 + logXLogTab[xIndex] + (z + (value - (valuePlusOne - 1)) / valuePlusOne);
- }
- else if (value == -1.0)
- {
- return Double.NEGATIVE_INFINITY;
- }
- else
- { // value < -1.0, or value is NaN
- return Double.NaN;
- }
- }
-
- /**
- * 1e-13ish accuracy (or better) on whole double range.
- *
- * @param value
- * A double value.
- * @param power
- * A power.
- * @return value^power.
- */
- public static double pow(double value, double power)
- {
- if (USE_JDK_MATH)
- {
- return STRICT_MATH ? StrictMath.pow(value, power) : Math.pow(value, power);
- }
- if (power == 0.0)
- {
- return 1.0;
- }
- else if (power == 1.0)
- {
- return value;
- }
- if (value <= 0.0)
- {
- // powerInfo: 0 if not integer, 1 if even integer, -1 if odd integer
- int powerInfo;
- if (Math.abs(power) >= (TWO_POW_52 * 2))
- {
- // The binary digit just before comma is outside mantissa,
- // thus it is always 0: power is an even integer.
- powerInfo = 1;
- }
- else
- {
- // If power's magnitude permits, we cast into int instead of
- // into long,
- // as it is faster.
- if (Math.abs(power) <= (double) Integer.MAX_VALUE)
- {
- int powerAsInt = (int) power;
- if (power == (double) powerAsInt)
- {
- powerInfo = ((powerAsInt & 1) == 0) ? 1 : -1;
- }
- else
- { // power is not an integer (and not NaN, due to test
- // against Integer.MAX_VALUE)
- powerInfo = 0;
- }
- }
- else
- {
- long powerAsLong = (long) power;
- if (power == (double) powerAsLong)
- {
- powerInfo = ((powerAsLong & 1) == 0) ? 1 : -1;
- }
- else
- { // power is not an integer, or is NaN
- if (power != power)
- {
- return Double.NaN;
- }
- powerInfo = 0;
- }
- }
- }
-
- if (value == 0.0)
- {
- if (power < 0.0)
- {
- return (powerInfo < 0) ? 1 / value : Double.POSITIVE_INFINITY;
- }
- else
- { // power > 0.0 (0 and NaN cases already treated)
- return (powerInfo < 0) ? value : 0.0;
- }
- }
- else
- { // value < 0.0
- if (value == Double.NEGATIVE_INFINITY)
- {
- if (powerInfo < 0)
- { // power odd integer
- return (power < 0.0) ? -0.0 : Double.NEGATIVE_INFINITY;
- }
- else
- { // power even integer, or not an integer
- return (power < 0.0) ? 0.0 : Double.POSITIVE_INFINITY;
- }
- }
- else
- {
- return (powerInfo != 0) ? powerInfo * FastMath.exp(power * FastMath.log(-value)) : Double.NaN;
- }
- }
- }
- else
- { // value > 0.0, or value is NaN
- return FastMath.exp(power * FastMath.log(value));
- }
- }
-
- /**
- * Quick pow, with a max relative error of about 3.5e-2 for |a^b| < 1e10, of
- * about 0.17 for |a^b| < 1e50, and worse accuracy above.
- *
- * @param value
- * A double value, in ]0,+infinity[ (strictly positive and
- * finite).
- * @param power
- * A double value.
- * @return value^power.
- */
- public static double powQuick(double value, double power)
- {
- if (USE_JDK_MATH)
- {
- return STRICT_MATH ? StrictMath.pow(value, power) : Math.pow(value, power);
- }
- return FastMath.exp(power * FastMath.logQuick(value));
- }
-
- /**
- * This treatment is somehow accurate for low values of |power|, and for
- * |power*getExponent(value)| < 1023 or so (to stay away from double extreme
- * magnitudes (large and small)).
- *
- * @param value
- * A double value.
- * @param power
- * A power.
- * @return value^power.
- */
- public static double powFast(double value, int power)
- {
- if (USE_JDK_MATH)
- {
- return STRICT_MATH ? StrictMath.pow(value, power) : Math.pow(value, power);
- }
- if (power > 5)
- { // Most common case first.
- double oddRemains = 1.0;
- do
- {
- // Test if power is odd.
- if ((power & 1) != 0)
- {
- oddRemains *= value;
- }
- value *= value;
- power >>= 1; // power = power / 2
- }
- while (power > 5);
- // Here, power is in [3,5]: faster to finish outside the loop.
- if (power == 3)
- {
- return oddRemains * value * value * value;
- }
- else
- {
- double v2 = value * value;
- if (power == 4)
- {
- return oddRemains * v2 * v2;
- }
- else
- { // power == 5
- return oddRemains * v2 * v2 * value;
- }
- }
- }
- else if (power >= 0)
- { // power in [0,5]
- if (power < 3)
- { // power in [0,2]
- if (power == 2)
- { // Most common case first.
- return value * value;
- }
- else if (power != 0)
- { // faster than == 1
- return value;
- }
- else
- { // power == 0
- return 1.0;
- }
- }
- else
- { // power in [3,5]
- if (power == 3)
- {
- return value * value * value;
- }
- else
- { // power in [4,5]
- double v2 = value * value;
- if (power == 4)
- {
- return v2 * v2;
- }
- else
- { // power == 5
- return v2 * v2 * value;
- }
- }
- }
- }
- else
- { // power < 0
- // Opposite of Integer.MIN_VALUE does not exist as int.
- if (power == Integer.MIN_VALUE)
- {
- // Integer.MAX_VALUE = -(power+1)
- return 1.0 / (FastMath.powFast(value, Integer.MAX_VALUE) * value);
- }
- else
- {
- return 1.0 / FastMath.powFast(value, -power);
- }
- }
- }
-
- /**
- * Returns the exact result, provided it's in double range.
- *
- * @param power
- * A power.
- * @return 2^power.
- */
- public static double twoPow(int power)
- {
- /*
- * Using table, to go faster than NumbersUtils.twoPow(int).
- */
- if (power >= 0)
- {
- if (power <= MAX_DOUBLE_EXPONENT)
- {
- return twoPowTab[power - MIN_DOUBLE_EXPONENT];
- }
- else
- {
- // Overflow.
- return Double.POSITIVE_INFINITY;
- }
- }
- else
- {
- if (power >= MIN_DOUBLE_EXPONENT)
- {
- return twoPowTab[power - MIN_DOUBLE_EXPONENT];
- }
- else
- {
- // Underflow.
- return 0.0;
- }
- }
- }
-
- /**
- * @param value
- * A double value.
- * @return Value square root.
- */
- @SuppressWarnings("unused")
- public static double sqrt(double value)
- {
- if (USE_JDK_MATH || (!USE_REDEFINED_SQRT))
- {
- return STRICT_MATH ? StrictMath.sqrt(value) : Math.sqrt(value);
- }
- // See cbrt for comments, sqrt uses the same ideas.
-
- if (!(value > 0.0))
- { // value <= 0.0, or value is NaN
- return (value == 0.0) ? value : Double.NaN;
- }
- else if (value == Double.POSITIVE_INFINITY)
- {
- return Double.POSITIVE_INFINITY;
- }
-
- double h;
- if (value < MIN_DOUBLE_NORMAL)
- {
- value *= TWO_POW_52;
- h = 2 * TWO_POW_N26;
- }
- else
- {
- h = 2.0;
- }
-
- int valueBitsHi = (int) (Double.doubleToRawLongBits(value) >> 32);
- int valueExponentIndex = (valueBitsHi >> 20) + (-MAX_DOUBLE_EXPONENT - MIN_DOUBLE_EXPONENT);
- int xIndex = ((valueBitsHi << 12) >>> (32 - SQRT_LO_BITS));
-
- double result = sqrtXSqrtHiTab[valueExponentIndex] * sqrtXSqrtLoTab[xIndex];
- double slope = sqrtSlopeHiTab[valueExponentIndex] * sqrtSlopeLoTab[xIndex];
- value *= 0.25;
-
- result += (value - result * result) * slope;
- result += (value - result * result) * slope;
- return h * (result + (value - result * result) * slope);
- }
-
- /**
- * @param value
- * A double value.
- * @return Value cubic root.
- */
- public static double cbrt(double value)
- {
- if (USE_JDK_MATH)
- {
- return STRICT_MATH ? StrictMath.cbrt(value) : Math.cbrt(value);
- }
- double h;
- if (value < 0.0)
- {
- if (value == Double.NEGATIVE_INFINITY)
- {
- return Double.NEGATIVE_INFINITY;
- }
- value = -value;
- // Making sure value is normal.
- if (value < MIN_DOUBLE_NORMAL)
- {
- value *= (TWO_POW_52 * TWO_POW_26);
- // h = <result_sign> * <result_multiplicator_to_avoid_overflow>
- // / <cbrt(value_multiplicator_to_avoid_subnormal)>
- h = -2 * TWO_POW_N26;
- }
- else
- {
- h = -2.0;
- }
- }
- else
- {
- if (!(value < Double.POSITIVE_INFINITY))
- { // value is +infinity, or value is NaN
- return value;
- }
- // Making sure value is normal.
- if (value < MIN_DOUBLE_NORMAL)
- {
- if (value == 0.0)
- {
- // cbrt(0.0) = 0.0, cbrt(-0.0) = -0.0
- return value;
- }
- value *= (TWO_POW_52 * TWO_POW_26);
- h = 2 * TWO_POW_N26;
- }
- else
- {
- h = 2.0;
- }
- }
-
- // Normal value is (2^<value exponent> * <a value in [1,2[>).
- // First member cubic root is computed, and multiplied with an
- // approximation
- // of the cubic root of the second member, to end up with a good guess
- // of
- // the result before using Newton's (or Archimedes's) method.
- // To compute the cubic root approximation, we use the formula
- // "cbrt(value) = cbrt(x) * cbrt(value/x)",
- // choosing x as close to value as possible but inferior to it, so that
- // cbrt(value/x) is close to 1
- // (we could iterate on this method, using value/x as new value for each
- // iteration,
- // but finishing with Newton's method is faster).
-
- // Shift and cast into an int, which overall is faster than working with
- // a long.
- int valueBitsHi = (int) (Double.doubleToRawLongBits(value) >> 32);
- int valueExponentIndex = (valueBitsHi >> 20) + (-MAX_DOUBLE_EXPONENT - MIN_DOUBLE_EXPONENT);
- // Getting the first CBRT_LO_BITS bits of the mantissa.
- int xIndex = ((valueBitsHi << 12) >>> (32 - CBRT_LO_BITS));
- double result = cbrtXCbrtHiTab[valueExponentIndex] * cbrtXCbrtLoTab[xIndex];
- double slope = cbrtSlopeHiTab[valueExponentIndex] * cbrtSlopeLoTab[xIndex];
-
- // Lowering values to avoid overflows when using Newton's method
- // (we will then just have to return twice the result).
- // result^3 = value
- // (result/2)^3 = value/8
- value *= 0.125;
- // No need to divide result here, as division is factorized in result
- // computation tables.
- // result *= 0.5;
-
- // Newton's method, looking for y = x^(1/p):
- // y(n) = y(n-1) + (x-y(n-1)^p) * slope(y(n-1))
- // y(n) = y(n-1) + (x-y(n-1)^p) * (1/p)*(x(n-1)^(1/p-1))
- // y(n) = y(n-1) + (x-y(n-1)^p) * (1/p)*(x(n-1)^((1-p)/p))
- // with x(n-1)=y(n-1)^p, i.e.:
- // y(n) = y(n-1) + (x-y(n-1)^p) * (1/p)*(y(n-1)^(1-p))
- //
- // For p=3:
- // y(n) = y(n-1) + (x-y(n-1)^3) * (1/(3*y(n-1)^2))
-
- // To save time, we don't recompute the slope between Newton's method
- // steps,
- // as initial slope is good enough for a few iterations.
- //
- // NB: slope = 1/(3*trueResult*trueResult)
- // As we have result = trueResult/2 (to avoid overflows), we have:
- // slope = 4/(3*result*result)
- // = (4/3)*resultInv*resultInv
- // with newResultInv = 1/newResult
- // = 1/(oldResult+resultDelta)
- // = (oldResultInv)*1/(1+resultDelta/oldResult)
- // = (oldResultInv)*1/(1+resultDelta*oldResultInv)
- // ~= (oldResultInv)*(1-resultDelta*oldResultInv)
- // ===> Successive slopes could be computed without division, if needed,
- // by computing resultInv (instead of slope right away) and retrieving
- // slopes from it.
-
- result += (value - result * result * result) * slope;
- result += (value - result * result * result) * slope;
- return h * (result + (value - result * result * result) * slope);
- }
-
- /**
- * Returns dividend - divisor * n, where n is the mathematical integer
- * closest to dividend/divisor. If dividend/divisor is equally close to
- * surrounding integers, we choose n to be the integer of smallest
- * magnitude, which makes this treatment differ from
- * Math.IEEEremainder(double,double), where n is chosen to be the even
- * integer. Note that the choice of n is not done considering the double
- * approximation of dividend/divisor, because it could cause result to be
- * outside [-|divisor|/2,|divisor|/2] range. The practical effect is that if
- * multiple results would be possible, we always choose the result that is
- * the closest to (and has the same sign as) the dividend. Ex. : - for
- * (-3.0,2.0), this method returns -1.0, whereas Math.IEEEremainder returns
- * 1.0. - for (-5.0,2.0), both this method and Math.IEEEremainder return
- * -1.0.
- *
- * If the remainder is zero, its sign is the same as the sign of the first
- * argument. If either argument is NaN, or the first argument is infinite,
- * or the second argument is positive zero or negative zero, then the result
- * is NaN. If the first argument is finite and the second argument is
- * infinite, then the result is the same as the first argument.
- *
- * NB: - Modulo operator (%) returns a value in ]-|divisor|,|divisor|[,
- * which sign is the same as dividend. - As for modulo operator, the sign of
- * the divisor has no effect on the result.
- *
- * @param dividend
- * Dividend.
- * @param divisor
- * Divisor.
- * @return Remainder of dividend/divisor, i.e. a value in
- * [-|divisor|/2,|divisor|/2].
- */
- public static double remainder(double dividend, double divisor)
- {
- if (USE_JDK_MATH)
- {
- // no Math equivalent (differs from IEEEremainder(double,double))
- }
- if (Double.isInfinite(divisor))
- {
- if (Double.isInfinite(dividend))
- {
- return Double.NaN;
- }
- else
- {
- return dividend;
- }
- }
- double value = dividend % divisor;
- if (Math.abs(value + value) > Math.abs(divisor))
- {
- return value + ((value > 0.0) ? -Math.abs(divisor) : Math.abs(divisor));
- }
- else
- {
- return value;
- }
- }
-
- /**
- * @param angle
- * Angle in radians.
- * @return The same angle, in radians, but in [-Math.PI,Math.PI].
- */
- public static double normalizeMinusPiPi(double angle)
- {
- // Not modifying values in output range.
- if ((angle >= -Math.PI) && (angle <= Math.PI))
- {
- return angle;
- }
- double angleMinusPiPiOrSo = remainderTwoPi(angle);
- if (angleMinusPiPiOrSo < -Math.PI)
- {
- return -Math.PI;
- }
- else if (angleMinusPiPiOrSo > Math.PI)
- {
- return Math.PI;
- }
- else
- {
- return angleMinusPiPiOrSo;
- }
- }
-
- /**
- * Not accurate for large values.
- *
- * @param angle
- * Angle in radians.
- * @return The same angle, in radians, but in [-Math.PI,Math.PI].
- */
- public static double normalizeMinusPiPiFast(double angle)
- {
- // Not modifying values in output range.
- if ((angle >= -Math.PI) && (angle <= Math.PI))
- {
- return angle;
- }
- double angleMinusPiPiOrSo = remainderTwoPiFast(angle);
- if (angleMinusPiPiOrSo < -Math.PI)
- {
- return -Math.PI;
- }
- else if (angleMinusPiPiOrSo > Math.PI)
- {
- return Math.PI;
- }
- else
- {
- return angleMinusPiPiOrSo;
- }
- }
-
- /**
- * @param angle
- * Angle in radians.
- * @return The same angle, in radians, but in [0,2*Math.PI].
- */
- public static double normalizeZeroTwoPi(double angle)
- {
- // Not modifying values in output range.
- if ((angle >= 0.0) && (angle <= 2 * Math.PI))
- {
- return angle;
- }
- double angleMinusPiPiOrSo = remainderTwoPi(angle);
- if (angleMinusPiPiOrSo < 0.0)
- {
- // Not a problem if angle is slightly < -Math.PI,
- // since result ends up around PI, which is not near output range
- // borders.
- return angleMinusPiPiOrSo + 2 * Math.PI;
- }
- else
- {
- // Not a problem if angle is slightly > Math.PI,
- // since result ends up around PI, which is not near output range
- // borders.
- return angleMinusPiPiOrSo;
- }
- }
-
- /**
- * Not accurate for large values.
- *
- * @param angle
- * Angle in radians.
- * @return The same angle, in radians, but in [0,2*Math.PI].
- */
- public static double normalizeZeroTwoPiFast(double angle)
- {
- // Not modifying values in output range.
- if ((angle >= 0.0) && (angle <= 2 * Math.PI))
- {
- return angle;
- }
- double angleMinusPiPiOrSo = remainderTwoPiFast(angle);
- if (angleMinusPiPiOrSo < 0.0)
- {
- // Not a problem if angle is slightly < -Math.PI,
- // since result ends up around PI, which is not near output range
- // borders.
- return angleMinusPiPiOrSo + 2 * Math.PI;
- }
- else
- {
- // Not a problem if angle is slightly > Math.PI,
- // since result ends up around PI, which is not near output range
- // borders.
- return angleMinusPiPiOrSo;
- }
- }
-
- /**
- * @param angle
- * Angle in radians.
- * @return Angle value modulo PI, in radians, in [-Math.PI/2,Math.PI/2].
- */
- public static double normalizeMinusHalfPiHalfPi(double angle)
- {
- // Not modifying values in output range.
- if ((angle >= -Math.PI / 2) && (angle <= Math.PI / 2))
- {
- return angle;
- }
- double angleMinusPiPiOrSo = remainderTwoPi(angle);
- if (angleMinusPiPiOrSo < -Math.PI / 2)
- {
- // Not a problem if angle is slightly < -Math.PI,
- // since result ends up around zero, which is not near output range
- // borders.
- return angleMinusPiPiOrSo + Math.PI;
- }
- else if (angleMinusPiPiOrSo > Math.PI / 2)
- {
- // Not a problem if angle is slightly > Math.PI,
- // since result ends up around zero, which is not near output range
- // borders.
- return angleMinusPiPiOrSo - Math.PI;
- }
- else
- {
- return angleMinusPiPiOrSo;
- }
- }
-
- /**
- * Not accurate for large values.
- *
- * @param angle
- * Angle in radians.
- * @return Angle value modulo PI, in radians, in [-Math.PI/2,Math.PI/2].
- */
- public static double normalizeMinusHalfPiHalfPiFast(double angle)
- {
- // Not modifying values in output range.
- if ((angle >= -Math.PI / 2) && (angle <= Math.PI / 2))
- {
- return angle;
- }
- double angleMinusPiPiOrSo = remainderTwoPiFast(angle);
- if (angleMinusPiPiOrSo < -Math.PI / 2)
- {
- // Not a problem if angle is slightly < -Math.PI,
- // since result ends up around zero, which is not near output range
- // borders.
- return angleMinusPiPiOrSo + Math.PI;
- }
- else if (angleMinusPiPiOrSo > Math.PI / 2)
- {
- // Not a problem if angle is slightly > Math.PI,
- // since result ends up around zero, which is not near output range
- // borders.
- return angleMinusPiPiOrSo - Math.PI;
- }
- else
- {
- return angleMinusPiPiOrSo;
- }
- }
-
- /**
- * Returns sqrt(x^2+y^2) without intermediate overflow or underflow.
- */
- public static double hypot(double x, double y)
- {
- if (USE_JDK_MATH)
- {
- return STRICT_MATH ? StrictMath.hypot(x, y) : Math.hypot(x, y);
- }
- x = Math.abs(x);
- y = Math.abs(y);
- if (y < x)
- {
- double a = x;
- x = y;
- y = a;
- }
- else if (!(y >= x))
- { // Testing if we have some NaN.
- if ((x == Double.POSITIVE_INFINITY) || (y == Double.POSITIVE_INFINITY))
- {
- return Double.POSITIVE_INFINITY;
- }
- else
- {
- return Double.NaN;
- }
- }
- if (y - x == y)
- { // x too small to substract from y
- return y;
- }
- else
- {
- double factor;
- if (x > TWO_POW_450)
- { // 2^450 < x < y
- x *= TWO_POW_N750;
- y *= TWO_POW_N750;
- factor = TWO_POW_750;
- }
- else if (y < TWO_POW_N450)
- { // x < y < 2^-450
- x *= TWO_POW_750;
- y *= TWO_POW_750;
- factor = TWO_POW_N750;
- }
- else
- {
- factor = 1.0;
- }
- return factor * FastMath.sqrt(x * x + y * y);
- }
- }
-
- /**
- * @param value
- * A float value.
- * @return Ceiling of value.
- */
- public static float ceil(float value)
- {
- if (USE_JDK_MATH)
- {
- // TODO use Math.ceil(float) if exists
- return (float) Math.ceil((double) value);
- }
- return -FastMath.floor(-value);
- }
-
- /**
- * Supposed to behave like Math.ceil(double), for safe interchangeability.
- *
- * @param value
- * A double value.
- * @return Ceiling of value.
- */
- public static double ceil(double value)
- {
- if (USE_JDK_MATH)
- {
- return Math.ceil(value);
- }
- return -FastMath.floor(-value);
- }
-
- /**
- * @param value
- * A float value.
- * @return Floor of value.
- */
- public static float floor(float value)
- {
- if (USE_JDK_MATH)
- {
- // TODO use Math.floor(float) if exists
- return (float) Math.floor((double) value);
- }
- int exp = FastMath.getExponent(value);
- if (exp < 0)
- {
- if (value < 0.0f)
- {
- return -1.0f;
- }
- else
- { // value in [0.0f,1.0f[
- return 0.0f * value; // 0.0f, or -0.0f if value is -0.0f
- }
- }
- else
- {
- if (exp < 24)
- {
- int valueBits = Float.floatToRawIntBits(value);
- int anteCommaDigits = valueBits & (0xFF800000 >> exp);
- if ((value < 0.0f) && (anteCommaDigits != valueBits))
- {
- return Float.intBitsToFloat(anteCommaDigits) - 1.0f;
- }
- else
- {
- return Float.intBitsToFloat(anteCommaDigits);
- }
- }
- else
- {
- return value;
- }
- }
- }
-
- /**
- * Supposed to behave like Math.floor(double), for safe interchangeability.
- *
- * @param value
- * A double value.
- * @return Floor of value.
- */
- public static double floor(double value)
- {
- if (USE_JDK_MATH)
- {
- return Math.floor(value);
- }
- // Faster than to work directly on bits.
- if (Math.abs(value) <= (double) Integer.MAX_VALUE)
- {
- if (value > 0.0)
- {
- return (double) (int) value;
- }
- else if (value < 0.0)
- {
- double anteCommaDigits = (double) (int) value;
- if (value != anteCommaDigits)
- {
- return anteCommaDigits - 1.0;
- }
- else
- {
- return anteCommaDigits;
- }
- }
- else
- { // value is +-0.0 (not NaN due to test against Integer.MAX_VALUE)
- return value;
- }
- }
- else if (Math.abs(value) < TWO_POW_52)
- {
- // We split the value in two:
- // high part, which is a mathematical integer,
- // and the rest, for which we can get rid of the
- // post comma digits by casting into an int.
- double highPart = ((int) (value * TWO_POW_N26)) * TWO_POW_26;
- if (value > 0.0)
- {
- return highPart + (double) ((int) (value - highPart));
- }
- else
- {
- double anteCommaDigits = highPart + (double) ((int) (value - highPart));
- if (value != anteCommaDigits)
- {
- return anteCommaDigits - 1.0;
- }
- else
- {
- return anteCommaDigits;
- }
- }
- }
- else
- { // abs(value) >= 2^52, or value is NaN
- return value;
- }
- }
-
- /**
- * Supposed to behave like Math.round(float), for safe interchangeability.
- *
- * @param value
- * A double value.
- * @return Value rounded to nearest int.
- */
- public static int round(float value)
- {
- if (USE_JDK_MATH)
- {
- return Math.round(value);
- }
- // "return (int)FastMath.floor((float)(value+0.5));" would be more
- // accurate for values in [8388609.0f,16777216.0f]
- // (i.e. [0x800001,0x1000000]), but would not give same results than
- // Math.round(float).
- return (int) FastMath.floor(value + 0.5f);
- }
-
- /**
- * Supposed to behave like Math.round(double), for safe interchangeability.
- *
- * @param value
- * A double value.
- * @return Value rounded to nearest long.
- */
- public static long round(double value)
- {
- if (USE_JDK_MATH)
- {
- return Math.round(value);
- }
- // Would be more coherent with rint, to call rint(double) instead of
- // floor(double), but that would not give same results than
- // Math.round(double).
- double roundedValue = FastMath.floor(value + 0.5);
- if (Math.abs(roundedValue) <= (double) Integer.MAX_VALUE)
- {
- // Faster with intermediary cast in int.
- return (long) (int) roundedValue;
- }
- else
- {
- return (long) roundedValue;
- }
- }
-
- /**
- * @param value
- * A float value.
- * @return Value unbiased exponent.
- */
- public static int getExponent(float value)
- {
- if (USE_JDK_MATH)
- {
- return Math.getExponent(value);
- }
- return ((Float.floatToRawIntBits(value) >> 23) & 0xFF) - MAX_FLOAT_EXPONENT;
- }
-
- /**
- * @param value
- * A double value.
- * @return Value unbiased exponent.
- */
- public static int getExponent(double value)
- {
- if (USE_JDK_MATH)
- {
- return Math.getExponent(value);
- }
- return (((int) (Double.doubleToRawLongBits(value) >> 52)) & 0x7FF) - MAX_DOUBLE_EXPONENT;
- }
-
- /**
- * Gives same result as Math.toDegrees for some particular values like
- * Math.PI/2, Math.PI or 2*Math.PI, but is faster (no division).
- *
- * @param angrad
- * Angle value in radians.
- * @return Angle value in degrees.
- */
- public static double toDegrees(double angrad)
- {
- if (USE_JDK_MATH)
- {
- return Math.toDegrees(angrad);
- }
- return angrad * (180 / Math.PI);
- }
-
- /**
- * Gives same result as Math.toRadians for some particular values like 90.0,
- * 180.0 or 360.0, but is faster (no division).
- *
- * @param angdeg
- * Angle value in degrees.
- * @return Angle value in radians.
- */
- public static double toRadians(double angdeg)
- {
- if (USE_JDK_MATH)
- {
- return Math.toRadians(angdeg);
- }
- return angdeg * (Math.PI / 180);
- }
-
- /**
- * @param sign
- * Sign of the angle: true for positive, false for negative.
- * @param degrees
- * Degrees, in [0,180].
- * @param minutes
- * Minutes, in [0,59].
- * @param seconds
- * Seconds, in [0.0,60.0[.
- * @return Angle in radians.
- */
- public static double toRadians(boolean sign, int degrees, int minutes, double seconds)
- {
- return FastMath.toRadians(FastMath.toDegrees(sign, degrees, minutes, seconds));
- }
-
- /**
- * @param sign
- * Sign of the angle: true for positive, false for negative.
- * @param degrees
- * Degrees, in [0,180].
- * @param minutes
- * Minutes, in [0,59].
- * @param seconds
- * Seconds, in [0.0,60.0[.
- * @return Angle in degrees.
- */
- public static double toDegrees(boolean sign, int degrees, int minutes, double seconds)
- {
- double signFactor = sign ? 1.0 : -1.0;
- return signFactor * (degrees + (1.0 / 60) * (minutes + (1.0 / 60) * seconds));
- }
-
- /**
- * NB: Since 2*Math.PI < 2*PI, a span of 2*Math.PI does not mean full
- * angular range. ex.: isInClockwiseDomain(0.0, 2*Math.PI, -1e-20) returns
- * false. ---> For full angular range, use a span > 2*Math.PI, like 2*PI_SUP
- * constant of this class.
- *
- * @param startAngRad
- * An angle, in radians.
- * @param angSpanRad
- * An angular span, >= 0.0, in radians.
- * @param angRad
- * An angle, in radians.
- * @return True if angRad is in the clockwise angular domain going from
- * startAngRad, over angSpanRad, extremities included, false
- * otherwise.
- */
- public static boolean isInClockwiseDomain(double startAngRad, double angSpanRad, double angRad)
- {
- if (Math.abs(angRad) < -TWO_MATH_PI_IN_MINUS_PI_PI)
- {
- // special case for angular values of small magnitude
- if (angSpanRad < 0.0)
- {
- // empty domain
- return false;
- }
- else if (angSpanRad <= 2 * Math.PI)
- { // angSpanRad is in [0.0,2*Math.PI]
- startAngRad = FastMath.normalizeMinusPiPi(startAngRad);
- double endAngRad = FastMath.normalizeMinusPiPi(startAngRad + angSpanRad);
- //
- if (startAngRad <= endAngRad)
- {
- return (angRad >= startAngRad) && (angRad <= endAngRad);
- }
- else
- {
- return (angRad >= startAngRad) || (angRad <= endAngRad);
- }
- }
- else if (angSpanRad != angSpanRad)
- { // angSpanRad is NaN
- return false;
- }
- else
- { // angSpanRad > 2*Math.PI
- // we know angRad is not NaN, due to a previous test
- return true;
- }
- }
- else
- {
- // general case
- return (FastMath.normalizeZeroTwoPi(angRad - startAngRad) <= angSpanRad);
- }
- }
-
- public static final double E = Math.E;
- public static final double PI = Math.PI;
-
- public static double abs(double a)
- {
- return Math.abs(a);
- }
-
- public static float abs(float a)
- {
- return Math.abs(a);
- }
-
- public static long abs(long a)
- {
- return Math.abs(a);
- }
-
- public static double copySign(double magnitude, double sign)
- {
- return Math.copySign(magnitude, sign);
- }
-
- public static float copySign(float magnitude, float sign)
- {
- return Math.copySign(magnitude, sign);
- }
-
- public static double IEEEremainder(double f1, double f2)
- {
- return Math.IEEEremainder(f1, f2);
- }
-
- public static double max(double a, double b)
- {
- return Math.max(a, b);
- }
-
- public static float max(float a, float b)
- {
- return Math.max(a, b);
- }
-
- public static int max(int a, int b)
- {
- return Math.max(a, b);
- }
-
- public static long max(long a, long b)
- {
- return Math.max(a, b);
- }
-
- public static double min(double a, double b)
- {
- return Math.min(a, b);
- }
-
- public static float min(float a, float b)
- {
- return Math.min(a, b);
- }
-
- public static int min(int a, int b)
- {
- return Math.min(a, b);
- }
-
- public static long min(long a, long b)
- {
- return Math.min(a, b);
- }
-
- public static double nextAfter(double start, double direction)
- {
- return Math.nextAfter(start, direction);
- }
-
- public static float nextAfter(float start, float direction)
- {
- return Math.nextAfter(start, direction);
- }
-
- public static double nextUp(double d)
- {
- return Math.nextUp(d);
- }
-
- public static float nextUp(float f)
- {
- return Math.nextUp(f);
- }
-
- public static double random()
- {
- // StrictMath and Math use different RNG instances,
- // so their random() methods are not equivalent.
- return STRICT_MATH ? StrictMath.random() : Math.random();
- }
-
- public static double rint(double a)
- {
- return Math.rint(a);
- }
-
- public static double scalb(double d, int scaleFactor)
- {
- return Math.scalb(d, scaleFactor);
- }
-
- public static float scalb(float f, int scaleFactor)
- {
- return Math.scalb(f, scaleFactor);
- }
-
- public static double signum(double d)
- {
- return Math.signum(d);
- }
-
- public static float signum(float f)
- {
- return Math.signum(f);
- }
-
- public static double ulp(double d)
- {
- return Math.ulp(d);
- }
-
- public static float ulp(float f)
- {
- return Math.ulp(f);
- }
-
- // --------------------------------------------------------------------------
- // PRIVATE TREATMENTS
- // --------------------------------------------------------------------------
-
- /**
- * FastMath is non-instantiable.
- */
- private FastMath()
- {
- }
-
- /**
- * Use look-up tables size power through this method, to make sure is it
- * small in case java.lang.Math is directly used.
- */
- private static int getTabSizePower(int tabSizePower)
- {
- return USE_JDK_MATH ? Math.min(2, tabSizePower) : tabSizePower;
- }
-
- /**
- * Remainder using an accurate definition of PI. Derived from a fdlibm
- * treatment called __ieee754_rem_pio2.
- *
- * This method can return values slightly (like one ULP or so) outside
- * [-Math.PI,Math.PI] range.
- *
- * @param angle
- * Angle in radians.
- * @return Remainder of (angle % (2*PI)), which is in [-PI,PI] range.
- */
- private static double remainderTwoPi(double angle)
- {
- if (USE_JDK_MATH)
- {
- double y = STRICT_MATH ? StrictMath.sin(angle) : Math.sin(angle);
- double x = STRICT_MATH ? StrictMath.cos(angle) : Math.cos(angle);
- return STRICT_MATH ? StrictMath.atan2(y, x) : Math.atan2(y, x);
- }
- boolean negateResult;
- if (angle < 0.0)
- {
- negateResult = true;
- angle = -angle;
- }
- else
- {
- negateResult = false;
- }
- if (angle <= NORMALIZE_ANGLE_MAX_MEDIUM_DOUBLE)
- {
- double fn = (double) (int) (angle * INVTWOPI + 0.5);
- double result = (angle - fn * TWOPI_HI) - fn * TWOPI_LO;
- return negateResult ? -result : result;
- }
- else if (angle < Double.POSITIVE_INFINITY)
- {
- // Reworking exponent to have a value < 2^24.
- long lx = Double.doubleToRawLongBits(angle);
- long exp = ((lx >> 52) & 0x7FF) - 1046;
- double z = Double.longBitsToDouble(lx - (exp << 52));
-
- double x0 = (double) ((int) z);
- z = (z - x0) * TWO_POW_24;
- double x1 = (double) ((int) z);
- double x2 = (z - x1) * TWO_POW_24;
-
- double result = subRemainderTwoPi(x0, x1, x2, (int) exp, (x2 == 0) ? 2 : 3);
- return negateResult ? -result : result;
- }
- else
- { // angle is +infinity or NaN
- return Double.NaN;
- }
- }
-
- /**
- * Not accurate for large values.
- *
- * This method can return values slightly (like one ULP or so) outside
- * [-Math.PI,Math.PI] range.
- *
- * @param angle
- * Angle in radians.
- * @return Remainder of (angle % (2*PI)), which is in [-PI,PI] range.
- */
- private static double remainderTwoPiFast(double angle)
- {
- if (USE_JDK_MATH)
- {
- return remainderTwoPi(angle);
- }
- boolean negateResult;
- if (angle < 0.0)
- {
- negateResult = true;
- angle = -angle;
- }
- else
- {
- negateResult = false;
- }
- // - We don't bother with values higher than (2*PI*(2^52)),
- // since they are spaced by 2*PI or more from each other.
- // - For large values, we don't use % because it might be very slow,
- // and we split computation in two, because cast from double to int
- // with large numbers might be very slow also.
- if (angle <= TWO_POW_26 * (2 * Math.PI))
- {
- double fn = (double) (int) (angle * INVTWOPI + 0.5);
- double result = (angle - fn * TWOPI_HI) - fn * TWOPI_LO;
- return negateResult ? -result : result;
- }
- else if (angle <= TWO_POW_52 * (2 * Math.PI))
- {
- // 1) Computing remainder of angle modulo TWO_POW_26*(2*PI).
- double fn = (double) (int) (angle * (INVTWOPI / TWO_POW_26) + 0.5);
- double result = (angle - fn * (TWOPI_HI * TWO_POW_26)) - fn * (TWOPI_LO * TWO_POW_26);
- // Here, result is in [-TWO_POW_26*Math.PI,TWO_POW_26*Math.PI].
- if (result < 0.0)
- {
- result = -result;
- negateResult = !negateResult;
- }
- // 2) Computing remainder of angle modulo 2*PI.
- fn = (double) (int) (result * INVTWOPI + 0.5);
- result = (result - fn * TWOPI_HI) - fn * TWOPI_LO;
- return negateResult ? -result : result;
- }
- else if (angle < Double.POSITIVE_INFINITY)
- {
- return 0.0;
- }
- else
- { // angle is +infinity or NaN
- return Double.NaN;
- }
- }
-
- /**
- * Remainder using an accurate definition of PI. Derived from a fdlibm
- * treatment called __kernel_rem_pio2.
- *
- * @param x0
- * Most significant part of the value, as an integer < 2^24, in
- * double precision format. Must be >= 0.
- * @param x1
- * Following significant part of the value, as an integer < 2^24,
- * in double precision format.
- * @param x2
- * Least significant part of the value, as an integer < 2^24, in
- * double precision format.
- * @param e0
- * Exponent of x0 (value is (2^e0)*(x0+(2^-24)*(x1+(2^-24)*x2))).
- * Must be >= -20.
- * @param nx
- * Number of significant parts to take into account. Must be 2 or
- * 3.
- * @return Remainder of (value % (2*PI)), which is in [-PI,PI] range.
- */
- private static double subRemainderTwoPi(double x0, double x1, double x2, int e0, int nx)
- {
- int ih;
- double z, fw;
- double f0, f1, f2, f3, f4, f5, f6 = 0.0, f7;
- double q0, q1, q2, q3, q4, q5;
- int iq0, iq1, iq2, iq3, iq4;
-
- final int jx = nx - 1; // jx in [1,2] (nx in [2,3])
- // Could use a table to avoid division, but the gain isn't worth it most
- // likely...
- final int jv = (e0 - 3) / 24; // We do not handle the case (e0-3 < -23).
- int q = e0 - ((jv << 4) + (jv << 3)) - 24; // e0-24*(jv+1)
-
- final int j = jv + 4;
- if (jx == 1)
- {
- f5 = (j >= 0) ? ONE_OVER_TWOPI_TAB[j] : 0.0;
- f4 = (j >= 1) ? ONE_OVER_TWOPI_TAB[j - 1] : 0.0;
- f3 = (j >= 2) ? ONE_OVER_TWOPI_TAB[j - 2] : 0.0;
- f2 = (j >= 3) ? ONE_OVER_TWOPI_TAB[j - 3] : 0.0;
- f1 = (j >= 4) ? ONE_OVER_TWOPI_TAB[j - 4] : 0.0;
- f0 = (j >= 5) ? ONE_OVER_TWOPI_TAB[j - 5] : 0.0;
-
- q0 = x0 * f1 + x1 * f0;
- q1 = x0 * f2 + x1 * f1;
- q2 = x0 * f3 + x1 * f2;
- q3 = x0 * f4 + x1 * f3;
- q4 = x0 * f5 + x1 * f4;
- }
- else
- { // jx == 2
- f6 = (j >= 0) ? ONE_OVER_TWOPI_TAB[j] : 0.0;
- f5 = (j >= 1) ? ONE_OVER_TWOPI_TAB[j - 1] : 0.0;
- f4 = (j >= 2) ? ONE_OVER_TWOPI_TAB[j - 2] : 0.0;
- f3 = (j >= 3) ? ONE_OVER_TWOPI_TAB[j - 3] : 0.0;
- f2 = (j >= 4) ? ONE_OVER_TWOPI_TAB[j - 4] : 0.0;
- f1 = (j >= 5) ? ONE_OVER_TWOPI_TAB[j - 5] : 0.0;
- f0 = (j >= 6) ? ONE_OVER_TWOPI_TAB[j - 6] : 0.0;
-
- q0 = x0 * f2 + x1 * f1 + x2 * f0;
- q1 = x0 * f3 + x1 * f2 + x2 * f1;
- q2 = x0 * f4 + x1 * f3 + x2 * f2;
- q3 = x0 * f5 + x1 * f4 + x2 * f3;
- q4 = x0 * f6 + x1 * f5 + x2 * f4;
- }
-
- z = q4;
- fw = (double) ((int) (TWO_POW_N24 * z));
- iq0 = (int) (z - TWO_POW_24 * fw);
- z = q3 + fw;
- fw = (double) ((int) (TWO_POW_N24 * z));
- iq1 = (int) (z - TWO_POW_24 * fw);
- z = q2 + fw;
- fw = (double) ((int) (TWO_POW_N24 * z));
- iq2 = (int) (z - TWO_POW_24 * fw);
- z = q1 + fw;
- fw = (double) ((int) (TWO_POW_N24 * z));
- iq3 = (int) (z - TWO_POW_24 * fw);
- z = q0 + fw;
-
- // Here, q is in [-25,2] range or so, so we can use the table right
- // away.
- double twoPowQ = twoPowTab[q - MIN_DOUBLE_EXPONENT];
-
- z = (z * twoPowQ) % 8.0;
- z -= (double) ((int) z);
- if (q > 0)
- {
- iq3 &= 0xFFFFFF >> q;
- ih = iq3 >> (23 - q);
- }
- else if (q == 0)
- {
- ih = iq3 >> 23;
- }
- else if (z >= 0.5)
- {
- ih = 2;
- }
- else
- {
- ih = 0;
- }
- if (ih > 0)
- {
- int carry;
- if (iq0 != 0)
- {
- carry = 1;
- iq0 = 0x1000000 - iq0;
- iq1 = 0x0FFFFFF - iq1;
- iq2 = 0x0FFFFFF - iq2;
- iq3 = 0x0FFFFFF - iq3;
- }
- else
- {
- if (iq1 != 0)
- {
- carry = 1;
- iq1 = 0x1000000 - iq1;
- iq2 = 0x0FFFFFF - iq2;
- iq3 = 0x0FFFFFF - iq3;
- }
- else
- {
- if (iq2 != 0)
- {
- carry = 1;
- iq2 = 0x1000000 - iq2;
- iq3 = 0x0FFFFFF - iq3;
- }
- else
- {
- if (iq3 != 0)
- {
- carry = 1;
- iq3 = 0x1000000 - iq3;
- }
- else
- {
- carry = 0;
- }
- }
- }
- }
- if (q > 0)
- {
- switch (q)
- {
- case 1:
- iq3 &= 0x7FFFFF;
- break;
- case 2:
- iq3 &= 0x3FFFFF;
- break;
- }
- }
- if (ih == 2)
- {
- z = 1.0 - z;
- if (carry != 0)
- {
- z -= twoPowQ;
- }
- }
- }
-
- if (z == 0.0)
- {
- if (jx == 1)
- {
- f6 = ONE_OVER_TWOPI_TAB[jv + 5];
- q5 = x0 * f6 + x1 * f5;
- }
- else
- { // jx == 2
- f7 = ONE_OVER_TWOPI_TAB[jv + 5];
- q5 = x0 * f7 + x1 * f6 + x2 * f5;
- }
-
- z = q5;
- fw = (double) ((int) (TWO_POW_N24 * z));
- iq0 = (int) (z - TWO_POW_24 * fw);
- z = q4 + fw;
- fw = (double) ((int) (TWO_POW_N24 * z));
- iq1 = (int) (z - TWO_POW_24 * fw);
- z = q3 + fw;
- fw = (double) ((int) (TWO_POW_N24 * z));
- iq2 = (int) (z - TWO_POW_24 * fw);
- z = q2 + fw;
- fw = (double) ((int) (TWO_POW_N24 * z));
- iq3 = (int) (z - TWO_POW_24 * fw);
- z = q1 + fw;
- fw = (double) ((int) (TWO_POW_N24 * z));
- iq4 = (int) (z - TWO_POW_24 * fw);
- z = q0 + fw;
-
- z = (z * twoPowQ) % 8.0;
- z -= (double) ((int) z);
- if (q > 0)
- {
- // some parentheses for Eclipse formatter's weaknesses with bits
- // shifts
- iq4 &= (0xFFFFFF >> q);
- ih = (iq4 >> (23 - q));
- }
- else if (q == 0)
- {
- ih = iq4 >> 23;
- }
- else if (z >= 0.5)
- {
- ih = 2;
- }
- else
- {
- ih = 0;
- }
- if (ih > 0)
- {
- if (iq0 != 0)
- {
- iq0 = 0x1000000 - iq0;
- iq1 = 0x0FFFFFF - iq1;
- iq2 = 0x0FFFFFF - iq2;
- iq3 = 0x0FFFFFF - iq3;
- iq4 = 0x0FFFFFF - iq4;
- }
- else
- {
- if (iq1 != 0)
- {
- iq1 = 0x1000000 - iq1;
- iq2 = 0x0FFFFFF - iq2;
- iq3 = 0x0FFFFFF - iq3;
- iq4 = 0x0FFFFFF - iq4;
- }
- else
- {
- if (iq2 != 0)
- {
- iq2 = 0x1000000 - iq2;
- iq3 = 0x0FFFFFF - iq3;
- iq4 = 0x0FFFFFF - iq4;
- }
- else
- {
- if (iq3 != 0)
- {
- iq3 = 0x1000000 - iq3;
- iq4 = 0x0FFFFFF - iq4;
- }
- else
- {
- if (iq4 != 0)
- {
- iq4 = 0x1000000 - iq4;
- }
- }
- }
- }
- }
- if (q > 0)
- {
- switch (q)
- {
- case 1:
- iq4 &= 0x7FFFFF;
- break;
- case 2:
- iq4 &= 0x3FFFFF;
- break;
- }
- }
- }
- fw = twoPowQ * TWO_POW_N24; // q -= 24, so initializing fw with
- // ((2^q)*(2^-24)=2^(q-24))
- }
- else
- {
- // Here, q is in [-25,-2] range or so, so we could use twoPow's
- // table right away with
- // iq4 = (int)(z*twoPowTab[-q-TWO_POW_TAB_MIN_POW]);
- // but tests show using division is faster...
- iq4 = (int) (z / twoPowQ);
- fw = twoPowQ;
- }
-
- q4 = fw * (double) iq4;
- fw *= TWO_POW_N24;
- q3 = fw * (double) iq3;
- fw *= TWO_POW_N24;
- q2 = fw * (double) iq2;
- fw *= TWO_POW_N24;
- q1 = fw * (double) iq1;
- fw *= TWO_POW_N24;
- q0 = fw * (double) iq0;
- fw *= TWO_POW_N24;
-
- fw = TWOPI_TAB0 * q4;
- fw += TWOPI_TAB0 * q3 + TWOPI_TAB1 * q4;
- fw += TWOPI_TAB0 * q2 + TWOPI_TAB1 * q3 + TWOPI_TAB2 * q4;
- fw += TWOPI_TAB0 * q1 + TWOPI_TAB1 * q2 + TWOPI_TAB2 * q3 + TWOPI_TAB3 * q4;
- fw += TWOPI_TAB0 * q0 + TWOPI_TAB1 * q1 + TWOPI_TAB2 * q2 + TWOPI_TAB3 * q3 + TWOPI_TAB4 * q4;
-
- return (ih == 0) ? fw : -fw;
- }
-
- // --------------------------------------------------------------------------
- // STATIC INITIALIZATIONS
- // --------------------------------------------------------------------------
-
- /**
- * Initializes look-up tables.
- *
- * Might use some FastMath methods in there, not to spend an hour in it, but
- * must take care not to use methods which look-up tables have not yet been
- * initialized, or that are not accurate enough.
- */
- static
- {
-
- // sin and cos
-
- final int SIN_COS_PI_INDEX = (SIN_COS_TABS_SIZE - 1) / 2;
- final int SIN_COS_PI_MUL_2_INDEX = 2 * SIN_COS_PI_INDEX;
- final int SIN_COS_PI_MUL_0_5_INDEX = SIN_COS_PI_INDEX / 2;
- final int SIN_COS_PI_MUL_1_5_INDEX = 3 * SIN_COS_PI_INDEX / 2;
- for (int i = 0; i < SIN_COS_TABS_SIZE; i++)
- {
- // angle: in [0,2*PI].
- double angle = i * SIN_COS_DELTA_HI + i * SIN_COS_DELTA_LO;
- double sinAngle = StrictMath.sin(angle);
- double cosAngle = StrictMath.cos(angle);
- // For indexes corresponding to null cosine or sine, we make sure
- // the value is zero
- // and not an epsilon. This allows for a much better accuracy for
- // results close to zero.
- if (i == SIN_COS_PI_INDEX)
- {
- sinAngle = 0.0;
- }
- else if (i == SIN_COS_PI_MUL_2_INDEX)
- {
- sinAngle = 0.0;
- }
- else if (i == SIN_COS_PI_MUL_0_5_INDEX)
- {
- cosAngle = 0.0;
- }
- else if (i == SIN_COS_PI_MUL_1_5_INDEX)
- {
- cosAngle = 0.0;
- }
- sinTab[i] = sinAngle;
- cosTab[i] = cosAngle;
- }
-
- // tan
-
- for (int i = 0; i < TAN_TABS_SIZE; i++)
- {
- // angle: in [0,TAN_MAX_VALUE_FOR_TABS].
- double angle = i * TAN_DELTA_HI + i * TAN_DELTA_LO;
- tanTab[i] = StrictMath.tan(angle);
- double cosAngle = StrictMath.cos(angle);
- double sinAngle = StrictMath.sin(angle);
- double cosAngleInv = 1 / cosAngle;
- double cosAngleInv2 = cosAngleInv * cosAngleInv;
- double cosAngleInv3 = cosAngleInv2 * cosAngleInv;
- double cosAngleInv4 = cosAngleInv2 * cosAngleInv2;
- double cosAngleInv5 = cosAngleInv3 * cosAngleInv2;
- tanDer1DivF1Tab[i] = cosAngleInv2;
- tanDer2DivF2Tab[i] = ((2 * sinAngle) * cosAngleInv3) * ONE_DIV_F2;
- tanDer3DivF3Tab[i] = ((2 * (1 + 2 * sinAngle * sinAngle)) * cosAngleInv4) * ONE_DIV_F3;
- tanDer4DivF4Tab[i] = ((8 * sinAngle * (2 + sinAngle * sinAngle)) * cosAngleInv5) * ONE_DIV_F4;
- }
-
- // asin
-
- for (int i = 0; i < ASIN_TABS_SIZE; i++)
- {
- // x: in [0,ASIN_MAX_VALUE_FOR_TABS].
- double x = i * ASIN_DELTA;
- asinTab[i] = StrictMath.asin(x);
- double oneMinusXSqInv = 1.0 / (1 - x * x);
- double oneMinusXSqInv0_5 = StrictMath.sqrt(oneMinusXSqInv);
- double oneMinusXSqInv1_5 = oneMinusXSqInv0_5 * oneMinusXSqInv;
- double oneMinusXSqInv2_5 = oneMinusXSqInv1_5 * oneMinusXSqInv;
- double oneMinusXSqInv3_5 = oneMinusXSqInv2_5 * oneMinusXSqInv;
- asinDer1DivF1Tab[i] = oneMinusXSqInv0_5;
- asinDer2DivF2Tab[i] = (x * oneMinusXSqInv1_5) * ONE_DIV_F2;
- asinDer3DivF3Tab[i] = ((1 + 2 * x * x) * oneMinusXSqInv2_5) * ONE_DIV_F3;
- asinDer4DivF4Tab[i] = ((5 + 2 * x * (2 + x * (5 - 2 * x))) * oneMinusXSqInv3_5) * ONE_DIV_F4;
- }
-
- if (USE_POWTABS_FOR_ASIN)
- {
- for (int i = 0; i < ASIN_POWTABS_SIZE; i++)
- {
- // x: in [0,ASIN_MAX_VALUE_FOR_POWTABS].
- double x = StrictMath.pow(i * (1.0 / ASIN_POWTABS_SIZE_MINUS_ONE), 1.0 / ASIN_POWTABS_POWER)
- * ASIN_MAX_VALUE_FOR_POWTABS;
- asinParamPowTab[i] = x;
- asinPowTab[i] = StrictMath.asin(x);
- double oneMinusXSqInv = 1.0 / (1 - x * x);
- double oneMinusXSqInv0_5 = StrictMath.sqrt(oneMinusXSqInv);
- double oneMinusXSqInv1_5 = oneMinusXSqInv0_5 * oneMinusXSqInv;
- double oneMinusXSqInv2_5 = oneMinusXSqInv1_5 * oneMinusXSqInv;
- double oneMinusXSqInv3_5 = oneMinusXSqInv2_5 * oneMinusXSqInv;
- asinDer1DivF1PowTab[i] = oneMinusXSqInv0_5;
- asinDer2DivF2PowTab[i] = (x * oneMinusXSqInv1_5) * ONE_DIV_F2;
- asinDer3DivF3PowTab[i] = ((1 + 2 * x * x) * oneMinusXSqInv2_5) * ONE_DIV_F3;
- asinDer4DivF4PowTab[i] = ((5 + 2 * x * (2 + x * (5 - 2 * x))) * oneMinusXSqInv3_5) * ONE_DIV_F4;
- }
- }
-
- // atan
-
- for (int i = 0; i < ATAN_TABS_SIZE; i++)
- {
- // x: in [0,ATAN_MAX_VALUE_FOR_TABS].
- double x = i * ATAN_DELTA;
- double onePlusXSqInv = 1.0 / (1 + x * x);
- double onePlusXSqInv2 = onePlusXSqInv * onePlusXSqInv;
- double onePlusXSqInv3 = onePlusXSqInv2 * onePlusXSqInv;
- double onePlusXSqInv4 = onePlusXSqInv2 * onePlusXSqInv2;
- atanTab[i] = StrictMath.atan(x);
- atanDer1DivF1Tab[i] = onePlusXSqInv;
- atanDer2DivF2Tab[i] = (-2 * x * onePlusXSqInv2) * ONE_DIV_F2;
- atanDer3DivF3Tab[i] = ((-2 + 6 * x * x) * onePlusXSqInv3) * ONE_DIV_F3;
- atanDer4DivF4Tab[i] = ((24 * x * (1 - x * x)) * onePlusXSqInv4) * ONE_DIV_F4;
- }
-
- // exp
-
- for (int i = 0; i < EXP_LO_TAB_SIZE; i++)
- {
- // x: in [-EXPM1_DISTANCE_TO_ZERO,EXPM1_DISTANCE_TO_ZERO].
- double x = -EXP_LO_DISTANCE_TO_ZERO + i / (double) EXP_LO_INDEXING;
- // exp(x)
- expLoPosTab[i] = StrictMath.exp(x);
- // 1-exp(-x), accurately computed
- expLoNegTab[i] = -StrictMath.expm1(-x);
- }
- for (int i = 0; i <= (int) EXP_OVERFLOW_LIMIT; i++)
- {
- expHiTab[i] = StrictMath.exp(i);
- }
- for (int i = 0; i <= -(int) EXP_UNDERFLOW_LIMIT; i++)
- {
- // We take care not to compute with subnormal values.
- if ((double) -i >= EXP_MIN_INT_LIMIT)
- {
- expHiInvTab[i] = StrictMath.exp(-i);
- }
- else
- {
- expHiInvTab[i] = StrictMath.exp(54 * LOG_2 - i);
- }
- }
-
- // log
-
- for (int i = 0; i < LOG_TAB_SIZE; i++)
- {
- // Exact to use inverse of tab size, since it is a power of two.
- double x = 1 + i * (1.0 / LOG_TAB_SIZE);
- logXLogTab[i] = StrictMath.log(x);
- logXTab[i] = x;
- logXInvTab[i] = 1 / x;
- }
-
- // twoPow
-
- for (int i = MIN_DOUBLE_EXPONENT; i <= MAX_DOUBLE_EXPONENT; i++)
- {
- twoPowTab[i - MIN_DOUBLE_EXPONENT] = StrictMath.pow(2.0, i);
- }
-
- // sqrt
-
- for (int i = MIN_DOUBLE_EXPONENT; i <= MAX_DOUBLE_EXPONENT; i++)
- {
- double twoPowExpDiv2 = StrictMath.pow(2.0, i * 0.5);
- sqrtXSqrtHiTab[i - MIN_DOUBLE_EXPONENT] = twoPowExpDiv2 * 0.5; // Half
- // sqrt,
- // to
- // avoid
- // overflows.
- sqrtSlopeHiTab[i - MIN_DOUBLE_EXPONENT] = 1 / twoPowExpDiv2;
- }
- sqrtXSqrtLoTab[0] = 1.0;
- sqrtSlopeLoTab[0] = 1.0;
- final long SQRT_LO_MASK = (0x3FF0000000000000L | (0x000FFFFFFFFFFFFFL >> SQRT_LO_BITS));
- for (int i = 1; i < SQRT_LO_TAB_SIZE; i++)
- {
- long xBits = SQRT_LO_MASK | (((long) (i - 1)) << (52 - SQRT_LO_BITS));
- double sqrtX = StrictMath.sqrt(Double.longBitsToDouble(xBits));
- sqrtXSqrtLoTab[i] = sqrtX;
- sqrtSlopeLoTab[i] = 1 / sqrtX;
- }
-
- // cbrt
-
- for (int i = MIN_DOUBLE_EXPONENT; i <= MAX_DOUBLE_EXPONENT; i++)
- {
- double twoPowExpDiv3 = StrictMath.pow(2.0, i / 3.0);
- cbrtXCbrtHiTab[i - MIN_DOUBLE_EXPONENT] = twoPowExpDiv3 * 0.5; // Half
- // cbrt,
- // to
- // avoid
- // overflows.
- double tmp = 1 / twoPowExpDiv3;
- cbrtSlopeHiTab[i - MIN_DOUBLE_EXPONENT] = (4.0 / 3) * tmp * tmp;
- }
- cbrtXCbrtLoTab[0] = 1.0;
- cbrtSlopeLoTab[0] = 1.0;
- final long CBRT_LO_MASK = (0x3FF0000000000000L | (0x000FFFFFFFFFFFFFL >> CBRT_LO_BITS));
- for (int i = 1; i < CBRT_LO_TAB_SIZE; i++)
- {
- long xBits = CBRT_LO_MASK | (((long) (i - 1)) << (52 - CBRT_LO_BITS));
- double cbrtX = StrictMath.cbrt(Double.longBitsToDouble(xBits));
- cbrtXCbrtLoTab[i] = cbrtX;
- cbrtSlopeLoTab[i] = 1 / (cbrtX * cbrtX);
- }
- }
-}
diff --git a/src/main/java/edu/umd/marbl/mhap/sketch/AbstractBitSketch.java b/src/main/java/edu/umd/marbl/mhap/sketch/AbstractBitSketch.java
index 32bc20f..c008aed 100644
--- a/src/main/java/edu/umd/marbl/mhap/sketch/AbstractBitSketch.java
+++ b/src/main/java/edu/umd/marbl/mhap/sketch/AbstractBitSketch.java
@@ -62,7 +62,7 @@ public abstract class AbstractBitSketch<T extends AbstractBitSketch<T>> implemen
int arrayIndex = (int)(index/64L);
int bitPos = (int)(index%64L);
- long mask = 0b1<<bitPos;
+ long mask = 0b1L<<bitPos;
return (bits[arrayIndex] & mask) != 0L;
}
diff --git a/src/main/java/edu/umd/marbl/mhap/sketch/BottomHash.java b/src/main/java/edu/umd/marbl/mhap/sketch/BottomHash.java
new file mode 100644
index 0000000..a25434a
--- /dev/null
+++ b/src/main/java/edu/umd/marbl/mhap/sketch/BottomHash.java
@@ -0,0 +1,72 @@
+package edu.umd.marbl.mhap.sketch;
+
+import it.unimi.dsi.fastutil.ints.IntArrays;
+
+public class BottomHash implements Sketch<BottomHash>
+{
+ private final int[] hashPositions;
+
+ /**
+ *
+ */
+ private static final long serialVersionUID = 9035607728472270206L;
+
+ public BottomHash(String str, int nGramSize, int k)
+ {
+ int[] hashes = HashUtils.computeSequenceHashes(str, nGramSize);
+
+ k = Math.min(k, hashes.length);
+
+ int[] perm = new int[hashes.length];
+ for (int iter=0; iter<hashes.length; iter++)
+ perm[iter] = iter;
+
+ //sort the array
+ IntArrays.radixSortIndirect(perm, hashes, true);
+
+ hashPositions = new int[k];
+
+ for (int iter=0; iter<k; iter++)
+ {
+ int index = perm[iter];
+ hashPositions[iter] = hashes[index];
+ }
+
+ }
+
+ public double jaccard(BottomHash sh)
+ {
+ //make sure you look at same number
+ int k = Math.min(this.hashPositions.length, sh.hashPositions.length);
+
+ int i = 0;
+ int j = 0;
+ int intersectCount = 0;
+ int unionCount = 0;
+ while (unionCount<k)
+ {
+ if (this.hashPositions[i]<sh.hashPositions[j])
+ i++;
+ else
+ if (this.hashPositions[i]>sh.hashPositions[j])
+ j++;
+ else
+ {
+ intersectCount++;
+ i++;
+ j++;
+ }
+
+ unionCount++;
+ }
+
+ return ((double)intersectCount)/(double)k;
+ }
+
+ @Override
+ public double similarity(BottomHash sh)
+ {
+ return jaccard(sh);
+ }
+
+}
diff --git a/src/main/java/edu/umd/marbl/mhap/sketch/CountMin.java b/src/main/java/edu/umd/marbl/mhap/sketch/CountMin.java
index efd49fe..b9ca1ef 100644
--- a/src/main/java/edu/umd/marbl/mhap/sketch/CountMin.java
+++ b/src/main/java/edu/umd/marbl/mhap/sketch/CountMin.java
@@ -68,22 +68,12 @@ public final class CountMin<T extends Object> implements Counter<T>
this.countTable[iter1][iter2] = new LongAdder();
}
- /*
- * (non-Javadoc)
- *
- * @see com.invincea.labs.pace.hash.Counter#add(java.lang.Object)
- */
@Override
public void add(T obj)
{
add(obj, 1);
}
- /*
- * (non-Javadoc)
- *
- * @see com.invincea.labs.pace.hash.Counter#add(java.lang.Object, int)
- */
@Override
public void add(T obj, long increment)
{
@@ -102,11 +92,6 @@ public final class CountMin<T extends Object> implements Counter<T>
this.totalAdded.add(increment);
}
- /*
- * (non-Javadoc)
- *
- * @see com.invincea.labs.pace.hash.Counter#getCount(java.lang.Object)
- */
@Override
public long getCount(Object obj)
{
@@ -135,11 +120,6 @@ public final class CountMin<T extends Object> implements Counter<T>
return this.width;
}
- /*
- * (non-Javadoc)
- *
- * @see com.invincea.labs.pace.hash.Counter#maxCount()
- */
@Override
public long maxCount()
{
diff --git a/src/main/java/edu/umd/marbl/mhap/sketch/OrderedNGramHashes.java b/src/main/java/edu/umd/marbl/mhap/sketch/OrderedNGramHashes.java
index 03ed327..db8591e 100644
--- a/src/main/java/edu/umd/marbl/mhap/sketch/OrderedNGramHashes.java
+++ b/src/main/java/edu/umd/marbl/mhap/sketch/OrderedNGramHashes.java
@@ -36,7 +36,6 @@ import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.IOException;
-import java.io.Serializable;
import java.util.Arrays;
import edu.umd.marbl.mhap.impl.OverlapInfo;
@@ -44,70 +43,322 @@ import edu.umd.marbl.mhap.utils.Utils;
public final class OrderedNGramHashes
{
- private static final class SortableIntPair implements Comparable<SortableIntPair>, Serializable
+ private final static class EdgeData
{
- public final int x;
- public final int y;
- /**
- *
- */
- private static final long serialVersionUID = 2525278831423582446L;
-
- public SortableIntPair(int x, int y)
+ public final int a1;
+ public final int a2;
+ public final int b1;
+ public final int b2;
+ public final int count;
+
+ public EdgeData(int a1, int a2, int b1, int b2, int count)
{
- this.x = x;
- this.y = y;
+ this.a1 = a1;
+ this.a2 = a2;
+ this.b1 = b1;
+ this.b2 = b2;
+ this.count = count;
}
+ }
+
+ private final static class MatchData
+ {
+ private int absMaxShiftInOverlap;
+ private int count;
+ private final double maxShiftPercent;
+ private int medianShift;
+ private boolean needRecompute;
+ public int[] pos1Index;
+ public int[] pos2Index;
+ public int[] posShift;
+ private final int seqLength1;
+ private final int seqLength2;
+
+ public MatchData(OrderedNGramHashes o1, OrderedNGramHashes o2, double maxShiftPercent)
+ {
+ this.seqLength1 = o1.getSequenceLength();
+ this.seqLength2 = o2.getSequenceLength();
+
+ this.posShift = new int[Math.max(o1.size(), o2.size())/4+1];
+ this.pos1Index = new int[posShift.length];
+ this.pos2Index = new int[posShift.length];
+
+ this.maxShiftPercent = maxShiftPercent;
+ reset();
+ }
+
+ public EdgeData computeEdges()
+ {
+ // storage for edge computation
+ int leftEdge1 = Integer.MAX_VALUE;
+ int leftEdge2 = Integer.MAX_VALUE;
+ int rightEdge1 = Integer.MIN_VALUE;
+ int rightEdge2 = Integer.MIN_VALUE;
+
+ // count only the shifts in the correct place
+ int validCount = 0;
+ int medianShift = getMedianShift();
+ int absMaxShiftInOverlap = getAbsMaxShift();
+ int count = size();
+ for (int iter = 0; iter < count; iter++)
+ {
+ int pos1 = this.pos1Index[iter];
+ int pos2 = this.pos2Index[iter];
+
+ // take only valid values
+ if (Math.abs(this.posShift[iter] - medianShift) > absMaxShiftInOverlap)
+ continue;
+
+ // get the edges
+ if (pos1 < leftEdge1)
+ leftEdge1 = pos1;
+ if (pos2 < leftEdge2)
+ leftEdge2 = pos2;
+ if (pos1 > rightEdge1)
+ rightEdge1 = pos1;
+ if (pos2 > rightEdge2)
+ rightEdge2 = pos2;
+
+ validCount++;
+ }
- @Override
- public int compareTo(SortableIntPair p)
+ if (validCount < 3)
+ return null;
+
+ // get edge info uniformly minimum variance unbiased (UMVU) estimators
+ // a = (n*a-b)/(n-1)
+ // b = (n*b-a)/(n-1)
+ int a1 = Math.max(0, (int) Math.round((double)(validCount * leftEdge1 - rightEdge1) / (double) (validCount - 1)));
+ int a2 = Math.min(this.seqLength1, (int) Math.round((double)(validCount * rightEdge1 - leftEdge1) / (double) (validCount - 1)));
+ int b1 = Math.max(0, (int) Math.round((double)(validCount * leftEdge2 - rightEdge2) / (double) (validCount - 1)));
+ int b2 = Math.min(this.seqLength2, (int) Math.round((double)(validCount * rightEdge2 - leftEdge2) / (double) (validCount - 1)));
+
+ return new EdgeData(a1, a2, b1, b2, validCount);
+ }
+
+ public int getAbsMaxShift()
+ {
+ performUpdate();
+ return this.absMaxShiftInOverlap;
+ }
+
+ public int getMedianShift()
{
- return Integer.compare(this.x, p.x);
+ performUpdate();
+ return this.medianShift;
}
+
+ public boolean isEmpty()
+ {
+ return this.count<=0;
+ }
+
+ public void optimizeShifts()
+ {
+ if (isEmpty())
+ return;
+
+ int reducedCount = -1;
+
+ // copy over only the best values
+ int medianShift = getMedianShift();
+ for (int iter = 0; iter < this.count; iter++)
+ {
+ if (reducedCount >= 0 && pos1Index[reducedCount] == pos1Index[iter])
+ {
+ // if better, record it
+ if (Math.abs(posShift[reducedCount] - medianShift) > Math.abs(posShift[iter] - medianShift))
+ {
+ pos1Index[reducedCount] = pos1Index[iter];
+ pos2Index[reducedCount] = pos2Index[iter];
+ posShift[reducedCount] = posShift[iter];
+ }
+ }
+ else
+ {
+ // add the new data
+ reducedCount++;
+ pos1Index[reducedCount] = pos1Index[iter];
+ pos2Index[reducedCount] = pos2Index[iter];
+ posShift[reducedCount] = posShift[iter];
+ }
+ }
- /* (non-Javadoc)
- * @see java.lang.Object#toString()
- */
- @Override
- public String toString()
+ this.count = reducedCount + 1;
+ this.needRecompute = true;
+ }
+
+ private void performUpdate()
{
- return "["+x + ", " + y + "]";
+ if (this.needRecompute)
+ {
+ if (this.count>0)
+ {
+ this.medianShift = Utils.quickSelect(Arrays.copyOf(this.posShift, this.count), this.count / 2, this.count);
+
+ // get the actual overlap size
+ int leftPosition = Math.max(0, -this.medianShift);
+ int rightPosition = Math.min(this.seqLength1, this.seqLength2 - this.medianShift);
+ int overlapSize = Math.max(10, rightPosition - leftPosition);
+
+ // compute the max possible allowed shift in kmers
+ this.absMaxShiftInOverlap = Math.min(Math.max(this.seqLength1, this.seqLength2), (int) ((double) overlapSize * maxShiftPercent));
+ }
+ else
+ {
+ this.medianShift = 0;
+ this.absMaxShiftInOverlap = Math.max(this.seqLength1, this.seqLength2);
+ }
+ }
+
+ this.needRecompute = false;
+ }
+
+ public void recordMatch(int pos1, int pos2, int shift)
+ {
+ // adjust array size if needed
+ if (posShift.length <= this.count)
+ {
+ this.posShift = Arrays.copyOf(this.posShift, this.posShift.length * 2);
+ this.pos1Index = Arrays.copyOf(this.pos1Index, this.pos1Index.length * 2);
+ this.pos2Index = Arrays.copyOf(this.pos2Index, this.pos2Index.length * 2);
+ }
+
+ posShift[this.count] = shift;
+ pos1Index[this.count] = pos1;
+ pos2Index[this.count] = pos2;
+
+ this.count++;
+ this.needRecompute = true;
+ }
+
+ public void reset()
+ {
+ this.count = 0;
+ this.needRecompute = true;
}
-
- }
+ public int size()
+ {
+ return this.count;
+ }
+
+ public int valid1Lower()
+ {
+ performUpdate();
+ int valid = Math.max(0, -getMedianShift() - getAbsMaxShift());
+
+ return valid;
+ }
+
+ public int valid1Upper()
+ {
+ performUpdate();
+ int valid = Math.min(this.seqLength1, this.seqLength2 - getMedianShift() + getAbsMaxShift());
+
+ return valid;
+ }
+
+ public int valid2Lower()
+ {
+ performUpdate();
+ int valid = Math.max(0, getMedianShift() - getAbsMaxShift());
+
+ return valid;
+ }
+
+ public int valid2Upper()
+ {
+ performUpdate();
+ int valid = Math.min(this.seqLength2, this.seqLength1 + getMedianShift() + getAbsMaxShift());
+
+ return valid;
+ }
+ }
+
private final int[][] orderedHashes;
private final int seqLength;
+ private final int kmerSize;
- public final static int REDUCTION = 4;
-
- private final static int[][] allocateMemory(int size)
+ private static double computeKBottomSketchJaccard(int[][] seq1Hashes, int[][] seq2Hashes, int medianShift, int absMaxShiftInOverlap, int a1, int a2, int b1, int b2)
{
- // allocate the memory
- int[][] completeHash = new int[size][2];
-
- return completeHash;
+ //get k for first string
+ int s1 = 0;
+ int[][] array1 = new int[seq1Hashes.length][];
+ for (int i=0; i<seq1Hashes.length; i++)
+ {
+ int pos = seq1Hashes[i][1];
+ if (pos >= a1 && pos <= a2)
+ {
+ array1[s1] = seq1Hashes[i];
+ s1++;
+ }
+ }
+
+ //get k for second string
+ int s2 = 0;
+ int[][] array2 = new int[seq2Hashes.length][];
+ for (int j=0; j<seq2Hashes.length; j++)
+ {
+ int pos = seq2Hashes[j][1];
+ if (pos >= b1 && pos <= b2)
+ {
+ array2[s2] = seq2Hashes[j];
+ s2++;
+ }
+ }
+
+ //compute k
+ int k = Math.min(s1,s2);
+
+ //empty has jaccard of 1
+ if (k==0)
+ return 0;
+
+ //perform the k-bottom count
+ int i = 0;
+ int j = 0;
+ int intersectCount = 0;
+ int unionCount = 0;
+ while (unionCount<k)
+ {
+ if (array1[i][0]<array2[j][0])
+ i++;
+ else
+ if (array1[i][0]>array2[j][0])
+ j++;
+ else
+ {
+ intersectCount++;
+ i++;
+ j++;
+ }
+
+ unionCount++;
+ }
+
+ double score = ((double)intersectCount)/(double)k;
+
+ return score;
}
public final static OrderedNGramHashes fromByteStream(DataInputStream input) throws IOException
{
try
{
- // dos.writeInt(this.seqLength);
- // dos.writeInt(size());
int seqLength = input.readInt();
+ int kmerSize = input.readInt();
int hashLength = input.readInt();
- int[][] orderedHashes = allocateMemory(hashLength);
+ int[][] orderedHashes = new int[hashLength][2];
for (int iter = 0; iter < hashLength; iter++)
{
- // dos.writeInt(this.completeHash[iter][iter2]);
orderedHashes[iter][0] = input.readInt();
orderedHashes[iter][1] = input.readInt();
}
- return new OrderedNGramHashes(seqLength, orderedHashes);
+ return new OrderedNGramHashes(seqLength, kmerSize, orderedHashes);
}
catch (EOFException e)
@@ -115,21 +366,123 @@ public final class OrderedNGramHashes
return null;
}
}
+
+ public static double jaccardToIdentity(double score, int kmerSize)
+ {
+ double d = -1.0/(double)kmerSize*Math.log(2.0*score/(1.0+score));
+ return Math.exp(-d);
+ }
- private OrderedNGramHashes(int seqLength, int[][] orderedHashes)
+ private static void recordMatchingKmers(
+ MatchData matchData,
+ int[][] seq1KmerHashes,
+ int[][] seq2KmerHashes,
+ int repeat)
+ {
+ // init the loop storage
+ int hash1;
+ int hash2;
+ int pos1;
+ int pos2;
+
+ // init the borders
+ int medianShift = matchData.getMedianShift();
+ int absMaxShift = matchData.getAbsMaxShift();
+ int valid1Lower = matchData.valid1Lower();
+ int valid2Lower = matchData.valid2Lower();
+ int valid1Upper = matchData.valid1Upper();
+ int valid2Upper = matchData.valid2Upper();
+
+ // init counters
+ int i1 = 0;
+ int i2 = 0;
+
+ //reset the data, redo the shifts
+ matchData.reset();
+
+ // perform merge operation to get the shift and the kmer count
+ while (true)
+ {
+ if (i1>=seq1KmerHashes.length)
+ break;
+ if (i2>=seq2KmerHashes.length)
+ break;
+
+ // get the values in the array
+ hash1 = seq1KmerHashes[i1][0];
+ pos1 = seq1KmerHashes[i1][1];
+ hash2 = seq2KmerHashes[i2][0];
+ pos2 = seq2KmerHashes[i2][1];
+
+ if (hash1 < hash2 || pos1 < valid1Lower || pos1 >= valid1Upper)
+ i1++;
+ else if (hash2 < hash1 || pos2 < valid2Lower || pos2 >= valid2Upper)
+ i2++;
+ else
+ {
+ // check if current shift makes sense positionally
+ int currShift = pos2 - pos1;
+ int diffFromExpected = currShift - medianShift;
+ if (diffFromExpected > absMaxShift)
+ i1++;
+ else
+ if (diffFromExpected < -absMaxShift)
+ i2++;
+ else
+ {
+ //record match
+ matchData.recordMatch(pos1, pos2, currShift);
+
+ // don't rely on repeats in the first iteration
+ if (repeat == 0)
+ i1++;
+ i2++;
+ }
+ }
+ }
+ }
+
+ private OrderedNGramHashes(int seqLength, int kmerSize, int[][] orderedHashes)
{
this.seqLength = seqLength;
this.orderedHashes = orderedHashes;
+ this.kmerSize = kmerSize;
}
- public OrderedNGramHashes(String seq, int kmerSize)
+ public OrderedNGramHashes(String seq, int kmerSize, int sketchSize)
{
+ this.kmerSize = kmerSize;
this.seqLength = seq.length() - kmerSize + 1;
if (this.seqLength<=0)
throw new SketchRuntimeException("Sequence length must be greater or equal to kmerSize.");
- this.orderedHashes = getFullHashes(seq, kmerSize);
+ // compute just direct hash of sequence
+ int[] hashes = HashUtils.computeSequenceHashes(seq, kmerSize);
+
+ int[] perm = new int[hashes.length];
+
+ //init the array
+ for (int iter = 0; iter < hashes.length; iter++)
+ perm[iter] = iter;
+
+ //sort the array
+ IntArrays.radixSortIndirect(perm, hashes, true);
+
+ //sketchSize = (int)Math.round(0.25*(double)this.seqLength);
+
+ //find the largest storage value
+ int k = Math.min(sketchSize, hashes.length);
+
+ //allocate the memory
+ this.orderedHashes = new int[k][2];
+
+ for (int iter = 0; iter < this.orderedHashes.length; iter++)
+ {
+ int index = perm[iter];
+ this.orderedHashes[iter][0] = hashes[index];
+ this.orderedHashes[iter][1] = index;
+ }
}
public byte[] getAsByteArray()
@@ -140,7 +493,9 @@ public final class OrderedNGramHashes
try
{
dos.writeInt(this.seqLength);
+ dos.writeInt(this.kmerSize);
dos.writeInt(size());
+
for (int iter = 0; iter < this.orderedHashes.length; iter++)
{
dos.writeInt(this.orderedHashes[iter][0]);
@@ -152,305 +507,57 @@ public final class OrderedNGramHashes
}
catch (IOException e)
{
- throw new SketchRuntimeException("Unexpected IO error.");
+ throw new SketchRuntimeException("Unexpected IO error.", e);
}
}
-
+
public int getHash(int index)
{
return this.orderedHashes[index][0];
}
-
- private int[][] storeAsArray(SortableIntPair[] completeHashAsPair)
+
+ public OverlapInfo getOverlapInfo(OrderedNGramHashes toSequence, double maxShiftPercent)
{
- // allocate the memory
- int[][] completeHash = allocateMemory(completeHashAsPair.length);
+ if (this.kmerSize!=toSequence.kmerSize)
+ throw new SketchRuntimeException("Sketch k-mer size does not match between the two sequences.");
+
+ //allocate the memory for the search
+ MatchData matchData = new MatchData(this, toSequence, maxShiftPercent);
- for (int iter = 0; iter < completeHashAsPair.length; iter++)
- {
- completeHash[iter][0] = completeHashAsPair[iter].x;
- completeHash[iter][1] = completeHashAsPair[iter].y;
- }
+ //get the initial matches
+ recordMatchingKmers(matchData, this.orderedHashes, toSequence.orderedHashes, 0);
+ if (matchData.isEmpty())
+ return OverlapInfo.EMPTY;
- return completeHash;
- }
+ //get matches again, but now in a better region
+ recordMatchingKmers(matchData, this.orderedHashes, toSequence.orderedHashes, 1);
- private int[][] getFullHashes(String seq, int subKmerSize)
- {
- int cutoff = (int) ((long) Integer.MIN_VALUE + ((long) Integer.MAX_VALUE - (long) Integer.MIN_VALUE)
- / (long) REDUCTION);
-
- // compute just direct hash of sequence
- int[] hashes = HashUtils.computeSequenceHashes(seq, subKmerSize);
+ if (matchData.isEmpty())
+ return OverlapInfo.EMPTY;
- int count = 0;
- for (int val : hashes)
- if (val <= cutoff)
- count++;
+ matchData.optimizeShifts();
+
+ if (matchData.isEmpty())
+ return OverlapInfo.EMPTY;
- int[] cutHashes = new int[count];
- int[] perm = new int[count];
- int[] pos = new int[count];
+ //get the edge data
+ EdgeData edgeData = matchData.computeEdges();
- count = 0;
- for (int iter = 0; iter < hashes.length; iter++)
- if (hashes[iter] <= cutoff)
- {
- cutHashes[count] = hashes[iter];
- perm[count] = count;
- pos[count] = iter;
-
- count++;
- }
+ if (edgeData==null)
+ return OverlapInfo.EMPTY;
- //sort the array
- IntArrays.radixSortIndirect(perm, cutHashes, true);
-
- SortableIntPair[] completeHashAsPair = new SortableIntPair[count];
- for (int iter=0; iter<count; iter++)
- {
- int index = perm[iter];
- completeHashAsPair[iter] = new SortableIntPair(cutHashes[index], pos[index]);
- }
+ //compute the jaccard score using bottom-k sketching
+ double score = computeKBottomSketchJaccard(this.orderedHashes, toSequence.orderedHashes, matchData.getMedianShift(), matchData.getAbsMaxShift(), edgeData.a1, edgeData.a2, edgeData.b1, edgeData.b2);
+ score = jaccardToIdentity(score, this.kmerSize);
- //System.err.println(Arrays.toString(completeHashAsPair));
- // sort the results, sort in place so no need to look at second
- //Arrays.sort(completeHashAsPair);
-
- return storeAsArray(completeHashAsPair);
+ double rawScore = (double)edgeData.count;
+
+ return new OverlapInfo(score, rawScore, edgeData.a1, edgeData.a2, edgeData.b1, edgeData.b2);
}
-
- public OverlapInfo getOverlapInfo(OrderedNGramHashes s, double maxShiftPercent)
+
+ public int getSequenceLength()
{
- int[][] allKmerHashes = this.orderedHashes;
-
- // get the kmers of the second sequence
- int[][] sAllKmerHashes = s.orderedHashes;
-
- // get sizes
- int size1 = this.size();
- int size2 = s.size();
-
- int kmerSize1 = this.seqLength;
- int kmerSize2 = s.seqLength;
-
- // init the ok regions
- int valid1Lower = 0;
- int valid1Upper = kmerSize1;
- int valid2Lower = 0;
- int valid2Upper = kmerSize2;
-
- int medianShift = 0;
- int overlapSize = Math.min(kmerSize1, kmerSize2);
- int absMaxShiftInOverlap = Math.max(kmerSize1, kmerSize2);
-
- int count = 0;
- int[] posShift = new int[Math.min(size1, size2) / 8 + 1];
- int[] pos1Index = new int[posShift.length];
- int[] pos2Index = new int[posShift.length];
-
- // check the repeat flag
- int numScoringRepeats = 2;
- if (maxShiftPercent <= 0)
- {
- numScoringRepeats = 1;
- maxShiftPercent = Math.abs(maxShiftPercent);
- }
-
- // refine multiple times to get better interval estimate
- for (int repeat = 0; repeat < numScoringRepeats; repeat++)
- {
- // init counters
- count = 0;
- int i1 = 0;
- int i2 = 0;
-
- // init the loop storage
- int hash1 = 0;
- int hash2 = 0;
- int pos1;
- int pos2;
-
- // perform merge operation to get the shift and the kmer count
- while (true)
- {
- if (i1>=allKmerHashes.length)
- break;
- if (i2>=sAllKmerHashes.length)
- break;
-
- // get the values in the array
- hash1 = allKmerHashes[i1][0];
- pos1 = allKmerHashes[i1][1];
-
- hash2 = sAllKmerHashes[i2][0];
- pos2 = sAllKmerHashes[i2][1];
-
- if (hash1 < hash2 || pos1 < valid1Lower || pos1 >= valid1Upper)
- i1++;
- else if (hash2 < hash1 || pos2 < valid2Lower || pos2 >= valid2Upper)
- i2++;
- else
- {
- // check if current shift makes sense positionally
- int currShift = pos2 - pos1;
- if (Math.abs(currShift - medianShift) > absMaxShiftInOverlap)
- {
- // do not record this shift and increase counter
- i2++;
- continue;
- }
-
- // adjust array size if needed
- if (posShift.length <= count)
- {
- posShift = Arrays.copyOf(posShift, posShift.length * 2);
- pos1Index = Arrays.copyOf(pos1Index, pos1Index.length * 2);
- pos2Index = Arrays.copyOf(pos2Index, pos2Index.length * 2);
- }
-
- // compute the shift
- posShift[count] = currShift;
- pos1Index[count] = pos1;
- pos2Index[count] = pos2;
-
- // if first round, store only first hit
- if (repeat == 0)
- i1++;
- i2++;
-
- count++;
- }
- }
-
- if (count <= 0)
- return new OverlapInfo(0.0, 0, 0, 0, 0, 0);
-
- // pick out only the matches that are best
- if (repeat > 0)
- {
- int reducedCount = -1;
-
- // copy over only the best values
- for (int iter = 0; iter < count; iter++)
- {
- if (reducedCount >= 0 && pos1Index[reducedCount] == pos1Index[iter])
- {
- // if better, record it
- if (Math.abs(posShift[reducedCount] - medianShift) > Math.abs(posShift[iter] - medianShift))
- {
- pos1Index[reducedCount] = pos1Index[iter];
- pos2Index[reducedCount] = pos2Index[iter];
- posShift[reducedCount] = posShift[iter];
- }
- }
- else
- {
- // add the new data
- reducedCount++;
- pos1Index[reducedCount] = pos1Index[iter];
- pos2Index[reducedCount] = pos2Index[iter];
- posShift[reducedCount] = posShift[iter];
- }
- }
-
- count = reducedCount + 1;
- }
-
- if (count <= 0)
- medianShift = 0;
- else
- medianShift = Utils.quickSelect(Arrays.copyOf(posShift, count), count / 2, count);
-
- // get the actual overlap size
- int leftPosition = Math.max(0, -medianShift);
- int rightPosition = Math.min(kmerSize1, kmerSize2 - medianShift);
- overlapSize = Math.max(this.seqLength - kmerSize1, rightPosition - leftPosition);
-
- // compute the max possible allowed shift in kmers
- absMaxShiftInOverlap = Math.min(Math.max(kmerSize1, kmerSize2),
- (int) ((double) overlapSize * maxShiftPercent));
-
- // get the updated borders
- valid1Lower = Math.max(0, -medianShift - absMaxShiftInOverlap);
- valid1Upper = Math.min(kmerSize1, kmerSize2 - medianShift + absMaxShiftInOverlap);
- valid2Lower = Math.max(0, medianShift - absMaxShiftInOverlap);
- valid2Upper = Math.min(kmerSize2, kmerSize1 + medianShift + absMaxShiftInOverlap);
-
- /*
- * System.err.println(overlapSize);
- * System.err.println("Size1= "+size1+" Lower:"+
- * valid1Lower+" Upper:"+valid1Upper+" Shift="+shift);
- * System.err.println("Size2= "+size2+" Lower:"+
- * valid2Lower+" Upper:"+valid2Upper);
- */
- }
-
- // storage for edge computation
- int leftEdge1 = Integer.MAX_VALUE;
- int leftEdge2 = Integer.MAX_VALUE;
- int rightEdge1 = Integer.MIN_VALUE;
- int rightEdge2 = Integer.MIN_VALUE;
-
- // count only the shifts in the correct place
- int validCount = 0;
- for (int iter = 0; iter < count; iter++)
- {
- int pos1 = pos1Index[iter];
- int pos2 = pos2Index[iter];
-
- // take only valid values
- if (Math.abs(posShift[iter] - medianShift) > absMaxShiftInOverlap)
- continue;
-
- // get the edges
- if (pos1 < leftEdge1)
- leftEdge1 = pos1;
- if (pos2 < leftEdge2)
- leftEdge2 = pos2;
- if (pos1 > rightEdge1)
- rightEdge1 = pos1;
- if (pos2 > rightEdge2)
- rightEdge2 = pos2;
-
- validCount++;
- }
-
- if (validCount <= 1)
- return new OverlapInfo(0.0, 0, 0, 0, 0, 0);
-
- // compute the score
- double score = (double) validCount / (double) (overlapSize);
-
- // get edge info uniformly minimum variance unbiased (UMVU) estimators
- // a = (n*a-b)/(n-1)
- // b = (n*b-a)/(n-1)
- int a1 = Math.max(0, (int) Math.round((validCount * leftEdge1 - rightEdge1) / (double) (validCount - 1)));
- int b1 = Math.max(0, (int) Math.round((validCount * leftEdge2 - rightEdge2) / (double) (validCount - 1)));
- int a2 = Math.min(this.seqLength,
- (int) Math.round((validCount * rightEdge1 - leftEdge1) / (double) (validCount - 1)));
- int b2 = Math.min(s.seqLength,
- (int) Math.round((validCount * rightEdge2 - leftEdge2) / (double) (validCount - 1)));
-
- // int ahang = a1-a2;
- // int bhang = (this.size()-b1>s.size()-b2) ? b1-this.size() : s.size()
- // - b2;
-
- // if (score>0.06)
- // {
- // int[] test = Arrays.copyOf(posShift, count);
- // int[] test2 = Arrays.copyOf(pos1Index, count);
-
- // System.err.println("Start = "+Math.max(0,
- // -medianShift)+", Overlap="+overlapSize+" Maxshift="+absMaxShiftInOverlap+": ["+Arrays.toString(test)+"; "+Arrays.toString(test2)+"];");
- // System.err.println("Overlap="+overlapSize+", Shift/overlap="+(double)(test[test.length-10]-test[10])/(double)overlapSize);
- // }
-
- // the hangs are adjusted by the rate of slide*distance traveled
- // relative to median, -medianShift-(a1-a2)
- // return new OverlapInfo(score, ahang, bhang);
-
- return new OverlapInfo(score * (double) REDUCTION, validCount, a1, a2, b1, b2);
+ return this.seqLength;
}
public int size()
diff --git a/src/main/java/edu/umd/marbl/mhap/sketch/OrderedNGramHashes.java b/src/main/java/edu/umd/marbl/mhap/sketch/OrderedNGramHashesOld.java
similarity index 96%
copy from src/main/java/edu/umd/marbl/mhap/sketch/OrderedNGramHashes.java
copy to src/main/java/edu/umd/marbl/mhap/sketch/OrderedNGramHashesOld.java
index 03ed327..cdb6470 100644
--- a/src/main/java/edu/umd/marbl/mhap/sketch/OrderedNGramHashes.java
+++ b/src/main/java/edu/umd/marbl/mhap/sketch/OrderedNGramHashesOld.java
@@ -42,7 +42,7 @@ import java.util.Arrays;
import edu.umd.marbl.mhap.impl.OverlapInfo;
import edu.umd.marbl.mhap.utils.Utils;
-public final class OrderedNGramHashes
+public final class OrderedNGramHashesOld
{
private static final class SortableIntPair implements Comparable<SortableIntPair>, Serializable
{
@@ -89,7 +89,7 @@ public final class OrderedNGramHashes
return completeHash;
}
- public final static OrderedNGramHashes fromByteStream(DataInputStream input) throws IOException
+ public final static OrderedNGramHashesOld fromByteStream(DataInputStream input) throws IOException
{
try
{
@@ -107,7 +107,7 @@ public final class OrderedNGramHashes
orderedHashes[iter][1] = input.readInt();
}
- return new OrderedNGramHashes(seqLength, orderedHashes);
+ return new OrderedNGramHashesOld(seqLength, orderedHashes);
}
catch (EOFException e)
@@ -116,13 +116,13 @@ public final class OrderedNGramHashes
}
}
- private OrderedNGramHashes(int seqLength, int[][] orderedHashes)
+ private OrderedNGramHashesOld(int seqLength, int[][] orderedHashes)
{
this.seqLength = seqLength;
this.orderedHashes = orderedHashes;
}
- public OrderedNGramHashes(String seq, int kmerSize)
+ public OrderedNGramHashesOld(String seq, int kmerSize)
{
this.seqLength = seq.length() - kmerSize + 1;
@@ -220,7 +220,7 @@ public final class OrderedNGramHashes
return storeAsArray(completeHashAsPair);
}
- public OverlapInfo getOverlapInfo(OrderedNGramHashes s, double maxShiftPercent)
+ public OverlapInfo getOverlapInfo(OrderedNGramHashesOld s, double maxShiftPercent)
{
int[][] allKmerHashes = this.orderedHashes;
diff --git a/src/main/java/edu/umd/marbl/mhap/utils/CharacterHash.java b/src/main/java/edu/umd/marbl/mhap/utils/CharacterHash.java
deleted file mode 100644
index 70dd8f4..0000000
--- a/src/main/java/edu/umd/marbl/mhap/utils/CharacterHash.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * MHAP package
- *
- * This software is distributed "as is", without any warranty, including
- * any implied warranty of merchantability or fitness for a particular
- * use. The authors assume no responsibility for, and shall not be liable
- * for, any special, indirect, or consequential damages, or any damages
- * whatsoever, arising out of or in connection with the use of this
- * software.
- *
- * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
- *
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- */
-package edu.umd.marbl.mhap.utils;
-
-import java.util.Random;
-
-public class CharacterHash
-{
- public int hashvalues[] = new int[1 << 16];
-
- static CharacterHash charhash = new CharacterHash();
-
- public static CharacterHash getInstance()
- {
- return charhash;
- }
-
- public CharacterHash()
- {
- Random r = new Random(1);
- for (int k = 0; k < this.hashvalues.length; ++k)
- this.hashvalues[k] = r.nextInt();
- }
-
-}
diff --git a/src/main/java/edu/umd/marbl/mhap/utils/CyclicHash.java b/src/main/java/edu/umd/marbl/mhap/utils/CyclicHash.java
deleted file mode 100644
index ceb7ff8..0000000
--- a/src/main/java/edu/umd/marbl/mhap/utils/CyclicHash.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/**
- * Daniel Lemire, Owen Kaser: Recursive n-gram hashing is pairwise independent, at best, Computer Speech & Language, Volume 24, Issue 4, October 2010, Pages 698-710 http://arxiv.org/abs/0705.4676
- */
-package edu.umd.marbl.mhap.utils;
-
-public final class CyclicHash
-{
- public int hashvalue;
-
- int myr;
-
- int n;
-
- private final static CharacterHash hasher = CharacterHash.getInstance();
-
- public final static int wordsize = 32;
-
- private final static int fastleftshift1(int x)
- {
- return (x << 1) | (x >>> (wordsize - 1));
- }
-
- // this is purely for testing purposes
- public final static int nonRollingHash(CharSequence s)
- {
- int value = 0;
- for (int i = 0; i < s.length(); ++i)
- {
- char c = s.charAt(i);
- int z = hasher.hashvalues[c];
- value = fastleftshift1(value) ^ z;
- }
- return value;
- }
-
- // myn is the length in characters of the blocks you want to hash
- public CyclicHash(int myn)
- {
- this.n = myn;
- if (this.n > wordsize)
- {
- throw new IllegalArgumentException();
- }
-
- }
-
- // add new character (useful to initiate the hasher)
- // to get a strongly universal hash value, you have to ignore the last or
- // first (n-1) bits.
- public final int eat(char c)
- {
- this.hashvalue = fastleftshift1(this.hashvalue);
- this.hashvalue ^= hasher.hashvalues[c];
- return this.hashvalue;
- }
-
- private final int fastleftshiftn(int x)
- {
- return (x << this.n) | (x >>> (wordsize - this.n));
- }
-
- // remove old character and add new one
- // to get a strongly universal hash value, you have to ignore the last or
- // first (n-1) bits.
- public final int update(char outchar, char inchar)
- {
- int z = fastleftshiftn(hasher.hashvalues[outchar]);
- this.hashvalue = fastleftshift1(this.hashvalue) ^ z ^ hasher.hashvalues[inchar];
- return this.hashvalue;
- }
-
-}
\ No newline at end of file
diff --git a/src/main/java/edu/umd/marbl/mhap/utils/PackageInfo.java b/src/main/java/edu/umd/marbl/mhap/utils/PackageInfo.java
deleted file mode 100644
index 8e74d54..0000000
--- a/src/main/java/edu/umd/marbl/mhap/utils/PackageInfo.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * MHAP package
- *
- * This software is distributed "as is", without any warranty, including
- * any implied warranty of merchantability or fitness for a particular
- * use. The authors assume no responsibility for, and shall not be liable
- * for, any special, indirect, or consequential damages, or any damages
- * whatsoever, arising out of or in connection with the use of this
- * software.
- *
- * Copyright (c) 2015 by Konstantin Berlin and Sergey Koren
- * University Of Maryland
- *
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- */
-package edu.umd.marbl.mhap.utils;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Properties;
-
-/**
- * The Class PackageInfo.
- */
-public final class PackageInfo
-{
-
- private static Properties properties = getProjectProperties();
-
- private static Properties getProjectProperties()
- {
- Properties initialProperties = new Properties();
-
- initialProperties = getFileProperties("/properties/mhap.properties", new Properties());
- Properties properties = getFileProperties("/mhap.properties", initialProperties);
-
- return properties;
- }
-
- private static Properties getFileProperties(String file, Properties originalProperties)
- {
- Properties property = new Properties(originalProperties);
- try
- {
- InputStream in = PackageInfo.class.getClass().getResourceAsStream(file);
-
- if (in != null)
- {
- property.load(in);
- in.close();
- }
- }
- catch (IOException e)
- {
- return property;
- }
-
- return property;
- }
-
- public static final String VERSION = properties.getProperty("component.version", "unknown");
- public static final String BUILD_TIME = properties.getProperty("buildtime", "unknown");
-
- public static Properties getProperties()
- {
- return properties;
- }
-
- public static String getVersionTag()
- {
- return VERSION + " " + BUILD_TIME;
- }
-}
diff --git a/src/main/java/edu/umd/marbl/mhap/utils/ParseOptions.java b/src/main/java/edu/umd/marbl/mhap/utils/ParseOptions.java
index 2af034c..f8c606b 100644
--- a/src/main/java/edu/umd/marbl/mhap/utils/ParseOptions.java
+++ b/src/main/java/edu/umd/marbl/mhap/utils/ParseOptions.java
@@ -219,7 +219,7 @@ public class ParseOptions
else
if (needsVersion())
{
- System.out.println("MHAP Version = "+PackageInfo.VERSION+", Build time = "+PackageInfo.BUILD_TIME);
+ System.out.println("Version = "+ParseOptions.class.getPackage().getImplementationVersion());
return false;
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/mhap.git
More information about the debian-med-commit
mailing list