[med-svn] [libhac-java] 06/11: New upstream version 0.20110510
Andreas Tille
tille at debian.org
Thu Nov 30 15:26:11 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository libhac-java.
commit 336514165bd9ef4c15cf984a6ebfdf3ac54df6fb
Author: Andreas Tille <tille at debian.org>
Date: Thu Nov 30 16:19:31 2017 +0100
New upstream version 0.20110510
---
.classpath | 6 +
.project | 17 ++
.settings/org.eclipse.jdt.core.prefs | 12 ++
COPYRIGHT | 4 +
LICENSE | 34 ++++
README | 2 +
debian/changelog | 11 --
debian/compat | 1 -
debian/control | 44 -----
debian/copyright | 41 -----
debian/libhac-java-doc.javadoc | 1 -
debian/libhac-java.docs | 1 -
debian/libhac-java.jlibs | 1 -
debian/rules | 14 --
debian/source/format | 1 -
debian/watch | 2 -
src/ch/usi/inf/sape/hac/ClusteringBuilder.java | 30 ++++
.../inf/sape/hac/ClusteringBuilderMultiplexer.java | 35 ++++
.../usi/inf/sape/hac/ClusteringMatrixBuilder.java | 64 ++++++++
.../hac/HierarchicalAgglomerativeClusterer.java | 177 +++++++++++++++++++++
.../hac/agglomeration/AgglomerationMethod.java | 60 +++++++
.../inf/sape/hac/agglomeration/AverageLinkage.java | 50 ++++++
.../sape/hac/agglomeration/CentroidLinkage.java | 55 +++++++
.../sape/hac/agglomeration/CompleteLinkage.java | 57 +++++++
.../inf/sape/hac/agglomeration/MedianLinkage.java | 56 +++++++
.../inf/sape/hac/agglomeration/SingleLinkage.java | 56 +++++++
.../inf/sape/hac/agglomeration/WardLinkage.java | 53 ++++++
.../hac/agglomeration/WeightedAverageLinkage.java | 46 ++++++
src/ch/usi/inf/sape/hac/dendrogram/Dendrogram.java | 48 ++++++
.../inf/sape/hac/dendrogram/DendrogramBuilder.java | 49 ++++++
.../inf/sape/hac/dendrogram/DendrogramNode.java | 28 ++++
src/ch/usi/inf/sape/hac/dendrogram/MergeNode.java | 51 ++++++
.../inf/sape/hac/dendrogram/ObservationNode.java | 45 ++++++
.../sape/hac/experiment/DissimilarityMeasure.java | 23 +++
src/ch/usi/inf/sape/hac/experiment/Experiment.java | 23 +++
35 files changed, 1081 insertions(+), 117 deletions(-)
diff --git a/.classpath b/.classpath
new file mode 100644
index 0000000..8727917
--- /dev/null
+++ b/.classpath
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+ <classpathentry kind="src" path="src"/>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5"/>
+ <classpathentry kind="output" path="bin"/>
+</classpath>
diff --git a/.project b/.project
new file mode 100644
index 0000000..fc51c09
--- /dev/null
+++ b/.project
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+ <name>hac</name>
+ <comment></comment>
+ <projects>
+ </projects>
+ <buildSpec>
+ <buildCommand>
+ <name>org.eclipse.jdt.core.javabuilder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ </buildSpec>
+ <natures>
+ <nature>org.eclipse.jdt.core.javanature</nature>
+ </natures>
+</projectDescription>
diff --git a/.settings/org.eclipse.jdt.core.prefs b/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 0000000..89d3760
--- /dev/null
+++ b/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,12 @@
+#Tue May 10 12:00:59 CEST 2011
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
+org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
+org.eclipse.jdt.core.compiler.compliance=1.5
+org.eclipse.jdt.core.compiler.debug.lineNumber=generate
+org.eclipse.jdt.core.compiler.debug.localVariable=generate
+org.eclipse.jdt.core.compiler.debug.sourceFile=generate
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.source=1.5
diff --git a/COPYRIGHT b/COPYRIGHT
new file mode 100644
index 0000000..6cf7f14
--- /dev/null
+++ b/COPYRIGHT
@@ -0,0 +1,4 @@
+This work was developed by members of the Sape research group:
+http://sape.inf.usi.ch/
+
+See the LICENSE file for information about the license.
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..098c1cb
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,34 @@
+This software is licensed to You under the "Simplified BSD License".
+You may not use this software except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.opensource.org/licenses/bsd-license.php
+
+The legal text of the Simplified BSD License is appended below for reference:
+
+
+Copyright (c) 2011, Sape Research Group
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+* Neither the name of the Sape Research Group, nor
+ the names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README b/README
new file mode 100644
index 0000000..1ffd86a
--- /dev/null
+++ b/README
@@ -0,0 +1,2 @@
+Hac is a hierarchical agglomerative clustering library implemented in Java.
+For more information, visit http://sape.inf.usi.ch/hac
diff --git a/debian/changelog b/debian/changelog
deleted file mode 100644
index 8dbc9a7..0000000
--- a/debian/changelog
+++ /dev/null
@@ -1,11 +0,0 @@
-libhac-java (0.20110510-1) unstable; urgency=medium
-
- * Initial upload to Debian (Closes: #744233)
-
- -- Andreas Tille <tille at debian.org> Thu, 10 Apr 2014 20:59:14 +0200
-
-libhac-java (0.20110510-0ubuntu1) precise; urgency=low
-
- * Initial release to support Acacia
-
- -- Tim Booth <tbooth at ceh.ac.uk> Fri, 04 Apr 2014 09:54:37 +0100
diff --git a/debian/compat b/debian/compat
deleted file mode 100644
index ec63514..0000000
--- a/debian/compat
+++ /dev/null
@@ -1 +0,0 @@
-9
diff --git a/debian/control b/debian/control
deleted file mode 100644
index 24aabbb..0000000
--- a/debian/control
+++ /dev/null
@@ -1,44 +0,0 @@
-Source: libhac-java
-Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
-Uploaders: Tim Booth <tbooth at ceh.ac.uk>,
- Andreas Tille <tille at debian.org>
-Section: java
-Priority: optional
-Build-Depends: debhelper (>= 9),
- default-jdk,
- javahelper
-Build-Depends-Indep: default-jdk-doc
-Standards-Version: 3.9.5
-Vcs-Browser: http://anonscm.debian.org/viewvc/debian-med/trunk/packages/libhac-java
-Vcs-Svn: svn://anonscm.debian.org/debian-med/trunk/packages/libhac-java
-Homepage: http://sape.inf.usi.ch/hac
-
-Package: libhac-java
-Architecture: all
-Depends: ${misc:Depends},
- ${java:Depends}
-Suggests: libhac-java-doc
-Description: hierarchical agglomerative clustering
- Hac is a simple library for hierarchical agglomerative clustering. The goal of
- Hac is to be easy to use in any context that might require a hierarchical
- agglomerative clustering approach. You can use Hac by bundling Hac with your
- application, and by implementing two interfaces:
- * Experiment (to tell Hac what to cluster), and
- * DissimilarityMeasure (to tell Hac how to compute the dissimilarity between
- two observations).
-
-Package: libhac-java-doc
-Architecture: all
-Section: doc
-Depends: ${misc:Depends}
-Suggests: libhac-java
-Description: API documentation for hierarchical agglomerative clustering
- Hac is a simple library for hierarchical agglomerative clustering. The goal of
- Hac is to be easy to use in any context that might require a hierarchical
- agglomerative clustering approach. You can use Hac by bundling Hac with your
- application, and by implementing two interfaces:
- * Experiment (to tell Hac what to cluster), and
- * DissimilarityMeasure (to tell Hac how to compute the dissimilarity between
- two observations).
- .
- This package contains the API documentation of libhac-java.
diff --git a/debian/copyright b/debian/copyright
deleted file mode 100644
index ffabb20..0000000
--- a/debian/copyright
+++ /dev/null
@@ -1,41 +0,0 @@
-Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
-Upstream-Name: HAC
-Upstream-Contact: Matthias.Hauswirth at usi.ch
-Source: https://github.com/sape/hac
-
-Files: *
-Copyright: 2011, the Sape Research Group
- Matthias.Hauswirth at usi.ch
-License: SimplifiedBSD
-
-Files: debian/*
-Copyright: 2013-2014 Tim Booth <tbooth at ceh.ac.uk>
- 2014 Andreas Tille <tille at debian.org>
-License: SimplifiedBSD
-
-License: SimplifiedBSD
- This software is licensed to You under the "Simplified BSD License".
- You may not use this software except in compliance with the License.
- .
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- .
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
- * Neither the name of the Sape Research Group, nor
- the names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
- .
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
- FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/debian/libhac-java-doc.javadoc b/debian/libhac-java-doc.javadoc
deleted file mode 100644
index ce83b25..0000000
--- a/debian/libhac-java-doc.javadoc
+++ /dev/null
@@ -1 +0,0 @@
-internal
diff --git a/debian/libhac-java.docs b/debian/libhac-java.docs
deleted file mode 100644
index 30d29de..0000000
--- a/debian/libhac-java.docs
+++ /dev/null
@@ -1 +0,0 @@
-doc/*
diff --git a/debian/libhac-java.jlibs b/debian/libhac-java.jlibs
deleted file mode 100644
index d392f0e..0000000
--- a/debian/libhac-java.jlibs
+++ /dev/null
@@ -1 +0,0 @@
-*.jar
diff --git a/debian/rules b/debian/rules
deleted file mode 100755
index 93bb0e5..0000000
--- a/debian/rules
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/usr/bin/make -f
-
-export JAVA_HOME=/usr/lib/jvm/default-java
-
-%:
- dh $@ --with javahelper
-
-override_dh_auto_build:
- jh_build -J hac.jar src
-
-override_dh_auto_clean:
- dh_auto_clean
- rm -rf .[a-z]*
- rm -rf *.jar
diff --git a/debian/source/format b/debian/source/format
deleted file mode 100644
index 163aaf8..0000000
--- a/debian/source/format
+++ /dev/null
@@ -1 +0,0 @@
-3.0 (quilt)
diff --git a/debian/watch b/debian/watch
deleted file mode 100644
index 879c65b..0000000
--- a/debian/watch
+++ /dev/null
@@ -1,2 +0,0 @@
-version=3
-# The project is not tagged on GitHub, so no automated watch is possible.
diff --git a/src/ch/usi/inf/sape/hac/ClusteringBuilder.java b/src/ch/usi/inf/sape/hac/ClusteringBuilder.java
new file mode 100644
index 0000000..852dc04
--- /dev/null
+++ b/src/ch/usi/inf/sape/hac/ClusteringBuilder.java
@@ -0,0 +1,30 @@
+/*
+ * This file is licensed to You under the "Simplified BSD License".
+ * You may not use this software except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.opensource.org/licenses/bsd-license.php
+ *
+ * See the COPYRIGHT file distributed with this work for information
+ * regarding copyright ownership.
+ */
+package ch.usi.inf.sape.hac;
+
+
+/**
+ * HierarchicalAgglomerativeClusterer.cluster() takes a ClusteringBuilder as its argument,
+ * calling its merge() method whenever it merges two clusters.
+ *
+ * @author Matthias.Hauswirth at usi.ch
+ */
+public interface ClusteringBuilder {
+
+ /**
+ * Merge two clusters.
+ * @param i the smaller of the two cluster indices
+ * @param j the larger of the two cluster indices
+ * @param dissimilarity between the two merged clusters
+ */
+ public void merge(int i, int j, double dissimilarity);
+
+}
diff --git a/src/ch/usi/inf/sape/hac/ClusteringBuilderMultiplexer.java b/src/ch/usi/inf/sape/hac/ClusteringBuilderMultiplexer.java
new file mode 100644
index 0000000..0d4d5f9
--- /dev/null
+++ b/src/ch/usi/inf/sape/hac/ClusteringBuilderMultiplexer.java
@@ -0,0 +1,35 @@
+/*
+ * This file is licensed to You under the "Simplified BSD License".
+ * You may not use this software except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.opensource.org/licenses/bsd-license.php
+ *
+ * See the COPYRIGHT file distributed with this work for information
+ * regarding copyright ownership.
+ */
+package ch.usi.inf.sape.hac;
+
+
+/**
+ * A ClusteringBuilderMultiplexer is a ClusteringBuilder that forwards calls to two other ClusteringBuilders.
+ *
+ * @author Matthias.Hauswirth at usi.ch
+ */
+public final class ClusteringBuilderMultiplexer implements ClusteringBuilder {
+
+ private final ClusteringBuilder a;
+ private final ClusteringBuilder b;
+
+
+ public ClusteringBuilderMultiplexer(final ClusteringBuilder a, final ClusteringBuilder b) {
+ this.a = a;
+ this.b = b;
+ }
+
+ public void merge(final int i, final int j, final double dissimilarity) {
+ a.merge(i, j, dissimilarity);
+ b.merge(i, j, dissimilarity);
+ }
+
+}
diff --git a/src/ch/usi/inf/sape/hac/ClusteringMatrixBuilder.java b/src/ch/usi/inf/sape/hac/ClusteringMatrixBuilder.java
new file mode 100644
index 0000000..fd3175c
--- /dev/null
+++ b/src/ch/usi/inf/sape/hac/ClusteringMatrixBuilder.java
@@ -0,0 +1,64 @@
+/*
+ * This file is licensed to You under the "Simplified BSD License".
+ * You may not use this software except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.opensource.org/licenses/bsd-license.php
+ *
+ * See the COPYRIGHT file distributed with this work for information
+ * regarding copyright ownership.
+ */
+package ch.usi.inf.sape.hac;
+
+
+/**
+ * A ClusteringMatrixBuilder builds a matrix in which
+ * each row represents a step in the clustering
+ * and each column represents an observation or cluster.
+ * In the first step (row 0), each column represents an observation.
+ * In the last step, each column refers to the same cluster.
+ * Each step represents a copy of the step above,
+ * with two clusters merged into one.
+ *
+ * @author Matthias.Hauswirth at usi.ch
+ */
+public final class ClusteringMatrixBuilder implements ClusteringBuilder {
+
+ private static final int INVALID = -1;
+
+ private final int[][] clustering;
+ private int currentStep;
+
+
+ public ClusteringMatrixBuilder(final int nObservations) {
+ final int nSteps = nObservations;
+ clustering = new int[nSteps][nObservations];
+ for (int observation = 0; observation<nObservations; observation++) {
+ // initialize original step (each observation is its own cluster)
+ clustering[0][observation] = observation;
+ // initialize subsequent steps to "invalid"
+ for (int step = 1; step<nSteps; step++) {
+ clustering[step][observation] = INVALID;
+ }
+ }
+ currentStep = 0;
+ }
+
+ public void merge(final int i, final int j, final double dissimilarity) {
+ final int previousStep = currentStep;
+ currentStep++;
+ for (int observation = 0; observation<clustering.length; observation++) {
+ final int previousCluster = clustering[previousStep][observation];
+ if (previousCluster==j) {
+ clustering[currentStep][observation] = i;
+ } else {
+ clustering[currentStep][observation] = previousCluster;
+ }
+ }
+ }
+
+ public int[][] getClustering() {
+ return clustering;
+ }
+
+}
diff --git a/src/ch/usi/inf/sape/hac/HierarchicalAgglomerativeClusterer.java b/src/ch/usi/inf/sape/hac/HierarchicalAgglomerativeClusterer.java
new file mode 100644
index 0000000..d793e26
--- /dev/null
+++ b/src/ch/usi/inf/sape/hac/HierarchicalAgglomerativeClusterer.java
@@ -0,0 +1,177 @@
+/*
+ * This file is licensed to You under the "Simplified BSD License".
+ * You may not use this software except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.opensource.org/licenses/bsd-license.php
+ *
+ * See the COPYRIGHT file distributed with this work for information
+ * regarding copyright ownership.
+ */
+package ch.usi.inf.sape.hac;
+
+import ch.usi.inf.sape.hac.agglomeration.AgglomerationMethod;
+import ch.usi.inf.sape.hac.experiment.DissimilarityMeasure;
+import ch.usi.inf.sape.hac.experiment.Experiment;
+
+
+/**
+ * The HierarchicalAgglomerativeClusterer creates a hierarchical agglomerative clustering.
+ *
+ * <pre>
+ * Experiment experiment = ...;
+ * DissimilarityMeasure dissimilarityMeasure = ...;
+ * AgglomerationMethod agglomerationMethod = ...;
+ * DendrogramBuilder dendrogramBuilder = new DendrogramBuilder(experiment.getNumberOfObservations());
+ * HierarchicalAgglomerativeClusterer clusterer = new HierarchicalAgglomerativeClusterer(experiment, dissimilarityMeasure, agglomerationMethod);
+ * clusterer.cluster(dendrogramBuilder);
+ * Dendrogram dendrogram = dendrogramBuilder.getDendrogram();
+ * </pre>
+ *
+ * @author Matthias.Hauswirth at usi.ch
+ */
+public final class HierarchicalAgglomerativeClusterer {
+
+ private Experiment experiment;
+ private DissimilarityMeasure dissimilarityMeasure;
+ private AgglomerationMethod agglomerationMethod;
+
+
+ public HierarchicalAgglomerativeClusterer(final Experiment experiment, final DissimilarityMeasure dissimilarityMeasure, final AgglomerationMethod agglomerationMethod) {
+ this.experiment = experiment;
+ this.dissimilarityMeasure = dissimilarityMeasure;
+ this.agglomerationMethod = agglomerationMethod;
+ }
+
+ public void setExperiment(final Experiment experiment) {
+ this.experiment = experiment;
+ }
+
+ public Experiment getExperiment() {
+ return experiment;
+ }
+
+ public void setDissimilarityMeasure(final DissimilarityMeasure dissimilarityMeasure) {
+ this.dissimilarityMeasure = dissimilarityMeasure;
+ }
+
+ public DissimilarityMeasure getDissimilarityMeasure() {
+ return dissimilarityMeasure;
+ }
+
+ public void setAgglomerationMethod(final AgglomerationMethod agglomerationMethod) {
+ this.agglomerationMethod = agglomerationMethod;
+ }
+
+ public AgglomerationMethod getAgglomerationMethod() {
+ return agglomerationMethod;
+ }
+
+ public void cluster(final ClusteringBuilder clusteringBuilder) {
+ final double[][] dissimilarityMatrix = computeDissimilarityMatrix();
+ final int nObservations = dissimilarityMatrix.length;
+
+ final boolean[] indexUsed = new boolean[nObservations];
+ final int[] clusterCardinalities = new int[nObservations];
+ for (int i = 0; i<nObservations; i++) {
+ indexUsed[i] = true;
+ clusterCardinalities[i] = 1;
+ }
+
+ // Perform nObservations-1 agglomerations
+ for (int a = 1; a<nObservations; a++) {
+ // Determine the two most similar clusters, i and j (such that i<j)
+ final Pair pair = findMostSimilarClusters(dissimilarityMatrix, indexUsed);
+ final int i = pair.getSmaller();
+ final int j = pair.getLarger();
+ final double d = dissimilarityMatrix[i][j];
+
+ /**
+ System.out.println("Agglomeration #"+a+
+ ": merging clusters "+i+
+ " (cardinality "+(clusterCardinalities[i])+") and "+j+
+ " (cardinality "+(clusterCardinalities[j])+") with dissimilarity "+d);
+ **/
+
+ // cluster i becomes new cluster
+ // (by agglomerating former clusters i and j)
+ // update dissimilarityMatrix[i][*] and dissimilarityMatrix[*][i]
+ for (int k = 0; k<nObservations; k++) {
+ if ((k!=i)&&(k!=j)&&indexUsed[k]) {
+ final double dissimilarity = agglomerationMethod.computeDissimilarity(dissimilarityMatrix[i][k], dissimilarityMatrix[j][k],
+ dissimilarityMatrix[i][j], clusterCardinalities[i], clusterCardinalities[j], clusterCardinalities[k]);
+ dissimilarityMatrix[i][k] = dissimilarity;
+ dissimilarityMatrix[k][i] = dissimilarity;
+ }
+ }
+ clusterCardinalities[i] = clusterCardinalities[i]+clusterCardinalities[j];
+
+ // erase cluster j
+ indexUsed[j] = false;
+ for (int k = 0; k<nObservations; k++) {
+ dissimilarityMatrix[j][k] = Double.POSITIVE_INFINITY;
+ dissimilarityMatrix[k][j] = Double.POSITIVE_INFINITY;
+ }
+
+ // update clustering
+ clusteringBuilder.merge(i, j, d);
+ }
+ }
+
+ private double[][] computeDissimilarityMatrix() {
+ final double[][] dissimilarityMatrix = new double[experiment.getNumberOfObservations()][experiment.getNumberOfObservations()];
+ // fill diagonal
+ for (int o = 0; o<dissimilarityMatrix.length; o++) {
+ dissimilarityMatrix[o][o] = 0.0;
+ }
+ // fill rest (only compute half, then mirror across diagonal, assuming
+ // a symmetric dissimilarity measure)
+ for (int o1 = 0; o1<dissimilarityMatrix.length; o1++) {
+ for (int o2 = 0; o2<o1; o2++) {
+ final double dissimilarity = dissimilarityMeasure.computeDissimilarity(experiment, o1, o2);
+ dissimilarityMatrix[o1][o2] = dissimilarity;
+ dissimilarityMatrix[o2][o1] = dissimilarity;
+ }
+ }
+ return dissimilarityMatrix;
+ }
+
+ private static Pair findMostSimilarClusters(final double[][] dissimilarityMatrix, final boolean[] indexUsed) {
+ final Pair mostSimilarPair = new Pair();
+ double smallestDissimilarity = Double.POSITIVE_INFINITY;
+ for (int cluster = 0; cluster<dissimilarityMatrix.length; cluster++) {
+ if (indexUsed[cluster]) {
+ for (int neighbor = 0; neighbor<dissimilarityMatrix.length; neighbor++) {
+ if (indexUsed[neighbor]&&dissimilarityMatrix[cluster][neighbor]<smallestDissimilarity&&cluster!=neighbor) {
+ smallestDissimilarity = dissimilarityMatrix[cluster][neighbor];
+ mostSimilarPair.set(cluster, neighbor);
+ }
+ }
+ }
+ }
+ return mostSimilarPair;
+ }
+
+
+ private static final class Pair {
+
+ private int cluster1;
+ private int cluster2;
+
+
+ public final void set(final int cluster1, final int cluster2) {
+ this.cluster1 = cluster1;
+ this.cluster2 = cluster2;
+ }
+
+ public final int getLarger() {
+ return Math.max(cluster1, cluster2);
+ }
+
+ public final int getSmaller() {
+ return Math.min(cluster1, cluster2);
+ }
+
+ }
+
+}
diff --git a/src/ch/usi/inf/sape/hac/agglomeration/AgglomerationMethod.java b/src/ch/usi/inf/sape/hac/agglomeration/AgglomerationMethod.java
new file mode 100644
index 0000000..cbd30bb
--- /dev/null
+++ b/src/ch/usi/inf/sape/hac/agglomeration/AgglomerationMethod.java
@@ -0,0 +1,60 @@
+/*
+ * This file is licensed to You under the "Simplified BSD License".
+ * You may not use this software except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.opensource.org/licenses/bsd-license.php
+ *
+ * See the COPYRIGHT file distributed with this work for information
+ * regarding copyright ownership.
+ */
+package ch.usi.inf.sape.hac.agglomeration;
+
+
+/**
+ * An AgglomerationMethod represents the Lance-Williams dissimilarity update formula
+ * used for hierarchical agglomerative clustering.
+ *
+ * The general form of the Lance-Williams matrix-update formula:
+ * d[(i,j),k] = ai*d[i,k] + aj*d[j,k] + b*d[i,j] + g*|d[i,k]-d[j,k]|
+ *
+ * Parameters ai, aj, b, and g are defined differently for different methods:
+ *
+ * Method ai aj b g
+ * ------------- ------------------ ------------------ ------------------------ -----
+ * Single 0.5 0.5 0 -0.5
+ * Complete 0.5 0.5 0 0.5
+ * Average ci/(ci+cj) cj/(ci+cj) 0 0
+ *
+ * Centroid ci/(ci+cj) cj/(ci+cj) -ci*cj/((ci+cj)*(ci+cj)) 0
+ * Median 0.5 0.5 -0.25 0
+ * Ward (ci+ck)/(ci+cj+ck) (cj+ck)/(ci+cj+ck) -ck/(ci+cj+ck) 0
+ *
+ * WeightedAverage 0.5 0.5 0 0
+ *
+ * (ci, cj, ck are cluster cardinalities)
+ *
+ * @see http://www.mathworks.com/help/toolbox/stats/linkage.html
+ * @see http://www.stanford.edu/~maureenh/quals/html/ml/node73.html
+ * @see [The data analysis handbook. By Ildiko E. Frank, Roberto Todeschini. Pages 152-155]
+ *
+ * @author Matthias.Hauswirth at usi.ch
+ */
+public interface AgglomerationMethod {
+
+ /**
+ * Compute the dissimilarity between the
+ * newly formed cluster (i,j) and the existing cluster k.
+ *
+ * @param dik dissimilarity between clusters i and k
+ * @param djk dissimilarity between clusters j and k
+ * @param dij dissimilarity between clusters i and j
+ * @param ci cardinality of cluster i
+ * @param cj cardinality of cluster j
+ * @param ck cardinality of cluster k
+ *
+ * @return dissimilarity between cluster (i,j) and cluster k.
+ */
+ public double computeDissimilarity(double dik, double djk, double dij, int ci, int cj, int ck);
+
+}
diff --git a/src/ch/usi/inf/sape/hac/agglomeration/AverageLinkage.java b/src/ch/usi/inf/sape/hac/agglomeration/AverageLinkage.java
new file mode 100644
index 0000000..0231687
--- /dev/null
+++ b/src/ch/usi/inf/sape/hac/agglomeration/AverageLinkage.java
@@ -0,0 +1,50 @@
+/*
+ * This file is licensed to You under the "Simplified BSD License".
+ * You may not use this software except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.opensource.org/licenses/bsd-license.php
+ *
+ * See the COPYRIGHT file distributed with this work for information
+ * regarding copyright ownership.
+ */
+package ch.usi.inf.sape.hac.agglomeration;
+
+
+/**
+ * The "average", "group average", "unweighted average", or
+ * "Unweighted Pair Group Method using Arithmetic averages (UPGMA)",
+ * is a graph-based approach.
+ *
+ * The distance between two clusters is calculated as the average
+ * of the distances between all pairs of objects in opposite clusters.
+ * This method tends to produce small clusters of outliers,
+ * but does not deform the cluster space.
+ * [The data analysis handbook. By Ildiko E. Frank, Roberto Todeschini]
+ *
+ * The general form of the Lance-Williams matrix-update formula:
+ * d[(i,j),k] = ai*d[i,k] + aj*d[j,k] + b*d[i,j] + g*|d[i,k]-d[j,k]|
+ *
+ * For the "group average" method:
+ * ai = ci/(ci+cj)
+ * aj = cj/(ci+cj)
+ * b = 0
+ * g = 0
+ *
+ * Thus:
+ * d[(i,j),k] = ci/(ci+cj)*d[i,k] + cj/(ci+cj)*d[j,k]
+ * = ( ci*d[i,k] + cj*d[j,k] ) / (ci+cj)
+ *
+ * @author Matthias.Hauswirth at usi.ch
+ */
+public final class AverageLinkage implements AgglomerationMethod {
+
+ public double computeDissimilarity(final double dik, final double djk, final double dij, final int ci, final int cj, final int ck) {
+ return (ci*dik+cj*djk)/(ci+cj);
+ }
+
+ public String toString() {
+ return "Average";
+ }
+
+}
diff --git a/src/ch/usi/inf/sape/hac/agglomeration/CentroidLinkage.java b/src/ch/usi/inf/sape/hac/agglomeration/CentroidLinkage.java
new file mode 100644
index 0000000..68d7fd7
--- /dev/null
+++ b/src/ch/usi/inf/sape/hac/agglomeration/CentroidLinkage.java
@@ -0,0 +1,55 @@
+/*
+ * This file is licensed to You under the "Simplified BSD License".
+ * You may not use this software except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.opensource.org/licenses/bsd-license.php
+ *
+ * See the COPYRIGHT file distributed with this work for information
+ * regarding copyright ownership.
+ */
+package ch.usi.inf.sape.hac.agglomeration;
+
+
+/**
+ * The "centroid" or "Unweighted Pair-Group Method using Centroids (UPGMC)"
+ * method is a geometric approach that links the centroids of clusters.
+ *
+ * Each cluster is represented by its centroid.
+ * The distance between two clusters is calculated as the distance between their centroids.
+ * This method does not distort the cluster space.
+ * [The data analysis handbook. By Ildiko E. Frank, Roberto Todeschini]
+ *
+ * Can produce a dendrogram that is not monotonic
+ * (it can have so-called inversions, which are hard to interpret).
+ * This occurs when the distance from the union of two clusters, r and s,
+ * to a third cluster is less than the distance between r and s.
+ *
+ * Used only for Euclidean distance!
+ *
+ * The general form of the Lance-Williams matrix-update formula:
+ * d[(i,j),k] = ai*d[i,k] + aj*d[j,k] + b*d[i,j] + g*|d[i,k]-d[j,k]|
+ *
+ * For the "centroid" method:
+ * ai = ci/(ci+cj)
+ * aj = cj/(ci+cj)
+ * b = -ci*cj/((ci+cj)*(ci+cj))
+ * g = 0
+ *
+ * Thus:
+ * d[(i,j),k] = ci/(ci+cj)*d[i,k] + cj/(ci+cj)*d[j,k] - ci*cj/((ci+cj)*(ci+cj))*d[i,j]
+ * = ( ci*d[i,k] + cj*d[j,k] - ci*cj/(ci+cj)*d[i,j] ) / (ci+cj)
+ *
+ * @author Matthias.Hauswirth at usi.ch
+ */
+public final class CentroidLinkage implements AgglomerationMethod {
+
+ public double computeDissimilarity(final double dik, final double djk, final double dij, final int ci, final int cj, final int ck) {
+ return (ci*dik+cj*djk-ci*cj*dij/(ci+cj))/(ci+cj);
+ }
+
+ public String toString() {
+ return "Centroid";
+ }
+
+}
diff --git a/src/ch/usi/inf/sape/hac/agglomeration/CompleteLinkage.java b/src/ch/usi/inf/sape/hac/agglomeration/CompleteLinkage.java
new file mode 100644
index 0000000..5290551
--- /dev/null
+++ b/src/ch/usi/inf/sape/hac/agglomeration/CompleteLinkage.java
@@ -0,0 +1,57 @@
+/*
+ * This file is licensed to You under the "Simplified BSD License".
+ * You may not use this software except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.opensource.org/licenses/bsd-license.php
+ *
+ * See the COPYRIGHT file distributed with this work for information
+ * regarding copyright ownership.
+ */
+package ch.usi.inf.sape.hac.agglomeration;
+
+
+/**
+ * The "complete", "maximum", "clique",
+ * "furthest neighbor", or "furthest distance" method is a graph-based approach.
+ *
+ * The distance between two clusters is calculated as the largest distance
+ * between two objects in opposite clusters.
+ * This method tends to produce well separated, small, compact spherical clusters.
+ * The cluster space is dilated.
+ * [The data analysis handbook. By Ildiko E. Frank, Roberto Todeschini]
+ *
+ * This method tends to produce compact clusters. Outliers are given more weight with this method.
+ * It is generally a good choice if the clusters are far apart in feature space, but not good if the data are noisy.
+ * @see http://www.stanford.edu/~maureenh/quals/html/ml/node75.html
+ *
+ * The general form of the Lance-Williams matrix-update formula:
+ * d[(i,j),k] = ai*d[i,k] + aj*d[j,k] + b*d[i,j] + g*|d[i,k]-d[j,k]|
+ *
+ * For the "complete linkage" method:
+ * ai = 0.5
+ * aj = 0.5
+ * b = 0
+ * g = 0.5
+ *
+ * Thus:
+ * d[(i,j),k] = 0.5*d[i,k] + 0.5*d[j,k] + 0.5*|d[i,k]-d[j,k]|
+ * = 0.5*d[i,k] + 0.5*d[j,k] + | 0.5*d[i,k] - 0.5*d[j,k] |
+ * = d[i,k]<d[j,k] ? 0.5*d[i,k] + 0.5*d[j,k] - 0.5*d[i,k] + 0.5*d[j,k] : 0.5*d[i,k] + 0.5*d[j,k] + 0.5*d[i,k] - 0.5*d[j,k]
+ * = d[i,k]<d[j,k] ? 0.5*d[j,k] + 0.5*d[j,k] : 0.5*d[i,k] + 0.5*d[i,k]
+ * = d[i,k]<d[j,k] ? d[j,k] : d[i,k]
+ * = max( d[i,k] , d[j,k] )
+ *
+ * @author Matthias.Hauswirth at usi.ch
+ */
+public final class CompleteLinkage implements AgglomerationMethod {
+
+ public double computeDissimilarity(final double dik, final double djk, final double dij, final int ci, final int cj, final int ck) {
+ return Math.max(dik, djk);
+ }
+
+ public String toString() {
+ return "Complete";
+ }
+
+}
diff --git a/src/ch/usi/inf/sape/hac/agglomeration/MedianLinkage.java b/src/ch/usi/inf/sape/hac/agglomeration/MedianLinkage.java
new file mode 100644
index 0000000..d3bd8e1
--- /dev/null
+++ b/src/ch/usi/inf/sape/hac/agglomeration/MedianLinkage.java
@@ -0,0 +1,56 @@
+/*
+ * This file is licensed to You under the "Simplified BSD License".
+ * You may not use this software except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.opensource.org/licenses/bsd-license.php
+ *
+ * See the COPYRIGHT file distributed with this work for information
+ * regarding copyright ownership.
+ */
+package ch.usi.inf.sape.hac.agglomeration;
+
+
+/**
+ * The "median", "weighted centroid", "weighted center of mass distance", "Gower",
+ * or "Weighted Pair-Group Method using Centroids (WPGMC)" method is a geometric approach.
+ *
+ * The size of the clusters is assumed to be equal and
+ * the position of the new centroid is always between the two old centroids.
+ * This method preserves the importance of a small cluster when it is merged with a large cluster.
+ * [The data analysis handbook. By Ildiko E. Frank, Roberto Todeschini]
+ *
+ * Can produce a dendrogram that is not monotonic
+ * (it can have so-called inversions, which are hard to interpret).
+ * This occurs when the distance from the union of two clusters, r and s,
+ * to a third cluster is less than the distance between r and s.
+ *
+ * Used only for Euclidean distance!
+ *
+ * The distance between two clusters is the Euclidean distance between their weighted centroids.
+ *
+ * The general form of the Lance-Williams matrix-update formula:
+ * d[(i,j),k] = ai*d[i,k] + aj*d[j,k] + b*d[i,j] + g*|d[i,k]-d[j,k]|
+ *
+ * For the "median" method:
+ * ai = 0.5
+ * aj = 0.5
+ * b = -0.25
+ * g = 0
+ *
+ * Thus:
+ * d[(i,j),k] = 0.5*d[i,k] + 0.5*d[j,k] - 0.25*d[i,j]
+ *
+ * @author Matthias.Hauswirth at usi.ch
+ */
+public final class MedianLinkage implements AgglomerationMethod {
+
+ public double computeDissimilarity(final double dik, final double djk, final double dij, final int ci, final int cj, final int ck) {
+ return 0.5*dik+0.5*djk-0.25*dij; // ci, cj and ck are not used: both merged clusters get the same weight
+ }
+
+ public String toString() {
+ return "Median";
+ }
+
+}
diff --git a/src/ch/usi/inf/sape/hac/agglomeration/SingleLinkage.java b/src/ch/usi/inf/sape/hac/agglomeration/SingleLinkage.java
new file mode 100644
index 0000000..8f00058
--- /dev/null
+++ b/src/ch/usi/inf/sape/hac/agglomeration/SingleLinkage.java
@@ -0,0 +1,56 @@
+/*
+ * This file is licensed to You under the "Simplified BSD License".
+ * You may not use this software except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.opensource.org/licenses/bsd-license.php
+ *
+ * See the COPYRIGHT file distributed with this work for information
+ * regarding copyright ownership.
+ */
+package ch.usi.inf.sape.hac.agglomeration;
+
+
+/**
+ * The "single linkage", "minimum", "shortest distance", or "nearest neighbor" method is a graph-based approach.
+ *
+ * The distance between two clusters is calculated as
+ * the smallest distance between two objects in opposite clusters.
+ * This method tends to produce loosely bound large clusters with little internal cohesion.
+ * Linear, elongated clusters are formed as opposed to the more usual spherical clusters.
+ * This phenomenon is called chaining.
+ * [The data analysis handbook. By Ildiko E. Frank, Roberto Todeschini]
+ *
+ * This method can cause "chaining" of clusters.
+ * @see http://www.stanford.edu/~maureenh/quals/html/ml/node74.html
+ *
+ * The general form of the Lance-Williams matrix-update formula:
+ * d[(i,j),k] = ai*d[i,k] + aj*d[j,k] + b*d[i,j] + g*|d[i,k]-d[j,k]|
+ *
+ * For the "single linkage" method:
+ * ai = 0.5
+ * aj = 0.5
+ * b = 0
+ * g = -0.5
+ *
+ * Thus:
+ * d[(i,j),k] = 0.5*d[i,k] + 0.5*d[j,k] - 0.5*|d[i,k]-d[j,k]|
+ * = 0.5*d[i,k] + 0.5*d[j,k] - | 0.5*d[i,k] - 0.5*d[j,k] |
+ * = d[i,k]<d[j,k] ? 0.5*d[i,k] + 0.5*d[j,k] + 0.5*d[i,k] - 0.5*d[j,k] : 0.5*d[i,k] + 0.5*d[j,k] - 0.5*d[i,k] + 0.5*d[j,k]
+ * = d[i,k]<d[j,k] ? 0.5*d[i,k] + 0.5*d[i,k] : 0.5*d[j,k] + 0.5*d[j,k]
+ * = d[i,k]<d[j,k] ? d[i,k] : d[j,k]
+ * = min( d[i,k] , d[j,k] )
+ *
+ * @author Matthias.Hauswirth at usi.ch
+ */
+public final class SingleLinkage implements AgglomerationMethod {
+
+ public double computeDissimilarity(final double dik, final double djk, final double dij, final int ci, final int cj, final int ck) {
+ return Math.min(dik, djk); // dij, ci, cj and ck are not used: the update reduces to a plain minimum
+ }
+
+ public String toString() {
+ return "Single";
+ }
+
+}
diff --git a/src/ch/usi/inf/sape/hac/agglomeration/WardLinkage.java b/src/ch/usi/inf/sape/hac/agglomeration/WardLinkage.java
new file mode 100644
index 0000000..ba60d4c
--- /dev/null
+++ b/src/ch/usi/inf/sape/hac/agglomeration/WardLinkage.java
@@ -0,0 +1,53 @@
+/*
+ * This file is licensed to You under the "Simplified BSD License".
+ * You may not use this software except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.opensource.org/licenses/bsd-license.php
+ *
+ * See the COPYRIGHT file distributed with this work for information
+ * regarding copyright ownership.
+ */
+package ch.usi.inf.sape.hac.agglomeration;
+
+
+/**
+ * The "Ward", "inner squared distance", "sum of squares", "error sum of squares",
+ * or "minimum variance" method.
+ *
+ * This method fuses those two clusters that result in the smallest increase
+ * in the total within-group error sum of squares.
+ * This quantity is defined as the sum of squared deviation
+ * of each object from the centroid of its own cluster.
+ * In contrast to the other methods that use prior criteria,
+ * this method is based on a posterior fusion criterion.
+ * [The data analysis handbook. By Ildiko E. Frank, Roberto Todeschini]
+ *
+ * Used only for Euclidean distance!
+ *
+ * The general form of the Lance-Williams matrix-update formula:
+ * d[(i,j),k] = ai*d[i,k] + aj*d[j,k] + b*d[i,j] + g*|d[i,k]-d[j,k]|
+ *
+ * For the "Ward" method (ci, cj, ck presumably denote the sizes of clusters i, j, k; confirm against the caller):
+ * ai = (ci+ck)/(ci+cj+ck)
+ * aj = (cj+ck)/(ci+cj+ck)
+ * b = -ck/(ci+cj+ck)
+ * g = 0
+ *
+ * Thus:
+ * d[(i,j),k] = (ci+ck)/(ci+cj+ck)*d[i,k] + (cj+ck)/(ci+cj+ck)*d[j,k] - ck/(ci+cj+ck)*d[i,j]
+ * = ( (ci+ck)*d[i,k] + (cj+ck)*d[j,k] - ck*d[i,j] ) / (ci+cj+ck)
+ *
+ * @author Matthias.Hauswirth at usi.ch
+ */
+public final class WardLinkage implements AgglomerationMethod {
+
+ public double computeDissimilarity(final double dik, final double djk, final double dij, final int ci, final int cj, final int ck) {
+ return ((ci+ck)*dik+(cj+ck)*djk-ck*dij)/(ci+cj+ck); // the int sums are promoted to double by the multiplications, so the division is floating-point
+ }
+
+ public String toString() {
+ return "Ward";
+ }
+
+}
diff --git a/src/ch/usi/inf/sape/hac/agglomeration/WeightedAverageLinkage.java b/src/ch/usi/inf/sape/hac/agglomeration/WeightedAverageLinkage.java
new file mode 100644
index 0000000..e404100
--- /dev/null
+++ b/src/ch/usi/inf/sape/hac/agglomeration/WeightedAverageLinkage.java
@@ -0,0 +1,46 @@
+/*
+ * This file is licensed to You under the "Simplified BSD License".
+ * You may not use this software except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.opensource.org/licenses/bsd-license.php
+ *
+ * See the COPYRIGHT file distributed with this work for information
+ * regarding copyright ownership.
+ */
+package ch.usi.inf.sape.hac.agglomeration;
+
+
+/**
+ * The "weighted average", "McQuitty", or
+ * "Weighted Pair-Group Method using Arithmetic averages (WPGMA)" method.
+ *
+ * Average linkage where the sizes of the clusters are assumed to be equal.
+ * This method, similar to "Median", weights small and large clusters equally.
+ * [The data analysis handbook. By Ildiko E. Frank, Roberto Todeschini]
+ *
+ * The general form of the Lance-Williams matrix-update formula:
+ * d[(i,j),k] = ai*d[i,k] + aj*d[j,k] + b*d[i,j] + g*|d[i,k]-d[j,k]|
+ *
+ * For the "McQuitty" method:
+ * ai = 0.5
+ * aj = 0.5
+ * b = 0
+ * g = 0
+ *
+ * Thus:
+ * d[(i,j),k] = 0.5*d[i,k] + 0.5*d[j,k]
+ *
+ * @author Matthias.Hauswirth at usi.ch
+ */
+public final class WeightedAverageLinkage implements AgglomerationMethod {
+
+ public double computeDissimilarity(final double dik, final double djk, final double dij, final int ci, final int cj, final int ck) {
+ return 0.5*dik+0.5*djk; // plain mean of the two old dissimilarities; dij, ci, cj and ck are not used
+ }
+
+ public String toString() {
+ return "Weighted average";
+ }
+
+}
diff --git a/src/ch/usi/inf/sape/hac/dendrogram/Dendrogram.java b/src/ch/usi/inf/sape/hac/dendrogram/Dendrogram.java
new file mode 100644
index 0000000..fdc8915
--- /dev/null
+++ b/src/ch/usi/inf/sape/hac/dendrogram/Dendrogram.java
@@ -0,0 +1,48 @@
+/*
+ * This file is licensed to You under the "Simplified BSD License".
+ * You may not use this software except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.opensource.org/licenses/bsd-license.php
+ *
+ * See the COPYRIGHT file distributed with this work for information
+ * regarding copyright ownership.
+ */
+package ch.usi.inf.sape.hac.dendrogram;
+
+
+/**
+ * A Dendrogram represents the results of hierarchical agglomerative clustering.
+ * The root represents a single cluster containing all observations.
+ *
+ * @author Matthias.Hauswirth at usi.ch
+ */
+public final class Dendrogram {
+
+ private final DendrogramNode root;
+
+
+ public Dendrogram(final DendrogramNode root) {
+ this.root = root;
+ }
+
+ public DendrogramNode getRoot() {
+ return root;
+ }
+
+ public void dump() { // prints the tree to System.out, starting at the root
+ dumpNode(" ", root);
+ }
+
+ private void dumpNode(final String indent, final DendrogramNode node) { // recursive helper: one line per node
+ if (node==null) {
+ System.out.println(indent+"<null>");
+ } else if (node instanceof ObservationNode) {
+ System.out.println(indent+"Observation: "+node); // string concatenation, so the text comes from the node's toString()
+ } else if (node instanceof MergeNode) {
+ System.out.println(indent+"Merge:");
+ dumpNode(indent+" ", ((MergeNode)node).getLeft()); // children are printed with a longer indent
+ dumpNode(indent+" ", ((MergeNode)node).getRight());
+ }
+ }
+}
diff --git a/src/ch/usi/inf/sape/hac/dendrogram/DendrogramBuilder.java b/src/ch/usi/inf/sape/hac/dendrogram/DendrogramBuilder.java
new file mode 100644
index 0000000..45b7555
--- /dev/null
+++ b/src/ch/usi/inf/sape/hac/dendrogram/DendrogramBuilder.java
@@ -0,0 +1,49 @@
+/*
+ * This file is licensed to You under the "Simplified BSD License".
+ * You may not use this software except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.opensource.org/licenses/bsd-license.php
+ *
+ * See the COPYRIGHT file distributed with this work for information
+ * regarding copyright ownership.
+ */
+package ch.usi.inf.sape.hac.dendrogram;
+
+import ch.usi.inf.sape.hac.ClusteringBuilder;
+
+
+/**
+ * A DendrogramBuilder creates a Dendrogram consisting of ObservationNodes and
+ * MergeNodes: it starts with one ObservationNode per observation and wraps two slots into a MergeNode on each merge call.
+ *
+ * @author Matthias.Hauswirth at usi.ch
+ */
+public final class DendrogramBuilder implements ClusteringBuilder {
+
+ private final DendrogramNode[] nodes;
+ private MergeNode lastMergeNode;
+
+
+ public DendrogramBuilder(final int nObservations) {
+ nodes = new DendrogramNode[nObservations];
+ for (int i = 0; i<nObservations; i++) {
+ nodes[i] = new ObservationNode(i);
+ }
+ }
+
+ public final void merge(final int i, final int j, final double dissimilarity) {
+ final MergeNode node = new MergeNode(nodes[i], nodes[j], dissimilarity);
+ nodes[i] = node; // the merged cluster takes over slot i; slot j keeps its old node
+ lastMergeNode = node;
+ }
+
+ public final Dendrogram getDendrogram() {
+ if (nodes.length==1) { // a single observation: the lone leaf is the root
+ return new Dendrogram(nodes[0]);
+ } else { // otherwise the most recent merge is the root (null if merge was never called)
+ return new Dendrogram(lastMergeNode);
+ }
+ }
+
+}
diff --git a/src/ch/usi/inf/sape/hac/dendrogram/DendrogramNode.java b/src/ch/usi/inf/sape/hac/dendrogram/DendrogramNode.java
new file mode 100644
index 0000000..01b3c92
--- /dev/null
+++ b/src/ch/usi/inf/sape/hac/dendrogram/DendrogramNode.java
@@ -0,0 +1,28 @@
+/*
+ * This file is licensed to You under the "Simplified BSD License".
+ * You may not use this software except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.opensource.org/licenses/bsd-license.php
+ *
+ * See the COPYRIGHT file distributed with this work for information
+ * regarding copyright ownership.
+ */
+package ch.usi.inf.sape.hac.dendrogram;
+
+
+/**
+ * A DendrogramNode is a node in a Dendrogram.
+ * It represents a subtree of the dendrogram tree.
+ * It exposes two children (left and right; both are null for a leaf such as ObservationNode),
+ * and it can provide the number of leaf nodes (ObservationNodes) in this subtree.
+ *
+ * @author Matthias.Hauswirth at unisi.ch
+ */
+public interface DendrogramNode {
+
+ public DendrogramNode getLeft();
+ public DendrogramNode getRight();
+ public int getObservationCount();
+
+}
\ No newline at end of file
diff --git a/src/ch/usi/inf/sape/hac/dendrogram/MergeNode.java b/src/ch/usi/inf/sape/hac/dendrogram/MergeNode.java
new file mode 100644
index 0000000..7f96c73
--- /dev/null
+++ b/src/ch/usi/inf/sape/hac/dendrogram/MergeNode.java
@@ -0,0 +1,51 @@
+/*
+ * This file is licensed to You under the "Simplified BSD License".
+ * You may not use this software except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.opensource.org/licenses/bsd-license.php
+ *
+ * See the COPYRIGHT file distributed with this work for information
+ * regarding copyright ownership.
+ */
+package ch.usi.inf.sape.hac.dendrogram;
+
+
+/**
+ * A MergeNode represents an interior node in a Dendrogram.
+ * It corresponds to a (non-singleton) cluster of observations and records the dissimilarity passed in at construction.
+ *
+ * @author Matthias.Hauswirth at usi.ch
+ */
+public final class MergeNode implements DendrogramNode {
+
+ private final DendrogramNode left;
+ private final DendrogramNode right;
+ private final double dissimilarity;
+ private final int observationCount;
+
+
+ public MergeNode(final DendrogramNode left, final DendrogramNode right, final double dissimilarity) {
+ this.left = left;
+ this.right = right;
+ this.dissimilarity = dissimilarity;
+ observationCount = left.getObservationCount()+right.getObservationCount(); // computed once here; a null child fails with a NullPointerException
+ }
+
+ public int getObservationCount() {
+ return observationCount;
+ }
+
+ public final DendrogramNode getLeft() {
+ return left;
+ }
+
+ public final DendrogramNode getRight() {
+ return right;
+ }
+
+ public final double getDissimilarity() {
+ return dissimilarity;
+ }
+
+}
\ No newline at end of file
diff --git a/src/ch/usi/inf/sape/hac/dendrogram/ObservationNode.java b/src/ch/usi/inf/sape/hac/dendrogram/ObservationNode.java
new file mode 100644
index 0000000..b4608a1
--- /dev/null
+++ b/src/ch/usi/inf/sape/hac/dendrogram/ObservationNode.java
@@ -0,0 +1,45 @@
+/*
+ * This file is licensed to You under the "Simplified BSD License".
+ * You may not use this software except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.opensource.org/licenses/bsd-license.php
+ *
+ * See the COPYRIGHT file distributed with this work for information
+ * regarding copyright ownership.
+ */
+package ch.usi.inf.sape.hac.dendrogram;
+
+
+/**
+ * An ObservationNode represents a leaf node in a Dendrogram.
+ * It corresponds to a singleton cluster of one observation, identified by the int given to the constructor.
+ *
+ * @author Matthias.Hauswirth at usi.ch
+ */
+public final class ObservationNode implements DendrogramNode {
+
+ private final int observation;
+
+
+ public ObservationNode(final int observation) {
+ this.observation = observation;
+ }
+
+ public final DendrogramNode getLeft() {
+ return null; // a leaf has no left child
+ }
+
+ public final DendrogramNode getRight() {
+ return null; // a leaf has no right child
+ }
+
+ public int getObservationCount() {
+ return 1; // a leaf stands for exactly one observation
+ }
+
+ public final int getObservation() {
+ return observation;
+ }
+
+}
\ No newline at end of file
diff --git a/src/ch/usi/inf/sape/hac/experiment/DissimilarityMeasure.java b/src/ch/usi/inf/sape/hac/experiment/DissimilarityMeasure.java
new file mode 100644
index 0000000..5b7a6eb
--- /dev/null
+++ b/src/ch/usi/inf/sape/hac/experiment/DissimilarityMeasure.java
@@ -0,0 +1,23 @@
+/*
+ * This file is licensed to You under the "Simplified BSD License".
+ * You may not use this software except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.opensource.org/licenses/bsd-license.php
+ *
+ * See the COPYRIGHT file distributed with this work for information
+ * regarding copyright ownership.
+ */
+package ch.usi.inf.sape.hac.experiment;
+
+
+/**
+ * Computes the dissimilarity between two observations in an experiment.
+ * The observations are passed as int values (presumably indices into the experiment; confirm against the implementations).
+ * @author Matthias.Hauswirth at usi.ch
+ */
+public interface DissimilarityMeasure {
+
+ public double computeDissimilarity(Experiment experiment, int observation1, int observation2);
+
+}
diff --git a/src/ch/usi/inf/sape/hac/experiment/Experiment.java b/src/ch/usi/inf/sape/hac/experiment/Experiment.java
new file mode 100644
index 0000000..7c28d3a
--- /dev/null
+++ b/src/ch/usi/inf/sape/hac/experiment/Experiment.java
@@ -0,0 +1,23 @@
+/*
+ * This file is licensed to You under the "Simplified BSD License".
+ * You may not use this software except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.opensource.org/licenses/bsd-license.php
+ *
+ * See the COPYRIGHT file distributed with this work for information
+ * regarding copyright ownership.
+ */
+package ch.usi.inf.sape.hac.experiment;
+
+
+/**
+ * An experiment consists of a number of observations; this interface only exposes how many there are.
+ *
+ * @author Matthias.Hauswirth at usi.ch
+ */
+public interface Experiment {
+
+ public int getNumberOfObservations();
+
+}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/libhac-java.git
More information about the debian-med-commit
mailing list