[Pkg-javascript-commits] [science.js] 21/87: Add agglomerative hierarchical clustering.

bhuvan krishna bhuvan-guest at moszumanska.debian.org
Thu Dec 8 06:11:54 UTC 2016


This is an automated email from the git hooks/post-receive script.

bhuvan-guest pushed a commit to branch master
in repository science.js.

commit 377656c0ca7fbd0bb72fe901aff83d3318432619
Author: Jason Davies <jason at jasondavies.com>
Date:   Thu Aug 25 20:31:36 2011 +0100

    Add agglomerative hierarchical clustering.
---
 Makefile                    |  1 +
 science.stats.js            | 95 +++++++++++++++++++++++++++++++++++++++++++++
 science.stats.min.js        |  2 +-
 src/stats/hcluster.js       | 44 ++++++++++++---------
 test/stats/hcluster-test.js | 27 +++++++++++++
 5 files changed, 150 insertions(+), 19 deletions(-)

diff --git a/Makefile b/Makefile
index 36f6f21..f83302a 100644
--- a/Makefile
+++ b/Makefile
@@ -34,6 +34,7 @@ science.stats.js: \
 	src/stats/kernel.js \
 	src/stats/kde.js \
 	src/stats/kmeans.js \
+	src/stats/hcluster.js \
 	src/stats/iqr.js \
 	src/stats/loess.js \
 	src/stats/mean.js \
diff --git a/science.stats.js b/science.stats.js
index f43a434..4411920 100644
--- a/science.stats.js
+++ b/science.stats.js
@@ -250,6 +250,101 @@ function science_stats_kmeansRandom(k, vectors) {
   }
   return selected_vectors;
 }
+science.stats.hcluster = function() {
+  var distance = science.stats.distance.euclidean,
+      linkage = "";
+
+  function hcluster(vectors) {
+    var n = vectors.length;
+    var dMin = [];
+    var cSize = [];
+    var distMatrix = [];
+    var clusters = [];
+
+    var c1, c2, c1Cluster, c2Cluster, p, root , newCentroid;
+
+    var i,
+        j;
+
+    // Initialize distance matrix and vector of closest clusters
+    i = -1; while (++i < n) {
+      dMin[i] = 0;
+      distMatrix[i] = [];
+      j = -1; while (++j < n) {
+        distMatrix[i][j] = i === j ? Infinity : distance(vectors[i] , vectors[j]);
+        if (distMatrix[i][dMin[i]] > distMatrix[i][j]) dMin[i] = j ;
+      }
+    }
+
+    // create leaves of the tree
+    i = -1; while (++i < n) {
+      clusters[i] = [];
+      clusters[i][0] = {left: null, right: null, dist: 0, centroid: vectors[i], size: 1, depth: 0};
+      cSize[i] = 1;
+    }
+
+    // Main loop
+    for (p = 0; p < n-1; p++) {
+      // find the closest pair of clusters
+      c1 = 0 ;
+      for (i = 0 ; i < n ; i++) {
+        if (distMatrix[i][dMin[i]] < distMatrix[c1][dMin[c1]]) c1 = i;
+      }
+      c2 = dMin[c1];
+
+      // create node to store cluster info 
+      c1Cluster = clusters[c1][0] ;
+      c2Cluster = clusters[c2][0] ;
+
+      newCentroid = calculateCentroid(c1Cluster.size, c1Cluster.centroid, c2Cluster.size, c2Cluster.centroid) ;
+      newCluster = {left: c1Cluster, right: c2Cluster, dist: distMatrix[c1][c2], centroid: newCentroid, size: c1Cluster.size + c2Cluster.size, depth: 1 + Math.max(c1Cluster.depth, c2Cluster.depth)};
+      clusters[c1].splice(0, 0, newCluster);
+      cSize[c1] += cSize[c2];
+
+      // overwrite row c1 with respect to the linkage type
+      for (j = 0 ; j < n ; j++) {
+              if (linkage == "single") {
+                      if (distMatrix[c1][j] > distMatrix[c2][j])
+                              distMatrix[j][c1] = distMatrix[c1][j] = distMatrix[c2][j] ;
+              } else if (linkage == "complete") {
+                      if (distMatrix[c1][j] < distMatrix[c2][j])
+                              distMatrix[j][c1] = distMatrix[c1][j] = distMatrix[c2][j] ;
+              } else if (linkage == "average") {
+                      var avg = ( cSize[c1] * distMatrix[c1][j] + cSize[c2] * distMatrix[c2][j])  / (cSize[c1] + cSize[j]) 
+                      distMatrix[j][c1] = distMatrix[c1][j] = avg ;
+              }
+      }
+      distMatrix[c1][c1] = Infinity;
+
+      // infinity-out old row c2 and column c2
+      for (i = 0 ; i < n ; i++)
+        distMatrix[i][c2] = distMatrix[c2][i] = Infinity;
+
+      // update dMin and redirect entries that previously pointed to c2 to point to c1
+      for (j = 0; j < n ; j++) {
+        if (dMin[j] == c2)
+          dMin[j] = c1;
+        if (distMatrix[c1][j] < distMatrix[c1][dMin[c1]]) 
+          dMin[c1] = j;
+      }
+
+      // keep track of the last added cluster
+      root = newCluster;
+    }
+
+    return root;
+  }
+
+  return hcluster;
+};
+
+function calculateCentroid(c1Size, c1Centroid, c2Size, c2Centroid) {
+  var newCentroid = [];
+  var newSize = c1Size + c2Size;
+  for (var i = 0; i < c1Centroid.length; i++)
+    newCentroid[i] = (c1Size * c1Centroid[i] + c2Size * c2Centroid[i]) / newSize;
+  return newCentroid;
+}
 science.stats.iqr = function(x) {
   var quartiles = science.stats.quantiles(x, [.25, .75]);
   return quartiles[1] - quartiles[0];
diff --git a/science.stats.min.js b/science.stats.min.js
index 9875762..84c8fb5 100644
--- a/science.stats.min.js
+++ b/science.stats.min.js
@@ -1 +1 @@
-(function(){function g(a,b){var c=b+1;while(c<a.length&&a[c]===0)c++;return c}function f(a,b,c,d){var e=d[0],f=d[1],h=g(b,f);if(h<a.length&&a[h]-a[c]<a[c]-a[e]){var i=g(b,e);d[0]=i,d[1]=h}}function e(a){return(a=1-a*a*a)*a*a}function d(a){var b=a.length,c=0;while(++c<b)if(a[c-1]>=a[c])return!1;return!0}function c(a){var b=a.length,c=-1;while(++c<b)if(!isFinite(a[c]))return!1;return!0}function b(b,c){var d=c.length;if(b>d)return null;var e=[],f=[],g={},h=0,i=0,j,k,l;while(i<b){if(h===d)re [...]
\ No newline at end of file
+(function(){function h(a,b){var c=b+1;while(c<a.length&&a[c]===0)c++;return c}function g(a,b,c,d){var e=d[0],f=d[1],g=h(b,f);if(g<a.length&&a[g]-a[c]<a[c]-a[e]){var i=h(b,e);d[0]=i,d[1]=g}}function f(a){return(a=1-a*a*a)*a*a}function e(a){var b=a.length,c=0;while(++c<b)if(a[c-1]>=a[c])return!1;return!0}function d(a){var b=a.length,c=-1;while(++c<b)if(!isFinite(a[c]))return!1;return!0}function c(a,b,c,d){var e=[],f=a+c;for(var g=0;g<b.length;g++)e[g]=(a*b[g]+c*d[g])/f;return e}function b( [...]
\ No newline at end of file
diff --git a/src/stats/hcluster.js b/src/stats/hcluster.js
index 39372a5..cb0ae2a 100644
--- a/src/stats/hcluster.js
+++ b/src/stats/hcluster.js
@@ -1,16 +1,15 @@
 science.stats.hcluster = function() {
-  var distance = science.stats.distance.euclidean;
+  var distance = science.stats.distance.euclidean,
+      linkage = "";
 
   function hcluster(vectors) {
     var n = vectors.length;
     var dMin = [];
     var cSize = [];
-
-    var matrixObj = new figue.Matrix(N,N);
-    var distMatrix = matrixObj.mtx ;
+    var distMatrix = [];
     var clusters = [];
 
-    var c1, c2, c1Cluster, c2Cluster, i, j, p, root , newCentroid ;
+    var c1, c2, c1Cluster, c2Cluster, p, root , newCentroid;
 
     var i,
         j;
@@ -18,6 +17,7 @@ science.stats.hcluster = function() {
     // Initialize distance matrix and vector of closest clusters
     i = -1; while (++i < n) {
       dMin[i] = 0;
+      distMatrix[i] = [];
       j = -1; while (++j < n) {
         distMatrix[i][j] = i === j ? Infinity : distance(vectors[i] , vectors[j]);
         if (distMatrix[i][dMin[i]] > distMatrix[i][j]) dMin[i] = j ;
@@ -27,15 +27,15 @@ science.stats.hcluster = function() {
     // create leaves of the tree
     i = -1; while (++i < n) {
       clusters[i] = [];
-      clusters[i][0] = new Node(labels[i], null, null, 0, vectors[i]);
+      clusters[i][0] = {left: null, right: null, dist: 0, centroid: vectors[i], size: 1, depth: 0};
       cSize[i] = 1;
     }
 
     // Main loop
-    for (p = 0; p < N-1; p++) {
+    for (p = 0; p < n-1; p++) {
       // find the closest pair of clusters
       c1 = 0 ;
-      for (i = 0 ; i < N ; i++) {
+      for (i = 0 ; i < n ; i++) {
         if (distMatrix[i][dMin[i]] < distMatrix[c1][dMin[c1]]) c1 = i;
       }
       c2 = dMin[c1];
@@ -44,20 +44,20 @@ science.stats.hcluster = function() {
       c1Cluster = clusters[c1][0] ;
       c2Cluster = clusters[c2][0] ;
 
-      newCentroid = calculateCentroid ( c1Cluster.size , c1Cluster.centroid , c2Cluster.size , c2Cluster.centroid ) ;
-      newCluster = new Node (-1, c1Cluster, c2Cluster , distMatrix[c1][c2] , newCentroid) ;
-      clusters[c1].splice(0,0, newCluster) ;
-      cSize[c1] += cSize[c2] ;
+      newCentroid = calculateCentroid(c1Cluster.size, c1Cluster.centroid, c2Cluster.size, c2Cluster.centroid) ;
+      newCluster = {left: c1Cluster, right: c2Cluster, dist: distMatrix[c1][c2], centroid: newCentroid, size: c1Cluster.size + c2Cluster.size, depth: 1 + Math.max(c1Cluster.depth, c2Cluster.depth)};
+      clusters[c1].splice(0, 0, newCluster);
+      cSize[c1] += cSize[c2];
 
       // overwrite row c1 with respect to the linkage type
-      for (j = 0 ; j < N ; j++) {
-              if (linkage == figue.SINGLE_LINKAGE) {
+      for (j = 0 ; j < n ; j++) {
+              if (linkage == "single") {
                       if (distMatrix[c1][j] > distMatrix[c2][j])
                               distMatrix[j][c1] = distMatrix[c1][j] = distMatrix[c2][j] ;
-              } else if (linkage == figue.COMPLETE_LINKAGE) {
+              } else if (linkage == "complete") {
                       if (distMatrix[c1][j] < distMatrix[c2][j])
                               distMatrix[j][c1] = distMatrix[c1][j] = distMatrix[c2][j] ;
-              } else if (linkage == figue.AVERAGE_LINKAGE) {
+              } else if (linkage == "average") {
                       var avg = ( cSize[c1] * distMatrix[c1][j] + cSize[c2] * distMatrix[c2][j])  / (cSize[c1] + cSize[j]) 
                       distMatrix[j][c1] = distMatrix[c1][j] = avg ;
               }
@@ -65,11 +65,11 @@ science.stats.hcluster = function() {
       distMatrix[c1][c1] = Infinity;
 
       // infinity ­out old row c2 and column c2
-      for (i = 0 ; i < N ; i++)
+      for (i = 0 ; i < n ; i++)
         distMatrix[i][c2] = distMatrix[c2][i] = Infinity;
 
       // update dmin and replace ones that previous pointed to c2 to point to c1
-      for (j = 0; j < N ; j++) {
+      for (j = 0; j < n ; j++) {
         if (dMin[j] == c2)
           dMin[j] = c1;
         if (distMatrix[c1][j] < distMatrix[c1][dMin[c1]]) 
@@ -85,3 +85,11 @@ science.stats.hcluster = function() {
 
   return hcluster;
 };
+
+function calculateCentroid(c1Size, c1Centroid, c2Size, c2Centroid) {
+  var newCentroid = [];
+  var newSize = c1Size + c2Size;
+  for (var i = 0; i < c1Centroid.length; i++)
+    newCentroid[i] = (c1Size * c1Centroid[i] + c2Size * c2Centroid[i]) / newSize;
+  return newCentroid;
+}
diff --git a/test/stats/hcluster-test.js b/test/stats/hcluster-test.js
new file mode 100644
index 0000000..8a24edb
--- /dev/null
+++ b/test/stats/hcluster-test.js
@@ -0,0 +1,27 @@
+require("../../science");
+require("../../science.stats");
+
+var vows = require("vows"),
+    assert = require("assert");
+
+var suite = vows.describe("science.stats.hcluster");
+
+suite.addBatch({
+  "hcluster": {
+    "simple": function() {
+      var data = [],
+          i;
+      for (i=0; i<100; i++) {
+        data.push([Math.random(), Math.random()]);
+      }
+      for (i=0; i<100; i++) {
+        data.push([10 + Math.random(), 10 + Math.random()]);
+      }
+      var x = science.stats.hcluster()(data);
+      assert.equal(x.left.size, 100);
+      assert.equal(x.right.size, 100);
+    }
+  }
+});
+
+suite.export(module);

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/science.js.git



More information about the Pkg-javascript-commits mailing list