[Pkg-javascript-commits] [science.js] 21/87: Add agglomerative hierarchical clustering.
bhuvan krishna
bhuvan-guest at moszumanska.debian.org
Thu Dec 8 06:11:54 UTC 2016
This is an automated email from the git hooks/post-receive script.
bhuvan-guest pushed a commit to branch master
in repository science.js.
commit 377656c0ca7fbd0bb72fe901aff83d3318432619
Author: Jason Davies <jason at jasondavies.com>
Date: Thu Aug 25 20:31:36 2011 +0100
Add agglomerative hierarchical clustering.
---
Makefile | 1 +
science.stats.js | 95 +++++++++++++++++++++++++++++++++++++++++++++
science.stats.min.js | 2 +-
src/stats/hcluster.js | 44 ++++++++++++---------
test/stats/hcluster-test.js | 27 +++++++++++++
5 files changed, 150 insertions(+), 19 deletions(-)
diff --git a/Makefile b/Makefile
index 36f6f21..f83302a 100644
--- a/Makefile
+++ b/Makefile
@@ -34,6 +34,7 @@ science.stats.js: \
src/stats/kernel.js \
src/stats/kde.js \
src/stats/kmeans.js \
+ src/stats/hcluster.js \
src/stats/iqr.js \
src/stats/loess.js \
src/stats/mean.js \
diff --git a/science.stats.js b/science.stats.js
index f43a434..4411920 100644
--- a/science.stats.js
+++ b/science.stats.js
@@ -250,6 +250,101 @@ function science_stats_kmeansRandom(k, vectors) {
}
return selected_vectors;
}
+science.stats.hcluster = function() { // factory: returns the hcluster(vectors) closure defined below
+ var distance = science.stats.distance.euclidean, // pairwise distance used to fill the matrix
+ linkage = ""; // compared with "single"/"complete"/"average" below; nothing in this closure reassigns it, so none of those branches runs with this default
+ // Agglomerative clustering: repeatedly merges the closest pair of clusters and returns the last node created (root is left undefined when vectors has fewer than two entries, because the main loop never runs).
+ function hcluster(vectors) {
+ var n = vectors.length;
+ var dMin = []; // dMin[i]: index of the cluster currently recorded as nearest to slot i
+ var cSize = []; // cSize[i]: count of original vectors merged into slot i
+ var distMatrix = []; // n x n inter-cluster distances; Infinity marks the diagonal and retired slots
+ var clusters = []; // clusters[i][0] is the most recent tree node stored in slot i
+
+ var c1, c2, c1Cluster, c2Cluster, p, root , newCentroid; // NOTE(review): newCluster is assigned below but not declared here, so it becomes an implicit global
+
+ var i,
+ j;
+
+ // Initialize distance matrix and vector of closest clusters
+ i = -1; while (++i < n) {
+ dMin[i] = 0;
+ distMatrix[i] = [];
+ j = -1; while (++j < n) {
+ distMatrix[i][j] = i === j ? Infinity : distance(vectors[i] , vectors[j]); // Infinity on the diagonal so a slot is never its own nearest neighbour
+ if (distMatrix[i][dMin[i]] > distMatrix[i][j]) dMin[i] = j ;
+ }
+ }
+
+ // create leaves of the tree: one single-vector node per input, with size 1 and depth 0
+ i = -1; while (++i < n) {
+ clusters[i] = [];
+ clusters[i][0] = {left: null, right: null, dist: 0, centroid: vectors[i], size: 1, depth: 0};
+ cSize[i] = 1;
+ }
+
+ // Main loop: n-1 merges, each folding slot c2 into slot c1
+ for (p = 0; p < n-1; p++) {
+ // find the closest pair of clusters
+ c1 = 0 ;
+ for (i = 0 ; i < n ; i++) {
+ if (distMatrix[i][dMin[i]] < distMatrix[c1][dMin[c1]]) c1 = i;
+ }
+ c2 = dMin[c1];
+
+ // create node to store cluster info (dist is the matrix distance between the merged pair; centroid is their size-weighted mean)
+ c1Cluster = clusters[c1][0] ;
+ c2Cluster = clusters[c2][0] ;
+
+ newCentroid = calculateCentroid(c1Cluster.size, c1Cluster.centroid, c2Cluster.size, c2Cluster.centroid) ;
+ newCluster = {left: c1Cluster, right: c2Cluster, dist: distMatrix[c1][c2], centroid: newCentroid, size: c1Cluster.size + c2Cluster.size, depth: 1 + Math.max(c1Cluster.depth, c2Cluster.depth)};
+ clusters[c1].splice(0, 0, newCluster); // insert at the front so clusters[c1][0] is the merged node
+ cSize[c1] += cSize[c2];
+
+ // overwrite row c1 with respect to the linkage type
+ for (j = 0 ; j < n ; j++) {
+ if (linkage == "single") {
+ if (distMatrix[c1][j] > distMatrix[c2][j])
+ distMatrix[j][c1] = distMatrix[c1][j] = distMatrix[c2][j] ;
+ } else if (linkage == "complete") {
+ if (distMatrix[c1][j] < distMatrix[c2][j])
+ distMatrix[j][c1] = distMatrix[c1][j] = distMatrix[c2][j] ;
+ } else if (linkage == "average") {
+ var avg = ( cSize[c1] * distMatrix[c1][j] + cSize[c2] * distMatrix[c2][j]) / (cSize[c1] + cSize[j]) // NOTE(review): cSize[c1] was already increased by cSize[c2] above, and the divisor adds cSize[j] rather than cSize[c2] — confirm this is the intended average-linkage weighting
+ distMatrix[j][c1] = distMatrix[c1][j] = avg ;
+ }
+ }
+ distMatrix[c1][c1] = Infinity;
+
+ // infinity out old row c2 and column c2
+ for (i = 0 ; i < n ; i++)
+ distMatrix[i][c2] = distMatrix[c2][i] = Infinity;
+
+ // update dMin: entries that previously pointed to c2 now point to c1, and dMin[c1] is rescanned over row c1
+ for (j = 0; j < n ; j++) {
+ if (dMin[j] == c2)
+ dMin[j] = c1;
+ if (distMatrix[c1][j] < distMatrix[c1][dMin[c1]])
+ dMin[c1] = j;
+ }
+
+ // keep track of the last added cluster
+ root = newCluster;
+ }
+
+ return root;
+ }
+
+ return hcluster;
+};
+
+function calculateCentroid(c1Size, c1Centroid, c2Size, c2Centroid) { // size-weighted mean of two centroid vectors, returned as a new array
+ var newCentroid = [];
+ var newSize = c1Size + c2Size;
+ for (var i = 0; i < c1Centroid.length; i++) // iterates over c1Centroid's length; assumes c2Centroid has the same length — TODO confirm
+ newCentroid[i] = (c1Size * c1Centroid[i] + c2Size * c2Centroid[i]) / newSize;
+ return newCentroid;
+}
science.stats.iqr = function(x) {
var quartiles = science.stats.quantiles(x, [.25, .75]);
return quartiles[1] - quartiles[0];
diff --git a/science.stats.min.js b/science.stats.min.js
index 9875762..84c8fb5 100644
--- a/science.stats.min.js
+++ b/science.stats.min.js
@@ -1 +1 @@
-(function(){function g(a,b){var c=b+1;while(c<a.length&&a[c]===0)c++;return c}function f(a,b,c,d){var e=d[0],f=d[1],h=g(b,f);if(h<a.length&&a[h]-a[c]<a[c]-a[e]){var i=g(b,e);d[0]=i,d[1]=h}}function e(a){return(a=1-a*a*a)*a*a}function d(a){var b=a.length,c=0;while(++c<b)if(a[c-1]>=a[c])return!1;return!0}function c(a){var b=a.length,c=-1;while(++c<b)if(!isFinite(a[c]))return!1;return!0}function b(b,c){var d=c.length;if(b>d)return null;var e=[],f=[],g={},h=0,i=0,j,k,l;while(i<b){if(h===d)re [...]
\ No newline at end of file
+(function(){function h(a,b){var c=b+1;while(c<a.length&&a[c]===0)c++;return c}function g(a,b,c,d){var e=d[0],f=d[1],g=h(b,f);if(g<a.length&&a[g]-a[c]<a[c]-a[e]){var i=h(b,e);d[0]=i,d[1]=g}}function f(a){return(a=1-a*a*a)*a*a}function e(a){var b=a.length,c=0;while(++c<b)if(a[c-1]>=a[c])return!1;return!0}function d(a){var b=a.length,c=-1;while(++c<b)if(!isFinite(a[c]))return!1;return!0}function c(a,b,c,d){var e=[],f=a+c;for(var g=0;g<b.length;g++)e[g]=(a*b[g]+c*d[g])/f;return e}function b( [...]
\ No newline at end of file
diff --git a/src/stats/hcluster.js b/src/stats/hcluster.js
index 39372a5..cb0ae2a 100644
--- a/src/stats/hcluster.js
+++ b/src/stats/hcluster.js
@@ -1,16 +1,15 @@
science.stats.hcluster = function() {
- var distance = science.stats.distance.euclidean;
+ var distance = science.stats.distance.euclidean,
+ linkage = "";
function hcluster(vectors) {
var n = vectors.length;
var dMin = [];
var cSize = [];
-
- var matrixObj = new figue.Matrix(N,N);
- var distMatrix = matrixObj.mtx ;
+ var distMatrix = [];
var clusters = [];
- var c1, c2, c1Cluster, c2Cluster, i, j, p, root , newCentroid ;
+ var c1, c2, c1Cluster, c2Cluster, p, root , newCentroid;
var i,
j;
@@ -18,6 +17,7 @@ science.stats.hcluster = function() {
// Initialize distance matrix and vector of closest clusters
i = -1; while (++i < n) {
dMin[i] = 0;
+ distMatrix[i] = [];
j = -1; while (++j < n) {
distMatrix[i][j] = i === j ? Infinity : distance(vectors[i] , vectors[j]);
if (distMatrix[i][dMin[i]] > distMatrix[i][j]) dMin[i] = j ;
@@ -27,15 +27,15 @@ science.stats.hcluster = function() {
// create leaves of the tree
i = -1; while (++i < n) {
clusters[i] = [];
- clusters[i][0] = new Node(labels[i], null, null, 0, vectors[i]);
+ clusters[i][0] = {left: null, right: null, dist: 0, centroid: vectors[i], size: 1, depth: 0};
cSize[i] = 1;
}
// Main loop
- for (p = 0; p < N-1; p++) {
+ for (p = 0; p < n-1; p++) {
// find the closest pair of clusters
c1 = 0 ;
- for (i = 0 ; i < N ; i++) {
+ for (i = 0 ; i < n ; i++) {
if (distMatrix[i][dMin[i]] < distMatrix[c1][dMin[c1]]) c1 = i;
}
c2 = dMin[c1];
@@ -44,20 +44,20 @@ science.stats.hcluster = function() {
c1Cluster = clusters[c1][0] ;
c2Cluster = clusters[c2][0] ;
- newCentroid = calculateCentroid ( c1Cluster.size , c1Cluster.centroid , c2Cluster.size , c2Cluster.centroid ) ;
- newCluster = new Node (-1, c1Cluster, c2Cluster , distMatrix[c1][c2] , newCentroid) ;
- clusters[c1].splice(0,0, newCluster) ;
- cSize[c1] += cSize[c2] ;
+ newCentroid = calculateCentroid(c1Cluster.size, c1Cluster.centroid, c2Cluster.size, c2Cluster.centroid) ;
+ newCluster = {left: c1Cluster, right: c2Cluster, dist: distMatrix[c1][c2], centroid: newCentroid, size: c1Cluster.size + c2Cluster.size, depth: 1 + Math.max(c1Cluster.depth, c2Cluster.depth)};
+ clusters[c1].splice(0, 0, newCluster);
+ cSize[c1] += cSize[c2];
// overwrite row c1 with respect to the linkage type
- for (j = 0 ; j < N ; j++) {
- if (linkage == figue.SINGLE_LINKAGE) {
+ for (j = 0 ; j < n ; j++) {
+ if (linkage == "single") {
if (distMatrix[c1][j] > distMatrix[c2][j])
distMatrix[j][c1] = distMatrix[c1][j] = distMatrix[c2][j] ;
- } else if (linkage == figue.COMPLETE_LINKAGE) {
+ } else if (linkage == "complete") {
if (distMatrix[c1][j] < distMatrix[c2][j])
distMatrix[j][c1] = distMatrix[c1][j] = distMatrix[c2][j] ;
- } else if (linkage == figue.AVERAGE_LINKAGE) {
+ } else if (linkage == "average") {
var avg = ( cSize[c1] * distMatrix[c1][j] + cSize[c2] * distMatrix[c2][j]) / (cSize[c1] + cSize[j])
distMatrix[j][c1] = distMatrix[c1][j] = avg ;
}
@@ -65,11 +65,11 @@ science.stats.hcluster = function() {
distMatrix[c1][c1] = Infinity;
// infinity out old row c2 and column c2
- for (i = 0 ; i < N ; i++)
+ for (i = 0 ; i < n ; i++)
distMatrix[i][c2] = distMatrix[c2][i] = Infinity;
// update dmin and replace ones that previous pointed to c2 to point to c1
- for (j = 0; j < N ; j++) {
+ for (j = 0; j < n ; j++) {
if (dMin[j] == c2)
dMin[j] = c1;
if (distMatrix[c1][j] < distMatrix[c1][dMin[c1]])
@@ -85,3 +85,11 @@ science.stats.hcluster = function() {
return hcluster;
};
+
+function calculateCentroid(c1Size, c1Centroid, c2Size, c2Centroid) { // size-weighted mean of two centroid vectors, returned as a new array
+ var newCentroid = [];
+ var newSize = c1Size + c2Size;
+ for (var i = 0; i < c1Centroid.length; i++) // iterates over c1Centroid's length; assumes c2Centroid has the same length — TODO confirm
+ newCentroid[i] = (c1Size * c1Centroid[i] + c2Size * c2Centroid[i]) / newSize;
+ return newCentroid;
+}
diff --git a/test/stats/hcluster-test.js b/test/stats/hcluster-test.js
new file mode 100644
index 0000000..8a24edb
--- /dev/null
+++ b/test/stats/hcluster-test.js
@@ -0,0 +1,27 @@
+require("../../science");
+require("../../science.stats");
+
+var vows = require("vows"),
+ assert = require("assert");
+
+var suite = vows.describe("science.stats.hcluster");
+
+suite.addBatch({
+ "hcluster": {
+ "simple": function() { // two well-separated groups of random 2-D points should end up as the two children of the root
+ var data = [],
+ i;
+ for (i=0; i<100; i++) { // 100 points with both coordinates in [0, 1)
+ data.push([Math.random(), Math.random()]);
+ }
+ for (i=0; i<100; i++) { // 100 points with both coordinates in [10, 11)
+ data.push([10 + Math.random(), 10 + Math.random()]);
+ }
+ var x = science.stats.hcluster()(data); // factory called with no configuration; x is the returned root node
+ assert.equal(x.left.size, 100); // each child of the root is expected to hold exactly one of the two groups
+ assert.equal(x.right.size, 100);
+ }
+ }
+});
+
+suite.export(module);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/science.js.git
More information about the Pkg-javascript-commits
mailing list