[Pkg-javascript-commits] [science.js] 17/87: Add stats.kmeans and stats.distance.

bhuvan krishna bhuvan-guest at moszumanska.debian.org
Thu Dec 8 06:11:53 UTC 2016


This is an automated email from the git hooks/post-receive script.

bhuvan-guest pushed a commit to branch master
in repository science.js.

commit d28ed837dba64ab665a34804ee0eefd40a906c0b
Author: Jason Davies <jason at jasondavies.com>
Date:   Thu Aug 25 20:07:19 2011 +0100

    Add stats.kmeans and stats.distance.
---
 Makefile                    |   2 +
 science.stats.js            | 156 ++++++++++++++++++++++++++++++++++++++++++++
 science.stats.min.js        |   2 +-
 src/stats/distance.js       |  20 ++++++
 src/stats/kmeans.js         | 136 ++++++++++++++++++++++++++++++++++++++
 test/stats/distance-test.js |  28 ++++++++
 test/stats/kmeans-test.js   |  32 +++++++++
 7 files changed, 375 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index d146aec..36f6f21 100644
--- a/Makefile
+++ b/Makefile
@@ -30,8 +30,10 @@ science.stats.js: \
 	src/start.js \
 	src/stats/stats.js \
 	src/stats/bandwidth.js \
+	src/stats/distance.js \
 	src/stats/kernel.js \
 	src/stats/kde.js \
+	src/stats/kmeans.js \
 	src/stats/iqr.js \
 	src/stats/loess.js \
 	src/stats/mean.js \
diff --git a/science.stats.js b/science.stats.js
index a0ac7d3..6a8922d 100644
--- a/science.stats.js
+++ b/science.stats.js
@@ -19,6 +19,26 @@ science.stats.bandwidth = {
       * Math.pow(x.length, -1/5);
   }
 };
+science.stats.distance = {
+  euclidean: function(a, b) {
+    var n = a.length,
+        i = -1,
+        s = 0,
+        x;
+    while (++i < n) {
+      x = a[i] - b[i];
+      s += x * x;
+    }
+    return Math.sqrt(s);
+  },
+  manhattan: function(a, b) {
+    var n = a.length,
+        i = -1,
+        s = 0;
+    while (++i < n) s += Math.abs(a[i] - b[i]);
+    return s;
+  }
+};
 // See <http://en.wikipedia.org/wiki/Kernel_(statistics)>.
 science.stats.kernel = {
   uniform: function(u) {
@@ -94,6 +114,142 @@ science.stats.kde = function() {
 
   return kde;
 };
+// Based on figue implementation by Jean-Yves Delort.
+// http://code.google.com/p/figue/
+science.stats.kmeans = function() {
+  var distance = science.stats.distance.euclidean,
+      maxIterations = 1000,
+      k = 1;
+
+  function kmeans(vectors) {
+    var n = vectors.length,
+        assignments = [],
+        clusterSizes = [],
+        repeat = 1,
+        iterations = 0,
+        centroids = science_stats_kmeansRandom(k, vectors),
+        newCentroids,
+        i,
+        j,
+        x,
+        d,
+        min,
+        best;
+
+    while (repeat && iterations < maxIterations) {
+      // Assignment step.
+      j = -1; while (++j < k) {
+        clusterSizes[j] = 0;
+      }
+
+      i = -1; while (++i < n) {
+        x = vectors[i];
+        min = Infinity;
+        j = -1; while (++j < k) {
+          d = distance.call(this, centroids[j], x);
+          if (d < min) {
+            min = d;
+            best = j;
+          }
+        }
+        clusterSizes[assignments[i] = best]++;
+      }
+
+      // Update centroids step.
+      newCentroids = [];
+      i = -1; while (++i < n) {
+        x = assignments[i];
+        d = newCentroids[x];
+        if (d == null) newCentroids[x] = vectors[i].slice();
+        else {
+          j = -1; while (++j < d.length) {
+            d[j] += vectors[i][j];
+          }
+        }
+      }
+      j = -1; while (++j < k) {
+        x = newCentroids[j];
+        d = 1 / clusterSizes[j];
+        i = -1; while (++i < x.length) x[i] *= d;
+      }
+
+      // Check convergence.
+      repeat = 0;
+      j = -1; while (++j < k) {
+        if (!science_stats_kmeansCompare(newCentroids[j], centroids[j])) {
+          repeat = 1;
+          break;
+        }
+      }
+      centroids = newCentroids;
+      iterations++;
+    }
+    return {assignments: assignments, centroids: centroids};
+  }
+
+  kmeans.k = function(x) {
+    if (!arguments.length) return k;
+    k = x;
+    return kmeans;
+  };
+
+  kmeans.distance = function(x) {
+    if (!arguments.length) return distance;
+    distance = x;
+    return kmeans;
+  };
+
+  return kmeans;
+}
+
+function science_stats_kmeansCompare(a, b) {
+  if (!a || !b || a.length !== b.length) return false;
+  var n = a.length,
+      i = -1;
+  while (++i < n) if (a[i] !== b[i]) return false;
+  return true;
+}
+
+// Returns an array of k distinct vectors randomly selected from the input
+// array of vectors. Returns null if k > n or if there are fewer than k
+// distinct vectors in the input.
+function science_stats_kmeansRandom(k, vectors) {
+  var n = vectors.length;
+  if (k > n) return null;
+  
+  var selected_vectors = [];
+  var selected_indices = [];
+  var tested_indices = {};
+  var tested = 0;
+  var selected = 0;
+  var i,
+      vector,
+      select;
+
+  while (selected < k) {
+    if (tested === n) return null;
+    
+    var random_index = Math.floor(Math.random() * n);
+    if (random_index in tested_indices) continue;
+    
+    tested_indices[random_index] = 1;
+    tested++;
+    vector = vectors[random_index];
+    select = true;
+    for (i = 0; i < selected; i++) {
+      if (science_stats_kmeansCompare(vector, selected_vectors[i])) {
+        select = false;
+        break;
+      }
+    }
+    if (select) {
+      selected_vectors[selected] = vector;
+      selected_indices[selected] = random_index;
+      selected++;
+    }
+  }
+  return selected_vectors;
+}
 science.stats.iqr = function(x) {
   var quartiles = science.stats.quantiles(x, [.25, .75]);
   return quartiles[1] - quartiles[0];
diff --git a/science.stats.min.js b/science.stats.min.js
index c7d867c..9875762 100644
--- a/science.stats.min.js
+++ b/science.stats.min.js
@@ -1 +1 @@
-(function(){function e(a,b){var c=b+1;while(c<a.length&&a[c]===0)c++;return c}function d(a,b,c,d){var f=d[0],g=d[1],h=e(b,g);if(h<a.length&&a[h]-a[c]<a[c]-a[f]){var i=e(b,f);d[0]=i,d[1]=h}}function c(a){return(a=1-a*a*a)*a*a}function b(a){var b=a.length,c=0;while(++c<b)if(a[c-1]>=a[c])return!1;return!0}function a(a){var b=a.length,c=-1;while(++c<b)if(!isFinite(a[c]))return!1;return!0}science.stats={},science.stats.bandwidth={nrd0:function(a){var b=Math.sqrt(science.stats.variance(a));(lo [...]
\ No newline at end of file
+(function(){function g(a,b){var c=b+1;while(c<a.length&&a[c]===0)c++;return c}function f(a,b,c,d){var e=d[0],f=d[1],h=g(b,f);if(h<a.length&&a[h]-a[c]<a[c]-a[e]){var i=g(b,e);d[0]=i,d[1]=h}}function e(a){return(a=1-a*a*a)*a*a}function d(a){var b=a.length,c=0;while(++c<b)if(a[c-1]>=a[c])return!1;return!0}function c(a){var b=a.length,c=-1;while(++c<b)if(!isFinite(a[c]))return!1;return!0}function b(b,c){var d=c.length;if(b>d)return null;var e=[],f=[],g={},h=0,i=0,j,k,l;while(i<b){if(h===d)re [...]
\ No newline at end of file
diff --git a/src/stats/distance.js b/src/stats/distance.js
new file mode 100644
index 0000000..963ff05
--- /dev/null
+++ b/src/stats/distance.js
@@ -0,0 +1,20 @@
+science.stats.distance = {
+  euclidean: function(a, b) {
+    var n = a.length,
+        i = -1,
+        s = 0,
+        x;
+    while (++i < n) {
+      x = a[i] - b[i];
+      s += x * x;
+    }
+    return Math.sqrt(s);
+  },
+  manhattan: function(a, b) {
+    var n = a.length,
+        i = -1,
+        s = 0;
+    while (++i < n) s += Math.abs(a[i] - b[i]);
+    return s;
+  }
+};
diff --git a/src/stats/kmeans.js b/src/stats/kmeans.js
new file mode 100644
index 0000000..311d994
--- /dev/null
+++ b/src/stats/kmeans.js
@@ -0,0 +1,136 @@
+// Based on figue implementation by Jean-Yves Delort.
+// http://code.google.com/p/figue/
+science.stats.kmeans = function() {
+  var distance = science.stats.distance.euclidean,
+      maxIterations = 1000,
+      k = 1;
+
+  function kmeans(vectors) {
+    var n = vectors.length,
+        assignments = [],
+        clusterSizes = [],
+        repeat = 1,
+        iterations = 0,
+        centroids = science_stats_kmeansRandom(k, vectors),
+        newCentroids,
+        i,
+        j,
+        x,
+        d,
+        min,
+        best;
+
+    while (repeat && iterations < maxIterations) {
+      // Assignment step.
+      j = -1; while (++j < k) {
+        clusterSizes[j] = 0;
+      }
+
+      i = -1; while (++i < n) {
+        x = vectors[i];
+        min = Infinity;
+        j = -1; while (++j < k) {
+          d = distance.call(this, centroids[j], x);
+          if (d < min) {
+            min = d;
+            best = j;
+          }
+        }
+        clusterSizes[assignments[i] = best]++;
+      }
+
+      // Update centroids step.
+      newCentroids = [];
+      i = -1; while (++i < n) {
+        x = assignments[i];
+        d = newCentroids[x];
+        if (d == null) newCentroids[x] = vectors[i].slice();
+        else {
+          j = -1; while (++j < d.length) {
+            d[j] += vectors[i][j];
+          }
+        }
+      }
+      j = -1; while (++j < k) {
+        x = newCentroids[j];
+        d = 1 / clusterSizes[j];
+        i = -1; while (++i < x.length) x[i] *= d;
+      }
+
+      // Check convergence.
+      repeat = 0;
+      j = -1; while (++j < k) {
+        if (!science_stats_kmeansCompare(newCentroids[j], centroids[j])) {
+          repeat = 1;
+          break;
+        }
+      }
+      centroids = newCentroids;
+      iterations++;
+    }
+    return {assignments: assignments, centroids: centroids};
+  }
+
+  kmeans.k = function(x) {
+    if (!arguments.length) return k;
+    k = x;
+    return kmeans;
+  };
+
+  kmeans.distance = function(x) {
+    if (!arguments.length) return distance;
+    distance = x;
+    return kmeans;
+  };
+
+  return kmeans;
+}
+
+function science_stats_kmeansCompare(a, b) {
+  if (!a || !b || a.length !== b.length) return false;
+  var n = a.length,
+      i = -1;
+  while (++i < n) if (a[i] !== b[i]) return false;
+  return true;
+}
+
+// Returns an array of k distinct vectors randomly selected from the input
+// array of vectors. Returns null if k > n or if there are fewer than k
+// distinct vectors in the input.
+function science_stats_kmeansRandom(k, vectors) {
+  var n = vectors.length;
+  if (k > n) return null;
+  
+  var selected_vectors = [];
+  var selected_indices = [];
+  var tested_indices = {};
+  var tested = 0;
+  var selected = 0;
+  var i,
+      vector,
+      select;
+
+  while (selected < k) {
+    if (tested === n) return null;
+    
+    var random_index = Math.floor(Math.random() * n);
+    if (random_index in tested_indices) continue;
+    
+    tested_indices[random_index] = 1;
+    tested++;
+    vector = vectors[random_index];
+    select = true;
+    for (i = 0; i < selected; i++) {
+      if (science_stats_kmeansCompare(vector, selected_vectors[i])) {
+        select = false;
+        break;
+      }
+    }
+    if (select) {
+      selected_vectors[selected] = vector;
+      selected_indices[selected] = random_index;
+      selected++;
+    }
+  }
+  return selected_vectors;
+}
diff --git a/test/stats/distance-test.js b/test/stats/distance-test.js
new file mode 100644
index 0000000..2f5f435
--- /dev/null
+++ b/test/stats/distance-test.js
@@ -0,0 +1,28 @@
+require("../../science");
+require("../../science.stats");
+
+var vows = require("vows"),
+    assert = require("assert");
+
+var suite = vows.describe("science.stats.distance");
+
+suite.addBatch({
+  "distance": {
+    "euclidean": function() {
+      var euclidean = science.stats.distance.euclidean;
+      assert.equal(euclidean([], []), 0);
+      assert.equal(euclidean([0], [1]), 1);
+      assert.equal(euclidean([0, 0], [1, 1]), Math.sqrt(2));
+      assert.equal(euclidean([0, 0, 0], [1, 1, 1]), Math.sqrt(3));
+    },
+    "manhattan": function() {
+      var manhattan = science.stats.distance.manhattan;
+      assert.equal(manhattan([], []), 0);
+      assert.equal(manhattan([0], [1]), 1);
+      assert.equal(manhattan([0, 0], [1, 1]), 2);
+      assert.equal(manhattan([0, 0, 0], [1, 1, 1]), 3);
+    }
+  }
+});
+
+suite.export(module);
diff --git a/test/stats/kmeans-test.js b/test/stats/kmeans-test.js
new file mode 100644
index 0000000..4805d62
--- /dev/null
+++ b/test/stats/kmeans-test.js
@@ -0,0 +1,32 @@
+require("../../science");
+require("../../science.stats");
+
+var vows = require("vows"),
+    assert = require("assert");
+
+var suite = vows.describe("science.kmeans");
+
+suite.addBatch({
+  "kmeans": {
+    "simple": function() {
+      var data = [],
+          i;
+      for (i=0; i<100; i++) {
+        data.push([Math.random(), Math.random()]);
+      }
+      for (i=0; i<100; i++) {
+        data.push([10 + Math.random(), 10 + Math.random()]);
+      }
+      var x = science.stats.kmeans().k(2)(data);
+      var cluster0 = x.assignments[0];
+      for (i=0; i<100; i++) {
+        assert.equal(x.assignments[i], cluster0);
+      }
+      for (i=100; i<200; i++) {
+        assert.equal(x.assignments[i], 1 - cluster0);
+      }
+    }
+  }
+});
+
+suite.export(module);

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/science.js.git



More information about the Pkg-javascript-commits mailing list