[Pkg-javascript-commits] [science.js] 17/87: Add stats.kmeans and stats.distance.
bhuvan krishna
bhuvan-guest at moszumanska.debian.org
Thu Dec 8 06:11:53 UTC 2016
This is an automated email from the git hooks/post-receive script.
bhuvan-guest pushed a commit to branch master
in repository science.js.
commit d28ed837dba64ab665a34804ee0eefd40a906c0b
Author: Jason Davies <jason at jasondavies.com>
Date: Thu Aug 25 20:07:19 2011 +0100
Add stats.kmeans and stats.distance.
---
Makefile | 2 +
science.stats.js | 156 ++++++++++++++++++++++++++++++++++++++++++++
science.stats.min.js | 2 +-
src/stats/distance.js | 20 ++++++
src/stats/kmeans.js | 136 ++++++++++++++++++++++++++++++++++++++
test/stats/distance-test.js | 28 ++++++++
test/stats/kmeans-test.js | 32 +++++++++
7 files changed, 375 insertions(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index d146aec..36f6f21 100644
--- a/Makefile
+++ b/Makefile
@@ -30,8 +30,10 @@ science.stats.js: \
src/start.js \
src/stats/stats.js \
src/stats/bandwidth.js \
+ src/stats/distance.js \
src/stats/kernel.js \
src/stats/kde.js \
+ src/stats/kmeans.js \
src/stats/iqr.js \
src/stats/loess.js \
src/stats/mean.js \
diff --git a/science.stats.js b/science.stats.js
index a0ac7d3..6a8922d 100644
--- a/science.stats.js
+++ b/science.stats.js
@@ -19,6 +19,26 @@ science.stats.bandwidth = {
* Math.pow(x.length, -1/5);
}
};
+science.stats.distance = {
+ euclidean: function(a, b) {
+ var n = a.length,
+ i = -1,
+ s = 0,
+ x;
+ while (++i < n) {
+ x = a[i] - b[i];
+ s += x * x;
+ }
+ return Math.sqrt(s);
+ },
+ manhattan: function(a, b) {
+ var n = a.length,
+ i = -1,
+ s = 0;
+ while (++i < n) s += Math.abs(a[i] - b[i]);
+ return s;
+ }
+};
// See <http://en.wikipedia.org/wiki/Kernel_(statistics)>.
science.stats.kernel = {
uniform: function(u) {
@@ -94,6 +114,142 @@ science.stats.kde = function() {
return kde;
};
+// Based on figue implementation by Jean-Yves Delort.
+// http://code.google.com/p/figue/
+science.stats.kmeans = function() {
+ var distance = science.stats.distance.euclidean,
+ maxIterations = 1000,
+ k = 1;
+
+ function kmeans(vectors) {
+ var n = vectors.length,
+ assignments = [],
+ clusterSizes = [],
+ repeat = 1,
+ iterations = 0,
+ centroids = science_stats_kmeansRandom(k, vectors),
+ newCentroids,
+ i,
+ j,
+ x,
+ d,
+ min,
+ best;
+
+ while (repeat && iterations < maxIterations) {
+ // Assignment step.
+ j = -1; while (++j < k) {
+ clusterSizes[j] = 0;
+ }
+
+ i = -1; while (++i < n) {
+ x = vectors[i];
+ min = Infinity;
+ j = -1; while (++j < k) {
+ d = distance.call(this, centroids[j], x);
+ if (d < min) {
+ min = d;
+ best = j;
+ }
+ }
+ clusterSizes[assignments[i] = best]++;
+ }
+
+ // Update centroids step.
+ newCentroids = [];
+ i = -1; while (++i < n) {
+ x = assignments[i];
+ d = newCentroids[x];
+ if (d == null) newCentroids[x] = vectors[i].slice();
+ else {
+ j = -1; while (++j < d.length) {
+ d[j] += vectors[i][j];
+ }
+ }
+ }
+ j = -1; while (++j < k) {
+ x = newCentroids[j];
+ d = 1 / clusterSizes[j];
+ i = -1; while (++i < x.length) x[i] *= d;
+ }
+
+ // Check convergence.
+ repeat = 0;
+ j = -1; while (++j < k) {
+ if (!science_stats_kmeansCompare(newCentroids[j], centroids[j])) {
+ repeat = 1;
+ break;
+ }
+ }
+ centroids = newCentroids;
+ iterations++;
+ }
+ return {assignments: assignments, centroids: centroids};
+ }
+
+ kmeans.k = function(x) {
+ if (!arguments.length) return k;
+ k = x;
+ return kmeans;
+ };
+
+ kmeans.distance = function(x) {
+ if (!arguments.length) return distance;
+ distance = x;
+ return kmeans;
+ };
+
+ return kmeans;
+}
+
+function science_stats_kmeansCompare(a, b) {
+ if (!a || !b || a.length !== b.length) return false;
+ var n = a.length,
+ i = -1;
+ while (++i < n) if (a[i] !== b[i]) return false;
+ return true;
+}
+
+// Returns an array of k distinct vectors randomly selected from the input
+// array of vectors. Returns null if k > n or if there are less than k distinct
+// objects in vectors.
+function science_stats_kmeansRandom(k, vectors) {
+ var n = vectors.length;
+ if (k > n) return null;
+
+ var selected_vectors = [];
+ var selected_indices = [];
+ var tested_indices = {};
+ var tested = 0;
+ var selected = 0;
+ var i,
+ vector,
+ select;
+
+ while (selected < k) {
+ if (tested === n) return null;
+
+ var random_index = Math.floor(Math.random() * n);
+ if (random_index in tested_indices) continue;
+
+ tested_indices[random_index] = 1;
+ tested++;
+ vector = vectors[random_index];
+ select = true;
+ for (i = 0; i < selected; i++) {
+ if (science_stats_kmeansCompare(vector, selected_vectors[i])) {
+ select = false;
+ break;
+ }
+ }
+ if (select) {
+ selected_vectors[selected] = vector;
+ selected_indices[selected] = random_index;
+ selected++;
+ }
+ }
+ return selected_vectors;
+}
science.stats.iqr = function(x) {
var quartiles = science.stats.quantiles(x, [.25, .75]);
return quartiles[1] - quartiles[0];
diff --git a/science.stats.min.js b/science.stats.min.js
index c7d867c..9875762 100644
--- a/science.stats.min.js
+++ b/science.stats.min.js
@@ -1 +1 @@
-(function(){function e(a,b){var c=b+1;while(c<a.length&&a[c]===0)c++;return c}function d(a,b,c,d){var f=d[0],g=d[1],h=e(b,g);if(h<a.length&&a[h]-a[c]<a[c]-a[f]){var i=e(b,f);d[0]=i,d[1]=h}}function c(a){return(a=1-a*a*a)*a*a}function b(a){var b=a.length,c=0;while(++c<b)if(a[c-1]>=a[c])return!1;return!0}function a(a){var b=a.length,c=-1;while(++c<b)if(!isFinite(a[c]))return!1;return!0}science.stats={},science.stats.bandwidth={nrd0:function(a){var b=Math.sqrt(science.stats.variance(a));(lo [...]
\ No newline at end of file
+(function(){function g(a,b){var c=b+1;while(c<a.length&&a[c]===0)c++;return c}function f(a,b,c,d){var e=d[0],f=d[1],h=g(b,f);if(h<a.length&&a[h]-a[c]<a[c]-a[e]){var i=g(b,e);d[0]=i,d[1]=h}}function e(a){return(a=1-a*a*a)*a*a}function d(a){var b=a.length,c=0;while(++c<b)if(a[c-1]>=a[c])return!1;return!0}function c(a){var b=a.length,c=-1;while(++c<b)if(!isFinite(a[c]))return!1;return!0}function b(b,c){var d=c.length;if(b>d)return null;var e=[],f=[],g={},h=0,i=0,j,k,l;while(i<b){if(h===d)re [...]
\ No newline at end of file
diff --git a/src/stats/distance.js b/src/stats/distance.js
new file mode 100644
index 0000000..963ff05
--- /dev/null
+++ b/src/stats/distance.js
@@ -0,0 +1,20 @@
+science.stats.distance = {
+ euclidean: function(a, b) {
+ var n = a.length,
+ i = -1,
+ s = 0,
+ x;
+ while (++i < n) {
+ x = a[i] - b[i];
+ s += x * x;
+ }
+ return Math.sqrt(s);
+ },
+ manhattan: function(a, b) {
+ var n = a.length,
+ i = -1,
+ s = 0;
+ while (++i < n) s += Math.abs(a[i] - b[i]);
+ return s;
+ }
+};
diff --git a/src/stats/kmeans.js b/src/stats/kmeans.js
new file mode 100644
index 0000000..311d994
--- /dev/null
+++ b/src/stats/kmeans.js
@@ -0,0 +1,136 @@
+// Based on figue implementation by Jean-Yves Delort.
+// http://code.google.com/p/figue/
+science.stats.kmeans = function() {
+ var distance = science.stats.distance.euclidean,
+ maxIterations = 1000,
+ k = 1;
+
+ function kmeans(vectors) {
+ var n = vectors.length,
+ assignments = [],
+ clusterSizes = [],
+ repeat = 1,
+ iterations = 0,
+ centroids = science_stats_kmeansRandom(k, vectors),
+ newCentroids,
+ i,
+ j,
+ x,
+ d,
+ min,
+ best;
+
+ while (repeat && iterations < maxIterations) {
+ // Assignment step.
+ j = -1; while (++j < k) {
+ clusterSizes[j] = 0;
+ }
+
+ i = -1; while (++i < n) {
+ x = vectors[i];
+ min = Infinity;
+ j = -1; while (++j < k) {
+ d = distance.call(this, centroids[j], x);
+ if (d < min) {
+ min = d;
+ best = j;
+ }
+ }
+ clusterSizes[assignments[i] = best]++;
+ }
+
+ // Update centroids step.
+ newCentroids = [];
+ i = -1; while (++i < n) {
+ x = assignments[i];
+ d = newCentroids[x];
+ if (d == null) newCentroids[x] = vectors[i].slice();
+ else {
+ j = -1; while (++j < d.length) {
+ d[j] += vectors[i][j];
+ }
+ }
+ }
+ j = -1; while (++j < k) {
+ x = newCentroids[j];
+ d = 1 / clusterSizes[j];
+ i = -1; while (++i < x.length) x[i] *= d;
+ }
+
+ // Check convergence.
+ repeat = 0;
+ j = -1; while (++j < k) {
+ if (!science_stats_kmeansCompare(newCentroids[j], centroids[j])) {
+ repeat = 1;
+ break;
+ }
+ }
+ centroids = newCentroids;
+ iterations++;
+ }
+ return {assignments: assignments, centroids: centroids};
+ }
+
+ kmeans.k = function(x) {
+ if (!arguments.length) return k;
+ k = x;
+ return kmeans;
+ };
+
+ kmeans.distance = function(x) {
+ if (!arguments.length) return distance;
+ distance = x;
+ return kmeans;
+ };
+
+ return kmeans;
+}
+
+function science_stats_kmeansCompare(a, b) {
+ if (!a || !b || a.length !== b.length) return false;
+ var n = a.length,
+ i = -1;
+ while (++i < n) if (a[i] !== b[i]) return false;
+ return true;
+}
+
+// Returns an array of k distinct vectors randomly selected from the input
+// array of vectors. Returns null if k > n or if there are less than k distinct
+// objects in vectors.
+function science_stats_kmeansRandom(k, vectors) {
+ var n = vectors.length;
+ if (k > n) return null;
+
+ var selected_vectors = [];
+ var selected_indices = [];
+ var tested_indices = {};
+ var tested = 0;
+ var selected = 0;
+ var i,
+ vector,
+ select;
+
+ while (selected < k) {
+ if (tested === n) return null;
+
+ var random_index = Math.floor(Math.random() * n);
+ if (random_index in tested_indices) continue;
+
+ tested_indices[random_index] = 1;
+ tested++;
+ vector = vectors[random_index];
+ select = true;
+ for (i = 0; i < selected; i++) {
+ if (science_stats_kmeansCompare(vector, selected_vectors[i])) {
+ select = false;
+ break;
+ }
+ }
+ if (select) {
+ selected_vectors[selected] = vector;
+ selected_indices[selected] = random_index;
+ selected++;
+ }
+ }
+ return selected_vectors;
+}
diff --git a/test/stats/distance-test.js b/test/stats/distance-test.js
new file mode 100644
index 0000000..2f5f435
--- /dev/null
+++ b/test/stats/distance-test.js
@@ -0,0 +1,28 @@
+require("../../science");
+require("../../science.stats");
+
+var vows = require("vows"),
+ assert = require("assert");
+
+var suite = vows.describe("science.stats.distance");
+
+suite.addBatch({
+ "distance": {
+ "euclidean": function() {
+ var euclidean = science.stats.distance.euclidean;
+ assert.equal(euclidean([], []), 0);
+ assert.equal(euclidean([0], [1]), 1);
+ assert.equal(euclidean([0, 0], [1, 1]), Math.sqrt(2));
+ assert.equal(euclidean([0, 0, 0], [1, 1, 1]), Math.sqrt(3));
+ },
+ "manhattan": function() {
+ var manhattan = science.stats.distance.manhattan;
+ assert.equal(manhattan([], []), 0);
+ assert.equal(manhattan([0], [1]), 1);
+ assert.equal(manhattan([0, 0], [1, 1]), 2);
+ assert.equal(manhattan([0, 0, 0], [1, 1, 1]), 3);
+ }
+ }
+});
+
+suite.export(module);
diff --git a/test/stats/kmeans-test.js b/test/stats/kmeans-test.js
new file mode 100644
index 0000000..4805d62
--- /dev/null
+++ b/test/stats/kmeans-test.js
@@ -0,0 +1,32 @@
+require("../../science");
+require("../../science.stats");
+
+var vows = require("vows"),
+ assert = require("assert");
+
+var suite = vows.describe("science.kmeans");
+
+suite.addBatch({
+ "kmeans": {
+ "simple": function() {
+ var data = [],
+ i;
+ for (i=0; i<100; i++) {
+ data.push([Math.random(), Math.random()]);
+ }
+ for (i=0; i<100; i++) {
+ data.push([10 + Math.random(), 10 + Math.random()]);
+ }
+ var x = science.stats.kmeans().k(2)(data);
+ var cluster0 = x.assignments[0];
+ for (i=0; i<100; i++) {
+ assert.equal(x.assignments[i], cluster0);
+ }
+ for (i=100; i<200; i++) {
+ assert.equal(x.assignments[i], 1 - cluster0);
+ }
+ }
+ }
+});
+
+suite.export(module);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/science.js.git
More information about the Pkg-javascript-commits
mailing list