[Pkg-javascript-commits] [node-iconv-lite] 01/83: Initial commit
matthew pideil
mpideil-guest at moszumanska.debian.org
Tue Apr 1 19:56:44 UTC 2014
This is an automated email from the git hooks/post-receive script.
mpideil-guest pushed a commit to branch master
in repository node-iconv-lite.
commit 44cb32c78aff48da26d1a177c251d6c563324ae4
Author: Alexander Shtuchkin <ashtuchkin at gmail.com>
Date: Wed Nov 9 21:47:49 2011 +0400
Initial commit
---
.gitignore | 2 +
LICENSE | 21 ++++++++
README | 44 +++++++++++++++
encodings/cyrillic.js | 35 ++++++++++++
index.js | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++
package.json | 20 +++++++
test/cyrillic-test.js | 86 ++++++++++++++++++++++++++++++
test/main-test.js | 51 ++++++++++++++++++
8 files changed, 403 insertions(+)
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..042ccf2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+node_modules
+*~
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..d518d83
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+Copyright (c) 2011 Alexander Shtuchkin
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
diff --git a/README b/README
new file mode 100644
index 0000000..a60bda2
--- /dev/null
+++ b/README
@@ -0,0 +1,44 @@
+iconv-lite - native javascript conversion between character encodings.
+======================================================================
+
+## Usage
+
+ var iconv = require('iconv-lite');
+
+ // Convert from an encoded buffer to string.
+ str = iconv.fromEncoding(buf, 'win-1251');
+
+ // Convert from string to an encoded buffer.
+ buf = iconv.toEncoding("Sample input string", 'win-1251');
+
+
+## Supported encodings
+
+Currently only a small subset of encodings is supported:
+
+* All node.js native encodings: 'utf8', 'ucs2', 'ascii', 'binary', 'base64'.
+* 'latin1'
+* Cyrillic encodings: 'windows-1251', 'koi8-r', 'iso 8859-5'.
+
+Other encodings are easy to add; see the source. Contributions are welcome.
+
+
+## Encoding/decoding speed
+
+Comparison with iconv module (1000 times 256kb, on Core i5/2.5 GHz).
+
+ Operation\module iconv iconv-lite (this)
+ toEncoding('win1251') 19.57 mb/s 49.04 mb/s
+ fromEncoding('win1251') 16.39 mb/s 24.11 mb/s
+
+
+## Notes
+
+This module is JavaScript-only, so it can be used in a sandboxed environment like [Cloud9](http://c9.io).
+
+Untranslatable characters are set to '?'. Transliteration is not currently supported; pull requests are welcome.
+
+## Testing
+
+ npm install --dev iconv-lite
+ vows
diff --git a/encodings/cyrillic.js b/encodings/cyrillic.js
new file mode 100644
index 0000000..ae571ed
--- /dev/null
+++ b/encodings/cyrillic.js
@@ -0,0 +1,35 @@
+// Cyrillic encodings
+// An easy way to get the chars for an encoding, e.g. in Python: ''.join(map(chr, range(128, 256))).decode('windows-1251', 'replace')
+// TODO: bestfit (http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/bestfit1251.txt)
+
+module.exports = {
+ // Win1251: http://msdn.microsoft.com/en-us/goglobal/cc305144
+ // http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/bestfit1251.txt
+ "windows1251": {
+ type: "singlebyte",
+ chars: "ЂЃ‚ѓ„…†‡€‰Љ‹ЊЌЋЏђ‘’“”•–—�™љ›њќћџ ЎўЈ¤Ґ¦§Ё©Є«¬\xAD®Ї°±Ііґµ¶·ё№є»јЅѕїАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя",
+ },
+ "win1251": "windows1251",
+ "cp1251": "windows1251",
+ "1251": "windows1251",
+ 1251: "windows1251",
+
+ // KOI8-R: http://tools.ietf.org/html/rfc1489
+ // http://unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT
+ "koi8r": {
+ type: 'singlebyte',
+ chars: '─│┌┐└┘├┤┬┴┼▀▄█▌▐░▒▓⌠■∙√≈≤≥ ⌡°²·÷═║╒ё╓╔╕╖╗╘╙╚╛╜╝╞╟╠╡Ё╢╣╤╥╦╧╨╩╪╫╬©юабцдефгхийклмнопярстужвьызшэщчъЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪ',
+ },
+ "cp20866": "koi8r",
+ 20866: "koi8r",
+
+ // ISO-8859-5:
+ // http://unicode.org/Public/MAPPINGS/ISO8859/8859-5.TXT
+ "iso88595": {
+ type: 'singlebyte',
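+ chars: '\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0ЁЂЃЄЅІЇЈЉЊЋЌ\xadЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя№ёђѓєѕіїјљњћќ§ўџ',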
+ },
+ "cp28595": "iso88595",
+ 28595: "iso88595",
+}
+
diff --git a/index.js b/index.js
new file mode 100644
index 0000000..36c3c17
--- /dev/null
+++ b/index.js
@@ -0,0 +1,144 @@
+// Module exports
+module.exports = iconv = {
+ toEncoding: function(str, encoding) {
+ return iconv.getCodec(encoding).toEncoding(str);
+ },
+ fromEncoding: function(buf, encoding) {
+ return iconv.getCodec(encoding).fromEncoding(buf);
+ },
+
+ defaultCharUnicode: '�',
+ defaultCharSingleByte: '?',
+
+ // Get correct codec for given encoding.
+ getCodec: function(encoding) {
+ enc = encoding || "utf8";
+ codecOptions = undefined;
+ while (1) {
+ if (getType(enc) === "String")
+ enc = enc.replace(/[- ]/g, "").toLowerCase();
+ var codec = iconv.encodings[enc];
+ var type = getType(codec);
+ if (type === "String") {
+ // Link to other encoding.
+ codecOptions = {originalEncoding: enc};
+ enc = codec;
+ }
+ else if (type === "Object" && codec.type != undefined) {
+ // Options for other encoding.
+ codecOptions = codec;
+ enc = codec.type;
+ }
+ else if (type === "Function")
+ // Codec itself.
+ return codec(codecOptions);
+ else
+ throw new Error("Encoding not recognized: '" + encoding + "' (searched as: '"+enc+"')");
+ }
+ },
+
+ // Define basic encodings
+ encodings: {
+ internal: function(options) {
+ return {
+ toEncoding: function(str) {
+ return new Buffer(ensureString(str), options.originalEncoding);
+ },
+ fromEncoding: function(buf) {
+ return ensureBuffer(buf).toString(options.originalEncoding);
+ },
+ };
+ },
+ utf8: "internal",
+ ucs2: "internal",
+ binary: "internal",
+ ascii: "internal",
+ base64: "internal",
+ latin1: {
+ type: "internal",
+ originalEncoding: "binary"
+ },
+
+ // Codepage single-byte encodings.
+ singlebyte: function(options) {
+ // Prepare chars if needed
+ if (!options.chars || (options.chars.length !== 128 && options.chars.length !== 256))
+ throw new Error("Encoding '"+options.type+"' has incorrect 'chars' (must be of len 128 or 256)");
+
+ if (options.chars.length === 128)
+ options.chars = asciiString + options.chars;
+
+ if (!options.charsBuf) {
+ options.charsBuf = new Buffer(256*2);
+ for (var i = 0; i < options.chars.length; i++) {
+ var code = options.chars.charCodeAt(i);
+ options.charsBuf[i*2+0] = code & 0xFF;
+ options.charsBuf[i*2+1] = code >>> 8;
+ }
+ }
+
+ if (!options.revCharsBuf) {
+ options.revCharsBuf = new Buffer(65536);
+ var defChar = iconv.defaultCharSingleByte.charCodeAt(0);
+ for (var i = 0; i < options.revCharsBuf.length; i++)
+ options.revCharsBuf[i] = defChar;
+ for (var i = 0; i < options.chars.length; i++)
+ options.revCharsBuf[options.chars.charCodeAt(i)] = i;
+ }
+
+ return {
+ toEncoding: function(str) {
+ str = ensureString(str);
+
+ var buf = new Buffer(str.length);
+ var revCharsBuf = options.revCharsBuf;
+ for (var i = 0; i < str.length; i++)
+ buf[i] = revCharsBuf[str.charCodeAt(i)];
+
+ return buf;
+ },
+ fromEncoding: function(buf) {
+ buf = ensureBuffer(buf);
+
+ // As strings are immutable in JS, we use a ucs2 buffer to speed up computations.
+ var charsBuf = options.charsBuf;
+ var newBuf = new Buffer(buf.length*2);
+ var idx1 = 0, idx2 = 0;
+ for (var i = 0, _len = buf.length; i < _len; i++) {
+ idx1 = buf[i]*2; idx2 = i*2;
+ newBuf[idx2] = charsBuf[idx1];
+ newBuf[idx2+1] = charsBuf[idx1+1];
+ }
+ return newBuf.toString('ucs2');
+ },
+ };
+ },
+ },
+}
+
+// Load other encodings from files in /encodings dir.
+var encodingsDir = __dirname+"/encodings/";
+require('fs').readdirSync(encodingsDir).forEach(function(file) {
+ var encodings = require(encodingsDir + file)
+ for (var key in encodings)
+ iconv.encodings[key] = encodings[key]
+});
+
+// Utilities
+var asciiString = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f'+
+ ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f';
+
+var ensureBuffer = function(buf) {
+ buf = buf || new Buffer(0);
+ return (buf instanceof Buffer) ? buf : new Buffer(buf.toString(), "utf8");
+}
+
+var ensureString = function(str) {
+ str = str || "";
+ return (str instanceof String) ? str : str.toString((str instanceof Buffer) ? 'utf8' : undefined);
+}
+
+var getType = function(obj) {
+ return Object.prototype.toString.call(obj).slice(8, -1);
+}
+
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..6266c9c
--- /dev/null
+++ b/package.json
@@ -0,0 +1,20 @@
+{
+ "name": "iconv-lite",
+ "description": "Convert character encodings in pure javascript.",
+ "version": "0.1.0",
+
+ "keywords": ["iconv", "convert", "charset"],
+ "author": "Alexander Shtuchkin <ashtuchkin at gmail.com>",
+
+ "homepage": "http://github.com/ashtuchkin/node-iconv/",
+ "repository": {
+ "type": "git",
+ "url": "git://github.com/ashtuchkin/node-iconv.git"
+ },
+ "engines": {
+ "node": ">=0.4.0"
+ },
+ "devDependencies": {
+ "vows": ""
+ }
+}
diff --git a/test/cyrillic-test.js b/test/cyrillic-test.js
new file mode 100644
index 0000000..259d283
--- /dev/null
+++ b/test/cyrillic-test.js
@@ -0,0 +1,86 @@
+var vows = require('vows'),
+ assert = require('assert'),
+ iconv = require(__dirname+'/../');
+
+var baseStrings = {
+ empty: "",
+ hi: "Привет!",
+ ascii: '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f'+
+ ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f',
+ rus: "АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя",
+ additional1: "ЂЃ‚ѓ„…†‡€‰Љ‹ЊЌЋЏђ‘’“”•–—™љ›њќћџ ЎўЈ¤Ґ¦§Ё©Є«¬\xAD®Ї°±Ііґµ¶·ё№є»јЅѕї",
+ additional2: "─│┌┐└┘├┤┬┴┼▀▄█▌▐░▒▓⌠■∙√≈≤≥ ⌡°²·÷═║╒ё╓╔╕╖╗╘╙╚╛╜╝╞╟╠╡Ё╢╣╤╥╦╧╨╩╪╫╬©",
+ additional3: " ЁЂЃЄЅІЇЈЉЊЋЌЎЏ№ёђѓєѕіїјљњћќ§ўџ",
+ untranslatable: "£Åçþÿ¿",
+};
+
+var encodings = [{
+ name: "Win-1251",
+ variations: ['win1251', 'Windows-1251', 'windows1251', 'CP1251', 1251],
+ encodedStrings: {
+ empty: new Buffer(''),
+ hi: new Buffer('\xcf\xf0\xe8\xe2\xe5\xf2!', 'binary'),
+ ascii: new Buffer(baseStrings.ascii, 'binary'),
+ rus: new Buffer('\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff', 'binary'),
+ additional1: new Buffer('\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf','binary'),
+ }
+}, {
+ name: "Koi8-R",
+ variations: ['koi8r', 'KOI8-R', 'cp20866', 20866],
+ encodedStrings: {
+ empty: new Buffer(''),
+ hi: new Buffer('\xf0\xd2\xc9\xd7\xc5\xd4!', 'binary'),
+ ascii: new Buffer(baseStrings.ascii, 'binary'),
+ rus: new Buffer('\xe1\xe2\xf7\xe7\xe4\xe5\xf6\xfa\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf2\xf3\xf4\xf5\xe6\xe8\xe3\xfe\xfb\xfd\xff\xf9\xf8\xfc\xe0\xf1\xc1\xc2\xd7\xc7\xc4\xc5\xd6\xda\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd2\xd3\xd4\xd5\xc6\xc8\xc3\xde\xdb\xdd\xdf\xd9\xd8\xdc\xc0\xd1', 'binary'),
+ additional2: new Buffer('\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf', 'binary'),
+ }
+}, {
+ name: "ISO 8859-5",
+ variations: ['iso88595', 'ISO-8859-5', 'ISO 8859-5', 'cp28595', 28595],
+ encodedStrings: {
+ empty: new Buffer(''),
+ hi: new Buffer('\xbf\xe0\xd8\xd2\xd5\xe2!', 'binary'),
+ ascii: new Buffer(baseStrings.ascii, 'binary'),
+ rus: new Buffer('\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef', 'binary'),
+ additional3: new Buffer('\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff', 'binary'),
+ }
+}];
+
+var testsBatch = {};
+encodings.forEach(function(encoding) {
+ var enc = encoding.variations[0];
+ var key = "hi";
+ var tests = {
+ "Convert to empty buffer": function() {
+ assert.strictEqual(iconv.toEncoding("", enc).toString('binary'), new Buffer('').toString('binary'));
+ },
+ "Convert from empty buffer": function() {
+ assert.strictEqual(iconv.fromEncoding(new Buffer(''), enc), "");
+ },
+ "Convert from buffer": function() {
+ for (var key in encoding.encodedStrings)
+ assert.strictEqual(iconv.fromEncoding(encoding.encodedStrings[key], enc),
+ baseStrings[key]);
+ },
+ "Convert to buffer": function() {
+ for (var key in encoding.encodedStrings)
+ assert.strictEqual(iconv.toEncoding(baseStrings[key], enc).toString('binary'),
+ encoding.encodedStrings[key].toString('binary'));
+ },
+ "Try different variations of encoding": function() {
+ encoding.variations.forEach(function(enc) {
+ assert.strictEqual(iconv.fromEncoding(encoding.encodedStrings[key], enc), baseStrings[key]);
+ assert.strictEqual(iconv.toEncoding(baseStrings[key], enc).toString('binary'), encoding.encodedStrings[key].toString('binary'));
+ });
+ },
+ "Untranslatable chars are converted to defaultCharSingleByte": function() {
+ var expected = baseStrings.untranslatable.split('').map(function(c) {return iconv.defaultCharSingleByte; }).join('');
+ assert.strictEqual(iconv.toEncoding(baseStrings.untranslatable, enc).toString('binary'), expected); // Only '?' characters.
+ }
+ };
+
+ testsBatch[encoding.name+":"] = tests;
+});
+
+vows.describe("Test Cyrillic encodings").addBatch(testsBatch).export(module);
+
diff --git a/test/main-test.js b/test/main-test.js
new file mode 100644
index 0000000..994b7bb
--- /dev/null
+++ b/test/main-test.js
@@ -0,0 +1,51 @@
+var vows = require('vows'),
+ assert = require('assert'),
+ iconv = require(__dirname+'/../');
+
+var testString = "Hello123!";
+var testStringLatin1 = "Hello123!£Å÷×çþÿ¿®";
+var testStringBase64 = "SGVsbG8xMjMh";
+
+vows.describe("Generic UTF8-UCS2 tests").addBatch({
+ "Vows is working": function() {},
+ "Return values are of correct types": function() {
+ assert.ok(iconv.toEncoding(testString, "utf8") instanceof Buffer);
+
+ var s = iconv.fromEncoding(new Buffer(testString), "utf8");
+ assert.strictEqual(Object.prototype.toString.call(s), "[object String]");
+ },
+ "Internal encodings all correctly encoded/decoded": function() {
+ ['utf8', "UTF-8", "UCS2", "binary", ""].forEach(function(enc) {
+ assert.strictEqual(iconv.toEncoding(testStringLatin1, enc).toString(enc), testStringLatin1);
+ assert.strictEqual(iconv.fromEncoding(new Buffer(testStringLatin1, enc), enc), testStringLatin1);
+ });
+ },
+ "Base64 correctly encoded/decoded": function() {
+ assert.strictEqual(iconv.toEncoding(testStringBase64, "base64").toString("binary"), testString);
+ assert.strictEqual(iconv.fromEncoding(new Buffer(testString, "binary"), "base64"), testStringBase64);
+ },
+ "Latin1 correctly encoded/decoded": function() {
+ assert.strictEqual(iconv.toEncoding(testStringLatin1, "latin1").toString("binary"), testStringLatin1);
+ assert.strictEqual(iconv.fromEncoding(new Buffer(testStringLatin1, "binary"), "latin1"), testStringLatin1);
+ },
+ "Convert from string, not buffer (utf8 used)": function() {
+ assert.strictEqual(iconv.fromEncoding(testStringLatin1, "utf8"), testStringLatin1);
+ },
+ "Convert to string, not buffer (utf8 used)": function() {
+ var res = iconv.toEncoding(new Buffer(testStringLatin1, "utf8"));
+ assert.ok(res instanceof Buffer);
+ assert.strictEqual(res.toString("utf8"), testStringLatin1);
+ },
+ "Throws on unknown encodings": function() {
+ assert.throws(function() { iconv.toEncoding("a", "xxx"); });
+ assert.throws(function() { iconv.fromEncoding("a", "xxx"); });
+ },
+ "Convert non-strings and non-buffers": function() {
+ assert.strictEqual(iconv.toEncoding({}, "utf8").toString(), "[object Object]");
+ assert.strictEqual(iconv.toEncoding(10, "utf8").toString(), "10");
+ assert.strictEqual(iconv.toEncoding(undefined, "utf8").toString(), "");
+ assert.strictEqual(iconv.fromEncoding({}, "utf8"), "[object Object]");
+ assert.strictEqual(iconv.fromEncoding(10, "utf8"), "10");
+ assert.strictEqual(iconv.fromEncoding(undefined, "utf8"), "");
+ },
+}).export(module)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/node-iconv-lite.git
More information about the Pkg-javascript-commits
mailing list