[Pkg-javascript-commits] [node-iconv-lite] 04/83: for Simple-Chinese language, gbk and gb2312 encoding is supported
matthew pideil
mpideil-guest at moszumanska.debian.org
Tue Apr 1 19:56:45 UTC 2014
This is an automated email from the git hooks/post-receive script.
mpideil-guest pushed a commit to branch master
in repository node-iconv-lite.
commit e559c6b9cbbdf1213ab65d9ed06a7626098d215f
Author: Jinwu Zhan <jenkinv at 163.com>
Date: Wed Nov 23 15:02:53 2011 +0800
for Simple-Chinese language, gbk and gb2312 encoding is supported
---
encodings/gbk.js | 8 ++++++
encodings/table/gbk.js | 1 +
index.js | 66 ++++++++++++++++++++++++++++++++++++++++++++++++--
package.json | 3 ++-
test/gbk-test.js | 30 +++++++++++++++++++++++
test/gbkFile.txt | 14 +++++++++++
6 files changed, 119 insertions(+), 3 deletions(-)
diff --git a/encodings/gbk.js b/encodings/gbk.js
new file mode 100644
index 0000000..56883a4
--- /dev/null
+++ b/encodings/gbk.js
@@ -0,0 +1,8 @@
+var gbkTable = require(__dirname + '/table/gbk.js');
+module.exports = {
+ 'gb2312': 'gbk',
+ 'gbk': {
+ type: 'table',
+ table: gbkTable
+ }
+}
diff --git a/encodings/table/gbk.js b/encodings/table/gbk.js
new file mode 100644
index 0000000..3692a15
--- /dev/null
+++ b/encodings/table/gbk.js
@@ -0,0 +1 @@
+module.exports={33088:19970,33089:19972,33090:19973,33091:19974,33092:19983,33093:19986,33094:19991,33095:19999,33096:20000,33097:20001,33098:20003,33099:20006,33100:20009,33101:20014,33102:20015,33103:20017,33104:20019,33105:20021,33106:20023,33107:20028,33108:20032,33109:20033,33110:20034,33111:20036,33112:20038,33113:20042,33114:20049,33115:20053,33116:20055,33117:20058,33118:20059,33119:20066,33120:20067,33121:20068,33122:20069,33123:20071,33124:20072,33125:20074,33126:20075,33127:20 [...]
\ No newline at end of file
diff --git a/index.js b/index.js
index 36c3c17..5f03f5a 100644
--- a/index.js
+++ b/index.js
@@ -113,12 +113,74 @@ module.exports = iconv = {
},
};
},
+
+ // Codepage double-byte encodings.
+ table: function(options) {
+ if (!options.table) {
+ throw new Error("Encoding '" + options.type +"' has incorect 'table' option");
+ }
+ var table = options.table, key,
+ revCharsTable = {};
+ for (key in table) {
+ revCharsTable[table[key]] = parseInt(key);
+ }
+ return {
+ toEncoding: function(str) {
+ str = ensureString(str);
+ var len = 0, strLen = str.length;
+ for (var i = 0; i < strLen; i++) {
+ if (!!(str.charCodeAt(i) >> 8)) {
+ len += 2;
+ } else {
+ len ++;
+ }
+ }
+ var newBuf = new Buffer(len);
+ for (var i = 0, j = 0; i < strLen; i++) {
+ var unicode = str.charCodeAt(i);
+ if (!!(unicode >> 7)) {
+ var gbkcode = revCharsTable[unicode] || 0xA1F0;//not found in table ,replace it
+ newBuf[j++] = gbkcode >> 8;//high byte;
+ newBuf[j++] = gbkcode & 0xFF;//low byte
+ } else {//ascii
+ newBuf[j++] = unicode;
+ }
+ }
+ return newBuf;
+ },
+ fromEncoding: function(buf) {
+ buf = ensureBuffer(buf);
+ var idx = 0, len = 0,
+ newBuf = new Buffer(len*2),unicode,gbkcode;
+ for (var i = 0, _len = buf.length; i < _len; i++, len++) {
+ if (!!(buf[i] & 0x80)) {//the high bit is 1, so this byte is gbkcode's high byte.skip next byte
+ i++;
+ }
+ }
+ var newBuf = new Buffer(len*2);
+ for (var i = 0, j = 0, _len = buf.length; i < _len; i++, j++) {
+ var temp = buf[i], gbkcode, unicode;
+ if (temp & 0x80) {
+ gbkcode = (temp << 8) + buf[++i];
+ unicode = table[gbkcode] || iconv.defaultCharUnicode.charCodeAt(0);//not found in table, replace with defaultCharUnicode
+ }else {
+ unicode = temp;
+ }
+ newBuf[j*2] = unicode & 0xFF;//low byte
+ newBuf[j*2+1] = unicode >> 8;//high byte
+ }
+ return newBuf.toString('ucs2');
+ }
+ }
+ },
},
}
// Load other encodings from files in /encodings dir.
-var encodingsDir = __dirname+"/encodings/";
-require('fs').readdirSync(encodingsDir).forEach(function(file) {
+var encodingsDir = __dirname+"/encodings/",
+ fs = require('fs');
+fs.readdirSync(encodingsDir).forEach(function(file) {
+ if(fs.statSync(encodingsDir + file).isDirectory()) return;
var encodings = require(encodingsDir + file)
for (var key in encodings)
iconv.encodings[key] = encodings[key]
diff --git a/package.json b/package.json
index 6266c9c..4222638 100644
--- a/package.json
+++ b/package.json
@@ -15,6 +15,7 @@
"node": ">=0.4.0"
},
"devDependencies": {
- "vows": ""
+ "vows": "",
+ "iconv": "",
}
}
diff --git a/test/gbk-test.js b/test/gbk-test.js
new file mode 100644
index 0000000..a40b4cc
--- /dev/null
+++ b/test/gbk-test.js
@@ -0,0 +1,30 @@
+var vows = require('vows'),
+ fs = require('fs'),
+ assert = require('assert'),
+ iconv = require(__dirname+'/../');
+
+var testString = "中国abc",//unicode contains GBK-code and ascii
+ testStringGBKBuffer = new Buffer([0xd6,0xd0,0xb9,0xfa,0x61,0x62,0x63]);
+
+vows.describe("GBK tests").addBatch({
+ "Vows is working": function() {},
+ "Return values are of correct types": function() {
+ assert.ok(iconv.toEncoding(testString, "utf8") instanceof Buffer);
+ var s = iconv.fromEncoding(new Buffer(testString), "utf8");
+ assert.strictEqual(Object.prototype.toString.call(s), "[object String]");
+ },
+ "GBK correctly encoded/decoded": function() {
+ assert.strictEqual(iconv.toEncoding(testString, "GBK").toString('binary'), testStringGBKBuffer.toString('binary'));
+ assert.strictEqual(iconv.fromEncoding(testStringGBKBuffer, "GBK"), testString);
+ },
+ "GB2312 correctly encoded/decoded": function() {
+ assert.strictEqual(iconv.toEncoding(testString, "GB2312").toString('binary'), testStringGBKBuffer.toString('binary'));
+ assert.strictEqual(iconv.fromEncoding(testStringGBKBuffer, "GB2312"), testString);
+ },
+ "GBK file read decoded,compare with iconv result": function() {
+ var contentBuffer = fs.readFileSync("gbkFile.txt");
+ var str = iconv.fromEncoding(contentBuffer, "GBK");
+ var iconvc = new (require('iconv').Iconv)('GBK','utf8');
+ assert.strictEqual(iconvc.convert(contentBuffer).toString(), str);
+ },
+}).export(module)
diff --git a/test/gbkFile.txt b/test/gbkFile.txt
new file mode 100644
index 0000000..345b5d0
--- /dev/null
+++ b/test/gbkFile.txt
@@ -0,0 +1,14 @@
+<!doctype html><html><head><meta http-equiv="Content-Type" content="text/html;charset=gb2312"><title>�ٶ�һ�£����֪�� </title><style>html{overflow-y:auto}body{font:12px arial;text-align:center;background:#fff}body,p,form,ul,li{margin:0;padding:0;list-style:none}body,form,#fm{position:relative}td{text-align:left}img{border:0}a{color:#00c}a:active{color:#f60}#u{padding:7px 10px 3px 0;text-align:right}#m{width:680px;margin:0 auto}#nv{font-size:16px;margin:0 0 4px;text-align:left;text-inde [...]
+</head>
+
+<body><div id="u"><a href="http://www.baidu.com/gaoji/preferences.html" name="tj_setting">��������</a> | <a href="http://passport.baidu.com/?login&tpl=mn" name="tj_login">��¼</a></div>
+<div id="m"><p id="lg"><img src="http://www.baidu.com/img/baidu_sylogo1.gif" width="270" height="129" usemap="#mp"><map name="mp"><area shape="rect" coords="40,25,230,95" href="http://hi.baidu.com/baidu/" target="_blank" title="��˽��� �ٶȵĿռ�" ></map></p><p id="nv"><a href="http://news.baidu.com">�� ��</a>��<b>�� ҳ</b>��<a href="http://tieba.baidu.com">�� ��</a>��<a href="http://zhidao.baidu.com">֪ ��</a>��<a href="http://mp3.baidu.com">MP3</a>��<a href="http://image.b [...]
+<p id="lk"><a href="http://hi.baidu.com">�ռ�</a>��<a href="http://baike.baidu.com">�ٿ�</a>��<a href="http://www.hao123.com">hao123</a><span> | <a href="/more/">����>></a></span></p><p id="lm"></p><p><a id="seth" onClick="this.setHomePage('http://www.baidu.com')" href="http://utility.baidu.com/traf/click.php?id=215&url=http://www.baidu.com" onmousedown="return ns_c({'fm':'behs','tab':'homepage','pos':0})">�Ѱٶ���Ϊ��ҳ</a><a id="setf" onClick="fa(this)" href="javascript:void(0)" onmous [...]
+<p id="lh"><a href="http://e.baidu.com/?refer=888">����ٶ��ƹ�</a> | <a href="http://top.baidu.com">�������ư�</a> | <a href="http://home.baidu.com">���ڰٶ�</a> | <a href="http://ir.baidu.com">About Baidu</a></p><p id="cp">©2011 Baidu <a href="/duty/">ʹ�ðٶ�ǰ�ض�</a> <a href="http://www.miibeian.gov.cn" target="_blank">��ICP֤030173��</a> <img src="http://gimg.baidu.com/img/gs.gif"></p></div></body>
+
+<script>var w=window,d=document,n=navigator,k=d.f.wd,a=d.getElementById("nv").getElementsByTagName("a"),isIE=n.userAgent.indexOf("MSIE")!=-1&&!window.opera;for(var i=0;i<a.length;i++){a[i].onclick=function(){if(k.value.length>0){var C=this,A=C.href,B=encodeURIComponent(k.value);if(A.indexOf("q=")!=-1){C.href=A.replace(/q=[^&\x24]*/,"q="+B)}else{this.href+="?q="+B}}}}(function(){if(/q=([^&]+)/.test(location.search)){k.value=decodeURIComponent(RegExp["\x241"])}})();if(n.cookieEnabled&&!/su [...]
+
+
+<script type="text/javascript" src="http://www.baidu.com/cache/hps/js/hps-1.2.js"></script>
+
+</html><!--b762345d979562e8-->
\ No newline at end of file
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-javascript/node-iconv-lite.git
More information about the Pkg-javascript-commits
mailing list